From 4f75da3666c0c572967729a2401ac650be5581b6 Mon Sep 17 00:00:00 2001 From: Tal Gilboa Date: Thu, 10 Jan 2019 17:33:17 +0200 Subject: linux/dim: Move implementation to .c files Moved all logic from dim.h and net_dim.h to dim.c and net_dim.c. This is both more structurally appealing and would allow to only expose externally used functions. Signed-off-by: Tal Gilboa Signed-off-by: Saeed Mahameed --- lib/Kconfig | 8 +++ lib/Makefile | 1 + lib/dim/Makefile | 9 +++ lib/dim/dim.c | 74 +++++++++++++++++++++ lib/dim/net_dim.c | 190 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 282 insertions(+) create mode 100644 lib/dim/Makefile create mode 100644 lib/dim/dim.c create mode 100644 lib/dim/net_dim.c (limited to 'lib') diff --git a/lib/Kconfig b/lib/Kconfig index 90623a0e1942..78ddb9526b62 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -562,6 +562,14 @@ config SIGNATURE Digital signature verification. Currently only RSA is supported. Implementation is done using GnuPG MPI library +config DIMLIB + bool "DIM library" + default y + help + Dynamic Interrupt Moderation library. + Implements an algorithm for dynamically change CQ modertion values + according to run time performance. + # # libfdt files, only selected if needed. # diff --git a/lib/Makefile b/lib/Makefile index fb7697031a79..dcb558c7554d 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -202,6 +202,7 @@ obj-$(CONFIG_GLOB) += glob.o obj-$(CONFIG_GLOB_SELFTEST) += globtest.o obj-$(CONFIG_MPILIB) += mpi/ +obj-$(CONFIG_DIMLIB) += dim/ obj-$(CONFIG_SIGNATURE) += digsig.o lib-$(CONFIG_CLZ_TAB) += clz_tab.o diff --git a/lib/dim/Makefile b/lib/dim/Makefile new file mode 100644 index 000000000000..160afe288df0 --- /dev/null +++ b/lib/dim/Makefile @@ -0,0 +1,9 @@ +# +# DIM Dynamic Interrupt Moderation library +# + +obj-$(CONFIG_DIMLIB) = net_dim.o + +net_dim-y = \ + dim.o \ + net_dim.o diff --git a/lib/dim/dim.c b/lib/dim/dim.c new file mode 100644 index 000000000000..17d5236759bd --- /dev/null +++ b/lib/dim/dim.c @@ -0,0 +1,74 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* + * Copyright (c) 2019, Mellanox Technologies inc. All rights reserved. + */ + +#include + +bool dim_on_top(struct dim *dim) +{ + switch (dim->tune_state) { + case DIM_PARKING_ON_TOP: + case DIM_PARKING_TIRED: + return true; + case DIM_GOING_RIGHT: + return (dim->steps_left > 1) && (dim->steps_right == 1); + default: /* DIM_GOING_LEFT */ + return (dim->steps_right > 1) && (dim->steps_left == 1); + } +} +EXPORT_SYMBOL(dim_on_top); + +void dim_turn(struct dim *dim) +{ + switch (dim->tune_state) { + case DIM_PARKING_ON_TOP: + case DIM_PARKING_TIRED: + break; + case DIM_GOING_RIGHT: + dim->tune_state = DIM_GOING_LEFT; + dim->steps_left = 0; + break; + case DIM_GOING_LEFT: + dim->tune_state = DIM_GOING_RIGHT; + dim->steps_right = 0; + break; + } +} +EXPORT_SYMBOL(dim_turn); + +void dim_park_on_top(struct dim *dim) +{ + dim->steps_right = 0; + dim->steps_left = 0; + dim->tired = 0; + dim->tune_state = DIM_PARKING_ON_TOP; +} +EXPORT_SYMBOL(dim_park_on_top); + +void dim_park_tired(struct dim *dim) +{ + dim->steps_right = 0; + dim->steps_left = 0; + dim->tune_state = DIM_PARKING_TIRED; +} +EXPORT_SYMBOL(dim_park_tired); + +void dim_calc_stats(struct dim_sample *start, struct dim_sample *end, + struct dim_stats *curr_stats) +{ + /* u32 holds up to 71 minutes, should be enough */ + u32 delta_us = ktime_us_delta(end->time, start->time); + u32 npkts = BIT_GAP(BITS_PER_TYPE(u32), end->pkt_ctr, start->pkt_ctr); + u32 nbytes = BIT_GAP(BITS_PER_TYPE(u32), end->byte_ctr, + start->byte_ctr); + + if (!delta_us) + return; + + curr_stats->ppms = DIV_ROUND_UP(npkts * USEC_PER_MSEC, delta_us); + curr_stats->bpms = DIV_ROUND_UP(nbytes * USEC_PER_MSEC, delta_us); + curr_stats->epms = DIV_ROUND_UP(DIM_NEVENTS * USEC_PER_MSEC, + delta_us); +} +EXPORT_SYMBOL(dim_calc_stats); diff --git a/lib/dim/net_dim.c b/lib/dim/net_dim.c new file mode 100644 index 000000000000..5bcc902c5388 --- /dev/null +++ b/lib/dim/net_dim.c @@ -0,0 +1,190 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* + * Copyright (c) 2018, Mellanox Technologies inc. All rights reserved. + */ + +#include + +struct dim_cq_moder +net_dim_get_rx_moderation(u8 cq_period_mode, int ix) +{ + struct dim_cq_moder cq_moder = rx_profile[cq_period_mode][ix]; + + cq_moder.cq_period_mode = cq_period_mode; + return cq_moder; +} +EXPORT_SYMBOL(net_dim_get_rx_moderation); + +struct dim_cq_moder +net_dim_get_def_rx_moderation(u8 cq_period_mode) +{ + u8 profile_ix = cq_period_mode == DIM_CQ_PERIOD_MODE_START_FROM_CQE ? + NET_DIM_DEF_PROFILE_CQE : NET_DIM_DEF_PROFILE_EQE; + + return net_dim_get_rx_moderation(cq_period_mode, profile_ix); +} +EXPORT_SYMBOL(net_dim_get_def_rx_moderation); + +struct dim_cq_moder +net_dim_get_tx_moderation(u8 cq_period_mode, int ix) +{ + struct dim_cq_moder cq_moder = tx_profile[cq_period_mode][ix]; + + cq_moder.cq_period_mode = cq_period_mode; + return cq_moder; +} +EXPORT_SYMBOL(net_dim_get_tx_moderation); + +struct dim_cq_moder +net_dim_get_def_tx_moderation(u8 cq_period_mode) +{ + u8 profile_ix = cq_period_mode == DIM_CQ_PERIOD_MODE_START_FROM_CQE ? + NET_DIM_DEF_PROFILE_CQE : NET_DIM_DEF_PROFILE_EQE; + + return net_dim_get_tx_moderation(cq_period_mode, profile_ix); +} +EXPORT_SYMBOL(net_dim_get_def_tx_moderation); + +static int net_dim_step(struct dim *dim) +{ + if (dim->tired == (NET_DIM_PARAMS_NUM_PROFILES * 2)) + return DIM_TOO_TIRED; + + switch (dim->tune_state) { + case DIM_PARKING_ON_TOP: + case DIM_PARKING_TIRED: + break; + case DIM_GOING_RIGHT: + if (dim->profile_ix == (NET_DIM_PARAMS_NUM_PROFILES - 1)) + return DIM_ON_EDGE; + dim->profile_ix++; + dim->steps_right++; + break; + case DIM_GOING_LEFT: + if (dim->profile_ix == 0) + return DIM_ON_EDGE; + dim->profile_ix--; + dim->steps_left++; + break; + } + + dim->tired++; + return DIM_STEPPED; +} + +static void net_dim_exit_parking(struct dim *dim) +{ + dim->tune_state = dim->profile_ix ? DIM_GOING_LEFT : DIM_GOING_RIGHT; + net_dim_step(dim); +} + +static int net_dim_stats_compare(struct dim_stats *curr, + struct dim_stats *prev) +{ + if (!prev->bpms) + return curr->bpms ? DIM_STATS_BETTER : DIM_STATS_SAME; + + if (IS_SIGNIFICANT_DIFF(curr->bpms, prev->bpms)) + return (curr->bpms > prev->bpms) ? DIM_STATS_BETTER : + DIM_STATS_WORSE; + + if (!prev->ppms) + return curr->ppms ? DIM_STATS_BETTER : + DIM_STATS_SAME; + + if (IS_SIGNIFICANT_DIFF(curr->ppms, prev->ppms)) + return (curr->ppms > prev->ppms) ? DIM_STATS_BETTER : + DIM_STATS_WORSE; + + if (!prev->epms) + return DIM_STATS_SAME; + + if (IS_SIGNIFICANT_DIFF(curr->epms, prev->epms)) + return (curr->epms < prev->epms) ? DIM_STATS_BETTER : + DIM_STATS_WORSE; + + return DIM_STATS_SAME; +} + +static bool net_dim_decision(struct dim_stats *curr_stats, struct dim *dim) +{ + int prev_state = dim->tune_state; + int prev_ix = dim->profile_ix; + int stats_res; + int step_res; + + switch (dim->tune_state) { + case DIM_PARKING_ON_TOP: + stats_res = net_dim_stats_compare(curr_stats, + &dim->prev_stats); + if (stats_res != DIM_STATS_SAME) + net_dim_exit_parking(dim); + break; + + case DIM_PARKING_TIRED: + dim->tired--; + if (!dim->tired) + net_dim_exit_parking(dim); + break; + + case DIM_GOING_RIGHT: + case DIM_GOING_LEFT: + stats_res = net_dim_stats_compare(curr_stats, + &dim->prev_stats); + if (stats_res != DIM_STATS_BETTER) + dim_turn(dim); + + if (dim_on_top(dim)) { + dim_park_on_top(dim); + break; + } + + step_res = net_dim_step(dim); + switch (step_res) { + case DIM_ON_EDGE: + dim_park_on_top(dim); + break; + case DIM_TOO_TIRED: + dim_park_tired(dim); + break; + } + + break; + } + + if (prev_state != DIM_PARKING_ON_TOP || + dim->tune_state != DIM_PARKING_ON_TOP) + dim->prev_stats = *curr_stats; + + return dim->profile_ix != prev_ix; +} + +void net_dim(struct dim *dim, struct dim_sample end_sample) +{ + struct dim_stats curr_stats; + u16 nevents; + + switch (dim->state) { + case DIM_MEASURE_IN_PROGRESS: + nevents = BIT_GAP(BITS_PER_TYPE(u16), + end_sample.event_ctr, + dim->start_sample.event_ctr); + if (nevents < DIM_NEVENTS) + break; + dim_calc_stats(&dim->start_sample, &end_sample, &curr_stats); + if (net_dim_decision(&curr_stats, dim)) { + dim->state = DIM_APPLY_NEW_PROFILE; + schedule_work(&dim->work); + break; + } + /* fall through */ + case DIM_START_MEASURE: + dim_update_sample(end_sample.event_ctr, end_sample.pkt_ctr, + end_sample.byte_ctr, &dim->start_sample); + dim->state = DIM_MEASURE_IN_PROGRESS; + break; + case DIM_APPLY_NEW_PROFILE: + break; + } +} +EXPORT_SYMBOL(net_dim); -- cgit v1.2.3-55-g7522 From 398c2b05bbee21cc172dfff017c0351d4d14e04c Mon Sep 17 00:00:00 2001 From: Yamin Friedman Date: Thu, 22 Nov 2018 09:51:17 +0200 Subject: linux/dim: Add completions count to dim_sample Added a measurement of completions per/msec to allow for completion based dim algorithms. In order to use dynamic interrupt moderation with RDMA we need to have a different measurment than packets per second. This change is meant to prepare for adding a new DIM method. All drivers that use net_dim and thus do not need a completion count will have the completions set to 0. Signed-off-by: Yamin Friedman Reviewed-by: Max Gurtovoy Signed-off-by: Saeed Mahameed --- include/linux/dim.h | 28 +++++++++++++++++++++++++--- lib/dim/dim.c | 9 +++++++++ 2 files changed, 34 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/include/linux/dim.h b/include/linux/dim.h index f48ede3e0322..aa9bdd47a648 100644 --- a/include/linux/dim.h +++ b/include/linux/dim.h @@ -37,6 +37,7 @@ struct dim_cq_moder { u16 usec; u16 pkts; + u16 comps; u8 cq_period_mode; }; @@ -54,6 +55,7 @@ struct dim_sample { u32 pkt_ctr; u32 byte_ctr; u16 event_ctr; + u32 comp_ctr; }; /** @@ -65,9 +67,11 @@ struct dim_sample { * @epms: Events per msec */ struct dim_stats { - int ppms; - int bpms; - int epms; + int ppms; /* packets per msec */ + int bpms; /* bytes per msec */ + int epms; /* events per msec */ + int cpms; /* completions per msec */ + int cpe_ratio; /* ratio of completions to events */ }; /** @@ -89,6 +93,7 @@ struct dim { u8 state; struct dim_stats prev_stats; struct dim_sample start_sample; + struct dim_sample measuring_sample; struct work_struct work; u8 profile_ix; u8 mode; @@ -246,6 +251,23 @@ dim_update_sample(u16 event_ctr, u64 packets, u64 bytes, struct dim_sample *s) s->event_ctr = event_ctr; } +/** + * dim_update_sample_with_comps - set a sample's fields with given + * values including the completion parameter + * @event_ctr: number of events to set + * @packets: number of packets to set + * @bytes: number of bytes to set + * @comps: number of completions to set + * @s: DIM sample + */ +static inline void +dim_update_sample_with_comps(u16 event_ctr, u64 packets, u64 bytes, u64 comps, + struct dim_sample *s) +{ + dim_update_sample(event_ctr, packets, bytes, s); + s->comp_ctr = comps; +} + /* Net DIM */ /* diff --git a/lib/dim/dim.c b/lib/dim/dim.c index 17d5236759bd..439d641ec796 100644 --- a/lib/dim/dim.c +++ b/lib/dim/dim.c @@ -62,6 +62,8 @@ void dim_calc_stats(struct dim_sample *start, struct dim_sample *end, u32 npkts = BIT_GAP(BITS_PER_TYPE(u32), end->pkt_ctr, start->pkt_ctr); u32 nbytes = BIT_GAP(BITS_PER_TYPE(u32), end->byte_ctr, start->byte_ctr); + u32 ncomps = BIT_GAP(BITS_PER_TYPE(u32), end->comp_ctr, + start->comp_ctr); if (!delta_us) return; @@ -70,5 +72,12 @@ void dim_calc_stats(struct dim_sample *start, struct dim_sample *end, curr_stats->bpms = DIV_ROUND_UP(nbytes * USEC_PER_MSEC, delta_us); curr_stats->epms = DIV_ROUND_UP(DIM_NEVENTS * USEC_PER_MSEC, delta_us); + curr_stats->cpms = DIV_ROUND_UP(ncomps * USEC_PER_MSEC, delta_us); + if (curr_stats->epms != 0) + curr_stats->cpe_ratio = + (curr_stats->cpms * 100) / curr_stats->epms; + else + curr_stats->cpe_ratio = 0; + } EXPORT_SYMBOL(dim_calc_stats); -- cgit v1.2.3-55-g7522