From 237cd218099ce96edf2890a49aa191b38b84c2fc Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Wed, 20 Apr 2016 22:02:09 +0300 Subject: net/mlx5: Introduce device queue counters A queue counter can collect several statistics for one or more hardware queues (QPs, RQs, etc ..) that the counter is attached to. For Ethernet it will provide an "out of buffer" counter which collects the number of all packets that are dropped due to lack of software buffers. Here we add device commands to alloc/query/dealloc queue counters. Signed-off-by: Tariq Toukan Signed-off-by: Rana Shahout Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/qp.c | 68 ++++++++++++++++++++++++++++ include/linux/mlx5/qp.h | 6 +++ 2 files changed, 74 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/qp.c b/drivers/net/ethernet/mellanox/mlx5/core/qp.c index def289375ecb..b720a274220d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/qp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/qp.c @@ -538,3 +538,71 @@ void mlx5_core_destroy_sq_tracked(struct mlx5_core_dev *dev, mlx5_core_destroy_sq(dev, sq->qpn); } EXPORT_SYMBOL(mlx5_core_destroy_sq_tracked); + +int mlx5_core_alloc_q_counter(struct mlx5_core_dev *dev, u16 *counter_id) +{ + u32 in[MLX5_ST_SZ_DW(alloc_q_counter_in)]; + u32 out[MLX5_ST_SZ_DW(alloc_q_counter_out)]; + int err; + + memset(in, 0, sizeof(in)); + memset(out, 0, sizeof(out)); + + MLX5_SET(alloc_q_counter_in, in, opcode, MLX5_CMD_OP_ALLOC_Q_COUNTER); + err = mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, sizeof(out)); + if (!err) + *counter_id = MLX5_GET(alloc_q_counter_out, out, + counter_set_id); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_core_alloc_q_counter); + +int mlx5_core_dealloc_q_counter(struct mlx5_core_dev *dev, u16 counter_id) +{ + u32 in[MLX5_ST_SZ_DW(dealloc_q_counter_in)]; + u32 out[MLX5_ST_SZ_DW(dealloc_q_counter_out)]; + + memset(in, 0, sizeof(in)); + memset(out, 0, sizeof(out)); + + MLX5_SET(dealloc_q_counter_in, in, opcode, + MLX5_CMD_OP_DEALLOC_Q_COUNTER); + MLX5_SET(dealloc_q_counter_in, in, counter_set_id, counter_id); + return mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, + sizeof(out)); +} +EXPORT_SYMBOL_GPL(mlx5_core_dealloc_q_counter); + +int mlx5_core_query_q_counter(struct mlx5_core_dev *dev, u16 counter_id, + int reset, void *out, int out_size) +{ + u32 in[MLX5_ST_SZ_DW(query_q_counter_in)]; + + memset(in, 0, sizeof(in)); + + MLX5_SET(query_q_counter_in, in, opcode, MLX5_CMD_OP_QUERY_Q_COUNTER); + MLX5_SET(query_q_counter_in, in, clear, reset); + MLX5_SET(query_q_counter_in, in, counter_set_id, counter_id); + return mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, out_size); +} +EXPORT_SYMBOL_GPL(mlx5_core_query_q_counter); + +int mlx5_core_query_out_of_buffer(struct mlx5_core_dev *dev, u16 counter_id, + u32 *out_of_buffer) +{ + int outlen = MLX5_ST_SZ_BYTES(query_q_counter_out); + void *out; + int err; + + out = mlx5_vzalloc(outlen); + if (!out) + return -ENOMEM; + + err = mlx5_core_query_q_counter(dev, counter_id, 0, out, outlen); + if (!err) + *out_of_buffer = MLX5_GET(query_q_counter_out, out, + out_of_buffer); + + kfree(out); + return err; +} diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h index cf031a3f16c5..64221027bf1f 100644 --- a/include/linux/mlx5/qp.h +++ b/include/linux/mlx5/qp.h @@ -668,6 +668,12 @@ int mlx5_core_create_sq_tracked(struct mlx5_core_dev *dev, u32 *in, int inlen, struct mlx5_core_qp *sq); void mlx5_core_destroy_sq_tracked(struct mlx5_core_dev *dev, struct mlx5_core_qp *sq); +int mlx5_core_alloc_q_counter(struct mlx5_core_dev *dev, u16 *counter_id); +int mlx5_core_dealloc_q_counter(struct mlx5_core_dev *dev, u16 counter_id); +int mlx5_core_query_q_counter(struct mlx5_core_dev *dev, u16 counter_id, + int reset, void *out, int out_size); +int mlx5_core_query_out_of_buffer(struct mlx5_core_dev *dev, u16 counter_id, + u32 *out_of_buffer); static inline const char *mlx5_qp_type_str(int type) { -- cgit v1.2.3-55-g7522 From 593cf33829adfd3d5c75d42879cc42afded1b626 Mon Sep 17 00:00:00 2001 From: Rana Shahout Date: Wed, 20 Apr 2016 22:02:10 +0300 Subject: net/mlx5e: Allocate set of queue counters per netdev Connect all netdev RQs to this set of queue counters. Also, add an "rx_out_of_buffer" counter to ethtool, which indicates RX packet drops due to lack of receive buffers. Signed-off-by: Rana Shahout Signed-off-by: Tariq Toukan Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 11 ++++++ .../net/ethernet/mellanox/mlx5/core/en_ethtool.c | 11 ++++++ drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 42 ++++++++++++++++++++-- 3 files changed, 62 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 879e6276c473..c4ddbe8501a7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -236,6 +236,15 @@ struct mlx5e_pport_stats { __be64 RFC_2819_counters[NUM_RFC_2819_COUNTERS]; }; +static const char qcounter_stats_strings[][ETH_GSTRING_LEN] = { + "rx_out_of_buffer", +}; + +struct mlx5e_qcounter_stats { + u32 rx_out_of_buffer; +#define NUM_Q_COUNTERS 1 +}; + static const char rq_stats_strings[][ETH_GSTRING_LEN] = { "packets", "bytes", @@ -293,6 +302,7 @@ struct mlx5e_sq_stats { struct mlx5e_stats { struct mlx5e_vport_stats vport; struct mlx5e_pport_stats pport; + struct mlx5e_qcounter_stats qcnt; }; struct mlx5e_params { @@ -575,6 +585,7 @@ struct mlx5e_priv { struct net_device *netdev; struct mlx5e_stats stats; struct mlx5e_tstamp tstamp; + u16 q_counter; }; #define MLX5E_NET_IP_ALIGN 2 diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index 68834b715f6c..39c19021d154 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -165,6 +165,8 @@ static const struct { }, }; +#define MLX5E_NUM_Q_CNTRS(priv) (NUM_Q_COUNTERS * (!!priv->q_counter)) + static int mlx5e_get_sset_count(struct net_device *dev, int sset) { struct mlx5e_priv *priv = netdev_priv(dev); @@ -172,6 +174,7 @@ static int mlx5e_get_sset_count(struct net_device *dev, int sset) switch (sset) { case ETH_SS_STATS: return NUM_VPORT_COUNTERS + NUM_PPORT_COUNTERS + + MLX5E_NUM_Q_CNTRS(priv) + priv->params.num_channels * NUM_RQ_STATS + priv->params.num_channels * priv->params.num_tc * NUM_SQ_STATS; @@ -200,6 +203,11 @@ static void mlx5e_get_strings(struct net_device *dev, strcpy(data + (idx++) * ETH_GSTRING_LEN, vport_strings[i]); + /* Q counters */ + for (i = 0; i < MLX5E_NUM_Q_CNTRS(priv); i++) + strcpy(data + (idx++) * ETH_GSTRING_LEN, + qcounter_stats_strings[i]); + /* PPORT counters */ for (i = 0; i < NUM_PPORT_COUNTERS; i++) strcpy(data + (idx++) * ETH_GSTRING_LEN, @@ -240,6 +248,9 @@ static void mlx5e_get_ethtool_stats(struct net_device *dev, for (i = 0; i < NUM_VPORT_COUNTERS; i++) data[idx++] = ((u64 *)&priv->stats.vport)[i]; + for (i = 0; i < MLX5E_NUM_Q_CNTRS(priv); i++) + data[idx++] = ((u32 *)&priv->stats.qcnt)[i]; + for (i = 0; i < NUM_PPORT_COUNTERS; i++) data[idx++] = be64_to_cpu(((__be64 *)&priv->stats.pport)[i]); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index e0adb604f461..7fbe1ba86294 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -129,6 +129,17 @@ free_out: kvfree(out); } +static void mlx5e_update_q_counter(struct mlx5e_priv *priv) +{ + struct mlx5e_qcounter_stats *qcnt = &priv->stats.qcnt; + + if (!priv->q_counter) + return; + + mlx5_core_query_out_of_buffer(priv->mdev, priv->q_counter, + &qcnt->rx_out_of_buffer); +} + void mlx5e_update_stats(struct mlx5e_priv *priv) { struct mlx5_core_dev *mdev = priv->mdev; @@ -250,6 +261,8 @@ void mlx5e_update_stats(struct mlx5e_priv *priv) s->rx_csum_sw; mlx5e_update_pport_counters(priv); + mlx5e_update_q_counter(priv); + free_out: kvfree(out); } @@ -1055,6 +1068,7 @@ static void mlx5e_build_rq_param(struct mlx5e_priv *priv, MLX5_SET(wq, wq, log_wq_stride, ilog2(sizeof(struct mlx5e_rx_wqe))); MLX5_SET(wq, wq, log_wq_sz, priv->params.log_rq_size); MLX5_SET(wq, wq, pd, priv->pdn); + MLX5_SET(rqc, rqc, counter_set_id, priv->q_counter); param->wq.buf_numa_node = dev_to_node(&priv->mdev->pdev->dev); param->wq.linear = 1; @@ -2442,6 +2456,26 @@ static int mlx5e_create_mkey(struct mlx5e_priv *priv, u32 pdn, return err; } +static void mlx5e_create_q_counter(struct mlx5e_priv *priv) +{ + struct mlx5_core_dev *mdev = priv->mdev; + int err; + + err = mlx5_core_alloc_q_counter(mdev, &priv->q_counter); + if (err) { + mlx5_core_warn(mdev, "alloc queue counter failed, %d\n", err); + priv->q_counter = 0; + } +} + +static void mlx5e_destroy_q_counter(struct mlx5e_priv *priv) +{ + if (!priv->q_counter) + return; + + mlx5_core_dealloc_q_counter(priv->mdev, priv->q_counter); +} + static void *mlx5e_create_netdev(struct mlx5_core_dev *mdev) { struct net_device *netdev; @@ -2527,13 +2561,15 @@ static void *mlx5e_create_netdev(struct mlx5_core_dev *mdev) goto err_destroy_tirs; } + mlx5e_create_q_counter(priv); + mlx5e_init_eth_addr(priv); mlx5e_vxlan_init(priv); err = mlx5e_tc_init(priv); if (err) - goto err_destroy_flow_tables; + goto err_dealloc_q_counters; #ifdef CONFIG_MLX5_CORE_EN_DCB mlx5e_dcbnl_ieee_setets_core(priv, &priv->params.ets); @@ -2556,7 +2592,8 @@ static void *mlx5e_create_netdev(struct mlx5_core_dev *mdev) err_tc_cleanup: mlx5e_tc_cleanup(priv); -err_destroy_flow_tables: +err_dealloc_q_counters: + mlx5e_destroy_q_counter(priv); mlx5e_destroy_flow_tables(priv); err_destroy_tirs: @@ -2605,6 +2642,7 @@ static void mlx5e_destroy_netdev(struct mlx5_core_dev *mdev, void *vpriv) unregister_netdev(netdev); mlx5e_tc_cleanup(priv); mlx5e_vxlan_cleanup(priv); + mlx5e_destroy_q_counter(priv); mlx5e_destroy_flow_tables(priv); mlx5e_destroy_tirs(priv); mlx5e_destroy_rqt(priv, MLX5E_SINGLE_RQ_RQT); -- cgit v1.2.3-55-g7522 From d8c9660dac6287490ef450bc892593f05d364531 Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Wed, 20 Apr 2016 22:02:11 +0300 Subject: net/mlx5e: Use only close NUMA node for default RSS Distribute default RSS table uniformly over the rings of the close NUMA node, instead of all available channels. This way we enforce the preference of close rings over far ones. Signed-off-by: Tariq Toukan Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 3 ++- drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 15 +++++++++++++-- 3 files changed, 16 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index c4ddbe8501a7..7f19644689f2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -671,7 +671,8 @@ void mlx5e_build_tir_ctx_hash(void *tirc, struct mlx5e_priv *priv); int mlx5e_open_locked(struct net_device *netdev); int mlx5e_close_locked(struct net_device *netdev); -void mlx5e_build_default_indir_rqt(u32 *indirection_rqt, int len, +void mlx5e_build_default_indir_rqt(struct mlx5_core_dev *mdev, + u32 *indirection_rqt, int len, int num_channels); static inline void mlx5e_tx_notify_hw(struct mlx5e_sq *sq, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index 39c19021d154..6f40ba448f07 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -397,7 +397,7 @@ static int mlx5e_set_channels(struct net_device *dev, mlx5e_close_locked(dev); priv->params.num_channels = count; - mlx5e_build_default_indir_rqt(priv->params.indirection_rqt, + mlx5e_build_default_indir_rqt(priv->mdev, priv->params.indirection_rqt, MLX5E_INDIR_RQT_SIZE, count); if (was_opened) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 7fbe1ba86294..9b58ef6cab93 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -2297,11 +2297,22 @@ static void mlx5e_ets_init(struct mlx5e_priv *priv) } #endif -void mlx5e_build_default_indir_rqt(u32 *indirection_rqt, int len, +void mlx5e_build_default_indir_rqt(struct mlx5_core_dev *mdev, + u32 *indirection_rqt, int len, int num_channels) { + int node = mdev->priv.numa_node; + int node_num_of_cores; int i; + if (node == -1) + node = first_online_node; + + node_num_of_cores = cpumask_weight(cpumask_of_node(node)); + + if (node_num_of_cores) + num_channels = min_t(int, num_channels, node_num_of_cores); + for (i = 0; i < len; i++) indirection_rqt[i] = i % num_channels; } @@ -2333,7 +2344,7 @@ static void mlx5e_build_netdev_priv(struct mlx5_core_dev *mdev, netdev_rss_key_fill(priv->params.toeplitz_hash_key, sizeof(priv->params.toeplitz_hash_key)); - mlx5e_build_default_indir_rqt(priv->params.indirection_rqt, + mlx5e_build_default_indir_rqt(mdev, priv->params.indirection_rqt, MLX5E_INDIR_RQT_SIZE, num_channels); priv->params.lro_wqe_sz = -- cgit v1.2.3-55-g7522 From 2f48af128d9aa64dd4e8c6fe97491b0bde3681b2 Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Wed, 20 Apr 2016 22:02:12 +0300 Subject: net/mlx5e: Use function pointers for RX data path handling In preparation for Striding RQ feature, which will need its own RX handlers. This patch does not change any functionality. Signed-off-by: Tariq Toukan Signed-off-by: Achiad Shochat Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 33 ++++++---- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 2 + drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 74 ++++++++++++----------- 3 files changed, 62 insertions(+), 47 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 7f19644689f2..61e249d8d7f8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -72,6 +72,17 @@ #define MLX5E_SQ_BF_BUDGET 16 #define MLX5E_NUM_MAIN_GROUPS 9 +#define MLX5E_NET_IP_ALIGN 2 + +struct mlx5e_tx_wqe { + struct mlx5_wqe_ctrl_seg ctrl; + struct mlx5_wqe_eth_seg eth; +}; + +struct mlx5e_rx_wqe { + struct mlx5_wqe_srq_next_seg next; + struct mlx5_wqe_data_seg data; +}; #ifdef CONFIG_MLX5_CORE_EN_DCB #define MLX5E_MAX_BW_ALLOC 100 /* Max percentage of BW allocation */ @@ -357,6 +368,12 @@ struct mlx5e_cq { struct mlx5_wq_ctrl wq_ctrl; } ____cacheline_aligned_in_smp; +struct mlx5e_rq; +typedef void (*mlx5e_fp_handle_rx_cqe)(struct mlx5e_rq *rq, + struct mlx5_cqe64 *cqe); +typedef int (*mlx5e_fp_alloc_wqe)(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, + u16 ix); + struct mlx5e_rq { /* data path */ struct mlx5_wq_ll wq; @@ -368,6 +385,8 @@ struct mlx5e_rq { struct mlx5e_tstamp *tstamp; struct mlx5e_rq_stats stats; struct mlx5e_cq cq; + mlx5e_fp_handle_rx_cqe handle_rx_cqe; + mlx5e_fp_alloc_wqe alloc_wqe; unsigned long state; int ix; @@ -588,18 +607,6 @@ struct mlx5e_priv { u16 q_counter; }; -#define MLX5E_NET_IP_ALIGN 2 - -struct mlx5e_tx_wqe { - struct mlx5_wqe_ctrl_seg ctrl; - struct mlx5_wqe_eth_seg eth; -}; - -struct mlx5e_rx_wqe { - struct mlx5_wqe_srq_next_seg next; - struct mlx5_wqe_data_seg data; -}; - enum mlx5e_link_mode { MLX5E_1000BASE_CX_SGMII = 0, MLX5E_1000BASE_KX = 1, @@ -642,7 +649,9 @@ void mlx5e_cq_error_event(struct mlx5_core_cq *mcq, enum mlx5_event event); int mlx5e_napi_poll(struct napi_struct *napi, int budget); bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget); int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget); +void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe); bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq); +int mlx5e_alloc_rx_wqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix); struct mlx5_cqe64 *mlx5e_get_cqe(struct mlx5e_cq *cq); void mlx5e_update_stats(struct mlx5e_priv *priv); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 9b58ef6cab93..23ba12c3a738 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -357,6 +357,8 @@ static int mlx5e_create_rq(struct mlx5e_channel *c, cpu_to_be32(byte_count | MLX5_HW_START_PADDING); } + rq->handle_rx_cqe = mlx5e_handle_rx_cqe; + rq->alloc_wqe = mlx5e_alloc_rx_wqe; rq->pdev = c->pdev; rq->netdev = c->netdev; rq->tstamp = &priv->tstamp; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 58d4e2f962c3..d7cccedddf34 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -42,8 +42,7 @@ static inline bool mlx5e_rx_hw_stamp(struct mlx5e_tstamp *tstamp) return tstamp->hwtstamp_config.rx_filter == HWTSTAMP_FILTER_ALL; } -static inline int mlx5e_alloc_rx_wqe(struct mlx5e_rq *rq, - struct mlx5e_rx_wqe *wqe, u16 ix) +int mlx5e_alloc_rx_wqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix) { struct sk_buff *skb; dma_addr_t dma_addr; @@ -87,7 +86,7 @@ bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq) while (!mlx5_wq_ll_is_full(wq)) { struct mlx5e_rx_wqe *wqe = mlx5_wq_ll_get_wqe(wq, wq->head); - if (unlikely(mlx5e_alloc_rx_wqe(rq, wqe, wq->head))) + if (unlikely(rq->alloc_wqe(rq, wqe, wq->head))) break; mlx5_wq_ll_push(wq, be16_to_cpu(wqe->next.next_wqe_index)); @@ -229,50 +228,55 @@ static inline void mlx5e_build_rx_skb(struct mlx5_cqe64 *cqe, skb->mark = be32_to_cpu(cqe->sop_drop_qpn) & MLX5E_TC_FLOW_ID_MASK; } +void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) +{ + struct mlx5e_rx_wqe *wqe; + struct sk_buff *skb; + __be16 wqe_counter_be; + u16 wqe_counter; + + wqe_counter_be = cqe->wqe_counter; + wqe_counter = be16_to_cpu(wqe_counter_be); + wqe = mlx5_wq_ll_get_wqe(&rq->wq, wqe_counter); + skb = rq->skb[wqe_counter]; + prefetch(skb->data); + rq->skb[wqe_counter] = NULL; + + dma_unmap_single(rq->pdev, + *((dma_addr_t *)skb->cb), + rq->wqe_sz, + DMA_FROM_DEVICE); + + if (unlikely((cqe->op_own >> 4) != MLX5_CQE_RESP_SEND)) { + rq->stats.wqe_err++; + dev_kfree_skb(skb); + goto wq_ll_pop; + } + + mlx5e_build_rx_skb(cqe, rq, skb); + rq->stats.packets++; + rq->stats.bytes += be32_to_cpu(cqe->byte_cnt); + napi_gro_receive(rq->cq.napi, skb); + +wq_ll_pop: + mlx5_wq_ll_pop(&rq->wq, wqe_counter_be, + &wqe->next.next_wqe_index); +} + int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget) { struct mlx5e_rq *rq = container_of(cq, struct mlx5e_rq, cq); int work_done; for (work_done = 0; work_done < budget; work_done++) { - struct mlx5e_rx_wqe *wqe; - struct mlx5_cqe64 *cqe; - struct sk_buff *skb; - __be16 wqe_counter_be; - u16 wqe_counter; + struct mlx5_cqe64 *cqe = mlx5e_get_cqe(cq); - cqe = mlx5e_get_cqe(cq); if (!cqe) break; mlx5_cqwq_pop(&cq->wq); - wqe_counter_be = cqe->wqe_counter; - wqe_counter = be16_to_cpu(wqe_counter_be); - wqe = mlx5_wq_ll_get_wqe(&rq->wq, wqe_counter); - skb = rq->skb[wqe_counter]; - prefetch(skb->data); - rq->skb[wqe_counter] = NULL; - - dma_unmap_single(rq->pdev, - *((dma_addr_t *)skb->cb), - rq->wqe_sz, - DMA_FROM_DEVICE); - - if (unlikely((cqe->op_own >> 4) != MLX5_CQE_RESP_SEND)) { - rq->stats.wqe_err++; - dev_kfree_skb(skb); - goto wq_ll_pop; - } - - mlx5e_build_rx_skb(cqe, rq, skb); - rq->stats.packets++; - rq->stats.bytes += be32_to_cpu(cqe->byte_cnt); - napi_gro_receive(cq->napi, skb); - -wq_ll_pop: - mlx5_wq_ll_pop(&rq->wq, wqe_counter_be, - &wqe->next.next_wqe_index); + rq->handle_rx_cqe(rq, cqe); } mlx5_cqwq_update_db_record(&cq->wq); -- cgit v1.2.3-55-g7522 From 461017cb006aa1b39b0f647ae0ee2d9d84eef05b Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Wed, 20 Apr 2016 22:02:13 +0300 Subject: net/mlx5e: Support RX multi-packet WQE (Striding RQ) Introduce the feature of multi-packet WQE (RX Work Queue Element) referred to as (MPWQE or Striding RQ), in which WQEs are larger and serve multiple packets each. Every WQE consists of many strides of the same size, every received packet is aligned to a beginning of a stride and is written to consecutive strides within a WQE. In the regular approach, each regular WQE is big enough to be capable of serving one received packet of any size up to MTU or 64K in case of device LRO is enabled, making it very wasteful when dealing with small packets or device LRO is enabled. For its flexibility, MPWQE allows a better memory utilization (implying improvements in CPU utilization and packet rate) as packets consume strides according to their size, preserving the rest of the WQE to be available for other packets. MPWQE default configuration: Num of WQEs = 16 Strides Per WQE = 2048 Stride Size = 64 byte The default WQEs memory footprint went from 1024*mtu (~1.5MB) to 16 * 2048 * 64 = 2MB per ring. However, HW LRO can now be supported at no additional cost in memory footprint, and hence we turn it on by default and get an even better performance. Performance tested on ConnectX4-Lx 50G. To isolate the feature under test, the numbers below were measured with HW LRO turned off. We verified that the performance just improves when LRO is turned back on. * Netperf single TCP stream: - BW raised by 10-15% for representative packet sizes: default, 64B, 1024B, 1478B, 65536B. * Netperf multi TCP stream: - No degradation, line rate reached. * Pktgen: packet rate raised by 2-10% for traffic of different message sizes: 64B, 128B, 256B, 1024B, and 1500B. * Pktgen: packet loss in bursts of small messages (64byte), single stream: - | num packets | packets loss before | packets loss after | 2K | ~ 1K | 0 | 8K | ~ 6K | 0 | 16K | ~13K | 0 | 32K | ~28K | 0 | 64K | ~57K | ~24K As expected as the driver can receive as many small packets (<=64B) as the number of total strides in the ring (default = 2048 * 16) vs. 1024 (default ring size regardless of packets size) before this feature. Signed-off-by: Tariq Toukan Signed-off-by: Achiad Shochat Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 77 ++++++++++- .../net/ethernet/mellanox/mlx5/core/en_ethtool.c | 15 +- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 109 +++++++++++---- drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 153 +++++++++++++++++++-- include/linux/mlx5/device.h | 39 +++++- 5 files changed, 349 insertions(+), 44 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 61e249d8d7f8..f519148d7dcc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -57,12 +57,30 @@ #define MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE 0xa #define MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE 0xd +#define MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE_MPW 0x1 +#define MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE_MPW 0x4 +#define MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE_MPW 0x6 + +#define MLX5_MPWRQ_LOG_NUM_STRIDES 11 /* >= 9, HW restriction */ +#define MLX5_MPWRQ_LOG_STRIDE_SIZE 6 /* >= 6, HW restriction */ +#define MLX5_MPWRQ_NUM_STRIDES BIT(MLX5_MPWRQ_LOG_NUM_STRIDES) +#define MLX5_MPWRQ_STRIDE_SIZE BIT(MLX5_MPWRQ_LOG_STRIDE_SIZE) +#define MLX5_MPWRQ_LOG_WQE_SZ (MLX5_MPWRQ_LOG_NUM_STRIDES +\ + MLX5_MPWRQ_LOG_STRIDE_SIZE) +#define MLX5_MPWRQ_WQE_PAGE_ORDER (MLX5_MPWRQ_LOG_WQE_SZ - PAGE_SHIFT > 0 ? \ + MLX5_MPWRQ_LOG_WQE_SZ - PAGE_SHIFT : 0) +#define MLX5_MPWRQ_PAGES_PER_WQE BIT(MLX5_MPWRQ_WQE_PAGE_ORDER) +#define MLX5_MPWRQ_STRIDES_PER_PAGE (MLX5_MPWRQ_NUM_STRIDES >> \ + MLX5_MPWRQ_WQE_PAGE_ORDER) +#define MLX5_MPWRQ_SMALL_PACKET_THRESHOLD (128) + #define MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ (64 * 1024) #define MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC 0x10 #define MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_PKTS 0x20 #define MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_USEC 0x10 #define MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_PKTS 0x20 #define MLX5E_PARAMS_DEFAULT_MIN_RX_WQES 0x80 +#define MLX5E_PARAMS_DEFAULT_MIN_RX_WQES_MPW 0x2 #define MLX5E_LOG_INDIR_RQT_SIZE 0x7 #define MLX5E_INDIR_RQT_SIZE BIT(MLX5E_LOG_INDIR_RQT_SIZE) @@ -74,6 +92,38 @@ #define MLX5E_NUM_MAIN_GROUPS 9 #define MLX5E_NET_IP_ALIGN 2 +static inline u16 mlx5_min_rx_wqes(int wq_type, u32 wq_size) +{ + switch (wq_type) { + case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: + return min_t(u16, MLX5E_PARAMS_DEFAULT_MIN_RX_WQES_MPW, + wq_size / 2); + default: + return min_t(u16, MLX5E_PARAMS_DEFAULT_MIN_RX_WQES, + wq_size / 2); + } +} + +static inline int mlx5_min_log_rq_size(int wq_type) +{ + switch (wq_type) { + case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: + return MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE_MPW; + default: + return MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE; + } +} + +static inline int mlx5_max_log_rq_size(int wq_type) +{ + switch (wq_type) { + case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: + return MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE_MPW; + default: + return MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE; + } +} + struct mlx5e_tx_wqe { struct mlx5_wqe_ctrl_seg ctrl; struct mlx5_wqe_eth_seg eth; @@ -128,6 +178,7 @@ static const char vport_strings[][ETH_GSTRING_LEN] = { "tx_queue_wake", "tx_queue_dropped", "rx_wqe_err", + "rx_mpwqe_filler", }; struct mlx5e_vport_stats { @@ -169,8 +220,9 @@ struct mlx5e_vport_stats { u64 tx_queue_wake; u64 tx_queue_dropped; u64 rx_wqe_err; + u64 rx_mpwqe_filler; -#define NUM_VPORT_COUNTERS 35 +#define NUM_VPORT_COUNTERS 36 }; static const char pport_strings[][ETH_GSTRING_LEN] = { @@ -263,7 +315,8 @@ static const char rq_stats_strings[][ETH_GSTRING_LEN] = { "csum_sw", "lro_packets", "lro_bytes", - "wqe_err" + "wqe_err", + "mpwqe_filler", }; struct mlx5e_rq_stats { @@ -274,7 +327,8 @@ struct mlx5e_rq_stats { u64 lro_packets; u64 lro_bytes; u64 wqe_err; -#define NUM_RQ_STATS 7 + u64 mpwqe_filler; +#define NUM_RQ_STATS 8 }; static const char sq_stats_strings[][ETH_GSTRING_LEN] = { @@ -318,6 +372,7 @@ struct mlx5e_stats { struct mlx5e_params { u8 log_sq_size; + u8 rq_wq_type; u8 log_rq_size; u16 num_channels; u8 num_tc; @@ -374,11 +429,23 @@ typedef void (*mlx5e_fp_handle_rx_cqe)(struct mlx5e_rq *rq, typedef int (*mlx5e_fp_alloc_wqe)(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix); +struct mlx5e_dma_info { + struct page *page; + dma_addr_t addr; +}; + +struct mlx5e_mpw_info { + struct mlx5e_dma_info dma_info; + u16 consumed_strides; + u16 skbs_frags[MLX5_MPWRQ_PAGES_PER_WQE]; +}; + struct mlx5e_rq { /* data path */ struct mlx5_wq_ll wq; u32 wqe_sz; struct sk_buff **skb; + struct mlx5e_mpw_info *wqe_info; struct device *pdev; struct net_device *netdev; @@ -393,6 +460,7 @@ struct mlx5e_rq { /* control */ struct mlx5_wq_ctrl wq_ctrl; + u8 wq_type; u32 rqn; struct mlx5e_channel *channel; struct mlx5e_priv *priv; @@ -649,9 +717,12 @@ void mlx5e_cq_error_event(struct mlx5_core_cq *mcq, enum mlx5_event event); int mlx5e_napi_poll(struct napi_struct *napi, int budget); bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget); int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget); + void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe); +void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe); bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq); int mlx5e_alloc_rx_wqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix); +int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix); struct mlx5_cqe64 *mlx5e_get_cqe(struct mlx5e_cq *cq); void mlx5e_update_stats(struct mlx5e_priv *priv); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index 6f40ba448f07..4077856aab76 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -273,8 +273,9 @@ static void mlx5e_get_ringparam(struct net_device *dev, struct ethtool_ringparam *param) { struct mlx5e_priv *priv = netdev_priv(dev); + int rq_wq_type = priv->params.rq_wq_type; - param->rx_max_pending = 1 << MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE; + param->rx_max_pending = 1 << mlx5_max_log_rq_size(rq_wq_type); param->tx_max_pending = 1 << MLX5E_PARAMS_MAXIMUM_LOG_SQ_SIZE; param->rx_pending = 1 << priv->params.log_rq_size; param->tx_pending = 1 << priv->params.log_sq_size; @@ -285,6 +286,7 @@ static int mlx5e_set_ringparam(struct net_device *dev, { struct mlx5e_priv *priv = netdev_priv(dev); bool was_opened; + int rq_wq_type = priv->params.rq_wq_type; u16 min_rx_wqes; u8 log_rq_size; u8 log_sq_size; @@ -300,16 +302,16 @@ static int mlx5e_set_ringparam(struct net_device *dev, __func__); return -EINVAL; } - if (param->rx_pending < (1 << MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE)) { + if (param->rx_pending < (1 << mlx5_min_log_rq_size(rq_wq_type))) { netdev_info(dev, "%s: rx_pending (%d) < min (%d)\n", __func__, param->rx_pending, - 1 << MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE); + 1 << mlx5_min_log_rq_size(rq_wq_type)); return -EINVAL; } - if (param->rx_pending > (1 << MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE)) { + if (param->rx_pending > (1 << mlx5_max_log_rq_size(rq_wq_type))) { netdev_info(dev, "%s: rx_pending (%d) > max (%d)\n", __func__, param->rx_pending, - 1 << MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE); + 1 << mlx5_max_log_rq_size(rq_wq_type)); return -EINVAL; } if (param->tx_pending < (1 << MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE)) { @@ -327,8 +329,7 @@ static int mlx5e_set_ringparam(struct net_device *dev, log_rq_size = order_base_2(param->rx_pending); log_sq_size = order_base_2(param->tx_pending); - min_rx_wqes = min_t(u16, param->rx_pending - 1, - MLX5E_PARAMS_DEFAULT_MIN_RX_WQES); + min_rx_wqes = mlx5_min_rx_wqes(rq_wq_type, param->rx_pending); if (log_rq_size == priv->params.log_rq_size && log_sq_size == priv->params.log_sq_size && diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 23ba12c3a738..871f3af204dd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -175,6 +175,7 @@ void mlx5e_update_stats(struct mlx5e_priv *priv) s->rx_csum_none = 0; s->rx_csum_sw = 0; s->rx_wqe_err = 0; + s->rx_mpwqe_filler = 0; for (i = 0; i < priv->params.num_channels; i++) { rq_stats = &priv->channel[i]->rq.stats; @@ -185,6 +186,7 @@ void mlx5e_update_stats(struct mlx5e_priv *priv) s->rx_csum_none += rq_stats->csum_none; s->rx_csum_sw += rq_stats->csum_sw; s->rx_wqe_err += rq_stats->wqe_err; + s->rx_mpwqe_filler += rq_stats->mpwqe_filler; for (j = 0; j < priv->params.num_tc; j++) { sq_stats = &priv->channel[i]->sq[j].stats; @@ -323,6 +325,7 @@ static int mlx5e_create_rq(struct mlx5e_channel *c, struct mlx5_core_dev *mdev = priv->mdev; void *rqc = param->rqc; void *rqc_wq = MLX5_ADDR_OF(rqc, rqc, wq); + u32 byte_count; int wq_sz; int err; int i; @@ -337,28 +340,47 @@ static int mlx5e_create_rq(struct mlx5e_channel *c, rq->wq.db = &rq->wq.db[MLX5_RCV_DBR]; wq_sz = mlx5_wq_ll_get_size(&rq->wq); - rq->skb = kzalloc_node(wq_sz * sizeof(*rq->skb), GFP_KERNEL, - cpu_to_node(c->cpu)); - if (!rq->skb) { - err = -ENOMEM; - goto err_rq_wq_destroy; - } - rq->wqe_sz = (priv->params.lro_en) ? priv->params.lro_wqe_sz : - MLX5E_SW2HW_MTU(priv->netdev->mtu); - rq->wqe_sz = SKB_DATA_ALIGN(rq->wqe_sz + MLX5E_NET_IP_ALIGN); + switch (priv->params.rq_wq_type) { + case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: + rq->wqe_info = kzalloc_node(wq_sz * sizeof(*rq->wqe_info), + GFP_KERNEL, cpu_to_node(c->cpu)); + if (!rq->wqe_info) { + err = -ENOMEM; + goto err_rq_wq_destroy; + } + rq->handle_rx_cqe = mlx5e_handle_rx_cqe_mpwrq; + rq->alloc_wqe = mlx5e_alloc_rx_mpwqe; + + rq->wqe_sz = MLX5_MPWRQ_NUM_STRIDES * MLX5_MPWRQ_STRIDE_SIZE; + byte_count = rq->wqe_sz; + break; + default: /* MLX5_WQ_TYPE_LINKED_LIST */ + rq->skb = kzalloc_node(wq_sz * sizeof(*rq->skb), GFP_KERNEL, + cpu_to_node(c->cpu)); + if (!rq->skb) { + err = -ENOMEM; + goto err_rq_wq_destroy; + } + rq->handle_rx_cqe = mlx5e_handle_rx_cqe; + rq->alloc_wqe = mlx5e_alloc_rx_wqe; + + rq->wqe_sz = (priv->params.lro_en) ? + priv->params.lro_wqe_sz : + MLX5E_SW2HW_MTU(priv->netdev->mtu); + rq->wqe_sz = SKB_DATA_ALIGN(rq->wqe_sz + MLX5E_NET_IP_ALIGN); + byte_count = rq->wqe_sz - MLX5E_NET_IP_ALIGN; + byte_count |= MLX5_HW_START_PADDING; + } for (i = 0; i < wq_sz; i++) { struct mlx5e_rx_wqe *wqe = mlx5_wq_ll_get_wqe(&rq->wq, i); - u32 byte_count = rq->wqe_sz - MLX5E_NET_IP_ALIGN; wqe->data.lkey = c->mkey_be; - wqe->data.byte_count = - cpu_to_be32(byte_count | MLX5_HW_START_PADDING); + wqe->data.byte_count = cpu_to_be32(byte_count); } - rq->handle_rx_cqe = mlx5e_handle_rx_cqe; - rq->alloc_wqe = mlx5e_alloc_rx_wqe; + rq->wq_type = priv->params.rq_wq_type; rq->pdev = c->pdev; rq->netdev = c->netdev; rq->tstamp = &priv->tstamp; @@ -376,7 +398,14 @@ err_rq_wq_destroy: static void mlx5e_destroy_rq(struct mlx5e_rq *rq) { - kfree(rq->skb); + switch (rq->wq_type) { + case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: + kfree(rq->wqe_info); + break; + default: /* MLX5_WQ_TYPE_LINKED_LIST */ + kfree(rq->skb); + } + mlx5_wq_destroy(&rq->wq_ctrl); } @@ -1065,7 +1094,18 @@ static void mlx5e_build_rq_param(struct mlx5e_priv *priv, void *rqc = param->rqc; void *wq = MLX5_ADDR_OF(rqc, rqc, wq); - MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_LINKED_LIST); + switch (priv->params.rq_wq_type) { + case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: + MLX5_SET(wq, wq, log_wqe_num_of_strides, + MLX5_MPWRQ_LOG_NUM_STRIDES - 9); + MLX5_SET(wq, wq, log_wqe_stride_size, + MLX5_MPWRQ_LOG_STRIDE_SIZE - 6); + MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ); + break; + default: /* MLX5_WQ_TYPE_LINKED_LIST */ + MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_LINKED_LIST); + } + MLX5_SET(wq, wq, end_padding_mode, MLX5_WQ_END_PAD_MODE_ALIGN); MLX5_SET(wq, wq, log_wq_stride, ilog2(sizeof(struct mlx5e_rx_wqe))); MLX5_SET(wq, wq, log_wq_sz, priv->params.log_rq_size); @@ -1111,8 +1151,18 @@ static void mlx5e_build_rx_cq_param(struct mlx5e_priv *priv, struct mlx5e_cq_param *param) { void *cqc = param->cqc; + u8 log_cq_size; - MLX5_SET(cqc, cqc, log_cq_size, priv->params.log_rq_size); + switch (priv->params.rq_wq_type) { + case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: + log_cq_size = priv->params.log_rq_size + + MLX5_MPWRQ_LOG_NUM_STRIDES; + break; + default: /* MLX5_WQ_TYPE_LINKED_LIST */ + log_cq_size = priv->params.log_rq_size; + } + + MLX5_SET(cqc, cqc, log_cq_size, log_cq_size); mlx5e_build_common_cq_param(priv, param); } @@ -1983,7 +2033,8 @@ static int mlx5e_set_features(struct net_device *netdev, if (changes & NETIF_F_LRO) { bool was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state); - if (was_opened) + if (was_opened && (priv->params.rq_wq_type == + MLX5_WQ_TYPE_LINKED_LIST)) mlx5e_close_locked(priv->netdev); priv->params.lro_en = !!(features & NETIF_F_LRO); @@ -1992,7 +2043,8 @@ static int mlx5e_set_features(struct net_device *netdev, mlx5_core_warn(priv->mdev, "lro modify failed, %d\n", err); - if (was_opened) + if (was_opened && (priv->params.rq_wq_type == + MLX5_WQ_TYPE_LINKED_LIST)) err = mlx5e_open_locked(priv->netdev); } @@ -2327,8 +2379,21 @@ static void mlx5e_build_netdev_priv(struct mlx5_core_dev *mdev, priv->params.log_sq_size = MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE; - priv->params.log_rq_size = - MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE; + priv->params.rq_wq_type = MLX5_CAP_GEN(mdev, striding_rq) ? + MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ : + MLX5_WQ_TYPE_LINKED_LIST; + + switch (priv->params.rq_wq_type) { + case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: + priv->params.log_rq_size = MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE_MPW; + priv->params.lro_en = true; + break; + default: /* MLX5_WQ_TYPE_LINKED_LIST */ + priv->params.log_rq_size = MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE; + } + + priv->params.min_rx_wqes = mlx5_min_rx_wqes(priv->params.rq_wq_type, + BIT(priv->params.log_rq_size)); priv->params.rx_cq_moderation_usec = MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC; priv->params.rx_cq_moderation_pkts = @@ -2338,8 +2403,6 @@ static void mlx5e_build_netdev_priv(struct mlx5_core_dev *mdev, priv->params.tx_cq_moderation_pkts = MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_PKTS; priv->params.tx_max_inline = mlx5e_get_max_inline_cap(mdev); - priv->params.min_rx_wqes = - MLX5E_PARAMS_DEFAULT_MIN_RX_WQES; priv->params.num_tc = 1; priv->params.rss_hfunc = ETH_RSS_HASH_XOR; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index d7cccedddf34..71f3a5d244ff 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -76,6 +76,41 @@ err_free_skb: return -ENOMEM; } +int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix) +{ + struct mlx5e_mpw_info *wi = &rq->wqe_info[ix]; + gfp_t gfp_mask; + int i; + + gfp_mask = GFP_ATOMIC | __GFP_COLD | __GFP_MEMALLOC; + wi->dma_info.page = alloc_pages_node(NUMA_NO_NODE, gfp_mask, + MLX5_MPWRQ_WQE_PAGE_ORDER); + if (unlikely(!wi->dma_info.page)) + return -ENOMEM; + + wi->dma_info.addr = dma_map_page(rq->pdev, wi->dma_info.page, 0, + rq->wqe_sz, PCI_DMA_FROMDEVICE); + if (unlikely(dma_mapping_error(rq->pdev, wi->dma_info.addr))) { + put_page(wi->dma_info.page); + return -ENOMEM; + } + + /* We split the high-order page into order-0 ones and manage their + * reference counter to minimize the memory held by small skb fragments + */ + split_page(wi->dma_info.page, MLX5_MPWRQ_WQE_PAGE_ORDER); + for (i = 0; i < MLX5_MPWRQ_PAGES_PER_WQE; i++) { + atomic_add(MLX5_MPWRQ_STRIDES_PER_PAGE, + &wi->dma_info.page[i]._count); + wi->skbs_frags[i] = 0; + } + + wi->consumed_strides = 0; + wqe->data.addr = cpu_to_be64(wi->dma_info.addr); + + return 0; +} + bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq) { struct mlx5_wq_ll *wq = &rq->wq; @@ -100,7 +135,8 @@ bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq) return !mlx5_wq_ll_is_full(wq); } -static void mlx5e_lro_update_hdr(struct sk_buff *skb, struct mlx5_cqe64 *cqe) +static void mlx5e_lro_update_hdr(struct sk_buff *skb, struct mlx5_cqe64 *cqe, + u32 cqe_bcnt) { struct ethhdr *eth = (struct ethhdr *)(skb->data); struct iphdr *ipv4 = (struct iphdr *)(skb->data + ETH_HLEN); @@ -111,7 +147,7 @@ static void mlx5e_lro_update_hdr(struct sk_buff *skb, struct mlx5_cqe64 *cqe) int tcp_ack = ((CQE_L4_HDR_TYPE_TCP_ACK_NO_DATA == l4_hdr_type) || (CQE_L4_HDR_TYPE_TCP_ACK_AND_DATA == l4_hdr_type)); - u16 tot_len = be32_to_cpu(cqe->byte_cnt) - ETH_HLEN; + u16 tot_len = cqe_bcnt - ETH_HLEN; if (eth->h_proto == htons(ETH_P_IP)) { tcp = (struct tcphdr *)(skb->data + ETH_HLEN + @@ -191,19 +227,17 @@ csum_none: } static inline void mlx5e_build_rx_skb(struct mlx5_cqe64 *cqe, + u32 cqe_bcnt, struct mlx5e_rq *rq, struct sk_buff *skb) { struct net_device *netdev = rq->netdev; - u32 cqe_bcnt = be32_to_cpu(cqe->byte_cnt); struct mlx5e_tstamp *tstamp = rq->tstamp; int lro_num_seg; - skb_put(skb, cqe_bcnt); - lro_num_seg = be32_to_cpu(cqe->srqn) >> 24; if (lro_num_seg > 1) { - mlx5e_lro_update_hdr(skb, cqe); + mlx5e_lro_update_hdr(skb, cqe, cqe_bcnt); skb_shinfo(skb)->gso_size = DIV_ROUND_UP(cqe_bcnt, lro_num_seg); rq->stats.lro_packets++; rq->stats.lro_bytes += cqe_bcnt; @@ -228,12 +262,24 @@ static inline void mlx5e_build_rx_skb(struct mlx5_cqe64 *cqe, skb->mark = be32_to_cpu(cqe->sop_drop_qpn) & MLX5E_TC_FLOW_ID_MASK; } +static inline void mlx5e_complete_rx_cqe(struct mlx5e_rq *rq, + struct mlx5_cqe64 *cqe, + u32 cqe_bcnt, + struct sk_buff *skb) +{ + rq->stats.packets++; + rq->stats.bytes += cqe_bcnt; + mlx5e_build_rx_skb(cqe, cqe_bcnt, rq, skb); + napi_gro_receive(rq->cq.napi, skb); +} + void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) { struct mlx5e_rx_wqe *wqe; struct sk_buff *skb; __be16 wqe_counter_be; u16 wqe_counter; + u32 cqe_bcnt; wqe_counter_be = cqe->wqe_counter; wqe_counter = be16_to_cpu(wqe_counter_be); @@ -253,16 +299,103 @@ void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) goto wq_ll_pop; } - mlx5e_build_rx_skb(cqe, rq, skb); - rq->stats.packets++; - rq->stats.bytes += be32_to_cpu(cqe->byte_cnt); - napi_gro_receive(rq->cq.napi, skb); + cqe_bcnt = be32_to_cpu(cqe->byte_cnt); + skb_put(skb, cqe_bcnt); + + mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb); wq_ll_pop: mlx5_wq_ll_pop(&rq->wq, wqe_counter_be, &wqe->next.next_wqe_index); } +void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) +{ + u16 cstrides = mpwrq_get_cqe_consumed_strides(cqe); + u16 stride_ix = mpwrq_get_cqe_stride_index(cqe); + u16 wqe_id = be16_to_cpu(cqe->wqe_id); + struct mlx5e_mpw_info *wi = &rq->wqe_info[wqe_id]; + struct mlx5e_rx_wqe *wqe = mlx5_wq_ll_get_wqe(&rq->wq, wqe_id); + struct sk_buff *skb; + u32 consumed_bytes; + u32 head_offset; + u32 frag_offset; + u32 wqe_offset; + u32 page_idx; + u16 byte_cnt; + u16 cqe_bcnt; + u16 headlen; + int i; + + wi->consumed_strides += cstrides; + + if (unlikely((cqe->op_own >> 4) != MLX5_CQE_RESP_SEND)) { + rq->stats.wqe_err++; + goto mpwrq_cqe_out; + } + + if (unlikely(mpwrq_is_filler_cqe(cqe))) { + rq->stats.mpwqe_filler++; + goto mpwrq_cqe_out; + } + + skb = netdev_alloc_skb(rq->netdev, + ALIGN(MLX5_MPWRQ_SMALL_PACKET_THRESHOLD, + sizeof(long))); + if (unlikely(!skb)) + goto mpwrq_cqe_out; + + prefetch(skb->data); + wqe_offset = stride_ix * MLX5_MPWRQ_STRIDE_SIZE; + consumed_bytes = cstrides * MLX5_MPWRQ_STRIDE_SIZE; + dma_sync_single_for_cpu(rq->pdev, wi->dma_info.addr + wqe_offset, + consumed_bytes, DMA_FROM_DEVICE); + + head_offset = wqe_offset & (PAGE_SIZE - 1); + page_idx = wqe_offset >> PAGE_SHIFT; + cqe_bcnt = mpwrq_get_cqe_byte_cnt(cqe); + headlen = min_t(u16, MLX5_MPWRQ_SMALL_PACKET_THRESHOLD, cqe_bcnt); + frag_offset = head_offset + headlen; + + byte_cnt = cqe_bcnt - headlen; + while (byte_cnt) { + u32 pg_consumed_bytes = + min_t(u32, PAGE_SIZE - frag_offset, byte_cnt); + unsigned int truesize = + ALIGN(pg_consumed_bytes, MLX5_MPWRQ_STRIDE_SIZE); + + wi->skbs_frags[page_idx]++; + skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, + &wi->dma_info.page[page_idx], frag_offset, + pg_consumed_bytes, truesize); + byte_cnt -= pg_consumed_bytes; + frag_offset = 0; + page_idx++; + } + + skb_copy_to_linear_data(skb, + page_address(wi->dma_info.page) + wqe_offset, + ALIGN(headlen, sizeof(long))); + /* skb linear part was allocated with headlen and aligned to long */ + skb->tail += headlen; + skb->len += headlen; + + mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb); + +mpwrq_cqe_out: + if (likely(wi->consumed_strides < MLX5_MPWRQ_NUM_STRIDES)) + return; + + dma_unmap_page(rq->pdev, wi->dma_info.addr, rq->wqe_sz, + PCI_DMA_FROMDEVICE); + for (i = 0; i < MLX5_MPWRQ_PAGES_PER_WQE; i++) { + atomic_sub(MLX5_MPWRQ_STRIDES_PER_PAGE - wi->skbs_frags[i], + &wi->dma_info.page[i]._count); + put_page(&wi->dma_info.page[i]); + } + mlx5_wq_ll_pop(&rq->wq, cqe->wqe_id, &wqe->next.next_wqe_index); +} + int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget) { struct mlx5e_rq *rq = container_of(cq, struct mlx5e_rq, cq); diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 8156e3c9239c..03f8d719b680 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -644,7 +644,8 @@ struct mlx5_err_cqe { }; struct mlx5_cqe64 { - u8 rsvd0[4]; + u8 rsvd0[2]; + __be16 wqe_id; u8 lro_tcppsh_abort_dupack; u8 lro_min_ttl; __be16 lro_tcp_win; @@ -696,6 +697,42 @@ static inline u64 get_cqe_ts(struct mlx5_cqe64 *cqe) return (u64)lo | ((u64)hi << 32); } +struct mpwrq_cqe_bc { + __be16 filler_consumed_strides; + __be16 byte_cnt; +}; + +static inline u16 mpwrq_get_cqe_byte_cnt(struct mlx5_cqe64 *cqe) +{ + struct mpwrq_cqe_bc *bc = (struct mpwrq_cqe_bc *)&cqe->byte_cnt; + + return be16_to_cpu(bc->byte_cnt); +} + +static inline u16 mpwrq_get_cqe_bc_consumed_strides(struct mpwrq_cqe_bc *bc) +{ + return 0x7fff & be16_to_cpu(bc->filler_consumed_strides); +} + +static inline u16 mpwrq_get_cqe_consumed_strides(struct mlx5_cqe64 *cqe) +{ + struct mpwrq_cqe_bc *bc = (struct mpwrq_cqe_bc *)&cqe->byte_cnt; + + return mpwrq_get_cqe_bc_consumed_strides(bc); +} + +static inline bool mpwrq_is_filler_cqe(struct mlx5_cqe64 *cqe) +{ + struct mpwrq_cqe_bc *bc = (struct mpwrq_cqe_bc *)&cqe->byte_cnt; + + return 0x8000 & be16_to_cpu(bc->filler_consumed_strides); +} + +static inline u16 mpwrq_get_cqe_stride_index(struct mlx5_cqe64 *cqe) +{ + return be16_to_cpu(cqe->wqe_counter); +} + enum { CQE_L4_HDR_TYPE_NONE = 0x0, CQE_L4_HDR_TYPE_TCP_NO_ACK = 0x1, -- cgit v1.2.3-55-g7522 From d3c9bc2743dc95b273ed0e6a3394a71ca314813c Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Wed, 20 Apr 2016 22:02:14 +0300 Subject: net/mlx5e: Added ICO SQs Added ICO (Internal Control Operations) SQ per channel to be used for driver internal operations such as memory registration for fragmented memory and nop requests upon ifconfig up. Signed-off-by: Tariq Toukan Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 7 ++ drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 135 ++++++++++++++++++---- drivers/net/ethernet/mellanox/mlx5/core/en_tx.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c | 55 +++++++++ 4 files changed, 174 insertions(+), 25 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index f519148d7dcc..a757fcf19332 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -488,6 +488,11 @@ enum { MLX5E_SQ_STATE_BF_ENABLE, }; +struct mlx5e_ico_wqe_info { + u8 opcode; + u8 num_wqebbs; +}; + struct mlx5e_sq { /* data path */ @@ -529,6 +534,7 @@ struct mlx5e_sq { struct mlx5_uar uar; struct mlx5e_channel *channel; int tc; + struct mlx5e_ico_wqe_info *ico_wqe_info; } ____cacheline_aligned_in_smp; static inline bool mlx5e_sq_has_room_for(struct mlx5e_sq *sq, u16 n) @@ -545,6 +551,7 @@ struct mlx5e_channel { /* data path */ struct mlx5e_rq rq; struct mlx5e_sq sq[MLX5E_MAX_NUM_TC]; + struct mlx5e_sq icosq; /* internal control operations */ struct napi_struct napi; struct device *pdev; struct net_device *netdev; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 871f3af204dd..b25b429ebabb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -48,6 +48,7 @@ struct mlx5e_sq_param { u32 sqc[MLX5_ST_SZ_DW(sqc)]; struct mlx5_wq_param wq; u16 max_inline; + bool icosq; }; struct mlx5e_cq_param { @@ -59,8 +60,10 @@ struct mlx5e_cq_param { struct mlx5e_channel_param { struct mlx5e_rq_param rq; struct mlx5e_sq_param sq; + struct mlx5e_sq_param icosq; struct mlx5e_cq_param rx_cq; struct mlx5e_cq_param tx_cq; + struct mlx5e_cq_param icosq_cq; }; static void mlx5e_update_carrier(struct mlx5e_priv *priv) @@ -502,6 +505,8 @@ static int mlx5e_open_rq(struct mlx5e_channel *c, struct mlx5e_rq_param *param, struct mlx5e_rq *rq) { + struct mlx5e_sq *sq = &c->icosq; + u16 pi = sq->pc & sq->wq.sz_m1; int err; err = mlx5e_create_rq(c, param, rq); @@ -517,7 +522,10 @@ static int mlx5e_open_rq(struct mlx5e_channel *c, goto err_disable_rq; set_bit(MLX5E_RQ_STATE_POST_WQES_ENABLE, &rq->state); - mlx5e_send_nop(&c->sq[0], true); /* trigger mlx5e_post_rx_wqes() */ + + sq->ico_wqe_info[pi].opcode = MLX5_OPCODE_NOP; + sq->ico_wqe_info[pi].num_wqebbs = 1; + mlx5e_send_nop(sq, true); /* trigger mlx5e_post_rx_wqes() */ return 0; @@ -583,7 +591,6 @@ static int mlx5e_create_sq(struct mlx5e_channel *c, void *sqc = param->sqc; void *sqc_wq = MLX5_ADDR_OF(sqc, sqc, wq); - int txq_ix; int err; err = mlx5_alloc_map_uar(mdev, &sq->uar, true); @@ -611,8 +618,24 @@ static int mlx5e_create_sq(struct mlx5e_channel *c, if (err) goto err_sq_wq_destroy; - txq_ix = c->ix + tc * priv->params.num_channels; - sq->txq = netdev_get_tx_queue(priv->netdev, txq_ix); + if (param->icosq) { + u8 wq_sz = mlx5_wq_cyc_get_size(&sq->wq); + + sq->ico_wqe_info = kzalloc_node(sizeof(*sq->ico_wqe_info) * + wq_sz, + GFP_KERNEL, + cpu_to_node(c->cpu)); + if (!sq->ico_wqe_info) { + err = -ENOMEM; + goto err_free_sq_db; + } + } else { + int txq_ix; + + txq_ix = c->ix + tc * priv->params.num_channels; + sq->txq = netdev_get_tx_queue(priv->netdev, txq_ix); + priv->txq_to_sq_map[txq_ix] = sq; + } sq->pdev = c->pdev; sq->tstamp = &priv->tstamp; @@ -621,10 +644,12 @@ static int mlx5e_create_sq(struct mlx5e_channel *c, sq->tc = tc; sq->edge = (sq->wq.sz_m1 + 1) - MLX5_SEND_WQE_MAX_WQEBBS; sq->bf_budget = MLX5E_SQ_BF_BUDGET; - priv->txq_to_sq_map[txq_ix] = sq; return 0; +err_free_sq_db: + mlx5e_free_sq_db(sq); + err_sq_wq_destroy: mlx5_wq_destroy(&sq->wq_ctrl); @@ -639,6 +664,7 @@ static void mlx5e_destroy_sq(struct mlx5e_sq *sq) struct mlx5e_channel *c = sq->channel; struct mlx5e_priv *priv = c->priv; + kfree(sq->ico_wqe_info); mlx5e_free_sq_db(sq); mlx5_wq_destroy(&sq->wq_ctrl); mlx5_unmap_free_uar(priv->mdev, &sq->uar); @@ -667,10 +693,10 @@ static int mlx5e_enable_sq(struct mlx5e_sq *sq, struct mlx5e_sq_param *param) memcpy(sqc, param->sqc, sizeof(param->sqc)); - MLX5_SET(sqc, sqc, tis_num_0, priv->tisn[sq->tc]); - MLX5_SET(sqc, sqc, cqn, c->sq[sq->tc].cq.mcq.cqn); + MLX5_SET(sqc, sqc, tis_num_0, param->icosq ? 0 : priv->tisn[sq->tc]); + MLX5_SET(sqc, sqc, cqn, sq->cq.mcq.cqn); MLX5_SET(sqc, sqc, state, MLX5_SQC_STATE_RST); - MLX5_SET(sqc, sqc, tis_lst_sz, 1); + MLX5_SET(sqc, sqc, tis_lst_sz, param->icosq ? 0 : 1); MLX5_SET(sqc, sqc, flush_in_error_en, 1); MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC); @@ -745,9 +771,11 @@ static int mlx5e_open_sq(struct mlx5e_channel *c, if (err) goto err_disable_sq; - set_bit(MLX5E_SQ_STATE_WAKE_TXQ_ENABLE, &sq->state); - netdev_tx_reset_queue(sq->txq); - netif_tx_start_queue(sq->txq); + if (sq->txq) { + set_bit(MLX5E_SQ_STATE_WAKE_TXQ_ENABLE, &sq->state); + netdev_tx_reset_queue(sq->txq); + netif_tx_start_queue(sq->txq); + } return 0; @@ -768,15 +796,19 @@ static inline void netif_tx_disable_queue(struct netdev_queue *txq) static void mlx5e_close_sq(struct mlx5e_sq *sq) { - clear_bit(MLX5E_SQ_STATE_WAKE_TXQ_ENABLE, &sq->state); - napi_synchronize(&sq->channel->napi); /* prevent netif_tx_wake_queue */ - netif_tx_disable_queue(sq->txq); + if (sq->txq) { + clear_bit(MLX5E_SQ_STATE_WAKE_TXQ_ENABLE, &sq->state); + /* prevent netif_tx_wake_queue */ + napi_synchronize(&sq->channel->napi); + netif_tx_disable_queue(sq->txq); - /* ensure hw is notified of all pending wqes */ - if (mlx5e_sq_has_room_for(sq, 1)) - mlx5e_send_nop(sq, true); + /* ensure hw is notified of all pending wqes */ + if (mlx5e_sq_has_room_for(sq, 1)) + mlx5e_send_nop(sq, true); + + mlx5e_modify_sq(sq, MLX5_SQC_STATE_RDY, MLX5_SQC_STATE_ERR); + } - mlx5e_modify_sq(sq, MLX5_SQC_STATE_RDY, MLX5_SQC_STATE_ERR); while (sq->cc != sq->pc) /* wait till sq is empty */ msleep(20); @@ -1030,10 +1062,14 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, netif_napi_add(netdev, &c->napi, mlx5e_napi_poll, 64); - err = mlx5e_open_tx_cqs(c, cparam); + err = mlx5e_open_cq(c, &cparam->icosq_cq, &c->icosq.cq, 0, 0); if (err) goto err_napi_del; + err = mlx5e_open_tx_cqs(c, cparam); + if (err) + goto err_close_icosq_cq; + err = mlx5e_open_cq(c, &cparam->rx_cq, &c->rq.cq, priv->params.rx_cq_moderation_usec, priv->params.rx_cq_moderation_pkts); @@ -1042,10 +1078,14 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, napi_enable(&c->napi); - err = mlx5e_open_sqs(c, cparam); + err = mlx5e_open_sq(c, 0, &cparam->icosq, &c->icosq); if (err) goto err_disable_napi; + err = mlx5e_open_sqs(c, cparam); + if (err) + goto err_close_icosq; + err = mlx5e_open_rq(c, &cparam->rq, &c->rq); if (err) goto err_close_sqs; @@ -1058,6 +1098,9 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, err_close_sqs: mlx5e_close_sqs(c); +err_close_icosq: + mlx5e_close_sq(&c->icosq); + err_disable_napi: napi_disable(&c->napi); mlx5e_close_cq(&c->rq.cq); @@ -1065,6 +1108,9 @@ err_disable_napi: err_close_tx_cqs: mlx5e_close_tx_cqs(c); +err_close_icosq_cq: + mlx5e_close_cq(&c->icosq.cq); + err_napi_del: netif_napi_del(&c->napi); napi_hash_del(&c->napi); @@ -1077,9 +1123,11 @@ static void mlx5e_close_channel(struct mlx5e_channel *c) { mlx5e_close_rq(&c->rq); mlx5e_close_sqs(c); + mlx5e_close_sq(&c->icosq); napi_disable(&c->napi); mlx5e_close_cq(&c->rq.cq); mlx5e_close_tx_cqs(c); + mlx5e_close_cq(&c->icosq.cq); netif_napi_del(&c->napi); napi_hash_del(&c->napi); @@ -1125,17 +1173,27 @@ static void mlx5e_build_drop_rq_param(struct mlx5e_rq_param *param) MLX5_SET(wq, wq, log_wq_stride, ilog2(sizeof(struct mlx5e_rx_wqe))); } -static void mlx5e_build_sq_param(struct mlx5e_priv *priv, - struct mlx5e_sq_param *param) +static void mlx5e_build_sq_param_common(struct mlx5e_priv *priv, + struct mlx5e_sq_param *param) { void *sqc = param->sqc; void *wq = MLX5_ADDR_OF(sqc, sqc, wq); - MLX5_SET(wq, wq, log_wq_sz, priv->params.log_sq_size); MLX5_SET(wq, wq, log_wq_stride, ilog2(MLX5_SEND_WQE_BB)); MLX5_SET(wq, wq, pd, priv->pdn); param->wq.buf_numa_node = dev_to_node(&priv->mdev->pdev->dev); +} + +static void mlx5e_build_sq_param(struct mlx5e_priv *priv, + struct mlx5e_sq_param *param) +{ + void *sqc = param->sqc; + void *wq = MLX5_ADDR_OF(sqc, sqc, wq); + + mlx5e_build_sq_param_common(priv, param); + MLX5_SET(wq, wq, log_wq_sz, priv->params.log_sq_size); + param->max_inline = priv->params.tx_max_inline; } @@ -1172,20 +1230,49 @@ static void mlx5e_build_tx_cq_param(struct mlx5e_priv *priv, { void *cqc = param->cqc; - MLX5_SET(cqc, cqc, log_cq_size, priv->params.log_sq_size); + MLX5_SET(cqc, cqc, log_cq_size, priv->params.log_sq_size); mlx5e_build_common_cq_param(priv, param); } +static void mlx5e_build_ico_cq_param(struct mlx5e_priv *priv, + struct mlx5e_cq_param *param, + u8 log_wq_size) +{ + void *cqc = param->cqc; + + MLX5_SET(cqc, cqc, log_cq_size, log_wq_size); + + mlx5e_build_common_cq_param(priv, param); +} + +static void mlx5e_build_icosq_param(struct mlx5e_priv *priv, + struct mlx5e_sq_param *param, + u8 log_wq_size) +{ + void *sqc = param->sqc; + void *wq = MLX5_ADDR_OF(sqc, sqc, wq); + + mlx5e_build_sq_param_common(priv, param); + + MLX5_SET(wq, wq, log_wq_sz, log_wq_size); + + param->icosq = true; +} + static void mlx5e_build_channel_param(struct mlx5e_priv *priv, struct mlx5e_channel_param *cparam) { + u8 icosq_log_wq_sz = 0; + memset(cparam, 0, sizeof(*cparam)); mlx5e_build_rq_param(priv, &cparam->rq); mlx5e_build_sq_param(priv, &cparam->sq); + mlx5e_build_icosq_param(priv, &cparam->icosq, icosq_log_wq_sz); mlx5e_build_rx_cq_param(priv, &cparam->rx_cq); mlx5e_build_tx_cq_param(priv, &cparam->tx_cq); + mlx5e_build_ico_cq_param(priv, &cparam->icosq_cq, icosq_log_wq_sz); } static int mlx5e_open_channels(struct mlx5e_priv *priv) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c index 1ffc7cb6f78c..a8d2935c50d0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@ -54,6 +54,7 @@ void mlx5e_send_nop(struct mlx5e_sq *sq, bool notify_hw) sq->skb[pi] = NULL; sq->pc++; + sq->stats.nop++; if (notify_hw) { cseg->fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE; @@ -387,7 +388,6 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget) wi = &sq->wqe_info[ci]; if (unlikely(!skb)) { /* nop */ - sq->stats.nop++; sqcc++; continue; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c index 9bb4395aceeb..ad624cb6f147 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c @@ -49,6 +49,57 @@ struct mlx5_cqe64 *mlx5e_get_cqe(struct mlx5e_cq *cq) return cqe; } +static void mlx5e_poll_ico_cq(struct mlx5e_cq *cq) +{ + struct mlx5_wq_cyc *wq; + struct mlx5_cqe64 *cqe; + struct mlx5e_sq *sq; + u16 sqcc; + + cqe = mlx5e_get_cqe(cq); + if (likely(!cqe)) + return; + + sq = container_of(cq, struct mlx5e_sq, cq); + wq = &sq->wq; + + /* sq->cc must be updated only after mlx5_cqwq_update_db_record(), + * otherwise a cq overrun may occur + */ + sqcc = sq->cc; + + do { + u16 ci = be16_to_cpu(cqe->wqe_counter) & wq->sz_m1; + struct mlx5e_ico_wqe_info *icowi = &sq->ico_wqe_info[ci]; + + mlx5_cqwq_pop(&cq->wq); + sqcc += icowi->num_wqebbs; + + if (unlikely((cqe->op_own >> 4) != MLX5_CQE_REQ)) { + WARN_ONCE(true, "mlx5e: Bad OP in ICOSQ CQE: 0x%x\n", + cqe->op_own); + break; + } + + switch (icowi->opcode) { + case MLX5_OPCODE_NOP: + break; + default: + WARN_ONCE(true, + "mlx5e: Bad OPCODE in ICOSQ WQE info: 0x%x\n", + icowi->opcode); + } + + } while ((cqe = mlx5e_get_cqe(cq))); + + mlx5_cqwq_update_db_record(&cq->wq); + + /* ensure cq space is freed before enabling more cqes */ + wmb(); + + sq->cc = sqcc; +} + int mlx5e_napi_poll(struct napi_struct *napi, int budget) { struct mlx5e_channel *c = container_of(napi, struct mlx5e_channel, @@ -64,6 +115,9 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget) work_done = mlx5e_poll_rx_cq(&c->rq.cq, budget); busy |= work_done == budget; + + mlx5e_poll_ico_cq(&c->icosq.cq); + busy |= mlx5e_post_rx_wqes(&c->rq); if (busy) @@ -80,6 +134,7 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget) for (i = 0; i < c->num_tc; i++) mlx5e_cq_arm(&c->sq[i].cq); mlx5e_cq_arm(&c->rq.cq); + mlx5e_cq_arm(&c->icosq.cq); return work_done; } -- cgit v1.2.3-55-g7522 From bc77b240b3c57236cdcc08d64ca390655d3a16ff Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Wed, 20 Apr 2016 22:02:15 +0300 Subject: net/mlx5e: Add fragmented memory support for RX multi packet WQE If the allocation of a linear (physically continuous) MPWQE fails, we allocate a fragmented MPWQE. This is implemented via device's UMR (User Memory Registration) which allows to register multiple memory fragments into ConnectX hardware as a continuous buffer. UMR registration is an asynchronous operation and is done via ICO SQs. Signed-off-by: Tariq Toukan Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 84 ++++- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 64 +++- drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 427 +++++++++++++++++++--- drivers/net/ethernet/mellanox/mlx5/core/en_tx.c | 4 +- drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c | 3 + 5 files changed, 514 insertions(+), 68 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index a757fcf19332..c99fdff74c97 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -72,6 +72,9 @@ #define MLX5_MPWRQ_PAGES_PER_WQE BIT(MLX5_MPWRQ_WQE_PAGE_ORDER) #define MLX5_MPWRQ_STRIDES_PER_PAGE (MLX5_MPWRQ_NUM_STRIDES >> \ MLX5_MPWRQ_WQE_PAGE_ORDER) +#define MLX5_CHANNEL_MAX_NUM_MTTS (ALIGN(MLX5_MPWRQ_PAGES_PER_WQE, 8) * \ + BIT(MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE_MPW)) +#define MLX5_UMR_ALIGN (2048) #define MLX5_MPWRQ_SMALL_PACKET_THRESHOLD (128) #define MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ (64 * 1024) @@ -134,6 +137,13 @@ struct mlx5e_rx_wqe { struct mlx5_wqe_data_seg data; }; +struct mlx5e_umr_wqe { + struct mlx5_wqe_ctrl_seg ctrl; + struct mlx5_wqe_umr_ctrl_seg uctrl; + struct mlx5_mkey_seg mkc; + struct mlx5_wqe_data_seg data; +}; + #ifdef CONFIG_MLX5_CORE_EN_DCB #define MLX5E_MAX_BW_ALLOC 100 /* Max percentage of BW allocation */ #define MLX5E_MIN_BW_ALLOC 1 /* Min percentage of BW allocation */ @@ -179,6 +189,7 @@ static const char vport_strings[][ETH_GSTRING_LEN] = { "tx_queue_dropped", "rx_wqe_err", "rx_mpwqe_filler", + "rx_mpwqe_frag", }; struct mlx5e_vport_stats { @@ -221,8 +232,9 @@ struct mlx5e_vport_stats { u64 tx_queue_dropped; u64 rx_wqe_err; u64 rx_mpwqe_filler; + u64 rx_mpwqe_frag; -#define NUM_VPORT_COUNTERS 36 +#define NUM_VPORT_COUNTERS 37 }; static const char pport_strings[][ETH_GSTRING_LEN] = { @@ -317,6 +329,7 @@ static const char rq_stats_strings[][ETH_GSTRING_LEN] = { "lro_bytes", "wqe_err", "mpwqe_filler", + "mpwqe_frag", }; struct mlx5e_rq_stats { @@ -328,7 +341,8 @@ struct mlx5e_rq_stats { u64 lro_bytes; u64 wqe_err; u64 mpwqe_filler; -#define NUM_RQ_STATS 8 + u64 mpwqe_frag; +#define NUM_RQ_STATS 9 }; static const char sq_stats_strings[][ETH_GSTRING_LEN] = { @@ -407,6 +421,7 @@ struct mlx5e_tstamp { enum { MLX5E_RQ_STATE_POST_WQES_ENABLE, + MLX5E_RQ_STATE_UMR_WQE_IN_PROGRESS, }; struct mlx5e_cq { @@ -434,18 +449,14 @@ struct mlx5e_dma_info { dma_addr_t addr; }; -struct mlx5e_mpw_info { - struct mlx5e_dma_info dma_info; - u16 consumed_strides; - u16 skbs_frags[MLX5_MPWRQ_PAGES_PER_WQE]; -}; - struct mlx5e_rq { /* data path */ struct mlx5_wq_ll wq; u32 wqe_sz; struct sk_buff **skb; struct mlx5e_mpw_info *wqe_info; + __be32 mkey_be; + __be32 umr_mkey_be; struct device *pdev; struct net_device *netdev; @@ -466,6 +477,36 @@ struct mlx5e_rq { struct mlx5e_priv *priv; } ____cacheline_aligned_in_smp; +struct mlx5e_umr_dma_info { + __be64 *mtt; + __be64 *mtt_no_align; + dma_addr_t mtt_addr; + struct mlx5e_dma_info *dma_info; +}; + +struct mlx5e_mpw_info { + union { + struct mlx5e_dma_info dma_info; + struct mlx5e_umr_dma_info umr; + }; + u16 consumed_strides; + u16 skbs_frags[MLX5_MPWRQ_PAGES_PER_WQE]; + + void (*dma_pre_sync)(struct device *pdev, + struct mlx5e_mpw_info *wi, + u32 wqe_offset, u32 len); + void (*add_skb_frag)(struct device *pdev, + struct sk_buff *skb, + struct mlx5e_mpw_info *wi, + u32 page_idx, u32 frag_offset, u32 len); + void (*copy_skb_header)(struct device *pdev, + struct sk_buff *skb, + struct mlx5e_mpw_info *wi, + u32 page_idx, u32 offset, + u32 headlen); + void (*free_wqe)(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi); +}; + struct mlx5e_tx_wqe_info { u32 num_bytes; u8 num_wqebbs; @@ -658,6 +699,7 @@ struct mlx5e_priv { u32 pdn; u32 tdn; struct mlx5_core_mkey mkey; + struct mlx5_core_mkey umr_mkey; struct mlx5e_rq drop_rq; struct mlx5e_channel **channel; @@ -730,6 +772,21 @@ void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe); bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq); int mlx5e_alloc_rx_wqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix); int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix); +void mlx5e_post_rx_fragmented_mpwqe(struct mlx5e_rq *rq); +void mlx5e_complete_rx_linear_mpwqe(struct mlx5e_rq *rq, + struct mlx5_cqe64 *cqe, + u16 byte_cnt, + struct mlx5e_mpw_info *wi, + struct sk_buff *skb); +void mlx5e_complete_rx_fragmented_mpwqe(struct mlx5e_rq *rq, + struct mlx5_cqe64 *cqe, + u16 byte_cnt, + struct mlx5e_mpw_info *wi, + struct sk_buff *skb); +void mlx5e_free_rx_linear_mpwqe(struct mlx5e_rq *rq, + struct mlx5e_mpw_info *wi); +void mlx5e_free_rx_fragmented_mpwqe(struct mlx5e_rq *rq, + struct mlx5e_mpw_info *wi); struct mlx5_cqe64 *mlx5e_get_cqe(struct mlx5e_cq *cq); void mlx5e_update_stats(struct mlx5e_priv *priv); @@ -763,7 +820,7 @@ void mlx5e_build_default_indir_rqt(struct mlx5_core_dev *mdev, int num_channels); static inline void mlx5e_tx_notify_hw(struct mlx5e_sq *sq, - struct mlx5e_tx_wqe *wqe, int bf_sz) + struct mlx5_wqe_ctrl_seg *ctrl, int bf_sz) { u16 ofst = MLX5_BF_OFFSET + sq->bf_offset; @@ -777,9 +834,9 @@ static inline void mlx5e_tx_notify_hw(struct mlx5e_sq *sq, */ wmb(); if (bf_sz) - __iowrite64_copy(sq->uar_map + ofst, &wqe->ctrl, bf_sz); + __iowrite64_copy(sq->uar_map + ofst, ctrl, bf_sz); else - mlx5_write64((__be32 *)&wqe->ctrl, sq->uar_map + ofst, NULL); + mlx5_write64((__be32 *)ctrl, sq->uar_map + ofst, NULL); /* flush the write-combining mapped buffer */ wmb(); @@ -800,6 +857,11 @@ static inline int mlx5e_get_max_num_channels(struct mlx5_core_dev *mdev) MLX5E_MAX_NUM_CHANNELS); } +static inline int mlx5e_get_mtt_octw(int npages) +{ + return ALIGN(npages, 8) / 2; +} + extern const struct ethtool_ops mlx5e_ethtool_ops; #ifdef CONFIG_MLX5_CORE_EN_DCB extern const struct dcbnl_rtnl_ops mlx5e_dcbnl_ops; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index b25b429ebabb..942829e6d8ba 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -179,6 +179,7 @@ void mlx5e_update_stats(struct mlx5e_priv *priv) s->rx_csum_sw = 0; s->rx_wqe_err = 0; s->rx_mpwqe_filler = 0; + s->rx_mpwqe_frag = 0; for (i = 0; i < priv->params.num_channels; i++) { rq_stats = &priv->channel[i]->rq.stats; @@ -190,6 +191,7 @@ void mlx5e_update_stats(struct mlx5e_priv *priv) s->rx_csum_sw += rq_stats->csum_sw; s->rx_wqe_err += rq_stats->wqe_err; s->rx_mpwqe_filler += rq_stats->mpwqe_filler; + s->rx_mpwqe_frag += rq_stats->mpwqe_frag; for (j = 0; j < priv->params.num_tc; j++) { sq_stats = &priv->channel[i]->sq[j].stats; @@ -379,7 +381,6 @@ static int mlx5e_create_rq(struct mlx5e_channel *c, for (i = 0; i < wq_sz; i++) { struct mlx5e_rx_wqe *wqe = mlx5_wq_ll_get_wqe(&rq->wq, i); - wqe->data.lkey = c->mkey_be; wqe->data.byte_count = cpu_to_be32(byte_count); } @@ -390,6 +391,8 @@ static int mlx5e_create_rq(struct mlx5e_channel *c, rq->channel = c; rq->ix = c->ix; rq->priv = c->priv; + rq->mkey_be = c->mkey_be; + rq->umr_mkey_be = cpu_to_be32(c->priv->umr_mkey.key); return 0; @@ -1256,6 +1259,7 @@ static void mlx5e_build_icosq_param(struct mlx5e_priv *priv, mlx5e_build_sq_param_common(priv, param); MLX5_SET(wq, wq, log_wq_sz, log_wq_size); + MLX5_SET(sqc, sqc, reg_umr, MLX5_CAP_ETH(priv->mdev, reg_umr_sq)); param->icosq = true; } @@ -1263,7 +1267,7 @@ static void mlx5e_build_icosq_param(struct mlx5e_priv *priv, static void mlx5e_build_channel_param(struct mlx5e_priv *priv, struct mlx5e_channel_param *cparam) { - u8 icosq_log_wq_sz = 0; + u8 icosq_log_wq_sz = MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE; memset(cparam, 0, sizeof(*cparam)); @@ -2458,6 +2462,13 @@ void mlx5e_build_default_indir_rqt(struct mlx5_core_dev *mdev, indirection_rqt[i] = i % num_channels; } +static bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev) +{ + return MLX5_CAP_GEN(mdev, striding_rq) && + MLX5_CAP_GEN(mdev, umr_ptr_rlky) && + MLX5_CAP_ETH(mdev, reg_umr_sq); +} + static void mlx5e_build_netdev_priv(struct mlx5_core_dev *mdev, struct net_device *netdev, int num_channels) @@ -2466,7 +2477,7 @@ static void mlx5e_build_netdev_priv(struct mlx5_core_dev *mdev, priv->params.log_sq_size = MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE; - priv->params.rq_wq_type = MLX5_CAP_GEN(mdev, striding_rq) ? + priv->params.rq_wq_type = mlx5e_check_fragmented_striding_rq_cap(mdev) ? MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ : MLX5_WQ_TYPE_LINKED_LIST; @@ -2639,6 +2650,41 @@ static void mlx5e_destroy_q_counter(struct mlx5e_priv *priv) mlx5_core_dealloc_q_counter(priv->mdev, priv->q_counter); } +static int mlx5e_create_umr_mkey(struct mlx5e_priv *priv) +{ + struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5_create_mkey_mbox_in *in; + struct mlx5_mkey_seg *mkc; + int inlen = sizeof(*in); + u64 npages = + mlx5e_get_max_num_channels(mdev) * MLX5_CHANNEL_MAX_NUM_MTTS; + int err; + + in = mlx5_vzalloc(inlen); + if (!in) + return -ENOMEM; + + mkc = &in->seg; + mkc->status = MLX5_MKEY_STATUS_FREE; + mkc->flags = MLX5_PERM_UMR_EN | + MLX5_PERM_LOCAL_READ | + MLX5_PERM_LOCAL_WRITE | + MLX5_ACCESS_MODE_MTT; + + mkc->qpn_mkey7_0 = cpu_to_be32(0xffffff << 8); + mkc->flags_pd = cpu_to_be32(priv->pdn); + mkc->len = cpu_to_be64(npages << PAGE_SHIFT); + mkc->xlt_oct_size = cpu_to_be32(mlx5e_get_mtt_octw(npages)); + mkc->log2_page_size = PAGE_SHIFT; + + err = mlx5_core_create_mkey(mdev, &priv->umr_mkey, in, inlen, NULL, + NULL, NULL); + + kvfree(in); + + return err; +} + static void *mlx5e_create_netdev(struct mlx5_core_dev *mdev) { struct net_device *netdev; @@ -2688,10 +2734,16 @@ static void *mlx5e_create_netdev(struct mlx5_core_dev *mdev) goto err_dealloc_transport_domain; } + err = mlx5e_create_umr_mkey(priv); + if (err) { + mlx5_core_err(mdev, "create umr mkey failed, %d\n", err); + goto err_destroy_mkey; + } + err = mlx5e_create_tises(priv); if (err) { mlx5_core_warn(mdev, "create tises failed, %d\n", err); - goto err_destroy_mkey; + goto err_destroy_umr_mkey; } err = mlx5e_open_drop_rq(priv); @@ -2774,6 +2826,9 @@ err_close_drop_rq: err_destroy_tises: mlx5e_destroy_tises(priv); +err_destroy_umr_mkey: + mlx5_core_destroy_mkey(mdev, &priv->umr_mkey); + err_destroy_mkey: mlx5_core_destroy_mkey(mdev, &priv->mkey); @@ -2812,6 +2867,7 @@ static void mlx5e_destroy_netdev(struct mlx5_core_dev *mdev, void *vpriv) mlx5e_destroy_rqt(priv, MLX5E_INDIRECTION_RQT); mlx5e_close_drop_rq(priv); mlx5e_destroy_tises(priv); + mlx5_core_destroy_mkey(priv->mdev, &priv->umr_mkey); mlx5_core_destroy_mkey(priv->mdev, &priv->mkey); mlx5_core_dealloc_transport_domain(priv->mdev, priv->tdn); mlx5_core_dealloc_pd(priv->mdev, priv->pdn); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 71f3a5d244ff..d71919ccf912 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -65,6 +65,7 @@ int mlx5e_alloc_rx_wqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix) *((dma_addr_t *)skb->cb) = dma_addr; wqe->data.addr = cpu_to_be64(dma_addr + MLX5E_NET_IP_ALIGN); + wqe->data.lkey = rq->mkey_be; rq->skb[ix] = skb; @@ -76,7 +77,295 @@ err_free_skb: return -ENOMEM; } -int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix) +static inline void +mlx5e_dma_pre_sync_linear_mpwqe(struct device *pdev, + struct mlx5e_mpw_info *wi, + u32 wqe_offset, u32 len) +{ + dma_sync_single_for_cpu(pdev, wi->dma_info.addr + wqe_offset, + len, DMA_FROM_DEVICE); +} + +static inline void +mlx5e_dma_pre_sync_fragmented_mpwqe(struct device *pdev, + struct mlx5e_mpw_info *wi, + u32 wqe_offset, u32 len) +{ + /* No dma pre sync for fragmented MPWQE */ +} + +static inline void +mlx5e_add_skb_frag_linear_mpwqe(struct device *pdev, + struct sk_buff *skb, + struct mlx5e_mpw_info *wi, + u32 page_idx, u32 frag_offset, + u32 len) +{ + unsigned int truesize = ALIGN(len, MLX5_MPWRQ_STRIDE_SIZE); + + wi->skbs_frags[page_idx]++; + skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, + &wi->dma_info.page[page_idx], frag_offset, + len, truesize); +} + +static inline void +mlx5e_add_skb_frag_fragmented_mpwqe(struct device *pdev, + struct sk_buff *skb, + struct mlx5e_mpw_info *wi, + u32 page_idx, u32 frag_offset, + u32 len) +{ + unsigned int truesize = ALIGN(len, MLX5_MPWRQ_STRIDE_SIZE); + + dma_sync_single_for_cpu(pdev, + wi->umr.dma_info[page_idx].addr + frag_offset, + len, DMA_FROM_DEVICE); + wi->skbs_frags[page_idx]++; + skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, + wi->umr.dma_info[page_idx].page, frag_offset, + len, truesize); +} + +static inline void +mlx5e_copy_skb_header_linear_mpwqe(struct device *pdev, + struct sk_buff *skb, + struct mlx5e_mpw_info *wi, + u32 page_idx, u32 offset, + u32 headlen) +{ + struct page *page = &wi->dma_info.page[page_idx]; + + skb_copy_to_linear_data(skb, page_address(page) + offset, + ALIGN(headlen, sizeof(long))); +} + +static inline void +mlx5e_copy_skb_header_fragmented_mpwqe(struct device *pdev, + struct sk_buff *skb, + struct mlx5e_mpw_info *wi, + u32 page_idx, u32 offset, + u32 headlen) +{ + u16 headlen_pg = min_t(u32, headlen, PAGE_SIZE - offset); + struct mlx5e_dma_info *dma_info = &wi->umr.dma_info[page_idx]; + unsigned int len; + + /* Aligning len to sizeof(long) optimizes memcpy performance */ + len = ALIGN(headlen_pg, sizeof(long)); + dma_sync_single_for_cpu(pdev, dma_info->addr + offset, len, + DMA_FROM_DEVICE); + skb_copy_to_linear_data_offset(skb, 0, + page_address(dma_info->page) + offset, + len); +#if (MLX5_MPWRQ_SMALL_PACKET_THRESHOLD >= MLX5_MPWRQ_STRIDE_SIZE) + if (unlikely(offset + headlen > PAGE_SIZE)) { + dma_info++; + headlen_pg = len; + len = ALIGN(headlen - headlen_pg, sizeof(long)); + dma_sync_single_for_cpu(pdev, dma_info->addr, len, + DMA_FROM_DEVICE); + skb_copy_to_linear_data_offset(skb, headlen_pg, + page_address(dma_info->page), + len); + } +#endif +} + +static u16 mlx5e_get_wqe_mtt_offset(u16 rq_ix, u16 wqe_ix) +{ + return rq_ix * MLX5_CHANNEL_MAX_NUM_MTTS + + wqe_ix * ALIGN(MLX5_MPWRQ_PAGES_PER_WQE, 8); +} + +static void mlx5e_build_umr_wqe(struct mlx5e_rq *rq, + struct mlx5e_sq *sq, + struct mlx5e_umr_wqe *wqe, + u16 ix) +{ + struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl; + struct mlx5_wqe_umr_ctrl_seg *ucseg = &wqe->uctrl; + struct mlx5_wqe_data_seg *dseg = &wqe->data; + struct mlx5e_mpw_info *wi = &rq->wqe_info[ix]; + u8 ds_cnt = DIV_ROUND_UP(sizeof(*wqe), MLX5_SEND_WQE_DS); + u16 umr_wqe_mtt_offset = mlx5e_get_wqe_mtt_offset(rq->ix, ix); + + memset(wqe, 0, sizeof(*wqe)); + cseg->opmod_idx_opcode = + cpu_to_be32((sq->pc << MLX5_WQE_CTRL_WQE_INDEX_SHIFT) | + MLX5_OPCODE_UMR); + cseg->qpn_ds = cpu_to_be32((sq->sqn << MLX5_WQE_CTRL_QPN_SHIFT) | + ds_cnt); + cseg->fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE; + cseg->imm = rq->umr_mkey_be; + + ucseg->flags = MLX5_UMR_TRANSLATION_OFFSET_EN; + ucseg->klm_octowords = + cpu_to_be16(mlx5e_get_mtt_octw(MLX5_MPWRQ_PAGES_PER_WQE)); + ucseg->bsf_octowords = + cpu_to_be16(mlx5e_get_mtt_octw(umr_wqe_mtt_offset)); + ucseg->mkey_mask = cpu_to_be64(MLX5_MKEY_MASK_FREE); + + dseg->lkey = sq->mkey_be; + dseg->addr = cpu_to_be64(wi->umr.mtt_addr); +} + +static void mlx5e_post_umr_wqe(struct mlx5e_rq *rq, u16 ix) +{ + struct mlx5e_sq *sq = &rq->channel->icosq; + struct mlx5_wq_cyc *wq = &sq->wq; + struct mlx5e_umr_wqe *wqe; + u8 num_wqebbs = DIV_ROUND_UP(sizeof(*wqe), MLX5_SEND_WQE_BB); + u16 pi; + + /* fill sq edge with nops to avoid wqe wrap around */ + while ((pi = (sq->pc & wq->sz_m1)) > sq->edge) { + sq->ico_wqe_info[pi].opcode = MLX5_OPCODE_NOP; + sq->ico_wqe_info[pi].num_wqebbs = 1; + mlx5e_send_nop(sq, true); + } + + wqe = mlx5_wq_cyc_get_wqe(wq, pi); + mlx5e_build_umr_wqe(rq, sq, wqe, ix); + sq->ico_wqe_info[pi].opcode = MLX5_OPCODE_UMR; + sq->ico_wqe_info[pi].num_wqebbs = num_wqebbs; + sq->pc += num_wqebbs; + mlx5e_tx_notify_hw(sq, &wqe->ctrl, 0); +} + +static inline int mlx5e_get_wqe_mtt_sz(void) +{ + /* UMR copies MTTs in units of MLX5_UMR_MTT_ALIGNMENT bytes. + * To avoid copying garbage after the mtt array, we allocate + * a little more. + */ + return ALIGN(MLX5_MPWRQ_PAGES_PER_WQE * sizeof(__be64), + MLX5_UMR_MTT_ALIGNMENT); +} + +static int mlx5e_alloc_and_map_page(struct mlx5e_rq *rq, + struct mlx5e_mpw_info *wi, + int i) +{ + struct page *page; + + page = dev_alloc_page(); + if (unlikely(!page)) + return -ENOMEM; + + wi->umr.dma_info[i].page = page; + wi->umr.dma_info[i].addr = dma_map_page(rq->pdev, page, 0, PAGE_SIZE, + PCI_DMA_FROMDEVICE); + if (unlikely(dma_mapping_error(rq->pdev, wi->umr.dma_info[i].addr))) { + put_page(page); + return -ENOMEM; + } + wi->umr.mtt[i] = cpu_to_be64(wi->umr.dma_info[i].addr | MLX5_EN_WR); + + return 0; +} + +static int mlx5e_alloc_rx_fragmented_mpwqe(struct mlx5e_rq *rq, + struct mlx5e_rx_wqe *wqe, + u16 ix) +{ + struct mlx5e_mpw_info *wi = &rq->wqe_info[ix]; + int mtt_sz = mlx5e_get_wqe_mtt_sz(); + u32 dma_offset = mlx5e_get_wqe_mtt_offset(rq->ix, ix) << PAGE_SHIFT; + int i; + + wi->umr.dma_info = kmalloc(sizeof(*wi->umr.dma_info) * + MLX5_MPWRQ_PAGES_PER_WQE, + GFP_ATOMIC); + if (unlikely(!wi->umr.dma_info)) + goto err_out; + + /* We allocate more than mtt_sz as we will align the pointer */ + wi->umr.mtt_no_align = kzalloc(mtt_sz + MLX5_UMR_ALIGN - 1, + GFP_ATOMIC); + if (unlikely(!wi->umr.mtt_no_align)) + goto err_free_umr; + + wi->umr.mtt = PTR_ALIGN(wi->umr.mtt_no_align, MLX5_UMR_ALIGN); + wi->umr.mtt_addr = dma_map_single(rq->pdev, wi->umr.mtt, mtt_sz, + PCI_DMA_TODEVICE); + if (unlikely(dma_mapping_error(rq->pdev, wi->umr.mtt_addr))) + goto err_free_mtt; + + for (i = 0; i < MLX5_MPWRQ_PAGES_PER_WQE; i++) { + if (unlikely(mlx5e_alloc_and_map_page(rq, wi, i))) + goto err_unmap; + atomic_add(MLX5_MPWRQ_STRIDES_PER_PAGE, + &wi->umr.dma_info[i].page->_count); + wi->skbs_frags[i] = 0; + } + + wi->consumed_strides = 0; + wi->dma_pre_sync = mlx5e_dma_pre_sync_fragmented_mpwqe; + wi->add_skb_frag = mlx5e_add_skb_frag_fragmented_mpwqe; + wi->copy_skb_header = mlx5e_copy_skb_header_fragmented_mpwqe; + wi->free_wqe = mlx5e_free_rx_fragmented_mpwqe; + wqe->data.lkey = rq->umr_mkey_be; + wqe->data.addr = cpu_to_be64(dma_offset); + + return 0; + +err_unmap: + while (--i >= 0) { + dma_unmap_page(rq->pdev, wi->umr.dma_info[i].addr, PAGE_SIZE, + PCI_DMA_FROMDEVICE); + atomic_sub(MLX5_MPWRQ_STRIDES_PER_PAGE, + &wi->umr.dma_info[i].page->_count); + put_page(wi->umr.dma_info[i].page); + } + dma_unmap_single(rq->pdev, wi->umr.mtt_addr, mtt_sz, PCI_DMA_TODEVICE); + +err_free_mtt: + kfree(wi->umr.mtt_no_align); + +err_free_umr: + kfree(wi->umr.dma_info); + +err_out: + return -ENOMEM; +} + +void mlx5e_free_rx_fragmented_mpwqe(struct mlx5e_rq *rq, + struct mlx5e_mpw_info *wi) +{ + int mtt_sz = mlx5e_get_wqe_mtt_sz(); + int i; + + for (i = 0; i < MLX5_MPWRQ_PAGES_PER_WQE; i++) { + dma_unmap_page(rq->pdev, wi->umr.dma_info[i].addr, PAGE_SIZE, + PCI_DMA_FROMDEVICE); + atomic_sub(MLX5_MPWRQ_STRIDES_PER_PAGE - wi->skbs_frags[i], + &wi->umr.dma_info[i].page->_count); + put_page(wi->umr.dma_info[i].page); + } + dma_unmap_single(rq->pdev, wi->umr.mtt_addr, mtt_sz, PCI_DMA_TODEVICE); + kfree(wi->umr.mtt_no_align); + kfree(wi->umr.dma_info); +} + +void mlx5e_post_rx_fragmented_mpwqe(struct mlx5e_rq *rq) +{ + struct mlx5_wq_ll *wq = &rq->wq; + struct mlx5e_rx_wqe *wqe = mlx5_wq_ll_get_wqe(wq, wq->head); + + clear_bit(MLX5E_RQ_STATE_UMR_WQE_IN_PROGRESS, &rq->state); + mlx5_wq_ll_push(wq, be16_to_cpu(wqe->next.next_wqe_index)); + rq->stats.mpwqe_frag++; + + /* ensure wqes are visible to device before updating doorbell record */ + dma_wmb(); + + mlx5_wq_ll_update_db_record(wq); +} + +static int mlx5e_alloc_rx_linear_mpwqe(struct mlx5e_rq *rq, + struct mlx5e_rx_wqe *wqe, + u16 ix) { struct mlx5e_mpw_info *wi = &rq->wqe_info[ix]; gfp_t gfp_mask; @@ -106,16 +395,56 @@ int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix) } wi->consumed_strides = 0; - wqe->data.addr = cpu_to_be64(wi->dma_info.addr); + wi->dma_pre_sync = mlx5e_dma_pre_sync_linear_mpwqe; + wi->add_skb_frag = mlx5e_add_skb_frag_linear_mpwqe; + wi->copy_skb_header = mlx5e_copy_skb_header_linear_mpwqe; + wi->free_wqe = mlx5e_free_rx_linear_mpwqe; + wqe->data.lkey = rq->mkey_be; + wqe->data.addr = cpu_to_be64(wi->dma_info.addr); + + return 0; +} + +void mlx5e_free_rx_linear_mpwqe(struct mlx5e_rq *rq, + struct mlx5e_mpw_info *wi) +{ + int i; + + dma_unmap_page(rq->pdev, wi->dma_info.addr, rq->wqe_sz, + PCI_DMA_FROMDEVICE); + for (i = 0; i < MLX5_MPWRQ_PAGES_PER_WQE; i++) { + atomic_sub(MLX5_MPWRQ_STRIDES_PER_PAGE - wi->skbs_frags[i], + &wi->dma_info.page[i]._count); + put_page(&wi->dma_info.page[i]); + } +} + +int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix) +{ + int err; + + err = mlx5e_alloc_rx_linear_mpwqe(rq, wqe, ix); + if (unlikely(err)) { + err = mlx5e_alloc_rx_fragmented_mpwqe(rq, wqe, ix); + if (unlikely(err)) + return err; + set_bit(MLX5E_RQ_STATE_UMR_WQE_IN_PROGRESS, &rq->state); + mlx5e_post_umr_wqe(rq, ix); + return -EBUSY; + } return 0; } +#define RQ_CANNOT_POST(rq) \ + (!test_bit(MLX5E_RQ_STATE_POST_WQES_ENABLE, &rq->state) || \ + test_bit(MLX5E_RQ_STATE_UMR_WQE_IN_PROGRESS, &rq->state)) + bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq) { struct mlx5_wq_ll *wq = &rq->wq; - if (unlikely(!test_bit(MLX5E_RQ_STATE_POST_WQES_ENABLE, &rq->state))) + if (unlikely(RQ_CANNOT_POST(rq))) return false; while (!mlx5_wq_ll_is_full(wq)) { @@ -309,23 +638,56 @@ wq_ll_pop: &wqe->next.next_wqe_index); } +static inline void mlx5e_mpwqe_fill_rx_skb(struct mlx5e_rq *rq, + struct mlx5_cqe64 *cqe, + struct mlx5e_mpw_info *wi, + u32 cqe_bcnt, + struct sk_buff *skb) +{ + u32 consumed_bytes = ALIGN(cqe_bcnt, MLX5_MPWRQ_STRIDE_SIZE); + u16 stride_ix = mpwrq_get_cqe_stride_index(cqe); + u32 wqe_offset = stride_ix * MLX5_MPWRQ_STRIDE_SIZE; + u32 head_offset = wqe_offset & (PAGE_SIZE - 1); + u32 page_idx = wqe_offset >> PAGE_SHIFT; + u32 head_page_idx = page_idx; + u16 headlen = min_t(u16, MLX5_MPWRQ_SMALL_PACKET_THRESHOLD, cqe_bcnt); + u32 frag_offset = head_offset + headlen; + u16 byte_cnt = cqe_bcnt - headlen; + +#if (MLX5_MPWRQ_SMALL_PACKET_THRESHOLD >= MLX5_MPWRQ_STRIDE_SIZE) + if (unlikely(frag_offset >= PAGE_SIZE)) { + page_idx++; + frag_offset -= PAGE_SIZE; + } +#endif + wi->dma_pre_sync(rq->pdev, wi, wqe_offset, consumed_bytes); + + while (byte_cnt) { + u32 pg_consumed_bytes = + min_t(u32, PAGE_SIZE - frag_offset, byte_cnt); + + wi->add_skb_frag(rq->pdev, skb, wi, page_idx, frag_offset, + pg_consumed_bytes); + byte_cnt -= pg_consumed_bytes; + frag_offset = 0; + page_idx++; + } + /* copy header */ + wi->copy_skb_header(rq->pdev, skb, wi, head_page_idx, head_offset, + headlen); + /* skb linear part was allocated with headlen and aligned to long */ + skb->tail += headlen; + skb->len += headlen; +} + void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) { u16 cstrides = mpwrq_get_cqe_consumed_strides(cqe); - u16 stride_ix = mpwrq_get_cqe_stride_index(cqe); u16 wqe_id = be16_to_cpu(cqe->wqe_id); struct mlx5e_mpw_info *wi = &rq->wqe_info[wqe_id]; struct mlx5e_rx_wqe *wqe = mlx5_wq_ll_get_wqe(&rq->wq, wqe_id); struct sk_buff *skb; - u32 consumed_bytes; - u32 head_offset; - u32 frag_offset; - u32 wqe_offset; - u32 page_idx; - u16 byte_cnt; u16 cqe_bcnt; - u16 headlen; - int i; wi->consumed_strides += cstrides; @@ -346,53 +708,16 @@ void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) goto mpwrq_cqe_out; prefetch(skb->data); - wqe_offset = stride_ix * MLX5_MPWRQ_STRIDE_SIZE; - consumed_bytes = cstrides * MLX5_MPWRQ_STRIDE_SIZE; - dma_sync_single_for_cpu(rq->pdev, wi->dma_info.addr + wqe_offset, - consumed_bytes, DMA_FROM_DEVICE); - - head_offset = wqe_offset & (PAGE_SIZE - 1); - page_idx = wqe_offset >> PAGE_SHIFT; cqe_bcnt = mpwrq_get_cqe_byte_cnt(cqe); - headlen = min_t(u16, MLX5_MPWRQ_SMALL_PACKET_THRESHOLD, cqe_bcnt); - frag_offset = head_offset + headlen; - - byte_cnt = cqe_bcnt - headlen; - while (byte_cnt) { - u32 pg_consumed_bytes = - min_t(u32, PAGE_SIZE - frag_offset, byte_cnt); - unsigned int truesize = - ALIGN(pg_consumed_bytes, MLX5_MPWRQ_STRIDE_SIZE); - - wi->skbs_frags[page_idx]++; - skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, - &wi->dma_info.page[page_idx], frag_offset, - pg_consumed_bytes, truesize); - byte_cnt -= pg_consumed_bytes; - frag_offset = 0; - page_idx++; - } - - skb_copy_to_linear_data(skb, - page_address(wi->dma_info.page) + wqe_offset, - ALIGN(headlen, sizeof(long))); - /* skb linear part was allocated with headlen and aligned to long */ - skb->tail += headlen; - skb->len += headlen; + mlx5e_mpwqe_fill_rx_skb(rq, cqe, wi, cqe_bcnt, skb); mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb); mpwrq_cqe_out: if (likely(wi->consumed_strides < MLX5_MPWRQ_NUM_STRIDES)) return; - dma_unmap_page(rq->pdev, wi->dma_info.addr, rq->wqe_sz, - PCI_DMA_FROMDEVICE); - for (i = 0; i < MLX5_MPWRQ_PAGES_PER_WQE; i++) { - atomic_sub(MLX5_MPWRQ_STRIDES_PER_PAGE - wi->skbs_frags[i], - &wi->dma_info.page[i]._count); - put_page(&wi->dma_info.page[i]); - } + wi->free_wqe(rq, wi); mlx5_wq_ll_pop(&rq->wq, cqe->wqe_id, &wqe->next.next_wqe_index); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c index a8d2935c50d0..229ab16fb8d3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@ -58,7 +58,7 @@ void mlx5e_send_nop(struct mlx5e_sq *sq, bool notify_hw) if (notify_hw) { cseg->fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE; - mlx5e_tx_notify_hw(sq, wqe, 0); + mlx5e_tx_notify_hw(sq, &wqe->ctrl, 0); } } @@ -310,7 +310,7 @@ static netdev_tx_t mlx5e_sq_xmit(struct mlx5e_sq *sq, struct sk_buff *skb) bf_sz = wi->num_wqebbs << 3; cseg->fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE; - mlx5e_tx_notify_hw(sq, wqe, bf_sz); + mlx5e_tx_notify_hw(sq, &wqe->ctrl, bf_sz); } /* fill sq edge with nops to avoid wqe wrap around */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c index ad624cb6f147..a3fd0f55ce2e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c @@ -84,6 +84,9 @@ static void mlx5e_poll_ico_cq(struct mlx5e_cq *cq) switch (icowi->opcode) { case MLX5_OPCODE_NOP: break; + case MLX5_OPCODE_UMR: + mlx5e_post_rx_fragmented_mpwqe(&sq->channel->rq); + break; default: WARN_ONCE(true, "mlx5e: Bad OPCODE in ICOSQ WQE info: 0x%x\n", -- cgit v1.2.3-55-g7522 From c5adb96f6c4a22aceff2e8220612c5b9239ffeb2 Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Wed, 20 Apr 2016 22:02:16 +0300 Subject: net/mlx5e: Use napi_alloc_skb for RX SKB allocations Instead of netdev_alloc_skb, we use the napi_alloc_skb function which is designated to allocate skbuff's for RX in a channel-specific NAPI instance, and implies the IP packet alignment. Signed-off-by: Tariq Toukan Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 1 - drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 4 ++-- drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 12 +++++------- 3 files changed, 7 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index c99fdff74c97..303e6cdf9fcd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -93,7 +93,6 @@ #define MLX5E_SQ_BF_BUDGET 16 #define MLX5E_NUM_MAIN_GROUPS 9 -#define MLX5E_NET_IP_ALIGN 2 static inline u16 mlx5_min_rx_wqes(int wq_type, u32 wq_size) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 942829e6d8ba..9b17bc064cc8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -373,8 +373,8 @@ static int mlx5e_create_rq(struct mlx5e_channel *c, rq->wqe_sz = (priv->params.lro_en) ? priv->params.lro_wqe_sz : MLX5E_SW2HW_MTU(priv->netdev->mtu); - rq->wqe_sz = SKB_DATA_ALIGN(rq->wqe_sz + MLX5E_NET_IP_ALIGN); - byte_count = rq->wqe_sz - MLX5E_NET_IP_ALIGN; + rq->wqe_sz = SKB_DATA_ALIGN(rq->wqe_sz); + byte_count = rq->wqe_sz; byte_count |= MLX5_HW_START_PADDING; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index d71919ccf912..5bdcc0b69f76 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -47,7 +47,7 @@ int mlx5e_alloc_rx_wqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix) struct sk_buff *skb; dma_addr_t dma_addr; - skb = netdev_alloc_skb(rq->netdev, rq->wqe_sz); + skb = napi_alloc_skb(rq->cq.napi, rq->wqe_sz); if (unlikely(!skb)) return -ENOMEM; @@ -61,10 +61,8 @@ int mlx5e_alloc_rx_wqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix) if (unlikely(dma_mapping_error(rq->pdev, dma_addr))) goto err_free_skb; - skb_reserve(skb, MLX5E_NET_IP_ALIGN); - *((dma_addr_t *)skb->cb) = dma_addr; - wqe->data.addr = cpu_to_be64(dma_addr + MLX5E_NET_IP_ALIGN); + wqe->data.addr = cpu_to_be64(dma_addr); wqe->data.lkey = rq->mkey_be; rq->skb[ix] = skb; @@ -701,9 +699,9 @@ void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) goto mpwrq_cqe_out; } - skb = netdev_alloc_skb(rq->netdev, - ALIGN(MLX5_MPWRQ_SMALL_PACKET_THRESHOLD, - sizeof(long))); + skb = napi_alloc_skb(rq->cq.napi, + ALIGN(MLX5_MPWRQ_SMALL_PACKET_THRESHOLD, + sizeof(long))); if (unlikely(!skb)) goto mpwrq_cqe_out; -- cgit v1.2.3-55-g7522 From 1bfec31627bf9b351b93b8cef4520b90f48ca276 Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Wed, 20 Apr 2016 22:02:17 +0300 Subject: net/mlx5e: Remove redundant barrier The bit-op operation one line before is an explicit barrier by itself. Signed-off-by: Tariq Toukan Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c index a3fd0f55ce2e..c38781fa567d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c @@ -147,7 +147,6 @@ void mlx5e_completion_event(struct mlx5_core_cq *mcq) struct mlx5e_cq *cq = container_of(mcq, struct mlx5e_cq, mcq); set_bit(MLX5E_CHANNEL_NAPI_SCHED, &cq->channel->flags); - barrier(); napi_schedule(cq->napi); } -- cgit v1.2.3-55-g7522 From e20a0db30454a07f03f3a34a79e9f35881cfaa9d Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Wed, 20 Apr 2016 22:02:18 +0300 Subject: net/mlx5e: Delay skb->data access Move mlx5e_handle_csum and eth_type_trans to the end of mlx5e_build_rx_skb to gain some more time before accessing skb->data, to reduce cache misses. Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 5bdcc0b69f76..ee5fa16aafd1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -573,10 +573,6 @@ static inline void mlx5e_build_rx_skb(struct mlx5_cqe64 *cqe, if (unlikely(mlx5e_rx_hw_stamp(tstamp))) mlx5e_fill_hwstamp(tstamp, get_cqe_ts(cqe), skb_hwtstamps(skb)); - mlx5e_handle_csum(netdev, cqe, rq, skb, !!lro_num_seg); - - skb->protocol = eth_type_trans(skb, netdev); - skb_record_rx_queue(skb, rq->ix); if (likely(netdev->features & NETIF_F_RXHASH)) @@ -587,6 +583,9 @@ static inline void mlx5e_build_rx_skb(struct mlx5_cqe64 *cqe, be16_to_cpu(cqe->vlan_info)); skb->mark = be32_to_cpu(cqe->sop_drop_qpn) & MLX5E_TC_FLOW_ID_MASK; + + mlx5e_handle_csum(netdev, cqe, rq, skb, !!lro_num_seg); + skb->protocol = eth_type_trans(skb, netdev); } static inline void mlx5e_complete_rx_cqe(struct mlx5e_rq *rq, -- cgit v1.2.3-55-g7522 From 54984407564ef6b35488f52654f828c17b9d6fa8 Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Wed, 20 Apr 2016 22:02:19 +0300 Subject: net/mlx5e: Add ethtool counter for RX buffer allocation failures Counts the number of RX buffer allocation failures and shows it in ethtool statistics. Signed-off-by: Tariq Toukan Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 8 ++++++-- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 2 ++ drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 11 +++++++++-- 3 files changed, 17 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 303e6cdf9fcd..6e24e821a1d8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -189,6 +189,7 @@ static const char vport_strings[][ETH_GSTRING_LEN] = { "rx_wqe_err", "rx_mpwqe_filler", "rx_mpwqe_frag", + "rx_buff_alloc_err", }; struct mlx5e_vport_stats { @@ -232,8 +233,9 @@ struct mlx5e_vport_stats { u64 rx_wqe_err; u64 rx_mpwqe_filler; u64 rx_mpwqe_frag; + u64 rx_buff_alloc_err; -#define NUM_VPORT_COUNTERS 37 +#define NUM_VPORT_COUNTERS 38 }; static const char pport_strings[][ETH_GSTRING_LEN] = { @@ -329,6 +331,7 @@ static const char rq_stats_strings[][ETH_GSTRING_LEN] = { "wqe_err", "mpwqe_filler", "mpwqe_frag", + "buff_alloc_err", }; struct mlx5e_rq_stats { @@ -341,7 +344,8 @@ struct mlx5e_rq_stats { u64 wqe_err; u64 mpwqe_filler; u64 mpwqe_frag; -#define NUM_RQ_STATS 9 + u64 buff_alloc_err; +#define NUM_RQ_STATS 10 }; static const char sq_stats_strings[][ETH_GSTRING_LEN] = { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 9b17bc064cc8..d485d1e4e100 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -180,6 +180,7 @@ void mlx5e_update_stats(struct mlx5e_priv *priv) s->rx_wqe_err = 0; s->rx_mpwqe_filler = 0; s->rx_mpwqe_frag = 0; + s->rx_buff_alloc_err = 0; for (i = 0; i < priv->params.num_channels; i++) { rq_stats = &priv->channel[i]->rq.stats; @@ -192,6 +193,7 @@ void mlx5e_update_stats(struct mlx5e_priv *priv) s->rx_wqe_err += rq_stats->wqe_err; s->rx_mpwqe_filler += rq_stats->mpwqe_filler; s->rx_mpwqe_frag += rq_stats->mpwqe_frag; + s->rx_buff_alloc_err += rq_stats->buff_alloc_err; for (j = 0; j < priv->params.num_tc; j++) { sq_stats = &priv->channel[i]->sq[j].stats; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index ee5fa16aafd1..918b7c7fd74f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -447,9 +447,14 @@ bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq) while (!mlx5_wq_ll_is_full(wq)) { struct mlx5e_rx_wqe *wqe = mlx5_wq_ll_get_wqe(wq, wq->head); + int err; - if (unlikely(rq->alloc_wqe(rq, wqe, wq->head))) + err = rq->alloc_wqe(rq, wqe, wq->head); + if (unlikely(err)) { + if (err != -EBUSY) + rq->stats.buff_alloc_err++; break; + } mlx5_wq_ll_push(wq, be16_to_cpu(wqe->next.next_wqe_index)); } @@ -701,8 +706,10 @@ void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) skb = napi_alloc_skb(rq->cq.napi, ALIGN(MLX5_MPWRQ_SMALL_PACKET_THRESHOLD, sizeof(long))); - if (unlikely(!skb)) + if (unlikely(!skb)) { + rq->stats.buff_alloc_err++; goto mpwrq_cqe_out; + } prefetch(skb->data); cqe_bcnt = mpwrq_get_cqe_byte_cnt(cqe); -- cgit v1.2.3-55-g7522