From 939de57d30344ce728b0de61be87984e75af420e Mon Sep 17 00:00:00 2001 From: Daniel Jurgens Date: Mon, 5 Nov 2018 16:05:37 -0600 Subject: net/mlx5e: Use CQE padding for Ethernet CQs Writing 64B CQEs to 128B cache lines results in a RMW operation. Padding the CQEs to 128B if possible improves performance on 128B cache line systems like PPC. Testing on PPC showed up to a 24% improvement in small packet throughput vs the default behavior, depending on the workload and system topology. Signed-off-by: Daniel Jurgens Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 2 ++ drivers/net/ethernet/mellanox/mlx5/core/wq.c | 3 ++- drivers/net/ethernet/mellanox/mlx5/core/wq.h | 7 ++++++- 3 files changed, 10 insertions(+), 2 deletions(-) (limited to 'drivers/net/ethernet/mellanox/mlx5') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 88116a4750b0..2188e5ba908f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -2224,6 +2224,8 @@ static void mlx5e_build_common_cq_param(struct mlx5e_priv *priv, void *cqc = param->cqc; MLX5_SET(cqc, cqc, uar_page, priv->mdev->priv.uar->index); + if (MLX5_CAP_GEN(priv->mdev, cqe_128_always) && cache_line_size() >= 128) + MLX5_SET(cqc, cqc, cqe_sz, CQE_STRIDE_128_PAD); } static void mlx5e_build_rx_cq_param(struct mlx5e_priv *priv, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/wq.c b/drivers/net/ethernet/mellanox/mlx5/core/wq.c index 2dcbf1ebfd6a..953cc8efba69 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/wq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/wq.c @@ -155,7 +155,8 @@ int mlx5_cqwq_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param, void *cqc, struct mlx5_cqwq *wq, struct mlx5_wq_ctrl *wq_ctrl) { - u8 log_wq_stride = MLX5_GET(cqc, cqc, cqe_sz) + 6; + /* CQE_STRIDE_128 and CQE_STRIDE_128_PAD both mean 128B stride */ + u8 log_wq_stride = MLX5_GET(cqc, cqc, cqe_sz) == CQE_STRIDE_64 ? 6 : 7; u8 log_wq_sz = MLX5_GET(cqc, cqc, log_cq_size); int err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/wq.h b/drivers/net/ethernet/mellanox/mlx5/core/wq.h index 9bc2184a46bc..ea934a48c90a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/wq.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/wq.h @@ -179,7 +179,12 @@ static inline u32 mlx5_cqwq_get_ci(struct mlx5_cqwq *wq) static inline struct mlx5_cqe64 *mlx5_cqwq_get_wqe(struct mlx5_cqwq *wq, u32 ix) { - return mlx5_frag_buf_get_wqe(&wq->fbc, ix); + struct mlx5_cqe64 *cqe = mlx5_frag_buf_get_wqe(&wq->fbc, ix); + + /* For 128B CQEs the data is in the last 64B */ + cqe += wq->fbc.log_stride == 7; + + return cqe; } static inline u32 mlx5_cqwq_get_ctr_wrap_cnt(struct mlx5_cqwq *wq, u32 ctr) -- cgit v1.2.3-55-g7522