author		Tariq Toukan	2017-12-20 10:56:35 +0100
committer	Saeed Mahameed	2018-03-31 01:55:07 +0200
commit		b8a98a4cf3221d8140969e3f5bde09206a6cb623 (patch)
tree		4ab167a2152d139d86ed2e28cc334df6cd2f1816 /drivers/net/ethernet/mellanox/mlx5/core/en_main.c
parent		net/mlx5e: Remove page_ref bulking in Striding RQ (diff)
net/mlx5e: Keep single pre-initialized UMR WQE per RQ
All UMR WQEs of an RQ share many common fields. We use pre-initialized structures to save calculations in the datapath. One field (xlt_offset) was the only reason we kept a pre-initialized copy per WQE index. Here we remove its initialization (moving its calculation to the datapath) and reduce the number of copies to one per RQ.

A very small datapath calculation is added. It occurs once per MPWQE (i.e. once every 256KB), but it reduces memory consumption and gives better cache utilization.

Performance testing:
Tested packet rate, no degradation sensed.

Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
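For context, the datapath counterpart of this change lives in en_rx.c and is outside this page's diffstat. A minimal sketch of the idea follows; the helper name mlx5e_fill_umr_xlt_offset is made up for illustration, and it simply reuses the expression removed from mlx5e_build_umr_wqe() below (the commit message calls this offset xlt_offset; the removed initialization stored it in bsf_octowords):

/* Sketch only: the real change is in en_rx.c, outside this file's diff.
 * The single per-RQ template is posted as before; the one field that used
 * to force a pre-initialized copy per WQE index is now filled at post time.
 */
static void mlx5e_fill_umr_xlt_offset(struct mlx5e_rq *rq,
				      struct mlx5e_umr_wqe *umr_wqe, u16 ix)
{
	/* Same value the removed init code baked in per WQE index: the
	 * translation-table offset of WQE 'ix', in MTT octowords.
	 */
	u32 mtt_offset = mlx5e_get_wqe_mtt_offset(rq, ix); /* pre-patch helper */

	umr_wqe->uctrl.bsf_octowords = cpu_to_be16(MLX5_MTT_OCTW(mtt_offset));
}

This small computation, done once per posted MPWQE, is the "very small datapath calculation" mentioned above.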
Diffstat (limited to 'drivers/net/ethernet/mellanox/mlx5/core/en_main.c')
-rw-r--r--	drivers/net/ethernet/mellanox/mlx5/core/en_main.c	20
1 file changed, 8 insertions(+), 12 deletions(-)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index b03a2327356a..0339609cfa56 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -306,13 +306,11 @@ static void mlx5e_disable_async_events(struct mlx5e_priv *priv)
 
 static inline void mlx5e_build_umr_wqe(struct mlx5e_rq *rq,
 				       struct mlx5e_icosq *sq,
-				       struct mlx5e_umr_wqe *wqe,
-				       u16 ix)
+				       struct mlx5e_umr_wqe *wqe)
 {
 	struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl;
 	struct mlx5_wqe_umr_ctrl_seg *ucseg = &wqe->uctrl;
 	u8 ds_cnt = DIV_ROUND_UP(MLX5E_UMR_WQE_INLINE_SZ, MLX5_SEND_WQE_DS);
-	u32 umr_wqe_mtt_offset = mlx5e_get_wqe_mtt_offset(rq, ix);
 
 	cseg->qpn_ds = cpu_to_be32((sq->sqn << MLX5_WQE_CTRL_QPN_SHIFT) |
 				   ds_cnt);
@@ -322,8 +320,6 @@ static inline void mlx5e_build_umr_wqe(struct mlx5e_rq *rq,
 	ucseg->flags = MLX5_UMR_TRANSLATION_OFFSET_EN | MLX5_UMR_INLINE;
 	ucseg->xlt_octowords =
 		cpu_to_be16(MLX5_MTT_OCTW(MLX5_MPWRQ_PAGES_PER_WQE));
-	ucseg->bsf_octowords =
-		cpu_to_be16(MLX5_MTT_OCTW(umr_wqe_mtt_offset));
 	ucseg->mkey_mask = cpu_to_be64(MLX5_MKEY_MASK_FREE);
 }
 
@@ -331,18 +327,13 @@ static int mlx5e_rq_alloc_mpwqe_info(struct mlx5e_rq *rq,
 				     struct mlx5e_channel *c)
 {
 	int wq_sz = mlx5_wq_ll_get_size(&rq->wq);
-	int i;
 
 	rq->mpwqe.info = kzalloc_node(wq_sz * sizeof(*rq->mpwqe.info),
 				      GFP_KERNEL, cpu_to_node(c->cpu));
 	if (!rq->mpwqe.info)
 		return -ENOMEM;
 
-	for (i = 0; i < wq_sz; i++) {
-		struct mlx5e_mpw_info *wi = &rq->mpwqe.info[i];
-
-		mlx5e_build_umr_wqe(rq, &c->icosq, &wi->umr.wqe, i);
-	}
+	mlx5e_build_umr_wqe(rq, &c->icosq, &rq->mpwqe.umr_wqe);
 
 	return 0;
 }
@@ -388,6 +379,11 @@ static int mlx5e_create_rq_umr_mkey(struct mlx5_core_dev *mdev, struct mlx5e_rq
 	return mlx5e_create_umr_mkey(mdev, num_mtts, PAGE_SHIFT, &rq->umr_mkey);
 }
 
+static inline u64 mlx5e_get_mpwqe_offset(struct mlx5e_rq *rq, u16 wqe_ix)
+{
+	return (wqe_ix << MLX5E_LOG_ALIGNED_MPWQE_PPW) << PAGE_SHIFT;
+}
+
 static int mlx5e_alloc_rq(struct mlx5e_channel *c,
 			  struct mlx5e_params *params,
 			  struct mlx5e_rq_param *rqp,
@@ -520,7 +516,7 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
 		struct mlx5e_rx_wqe *wqe = mlx5_wq_ll_get_wqe(&rq->wq, i);
 
 		if (rq->wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) {
-			u64 dma_offset = (u64)mlx5e_get_wqe_mtt_offset(rq, i) << PAGE_SHIFT;
+			u64 dma_offset = mlx5e_get_mpwqe_offset(rq, i);
 			wqe->data.addr = cpu_to_be64(dma_offset + rq->buff.headroom);
 		}
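A note on the new mlx5e_get_mpwqe_offset() helper above: it converts a WQE index into a byte offset into the RQ buffer by first shifting to a page index (wqe_ix << MLX5E_LOG_ALIGNED_MPWQE_PPW) and then to bytes (<< PAGE_SHIFT). The standalone sketch below only illustrates that arithmetic; the 4 KiB page size and 64 aligned pages per MPWQE are assumptions chosen to match the 256KB stride the commit message mentions, not values taken from this diff.

/* Standalone illustration of the mlx5e_get_mpwqe_offset() arithmetic.
 * PAGE_SHIFT and the pages-per-WQE log are assumed values.
 */
#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT			12	/* 4 KiB pages (assumption) */
#define MLX5E_LOG_ALIGNED_MPWQE_PPW	6	/* 64 pages per MPWQE (assumption) */

static uint64_t mpwqe_offset(uint64_t wqe_ix)
{
	/* Same expression as the new helper: WQE index -> page index -> bytes */
	return (wqe_ix << MLX5E_LOG_ALIGNED_MPWQE_PPW) << PAGE_SHIFT;
}

int main(void)
{
	for (unsigned int ix = 0; ix < 4; ix++)
		printf("wqe %u -> byte offset 0x%llx\n", ix,
		       (unsigned long long)mpwqe_offset(ix));
	/* Prints 0x0, 0x40000, 0x80000, 0xc0000: a 256 KiB (0x40000) stride
	 * per MPWQE, matching "once every 256KB" in the commit message.
	 */
	return 0;
}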