author     Tariq Toukan    2018-03-21 15:31:08 +0100
committer  Saeed Mahameed  2018-05-25 23:11:00 +0200
commit     043dc78ecf07f3fc5b87270518d7f322aea2f748 (patch)
tree       8f239559368d60085731b7da57b8256caa0ffe55 /drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
parent     net/mlx5e: Use WQ API functions instead of direct fields access (diff)
net/mlx5e: TX, Use actual WQE size for SQ edge fill
We fill the SQ edge with NOPs to avoid WQE wrap-around. Here, instead of doing that in advance for the maximum possible WQE size, we do it on-demand, using the actual WQE size. We re-order some parts in mlx5e_sq_xmit to finish the calculation of the WQE size (ds_cnt) before doing any writes to the WQE buffer.

When the SQ work queue is fragmented (introduced in a downstream patch), dealing with WQE wraps becomes more frequent. This change drastically reduces the overhead in that case.

Performance tests:
ConnectX-5 100Gbps, CPU: Intel(R) Xeon(R) CPU E5-2680 v3 @ 2.50GHz
Packet rate of 64B packets, single transmit ring, size 8K.

Before: 14.9 Mpps
After:  15.8 Mpps

Improvement of 6%.

Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
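To illustrate the idea outside the driver: below is a minimal, self-contained C model of a cyclic send queue made of fixed-size basic blocks (WQEBBs). The producer first determines how many blocks the next WQE actually needs, and pads the tail of the buffer with NOPs only if that WQE would not fit contiguously. All structure and helper names here (toy_sq, fill_edge_if_needed, post_nop) and the sizes are invented for the example; only the counter-to-index masking mirrors what mlx5_wq_cyc_ctr2ix() does.

#include <stdint.h>
#include <stdio.h>

/* Toy cyclic send queue: SQ_SIZE fixed-size basic blocks ("WQEBBs"). */
#define SQ_SIZE  8                      /* power of two */
#define SQ_MASK  (SQ_SIZE - 1)

struct toy_sq {
        uint16_t pc;                    /* free-running producer counter */
        char     wqe[SQ_SIZE][8];       /* block labels, e.g. "NOP", "TX1" */
};

static uint16_t ctr2ix(uint16_t ctr)
{
        return ctr & SQ_MASK;           /* like mlx5_wq_cyc_ctr2ix() */
}

static void post_nop(struct toy_sq *sq)
{
        snprintf(sq->wqe[ctr2ix(sq->pc)], sizeof(sq->wqe[0]), "NOP");
        sq->pc++;
}

/*
 * On-demand edge fill: pad with NOPs only if the *actual* WQE size
 * (wqebbs) would cross the end of the buffer; return the start index.
 */
static uint16_t fill_edge_if_needed(struct toy_sq *sq, uint16_t wqebbs)
{
        uint16_t pi = ctr2ix(sq->pc);

        if (pi + wqebbs > SQ_SIZE) {
                while (ctr2ix(sq->pc) != 0)
                        post_nop(sq);   /* pad remaining blocks up to the edge */
                pi = 0;
        }
        return pi;
}

int main(void)
{
        struct toy_sq sq = { 0 };
        uint16_t sizes[] = { 3, 2, 4 }; /* actual per-WQE sizes, in blocks */

        for (unsigned int i = 0; i < 3; i++) {
                uint16_t pi = fill_edge_if_needed(&sq, sizes[i]);
                uint16_t b;

                for (b = 0; b < sizes[i]; b++, sq.pc++)
                        snprintf(sq.wqe[ctr2ix(sq.pc)], sizeof(sq.wqe[0]),
                                 "TX%u", i);
                printf("WQE %u (%u blocks) starts at index %u\n",
                       i, sizes[i], pi);
        }
        return 0;
}

Under the previous scheme the padding decision was taken against the worst-case WQE size, so the queue tail was padded even when the actual WQE would still have fit.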
Diffstat (limited to 'drivers/net/ethernet/mellanox/mlx5/core/en_rx.c')
-rw-r--r--   drivers/net/ethernet/mellanox/mlx5/core/en_rx.c   27
1 file changed, 22 insertions(+), 5 deletions(-)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index 7fd3ec877ba4..f4d2c8886492 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -383,6 +383,22 @@ static inline u16 mlx5e_icosq_wrap_cnt(struct mlx5e_icosq *sq)
         return sq->pc >> MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE;
 }
 
+static inline void mlx5e_fill_icosq_edge(struct mlx5e_icosq *sq,
+                                         struct mlx5_wq_cyc *wq,
+                                         u16 pi)
+{
+        struct mlx5e_sq_wqe_info *edge_wi, *wi = &sq->db.ico_wqe[pi];
+        u8 nnops = mlx5_wq_cyc_get_size(wq) - pi;
+
+        edge_wi = wi + nnops;
+
+        /* fill sq edge with nops to avoid wqe wrapping two pages */
+        for (; wi < edge_wi; wi++) {
+                wi->opcode = MLX5_OPCODE_NOP;
+                mlx5e_post_nop(wq, sq->sqn, &sq->pc);
+        }
+}
+
 static int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
 {
         struct mlx5e_mpw_info *wi = &rq->mpwqe.info[ix];
@@ -391,14 +407,15 @@ static int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
         struct mlx5_wq_cyc *wq = &sq->wq;
         struct mlx5e_umr_wqe *umr_wqe;
         u16 xlt_offset = ix << (MLX5E_LOG_ALIGNED_MPWQE_PPW - 1);
-        int err;
         u16 pi;
+        int err;
         int i;
 
-        /* fill sq edge with nops to avoid wqe wrap around */
-        while ((pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc)) > sq->edge) {
-                sq->db.ico_wqe[pi].opcode = MLX5_OPCODE_NOP;
-                mlx5e_post_nop(wq, sq->sqn, &sq->pc);
+        pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
+
+        if (unlikely(pi + MLX5E_UMR_WQEBBS > mlx5_wq_cyc_get_size(wq))) {
+                mlx5e_fill_icosq_edge(sq, wq, pi);
+                pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
         }
 
         umr_wqe = mlx5_wq_cyc_get_wqe(wq, pi);
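A rough way to see the design choice in the hunk above (and in the TX path the title refers to): the old code padded whenever a worst-case WQE could have wrapped, while the new code pads only when the actual WQE would wrap. The self-contained sketch below models only those two padding policies; SQ_SIZE, MAX_WQE_BBS and the stream of two-block WQEs are invented numbers, not the driver's, and the sq->edge bookkeeping is simplified to an equivalent arithmetic test.

#include <stdint.h>
#include <stdio.h>

#define SQ_SIZE      64   /* toy queue size in basic blocks */
#define MAX_WQE_BBS   4   /* assumed worst-case WQE footprint (old policy) */

/* Old policy: pad to the edge whenever the worst-case WQE could wrap. */
static unsigned int nops_old(uint16_t pi, uint16_t wqebbs)
{
        (void)wqebbs;
        return (pi + MAX_WQE_BBS > SQ_SIZE) ? SQ_SIZE - pi : 0;
}

/* New policy: pad only when the actual WQE would wrap. */
static unsigned int nops_new(uint16_t pi, uint16_t wqebbs)
{
        return (pi + wqebbs > SQ_SIZE) ? SQ_SIZE - pi : 0;
}

int main(void)
{
        uint32_t pc_old = 0, pc_new = 0;
        unsigned int wasted_old = 0, wasted_new = 0;
        int i;

        for (i = 0; i < 1000; i++) {    /* post 1000 two-block WQEs */
                unsigned int n;

                n = nops_old(pc_old % SQ_SIZE, 2);
                wasted_old += n;
                pc_old += n + 2;

                n = nops_new(pc_new % SQ_SIZE, 2);
                wasted_new += n;
                pc_new += n + 2;
        }
        printf("NOP blocks posted: old policy %u, new policy %u\n",
               wasted_old, wasted_new);
        return 0;
}

With the worst-case policy every wrap costs NOP blocks even when the WQE would still have fit; with the actual-size test that padding largely disappears, which is consistent with the packet-rate gain the commit message reports.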