summaryrefslogtreecommitdiffstats
path: root/drivers/net/ethernet/mellanox/mlx5/core/en.h
diff options
context:
space:
mode:
authorTariq Toukan2018-02-07 13:41:25 +0100
committerSaeed Mahameed2018-03-31 01:54:49 +0200
commit619a8f2a42f1031cdbd74435b6a9191eb4913139 (patch)
tree93c6fc80525c54e20e666d7d76516f834f85abb2 /drivers/net/ethernet/mellanox/mlx5/core/en.h
parentnet/mlx5e: Use inline MTTs in UMR WQEs (diff)
downloadkernel-qcow2-linux-619a8f2a42f1031cdbd74435b6a9191eb4913139.tar.gz
kernel-qcow2-linux-619a8f2a42f1031cdbd74435b6a9191eb4913139.tar.xz
kernel-qcow2-linux-619a8f2a42f1031cdbd74435b6a9191eb4913139.zip
net/mlx5e: Use linear SKB in Striding RQ
Current Striding RQ HW feature utilizes the RX buffers so that there is no wasted room between the strides. This maximises the memory utilization. This prevents the use of build_skb() (which requires headroom and tailroom), and demands to memcpy the packets headers into the skb linear part. In this patch, whenever a set of conditions holds, we apply an RQ configuration that allows combining the use of linear SKB on top of a Striding RQ. To use build_skb() with Striding RQ, the following must hold: 1. packet does not cross a page boundary. 2. there is enough headroom and tailroom surrounding the packet. We can satisfy 1 and 2 by configuring: stride size = MTU + headroom + tailoom. This is possible only when: a. (MTU - headroom - tailoom) does not exceed PAGE_SIZE. b. HW LRO is turned off. Using linear SKB has many advantages: - Saves a memcpy of the headers. - No page-boundary checks in datapath. - No filler CQEs. - Significantly smaller CQ. - SKB data continuously resides in linear part, and not split to small amount (linear part) and large amount (fragment). This saves datapath cycles in driver and improves utilization of SKB fragments in GRO. - The fragments of a resulting GRO SKB follow the IP forwarding assumption of equal-size fragments. Some implementation details: HW writes the packets to the beginning of a stride, i.e. does not keep headroom. To overcome this we make sure we can extend backwards and use the last bytes of stride i-1. Extra care is needed for stride 0 as it has no preceding stride. We make sure headroom bytes are available by shifting the buffer pointer passed to HW by headroom bytes. This configuration now becomes default, whenever capable. Of course, this implies turning LRO off. Performance testing: ConnectX-5, single core, single RX ring, default MTU. UDP packet rate, early drop in TC layer: -------------------------------------------- | pkt size | before | after | ratio | -------------------------------------------- | 1500byte | 4.65 Mpps | 5.96 Mpps | 1.28x | | 500byte | 5.23 Mpps | 5.97 Mpps | 1.14x | | 64byte | 5.94 Mpps | 5.96 Mpps | 1.00x | -------------------------------------------- TCP streams: ~20% gain Signed-off-by: Tariq Toukan <tariqt@mellanox.com> Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Diffstat (limited to 'drivers/net/ethernet/mellanox/mlx5/core/en.h')
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en.h10
1 files changed, 10 insertions, 0 deletions
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index c1d3a29388bd..d26dd4bc89f4 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -473,6 +473,9 @@ struct mlx5e_page_cache {
struct mlx5e_rq;
typedef void (*mlx5e_fp_handle_rx_cqe)(struct mlx5e_rq*, struct mlx5_cqe64*);
+typedef struct sk_buff *
+(*mlx5e_fp_skb_from_cqe_mpwrq)(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi,
+ u16 cqe_bcnt, u32 head_offset, u32 page_idx);
typedef bool (*mlx5e_fp_post_rx_wqes)(struct mlx5e_rq *rq);
typedef void (*mlx5e_fp_dealloc_wqe)(struct mlx5e_rq*, u16);
@@ -491,6 +494,7 @@ struct mlx5e_rq {
} wqe;
struct {
struct mlx5e_mpw_info *info;
+ mlx5e_fp_skb_from_cqe_mpwrq skb_from_cqe_mpwrq;
u16 num_strides;
u8 log_stride_sz;
bool umr_in_progress;
@@ -834,6 +838,12 @@ bool mlx5e_post_rx_mpwqes(struct mlx5e_rq *rq);
void mlx5e_dealloc_rx_wqe(struct mlx5e_rq *rq, u16 ix);
void mlx5e_dealloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix);
void mlx5e_free_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi);
+struct sk_buff *
+mlx5e_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi,
+ u16 cqe_bcnt, u32 head_offset, u32 page_idx);
+struct sk_buff *
+mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi,
+ u16 cqe_bcnt, u32 head_offset, u32 page_idx);
void mlx5e_update_stats(struct mlx5e_priv *priv);