summaryrefslogtreecommitdiffstats
path: root/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
diff options
context:
space:
mode:
authorTariq Toukan2016-04-20 21:02:13 +0200
committerDavid S. Miller2016-04-21 21:09:05 +0200
commit461017cb006aa1b39b0f647ae0ee2d9d84eef05b (patch)
tree74774f39bddef03f341b0f999277769fb7a8aad7 /drivers/net/ethernet/mellanox/mlx5/core/en_main.c
parentnet/mlx5e: Use function pointers for RX data path handling (diff)
downloadkernel-qcow2-linux-461017cb006aa1b39b0f647ae0ee2d9d84eef05b.tar.gz
kernel-qcow2-linux-461017cb006aa1b39b0f647ae0ee2d9d84eef05b.tar.xz
kernel-qcow2-linux-461017cb006aa1b39b0f647ae0ee2d9d84eef05b.zip
net/mlx5e: Support RX multi-packet WQE (Striding RQ)
Introduce the feature of multi-packet WQE (RX Work Queue Element) referred to as (MPWQE or Striding RQ), in which WQEs are larger and serve multiple packets each. Every WQE consists of many strides of the same size, every received packet is aligned to a beginning of a stride and is written to consecutive strides within a WQE. In the regular approach, each regular WQE is big enough to be capable of serving one received packet of any size up to MTU or 64K in case of device LRO is enabled, making it very wasteful when dealing with small packets or device LRO is enabled. For its flexibility, MPWQE allows a better memory utilization (implying improvements in CPU utilization and packet rate) as packets consume strides according to their size, preserving the rest of the WQE to be available for other packets. MPWQE default configuration: Num of WQEs = 16 Strides Per WQE = 2048 Stride Size = 64 byte The default WQEs memory footprint went from 1024*mtu (~1.5MB) to 16 * 2048 * 64 = 2MB per ring. However, HW LRO can now be supported at no additional cost in memory footprint, and hence we turn it on by default and get an even better performance. Performance tested on ConnectX4-Lx 50G. To isolate the feature under test, the numbers below were measured with HW LRO turned off. We verified that the performance just improves when LRO is turned back on. * Netperf single TCP stream: - BW raised by 10-15% for representative packet sizes: default, 64B, 1024B, 1478B, 65536B. * Netperf multi TCP stream: - No degradation, line rate reached. * Pktgen: packet rate raised by 2-10% for traffic of different message sizes: 64B, 128B, 256B, 1024B, and 1500B. * Pktgen: packet loss in bursts of small messages (64byte), single stream: - | num packets | packets loss before | packets loss after | 2K | ~ 1K | 0 | 8K | ~ 6K | 0 | 16K | ~13K | 0 | 32K | ~28K | 0 | 64K | ~57K | ~24K As expected as the driver can receive as many small packets (<=64B) as the number of total strides in the ring (default = 2048 * 16) vs. 1024 (default ring size regardless of packets size) before this feature. Signed-off-by: Tariq Toukan <tariqt@mellanox.com> Signed-off-by: Achiad Shochat <achiad@mellanox.com> Signed-off-by: Saeed Mahameed <saeedm@mellanox.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'drivers/net/ethernet/mellanox/mlx5/core/en_main.c')
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_main.c109
1 files changed, 86 insertions, 23 deletions
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 23ba12c3a738..871f3af204dd 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -175,6 +175,7 @@ void mlx5e_update_stats(struct mlx5e_priv *priv)
s->rx_csum_none = 0;
s->rx_csum_sw = 0;
s->rx_wqe_err = 0;
+ s->rx_mpwqe_filler = 0;
for (i = 0; i < priv->params.num_channels; i++) {
rq_stats = &priv->channel[i]->rq.stats;
@@ -185,6 +186,7 @@ void mlx5e_update_stats(struct mlx5e_priv *priv)
s->rx_csum_none += rq_stats->csum_none;
s->rx_csum_sw += rq_stats->csum_sw;
s->rx_wqe_err += rq_stats->wqe_err;
+ s->rx_mpwqe_filler += rq_stats->mpwqe_filler;
for (j = 0; j < priv->params.num_tc; j++) {
sq_stats = &priv->channel[i]->sq[j].stats;
@@ -323,6 +325,7 @@ static int mlx5e_create_rq(struct mlx5e_channel *c,
struct mlx5_core_dev *mdev = priv->mdev;
void *rqc = param->rqc;
void *rqc_wq = MLX5_ADDR_OF(rqc, rqc, wq);
+ u32 byte_count;
int wq_sz;
int err;
int i;
@@ -337,28 +340,47 @@ static int mlx5e_create_rq(struct mlx5e_channel *c,
rq->wq.db = &rq->wq.db[MLX5_RCV_DBR];
wq_sz = mlx5_wq_ll_get_size(&rq->wq);
- rq->skb = kzalloc_node(wq_sz * sizeof(*rq->skb), GFP_KERNEL,
- cpu_to_node(c->cpu));
- if (!rq->skb) {
- err = -ENOMEM;
- goto err_rq_wq_destroy;
- }
- rq->wqe_sz = (priv->params.lro_en) ? priv->params.lro_wqe_sz :
- MLX5E_SW2HW_MTU(priv->netdev->mtu);
- rq->wqe_sz = SKB_DATA_ALIGN(rq->wqe_sz + MLX5E_NET_IP_ALIGN);
+ switch (priv->params.rq_wq_type) {
+ case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
+ rq->wqe_info = kzalloc_node(wq_sz * sizeof(*rq->wqe_info),
+ GFP_KERNEL, cpu_to_node(c->cpu));
+ if (!rq->wqe_info) {
+ err = -ENOMEM;
+ goto err_rq_wq_destroy;
+ }
+ rq->handle_rx_cqe = mlx5e_handle_rx_cqe_mpwrq;
+ rq->alloc_wqe = mlx5e_alloc_rx_mpwqe;
+
+ rq->wqe_sz = MLX5_MPWRQ_NUM_STRIDES * MLX5_MPWRQ_STRIDE_SIZE;
+ byte_count = rq->wqe_sz;
+ break;
+ default: /* MLX5_WQ_TYPE_LINKED_LIST */
+ rq->skb = kzalloc_node(wq_sz * sizeof(*rq->skb), GFP_KERNEL,
+ cpu_to_node(c->cpu));
+ if (!rq->skb) {
+ err = -ENOMEM;
+ goto err_rq_wq_destroy;
+ }
+ rq->handle_rx_cqe = mlx5e_handle_rx_cqe;
+ rq->alloc_wqe = mlx5e_alloc_rx_wqe;
+
+ rq->wqe_sz = (priv->params.lro_en) ?
+ priv->params.lro_wqe_sz :
+ MLX5E_SW2HW_MTU(priv->netdev->mtu);
+ rq->wqe_sz = SKB_DATA_ALIGN(rq->wqe_sz + MLX5E_NET_IP_ALIGN);
+ byte_count = rq->wqe_sz - MLX5E_NET_IP_ALIGN;
+ byte_count |= MLX5_HW_START_PADDING;
+ }
for (i = 0; i < wq_sz; i++) {
struct mlx5e_rx_wqe *wqe = mlx5_wq_ll_get_wqe(&rq->wq, i);
- u32 byte_count = rq->wqe_sz - MLX5E_NET_IP_ALIGN;
wqe->data.lkey = c->mkey_be;
- wqe->data.byte_count =
- cpu_to_be32(byte_count | MLX5_HW_START_PADDING);
+ wqe->data.byte_count = cpu_to_be32(byte_count);
}
- rq->handle_rx_cqe = mlx5e_handle_rx_cqe;
- rq->alloc_wqe = mlx5e_alloc_rx_wqe;
+ rq->wq_type = priv->params.rq_wq_type;
rq->pdev = c->pdev;
rq->netdev = c->netdev;
rq->tstamp = &priv->tstamp;
@@ -376,7 +398,14 @@ err_rq_wq_destroy:
static void mlx5e_destroy_rq(struct mlx5e_rq *rq)
{
- kfree(rq->skb);
+ switch (rq->wq_type) {
+ case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
+ kfree(rq->wqe_info);
+ break;
+ default: /* MLX5_WQ_TYPE_LINKED_LIST */
+ kfree(rq->skb);
+ }
+
mlx5_wq_destroy(&rq->wq_ctrl);
}
@@ -1065,7 +1094,18 @@ static void mlx5e_build_rq_param(struct mlx5e_priv *priv,
void *rqc = param->rqc;
void *wq = MLX5_ADDR_OF(rqc, rqc, wq);
- MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_LINKED_LIST);
+ switch (priv->params.rq_wq_type) {
+ case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
+ MLX5_SET(wq, wq, log_wqe_num_of_strides,
+ MLX5_MPWRQ_LOG_NUM_STRIDES - 9);
+ MLX5_SET(wq, wq, log_wqe_stride_size,
+ MLX5_MPWRQ_LOG_STRIDE_SIZE - 6);
+ MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ);
+ break;
+ default: /* MLX5_WQ_TYPE_LINKED_LIST */
+ MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_LINKED_LIST);
+ }
+
MLX5_SET(wq, wq, end_padding_mode, MLX5_WQ_END_PAD_MODE_ALIGN);
MLX5_SET(wq, wq, log_wq_stride, ilog2(sizeof(struct mlx5e_rx_wqe)));
MLX5_SET(wq, wq, log_wq_sz, priv->params.log_rq_size);
@@ -1111,8 +1151,18 @@ static void mlx5e_build_rx_cq_param(struct mlx5e_priv *priv,
struct mlx5e_cq_param *param)
{
void *cqc = param->cqc;
+ u8 log_cq_size;
- MLX5_SET(cqc, cqc, log_cq_size, priv->params.log_rq_size);
+ switch (priv->params.rq_wq_type) {
+ case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
+ log_cq_size = priv->params.log_rq_size +
+ MLX5_MPWRQ_LOG_NUM_STRIDES;
+ break;
+ default: /* MLX5_WQ_TYPE_LINKED_LIST */
+ log_cq_size = priv->params.log_rq_size;
+ }
+
+ MLX5_SET(cqc, cqc, log_cq_size, log_cq_size);
mlx5e_build_common_cq_param(priv, param);
}
@@ -1983,7 +2033,8 @@ static int mlx5e_set_features(struct net_device *netdev,
if (changes & NETIF_F_LRO) {
bool was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state);
- if (was_opened)
+ if (was_opened && (priv->params.rq_wq_type ==
+ MLX5_WQ_TYPE_LINKED_LIST))
mlx5e_close_locked(priv->netdev);
priv->params.lro_en = !!(features & NETIF_F_LRO);
@@ -1992,7 +2043,8 @@ static int mlx5e_set_features(struct net_device *netdev,
mlx5_core_warn(priv->mdev, "lro modify failed, %d\n",
err);
- if (was_opened)
+ if (was_opened && (priv->params.rq_wq_type ==
+ MLX5_WQ_TYPE_LINKED_LIST))
err = mlx5e_open_locked(priv->netdev);
}
@@ -2327,8 +2379,21 @@ static void mlx5e_build_netdev_priv(struct mlx5_core_dev *mdev,
priv->params.log_sq_size =
MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE;
- priv->params.log_rq_size =
- MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE;
+ priv->params.rq_wq_type = MLX5_CAP_GEN(mdev, striding_rq) ?
+ MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ :
+ MLX5_WQ_TYPE_LINKED_LIST;
+
+ switch (priv->params.rq_wq_type) {
+ case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
+ priv->params.log_rq_size = MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE_MPW;
+ priv->params.lro_en = true;
+ break;
+ default: /* MLX5_WQ_TYPE_LINKED_LIST */
+ priv->params.log_rq_size = MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE;
+ }
+
+ priv->params.min_rx_wqes = mlx5_min_rx_wqes(priv->params.rq_wq_type,
+ BIT(priv->params.log_rq_size));
priv->params.rx_cq_moderation_usec =
MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC;
priv->params.rx_cq_moderation_pkts =
@@ -2338,8 +2403,6 @@ static void mlx5e_build_netdev_priv(struct mlx5_core_dev *mdev,
priv->params.tx_cq_moderation_pkts =
MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_PKTS;
priv->params.tx_max_inline = mlx5e_get_max_inline_cap(mdev);
- priv->params.min_rx_wqes =
- MLX5E_PARAMS_DEFAULT_MIN_RX_WQES;
priv->params.num_tc = 1;
priv->params.rss_hfunc = ETH_RSS_HASH_XOR;