author		Tariq Toukan	2016-09-15 15:08:38 +0200
committer	David S. Miller	2016-09-17 15:51:40 +0200
commit		4415a0319f92ea0d624fe11c917faf9114f89187 (patch)
tree		8933cfb00c8a32cab623dc6215ff637a60d2e723 /drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
parent		net/mlx5e: Introduce API for RX mapped pages (diff)
net/mlx5e: Implement RX mapped page cache for page recycle
Instead of reallocating and DMA-mapping pages for the RX data-path,
recycle already-used pages in a per-ring cache.
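The cache is a power-of-two ring: head points at the next entry to
reuse, tail at the next free slot, and both indices wrap with a mask
rather than a modulo. As a stand-alone illustration of the indexing
scheme (hypothetical names; a plain int stands in for struct
mlx5e_dma_info, and the DMA mapping and page refcounting of the real
patch are omitted), consider this minimal sketch:

/* Minimal user-space model of the per-ring page cache indexing.
 * CACHE_SIZE must be a power of two so that "& (CACHE_SIZE - 1)"
 * wraps head/tail without a division.
 */
#include <stdbool.h>
#include <stdio.h>

#define CACHE_SIZE 8	/* the patch uses MLX5E_CACHE_SIZE */

struct page_cache {
	unsigned int head;	/* next entry to reuse */
	unsigned int tail;	/* next free slot */
	int pages[CACHE_SIZE];	/* stand-in for struct mlx5e_dma_info */
};

/* Like mlx5e_rx_cache_put(): refuse when advancing tail would hit head.
 * This "full" test leaves one slot unused, so head == tail can
 * unambiguously mean "empty".
 */
static bool cache_put(struct page_cache *c, int page)
{
	unsigned int tail_next = (c->tail + 1) & (CACHE_SIZE - 1);

	if (tail_next == c->head)
		return false;	/* would count as cache_full */
	c->pages[c->tail] = page;
	c->tail = tail_next;
	return true;
}

/* Like mlx5e_rx_cache_get(): empty when head has caught up with tail. */
static bool cache_get(struct page_cache *c, int *page)
{
	if (c->head == c->tail)
		return false;	/* would count as cache_empty */
	*page = c->pages[c->head];
	c->head = (c->head + 1) & (CACHE_SIZE - 1);
	return true;
}

int main(void)
{
	struct page_cache c = { 0 };
	int page;

	cache_put(&c, 42);		/* page released on RX completion path */
	if (cache_get(&c, &page))	/* handed back out on the next alloc */
		printf("reused page %d\n", page);
	return 0;
}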
Performance tests:
The following results were measured on a freshly booted system,
giving optimal baseline performance, as high-order pages are yet to
be fragmented and depleted.
We ran pktgen single-stream benchmarks, dropping received traffic with
an iptables raw-table DROP rule (numbers are packets per second):
Single stride, 64 bytes:
* 4,739,057 - baseline
* 4,749,550 - order0 no cache
* 4,786,899 - order0 with cache
1% gain
Larger packets, no page cross, 1024 bytes:
* 3,982,361 - baseline
* 3,845,682 - order0 no cache
* 4,127,852 - order0 with cache
3.7% gain
Larger packets, every 3rd packet crosses a page, 1500 bytes:
* 3,731,189 - baseline
* 3,579,414 - order0 no cache
* 3,931,708 - order0 with cache
5.4% gain
Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'drivers/net/ethernet/mellanox/mlx5/core/en_rx.c')
-rw-r--r--	drivers/net/ethernet/mellanox/mlx5/core/en_rx.c	57
1 file changed, 52 insertions(+), 5 deletions(-)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index 0c34daa04c43..dc8677933f76 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -305,11 +305,55 @@ static inline void mlx5e_post_umr_wqe(struct mlx5e_rq *rq, u16 ix)
 	mlx5e_tx_notify_hw(sq, &wqe->ctrl, 0);
 }
 
+static inline bool mlx5e_rx_cache_put(struct mlx5e_rq *rq,
+				      struct mlx5e_dma_info *dma_info)
+{
+	struct mlx5e_page_cache *cache = &rq->page_cache;
+	u32 tail_next = (cache->tail + 1) & (MLX5E_CACHE_SIZE - 1);
+
+	if (tail_next == cache->head) {
+		rq->stats.cache_full++;
+		return false;
+	}
+
+	cache->page_cache[cache->tail] = *dma_info;
+	cache->tail = tail_next;
+	return true;
+}
+
+static inline bool mlx5e_rx_cache_get(struct mlx5e_rq *rq,
+				      struct mlx5e_dma_info *dma_info)
+{
+	struct mlx5e_page_cache *cache = &rq->page_cache;
+
+	if (unlikely(cache->head == cache->tail)) {
+		rq->stats.cache_empty++;
+		return false;
+	}
+
+	if (page_ref_count(cache->page_cache[cache->head].page) != 1) {
+		rq->stats.cache_busy++;
+		return false;
+	}
+
+	*dma_info = cache->page_cache[cache->head];
+	cache->head = (cache->head + 1) & (MLX5E_CACHE_SIZE - 1);
+	rq->stats.cache_reuse++;
+
+	dma_sync_single_for_device(rq->pdev, dma_info->addr, PAGE_SIZE,
+				   DMA_FROM_DEVICE);
+	return true;
+}
+
 static inline int mlx5e_page_alloc_mapped(struct mlx5e_rq *rq,
 					  struct mlx5e_dma_info *dma_info)
 {
-	struct page *page = dev_alloc_page();
+	struct page *page;
+
+	if (mlx5e_rx_cache_get(rq, dma_info))
+		return 0;
 
+	page = dev_alloc_page();
 	if (unlikely(!page))
 		return -ENOMEM;
 
@@ -324,9 +368,12 @@ static inline int mlx5e_page_alloc_mapped(struct mlx5e_rq *rq,
 	return 0;
 }
 
-static inline void mlx5e_page_release(struct mlx5e_rq *rq,
-				      struct mlx5e_dma_info *dma_info)
+void mlx5e_page_release(struct mlx5e_rq *rq, struct mlx5e_dma_info *dma_info,
+			bool recycle)
 {
+	if (likely(recycle) && mlx5e_rx_cache_put(rq, dma_info))
+		return;
+
 	dma_unmap_page(rq->pdev, dma_info->addr, PAGE_SIZE, DMA_FROM_DEVICE);
 	put_page(dma_info->page);
 }
@@ -362,7 +409,7 @@ err_unmap:
 		struct mlx5e_dma_info *dma_info = &wi->umr.dma_info[i];
 
 		page_ref_sub(dma_info->page, pg_strides);
-		mlx5e_page_release(rq, dma_info);
+		mlx5e_page_release(rq, dma_info, true);
 	}
 
 	return err;
@@ -377,7 +424,7 @@ void mlx5e_free_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi)
 		struct mlx5e_dma_info *dma_info = &wi->umr.dma_info[i];
 
 		page_ref_sub(dma_info->page, pg_strides - wi->skbs_frags[i]);
-		mlx5e_page_release(rq, dma_info);
+		mlx5e_page_release(rq, dma_info, true);
 	}
 }
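Two details in the hunks above are worth noting. A cached page is
handed out by mlx5e_rx_cache_get() only when page_ref_count() == 1,
i.e. when the network stack has dropped all of its references;
otherwise the head entry stays put and the attempt is counted as
cache_busy. And since a reused page skips dma_map_page(), ownership is
handed back to the device with dma_sync_single_for_device() instead.
The new recycle parameter of mlx5e_page_release() lets callers opt out
of recycling and fall back to the unmap-and-put path; all callers in
this patch pass true.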