summaryrefslogtreecommitdiffstats
path: root/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h
diff options
context:
space:
mode:
authorEric Dumazet2011-11-14 07:05:34 +0100
committerDavid S. Miller2011-11-14 20:13:30 +0100
commite52fcb2462ac484e6dd6e68869536609f0216938 (patch)
tree794407ff242481d5d09e3e669cabfe129d80f6df /drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h
parentnet: introduce build_skb() (diff)
downloadkernel-qcow2-linux-e52fcb2462ac484e6dd6e68869536609f0216938.tar.gz
kernel-qcow2-linux-e52fcb2462ac484e6dd6e68869536609f0216938.tar.xz
kernel-qcow2-linux-e52fcb2462ac484e6dd6e68869536609f0216938.zip
bnx2x: uses build_skb() in receive path
bnx2x uses following formula to compute its rx_buf_sz : dev->mtu + 2*L1_CACHE_BYTES + 14 + 8 + 8 + 2 Then core network adds NET_SKB_PAD and SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) Final allocated size for skb head on x86_64 (L1_CACHE_BYTES = 64, MTU=1500) : 2112 bytes : SLUB/SLAB round this to 4096 bytes. Since skb truesize is then bigger than SK_MEM_QUANTUM, we have lot of false sharing because of mem_reclaim in UDP stack. One possible way to half truesize is to reduce the need by 64 bytes (2112 -> 2048 bytes) Instead of allocating a full cache line at the end of packet for alignment, we can use the fact that skb_shared_info sits at the end of skb->head, and we can use this room, if we convert bnx2x to new build_skb() infrastructure. skb_shared_info will be initialized after hardware finished its transfert, so we can eventually overwrite the final padding. Using build_skb() also reduces cache line misses in the driver, since we use cache hot skb instead of cold ones. Number of in-flight sk_buff structures is lower, they are recycled while still hot. Performance results : (820.000 pps on a rx UDP monothread benchmark, instead of 720.000 pps) Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com> CC: Eilon Greenstein <eilong@broadcom.com> CC: Ben Hutchings <bhutchings@solarflare.com> CC: Tom Herbert <therbert@google.com> CC: Jamal Hadi Salim <hadi@mojatatu.com> CC: Stephen Hemminger <shemminger@vyatta.com> CC: Thomas Graf <tgraf@infradead.org> CC: Herbert Xu <herbert@gondor.apana.org.au> CC: Jeff Kirsher <jeffrey.t.kirsher@intel.com> Acked-by: Eilon Greenstein <eilong@broadcom.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h')
-rw-r--r--drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h33
1 files changed, 17 insertions, 16 deletions
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h
index 260226d09916..41eb17e7720f 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h
@@ -910,26 +910,27 @@ static inline int bnx2x_alloc_rx_sge(struct bnx2x *bp,
return 0;
}
-static inline int bnx2x_alloc_rx_skb(struct bnx2x *bp,
- struct bnx2x_fastpath *fp, u16 index)
+static inline int bnx2x_alloc_rx_data(struct bnx2x *bp,
+ struct bnx2x_fastpath *fp, u16 index)
{
- struct sk_buff *skb;
+ u8 *data;
struct sw_rx_bd *rx_buf = &fp->rx_buf_ring[index];
struct eth_rx_bd *rx_bd = &fp->rx_desc_ring[index];
dma_addr_t mapping;
- skb = netdev_alloc_skb(bp->dev, fp->rx_buf_size);
- if (unlikely(skb == NULL))
+ data = kmalloc(fp->rx_buf_size + NET_SKB_PAD, GFP_ATOMIC);
+ if (unlikely(data == NULL))
return -ENOMEM;
- mapping = dma_map_single(&bp->pdev->dev, skb->data, fp->rx_buf_size,
+ mapping = dma_map_single(&bp->pdev->dev, data + NET_SKB_PAD,
+ fp->rx_buf_size,
DMA_FROM_DEVICE);
if (unlikely(dma_mapping_error(&bp->pdev->dev, mapping))) {
- dev_kfree_skb_any(skb);
+ kfree(data);
return -ENOMEM;
}
- rx_buf->skb = skb;
+ rx_buf->data = data;
dma_unmap_addr_set(rx_buf, mapping, mapping);
rx_bd->addr_hi = cpu_to_le32(U64_HI(mapping));
@@ -938,12 +939,12 @@ static inline int bnx2x_alloc_rx_skb(struct bnx2x *bp,
return 0;
}
-/* note that we are not allocating a new skb,
+/* note that we are not allocating a new buffer,
* we are just moving one from cons to prod
* we are not creating a new mapping,
* so there is no need to check for dma_mapping_error().
*/
-static inline void bnx2x_reuse_rx_skb(struct bnx2x_fastpath *fp,
+static inline void bnx2x_reuse_rx_data(struct bnx2x_fastpath *fp,
u16 cons, u16 prod)
{
struct sw_rx_bd *cons_rx_buf = &fp->rx_buf_ring[cons];
@@ -953,7 +954,7 @@ static inline void bnx2x_reuse_rx_skb(struct bnx2x_fastpath *fp,
dma_unmap_addr_set(prod_rx_buf, mapping,
dma_unmap_addr(cons_rx_buf, mapping));
- prod_rx_buf->skb = cons_rx_buf->skb;
+ prod_rx_buf->data = cons_rx_buf->data;
*prod_bd = *cons_bd;
}
@@ -1029,9 +1030,9 @@ static inline void bnx2x_free_tpa_pool(struct bnx2x *bp,
for (i = 0; i < last; i++) {
struct bnx2x_agg_info *tpa_info = &fp->tpa_info[i];
struct sw_rx_bd *first_buf = &tpa_info->first_buf;
- struct sk_buff *skb = first_buf->skb;
+ u8 *data = first_buf->data;
- if (skb == NULL) {
+ if (data == NULL) {
DP(NETIF_MSG_IFDOWN, "tpa bin %d empty on free\n", i);
continue;
}
@@ -1039,8 +1040,8 @@ static inline void bnx2x_free_tpa_pool(struct bnx2x *bp,
dma_unmap_single(&bp->pdev->dev,
dma_unmap_addr(first_buf, mapping),
fp->rx_buf_size, DMA_FROM_DEVICE);
- dev_kfree_skb(skb);
- first_buf->skb = NULL;
+ kfree(data);
+ first_buf->data = NULL;
}
}
@@ -1148,7 +1149,7 @@ static inline int bnx2x_alloc_rx_bds(struct bnx2x_fastpath *fp,
* fp->eth_q_stats.rx_skb_alloc_failed = 0
*/
for (i = 0; i < rx_ring_size; i++) {
- if (bnx2x_alloc_rx_skb(bp, fp, ring_prod) < 0) {
+ if (bnx2x_alloc_rx_data(bp, fp, ring_prod) < 0) {
fp->eth_q_stats.rx_skb_alloc_failed++;
continue;
}