summaryrefslogtreecommitdiffstats
path: root/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c
diff options
context:
space:
mode:
authorBjörn Töpel2018-10-02 10:00:34 +0200
committerJeff Kirsher2018-10-03 21:54:37 +0200
commit8221c5eba8c13831d80860f6e0016d5c075695fb (patch)
treeabe571968df17b957dea89892084f44594a4967e /drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c
parentixgbe: move common Tx functions to ixgbe_txrx_common.h (diff)
downloadkernel-qcow2-linux-8221c5eba8c13831d80860f6e0016d5c075695fb.tar.gz
kernel-qcow2-linux-8221c5eba8c13831d80860f6e0016d5c075695fb.tar.xz
kernel-qcow2-linux-8221c5eba8c13831d80860f6e0016d5c075695fb.zip
ixgbe: add AF_XDP zero-copy Tx support
This patch adds zero-copy Tx support for AF_XDP sockets. It implements the ndo_xsk_async_xmit netdev ndo and performs all the Tx logic from a NAPI context. This means pulling egress packets from the Tx ring, placing the frames on the NIC HW descriptor ring and completing sent frames back to the application via the completion ring. The regular XDP Tx ring is used for AF_XDP as well. This rationale for this is as follows: XDP_REDIRECT guarantees mutual exclusion between different NAPI contexts based on CPU id. In other words, a netdev can XDP_REDIRECT to another netdev with a different NAPI context, since the operation is bound to a specific core and each core has its own hardware ring. As the AF_XDP Tx action is running in the same NAPI context and using the same ring, it will also be protected from XDP_REDIRECT actions with the exact same mechanism. As with AF_XDP Rx, all AF_XDP Tx specific functions are added to ixgbe_xsk.c. Signed-off-by: Björn Töpel <bjorn.topel@intel.com> Tested-by: William Tu <u9012063@gmail.com> Tested-by: Andrew Bowers <andrewx.bowers@intel.com> Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Diffstat (limited to 'drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c')
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c175
1 files changed, 175 insertions, 0 deletions
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c
index e876ff120758..65c3e2c979d4 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c
@@ -624,3 +624,178 @@ void ixgbe_xsk_clean_rx_ring(struct ixgbe_ring *rx_ring)
}
}
}
+
+static bool ixgbe_xmit_zc(struct ixgbe_ring *xdp_ring, unsigned int budget)
+{
+ union ixgbe_adv_tx_desc *tx_desc = NULL;
+ struct ixgbe_tx_buffer *tx_bi;
+ bool work_done = true;
+ u32 len, cmd_type;
+ dma_addr_t dma;
+
+ while (budget-- > 0) {
+ if (unlikely(!ixgbe_desc_unused(xdp_ring))) {
+ work_done = false;
+ break;
+ }
+
+ if (!xsk_umem_consume_tx(xdp_ring->xsk_umem, &dma, &len))
+ break;
+
+ dma_sync_single_for_device(xdp_ring->dev, dma, len,
+ DMA_BIDIRECTIONAL);
+
+ tx_bi = &xdp_ring->tx_buffer_info[xdp_ring->next_to_use];
+ tx_bi->bytecount = len;
+ tx_bi->xdpf = NULL;
+
+ tx_desc = IXGBE_TX_DESC(xdp_ring, xdp_ring->next_to_use);
+ tx_desc->read.buffer_addr = cpu_to_le64(dma);
+
+ /* put descriptor type bits */
+ cmd_type = IXGBE_ADVTXD_DTYP_DATA |
+ IXGBE_ADVTXD_DCMD_DEXT |
+ IXGBE_ADVTXD_DCMD_IFCS;
+ cmd_type |= len | IXGBE_TXD_CMD;
+ tx_desc->read.cmd_type_len = cpu_to_le32(cmd_type);
+ tx_desc->read.olinfo_status =
+ cpu_to_le32(len << IXGBE_ADVTXD_PAYLEN_SHIFT);
+
+ xdp_ring->next_to_use++;
+ if (xdp_ring->next_to_use == xdp_ring->count)
+ xdp_ring->next_to_use = 0;
+ }
+
+ if (tx_desc) {
+ ixgbe_xdp_ring_update_tail(xdp_ring);
+ xsk_umem_consume_tx_done(xdp_ring->xsk_umem);
+ }
+
+ return !!budget && work_done;
+}
+
+static void ixgbe_clean_xdp_tx_buffer(struct ixgbe_ring *tx_ring,
+ struct ixgbe_tx_buffer *tx_bi)
+{
+ xdp_return_frame(tx_bi->xdpf);
+ dma_unmap_single(tx_ring->dev,
+ dma_unmap_addr(tx_bi, dma),
+ dma_unmap_len(tx_bi, len), DMA_TO_DEVICE);
+ dma_unmap_len_set(tx_bi, len, 0);
+}
+
+bool ixgbe_clean_xdp_tx_irq(struct ixgbe_q_vector *q_vector,
+ struct ixgbe_ring *tx_ring, int napi_budget)
+{
+ unsigned int total_packets = 0, total_bytes = 0;
+ u32 i = tx_ring->next_to_clean, xsk_frames = 0;
+ unsigned int budget = q_vector->tx.work_limit;
+ struct xdp_umem *umem = tx_ring->xsk_umem;
+ union ixgbe_adv_tx_desc *tx_desc;
+ struct ixgbe_tx_buffer *tx_bi;
+ bool xmit_done;
+
+ tx_bi = &tx_ring->tx_buffer_info[i];
+ tx_desc = IXGBE_TX_DESC(tx_ring, i);
+ i -= tx_ring->count;
+
+ do {
+ if (!(tx_desc->wb.status & cpu_to_le32(IXGBE_TXD_STAT_DD)))
+ break;
+
+ total_bytes += tx_bi->bytecount;
+ total_packets += tx_bi->gso_segs;
+
+ if (tx_bi->xdpf)
+ ixgbe_clean_xdp_tx_buffer(tx_ring, tx_bi);
+ else
+ xsk_frames++;
+
+ tx_bi->xdpf = NULL;
+ total_bytes += tx_bi->bytecount;
+
+ tx_bi++;
+ tx_desc++;
+ i++;
+ if (unlikely(!i)) {
+ i -= tx_ring->count;
+ tx_bi = tx_ring->tx_buffer_info;
+ tx_desc = IXGBE_TX_DESC(tx_ring, 0);
+ }
+
+ /* issue prefetch for next Tx descriptor */
+ prefetch(tx_desc);
+
+ /* update budget accounting */
+ budget--;
+ } while (likely(budget));
+
+ i += tx_ring->count;
+ tx_ring->next_to_clean = i;
+
+ u64_stats_update_begin(&tx_ring->syncp);
+ tx_ring->stats.bytes += total_bytes;
+ tx_ring->stats.packets += total_packets;
+ u64_stats_update_end(&tx_ring->syncp);
+ q_vector->tx.total_bytes += total_bytes;
+ q_vector->tx.total_packets += total_packets;
+
+ if (xsk_frames)
+ xsk_umem_complete_tx(umem, xsk_frames);
+
+ xmit_done = ixgbe_xmit_zc(tx_ring, q_vector->tx.work_limit);
+ return budget > 0 && xmit_done;
+}
+
+int ixgbe_xsk_async_xmit(struct net_device *dev, u32 qid)
+{
+ struct ixgbe_adapter *adapter = netdev_priv(dev);
+ struct ixgbe_ring *ring;
+
+ if (test_bit(__IXGBE_DOWN, &adapter->state))
+ return -ENETDOWN;
+
+ if (!READ_ONCE(adapter->xdp_prog))
+ return -ENXIO;
+
+ if (qid >= adapter->num_xdp_queues)
+ return -ENXIO;
+
+ if (!adapter->xsk_umems || !adapter->xsk_umems[qid])
+ return -ENXIO;
+
+ ring = adapter->xdp_ring[qid];
+ if (!napi_if_scheduled_mark_missed(&ring->q_vector->napi)) {
+ u64 eics = BIT_ULL(ring->q_vector->v_idx);
+
+ ixgbe_irq_rearm_queues(adapter, eics);
+ }
+
+ return 0;
+}
+
+void ixgbe_xsk_clean_tx_ring(struct ixgbe_ring *tx_ring)
+{
+ u16 ntc = tx_ring->next_to_clean, ntu = tx_ring->next_to_use;
+ struct xdp_umem *umem = tx_ring->xsk_umem;
+ struct ixgbe_tx_buffer *tx_bi;
+ u32 xsk_frames = 0;
+
+ while (ntc != ntu) {
+ tx_bi = &tx_ring->tx_buffer_info[ntc];
+
+ if (tx_bi->xdpf)
+ ixgbe_clean_xdp_tx_buffer(tx_ring, tx_bi);
+ else
+ xsk_frames++;
+
+ tx_bi->xdpf = NULL;
+
+ ntc++;
+ if (ntc == tx_ring->count)
+ ntc = 0;
+ }
+
+ if (xsk_frames)
+ xsk_umem_complete_tx(umem, xsk_frames);
+}