author      Michael Brown    2018-01-08 14:21:40 +0100
committer   Michael Brown    2018-01-13 00:46:02 +0100
commit      2fb70e8b32244e25ca3f43518b580fe0a54536d2 (patch)
tree        3857b94e0a729b6d52290c12d5036b792b2a6f47 /src/drivers/net
parent      [build] Avoid use of "ld --oformat binary" (diff)
[ena] Add driver for Amazon ENA virtual function NIC
Signed-off-by: Michael Brown <mcb30@ipxe.org>
Diffstat (limited to 'src/drivers/net')
 -rw-r--r--   src/drivers/net/ena.c   1039
 -rw-r--r--   src/drivers/net/ena.h    588
 2 files changed, 1627 insertions(+), 0 deletions(-)
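As a quick orientation before the patch body: the admin setup below programs each queue by writing its entry count and entry size into a single capabilities register (the ENA_CAPS() macro defined in ena.h) and by splitting the queue's physical base address across a LO/HI register pair (ena_set_base()). The following is a minimal, standalone sketch of that encoding; the helper names example_aq_caps() and example_split_base() are illustrative only and are not part of the driver.

#include <stdint.h>

/* From ena.h in this patch: entry size in the upper 16 bits, count in the lower */
#define ENA_CAPS( count, size ) ( ( (size) << 16 ) | ( (count) << 0 ) )

/* Illustrative: capabilities value for the 2-entry admin queue, whose
 * entries (union ena_aq_req) are padded to 64 bytes.
 */
static uint32_t example_aq_caps ( void ) {
	return ENA_CAPS ( 2, 64 );	/* yields 0x00400002 */
}

/* Illustrative: split a physical address into the two 32-bit values that
 * ena_set_base() writes to the ENA_BASE_LO/ENA_BASE_HI register pair.
 */
static void example_split_base ( uint64_t phys, uint32_t *lo, uint32_t *hi ) {
	*lo = ( phys & 0xffffffffUL );
	*hi = ( phys >> 32 );
}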
diff --git a/src/drivers/net/ena.c b/src/drivers/net/ena.c new file mode 100644 index 00000000..b6d8bc6f --- /dev/null +++ b/src/drivers/net/ena.c @@ -0,0 +1,1039 @@ +/* + * Copyright (C) 2018 Michael Brown <mbrown@fensystems.co.uk>. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + * + * You can also choose to distribute this program under the terms of + * the Unmodified Binary Distribution Licence (as given in the file + * COPYING.UBDL), provided that you have satisfied its requirements. + */ + +FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL ); + +#include <stdint.h> +#include <string.h> +#include <unistd.h> +#include <errno.h> +#include <byteswap.h> +#include <ipxe/netdevice.h> +#include <ipxe/ethernet.h> +#include <ipxe/if_ether.h> +#include <ipxe/iobuf.h> +#include <ipxe/malloc.h> +#include <ipxe/pci.h> +#include "ena.h" + +/** @file + * + * Amazon ENA network driver + * + */ + +/** + * Get direction name (for debugging) + * + * @v direction Direction + * @ret name Direction name + */ +static const char * ena_direction ( unsigned int direction ) { + + switch ( direction ) { + case ENA_SQ_TX: return "TX"; + case ENA_SQ_RX: return "RX"; + default: return "<UNKNOWN>"; + } +} + +/****************************************************************************** + * + * Device reset + * + ****************************************************************************** + */ + +/** + * Reset hardware + * + * @v ena ENA device + * @ret rc Return status code + */ +static int ena_reset ( struct ena_nic *ena ) { + uint32_t stat; + unsigned int i; + + /* Trigger reset */ + writel ( ENA_CTRL_RESET, ( ena->regs + ENA_CTRL ) ); + + /* Wait for reset to complete */ + for ( i = 0 ; i < ENA_RESET_MAX_WAIT_MS ; i++ ) { + + /* Check if device is ready */ + stat = readl ( ena->regs + ENA_STAT ); + if ( stat & ENA_STAT_READY ) + return 0; + + /* Delay */ + mdelay ( 1 ); + } + + DBGC ( ena, "ENA %p timed out waiting for reset (status %#08x)\n", + ena, stat ); + return -ETIMEDOUT; +} + +/****************************************************************************** + * + * Admin queue + * + ****************************************************************************** + */ + +/** + * Set queue base address + * + * @v ena ENA device + * @v offset Register offset + * @v address Base address + */ +static inline void ena_set_base ( struct ena_nic *ena, unsigned int offset, + void *base ) { + physaddr_t phys = virt_to_bus ( base ); + + /* Program base address registers */ + writel ( ( phys & 0xffffffffUL ), + ( ena->regs + offset + ENA_BASE_LO ) ); + if ( sizeof ( phys ) > sizeof ( uint32_t ) ) { + writel ( ( ( ( uint64_t ) phys ) >> 32 ), + ( ena->regs + offset + ENA_BASE_HI ) ); + } else { + writel ( 0, ( ena->regs + offset + ENA_BASE_HI ) ); + } +} + +/** + * Set queue capabilities + * + * @v ena ENA device + * @v offset Register offset + * @v count 
Number of entries + * @v size Size of each entry + */ +static inline __attribute__ (( always_inline )) void +ena_set_caps ( struct ena_nic *ena, unsigned int offset, unsigned int count, + size_t size ) { + + /* Program capabilities register */ + writel ( ENA_CAPS ( count, size ), ( ena->regs + offset ) ); +} + +/** + * Clear queue capabilities + * + * @v ena ENA device + * @v offset Register offset + */ +static inline __attribute__ (( always_inline )) void +ena_clear_caps ( struct ena_nic *ena, unsigned int offset ) { + + /* Clear capabilities register */ + writel ( 0, ( ena->regs + offset ) ); +} + +/** + * Create admin queues + * + * @v ena ENA device + * @ret rc Return status code + */ +static int ena_create_admin ( struct ena_nic *ena ) { + size_t aq_len = ( ENA_AQ_COUNT * sizeof ( ena->aq.req[0] ) ); + size_t acq_len = ( ENA_ACQ_COUNT * sizeof ( ena->acq.rsp[0] ) ); + int rc; + + /* Allocate admin completion queue */ + ena->acq.rsp = malloc_dma ( acq_len, acq_len ); + if ( ! ena->acq.rsp ) { + rc = -ENOMEM; + goto err_alloc_acq; + } + memset ( ena->acq.rsp, 0, acq_len ); + + /* Allocate admin queue */ + ena->aq.req = malloc_dma ( aq_len, aq_len ); + if ( ! ena->aq.req ) { + rc = -ENOMEM; + goto err_alloc_aq; + } + memset ( ena->aq.req, 0, aq_len ); + + /* Program queue addresses and capabilities */ + ena_set_base ( ena, ENA_ACQ_BASE, ena->acq.rsp ); + ena_set_caps ( ena, ENA_ACQ_CAPS, ENA_ACQ_COUNT, + sizeof ( ena->acq.rsp[0] ) ); + ena_set_base ( ena, ENA_AQ_BASE, ena->aq.req ); + ena_set_caps ( ena, ENA_AQ_CAPS, ENA_AQ_COUNT, + sizeof ( ena->aq.req[0] ) ); + + DBGC ( ena, "ENA %p AQ [%08lx,%08lx) ACQ [%08lx,%08lx)\n", + ena, virt_to_phys ( ena->aq.req ), + ( virt_to_phys ( ena->aq.req ) + aq_len ), + virt_to_phys ( ena->acq.rsp ), + ( virt_to_phys ( ena->acq.rsp ) + acq_len ) ); + return 0; + + ena_clear_caps ( ena, ENA_AQ_CAPS ); + ena_clear_caps ( ena, ENA_ACQ_CAPS ); + free_dma ( ena->aq.req, aq_len ); + err_alloc_aq: + free_dma ( ena->acq.rsp, acq_len ); + err_alloc_acq: + return rc; +} + +/** + * Destroy admin queues + * + * @v ena ENA device + */ +static void ena_destroy_admin ( struct ena_nic *ena ) { + size_t aq_len = ( ENA_AQ_COUNT * sizeof ( ena->aq.req[0] ) ); + size_t acq_len = ( ENA_ACQ_COUNT * sizeof ( ena->acq.rsp[0] ) ); + + /* Clear queue capabilities */ + ena_clear_caps ( ena, ENA_AQ_CAPS ); + ena_clear_caps ( ena, ENA_ACQ_CAPS ); + wmb(); + + /* Free queues */ + free_dma ( ena->aq.req, aq_len ); + free_dma ( ena->acq.rsp, acq_len ); + DBGC ( ena, "ENA %p AQ and ACQ destroyed\n", ena ); +} + +/** + * Get next available admin queue request + * + * @v ena ENA device + * @ret req Admin queue request + */ +static union ena_aq_req * ena_admin_req ( struct ena_nic *ena ) { + union ena_aq_req *req; + unsigned int index; + + /* Get next request */ + index = ( ena->aq.prod % ENA_AQ_COUNT ); + req = &ena->aq.req[index]; + + /* Initialise request */ + memset ( ( ( ( void * ) req ) + sizeof ( req->header ) ), 0, + ( sizeof ( *req ) - sizeof ( req->header ) ) ); + req->header.id = ena->aq.prod; + + /* Increment producer counter */ + ena->aq.prod++; + + return req; +} + +/** + * Issue admin queue request + * + * @v ena ENA device + * @v req Admin queue request + * @v rsp Admin queue response to fill in + * @ret rc Return status code + */ +static int ena_admin ( struct ena_nic *ena, union ena_aq_req *req, + union ena_acq_rsp **rsp ) { + unsigned int index; + unsigned int i; + int rc; + + /* Locate response */ + index = ( ena->acq.cons % ENA_ACQ_COUNT ); + *rsp = 
&ena->acq.rsp[index]; + + /* Mark request as ready */ + req->header.flags ^= ENA_AQ_PHASE; + wmb(); + DBGC2 ( ena, "ENA %p admin request %#x:\n", + ena, le16_to_cpu ( req->header.id ) ); + DBGC2_HDA ( ena, virt_to_phys ( req ), req, sizeof ( *req ) ); + + /* Ring doorbell */ + writel ( ena->aq.prod, ( ena->regs + ENA_AQ_DB ) ); + + /* Wait for response */ + for ( i = 0 ; i < ENA_ADMIN_MAX_WAIT_MS ; i++ ) { + + /* Check for response */ + if ( ( (*rsp)->header.flags ^ ena->acq.phase ) & ENA_ACQ_PHASE){ + mdelay ( 1 ); + continue; + } + DBGC2 ( ena, "ENA %p admin response %#x:\n", + ena, le16_to_cpu ( (*rsp)->header.id ) ); + DBGC2_HDA ( ena, virt_to_phys ( *rsp ), *rsp, sizeof ( **rsp )); + + /* Increment consumer counter */ + ena->acq.cons++; + if ( ( ena->acq.cons % ENA_ACQ_COUNT ) == 0 ) + ena->acq.phase ^= ENA_ACQ_PHASE; + + /* Check command identifier */ + if ( (*rsp)->header.id != req->header.id ) { + DBGC ( ena, "ENA %p admin response %#x mismatch:\n", + ena, le16_to_cpu ( (*rsp)->header.id ) ); + rc = -EILSEQ; + goto err; + } + + /* Check status */ + if ( (*rsp)->header.status != 0 ) { + DBGC ( ena, "ENA %p admin response %#x status %d:\n", + ena, le16_to_cpu ( (*rsp)->header.id ), + (*rsp)->header.status ); + rc = -EIO; + goto err; + } + + /* Success */ + return 0; + } + + rc = -ETIMEDOUT; + DBGC ( ena, "ENA %p timed out waiting for admin request %#x:\n", + ena, le16_to_cpu ( req->header.id ) ); + err: + DBGC_HDA ( ena, virt_to_phys ( req ), req, sizeof ( *req ) ); + DBGC_HDA ( ena, virt_to_phys ( *rsp ), *rsp, sizeof ( **rsp ) ); + return rc; +} + +/** + * Create submission queue + * + * @v ena ENA device + * @v sq Submission queue + * @v cq Corresponding completion queue + * @ret rc Return status code + */ +static int ena_create_sq ( struct ena_nic *ena, struct ena_sq *sq, + struct ena_cq *cq ) { + union ena_aq_req *req; + union ena_acq_rsp *rsp; + int rc; + + /* Allocate submission queue entries */ + sq->sqe.raw = malloc_dma ( sq->len, ENA_ALIGN ); + if ( ! 
sq->sqe.raw ) { + rc = -ENOMEM; + goto err_alloc; + } + memset ( sq->sqe.raw, 0, sq->len ); + + /* Construct request */ + req = ena_admin_req ( ena ); + req->header.opcode = ENA_CREATE_SQ; + req->create_sq.direction = sq->direction; + req->create_sq.policy = cpu_to_le16 ( ENA_SQ_HOST_MEMORY | + ENA_SQ_CONTIGUOUS ); + req->create_sq.cq_id = cpu_to_le16 ( cq->id ); + req->create_sq.count = cpu_to_le16 ( sq->count ); + req->create_sq.address = cpu_to_le64 ( virt_to_bus ( sq->sqe.raw ) ); + + /* Issue request */ + if ( ( rc = ena_admin ( ena, req, &rsp ) ) != 0 ) + goto err_admin; + + /* Parse response */ + sq->id = le16_to_cpu ( rsp->create_sq.id ); + sq->doorbell = le32_to_cpu ( rsp->create_sq.doorbell ); + + /* Reset producer counter and phase */ + sq->prod = 0; + sq->phase = ENA_SQE_PHASE; + + DBGC ( ena, "ENA %p %s SQ%d at [%08lx,%08lx) db +%04x CQ%d\n", + ena, ena_direction ( sq->direction ), sq->id, + virt_to_phys ( sq->sqe.raw ), + ( virt_to_phys ( sq->sqe.raw ) + sq->len ), + sq->doorbell, cq->id ); + return 0; + + err_admin: + free_dma ( sq->sqe.raw, sq->len ); + err_alloc: + return rc; +} + +/** + * Destroy submission queue + * + * @v ena ENA device + * @v sq Submission queue + * @ret rc Return status code + */ +static int ena_destroy_sq ( struct ena_nic *ena, struct ena_sq *sq ) { + union ena_aq_req *req; + union ena_acq_rsp *rsp; + int rc; + + /* Construct request */ + req = ena_admin_req ( ena ); + req->header.opcode = ENA_DESTROY_SQ; + req->destroy_sq.id = cpu_to_le16 ( sq->id ); + req->destroy_sq.direction = sq->direction; + + /* Issue request */ + if ( ( rc = ena_admin ( ena, req, &rsp ) ) != 0 ) + return rc; + + /* Free submission queue entries */ + free_dma ( sq->sqe.raw, sq->len ); + + DBGC ( ena, "ENA %p %s SQ%d destroyed\n", + ena, ena_direction ( sq->direction ), sq->id ); + return 0; +} + +/** + * Create completion queue + * + * @v ena ENA device + * @v cq Completion queue + * @ret rc Return status code + */ +static int ena_create_cq ( struct ena_nic *ena, struct ena_cq *cq ) { + union ena_aq_req *req; + union ena_acq_rsp *rsp; + int rc; + + /* Allocate completion queue entries */ + cq->cqe.raw = malloc_dma ( cq->len, ENA_ALIGN ); + if ( ! 
cq->cqe.raw ) { + rc = -ENOMEM; + goto err_alloc; + } + memset ( cq->cqe.raw, 0, cq->len ); + + /* Construct request */ + req = ena_admin_req ( ena ); + req->header.opcode = ENA_CREATE_CQ; + req->create_cq.size = cq->size; + req->create_cq.count = cpu_to_le16 ( cq->requested ); + req->create_cq.address = cpu_to_le64 ( virt_to_bus ( cq->cqe.raw ) ); + + /* Issue request */ + if ( ( rc = ena_admin ( ena, req, &rsp ) ) != 0 ) + goto err_admin; + + /* Parse response */ + cq->id = le16_to_cpu ( rsp->create_cq.id ); + cq->actual = le16_to_cpu ( rsp->create_cq.count ); + cq->doorbell = le32_to_cpu ( rsp->create_cq.doorbell ); + cq->mask = ( cq->actual - 1 ); + if ( cq->actual != cq->requested ) { + DBGC ( ena, "ENA %p CQ%d requested %d actual %d\n", + ena, cq->id, cq->requested, cq->actual ); + } + + /* Reset consumer counter and phase */ + cq->cons = 0; + cq->phase = ENA_CQE_PHASE; + + DBGC ( ena, "ENA %p CQ%d at [%08lx,%08lx) db +%04x\n", + ena, cq->id, virt_to_phys ( cq->cqe.raw ), + ( virt_to_phys ( cq->cqe.raw ) + cq->len ), cq->doorbell ); + return 0; + + err_admin: + free_dma ( cq->cqe.raw, cq->len ); + err_alloc: + return rc; +} + +/** + * Destroy completion queue + * + * @v ena ENA device + * @v cq Completion queue + * @ret rc Return status code + */ +static int ena_destroy_cq ( struct ena_nic *ena, struct ena_cq *cq ) { + union ena_aq_req *req; + union ena_acq_rsp *rsp; + int rc; + + /* Construct request */ + req = ena_admin_req ( ena ); + req->header.opcode = ENA_DESTROY_CQ; + req->destroy_cq.id = cpu_to_le16 ( cq->id ); + + /* Issue request */ + if ( ( rc = ena_admin ( ena, req, &rsp ) ) != 0 ) + return rc; + + /* Free completion queue entries */ + free_dma ( cq->cqe.raw, cq->len ); + + DBGC ( ena, "ENA %p CQ%d destroyed\n", ena, cq->id ); + return 0; +} + +/** + * Create queue pair + * + * @v ena ENA device + * @v qp Queue pair + * @ret rc Return status code + */ +static int ena_create_qp ( struct ena_nic *ena, struct ena_qp *qp ) { + int rc; + + /* Create completion queue */ + if ( ( rc = ena_create_cq ( ena, &qp->cq ) ) != 0 ) + goto err_create_cq; + + /* Create submission queue */ + if ( ( rc = ena_create_sq ( ena, &qp->sq, &qp->cq ) ) != 0 ) + goto err_create_sq; + + return 0; + + ena_destroy_sq ( ena, &qp->sq ); + err_create_sq: + ena_destroy_cq ( ena, &qp->cq ); + err_create_cq: + return rc; +} + +/** + * Destroy queue pair + * + * @v ena ENA device + * @v qp Queue pair + * @ret rc Return status code + */ +static int ena_destroy_qp ( struct ena_nic *ena, struct ena_qp *qp ) { + + /* Destroy submission queue */ + ena_destroy_sq ( ena, &qp->sq ); + + /* Destroy completion queue */ + ena_destroy_cq ( ena, &qp->cq ); + + return 0; +} + +/** + * Get feature + * + * @v ena ENA device + * @v id Feature identifier + * @v feature Feature to fill in + * @ret rc Return status code + */ +static int ena_get_feature ( struct ena_nic *ena, unsigned int id, + union ena_feature **feature ) { + union ena_aq_req *req; + union ena_acq_rsp *rsp; + int rc; + + /* Construct request */ + req = ena_admin_req ( ena ); + req->header.opcode = ENA_GET_FEATURE; + req->get_feature.id = id; + + /* Issue request */ + if ( ( rc = ena_admin ( ena, req, &rsp ) ) != 0 ) + return rc; + + /* Parse response */ + *feature = &rsp->get_feature.feature; + + return 0; +} + +/** + * Get device attributes + * + * @v netdev Network device + * @ret rc Return status code + */ +static int ena_get_device_attributes ( struct net_device *netdev ) { + struct ena_nic *ena = netdev->priv; + union ena_feature *feature; + int rc; + + 
/* Get device attributes */ + if ( ( rc = ena_get_feature ( ena, ENA_DEVICE_ATTRIBUTES, + &feature ) ) != 0 ) + return rc; + + /* Extract MAC address */ + memcpy ( netdev->hw_addr, feature->device.mac, ETH_ALEN ); + + /* Extract MTU */ + netdev->max_pkt_len = le32_to_cpu ( feature->device.mtu ); + + DBGC ( ena, "ENA %p MAC %s MTU %zd\n", + ena, eth_ntoa ( netdev->hw_addr ), netdev->max_pkt_len ); + return 0; +} + +/** + * Get statistics (for debugging) + * + * @v ena ENA device + * @ret rc Return status code + */ +static int ena_get_stats ( struct ena_nic *ena ) { + union ena_aq_req *req; + union ena_acq_rsp *rsp; + struct ena_get_stats_rsp *stats; + int rc; + + /* Do nothing unless debug messages are enabled */ + if ( ! DBG_LOG ) + return 0; + + /* Construct request */ + req = ena_admin_req ( ena ); + req->header.opcode = ENA_GET_STATS; + req->get_stats.type = ENA_STATS_TYPE_BASIC; + req->get_stats.scope = ENA_STATS_SCOPE_ETH; + req->get_stats.device = ENA_DEVICE_MINE; + + /* Issue request */ + if ( ( rc = ena_admin ( ena, req, &rsp ) ) != 0 ) + return rc; + + /* Parse response */ + stats = &rsp->get_stats; + DBGC ( ena, "ENA %p TX bytes %#llx packets %#llx\n", ena, + ( ( unsigned long long ) le64_to_cpu ( stats->tx_bytes ) ), + ( ( unsigned long long ) le64_to_cpu ( stats->tx_packets ) ) ); + DBGC ( ena, "ENA %p RX bytes %#llx packets %#llx drops %#llx\n", ena, + ( ( unsigned long long ) le64_to_cpu ( stats->rx_bytes ) ), + ( ( unsigned long long ) le64_to_cpu ( stats->rx_packets ) ), + ( ( unsigned long long ) le64_to_cpu ( stats->rx_drops ) ) ); + + return 0; +} + +/****************************************************************************** + * + * Network device interface + * + ****************************************************************************** + */ + +/** + * Refill receive queue + * + * @v netdev Network device + */ +static void ena_refill_rx ( struct net_device *netdev ) { + struct ena_nic *ena = netdev->priv; + struct io_buffer *iobuf; + struct ena_rx_sqe *sqe; + unsigned int index; + physaddr_t address; + size_t len = netdev->max_pkt_len; + unsigned int refilled = 0; + + /* Refill queue */ + while ( ( ena->rx.sq.prod - ena->rx.cq.cons ) < ENA_RX_COUNT ) { + + /* Allocate I/O buffer */ + iobuf = alloc_iob ( len ); + if ( ! 
iobuf ) { + /* Wait for next refill */ + break; + } + + /* Get next submission queue entry */ + index = ( ena->rx.sq.prod % ENA_RX_COUNT ); + sqe = &ena->rx.sq.sqe.rx[index]; + + /* Construct submission queue entry */ + address = virt_to_bus ( iobuf->data ); + sqe->len = cpu_to_le16 ( len ); + sqe->id = cpu_to_le16 ( ena->rx.sq.prod ); + sqe->address = cpu_to_le64 ( address ); + wmb(); + sqe->flags = ( ENA_SQE_FIRST | ENA_SQE_LAST | ENA_SQE_CPL | + ena->rx.sq.phase ); + + /* Increment producer counter */ + ena->rx.sq.prod++; + if ( ( ena->rx.sq.prod % ENA_RX_COUNT ) == 0 ) + ena->rx.sq.phase ^= ENA_SQE_PHASE; + + /* Record I/O buffer */ + assert ( ena->rx_iobuf[index] == NULL ); + ena->rx_iobuf[index] = iobuf; + + DBGC2 ( ena, "ENA %p RX %d at [%08llx,%08llx)\n", ena, sqe->id, + ( ( unsigned long long ) address ), + ( ( unsigned long long ) address + len ) ); + refilled++; + } + + /* Ring doorbell, if applicable */ + if ( refilled ) { + wmb(); + writel ( ena->rx.sq.prod, ( ena->regs + ena->rx.sq.doorbell ) ); + } +} + +/** + * Discard unused receive I/O buffers + * + * @v ena ENA device + */ +static void ena_empty_rx ( struct ena_nic *ena ) { + unsigned int i; + + for ( i = 0 ; i < ENA_RX_COUNT ; i++ ) { + if ( ena->rx_iobuf[i] ) + free_iob ( ena->rx_iobuf[i] ); + ena->rx_iobuf[i] = NULL; + } +} + +/** + * Open network device + * + * @v netdev Network device + * @ret rc Return status code + */ +static int ena_open ( struct net_device *netdev ) { + struct ena_nic *ena = netdev->priv; + int rc; + + /* Create transmit queue pair */ + if ( ( rc = ena_create_qp ( ena, &ena->tx ) ) != 0 ) + goto err_create_tx; + + /* Create receive queue pair */ + if ( ( rc = ena_create_qp ( ena, &ena->rx ) ) != 0 ) + goto err_create_rx; + + /* Refill receive queue */ + ena_refill_rx ( netdev ); + + return 0; + + ena_destroy_qp ( ena, &ena->rx ); + err_create_rx: + ena_destroy_qp ( ena, &ena->tx ); + err_create_tx: + return rc; +} + +/** + * Close network device + * + * @v netdev Network device + */ +static void ena_close ( struct net_device *netdev ) { + struct ena_nic *ena = netdev->priv; + + /* Dump statistics (for debugging) */ + ena_get_stats ( ena ); + + /* Destroy receive queue pair */ + ena_destroy_qp ( ena, &ena->rx ); + + /* Discard any unused receive buffers */ + ena_empty_rx ( ena ); + + /* Destroy transmit queue pair */ + ena_destroy_qp ( ena, &ena->tx ); +} + +/** + * Transmit packet + * + * @v netdev Network device + * @v iobuf I/O buffer + * @ret rc Return status code + */ +static int ena_transmit ( struct net_device *netdev, struct io_buffer *iobuf ) { + struct ena_nic *ena = netdev->priv; + struct ena_tx_sqe *sqe; + unsigned int index; + physaddr_t address; + size_t len; + + /* Get next submission queue entry */ + if ( ( ena->tx.sq.prod - ena->tx.cq.cons ) >= ENA_TX_COUNT ) { + DBGC ( ena, "ENA %p out of transmit descriptors\n", ena ); + return -ENOBUFS; + } + index = ( ena->tx.sq.prod % ENA_TX_COUNT ); + sqe = &ena->tx.sq.sqe.tx[index]; + + /* Construct submission queue entry */ + address = virt_to_bus ( iobuf->data ); + len = iob_len ( iobuf ); + sqe->len = cpu_to_le16 ( len ); + sqe->id = ena->tx.sq.prod; + sqe->address = cpu_to_le64 ( address ); + wmb(); + sqe->flags = ( ENA_SQE_FIRST | ENA_SQE_LAST | ENA_SQE_CPL | + ena->tx.sq.phase ); + wmb(); + + /* Increment producer counter */ + ena->tx.sq.prod++; + if ( ( ena->tx.sq.prod % ENA_TX_COUNT ) == 0 ) + ena->tx.sq.phase ^= ENA_SQE_PHASE; + + /* Ring doorbell */ + writel ( ena->tx.sq.prod, ( ena->regs + ena->tx.sq.doorbell ) ); + + DBGC2 ( 
ena, "ENA %p TX %d at [%08llx,%08llx)\n", ena, sqe->id, + ( ( unsigned long long ) address ), + ( ( unsigned long long ) address + len ) ); + return 0; +} + +/** + * Poll for completed transmissions + * + * @v netdev Network device + */ +static void ena_poll_tx ( struct net_device *netdev ) { + struct ena_nic *ena = netdev->priv; + struct ena_tx_cqe *cqe; + unsigned int index; + + /* Check for completed packets */ + while ( ena->tx.cq.cons != ena->tx.sq.prod ) { + + /* Get next completion queue entry */ + index = ( ena->tx.cq.cons & ena->tx.cq.mask ); + cqe = &ena->tx.cq.cqe.tx[index]; + + /* Stop if completion queue entry is empty */ + if ( ( cqe->flags ^ ena->tx.cq.phase ) & ENA_CQE_PHASE ) + return; + DBGC2 ( ena, "ENA %p TX %d complete\n", ena, + ( le16_to_cpu ( cqe->id ) >> 2 /* Don't ask */ ) ); + + /* Increment consumer counter */ + ena->tx.cq.cons++; + if ( ! ( ena->tx.cq.cons & ena->tx.cq.mask ) ) + ena->tx.cq.phase ^= ENA_CQE_PHASE; + + /* Complete transmit */ + netdev_tx_complete_next ( netdev ); + } +} + +/** + * Poll for received packets + * + * @v netdev Network device + */ +static void ena_poll_rx ( struct net_device *netdev ) { + struct ena_nic *ena = netdev->priv; + struct ena_rx_cqe *cqe; + struct io_buffer *iobuf; + unsigned int index; + size_t len; + + /* Check for received packets */ + while ( ena->rx.cq.cons != ena->rx.sq.prod ) { + + /* Get next completion queue entry */ + index = ( ena->rx.cq.cons % ENA_RX_COUNT ); + cqe = &ena->rx.cq.cqe.rx[index]; + + /* Stop if completion queue entry is empty */ + if ( ( cqe->flags ^ ena->rx.cq.phase ) & ENA_CQE_PHASE ) + return; + + /* Increment consumer counter */ + ena->rx.cq.cons++; + if ( ! ( ena->rx.cq.cons & ena->rx.cq.mask ) ) + ena->rx.cq.phase ^= ENA_CQE_PHASE; + + /* Populate I/O buffer */ + iobuf = ena->rx_iobuf[index]; + ena->rx_iobuf[index] = NULL; + len = le16_to_cpu ( cqe->len ); + iob_put ( iobuf, len ); + + /* Hand off to network stack */ + DBGC2 ( ena, "ENA %p RX %d complete (length %zd)\n", + ena, le16_to_cpu ( cqe->id ), len ); + netdev_rx ( netdev, iobuf ); + } +} + +/** + * Poll for completed and received packets + * + * @v netdev Network device + */ +static void ena_poll ( struct net_device *netdev ) { + + /* Poll for transmit completions */ + ena_poll_tx ( netdev ); + + /* Poll for receive completions */ + ena_poll_rx ( netdev ); + + /* Refill receive ring */ + ena_refill_rx ( netdev ); +} + +/** ENA network device operations */ +static struct net_device_operations ena_operations = { + .open = ena_open, + .close = ena_close, + .transmit = ena_transmit, + .poll = ena_poll, +}; + +/****************************************************************************** + * + * PCI interface + * + ****************************************************************************** + */ + +/** + * Probe PCI device + * + * @v pci PCI device + * @ret rc Return status code + */ +static int ena_probe ( struct pci_device *pci ) { + struct net_device *netdev; + struct ena_nic *ena; + int rc; + + /* Allocate and initialise net device */ + netdev = alloc_etherdev ( sizeof ( *ena ) ); + if ( ! 
netdev ) { + rc = -ENOMEM; + goto err_alloc; + } + netdev_init ( netdev, &ena_operations ); + ena = netdev->priv; + pci_set_drvdata ( pci, netdev ); + netdev->dev = &pci->dev; + memset ( ena, 0, sizeof ( *ena ) ); + ena->acq.phase = ENA_ACQ_PHASE; + ena_cq_init ( &ena->tx.cq, ENA_TX_COUNT, + sizeof ( ena->tx.cq.cqe.tx[0] ) ); + ena_sq_init ( &ena->tx.sq, ENA_SQ_TX, ENA_TX_COUNT, + sizeof ( ena->tx.sq.sqe.tx[0] ) ); + ena_cq_init ( &ena->rx.cq, ENA_RX_COUNT, + sizeof ( ena->rx.cq.cqe.rx[0] ) ); + ena_sq_init ( &ena->rx.sq, ENA_SQ_RX, ENA_RX_COUNT, + sizeof ( ena->rx.sq.sqe.rx[0] ) ); + + /* Fix up PCI device */ + adjust_pci_device ( pci ); + + /* Map registers */ + ena->regs = ioremap ( pci->membase, ENA_BAR_SIZE ); + if ( ! ena->regs ) { + rc = -ENODEV; + goto err_ioremap; + } + + /* Reset the NIC */ + if ( ( rc = ena_reset ( ena ) ) != 0 ) + goto err_reset; + + /* Create admin queues */ + if ( ( rc = ena_create_admin ( ena ) ) != 0 ) + goto err_create_admin; + + /* Fetch MAC address */ + if ( ( rc = ena_get_device_attributes ( netdev ) ) != 0 ) + goto err_get_device_attributes; + + /* Register network device */ + if ( ( rc = register_netdev ( netdev ) ) != 0 ) + goto err_register_netdev; + + /* Mark as link up, since we have no way to test link state on + * this hardware. + */ + netdev_link_up ( netdev ); + + return 0; + + unregister_netdev ( netdev ); + err_register_netdev: + err_get_device_attributes: + ena_destroy_admin ( ena ); + err_create_admin: + ena_reset ( ena ); + err_reset: + iounmap ( ena->regs ); + err_ioremap: + netdev_nullify ( netdev ); + netdev_put ( netdev ); + err_alloc: + return rc; +} + +/** + * Remove PCI device + * + * @v pci PCI device + */ +static void ena_remove ( struct pci_device *pci ) { + struct net_device *netdev = pci_get_drvdata ( pci ); + struct ena_nic *ena = netdev->priv; + + /* Unregister network device */ + unregister_netdev ( netdev ); + + /* Destroy admin queues */ + ena_destroy_admin ( ena ); + + /* Reset card */ + ena_reset ( ena ); + + /* Free network device */ + iounmap ( ena->regs ); + netdev_nullify ( netdev ); + netdev_put ( netdev ); +} + +/** ENA PCI device IDs */ +static struct pci_device_id ena_nics[] = { + PCI_ROM ( 0x1d0f, 0xec20, "ena-vf", "ENA VF", 0 ), + PCI_ROM ( 0x1d0f, 0xec21, "ena-vf-llq", "ENA VF (LLQ)", 0 ), +}; + +/** ENA PCI driver */ +struct pci_driver ena_driver __pci_driver = { + .ids = ena_nics, + .id_count = ( sizeof ( ena_nics ) / sizeof ( ena_nics[0] ) ), + .probe = ena_probe, + .remove = ena_remove, +}; diff --git a/src/drivers/net/ena.h b/src/drivers/net/ena.h new file mode 100644 index 00000000..0496fc6b --- /dev/null +++ b/src/drivers/net/ena.h @@ -0,0 +1,588 @@ +#ifndef _ENA_H +#define _ENA_H + +/** @file + * + * Amazon ENA network driver + * + */ + +FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL ); + +#include <stdint.h> +#include <ipxe/if_ether.h> + +/** BAR size */ +#define ENA_BAR_SIZE 16384 + +/** Queue alignment */ +#define ENA_ALIGN 4096 + +/** Number of admin queue entries */ +#define ENA_AQ_COUNT 2 + +/** Number of admin completion queue entries */ +#define ENA_ACQ_COUNT 2 + +/** Number of transmit queue entries */ +#define ENA_TX_COUNT 16 + +/** Number of receive queue entries */ +#define ENA_RX_COUNT 16 + +/** Base address low register offset */ +#define ENA_BASE_LO 0x0 + +/** Base address high register offset */ +#define ENA_BASE_HI 0x4 + +/** Capability register value */ +#define ENA_CAPS( count, size ) ( ( (size) << 16 ) | ( (count) << 0 ) ) + +/** Admin queue base address register */ +#define ENA_AQ_BASE 0x10 
+ +/** Admin queue capabilities register */ +#define ENA_AQ_CAPS 0x18 + +/** Admin completion queue base address register */ +#define ENA_ACQ_BASE 0x20 + +/** Admin completion queue capabilities register */ +#define ENA_ACQ_CAPS 0x28 + +/** Admin queue doorbell register */ +#define ENA_AQ_DB 0x2c + +/** Maximum time to wait for admin requests */ +#define ENA_ADMIN_MAX_WAIT_MS 5000 + +/** Device control register */ +#define ENA_CTRL 0x54 +#define ENA_CTRL_RESET 0x00000001UL /**< Reset */ + +/** Maximum time to wait for reset */ +#define ENA_RESET_MAX_WAIT_MS 1000 + +/** Device status register */ +#define ENA_STAT 0x58 +#define ENA_STAT_READY 0x00000001UL /**< Ready */ + +/** Admin queue entry header */ +struct ena_aq_header { + /** Request identifier */ + uint8_t id; + /** Reserved */ + uint8_t reserved; + /** Opcode */ + uint8_t opcode; + /** Flags */ + uint8_t flags; +} __attribute__ (( packed )); + +/** Admin queue ownership phase flag */ +#define ENA_AQ_PHASE 0x01 + +/** Admin completion queue entry header */ +struct ena_acq_header { + /** Request identifier */ + uint8_t id; + /** Reserved */ + uint8_t reserved; + /** Status */ + uint8_t status; + /** Flags */ + uint8_t flags; + /** Extended status */ + uint16_t ext; + /** Consumer index */ + uint16_t cons; +} __attribute__ (( packed )); + +/** Admin completion queue ownership phase flag */ +#define ENA_ACQ_PHASE 0x01 + +/** Device attributes */ +#define ENA_DEVICE_ATTRIBUTES 1 + +/** Device attributes */ +struct ena_device_attributes { + /** Implementation */ + uint32_t implementation; + /** Device version */ + uint32_t version; + /** Supported features */ + uint32_t features; + /** Reserved */ + uint8_t reserved_a[4]; + /** Physical address width */ + uint32_t physical; + /** Virtual address width */ + uint32_t virtual; + /** MAC address */ + uint8_t mac[ETH_ALEN]; + /** Reserved */ + uint8_t reserved_b[2]; + /** Maximum MTU */ + uint32_t mtu; +} __attribute__ (( packed )); + +/** Feature */ +union ena_feature { + /** Device attributes */ + struct ena_device_attributes device; +}; + +/** Submission queue direction */ +enum ena_sq_direction { + /** Transmit */ + ENA_SQ_TX = 0x20, + /** Receive */ + ENA_SQ_RX = 0x40, +}; + +/** Create submission queue */ +#define ENA_CREATE_SQ 1 + +/** Create submission queue request */ +struct ena_create_sq_req { + /** Header */ + struct ena_aq_header header; + /** Direction */ + uint8_t direction; + /** Reserved */ + uint8_t reserved_a; + /** Policy */ + uint16_t policy; + /** Completion queue identifier */ + uint16_t cq_id; + /** Number of entries */ + uint16_t count; + /** Base address */ + uint64_t address; + /** Writeback address */ + uint64_t writeback; + /** Reserved */ + uint8_t reserved_b[8]; +} __attribute__ (( packed )); + +/** Submission queue policy */ +enum ena_sq_policy { + /** Use host memory */ + ENA_SQ_HOST_MEMORY = 0x0001, + /** Memory is contiguous */ + ENA_SQ_CONTIGUOUS = 0x0100, +}; + +/** Create submission queue response */ +struct ena_create_sq_rsp { + /** Header */ + struct ena_acq_header header; + /** Submission queue identifier */ + uint16_t id; + /** Reserved */ + uint8_t reserved[2]; + /** Doorbell register offset */ + uint32_t doorbell; + /** LLQ descriptor ring offset */ + uint32_t llq_desc; + /** LLQ header offset */ + uint32_t llq_data; +} __attribute__ (( packed )); + +/** Destroy submission queue */ +#define ENA_DESTROY_SQ 2 + +/** Destroy submission queue request */ +struct ena_destroy_sq_req { + /** Header */ + struct ena_aq_header header; + /** Submission queue 
identifier */ + uint16_t id; + /** Direction */ + uint8_t direction; + /** Reserved */ + uint8_t reserved; +} __attribute__ (( packed )); + +/** Destroy submission queue response */ +struct ena_destroy_sq_rsp { + /** Header */ + struct ena_acq_header header; +} __attribute__ (( packed )); + +/** Create completion queue */ +#define ENA_CREATE_CQ 3 + +/** Create completion queue request */ +struct ena_create_cq_req { + /** Header */ + struct ena_aq_header header; + /** Interrupts enabled */ + uint8_t intr; + /** Entry size (in 32-bit words) */ + uint8_t size; + /** Number of entries */ + uint16_t count; + /** MSI-X vector */ + uint32_t vector; + /** Base address */ + uint64_t address; +} __attribute__ (( packed )); + +/** Create completion queue response */ +struct ena_create_cq_rsp { + /** Header */ + struct ena_acq_header header; + /** Completion queue identifier */ + uint16_t id; + /** Actual number of entries */ + uint16_t count; + /** NUMA node register offset */ + uint32_t node; + /** Doorbell register offset */ + uint32_t doorbell; + /** Interrupt unmask register offset */ + uint32_t intr; +} __attribute__ (( packed )); + +/** Destroy completion queue */ +#define ENA_DESTROY_CQ 4 + +/** Destroy completion queue request */ +struct ena_destroy_cq_req { + /** Header */ + struct ena_aq_header header; + /** Completion queue identifier */ + uint16_t id; + /** Reserved */ + uint8_t reserved[2]; +} __attribute__ (( packed )); + +/** Destroy completion queue response */ +struct ena_destroy_cq_rsp { + /** Header */ + struct ena_acq_header header; +} __attribute__ (( packed )); + +/** Get feature */ +#define ENA_GET_FEATURE 8 + +/** Get feature request */ +struct ena_get_feature_req { + /** Header */ + struct ena_aq_header header; + /** Length */ + uint32_t len; + /** Address */ + uint64_t address; + /** Flags */ + uint8_t flags; + /** Feature identifier */ + uint8_t id; + /** Reserved */ + uint8_t reserved[2]; +} __attribute__ (( packed )); + +/** Get feature response */ +struct ena_get_feature_rsp { + /** Header */ + struct ena_acq_header header; + /** Feature */ + union ena_feature feature; +} __attribute__ (( packed )); + +/** Get statistics */ +#define ENA_GET_STATS 11 + +/** Get statistics request */ +struct ena_get_stats_req { + /** Header */ + struct ena_aq_header header; + /** Reserved */ + uint8_t reserved_a[12]; + /** Type */ + uint8_t type; + /** Scope */ + uint8_t scope; + /** Reserved */ + uint8_t reserved_b[2]; + /** Queue ID */ + uint16_t queue; + /** Device ID */ + uint16_t device; +} __attribute__ (( packed )); + +/** Basic statistics */ +#define ENA_STATS_TYPE_BASIC 0 + +/** Ethernet statistics */ +#define ENA_STATS_SCOPE_ETH 1 + +/** My device */ +#define ENA_DEVICE_MINE 0xffff + +/** Get statistics response */ +struct ena_get_stats_rsp { + /** Header */ + struct ena_acq_header header; + /** Transmit byte count */ + uint64_t tx_bytes; + /** Transmit packet count */ + uint64_t tx_packets; + /** Receive byte count */ + uint64_t rx_bytes; + /** Receive packet count */ + uint64_t rx_packets; + /** Receive drop count */ + uint64_t rx_drops; +} __attribute__ (( packed )); + +/** Admin queue request */ +union ena_aq_req { + /** Header */ + struct ena_aq_header header; + /** Create submission queue */ + struct ena_create_sq_req create_sq; + /** Destroy submission queue */ + struct ena_destroy_sq_req destroy_sq; + /** Create completion queue */ + struct ena_create_cq_req create_cq; + /** Destroy completion queue */ + struct ena_destroy_cq_req destroy_cq; + /** Get feature */ + struct 
ena_get_feature_req get_feature; + /** Get statistics */ + struct ena_get_stats_req get_stats; + /** Padding */ + uint8_t pad[64]; +}; + +/** Admin completion queue response */ +union ena_acq_rsp { + /** Header */ + struct ena_acq_header header; + /** Create submission queue */ + struct ena_create_sq_rsp create_sq; + /** Destroy submission queue */ + struct ena_destroy_sq_rsp destroy_sq; + /** Create completion queue */ + struct ena_create_cq_rsp create_cq; + /** Destroy completion queue */ + struct ena_destroy_cq_rsp destroy_cq; + /** Get feature */ + struct ena_get_feature_rsp get_feature; + /** Get statistics */ + struct ena_get_stats_rsp get_stats; + /** Padding */ + uint8_t pad[64]; +}; + +/** Admin queue */ +struct ena_aq { + /** Requests */ + union ena_aq_req *req; + /** Producer counter */ + unsigned int prod; +}; + +/** Admin completion queue */ +struct ena_acq { + /** Responses */ + union ena_acq_rsp *rsp; + /** Consumer counter */ + unsigned int cons; + /** Phase */ + unsigned int phase; +}; + +/** Transmit submission queue entry */ +struct ena_tx_sqe { + /** Length */ + uint16_t len; + /** Reserved */ + uint8_t reserved_a; + /** Flags */ + uint8_t flags; + /** Reserved */ + uint8_t reserved_b[3]; + /** Request identifier */ + uint8_t id; + /** Address */ + uint64_t address; +} __attribute__ (( packed )); + +/** Receive submission queue entry */ +struct ena_rx_sqe { + /** Length */ + uint16_t len; + /** Reserved */ + uint8_t reserved_a; + /** Flags */ + uint8_t flags; + /** Request identifier */ + uint16_t id; + /** Reserved */ + uint8_t reserved_b[2]; + /** Address */ + uint64_t address; +} __attribute__ (( packed )); + +/** Submission queue ownership phase flag */ +#define ENA_SQE_PHASE 0x01 + +/** This is the first descriptor */ +#define ENA_SQE_FIRST 0x04 + +/** This is the last descriptor */ +#define ENA_SQE_LAST 0x08 + +/** Request completion */ +#define ENA_SQE_CPL 0x10 + +/** Transmit completion queue entry */ +struct ena_tx_cqe { + /** Request identifier */ + uint16_t id; + /** Status */ + uint8_t status; + /** Flags */ + uint8_t flags; + /** Reserved */ + uint8_t reserved[2]; + /** Consumer index */ + uint16_t cons; +} __attribute__ (( packed )); + +/** Receive completion queue entry */ +struct ena_rx_cqe { + /** Reserved */ + uint8_t reserved_a[3]; + /** Flags */ + uint8_t flags; + /** Length */ + uint16_t len; + /** Request identifier */ + uint16_t id; + /** Reserved */ + uint8_t reserved_b[8]; +} __attribute__ (( packed )); + +/** Completion queue ownership phase flag */ +#define ENA_CQE_PHASE 0x01 + +/** Submission queue */ +struct ena_sq { + /** Entries */ + union { + /** Transmit submission queue entries */ + struct ena_tx_sqe *tx; + /** Receive submission queue entries */ + struct ena_rx_sqe *rx; + /** Raw data */ + void *raw; + } sqe; + /** Doorbell register offset */ + unsigned int doorbell; + /** Total length of entries */ + size_t len; + /** Producer counter */ + unsigned int prod; + /** Phase */ + unsigned int phase; + /** Submission queue identifier */ + uint16_t id; + /** Direction */ + uint8_t direction; + /** Number of entries */ + uint8_t count; +}; + +/** + * Initialise submission queue + * + * @v sq Submission queue + * @v direction Direction + * @v count Number of entries + * @v size Size of each entry + */ +static inline __attribute__ (( always_inline )) void +ena_sq_init ( struct ena_sq *sq, unsigned int direction, unsigned int count, + size_t size ) { + + sq->len = ( count * size ); + sq->direction = direction; + sq->count = count; +} + +/** 
Completion queue */ +struct ena_cq { + /** Entries */ + union { + /** Transmit completion queue entries */ + struct ena_tx_cqe *tx; + /** Receive completion queue entries */ + struct ena_rx_cqe *rx; + /** Raw data */ + void *raw; + } cqe; + /** Doorbell register offset */ + unsigned int doorbell; + /** Total length of entries */ + size_t len; + /** Consumer counter */ + unsigned int cons; + /** Phase */ + unsigned int phase; + /** Completion queue identifier */ + uint16_t id; + /** Entry size (in 32-bit words) */ + uint8_t size; + /** Requested number of entries */ + uint8_t requested; + /** Actual number of entries */ + uint8_t actual; + /** Actual number of entries minus one */ + uint8_t mask; +}; + +/** + * Initialise completion queue + * + * @v cq Completion queue + * @v count Number of entries + * @v size Size of each entry + */ +static inline __attribute__ (( always_inline )) void +ena_cq_init ( struct ena_cq *cq, unsigned int count, size_t size ) { + + cq->len = ( count * size ); + cq->size = ( size / sizeof ( uint32_t ) ); + cq->requested = count; +} + +/** Queue pair */ +struct ena_qp { + /** Submission queue */ + struct ena_sq sq; + /** Completion queue */ + struct ena_cq cq; +}; + +/** An ENA network card */ +struct ena_nic { + /** Registers */ + void *regs; + /** Admin queue */ + struct ena_aq aq; + /** Admin completion queue */ + struct ena_acq acq; + /** Transmit queue */ + struct ena_qp tx; + /** Receive queue */ + struct ena_qp rx; + /** Receive I/O buffers */ + struct io_buffer *rx_iobuf[ENA_RX_COUNT]; +}; + +#endif /* _ENA_H */ |
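One detail of the new driver worth calling out for readers of the poll path: completed descriptors are detected purely by the ownership phase bit carried in each completion queue entry, with the expected phase flipping every time the consumer index wraps the ring (see ena_poll_tx() and ena_poll_rx() above). Below is a standalone, compilable sketch of that convention; the names struct ring and ring_poll() are hypothetical and stand in for the driver's own types.

#include <stdint.h>
#include <stddef.h>

#define PHASE 0x01	/* ownership phase flag, as in ENA_CQE_PHASE */
#define COUNT 16	/* ring size, as in ENA_TX_COUNT / ENA_RX_COUNT */

struct cqe {
	uint16_t id;
	uint8_t status;
	uint8_t flags;	/* bit 0 carries the ownership phase */
};

struct ring {
	struct cqe cqe[COUNT];
	unsigned int cons;	/* free-running consumer counter */
	unsigned int phase;	/* expected phase for the current lap
				 * (the driver starts it at ENA_CQE_PHASE) */
};

/* Return the next completed entry, or NULL if hardware has not yet
 * written one: an entry belongs to the consumer only while its phase
 * flag matches the consumer's expected phase.
 */
static struct cqe * ring_poll ( struct ring *ring ) {
	struct cqe *cqe = &ring->cqe[ ring->cons % COUNT ];

	if ( ( cqe->flags ^ ring->phase ) & PHASE )
		return NULL;

	/* Advance consumer and flip the expected phase on ring wrap */
	ring->cons++;
	if ( ( ring->cons % COUNT ) == 0 )
		ring->phase ^= PHASE;

	return cqe;
}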