/* * Copyright (C) 2024 Michael Brown . * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License as * published by the Free Software Foundation; either version 2 of the * License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA * 02110-1301, USA. * * You can also choose to distribute this program under the terms of * the Unmodified Binary Distribution Licence (as given in the file * COPYING.UBDL), provided that you have satisfied its requirements. */ FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL ); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "gve.h" /** @file * * Google Virtual Ethernet network driver * */ /* Disambiguate the various error causes */ #define EINFO_EIO_ADMIN_UNSET \ __einfo_uniqify ( EINFO_EIO, 0x00, "Uncompleted" ) #define EIO_ADMIN_UNSET \ __einfo_error ( EINFO_EIO_ADMIN_UNSET ) #define EINFO_EIO_ADMIN_ABORTED \ __einfo_uniqify ( EINFO_EIO, 0x10, "Aborted" ) #define EIO_ADMIN_ABORTED \ __einfo_error ( EINFO_EIO_ADMIN_ABORTED ) #define EINFO_EIO_ADMIN_EXISTS \ __einfo_uniqify ( EINFO_EIO, 0x11, "Already exists" ) #define EIO_ADMIN_EXISTS \ __einfo_error ( EINFO_EIO_ADMIN_EXISTS ) #define EINFO_EIO_ADMIN_CANCELLED \ __einfo_uniqify ( EINFO_EIO, 0x12, "Cancelled" ) #define EIO_ADMIN_CANCELLED \ __einfo_error ( EINFO_EIO_ADMIN_CANCELLED ) #define EINFO_EIO_ADMIN_DATALOSS \ __einfo_uniqify ( EINFO_EIO, 0x13, "Data loss" ) #define EIO_ADMIN_DATALOSS \ __einfo_error ( 
EINFO_EIO_ADMIN_DATALOSS ) #define EINFO_EIO_ADMIN_DEADLINE \ __einfo_uniqify ( EINFO_EIO, 0x14, "Deadline exceeded" ) #define EIO_ADMIN_DEADLINE \ __einfo_error ( EINFO_EIO_ADMIN_DEADLINE ) #define EINFO_EIO_ADMIN_PRECONDITION \ __einfo_uniqify ( EINFO_EIO, 0x15, "Failed precondition" ) #define EIO_ADMIN_PRECONDITION \ __einfo_error ( EINFO_EIO_ADMIN_PRECONDITION ) #define EINFO_EIO_ADMIN_INTERNAL \ __einfo_uniqify ( EINFO_EIO, 0x16, "Internal error" ) #define EIO_ADMIN_INTERNAL \ __einfo_error ( EINFO_EIO_ADMIN_INTERNAL ) #define EINFO_EIO_ADMIN_INVAL \ __einfo_uniqify ( EINFO_EIO, 0x17, "Invalid argument" ) #define EIO_ADMIN_INVAL \ __einfo_error ( EINFO_EIO_ADMIN_INVAL ) #define EINFO_EIO_ADMIN_NOT_FOUND \ __einfo_uniqify ( EINFO_EIO, 0x18, "Not found" ) #define EIO_ADMIN_NOT_FOUND \ __einfo_error ( EINFO_EIO_ADMIN_NOT_FOUND ) #define EINFO_EIO_ADMIN_RANGE \ __einfo_uniqify ( EINFO_EIO, 0x19, "Out of range" ) #define EIO_ADMIN_RANGE \ __einfo_error ( EINFO_EIO_ADMIN_RANGE ) #define EINFO_EIO_ADMIN_PERM \ __einfo_uniqify ( EINFO_EIO, 0x1a, "Permission denied" ) #define EIO_ADMIN_PERM \ __einfo_error ( EINFO_EIO_ADMIN_PERM ) #define EINFO_EIO_ADMIN_UNAUTH \ __einfo_uniqify ( EINFO_EIO, 0x1b, "Unauthenticated" ) #define EIO_ADMIN_UNAUTH \ __einfo_error ( EINFO_EIO_ADMIN_UNAUTH ) #define EINFO_EIO_ADMIN_RESOURCE \ __einfo_uniqify ( EINFO_EIO, 0x1c, "Resource exhausted" ) #define EIO_ADMIN_RESOURCE \ __einfo_error ( EINFO_EIO_ADMIN_RESOURCE ) #define EINFO_EIO_ADMIN_UNAVAIL \ __einfo_uniqify ( EINFO_EIO, 0x1d, "Unavailable" ) #define EIO_ADMIN_UNAVAIL \ __einfo_error ( EINFO_EIO_ADMIN_UNAVAIL ) #define EINFO_EIO_ADMIN_NOTSUP \ __einfo_uniqify ( EINFO_EIO, 0x1e, "Unimplemented" ) #define EIO_ADMIN_NOTSUP \ __einfo_error ( EINFO_EIO_ADMIN_NOTSUP ) #define EINFO_EIO_ADMIN_UNKNOWN \ __einfo_uniqify ( EINFO_EIO, 0x1f, "Unknown error" ) #define EIO_ADMIN_UNKNOWN \ __einfo_error ( EINFO_EIO_ADMIN_UNKNOWN ) #define EIO_ADMIN( status ) \ EUNIQ ( EINFO_EIO, ( (status) & 0x1f 
), \ EIO_ADMIN_UNSET, EIO_ADMIN_ABORTED, EIO_ADMIN_EXISTS, \ EIO_ADMIN_CANCELLED, EIO_ADMIN_DATALOSS, \ EIO_ADMIN_DEADLINE, EIO_ADMIN_PRECONDITION, \ EIO_ADMIN_INTERNAL, EIO_ADMIN_INVAL, \ EIO_ADMIN_NOT_FOUND, EIO_ADMIN_RANGE, EIO_ADMIN_PERM, \ EIO_ADMIN_UNAUTH, EIO_ADMIN_RESOURCE, \ EIO_ADMIN_UNAVAIL, EIO_ADMIN_NOTSUP, EIO_ADMIN_UNKNOWN ) /****************************************************************************** * * Buffer layout * ****************************************************************************** */ /** * Get buffer offset (within queue page list allocation) * * @v queue Descriptor queue * @v tag Buffer tag * @ret addr Buffer address within queue page list address space */ static inline __attribute__ (( always_inline)) size_t gve_offset ( struct gve_queue *queue, unsigned int tag ) { /* We allocate sufficient pages for the maximum fill level of * buffers, and reuse the buffers in strict rotation as they * are released by the hardware. */ assert ( tag < queue->fill ); return ( tag * GVE_BUF_SIZE ); } /** * Get buffer address (within queue page list address space) * * @v queue Descriptor queue * @v tag Buffer tag * @ret addr Buffer address within queue page list address space */ static inline __attribute__ (( always_inline)) physaddr_t gve_address ( struct gve_queue *queue, unsigned int tag ) { /* Pages are allocated as a single contiguous block */ return ( queue->qpl.base + gve_offset ( queue, tag ) ); } /** * Get buffer address * * @v queue Descriptor queue * @v tag Buffer tag * @ret addr Buffer address */ static inline __attribute__ (( always_inline )) void * gve_buffer ( struct gve_queue *queue, unsigned int tag ) { /* Pages are allocated as a single contiguous block */ return ( queue->qpl.data + gve_offset ( queue, tag ) ); } /****************************************************************************** * * Device reset * ****************************************************************************** */ /** * Reset hardware * * @v gve GVE 
device * @ret rc Return status code */ static int gve_reset ( struct gve_nic *gve ) { uint32_t pfn; unsigned int i; /* Skip reset if admin queue page frame number is already * clear. Triggering a reset on an already-reset device seems * to cause a delayed reset to be scheduled. This can cause * the device to end up in a reset loop, where each attempt to * recover from reset triggers another reset a few seconds * later. */ pfn = readl ( gve->cfg + GVE_CFG_ADMIN_PFN ); if ( ! pfn ) { DBGC ( gve, "GVE %p skipping reset\n", gve ); return 0; } /* Clear admin queue page frame number */ writel ( 0, gve->cfg + GVE_CFG_ADMIN_PFN ); wmb(); /* Wait for device to reset */ for ( i = 0 ; i < GVE_RESET_MAX_WAIT_MS ; i++ ) { /* Delay */ mdelay ( 1 ); /* Check for reset completion */ pfn = readl ( gve->cfg + GVE_CFG_ADMIN_PFN ); if ( ! pfn ) return 0; } DBGC ( gve, "GVE %p reset timed out (PFN %#08x devstat %#08x)\n", gve, bswap_32 ( pfn ), bswap_32 ( readl ( gve->cfg + GVE_CFG_DEVSTAT ) ) ); return -ETIMEDOUT; } /****************************************************************************** * * Admin queue * ****************************************************************************** */ /** * Get operating mode name (for debugging) * * @v mode Operating mode * @ret name Mode name */ static inline const char * gve_mode_name ( unsigned int mode ) { static char buf[ 8 /* "XXX-XXX" + NUL */ ]; snprintf ( buf, sizeof ( buf ), "%s-%s", ( ( mode & GVE_MODE_DQO ) ? "DQO" : "GQI" ), ( ( mode & GVE_MODE_QPL ) ? 
"QPL" : "RDA" ) ); return buf; } /** * Allocate admin queue * * @v gve GVE device * @ret rc Return status code */ static int gve_admin_alloc ( struct gve_nic *gve ) { struct dma_device *dma = gve->dma; struct gve_admin *admin = &gve->admin; struct gve_scratch *scratch = &gve->scratch; size_t admin_len = ( GVE_ADMIN_COUNT * sizeof ( admin->cmd[0] ) ); size_t scratch_len = sizeof ( *scratch->buf ); int rc; /* Allocate admin queue */ admin->cmd = dma_alloc ( dma, &admin->map, admin_len, GVE_ALIGN ); if ( ! admin->cmd ) { rc = -ENOMEM; goto err_admin; } /* Allocate scratch buffer */ scratch->buf = dma_alloc ( dma, &scratch->map, scratch_len, GVE_ALIGN ); if ( ! scratch->buf ) { rc = -ENOMEM; goto err_scratch; } DBGC ( gve, "GVE %p AQ at [%08lx,%08lx) scratch [%08lx,%08lx)\n", gve, virt_to_phys ( admin->cmd ), ( virt_to_phys ( admin->cmd ) + admin_len ), virt_to_phys ( scratch->buf ), ( virt_to_phys ( scratch->buf ) + scratch_len ) ); return 0; dma_free ( &scratch->map, scratch->buf, scratch_len ); err_scratch: dma_free ( &admin->map, admin->cmd, admin_len ); err_admin: return rc; } /** * Free admin queue * * @v gve GVE device */ static void gve_admin_free ( struct gve_nic *gve ) { struct gve_admin *admin = &gve->admin; struct gve_scratch *scratch = &gve->scratch; size_t admin_len = ( GVE_ADMIN_COUNT * sizeof ( admin->cmd[0] ) ); size_t scratch_len = sizeof ( *scratch->buf ); /* Free scratch buffer */ dma_free ( &scratch->map, scratch->buf, scratch_len ); /* Free admin queue */ dma_free ( &admin->map, admin->cmd, admin_len ); } /** * Enable admin queue * * @v gve GVE device */ static void gve_admin_enable ( struct gve_nic *gve ) { struct gve_admin *admin = &gve->admin; size_t admin_len = ( GVE_ADMIN_COUNT * sizeof ( admin->cmd[0] ) ); physaddr_t base; /* Reset queue */ admin->prod = 0; /* Program queue addresses and capabilities */ base = dma ( &admin->map, admin->cmd ); writel ( bswap_32 ( base / GVE_PAGE_SIZE ), gve->cfg + GVE_CFG_ADMIN_PFN ); writel ( bswap_32 ( base 
& 0xffffffffUL ), gve->cfg + GVE_CFG_ADMIN_BASE_LO ); if ( sizeof ( base ) > sizeof ( uint32_t ) ) { writel ( bswap_32 ( ( ( uint64_t ) base ) >> 32 ), gve->cfg + GVE_CFG_ADMIN_BASE_HI ); } else { writel ( 0, gve->cfg + GVE_CFG_ADMIN_BASE_HI ); } writel ( bswap_16 ( admin_len ), gve->cfg + GVE_CFG_ADMIN_LEN ); writel ( bswap_32 ( GVE_CFG_DRVSTAT_RUN ), gve->cfg + GVE_CFG_DRVSTAT ); } /** * Get next available admin queue command slot * * @v gve GVE device * @ret cmd Admin queue command */ static union gve_admin_command * gve_admin_command ( struct gve_nic *gve ) { struct gve_admin *admin = &gve->admin; union gve_admin_command *cmd; unsigned int index; /* Get next command slot */ index = admin->prod; cmd = &admin->cmd[ index % GVE_ADMIN_COUNT ]; /* Initialise request */ memset ( cmd, 0, sizeof ( *cmd ) ); return cmd; } /** * Wait for admin queue command to complete * * @v gve GVE device * @ret rc Return status code */ static int gve_admin_wait ( struct gve_nic *gve ) { struct gve_admin *admin = &gve->admin; uint32_t evt; uint32_t pfn; unsigned int i; /* Wait for any outstanding commands to complete */ for ( i = 0 ; i < GVE_ADMIN_MAX_WAIT_MS ; i++ ) { /* Check event counter */ rmb(); evt = bswap_32 ( readl ( gve->cfg + GVE_CFG_ADMIN_EVT ) ); if ( evt == admin->prod ) return 0; /* Check for device reset */ pfn = readl ( gve->cfg + GVE_CFG_ADMIN_PFN ); if ( ! pfn ) break; /* Delay */ mdelay ( 1 ); } DBGC ( gve, "GVE %p AQ %#02x %s (completed %#02x, status %#08x)\n", gve, admin->prod, ( pfn ? "timed out" : "saw reset" ), evt, bswap_32 ( readl ( gve->cfg + GVE_CFG_DEVSTAT ) ) ); return ( pfn ? 
-ETIMEDOUT : -ECONNRESET ); } /** * Issue admin queue command * * @v gve GVE device * @ret rc Return status code */ static int gve_admin ( struct gve_nic *gve ) { struct gve_admin *admin = &gve->admin; union gve_admin_command *cmd; unsigned int index; uint32_t opcode; uint32_t status; int rc; /* Ensure admin queue is idle */ if ( ( rc = gve_admin_wait ( gve ) ) != 0 ) return rc; /* Get next command slot */ index = admin->prod; cmd = &admin->cmd[ index % GVE_ADMIN_COUNT ]; opcode = cmd->hdr.opcode; DBGC2 ( gve, "GVE %p AQ %#02x command %#04x request:\n", gve, index, opcode ); DBGC2_HDA ( gve, 0, cmd, sizeof ( *cmd ) ); /* Increment producer counter */ admin->prod++; /* Ring doorbell */ wmb(); writel ( bswap_32 ( admin->prod ), gve->cfg + GVE_CFG_ADMIN_DB ); /* Wait for command to complete */ if ( ( rc = gve_admin_wait ( gve ) ) != 0 ) return rc; /* Check command status */ status = be32_to_cpu ( cmd->hdr.status ); if ( status != GVE_ADMIN_STATUS_OK ) { rc = -EIO_ADMIN ( status ); DBGC ( gve, "GVE %p AQ %#02x command %#04x failed: %#08x\n", gve, index, opcode, status ); DBGC_HDA ( gve, 0, cmd, sizeof ( *cmd ) ); DBGC ( gve, "GVE %p AQ error: %s\n", gve, strerror ( rc ) ); return rc; } DBGC2 ( gve, "GVE %p AQ %#02x command %#04x result:\n", gve, index, opcode ); DBGC2_HDA ( gve, 0, cmd, sizeof ( *cmd ) ); return 0; } /** * Issue simple admin queue command * * @v gve GVE device * @v opcode Operation code * @v id ID parameter (or zero if not applicable) * @ret rc Return status code * * Several admin queue commands take either an empty parameter list or * a single 32-bit ID parameter. 
*/ static int gve_admin_simple ( struct gve_nic *gve, unsigned int opcode, unsigned int id ) { union gve_admin_command *cmd; int rc; /* Construct request */ cmd = gve_admin_command ( gve ); cmd->hdr.opcode = opcode; cmd->simple.id = cpu_to_be32 ( id ); /* Issue command */ if ( ( rc = gve_admin ( gve ) ) != 0 ) return rc; return 0; } /** * Get device descriptor * * @v gve GVE device * @ret rc Return status code */ static int gve_describe ( struct gve_nic *gve ) { struct net_device *netdev = gve->netdev; struct gve_device_descriptor *desc = &gve->scratch.buf->desc; union gve_admin_command *cmd; struct gve_option *opt; unsigned int count; unsigned int id; size_t offset; size_t max; size_t len; int rc; /* Construct request */ cmd = gve_admin_command ( gve ); cmd->hdr.opcode = GVE_ADMIN_DESCRIBE; cmd->desc.addr = cpu_to_be64 ( dma ( &gve->scratch.map, desc ) ); cmd->desc.ver = cpu_to_be32 ( GVE_ADMIN_DESCRIBE_VER ); cmd->desc.len = cpu_to_be32 ( sizeof ( *desc ) ); /* Issue command */ if ( ( rc = gve_admin ( gve ) ) != 0 ) return rc; DBGC2 ( gve, "GVE %p device descriptor:\n", gve ); DBGC2_HDA ( gve, 0, desc, sizeof ( *desc ) ); /* Extract queue parameters */ gve->events.count = be16_to_cpu ( desc->counters ); gve->tx.count = be16_to_cpu ( desc->tx_count ); gve->rx.count = be16_to_cpu ( desc->rx_count ); DBGC ( gve, "GVE %p using %d TX, %d RX, %d events\n", gve, gve->tx.count, gve->rx.count, gve->events.count ); /* Extract network parameters */ build_assert ( sizeof ( desc->mac ) == ETH_ALEN ); memcpy ( netdev->hw_addr, &desc->mac, sizeof ( desc->mac ) ); netdev->mtu = be16_to_cpu ( desc->mtu ); netdev->max_pkt_len = ( netdev->mtu + ETH_HLEN ); DBGC ( gve, "GVE %p MAC %s (\"%s\") MTU %zd\n", gve, eth_ntoa ( netdev->hw_addr ), inet_ntoa ( desc->mac.in ), netdev->mtu ); /* Parse options */ count = be16_to_cpu ( desc->opt_count ); max = be16_to_cpu ( desc->len ); gve->options = 0; for ( offset = offsetof ( typeof ( *desc ), opts ) ; count ; count--, offset += len ) { /* 
Check space for option header */ if ( ( offset + sizeof ( *opt ) ) > max ) { DBGC ( gve, "GVE %p underlength option at +%#02zx:\n", gve, offset ); DBGC_HDA ( gve, 0, desc, sizeof ( *desc ) ); return -EINVAL; } opt = ( ( ( void * ) desc ) + offset ); /* Check space for option body */ len = ( sizeof ( *opt ) + be16_to_cpu ( opt->len ) ); if ( ( offset + len ) > max ) { DBGC ( gve, "GVE %p malformed option at +%#02zx:\n", gve, offset ); DBGC_HDA ( gve, 0, desc, sizeof ( *desc ) ); return -EINVAL; } /* Record option as supported */ id = be16_to_cpu ( opt->id ); if ( id < ( 8 * sizeof ( gve->options ) ) ) gve->options |= ( 1 << id ); } DBGC ( gve, "GVE %p supports options %#08x\n", gve, gve->options ); /* Select preferred operating mode */ if ( gve->options & ( 1 << GVE_OPT_GQI_QPL ) ) { /* GQI-QPL: in-order queues, queue page list addressing */ gve->mode = GVE_MODE_QPL; } else if ( gve->options & ( 1 << GVE_OPT_GQI_RDA ) ) { /* GQI-RDA: in-order queues, raw DMA addressing */ gve->mode = 0; } else if ( gve->options & ( 1 << GVE_OPT_DQO_QPL ) ) { /* DQO-QPL: out-of-order queues, queue page list addressing */ gve->mode = ( GVE_MODE_DQO | GVE_MODE_QPL ); } else if ( gve->options & ( 1 << GVE_OPT_DQO_RDA ) ) { /* DQO-RDA: out-of-order queues, raw DMA addressing */ gve->mode = GVE_MODE_DQO; } else { /* No options matched: assume the original GQI-QPL mode */ gve->mode = GVE_MODE_QPL; } DBGC ( gve, "GVE %p using %s mode\n", gve, gve_mode_name ( gve->mode ) ); return 0; } /** * Configure device resources * * @v gve GVE device * @ret rc Return status code */ static int gve_configure ( struct gve_nic *gve ) { struct gve_events *events = &gve->events; struct gve_irqs *irqs = &gve->irqs; union gve_admin_command *cmd; uint32_t doorbell; unsigned int db_off; unsigned int i; int rc; /* Construct request */ cmd = gve_admin_command ( gve ); cmd->hdr.opcode = GVE_ADMIN_CONFIGURE; cmd->conf.events = cpu_to_be64 ( dma ( &events->map, events->event ) ); cmd->conf.irqs = cpu_to_be64 ( dma ( 
&irqs->map, irqs->irq ) ); cmd->conf.num_events = cpu_to_be32 ( events->count ); cmd->conf.num_irqs = cpu_to_be32 ( GVE_IRQ_COUNT ); cmd->conf.irq_stride = cpu_to_be32 ( sizeof ( irqs->irq[0] ) ); cmd->conf.format = GVE_FORMAT ( gve->mode ); /* Issue command */ if ( ( rc = gve_admin ( gve ) ) != 0 ) return rc; /* Disable all interrupts */ doorbell = ( ( gve->mode & GVE_MODE_DQO ) ? 0 : bswap_32 ( GVE_GQI_IRQ_DISABLE ) ); for ( i = 0 ; i < GVE_IRQ_COUNT ; i++ ) { db_off = ( be32_to_cpu ( irqs->irq[i].db_idx ) * sizeof ( uint32_t ) ); DBGC ( gve, "GVE %p IRQ %d doorbell +%#04x\n", gve, i, db_off ); irqs->db[i] = ( gve->db + db_off ); writel ( doorbell, irqs->db[i] ); } return 0; } /** * Deconfigure device resources * * @v gve GVE device * @ret rc Return status code */ static int gve_deconfigure ( struct gve_nic *gve ) { int rc; /* Issue command (with meaningless ID) */ if ( ( rc = gve_admin_simple ( gve, GVE_ADMIN_DECONFIGURE, 0 ) ) != 0 ) return rc; return 0; } /** * Register queue page list * * @v gve GVE device * @v qpl Queue page list * @ret rc Return status code */ static int gve_register ( struct gve_nic *gve, struct gve_qpl *qpl ) { struct gve_pages *pages = &gve->scratch.buf->pages; union gve_admin_command *cmd; void *addr; unsigned int i; int rc; /* Do nothing if using raw DMA addressing */ if ( ! 
( gve->mode & GVE_MODE_QPL ) ) return 0; /* Build page address list */ for ( i = 0 ; i < qpl->count ; i++ ) { addr = ( qpl->data + ( i * GVE_PAGE_SIZE ) ); pages->addr[i] = cpu_to_be64 ( dma ( &qpl->map, addr ) ); } /* Construct request */ cmd = gve_admin_command ( gve ); cmd->hdr.opcode = GVE_ADMIN_REGISTER; cmd->reg.id = cpu_to_be32 ( qpl->id ); cmd->reg.count = cpu_to_be32 ( qpl->count ); cmd->reg.addr = cpu_to_be64 ( dma ( &gve->scratch.map, pages ) ); cmd->reg.size = cpu_to_be64 ( GVE_PAGE_SIZE ); /* Issue command */ if ( ( rc = gve_admin ( gve ) ) != 0 ) return rc; return 0; } /** * Unregister page list * * @v gve GVE device * @v qpl Queue page list * @ret rc Return status code */ static int gve_unregister ( struct gve_nic *gve, struct gve_qpl *qpl ) { int rc; /* Do nothing if using raw DMA addressing */ if ( ! ( gve->mode & GVE_MODE_QPL ) ) return 0; /* Issue command */ if ( ( rc = gve_admin_simple ( gve, GVE_ADMIN_UNREGISTER, qpl->id ) ) != 0 ) { return rc; } return 0; } /** * Construct command to create transmit queue * * @v queue Transmit queue * @v qpl Queue page list ID * @v cmd Admin queue command */ static void gve_create_tx_param ( struct gve_queue *queue, uint32_t qpl, union gve_admin_command *cmd ) { struct gve_admin_create_tx *create = &cmd->create_tx; const struct gve_queue_type *type = queue->type; /* Construct request parameters */ create->res = cpu_to_be64 ( dma ( &queue->res_map, queue->res ) ); create->desc = cpu_to_be64 ( dma ( &queue->desc_map, queue->desc.raw ) ); create->qpl_id = cpu_to_be32 ( qpl ); create->notify_id = cpu_to_be32 ( type->irq ); create->desc_count = cpu_to_be16 ( queue->count ); if ( queue->cmplt.raw ) { create->cmplt = cpu_to_be64 ( dma ( &queue->cmplt_map, queue->cmplt.raw ) ); create->cmplt_count = cpu_to_be16 ( queue->count ); } } /** * Construct command to create receive queue * * @v queue Receive queue * @v qpl Queue page list ID * @v cmd Admin queue command */ static void gve_create_rx_param ( struct gve_queue 
*queue, uint32_t qpl, union gve_admin_command *cmd ) { struct gve_admin_create_rx *create = &cmd->create_rx; const struct gve_queue_type *type = queue->type; /* Construct request parameters */ create->notify_id = cpu_to_be32 ( type->irq ); create->res = cpu_to_be64 ( dma ( &queue->res_map, queue->res ) ); create->desc = cpu_to_be64 ( dma ( &queue->desc_map, queue->desc.raw ) ); create->cmplt = cpu_to_be64 ( dma ( &queue->cmplt_map, queue->cmplt.raw ) ); create->qpl_id = cpu_to_be32 ( qpl ); create->desc_count = cpu_to_be16 ( queue->count ); create->bufsz = cpu_to_be16 ( GVE_BUF_SIZE ); create->cmplt_count = cpu_to_be16 ( queue->count ); } /** * Create transmit or receive queue * * @v gve GVE device * @v queue Descriptor queue * @ret rc Return status code */ static int gve_create_queue ( struct gve_nic *gve, struct gve_queue *queue ) { const struct gve_queue_type *type = queue->type; const struct gve_queue_stride *stride = &queue->stride; union gve_admin_command *cmd; struct gve_buffer *buf; unsigned int db_off; unsigned int evt_idx; unsigned int tag; unsigned int i; uint32_t qpl; int rc; /* Reset queue */ queue->prod = 0; queue->cons = 0; queue->done = 0; memset ( queue->desc.raw, 0, ( queue->count * stride->desc ) ); memset ( queue->cmplt.raw, 0, ( queue->count * stride->cmplt ) ); for ( i = 0 ; i < queue->fill ; i++ ) queue->tag[i] = i; /* Pre-populate descriptor offsets for in-order queues */ if ( ! ( gve->mode & GVE_MODE_DQO ) ) { buf = ( queue->desc.raw + stride->desc - sizeof ( *buf ) ); for ( i = 0 ; i < queue->count ; i++ ) { tag = ( i & ( queue->fill - 1 ) ); buf->addr = cpu_to_be64 ( gve_address ( queue, tag ) ); buf = ( ( ( void * ) buf ) + stride->desc ); } } /* Construct request */ cmd = gve_admin_command ( gve ); cmd->hdr.opcode = type->create; qpl = ( ( gve->mode & GVE_MODE_QPL ) ? 
type->qpl : GVE_RAW_QPL ); type->param ( queue, qpl, cmd ); /* Issue command */ if ( ( rc = gve_admin ( gve ) ) != 0 ) return rc; /* Record indices */ db_off = ( be32_to_cpu ( queue->res->db_idx ) * sizeof ( uint32_t ) ); evt_idx = be32_to_cpu ( queue->res->evt_idx ); DBGC ( gve, "GVE %p %s doorbell +%#04x event counter %d\n", gve, type->name, db_off, evt_idx ); queue->db = ( gve->db + db_off ); assert ( evt_idx < gve->events.count ); queue->event = &gve->events.event[evt_idx]; assert ( queue->event->count == 0 ); /* Unmask dummy interrupt */ pci_msix_unmask ( &gve->msix, type->irq ); /* Rearm queue interrupt if applicable */ if ( gve->mode & GVE_MODE_DQO ) writel ( GVE_DQO_IRQ_REARM, gve->irqs.db[type->irq] ); return 0; } /** * Destroy transmit or receive queue * * @v gve GVE device * @v queue Descriptor queue * @ret rc Return status code */ static int gve_destroy_queue ( struct gve_nic *gve, struct gve_queue *queue ) { const struct gve_queue_type *type = queue->type; int rc; /* Mask dummy interrupt */ pci_msix_mask ( &gve->msix, type->irq ); /* Issue command */ if ( ( rc = gve_admin_simple ( gve, type->destroy, 0 ) ) != 0 ) return rc; return 0; } /****************************************************************************** * * Network device interface * ****************************************************************************** */ /** * Allocate shared queue resources * * @v gve GVE device * @ret rc Return status code */ static int gve_alloc_shared ( struct gve_nic *gve ) { struct dma_device *dma = gve->dma; struct gve_irqs *irqs = &gve->irqs; struct gve_events *events = &gve->events; size_t irqs_len = ( GVE_IRQ_COUNT * sizeof ( irqs->irq[0] ) ); size_t events_len = ( gve->events.count * sizeof ( events->event[0] ) ); int rc; /* Allocate interrupt channels */ irqs->irq = dma_alloc ( dma, &irqs->map, irqs_len, GVE_ALIGN ); if ( ! 
irqs->irq ) { rc = -ENOMEM; goto err_irqs; } DBGC ( gve, "GVE %p IRQs at [%08lx,%08lx)\n", gve, virt_to_phys ( irqs->irq ), ( virt_to_phys ( irqs->irq ) + irqs_len ) ); /* Allocate event counters */ events->event = dma_alloc ( dma, &events->map, events_len, GVE_ALIGN ); if ( ! events->event ) { rc = -ENOMEM; goto err_events; } DBGC ( gve, "GVE %p events at [%08lx,%08lx)\n", gve, virt_to_phys ( events->event ), ( virt_to_phys ( events->event ) + events_len ) ); return 0; dma_free ( &events->map, events->event, events_len ); err_events: dma_free ( &irqs->map, irqs->irq, irqs_len ); err_irqs: return rc; } /** * Free shared queue resources * * @v gve GVE device */ static void gve_free_shared ( struct gve_nic *gve ) { struct gve_irqs *irqs = &gve->irqs; struct gve_events *events = &gve->events; size_t irqs_len = ( GVE_IRQ_COUNT * sizeof ( irqs->irq[0] ) ); size_t events_len = ( gve->events.count * sizeof ( events->event[0] ) ); /* Free event counters */ dma_free ( &events->map, events->event, events_len ); /* Free interrupt channels */ dma_free ( &irqs->map, irqs->irq, irqs_len ); } /** * Allocate queue page list * * @v gve GVE device * @v qpl Queue page list * @v id Queue page list ID * @v buffers Number of data buffers * @ret rc Return status code */ static int gve_alloc_qpl ( struct gve_nic *gve, struct gve_qpl *qpl, uint32_t id, unsigned int buffers ) { size_t len; /* Record ID */ qpl->id = id; /* Calculate number of pages required */ build_assert ( GVE_BUF_SIZE <= GVE_PAGE_SIZE ); qpl->count = ( ( buffers + GVE_BUF_PER_PAGE - 1 ) / GVE_BUF_PER_PAGE ); assert ( qpl->count <= GVE_QPL_MAX ); /* Allocate pages (as a single block) */ len = ( qpl->count * GVE_PAGE_SIZE ); qpl->data = dma_umalloc ( gve->dma, &qpl->map, len, GVE_ALIGN ); if ( ! qpl->data ) return -ENOMEM; qpl->base = ( ( gve->mode == GVE_MODE_QPL ) ? 
0 : dma ( &qpl->map, qpl->data ) ); DBGC ( gve, "GVE %p QPL %#08x at [%08lx,%08lx)\n", gve, qpl->id, virt_to_phys ( qpl->data ), ( virt_to_phys ( qpl->data ) + len ) ); return 0; } /** * Free queue page list * * @v gve GVE device * @v qpl Queue page list */ static void gve_free_qpl ( struct gve_nic *nic __unused, struct gve_qpl *qpl ) { size_t len = ( qpl->count * GVE_PAGE_SIZE ); /* Free pages */ dma_ufree ( &qpl->map, qpl->data, len ); } /** * Calculate next receive sequence number * * @v seq Current sequence number, or zero to start sequence * @ret next Next sequence number */ static inline __attribute__ (( always_inline )) unsigned int gve_next ( unsigned int seq ) { /* The receive completion sequence number is a modulo 7 * counter that cycles through the non-zero three-bit values 1 * to 7 inclusive. * * Since 7 is coprime to 2^n, this ensures that the sequence * number changes each time that a new completion is written * to memory. * * Since the counter takes only non-zero values, this ensures * that the sequence number changes whenever a new completion * is first written to a zero-initialised completion ring. */ seq = ( ( seq + 1 ) & GVE_GQI_RX_SEQ_MASK ); return ( seq ? seq : 1 ); } /** * Allocate descriptor queue * * @v gve GVE device * @v queue Descriptor queue * @ret rc Return status code */ static int gve_alloc_queue ( struct gve_nic *gve, struct gve_queue *queue ) { const struct gve_queue_type *type = queue->type; struct gve_queue_stride *stride = &queue->stride; struct dma_device *dma = gve->dma; size_t desc_len; size_t cmplt_len; size_t res_len; int rc; /* Sanity checks */ if ( ( queue->count == 0 ) || ( queue->count & ( queue->count - 1 ) ) ) { DBGC ( gve, "GVE %p %s invalid queue size %d\n", gve, type->name, queue->count ); rc = -EINVAL; goto err_sanity; } /* Set queue strides and calculate total lengths */ *stride = ( ( gve->mode & GVE_MODE_DQO ) ? 
type->stride.dqo : type->stride.gqi ); desc_len = ( queue->count * stride->desc ); cmplt_len = ( queue->count * stride->cmplt ); res_len = sizeof ( *queue->res ); /* Calculate maximum fill level */ assert ( ( type->fill & ( type->fill - 1 ) ) == 0 ); queue->fill = type->fill; if ( queue->fill > queue->count ) queue->fill = queue->count; DBGC ( gve, "GVE %p %s using QPL %#08x with %d/%d descriptors\n", gve, type->name, type->qpl, queue->fill, queue->count ); /* Allocate queue page list */ if ( ( rc = gve_alloc_qpl ( gve, &queue->qpl, type->qpl, queue->fill ) ) != 0 ) goto err_qpl; /* Allocate descriptors */ queue->desc.raw = dma_umalloc ( dma, &queue->desc_map, desc_len, GVE_ALIGN ); if ( ! queue->desc.raw ) { rc = -ENOMEM; goto err_desc; } DBGC ( gve, "GVE %p %s descriptors at [%08lx,%08lx)\n", gve, type->name, virt_to_phys ( queue->desc.raw ), ( virt_to_phys ( queue->desc.raw ) + desc_len ) ); /* Allocate completions */ if ( cmplt_len ) { queue->cmplt.raw = dma_umalloc ( dma, &queue->cmplt_map, cmplt_len, GVE_ALIGN ); if ( ! queue->cmplt.raw ) { rc = -ENOMEM; goto err_cmplt; } DBGC ( gve, "GVE %p %s completions at [%08lx,%08lx)\n", gve, type->name, virt_to_phys ( queue->cmplt.raw ), ( virt_to_phys ( queue->cmplt.raw ) + cmplt_len ) ); } /* Allocate queue resources */ queue->res = dma_alloc ( dma, &queue->res_map, res_len, GVE_ALIGN ); if ( ! 
queue->res ) {
		rc = -ENOMEM;
		goto err_res;
	}
	memset ( queue->res, 0, res_len );

	return 0;

	dma_free ( &queue->res_map, queue->res, res_len );
 err_res:
	if ( cmplt_len )
		dma_ufree ( &queue->cmplt_map, queue->cmplt.raw, cmplt_len );
 err_cmplt:
	dma_ufree ( &queue->desc_map, queue->desc.raw, desc_len );
 err_desc:
	gve_free_qpl ( gve, &queue->qpl );
 err_qpl:
 err_sanity:
	return rc;
}

/**
 * Free descriptor queue
 *
 * Releases the queue resources block, the completion ring (if the
 * queue has one), the descriptor ring, and the queue page list.
 *
 * @v gve		GVE device
 * @v queue		Descriptor queue
 */
static void gve_free_queue ( struct gve_nic *gve, struct gve_queue *queue ) {
	const struct gve_queue_stride *stride = &queue->stride;
	/* Ring lengths are recomputed from the entry count and strides */
	size_t desc_len = ( queue->count * stride->desc );
	size_t cmplt_len = ( queue->count * stride->cmplt );
	size_t res_len = sizeof ( *queue->res );

	/* Free queue resources */
	dma_free ( &queue->res_map, queue->res, res_len );

	/* Free completions, if applicable
	 *
	 * A zero completion length means that there is no completion
	 * ring to free.
	 */
	if ( cmplt_len )
		dma_ufree ( &queue->cmplt_map, queue->cmplt.raw, cmplt_len );

	/* Free descriptors */
	dma_ufree ( &queue->desc_map, queue->desc.raw, desc_len );

	/* Free queue page list */
	gve_free_qpl ( gve, &queue->qpl );
}

/**
 * Cancel any pending transmissions
 *
 * Every I/O buffer still recorded in the transmit I/O buffer table
 * is completed with -ECANCELED.
 *
 * @v gve		GVE device
 */
static void gve_cancel_tx ( struct gve_nic *gve ) {
	struct net_device *netdev = gve->netdev;
	struct io_buffer *iobuf;
	unsigned int i;

	/* Cancel any pending transmissions */
	for ( i = 0 ; i < ( sizeof ( gve->tx_iobuf ) /
			    sizeof ( gve->tx_iobuf[0] ) ) ; i++ ) {
		/* Clear the slot before completing the buffer */
		iobuf = gve->tx_iobuf[i];
		gve->tx_iobuf[i] = NULL;
		if ( iobuf )
			netdev_tx_complete_err ( netdev, iobuf, -ECANCELED );
	}
}

/**
 * Start up device
 *
 * Cancels stale transmissions, resets the receive sequence, and then
 * configures the device, registers both queue page lists and creates
 * both queues.  Any failure unwinds the steps already completed.
 *
 * @v gve		GVE device
 * @ret rc		Return status code
 */
static int gve_start ( struct gve_nic *gve ) {
	struct gve_queue *tx = &gve->tx;
	struct gve_queue *rx = &gve->rx;
	int rc;

	/* Cancel any pending transmissions */
	gve_cancel_tx ( gve );

	/* Reset receive sequence */
	gve->seq = gve_next ( 0 );

	/* Configure device resources */
	if ( ( rc = gve_configure ( gve ) ) != 0 )
		goto err_configure;

	/* Register transmit queue page list */
	if ( ( rc = gve_register ( gve, &tx->qpl ) ) != 0 )
		goto err_register_tx;

	/* Register receive queue page list */
	if ( ( rc = gve_register ( gve, &rx->qpl ) ) != 0 )
		goto err_register_rx;

	/* Create transmit queue */
	if ( ( rc = gve_create_queue ( gve, tx ) ) != 0 )
		goto err_create_tx;

	/* Create receive queue */
	if ( ( rc = gve_create_queue ( gve, rx ) ) != 0 )
		goto err_create_rx;

	return 0;

	/* Unwind ladder: the first statement is unreachable and is
	 * present only to complete the reverse-order sequence.
	 */
	gve_destroy_queue ( gve, rx );
 err_create_rx:
	gve_destroy_queue ( gve, tx );
 err_create_tx:
	gve_unregister ( gve, &rx->qpl );
 err_register_rx:
	gve_unregister ( gve, &tx->qpl );
 err_register_tx:
	gve_deconfigure ( gve );
 err_configure:
	return rc;
}

/**
 * Stop device
 *
 * Performs the same teardown sequence as the error-unwind path of
 * gve_start(): destroy queues, unregister page lists, deconfigure.
 *
 * @v gve		GVE device
 */
static void gve_stop ( struct gve_nic *gve ) {
	struct gve_queue *tx = &gve->tx;
	struct gve_queue *rx = &gve->rx;

	/* Destroy queues */
	gve_destroy_queue ( gve, rx );
	gve_destroy_queue ( gve, tx );

	/* Unregister page lists */
	gve_unregister ( gve, &rx->qpl );
	gve_unregister ( gve, &tx->qpl );

	/* Deconfigure device */
	gve_deconfigure ( gve );
}

/**
 * Device startup process
 *
 * Resets the device, enables the admin queue and starts the device.
 * On success the link is marked up and the retry count is cleared.
 * On failure the error is recorded as the link status and the
 * process is rescheduled, for as long as the retry count has not
 * reached GVE_RESET_MAX_RETRY.
 *
 * @v gve		GVE device
 */
static void gve_startup ( struct gve_nic *gve ) {
	struct net_device *netdev = gve->netdev;
	int rc;

	/* Reset device */
	if ( ( rc = gve_reset ( gve ) ) != 0 )
		goto err_reset;

	/* Enable admin queue */
	gve_admin_enable ( gve );

	/* Start device */
	if ( ( rc = gve_start ( gve ) ) != 0 )
		goto err_start;

	/* Reset retry count */
	gve->retries = 0;

	/* (Ab)use link status to report startup status */
	netdev_link_up ( netdev );

	return;

	/* Unreachable; completes the unwind ladder */
	gve_stop ( gve );
 err_start:
 err_reset:
	DBGC ( gve, "GVE %p startup failed: %s\n", gve, strerror ( rc ) );
	netdev_link_err ( netdev, rc );
	/* Retry, with the number of attempts bounded by the counter */
	if ( gve->retries++ < GVE_RESET_MAX_RETRY )
		process_add ( &gve->startup );
}

/**
 * Trigger startup process
 *
 * @v gve		GVE device
 */
static void gve_restart ( struct gve_nic *gve ) {
	struct net_device *netdev = gve->netdev;

	/* Mark link down to inhibit polling and transmit activity
	 *
	 * Both gve_transmit() and gve_poll() return early while
	 * netdev_link_ok() is false.
	 */
	netdev_link_down ( netdev );

	/* Schedule startup process */
	process_add ( &gve->startup );
}

/**
 * Reset recovery watchdog
 *
 * Runs periodically.  If the combined transmit and receive consumer
 * counters have not changed since the previous invocation and the
 * admin queue PFN register reads back as zero, the device is taken
 * to have been reset by the host and a restart is scheduled.
 *
 * @v timer		Reset recovery watchdog timer
 * @v over		Failure indicator
 */
static void gve_watchdog ( struct retry_timer *timer, int over __unused ) {
	struct gve_nic *gve = container_of ( timer, struct gve_nic, watchdog );
	uint32_t activity;
	uint32_t pfn;
	int rc;

	/* Reschedule watchdog */
	start_timer_fixed ( &gve->watchdog, GVE_WATCHDOG_TIMEOUT );

	/* Reset device (for test purposes) if applicable
	 *
	 * The reset is simulated by zeroing the admin queue PFN
	 * register, which is the condition tested for below.
	 */
	if ( ( rc = inject_fault ( VM_MIGRATED_RATE ) ) != 0 ) {
		DBGC ( gve, "GVE %p synthesising host reset\n", gve );
		writel ( 0, gve->cfg + GVE_CFG_ADMIN_PFN );
	}

	/* Check for activity since last timer invocation
	 *
	 * Any change in the sum of the consumer counters is treated
	 * as proof that the device is still operating.
	 */
	activity = ( gve->tx.cons + gve->rx.cons );
	if ( activity != gve->activity ) {
		gve->activity = activity;
		return;
	}

	/* Check for reset */
	pfn = readl ( gve->cfg + GVE_CFG_ADMIN_PFN );
	if ( pfn ) {
		DBGC2 ( gve, "GVE %p idle but not in reset\n", gve );
		return;
	}

	/* Schedule restart */
	DBGC ( gve, "GVE %p watchdog detected reset by host\n", gve );
	gve_restart ( gve );
}

/**
 * Open network device
 *
 * Allocates the shared resources and both queues, then schedules
 * the startup process and starts the watchdog timer.  Device startup
 * itself is performed later by gve_startup(), so a zero return does
 * not mean that the device has been started.
 *
 * @v netdev		Network device
 * @ret rc		Return status code
 */
static int gve_open ( struct net_device *netdev ) {
	struct gve_nic *gve = netdev->priv;
	struct gve_queue *tx = &gve->tx;
	struct gve_queue *rx = &gve->rx;
	int rc;

	/* Allocate shared queue resources */
	if ( ( rc = gve_alloc_shared ( gve ) ) != 0 )
		goto err_alloc_shared;

	/* Allocate and prepopulate transmit queue */
	if ( ( rc = gve_alloc_queue ( gve, tx ) ) != 0 )
		goto err_alloc_tx;

	/* Allocate and prepopulate receive queue */
	if ( ( rc = gve_alloc_queue ( gve, rx ) ) != 0 )
		goto err_alloc_rx;

	/* Trigger startup */
	gve_restart ( gve );

	/* Start reset recovery watchdog timer */
	start_timer_fixed ( &gve->watchdog, GVE_WATCHDOG_TIMEOUT );

	return 0;

	/* Unreachable; completes the unwind ladder */
	gve_free_queue ( gve, rx );
 err_alloc_rx:
	gve_free_queue ( gve, tx );
 err_alloc_tx:
	gve_free_shared ( gve );
 err_alloc_shared:
	return rc;
}

/**
 * Close network device
 *
 * Stops the watchdog and any scheduled startup, stops and resets the
 * device, cancels outstanding transmissions and frees everything
 * allocated by gve_open().
 *
 * @v netdev		Network device
 */
static void gve_close ( struct net_device *netdev ) {
	struct gve_nic *gve = netdev->priv;
	struct gve_queue *tx = &gve->tx;
	struct gve_queue *rx = &gve->rx;

	/* Stop reset recovery timer */
	stop_timer ( &gve->watchdog );

	/* Terminate startup process */
	process_del ( &gve->startup );

	/* Stop and reset device */
	gve_stop ( gve );
	gve_reset ( gve );

	/* Cancel any pending transmissions */
	gve_cancel_tx ( gve );

	/* Free queues */
	gve_free_queue ( gve, rx );
	gve_free_queue ( gve, tx );

	/* Free shared queue resources */
	gve_free_shared ( gve );
}

/**
 * Transmit packet
 *
 * The packet is copied into one or more transmit buffers of at most
 * GVE_BUF_SIZE bytes each, with one descriptor per buffer.  The I/O
 * buffer is recorded against the tag of the final fragment.
 *
 * @v netdev		Network device
 * @v iobuf		I/O buffer
 * @ret rc		Return status code
 */
static int gve_transmit ( struct net_device *netdev, struct io_buffer *iobuf ) {
	struct gve_nic *gve = netdev->priv;
	struct gve_queue *tx = &gve->tx;
	struct gve_gqi_tx_descriptor *gqi;
	struct gve_dqo_tx_descriptor *dqo;
	unsigned int count;
	unsigned int index;
	unsigned int tag;
	unsigned int chain;
	uint32_t doorbell;
	size_t frag_len;
	size_t offset;
	size_t next;
	size_t len;

	/* Do nothing if queues are not yet set up */
	if ( ! netdev_link_ok ( netdev ) )
		return -ENETDOWN;

	/* Defer packet if there is no space in the transmit ring
	 *
	 * The descriptor count is the packet length divided by the
	 * buffer size, rounded up.
	 */
	len = iob_len ( iobuf );
	count = ( ( len + GVE_BUF_SIZE - 1 ) / GVE_BUF_SIZE );
	if ( ( ( tx->prod - tx->cons ) + count ) > tx->fill ) {
		netdev_tx_defer ( netdev, iobuf );
		return 0;
	}

	/* Copy packet to queue pages and populate descriptors
	 *
	 * "chain" holds the tag used for the previous fragment (zero
	 * for the first fragment).
	 */
	for ( offset = 0, chain = 0 ; ; offset = next, chain = tag ) {

		/* Identify next available buffer */
		index = ( tx->prod++ & ( tx->count - 1 ) );
		tag = tx->tag[ index % GVE_TX_FILL ];

		/* Sanity check */
		assert ( gve->tx_iobuf[tag] == NULL );

		/* Copy packet fragment */
		frag_len = ( len - offset );
		if ( frag_len > GVE_BUF_SIZE )
			frag_len = GVE_BUF_SIZE;
		memcpy ( gve_buffer ( tx, tag ),
			 ( iobuf->data + offset ), frag_len );
		next = ( offset + frag_len );

		/* Populate descriptor */
		if ( gve->mode & GVE_MODE_DQO ) {

			/* Out-of-order descriptor
			 *
			 * Only the final descriptor of the packet
			 * carries the tag and the descriptor count.
			 */
			dqo = &tx->desc.tx.dqo[index];
			dqo->buf.addr =
				cpu_to_le64 ( gve_address ( tx, tag ) );
			if ( next == len ) {
				dqo->type = ( GVE_DQO_TX_TYPE_PACKET |
					      GVE_DQO_TX_TYPE_LAST );
				dqo->tag.id = tag;
				dqo->tag.count = count;
			} else {
				dqo->type = GVE_DQO_TX_TYPE_PACKET;
				dqo->tag.id = 0;
				dqo->tag.count = 0;
			}
			dqo->len = cpu_to_le16 ( frag_len );
			/* Link back to the previous fragment's tag so
			 * that gve_poll_tx() can walk the chain from
			 * the final tag.
			 */
			gve->tx_chain[tag] = chain;

		} else {

			/* In-order descriptor
			 *
			 * Only the first descriptor of the packet
			 * carries the descriptor count and the total
			 * length.
			 */
			gqi = &tx->desc.tx.gqi[index];
			if ( offset ) {
				gqi->type = GVE_GQI_TX_TYPE_CONT;
				gqi->count = 0;
				gqi->total = 0;
			} else {
				gqi->type = GVE_GQI_TX_TYPE_START;
				gqi->count = count;
				gqi->total = cpu_to_be16 ( len );
			}
			gqi->len = cpu_to_be16 ( frag_len );
		}
		DBGC2 ( gve, "GVE %p TXD %#04x %#02x:%#02x len %#04zx/%#04zx "
			"at %#08lx\n", gve, index, tag, count, frag_len, len,
			gve_address ( tx, tag ) );

		/* Record I/O buffer against final descriptor */
		if ( next == len ) {
			gve->tx_iobuf[tag] = iobuf;
			break;
		}
	}
	assert ( ( tx->prod - tx->cons ) <= tx->fill );

	/* Ring doorbell
	 *
	 * The out-of-order doorbell value is the producer counter
	 * masked to a ring index; the in-order doorbell value is the
	 * byte-swapped producer counter.
	 */
	doorbell = tx->prod;
	if ( gve->mode & GVE_MODE_DQO ) {
		doorbell &= ( tx->count - 1 );
	} else {
		doorbell = bswap_32 ( doorbell );
	}
	/* Write barrier precedes the doorbell write (presumably so
	 * that the descriptors are visible to the device first)
	 */
	wmb();
	writel ( doorbell, tx->db );

	return 0;
}

/**
 * Poll for completed transmissions
 *
 * In out-of-order mode the completion ring is walked using its
 * generation bit.  In in-order mode descriptors are completed up to
 * the value of the event counter.
 *
 * @v netdev		Network device
 */
static void gve_poll_tx ( struct net_device *netdev ) {
	struct gve_nic *gve = netdev->priv;
	struct gve_queue *tx = &gve->tx;
	struct gve_dqo_tx_completion *dqo;
	struct io_buffer *iobuf;
	unsigned int index;
	unsigned int gen;
	unsigned int bit;
	unsigned int tag;
	uint32_t count;

	/* Process transmit completions */
	if ( gve->mode & GVE_MODE_DQO ) {

		/* Out-of-order completions */
		while ( 1 ) {

			/* Read next possible completion
			 *
			 * "gen" changes state each time the
			 * completion counter wraps around the ring.
			 */
			gen = ( tx->done & tx->count );
			index = ( tx->done & ( tx->count - 1 ) );
			dqo = &tx->cmplt.tx.dqo[index];

			/* Check generation bit
			 *
			 * Stop at the first entry whose generation
			 * flag still matches the current pass.
			 */
			bit = ( dqo->flags & GVE_DQO_TXF_GEN );
			if ( ( !! bit ) == ( !! gen ) )
				break;
			rmb();
			tx->done++;

			/* Ignore non-packet completions */
			if ( ( ! ( dqo->flags & GVE_DQO_TXF_PKT ) ) ||
			     ( dqo->tag.count < 0 ) ) {
				DBGC2 ( gve, "GVE %p TXC %#04x flags %#02x "
					"ignored\n", gve, index, dqo->flags );
				continue;
			}

			/* Parse completion */
			tag = dqo->tag.id;
			count = dqo->tag.count;
			iobuf = gve->tx_iobuf[tag];
			gve->tx_iobuf[tag] = NULL;
			assert ( iobuf != NULL );

			/* Return completed descriptors to ring
			 *
			 * Walk back through the tag chain recorded
			 * by gve_transmit(), returning one tag per
			 * descriptor.
			 */
			while ( count-- ) {
				DBGC2 ( gve, "GVE %p TXC %#04x %#02x:%#02x "
					"complete\n", gve, index, tag,
					dqo->tag.count );
				tx->tag[ tx->cons++ % GVE_TX_FILL ] = tag;
				tag = gve->tx_chain[tag];
			}

			/* Hand off to network stack */
			if ( iobuf )
				netdev_tx_complete ( netdev, iobuf );
		}

	} else {

		/* Read event counter */
		count = be32_to_cpu ( tx->event->count );

		/* Process transmit completions
		 *
		 * Only the slot for a packet's final fragment holds
		 * an I/O buffer (see gve_transmit()); other slots
		 * are NULL and are simply skipped.
		 */
		while ( count != tx->cons ) {
			DBGC2 ( gve, "GVE %p TXC %#04x complete\n",
				gve, tx->cons );
			tag = ( tx->cons % GVE_TX_FILL );
			iobuf = gve->tx_iobuf[tag];
			gve->tx_iobuf[tag] = NULL;
			tx->cons++;
			if ( iobuf )
				netdev_tx_complete ( netdev, iobuf );
		}
	}
}

/**
 * Poll for received packets
 *
 * Completions are first scanned using local copies of the completion
 * counter and sequence number.  Only once a whole packet (or an
 * error) has been seen are the completions re-read, copied into a
 * newly allocated I/O buffer, and the queue counters advanced.  If
 * the scan stops part-way through a packet, the queue counters are
 * left untouched, so the same completions are scanned again on the
 * next poll.
 *
 * @v netdev		Network device
 */
static void gve_poll_rx ( struct net_device *netdev ) {
	struct gve_nic *gve = netdev->priv;
	struct gve_queue *rx = &gve->rx;
	struct gve_gqi_rx_completion *gqi;
	struct gve_dqo_rx_completion *dqo;
	struct io_buffer *iobuf;
	unsigned int index;
	unsigned int gen;
	unsigned int bit;
	unsigned int seq;
	unsigned int tag;
	uint32_t done;
	size_t total;
	size_t len;
	int rc;

	/* Process receive completions */
	done = rx->done;
	seq = gve->seq;
	total = 0;
	while ( 1 ) {

		/* Read next possible completion */
		rc = 0;
		gen = ( done & rx->count );
		index = ( done++ & ( rx->count - 1 ) );
		if ( gve->mode & GVE_MODE_DQO ) {

			/* Out-of-order completion */
			dqo = &rx->cmplt.rx.dqo[index];

			/* Check generation bit */
			bit = ( dqo->len & cpu_to_le16 ( GVE_DQO_RXL_GEN ) );
			if ( ( !! bit ) == ( !! gen ) )
				break;
			rmb();

			/* Parse completion */
			len = ( le16_to_cpu ( dqo->len ) &
				( GVE_BUF_SIZE - 1 ) );
			tag = dqo->tag;
			DBGC2 ( gve, "GVE %p RXC %#04x %#02x:%#02x len %#04zx "
				"at %#08zx\n", gve, index, tag, dqo->flags,
				len, gve_offset ( rx, tag ) );

			/* Accumulate a complete packet
			 *
			 * An error zeroes the running total, so that
			 * no I/O buffer is allocated below.
			 */
			if ( dqo->status & GVE_DQO_RXS_ERROR ) {
				rc = -EIO;
				total = 0;
			} else {
				total += len;
				if ( ! ( dqo->flags & GVE_DQO_RXF_LAST ) )
					continue;
			}

		} else {

			/* In-order completion */
			gqi = &rx->cmplt.rx.gqi[index];

			/* Check sequence number */
			if ( ( gqi->seq & GVE_GQI_RX_SEQ_MASK ) != seq )
				break;
			rmb();
			seq = gve_next ( seq );

			/* Parse completion */
			len = be16_to_cpu ( gqi->len );
			tag = ( index % GVE_RX_FILL );
			DBGC2 ( gve, "GVE %p RXC %#04x %#02x:%#02x len %#04zx "
				"at %#08zx\n", gve, index, gqi->seq,
				gqi->flags, len, gve_offset ( rx, tag ) );

			/* Accumulate a complete packet */
			if ( gqi->flags & GVE_GQI_RXF_ERROR ) {
				rc = -EIO;
				total = 0;
			} else {
				total += len;
				if ( gqi->flags & GVE_GQI_RXF_MORE )
					continue;
			}
			/* Commit the sequence number only at the end
			 * of a packet (or on error)
			 */
			gve->seq = seq;
		}

		/* Allocate and populate I/O buffer */
		iobuf = ( total ? alloc_iob ( total ) : NULL );
		for ( ; rx->done != done ; rx->done++ ) {

			/* Re-read completion and return tag to ring */
			index = ( rx->done & ( rx->count - 1 ) );
			if ( gve->mode & GVE_MODE_DQO ) {
				dqo = &rx->cmplt.rx.dqo[index];
				tag = dqo->tag;
				len = ( le16_to_cpu ( dqo->len ) &
					( GVE_BUF_SIZE - 1 ) );
				rx->tag[ rx->cons++ % GVE_RX_FILL ] = tag;
			} else {
				gqi = &rx->cmplt.rx.gqi[index];
				tag = ( index % GVE_RX_FILL );
				len = be16_to_cpu ( gqi->len );
				assert ( rx->cons == rx->done );
				rx->cons++;
			}

			/* Copy data */
			if ( iobuf ) {
				memcpy ( iob_put ( iobuf, len ),
					 gve_buffer ( rx, tag ), len );
			}
		}
		assert ( ( iobuf == NULL ) || ( iob_len ( iobuf ) == total ) );
		total = 0;

		/* Hand off packet to network stack
		 *
		 * In in-order mode the leading GVE_GQI_RX_PAD bytes
		 * are stripped first.  With no I/O buffer, report
		 * the completion error if there was one, otherwise
		 * -ENOMEM.
		 */
		if ( iobuf ) {
			if ( ! ( gve->mode & GVE_MODE_DQO ) )
				iob_pull ( iobuf, GVE_GQI_RX_PAD );
			netdev_rx ( netdev, iobuf );
		} else {
			netdev_rx_err ( netdev, NULL, ( rc ? rc : -ENOMEM ) );
		}
	}
}

/**
 * Refill receive queue
 *
 * Advances the producer counter to the consumer counter plus the
 * fill level and rings the doorbell, unless it is already there.
 *
 * @v netdev		Network device
 */
static void gve_refill_rx ( struct net_device *netdev ) {
	struct gve_nic *gve = netdev->priv;
	struct gve_queue *rx = &gve->rx;
	struct gve_dqo_rx_descriptor *dqo;
	unsigned int refill;
	unsigned int index;
	unsigned int tag;
	uint32_t doorbell;

	/* Calculate refill quantity */
	doorbell = ( rx->cons + rx->fill );
	refill = ( doorbell - rx->prod );
	if ( ! refill )
		return;

	/* Refill ring */
	if ( gve->mode & GVE_MODE_DQO ) {

		/* Out-of-order descriptors */
		while ( refill-- ) {

			/* Identify next available buffer */
			index = ( rx->prod++ & ( rx->count - 1 ) );
			tag = rx->tag[ index % GVE_RX_FILL ];

			/* Populate descriptor */
			dqo = &rx->desc.rx.dqo[index];
			dqo->tag = tag;
			dqo->buf.addr =
				cpu_to_le64 ( gve_address ( rx, tag ) );
			DBGC2 ( gve, "GVE %p RXD %#04x:%#02x at %#08llx\n",
				gve, index, dqo->tag,
				( ( unsigned long long )
				  le64_to_cpu ( dqo->buf.addr ) ) );
		}
		/* Write barrier after populating the descriptors and
		 * before the doorbell write below
		 */
		wmb();
		assert ( rx->prod == doorbell );

	} else {

		/* The in-order receive descriptors are prepopulated
		 * at the time of creating the receive queue (pointing
		 * to the preallocated queue pages).  Refilling is
		 * therefore just a case of ringing the doorbell if
		 * the device is not yet aware of any available
		 * descriptors.
		 */
		rx->prod += refill;
		assert ( rx->prod == doorbell );
		DBGC2 ( gve, "GVE %p RXD %#04x ready\n", gve, rx->prod );

		/* Doorbell is big-endian */
		doorbell = bswap_32 ( doorbell );
	}

	/* Ring doorbell */
	writel ( doorbell, rx->db );
}

/**
 * Poll for completed and received packets
 *
 * @v netdev		Network device
 */
static void gve_poll ( struct net_device *netdev ) {
	struct gve_nic *gve = netdev->priv;

	/* Do nothing if queues are not yet set up */
	if ( ! netdev_link_ok ( netdev ) )
		return;

	/* Poll for transmit completions */
	gve_poll_tx ( netdev );

	/* Poll for receive completions */
	gve_poll_rx ( netdev );

	/* Refill receive queue */
	gve_refill_rx ( netdev );

	/* Rearm queue interrupts if applicable
	 *
	 * Done only in out-of-order mode.
	 */
	if ( gve->mode & GVE_MODE_DQO ) {
		writel ( GVE_DQO_IRQ_REARM, gve->irqs.db[GVE_TX_IRQ] );
		writel ( GVE_DQO_IRQ_REARM, gve->irqs.db[GVE_RX_IRQ] );
	}
}

/** GVE network device operations */
static struct net_device_operations gve_operations = {
	.open		= gve_open,
	.close		= gve_close,
	.transmit	= gve_transmit,
	.poll		= gve_poll,
};

/******************************************************************************
 *
 * PCI interface
 *
 ******************************************************************************
 */

/** Transmit descriptor queue type
 *
 * No completion stride is given for in-order (GQI) mode, so that
 * field is zero-initialised.
 */
static const struct gve_queue_type gve_tx_type = {
	.name = "TX",
	.param = gve_create_tx_param,
	.qpl = GVE_TX_QPL,
	.irq = GVE_TX_IRQ,
	.fill = GVE_TX_FILL,
	.stride = {
		.gqi = {
			.desc = sizeof ( struct gve_gqi_tx_descriptor ),
		},
		.dqo = {
			.desc = sizeof ( struct gve_dqo_tx_descriptor ),
			.cmplt = sizeof ( struct gve_dqo_tx_completion ),
		},
	},
	.create = GVE_ADMIN_CREATE_TX,
	.destroy = GVE_ADMIN_DESTROY_TX,
};

/** Receive descriptor queue type
 *
 * Descriptor and completion strides are given for both in-order
 * (GQI) and out-of-order (DQO) modes.
 */
static const struct gve_queue_type gve_rx_type = {
	.name = "RX",
	.param = gve_create_rx_param,
	.qpl = GVE_RX_QPL,
	.irq = GVE_RX_IRQ,
	.fill = GVE_RX_FILL,
	.stride = {
		.gqi = {
			.desc = sizeof ( struct gve_gqi_rx_descriptor ),
			.cmplt = sizeof ( struct gve_gqi_rx_completion ),
		},
		.dqo = {
			.desc = sizeof ( struct gve_dqo_rx_descriptor ),
			.cmplt = sizeof ( struct gve_dqo_rx_completion ),
		},
	},
	.create = GVE_ADMIN_CREATE_RX,
	.destroy = GVE_ADMIN_DESTROY_RX,
};

/**
 * Set up admin queue and get device description
 *
 * Makes up to GVE_RESET_MAX_RETRY attempts to reset the device,
 * enable the admin queue and fetch the device description.  If every
 * attempt fails, the status of the last failure is returned.
 *
 * @v gve		GVE device
 * @ret rc		Return status code
 */
static int gve_setup ( struct gve_nic *gve ) {
	unsigned int i;
	int rc;

	/* Attempt several times, since the device may decide to add
	 * in a few spurious resets.
	 */
	for ( i = 0 ; i < GVE_RESET_MAX_RETRY ; i++ ) {

		/* Reset device */
		if ( ( rc = gve_reset ( gve ) ) != 0 )
			continue;

		/* Enable admin queue */
		gve_admin_enable ( gve );

		/* Fetch MAC address */
		if ( ( rc = gve_describe ( gve ) ) != 0 )
			continue;

		/* Success */
		return 0;
	}

	DBGC ( gve, "GVE %p failed to get device description: %s\n",
	       gve, strerror ( rc ) );
	return rc;
}

/** Device startup process descriptor */
static struct process_descriptor gve_startup_desc =
	PROC_DESC_ONCE ( struct gve_nic, startup, gve_startup );

/**
 * Probe PCI device
 *
 * Allocates and initialises the network device, maps the
 * configuration and doorbell registers, configures DMA and MSI-X,
 * allocates the admin queue, sets up the device and registers the
 * network device.  Any failure unwinds the steps already completed.
 *
 * @v pci		PCI device
 * @ret rc		Return status code
 */
static int gve_probe ( struct pci_device *pci ) {
	struct net_device *netdev;
	struct gve_nic *gve;
	unsigned long cfg_start;
	unsigned long db_start;
	unsigned long db_size;
	int rc;

	/* Allocate and initialise net device */
	netdev = alloc_etherdev ( sizeof ( *gve ) );
	if ( ! netdev ) {
		rc = -ENOMEM;
		goto err_alloc;
	}
	netdev_init ( netdev, &gve_operations );
	gve = netdev->priv;
	pci_set_drvdata ( pci, netdev );
	netdev->dev = &pci->dev;
	memset ( gve, 0, sizeof ( *gve ) );
	gve->netdev = netdev;
	gve->tx.type = &gve_tx_type;
	gve->rx.type = &gve_rx_type;
	/* Point each queue at its tag array within the device structure */
	gve->tx.tag = gve->tx_tag;
	gve->rx.tag = gve->rx_tag;
	/* Startup process is initialised in the stopped state */
	process_init_stopped ( &gve->startup, &gve_startup_desc,
			       &netdev->refcnt );
	timer_init ( &gve->watchdog, gve_watchdog, &netdev->refcnt );

	/* Fix up PCI device */
	adjust_pci_device ( pci );

	/* Check PCI revision */
	pci_read_config_byte ( pci, PCI_REVISION, &gve->revision );
	DBGC ( gve, "GVE %p is revision %#02x\n", gve, gve->revision );

	/* Map configuration registers */
	cfg_start = pci_bar_start ( pci, GVE_CFG_BAR );
	gve->cfg = pci_ioremap ( pci, cfg_start, GVE_CFG_SIZE );
	if ( ! gve->cfg ) {
		rc = -ENODEV;
		goto err_cfg;
	}

	/* Map doorbell registers */
	db_start = pci_bar_start ( pci, GVE_DB_BAR );
	db_size = pci_bar_size ( pci, GVE_DB_BAR );
	gve->db = pci_ioremap ( pci, db_start, db_size );
	if ( ! gve->db ) {
		rc = -ENODEV;
		goto err_db;
	}

	/* Configure DMA */
	gve->dma = &pci->dma;
	dma_set_mask_64bit ( gve->dma );
	assert ( netdev->dma == NULL );

	/* Configure dummy MSI-X interrupt */
	if ( ( rc = pci_msix_enable ( pci, &gve->msix ) ) != 0 )
		goto err_msix;

	/* Allocate admin queue */
	if ( ( rc = gve_admin_alloc ( gve ) ) != 0 )
		goto err_admin;

	/* Set up the device */
	if ( ( rc = gve_setup ( gve ) ) != 0 )
		goto err_setup;

	/* Register network device */
	if ( ( rc = register_netdev ( netdev ) ) != 0 )
		goto err_register_netdev;

	return 0;

	/* Unreachable; completes the unwind ladder */
	unregister_netdev ( netdev );
 err_register_netdev:
 err_setup:
	gve_reset ( gve );
	gve_admin_free ( gve );
 err_admin:
	pci_msix_disable ( pci, &gve->msix );
 err_msix:
	iounmap ( gve->db );
 err_db:
	iounmap ( gve->cfg );
 err_cfg:
	netdev_nullify ( netdev );
	netdev_put ( netdev );
 err_alloc:
	return rc;
}

/**
 * Remove PCI device
 *
 * Performs the same teardown sequence as the error-unwind path of
 * gve_probe().
 *
 * @v pci		PCI device
 */
static void gve_remove ( struct pci_device *pci ) {
	struct net_device *netdev = pci_get_drvdata ( pci );
	struct gve_nic *gve = netdev->priv;

	/* Unregister network device */
	unregister_netdev ( netdev );

	/* Reset device */
	gve_reset ( gve );

	/* Free admin queue */
	gve_admin_free ( gve );

	/* Disable dummy MSI-X interrupt */
	pci_msix_disable ( pci, &gve->msix );

	/* Unmap registers */
	iounmap ( gve->db );
	iounmap ( gve->cfg );

	/* Free network device */
	netdev_nullify ( netdev );
	netdev_put ( netdev );
}

/** GVE PCI device IDs */
static struct pci_device_id gve_nics[] = {
	PCI_ROM ( 0x1ae0, 0x0042, "gve", "gVNIC", 0 ),
};

/** GVE PCI driver */
struct pci_driver gve_driver __pci_driver = {
	.ids = gve_nics,
	.id_count = ( sizeof ( gve_nics ) / sizeof ( gve_nics[0] ) ),
	.probe = gve_probe,
	.remove = gve_remove,
};