/* * (c) Copyright 2010 Stefan Hajnoczi * * based on the Etherboot virtio-net driver * * (c) Copyright 2008 Bull S.A.S. * * Author: Laurent Vivier * * some parts from Linux Virtio PCI driver * * Copyright IBM Corp. 2007 * Authors: Anthony Liguori * * some parts from Linux Virtio Ring * * Copyright Rusty Russell IBM Corporation 2007 * * This work is licensed under the terms of the GNU GPL, version 2 or later. * See the COPYING file in the top-level directory. */ FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL ); #include #include #include #include #include #include #include #include #include #include #include #include "virtio-net.h" /* * Virtio network device driver * * Specification: * http://ozlabs.org/~rusty/virtio-spec/ * * The virtio network device is supported by Linux virtualization software * including QEMU/KVM and lguest. This driver supports the virtio over PCI * transport; virtual machines have one virtio-net PCI adapter per NIC. * * Virtio-net is different from hardware NICs because virtio devices * communicate with the hypervisor via virtqueues, not traditional descriptor * rings. Virtqueues are unordered queues, they support add_buf() and * get_buf() operations. To transmit a packet, the driver has to add the * packet buffer onto the virtqueue. To receive a packet, the driver must * first add an empty buffer to the virtqueue and then get the filled packet * buffer on completion. * * Virtqueues are an abstraction that is commonly implemented using the vring * descriptor ring layout. The vring is the actual shared memory structure * that allows the virtual machine to communicate buffers with the hypervisor. * Because the vring layout is optimized for flexibility and performance rather * than space, it is heavy-weight and allocated like traditional descriptor * rings in the open() function of the driver and not in probe(). * * There is no true interrupt enable/disable. Virtqueues have callback * enable/disable flags but these are only hints. The hypervisor may still * raise an interrupt. Nevertheless, this driver disables callbacks in the * hopes of avoiding interrupts. */ /* Driver types are declared here so virtio-net.h can be easily synced with its * Linux source. */ /* Virtqueue indices */ enum { RX_INDEX = 0, TX_INDEX, QUEUE_NB }; /** Max number of pending rx packets */ #define NUM_RX_BUF 8 struct virtnet_nic { /** Base pio register address */ unsigned long ioaddr; /** 0 for legacy, 1 for virtio 1.0 */ int virtio_version; /** Virtio 1.0 device data */ struct virtio_pci_modern_device vdev; /** RX/TX virtqueues */ struct vring_virtqueue *virtqueue; /** RX packets handed to the NIC waiting to be filled in */ struct list_head rx_iobufs; /** Pending rx packet count */ unsigned int rx_num_iobufs; /** Virtio net dummy packet headers */ struct virtio_net_hdr_modern empty_header[QUEUE_NB]; }; /** Add an iobuf to a virtqueue * * @v netdev Network device * @v vq_idx Virtqueue index (RX_INDEX or TX_INDEX) * @v iobuf I/O buffer * * The virtqueue is kicked after the iobuf has been added. */ static void virtnet_enqueue_iob ( struct net_device *netdev, int vq_idx, struct io_buffer *iobuf ) { struct virtnet_nic *virtnet = netdev->priv; struct vring_virtqueue *vq = &virtnet->virtqueue[vq_idx]; struct virtio_net_hdr_modern *header = &virtnet->empty_header[vq_idx]; unsigned int out = ( vq_idx == TX_INDEX ) ? 2 : 0; unsigned int in = ( vq_idx == TX_INDEX ) ? 0 : 2; size_t header_len = ( virtnet->virtio_version ? sizeof ( *header ) : sizeof ( header->legacy ) ); struct vring_list list[] = { { /* Share a single zeroed virtio net header between all * packets in a ring. This works because this driver * does not use any advanced features so none of the * header fields get used. * * Some host implementations (notably Google Compute * Platform) are known to unconditionally write back * to header->flags for received packets. Work around * this by using separate RX and TX headers. */ .addr = ( char* ) header, .length = header_len, }, { .addr = ( char* ) iobuf->data, .length = iob_len ( iobuf ), }, }; DBGC2 ( virtnet, "VIRTIO-NET %p enqueuing iobuf %p on vq %d\n", virtnet, iobuf, vq_idx ); vring_add_buf ( vq, list, out, in, iobuf, 0 ); vring_kick ( virtnet->virtio_version ? &virtnet->vdev : NULL, virtnet->ioaddr, vq, 1 ); } /** Try to keep rx virtqueue filled with iobufs * * @v netdev Network device */ static void virtnet_refill_rx_virtqueue ( struct net_device *netdev ) { struct virtnet_nic *virtnet = netdev->priv; size_t len = ( netdev->max_pkt_len + 4 /* VLAN */ ); while ( virtnet->rx_num_iobufs < NUM_RX_BUF ) { struct io_buffer *iobuf; /* Try to allocate a buffer, stop for now if out of memory */ iobuf = alloc_iob ( len ); if ( ! iobuf ) break; /* Keep track of iobuf so close() can free it */ list_add ( &iobuf->list, &virtnet->rx_iobufs ); /* Mark packet length until we know the actual size */ iob_put ( iobuf, len ); virtnet_enqueue_iob ( netdev, RX_INDEX, iobuf ); virtnet->rx_num_iobufs++; } } /** Helper to free all virtqueue memory * * @v netdev Network device */ static void virtnet_free_virtqueues ( struct net_device *netdev ) { struct virtnet_nic *virtnet = netdev->priv; int i; for ( i = 0; i < QUEUE_NB; i++ ) { virtio_pci_unmap_capability ( &virtnet->virtqueue[i].notification ); vp_free_vq ( &virtnet->virtqueue[i] ); } free ( virtnet->virtqueue ); virtnet->virtqueue = NULL; } /** Open network device, legacy virtio 0.9.5 * * @v netdev Network device * @ret rc Return status code */ static int virtnet_open_legacy ( struct net_device *netdev ) { struct virtnet_nic *virtnet = netdev->priv; unsigned long ioaddr = virtnet->ioaddr; u32 features; int i; /* Reset for sanity */ vp_reset ( ioaddr ); /* Allocate virtqueues */ virtnet->virtqueue = zalloc ( QUEUE_NB * sizeof ( *virtnet->virtqueue ) ); if ( ! virtnet->virtqueue ) return -ENOMEM; /* Initialize rx/tx virtqueues */ for ( i = 0; i < QUEUE_NB; i++ ) { if ( vp_find_vq ( ioaddr, i, &virtnet->virtqueue[i] ) == -1 ) { DBGC ( virtnet, "VIRTIO-NET %p cannot register queue %d\n", virtnet, i ); virtnet_free_virtqueues ( netdev ); return -ENOENT; } } /* Initialize rx packets */ INIT_LIST_HEAD ( &virtnet->rx_iobufs ); virtnet->rx_num_iobufs = 0; virtnet_refill_rx_virtqueue ( netdev ); /* Disable interrupts before starting */ netdev_irq ( netdev, 0 ); /* Driver is ready */ features = vp_get_features ( ioaddr ); vp_set_features ( ioaddr, features & ( ( 1 << VIRTIO_NET_F_MAC ) | ( 1 << VIRTIO_NET_F_MTU ) ) ); vp_set_status ( ioaddr, VIRTIO_CONFIG_S_DRIVER | VIRTIO_CONFIG_S_DRIVER_OK ); return 0; } /** Open network device, modern virtio 1.0 * * @v netdev Network device * @ret rc Return status code */ static int virtnet_open_modern ( struct net_device *netdev ) { struct virtnet_nic *virtnet = netdev->priv; u64 features; u8 status; /* Negotiate features */ features = vpm_get_features ( &virtnet->vdev ); if ( ! ( features & VIRTIO_F_VERSION_1 ) ) { vpm_add_status ( &virtnet->vdev, VIRTIO_CONFIG_S_FAILED ); return -EINVAL; } vpm_set_features ( &virtnet->vdev, features & ( ( 1ULL << VIRTIO_NET_F_MAC ) | ( 1ULL << VIRTIO_NET_F_MTU ) | ( 1ULL << VIRTIO_F_VERSION_1 ) | ( 1ULL << VIRTIO_F_ANY_LAYOUT ) | ( 1ULL << VIRTIO_F_IOMMU_PLATFORM ) ) ); vpm_add_status ( &virtnet->vdev, VIRTIO_CONFIG_S_FEATURES_OK ); status = vpm_get_status ( &virtnet->vdev ); if ( ! ( status & VIRTIO_CONFIG_S_FEATURES_OK ) ) { DBGC ( virtnet, "VIRTIO-NET %p device didn't accept features\n", virtnet ); vpm_add_status ( &virtnet->vdev, VIRTIO_CONFIG_S_FAILED ); return -EINVAL; } /* Allocate virtqueues */ virtnet->virtqueue = zalloc ( QUEUE_NB * sizeof ( *virtnet->virtqueue ) ); if ( ! virtnet->virtqueue ) { vpm_add_status ( &virtnet->vdev, VIRTIO_CONFIG_S_FAILED ); return -ENOMEM; } /* Initialize rx/tx virtqueues */ if ( vpm_find_vqs ( &virtnet->vdev, QUEUE_NB, virtnet->virtqueue ) ) { DBGC ( virtnet, "VIRTIO-NET %p cannot register queues\n", virtnet ); virtnet_free_virtqueues ( netdev ); vpm_add_status ( &virtnet->vdev, VIRTIO_CONFIG_S_FAILED ); return -ENOENT; } /* Disable interrupts before starting */ netdev_irq ( netdev, 0 ); vpm_add_status ( &virtnet->vdev, VIRTIO_CONFIG_S_DRIVER_OK ); /* Initialize rx packets */ INIT_LIST_HEAD ( &virtnet->rx_iobufs ); virtnet->rx_num_iobufs = 0; virtnet_refill_rx_virtqueue ( netdev ); return 0; } /** Open network device * * @v netdev Network device * @ret rc Return status code */ static int virtnet_open ( struct net_device *netdev ) { struct virtnet_nic *virtnet = netdev->priv; if ( virtnet->virtio_version ) { return virtnet_open_modern ( netdev ); } else { return virtnet_open_legacy ( netdev ); } } /** Close network device * * @v netdev Network device */ static void virtnet_close ( struct net_device *netdev ) { struct virtnet_nic *virtnet = netdev->priv; struct io_buffer *iobuf; struct io_buffer *next_iobuf; if ( virtnet->virtio_version ) { vpm_reset ( &virtnet->vdev ); } else { vp_reset ( virtnet->ioaddr ); } /* Virtqueues can be freed now that NIC is reset */ virtnet_free_virtqueues ( netdev ); /* Free rx iobufs */ list_for_each_entry_safe ( iobuf, next_iobuf, &virtnet->rx_iobufs, list ) { free_iob ( iobuf ); } INIT_LIST_HEAD ( &virtnet->rx_iobufs ); virtnet->rx_num_iobufs = 0; } /** Transmit packet * * @v netdev Network device * @v iobuf I/O buffer * @ret rc Return status code */ static int virtnet_transmit ( struct net_device *netdev, struct io_buffer *iobuf ) { virtnet_enqueue_iob ( netdev, TX_INDEX, iobuf ); return 0; } /** Complete packet transmission * * @v netdev Network device */ static void virtnet_process_tx_packets ( struct net_device *netdev ) { struct virtnet_nic *virtnet = netdev->priv; struct vring_virtqueue *tx_vq = &virtnet->virtqueue[TX_INDEX]; while ( vring_more_used ( tx_vq ) ) { struct io_buffer *iobuf = vring_get_buf ( tx_vq, NULL ); DBGC2 ( virtnet, "VIRTIO-NET %p tx complete iobuf %p\n", virtnet, iobuf ); netdev_tx_complete ( netdev, iobuf ); } } /** Complete packet reception * * @v netdev Network device */ static void virtnet_process_rx_packets ( struct net_device *netdev ) { struct virtnet_nic *virtnet = netdev->priv; struct vring_virtqueue *rx_vq = &virtnet->virtqueue[RX_INDEX]; while ( vring_more_used ( rx_vq ) ) { unsigned int len; struct io_buffer *iobuf = vring_get_buf ( rx_vq, &len ); /* Release ownership of iobuf */ list_del ( &iobuf->list ); virtnet->rx_num_iobufs--; /* Update iobuf length */ iob_unput ( iobuf, iob_len ( iobuf ) ); iob_put ( iobuf, len - sizeof ( struct virtio_net_hdr ) ); DBGC2 ( virtnet, "VIRTIO-NET %p rx complete iobuf %p len %zd\n", virtnet, iobuf, iob_len ( iobuf ) ); /* Pass completed packet to the network stack */ netdev_rx ( netdev, iobuf ); } virtnet_refill_rx_virtqueue ( netdev ); } /** Poll for completed and received packets * * @v netdev Network device */ static void virtnet_poll ( struct net_device *netdev ) { struct virtnet_nic *virtnet = netdev->priv; /* Acknowledge interrupt. This is necessary for UNDI operation and * interrupts that are raised despite VRING_AVAIL_F_NO_INTERRUPT being * set (that flag is just a hint and the hypervisor does not have to * honor it). */ if ( virtnet->virtio_version ) { vpm_get_isr ( &virtnet->vdev ); } else { vp_get_isr ( virtnet->ioaddr ); } virtnet_process_tx_packets ( netdev ); virtnet_process_rx_packets ( netdev ); } /** Enable or disable interrupts * * @v netdev Network device * @v enable Interrupts should be enabled */ static void virtnet_irq ( struct net_device *netdev, int enable ) { struct virtnet_nic *virtnet = netdev->priv; int i; for ( i = 0; i < QUEUE_NB; i++ ) { if ( enable ) vring_enable_cb ( &virtnet->virtqueue[i] ); else vring_disable_cb ( &virtnet->virtqueue[i] ); } } /** virtio-net device operations */ static struct net_device_operations virtnet_operations = { .open = virtnet_open, .close = virtnet_close, .transmit = virtnet_transmit, .poll = virtnet_poll, .irq = virtnet_irq, }; /** * Probe PCI device, legacy virtio 0.9.5 * * @v pci PCI device * @ret rc Return status code */ static int virtnet_probe_legacy ( struct pci_device *pci ) { unsigned long ioaddr = pci->ioaddr; struct net_device *netdev; struct virtnet_nic *virtnet; u32 features; u16 mtu; int rc; /* Allocate and hook up net device */ netdev = alloc_etherdev ( sizeof ( *virtnet ) ); if ( ! netdev ) return -ENOMEM; netdev_init ( netdev, &virtnet_operations ); virtnet = netdev->priv; virtnet->ioaddr = ioaddr; pci_set_drvdata ( pci, netdev ); netdev->dev = &pci->dev; DBGC ( virtnet, "VIRTIO-NET %p busaddr=%s ioaddr=%#lx irq=%d\n", virtnet, pci->dev.name, ioaddr, pci->irq ); /* Enable PCI bus master and reset NIC */ adjust_pci_device ( pci ); vp_reset ( ioaddr ); /* Load MAC address and MTU */ features = vp_get_features ( ioaddr ); if ( features & ( 1 << VIRTIO_NET_F_MAC ) ) { vp_get ( ioaddr, offsetof ( struct virtio_net_config, mac ), netdev->hw_addr, ETH_ALEN ); DBGC ( virtnet, "VIRTIO-NET %p mac=%s\n", virtnet, eth_ntoa ( netdev->hw_addr ) ); } if ( features & ( 1ULL << VIRTIO_NET_F_MTU ) ) { vp_get ( ioaddr, offsetof ( struct virtio_net_config, mtu ), &mtu, sizeof ( mtu ) ); DBGC ( virtnet, "VIRTIO-NET %p mtu=%d\n", virtnet, mtu ); netdev->max_pkt_len = ( mtu + ETH_HLEN ); netdev->mtu = mtu; } /* Register network device */ if ( ( rc = register_netdev ( netdev ) ) != 0 ) goto err_register_netdev; /* Mark link as up, control virtqueue is not used */ netdev_link_up ( netdev ); return 0; unregister_netdev ( netdev ); err_register_netdev: vp_reset ( ioaddr ); netdev_nullify ( netdev ); netdev_put ( netdev ); return rc; } /** * Probe PCI device, modern virtio 1.0 * * @v pci PCI device * @v found_dev Set to non-zero if modern device was found (probe may still fail) * @ret rc Return status code */ static int virtnet_probe_modern ( struct pci_device *pci, int *found_dev ) { struct net_device *netdev; struct virtnet_nic *virtnet; u64 features; u16 mtu; int rc, common, isr, notify, config, device; common = virtio_pci_find_capability ( pci, VIRTIO_PCI_CAP_COMMON_CFG ); if ( ! common ) { DBG ( "Common virtio capability not found!\n" ); return -ENODEV; } *found_dev = 1; isr = virtio_pci_find_capability ( pci, VIRTIO_PCI_CAP_ISR_CFG ); notify = virtio_pci_find_capability ( pci, VIRTIO_PCI_CAP_NOTIFY_CFG ); config = virtio_pci_find_capability ( pci, VIRTIO_PCI_CAP_PCI_CFG ); if ( ! isr || ! notify || ! config ) { DBG ( "Missing virtio capabilities %i/%i/%i/%i\n", common, isr, notify, config ); return -EINVAL; } device = virtio_pci_find_capability ( pci, VIRTIO_PCI_CAP_DEVICE_CFG ); /* Allocate and hook up net device */ netdev = alloc_etherdev ( sizeof ( *virtnet ) ); if ( ! netdev ) return -ENOMEM; netdev_init ( netdev, &virtnet_operations ); virtnet = netdev->priv; pci_set_drvdata ( pci, netdev ); netdev->dev = &pci->dev; DBGC ( virtnet, "VIRTIO-NET modern %p busaddr=%s irq=%d\n", virtnet, pci->dev.name, pci->irq ); virtnet->vdev.pci = pci; rc = virtio_pci_map_capability ( pci, common, sizeof ( struct virtio_pci_common_cfg ), 4, 0, sizeof ( struct virtio_pci_common_cfg ), &virtnet->vdev.common ); if ( rc ) goto err_map_common; rc = virtio_pci_map_capability ( pci, isr, sizeof ( u8 ), 1, 0, 1, &virtnet->vdev.isr ); if ( rc ) goto err_map_isr; virtnet->vdev.notify_cap_pos = notify; virtnet->vdev.cfg_cap_pos = config; /* Map the device capability */ if ( device ) { rc = virtio_pci_map_capability ( pci, device, 0, 4, 0, sizeof ( struct virtio_net_config ), &virtnet->vdev.device ); if ( rc ) goto err_map_device; } /* Enable the PCI device */ adjust_pci_device ( pci ); /* Reset the device and set initial status bits */ vpm_reset ( &virtnet->vdev ); vpm_add_status ( &virtnet->vdev, VIRTIO_CONFIG_S_ACKNOWLEDGE ); vpm_add_status ( &virtnet->vdev, VIRTIO_CONFIG_S_DRIVER ); /* Load MAC address and MTU */ if ( device ) { features = vpm_get_features ( &virtnet->vdev ); if ( features & ( 1ULL << VIRTIO_NET_F_MAC ) ) { vpm_get ( &virtnet->vdev, offsetof ( struct virtio_net_config, mac ), netdev->hw_addr, ETH_ALEN ); DBGC ( virtnet, "VIRTIO-NET %p mac=%s\n", virtnet, eth_ntoa ( netdev->hw_addr ) ); } if ( features & ( 1ULL << VIRTIO_NET_F_MTU ) ) { vpm_get ( &virtnet->vdev, offsetof ( struct virtio_net_config, mtu ), &mtu, sizeof ( mtu ) ); DBGC ( virtnet, "VIRTIO-NET %p mtu=%d\n", virtnet, mtu ); netdev->max_pkt_len = ( mtu + ETH_HLEN ); } } /* We need a valid MAC address */ if ( ! is_valid_ether_addr ( netdev->hw_addr ) ) { rc = -EADDRNOTAVAIL; goto err_mac_address; } /* Register network device */ if ( ( rc = register_netdev ( netdev ) ) != 0 ) goto err_register_netdev; /* Mark link as up, control virtqueue is not used */ netdev_link_up ( netdev ); virtnet->virtio_version = 1; return 0; unregister_netdev ( netdev ); err_register_netdev: err_mac_address: vpm_reset ( &virtnet->vdev ); netdev_nullify ( netdev ); netdev_put ( netdev ); virtio_pci_unmap_capability ( &virtnet->vdev.device ); err_map_device: virtio_pci_unmap_capability ( &virtnet->vdev.isr ); err_map_isr: virtio_pci_unmap_capability ( &virtnet->vdev.common ); err_map_common: return rc; } /** * Probe PCI device * * @v pci PCI device * @ret rc Return status code */ static int virtnet_probe ( struct pci_device *pci ) { int found_modern = 0; int rc = virtnet_probe_modern ( pci, &found_modern ); if ( ! found_modern && pci->device < 0x1040 ) { /* fall back to the legacy probe */ rc = virtnet_probe_legacy ( pci ); } return rc; } /** * Remove device * * @v pci PCI device */ static void virtnet_remove ( struct pci_device *pci ) { struct net_device *netdev = pci_get_drvdata ( pci ); struct virtnet_nic *virtnet = netdev->priv; virtio_pci_unmap_capability ( &virtnet->vdev.device ); virtio_pci_unmap_capability ( &virtnet->vdev.isr ); virtio_pci_unmap_capability ( &virtnet->vdev.common ); unregister_netdev ( netdev ); netdev_nullify ( netdev ); netdev_put ( netdev ); } static struct pci_device_id virtnet_nics[] = { PCI_ROM(0x1af4, 0x1000, "virtio-net", "Virtio Network Interface", 0), PCI_ROM(0x1af4, 0x1041, "virtio-net", "Virtio Network Interface 1.0", 0), }; struct pci_driver virtnet_driver __pci_driver = { .ids = virtnet_nics, .id_count = ( sizeof ( virtnet_nics ) / sizeof ( virtnet_nics[0] ) ), .probe = virtnet_probe, .remove = virtnet_remove, };