From 4a3f03ba8dbf53fce36d0c1dd5d0cc0f340fe5f3 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 11 Jan 2017 09:38:15 +0100 Subject: virtio-net: enable ioeventfd even if vhost=off virtio-net-pci does not enable ioeventfd for historical reasons (and nobody ever checked whether it should be revisited). Note that other backends do enable ioeventfd for virtio-net. However, it has a major effect on performance. On Windows, throughput is _multiplied_ by 2 or 3 on TCP_STREAM (on small packets it is "only" a 30% improvement) and a little less so on TCP_MAERTS albeit still very much statistically significant. Latency also has a single digit improvement. This is not visible when using vhost, which forces ioeventfd=on, but it is substantial without vhost. In addition, also on Windows and with the RHEL 7.3 kernel, APICv seems to slow down virtio-net performance a bit, but the penalty with this patch goes from -25% to -7%. Signed-off-by: Paolo Bonzini Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/virtio/virtio-pci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'hw/virtio') diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c index 854b8f22bf..8baaf2ba6d 100644 --- a/hw/virtio/virtio-pci.c +++ b/hw/virtio/virtio-pci.c @@ -2278,7 +2278,7 @@ static const TypeInfo virtio_serial_pci_info = { static Property virtio_net_properties[] = { DEFINE_PROP_BIT("ioeventfd", VirtIOPCIProxy, flags, - VIRTIO_PCI_FLAG_USE_IOEVENTFD_BIT, false), + VIRTIO_PCI_FLAG_USE_IOEVENTFD_BIT, true), DEFINE_PROP_UINT32("vectors", VirtIOPCIProxy, nvectors, 3), DEFINE_PROP_END_OF_LIST(), }; -- cgit v1.2.3-55-g7522 From 332fa82d0963409fa14997a02639289afa226596 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Thu, 12 Jan 2017 11:46:10 +0000 Subject: Revert "virtio: turn vq->notification into a nested counter" This reverts commit aff8fd18f1786fc5af259a9bc0077727222f51ca. Both virtio-net and virtio-crypto do not balance virtio_queue_set_notification() enable and disable calls. This makes the notifications_disabled counter unreliable and Doug Goldstein reported the following assertion failure: #3 0x00007ffff44d1c62 in __GI___assert_fail ( assertion=assertion@entry=0x555555ae8e8a "vq->notification_disabled > 0", file=file@entry=0x555555ae89c0 "/home/doug/work/qemu/hw/virtio/virtio.c", line=line@entry=215, function=function@entry=0x555555ae9630 <__PRETTY_FUNCTION__.43707> "virtio_queue_set_notification") at assert.c:101 #4 0x00005555557f25d6 in virtio_queue_set_notification (vq=0x55555666aa90, enable=enable@entry=1) at /home/doug/work/qemu/hw/virtio/virtio.c:215 #5 0x00005555557dc311 in virtio_net_has_buffers (q=, q=, bufsize=102) at /home/doug/work/qemu/hw/net/virtio-net.c:1008 #6 virtio_net_receive (nc=, buf=0x555557386b88 "", size=102) at /home/doug/work/qemu/hw/net/virtio-net.c:1148 #7 0x00005555559cad33 in nc_sendv_compat (flags=, iovcnt=1, iov=0x7fffead746d0, nc=0x55555788b340) at net/net.c:705 #8 qemu_deliver_packet_iov (sender=, flags=, iov=0x7fffead746d0, iovcnt=1, opaque=0x55555788b340) at net/net.c:732 #9 0x00005555559cd929 in qemu_net_queue_deliver (size=, data=, flags=, sender=, queue=0x55555788b550) at net/queue.c:164 #10 qemu_net_queue_flush (queue=0x55555788b550) at net/queue.c:261 This patch is safe to revert since it's just an optimization for virtqueue polling. The next patch will improve the situation again without resorting to nesting. Reported-by: Doug Goldstein Signed-off-by: Stefan Hajnoczi Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Tested-by: Richard Henderson Tested-by: Laszlo Ersek Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/virtio/virtio.c | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) (limited to 'hw/virtio') diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index aa4f38f50a..f04ab7aa6d 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -88,8 +88,8 @@ struct VirtQueue /* Last used index value we have signalled on */ bool signalled_used_valid; - /* Nested host->guest notification disabled counter */ - unsigned int notification_disabled; + /* Notification enabled? */ + bool notification; uint16_t queue_index; @@ -202,7 +202,7 @@ static inline void vring_used_flags_unset_bit(VirtQueue *vq, int mask) static inline void vring_set_avail_event(VirtQueue *vq, uint16_t val) { hwaddr pa; - if (vq->notification_disabled) { + if (!vq->notification) { return; } pa = vq->vring.used + offsetof(VRingUsed, ring[vq->vring.num]); @@ -211,13 +211,7 @@ static inline void vring_set_avail_event(VirtQueue *vq, uint16_t val) void virtio_queue_set_notification(VirtQueue *vq, int enable) { - if (enable) { - assert(vq->notification_disabled > 0); - vq->notification_disabled--; - } else { - vq->notification_disabled++; - } - + vq->notification = enable; if (virtio_vdev_has_feature(vq->vdev, VIRTIO_RING_F_EVENT_IDX)) { vring_set_avail_event(vq, vring_avail_idx(vq)); } else if (enable) { @@ -1020,7 +1014,7 @@ void virtio_reset(void *opaque) virtio_queue_set_vector(vdev, i, VIRTIO_NO_VECTOR); vdev->vq[i].signalled_used = 0; vdev->vq[i].signalled_used_valid = false; - vdev->vq[i].notification_disabled = 0; + vdev->vq[i].notification = true; vdev->vq[i].vring.num = vdev->vq[i].vring.num_default; vdev->vq[i].inuse = 0; } @@ -1831,7 +1825,7 @@ int virtio_load(VirtIODevice *vdev, QEMUFile *f, int version_id) vdev->vq[i].vring.desc = qemu_get_be64(f); qemu_get_be16s(f, &vdev->vq[i].last_avail_idx); vdev->vq[i].signalled_used_valid = false; - vdev->vq[i].notification_disabled = 0; + vdev->vq[i].notification = true; if (vdev->vq[i].vring.desc) { /* XXX virtio-1 devices */ -- cgit v1.2.3-55-g7522 From 1448c133e19372359d9de68626c06088ba79a34b Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Thu, 12 Jan 2017 11:46:11 +0000 Subject: virtio: disable notifications again after poll succeeded While AioContext is in polling mode virtqueue notifications are not necessary. Some device virtqueue handlers enable notifications. Make sure they stay disabled to avoid unnecessary vmexits. Signed-off-by: Stefan Hajnoczi Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Tested-by: Richard Henderson Tested-by: Laszlo Ersek Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/virtio/virtio.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'hw/virtio') diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index f04ab7aa6d..34065c78f8 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -2126,6 +2126,9 @@ static bool virtio_queue_host_notifier_aio_poll(void *opaque) } virtio_queue_notify_aio_vq(vq); + + /* In case the handler function re-enabled notifications */ + virtio_queue_set_notification(vq, 0); return true; } -- cgit v1.2.3-55-g7522 From c471ad0e9bd46ca5f5c9c796e727230e043a091d Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Wed, 11 Jan 2017 12:32:12 +0800 Subject: vhost_net: device IOTLB support This patches implements Device IOTLB support for vhost kernel. This is done through: 1) switch to use dma helpers when map/unmap vrings from vhost codes 2) introduce a set of VhostOps to: - setting up device IOTLB request callback - processing device IOTLB request - processing device IOTLB invalidation 2) kernel support for Device IOTLB API: - allow vhost-net to query the IOMMU IOTLB entry through eventfd - enable the ability for qemu to update a specified mapping of vhost - through ioctl. - enable the ability to invalidate a specified range of iova for the device IOTLB of vhost through ioctl. In x86/intel_iommu case this is triggered through iommu memory region notifier from device IOTLB invalidation descriptor processing routine. With all the above, kernel vhost_net can co-operate with userspace IOMMU. For vhost-user, the support could be easily done on top by implementing the VhostOps. Cc: Michael S. Tsirkin Signed-off-by: Jason Wang Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/net/vhost_net.c | 1 + hw/virtio/vhost-backend.c | 99 +++++++++++++++++++++++ hw/virtio/vhost.c | 166 +++++++++++++++++++++++++++++++++----- include/hw/virtio/vhost-backend.h | 13 +++ include/hw/virtio/vhost.h | 4 + net/tap.c | 1 + 6 files changed, 262 insertions(+), 22 deletions(-) (limited to 'hw/virtio') diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c index 6280422d02..22874a9777 100644 --- a/hw/net/vhost_net.c +++ b/hw/net/vhost_net.c @@ -52,6 +52,7 @@ static const int kernel_feature_bits[] = { VIRTIO_NET_F_MRG_RXBUF, VIRTIO_F_VERSION_1, VIRTIO_NET_F_MTU, + VIRTIO_F_IOMMU_PLATFORM, VHOST_INVALID_FEATURE_BIT }; diff --git a/hw/virtio/vhost-backend.c b/hw/virtio/vhost-backend.c index 272a5ec584..be927b891e 100644 --- a/hw/virtio/vhost-backend.c +++ b/hw/virtio/vhost-backend.c @@ -185,6 +185,102 @@ static int vhost_kernel_vsock_set_running(struct vhost_dev *dev, int start) } #endif /* CONFIG_VHOST_VSOCK */ +static void vhost_kernel_iotlb_read(void *opaque) +{ + struct vhost_dev *dev = opaque; + struct vhost_msg msg; + ssize_t len; + + while ((len = read((uintptr_t)dev->opaque, &msg, sizeof msg)) > 0) { + struct vhost_iotlb_msg *imsg = &msg.iotlb; + if (len < sizeof msg) { + error_report("Wrong vhost message len: %d", (int)len); + break; + } + if (msg.type != VHOST_IOTLB_MSG) { + error_report("Unknown vhost iotlb message type"); + break; + } + switch (imsg->type) { + case VHOST_IOTLB_MISS: + vhost_device_iotlb_miss(dev, imsg->iova, + imsg->perm != VHOST_ACCESS_RO); + break; + case VHOST_IOTLB_UPDATE: + case VHOST_IOTLB_INVALIDATE: + error_report("Unexpected IOTLB message type"); + break; + case VHOST_IOTLB_ACCESS_FAIL: + /* FIXME: report device iotlb error */ + break; + default: + break; + } + } +} + +static int vhost_kernel_update_device_iotlb(struct vhost_dev *dev, + uint64_t iova, uint64_t uaddr, + uint64_t len, + IOMMUAccessFlags perm) +{ + struct vhost_msg msg; + msg.type = VHOST_IOTLB_MSG; + msg.iotlb.iova = iova; + msg.iotlb.uaddr = uaddr; + msg.iotlb.size = len; + msg.iotlb.type = VHOST_IOTLB_UPDATE; + + switch (perm) { + case IOMMU_RO: + msg.iotlb.perm = VHOST_ACCESS_RO; + break; + case IOMMU_WO: + msg.iotlb.perm = VHOST_ACCESS_WO; + break; + case IOMMU_RW: + msg.iotlb.perm = VHOST_ACCESS_RW; + break; + default: + g_assert_not_reached(); + } + + if (write((uintptr_t)dev->opaque, &msg, sizeof msg) != sizeof msg) { + error_report("Fail to update device iotlb"); + return -EFAULT; + } + + return 0; +} + +static int vhost_kernel_invalidate_device_iotlb(struct vhost_dev *dev, + uint64_t iova, uint64_t len) +{ + struct vhost_msg msg; + + msg.type = VHOST_IOTLB_MSG; + msg.iotlb.iova = iova; + msg.iotlb.size = len; + msg.iotlb.type = VHOST_IOTLB_INVALIDATE; + + if (write((uintptr_t)dev->opaque, &msg, sizeof msg) != sizeof msg) { + error_report("Fail to invalidate device iotlb"); + return -EFAULT; + } + + return 0; +} + +static void vhost_kernel_set_iotlb_callback(struct vhost_dev *dev, + int enabled) +{ + if (enabled) + qemu_set_fd_handler((uintptr_t)dev->opaque, + vhost_kernel_iotlb_read, NULL, dev); + else + qemu_set_fd_handler((uintptr_t)dev->opaque, NULL, NULL, NULL); +} + static const VhostOps kernel_ops = { .backend_type = VHOST_BACKEND_TYPE_KERNEL, .vhost_backend_init = vhost_kernel_init, @@ -214,6 +310,9 @@ static const VhostOps kernel_ops = { .vhost_vsock_set_guest_cid = vhost_kernel_vsock_set_guest_cid, .vhost_vsock_set_running = vhost_kernel_vsock_set_running, #endif /* CONFIG_VHOST_VSOCK */ + .vhost_set_iotlb_callback = vhost_kernel_set_iotlb_callback, + .vhost_update_device_iotlb = vhost_kernel_update_device_iotlb, + .vhost_invalidate_device_iotlb = vhost_kernel_invalidate_device_iotlb, }; int vhost_set_backend_type(struct vhost_dev *dev, VhostBackendType backend_type) diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c index d396b22531..9cacf557f2 100644 --- a/hw/virtio/vhost.c +++ b/hw/virtio/vhost.c @@ -26,6 +26,7 @@ #include "hw/virtio/virtio-bus.h" #include "hw/virtio/virtio-access.h" #include "migration/migration.h" +#include "sysemu/dma.h" /* enabled until disconnected backend stabilizes */ #define _VHOST_DEBUG 1 @@ -421,8 +422,36 @@ static inline void vhost_dev_log_resize(struct vhost_dev *dev, uint64_t size) dev->log_size = size; } +static int vhost_dev_has_iommu(struct vhost_dev *dev) +{ + VirtIODevice *vdev = dev->vdev; + AddressSpace *dma_as = vdev->dma_as; + + return memory_region_is_iommu(dma_as->root) && + virtio_host_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM); +} + +static void *vhost_memory_map(struct vhost_dev *dev, hwaddr addr, + hwaddr *plen, int is_write) +{ + if (!vhost_dev_has_iommu(dev)) { + return cpu_physical_memory_map(addr, plen, is_write); + } else { + return (void *)(uintptr_t)addr; + } +} + +static void vhost_memory_unmap(struct vhost_dev *dev, void *buffer, + hwaddr len, int is_write, + hwaddr access_len) +{ + if (!vhost_dev_has_iommu(dev)) { + cpu_physical_memory_unmap(buffer, len, is_write, access_len); + } +} -static int vhost_verify_ring_part_mapping(void *part, +static int vhost_verify_ring_part_mapping(struct vhost_dev *dev, + void *part, uint64_t part_addr, uint64_t part_size, uint64_t start_addr, @@ -436,14 +465,14 @@ static int vhost_verify_ring_part_mapping(void *part, return 0; } l = part_size; - p = cpu_physical_memory_map(part_addr, &l, 1); + p = vhost_memory_map(dev, part_addr, &l, 1); if (!p || l != part_size) { r = -ENOMEM; } if (p != part) { r = -EBUSY; } - cpu_physical_memory_unmap(p, l, 0, 0); + vhost_memory_unmap(dev, p, l, 0, 0); return r; } @@ -463,21 +492,21 @@ static int vhost_verify_ring_mappings(struct vhost_dev *dev, struct vhost_virtqueue *vq = dev->vqs + i; j = 0; - r = vhost_verify_ring_part_mapping(vq->desc, vq->desc_phys, + r = vhost_verify_ring_part_mapping(dev, vq->desc, vq->desc_phys, vq->desc_size, start_addr, size); if (!r) { break; } j++; - r = vhost_verify_ring_part_mapping(vq->avail, vq->avail_phys, + r = vhost_verify_ring_part_mapping(dev, vq->avail, vq->avail_phys, vq->avail_size, start_addr, size); if (!r) { break; } j++; - r = vhost_verify_ring_part_mapping(vq->used, vq->used_phys, + r = vhost_verify_ring_part_mapping(dev, vq->used, vq->used_phys, vq->used_size, start_addr, size); if (!r) { break; @@ -715,7 +744,8 @@ static int vhost_virtqueue_set_addr(struct vhost_dev *dev, return 0; } -static int vhost_dev_set_features(struct vhost_dev *dev, bool enable_log) +static int vhost_dev_set_features(struct vhost_dev *dev, + bool enable_log) { uint64_t features = dev->acked_features; int r; @@ -858,6 +888,56 @@ static int vhost_virtqueue_set_vring_endian_legacy(struct vhost_dev *dev, return -errno; } +static int vhost_memory_region_lookup(struct vhost_dev *hdev, + uint64_t gpa, uint64_t *uaddr, + uint64_t *len) +{ + int i; + + for (i = 0; i < hdev->mem->nregions; i++) { + struct vhost_memory_region *reg = hdev->mem->regions + i; + + if (gpa >= reg->guest_phys_addr && + reg->guest_phys_addr + reg->memory_size > gpa) { + *uaddr = reg->userspace_addr + gpa - reg->guest_phys_addr; + *len = reg->guest_phys_addr + reg->memory_size - gpa; + return 0; + } + } + + return -EFAULT; +} + +void vhost_device_iotlb_miss(struct vhost_dev *dev, uint64_t iova, int write) +{ + IOMMUTLBEntry iotlb; + uint64_t uaddr, len; + + rcu_read_lock(); + + iotlb = address_space_get_iotlb_entry(dev->vdev->dma_as, + iova, write); + if (iotlb.target_as != NULL) { + if (vhost_memory_region_lookup(dev, iotlb.translated_addr, + &uaddr, &len)) { + error_report("Fail to lookup the translated address " + "%"PRIx64, iotlb.translated_addr); + goto out; + } + + len = MIN(iotlb.addr_mask + 1, len); + iova = iova & ~iotlb.addr_mask; + + if (dev->vhost_ops->vhost_update_device_iotlb(dev, iova, uaddr, + len, iotlb.perm)) { + error_report("Fail to update device iotlb"); + goto out; + } + } +out: + rcu_read_unlock(); +} + static int vhost_virtqueue_start(struct vhost_dev *dev, struct VirtIODevice *vdev, struct vhost_virtqueue *vq, @@ -903,21 +983,21 @@ static int vhost_virtqueue_start(struct vhost_dev *dev, vq->desc_size = s = l = virtio_queue_get_desc_size(vdev, idx); vq->desc_phys = a = virtio_queue_get_desc_addr(vdev, idx); - vq->desc = cpu_physical_memory_map(a, &l, 0); + vq->desc = vhost_memory_map(dev, a, &l, 0); if (!vq->desc || l != s) { r = -ENOMEM; goto fail_alloc_desc; } vq->avail_size = s = l = virtio_queue_get_avail_size(vdev, idx); vq->avail_phys = a = virtio_queue_get_avail_addr(vdev, idx); - vq->avail = cpu_physical_memory_map(a, &l, 0); + vq->avail = vhost_memory_map(dev, a, &l, 0); if (!vq->avail || l != s) { r = -ENOMEM; goto fail_alloc_avail; } vq->used_size = s = l = virtio_queue_get_used_size(vdev, idx); vq->used_phys = a = virtio_queue_get_used_addr(vdev, idx); - vq->used = cpu_physical_memory_map(a, &l, 1); + vq->used = vhost_memory_map(dev, a, &l, 1); if (!vq->used || l != s) { r = -ENOMEM; goto fail_alloc_used; @@ -963,14 +1043,14 @@ static int vhost_virtqueue_start(struct vhost_dev *dev, fail_vector: fail_kick: fail_alloc: - cpu_physical_memory_unmap(vq->used, virtio_queue_get_used_size(vdev, idx), - 0, 0); + vhost_memory_unmap(dev, vq->used, virtio_queue_get_used_size(vdev, idx), + 0, 0); fail_alloc_used: - cpu_physical_memory_unmap(vq->avail, virtio_queue_get_avail_size(vdev, idx), - 0, 0); + vhost_memory_unmap(dev, vq->avail, virtio_queue_get_avail_size(vdev, idx), + 0, 0); fail_alloc_avail: - cpu_physical_memory_unmap(vq->desc, virtio_queue_get_desc_size(vdev, idx), - 0, 0); + vhost_memory_unmap(dev, vq->desc, virtio_queue_get_desc_size(vdev, idx), + 0, 0); fail_alloc_desc: return r; } @@ -1004,12 +1084,12 @@ static void vhost_virtqueue_stop(struct vhost_dev *dev, vhost_vq_index); } - cpu_physical_memory_unmap(vq->used, virtio_queue_get_used_size(vdev, idx), - 1, virtio_queue_get_used_size(vdev, idx)); - cpu_physical_memory_unmap(vq->avail, virtio_queue_get_avail_size(vdev, idx), - 0, virtio_queue_get_avail_size(vdev, idx)); - cpu_physical_memory_unmap(vq->desc, virtio_queue_get_desc_size(vdev, idx), - 0, virtio_queue_get_desc_size(vdev, idx)); + vhost_memory_unmap(dev, vq->used, virtio_queue_get_used_size(vdev, idx), + 1, virtio_queue_get_used_size(vdev, idx)); + vhost_memory_unmap(dev, vq->avail, virtio_queue_get_avail_size(vdev, idx), + 0, virtio_queue_get_avail_size(vdev, idx)); + vhost_memory_unmap(dev, vq->desc, virtio_queue_get_desc_size(vdev, idx), + 0, virtio_queue_get_desc_size(vdev, idx)); } static void vhost_eventfd_add(MemoryListener *listener, @@ -1066,6 +1146,9 @@ static int vhost_virtqueue_init(struct vhost_dev *dev, r = -errno; goto fail_call; } + + vq->dev = dev; + return 0; fail_call: event_notifier_cleanup(&vq->masked_notifier); @@ -1077,12 +1160,24 @@ static void vhost_virtqueue_cleanup(struct vhost_virtqueue *vq) event_notifier_cleanup(&vq->masked_notifier); } +static void vhost_iommu_unmap_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb) +{ + struct vhost_dev *hdev = container_of(n, struct vhost_dev, n); + + if (hdev->vhost_ops->vhost_invalidate_device_iotlb(hdev, + iotlb->iova, + iotlb->addr_mask + 1)) { + error_report("Fail to invalidate device iotlb"); + } +} + int vhost_dev_init(struct vhost_dev *hdev, void *opaque, VhostBackendType backend_type, uint32_t busyloop_timeout) { uint64_t features; int i, r, n_initialized_vqs = 0; + hdev->vdev = NULL; hdev->migration_blocker = NULL; r = vhost_set_backend_type(hdev, backend_type); @@ -1147,6 +1242,9 @@ int vhost_dev_init(struct vhost_dev *hdev, void *opaque, .priority = 10 }; + hdev->n.notify = vhost_iommu_unmap_notify; + hdev->n.notifier_flags = IOMMU_NOTIFIER_UNMAP; + if (hdev->migration_blocker == NULL) { if (!(hdev->features & (0x1ULL << VHOST_F_LOG_ALL))) { error_setg(&hdev->migration_blocker, @@ -1342,11 +1440,18 @@ int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev) assert(hdev->vhost_ops); hdev->started = true; + hdev->vdev = vdev; r = vhost_dev_set_features(hdev, hdev->log_enabled); if (r < 0) { goto fail_features; } + + if (vhost_dev_has_iommu(hdev)) { + memory_region_register_iommu_notifier(vdev->dma_as->root, + &hdev->n); + } + r = hdev->vhost_ops->vhost_set_mem_table(hdev, hdev->mem); if (r < 0) { VHOST_OPS_DEBUG("vhost_set_mem_table failed"); @@ -1380,6 +1485,16 @@ int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev) } } + if (vhost_dev_has_iommu(hdev)) { + hdev->vhost_ops->vhost_set_iotlb_callback(hdev, true); + + /* Update used ring information for IOTLB to work correctly, + * vhost-kernel code requires for this.*/ + for (i = 0; i < hdev->nvqs; ++i) { + struct vhost_virtqueue *vq = hdev->vqs + i; + vhost_device_iotlb_miss(hdev, vq->used_phys, true); + } + } return 0; fail_log: vhost_log_put(hdev, false); @@ -1391,6 +1506,7 @@ fail_vq: hdev->vq_index + i); } i = hdev->nvqs; + fail_mem: fail_features: @@ -1413,8 +1529,14 @@ void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev) hdev->vq_index + i); } + if (vhost_dev_has_iommu(hdev)) { + hdev->vhost_ops->vhost_set_iotlb_callback(hdev, false); + memory_region_unregister_iommu_notifier(vdev->dma_as->root, + &hdev->n); + } vhost_log_put(hdev, true); hdev->started = false; + hdev->vdev = NULL; } int vhost_net_set_backend(struct vhost_dev *hdev, diff --git a/include/hw/virtio/vhost-backend.h b/include/hw/virtio/vhost-backend.h index 30abc11cf1..c3cf4a72bc 100644 --- a/include/hw/virtio/vhost-backend.h +++ b/include/hw/virtio/vhost-backend.h @@ -11,6 +11,8 @@ #ifndef VHOST_BACKEND_H #define VHOST_BACKEND_H +#include "exec/memory.h" + typedef enum VhostBackendType { VHOST_BACKEND_TYPE_NONE = 0, VHOST_BACKEND_TYPE_KERNEL = 1, @@ -77,6 +79,14 @@ typedef bool (*vhost_backend_can_merge_op)(struct vhost_dev *dev, typedef int (*vhost_vsock_set_guest_cid_op)(struct vhost_dev *dev, uint64_t guest_cid); typedef int (*vhost_vsock_set_running_op)(struct vhost_dev *dev, int start); +typedef void (*vhost_set_iotlb_callback_op)(struct vhost_dev *dev, + int enabled); +typedef int (*vhost_update_device_iotlb_op)(struct vhost_dev *dev, + uint64_t iova, uint64_t uaddr, + uint64_t len, + IOMMUAccessFlags perm); +typedef int (*vhost_invalidate_device_iotlb_op)(struct vhost_dev *dev, + uint64_t iova, uint64_t len); typedef struct VhostOps { VhostBackendType backend_type; @@ -109,6 +119,9 @@ typedef struct VhostOps { vhost_backend_can_merge_op vhost_backend_can_merge; vhost_vsock_set_guest_cid_op vhost_vsock_set_guest_cid; vhost_vsock_set_running_op vhost_vsock_set_running; + vhost_set_iotlb_callback_op vhost_set_iotlb_callback; + vhost_update_device_iotlb_op vhost_update_device_iotlb; + vhost_invalidate_device_iotlb_op vhost_invalidate_device_iotlb; } VhostOps; extern const VhostOps user_ops; diff --git a/include/hw/virtio/vhost.h b/include/hw/virtio/vhost.h index 1fe5aadef5..52f633ec89 100644 --- a/include/hw/virtio/vhost.h +++ b/include/hw/virtio/vhost.h @@ -21,6 +21,7 @@ struct vhost_virtqueue { unsigned long long used_phys; unsigned used_size; EventNotifier masked_notifier; + struct vhost_dev *dev; }; typedef unsigned long vhost_log_chunk_t; @@ -38,6 +39,7 @@ struct vhost_log { struct vhost_memory; struct vhost_dev { + VirtIODevice *vdev; MemoryListener memory_listener; struct vhost_memory *mem; int n_mem_sections; @@ -62,6 +64,7 @@ struct vhost_dev { void *opaque; struct vhost_log *log; QLIST_ENTRY(vhost_dev) entry; + IOMMUNotifier n; }; int vhost_dev_init(struct vhost_dev *hdev, void *opaque, @@ -91,4 +94,5 @@ bool vhost_has_free_slot(void); int vhost_net_set_backend(struct vhost_dev *hdev, struct vhost_vring_file *file); +void vhost_device_iotlb_miss(struct vhost_dev *dev, uint64_t iova, int write); #endif diff --git a/net/tap.c b/net/tap.c index b6896a7b7c..86071b2659 100644 --- a/net/tap.c +++ b/net/tap.c @@ -696,6 +696,7 @@ static void net_init_tap_one(const NetdevTapOptions *tap, NetClientState *peer, "tap: open vhost char device failed"); return; } + fcntl(vhostfd, F_SETFL, O_NONBLOCK); } options.opaque = (void *)(uintptr_t)vhostfd; -- cgit v1.2.3-55-g7522 From 1aea7a5b7e604598066dda8919339dab83fd8089 Mon Sep 17 00:00:00 2001 From: Michael S. Tsirkin Date: Thu, 12 Jan 2017 23:19:40 +0200 Subject: virtio: drop an obsolete comment virtio core has code to revert queue number to maximum on reset. Drop TODO to add that. Signed-off-by: Michael S. Tsirkin Reviewed-by: Stefan Hajnoczi --- hw/virtio/virtio-pci.c | 1 - 1 file changed, 1 deletion(-) (limited to 'hw/virtio') diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c index 8baaf2ba6d..09230c05df 100644 --- a/hw/virtio/virtio-pci.c +++ b/hw/virtio/virtio-pci.c @@ -1316,7 +1316,6 @@ static void virtio_pci_common_write(void *opaque, hwaddr addr, virtio_queue_set_vector(vdev, vdev->queue_sel, val); break; case VIRTIO_PCI_COMMON_Q_ENABLE: - /* TODO: need a way to put num back on reset. */ virtio_queue_set_num(vdev, vdev->queue_sel, proxy->vqs[vdev->queue_sel].num); virtio_queue_set_rings(vdev, vdev->queue_sel, -- cgit v1.2.3-55-g7522 From 7e71da7f124fc47a2f2bc1fbfb32f0e8ee3e7d44 Mon Sep 17 00:00:00 2001 From: Michael S. Tsirkin Date: Fri, 13 Jan 2017 00:14:55 +0200 Subject: virtio-mmio: switch to linux headers Switch to virtio_mmio.h from Linux - will make it easier to implement virtio 1. Signed-off-by: Michael S. Tsirkin --- hw/virtio/virtio-mmio.c | 95 +++++++++++++++++++------------------------------ 1 file changed, 37 insertions(+), 58 deletions(-) (limited to 'hw/virtio') diff --git a/hw/virtio/virtio-mmio.c b/hw/virtio/virtio-mmio.c index 60654dc19d..5807aa87fe 100644 --- a/hw/virtio/virtio-mmio.c +++ b/hw/virtio/virtio-mmio.c @@ -20,6 +20,7 @@ */ #include "qemu/osdep.h" +#include "standard-headers/linux/virtio_mmio.h" #include "hw/sysbus.h" #include "hw/virtio/virtio.h" #include "qemu/host-utils.h" @@ -52,28 +53,6 @@ do { printf("virtio_mmio: " fmt , ## __VA_ARGS__); } while (0) #define VIRTIO_MMIO(obj) \ OBJECT_CHECK(VirtIOMMIOProxy, (obj), TYPE_VIRTIO_MMIO) -/* Memory mapped register offsets */ -#define VIRTIO_MMIO_MAGIC 0x0 -#define VIRTIO_MMIO_VERSION 0x4 -#define VIRTIO_MMIO_DEVICEID 0x8 -#define VIRTIO_MMIO_VENDORID 0xc -#define VIRTIO_MMIO_HOSTFEATURES 0x10 -#define VIRTIO_MMIO_HOSTFEATURESSEL 0x14 -#define VIRTIO_MMIO_GUESTFEATURES 0x20 -#define VIRTIO_MMIO_GUESTFEATURESSEL 0x24 -#define VIRTIO_MMIO_GUESTPAGESIZE 0x28 -#define VIRTIO_MMIO_QUEUESEL 0x30 -#define VIRTIO_MMIO_QUEUENUMMAX 0x34 -#define VIRTIO_MMIO_QUEUENUM 0x38 -#define VIRTIO_MMIO_QUEUEALIGN 0x3c -#define VIRTIO_MMIO_QUEUEPFN 0x40 -#define VIRTIO_MMIO_QUEUENOTIFY 0x50 -#define VIRTIO_MMIO_INTERRUPTSTATUS 0x60 -#define VIRTIO_MMIO_INTERRUPTACK 0x64 -#define VIRTIO_MMIO_STATUS 0x70 -/* Device specific config space starts here */ -#define VIRTIO_MMIO_CONFIG 0x100 - #define VIRT_MAGIC 0x74726976 /* 'virt' */ #define VIRT_VERSION 1 #define VIRT_VENDOR 0x554D4551 /* 'QEMU' */ @@ -104,10 +83,10 @@ static int virtio_mmio_ioeventfd_assign(DeviceState *d, VirtIOMMIOProxy *proxy = VIRTIO_MMIO(d); if (assign) { - memory_region_add_eventfd(&proxy->iomem, VIRTIO_MMIO_QUEUENOTIFY, 4, + memory_region_add_eventfd(&proxy->iomem, VIRTIO_MMIO_QUEUE_NOTIFY, 4, true, n, notifier); } else { - memory_region_del_eventfd(&proxy->iomem, VIRTIO_MMIO_QUEUENOTIFY, 4, + memory_region_del_eventfd(&proxy->iomem, VIRTIO_MMIO_QUEUE_NOTIFY, 4, true, n, notifier); } return 0; @@ -140,11 +119,11 @@ static uint64_t virtio_mmio_read(void *opaque, hwaddr offset, unsigned size) * device ID of zero means no backend will claim it. */ switch (offset) { - case VIRTIO_MMIO_MAGIC: + case VIRTIO_MMIO_MAGIC_VALUE: return VIRT_MAGIC; case VIRTIO_MMIO_VERSION: return VIRT_VERSION; - case VIRTIO_MMIO_VENDORID: + case VIRTIO_MMIO_VENDOR_ID: return VIRT_VENDOR; default: return 0; @@ -169,40 +148,40 @@ static uint64_t virtio_mmio_read(void *opaque, hwaddr offset, unsigned size) return 0; } switch (offset) { - case VIRTIO_MMIO_MAGIC: + case VIRTIO_MMIO_MAGIC_VALUE: return VIRT_MAGIC; case VIRTIO_MMIO_VERSION: return VIRT_VERSION; - case VIRTIO_MMIO_DEVICEID: + case VIRTIO_MMIO_DEVICE_ID: return vdev->device_id; - case VIRTIO_MMIO_VENDORID: + case VIRTIO_MMIO_VENDOR_ID: return VIRT_VENDOR; - case VIRTIO_MMIO_HOSTFEATURES: + case VIRTIO_MMIO_DEVICE_FEATURES: if (proxy->host_features_sel) { return 0; } return vdev->host_features; - case VIRTIO_MMIO_QUEUENUMMAX: + case VIRTIO_MMIO_QUEUE_NUM_MAX: if (!virtio_queue_get_num(vdev, vdev->queue_sel)) { return 0; } return VIRTQUEUE_MAX_SIZE; - case VIRTIO_MMIO_QUEUEPFN: + case VIRTIO_MMIO_QUEUE_PFN: return virtio_queue_get_addr(vdev, vdev->queue_sel) >> proxy->guest_page_shift; - case VIRTIO_MMIO_INTERRUPTSTATUS: + case VIRTIO_MMIO_INTERRUPT_STATUS: return atomic_read(&vdev->isr); case VIRTIO_MMIO_STATUS: return vdev->status; - case VIRTIO_MMIO_HOSTFEATURESSEL: - case VIRTIO_MMIO_GUESTFEATURES: - case VIRTIO_MMIO_GUESTFEATURESSEL: - case VIRTIO_MMIO_GUESTPAGESIZE: - case VIRTIO_MMIO_QUEUESEL: - case VIRTIO_MMIO_QUEUENUM: - case VIRTIO_MMIO_QUEUEALIGN: - case VIRTIO_MMIO_QUEUENOTIFY: - case VIRTIO_MMIO_INTERRUPTACK: + case VIRTIO_MMIO_DEVICE_FEATURES_SEL: + case VIRTIO_MMIO_DRIVER_FEATURES: + case VIRTIO_MMIO_DRIVER_FEATURES_SEL: + case VIRTIO_MMIO_GUEST_PAGE_SIZE: + case VIRTIO_MMIO_QUEUE_SEL: + case VIRTIO_MMIO_QUEUE_NUM: + case VIRTIO_MMIO_QUEUE_ALIGN: + case VIRTIO_MMIO_QUEUE_NOTIFY: + case VIRTIO_MMIO_INTERRUPT_ACK: DPRINTF("read of write-only register\n"); return 0; default: @@ -251,18 +230,18 @@ static void virtio_mmio_write(void *opaque, hwaddr offset, uint64_t value, return; } switch (offset) { - case VIRTIO_MMIO_HOSTFEATURESSEL: + case VIRTIO_MMIO_DEVICE_FEATURES_SEL: proxy->host_features_sel = value; break; - case VIRTIO_MMIO_GUESTFEATURES: + case VIRTIO_MMIO_DRIVER_FEATURES: if (!proxy->guest_features_sel) { virtio_set_features(vdev, value); } break; - case VIRTIO_MMIO_GUESTFEATURESSEL: + case VIRTIO_MMIO_DRIVER_FEATURES_SEL: proxy->guest_features_sel = value; break; - case VIRTIO_MMIO_GUESTPAGESIZE: + case VIRTIO_MMIO_GUEST_PAGE_SIZE: proxy->guest_page_shift = ctz32(value); if (proxy->guest_page_shift > 31) { proxy->guest_page_shift = 0; @@ -270,22 +249,22 @@ static void virtio_mmio_write(void *opaque, hwaddr offset, uint64_t value, DPRINTF("guest page size %" PRIx64 " shift %d\n", value, proxy->guest_page_shift); break; - case VIRTIO_MMIO_QUEUESEL: + case VIRTIO_MMIO_QUEUE_SEL: if (value < VIRTIO_QUEUE_MAX) { vdev->queue_sel = value; } break; - case VIRTIO_MMIO_QUEUENUM: + case VIRTIO_MMIO_QUEUE_NUM: DPRINTF("mmio_queue write %d max %d\n", (int)value, VIRTQUEUE_MAX_SIZE); virtio_queue_set_num(vdev, vdev->queue_sel, value); /* Note: only call this function for legacy devices */ virtio_queue_update_rings(vdev, vdev->queue_sel); break; - case VIRTIO_MMIO_QUEUEALIGN: + case VIRTIO_MMIO_QUEUE_ALIGN: /* Note: this is only valid for legacy devices */ virtio_queue_set_align(vdev, vdev->queue_sel, value); break; - case VIRTIO_MMIO_QUEUEPFN: + case VIRTIO_MMIO_QUEUE_PFN: if (value == 0) { virtio_reset(vdev); } else { @@ -293,12 +272,12 @@ static void virtio_mmio_write(void *opaque, hwaddr offset, uint64_t value, value << proxy->guest_page_shift); } break; - case VIRTIO_MMIO_QUEUENOTIFY: + case VIRTIO_MMIO_QUEUE_NOTIFY: if (value < VIRTIO_QUEUE_MAX) { virtio_queue_notify(vdev, value); } break; - case VIRTIO_MMIO_INTERRUPTACK: + case VIRTIO_MMIO_INTERRUPT_ACK: atomic_and(&vdev->isr, ~value); virtio_update_irq(vdev); break; @@ -317,13 +296,13 @@ static void virtio_mmio_write(void *opaque, hwaddr offset, uint64_t value, virtio_reset(vdev); } break; - case VIRTIO_MMIO_MAGIC: + case VIRTIO_MMIO_MAGIC_VALUE: case VIRTIO_MMIO_VERSION: - case VIRTIO_MMIO_DEVICEID: - case VIRTIO_MMIO_VENDORID: - case VIRTIO_MMIO_HOSTFEATURES: - case VIRTIO_MMIO_QUEUENUMMAX: - case VIRTIO_MMIO_INTERRUPTSTATUS: + case VIRTIO_MMIO_DEVICE_ID: + case VIRTIO_MMIO_VENDOR_ID: + case VIRTIO_MMIO_DEVICE_FEATURES: + case VIRTIO_MMIO_QUEUE_NUM_MAX: + case VIRTIO_MMIO_INTERRUPT_STATUS: DPRINTF("write to readonly register\n"); break; -- cgit v1.2.3-55-g7522 From 6bdc21c050a2a7b92cbbd0b2a1f8934e9b5f896f Mon Sep 17 00:00:00 2001 From: Michael S. Tsirkin Date: Wed, 18 Jan 2017 21:32:22 +0200 Subject: virtio: fix up max size checks Coverity reports that ARRAY_SIZE(elem->out_sg) (and all the others too) is wrong because elem->out_sg is a pointer. However, the check is not in the right place and the max_size argument of virtqueue_map_iovec can be removed. The check on in_num/out_num should be moved to qemu_get_virtqueue_element instead, before the call to virtqueue_alloc_element. Cc: qemu-stable@nongnu.org Reported-by: Paolo Bonzini Fixes: 3724650db07057333879484c8bc7d900b5c1bf8e ("virtio: introduce virtqueue_alloc_element") Signed-off-by: Michael S. Tsirkin Reviewed-by: Cornelia Huck --- hw/virtio/virtio.c | 33 +++++++++++++-------------------- 1 file changed, 13 insertions(+), 20 deletions(-) (limited to 'hw/virtio') diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index 34065c78f8..6e34f0527d 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -599,23 +599,11 @@ static void virtqueue_undo_map_desc(unsigned int out_num, unsigned int in_num, static void virtqueue_map_iovec(VirtIODevice *vdev, struct iovec *sg, hwaddr *addr, unsigned int *num_sg, - unsigned int max_size, int is_write) + int is_write) { unsigned int i; hwaddr len; - /* Note: this function MUST validate input, some callers - * are passing in num_sg values received over the network. - */ - /* TODO: teach all callers that this can fail, and return failure instead - * of asserting here. - * When we do, we might be able to re-enable NDEBUG below. - */ -#ifdef NDEBUG -#error building with NDEBUG is not supported -#endif - assert(*num_sg <= max_size); - for (i = 0; i < *num_sg; i++) { len = sg[i].iov_len; sg[i].iov_base = dma_memory_map(vdev->dma_as, @@ -635,13 +623,8 @@ static void virtqueue_map_iovec(VirtIODevice *vdev, struct iovec *sg, void virtqueue_map(VirtIODevice *vdev, VirtQueueElement *elem) { - virtqueue_map_iovec(vdev, elem->in_sg, elem->in_addr, &elem->in_num, - MIN(ARRAY_SIZE(elem->in_sg), ARRAY_SIZE(elem->in_addr)), - 1); - virtqueue_map_iovec(vdev, elem->out_sg, elem->out_addr, &elem->out_num, - MIN(ARRAY_SIZE(elem->out_sg), - ARRAY_SIZE(elem->out_addr)), - 0); + virtqueue_map_iovec(vdev, elem->in_sg, elem->in_addr, &elem->in_num, 1); + virtqueue_map_iovec(vdev, elem->out_sg, elem->out_addr, &elem->out_num, 0); } static void *virtqueue_alloc_element(size_t sz, unsigned out_num, unsigned in_num) @@ -840,6 +823,16 @@ void *qemu_get_virtqueue_element(VirtIODevice *vdev, QEMUFile *f, size_t sz) qemu_get_buffer(f, (uint8_t *)&data, sizeof(VirtQueueElementOld)); + /* TODO: teach all callers that this can fail, and return failure instead + * of asserting here. + * When we do, we might be able to re-enable NDEBUG below. + */ +#ifdef NDEBUG +#error building with NDEBUG is not supported +#endif + assert(ARRAY_SIZE(data.in_addr) >= data.in_num); + assert(ARRAY_SIZE(data.out_addr) >= data.out_num); + elem = virtqueue_alloc_element(sz, data.out_num, data.in_num); elem->index = data.index; -- cgit v1.2.3-55-g7522 From 2943b53f682f54548e7ddcf2ebb6c6d12d8dc821 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Tue, 17 Jan 2017 12:01:00 +0800 Subject: virtio: force VIRTIO_F_IOMMU_PLATFORM We allow vhost to clear VIRITO_F_IOMMU_PLATFORM which is wrong since VIRTIO_F_IOMMU_PLATFORM is mandatory for security. Fixing this by enforce it after vdc->get_features(). Signed-off-by: Jason Wang Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/virtio/virtio-bus.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'hw/virtio') diff --git a/hw/virtio/virtio-bus.c b/hw/virtio/virtio-bus.c index d31cc00e83..a886011e75 100644 --- a/hw/virtio/virtio-bus.c +++ b/hw/virtio/virtio-bus.c @@ -47,6 +47,7 @@ void virtio_bus_device_plugged(VirtIODevice *vdev, Error **errp) VirtioBusState *bus = VIRTIO_BUS(qbus); VirtioBusClass *klass = VIRTIO_BUS_GET_CLASS(bus); VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev); + bool has_iommu = virtio_host_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM); DPRINTF("%s: plug device.\n", qbus->name); @@ -63,8 +64,8 @@ void virtio_bus_device_plugged(VirtIODevice *vdev, Error **errp) klass->device_plugged(qbus->parent, errp); } - if (klass->get_dma_as != NULL && - virtio_host_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM)) { + if (klass->get_dma_as != NULL && has_iommu) { + virtio_add_feature(&vdev->host_features, VIRTIO_F_IOMMU_PLATFORM); vdev->dma_as = klass->get_dma_as(qbus->parent); } else { vdev->dma_as = &address_space_memory; -- cgit v1.2.3-55-g7522