From 715ca691daca081108b33306faa6fa102f0df8d8 Mon Sep 17 00:00:00 2001 From: Eric Auger Date: Mon, 6 Jul 2015 12:15:14 -0600 Subject: sysbus: add irq_routing_notifier Add a new connect_irq_notifier notifier in the SysBusDeviceClass. This notifier, if populated, is called after sysbus_connect_irq. This mechanism is used to setup VFIO signaling once VFIO platform devices get attached to their platform bus, on a machine init done notifier. Signed-off-by: Eric Auger Reviewed-by: Peter Crosthwaite Tested-by: Vikram Sethi Reviewed-by: Peter Maydell Signed-off-by: Alex Williamson --- include/hw/sysbus.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/hw') diff --git a/include/hw/sysbus.h b/include/hw/sysbus.h index 34f93c39bf..cc1dba49bf 100644 --- a/include/hw/sysbus.h +++ b/include/hw/sysbus.h @@ -58,6 +58,7 @@ typedef struct SysBusDeviceClass { * omitted then. (This is not considered a fatal error.) */ char *(*explicit_ofw_unit_address)(const SysBusDevice *dev); + void (*connect_irq_notifier)(SysBusDevice *dev, qemu_irq irq); } SysBusDeviceClass; struct SysBusDevice { -- cgit v1.2.3-55-g7522 From fb5f816499a5184a1336d72db030b8419b523082 Mon Sep 17 00:00:00 2001 From: Eric Auger Date: Mon, 6 Jul 2015 12:15:14 -0600 Subject: hw/vfio/platform: add irqfd support This patch aims at optimizing IRQ handling using irqfd framework. Instead of handling the eventfds on user-side they are handled on kernel side using - the KVM irqfd framework, - the VFIO driver virqfd framework. the virtual IRQ completion is trapped at interrupt controller This removes the need for fast/slow path swap. Overall this brings significant performance improvements. Signed-off-by: Alvise Rigo Signed-off-by: Eric Auger Reviewed-by: Alex Bennée Tested-by: Vikram Sethi Acked-by: Peter Maydell Signed-off-by: Alex Williamson --- hw/vfio/platform.c | 100 ++++++++++++++++++++++++++++++++++++++++ include/hw/vfio/vfio-platform.h | 2 + trace-events | 1 + 3 files changed, 103 insertions(+) (limited to 'include/hw') diff --git a/hw/vfio/platform.c b/hw/vfio/platform.c index 5c678b914e..60365d1279 100644 --- a/hw/vfio/platform.c +++ b/hw/vfio/platform.c @@ -26,6 +26,7 @@ #include "hw/sysbus.h" #include "trace.h" #include "hw/platform-bus.h" +#include "sysemu/kvm.h" /* * Functions used whatever the injection method @@ -51,6 +52,7 @@ static VFIOINTp *vfio_init_intp(VFIODevice *vbasedev, intp->pin = info.index; intp->flags = info.flags; intp->state = VFIO_IRQ_INACTIVE; + intp->kvm_accel = false; sysbus_init_irq(sbdev, &intp->qemuirq); @@ -61,6 +63,13 @@ static VFIOINTp *vfio_init_intp(VFIODevice *vbasedev, error_report("vfio: Error: trigger event_notifier_init failed "); return NULL; } + /* Get an eventfd for resample/unmask */ + ret = event_notifier_init(&intp->unmask, 0); + if (ret) { + g_free(intp); + error_report("vfio: Error: resamplefd event_notifier_init failed"); + return NULL; + } QLIST_INSERT_HEAD(&vdev->intp_list, intp, next); return intp; @@ -315,6 +324,94 @@ static int vfio_start_eventfd_injection(VFIOINTp *intp) return ret; } +/* + * Functions used for irqfd + */ + +/** + * vfio_set_resample_eventfd - sets the resamplefd for an IRQ + * @intp: the IRQ struct handle + * programs the VFIO driver to unmask this IRQ when the + * intp->unmask eventfd is triggered + */ +static int vfio_set_resample_eventfd(VFIOINTp *intp) +{ + VFIODevice *vbasedev = &intp->vdev->vbasedev; + struct vfio_irq_set *irq_set; + int argsz, ret; + int32_t *pfd; + + argsz = sizeof(*irq_set) + sizeof(*pfd); + irq_set = g_malloc0(argsz); + irq_set->argsz = argsz; + irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_UNMASK; + irq_set->index = intp->pin; + irq_set->start = 0; + irq_set->count = 1; + pfd = (int32_t *)&irq_set->data; + *pfd = event_notifier_get_fd(&intp->unmask); + qemu_set_fd_handler(*pfd, NULL, NULL, NULL); + ret = ioctl(vbasedev->fd, VFIO_DEVICE_SET_IRQS, irq_set); + g_free(irq_set); + if (ret < 0) { + error_report("vfio: Failed to set resample eventfd: %m"); + } + return ret; +} + +static void vfio_start_irqfd_injection(SysBusDevice *sbdev, qemu_irq irq) +{ + VFIOPlatformDevice *vdev = VFIO_PLATFORM_DEVICE(sbdev); + VFIOINTp *intp; + + if (!kvm_irqfds_enabled() || !kvm_resamplefds_enabled() || + !vdev->irqfd_allowed) { + return; + } + + QLIST_FOREACH(intp, &vdev->intp_list, next) { + if (intp->qemuirq == irq) { + break; + } + } + assert(intp); + + /* Get to a known interrupt state */ + qemu_set_fd_handler(event_notifier_get_fd(&intp->interrupt), + NULL, NULL, vdev); + + vfio_mask_single_irqindex(&vdev->vbasedev, intp->pin); + qemu_set_irq(intp->qemuirq, 0); + + if (kvm_irqchip_add_irqfd_notifier(kvm_state, &intp->interrupt, + &intp->unmask, irq) < 0) { + goto fail_irqfd; + } + + if (vfio_set_trigger_eventfd(intp, NULL) < 0) { + goto fail_vfio; + } + if (vfio_set_resample_eventfd(intp) < 0) { + goto fail_vfio; + } + + /* Let's resume injection with irqfd setup */ + vfio_unmask_single_irqindex(&vdev->vbasedev, intp->pin); + + intp->kvm_accel = true; + + trace_vfio_platform_start_irqfd_injection(intp->pin, + event_notifier_get_fd(&intp->interrupt), + event_notifier_get_fd(&intp->unmask)); + return; +fail_vfio: + kvm_irqchip_remove_irqfd_notifier(kvm_state, &intp->interrupt, irq); +fail_irqfd: + vfio_start_eventfd_injection(intp); + vfio_unmask_single_irqindex(&vdev->vbasedev, intp->pin); + return; +} + /* VFIO skeleton */ static void vfio_platform_compute_needs_reset(VFIODevice *vbasedev) @@ -584,17 +681,20 @@ static Property vfio_platform_dev_properties[] = { DEFINE_PROP_BOOL("x-mmap", VFIOPlatformDevice, vbasedev.allow_mmap, true), DEFINE_PROP_UINT32("mmap-timeout-ms", VFIOPlatformDevice, mmap_timeout, 1100), + DEFINE_PROP_BOOL("x-irqfd", VFIOPlatformDevice, irqfd_allowed, true), DEFINE_PROP_END_OF_LIST(), }; static void vfio_platform_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); + SysBusDeviceClass *sbc = SYS_BUS_DEVICE_CLASS(klass); dc->realize = vfio_platform_realize; dc->props = vfio_platform_dev_properties; dc->vmsd = &vfio_platform_vmstate; dc->desc = "VFIO-based platform device assignment"; + sbc->connect_irq_notifier = vfio_start_irqfd_injection; set_bit(DEVICE_CATEGORY_MISC, dc->categories); } diff --git a/include/hw/vfio/vfio-platform.h b/include/hw/vfio/vfio-platform.h index 26b2ad6f4e..c5cf1d79f3 100644 --- a/include/hw/vfio/vfio-platform.h +++ b/include/hw/vfio/vfio-platform.h @@ -41,6 +41,7 @@ typedef struct VFIOINTp { int state; /* inactive, pending, active */ uint8_t pin; /* index */ uint32_t flags; /* IRQ info flags */ + bool kvm_accel; /* set when QEMU bypass through KVM enabled */ } VFIOINTp; /* function type for user side eventfd handler */ @@ -57,6 +58,7 @@ typedef struct VFIOPlatformDevice { uint32_t mmap_timeout; /* delay to re-enable mmaps after interrupt */ QEMUTimer *mmap_timer; /* allows fast-path resume after IRQ hit */ QemuMutex intp_mutex; /* protect the intp_list IRQ state */ + bool irqfd_allowed; /* debug option to force irqfd on/off */ } VFIOPlatformDevice; typedef struct VFIOPlatformDeviceClass { diff --git a/trace-events b/trace-events index 52b7efa9a4..d24d80aed2 100644 --- a/trace-events +++ b/trace-events @@ -1594,6 +1594,7 @@ vfio_platform_intp_interrupt(int pin, int fd) "Inject IRQ #%d (fd = %d)" vfio_platform_intp_inject_pending_lockheld(int pin, int fd) "Inject pending IRQ #%d (fd = %d)" vfio_platform_populate_interrupts(int pin, int count, int flags) "- IRQ index %d: count %d, flags=0x%x" vfio_intp_interrupt_set_pending(int index) "irq %d is set PENDING" +vfio_platform_start_irqfd_injection(int index, int fd, int resamplefd) "IRQ index=%d, fd = %d, resamplefd = %d" #hw/acpi/memory_hotplug.c mhp_acpi_invalid_slot_selected(uint32_t slot) "0x%"PRIx32 -- cgit v1.2.3-55-g7522 From 43302969966bc3a95470bfc300289a83068ef5d9 Mon Sep 17 00:00:00 2001 From: Gabriel Laupre Date: Mon, 6 Jul 2015 12:15:15 -0600 Subject: vfio/pci : Add pba_offset PCI quirk for Chelsio T5 devices Fix pba_offset initialization value for Chelsio T5 Virtual Function device. The T5 hardware has a bug in it where it reports a Pending Interrupt Bit Array Offset of 0x8000 for its SR-IOV Virtual Functions instead of the 0x1000 that the hardware actually uses internally. As the hardware doesn't return the correct pba_offset value, add a quirk to instead return a hardcoded value of 0x1000 when a Chelsio T5 VF device is detected. This bug has been fixed in the Chelsio's next chip series T6 but there are no plans to respin the T5 ASIC for this bug. It is just documented in the T5 Errata and left it at that. Signed-off-by: Gabriel Laupre Reviewed-by: Bandan Das Signed-off-by: Alex Williamson --- hw/vfio/pci.c | 27 +++++++++++++++++++++++++++ include/hw/pci/pci_ids.h | 2 ++ 2 files changed, 29 insertions(+) (limited to 'include/hw') diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c index 27b7ec133e..2ed877fe9f 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -2252,6 +2252,33 @@ static int vfio_early_setup_msix(VFIOPCIDevice *vdev) vdev->msix->pba_offset = pba & ~PCI_MSIX_FLAGS_BIRMASK; vdev->msix->entries = (ctrl & PCI_MSIX_FLAGS_QSIZE) + 1; + /* + * Test the size of the pba_offset variable and catch if it extends outside + * of the specified BAR. If it is the case, we need to apply a hardware + * specific quirk if the device is known or we have a broken configuration. + */ + if (vdev->msix->pba_offset >= + vdev->bars[vdev->msix->pba_bar].region.size) { + + PCIDevice *pdev = &vdev->pdev; + uint16_t vendor = pci_get_word(pdev->config + PCI_VENDOR_ID); + uint16_t device = pci_get_word(pdev->config + PCI_DEVICE_ID); + + /* + * Chelsio T5 Virtual Function devices are encoded as 0x58xx for T5 + * adapters. The T5 hardware returns an incorrect value of 0x8000 for + * the VF PBA offset while the BAR itself is only 8k. The correct value + * is 0x1000, so we hard code that here. + */ + if (vendor == PCI_VENDOR_ID_CHELSIO && (device & 0xff00) == 0x5800) { + vdev->msix->pba_offset = 0x1000; + } else { + error_report("vfio: Hardware reports invalid configuration, " + "MSIX PBA outside of specified BAR"); + return -EINVAL; + } + } + trace_vfio_early_setup_msix(vdev->vbasedev.name, pos, vdev->msix->table_bar, vdev->msix->table_offset, diff --git a/include/hw/pci/pci_ids.h b/include/hw/pci/pci_ids.h index 49c062b8ce..d98e6c915d 100644 --- a/include/hw/pci/pci_ids.h +++ b/include/hw/pci/pci_ids.h @@ -114,6 +114,8 @@ #define PCI_VENDOR_ID_ENSONIQ 0x1274 #define PCI_DEVICE_ID_ENSONIQ_ES1370 0x5000 +#define PCI_VENDOR_ID_CHELSIO 0x1425 + #define PCI_VENDOR_ID_FREESCALE 0x1957 #define PCI_DEVICE_ID_MPC8533E 0x0030 -- cgit v1.2.3-55-g7522