summaryrefslogtreecommitdiffstats
path: root/hw/vfio/pci.c
diff options
context:
space:
mode:
Diffstat (limited to 'hw/vfio/pci.c')
-rw-r--r-- hw/vfio/pci.c 234
1 files changed, 139 insertions, 95 deletions
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 9fd9faee1d..939dcc3d4a 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -45,8 +45,12 @@
#define TYPE_VFIO_PCI_NOHOTPLUG "vfio-pci-nohotplug"
+/* Protected by BQL */
+static KVMRouteChange vfio_route_change;
+
static void vfio_disable_interrupts(VFIOPCIDevice *vdev);
static void vfio_mmap_set_enabled(VFIOPCIDevice *vdev, bool enabled);
+static void vfio_msi_disable_common(VFIOPCIDevice *vdev);
/*
* Disabling BAR mmaping can be slow, but toggling it around INTx can
@@ -412,33 +416,36 @@ static int vfio_enable_vectors(VFIOPCIDevice *vdev, bool msix)
static void vfio_add_kvm_msi_virq(VFIOPCIDevice *vdev, VFIOMSIVector *vector,
int vector_n, bool msix)
{
- KVMRouteChange c;
- int virq;
-
if ((msix && vdev->no_kvm_msix) || (!msix && vdev->no_kvm_msi)) {
return;
}
- if (event_notifier_init(&vector->kvm_interrupt, 0)) {
+ vector->virq = kvm_irqchip_add_msi_route(&vfio_route_change,
+ vector_n, &vdev->pdev);
+}
+
+static void vfio_connect_kvm_msi_virq(VFIOMSIVector *vector)
+{
+ if (vector->virq < 0) {
return;
}
- c = kvm_irqchip_begin_route_changes(kvm_state);
- virq = kvm_irqchip_add_msi_route(&c, vector_n, &vdev->pdev);
- if (virq < 0) {
- event_notifier_cleanup(&vector->kvm_interrupt);
- return;
+ if (event_notifier_init(&vector->kvm_interrupt, 0)) {
+ goto fail_notifier;
}
- kvm_irqchip_commit_route_changes(&c);
if (kvm_irqchip_add_irqfd_notifier_gsi(kvm_state, &vector->kvm_interrupt,
- NULL, virq) < 0) {
- kvm_irqchip_release_virq(kvm_state, virq);
- event_notifier_cleanup(&vector->kvm_interrupt);
- return;
+ NULL, vector->virq) < 0) {
+ goto fail_kvm;
}
- vector->virq = virq;
+ return;
+
+fail_kvm:
+ event_notifier_cleanup(&vector->kvm_interrupt);
+fail_notifier:
+ kvm_irqchip_release_virq(kvm_state, vector->virq);
+ vector->virq = -1;
}
static void vfio_remove_kvm_msi_virq(VFIOMSIVector *vector)
@@ -493,7 +500,14 @@ static int vfio_msix_vector_do_use(PCIDevice *pdev, unsigned int nr,
}
} else {
if (msg) {
- vfio_add_kvm_msi_virq(vdev, vector, nr, true);
+ if (vdev->defer_kvm_irq_routing) {
+ vfio_add_kvm_msi_virq(vdev, vector, nr, true);
+ } else {
+ vfio_route_change = kvm_irqchip_begin_route_changes(kvm_state);
+ vfio_add_kvm_msi_virq(vdev, vector, nr, true);
+ kvm_irqchip_commit_route_changes(&vfio_route_change);
+ vfio_connect_kvm_msi_virq(vector);
+ }
}
}
@@ -503,11 +517,13 @@ static int vfio_msix_vector_do_use(PCIDevice *pdev, unsigned int nr,
* increase them as needed.
*/
if (vdev->nr_vectors < nr + 1) {
- vfio_disable_irqindex(&vdev->vbasedev, VFIO_PCI_MSIX_IRQ_INDEX);
vdev->nr_vectors = nr + 1;
- ret = vfio_enable_vectors(vdev, true);
- if (ret) {
- error_report("vfio: failed to enable vectors, %d", ret);
+ if (!vdev->defer_kvm_irq_routing) {
+ vfio_disable_irqindex(&vdev->vbasedev, VFIO_PCI_MSIX_IRQ_INDEX);
+ ret = vfio_enable_vectors(vdev, true);
+ if (ret) {
+ error_report("vfio: failed to enable vectors, %d", ret);
+ }
}
} else {
Error *err = NULL;
@@ -569,11 +585,29 @@ static void vfio_msix_vector_release(PCIDevice *pdev, unsigned int nr)
}
}
-static void vfio_msix_enable(VFIOPCIDevice *vdev)
+static void vfio_prepare_kvm_msi_virq_batch(VFIOPCIDevice *vdev)
{
- PCIDevice *pdev = &vdev->pdev;
- unsigned int nr, max_vec = 0;
+ assert(!vdev->defer_kvm_irq_routing);
+ vdev->defer_kvm_irq_routing = true;
+ vfio_route_change = kvm_irqchip_begin_route_changes(kvm_state);
+}
+
+static void vfio_commit_kvm_msi_virq_batch(VFIOPCIDevice *vdev)
+{
+ int i;
+
+ assert(vdev->defer_kvm_irq_routing);
+ vdev->defer_kvm_irq_routing = false;
+
+ kvm_irqchip_commit_route_changes(&vfio_route_change);
+
+ for (i = 0; i < vdev->nr_vectors; i++) {
+ vfio_connect_kvm_msi_virq(&vdev->msi_vectors[i]);
+ }
+}
+static void vfio_msix_enable(VFIOPCIDevice *vdev)
+{
vfio_disable_interrupts(vdev);
vdev->msi_vectors = g_new0(VFIOMSIVector, vdev->msix->entries);
@@ -581,37 +615,45 @@ static void vfio_msix_enable(VFIOPCIDevice *vdev)
vdev->interrupt = VFIO_INT_MSIX;
/*
- * Some communication channels between VF & PF or PF & fw rely on the
- * physical state of the device and expect that enabling MSI-X from the
- * guest enables the same on the host. When our guest is Linux, the
- * guest driver call to pci_enable_msix() sets the enabling bit in the
- * MSI-X capability, but leaves the vector table masked. We therefore
- * can't rely on a vector_use callback (from request_irq() in the guest)
- * to switch the physical device into MSI-X mode because that may come a
- * long time after pci_enable_msix(). This code enables vector 0 with
- * triggering to userspace, then immediately release the vector, leaving
- * the physical device with no vectors enabled, but MSI-X enabled, just
- * like the guest view.
- * If there are already unmasked vectors (in migration resume phase and
- * some guest startups) which will be enabled soon, we can allocate all
- * of them here to avoid inefficiently disabling and enabling vectors
- * repeatedly later.
+ * Setting vector notifiers triggers synchronous vector-use
+ * callbacks for each active vector. Deferring the KVM route
+ * commit so that it happens once, rather than once per vector,
+ * provides a substantial performance improvement.
*/
- if (!pdev->msix_function_masked) {
- for (nr = 0; nr < msix_nr_vectors_allocated(pdev); nr++) {
- if (!msix_is_masked(pdev, nr)) {
- max_vec = nr;
- }
- }
- }
- vfio_msix_vector_do_use(pdev, max_vec, NULL, NULL);
- vfio_msix_vector_release(pdev, max_vec);
+ vfio_prepare_kvm_msi_virq_batch(vdev);
- if (msix_set_vector_notifiers(pdev, vfio_msix_vector_use,
+ if (msix_set_vector_notifiers(&vdev->pdev, vfio_msix_vector_use,
vfio_msix_vector_release, NULL)) {
error_report("vfio: msix_set_vector_notifiers failed");
}
+ vfio_commit_kvm_msi_virq_batch(vdev);
+
+ if (vdev->nr_vectors) {
+ int ret;
+
+ ret = vfio_enable_vectors(vdev, true);
+ if (ret) {
+ error_report("vfio: failed to enable vectors, %d", ret);
+ }
+ } else {
+ /*
+ * Some communication channels between VF & PF or PF & fw rely on the
+ * physical state of the device and expect that enabling MSI-X from the
+ * guest enables the same on the host. When our guest is Linux, the
+ * guest driver call to pci_enable_msix() sets the enabling bit in the
+ * MSI-X capability, but leaves the vector table masked. We therefore
+ * can't rely on a vector_use callback (from request_irq() in the guest)
+ * to switch the physical device into MSI-X mode because that may come a
+ * long time after pci_enable_msix(). This code enables vector 0 with
+ * triggering to userspace, then immediately releases the vector,
+ * leaving the physical device with no vectors enabled, but MSI-X
+ * enabled, just like the guest view.
+ */
+ vfio_msix_vector_do_use(&vdev->pdev, 0, NULL, NULL);
+ vfio_msix_vector_release(&vdev->pdev, 0);
+ }
+
trace_vfio_msix_enable(vdev->vbasedev.name);
}
@@ -621,6 +663,13 @@ static void vfio_msi_enable(VFIOPCIDevice *vdev)
vfio_disable_interrupts(vdev);
+ /*
+ * Setting vector notifiers needs to enable a route for each vector.
+ * Deferring the KVM route commit so that it happens once, rather than
+ * once per vector, provides a substantial performance improvement.
+ */
+ vfio_prepare_kvm_msi_virq_batch(vdev);
+
vdev->nr_vectors = msi_nr_vectors_allocated(&vdev->pdev);
retry:
vdev->msi_vectors = g_new0(VFIOMSIVector, vdev->nr_vectors);
@@ -646,6 +695,8 @@ retry:
vfio_add_kvm_msi_virq(vdev, vector, i, false);
}
+ vfio_commit_kvm_msi_virq_batch(vdev);
+
/* Set interrupt type prior to possible interrupts */
vdev->interrupt = VFIO_INT_MSI;
@@ -653,29 +704,17 @@ retry:
if (ret) {
if (ret < 0) {
error_report("vfio: Error: Failed to setup MSI fds: %m");
- } else if (ret != vdev->nr_vectors) {
+ } else {
error_report("vfio: Error: Failed to enable %d "
"MSI vectors, retry with %d", vdev->nr_vectors, ret);
}
- for (i = 0; i < vdev->nr_vectors; i++) {
- VFIOMSIVector *vector = &vdev->msi_vectors[i];
- if (vector->virq >= 0) {
- vfio_remove_kvm_msi_virq(vector);
- }
- qemu_set_fd_handler(event_notifier_get_fd(&vector->interrupt),
- NULL, NULL, NULL);
- event_notifier_cleanup(&vector->interrupt);
- }
-
- g_free(vdev->msi_vectors);
- vdev->msi_vectors = NULL;
+ vfio_msi_disable_common(vdev);
- if (ret > 0 && ret != vdev->nr_vectors) {
+ if (ret > 0) {
vdev->nr_vectors = ret;
goto retry;
}
- vdev->nr_vectors = 0;
/*
* Failing to setup MSI doesn't really fall within any specification.
@@ -683,7 +722,6 @@ retry:
* out to fall back to INTx for this device.
*/
error_report("vfio: Error: Failed to enable MSI");
- vdev->interrupt = VFIO_INT_NONE;
return;
}
@@ -693,7 +731,6 @@ retry:
static void vfio_msi_disable_common(VFIOPCIDevice *vdev)
{
- Error *err = NULL;
int i;
for (i = 0; i < vdev->nr_vectors; i++) {
@@ -712,15 +749,11 @@ static void vfio_msi_disable_common(VFIOPCIDevice *vdev)
vdev->msi_vectors = NULL;
vdev->nr_vectors = 0;
vdev->interrupt = VFIO_INT_NONE;
-
- vfio_intx_enable(vdev, &err);
- if (err) {
- error_reportf_err(err, VFIO_MSG_PREFIX, vdev->vbasedev.name);
- }
}
static void vfio_msix_disable(VFIOPCIDevice *vdev)
{
+ Error *err = NULL;
int i;
msix_unset_vector_notifiers(&vdev->pdev);
@@ -741,6 +774,10 @@ static void vfio_msix_disable(VFIOPCIDevice *vdev)
}
vfio_msi_disable_common(vdev);
+ vfio_intx_enable(vdev, &err);
+ if (err) {
+ error_reportf_err(err, VFIO_MSG_PREFIX, vdev->vbasedev.name);
+ }
memset(vdev->msix->pending, 0,
BITS_TO_LONGS(vdev->msix->entries) * sizeof(unsigned long));
@@ -750,8 +787,14 @@ static void vfio_msix_disable(VFIOPCIDevice *vdev)
static void vfio_msi_disable(VFIOPCIDevice *vdev)
{
+ Error *err = NULL;
+
vfio_disable_irqindex(&vdev->vbasedev, VFIO_PCI_MSI_IRQ_INDEX);
vfio_msi_disable_common(vdev);
+ vfio_intx_enable(vdev, &err);
+ if (err) {
+ error_reportf_err(err, VFIO_MSG_PREFIX, vdev->vbasedev.name);
+ }
trace_vfio_msi_disable(vdev->vbasedev.name);
}
@@ -2337,7 +2380,7 @@ static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single)
g_free(reset);
trace_vfio_pci_hot_reset_result(vdev->vbasedev.name,
- ret ? "%m" : "Success");
+ ret ? strerror(errno) : "Success");
out:
/* Re-enable INTx on affected devices */
@@ -2803,6 +2846,7 @@ static void vfio_unregister_req_notifier(VFIOPCIDevice *vdev)
static void vfio_realize(PCIDevice *pdev, Error **errp)
{
VFIOPCIDevice *vdev = VFIO_PCI(pdev);
+ VFIODevice *vbasedev = &vdev->vbasedev;
VFIODevice *vbasedev_iter;
VFIOGroup *group;
char *tmp, *subsys, group_path[PATH_MAX], *group_name;
@@ -2813,7 +2857,7 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
int i, ret;
bool is_mdev;
- if (!vdev->vbasedev.sysfsdev) {
+ if (!vbasedev->sysfsdev) {
if (!(~vdev->host.domain || ~vdev->host.bus ||
~vdev->host.slot || ~vdev->host.function)) {
error_setg(errp, "No provided host device");
@@ -2821,24 +2865,24 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
"or -device vfio-pci,sysfsdev=PATH_TO_DEVICE\n");
return;
}
- vdev->vbasedev.sysfsdev =
+ vbasedev->sysfsdev =
g_strdup_printf("/sys/bus/pci/devices/%04x:%02x:%02x.%01x",
vdev->host.domain, vdev->host.bus,
vdev->host.slot, vdev->host.function);
}
- if (stat(vdev->vbasedev.sysfsdev, &st) < 0) {
+ if (stat(vbasedev->sysfsdev, &st) < 0) {
error_setg_errno(errp, errno, "no such host device");
- error_prepend(errp, VFIO_MSG_PREFIX, vdev->vbasedev.sysfsdev);
+ error_prepend(errp, VFIO_MSG_PREFIX, vbasedev->sysfsdev);
return;
}
- vdev->vbasedev.name = g_path_get_basename(vdev->vbasedev.sysfsdev);
- vdev->vbasedev.ops = &vfio_pci_ops;
- vdev->vbasedev.type = VFIO_DEVICE_TYPE_PCI;
- vdev->vbasedev.dev = DEVICE(vdev);
+ vbasedev->name = g_path_get_basename(vbasedev->sysfsdev);
+ vbasedev->ops = &vfio_pci_ops;
+ vbasedev->type = VFIO_DEVICE_TYPE_PCI;
+ vbasedev->dev = DEVICE(vdev);
- tmp = g_strdup_printf("%s/iommu_group", vdev->vbasedev.sysfsdev);
+ tmp = g_strdup_printf("%s/iommu_group", vbasedev->sysfsdev);
len = readlink(tmp, group_path, sizeof(group_path));
g_free(tmp);
@@ -2856,7 +2900,7 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
goto error;
}
- trace_vfio_realize(vdev->vbasedev.name, groupid);
+ trace_vfio_realize(vbasedev->name, groupid);
group = vfio_get_group(groupid, pci_device_iommu_address_space(pdev), errp);
if (!group) {
@@ -2864,7 +2908,7 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
}
QLIST_FOREACH(vbasedev_iter, &group->device_list, next) {
- if (strcmp(vbasedev_iter->name, vdev->vbasedev.name) == 0) {
+ if (strcmp(vbasedev_iter->name, vbasedev->name) == 0) {
error_setg(errp, "device is already attached");
vfio_put_group(group);
goto error;
@@ -2877,22 +2921,22 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
* stays in sync with the active working set of the guest driver. Prevent
* the x-balloon-allowed option unless this is minimally an mdev device.
*/
- tmp = g_strdup_printf("%s/subsystem", vdev->vbasedev.sysfsdev);
+ tmp = g_strdup_printf("%s/subsystem", vbasedev->sysfsdev);
subsys = realpath(tmp, NULL);
g_free(tmp);
is_mdev = subsys && (strcmp(subsys, "/sys/bus/mdev") == 0);
free(subsys);
- trace_vfio_mdev(vdev->vbasedev.name, is_mdev);
+ trace_vfio_mdev(vbasedev->name, is_mdev);
- if (vdev->vbasedev.ram_block_discard_allowed && !is_mdev) {
+ if (vbasedev->ram_block_discard_allowed && !is_mdev) {
error_setg(errp, "x-balloon-allowed only potentially compatible "
"with mdev devices");
vfio_put_group(group);
goto error;
}
- ret = vfio_get_device(group, vdev->vbasedev.name, &vdev->vbasedev, errp);
+ ret = vfio_get_device(group, vbasedev->name, vbasedev, errp);
if (ret) {
vfio_put_group(group);
goto error;
@@ -2905,7 +2949,7 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
}
/* Get a copy of config space */
- ret = pread(vdev->vbasedev.fd, vdev->pdev.config,
+ ret = pread(vbasedev->fd, vdev->pdev.config,
MIN(pci_config_size(&vdev->pdev), vdev->config_size),
vdev->config_offset);
if (ret < (int)MIN(pci_config_size(&vdev->pdev), vdev->config_size)) {
@@ -2933,7 +2977,7 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
goto error;
}
vfio_add_emulated_word(vdev, PCI_VENDOR_ID, vdev->vendor_id, ~0);
- trace_vfio_pci_emulated_vendor_id(vdev->vbasedev.name, vdev->vendor_id);
+ trace_vfio_pci_emulated_vendor_id(vbasedev->name, vdev->vendor_id);
} else {
vdev->vendor_id = pci_get_word(pdev->config + PCI_VENDOR_ID);
}
@@ -2944,7 +2988,7 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
goto error;
}
vfio_add_emulated_word(vdev, PCI_DEVICE_ID, vdev->device_id, ~0);
- trace_vfio_pci_emulated_device_id(vdev->vbasedev.name, vdev->device_id);
+ trace_vfio_pci_emulated_device_id(vbasedev->name, vdev->device_id);
} else {
vdev->device_id = pci_get_word(pdev->config + PCI_DEVICE_ID);
}
@@ -2956,7 +3000,7 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
}
vfio_add_emulated_word(vdev, PCI_SUBSYSTEM_VENDOR_ID,
vdev->sub_vendor_id, ~0);
- trace_vfio_pci_emulated_sub_vendor_id(vdev->vbasedev.name,
+ trace_vfio_pci_emulated_sub_vendor_id(vbasedev->name,
vdev->sub_vendor_id);
}
@@ -2966,7 +3010,7 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
goto error;
}
vfio_add_emulated_word(vdev, PCI_SUBSYSTEM_ID, vdev->sub_device_id, ~0);
- trace_vfio_pci_emulated_sub_device_id(vdev->vbasedev.name,
+ trace_vfio_pci_emulated_sub_device_id(vbasedev->name,
vdev->sub_device_id);
}
@@ -3025,7 +3069,7 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
goto out_teardown;
}
- ret = vfio_get_dev_region_info(&vdev->vbasedev,
+ ret = vfio_get_dev_region_info(vbasedev,
VFIO_REGION_TYPE_PCI_VENDOR_TYPE | PCI_VENDOR_ID_INTEL,
VFIO_REGION_SUBTYPE_INTEL_IGD_OPREGION, &opregion);
if (ret) {
@@ -3101,9 +3145,9 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
}
if (!pdev->failover_pair_id) {
- ret = vfio_migration_probe(&vdev->vbasedev, errp);
+ ret = vfio_migration_probe(vbasedev, errp);
if (ret) {
- error_report("%s: Migration disabled", vdev->vbasedev.name);
+ error_report("%s: Migration disabled", vbasedev->name);
}
}
@@ -3120,7 +3164,7 @@ out_teardown:
vfio_teardown_msi(vdev);
vfio_bars_exit(vdev);
error:
- error_prepend(errp, VFIO_MSG_PREFIX, vdev->vbasedev.name);
+ error_prepend(errp, VFIO_MSG_PREFIX, vbasedev->name);
}
static void vfio_instance_finalize(Object *obj)