From 4c3e257b5e6ccba6bd34f780fab8008e0d79680a Mon Sep 17 00:00:00 2001 From: Changpeng Liu Date: Thu, 4 Jan 2018 09:53:31 +0800 Subject: vhost-user: add new vhost user messages to support virtio config space Add VHOST_USER_GET_CONFIG/VHOST_USER_SET_CONFIG messages which can be used for live migration of vhost user devices, also vhost user devices can benefit from the messages to get/set virtio config space from/to the I/O target. For the purpose to support virtio config space change, VHOST_USER_SLAVE_CONFIG_CHANGE_MSG message is added as the event notifier in case virtio config space change in the slave I/O target. Signed-off-by: Changpeng Liu Reviewed-by: Marc-André Lureau Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- include/hw/virtio/vhost-backend.h | 12 ++++++++++++ include/hw/virtio/vhost.h | 15 +++++++++++++++ 2 files changed, 27 insertions(+) (limited to 'include') diff --git a/include/hw/virtio/vhost-backend.h b/include/hw/virtio/vhost-backend.h index a7a5f22bc6..592254f40d 100644 --- a/include/hw/virtio/vhost-backend.h +++ b/include/hw/virtio/vhost-backend.h @@ -20,6 +20,11 @@ typedef enum VhostBackendType { VHOST_BACKEND_TYPE_MAX = 3, } VhostBackendType; +typedef enum VhostSetConfigType { + VHOST_SET_CONFIG_TYPE_MASTER = 0, + VHOST_SET_CONFIG_TYPE_MIGRATION = 1, +} VhostSetConfigType; + struct vhost_dev; struct vhost_log; struct vhost_memory; @@ -84,6 +89,11 @@ typedef void (*vhost_set_iotlb_callback_op)(struct vhost_dev *dev, int enabled); typedef int (*vhost_send_device_iotlb_msg_op)(struct vhost_dev *dev, struct vhost_iotlb_msg *imsg); +typedef int (*vhost_set_config_op)(struct vhost_dev *dev, const uint8_t *data, + uint32_t offset, uint32_t size, + uint32_t flags); +typedef int (*vhost_get_config_op)(struct vhost_dev *dev, uint8_t *config, + uint32_t config_len); typedef struct VhostOps { VhostBackendType backend_type; @@ -118,6 +128,8 @@ typedef struct VhostOps { vhost_vsock_set_running_op vhost_vsock_set_running; vhost_set_iotlb_callback_op vhost_set_iotlb_callback; vhost_send_device_iotlb_msg_op vhost_send_device_iotlb_msg; + vhost_get_config_op vhost_get_config; + vhost_set_config_op vhost_set_config; } VhostOps; extern const VhostOps user_ops; diff --git a/include/hw/virtio/vhost.h b/include/hw/virtio/vhost.h index 467dc7794b..1dc2d73d76 100644 --- a/include/hw/virtio/vhost.h +++ b/include/hw/virtio/vhost.h @@ -46,6 +46,12 @@ struct vhost_iommu { QLIST_ENTRY(vhost_iommu) iommu_next; }; +typedef struct VhostDevConfigOps { + /* Vhost device config space changed callback + */ + int (*vhost_dev_config_notifier)(struct vhost_dev *dev); +} VhostDevConfigOps; + struct vhost_memory; struct vhost_dev { VirtIODevice *vdev; @@ -76,6 +82,7 @@ struct vhost_dev { QLIST_ENTRY(vhost_dev) entry; QLIST_HEAD(, vhost_iommu) iommu_list; IOMMUNotifier n; + const VhostDevConfigOps *config_ops; }; int vhost_dev_init(struct vhost_dev *hdev, void *opaque, @@ -106,4 +113,12 @@ int vhost_net_set_backend(struct vhost_dev *hdev, struct vhost_vring_file *file); int vhost_device_iotlb_miss(struct vhost_dev *dev, uint64_t iova, int write); +int vhost_dev_get_config(struct vhost_dev *dev, uint8_t *config, + uint32_t config_len); +int vhost_dev_set_config(struct vhost_dev *dev, const uint8_t *data, + uint32_t offset, uint32_t size, uint32_t flags); +/* notifier callback in case vhost device config space changed + */ +void vhost_dev_set_config_notifier(struct vhost_dev *dev, + const VhostDevConfigOps *ops); #endif -- cgit v1.2.3-55-g7522 From 00343e4b54ba0685e9ebe928ec5713b0cf7f1d1c Mon Sep 17 00:00:00 2001 From: Changpeng Liu Date: Thu, 4 Jan 2018 09:53:32 +0800 Subject: vhost-user-blk: introduce a new vhost-user-blk host device This commit introduces a new vhost-user device for block, it uses a chardev to connect with the backend, same with Qemu virito-blk device, Guest OS still uses the virtio-blk frontend driver. To use it, start QEMU with command line like this: qemu-system-x86_64 \ -chardev socket,id=char0,path=/path/vhost.socket \ -device vhost-user-blk-pci,chardev=char0,num-queues=2, \ bootindex=2... \ Users can use different parameters for `num-queues` and `bootindex`. Different with exist Qemu virtio-blk host device, it makes more easy for users to implement their own I/O processing logic, such as all user space I/O stack against hardware block device. It uses the new vhost messages(VHOST_USER_GET_CONFIG) to get block virtio config information from backend process. Signed-off-by: Changpeng Liu Reviewed-by: Marc-André Lureau Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- default-configs/pci.mak | 1 + default-configs/s390x-softmmu.mak | 1 + hw/block/Makefile.objs | 3 + hw/block/vhost-user-blk.c | 359 +++++++++++++++++++++++++++++++++++++ hw/virtio/virtio-pci.c | 55 ++++++ hw/virtio/virtio-pci.h | 18 ++ include/hw/virtio/vhost-user-blk.h | 41 +++++ 7 files changed, 478 insertions(+) create mode 100644 hw/block/vhost-user-blk.c create mode 100644 include/hw/virtio/vhost-user-blk.h (limited to 'include') diff --git a/default-configs/pci.mak b/default-configs/pci.mak index e514bdef42..49a0f285ac 100644 --- a/default-configs/pci.mak +++ b/default-configs/pci.mak @@ -43,3 +43,4 @@ CONFIG_VGA_PCI=y CONFIG_IVSHMEM_DEVICE=$(CONFIG_IVSHMEM) CONFIG_ROCKER=y CONFIG_VHOST_USER_SCSI=$(call land,$(CONFIG_VHOST_USER),$(CONFIG_LINUX)) +CONFIG_VHOST_USER_BLK=$(call land,$(CONFIG_VHOST_USER),$(CONFIG_LINUX)) diff --git a/default-configs/s390x-softmmu.mak b/default-configs/s390x-softmmu.mak index 444bf16b80..2f4bfe73b4 100644 --- a/default-configs/s390x-softmmu.mak +++ b/default-configs/s390x-softmmu.mak @@ -1,6 +1,7 @@ CONFIG_PCI=y CONFIG_VIRTIO_PCI=$(CONFIG_PCI) CONFIG_VHOST_USER_SCSI=$(call land,$(CONFIG_VHOST_USER),$(CONFIG_LINUX)) +CONFIG_VHOST_USER_BLK=$(call land,$(CONFIG_VHOST_USER),$(CONFIG_LINUX)) CONFIG_VIRTIO=y CONFIG_SCLPCONSOLE=y CONFIG_TERMINAL3270=y diff --git a/hw/block/Makefile.objs b/hw/block/Makefile.objs index e0ed980c90..4c19a583c8 100644 --- a/hw/block/Makefile.objs +++ b/hw/block/Makefile.objs @@ -13,3 +13,6 @@ obj-$(CONFIG_SH4) += tc58128.o obj-$(CONFIG_VIRTIO) += virtio-blk.o obj-$(CONFIG_VIRTIO) += dataplane/ +ifeq ($(CONFIG_VIRTIO),y) +obj-$(CONFIG_VHOST_USER_BLK) += vhost-user-blk.o +endif diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c new file mode 100644 index 0000000000..b53b4c9c57 --- /dev/null +++ b/hw/block/vhost-user-blk.c @@ -0,0 +1,359 @@ +/* + * vhost-user-blk host device + * + * Copyright(C) 2017 Intel Corporation. + * + * Authors: + * Changpeng Liu + * + * Largely based on the "vhost-user-scsi.c" and "vhost-scsi.c" implemented by: + * Felipe Franciosi + * Stefan Hajnoczi + * Nicholas Bellinger + * + * This work is licensed under the terms of the GNU LGPL, version 2 or later. + * See the COPYING.LIB file in the top-level directory. + * + */ + +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "qemu/error-report.h" +#include "qemu/typedefs.h" +#include "qemu/cutils.h" +#include "qom/object.h" +#include "hw/qdev-core.h" +#include "hw/virtio/vhost.h" +#include "hw/virtio/vhost-user-blk.h" +#include "hw/virtio/virtio.h" +#include "hw/virtio/virtio-bus.h" +#include "hw/virtio/virtio-access.h" + +static const int user_feature_bits[] = { + VIRTIO_BLK_F_SIZE_MAX, + VIRTIO_BLK_F_SEG_MAX, + VIRTIO_BLK_F_GEOMETRY, + VIRTIO_BLK_F_BLK_SIZE, + VIRTIO_BLK_F_TOPOLOGY, + VIRTIO_BLK_F_MQ, + VIRTIO_BLK_F_RO, + VIRTIO_BLK_F_FLUSH, + VIRTIO_BLK_F_CONFIG_WCE, + VIRTIO_F_VERSION_1, + VIRTIO_RING_F_INDIRECT_DESC, + VIRTIO_RING_F_EVENT_IDX, + VIRTIO_F_NOTIFY_ON_EMPTY, + VHOST_INVALID_FEATURE_BIT +}; + +static void vhost_user_blk_update_config(VirtIODevice *vdev, uint8_t *config) +{ + VHostUserBlk *s = VHOST_USER_BLK(vdev); + + memcpy(config, &s->blkcfg, sizeof(struct virtio_blk_config)); +} + +static void vhost_user_blk_set_config(VirtIODevice *vdev, const uint8_t *config) +{ + VHostUserBlk *s = VHOST_USER_BLK(vdev); + struct virtio_blk_config *blkcfg = (struct virtio_blk_config *)config; + int ret; + + if (blkcfg->wce == s->blkcfg.wce) { + return; + } + + ret = vhost_dev_set_config(&s->dev, &blkcfg->wce, + offsetof(struct virtio_blk_config, wce), + sizeof(blkcfg->wce), + VHOST_SET_CONFIG_TYPE_MASTER); + if (ret) { + error_report("set device config space failed"); + return; + } + + s->blkcfg.wce = blkcfg->wce; +} + +static int vhost_user_blk_handle_config_change(struct vhost_dev *dev) +{ + int ret; + struct virtio_blk_config blkcfg; + VHostUserBlk *s = VHOST_USER_BLK(dev->vdev); + + ret = vhost_dev_get_config(dev, (uint8_t *)&blkcfg, + sizeof(struct virtio_blk_config)); + if (ret < 0) { + error_report("get config space failed"); + return -1; + } + + /* valid for resize only */ + if (blkcfg.capacity != s->blkcfg.capacity) { + s->blkcfg.capacity = blkcfg.capacity; + memcpy(dev->vdev->config, &s->blkcfg, sizeof(struct virtio_blk_config)); + virtio_notify_config(dev->vdev); + } + + return 0; +} + +const VhostDevConfigOps blk_ops = { + .vhost_dev_config_notifier = vhost_user_blk_handle_config_change, +}; + +static void vhost_user_blk_start(VirtIODevice *vdev) +{ + VHostUserBlk *s = VHOST_USER_BLK(vdev); + BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev))); + VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); + int i, ret; + + if (!k->set_guest_notifiers) { + error_report("binding does not support guest notifiers"); + return; + } + + ret = vhost_dev_enable_notifiers(&s->dev, vdev); + if (ret < 0) { + error_report("Error enabling host notifiers: %d", -ret); + return; + } + + ret = k->set_guest_notifiers(qbus->parent, s->dev.nvqs, true); + if (ret < 0) { + error_report("Error binding guest notifier: %d", -ret); + goto err_host_notifiers; + } + + s->dev.acked_features = vdev->guest_features; + ret = vhost_dev_start(&s->dev, vdev); + if (ret < 0) { + error_report("Error starting vhost: %d", -ret); + goto err_guest_notifiers; + } + + /* guest_notifier_mask/pending not used yet, so just unmask + * everything here. virtio-pci will do the right thing by + * enabling/disabling irqfd. + */ + for (i = 0; i < s->dev.nvqs; i++) { + vhost_virtqueue_mask(&s->dev, vdev, i, false); + } + + return; + +err_guest_notifiers: + k->set_guest_notifiers(qbus->parent, s->dev.nvqs, false); +err_host_notifiers: + vhost_dev_disable_notifiers(&s->dev, vdev); +} + +static void vhost_user_blk_stop(VirtIODevice *vdev) +{ + VHostUserBlk *s = VHOST_USER_BLK(vdev); + BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev))); + VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); + int ret; + + if (!k->set_guest_notifiers) { + return; + } + + vhost_dev_stop(&s->dev, vdev); + + ret = k->set_guest_notifiers(qbus->parent, s->dev.nvqs, false); + if (ret < 0) { + error_report("vhost guest notifier cleanup failed: %d", ret); + return; + } + + vhost_dev_disable_notifiers(&s->dev, vdev); +} + +static void vhost_user_blk_set_status(VirtIODevice *vdev, uint8_t status) +{ + VHostUserBlk *s = VHOST_USER_BLK(vdev); + bool should_start = status & VIRTIO_CONFIG_S_DRIVER_OK; + + if (!vdev->vm_running) { + should_start = false; + } + + if (s->dev.started == should_start) { + return; + } + + if (should_start) { + vhost_user_blk_start(vdev); + } else { + vhost_user_blk_stop(vdev); + } + +} + +static uint64_t vhost_user_blk_get_features(VirtIODevice *vdev, + uint64_t features, + Error **errp) +{ + VHostUserBlk *s = VHOST_USER_BLK(vdev); + uint64_t get_features; + + /* Turn on pre-defined features */ + virtio_add_feature(&features, VIRTIO_BLK_F_SEG_MAX); + virtio_add_feature(&features, VIRTIO_BLK_F_GEOMETRY); + virtio_add_feature(&features, VIRTIO_BLK_F_TOPOLOGY); + virtio_add_feature(&features, VIRTIO_BLK_F_BLK_SIZE); + virtio_add_feature(&features, VIRTIO_BLK_F_FLUSH); + + if (s->config_wce) { + virtio_add_feature(&features, VIRTIO_BLK_F_CONFIG_WCE); + } + if (s->config_ro) { + virtio_add_feature(&features, VIRTIO_BLK_F_RO); + } + if (s->num_queues > 1) { + virtio_add_feature(&features, VIRTIO_BLK_F_MQ); + } + + get_features = vhost_get_features(&s->dev, user_feature_bits, features); + + return get_features; +} + +static void vhost_user_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq) +{ + +} + +static void vhost_user_blk_device_realize(DeviceState *dev, Error **errp) +{ + VirtIODevice *vdev = VIRTIO_DEVICE(dev); + VHostUserBlk *s = VHOST_USER_BLK(vdev); + int i, ret; + + if (!s->chardev.chr) { + error_setg(errp, "vhost-user-blk: chardev is mandatory"); + return; + } + + if (!s->num_queues || s->num_queues > VIRTIO_QUEUE_MAX) { + error_setg(errp, "vhost-user-blk: invalid number of IO queues"); + return; + } + + if (!s->queue_size) { + error_setg(errp, "vhost-user-blk: queue size must be non-zero"); + return; + } + + virtio_init(vdev, "virtio-blk", VIRTIO_ID_BLOCK, + sizeof(struct virtio_blk_config)); + + for (i = 0; i < s->num_queues; i++) { + virtio_add_queue(vdev, s->queue_size, + vhost_user_blk_handle_output); + } + + s->dev.nvqs = s->num_queues; + s->dev.vqs = g_new(struct vhost_virtqueue, s->dev.nvqs); + s->dev.vq_index = 0; + s->dev.backend_features = 0; + + ret = vhost_dev_init(&s->dev, &s->chardev, VHOST_BACKEND_TYPE_USER, 0); + if (ret < 0) { + error_setg(errp, "vhost-user-blk: vhost initialization failed: %s", + strerror(-ret)); + goto virtio_err; + } + + ret = vhost_dev_get_config(&s->dev, (uint8_t *)&s->blkcfg, + sizeof(struct virtio_blk_config)); + if (ret < 0) { + error_setg(errp, "vhost-user-blk: get block config failed"); + goto vhost_err; + } + + if (s->blkcfg.num_queues != s->num_queues) { + s->blkcfg.num_queues = s->num_queues; + } + + vhost_dev_set_config_notifier(&s->dev, &blk_ops); + + return; + +vhost_err: + vhost_dev_cleanup(&s->dev); +virtio_err: + g_free(s->dev.vqs); + virtio_cleanup(vdev); +} + +static void vhost_user_blk_device_unrealize(DeviceState *dev, Error **errp) +{ + VirtIODevice *vdev = VIRTIO_DEVICE(dev); + VHostUserBlk *s = VHOST_USER_BLK(dev); + + vhost_user_blk_set_status(vdev, 0); + vhost_dev_cleanup(&s->dev); + g_free(s->dev.vqs); + virtio_cleanup(vdev); +} + +static void vhost_user_blk_instance_init(Object *obj) +{ + VHostUserBlk *s = VHOST_USER_BLK(obj); + + device_add_bootindex_property(obj, &s->bootindex, "bootindex", + "/disk@0,0", DEVICE(obj), NULL); +} + +static const VMStateDescription vmstate_vhost_user_blk = { + .name = "vhost-user-blk", + .minimum_version_id = 1, + .version_id = 1, + .fields = (VMStateField[]) { + VMSTATE_VIRTIO_DEVICE, + VMSTATE_END_OF_LIST() + }, +}; + +static Property vhost_user_blk_properties[] = { + DEFINE_PROP_CHR("chardev", VHostUserBlk, chardev), + DEFINE_PROP_UINT16("num-queues", VHostUserBlk, num_queues, 1), + DEFINE_PROP_UINT32("queue-size", VHostUserBlk, queue_size, 128), + DEFINE_PROP_BIT("config-wce", VHostUserBlk, config_wce, 0, true), + DEFINE_PROP_BIT("config-ro", VHostUserBlk, config_ro, 0, false), + DEFINE_PROP_END_OF_LIST(), +}; + +static void vhost_user_blk_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass); + + dc->props = vhost_user_blk_properties; + dc->vmsd = &vmstate_vhost_user_blk; + set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); + vdc->realize = vhost_user_blk_device_realize; + vdc->unrealize = vhost_user_blk_device_unrealize; + vdc->get_config = vhost_user_blk_update_config; + vdc->set_config = vhost_user_blk_set_config; + vdc->get_features = vhost_user_blk_get_features; + vdc->set_status = vhost_user_blk_set_status; +} + +static const TypeInfo vhost_user_blk_info = { + .name = TYPE_VHOST_USER_BLK, + .parent = TYPE_VIRTIO_DEVICE, + .instance_size = sizeof(VHostUserBlk), + .instance_init = vhost_user_blk_instance_init, + .class_init = vhost_user_blk_class_init, +}; + +static void virtio_register_types(void) +{ + type_register_static(&vhost_user_blk_info); +} + +type_init(virtio_register_types) diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c index 6c75cca88a..9ae10f0cdd 100644 --- a/hw/virtio/virtio-pci.c +++ b/hw/virtio/virtio-pci.c @@ -1978,6 +1978,58 @@ static const TypeInfo virtio_blk_pci_info = { .class_init = virtio_blk_pci_class_init, }; +#if defined(CONFIG_VHOST_USER) && defined(CONFIG_LINUX) +/* vhost-user-blk */ + +static Property vhost_user_blk_pci_properties[] = { + DEFINE_PROP_UINT32("class", VirtIOPCIProxy, class_code, 0), + DEFINE_PROP_UINT32("vectors", VirtIOPCIProxy, nvectors, 2), + DEFINE_PROP_END_OF_LIST(), +}; + +static void vhost_user_blk_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp) +{ + VHostUserBlkPCI *dev = VHOST_USER_BLK_PCI(vpci_dev); + DeviceState *vdev = DEVICE(&dev->vdev); + + qdev_set_parent_bus(vdev, BUS(&vpci_dev->bus)); + object_property_set_bool(OBJECT(vdev), true, "realized", errp); +} + +static void vhost_user_blk_pci_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + VirtioPCIClass *k = VIRTIO_PCI_CLASS(klass); + PCIDeviceClass *pcidev_k = PCI_DEVICE_CLASS(klass); + + set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); + dc->props = vhost_user_blk_pci_properties; + k->realize = vhost_user_blk_pci_realize; + pcidev_k->vendor_id = PCI_VENDOR_ID_REDHAT_QUMRANET; + pcidev_k->device_id = PCI_DEVICE_ID_VIRTIO_BLOCK; + pcidev_k->revision = VIRTIO_PCI_ABI_VERSION; + pcidev_k->class_id = PCI_CLASS_STORAGE_SCSI; +} + +static void vhost_user_blk_pci_instance_init(Object *obj) +{ + VHostUserBlkPCI *dev = VHOST_USER_BLK_PCI(obj); + + virtio_instance_init_common(obj, &dev->vdev, sizeof(dev->vdev), + TYPE_VHOST_USER_BLK); + object_property_add_alias(obj, "bootindex", OBJECT(&dev->vdev), + "bootindex", &error_abort); +} + +static const TypeInfo vhost_user_blk_pci_info = { + .name = TYPE_VHOST_USER_BLK_PCI, + .parent = TYPE_VIRTIO_PCI, + .instance_size = sizeof(VHostUserBlkPCI), + .instance_init = vhost_user_blk_pci_instance_init, + .class_init = vhost_user_blk_pci_class_init, +}; +#endif + /* virtio-scsi-pci */ static Property virtio_scsi_pci_properties[] = { @@ -2624,6 +2676,9 @@ static void virtio_pci_register_types(void) type_register_static(&virtio_9p_pci_info); #endif type_register_static(&virtio_blk_pci_info); +#if defined(CONFIG_VHOST_USER) && defined(CONFIG_LINUX) + type_register_static(&vhost_user_blk_pci_info); +#endif type_register_static(&virtio_scsi_pci_info); type_register_static(&virtio_balloon_pci_info); type_register_static(&virtio_serial_pci_info); diff --git a/hw/virtio/virtio-pci.h b/hw/virtio/virtio-pci.h index 12d3a90686..813082b0d7 100644 --- a/hw/virtio/virtio-pci.h +++ b/hw/virtio/virtio-pci.h @@ -27,6 +27,9 @@ #include "hw/virtio/virtio-gpu.h" #include "hw/virtio/virtio-crypto.h" #include "hw/virtio/vhost-user-scsi.h" +#if defined(CONFIG_VHOST_USER) && defined(CONFIG_LINUX) +#include "hw/virtio/vhost-user-blk.h" +#endif #ifdef CONFIG_VIRTFS #include "hw/9pfs/virtio-9p.h" @@ -46,6 +49,7 @@ typedef struct VirtIOSerialPCI VirtIOSerialPCI; typedef struct VirtIONetPCI VirtIONetPCI; typedef struct VHostSCSIPCI VHostSCSIPCI; typedef struct VHostUserSCSIPCI VHostUserSCSIPCI; +typedef struct VHostUserBlkPCI VHostUserBlkPCI; typedef struct VirtIORngPCI VirtIORngPCI; typedef struct VirtIOInputPCI VirtIOInputPCI; typedef struct VirtIOInputHIDPCI VirtIOInputHIDPCI; @@ -244,6 +248,20 @@ struct VHostUserSCSIPCI { VHostUserSCSI vdev; }; +#if defined(CONFIG_VHOST_USER) && defined(CONFIG_LINUX) +/* + * vhost-user-blk-pci: This extends VirtioPCIProxy. + */ +#define TYPE_VHOST_USER_BLK_PCI "vhost-user-blk-pci" +#define VHOST_USER_BLK_PCI(obj) \ + OBJECT_CHECK(VHostUserBlkPCI, (obj), TYPE_VHOST_USER_BLK_PCI) + +struct VHostUserBlkPCI { + VirtIOPCIProxy parent_obj; + VHostUserBlk vdev; +}; +#endif + /* * virtio-blk-pci: This extends VirtioPCIProxy. */ diff --git a/include/hw/virtio/vhost-user-blk.h b/include/hw/virtio/vhost-user-blk.h new file mode 100644 index 0000000000..5804cc904a --- /dev/null +++ b/include/hw/virtio/vhost-user-blk.h @@ -0,0 +1,41 @@ +/* + * vhost-user-blk host device + * Copyright(C) 2017 Intel Corporation. + * + * Authors: + * Changpeng Liu + * + * Based on vhost-scsi.h, Copyright IBM, Corp. 2011 + * + * This work is licensed under the terms of the GNU LGPL, version 2 or later. + * See the COPYING.LIB file in the top-level directory. + * + */ + +#ifndef VHOST_USER_BLK_H +#define VHOST_USER_BLK_H + +#include "standard-headers/linux/virtio_blk.h" +#include "qemu-common.h" +#include "hw/qdev.h" +#include "hw/block/block.h" +#include "chardev/char-fe.h" +#include "hw/virtio/vhost.h" + +#define TYPE_VHOST_USER_BLK "vhost-user-blk" +#define VHOST_USER_BLK(obj) \ + OBJECT_CHECK(VHostUserBlk, (obj), TYPE_VHOST_USER_BLK) + +typedef struct VHostUserBlk { + VirtIODevice parent_obj; + CharBackend chardev; + int32_t bootindex; + struct virtio_blk_config blkcfg; + uint16_t num_queues; + uint32_t queue_size; + uint32_t config_wce; + uint32_t config_ro; + struct vhost_dev dev; +} VHostUserBlk; + +#endif -- cgit v1.2.3-55-g7522 From f87d72f5c5bff0837d409a56bd34f439a90119ca Mon Sep 17 00:00:00 2001 From: Gal Hammer Date: Sun, 14 Jan 2018 12:06:54 +0200 Subject: qemu: add a cleanup callback function to EventNotifier Adding a cleanup callback function to the EventNotifier struct which allows users to execute event_notifier_cleanup in a different context. Signed-off-by: Gal Hammer Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- include/qemu/event_notifier.h | 1 + util/event_notifier-posix.c | 5 ++++- util/event_notifier-win32.c | 2 ++ 3 files changed, 7 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/qemu/event_notifier.h b/include/qemu/event_notifier.h index 599c99f1a5..b30a45474f 100644 --- a/include/qemu/event_notifier.h +++ b/include/qemu/event_notifier.h @@ -26,6 +26,7 @@ struct EventNotifier { int rfd; int wfd; #endif + void (*cleanup)(EventNotifier *); }; typedef void EventNotifierHandler(EventNotifier *); diff --git a/util/event_notifier-posix.c b/util/event_notifier-posix.c index 73c4046b58..652566634a 100644 --- a/util/event_notifier-posix.c +++ b/util/event_notifier-posix.c @@ -29,6 +29,7 @@ void event_notifier_init_fd(EventNotifier *e, int fd) { e->rfd = fd; e->wfd = fd; + e->cleanup = NULL; } #endif @@ -65,6 +66,7 @@ int event_notifier_init(EventNotifier *e, int active) e->rfd = fds[0]; e->wfd = fds[1]; } + e->cleanup = NULL; if (active) { event_notifier_set(e); } @@ -80,10 +82,11 @@ void event_notifier_cleanup(EventNotifier *e) { if (e->rfd != e->wfd) { close(e->rfd); - e->rfd = -1; } close(e->wfd); + e->rfd = -1; e->wfd = -1; + e->cleanup = NULL; } int event_notifier_get_fd(const EventNotifier *e) diff --git a/util/event_notifier-win32.c b/util/event_notifier-win32.c index 62c53b0a99..eff86701ad 100644 --- a/util/event_notifier-win32.c +++ b/util/event_notifier-win32.c @@ -19,6 +19,7 @@ int event_notifier_init(EventNotifier *e, int active) { e->event = CreateEvent(NULL, TRUE, FALSE, NULL); assert(e->event); + e->cleanup = NULL; return 0; } @@ -26,6 +27,7 @@ void event_notifier_cleanup(EventNotifier *e) { CloseHandle(e->event); e->event = NULL; + e->cleanup = NULL; } HANDLE event_notifier_get_handle(EventNotifier *e) -- cgit v1.2.3-55-g7522 From 37e626cedae08288f73f2356530a0bd5f045c8b9 Mon Sep 17 00:00:00 2001 From: Yuval Shaia Date: Sun, 14 Jan 2018 11:01:43 +0200 Subject: pci/shpc: Move function to generic header file This function should be declared in generic header file so we can utilize it. Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Yuval Shaia Signed-off-by: Marcel Apfelbaum Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/pci/shpc.c | 13 ++----------- include/qemu/host-utils.h | 10 ++++++++++ 2 files changed, 12 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/hw/pci/shpc.c b/hw/pci/shpc.c index 69fc14b218..a8462d48bb 100644 --- a/hw/pci/shpc.c +++ b/hw/pci/shpc.c @@ -1,6 +1,7 @@ #include "qemu/osdep.h" #include "qapi/error.h" #include "qemu-common.h" +#include "qemu/host-utils.h" #include "qemu/range.h" #include "qemu/error-report.h" #include "hw/pci/shpc.h" @@ -122,16 +123,6 @@ #define SHPC_PCI_TO_IDX(pci_slot) ((pci_slot) - 1) #define SHPC_IDX_TO_PHYSICAL(slot) ((slot) + 1) -static int roundup_pow_of_two(int x) -{ - x |= (x >> 1); - x |= (x >> 2); - x |= (x >> 4); - x |= (x >> 8); - x |= (x >> 16); - return x + 1; -} - static uint16_t shpc_get_status(SHPCDevice *shpc, int slot, uint16_t msk) { uint8_t *status = shpc->config + SHPC_SLOT_STATUS(slot); @@ -656,7 +647,7 @@ int shpc_init(PCIDevice *d, PCIBus *sec_bus, MemoryRegion *bar, int shpc_bar_size(PCIDevice *d) { - return roundup_pow_of_two(SHPC_SLOT_REG(SHPC_MAX_SLOTS)); + return pow2roundup32(SHPC_SLOT_REG(SHPC_MAX_SLOTS)); } void shpc_cleanup(PCIDevice *d, MemoryRegion *bar) diff --git a/include/qemu/host-utils.h b/include/qemu/host-utils.h index 5ac621cf1f..38da849be9 100644 --- a/include/qemu/host-utils.h +++ b/include/qemu/host-utils.h @@ -400,6 +400,16 @@ static inline uint64_t pow2ceil(uint64_t value) return 0x8000000000000000ull >> (n - 1); } +static inline uint32_t pow2roundup32(uint32_t x) +{ + x |= (x >> 1); + x |= (x >> 2); + x |= (x >> 4); + x |= (x >> 8); + x |= (x >> 16); + return x + 1; +} + /** * urshift - 128-bit Unsigned Right Shift. * @plow: in/out - lower 64-bit integer. -- cgit v1.2.3-55-g7522 From 92e5d85e8345a22e87eda940ffe0f6422eb45360 Mon Sep 17 00:00:00 2001 From: Prasad Singamsetty Date: Tue, 14 Nov 2017 18:13:49 -0500 Subject: intel-iommu: Redefine macros to enable supporting 48 bit address width The current implementation of Intel IOMMU code only supports 39 bits host/iova address width so number of macros use hard coded values based on that. This patch is to redefine them so they can be used with variable address widths. This patch doesn't add any new functionality but enables adding support for 48 bit address width. Signed-off-by: Prasad Singamsetty Reviewed-by: Peter Xu Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/i386/intel_iommu.c | 54 ++++++++++++++++++++++++------------------ hw/i386/intel_iommu_internal.h | 34 +++++++++++++++++++------- include/hw/i386/intel_iommu.h | 6 +++-- 3 files changed, 61 insertions(+), 33 deletions(-) (limited to 'include') diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c index fe15d3ba84..fbcf43a622 100644 --- a/hw/i386/intel_iommu.c +++ b/hw/i386/intel_iommu.c @@ -523,7 +523,7 @@ static inline dma_addr_t vtd_ce_get_slpt_base(VTDContextEntry *ce) static inline uint64_t vtd_get_slpte_addr(uint64_t slpte) { - return slpte & VTD_SL_PT_BASE_ADDR_MASK; + return slpte & VTD_SL_PT_BASE_ADDR_MASK(VTD_HOST_ADDRESS_WIDTH); } /* Whether the pte indicates the address of the page frame */ @@ -624,19 +624,12 @@ static inline bool vtd_iova_range_check(uint64_t iova, VTDContextEntry *ce) return !(iova & ~(vtd_iova_limit(ce) - 1)); } -static const uint64_t vtd_paging_entry_rsvd_field[] = { - [0] = ~0ULL, - /* For not large page */ - [1] = 0x800ULL | ~(VTD_HAW_MASK | VTD_SL_IGN_COM), - [2] = 0x800ULL | ~(VTD_HAW_MASK | VTD_SL_IGN_COM), - [3] = 0x800ULL | ~(VTD_HAW_MASK | VTD_SL_IGN_COM), - [4] = 0x880ULL | ~(VTD_HAW_MASK | VTD_SL_IGN_COM), - /* For large page */ - [5] = 0x800ULL | ~(VTD_HAW_MASK | VTD_SL_IGN_COM), - [6] = 0x1ff800ULL | ~(VTD_HAW_MASK | VTD_SL_IGN_COM), - [7] = 0x3ffff800ULL | ~(VTD_HAW_MASK | VTD_SL_IGN_COM), - [8] = 0x880ULL | ~(VTD_HAW_MASK | VTD_SL_IGN_COM), -}; +/* + * Rsvd field masks for spte: + * Index [1] to [4] 4k pages + * Index [5] to [8] large pages + */ +static uint64_t vtd_paging_entry_rsvd_field[9]; static bool vtd_slpte_nonzero_rsvd(uint64_t slpte, uint32_t level) { @@ -874,7 +867,7 @@ static int vtd_dev_to_context_entry(IntelIOMMUState *s, uint8_t bus_num, return -VTD_FR_ROOT_ENTRY_P; } - if (re.rsvd || (re.val & VTD_ROOT_ENTRY_RSVD)) { + if (re.rsvd || (re.val & VTD_ROOT_ENTRY_RSVD(VTD_HOST_ADDRESS_WIDTH))) { trace_vtd_re_invalid(re.rsvd, re.val); return -VTD_FR_ROOT_ENTRY_RSVD; } @@ -891,7 +884,7 @@ static int vtd_dev_to_context_entry(IntelIOMMUState *s, uint8_t bus_num, } if ((ce->hi & VTD_CONTEXT_ENTRY_RSVD_HI) || - (ce->lo & VTD_CONTEXT_ENTRY_RSVD_LO)) { + (ce->lo & VTD_CONTEXT_ENTRY_RSVD_LO(VTD_HOST_ADDRESS_WIDTH))) { trace_vtd_ce_invalid(ce->hi, ce->lo); return -VTD_FR_CONTEXT_ENTRY_RSVD; } @@ -1207,7 +1200,7 @@ static void vtd_root_table_setup(IntelIOMMUState *s) { s->root = vtd_get_quad_raw(s, DMAR_RTADDR_REG); s->root_extended = s->root & VTD_RTADDR_RTT; - s->root &= VTD_RTADDR_ADDR_MASK; + s->root &= VTD_RTADDR_ADDR_MASK(VTD_HOST_ADDRESS_WIDTH); trace_vtd_reg_dmar_root(s->root, s->root_extended); } @@ -1223,7 +1216,7 @@ static void vtd_interrupt_remap_table_setup(IntelIOMMUState *s) uint64_t value = 0; value = vtd_get_quad_raw(s, DMAR_IRTA_REG); s->intr_size = 1UL << ((value & VTD_IRTA_SIZE_MASK) + 1); - s->intr_root = value & VTD_IRTA_ADDR_MASK; + s->intr_root = value & VTD_IRTA_ADDR_MASK(VTD_HOST_ADDRESS_WIDTH); s->intr_eime = value & VTD_IRTA_EIME; /* Notify global invalidation */ @@ -1479,7 +1472,7 @@ static void vtd_handle_gcmd_qie(IntelIOMMUState *s, bool en) trace_vtd_inv_qi_enable(en); if (en) { - s->iq = iqa_val & VTD_IQA_IQA_MASK; + s->iq = iqa_val & VTD_IQA_IQA_MASK(VTD_HOST_ADDRESS_WIDTH); /* 2^(x+8) entries */ s->iq_size = 1UL << ((iqa_val & VTD_IQA_QS) + 8); s->qi_enabled = true; @@ -2772,12 +2765,12 @@ static void vtd_address_space_unmap(VTDAddressSpace *as, IOMMUNotifier *n) * VT-d spec), otherwise we need to consider overflow of 64 bits. */ - if (end > VTD_ADDRESS_SIZE) { + if (end > VTD_ADDRESS_SIZE(VTD_HOST_ADDRESS_WIDTH)) { /* * Don't need to unmap regions that is bigger than the whole * VT-d supported address space size */ - end = VTD_ADDRESS_SIZE; + end = VTD_ADDRESS_SIZE(VTD_HOST_ADDRESS_WIDTH); } assert(start <= end); @@ -2866,6 +2859,7 @@ static void vtd_iommu_replay(IOMMUMemoryRegion *iommu_mr, IOMMUNotifier *n) static void vtd_init(IntelIOMMUState *s) { X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(s); + uint8_t aw_bits = VTD_HOST_ADDRESS_WIDTH; memset(s->csr, 0, DMAR_REG_SIZE); memset(s->wmask, 0, DMAR_REG_SIZE); @@ -2882,10 +2876,24 @@ static void vtd_init(IntelIOMMUState *s) s->qi_enabled = false; s->iq_last_desc_type = VTD_INV_DESC_NONE; s->next_frcd_reg = 0; - s->cap = VTD_CAP_FRO | VTD_CAP_NFR | VTD_CAP_ND | VTD_CAP_MGAW | - VTD_CAP_SAGAW | VTD_CAP_MAMV | VTD_CAP_PSI | VTD_CAP_SLLPS; + s->cap = VTD_CAP_FRO | VTD_CAP_NFR | VTD_CAP_ND | + VTD_CAP_MAMV | VTD_CAP_PSI | VTD_CAP_SLLPS | + VTD_CAP_SAGAW_39bit | VTD_CAP_MGAW(VTD_HOST_ADDRESS_WIDTH); s->ecap = VTD_ECAP_QI | VTD_ECAP_IRO; + /* + * Rsvd field masks for spte + */ + vtd_paging_entry_rsvd_field[0] = ~0ULL; + vtd_paging_entry_rsvd_field[1] = VTD_SPTE_PAGE_L1_RSVD_MASK(aw_bits); + vtd_paging_entry_rsvd_field[2] = VTD_SPTE_PAGE_L2_RSVD_MASK(aw_bits); + vtd_paging_entry_rsvd_field[3] = VTD_SPTE_PAGE_L3_RSVD_MASK(aw_bits); + vtd_paging_entry_rsvd_field[4] = VTD_SPTE_PAGE_L4_RSVD_MASK(aw_bits); + vtd_paging_entry_rsvd_field[5] = VTD_SPTE_LPAGE_L1_RSVD_MASK(aw_bits); + vtd_paging_entry_rsvd_field[6] = VTD_SPTE_LPAGE_L2_RSVD_MASK(aw_bits); + vtd_paging_entry_rsvd_field[7] = VTD_SPTE_LPAGE_L3_RSVD_MASK(aw_bits); + vtd_paging_entry_rsvd_field[8] = VTD_SPTE_LPAGE_L4_RSVD_MASK(aw_bits); + if (x86_iommu->intr_supported) { s->ecap |= VTD_ECAP_IR | VTD_ECAP_MHMV; if (s->intr_eim == ON_OFF_AUTO_ON) { diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h index 0e73a65bf2..77e4a9833a 100644 --- a/hw/i386/intel_iommu_internal.h +++ b/hw/i386/intel_iommu_internal.h @@ -172,10 +172,10 @@ /* RTADDR_REG */ #define VTD_RTADDR_RTT (1ULL << 11) -#define VTD_RTADDR_ADDR_MASK (VTD_HAW_MASK ^ 0xfffULL) +#define VTD_RTADDR_ADDR_MASK(aw) (VTD_HAW_MASK(aw) ^ 0xfffULL) /* IRTA_REG */ -#define VTD_IRTA_ADDR_MASK (VTD_HAW_MASK ^ 0xfffULL) +#define VTD_IRTA_ADDR_MASK(aw) (VTD_HAW_MASK(aw) ^ 0xfffULL) #define VTD_IRTA_EIME (1ULL << 11) #define VTD_IRTA_SIZE_MASK (0xfULL) @@ -198,8 +198,8 @@ #define VTD_DOMAIN_ID_MASK ((1UL << VTD_DOMAIN_ID_SHIFT) - 1) #define VTD_CAP_ND (((VTD_DOMAIN_ID_SHIFT - 4) / 2) & 7ULL) #define VTD_MGAW 39 /* Maximum Guest Address Width */ -#define VTD_ADDRESS_SIZE (1ULL << VTD_MGAW) -#define VTD_CAP_MGAW (((VTD_MGAW - 1) & 0x3fULL) << 16) +#define VTD_ADDRESS_SIZE(aw) (1ULL << (aw)) +#define VTD_CAP_MGAW(aw) ((((aw) - 1) & 0x3fULL) << 16) #define VTD_MAMV 18ULL #define VTD_CAP_MAMV (VTD_MAMV << 48) #define VTD_CAP_PSI (1ULL << 39) @@ -219,7 +219,7 @@ #define VTD_IQT_QT(val) (((val) >> 4) & 0x7fffULL) /* IQA_REG */ -#define VTD_IQA_IQA_MASK (VTD_HAW_MASK ^ 0xfffULL) +#define VTD_IQA_IQA_MASK(aw) (VTD_HAW_MASK(aw) ^ 0xfffULL) #define VTD_IQA_QS 0x7ULL /* IQH_REG */ @@ -373,6 +373,24 @@ typedef union VTDInvDesc VTDInvDesc; #define VTD_INV_DESC_DEVICE_IOTLB_RSVD_HI 0xffeULL #define VTD_INV_DESC_DEVICE_IOTLB_RSVD_LO 0xffff0000ffe0fff8 +/* Rsvd field masks for spte */ +#define VTD_SPTE_PAGE_L1_RSVD_MASK(aw) \ + (0x800ULL | ~(VTD_HAW_MASK(aw) | VTD_SL_IGN_COM)) +#define VTD_SPTE_PAGE_L2_RSVD_MASK(aw) \ + (0x800ULL | ~(VTD_HAW_MASK(aw) | VTD_SL_IGN_COM)) +#define VTD_SPTE_PAGE_L3_RSVD_MASK(aw) \ + (0x800ULL | ~(VTD_HAW_MASK(aw) | VTD_SL_IGN_COM)) +#define VTD_SPTE_PAGE_L4_RSVD_MASK(aw) \ + (0x880ULL | ~(VTD_HAW_MASK(aw) | VTD_SL_IGN_COM)) +#define VTD_SPTE_LPAGE_L1_RSVD_MASK(aw) \ + (0x800ULL | ~(VTD_HAW_MASK(aw) | VTD_SL_IGN_COM)) +#define VTD_SPTE_LPAGE_L2_RSVD_MASK(aw) \ + (0x1ff800ULL | ~(VTD_HAW_MASK(aw) | VTD_SL_IGN_COM)) +#define VTD_SPTE_LPAGE_L3_RSVD_MASK(aw) \ + (0x3ffff800ULL | ~(VTD_HAW_MASK(aw) | VTD_SL_IGN_COM)) +#define VTD_SPTE_LPAGE_L4_RSVD_MASK(aw) \ + (0x880ULL | ~(VTD_HAW_MASK(aw) | VTD_SL_IGN_COM)) + /* Information about page-selective IOTLB invalidate */ struct VTDIOTLBPageInvInfo { uint16_t domain_id; @@ -403,7 +421,7 @@ typedef struct VTDRootEntry VTDRootEntry; #define VTD_ROOT_ENTRY_CTP (~0xfffULL) #define VTD_ROOT_ENTRY_NR (VTD_PAGE_SIZE / sizeof(VTDRootEntry)) -#define VTD_ROOT_ENTRY_RSVD (0xffeULL | ~VTD_HAW_MASK) +#define VTD_ROOT_ENTRY_RSVD(aw) (0xffeULL | ~VTD_HAW_MASK(aw)) /* Masks for struct VTDContextEntry */ /* lo */ @@ -415,7 +433,7 @@ typedef struct VTDRootEntry VTDRootEntry; #define VTD_CONTEXT_TT_PASS_THROUGH (2ULL << 2) /* Second Level Page Translation Pointer*/ #define VTD_CONTEXT_ENTRY_SLPTPTR (~0xfffULL) -#define VTD_CONTEXT_ENTRY_RSVD_LO (0xff0ULL | ~VTD_HAW_MASK) +#define VTD_CONTEXT_ENTRY_RSVD_LO(aw) (0xff0ULL | ~VTD_HAW_MASK(aw)) /* hi */ #define VTD_CONTEXT_ENTRY_AW 7ULL /* Adjusted guest-address-width */ #define VTD_CONTEXT_ENTRY_DID(val) (((val) >> 8) & VTD_DOMAIN_ID_MASK) @@ -439,7 +457,7 @@ typedef struct VTDRootEntry VTDRootEntry; #define VTD_SL_RW_MASK 3ULL #define VTD_SL_R 1ULL #define VTD_SL_W (1ULL << 1) -#define VTD_SL_PT_BASE_ADDR_MASK (~(VTD_PAGE_SIZE - 1) & VTD_HAW_MASK) +#define VTD_SL_PT_BASE_ADDR_MASK(aw) (~(VTD_PAGE_SIZE - 1) & VTD_HAW_MASK(aw)) #define VTD_SL_IGN_COM 0xbff0000000000000ULL #endif diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/intel_iommu.h index ac15e6be14..372b06df45 100644 --- a/include/hw/i386/intel_iommu.h +++ b/include/hw/i386/intel_iommu.h @@ -46,8 +46,10 @@ #define VTD_SID_TO_DEVFN(sid) ((sid) & 0xff) #define DMAR_REG_SIZE 0x230 -#define VTD_HOST_ADDRESS_WIDTH 39 -#define VTD_HAW_MASK ((1ULL << VTD_HOST_ADDRESS_WIDTH) - 1) +#define VTD_HOST_AW_39BIT 39 +#define VTD_HOST_AW_48BIT 48 +#define VTD_HOST_ADDRESS_WIDTH VTD_HOST_AW_39BIT +#define VTD_HAW_MASK(aw) ((1ULL << (aw)) - 1) #define DMAR_REPORT_F_INTR (1) -- cgit v1.2.3-55-g7522 From 37f51384ae05bd50f83308339dbffa3e78404874 Mon Sep 17 00:00:00 2001 From: Prasad Singamsetty Date: Tue, 14 Nov 2017 18:13:50 -0500 Subject: intel-iommu: Extend address width to 48 bits The current implementation of Intel IOMMU code only supports 39 bits iova address width. This patch provides a new parameter (x-aw-bits) for intel-iommu to extend its address width to 48 bits but keeping the default the same (39 bits). The reason for not changing the default is to avoid potential compatibility problems with live migration of intel-iommu enabled QEMU guest. The only valid values for 'x-aw-bits' parameter are 39 and 48. After enabling larger address width (48), we should be able to map larger iova addresses in the guest. For example, a QEMU guest that is configured with large memory ( >=1TB ). To check whether 48 bits aw is enabled, we can grep in the guest dmesg output with line: "DMAR: Host address width 48". Signed-off-by: Prasad Singamsetty Reviewed-by: Peter Xu Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/i386/acpi-build.c | 3 +- hw/i386/intel_iommu.c | 101 ++++++++++++++++++++++++----------------- hw/i386/intel_iommu_internal.h | 9 ++-- include/hw/i386/intel_iommu.h | 1 + 4 files changed, 65 insertions(+), 49 deletions(-) (limited to 'include') diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c index 18b939e469..6f38fb9d27 100644 --- a/hw/i386/acpi-build.c +++ b/hw/i386/acpi-build.c @@ -2473,6 +2473,7 @@ build_dmar_q35(GArray *table_data, BIOSLinker *linker) AcpiDmarDeviceScope *scope = NULL; /* Root complex IOAPIC use one path[0] only */ size_t ioapic_scope_size = sizeof(*scope) + sizeof(scope->path[0]); + IntelIOMMUState *intel_iommu = INTEL_IOMMU_DEVICE(iommu); assert(iommu); if (iommu->intr_supported) { @@ -2480,7 +2481,7 @@ build_dmar_q35(GArray *table_data, BIOSLinker *linker) } dmar = acpi_data_push(table_data, sizeof(*dmar)); - dmar->host_address_width = VTD_HOST_ADDRESS_WIDTH - 1; + dmar->host_address_width = intel_iommu->aw_bits - 1; dmar->flags = dmar_flags; /* DMAR Remapping Hardware Unit Definition structure */ diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c index fbcf43a622..4e8642ea6a 100644 --- a/hw/i386/intel_iommu.c +++ b/hw/i386/intel_iommu.c @@ -521,9 +521,9 @@ static inline dma_addr_t vtd_ce_get_slpt_base(VTDContextEntry *ce) return ce->lo & VTD_CONTEXT_ENTRY_SLPTPTR; } -static inline uint64_t vtd_get_slpte_addr(uint64_t slpte) +static inline uint64_t vtd_get_slpte_addr(uint64_t slpte, uint8_t aw) { - return slpte & VTD_SL_PT_BASE_ADDR_MASK(VTD_HOST_ADDRESS_WIDTH); + return slpte & VTD_SL_PT_BASE_ADDR_MASK(aw); } /* Whether the pte indicates the address of the page frame */ @@ -608,20 +608,21 @@ static inline bool vtd_ce_type_check(X86IOMMUState *x86_iommu, return true; } -static inline uint64_t vtd_iova_limit(VTDContextEntry *ce) +static inline uint64_t vtd_iova_limit(VTDContextEntry *ce, uint8_t aw) { uint32_t ce_agaw = vtd_ce_get_agaw(ce); - return 1ULL << MIN(ce_agaw, VTD_MGAW); + return 1ULL << MIN(ce_agaw, aw); } /* Return true if IOVA passes range check, otherwise false. */ -static inline bool vtd_iova_range_check(uint64_t iova, VTDContextEntry *ce) +static inline bool vtd_iova_range_check(uint64_t iova, VTDContextEntry *ce, + uint8_t aw) { /* * Check if @iova is above 2^X-1, where X is the minimum of MGAW * in CAP_REG and AW in context-entry. */ - return !(iova & ~(vtd_iova_limit(ce) - 1)); + return !(iova & ~(vtd_iova_limit(ce, aw) - 1)); } /* @@ -669,7 +670,7 @@ static VTDBus *vtd_find_as_from_bus_num(IntelIOMMUState *s, uint8_t bus_num) */ static int vtd_iova_to_slpte(VTDContextEntry *ce, uint64_t iova, bool is_write, uint64_t *slptep, uint32_t *slpte_level, - bool *reads, bool *writes) + bool *reads, bool *writes, uint8_t aw_bits) { dma_addr_t addr = vtd_ce_get_slpt_base(ce); uint32_t level = vtd_ce_get_level(ce); @@ -677,7 +678,7 @@ static int vtd_iova_to_slpte(VTDContextEntry *ce, uint64_t iova, bool is_write, uint64_t slpte; uint64_t access_right_check; - if (!vtd_iova_range_check(iova, ce)) { + if (!vtd_iova_range_check(iova, ce, aw_bits)) { trace_vtd_err_dmar_iova_overflow(iova); return -VTD_FR_ADDR_BEYOND_MGAW; } @@ -714,7 +715,7 @@ static int vtd_iova_to_slpte(VTDContextEntry *ce, uint64_t iova, bool is_write, *slpte_level = level; return 0; } - addr = vtd_get_slpte_addr(slpte); + addr = vtd_get_slpte_addr(slpte, aw_bits); level--; } } @@ -732,11 +733,12 @@ typedef int (*vtd_page_walk_hook)(IOMMUTLBEntry *entry, void *private); * @read: whether parent level has read permission * @write: whether parent level has write permission * @notify_unmap: whether we should notify invalid entries + * @aw: maximum address width */ static int vtd_page_walk_level(dma_addr_t addr, uint64_t start, uint64_t end, vtd_page_walk_hook hook_fn, - void *private, uint32_t level, - bool read, bool write, bool notify_unmap) + void *private, uint32_t level, bool read, + bool write, bool notify_unmap, uint8_t aw) { bool read_cur, write_cur, entry_valid; uint32_t offset; @@ -783,7 +785,7 @@ static int vtd_page_walk_level(dma_addr_t addr, uint64_t start, entry.target_as = &address_space_memory; entry.iova = iova & subpage_mask; /* NOTE: this is only meaningful if entry_valid == true */ - entry.translated_addr = vtd_get_slpte_addr(slpte); + entry.translated_addr = vtd_get_slpte_addr(slpte, aw); entry.addr_mask = ~subpage_mask; entry.perm = IOMMU_ACCESS_FLAG(read_cur, write_cur); if (!entry_valid && !notify_unmap) { @@ -803,10 +805,10 @@ static int vtd_page_walk_level(dma_addr_t addr, uint64_t start, trace_vtd_page_walk_skip_perm(iova, iova_next); goto next; } - ret = vtd_page_walk_level(vtd_get_slpte_addr(slpte), iova, + ret = vtd_page_walk_level(vtd_get_slpte_addr(slpte, aw), iova, MIN(iova_next, end), hook_fn, private, level - 1, read_cur, write_cur, - notify_unmap); + notify_unmap, aw); if (ret < 0) { return ret; } @@ -827,25 +829,26 @@ next: * @end: IOVA range end address (start <= addr < end) * @hook_fn: the hook that to be called for each detected area * @private: private data for the hook function + * @aw: maximum address width */ static int vtd_page_walk(VTDContextEntry *ce, uint64_t start, uint64_t end, vtd_page_walk_hook hook_fn, void *private, - bool notify_unmap) + bool notify_unmap, uint8_t aw) { dma_addr_t addr = vtd_ce_get_slpt_base(ce); uint32_t level = vtd_ce_get_level(ce); - if (!vtd_iova_range_check(start, ce)) { + if (!vtd_iova_range_check(start, ce, aw)) { return -VTD_FR_ADDR_BEYOND_MGAW; } - if (!vtd_iova_range_check(end, ce)) { + if (!vtd_iova_range_check(end, ce, aw)) { /* Fix end so that it reaches the maximum */ - end = vtd_iova_limit(ce); + end = vtd_iova_limit(ce, aw); } return vtd_page_walk_level(addr, start, end, hook_fn, private, - level, true, true, notify_unmap); + level, true, true, notify_unmap, aw); } /* Map a device to its corresponding domain (context-entry) */ @@ -867,7 +870,7 @@ static int vtd_dev_to_context_entry(IntelIOMMUState *s, uint8_t bus_num, return -VTD_FR_ROOT_ENTRY_P; } - if (re.rsvd || (re.val & VTD_ROOT_ENTRY_RSVD(VTD_HOST_ADDRESS_WIDTH))) { + if (re.rsvd || (re.val & VTD_ROOT_ENTRY_RSVD(s->aw_bits))) { trace_vtd_re_invalid(re.rsvd, re.val); return -VTD_FR_ROOT_ENTRY_RSVD; } @@ -884,7 +887,7 @@ static int vtd_dev_to_context_entry(IntelIOMMUState *s, uint8_t bus_num, } if ((ce->hi & VTD_CONTEXT_ENTRY_RSVD_HI) || - (ce->lo & VTD_CONTEXT_ENTRY_RSVD_LO(VTD_HOST_ADDRESS_WIDTH))) { + (ce->lo & VTD_CONTEXT_ENTRY_RSVD_LO(s->aw_bits))) { trace_vtd_ce_invalid(ce->hi, ce->lo); return -VTD_FR_CONTEXT_ENTRY_RSVD; } @@ -1166,7 +1169,7 @@ static bool vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus, } ret_fr = vtd_iova_to_slpte(&ce, addr, is_write, &slpte, &level, - &reads, &writes); + &reads, &writes, s->aw_bits); if (ret_fr) { ret_fr = -ret_fr; if (is_fpd_set && vtd_is_qualified_fault(ret_fr)) { @@ -1183,7 +1186,7 @@ static bool vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus, access_flags, level); out: entry->iova = addr & page_mask; - entry->translated_addr = vtd_get_slpte_addr(slpte) & page_mask; + entry->translated_addr = vtd_get_slpte_addr(slpte, s->aw_bits) & page_mask; entry->addr_mask = ~page_mask; entry->perm = access_flags; return true; @@ -1200,7 +1203,7 @@ static void vtd_root_table_setup(IntelIOMMUState *s) { s->root = vtd_get_quad_raw(s, DMAR_RTADDR_REG); s->root_extended = s->root & VTD_RTADDR_RTT; - s->root &= VTD_RTADDR_ADDR_MASK(VTD_HOST_ADDRESS_WIDTH); + s->root &= VTD_RTADDR_ADDR_MASK(s->aw_bits); trace_vtd_reg_dmar_root(s->root, s->root_extended); } @@ -1216,7 +1219,7 @@ static void vtd_interrupt_remap_table_setup(IntelIOMMUState *s) uint64_t value = 0; value = vtd_get_quad_raw(s, DMAR_IRTA_REG); s->intr_size = 1UL << ((value & VTD_IRTA_SIZE_MASK) + 1); - s->intr_root = value & VTD_IRTA_ADDR_MASK(VTD_HOST_ADDRESS_WIDTH); + s->intr_root = value & VTD_IRTA_ADDR_MASK(s->aw_bits); s->intr_eime = value & VTD_IRTA_EIME; /* Notify global invalidation */ @@ -1392,7 +1395,7 @@ static void vtd_iotlb_page_invalidate_notify(IntelIOMMUState *s, if (!ret && domain_id == VTD_CONTEXT_ENTRY_DID(ce.hi)) { vtd_page_walk(&ce, addr, addr + (1 << am) * VTD_PAGE_SIZE, vtd_page_invalidate_notify_hook, - (void *)&vtd_as->iommu, true); + (void *)&vtd_as->iommu, true, s->aw_bits); } } } @@ -1472,7 +1475,7 @@ static void vtd_handle_gcmd_qie(IntelIOMMUState *s, bool en) trace_vtd_inv_qi_enable(en); if (en) { - s->iq = iqa_val & VTD_IQA_IQA_MASK(VTD_HOST_ADDRESS_WIDTH); + s->iq = iqa_val & VTD_IQA_IQA_MASK(s->aw_bits); /* 2^(x+8) entries */ s->iq_size = 1UL << ((iqa_val & VTD_IQA_QS) + 8); s->qi_enabled = true; @@ -2403,6 +2406,8 @@ static Property vtd_properties[] = { DEFINE_PROP_ON_OFF_AUTO("eim", IntelIOMMUState, intr_eim, ON_OFF_AUTO_AUTO), DEFINE_PROP_BOOL("x-buggy-eim", IntelIOMMUState, buggy_eim, false), + DEFINE_PROP_UINT8("x-aw-bits", IntelIOMMUState, aw_bits, + VTD_HOST_ADDRESS_WIDTH), DEFINE_PROP_BOOL("caching-mode", IntelIOMMUState, caching_mode, FALSE), DEFINE_PROP_END_OF_LIST(), }; @@ -2758,6 +2763,7 @@ static void vtd_address_space_unmap(VTDAddressSpace *as, IOMMUNotifier *n) hwaddr size; hwaddr start = n->start; hwaddr end = n->end; + IntelIOMMUState *s = as->iommu_state; /* * Note: all the codes in this function has a assumption that IOVA @@ -2765,12 +2771,12 @@ static void vtd_address_space_unmap(VTDAddressSpace *as, IOMMUNotifier *n) * VT-d spec), otherwise we need to consider overflow of 64 bits. */ - if (end > VTD_ADDRESS_SIZE(VTD_HOST_ADDRESS_WIDTH)) { + if (end > VTD_ADDRESS_SIZE(s->aw_bits)) { /* * Don't need to unmap regions that is bigger than the whole * VT-d supported address space size */ - end = VTD_ADDRESS_SIZE(VTD_HOST_ADDRESS_WIDTH); + end = VTD_ADDRESS_SIZE(s->aw_bits); } assert(start <= end); @@ -2782,9 +2788,9 @@ static void vtd_address_space_unmap(VTDAddressSpace *as, IOMMUNotifier *n) * suite the minimum available mask. */ int n = 64 - clz64(size); - if (n > VTD_MGAW) { + if (n > s->aw_bits) { /* should not happen, but in case it happens, limit it */ - n = VTD_MGAW; + n = s->aw_bits; } size = 1ULL << n; } @@ -2844,7 +2850,8 @@ static void vtd_iommu_replay(IOMMUMemoryRegion *iommu_mr, IOMMUNotifier *n) PCI_FUNC(vtd_as->devfn), VTD_CONTEXT_ENTRY_DID(ce.hi), ce.hi, ce.lo); - vtd_page_walk(&ce, 0, ~0ULL, vtd_replay_hook, (void *)n, false); + vtd_page_walk(&ce, 0, ~0ULL, vtd_replay_hook, (void *)n, false, + s->aw_bits); } else { trace_vtd_replay_ce_invalid(bus_n, PCI_SLOT(vtd_as->devfn), PCI_FUNC(vtd_as->devfn)); @@ -2859,7 +2866,6 @@ static void vtd_iommu_replay(IOMMUMemoryRegion *iommu_mr, IOMMUNotifier *n) static void vtd_init(IntelIOMMUState *s) { X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(s); - uint8_t aw_bits = VTD_HOST_ADDRESS_WIDTH; memset(s->csr, 0, DMAR_REG_SIZE); memset(s->wmask, 0, DMAR_REG_SIZE); @@ -2878,21 +2884,24 @@ static void vtd_init(IntelIOMMUState *s) s->next_frcd_reg = 0; s->cap = VTD_CAP_FRO | VTD_CAP_NFR | VTD_CAP_ND | VTD_CAP_MAMV | VTD_CAP_PSI | VTD_CAP_SLLPS | - VTD_CAP_SAGAW_39bit | VTD_CAP_MGAW(VTD_HOST_ADDRESS_WIDTH); + VTD_CAP_SAGAW_39bit | VTD_CAP_MGAW(s->aw_bits); + if (s->aw_bits == VTD_HOST_AW_48BIT) { + s->cap |= VTD_CAP_SAGAW_48bit; + } s->ecap = VTD_ECAP_QI | VTD_ECAP_IRO; /* * Rsvd field masks for spte */ vtd_paging_entry_rsvd_field[0] = ~0ULL; - vtd_paging_entry_rsvd_field[1] = VTD_SPTE_PAGE_L1_RSVD_MASK(aw_bits); - vtd_paging_entry_rsvd_field[2] = VTD_SPTE_PAGE_L2_RSVD_MASK(aw_bits); - vtd_paging_entry_rsvd_field[3] = VTD_SPTE_PAGE_L3_RSVD_MASK(aw_bits); - vtd_paging_entry_rsvd_field[4] = VTD_SPTE_PAGE_L4_RSVD_MASK(aw_bits); - vtd_paging_entry_rsvd_field[5] = VTD_SPTE_LPAGE_L1_RSVD_MASK(aw_bits); - vtd_paging_entry_rsvd_field[6] = VTD_SPTE_LPAGE_L2_RSVD_MASK(aw_bits); - vtd_paging_entry_rsvd_field[7] = VTD_SPTE_LPAGE_L3_RSVD_MASK(aw_bits); - vtd_paging_entry_rsvd_field[8] = VTD_SPTE_LPAGE_L4_RSVD_MASK(aw_bits); + vtd_paging_entry_rsvd_field[1] = VTD_SPTE_PAGE_L1_RSVD_MASK(s->aw_bits); + vtd_paging_entry_rsvd_field[2] = VTD_SPTE_PAGE_L2_RSVD_MASK(s->aw_bits); + vtd_paging_entry_rsvd_field[3] = VTD_SPTE_PAGE_L3_RSVD_MASK(s->aw_bits); + vtd_paging_entry_rsvd_field[4] = VTD_SPTE_PAGE_L4_RSVD_MASK(s->aw_bits); + vtd_paging_entry_rsvd_field[5] = VTD_SPTE_LPAGE_L1_RSVD_MASK(s->aw_bits); + vtd_paging_entry_rsvd_field[6] = VTD_SPTE_LPAGE_L2_RSVD_MASK(s->aw_bits); + vtd_paging_entry_rsvd_field[7] = VTD_SPTE_LPAGE_L3_RSVD_MASK(s->aw_bits); + vtd_paging_entry_rsvd_field[8] = VTD_SPTE_LPAGE_L4_RSVD_MASK(s->aw_bits); if (x86_iommu->intr_supported) { s->ecap |= VTD_ECAP_IR | VTD_ECAP_MHMV; @@ -3029,6 +3038,14 @@ static bool vtd_decide_config(IntelIOMMUState *s, Error **errp) } } + /* Currently only address widths supported are 39 and 48 bits */ + if ((s->aw_bits != VTD_HOST_AW_39BIT) && + (s->aw_bits != VTD_HOST_AW_48BIT)) { + error_setg(errp, "Supported values for x-aw-bits are: %d, %d", + VTD_HOST_AW_39BIT, VTD_HOST_AW_48BIT); + return false; + } + return true; } diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h index 77e4a9833a..d084099ed9 100644 --- a/hw/i386/intel_iommu_internal.h +++ b/hw/i386/intel_iommu_internal.h @@ -131,7 +131,7 @@ #define VTD_TLB_DID(val) (((val) >> 32) & VTD_DOMAIN_ID_MASK) /* IVA_REG */ -#define VTD_IVA_ADDR(val) ((val) & ~0xfffULL & ((1ULL << VTD_MGAW) - 1)) +#define VTD_IVA_ADDR(val) ((val) & ~0xfffULL) #define VTD_IVA_AM(val) ((val) & 0x3fULL) /* GCMD_REG */ @@ -197,7 +197,6 @@ #define VTD_DOMAIN_ID_SHIFT 16 /* 16-bit domain id for 64K domains */ #define VTD_DOMAIN_ID_MASK ((1UL << VTD_DOMAIN_ID_SHIFT) - 1) #define VTD_CAP_ND (((VTD_DOMAIN_ID_SHIFT - 4) / 2) & 7ULL) -#define VTD_MGAW 39 /* Maximum Guest Address Width */ #define VTD_ADDRESS_SIZE(aw) (1ULL << (aw)) #define VTD_CAP_MGAW(aw) ((((aw) - 1) & 0x3fULL) << 16) #define VTD_MAMV 18ULL @@ -213,7 +212,6 @@ #define VTD_CAP_SAGAW_39bit (0x2ULL << VTD_CAP_SAGAW_SHIFT) /* 48-bit AGAW, 4-level page-table */ #define VTD_CAP_SAGAW_48bit (0x4ULL << VTD_CAP_SAGAW_SHIFT) -#define VTD_CAP_SAGAW VTD_CAP_SAGAW_39bit /* IQT_REG */ #define VTD_IQT_QT(val) (((val) >> 4) & 0x7fffULL) @@ -252,7 +250,7 @@ #define VTD_FRCD_SID_MASK 0xffffULL #define VTD_FRCD_SID(val) ((val) & VTD_FRCD_SID_MASK) /* For the low 64-bit of 128-bit */ -#define VTD_FRCD_FI(val) ((val) & (((1ULL << VTD_MGAW) - 1) ^ 0xfffULL)) +#define VTD_FRCD_FI(val) ((val) & ~0xfffULL) /* DMA Remapping Fault Conditions */ typedef enum VTDFaultReason { @@ -360,8 +358,7 @@ typedef union VTDInvDesc VTDInvDesc; #define VTD_INV_DESC_IOTLB_DOMAIN (2ULL << 4) #define VTD_INV_DESC_IOTLB_PAGE (3ULL << 4) #define VTD_INV_DESC_IOTLB_DID(val) (((val) >> 16) & VTD_DOMAIN_ID_MASK) -#define VTD_INV_DESC_IOTLB_ADDR(val) ((val) & ~0xfffULL & \ - ((1ULL << VTD_MGAW) - 1)) +#define VTD_INV_DESC_IOTLB_ADDR(val) ((val) & ~0xfffULL) #define VTD_INV_DESC_IOTLB_AM(val) ((val) & 0x3fULL) #define VTD_INV_DESC_IOTLB_RSVD_LO 0xffffffff0000ff00ULL #define VTD_INV_DESC_IOTLB_RSVD_HI 0xf80ULL diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/intel_iommu.h index 372b06df45..45ec8919b6 100644 --- a/include/hw/i386/intel_iommu.h +++ b/include/hw/i386/intel_iommu.h @@ -304,6 +304,7 @@ struct IntelIOMMUState { bool intr_eime; /* Extended interrupt mode enabled */ OnOffAuto intr_eim; /* Toggle for EIM cabability */ bool buggy_eim; /* Force buggy EIM unless eim=off */ + uint8_t aw_bits; /* Host/IOVA address width (in bits) */ }; /* Find the VTD Address space associated with the given bus pointer, -- cgit v1.2.3-55-g7522