From 7df953bd456da45f761064974820ab5c3fd7b2aa Mon Sep 17 00:00:00 2001 From: Knut Omang Date: Sun, 4 Oct 2015 15:48:50 +0200 Subject: intel_iommu: Add support for translation for devices behind bridges - Use a hash table indexed on bus pointers to store information about buses instead of using the bus numbers. Bus pointers are stored in a new VTDBus struct together with the vector of device address space pointers indexed by devfn. - The bus number is still used for lookup for selective SID based invalidate, in which case the bus number is lazily resolved from the bus hash table and cached in a separate index. Signed-off-by: Knut Omang Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- include/hw/i386/intel_iommu.h | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/intel_iommu.h index e321ee4fbc..5dbadb785c 100644 --- a/include/hw/i386/intel_iommu.h +++ b/include/hw/i386/intel_iommu.h @@ -49,6 +49,7 @@ typedef struct VTDContextCacheEntry VTDContextCacheEntry; typedef struct IntelIOMMUState IntelIOMMUState; typedef struct VTDAddressSpace VTDAddressSpace; typedef struct VTDIOTLBEntry VTDIOTLBEntry; +typedef struct VTDBus VTDBus; /* Context-Entry */ struct VTDContextEntry { @@ -65,7 +66,7 @@ struct VTDContextCacheEntry { }; struct VTDAddressSpace { - uint8_t bus_num; + PCIBus *bus; uint8_t devfn; AddressSpace as; MemoryRegion iommu; @@ -73,6 +74,11 @@ struct VTDAddressSpace { VTDContextCacheEntry context_cache_entry; }; +struct VTDBus { + PCIBus* bus; /* A reference to the bus to provide translation for */ + VTDAddressSpace *dev_as[0]; /* A table of VTDAddressSpace objects indexed by devfn */ +}; + struct VTDIOTLBEntry { uint64_t gfn; uint16_t domain_id; @@ -114,7 +120,13 @@ struct IntelIOMMUState { GHashTable *iotlb; /* IOTLB */ MemoryRegionIOMMUOps iommu_ops; - VTDAddressSpace **address_spaces[VTD_PCI_BUS_MAX]; + GHashTable *vtd_as_by_busptr; /* VTDBus objects indexed by PCIBus* reference */ + VTDBus *vtd_as_by_bus_num[VTD_PCI_BUS_MAX]; /* VTDBus objects indexed by bus number */ }; +/* Find the VTD Address space associated with the given bus pointer, + * create a new one if none exists + */ +VTDAddressSpace *vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus, int devfn); + #endif -- cgit v1.2.3-55-g7522 From 794e8f301a17953efa78ab7538019ec43c59e82a Mon Sep 17 00:00:00 2001 From: Michael S. Tsirkin Date: Thu, 24 Sep 2015 14:41:17 +0300 Subject: exec: factor out duplicate mmap code Anonymous and file-backed RAM allocation are now almost exactly the same. Reduce code duplication by moving RAM mmap code out of oslib-posix.c and exec.c. Reported-by: Marc-André Lureau Signed-off-by: Michael S. Tsirkin Reviewed-by: Paolo Bonzini Acked-by: Paolo Bonzini Tested-by: Thibaut Collet --- exec.c | 47 +++++++------------------------ include/qemu/mmap-alloc.h | 10 +++++++ util/Makefile.objs | 1 + util/mmap-alloc.c | 71 +++++++++++++++++++++++++++++++++++++++++++++++ util/oslib-posix.c | 28 +++---------------- 5 files changed, 96 insertions(+), 61 deletions(-) create mode 100644 include/qemu/mmap-alloc.h create mode 100644 util/mmap-alloc.c (limited to 'include') diff --git a/exec.c b/exec.c index 7d90a52252..4505dc76e8 100644 --- a/exec.c +++ b/exec.c @@ -55,6 +55,9 @@ #include "exec/ram_addr.h" #include "qemu/range.h" +#ifndef _WIN32 +#include "qemu/mmap-alloc.h" +#endif //#define DEBUG_SUBPAGE @@ -84,9 +87,9 @@ static MemoryRegion io_mem_unassigned; */ #define RAM_RESIZEABLE (1 << 2) -/* An extra page is mapped on top of this RAM. +/* RAM is backed by an mmapped file. */ -#define RAM_EXTRA (1 << 3) +#define RAM_FILE (1 << 3) #endif struct CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus); @@ -1188,13 +1191,10 @@ static void *file_ram_alloc(RAMBlock *block, char *filename; char *sanitized_name; char *c; - void *ptr; - void *area = NULL; + void *area; int fd; uint64_t hpagesize; - uint64_t total; Error *local_err = NULL; - size_t offset; hpagesize = gethugepagesize(path, &local_err); if (local_err) { @@ -1238,7 +1238,6 @@ static void *file_ram_alloc(RAMBlock *block, g_free(filename); memory = ROUND_UP(memory, hpagesize); - total = memory + hpagesize; /* * ftruncate is not supported by hugetlbfs in older @@ -1250,40 +1249,14 @@ static void *file_ram_alloc(RAMBlock *block, perror("ftruncate"); } - ptr = mmap(0, total, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, - -1, 0); - if (ptr == MAP_FAILED) { - error_setg_errno(errp, errno, - "unable to allocate memory range for hugepages"); - close(fd); - goto error; - } - - offset = QEMU_ALIGN_UP((uintptr_t)ptr, hpagesize) - (uintptr_t)ptr; - - area = mmap(ptr + offset, memory, PROT_READ | PROT_WRITE, - (block->flags & RAM_SHARED ? MAP_SHARED : MAP_PRIVATE) | - MAP_FIXED, - fd, 0); + area = qemu_ram_mmap(fd, memory, hpagesize, block->flags & RAM_SHARED); if (area == MAP_FAILED) { error_setg_errno(errp, errno, "unable to map backing store for hugepages"); - munmap(ptr, total); close(fd); goto error; } - if (offset > 0) { - munmap(ptr, offset); - } - ptr += offset; - total -= offset; - - if (total > memory + getpagesize()) { - munmap(ptr + memory + getpagesize(), - total - memory - getpagesize()); - } - if (mem_prealloc) { os_mem_prealloc(fd, area, memory); } @@ -1601,7 +1574,7 @@ ram_addr_t qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr, new_block->used_length = size; new_block->max_length = size; new_block->flags = share ? RAM_SHARED : 0; - new_block->flags |= RAM_EXTRA; + new_block->flags |= RAM_FILE; new_block->host = file_ram_alloc(new_block, size, mem_path, errp); if (!new_block->host) { @@ -1703,8 +1676,8 @@ static void reclaim_ramblock(RAMBlock *block) xen_invalidate_map_cache_entry(block->host); #ifndef _WIN32 } else if (block->fd >= 0) { - if (block->flags & RAM_EXTRA) { - munmap(block->host, block->max_length + getpagesize()); + if (block->flags & RAM_FILE) { + qemu_ram_munmap(block->host, block->max_length); } else { munmap(block->host, block->max_length); } diff --git a/include/qemu/mmap-alloc.h b/include/qemu/mmap-alloc.h new file mode 100644 index 0000000000..56388e689b --- /dev/null +++ b/include/qemu/mmap-alloc.h @@ -0,0 +1,10 @@ +#ifndef QEMU_MMAP_ALLOC +#define QEMU_MMAP_ALLOC + +#include "qemu-common.h" + +void *qemu_ram_mmap(int fd, size_t size, size_t align, bool shared); + +void qemu_ram_munmap(void *ptr, size_t size); + +#endif diff --git a/util/Makefile.objs b/util/Makefile.objs index 114d6578c4..f011b81dcb 100644 --- a/util/Makefile.objs +++ b/util/Makefile.objs @@ -1,6 +1,7 @@ util-obj-y = osdep.o cutils.o unicode.o qemu-timer-common.o util-obj-$(CONFIG_WIN32) += oslib-win32.o qemu-thread-win32.o event_notifier-win32.o util-obj-$(CONFIG_POSIX) += oslib-posix.o qemu-thread-posix.o event_notifier-posix.o qemu-openpty.o +util-obj-$(CONFIG_POSIX) += mmap-alloc.o util-obj-y += envlist.o path.o module.o util-obj-$(call lnot,$(CONFIG_INT128)) += host-utils.o util-obj-y += bitmap.o bitops.o hbitmap.o diff --git a/util/mmap-alloc.c b/util/mmap-alloc.c new file mode 100644 index 0000000000..13942694cc --- /dev/null +++ b/util/mmap-alloc.c @@ -0,0 +1,71 @@ +/* + * Support for RAM backed by mmaped host memory. + * + * Copyright (c) 2015 Red Hat, Inc. + * + * Authors: + * Michael S. Tsirkin + * + * This work is licensed under the terms of the GNU GPL, version 2 or + * later. See the COPYING file in the top-level directory. + */ +#include +#include +#include +#include + +void *qemu_ram_mmap(int fd, size_t size, size_t align, bool shared) +{ + /* + * Note: this always allocates at least one extra page of virtual address + * space, even if size is already aligned. + */ + size_t total = size + align; + void *ptr = mmap(0, total, PROT_NONE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); + size_t offset = QEMU_ALIGN_UP((uintptr_t)ptr, align) - (uintptr_t)ptr; + void *ptr1; + + if (ptr == MAP_FAILED) { + return NULL; + } + + /* Make sure align is a power of 2 */ + assert(!(align & (align - 1))); + /* Always align to host page size */ + assert(align >= getpagesize()); + + ptr1 = mmap(ptr + offset, size, PROT_READ | PROT_WRITE, + MAP_FIXED | + (fd == -1 ? MAP_ANONYMOUS : 0) | + (shared ? MAP_SHARED : MAP_PRIVATE), + fd, 0); + if (ptr1 == MAP_FAILED) { + munmap(ptr, total); + return NULL; + } + + ptr += offset; + total -= offset; + + if (offset > 0) { + munmap(ptr - offset, offset); + } + + /* + * Leave a single PROT_NONE page allocated after the RAM block, to serve as + * a guard page guarding against potential buffer overflows. + */ + if (total > size + getpagesize()) { + munmap(ptr + size + getpagesize(), total - size - getpagesize()); + } + + return ptr; +} + +void qemu_ram_munmap(void *ptr, size_t size) +{ + if (ptr) { + /* Unmap both the RAM block and the guard page */ + munmap(ptr, size + getpagesize()); + } +} diff --git a/util/oslib-posix.c b/util/oslib-posix.c index a0fcdc2ede..892d2d8ad2 100644 --- a/util/oslib-posix.c +++ b/util/oslib-posix.c @@ -72,6 +72,8 @@ extern int daemon(int, int); #include #endif +#include + int qemu_get_thread_id(void) { #if defined(__linux__) @@ -128,10 +130,7 @@ void *qemu_memalign(size_t alignment, size_t size) void *qemu_anon_ram_alloc(size_t size, uint64_t *alignment) { size_t align = QEMU_VMALLOC_ALIGN; - size_t total = size + align; - void *ptr = mmap(0, total, PROT_NONE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); - size_t offset = QEMU_ALIGN_UP((uintptr_t)ptr, align) - (uintptr_t)ptr; - void *ptr1; + void *ptr = qemu_ram_mmap(-1, size, align, false); if (ptr == MAP_FAILED) { return NULL; @@ -141,23 +140,6 @@ void *qemu_anon_ram_alloc(size_t size, uint64_t *alignment) *alignment = align; } - ptr1 = mmap(ptr + offset, size, PROT_READ | PROT_WRITE, - MAP_FIXED | MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); - if (ptr1 == MAP_FAILED) { - munmap(ptr, total); - return NULL; - } - - ptr += offset; - total -= offset; - - if (offset > 0) { - munmap(ptr - offset, offset); - } - if (total > size + getpagesize()) { - munmap(ptr + size + getpagesize(), total - size - getpagesize()); - } - trace_qemu_anon_ram_alloc(size, ptr); return ptr; } @@ -171,9 +153,7 @@ void qemu_vfree(void *ptr) void qemu_anon_ram_free(void *ptr, size_t size) { trace_qemu_anon_ram_free(ptr, size); - if (ptr) { - munmap(ptr, size + getpagesize()); - } + qemu_ram_munmap(ptr, size); } void qemu_set_block(int fd) -- cgit v1.2.3-55-g7522 From 2ce68e4cf5be9b5176a3c3c372948d6340724d2d Mon Sep 17 00:00:00 2001 From: Igor Mammedov Date: Tue, 6 Oct 2015 10:37:27 +0200 Subject: vhost: add vhost_has_free_slot() interface it will allow for other parts of QEMU check if it's safe to map memory region during hotplug/runtime. That way hotplug path will have a chance to cancel hotplug operation instead of crashing in vhost_commit(). Signed-off-by: Igor Mammedov Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/virtio/vhost-backend.c | 19 +++++++++++++++++++ hw/virtio/vhost-user.c | 6 ++++++ hw/virtio/vhost.c | 21 +++++++++++++++++++++ include/hw/virtio/vhost-backend.h | 2 ++ include/hw/virtio/vhost.h | 2 ++ stubs/Makefile.objs | 1 + stubs/vhost.c | 6 ++++++ 7 files changed, 57 insertions(+) create mode 100644 stubs/vhost.c (limited to 'include') diff --git a/hw/virtio/vhost-backend.c b/hw/virtio/vhost-backend.c index 72d1392b35..910d8e00e3 100644 --- a/hw/virtio/vhost-backend.c +++ b/hw/virtio/vhost-backend.c @@ -11,6 +11,7 @@ #include "hw/virtio/vhost.h" #include "hw/virtio/vhost-backend.h" #include "qemu/error-report.h" +#include "linux/vhost.h" #include @@ -49,12 +50,30 @@ static int vhost_kernel_get_vq_index(struct vhost_dev *dev, int idx) return idx - dev->vq_index; } +static int vhost_kernel_memslots_limit(struct vhost_dev *dev) +{ + int limit = 64; + char *s; + + if (g_file_get_contents("/sys/module/vhost/parameters/max_mem_regions", + &s, NULL, NULL)) { + uint64_t val = g_ascii_strtoull(s, NULL, 10); + if (!((val == G_MAXUINT64 || !val) && errno)) { + return val; + } + error_report("ignoring invalid max_mem_regions value in vhost module:" + " %s", s); + } + return limit; +} + static const VhostOps kernel_ops = { .backend_type = VHOST_BACKEND_TYPE_KERNEL, .vhost_call = vhost_kernel_call, .vhost_backend_init = vhost_kernel_init, .vhost_backend_cleanup = vhost_kernel_cleanup, .vhost_backend_get_vq_index = vhost_kernel_get_vq_index, + .vhost_backend_memslots_limit = vhost_kernel_memslots_limit, }; int vhost_set_backend_type(struct vhost_dev *dev, VhostBackendType backend_type) diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c index b11c0d21a0..9585637d07 100644 --- a/hw/virtio/vhost-user.c +++ b/hw/virtio/vhost-user.c @@ -440,6 +440,11 @@ static int vhost_user_get_vq_index(struct vhost_dev *dev, int idx) return idx; } +static int vhost_user_memslots_limit(struct vhost_dev *dev) +{ + return VHOST_MEMORY_MAX_NREGIONS; +} + const VhostOps user_ops = { .backend_type = VHOST_BACKEND_TYPE_USER, .vhost_call = vhost_user_call, @@ -447,4 +452,5 @@ const VhostOps user_ops = { .vhost_backend_cleanup = vhost_user_cleanup, .vhost_backend_get_vq_index = vhost_user_get_vq_index, .vhost_backend_set_vring_enable = vhost_user_set_vring_enable, + .vhost_backend_memslots_limit = vhost_user_memslots_limit, }; diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c index c0ed5b263f..a3b4f9e8ea 100644 --- a/hw/virtio/vhost.c +++ b/hw/virtio/vhost.c @@ -26,6 +26,22 @@ static struct vhost_log *vhost_log; +static unsigned int used_memslots; +static QLIST_HEAD(, vhost_dev) vhost_devices = + QLIST_HEAD_INITIALIZER(vhost_devices); + +bool vhost_has_free_slot(void) +{ + unsigned int slots_limit = ~0U; + struct vhost_dev *hdev; + + QLIST_FOREACH(hdev, &vhost_devices, entry) { + unsigned int r = hdev->vhost_ops->vhost_backend_memslots_limit(hdev); + slots_limit = MIN(slots_limit, r); + } + return slots_limit > used_memslots; +} + static void vhost_dev_sync_region(struct vhost_dev *dev, MemoryRegionSection *section, uint64_t mfirst, uint64_t mlast, @@ -457,6 +473,7 @@ static void vhost_set_memory(MemoryListener *listener, dev->mem_changed_start_addr = MIN(dev->mem_changed_start_addr, start_addr); dev->mem_changed_end_addr = MAX(dev->mem_changed_end_addr, start_addr + size - 1); dev->memory_changed = true; + used_memslots = dev->mem->nregions; } static bool vhost_section(MemoryRegionSection *section) @@ -916,6 +933,8 @@ int vhost_dev_init(struct vhost_dev *hdev, void *opaque, return -errno; } + QLIST_INSERT_HEAD(&vhost_devices, hdev, entry); + r = hdev->vhost_ops->vhost_call(hdev, VHOST_SET_OWNER, NULL); if (r < 0) { goto fail; @@ -972,6 +991,7 @@ fail_vq: fail: r = -errno; hdev->vhost_ops->vhost_backend_cleanup(hdev); + QLIST_REMOVE(hdev, entry); return r; } @@ -989,6 +1009,7 @@ void vhost_dev_cleanup(struct vhost_dev *hdev) g_free(hdev->mem); g_free(hdev->mem_sections); hdev->vhost_ops->vhost_backend_cleanup(hdev); + QLIST_REMOVE(hdev, entry); } /* Stop processing guest IO notifications in qemu. diff --git a/include/hw/virtio/vhost-backend.h b/include/hw/virtio/vhost-backend.h index 3a0f6e2dc2..7d4a8ad968 100644 --- a/include/hw/virtio/vhost-backend.h +++ b/include/hw/virtio/vhost-backend.h @@ -26,6 +26,7 @@ typedef int (*vhost_backend_init)(struct vhost_dev *dev, void *opaque); typedef int (*vhost_backend_cleanup)(struct vhost_dev *dev); typedef int (*vhost_backend_get_vq_index)(struct vhost_dev *dev, int idx); typedef int (*vhost_backend_set_vring_enable)(struct vhost_dev *dev, int enable); +typedef int (*vhost_backend_memslots_limit)(struct vhost_dev *dev); typedef struct VhostOps { VhostBackendType backend_type; @@ -34,6 +35,7 @@ typedef struct VhostOps { vhost_backend_cleanup vhost_backend_cleanup; vhost_backend_get_vq_index vhost_backend_get_vq_index; vhost_backend_set_vring_enable vhost_backend_set_vring_enable; + vhost_backend_memslots_limit vhost_backend_memslots_limit; } VhostOps; extern const VhostOps user_ops; diff --git a/include/hw/virtio/vhost.h b/include/hw/virtio/vhost.h index c3758f3c78..080831ed67 100644 --- a/include/hw/virtio/vhost.h +++ b/include/hw/virtio/vhost.h @@ -59,6 +59,7 @@ struct vhost_dev { const VhostOps *vhost_ops; void *opaque; struct vhost_log *log; + QLIST_ENTRY(vhost_dev) entry; }; int vhost_dev_init(struct vhost_dev *hdev, void *opaque, @@ -83,4 +84,5 @@ uint64_t vhost_get_features(struct vhost_dev *hdev, const int *feature_bits, uint64_t features); void vhost_ack_features(struct vhost_dev *hdev, const int *feature_bits, uint64_t features); +bool vhost_has_free_slot(void); #endif diff --git a/stubs/Makefile.objs b/stubs/Makefile.objs index 85e4e81017..ce6ce11ba2 100644 --- a/stubs/Makefile.objs +++ b/stubs/Makefile.objs @@ -39,3 +39,4 @@ stub-obj-y += cpus.o stub-obj-y += kvm.o stub-obj-y += qmp_pc_dimm_device_list.o stub-obj-y += target-monitor-defs.o +stub-obj-y += vhost.o diff --git a/stubs/vhost.c b/stubs/vhost.c new file mode 100644 index 0000000000..d346b856f5 --- /dev/null +++ b/stubs/vhost.c @@ -0,0 +1,6 @@ +#include "hw/virtio/vhost.h" + +bool vhost_has_free_slot(void) +{ + return true; +} -- cgit v1.2.3-55-g7522 From f04cf9239addd12d6be9e7ff137262755e3680d3 Mon Sep 17 00:00:00 2001 From: Marc-André Lureau Date: Fri, 9 Oct 2015 17:17:19 +0200 Subject: util: add linux-only memfd fallback Implement memfd_create() fallback if not available in system libc. memfd_create() is still not included in glibc today, atlhough it's been available since Linux 3.17 in Oct 2014. memfd has numerous advantages over traditional shm/mmap for ipc memory sharing with fd handler, which we are going to make use of for vhost-user logging memory in following patches. The next patches are going to introduce helpers to use best practices of memfd usage and provide some compatibility fallback. memfd.c is thus temporarily useless and eventually empty if memfd_create() is provided by the system. Signed-off-by: Marc-André Lureau Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Tested-by: Thibaut Collet --- include/qemu/memfd.h | 20 +++++++++++++++++++ util/Makefile.objs | 1 + util/memfd.c | 54 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 75 insertions(+) create mode 100644 include/qemu/memfd.h create mode 100644 util/memfd.c (limited to 'include') diff --git a/include/qemu/memfd.h b/include/qemu/memfd.h new file mode 100644 index 0000000000..8b1fe6ad66 --- /dev/null +++ b/include/qemu/memfd.h @@ -0,0 +1,20 @@ +#ifndef QEMU_MEMFD_H +#define QEMU_MEMFD_H + +#include "config-host.h" + +#ifndef F_LINUX_SPECIFIC_BASE +#define F_LINUX_SPECIFIC_BASE 1024 +#endif + +#ifndef F_ADD_SEALS +#define F_ADD_SEALS (F_LINUX_SPECIFIC_BASE + 9) +#define F_GET_SEALS (F_LINUX_SPECIFIC_BASE + 10) + +#define F_SEAL_SEAL 0x0001 /* prevent further seals from being set */ +#define F_SEAL_SHRINK 0x0002 /* prevent file from shrinking */ +#define F_SEAL_GROW 0x0004 /* prevent file from growing */ +#define F_SEAL_WRITE 0x0008 /* prevent writes */ +#endif + +#endif /* QEMU_MEMFD_H */ diff --git a/util/Makefile.objs b/util/Makefile.objs index f217901161..bab1722fc8 100644 --- a/util/Makefile.objs +++ b/util/Makefile.objs @@ -6,6 +6,7 @@ util-obj-$(CONFIG_POSIX) += oslib-posix.o util-obj-$(CONFIG_POSIX) += qemu-openpty.o util-obj-$(CONFIG_POSIX) += qemu-thread-posix.o util-obj-$(CONFIG_WIN32) += event_notifier-win32.o +util-obj-$(CONFIG_POSIX) += memfd.o util-obj-$(CONFIG_WIN32) += oslib-win32.o util-obj-$(CONFIG_WIN32) += qemu-thread-win32.o util-obj-y += envlist.o path.o module.o diff --git a/util/memfd.c b/util/memfd.c new file mode 100644 index 0000000000..c50969255f --- /dev/null +++ b/util/memfd.c @@ -0,0 +1,54 @@ +/* + * memfd.c + * + * Copyright (c) 2015 Red Hat, Inc. + * + * QEMU library functions on POSIX which are shared between QEMU and + * the QEMU tools. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "qemu/osdep.h" + +#include "qemu/memfd.h" + +#ifdef CONFIG_MEMFD +#include +#elif defined CONFIG_LINUX +#include +#include + +inline static int memfd_create(const char *name, unsigned int flags) +{ +#ifdef __NR_memfd_create + return syscall(__NR_memfd_create, name, flags); +#else + return -1; +#endif +} +#endif + +#ifndef MFD_CLOEXEC +#define MFD_CLOEXEC 0x0001U +#endif + +#ifndef MFD_ALLOW_SEALING +#define MFD_ALLOW_SEALING 0x0002U +#endif -- cgit v1.2.3-55-g7522 From d3592199ba3db504c6585115b9531b4cf7c50a0d Mon Sep 17 00:00:00 2001 From: Marc-André Lureau Date: Fri, 9 Oct 2015 17:17:20 +0200 Subject: util: add memfd helpers Add qemu_memfd_alloc/free() helpers. The function helps to allocate and seal shared memory. Signed-off-by: Marc-André Lureau Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Tested-by: Thibaut Collet --- include/qemu/memfd.h | 4 +++ util/memfd.c | 72 +++++++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 75 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/qemu/memfd.h b/include/qemu/memfd.h index 8b1fe6ad66..950fb882a6 100644 --- a/include/qemu/memfd.h +++ b/include/qemu/memfd.h @@ -17,4 +17,8 @@ #define F_SEAL_WRITE 0x0008 /* prevent writes */ #endif +void *qemu_memfd_alloc(const char *name, size_t size, unsigned int seals, + int *fd); +void qemu_memfd_free(void *ptr, size_t size, int fd); + #endif /* QEMU_MEMFD_H */ diff --git a/util/memfd.c b/util/memfd.c index c50969255f..c1194832be 100644 --- a/util/memfd.c +++ b/util/memfd.c @@ -27,6 +27,11 @@ #include "qemu/osdep.h" +#include +#include + +#include + #include "qemu/memfd.h" #ifdef CONFIG_MEMFD @@ -35,7 +40,7 @@ #include #include -inline static int memfd_create(const char *name, unsigned int flags) +static int memfd_create(const char *name, unsigned int flags) { #ifdef __NR_memfd_create return syscall(__NR_memfd_create, name, flags); @@ -52,3 +57,68 @@ inline static int memfd_create(const char *name, unsigned int flags) #ifndef MFD_ALLOW_SEALING #define MFD_ALLOW_SEALING 0x0002U #endif + +/* + * This is a best-effort helper for shared memory allocation, with + * optional sealing. The helper will do his best to allocate using + * memfd with sealing, but may fallback on other methods without + * sealing. + */ +void *qemu_memfd_alloc(const char *name, size_t size, unsigned int seals, + int *fd) +{ + void *ptr; + int mfd = -1; + + *fd = -1; + +#ifdef CONFIG_LINUX + if (seals) { + mfd = memfd_create(name, MFD_ALLOW_SEALING | MFD_CLOEXEC); + } + + if (mfd == -1) { + /* some systems have memfd without sealing */ + mfd = memfd_create(name, MFD_CLOEXEC); + seals = 0; + } +#endif + + if (mfd != -1) { + if (ftruncate(mfd, size) == -1) { + perror("ftruncate"); + close(mfd); + return NULL; + } + + if (seals && fcntl(mfd, F_ADD_SEALS, seals) == -1) { + perror("fcntl"); + close(mfd); + return NULL; + } + } else { + perror("memfd"); + return NULL; + } + + ptr = mmap(0, size, PROT_READ | PROT_WRITE, MAP_SHARED, mfd, 0); + if (ptr == MAP_FAILED) { + perror("mmap"); + close(mfd); + return NULL; + } + + *fd = mfd; + return ptr; +} + +void qemu_memfd_free(void *ptr, size_t size, int fd) +{ + if (ptr) { + munmap(ptr, size); + } + + if (fd != -1) { + close(fd); + } +} -- cgit v1.2.3-55-g7522 From c2bea314f6a2870b847c79e2e11263c5f9334db7 Mon Sep 17 00:00:00 2001 From: Marc-André Lureau Date: Fri, 9 Oct 2015 17:17:23 +0200 Subject: vhost: add vhost_set_log_base op Split VHOST_SET_LOG_BASE call in a seperate function callback, so that type safety works and more arguments can be added in the next patches. Signed-off-by: Marc-André Lureau Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Tested-by: Thibaut Collet --- hw/virtio/vhost-backend.c | 6 ++++++ hw/virtio/vhost-user.c | 16 +++++++++++++++- hw/virtio/vhost.c | 6 +++--- include/hw/virtio/vhost-backend.h | 3 +++ 4 files changed, 27 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/hw/virtio/vhost-backend.c b/hw/virtio/vhost-backend.c index 910d8e00e3..5846c37f6d 100644 --- a/hw/virtio/vhost-backend.c +++ b/hw/virtio/vhost-backend.c @@ -67,6 +67,11 @@ static int vhost_kernel_memslots_limit(struct vhost_dev *dev) return limit; } +static int vhost_set_log_base(struct vhost_dev *dev, uint64_t base) +{ + return vhost_kernel_call(dev, VHOST_SET_LOG_BASE, &base); +} + static const VhostOps kernel_ops = { .backend_type = VHOST_BACKEND_TYPE_KERNEL, .vhost_call = vhost_kernel_call, @@ -74,6 +79,7 @@ static const VhostOps kernel_ops = { .vhost_backend_cleanup = vhost_kernel_cleanup, .vhost_backend_get_vq_index = vhost_kernel_get_vq_index, .vhost_backend_memslots_limit = vhost_kernel_memslots_limit, + .vhost_set_log_base = vhost_set_log_base, }; int vhost_set_backend_type(struct vhost_dev *dev, VhostBackendType backend_type) diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c index 9585637d07..5d512898cf 100644 --- a/hw/virtio/vhost-user.c +++ b/hw/virtio/vhost-user.c @@ -242,7 +242,6 @@ static int vhost_user_call(struct vhost_dev *dev, unsigned long int request, break; case VHOST_USER_SET_FEATURES: - case VHOST_USER_SET_LOG_BASE: case VHOST_USER_SET_PROTOCOL_FEATURES: msg.u64 = *((__u64 *) arg); msg.size = sizeof(m.u64); @@ -367,6 +366,20 @@ static int vhost_user_call(struct vhost_dev *dev, unsigned long int request, return 0; } +static int vhost_set_log_base(struct vhost_dev *dev, uint64_t base) +{ + VhostUserMsg msg = { + .request = VHOST_USER_SET_LOG_BASE, + .flags = VHOST_USER_VERSION, + .u64 = base, + .size = sizeof(m.u64), + }; + + vhost_user_write(dev, &msg, NULL, 0); + + return 0; +} + static int vhost_user_init(struct vhost_dev *dev, void *opaque) { unsigned long long features; @@ -453,4 +466,5 @@ const VhostOps user_ops = { .vhost_backend_get_vq_index = vhost_user_get_vq_index, .vhost_backend_set_vring_enable = vhost_user_set_vring_enable, .vhost_backend_memslots_limit = vhost_user_memslots_limit, + .vhost_set_log_base = vhost_set_log_base, }; diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c index f5ecaf0d02..f2b31b7d12 100644 --- a/hw/virtio/vhost.c +++ b/hw/virtio/vhost.c @@ -352,7 +352,7 @@ static inline void vhost_dev_log_resize(struct vhost_dev* dev, uint64_t size) /* inform backend of log switching, this must be done before releasing the current log, to ensure no logging is lost */ - r = dev->vhost_ops->vhost_call(dev, VHOST_SET_LOG_BASE, &log_base); + r = dev->vhost_ops->vhost_set_log_base(dev, log_base); assert(r >= 0); vhost_log_put(dev, true); dev->log = log; @@ -1167,8 +1167,8 @@ int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev) hdev->log_size = vhost_get_log_size(hdev); hdev->log = vhost_log_get(hdev->log_size); log_base = (uintptr_t)hdev->log->log; - r = hdev->vhost_ops->vhost_call(hdev, VHOST_SET_LOG_BASE, - hdev->log_size ? &log_base : NULL); + r = hdev->vhost_ops->vhost_set_log_base(hdev, + hdev->log_size ? log_base : 0); if (r < 0) { r = -errno; goto fail_log; diff --git a/include/hw/virtio/vhost-backend.h b/include/hw/virtio/vhost-backend.h index 7d4a8ad968..c9035736db 100644 --- a/include/hw/virtio/vhost-backend.h +++ b/include/hw/virtio/vhost-backend.h @@ -28,6 +28,8 @@ typedef int (*vhost_backend_get_vq_index)(struct vhost_dev *dev, int idx); typedef int (*vhost_backend_set_vring_enable)(struct vhost_dev *dev, int enable); typedef int (*vhost_backend_memslots_limit)(struct vhost_dev *dev); +typedef int (*vhost_set_log_base_op)(struct vhost_dev *dev, uint64_t base); + typedef struct VhostOps { VhostBackendType backend_type; vhost_call vhost_call; @@ -36,6 +38,7 @@ typedef struct VhostOps { vhost_backend_get_vq_index vhost_backend_get_vq_index; vhost_backend_set_vring_enable vhost_backend_set_vring_enable; vhost_backend_memslots_limit vhost_backend_memslots_limit; + vhost_set_log_base_op vhost_set_log_base; } VhostOps; extern const VhostOps user_ops; -- cgit v1.2.3-55-g7522 From 1be0ac2109fbaca6e730ac578f0564507d173e2d Mon Sep 17 00:00:00 2001 From: Marc-André Lureau Date: Fri, 9 Oct 2015 17:17:24 +0200 Subject: vhost-user: add vhost_user_requires_shm_log() Check if the backend has VHOST_USER_PROTOCOL_F_LOG_SHMFD feature and require a shared log. Signed-off-by: Marc-André Lureau Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Tested-by: Thibaut Collet --- hw/virtio/vhost-user.c | 14 ++++++++++++-- include/hw/virtio/vhost-backend.h | 4 ++++ 2 files changed, 16 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c index 5d512898cf..e10ce87299 100644 --- a/hw/virtio/vhost-user.c +++ b/hw/virtio/vhost-user.c @@ -25,9 +25,10 @@ #define VHOST_MEMORY_MAX_NREGIONS 8 #define VHOST_USER_F_PROTOCOL_FEATURES 30 -#define VHOST_USER_PROTOCOL_FEATURE_MASK 0x1ULL -#define VHOST_USER_PROTOCOL_F_MQ 0 +#define VHOST_USER_PROTOCOL_FEATURE_MASK 0x3ULL +#define VHOST_USER_PROTOCOL_F_MQ 0 +#define VHOST_USER_PROTOCOL_F_LOG_SHMFD 1 typedef enum VhostUserRequest { VHOST_USER_NONE = 0, @@ -458,6 +459,14 @@ static int vhost_user_memslots_limit(struct vhost_dev *dev) return VHOST_MEMORY_MAX_NREGIONS; } +static bool vhost_user_requires_shm_log(struct vhost_dev *dev) +{ + assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); + + return virtio_has_feature(dev->protocol_features, + VHOST_USER_PROTOCOL_F_LOG_SHMFD); +} + const VhostOps user_ops = { .backend_type = VHOST_BACKEND_TYPE_USER, .vhost_call = vhost_user_call, @@ -467,4 +476,5 @@ const VhostOps user_ops = { .vhost_backend_set_vring_enable = vhost_user_set_vring_enable, .vhost_backend_memslots_limit = vhost_user_memslots_limit, .vhost_set_log_base = vhost_set_log_base, + .vhost_requires_shm_log = vhost_user_requires_shm_log, }; diff --git a/include/hw/virtio/vhost-backend.h b/include/hw/virtio/vhost-backend.h index c9035736db..c33570aeea 100644 --- a/include/hw/virtio/vhost-backend.h +++ b/include/hw/virtio/vhost-backend.h @@ -11,6 +11,8 @@ #ifndef VHOST_BACKEND_H_ #define VHOST_BACKEND_H_ +#include + typedef enum VhostBackendType { VHOST_BACKEND_TYPE_NONE = 0, VHOST_BACKEND_TYPE_KERNEL = 1, @@ -29,6 +31,7 @@ typedef int (*vhost_backend_set_vring_enable)(struct vhost_dev *dev, int enable) typedef int (*vhost_backend_memslots_limit)(struct vhost_dev *dev); typedef int (*vhost_set_log_base_op)(struct vhost_dev *dev, uint64_t base); +typedef bool (*vhost_requires_shm_log_op)(struct vhost_dev *dev); typedef struct VhostOps { VhostBackendType backend_type; @@ -39,6 +42,7 @@ typedef struct VhostOps { vhost_backend_set_vring_enable vhost_backend_set_vring_enable; vhost_backend_memslots_limit vhost_backend_memslots_limit; vhost_set_log_base_op vhost_set_log_base; + vhost_requires_shm_log_op vhost_requires_shm_log; } VhostOps; extern const VhostOps user_ops; -- cgit v1.2.3-55-g7522 From 15324404f68cde561d0eeee3d18a7b203f57f095 Mon Sep 17 00:00:00 2001 From: Marc-André Lureau Date: Fri, 9 Oct 2015 17:17:25 +0200 Subject: vhost: alloc shareable log If the backend is requires it, allocate shareable memory. vhost_log_get() now uses 2 globals "vhost_log" and "vhost_log_shm", that way there is a common non-shareable log and a common shareable one. Signed-off-by: Marc-André Lureau Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Tested-by: Thibaut Collet --- hw/virtio/vhost.c | 57 ++++++++++++++++++++++++++++++++++++++--------- include/hw/virtio/vhost.h | 3 ++- 2 files changed, 49 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c index f2b31b7d12..aec92d4691 100644 --- a/hw/virtio/vhost.c +++ b/hw/virtio/vhost.c @@ -18,6 +18,7 @@ #include "qemu/atomic.h" #include "qemu/range.h" #include "qemu/error-report.h" +#include "qemu/memfd.h" #include #include "exec/address-spaces.h" #include "hw/virtio/virtio-bus.h" @@ -25,6 +26,7 @@ #include "migration/migration.h" static struct vhost_log *vhost_log; +static struct vhost_log *vhost_log_shm; static unsigned int used_memslots; static QLIST_HEAD(, vhost_dev) vhost_devices = @@ -302,25 +304,46 @@ static uint64_t vhost_get_log_size(struct vhost_dev *dev) } return log_size; } -static struct vhost_log *vhost_log_alloc(uint64_t size) + +static struct vhost_log *vhost_log_alloc(uint64_t size, bool share) { - struct vhost_log *log = g_malloc0(sizeof *log + size * sizeof(*(log->log))); + struct vhost_log *log; + uint64_t logsize = size * sizeof(*(log->log)); + int fd = -1; + + log = g_new0(struct vhost_log, 1); + if (share) { + log->log = qemu_memfd_alloc("vhost-log", logsize, + F_SEAL_GROW | F_SEAL_SHRINK | F_SEAL_SEAL, + &fd); + memset(log->log, 0, logsize); + } else { + log->log = g_malloc0(logsize); + } log->size = size; log->refcnt = 1; + log->fd = fd; return log; } -static struct vhost_log *vhost_log_get(uint64_t size) +static struct vhost_log *vhost_log_get(uint64_t size, bool share) { - if (!vhost_log || vhost_log->size != size) { - vhost_log = vhost_log_alloc(size); + struct vhost_log *log = share ? vhost_log_shm : vhost_log; + + if (!log || log->size != size) { + log = vhost_log_alloc(size, share); + if (share) { + vhost_log_shm = log; + } else { + vhost_log = log; + } } else { - ++vhost_log->refcnt; + ++log->refcnt; } - return vhost_log; + return log; } static void vhost_log_put(struct vhost_dev *dev, bool sync) @@ -337,16 +360,29 @@ static void vhost_log_put(struct vhost_dev *dev, bool sync) if (dev->log_size && sync) { vhost_log_sync_range(dev, 0, dev->log_size * VHOST_LOG_CHUNK - 1); } + if (vhost_log == log) { + g_free(log->log); vhost_log = NULL; + } else if (vhost_log_shm == log) { + qemu_memfd_free(log->log, log->size * sizeof(*(log->log)), + log->fd); + vhost_log_shm = NULL; } + g_free(log); } } -static inline void vhost_dev_log_resize(struct vhost_dev* dev, uint64_t size) +static bool vhost_dev_log_is_shared(struct vhost_dev *dev) +{ + return dev->vhost_ops->vhost_requires_shm_log && + dev->vhost_ops->vhost_requires_shm_log(dev); +} + +static inline void vhost_dev_log_resize(struct vhost_dev *dev, uint64_t size) { - struct vhost_log *log = vhost_log_get(size); + struct vhost_log *log = vhost_log_get(size, vhost_dev_log_is_shared(dev)); uint64_t log_base = (uintptr_t)log->log; int r; @@ -1165,7 +1201,8 @@ int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev) uint64_t log_base; hdev->log_size = vhost_get_log_size(hdev); - hdev->log = vhost_log_get(hdev->log_size); + hdev->log = vhost_log_get(hdev->log_size, + vhost_dev_log_is_shared(hdev)); log_base = (uintptr_t)hdev->log->log; r = hdev->vhost_ops->vhost_set_log_base(hdev, hdev->log_size ? log_base : 0); diff --git a/include/hw/virtio/vhost.h b/include/hw/virtio/vhost.h index 080831ed67..6bf759f5c7 100644 --- a/include/hw/virtio/vhost.h +++ b/include/hw/virtio/vhost.h @@ -31,7 +31,8 @@ typedef unsigned long vhost_log_chunk_t; struct vhost_log { unsigned long long size; int refcnt; - vhost_log_chunk_t log[0]; + int fd; + vhost_log_chunk_t *log; }; struct vhost_memory; -- cgit v1.2.3-55-g7522 From 9a78a5dd272a190d262b5ba5d4721ab93d48c564 Mon Sep 17 00:00:00 2001 From: Marc-André Lureau Date: Fri, 9 Oct 2015 17:17:26 +0200 Subject: vhost-user: send log shm fd along with log_base Send the shm for the dirty pages logging if the backend supports VHOST_USER_PROTOCOL_F_LOG_SHMFD. Wait for a reply to make sure the old log is no longer used. Signed-off-by: Marc-André Lureau Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Tested-by: Thibaut Collet --- hw/virtio/vhost-backend.c | 3 ++- hw/virtio/vhost-user.c | 27 +++++++++++++++++++++++++-- hw/virtio/vhost.c | 5 +++-- include/hw/virtio/vhost-backend.h | 4 +++- 4 files changed, 33 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/hw/virtio/vhost-backend.c b/hw/virtio/vhost-backend.c index 5846c37f6d..5a2f1af137 100644 --- a/hw/virtio/vhost-backend.c +++ b/hw/virtio/vhost-backend.c @@ -67,7 +67,8 @@ static int vhost_kernel_memslots_limit(struct vhost_dev *dev) return limit; } -static int vhost_set_log_base(struct vhost_dev *dev, uint64_t base) +static int vhost_set_log_base(struct vhost_dev *dev, uint64_t base, + struct vhost_log *log) { return vhost_kernel_call(dev, VHOST_SET_LOG_BASE, &base); } diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c index e10ce87299..2709c4ee39 100644 --- a/hw/virtio/vhost-user.c +++ b/hw/virtio/vhost-user.c @@ -367,8 +367,13 @@ static int vhost_user_call(struct vhost_dev *dev, unsigned long int request, return 0; } -static int vhost_set_log_base(struct vhost_dev *dev, uint64_t base) +static int vhost_set_log_base(struct vhost_dev *dev, uint64_t base, + struct vhost_log *log) { + int fds[VHOST_MEMORY_MAX_NREGIONS]; + size_t fd_num = 0; + bool shmfd = virtio_has_feature(dev->protocol_features, + VHOST_USER_PROTOCOL_F_LOG_SHMFD); VhostUserMsg msg = { .request = VHOST_USER_SET_LOG_BASE, .flags = VHOST_USER_VERSION, @@ -376,7 +381,25 @@ static int vhost_set_log_base(struct vhost_dev *dev, uint64_t base) .size = sizeof(m.u64), }; - vhost_user_write(dev, &msg, NULL, 0); + if (shmfd && log->fd != -1) { + fds[fd_num++] = log->fd; + } + + vhost_user_write(dev, &msg, fds, fd_num); + + if (shmfd) { + msg.size = 0; + if (vhost_user_read(dev, &msg) < 0) { + return 0; + } + + if (msg.request != VHOST_USER_SET_LOG_BASE) { + error_report("Received unexpected msg type. " + "Expected %d received %d", + VHOST_USER_SET_LOG_BASE, msg.request); + return -1; + } + } return 0; } diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c index aec92d4691..65c09bf888 100644 --- a/hw/virtio/vhost.c +++ b/hw/virtio/vhost.c @@ -388,7 +388,7 @@ static inline void vhost_dev_log_resize(struct vhost_dev *dev, uint64_t size) /* inform backend of log switching, this must be done before releasing the current log, to ensure no logging is lost */ - r = dev->vhost_ops->vhost_set_log_base(dev, log_base); + r = dev->vhost_ops->vhost_set_log_base(dev, log_base, log); assert(r >= 0); vhost_log_put(dev, true); dev->log = log; @@ -1205,7 +1205,8 @@ int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev) vhost_dev_log_is_shared(hdev)); log_base = (uintptr_t)hdev->log->log; r = hdev->vhost_ops->vhost_set_log_base(hdev, - hdev->log_size ? log_base : 0); + hdev->log_size ? log_base : 0, + hdev->log); if (r < 0) { r = -errno; goto fail_log; diff --git a/include/hw/virtio/vhost-backend.h b/include/hw/virtio/vhost-backend.h index c33570aeea..375625f5fb 100644 --- a/include/hw/virtio/vhost-backend.h +++ b/include/hw/virtio/vhost-backend.h @@ -21,6 +21,7 @@ typedef enum VhostBackendType { } VhostBackendType; struct vhost_dev; +struct vhost_log; typedef int (*vhost_call)(struct vhost_dev *dev, unsigned long int request, void *arg); @@ -30,7 +31,8 @@ typedef int (*vhost_backend_get_vq_index)(struct vhost_dev *dev, int idx); typedef int (*vhost_backend_set_vring_enable)(struct vhost_dev *dev, int enable); typedef int (*vhost_backend_memslots_limit)(struct vhost_dev *dev); -typedef int (*vhost_set_log_base_op)(struct vhost_dev *dev, uint64_t base); +typedef int (*vhost_set_log_base_op)(struct vhost_dev *dev, uint64_t base, + struct vhost_log *log); typedef bool (*vhost_requires_shm_log_op)(struct vhost_dev *dev); typedef struct VhostOps { -- cgit v1.2.3-55-g7522 From 21e704256dea24baf93b169f5d7b0e7fe684bd15 Mon Sep 17 00:00:00 2001 From: Marc-André Lureau Date: Fri, 9 Oct 2015 17:17:28 +0200 Subject: vhost: use a function for each call Replace the generic vhost_call() by specific functions for each function call to help with type safety and changing arguments. While doing this, I found that "unsigned long long" and "uint64_t" were used interchangeably and causing compilation warnings, using uint64_t instead, as the vhost & protocol specifies. Signed-off-by: Marc-André Lureau [Fix enum usage and MQ - Thibaut Collet] Signed-off-by: Thibaut Collet Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Tested-by: Thibaut Collet --- hw/net/vhost_net.c | 16 +- hw/scsi/vhost-scsi.c | 7 +- hw/virtio/vhost-backend.c | 132 +++++++++- hw/virtio/vhost-user.c | 522 ++++++++++++++++++++++---------------- hw/virtio/vhost.c | 36 +-- include/hw/virtio/vhost-backend.h | 63 ++++- include/hw/virtio/vhost.h | 12 +- 7 files changed, 509 insertions(+), 279 deletions(-) (limited to 'include') diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c index 2bce89129d..1ab4133769 100644 --- a/hw/net/vhost_net.c +++ b/hw/net/vhost_net.c @@ -252,8 +252,7 @@ static int vhost_net_start_one(struct vhost_net *net, file.fd = net->backend; for (file.index = 0; file.index < net->dev.nvqs; ++file.index) { const VhostOps *vhost_ops = net->dev.vhost_ops; - r = vhost_ops->vhost_call(&net->dev, VHOST_NET_SET_BACKEND, - &file); + r = vhost_ops->vhost_net_set_backend(&net->dev, &file); if (r < 0) { r = -errno; goto fail; @@ -266,8 +265,7 @@ fail: if (net->nc->info->type == NET_CLIENT_OPTIONS_KIND_TAP) { while (file.index-- > 0) { const VhostOps *vhost_ops = net->dev.vhost_ops; - int r = vhost_ops->vhost_call(&net->dev, VHOST_NET_SET_BACKEND, - &file); + int r = vhost_ops->vhost_net_set_backend(&net->dev, &file); assert(r >= 0); } } @@ -289,15 +287,13 @@ static void vhost_net_stop_one(struct vhost_net *net, if (net->nc->info->type == NET_CLIENT_OPTIONS_KIND_TAP) { for (file.index = 0; file.index < net->dev.nvqs; ++file.index) { const VhostOps *vhost_ops = net->dev.vhost_ops; - int r = vhost_ops->vhost_call(&net->dev, VHOST_NET_SET_BACKEND, - &file); + int r = vhost_ops->vhost_net_set_backend(&net->dev, &file); assert(r >= 0); } } else if (net->nc->info->type == NET_CLIENT_OPTIONS_KIND_VHOST_USER) { for (file.index = 0; file.index < net->dev.nvqs; ++file.index) { const VhostOps *vhost_ops = net->dev.vhost_ops; - int r = vhost_ops->vhost_call(&net->dev, VHOST_RESET_DEVICE, - NULL); + int r = vhost_ops->vhost_reset_device(&net->dev); assert(r >= 0); } } @@ -428,8 +424,8 @@ int vhost_set_vring_enable(NetClientState *nc, int enable) VHostNetState *net = get_vhost_net(nc); const VhostOps *vhost_ops = net->dev.vhost_ops; - if (vhost_ops->vhost_backend_set_vring_enable) { - return vhost_ops->vhost_backend_set_vring_enable(&net->dev, enable); + if (vhost_ops->vhost_set_vring_enable) { + return vhost_ops->vhost_set_vring_enable(&net->dev, enable); } return 0; diff --git a/hw/scsi/vhost-scsi.c b/hw/scsi/vhost-scsi.c index fb7983d9dc..00cdac62f9 100644 --- a/hw/scsi/vhost-scsi.c +++ b/hw/scsi/vhost-scsi.c @@ -46,7 +46,7 @@ static int vhost_scsi_set_endpoint(VHostSCSI *s) memset(&backend, 0, sizeof(backend)); pstrcpy(backend.vhost_wwpn, sizeof(backend.vhost_wwpn), vs->conf.wwpn); - ret = vhost_ops->vhost_call(&s->dev, VHOST_SCSI_SET_ENDPOINT, &backend); + ret = vhost_ops->vhost_scsi_set_endpoint(&s->dev, &backend); if (ret < 0) { return -errno; } @@ -61,7 +61,7 @@ static void vhost_scsi_clear_endpoint(VHostSCSI *s) memset(&backend, 0, sizeof(backend)); pstrcpy(backend.vhost_wwpn, sizeof(backend.vhost_wwpn), vs->conf.wwpn); - vhost_ops->vhost_call(&s->dev, VHOST_SCSI_CLEAR_ENDPOINT, &backend); + vhost_ops->vhost_scsi_clear_endpoint(&s->dev, &backend); } static int vhost_scsi_start(VHostSCSI *s) @@ -77,8 +77,7 @@ static int vhost_scsi_start(VHostSCSI *s) return -ENOSYS; } - ret = vhost_ops->vhost_call(&s->dev, - VHOST_SCSI_GET_ABI_VERSION, &abi_version); + ret = vhost_ops->vhost_scsi_get_abi_version(&s->dev, &abi_version); if (ret < 0) { return -errno; } diff --git a/hw/virtio/vhost-backend.c b/hw/virtio/vhost-backend.c index 5a2f1af137..1d5f684eb0 100644 --- a/hw/virtio/vhost-backend.c +++ b/hw/virtio/vhost-backend.c @@ -43,13 +43,6 @@ static int vhost_kernel_cleanup(struct vhost_dev *dev) return close(fd); } -static int vhost_kernel_get_vq_index(struct vhost_dev *dev, int idx) -{ - assert(idx >= dev->vq_index && idx < dev->vq_index + dev->nvqs); - - return idx - dev->vq_index; -} - static int vhost_kernel_memslots_limit(struct vhost_dev *dev) { int limit = 64; @@ -67,20 +60,135 @@ static int vhost_kernel_memslots_limit(struct vhost_dev *dev) return limit; } -static int vhost_set_log_base(struct vhost_dev *dev, uint64_t base, - struct vhost_log *log) +static int vhost_kernel_net_set_backend(struct vhost_dev *dev, + struct vhost_vring_file *file) +{ + return vhost_kernel_call(dev, VHOST_NET_SET_BACKEND, file); +} + +static int vhost_kernel_scsi_set_endpoint(struct vhost_dev *dev, + struct vhost_scsi_target *target) +{ + return vhost_kernel_call(dev, VHOST_SCSI_SET_ENDPOINT, target); +} + +static int vhost_kernel_scsi_clear_endpoint(struct vhost_dev *dev, + struct vhost_scsi_target *target) +{ + return vhost_kernel_call(dev, VHOST_SCSI_CLEAR_ENDPOINT, target); +} + +static int vhost_kernel_scsi_get_abi_version(struct vhost_dev *dev, int *version) +{ + return vhost_kernel_call(dev, VHOST_SCSI_GET_ABI_VERSION, version); +} + +static int vhost_kernel_set_log_base(struct vhost_dev *dev, uint64_t base, + struct vhost_log *log) { return vhost_kernel_call(dev, VHOST_SET_LOG_BASE, &base); } +static int vhost_kernel_set_mem_table(struct vhost_dev *dev, + struct vhost_memory *mem) +{ + return vhost_kernel_call(dev, VHOST_SET_MEM_TABLE, mem); +} + +static int vhost_kernel_set_vring_addr(struct vhost_dev *dev, + struct vhost_vring_addr *addr) +{ + return vhost_kernel_call(dev, VHOST_SET_VRING_ADDR, addr); +} + +static int vhost_kernel_set_vring_endian(struct vhost_dev *dev, + struct vhost_vring_state *ring) +{ + return vhost_kernel_call(dev, VHOST_SET_VRING_ENDIAN, ring); +} + +static int vhost_kernel_set_vring_num(struct vhost_dev *dev, + struct vhost_vring_state *ring) +{ + return vhost_kernel_call(dev, VHOST_SET_VRING_NUM, ring); +} + +static int vhost_kernel_set_vring_base(struct vhost_dev *dev, + struct vhost_vring_state *ring) +{ + return vhost_kernel_call(dev, VHOST_SET_VRING_BASE, ring); +} + +static int vhost_kernel_get_vring_base(struct vhost_dev *dev, + struct vhost_vring_state *ring) +{ + return vhost_kernel_call(dev, VHOST_GET_VRING_BASE, ring); +} + +static int vhost_kernel_set_vring_kick(struct vhost_dev *dev, + struct vhost_vring_file *file) +{ + return vhost_kernel_call(dev, VHOST_SET_VRING_KICK, file); +} + +static int vhost_kernel_set_vring_call(struct vhost_dev *dev, + struct vhost_vring_file *file) +{ + return vhost_kernel_call(dev, VHOST_SET_VRING_CALL, file); +} + +static int vhost_kernel_set_features(struct vhost_dev *dev, + uint64_t features) +{ + return vhost_kernel_call(dev, VHOST_SET_FEATURES, &features); +} + +static int vhost_kernel_get_features(struct vhost_dev *dev, + uint64_t *features) +{ + return vhost_kernel_call(dev, VHOST_GET_FEATURES, features); +} + +static int vhost_kernel_set_owner(struct vhost_dev *dev) +{ + return vhost_kernel_call(dev, VHOST_SET_OWNER, NULL); +} + +static int vhost_kernel_reset_device(struct vhost_dev *dev) +{ + return vhost_kernel_call(dev, VHOST_RESET_DEVICE, NULL); +} + +static int vhost_kernel_get_vq_index(struct vhost_dev *dev, int idx) +{ + assert(idx >= dev->vq_index && idx < dev->vq_index + dev->nvqs); + + return idx - dev->vq_index; +} + static const VhostOps kernel_ops = { .backend_type = VHOST_BACKEND_TYPE_KERNEL, - .vhost_call = vhost_kernel_call, .vhost_backend_init = vhost_kernel_init, .vhost_backend_cleanup = vhost_kernel_cleanup, - .vhost_backend_get_vq_index = vhost_kernel_get_vq_index, .vhost_backend_memslots_limit = vhost_kernel_memslots_limit, - .vhost_set_log_base = vhost_set_log_base, + .vhost_net_set_backend = vhost_kernel_net_set_backend, + .vhost_scsi_set_endpoint = vhost_kernel_scsi_set_endpoint, + .vhost_scsi_clear_endpoint = vhost_kernel_scsi_clear_endpoint, + .vhost_scsi_get_abi_version = vhost_kernel_scsi_get_abi_version, + .vhost_set_log_base = vhost_kernel_set_log_base, + .vhost_set_mem_table = vhost_kernel_set_mem_table, + .vhost_set_vring_addr = vhost_kernel_set_vring_addr, + .vhost_set_vring_endian = vhost_kernel_set_vring_endian, + .vhost_set_vring_num = vhost_kernel_set_vring_num, + .vhost_set_vring_base = vhost_kernel_set_vring_base, + .vhost_get_vring_base = vhost_kernel_get_vring_base, + .vhost_set_vring_kick = vhost_kernel_set_vring_kick, + .vhost_set_vring_call = vhost_kernel_set_vring_call, + .vhost_set_features = vhost_kernel_set_features, + .vhost_get_features = vhost_kernel_get_features, + .vhost_set_owner = vhost_kernel_set_owner, + .vhost_reset_device = vhost_kernel_reset_device, + .vhost_get_vq_index = vhost_kernel_get_vq_index, }; int vhost_set_backend_type(struct vhost_dev *dev, VhostBackendType backend_type) diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c index fb3aa40d59..16d8e80a2a 100644 --- a/hw/virtio/vhost-user.c +++ b/hw/virtio/vhost-user.c @@ -99,37 +99,6 @@ static bool ioeventfd_enabled(void) return kvm_enabled() && kvm_eventfds_enabled(); } -static unsigned long int ioctl_to_vhost_user_request[VHOST_USER_MAX] = { - -1, /* VHOST_USER_NONE */ - VHOST_GET_FEATURES, /* VHOST_USER_GET_FEATURES */ - VHOST_SET_FEATURES, /* VHOST_USER_SET_FEATURES */ - VHOST_SET_OWNER, /* VHOST_USER_SET_OWNER */ - VHOST_RESET_DEVICE, /* VHOST_USER_RESET_DEVICE */ - VHOST_SET_MEM_TABLE, /* VHOST_USER_SET_MEM_TABLE */ - VHOST_SET_LOG_BASE, /* VHOST_USER_SET_LOG_BASE */ - VHOST_SET_LOG_FD, /* VHOST_USER_SET_LOG_FD */ - VHOST_SET_VRING_NUM, /* VHOST_USER_SET_VRING_NUM */ - VHOST_SET_VRING_ADDR, /* VHOST_USER_SET_VRING_ADDR */ - VHOST_SET_VRING_BASE, /* VHOST_USER_SET_VRING_BASE */ - VHOST_GET_VRING_BASE, /* VHOST_USER_GET_VRING_BASE */ - VHOST_SET_VRING_KICK, /* VHOST_USER_SET_VRING_KICK */ - VHOST_SET_VRING_CALL, /* VHOST_USER_SET_VRING_CALL */ - VHOST_SET_VRING_ERR /* VHOST_USER_SET_VRING_ERR */ -}; - -static VhostUserRequest vhost_user_request_translate(unsigned long int request) -{ - VhostUserRequest idx; - - for (idx = 0; idx < VHOST_USER_MAX; idx++) { - if (ioctl_to_vhost_user_request[idx] == request) { - break; - } - } - - return (idx == VHOST_USER_MAX) ? VHOST_USER_NONE : idx; -} - static int vhost_user_read(struct vhost_dev *dev, VhostUserMsg *msg) { CharDriverState *chr = dev->opaque; @@ -176,12 +145,35 @@ fail: return -1; } +static bool vhost_user_one_time_request(VhostUserRequest request) +{ + switch (request) { + case VHOST_USER_SET_OWNER: + case VHOST_USER_RESET_DEVICE: + case VHOST_USER_SET_MEM_TABLE: + case VHOST_USER_GET_QUEUE_NUM: + return true; + default: + return false; + } +} + +/* most non-init callers ignore the error */ static int vhost_user_write(struct vhost_dev *dev, VhostUserMsg *msg, int *fds, int fd_num) { CharDriverState *chr = dev->opaque; int size = VHOST_USER_HDR_SIZE + msg->size; + /* + * For non-vring specific requests, like VHOST_USER_SET_MEM_TABLE, + * we just need send it once in the first time. For later such + * request, we just ignore it. + */ + if (vhost_user_one_time_request(msg->request) && dev->vq_index != 0) { + return 0; + } + if (fd_num) { qemu_chr_fe_set_msgfds(chr, fds, fd_num); } @@ -190,231 +182,321 @@ static int vhost_user_write(struct vhost_dev *dev, VhostUserMsg *msg, 0 : -1; } -static bool vhost_user_one_time_request(VhostUserRequest request) +static int vhost_user_set_log_base(struct vhost_dev *dev, uint64_t base, + struct vhost_log *log) { - switch (request) { - case VHOST_USER_SET_OWNER: - case VHOST_USER_RESET_DEVICE: - case VHOST_USER_SET_MEM_TABLE: - case VHOST_USER_GET_QUEUE_NUM: - return true; - default: - return false; + int fds[VHOST_MEMORY_MAX_NREGIONS]; + size_t fd_num = 0; + bool shmfd = virtio_has_feature(dev->protocol_features, + VHOST_USER_PROTOCOL_F_LOG_SHMFD); + VhostUserMsg msg = { + .request = VHOST_USER_SET_LOG_BASE, + .flags = VHOST_USER_VERSION, + .u64 = base, + .size = sizeof(m.u64), + }; + + if (shmfd && log->fd != -1) { + fds[fd_num++] = log->fd; + } + + vhost_user_write(dev, &msg, fds, fd_num); + + if (shmfd) { + msg.size = 0; + if (vhost_user_read(dev, &msg) < 0) { + return 0; + } + + if (msg.request != VHOST_USER_SET_LOG_BASE) { + error_report("Received unexpected msg type. " + "Expected %d received %d", + VHOST_USER_SET_LOG_BASE, msg.request); + return -1; + } } + + return 0; } -static int vhost_user_call(struct vhost_dev *dev, unsigned long int request, - void *arg) +static int vhost_user_set_mem_table(struct vhost_dev *dev, + struct vhost_memory *mem) { - VhostUserMsg msg; - VhostUserRequest msg_request; - struct vhost_vring_file *file = 0; - int need_reply = 0; int fds[VHOST_MEMORY_MAX_NREGIONS]; int i, fd; size_t fd_num = 0; + VhostUserMsg msg = { + .request = VHOST_USER_SET_MEM_TABLE, + .flags = VHOST_USER_VERSION, + }; - assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); - - /* only translate vhost ioctl requests */ - if (request > VHOST_USER_MAX) { - msg_request = vhost_user_request_translate(request); - } else { - msg_request = request; + for (i = 0; i < dev->mem->nregions; ++i) { + struct vhost_memory_region *reg = dev->mem->regions + i; + ram_addr_t ram_addr; + + assert((uintptr_t)reg->userspace_addr == reg->userspace_addr); + qemu_ram_addr_from_host((void *)(uintptr_t)reg->userspace_addr, + &ram_addr); + fd = qemu_get_ram_fd(ram_addr); + if (fd > 0) { + msg.memory.regions[fd_num].userspace_addr = reg->userspace_addr; + msg.memory.regions[fd_num].memory_size = reg->memory_size; + msg.memory.regions[fd_num].guest_phys_addr = reg->guest_phys_addr; + msg.memory.regions[fd_num].mmap_offset = reg->userspace_addr - + (uintptr_t) qemu_get_ram_block_host_ptr(ram_addr); + assert(fd_num < VHOST_MEMORY_MAX_NREGIONS); + fds[fd_num++] = fd; + } } - /* - * For non-vring specific requests, like VHOST_USER_SET_MEM_TABLE, - * we just need send it once in the first time. For later such - * request, we just ignore it. - */ - if (vhost_user_one_time_request(msg_request) && dev->vq_index != 0) { - return 0; + msg.memory.nregions = fd_num; + + if (!fd_num) { + error_report("Failed initializing vhost-user memory map, " + "consider using -object memory-backend-file share=on"); + return -1; } - msg.request = msg_request; - msg.flags = VHOST_USER_VERSION; - msg.size = 0; + msg.size = sizeof(m.memory.nregions); + msg.size += sizeof(m.memory.padding); + msg.size += fd_num * sizeof(VhostUserMemoryRegion); - switch (msg_request) { - case VHOST_USER_GET_FEATURES: - case VHOST_USER_GET_PROTOCOL_FEATURES: - case VHOST_USER_GET_QUEUE_NUM: - need_reply = 1; - break; + vhost_user_write(dev, &msg, fds, fd_num); - case VHOST_USER_SET_FEATURES: - case VHOST_USER_SET_PROTOCOL_FEATURES: - msg.u64 = *((__u64 *) arg); - msg.size = sizeof(m.u64); - break; + return 0; +} - case VHOST_USER_SET_OWNER: - case VHOST_USER_RESET_DEVICE: - break; +static int vhost_user_set_vring_addr(struct vhost_dev *dev, + struct vhost_vring_addr *addr) +{ + VhostUserMsg msg = { + .request = VHOST_USER_SET_VRING_ADDR, + .flags = VHOST_USER_VERSION, + .addr = *addr, + .size = sizeof(*addr), + }; - case VHOST_USER_SET_MEM_TABLE: - for (i = 0; i < dev->mem->nregions; ++i) { - struct vhost_memory_region *reg = dev->mem->regions + i; - ram_addr_t ram_addr; - - assert((uintptr_t)reg->userspace_addr == reg->userspace_addr); - qemu_ram_addr_from_host((void *)(uintptr_t)reg->userspace_addr, &ram_addr); - fd = qemu_get_ram_fd(ram_addr); - if (fd > 0) { - msg.memory.regions[fd_num].userspace_addr = reg->userspace_addr; - msg.memory.regions[fd_num].memory_size = reg->memory_size; - msg.memory.regions[fd_num].guest_phys_addr = reg->guest_phys_addr; - msg.memory.regions[fd_num].mmap_offset = reg->userspace_addr - - (uintptr_t) qemu_get_ram_block_host_ptr(ram_addr); - assert(fd_num < VHOST_MEMORY_MAX_NREGIONS); - fds[fd_num++] = fd; - } - } + vhost_user_write(dev, &msg, NULL, 0); - msg.memory.nregions = fd_num; + return 0; +} - if (!fd_num) { - error_report("Failed initializing vhost-user memory map, " - "consider using -object memory-backend-file share=on"); - return -1; - } +static int vhost_user_set_vring_endian(struct vhost_dev *dev, + struct vhost_vring_state *ring) +{ + error_report("vhost-user trying to send unhandled ioctl"); + return -1; +} - msg.size = sizeof(m.memory.nregions); - msg.size += sizeof(m.memory.padding); - msg.size += fd_num * sizeof(VhostUserMemoryRegion); - - break; - - case VHOST_USER_SET_LOG_FD: - fds[fd_num++] = *((int *) arg); - break; - - case VHOST_USER_SET_VRING_NUM: - case VHOST_USER_SET_VRING_BASE: - case VHOST_USER_SET_VRING_ENABLE: - memcpy(&msg.state, arg, sizeof(struct vhost_vring_state)); - msg.size = sizeof(m.state); - break; - - case VHOST_USER_GET_VRING_BASE: - memcpy(&msg.state, arg, sizeof(struct vhost_vring_state)); - msg.size = sizeof(m.state); - need_reply = 1; - break; - - case VHOST_USER_SET_VRING_ADDR: - memcpy(&msg.addr, arg, sizeof(struct vhost_vring_addr)); - msg.size = sizeof(m.addr); - break; - - case VHOST_USER_SET_VRING_KICK: - case VHOST_USER_SET_VRING_CALL: - case VHOST_USER_SET_VRING_ERR: - file = arg; - msg.u64 = file->index & VHOST_USER_VRING_IDX_MASK; - msg.size = sizeof(m.u64); - if (ioeventfd_enabled() && file->fd > 0) { - fds[fd_num++] = file->fd; - } else { - msg.u64 |= VHOST_USER_VRING_NOFD_MASK; - } - break; - default: - error_report("vhost-user trying to send unhandled ioctl"); +static int vhost_set_vring(struct vhost_dev *dev, + unsigned long int request, + struct vhost_vring_state *ring) +{ + VhostUserMsg msg = { + .request = request, + .flags = VHOST_USER_VERSION, + .state = *ring, + .size = sizeof(*ring), + }; + + vhost_user_write(dev, &msg, NULL, 0); + + return 0; +} + +static int vhost_user_set_vring_num(struct vhost_dev *dev, + struct vhost_vring_state *ring) +{ + return vhost_set_vring(dev, VHOST_USER_SET_VRING_NUM, ring); +} + +static int vhost_user_set_vring_base(struct vhost_dev *dev, + struct vhost_vring_state *ring) +{ + return vhost_set_vring(dev, VHOST_USER_SET_VRING_BASE, ring); +} + +static int vhost_user_set_vring_enable(struct vhost_dev *dev, int enable) +{ + struct vhost_vring_state state = { + .index = dev->vq_index, + .num = enable, + }; + + if (!(dev->protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_MQ))) { return -1; - break; } - if (vhost_user_write(dev, &msg, fds, fd_num) < 0) { + return vhost_set_vring(dev, VHOST_USER_SET_VRING_ENABLE, &state); +} + + +static int vhost_user_get_vring_base(struct vhost_dev *dev, + struct vhost_vring_state *ring) +{ + VhostUserMsg msg = { + .request = VHOST_USER_GET_VRING_BASE, + .flags = VHOST_USER_VERSION, + .state = *ring, + .size = sizeof(*ring), + }; + + vhost_user_write(dev, &msg, NULL, 0); + + if (vhost_user_read(dev, &msg) < 0) { return 0; } - if (need_reply) { - if (vhost_user_read(dev, &msg) < 0) { - return 0; - } - - if (msg_request != msg.request) { - error_report("Received unexpected msg type." - " Expected %d received %d", msg_request, msg.request); - return -1; - } + if (msg.request != VHOST_USER_GET_VRING_BASE) { + error_report("Received unexpected msg type. Expected %d received %d", + VHOST_USER_GET_VRING_BASE, msg.request); + return -1; + } - switch (msg_request) { - case VHOST_USER_GET_FEATURES: - case VHOST_USER_GET_PROTOCOL_FEATURES: - case VHOST_USER_GET_QUEUE_NUM: - if (msg.size != sizeof(m.u64)) { - error_report("Received bad msg size."); - return -1; - } - *((__u64 *) arg) = msg.u64; - break; - case VHOST_USER_GET_VRING_BASE: - if (msg.size != sizeof(m.state)) { - error_report("Received bad msg size."); - return -1; - } - memcpy(arg, &msg.state, sizeof(struct vhost_vring_state)); - break; - default: - error_report("Received unexpected msg type."); - return -1; - break; - } + if (msg.size != sizeof(m.state)) { + error_report("Received bad msg size."); + return -1; } + *ring = msg.state; + return 0; } -static int vhost_set_log_base(struct vhost_dev *dev, uint64_t base, - struct vhost_log *log) +static int vhost_set_vring_file(struct vhost_dev *dev, + VhostUserRequest request, + struct vhost_vring_file *file) { int fds[VHOST_MEMORY_MAX_NREGIONS]; size_t fd_num = 0; - bool shmfd = virtio_has_feature(dev->protocol_features, - VHOST_USER_PROTOCOL_F_LOG_SHMFD); VhostUserMsg msg = { - .request = VHOST_USER_SET_LOG_BASE, + .request = request, .flags = VHOST_USER_VERSION, - .u64 = base, + .u64 = file->index & VHOST_USER_VRING_IDX_MASK, .size = sizeof(m.u64), }; - if (shmfd && log->fd != -1) { - fds[fd_num++] = log->fd; + if (ioeventfd_enabled() && file->fd > 0) { + fds[fd_num++] = file->fd; + } else { + msg.u64 |= VHOST_USER_VRING_NOFD_MASK; } vhost_user_write(dev, &msg, fds, fd_num); - if (shmfd) { - msg.size = 0; - if (vhost_user_read(dev, &msg) < 0) { - return 0; - } + return 0; +} - if (msg.request != VHOST_USER_SET_LOG_BASE) { - error_report("Received unexpected msg type. " - "Expected %d received %d", - VHOST_USER_SET_LOG_BASE, msg.request); - return -1; - } +static int vhost_user_set_vring_kick(struct vhost_dev *dev, + struct vhost_vring_file *file) +{ + return vhost_set_vring_file(dev, VHOST_USER_SET_VRING_KICK, file); +} + +static int vhost_user_set_vring_call(struct vhost_dev *dev, + struct vhost_vring_file *file) +{ + return vhost_set_vring_file(dev, VHOST_USER_SET_VRING_CALL, file); +} + +static int vhost_user_set_u64(struct vhost_dev *dev, int request, uint64_t u64) +{ + VhostUserMsg msg = { + .request = request, + .flags = VHOST_USER_VERSION, + .u64 = u64, + .size = sizeof(m.u64), + }; + + vhost_user_write(dev, &msg, NULL, 0); + + return 0; +} + +static int vhost_user_set_features(struct vhost_dev *dev, + uint64_t features) +{ + return vhost_user_set_u64(dev, VHOST_USER_SET_FEATURES, features); +} + +static int vhost_user_set_protocol_features(struct vhost_dev *dev, + uint64_t features) +{ + return vhost_user_set_u64(dev, VHOST_USER_SET_PROTOCOL_FEATURES, features); +} + +static int vhost_user_get_u64(struct vhost_dev *dev, int request, uint64_t *u64) +{ + VhostUserMsg msg = { + .request = request, + .flags = VHOST_USER_VERSION, + }; + + if (vhost_user_one_time_request(request) && dev->vq_index != 0) { + return 0; } + vhost_user_write(dev, &msg, NULL, 0); + + if (vhost_user_read(dev, &msg) < 0) { + return 0; + } + + if (msg.request != request) { + error_report("Received unexpected msg type. Expected %d received %d", + request, msg.request); + return -1; + } + + if (msg.size != sizeof(m.u64)) { + error_report("Received bad msg size."); + return -1; + } + + *u64 = msg.u64; + + return 0; +} + +static int vhost_user_get_features(struct vhost_dev *dev, uint64_t *features) +{ + return vhost_user_get_u64(dev, VHOST_USER_GET_FEATURES, features); +} + +static int vhost_user_set_owner(struct vhost_dev *dev) +{ + VhostUserMsg msg = { + .request = VHOST_USER_SET_OWNER, + .flags = VHOST_USER_VERSION, + }; + + vhost_user_write(dev, &msg, NULL, 0); + + return 0; +} + +static int vhost_user_reset_device(struct vhost_dev *dev) +{ + VhostUserMsg msg = { + .request = VHOST_USER_RESET_DEVICE, + .flags = VHOST_USER_VERSION, + }; + + vhost_user_write(dev, &msg, NULL, 0); + return 0; } static int vhost_user_init(struct vhost_dev *dev, void *opaque) { - unsigned long long features; + uint64_t features; int err; assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); dev->opaque = opaque; - err = vhost_user_call(dev, VHOST_USER_GET_FEATURES, &features); + err = vhost_user_get_features(dev, &features); if (err < 0) { return err; } @@ -422,21 +504,22 @@ static int vhost_user_init(struct vhost_dev *dev, void *opaque) if (virtio_has_feature(features, VHOST_USER_F_PROTOCOL_FEATURES)) { dev->backend_features |= 1ULL << VHOST_USER_F_PROTOCOL_FEATURES; - err = vhost_user_call(dev, VHOST_USER_GET_PROTOCOL_FEATURES, &features); + err = vhost_user_get_u64(dev, VHOST_USER_GET_PROTOCOL_FEATURES, + &features); if (err < 0) { return err; } dev->protocol_features = features & VHOST_USER_PROTOCOL_FEATURE_MASK; - err = vhost_user_call(dev, VHOST_USER_SET_PROTOCOL_FEATURES, - &dev->protocol_features); + err = vhost_user_set_protocol_features(dev, dev->protocol_features); if (err < 0) { return err; } /* query the max queues we support if backend supports Multiple Queue */ if (dev->protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_MQ)) { - err = vhost_user_call(dev, VHOST_USER_GET_QUEUE_NUM, &dev->max_queues); + err = vhost_user_get_u64(dev, VHOST_USER_GET_QUEUE_NUM, + &dev->max_queues); if (err < 0) { return err; } @@ -454,22 +537,6 @@ static int vhost_user_init(struct vhost_dev *dev, void *opaque) return 0; } -static int vhost_user_set_vring_enable(struct vhost_dev *dev, int enable) -{ - struct vhost_vring_state state = { - .index = dev->vq_index, - .num = enable, - }; - - assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); - - if (!(dev->protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_MQ))) { - return -1; - } - - return vhost_user_call(dev, VHOST_USER_SET_VRING_ENABLE, &state); -} - static int vhost_user_cleanup(struct vhost_dev *dev) { assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); @@ -501,12 +568,23 @@ static bool vhost_user_requires_shm_log(struct vhost_dev *dev) const VhostOps user_ops = { .backend_type = VHOST_BACKEND_TYPE_USER, - .vhost_call = vhost_user_call, .vhost_backend_init = vhost_user_init, .vhost_backend_cleanup = vhost_user_cleanup, - .vhost_backend_get_vq_index = vhost_user_get_vq_index, - .vhost_backend_set_vring_enable = vhost_user_set_vring_enable, .vhost_backend_memslots_limit = vhost_user_memslots_limit, - .vhost_set_log_base = vhost_set_log_base, + .vhost_set_log_base = vhost_user_set_log_base, + .vhost_set_mem_table = vhost_user_set_mem_table, + .vhost_set_vring_addr = vhost_user_set_vring_addr, + .vhost_set_vring_endian = vhost_user_set_vring_endian, + .vhost_set_vring_num = vhost_user_set_vring_num, + .vhost_set_vring_base = vhost_user_set_vring_base, + .vhost_get_vring_base = vhost_user_get_vring_base, + .vhost_set_vring_kick = vhost_user_set_vring_kick, + .vhost_set_vring_call = vhost_user_set_vring_call, + .vhost_set_features = vhost_user_set_features, + .vhost_get_features = vhost_user_get_features, + .vhost_set_owner = vhost_user_set_owner, + .vhost_reset_device = vhost_user_reset_device, + .vhost_get_vq_index = vhost_user_get_vq_index, + .vhost_set_vring_enable = vhost_user_set_vring_enable, .vhost_requires_shm_log = vhost_user_requires_shm_log, }; diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c index a553256081..2e78e4b706 100644 --- a/hw/virtio/vhost.c +++ b/hw/virtio/vhost.c @@ -555,7 +555,7 @@ static void vhost_commit(MemoryListener *listener) } if (!dev->log_enabled) { - r = dev->vhost_ops->vhost_call(dev, VHOST_SET_MEM_TABLE, dev->mem); + r = dev->vhost_ops->vhost_set_mem_table(dev, dev->mem); assert(r >= 0); dev->memory_changed = false; return; @@ -568,7 +568,7 @@ static void vhost_commit(MemoryListener *listener) if (dev->log_size < log_size) { vhost_dev_log_resize(dev, log_size + VHOST_LOG_BUFFER); } - r = dev->vhost_ops->vhost_call(dev, VHOST_SET_MEM_TABLE, dev->mem); + r = dev->vhost_ops->vhost_set_mem_table(dev, dev->mem); assert(r >= 0); /* To log less, can only decrease log size after table update. */ if (dev->log_size > log_size + VHOST_LOG_BUFFER) { @@ -636,7 +636,7 @@ static int vhost_virtqueue_set_addr(struct vhost_dev *dev, .log_guest_addr = vq->used_phys, .flags = enable_log ? (1 << VHOST_VRING_F_LOG) : 0, }; - int r = dev->vhost_ops->vhost_call(dev, VHOST_SET_VRING_ADDR, &addr); + int r = dev->vhost_ops->vhost_set_vring_addr(dev, &addr); if (r < 0) { return -errno; } @@ -650,7 +650,7 @@ static int vhost_dev_set_features(struct vhost_dev *dev, bool enable_log) if (enable_log) { features |= 0x1ULL << VHOST_F_LOG_ALL; } - r = dev->vhost_ops->vhost_call(dev, VHOST_SET_FEATURES, &features); + r = dev->vhost_ops->vhost_set_features(dev, features); return r < 0 ? -errno : 0; } @@ -755,7 +755,7 @@ static int vhost_virtqueue_set_vring_endian_legacy(struct vhost_dev *dev, .num = is_big_endian }; - if (!dev->vhost_ops->vhost_call(dev, VHOST_SET_VRING_ENDIAN, &s)) { + if (!dev->vhost_ops->vhost_set_vring_endian(dev, &s)) { return 0; } @@ -774,7 +774,7 @@ static int vhost_virtqueue_start(struct vhost_dev *dev, { hwaddr s, l, a; int r; - int vhost_vq_index = dev->vhost_ops->vhost_backend_get_vq_index(dev, idx); + int vhost_vq_index = dev->vhost_ops->vhost_get_vq_index(dev, idx); struct vhost_vring_file file = { .index = vhost_vq_index }; @@ -785,13 +785,13 @@ static int vhost_virtqueue_start(struct vhost_dev *dev, vq->num = state.num = virtio_queue_get_num(vdev, idx); - r = dev->vhost_ops->vhost_call(dev, VHOST_SET_VRING_NUM, &state); + r = dev->vhost_ops->vhost_set_vring_num(dev, &state); if (r) { return -errno; } state.num = virtio_queue_get_last_avail_idx(vdev, idx); - r = dev->vhost_ops->vhost_call(dev, VHOST_SET_VRING_BASE, &state); + r = dev->vhost_ops->vhost_set_vring_base(dev, &state); if (r) { return -errno; } @@ -843,7 +843,7 @@ static int vhost_virtqueue_start(struct vhost_dev *dev, } file.fd = event_notifier_get_fd(virtio_queue_get_host_notifier(vvq)); - r = dev->vhost_ops->vhost_call(dev, VHOST_SET_VRING_KICK, &file); + r = dev->vhost_ops->vhost_set_vring_kick(dev, &file); if (r) { r = -errno; goto fail_kick; @@ -876,13 +876,13 @@ static void vhost_virtqueue_stop(struct vhost_dev *dev, struct vhost_virtqueue *vq, unsigned idx) { - int vhost_vq_index = dev->vhost_ops->vhost_backend_get_vq_index(dev, idx); + int vhost_vq_index = dev->vhost_ops->vhost_get_vq_index(dev, idx); struct vhost_vring_state state = { .index = vhost_vq_index, }; int r; - r = dev->vhost_ops->vhost_call(dev, VHOST_GET_VRING_BASE, &state); + r = dev->vhost_ops->vhost_get_vring_base(dev, &state); if (r < 0) { fprintf(stderr, "vhost VQ %d ring restore failed: %d\n", idx, r); fflush(stderr); @@ -929,7 +929,7 @@ static void vhost_eventfd_del(MemoryListener *listener, static int vhost_virtqueue_init(struct vhost_dev *dev, struct vhost_virtqueue *vq, int n) { - int vhost_vq_index = dev->vhost_ops->vhost_backend_get_vq_index(dev, n); + int vhost_vq_index = dev->vhost_ops->vhost_get_vq_index(dev, n); struct vhost_vring_file file = { .index = vhost_vq_index, }; @@ -939,7 +939,7 @@ static int vhost_virtqueue_init(struct vhost_dev *dev, } file.fd = event_notifier_get_fd(&vq->masked_notifier); - r = dev->vhost_ops->vhost_call(dev, VHOST_SET_VRING_CALL, &file); + r = dev->vhost_ops->vhost_set_vring_call(dev, &file); if (r) { r = -errno; goto fail_call; @@ -981,12 +981,12 @@ int vhost_dev_init(struct vhost_dev *hdev, void *opaque, } QLIST_INSERT_HEAD(&vhost_devices, hdev, entry); - r = hdev->vhost_ops->vhost_call(hdev, VHOST_SET_OWNER, NULL); + r = hdev->vhost_ops->vhost_set_owner(hdev); if (r < 0) { goto fail; } - r = hdev->vhost_ops->vhost_call(hdev, VHOST_GET_FEATURES, &features); + r = hdev->vhost_ops->vhost_get_features(hdev, &features); if (r < 0) { goto fail; } @@ -1147,8 +1147,8 @@ void vhost_virtqueue_mask(struct vhost_dev *hdev, VirtIODevice *vdev, int n, file.fd = event_notifier_get_fd(virtio_queue_get_guest_notifier(vvq)); } - file.index = hdev->vhost_ops->vhost_backend_get_vq_index(hdev, n); - r = hdev->vhost_ops->vhost_call(hdev, VHOST_SET_VRING_CALL, &file); + file.index = hdev->vhost_ops->vhost_get_vq_index(hdev, n); + r = hdev->vhost_ops->vhost_set_vring_call(hdev, &file); assert(r >= 0); } @@ -1190,7 +1190,7 @@ int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev) if (r < 0) { goto fail_features; } - r = hdev->vhost_ops->vhost_call(hdev, VHOST_SET_MEM_TABLE, hdev->mem); + r = hdev->vhost_ops->vhost_set_mem_table(hdev, hdev->mem); if (r < 0) { r = -errno; goto fail_mem; diff --git a/include/hw/virtio/vhost-backend.h b/include/hw/virtio/vhost-backend.h index 375625f5fb..ad1948101d 100644 --- a/include/hw/virtio/vhost-backend.h +++ b/include/hw/virtio/vhost-backend.h @@ -22,28 +22,77 @@ typedef enum VhostBackendType { struct vhost_dev; struct vhost_log; +struct vhost_memory; +struct vhost_vring_file; +struct vhost_vring_state; +struct vhost_vring_addr; +struct vhost_scsi_target; -typedef int (*vhost_call)(struct vhost_dev *dev, unsigned long int request, - void *arg); typedef int (*vhost_backend_init)(struct vhost_dev *dev, void *opaque); typedef int (*vhost_backend_cleanup)(struct vhost_dev *dev); -typedef int (*vhost_backend_get_vq_index)(struct vhost_dev *dev, int idx); -typedef int (*vhost_backend_set_vring_enable)(struct vhost_dev *dev, int enable); typedef int (*vhost_backend_memslots_limit)(struct vhost_dev *dev); +typedef int (*vhost_net_set_backend_op)(struct vhost_dev *dev, + struct vhost_vring_file *file); +typedef int (*vhost_scsi_set_endpoint_op)(struct vhost_dev *dev, + struct vhost_scsi_target *target); +typedef int (*vhost_scsi_clear_endpoint_op)(struct vhost_dev *dev, + struct vhost_scsi_target *target); +typedef int (*vhost_scsi_get_abi_version_op)(struct vhost_dev *dev, + int *version); typedef int (*vhost_set_log_base_op)(struct vhost_dev *dev, uint64_t base, struct vhost_log *log); +typedef int (*vhost_set_mem_table_op)(struct vhost_dev *dev, + struct vhost_memory *mem); +typedef int (*vhost_set_vring_addr_op)(struct vhost_dev *dev, + struct vhost_vring_addr *addr); +typedef int (*vhost_set_vring_endian_op)(struct vhost_dev *dev, + struct vhost_vring_state *ring); +typedef int (*vhost_set_vring_num_op)(struct vhost_dev *dev, + struct vhost_vring_state *ring); +typedef int (*vhost_set_vring_base_op)(struct vhost_dev *dev, + struct vhost_vring_state *ring); +typedef int (*vhost_get_vring_base_op)(struct vhost_dev *dev, + struct vhost_vring_state *ring); +typedef int (*vhost_set_vring_kick_op)(struct vhost_dev *dev, + struct vhost_vring_file *file); +typedef int (*vhost_set_vring_call_op)(struct vhost_dev *dev, + struct vhost_vring_file *file); +typedef int (*vhost_set_features_op)(struct vhost_dev *dev, + uint64_t features); +typedef int (*vhost_get_features_op)(struct vhost_dev *dev, + uint64_t *features); +typedef int (*vhost_set_owner_op)(struct vhost_dev *dev); +typedef int (*vhost_reset_device_op)(struct vhost_dev *dev); +typedef int (*vhost_get_vq_index_op)(struct vhost_dev *dev, int idx); +typedef int (*vhost_set_vring_enable_op)(struct vhost_dev *dev, + int enable); typedef bool (*vhost_requires_shm_log_op)(struct vhost_dev *dev); typedef struct VhostOps { VhostBackendType backend_type; - vhost_call vhost_call; vhost_backend_init vhost_backend_init; vhost_backend_cleanup vhost_backend_cleanup; - vhost_backend_get_vq_index vhost_backend_get_vq_index; - vhost_backend_set_vring_enable vhost_backend_set_vring_enable; vhost_backend_memslots_limit vhost_backend_memslots_limit; + vhost_net_set_backend_op vhost_net_set_backend; + vhost_scsi_set_endpoint_op vhost_scsi_set_endpoint; + vhost_scsi_clear_endpoint_op vhost_scsi_clear_endpoint; + vhost_scsi_get_abi_version_op vhost_scsi_get_abi_version; vhost_set_log_base_op vhost_set_log_base; + vhost_set_mem_table_op vhost_set_mem_table; + vhost_set_vring_addr_op vhost_set_vring_addr; + vhost_set_vring_endian_op vhost_set_vring_endian; + vhost_set_vring_num_op vhost_set_vring_num; + vhost_set_vring_base_op vhost_set_vring_base; + vhost_get_vring_base_op vhost_get_vring_base; + vhost_set_vring_kick_op vhost_set_vring_kick; + vhost_set_vring_call_op vhost_set_vring_call; + vhost_set_features_op vhost_set_features; + vhost_get_features_op vhost_get_features; + vhost_set_owner_op vhost_set_owner; + vhost_reset_device_op vhost_reset_device; + vhost_get_vq_index_op vhost_get_vq_index; + vhost_set_vring_enable_op vhost_set_vring_enable; vhost_requires_shm_log_op vhost_requires_shm_log; } VhostOps; diff --git a/include/hw/virtio/vhost.h b/include/hw/virtio/vhost.h index 6bf759f5c7..7437fd476a 100644 --- a/include/hw/virtio/vhost.h +++ b/include/hw/virtio/vhost.h @@ -45,14 +45,14 @@ struct vhost_dev { int nvqs; /* the first virtqueue which would be used by this vhost dev */ int vq_index; - unsigned long long features; - unsigned long long acked_features; - unsigned long long backend_features; - unsigned long long protocol_features; - unsigned long long max_queues; + uint64_t features; + uint64_t acked_features; + uint64_t backend_features; + uint64_t protocol_features; + uint64_t max_queues; bool started; bool log_enabled; - unsigned long long log_size; + uint64_t log_size; Error *migration_blocker; bool memory_changed; hwaddr mem_changed_start_addr; -- cgit v1.2.3-55-g7522 From 3e866365e1eb6bcfa2d01c21debb05b9e47a47f7 Mon Sep 17 00:00:00 2001 From: Thibaut Collet Date: Fri, 9 Oct 2015 17:17:32 +0200 Subject: vhost user: add rarp sending after live migration for legacy guest A new vhost user message is added to allow QEMU to ask to vhost user backend to broadcast a fake RARP after live migration for guest without GUEST_ANNOUNCE capability. This new message is sent only if the backend supports the new VHOST_USER_PROTOCOL_F_RARP protocol feature. The payload of this new message is the MAC address of the guest (not known by the backend). The MAC address is copied in the first 6 bytes of a u64 to avoid to create a new payload message type. This new message has no equivalent ioctl so a new callback is added in the userOps structure to send the request. Upon reception of this new message the vhost user backend must generate and broadcast a fake RARP request to notify the migration is terminated. Signed-off-by: Thibaut Collet [Rebased and fixed checkpatch errors - Marc-André] Signed-off-by: Marc-André Lureau Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Tested-by: Thibaut Collet --- docs/specs/vhost-user.txt | 15 +++++++++++++++ hw/net/vhost_net.c | 17 +++++++++++++++++ hw/virtio/vhost-user.c | 32 +++++++++++++++++++++++++++++++- include/hw/virtio/vhost-backend.h | 3 +++ include/net/vhost_net.h | 1 + net/vhost-user.c | 24 ++++++++++++++++++++++-- 6 files changed, 89 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/docs/specs/vhost-user.txt b/docs/specs/vhost-user.txt index e0292a0e65..e0d71e27e6 100644 --- a/docs/specs/vhost-user.txt +++ b/docs/specs/vhost-user.txt @@ -194,6 +194,7 @@ Protocol features #define VHOST_USER_PROTOCOL_F_MQ 0 #define VHOST_USER_PROTOCOL_F_LOG_SHMFD 1 +#define VHOST_USER_PROTOCOL_F_RARP 2 Message types ------------- @@ -381,3 +382,17 @@ Message types Master payload: vring state description Signal slave to enable or disable corresponding vring. + + * VHOST_USER_SEND_RARP + + Id: 19 + Equivalent ioctl: N/A + Master payload: u64 + + Ask vhost user backend to broadcast a fake RARP to notify the migration + is terminated for guest that does not support GUEST_ANNOUNCE. + Only legal if feature bit VHOST_USER_F_PROTOCOL_FEATURES is present in + VHOST_USER_GET_FEATURES and protocol feature bit VHOST_USER_PROTOCOL_F_RARP + is present in VHOST_USER_GET_PROTOCOL_FEATURES. + The first 6 bytes of the payload contain the mac address of the guest to + allow the vhost user backend to construct and broadcast the fake RARP. diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c index 840f443065..d91b7b155e 100644 --- a/hw/net/vhost_net.c +++ b/hw/net/vhost_net.c @@ -388,6 +388,18 @@ void vhost_net_cleanup(struct vhost_net *net) g_free(net); } +int vhost_net_notify_migration_done(struct vhost_net *net, char* mac_addr) +{ + const VhostOps *vhost_ops = net->dev.vhost_ops; + int r = -1; + + if (vhost_ops->vhost_migration_done) { + r = vhost_ops->vhost_migration_done(&net->dev, mac_addr); + } + + return r; +} + bool vhost_net_virtqueue_pending(VHostNetState *net, int idx) { return vhost_virtqueue_pending(&net->dev, idx); @@ -479,6 +491,11 @@ void vhost_net_virtqueue_mask(VHostNetState *net, VirtIODevice *dev, { } +int vhost_net_notify_migration_done(struct vhost_net *net, char* mac_addr) +{ + return -1; +} + VHostNetState *get_vhost_net(NetClientState *nc) { return 0; diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c index 16d8e80a2a..d804c5f565 100644 --- a/hw/virtio/vhost-user.c +++ b/hw/virtio/vhost-user.c @@ -10,6 +10,7 @@ #include "hw/virtio/vhost.h" #include "hw/virtio/vhost-backend.h" +#include "hw/virtio/virtio-net.h" #include "sysemu/char.h" #include "sysemu/kvm.h" #include "qemu/error-report.h" @@ -27,9 +28,10 @@ #define VHOST_MEMORY_MAX_NREGIONS 8 #define VHOST_USER_F_PROTOCOL_FEATURES 30 -#define VHOST_USER_PROTOCOL_FEATURE_MASK 0x3ULL +#define VHOST_USER_PROTOCOL_FEATURE_MASK 0x7ULL #define VHOST_USER_PROTOCOL_F_MQ 0 #define VHOST_USER_PROTOCOL_F_LOG_SHMFD 1 +#define VHOST_USER_PROTOCOL_F_RARP 2 typedef enum VhostUserRequest { VHOST_USER_NONE = 0, @@ -51,6 +53,7 @@ typedef enum VhostUserRequest { VHOST_USER_SET_PROTOCOL_FEATURES = 16, VHOST_USER_GET_QUEUE_NUM = 17, VHOST_USER_SET_VRING_ENABLE = 18, + VHOST_USER_SEND_RARP = 19, VHOST_USER_MAX } VhostUserRequest; @@ -566,6 +569,32 @@ static bool vhost_user_requires_shm_log(struct vhost_dev *dev) VHOST_USER_PROTOCOL_F_LOG_SHMFD); } +static int vhost_user_migration_done(struct vhost_dev *dev, char* mac_addr) +{ + VhostUserMsg msg = { 0 }; + int err; + + assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); + + /* If guest supports GUEST_ANNOUNCE do nothing */ + if (virtio_has_feature(dev->acked_features, VIRTIO_NET_F_GUEST_ANNOUNCE)) { + return 0; + } + + /* if backend supports VHOST_USER_PROTOCOL_F_RARP ask it to send the RARP */ + if (virtio_has_feature(dev->protocol_features, + VHOST_USER_PROTOCOL_F_RARP)) { + msg.request = VHOST_USER_SEND_RARP; + msg.flags = VHOST_USER_VERSION; + memcpy((char *)&msg.u64, mac_addr, 6); + msg.size = sizeof(m.u64); + + err = vhost_user_write(dev, &msg, NULL, 0); + return err; + } + return -1; +} + const VhostOps user_ops = { .backend_type = VHOST_BACKEND_TYPE_USER, .vhost_backend_init = vhost_user_init, @@ -587,4 +616,5 @@ const VhostOps user_ops = { .vhost_get_vq_index = vhost_user_get_vq_index, .vhost_set_vring_enable = vhost_user_set_vring_enable, .vhost_requires_shm_log = vhost_user_requires_shm_log, + .vhost_migration_done = vhost_user_migration_done, }; diff --git a/include/hw/virtio/vhost-backend.h b/include/hw/virtio/vhost-backend.h index ad1948101d..c59cc81915 100644 --- a/include/hw/virtio/vhost-backend.h +++ b/include/hw/virtio/vhost-backend.h @@ -68,6 +68,8 @@ typedef int (*vhost_get_vq_index_op)(struct vhost_dev *dev, int idx); typedef int (*vhost_set_vring_enable_op)(struct vhost_dev *dev, int enable); typedef bool (*vhost_requires_shm_log_op)(struct vhost_dev *dev); +typedef int (*vhost_migration_done_op)(struct vhost_dev *dev, + char *mac_addr); typedef struct VhostOps { VhostBackendType backend_type; @@ -94,6 +96,7 @@ typedef struct VhostOps { vhost_get_vq_index_op vhost_get_vq_index; vhost_set_vring_enable_op vhost_set_vring_enable; vhost_requires_shm_log_op vhost_requires_shm_log; + vhost_migration_done_op vhost_migration_done; } VhostOps; extern const VhostOps user_ops; diff --git a/include/net/vhost_net.h b/include/net/vhost_net.h index 0188c4d02d..3389b410d8 100644 --- a/include/net/vhost_net.h +++ b/include/net/vhost_net.h @@ -27,6 +27,7 @@ void vhost_net_ack_features(VHostNetState *net, uint64_t features); bool vhost_net_virtqueue_pending(VHostNetState *net, int n); void vhost_net_virtqueue_mask(VHostNetState *net, VirtIODevice *dev, int idx, bool mask); +int vhost_net_notify_migration_done(VHostNetState *net, char* mac_addr); VHostNetState *get_vhost_net(NetClientState *nc); int vhost_set_vring_enable(NetClientState * nc, int enable); diff --git a/net/vhost-user.c b/net/vhost-user.c index b067524f2b..17b5c2a722 100644 --- a/net/vhost-user.c +++ b/net/vhost-user.c @@ -106,9 +106,29 @@ err: static ssize_t vhost_user_receive(NetClientState *nc, const uint8_t *buf, size_t size) { - /* Discard the request that is received and managed by backend - * by an other way. + /* In case of RARP (message size is 60) notify backup to send a fake RARP. + This fake RARP will be sent by backend only for guest + without GUEST_ANNOUNCE capability. */ + if (size == 60) { + VhostUserState *s = DO_UPCAST(VhostUserState, nc, nc); + int r; + static int display_rarp_failure = 1; + char mac_addr[6]; + + /* extract guest mac address from the RARP message */ + memcpy(mac_addr, &buf[6], 6); + + r = vhost_net_notify_migration_done(s->vhost_net, mac_addr); + + if ((r != 0) && (display_rarp_failure)) { + fprintf(stderr, + "Vhost user backend fails to broadcast fake RARP\n"); + fflush(stderr); + display_rarp_failure = 0; + } + } + return size; } -- cgit v1.2.3-55-g7522 From 31190ed781a81d2de65cea405e4cb3441ab929fc Mon Sep 17 00:00:00 2001 From: Marc-André Lureau Date: Fri, 9 Oct 2015 17:17:34 +0200 Subject: vhost: add migration block if memfd failed Signed-off-by: Marc-André Lureau Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Tested-by: Thibaut Collet --- hw/virtio/vhost.c | 3 +++ include/qemu/memfd.h | 2 ++ util/memfd.c | 22 ++++++++++++++++++++++ 3 files changed, 27 insertions(+) (limited to 'include') diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c index 2e78e4b706..feeaaa4186 100644 --- a/hw/virtio/vhost.c +++ b/hw/virtio/vhost.c @@ -1019,6 +1019,9 @@ int vhost_dev_init(struct vhost_dev *hdev, void *opaque, if (!(hdev->features & (0x1ULL << VHOST_F_LOG_ALL))) { error_setg(&hdev->migration_blocker, "Migration disabled: vhost lacks VHOST_F_LOG_ALL feature."); + } else if (!qemu_memfd_check()) { + error_setg(&hdev->migration_blocker, + "Migration disabled: failed to allocate shared memory"); } } diff --git a/include/qemu/memfd.h b/include/qemu/memfd.h index 950fb882a6..53858ed43c 100644 --- a/include/qemu/memfd.h +++ b/include/qemu/memfd.h @@ -2,6 +2,7 @@ #define QEMU_MEMFD_H #include "config-host.h" +#include #ifndef F_LINUX_SPECIFIC_BASE #define F_LINUX_SPECIFIC_BASE 1024 @@ -20,5 +21,6 @@ void *qemu_memfd_alloc(const char *name, size_t size, unsigned int seals, int *fd); void qemu_memfd_free(void *ptr, size_t size, int fd); +bool qemu_memfd_check(void); #endif /* QEMU_MEMFD_H */ diff --git a/util/memfd.c b/util/memfd.c index 4b23765b4a..7c406914c5 100644 --- a/util/memfd.c +++ b/util/memfd.c @@ -138,3 +138,25 @@ void qemu_memfd_free(void *ptr, size_t size, int fd) close(fd); } } + +enum { + MEMFD_KO, + MEMFD_OK, + MEMFD_TODO +}; + +bool qemu_memfd_check(void) +{ + static int memfd_check = MEMFD_TODO; + + if (memfd_check == MEMFD_TODO) { + int fd; + void *ptr; + + ptr = qemu_memfd_alloc("test", 4096, 0, &fd); + memfd_check = ptr ? MEMFD_OK : MEMFD_KO; + qemu_memfd_free(ptr, 4096, fd); + } + + return memfd_check == MEMFD_OK; +} -- cgit v1.2.3-55-g7522 From 4884b7bfe962e659c0e20c1d0de6f307d58f09be Mon Sep 17 00:00:00 2001 From: Zhu Guihua Date: Thu, 15 Oct 2015 11:12:12 +0800 Subject: i386: keep cpu_model field in MachineState uptodate Update cpu_model in MachineState for i386, so that the field can be used for cpu hotplug, instead of using a static variable. This patch is rebased on the latest master. Signed-off-by: Zhu Guihua Reviewed-by: Eduardo Habkost Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Reviewed-by: Marcel Apfelbaum Acked-by: Andreas Färber --- hw/i386/pc.c | 17 ++++++++--------- hw/i386/pc_piix.c | 2 +- hw/i386/pc_q35.c | 2 +- include/hw/i386/pc.h | 2 +- 4 files changed, 11 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/hw/i386/pc.c b/hw/i386/pc.c index 682867a8a9..208f5530c8 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -1077,11 +1077,10 @@ out: return cpu; } -static const char *current_cpu_model; - void pc_hot_add_cpu(const int64_t id, Error **errp) { X86CPU *cpu; + MachineState *machine = MACHINE(qdev_get_machine()); int64_t apic_id = x86_cpu_apic_id_from_index(id); Error *local_err = NULL; @@ -1109,7 +1108,7 @@ void pc_hot_add_cpu(const int64_t id, Error **errp) return; } - cpu = pc_new_cpu(current_cpu_model, apic_id, &local_err); + cpu = pc_new_cpu(machine->cpu_model, apic_id, &local_err); if (local_err) { error_propagate(errp, local_err); return; @@ -1117,22 +1116,22 @@ void pc_hot_add_cpu(const int64_t id, Error **errp) object_unref(OBJECT(cpu)); } -void pc_cpus_init(const char *cpu_model) +void pc_cpus_init(PCMachineState *pcms) { int i; X86CPU *cpu = NULL; + MachineState *machine = MACHINE(pcms); Error *error = NULL; unsigned long apic_id_limit; /* init CPUs */ - if (cpu_model == NULL) { + if (machine->cpu_model == NULL) { #ifdef TARGET_X86_64 - cpu_model = "qemu64"; + machine->cpu_model = "qemu64"; #else - cpu_model = "qemu32"; + machine->cpu_model = "qemu32"; #endif } - current_cpu_model = cpu_model; apic_id_limit = pc_apic_id_limit(max_cpus); if (apic_id_limit > ACPI_CPU_HOTPLUG_ID_LIMIT) { @@ -1142,7 +1141,7 @@ void pc_cpus_init(const char *cpu_model) } for (i = 0; i < smp_cpus; i++) { - cpu = pc_new_cpu(cpu_model, x86_cpu_apic_id_from_index(i), + cpu = pc_new_cpu(machine->cpu_model, x86_cpu_apic_id_from_index(i), &error); if (error) { error_report_err(error); diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c index ae7bbebd0f..0eacde12bb 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c @@ -139,7 +139,7 @@ static void pc_init1(MachineState *machine, exit(1); } - pc_cpus_init(machine->cpu_model); + pc_cpus_init(pcms); if (kvm_enabled() && kvmclock_enabled) { kvmclock_create(); diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c index 19e66702e0..3744abd397 100644 --- a/hw/i386/pc_q35.c +++ b/hw/i386/pc_q35.c @@ -128,7 +128,7 @@ static void pc_q35_init(MachineState *machine) exit(1); } - pc_cpus_init(machine->cpu_model); + pc_cpus_init(pcms); pc_acpi_init("q35-acpi-dsdt.aml"); kvmclock_create(); diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h index 0503485cd0..c5961d7c03 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h @@ -168,7 +168,7 @@ bool pc_machine_is_smm_enabled(PCMachineState *pcms); void pc_register_ferr_irq(qemu_irq irq); void pc_acpi_smi_interrupt(void *opaque, int irq, int level); -void pc_cpus_init(const char *cpu_model); +void pc_cpus_init(PCMachineState *pcms); void pc_hot_add_cpu(const int64_t id, Error **errp); void pc_acpi_init(const char *default_dsdt); -- cgit v1.2.3-55-g7522