summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--.travis.yml23
-rw-r--r--MAINTAINERS10
-rw-r--r--block/nbd.c33
-rw-r--r--contrib/libvhost-user/libvhost-user-glib.c17
-rw-r--r--contrib/libvhost-user/libvhost-user-glib.h1
-rw-r--r--contrib/libvhost-user/libvhost-user.c139
-rw-r--r--contrib/libvhost-user/libvhost-user.h14
-rw-r--r--contrib/vhost-user-input/main.c6
-rw-r--r--docs/devel/index.rst1
-rw-r--r--docs/devel/s390-dasd-ipl.rst (renamed from docs/devel/s390-dasd-ipl.txt)119
-rw-r--r--docs/devel/tcg-plugins.rst13
-rw-r--r--docs/interop/vhost-user.rst122
-rw-r--r--docs/specs/acpi_cpu_hotplug.txt2
-rw-r--r--docs/system/index.rst1
-rw-r--r--docs/system/vfio-ap.rst (renamed from docs/vfio-ap.txt)796
-rw-r--r--hw/arm/virt.c57
-rw-r--r--hw/block/vhost-user-blk.c23
-rw-r--r--hw/s390x/ipl.c2
-rw-r--r--hw/virtio/Kconfig5
-rw-r--r--hw/virtio/Makefile.objs2
-rw-r--r--hw/virtio/trace-events20
-rw-r--r--hw/virtio/vhost-user-fs.c16
-rw-r--r--hw/virtio/vhost-user.c10
-rw-r--r--hw/virtio/virtio-crypto.c3
-rw-r--r--hw/virtio/virtio-iommu-pci.c104
-rw-r--r--hw/virtio/virtio-iommu.c890
-rw-r--r--hw/virtio/virtio-pmem.c1
-rw-r--r--hw/virtio/virtio.c99
-rw-r--r--include/hw/arm/virt.h2
-rw-r--r--include/hw/pci/pci.h1
-rw-r--r--include/hw/virtio/vhost-user-blk.h3
-rw-r--r--include/hw/virtio/vhost-user-fs.h2
-rw-r--r--include/hw/virtio/virtio-iommu.h61
-rw-r--r--include/qemu/bitops.h38
-rw-r--r--include/standard-headers/drm/drm_fourcc.h24
-rw-r--r--include/standard-headers/linux/ethtool.h11
-rw-r--r--include/standard-headers/linux/input.h1
-rw-r--r--include/standard-headers/linux/pci_regs.h1
-rw-r--r--linux-headers/asm-arm/unistd-common.h2
-rw-r--r--linux-headers/asm-arm64/kvm.h12
-rw-r--r--linux-headers/asm-arm64/unistd.h1
-rw-r--r--linux-headers/asm-generic/mman-common.h2
-rw-r--r--linux-headers/asm-generic/unistd.h7
-rw-r--r--linux-headers/asm-mips/unistd_n32.h2
-rw-r--r--linux-headers/asm-mips/unistd_n64.h2
-rw-r--r--linux-headers/asm-mips/unistd_o32.h2
-rw-r--r--linux-headers/asm-powerpc/unistd_32.h2
-rw-r--r--linux-headers/asm-powerpc/unistd_64.h2
-rw-r--r--linux-headers/asm-s390/unistd_32.h2
-rw-r--r--linux-headers/asm-s390/unistd_64.h2
-rw-r--r--linux-headers/asm-x86/unistd_32.h2
-rw-r--r--linux-headers/asm-x86/unistd_64.h2
-rw-r--r--linux-headers/asm-x86/unistd_x32.h2
-rw-r--r--linux-headers/linux/kvm.h5
-rw-r--r--nbd/server.c12
-rw-r--r--plugins/core.c1
-rw-r--r--qdev-monitor.c1
-rw-r--r--softmmu/vl.c54
-rw-r--r--target/riscv/instmap.h8
-rw-r--r--target/riscv/translate.c40
-rw-r--r--target/s390x/cpu.c18
-rw-r--r--target/s390x/cpu.h3
-rw-r--r--target/s390x/helper.c2
-rw-r--r--target/s390x/kvm-stub.c10
-rw-r--r--target/s390x/kvm.c42
-rw-r--r--target/s390x/kvm_s390x.h4
-rw-r--r--target/s390x/translate.c2
-rw-r--r--tcg/tcg-op.c23
-rwxr-xr-xtests/data/acpi/rebuild-expected-aml.sh7
-rw-r--r--tests/plugin/bb.c6
-rw-r--r--tests/plugin/howvec.c26
-rw-r--r--tests/plugin/insn.c3
-rwxr-xr-xtests/qemu-iotests/2144
-rw-r--r--tests/qtest/bios-tables-test.c31
-rw-r--r--tests/rcutorture.c74
-rw-r--r--tests/tcg/Makefile.target4
-rw-r--r--tests/tcg/aarch64/Makefile.softmmu-target2
-rw-r--r--tests/tcg/aarch64/pauth-4.c54
-rwxr-xr-xtests/tcg/configure.sh2
-rw-r--r--trace/control.c11
80 files changed, 2488 insertions, 678 deletions
diff --git a/.travis.yml b/.travis.yml
index 5887055951..f4020dcc6c 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -1,6 +1,7 @@
# The current Travis default is a VM based 16.04 Xenial on GCE
# Additional builds with specific requirements for a full VM need to
# be added as additional matrix: entries later on
+os: linux
dist: xenial
language: c
compiler:
@@ -113,7 +114,7 @@ after_script:
- if command -v ccache ; then ccache --show-stats ; fi
-matrix:
+jobs:
include:
- name: "GCC static (user)"
env:
@@ -297,8 +298,7 @@ matrix:
- CONFIG="--target-list=x86_64-softmmu"
- CACHE_NAME="${TRAVIS_BRANCH}-linux-gcc-default"
language: python
- python:
- - "3.5"
+ python: 3.5
- name: "GCC Python 3.6 (x86_64-softmmu)"
@@ -306,8 +306,7 @@ matrix:
- CONFIG="--target-list=x86_64-softmmu"
- CACHE_NAME="${TRAVIS_BRANCH}-linux-gcc-default"
language: python
- python:
- - "3.6"
+ python: 3.6
# Acceptance (Functional) tests
@@ -401,7 +400,7 @@ matrix:
- name: "GCC check-tcg (some-softmmu)"
env:
- CONFIG="--enable-debug-tcg --target-list=xtensa-softmmu,arm-softmmu,aarch64-softmmu,alpha-softmmu"
- - TEST_BUILD_CMD="make -j${JOBS} build-tcg"
+ - TEST_BUILD_CMD="make build-tcg"
- TEST_CMD="make check-tcg"
- CACHE_NAME="${TRAVIS_BRANCH}-linux-gcc-debug-tcg"
@@ -410,7 +409,7 @@ matrix:
- name: "GCC plugins check-tcg (some-softmmu)"
env:
- CONFIG="--enable-plugins --enable-debug-tcg --target-list=xtensa-softmmu,arm-softmmu,aarch64-softmmu,alpha-softmmu"
- - TEST_BUILD_CMD="make -j${JOBS} build-tcg"
+ - TEST_BUILD_CMD="make build-tcg"
- TEST_CMD="make check-tcg"
- CACHE_NAME="${TRAVIS_BRANCH}-linux-gcc-debug-tcg"
@@ -509,6 +508,16 @@ matrix:
env:
- TEST_CMD="make check check-tcg V=1"
- CONFIG="--disable-containers --target-list=${MAIN_SOFTMMU_TARGETS},s390x-linux-user"
+ script:
+ - ( cd ${SRC_DIR} ; git submodule update --init roms/SLOF )
+ - BUILD_RC=0 && make -j${JOBS} || BUILD_RC=$?
+ - |
+ if [ "$BUILD_RC" -eq 0 ] ; then
+ mv pc-bios/s390-ccw/*.img pc-bios/ ;
+ ${TEST_CMD} ;
+ else
+ $(exit $BUILD_RC);
+ fi
# Release builds
# The make-release script expect a QEMU version, so our tag must start with a 'v'.
diff --git a/MAINTAINERS b/MAINTAINERS
index 36d94c17a6..0acb16cd1b 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1259,7 +1259,7 @@ S: Supported
F: hw/s390x/ipl.*
F: pc-bios/s390-ccw/
F: pc-bios/s390-ccw.img
-F: docs/devel/s390-dasd-ipl.txt
+F: docs/devel/s390-dasd-ipl.rst
T: git https://github.com/borntraeger/qemu.git s390-next
L: qemu-s390x@nongnu.org
@@ -1570,7 +1570,7 @@ F: hw/s390x/ap-bridge.c
F: include/hw/s390x/ap-device.h
F: include/hw/s390x/ap-bridge.h
F: hw/vfio/ap.c
-F: docs/vfio-ap.txt
+F: docs/system/vfio-ap.rst
L: qemu-s390x@nongnu.org
vhost
@@ -1639,6 +1639,12 @@ F: hw/input/virtio-input*.c
F: include/hw/virtio/virtio-input.h
F: contrib/vhost-user-input/*
+virtio-iommu
+M: Eric Auger <eric.auger@redhat.com>
+S: Maintained
+F: hw/virtio/virtio-iommu*.c
+F: include/hw/virtio/virtio-iommu.h
+
virtio-serial
M: Laurent Vivier <lvivier@redhat.com>
R: Amit Shah <amit@kernel.org>
diff --git a/block/nbd.c b/block/nbd.c
index 6d3b22f844..976be76647 100644
--- a/block/nbd.c
+++ b/block/nbd.c
@@ -95,6 +95,19 @@ typedef struct BDRVNBDState {
static int nbd_client_connect(BlockDriverState *bs, Error **errp);
+static void nbd_clear_bdrvstate(BDRVNBDState *s) /* Drop the tlscreds reference and free saddr, export, tlscredsid and x_dirty_bitmap of @s. */
+{
+ object_unref(OBJECT(s->tlscreds)); /* NOTE(review): unlike the fields below, s->tlscreds is not reset to NULL -- confirm no path can unref it twice */
+ qapi_free_SocketAddress(s->saddr);
+ s->saddr = NULL; /* every freed pointer below is cleared right after it is released */
+ g_free(s->export);
+ s->export = NULL;
+ g_free(s->tlscredsid);
+ s->tlscredsid = NULL;
+ g_free(s->x_dirty_bitmap);
+ s->x_dirty_bitmap = NULL;
+} /* shared by the nbd_process_options() error path, the nbd_open() connect failure path and nbd_close() */
+
static void nbd_channel_error(BDRVNBDState *s, int ret)
{
if (ret == -EIO) {
@@ -1528,8 +1541,10 @@ static int nbd_parse_uri(const char *filename, QDict *options)
goto out;
}
- p = uri->path ? uri->path : "/";
- p += strspn(p, "/");
+ p = uri->path ? uri->path : "";
+ if (p[0] == '/') {
+ p++;
+ }
if (p[0]) {
qdict_put_str(options, "export", p);
}
@@ -1877,11 +1892,7 @@ static int nbd_process_options(BlockDriverState *bs, QDict *options,
error:
if (ret < 0) {
- object_unref(OBJECT(s->tlscreds));
- qapi_free_SocketAddress(s->saddr);
- g_free(s->export);
- g_free(s->tlscredsid);
- g_free(s->x_dirty_bitmap);
+ nbd_clear_bdrvstate(s);
}
qemu_opts_del(opts);
return ret;
@@ -1904,6 +1915,7 @@ static int nbd_open(BlockDriverState *bs, QDict *options, int flags,
ret = nbd_client_connect(bs, errp);
if (ret < 0) {
+ nbd_clear_bdrvstate(s);
return ret;
}
/* successfully connected */
@@ -1960,12 +1972,7 @@ static void nbd_close(BlockDriverState *bs)
BDRVNBDState *s = bs->opaque;
nbd_client_close(bs);
-
- object_unref(OBJECT(s->tlscreds));
- qapi_free_SocketAddress(s->saddr);
- g_free(s->export);
- g_free(s->tlscredsid);
- g_free(s->x_dirty_bitmap);
+ nbd_clear_bdrvstate(s);
}
static int64_t nbd_getlength(BlockDriverState *bs)
diff --git a/contrib/libvhost-user/libvhost-user-glib.c b/contrib/libvhost-user/libvhost-user-glib.c
index 99edd2f3de..53f1ca4cdd 100644
--- a/contrib/libvhost-user/libvhost-user-glib.c
+++ b/contrib/libvhost-user/libvhost-user-glib.c
@@ -89,9 +89,8 @@ vug_source_new(VugDev *gdev, int fd, GIOCondition cond,
src->gfd.events = cond;
g_source_add_poll(gsrc, &src->gfd);
- id = g_source_attach(gsrc, NULL);
+ id = g_source_attach(gsrc, g_main_context_get_thread_default());
g_assert(id);
- g_source_unref(gsrc);
return gsrc;
}
@@ -131,6 +130,16 @@ static void vug_watch(VuDev *dev, int condition, void *data)
}
}
+void vug_source_destroy(GSource *src) /* Destroy @src and drop one reference to it; a NULL @src is a no-op. */
+{
+ if (!src) { /* tolerate an unset source so callers need no NULL check of their own */
+ return;
+ }
+
+ g_source_destroy(src); /* remove the source from the GMainContext it was attached to */
+ g_source_unref(src); /* release the reference that vug_source_new() now leaves with the caller */
+}
+
bool
vug_init(VugDev *dev, uint16_t max_queues, int socket,
vu_panic_cb panic, const VuDevIface *iface)
@@ -144,7 +153,7 @@ vug_init(VugDev *dev, uint16_t max_queues, int socket,
}
dev->fdmap = g_hash_table_new_full(NULL, NULL, NULL,
- (GDestroyNotify) g_source_destroy);
+ (GDestroyNotify) vug_source_destroy);
dev->src = vug_source_new(dev, socket, G_IO_IN, vug_watch, NULL);
@@ -157,5 +166,5 @@ vug_deinit(VugDev *dev)
g_assert(dev);
g_hash_table_unref(dev->fdmap);
- g_source_unref(dev->src);
+ vug_source_destroy(dev->src);
}
diff --git a/contrib/libvhost-user/libvhost-user-glib.h b/contrib/libvhost-user/libvhost-user-glib.h
index 64d539d93a..1a79a4916e 100644
--- a/contrib/libvhost-user/libvhost-user-glib.h
+++ b/contrib/libvhost-user/libvhost-user-glib.h
@@ -31,5 +31,6 @@ void vug_deinit(VugDev *dev);
GSource *vug_source_new(VugDev *dev, int fd, GIOCondition cond,
vu_watch_cb vu_cb, gpointer data);
+void vug_source_destroy(GSource *src);
#endif /* LIBVHOST_USER_GLIB_H */
diff --git a/contrib/libvhost-user/libvhost-user.c b/contrib/libvhost-user/libvhost-user.c
index b89bf18501..3bca996c62 100644
--- a/contrib/libvhost-user/libvhost-user.c
+++ b/contrib/libvhost-user/libvhost-user.c
@@ -136,6 +136,7 @@ vu_request_to_string(unsigned int req)
REQ(VHOST_USER_GET_INFLIGHT_FD),
REQ(VHOST_USER_SET_INFLIGHT_FD),
REQ(VHOST_USER_GPU_SET_SOCKET),
+ REQ(VHOST_USER_VRING_KICK),
REQ(VHOST_USER_MAX),
};
#undef REQ
@@ -163,7 +164,10 @@ vu_panic(VuDev *dev, const char *msg, ...)
dev->panic(dev, buf);
free(buf);
- /* FIXME: find a way to call virtio_error? */
+ /*
+ * FIXME:
+ * find a way to call virtio_error, or perhaps close the connection?
+ */
}
/* Translate guest physical address to our virtual address. */
@@ -948,6 +952,7 @@ static bool
vu_check_queue_msg_file(VuDev *dev, VhostUserMsg *vmsg)
{
int index = vmsg->payload.u64 & VHOST_USER_VRING_IDX_MASK;
+ bool nofd = vmsg->payload.u64 & VHOST_USER_VRING_NOFD_MASK;
if (index >= dev->max_queues) {
vmsg_close_fds(vmsg);
@@ -955,8 +960,12 @@ vu_check_queue_msg_file(VuDev *dev, VhostUserMsg *vmsg)
return false;
}
- if (vmsg->payload.u64 & VHOST_USER_VRING_NOFD_MASK ||
- vmsg->fd_num != 1) {
+ if (nofd) {
+ vmsg_close_fds(vmsg);
+ return true;
+ }
+
+ if (vmsg->fd_num != 1) {
vmsg_close_fds(vmsg);
vu_panic(dev, "Invalid fds in request: %d", vmsg->request);
return false;
@@ -1053,6 +1062,7 @@ static bool
vu_set_vring_kick_exec(VuDev *dev, VhostUserMsg *vmsg)
{
int index = vmsg->payload.u64 & VHOST_USER_VRING_IDX_MASK;
+ bool nofd = vmsg->payload.u64 & VHOST_USER_VRING_NOFD_MASK;
DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
@@ -1066,8 +1076,8 @@ vu_set_vring_kick_exec(VuDev *dev, VhostUserMsg *vmsg)
dev->vq[index].kick_fd = -1;
}
- dev->vq[index].kick_fd = vmsg->fds[0];
- DPRINT("Got kick_fd: %d for vq: %d\n", vmsg->fds[0], index);
+ dev->vq[index].kick_fd = nofd ? -1 : vmsg->fds[0];
+ DPRINT("Got kick_fd: %d for vq: %d\n", dev->vq[index].kick_fd, index);
dev->vq[index].started = true;
if (dev->iface->queue_set_started) {
@@ -1147,6 +1157,7 @@ static bool
vu_set_vring_call_exec(VuDev *dev, VhostUserMsg *vmsg)
{
int index = vmsg->payload.u64 & VHOST_USER_VRING_IDX_MASK;
+ bool nofd = vmsg->payload.u64 & VHOST_USER_VRING_NOFD_MASK;
DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
@@ -1159,14 +1170,14 @@ vu_set_vring_call_exec(VuDev *dev, VhostUserMsg *vmsg)
dev->vq[index].call_fd = -1;
}
- dev->vq[index].call_fd = vmsg->fds[0];
+ dev->vq[index].call_fd = nofd ? -1 : vmsg->fds[0];
/* in case of I/O hang after reconnecting */
- if (eventfd_write(vmsg->fds[0], 1)) {
+ if (dev->vq[index].call_fd != -1 && eventfd_write(vmsg->fds[0], 1)) {
return -1;
}
- DPRINT("Got call_fd: %d for vq: %d\n", vmsg->fds[0], index);
+ DPRINT("Got call_fd: %d for vq: %d\n", dev->vq[index].call_fd, index);
return false;
}
@@ -1175,6 +1186,7 @@ static bool
vu_set_vring_err_exec(VuDev *dev, VhostUserMsg *vmsg)
{
int index = vmsg->payload.u64 & VHOST_USER_VRING_IDX_MASK;
+ bool nofd = vmsg->payload.u64 & VHOST_USER_VRING_NOFD_MASK;
DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
@@ -1187,7 +1199,7 @@ vu_set_vring_err_exec(VuDev *dev, VhostUserMsg *vmsg)
dev->vq[index].err_fd = -1;
}
- dev->vq[index].err_fd = vmsg->fds[0];
+ dev->vq[index].err_fd = nofd ? -1 : vmsg->fds[0];
return false;
}
@@ -1195,11 +1207,20 @@ vu_set_vring_err_exec(VuDev *dev, VhostUserMsg *vmsg)
static bool
vu_get_protocol_features_exec(VuDev *dev, VhostUserMsg *vmsg)
{
+ /*
+ * Note that we support, but intentionally do not set,
+ * VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS. This means that
+ * a device implementation can return it in its callback
+ * (get_protocol_features) if it wants to use this for
+ * simulation, but it is otherwise not desirable (if even
+ * implemented by the master.)
+ */
uint64_t features = 1ULL << VHOST_USER_PROTOCOL_F_MQ |
1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD |
1ULL << VHOST_USER_PROTOCOL_F_SLAVE_REQ |
1ULL << VHOST_USER_PROTOCOL_F_HOST_NOTIFIER |
- 1ULL << VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD;
+ 1ULL << VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD |
+ 1ULL << VHOST_USER_PROTOCOL_F_REPLY_ACK;
if (have_userfault()) {
features |= 1ULL << VHOST_USER_PROTOCOL_F_PAGEFAULT;
@@ -1226,6 +1247,25 @@ vu_set_protocol_features_exec(VuDev *dev, VhostUserMsg *vmsg)
dev->protocol_features = vmsg->payload.u64;
+ if (vu_has_protocol_feature(dev,
+ VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS) &&
+ (!vu_has_protocol_feature(dev, VHOST_USER_PROTOCOL_F_SLAVE_REQ) ||
+ !vu_has_protocol_feature(dev, VHOST_USER_PROTOCOL_F_REPLY_ACK))) {
+ /*
+ * The use case for using messages for kick/call is simulation, to make
+ * the kick and call synchronous. To actually get that behaviour, both
+ * of the other features are required.
+ * Theoretically, one could use only kick messages, or do them without
+ * having F_REPLY_ACK, but too many (possibly pending) messages on the
+ * socket will eventually cause the master to hang. To avoid this in
+ * scenarios where it is not desired, enforce that the settings are in a
+ * way that actually enables the simulation case.
+ */
+ vu_panic(dev,
+ "F_IN_BAND_NOTIFICATIONS requires F_SLAVE_REQ && F_REPLY_ACK");
+ return false;
+ }
+
if (dev->iface->set_protocol_features) {
dev->iface->set_protocol_features(dev, features);
}
@@ -1487,6 +1527,34 @@ vu_set_inflight_fd(VuDev *dev, VhostUserMsg *vmsg)
}
static bool
+vu_handle_vring_kick(VuDev *dev, VhostUserMsg *vmsg) /* Handle an in-band VHOST_USER_VRING_KICK for the queue named in the message. */
+{
+ unsigned int index = vmsg->payload.state.index; /* queue index as sent by the peer; validated below */
+
+ if (index >= dev->max_queues) { /* reject out-of-range indexes before touching dev->vq[] */
+ vu_panic(dev, "Invalid queue index: %u", index);
+ return false;
+ }
+
+ DPRINT("Got kick message: handler:%p idx:%u\n", /* %u: index is unsigned int, matching the vu_panic() format above */
+ dev->vq[index].handler, index);
+
+ if (!dev->vq[index].started) { /* the first kick starts the queue, as vu_set_vring_kick_exec() does for the fd case */
+ dev->vq[index].started = true;
+
+ if (dev->iface->queue_set_started) { /* optional device callback */
+ dev->iface->queue_set_started(dev, index, true);
+ }
+ }
+
+ if (dev->vq[index].handler) { /* run the queue handler directly; there is no eventfd to wake it */
+ dev->vq[index].handler(dev, index);
+ }
+
+ return false; /* no reply prepared here; vu_dispatch() sends the ack itself when NEED_REPLY is set */
+}
+
+static bool
vu_process_message(VuDev *dev, VhostUserMsg *vmsg)
{
int do_reply = 0;
@@ -1568,6 +1636,8 @@ vu_process_message(VuDev *dev, VhostUserMsg *vmsg)
return vu_get_inflight_fd(dev, vmsg);
case VHOST_USER_SET_INFLIGHT_FD:
return vu_set_inflight_fd(dev, vmsg);
+ case VHOST_USER_VRING_KICK:
+ return vu_handle_vring_kick(dev, vmsg);
default:
vmsg_close_fds(vmsg);
vu_panic(dev, "Unhandled request: %d", vmsg->request);
@@ -1581,13 +1651,20 @@ vu_dispatch(VuDev *dev)
{
VhostUserMsg vmsg = { 0, };
int reply_requested;
- bool success = false;
+ bool need_reply, success = false;
if (!vu_message_read(dev, dev->sock, &vmsg)) {
goto end;
}
+ need_reply = vmsg.flags & VHOST_USER_NEED_REPLY_MASK;
+
reply_requested = vu_process_message(dev, &vmsg);
+ if (!reply_requested && need_reply) {
+ vmsg_set_reply_u64(&vmsg, 0);
+ reply_requested = 1;
+ }
+
if (!reply_requested) {
success = true;
goto end;
@@ -2022,8 +2099,7 @@ vring_notify(VuDev *dev, VuVirtq *vq)
return !v || vring_need_event(vring_get_used_event(vq), new, old);
}
-void
-vu_queue_notify(VuDev *dev, VuVirtq *vq)
+static void _vu_queue_notify(VuDev *dev, VuVirtq *vq, bool sync)
{
if (unlikely(dev->broken) ||
unlikely(!vq->vring.avail)) {
@@ -2035,11 +2111,48 @@ vu_queue_notify(VuDev *dev, VuVirtq *vq)
return;
}
+ if (vq->call_fd < 0 &&
+ vu_has_protocol_feature(dev,
+ VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS) &&
+ vu_has_protocol_feature(dev, VHOST_USER_PROTOCOL_F_SLAVE_REQ)) {
+ VhostUserMsg vmsg = {
+ .request = VHOST_USER_SLAVE_VRING_CALL,
+ .flags = VHOST_USER_VERSION,
+ .size = sizeof(vmsg.payload.state),
+ .payload.state = {
+ .index = vq - dev->vq,
+ },
+ };
+ bool ack = sync &&
+ vu_has_protocol_feature(dev,
+ VHOST_USER_PROTOCOL_F_REPLY_ACK);
+
+ if (ack) {
+ vmsg.flags |= VHOST_USER_NEED_REPLY_MASK;
+ }
+
+ vu_message_write(dev, dev->slave_fd, &vmsg);
+ if (ack) {
+ vu_message_read(dev, dev->slave_fd, &vmsg);
+ }
+ return;
+ }
+
if (eventfd_write(vq->call_fd, 1) < 0) {
vu_panic(dev, "Error writing eventfd: %s", strerror(errno));
}
}
+void vu_queue_notify(VuDev *dev, VuVirtq *vq) /* Notify @vq without asking for an acknowledgement (sync == false). */
+{
+ _vu_queue_notify(dev, vq, false);
+}
+
+void vu_queue_notify_sync(VuDev *dev, VuVirtq *vq) /* Notify @vq with sync == true: the in-band path sets NEED_REPLY and reads the reply when REPLY_ACK was negotiated. */
+{
+ _vu_queue_notify(dev, vq, true);
+}
+
static inline void
vring_used_flags_set_bit(VuVirtq *vq, int mask)
{
diff --git a/contrib/libvhost-user/libvhost-user.h b/contrib/libvhost-user/libvhost-user.h
index 5cb7708559..6fc8000e99 100644
--- a/contrib/libvhost-user/libvhost-user.h
+++ b/contrib/libvhost-user/libvhost-user.h
@@ -54,6 +54,7 @@ enum VhostUserProtocolFeature {
VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD = 10,
VHOST_USER_PROTOCOL_F_HOST_NOTIFIER = 11,
VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD = 12,
+ VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS = 14,
VHOST_USER_PROTOCOL_F_MAX
};
@@ -95,6 +96,7 @@ typedef enum VhostUserRequest {
VHOST_USER_GET_INFLIGHT_FD = 31,
VHOST_USER_SET_INFLIGHT_FD = 32,
VHOST_USER_GPU_SET_SOCKET = 33,
+ VHOST_USER_VRING_KICK = 35,
VHOST_USER_MAX
} VhostUserRequest;
@@ -103,6 +105,8 @@ typedef enum VhostUserSlaveRequest {
VHOST_USER_SLAVE_IOTLB_MSG = 1,
VHOST_USER_SLAVE_CONFIG_CHANGE_MSG = 2,
VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG = 3,
+ VHOST_USER_SLAVE_VRING_CALL = 4,
+ VHOST_USER_SLAVE_VRING_ERR = 5,
VHOST_USER_SLAVE_MAX
} VhostUserSlaveRequest;
@@ -529,6 +533,16 @@ bool vu_queue_empty(VuDev *dev, VuVirtq *vq);
void vu_queue_notify(VuDev *dev, VuVirtq *vq);
/**
+ * vu_queue_notify_sync:
+ * @dev: a VuDev context
+ * @vq: a VuVirtq queue
+ *
+ * Request to notify the queue via callfd (skipped if unnecessary)
+ * or sync message if possible.
+ */
+void vu_queue_notify_sync(VuDev *dev, VuVirtq *vq);
+
+/**
* vu_queue_pop:
* @dev: a VuDev context
* @vq: a VuVirtq queue
diff --git a/contrib/vhost-user-input/main.c b/contrib/vhost-user-input/main.c
index ef4b7769f2..6020c6f33a 100644
--- a/contrib/vhost-user-input/main.c
+++ b/contrib/vhost-user-input/main.c
@@ -187,7 +187,7 @@ vi_queue_set_started(VuDev *dev, int qidx, bool started)
}
if (!started && vi->evsrc) {
- g_source_destroy(vi->evsrc);
+ vug_source_destroy(vi->evsrc);
vi->evsrc = NULL;
}
}
@@ -401,9 +401,7 @@ main(int argc, char *argv[])
vug_deinit(&vi.dev);
- if (vi.evsrc) {
- g_source_unref(vi.evsrc);
- }
+ vug_source_destroy(vi.evsrc);
g_array_free(vi.config, TRUE);
g_free(vi.queue);
return 0;
diff --git a/docs/devel/index.rst b/docs/devel/index.rst
index 4dc2ca8d71..b734ba4655 100644
--- a/docs/devel/index.rst
+++ b/docs/devel/index.rst
@@ -25,3 +25,4 @@ Contents:
tcg-plugins
bitops
reset
+ s390-dasd-ipl
diff --git a/docs/devel/s390-dasd-ipl.txt b/docs/devel/s390-dasd-ipl.rst
index 9107e048e4..2529eb5f54 100644
--- a/docs/devel/s390-dasd-ipl.txt
+++ b/docs/devel/s390-dasd-ipl.rst
@@ -1,49 +1,55 @@
-*****************************
-***** s390 hardware IPL *****
-*****************************
+Booting from real channel-attached devices on s390x
+===================================================
+
+s390 hardware IPL
+-----------------
The s390 hardware IPL process consists of the following steps.
-1. A READ IPL ccw is constructed in memory location 0x0.
- This ccw, by definition, reads the IPL1 record which is located on the disk
- at cylinder 0 track 0 record 1. Note that the chain flag is on in this ccw
- so when it is complete another ccw will be fetched and executed from memory
- location 0x08.
-
-2. Execute the Read IPL ccw at 0x00, thereby reading IPL1 data into 0x00.
- IPL1 data is 24 bytes in length and consists of the following pieces of
- information: [psw][read ccw][tic ccw]. When the machine executes the Read
- IPL ccw it read the 24-bytes of IPL1 to be read into memory starting at
- location 0x0. Then the ccw program at 0x08 which consists of a read
- ccw and a tic ccw is automatically executed because of the chain flag from
- the original READ IPL ccw. The read ccw will read the IPL2 data into memory
- and the TIC (Transfer In Channel) will transfer control to the channel
- program contained in the IPL2 data. The TIC channel command is the
- equivalent of a branch/jump/goto instruction for channel programs.
- NOTE: The ccws in IPL1 are defined by the architecture to be format 0.
+1. A READ IPL ccw is constructed in memory location ``0x0``.
+ This ccw, by definition, reads the IPL1 record which is located on the disk
+ at cylinder 0 track 0 record 1. Note that the chain flag is on in this ccw
+ so when it is complete another ccw will be fetched and executed from memory
+ location ``0x08``.
+
+2. Execute the Read IPL ccw at ``0x00``, thereby reading IPL1 data into ``0x00``.
+ IPL1 data is 24 bytes in length and consists of the following pieces of
+ information: ``[psw][read ccw][tic ccw]``. When the machine executes the Read
+ IPL ccw it reads the 24 bytes of IPL1 into memory starting at
+ location ``0x0``. Then the ccw program at ``0x08`` which consists of a read
+ ccw and a tic ccw is automatically executed because of the chain flag from
+ the original READ IPL ccw. The read ccw will read the IPL2 data into memory
+ and the TIC (Transfer In Channel) will transfer control to the channel
+ program contained in the IPL2 data. The TIC channel command is the
+ equivalent of a branch/jump/goto instruction for channel programs.
+
+ NOTE: The ccws in IPL1 are defined by the architecture to be format 0.
3. Execute IPL2.
- The TIC ccw instruction at the end of the IPL1 channel program will begin
- the execution of the IPL2 channel program. IPL2 is stage-2 of the boot
- process and will contain a larger channel program than IPL1. The point of
- IPL2 is to find and load either the operating system or a small program that
- loads the operating system from disk. At the end of this step all or some of
- the real operating system is loaded into memory and we are ready to hand
- control over to the guest operating system. At this point the guest
- operating system is entirely responsible for loading any more data it might
- need to function. NOTE: The IPL2 channel program might read data into memory
- location 0 thereby overwriting the IPL1 psw and channel program. This is ok
- as long as the data placed in location 0 contains a psw whose instruction
- address points to the guest operating system code to execute at the end of
- the IPL/boot process.
- NOTE: The ccws in IPL2 are defined by the architecture to be format 0.
+ The TIC ccw instruction at the end of the IPL1 channel program will begin
+ the execution of the IPL2 channel program. IPL2 is stage-2 of the boot
+ process and will contain a larger channel program than IPL1. The point of
+ IPL2 is to find and load either the operating system or a small program that
+ loads the operating system from disk. At the end of this step all or some of
+ the real operating system is loaded into memory and we are ready to hand
+ control over to the guest operating system. At this point the guest
+ operating system is entirely responsible for loading any more data it might
+ need to function.
+
+ NOTE: The IPL2 channel program might read data into memory
+ location ``0x0`` thereby overwriting the IPL1 psw and channel program. This is ok
+ as long as the data placed in location ``0x0`` contains a psw whose instruction
+ address points to the guest operating system code to execute at the end of
+ the IPL/boot process.
+
+ NOTE: The ccws in IPL2 are defined by the architecture to be format 0.
4. Start executing the guest operating system.
- The psw that was loaded into memory location 0 as part of the ipl process
- should contain the needed flags for the operating system we have loaded. The
- psw's instruction address will point to the location in memory where we want
- to start executing the operating system. This psw is loaded (via LPSW
- instruction) causing control to be passed to the operating system code.
+ The psw that was loaded into memory location ``0x0`` as part of the ipl process
+ should contain the needed flags for the operating system we have loaded. The
+ psw's instruction address will point to the location in memory where we want
+ to start executing the operating system. This psw is loaded (via LPSW
+ instruction) causing control to be passed to the operating system code.
In a non-virtualized environment this process, handled entirely by the hardware,
is kicked off by the user initiating a "Load" procedure from the hardware
@@ -54,18 +60,17 @@ written immediately after the special "Read IPL" ccw, the IPL1 channel program
will be executed immediately (the special read ccw has the chaining bit turned
on). The TIC at the end of the IPL1 channel program will cause the IPL2 channel
program to be executed automatically. After this sequence completes the "Load"
-procedure then loads the psw from 0x0.
+procedure then loads the psw from ``0x0``.
-**********************************************************
-***** How this all pertains to QEMU (and the kernel) *****
-**********************************************************
+How this all pertains to QEMU (and the kernel)
+----------------------------------------------
In theory we should merely have to do the following to IPL/boot a guest
operating system from a DASD device:
-1. Place a "Read IPL" ccw into memory location 0x0 with chaining bit on.
-2. Execute channel program at 0x0.
-3. LPSW 0x0.
+1. Place a "Read IPL" ccw into memory location ``0x0`` with chaining bit on.
+2. Execute channel program at ``0x0``.
+3. LPSW ``0x0``.
However, our emulation of the machine's channel program logic within the kernel
is missing one key feature that is required for this process to work:
@@ -89,32 +94,31 @@ Lastly, in some cases (the zipl bootloader for example) the IPL2 program also
transfers control to another channel program segment immediately after reading
it from the disk. So we need to be able to handle this case.
-**************************
-***** What QEMU does *****
-**************************
+What QEMU does
+--------------
Since we are forced to live with prefetch we cannot use the very simple IPL
procedure we defined in the preceding section. So we compensate by doing the
following.
-1. Place "Read IPL" ccw into memory location 0x0, but turn off chaining bit.
-2. Execute "Read IPL" at 0x0.
+1. Place "Read IPL" ccw into memory location ``0x0``, but turn off chaining bit.
+2. Execute "Read IPL" at ``0x0``.
- So now IPL1's psw is at 0x0 and IPL1's channel program is at 0x08.
+ So now IPL1's psw is at ``0x0`` and IPL1's channel program is at ``0x08``.
-4. Write a custom channel program that will seek to the IPL2 record and then
+3. Write a custom channel program that will seek to the IPL2 record and then
execute the READ and TIC ccws from IPL1. Normally the seek is not required
because after reading the IPL1 record the disk is automatically positioned
to read the very next record which will be IPL2. But since we are not reading
both IPL1 and IPL2 as part of the same channel program we must manually set
the position.
-5. Grab the target address of the TIC instruction from the IPL1 channel program.
+4. Grab the target address of the TIC instruction from the IPL1 channel program.
This address is where the IPL2 channel program starts.
Now IPL2 is loaded into memory somewhere, and we know the address.
-6. Execute the IPL2 channel program at the address obtained in step #5.
+5. Execute the IPL2 channel program at the address obtained in step #4.
Because this channel program can be dynamic, we must use a special algorithm
that detects a READ immediately followed by a TIC and breaks the ccw chain
@@ -126,8 +130,9 @@ following.
channel program from executing properly.
Now the operating system code is loaded somewhere in guest memory and the psw
- in memory location 0x0 will point to entry code for the guest operating
+ in memory location ``0x0`` will point to entry code for the guest operating
system.
-7. LPSW 0x0.
+6. LPSW ``0x0``.
+
LPSW transfers control to the guest operating system and we're done.
diff --git a/docs/devel/tcg-plugins.rst b/docs/devel/tcg-plugins.rst
index 718eef00f2..a05990906c 100644
--- a/docs/devel/tcg-plugins.rst
+++ b/docs/devel/tcg-plugins.rst
@@ -51,8 +51,17 @@ about how QEMU's translation works to the plugins. While there are
conceptions such as translation time and translation blocks the
details are opaque to plugins. The plugin is able to query select
details of instructions and system configuration only through the
-exported *qemu_plugin* functions. The types used to describe
-instructions and events are opaque to the plugins themselves.
+exported *qemu_plugin* functions.
+
+Query Handle Lifetime
+---------------------
+
+Each callback provides an opaque anonymous information handle which
+can usually be further queried to find out information about a
+translation, instruction or operation. The handles themselves are only
+valid during the lifetime of the callback so it is important that any
+information that is needed is extracted during the callback and saved
+by the plugin.
Usage
=====
diff --git a/docs/interop/vhost-user.rst b/docs/interop/vhost-user.rst
index 5f8b3a456b..401652397c 100644
--- a/docs/interop/vhost-user.rst
+++ b/docs/interop/vhost-user.rst
@@ -2,6 +2,7 @@
Vhost-user Protocol
===================
:Copyright: 2014 Virtual Open Systems Sarl.
+:Copyright: 2019 Intel Corporation
:Licence: This work is licensed under the terms of the GNU GPL,
version 2 or later. See the COPYING file in the top-level
directory.
@@ -279,6 +280,9 @@ If *master* is unable to send the full message or receives a wrong
reply it will close the connection. An optional reconnection mechanism
can be implemented.
+If *slave* detects some error such as incompatible features, it may also
+close the connection. This should only happen in exceptional circumstances.
+
Any protocol extensions are gated by protocol feature bits, which
allows full backwards compatibility on both master and slave. As
older slaves don't support negotiating protocol features, a feature
@@ -315,7 +319,8 @@ it until ring is started, or after it has been stopped.
Client must start ring upon receiving a kick (that is, detecting that
file descriptor is readable) on the descriptor specified by
-``VHOST_USER_SET_VRING_KICK``, and stop ring upon receiving
+``VHOST_USER_SET_VRING_KICK`` or receiving the in-band message
+``VHOST_USER_VRING_KICK`` if negotiated, and stop ring upon receiving
``VHOST_USER_GET_VRING_BASE``.
While processing the rings (whether they are enabled or not), client
@@ -767,25 +772,49 @@ When reconnecting:
#. Resubmit inflight ``DescStatePacked`` entries in order of their
counter value
+In-band notifications
+---------------------
+
+In some limited situations (e.g. for simulation) it is desirable to
+have the kick, call and error (if used) signals done via in-band
+messages instead of asynchronous eventfd notifications. This can be
+done by negotiating the ``VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS``
+protocol feature.
+
+Note that due to the fact that too many messages on the sockets can
+cause the sending application(s) to block, it is not advised to use
+this feature unless absolutely necessary. It is also considered an
+error to negotiate this feature without also negotiating
+``VHOST_USER_PROTOCOL_F_SLAVE_REQ`` and ``VHOST_USER_PROTOCOL_F_REPLY_ACK``;
+the former is necessary for getting a message channel from the slave
+to the master, while the latter needs to be used with the in-band
+notification messages to block until they are processed, both to avoid
+blocking later and for proper processing (at least in the simulation
+use case). As it has no other way of signalling this error, the slave
+should close the connection as a response to a
+``VHOST_USER_SET_PROTOCOL_FEATURES`` message that sets the in-band
+notifications feature flag without the other two.
+
Protocol features
-----------------
.. code:: c
- #define VHOST_USER_PROTOCOL_F_MQ 0
- #define VHOST_USER_PROTOCOL_F_LOG_SHMFD 1
- #define VHOST_USER_PROTOCOL_F_RARP 2
- #define VHOST_USER_PROTOCOL_F_REPLY_ACK 3
- #define VHOST_USER_PROTOCOL_F_MTU 4
- #define VHOST_USER_PROTOCOL_F_SLAVE_REQ 5
- #define VHOST_USER_PROTOCOL_F_CROSS_ENDIAN 6
- #define VHOST_USER_PROTOCOL_F_CRYPTO_SESSION 7
- #define VHOST_USER_PROTOCOL_F_PAGEFAULT 8
- #define VHOST_USER_PROTOCOL_F_CONFIG 9
- #define VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD 10
- #define VHOST_USER_PROTOCOL_F_HOST_NOTIFIER 11
- #define VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD 12
- #define VHOST_USER_PROTOCOL_F_RESET_DEVICE 13
+ #define VHOST_USER_PROTOCOL_F_MQ 0
+ #define VHOST_USER_PROTOCOL_F_LOG_SHMFD 1
+ #define VHOST_USER_PROTOCOL_F_RARP 2
+ #define VHOST_USER_PROTOCOL_F_REPLY_ACK 3
+ #define VHOST_USER_PROTOCOL_F_MTU 4
+ #define VHOST_USER_PROTOCOL_F_SLAVE_REQ 5
+ #define VHOST_USER_PROTOCOL_F_CROSS_ENDIAN 6
+ #define VHOST_USER_PROTOCOL_F_CRYPTO_SESSION 7
+ #define VHOST_USER_PROTOCOL_F_PAGEFAULT 8
+ #define VHOST_USER_PROTOCOL_F_CONFIG 9
+ #define VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD 10
+ #define VHOST_USER_PROTOCOL_F_HOST_NOTIFIER 11
+ #define VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD 12
+ #define VHOST_USER_PROTOCOL_F_RESET_DEVICE 13
+ #define VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS 14
Master message types
--------------------
@@ -947,7 +976,12 @@ Master message types
Bits (0-7) of the payload contain the vring index. Bit 8 is the
invalid FD flag. This flag is set when there is no file descriptor
in the ancillary data. This signals that polling should be used
- instead of waiting for a kick.
+ instead of waiting for the kick. Note that if the protocol feature
+ ``VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS`` has been negotiated
+ this message isn't necessary as the ring is also started on the
+ ``VHOST_USER_VRING_KICK`` message; it may, however, still be used to
+ set an event file descriptor (which will be preferred over the
+ message) or to enable polling.
``VHOST_USER_SET_VRING_CALL``
:id: 13
@@ -960,7 +994,12 @@ Master message types
Bits (0-7) of the payload contain the vring index. Bit 8 is the
invalid FD flag. This flag is set when there is no file descriptor
in the ancillary data. This signals that polling will be used
- instead of waiting for the call.
+ instead of waiting for the call. Note that if the protocol features
+ ``VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS`` and
+ ``VHOST_USER_PROTOCOL_F_SLAVE_REQ`` have been negotiated this message
+ isn't necessary as the ``VHOST_USER_SLAVE_VRING_CALL`` message can be
+ used; it may, however, still be used to set an event file descriptor
+ or to enable polling.
``VHOST_USER_SET_VRING_ERR``
:id: 14
@@ -972,7 +1011,12 @@ Master message types
Bits (0-7) of the payload contain the vring index. Bit 8 is the
invalid FD flag. This flag is set when there is no file descriptor
- in the ancillary data.
+ in the ancillary data. Note that if the protocol features
+ ``VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS`` and
+ ``VHOST_USER_PROTOCOL_F_SLAVE_REQ`` have been negotiated this message
+ isn't necessary as the ``VHOST_USER_SLAVE_VRING_ERR`` message can be
+ used; it may, however, still be used to set an event file descriptor
+ (which will be preferred over the message).
``VHOST_USER_GET_QUEUE_NUM``
:id: 17
@@ -1205,6 +1249,20 @@ Master message types
Only valid if the ``VHOST_USER_PROTOCOL_F_RESET_DEVICE`` protocol
feature is set by the backend.
+``VHOST_USER_VRING_KICK``
+ :id: 35
+ :equivalent ioctl: N/A
+ :slave payload: vring state description
+ :master payload: N/A
+
+ When the ``VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS`` protocol
+ feature has been successfully negotiated, this message may be
+ submitted by the master to indicate that a buffer was added to
+ the vring instead of signalling it using the vring's kick file
+ descriptor or having the slave rely on polling.
+
+ The state.num field is currently reserved and must be set to 0.
+
Slave message types
-------------------
@@ -1261,6 +1319,34 @@ Slave message types
``VHOST_USER_PROTOCOL_F_HOST_NOTIFIER`` protocol feature has been
successfully negotiated.
+``VHOST_USER_SLAVE_VRING_CALL``
+ :id: 4
+ :equivalent ioctl: N/A
+ :slave payload: vring state description
+ :master payload: N/A
+
+ When the ``VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS`` protocol
+ feature has been successfully negotiated, this message may be
+ submitted by the slave to indicate that a buffer was used from
+ the vring instead of signalling this using the vring's call file
+ descriptor or having the master rely on polling.
+
+ The state.num field is currently reserved and must be set to 0.
+
+``VHOST_USER_SLAVE_VRING_ERR``
+ :id: 5
+ :equivalent ioctl: N/A
+ :slave payload: vring state description
+ :master payload: N/A
+
+ When the ``VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS`` protocol
+ feature has been successfully negotiated, this message may be
+ submitted by the slave to indicate that an error occurred on the
+ specific vring, instead of signalling the error file descriptor
+ set by the master via ``VHOST_USER_SET_VRING_ERR``.
+
+ The state.num field is currently reserved and must be set to 0.
+
.. _reply_ack:
VHOST_USER_PROTOCOL_F_REPLY_ACK
diff --git a/docs/specs/acpi_cpu_hotplug.txt b/docs/specs/acpi_cpu_hotplug.txt
index a8ce5e7402..9bb22d1270 100644
--- a/docs/specs/acpi_cpu_hotplug.txt
+++ b/docs/specs/acpi_cpu_hotplug.txt
@@ -94,6 +94,8 @@ write access:
register in QEMU
2: following writes to 'Command data' register set OST status
register in QEMU
+ 3: following reads from 'Command data' and 'Command data 2' return
+ architecture specific CPU ID value for currently selected CPU.
other values: reserved
[0x6-0x7] reserved
[0x8] Command data: (DWORD access)
diff --git a/docs/system/index.rst b/docs/system/index.rst
index f66e6ea585..1a4b2c82ac 100644
--- a/docs/system/index.rst
+++ b/docs/system/index.rst
@@ -15,3 +15,4 @@ Contents:
:maxdepth: 2
qemu-block-drivers
+ vfio-ap
diff --git a/docs/vfio-ap.txt b/docs/system/vfio-ap.rst
index b1eb2deeaf..3cd84179a2 100644
--- a/docs/vfio-ap.txt
+++ b/docs/system/vfio-ap.rst
@@ -1,17 +1,11 @@
Adjunct Processor (AP) Device
=============================
-Contents:
-=========
-* Introduction
-* AP Architectural Overview
-* Start Interpretive Execution (SIE) Instruction
-* AP Matrix Configuration on Linux Host
-* Starting a Linux Guest Configured with an AP Matrix
-* Example: Configure AP Matrices for Three Linux Guests
-
-Introduction:
-============
+.. contents::
+
+Introduction
+------------
+
The IBM Adjunct Processor (AP) Cryptographic Facility is comprised
of three AP instructions and from 1 to 256 PCIe cryptographic adapter cards.
These AP devices provide cryptographic functions to all CPUs assigned to a
@@ -21,8 +15,9 @@ On s390x, AP adapter cards are exposed via the AP bus. This document
describes how those cards may be made available to KVM guests using the
VFIO mediated device framework.
-AP Architectural Overview:
-=========================
+AP Architectural Overview
+-------------------------
+
In order to understand the terminology used in the rest of this document, let's
start with some definitions:
@@ -75,7 +70,8 @@ start with some definitions:
must be one of the control domains.
Start Interpretive Execution (SIE) Instruction
-==============================================
+----------------------------------------------
+
A KVM guest is started by executing the Start Interpretive Execution (SIE)
instruction. The SIE state description is a control block that contains the
state information for a KVM guest and is supplied as input to the SIE
@@ -114,246 +110,278 @@ The APQNs can provide secure key functionality - i.e., a private key is stored
on the adapter card for each of its domains - so each APQN must be assigned to
at most one guest or the linux host.
- Example 1: Valid configuration:
- ------------------------------
- Guest1: adapters 1,2 domains 5,6
- Guest2: adapter 1,2 domain 7
+Example 1: Valid configuration
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
++----------+--------+--------+
+| | Guest1 | Guest2 |
++==========+========+========+
+| adapters | 1, 2 | 1, 2 |
++----------+--------+--------+
+| domains | 5, 6 | 7 |
++----------+--------+--------+
+
+This is valid because both guests have a unique set of APQNs:
+
+* Guest1 has APQNs (1,5), (1,6), (2,5) and (2,6);
+* Guest2 has APQNs (1,7) and (2,7).
+
+Example 2: Valid configuration
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
++----------+--------+--------+
+| | Guest1 | Guest2 |
++==========+========+========+
+| adapters | 1, 2 | 3, 4 |
++----------+--------+--------+
+| domains | 5, 6 | 5, 6 |
++----------+--------+--------+
- This is valid because both guests have a unique set of APQNs: Guest1 has
- APQNs (1,5), (1,6), (2,5) and (2,6); Guest2 has APQNs (1,7) and (2,7).
+This is also valid because both guests have a unique set of APQNs:
- Example 2: Valid configuration:
- ------------------------------
- Guest1: adapters 1,2 domains 5,6
- Guest2: adapters 3,4 domains 5,6
+* Guest1 has APQNs (1,5), (1,6), (2,5), (2,6);
+* Guest2 has APQNs (3,5), (3,6), (4,5) and (4,6).
- This is also valid because both guests have a unique set of APQNs:
- Guest1 has APQNs (1,5), (1,6), (2,5), (2,6);
- Guest2 has APQNs (3,5), (3,6), (4,5), (4,6)
+Example 3: Invalid configuration
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- Example 3: Invalid configuration:
- --------------------------------
- Guest1: adapters 1,2 domains 5,6
- Guest2: adapter 1 domains 6,7
++----------+--------+--------+
+| | Guest1 | Guest2 |
++==========+========+========+
+| adapters | 1, 2 | 1 |
++----------+--------+--------+
+| domains | 5, 6 | 6, 7 |
++----------+--------+--------+
- This is an invalid configuration because both guests have access to
- APQN (1,6).
+This is an invalid configuration because both guests have access to
+APQN (1,6).
+
+AP Matrix Configuration on Linux Host
+-------------------------------------
-AP Matrix Configuration on Linux Host:
-=====================================
A linux system is a guest of the LPAR in which it is running and has access to
the AP resources configured for the LPAR. The LPAR's AP matrix is
configured via its Activation Profile which can be edited on the HMC. When the
linux system is started, the AP bus will detect the AP devices assigned to the
-LPAR and create the following in sysfs:
+LPAR and create the following in sysfs::
-/sys/bus/ap
-... [devices]
-...... xx.yyyy
-...... ...
-...... cardxx
-...... ...
+ /sys/bus/ap
+ ... [devices]
+ ...... xx.yyyy
+ ...... ...
+ ...... cardxx
+ ...... ...
Where:
- cardxx is AP adapter number xx (in hex)
-....xx.yyyy is an APQN with xx specifying the APID and yyyy specifying the
- APQI
+
+``cardxx``
+ is AP adapter number xx (in hex)
+
+``xx.yyyy``
+ is an APQN with xx specifying the APID and yyyy specifying the APQI
For example, if AP adapters 5 and 6 and domains 4, 71 (0x47), 171 (0xab) and
255 (0xff) are configured for the LPAR, the sysfs representation on the linux
-host system would look like this:
-
-/sys/bus/ap
-... [devices]
-...... 05.0004
-...... 05.0047
-...... 05.00ab
-...... 05.00ff
-...... 06.0004
-...... 06.0047
-...... 06.00ab
-...... 06.00ff
-...... card05
-...... card06
+host system would look like this::
+
+ /sys/bus/ap
+ ... [devices]
+ ...... 05.0004
+ ...... 05.0047
+ ...... 05.00ab
+ ...... 05.00ff
+ ...... 06.0004
+ ...... 06.0047
+ ...... 06.00ab
+ ...... 06.00ff
+ ...... card05
+ ...... card06
A set of default device drivers are also created to control each type of AP
-device that can be assigned to the LPAR on which a linux host is running:
-
-/sys/bus/ap
-... [drivers]
-...... [cex2acard] for Crypto Express 2/3 accelerator cards
-...... [cex2aqueue] for AP queues served by Crypto Express 2/3
- accelerator cards
-...... [cex4card] for Crypto Express 4/5/6 accelerator and coprocessor
- cards
-...... [cex4queue] for AP queues served by Crypto Express 4/5/6
- accelerator and coprocessor cards
-...... [pcixcccard] for Crypto Express 2/3 coprocessor cards
-...... [pcixccqueue] for AP queues served by Crypto Express 2/3
- coprocessor cards
+device that can be assigned to the LPAR on which a linux host is running::
+
+ /sys/bus/ap
+ ... [drivers]
+ ...... [cex2acard] for Crypto Express 2/3 accelerator cards
+ ...... [cex2aqueue] for AP queues served by Crypto Express 2/3
+ accelerator cards
+ ...... [cex4card] for Crypto Express 4/5/6 accelerator and coprocessor
+ cards
+ ...... [cex4queue] for AP queues served by Crypto Express 4/5/6
+ accelerator and coprocessor cards
+ ...... [pcixcccard] for Crypto Express 2/3 coprocessor cards
+ ...... [pcixccqueue] for AP queues served by Crypto Express 2/3
+ coprocessor cards
Binding AP devices to device drivers
-------------------------------------
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
There are two sysfs files that specify bitmasks marking a subset of the APQN
range as 'usable by the default AP queue device drivers' or 'not usable by the
default device drivers' and thus available for use by the alternate device
-driver(s). The sysfs locations of the masks are:
+driver(s). The sysfs locations of the masks are::
/sys/bus/ap/apmask
/sys/bus/ap/aqmask
- The 'apmask' is a 256-bit mask that identifies a set of AP adapter IDs
- (APID). Each bit in the mask, from left to right (i.e., from most significant
- to least significant bit in big endian order), corresponds to an APID from
- 0-255. If a bit is set, the APID is marked as usable only by the default AP
- queue device drivers; otherwise, the APID is usable by the vfio_ap
- device driver.
+The ``apmask`` is a 256-bit mask that identifies a set of AP adapter IDs
+(APID). Each bit in the mask, from left to right (i.e., from most significant
+to least significant bit in big endian order), corresponds to an APID from
+0-255. If a bit is set, the APID is marked as usable only by the default AP
+queue device drivers; otherwise, the APID is usable by the vfio_ap
+device driver.
- The 'aqmask' is a 256-bit mask that identifies a set of AP queue indexes
- (APQI). Each bit in the mask, from left to right (i.e., from most significant
- to least significant bit in big endian order), corresponds to an APQI from
- 0-255. If a bit is set, the APQI is marked as usable only by the default AP
- queue device drivers; otherwise, the APQI is usable by the vfio_ap device
- driver.
+The ``aqmask`` is a 256-bit mask that identifies a set of AP queue indexes
+(APQI). Each bit in the mask, from left to right (i.e., from most significant
+to least significant bit in big endian order), corresponds to an APQI from
+0-255. If a bit is set, the APQI is marked as usable only by the default AP
+queue device drivers; otherwise, the APQI is usable by the vfio_ap device
+driver.
- Take, for example, the following mask:
+Take, for example, the following mask::
0x7dffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff
- It indicates:
+It indicates:
1, 2, 3, 4, 5, and 7-255 belong to the default drivers' pool, and 0 and 6
belong to the vfio_ap device driver's pool.
- The APQN of each AP queue device assigned to the linux host is checked by the
- AP bus against the set of APQNs derived from the cross product of APIDs
- and APQIs marked as usable only by the default AP queue device drivers. If a
- match is detected, only the default AP queue device drivers will be probed;
- otherwise, the vfio_ap device driver will be probed.
+The APQN of each AP queue device assigned to the linux host is checked by the
+AP bus against the set of APQNs derived from the cross product of APIDs
+and APQIs marked as usable only by the default AP queue device drivers. If a
+match is detected, only the default AP queue device drivers will be probed;
+otherwise, the vfio_ap device driver will be probed.
- By default, the two masks are set to reserve all APQNs for use by the default
- AP queue device drivers. There are two ways the default masks can be changed:
+By default, the two masks are set to reserve all APQNs for use by the default
+AP queue device drivers. There are two ways the default masks can be changed:
- 1. The sysfs mask files can be edited by echoing a string into the
- respective sysfs mask file in one of two formats:
+ 1. The sysfs mask files can be edited by echoing a string into the
+ respective sysfs mask file in one of two formats:
- * An absolute hex string starting with 0x - like "0x12345678" - sets
- the mask. If the given string is shorter than the mask, it is padded
- with 0s on the right; for example, specifying a mask value of 0x41 is
- the same as specifying:
+ * An absolute hex string starting with 0x - like "0x12345678" - sets
+ the mask. If the given string is shorter than the mask, it is padded
+ with 0s on the right; for example, specifying a mask value of 0x41 is
+ the same as specifying::
0x4100000000000000000000000000000000000000000000000000000000000000
- Keep in mind that the mask reads from left to right (i.e., most
- significant to least significant bit in big endian order), so the mask
- above identifies device numbers 1 and 7 (01000001).
+ Keep in mind that the mask reads from left to right (i.e., most
+ significant to least significant bit in big endian order), so the mask
+ above identifies device numbers 1 and 7 (``01000001``).
- If the string is longer than the mask, the operation is terminated with
- an error (EINVAL).
+ If the string is longer than the mask, the operation is terminated with
+ an error (EINVAL).
- * Individual bits in the mask can be switched on and off by specifying
- each bit number to be switched in a comma separated list. Each bit
- number string must be prepended with a ('+') or minus ('-') to indicate
- the corresponding bit is to be switched on ('+') or off ('-'). Some
- valid values are:
+ * Individual bits in the mask can be switched on and off by specifying
+ each bit number to be switched in a comma separated list. Each bit
+ number string must be prepended with a plus (``+``) or minus (``-``) to indicate
+ the corresponding bit is to be switched on (``+``) or off (``-``). Some
+ valid values are::
"+0" switches bit 0 on
"-13" switches bit 13 off
"+0x41" switches bit 65 on
"-0xff" switches bit 255 off
- The following example:
+ The following example::
+
+0,-6,+0x47,-0xf0
- Switches bits 0 and 71 (0x47) on
- Switches bits 6 and 240 (0xf0) off
+ Switches bits 0 and 71 (0x47) on
+ Switches bits 6 and 240 (0xf0) off
- Note that the bits not specified in the list remain as they were before
- the operation.
+ Note that the bits not specified in the list remain as they were before
+ the operation.
- 2. The masks can also be changed at boot time via parameters on the kernel
- command line like this:
+ 2. The masks can also be changed at boot time via parameters on the kernel
+ command line like this::
ap.apmask=0xffff ap.aqmask=0x40
- This would create the following masks:
+ This would create the following masks:
+
+ apmask::
- apmask:
0xffff000000000000000000000000000000000000000000000000000000000000
- aqmask:
+ aqmask::
+
0x4000000000000000000000000000000000000000000000000000000000000000
- Resulting in these two pools:
+ Resulting in these two pools::
default drivers pool: adapter 0-15, domain 1
alternate drivers pool: adapter 16-255, domains 0, 2-255
-Configuring an AP matrix for a linux guest.
-------------------------------------------
+Configuring an AP matrix for a linux guest
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
The sysfs interfaces for configuring an AP matrix for a guest are built on the
VFIO mediated device framework. To configure an AP matrix for a guest, a
-mediated matrix device must first be created for the /sys/devices/vfio_ap/matrix
+mediated matrix device must first be created for the ``/sys/devices/vfio_ap/matrix``
device. When the vfio_ap device driver is loaded, it registers with the VFIO
mediated device framework. When the driver registers, the sysfs interfaces for
-creating mediated matrix devices is created:
+creating mediated matrix devices are created::
-/sys/devices
-... [vfio_ap]
-......[matrix]
-......... [mdev_supported_types]
-............ [vfio_ap-passthrough]
-............... create
-............... [devices]
+ /sys/devices
+ ... [vfio_ap]
+ ......[matrix]
+ ......... [mdev_supported_types]
+ ............ [vfio_ap-passthrough]
+ ............... create
+ ............... [devices]
A mediated AP matrix device is created by writing a UUID to the attribute file
-named 'create', for example:
+named ``create``, for example::
uuidgen > create
- or
+or
+
+::
echo $uuid > create
When a mediated AP matrix device is created, a sysfs directory named after
-the UUID is created in the 'devices' subdirectory:
+the UUID is created in the ``devices`` subdirectory::
-/sys/devices
-... [vfio_ap]
-......[matrix]
-......... [mdev_supported_types]
-............ [vfio_ap-passthrough]
-............... create
-............... [devices]
-.................. [$uuid]
+ /sys/devices
+ ... [vfio_ap]
+ ......[matrix]
+ ......... [mdev_supported_types]
+ ............ [vfio_ap-passthrough]
+ ............... create
+ ............... [devices]
+ .................. [$uuid]
There will also be three sets of attribute files created in the mediated
matrix device's sysfs directory to configure an AP matrix for the
-KVM guest:
-
-/sys/devices
-... [vfio_ap]
-......[matrix]
-......... [mdev_supported_types]
-............ [vfio_ap-passthrough]
-............... create
-............... [devices]
-.................. [$uuid]
-..................... assign_adapter
-..................... assign_control_domain
-..................... assign_domain
-..................... matrix
-..................... unassign_adapter
-..................... unassign_control_domain
-..................... unassign_domain
-
-assign_adapter
+KVM guest::
+
+ /sys/devices
+ ... [vfio_ap]
+ ......[matrix]
+ ......... [mdev_supported_types]
+ ............ [vfio_ap-passthrough]
+ ............... create
+ ............... [devices]
+ .................. [$uuid]
+ ..................... assign_adapter
+ ..................... assign_control_domain
+ ..................... assign_domain
+ ..................... matrix
+ ..................... unassign_adapter
+ ..................... unassign_control_domain
+ ..................... unassign_domain
+
+``assign_adapter``
To assign an AP adapter to the mediated matrix device, its APID is written
- to the 'assign_adapter' file. This may be done multiple times to assign more
+ to the ``assign_adapter`` file. This may be done multiple times to assign more
than one adapter. The APID may be specified using conventional semantics
as a decimal, hexadecimal, or octal number. For example, to assign adapters
4, 5 and 16 to a mediated matrix device in decimal, hexadecimal and octal
- respectively:
+ respectively::
echo 4 > assign_adapter
echo 0x5 > assign_adapter
@@ -373,22 +401,22 @@ assign_adapter
APQNs are bound to the driver, the operation will terminate with an
error (EADDRNOTAVAIL).
- No APQN that can be derived from the adapter ID and the IDs of the
+ * No APQN that can be derived from the adapter ID and the IDs of the
previously assigned domains can be assigned to another mediated matrix
device. If an APQN is assigned to another mediated matrix device, the
operation will terminate with an error (EADDRINUSE).
-unassign_adapter
- To unassign an AP adapter, its APID is written to the 'unassign_adapter'
+``unassign_adapter``
+ To unassign an AP adapter, its APID is written to the ``unassign_adapter``
file. This may also be done multiple times to unassign more than one adapter.
-assign_domain
+``assign_domain``
To assign a usage domain, the domain number is written into the
- 'assign_domain' file. This may be done multiple times to assign more than one
+ ``assign_domain`` file. This may be done multiple times to assign more than one
usage domain. The domain number is specified using conventional semantics as
a decimal, hexadecimal, or octal number. For example, to assign usage domains
4, 8, and 71 to a mediated matrix device in decimal, hexadecimal and octal
- respectively:
+ respectively::
echo 4 > assign_domain
echo 0x8 > assign_domain
@@ -408,23 +436,23 @@ assign_domain
APQNs are bound to the driver, the operation will terminate with an
error (EADDRNOTAVAIL).
- No APQN that can be derived from the domain ID being assigned and the IDs
+ * No APQN that can be derived from the domain ID being assigned and the IDs
of the previously assigned adapters can be assigned to another mediated
matrix device. If an APQN is assigned to another mediated matrix device,
the operation will terminate with an error (EADDRINUSE).
-unassign_domain
+``unassign_domain``
To unassign a usage domain, the domain number is written into the
- 'unassign_domain' file. This may be done multiple times to unassign more than
+ ``unassign_domain`` file. This may be done multiple times to unassign more than
one usage domain.
-assign_control_domain
+``assign_control_domain``
To assign a control domain, the domain number is written into the
- 'assign_control_domain' file. This may be done multiple times to
+ ``assign_control_domain`` file. This may be done multiple times to
assign more than one control domain. The domain number may be specified using
conventional semantics as a decimal, hexadecimal, or octal number. For
example, to assign control domains 4, 8, and 71 to a mediated matrix device
- in decimal, hexadecimal and octal respectively:
+ in decimal, hexadecimal and octal respectively::
echo 4 > assign_domain
echo 0x8 > assign_domain
@@ -435,33 +463,34 @@ assign_control_domain
allowed by the machine model. If a control domain number higher than the
maximum is specified, the operation will terminate with an error (ENODEV).
-unassign_control_domain
+``unassign_control_domain``
To unassign a control domain, the domain number is written into the
- 'unassign_domain' file. This may be done multiple times to unassign more than
+ ``unassign_control_domain`` file. This may be done multiple times to unassign more than
one control domain.
Notes: No changes to the AP matrix will be allowed while a guest using
the mediated matrix device is running. Attempts to assign an adapter,
domain or control domain will be rejected and an error (EBUSY) returned.
-Starting a Linux Guest Configured with an AP Matrix:
-===================================================
+Starting a Linux Guest Configured with an AP Matrix
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
To provide a mediated matrix device for use by a guest, the following option
-must be specified on the QEMU command line:
+must be specified on the QEMU command line::
-device vfio_ap,sysfsdev=$path-to-mdev
The sysfsdev parameter specifies the path to the mediated matrix device.
-There are a number of ways to specify this path:
+There are a number of ways to specify this path::
-/sys/devices/vfio_ap/matrix/$uuid
-/sys/bus/mdev/devices/$uuid
-/sys/bus/mdev/drivers/vfio_mdev/$uuid
-/sys/devices/vfio_ap/matrix/mdev_supported_types/vfio_ap-passthrough/devices/$uuid
+ /sys/devices/vfio_ap/matrix/$uuid
+ /sys/bus/mdev/devices/$uuid
+ /sys/bus/mdev/drivers/vfio_mdev/$uuid
+ /sys/devices/vfio_ap/matrix/mdev_supported_types/vfio_ap-passthrough/devices/$uuid
When the linux guest is started, the guest will open the mediated
matrix device's file descriptor to get information about the mediated matrix
-device. The vfio_ap device driver will update the APM, AQM, and ADM fields in
+device. The ``vfio_ap`` device driver will update the APM, AQM, and ADM fields in
the guest's CRYCB with the adapter, usage domain and control domains assigned
via the mediated matrix device's sysfs attribute files. Programs running on the
linux guest will then:
@@ -486,20 +515,22 @@ facilities:
The AP facilities feature indicates that AP facilities are installed on the
guest. This feature will be exposed for use only if the AP facilities
are installed on the host system. The feature is s390-specific and is
- represented as a parameter of the -cpu option on the QEMU command line:
+ represented as a parameter of the -cpu option on the QEMU command line::
qemu-system-s390x -cpu $model,ap=on|off
- Where:
+ Where:
- $model is the CPU model defined for the guest (defaults to the model of
- the host system if not specified).
+ ``$model``
+ is the CPU model defined for the guest (defaults to the model of
+ the host system if not specified).
- ap=on|off indicates whether AP facilities are installed (on) or not
- (off). The default for CPU models zEC12 or newer
- is ap=on. AP facilities must be installed on the guest if a
- vfio-ap device (-device vfio-ap,sysfsdev=$path) is configured
- for the guest, or the guest will fail to start.
+ ``ap=on|off``
+ indicates whether AP facilities are installed (on) or not
+ (off). The default for CPU models zEC12 or newer
+ is ``ap=on``. AP facilities must be installed on the guest if a
+ vfio-ap device (``-device vfio-ap,sysfsdev=$path``) is configured
+ for the guest, or the guest will fail to start.
2. Query Configuration Information (QCI) facility
@@ -507,27 +538,29 @@ facilities:
configuration of the AP facilities. This facility will be available
only if the QCI facility is installed on the host system. The feature is
s390-specific and is represented as a parameter of the -cpu option on the
- QEMU command line:
+ QEMU command line::
qemu-system-s390x -cpu $model,apqci=on|off
- Where:
+ Where:
- $model is the CPU model defined for the guest
+ ``$model``
+ is the CPU model defined for the guest
- apqci=on|off indicates whether the QCI facility is installed (on) or
- not (off). The default for CPU models zEC12 or newer
- is apqci=on; for older models, QCI will not be installed.
+ ``apqci=on|off``
+ indicates whether the QCI facility is installed (on) or
+ not (off). The default for CPU models zEC12 or newer
+ is ``apqci=on``; for older models, QCI will not be installed.
- If QCI is installed (apqci=on) but AP facilities are not
- (ap=off), an error message will be logged, but the guest
- will be allowed to start. It makes no sense to have QCI
- installed if the AP facilities are not; this is considered
- an invalid configuration.
+ If QCI is installed (``apqci=on``) but AP facilities are not
+ (``ap=off``), an error message will be logged, but the guest
+ will be allowed to start. It makes no sense to have QCI
+ installed if the AP facilities are not; this is considered
+ an invalid configuration.
- If the QCI facility is not installed, APQNs with an APQI
- greater than 15 will not be detected by the AP bus
- running on the guest.
+ If the QCI facility is not installed, APQNs with an APQI
+ greater than 15 will not be detected by the AP bus
+ running on the guest.
3. Adjunct Process Facility Test (APFT) facility
@@ -535,48 +568,51 @@ facilities:
AP facilities available for a given AP queue. This facility will be available
only if the APFT facility is installed on the host system. The feature is
s390-specific and is represented as a parameter of the -cpu option on the
- QEMU command line:
+ QEMU command line::
qemu-system-s390x -cpu $model,apft=on|off
- Where:
+ Where:
+
+ ``$model``
+ is the CPU model defined for the guest (defaults to the model of
+ the host system if not specified).
- $model is the CPU model defined for the guest (defaults to the model of
- the host system if not specified).
+ ``apft=on|off``
+ indicates whether the APFT facility is installed (on) or
+ not (off). The default for CPU models zEC12 and
+ newer is ``apft=on``; for older models, APFT will not be
+ installed.
- apft=on|off indicates whether the APFT facility is installed (on) or
- not (off). The default for CPU models zEC12 and
- newer is apft=on for older models, APFT will not be
- installed.
+ If APFT is installed (``apft=on``) but AP facilities are not
+ (``ap=off``), an error message will be logged, but the guest
+ will be allowed to start. It makes no sense to have APFT
+ installed if the AP facilities are not; this is considered
+ an invalid configuration.
- If APFT is installed (apft=on) but AP facilities are not
- (ap=off), an error message will be logged, but the guest
- will be allowed to start. It makes no sense to have APFT
- installed if the AP facilities are not; this is considered
- an invalid configuration.
+ It also makes no sense to turn APFT off because the AP bus
+ running on the guest will not detect CEX4 and newer devices
+ without it. Since only CEX4 and newer devices are supported
+ for guest usage, no AP devices can be made accessible to a
+ guest started without APFT installed.
- It also makes no sense to turn APFT off because the AP bus
- running on the guest will not detect CEX4 and newer devices
- without it. Since only CEX4 and newer devices are supported
- for guest usage, no AP devices can be made accessible to a
- guest started without APFT installed.
+Hot plug a vfio-ap device into a running guest
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-Hot plug a vfio-ap device into a running guest:
-==============================================
Only one vfio-ap device can be attached to the virtual machine's ap-bus, so a
vfio-ap device can be hot plugged if and only if no vfio-ap device is attached
to the bus already, whether via the QEMU command line or a prior hot plug
action.
-To hot plug a vfio-ap device, use the QEMU device_add command:
+To hot plug a vfio-ap device, use the QEMU ``device_add`` command::
(qemu) device_add vfio-ap,sysfsdev="$path-to-mdev"
- Where the '$path-to-mdev' value specifies the absolute path to a mediated
- device to which AP resources to be used by the guest have been assigned.
+Where the ``$path-to-mdev`` value specifies the absolute path to a mediated
+device to which AP resources to be used by the guest have been assigned.
Note that on Linux guests, the AP devices will be created in the
-/sys/bus/ap/devices directory when the AP bus subsequently performs its periodic
+``/sys/bus/ap/devices`` directory when the AP bus subsequently performs its periodic
scan, so there may be a short delay before the AP devices are accessible on the
guest.
@@ -587,66 +623,69 @@ The command will fail if:
* The CPU model features for controlling guest access to AP facilities are not
enabled (see 'CPU model features' subsection in the previous section).
-Hot unplug a vfio-ap device from a running guest:
-================================================
+Hot unplug a vfio-ap device from a running guest
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
A vfio-ap device can be unplugged from a running KVM guest if a vfio-ap device
has been attached to the virtual machine's ap-bus via the QEMU command line
or a prior hot plug action.
-To hot unplug a vfio-ap device, use the QEMU device_del command:
+To hot unplug a vfio-ap device, use the QEMU ``device_del`` command::
(qemu) device_del vfio-ap,sysfsdev="$path-to-mdev"
- Where $path-to-mdev is the same as the path specified when the vfio-ap
- device was attached to the virtual machine's ap-bus.
+Where ``$path-to-mdev`` is the same as the path specified when the vfio-ap
+device was attached to the virtual machine's ap-bus.
-On a Linux guest, the AP devices will be removed from the /sys/bus/ap/devices
+On a Linux guest, the AP devices will be removed from the ``/sys/bus/ap/devices``
directory on the guest when the AP bus subsequently performs its periodic scan,
so there may be a short delay before the AP devices are no longer accessible by
the guest.
-The command will fail if the $path-to-mdev specified on the device_del command
+The command will fail if the ``$path-to-mdev`` specified on the ``device_del`` command
does not match the value specified when the vfio-ap device was attached to
the virtual machine's ap-bus.
-Example: Configure AP Matrixes for Three Linux Guests:
-=====================================================
+Example: Configure AP Matrices for Three Linux Guests
+-----------------------------------------------------
+
Let's now provide an example to illustrate how KVM guests may be given
access to AP facilities. For this example, we will show how to configure
three guests such that executing the lszcrypt command on the guests would
look like this:
-Guest1
-------
-CARD.DOMAIN TYPE MODE
-------------------------------
-05 CEX5C CCA-Coproc
-05.0004 CEX5C CCA-Coproc
-05.00ab CEX5C CCA-Coproc
-06 CEX5A Accelerator
-06.0004 CEX5A Accelerator
-06.00ab CEX5C CCA-Coproc
-
-Guest2
-------
-CARD.DOMAIN TYPE MODE
-------------------------------
-05 CEX5A Accelerator
-05.0047 CEX5A Accelerator
-05.00ff CEX5A Accelerator (5,4), (5,171), (6,4), (6,171),
-
-Guest3
-------
-CARD.DOMAIN TYPE MODE
-------------------------------
-06 CEX5A Accelerator
-06.0047 CEX5A Accelerator
-06.00ff CEX5A Accelerator
+Guest1::
+
+ CARD.DOMAIN TYPE MODE
+ ------------------------------
+ 05 CEX5C CCA-Coproc
+ 05.0004 CEX5C CCA-Coproc
+ 05.00ab CEX5C CCA-Coproc
+ 06 CEX5A Accelerator
+ 06.0004 CEX5A Accelerator
+ 06.00ab CEX5C CCA-Coproc
+
+Guest2::
+
+ CARD.DOMAIN TYPE MODE
+ ------------------------------
+ 05 CEX5A Accelerator
+ 05.0047 CEX5A Accelerator
+ 05.00ff CEX5A Accelerator
+
+Guest3::
+
+ CARD.DOMAIN TYPE MODE
+ ------------------------------
+ 06 CEX5A Accelerator
+ 06.0047 CEX5A Accelerator
+ 06.00ff CEX5A Accelerator
These are the steps:
1. Install the vfio_ap module on the linux host. The dependency chain for the
vfio_ap module is:
+
* iommu
* s390
* zcrypt
@@ -657,6 +696,7 @@ These are the steps:
To build the vfio_ap module, the kernel build must be configured with the
following Kconfig elements selected:
+
* IOMMU_SUPPORT
* S390
* ZCRYPT
@@ -666,21 +706,21 @@ These are the steps:
* VFIO_MDEV_DEVICE
* KVM
- If using make menuconfig select the following to build the vfio_ap module:
- -> Device Drivers
- -> IOMMU Hardware Support
- select S390 AP IOMMU Support
- -> VFIO Non-Privileged userspace driver framework
- -> Mediated device driver framework
- -> VFIO driver for Mediated devices
- -> I/O subsystem
- -> VFIO support for AP devices
+ If using make menuconfig select the following to build the vfio_ap module::
+ -> Device Drivers
+ -> IOMMU Hardware Support
+ select S390 AP IOMMU Support
+ -> VFIO Non-Privileged userspace driver framework
+ -> Mediated device driver framework
+ -> VFIO driver for Mediated devices
+ -> I/O subsystem
+ -> VFIO support for AP devices
2. Secure the AP queues to be used by the three guests so that the host can not
access them. To secure the AP queues 05.0004, 05.0047, 05.00ab, 05.00ff,
06.0004, 06.0047, 06.00ab, and 06.00ff for use by the vfio_ap device driver,
the corresponding APQNs must be removed from the default queue drivers pool
- as follows:
+ as follows::
echo -5,-6 > /sys/bus/ap/apmask
@@ -689,19 +729,19 @@ These are the steps:
This will result in AP queues 05.0004, 05.0047, 05.00ab, 05.00ff, 06.0004,
06.0047, 06.00ab, and 06.00ff getting bound to the vfio_ap device driver. The
sysfs directory for the vfio_ap device driver will now contain symbolic links
- to the AP queue devices bound to it:
-
- /sys/bus/ap
- ... [drivers]
- ...... [vfio_ap]
- ......... [05.0004]
- ......... [05.0047]
- ......... [05.00ab]
- ......... [05.00ff]
- ......... [06.0004]
- ......... [06.0047]
- ......... [06.00ab]
- ......... [06.00ff]
+ to the AP queue devices bound to it::
+
+ /sys/bus/ap
+ ... [drivers]
+ ...... [vfio_ap]
+ ......... [05.0004]
+ ......... [05.0047]
+ ......... [05.00ab]
+ ......... [05.00ff]
+ ......... [06.0004]
+ ......... [06.0047]
+ ......... [06.00ab]
+ ......... [06.00ff]
Keep in mind that only type 10 and newer adapters (i.e., CEX4 and later)
can be bound to the vfio_ap device driver. The reason for this is to
@@ -712,153 +752,153 @@ These are the steps:
The administrator, therefore, must take care to secure only AP queues that
can be bound to the vfio_ap device driver. The device type for a given AP
queue device can be read from the parent card's sysfs directory. For example,
- to see the hardware type of the queue 05.0004:
+ to see the hardware type of the queue 05.0004::
- cat /sys/bus/ap/devices/card05/hwtype
+ cat /sys/bus/ap/devices/card05/hwtype
The hwtype must be 10 or higher (CEX4 or newer) in order to be bound to the
vfio_ap device driver.
3. Create the mediated devices needed to configure the AP matrixes for the
three guests and to provide an interface to the vfio_ap driver for
- use by the guests:
+ use by the guests::
- /sys/devices/vfio_ap/matrix/
- --- [mdev_supported_types]
- ------ [vfio_ap-passthrough] (passthrough mediated matrix device type)
- --------- create
- --------- [devices]
+ /sys/devices/vfio_ap/matrix/
+ ... [mdev_supported_types]
+ ...... [vfio_ap-passthrough] (passthrough mediated matrix device type)
+ ......... create
+ ......... [devices]
- To create the mediated devices for the three guests:
+ To create the mediated devices for the three guests::
uuidgen > create
uuidgen > create
uuidgen > create
- or
+ or
- echo $uuid1 > create
- echo $uuid2 > create
- echo $uuid3 > create
+ ::
+
+ echo $uuid1 > create
+ echo $uuid2 > create
+ echo $uuid3 > create
This will create three mediated devices in the [devices] subdirectory named
after the UUID used to create the mediated device. We'll call them $uuid1,
- $uuid2 and $uuid3 and this is the sysfs directory structure after creation:
-
- /sys/devices/vfio_ap/matrix/
- --- [mdev_supported_types]
- ------ [vfio_ap-passthrough]
- --------- [devices]
- ------------ [$uuid1]
- --------------- assign_adapter
- --------------- assign_control_domain
- --------------- assign_domain
- --------------- matrix
- --------------- unassign_adapter
- --------------- unassign_control_domain
- --------------- unassign_domain
-
- ------------ [$uuid2]
- --------------- assign_adapter
- --------------- assign_control_domain
- --------------- assign_domain
- --------------- matrix
- --------------- unassign_adapter
- ----------------unassign_control_domain
- ----------------unassign_domain
-
- ------------ [$uuid3]
- --------------- assign_adapter
- --------------- assign_control_domain
- --------------- assign_domain
- --------------- matrix
- --------------- unassign_adapter
- ----------------unassign_control_domain
- ----------------unassign_domain
+ $uuid2 and $uuid3 and this is the sysfs directory structure after creation::
+
+ /sys/devices/vfio_ap/matrix/
+ ... [mdev_supported_types]
+ ...... [vfio_ap-passthrough]
+ ......... [devices]
+ ............ [$uuid1]
+ ............... assign_adapter
+ ............... assign_control_domain
+ ............... assign_domain
+ ............... matrix
+ ............... unassign_adapter
+ ............... unassign_control_domain
+ ............... unassign_domain
+
+ ............ [$uuid2]
+ ............... assign_adapter
+ ............... assign_control_domain
+ ............... assign_domain
+ ............... matrix
+ ............... unassign_adapter
+ ............... unassign_control_domain
+ ............... unassign_domain
+
+ ............ [$uuid3]
+ ............... assign_adapter
+ ............... assign_control_domain
+ ............... assign_domain
+ ............... matrix
+ ............... unassign_adapter
+ ............... unassign_control_domain
+ ............... unassign_domain
4. The administrator now needs to configure the matrixes for the mediated
devices $uuid1 (for Guest1), $uuid2 (for Guest2) and $uuid3 (for Guest3).
- This is how the matrix is configured for Guest1:
+ This is how the matrix is configured for Guest1::
echo 5 > assign_adapter
echo 6 > assign_adapter
echo 4 > assign_domain
echo 0xab > assign_domain
- Control domains can similarly be assigned using the assign_control_domain
- sysfs file.
+ Control domains can similarly be assigned using the assign_control_domain
+ sysfs file.
- If a mistake is made configuring an adapter, domain or control domain,
- you can use the unassign_xxx interfaces to unassign the adapter, domain or
- control domain.
+ If a mistake is made configuring an adapter, domain or control domain,
+ you can use the ``unassign_xxx`` interfaces to unassign the adapter, domain or
+ control domain.
- To display the matrix configuration for Guest1:
+ To display the matrix configuration for Guest1::
cat matrix
- The output will display the APQNs in the format xx.yyyy, where xx is
- the adapter number and yyyy is the domain number. The output for Guest1
- will look like this:
+ The output will display the APQNs in the format ``xx.yyyy``, where xx is
+ the adapter number and yyyy is the domain number. The output for Guest1
+ will look like this::
05.0004
05.00ab
06.0004
06.00ab
- This is how the matrix is configured for Guest2:
+ This is how the matrix is configured for Guest2::
echo 5 > assign_adapter
echo 0x47 > assign_domain
echo 0xff > assign_domain
- This is how the matrix is configured for Guest3:
+ This is how the matrix is configured for Guest3::
echo 6 > assign_adapter
echo 0x47 > assign_domain
echo 0xff > assign_domain
-5. Start Guest1:
+5. Start Guest1::
- /usr/bin/qemu-system-s390x ... -cpu host,ap=on,apqci=on,apft=on \
- -device vfio-ap,sysfsdev=/sys/devices/vfio_ap/matrix/$uuid1 ...
+ /usr/bin/qemu-system-s390x ... -cpu host,ap=on,apqci=on,apft=on -device vfio-ap,sysfsdev=/sys/devices/vfio_ap/matrix/$uuid1 ...
-7. Start Guest2:
+7. Start Guest2::
- /usr/bin/qemu-system-s390x ... -cpu host,ap=on,apqci=on,apft=on \
- -device vfio-ap,sysfsdev=/sys/devices/vfio_ap/matrix/$uuid2 ...
+ /usr/bin/qemu-system-s390x ... -cpu host,ap=on,apqci=on,apft=on -device vfio-ap,sysfsdev=/sys/devices/vfio_ap/matrix/$uuid2 ...
-7. Start Guest3:
+7. Start Guest3::
- /usr/bin/qemu-system-s390x ... -cpu host,ap=on,apqci=on,apft=on \
- -device vfio-ap,sysfsdev=/sys/devices/vfio_ap/matrix/$uuid3 ...
+ /usr/bin/qemu-system-s390x ... -cpu host,ap=on,apqci=on,apft=on -device vfio-ap,sysfsdev=/sys/devices/vfio_ap/matrix/$uuid3 ...
When the guest is shut down, the mediated matrix devices may be removed.
-Using our example again, to remove the mediated matrix device $uuid1:
+Using our example again, to remove the mediated matrix device $uuid1::
/sys/devices/vfio_ap/matrix/
- --- [mdev_supported_types]
- ------ [vfio_ap-passthrough]
- --------- [devices]
- ------------ [$uuid1]
- --------------- remove
+ ... [mdev_supported_types]
+ ...... [vfio_ap-passthrough]
+ ......... [devices]
+ ............ [$uuid1]
+ ............... remove
echo 1 > remove
- This will remove all of the mdev matrix device's sysfs structures including
- the mdev device itself. To recreate and reconfigure the mdev matrix device,
- all of the steps starting with step 3 will have to be performed again. Note
- that the remove will fail if a guest using the mdev is still running.
+This will remove all of the mdev matrix device's sysfs structures including
+the mdev device itself. To recreate and reconfigure the mdev matrix device,
+all of the steps starting with step 3 will have to be performed again. Note
+that the remove will fail if a guest using the mdev is still running.
- It is not necessary to remove an mdev matrix device, but one may want to
- remove it if no guest will use it during the remaining lifetime of the linux
- host. If the mdev matrix device is removed, one may want to also reconfigure
- the pool of adapters and queues reserved for use by the default drivers.
+It is not necessary to remove an mdev matrix device, but one may want to
+remove it if no guest will use it during the remaining lifetime of the linux
+host. If the mdev matrix device is removed, one may want to also reconfigure
+the pool of adapters and queues reserved for use by the default drivers.
Limitations
-===========
+-----------
+
* The KVM/kernel interfaces do not provide a way to prevent restoring an APQN
to the default drivers pool of a queue that is still assigned to a mediated
device in use by a guest. It is incumbent upon the administrator to
@@ -867,10 +907,10 @@ Limitations
device, such as a private key configured specifically for the guest.
* Dynamically assigning AP resources to or unassigning AP resources from a
- mediated matrix device - see 'Configuring an AP matrix for a linux guest'
+ mediated matrix device - see `Configuring an AP matrix for a linux guest`_
section above - while a running guest is using it is currently not supported.
* Live guest migration is not supported for guests using AP devices. If a guest
is using AP devices, the vfio-ap device configured for the guest must be
- unplugged before migrating the guest (see 'Hot unplug a vfio-ap device from a
- running guest' section above.
+ unplugged before migrating the guest (see `Hot unplug a vfio-ap device from a
+ running guest`_ section above.)
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index e591a126e7..a8191a3e75 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -32,6 +32,7 @@
#include "qemu-common.h"
#include "qemu/units.h"
#include "qemu/option.h"
+#include "monitor/qdev.h"
#include "qapi/error.h"
#include "hw/sysbus.h"
#include "hw/boards.h"
@@ -54,6 +55,7 @@
#include "qemu/error-report.h"
#include "qemu/module.h"
#include "hw/pci-host/gpex.h"
+#include "hw/virtio/virtio-pci.h"
#include "hw/arm/sysbus-fdt.h"
#include "hw/platform-bus.h"
#include "hw/qdev-properties.h"
@@ -71,6 +73,7 @@
#include "hw/mem/pc-dimm.h"
#include "hw/mem/nvdimm.h"
#include "hw/acpi/generic_event_device.h"
+#include "hw/virtio/virtio-iommu.h"
#define DEFINE_VIRT_MACHINE_LATEST(major, minor, latest) \
static void virt_##major##_##minor##_class_init(ObjectClass *oc, \
@@ -1180,6 +1183,30 @@ static void create_smmu(const VirtMachineState *vms,
g_free(node);
}
+/*
+ * Describe the virtio-iommu PCI device in the machine device tree.
+ *
+ * Allocates a phandle into vms->iommu_phandle, adds a
+ * "virtio_iommu@<bdf>" subnode under the PCIe host bridge node
+ * (vms->pciehb_nodename) and sets an "iommu-map" property on the host
+ * bridge node itself.  The BDF is read from vms->virtio_iommu_bdf.
+ *
+ * @errp is accepted but not written by this function.
+ */
+static void create_virtio_iommu_dt_bindings(VirtMachineState *vms, Error **errp)
+{
+ const char compat[] = "virtio,pci-iommu";
+ uint16_t bdf = vms->virtio_iommu_bdf;
+ char *node;
+
+ vms->iommu_phandle = qemu_fdt_alloc_phandle(vms->fdt);
+
+ /* The node name carries the BDF in decimal ("%d"). */
+ node = g_strdup_printf("%s/virtio_iommu@%d", vms->pciehb_nodename, bdf);
+ qemu_fdt_add_subnode(vms->fdt, node);
+ /* sizeof(compat) includes the terminating NUL of the string. */
+ qemu_fdt_setprop(vms->fdt, node, "compatible", compat, sizeof(compat));
+ /* Five single-cell values; only the first (bdf << 8) is non-zero. */
+ qemu_fdt_setprop_sized_cells(vms->fdt, node, "reg",
+ 1, bdf << 8, 1, 0, 1, 0,
+ 1, 0, 1, 0);
+
+ qemu_fdt_setprop_cell(vms->fdt, node, "#iommu-cells", 1);
+ qemu_fdt_setprop_cell(vms->fdt, node, "phandle", vms->iommu_phandle);
+ g_free(node);
+
+ /*
+ * Two map entries of four cells each.  Assuming the usual pci-iommu
+ * layout (rid-base, iommu phandle, iommu-base, length) -- TODO confirm
+ * against the binding -- the first covers IDs [0, bdf) and the second
+ * covers [bdf + 1, 0xffff], so the ID equal to bdf is left out of the
+ * map.
+ */
+ qemu_fdt_setprop_cells(vms->fdt, vms->pciehb_nodename, "iommu-map",
+ 0x0, vms->iommu_phandle, 0x0, bdf,
+ bdf + 1, vms->iommu_phandle, bdf + 1, 0xffff - bdf);
+}
+
static void create_pcie(VirtMachineState *vms)
{
hwaddr base_mmio = vms->memmap[VIRT_PCIE_MMIO].base;
@@ -1258,7 +1285,7 @@ static void create_pcie(VirtMachineState *vms)
}
}
- nodename = g_strdup_printf("/pcie@%" PRIx64, base);
+ nodename = vms->pciehb_nodename = g_strdup_printf("/pcie@%" PRIx64, base);
qemu_fdt_add_subnode(vms->fdt, nodename);
qemu_fdt_setprop_string(vms->fdt, nodename,
"compatible", "pci-host-ecam-generic");
@@ -1301,13 +1328,16 @@ static void create_pcie(VirtMachineState *vms)
if (vms->iommu) {
vms->iommu_phandle = qemu_fdt_alloc_phandle(vms->fdt);
- create_smmu(vms, pci->bus);
-
- qemu_fdt_setprop_cells(vms->fdt, nodename, "iommu-map",
- 0x0, vms->iommu_phandle, 0x0, 0x10000);
+ switch (vms->iommu) {
+ case VIRT_IOMMU_SMMUV3:
+ create_smmu(vms, pci->bus);
+ qemu_fdt_setprop_cells(vms->fdt, nodename, "iommu-map",
+ 0x0, vms->iommu_phandle, 0x0, 0x10000);
+ break;
+ default:
+ g_assert_not_reached();
+ }
}
-
- g_free(nodename);
}
static void create_platform_bus(VirtMachineState *vms)
@@ -1974,6 +2004,13 @@ static void virt_machine_device_plug_cb(HotplugHandler *hotplug_dev,
if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) {
virt_memory_plug(hotplug_dev, dev, errp);
}
+ if (object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_IOMMU_PCI)) {
+ PCIDevice *pdev = PCI_DEVICE(dev);
+
+ vms->iommu = VIRT_IOMMU_VIRTIO;
+ vms->virtio_iommu_bdf = pci_get_bdf(pdev);
+ create_virtio_iommu_dt_bindings(vms, errp);
+ }
}
static void virt_machine_device_unplug_request_cb(HotplugHandler *hotplug_dev,
@@ -1990,7 +2027,13 @@ static HotplugHandler *virt_machine_get_hotplug_handler(MachineState *machine,
(object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM))) {
return HOTPLUG_HANDLER(machine);
}
+ if (object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_IOMMU_PCI)) {
+ VirtMachineState *vms = VIRT_MACHINE(machine);
+ if (!vms->bootinfo.firmware_loaded || !acpi_enabled) {
+ return HOTPLUG_HANDLER(machine);
+ }
+ }
return NULL;
}
diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c
index d8c459c575..12925a47ec 100644
--- a/hw/block/vhost-user-blk.c
+++ b/hw/block/vhost-user-blk.c
@@ -306,7 +306,7 @@ static int vhost_user_blk_connect(DeviceState *dev)
s->connected = true;
s->dev.nvqs = s->num_queues;
- s->dev.vqs = s->vqs;
+ s->dev.vqs = s->vhost_vqs;
s->dev.vq_index = 0;
s->dev.backend_features = 0;
@@ -420,13 +420,14 @@ static void vhost_user_blk_device_realize(DeviceState *dev, Error **errp)
virtio_init(vdev, "virtio-blk", VIRTIO_ID_BLOCK,
sizeof(struct virtio_blk_config));
+ s->virtqs = g_new(VirtQueue *, s->num_queues);
for (i = 0; i < s->num_queues; i++) {
- virtio_add_queue(vdev, s->queue_size,
- vhost_user_blk_handle_output);
+ s->virtqs[i] = virtio_add_queue(vdev, s->queue_size,
+ vhost_user_blk_handle_output);
}
s->inflight = g_new0(struct vhost_inflight, 1);
- s->vqs = g_new0(struct vhost_virtqueue, s->num_queues);
+ s->vhost_vqs = g_new0(struct vhost_virtqueue, s->num_queues);
s->watch = 0;
s->connected = false;
@@ -458,8 +459,12 @@ reconnect:
return;
virtio_err:
- g_free(s->vqs);
+ g_free(s->vhost_vqs);
g_free(s->inflight);
+ for (i = 0; i < s->num_queues; i++) {
+ virtio_delete_queue(s->virtqs[i]);
+ }
+ g_free(s->virtqs);
virtio_cleanup(vdev);
vhost_user_cleanup(&s->vhost_user);
}
@@ -468,14 +473,20 @@ static void vhost_user_blk_device_unrealize(DeviceState *dev, Error **errp)
{
VirtIODevice *vdev = VIRTIO_DEVICE(dev);
VHostUserBlk *s = VHOST_USER_BLK(dev);
+ int i;
virtio_set_status(vdev, 0);
qemu_chr_fe_set_handlers(&s->chardev, NULL, NULL, NULL,
NULL, NULL, NULL, false);
vhost_dev_cleanup(&s->dev);
vhost_dev_free_inflight(s->inflight);
- g_free(s->vqs);
+ g_free(s->vhost_vqs);
g_free(s->inflight);
+
+ for (i = 0; i < s->num_queues; i++) {
+ virtio_delete_queue(s->virtqs[i]);
+ }
+ g_free(s->virtqs);
virtio_cleanup(vdev);
vhost_user_cleanup(&s->vhost_user);
}
diff --git a/hw/s390x/ipl.c b/hw/s390x/ipl.c
index 0817874b48..9c1ecd423c 100644
--- a/hw/s390x/ipl.c
+++ b/hw/s390x/ipl.c
@@ -179,7 +179,7 @@ static void s390_ipl_realize(DeviceState *dev, Error **errp)
/* if not Linux load the address of the (short) IPL PSW */
ipl_psw = rom_ptr(4, 4);
if (ipl_psw) {
- pentry = be32_to_cpu(*ipl_psw) & 0x7fffffffUL;
+ pentry = be32_to_cpu(*ipl_psw) & PSW_MASK_SHORT_ADDR;
} else {
error_setg(&err, "Could not get IPL PSW");
goto error;
diff --git a/hw/virtio/Kconfig b/hw/virtio/Kconfig
index f87def27a6..d29525b36f 100644
--- a/hw/virtio/Kconfig
+++ b/hw/virtio/Kconfig
@@ -9,6 +9,11 @@ config VIRTIO_RNG
default y
depends on VIRTIO
+config VIRTIO_IOMMU
+ bool
+ default y
+ depends on VIRTIO
+
config VIRTIO_PCI
bool
default y if PCI_DEVICES
diff --git a/hw/virtio/Makefile.objs b/hw/virtio/Makefile.objs
index de0f5fc39b..4e4d39a0a4 100644
--- a/hw/virtio/Makefile.objs
+++ b/hw/virtio/Makefile.objs
@@ -16,6 +16,7 @@ obj-$(call land,$(CONFIG_VIRTIO_CRYPTO),$(CONFIG_VIRTIO_PCI)) += virtio-crypto-p
obj-$(CONFIG_VIRTIO_PMEM) += virtio-pmem.o
common-obj-$(call land,$(CONFIG_VIRTIO_PMEM),$(CONFIG_VIRTIO_PCI)) += virtio-pmem-pci.o
obj-$(call land,$(CONFIG_VHOST_USER_FS),$(CONFIG_VIRTIO_PCI)) += vhost-user-fs-pci.o
+obj-$(CONFIG_VIRTIO_IOMMU) += virtio-iommu.o
obj-$(CONFIG_VHOST_VSOCK) += vhost-vsock.o
ifeq ($(CONFIG_VIRTIO_PCI),y)
@@ -28,6 +29,7 @@ obj-$(CONFIG_VIRTIO_INPUT_HOST) += virtio-input-host-pci.o
obj-$(CONFIG_VIRTIO_INPUT) += virtio-input-pci.o
obj-$(CONFIG_VIRTIO_RNG) += virtio-rng-pci.o
obj-$(CONFIG_VIRTIO_BALLOON) += virtio-balloon-pci.o
+obj-$(CONFIG_VIRTIO_IOMMU) += virtio-iommu-pci.o
obj-$(CONFIG_VIRTIO_9P) += virtio-9p-pci.o
obj-$(CONFIG_VIRTIO_SCSI) += virtio-scsi-pci.o
obj-$(CONFIG_VIRTIO_BLK) += virtio-blk-pci.o
diff --git a/hw/virtio/trace-events b/hw/virtio/trace-events
index e28ba48da6..e83500bee9 100644
--- a/hw/virtio/trace-events
+++ b/hw/virtio/trace-events
@@ -53,3 +53,23 @@ virtio_mmio_write_offset(uint64_t offset, uint64_t value) "virtio_mmio_write off
virtio_mmio_guest_page(uint64_t size, int shift) "guest page size 0x%" PRIx64 " shift %d"
virtio_mmio_queue_write(uint64_t value, int max_size) "mmio_queue write 0x%" PRIx64 " max %d"
virtio_mmio_setting_irq(int level) "virtio_mmio setting IRQ %d"
+
+# hw/virtio/virtio-iommu.c
+virtio_iommu_device_reset(void) "reset!"
+virtio_iommu_get_features(uint64_t features) "device supports features=0x%"PRIx64
+virtio_iommu_device_status(uint8_t status) "driver status = %d"
+virtio_iommu_get_config(uint64_t page_size_mask, uint64_t start, uint64_t end, uint32_t domain_range, uint32_t probe_size) "page_size_mask=0x%"PRIx64" start=0x%"PRIx64" end=0x%"PRIx64" domain_range=%d probe_size=0x%x"
+virtio_iommu_set_config(uint64_t page_size_mask, uint64_t start, uint64_t end, uint32_t domain_range, uint32_t probe_size) "page_size_mask=0x%"PRIx64" start=0x%"PRIx64" end=0x%"PRIx64" domain_bits=%d probe_size=0x%x"
+virtio_iommu_attach(uint32_t domain_id, uint32_t ep_id) "domain=%d endpoint=%d"
+virtio_iommu_detach(uint32_t domain_id, uint32_t ep_id) "domain=%d endpoint=%d"
+virtio_iommu_map(uint32_t domain_id, uint64_t virt_start, uint64_t virt_end, uint64_t phys_start, uint32_t flags) "domain=%d virt_start=0x%"PRIx64" virt_end=0x%"PRIx64 " phys_start=0x%"PRIx64" flags=%d"
+virtio_iommu_unmap(uint32_t domain_id, uint64_t virt_start, uint64_t virt_end) "domain=%d virt_start=0x%"PRIx64" virt_end=0x%"PRIx64
+virtio_iommu_unmap_done(uint32_t domain_id, uint64_t virt_start, uint64_t virt_end) "domain=%d virt_start=0x%"PRIx64" virt_end=0x%"PRIx64
+virtio_iommu_translate(const char *name, uint32_t rid, uint64_t iova, int flag) "mr=%s rid=%d addr=0x%"PRIx64" flag=%d"
+virtio_iommu_init_iommu_mr(char *iommu_mr) "init %s"
+virtio_iommu_get_endpoint(uint32_t ep_id) "Alloc endpoint=%d"
+virtio_iommu_put_endpoint(uint32_t ep_id) "Free endpoint=%d"
+virtio_iommu_get_domain(uint32_t domain_id) "Alloc domain=%d"
+virtio_iommu_put_domain(uint32_t domain_id) "Free domain=%d"
+virtio_iommu_translate_out(uint64_t virt_addr, uint64_t phys_addr, uint32_t sid) "0x%"PRIx64" -> 0x%"PRIx64 " for sid=%d"
+virtio_iommu_report_fault(uint8_t reason, uint32_t flags, uint32_t endpoint, uint64_t addr) "FAULT reason=%d flags=%d endpoint=%d address =0x%"PRIx64
diff --git a/hw/virtio/vhost-user-fs.c b/hw/virtio/vhost-user-fs.c
index 33b17848c2..6136768875 100644
--- a/hw/virtio/vhost-user-fs.c
+++ b/hw/virtio/vhost-user-fs.c
@@ -209,11 +209,12 @@ static void vuf_device_realize(DeviceState *dev, Error **errp)
sizeof(struct virtio_fs_config));
/* Hiprio queue */
- virtio_add_queue(vdev, fs->conf.queue_size, vuf_handle_output);
+ fs->hiprio_vq = virtio_add_queue(vdev, fs->conf.queue_size, vuf_handle_output);
/* Request queues */
+ fs->req_vqs = g_new(VirtQueue *, fs->conf.num_request_queues);
for (i = 0; i < fs->conf.num_request_queues; i++) {
- virtio_add_queue(vdev, fs->conf.queue_size, vuf_handle_output);
+ fs->req_vqs[i] = virtio_add_queue(vdev, fs->conf.queue_size, vuf_handle_output);
}
/* 1 high prio queue, plus the number configured */
@@ -230,6 +231,11 @@ static void vuf_device_realize(DeviceState *dev, Error **errp)
err_virtio:
vhost_user_cleanup(&fs->vhost_user);
+ virtio_delete_queue(fs->hiprio_vq);
+ for (i = 0; i < fs->conf.num_request_queues; i++) {
+ virtio_delete_queue(fs->req_vqs[i]);
+ }
+ g_free(fs->req_vqs);
virtio_cleanup(vdev);
g_free(fs->vhost_dev.vqs);
return;
@@ -239,6 +245,7 @@ static void vuf_device_unrealize(DeviceState *dev, Error **errp)
{
VirtIODevice *vdev = VIRTIO_DEVICE(dev);
VHostUserFS *fs = VHOST_USER_FS(dev);
+ int i;
/* This will stop vhost backend if appropriate. */
vuf_set_status(vdev, 0);
@@ -247,6 +254,11 @@ static void vuf_device_unrealize(DeviceState *dev, Error **errp)
vhost_user_cleanup(&fs->vhost_user);
+ virtio_delete_queue(fs->hiprio_vq);
+ for (i = 0; i < fs->conf.num_request_queues; i++) {
+ virtio_delete_queue(fs->req_vqs[i]);
+ }
+ g_free(fs->req_vqs);
virtio_cleanup(vdev);
g_free(fs->vhost_dev.vqs);
fs->vhost_dev.vqs = NULL;
diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c
index 2e81f5514f..08e7e63790 100644
--- a/hw/virtio/vhost-user.c
+++ b/hw/virtio/vhost-user.c
@@ -443,6 +443,7 @@ static int vhost_user_set_mem_table_postcopy(struct vhost_dev *dev,
&offset);
fd = memory_region_get_fd(mr);
if (fd > 0) {
+ assert(fd_num < VHOST_MEMORY_MAX_NREGIONS);
trace_vhost_user_set_mem_table_withfd(fd_num, mr->name,
reg->memory_size,
reg->guest_phys_addr,
@@ -455,7 +456,6 @@ static int vhost_user_set_mem_table_postcopy(struct vhost_dev *dev,
msg.payload.memory.regions[fd_num].guest_phys_addr =
reg->guest_phys_addr;
msg.payload.memory.regions[fd_num].mmap_offset = offset;
- assert(fd_num < VHOST_MEMORY_MAX_NREGIONS);
fds[fd_num++] = fd;
} else {
u->region_rb_offset[i] = 0;
@@ -1458,9 +1458,11 @@ static int vhost_user_backend_init(struct vhost_dev *dev, void *opaque)
"VHOST_USER_PROTOCOL_F_LOG_SHMFD feature.");
}
- err = vhost_setup_slave_channel(dev);
- if (err < 0) {
- return err;
+ if (dev->vq_index == 0) {
+ err = vhost_setup_slave_channel(dev);
+ if (err < 0) {
+ return err;
+ }
}
u->postcopy_notifier.notify = vhost_user_postcopy_notifier;
diff --git a/hw/virtio/virtio-crypto.c b/hw/virtio/virtio-crypto.c
index 7351ab0a19..4c65114de5 100644
--- a/hw/virtio/virtio-crypto.c
+++ b/hw/virtio/virtio-crypto.c
@@ -831,12 +831,13 @@ static void virtio_crypto_device_unrealize(DeviceState *dev, Error **errp)
max_queues = vcrypto->multiqueue ? vcrypto->max_queues : 1;
for (i = 0; i < max_queues; i++) {
- virtio_del_queue(vdev, i);
+ virtio_delete_queue(vcrypto->vqs[i].dataq);
q = &vcrypto->vqs[i];
qemu_bh_delete(q->dataq_bh);
}
g_free(vcrypto->vqs);
+ virtio_delete_queue(vcrypto->ctrl_vq);
virtio_cleanup(vdev);
cryptodev_backend_set_used(vcrypto->cryptodev, false);
diff --git a/hw/virtio/virtio-iommu-pci.c b/hw/virtio/virtio-iommu-pci.c
new file mode 100644
index 0000000000..3dfbf55b47
--- /dev/null
+++ b/hw/virtio/virtio-iommu-pci.c
@@ -0,0 +1,104 @@
+/*
+ * Virtio IOMMU PCI Bindings
+ *
+ * Copyright (c) 2019 Red Hat, Inc.
+ * Written by Eric Auger
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 or
+ * (at your option) any later version.
+ */
+
+#include "qemu/osdep.h"
+
+#include "virtio-pci.h"
+#include "hw/virtio/virtio-iommu.h"
+#include "hw/qdev-properties.h"
+#include "qapi/error.h"
+#include "hw/boards.h"
+
+/* Short type name for the PCI proxy that wraps a virtio-iommu device. */
+typedef struct VirtIOIOMMUPCI VirtIOIOMMUPCI;
+
+/*
+ * virtio-iommu-pci: This extends VirtioPCIProxy.
+ *
+ * VIRTIO_IOMMU_PCI() casts an object to VirtIOIOMMUPCI through
+ * OBJECT_CHECK() against TYPE_VIRTIO_IOMMU_PCI.
+ */
+#define VIRTIO_IOMMU_PCI(obj) \
+ OBJECT_CHECK(VirtIOIOMMUPCI, (obj), TYPE_VIRTIO_IOMMU_PCI)
+
+/* Instance state: the PCI proxy with the virtio-iommu device embedded. */
+struct VirtIOIOMMUPCI {
+ /* PCI proxy this type extends */
+ VirtIOPCIProxy parent_obj;
+ /* embedded virtio-iommu device, set up in the instance_init hook */
+ VirtIOIOMMU vdev;
+};
+
+/*
+ * qdev properties of the PCI proxy: a "class" property backed by the
+ * class_code field of VirtIOPCIProxy, defaulting to 0.
+ */
+static Property virtio_iommu_pci_properties[] = {
+ DEFINE_PROP_UINT32("class", VirtIOPCIProxy, class_code, 0),
+ DEFINE_PROP_END_OF_LIST(),
+};
+
+/*
+ * Realize hook of the virtio-iommu PCI proxy.
+ *
+ * Refuses to realize when the machine provides no hotplug handler for this
+ * device. Otherwise the embedded virtio-iommu device is placed on the
+ * proxy's virtio bus, told which PCI bus it serves through the
+ * "primary-bus" link, and realized.
+ *
+ * @vpci_dev: the virtio PCI proxy being realized
+ * @errp: receives the error on failure
+ */
+static void virtio_iommu_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp)
+{
+    VirtIOIOMMUPCI *dev = VIRTIO_IOMMU_PCI(vpci_dev);
+    DeviceState *vdev = DEVICE(&dev->vdev);
+    Error *local_err = NULL;
+
+    if (!qdev_get_machine_hotplug_handler(DEVICE(vpci_dev))) {
+        MachineClass *mc = MACHINE_GET_CLASS(qdev_get_machine());
+
+        error_setg(errp,
+                   "%s machine fails to create iommu-map device tree bindings",
+                   mc->name);
+        error_append_hint(errp,
+                          "Check your machine implements a hotplug handler "
+                          "for the virtio-iommu-pci device\n");
+        error_append_hint(errp, "Check the guest is booted without FW or with "
+                          "-no-acpi\n");
+        return;
+    }
+    qdev_set_parent_bus(vdev, BUS(&vpci_dev->bus));
+
+    /*
+     * Use a local Error for the link: handing the caller's errp to two
+     * consecutive setters would make the second one set an already-set
+     * error if the first one failed.
+     */
+    object_property_set_link(OBJECT(dev),
+                             OBJECT(pci_get_bus(&vpci_dev->pci_dev)),
+                             "primary-bus", &local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
+        return;
+    }
+    object_property_set_bool(OBJECT(vdev), true, "realized", errp);
+}
+
+/*
+ * Class initializer of the virtio-iommu PCI proxy: fills in the PCI
+ * identity, the qdev properties/category and the realize hook. The device
+ * is marked as not hotpluggable.
+ */
+static void virtio_iommu_pci_class_init(ObjectClass *klass, void *data)
+{
+    PCIDeviceClass *pci_class = PCI_DEVICE_CLASS(klass);
+    VirtioPCIClass *vpci_class = VIRTIO_PCI_CLASS(klass);
+    DeviceClass *dev_class = DEVICE_CLASS(klass);
+
+    /* virtio-pci hook */
+    vpci_class->realize = virtio_iommu_pci_realize;
+
+    /* generic device settings */
+    set_bit(DEVICE_CATEGORY_MISC, dev_class->categories);
+    device_class_set_props(dev_class, virtio_iommu_pci_properties);
+    dev_class->hotpluggable = false;
+
+    /* PCI identity */
+    pci_class->vendor_id = PCI_VENDOR_ID_REDHAT_QUMRANET;
+    pci_class->device_id = PCI_DEVICE_ID_VIRTIO_IOMMU;
+    pci_class->revision = VIRTIO_PCI_ABI_VERSION;
+    pci_class->class_id = PCI_CLASS_OTHERS;
+}
+
+/*
+ * Instance initializer: sets up the embedded virtio-iommu device as a
+ * TYPE_VIRTIO_IOMMU object inside the proxy.
+ */
+static void virtio_iommu_pci_instance_init(Object *obj)
+{
+    VirtIOIOMMU *iommu = &VIRTIO_IOMMU_PCI(obj)->vdev;
+
+    virtio_instance_init_common(obj, iommu, sizeof(*iommu),
+                                TYPE_VIRTIO_IOMMU);
+}
+
+/*
+ * Type description handed to virtio_pci_types_register(): base, generic,
+ * transitional and non-transitional type names plus the instance size and
+ * the instance/class initializers defined above.
+ */
+static const VirtioPCIDeviceTypeInfo virtio_iommu_pci_info = {
+ .base_name = TYPE_VIRTIO_IOMMU_PCI,
+ .generic_name = "virtio-iommu-pci",
+ .transitional_name = "virtio-iommu-pci-transitional",
+ .non_transitional_name = "virtio-iommu-pci-non-transitional",
+ .instance_size = sizeof(VirtIOIOMMUPCI),
+ .instance_init = virtio_iommu_pci_instance_init,
+ .class_init = virtio_iommu_pci_class_init,
+};
+
+/* Registers the virtio-iommu PCI types described by virtio_iommu_pci_info. */
+static void virtio_iommu_pci_register(void)
+{
+ virtio_pci_types_register(&virtio_iommu_pci_info);
+}
+
+/* Hook the registration function into type initialization. */
+type_init(virtio_iommu_pci_register)
+
+
diff --git a/hw/virtio/virtio-iommu.c b/hw/virtio/virtio-iommu.c
new file mode 100644
index 0000000000..4cee8083bc
--- /dev/null
+++ b/hw/virtio/virtio-iommu.c
@@ -0,0 +1,890 @@
+/*
+ * virtio-iommu device
+ *
+ * Copyright (c) 2020 Red Hat, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2 or later, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/log.h"
+#include "qemu/iov.h"
+#include "qemu-common.h"
+#include "hw/qdev-properties.h"
+#include "hw/virtio/virtio.h"
+#include "sysemu/kvm.h"
+#include "qapi/error.h"
+#include "qemu/error-report.h"
+#include "trace.h"
+
+#include "standard-headers/linux/virtio_ids.h"
+
+#include "hw/virtio/virtio-bus.h"
+#include "hw/virtio/virtio-access.h"
+#include "hw/virtio/virtio-iommu.h"
+#include "hw/pci/pci_bus.h"
+#include "hw/pci/pci.h"
+
+/* Size passed to virtio_add_queue() for both the request and event queues */
+#define VIOMMU_DEFAULT_QUEUE_SIZE 256
+
+/*
+ * A translation domain: a set of IOVA mappings shared by the endpoints
+ * attached to it.
+ */
+typedef struct VirtIOIOMMUDomain {
+ /* domain identifier, also the key in the device's domains tree */
+ uint32_t id;
+ /* tree of VirtIOIOMMUInterval keys -> VirtIOIOMMUMapping values */
+ GTree *mappings;
+ /* endpoints currently attached to this domain */
+ QLIST_HEAD(, VirtIOIOMMUEndpoint) endpoint_list;
+} VirtIOIOMMUDomain;
+
+/* An endpoint known to the IOMMU, optionally attached to one domain. */
+typedef struct VirtIOIOMMUEndpoint {
+ /* endpoint identifier, also the key in the device's endpoints tree */
+ uint32_t id;
+ /* domain the endpoint is attached to, NULL when detached */
+ VirtIOIOMMUDomain *domain;
+ /* linkage in the owning domain's endpoint_list */
+ QLIST_ENTRY(VirtIOIOMMUEndpoint) next;
+} VirtIOIOMMUEndpoint;
+
+/*
+ * Key of a mapping: a range of input addresses. interval_cmp() treats two
+ * intervals as equal when they overlap, with both bounds included.
+ */
+typedef struct VirtIOIOMMUInterval {
+ uint64_t low;
+ uint64_t high;
+} VirtIOIOMMUInterval;
+
+/* Value of a mapping: where the interval's low bound maps to, and how. */
+typedef struct VirtIOIOMMUMapping {
+ /* output address corresponding to the interval's low bound */
+ uint64_t phys_addr;
+ /* VIRTIO_IOMMU_MAP_F_* flags given in the MAP request */
+ uint32_t flags;
+} VirtIOIOMMUMapping;
+
+/* Builds the bus/device/function identifier of @dev from its bus and devfn. */
+static inline uint16_t virtio_iommu_get_bdf(IOMMUDevice *dev)
+{
+    int bus_num = pci_bus_num(dev->bus);
+
+    return PCI_BUILD_BDF(bus_num, dev->devfn);
+}
+
+/**
+ * Look up the IOMMUPciBus for a bus number.
+ *
+ * SID based operations need a lookup by bus number. The per-bus-number
+ * array is filled lazily from the bus hash table, because the bus numbers
+ * may not be initialized yet at the time the IOMMUPciBus is created
+ * (iommu_find_add_as).
+ *
+ * Returns the matching IOMMUPciBus, or NULL when no known bus currently
+ * has number @bus_num.
+ */
+static IOMMUPciBus *iommu_find_iommu_pcibus(VirtIOIOMMU *s, uint8_t bus_num)
+{
+    IOMMUPciBus *cached = s->iommu_pcibus_by_bus_num[bus_num];
+    IOMMUPciBus *candidate;
+    GHashTableIter it;
+
+    if (cached) {
+        return cached;
+    }
+
+    /* Cache miss: scan every known bus and remember the match. */
+    g_hash_table_iter_init(&it, s->as_by_busptr);
+    while (g_hash_table_iter_next(&it, NULL, (void **)&candidate)) {
+        if (pci_bus_num(candidate->bus) != bus_num) {
+            continue;
+        }
+        s->iommu_pcibus_by_bus_num[bus_num] = candidate;
+        return candidate;
+    }
+    return NULL;
+}
+
+/*
+ * Returns the IOMMU memory region of the device identified by @sid
+ * (bus number in the upper byte, devfn in the lower byte), or NULL when
+ * no such device has been registered with this IOMMU.
+ */
+static IOMMUMemoryRegion *virtio_iommu_mr(VirtIOIOMMU *s, uint32_t sid)
+{
+    uint8_t bus_n, devfn;
+    IOMMUPciBus *iommu_pci_bus;
+    IOMMUDevice *dev;
+
+    bus_n = PCI_BUS_NUM(sid);
+    iommu_pci_bus = iommu_find_iommu_pcibus(s, bus_n);
+    if (iommu_pci_bus) {
+        /*
+         * PCI_DEVFN_MAX is the number of devfn slots, not a bit mask:
+         * the devfn is the SID modulo that count.
+         */
+        devfn = sid & (PCI_DEVFN_MAX - 1);
+        dev = iommu_pci_bus->pbdev[devfn];
+        if (dev) {
+            return &dev->iommu_mr;
+        }
+    }
+    return NULL;
+}
+
+/*
+ * GTree comparison callback for VirtIOIOMMUInterval keys.
+ *
+ * Returns -1 when @a lies entirely below @b, 1 when it lies entirely
+ * above, and 0 when the two intervals overlap (bounds included).
+ */
+static gint interval_cmp(gconstpointer a, gconstpointer b, gpointer user_data)
+{
+    const VirtIOIOMMUInterval *lhs = a;
+    const VirtIOIOMMUInterval *rhs = b;
+
+    if (lhs->high < rhs->low) {
+        return -1;
+    }
+    if (rhs->high < lhs->low) {
+        return 1;
+    }
+    return 0;
+}
+
+/*
+ * Unlinks @ep from the endpoint list of its domain and clears its domain
+ * pointer. Does nothing when the endpoint is not attached.
+ */
+static void virtio_iommu_detach_endpoint_from_domain(VirtIOIOMMUEndpoint *ep)
+{
+    if (ep->domain) {
+        QLIST_REMOVE(ep, next);
+        ep->domain = NULL;
+    }
+}
+
+/*
+ * Returns the endpoint with id @ep_id, creating and registering it on
+ * first use. Returns NULL when the endpoint is unknown and no IOMMU memory
+ * region exists for that id.
+ */
+static VirtIOIOMMUEndpoint *virtio_iommu_get_endpoint(VirtIOIOMMU *s,
+                                                      uint32_t ep_id)
+{
+    gpointer key = GUINT_TO_POINTER(ep_id);
+    VirtIOIOMMUEndpoint *ep = g_tree_lookup(s->endpoints, key);
+
+    /* Only create endpoints for ids backed by a registered device. */
+    if (!ep && virtio_iommu_mr(s, ep_id)) {
+        ep = g_malloc0(sizeof(*ep));
+        ep->id = ep_id;
+        trace_virtio_iommu_get_endpoint(ep_id);
+        g_tree_insert(s->endpoints, key, ep);
+    }
+    return ep;
+}
+
+/*
+ * Value destructor of the endpoints tree: detaches the endpoint from its
+ * domain, if any, and frees it.
+ */
+static void virtio_iommu_put_endpoint(gpointer data)
+{
+    VirtIOIOMMUEndpoint *endpoint = data;
+
+    /* No-op for an endpoint that is not attached. */
+    virtio_iommu_detach_endpoint_from_domain(endpoint);
+
+    trace_virtio_iommu_put_endpoint(endpoint->id);
+    g_free(endpoint);
+}
+
+/*
+ * Returns the domain with id @domain_id, creating it (with an empty
+ * mapping tree and an empty endpoint list) when it does not exist yet.
+ */
+static VirtIOIOMMUDomain *virtio_iommu_get_domain(VirtIOIOMMU *s,
+                                                  uint32_t domain_id)
+{
+    gpointer key = GUINT_TO_POINTER(domain_id);
+    VirtIOIOMMUDomain *dom = g_tree_lookup(s->domains, key);
+
+    if (dom) {
+        return dom;
+    }
+
+    dom = g_new0(VirtIOIOMMUDomain, 1);
+    dom->id = domain_id;
+    QLIST_INIT(&dom->endpoint_list);
+    /* The mapping tree owns both its interval keys and mapping values. */
+    dom->mappings = g_tree_new_full((GCompareDataFunc)interval_cmp,
+                                    NULL, (GDestroyNotify)g_free,
+                                    (GDestroyNotify)g_free);
+    g_tree_insert(s->domains, key, dom);
+    trace_virtio_iommu_get_domain(domain_id);
+    return dom;
+}
+
+/*
+ * Value destructor of the domains tree: detaches every endpoint still on
+ * the domain's list, destroys the mapping tree and frees the domain.
+ */
+static void virtio_iommu_put_domain(gpointer data)
+{
+    VirtIOIOMMUDomain *dom = data;
+    VirtIOIOMMUEndpoint *ep, *next_ep;
+
+    /* _SAFE variant: detaching unlinks the current element. */
+    QLIST_FOREACH_SAFE(ep, &dom->endpoint_list, next, next_ep) {
+        virtio_iommu_detach_endpoint_from_domain(ep);
+    }
+
+    g_tree_destroy(dom->mappings);
+    trace_virtio_iommu_put_domain(dom->id);
+    g_free(dom);
+}
+
+/*
+ * Callback installed with pci_setup_iommu(): returns the address space of
+ * the device at @devfn on @bus, creating the per-bus and per-device
+ * bookkeeping (IOMMU memory region and address space) on first request.
+ *
+ * @opaque is the VirtIOIOMMU instance.
+ */
+static AddressSpace *virtio_iommu_find_add_as(PCIBus *bus, void *opaque,
+                                              int devfn)
+{
+    static uint32_t mr_index;
+    VirtIOIOMMU *s = opaque;
+    IOMMUPciBus *sbus;
+    IOMMUDevice *sdev;
+    char *name;
+
+    sbus = g_hash_table_lookup(s->as_by_busptr, bus);
+    if (!sbus) {
+        /* Header followed by one device pointer per devfn slot. */
+        sbus = g_malloc0(sizeof(IOMMUPciBus) +
+                         sizeof(IOMMUDevice *) * PCI_DEVFN_MAX);
+        sbus->bus = bus;
+        g_hash_table_insert(s->as_by_busptr, bus, sbus);
+    }
+
+    sdev = sbus->pbdev[devfn];
+    if (sdev) {
+        return &sdev->as;
+    }
+
+    name = g_strdup_printf("%s-%d-%d",
+                           TYPE_VIRTIO_IOMMU_MEMORY_REGION,
+                           mr_index++, devfn);
+
+    sdev = g_malloc0(sizeof(IOMMUDevice));
+    sbus->pbdev[devfn] = sdev;
+    sdev->viommu = s;
+    sdev->bus = bus;
+    sdev->devfn = devfn;
+
+    trace_virtio_iommu_init_iommu_mr(name);
+
+    memory_region_init_iommu(&sdev->iommu_mr, sizeof(sdev->iommu_mr),
+                             TYPE_VIRTIO_IOMMU_MEMORY_REGION,
+                             OBJECT(s), name,
+                             UINT64_MAX);
+    address_space_init(&sdev->as,
+                       MEMORY_REGION(&sdev->iommu_mr), TYPE_VIRTIO_IOMMU);
+    g_free(name);
+
+    return &sdev->as;
+}
+
+/*
+ * Handles an ATTACH request: attaches the endpoint to the requested
+ * domain, creating the domain if needed. An endpoint already attached is
+ * detached first, and the domain it leaves is removed when that was its
+ * last endpoint.
+ *
+ * Returns VIRTIO_IOMMU_S_NOENT when the endpoint cannot be found or
+ * created, VIRTIO_IOMMU_S_OK otherwise.
+ */
+static int virtio_iommu_attach(VirtIOIOMMU *s,
+                               struct virtio_iommu_req_attach *req)
+{
+    uint32_t ep_id = le32_to_cpu(req->endpoint);
+    uint32_t domain_id = le32_to_cpu(req->domain);
+    VirtIOIOMMUDomain *old_domain, *new_domain;
+    VirtIOIOMMUEndpoint *ep;
+
+    trace_virtio_iommu_attach(domain_id, ep_id);
+
+    ep = virtio_iommu_get_endpoint(s, ep_id);
+    if (!ep) {
+        return VIRTIO_IOMMU_S_NOENT;
+    }
+
+    old_domain = ep->domain;
+    if (old_domain) {
+        /* Already attached somewhere: leave that domain first. */
+        virtio_iommu_detach_endpoint_from_domain(ep);
+        if (QLIST_EMPTY(&old_domain->endpoint_list)) {
+            g_tree_remove(s->domains, GUINT_TO_POINTER(old_domain->id));
+        }
+    }
+
+    new_domain = virtio_iommu_get_domain(s, domain_id);
+    ep->domain = new_domain;
+    QLIST_INSERT_HEAD(&new_domain->endpoint_list, ep, next);
+
+    return VIRTIO_IOMMU_S_OK;
+}
+
+/*
+ * Handles a DETACH request: detaches the endpoint from the given domain
+ * and removes the domain when it has no endpoint left.
+ *
+ * Returns VIRTIO_IOMMU_S_NOENT for an unknown endpoint,
+ * VIRTIO_IOMMU_S_INVAL when the endpoint is not attached to that domain,
+ * VIRTIO_IOMMU_S_OK otherwise.
+ */
+static int virtio_iommu_detach(VirtIOIOMMU *s,
+                               struct virtio_iommu_req_detach *req)
+{
+    uint32_t ep_id = le32_to_cpu(req->endpoint);
+    uint32_t domain_id = le32_to_cpu(req->domain);
+    VirtIOIOMMUEndpoint *ep;
+    VirtIOIOMMUDomain *dom;
+
+    trace_virtio_iommu_detach(domain_id, ep_id);
+
+    ep = g_tree_lookup(s->endpoints, GUINT_TO_POINTER(ep_id));
+    if (!ep) {
+        return VIRTIO_IOMMU_S_NOENT;
+    }
+
+    dom = ep->domain;
+    if (!(dom && dom->id == domain_id)) {
+        return VIRTIO_IOMMU_S_INVAL;
+    }
+
+    virtio_iommu_detach_endpoint_from_domain(ep);
+
+    /* Last endpoint gone: drop the domain together with its mappings. */
+    if (QLIST_EMPTY(&dom->endpoint_list)) {
+        g_tree_remove(s->domains, GUINT_TO_POINTER(dom->id));
+    }
+    return VIRTIO_IOMMU_S_OK;
+}
+
+/*
+ * Handles a MAP request: records the mapping of the input range
+ * [virt_start, virt_end] onto phys_start in the given domain.
+ *
+ * Returns:
+ *   VIRTIO_IOMMU_S_INVAL  unknown flag bits are set, virt_start is above
+ *                         virt_end, or the range overlaps an existing
+ *                         mapping of the domain
+ *   VIRTIO_IOMMU_S_NOENT  the domain does not exist
+ *   VIRTIO_IOMMU_S_OK     the mapping was inserted
+ */
+static int virtio_iommu_map(VirtIOIOMMU *s,
+                            struct virtio_iommu_req_map *req)
+{
+    uint32_t domain_id = le32_to_cpu(req->domain);
+    uint64_t phys_start = le64_to_cpu(req->phys_start);
+    uint64_t virt_start = le64_to_cpu(req->virt_start);
+    uint64_t virt_end = le64_to_cpu(req->virt_end);
+    uint32_t flags = le32_to_cpu(req->flags);
+    VirtIOIOMMUInterval lookup = { .low = virt_start, .high = virt_end };
+    VirtIOIOMMUDomain *domain;
+    VirtIOIOMMUInterval *interval;
+    VirtIOIOMMUMapping *mapping;
+
+    if (flags & ~VIRTIO_IOMMU_MAP_F_MASK) {
+        return VIRTIO_IOMMU_S_INVAL;
+    }
+
+    /*
+     * An inverted range would make interval_cmp() inconsistent (it can
+     * compare both below and above the same key) and must never reach
+     * the tree.
+     */
+    if (virt_start > virt_end) {
+        return VIRTIO_IOMMU_S_INVAL;
+    }
+
+    domain = g_tree_lookup(s->domains, GUINT_TO_POINTER(domain_id));
+    if (!domain) {
+        return VIRTIO_IOMMU_S_NOENT;
+    }
+
+    /* interval_cmp() reports any overlap as a match. */
+    if (g_tree_lookup(domain->mappings, &lookup)) {
+        return VIRTIO_IOMMU_S_INVAL;
+    }
+
+    trace_virtio_iommu_map(domain_id, virt_start, virt_end, phys_start, flags);
+
+    /* Key and value are owned (and later freed) by the mapping tree. */
+    interval = g_malloc0(sizeof(*interval));
+    *interval = lookup;
+
+    mapping = g_malloc0(sizeof(*mapping));
+    mapping->phys_addr = phys_start;
+    mapping->flags = flags;
+
+    g_tree_insert(domain->mappings, interval, mapping);
+
+    return VIRTIO_IOMMU_S_OK;
+}
+
+/*
+ * Handles an UNMAP request: removes every mapping of the domain that lies
+ * entirely inside [virt_start, virt_end].
+ *
+ * Returns VIRTIO_IOMMU_S_NOENT when the domain does not exist,
+ * VIRTIO_IOMMU_S_RANGE as soon as a mapping that only partially overlaps
+ * the range is met (mappings removed before that stay removed), and
+ * VIRTIO_IOMMU_S_OK otherwise.
+ */
+static int virtio_iommu_unmap(VirtIOIOMMU *s,
+                              struct virtio_iommu_req_unmap *req)
+{
+    uint32_t domain_id = le32_to_cpu(req->domain);
+    uint64_t virt_start = le64_to_cpu(req->virt_start);
+    uint64_t virt_end = le64_to_cpu(req->virt_end);
+    VirtIOIOMMUInterval range = { .low = virt_start, .high = virt_end };
+    VirtIOIOMMUInterval *hit_key;
+    VirtIOIOMMUMapping *hit_val;
+    VirtIOIOMMUDomain *domain;
+
+    trace_virtio_iommu_unmap(domain_id, virt_start, virt_end);
+
+    domain = g_tree_lookup(s->domains, GUINT_TO_POINTER(domain_id));
+    if (!domain) {
+        return VIRTIO_IOMMU_S_NOENT;
+    }
+
+    /* Each lookup yields one mapping overlapping the requested range. */
+    while (g_tree_lookup_extended(domain->mappings, &range,
+                                  (void **)&hit_key, (void **)&hit_val)) {
+        uint64_t hit_low = hit_key->low;
+        uint64_t hit_high = hit_key->high;
+
+        if (range.low > hit_low || range.high < hit_high) {
+            /* The mapping sticks out of the range: refuse to split it. */
+            return VIRTIO_IOMMU_S_RANGE;
+        }
+        g_tree_remove(domain->mappings, hit_key);
+        trace_virtio_iommu_unmap_done(domain_id, hit_low, hit_high);
+    }
+    return VIRTIO_IOMMU_S_OK;
+}
+
+/*
+ * Copies a request payload out of the scatter-gather list into @req.
+ *
+ * @req_sz is the size of the full request structure; the trailing
+ * struct virtio_iommu_req_tail is not part of what is copied.
+ *
+ * Returns 0 on success, VIRTIO_IOMMU_S_INVAL when the list holds fewer
+ * bytes than the payload.
+ */
+static int virtio_iommu_iov_to_req(struct iovec *iov,
+                                   unsigned int iov_cnt,
+                                   void *req, size_t req_sz)
+{
+    size_t payload_sz = req_sz - sizeof(struct virtio_iommu_req_tail);
+    size_t copied = iov_to_buf(iov, iov_cnt, 0, req, payload_sz);
+
+    return unlikely(copied != payload_sz) ? VIRTIO_IOMMU_S_INVAL : 0;
+}
+
+/*
+ * Generates virtio_iommu_handle_<req>(): copies the request payload of
+ * type struct virtio_iommu_req_<req> out of the iovec with
+ * virtio_iommu_iov_to_req() and, when that succeeds, passes it to
+ * virtio_iommu_<req>(). The return value is the copy error if any,
+ * otherwise the status returned by virtio_iommu_<req>().
+ */
+#define virtio_iommu_handle_req(__req) \
+static int virtio_iommu_handle_ ## __req(VirtIOIOMMU *s, \
+ struct iovec *iov, \
+ unsigned int iov_cnt) \
+{ \
+ struct virtio_iommu_req_ ## __req req; \
+ int ret = virtio_iommu_iov_to_req(iov, iov_cnt, &req, sizeof(req)); \
+ \
+ return ret ? ret : virtio_iommu_ ## __req(s, &req); \
+}
+
+/* One handler per request type dispatched by the command queue handler. */
+virtio_iommu_handle_req(attach)
+virtio_iommu_handle_req(detach)
+virtio_iommu_handle_req(map)
+virtio_iommu_handle_req(unmap)
+
+/*
+ * Handler of the request virtqueue.
+ *
+ * Pops elements until the queue is empty. For each element the request
+ * head is read from the out buffers, the request is dispatched on its
+ * type with s->mutex held, and the resulting status is written back in a
+ * struct virtio_iommu_req_tail through the in buffers before the element
+ * is pushed and the guest notified.
+ */
+static void virtio_iommu_handle_command(VirtIODevice *vdev, VirtQueue *vq)
+{
+ VirtIOIOMMU *s = VIRTIO_IOMMU(vdev);
+ struct virtio_iommu_req_head head;
+ struct virtio_iommu_req_tail tail = {};
+ VirtQueueElement *elem;
+ unsigned int iov_cnt;
+ struct iovec *iov;
+ size_t sz;
+
+ for (;;) {
+ elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
+ if (!elem) {
+ return;
+ }
+
+ /*
+ * The in buffers must be able to hold the tail and the out buffers
+ * the head; otherwise flag a device error, give the element back
+ * and stop processing.
+ */
+ if (iov_size(elem->in_sg, elem->in_num) < sizeof(tail) ||
+ iov_size(elem->out_sg, elem->out_num) < sizeof(head)) {
+ virtio_error(vdev, "virtio-iommu bad head/tail size");
+ virtqueue_detach_element(vq, elem, 0);
+ g_free(elem);
+ break;
+ }
+
+ iov_cnt = elem->out_num;
+ iov = elem->out_sg;
+ sz = iov_to_buf(iov, iov_cnt, 0, &head, sizeof(head));
+ if (unlikely(sz != sizeof(head))) {
+ tail.status = VIRTIO_IOMMU_S_DEVERR;
+ goto out;
+ }
+ /* Request handlers run with the device mutex held. */
+ qemu_mutex_lock(&s->mutex);
+ switch (head.type) {
+ case VIRTIO_IOMMU_T_ATTACH:
+ tail.status = virtio_iommu_handle_attach(s, iov, iov_cnt);
+ break;
+ case VIRTIO_IOMMU_T_DETACH:
+ tail.status = virtio_iommu_handle_detach(s, iov, iov_cnt);
+ break;
+ case VIRTIO_IOMMU_T_MAP:
+ tail.status = virtio_iommu_handle_map(s, iov, iov_cnt);
+ break;
+ case VIRTIO_IOMMU_T_UNMAP:
+ tail.status = virtio_iommu_handle_unmap(s, iov, iov_cnt);
+ break;
+ default:
+ /* Any other request type is reported as unsupported. */
+ tail.status = VIRTIO_IOMMU_S_UNSUPP;
+ }
+ qemu_mutex_unlock(&s->mutex);
+
+out:
+ /* Report the status; the size check above guarantees it fits. */
+ sz = iov_from_buf(elem->in_sg, elem->in_num, 0,
+ &tail, sizeof(tail));
+ assert(sz == sizeof(tail));
+
+ virtqueue_push(vq, elem, sizeof(tail));
+ virtio_notify(vdev, vq);
+ g_free(elem);
+ }
+}
+
+/*
+ * Reports a fault to the guest through the event virtqueue.
+ *
+ * The fault record is written into the next buffer of the event queue.
+ * When no buffer is available the event is dropped after a one-time error
+ * report; a buffer too small for the record is a device error.
+ */
+static void virtio_iommu_report_fault(VirtIOIOMMU *viommu, uint8_t reason,
+                                      int flags, uint32_t endpoint,
+                                      uint64_t address)
+{
+    VirtIODevice *vdev = &viommu->parent_obj;
+    VirtQueue *event_vq = viommu->event_vq;
+    struct virtio_iommu_fault record;
+    VirtQueueElement *buf;
+    size_t written;
+
+    buf = virtqueue_pop(event_vq, sizeof(VirtQueueElement));
+    if (!buf) {
+        error_report_once(
+            "no buffer available in event queue to report event");
+        return;
+    }
+
+    if (iov_size(buf->in_sg, buf->in_num) < sizeof(record)) {
+        virtio_error(vdev, "error buffer of wrong size");
+        virtqueue_detach_element(event_vq, buf, 0);
+        g_free(buf);
+        return;
+    }
+
+    /* Zero the whole record first so unset bytes go out as zero. */
+    memset(&record, 0, sizeof(record));
+    record.reason = reason;
+    record.flags = cpu_to_le32(flags);
+    record.endpoint = cpu_to_le32(endpoint);
+    record.address = cpu_to_le64(address);
+
+    written = iov_from_buf(buf->in_sg, buf->in_num, 0,
+                           &record, sizeof(record));
+    assert(written == sizeof(record));
+
+    trace_virtio_iommu_report_fault(reason, flags, endpoint, address);
+    virtqueue_push(event_vq, buf, written);
+    virtio_notify(vdev, event_vq);
+    g_free(buf);
+}
+
+/*
+ * IOMMU memory region translate callback.
+ *
+ * Translates @addr for the device owning @mr according to the mappings
+ * of the domain its endpoint is attached to.
+ *
+ * The returned entry has perm == IOMMU_NONE when the access is refused;
+ * in that case a fault is reported on the event queue. When the endpoint
+ * is unknown or not attached to a domain, the access is let through
+ * untranslated if the BYPASS feature was negotiated and refused
+ * otherwise.
+ *
+ * @flag: requested access (read and/or write)
+ * @iommu_idx: unused
+ */
+static IOMMUTLBEntry virtio_iommu_translate(IOMMUMemoryRegion *mr, hwaddr addr,
+                                            IOMMUAccessFlags flag,
+                                            int iommu_idx)
+{
+    IOMMUDevice *sdev = container_of(mr, IOMMUDevice, iommu_mr);
+    VirtIOIOMMUInterval interval, *mapping_key;
+    VirtIOIOMMUMapping *mapping_value;
+    VirtIOIOMMU *s = sdev->viommu;
+    bool read_fault, write_fault;
+    VirtIOIOMMUEndpoint *ep;
+    uint32_t sid, flags;
+    bool bypass_allowed;
+    bool found;
+    IOMMUTLBEntry entry = {
+        .target_as = &address_space_memory,
+        .iova = addr,
+        .translated_addr = addr,
+        /* 64-bit arithmetic; assumes page_size_mask is 64 bits wide */
+        .addr_mask = (1ULL << ctz64(s->config.page_size_mask)) - 1,
+        .perm = IOMMU_NONE,
+    };
+
+    /*
+     * Look up exactly the accessed address. A key extending to addr + 1
+     * would also match a mapping that only starts at addr + 1 (interval
+     * bounds are inclusive) and would wrap around at the top of the
+     * address space.
+     */
+    interval.low = addr;
+    interval.high = addr;
+
+    bypass_allowed = virtio_vdev_has_feature(&s->parent_obj,
+                                             VIRTIO_IOMMU_F_BYPASS);
+
+    sid = virtio_iommu_get_bdf(sdev);
+
+    trace_virtio_iommu_translate(mr->parent_obj.name, sid, addr, flag);
+    qemu_mutex_lock(&s->mutex);
+
+    ep = g_tree_lookup(s->endpoints, GUINT_TO_POINTER(sid));
+    if (!ep) {
+        if (!bypass_allowed) {
+            error_report_once("%s sid=%d is not known!!", __func__, sid);
+            virtio_iommu_report_fault(s, VIRTIO_IOMMU_FAULT_R_UNKNOWN,
+                                      VIRTIO_IOMMU_FAULT_F_ADDRESS,
+                                      sid, addr);
+        } else {
+            entry.perm = flag;
+        }
+        goto unlock;
+    }
+
+    if (!ep->domain) {
+        if (!bypass_allowed) {
+            error_report_once("%s %02x:%02x.%01x not attached to any domain",
+                              __func__, PCI_BUS_NUM(sid),
+                              PCI_SLOT(sid), PCI_FUNC(sid));
+            virtio_iommu_report_fault(s, VIRTIO_IOMMU_FAULT_R_DOMAIN,
+                                      VIRTIO_IOMMU_FAULT_F_ADDRESS,
+                                      sid, addr);
+        } else {
+            entry.perm = flag;
+        }
+        goto unlock;
+    }
+
+    found = g_tree_lookup_extended(ep->domain->mappings, (gpointer)(&interval),
+                                   (void **)&mapping_key,
+                                   (void **)&mapping_value);
+    if (!found) {
+        error_report_once("%s no mapping for 0x%"PRIx64" for sid=%d",
+                          __func__, addr, sid);
+        virtio_iommu_report_fault(s, VIRTIO_IOMMU_FAULT_R_MAPPING,
+                                  VIRTIO_IOMMU_FAULT_F_ADDRESS,
+                                  sid, addr);
+        goto unlock;
+    }
+
+    /* The mapping must grant every kind of access being requested. */
+    read_fault = (flag & IOMMU_RO) &&
+                 !(mapping_value->flags & VIRTIO_IOMMU_MAP_F_READ);
+    write_fault = (flag & IOMMU_WO) &&
+                  !(mapping_value->flags & VIRTIO_IOMMU_MAP_F_WRITE);
+
+    flags = read_fault ? VIRTIO_IOMMU_FAULT_F_READ : 0;
+    flags |= write_fault ? VIRTIO_IOMMU_FAULT_F_WRITE : 0;
+    if (flags) {
+        error_report_once("%s permission error on 0x%"PRIx64"(%d): allowed=%d",
+                          __func__, addr, flag, mapping_value->flags);
+        flags |= VIRTIO_IOMMU_FAULT_F_ADDRESS;
+        virtio_iommu_report_fault(s, VIRTIO_IOMMU_FAULT_R_MAPPING,
+                                  flags, sid, addr);
+        goto unlock;
+    }
+    /* Keep the offset of addr inside the mapping. */
+    entry.translated_addr = addr - mapping_key->low + mapping_value->phys_addr;
+    entry.perm = flag;
+    trace_virtio_iommu_translate_out(addr, entry.translated_addr, sid);
+
+unlock:
+    qemu_mutex_unlock(&s->mutex);
+    return entry;
+}
+
+/* Copies the device configuration into @config_data and traces it. */
+static void virtio_iommu_get_config(VirtIODevice *vdev, uint8_t *config_data)
+{
+    VirtIOIOMMU *s = VIRTIO_IOMMU(vdev);
+
+    trace_virtio_iommu_get_config(s->config.page_size_mask,
+                                  s->config.input_range.start,
+                                  s->config.input_range.end,
+                                  s->config.domain_range.end,
+                                  s->config.probe_size);
+    memcpy(config_data, &s->config, sizeof(s->config));
+}
+
+/*
+ * Config write hook: the written values are only traced, the device
+ * configuration itself is left untouched.
+ */
+static void virtio_iommu_set_config(VirtIODevice *vdev,
+                                    const uint8_t *config_data)
+{
+    struct virtio_iommu_config written;
+
+    memcpy(&written, config_data, sizeof(written));
+    trace_virtio_iommu_set_config(written.page_size_mask,
+                                  written.input_range.start,
+                                  written.input_range.end,
+                                  written.domain_range.end,
+                                  written.probe_size);
+}
+
+/* Returns the offered features @f extended with the device's own ones. */
+static uint64_t virtio_iommu_get_features(VirtIODevice *vdev, uint64_t f,
+                                          Error **errp)
+{
+    uint64_t offered = f | VIRTIO_IOMMU(vdev)->features;
+
+    trace_virtio_iommu_get_features(offered);
+    return offered;
+}
+
+/*
+ * GTree comparison callback for keys that are unsigned integers stored
+ * directly in the pointer: returns -1, 0 or 1.
+ */
+static gint int_cmp(gconstpointer a, gconstpointer b, gpointer user_data)
+{
+    guint lhs = GPOINTER_TO_UINT(a);
+    guint rhs = GPOINTER_TO_UINT(b);
+
+    if (lhs < rhs) {
+        return -1;
+    }
+    return lhs > rhs;
+}
+
+/*
+ * Realize hook of the virtio-iommu device.
+ *
+ * Creates the request and event virtqueues, fills in the configuration
+ * space and feature bits, and installs the device as the IOMMU of the
+ * PCI bus given through the "primary-bus" link.
+ *
+ * @errp: set when no primary bus was provided
+ */
+static void virtio_iommu_device_realize(DeviceState *dev, Error **errp)
+{
+    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
+    VirtIOIOMMU *s = VIRTIO_IOMMU(dev);
+
+    /*
+     * Check the precondition before acquiring anything, so that a failed
+     * realize leaves no queue, mutex or hash table behind.
+     */
+    if (!s->primary_bus) {
+        error_setg(errp, "VIRTIO-IOMMU is not attached to any PCI bus!");
+        return;
+    }
+
+    virtio_init(vdev, "virtio-iommu", VIRTIO_ID_IOMMU,
+                sizeof(struct virtio_iommu_config));
+
+    memset(s->iommu_pcibus_by_bus_num, 0, sizeof(s->iommu_pcibus_by_bus_num));
+
+    s->req_vq = virtio_add_queue(vdev, VIOMMU_DEFAULT_QUEUE_SIZE,
+                                 virtio_iommu_handle_command);
+    s->event_vq = virtio_add_queue(vdev, VIOMMU_DEFAULT_QUEUE_SIZE, NULL);
+
+    s->config.page_size_mask = TARGET_PAGE_MASK;
+    /* Whole 64-bit input range; -1UL is only 32 bits wide on some hosts. */
+    s->config.input_range.end = UINT64_MAX;
+    s->config.domain_range.end = 32;
+
+    virtio_add_feature(&s->features, VIRTIO_RING_F_EVENT_IDX);
+    virtio_add_feature(&s->features, VIRTIO_RING_F_INDIRECT_DESC);
+    virtio_add_feature(&s->features, VIRTIO_F_VERSION_1);
+    virtio_add_feature(&s->features, VIRTIO_IOMMU_F_INPUT_RANGE);
+    virtio_add_feature(&s->features, VIRTIO_IOMMU_F_DOMAIN_RANGE);
+    virtio_add_feature(&s->features, VIRTIO_IOMMU_F_MAP_UNMAP);
+    virtio_add_feature(&s->features, VIRTIO_IOMMU_F_BYPASS);
+    virtio_add_feature(&s->features, VIRTIO_IOMMU_F_MMIO);
+
+    qemu_mutex_init(&s->mutex);
+
+    /* Keys are PCIBus pointers; the IOMMUPciBus values are owned here. */
+    s->as_by_busptr = g_hash_table_new_full(NULL, NULL, NULL, g_free);
+
+    pci_setup_iommu(s->primary_bus, virtio_iommu_find_add_as, s);
+}
+
+/*
+ * Unrealize hook: releases everything set up by realize and by reset,
+ * namely the per-bus hash table, the domain and endpoint trees, the
+ * mutex and the two virtqueues.
+ */
+static void virtio_iommu_device_unrealize(DeviceState *dev, Error **errp)
+{
+    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
+    VirtIOIOMMU *s = VIRTIO_IOMMU(dev);
+
+    g_hash_table_destroy(s->as_by_busptr);
+
+    /* The trees are only created by the reset handler. */
+    if (s->domains) {
+        g_tree_destroy(s->domains);
+    }
+    if (s->endpoints) {
+        g_tree_destroy(s->endpoints);
+    }
+
+    qemu_mutex_destroy(&s->mutex);
+
+    virtio_delete_queue(s->req_vq);
+    virtio_delete_queue(s->event_vq);
+    virtio_cleanup(vdev);
+}
+
+/*
+ * Reset hook: drops every domain and endpoint and starts over with two
+ * empty trees whose value destructors release domains and endpoints.
+ */
+static void virtio_iommu_device_reset(VirtIODevice *vdev)
+{
+    VirtIOIOMMU *s = VIRTIO_IOMMU(vdev);
+    GCompareDataFunc by_id = (GCompareDataFunc)int_cmp;
+
+    trace_virtio_iommu_device_reset();
+
+    /* Domains go first, then the endpoints. */
+    if (s->domains) {
+        g_tree_destroy(s->domains);
+    }
+    if (s->endpoints) {
+        g_tree_destroy(s->endpoints);
+    }
+
+    s->domains = g_tree_new_full(by_id, NULL, NULL, virtio_iommu_put_domain);
+    s->endpoints = g_tree_new_full(by_id, NULL, NULL,
+                                   virtio_iommu_put_endpoint);
+}
+
+/* Status change hook: only traces the new status value. */
+static void virtio_iommu_set_status(VirtIODevice *vdev, uint8_t status)
+{
+ trace_virtio_iommu_device_status(status);
+}
+
+/* Instance initializer: intentionally empty, no per-instance setup here. */
+static void virtio_iommu_instance_init(Object *obj)
+{
+}
+
+/* Migration description of a mapping key: both bounds of the interval. */
+#define VMSTATE_INTERVAL \
+{ \
+ .name = "interval", \
+ .version_id = 1, \
+ .minimum_version_id = 1, \
+ .fields = (VMStateField[]) { \
+ VMSTATE_UINT64(low, VirtIOIOMMUInterval), \
+ VMSTATE_UINT64(high, VirtIOIOMMUInterval), \
+ VMSTATE_END_OF_LIST() \
+ } \
+}
+
+/* Migration description of a mapping value: output address and flags. */
+#define VMSTATE_MAPPING \
+{ \
+ .name = "mapping", \
+ .version_id = 1, \
+ .minimum_version_id = 1, \
+ .fields = (VMStateField[]) { \
+ VMSTATE_UINT64(phys_addr, VirtIOIOMMUMapping),\
+ VMSTATE_UINT32(flags, VirtIOIOMMUMapping), \
+ VMSTATE_END_OF_LIST() \
+ }, \
+}
+
+/*
+ * Descriptions used for the entries of a domain's mapping tree: element 0
+ * describes the value, element 1 the key.
+ */
+static const VMStateDescription vmstate_interval_mapping[2] = {
+ VMSTATE_MAPPING, /* value */
+ VMSTATE_INTERVAL /* key */
+};
+
+/*
+ * pre_load hook of a domain: gives it an empty mapping tree that owns its
+ * keys and values. Always returns 0.
+ */
+static int domain_preload(void *opaque)
+{
+    VirtIOIOMMUDomain *dom = opaque;
+    GTree *tree = g_tree_new_full((GCompareDataFunc)interval_cmp,
+                                  NULL, g_free, g_free);
+
+    dom->mappings = tree;
+    return 0;
+}
+
+/*
+ * Migration description of an endpoint: only its id is transferred. The
+ * domain back-pointer is not part of the stream.
+ */
+static const VMStateDescription vmstate_endpoint = {
+ .name = "endpoint",
+ .version_id = 1,
+ .minimum_version_id = 1,
+ .fields = (VMStateField[]) {
+ VMSTATE_UINT32(id, VirtIOIOMMUEndpoint),
+ VMSTATE_END_OF_LIST()
+ }
+};
+
+/*
+ * Migration description of a domain: its id, its mapping tree (interval
+ * keys, mapping values) and the list of endpoints attached to it.
+ * domain_preload() runs before loading to create the mapping tree.
+ */
+static const VMStateDescription vmstate_domain = {
+ .name = "domain",
+ .version_id = 1,
+ .minimum_version_id = 1,
+ .pre_load = domain_preload,
+ .fields = (VMStateField[]) {
+ VMSTATE_UINT32(id, VirtIOIOMMUDomain),
+ VMSTATE_GTREE_V(mappings, VirtIOIOMMUDomain, 1,
+ vmstate_interval_mapping,
+ VirtIOIOMMUInterval, VirtIOIOMMUMapping),
+ VMSTATE_QLIST_V(endpoint_list, VirtIOIOMMUDomain, 1,
+ vmstate_endpoint, VirtIOIOMMUEndpoint, next),
+ VMSTATE_END_OF_LIST()
+ }
+};
+
+/*
+ * g_tree_foreach() callback run on each domain after migration: points
+ * every endpoint of the domain back at it and registers the endpoint in
+ * the device's endpoints tree.
+ *
+ * @value: the domain; @data: the VirtIOIOMMU instance.
+ */
+static gboolean reconstruct_endpoints(gpointer key, gpointer value,
+                                      gpointer data)
+{
+    VirtIOIOMMUDomain *dom = value;
+    VirtIOIOMMU *s = data;
+    VirtIOIOMMUEndpoint *ep;
+
+    QLIST_FOREACH(ep, &dom->endpoint_list, next) {
+        ep->domain = dom;
+        g_tree_insert(s->endpoints, GUINT_TO_POINTER(ep->id), ep);
+    }
+    /* false: keep traversing the remaining domains */
+    return false;
+}
+
+/*
+ * post_load hook: rebuilds the endpoints tree and the endpoint -> domain
+ * links from the loaded domains. Always returns 0.
+ */
+static int iommu_post_load(void *opaque, int version_id)
+{
+    VirtIOIOMMU *iommu = opaque;
+
+    g_tree_foreach(iommu->domains, reconstruct_endpoints, iommu);
+
+    return 0;
+}
+
+/*
+ * Device-specific migration state: the tree of domains, keyed directly by
+ * domain id. iommu_post_load() rebuilds the endpoints tree afterwards.
+ */
+static const VMStateDescription vmstate_virtio_iommu_device = {
+ .name = "virtio-iommu-device",
+ .minimum_version_id = 1,
+ .version_id = 1,
+ .post_load = iommu_post_load,
+ .fields = (VMStateField[]) {
+ VMSTATE_GTREE_DIRECT_KEY_V(domains, VirtIOIOMMU, 1,
+ &vmstate_domain, VirtIOIOMMUDomain),
+ VMSTATE_END_OF_LIST()
+ },
+};
+
+/*
+ * Top-level migration description of the device: the generic virtio
+ * device state, migrated with MIG_PRI_IOMMU priority.
+ */
+static const VMStateDescription vmstate_virtio_iommu = {
+ .name = "virtio-iommu",
+ .minimum_version_id = 1,
+ .priority = MIG_PRI_IOMMU,
+ .version_id = 1,
+ .fields = (VMStateField[]) {
+ VMSTATE_VIRTIO_DEVICE,
+ VMSTATE_END_OF_LIST()
+ },
+};
+
+/*
+ * Device properties: "primary-bus" is a link to the PCI bus stored in
+ * the primary_bus field, which realize installs this IOMMU on.
+ */
+static Property virtio_iommu_properties[] = {
+ DEFINE_PROP_LINK("primary-bus", VirtIOIOMMU, primary_bus, "PCI", PCIBus *),
+ DEFINE_PROP_END_OF_LIST(),
+};
+
+/*
+ * Class initializer of the virtio-iommu device: installs the properties,
+ * the migration descriptions and the virtio device callbacks.
+ */
+static void virtio_iommu_class_init(ObjectClass *klass, void *data)
+{
+    VirtioDeviceClass *virtio_class = VIRTIO_DEVICE_CLASS(klass);
+    DeviceClass *dev_class = DEVICE_CLASS(klass);
+
+    /* generic device part */
+    device_class_set_props(dev_class, virtio_iommu_properties);
+    dev_class->vmsd = &vmstate_virtio_iommu;
+    set_bit(DEVICE_CATEGORY_MISC, dev_class->categories);
+
+    /* virtio device lifecycle */
+    virtio_class->realize = virtio_iommu_device_realize;
+    virtio_class->unrealize = virtio_iommu_device_unrealize;
+    virtio_class->reset = virtio_iommu_device_reset;
+
+    /* configuration space, features and status */
+    virtio_class->get_config = virtio_iommu_get_config;
+    virtio_class->set_config = virtio_iommu_set_config;
+    virtio_class->get_features = virtio_iommu_get_features;
+    virtio_class->set_status = virtio_iommu_set_status;
+
+    /* device-specific migration state */
+    virtio_class->vmsd = &vmstate_virtio_iommu_device;
+}
+
+/* Class initializer of the IOMMU memory region: sets the translate hook. */
+static void virtio_iommu_memory_region_class_init(ObjectClass *klass,
+                                                  void *data)
+{
+    IOMMU_MEMORY_REGION_CLASS(klass)->translate = virtio_iommu_translate;
+}
+
+/* QOM type of the virtio-iommu device, derived from TYPE_VIRTIO_DEVICE. */
+static const TypeInfo virtio_iommu_info = {
+ .name = TYPE_VIRTIO_IOMMU,
+ .parent = TYPE_VIRTIO_DEVICE,
+ .instance_size = sizeof(VirtIOIOMMU),
+ .instance_init = virtio_iommu_instance_init,
+ .class_init = virtio_iommu_class_init,
+};
+
+/*
+ * QOM type of the per-device IOMMU memory region, derived from
+ * TYPE_IOMMU_MEMORY_REGION; its class init installs the translate hook.
+ */
+static const TypeInfo virtio_iommu_memory_region_info = {
+ .parent = TYPE_IOMMU_MEMORY_REGION,
+ .name = TYPE_VIRTIO_IOMMU_MEMORY_REGION,
+ .class_init = virtio_iommu_memory_region_class_init,
+};
+
+/* Registers the device type and its IOMMU memory region type. */
+static void virtio_register_types(void)
+{
+ type_register_static(&virtio_iommu_info);
+ type_register_static(&virtio_iommu_memory_region_info);
+}
+
+/* Hook the registration function into type initialization. */
+type_init(virtio_register_types)
diff --git a/hw/virtio/virtio-pmem.c b/hw/virtio/virtio-pmem.c
index 97287e923b..43399522f5 100644
--- a/hw/virtio/virtio-pmem.c
+++ b/hw/virtio/virtio-pmem.c
@@ -130,6 +130,7 @@ static void virtio_pmem_unrealize(DeviceState *dev, Error **errp)
VirtIOPMEM *pmem = VIRTIO_PMEM(dev);
host_memory_backend_set_mapped(pmem->memdev, false);
+ virtio_delete_queue(pmem->rq_vq);
virtio_cleanup(vdev);
}
diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index 9d06dbe3ef..b2d415e5dd 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -282,15 +282,19 @@ static void vring_packed_flags_write(VirtIODevice *vdev,
/* Called within rcu_read_lock(). */
static VRingMemoryRegionCaches *vring_get_region_caches(struct VirtQueue *vq)
{
- VRingMemoryRegionCaches *caches = atomic_rcu_read(&vq->vring.caches);
- assert(caches != NULL);
- return caches;
+ return atomic_rcu_read(&vq->vring.caches);
}
+
/* Called within rcu_read_lock(). */
static inline uint16_t vring_avail_flags(VirtQueue *vq)
{
VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
hwaddr pa = offsetof(VRingAvail, flags);
+
+ if (!caches) {
+ return 0;
+ }
+
return virtio_lduw_phys_cached(vq->vdev, &caches->avail, pa);
}
@@ -299,6 +303,11 @@ static inline uint16_t vring_avail_idx(VirtQueue *vq)
{
VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
hwaddr pa = offsetof(VRingAvail, idx);
+
+ if (!caches) {
+ return 0;
+ }
+
vq->shadow_avail_idx = virtio_lduw_phys_cached(vq->vdev, &caches->avail, pa);
return vq->shadow_avail_idx;
}
@@ -308,6 +317,11 @@ static inline uint16_t vring_avail_ring(VirtQueue *vq, int i)
{
VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
hwaddr pa = offsetof(VRingAvail, ring[i]);
+
+ if (!caches) {
+ return 0;
+ }
+
return virtio_lduw_phys_cached(vq->vdev, &caches->avail, pa);
}
@@ -323,6 +337,11 @@ static inline void vring_used_write(VirtQueue *vq, VRingUsedElem *uelem,
{
VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
hwaddr pa = offsetof(VRingUsed, ring[i]);
+
+ if (!caches) {
+ return;
+ }
+
virtio_tswap32s(vq->vdev, &uelem->id);
virtio_tswap32s(vq->vdev, &uelem->len);
address_space_write_cached(&caches->used, pa, uelem, sizeof(VRingUsedElem));
@@ -334,6 +353,11 @@ static uint16_t vring_used_idx(VirtQueue *vq)
{
VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
hwaddr pa = offsetof(VRingUsed, idx);
+
+ if (!caches) {
+ return 0;
+ }
+
return virtio_lduw_phys_cached(vq->vdev, &caches->used, pa);
}
@@ -342,8 +366,12 @@ static inline void vring_used_idx_set(VirtQueue *vq, uint16_t val)
{
VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
hwaddr pa = offsetof(VRingUsed, idx);
- virtio_stw_phys_cached(vq->vdev, &caches->used, pa, val);
- address_space_cache_invalidate(&caches->used, pa, sizeof(val));
+
+ if (caches) {
+ virtio_stw_phys_cached(vq->vdev, &caches->used, pa, val);
+ address_space_cache_invalidate(&caches->used, pa, sizeof(val));
+ }
+
vq->used_idx = val;
}
@@ -353,8 +381,13 @@ static inline void vring_used_flags_set_bit(VirtQueue *vq, int mask)
VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
VirtIODevice *vdev = vq->vdev;
hwaddr pa = offsetof(VRingUsed, flags);
- uint16_t flags = virtio_lduw_phys_cached(vq->vdev, &caches->used, pa);
+ uint16_t flags;
+ if (!caches) {
+ return;
+ }
+
+ flags = virtio_lduw_phys_cached(vq->vdev, &caches->used, pa);
virtio_stw_phys_cached(vdev, &caches->used, pa, flags | mask);
address_space_cache_invalidate(&caches->used, pa, sizeof(flags));
}
@@ -365,8 +398,13 @@ static inline void vring_used_flags_unset_bit(VirtQueue *vq, int mask)
VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
VirtIODevice *vdev = vq->vdev;
hwaddr pa = offsetof(VRingUsed, flags);
- uint16_t flags = virtio_lduw_phys_cached(vq->vdev, &caches->used, pa);
+ uint16_t flags;
+ if (!caches) {
+ return;
+ }
+
+ flags = virtio_lduw_phys_cached(vq->vdev, &caches->used, pa);
virtio_stw_phys_cached(vdev, &caches->used, pa, flags & ~mask);
address_space_cache_invalidate(&caches->used, pa, sizeof(flags));
}
@@ -381,6 +419,10 @@ static inline void vring_set_avail_event(VirtQueue *vq, uint16_t val)
}
caches = vring_get_region_caches(vq);
+ if (!caches) {
+ return;
+ }
+
pa = offsetof(VRingUsed, ring[vq->vring.num]);
virtio_stw_phys_cached(vq->vdev, &caches->used, pa, val);
address_space_cache_invalidate(&caches->used, pa, sizeof(val));
@@ -410,7 +452,11 @@ static void virtio_queue_packed_set_notification(VirtQueue *vq, int enable)
VRingMemoryRegionCaches *caches;
RCU_READ_LOCK_GUARD();
- caches = vring_get_region_caches(vq);
+ caches = vring_get_region_caches(vq);
+ if (!caches) {
+ return;
+ }
+
vring_packed_event_read(vq->vdev, &caches->used, &e);
if (!enable) {
@@ -597,6 +643,10 @@ static int virtio_queue_packed_empty_rcu(VirtQueue *vq)
}
cache = vring_get_region_caches(vq);
+ if (!cache) {
+ return 1;
+ }
+
vring_packed_desc_read_flags(vq->vdev, &desc.flags, &cache->desc,
vq->last_avail_idx);
@@ -777,6 +827,10 @@ static void virtqueue_packed_fill_desc(VirtQueue *vq,
}
caches = vring_get_region_caches(vq);
+ if (!caches) {
+ return;
+ }
+
vring_packed_desc_write(vq->vdev, &desc, &caches->desc, head, strict_order);
}
@@ -949,6 +1003,10 @@ static void virtqueue_split_get_avail_bytes(VirtQueue *vq,
max = vq->vring.num;
caches = vring_get_region_caches(vq);
+ if (!caches) {
+ goto err;
+ }
+
while ((rc = virtqueue_num_heads(vq, idx)) > 0) {
MemoryRegionCache *desc_cache = &caches->desc;
unsigned int num_bufs;
@@ -1089,6 +1147,9 @@ static void virtqueue_packed_get_avail_bytes(VirtQueue *vq,
max = vq->vring.num;
caches = vring_get_region_caches(vq);
+ if (!caches) {
+ goto err;
+ }
for (;;) {
unsigned int num_bufs = total_bufs;
@@ -1194,6 +1255,10 @@ void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes,
}
caches = vring_get_region_caches(vq);
+ if (!caches) {
+ goto err;
+ }
+
desc_size = virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED) ?
sizeof(VRingPackedDesc) : sizeof(VRingDesc);
if (caches->desc.len < vq->vring.num * desc_size) {
@@ -1388,6 +1453,11 @@ static void *virtqueue_split_pop(VirtQueue *vq, size_t sz)
i = head;
caches = vring_get_region_caches(vq);
+ if (!caches) {
+ virtio_error(vdev, "Region caches not initialized");
+ goto done;
+ }
+
if (caches->desc.len < max * sizeof(VRingDesc)) {
virtio_error(vdev, "Cannot map descriptor ring");
goto done;
@@ -1510,6 +1580,11 @@ static void *virtqueue_packed_pop(VirtQueue *vq, size_t sz)
i = vq->last_avail_idx;
caches = vring_get_region_caches(vq);
+ if (!caches) {
+ virtio_error(vdev, "Region caches not initialized");
+ goto done;
+ }
+
if (caches->desc.len < max * sizeof(VRingDesc)) {
virtio_error(vdev, "Cannot map descriptor ring");
goto done;
@@ -1629,6 +1704,10 @@ static unsigned int virtqueue_packed_drop_all(VirtQueue *vq)
VRingPackedDesc desc;
caches = vring_get_region_caches(vq);
+ if (!caches) {
+ return 0;
+ }
+
desc_cache = &caches->desc;
virtio_queue_set_notification(vq, 0);
@@ -2413,6 +2492,10 @@ static bool virtio_packed_should_notify(VirtIODevice *vdev, VirtQueue *vq)
VRingMemoryRegionCaches *caches;
caches = vring_get_region_caches(vq);
+ if (!caches) {
+ return false;
+ }
+
vring_packed_event_read(vdev, &caches->avail, &e);
old = vq->signalled_used;
diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h
index 71508bf40c..02f500cb8e 100644
--- a/include/hw/arm/virt.h
+++ b/include/hw/arm/virt.h
@@ -125,8 +125,10 @@ typedef struct {
bool virt;
int32_t gic_version;
VirtIOMMUType iommu;
+ uint16_t virtio_iommu_bdf;
struct arm_boot_info bootinfo;
MemMapEntry *memmap;
+ char *pciehb_nodename;
const int *irqmap;
int smp_cpus;
void *fdt;
diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
index 2acd8321af..cfedf5a995 100644
--- a/include/hw/pci/pci.h
+++ b/include/hw/pci/pci.h
@@ -86,6 +86,7 @@ extern bool pci_available;
#define PCI_DEVICE_ID_VIRTIO_9P 0x1009
#define PCI_DEVICE_ID_VIRTIO_VSOCK 0x1012
#define PCI_DEVICE_ID_VIRTIO_PMEM 0x1013
+#define PCI_DEVICE_ID_VIRTIO_IOMMU 0x1014
#define PCI_VENDOR_ID_REDHAT 0x1b36
#define PCI_DEVICE_ID_REDHAT_BRIDGE 0x0001
diff --git a/include/hw/virtio/vhost-user-blk.h b/include/hw/virtio/vhost-user-blk.h
index 108bfadeeb..05ea0ad183 100644
--- a/include/hw/virtio/vhost-user-blk.h
+++ b/include/hw/virtio/vhost-user-blk.h
@@ -36,7 +36,8 @@ typedef struct VHostUserBlk {
struct vhost_dev dev;
struct vhost_inflight *inflight;
VhostUserState vhost_user;
- struct vhost_virtqueue *vqs;
+ struct vhost_virtqueue *vhost_vqs;
+ VirtQueue **virtqs;
guint watch;
bool connected;
} VHostUserBlk;
diff --git a/include/hw/virtio/vhost-user-fs.h b/include/hw/virtio/vhost-user-fs.h
index 9ff1bdb7cf..6f3030d288 100644
--- a/include/hw/virtio/vhost-user-fs.h
+++ b/include/hw/virtio/vhost-user-fs.h
@@ -37,6 +37,8 @@ typedef struct {
struct vhost_virtqueue *vhost_vqs;
struct vhost_dev vhost_dev;
VhostUserState vhost_user;
+ VirtQueue **req_vqs;
+ VirtQueue *hiprio_vq;
/*< public >*/
} VHostUserFS;
diff --git a/include/hw/virtio/virtio-iommu.h b/include/hw/virtio/virtio-iommu.h
new file mode 100644
index 0000000000..6f67f1020a
--- /dev/null
+++ b/include/hw/virtio/virtio-iommu.h
@@ -0,0 +1,61 @@
+/*
+ * virtio-iommu device
+ *
+ * Copyright (c) 2020 Red Hat, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2 or later, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#ifndef QEMU_VIRTIO_IOMMU_H
+#define QEMU_VIRTIO_IOMMU_H
+
+#include "standard-headers/linux/virtio_iommu.h"
+#include "hw/virtio/virtio.h"
+#include "hw/pci/pci.h"
+
+#define TYPE_VIRTIO_IOMMU "virtio-iommu-device"
+#define TYPE_VIRTIO_IOMMU_PCI "virtio-iommu-device-base"
+#define VIRTIO_IOMMU(obj) \
+ OBJECT_CHECK(VirtIOIOMMU, (obj), TYPE_VIRTIO_IOMMU)
+
+#define TYPE_VIRTIO_IOMMU_MEMORY_REGION "virtio-iommu-memory-region"
+
+typedef struct IOMMUDevice {
+ void *viommu;
+ PCIBus *bus;
+ int devfn;
+ IOMMUMemoryRegion iommu_mr;
+ AddressSpace as;
+} IOMMUDevice;
+
+typedef struct IOMMUPciBus {
+ PCIBus *bus;
+ IOMMUDevice *pbdev[0]; /* Parent array is sparse, so dynamically alloc */
+} IOMMUPciBus;
+
+typedef struct VirtIOIOMMU {
+ VirtIODevice parent_obj;
+ VirtQueue *req_vq;
+ VirtQueue *event_vq;
+ struct virtio_iommu_config config;
+ uint64_t features;
+ GHashTable *as_by_busptr;
+ IOMMUPciBus *iommu_pcibus_by_bus_num[PCI_BUS_MAX];
+ PCIBus *primary_bus;
+ GTree *domains;
+ QemuMutex mutex;
+ GTree *endpoints;
+} VirtIOIOMMU;
+
+#endif
diff --git a/include/qemu/bitops.h b/include/qemu/bitops.h
index 02c1ce6a5d..f55ce8b320 100644
--- a/include/qemu/bitops.h
+++ b/include/qemu/bitops.h
@@ -302,6 +302,44 @@ static inline uint32_t extract32(uint32_t value, int start, int length)
}
/**
+ * extract8:
+ * @value: the value to extract the bit field from
+ * @start: the lowest bit in the bit field (numbered from 0)
+ * @length: the length of the bit field
+ *
+ * Extract from the 8 bit input @value the bit field specified by the
+ * @start and @length parameters, and return it. The bit field must
+ * lie entirely within the 8 bit word. It is valid to request that
+ * all 8 bits are returned (ie @length 8 and @start 0).
+ *
+ * Returns: the value of the bit field extracted from the input value.
+ */
+static inline uint8_t extract8(uint8_t value, int start, int length)
+{
+ assert(start >= 0 && length > 0 && length <= 8 - start);
+ return extract32(value, start, length);
+}
+
+/**
+ * extract16:
+ * @value: the value to extract the bit field from
+ * @start: the lowest bit in the bit field (numbered from 0)
+ * @length: the length of the bit field
+ *
+ * Extract from the 16 bit input @value the bit field specified by the
+ * @start and @length parameters, and return it. The bit field must
+ * lie entirely within the 16 bit word. It is valid to request that
+ * all 16 bits are returned (ie @length 16 and @start 0).
+ *
+ * Returns: the value of the bit field extracted from the input value.
+ */
+static inline uint16_t extract16(uint16_t value, int start, int length)
+{
+ assert(start >= 0 && length > 0 && length <= 16 - start);
+ return extract32(value, start, length);
+}
+
+/**
* extract64:
* @value: the value to extract the bit field from
* @start: the lowest bit in the bit field (numbered from 0)
diff --git a/include/standard-headers/drm/drm_fourcc.h b/include/standard-headers/drm/drm_fourcc.h
index 46d279f515..66e838074c 100644
--- a/include/standard-headers/drm/drm_fourcc.h
+++ b/include/standard-headers/drm/drm_fourcc.h
@@ -410,6 +410,30 @@ extern "C" {
#define I915_FORMAT_MOD_Yf_TILED_CCS fourcc_mod_code(INTEL, 5)
/*
+ * Intel color control surfaces (CCS) for Gen-12 render compression.
+ *
+ * The main surface is Y-tiled and at plane index 0, the CCS is linear and
+ * at index 1. A 64B CCS cache line corresponds to an area of 4x1 tiles in
+ * main surface. In other words, 4 bits in CCS map to a main surface cache
+ * line pair. The main surface pitch is required to be a multiple of four
+ * Y-tile widths.
+ */
+#define I915_FORMAT_MOD_Y_TILED_GEN12_RC_CCS fourcc_mod_code(INTEL, 6)
+
+/*
+ * Intel color control surfaces (CCS) for Gen-12 media compression
+ *
+ * The main surface is Y-tiled and at plane index 0, the CCS is linear and
+ * at index 1. A 64B CCS cache line corresponds to an area of 4x1 tiles in
+ * main surface. In other words, 4 bits in CCS map to a main surface cache
+ * line pair. The main surface pitch is required to be a multiple of four
+ * Y-tile widths. For semi-planar formats like NV12, CCS planes follow the
+ * Y and UV planes i.e., planes 0 and 1 are used for Y and UV surfaces,
+ * planes 2 and 3 for the respective CCS.
+ */
+#define I915_FORMAT_MOD_Y_TILED_GEN12_MC_CCS fourcc_mod_code(INTEL, 7)
+
+/*
* Tiled, NV12MT, grouped in 64 (pixels) x 32 (lines) -sized macroblocks
*
* Macroblocks are laid in a Z-shape, and each pixel data is following the
diff --git a/include/standard-headers/linux/ethtool.h b/include/standard-headers/linux/ethtool.h
index 6e8a10ee10..8adf3b018b 100644
--- a/include/standard-headers/linux/ethtool.h
+++ b/include/standard-headers/linux/ethtool.h
@@ -593,6 +593,9 @@ struct ethtool_pauseparam {
* @ETH_SS_RSS_HASH_FUNCS: RSS hush function names
* @ETH_SS_PHY_STATS: Statistic names, for use with %ETHTOOL_GPHYSTATS
* @ETH_SS_PHY_TUNABLES: PHY tunable names
+ * @ETH_SS_LINK_MODES: link mode names
+ * @ETH_SS_MSG_CLASSES: debug message class names
+ * @ETH_SS_WOL_MODES: wake-on-lan modes
*/
enum ethtool_stringset {
ETH_SS_TEST = 0,
@@ -604,6 +607,12 @@ enum ethtool_stringset {
ETH_SS_TUNABLES,
ETH_SS_PHY_STATS,
ETH_SS_PHY_TUNABLES,
+ ETH_SS_LINK_MODES,
+ ETH_SS_MSG_CLASSES,
+ ETH_SS_WOL_MODES,
+
+ /* add new constants above here */
+ ETH_SS_COUNT
};
/**
@@ -1688,6 +1697,8 @@ static inline int ethtool_validate_duplex(uint8_t duplex)
#define WAKE_MAGICSECURE (1 << 6) /* only meaningful if WAKE_MAGIC */
#define WAKE_FILTER (1 << 7)
+#define WOL_MODE_COUNT 8
+
/* L2-L4 network traffic flow types */
#define TCP_V4_FLOW 0x01 /* hash or spec (tcp_ip4_spec) */
#define UDP_V4_FLOW 0x02 /* hash or spec (udp_ip4_spec) */
diff --git a/include/standard-headers/linux/input.h b/include/standard-headers/linux/input.h
index d8914f25a5..f89c986190 100644
--- a/include/standard-headers/linux/input.h
+++ b/include/standard-headers/linux/input.h
@@ -31,6 +31,7 @@ struct input_event {
unsigned long __sec;
#if defined(__sparc__) && defined(__arch64__)
unsigned int __usec;
+ unsigned int __pad;
#else
unsigned long __usec;
#endif
diff --git a/include/standard-headers/linux/pci_regs.h b/include/standard-headers/linux/pci_regs.h
index acb7d2bdb4..5437690483 100644
--- a/include/standard-headers/linux/pci_regs.h
+++ b/include/standard-headers/linux/pci_regs.h
@@ -676,6 +676,7 @@
#define PCI_EXP_LNKCTL2_TLS_32_0GT 0x0005 /* Supported Speed 32GT/s */
#define PCI_EXP_LNKCTL2_ENTER_COMP 0x0010 /* Enter Compliance */
#define PCI_EXP_LNKCTL2_TX_MARGIN 0x0380 /* Transmit Margin */
+#define PCI_EXP_LNKCTL2_HASD 0x0020 /* HW Autonomous Speed Disable */
#define PCI_EXP_LNKSTA2 50 /* Link Status 2 */
#define PCI_CAP_EXP_ENDPOINT_SIZEOF_V2 52 /* v2 endpoints with link end here */
#define PCI_EXP_SLTCAP2 52 /* Slot Capabilities 2 */
diff --git a/linux-headers/asm-arm/unistd-common.h b/linux-headers/asm-arm/unistd-common.h
index eb5d361b11..23de64e44c 100644
--- a/linux-headers/asm-arm/unistd-common.h
+++ b/linux-headers/asm-arm/unistd-common.h
@@ -390,5 +390,7 @@
#define __NR_fspick (__NR_SYSCALL_BASE + 433)
#define __NR_pidfd_open (__NR_SYSCALL_BASE + 434)
#define __NR_clone3 (__NR_SYSCALL_BASE + 435)
+#define __NR_openat2 (__NR_SYSCALL_BASE + 437)
+#define __NR_pidfd_getfd (__NR_SYSCALL_BASE + 438)
#endif /* _ASM_ARM_UNISTD_COMMON_H */
diff --git a/linux-headers/asm-arm64/kvm.h b/linux-headers/asm-arm64/kvm.h
index 920af01c8b..9e34f0f875 100644
--- a/linux-headers/asm-arm64/kvm.h
+++ b/linux-headers/asm-arm64/kvm.h
@@ -220,10 +220,18 @@ struct kvm_vcpu_events {
#define KVM_REG_ARM_PTIMER_CVAL ARM64_SYS_REG(3, 3, 14, 2, 2)
#define KVM_REG_ARM_PTIMER_CNT ARM64_SYS_REG(3, 3, 14, 0, 1)
-/* EL0 Virtual Timer Registers */
+/*
+ * EL0 Virtual Timer Registers
+ *
+ * WARNING:
+ * KVM_REG_ARM_TIMER_CVAL and KVM_REG_ARM_TIMER_CNT are not defined
+ * with the appropriate register encodings. Their values have been
+ * accidentally swapped. As this is set API, the definitions here
+ * must be used, rather than ones derived from the encodings.
+ */
#define KVM_REG_ARM_TIMER_CTL ARM64_SYS_REG(3, 3, 14, 3, 1)
-#define KVM_REG_ARM_TIMER_CNT ARM64_SYS_REG(3, 3, 14, 3, 2)
#define KVM_REG_ARM_TIMER_CVAL ARM64_SYS_REG(3, 3, 14, 0, 2)
+#define KVM_REG_ARM_TIMER_CNT ARM64_SYS_REG(3, 3, 14, 3, 2)
/* KVM-as-firmware specific pseudo-registers */
#define KVM_REG_ARM_FW (0x0014 << KVM_REG_ARM_COPROC_SHIFT)
diff --git a/linux-headers/asm-arm64/unistd.h b/linux-headers/asm-arm64/unistd.h
index 4703d21866..f83a70e07d 100644
--- a/linux-headers/asm-arm64/unistd.h
+++ b/linux-headers/asm-arm64/unistd.h
@@ -19,5 +19,6 @@
#define __ARCH_WANT_NEW_STAT
#define __ARCH_WANT_SET_GET_RLIMIT
#define __ARCH_WANT_TIME32_SYSCALLS
+#define __ARCH_WANT_SYS_CLONE3
#include <asm-generic/unistd.h>
diff --git a/linux-headers/asm-generic/mman-common.h b/linux-headers/asm-generic/mman-common.h
index c160a5354e..f94f65d429 100644
--- a/linux-headers/asm-generic/mman-common.h
+++ b/linux-headers/asm-generic/mman-common.h
@@ -11,6 +11,8 @@
#define PROT_WRITE 0x2 /* page can be written */
#define PROT_EXEC 0x4 /* page can be executed */
#define PROT_SEM 0x8 /* page may be used for atomic ops */
+/* 0x10 reserved for arch-specific use */
+/* 0x20 reserved for arch-specific use */
#define PROT_NONE 0x0 /* page can not be accessed */
#define PROT_GROWSDOWN 0x01000000 /* mprotect flag: extend change to start of growsdown vma */
#define PROT_GROWSUP 0x02000000 /* mprotect flag: extend change to end of growsup vma */
diff --git a/linux-headers/asm-generic/unistd.h b/linux-headers/asm-generic/unistd.h
index 1fc8faa6e9..3a3201e461 100644
--- a/linux-headers/asm-generic/unistd.h
+++ b/linux-headers/asm-generic/unistd.h
@@ -851,8 +851,13 @@ __SYSCALL(__NR_pidfd_open, sys_pidfd_open)
__SYSCALL(__NR_clone3, sys_clone3)
#endif
+#define __NR_openat2 437
+__SYSCALL(__NR_openat2, sys_openat2)
+#define __NR_pidfd_getfd 438
+__SYSCALL(__NR_pidfd_getfd, sys_pidfd_getfd)
+
#undef __NR_syscalls
-#define __NR_syscalls 436
+#define __NR_syscalls 439
/*
* 32 bit systems traditionally used different
diff --git a/linux-headers/asm-mips/unistd_n32.h b/linux-headers/asm-mips/unistd_n32.h
index 659d5c9ade..aec9f6081a 100644
--- a/linux-headers/asm-mips/unistd_n32.h
+++ b/linux-headers/asm-mips/unistd_n32.h
@@ -365,6 +365,8 @@
#define __NR_fspick (__NR_Linux + 433)
#define __NR_pidfd_open (__NR_Linux + 434)
#define __NR_clone3 (__NR_Linux + 435)
+#define __NR_openat2 (__NR_Linux + 437)
+#define __NR_pidfd_getfd (__NR_Linux + 438)
#endif /* _ASM_MIPS_UNISTD_N32_H */
diff --git a/linux-headers/asm-mips/unistd_n64.h b/linux-headers/asm-mips/unistd_n64.h
index 4b6310a05c..1c75d83df5 100644
--- a/linux-headers/asm-mips/unistd_n64.h
+++ b/linux-headers/asm-mips/unistd_n64.h
@@ -341,6 +341,8 @@
#define __NR_fspick (__NR_Linux + 433)
#define __NR_pidfd_open (__NR_Linux + 434)
#define __NR_clone3 (__NR_Linux + 435)
+#define __NR_openat2 (__NR_Linux + 437)
+#define __NR_pidfd_getfd (__NR_Linux + 438)
#endif /* _ASM_MIPS_UNISTD_N64_H */
diff --git a/linux-headers/asm-mips/unistd_o32.h b/linux-headers/asm-mips/unistd_o32.h
index 4ce7b4e288..660716e240 100644
--- a/linux-headers/asm-mips/unistd_o32.h
+++ b/linux-headers/asm-mips/unistd_o32.h
@@ -411,6 +411,8 @@
#define __NR_fspick (__NR_Linux + 433)
#define __NR_pidfd_open (__NR_Linux + 434)
#define __NR_clone3 (__NR_Linux + 435)
+#define __NR_openat2 (__NR_Linux + 437)
+#define __NR_pidfd_getfd (__NR_Linux + 438)
#endif /* _ASM_MIPS_UNISTD_O32_H */
diff --git a/linux-headers/asm-powerpc/unistd_32.h b/linux-headers/asm-powerpc/unistd_32.h
index 5584cc1b4f..4ba8e32f73 100644
--- a/linux-headers/asm-powerpc/unistd_32.h
+++ b/linux-headers/asm-powerpc/unistd_32.h
@@ -418,6 +418,8 @@
#define __NR_fspick 433
#define __NR_pidfd_open 434
#define __NR_clone3 435
+#define __NR_openat2 437
+#define __NR_pidfd_getfd 438
#endif /* _ASM_POWERPC_UNISTD_32_H */
diff --git a/linux-headers/asm-powerpc/unistd_64.h b/linux-headers/asm-powerpc/unistd_64.h
index 251bcff77e..ac20bb4f95 100644
--- a/linux-headers/asm-powerpc/unistd_64.h
+++ b/linux-headers/asm-powerpc/unistd_64.h
@@ -390,6 +390,8 @@
#define __NR_fspick 433
#define __NR_pidfd_open 434
#define __NR_clone3 435
+#define __NR_openat2 437
+#define __NR_pidfd_getfd 438
#endif /* _ASM_POWERPC_UNISTD_64_H */
diff --git a/linux-headers/asm-s390/unistd_32.h b/linux-headers/asm-s390/unistd_32.h
index 7cce3ee296..e4a6b654f1 100644
--- a/linux-headers/asm-s390/unistd_32.h
+++ b/linux-headers/asm-s390/unistd_32.h
@@ -408,5 +408,7 @@
#define __NR_fspick 433
#define __NR_pidfd_open 434
#define __NR_clone3 435
+#define __NR_openat2 437
+#define __NR_pidfd_getfd 438
#endif /* _ASM_S390_UNISTD_32_H */
diff --git a/linux-headers/asm-s390/unistd_64.h b/linux-headers/asm-s390/unistd_64.h
index 2371ff1e7a..472f732956 100644
--- a/linux-headers/asm-s390/unistd_64.h
+++ b/linux-headers/asm-s390/unistd_64.h
@@ -356,5 +356,7 @@
#define __NR_fspick 433
#define __NR_pidfd_open 434
#define __NR_clone3 435
+#define __NR_openat2 437
+#define __NR_pidfd_getfd 438
#endif /* _ASM_S390_UNISTD_64_H */
diff --git a/linux-headers/asm-x86/unistd_32.h b/linux-headers/asm-x86/unistd_32.h
index e8ebec1cdc..f6e06fcfbd 100644
--- a/linux-headers/asm-x86/unistd_32.h
+++ b/linux-headers/asm-x86/unistd_32.h
@@ -426,5 +426,7 @@
#define __NR_fspick 433
#define __NR_pidfd_open 434
#define __NR_clone3 435
+#define __NR_openat2 437
+#define __NR_pidfd_getfd 438
#endif /* _ASM_X86_UNISTD_32_H */
diff --git a/linux-headers/asm-x86/unistd_64.h b/linux-headers/asm-x86/unistd_64.h
index a2f863d549..924f826d2d 100644
--- a/linux-headers/asm-x86/unistd_64.h
+++ b/linux-headers/asm-x86/unistd_64.h
@@ -348,5 +348,7 @@
#define __NR_fspick 433
#define __NR_pidfd_open 434
#define __NR_clone3 435
+#define __NR_openat2 437
+#define __NR_pidfd_getfd 438
#endif /* _ASM_X86_UNISTD_64_H */
diff --git a/linux-headers/asm-x86/unistd_x32.h b/linux-headers/asm-x86/unistd_x32.h
index 4cdc67d848..010307757b 100644
--- a/linux-headers/asm-x86/unistd_x32.h
+++ b/linux-headers/asm-x86/unistd_x32.h
@@ -301,6 +301,8 @@
#define __NR_fspick (__X32_SYSCALL_BIT + 433)
#define __NR_pidfd_open (__X32_SYSCALL_BIT + 434)
#define __NR_clone3 (__X32_SYSCALL_BIT + 435)
+#define __NR_openat2 (__X32_SYSCALL_BIT + 437)
+#define __NR_pidfd_getfd (__X32_SYSCALL_BIT + 438)
#define __NR_rt_sigaction (__X32_SYSCALL_BIT + 512)
#define __NR_rt_sigreturn (__X32_SYSCALL_BIT + 513)
#define __NR_ioctl (__X32_SYSCALL_BIT + 514)
diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h
index 9d647fad76..265099100e 100644
--- a/linux-headers/linux/kvm.h
+++ b/linux-headers/linux/kvm.h
@@ -1009,6 +1009,7 @@ struct kvm_ppc_resize_hpt {
#define KVM_CAP_PPC_GUEST_DEBUG_SSTEP 176
#define KVM_CAP_ARM_NISV_TO_USER 177
#define KVM_CAP_ARM_INJECT_EXT_DABT 178
+#define KVM_CAP_S390_VCPU_RESETS 179
#ifdef KVM_CAP_IRQ_ROUTING
@@ -1473,6 +1474,10 @@ struct kvm_enc_region {
/* Available with KVM_CAP_ARM_SVE */
#define KVM_ARM_VCPU_FINALIZE _IOW(KVMIO, 0xc2, int)
+/* Available with KVM_CAP_S390_VCPU_RESETS */
+#define KVM_S390_NORMAL_RESET _IO(KVMIO, 0xc3)
+#define KVM_S390_CLEAR_RESET _IO(KVMIO, 0xc4)
+
/* Secure Encrypted Virtualization command */
enum sev_cmd_id {
/* Guest initialization commands */
diff --git a/nbd/server.c b/nbd/server.c
index 87fcd2e7bf..11a31094ff 100644
--- a/nbd/server.c
+++ b/nbd/server.c
@@ -2384,15 +2384,23 @@ static coroutine_fn int nbd_handle_request(NBDClient *client,
!client->export_meta.bitmap,
NBD_META_ID_BASE_ALLOCATION,
errp);
- } else { /* client->export_meta.bitmap */
+ if (ret < 0) {
+ return ret;
+ }
+ }
+
+ if (client->export_meta.bitmap) {
ret = nbd_co_send_bitmap(client, request->handle,
client->exp->export_bitmap,
request->from, request->len,
dont_fragment,
true, NBD_META_ID_DIRTY_BITMAP, errp);
+ if (ret < 0) {
+ return ret;
+ }
}
- return ret;
+ return 0;
} else {
return nbd_send_generic_reply(client, request->handle, -EINVAL,
"CMD_BLOCK_STATUS not negotiated",
diff --git a/plugins/core.c b/plugins/core.c
index 9e1b9e7a91..ed863011ba 100644
--- a/plugins/core.c
+++ b/plugins/core.c
@@ -286,6 +286,7 @@ static inline uint32_t cb_to_tcg_flags(enum qemu_plugin_cb_flags flags)
switch (flags) {
case QEMU_PLUGIN_CB_RW_REGS:
ret = 0;
+ break;
case QEMU_PLUGIN_CB_R_REGS:
ret = TCG_CALL_NO_WG;
break;
diff --git a/qdev-monitor.c b/qdev-monitor.c
index 8a2a9538cd..9833b33549 100644
--- a/qdev-monitor.c
+++ b/qdev-monitor.c
@@ -67,6 +67,7 @@ static const QDevAlias qdev_alias_table[] = {
{ "virtio-input-host-ccw", "virtio-input-host", QEMU_ARCH_S390X },
{ "virtio-input-host-pci", "virtio-input-host",
QEMU_ARCH_ALL & ~QEMU_ARCH_S390X },
+ { "virtio-iommu-pci", "virtio-iommu", QEMU_ARCH_ALL & ~QEMU_ARCH_S390X },
{ "virtio-keyboard-ccw", "virtio-keyboard", QEMU_ARCH_S390X },
{ "virtio-keyboard-pci", "virtio-keyboard",
QEMU_ARCH_ALL & ~QEMU_ARCH_S390X },
diff --git a/softmmu/vl.c b/softmmu/vl.c
index a9cce78f45..16ff5a16a3 100644
--- a/softmmu/vl.c
+++ b/softmmu/vl.c
@@ -2558,7 +2558,7 @@ static bool object_create_delayed(const char *type, QemuOpts *opts)
}
-static void set_memory_options(uint64_t *ram_slots, ram_addr_t *maxram_size,
+static bool set_memory_options(uint64_t *ram_slots, ram_addr_t *maxram_size,
MachineClass *mc)
{
uint64_t sz;
@@ -2634,30 +2634,8 @@ static void set_memory_options(uint64_t *ram_slots, ram_addr_t *maxram_size,
exit(EXIT_FAILURE);
}
- if (current_machine->ram_memdev_id) {
- Object *backend;
- ram_addr_t backend_size;
-
- backend = object_resolve_path_type(current_machine->ram_memdev_id,
- TYPE_MEMORY_BACKEND, NULL);
- backend_size = object_property_get_uint(backend, "size", &error_abort);
- if (mem_str && backend_size != ram_size) {
- error_report("Size specified by -m option must match size of "
- "explicitly specified 'memory-backend' property");
- exit(EXIT_FAILURE);
- }
- ram_size = backend_size;
- }
-
- if (!xen_enabled()) {
- /* On 32-bit hosts, QEMU is limited by virtual address space */
- if (ram_size > (2047 << 20) && HOST_LONG_BITS == 32) {
- error_report("at most 2047 MB RAM can be simulated");
- exit(1);
- }
- }
-
loc_pop(&loc);
+ return !!mem_str;
}
static int global_init_func(void *opaque, QemuOpts *opts, Error **errp)
@@ -2861,6 +2839,7 @@ void qemu_init(int argc, char **argv, char **envp)
bool list_data_dirs = false;
char *dir, **dirs;
const char *mem_path = NULL;
+ bool have_custom_ram_size;
BlockdevOptionsQueue bdo_queue = QSIMPLEQ_HEAD_INITIALIZER(bdo_queue);
QemuPluginList plugin_list = QTAILQ_HEAD_INITIALIZER(plugin_list);
int mem_prealloc = 0; /* force preallocation of physical target memory */
@@ -3821,6 +3800,9 @@ void qemu_init(int argc, char **argv, char **envp)
machine_class = select_machine();
object_set_machine_compat_props(machine_class->compat_props);
+ have_custom_ram_size = set_memory_options(&ram_slots, &maxram_size,
+ machine_class);
+
os_daemonize();
/*
@@ -4296,7 +4278,29 @@ void qemu_init(int argc, char **argv, char **envp)
current_machine->cpu_type = parse_cpu_option(cpu_option);
}
- set_memory_options(&ram_slots, &maxram_size, machine_class);
+ if (current_machine->ram_memdev_id) {
+ Object *backend;
+ ram_addr_t backend_size;
+
+ backend = object_resolve_path_type(current_machine->ram_memdev_id,
+ TYPE_MEMORY_BACKEND, NULL);
+ backend_size = object_property_get_uint(backend, "size", &error_abort);
+ if (have_custom_ram_size && backend_size != ram_size) {
+ error_report("Size specified by -m option must match size of "
+ "explicitly specified 'memory-backend' property");
+ exit(EXIT_FAILURE);
+ }
+ ram_size = backend_size;
+ }
+
+ if (!xen_enabled()) {
+ /* On 32-bit hosts, QEMU is limited by virtual address space */
+ if (ram_size > (2047 << 20) && HOST_LONG_BITS == 32) {
+ error_report("at most 2047 MB RAM can be simulated");
+ exit(1);
+ }
+ }
+
current_machine->ram_size = ram_size;
current_machine->maxram_size = maxram_size;
current_machine->ram_slots = ram_slots;
diff --git a/target/riscv/instmap.h b/target/riscv/instmap.h
index f8ad7d60fd..40b6d2b64d 100644
--- a/target/riscv/instmap.h
+++ b/target/riscv/instmap.h
@@ -344,8 +344,8 @@ enum {
#define GET_C_LW_IMM(inst) ((extract32(inst, 6, 1) << 2) \
| (extract32(inst, 10, 3) << 3) \
| (extract32(inst, 5, 1) << 6))
-#define GET_C_LD_IMM(inst) ((extract32(inst, 10, 3) << 3) \
- | (extract32(inst, 5, 2) << 6))
+#define GET_C_LD_IMM(inst) ((extract16(inst, 10, 3) << 3) \
+ | (extract16(inst, 5, 2) << 6))
#define GET_C_J_IMM(inst) ((extract32(inst, 3, 3) << 1) \
| (extract32(inst, 11, 1) << 4) \
| (extract32(inst, 2, 1) << 5) \
@@ -363,7 +363,7 @@ enum {
#define GET_C_RD(inst) GET_RD(inst)
#define GET_C_RS1(inst) GET_RD(inst)
#define GET_C_RS2(inst) extract32(inst, 2, 5)
-#define GET_C_RS1S(inst) (8 + extract32(inst, 7, 3))
-#define GET_C_RS2S(inst) (8 + extract32(inst, 2, 3))
+#define GET_C_RS1S(inst) (8 + extract16(inst, 7, 3))
+#define GET_C_RS2S(inst) (8 + extract16(inst, 2, 3))
#endif
diff --git a/target/riscv/translate.c b/target/riscv/translate.c
index 14dc71156b..d5de7f468a 100644
--- a/target/riscv/translate.c
+++ b/target/riscv/translate.c
@@ -44,7 +44,6 @@ typedef struct DisasContext {
/* pc_succ_insn points to the instruction following base.pc_next */
target_ulong pc_succ_insn;
target_ulong priv_ver;
- uint32_t opcode;
uint32_t mstatus_fs;
uint32_t misa;
uint32_t mem_idx;
@@ -492,45 +491,45 @@ static void gen_set_rm(DisasContext *ctx, int rm)
tcg_temp_free_i32(t0);
}
-static void decode_RV32_64C0(DisasContext *ctx)
+static void decode_RV32_64C0(DisasContext *ctx, uint16_t opcode)
{
- uint8_t funct3 = extract32(ctx->opcode, 13, 3);
- uint8_t rd_rs2 = GET_C_RS2S(ctx->opcode);
- uint8_t rs1s = GET_C_RS1S(ctx->opcode);
+ uint8_t funct3 = extract16(opcode, 13, 3);
+ uint8_t rd_rs2 = GET_C_RS2S(opcode);
+ uint8_t rs1s = GET_C_RS1S(opcode);
switch (funct3) {
case 3:
#if defined(TARGET_RISCV64)
/* C.LD(RV64/128) -> ld rd', offset[7:3](rs1')*/
gen_load_c(ctx, OPC_RISC_LD, rd_rs2, rs1s,
- GET_C_LD_IMM(ctx->opcode));
+ GET_C_LD_IMM(opcode));
#else
/* C.FLW (RV32) -> flw rd', offset[6:2](rs1')*/
gen_fp_load(ctx, OPC_RISC_FLW, rd_rs2, rs1s,
- GET_C_LW_IMM(ctx->opcode));
+ GET_C_LW_IMM(opcode));
#endif
break;
case 7:
#if defined(TARGET_RISCV64)
/* C.SD (RV64/128) -> sd rs2', offset[7:3](rs1')*/
gen_store_c(ctx, OPC_RISC_SD, rs1s, rd_rs2,
- GET_C_LD_IMM(ctx->opcode));
+ GET_C_LD_IMM(opcode));
#else
/* C.FSW (RV32) -> fsw rs2', offset[6:2](rs1')*/
gen_fp_store(ctx, OPC_RISC_FSW, rs1s, rd_rs2,
- GET_C_LW_IMM(ctx->opcode));
+ GET_C_LW_IMM(opcode));
#endif
break;
}
}
-static void decode_RV32_64C(DisasContext *ctx)
+static void decode_RV32_64C(DisasContext *ctx, uint16_t opcode)
{
- uint8_t op = extract32(ctx->opcode, 0, 2);
+ uint8_t op = extract16(opcode, 0, 2);
switch (op) {
case 0:
- decode_RV32_64C0(ctx);
+ decode_RV32_64C0(ctx, opcode);
break;
}
}
@@ -709,22 +708,25 @@ static bool gen_shift(DisasContext *ctx, arg_r *a,
/* Include the auto-generated decoder for 16 bit insn */
#include "decode_insn16.inc.c"
-static void decode_opc(DisasContext *ctx)
+static void decode_opc(CPURISCVState *env, DisasContext *ctx, uint16_t opcode)
{
/* check for compressed insn */
- if (extract32(ctx->opcode, 0, 2) != 3) {
+ if (extract16(opcode, 0, 2) != 3) {
if (!has_ext(ctx, RVC)) {
gen_exception_illegal(ctx);
} else {
ctx->pc_succ_insn = ctx->base.pc_next + 2;
- if (!decode_insn16(ctx, ctx->opcode)) {
+ if (!decode_insn16(ctx, opcode)) {
/* fall back to old decoder */
- decode_RV32_64C(ctx);
+ decode_RV32_64C(ctx, opcode);
}
}
} else {
+ uint32_t opcode32 = opcode;
+ opcode32 = deposit32(opcode32, 16, 16,
+ translator_lduw(env, ctx->base.pc_next + 2));
ctx->pc_succ_insn = ctx->base.pc_next + 4;
- if (!decode_insn32(ctx, ctx->opcode)) {
+ if (!decode_insn32(ctx, opcode32)) {
gen_exception_illegal(ctx);
}
}
@@ -776,9 +778,9 @@ static void riscv_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
{
DisasContext *ctx = container_of(dcbase, DisasContext, base);
CPURISCVState *env = cpu->env_ptr;
+ uint16_t opcode16 = translator_lduw(env, ctx->base.pc_next);
- ctx->opcode = translator_ldl(env, ctx->base.pc_next);
- decode_opc(ctx);
+ decode_opc(env, ctx, opcode16);
ctx->base.pc_next = ctx->pc_succ_insn;
if (ctx->base.is_jmp == DISAS_NEXT) {
diff --git a/target/s390x/cpu.c b/target/s390x/cpu.c
index cf84d307c6..3dd396e870 100644
--- a/target/s390x/cpu.c
+++ b/target/s390x/cpu.c
@@ -78,13 +78,13 @@ static void s390_cpu_load_normal(CPUState *s)
S390CPU *cpu = S390_CPU(s);
uint64_t spsw = ldq_phys(s->as, 0);
- cpu->env.psw.mask = spsw & 0xffffffff80000000ULL;
+ cpu->env.psw.mask = spsw & PSW_MASK_SHORT_CTRL;
/*
* Invert short psw indication, so SIE will report a specification
* exception if it was not set.
*/
cpu->env.psw.mask ^= PSW_MASK_SHORTPSW;
- cpu->env.psw.addr = spsw & 0x7fffffffULL;
+ cpu->env.psw.addr = spsw & PSW_MASK_SHORT_ADDR;
s390_cpu_set_state(S390_CPU_STATE_OPERATING, cpu);
}
@@ -144,8 +144,18 @@ static void s390_cpu_reset(CPUState *s, cpu_reset_type type)
}
/* Reset state inside the kernel that we cannot access yet from QEMU. */
- if (kvm_enabled() && type != S390_CPU_RESET_NORMAL) {
- kvm_s390_reset_vcpu(cpu);
+ if (kvm_enabled()) {
+ switch (type) {
+ case S390_CPU_RESET_CLEAR:
+ kvm_s390_reset_vcpu_clear(cpu);
+ break;
+ case S390_CPU_RESET_INITIAL:
+ kvm_s390_reset_vcpu_initial(cpu);
+ break;
+ case S390_CPU_RESET_NORMAL:
+ kvm_s390_reset_vcpu_normal(cpu);
+ break;
+ }
}
}
diff --git a/target/s390x/cpu.h b/target/s390x/cpu.h
index 8a557fd8d1..1d17709d6e 100644
--- a/target/s390x/cpu.h
+++ b/target/s390x/cpu.h
@@ -276,7 +276,8 @@ extern const VMStateDescription vmstate_s390_cpu;
#define PSW_MASK_RI 0x0000008000000000ULL
#define PSW_MASK_64 0x0000000100000000ULL
#define PSW_MASK_32 0x0000000080000000ULL
-#define PSW_MASK_ESA_ADDR 0x000000007fffffffULL
+#define PSW_MASK_SHORT_ADDR 0x000000007fffffffULL
+#define PSW_MASK_SHORT_CTRL 0xffffffff80000000ULL
#undef PSW_ASC_PRIMARY
#undef PSW_ASC_ACCREG
diff --git a/target/s390x/helper.c b/target/s390x/helper.c
index b810ad431e..ed72684911 100644
--- a/target/s390x/helper.c
+++ b/target/s390x/helper.c
@@ -89,7 +89,7 @@ hwaddr s390_cpu_get_phys_addr_debug(CPUState *cs, vaddr vaddr)
static inline bool is_special_wait_psw(uint64_t psw_addr)
{
/* signal quiesce */
- return psw_addr == 0xfffUL;
+ return (psw_addr & 0xfffUL) == 0xfffUL;
}
void s390_handle_wait(S390CPU *cpu)
diff --git a/target/s390x/kvm-stub.c b/target/s390x/kvm-stub.c
index 5152e2bdf1..c4cd497f85 100644
--- a/target/s390x/kvm-stub.c
+++ b/target/s390x/kvm-stub.c
@@ -83,7 +83,15 @@ void kvm_s390_cmma_reset(void)
{
}
-void kvm_s390_reset_vcpu(S390CPU *cpu)
+void kvm_s390_reset_vcpu_initial(S390CPU *cpu)
+{
+}
+
+void kvm_s390_reset_vcpu_clear(S390CPU *cpu)
+{
+}
+
+void kvm_s390_reset_vcpu_normal(S390CPU *cpu)
{
}
diff --git a/target/s390x/kvm.c b/target/s390x/kvm.c
index 30112e529c..1d6fd6a27b 100644
--- a/target/s390x/kvm.c
+++ b/target/s390x/kvm.c
@@ -151,6 +151,7 @@ static int cap_s390_irq;
static int cap_ri;
static int cap_gs;
static int cap_hpage_1m;
+static int cap_vcpu_resets;
static int active_cmma;
@@ -342,6 +343,7 @@ int kvm_arch_init(MachineState *ms, KVMState *s)
cap_async_pf = kvm_check_extension(s, KVM_CAP_ASYNC_PF);
cap_mem_op = kvm_check_extension(s, KVM_CAP_S390_MEM_OP);
cap_s390_irq = kvm_check_extension(s, KVM_CAP_S390_INJECT_IRQ);
+ cap_vcpu_resets = kvm_check_extension(s, KVM_CAP_S390_VCPU_RESETS);
if (!kvm_check_extension(s, KVM_CAP_S390_GMAP)
|| !kvm_check_extension(s, KVM_CAP_S390_COW)) {
@@ -406,17 +408,41 @@ int kvm_arch_destroy_vcpu(CPUState *cs)
return 0;
}
-void kvm_s390_reset_vcpu(S390CPU *cpu)
+static void kvm_s390_reset_vcpu(S390CPU *cpu, unsigned long type)
{
CPUState *cs = CPU(cpu);
- /* The initial reset call is needed here to reset in-kernel
- * vcpu data that we can't access directly from QEMU
- * (i.e. with older kernels which don't support sync_regs/ONE_REG).
- * Before this ioctl cpu_synchronize_state() is called in common kvm
- * code (kvm-all) */
- if (kvm_vcpu_ioctl(cs, KVM_S390_INITIAL_RESET, NULL)) {
- error_report("Initial CPU reset failed on CPU %i", cs->cpu_index);
+ /*
+ * The reset call is needed here to reset in-kernel vcpu data that
+ * we can't access directly from QEMU (i.e. with older kernels
+ * which don't support sync_regs/ONE_REG). Before this ioctl
+ * cpu_synchronize_state() is called in common kvm code
+ * (kvm-all).
+ */
+ if (kvm_vcpu_ioctl(cs, type)) {
+ error_report("CPU reset failed on CPU %i type %lx",
+ cs->cpu_index, type);
+ }
+}
+
+void kvm_s390_reset_vcpu_initial(S390CPU *cpu)
+{
+ kvm_s390_reset_vcpu(cpu, KVM_S390_INITIAL_RESET);
+}
+
+void kvm_s390_reset_vcpu_clear(S390CPU *cpu)
+{
+ if (cap_vcpu_resets) {
+ kvm_s390_reset_vcpu(cpu, KVM_S390_CLEAR_RESET);
+ } else {
+ kvm_s390_reset_vcpu(cpu, KVM_S390_INITIAL_RESET);
+ }
+}
+
+void kvm_s390_reset_vcpu_normal(S390CPU *cpu)
+{
+ if (cap_vcpu_resets) {
+ kvm_s390_reset_vcpu(cpu, KVM_S390_NORMAL_RESET);
}
}
diff --git a/target/s390x/kvm_s390x.h b/target/s390x/kvm_s390x.h
index caf985955b..0b21789796 100644
--- a/target/s390x/kvm_s390x.h
+++ b/target/s390x/kvm_s390x.h
@@ -34,7 +34,9 @@ int kvm_s390_assign_subch_ioeventfd(EventNotifier *notifier, uint32_t sch,
int vq, bool assign);
int kvm_s390_cmma_active(void);
void kvm_s390_cmma_reset(void);
-void kvm_s390_reset_vcpu(S390CPU *cpu);
+void kvm_s390_reset_vcpu_clear(S390CPU *cpu);
+void kvm_s390_reset_vcpu_normal(S390CPU *cpu);
+void kvm_s390_reset_vcpu_initial(S390CPU *cpu);
int kvm_s390_set_mem_limit(uint64_t new_limit, uint64_t *hw_limit);
void kvm_s390_set_max_pagesize(uint64_t pagesize, Error **errp);
void kvm_s390_crypto_reset(void);
diff --git a/target/s390x/translate.c b/target/s390x/translate.c
index 0bd2073718..4f6f1e31cd 100644
--- a/target/s390x/translate.c
+++ b/target/s390x/translate.c
@@ -3874,7 +3874,7 @@ static DisasJumpType op_rosbg(DisasContext *s, DisasOps *o)
/* Operate. */
switch (s->fields.op2) {
- case 0x55: /* AND */
+ case 0x54: /* AND */
tcg_gen_ori_i64(o->in2, o->in2, ~mask);
tcg_gen_and_i64(o->out, o->out, o->in2);
break;
diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c
index 7d782002e3..e2e25ebf7d 100644
--- a/tcg/tcg-op.c
+++ b/tcg/tcg-op.c
@@ -2794,13 +2794,26 @@ static void tcg_gen_req_mo(TCGBar type)
}
}
+static inline TCGv plugin_prep_mem_callbacks(TCGv vaddr)
+{
+#ifdef CONFIG_PLUGIN
+ if (tcg_ctx->plugin_insn != NULL) {
+ /* Save a copy of the vaddr for use after a load. */
+ TCGv temp = tcg_temp_new();
+ tcg_gen_mov_tl(temp, vaddr);
+ return temp;
+ }
+#endif
+ return vaddr;
+}
+
static inline void plugin_gen_mem_callbacks(TCGv vaddr, uint16_t info)
{
#ifdef CONFIG_PLUGIN
- if (tcg_ctx->plugin_insn == NULL) {
- return;
+ if (tcg_ctx->plugin_insn != NULL) {
+ plugin_gen_empty_mem_callback(vaddr, info);
+ tcg_temp_free(vaddr);
}
- plugin_gen_empty_mem_callback(vaddr, info);
#endif
}
@@ -2822,6 +2835,7 @@ void tcg_gen_qemu_ld_i32(TCGv_i32 val, TCGv addr, TCGArg idx, MemOp memop)
}
}
+ addr = plugin_prep_mem_callbacks(addr);
gen_ldst_i32(INDEX_op_qemu_ld_i32, val, addr, memop, idx);
plugin_gen_mem_callbacks(addr, info);
@@ -2868,6 +2882,7 @@ void tcg_gen_qemu_st_i32(TCGv_i32 val, TCGv addr, TCGArg idx, MemOp memop)
memop &= ~MO_BSWAP;
}
+ addr = plugin_prep_mem_callbacks(addr);
gen_ldst_i32(INDEX_op_qemu_st_i32, val, addr, memop, idx);
plugin_gen_mem_callbacks(addr, info);
@@ -2905,6 +2920,7 @@ void tcg_gen_qemu_ld_i64(TCGv_i64 val, TCGv addr, TCGArg idx, MemOp memop)
}
}
+ addr = plugin_prep_mem_callbacks(addr);
gen_ldst_i64(INDEX_op_qemu_ld_i64, val, addr, memop, idx);
plugin_gen_mem_callbacks(addr, info);
@@ -2967,6 +2983,7 @@ void tcg_gen_qemu_st_i64(TCGv_i64 val, TCGv addr, TCGArg idx, MemOp memop)
memop &= ~MO_BSWAP;
}
+ addr = plugin_prep_mem_callbacks(addr);
gen_ldst_i64(INDEX_op_qemu_st_i64, val, addr, memop, idx);
plugin_gen_mem_callbacks(addr, info);
diff --git a/tests/data/acpi/rebuild-expected-aml.sh b/tests/data/acpi/rebuild-expected-aml.sh
index d44e511533..9cbaab1a4d 100755
--- a/tests/data/acpi/rebuild-expected-aml.sh
+++ b/tests/data/acpi/rebuild-expected-aml.sh
@@ -31,6 +31,13 @@ done
eval `grep SRC_PATH= config-host.mak`
+old_allowed_dif=`grep -v -e 'List of comma-separated changed AML files to ignore' ${SRC_PATH}/tests/qtest/bios-tables-test-allowed-diff.h`
+
echo '/* List of comma-separated changed AML files to ignore */' > ${SRC_PATH}/tests/qtest/bios-tables-test-allowed-diff.h
echo "The files were rebuilt and can be added to git."
+
+if [ -z "$old_allowed_dif" ]; then
+ echo "Note! Please do not commit expected files with source changes"
+ echo "Note! Please follow the process documented in ${SRC_PATH}/tests/qtest/bios-tables-test.c"
+fi
diff --git a/tests/plugin/bb.c b/tests/plugin/bb.c
index f30bea08dc..df19fd359d 100644
--- a/tests/plugin/bb.c
+++ b/tests/plugin/bb.c
@@ -22,9 +22,9 @@ static bool do_inline;
static void plugin_exit(qemu_plugin_id_t id, void *p)
{
- g_autofree gchar *out;
- out = g_strdup_printf("bb's: %" PRIu64", insns: %" PRIu64 "\n",
- bb_count, insn_count);
+ g_autofree gchar *out = g_strdup_printf(
+ "bb's: %" PRIu64", insns: %" PRIu64 "\n",
+ bb_count, insn_count);
qemu_plugin_outs(out);
}
diff --git a/tests/plugin/howvec.c b/tests/plugin/howvec.c
index 4ca555e123..3b9a6939f2 100644
--- a/tests/plugin/howvec.c
+++ b/tests/plugin/howvec.c
@@ -163,6 +163,13 @@ static gint cmp_exec_count(gconstpointer a, gconstpointer b)
return ea->count > eb->count ? -1 : 1;
}
+static void free_record(gpointer data)
+{
+ InsnExecCount *rec = (InsnExecCount *) data;
+ g_free(rec->insn);
+ g_free(rec);
+}
+
static void plugin_exit(qemu_plugin_id_t id, void *p)
{
g_autoptr(GString) report = g_string_new("Instruction Classes:\n");
@@ -195,30 +202,31 @@ static void plugin_exit(qemu_plugin_id_t id, void *p)
counts = g_hash_table_get_values(insns);
if (counts && g_list_next(counts)) {
- GList *it;
-
g_string_append_printf(report,"Individual Instructions:\n");
+ counts = g_list_sort(counts, cmp_exec_count);
- it = g_list_sort(counts, cmp_exec_count);
-
- for (i = 0; i < limit && it->next; i++, it = it->next) {
- InsnExecCount *rec = (InsnExecCount *) it->data;
- g_string_append_printf(report, "Instr: %-24s\t(%ld hits)\t(op=%#08x/%s)\n",
+ for (i = 0; i < limit && g_list_next(counts);
+ i++, counts = g_list_next(counts)) {
+ InsnExecCount *rec = (InsnExecCount *) counts->data;
+ g_string_append_printf(report,
+ "Instr: %-24s\t(%ld hits)\t(op=%#08x/%s)\n",
rec->insn,
rec->count,
rec->opcode,
rec->class ?
rec->class->class : "un-categorised");
}
- g_list_free(it);
+ g_list_free(counts);
}
+ g_hash_table_destroy(insns);
+
qemu_plugin_outs(report->str);
}
static void plugin_init(void)
{
- insns = g_hash_table_new(NULL, g_direct_equal);
+ insns = g_hash_table_new_full(NULL, g_direct_equal, NULL, &free_record);
}
static void vcpu_insn_exec_before(unsigned int cpu_index, void *udata)
diff --git a/tests/plugin/insn.c b/tests/plugin/insn.c
index 0a8f5a0000..a9a6e41237 100644
--- a/tests/plugin/insn.c
+++ b/tests/plugin/insn.c
@@ -44,8 +44,7 @@ static void vcpu_tb_trans(qemu_plugin_id_t id, struct qemu_plugin_tb *tb)
static void plugin_exit(qemu_plugin_id_t id, void *p)
{
- g_autofree gchar *out;
- out = g_strdup_printf("insns: %" PRIu64 "\n", insn_count);
+ g_autofree gchar *out = g_strdup_printf("insns: %" PRIu64 "\n", insn_count);
qemu_plugin_outs(out);
}
diff --git a/tests/qemu-iotests/214 b/tests/qemu-iotests/214
index 3500e0c47a..af677d90b8 100755
--- a/tests/qemu-iotests/214
+++ b/tests/qemu-iotests/214
@@ -125,9 +125,9 @@ $QEMU_IO -c "write -P 0xcc $offset $data_size" "json:{\
sizeB=$($QEMU_IMG info --output=json "$TEST_IMG" |
sed -n '/"actual-size":/ s/[^0-9]//gp')
-if [ $sizeA -le $sizeB ]
+if [ $sizeA -lt $sizeB ]
then
- echo "Compression ERROR"
+ echo "Compression ERROR ($sizeA < $sizeB)"
fi
$QEMU_IMG check --output=json "$TEST_IMG" |
diff --git a/tests/qtest/bios-tables-test.c b/tests/qtest/bios-tables-test.c
index b4752c644c..0a597bbacf 100644
--- a/tests/qtest/bios-tables-test.c
+++ b/tests/qtest/bios-tables-test.c
@@ -426,7 +426,9 @@ static void test_acpi_asl(test_data *data)
fprintf(stderr,
"acpi-test: Warning! %.4s binary file mismatch. "
- "Actual [aml:%s], Expected [aml:%s].\n",
+ "Actual [aml:%s], Expected [aml:%s].\n"
+ "See source file tests/qtest/bios-tables-test.c "
+ "for instructions on how to update expected files.\n",
exp_sdt->aml, sdt->aml_file, exp_sdt->aml_file);
all_tables_match = all_tables_match &&
@@ -461,21 +463,20 @@ static void test_acpi_asl(test_data *data)
"Actual [asl:%s, aml:%s], Expected [asl:%s, aml:%s].\n",
exp_sdt->aml, sdt->asl_file, sdt->aml_file,
exp_sdt->asl_file, exp_sdt->aml_file);
+ fflush(stderr);
if (getenv("V")) {
- const char *diff_cmd = getenv("DIFF");
- if (diff_cmd) {
- int ret G_GNUC_UNUSED;
- char *diff = g_strdup_printf("%s %s %s", diff_cmd,
- exp_sdt->asl_file, sdt->asl_file);
- ret = system(diff) ;
- g_free(diff);
- } else {
- fprintf(stderr, "acpi-test: Warning. not showing "
- "difference since no diff utility is specified. "
- "Set 'DIFF' environment variable to a preferred "
- "diff utility and run 'make V=1 check' again to "
- "see ASL difference.");
- }
+ const char *diff_env = getenv("DIFF");
+ const char *diff_cmd = diff_env ? diff_env : "diff -u";
+ char *diff = g_strdup_printf("%s %s %s", diff_cmd,
+ exp_sdt->asl_file, sdt->asl_file);
+ int out = dup(STDOUT_FILENO);
+ int ret G_GNUC_UNUSED;
+
+ dup2(STDERR_FILENO, STDOUT_FILENO);
+ ret = system(diff) ;
+ dup2(out, STDOUT_FILENO);
+ close(out);
+ g_free(diff);
}
}
}
diff --git a/tests/rcutorture.c b/tests/rcutorture.c
index 49311c82ea..732f03abda 100644
--- a/tests/rcutorture.c
+++ b/tests/rcutorture.c
@@ -65,8 +65,6 @@
#include "qemu/rcu.h"
#include "qemu/thread.h"
-long long n_reads = 0LL;
-long n_updates = 0L;
int nthreadsrunning;
#define GOFLAG_INIT 0
@@ -78,11 +76,20 @@ static volatile int goflag = GOFLAG_INIT;
#define RCU_READ_RUN 1000
#define NR_THREADS 100
-static QemuMutex counts_mutex;
static QemuThread threads[NR_THREADS];
static struct rcu_reader_data *data[NR_THREADS];
static int n_threads;
+/*
+ * Statistical counts
+ *
+ * These are the sum of local counters at the end of a run.
+ * Updates are protected by a mutex.
+ */
+static QemuMutex counts_mutex;
+long long n_reads = 0LL;
+long n_updates = 0L;
+
static void create_thread(void *(*func)(void *))
{
if (n_threads >= NR_THREADS) {
@@ -223,15 +230,15 @@ static void uperftest(int nupdaters, int duration)
#define RCU_STRESS_PIPE_LEN 10
struct rcu_stress {
- int pipe_count;
+ int age; /* how many update cycles while not rcu_stress_current */
int mbtest;
};
struct rcu_stress rcu_stress_array[RCU_STRESS_PIPE_LEN] = { { 0 } };
struct rcu_stress *rcu_stress_current;
-int rcu_stress_idx;
-
int n_mberror;
+
+/* Updates protected by counts_mutex */
long long rcu_stress_count[RCU_STRESS_PIPE_LEN + 1];
@@ -253,7 +260,7 @@ static void *rcu_read_stress_test(void *arg)
while (goflag == GOFLAG_RUN) {
rcu_read_lock();
p = atomic_rcu_read(&rcu_stress_current);
- if (p->mbtest == 0) {
+ if (atomic_read(&p->mbtest) == 0) {
n_mberror++;
}
rcu_read_lock();
@@ -261,7 +268,7 @@ static void *rcu_read_stress_test(void *arg)
garbage++;
}
rcu_read_unlock();
- pc = p->pipe_count;
+ pc = atomic_read(&p->age);
rcu_read_unlock();
if ((pc > RCU_STRESS_PIPE_LEN) || (pc < 0)) {
pc = RCU_STRESS_PIPE_LEN;
@@ -280,32 +287,52 @@ static void *rcu_read_stress_test(void *arg)
return NULL;
}
+/*
+ * Stress Test Updater
+ *
+ * The updater cycles around updating rcu_stress_current to point at
+ * one of the rcu_stress_array entries and resets its age. It
+ * then increments the age of all the other entries. The age
+ * will be read under an rcu_read_lock() and distribution of values
+ * calculated. The final result gives an indication of how many
+ * previously current rcu_stress entries are in flight until the RCU
+ * cycle completes.
+ */
static void *rcu_update_stress_test(void *arg)
{
- int i;
- struct rcu_stress *p;
+ int i, rcu_stress_idx = 0;
+ struct rcu_stress *cp = atomic_read(&rcu_stress_current);
rcu_register_thread();
-
*(struct rcu_reader_data **)arg = &rcu_reader;
+
while (goflag == GOFLAG_INIT) {
g_usleep(1000);
}
+
while (goflag == GOFLAG_RUN) {
- i = rcu_stress_idx + 1;
- if (i >= RCU_STRESS_PIPE_LEN) {
- i = 0;
+ struct rcu_stress *p;
+ rcu_stress_idx++;
+ if (rcu_stress_idx >= RCU_STRESS_PIPE_LEN) {
+ rcu_stress_idx = 0;
}
- p = &rcu_stress_array[i];
- p->mbtest = 0;
+ p = &rcu_stress_array[rcu_stress_idx];
+ /* catching up with ourselves would be a bug */
+ assert(p != cp);
+ atomic_set(&p->mbtest, 0);
smp_mb();
- p->pipe_count = 0;
- p->mbtest = 1;
+ atomic_set(&p->age, 0);
+ atomic_set(&p->mbtest, 1);
atomic_rcu_set(&rcu_stress_current, p);
- rcu_stress_idx = i;
+ cp = p;
+ /*
+ * New RCU structure is now live, update the age of the old
+ * ones.
+ */
for (i = 0; i < RCU_STRESS_PIPE_LEN; i++) {
if (i != rcu_stress_idx) {
- rcu_stress_array[i].pipe_count++;
+ atomic_set(&rcu_stress_array[i].age,
+ rcu_stress_array[i].age + 1);
}
}
synchronize_rcu();
@@ -338,7 +365,7 @@ static void stresstest(int nreaders, int duration)
int i;
rcu_stress_current = &rcu_stress_array[0];
- rcu_stress_current->pipe_count = 0;
+ rcu_stress_current->age = 0;
rcu_stress_current->mbtest = 1;
for (i = 0; i < nreaders; i++) {
create_thread(rcu_read_stress_test);
@@ -368,7 +395,7 @@ static void gtest_stress(int nreaders, int duration)
int i;
rcu_stress_current = &rcu_stress_array[0];
- rcu_stress_current->pipe_count = 0;
+ rcu_stress_current->age = 0;
rcu_stress_current->mbtest = 1;
for (i = 0; i < nreaders; i++) {
create_thread(rcu_read_stress_test);
@@ -413,7 +440,8 @@ static void gtest_stress_10_5(void)
static void usage(int argc, char *argv[])
{
- fprintf(stderr, "Usage: %s [nreaders [ perf | stress ] ]\n", argv[0]);
+ fprintf(stderr, "Usage: %s [nreaders [ [r|u]perf | stress [duration]]\n",
+ argv[0]);
exit(-1);
}
diff --git a/tests/tcg/Makefile.target b/tests/tcg/Makefile.target
index 3c7421a356..b3cff3cad1 100644
--- a/tests/tcg/Makefile.target
+++ b/tests/tcg/Makefile.target
@@ -79,7 +79,7 @@ QEMU_OPTS=
# If TCG debugging is enabled things are a lot slower
ifeq ($(CONFIG_DEBUG_TCG),y)
-TIMEOUT=45
+TIMEOUT=60
else
TIMEOUT=15
endif
@@ -137,7 +137,7 @@ PLUGINS=$(notdir $(wildcard $(PLUGIN_DIR)/*.so))
$(foreach p,$(PLUGINS), \
$(foreach t,$(TESTS),\
$(eval run-plugin-$(t)-with-$(p): $t $p) \
- $(eval run-plugin-$(t)-with-$(p): TIMEOUT=30) \
+ $(eval run-plugin-$(t)-with-$(p): TIMEOUT=60) \
$(eval RUN_TESTS+=run-plugin-$(t)-with-$(p))))
endif
diff --git a/tests/tcg/aarch64/Makefile.softmmu-target b/tests/tcg/aarch64/Makefile.softmmu-target
index d2299b98b7..71f72cfbe3 100644
--- a/tests/tcg/aarch64/Makefile.softmmu-target
+++ b/tests/tcg/aarch64/Makefile.softmmu-target
@@ -70,4 +70,6 @@ pauth-3:
$(call skip-test, "BUILD of $@", "missing compiler support")
run-pauth-3:
$(call skip-test, "RUN of pauth-3", "not built")
+run-plugin-pauth-3-with-%:
+ $(call skip-test, "RUN of pauth-3 ($*)", "not built")
endif
diff --git a/tests/tcg/aarch64/pauth-4.c b/tests/tcg/aarch64/pauth-4.c
index 1040e92aec..24a639e36c 100644
--- a/tests/tcg/aarch64/pauth-4.c
+++ b/tests/tcg/aarch64/pauth-4.c
@@ -1,25 +1,45 @@
#include <stdint.h>
#include <assert.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#define TESTS 1000
int main()
{
- uintptr_t x, y;
+ int i, count = 0;
+ float perc;
+ void *base = malloc(TESTS);
+
+ for (i = 0; i < TESTS; i++) {
+ uintptr_t in, x, y;
+
+ in = i + (uintptr_t) base;
+
+ asm("mov %0, %[in]\n\t"
+ "pacia %0, sp\n\t" /* sigill if pauth not supported */
+ "eor %0, %0, #4\n\t" /* corrupt single bit */
+ "mov %1, %0\n\t"
+ "autia %1, sp\n\t" /* validate corrupted pointer */
+ "xpaci %0\n\t" /* strip pac from corrupted pointer */
+ : /* out */ "=r"(x), "=r"(y)
+ : /* in */ [in] "r" (in)
+ : /* clobbers */);
- asm("mov %0, lr\n\t"
- "pacia %0, sp\n\t" /* sigill if pauth not supported */
- "eor %0, %0, #4\n\t" /* corrupt single bit */
- "mov %1, %0\n\t"
- "autia %1, sp\n\t" /* validate corrupted pointer */
- "xpaci %0\n\t" /* strip pac from corrupted pointer */
- : "=r"(x), "=r"(y));
+ /*
+ * Once stripped, the corrupted pointer is of the form 0x0000...wxyz.
+ * We expect the autia to indicate failure, producing a pointer of the
+ * form 0x000e....wxyz. Use xpaci and != for the test, rather than
+ * extracting explicit bits from the top, because the location of the
+ * error code "e" depends on the configuration of virtual memory.
+ */
+ if (x != y) {
+ count++;
+ }
- /*
- * Once stripped, the corrupted pointer is of the form 0x0000...wxyz.
- * We expect the autia to indicate failure, producing a pointer of the
- * form 0x000e....wxyz. Use xpaci and != for the test, rather than
- * extracting explicit bits from the top, because the location of the
- * error code "e" depends on the configuration of virtual memory.
- */
- assert(x != y);
- return 0;
+ }
+ perc = (float) count / (float) TESTS;
+ printf("Checks Passed: %0.2f%%", perc * 100.0);
+ assert(perc > 0.95);
+ return 0;
}
diff --git a/tests/tcg/configure.sh b/tests/tcg/configure.sh
index 9eb6ba3b7e..eaaaff6233 100755
--- a/tests/tcg/configure.sh
+++ b/tests/tcg/configure.sh
@@ -228,7 +228,7 @@ for target in $target_list; do
echo "CROSS_CC_HAS_SVE=y" >> $config_target_mak
fi
if do_compiler "$target_compiler" $target_compiler_cflags \
- -march=-march=armv8.3-a -o $TMPE $TMPC; then
+ -march=armv8.3-a -o $TMPE $TMPC; then
echo "CROSS_CC_HAS_ARMV8_3=y" >> $config_target_mak
fi
;;
diff --git a/trace/control.c b/trace/control.c
index 6c775e68eb..2ffe000818 100644
--- a/trace/control.c
+++ b/trace/control.c
@@ -226,10 +226,15 @@ void trace_init_file(const char *file)
#ifdef CONFIG_TRACE_SIMPLE
st_set_trace_file(file);
#elif defined CONFIG_TRACE_LOG
- /* If both the simple and the log backends are enabled, "--trace file"
- * only applies to the simple backend; use "-D" for the log backend.
+ /*
+ * If both the simple and the log backends are enabled, "--trace file"
+ * only applies to the simple backend; use "-D" for the log
+ * backend. However we should only override -D if we actually have
+ * something to override it with.
*/
- qemu_set_log_filename(file, &error_fatal);
+ if (file) {
+ qemu_set_log_filename(file, &error_fatal);
+ }
#else
if (file) {
fprintf(stderr, "error: --trace file=...: "