diff options
268 files changed, 10934 insertions, 5215 deletions
diff --git a/.gitlab-ci.d/cirrus/freebsd-12.vars b/.gitlab-ci.d/cirrus/freebsd-12.vars index c3db1d7d30..e3fc3235b9 100644 --- a/.gitlab-ci.d/cirrus/freebsd-12.vars +++ b/.gitlab-ci.d/cirrus/freebsd-12.vars @@ -11,6 +11,6 @@ MAKE='/usr/local/bin/gmake' NINJA='/usr/local/bin/ninja' PACKAGING_COMMAND='pkg' PIP3='/usr/local/bin/pip-3.8' -PKGS='alsa-lib bash bison bzip2 ca_root_nss capstone4 ccache cdrkit-genisoimage cmocka ctags curl cyrus-sasl dbus diffutils dtc flex fusefs-libs3 gettext git glib gmake gnutls gsed gtk3 json-c libepoxy libffi libgcrypt libjpeg-turbo libnfs libslirp libspice-server libssh libtasn1 llvm lzo2 meson ncurses nettle ninja opencv perl5 pixman pkgconf png py39-numpy py39-pillow py39-pip py39-sphinx py39-sphinx_rtd_theme py39-yaml python3 rpm2cpio sdl2 sdl2_image snappy spice-protocol tesseract texinfo usbredir virglrenderer vte3 zstd' +PKGS='alsa-lib bash bison bzip2 ca_root_nss capstone4 ccache cdrkit-genisoimage cmocka ctags curl cyrus-sasl dbus diffutils dtc flex fusefs-libs3 gettext git glib gmake gnutls gsed gtk3 json-c libepoxy libffi libgcrypt libjpeg-turbo libnfs libslirp libspice-server libssh libtasn1 llvm lzo2 meson ncurses nettle ninja opencv perl5 pixman pkgconf png py39-numpy py39-pillow py39-pip py39-sphinx py39-sphinx_rtd_theme py39-yaml python3 rpm2cpio sdl2 sdl2_image snappy sndio spice-protocol tesseract texinfo usbredir virglrenderer vte3 zstd' PYPI_PKGS='' PYTHON='/usr/local/bin/python3' diff --git a/.gitlab-ci.d/cirrus/freebsd-13.vars b/.gitlab-ci.d/cirrus/freebsd-13.vars index d31faa787f..9f56babd9c 100644 --- a/.gitlab-ci.d/cirrus/freebsd-13.vars +++ b/.gitlab-ci.d/cirrus/freebsd-13.vars @@ -11,6 +11,6 @@ MAKE='/usr/local/bin/gmake' NINJA='/usr/local/bin/ninja' PACKAGING_COMMAND='pkg' PIP3='/usr/local/bin/pip-3.8' -PKGS='alsa-lib bash bison bzip2 ca_root_nss capstone4 ccache cdrkit-genisoimage cmocka ctags curl cyrus-sasl dbus diffutils dtc flex fusefs-libs3 gettext git glib gmake gnutls gsed gtk3 json-c libepoxy libffi libgcrypt libjpeg-turbo libnfs libslirp libspice-server libssh libtasn1 llvm lzo2 meson ncurses nettle ninja opencv perl5 pixman pkgconf png py39-numpy py39-pillow py39-pip py39-sphinx py39-sphinx_rtd_theme py39-yaml python3 rpm2cpio sdl2 sdl2_image snappy spice-protocol tesseract texinfo usbredir virglrenderer vte3 zstd' +PKGS='alsa-lib bash bison bzip2 ca_root_nss capstone4 ccache cdrkit-genisoimage cmocka ctags curl cyrus-sasl dbus diffutils dtc flex fusefs-libs3 gettext git glib gmake gnutls gsed gtk3 json-c libepoxy libffi libgcrypt libjpeg-turbo libnfs libslirp libspice-server libssh libtasn1 llvm lzo2 meson ncurses nettle ninja opencv perl5 pixman pkgconf png py39-numpy py39-pillow py39-pip py39-sphinx py39-sphinx_rtd_theme py39-yaml python3 rpm2cpio sdl2 sdl2_image snappy sndio spice-protocol tesseract texinfo usbredir virglrenderer vte3 zstd' PYPI_PKGS='' PYTHON='/usr/local/bin/python3' diff --git a/MAINTAINERS b/MAINTAINERS index 32e495e165..c41d8d65e2 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -84,7 +84,6 @@ T: git https://github.com/vivier/qemu.git trivial-patches Architecture support -------------------- S390 general architecture support -M: Cornelia Huck <cohuck@redhat.com> M: Thomas Huth <thuth@redhat.com> S: Supported F: configs/devices/s390x-softmmu/default.mak @@ -106,7 +105,6 @@ F: docs/system/target-s390x.rst F: docs/system/s390x/ F: tests/migration/s390x/ K: ^Subject:.*(?i)s390x? -T: git https://gitlab.com/cohuck/qemu.git s390-next L: qemu-s390x@nongnu.org MIPS general architecture support @@ -305,6 +303,7 @@ F: target/rx/ S390 TCG CPUs M: Richard Henderson <richard.henderson@linaro.org> M: David Hildenbrand <david@redhat.com> +R: Ilya Leoshkevich <iii@linux.ibm.com> S: Maintained F: target/s390x/ F: target/s390x/tcg @@ -2509,7 +2508,10 @@ S: Supported F: block* F: block/ F: hw/block/ +F: qapi/block*.json +F: qapi/transaction.json F: include/block/ +F: include/sysemu/block-*.h F: qemu-img* F: docs/tools/qemu-img.rst F: qemu-io* @@ -2582,16 +2584,6 @@ F: include/qemu/co-shared-resource.h T: git https://gitlab.com/jsnow/qemu.git jobs T: git https://gitlab.com/vsementsov/qemu.git block -Block QAPI, monitor, command line -M: Markus Armbruster <armbru@redhat.com> -S: Supported -F: blockdev.c -F: blockdev-hmp-cmds.c -F: block/qapi.c -F: qapi/block*.json -F: qapi/transaction.json -T: git https://repo.or.cz/qemu/armbru.git block-next - Compute Express Link M: Ben Widawsky <ben.widawsky@intel.com> M: Jonathan Cameron <jonathan.cameron@huawei.com> @@ -3415,6 +3407,12 @@ L: qemu-block@nongnu.org S: Maintained F: block/vdi.c +blkio +M: Stefan Hajnoczi <stefanha@redhat.com> +L: qemu-block@nongnu.org +S: Maintained +F: block/blkio.c + iSCSI M: Ronnie Sahlberg <ronniesahlberg@gmail.com> M: Paolo Bonzini <pbonzini@redhat.com> diff --git a/accel/dummy-cpus.c b/accel/dummy-cpus.c index 10429fdfb2..d6a1b8d0a2 100644 --- a/accel/dummy-cpus.c +++ b/accel/dummy-cpus.c @@ -21,8 +21,6 @@ static void *dummy_cpu_thread_fn(void *arg) { CPUState *cpu = arg; - sigset_t waitset; - int r; rcu_register_thread(); @@ -32,8 +30,13 @@ static void *dummy_cpu_thread_fn(void *arg) cpu->can_do_io = 1; current_cpu = cpu; +#ifndef _WIN32 + sigset_t waitset; + int r; + sigemptyset(&waitset); sigaddset(&waitset, SIG_IPI); +#endif /* signal CPU creation */ cpu_thread_signal_created(cpu); @@ -41,6 +44,7 @@ static void *dummy_cpu_thread_fn(void *arg) do { qemu_mutex_unlock_iothread(); +#ifndef _WIN32 do { int sig; r = sigwait(&waitset, &sig); @@ -49,6 +53,9 @@ static void *dummy_cpu_thread_fn(void *arg) perror("sigwait"); exit(1); } +#else + qemu_sem_wait(&cpu->sem); +#endif qemu_mutex_lock_iothread(); qemu_wait_io_event(cpu); } while (!cpu->unplug); @@ -69,4 +76,7 @@ void dummy_start_vcpu_thread(CPUState *cpu) cpu->cpu_index); qemu_thread_create(cpu->thread, thread_name, dummy_cpu_thread_fn, cpu, QEMU_THREAD_JOINABLE); +#ifdef _WIN32 + qemu_sem_init(&cpu->sem, 0); +#endif } diff --git a/accel/meson.build b/accel/meson.build index b9a963cf80..259c35c4c8 100644 --- a/accel/meson.build +++ b/accel/meson.build @@ -16,5 +16,5 @@ dummy_ss.add(files( 'dummy-cpus.c', )) -specific_ss.add_all(when: ['CONFIG_SOFTMMU', 'CONFIG_POSIX'], if_true: dummy_ss) +specific_ss.add_all(when: ['CONFIG_SOFTMMU'], if_true: dummy_ss) specific_ss.add_all(when: ['CONFIG_XEN'], if_true: dummy_ss) diff --git a/accel/qtest/meson.build b/accel/qtest/meson.build index 4c65600293..176d990ae1 100644 --- a/accel/qtest/meson.build +++ b/accel/qtest/meson.build @@ -1,2 +1 @@ -qtest_module_ss.add(when: ['CONFIG_SOFTMMU', 'CONFIG_POSIX'], - if_true: files('qtest.c')) +qtest_module_ss.add(when: ['CONFIG_SOFTMMU'], if_true: files('qtest.c')) diff --git a/backends/cryptodev-vhost-user.c b/backends/cryptodev-vhost-user.c index 5443a59153..f9c5867e38 100644 --- a/backends/cryptodev-vhost-user.c +++ b/backends/cryptodev-vhost-user.c @@ -339,7 +339,7 @@ static void cryptodev_vhost_user_set_chardev(Object *obj, CRYPTODEV_BACKEND_VHOST_USER(obj); if (s->opened) { - error_setg(errp, QERR_PERMISSION_DENIED); + error_setg(errp, "Property 'chardev' can no longer be set"); } else { g_free(s->chr_name); s->chr_name = g_strdup(value); diff --git a/backends/hostmem.c b/backends/hostmem.c index 4428e06738..8640294c10 100644 --- a/backends/hostmem.c +++ b/backends/hostmem.c @@ -232,7 +232,8 @@ static void host_memory_backend_set_prealloc(Object *obj, bool value, void *ptr = memory_region_get_ram_ptr(&backend->mr); uint64_t sz = memory_region_size(&backend->mr); - os_mem_prealloc(fd, ptr, sz, backend->prealloc_threads, &local_err); + qemu_prealloc_mem(fd, ptr, sz, backend->prealloc_threads, + backend->prealloc_context, &local_err); if (local_err) { error_propagate(errp, local_err); return; @@ -383,8 +384,9 @@ host_memory_backend_memory_complete(UserCreatable *uc, Error **errp) * specified NUMA policy in place. */ if (backend->prealloc) { - os_mem_prealloc(memory_region_get_fd(&backend->mr), ptr, sz, - backend->prealloc_threads, &local_err); + qemu_prealloc_mem(memory_region_get_fd(&backend->mr), ptr, sz, + backend->prealloc_threads, + backend->prealloc_context, &local_err); if (local_err) { goto out; } @@ -492,6 +494,11 @@ host_memory_backend_class_init(ObjectClass *oc, void *data) NULL, NULL); object_class_property_set_description(oc, "prealloc-threads", "Number of CPU threads to use for prealloc"); + object_class_property_add_link(oc, "prealloc-context", + TYPE_THREAD_CONTEXT, offsetof(HostMemoryBackend, prealloc_context), + object_property_allow_set_link, OBJ_PROP_LINK_STRONG); + object_class_property_set_description(oc, "prealloc-context", + "Context to use for creating CPU threads for preallocation"); object_class_property_add(oc, "size", "int", host_memory_backend_get_size, host_memory_backend_set_size, diff --git a/backends/rng-egd.c b/backends/rng-egd.c index 4de142b9dc..684c3cf3d6 100644 --- a/backends/rng-egd.c +++ b/backends/rng-egd.c @@ -116,7 +116,7 @@ static void rng_egd_set_chardev(Object *obj, const char *value, Error **errp) RngEgd *s = RNG_EGD(b); if (b->opened) { - error_setg(errp, QERR_PERMISSION_DENIED); + error_setg(errp, "Property 'chardev' can no longer be set"); } else { g_free(s->chr_name); s->chr_name = g_strdup(value); diff --git a/backends/rng-random.c b/backends/rng-random.c index 7add272edd..80eb5be138 100644 --- a/backends/rng-random.c +++ b/backends/rng-random.c @@ -96,7 +96,7 @@ static void rng_random_set_filename(Object *obj, const char *filename, RngRandom *s = RNG_RANDOM(obj); if (b->opened) { - error_setg(errp, QERR_PERMISSION_DENIED); + error_setg(errp, "Property 'filename' can no longer be set"); return; } diff --git a/backends/vhost-user.c b/backends/vhost-user.c index 10b39992d2..5dedb2d987 100644 --- a/backends/vhost-user.c +++ b/backends/vhost-user.c @@ -141,7 +141,7 @@ static void set_chardev(Object *obj, const char *value, Error **errp) Chardev *chr; if (b->completed) { - error_setg(errp, QERR_PERMISSION_DENIED); + error_setg(errp, "Property 'chardev' can no longer be set"); return; } @@ -90,13 +90,9 @@ static BlockDriverState *bdrv_open_inherit(const char *filename, static bool bdrv_recurse_has_child(BlockDriverState *bs, BlockDriverState *child); -static void bdrv_child_free(BdrvChild *child); -static void bdrv_replace_child_noperm(BdrvChild **child, - BlockDriverState *new_bs, - bool free_empty_child); -static void bdrv_remove_file_or_backing_child(BlockDriverState *bs, - BdrvChild *child, - Transaction *tran); +static void bdrv_replace_child_noperm(BdrvChild *child, + BlockDriverState *new_bs); +static void bdrv_remove_child(BdrvChild *child, Transaction *tran); static void bdrv_remove_filter_or_cow_child(BlockDriverState *bs, Transaction *tran); @@ -108,6 +104,10 @@ static void bdrv_reopen_abort(BDRVReopenState *reopen_state); static bool bdrv_backing_overridden(BlockDriverState *bs); +static bool bdrv_change_aio_context(BlockDriverState *bs, AioContext *ctx, + GHashTable *visited, Transaction *tran, + Error **errp); + /* If non-zero, use only whitelisted block drivers */ static int use_bdrv_whitelist; @@ -643,7 +643,7 @@ create_file_fallback_zero_first_sector(BlockBackend *blk, bytes_to_clear = MIN(current_size, BDRV_SECTOR_SIZE); if (bytes_to_clear) { - ret = blk_pwrite_zeroes(blk, 0, bytes_to_clear, BDRV_REQ_MAY_UNMAP); + ret = blk_co_pwrite_zeroes(blk, 0, bytes_to_clear, BDRV_REQ_MAY_UNMAP); if (ret < 0) { error_setg_errno(errp, -ret, "Failed to clear the new image's first sector"); @@ -861,38 +861,42 @@ int bdrv_probe_geometry(BlockDriverState *bs, HDGeometry *geo) /* * Create a uniquely-named empty temporary file. - * Return 0 upon success, otherwise a negative errno value. + * Return the actual file name used upon success, otherwise NULL. + * This string should be freed with g_free() when not needed any longer. + * + * Note: creating a temporary file for the caller to (re)open is + * inherently racy. Use g_file_open_tmp() instead whenever practical. */ -int get_tmp_filename(char *filename, int size) +char *create_tmp_file(Error **errp) { -#ifdef _WIN32 - char temp_dir[MAX_PATH]; - /* GetTempFileName requires that its output buffer (4th param) - have length MAX_PATH or greater. */ - assert(size >= MAX_PATH); - return (GetTempPath(MAX_PATH, temp_dir) - && GetTempFileName(temp_dir, "qem", 0, filename) - ? 0 : -GetLastError()); -#else int fd; const char *tmpdir; - tmpdir = getenv("TMPDIR"); - if (!tmpdir) { + g_autofree char *filename = NULL; + + tmpdir = g_get_tmp_dir(); +#ifndef _WIN32 + /* + * See commit 69bef79 ("block: use /var/tmp instead of /tmp for -snapshot") + * + * This function is used to create temporary disk images (like -snapshot), + * so the files can become very large. /tmp is often a tmpfs where as + * /var/tmp is usually on a disk, so more appropriate for disk images. + */ + if (!g_strcmp0(tmpdir, "/tmp")) { tmpdir = "/var/tmp"; } - if (snprintf(filename, size, "%s/vl.XXXXXX", tmpdir) >= size) { - return -EOVERFLOW; - } - fd = mkstemp(filename); +#endif + + filename = g_strdup_printf("%s/vl.XXXXXX", tmpdir); + fd = g_mkstemp(filename); if (fd < 0) { - return -errno; - } - if (close(fd) != 0) { - unlink(filename); - return -errno; + error_setg_errno(errp, errno, "Could not open temporary file '%s'", + filename); + return NULL; } - return 0; -#endif + close(fd); + + return g_steal_pointer(&filename); } /* @@ -1238,18 +1242,12 @@ static int bdrv_child_cb_inactivate(BdrvChild *child) return 0; } -static bool bdrv_child_cb_can_set_aio_ctx(BdrvChild *child, AioContext *ctx, - GSList **ignore, Error **errp) -{ - BlockDriverState *bs = child->opaque; - return bdrv_can_set_aio_context(bs, ctx, ignore, errp); -} - -static void bdrv_child_cb_set_aio_ctx(BdrvChild *child, AioContext *ctx, - GSList **ignore) +static bool bdrv_child_cb_change_aio_ctx(BdrvChild *child, AioContext *ctx, + GHashTable *visited, Transaction *tran, + Error **errp) { BlockDriverState *bs = child->opaque; - return bdrv_set_aio_context_ignore(bs, ctx, ignore); + return bdrv_change_aio_context(bs, ctx, visited, tran, errp); } /* @@ -1443,9 +1441,39 @@ static void bdrv_child_cb_attach(BdrvChild *child) assert_bdrv_graph_writable(bs); QLIST_INSERT_HEAD(&bs->children, child, next); - - if (child->role & BDRV_CHILD_COW) { + if (bs->drv->is_filter || (child->role & BDRV_CHILD_FILTERED)) { + /* + * Here we handle filters and block/raw-format.c when it behave like + * filter. They generally have a single PRIMARY child, which is also the + * FILTERED child, and that they may have multiple more children, which + * are neither PRIMARY nor FILTERED. And never we have a COW child here. + * So bs->file will be the PRIMARY child, unless the PRIMARY child goes + * into bs->backing on exceptional cases; and bs->backing will be + * nothing else. + */ + assert(!(child->role & BDRV_CHILD_COW)); + if (child->role & BDRV_CHILD_PRIMARY) { + assert(child->role & BDRV_CHILD_FILTERED); + assert(!bs->backing); + assert(!bs->file); + + if (bs->drv->filtered_child_is_backing) { + bs->backing = child; + } else { + bs->file = child; + } + } else { + assert(!(child->role & BDRV_CHILD_FILTERED)); + } + } else if (child->role & BDRV_CHILD_COW) { + assert(bs->drv->supports_backing); + assert(!(child->role & BDRV_CHILD_PRIMARY)); + assert(!bs->backing); + bs->backing = child; bdrv_backing_attach(child); + } else if (child->role & BDRV_CHILD_PRIMARY) { + assert(!bs->file); + bs->file = child; } bdrv_apply_subtree_drain(child, bs); @@ -1463,6 +1491,12 @@ static void bdrv_child_cb_detach(BdrvChild *child) assert_bdrv_graph_writable(bs); QLIST_REMOVE(child, next); + if (child == bs->backing) { + assert(child != bs->file); + bs->backing = NULL; + } else if (child == bs->file) { + bs->file = NULL; + } } static int bdrv_child_cb_update_filename(BdrvChild *c, BlockDriverState *base, @@ -1492,8 +1526,7 @@ const BdrvChildClass child_of_bds = { .attach = bdrv_child_cb_attach, .detach = bdrv_child_cb_detach, .inactivate = bdrv_child_cb_inactivate, - .can_set_aio_ctx = bdrv_child_cb_can_set_aio_ctx, - .set_aio_ctx = bdrv_child_cb_set_aio_ctx, + .change_aio_ctx = bdrv_child_cb_change_aio_ctx, .update_filename = bdrv_child_cb_update_filename, .get_parent_aio_context = child_of_bds_get_parent_aio_context, }; @@ -1641,6 +1674,20 @@ static int bdrv_open_driver(BlockDriverState *bs, BlockDriver *drv, goto open_failed; } + assert(!(bs->supported_read_flags & ~BDRV_REQ_MASK)); + assert(!(bs->supported_write_flags & ~BDRV_REQ_MASK)); + + /* + * Always allow the BDRV_REQ_REGISTERED_BUF optimization hint. This saves + * drivers that pass read/write requests through to a child the trouble of + * declaring support explicitly. + * + * Drivers must not propagate this flag accidentally when they initiate I/O + * to a bounce buffer. That case should be rare though. + */ + bs->supported_read_flags |= BDRV_REQ_REGISTERED_BUF; + bs->supported_write_flags |= BDRV_REQ_REGISTERED_BUF; + ret = refresh_total_sectors(bs, bs->total_sectors); if (ret < 0) { error_setg_errno(errp, -ret, "Could not refresh total sector count"); @@ -1668,7 +1715,7 @@ open_failed: bs->drv = NULL; if (bs->file != NULL) { bdrv_unref_child(bs, bs->file); - bs->file = NULL; + assert(!bs->file); } g_free(bs->opaque); bs->opaque = NULL; @@ -2339,9 +2386,7 @@ static int bdrv_drv_set_perm(BlockDriverState *bs, uint64_t perm, typedef struct BdrvReplaceChildState { BdrvChild *child; - BdrvChild **childp; BlockDriverState *old_bs; - bool free_empty_child; } BdrvReplaceChildState; static void bdrv_replace_child_commit(void *opaque) @@ -2349,9 +2394,6 @@ static void bdrv_replace_child_commit(void *opaque) BdrvReplaceChildState *s = opaque; GLOBAL_STATE_CODE(); - if (s->free_empty_child && !s->child->bs) { - bdrv_child_free(s->child); - } bdrv_unref(s->old_bs); } @@ -2361,34 +2403,8 @@ static void bdrv_replace_child_abort(void *opaque) BlockDriverState *new_bs = s->child->bs; GLOBAL_STATE_CODE(); - /* - * old_bs reference is transparently moved from @s to s->child. - * - * Pass &s->child here instead of s->childp, because: - * (1) s->old_bs must be non-NULL, so bdrv_replace_child_noperm() will not - * modify the BdrvChild * pointer we indirectly pass to it, i.e. it - * will not modify s->child. From that perspective, it does not matter - * whether we pass s->childp or &s->child. - * (2) If new_bs is not NULL, s->childp will be NULL. We then cannot use - * it here. - * (3) If new_bs is NULL, *s->childp will have been NULLed by - * bdrv_replace_child_tran()'s bdrv_replace_child_noperm() call, and we - * must not pass a NULL *s->childp here. - * - * So whether new_bs was NULL or not, we cannot pass s->childp here; and in - * any case, there is no reason to pass it anyway. - */ - bdrv_replace_child_noperm(&s->child, s->old_bs, true); - /* - * The child was pre-existing, so s->old_bs must be non-NULL, and - * s->child thus must not have been freed - */ - assert(s->child != NULL); - if (!new_bs) { - /* As described above, *s->childp was cleared, so restore it */ - assert(s->childp != NULL); - *s->childp = s->child; - } + /* old_bs reference is transparently moved from @s to @s->child */ + bdrv_replace_child_noperm(s->child, s->old_bs); bdrv_unref(new_bs); } @@ -2404,46 +2420,22 @@ static TransactionActionDrv bdrv_replace_child_drv = { * Note: real unref of old_bs is done only on commit. * * The function doesn't update permissions, caller is responsible for this. - * - * (*childp)->bs must not be NULL. - * - * Note that if new_bs == NULL, @childp is stored in a state object attached - * to @tran, so that the old child can be reinstated in the abort handler. - * Therefore, if @new_bs can be NULL, @childp must stay valid until the - * transaction is committed or aborted. - * - * If @free_empty_child is true and @new_bs is NULL, the BdrvChild is - * freed (on commit). @free_empty_child should only be false if the - * caller will free the BDrvChild themselves (which may be important - * if this is in turn called in another transactional context). */ -static void bdrv_replace_child_tran(BdrvChild **childp, - BlockDriverState *new_bs, - Transaction *tran, - bool free_empty_child) +static void bdrv_replace_child_tran(BdrvChild *child, BlockDriverState *new_bs, + Transaction *tran) { BdrvReplaceChildState *s = g_new(BdrvReplaceChildState, 1); *s = (BdrvReplaceChildState) { - .child = *childp, - .childp = new_bs == NULL ? childp : NULL, - .old_bs = (*childp)->bs, - .free_empty_child = free_empty_child, + .child = child, + .old_bs = child->bs, }; tran_add(tran, &bdrv_replace_child_drv, s); - /* The abort handler relies on this */ - assert(s->old_bs != NULL); - if (new_bs) { bdrv_ref(new_bs); } - /* - * Pass free_empty_child=false, we will free the child (if - * necessary) in bdrv_replace_child_commit() (if our - * @free_empty_child parameter was true). - */ - bdrv_replace_child_noperm(childp, new_bs, false); - /* old_bs reference is transparently moved from *childp to @s */ + bdrv_replace_child_noperm(child, new_bs); + /* old_bs reference is transparently moved from @child to @s */ } /* @@ -2824,24 +2816,9 @@ uint64_t bdrv_qapi_perm_to_blk_perm(BlockPermission qapi_perm) return permissions[qapi_perm]; } -/** - * Replace (*childp)->bs by @new_bs. - * - * If @new_bs is NULL, *childp will be set to NULL, too: BDS parents - * generally cannot handle a BdrvChild with .bs == NULL, so clearing - * BdrvChild.bs should generally immediately be followed by the - * BdrvChild pointer being cleared as well. - * - * If @free_empty_child is true and @new_bs is NULL, the BdrvChild is - * freed. @free_empty_child should only be false if the caller will - * free the BdrvChild themselves (this may be important in a - * transactional context, where it may only be freed on commit). - */ -static void bdrv_replace_child_noperm(BdrvChild **childp, - BlockDriverState *new_bs, - bool free_empty_child) +static void bdrv_replace_child_noperm(BdrvChild *child, + BlockDriverState *new_bs) { - BdrvChild *child = *childp; BlockDriverState *old_bs = child->bs; int new_bs_quiesce_counter; int drain_saldo; @@ -2878,9 +2855,6 @@ static void bdrv_replace_child_noperm(BdrvChild **childp, } child->bs = new_bs; - if (!new_bs) { - *childp = NULL; - } if (new_bs) { assert_bdrv_graph_writable(new_bs); @@ -2911,10 +2885,6 @@ static void bdrv_replace_child_noperm(BdrvChild **childp, bdrv_parent_drained_end_single(child); drain_saldo++; } - - if (free_empty_child && !child->bs) { - bdrv_child_free(child); - } } /** @@ -2934,7 +2904,7 @@ static void bdrv_child_free(BdrvChild *child) } typedef struct BdrvAttachChildCommonState { - BdrvChild **child; + BdrvChild *child; AioContext *old_parent_ctx; AioContext *old_child_ctx; } BdrvAttachChildCommonState; @@ -2942,39 +2912,35 @@ typedef struct BdrvAttachChildCommonState { static void bdrv_attach_child_common_abort(void *opaque) { BdrvAttachChildCommonState *s = opaque; - BdrvChild *child = *s->child; - BlockDriverState *bs = child->bs; + BlockDriverState *bs = s->child->bs; GLOBAL_STATE_CODE(); - /* - * Pass free_empty_child=false, because we still need the child - * for the AioContext operations on the parent below; those - * BdrvChildClass methods all work on a BdrvChild object, so we - * need to keep it as an empty shell (after this function, it will - * not be attached to any parent, and it will not have a .bs). - */ - bdrv_replace_child_noperm(s->child, NULL, false); + bdrv_replace_child_noperm(s->child, NULL); if (bdrv_get_aio_context(bs) != s->old_child_ctx) { - bdrv_try_set_aio_context(bs, s->old_child_ctx, &error_abort); + bdrv_try_change_aio_context(bs, s->old_child_ctx, NULL, &error_abort); } - if (bdrv_child_get_parent_aio_context(child) != s->old_parent_ctx) { - GSList *ignore; + if (bdrv_child_get_parent_aio_context(s->child) != s->old_parent_ctx) { + Transaction *tran; + GHashTable *visited; + bool ret; - /* No need to ignore `child`, because it has been detached already */ - ignore = NULL; - child->klass->can_set_aio_ctx(child, s->old_parent_ctx, &ignore, - &error_abort); - g_slist_free(ignore); + tran = tran_new(); - ignore = NULL; - child->klass->set_aio_ctx(child, s->old_parent_ctx, &ignore); - g_slist_free(ignore); + /* No need to visit `child`, because it has been detached already */ + visited = g_hash_table_new(NULL, NULL); + ret = s->child->klass->change_aio_ctx(s->child, s->old_parent_ctx, + visited, tran, &error_abort); + g_hash_table_destroy(visited); + + /* transaction is supposed to always succeed */ + assert(ret == true); + tran_commit(tran); } bdrv_unref(bs); - bdrv_child_free(child); + bdrv_child_free(s->child); } static TransactionActionDrv bdrv_attach_child_common_drv = { @@ -2985,28 +2951,22 @@ static TransactionActionDrv bdrv_attach_child_common_drv = { /* * Common part of attaching bdrv child to bs or to blk or to job * - * Resulting new child is returned through @child. - * At start *@child must be NULL. - * @child is saved to a new entry of @tran, so that *@child could be reverted to - * NULL on abort(). So referenced variable must live at least until transaction - * end. - * * Function doesn't update permissions, caller is responsible for this. + * + * Returns new created child. */ -static int bdrv_attach_child_common(BlockDriverState *child_bs, - const char *child_name, - const BdrvChildClass *child_class, - BdrvChildRole child_role, - uint64_t perm, uint64_t shared_perm, - void *opaque, BdrvChild **child, - Transaction *tran, Error **errp) +static BdrvChild *bdrv_attach_child_common(BlockDriverState *child_bs, + const char *child_name, + const BdrvChildClass *child_class, + BdrvChildRole child_role, + uint64_t perm, uint64_t shared_perm, + void *opaque, + Transaction *tran, Error **errp) { BdrvChild *new_child; AioContext *parent_ctx; AioContext *child_ctx = bdrv_get_aio_context(child_bs); - assert(child); - assert(*child == NULL); assert(child_class->get_parent_desc); GLOBAL_STATE_CODE(); @@ -3029,63 +2989,57 @@ static int bdrv_attach_child_common(BlockDriverState *child_bs, parent_ctx = bdrv_child_get_parent_aio_context(new_child); if (child_ctx != parent_ctx) { Error *local_err = NULL; - int ret = bdrv_try_set_aio_context(child_bs, parent_ctx, &local_err); - - if (ret < 0 && child_class->can_set_aio_ctx) { - GSList *ignore = g_slist_prepend(NULL, new_child); - if (child_class->can_set_aio_ctx(new_child, child_ctx, &ignore, - NULL)) - { + int ret = bdrv_try_change_aio_context(child_bs, parent_ctx, NULL, + &local_err); + + if (ret < 0 && child_class->change_aio_ctx) { + Transaction *tran = tran_new(); + GHashTable *visited = g_hash_table_new(NULL, NULL); + bool ret_child; + + g_hash_table_add(visited, new_child); + ret_child = child_class->change_aio_ctx(new_child, child_ctx, + visited, tran, NULL); + if (ret_child == true) { error_free(local_err); ret = 0; - g_slist_free(ignore); - ignore = g_slist_prepend(NULL, new_child); - child_class->set_aio_ctx(new_child, child_ctx, &ignore); } - g_slist_free(ignore); + tran_finalize(tran, ret_child == true ? 0 : -1); + g_hash_table_destroy(visited); } if (ret < 0) { error_propagate(errp, local_err); bdrv_child_free(new_child); - return ret; + return NULL; } } bdrv_ref(child_bs); - bdrv_replace_child_noperm(&new_child, child_bs, true); - /* child_bs was non-NULL, so new_child must not have been freed */ - assert(new_child != NULL); - - *child = new_child; + bdrv_replace_child_noperm(new_child, child_bs); BdrvAttachChildCommonState *s = g_new(BdrvAttachChildCommonState, 1); *s = (BdrvAttachChildCommonState) { - .child = child, + .child = new_child, .old_parent_ctx = parent_ctx, .old_child_ctx = child_ctx, }; tran_add(tran, &bdrv_attach_child_common_drv, s); - return 0; + return new_child; } /* - * Variable referenced by @child must live at least until transaction end. - * (see bdrv_attach_child_common() doc for details) - * * Function doesn't update permissions, caller is responsible for this. */ -static int bdrv_attach_child_noperm(BlockDriverState *parent_bs, - BlockDriverState *child_bs, - const char *child_name, - const BdrvChildClass *child_class, - BdrvChildRole child_role, - BdrvChild **child, - Transaction *tran, - Error **errp) +static BdrvChild *bdrv_attach_child_noperm(BlockDriverState *parent_bs, + BlockDriverState *child_bs, + const char *child_name, + const BdrvChildClass *child_class, + BdrvChildRole child_role, + Transaction *tran, + Error **errp) { - int ret; uint64_t perm, shared_perm; assert(parent_bs->drv); @@ -3094,29 +3048,25 @@ static int bdrv_attach_child_noperm(BlockDriverState *parent_bs, if (bdrv_recurse_has_child(child_bs, parent_bs)) { error_setg(errp, "Making '%s' a %s child of '%s' would create a cycle", child_bs->node_name, child_name, parent_bs->node_name); - return -EINVAL; + return NULL; } bdrv_get_cumulative_perm(parent_bs, &perm, &shared_perm); bdrv_child_perm(parent_bs, child_bs, NULL, child_role, NULL, perm, shared_perm, &perm, &shared_perm); - ret = bdrv_attach_child_common(child_bs, child_name, child_class, - child_role, perm, shared_perm, parent_bs, - child, tran, errp); - if (ret < 0) { - return ret; - } - - return 0; + return bdrv_attach_child_common(child_bs, child_name, child_class, + child_role, perm, shared_perm, parent_bs, + tran, errp); } -static void bdrv_detach_child(BdrvChild **childp) +static void bdrv_detach_child(BdrvChild *child) { - BlockDriverState *old_bs = (*childp)->bs; + BlockDriverState *old_bs = child->bs; GLOBAL_STATE_CODE(); - bdrv_replace_child_noperm(childp, NULL, true); + bdrv_replace_child_noperm(child, NULL); + bdrv_child_free(child); if (old_bs) { /* @@ -3130,7 +3080,7 @@ static void bdrv_detach_child(BdrvChild **childp) * When the parent requiring a non-default AioContext is removed, the * node moves back to the main AioContext */ - bdrv_try_set_aio_context(old_bs, qemu_get_aio_context(), NULL); + bdrv_try_change_aio_context(old_bs, qemu_get_aio_context(), NULL, NULL); } } @@ -3152,15 +3102,16 @@ BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs, void *opaque, Error **errp) { int ret; - BdrvChild *child = NULL; + BdrvChild *child; Transaction *tran = tran_new(); GLOBAL_STATE_CODE(); - ret = bdrv_attach_child_common(child_bs, child_name, child_class, + child = bdrv_attach_child_common(child_bs, child_name, child_class, child_role, perm, shared_perm, opaque, - &child, tran, errp); - if (ret < 0) { + tran, errp); + if (!child) { + ret = -EINVAL; goto out; } @@ -3168,11 +3119,10 @@ BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs, out: tran_finalize(tran, ret); - /* child is unset on failure by bdrv_attach_child_common_abort() */ - assert((ret < 0) == !child); bdrv_unref(child_bs); - return child; + + return ret < 0 ? NULL : child; } /* @@ -3194,14 +3144,15 @@ BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs, Error **errp) { int ret; - BdrvChild *child = NULL; + BdrvChild *child; Transaction *tran = tran_new(); GLOBAL_STATE_CODE(); - ret = bdrv_attach_child_noperm(parent_bs, child_bs, child_name, child_class, - child_role, &child, tran, errp); - if (ret < 0) { + child = bdrv_attach_child_noperm(parent_bs, child_bs, child_name, + child_class, child_role, tran, errp); + if (!child) { + ret = -EINVAL; goto out; } @@ -3212,12 +3163,10 @@ BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs, out: tran_finalize(tran, ret); - /* child is unset on failure by bdrv_attach_child_common_abort() */ - assert((ret < 0) == !child); bdrv_unref(child_bs); - return child; + return ret < 0 ? NULL : child; } /* Callers must ensure that child->frozen is false. */ @@ -3228,7 +3177,7 @@ void bdrv_root_unref_child(BdrvChild *child) GLOBAL_STATE_CODE(); child_bs = child->bs; - bdrv_detach_child(&child); + bdrv_detach_child(child); bdrv_unref(child_bs); } @@ -3359,7 +3308,6 @@ static int bdrv_set_file_or_backing_noperm(BlockDriverState *parent_bs, bool is_backing, Transaction *tran, Error **errp) { - int ret = 0; bool update_inherits_from = bdrv_inherits_from_recursive(child_bs, parent_bs); BdrvChild *child = is_backing ? parent_bs->backing : parent_bs->file; @@ -3410,21 +3358,19 @@ static int bdrv_set_file_or_backing_noperm(BlockDriverState *parent_bs, if (child) { bdrv_unset_inherits_from(parent_bs, child, tran); - bdrv_remove_file_or_backing_child(parent_bs, child, tran); + bdrv_remove_child(child, tran); } if (!child_bs) { goto out; } - ret = bdrv_attach_child_noperm(parent_bs, child_bs, - is_backing ? "backing" : "file", - &child_of_bds, role, - is_backing ? &parent_bs->backing : - &parent_bs->file, - tran, errp); - if (ret < 0) { - return ret; + child = bdrv_attach_child_noperm(parent_bs, child_bs, + is_backing ? "backing" : "file", + &child_of_bds, role, + tran, errp); + if (!child) { + return -EINVAL; } @@ -3670,6 +3616,29 @@ BdrvChild *bdrv_open_child(const char *filename, } /* + * Wrapper on bdrv_open_child() for most popular case: open primary child of bs. + */ +int bdrv_open_file_child(const char *filename, + QDict *options, const char *bdref_key, + BlockDriverState *parent, Error **errp) +{ + BdrvChildRole role; + + /* commit_top and mirror_top don't use this function */ + assert(!parent->drv->filtered_child_is_backing); + role = parent->drv->is_filter ? + (BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY) : BDRV_CHILD_IMAGE; + + if (!bdrv_open_child(filename, options, bdref_key, parent, + &child_of_bds, role, false, errp)) + { + return -EINVAL; + } + + return 0; +} + +/* * TODO Future callers may need to specify parent/child_class in order for * option inheritance to work. Existing callers use it for the root node. */ @@ -3718,8 +3687,7 @@ static BlockDriverState *bdrv_append_temp_snapshot(BlockDriverState *bs, QDict *snapshot_options, Error **errp) { - /* TODO: extra byte is a hack to ensure MAX_PATH space on Windows. */ - char *tmp_filename = g_malloc0(PATH_MAX + 1); + g_autofree char *tmp_filename = NULL; int64_t total_size; QemuOpts *opts = NULL; BlockDriverState *bs_snapshot = NULL; @@ -3738,9 +3706,8 @@ static BlockDriverState *bdrv_append_temp_snapshot(BlockDriverState *bs, } /* Create the temporary image */ - ret = get_tmp_filename(tmp_filename, PATH_MAX + 1); - if (ret < 0) { - error_setg_errno(errp, -ret, "Could not get temporary filename"); + tmp_filename = create_tmp_file(errp); + if (!tmp_filename) { goto out; } @@ -3774,7 +3741,6 @@ static BlockDriverState *bdrv_append_temp_snapshot(BlockDriverState *bs, out: qobject_unref(snapshot_options); - g_free(tmp_filename); return bs_snapshot; } @@ -4941,8 +4907,8 @@ static void bdrv_close(BlockDriverState *bs) bdrv_unref_child(bs, child); } - bs->backing = NULL; - bs->file = NULL; + assert(!bs->backing); + assert(!bs->file); g_free(bs->opaque); bs->opaque = NULL; qatomic_set(&bs->copy_on_read, 0); @@ -5073,96 +5039,28 @@ static bool should_update_child(BdrvChild *c, BlockDriverState *to) return ret; } -typedef struct BdrvRemoveFilterOrCowChild { - BdrvChild *child; - BlockDriverState *bs; - bool is_backing; -} BdrvRemoveFilterOrCowChild; - -static void bdrv_remove_filter_or_cow_child_abort(void *opaque) -{ - BdrvRemoveFilterOrCowChild *s = opaque; - BlockDriverState *parent_bs = s->child->opaque; - - if (s->is_backing) { - parent_bs->backing = s->child; - } else { - parent_bs->file = s->child; - } - - /* - * We don't have to restore child->bs here to undo bdrv_replace_child_tran() - * because that function is transactionable and it registered own completion - * entries in @tran, so .abort() for bdrv_replace_child_safe() will be - * called automatically. - */ -} - -static void bdrv_remove_filter_or_cow_child_commit(void *opaque) +static void bdrv_remove_child_commit(void *opaque) { - BdrvRemoveFilterOrCowChild *s = opaque; GLOBAL_STATE_CODE(); - bdrv_child_free(s->child); + bdrv_child_free(opaque); } -static void bdrv_remove_filter_or_cow_child_clean(void *opaque) -{ - BdrvRemoveFilterOrCowChild *s = opaque; - - /* Drop the bs reference after the transaction is done */ - bdrv_unref(s->bs); - g_free(s); -} - -static TransactionActionDrv bdrv_remove_filter_or_cow_child_drv = { - .abort = bdrv_remove_filter_or_cow_child_abort, - .commit = bdrv_remove_filter_or_cow_child_commit, - .clean = bdrv_remove_filter_or_cow_child_clean, +static TransactionActionDrv bdrv_remove_child_drv = { + .commit = bdrv_remove_child_commit, }; -/* - * A function to remove backing or file child of @bs. - * Function doesn't update permissions, caller is responsible for this. - */ -static void bdrv_remove_file_or_backing_child(BlockDriverState *bs, - BdrvChild *child, - Transaction *tran) +/* Function doesn't update permissions, caller is responsible for this. */ +static void bdrv_remove_child(BdrvChild *child, Transaction *tran) { - BdrvChild **childp; - BdrvRemoveFilterOrCowChild *s; - if (!child) { return; } - /* - * Keep a reference to @bs so @childp will stay valid throughout the - * transaction (required by bdrv_replace_child_tran()) - */ - bdrv_ref(bs); - if (child == bs->backing) { - childp = &bs->backing; - } else if (child == bs->file) { - childp = &bs->file; - } else { - g_assert_not_reached(); - } - if (child->bs) { - /* - * Pass free_empty_child=false, we will free the child in - * bdrv_remove_filter_or_cow_child_commit() - */ - bdrv_replace_child_tran(childp, NULL, tran, false); + bdrv_replace_child_tran(child, NULL, tran); } - s = g_new(BdrvRemoveFilterOrCowChild, 1); - *s = (BdrvRemoveFilterOrCowChild) { - .child = child, - .bs = bs, - .is_backing = (childp == &bs->backing), - }; - tran_add(tran, &bdrv_remove_filter_or_cow_child_drv, s); + tran_add(tran, &bdrv_remove_child_drv, child); } /* @@ -5173,7 +5071,7 @@ static void bdrv_remove_file_or_backing_child(BlockDriverState *bs, static void bdrv_remove_filter_or_cow_child(BlockDriverState *bs, Transaction *tran) { - bdrv_remove_file_or_backing_child(bs, bdrv_filter_or_cow_child(bs), tran); + bdrv_remove_child(bdrv_filter_or_cow_child(bs), tran); } static int bdrv_replace_node_noperm(BlockDriverState *from, @@ -5183,7 +5081,6 @@ static int bdrv_replace_node_noperm(BlockDriverState *from, { BdrvChild *c, *next; - assert(to != NULL); GLOBAL_STATE_CODE(); QLIST_FOREACH_SAFE(c, &from->parents, next_parent, next) { @@ -5201,12 +5098,7 @@ static int bdrv_replace_node_noperm(BlockDriverState *from, c->name, from->node_name); return -EPERM; } - - /* - * Passing a pointer to the local variable @c is fine here, because - * @to is not NULL, and so &c will not be attached to the transaction. - */ - bdrv_replace_child_tran(&c, to, tran, true); + bdrv_replace_child_tran(c, to, tran); } return 0; @@ -5221,8 +5113,6 @@ static int bdrv_replace_node_noperm(BlockDriverState *from, * * With @detach_subchain=true @to must be in a backing chain of @from. In this * case backing link of the cow-parent of @to is removed. - * - * @to must not be NULL. */ static int bdrv_replace_node_common(BlockDriverState *from, BlockDriverState *to, @@ -5236,7 +5126,6 @@ static int bdrv_replace_node_common(BlockDriverState *from, int ret; GLOBAL_STATE_CODE(); - assert(to != NULL); if (detach_subchain) { assert(bdrv_chain_contains(from, to)); @@ -5293,9 +5182,6 @@ out: return ret; } -/** - * Replace node @from by @to (where neither may be NULL). - */ int bdrv_replace_node(BlockDriverState *from, BlockDriverState *to, Error **errp) { @@ -5328,16 +5214,18 @@ int bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top, Error **errp) { int ret; + BdrvChild *child; Transaction *tran = tran_new(); GLOBAL_STATE_CODE(); assert(!bs_new->backing); - ret = bdrv_attach_child_noperm(bs_new, bs_top, "backing", - &child_of_bds, bdrv_backing_role(bs_new), - &bs_new->backing, tran, errp); - if (ret < 0) { + child = bdrv_attach_child_noperm(bs_new, bs_top, "backing", + &child_of_bds, bdrv_backing_role(bs_new), + tran, errp); + if (!child) { + ret = -EINVAL; goto out; } @@ -5371,9 +5259,7 @@ int bdrv_replace_child_bs(BdrvChild *child, BlockDriverState *new_bs, bdrv_drained_begin(old_bs); bdrv_drained_begin(new_bs); - bdrv_replace_child_tran(&child, new_bs, tran, true); - /* @new_bs must have been non-NULL, so @child must not have been freed */ - assert(child != NULL); + bdrv_replace_child_tran(child, new_bs, tran); found = g_hash_table_new(NULL, NULL); refresh_list = bdrv_topological_dfs(refresh_list, found, old_bs); @@ -7285,6 +7171,7 @@ static void bdrv_detach_aio_context(BlockDriverState *bs) if (bs->quiesce_counter) { aio_enable_external(bs->aio_context); } + assert_bdrv_graph_writable(bs); bs->aio_context = NULL; } @@ -7298,6 +7185,7 @@ static void bdrv_attach_aio_context(BlockDriverState *bs, aio_disable_external(new_context); } + assert_bdrv_graph_writable(bs); bs->aio_context = new_context; if (bs->drv && bs->drv->bdrv_attach_aio_context) { @@ -7316,191 +7204,222 @@ static void bdrv_attach_aio_context(BlockDriverState *bs, bs->walking_aio_notifiers = false; } -/* - * Changes the AioContext used for fd handlers, timers, and BHs by this - * BlockDriverState and all its children and parents. - * - * Must be called from the main AioContext. - * - * The caller must own the AioContext lock for the old AioContext of bs, but it - * must not own the AioContext lock for new_context (unless new_context is the - * same as the current context of bs). - * - * @ignore will accumulate all visited BdrvChild object. The caller is - * responsible for freeing the list afterwards. - */ -void bdrv_set_aio_context_ignore(BlockDriverState *bs, - AioContext *new_context, GSList **ignore) -{ - AioContext *old_context = bdrv_get_aio_context(bs); - GSList *children_to_process = NULL; - GSList *parents_to_process = NULL; - GSList *entry; - BdrvChild *child, *parent; - - g_assert(qemu_get_current_aio_context() == qemu_get_aio_context()); - GLOBAL_STATE_CODE(); - - if (old_context == new_context) { - return; - } - - bdrv_drained_begin(bs); - - QLIST_FOREACH(child, &bs->children, next) { - if (g_slist_find(*ignore, child)) { - continue; - } - *ignore = g_slist_prepend(*ignore, child); - children_to_process = g_slist_prepend(children_to_process, child); - } - - QLIST_FOREACH(parent, &bs->parents, next_parent) { - if (g_slist_find(*ignore, parent)) { - continue; - } - *ignore = g_slist_prepend(*ignore, parent); - parents_to_process = g_slist_prepend(parents_to_process, parent); - } - - for (entry = children_to_process; - entry != NULL; - entry = g_slist_next(entry)) { - child = entry->data; - bdrv_set_aio_context_ignore(child->bs, new_context, ignore); - } - g_slist_free(children_to_process); - - for (entry = parents_to_process; - entry != NULL; - entry = g_slist_next(entry)) { - parent = entry->data; - assert(parent->klass->set_aio_ctx); - parent->klass->set_aio_ctx(parent, new_context, ignore); - } - g_slist_free(parents_to_process); - - bdrv_detach_aio_context(bs); - - /* Acquire the new context, if necessary */ - if (qemu_get_aio_context() != new_context) { - aio_context_acquire(new_context); - } - - bdrv_attach_aio_context(bs, new_context); - - /* - * If this function was recursively called from - * bdrv_set_aio_context_ignore(), there may be nodes in the - * subtree that have not yet been moved to the new AioContext. - * Release the old one so bdrv_drained_end() can poll them. - */ - if (qemu_get_aio_context() != old_context) { - aio_context_release(old_context); - } - - bdrv_drained_end(bs); - - if (qemu_get_aio_context() != old_context) { - aio_context_acquire(old_context); - } - if (qemu_get_aio_context() != new_context) { - aio_context_release(new_context); - } -} +typedef struct BdrvStateSetAioContext { + AioContext *new_ctx; + BlockDriverState *bs; +} BdrvStateSetAioContext; -static bool bdrv_parent_can_set_aio_context(BdrvChild *c, AioContext *ctx, - GSList **ignore, Error **errp) +static bool bdrv_parent_change_aio_context(BdrvChild *c, AioContext *ctx, + GHashTable *visited, + Transaction *tran, + Error **errp) { GLOBAL_STATE_CODE(); - if (g_slist_find(*ignore, c)) { + if (g_hash_table_contains(visited, c)) { return true; } - *ignore = g_slist_prepend(*ignore, c); + g_hash_table_add(visited, c); /* * A BdrvChildClass that doesn't handle AioContext changes cannot * tolerate any AioContext changes */ - if (!c->klass->can_set_aio_ctx) { + if (!c->klass->change_aio_ctx) { char *user = bdrv_child_user_desc(c); error_setg(errp, "Changing iothreads is not supported by %s", user); g_free(user); return false; } - if (!c->klass->can_set_aio_ctx(c, ctx, ignore, errp)) { + if (!c->klass->change_aio_ctx(c, ctx, visited, tran, errp)) { assert(!errp || *errp); return false; } return true; } -bool bdrv_child_can_set_aio_context(BdrvChild *c, AioContext *ctx, - GSList **ignore, Error **errp) +bool bdrv_child_change_aio_context(BdrvChild *c, AioContext *ctx, + GHashTable *visited, Transaction *tran, + Error **errp) { GLOBAL_STATE_CODE(); - if (g_slist_find(*ignore, c)) { + if (g_hash_table_contains(visited, c)) { return true; } - *ignore = g_slist_prepend(*ignore, c); - return bdrv_can_set_aio_context(c->bs, ctx, ignore, errp); + g_hash_table_add(visited, c); + return bdrv_change_aio_context(c->bs, ctx, visited, tran, errp); +} + +static void bdrv_set_aio_context_clean(void *opaque) +{ + BdrvStateSetAioContext *state = (BdrvStateSetAioContext *) opaque; + BlockDriverState *bs = (BlockDriverState *) state->bs; + + /* Paired with bdrv_drained_begin in bdrv_change_aio_context() */ + bdrv_drained_end(bs); + + g_free(state); +} + +static void bdrv_set_aio_context_commit(void *opaque) +{ + BdrvStateSetAioContext *state = (BdrvStateSetAioContext *) opaque; + BlockDriverState *bs = (BlockDriverState *) state->bs; + AioContext *new_context = state->new_ctx; + AioContext *old_context = bdrv_get_aio_context(bs); + assert_bdrv_graph_writable(bs); + + /* + * Take the old AioContex when detaching it from bs. + * At this point, new_context lock is already acquired, and we are now + * also taking old_context. This is safe as long as bdrv_detach_aio_context + * does not call AIO_POLL_WHILE(). + */ + if (old_context != qemu_get_aio_context()) { + aio_context_acquire(old_context); + } + bdrv_detach_aio_context(bs); + if (old_context != qemu_get_aio_context()) { + aio_context_release(old_context); + } + bdrv_attach_aio_context(bs, new_context); } -/* @ignore will accumulate all visited BdrvChild object. The caller is - * responsible for freeing the list afterwards. */ -bool bdrv_can_set_aio_context(BlockDriverState *bs, AioContext *ctx, - GSList **ignore, Error **errp) +static TransactionActionDrv set_aio_context = { + .commit = bdrv_set_aio_context_commit, + .clean = bdrv_set_aio_context_clean, +}; + +/* + * Changes the AioContext used for fd handlers, timers, and BHs by this + * BlockDriverState and all its children and parents. + * + * Must be called from the main AioContext. + * + * The caller must own the AioContext lock for the old AioContext of bs, but it + * must not own the AioContext lock for new_context (unless new_context is the + * same as the current context of bs). + * + * @visited will accumulate all visited BdrvChild objects. The caller is + * responsible for freeing the list afterwards. + */ +static bool bdrv_change_aio_context(BlockDriverState *bs, AioContext *ctx, + GHashTable *visited, Transaction *tran, + Error **errp) { BdrvChild *c; + BdrvStateSetAioContext *state; + + GLOBAL_STATE_CODE(); if (bdrv_get_aio_context(bs) == ctx) { return true; } - GLOBAL_STATE_CODE(); - QLIST_FOREACH(c, &bs->parents, next_parent) { - if (!bdrv_parent_can_set_aio_context(c, ctx, ignore, errp)) { + if (!bdrv_parent_change_aio_context(c, ctx, visited, tran, errp)) { return false; } } + QLIST_FOREACH(c, &bs->children, next) { - if (!bdrv_child_can_set_aio_context(c, ctx, ignore, errp)) { + if (!bdrv_child_change_aio_context(c, ctx, visited, tran, errp)) { return false; } } + state = g_new(BdrvStateSetAioContext, 1); + *state = (BdrvStateSetAioContext) { + .new_ctx = ctx, + .bs = bs, + }; + + /* Paired with bdrv_drained_end in bdrv_set_aio_context_clean() */ + bdrv_drained_begin(bs); + + tran_add(tran, &set_aio_context, state); + return true; } -int bdrv_child_try_set_aio_context(BlockDriverState *bs, AioContext *ctx, - BdrvChild *ignore_child, Error **errp) +/* + * Change bs's and recursively all of its parents' and children's AioContext + * to the given new context, returning an error if that isn't possible. + * + * If ignore_child is not NULL, that child (and its subgraph) will not + * be touched. + * + * This function still requires the caller to take the bs current + * AioContext lock, otherwise draining will fail since AIO_WAIT_WHILE + * assumes the lock is always held if bs is in another AioContext. + * For the same reason, it temporarily also holds the new AioContext, since + * bdrv_drained_end calls BDRV_POLL_WHILE that assumes the lock is taken too. + * Therefore the new AioContext lock must not be taken by the caller. + */ +int bdrv_try_change_aio_context(BlockDriverState *bs, AioContext *ctx, + BdrvChild *ignore_child, Error **errp) { - GSList *ignore; - bool ret; - + Transaction *tran; + GHashTable *visited; + int ret; + AioContext *old_context = bdrv_get_aio_context(bs); GLOBAL_STATE_CODE(); - ignore = ignore_child ? g_slist_prepend(NULL, ignore_child) : NULL; - ret = bdrv_can_set_aio_context(bs, ctx, &ignore, errp); - g_slist_free(ignore); + /* + * Recursion phase: go through all nodes of the graph. + * Take care of checking that all nodes support changing AioContext + * and drain them, builing a linear list of callbacks to run if everything + * is successful (the transaction itself). + */ + tran = tran_new(); + visited = g_hash_table_new(NULL, NULL); + if (ignore_child) { + g_hash_table_add(visited, ignore_child); + } + ret = bdrv_change_aio_context(bs, ctx, visited, tran, errp); + g_hash_table_destroy(visited); + + /* + * Linear phase: go through all callbacks collected in the transaction. + * Run all callbacks collected in the recursion to switch all nodes + * AioContext lock (transaction commit), or undo all changes done in the + * recursion (transaction abort). + */ if (!ret) { + /* Just run clean() callbacks. No AioContext changed. */ + tran_abort(tran); return -EPERM; } - ignore = ignore_child ? g_slist_prepend(NULL, ignore_child) : NULL; - bdrv_set_aio_context_ignore(bs, ctx, &ignore); - g_slist_free(ignore); + /* + * Release old AioContext, it won't be needed anymore, as all + * bdrv_drained_begin() have been called already. + */ + if (qemu_get_aio_context() != old_context) { + aio_context_release(old_context); + } - return 0; -} + /* + * Acquire new AioContext since bdrv_drained_end() is going to be called + * after we switched all nodes in the new AioContext, and the function + * assumes that the lock of the bs is always taken. + */ + if (qemu_get_aio_context() != ctx) { + aio_context_acquire(ctx); + } -int bdrv_try_set_aio_context(BlockDriverState *bs, AioContext *ctx, - Error **errp) -{ - GLOBAL_STATE_CODE(); - return bdrv_child_try_set_aio_context(bs, ctx, NULL, errp); + tran_commit(tran); + + if (qemu_get_aio_context() != ctx) { + aio_context_release(ctx); + } + + /* Re-acquire the old AioContext, since the caller takes and releases it. */ + if (qemu_get_aio_context() != old_context) { + aio_context_acquire(old_context); + } + + return 0; } void bdrv_add_aio_context_notifier(BlockDriverState *bs, diff --git a/block/backup.c b/block/backup.c index b2b649e305..6a9ad97a53 100644 --- a/block/backup.c +++ b/block/backup.c @@ -309,7 +309,7 @@ static void coroutine_fn backup_pause(Job *job) } } -static void coroutine_fn backup_set_speed(BlockJob *job, int64_t speed) +static void backup_set_speed(BlockJob *job, int64_t speed) { BackupBlockJob *s = container_of(job, BackupBlockJob, common); diff --git a/block/blkdebug.c b/block/blkdebug.c index bbf2948703..4265ca125e 100644 --- a/block/blkdebug.c +++ b/block/blkdebug.c @@ -503,12 +503,9 @@ static int blkdebug_open(BlockDriverState *bs, QDict *options, int flags, } /* Open the image file */ - bs->file = bdrv_open_child(qemu_opt_get(opts, "x-image"), options, "image", - bs, &child_of_bds, - BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY, - false, errp); - if (!bs->file) { - ret = -EINVAL; + ret = bdrv_open_file_child(qemu_opt_get(opts, "x-image"), options, "image", + bs, errp); + if (ret < 0) { goto out; } @@ -672,7 +669,7 @@ blkdebug_co_pwritev(BlockDriverState *bs, int64_t offset, int64_t bytes, return bdrv_co_pwritev(bs->file, offset, bytes, qiov, flags); } -static int blkdebug_co_flush(BlockDriverState *bs) +static int coroutine_fn blkdebug_co_flush(BlockDriverState *bs) { int err = rule_check(bs, 0, 0, BLKDEBUG_IO_TYPE_FLUSH); diff --git a/block/blkio.c b/block/blkio.c new file mode 100644 index 0000000000..82f26eedd2 --- /dev/null +++ b/block/blkio.c @@ -0,0 +1,1008 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +/* + * libblkio BlockDriver + * + * Copyright Red Hat, Inc. + * + * Author: + * Stefan Hajnoczi <stefanha@redhat.com> + */ + +#include "qemu/osdep.h" +#include <blkio.h> +#include "block/block_int.h" +#include "exec/memory.h" +#include "exec/cpu-common.h" /* for qemu_ram_get_fd() */ +#include "qapi/error.h" +#include "qemu/error-report.h" +#include "qapi/qmp/qdict.h" +#include "qemu/module.h" +#include "exec/memory.h" /* for ram_block_discard_disable() */ + +/* + * Keep the QEMU BlockDriver names identical to the libblkio driver names. + * Using macros instead of typing out the string literals avoids typos. + */ +#define DRIVER_IO_URING "io_uring" +#define DRIVER_NVME_IO_URING "nvme-io_uring" +#define DRIVER_VIRTIO_BLK_VHOST_USER "virtio-blk-vhost-user" +#define DRIVER_VIRTIO_BLK_VHOST_VDPA "virtio-blk-vhost-vdpa" + +/* + * Allocated bounce buffers are kept in a list sorted by buffer address. + */ +typedef struct BlkioBounceBuf { + QLIST_ENTRY(BlkioBounceBuf) next; + + /* The bounce buffer */ + struct iovec buf; +} BlkioBounceBuf; + +typedef struct { + /* + * libblkio is not thread-safe so this lock protects ->blkio and + * ->blkioq. + */ + QemuMutex blkio_lock; + struct blkio *blkio; + struct blkioq *blkioq; /* make this multi-queue in the future... */ + int completion_fd; + + /* + * Polling fetches the next completion into this field. + * + * No lock is necessary since only one thread calls aio_poll() and invokes + * fd and poll handlers. + */ + struct blkio_completion poll_completion; + + /* + * Protects ->bounce_pool, ->bounce_bufs, ->bounce_available. + * + * Lock ordering: ->bounce_lock before ->blkio_lock. + */ + CoMutex bounce_lock; + + /* Bounce buffer pool */ + struct blkio_mem_region bounce_pool; + + /* Sorted list of allocated bounce buffers */ + QLIST_HEAD(, BlkioBounceBuf) bounce_bufs; + + /* Queue for coroutines waiting for bounce buffer space */ + CoQueue bounce_available; + + /* The value of the "mem-region-alignment" property */ + size_t mem_region_alignment; + + /* Can we skip adding/deleting blkio_mem_regions? */ + bool needs_mem_regions; + + /* Are file descriptors necessary for blkio_mem_regions? */ + bool needs_mem_region_fd; + + /* Are madvise(MADV_DONTNEED)-style operations unavailable? */ + bool may_pin_mem_regions; +} BDRVBlkioState; + +/* Called with s->bounce_lock held */ +static int blkio_resize_bounce_pool(BDRVBlkioState *s, int64_t bytes) +{ + /* There can be no allocated bounce buffers during resize */ + assert(QLIST_EMPTY(&s->bounce_bufs)); + + /* Pad size to reduce frequency of resize calls */ + bytes += 128 * 1024; + + WITH_QEMU_LOCK_GUARD(&s->blkio_lock) { + int ret; + + if (s->bounce_pool.addr) { + blkio_unmap_mem_region(s->blkio, &s->bounce_pool); + blkio_free_mem_region(s->blkio, &s->bounce_pool); + memset(&s->bounce_pool, 0, sizeof(s->bounce_pool)); + } + + /* Automatically freed when s->blkio is destroyed */ + ret = blkio_alloc_mem_region(s->blkio, &s->bounce_pool, bytes); + if (ret < 0) { + return ret; + } + + ret = blkio_map_mem_region(s->blkio, &s->bounce_pool); + if (ret < 0) { + blkio_free_mem_region(s->blkio, &s->bounce_pool); + memset(&s->bounce_pool, 0, sizeof(s->bounce_pool)); + return ret; + } + } + + return 0; +} + +/* Called with s->bounce_lock held */ +static bool +blkio_do_alloc_bounce_buffer(BDRVBlkioState *s, BlkioBounceBuf *bounce, + int64_t bytes) +{ + void *addr = s->bounce_pool.addr; + BlkioBounceBuf *cur = NULL; + BlkioBounceBuf *prev = NULL; + ptrdiff_t space; + + /* + * This is just a linear search over the holes between requests. An + * efficient allocator would be nice. + */ + QLIST_FOREACH(cur, &s->bounce_bufs, next) { + space = cur->buf.iov_base - addr; + if (bytes <= space) { + QLIST_INSERT_BEFORE(cur, bounce, next); + bounce->buf.iov_base = addr; + bounce->buf.iov_len = bytes; + return true; + } + + addr = cur->buf.iov_base + cur->buf.iov_len; + prev = cur; + } + + /* Is there space after the last request? */ + space = s->bounce_pool.addr + s->bounce_pool.len - addr; + if (bytes > space) { + return false; + } + if (prev) { + QLIST_INSERT_AFTER(prev, bounce, next); + } else { + QLIST_INSERT_HEAD(&s->bounce_bufs, bounce, next); + } + bounce->buf.iov_base = addr; + bounce->buf.iov_len = bytes; + return true; +} + +static int coroutine_fn +blkio_alloc_bounce_buffer(BDRVBlkioState *s, BlkioBounceBuf *bounce, + int64_t bytes) +{ + /* + * Ensure fairness: first time around we join the back of the queue, + * subsequently we join the front so we don't lose our place. + */ + CoQueueWaitFlags wait_flags = 0; + + QEMU_LOCK_GUARD(&s->bounce_lock); + + /* Ensure fairness: don't even try if other requests are already waiting */ + if (!qemu_co_queue_empty(&s->bounce_available)) { + qemu_co_queue_wait_flags(&s->bounce_available, &s->bounce_lock, + wait_flags); + wait_flags = CO_QUEUE_WAIT_FRONT; + } + + while (true) { + if (blkio_do_alloc_bounce_buffer(s, bounce, bytes)) { + /* Kick the next queued request since there may be space */ + qemu_co_queue_next(&s->bounce_available); + return 0; + } + + /* + * If there are no in-flight requests then the pool was simply too + * small. + */ + if (QLIST_EMPTY(&s->bounce_bufs)) { + bool ok; + int ret; + + ret = blkio_resize_bounce_pool(s, bytes); + if (ret < 0) { + /* Kick the next queued request since that may fail too */ + qemu_co_queue_next(&s->bounce_available); + return ret; + } + + ok = blkio_do_alloc_bounce_buffer(s, bounce, bytes); + assert(ok); /* must have space this time */ + return 0; + } + + qemu_co_queue_wait_flags(&s->bounce_available, &s->bounce_lock, + wait_flags); + wait_flags = CO_QUEUE_WAIT_FRONT; + } +} + +static void coroutine_fn blkio_free_bounce_buffer(BDRVBlkioState *s, + BlkioBounceBuf *bounce) +{ + QEMU_LOCK_GUARD(&s->bounce_lock); + + QLIST_REMOVE(bounce, next); + + /* Wake up waiting coroutines since space may now be available */ + qemu_co_queue_next(&s->bounce_available); +} + +/* For async to .bdrv_co_*() conversion */ +typedef struct { + Coroutine *coroutine; + int ret; +} BlkioCoData; + +static void blkio_completion_fd_read(void *opaque) +{ + BlockDriverState *bs = opaque; + BDRVBlkioState *s = bs->opaque; + uint64_t val; + int ret; + + /* Polling may have already fetched a completion */ + if (s->poll_completion.user_data != NULL) { + BlkioCoData *cod = s->poll_completion.user_data; + cod->ret = s->poll_completion.ret; + + /* Clear it in case aio_co_wake() enters a nested event loop */ + s->poll_completion.user_data = NULL; + + aio_co_wake(cod->coroutine); + } + + /* Reset completion fd status */ + ret = read(s->completion_fd, &val, sizeof(val)); + + /* Ignore errors, there's nothing we can do */ + (void)ret; + + /* + * Reading one completion at a time makes nested event loop re-entrancy + * simple. Change this loop to get multiple completions in one go if it + * becomes a performance bottleneck. + */ + while (true) { + struct blkio_completion completion; + + WITH_QEMU_LOCK_GUARD(&s->blkio_lock) { + ret = blkioq_do_io(s->blkioq, &completion, 0, 1, NULL); + } + if (ret != 1) { + break; + } + + BlkioCoData *cod = completion.user_data; + cod->ret = completion.ret; + aio_co_wake(cod->coroutine); + } +} + +static bool blkio_completion_fd_poll(void *opaque) +{ + BlockDriverState *bs = opaque; + BDRVBlkioState *s = bs->opaque; + int ret; + + /* Just in case we already fetched a completion */ + if (s->poll_completion.user_data != NULL) { + return true; + } + + WITH_QEMU_LOCK_GUARD(&s->blkio_lock) { + ret = blkioq_do_io(s->blkioq, &s->poll_completion, 0, 1, NULL); + } + return ret == 1; +} + +static void blkio_completion_fd_poll_ready(void *opaque) +{ + blkio_completion_fd_read(opaque); +} + +static void blkio_attach_aio_context(BlockDriverState *bs, + AioContext *new_context) +{ + BDRVBlkioState *s = bs->opaque; + + aio_set_fd_handler(new_context, + s->completion_fd, + false, + blkio_completion_fd_read, + NULL, + blkio_completion_fd_poll, + blkio_completion_fd_poll_ready, + bs); +} + +static void blkio_detach_aio_context(BlockDriverState *bs) +{ + BDRVBlkioState *s = bs->opaque; + + aio_set_fd_handler(bdrv_get_aio_context(bs), + s->completion_fd, + false, NULL, NULL, NULL, NULL, NULL); +} + +/* Call with s->blkio_lock held to submit I/O after enqueuing a new request */ +static void blkio_submit_io(BlockDriverState *bs) +{ + if (qatomic_read(&bs->io_plugged) == 0) { + BDRVBlkioState *s = bs->opaque; + + blkioq_do_io(s->blkioq, NULL, 0, 0, NULL); + } +} + +static int coroutine_fn +blkio_co_pdiscard(BlockDriverState *bs, int64_t offset, int64_t bytes) +{ + BDRVBlkioState *s = bs->opaque; + BlkioCoData cod = { + .coroutine = qemu_coroutine_self(), + }; + + WITH_QEMU_LOCK_GUARD(&s->blkio_lock) { + blkioq_discard(s->blkioq, offset, bytes, &cod, 0); + blkio_submit_io(bs); + } + + qemu_coroutine_yield(); + return cod.ret; +} + +static int coroutine_fn +blkio_co_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes, + QEMUIOVector *qiov, BdrvRequestFlags flags) +{ + BlkioCoData cod = { + .coroutine = qemu_coroutine_self(), + }; + BDRVBlkioState *s = bs->opaque; + bool use_bounce_buffer = + s->needs_mem_regions && !(flags & BDRV_REQ_REGISTERED_BUF); + BlkioBounceBuf bounce; + struct iovec *iov = qiov->iov; + int iovcnt = qiov->niov; + + if (use_bounce_buffer) { + int ret = blkio_alloc_bounce_buffer(s, &bounce, bytes); + if (ret < 0) { + return ret; + } + + iov = &bounce.buf; + iovcnt = 1; + } + + WITH_QEMU_LOCK_GUARD(&s->blkio_lock) { + blkioq_readv(s->blkioq, offset, iov, iovcnt, &cod, 0); + blkio_submit_io(bs); + } + + qemu_coroutine_yield(); + + if (use_bounce_buffer) { + if (cod.ret == 0) { + qemu_iovec_from_buf(qiov, 0, + bounce.buf.iov_base, + bounce.buf.iov_len); + } + + blkio_free_bounce_buffer(s, &bounce); + } + + return cod.ret; +} + +static int coroutine_fn blkio_co_pwritev(BlockDriverState *bs, int64_t offset, + int64_t bytes, QEMUIOVector *qiov, BdrvRequestFlags flags) +{ + uint32_t blkio_flags = (flags & BDRV_REQ_FUA) ? BLKIO_REQ_FUA : 0; + BlkioCoData cod = { + .coroutine = qemu_coroutine_self(), + }; + BDRVBlkioState *s = bs->opaque; + bool use_bounce_buffer = + s->needs_mem_regions && !(flags & BDRV_REQ_REGISTERED_BUF); + BlkioBounceBuf bounce; + struct iovec *iov = qiov->iov; + int iovcnt = qiov->niov; + + if (use_bounce_buffer) { + int ret = blkio_alloc_bounce_buffer(s, &bounce, bytes); + if (ret < 0) { + return ret; + } + + qemu_iovec_to_buf(qiov, 0, bounce.buf.iov_base, bytes); + iov = &bounce.buf; + iovcnt = 1; + } + + WITH_QEMU_LOCK_GUARD(&s->blkio_lock) { + blkioq_writev(s->blkioq, offset, iov, iovcnt, &cod, blkio_flags); + blkio_submit_io(bs); + } + + qemu_coroutine_yield(); + + if (use_bounce_buffer) { + blkio_free_bounce_buffer(s, &bounce); + } + + return cod.ret; +} + +static int coroutine_fn blkio_co_flush(BlockDriverState *bs) +{ + BDRVBlkioState *s = bs->opaque; + BlkioCoData cod = { + .coroutine = qemu_coroutine_self(), + }; + + WITH_QEMU_LOCK_GUARD(&s->blkio_lock) { + blkioq_flush(s->blkioq, &cod, 0); + blkio_submit_io(bs); + } + + qemu_coroutine_yield(); + return cod.ret; +} + +static int coroutine_fn blkio_co_pwrite_zeroes(BlockDriverState *bs, + int64_t offset, int64_t bytes, BdrvRequestFlags flags) +{ + BDRVBlkioState *s = bs->opaque; + BlkioCoData cod = { + .coroutine = qemu_coroutine_self(), + }; + uint32_t blkio_flags = 0; + + if (flags & BDRV_REQ_FUA) { + blkio_flags |= BLKIO_REQ_FUA; + } + if (!(flags & BDRV_REQ_MAY_UNMAP)) { + blkio_flags |= BLKIO_REQ_NO_UNMAP; + } + if (flags & BDRV_REQ_NO_FALLBACK) { + blkio_flags |= BLKIO_REQ_NO_FALLBACK; + } + + WITH_QEMU_LOCK_GUARD(&s->blkio_lock) { + blkioq_write_zeroes(s->blkioq, offset, bytes, &cod, blkio_flags); + blkio_submit_io(bs); + } + + qemu_coroutine_yield(); + return cod.ret; +} + +static void blkio_io_unplug(BlockDriverState *bs) +{ + BDRVBlkioState *s = bs->opaque; + + WITH_QEMU_LOCK_GUARD(&s->blkio_lock) { + blkio_submit_io(bs); + } +} + +typedef enum { + BMRR_OK, + BMRR_SKIP, + BMRR_FAIL, +} BlkioMemRegionResult; + +/* + * Produce a struct blkio_mem_region for a given address and size. + * + * This function produces identical results when called multiple times with the + * same arguments. This property is necessary because blkio_unmap_mem_region() + * must receive the same struct blkio_mem_region field values that were passed + * to blkio_map_mem_region(). + */ +static BlkioMemRegionResult +blkio_mem_region_from_host(BlockDriverState *bs, + void *host, size_t size, + struct blkio_mem_region *region, + Error **errp) +{ + BDRVBlkioState *s = bs->opaque; + int fd = -1; + ram_addr_t fd_offset = 0; + + if (((uintptr_t)host | size) % s->mem_region_alignment) { + error_setg(errp, "unaligned buf %p with size %zu", host, size); + return BMRR_FAIL; + } + + /* Attempt to find the fd for the underlying memory */ + if (s->needs_mem_region_fd) { + RAMBlock *ram_block; + RAMBlock *end_block; + ram_addr_t offset; + + /* + * bdrv_register_buf() is called with the BQL held so mr lives at least + * until this function returns. + */ + ram_block = qemu_ram_block_from_host(host, false, &fd_offset); + if (ram_block) { + fd = qemu_ram_get_fd(ram_block); + } + if (fd == -1) { + /* + * Ideally every RAMBlock would have an fd. pc-bios and other + * things don't. Luckily they are usually not I/O buffers and we + * can just ignore them. + */ + return BMRR_SKIP; + } + + /* Make sure the fd covers the entire range */ + end_block = qemu_ram_block_from_host(host + size - 1, false, &offset); + if (ram_block != end_block) { + error_setg(errp, "registered buffer at %p with size %zu extends " + "beyond RAMBlock", host, size); + return BMRR_FAIL; + } + } + + *region = (struct blkio_mem_region){ + .addr = host, + .len = size, + .fd = fd, + .fd_offset = fd_offset, + }; + return BMRR_OK; +} + +static bool blkio_register_buf(BlockDriverState *bs, void *host, size_t size, + Error **errp) +{ + BDRVBlkioState *s = bs->opaque; + struct blkio_mem_region region; + BlkioMemRegionResult region_result; + int ret; + + /* + * Mapping memory regions conflicts with RAM discard (virtio-mem) when + * there is pinning, so only do it when necessary. + */ + if (!s->needs_mem_regions && s->may_pin_mem_regions) { + return true; + } + + region_result = blkio_mem_region_from_host(bs, host, size, ®ion, errp); + if (region_result == BMRR_SKIP) { + return true; + } else if (region_result != BMRR_OK) { + return false; + } + + WITH_QEMU_LOCK_GUARD(&s->blkio_lock) { + ret = blkio_map_mem_region(s->blkio, ®ion); + } + + if (ret < 0) { + error_setg(errp, "Failed to add blkio mem region %p with size %zu: %s", + host, size, blkio_get_error_msg()); + return false; + } + return true; +} + +static void blkio_unregister_buf(BlockDriverState *bs, void *host, size_t size) +{ + BDRVBlkioState *s = bs->opaque; + struct blkio_mem_region region; + + /* See blkio_register_buf() */ + if (!s->needs_mem_regions && s->may_pin_mem_regions) { + return; + } + + if (blkio_mem_region_from_host(bs, host, size, ®ion, NULL) != BMRR_OK) { + return; + } + + WITH_QEMU_LOCK_GUARD(&s->blkio_lock) { + blkio_unmap_mem_region(s->blkio, ®ion); + } +} + +static int blkio_io_uring_open(BlockDriverState *bs, QDict *options, int flags, + Error **errp) +{ + const char *filename = qdict_get_str(options, "filename"); + BDRVBlkioState *s = bs->opaque; + int ret; + + ret = blkio_set_str(s->blkio, "path", filename); + qdict_del(options, "filename"); + if (ret < 0) { + error_setg_errno(errp, -ret, "failed to set path: %s", + blkio_get_error_msg()); + return ret; + } + + if (flags & BDRV_O_NOCACHE) { + ret = blkio_set_bool(s->blkio, "direct", true); + if (ret < 0) { + error_setg_errno(errp, -ret, "failed to set direct: %s", + blkio_get_error_msg()); + return ret; + } + } + + return 0; +} + +static int blkio_nvme_io_uring(BlockDriverState *bs, QDict *options, int flags, + Error **errp) +{ + const char *filename = qdict_get_str(options, "filename"); + BDRVBlkioState *s = bs->opaque; + int ret; + + ret = blkio_set_str(s->blkio, "path", filename); + qdict_del(options, "filename"); + if (ret < 0) { + error_setg_errno(errp, -ret, "failed to set path: %s", + blkio_get_error_msg()); + return ret; + } + + if (!(flags & BDRV_O_NOCACHE)) { + error_setg(errp, "cache.direct=off is not supported"); + return -EINVAL; + } + + return 0; +} + +static int blkio_virtio_blk_common_open(BlockDriverState *bs, + QDict *options, int flags, Error **errp) +{ + const char *path = qdict_get_try_str(options, "path"); + BDRVBlkioState *s = bs->opaque; + int ret; + + if (!path) { + error_setg(errp, "missing 'path' option"); + return -EINVAL; + } + + ret = blkio_set_str(s->blkio, "path", path); + qdict_del(options, "path"); + if (ret < 0) { + error_setg_errno(errp, -ret, "failed to set path: %s", + blkio_get_error_msg()); + return ret; + } + + if (!(flags & BDRV_O_NOCACHE)) { + error_setg(errp, "cache.direct=off is not supported"); + return -EINVAL; + } + return 0; +} + +static int blkio_file_open(BlockDriverState *bs, QDict *options, int flags, + Error **errp) +{ + const char *blkio_driver = bs->drv->protocol_name; + BDRVBlkioState *s = bs->opaque; + int ret; + + ret = blkio_create(blkio_driver, &s->blkio); + if (ret < 0) { + error_setg_errno(errp, -ret, "blkio_create failed: %s", + blkio_get_error_msg()); + return ret; + } + + if (strcmp(blkio_driver, DRIVER_IO_URING) == 0) { + ret = blkio_io_uring_open(bs, options, flags, errp); + } else if (strcmp(blkio_driver, DRIVER_NVME_IO_URING) == 0) { + ret = blkio_nvme_io_uring(bs, options, flags, errp); + } else if (strcmp(blkio_driver, DRIVER_VIRTIO_BLK_VHOST_USER) == 0) { + ret = blkio_virtio_blk_common_open(bs, options, flags, errp); + } else if (strcmp(blkio_driver, DRIVER_VIRTIO_BLK_VHOST_VDPA) == 0) { + ret = blkio_virtio_blk_common_open(bs, options, flags, errp); + } else { + g_assert_not_reached(); + } + if (ret < 0) { + blkio_destroy(&s->blkio); + return ret; + } + + if (!(flags & BDRV_O_RDWR)) { + ret = blkio_set_bool(s->blkio, "read-only", true); + if (ret < 0) { + error_setg_errno(errp, -ret, "failed to set read-only: %s", + blkio_get_error_msg()); + blkio_destroy(&s->blkio); + return ret; + } + } + + ret = blkio_connect(s->blkio); + if (ret < 0) { + error_setg_errno(errp, -ret, "blkio_connect failed: %s", + blkio_get_error_msg()); + blkio_destroy(&s->blkio); + return ret; + } + + ret = blkio_get_bool(s->blkio, + "needs-mem-regions", + &s->needs_mem_regions); + if (ret < 0) { + error_setg_errno(errp, -ret, + "failed to get needs-mem-regions: %s", + blkio_get_error_msg()); + blkio_destroy(&s->blkio); + return ret; + } + + ret = blkio_get_bool(s->blkio, + "needs-mem-region-fd", + &s->needs_mem_region_fd); + if (ret < 0) { + error_setg_errno(errp, -ret, + "failed to get needs-mem-region-fd: %s", + blkio_get_error_msg()); + blkio_destroy(&s->blkio); + return ret; + } + + ret = blkio_get_uint64(s->blkio, + "mem-region-alignment", + &s->mem_region_alignment); + if (ret < 0) { + error_setg_errno(errp, -ret, + "failed to get mem-region-alignment: %s", + blkio_get_error_msg()); + blkio_destroy(&s->blkio); + return ret; + } + + ret = blkio_get_bool(s->blkio, + "may-pin-mem-regions", + &s->may_pin_mem_regions); + if (ret < 0) { + /* Be conservative (assume pinning) if the property is not supported */ + s->may_pin_mem_regions = s->needs_mem_regions; + } + + /* + * Notify if libblkio drivers pin memory and prevent features like + * virtio-mem from working. + */ + if (s->may_pin_mem_regions) { + ret = ram_block_discard_disable(true); + if (ret < 0) { + error_setg_errno(errp, -ret, "ram_block_discard_disable() failed"); + blkio_destroy(&s->blkio); + return ret; + } + } + + ret = blkio_start(s->blkio); + if (ret < 0) { + error_setg_errno(errp, -ret, "blkio_start failed: %s", + blkio_get_error_msg()); + blkio_destroy(&s->blkio); + if (s->may_pin_mem_regions) { + ram_block_discard_disable(false); + } + return ret; + } + + bs->supported_write_flags = BDRV_REQ_FUA | BDRV_REQ_REGISTERED_BUF; + bs->supported_zero_flags = BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP | + BDRV_REQ_NO_FALLBACK; + + qemu_mutex_init(&s->blkio_lock); + qemu_co_mutex_init(&s->bounce_lock); + qemu_co_queue_init(&s->bounce_available); + QLIST_INIT(&s->bounce_bufs); + s->blkioq = blkio_get_queue(s->blkio, 0); + s->completion_fd = blkioq_get_completion_fd(s->blkioq); + + blkio_attach_aio_context(bs, bdrv_get_aio_context(bs)); + return 0; +} + +static void blkio_close(BlockDriverState *bs) +{ + BDRVBlkioState *s = bs->opaque; + + /* There is no destroy() API for s->bounce_lock */ + + qemu_mutex_destroy(&s->blkio_lock); + blkio_detach_aio_context(bs); + blkio_destroy(&s->blkio); + + if (s->may_pin_mem_regions) { + ram_block_discard_disable(false); + } +} + +static int64_t blkio_getlength(BlockDriverState *bs) +{ + BDRVBlkioState *s = bs->opaque; + uint64_t capacity; + int ret; + + WITH_QEMU_LOCK_GUARD(&s->blkio_lock) { + ret = blkio_get_uint64(s->blkio, "capacity", &capacity); + } + if (ret < 0) { + return -ret; + } + + return capacity; +} + +static int blkio_get_info(BlockDriverState *bs, BlockDriverInfo *bdi) +{ + return 0; +} + +static void blkio_refresh_limits(BlockDriverState *bs, Error **errp) +{ + BDRVBlkioState *s = bs->opaque; + QEMU_LOCK_GUARD(&s->blkio_lock); + int value; + int ret; + + ret = blkio_get_int(s->blkio, "request-alignment", &value); + if (ret < 0) { + error_setg_errno(errp, -ret, "failed to get \"request-alignment\": %s", + blkio_get_error_msg()); + return; + } + bs->bl.request_alignment = value; + if (bs->bl.request_alignment < 1 || + bs->bl.request_alignment >= INT_MAX || + !is_power_of_2(bs->bl.request_alignment)) { + error_setg(errp, "invalid \"request-alignment\" value %" PRIu32 ", " + "must be a power of 2 less than INT_MAX", + bs->bl.request_alignment); + return; + } + + ret = blkio_get_int(s->blkio, "optimal-io-size", &value); + if (ret < 0) { + error_setg_errno(errp, -ret, "failed to get \"optimal-io-size\": %s", + blkio_get_error_msg()); + return; + } + bs->bl.opt_transfer = value; + if (bs->bl.opt_transfer > INT_MAX || + (bs->bl.opt_transfer % bs->bl.request_alignment)) { + error_setg(errp, "invalid \"optimal-io-size\" value %" PRIu32 ", must " + "be a multiple of %" PRIu32, bs->bl.opt_transfer, + bs->bl.request_alignment); + return; + } + + ret = blkio_get_int(s->blkio, "max-transfer", &value); + if (ret < 0) { + error_setg_errno(errp, -ret, "failed to get \"max-transfer\": %s", + blkio_get_error_msg()); + return; + } + bs->bl.max_transfer = value; + if ((bs->bl.max_transfer % bs->bl.request_alignment) || + (bs->bl.opt_transfer && (bs->bl.max_transfer % bs->bl.opt_transfer))) { + error_setg(errp, "invalid \"max-transfer\" value %" PRIu32 ", must be " + "a multiple of %" PRIu32 " and %" PRIu32 " (if non-zero)", + bs->bl.max_transfer, bs->bl.request_alignment, + bs->bl.opt_transfer); + return; + } + + ret = blkio_get_int(s->blkio, "buf-alignment", &value); + if (ret < 0) { + error_setg_errno(errp, -ret, "failed to get \"buf-alignment\": %s", + blkio_get_error_msg()); + return; + } + if (value < 1) { + error_setg(errp, "invalid \"buf-alignment\" value %d, must be " + "positive", value); + return; + } + bs->bl.min_mem_alignment = value; + + ret = blkio_get_int(s->blkio, "optimal-buf-alignment", &value); + if (ret < 0) { + error_setg_errno(errp, -ret, + "failed to get \"optimal-buf-alignment\": %s", + blkio_get_error_msg()); + return; + } + if (value < 1) { + error_setg(errp, "invalid \"optimal-buf-alignment\" value %d, " + "must be positive", value); + return; + } + bs->bl.opt_mem_alignment = value; + + ret = blkio_get_int(s->blkio, "max-segments", &value); + if (ret < 0) { + error_setg_errno(errp, -ret, "failed to get \"max-segments\": %s", + blkio_get_error_msg()); + return; + } + if (value < 1) { + error_setg(errp, "invalid \"max-segments\" value %d, must be positive", + value); + return; + } + bs->bl.max_iov = value; +} + +/* + * TODO + * Missing libblkio APIs: + * - block_status + * - co_invalidate_cache + * + * Out of scope? + * - create + * - truncate + */ + +#define BLKIO_DRIVER(name, ...) \ + { \ + .format_name = name, \ + .protocol_name = name, \ + .instance_size = sizeof(BDRVBlkioState), \ + .bdrv_file_open = blkio_file_open, \ + .bdrv_close = blkio_close, \ + .bdrv_getlength = blkio_getlength, \ + .bdrv_get_info = blkio_get_info, \ + .bdrv_attach_aio_context = blkio_attach_aio_context, \ + .bdrv_detach_aio_context = blkio_detach_aio_context, \ + .bdrv_co_pdiscard = blkio_co_pdiscard, \ + .bdrv_co_preadv = blkio_co_preadv, \ + .bdrv_co_pwritev = blkio_co_pwritev, \ + .bdrv_co_flush_to_disk = blkio_co_flush, \ + .bdrv_co_pwrite_zeroes = blkio_co_pwrite_zeroes, \ + .bdrv_io_unplug = blkio_io_unplug, \ + .bdrv_refresh_limits = blkio_refresh_limits, \ + .bdrv_register_buf = blkio_register_buf, \ + .bdrv_unregister_buf = blkio_unregister_buf, \ + __VA_ARGS__ \ + } + +static BlockDriver bdrv_io_uring = BLKIO_DRIVER( + DRIVER_IO_URING, + .bdrv_needs_filename = true, +); + +static BlockDriver bdrv_nvme_io_uring = BLKIO_DRIVER( + DRIVER_NVME_IO_URING, + .bdrv_needs_filename = true, +); + +static BlockDriver bdrv_virtio_blk_vhost_user = BLKIO_DRIVER( + DRIVER_VIRTIO_BLK_VHOST_USER +); + +static BlockDriver bdrv_virtio_blk_vhost_vdpa = BLKIO_DRIVER( + DRIVER_VIRTIO_BLK_VHOST_VDPA +); + +static void bdrv_blkio_init(void) +{ + bdrv_register(&bdrv_io_uring); + bdrv_register(&bdrv_nvme_io_uring); + bdrv_register(&bdrv_virtio_blk_vhost_user); + bdrv_register(&bdrv_virtio_blk_vhost_vdpa); +} + +block_init(bdrv_blkio_init); diff --git a/block/blklogwrites.c b/block/blklogwrites.c index e3c6c4039c..cef9efe55d 100644 --- a/block/blklogwrites.c +++ b/block/blklogwrites.c @@ -155,11 +155,8 @@ static int blk_log_writes_open(BlockDriverState *bs, QDict *options, int flags, } /* Open the file */ - bs->file = bdrv_open_child(NULL, options, "file", bs, &child_of_bds, - BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY, false, - errp); - if (!bs->file) { - ret = -EINVAL; + ret = bdrv_open_file_child(NULL, options, "file", bs, errp); + if (ret < 0) { goto fail; } @@ -257,10 +254,6 @@ fail_log: s->log_file = NULL; } fail: - if (ret < 0) { - bdrv_unref_child(bs, bs->file); - bs->file = NULL; - } qemu_opts_del(opts); return ret; } diff --git a/block/blkreplay.c b/block/blkreplay.c index dcbe780ddb..76a0b8d12a 100644 --- a/block/blkreplay.c +++ b/block/blkreplay.c @@ -26,11 +26,8 @@ static int blkreplay_open(BlockDriverState *bs, QDict *options, int flags, int ret; /* Open the image file */ - bs->file = bdrv_open_child(NULL, options, "image", bs, &child_of_bds, - BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY, - false, errp); - if (!bs->file) { - ret = -EINVAL; + ret = bdrv_open_file_child(NULL, options, "image", bs, errp); + if (ret < 0) { goto fail; } diff --git a/block/blkverify.c b/block/blkverify.c index 020b1ae7b6..c60a2dc624 100644 --- a/block/blkverify.c +++ b/block/blkverify.c @@ -122,12 +122,9 @@ static int blkverify_open(BlockDriverState *bs, QDict *options, int flags, } /* Open the raw file */ - bs->file = bdrv_open_child(qemu_opt_get(opts, "x-raw"), options, "raw", - bs, &child_of_bds, - BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY, - false, errp); - if (!bs->file) { - ret = -EINVAL; + ret = bdrv_open_file_child(qemu_opt_get(opts, "x-raw"), options, "raw", + bs, errp); + if (ret < 0) { goto fail; } @@ -235,8 +232,8 @@ blkverify_co_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes, qemu_iovec_init(&raw_qiov, qiov->niov); qemu_iovec_clone(&raw_qiov, qiov, buf); - ret = blkverify_co_prwv(bs, &r, offset, bytes, qiov, &raw_qiov, flags, - false); + ret = blkverify_co_prwv(bs, &r, offset, bytes, qiov, &raw_qiov, + flags & ~BDRV_REQ_REGISTERED_BUF, false); cmp_offset = qemu_iovec_compare(qiov, &raw_qiov); if (cmp_offset != -1) { diff --git a/block/block-backend.c b/block/block-backend.c index aa4adf06ae..c0c7d56c8d 100644 --- a/block/block-backend.c +++ b/block/block-backend.c @@ -134,10 +134,9 @@ static void blk_root_drained_end(BdrvChild *child, int *drained_end_counter); static void blk_root_change_media(BdrvChild *child, bool load); static void blk_root_resize(BdrvChild *child); -static bool blk_root_can_set_aio_ctx(BdrvChild *child, AioContext *ctx, - GSList **ignore, Error **errp); -static void blk_root_set_aio_ctx(BdrvChild *child, AioContext *ctx, - GSList **ignore); +static bool blk_root_change_aio_ctx(BdrvChild *child, AioContext *ctx, + GHashTable *visited, Transaction *tran, + Error **errp); static char *blk_root_get_parent_desc(BdrvChild *child) { @@ -334,8 +333,7 @@ static const BdrvChildClass child_root = { .attach = blk_root_attach, .detach = blk_root_detach, - .can_set_aio_ctx = blk_root_can_set_aio_ctx, - .set_aio_ctx = blk_root_set_aio_ctx, + .change_aio_ctx = blk_root_change_aio_ctx, .get_parent_aio_context = blk_root_get_parent_aio_context, }; @@ -1946,7 +1944,7 @@ bool blk_enable_write_cache(BlockBackend *blk) void blk_set_enable_write_cache(BlockBackend *blk, bool wce) { - GLOBAL_STATE_CODE(); + IO_CODE(); blk->enable_write_cache = wce; } @@ -2149,8 +2147,11 @@ static int blk_do_set_aio_context(BlockBackend *blk, AioContext *new_context, bdrv_ref(bs); if (update_root_node) { - ret = bdrv_child_try_set_aio_context(bs, new_context, blk->root, - errp); + /* + * update_root_node MUST be false for blk_root_set_aio_ctx_commit(), + * as we are already in the commit function of a transaction. + */ + ret = bdrv_try_change_aio_context(bs, new_context, blk->root, errp); if (ret < 0) { bdrv_unref(bs); return ret; @@ -2177,33 +2178,54 @@ int blk_set_aio_context(BlockBackend *blk, AioContext *new_context, return blk_do_set_aio_context(blk, new_context, true, errp); } -static bool blk_root_can_set_aio_ctx(BdrvChild *child, AioContext *ctx, - GSList **ignore, Error **errp) +typedef struct BdrvStateBlkRootContext { + AioContext *new_ctx; + BlockBackend *blk; +} BdrvStateBlkRootContext; + +static void blk_root_set_aio_ctx_commit(void *opaque) +{ + BdrvStateBlkRootContext *s = opaque; + BlockBackend *blk = s->blk; + + blk_do_set_aio_context(blk, s->new_ctx, false, &error_abort); +} + +static TransactionActionDrv set_blk_root_context = { + .commit = blk_root_set_aio_ctx_commit, + .clean = g_free, +}; + +static bool blk_root_change_aio_ctx(BdrvChild *child, AioContext *ctx, + GHashTable *visited, Transaction *tran, + Error **errp) { BlockBackend *blk = child->opaque; + BdrvStateBlkRootContext *s; - if (blk->allow_aio_context_change) { - return true; + if (!blk->allow_aio_context_change) { + /* + * Manually created BlockBackends (those with a name) that are not + * attached to anything can change their AioContext without updating + * their user; return an error for others. + */ + if (!blk->name || blk->dev) { + /* TODO Add BB name/QOM path */ + error_setg(errp, "Cannot change iothread of active block backend"); + return false; + } } - /* Only manually created BlockBackends that are not attached to anything - * can change their AioContext without updating their user. */ - if (!blk->name || blk->dev) { - /* TODO Add BB name/QOM path */ - error_setg(errp, "Cannot change iothread of active block backend"); - return false; - } + s = g_new(BdrvStateBlkRootContext, 1); + *s = (BdrvStateBlkRootContext) { + .new_ctx = ctx, + .blk = blk, + }; + tran_add(tran, &set_blk_root_context, s); return true; } -static void blk_root_set_aio_ctx(BdrvChild *child, AioContext *ctx, - GSList **ignore) -{ - BlockBackend *blk = child->opaque; - blk_do_set_aio_context(blk, ctx, false, &error_abort); -} - void blk_add_aio_context_notifier(BlockBackend *blk, void (*attached_aio_context)(AioContext *new_context, void *opaque), void (*detach_aio_context)(void *opaque), void *opaque) @@ -2545,16 +2567,16 @@ static void blk_root_drained_end(BdrvChild *child, int *drained_end_counter) } } -void blk_register_buf(BlockBackend *blk, void *host, size_t size) +bool blk_register_buf(BlockBackend *blk, void *host, size_t size, Error **errp) { GLOBAL_STATE_CODE(); - bdrv_register_buf(blk_bs(blk), host, size); + return bdrv_register_buf(blk_bs(blk), host, size, errp); } -void blk_unregister_buf(BlockBackend *blk, void *host) +void blk_unregister_buf(BlockBackend *blk, void *host, size_t size) { GLOBAL_STATE_CODE(); - bdrv_unregister_buf(blk_bs(blk), host); + bdrv_unregister_buf(blk_bs(blk), host, size); } int coroutine_fn blk_co_copy_range(BlockBackend *blk_in, int64_t off_in, diff --git a/block/block-ram-registrar.c b/block/block-ram-registrar.c new file mode 100644 index 0000000000..25dbafa789 --- /dev/null +++ b/block/block-ram-registrar.c @@ -0,0 +1,58 @@ +/* + * BlockBackend RAM Registrar + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include "qemu/osdep.h" +#include "sysemu/block-backend.h" +#include "sysemu/block-ram-registrar.h" +#include "qapi/error.h" + +static void ram_block_added(RAMBlockNotifier *n, void *host, size_t size, + size_t max_size) +{ + BlockRAMRegistrar *r = container_of(n, BlockRAMRegistrar, notifier); + Error *err = NULL; + + if (!r->ok) { + return; /* don't try again if we've already failed */ + } + + if (!blk_register_buf(r->blk, host, max_size, &err)) { + error_report_err(err); + ram_block_notifier_remove(&r->notifier); + r->ok = false; + } +} + +static void ram_block_removed(RAMBlockNotifier *n, void *host, size_t size, + size_t max_size) +{ + BlockRAMRegistrar *r = container_of(n, BlockRAMRegistrar, notifier); + blk_unregister_buf(r->blk, host, max_size); +} + +void blk_ram_registrar_init(BlockRAMRegistrar *r, BlockBackend *blk) +{ + r->blk = blk; + r->notifier = (RAMBlockNotifier){ + .ram_block_added = ram_block_added, + .ram_block_removed = ram_block_removed, + + /* + * .ram_block_resized() is not necessary because we use the max_size + * value that does not change across resize. + */ + }; + r->ok = true; + + ram_block_notifier_add(&r->notifier); +} + +void blk_ram_registrar_destroy(BlockRAMRegistrar *r) +{ + if (r->ok) { + ram_block_notifier_remove(&r->notifier); + } +} diff --git a/block/bochs.c b/block/bochs.c index b76f34fe03..e30e3908d9 100644 --- a/block/bochs.c +++ b/block/bochs.c @@ -110,10 +110,9 @@ static int bochs_open(BlockDriverState *bs, QDict *options, int flags, return ret; } - bs->file = bdrv_open_child(NULL, options, "file", bs, &child_of_bds, - BDRV_CHILD_IMAGE, false, errp); - if (!bs->file) { - return -EINVAL; + ret = bdrv_open_file_child(NULL, options, "file", bs, errp); + if (ret < 0) { + return ret; } ret = bdrv_pread(bs->file, 0, sizeof(bochs), &bochs, 0); diff --git a/block/cloop.c b/block/cloop.c index 40b146e714..3ff975a94d 100644 --- a/block/cloop.c +++ b/block/cloop.c @@ -71,10 +71,9 @@ static int cloop_open(BlockDriverState *bs, QDict *options, int flags, return ret; } - bs->file = bdrv_open_child(NULL, options, "file", bs, &child_of_bds, - BDRV_CHILD_IMAGE, false, errp); - if (!bs->file) { - return -EINVAL; + ret = bdrv_open_file_child(NULL, options, "file", bs, errp); + if (ret < 0) { + return ret; } /* read header */ diff --git a/block/commit.c b/block/commit.c index 38571510cb..0029b31944 100644 --- a/block/commit.c +++ b/block/commit.c @@ -135,7 +135,7 @@ static int coroutine_fn commit_run(Job *job, Error **errp) } if (base_len < len) { - ret = blk_truncate(s->base, len, false, PREALLOC_MODE_OFF, 0, NULL); + ret = blk_co_truncate(s->base, len, false, PREALLOC_MODE_OFF, 0, NULL); if (ret) { return ret; } @@ -238,6 +238,7 @@ static BlockDriver bdrv_commit_top = { .bdrv_child_perm = bdrv_commit_top_child_perm, .is_filter = true, + .filtered_child_is_backing = true, }; void commit_start(const char *job_id, BlockDriverState *bs, diff --git a/block/copy-before-write.c b/block/copy-before-write.c index afbdd04489..4abaa7339e 100644 --- a/block/copy-before-write.c +++ b/block/copy-before-write.c @@ -412,6 +412,7 @@ static int cbw_open(BlockDriverState *bs, QDict *options, int flags, int64_t cluster_size; g_autoptr(BlockdevOptions) full_opts = NULL; BlockdevOptionsCbw *opts; + int ret; full_opts = cbw_parse_options(options, errp); if (!full_opts) { @@ -420,11 +421,9 @@ static int cbw_open(BlockDriverState *bs, QDict *options, int flags, assert(full_opts->driver == BLOCKDEV_DRIVER_COPY_BEFORE_WRITE); opts = &full_opts->u.copy_before_write; - bs->file = bdrv_open_child(NULL, options, "file", bs, &child_of_bds, - BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY, - false, errp); - if (!bs->file) { - return -EINVAL; + ret = bdrv_open_file_child(NULL, options, "file", bs, errp); + if (ret < 0) { + return ret; } s->target = bdrv_open_child(NULL, options, "target", bs, &child_of_bds, diff --git a/block/copy-on-read.c b/block/copy-on-read.c index 1fc7fb3333..815ac1d835 100644 --- a/block/copy-on-read.c +++ b/block/copy-on-read.c @@ -41,12 +41,11 @@ static int cor_open(BlockDriverState *bs, QDict *options, int flags, BDRVStateCOR *state = bs->opaque; /* Find a bottom node name, if any */ const char *bottom_node = qdict_get_try_str(options, "bottom"); + int ret; - bs->file = bdrv_open_child(NULL, options, "file", bs, &child_of_bds, - BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY, - false, errp); - if (!bs->file) { - return -EINVAL; + ret = bdrv_open_file_child(NULL, options, "file", bs, errp); + if (ret < 0) { + return ret; } bs->supported_read_flags = BDRV_REQ_PREFETCH; diff --git a/block/crypto.c b/block/crypto.c index 7a57774b76..2fb8add458 100644 --- a/block/crypto.c +++ b/block/crypto.c @@ -261,15 +261,14 @@ static int block_crypto_open_generic(QCryptoBlockFormat format, { BlockCrypto *crypto = bs->opaque; QemuOpts *opts = NULL; - int ret = -EINVAL; + int ret; QCryptoBlockOpenOptions *open_opts = NULL; unsigned int cflags = 0; QDict *cryptoopts = NULL; - bs->file = bdrv_open_child(NULL, options, "file", bs, &child_of_bds, - BDRV_CHILD_IMAGE, false, errp); - if (!bs->file) { - return -EINVAL; + ret = bdrv_open_file_child(NULL, options, "file", bs, errp); + if (ret < 0) { + return ret; } bs->supported_write_flags = BDRV_REQ_FUA & @@ -277,6 +276,7 @@ static int block_crypto_open_generic(QCryptoBlockFormat format, opts = qemu_opts_create(opts_spec, NULL, 0, &error_abort); if (!qemu_opts_absorb_qdict(opts, options, errp)) { + ret = -EINVAL; goto cleanup; } @@ -285,6 +285,7 @@ static int block_crypto_open_generic(QCryptoBlockFormat format, open_opts = block_crypto_open_opts_init(cryptoopts, errp); if (!open_opts) { + ret = -EINVAL; goto cleanup; } @@ -410,7 +411,6 @@ block_crypto_co_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes, uint64_t sector_size = qcrypto_block_get_sector_size(crypto->block); uint64_t payload_offset = qcrypto_block_get_payload_offset(crypto->block); - assert(!flags); assert(payload_offset < INT64_MAX); assert(QEMU_IS_ALIGNED(offset, sector_size)); assert(QEMU_IS_ALIGNED(bytes, sector_size)); @@ -473,7 +473,8 @@ block_crypto_co_pwritev(BlockDriverState *bs, int64_t offset, int64_t bytes, uint64_t sector_size = qcrypto_block_get_sector_size(crypto->block); uint64_t payload_offset = qcrypto_block_get_payload_offset(crypto->block); - assert(!(flags & ~BDRV_REQ_FUA)); + flags &= ~BDRV_REQ_REGISTERED_BUF; + assert(payload_offset < INT64_MAX); assert(QEMU_IS_ALIGNED(offset, sector_size)); assert(QEMU_IS_ALIGNED(bytes, sector_size)); diff --git a/block/dmg.c b/block/dmg.c index 98db18d82a..422136276a 100644 --- a/block/dmg.c +++ b/block/dmg.c @@ -440,10 +440,9 @@ static int dmg_open(BlockDriverState *bs, QDict *options, int flags, return ret; } - bs->file = bdrv_open_child(NULL, options, "file", bs, &child_of_bds, - BDRV_CHILD_IMAGE, false, errp); - if (!bs->file) { - return -EINVAL; + ret = bdrv_open_file_child(NULL, options, "file", bs, errp); + if (ret < 0) { + return ret; } block_module_load_one("dmg-bz2"); diff --git a/block/export/export.c b/block/export/export.c index 4744862915..7cc0c25c1c 100644 --- a/block/export/export.c +++ b/block/export/export.c @@ -129,7 +129,7 @@ BlockExport *blk_exp_add(BlockExportOptions *export, Error **errp) /* Ignore errors with fixed-iothread=false */ set_context_errp = fixed_iothread ? errp : NULL; - ret = bdrv_try_set_aio_context(bs, new_ctx, set_context_errp); + ret = bdrv_try_change_aio_context(bs, new_ctx, NULL, set_context_errp); if (ret == 0) { aio_context_release(ctx); aio_context_acquire(new_ctx); diff --git a/block/file-posix.c b/block/file-posix.c index 23acffb9a4..b9647c5ffc 100644 --- a/block/file-posix.c +++ b/block/file-posix.c @@ -2133,7 +2133,6 @@ static int coroutine_fn raw_co_pwritev(BlockDriverState *bs, int64_t offset, int64_t bytes, QEMUIOVector *qiov, BdrvRequestFlags flags) { - assert(flags == 0); return raw_co_prw(bs, offset, bytes, qiov, QEMU_AIO_WRITE); } diff --git a/block/filter-compress.c b/block/filter-compress.c index d5be538619..305716c86c 100644 --- a/block/filter-compress.c +++ b/block/filter-compress.c @@ -30,11 +30,9 @@ static int compress_open(BlockDriverState *bs, QDict *options, int flags, Error **errp) { - bs->file = bdrv_open_child(NULL, options, "file", bs, &child_of_bds, - BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY, - false, errp); - if (!bs->file) { - return -EINVAL; + int ret = bdrv_open_file_child(NULL, options, "file", bs, errp); + if (ret < 0) { + return ret; } if (!bs->file->bs->drv || !block_driver_can_compress(bs->file->bs->drv)) { diff --git a/block/gluster.c b/block/gluster.c index bb1144cf6a..7c90f7ba4b 100644 --- a/block/gluster.c +++ b/block/gluster.c @@ -1236,7 +1236,6 @@ static coroutine_fn int qemu_gluster_co_writev(BlockDriverState *bs, QEMUIOVector *qiov, int flags) { - assert(!flags); return qemu_gluster_co_rw(bs, sector_num, nb_sectors, qiov, 1); } diff --git a/block/io.c b/block/io.c index d30073036e..34b30e304e 100644 --- a/block/io.c +++ b/block/io.c @@ -1130,8 +1130,7 @@ static int coroutine_fn bdrv_driver_preadv(BlockDriverState *bs, int ret; bdrv_check_qiov_request(offset, bytes, qiov, qiov_offset, &error_abort); - assert(!(flags & ~BDRV_REQ_MASK)); - assert(!(flags & BDRV_REQ_NO_FALLBACK)); + assert(!(flags & ~bs->supported_read_flags)); if (!drv) { return -ENOMEDIUM; @@ -1195,23 +1194,29 @@ static int coroutine_fn bdrv_driver_pwritev(BlockDriverState *bs, BdrvRequestFlags flags) { BlockDriver *drv = bs->drv; + bool emulate_fua = false; int64_t sector_num; unsigned int nb_sectors; QEMUIOVector local_qiov; int ret; bdrv_check_qiov_request(offset, bytes, qiov, qiov_offset, &error_abort); - assert(!(flags & ~BDRV_REQ_MASK)); - assert(!(flags & BDRV_REQ_NO_FALLBACK)); if (!drv) { return -ENOMEDIUM; } + if ((flags & BDRV_REQ_FUA) && + (~bs->supported_write_flags & BDRV_REQ_FUA)) { + flags &= ~BDRV_REQ_FUA; + emulate_fua = true; + } + + flags &= bs->supported_write_flags; + if (drv->bdrv_co_pwritev_part) { ret = drv->bdrv_co_pwritev_part(bs, offset, bytes, qiov, qiov_offset, - flags & bs->supported_write_flags); - flags &= ~bs->supported_write_flags; + flags); goto emulate_flags; } @@ -1221,9 +1226,7 @@ static int coroutine_fn bdrv_driver_pwritev(BlockDriverState *bs, } if (drv->bdrv_co_pwritev) { - ret = drv->bdrv_co_pwritev(bs, offset, bytes, qiov, - flags & bs->supported_write_flags); - flags &= ~bs->supported_write_flags; + ret = drv->bdrv_co_pwritev(bs, offset, bytes, qiov, flags); goto emulate_flags; } @@ -1233,10 +1236,8 @@ static int coroutine_fn bdrv_driver_pwritev(BlockDriverState *bs, .coroutine = qemu_coroutine_self(), }; - acb = drv->bdrv_aio_pwritev(bs, offset, bytes, qiov, - flags & bs->supported_write_flags, + acb = drv->bdrv_aio_pwritev(bs, offset, bytes, qiov, flags, bdrv_co_io_em_complete, &co); - flags &= ~bs->supported_write_flags; if (acb == NULL) { ret = -EIO; } else { @@ -1254,12 +1255,10 @@ static int coroutine_fn bdrv_driver_pwritev(BlockDriverState *bs, assert(bytes <= BDRV_REQUEST_MAX_BYTES); assert(drv->bdrv_co_writev); - ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov, - flags & bs->supported_write_flags); - flags &= ~bs->supported_write_flags; + ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov, flags); emulate_flags: - if (ret == 0 && (flags & BDRV_REQ_FUA)) { + if (ret == 0 && emulate_fua) { ret = bdrv_co_flush(bs); } @@ -1487,11 +1486,14 @@ static int coroutine_fn bdrv_aligned_preadv(BdrvChild *child, max_transfer = QEMU_ALIGN_DOWN(MIN_NON_ZERO(bs->bl.max_transfer, INT_MAX), align); - /* TODO: We would need a per-BDS .supported_read_flags and + /* + * TODO: We would need a per-BDS .supported_read_flags and * potential fallback support, if we ever implement any read flags * to pass through to drivers. For now, there aren't any - * passthrough flags. */ - assert(!(flags & ~(BDRV_REQ_COPY_ON_READ | BDRV_REQ_PREFETCH))); + * passthrough flags except the BDRV_REQ_REGISTERED_BUF optimization hint. + */ + assert(!(flags & ~(BDRV_REQ_COPY_ON_READ | BDRV_REQ_PREFETCH | + BDRV_REQ_REGISTERED_BUF))); /* Handle Copy on Read and associated serialisation */ if (flags & BDRV_REQ_COPY_ON_READ) { @@ -1532,7 +1534,7 @@ static int coroutine_fn bdrv_aligned_preadv(BdrvChild *child, goto out; } - assert(!(flags & ~bs->supported_read_flags)); + assert(!(flags & ~(bs->supported_read_flags | BDRV_REQ_REGISTERED_BUF))); max_bytes = ROUND_UP(MAX(0, total_bytes - offset), align); if (bytes <= max_bytes && bytes <= max_transfer) { @@ -1721,7 +1723,8 @@ static void bdrv_padding_destroy(BdrvRequestPadding *pad) static int bdrv_pad_request(BlockDriverState *bs, QEMUIOVector **qiov, size_t *qiov_offset, int64_t *offset, int64_t *bytes, - BdrvRequestPadding *pad, bool *padded) + BdrvRequestPadding *pad, bool *padded, + BdrvRequestFlags *flags) { int ret; @@ -1749,6 +1752,10 @@ static int bdrv_pad_request(BlockDriverState *bs, if (padded) { *padded = true; } + if (flags) { + /* Can't use optimization hint with bounce buffer */ + *flags &= ~BDRV_REQ_REGISTERED_BUF; + } return 0; } @@ -1803,7 +1810,7 @@ int coroutine_fn bdrv_co_preadv_part(BdrvChild *child, } ret = bdrv_pad_request(bs, &qiov, &qiov_offset, &offset, &bytes, &pad, - NULL); + NULL, &flags); if (ret < 0) { goto fail; } @@ -1848,6 +1855,11 @@ static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs, return -ENOTSUP; } + /* By definition there is no user buffer so this flag doesn't make sense */ + if (flags & BDRV_REQ_REGISTERED_BUF) { + return -EINVAL; + } + /* Invalidate the cached block-status data range if this write overlaps */ bdrv_bsc_invalidate_range(bs, offset, bytes); @@ -2133,6 +2145,9 @@ static int coroutine_fn bdrv_co_do_zero_pwritev(BdrvChild *child, bool padding; BdrvRequestPadding pad; + /* This flag doesn't make sense for padding or zero writes */ + flags &= ~BDRV_REQ_REGISTERED_BUF; + padding = bdrv_init_padding(bs, offset, bytes, &pad); if (padding) { assert(!(flags & BDRV_REQ_NO_WAIT)); @@ -2250,7 +2265,7 @@ int coroutine_fn bdrv_co_pwritev_part(BdrvChild *child, * alignment only if there is no ZERO flag. */ ret = bdrv_pad_request(bs, &qiov, &qiov_offset, &offset, &bytes, &pad, - &padded); + &padded, &flags); if (ret < 0) { return ret; } @@ -2729,8 +2744,8 @@ int coroutine_fn bdrv_co_is_zero_fast(BlockDriverState *bs, int64_t offset, return 1; } - ret = bdrv_common_block_status_above(bs, NULL, false, false, offset, - bytes, &pnum, NULL, NULL, NULL); + ret = bdrv_co_common_block_status_above(bs, NULL, false, false, offset, + bytes, &pnum, NULL, NULL, NULL); if (ret < 0) { return ret; @@ -2739,8 +2754,8 @@ int coroutine_fn bdrv_co_is_zero_fast(BlockDriverState *bs, int64_t offset, return (pnum == bytes) && (ret & BDRV_BLOCK_ZERO); } -int coroutine_fn bdrv_is_allocated(BlockDriverState *bs, int64_t offset, - int64_t bytes, int64_t *pnum) +int bdrv_is_allocated(BlockDriverState *bs, int64_t offset, int64_t bytes, + int64_t *pnum) { int ret; int64_t dummy; @@ -3262,29 +3277,57 @@ void bdrv_io_unplug(BlockDriverState *bs) } } -void bdrv_register_buf(BlockDriverState *bs, void *host, size_t size) +/* Helper that undoes bdrv_register_buf() when it fails partway through */ +static void bdrv_register_buf_rollback(BlockDriverState *bs, + void *host, + size_t size, + BdrvChild *final_child) +{ + BdrvChild *child; + + QLIST_FOREACH(child, &bs->children, next) { + if (child == final_child) { + break; + } + + bdrv_unregister_buf(child->bs, host, size); + } + + if (bs->drv && bs->drv->bdrv_unregister_buf) { + bs->drv->bdrv_unregister_buf(bs, host, size); + } +} + +bool bdrv_register_buf(BlockDriverState *bs, void *host, size_t size, + Error **errp) { BdrvChild *child; GLOBAL_STATE_CODE(); if (bs->drv && bs->drv->bdrv_register_buf) { - bs->drv->bdrv_register_buf(bs, host, size); + if (!bs->drv->bdrv_register_buf(bs, host, size, errp)) { + return false; + } } QLIST_FOREACH(child, &bs->children, next) { - bdrv_register_buf(child->bs, host, size); + if (!bdrv_register_buf(child->bs, host, size, errp)) { + bdrv_register_buf_rollback(bs, host, size, child); + return false; + } } + return true; } -void bdrv_unregister_buf(BlockDriverState *bs, void *host) +void bdrv_unregister_buf(BlockDriverState *bs, void *host, size_t size) { BdrvChild *child; GLOBAL_STATE_CODE(); if (bs->drv && bs->drv->bdrv_unregister_buf) { - bs->drv->bdrv_unregister_buf(bs, host); + bs->drv->bdrv_unregister_buf(bs, host, size); } QLIST_FOREACH(child, &bs->children, next) { - bdrv_unregister_buf(child->bs, host); + bdrv_unregister_buf(child->bs, host, size); } } diff --git a/block/io_uring.c b/block/io_uring.c index a1760152e0..973e15d876 100644 --- a/block/io_uring.c +++ b/block/io_uring.c @@ -11,7 +11,6 @@ #include "qemu/osdep.h" #include <liburing.h> #include "block/aio.h" -#include "qemu/error-report.h" #include "qemu/queue.h" #include "block/block.h" #include "block/raw-aio.h" @@ -19,7 +18,6 @@ #include "qapi/error.h" #include "trace.h" - /* io_uring ring size */ #define MAX_ENTRIES 128 @@ -432,17 +430,8 @@ LuringState *luring_init(Error **errp) } ioq_init(&s->io_q); -#ifdef CONFIG_LIBURING_REGISTER_RING_FD - if (io_uring_register_ring_fd(&s->ring) < 0) { - /* - * Only warn about this error: we will fallback to the non-optimized - * io_uring operations. - */ - warn_report("failed to register linux io_uring ring file descriptor"); - } -#endif - return s; + } void luring_cleanup(LuringState *s) diff --git a/block/meson.build b/block/meson.build index 60bc305597..b7c68b83a3 100644 --- a/block/meson.build +++ b/block/meson.build @@ -46,6 +46,7 @@ block_ss.add(files( ), zstd, zlib, gnutls) softmmu_ss.add(when: 'CONFIG_TCG', if_true: files('blkreplay.c')) +softmmu_ss.add(files('block-ram-registrar.c')) if get_option('qcow1').allowed() block_ss.add(files('qcow.c')) @@ -92,6 +93,7 @@ block_modules = {} modsrc = [] foreach m : [ + [blkio, 'blkio', files('blkio.c')], [curl, 'curl', files('curl.c')], [glusterfs, 'gluster', files('gluster.c')], [libiscsi, 'iscsi', [files('iscsi.c'), libm]], diff --git a/block/mirror.c b/block/mirror.c index 80c0109d39..1a75a47cc3 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -922,8 +922,8 @@ static int coroutine_fn mirror_run(Job *job, Error **errp) * active layer. */ if (s->base == blk_bs(s->target)) { if (s->bdev_length > target_length) { - ret = blk_truncate(s->target, s->bdev_length, false, - PREALLOC_MODE_OFF, 0, NULL); + ret = blk_co_truncate(s->target, s->bdev_length, false, + PREALLOC_MODE_OFF, 0, NULL); if (ret < 0) { goto immediate_exit; } @@ -1486,6 +1486,8 @@ static int coroutine_fn bdrv_mirror_top_pwritev(BlockDriverState *bs, qemu_iovec_init(&bounce_qiov, 1); qemu_iovec_add(&bounce_qiov, bounce_buf, bytes); qiov = &bounce_qiov; + + flags &= ~BDRV_REQ_REGISTERED_BUF; } ret = bdrv_mirror_top_do_write(bs, MIRROR_METHOD_COPY, offset, bytes, qiov, @@ -1587,6 +1589,7 @@ static BlockDriver bdrv_mirror_top = { .bdrv_child_perm = bdrv_mirror_top_child_perm, .is_filter = true, + .filtered_child_is_backing = true, }; static BlockJob *mirror_start_job( diff --git a/block/monitor/block-hmp-cmds.c b/block/monitor/block-hmp-cmds.c index 939a520d17..b6135e9bfe 100644 --- a/block/monitor/block-hmp-cmds.c +++ b/block/monitor/block-hmp-cmds.c @@ -489,7 +489,7 @@ void hmp_nbd_server_stop(Monitor *mon, const QDict *qdict) hmp_handle_error(mon, err); } -void hmp_block_resize(Monitor *mon, const QDict *qdict) +void coroutine_fn hmp_block_resize(Monitor *mon, const QDict *qdict) { const char *device = qdict_get_str(qdict, "device"); int64_t size = qdict_get_int(qdict, "size"); diff --git a/block/nbd.c b/block/nbd.c index 494b9d683e..7d485c86d2 100644 --- a/block/nbd.c +++ b/block/nbd.c @@ -1222,7 +1222,6 @@ static int coroutine_fn nbd_client_co_preadv(BlockDriverState *bs, int64_t offse }; assert(bytes <= NBD_MAX_BUFFER_SIZE); - assert(!flags); if (!bytes) { return 0; diff --git a/block/nfs.c b/block/nfs.c index 596ebe98cb..ece22353ac 100644 --- a/block/nfs.c +++ b/block/nfs.c @@ -418,7 +418,11 @@ static int64_t nfs_client_open(NFSClient *client, BlockdevOptionsNfs *opts, int flags, int open_flags, Error **errp) { int64_t ret = -EINVAL; +#ifdef _WIN32 + struct __stat64 st; +#else struct stat st; +#endif char *file = NULL, *strp = NULL; qemu_mutex_init(&client->mutex); @@ -781,7 +785,11 @@ static int nfs_reopen_prepare(BDRVReopenState *state, BlockReopenQueue *queue, Error **errp) { NFSClient *client = state->bs->opaque; +#ifdef _WIN32 + struct __stat64 st; +#else struct stat st; +#endif int ret = 0; if (state->flags & BDRV_O_RDWR && bdrv_is_read_only(state->bs)) { diff --git a/block/nvme.c b/block/nvme.c index 2b24f95164..656624c585 100644 --- a/block/nvme.c +++ b/block/nvme.c @@ -1587,22 +1587,22 @@ static void nvme_aio_unplug(BlockDriverState *bs) } } -static void nvme_register_buf(BlockDriverState *bs, void *host, size_t size) +static bool nvme_register_buf(BlockDriverState *bs, void *host, size_t size, + Error **errp) { int ret; - Error *local_err = NULL; BDRVNVMeState *s = bs->opaque; - ret = qemu_vfio_dma_map(s->vfio, host, size, false, NULL, &local_err); - if (ret) { - /* FIXME: we may run out of IOVA addresses after repeated - * bdrv_register_buf/bdrv_unregister_buf, because nvme_vfio_dma_unmap - * doesn't reclaim addresses for fixed mappings. */ - error_reportf_err(local_err, "nvme_register_buf failed: "); - } + /* + * FIXME: we may run out of IOVA addresses after repeated + * bdrv_register_buf/bdrv_unregister_buf, because nvme_vfio_dma_unmap + * doesn't reclaim addresses for fixed mappings. + */ + ret = qemu_vfio_dma_map(s->vfio, host, size, false, NULL, errp); + return ret == 0; } -static void nvme_unregister_buf(BlockDriverState *bs, void *host) +static void nvme_unregister_buf(BlockDriverState *bs, void *host, size_t size) { BDRVNVMeState *s = bs->opaque; diff --git a/block/parallels.c b/block/parallels.c index c1523e7dab..fa08c1104b 100644 --- a/block/parallels.c +++ b/block/parallels.c @@ -205,18 +205,18 @@ static coroutine_fn int64_t allocate_clusters(BlockDriverState *bs, * force the safer-but-slower fallocate. */ if (s->prealloc_mode == PRL_PREALLOC_MODE_TRUNCATE) { - ret = bdrv_truncate(bs->file, - (s->data_end + space) << BDRV_SECTOR_BITS, - false, PREALLOC_MODE_OFF, BDRV_REQ_ZERO_WRITE, - NULL); + ret = bdrv_co_truncate(bs->file, + (s->data_end + space) << BDRV_SECTOR_BITS, + false, PREALLOC_MODE_OFF, + BDRV_REQ_ZERO_WRITE, NULL); if (ret == -ENOTSUP) { s->prealloc_mode = PRL_PREALLOC_MODE_FALLOCATE; } } if (s->prealloc_mode == PRL_PREALLOC_MODE_FALLOCATE) { - ret = bdrv_pwrite_zeroes(bs->file, - s->data_end << BDRV_SECTOR_BITS, - space << BDRV_SECTOR_BITS, 0); + ret = bdrv_co_pwrite_zeroes(bs->file, + s->data_end << BDRV_SECTOR_BITS, + space << BDRV_SECTOR_BITS, 0); } if (ret < 0) { return ret; @@ -278,8 +278,8 @@ static coroutine_fn int parallels_co_flush_to_os(BlockDriverState *bs) if (off + to_write > s->header_size) { to_write = s->header_size - off; } - ret = bdrv_pwrite(bs->file, off, to_write, (uint8_t *)s->header + off, - 0); + ret = bdrv_co_pwrite(bs->file, off, to_write, + (uint8_t *)s->header + off, 0); if (ret < 0) { qemu_co_mutex_unlock(&s->lock); return ret; @@ -329,7 +329,6 @@ static coroutine_fn int parallels_co_writev(BlockDriverState *bs, QEMUIOVector hd_qiov; int ret = 0; - assert(!flags); qemu_iovec_init(&hd_qiov, qiov->niov); while (nb_sectors > 0) { @@ -504,8 +503,8 @@ static int coroutine_fn parallels_co_check(BlockDriverState *bs, * In order to really repair the image, we must shrink it. * That means we have to pass exact=true. */ - ret = bdrv_truncate(bs->file, res->image_end_offset, true, - PREALLOC_MODE_OFF, 0, &local_err); + ret = bdrv_co_truncate(bs->file, res->image_end_offset, true, + PREALLOC_MODE_OFF, 0, &local_err); if (ret < 0) { error_report_err(local_err); res->check_errors++; @@ -600,12 +599,12 @@ static int coroutine_fn parallels_co_create(BlockdevCreateOptions* opts, memset(tmp, 0, sizeof(tmp)); memcpy(tmp, &header, sizeof(header)); - ret = blk_pwrite(blk, 0, BDRV_SECTOR_SIZE, tmp, 0); + ret = blk_co_pwrite(blk, 0, BDRV_SECTOR_SIZE, tmp, 0); if (ret < 0) { goto exit; } - ret = blk_pwrite_zeroes(blk, BDRV_SECTOR_SIZE, - (bat_sectors - 1) << BDRV_SECTOR_BITS, 0); + ret = blk_co_pwrite_zeroes(blk, BDRV_SECTOR_SIZE, + (bat_sectors - 1) << BDRV_SECTOR_BITS, 0); if (ret < 0) { goto exit; } @@ -737,10 +736,9 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags, Error *local_err = NULL; char *buf; - bs->file = bdrv_open_child(NULL, options, "file", bs, &child_of_bds, - BDRV_CHILD_IMAGE, false, errp); - if (!bs->file) { - return -EINVAL; + ret = bdrv_open_file_child(NULL, options, "file", bs, errp); + if (ret < 0) { + return ret; } ret = bdrv_pread(bs->file, 0, sizeof(ph), &ph, 0); diff --git a/block/preallocate.c b/block/preallocate.c index e15cb8c74a..d50ee7f49b 100644 --- a/block/preallocate.c +++ b/block/preallocate.c @@ -134,6 +134,7 @@ static int preallocate_open(BlockDriverState *bs, QDict *options, int flags, Error **errp) { BDRVPreallocateState *s = bs->opaque; + int ret; /* * s->data_end and friends should be initialized on permission update. @@ -141,11 +142,9 @@ static int preallocate_open(BlockDriverState *bs, QDict *options, int flags, */ s->file_end = s->zero_start = s->data_end = -EINVAL; - bs->file = bdrv_open_child(NULL, options, "file", bs, &child_of_bds, - BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY, - false, errp); - if (!bs->file) { - return -EINVAL; + ret = bdrv_open_file_child(NULL, options, "file", bs, errp); + if (ret < 0) { + return ret; } if (!preallocate_absorb_opts(&s->opts, options, bs->file->bs, errp)) { diff --git a/block/qcow.c b/block/qcow.c index 311aaa8705..daa38839ab 100644 --- a/block/qcow.c +++ b/block/qcow.c @@ -92,7 +92,8 @@ typedef struct BDRVQcowState { static QemuOptsList qcow_create_opts; -static int decompress_cluster(BlockDriverState *bs, uint64_t cluster_offset); +static int coroutine_fn decompress_cluster(BlockDriverState *bs, + uint64_t cluster_offset); static int qcow_probe(const uint8_t *buf, int buf_size, const char *filename) { @@ -121,10 +122,8 @@ static int qcow_open(BlockDriverState *bs, QDict *options, int flags, qdict_extract_subqdict(options, &encryptopts, "encrypt."); encryptfmt = qdict_get_try_str(encryptopts, "format"); - bs->file = bdrv_open_child(NULL, options, "file", bs, &child_of_bds, - BDRV_CHILD_IMAGE, false, errp); - if (!bs->file) { - ret = -EINVAL; + ret = bdrv_open_file_child(NULL, options, "file", bs, errp); + if (ret < 0) { goto fail; } @@ -351,10 +350,11 @@ static int qcow_reopen_prepare(BDRVReopenState *state, * return 0 if not allocated, 1 if *result is assigned, and negative * errno on failure. */ -static int get_cluster_offset(BlockDriverState *bs, - uint64_t offset, int allocate, - int compressed_size, - int n_start, int n_end, uint64_t *result) +static int coroutine_fn get_cluster_offset(BlockDriverState *bs, + uint64_t offset, int allocate, + int compressed_size, + int n_start, int n_end, + uint64_t *result) { BDRVQcowState *s = bs->opaque; int min_index, i, j, l1_index, l2_index, ret; @@ -381,9 +381,9 @@ static int get_cluster_offset(BlockDriverState *bs, s->l1_table[l1_index] = l2_offset; tmp = cpu_to_be64(l2_offset); BLKDBG_EVENT(bs->file, BLKDBG_L1_UPDATE); - ret = bdrv_pwrite_sync(bs->file, - s->l1_table_offset + l1_index * sizeof(tmp), - sizeof(tmp), &tmp, 0); + ret = bdrv_co_pwrite_sync(bs->file, + s->l1_table_offset + l1_index * sizeof(tmp), + sizeof(tmp), &tmp, 0); if (ret < 0) { return ret; } @@ -414,14 +414,14 @@ static int get_cluster_offset(BlockDriverState *bs, BLKDBG_EVENT(bs->file, BLKDBG_L2_LOAD); if (new_l2_table) { memset(l2_table, 0, s->l2_size * sizeof(uint64_t)); - ret = bdrv_pwrite_sync(bs->file, l2_offset, - s->l2_size * sizeof(uint64_t), l2_table, 0); + ret = bdrv_co_pwrite_sync(bs->file, l2_offset, + s->l2_size * sizeof(uint64_t), l2_table, 0); if (ret < 0) { return ret; } } else { - ret = bdrv_pread(bs->file, l2_offset, s->l2_size * sizeof(uint64_t), - l2_table, 0); + ret = bdrv_co_pread(bs->file, l2_offset, + s->l2_size * sizeof(uint64_t), l2_table, 0); if (ret < 0) { return ret; } @@ -453,8 +453,8 @@ static int get_cluster_offset(BlockDriverState *bs, cluster_offset = QEMU_ALIGN_UP(cluster_offset, s->cluster_size); /* write the cluster content */ BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO); - ret = bdrv_pwrite(bs->file, cluster_offset, s->cluster_size, - s->cluster_cache, 0); + ret = bdrv_co_pwrite(bs->file, cluster_offset, s->cluster_size, + s->cluster_cache, 0); if (ret < 0) { return ret; } @@ -469,8 +469,9 @@ static int get_cluster_offset(BlockDriverState *bs, if (cluster_offset + s->cluster_size > INT64_MAX) { return -E2BIG; } - ret = bdrv_truncate(bs->file, cluster_offset + s->cluster_size, - false, PREALLOC_MODE_OFF, 0, NULL); + ret = bdrv_co_truncate(bs->file, + cluster_offset + s->cluster_size, + false, PREALLOC_MODE_OFF, 0, NULL); if (ret < 0) { return ret; } @@ -492,9 +493,9 @@ static int get_cluster_offset(BlockDriverState *bs, return -EIO; } BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO); - ret = bdrv_pwrite(bs->file, cluster_offset + i, - BDRV_SECTOR_SIZE, - s->cluster_data, 0); + ret = bdrv_co_pwrite(bs->file, cluster_offset + i, + BDRV_SECTOR_SIZE, + s->cluster_data, 0); if (ret < 0) { return ret; } @@ -514,8 +515,8 @@ static int get_cluster_offset(BlockDriverState *bs, } else { BLKDBG_EVENT(bs->file, BLKDBG_L2_UPDATE); } - ret = bdrv_pwrite_sync(bs->file, l2_offset + l2_index * sizeof(tmp), - sizeof(tmp), &tmp, 0); + ret = bdrv_co_pwrite_sync(bs->file, l2_offset + l2_index * sizeof(tmp), + sizeof(tmp), &tmp, 0); if (ret < 0) { return ret; } @@ -585,7 +586,8 @@ static int decompress_buffer(uint8_t *out_buf, int out_buf_size, return 0; } -static int decompress_cluster(BlockDriverState *bs, uint64_t cluster_offset) +static int coroutine_fn decompress_cluster(BlockDriverState *bs, + uint64_t cluster_offset) { BDRVQcowState *s = bs->opaque; int ret, csize; @@ -596,7 +598,7 @@ static int decompress_cluster(BlockDriverState *bs, uint64_t cluster_offset) csize = cluster_offset >> (63 - s->cluster_bits); csize &= (s->cluster_size - 1); BLKDBG_EVENT(bs->file, BLKDBG_READ_COMPRESSED); - ret = bdrv_pread(bs->file, coffset, csize, s->cluster_data, 0); + ret = bdrv_co_pread(bs->file, coffset, csize, s->cluster_data, 0); if (ret < 0) return -1; if (decompress_buffer(s->cluster_cache, s->cluster_size, @@ -628,7 +630,6 @@ static coroutine_fn int qcow_co_preadv(BlockDriverState *bs, int64_t offset, uint8_t *buf; void *orig_buf; - assert(!flags); if (qiov->niov > 1) { buf = orig_buf = qemu_try_blockalign(bs, qiov->size); if (buf == NULL) { @@ -725,7 +726,6 @@ static coroutine_fn int qcow_co_pwritev(BlockDriverState *bs, int64_t offset, uint8_t *buf; void *orig_buf; - assert(!flags); s->cluster_cache_offset = -1; /* disable compressed cache */ /* We must always copy the iov when encrypting, so we @@ -890,14 +890,14 @@ static int coroutine_fn qcow_co_create(BlockdevCreateOptions *opts, } /* write all the data */ - ret = blk_pwrite(qcow_blk, 0, sizeof(header), &header, 0); + ret = blk_co_pwrite(qcow_blk, 0, sizeof(header), &header, 0); if (ret < 0) { goto exit; } if (qcow_opts->has_backing_file) { - ret = blk_pwrite(qcow_blk, sizeof(header), backing_filename_len, - qcow_opts->backing_file, 0); + ret = blk_co_pwrite(qcow_blk, sizeof(header), backing_filename_len, + qcow_opts->backing_file, 0); if (ret < 0) { goto exit; } @@ -906,8 +906,8 @@ static int coroutine_fn qcow_co_create(BlockdevCreateOptions *opts, tmp = g_malloc0(BDRV_SECTOR_SIZE); for (i = 0; i < DIV_ROUND_UP(sizeof(uint64_t) * l1_size, BDRV_SECTOR_SIZE); i++) { - ret = blk_pwrite(qcow_blk, header_size + BDRV_SECTOR_SIZE * i, - BDRV_SECTOR_SIZE, tmp, 0); + ret = blk_co_pwrite(qcow_blk, header_size + BDRV_SECTOR_SIZE * i, + BDRV_SECTOR_SIZE, tmp, 0); if (ret < 0) { g_free(tmp); goto exit; diff --git a/block/qcow2-bitmap.c b/block/qcow2-bitmap.c index 7197754843..bcad567c0c 100644 --- a/block/qcow2-bitmap.c +++ b/block/qcow2-bitmap.c @@ -955,8 +955,8 @@ static void set_readonly_helper(gpointer bitmap, gpointer value) * If header_updated is not NULL then it is set appropriately regardless of * the return value. */ -bool qcow2_load_dirty_bitmaps(BlockDriverState *bs, bool *header_updated, - Error **errp) +bool coroutine_fn qcow2_load_dirty_bitmaps(BlockDriverState *bs, + bool *header_updated, Error **errp) { BDRVQcow2State *s = bs->opaque; Qcow2BitmapList *bm_list; diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c index 0f293950a1..40ed847f97 100644 --- a/block/qcow2-cluster.c +++ b/block/qcow2-cluster.c @@ -31,7 +31,8 @@ #include "qemu/memalign.h" #include "trace.h" -int qcow2_shrink_l1_table(BlockDriverState *bs, uint64_t exact_size) +int coroutine_fn qcow2_shrink_l1_table(BlockDriverState *bs, + uint64_t exact_size) { BDRVQcow2State *s = bs->opaque; int new_l1_size, i, ret; @@ -47,14 +48,14 @@ int qcow2_shrink_l1_table(BlockDriverState *bs, uint64_t exact_size) #endif BLKDBG_EVENT(bs->file, BLKDBG_L1_SHRINK_WRITE_TABLE); - ret = bdrv_pwrite_zeroes(bs->file, s->l1_table_offset + - new_l1_size * L1E_SIZE, - (s->l1_size - new_l1_size) * L1E_SIZE, 0); + ret = bdrv_co_pwrite_zeroes(bs->file, + s->l1_table_offset + new_l1_size * L1E_SIZE, + (s->l1_size - new_l1_size) * L1E_SIZE, 0); if (ret < 0) { goto fail; } - ret = bdrv_flush(bs->file->bs); + ret = bdrv_co_flush(bs->file->bs); if (ret < 0) { goto fail; } @@ -823,10 +824,10 @@ static int get_cluster_table(BlockDriverState *bs, uint64_t offset, * * Return 0 on success and -errno in error cases */ -int qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs, - uint64_t offset, - int compressed_size, - uint64_t *host_offset) +int coroutine_fn qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs, + uint64_t offset, + int compressed_size, + uint64_t *host_offset) { BDRVQcow2State *s = bs->opaque; int l2_index, ret; @@ -1488,8 +1489,9 @@ static int coroutine_fn handle_dependencies(BlockDriverState *bs, * * -errno: in error cases */ -static int handle_copied(BlockDriverState *bs, uint64_t guest_offset, - uint64_t *host_offset, uint64_t *bytes, QCowL2Meta **m) +static int coroutine_fn handle_copied(BlockDriverState *bs, + uint64_t guest_offset, uint64_t *host_offset, uint64_t *bytes, + QCowL2Meta **m) { BDRVQcow2State *s = bs->opaque; int l2_index; @@ -1653,8 +1655,9 @@ static int do_alloc_cluster_offset(BlockDriverState *bs, uint64_t guest_offset, * * -errno: in error cases */ -static int handle_alloc(BlockDriverState *bs, uint64_t guest_offset, - uint64_t *host_offset, uint64_t *bytes, QCowL2Meta **m) +static int coroutine_fn handle_alloc(BlockDriverState *bs, + uint64_t guest_offset, uint64_t *host_offset, uint64_t *bytes, + QCowL2Meta **m) { BDRVQcow2State *s = bs->opaque; int l2_index; diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c index 1fbb07ca77..81264740f0 100644 --- a/block/qcow2-refcount.c +++ b/block/qcow2-refcount.c @@ -97,7 +97,7 @@ static void update_max_refcount_table_index(BDRVQcow2State *s) s->max_refcount_table_index = i; } -int qcow2_refcount_init(BlockDriverState *bs) +int coroutine_fn qcow2_refcount_init(BlockDriverState *bs) { BDRVQcow2State *s = bs->opaque; unsigned int refcount_table_size2, i; @@ -118,8 +118,8 @@ int qcow2_refcount_init(BlockDriverState *bs) goto fail; } BLKDBG_EVENT(bs->file, BLKDBG_REFTABLE_LOAD); - ret = bdrv_pread(bs->file, s->refcount_table_offset, - refcount_table_size2, s->refcount_table, 0); + ret = bdrv_co_pread(bs->file, s->refcount_table_offset, + refcount_table_size2, s->refcount_table, 0); if (ret < 0) { goto fail; } @@ -3559,8 +3559,8 @@ static int64_t get_refblock_offset(BlockDriverState *bs, uint64_t offset) return covering_refblock_offset; } -static int qcow2_discard_refcount_block(BlockDriverState *bs, - uint64_t discard_block_offs) +static int coroutine_fn +qcow2_discard_refcount_block(BlockDriverState *bs, uint64_t discard_block_offs) { BDRVQcow2State *s = bs->opaque; int64_t refblock_offs; @@ -3616,7 +3616,7 @@ static int qcow2_discard_refcount_block(BlockDriverState *bs, return 0; } -int qcow2_shrink_reftable(BlockDriverState *bs) +int coroutine_fn qcow2_shrink_reftable(BlockDriverState *bs) { BDRVQcow2State *s = bs->opaque; uint64_t *reftable_tmp = @@ -3657,9 +3657,9 @@ int qcow2_shrink_reftable(BlockDriverState *bs) reftable_tmp[i] = unused_block ? 0 : cpu_to_be64(s->refcount_table[i]); } - ret = bdrv_pwrite_sync(bs->file, s->refcount_table_offset, - s->refcount_table_size * REFTABLE_ENTRY_SIZE, - reftable_tmp, 0); + ret = bdrv_co_pwrite_sync(bs->file, s->refcount_table_offset, + s->refcount_table_size * REFTABLE_ENTRY_SIZE, + reftable_tmp, 0); /* * If the write in the reftable failed the image may contain a partially * overwritten reftable. In this case it would be better to clear the diff --git a/block/qcow2-snapshot.c b/block/qcow2-snapshot.c index d1d46facbf..62e8a0335d 100644 --- a/block/qcow2-snapshot.c +++ b/block/qcow2-snapshot.c @@ -441,9 +441,9 @@ int coroutine_fn qcow2_check_read_snapshot_table(BlockDriverState *bs, } QEMU_PACKED snapshot_table_pointer; /* qcow2_do_open() discards this information in check mode */ - ret = bdrv_pread(bs->file, offsetof(QCowHeader, nb_snapshots), - sizeof(snapshot_table_pointer), &snapshot_table_pointer, - 0); + ret = bdrv_co_pread(bs->file, offsetof(QCowHeader, nb_snapshots), + sizeof(snapshot_table_pointer), &snapshot_table_pointer, + 0); if (ret < 0) { result->check_errors++; fprintf(stderr, "ERROR failed to read the snapshot table pointer from " diff --git a/block/qcow2.c b/block/qcow2.c index b57f7cc8ee..4d6666d3ff 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -1306,7 +1306,7 @@ static int coroutine_fn qcow2_do_open(BlockDriverState *bs, QDict *options, uint64_t l1_vm_state_index; bool update_header = false; - ret = bdrv_pread(bs->file, 0, sizeof(header), &header, 0); + ret = bdrv_co_pread(bs->file, 0, sizeof(header), &header, 0); if (ret < 0) { error_setg_errno(errp, -ret, "Could not read qcow2 header"); goto fail; @@ -1382,9 +1382,9 @@ static int coroutine_fn qcow2_do_open(BlockDriverState *bs, QDict *options, if (header.header_length > sizeof(header)) { s->unknown_header_fields_size = header.header_length - sizeof(header); s->unknown_header_fields = g_malloc(s->unknown_header_fields_size); - ret = bdrv_pread(bs->file, sizeof(header), - s->unknown_header_fields_size, - s->unknown_header_fields, 0); + ret = bdrv_co_pread(bs->file, sizeof(header), + s->unknown_header_fields_size, + s->unknown_header_fields, 0); if (ret < 0) { error_setg_errno(errp, -ret, "Could not read unknown qcow2 header " "fields"); @@ -1579,8 +1579,8 @@ static int coroutine_fn qcow2_do_open(BlockDriverState *bs, QDict *options, ret = -ENOMEM; goto fail; } - ret = bdrv_pread(bs->file, s->l1_table_offset, s->l1_size * L1E_SIZE, - s->l1_table, 0); + ret = bdrv_co_pread(bs->file, s->l1_table_offset, s->l1_size * L1E_SIZE, + s->l1_table, 0); if (ret < 0) { error_setg_errno(errp, -ret, "Could not read L1 table"); goto fail; @@ -1699,8 +1699,8 @@ static int coroutine_fn qcow2_do_open(BlockDriverState *bs, QDict *options, } s->image_backing_file = g_malloc(len + 1); - ret = bdrv_pread(bs->file, header.backing_file_offset, len, - s->image_backing_file, 0); + ret = bdrv_co_pread(bs->file, header.backing_file_offset, len, + s->image_backing_file, 0); if (ret < 0) { error_setg_errno(errp, -ret, "Could not read backing file name"); goto fail; @@ -1905,11 +1905,11 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags, .errp = errp, .ret = -EINPROGRESS }; + int ret; - bs->file = bdrv_open_child(NULL, options, "file", bs, &child_of_bds, - BDRV_CHILD_IMAGE, false, errp); - if (!bs->file) { - return -EINVAL; + ret = bdrv_open_file_child(NULL, options, "file", bs, errp); + if (ret < 0) { + return ret; } /* Initialise locks */ @@ -3679,7 +3679,7 @@ qcow2_co_create(BlockdevCreateOptions *create_options, Error **errp) cpu_to_be64(QCOW2_INCOMPAT_EXTL2); } - ret = blk_pwrite(blk, 0, cluster_size, header, 0); + ret = blk_co_pwrite(blk, 0, cluster_size, header, 0); g_free(header); if (ret < 0) { error_setg_errno(errp, -ret, "Could not write qcow2 header"); @@ -3689,7 +3689,7 @@ qcow2_co_create(BlockdevCreateOptions *create_options, Error **errp) /* Write a refcount table with one refcount block */ refcount_table = g_malloc0(2 * cluster_size); refcount_table[0] = cpu_to_be64(2 * cluster_size); - ret = blk_pwrite(blk, cluster_size, 2 * cluster_size, refcount_table, 0); + ret = blk_co_pwrite(blk, cluster_size, 2 * cluster_size, refcount_table, 0); g_free(refcount_table); if (ret < 0) { @@ -3744,8 +3744,8 @@ qcow2_co_create(BlockdevCreateOptions *create_options, Error **errp) } /* Okay, now that we have a valid image, let's give it the right size */ - ret = blk_truncate(blk, qcow2_opts->size, false, qcow2_opts->preallocation, - 0, errp); + ret = blk_co_truncate(blk, qcow2_opts->size, false, + qcow2_opts->preallocation, 0, errp); if (ret < 0) { error_prepend(errp, "Could not resize image: "); goto out; @@ -5287,8 +5287,8 @@ static int64_t qcow2_check_vmstate_request(BlockDriverState *bs, return pos; } -static int qcow2_save_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, - int64_t pos) +static coroutine_fn int qcow2_save_vmstate(BlockDriverState *bs, + QEMUIOVector *qiov, int64_t pos) { int64_t offset = qcow2_check_vmstate_request(bs, qiov, pos); if (offset < 0) { @@ -5299,8 +5299,8 @@ static int qcow2_save_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, return bs->drv->bdrv_co_pwritev_part(bs, offset, qiov->size, qiov, 0, 0); } -static int qcow2_load_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, - int64_t pos) +static coroutine_fn int qcow2_load_vmstate(BlockDriverState *bs, + QEMUIOVector *qiov, int64_t pos) { int64_t offset = qcow2_check_vmstate_request(bs, qiov, pos); if (offset < 0) { diff --git a/block/qcow2.h b/block/qcow2.h index 3e7c5e80b6..2285f18a73 100644 --- a/block/qcow2.h +++ b/block/qcow2.h @@ -846,7 +846,7 @@ int qcow2_validate_table(BlockDriverState *bs, uint64_t offset, Error **errp); /* qcow2-refcount.c functions */ -int qcow2_refcount_init(BlockDriverState *bs); +int coroutine_fn qcow2_refcount_init(BlockDriverState *bs); void qcow2_refcount_close(BlockDriverState *bs); int qcow2_get_refcount(BlockDriverState *bs, int64_t cluster_index, @@ -893,14 +893,14 @@ int qcow2_inc_refcounts_imrt(BlockDriverState *bs, BdrvCheckResult *res, int qcow2_change_refcount_order(BlockDriverState *bs, int refcount_order, BlockDriverAmendStatusCB *status_cb, void *cb_opaque, Error **errp); -int qcow2_shrink_reftable(BlockDriverState *bs); +int coroutine_fn qcow2_shrink_reftable(BlockDriverState *bs); int64_t qcow2_get_last_cluster(BlockDriverState *bs, int64_t size); int coroutine_fn qcow2_detect_metadata_preallocation(BlockDriverState *bs); /* qcow2-cluster.c functions */ int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size, bool exact_size); -int qcow2_shrink_l1_table(BlockDriverState *bs, uint64_t max_size); +int coroutine_fn qcow2_shrink_l1_table(BlockDriverState *bs, uint64_t max_size); int qcow2_write_l1_entry(BlockDriverState *bs, int l1_index); int qcow2_encrypt_sectors(BDRVQcow2State *s, int64_t sector_num, uint8_t *buf, int nb_sectors, bool enc, Error **errp); @@ -911,10 +911,10 @@ int qcow2_get_host_offset(BlockDriverState *bs, uint64_t offset, int coroutine_fn qcow2_alloc_host_offset(BlockDriverState *bs, uint64_t offset, unsigned int *bytes, uint64_t *host_offset, QCowL2Meta **m); -int qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs, - uint64_t offset, - int compressed_size, - uint64_t *host_offset); +int coroutine_fn qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs, + uint64_t offset, + int compressed_size, + uint64_t *host_offset); void qcow2_parse_compressed_l2_entry(BlockDriverState *bs, uint64_t l2_entry, uint64_t *coffset, int *csize); @@ -982,8 +982,8 @@ void qcow2_cache_discard(Qcow2Cache *c, void *table); int qcow2_check_bitmaps_refcounts(BlockDriverState *bs, BdrvCheckResult *res, void **refcount_table, int64_t *refcount_table_size); -bool qcow2_load_dirty_bitmaps(BlockDriverState *bs, bool *header_updated, - Error **errp); +bool coroutine_fn qcow2_load_dirty_bitmaps(BlockDriverState *bs, + bool *header_updated, Error **errp); bool qcow2_get_bitmap_info_list(BlockDriverState *bs, Qcow2BitmapInfoList **info_list, Error **errp); int qcow2_reopen_bitmaps_rw(BlockDriverState *bs, Error **errp); @@ -991,13 +991,13 @@ int qcow2_truncate_bitmaps_check(BlockDriverState *bs, Error **errp); bool qcow2_store_persistent_dirty_bitmaps(BlockDriverState *bs, bool release_stored, Error **errp); int qcow2_reopen_bitmaps_ro(BlockDriverState *bs, Error **errp); -bool qcow2_co_can_store_new_dirty_bitmap(BlockDriverState *bs, - const char *name, - uint32_t granularity, - Error **errp); -int qcow2_co_remove_persistent_dirty_bitmap(BlockDriverState *bs, - const char *name, - Error **errp); +bool coroutine_fn qcow2_co_can_store_new_dirty_bitmap(BlockDriverState *bs, + const char *name, + uint32_t granularity, + Error **errp); +int coroutine_fn qcow2_co_remove_persistent_dirty_bitmap(BlockDriverState *bs, + const char *name, + Error **errp); bool qcow2_supports_persistent_dirty_bitmap(BlockDriverState *bs); uint64_t qcow2_get_persistent_dirty_bitmap_size(BlockDriverState *bs, uint32_t cluster_size); diff --git a/block/qed-table.c b/block/qed-table.c index 1cc844b1a5..aa203f2627 100644 --- a/block/qed-table.c +++ b/block/qed-table.c @@ -100,7 +100,7 @@ static int coroutine_fn qed_write_table(BDRVQEDState *s, uint64_t offset, } if (flush) { - ret = bdrv_flush(s->bs); + ret = bdrv_co_flush(s->bs); if (ret < 0) { goto out; } diff --git a/block/qed.c b/block/qed.c index bda00e6257..2f36ad342c 100644 --- a/block/qed.c +++ b/block/qed.c @@ -387,7 +387,7 @@ static int coroutine_fn bdrv_qed_do_open(BlockDriverState *bs, QDict *options, int64_t file_size; int ret; - ret = bdrv_pread(bs->file, 0, sizeof(le_header), &le_header, 0); + ret = bdrv_co_pread(bs->file, 0, sizeof(le_header), &le_header, 0); if (ret < 0) { error_setg(errp, "Failed to read QED header"); return ret; @@ -492,7 +492,7 @@ static int coroutine_fn bdrv_qed_do_open(BlockDriverState *bs, QDict *options, } /* From here on only known autoclear feature bits are valid */ - bdrv_flush(bs->file->bs); + bdrv_co_flush(bs->file->bs); } s->l1_table = qed_alloc_table(s); @@ -561,11 +561,11 @@ static int bdrv_qed_open(BlockDriverState *bs, QDict *options, int flags, .errp = errp, .ret = -EINPROGRESS }; + int ret; - bs->file = bdrv_open_child(NULL, options, "file", bs, &child_of_bds, - BDRV_CHILD_IMAGE, false, errp); - if (!bs->file) { - return -EINVAL; + ret = bdrv_open_file_child(NULL, options, "file", bs, errp); + if (ret < 0) { + return ret; } bdrv_qed_init_state(bs); @@ -693,7 +693,7 @@ static int coroutine_fn bdrv_qed_co_create(BlockdevCreateOptions *opts, * The QED format associates file length with allocation status, * so a new file (which is empty) must have a length of 0. */ - ret = blk_truncate(blk, 0, true, PREALLOC_MODE_OFF, 0, errp); + ret = blk_co_truncate(blk, 0, true, PREALLOC_MODE_OFF, 0, errp); if (ret < 0) { goto out; } @@ -712,18 +712,18 @@ static int coroutine_fn bdrv_qed_co_create(BlockdevCreateOptions *opts, } qed_header_cpu_to_le(&header, &le_header); - ret = blk_pwrite(blk, 0, sizeof(le_header), &le_header, 0); + ret = blk_co_pwrite(blk, 0, sizeof(le_header), &le_header, 0); if (ret < 0) { goto out; } - ret = blk_pwrite(blk, sizeof(le_header), header.backing_filename_size, + ret = blk_co_pwrite(blk, sizeof(le_header), header.backing_filename_size, qed_opts->backing_file, 0); if (ret < 0) { goto out; } l1_table = g_malloc0(l1_size); - ret = blk_pwrite(blk, header.l1_table_offset, l1_size, l1_table, 0); + ret = blk_co_pwrite(blk, header.l1_table_offset, l1_size, l1_table, 0); if (ret < 0) { goto out; } @@ -1395,7 +1395,6 @@ static int coroutine_fn bdrv_qed_co_writev(BlockDriverState *bs, int64_t sector_num, int nb_sectors, QEMUIOVector *qiov, int flags) { - assert(!flags); return qed_co_request(bs, sector_num, qiov, nb_sectors, QED_AIOCB_WRITE); } diff --git a/block/raw-format.c b/block/raw-format.c index f337ac7569..a68014ef0b 100644 --- a/block/raw-format.c +++ b/block/raw-format.c @@ -258,6 +258,8 @@ static int coroutine_fn raw_co_pwritev(BlockDriverState *bs, int64_t offset, qemu_iovec_add(&local_qiov, buf, 512); qemu_iovec_concat(&local_qiov, qiov, 512, qiov->size - 512); qiov = &local_qiov; + + flags &= ~BDRV_REQ_REGISTERED_BUF; } ret = raw_adjust_offset(bs, &offset, bytes, true); @@ -458,8 +460,8 @@ static int raw_open(BlockDriverState *bs, QDict *options, int flags, file_role = BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY; } - bs->file = bdrv_open_child(NULL, options, "file", bs, &child_of_bds, - file_role, false, errp); + bdrv_open_child(NULL, options, "file", bs, &child_of_bds, + file_role, false, errp); if (!bs->file) { return -EINVAL; } diff --git a/block/replication.c b/block/replication.c index c67f931f37..f1eed25e43 100644 --- a/block/replication.c +++ b/block/replication.c @@ -88,11 +88,9 @@ static int replication_open(BlockDriverState *bs, QDict *options, const char *mode; const char *top_id; - bs->file = bdrv_open_child(NULL, options, "file", bs, &child_of_bds, - BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY, - false, errp); - if (!bs->file) { - return -EINVAL; + ret = bdrv_open_file_child(NULL, options, "file", bs, errp); + if (ret < 0) { + return ret; } ret = -EINVAL; @@ -261,7 +259,6 @@ static coroutine_fn int replication_co_writev(BlockDriverState *bs, int ret; int64_t n; - assert(!flags); ret = replication_get_io_status(s); if (ret < 0) { goto out; diff --git a/block/snapshot-access.c b/block/snapshot-access.c index 77b87c1946..0a30ec6cd9 100644 --- a/block/snapshot-access.c +++ b/block/snapshot-access.c @@ -82,9 +82,9 @@ static void snapshot_access_refresh_filename(BlockDriverState *bs) static int snapshot_access_open(BlockDriverState *bs, QDict *options, int flags, Error **errp) { - bs->file = bdrv_open_child(NULL, options, "file", bs, &child_of_bds, - BDRV_CHILD_DATA | BDRV_CHILD_PRIMARY, - false, errp); + bdrv_open_child(NULL, options, "file", bs, &child_of_bds, + BDRV_CHILD_DATA | BDRV_CHILD_PRIMARY, + false, errp); if (!bs->file) { return -EINVAL; } diff --git a/block/snapshot.c b/block/snapshot.c index d6f53c3065..e22ac3eac6 100644 --- a/block/snapshot.c +++ b/block/snapshot.c @@ -151,41 +151,29 @@ bool bdrv_snapshot_find_by_id_and_name(BlockDriverState *bs, } /** - * Return a pointer to the child BDS pointer to which we can fall + * Return a pointer to child of given BDS to which we can fall * back if the given BDS does not support snapshots. * Return NULL if there is no BDS to (safely) fall back to. - * - * We need to return an indirect pointer because bdrv_snapshot_goto() - * has to modify the BdrvChild pointer. */ -static BdrvChild **bdrv_snapshot_fallback_ptr(BlockDriverState *bs) +static BdrvChild *bdrv_snapshot_fallback_child(BlockDriverState *bs) { - BdrvChild **fallback; + BdrvChild *fallback = bdrv_primary_child(bs); BdrvChild *child; - /* - * The only BdrvChild pointers that are safe to modify (and which - * we can thus return a reference to) are bs->file and - * bs->backing. - */ - fallback = &bs->file; - if (!*fallback && bs->drv && bs->drv->is_filter) { - fallback = &bs->backing; - } - - if (!*fallback) { + /* We allow fallback only to primary child */ + if (!fallback) { return NULL; } /* * Check that there are no other children that would need to be * snapshotted. If there are, it is not safe to fall back to - * *fallback. + * fallback. */ QLIST_FOREACH(child, &bs->children, next) { if (child->role & (BDRV_CHILD_DATA | BDRV_CHILD_METADATA | BDRV_CHILD_FILTERED) && - child != *fallback) + child != fallback) { return NULL; } @@ -196,8 +184,7 @@ static BdrvChild **bdrv_snapshot_fallback_ptr(BlockDriverState *bs) static BlockDriverState *bdrv_snapshot_fallback(BlockDriverState *bs) { - BdrvChild **child_ptr = bdrv_snapshot_fallback_ptr(bs); - return child_ptr ? (*child_ptr)->bs : NULL; + return child_bs(bdrv_snapshot_fallback_child(bs)); } int bdrv_can_snapshot(BlockDriverState *bs) @@ -244,7 +231,7 @@ int bdrv_snapshot_goto(BlockDriverState *bs, Error **errp) { BlockDriver *drv = bs->drv; - BdrvChild **fallback_ptr; + BdrvChild *fallback; int ret, open_ret; GLOBAL_STATE_CODE(); @@ -267,13 +254,13 @@ int bdrv_snapshot_goto(BlockDriverState *bs, return ret; } - fallback_ptr = bdrv_snapshot_fallback_ptr(bs); - if (fallback_ptr) { + fallback = bdrv_snapshot_fallback_child(bs); + if (fallback) { QDict *options; QDict *file_options; Error *local_err = NULL; - BlockDriverState *fallback_bs = (*fallback_ptr)->bs; - char *subqdict_prefix = g_strdup_printf("%s.", (*fallback_ptr)->name); + BlockDriverState *fallback_bs = fallback->bs; + char *subqdict_prefix = g_strdup_printf("%s.", fallback->name); options = qdict_clone_shallow(bs->options); @@ -284,8 +271,8 @@ int bdrv_snapshot_goto(BlockDriverState *bs, qobject_unref(file_options); g_free(subqdict_prefix); - /* Force .bdrv_open() below to re-attach fallback_bs on *fallback_ptr */ - qdict_put_str(options, (*fallback_ptr)->name, + /* Force .bdrv_open() below to re-attach fallback_bs on fallback */ + qdict_put_str(options, fallback->name, bdrv_get_node_name(fallback_bs)); /* Now close bs, apply the snapshot on fallback_bs, and re-open bs */ @@ -294,8 +281,7 @@ int bdrv_snapshot_goto(BlockDriverState *bs, } /* .bdrv_open() will re-attach it */ - bdrv_unref_child(bs, *fallback_ptr); - *fallback_ptr = NULL; + bdrv_unref_child(bs, fallback); ret = bdrv_snapshot_goto(fallback_bs, snapshot_id, errp); open_ret = drv->bdrv_open(bs, options, bs->open_flags, &local_err); @@ -309,15 +295,12 @@ int bdrv_snapshot_goto(BlockDriverState *bs, } /* - * fallback_ptr is &bs->file or &bs->backing. *fallback_ptr - * was closed above and set to NULL, but the .bdrv_open() call - * has opened it again, because we set the respective option - * (with the qdict_put_str() call above). - * Assert that .bdrv_open() has attached some child on - * *fallback_ptr, and that it has attached the one we wanted - * it to (i.e., fallback_bs). + * fallback was a primary child. It was closed above and set to NULL, + * but the .bdrv_open() call has opened it again, because we set the + * respective option (with the qdict_put_str() call above). + * Assert that .bdrv_open() has attached the right BDS as primary child. */ - assert(*fallback_ptr && fallback_bs == (*fallback_ptr)->bs); + assert(bdrv_primary_bs(bs) == fallback_bs); bdrv_unref(fallback_bs); return ret; } diff --git a/block/ssh.c b/block/ssh.c index a2dc646536..04726d4ecb 100644 --- a/block/ssh.c +++ b/block/ssh.c @@ -1129,9 +1129,9 @@ static coroutine_fn int ssh_co_readv(BlockDriverState *bs, return ret; } -static int ssh_write(BDRVSSHState *s, BlockDriverState *bs, - int64_t offset, size_t size, - QEMUIOVector *qiov) +static coroutine_fn int ssh_write(BDRVSSHState *s, BlockDriverState *bs, + int64_t offset, size_t size, + QEMUIOVector *qiov) { ssize_t r; size_t written; @@ -1196,7 +1196,6 @@ static coroutine_fn int ssh_co_writev(BlockDriverState *bs, BDRVSSHState *s = bs->opaque; int ret; - assert(!flags); qemu_co_mutex_lock(&s->lock); ret = ssh_write(s, bs, sector_num * BDRV_SECTOR_SIZE, nb_sectors * BDRV_SECTOR_SIZE, qiov); diff --git a/block/throttle.c b/block/throttle.c index ddd450593a..131eba3ab4 100644 --- a/block/throttle.c +++ b/block/throttle.c @@ -78,11 +78,9 @@ static int throttle_open(BlockDriverState *bs, QDict *options, char *group; int ret; - bs->file = bdrv_open_child(NULL, options, "file", bs, &child_of_bds, - BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY, - false, errp); - if (!bs->file) { - return -EINVAL; + ret = bdrv_open_file_child(NULL, options, "file", bs, errp); + if (ret < 0) { + return ret; } bs->supported_write_flags = bs->file->bs->supported_write_flags | BDRV_REQ_WRITE_UNCHANGED; diff --git a/block/vdi.c b/block/vdi.c index e942325455..c0c111c4b9 100644 --- a/block/vdi.c +++ b/block/vdi.c @@ -377,10 +377,9 @@ static int vdi_open(BlockDriverState *bs, QDict *options, int flags, int ret; QemuUUID uuid_link, uuid_parent; - bs->file = bdrv_open_child(NULL, options, "file", bs, &child_of_bds, - BDRV_CHILD_IMAGE, false, errp); - if (!bs->file) { - return -EINVAL; + ret = bdrv_open_file_child(NULL, options, "file", bs, errp); + if (ret < 0) { + return ret; } logout("\n"); @@ -664,7 +663,8 @@ vdi_co_pwritev(BlockDriverState *bs, int64_t offset, int64_t bytes, * so this full-cluster write does not overlap a partial write * of the same cluster, issued from the "else" branch. */ - ret = bdrv_pwrite(bs->file, data_offset, s->block_size, block, 0); + ret = bdrv_co_pwrite(bs->file, data_offset, s->block_size, block, + 0); qemu_co_rwlock_unlock(&s->bmap_lock); } else { nonallocating_write: @@ -709,7 +709,7 @@ nonallocating_write: assert(VDI_IS_ALLOCATED(bmap_first)); *header = s->header; vdi_header_to_le(header); - ret = bdrv_pwrite(bs->file, 0, sizeof(*header), header, 0); + ret = bdrv_co_pwrite(bs->file, 0, sizeof(*header), header, 0); g_free(header); if (ret < 0) { @@ -726,8 +726,8 @@ nonallocating_write: base = ((uint8_t *)&s->bmap[0]) + bmap_first * SECTOR_SIZE; logout("will write %u block map sectors starting from entry %u\n", n_sectors, bmap_first); - ret = bdrv_pwrite(bs->file, offset * SECTOR_SIZE, - n_sectors * SECTOR_SIZE, base, 0); + ret = bdrv_co_pwrite(bs->file, offset * SECTOR_SIZE, + n_sectors * SECTOR_SIZE, base, 0); } return ret; @@ -845,7 +845,7 @@ static int coroutine_fn vdi_co_do_create(BlockdevCreateOptions *create_options, vdi_header_print(&header); } vdi_header_to_le(&header); - ret = blk_pwrite(blk, offset, sizeof(header), &header, 0); + ret = blk_co_pwrite(blk, offset, sizeof(header), &header, 0); if (ret < 0) { error_setg(errp, "Error writing header"); goto exit; @@ -866,7 +866,7 @@ static int coroutine_fn vdi_co_do_create(BlockdevCreateOptions *create_options, bmap[i] = VDI_UNALLOCATED; } } - ret = blk_pwrite(blk, offset, bmap_size, bmap, 0); + ret = blk_co_pwrite(blk, offset, bmap_size, bmap, 0); if (ret < 0) { error_setg(errp, "Error writing bmap"); goto exit; @@ -875,8 +875,8 @@ static int coroutine_fn vdi_co_do_create(BlockdevCreateOptions *create_options, } if (image_type == VDI_TYPE_STATIC) { - ret = blk_truncate(blk, offset + blocks * block_size, false, - PREALLOC_MODE_OFF, 0, errp); + ret = blk_co_truncate(blk, offset + blocks * block_size, false, + PREALLOC_MODE_OFF, 0, errp); if (ret < 0) { error_prepend(errp, "Failed to statically allocate file"); goto exit; diff --git a/block/vhdx.c b/block/vhdx.c index e10e78ebfd..bad9ca691b 100644 --- a/block/vhdx.c +++ b/block/vhdx.c @@ -1001,10 +1001,9 @@ static int vhdx_open(BlockDriverState *bs, QDict *options, int flags, uint64_t signature; Error *local_err = NULL; - bs->file = bdrv_open_child(NULL, options, "file", bs, &child_of_bds, - BDRV_CHILD_IMAGE, false, errp); - if (!bs->file) { - return -EINVAL; + ret = bdrv_open_file_child(NULL, options, "file", bs, errp); + if (ret < 0) { + return ret; } s->bat = NULL; @@ -1342,7 +1341,6 @@ static coroutine_fn int vhdx_co_writev(BlockDriverState *bs, int64_t sector_num, uint64_t bat_prior_offset = 0; bool bat_update = false; - assert(!flags); qemu_iovec_init(&hd_qiov, qiov->niov); qemu_co_mutex_lock(&s->lock); @@ -2012,15 +2010,15 @@ static int coroutine_fn vhdx_co_create(BlockdevCreateOptions *opts, creator = g_utf8_to_utf16("QEMU v" QEMU_VERSION, -1, NULL, &creator_items, NULL); signature = cpu_to_le64(VHDX_FILE_SIGNATURE); - ret = blk_pwrite(blk, VHDX_FILE_ID_OFFSET, sizeof(signature), &signature, - 0); + ret = blk_co_pwrite(blk, VHDX_FILE_ID_OFFSET, sizeof(signature), &signature, + 0); if (ret < 0) { error_setg_errno(errp, -ret, "Failed to write file signature"); goto delete_and_exit; } if (creator) { - ret = blk_pwrite(blk, VHDX_FILE_ID_OFFSET + sizeof(signature), - creator_items * sizeof(gunichar2), creator, 0); + ret = blk_co_pwrite(blk, VHDX_FILE_ID_OFFSET + sizeof(signature), + creator_items * sizeof(gunichar2), creator, 0); if (ret < 0) { error_setg_errno(errp, -ret, "Failed to write creator field"); goto delete_and_exit; diff --git a/block/vmdk.c b/block/vmdk.c index f7d8856dfb..26376352b9 100644 --- a/block/vmdk.c +++ b/block/vmdk.c @@ -1308,10 +1308,9 @@ static int vmdk_open(BlockDriverState *bs, QDict *options, int flags, BDRVVmdkState *s = bs->opaque; uint32_t magic; - bs->file = bdrv_open_child(NULL, options, "file", bs, &child_of_bds, - BDRV_CHILD_IMAGE, false, errp); - if (!bs->file) { - return -EINVAL; + ret = bdrv_open_file_child(NULL, options, "file", bs, errp); + if (ret < 0) { + return ret; } buf = vmdk_read_desc(bs->file, 0, errp); @@ -1404,13 +1403,13 @@ static void vmdk_refresh_limits(BlockDriverState *bs, Error **errp) * [@skip_start_sector, @skip_end_sector) is not copied or written, and leave * it for call to write user data in the request. */ -static int get_whole_cluster(BlockDriverState *bs, - VmdkExtent *extent, - uint64_t cluster_offset, - uint64_t offset, - uint64_t skip_start_bytes, - uint64_t skip_end_bytes, - bool zeroed) +static int coroutine_fn get_whole_cluster(BlockDriverState *bs, + VmdkExtent *extent, + uint64_t cluster_offset, + uint64_t offset, + uint64_t skip_start_bytes, + uint64_t skip_end_bytes, + bool zeroed) { int ret = VMDK_OK; int64_t cluster_bytes; @@ -1441,16 +1440,16 @@ static int get_whole_cluster(BlockDriverState *bs, if (copy_from_backing) { /* qcow2 emits this on bs->file instead of bs->backing */ BLKDBG_EVENT(extent->file, BLKDBG_COW_READ); - ret = bdrv_pread(bs->backing, offset, skip_start_bytes, - whole_grain, 0); + ret = bdrv_co_pread(bs->backing, offset, skip_start_bytes, + whole_grain, 0); if (ret < 0) { ret = VMDK_ERROR; goto exit; } } BLKDBG_EVENT(extent->file, BLKDBG_COW_WRITE); - ret = bdrv_pwrite(extent->file, cluster_offset, skip_start_bytes, - whole_grain, 0); + ret = bdrv_co_pwrite(extent->file, cluster_offset, skip_start_bytes, + whole_grain, 0); if (ret < 0) { ret = VMDK_ERROR; goto exit; @@ -1461,18 +1460,18 @@ static int get_whole_cluster(BlockDriverState *bs, if (copy_from_backing) { /* qcow2 emits this on bs->file instead of bs->backing */ BLKDBG_EVENT(extent->file, BLKDBG_COW_READ); - ret = bdrv_pread(bs->backing, offset + skip_end_bytes, - cluster_bytes - skip_end_bytes, - whole_grain + skip_end_bytes, 0); + ret = bdrv_co_pread(bs->backing, offset + skip_end_bytes, + cluster_bytes - skip_end_bytes, + whole_grain + skip_end_bytes, 0); if (ret < 0) { ret = VMDK_ERROR; goto exit; } } BLKDBG_EVENT(extent->file, BLKDBG_COW_WRITE); - ret = bdrv_pwrite(extent->file, cluster_offset + skip_end_bytes, - cluster_bytes - skip_end_bytes, - whole_grain + skip_end_bytes, 0); + ret = bdrv_co_pwrite(extent->file, cluster_offset + skip_end_bytes, + cluster_bytes - skip_end_bytes, + whole_grain + skip_end_bytes, 0); if (ret < 0) { ret = VMDK_ERROR; goto exit; @@ -1485,29 +1484,29 @@ exit: return ret; } -static int vmdk_L2update(VmdkExtent *extent, VmdkMetaData *m_data, - uint32_t offset) +static int coroutine_fn vmdk_L2update(VmdkExtent *extent, VmdkMetaData *m_data, + uint32_t offset) { offset = cpu_to_le32(offset); /* update L2 table */ BLKDBG_EVENT(extent->file, BLKDBG_L2_UPDATE); - if (bdrv_pwrite(extent->file, - ((int64_t)m_data->l2_offset * 512) - + (m_data->l2_index * sizeof(offset)), - sizeof(offset), &offset, 0) < 0) { + if (bdrv_co_pwrite(extent->file, + ((int64_t)m_data->l2_offset * 512) + + (m_data->l2_index * sizeof(offset)), + sizeof(offset), &offset, 0) < 0) { return VMDK_ERROR; } /* update backup L2 table */ if (extent->l1_backup_table_offset != 0) { m_data->l2_offset = extent->l1_backup_table[m_data->l1_index]; - if (bdrv_pwrite(extent->file, - ((int64_t)m_data->l2_offset * 512) - + (m_data->l2_index * sizeof(offset)), - sizeof(offset), &offset, 0) < 0) { + if (bdrv_co_pwrite(extent->file, + ((int64_t)m_data->l2_offset * 512) + + (m_data->l2_index * sizeof(offset)), + sizeof(offset), &offset, 0) < 0) { return VMDK_ERROR; } } - if (bdrv_flush(extent->file->bs) < 0) { + if (bdrv_co_flush(extent->file->bs) < 0) { return VMDK_ERROR; } if (m_data->l2_cache_entry) { @@ -1537,14 +1536,14 @@ static int vmdk_L2update(VmdkExtent *extent, VmdkMetaData *m_data, * VMDK_UNALLOC if cluster is not mapped and @allocate is false. * VMDK_ERROR if failed. */ -static int get_cluster_offset(BlockDriverState *bs, - VmdkExtent *extent, - VmdkMetaData *m_data, - uint64_t offset, - bool allocate, - uint64_t *cluster_offset, - uint64_t skip_start_bytes, - uint64_t skip_end_bytes) +static int coroutine_fn get_cluster_offset(BlockDriverState *bs, + VmdkExtent *extent, + VmdkMetaData *m_data, + uint64_t offset, + bool allocate, + uint64_t *cluster_offset, + uint64_t skip_start_bytes, + uint64_t skip_end_bytes) { unsigned int l1_index, l2_offset, l2_index; int min_index, i, j; @@ -1624,11 +1623,10 @@ static int get_cluster_offset(BlockDriverState *bs, } l2_table = (char *)extent->l2_cache + (min_index * l2_size_bytes); BLKDBG_EVENT(extent->file, BLKDBG_L2_LOAD); - if (bdrv_pread(extent->file, + if (bdrv_co_pread(extent->file, (int64_t)l2_offset * 512, l2_size_bytes, - l2_table, - 0 + l2_table, 0 ) < 0) { return VMDK_ERROR; } @@ -1899,7 +1897,8 @@ vmdk_read_extent(VmdkExtent *extent, int64_t cluster_offset, cluster_buf = g_malloc(buf_bytes); uncomp_buf = g_malloc(cluster_bytes); BLKDBG_EVENT(extent->file, BLKDBG_READ_COMPRESSED); - ret = bdrv_pread(extent->file, cluster_offset, buf_bytes, cluster_buf, 0); + ret = bdrv_co_pread(extent->file, cluster_offset, buf_bytes, cluster_buf, + 0); if (ret < 0) { goto out; } @@ -2144,8 +2143,8 @@ vmdk_co_pwritev_compressed(BlockDriverState *bs, int64_t offset, int64_t bytes, return length; } length = QEMU_ALIGN_UP(length, BDRV_SECTOR_SIZE); - ret = bdrv_truncate(s->extents[i].file, length, false, - PREALLOC_MODE_OFF, 0, NULL); + ret = bdrv_co_truncate(s->extents[i].file, length, false, + PREALLOC_MODE_OFF, 0, NULL); if (ret < 0) { return ret; } @@ -2586,7 +2585,7 @@ static int coroutine_fn vmdk_co_do_create(int64_t size, desc_offset = 0x200; } - ret = blk_pwrite(blk, desc_offset, desc_len, desc, 0); + ret = blk_co_pwrite(blk, desc_offset, desc_len, desc, 0); if (ret < 0) { error_setg_errno(errp, -ret, "Could not write description"); goto exit; @@ -2594,7 +2593,7 @@ static int coroutine_fn vmdk_co_do_create(int64_t size, /* bdrv_pwrite write padding zeros to align to sector, we don't need that * for description file */ if (desc_offset == 0) { - ret = blk_truncate(blk, desc_len, false, PREALLOC_MODE_OFF, 0, errp); + ret = blk_co_truncate(blk, desc_len, false, PREALLOC_MODE_OFF, 0, errp); if (ret < 0) { goto exit; } diff --git a/block/vpc.c b/block/vpc.c index 4f49ef207f..95841f259a 100644 --- a/block/vpc.c +++ b/block/vpc.c @@ -233,10 +233,9 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags, int ret; int64_t bs_size; - bs->file = bdrv_open_child(NULL, options, "file", bs, &child_of_bds, - BDRV_CHILD_IMAGE, false, errp); - if (!bs->file) { - return -EINVAL; + ret = bdrv_open_file_child(NULL, options, "file", bs, errp); + if (ret < 0) { + return ret; } opts = qemu_opts_create(&vpc_runtime_opts, NULL, 0, &error_abort); diff --git a/block/vvfat.c b/block/vvfat.c index d6dd919683..c5b1442145 100644 --- a/block/vvfat.c +++ b/block/vvfat.c @@ -499,7 +499,7 @@ static bool valid_filename(const unsigned char *name) (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || c > 127 || - strchr("$%'-_@~`!(){}^#&.+,;=[]", c) != NULL)) + strchr(" $%'-_@~`!(){}^#&.+,;=[]", c) != NULL)) { return false; } @@ -2993,11 +2993,35 @@ DLOG(checkpoint()); vvfat_close_current_file(s); + if (sector_num == s->offset_to_bootsector && nb_sectors == 1) { + /* + * Write on bootsector. Allow only changing the reserved1 field, + * used to mark volume dirtiness + */ + unsigned char *bootsector = s->first_sectors + + s->offset_to_bootsector * 0x200; + /* + * LATER TODO: if FAT32, this is wrong (see init_directories(), + * which always creates a FAT16 bootsector) + */ + const int reserved1_offset = offsetof(bootsector_t, u.fat16.reserved1); + + for (i = 0; i < 0x200; i++) { + if (i != reserved1_offset && bootsector[i] != buf[i]) { + fprintf(stderr, "Tried to write to protected bootsector\n"); + return -1; + } + } + + /* Update bootsector with the only updatable byte, and return success */ + bootsector[reserved1_offset] = buf[reserved1_offset]; + return 0; + } + /* * Some sanity checks: * - do not allow writing to the boot sector */ - if (sector_num < s->offset_to_fat) return -1; @@ -3146,10 +3170,9 @@ static int enable_write_target(BlockDriverState *bs, Error **errp) array_init(&(s->commits), sizeof(commit_t)); - s->qcow_filename = g_malloc(PATH_MAX); - ret = get_tmp_filename(s->qcow_filename, PATH_MAX); - if (ret < 0) { - error_setg_errno(errp, -ret, "can't create temporary file"); + s->qcow_filename = create_tmp_file(errp); + if (!s->qcow_filename) { + ret = -ENOENT; goto err; } diff --git a/blockdev.c b/blockdev.c index a32bafc07a..3f1dec6242 100644 --- a/blockdev.c +++ b/blockdev.c @@ -1630,8 +1630,8 @@ static void external_snapshot_abort(BlkActionState *common) aio_context_release(aio_context); aio_context_acquire(tmp_context); - ret = bdrv_try_set_aio_context(state->old_bs, - aio_context, NULL); + ret = bdrv_try_change_aio_context(state->old_bs, + aio_context, NULL, NULL); assert(ret == 0); aio_context_release(tmp_context); @@ -1792,12 +1792,12 @@ static void drive_backup_prepare(BlkActionState *common, Error **errp) goto out; } - /* Honor bdrv_try_set_aio_context() context acquisition requirements. */ + /* Honor bdrv_try_change_aio_context() context acquisition requirements. */ old_context = bdrv_get_aio_context(target_bs); aio_context_release(aio_context); aio_context_acquire(old_context); - ret = bdrv_try_set_aio_context(target_bs, aio_context, errp); + ret = bdrv_try_change_aio_context(target_bs, aio_context, NULL, errp); if (ret < 0) { bdrv_unref(target_bs); aio_context_release(old_context); @@ -1892,12 +1892,12 @@ static void blockdev_backup_prepare(BlkActionState *common, Error **errp) return; } - /* Honor bdrv_try_set_aio_context() context acquisition requirements. */ + /* Honor bdrv_try_change_aio_context() context acquisition requirements. */ aio_context = bdrv_get_aio_context(bs); old_context = bdrv_get_aio_context(target_bs); aio_context_acquire(old_context); - ret = bdrv_try_set_aio_context(target_bs, aio_context, errp); + ret = bdrv_try_change_aio_context(target_bs, aio_context, NULL, errp); if (ret < 0) { aio_context_release(old_context); return; @@ -2448,7 +2448,7 @@ void coroutine_fn qmp_block_resize(bool has_device, const char *device, bdrv_co_unlock(bs); old_ctx = bdrv_co_enter(bs); - blk_truncate(blk, size, false, PREALLOC_MODE_OFF, 0, errp); + blk_co_truncate(blk, size, false, PREALLOC_MODE_OFF, 0, errp); bdrv_co_leave(bs, old_ctx); bdrv_co_lock(bs); @@ -3194,12 +3194,12 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp) !bdrv_has_zero_init(target_bs))); - /* Honor bdrv_try_set_aio_context() context acquisition requirements. */ + /* Honor bdrv_try_change_aio_context() context acquisition requirements. */ old_context = bdrv_get_aio_context(target_bs); aio_context_release(aio_context); aio_context_acquire(old_context); - ret = bdrv_try_set_aio_context(target_bs, aio_context, errp); + ret = bdrv_try_change_aio_context(target_bs, aio_context, NULL, errp); if (ret < 0) { bdrv_unref(target_bs); aio_context_release(old_context); @@ -3266,12 +3266,12 @@ void qmp_blockdev_mirror(bool has_job_id, const char *job_id, zero_target = (sync == MIRROR_SYNC_MODE_FULL); - /* Honor bdrv_try_set_aio_context() context acquisition requirements. */ + /* Honor bdrv_try_change_aio_context() context acquisition requirements. */ old_context = bdrv_get_aio_context(target_bs); aio_context = bdrv_get_aio_context(bs); aio_context_acquire(old_context); - ret = bdrv_try_set_aio_context(target_bs, aio_context, errp); + ret = bdrv_try_change_aio_context(target_bs, aio_context, NULL, errp); aio_context_release(old_context); aio_context_acquire(aio_context); @@ -3767,7 +3767,7 @@ void qmp_x_blockdev_set_iothread(const char *node_name, StrOrNull *iothread, old_context = bdrv_get_aio_context(bs); aio_context_acquire(old_context); - bdrv_try_set_aio_context(bs, new_context, errp); + bdrv_try_change_aio_context(bs, new_context, NULL, errp); aio_context_release(old_context); } diff --git a/blockjob.c b/blockjob.c index bdf20a0e35..2d86014fa5 100644 --- a/blockjob.c +++ b/blockjob.c @@ -126,37 +126,48 @@ static void child_job_drained_end(BdrvChild *c, int *drained_end_counter) job_resume(&job->job); } -static bool child_job_can_set_aio_ctx(BdrvChild *c, AioContext *ctx, - GSList **ignore, Error **errp) +typedef struct BdrvStateChildJobContext { + AioContext *new_ctx; + BlockJob *job; +} BdrvStateChildJobContext; + +static void child_job_set_aio_ctx_commit(void *opaque) { - BlockJob *job = c->opaque; - GSList *l; + BdrvStateChildJobContext *s = opaque; + BlockJob *job = s->job; - for (l = job->nodes; l; l = l->next) { - BdrvChild *sibling = l->data; - if (!bdrv_child_can_set_aio_context(sibling, ctx, ignore, errp)) { - return false; - } - } - return true; + job_set_aio_context(&job->job, s->new_ctx); } -static void child_job_set_aio_ctx(BdrvChild *c, AioContext *ctx, - GSList **ignore) +static TransactionActionDrv change_child_job_context = { + .commit = child_job_set_aio_ctx_commit, + .clean = g_free, +}; + +static bool child_job_change_aio_ctx(BdrvChild *c, AioContext *ctx, + GHashTable *visited, Transaction *tran, + Error **errp) { BlockJob *job = c->opaque; + BdrvStateChildJobContext *s; GSList *l; for (l = job->nodes; l; l = l->next) { BdrvChild *sibling = l->data; - if (g_slist_find(*ignore, sibling)) { - continue; + if (!bdrv_child_change_aio_context(sibling, ctx, visited, + tran, errp)) { + return false; } - *ignore = g_slist_prepend(*ignore, sibling); - bdrv_set_aio_context_ignore(sibling->bs, ctx, ignore); } - job_set_aio_context(&job->job, ctx); + s = g_new(BdrvStateChildJobContext, 1); + *s = (BdrvStateChildJobContext) { + .new_ctx = ctx, + .job = job, + }; + + tran_add(tran, &change_child_job_context, s); + return true; } static AioContext *child_job_get_parent_aio_context(BdrvChild *c) @@ -172,8 +183,7 @@ static const BdrvChildClass child_job = { .drained_begin = child_job_drained_begin, .drained_poll = child_job_drained_poll, .drained_end = child_job_drained_end, - .can_set_aio_ctx = child_job_can_set_aio_ctx, - .set_aio_ctx = child_job_set_aio_ctx, + .change_aio_ctx = child_job_change_aio_ctx, .stay_at_node = true, .get_parent_aio_context = child_job_get_parent_aio_context, }; diff --git a/crypto/block-luks-priv.h b/crypto/block-luks-priv.h new file mode 100644 index 0000000000..90a20d432b --- /dev/null +++ b/crypto/block-luks-priv.h @@ -0,0 +1,143 @@ +/* + * QEMU Crypto block device encryption LUKS format + * + * Copyright (c) 2015-2016 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. + * + */ + +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "qemu/bswap.h" + +#include "block-luks.h" + +#include "crypto/hash.h" +#include "crypto/afsplit.h" +#include "crypto/pbkdf.h" +#include "crypto/secret.h" +#include "crypto/random.h" +#include "qemu/uuid.h" + +#include "qemu/coroutine.h" +#include "qemu/bitmap.h" + +/* + * Reference for the LUKS format implemented here is + * + * docs/on-disk-format.pdf + * + * in 'cryptsetup' package source code + * + * This file implements the 1.2.1 specification, dated + * Oct 16, 2011. + */ + +typedef struct QCryptoBlockLUKSHeader QCryptoBlockLUKSHeader; +typedef struct QCryptoBlockLUKSKeySlot QCryptoBlockLUKSKeySlot; + + +/* The following constants are all defined by the LUKS spec */ +#define QCRYPTO_BLOCK_LUKS_VERSION 1 + +#define QCRYPTO_BLOCK_LUKS_MAGIC_LEN 6 +#define QCRYPTO_BLOCK_LUKS_CIPHER_NAME_LEN 32 +#define QCRYPTO_BLOCK_LUKS_CIPHER_MODE_LEN 32 +#define QCRYPTO_BLOCK_LUKS_HASH_SPEC_LEN 32 +#define QCRYPTO_BLOCK_LUKS_DIGEST_LEN 20 +#define QCRYPTO_BLOCK_LUKS_SALT_LEN 32 +#define QCRYPTO_BLOCK_LUKS_UUID_LEN 40 +#define QCRYPTO_BLOCK_LUKS_NUM_KEY_SLOTS 8 +#define QCRYPTO_BLOCK_LUKS_STRIPES 4000 +#define QCRYPTO_BLOCK_LUKS_MIN_SLOT_KEY_ITERS 1000 +#define QCRYPTO_BLOCK_LUKS_MIN_MASTER_KEY_ITERS 1000 +#define QCRYPTO_BLOCK_LUKS_KEY_SLOT_OFFSET 4096 + +#define QCRYPTO_BLOCK_LUKS_KEY_SLOT_DISABLED 0x0000DEAD +#define QCRYPTO_BLOCK_LUKS_KEY_SLOT_ENABLED 0x00AC71F3 + +#define QCRYPTO_BLOCK_LUKS_SECTOR_SIZE 512LL + +#define QCRYPTO_BLOCK_LUKS_DEFAULT_ITER_TIME_MS 2000 +#define QCRYPTO_BLOCK_LUKS_ERASE_ITERATIONS 40 + +static const char qcrypto_block_luks_magic[QCRYPTO_BLOCK_LUKS_MAGIC_LEN] = { + 'L', 'U', 'K', 'S', 0xBA, 0xBE +}; + +/* + * This struct is written to disk in big-endian format, + * but operated upon in native-endian format. + */ +struct QCryptoBlockLUKSKeySlot { + /* state of keyslot, enabled/disable */ + uint32_t active; + /* iterations for PBKDF2 */ + uint32_t iterations; + /* salt for PBKDF2 */ + uint8_t salt[QCRYPTO_BLOCK_LUKS_SALT_LEN]; + /* start sector of key material */ + uint32_t key_offset_sector; + /* number of anti-forensic stripes */ + uint32_t stripes; +}; + +/* + * This struct is written to disk in big-endian format, + * but operated upon in native-endian format. + */ +struct QCryptoBlockLUKSHeader { + /* 'L', 'U', 'K', 'S', '0xBA', '0xBE' */ + char magic[QCRYPTO_BLOCK_LUKS_MAGIC_LEN]; + + /* LUKS version, currently 1 */ + uint16_t version; + + /* cipher name specification (aes, etc) */ + char cipher_name[QCRYPTO_BLOCK_LUKS_CIPHER_NAME_LEN]; + + /* cipher mode specification (cbc-plain, xts-essiv:sha256, etc) */ + char cipher_mode[QCRYPTO_BLOCK_LUKS_CIPHER_MODE_LEN]; + + /* hash specification (sha256, etc) */ + char hash_spec[QCRYPTO_BLOCK_LUKS_HASH_SPEC_LEN]; + + /* start offset of the volume data (in 512 byte sectors) */ + uint32_t payload_offset_sector; + + /* Number of key bytes */ + uint32_t master_key_len; + + /* master key checksum after PBKDF2 */ + uint8_t master_key_digest[QCRYPTO_BLOCK_LUKS_DIGEST_LEN]; + + /* salt for master key PBKDF2 */ + uint8_t master_key_salt[QCRYPTO_BLOCK_LUKS_SALT_LEN]; + + /* iterations for master key PBKDF2 */ + uint32_t master_key_iterations; + + /* UUID of the partition in standard ASCII representation */ + uint8_t uuid[QCRYPTO_BLOCK_LUKS_UUID_LEN]; + + /* key slots */ + QCryptoBlockLUKSKeySlot key_slots[QCRYPTO_BLOCK_LUKS_NUM_KEY_SLOTS]; +}; + + +void +qcrypto_block_luks_to_disk_endian(QCryptoBlockLUKSHeader *hdr); +void +qcrypto_block_luks_from_disk_endian(QCryptoBlockLUKSHeader *hdr); diff --git a/crypto/block-luks.c b/crypto/block-luks.c index f62be6836b..df2b4105d6 100644 --- a/crypto/block-luks.c +++ b/crypto/block-luks.c @@ -23,6 +23,7 @@ #include "qemu/bswap.h" #include "block-luks.h" +#include "block-luks-priv.h" #include "crypto/hash.h" #include "crypto/afsplit.h" @@ -46,37 +47,6 @@ */ typedef struct QCryptoBlockLUKS QCryptoBlockLUKS; -typedef struct QCryptoBlockLUKSHeader QCryptoBlockLUKSHeader; -typedef struct QCryptoBlockLUKSKeySlot QCryptoBlockLUKSKeySlot; - - -/* The following constants are all defined by the LUKS spec */ -#define QCRYPTO_BLOCK_LUKS_VERSION 1 - -#define QCRYPTO_BLOCK_LUKS_MAGIC_LEN 6 -#define QCRYPTO_BLOCK_LUKS_CIPHER_NAME_LEN 32 -#define QCRYPTO_BLOCK_LUKS_CIPHER_MODE_LEN 32 -#define QCRYPTO_BLOCK_LUKS_HASH_SPEC_LEN 32 -#define QCRYPTO_BLOCK_LUKS_DIGEST_LEN 20 -#define QCRYPTO_BLOCK_LUKS_SALT_LEN 32 -#define QCRYPTO_BLOCK_LUKS_UUID_LEN 40 -#define QCRYPTO_BLOCK_LUKS_NUM_KEY_SLOTS 8 -#define QCRYPTO_BLOCK_LUKS_STRIPES 4000 -#define QCRYPTO_BLOCK_LUKS_MIN_SLOT_KEY_ITERS 1000 -#define QCRYPTO_BLOCK_LUKS_MIN_MASTER_KEY_ITERS 1000 -#define QCRYPTO_BLOCK_LUKS_KEY_SLOT_OFFSET 4096 - -#define QCRYPTO_BLOCK_LUKS_KEY_SLOT_DISABLED 0x0000DEAD -#define QCRYPTO_BLOCK_LUKS_KEY_SLOT_ENABLED 0x00AC71F3 - -#define QCRYPTO_BLOCK_LUKS_SECTOR_SIZE 512LL - -#define QCRYPTO_BLOCK_LUKS_DEFAULT_ITER_TIME_MS 2000 -#define QCRYPTO_BLOCK_LUKS_ERASE_ITERATIONS 40 - -static const char qcrypto_block_luks_magic[QCRYPTO_BLOCK_LUKS_MAGIC_LEN] = { - 'L', 'U', 'K', 'S', 0xBA, 0xBE -}; typedef struct QCryptoBlockLUKSNameMap QCryptoBlockLUKSNameMap; struct QCryptoBlockLUKSNameMap { @@ -134,69 +104,7 @@ qcrypto_block_luks_cipher_name_map[] = { { "twofish", qcrypto_block_luks_cipher_size_map_twofish }, }; - -/* - * This struct is written to disk in big-endian format, - * but operated upon in native-endian format. - */ -struct QCryptoBlockLUKSKeySlot { - /* state of keyslot, enabled/disable */ - uint32_t active; - /* iterations for PBKDF2 */ - uint32_t iterations; - /* salt for PBKDF2 */ - uint8_t salt[QCRYPTO_BLOCK_LUKS_SALT_LEN]; - /* start sector of key material */ - uint32_t key_offset_sector; - /* number of anti-forensic stripes */ - uint32_t stripes; -}; - QEMU_BUILD_BUG_ON(sizeof(struct QCryptoBlockLUKSKeySlot) != 48); - - -/* - * This struct is written to disk in big-endian format, - * but operated upon in native-endian format. - */ -struct QCryptoBlockLUKSHeader { - /* 'L', 'U', 'K', 'S', '0xBA', '0xBE' */ - char magic[QCRYPTO_BLOCK_LUKS_MAGIC_LEN]; - - /* LUKS version, currently 1 */ - uint16_t version; - - /* cipher name specification (aes, etc) */ - char cipher_name[QCRYPTO_BLOCK_LUKS_CIPHER_NAME_LEN]; - - /* cipher mode specification (cbc-plain, xts-essiv:sha256, etc) */ - char cipher_mode[QCRYPTO_BLOCK_LUKS_CIPHER_MODE_LEN]; - - /* hash specification (sha256, etc) */ - char hash_spec[QCRYPTO_BLOCK_LUKS_HASH_SPEC_LEN]; - - /* start offset of the volume data (in 512 byte sectors) */ - uint32_t payload_offset_sector; - - /* Number of key bytes */ - uint32_t master_key_len; - - /* master key checksum after PBKDF2 */ - uint8_t master_key_digest[QCRYPTO_BLOCK_LUKS_DIGEST_LEN]; - - /* salt for master key PBKDF2 */ - uint8_t master_key_salt[QCRYPTO_BLOCK_LUKS_SALT_LEN]; - - /* iterations for master key PBKDF2 */ - uint32_t master_key_iterations; - - /* UUID of the partition in standard ASCII representation */ - uint8_t uuid[QCRYPTO_BLOCK_LUKS_UUID_LEN]; - - /* key slots */ - QCryptoBlockLUKSKeySlot key_slots[QCRYPTO_BLOCK_LUKS_NUM_KEY_SLOTS]; -}; - QEMU_BUILD_BUG_ON(sizeof(struct QCryptoBlockLUKSHeader) != 592); @@ -254,7 +162,7 @@ static int qcrypto_block_luks_cipher_name_lookup(const char *name, } } - error_setg(errp, "Algorithm %s with key size %d bytes not supported", + error_setg(errp, "Algorithm '%s' with key size %d bytes not supported", name, key_bytes); return 0; } @@ -290,7 +198,7 @@ static int qcrypto_block_luks_name_lookup(const char *name, int ret = qapi_enum_parse(map, name, -1, NULL); if (ret < 0) { - error_setg(errp, "%s %s not supported", type, name); + error_setg(errp, "%s '%s' not supported", type, name); return 0; } return ret; @@ -440,6 +348,51 @@ qcrypto_block_luks_splitkeylen_sectors(const QCryptoBlockLUKS *luks, return ROUND_UP(splitkeylen_sectors, header_sectors); } + +void +qcrypto_block_luks_to_disk_endian(QCryptoBlockLUKSHeader *hdr) +{ + size_t i; + + /* + * Everything on disk uses Big Endian (tm), so flip header fields + * before writing them + */ + cpu_to_be16s(&hdr->version); + cpu_to_be32s(&hdr->payload_offset_sector); + cpu_to_be32s(&hdr->master_key_len); + cpu_to_be32s(&hdr->master_key_iterations); + + for (i = 0; i < QCRYPTO_BLOCK_LUKS_NUM_KEY_SLOTS; i++) { + cpu_to_be32s(&hdr->key_slots[i].active); + cpu_to_be32s(&hdr->key_slots[i].iterations); + cpu_to_be32s(&hdr->key_slots[i].key_offset_sector); + cpu_to_be32s(&hdr->key_slots[i].stripes); + } +} + +void +qcrypto_block_luks_from_disk_endian(QCryptoBlockLUKSHeader *hdr) +{ + size_t i; + + /* + * The header is always stored in big-endian format, so + * convert everything to native + */ + be16_to_cpus(&hdr->version); + be32_to_cpus(&hdr->payload_offset_sector); + be32_to_cpus(&hdr->master_key_len); + be32_to_cpus(&hdr->master_key_iterations); + + for (i = 0; i < QCRYPTO_BLOCK_LUKS_NUM_KEY_SLOTS; i++) { + be32_to_cpus(&hdr->key_slots[i].active); + be32_to_cpus(&hdr->key_slots[i].iterations); + be32_to_cpus(&hdr->key_slots[i].key_offset_sector); + be32_to_cpus(&hdr->key_slots[i].stripes); + } +} + /* * Stores the main LUKS header, taking care of endianess */ @@ -451,28 +404,13 @@ qcrypto_block_luks_store_header(QCryptoBlock *block, { const QCryptoBlockLUKS *luks = block->opaque; Error *local_err = NULL; - size_t i; g_autofree QCryptoBlockLUKSHeader *hdr_copy = NULL; /* Create a copy of the header */ hdr_copy = g_new0(QCryptoBlockLUKSHeader, 1); memcpy(hdr_copy, &luks->header, sizeof(QCryptoBlockLUKSHeader)); - /* - * Everything on disk uses Big Endian (tm), so flip header fields - * before writing them - */ - cpu_to_be16s(&hdr_copy->version); - cpu_to_be32s(&hdr_copy->payload_offset_sector); - cpu_to_be32s(&hdr_copy->master_key_len); - cpu_to_be32s(&hdr_copy->master_key_iterations); - - for (i = 0; i < QCRYPTO_BLOCK_LUKS_NUM_KEY_SLOTS; i++) { - cpu_to_be32s(&hdr_copy->key_slots[i].active); - cpu_to_be32s(&hdr_copy->key_slots[i].iterations); - cpu_to_be32s(&hdr_copy->key_slots[i].key_offset_sector); - cpu_to_be32s(&hdr_copy->key_slots[i].stripes); - } + qcrypto_block_luks_to_disk_endian(hdr_copy); /* Write out the partition header and key slot headers */ writefunc(block, 0, (const uint8_t *)hdr_copy, sizeof(*hdr_copy), @@ -496,7 +434,6 @@ qcrypto_block_luks_load_header(QCryptoBlock *block, Error **errp) { int rv; - size_t i; QCryptoBlockLUKS *luks = block->opaque; /* @@ -512,21 +449,7 @@ qcrypto_block_luks_load_header(QCryptoBlock *block, return rv; } - /* - * The header is always stored in big-endian format, so - * convert everything to native - */ - be16_to_cpus(&luks->header.version); - be32_to_cpus(&luks->header.payload_offset_sector); - be32_to_cpus(&luks->header.master_key_len); - be32_to_cpus(&luks->header.master_key_iterations); - - for (i = 0; i < QCRYPTO_BLOCK_LUKS_NUM_KEY_SLOTS; i++) { - be32_to_cpus(&luks->header.key_slots[i].active); - be32_to_cpus(&luks->header.key_slots[i].iterations); - be32_to_cpus(&luks->header.key_slots[i].key_offset_sector); - be32_to_cpus(&luks->header.key_slots[i].stripes); - } + qcrypto_block_luks_from_disk_endian(&luks->header); return 0; } @@ -554,6 +477,36 @@ qcrypto_block_luks_check_header(const QCryptoBlockLUKS *luks, Error **errp) return -1; } + if (!memchr(luks->header.cipher_name, '\0', + sizeof(luks->header.cipher_name))) { + error_setg(errp, "LUKS header cipher name is not NUL terminated"); + return -1; + } + + if (!memchr(luks->header.cipher_mode, '\0', + sizeof(luks->header.cipher_mode))) { + error_setg(errp, "LUKS header cipher mode is not NUL terminated"); + return -1; + } + + if (!memchr(luks->header.hash_spec, '\0', + sizeof(luks->header.hash_spec))) { + error_setg(errp, "LUKS header hash spec is not NUL terminated"); + return -1; + } + + if (luks->header.payload_offset_sector < + DIV_ROUND_UP(QCRYPTO_BLOCK_LUKS_KEY_SLOT_OFFSET, + QCRYPTO_BLOCK_LUKS_SECTOR_SIZE)) { + error_setg(errp, "LUKS payload is overlapping with the header"); + return -1; + } + + if (luks->header.master_key_iterations == 0) { + error_setg(errp, "LUKS key iteration count is zero"); + return -1; + } + /* Check all keyslots for corruption */ for (i = 0 ; i < QCRYPTO_BLOCK_LUKS_NUM_KEY_SLOTS ; i++) { @@ -564,8 +517,9 @@ qcrypto_block_luks_check_header(const QCryptoBlockLUKS *luks, Error **errp) header_sectors, slot1->stripes); - if (slot1->stripes == 0) { - error_setg(errp, "Keyslot %zu is corrupted (stripes == 0)", i); + if (slot1->stripes != QCRYPTO_BLOCK_LUKS_STRIPES) { + error_setg(errp, "Keyslot %zu is corrupted (stripes %d != %d)", + i, slot1->stripes, QCRYPTO_BLOCK_LUKS_STRIPES); return -1; } @@ -576,6 +530,20 @@ qcrypto_block_luks_check_header(const QCryptoBlockLUKS *luks, Error **errp) return -1; } + if (slot1->active == QCRYPTO_BLOCK_LUKS_KEY_SLOT_ENABLED && + slot1->iterations == 0) { + error_setg(errp, "Keyslot %zu iteration count is zero", i); + return -1; + } + + if (start1 < DIV_ROUND_UP(QCRYPTO_BLOCK_LUKS_KEY_SLOT_OFFSET, + QCRYPTO_BLOCK_LUKS_SECTOR_SIZE)) { + error_setg(errp, + "Keyslot %zu is overlapping with the LUKS header", + i); + return -1; + } + if (start1 + len1 > luks->header.payload_offset_sector) { error_setg(errp, "Keyslot %zu is overlapping with the encrypted payload", @@ -624,7 +592,7 @@ qcrypto_block_luks_parse_header(QCryptoBlockLUKS *luks, Error **errp) */ ivgen_name = strchr(cipher_mode, '-'); if (!ivgen_name) { - error_setg(errp, "Unexpected cipher mode string format %s", + error_setg(errp, "Unexpected cipher mode string format '%s'", luks->header.cipher_mode); return -1; } diff --git a/crypto/pbkdf.c b/crypto/pbkdf.c index 3775ddc6c5..8d198c152c 100644 --- a/crypto/pbkdf.c +++ b/crypto/pbkdf.c @@ -24,6 +24,11 @@ #ifndef _WIN32 #include <sys/resource.h> #endif +#ifdef CONFIG_DARWIN +#include <mach/mach_init.h> +#include <mach/thread_act.h> +#include <mach/mach_port.h> +#endif static int qcrypto_pbkdf2_get_thread_cpu(unsigned long long *val_ms, @@ -45,6 +50,24 @@ static int qcrypto_pbkdf2_get_thread_cpu(unsigned long long *val_ms, /* QuadPart is units of 100ns and we want ms as unit */ *val_ms = thread_time.QuadPart / 10000ll; return 0; +#elif defined(CONFIG_DARWIN) + mach_port_t thread; + kern_return_t kr; + mach_msg_type_number_t count; + thread_basic_info_data_t info; + + thread = mach_thread_self(); + count = THREAD_BASIC_INFO_COUNT; + kr = thread_info(thread, THREAD_BASIC_INFO, (thread_info_t)&info, &count); + mach_port_deallocate(mach_task_self(), thread); + if (kr != KERN_SUCCESS || (info.flags & TH_FLAGS_IDLE) != 0) { + error_setg_errno(errp, errno, "Unable to get thread CPU usage"); + return -1; + } + + *val_ms = ((info.user_time.seconds * 1000ll) + + (info.user_time.microseconds / 1000)); + return 0; #elif defined(RUSAGE_THREAD) struct rusage ru; if (getrusage(RUSAGE_THREAD, &ru) < 0) { diff --git a/crypto/tlscredspsk.c b/crypto/tlscredspsk.c index a4f9891274..546cad1c5a 100644 --- a/crypto/tlscredspsk.c +++ b/crypto/tlscredspsk.c @@ -109,7 +109,12 @@ qcrypto_tls_creds_psk_load(QCryptoTLSCredsPSK *creds, goto cleanup; } - gnutls_psk_set_server_credentials_file(creds->data.server, pskfile); + ret = gnutls_psk_set_server_credentials_file(creds->data.server, pskfile); + if (ret < 0) { + error_setg(errp, "Cannot set PSK server credentials: %s", + gnutls_strerror(ret)); + goto cleanup; + } gnutls_psk_set_server_dh_params(creds->data.server, creds->parent_obj.dh_params); } else { @@ -135,8 +140,13 @@ qcrypto_tls_creds_psk_load(QCryptoTLSCredsPSK *creds, goto cleanup; } - gnutls_psk_set_client_credentials(creds->data.client, - username, &key, GNUTLS_PSK_KEY_HEX); + ret = gnutls_psk_set_client_credentials(creds->data.client, + username, &key, GNUTLS_PSK_KEY_HEX); + if (ret < 0) { + error_setg(errp, "Cannot set PSK client credentials: %s", + gnutls_strerror(ret)); + goto cleanup; + } } rv = 0; diff --git a/docs/devel/multiple-iothreads.txt b/docs/devel/multiple-iothreads.txt index aeb997bed5..343120f2ef 100644 --- a/docs/devel/multiple-iothreads.txt +++ b/docs/devel/multiple-iothreads.txt @@ -109,7 +109,7 @@ The AioContext originates from the QEMU block layer, even though nowadays AioContext is a generic event loop that can be used by any QEMU subsystem. The block layer has support for AioContext integrated. Each BlockDriverState -is associated with an AioContext using bdrv_try_set_aio_context() and +is associated with an AioContext using bdrv_try_change_aio_context() and bdrv_get_aio_context(). This allows block layer code to process I/O inside the right AioContext. Other subsystems may wish to follow a similar approach. @@ -134,5 +134,5 @@ Long-running jobs (usually in the form of coroutines) are best scheduled in the BlockDriverState's AioContext to avoid the need to acquire/release around each bdrv_*() call. The functions bdrv_add/remove_aio_context_notifier, or alternatively blk_add/remove_aio_context_notifier if you use BlockBackends, -can be used to get a notification whenever bdrv_try_set_aio_context() moves a +can be used to get a notification whenever bdrv_try_change_aio_context() moves a BlockDriverState to a different AioContext. diff --git a/docs/devel/reset.rst b/docs/devel/reset.rst index abea1102dc..7cc6a6b314 100644 --- a/docs/devel/reset.rst +++ b/docs/devel/reset.rst @@ -210,9 +210,11 @@ Polling the reset state Resettable interface provides the ``resettable_is_in_reset()`` function. This function returns true if the object parameter is currently under reset. -An object is under reset from the beginning of the *init* phase to the end of -the *exit* phase. During all three phases, the function will return that the -object is in reset. +An object is under reset from the beginning of the *enter* phase (before +either its children or its own enter method is called) to the *exit* +phase. During *enter* and *hold* phase only, the function will return that the +object is in reset. The state is changed after the *exit* is propagated to +its children and just before calling the object's own *exit* method. This function may be used if the object behavior has to be adapted while in reset state. For example if a device has an irq input, diff --git a/docs/system/arm/emulation.rst b/docs/system/arm/emulation.rst index cfb4b0768b..e3af79bb8c 100644 --- a/docs/system/arm/emulation.rst +++ b/docs/system/arm/emulation.rst @@ -24,6 +24,7 @@ the following architecture extensions: - FEAT_Debugv8p4 (Debug changes for v8.4) - FEAT_DotProd (Advanced SIMD dot product instructions) - FEAT_DoubleFault (Double Fault Extension) +- FEAT_E0PD (Preventing EL0 access to halves of address maps) - FEAT_ETS (Enhanced Translation Synchronization) - FEAT_FCMA (Floating-point complex number instructions) - FEAT_FHM (Floating-point half-precision multiplication instructions) @@ -32,6 +33,7 @@ the following architecture extensions: - FEAT_FlagM (Flag manipulation instructions v2) - FEAT_FlagM2 (Enhancements to flag manipulation instructions) - FEAT_GTG (Guest translation granule size) +- FEAT_HAFDBS (Hardware management of the access flag and dirty bit state) - FEAT_HCX (Support for the HCRX_EL2 register) - FEAT_HPDS (Hierarchical permission disables) - FEAT_I8MM (AArch64 Int8 matrix multiplication instructions) diff --git a/docs/system/ppc/ppce500.rst b/docs/system/ppc/ppce500.rst index ba6bcb7314..fa40e57d18 100644 --- a/docs/system/ppc/ppce500.rst +++ b/docs/system/ppc/ppce500.rst @@ -113,7 +113,7 @@ To boot the 32-bit Linux kernel: .. code-block:: bash - $ qemu-system-ppc{64|32} -M ppce500 -cpu e500mc -smp 4 -m 2G \ + $ qemu-system-ppc64 -M ppce500 -cpu e500mc -smp 4 -m 2G \ -display none -serial stdio \ -kernel vmlinux \ -initrd /path/to/rootfs.cpio \ @@ -154,10 +154,10 @@ interface at PCI address 0.1.0, but we can switch that to an e1000 NIC by: .. code-block:: bash - $ qemu-system-ppc -M ppce500 -smp 4 -m 2G \ - -display none -serial stdio \ - -bios u-boot \ - -nic tap,ifname=tap0,script=no,downscript=no,model=e1000 + $ qemu-system-ppc64 -M ppce500 -smp 4 -m 2G \ + -display none -serial stdio \ + -bios u-boot \ + -nic tap,ifname=tap0,script=no,downscript=no,model=e1000 The QEMU ``ppce500`` machine can also dynamically instantiate an eTSEC device if “-device eTSEC” is given to QEMU: @@ -165,3 +165,18 @@ if “-device eTSEC” is given to QEMU: .. code-block:: bash -netdev tap,ifname=tap0,script=no,downscript=no,id=net0 -device eTSEC,netdev=net0 + +Root file system on flash drive +------------------------------- + +Rather than using a root file system on ram disk, it is possible to have it on +CFI flash. Given an ext2 image whose size must be a power of two, it can be used +as follows: + +.. code-block:: bash + + $ qemu-system-ppc64 -M ppce500 -cpu e500mc -smp 4 -m 2G \ + -display none -serial stdio \ + -kernel vmlinux \ + -drive if=pflash,file=/path/to/rootfs.ext2,format=raw \ + -append "rootwait root=/dev/mtdblock0" diff --git a/hmp-commands.hx b/hmp-commands.hx index 12b6d4e2dc..673e39a697 100644 --- a/hmp-commands.hx +++ b/hmp-commands.hx @@ -1276,7 +1276,7 @@ ERST { .name = "netdev_add", .args_type = "netdev:O", - .params = "[user|tap|socket|vde|bridge|hubport|netmap|vhost-user" + .params = "[user|tap|socket|stream|dgram|vde|bridge|hubport|netmap|vhost-user" #ifdef CONFIG_VMNET "|vmnet-host|vmnet-shared|vmnet-bridged" #endif diff --git a/hw/arm/aspeed.c b/hw/arm/aspeed.c index f8bc6d4a14..55f114ef72 100644 --- a/hw/arm/aspeed.c +++ b/hw/arm/aspeed.c @@ -1356,12 +1356,12 @@ static void aspeed_machine_bletchley_class_init(ObjectClass *oc, void *data) aspeed_soc_num_cpus(amc->soc_name); } -static void fby35_reset(MachineState *state) +static void fby35_reset(MachineState *state, ShutdownCause reason) { AspeedMachineState *bmc = ASPEED_MACHINE(state); AspeedGPIOState *gpio = &bmc->soc.gpio; - qemu_devices_reset(); + qemu_devices_reset(reason); /* Board ID: 7 (Class-1, 4 slots) */ object_property_set_bool(OBJECT(gpio), "gpioV4", true, &error_fatal); diff --git a/hw/arm/boot.c b/hw/arm/boot.c index b0b92af188..b106f31468 100644 --- a/hw/arm/boot.c +++ b/hw/arm/boot.c @@ -683,6 +683,8 @@ int arm_load_dtb(hwaddr addr, const struct arm_boot_info *binfo, * the DTB is copied again upon reset, even if addr points into RAM. */ rom_add_blob_fixed_as("dtb", fdt, size, addr, as); + qemu_register_reset_nosnapshotload(qemu_fdt_randomize_seeds, + rom_ptr_for_as(as, addr, size)); g_free(fdt); diff --git a/hw/arm/mps2-tz.c b/hw/arm/mps2-tz.c index 394192b9b2..284c09c91d 100644 --- a/hw/arm/mps2-tz.c +++ b/hw/arm/mps2-tz.c @@ -1239,7 +1239,7 @@ static void mps2_set_remap(Object *obj, const char *value, Error **errp) } } -static void mps2_machine_reset(MachineState *machine) +static void mps2_machine_reset(MachineState *machine, ShutdownCause reason) { MPS2TZMachineState *mms = MPS2TZ_MACHINE(machine); @@ -1249,7 +1249,7 @@ static void mps2_machine_reset(MachineState *machine) * reset see the correct mapping. */ remap_memory(mms, mms->remap); - qemu_devices_reset(); + qemu_devices_reset(reason); } static void mps2tz_class_init(ObjectClass *oc, void *data) diff --git a/hw/arm/virt.c b/hw/arm/virt.c index cda9defe8f..b871350856 100644 --- a/hw/arm/virt.c +++ b/hw/arm/virt.c @@ -1371,14 +1371,15 @@ static void create_smmu(const VirtMachineState *vms, static void create_virtio_iommu_dt_bindings(VirtMachineState *vms) { - const char compat[] = "virtio,pci-iommu"; + const char compat[] = "virtio,pci-iommu\0pci1af4,1057"; uint16_t bdf = vms->virtio_iommu_bdf; MachineState *ms = MACHINE(vms); char *node; vms->iommu_phandle = qemu_fdt_alloc_phandle(ms->fdt); - node = g_strdup_printf("%s/virtio_iommu@%d", vms->pciehb_nodename, bdf); + node = g_strdup_printf("%s/virtio_iommu@%x,%x", vms->pciehb_nodename, + PCI_SLOT(bdf), PCI_FUNC(bdf)); qemu_fdt_add_subnode(ms->fdt, node); qemu_fdt_setprop(ms->fdt, node, "compatible", compat, sizeof(compat)); qemu_fdt_setprop_sized_cells(ms->fdt, node, "reg", diff --git a/hw/block/pflash_cfi01.c b/hw/block/pflash_cfi01.c index 0cbc2fb4cb..9c235bf66e 100644 --- a/hw/block/pflash_cfi01.c +++ b/hw/block/pflash_cfi01.c @@ -690,7 +690,7 @@ static const MemoryRegionOps pflash_cfi01_ops = { .endianness = DEVICE_NATIVE_ENDIAN, }; -static void pflash_cfi01_fill_cfi_table(PFlashCFI01 *pfl) +static void pflash_cfi01_fill_cfi_table(PFlashCFI01 *pfl, Error **errp) { uint64_t blocks_per_device, sector_len_per_device, device_len; int num_devices; @@ -708,6 +708,10 @@ static void pflash_cfi01_fill_cfi_table(PFlashCFI01 *pfl) sector_len_per_device = pfl->sector_len / num_devices; } device_len = sector_len_per_device * blocks_per_device; + if (!is_power_of_2(device_len)) { + error_setg(errp, "Device size must be a power of two."); + return; + } /* Hardcoded CFI table */ /* Standard "QRY" string */ @@ -865,7 +869,7 @@ static void pflash_cfi01_realize(DeviceState *dev, Error **errp) */ pfl->cmd = 0x00; pfl->status = 0x80; /* WSM ready */ - pflash_cfi01_fill_cfi_table(pfl); + pflash_cfi01_fill_cfi_table(pfl, errp); } static void pflash_cfi01_system_reset(DeviceState *dev) diff --git a/hw/block/pflash_cfi02.c b/hw/block/pflash_cfi02.c index 2a99b286b0..ff2fe154c1 100644 --- a/hw/block/pflash_cfi02.c +++ b/hw/block/pflash_cfi02.c @@ -880,6 +880,11 @@ static void pflash_cfi02_realize(DeviceState *dev, Error **errp) return; } + if (!is_power_of_2(pfl->chip_len)) { + error_setg(errp, "Device size must be a power of two."); + return; + } + memory_region_init_rom_device(&pfl->orig_mem, OBJECT(pfl), &pflash_cfi02_ops, pfl, pfl->name, pfl->chip_len, errp); diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c index 84902dde17..13bf5cc47a 100644 --- a/hw/block/vhost-user-blk.c +++ b/hw/block/vhost-user-blk.c @@ -97,6 +97,10 @@ static int vhost_user_blk_handle_config_change(struct vhost_dev *dev) VHostUserBlk *s = VHOST_USER_BLK(dev->vdev); Error *local_err = NULL; + if (!dev->started) { + return 0; + } + ret = vhost_dev_get_config(dev, (uint8_t *)&blkcfg, vdev->config_len, &local_err); if (ret < 0) { diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c index 8131ec2dbc..f717550fdc 100644 --- a/hw/block/virtio-blk.c +++ b/hw/block/virtio-blk.c @@ -21,6 +21,7 @@ #include "hw/block/block.h" #include "hw/qdev-properties.h" #include "sysemu/blockdev.h" +#include "sysemu/block-ram-registrar.h" #include "sysemu/sysemu.h" #include "sysemu/runstate.h" #include "hw/virtio/virtio-blk.h" @@ -362,12 +363,14 @@ static void virtio_blk_handle_scsi(VirtIOBlockReq *req) } } -static inline void submit_requests(BlockBackend *blk, MultiReqBuffer *mrb, +static inline void submit_requests(VirtIOBlock *s, MultiReqBuffer *mrb, int start, int num_reqs, int niov) { + BlockBackend *blk = s->blk; QEMUIOVector *qiov = &mrb->reqs[start]->qiov; int64_t sector_num = mrb->reqs[start]->sector_num; bool is_write = mrb->is_write; + BdrvRequestFlags flags = 0; if (num_reqs > 1) { int i; @@ -398,12 +401,18 @@ static inline void submit_requests(BlockBackend *blk, MultiReqBuffer *mrb, num_reqs - 1); } + if (blk_ram_registrar_ok(&s->blk_ram_registrar)) { + flags |= BDRV_REQ_REGISTERED_BUF; + } + if (is_write) { - blk_aio_pwritev(blk, sector_num << BDRV_SECTOR_BITS, qiov, 0, - virtio_blk_rw_complete, mrb->reqs[start]); + blk_aio_pwritev(blk, sector_num << BDRV_SECTOR_BITS, qiov, + flags, virtio_blk_rw_complete, + mrb->reqs[start]); } else { - blk_aio_preadv(blk, sector_num << BDRV_SECTOR_BITS, qiov, 0, - virtio_blk_rw_complete, mrb->reqs[start]); + blk_aio_preadv(blk, sector_num << BDRV_SECTOR_BITS, qiov, + flags, virtio_blk_rw_complete, + mrb->reqs[start]); } } @@ -425,14 +434,14 @@ static int multireq_compare(const void *a, const void *b) } } -static void virtio_blk_submit_multireq(BlockBackend *blk, MultiReqBuffer *mrb) +static void virtio_blk_submit_multireq(VirtIOBlock *s, MultiReqBuffer *mrb) { int i = 0, start = 0, num_reqs = 0, niov = 0, nb_sectors = 0; uint32_t max_transfer; int64_t sector_num = 0; if (mrb->num_reqs == 1) { - submit_requests(blk, mrb, 0, 1, -1); + submit_requests(s, mrb, 0, 1, -1); mrb->num_reqs = 0; return; } @@ -452,11 +461,11 @@ static void virtio_blk_submit_multireq(BlockBackend *blk, MultiReqBuffer *mrb) * 3. merge would exceed maximum transfer length of backend device */ if (sector_num + nb_sectors != req->sector_num || - niov > blk_get_max_iov(blk) - req->qiov.niov || + niov > blk_get_max_iov(s->blk) - req->qiov.niov || req->qiov.size > max_transfer || nb_sectors > (max_transfer - req->qiov.size) / BDRV_SECTOR_SIZE) { - submit_requests(blk, mrb, start, num_reqs, niov); + submit_requests(s, mrb, start, num_reqs, niov); num_reqs = 0; } } @@ -472,7 +481,7 @@ static void virtio_blk_submit_multireq(BlockBackend *blk, MultiReqBuffer *mrb) num_reqs++; } - submit_requests(blk, mrb, start, num_reqs, niov); + submit_requests(s, mrb, start, num_reqs, niov); mrb->num_reqs = 0; } @@ -487,7 +496,7 @@ static void virtio_blk_handle_flush(VirtIOBlockReq *req, MultiReqBuffer *mrb) * Make sure all outstanding writes are posted to the backing device. */ if (mrb->is_write && mrb->num_reqs > 0) { - virtio_blk_submit_multireq(s->blk, mrb); + virtio_blk_submit_multireq(s, mrb); } blk_aio_flush(s->blk, virtio_blk_flush_complete, req); } @@ -667,7 +676,7 @@ static int virtio_blk_handle_request(VirtIOBlockReq *req, MultiReqBuffer *mrb) if (mrb->num_reqs > 0 && (mrb->num_reqs == VIRTIO_BLK_MAX_MERGE_REQS || is_write != mrb->is_write || !s->conf.request_merging)) { - virtio_blk_submit_multireq(s->blk, mrb); + virtio_blk_submit_multireq(s, mrb); } assert(mrb->num_reqs < VIRTIO_BLK_MAX_MERGE_REQS); @@ -774,7 +783,7 @@ void virtio_blk_handle_vq(VirtIOBlock *s, VirtQueue *vq) } while (!virtio_queue_empty(vq)); if (mrb.num_reqs) { - virtio_blk_submit_multireq(s->blk, &mrb); + virtio_blk_submit_multireq(s, &mrb); } blk_io_unplug(s->blk); @@ -823,7 +832,7 @@ void virtio_blk_process_queued_requests(VirtIOBlock *s, bool is_bh) } if (mrb.num_reqs) { - virtio_blk_submit_multireq(s->blk, &mrb); + virtio_blk_submit_multireq(s, &mrb); } if (is_bh) { blk_dec_in_flight(s->conf.conf.blk); @@ -1205,6 +1214,7 @@ static void virtio_blk_device_realize(DeviceState *dev, Error **errp) } s->change = qemu_add_vm_change_state_handler(virtio_blk_dma_restart_cb, s); + blk_ram_registrar_init(&s->blk_ram_registrar, s->blk); blk_set_dev_ops(s->blk, &virtio_block_ops, s); blk_iostatus_enable(s->blk); @@ -1230,6 +1240,7 @@ static void virtio_blk_device_unrealize(DeviceState *dev) virtio_del_queue(vdev, i); } qemu_coroutine_dec_pool_size(conf->num_queues * conf->queue_size / 2); + blk_ram_registrar_destroy(&s->blk_ram_registrar); qemu_del_vm_change_state_handler(s->change); blockdev_mark_auto_del(s->blk); virtio_cleanup(vdev); diff --git a/hw/core/numa.c b/hw/core/numa.c index 26d8e5f616..ea24a5fa8c 100644 --- a/hw/core/numa.c +++ b/hw/core/numa.c @@ -822,6 +822,19 @@ static int ram_block_notify_add_single(RAMBlock *rb, void *opaque) return 0; } +static int ram_block_notify_remove_single(RAMBlock *rb, void *opaque) +{ + const ram_addr_t max_size = qemu_ram_get_max_length(rb); + const ram_addr_t size = qemu_ram_get_used_length(rb); + void *host = qemu_ram_get_host_addr(rb); + RAMBlockNotifier *notifier = opaque; + + if (host) { + notifier->ram_block_removed(notifier, host, size, max_size); + } + return 0; +} + void ram_block_notifier_add(RAMBlockNotifier *n) { QLIST_INSERT_HEAD(&ram_list.ramblock_notifiers, n, next); @@ -835,13 +848,18 @@ void ram_block_notifier_add(RAMBlockNotifier *n) void ram_block_notifier_remove(RAMBlockNotifier *n) { QLIST_REMOVE(n, next); + + if (n->ram_block_removed) { + qemu_ram_foreach_block(ram_block_notify_remove_single, n); + } } void ram_block_notify_add(void *host, size_t size, size_t max_size) { RAMBlockNotifier *notifier; + RAMBlockNotifier *next; - QLIST_FOREACH(notifier, &ram_list.ramblock_notifiers, next) { + QLIST_FOREACH_SAFE(notifier, &ram_list.ramblock_notifiers, next, next) { if (notifier->ram_block_added) { notifier->ram_block_added(notifier, host, size, max_size); } @@ -851,8 +869,9 @@ void ram_block_notify_add(void *host, size_t size, size_t max_size) void ram_block_notify_remove(void *host, size_t size, size_t max_size) { RAMBlockNotifier *notifier; + RAMBlockNotifier *next; - QLIST_FOREACH(notifier, &ram_list.ramblock_notifiers, next) { + QLIST_FOREACH_SAFE(notifier, &ram_list.ramblock_notifiers, next, next) { if (notifier->ram_block_removed) { notifier->ram_block_removed(notifier, host, size, max_size); } @@ -862,8 +881,9 @@ void ram_block_notify_remove(void *host, size_t size, size_t max_size) void ram_block_notify_resize(void *host, size_t old_size, size_t new_size) { RAMBlockNotifier *notifier; + RAMBlockNotifier *next; - QLIST_FOREACH(notifier, &ram_list.ramblock_notifiers, next) { + QLIST_FOREACH_SAFE(notifier, &ram_list.ramblock_notifiers, next, next) { if (notifier->ram_block_resized) { notifier->ram_block_resized(notifier, host, old_size, new_size); } diff --git a/hw/core/reset.c b/hw/core/reset.c index 36be82c491..d3263b613e 100644 --- a/hw/core/reset.c +++ b/hw/core/reset.c @@ -33,6 +33,7 @@ typedef struct QEMUResetEntry { QTAILQ_ENTRY(QEMUResetEntry) entry; QEMUResetHandler *func; void *opaque; + bool skip_on_snapshot_load; } QEMUResetEntry; static QTAILQ_HEAD(, QEMUResetEntry) reset_handlers = @@ -47,6 +48,16 @@ void qemu_register_reset(QEMUResetHandler *func, void *opaque) QTAILQ_INSERT_TAIL(&reset_handlers, re, entry); } +void qemu_register_reset_nosnapshotload(QEMUResetHandler *func, void *opaque) +{ + QEMUResetEntry *re = g_new0(QEMUResetEntry, 1); + + re->func = func; + re->opaque = opaque; + re->skip_on_snapshot_load = true; + QTAILQ_INSERT_TAIL(&reset_handlers, re, entry); +} + void qemu_unregister_reset(QEMUResetHandler *func, void *opaque) { QEMUResetEntry *re; @@ -60,12 +71,16 @@ void qemu_unregister_reset(QEMUResetHandler *func, void *opaque) } } -void qemu_devices_reset(void) +void qemu_devices_reset(ShutdownCause reason) { QEMUResetEntry *re, *nre; /* reset all devices */ QTAILQ_FOREACH_SAFE(re, &reset_handlers, entry, nre) { + if (reason == SHUTDOWN_CAUSE_SNAPSHOT_LOAD && + re->skip_on_snapshot_load) { + continue; + } re->func(re->opaque); } } diff --git a/hw/core/resettable.c b/hw/core/resettable.c index 96a99ce39e..c3df75c6ba 100644 --- a/hw/core/resettable.c +++ b/hw/core/resettable.c @@ -201,12 +201,11 @@ static void resettable_phase_exit(Object *obj, void *opaque, ResetType type) resettable_child_foreach(rc, obj, resettable_phase_exit, NULL, type); assert(s->count > 0); - if (s->count == 1) { + if (--s->count == 0) { trace_resettable_phase_exit_exec(obj, obj_typename, !!rc->phases.exit); if (rc->phases.exit && !resettable_get_tr_func(rc, obj)) { rc->phases.exit(obj); } - s->count = 0; } s->exit_phase_in_progress = false; trace_resettable_phase_exit_end(obj, obj_typename, s->count); diff --git a/hw/hppa/machine.c b/hw/hppa/machine.c index e53d5f0fa7..19ea7c2c66 100644 --- a/hw/hppa/machine.c +++ b/hw/hppa/machine.c @@ -411,12 +411,12 @@ static void machine_hppa_init(MachineState *machine) cpu[0]->env.gr[19] = FW_CFG_IO_BASE; } -static void hppa_machine_reset(MachineState *ms) +static void hppa_machine_reset(MachineState *ms, ShutdownCause reason) { unsigned int smp_cpus = ms->smp.cpus; int i; - qemu_devices_reset(); + qemu_devices_reset(reason); /* Start all CPUs at the firmware entry point. * Monarch CPU will initialize firmware, secondary CPUs diff --git a/hw/hyperv/hyperv.c b/hw/hyperv/hyperv.c index 4a1b59cb9d..57b402b956 100644 --- a/hw/hyperv/hyperv.c +++ b/hw/hyperv/hyperv.c @@ -157,7 +157,7 @@ void hyperv_synic_reset(CPUState *cs) SynICState *synic = get_synic(cs); if (synic) { - device_legacy_reset(DEVICE(synic)); + device_cold_reset(DEVICE(synic)); } } diff --git a/hw/i386/microvm.c b/hw/i386/microvm.c index 52f9aa9d8c..ffd1884100 100644 --- a/hw/i386/microvm.c +++ b/hw/i386/microvm.c @@ -467,7 +467,7 @@ static void microvm_machine_state_init(MachineState *machine) microvm_devices_init(mms); } -static void microvm_machine_reset(MachineState *machine) +static void microvm_machine_reset(MachineState *machine, ShutdownCause reason) { MicrovmMachineState *mms = MICROVM_MACHINE(machine); CPUState *cs; @@ -480,7 +480,7 @@ static void microvm_machine_reset(MachineState *machine) mms->kernel_cmdline_fixed = true; } - qemu_devices_reset(); + qemu_devices_reset(reason); CPU_FOREACH(cs) { cpu = X86_CPU(cs); diff --git a/hw/i386/pc.c b/hw/i386/pc.c index 768982ae9a..3e86083db3 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -1847,12 +1847,12 @@ static void pc_machine_initfn(Object *obj) cxl_machine_init(obj, &pcms->cxl_devices_state); } -static void pc_machine_reset(MachineState *machine) +static void pc_machine_reset(MachineState *machine, ShutdownCause reason) { CPUState *cs; X86CPU *cpu; - qemu_devices_reset(); + qemu_devices_reset(reason); /* Reset APIC after devices have been reset to cancel * any changes that qemu_devices_reset() might have done. @@ -1867,7 +1867,7 @@ static void pc_machine_reset(MachineState *machine) static void pc_machine_wakeup(MachineState *machine) { cpu_synchronize_all_states(); - pc_machine_reset(machine); + pc_machine_reset(machine, SHUTDOWN_CAUSE_NONE); cpu_synchronize_all_post_reset(); } diff --git a/hw/i386/x86.c b/hw/i386/x86.c index 1148f70c03..bd50a064a3 100644 --- a/hw/i386/x86.c +++ b/hw/i386/x86.c @@ -1111,7 +1111,7 @@ void x86_load_linux(X86MachineState *x86ms, setup_data->type = cpu_to_le32(SETUP_RNG_SEED); setup_data->len = cpu_to_le32(RNG_SEED_LENGTH); qemu_guest_getrandom_nofail(setup_data->data, RNG_SEED_LENGTH); - qemu_register_reset(reset_rng_seed, setup_data); + qemu_register_reset_nosnapshotload(reset_rng_seed, setup_data); fw_cfg_add_bytes_callback(fw_cfg, FW_CFG_KERNEL_DATA, reset_rng_seed, NULL, setup_data, kernel, kernel_size, true); } else { diff --git a/hw/m68k/q800.c b/hw/m68k/q800.c index e09e244ddc..9d52ca6613 100644 --- a/hw/m68k/q800.c +++ b/hw/m68k/q800.c @@ -321,27 +321,23 @@ static const TypeInfo glue_info = { }, }; -typedef struct { - M68kCPU *cpu; - struct bi_record *rng_seed; -} ResetInfo; - static void main_cpu_reset(void *opaque) { - ResetInfo *reset_info = opaque; - M68kCPU *cpu = reset_info->cpu; + M68kCPU *cpu = opaque; CPUState *cs = CPU(cpu); - if (reset_info->rng_seed) { - qemu_guest_getrandom_nofail((void *)reset_info->rng_seed->data + 2, - be16_to_cpu(*(uint16_t *)reset_info->rng_seed->data)); - } - cpu_reset(cs); cpu->env.aregs[7] = ldl_phys(cs->as, 0); cpu->env.pc = ldl_phys(cs->as, 4); } +static void rerandomize_rng_seed(void *opaque) +{ + struct bi_record *rng_seed = opaque; + qemu_guest_getrandom_nofail((void *)rng_seed->data + 2, + be16_to_cpu(*(uint16_t *)rng_seed->data)); +} + static uint8_t fake_mac_rom[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, @@ -397,7 +393,6 @@ static void q800_init(MachineState *machine) NubusBus *nubus; DeviceState *glue; DriveInfo *dinfo; - ResetInfo *reset_info; uint8_t rng_seed[32]; linux_boot = (kernel_filename != NULL); @@ -408,12 +403,9 @@ static void q800_init(MachineState *machine) exit(1); } - reset_info = g_new0(ResetInfo, 1); - /* init CPUs */ cpu = M68K_CPU(cpu_create(machine->cpu_type)); - reset_info->cpu = cpu; - qemu_register_reset(main_cpu_reset, reset_info); + qemu_register_reset(main_cpu_reset, cpu); /* RAM */ memory_region_add_subregion(get_system_memory(), 0, machine->ram); @@ -687,9 +679,10 @@ static void q800_init(MachineState *machine) BOOTINFO0(param_ptr, BI_LAST); rom_add_blob_fixed_as("bootinfo", param_blob, param_ptr - param_blob, parameters_base, cs->as); - reset_info->rng_seed = rom_ptr_for_as(cs->as, parameters_base, - param_ptr - param_blob) + - (param_rng_seed - param_blob); + qemu_register_reset_nosnapshotload(rerandomize_rng_seed, + rom_ptr_for_as(cs->as, parameters_base, + param_ptr - param_blob) + + (param_rng_seed - param_blob)); g_free(param_blob); } else { uint8_t *ptr; diff --git a/hw/m68k/virt.c b/hw/m68k/virt.c index 89c4108eb5..da5eafd275 100644 --- a/hw/m68k/virt.c +++ b/hw/m68k/virt.c @@ -89,7 +89,6 @@ typedef struct { M68kCPU *cpu; hwaddr initial_pc; hwaddr initial_stack; - struct bi_record *rng_seed; } ResetInfo; static void main_cpu_reset(void *opaque) @@ -98,16 +97,18 @@ static void main_cpu_reset(void *opaque) M68kCPU *cpu = reset_info->cpu; CPUState *cs = CPU(cpu); - if (reset_info->rng_seed) { - qemu_guest_getrandom_nofail((void *)reset_info->rng_seed->data + 2, - be16_to_cpu(*(uint16_t *)reset_info->rng_seed->data)); - } - cpu_reset(cs); cpu->env.aregs[7] = reset_info->initial_stack; cpu->env.pc = reset_info->initial_pc; } +static void rerandomize_rng_seed(void *opaque) +{ + struct bi_record *rng_seed = opaque; + qemu_guest_getrandom_nofail((void *)rng_seed->data + 2, + be16_to_cpu(*(uint16_t *)rng_seed->data)); +} + static void virt_init(MachineState *machine) { M68kCPU *cpu = NULL; @@ -289,9 +290,10 @@ static void virt_init(MachineState *machine) BOOTINFO0(param_ptr, BI_LAST); rom_add_blob_fixed_as("bootinfo", param_blob, param_ptr - param_blob, parameters_base, cs->as); - reset_info->rng_seed = rom_ptr_for_as(cs->as, parameters_base, - param_ptr - param_blob) + - (param_rng_seed - param_blob); + qemu_register_reset_nosnapshotload(rerandomize_rng_seed, + rom_ptr_for_as(cs->as, parameters_base, + param_ptr - param_blob) + + (param_rng_seed - param_blob)); g_free(param_blob); } } diff --git a/hw/mem/nvdimm.c b/hw/mem/nvdimm.c index 7c7d777781..31080c22c9 100644 --- a/hw/mem/nvdimm.c +++ b/hw/mem/nvdimm.c @@ -149,7 +149,7 @@ static void nvdimm_prepare_memory_region(NVDIMMDevice *nvdimm, Error **errp) if (!nvdimm->unarmed && memory_region_is_rom(mr)) { HostMemoryBackend *hostmem = dimm->hostmem; - error_setg(errp, "'unarmed' property must be off since memdev %s " + error_setg(errp, "'unarmed' property must be 'on' since memdev %s " "is read-only", object_get_canonical_path_component(OBJECT(hostmem))); return; diff --git a/hw/mips/boston.c b/hw/mips/boston.c index d2ab9da1a0..cab63f43bf 100644 --- a/hw/mips/boston.c +++ b/hw/mips/boston.c @@ -41,6 +41,7 @@ #include "sysemu/sysemu.h" #include "sysemu/qtest.h" #include "sysemu/runstate.h" +#include "sysemu/reset.h" #include <libfdt.h> #include "qom/object.h" @@ -810,6 +811,8 @@ static void boston_mach_init(MachineState *machine) /* Calculate real fdt size after filter */ dt_size = fdt_totalsize(dtb_load_data); rom_add_blob_fixed("dtb", dtb_load_data, dt_size, dtb_paddr); + qemu_register_reset_nosnapshotload(qemu_fdt_randomize_seeds, + rom_ptr(dtb_paddr, dt_size)); } else { /* Try to load file as FIT */ fit_err = load_fit(&boston_fit_loader, machine->kernel_filename, s); diff --git a/hw/mips/malta.c b/hw/mips/malta.c index 0e932988e0..7c3ad0974b 100644 --- a/hw/mips/malta.c +++ b/hw/mips/malta.c @@ -26,6 +26,7 @@ #include "qemu/units.h" #include "qemu/bitops.h" #include "qemu/datadir.h" +#include "qemu/guest-random.h" #include "hw/clock.h" #include "hw/southbridge/piix.h" #include "hw/isa/superio.h" @@ -1017,6 +1018,17 @@ static void G_GNUC_PRINTF(3, 4) prom_set(uint32_t *prom_buf, int index, va_end(ap); } +static void reinitialize_rng_seed(void *opaque) +{ + char *rng_seed_hex = opaque; + uint8_t rng_seed[32]; + + qemu_guest_getrandom_nofail(rng_seed, sizeof(rng_seed)); + for (size_t i = 0; i < sizeof(rng_seed); ++i) { + sprintf(rng_seed_hex + i * 2, "%02x", rng_seed[i]); + } +} + /* Kernel */ static uint64_t load_kernel(void) { @@ -1028,6 +1040,9 @@ static uint64_t load_kernel(void) long prom_size; int prom_index = 0; uint64_t (*xlate_to_kseg0) (void *opaque, uint64_t addr); + uint8_t rng_seed[32]; + char rng_seed_hex[sizeof(rng_seed) * 2 + 1]; + size_t rng_seed_prom_offset; #if TARGET_BIG_ENDIAN big_endian = 1; @@ -1115,9 +1130,21 @@ static uint64_t load_kernel(void) prom_set(prom_buf, prom_index++, "modetty0"); prom_set(prom_buf, prom_index++, "38400n8r"); + + qemu_guest_getrandom_nofail(rng_seed, sizeof(rng_seed)); + for (size_t i = 0; i < sizeof(rng_seed); ++i) { + sprintf(rng_seed_hex + i * 2, "%02x", rng_seed[i]); + } + prom_set(prom_buf, prom_index++, "rngseed"); + rng_seed_prom_offset = prom_index * ENVP_ENTRY_SIZE + + sizeof(uint32_t) * ENVP_NB_ENTRIES; + prom_set(prom_buf, prom_index++, "%s", rng_seed_hex); + prom_set(prom_buf, prom_index++, NULL); rom_add_blob_fixed("prom", prom_buf, prom_size, ENVP_PADDR); + qemu_register_reset_nosnapshotload(reinitialize_rng_seed, + rom_ptr(ENVP_PADDR, prom_size) + rng_seed_prom_offset); g_free(prom_buf); return kernel_entry; diff --git a/hw/net/e1000_regs.h b/hw/net/e1000_regs.h index ae99f58bab..9d423f6c09 100644 --- a/hw/net/e1000_regs.h +++ b/hw/net/e1000_regs.h @@ -793,6 +793,7 @@ #define E1000_CTRL_EXT_ASDCHK 0x00001000 /* auto speed detection check */ #define E1000_CTRL_EXT_EE_RST 0x00002000 /* EEPROM reset */ #define E1000_CTRL_EXT_LINK_EN 0x00010000 /* enable link status from external LINK_0 and LINK_1 pins */ +#define E1000_CTRL_EXT_DRV_LOAD 0x10000000 /* Driver loaded bit for FW */ #define E1000_CTRL_EXT_EIAME 0x01000000 #define E1000_CTRL_EXT_IAME 0x08000000 /* Int ACK Auto-mask */ #define E1000_CTRL_EXT_PBA_CLR 0x80000000 /* PBA Clear */ diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c index e9f696b4cf..b6903aea54 100644 --- a/hw/net/virtio-net.c +++ b/hw/net/virtio-net.c @@ -2526,6 +2526,7 @@ static void virtio_net_tx_complete(NetClientState *nc, ssize_t len) VirtIONet *n = qemu_get_nic_opaque(nc); VirtIONetQueue *q = virtio_net_get_subqueue(nc); VirtIODevice *vdev = VIRTIO_DEVICE(n); + int ret; virtqueue_push(q->tx_vq, q->async_tx.elem, 0); virtio_notify(vdev, q->tx_vq); @@ -2534,7 +2535,22 @@ static void virtio_net_tx_complete(NetClientState *nc, ssize_t len) q->async_tx.elem = NULL; virtio_queue_set_notification(q->tx_vq, 1); - virtio_net_flush_tx(q); + ret = virtio_net_flush_tx(q); + if (ret >= n->tx_burst) { + /* + * the flush has been stopped by tx_burst + * we will not receive notification for the + * remainining part, so re-schedule + */ + virtio_queue_set_notification(q->tx_vq, 0); + if (q->tx_bh) { + qemu_bh_schedule(q->tx_bh); + } else { + timer_mod(q->tx_timer, + qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout); + } + q->tx_waiting = 1; + } } /* TX */ @@ -2633,6 +2649,8 @@ drop: return num_packets; } +static void virtio_net_tx_timer(void *opaque); + static void virtio_net_handle_tx_timer(VirtIODevice *vdev, VirtQueue *vq) { VirtIONet *n = VIRTIO_NET(vdev); @@ -2650,15 +2668,13 @@ static void virtio_net_handle_tx_timer(VirtIODevice *vdev, VirtQueue *vq) } if (q->tx_waiting) { - virtio_queue_set_notification(vq, 1); + /* We already have queued packets, immediately flush */ timer_del(q->tx_timer); - q->tx_waiting = 0; - if (virtio_net_flush_tx(q) == -EINVAL) { - return; - } + virtio_net_tx_timer(q); } else { + /* re-arm timer to flush it (and more) on next tick */ timer_mod(q->tx_timer, - qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout); + qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout); q->tx_waiting = 1; virtio_queue_set_notification(vq, 0); } @@ -2691,6 +2707,8 @@ static void virtio_net_tx_timer(void *opaque) VirtIONetQueue *q = opaque; VirtIONet *n = q->n; VirtIODevice *vdev = VIRTIO_DEVICE(n); + int ret; + /* This happens when device was stopped but BH wasn't. */ if (!vdev->vm_running) { /* Make sure tx waiting is set, so we'll run when restarted. */ @@ -2705,8 +2723,33 @@ static void virtio_net_tx_timer(void *opaque) return; } + ret = virtio_net_flush_tx(q); + if (ret == -EBUSY || ret == -EINVAL) { + return; + } + /* + * If we flush a full burst of packets, assume there are + * more coming and immediately rearm + */ + if (ret >= n->tx_burst) { + q->tx_waiting = 1; + timer_mod(q->tx_timer, + qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout); + return; + } + /* + * If less than a full burst, re-enable notification and flush + * anything that may have come in while we weren't looking. If + * we find something, assume the guest is still active and rearm + */ virtio_queue_set_notification(q->tx_vq, 1); - virtio_net_flush_tx(q); + ret = virtio_net_flush_tx(q); + if (ret > 0) { + virtio_queue_set_notification(q->tx_vq, 0); + q->tx_waiting = 1; + timer_mod(q->tx_timer, + qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout); + } } static void virtio_net_tx_bh(void *opaque) diff --git a/hw/net/xen_nic.c b/hw/net/xen_nic.c index 5c815b4f0c..7d92c2d022 100644 --- a/hw/net/xen_nic.c +++ b/hw/net/xen_nic.c @@ -296,9 +296,8 @@ static int net_init(struct XenLegacyDevice *xendev) netdev->nic = qemu_new_nic(&net_xen_info, &netdev->conf, "xen", NULL, netdev); - snprintf(qemu_get_queue(netdev->nic)->info_str, - sizeof(qemu_get_queue(netdev->nic)->info_str), - "nic: xenbus vif macaddr=%s", netdev->mac); + qemu_set_info_str(qemu_get_queue(netdev->nic), + "nic: xenbus vif macaddr=%s", netdev->mac); /* fill info */ xenstore_write_be_int(&netdev->xendev, "feature-rx-copy", 1); diff --git a/hw/openrisc/boot.c b/hw/openrisc/boot.c index 128ccbcba2..007e80cd5a 100644 --- a/hw/openrisc/boot.c +++ b/hw/openrisc/boot.c @@ -14,6 +14,7 @@ #include "hw/openrisc/boot.h" #include "sysemu/device_tree.h" #include "sysemu/qtest.h" +#include "sysemu/reset.h" #include <libfdt.h> @@ -111,6 +112,8 @@ uint32_t openrisc_load_fdt(void *fdt, hwaddr load_start, rom_add_blob_fixed_as("fdt", fdt, fdtsize, fdt_addr, &address_space_memory); + qemu_register_reset_nosnapshotload(qemu_fdt_randomize_seeds, + rom_ptr_for_as(&address_space_memory, fdt_addr, fdtsize)); return fdt_addr; } diff --git a/hw/ppc/Kconfig b/hw/ppc/Kconfig index 791fe78a50..769a1ead1c 100644 --- a/hw/ppc/Kconfig +++ b/hw/ppc/Kconfig @@ -126,6 +126,7 @@ config E500 select ETSEC select GPIO_MPC8XXX select OPENPIC + select PFLASH_CFI01 select PLATFORM_BUS select PPCE500_PCI select SERIAL diff --git a/hw/ppc/e500.c b/hw/ppc/e500.c index 3e950ea3ba..2fe496677c 100644 --- a/hw/ppc/e500.c +++ b/hw/ppc/e500.c @@ -23,8 +23,10 @@ #include "e500-ccsr.h" #include "net/net.h" #include "qemu/config-file.h" +#include "hw/block/flash.h" #include "hw/char/serial.h" #include "hw/pci/pci.h" +#include "sysemu/block-backend-io.h" #include "sysemu/sysemu.h" #include "sysemu/kvm.h" #include "sysemu/reset.h" @@ -267,6 +269,31 @@ static void sysbus_device_create_devtree(SysBusDevice *sbdev, void *opaque) } } +static void create_devtree_flash(SysBusDevice *sbdev, + PlatformDevtreeData *data) +{ + g_autofree char *name = NULL; + uint64_t num_blocks = object_property_get_uint(OBJECT(sbdev), + "num-blocks", + &error_fatal); + uint64_t sector_length = object_property_get_uint(OBJECT(sbdev), + "sector-length", + &error_fatal); + uint64_t bank_width = object_property_get_uint(OBJECT(sbdev), + "width", + &error_fatal); + hwaddr flashbase = 0; + hwaddr flashsize = num_blocks * sector_length; + void *fdt = data->fdt; + + name = g_strdup_printf("%s/nor@%" PRIx64, data->node, flashbase); + qemu_fdt_add_subnode(fdt, name); + qemu_fdt_setprop_string(fdt, name, "compatible", "cfi-flash"); + qemu_fdt_setprop_sized_cells(fdt, name, "reg", + 1, flashbase, 1, flashsize); + qemu_fdt_setprop_cell(fdt, name, "bank-width", bank_width); +} + static void platform_bus_create_devtree(PPCE500MachineState *pms, void *fdt, const char *mpic) { @@ -276,6 +303,8 @@ static void platform_bus_create_devtree(PPCE500MachineState *pms, uint64_t addr = pmc->platform_bus_base; uint64_t size = pmc->platform_bus_size; int irq_start = pmc->platform_bus_first_irq; + SysBusDevice *sbdev; + bool ambiguous; /* Create a /platform node that we can put all devices into */ @@ -302,6 +331,13 @@ static void platform_bus_create_devtree(PPCE500MachineState *pms, /* Loop through all dynamic sysbus devices and create nodes for them */ foreach_dynamic_sysbus_device(sysbus_device_create_devtree, &data); + sbdev = SYS_BUS_DEVICE(object_resolve_path_type("", TYPE_PFLASH_CFI01, + &ambiguous)); + if (sbdev) { + assert(!ambiguous); + create_devtree_flash(sbdev, &data); + } + g_free(node); } @@ -856,6 +892,7 @@ void ppce500_init(MachineState *machine) unsigned int pci_irq_nrs[PCI_NUM_PINS] = {1, 2, 3, 4}; IrqLines *irqs; DeviceState *dev, *mpicdev; + DriveInfo *dinfo; CPUPPCState *firstenv = NULL; MemoryRegion *ccsr_addr_space; SysBusDevice *s; @@ -1024,6 +1061,48 @@ void ppce500_init(MachineState *machine) pmc->platform_bus_base, &pms->pbus_dev->mmio); + dinfo = drive_get(IF_PFLASH, 0, 0); + if (dinfo) { + BlockBackend *blk = blk_by_legacy_dinfo(dinfo); + BlockDriverState *bs = blk_bs(blk); + uint64_t mmio_size = memory_region_size(&pms->pbus_dev->mmio); + uint64_t size = bdrv_getlength(bs); + uint32_t sector_len = 64 * KiB; + + if (!is_power_of_2(size)) { + error_report("Size of pflash file must be a power of two."); + exit(1); + } + + if (size > mmio_size) { + error_report("Size of pflash file must not be bigger than %" PRIu64 + " bytes.", mmio_size); + exit(1); + } + + if (!QEMU_IS_ALIGNED(size, sector_len)) { + error_report("Size of pflash file must be a multiple of %" PRIu32 + ".", sector_len); + exit(1); + } + + dev = qdev_new(TYPE_PFLASH_CFI01); + qdev_prop_set_drive(dev, "drive", blk); + qdev_prop_set_uint32(dev, "num-blocks", size / sector_len); + qdev_prop_set_uint64(dev, "sector-length", sector_len); + qdev_prop_set_uint8(dev, "width", 2); + qdev_prop_set_bit(dev, "big-endian", true); + qdev_prop_set_uint16(dev, "id0", 0x89); + qdev_prop_set_uint16(dev, "id1", 0x18); + qdev_prop_set_uint16(dev, "id2", 0x0000); + qdev_prop_set_uint16(dev, "id3", 0x0); + qdev_prop_set_string(dev, "name", "e500.flash"); + sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal); + + memory_region_add_subregion(&pms->pbus_dev->mmio, 0, + pflash_cfi01_get_memory(PFLASH_CFI01(dev))); + } + /* * Smart firmware defaults ahead! * diff --git a/hw/ppc/meson.build b/hw/ppc/meson.build index 32babc9b48..c927337da0 100644 --- a/hw/ppc/meson.build +++ b/hw/ppc/meson.build @@ -59,8 +59,9 @@ ppc_ss.add(when: 'CONFIG_PPC440', if_true: files( 'ppc440_bamboo.c', 'ppc440_pcix.c', 'ppc440_uc.c')) ppc_ss.add(when: 'CONFIG_PPC4XX', if_true: files( + 'ppc4xx_devs.c', 'ppc4xx_pci.c', - 'ppc4xx_devs.c')) + 'ppc4xx_sdram.c')) ppc_ss.add(when: 'CONFIG_SAM460EX', if_true: files('sam460ex.c')) # PReP ppc_ss.add(when: 'CONFIG_PREP', if_true: files('prep.c')) diff --git a/hw/ppc/pegasos2.c b/hw/ppc/pegasos2.c index ecf682b148..bb4d008ba9 100644 --- a/hw/ppc/pegasos2.c +++ b/hw/ppc/pegasos2.c @@ -248,14 +248,14 @@ static void pegasos2_pci_config_write(Pegasos2MachineState *pm, int bus, pegasos2_mv_reg_write(pm, pcicfg + 4, len, val); } -static void pegasos2_machine_reset(MachineState *machine) +static void pegasos2_machine_reset(MachineState *machine, ShutdownCause reason) { Pegasos2MachineState *pm = PEGASOS2_MACHINE(machine); void *fdt; uint64_t d[2]; int sz; - qemu_devices_reset(); + qemu_devices_reset(reason); if (!pm->vof) { return; /* Firmware should set up machine so nothing to do */ } diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c index 40bb573d1a..3d01e26f84 100644 --- a/hw/ppc/pnv.c +++ b/hw/ppc/pnv.c @@ -643,13 +643,13 @@ static void pnv_powerdown_notify(Notifier *n, void *opaque) } } -static void pnv_reset(MachineState *machine) +static void pnv_reset(MachineState *machine, ShutdownCause reason) { PnvMachineState *pnv = PNV_MACHINE(machine); IPMIBmc *bmc; void *fdt; - qemu_devices_reset(); + qemu_devices_reset(reason); /* * The machine should provide by default an internal BMC simulator. diff --git a/hw/ppc/pnv_core.c b/hw/ppc/pnv_core.c index 19e8eb885f..9ee79192dd 100644 --- a/hw/ppc/pnv_core.c +++ b/hw/ppc/pnv_core.c @@ -58,6 +58,7 @@ static void pnv_core_cpu_reset(PnvCore *pc, PowerPCCPU *cpu) env->msr |= MSR_HVB; /* Hypervisor mode */ env->spr[SPR_HRMOR] = pc->hrmor; hreg_compute_hflags(env); + ppc_maybe_interrupt(env); pcc->intc_reset(pc->chip, cpu); } diff --git a/hw/ppc/ppc.c b/hw/ppc/ppc.c index 690f448cb9..dc86c1c7db 100644 --- a/hw/ppc/ppc.c +++ b/hw/ppc/ppc.c @@ -40,9 +40,8 @@ static void cpu_ppc_tb_stop (CPUPPCState *env); static void cpu_ppc_tb_start (CPUPPCState *env); -void ppc_set_irq(PowerPCCPU *cpu, int n_IRQ, int level) +void ppc_set_irq(PowerPCCPU *cpu, int irq, int level) { - CPUState *cs = CPU(cpu); CPUPPCState *env = &cpu->env; unsigned int old_pending; bool locked = false; @@ -56,21 +55,17 @@ void ppc_set_irq(PowerPCCPU *cpu, int n_IRQ, int level) old_pending = env->pending_interrupts; if (level) { - env->pending_interrupts |= 1 << n_IRQ; - cpu_interrupt(cs, CPU_INTERRUPT_HARD); + env->pending_interrupts |= irq; } else { - env->pending_interrupts &= ~(1 << n_IRQ); - if (env->pending_interrupts == 0) { - cpu_reset_interrupt(cs, CPU_INTERRUPT_HARD); - } + env->pending_interrupts &= ~irq; } if (old_pending != env->pending_interrupts) { - kvmppc_set_interrupt(cpu, n_IRQ, level); + ppc_maybe_interrupt(env); + kvmppc_set_interrupt(cpu, irq, level); } - - trace_ppc_irq_set_exit(env, n_IRQ, level, env->pending_interrupts, + trace_ppc_irq_set_exit(env, irq, level, env->pending_interrupts, CPU(cpu)->interrupt_request); if (locked) { diff --git a/hw/ppc/ppc440_uc.c b/hw/ppc/ppc440_uc.c index 5fbf44009e..651263926e 100644 --- a/hw/ppc/ppc440_uc.c +++ b/hw/ppc/ppc440_uc.c @@ -10,21 +10,14 @@ #include "qemu/osdep.h" #include "qemu/units.h" -#include "qemu/error-report.h" #include "qapi/error.h" #include "qemu/log.h" -#include "qemu/module.h" #include "hw/irq.h" -#include "exec/memory.h" -#include "cpu.h" #include "hw/ppc/ppc4xx.h" #include "hw/qdev-properties.h" #include "hw/pci/pci.h" -#include "sysemu/block-backend.h" #include "sysemu/reset.h" #include "ppc440.h" -#include "qom/object.h" -#include "trace.h" /*****************************************************************************/ /* L2 Cache as SRAM */ @@ -479,331 +472,6 @@ void ppc4xx_sdr_init(CPUPPCState *env) } /*****************************************************************************/ -/* SDRAM controller */ -enum { - SDRAM0_CFGADDR = 0x10, - SDRAM0_CFGDATA, - SDRAM_R0BAS = 0x40, - SDRAM_R1BAS, - SDRAM_R2BAS, - SDRAM_R3BAS, - SDRAM_CONF1HB = 0x45, - SDRAM_PLBADDULL = 0x4a, - SDRAM_CONF1LL = 0x4b, - SDRAM_CONFPATHB = 0x4f, - SDRAM_PLBADDUHB = 0x50, -}; - -static uint32_t sdram_ddr2_bcr(hwaddr ram_base, hwaddr ram_size) -{ - uint32_t bcr; - - switch (ram_size) { - case 8 * MiB: - bcr = 0xffc0; - break; - case 16 * MiB: - bcr = 0xff80; - break; - case 32 * MiB: - bcr = 0xff00; - break; - case 64 * MiB: - bcr = 0xfe00; - break; - case 128 * MiB: - bcr = 0xfc00; - break; - case 256 * MiB: - bcr = 0xf800; - break; - case 512 * MiB: - bcr = 0xf000; - break; - case 1 * GiB: - bcr = 0xe000; - break; - case 2 * GiB: - bcr = 0xc000; - break; - case 4 * GiB: - bcr = 0x8000; - break; - default: - error_report("invalid RAM size " TARGET_FMT_plx, ram_size); - return 0; - } - bcr |= ram_base >> 2 & 0xffe00000; - bcr |= 1; - - return bcr; -} - -static inline hwaddr sdram_ddr2_base(uint32_t bcr) -{ - return (bcr & 0xffe00000) << 2; -} - -static uint64_t sdram_ddr2_size(uint32_t bcr) -{ - uint64_t size; - int sh; - - sh = 1024 - ((bcr >> 6) & 0x3ff); - size = 8 * MiB * sh; - - return size; -} - -static void sdram_bank_map(Ppc4xxSdramBank *bank) -{ - memory_region_init(&bank->container, NULL, "sdram-container", bank->size); - memory_region_add_subregion(&bank->container, 0, &bank->ram); - memory_region_add_subregion(get_system_memory(), bank->base, - &bank->container); -} - -static void sdram_bank_unmap(Ppc4xxSdramBank *bank) -{ - memory_region_del_subregion(get_system_memory(), &bank->container); - memory_region_del_subregion(&bank->container, &bank->ram); - object_unparent(OBJECT(&bank->container)); -} - -static void sdram_ddr2_set_bcr(Ppc4xxSdramDdr2State *sdram, int i, - uint32_t bcr, int enabled) -{ - if (sdram->bank[i].bcr & 1) { - /* First unmap RAM if enabled */ - trace_ppc4xx_sdram_unmap(sdram_ddr2_base(sdram->bank[i].bcr), - sdram_ddr2_size(sdram->bank[i].bcr)); - sdram_bank_unmap(&sdram->bank[i]); - } - sdram->bank[i].bcr = bcr & 0xffe0ffc1; - if (enabled && (bcr & 1)) { - trace_ppc4xx_sdram_map(sdram_ddr2_base(bcr), sdram_ddr2_size(bcr)); - sdram_bank_map(&sdram->bank[i]); - } -} - -static void sdram_ddr2_map_bcr(Ppc4xxSdramDdr2State *sdram) -{ - int i; - - for (i = 0; i < sdram->nbanks; i++) { - if (sdram->bank[i].size) { - sdram_ddr2_set_bcr(sdram, i, - sdram_ddr2_bcr(sdram->bank[i].base, - sdram->bank[i].size), 1); - } else { - sdram_ddr2_set_bcr(sdram, i, 0, 0); - } - } -} - -static void sdram_ddr2_unmap_bcr(Ppc4xxSdramDdr2State *sdram) -{ - int i; - - for (i = 0; i < sdram->nbanks; i++) { - if (sdram->bank[i].size) { - sdram_ddr2_set_bcr(sdram, i, sdram->bank[i].bcr & ~1, 0); - } - } -} - -static uint32_t sdram_ddr2_dcr_read(void *opaque, int dcrn) -{ - Ppc4xxSdramDdr2State *sdram = opaque; - uint32_t ret = 0; - - switch (dcrn) { - case SDRAM_R0BAS: - case SDRAM_R1BAS: - case SDRAM_R2BAS: - case SDRAM_R3BAS: - if (sdram->bank[dcrn - SDRAM_R0BAS].size) { - ret = sdram_ddr2_bcr(sdram->bank[dcrn - SDRAM_R0BAS].base, - sdram->bank[dcrn - SDRAM_R0BAS].size); - } - break; - case SDRAM_CONF1HB: - case SDRAM_CONF1LL: - case SDRAM_CONFPATHB: - case SDRAM_PLBADDULL: - case SDRAM_PLBADDUHB: - break; - case SDRAM0_CFGADDR: - ret = sdram->addr; - break; - case SDRAM0_CFGDATA: - switch (sdram->addr) { - case 0x14: /* SDRAM_MCSTAT (405EX) */ - case 0x1F: - ret = 0x80000000; - break; - case 0x21: /* SDRAM_MCOPT2 */ - ret = sdram->mcopt2; - break; - case 0x40: /* SDRAM_MB0CF */ - ret = 0x00008001; - break; - case 0x7A: /* SDRAM_DLCR */ - ret = 0x02000000; - break; - case 0xE1: /* SDR0_DDR0 */ - ret = SDR0_DDR0_DDRM_ENCODE(1) | SDR0_DDR0_DDRM_DDR1; - break; - default: - break; - } - break; - default: - break; - } - - return ret; -} - -#define SDRAM_DDR2_MCOPT2_DCEN BIT(27) - -static void sdram_ddr2_dcr_write(void *opaque, int dcrn, uint32_t val) -{ - Ppc4xxSdramDdr2State *sdram = opaque; - - switch (dcrn) { - case SDRAM_R0BAS: - case SDRAM_R1BAS: - case SDRAM_R2BAS: - case SDRAM_R3BAS: - case SDRAM_CONF1HB: - case SDRAM_CONF1LL: - case SDRAM_CONFPATHB: - case SDRAM_PLBADDULL: - case SDRAM_PLBADDUHB: - break; - case SDRAM0_CFGADDR: - sdram->addr = val; - break; - case SDRAM0_CFGDATA: - switch (sdram->addr) { - case 0x00: /* B0CR */ - break; - case 0x21: /* SDRAM_MCOPT2 */ - if (!(sdram->mcopt2 & SDRAM_DDR2_MCOPT2_DCEN) && - (val & SDRAM_DDR2_MCOPT2_DCEN)) { - trace_ppc4xx_sdram_enable("enable"); - /* validate all RAM mappings */ - sdram_ddr2_map_bcr(sdram); - sdram->mcopt2 |= SDRAM_DDR2_MCOPT2_DCEN; - } else if ((sdram->mcopt2 & SDRAM_DDR2_MCOPT2_DCEN) && - !(val & SDRAM_DDR2_MCOPT2_DCEN)) { - trace_ppc4xx_sdram_enable("disable"); - /* invalidate all RAM mappings */ - sdram_ddr2_unmap_bcr(sdram); - sdram->mcopt2 &= ~SDRAM_DDR2_MCOPT2_DCEN; - } - break; - default: - break; - } - break; - default: - break; - } -} - -static void ppc4xx_sdram_ddr2_reset(DeviceState *dev) -{ - Ppc4xxSdramDdr2State *sdram = PPC4xx_SDRAM_DDR2(dev); - - sdram->addr = 0; - sdram->mcopt2 = 0; -} - -static void ppc4xx_sdram_ddr2_realize(DeviceState *dev, Error **errp) -{ - Ppc4xxSdramDdr2State *s = PPC4xx_SDRAM_DDR2(dev); - Ppc4xxDcrDeviceState *dcr = PPC4xx_DCR_DEVICE(dev); - /* - * SoC also has 4 GiB but that causes problem with 32 bit - * builds (4*GiB overflows the 32 bit ram_addr_t). - */ - const ram_addr_t valid_bank_sizes[] = { - 2 * GiB, 1 * GiB, 512 * MiB, 256 * MiB, 128 * MiB, - 64 * MiB, 32 * MiB, 16 * MiB, 8 * MiB, 0 - }; - - if (s->nbanks < 1 || s->nbanks > 4) { - error_setg(errp, "Invalid number of RAM banks"); - return; - } - if (!s->dram_mr) { - error_setg(errp, "Missing dram memory region"); - return; - } - ppc4xx_sdram_banks(s->dram_mr, s->nbanks, s->bank, valid_bank_sizes); - - ppc4xx_dcr_register(dcr, SDRAM0_CFGADDR, - s, &sdram_ddr2_dcr_read, &sdram_ddr2_dcr_write); - ppc4xx_dcr_register(dcr, SDRAM0_CFGDATA, - s, &sdram_ddr2_dcr_read, &sdram_ddr2_dcr_write); - - ppc4xx_dcr_register(dcr, SDRAM_R0BAS, - s, &sdram_ddr2_dcr_read, &sdram_ddr2_dcr_write); - ppc4xx_dcr_register(dcr, SDRAM_R1BAS, - s, &sdram_ddr2_dcr_read, &sdram_ddr2_dcr_write); - ppc4xx_dcr_register(dcr, SDRAM_R2BAS, - s, &sdram_ddr2_dcr_read, &sdram_ddr2_dcr_write); - ppc4xx_dcr_register(dcr, SDRAM_R3BAS, - s, &sdram_ddr2_dcr_read, &sdram_ddr2_dcr_write); - ppc4xx_dcr_register(dcr, SDRAM_CONF1HB, - s, &sdram_ddr2_dcr_read, &sdram_ddr2_dcr_write); - ppc4xx_dcr_register(dcr, SDRAM_PLBADDULL, - s, &sdram_ddr2_dcr_read, &sdram_ddr2_dcr_write); - ppc4xx_dcr_register(dcr, SDRAM_CONF1LL, - s, &sdram_ddr2_dcr_read, &sdram_ddr2_dcr_write); - ppc4xx_dcr_register(dcr, SDRAM_CONFPATHB, - s, &sdram_ddr2_dcr_read, &sdram_ddr2_dcr_write); - ppc4xx_dcr_register(dcr, SDRAM_PLBADDUHB, - s, &sdram_ddr2_dcr_read, &sdram_ddr2_dcr_write); -} - -static Property ppc4xx_sdram_ddr2_props[] = { - DEFINE_PROP_LINK("dram", Ppc4xxSdramDdr2State, dram_mr, TYPE_MEMORY_REGION, - MemoryRegion *), - DEFINE_PROP_UINT32("nbanks", Ppc4xxSdramDdr2State, nbanks, 4), - DEFINE_PROP_END_OF_LIST(), -}; - -static void ppc4xx_sdram_ddr2_class_init(ObjectClass *oc, void *data) -{ - DeviceClass *dc = DEVICE_CLASS(oc); - - dc->realize = ppc4xx_sdram_ddr2_realize; - dc->reset = ppc4xx_sdram_ddr2_reset; - /* Reason: only works as function of a ppc4xx SoC */ - dc->user_creatable = false; - device_class_set_props(dc, ppc4xx_sdram_ddr2_props); -} - -void ppc4xx_sdram_ddr2_enable(Ppc4xxSdramDdr2State *s) -{ - sdram_ddr2_dcr_write(s, SDRAM0_CFGADDR, 0x21); - sdram_ddr2_dcr_write(s, SDRAM0_CFGDATA, 0x08000000); -} - -static const TypeInfo ppc4xx_types[] = { - { - .name = TYPE_PPC4xx_SDRAM_DDR2, - .parent = TYPE_PPC4xx_DCR_DEVICE, - .instance_size = sizeof(Ppc4xxSdramDdr2State), - .class_init = ppc4xx_sdram_ddr2_class_init, - } -}; -DEFINE_TYPES(ppc4xx_types) - -/*****************************************************************************/ /* PLB to AHB bridge */ enum { AHB_TOP = 0xA4, diff --git a/hw/ppc/ppc4xx_devs.c b/hw/ppc/ppc4xx_devs.c index 12af90f244..c1d111465d 100644 --- a/hw/ppc/ppc4xx_devs.c +++ b/hw/ppc/ppc4xx_devs.c @@ -23,419 +23,10 @@ */ #include "qemu/osdep.h" -#include "qemu/units.h" -#include "sysemu/reset.h" #include "cpu.h" -#include "hw/irq.h" -#include "hw/ppc/ppc.h" #include "hw/ppc/ppc4xx.h" #include "hw/qdev-properties.h" -#include "qemu/log.h" -#include "exec/address-spaces.h" -#include "qemu/error-report.h" #include "qapi/error.h" -#include "trace.h" - -/*****************************************************************************/ -/* SDRAM controller */ -enum { - SDRAM0_CFGADDR = 0x010, - SDRAM0_CFGDATA = 0x011, -}; - -/* - * XXX: TOFIX: some patches have made this code become inconsistent: - * there are type inconsistencies, mixing hwaddr, target_ulong - * and uint32_t - */ -static uint32_t sdram_ddr_bcr(hwaddr ram_base, hwaddr ram_size) -{ - uint32_t bcr; - - switch (ram_size) { - case 4 * MiB: - bcr = 0; - break; - case 8 * MiB: - bcr = 0x20000; - break; - case 16 * MiB: - bcr = 0x40000; - break; - case 32 * MiB: - bcr = 0x60000; - break; - case 64 * MiB: - bcr = 0x80000; - break; - case 128 * MiB: - bcr = 0xA0000; - break; - case 256 * MiB: - bcr = 0xC0000; - break; - default: - qemu_log_mask(LOG_GUEST_ERROR, - "%s: invalid RAM size 0x%" HWADDR_PRIx "\n", __func__, - ram_size); - return 0; - } - bcr |= ram_base & 0xFF800000; - bcr |= 1; - - return bcr; -} - -static inline hwaddr sdram_ddr_base(uint32_t bcr) -{ - return bcr & 0xFF800000; -} - -static target_ulong sdram_ddr_size(uint32_t bcr) -{ - target_ulong size; - int sh; - - sh = (bcr >> 17) & 0x7; - if (sh == 7) { - size = -1; - } else { - size = (4 * MiB) << sh; - } - - return size; -} - -static void sdram_ddr_set_bcr(Ppc4xxSdramDdrState *sdram, int i, - uint32_t bcr, int enabled) -{ - if (sdram->bank[i].bcr & 1) { - /* Unmap RAM */ - trace_ppc4xx_sdram_unmap(sdram_ddr_base(sdram->bank[i].bcr), - sdram_ddr_size(sdram->bank[i].bcr)); - memory_region_del_subregion(get_system_memory(), - &sdram->bank[i].container); - memory_region_del_subregion(&sdram->bank[i].container, - &sdram->bank[i].ram); - object_unparent(OBJECT(&sdram->bank[i].container)); - } - sdram->bank[i].bcr = bcr & 0xFFDEE001; - if (enabled && (bcr & 1)) { - trace_ppc4xx_sdram_map(sdram_ddr_base(bcr), sdram_ddr_size(bcr)); - memory_region_init(&sdram->bank[i].container, NULL, "sdram-container", - sdram_ddr_size(bcr)); - memory_region_add_subregion(&sdram->bank[i].container, 0, - &sdram->bank[i].ram); - memory_region_add_subregion(get_system_memory(), - sdram_ddr_base(bcr), - &sdram->bank[i].container); - } -} - -static void sdram_ddr_map_bcr(Ppc4xxSdramDdrState *sdram) -{ - int i; - - for (i = 0; i < sdram->nbanks; i++) { - if (sdram->bank[i].size != 0) { - sdram_ddr_set_bcr(sdram, i, sdram_ddr_bcr(sdram->bank[i].base, - sdram->bank[i].size), 1); - } else { - sdram_ddr_set_bcr(sdram, i, 0, 0); - } - } -} - -static void sdram_ddr_unmap_bcr(Ppc4xxSdramDdrState *sdram) -{ - int i; - - for (i = 0; i < sdram->nbanks; i++) { - trace_ppc4xx_sdram_unmap(sdram_ddr_base(sdram->bank[i].bcr), - sdram_ddr_size(sdram->bank[i].bcr)); - memory_region_del_subregion(get_system_memory(), - &sdram->bank[i].ram); - } -} - -static uint32_t sdram_ddr_dcr_read(void *opaque, int dcrn) -{ - Ppc4xxSdramDdrState *sdram = opaque; - uint32_t ret; - - switch (dcrn) { - case SDRAM0_CFGADDR: - ret = sdram->addr; - break; - case SDRAM0_CFGDATA: - switch (sdram->addr) { - case 0x00: /* SDRAM_BESR0 */ - ret = sdram->besr0; - break; - case 0x08: /* SDRAM_BESR1 */ - ret = sdram->besr1; - break; - case 0x10: /* SDRAM_BEAR */ - ret = sdram->bear; - break; - case 0x20: /* SDRAM_CFG */ - ret = sdram->cfg; - break; - case 0x24: /* SDRAM_STATUS */ - ret = sdram->status; - break; - case 0x30: /* SDRAM_RTR */ - ret = sdram->rtr; - break; - case 0x34: /* SDRAM_PMIT */ - ret = sdram->pmit; - break; - case 0x40: /* SDRAM_B0CR */ - ret = sdram->bank[0].bcr; - break; - case 0x44: /* SDRAM_B1CR */ - ret = sdram->bank[1].bcr; - break; - case 0x48: /* SDRAM_B2CR */ - ret = sdram->bank[2].bcr; - break; - case 0x4C: /* SDRAM_B3CR */ - ret = sdram->bank[3].bcr; - break; - case 0x80: /* SDRAM_TR */ - ret = -1; /* ? */ - break; - case 0x94: /* SDRAM_ECCCFG */ - ret = sdram->ecccfg; - break; - case 0x98: /* SDRAM_ECCESR */ - ret = sdram->eccesr; - break; - default: /* Error */ - ret = -1; - break; - } - break; - default: - /* Avoid gcc warning */ - ret = 0; - break; - } - - return ret; -} - -static void sdram_ddr_dcr_write(void *opaque, int dcrn, uint32_t val) -{ - Ppc4xxSdramDdrState *sdram = opaque; - - switch (dcrn) { - case SDRAM0_CFGADDR: - sdram->addr = val; - break; - case SDRAM0_CFGDATA: - switch (sdram->addr) { - case 0x00: /* SDRAM_BESR0 */ - sdram->besr0 &= ~val; - break; - case 0x08: /* SDRAM_BESR1 */ - sdram->besr1 &= ~val; - break; - case 0x10: /* SDRAM_BEAR */ - sdram->bear = val; - break; - case 0x20: /* SDRAM_CFG */ - val &= 0xFFE00000; - if (!(sdram->cfg & 0x80000000) && (val & 0x80000000)) { - trace_ppc4xx_sdram_enable("enable"); - /* validate all RAM mappings */ - sdram_ddr_map_bcr(sdram); - sdram->status &= ~0x80000000; - } else if ((sdram->cfg & 0x80000000) && !(val & 0x80000000)) { - trace_ppc4xx_sdram_enable("disable"); - /* invalidate all RAM mappings */ - sdram_ddr_unmap_bcr(sdram); - sdram->status |= 0x80000000; - } - if (!(sdram->cfg & 0x40000000) && (val & 0x40000000)) { - sdram->status |= 0x40000000; - } else if ((sdram->cfg & 0x40000000) && !(val & 0x40000000)) { - sdram->status &= ~0x40000000; - } - sdram->cfg = val; - break; - case 0x24: /* SDRAM_STATUS */ - /* Read-only register */ - break; - case 0x30: /* SDRAM_RTR */ - sdram->rtr = val & 0x3FF80000; - break; - case 0x34: /* SDRAM_PMIT */ - sdram->pmit = (val & 0xF8000000) | 0x07C00000; - break; - case 0x40: /* SDRAM_B0CR */ - sdram_ddr_set_bcr(sdram, 0, val, sdram->cfg & 0x80000000); - break; - case 0x44: /* SDRAM_B1CR */ - sdram_ddr_set_bcr(sdram, 1, val, sdram->cfg & 0x80000000); - break; - case 0x48: /* SDRAM_B2CR */ - sdram_ddr_set_bcr(sdram, 2, val, sdram->cfg & 0x80000000); - break; - case 0x4C: /* SDRAM_B3CR */ - sdram_ddr_set_bcr(sdram, 3, val, sdram->cfg & 0x80000000); - break; - case 0x80: /* SDRAM_TR */ - sdram->tr = val & 0x018FC01F; - break; - case 0x94: /* SDRAM_ECCCFG */ - sdram->ecccfg = val & 0x00F00000; - break; - case 0x98: /* SDRAM_ECCESR */ - val &= 0xFFF0F000; - if (sdram->eccesr == 0 && val != 0) { - qemu_irq_raise(sdram->irq); - } else if (sdram->eccesr != 0 && val == 0) { - qemu_irq_lower(sdram->irq); - } - sdram->eccesr = val; - break; - default: /* Error */ - break; - } - break; - } -} - -static void ppc4xx_sdram_ddr_reset(DeviceState *dev) -{ - Ppc4xxSdramDdrState *sdram = PPC4xx_SDRAM_DDR(dev); - - sdram->addr = 0; - sdram->bear = 0; - sdram->besr0 = 0; /* No error */ - sdram->besr1 = 0; /* No error */ - sdram->cfg = 0; - sdram->ecccfg = 0; /* No ECC */ - sdram->eccesr = 0; /* No error */ - sdram->pmit = 0x07C00000; - sdram->rtr = 0x05F00000; - sdram->tr = 0x00854009; - /* We pre-initialize RAM banks */ - sdram->status = 0; - sdram->cfg = 0x00800000; -} - -static void ppc4xx_sdram_ddr_realize(DeviceState *dev, Error **errp) -{ - Ppc4xxSdramDdrState *s = PPC4xx_SDRAM_DDR(dev); - Ppc4xxDcrDeviceState *dcr = PPC4xx_DCR_DEVICE(dev); - const ram_addr_t valid_bank_sizes[] = { - 256 * MiB, 128 * MiB, 64 * MiB, 32 * MiB, 16 * MiB, 8 * MiB, 4 * MiB, 0 - }; - - if (s->nbanks < 1 || s->nbanks > 4) { - error_setg(errp, "Invalid number of RAM banks"); - return; - } - if (!s->dram_mr) { - error_setg(errp, "Missing dram memory region"); - return; - } - ppc4xx_sdram_banks(s->dram_mr, s->nbanks, s->bank, valid_bank_sizes); - - sysbus_init_irq(SYS_BUS_DEVICE(dev), &s->irq); - - ppc4xx_dcr_register(dcr, SDRAM0_CFGADDR, - s, &sdram_ddr_dcr_read, &sdram_ddr_dcr_write); - ppc4xx_dcr_register(dcr, SDRAM0_CFGDATA, - s, &sdram_ddr_dcr_read, &sdram_ddr_dcr_write); -} - -static Property ppc4xx_sdram_ddr_props[] = { - DEFINE_PROP_LINK("dram", Ppc4xxSdramDdrState, dram_mr, TYPE_MEMORY_REGION, - MemoryRegion *), - DEFINE_PROP_UINT32("nbanks", Ppc4xxSdramDdrState, nbanks, 4), - DEFINE_PROP_END_OF_LIST(), -}; - -static void ppc4xx_sdram_ddr_class_init(ObjectClass *oc, void *data) -{ - DeviceClass *dc = DEVICE_CLASS(oc); - - dc->realize = ppc4xx_sdram_ddr_realize; - dc->reset = ppc4xx_sdram_ddr_reset; - /* Reason: only works as function of a ppc4xx SoC */ - dc->user_creatable = false; - device_class_set_props(dc, ppc4xx_sdram_ddr_props); -} - -void ppc4xx_sdram_ddr_enable(Ppc4xxSdramDdrState *s) -{ - sdram_ddr_dcr_write(s, SDRAM0_CFGADDR, 0x20); - sdram_ddr_dcr_write(s, SDRAM0_CFGDATA, 0x80000000); -} - -/* - * Split RAM between SDRAM banks. - * - * sdram_bank_sizes[] must be in descending order, that is sizes[i] > sizes[i+1] - * and must be 0-terminated. - * - * The 4xx SDRAM controller supports a small number of banks, and each bank - * must be one of a small set of sizes. The number of banks and the supported - * sizes varies by SoC. - */ -void ppc4xx_sdram_banks(MemoryRegion *ram, int nr_banks, - Ppc4xxSdramBank ram_banks[], - const ram_addr_t sdram_bank_sizes[]) -{ - ram_addr_t size_left = memory_region_size(ram); - ram_addr_t base = 0; - ram_addr_t bank_size; - int i; - int j; - - for (i = 0; i < nr_banks; i++) { - for (j = 0; sdram_bank_sizes[j] != 0; j++) { - bank_size = sdram_bank_sizes[j]; - if (bank_size <= size_left) { - char name[32]; - - ram_banks[i].base = base; - ram_banks[i].size = bank_size; - base += bank_size; - size_left -= bank_size; - snprintf(name, sizeof(name), "ppc4xx.sdram%d", i); - memory_region_init_alias(&ram_banks[i].ram, NULL, name, ram, - ram_banks[i].base, ram_banks[i].size); - break; - } - } - if (!size_left) { - /* No need to use the remaining banks. */ - break; - } - } - - if (size_left) { - ram_addr_t used_size = memory_region_size(ram) - size_left; - GString *s = g_string_new(NULL); - - for (i = 0; sdram_bank_sizes[i]; i++) { - g_string_append_printf(s, "%" PRIi64 "%s", - sdram_bank_sizes[i] / MiB, - sdram_bank_sizes[i + 1] ? ", " : ""); - } - error_report("at most %d bank%s of %s MiB each supported", - nr_banks, nr_banks == 1 ? "" : "s", s->str); - error_printf("Possible valid RAM size: %" PRIi64 " MiB\n", - used_size ? used_size / MiB : sdram_bank_sizes[i - 1] / MiB); - - g_string_free(s, true); - exit(EXIT_FAILURE); - } -} /*****************************************************************************/ /* MAL */ @@ -963,11 +554,6 @@ static void ppc4xx_dcr_class_init(ObjectClass *oc, void *data) static const TypeInfo ppc4xx_types[] = { { - .name = TYPE_PPC4xx_SDRAM_DDR, - .parent = TYPE_PPC4xx_DCR_DEVICE, - .instance_size = sizeof(Ppc4xxSdramDdrState), - .class_init = ppc4xx_sdram_ddr_class_init, - }, { .name = TYPE_PPC4xx_MAL, .parent = TYPE_PPC4xx_DCR_DEVICE, .instance_size = sizeof(Ppc4xxMalState), diff --git a/hw/ppc/ppc4xx_sdram.c b/hw/ppc/ppc4xx_sdram.c new file mode 100644 index 0000000000..8d7137faf3 --- /dev/null +++ b/hw/ppc/ppc4xx_sdram.c @@ -0,0 +1,757 @@ +/* + * QEMU PowerPC 4xx embedded processors SDRAM controller emulation + * + * DDR SDRAM controller: + * Copyright (c) 2007 Jocelyn Mayer + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + * DDR2 SDRAM controller: + * Copyright (c) 2012 François Revol + * Copyright (c) 2016-2019 BALATON Zoltan + * + * This work is licensed under the GNU GPL license version 2 or later. + */ + +#include "qemu/osdep.h" +#include "qemu/units.h" +#include "qapi/error.h" +#include "qemu/log.h" +#include "exec/address-spaces.h" /* get_system_memory() */ +#include "hw/irq.h" +#include "hw/qdev-properties.h" +#include "hw/ppc/ppc4xx.h" +#include "trace.h" + +/*****************************************************************************/ +/* Shared functions */ + +/* + * Split RAM between SDRAM banks. + * + * sdram_bank_sizes[] must be in descending order, that is sizes[i] > sizes[i+1] + * and must be 0-terminated. + * + * The 4xx SDRAM controller supports a small number of banks, and each bank + * must be one of a small set of sizes. The number of banks and the supported + * sizes varies by SoC. + */ +static bool ppc4xx_sdram_banks(MemoryRegion *ram, int nr_banks, + Ppc4xxSdramBank ram_banks[], + const ram_addr_t sdram_bank_sizes[], + Error **errp) +{ + ERRP_GUARD(); + ram_addr_t size_left = memory_region_size(ram); + ram_addr_t base = 0; + ram_addr_t bank_size; + int i; + int j; + + for (i = 0; i < nr_banks; i++) { + for (j = 0; sdram_bank_sizes[j] != 0; j++) { + bank_size = sdram_bank_sizes[j]; + if (bank_size <= size_left) { + char name[32]; + + ram_banks[i].base = base; + ram_banks[i].size = bank_size; + base += bank_size; + size_left -= bank_size; + snprintf(name, sizeof(name), "ppc4xx.sdram%d", i); + memory_region_init_alias(&ram_banks[i].ram, NULL, name, ram, + ram_banks[i].base, ram_banks[i].size); + break; + } + } + if (!size_left) { + /* No need to use the remaining banks. */ + break; + } + } + + if (size_left) { + ram_addr_t used_size = memory_region_size(ram) - size_left; + GString *s = g_string_new(NULL); + + for (i = 0; sdram_bank_sizes[i]; i++) { + g_string_append_printf(s, "%" PRIi64 "%s", + sdram_bank_sizes[i] / MiB, + sdram_bank_sizes[i + 1] ? ", " : ""); + } + error_setg(errp, "Invalid SDRAM banks"); + error_append_hint(errp, "at most %d bank%s of %s MiB each supported\n", + nr_banks, nr_banks == 1 ? "" : "s", s->str); + error_append_hint(errp, "Possible valid RAM size: %" PRIi64 " MiB\n", + used_size ? used_size / MiB : sdram_bank_sizes[i - 1] / MiB); + + g_string_free(s, true); + return false; + } + return true; +} + +static void sdram_bank_map(Ppc4xxSdramBank *bank) +{ + trace_ppc4xx_sdram_map(bank->base, bank->size); + memory_region_init(&bank->container, NULL, "sdram-container", bank->size); + memory_region_add_subregion(&bank->container, 0, &bank->ram); + memory_region_add_subregion(get_system_memory(), bank->base, + &bank->container); +} + +static void sdram_bank_unmap(Ppc4xxSdramBank *bank) +{ + trace_ppc4xx_sdram_unmap(bank->base, bank->size); + memory_region_del_subregion(get_system_memory(), &bank->container); + memory_region_del_subregion(&bank->container, &bank->ram); + object_unparent(OBJECT(&bank->container)); +} + +static void sdram_bank_set_bcr(Ppc4xxSdramBank *bank, uint32_t bcr, + hwaddr base, hwaddr size, int enabled) +{ + if (memory_region_is_mapped(&bank->container)) { + sdram_bank_unmap(bank); + } + bank->bcr = bcr; + bank->base = base; + bank->size = size; + if (enabled && (bcr & 1)) { + sdram_bank_map(bank); + } +} + +enum { + SDRAM0_CFGADDR = 0x010, + SDRAM0_CFGDATA = 0x011, +}; + +/*****************************************************************************/ +/* DDR SDRAM controller */ +#define SDRAM_DDR_BCR_MASK 0xFFDEE001 + +static uint32_t sdram_ddr_bcr(hwaddr ram_base, hwaddr ram_size) +{ + uint32_t bcr; + + switch (ram_size) { + case 4 * MiB: + bcr = 0; + break; + case 8 * MiB: + bcr = 0x20000; + break; + case 16 * MiB: + bcr = 0x40000; + break; + case 32 * MiB: + bcr = 0x60000; + break; + case 64 * MiB: + bcr = 0x80000; + break; + case 128 * MiB: + bcr = 0xA0000; + break; + case 256 * MiB: + bcr = 0xC0000; + break; + default: + qemu_log_mask(LOG_GUEST_ERROR, + "%s: invalid RAM size 0x%" HWADDR_PRIx "\n", __func__, + ram_size); + return 0; + } + bcr |= ram_base & 0xFF800000; + bcr |= 1; + + return bcr; +} + +static inline hwaddr sdram_ddr_base(uint32_t bcr) +{ + return bcr & 0xFF800000; +} + +static hwaddr sdram_ddr_size(uint32_t bcr) +{ + hwaddr size; + int sh; + + sh = (bcr >> 17) & 0x7; + if (sh == 7) { + size = -1; + } else { + size = (4 * MiB) << sh; + } + + return size; +} + +static uint32_t sdram_ddr_dcr_read(void *opaque, int dcrn) +{ + Ppc4xxSdramDdrState *s = opaque; + uint32_t ret; + + switch (dcrn) { + case SDRAM0_CFGADDR: + ret = s->addr; + break; + case SDRAM0_CFGDATA: + switch (s->addr) { + case 0x00: /* SDRAM_BESR0 */ + ret = s->besr0; + break; + case 0x08: /* SDRAM_BESR1 */ + ret = s->besr1; + break; + case 0x10: /* SDRAM_BEAR */ + ret = s->bear; + break; + case 0x20: /* SDRAM_CFG */ + ret = s->cfg; + break; + case 0x24: /* SDRAM_STATUS */ + ret = s->status; + break; + case 0x30: /* SDRAM_RTR */ + ret = s->rtr; + break; + case 0x34: /* SDRAM_PMIT */ + ret = s->pmit; + break; + case 0x40: /* SDRAM_B0CR */ + ret = s->bank[0].bcr; + break; + case 0x44: /* SDRAM_B1CR */ + ret = s->bank[1].bcr; + break; + case 0x48: /* SDRAM_B2CR */ + ret = s->bank[2].bcr; + break; + case 0x4C: /* SDRAM_B3CR */ + ret = s->bank[3].bcr; + break; + case 0x80: /* SDRAM_TR */ + ret = -1; /* ? */ + break; + case 0x94: /* SDRAM_ECCCFG */ + ret = s->ecccfg; + break; + case 0x98: /* SDRAM_ECCESR */ + ret = s->eccesr; + break; + default: /* Error */ + ret = -1; + break; + } + break; + default: + /* Avoid gcc warning */ + ret = 0; + break; + } + + return ret; +} + +static void sdram_ddr_dcr_write(void *opaque, int dcrn, uint32_t val) +{ + Ppc4xxSdramDdrState *s = opaque; + int i; + + switch (dcrn) { + case SDRAM0_CFGADDR: + s->addr = val; + break; + case SDRAM0_CFGDATA: + switch (s->addr) { + case 0x00: /* SDRAM_BESR0 */ + s->besr0 &= ~val; + break; + case 0x08: /* SDRAM_BESR1 */ + s->besr1 &= ~val; + break; + case 0x10: /* SDRAM_BEAR */ + s->bear = val; + break; + case 0x20: /* SDRAM_CFG */ + val &= 0xFFE00000; + if (!(s->cfg & 0x80000000) && (val & 0x80000000)) { + trace_ppc4xx_sdram_enable("enable"); + /* validate all RAM mappings */ + for (i = 0; i < s->nbanks; i++) { + if (s->bank[i].size) { + sdram_bank_set_bcr(&s->bank[i], s->bank[i].bcr, + s->bank[i].base, s->bank[i].size, + 1); + } + } + s->status &= ~0x80000000; + } else if ((s->cfg & 0x80000000) && !(val & 0x80000000)) { + trace_ppc4xx_sdram_enable("disable"); + /* invalidate all RAM mappings */ + for (i = 0; i < s->nbanks; i++) { + if (s->bank[i].size) { + sdram_bank_set_bcr(&s->bank[i], s->bank[i].bcr, + s->bank[i].base, s->bank[i].size, + 0); + } + } + s->status |= 0x80000000; + } + if (!(s->cfg & 0x40000000) && (val & 0x40000000)) { + s->status |= 0x40000000; + } else if ((s->cfg & 0x40000000) && !(val & 0x40000000)) { + s->status &= ~0x40000000; + } + s->cfg = val; + break; + case 0x24: /* SDRAM_STATUS */ + /* Read-only register */ + break; + case 0x30: /* SDRAM_RTR */ + s->rtr = val & 0x3FF80000; + break; + case 0x34: /* SDRAM_PMIT */ + s->pmit = (val & 0xF8000000) | 0x07C00000; + break; + case 0x40: /* SDRAM_B0CR */ + case 0x44: /* SDRAM_B1CR */ + case 0x48: /* SDRAM_B2CR */ + case 0x4C: /* SDRAM_B3CR */ + i = (s->addr - 0x40) / 4; + val &= SDRAM_DDR_BCR_MASK; + if (s->bank[i].size) { + sdram_bank_set_bcr(&s->bank[i], val, + sdram_ddr_base(val), sdram_ddr_size(val), + s->cfg & 0x80000000); + } + break; + case 0x80: /* SDRAM_TR */ + s->tr = val & 0x018FC01F; + break; + case 0x94: /* SDRAM_ECCCFG */ + s->ecccfg = val & 0x00F00000; + break; + case 0x98: /* SDRAM_ECCESR */ + val &= 0xFFF0F000; + if (s->eccesr == 0 && val != 0) { + qemu_irq_raise(s->irq); + } else if (s->eccesr != 0 && val == 0) { + qemu_irq_lower(s->irq); + } + s->eccesr = val; + break; + default: /* Error */ + break; + } + break; + } +} + +static void ppc4xx_sdram_ddr_reset(DeviceState *dev) +{ + Ppc4xxSdramDdrState *s = PPC4xx_SDRAM_DDR(dev); + + s->addr = 0; + s->bear = 0; + s->besr0 = 0; /* No error */ + s->besr1 = 0; /* No error */ + s->cfg = 0; + s->ecccfg = 0; /* No ECC */ + s->eccesr = 0; /* No error */ + s->pmit = 0x07C00000; + s->rtr = 0x05F00000; + s->tr = 0x00854009; + /* We pre-initialize RAM banks */ + s->status = 0; + s->cfg = 0x00800000; +} + +static void ppc4xx_sdram_ddr_realize(DeviceState *dev, Error **errp) +{ + Ppc4xxSdramDdrState *s = PPC4xx_SDRAM_DDR(dev); + Ppc4xxDcrDeviceState *dcr = PPC4xx_DCR_DEVICE(dev); + const ram_addr_t valid_bank_sizes[] = { + 256 * MiB, 128 * MiB, 64 * MiB, 32 * MiB, 16 * MiB, 8 * MiB, 4 * MiB, 0 + }; + int i; + + if (s->nbanks < 1 || s->nbanks > 4) { + error_setg(errp, "Invalid number of RAM banks"); + return; + } + if (!s->dram_mr) { + error_setg(errp, "Missing dram memory region"); + return; + } + if (!ppc4xx_sdram_banks(s->dram_mr, s->nbanks, s->bank, + valid_bank_sizes, errp)) { + return; + } + for (i = 0; i < s->nbanks; i++) { + if (s->bank[i].size) { + s->bank[i].bcr = sdram_ddr_bcr(s->bank[i].base, s->bank[i].size); + sdram_bank_set_bcr(&s->bank[i], s->bank[i].bcr, + s->bank[i].base, s->bank[i].size, 0); + } else { + sdram_bank_set_bcr(&s->bank[i], 0, 0, 0, 0); + } + trace_ppc4xx_sdram_init(sdram_ddr_base(s->bank[i].bcr), + sdram_ddr_size(s->bank[i].bcr), + s->bank[i].bcr); + } + + sysbus_init_irq(SYS_BUS_DEVICE(dev), &s->irq); + + ppc4xx_dcr_register(dcr, SDRAM0_CFGADDR, + s, &sdram_ddr_dcr_read, &sdram_ddr_dcr_write); + ppc4xx_dcr_register(dcr, SDRAM0_CFGDATA, + s, &sdram_ddr_dcr_read, &sdram_ddr_dcr_write); +} + +static Property ppc4xx_sdram_ddr_props[] = { + DEFINE_PROP_LINK("dram", Ppc4xxSdramDdrState, dram_mr, TYPE_MEMORY_REGION, + MemoryRegion *), + DEFINE_PROP_UINT32("nbanks", Ppc4xxSdramDdrState, nbanks, 4), + DEFINE_PROP_END_OF_LIST(), +}; + +static void ppc4xx_sdram_ddr_class_init(ObjectClass *oc, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(oc); + + dc->realize = ppc4xx_sdram_ddr_realize; + dc->reset = ppc4xx_sdram_ddr_reset; + /* Reason: only works as function of a ppc4xx SoC */ + dc->user_creatable = false; + device_class_set_props(dc, ppc4xx_sdram_ddr_props); +} + +void ppc4xx_sdram_ddr_enable(Ppc4xxSdramDdrState *s) +{ + sdram_ddr_dcr_write(s, SDRAM0_CFGADDR, 0x20); + sdram_ddr_dcr_write(s, SDRAM0_CFGDATA, 0x80000000); +} + +/*****************************************************************************/ +/* DDR2 SDRAM controller */ +#define SDRAM_DDR2_BCR_MASK 0xffe0ffc1 + +enum { + SDRAM_R0BAS = 0x40, + SDRAM_R1BAS, + SDRAM_R2BAS, + SDRAM_R3BAS, + SDRAM_CONF1HB = 0x45, + SDRAM_PLBADDULL = 0x4a, + SDRAM_CONF1LL = 0x4b, + SDRAM_CONFPATHB = 0x4f, + SDRAM_PLBADDUHB = 0x50, +}; + +static uint32_t sdram_ddr2_bcr(hwaddr ram_base, hwaddr ram_size) +{ + uint32_t bcr; + + switch (ram_size) { + case 8 * MiB: + bcr = 0xffc0; + break; + case 16 * MiB: + bcr = 0xff80; + break; + case 32 * MiB: + bcr = 0xff00; + break; + case 64 * MiB: + bcr = 0xfe00; + break; + case 128 * MiB: + bcr = 0xfc00; + break; + case 256 * MiB: + bcr = 0xf800; + break; + case 512 * MiB: + bcr = 0xf000; + break; + case 1 * GiB: + bcr = 0xe000; + break; + case 2 * GiB: + bcr = 0xc000; + break; + case 4 * GiB: + bcr = 0x8000; + break; + default: + error_report("invalid RAM size " TARGET_FMT_plx, ram_size); + return 0; + } + bcr |= ram_base >> 2 & 0xffe00000; + bcr |= 1; + + return bcr; +} + +static inline hwaddr sdram_ddr2_base(uint32_t bcr) +{ + return (bcr & 0xffe00000) << 2; +} + +static hwaddr sdram_ddr2_size(uint32_t bcr) +{ + hwaddr size; + int sh; + + sh = 1024 - ((bcr >> 6) & 0x3ff); + size = 8 * MiB * sh; + + return size; +} + +static uint32_t sdram_ddr2_dcr_read(void *opaque, int dcrn) +{ + Ppc4xxSdramDdr2State *s = opaque; + uint32_t ret = 0; + + switch (dcrn) { + case SDRAM_R0BAS: + case SDRAM_R1BAS: + case SDRAM_R2BAS: + case SDRAM_R3BAS: + if (s->bank[dcrn - SDRAM_R0BAS].size) { + ret = sdram_ddr2_bcr(s->bank[dcrn - SDRAM_R0BAS].base, + s->bank[dcrn - SDRAM_R0BAS].size); + } + break; + case SDRAM_CONF1HB: + case SDRAM_CONF1LL: + case SDRAM_CONFPATHB: + case SDRAM_PLBADDULL: + case SDRAM_PLBADDUHB: + break; + case SDRAM0_CFGADDR: + ret = s->addr; + break; + case SDRAM0_CFGDATA: + switch (s->addr) { + case 0x14: /* SDRAM_MCSTAT (405EX) */ + case 0x1F: + ret = 0x80000000; + break; + case 0x21: /* SDRAM_MCOPT2 */ + ret = s->mcopt2; + break; + case 0x40: /* SDRAM_MB0CF */ + ret = 0x00008001; + break; + case 0x7A: /* SDRAM_DLCR */ + ret = 0x02000000; + break; + case 0xE1: /* SDR0_DDR0 */ + ret = SDR0_DDR0_DDRM_ENCODE(1) | SDR0_DDR0_DDRM_DDR1; + break; + default: + break; + } + break; + default: + break; + } + + return ret; +} + +#define SDRAM_DDR2_MCOPT2_DCEN BIT(27) + +static void sdram_ddr2_dcr_write(void *opaque, int dcrn, uint32_t val) +{ + Ppc4xxSdramDdr2State *s = opaque; + int i; + + switch (dcrn) { + case SDRAM_R0BAS: + case SDRAM_R1BAS: + case SDRAM_R2BAS: + case SDRAM_R3BAS: + case SDRAM_CONF1HB: + case SDRAM_CONF1LL: + case SDRAM_CONFPATHB: + case SDRAM_PLBADDULL: + case SDRAM_PLBADDUHB: + break; + case SDRAM0_CFGADDR: + s->addr = val; + break; + case SDRAM0_CFGDATA: + switch (s->addr) { + case 0x00: /* B0CR */ + break; + case 0x21: /* SDRAM_MCOPT2 */ + if (!(s->mcopt2 & SDRAM_DDR2_MCOPT2_DCEN) && + (val & SDRAM_DDR2_MCOPT2_DCEN)) { + trace_ppc4xx_sdram_enable("enable"); + /* validate all RAM mappings */ + for (i = 0; i < s->nbanks; i++) { + if (s->bank[i].size) { + sdram_bank_set_bcr(&s->bank[i], s->bank[i].bcr, + s->bank[i].base, s->bank[i].size, + 1); + } + } + s->mcopt2 |= SDRAM_DDR2_MCOPT2_DCEN; + } else if ((s->mcopt2 & SDRAM_DDR2_MCOPT2_DCEN) && + !(val & SDRAM_DDR2_MCOPT2_DCEN)) { + trace_ppc4xx_sdram_enable("disable"); + /* invalidate all RAM mappings */ + for (i = 0; i < s->nbanks; i++) { + if (s->bank[i].size) { + sdram_bank_set_bcr(&s->bank[i], s->bank[i].bcr, + s->bank[i].base, s->bank[i].size, + 0); + } + } + s->mcopt2 &= ~SDRAM_DDR2_MCOPT2_DCEN; + } + break; + default: + break; + } + break; + default: + break; + } +} + +static void ppc4xx_sdram_ddr2_reset(DeviceState *dev) +{ + Ppc4xxSdramDdr2State *s = PPC4xx_SDRAM_DDR2(dev); + + s->addr = 0; + s->mcopt2 = 0; +} + +static void ppc4xx_sdram_ddr2_realize(DeviceState *dev, Error **errp) +{ + Ppc4xxSdramDdr2State *s = PPC4xx_SDRAM_DDR2(dev); + Ppc4xxDcrDeviceState *dcr = PPC4xx_DCR_DEVICE(dev); + /* + * SoC also has 4 GiB but that causes problem with 32 bit + * builds (4*GiB overflows the 32 bit ram_addr_t). + */ + const ram_addr_t valid_bank_sizes[] = { + 2 * GiB, 1 * GiB, 512 * MiB, 256 * MiB, 128 * MiB, + 64 * MiB, 32 * MiB, 16 * MiB, 8 * MiB, 0 + }; + int i; + + if (s->nbanks < 1 || s->nbanks > 4) { + error_setg(errp, "Invalid number of RAM banks"); + return; + } + if (!s->dram_mr) { + error_setg(errp, "Missing dram memory region"); + return; + } + if (!ppc4xx_sdram_banks(s->dram_mr, s->nbanks, s->bank, + valid_bank_sizes, errp)) { + return; + } + for (i = 0; i < s->nbanks; i++) { + if (s->bank[i].size) { + s->bank[i].bcr = sdram_ddr2_bcr(s->bank[i].base, s->bank[i].size); + s->bank[i].bcr &= SDRAM_DDR2_BCR_MASK; + sdram_bank_set_bcr(&s->bank[i], s->bank[i].bcr, + s->bank[i].base, s->bank[i].size, 0); + } else { + sdram_bank_set_bcr(&s->bank[i], 0, 0, 0, 0); + } + trace_ppc4xx_sdram_init(sdram_ddr2_base(s->bank[i].bcr), + sdram_ddr2_size(s->bank[i].bcr), + s->bank[i].bcr); + } + + ppc4xx_dcr_register(dcr, SDRAM0_CFGADDR, + s, &sdram_ddr2_dcr_read, &sdram_ddr2_dcr_write); + ppc4xx_dcr_register(dcr, SDRAM0_CFGDATA, + s, &sdram_ddr2_dcr_read, &sdram_ddr2_dcr_write); + + ppc4xx_dcr_register(dcr, SDRAM_R0BAS, + s, &sdram_ddr2_dcr_read, &sdram_ddr2_dcr_write); + ppc4xx_dcr_register(dcr, SDRAM_R1BAS, + s, &sdram_ddr2_dcr_read, &sdram_ddr2_dcr_write); + ppc4xx_dcr_register(dcr, SDRAM_R2BAS, + s, &sdram_ddr2_dcr_read, &sdram_ddr2_dcr_write); + ppc4xx_dcr_register(dcr, SDRAM_R3BAS, + s, &sdram_ddr2_dcr_read, &sdram_ddr2_dcr_write); + ppc4xx_dcr_register(dcr, SDRAM_CONF1HB, + s, &sdram_ddr2_dcr_read, &sdram_ddr2_dcr_write); + ppc4xx_dcr_register(dcr, SDRAM_PLBADDULL, + s, &sdram_ddr2_dcr_read, &sdram_ddr2_dcr_write); + ppc4xx_dcr_register(dcr, SDRAM_CONF1LL, + s, &sdram_ddr2_dcr_read, &sdram_ddr2_dcr_write); + ppc4xx_dcr_register(dcr, SDRAM_CONFPATHB, + s, &sdram_ddr2_dcr_read, &sdram_ddr2_dcr_write); + ppc4xx_dcr_register(dcr, SDRAM_PLBADDUHB, + s, &sdram_ddr2_dcr_read, &sdram_ddr2_dcr_write); +} + +static Property ppc4xx_sdram_ddr2_props[] = { + DEFINE_PROP_LINK("dram", Ppc4xxSdramDdr2State, dram_mr, TYPE_MEMORY_REGION, + MemoryRegion *), + DEFINE_PROP_UINT32("nbanks", Ppc4xxSdramDdr2State, nbanks, 4), + DEFINE_PROP_END_OF_LIST(), +}; + +static void ppc4xx_sdram_ddr2_class_init(ObjectClass *oc, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(oc); + + dc->realize = ppc4xx_sdram_ddr2_realize; + dc->reset = ppc4xx_sdram_ddr2_reset; + /* Reason: only works as function of a ppc4xx SoC */ + dc->user_creatable = false; + device_class_set_props(dc, ppc4xx_sdram_ddr2_props); +} + +void ppc4xx_sdram_ddr2_enable(Ppc4xxSdramDdr2State *s) +{ + sdram_ddr2_dcr_write(s, SDRAM0_CFGADDR, 0x21); + sdram_ddr2_dcr_write(s, SDRAM0_CFGDATA, 0x08000000); +} + +static const TypeInfo ppc4xx_sdram_types[] = { + { + .name = TYPE_PPC4xx_SDRAM_DDR, + .parent = TYPE_PPC4xx_DCR_DEVICE, + .instance_size = sizeof(Ppc4xxSdramDdrState), + .class_init = ppc4xx_sdram_ddr_class_init, + }, { + .name = TYPE_PPC4xx_SDRAM_DDR2, + .parent = TYPE_PPC4xx_DCR_DEVICE, + .instance_size = sizeof(Ppc4xxSdramDdr2State), + .class_init = ppc4xx_sdram_ddr2_class_init, + } +}; + +DEFINE_TYPES(ppc4xx_sdram_types) diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index f79ac85ca1..66b414d2e9 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -1623,7 +1623,7 @@ void spapr_check_mmu_mode(bool guest_radix) } } -static void spapr_machine_reset(MachineState *machine) +static void spapr_machine_reset(MachineState *machine, ShutdownCause reason) { SpaprMachineState *spapr = SPAPR_MACHINE(machine); PowerPCCPU *first_ppc_cpu; @@ -1649,7 +1649,7 @@ static void spapr_machine_reset(MachineState *machine) spapr_setup_hpt(spapr); } - qemu_devices_reset(); + qemu_devices_reset(reason); spapr_ovec_cleanup(spapr->ov5_cas); spapr->ov5_cas = spapr_ovec_new(); diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c index 891206e893..925ff523cc 100644 --- a/hw/ppc/spapr_hcall.c +++ b/hw/ppc/spapr_hcall.c @@ -490,6 +490,7 @@ static target_ulong h_cede(PowerPCCPU *cpu, SpaprMachineState *spapr, env->msr |= (1ULL << MSR_EE); hreg_compute_hflags(env); + ppc_maybe_interrupt(env); if (spapr_cpu->prod) { spapr_cpu->prod = false; @@ -500,6 +501,7 @@ static target_ulong h_cede(PowerPCCPU *cpu, SpaprMachineState *spapr, cs->halted = 1; cs->exception_index = EXCP_HLT; cs->exit_request = 1; + ppc_maybe_interrupt(env); } return H_SUCCESS; @@ -521,6 +523,7 @@ static target_ulong h_confer_self(PowerPCCPU *cpu) cs->halted = 1; cs->exception_index = EXCP_HALTED; cs->exit_request = 1; + ppc_maybe_interrupt(&cpu->env); return H_SUCCESS; } @@ -633,6 +636,7 @@ static target_ulong h_prod(PowerPCCPU *cpu, SpaprMachineState *spapr, spapr_cpu = spapr_cpu_state(tcpu); spapr_cpu->prod = true; cs->halted = 0; + ppc_maybe_interrupt(&cpu->env); qemu_cpu_kick(cs); return H_SUCCESS; @@ -1669,6 +1673,7 @@ static target_ulong h_enter_nested(PowerPCCPU *cpu, spapr_cpu->in_nested = true; hreg_compute_hflags(env); + ppc_maybe_interrupt(env); tlb_flush(cs); env->reserve_addr = -1; /* Reset the reservation */ @@ -1810,6 +1815,7 @@ out_restore_l1: spapr_cpu->in_nested = false; hreg_compute_hflags(env); + ppc_maybe_interrupt(env); tlb_flush(cs); env->reserve_addr = -1; /* Reset the reservation */ diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c index d58b65e88f..3f664ea02c 100644 --- a/hw/ppc/spapr_rtas.c +++ b/hw/ppc/spapr_rtas.c @@ -214,9 +214,9 @@ static void rtas_stop_self(PowerPCCPU *cpu, SpaprMachineState *spapr, * guest. * For the same reason, set PSSCR_EC. */ - ppc_store_lpcr(cpu, env->spr[SPR_LPCR] & ~pcc->lpcr_pm); env->spr[SPR_PSSCR] |= PSSCR_EC; cs->halted = 1; + ppc_store_lpcr(cpu, env->spr[SPR_LPCR] & ~pcc->lpcr_pm); kvmppc_set_reg_ppc_online(cpu, 0); qemu_cpu_kick(cs); } diff --git a/hw/ppc/trace-events b/hw/ppc/trace-events index a07d5aca0f..f670e8906c 100644 --- a/hw/ppc/trace-events +++ b/hw/ppc/trace-events @@ -127,7 +127,7 @@ ppc40x_set_tb_clk(uint32_t value) "new frequency %" PRIu32 ppc40x_timers_init(uint32_t value) "frequency %" PRIu32 ppc_irq_set(void *env, uint32_t pin, uint32_t level) "env [%p] pin %d level %d" -ppc_irq_set_exit(void *env, uint32_t n_IRQ, uint32_t level, uint32_t pending, uint32_t request) "env [%p] n_IRQ %d level %d => pending 0x%08" PRIx32 " req 0x%08" PRIx32 +ppc_irq_set_exit(void *env, uint32_t irq, uint32_t level, uint32_t pending, uint32_t request) "env [%p] irq 0x%05" PRIx32 " level %d => pending 0x%08" PRIx32 " req 0x%08" PRIx32 ppc_irq_set_state(const char *name, uint32_t level) "\"%s\" level %d" ppc_irq_reset(const char *name) "%s" ppc_irq_cpu(const char *action) "%s" @@ -179,3 +179,4 @@ ppc405ep_clocks_setup(const char *trace) "%s" ppc4xx_sdram_enable(const char *trace) "%s SDRAM controller" ppc4xx_sdram_unmap(uint64_t addr, uint64_t size) "Unmap RAM area 0x%" PRIx64 " size 0x%" PRIx64 ppc4xx_sdram_map(uint64_t addr, uint64_t size) "Map RAM area 0x%" PRIx64 " size 0x%" PRIx64 +ppc4xx_sdram_init(uint64_t base, uint64_t size, uint32_t bcr) "Init RAM area 0x%" PRIx64 " size 0x%" PRIx64 " bcr 0x%x" diff --git a/hw/riscv/boot.c b/hw/riscv/boot.c index e82bf27338..ebd351c840 100644 --- a/hw/riscv/boot.c +++ b/hw/riscv/boot.c @@ -30,6 +30,7 @@ #include "sysemu/device_tree.h" #include "sysemu/qtest.h" #include "sysemu/kvm.h" +#include "sysemu/reset.h" #include <libfdt.h> @@ -241,6 +242,8 @@ uint64_t riscv_load_fdt(hwaddr dram_base, uint64_t mem_size, void *fdt) rom_add_blob_fixed_as("fdt", fdt, fdtsize, fdt_addr, &address_space_memory); + qemu_register_reset_nosnapshotload(qemu_fdt_randomize_seeds, + rom_ptr_for_as(&address_space_memory, fdt_addr, fdtsize)); return fdt_addr; } diff --git a/hw/rx/rx-gdbsim.c b/hw/rx/rx-gdbsim.c index 8ffe1b8035..47c17026c7 100644 --- a/hw/rx/rx-gdbsim.c +++ b/hw/rx/rx-gdbsim.c @@ -25,6 +25,7 @@ #include "hw/rx/rx62n.h" #include "sysemu/qtest.h" #include "sysemu/device_tree.h" +#include "sysemu/reset.h" #include "hw/boards.h" #include "qom/object.h" @@ -148,6 +149,8 @@ static void rx_gdbsim_init(MachineState *machine) dtb_offset = ROUND_DOWN(machine->ram_size - dtb_size, 16); rom_add_blob_fixed("dtb", dtb, dtb_size, SDRAM_BASE + dtb_offset); + qemu_register_reset_nosnapshotload(qemu_fdt_randomize_seeds, + rom_ptr(SDRAM_BASE + dtb_offset, dtb_size)); /* Set dtb address to R1 */ RX_CPU(first_cpu)->env.regs[1] = SDRAM_BASE + dtb_offset; } diff --git a/hw/s390x/pv.c b/hw/s390x/pv.c index 728ba24547..8dfe92d8df 100644 --- a/hw/s390x/pv.c +++ b/hw/s390x/pv.c @@ -50,7 +50,7 @@ static int __s390_pv_cmd(uint32_t cmd, const char *cmdname, void *data) * This macro lets us pass the command as a string to the function so * we can print it on an error. */ -#define s390_pv_cmd(cmd, data) __s390_pv_cmd(cmd, #cmd, data); +#define s390_pv_cmd(cmd, data) __s390_pv_cmd(cmd, #cmd, data) #define s390_pv_cmd_exit(cmd, data) \ { \ int rc; \ diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c index 1cc20d8717..806de32034 100644 --- a/hw/s390x/s390-virtio-ccw.c +++ b/hw/s390x/s390-virtio-ccw.c @@ -411,7 +411,7 @@ static void s390_pv_prepare_reset(S390CcwMachineState *ms) s390_pv_prep_reset(); } -static void s390_machine_reset(MachineState *machine) +static void s390_machine_reset(MachineState *machine, ShutdownCause reason) { S390CcwMachineState *ms = S390_CCW_MACHINE(machine); enum s390_reset reset_type; @@ -433,7 +433,7 @@ static void s390_machine_reset(MachineState *machine) s390_machine_unprotect(ms); } - qemu_devices_reset(); + qemu_devices_reset(reason); s390_crypto_reset(); /* configure and start the ipl CPU only */ diff --git a/hw/s390x/tod-kvm.c b/hw/s390x/tod-kvm.c index 9d0cbfbce2..e2202dae2d 100644 --- a/hw/s390x/tod-kvm.c +++ b/hw/s390x/tod-kvm.c @@ -13,6 +13,7 @@ #include "qemu/module.h" #include "sysemu/runstate.h" #include "hw/s390x/tod.h" +#include "hw/s390x/pv.h" #include "kvm/kvm_s390x.h" static void kvm_s390_get_tod_raw(S390TOD *tod, Error **errp) @@ -84,6 +85,14 @@ static void kvm_s390_tod_vm_state_change(void *opaque, bool running, S390TODState *td = opaque; Error *local_err = NULL; + /* + * Under PV, the clock is under ultravisor control, hence we cannot restore + * it on resume. + */ + if (s390_is_pv()) { + return; + } + if (running && td->stopped) { /* Set the old TOD when running the VM - start the TOD clock. */ kvm_s390_set_tod_raw(&td->base, &local_err); diff --git a/hw/sd/sdhci-internal.h b/hw/sd/sdhci-internal.h index e8c753d6d1..964570f8e8 100644 --- a/hw/sd/sdhci-internal.h +++ b/hw/sd/sdhci-internal.h @@ -288,26 +288,6 @@ enum { extern const VMStateDescription sdhci_vmstate; - -#define ESDHC_MIX_CTRL 0x48 - -#define ESDHC_VENDOR_SPEC 0xc0 -#define ESDHC_IMX_FRC_SDCLK_ON (1 << 8) - -#define ESDHC_DLL_CTRL 0x60 - -#define ESDHC_TUNING_CTRL 0xcc -#define ESDHC_TUNE_CTRL_STATUS 0x68 -#define ESDHC_WTMK_LVL 0x44 - -/* Undocumented register used by guests working around erratum ERR004536 */ -#define ESDHC_UNDOCUMENTED_REG27 0x6c - -#define ESDHC_CTRL_4BITBUS (0x1 << 1) -#define ESDHC_CTRL_8BITBUS (0x2 << 1) - -#define ESDHC_PRNSTS_SDSTB (1 << 3) - /* * Default SD/MMC host controller features information, which will be * presented in CAPABILITIES register of generic SD host controller at reset. diff --git a/hw/sd/sdhci.c b/hw/sd/sdhci.c index 0e5e988927..306070c872 100644 --- a/hw/sd/sdhci.c +++ b/hw/sd/sdhci.c @@ -1577,6 +1577,25 @@ static const TypeInfo sdhci_bus_info = { /* --- qdev i.MX eSDHC --- */ +#define USDHC_MIX_CTRL 0x48 + +#define USDHC_VENDOR_SPEC 0xc0 +#define USDHC_IMX_FRC_SDCLK_ON (1 << 8) + +#define USDHC_DLL_CTRL 0x60 + +#define USDHC_TUNING_CTRL 0xcc +#define USDHC_TUNE_CTRL_STATUS 0x68 +#define USDHC_WTMK_LVL 0x44 + +/* Undocumented register used by guests working around erratum ERR004536 */ +#define USDHC_UNDOCUMENTED_REG27 0x6c + +#define USDHC_CTRL_4BITBUS (0x1 << 1) +#define USDHC_CTRL_8BITBUS (0x2 << 1) + +#define USDHC_PRNSTS_SDSTB (1 << 3) + static uint64_t usdhc_read(void *opaque, hwaddr offset, unsigned size) { SDHCIState *s = SYSBUS_SDHCI(opaque); @@ -1596,11 +1615,11 @@ static uint64_t usdhc_read(void *opaque, hwaddr offset, unsigned size) hostctl1 = SDHC_DMA_TYPE(s->hostctl1) << (8 - 3); if (s->hostctl1 & SDHC_CTRL_8BITBUS) { - hostctl1 |= ESDHC_CTRL_8BITBUS; + hostctl1 |= USDHC_CTRL_8BITBUS; } if (s->hostctl1 & SDHC_CTRL_4BITBUS) { - hostctl1 |= ESDHC_CTRL_4BITBUS; + hostctl1 |= USDHC_CTRL_4BITBUS; } ret = hostctl1; @@ -1611,21 +1630,21 @@ static uint64_t usdhc_read(void *opaque, hwaddr offset, unsigned size) case SDHC_PRNSTS: /* Add SDSTB (SD Clock Stable) bit to PRNSTS */ - ret = sdhci_read(opaque, offset, size) & ~ESDHC_PRNSTS_SDSTB; + ret = sdhci_read(opaque, offset, size) & ~USDHC_PRNSTS_SDSTB; if (s->clkcon & SDHC_CLOCK_INT_STABLE) { - ret |= ESDHC_PRNSTS_SDSTB; + ret |= USDHC_PRNSTS_SDSTB; } break; - case ESDHC_VENDOR_SPEC: + case USDHC_VENDOR_SPEC: ret = s->vendor_spec; break; - case ESDHC_DLL_CTRL: - case ESDHC_TUNE_CTRL_STATUS: - case ESDHC_UNDOCUMENTED_REG27: - case ESDHC_TUNING_CTRL: - case ESDHC_MIX_CTRL: - case ESDHC_WTMK_LVL: + case USDHC_DLL_CTRL: + case USDHC_TUNE_CTRL_STATUS: + case USDHC_UNDOCUMENTED_REG27: + case USDHC_TUNING_CTRL: + case USDHC_MIX_CTRL: + case USDHC_WTMK_LVL: ret = 0; break; } @@ -1641,18 +1660,18 @@ usdhc_write(void *opaque, hwaddr offset, uint64_t val, unsigned size) uint32_t value = (uint32_t)val; switch (offset) { - case ESDHC_DLL_CTRL: - case ESDHC_TUNE_CTRL_STATUS: - case ESDHC_UNDOCUMENTED_REG27: - case ESDHC_TUNING_CTRL: - case ESDHC_WTMK_LVL: + case USDHC_DLL_CTRL: + case USDHC_TUNE_CTRL_STATUS: + case USDHC_UNDOCUMENTED_REG27: + case USDHC_TUNING_CTRL: + case USDHC_WTMK_LVL: break; - case ESDHC_VENDOR_SPEC: + case USDHC_VENDOR_SPEC: s->vendor_spec = value; switch (s->vendor) { case SDHCI_VENDOR_IMX: - if (value & ESDHC_IMX_FRC_SDCLK_ON) { + if (value & USDHC_IMX_FRC_SDCLK_ON) { s->prnsts &= ~SDHC_IMX_CLOCK_GATE_OFF; } else { s->prnsts |= SDHC_IMX_CLOCK_GATE_OFF; @@ -1721,12 +1740,12 @@ usdhc_write(void *opaque, hwaddr offset, uint64_t val, unsigned size) * Second, split "Data Transfer Width" from bits 2 and 1 in to * bits 5 and 1 */ - if (value & ESDHC_CTRL_8BITBUS) { + if (value & USDHC_CTRL_8BITBUS) { hostctl1 |= SDHC_CTRL_8BITBUS; } - if (value & ESDHC_CTRL_4BITBUS) { - hostctl1 |= ESDHC_CTRL_4BITBUS; + if (value & USDHC_CTRL_4BITBUS) { + hostctl1 |= USDHC_CTRL_4BITBUS; } /* @@ -1749,7 +1768,7 @@ usdhc_write(void *opaque, hwaddr offset, uint64_t val, unsigned size) sdhci_write(opaque, offset, value, size); break; - case ESDHC_MIX_CTRL: + case USDHC_MIX_CTRL: /* * So, when SD/MMC stack in Linux tries to write to "Transfer * Mode Register", ESDHC i.MX quirk code will translate it diff --git a/hw/timer/imx_epit.c b/hw/timer/imx_epit.c index 2bf8c754b2..ec0fa440d7 100644 --- a/hw/timer/imx_epit.c +++ b/hw/timer/imx_epit.c @@ -275,10 +275,15 @@ static void imx_epit_write(void *opaque, hwaddr offset, uint64_t value, /* If IOVW bit is set then set the timer value */ ptimer_set_count(s->timer_reload, s->lr); } - + /* + * Commit the change to s->timer_reload, so it can propagate. Otherwise + * the timer interrupt may not fire properly. The commit must happen + * before calling imx_epit_reload_compare_timer(), which reads + * s->timer_reload internally again. + */ + ptimer_transaction_commit(s->timer_reload); imx_epit_reload_compare_timer(s); ptimer_transaction_commit(s->timer_cmp); - ptimer_transaction_commit(s->timer_reload); break; case 3: /* CMP */ diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c index 596d4434d2..5bd14cad96 100644 --- a/hw/virtio/vhost-shadow-virtqueue.c +++ b/hw/virtio/vhost-shadow-virtqueue.c @@ -33,6 +33,7 @@ bool vhost_svq_valid_features(uint64_t features, Error **errp) ++b) { switch (b) { case VIRTIO_F_ANY_LAYOUT: + case VIRTIO_RING_F_EVENT_IDX: continue; case VIRTIO_F_ACCESS_PLATFORM: @@ -218,12 +219,22 @@ static bool vhost_svq_add_split(VhostShadowVirtqueue *svq, static void vhost_svq_kick(VhostShadowVirtqueue *svq) { + bool needs_kick; + /* * We need to expose the available array entries before checking the used * flags */ smp_mb(); - if (svq->vring.used->flags & VRING_USED_F_NO_NOTIFY) { + + if (virtio_vdev_has_feature(svq->vdev, VIRTIO_RING_F_EVENT_IDX)) { + uint16_t avail_event = *(uint16_t *)(&svq->vring.used->ring[svq->vring.num]); + needs_kick = vring_need_event(avail_event, svq->shadow_avail_idx, svq->shadow_avail_idx - 1); + } else { + needs_kick = !(svq->vring.used->flags & VRING_USED_F_NO_NOTIFY); + } + + if (!needs_kick) { return; } @@ -369,15 +380,27 @@ static bool vhost_svq_more_used(VhostShadowVirtqueue *svq) */ static bool vhost_svq_enable_notification(VhostShadowVirtqueue *svq) { - svq->vring.avail->flags &= ~cpu_to_le16(VRING_AVAIL_F_NO_INTERRUPT); - /* Make sure the flag is written before the read of used_idx */ + if (virtio_vdev_has_feature(svq->vdev, VIRTIO_RING_F_EVENT_IDX)) { + uint16_t *used_event = (uint16_t *)&svq->vring.avail->ring[svq->vring.num]; + *used_event = svq->shadow_used_idx; + } else { + svq->vring.avail->flags &= ~cpu_to_le16(VRING_AVAIL_F_NO_INTERRUPT); + } + + /* Make sure the event is enabled before the read of used_idx */ smp_mb(); return !vhost_svq_more_used(svq); } static void vhost_svq_disable_notification(VhostShadowVirtqueue *svq) { - svq->vring.avail->flags |= cpu_to_le16(VRING_AVAIL_F_NO_INTERRUPT); + /* + * No need to disable notification in the event idx case, since used event + * index is already an index too far away. + */ + if (!virtio_vdev_has_feature(svq->vdev, VIRTIO_RING_F_EVENT_IDX)) { + svq->vring.avail->flags |= cpu_to_le16(VRING_AVAIL_F_NO_INTERRUPT); + } } static uint16_t vhost_svq_last_desc_of_chain(const VhostShadowVirtqueue *svq, @@ -570,16 +593,16 @@ void vhost_svq_get_vring_addr(const VhostShadowVirtqueue *svq, size_t vhost_svq_driver_area_size(const VhostShadowVirtqueue *svq) { size_t desc_size = sizeof(vring_desc_t) * svq->vring.num; - size_t avail_size = offsetof(vring_avail_t, ring) + - sizeof(uint16_t) * svq->vring.num; + size_t avail_size = offsetof(vring_avail_t, ring[svq->vring.num]) + + sizeof(uint16_t); return ROUND_UP(desc_size + avail_size, qemu_real_host_page_size()); } size_t vhost_svq_device_area_size(const VhostShadowVirtqueue *svq) { - size_t used_size = offsetof(vring_used_t, ring) + - sizeof(vring_used_elem_t) * svq->vring.num; + size_t used_size = offsetof(vring_used_t, ring[svq->vring.num]) + + sizeof(uint16_t); return ROUND_UP(used_size, qemu_real_host_page_size()); } diff --git a/hw/virtio/virtio-mem.c b/hw/virtio/virtio-mem.c index 30d03e987a..ed170def48 100644 --- a/hw/virtio/virtio-mem.c +++ b/hw/virtio/virtio-mem.c @@ -467,7 +467,7 @@ static int virtio_mem_set_block_state(VirtIOMEM *vmem, uint64_t start_gpa, int fd = memory_region_get_fd(&vmem->memdev->mr); Error *local_err = NULL; - os_mem_prealloc(fd, area, size, 1, &local_err); + qemu_prealloc_mem(fd, area, size, 1, NULL, &local_err); if (local_err) { static bool warned; diff --git a/include/block/block-common.h b/include/block/block-common.h index fdb7306e78..297704c1e9 100644 --- a/include/block/block-common.h +++ b/include/block/block-common.h @@ -80,6 +80,15 @@ typedef enum { */ BDRV_REQ_MAY_UNMAP = 0x4, + /* + * An optimization hint when all QEMUIOVector elements are within + * previously registered bdrv_register_buf() memory ranges. + * + * Code that replaces the user's QEMUIOVector elements with bounce buffers + * must take care to clear this flag. + */ + BDRV_REQ_REGISTERED_BUF = 0x8, + BDRV_REQ_FUA = 0x10, BDRV_REQ_WRITE_COMPRESSED = 0x20, @@ -313,6 +322,45 @@ enum { * * At least one of DATA, METADATA, FILTERED, or COW must be set for * every child. + * + * + * = Connection with bs->children, bs->file and bs->backing fields = + * + * 1. Filters + * + * Filter drivers have drv->is_filter = true. + * + * Filter node has exactly one FILTERED|PRIMARY child, and may have other + * children which must not have these bits (one example is the + * copy-before-write filter, which also has its target DATA child). + * + * Filter nodes never have COW children. + * + * For most filters, the filtered child is linked in bs->file, bs->backing is + * NULL. For some filters (as an exception), it is the other way around; those + * drivers will have drv->filtered_child_is_backing set to true (see that + * field’s documentation for what drivers this concerns) + * + * 2. "raw" driver (block/raw-format.c) + * + * Formally it's not a filter (drv->is_filter = false) + * + * bs->backing is always NULL + * + * Only has one child, linked in bs->file. Its role is either FILTERED|PRIMARY + * (like filter) or DATA|PRIMARY depending on options. + * + * 3. Other drivers + * + * Don't have any FILTERED children. + * + * May have at most one COW child. In this case it's linked in bs->backing. + * Otherwise bs->backing is NULL. COW child is never PRIMARY. + * + * May have at most one PRIMARY child. In this case it's linked in bs->file. + * Otherwise bs->file is NULL. + * + * May also have some other children that don't have the PRIMARY or COW bit set. */ enum BdrvChildRoleBits { /* diff --git a/include/block/block-global-state.h b/include/block/block-global-state.h index 21265e3966..bb42ed9559 100644 --- a/include/block/block-global-state.h +++ b/include/block/block-global-state.h @@ -76,6 +76,9 @@ BdrvChild *bdrv_open_child(const char *filename, const BdrvChildClass *child_class, BdrvChildRole child_role, bool allow_none, Error **errp); +int bdrv_open_file_child(const char *filename, + QDict *options, const char *bdref_key, + BlockDriverState *parent, Error **errp); BlockDriverState *bdrv_open_blockdev_ref(BlockdevRef *ref, Error **errp); int bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd, Error **errp); @@ -217,17 +220,12 @@ void coroutine_fn bdrv_co_lock(BlockDriverState *bs); */ void coroutine_fn bdrv_co_unlock(BlockDriverState *bs); -void bdrv_set_aio_context_ignore(BlockDriverState *bs, - AioContext *new_context, GSList **ignore); -int bdrv_try_set_aio_context(BlockDriverState *bs, AioContext *ctx, - Error **errp); -int bdrv_child_try_set_aio_context(BlockDriverState *bs, AioContext *ctx, - BdrvChild *ignore_child, Error **errp); -bool bdrv_child_can_set_aio_context(BdrvChild *c, AioContext *ctx, - GSList **ignore, Error **errp); -bool bdrv_can_set_aio_context(BlockDriverState *bs, AioContext *ctx, - GSList **ignore, Error **errp); AioContext *bdrv_child_get_parent_aio_context(BdrvChild *c); +bool bdrv_child_change_aio_context(BdrvChild *c, AioContext *ctx, + GHashTable *visited, Transaction *tran, + Error **errp); +int bdrv_try_change_aio_context(BlockDriverState *bs, AioContext *ctx, + BdrvChild *ignore_child, Error **errp); int bdrv_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz); int bdrv_probe_geometry(BlockDriverState *bs, HDGeometry *geo); @@ -243,9 +241,15 @@ void bdrv_del_child(BlockDriverState *parent, BdrvChild *child, Error **errp); * Register/unregister a buffer for I/O. For example, VFIO drivers are * interested to know the memory areas that would later be used for I/O, so * that they can prepare IOMMU mapping etc., to get better performance. + * + * Buffers must not overlap and they must be unregistered with the same <host, + * size> values that they were registered with. + * + * Returns: true on success, false on failure */ -void bdrv_register_buf(BlockDriverState *bs, void *host, size_t size); -void bdrv_unregister_buf(BlockDriverState *bs, void *host); +bool bdrv_register_buf(BlockDriverState *bs, void *host, size_t size, + Error **errp); +void bdrv_unregister_buf(BlockDriverState *bs, void *host, size_t size); void bdrv_cancel_in_flight(BlockDriverState *bs); diff --git a/include/block/block-hmp-cmds.h b/include/block/block-hmp-cmds.h index 50ce0247c3..ba0593c440 100644 --- a/include/block/block-hmp-cmds.h +++ b/include/block/block-hmp-cmds.h @@ -38,7 +38,7 @@ void hmp_nbd_server_add(Monitor *mon, const QDict *qdict); void hmp_nbd_server_remove(Monitor *mon, const QDict *qdict); void hmp_nbd_server_stop(Monitor *mon, const QDict *qdict); -void hmp_block_resize(Monitor *mon, const QDict *qdict); +void coroutine_fn hmp_block_resize(Monitor *mon, const QDict *qdict); void hmp_block_stream(Monitor *mon, const QDict *qdict); void hmp_block_passwd(Monitor *mon, const QDict *qdict); void hmp_block_set_io_throttle(Monitor *mon, const QDict *qdict); diff --git a/include/block/block-io.h b/include/block/block-io.h index 492f95fc05..770ddeb7c8 100644 --- a/include/block/block-io.h +++ b/include/block/block-io.h @@ -83,12 +83,13 @@ void bdrv_aio_cancel(BlockAIOCB *acb); void bdrv_aio_cancel_async(BlockAIOCB *acb); /* sg packet commands */ -int bdrv_co_ioctl(BlockDriverState *bs, int req, void *buf); +int coroutine_fn bdrv_co_ioctl(BlockDriverState *bs, int req, void *buf); /* Ensure contents are flushed to disk. */ int coroutine_fn bdrv_co_flush(BlockDriverState *bs); -int bdrv_co_pdiscard(BdrvChild *child, int64_t offset, int64_t bytes); +int coroutine_fn bdrv_co_pdiscard(BdrvChild *child, int64_t offset, + int64_t bytes); bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs); int bdrv_block_status(BlockDriverState *bs, int64_t offset, int64_t bytes, int64_t *pnum, int64_t *map, diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h index 8947abab76..5a2cc077a0 100644 --- a/include/block/block_int-common.h +++ b/include/block/block_int-common.h @@ -120,6 +120,20 @@ struct BlockDriver { */ bool is_filter; /* + * Only make sense for filter drivers, for others must be false. + * If true, filtered child is bs->backing. Otherwise it's bs->file. + * Two internal filters use bs->backing as filtered child and has this + * field set to true: mirror_top and commit_top. There also two such test + * filters in tests/unit/test-bdrv-graph-mod.c. + * + * Never create any more such filters! + * + * TODO: imagine how to deprecate this behavior and make all filters work + * similarly using bs->file as filtered child. + */ + bool filtered_child_is_backing; + + /* * Set to true if the BlockDriver is a format driver. Format nodes * generally do not expect their children to be other format nodes * (except for backing files), and so format probing is disabled @@ -433,9 +447,12 @@ struct BlockDriver { * that it can do IOMMU mapping with VFIO etc., in order to get better * performance. In the case of VFIO drivers, this callback is used to do * DMA mapping for hot buffers. + * + * Returns: true on success, false on failure */ - void (*bdrv_register_buf)(BlockDriverState *bs, void *host, size_t size); - void (*bdrv_unregister_buf)(BlockDriverState *bs, void *host); + bool (*bdrv_register_buf)(BlockDriverState *bs, void *host, size_t size, + Error **errp); + void (*bdrv_unregister_buf)(BlockDriverState *bs, void *host, size_t size); /* * This field is modified only under the BQL, and is part of @@ -731,13 +748,11 @@ struct BlockDriver { void coroutine_fn (*bdrv_co_drain_end)(BlockDriverState *bs); bool (*bdrv_supports_persistent_dirty_bitmap)(BlockDriverState *bs); - bool (*bdrv_co_can_store_new_dirty_bitmap)(BlockDriverState *bs, - const char *name, - uint32_t granularity, - Error **errp); - int (*bdrv_co_remove_persistent_dirty_bitmap)(BlockDriverState *bs, - const char *name, - Error **errp); + bool coroutine_fn (*bdrv_co_can_store_new_dirty_bitmap)( + BlockDriverState *bs, const char *name, uint32_t granularity, + Error **errp); + int coroutine_fn (*bdrv_co_remove_persistent_dirty_bitmap)( + BlockDriverState *bs, const char *name, Error **errp); }; static inline bool block_driver_can_compress(BlockDriver *drv) @@ -892,9 +907,9 @@ struct BdrvChildClass { int (*update_filename)(BdrvChild *child, BlockDriverState *new_base, const char *filename, Error **errp); - bool (*can_set_aio_ctx)(BdrvChild *child, AioContext *ctx, - GSList **ignore, Error **errp); - void (*set_aio_ctx)(BdrvChild *child, AioContext *ctx, GSList **ignore); + bool (*change_aio_ctx)(BdrvChild *child, AioContext *ctx, + GHashTable *visited, Transaction *tran, + Error **errp); AioContext *(*get_parent_aio_context)(BdrvChild *child); @@ -1042,16 +1057,13 @@ struct BlockDriverState { QDict *full_open_options; char exact_filename[PATH_MAX]; - BdrvChild *backing; - BdrvChild *file; - /* I/O Limits */ BlockLimits bl; /* * Flags honored during pread */ - unsigned int supported_read_flags; + BdrvRequestFlags supported_read_flags; /* * Flags honored during pwrite (so far: BDRV_REQ_FUA, * BDRV_REQ_WRITE_UNCHANGED). @@ -1069,12 +1081,12 @@ struct BlockDriverState { * flag), or they have to explicitly take the WRITE permission for * their children. */ - unsigned int supported_write_flags; + BdrvRequestFlags supported_write_flags; /* * Flags honored during pwrite_zeroes (so far: BDRV_REQ_FUA, * BDRV_REQ_MAY_UNMAP, BDRV_REQ_WRITE_UNCHANGED) */ - unsigned int supported_zero_flags; + BdrvRequestFlags supported_zero_flags; /* * Flags honoured during truncate (so far: BDRV_REQ_ZERO_WRITE). * @@ -1082,7 +1094,7 @@ struct BlockDriverState { * that any added space reads as all zeros. If this can't be guaranteed, * the operation must fail. */ - unsigned int supported_truncate_flags; + BdrvRequestFlags supported_truncate_flags; /* the following member gives a name to every node on the bs graph. */ char node_name[32]; @@ -1103,7 +1115,19 @@ struct BlockDriverState { * parent node of this node. */ BlockDriverState *inherits_from; + + /* + * @backing and @file are some of @children or NULL. All these three fields + * (@file, @backing and @children) are modified only in + * bdrv_child_cb_attach() and bdrv_child_cb_detach(). + * + * See also comment in include/block/block.h, to learn how backing and file + * are connected with BdrvChildRole. + */ QLIST_HEAD(, BdrvChild) children; + BdrvChild *backing; + BdrvChild *file; + QLIST_HEAD(, BdrvChild) parents; QDict *options; @@ -1230,7 +1254,7 @@ static inline BlockDriverState *child_bs(BdrvChild *child) } int bdrv_check_request(int64_t offset, int64_t bytes, Error **errp); -int get_tmp_filename(char *filename, int size); +char *create_tmp_file(Error **errp); void bdrv_parse_filename_strip_prefix(const char *filename, const char *prefix, QDict *options); diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h index c493510ee9..6feaa40ca7 100644 --- a/include/exec/cpu-common.h +++ b/include/exec/cpu-common.h @@ -92,6 +92,7 @@ void qemu_ram_set_uf_zeroable(RAMBlock *rb); bool qemu_ram_is_migratable(RAMBlock *rb); void qemu_ram_set_migratable(RAMBlock *rb); void qemu_ram_unset_migratable(RAMBlock *rb); +int qemu_ram_get_fd(RAMBlock *rb); size_t qemu_ram_pagesize(RAMBlock *block); size_t qemu_ram_pagesize_largest(void); diff --git a/include/hw/boards.h b/include/hw/boards.h index 311ed17e18..90f1dd3aeb 100644 --- a/include/hw/boards.h +++ b/include/hw/boards.h @@ -231,7 +231,7 @@ struct MachineClass { const char *deprecation_reason; void (*init)(MachineState *state); - void (*reset)(MachineState *state); + void (*reset)(MachineState *state, ShutdownCause reason); void (*wakeup)(MachineState *state); int (*kvm_type)(MachineState *machine, const char *arg); diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h index f9b58773f7..8830546121 100644 --- a/include/hw/core/cpu.h +++ b/include/hw/core/cpu.h @@ -333,6 +333,7 @@ struct CPUState { struct QemuThread *thread; #ifdef _WIN32 HANDLE hThread; + QemuSemaphore sem; #endif int thread_id; bool running, has_waiter; diff --git a/include/hw/ppc/ppc4xx.h b/include/hw/ppc/ppc4xx.h index 10c6dd535f..f8c86e09ec 100644 --- a/include/hw/ppc/ppc4xx.h +++ b/include/hw/ppc/ppc4xx.h @@ -29,18 +29,6 @@ #include "exec/memory.h" #include "hw/sysbus.h" -typedef struct { - MemoryRegion ram; - MemoryRegion container; /* used for clipping */ - hwaddr base; - hwaddr size; - uint32_t bcr; -} Ppc4xxSdramBank; - -void ppc4xx_sdram_banks(MemoryRegion *ram, int nr_banks, - Ppc4xxSdramBank ram_banks[], - const ram_addr_t sdram_bank_sizes[]); - #define TYPE_PPC4xx_PCI_HOST_BRIDGE "ppc4xx-pcihost" /* @@ -111,6 +99,14 @@ struct Ppc4xxEbcState { }; /* SDRAM DDR controller */ +typedef struct { + MemoryRegion ram; + MemoryRegion container; /* used for clipping */ + hwaddr base; + hwaddr size; + uint32_t bcr; +} Ppc4xxSdramBank; + #define SDR0_DDR0_DDRM_ENCODE(n) ((((unsigned long)(n)) & 0x03) << 29) #define SDR0_DDR0_DDRM_DDR1 0x20000000 #define SDR0_DDR0_DDRM_DDR2 0x40000000 diff --git a/include/hw/virtio/virtio-blk.h b/include/hw/virtio/virtio-blk.h index d311c57cca..7f589b4146 100644 --- a/include/hw/virtio/virtio-blk.h +++ b/include/hw/virtio/virtio-blk.h @@ -19,6 +19,7 @@ #include "hw/block/block.h" #include "sysemu/iothread.h" #include "sysemu/block-backend.h" +#include "sysemu/block-ram-registrar.h" #include "qom/object.h" #define TYPE_VIRTIO_BLK "virtio-blk-device" @@ -64,6 +65,7 @@ struct VirtIOBlock { struct VirtIOBlockDataPlane *dataplane; uint64_t host_features; size_t config_size; + BlockRAMRegistrar blk_ram_registrar; }; typedef struct VirtIOBlockReq { diff --git a/include/monitor/hmp.h b/include/monitor/hmp.h index a9cf064ee8..dfbc0c9a2f 100644 --- a/include/monitor/hmp.h +++ b/include/monitor/hmp.h @@ -15,6 +15,7 @@ #define HMP_H #include "qemu/readline.h" +#include "qemu/coroutine.h" #include "qapi/qapi-types-common.h" bool hmp_handle_error(Monitor *mon, Error *err); @@ -81,7 +82,7 @@ void hmp_netdev_del(Monitor *mon, const QDict *qdict); void hmp_getfd(Monitor *mon, const QDict *qdict); void hmp_closefd(Monitor *mon, const QDict *qdict); void hmp_sendkey(Monitor *mon, const QDict *qdict); -void hmp_screendump(Monitor *mon, const QDict *qdict); +void coroutine_fn hmp_screendump(Monitor *mon, const QDict *qdict); void hmp_chardev_add(Monitor *mon, const QDict *qdict); void hmp_chardev_change(Monitor *mon, const QDict *qdict); void hmp_chardev_remove(Monitor *mon, const QDict *qdict); diff --git a/include/net/net.h b/include/net/net.h index 81d0b21def..3db75ff841 100644 --- a/include/net/net.h +++ b/include/net/net.h @@ -177,6 +177,7 @@ ssize_t qemu_send_packet_async(NetClientState *nc, const uint8_t *buf, void qemu_purge_queued_packets(NetClientState *nc); void qemu_flush_queued_packets(NetClientState *nc); void qemu_flush_or_purge_queued_packets(NetClientState *nc, bool purge); +void qemu_set_info_str(NetClientState *nc, const char *fmt, ...); void qemu_format_nic_info_str(NetClientState *nc, uint8_t macaddr[6]); bool qemu_has_ufo(NetClientState *nc); bool qemu_has_vnet_hdr(NetClientState *nc); @@ -220,9 +221,11 @@ extern NICInfo nd_table[MAX_NICS]; extern const char *host_net_devices[]; /* from net.c */ -int net_client_parse(QemuOptsList *opts_list, const char *str); +bool netdev_is_modern(const char *optarg); +void netdev_parse_modern(const char *optarg); +void net_client_parse(QemuOptsList *opts_list, const char *str); void show_netdevs(void); -int net_init_clients(Error **errp); +void net_init_clients(void); void net_check_clients(void); void net_cleanup(void); void hmp_host_net_add(Monitor *mon, const QDict *qdict); diff --git a/include/qapi/qmp/qerror.h b/include/qapi/qmp/qerror.h index 596fce0c54..87ca83b155 100644 --- a/include/qapi/qmp/qerror.h +++ b/include/qapi/qmp/qerror.h @@ -50,9 +50,6 @@ #define QERR_MISSING_PARAMETER \ "Parameter '%s' is missing" -#define QERR_PERMISSION_DENIED \ - "Insufficient permission to perform this operation" - #define QERR_PROPERTY_VALUE_BAD \ "Property '%s.%s' doesn't take value '%s'" diff --git a/include/qemu/coroutine.h b/include/qemu/coroutine.h index aae33cce17..89650a2d7f 100644 --- a/include/qemu/coroutine.h +++ b/include/qemu/coroutine.h @@ -198,14 +198,25 @@ typedef struct CoQueue { */ void qemu_co_queue_init(CoQueue *queue); +typedef enum { + /* + * Enqueue at front instead of back. Use this to re-queue a request when + * its wait condition is not satisfied after being woken up. + */ + CO_QUEUE_WAIT_FRONT = 0x1, +} CoQueueWaitFlags; + /** * Adds the current coroutine to the CoQueue and transfers control to the * caller of the coroutine. The mutex is unlocked during the wait and * locked again afterwards. */ #define qemu_co_queue_wait(queue, lock) \ - qemu_co_queue_wait_impl(queue, QEMU_MAKE_LOCKABLE(lock)) -void coroutine_fn qemu_co_queue_wait_impl(CoQueue *queue, QemuLockable *lock); + qemu_co_queue_wait_impl(queue, QEMU_MAKE_LOCKABLE(lock), 0) +#define qemu_co_queue_wait_flags(queue, lock, flags) \ + qemu_co_queue_wait_impl(queue, QEMU_MAKE_LOCKABLE(lock), (flags)) +void coroutine_fn qemu_co_queue_wait_impl(CoQueue *queue, QemuLockable *lock, + CoQueueWaitFlags flags); /** * Removes the next coroutine from the CoQueue, and queue it to run after @@ -276,7 +287,7 @@ void qemu_co_rwlock_init(CoRwlock *lock); * of a parallel writer, control is transferred to the caller of the current * coroutine. */ -void qemu_co_rwlock_rdlock(CoRwlock *lock); +void coroutine_fn qemu_co_rwlock_rdlock(CoRwlock *lock); /** * Write Locks the CoRwlock from a reader. This is a bit more efficient than @@ -285,7 +296,7 @@ void qemu_co_rwlock_rdlock(CoRwlock *lock); * to the caller of the current coroutine; another writer might run while * @qemu_co_rwlock_upgrade blocks. */ -void qemu_co_rwlock_upgrade(CoRwlock *lock); +void coroutine_fn qemu_co_rwlock_upgrade(CoRwlock *lock); /** * Downgrades a write-side critical section to a reader. Downgrading with @@ -293,20 +304,20 @@ void qemu_co_rwlock_upgrade(CoRwlock *lock); * followed by @qemu_co_rwlock_rdlock. This makes it more efficient, but * may also sometimes be necessary for correctness. */ -void qemu_co_rwlock_downgrade(CoRwlock *lock); +void coroutine_fn qemu_co_rwlock_downgrade(CoRwlock *lock); /** * Write Locks the mutex. If the lock cannot be taken immediately because * of a parallel reader, control is transferred to the caller of the current * coroutine. */ -void qemu_co_rwlock_wrlock(CoRwlock *lock); +void coroutine_fn qemu_co_rwlock_wrlock(CoRwlock *lock); /** * Unlocks the read/write lock and schedules the next coroutine that was * waiting for this lock to be run. */ -void qemu_co_rwlock_unlock(CoRwlock *lock); +void coroutine_fn qemu_co_rwlock_unlock(CoRwlock *lock); typedef struct QemuCoSleep { Coroutine *to_wake; @@ -378,8 +389,9 @@ void qemu_coroutine_dec_pool_size(unsigned int additional_pool_size); * The same interface as qemu_sendv_recvv(), with added yielding. * XXX should mark these as coroutine_fn */ -ssize_t qemu_co_sendv_recvv(int sockfd, struct iovec *iov, unsigned iov_cnt, - size_t offset, size_t bytes, bool do_send); +ssize_t coroutine_fn qemu_co_sendv_recvv(int sockfd, struct iovec *iov, + unsigned iov_cnt, size_t offset, + size_t bytes, bool do_send); #define qemu_co_recvv(sockfd, iov, iov_cnt, offset, bytes) \ qemu_co_sendv_recvv(sockfd, iov, iov_cnt, offset, bytes, false) #define qemu_co_sendv(sockfd, iov, iov_cnt, offset, bytes) \ @@ -388,7 +400,8 @@ ssize_t qemu_co_sendv_recvv(int sockfd, struct iovec *iov, unsigned iov_cnt, /** * The same as above, but with just a single buffer */ -ssize_t qemu_co_send_recv(int sockfd, void *buf, size_t bytes, bool do_send); +ssize_t coroutine_fn qemu_co_send_recv(int sockfd, void *buf, size_t bytes, + bool do_send); #define qemu_co_recv(sockfd, buf, bytes) \ qemu_co_send_recv(sockfd, buf, bytes, false) #define qemu_co_send(sockfd, buf, bytes) \ diff --git a/include/qemu/osdep.h b/include/qemu/osdep.h index 2276094729..b9c4307779 100644 --- a/include/qemu/osdep.h +++ b/include/qemu/osdep.h @@ -576,8 +576,23 @@ unsigned long qemu_getauxval(unsigned long type); void qemu_set_tty_echo(int fd, bool echo); -void os_mem_prealloc(int fd, char *area, size_t sz, int smp_cpus, - Error **errp); +typedef struct ThreadContext ThreadContext; + +/** + * qemu_prealloc_mem: + * @fd: the fd mapped into the area, -1 for anonymous memory + * @area: start address of the are to preallocate + * @sz: the size of the area to preallocate + * @max_threads: maximum number of threads to use + * @errp: returns an error if this function fails + * + * Preallocate memory (populate/prefault page tables writable) for the virtual + * memory area starting at @area with the size of @sz. After a successful call, + * each page in the area was faulted in writable at least once, for example, + * after allocating file blocks for mapped files. + */ +void qemu_prealloc_mem(int fd, char *area, size_t sz, int max_threads, + ThreadContext *tc, Error **errp); /** * qemu_get_pid_name: diff --git a/include/qemu/sockets.h b/include/qemu/sockets.h index 036745e586..2b0698a7c9 100644 --- a/include/qemu/sockets.h +++ b/include/qemu/sockets.h @@ -33,6 +33,19 @@ int qemu_socketpair(int domain, int type, int protocol, int sv[2]); #endif int qemu_accept(int s, struct sockaddr *addr, socklen_t *addrlen); +/* + * A variant of send(2) which handles partial send. + * + * Return the number of bytes transferred over the socket. + * Set errno if fewer than `count' bytes are sent. + * + * This function don't work with non-blocking socket's. + * Any of the possibilities with non-blocking socket's is bad: + * - return a short write (then name is wrong) + * - busy wait adding (errno == EAGAIN) to the loop + */ +ssize_t qemu_send_full(int s, const void *buf, size_t count) + G_GNUC_WARN_UNUSED_RESULT; int socket_set_cork(int fd, int v); int socket_set_nodelay(int fd); void qemu_socket_set_block(int fd); @@ -58,6 +71,7 @@ NetworkAddressFamily inet_netfamily(int family); int unix_listen(const char *path, Error **errp); int unix_connect(const char *path, Error **errp); +char *socket_uri(SocketAddress *addr); SocketAddress *socket_parse(const char *str, Error **errp); int socket_connect(SocketAddress *addr, Error **errp); int socket_listen(SocketAddress *addr, int num, Error **errp); @@ -65,6 +79,8 @@ void socket_listen_cleanup(int fd, Error **errp); int socket_dgram(SocketAddress *remote, SocketAddress *local, Error **errp); /* Old, ipv4 only bits. Don't use for new code. */ +int convert_host_port(struct sockaddr_in *saddr, const char *host, + const char *port, Error **errp); int parse_host_port(struct sockaddr_in *saddr, const char *str, Error **errp); int socket_init(void); @@ -139,5 +155,4 @@ SocketAddress *socket_address_flatten(SocketAddressLegacy *addr); * Return 0 on success. */ int socket_address_parse_named_fd(SocketAddress *addr, Error **errp); - #endif /* QEMU_SOCKETS_H */ diff --git a/include/qemu/thread-context.h b/include/qemu/thread-context.h new file mode 100644 index 0000000000..2ebd6b7fe1 --- /dev/null +++ b/include/qemu/thread-context.h @@ -0,0 +1,57 @@ +/* + * QEMU Thread Context + * + * Copyright Red Hat Inc., 2022 + * + * Authors: + * David Hildenbrand <david@redhat.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef SYSEMU_THREAD_CONTEXT_H +#define SYSEMU_THREAD_CONTEXT_H + +#include "qapi/qapi-types-machine.h" +#include "qemu/thread.h" +#include "qom/object.h" + +#define TYPE_THREAD_CONTEXT "thread-context" +OBJECT_DECLARE_TYPE(ThreadContext, ThreadContextClass, + THREAD_CONTEXT) + +struct ThreadContextClass { + ObjectClass parent_class; +}; + +struct ThreadContext { + /* private */ + Object parent; + + /* private */ + unsigned int thread_id; + QemuThread thread; + + /* Semaphore to wait for context thread action. */ + QemuSemaphore sem; + /* Semaphore to wait for action in context thread. */ + QemuSemaphore sem_thread; + /* Mutex to synchronize requests. */ + QemuMutex mutex; + + /* Commands for the thread to execute. */ + int thread_cmd; + void *thread_cmd_data; + + /* CPU affinity bitmap used for initialization. */ + unsigned long *init_cpu_bitmap; + int init_cpu_nbits; +}; + +void thread_context_create_thread(ThreadContext *tc, QemuThread *thread, + const char *name, + void *(*start_routine)(void *), void *arg, + int mode); + +#endif /* SYSEMU_THREAD_CONTEXT_H */ diff --git a/include/qemu/thread.h b/include/qemu/thread.h index 20641e5844..7c6703bce3 100644 --- a/include/qemu/thread.h +++ b/include/qemu/thread.h @@ -185,6 +185,10 @@ void qemu_event_destroy(QemuEvent *ev); void qemu_thread_create(QemuThread *thread, const char *name, void *(*start_routine)(void *), void *arg, int mode); +int qemu_thread_set_affinity(QemuThread *thread, unsigned long *host_cpus, + unsigned long nbits); +int qemu_thread_get_affinity(QemuThread *thread, unsigned long **host_cpus, + unsigned long *nbits); void *qemu_thread_join(QemuThread *thread); void qemu_thread_get_self(QemuThread *thread); bool qemu_thread_is_self(QemuThread *thread); diff --git a/include/sysemu/block-backend-global-state.h b/include/sysemu/block-backend-global-state.h index 415f0c91d7..6858e39cb6 100644 --- a/include/sysemu/block-backend-global-state.h +++ b/include/sysemu/block-backend-global-state.h @@ -106,8 +106,8 @@ void blk_io_limits_enable(BlockBackend *blk, const char *group); void blk_io_limits_update_group(BlockBackend *blk, const char *group); void blk_set_force_allow_inactivate(BlockBackend *blk); -void blk_register_buf(BlockBackend *blk, void *host, size_t size); -void blk_unregister_buf(BlockBackend *blk, void *host); +bool blk_register_buf(BlockBackend *blk, void *host, size_t size, Error **errp); +void blk_unregister_buf(BlockBackend *blk, void *host, size_t size); const BdrvChild *blk_root(BlockBackend *blk); diff --git a/include/sysemu/block-ram-registrar.h b/include/sysemu/block-ram-registrar.h new file mode 100644 index 0000000000..d8b2f7942b --- /dev/null +++ b/include/sysemu/block-ram-registrar.h @@ -0,0 +1,37 @@ +/* + * BlockBackend RAM Registrar + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#ifndef BLOCK_RAM_REGISTRAR_H +#define BLOCK_RAM_REGISTRAR_H + +#include "exec/ramlist.h" + +/** + * struct BlockRAMRegistrar: + * + * Keeps RAMBlock memory registered with a BlockBackend using + * blk_register_buf() including hotplugged memory. + * + * Emulated devices or other BlockBackend users initialize a BlockRAMRegistrar + * with blk_ram_registrar_init() before submitting I/O requests with the + * BDRV_REQ_REGISTERED_BUF flag set. + */ +typedef struct { + BlockBackend *blk; + RAMBlockNotifier notifier; + bool ok; +} BlockRAMRegistrar; + +void blk_ram_registrar_init(BlockRAMRegistrar *r, BlockBackend *blk); +void blk_ram_registrar_destroy(BlockRAMRegistrar *r); + +/* Have all RAMBlocks been registered successfully? */ +static inline bool blk_ram_registrar_ok(BlockRAMRegistrar *r) +{ + return r->ok; +} + +#endif /* BLOCK_RAM_REGISTRAR_H */ diff --git a/include/sysemu/device_tree.h b/include/sysemu/device_tree.h index e7c5441f56..ca5339beae 100644 --- a/include/sysemu/device_tree.h +++ b/include/sysemu/device_tree.h @@ -197,6 +197,15 @@ int qemu_fdt_setprop_sized_cells_from_array(void *fdt, qdt_tmp); \ }) + +/** + * qemu_fdt_randomize_seeds: + * @fdt: device tree blob + * + * Re-randomize all "rng-seed" properties with new seeds. + */ +void qemu_fdt_randomize_seeds(void *fdt); + #define FDT_PCI_RANGE_RELOCATABLE 0x80000000 #define FDT_PCI_RANGE_PREFETCHABLE 0x40000000 #define FDT_PCI_RANGE_ALIASED 0x20000000 diff --git a/include/sysemu/hostmem.h b/include/sysemu/hostmem.h index 9ff5c16963..39326f1d4f 100644 --- a/include/sysemu/hostmem.h +++ b/include/sysemu/hostmem.h @@ -18,6 +18,7 @@ #include "qom/object.h" #include "exec/memory.h" #include "qemu/bitmap.h" +#include "qemu/thread-context.h" #define TYPE_MEMORY_BACKEND "memory-backend" OBJECT_DECLARE_TYPE(HostMemoryBackend, HostMemoryBackendClass, @@ -66,6 +67,7 @@ struct HostMemoryBackend { bool merge, dump, use_canonical_path; bool prealloc, is_mapped, share, reserve; uint32_t prealloc_threads; + ThreadContext *prealloc_context; DECLARE_BITMAP(host_nodes, MAX_NODES + 1); HostMemPolicy policy; diff --git a/include/sysemu/reset.h b/include/sysemu/reset.h index 0b0d6d7598..609e4d50c2 100644 --- a/include/sysemu/reset.h +++ b/include/sysemu/reset.h @@ -1,10 +1,13 @@ #ifndef QEMU_SYSEMU_RESET_H #define QEMU_SYSEMU_RESET_H +#include "qapi/qapi-events-run-state.h" + typedef void QEMUResetHandler(void *opaque); void qemu_register_reset(QEMUResetHandler *func, void *opaque); +void qemu_register_reset_nosnapshotload(QEMUResetHandler *func, void *opaque); void qemu_unregister_reset(QEMUResetHandler *func, void *opaque); -void qemu_devices_reset(void); +void qemu_devices_reset(ShutdownCause reason); #endif diff --git a/io/channel-watch.c b/io/channel-watch.c index 0289b3647c..ad7c568a84 100644 --- a/io/channel-watch.c +++ b/io/channel-watch.c @@ -115,28 +115,24 @@ static gboolean qio_channel_socket_source_check(GSource *source) { static struct timeval tv0; - QIOChannelSocketSource *ssource = (QIOChannelSocketSource *)source; - WSANETWORKEVENTS ev; fd_set rfds, wfds, xfds; if (!ssource->condition) { return 0; } - WSAEnumNetworkEvents(ssource->socket, ssource->ioc->event, &ev); - FD_ZERO(&rfds); FD_ZERO(&wfds); FD_ZERO(&xfds); if (ssource->condition & G_IO_IN) { - FD_SET((SOCKET)ssource->socket, &rfds); + FD_SET(ssource->socket, &rfds); } if (ssource->condition & G_IO_OUT) { - FD_SET((SOCKET)ssource->socket, &wfds); + FD_SET(ssource->socket, &wfds); } if (ssource->condition & G_IO_PRI) { - FD_SET((SOCKET)ssource->socket, &xfds); + FD_SET(ssource->socket, &xfds); } ssource->revents = 0; if (select(0, &rfds, &wfds, &xfds, &tv0) == 0) { @@ -285,11 +281,9 @@ GSource *qio_channel_create_socket_watch(QIOChannel *ioc, GSource *source; QIOChannelSocketSource *ssource; -#ifdef WIN32 WSAEventSelect(socket, ioc->event, FD_READ | FD_ACCEPT | FD_CLOSE | FD_CONNECT | FD_WRITE | FD_OOB); -#endif source = g_source_new(&qio_channel_socket_source_funcs, sizeof(QIOChannelSocketSource)); @@ -588,7 +588,7 @@ static void coroutine_fn job_do_yield_locked(Job *job, uint64_t ns) next_aio_context = job->aio_context; /* * Coroutine has resumed, but in the meanwhile the job AioContext - * might have changed via bdrv_try_set_aio_context(), so we need to move + * might have changed via bdrv_try_change_aio_context(), so we need to move * the coroutine too in the new aiocontext. */ while (qemu_get_current_aio_context() != next_aio_context) { diff --git a/meson.build b/meson.build index b686dfef75..15dff0a8d3 100644 --- a/meson.build +++ b/meson.build @@ -75,7 +75,7 @@ have_tools = get_option('tools') \ .allowed() have_ga = get_option('guest_agent') \ .disable_auto_if(not have_system and not have_tools) \ - .require(targetos in ['sunos', 'linux', 'windows'], + .require(targetos in ['sunos', 'linux', 'windows', 'freebsd'], error_message: 'unsupported OS for QEMU guest agent') \ .allowed() have_block = have_system or have_tools @@ -636,10 +636,16 @@ if vmnet.found() and not cc.has_header_symbol('vmnet/vmnet.h', endif seccomp = not_found +seccomp_has_sysrawrc = false if not get_option('seccomp').auto() or have_system or have_tools seccomp = dependency('libseccomp', version: '>=2.3.0', required: get_option('seccomp'), method: 'pkg-config', kwargs: static_kwargs) + if seccomp.found() + seccomp_has_sysrawrc = cc.has_header_symbol('seccomp.h', + 'SCMP_FLTATR_API_SYSRAWRC', + dependencies: seccomp) + endif endif libcap_ng = not_found @@ -771,6 +777,13 @@ if not get_option('virglrenderer').auto() or have_system or have_vhost_user_gpu required: get_option('virglrenderer'), kwargs: static_kwargs) endif +blkio = not_found +if not get_option('blkio').auto() or have_block + blkio = dependency('blkio', + method: 'pkg-config', + required: get_option('blkio'), + kwargs: static_kwargs) +endif curl = not_found if not get_option('curl').auto() or have_block curl = dependency('libcurl', version: '>=7.29.0', @@ -1815,6 +1828,7 @@ config_host_data.set('CONFIG_LIBUDEV', libudev.found()) config_host_data.set('CONFIG_LZO', lzo.found()) config_host_data.set('CONFIG_MPATH', mpathpersist.found()) config_host_data.set('CONFIG_MPATH_NEW_API', mpathpersist_new_api) +config_host_data.set('CONFIG_BLKIO', blkio.found()) config_host_data.set('CONFIG_CURL', curl.found()) config_host_data.set('CONFIG_CURSES', curses.found()) config_host_data.set('CONFIG_GBM', gbm.found()) @@ -1839,7 +1853,6 @@ config_host_data.set('CONFIG_LIBNFS', libnfs.found()) config_host_data.set('CONFIG_LIBSSH', libssh.found()) config_host_data.set('CONFIG_LINUX_AIO', libaio.found()) config_host_data.set('CONFIG_LINUX_IO_URING', linux_io_uring.found()) -config_host_data.set('CONFIG_LIBURING_REGISTER_RING_FD', cc.has_function('io_uring_register_ring_fd', prefix: '#include <liburing.h>', dependencies:linux_io_uring)) config_host_data.set('CONFIG_LIBPMEM', libpmem.found()) config_host_data.set('CONFIG_NUMA', numa.found()) config_host_data.set('CONFIG_OPENGL', opengl.found()) @@ -1849,6 +1862,9 @@ config_host_data.set('CONFIG_RDMA', rdma.found()) config_host_data.set('CONFIG_SDL', sdl.found()) config_host_data.set('CONFIG_SDL_IMAGE', sdl_image.found()) config_host_data.set('CONFIG_SECCOMP', seccomp.found()) +if seccomp.found() + config_host_data.set('CONFIG_SECCOMP_SYSRAWRC', seccomp_has_sysrawrc) +endif config_host_data.set('CONFIG_SNAPPY', snappy.found()) config_host_data.set('CONFIG_TPM', have_tpm) config_host_data.set('CONFIG_USB_LIBUSB', libusb.found()) @@ -2114,7 +2130,23 @@ config_host_data.set('CONFIG_PTHREAD_CONDATTR_SETCLOCK', cc.links(gnu_source_pre pthread_condattr_setclock(&attr, CLOCK_MONOTONIC); return 0; }''', dependencies: threads)) +config_host_data.set('CONFIG_PTHREAD_AFFINITY_NP', cc.links(gnu_source_prefix + ''' + #include <pthread.h> + static void *f(void *p) { return NULL; } + int main(void) + { + int setsize = CPU_ALLOC_SIZE(64); + pthread_t thread; + cpu_set_t *cpuset; + pthread_create(&thread, 0, f, 0); + cpuset = CPU_ALLOC(64); + CPU_ZERO_S(setsize, cpuset); + pthread_setaffinity_np(thread, setsize, cpuset); + pthread_getaffinity_np(thread, setsize, cpuset); + CPU_FREE(cpuset); + return 0; + }''', dependencies: threads)) config_host_data.set('CONFIG_SIGNALFD', cc.links(gnu_source_prefix + ''' #include <sys/signalfd.h> #include <stddef.h> @@ -3869,6 +3901,7 @@ summary_info += {'PAM': pam} summary_info += {'iconv support': iconv} summary_info += {'curses support': curses} summary_info += {'virgl support': virgl} +summary_info += {'blkio support': blkio} summary_info += {'curl support': curl} summary_info += {'Multipath support': mpathpersist} summary_info += {'PNG support': png} diff --git a/meson_options.txt b/meson_options.txt index 79c6af18d5..66128178bf 100644 --- a/meson_options.txt +++ b/meson_options.txt @@ -117,6 +117,8 @@ option('bzip2', type : 'feature', value : 'auto', description: 'bzip2 support for DMG images') option('cap_ng', type : 'feature', value : 'auto', description: 'cap_ng support') +option('blkio', type : 'feature', value : 'auto', + description: 'libblkio block device driver') option('bpf', type : 'feature', value : 'auto', description: 'eBPF support') option('cocoa', type : 'feature', value : 'auto', diff --git a/migration/savevm.c b/migration/savevm.c index 48e85c052c..a0cdb714f7 100644 --- a/migration/savevm.c +++ b/migration/savevm.c @@ -3058,7 +3058,7 @@ bool load_snapshot(const char *name, const char *vmstate, goto err_drain; } - qemu_system_reset(SHUTDOWN_CAUSE_NONE); + qemu_system_reset(SHUTDOWN_CAUSE_SNAPSHOT_LOAD); mis->from_src_file = f; if (!yank_register_instance(MIGRATION_YANK_INSTANCE, errp)) { diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c index bab86c5537..01b789a79e 100644 --- a/monitor/hmp-cmds.c +++ b/monitor/hmp-cmds.c @@ -199,27 +199,6 @@ void hmp_info_mice(Monitor *mon, const QDict *qdict) qapi_free_MouseInfoList(mice_list); } -static char *SocketAddress_to_str(SocketAddress *addr) -{ - switch (addr->type) { - case SOCKET_ADDRESS_TYPE_INET: - return g_strdup_printf("tcp:%s:%s", - addr->u.inet.host, - addr->u.inet.port); - case SOCKET_ADDRESS_TYPE_UNIX: - return g_strdup_printf("unix:%s", - addr->u.q_unix.path); - case SOCKET_ADDRESS_TYPE_FD: - return g_strdup_printf("fd:%s", addr->u.fd.str); - case SOCKET_ADDRESS_TYPE_VSOCK: - return g_strdup_printf("tcp:%s:%s", - addr->u.vsock.cid, - addr->u.vsock.port); - default: - return g_strdup("unknown address type"); - } -} - void hmp_info_migrate(Monitor *mon, const QDict *qdict) { MigrationInfo *info; @@ -382,7 +361,7 @@ void hmp_info_migrate(Monitor *mon, const QDict *qdict) monitor_printf(mon, "socket address: [\n"); for (addr = info->socket_address; addr; addr = addr->next) { - char *s = SocketAddress_to_str(addr->value); + char *s = socket_uri(addr->value); monitor_printf(mon, "\t%s\n", s); g_free(s); } diff --git a/net/clients.h b/net/clients.h index c9157789f2..ed8bdfff1e 100644 --- a/net/clients.h +++ b/net/clients.h @@ -40,6 +40,12 @@ int net_init_hubport(const Netdev *netdev, const char *name, int net_init_socket(const Netdev *netdev, const char *name, NetClientState *peer, Error **errp); +int net_init_stream(const Netdev *netdev, const char *name, + NetClientState *peer, Error **errp); + +int net_init_dgram(const Netdev *netdev, const char *name, + NetClientState *peer, Error **errp); + int net_init_tap(const Netdev *netdev, const char *name, NetClientState *peer, Error **errp); diff --git a/net/dgram.c b/net/dgram.c new file mode 100644 index 0000000000..9f7bf38376 --- /dev/null +++ b/net/dgram.c @@ -0,0 +1,623 @@ +/* + * QEMU System Emulator + * + * Copyright (c) 2003-2008 Fabrice Bellard + * Copyright (c) 2022 Red Hat, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "qemu/osdep.h" + +#include "net/net.h" +#include "clients.h" +#include "monitor/monitor.h" +#include "qapi/error.h" +#include "qemu/error-report.h" +#include "qemu/option.h" +#include "qemu/sockets.h" +#include "qemu/iov.h" +#include "qemu/main-loop.h" +#include "qemu/cutils.h" + +typedef struct NetDgramState { + NetClientState nc; + int fd; + SocketReadState rs; + bool read_poll; /* waiting to receive data? */ + bool write_poll; /* waiting to transmit data? */ + /* contains destination iff connectionless */ + struct sockaddr *dest_addr; + socklen_t dest_len; +} NetDgramState; + +static void net_dgram_send(void *opaque); +static void net_dgram_writable(void *opaque); + +static void net_dgram_update_fd_handler(NetDgramState *s) +{ + qemu_set_fd_handler(s->fd, + s->read_poll ? net_dgram_send : NULL, + s->write_poll ? net_dgram_writable : NULL, + s); +} + +static void net_dgram_read_poll(NetDgramState *s, bool enable) +{ + s->read_poll = enable; + net_dgram_update_fd_handler(s); +} + +static void net_dgram_write_poll(NetDgramState *s, bool enable) +{ + s->write_poll = enable; + net_dgram_update_fd_handler(s); +} + +static void net_dgram_writable(void *opaque) +{ + NetDgramState *s = opaque; + + net_dgram_write_poll(s, false); + + qemu_flush_queued_packets(&s->nc); +} + +static ssize_t net_dgram_receive(NetClientState *nc, + const uint8_t *buf, size_t size) +{ + NetDgramState *s = DO_UPCAST(NetDgramState, nc, nc); + ssize_t ret; + + do { + if (s->dest_addr) { + ret = sendto(s->fd, buf, size, 0, s->dest_addr, s->dest_len); + } else { + ret = send(s->fd, buf, size, 0); + } + } while (ret == -1 && errno == EINTR); + + if (ret == -1 && errno == EAGAIN) { + net_dgram_write_poll(s, true); + return 0; + } + return ret; +} + +static void net_dgram_send_completed(NetClientState *nc, ssize_t len) +{ + NetDgramState *s = DO_UPCAST(NetDgramState, nc, nc); + + if (!s->read_poll) { + net_dgram_read_poll(s, true); + } +} + +static void net_dgram_rs_finalize(SocketReadState *rs) +{ + NetDgramState *s = container_of(rs, NetDgramState, rs); + + if (qemu_send_packet_async(&s->nc, rs->buf, + rs->packet_len, + net_dgram_send_completed) == 0) { + net_dgram_read_poll(s, false); + } +} + +static void net_dgram_send(void *opaque) +{ + NetDgramState *s = opaque; + int size; + + size = recv(s->fd, s->rs.buf, sizeof(s->rs.buf), 0); + if (size < 0) { + return; + } + if (size == 0) { + /* end of connection */ + net_dgram_read_poll(s, false); + net_dgram_write_poll(s, false); + return; + } + if (qemu_send_packet_async(&s->nc, s->rs.buf, size, + net_dgram_send_completed) == 0) { + net_dgram_read_poll(s, false); + } +} + +static int net_dgram_mcast_create(struct sockaddr_in *mcastaddr, + struct in_addr *localaddr, + Error **errp) +{ + struct ip_mreq imr; + int fd; + int val, ret; +#ifdef __OpenBSD__ + unsigned char loop; +#else + int loop; +#endif + + if (!IN_MULTICAST(ntohl(mcastaddr->sin_addr.s_addr))) { + error_setg(errp, "specified mcastaddr %s (0x%08x) " + "does not contain a multicast address", + inet_ntoa(mcastaddr->sin_addr), + (int)ntohl(mcastaddr->sin_addr.s_addr)); + return -1; + } + + fd = qemu_socket(PF_INET, SOCK_DGRAM, 0); + if (fd < 0) { + error_setg_errno(errp, errno, "can't create datagram socket"); + return -1; + } + + /* + * Allow multiple sockets to bind the same multicast ip and port by setting + * SO_REUSEADDR. This is the only situation where SO_REUSEADDR should be set + * on windows. Use socket_set_fast_reuse otherwise as it sets SO_REUSEADDR + * only on posix systems. + */ + val = 1; + ret = setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &val, sizeof(val)); + if (ret < 0) { + error_setg_errno(errp, errno, "can't set socket option SO_REUSEADDR"); + goto fail; + } + + ret = bind(fd, (struct sockaddr *)mcastaddr, sizeof(*mcastaddr)); + if (ret < 0) { + error_setg_errno(errp, errno, "can't bind ip=%s to socket", + inet_ntoa(mcastaddr->sin_addr)); + goto fail; + } + + /* Add host to multicast group */ + imr.imr_multiaddr = mcastaddr->sin_addr; + if (localaddr) { + imr.imr_interface = *localaddr; + } else { + imr.imr_interface.s_addr = htonl(INADDR_ANY); + } + + ret = setsockopt(fd, IPPROTO_IP, IP_ADD_MEMBERSHIP, + &imr, sizeof(struct ip_mreq)); + if (ret < 0) { + error_setg_errno(errp, errno, + "can't add socket to multicast group %s", + inet_ntoa(imr.imr_multiaddr)); + goto fail; + } + + /* Force mcast msgs to loopback (eg. several QEMUs in same host */ + loop = 1; + ret = setsockopt(fd, IPPROTO_IP, IP_MULTICAST_LOOP, + &loop, sizeof(loop)); + if (ret < 0) { + error_setg_errno(errp, errno, + "can't force multicast message to loopback"); + goto fail; + } + + /* If a bind address is given, only send packets from that address */ + if (localaddr != NULL) { + ret = setsockopt(fd, IPPROTO_IP, IP_MULTICAST_IF, + localaddr, sizeof(*localaddr)); + if (ret < 0) { + error_setg_errno(errp, errno, + "can't set the default network send interface"); + goto fail; + } + } + + qemu_socket_set_nonblock(fd); + return fd; +fail: + if (fd >= 0) { + closesocket(fd); + } + return -1; +} + +static void net_dgram_cleanup(NetClientState *nc) +{ + NetDgramState *s = DO_UPCAST(NetDgramState, nc, nc); + if (s->fd != -1) { + net_dgram_read_poll(s, false); + net_dgram_write_poll(s, false); + close(s->fd); + s->fd = -1; + } + g_free(s->dest_addr); + s->dest_addr = NULL; + s->dest_len = 0; +} + +static NetClientInfo net_dgram_socket_info = { + .type = NET_CLIENT_DRIVER_DGRAM, + .size = sizeof(NetDgramState), + .receive = net_dgram_receive, + .cleanup = net_dgram_cleanup, +}; + +static NetDgramState *net_dgram_fd_init(NetClientState *peer, + const char *model, + const char *name, + int fd, + Error **errp) +{ + NetClientState *nc; + NetDgramState *s; + + nc = qemu_new_net_client(&net_dgram_socket_info, peer, model, name); + + s = DO_UPCAST(NetDgramState, nc, nc); + + s->fd = fd; + net_socket_rs_init(&s->rs, net_dgram_rs_finalize, false); + net_dgram_read_poll(s, true); + + return s; +} + +static int net_dgram_mcast_init(NetClientState *peer, + const char *model, + const char *name, + SocketAddress *remote, + SocketAddress *local, + Error **errp) +{ + NetDgramState *s; + int fd, ret; + struct sockaddr_in *saddr; + + if (remote->type != SOCKET_ADDRESS_TYPE_INET) { + error_setg(errp, "multicast only support inet type"); + return -1; + } + + saddr = g_new(struct sockaddr_in, 1); + if (convert_host_port(saddr, remote->u.inet.host, remote->u.inet.port, + errp) < 0) { + g_free(saddr); + return -1; + } + + if (!local) { + fd = net_dgram_mcast_create(saddr, NULL, errp); + if (fd < 0) { + g_free(saddr); + return -1; + } + } else { + switch (local->type) { + case SOCKET_ADDRESS_TYPE_INET: { + struct in_addr localaddr; + + if (inet_aton(local->u.inet.host, &localaddr) == 0) { + g_free(saddr); + error_setg(errp, "localaddr '%s' is not a valid IPv4 address", + local->u.inet.host); + return -1; + } + + fd = net_dgram_mcast_create(saddr, &localaddr, errp); + if (fd < 0) { + g_free(saddr); + return -1; + } + break; + } + case SOCKET_ADDRESS_TYPE_FD: { + int newfd; + + fd = monitor_fd_param(monitor_cur(), local->u.fd.str, errp); + if (fd == -1) { + g_free(saddr); + return -1; + } + ret = qemu_socket_try_set_nonblock(fd); + if (ret < 0) { + g_free(saddr); + error_setg_errno(errp, -ret, "%s: Can't use file descriptor %d", + name, fd); + return -1; + } + + /* + * fd passed: multicast: "learn" dest_addr address from bound + * address and save it. Because this may be "shared" socket from a + * "master" process, datagrams would be recv() by ONLY ONE process: + * we must "clone" this dgram socket --jjo + */ + + saddr = g_new(struct sockaddr_in, 1); + + if (convert_host_port(saddr, local->u.inet.host, local->u.inet.port, + errp) < 0) { + g_free(saddr); + closesocket(fd); + return -1; + } + + /* must be bound */ + if (saddr->sin_addr.s_addr == 0) { + error_setg(errp, "can't setup multicast destination address"); + g_free(saddr); + closesocket(fd); + return -1; + } + /* clone dgram socket */ + newfd = net_dgram_mcast_create(saddr, NULL, errp); + if (newfd < 0) { + g_free(saddr); + closesocket(fd); + return -1; + } + /* clone newfd to fd, close newfd */ + dup2(newfd, fd); + close(newfd); + break; + } + default: + g_free(saddr); + error_setg(errp, "only support inet or fd type for local"); + return -1; + } + } + + s = net_dgram_fd_init(peer, model, name, fd, errp); + if (!s) { + g_free(saddr); + return -1; + } + + g_assert(s->dest_addr == NULL); + s->dest_addr = (struct sockaddr *)saddr; + s->dest_len = sizeof(*saddr); + + if (!local) { + qemu_set_info_str(&s->nc, "mcast=%s:%d", + inet_ntoa(saddr->sin_addr), + ntohs(saddr->sin_port)); + } else { + switch (local->type) { + case SOCKET_ADDRESS_TYPE_INET: + qemu_set_info_str(&s->nc, "mcast=%s:%d", + inet_ntoa(saddr->sin_addr), + ntohs(saddr->sin_port)); + break; + case SOCKET_ADDRESS_TYPE_FD: + qemu_set_info_str(&s->nc, "fd=%d (cloned mcast=%s:%d)", + fd, inet_ntoa(saddr->sin_addr), + ntohs(saddr->sin_port)); + break; + default: + g_assert_not_reached(); + } + } + + return 0; + +} + + +int net_init_dgram(const Netdev *netdev, const char *name, + NetClientState *peer, Error **errp) +{ + NetDgramState *s; + int fd, ret; + SocketAddress *remote, *local; + struct sockaddr *dest_addr; + struct sockaddr_in laddr_in, raddr_in; + struct sockaddr_un laddr_un, raddr_un; + socklen_t dest_len; + + assert(netdev->type == NET_CLIENT_DRIVER_DGRAM); + + remote = netdev->u.dgram.remote; + local = netdev->u.dgram.local; + + /* detect multicast address */ + if (remote && remote->type == SOCKET_ADDRESS_TYPE_INET) { + struct sockaddr_in mcastaddr; + + if (convert_host_port(&mcastaddr, remote->u.inet.host, + remote->u.inet.port, errp) < 0) { + return -1; + } + + if (IN_MULTICAST(ntohl(mcastaddr.sin_addr.s_addr))) { + return net_dgram_mcast_init(peer, "dram", name, remote, local, + errp); + } + } + + /* unicast address */ + if (!local) { + error_setg(errp, "dgram requires local= parameter"); + return -1; + } + + if (remote) { + if (local->type == SOCKET_ADDRESS_TYPE_FD) { + error_setg(errp, "don't set remote with local.fd"); + return -1; + } + if (remote->type != local->type) { + error_setg(errp, "remote and local types must be the same"); + return -1; + } + } else { + if (local->type != SOCKET_ADDRESS_TYPE_FD) { + error_setg(errp, + "type=inet or type=unix requires remote parameter"); + return -1; + } + } + + switch (local->type) { + case SOCKET_ADDRESS_TYPE_INET: + if (convert_host_port(&laddr_in, local->u.inet.host, local->u.inet.port, + errp) < 0) { + return -1; + } + + if (convert_host_port(&raddr_in, remote->u.inet.host, + remote->u.inet.port, errp) < 0) { + return -1; + } + + fd = qemu_socket(PF_INET, SOCK_DGRAM, 0); + if (fd < 0) { + error_setg_errno(errp, errno, "can't create datagram socket"); + return -1; + } + + ret = socket_set_fast_reuse(fd); + if (ret < 0) { + error_setg_errno(errp, errno, + "can't set socket option SO_REUSEADDR"); + closesocket(fd); + return -1; + } + ret = bind(fd, (struct sockaddr *)&laddr_in, sizeof(laddr_in)); + if (ret < 0) { + error_setg_errno(errp, errno, "can't bind ip=%s to socket", + inet_ntoa(laddr_in.sin_addr)); + closesocket(fd); + return -1; + } + qemu_socket_set_nonblock(fd); + + dest_len = sizeof(raddr_in); + dest_addr = g_malloc(dest_len); + memcpy(dest_addr, &raddr_in, dest_len); + break; + case SOCKET_ADDRESS_TYPE_UNIX: + ret = unlink(local->u.q_unix.path); + if (ret < 0 && errno != ENOENT) { + error_setg_errno(errp, errno, "failed to unlink socket %s", + local->u.q_unix.path); + return -1; + } + + laddr_un.sun_family = PF_UNIX; + ret = snprintf(laddr_un.sun_path, sizeof(laddr_un.sun_path), "%s", + local->u.q_unix.path); + if (ret < 0 || ret >= sizeof(laddr_un.sun_path)) { + error_setg(errp, "UNIX socket path '%s' is too long", + local->u.q_unix.path); + error_append_hint(errp, "Path must be less than %zu bytes\n", + sizeof(laddr_un.sun_path)); + } + + raddr_un.sun_family = PF_UNIX; + ret = snprintf(raddr_un.sun_path, sizeof(raddr_un.sun_path), "%s", + remote->u.q_unix.path); + if (ret < 0 || ret >= sizeof(raddr_un.sun_path)) { + error_setg(errp, "UNIX socket path '%s' is too long", + remote->u.q_unix.path); + error_append_hint(errp, "Path must be less than %zu bytes\n", + sizeof(raddr_un.sun_path)); + } + + fd = qemu_socket(PF_UNIX, SOCK_DGRAM, 0); + if (fd < 0) { + error_setg_errno(errp, errno, "can't create datagram socket"); + return -1; + } + + ret = bind(fd, (struct sockaddr *)&laddr_un, sizeof(laddr_un)); + if (ret < 0) { + error_setg_errno(errp, errno, "can't bind unix=%s to socket", + laddr_un.sun_path); + closesocket(fd); + return -1; + } + qemu_socket_set_nonblock(fd); + + dest_len = sizeof(raddr_un); + dest_addr = g_malloc(dest_len); + memcpy(dest_addr, &raddr_un, dest_len); + break; + case SOCKET_ADDRESS_TYPE_FD: + fd = monitor_fd_param(monitor_cur(), local->u.fd.str, errp); + if (fd == -1) { + return -1; + } + ret = qemu_socket_try_set_nonblock(fd); + if (ret < 0) { + error_setg_errno(errp, -ret, "%s: Can't use file descriptor %d", + name, fd); + return -1; + } + dest_addr = NULL; + dest_len = 0; + break; + default: + error_setg(errp, "only support inet or fd type for local"); + return -1; + } + + s = net_dgram_fd_init(peer, "dgram", name, fd, errp); + if (!s) { + return -1; + } + + if (remote) { + g_assert(s->dest_addr == NULL); + s->dest_addr = dest_addr; + s->dest_len = dest_len; + } + + switch (local->type) { + case SOCKET_ADDRESS_TYPE_INET: + qemu_set_info_str(&s->nc, "udp=%s:%d/%s:%d", + inet_ntoa(laddr_in.sin_addr), + ntohs(laddr_in.sin_port), + inet_ntoa(raddr_in.sin_addr), + ntohs(raddr_in.sin_port)); + break; + case SOCKET_ADDRESS_TYPE_UNIX: + qemu_set_info_str(&s->nc, "udp=%s:%s", + laddr_un.sun_path, raddr_un.sun_path); + break; + case SOCKET_ADDRESS_TYPE_FD: { + SocketAddress *sa; + SocketAddressType sa_type; + + sa = socket_local_address(fd, errp); + if (sa) { + sa_type = sa->type; + qapi_free_SocketAddress(sa); + + qemu_set_info_str(&s->nc, "fd=%d %s", fd, + SocketAddressType_str(sa_type)); + } else { + qemu_set_info_str(&s->nc, "fd=%d", fd); + } + break; + } + default: + g_assert_not_reached(); + } + + return 0; +} @@ -313,6 +313,8 @@ void net_hub_check_clients(void) case NET_CLIENT_DRIVER_USER: case NET_CLIENT_DRIVER_TAP: case NET_CLIENT_DRIVER_SOCKET: + case NET_CLIENT_DRIVER_STREAM: + case NET_CLIENT_DRIVER_DGRAM: case NET_CLIENT_DRIVER_VDE: case NET_CLIENT_DRIVER_VHOST_USER: has_host_dev = 1; diff --git a/net/l2tpv3.c b/net/l2tpv3.c index af373e5c30..350041a0d6 100644 --- a/net/l2tpv3.c +++ b/net/l2tpv3.c @@ -723,8 +723,7 @@ int net_init_l2tpv3(const Netdev *netdev, l2tpv3_read_poll(s, true); - snprintf(s->nc.info_str, sizeof(s->nc.info_str), - "l2tpv3: connected"); + qemu_set_info_str(&s->nc, "l2tpv3: connected"); return 0; outerr: qemu_del_net_client(nc); diff --git a/net/meson.build b/net/meson.build index d1be76daf3..6cd1e3dab3 100644 --- a/net/meson.build +++ b/net/meson.build @@ -13,6 +13,8 @@ softmmu_ss.add(files( 'net.c', 'queue.c', 'socket.c', + 'stream.c', + 'dgram.c', 'util.c', )) @@ -48,12 +48,14 @@ #include "qemu/qemu-print.h" #include "qemu/main-loop.h" #include "qemu/option.h" +#include "qemu/keyval.h" #include "qapi/error.h" #include "qapi/opts-visitor.h" #include "sysemu/runstate.h" #include "net/colo-compare.h" #include "net/filter.h" #include "qapi/string-output-visitor.h" +#include "qapi/qobject-input-visitor.h" /* Net bridge is currently not supported for W32. */ #if !defined(_WIN32) @@ -63,58 +65,70 @@ static VMChangeStateEntry *net_change_state_entry; static QTAILQ_HEAD(, NetClientState) net_clients; +typedef struct NetdevQueueEntry { + Netdev *nd; + Location loc; + QSIMPLEQ_ENTRY(NetdevQueueEntry) entry; +} NetdevQueueEntry; + +typedef QSIMPLEQ_HEAD(, NetdevQueueEntry) NetdevQueue; + +static NetdevQueue nd_queue = QSIMPLEQ_HEAD_INITIALIZER(nd_queue); + /***********************************************************/ /* network device redirectors */ -int parse_host_port(struct sockaddr_in *saddr, const char *str, - Error **errp) +int convert_host_port(struct sockaddr_in *saddr, const char *host, + const char *port, Error **errp) { - gchar **substrings; struct hostent *he; - const char *addr, *p, *r; - int port, ret = 0; + const char *r; + long p; memset(saddr, 0, sizeof(*saddr)); - substrings = g_strsplit(str, ":", 2); - if (!substrings || !substrings[0] || !substrings[1]) { - error_setg(errp, "host address '%s' doesn't contain ':' " - "separating host from port", str); - ret = -1; - goto out; - } - - addr = substrings[0]; - p = substrings[1]; - saddr->sin_family = AF_INET; - if (addr[0] == '\0') { + if (host[0] == '\0') { saddr->sin_addr.s_addr = 0; } else { - if (qemu_isdigit(addr[0])) { - if (!inet_aton(addr, &saddr->sin_addr)) { + if (qemu_isdigit(host[0])) { + if (!inet_aton(host, &saddr->sin_addr)) { error_setg(errp, "host address '%s' is not a valid " - "IPv4 address", addr); - ret = -1; - goto out; + "IPv4 address", host); + return -1; } } else { - he = gethostbyname(addr); + he = gethostbyname(host); if (he == NULL) { - error_setg(errp, "can't resolve host address '%s'", addr); - ret = -1; - goto out; + error_setg(errp, "can't resolve host address '%s'", host); + return -1; } saddr->sin_addr = *(struct in_addr *)he->h_addr; } } - port = strtol(p, (char **)&r, 0); - if (r == p) { - error_setg(errp, "port number '%s' is invalid", p); + if (qemu_strtol(port, &r, 0, &p) != 0) { + error_setg(errp, "port number '%s' is invalid", port); + return -1; + } + saddr->sin_port = htons(p); + return 0; +} + +int parse_host_port(struct sockaddr_in *saddr, const char *str, + Error **errp) +{ + gchar **substrings; + int ret; + + substrings = g_strsplit(str, ":", 2); + if (!substrings || !substrings[0] || !substrings[1]) { + error_setg(errp, "host address '%s' doesn't contain ':' " + "separating host from port", str); ret = -1; goto out; } - saddr->sin_port = htons(port); + + ret = convert_host_port(saddr, substrings[0], substrings[1], errp); out: g_strfreev(substrings); @@ -128,13 +142,20 @@ char *qemu_mac_strdup_printf(const uint8_t *macaddr) macaddr[3], macaddr[4], macaddr[5]); } +void qemu_set_info_str(NetClientState *nc, const char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + vsnprintf(nc->info_str, sizeof(nc->info_str), fmt, ap); + va_end(ap); +} + void qemu_format_nic_info_str(NetClientState *nc, uint8_t macaddr[6]) { - snprintf(nc->info_str, sizeof(nc->info_str), - "model=%s,macaddr=%02x:%02x:%02x:%02x:%02x:%02x", - nc->model, - macaddr[0], macaddr[1], macaddr[2], - macaddr[3], macaddr[4], macaddr[5]); + qemu_set_info_str(nc, "model=%s,macaddr=%02x:%02x:%02x:%02x:%02x:%02x", + nc->model, macaddr[0], macaddr[1], macaddr[2], + macaddr[3], macaddr[4], macaddr[5]); } static int mac_table[256] = {0}; @@ -1001,6 +1022,8 @@ static int (* const net_client_init_fun[NET_CLIENT_DRIVER__MAX])( #endif [NET_CLIENT_DRIVER_TAP] = net_init_tap, [NET_CLIENT_DRIVER_SOCKET] = net_init_socket, + [NET_CLIENT_DRIVER_STREAM] = net_init_stream, + [NET_CLIENT_DRIVER_DGRAM] = net_init_dgram, #ifdef CONFIG_VDE [NET_CLIENT_DRIVER_VDE] = net_init_vde, #endif @@ -1036,19 +1059,23 @@ static int net_client_init1(const Netdev *netdev, bool is_netdev, Error **errp) if (is_netdev) { if (netdev->type == NET_CLIENT_DRIVER_NIC || !net_client_init_fun[netdev->type]) { - error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "type", - "a netdev backend type"); + error_setg(errp, "network backend '%s' is not compiled into this binary", + NetClientDriver_str(netdev->type)); return -1; } } else { if (netdev->type == NET_CLIENT_DRIVER_NONE) { return 0; /* nothing to do */ } - if (netdev->type == NET_CLIENT_DRIVER_HUBPORT || - !net_client_init_fun[netdev->type]) { - error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "type", - "a net backend type (maybe it is not compiled " - "into this binary)"); + if (netdev->type == NET_CLIENT_DRIVER_HUBPORT) { + error_setg(errp, "network backend '%s' is only supported with -netdev/-nic", + NetClientDriver_str(netdev->type)); + return -1; + } + + if (!net_client_init_fun[netdev->type]) { + error_setg(errp, "network backend '%s' is not compiled into this binary", + NetClientDriver_str(netdev->type)); return -1; } @@ -1088,6 +1115,8 @@ void show_netdevs(void) int idx; const char *available_netdevs[] = { "socket", + "stream", + "dgram", "hubport", "tap", #ifdef CONFIG_SLIRP @@ -1560,36 +1589,97 @@ out: return ret; } -int net_init_clients(Error **errp) +static void netdev_init_modern(void) +{ + while (!QSIMPLEQ_EMPTY(&nd_queue)) { + NetdevQueueEntry *nd = QSIMPLEQ_FIRST(&nd_queue); + + QSIMPLEQ_REMOVE_HEAD(&nd_queue, entry); + loc_push_restore(&nd->loc); + net_client_init1(nd->nd, true, &error_fatal); + loc_pop(&nd->loc); + qapi_free_Netdev(nd->nd); + g_free(nd); + } +} + +void net_init_clients(void) { net_change_state_entry = qemu_add_vm_change_state_handler(net_vm_change_state_handler, NULL); QTAILQ_INIT(&net_clients); - if (qemu_opts_foreach(qemu_find_opts("netdev"), - net_init_netdev, NULL, errp)) { - return -1; - } + netdev_init_modern(); - if (qemu_opts_foreach(qemu_find_opts("nic"), net_param_nic, NULL, errp)) { - return -1; - } + qemu_opts_foreach(qemu_find_opts("netdev"), net_init_netdev, NULL, + &error_fatal); - if (qemu_opts_foreach(qemu_find_opts("net"), net_init_client, NULL, errp)) { - return -1; + qemu_opts_foreach(qemu_find_opts("nic"), net_param_nic, NULL, + &error_fatal); + + qemu_opts_foreach(qemu_find_opts("net"), net_init_client, NULL, + &error_fatal); +} + +/* + * Does this -netdev argument use modern rather than traditional syntax? + * Modern syntax is to be parsed with netdev_parse_modern(). + * Traditional syntax is to be parsed with net_client_parse(). + */ +bool netdev_is_modern(const char *optarg) +{ + QemuOpts *opts; + bool is_modern; + const char *type; + static QemuOptsList dummy_opts = { + .name = "netdev", + .implied_opt_name = "type", + .head = QTAILQ_HEAD_INITIALIZER(dummy_opts.head), + .desc = { { } }, + }; + + if (optarg[0] == '{') { + /* This is JSON, which means it's modern syntax */ + return true; } - return 0; + opts = qemu_opts_create(&dummy_opts, NULL, false, &error_abort); + qemu_opts_do_parse(opts, optarg, dummy_opts.implied_opt_name, + &error_abort); + type = qemu_opt_get(opts, "type"); + is_modern = !g_strcmp0(type, "stream") || !g_strcmp0(type, "dgram"); + + qemu_opts_reset(&dummy_opts); + + return is_modern; +} + +/* + * netdev_parse_modern() uses modern, more expressive syntax than + * net_client_parse(), but supports only the -netdev option. + * netdev_parse_modern() appends to @nd_queue, whereas net_client_parse() + * appends to @qemu_netdev_opts. + */ +void netdev_parse_modern(const char *optarg) +{ + Visitor *v; + NetdevQueueEntry *nd; + + v = qobject_input_visitor_new_str(optarg, "type", &error_fatal); + nd = g_new(NetdevQueueEntry, 1); + visit_type_Netdev(v, NULL, &nd->nd, &error_fatal); + visit_free(v); + loc_save(&nd->loc); + + QSIMPLEQ_INSERT_TAIL(&nd_queue, nd, entry); } -int net_client_parse(QemuOptsList *opts_list, const char *optarg) +void net_client_parse(QemuOptsList *opts_list, const char *optarg) { if (!qemu_opts_parse_noisily(opts_list, optarg, true)) { - return -1; + exit(1); } - - return 0; } /* From FreeBSD */ diff --git a/net/slirp.c b/net/slirp.c index 8679be6444..14a8d59277 100644 --- a/net/slirp.c +++ b/net/slirp.c @@ -611,9 +611,8 @@ static int net_slirp_init(NetClientState *peer, const char *model, nc = qemu_new_net_client(&net_slirp_info, peer, model, name); - snprintf(nc->info_str, sizeof(nc->info_str), - "net=%s,restrict=%s", inet_ntoa(net), - restricted ? "on" : "off"); + qemu_set_info_str(nc, "net=%s,restrict=%s", inet_ntoa(net), + restricted ? "on" : "off"); s = DO_UPCAST(SlirpState, nc, nc); diff --git a/net/socket.c b/net/socket.c index bfd8596250..4944bb70d5 100644 --- a/net/socket.c +++ b/net/socket.c @@ -179,7 +179,7 @@ static void net_socket_send(void *opaque) s->fd = -1; net_socket_rs_init(&s->rs, net_socket_rs_finalize, false); s->nc.link_down = true; - memset(s->nc.info_str, 0, sizeof(s->nc.info_str)); + qemu_set_info_str(&s->nc, ""); return; } @@ -387,16 +387,15 @@ static NetSocketState *net_socket_fd_init_dgram(NetClientState *peer, /* mcast: save bound address as dst */ if (is_connected && mcast != NULL) { s->dgram_dst = saddr; - snprintf(nc->info_str, sizeof(nc->info_str), - "socket: fd=%d (cloned mcast=%s:%d)", - fd, inet_ntoa(saddr.sin_addr), ntohs(saddr.sin_port)); + qemu_set_info_str(nc, "socket: fd=%d (cloned mcast=%s:%d)", fd, + inet_ntoa(saddr.sin_addr), ntohs(saddr.sin_port)); } else { if (sa_type == SOCKET_ADDRESS_TYPE_UNIX) { s->dgram_dst.sin_family = AF_UNIX; } - snprintf(nc->info_str, sizeof(nc->info_str), - "socket: fd=%d %s", fd, SocketAddressType_str(sa_type)); + qemu_set_info_str(nc, "socket: fd=%d %s", fd, + SocketAddressType_str(sa_type)); } return s; @@ -430,7 +429,7 @@ static NetSocketState *net_socket_fd_init_stream(NetClientState *peer, nc = qemu_new_net_client(&net_socket_info, peer, model, name); - snprintf(nc->info_str, sizeof(nc->info_str), "socket: fd=%d", fd); + qemu_set_info_str(nc, "socket: fd=%d", fd); s = DO_UPCAST(NetSocketState, nc, nc); @@ -497,9 +496,8 @@ static void net_socket_accept(void *opaque) s->fd = fd; s->nc.link_down = false; net_socket_connect(s); - snprintf(s->nc.info_str, sizeof(s->nc.info_str), - "socket: connection from %s:%d", - inet_ntoa(saddr.sin_addr), ntohs(saddr.sin_port)); + qemu_set_info_str(&s->nc, "socket: connection from %s:%d", + inet_ntoa(saddr.sin_addr), ntohs(saddr.sin_port)); } static int net_socket_listen_init(NetClientState *peer, @@ -579,8 +577,7 @@ static int net_socket_connect_init(NetClientState *peer, if (errno == EINTR || errno == EWOULDBLOCK) { /* continue */ } else if (errno == EINPROGRESS || - errno == EALREADY || - errno == EINVAL) { + errno == EALREADY) { break; } else { error_setg_errno(errp, errno, "can't connect socket"); @@ -597,9 +594,8 @@ static int net_socket_connect_init(NetClientState *peer, return -1; } - snprintf(s->nc.info_str, sizeof(s->nc.info_str), - "socket: connect to %s:%d", - inet_ntoa(saddr.sin_addr), ntohs(saddr.sin_port)); + qemu_set_info_str(&s->nc, "socket: connect to %s:%d", + inet_ntoa(saddr.sin_addr), ntohs(saddr.sin_port)); return 0; } @@ -642,9 +638,8 @@ static int net_socket_mcast_init(NetClientState *peer, s->dgram_dst = saddr; - snprintf(s->nc.info_str, sizeof(s->nc.info_str), - "socket: mcast=%s:%d", - inet_ntoa(saddr.sin_addr), ntohs(saddr.sin_port)); + qemu_set_info_str(&s->nc, "socket: mcast=%s:%d", + inet_ntoa(saddr.sin_addr), ntohs(saddr.sin_port)); return 0; } @@ -697,9 +692,8 @@ static int net_socket_udp_init(NetClientState *peer, s->dgram_dst = raddr; - snprintf(s->nc.info_str, sizeof(s->nc.info_str), - "socket: udp=%s:%d", - inet_ntoa(raddr.sin_addr), ntohs(raddr.sin_port)); + qemu_set_info_str(&s->nc, "socket: udp=%s:%d", inet_ntoa(raddr.sin_addr), + ntohs(raddr.sin_port)); return 0; } diff --git a/net/stream.c b/net/stream.c new file mode 100644 index 0000000000..53b7040cc4 --- /dev/null +++ b/net/stream.c @@ -0,0 +1,386 @@ +/* + * QEMU System Emulator + * + * Copyright (c) 2003-2008 Fabrice Bellard + * Copyright (c) 2022 Red Hat, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "qemu/osdep.h" + +#include "net/net.h" +#include "clients.h" +#include "monitor/monitor.h" +#include "qapi/error.h" +#include "qemu/error-report.h" +#include "qemu/option.h" +#include "qemu/sockets.h" +#include "qemu/iov.h" +#include "qemu/main-loop.h" +#include "qemu/cutils.h" +#include "io/channel.h" +#include "io/channel-socket.h" +#include "io/net-listener.h" +#include "qapi/qapi-events-net.h" + +typedef struct NetStreamState { + NetClientState nc; + QIOChannel *listen_ioc; + QIONetListener *listener; + QIOChannel *ioc; + guint ioc_read_tag; + guint ioc_write_tag; + SocketReadState rs; + unsigned int send_index; /* number of bytes sent*/ +} NetStreamState; + +static void net_stream_listen(QIONetListener *listener, + QIOChannelSocket *cioc, + void *opaque); + +static gboolean net_stream_writable(QIOChannel *ioc, + GIOCondition condition, + gpointer data) +{ + NetStreamState *s = data; + + s->ioc_write_tag = 0; + + qemu_flush_queued_packets(&s->nc); + + return G_SOURCE_REMOVE; +} + +static ssize_t net_stream_receive(NetClientState *nc, const uint8_t *buf, + size_t size) +{ + NetStreamState *s = DO_UPCAST(NetStreamState, nc, nc); + uint32_t len = htonl(size); + struct iovec iov[] = { + { + .iov_base = &len, + .iov_len = sizeof(len), + }, { + .iov_base = (void *)buf, + .iov_len = size, + }, + }; + struct iovec local_iov[2]; + unsigned int nlocal_iov; + size_t remaining; + ssize_t ret; + + remaining = iov_size(iov, 2) - s->send_index; + nlocal_iov = iov_copy(local_iov, 2, iov, 2, s->send_index, remaining); + ret = qio_channel_writev(s->ioc, local_iov, nlocal_iov, NULL); + if (ret == QIO_CHANNEL_ERR_BLOCK) { + ret = 0; /* handled further down */ + } + if (ret == -1) { + s->send_index = 0; + return -errno; + } + if (ret < (ssize_t)remaining) { + s->send_index += ret; + s->ioc_write_tag = qio_channel_add_watch(s->ioc, G_IO_OUT, + net_stream_writable, s, NULL); + return 0; + } + s->send_index = 0; + return size; +} + +static gboolean net_stream_send(QIOChannel *ioc, + GIOCondition condition, + gpointer data); + +static void net_stream_send_completed(NetClientState *nc, ssize_t len) +{ + NetStreamState *s = DO_UPCAST(NetStreamState, nc, nc); + + if (!s->ioc_read_tag) { + s->ioc_read_tag = qio_channel_add_watch(s->ioc, G_IO_IN, + net_stream_send, s, NULL); + } +} + +static void net_stream_rs_finalize(SocketReadState *rs) +{ + NetStreamState *s = container_of(rs, NetStreamState, rs); + + if (qemu_send_packet_async(&s->nc, rs->buf, + rs->packet_len, + net_stream_send_completed) == 0) { + if (s->ioc_read_tag) { + g_source_remove(s->ioc_read_tag); + s->ioc_read_tag = 0; + } + } +} + +static gboolean net_stream_send(QIOChannel *ioc, + GIOCondition condition, + gpointer data) +{ + NetStreamState *s = data; + int size; + int ret; + char buf1[NET_BUFSIZE]; + const char *buf; + + size = qio_channel_read(s->ioc, buf1, sizeof(buf1), NULL); + if (size < 0) { + if (errno != EWOULDBLOCK) { + goto eoc; + } + } else if (size == 0) { + /* end of connection */ + eoc: + s->ioc_read_tag = 0; + if (s->ioc_write_tag) { + g_source_remove(s->ioc_write_tag); + s->ioc_write_tag = 0; + } + if (s->listener) { + qio_net_listener_set_client_func(s->listener, net_stream_listen, + s, NULL); + } + object_unref(OBJECT(s->ioc)); + s->ioc = NULL; + + net_socket_rs_init(&s->rs, net_stream_rs_finalize, false); + s->nc.link_down = true; + qemu_set_info_str(&s->nc, ""); + + qapi_event_send_netdev_stream_disconnected(s->nc.name); + + return G_SOURCE_REMOVE; + } + buf = buf1; + + ret = net_fill_rstate(&s->rs, (const uint8_t *)buf, size); + + if (ret == -1) { + goto eoc; + } + + return G_SOURCE_CONTINUE; +} + +static void net_stream_cleanup(NetClientState *nc) +{ + NetStreamState *s = DO_UPCAST(NetStreamState, nc, nc); + if (s->ioc) { + if (QIO_CHANNEL_SOCKET(s->ioc)->fd != -1) { + if (s->ioc_read_tag) { + g_source_remove(s->ioc_read_tag); + s->ioc_read_tag = 0; + } + if (s->ioc_write_tag) { + g_source_remove(s->ioc_write_tag); + s->ioc_write_tag = 0; + } + } + object_unref(OBJECT(s->ioc)); + s->ioc = NULL; + } + if (s->listen_ioc) { + if (s->listener) { + qio_net_listener_disconnect(s->listener); + object_unref(OBJECT(s->listener)); + s->listener = NULL; + } + object_unref(OBJECT(s->listen_ioc)); + s->listen_ioc = NULL; + } +} + +static NetClientInfo net_stream_info = { + .type = NET_CLIENT_DRIVER_STREAM, + .size = sizeof(NetStreamState), + .receive = net_stream_receive, + .cleanup = net_stream_cleanup, +}; + +static void net_stream_listen(QIONetListener *listener, + QIOChannelSocket *cioc, + void *opaque) +{ + NetStreamState *s = opaque; + SocketAddress *addr; + char *uri; + + object_ref(OBJECT(cioc)); + + qio_net_listener_set_client_func(s->listener, NULL, s, NULL); + + s->ioc = QIO_CHANNEL(cioc); + qio_channel_set_name(s->ioc, "stream-server"); + s->nc.link_down = false; + + s->ioc_read_tag = qio_channel_add_watch(s->ioc, G_IO_IN, net_stream_send, + s, NULL); + + if (cioc->localAddr.ss_family == AF_UNIX) { + addr = qio_channel_socket_get_local_address(cioc, NULL); + } else { + addr = qio_channel_socket_get_remote_address(cioc, NULL); + } + g_assert(addr != NULL); + uri = socket_uri(addr); + qemu_set_info_str(&s->nc, uri); + g_free(uri); + qapi_event_send_netdev_stream_connected(s->nc.name, addr); + qapi_free_SocketAddress(addr); +} + +static void net_stream_server_listening(QIOTask *task, gpointer opaque) +{ + NetStreamState *s = opaque; + QIOChannelSocket *listen_sioc = QIO_CHANNEL_SOCKET(s->listen_ioc); + SocketAddress *addr; + int ret; + + if (listen_sioc->fd < 0) { + qemu_set_info_str(&s->nc, "connection error"); + return; + } + + addr = qio_channel_socket_get_local_address(listen_sioc, NULL); + g_assert(addr != NULL); + ret = qemu_socket_try_set_nonblock(listen_sioc->fd); + if (addr->type == SOCKET_ADDRESS_TYPE_FD && ret < 0) { + qemu_set_info_str(&s->nc, "can't use file descriptor %s (errno %d)", + addr->u.fd.str, -ret); + return; + } + g_assert(ret == 0); + qapi_free_SocketAddress(addr); + + s->nc.link_down = true; + s->listener = qio_net_listener_new(); + + net_socket_rs_init(&s->rs, net_stream_rs_finalize, false); + qio_net_listener_set_client_func(s->listener, net_stream_listen, s, NULL); + qio_net_listener_add(s->listener, listen_sioc); +} + +static int net_stream_server_init(NetClientState *peer, + const char *model, + const char *name, + SocketAddress *addr, + Error **errp) +{ + NetClientState *nc; + NetStreamState *s; + QIOChannelSocket *listen_sioc = qio_channel_socket_new(); + + nc = qemu_new_net_client(&net_stream_info, peer, model, name); + s = DO_UPCAST(NetStreamState, nc, nc); + + s->listen_ioc = QIO_CHANNEL(listen_sioc); + qio_channel_socket_listen_async(listen_sioc, addr, 0, + net_stream_server_listening, s, + NULL, NULL); + + return 0; +} + +static void net_stream_client_connected(QIOTask *task, gpointer opaque) +{ + NetStreamState *s = opaque; + QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(s->ioc); + SocketAddress *addr; + gchar *uri; + int ret; + + if (sioc->fd < 0) { + qemu_set_info_str(&s->nc, "connection error"); + goto error; + } + + addr = qio_channel_socket_get_remote_address(sioc, NULL); + g_assert(addr != NULL); + uri = socket_uri(addr); + qemu_set_info_str(&s->nc, uri); + g_free(uri); + + ret = qemu_socket_try_set_nonblock(sioc->fd); + if (addr->type == SOCKET_ADDRESS_TYPE_FD && ret < 0) { + qemu_set_info_str(&s->nc, "can't use file descriptor %s (errno %d)", + addr->u.fd.str, -ret); + qapi_free_SocketAddress(addr); + goto error; + } + g_assert(ret == 0); + + net_socket_rs_init(&s->rs, net_stream_rs_finalize, false); + + /* Disable Nagle algorithm on TCP sockets to reduce latency */ + qio_channel_set_delay(s->ioc, false); + + s->ioc_read_tag = qio_channel_add_watch(s->ioc, G_IO_IN, net_stream_send, + s, NULL); + s->nc.link_down = false; + qapi_event_send_netdev_stream_connected(s->nc.name, addr); + qapi_free_SocketAddress(addr); + + return; +error: + object_unref(OBJECT(s->ioc)); + s->ioc = NULL; +} + +static int net_stream_client_init(NetClientState *peer, + const char *model, + const char *name, + SocketAddress *addr, + Error **errp) +{ + NetStreamState *s; + NetClientState *nc; + QIOChannelSocket *sioc = qio_channel_socket_new(); + + nc = qemu_new_net_client(&net_stream_info, peer, model, name); + s = DO_UPCAST(NetStreamState, nc, nc); + + s->ioc = QIO_CHANNEL(sioc); + s->nc.link_down = true; + + qio_channel_socket_connect_async(sioc, addr, + net_stream_client_connected, s, + NULL, NULL); + + return 0; +} + +int net_init_stream(const Netdev *netdev, const char *name, + NetClientState *peer, Error **errp) +{ + const NetdevStreamOptions *sock; + + assert(netdev->type == NET_CLIENT_DRIVER_STREAM); + sock = &netdev->u.stream; + + if (!sock->has_server || !sock->server) { + return net_stream_client_init(peer, "stream", name, sock->addr, errp); + } + return net_stream_server_init(peer, "stream", name, sock->addr, errp); +} diff --git a/net/tap-win32.c b/net/tap-win32.c index 7466f22e77..a49c28ba5d 100644 --- a/net/tap-win32.c +++ b/net/tap-win32.c @@ -789,8 +789,7 @@ static int tap_win32_init(NetClientState *peer, const char *model, s = DO_UPCAST(TAPState, nc, nc); - snprintf(s->nc.info_str, sizeof(s->nc.info_str), - "tap: ifname=%s", ifname); + qemu_set_info_str(&s->nc, "tap: ifname=%s", ifname); s->handle = handle; @@ -630,8 +630,7 @@ int net_init_bridge(const Netdev *netdev, const char *name, } s = net_tap_fd_init(peer, "bridge", name, fd, vnet_hdr); - snprintf(s->nc.info_str, sizeof(s->nc.info_str), "helper=%s,br=%s", helper, - br); + qemu_set_info_str(&s->nc, "helper=%s,br=%s", helper, br); return 0; } @@ -690,14 +689,12 @@ static void net_init_tap_one(const NetdevTapOptions *tap, NetClientState *peer, } if (tap->has_fd || tap->has_fds) { - snprintf(s->nc.info_str, sizeof(s->nc.info_str), "fd=%d", fd); + qemu_set_info_str(&s->nc, "fd=%d", fd); } else if (tap->has_helper) { - snprintf(s->nc.info_str, sizeof(s->nc.info_str), "helper=%s", - tap->helper); + qemu_set_info_str(&s->nc, "helper=%s", tap->helper); } else { - snprintf(s->nc.info_str, sizeof(s->nc.info_str), - "ifname=%s,script=%s,downscript=%s", ifname, script, - downscript); + qemu_set_info_str(&s->nc, "ifname=%s,script=%s,downscript=%s", ifname, + script, downscript); if (strcmp(downscript, "no") != 0) { snprintf(s->down_script, sizeof(s->down_script), "%s", downscript); @@ -98,8 +98,7 @@ static int net_vde_init(NetClientState *peer, const char *model, nc = qemu_new_net_client(&net_vde_info, peer, model, name); - snprintf(nc->info_str, sizeof(nc->info_str), "sock=%s,fd=%d", - sock, vde_datafd(vde)); + qemu_set_info_str(nc, "sock=%s,fd=%d", sock, vde_datafd(vde)); s = DO_UPCAST(VDEState, nc, nc); diff --git a/net/vhost-user.c b/net/vhost-user.c index b1a0247b59..3a6b90da86 100644 --- a/net/vhost-user.c +++ b/net/vhost-user.c @@ -341,8 +341,7 @@ static int net_vhost_user_init(NetClientState *peer, const char *device, user = g_new0(struct VhostUserState, 1); for (i = 0; i < queues; i++) { nc = qemu_new_net_client(&net_vhost_user_info, peer, device, name); - snprintf(nc->info_str, sizeof(nc->info_str), "vhost-user%d to %s", - i, chr->label); + qemu_set_info_str(nc, "vhost-user%d to %s", i, chr->label); nc->queue_index = i; if (!nc0) { nc0 = nc; diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c index 4bc3fd01a8..854ebd61ae 100644 --- a/net/vhost-vdpa.c +++ b/net/vhost-vdpa.c @@ -63,7 +63,6 @@ const int vdpa_feature_bits[] = { VIRTIO_NET_F_CTRL_RX, VIRTIO_NET_F_CTRL_RX_EXTRA, VIRTIO_NET_F_CTRL_VLAN, - VIRTIO_NET_F_GUEST_ANNOUNCE, VIRTIO_NET_F_CTRL_MAC_ADDR, VIRTIO_NET_F_RSS, VIRTIO_NET_F_MQ, @@ -463,48 +462,6 @@ static NetClientInfo net_vhost_vdpa_cvq_info = { }; /** - * Do not forward commands not supported by SVQ. Otherwise, the device could - * accept it and qemu would not know how to update the device model. - */ -static bool vhost_vdpa_net_cvq_validate_cmd(const void *out_buf, size_t len) -{ - struct virtio_net_ctrl_hdr ctrl; - - if (unlikely(len < sizeof(ctrl))) { - qemu_log_mask(LOG_GUEST_ERROR, - "%s: invalid legnth of out buffer %zu\n", __func__, len); - return false; - } - - memcpy(&ctrl, out_buf, sizeof(ctrl)); - switch (ctrl.class) { - case VIRTIO_NET_CTRL_MAC: - switch (ctrl.cmd) { - case VIRTIO_NET_CTRL_MAC_ADDR_SET: - return true; - default: - qemu_log_mask(LOG_GUEST_ERROR, "%s: invalid mac cmd %u\n", - __func__, ctrl.cmd); - }; - break; - case VIRTIO_NET_CTRL_MQ: - switch (ctrl.cmd) { - case VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET: - return true; - default: - qemu_log_mask(LOG_GUEST_ERROR, "%s: invalid mq cmd %u\n", - __func__, ctrl.cmd); - }; - break; - default: - qemu_log_mask(LOG_GUEST_ERROR, "%s: invalid control class %u\n", - __func__, ctrl.class); - }; - - return false; -} - -/** * Validate and copy control virtqueue commands. * * Following QEMU guidelines, we offer a copy of the buffers to the device to @@ -527,16 +484,10 @@ static int vhost_vdpa_net_handle_ctrl_avail(VhostShadowVirtqueue *svq, .iov_len = sizeof(status), }; ssize_t dev_written = -EINVAL; - bool ok; out.iov_len = iov_to_buf(elem->out_sg, elem->out_num, 0, s->cvq_cmd_out_buffer, vhost_vdpa_net_cvq_cmd_len()); - ok = vhost_vdpa_net_cvq_validate_cmd(s->cvq_cmd_out_buffer, out.iov_len); - if (unlikely(!ok)) { - goto out; - } - dev_written = vhost_vdpa_net_cvq_add(s, out.iov_len, sizeof(status)); if (unlikely(dev_written < 0)) { goto out; @@ -593,7 +544,7 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer, nc = qemu_new_net_control_client(&net_vhost_vdpa_cvq_info, peer, device, name); } - snprintf(nc->info_str, sizeof(nc->info_str), TYPE_VHOST_VDPA); + qemu_set_info_str(nc, TYPE_VHOST_VDPA); s = DO_UPCAST(VhostVDPAState, nc, nc); s->vhost_vdpa.device_fd = vdpa_device_fd; @@ -683,14 +634,29 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char *name, assert(netdev->type == NET_CLIENT_DRIVER_VHOST_VDPA); opts = &netdev->u.vhost_vdpa; - if (!opts->vhostdev) { - error_setg(errp, "vdpa character device not specified with vhostdev"); + if (!opts->has_vhostdev && !opts->has_vhostfd) { + error_setg(errp, + "vhost-vdpa: neither vhostdev= nor vhostfd= was specified"); return -1; } - vdpa_device_fd = qemu_open(opts->vhostdev, O_RDWR, errp); - if (vdpa_device_fd == -1) { - return -errno; + if (opts->has_vhostdev && opts->has_vhostfd) { + error_setg(errp, + "vhost-vdpa: vhostdev= and vhostfd= are mutually exclusive"); + return -1; + } + + if (opts->has_vhostdev) { + vdpa_device_fd = qemu_open(opts->vhostdev, O_RDWR, errp); + if (vdpa_device_fd == -1) { + return -errno; + } + } else if (opts->has_vhostfd) { + vdpa_device_fd = monitor_fd_param(monitor_cur(), opts->vhostfd, errp); + if (vdpa_device_fd == -1) { + error_prepend(errp, "vhost-vdpa: unable to parse vhostfd: "); + return -1; + } } r = vhost_vdpa_get_features(vdpa_device_fd, &features, errp); diff --git a/qapi/block-core.json b/qapi/block-core.json index 882b266532..cb5079e645 100644 --- a/qapi/block-core.json +++ b/qapi/block-core.json @@ -2951,11 +2951,18 @@ 'file', 'snapshot-access', 'ftp', 'ftps', 'gluster', {'name': 'host_cdrom', 'if': 'HAVE_HOST_BLOCK_DEVICE' }, {'name': 'host_device', 'if': 'HAVE_HOST_BLOCK_DEVICE' }, - 'http', 'https', 'iscsi', - 'luks', 'nbd', 'nfs', 'null-aio', 'null-co', 'nvme', 'parallels', - 'preallocate', 'qcow', 'qcow2', 'qed', 'quorum', 'raw', 'rbd', + 'http', 'https', + { 'name': 'io_uring', 'if': 'CONFIG_BLKIO' }, + 'iscsi', + 'luks', 'nbd', 'nfs', 'null-aio', 'null-co', 'nvme', + { 'name': 'nvme-io_uring', 'if': 'CONFIG_BLKIO' }, + 'parallels', 'preallocate', 'qcow', 'qcow2', 'qed', 'quorum', + 'raw', 'rbd', { 'name': 'replication', 'if': 'CONFIG_REPLICATION' }, - 'ssh', 'throttle', 'vdi', 'vhdx', 'vmdk', 'vpc', 'vvfat' ] } + 'ssh', 'throttle', 'vdi', 'vhdx', + { 'name': 'virtio-blk-vhost-user', 'if': 'CONFIG_BLKIO' }, + { 'name': 'virtio-blk-vhost-vdpa', 'if': 'CONFIG_BLKIO' }, + 'vmdk', 'vpc', 'vvfat' ] } ## # @BlockdevOptionsFile: @@ -3679,6 +3686,58 @@ '*logfile': 'str' } } ## +# @BlockdevOptionsIoUring: +# +# Driver specific block device options for the io_uring backend. +# +# @filename: path to the image file +# +# Since: 7.2 +## +{ 'struct': 'BlockdevOptionsIoUring', + 'data': { 'filename': 'str' }, + 'if': 'CONFIG_BLKIO' } + +## +# @BlockdevOptionsNvmeIoUring: +# +# Driver specific block device options for the nvme-io_uring backend. +# +# @filename: path to the image file +# +# Since: 7.2 +## +{ 'struct': 'BlockdevOptionsNvmeIoUring', + 'data': { 'filename': 'str' }, + 'if': 'CONFIG_BLKIO' } + +## +# @BlockdevOptionsVirtioBlkVhostUser: +# +# Driver specific block device options for the virtio-blk-vhost-user backend. +# +# @path: path to the vhost-user UNIX domain socket. +# +# Since: 7.2 +## +{ 'struct': 'BlockdevOptionsVirtioBlkVhostUser', + 'data': { 'path': 'str' }, + 'if': 'CONFIG_BLKIO' } + +## +# @BlockdevOptionsVirtioBlkVhostVdpa: +# +# Driver specific block device options for the virtio-blk-vhost-vdpa backend. +# +# @path: path to the vhost-vdpa character device. +# +# Since: 7.2 +## +{ 'struct': 'BlockdevOptionsVirtioBlkVhostVdpa', + 'data': { 'path': 'str' }, + 'if': 'CONFIG_BLKIO' } + +## # @IscsiTransport: # # An enumeration of libiscsi transport types @@ -4305,6 +4364,8 @@ 'if': 'HAVE_HOST_BLOCK_DEVICE' }, 'http': 'BlockdevOptionsCurlHttp', 'https': 'BlockdevOptionsCurlHttps', + 'io_uring': { 'type': 'BlockdevOptionsIoUring', + 'if': 'CONFIG_BLKIO' }, 'iscsi': 'BlockdevOptionsIscsi', 'luks': 'BlockdevOptionsLUKS', 'nbd': 'BlockdevOptionsNbd', @@ -4312,6 +4373,8 @@ 'null-aio': 'BlockdevOptionsNull', 'null-co': 'BlockdevOptionsNull', 'nvme': 'BlockdevOptionsNVMe', + 'nvme-io_uring': { 'type': 'BlockdevOptionsNvmeIoUring', + 'if': 'CONFIG_BLKIO' }, 'parallels': 'BlockdevOptionsGenericFormat', 'preallocate':'BlockdevOptionsPreallocate', 'qcow2': 'BlockdevOptionsQcow2', @@ -4327,6 +4390,12 @@ 'throttle': 'BlockdevOptionsThrottle', 'vdi': 'BlockdevOptionsGenericFormat', 'vhdx': 'BlockdevOptionsGenericFormat', + 'virtio-blk-vhost-user': + { 'type': 'BlockdevOptionsVirtioBlkVhostUser', + 'if': 'CONFIG_BLKIO' }, + 'virtio-blk-vhost-vdpa': + { 'type': 'BlockdevOptionsVirtioBlkVhostVdpa', + 'if': 'CONFIG_BLKIO' }, 'vmdk': 'BlockdevOptionsGenericCOWFormat', 'vpc': 'BlockdevOptionsGenericFormat', 'vvfat': 'BlockdevOptionsVVFAT' diff --git a/qapi/net.json b/qapi/net.json index dd088c09c5..522ac582ed 100644 --- a/qapi/net.json +++ b/qapi/net.json @@ -7,6 +7,7 @@ ## { 'include': 'common.json' } +{ 'include': 'sockets.json' } ## # @set_link: @@ -442,6 +443,8 @@ # @vhostdev: path of vhost-vdpa device # (default:'/dev/vhost-vdpa-0') # +# @vhostfd: file descriptor of an already opened vhost vdpa device +# # @queues: number of queues to be created for multiqueue vhost-vdpa # (default: 1) # @@ -456,6 +459,7 @@ { 'struct': 'NetdevVhostVDPAOptions', 'data': { '*vhostdev': 'str', + '*vhostfd': 'str', '*queues': 'int', '*x-svq': {'type': 'bool', 'features' : [ 'unstable'] } } } @@ -574,6 +578,60 @@ 'if': 'CONFIG_VMNET' } ## +# @NetdevStreamOptions: +# +# Configuration info for stream socket netdev +# +# @addr: socket address to listen on (server=true) +# or connect to (server=false) +# @server: create server socket (default: false) +# +# Only SocketAddress types 'unix', 'inet' and 'fd' are supported. +# +# Since: 7.2 +## +{ 'struct': 'NetdevStreamOptions', + 'data': { + 'addr': 'SocketAddress', + '*server': 'bool' } } + +## +# @NetdevDgramOptions: +# +# Configuration info for datagram socket netdev. +# +# @remote: remote address +# @local: local address +# +# Only SocketAddress types 'unix', 'inet' and 'fd' are supported. +# +# If remote address is present and it's a multicast address, local address +# is optional. Otherwise local address is required and remote address is +# optional. +# +# .. table:: Valid parameters combination table +# :widths: auto +# +# ============= ======== ===== +# remote local okay? +# ============= ======== ===== +# absent absent no +# absent not fd no +# absent fd yes +# multicast absent yes +# multicast present yes +# not multicast absent no +# not multicast present yes +# ============= ======== ===== +# +# Since: 7.2 +## +{ 'struct': 'NetdevDgramOptions', + 'data': { + '*local': 'SocketAddress', + '*remote': 'SocketAddress' } } + +## # @NetClientDriver: # # Available netdev drivers. @@ -584,10 +642,13 @@ # @vmnet-host since 7.1 # @vmnet-shared since 7.1 # @vmnet-bridged since 7.1 +# @stream since 7.2 +# @dgram since 7.2 ## { 'enum': 'NetClientDriver', - 'data': [ 'none', 'nic', 'user', 'tap', 'l2tpv3', 'socket', 'vde', - 'bridge', 'hubport', 'netmap', 'vhost-user', 'vhost-vdpa', + 'data': [ 'none', 'nic', 'user', 'tap', 'l2tpv3', 'socket', 'stream', + 'dgram', 'vde', 'bridge', 'hubport', 'netmap', 'vhost-user', + 'vhost-vdpa', { 'name': 'vmnet-host', 'if': 'CONFIG_VMNET' }, { 'name': 'vmnet-shared', 'if': 'CONFIG_VMNET' }, { 'name': 'vmnet-bridged', 'if': 'CONFIG_VMNET' }] } @@ -607,6 +668,8 @@ # 'vmnet-host' - since 7.1 # 'vmnet-shared' - since 7.1 # 'vmnet-bridged' - since 7.1 +# 'stream' since 7.2 +# 'dgram' since 7.2 ## { 'union': 'Netdev', 'base': { 'id': 'str', 'type': 'NetClientDriver' }, @@ -617,6 +680,8 @@ 'tap': 'NetdevTapOptions', 'l2tpv3': 'NetdevL2TPv3Options', 'socket': 'NetdevSocketOptions', + 'stream': 'NetdevStreamOptions', + 'dgram': 'NetdevDgramOptions', 'vde': 'NetdevVdeOptions', 'bridge': 'NetdevBridgeOptions', 'hubport': 'NetdevHubPortOptions', @@ -833,3 +898,52 @@ ## { 'event': 'FAILOVER_NEGOTIATED', 'data': {'device-id': 'str'} } + +## +# @NETDEV_STREAM_CONNECTED: +# +# Emitted when the netdev stream backend is connected +# +# @netdev-id: QEMU netdev id that is connected +# @addr: The destination address +# +# Since: 7.2 +# +# Example: +# +# <- { "event": "NETDEV_STREAM_CONNECTED", +# "data": { "netdev-id": "netdev0", +# "addr": { "port": "47666", "ipv6": true, +# "host": "::1", "type": "inet" } }, +# "timestamp": { "seconds": 1666269863, "microseconds": 311222 } } +# +# or +# +# <- { "event": "NETDEV_STREAM_CONNECTED", +# "data": { "netdev-id": "netdev0", +# "addr": { "path": "/tmp/qemu0", "type": "unix" } }, +# "timestamp": { "seconds": 1666269706, "microseconds": 413651 } } +# +## +{ 'event': 'NETDEV_STREAM_CONNECTED', + 'data': { 'netdev-id': 'str', + 'addr': 'SocketAddress' } } + +## +# @NETDEV_STREAM_DISCONNECTED: +# +# Emitted when the netdev stream backend is disconnected +# +# @netdev-id: QEMU netdev id that is disconnected +# +# Since: 7.2 +# +# Example: +# +# <- { 'event': 'NETDEV_STREAM_DISCONNECTED', +# 'data': {'netdev-id': 'netdev0'}, +# 'timestamp': {'seconds': 1663330937, 'microseconds': 526695} } +# +## +{ 'event': 'NETDEV_STREAM_DISCONNECTED', + 'data': { 'netdev-id': 'str' } } diff --git a/qapi/qom.json b/qapi/qom.json index 80dd419b39..87fcad2423 100644 --- a/qapi/qom.json +++ b/qapi/qom.json @@ -578,6 +578,9 @@ # # @prealloc-threads: number of CPU threads to use for prealloc (default: 1) # +# @prealloc-context: thread context to use for creation of preallocation threads +# (default: none) (since 7.2) +# # @share: if false, the memory is private to QEMU; if true, it is shared # (default: false) # @@ -608,6 +611,7 @@ '*policy': 'HostMemPolicy', '*prealloc': 'bool', '*prealloc-threads': 'uint32', + '*prealloc-context': 'str', '*share': 'bool', '*reserve': 'bool', 'size': 'size', @@ -831,6 +835,28 @@ '*kernel-hashes': 'bool' } } ## +# @ThreadContextProperties: +# +# Properties for thread context objects. +# +# @cpu-affinity: the list of host CPU numbers used as CPU affinity for all +# threads created in the thread context (default: QEMU main +# thread CPU affinity) +# +# @node-affinity: the list of host node numbers that will be resolved to a +# list of host CPU numbers used as CPU affinity. This is a +# shortcut for specifying the list of host CPU numbers +# belonging to the host nodes manually by setting +# @cpu-affinity. (default: QEMU main thread affinity) +# +# Since: 7.2 +## +{ 'struct': 'ThreadContextProperties', + 'data': { '*cpu-affinity': ['uint16'], + '*node-affinity': ['uint16'] } } + + +## # @ObjectType: # # Features: @@ -882,6 +908,7 @@ { 'name': 'secret_keyring', 'if': 'CONFIG_SECRET_KEYRING' }, 'sev-guest', + 'thread-context', 's390-pv-guest', 'throttle-group', 'tls-creds-anon', @@ -948,6 +975,7 @@ 'secret_keyring': { 'type': 'SecretKeyringProperties', 'if': 'CONFIG_SECRET_KEYRING' }, 'sev-guest': 'SevGuestProperties', + 'thread-context': 'ThreadContextProperties', 'throttle-group': 'ThrottleGroupProperties', 'tls-creds-anon': 'TlsCredsAnonProperties', 'tls-creds-psk': 'TlsCredsPskProperties', diff --git a/qapi/run-state.json b/qapi/run-state.json index 49989d30e6..419c188dd1 100644 --- a/qapi/run-state.json +++ b/qapi/run-state.json @@ -86,12 +86,16 @@ # ignores --no-reboot. This is useful for sanitizing # hypercalls on s390 that are used during kexec/kdump/boot # +# @snapshot-load: A snapshot is being loaded by the record & replay +# subsystem. This value is used only within QEMU. It +# doesn't occur in QMP. (since 7.2) +# ## { 'enum': 'ShutdownCause', # Beware, shutdown_caused_by_guest() depends on enumeration order 'data': [ 'none', 'host-error', 'host-qmp-quit', 'host-qmp-system-reset', 'host-signal', 'host-ui', 'guest-shutdown', 'guest-reset', - 'guest-panic', 'subsystem-reset'] } + 'guest-panic', 'subsystem-reset', 'snapshot-load'] } ## # @StatusInfo: diff --git a/qemu-img.c b/qemu-img.c index ace3adf8ae..a3b64c88af 100644 --- a/qemu-img.c +++ b/qemu-img.c @@ -4371,7 +4371,7 @@ static int img_bench(int argc, char **argv) struct timeval t1, t2; int i; bool force_share = false; - size_t buf_size; + size_t buf_size = 0; for (;;) { static const struct option long_options[] = { @@ -4570,7 +4570,7 @@ static int img_bench(int argc, char **argv) data.buf = blk_blockalign(blk, buf_size); memset(data.buf, pattern, data.nrreq * data.bufsize); - blk_register_buf(blk, data.buf, buf_size); + blk_register_buf(blk, data.buf, buf_size, &error_fatal); data.qiov = g_new(QEMUIOVector, data.nrreq); for (i = 0; i < data.nrreq; i++) { @@ -4593,7 +4593,7 @@ static int img_bench(int argc, char **argv) out: if (data.buf) { - blk_unregister_buf(blk, data.buf); + blk_unregister_buf(blk, data.buf, buf_size); } qemu_vfree(data.buf); blk_unref(blk); diff --git a/qemu-options.hx b/qemu-options.hx index eb38e5dc40..ceee0ddc25 100644 --- a/qemu-options.hx +++ b/qemu-options.hx @@ -2772,6 +2772,20 @@ DEF("netdev", HAS_ARG, QEMU_OPTION_netdev, "-netdev socket,id=str[,fd=h][,udp=host:port][,localaddr=host:port]\n" " configure a network backend to connect to another network\n" " using an UDP tunnel\n" + "-netdev stream,id=str[,server=on|off],addr.type=inet,addr.host=host,addr.port=port[,to=maxport][,numeric=on|off][,keep-alive=on|off][,mptcp=on|off][,addr.ipv4=on|off][,addr.ipv6=on|off]\n" + "-netdev stream,id=str[,server=on|off],addr.type=unix,addr.path=path[,abstract=on|off][,tight=on|off]\n" + "-netdev stream,id=str[,server=on|off],addr.type=fd,addr.str=file-descriptor\n" + " configure a network backend to connect to another network\n" + " using a socket connection in stream mode.\n" + "-netdev dgram,id=str,remote.type=inet,remote.host=maddr,remote.port=port[,local.type=inet,local.host=addr]\n" + "-netdev dgram,id=str,remote.type=inet,remote.host=maddr,remote.port=port[,local.type=fd,local.str=file-descriptor]\n" + " configure a network backend to connect to a multicast maddr and port\n" + " use ``local.host=addr`` to specify the host address to send packets from\n" + "-netdev dgram,id=str,local.type=inet,local.host=addr,local.port=port[,remote.type=inet,remote.host=addr,remote.port=port]\n" + "-netdev dgram,id=str,local.type=unix,local.path=path[,remote.type=unix,remote.path=path]\n" + "-netdev dgram,id=str,local.type=fd,local.str=file-descriptor\n" + " configure a network backend to connect to another network\n" + " using an UDP tunnel\n" #ifdef CONFIG_VDE "-netdev vde,id=str[,sock=socketpath][,port=n][,group=groupname][,mode=octalmode]\n" " configure a network backend to connect to port 'n' of a vde switch\n" @@ -2790,8 +2804,10 @@ DEF("netdev", HAS_ARG, QEMU_OPTION_netdev, " configure a vhost-user network, backed by a chardev 'dev'\n" #endif #ifdef __linux__ - "-netdev vhost-vdpa,id=str,vhostdev=/path/to/dev\n" + "-netdev vhost-vdpa,id=str[,vhostdev=/path/to/dev][,vhostfd=h]\n" " configure a vhost-vdpa network,Establish a vhost-vdpa netdev\n" + " use 'vhostdev=/path/to/dev' to open a vhost vdpa device\n" + " use 'vhostfd=h' to connect to an already opened vhost vdpa device\n" #endif #ifdef CONFIG_VMNET "-netdev vmnet-host,id=str[,isolated=on|off][,net-uuid=uuid]\n" @@ -3296,7 +3312,7 @@ SRST -netdev type=vhost-user,id=net0,chardev=chr0 \ -device virtio-net-pci,netdev=net0 -``-netdev vhost-vdpa,vhostdev=/path/to/dev`` +``-netdev vhost-vdpa[,vhostdev=/path/to/dev][,vhostfd=h]`` Establish a vhost-vdpa netdev. vDPA device is a device that uses a datapath which complies with diff --git a/qga/channel-posix.c b/qga/channel-posix.c index 6796a02cff..0c5175d957 100644 --- a/qga/channel-posix.c +++ b/qga/channel-posix.c @@ -138,7 +138,7 @@ static gboolean ga_channel_open(GAChannel *c, const gchar *path, 0 ); if (fd == -1) { - error_setg_errno(errp, errno, "error opening channel"); + error_setg_errno(errp, errno, "error opening channel '%s'", path); return false; } #ifdef CONFIG_SOLARIS @@ -149,6 +149,25 @@ static gboolean ga_channel_open(GAChannel *c, const gchar *path, return false; } #endif +#ifdef __FreeBSD__ + /* + * In the default state channel sends echo of every command to a + * client. The client programm doesn't expect this and raises an + * error. Suppress echo by resetting ECHO terminal flag. + */ + struct termios tio; + if (tcgetattr(fd, &tio) < 0) { + error_setg_errno(errp, errno, "error getting channel termios attrs"); + close(fd); + return false; + } + tio.c_lflag &= ~ECHO; + if (tcsetattr(fd, TCSAFLUSH, &tio) < 0) { + error_setg_errno(errp, errno, "error setting channel termios attrs"); + close(fd); + return false; + } +#endif /* __FreeBSD__ */ ret = ga_channel_client_add(c, fd); if (ret) { error_setg(errp, "error adding channel to main loop"); @@ -163,7 +182,7 @@ static gboolean ga_channel_open(GAChannel *c, const gchar *path, assert(fd < 0); fd = qga_open_cloexec(path, O_RDWR | O_NOCTTY | O_NONBLOCK, 0); if (fd == -1) { - error_setg_errno(errp, errno, "error opening channel"); + error_setg_errno(errp, errno, "error opening channel '%s'", path); return false; } tcgetattr(fd, &tio); diff --git a/qga/commands-bsd.c b/qga/commands-bsd.c new file mode 100644 index 0000000000..15cade2d4c --- /dev/null +++ b/qga/commands-bsd.c @@ -0,0 +1,200 @@ +/* + * QEMU Guest Agent BSD-specific command implementations + * + * Copyright (c) Virtuozzo International GmbH. + * + * Authors: + * Alexander Ivanov <alexander.ivanov@virtuozzo.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#include "qemu/osdep.h" +#include "qga-qapi-commands.h" +#include "qapi/qmp/qerror.h" +#include "qapi/error.h" +#include "qemu/queue.h" +#include "commands-common.h" +#include <sys/ioctl.h> +#include <sys/param.h> +#include <sys/ucred.h> +#include <sys/mount.h> +#include <net/if_dl.h> +#include <net/ethernet.h> +#include <paths.h> + +#if defined(CONFIG_FSFREEZE) || defined(CONFIG_FSTRIM) +bool build_fs_mount_list(FsMountList *mounts, Error **errp) +{ + FsMount *mount; + struct statfs *mntbuf, *mntp; + struct stat statbuf; + int i, count, ret; + + count = getmntinfo(&mntbuf, MNT_NOWAIT); + if (count == 0) { + error_setg_errno(errp, errno, "getmntinfo failed"); + return false; + } + + for (i = 0; i < count; i++) { + mntp = &mntbuf[i]; + ret = stat(mntp->f_mntonname, &statbuf); + if (ret != 0) { + error_setg_errno(errp, errno, "stat failed on %s", + mntp->f_mntonname); + return false; + } + + mount = g_new0(FsMount, 1); + + mount->dirname = g_strdup(mntp->f_mntonname); + mount->devtype = g_strdup(mntp->f_fstypename); + mount->devmajor = major(mount->dev); + mount->devminor = minor(mount->dev); + mount->fsid = mntp->f_fsid; + mount->dev = statbuf.st_dev; + + QTAILQ_INSERT_TAIL(mounts, mount, next); + } + return true; +} +#endif /* CONFIG_FSFREEZE || CONFIG_FSTRIM */ + +#if defined(CONFIG_FSFREEZE) +static int ufssuspend_fd = -1; +static int ufssuspend_cnt; + +int64_t qmp_guest_fsfreeze_do_freeze_list(bool has_mountpoints, + strList *mountpoints, + FsMountList mounts, + Error **errp) +{ + int ret; + strList *list; + struct FsMount *mount; + + if (ufssuspend_fd != -1) { + error_setg(errp, "filesystems have already frozen"); + return -1; + } + + ufssuspend_cnt = 0; + ufssuspend_fd = qemu_open(_PATH_UFSSUSPEND, O_RDWR, errp); + if (ufssuspend_fd == -1) { + return -1; + } + + QTAILQ_FOREACH_REVERSE(mount, &mounts, next) { + /* + * To issue fsfreeze in the reverse order of mounts, check if the + * mount is listed in the list here + */ + if (has_mountpoints) { + for (list = mountpoints; list; list = list->next) { + if (g_str_equal(list->value, mount->dirname)) { + break; + } + } + if (!list) { + continue; + } + } + + /* Only UFS supports suspend */ + if (!g_str_equal(mount->devtype, "ufs")) { + continue; + } + + ret = ioctl(ufssuspend_fd, UFSSUSPEND, &mount->fsid); + if (ret == -1) { + /* + * ioctl returns EBUSY for all the FS except the first one + * that was suspended + */ + if (errno == EBUSY) { + continue; + } + error_setg_errno(errp, errno, "failed to freeze %s", + mount->dirname); + goto error; + } + ufssuspend_cnt++; + } + return ufssuspend_cnt; +error: + close(ufssuspend_fd); + ufssuspend_fd = -1; + return -1; + +} + +/* + * We don't need to call UFSRESUME ioctl because all the frozen FS + * are thawed on /dev/ufssuspend closing. + */ +int qmp_guest_fsfreeze_do_thaw(Error **errp) +{ + int ret = ufssuspend_cnt; + ufssuspend_cnt = 0; + if (ufssuspend_fd != -1) { + close(ufssuspend_fd); + ufssuspend_fd = -1; + } + return ret; +} + +GuestFilesystemInfoList *qmp_guest_get_fsinfo(Error **errp) +{ + error_setg(errp, QERR_UNSUPPORTED); + return NULL; +} + +GuestDiskInfoList *qmp_guest_get_disks(Error **errp) +{ + error_setg(errp, QERR_UNSUPPORTED); + return NULL; +} + +GuestDiskStatsInfoList *qmp_guest_get_diskstats(Error **errp) +{ + error_setg(errp, QERR_UNSUPPORTED); + return NULL; +} + +GuestCpuStatsList *qmp_guest_get_cpustats(Error **errp) +{ + error_setg(errp, QERR_UNSUPPORTED); + return NULL; +} +#endif /* CONFIG_FSFREEZE */ + +#ifdef HAVE_GETIFADDRS +/* + * Fill "buf" with MAC address by ifaddrs. Pointer buf must point to a + * buffer with ETHER_ADDR_LEN length at least. + * + * Returns false in case of an error, otherwise true. "obtained" arguument + * is true if a MAC address was obtained successful, otherwise false. + */ +bool guest_get_hw_addr(struct ifaddrs *ifa, unsigned char *buf, + bool *obtained, Error **errp) +{ + struct sockaddr_dl *sdp; + + *obtained = false; + + if (ifa->ifa_addr->sa_family != AF_LINK) { + /* We can get HW address only for AF_LINK family. */ + g_debug("failed to get MAC address of %s", ifa->ifa_name); + return true; + } + + sdp = (struct sockaddr_dl *)ifa->ifa_addr; + memcpy(buf, sdp->sdl_data + sdp->sdl_nlen, ETHER_ADDR_LEN); + *obtained = true; + + return true; +} +#endif /* HAVE_GETIFADDRS */ diff --git a/qga/commands-common.h b/qga/commands-common.h index d0e4a9696f..8c1c56aac9 100644 --- a/qga/commands-common.h +++ b/qga/commands-common.h @@ -10,6 +10,57 @@ #define QGA_COMMANDS_COMMON_H #include "qga-qapi-types.h" +#include "guest-agent-core.h" +#include "qemu/queue.h" + +#if defined(__linux__) +#include <linux/fs.h> +#ifdef FIFREEZE +#define CONFIG_FSFREEZE +#endif +#ifdef FITRIM +#define CONFIG_FSTRIM +#endif +#endif /* __linux__ */ + +#ifdef __FreeBSD__ +#include <ufs/ffs/fs.h> +#ifdef UFSSUSPEND +#define CONFIG_FSFREEZE +#endif +#endif /* __FreeBSD__ */ + +#if defined(CONFIG_FSFREEZE) || defined(CONFIG_FSTRIM) +typedef struct FsMount { + char *dirname; + char *devtype; + unsigned int devmajor, devminor; +#if defined(__FreeBSD__) + dev_t dev; + fsid_t fsid; +#endif + QTAILQ_ENTRY(FsMount) next; +} FsMount; + +typedef QTAILQ_HEAD(FsMountList, FsMount) FsMountList; + +bool build_fs_mount_list(FsMountList *mounts, Error **errp); +void free_fs_mount_list(FsMountList *mounts); +#endif /* CONFIG_FSFREEZE || CONFIG_FSTRIM */ + +#if defined(CONFIG_FSFREEZE) +int64_t qmp_guest_fsfreeze_do_freeze_list(bool has_mountpoints, + strList *mountpoints, + FsMountList mounts, + Error **errp); +int qmp_guest_fsfreeze_do_thaw(Error **errp); +#endif /* CONFIG_FSFREEZE */ + +#ifdef HAVE_GETIFADDRS +#include <ifaddrs.h> +bool guest_get_hw_addr(struct ifaddrs *ifa, unsigned char *buf, + bool *obtained, Error **errp); +#endif typedef struct GuestFileHandle GuestFileHandle; diff --git a/qga/commands-linux.c b/qga/commands-linux.c new file mode 100644 index 0000000000..214e408fcd --- /dev/null +++ b/qga/commands-linux.c @@ -0,0 +1,286 @@ +/* + * QEMU Guest Agent Linux-specific command implementations + * + * Copyright IBM Corp. 2011 + * + * Authors: + * Michael Roth <mdroth@linux.vnet.ibm.com> + * Michal Privoznik <mprivozn@redhat.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "commands-common.h" +#include "cutils.h" +#include <mntent.h> +#include <sys/ioctl.h> + +#if defined(CONFIG_FSFREEZE) || defined(CONFIG_FSTRIM) +static int dev_major_minor(const char *devpath, + unsigned int *devmajor, unsigned int *devminor) +{ + struct stat st; + + *devmajor = 0; + *devminor = 0; + + if (stat(devpath, &st) < 0) { + slog("failed to stat device file '%s': %s", devpath, strerror(errno)); + return -1; + } + if (S_ISDIR(st.st_mode)) { + /* It is bind mount */ + return -2; + } + if (S_ISBLK(st.st_mode)) { + *devmajor = major(st.st_rdev); + *devminor = minor(st.st_rdev); + return 0; + } + return -1; +} + +static bool build_fs_mount_list_from_mtab(FsMountList *mounts, Error **errp) +{ + struct mntent *ment; + FsMount *mount; + char const *mtab = "/proc/self/mounts"; + FILE *fp; + unsigned int devmajor, devminor; + + fp = setmntent(mtab, "r"); + if (!fp) { + error_setg(errp, "failed to open mtab file: '%s'", mtab); + return false; + } + + while ((ment = getmntent(fp))) { + /* + * An entry which device name doesn't start with a '/' is + * either a dummy file system or a network file system. + * Add special handling for smbfs and cifs as is done by + * coreutils as well. + */ + if ((ment->mnt_fsname[0] != '/') || + (strcmp(ment->mnt_type, "smbfs") == 0) || + (strcmp(ment->mnt_type, "cifs") == 0)) { + continue; + } + if (dev_major_minor(ment->mnt_fsname, &devmajor, &devminor) == -2) { + /* Skip bind mounts */ + continue; + } + + mount = g_new0(FsMount, 1); + mount->dirname = g_strdup(ment->mnt_dir); + mount->devtype = g_strdup(ment->mnt_type); + mount->devmajor = devmajor; + mount->devminor = devminor; + + QTAILQ_INSERT_TAIL(mounts, mount, next); + } + + endmntent(fp); + return true; +} + +static void decode_mntname(char *name, int len) +{ + int i, j = 0; + for (i = 0; i <= len; i++) { + if (name[i] != '\\') { + name[j++] = name[i]; + } else if (name[i + 1] == '\\') { + name[j++] = '\\'; + i++; + } else if (name[i + 1] >= '0' && name[i + 1] <= '3' && + name[i + 2] >= '0' && name[i + 2] <= '7' && + name[i + 3] >= '0' && name[i + 3] <= '7') { + name[j++] = (name[i + 1] - '0') * 64 + + (name[i + 2] - '0') * 8 + + (name[i + 3] - '0'); + i += 3; + } else { + name[j++] = name[i]; + } + } +} + +/* + * Walk the mount table and build a list of local file systems + */ +bool build_fs_mount_list(FsMountList *mounts, Error **errp) +{ + FsMount *mount; + char const *mountinfo = "/proc/self/mountinfo"; + FILE *fp; + char *line = NULL, *dash; + size_t n; + char check; + unsigned int devmajor, devminor; + int ret, dir_s, dir_e, type_s, type_e, dev_s, dev_e; + + fp = fopen(mountinfo, "r"); + if (!fp) { + return build_fs_mount_list_from_mtab(mounts, errp); + } + + while (getline(&line, &n, fp) != -1) { + ret = sscanf(line, "%*u %*u %u:%u %*s %n%*s%n%c", + &devmajor, &devminor, &dir_s, &dir_e, &check); + if (ret < 3) { + continue; + } + dash = strstr(line + dir_e, " - "); + if (!dash) { + continue; + } + ret = sscanf(dash, " - %n%*s%n %n%*s%n%c", + &type_s, &type_e, &dev_s, &dev_e, &check); + if (ret < 1) { + continue; + } + line[dir_e] = 0; + dash[type_e] = 0; + dash[dev_e] = 0; + decode_mntname(line + dir_s, dir_e - dir_s); + decode_mntname(dash + dev_s, dev_e - dev_s); + if (devmajor == 0) { + /* btrfs reports major number = 0 */ + if (strcmp("btrfs", dash + type_s) != 0 || + dev_major_minor(dash + dev_s, &devmajor, &devminor) < 0) { + continue; + } + } + + mount = g_new0(FsMount, 1); + mount->dirname = g_strdup(line + dir_s); + mount->devtype = g_strdup(dash + type_s); + mount->devmajor = devmajor; + mount->devminor = devminor; + + QTAILQ_INSERT_TAIL(mounts, mount, next); + } + free(line); + + fclose(fp); + return true; +} +#endif /* CONFIG_FSFREEZE || CONFIG_FSTRIM */ + +#ifdef CONFIG_FSFREEZE +/* + * Walk list of mounted file systems in the guest, and freeze the ones which + * are real local file systems. + */ +int64_t qmp_guest_fsfreeze_do_freeze_list(bool has_mountpoints, + strList *mountpoints, + FsMountList mounts, + Error **errp) +{ + struct FsMount *mount; + strList *list; + int fd, ret, i = 0; + + QTAILQ_FOREACH_REVERSE(mount, &mounts, next) { + /* To issue fsfreeze in the reverse order of mounts, check if the + * mount is listed in the list here */ + if (has_mountpoints) { + for (list = mountpoints; list; list = list->next) { + if (strcmp(list->value, mount->dirname) == 0) { + break; + } + } + if (!list) { + continue; + } + } + + fd = qga_open_cloexec(mount->dirname, O_RDONLY, 0); + if (fd == -1) { + error_setg_errno(errp, errno, "failed to open %s", mount->dirname); + return -1; + } + + /* we try to cull filesystems we know won't work in advance, but other + * filesystems may not implement fsfreeze for less obvious reasons. + * these will report EOPNOTSUPP. we simply ignore these when tallying + * the number of frozen filesystems. + * if a filesystem is mounted more than once (aka bind mount) a + * consecutive attempt to freeze an already frozen filesystem will + * return EBUSY. + * + * any other error means a failure to freeze a filesystem we + * expect to be freezable, so return an error in those cases + * and return system to thawed state. + */ + ret = ioctl(fd, FIFREEZE); + if (ret == -1) { + if (errno != EOPNOTSUPP && errno != EBUSY) { + error_setg_errno(errp, errno, "failed to freeze %s", + mount->dirname); + close(fd); + return -1; + } + } else { + i++; + } + close(fd); + } + return i; +} + +int qmp_guest_fsfreeze_do_thaw(Error **errp) +{ + int ret; + FsMountList mounts; + FsMount *mount; + int fd, i = 0, logged; + Error *local_err = NULL; + + QTAILQ_INIT(&mounts); + if (!build_fs_mount_list(&mounts, &local_err)) { + error_propagate(errp, local_err); + return -1; + } + + QTAILQ_FOREACH(mount, &mounts, next) { + logged = false; + fd = qga_open_cloexec(mount->dirname, O_RDONLY, 0); + if (fd == -1) { + continue; + } + /* we have no way of knowing whether a filesystem was actually unfrozen + * as a result of a successful call to FITHAW, only that if an error + * was returned the filesystem was *not* unfrozen by that particular + * call. + * + * since multiple preceding FIFREEZEs require multiple calls to FITHAW + * to unfreeze, continuing issuing FITHAW until an error is returned, + * in which case either the filesystem is in an unfreezable state, or, + * more likely, it was thawed previously (and remains so afterward). + * + * also, since the most recent successful call is the one that did + * the actual unfreeze, we can use this to provide an accurate count + * of the number of filesystems unfrozen by guest-fsfreeze-thaw, which + * may * be useful for determining whether a filesystem was unfrozen + * during the freeze/thaw phase by a process other than qemu-ga. + */ + do { + ret = ioctl(fd, FITHAW); + if (ret == 0 && !logged) { + i++; + logged = true; + } + } while (ret == 0); + close(fd); + } + + free_fs_mount_list(&mounts); + + return i; +} +#endif /* CONFIG_FSFREEZE */ diff --git a/qga/commands-posix.c b/qga/commands-posix.c index eea819cff0..32493d6383 100644 --- a/qga/commands-posix.c +++ b/qga/commands-posix.c @@ -16,11 +16,9 @@ #include <sys/utsname.h> #include <sys/wait.h> #include <dirent.h> -#include "guest-agent-core.h" #include "qga-qapi-commands.h" #include "qapi/error.h" #include "qapi/qmp/qerror.h" -#include "qemu/queue.h" #include "qemu/host-utils.h" #include "qemu/sockets.h" #include "qemu/base64.h" @@ -35,28 +33,20 @@ #if defined(__linux__) #include <mntent.h> -#include <linux/fs.h> #include <sys/statvfs.h> #include <linux/nvme_ioctl.h> #ifdef CONFIG_LIBUDEV #include <libudev.h> #endif - -#ifdef FIFREEZE -#define CONFIG_FSFREEZE -#endif -#ifdef FITRIM -#define CONFIG_FSTRIM -#endif #endif #ifdef HAVE_GETIFADDRS #include <arpa/inet.h> #include <sys/socket.h> #include <net/if.h> +#include <net/ethernet.h> #include <sys/types.h> -#include <ifaddrs.h> #ifdef CONFIG_SOLARIS #include <sys/sockio.h> #endif @@ -92,6 +82,10 @@ void qmp_guest_shutdown(bool has_mode, const char *mode, Error **errp) const char *powerdown_flag = "-i5"; const char *halt_flag = "-i0"; const char *reboot_flag = "-i6"; +#elif defined(CONFIG_BSD) + const char *powerdown_flag = "-p"; + const char *halt_flag = "-h"; + const char *reboot_flag = "-r"; #else const char *powerdown_flag = "-P"; const char *halt_flag = "-H"; @@ -122,6 +116,9 @@ void qmp_guest_shutdown(bool has_mode, const char *mode, Error **errp) #ifdef CONFIG_SOLARIS execl("/sbin/shutdown", "shutdown", shutdown_flag, "-g0", "-y", "hypervisor initiated shutdown", (char *)NULL); +#elif defined(CONFIG_BSD) + execl("/sbin/shutdown", "shutdown", shutdown_flag, "+0", + "hypervisor initiated shutdown", (char *)NULL); #else execl("/sbin/shutdown", "shutdown", "-h", shutdown_flag, "+0", "hypervisor initiated shutdown", (char *)NULL); @@ -617,20 +614,8 @@ void qmp_guest_file_flush(int64_t handle, Error **errp) } } -/* linux-specific implementations. avoid this if at all possible. */ -#if defined(__linux__) - #if defined(CONFIG_FSFREEZE) || defined(CONFIG_FSTRIM) -typedef struct FsMount { - char *dirname; - char *devtype; - unsigned int devmajor, devminor; - QTAILQ_ENTRY(FsMount) next; -} FsMount; - -typedef QTAILQ_HEAD(FsMountList, FsMount) FsMountList; - -static void free_fs_mount_list(FsMountList *mounts) +void free_fs_mount_list(FsMountList *mounts) { FsMount *mount, *temp; @@ -645,159 +630,158 @@ static void free_fs_mount_list(FsMountList *mounts) g_free(mount); } } +#endif + +#if defined(CONFIG_FSFREEZE) +typedef enum { + FSFREEZE_HOOK_THAW = 0, + FSFREEZE_HOOK_FREEZE, +} FsfreezeHookArg; + +static const char *fsfreeze_hook_arg_string[] = { + "thaw", + "freeze", +}; -static int dev_major_minor(const char *devpath, - unsigned int *devmajor, unsigned int *devminor) +static void execute_fsfreeze_hook(FsfreezeHookArg arg, Error **errp) { - struct stat st; + int status; + pid_t pid; + const char *hook; + const char *arg_str = fsfreeze_hook_arg_string[arg]; + Error *local_err = NULL; + + hook = ga_fsfreeze_hook(ga_state); + if (!hook) { + return; + } + if (access(hook, X_OK) != 0) { + error_setg_errno(errp, errno, "can't access fsfreeze hook '%s'", hook); + return; + } - *devmajor = 0; - *devminor = 0; + slog("executing fsfreeze hook with arg '%s'", arg_str); + pid = fork(); + if (pid == 0) { + setsid(); + reopen_fd_to_null(0); + reopen_fd_to_null(1); + reopen_fd_to_null(2); - if (stat(devpath, &st) < 0) { - slog("failed to stat device file '%s': %s", devpath, strerror(errno)); - return -1; + execl(hook, hook, arg_str, NULL); + _exit(EXIT_FAILURE); + } else if (pid < 0) { + error_setg_errno(errp, errno, "failed to create child process"); + return; } - if (S_ISDIR(st.st_mode)) { - /* It is bind mount */ - return -2; + + ga_wait_child(pid, &status, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; } - if (S_ISBLK(st.st_mode)) { - *devmajor = major(st.st_rdev); - *devminor = minor(st.st_rdev); - return 0; + + if (!WIFEXITED(status)) { + error_setg(errp, "fsfreeze hook has terminated abnormally"); + return; + } + + status = WEXITSTATUS(status); + if (status) { + error_setg(errp, "fsfreeze hook has failed with status %d", status); + return; } - return -1; } /* - * Walk the mount table and build a list of local file systems + * Return status of freeze/thaw */ -static bool build_fs_mount_list_from_mtab(FsMountList *mounts, Error **errp) +GuestFsfreezeStatus qmp_guest_fsfreeze_status(Error **errp) { - struct mntent *ment; - FsMount *mount; - char const *mtab = "/proc/self/mounts"; - FILE *fp; - unsigned int devmajor, devminor; - - fp = setmntent(mtab, "r"); - if (!fp) { - error_setg(errp, "failed to open mtab file: '%s'", mtab); - return false; + if (ga_is_frozen(ga_state)) { + return GUEST_FSFREEZE_STATUS_FROZEN; } - while ((ment = getmntent(fp))) { - /* - * An entry which device name doesn't start with a '/' is - * either a dummy file system or a network file system. - * Add special handling for smbfs and cifs as is done by - * coreutils as well. - */ - if ((ment->mnt_fsname[0] != '/') || - (strcmp(ment->mnt_type, "smbfs") == 0) || - (strcmp(ment->mnt_type, "cifs") == 0)) { - continue; - } - if (dev_major_minor(ment->mnt_fsname, &devmajor, &devminor) == -2) { - /* Skip bind mounts */ - continue; - } + return GUEST_FSFREEZE_STATUS_THAWED; +} + +int64_t qmp_guest_fsfreeze_freeze(Error **errp) +{ + return qmp_guest_fsfreeze_freeze_list(false, NULL, errp); +} + +int64_t qmp_guest_fsfreeze_freeze_list(bool has_mountpoints, + strList *mountpoints, + Error **errp) +{ + int ret; + FsMountList mounts; + Error *local_err = NULL; - mount = g_new0(FsMount, 1); - mount->dirname = g_strdup(ment->mnt_dir); - mount->devtype = g_strdup(ment->mnt_type); - mount->devmajor = devmajor; - mount->devminor = devminor; + slog("guest-fsfreeze called"); - QTAILQ_INSERT_TAIL(mounts, mount, next); + execute_fsfreeze_hook(FSFREEZE_HOOK_FREEZE, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return -1; } - endmntent(fp); - return true; -} + QTAILQ_INIT(&mounts); + if (!build_fs_mount_list(&mounts, &local_err)) { + error_propagate(errp, local_err); + return -1; + } -static void decode_mntname(char *name, int len) -{ - int i, j = 0; - for (i = 0; i <= len; i++) { - if (name[i] != '\\') { - name[j++] = name[i]; - } else if (name[i + 1] == '\\') { - name[j++] = '\\'; - i++; - } else if (name[i + 1] >= '0' && name[i + 1] <= '3' && - name[i + 2] >= '0' && name[i + 2] <= '7' && - name[i + 3] >= '0' && name[i + 3] <= '7') { - name[j++] = (name[i + 1] - '0') * 64 + - (name[i + 2] - '0') * 8 + - (name[i + 3] - '0'); - i += 3; - } else { - name[j++] = name[i]; - } + /* cannot risk guest agent blocking itself on a write in this state */ + ga_set_frozen(ga_state); + + ret = qmp_guest_fsfreeze_do_freeze_list(has_mountpoints, mountpoints, + mounts, errp); + + free_fs_mount_list(&mounts); + /* We may not issue any FIFREEZE here. + * Just unset ga_state here and ready for the next call. + */ + if (ret == 0) { + ga_unset_frozen(ga_state); + } else if (ret < 0) { + qmp_guest_fsfreeze_thaw(NULL); } + return ret; } -static bool build_fs_mount_list(FsMountList *mounts, Error **errp) +int64_t qmp_guest_fsfreeze_thaw(Error **errp) { - FsMount *mount; - char const *mountinfo = "/proc/self/mountinfo"; - FILE *fp; - char *line = NULL, *dash; - size_t n; - char check; - unsigned int devmajor, devminor; - int ret, dir_s, dir_e, type_s, type_e, dev_s, dev_e; + int ret; - fp = fopen(mountinfo, "r"); - if (!fp) { - return build_fs_mount_list_from_mtab(mounts, errp); + ret = qmp_guest_fsfreeze_do_thaw(errp); + if (ret >= 0) { + ga_unset_frozen(ga_state); + execute_fsfreeze_hook(FSFREEZE_HOOK_THAW, errp); + } else { + ret = 0; } - while (getline(&line, &n, fp) != -1) { - ret = sscanf(line, "%*u %*u %u:%u %*s %n%*s%n%c", - &devmajor, &devminor, &dir_s, &dir_e, &check); - if (ret < 3) { - continue; - } - dash = strstr(line + dir_e, " - "); - if (!dash) { - continue; - } - ret = sscanf(dash, " - %n%*s%n %n%*s%n%c", - &type_s, &type_e, &dev_s, &dev_e, &check); - if (ret < 1) { - continue; - } - line[dir_e] = 0; - dash[type_e] = 0; - dash[dev_e] = 0; - decode_mntname(line + dir_s, dir_e - dir_s); - decode_mntname(dash + dev_s, dev_e - dev_s); - if (devmajor == 0) { - /* btrfs reports major number = 0 */ - if (strcmp("btrfs", dash + type_s) != 0 || - dev_major_minor(dash + dev_s, &devmajor, &devminor) < 0) { - continue; - } - } + return ret; +} - mount = g_new0(FsMount, 1); - mount->dirname = g_strdup(line + dir_s); - mount->devtype = g_strdup(dash + type_s); - mount->devmajor = devmajor; - mount->devminor = devminor; +static void guest_fsfreeze_cleanup(void) +{ + Error *err = NULL; - QTAILQ_INSERT_TAIL(mounts, mount, next); + if (ga_is_frozen(ga_state) == GUEST_FSFREEZE_STATUS_FROZEN) { + qmp_guest_fsfreeze_thaw(&err); + if (err) { + slog("failed to clean up frozen filesystems: %s", + error_get_pretty(err)); + error_free(err); + } } - free(line); - - fclose(fp); - return true; } #endif +/* linux-specific implementations. avoid this if at all possible. */ +#if defined(__linux__) #if defined(CONFIG_FSFREEZE) static char *get_pci_driver(char const *syspath, int pathlen, Error **errp) @@ -1621,248 +1605,6 @@ GuestFilesystemInfoList *qmp_guest_get_fsinfo(Error **errp) free_fs_mount_list(&mounts); return ret; } - - -typedef enum { - FSFREEZE_HOOK_THAW = 0, - FSFREEZE_HOOK_FREEZE, -} FsfreezeHookArg; - -static const char *fsfreeze_hook_arg_string[] = { - "thaw", - "freeze", -}; - -static void execute_fsfreeze_hook(FsfreezeHookArg arg, Error **errp) -{ - int status; - pid_t pid; - const char *hook; - const char *arg_str = fsfreeze_hook_arg_string[arg]; - Error *local_err = NULL; - - hook = ga_fsfreeze_hook(ga_state); - if (!hook) { - return; - } - if (access(hook, X_OK) != 0) { - error_setg_errno(errp, errno, "can't access fsfreeze hook '%s'", hook); - return; - } - - slog("executing fsfreeze hook with arg '%s'", arg_str); - pid = fork(); - if (pid == 0) { - setsid(); - reopen_fd_to_null(0); - reopen_fd_to_null(1); - reopen_fd_to_null(2); - - execl(hook, hook, arg_str, NULL); - _exit(EXIT_FAILURE); - } else if (pid < 0) { - error_setg_errno(errp, errno, "failed to create child process"); - return; - } - - ga_wait_child(pid, &status, &local_err); - if (local_err) { - error_propagate(errp, local_err); - return; - } - - if (!WIFEXITED(status)) { - error_setg(errp, "fsfreeze hook has terminated abnormally"); - return; - } - - status = WEXITSTATUS(status); - if (status) { - error_setg(errp, "fsfreeze hook has failed with status %d", status); - return; - } -} - -/* - * Return status of freeze/thaw - */ -GuestFsfreezeStatus qmp_guest_fsfreeze_status(Error **errp) -{ - if (ga_is_frozen(ga_state)) { - return GUEST_FSFREEZE_STATUS_FROZEN; - } - - return GUEST_FSFREEZE_STATUS_THAWED; -} - -int64_t qmp_guest_fsfreeze_freeze(Error **errp) -{ - return qmp_guest_fsfreeze_freeze_list(false, NULL, errp); -} - -/* - * Walk list of mounted file systems in the guest, and freeze the ones which - * are real local file systems. - */ -int64_t qmp_guest_fsfreeze_freeze_list(bool has_mountpoints, - strList *mountpoints, - Error **errp) -{ - int ret = 0, i = 0; - strList *list; - FsMountList mounts; - struct FsMount *mount; - Error *local_err = NULL; - int fd; - - slog("guest-fsfreeze called"); - - execute_fsfreeze_hook(FSFREEZE_HOOK_FREEZE, &local_err); - if (local_err) { - error_propagate(errp, local_err); - return -1; - } - - QTAILQ_INIT(&mounts); - if (!build_fs_mount_list(&mounts, &local_err)) { - error_propagate(errp, local_err); - return -1; - } - - /* cannot risk guest agent blocking itself on a write in this state */ - ga_set_frozen(ga_state); - - QTAILQ_FOREACH_REVERSE(mount, &mounts, next) { - /* To issue fsfreeze in the reverse order of mounts, check if the - * mount is listed in the list here */ - if (has_mountpoints) { - for (list = mountpoints; list; list = list->next) { - if (strcmp(list->value, mount->dirname) == 0) { - break; - } - } - if (!list) { - continue; - } - } - - fd = qga_open_cloexec(mount->dirname, O_RDONLY, 0); - if (fd == -1) { - error_setg_errno(errp, errno, "failed to open %s", mount->dirname); - goto error; - } - - /* we try to cull filesystems we know won't work in advance, but other - * filesystems may not implement fsfreeze for less obvious reasons. - * these will report EOPNOTSUPP. we simply ignore these when tallying - * the number of frozen filesystems. - * if a filesystem is mounted more than once (aka bind mount) a - * consecutive attempt to freeze an already frozen filesystem will - * return EBUSY. - * - * any other error means a failure to freeze a filesystem we - * expect to be freezable, so return an error in those cases - * and return system to thawed state. - */ - ret = ioctl(fd, FIFREEZE); - if (ret == -1) { - if (errno != EOPNOTSUPP && errno != EBUSY) { - error_setg_errno(errp, errno, "failed to freeze %s", - mount->dirname); - close(fd); - goto error; - } - } else { - i++; - } - close(fd); - } - - free_fs_mount_list(&mounts); - /* We may not issue any FIFREEZE here. - * Just unset ga_state here and ready for the next call. - */ - if (i == 0) { - ga_unset_frozen(ga_state); - } - return i; - -error: - free_fs_mount_list(&mounts); - qmp_guest_fsfreeze_thaw(NULL); - return 0; -} - -/* - * Walk list of frozen file systems in the guest, and thaw them. - */ -int64_t qmp_guest_fsfreeze_thaw(Error **errp) -{ - int ret; - FsMountList mounts; - FsMount *mount; - int fd, i = 0, logged; - Error *local_err = NULL; - - QTAILQ_INIT(&mounts); - if (!build_fs_mount_list(&mounts, &local_err)) { - error_propagate(errp, local_err); - return 0; - } - - QTAILQ_FOREACH(mount, &mounts, next) { - logged = false; - fd = qga_open_cloexec(mount->dirname, O_RDONLY, 0); - if (fd == -1) { - continue; - } - /* we have no way of knowing whether a filesystem was actually unfrozen - * as a result of a successful call to FITHAW, only that if an error - * was returned the filesystem was *not* unfrozen by that particular - * call. - * - * since multiple preceding FIFREEZEs require multiple calls to FITHAW - * to unfreeze, continuing issuing FITHAW until an error is returned, - * in which case either the filesystem is in an unfreezable state, or, - * more likely, it was thawed previously (and remains so afterward). - * - * also, since the most recent successful call is the one that did - * the actual unfreeze, we can use this to provide an accurate count - * of the number of filesystems unfrozen by guest-fsfreeze-thaw, which - * may * be useful for determining whether a filesystem was unfrozen - * during the freeze/thaw phase by a process other than qemu-ga. - */ - do { - ret = ioctl(fd, FITHAW); - if (ret == 0 && !logged) { - i++; - logged = true; - } - } while (ret == 0); - close(fd); - } - - ga_unset_frozen(ga_state); - free_fs_mount_list(&mounts); - - execute_fsfreeze_hook(FSFREEZE_HOOK_THAW, errp); - - return i; -} - -static void guest_fsfreeze_cleanup(void) -{ - Error *err = NULL; - - if (ga_is_frozen(ga_state) == GUEST_FSFREEZE_STATUS_FROZEN) { - qmp_guest_fsfreeze_thaw(&err); - if (err) { - slog("failed to clean up frozen filesystems: %s", - error_get_pretty(err)); - error_free(err); - } - } -} #endif /* CONFIG_FSFREEZE */ #if defined(CONFIG_FSTRIM) @@ -2372,7 +2114,9 @@ int64_t qmp_guest_set_vcpus(GuestLogicalProcessorList *vcpus, Error **errp) return processed; } +#endif /* __linux__ */ +#if defined(__linux__) || defined(__FreeBSD__) void qmp_guest_set_user_password(const char *username, const char *password, bool crypted, @@ -2406,10 +2150,15 @@ void qmp_guest_set_user_password(const char *username, goto out; } +#ifdef __FreeBSD__ + chpasswddata = g_strdup(rawpasswddata); + passwd_path = g_find_program_in_path("pw"); +#else chpasswddata = g_strdup_printf("%s:%s\n", username, rawpasswddata); - chpasswdlen = strlen(chpasswddata); - passwd_path = g_find_program_in_path("chpasswd"); +#endif + + chpasswdlen = strlen(chpasswddata); if (!passwd_path) { error_setg(errp, "cannot find 'passwd' program in PATH"); @@ -2430,11 +2179,17 @@ void qmp_guest_set_user_password(const char *username, reopen_fd_to_null(1); reopen_fd_to_null(2); +#ifdef __FreeBSD__ + const char *h_arg; + h_arg = (crypted) ? "-H" : "-h"; + execl(passwd_path, "pw", "usermod", "-n", username, h_arg, "0", NULL); +#else if (crypted) { execl(passwd_path, "chpasswd", "-e", NULL); } else { execl(passwd_path, "chpasswd", NULL); } +#endif _exit(EXIT_FAILURE); } else if (pid < 0) { error_setg_errno(errp, errno, "failed to create child process"); @@ -2477,7 +2232,17 @@ out: close(datafd[1]); } } +#else /* __linux__ || __FreeBSD__ */ +void qmp_guest_set_user_password(const char *username, + const char *password, + bool crypted, + Error **errp) +{ + error_setg(errp, QERR_UNSUPPORTED); +} +#endif /* __linux__ || __FreeBSD__ */ +#ifdef __linux__ static void ga_read_sysfs_file(int dirfd, const char *pathname, char *buf, int size, Error **errp) { @@ -3014,14 +2779,6 @@ int64_t qmp_guest_set_vcpus(GuestLogicalProcessorList *vcpus, Error **errp) return -1; } -void qmp_guest_set_user_password(const char *username, - const char *password, - bool crypted, - Error **errp) -{ - error_setg(errp, QERR_UNSUPPORTED); -} - GuestMemoryBlockList *qmp_guest_get_memory_blocks(Error **errp) { error_setg(errp, QERR_UNSUPPORTED); @@ -3124,6 +2881,57 @@ static int guest_get_network_stats(const char *name, return -1; } +#ifndef __FreeBSD__ +/* + * Fill "buf" with MAC address by ifaddrs. Pointer buf must point to a + * buffer with ETHER_ADDR_LEN length at least. + * + * Returns false in case of an error, otherwise true. "obtained" argument + * is true if a MAC address was obtained successful, otherwise false. + */ +bool guest_get_hw_addr(struct ifaddrs *ifa, unsigned char *buf, + bool *obtained, Error **errp) +{ + struct ifreq ifr; + int sock; + + *obtained = false; + + /* we haven't obtained HW address yet */ + sock = socket(PF_INET, SOCK_STREAM, 0); + if (sock == -1) { + error_setg_errno(errp, errno, "failed to create socket"); + return false; + } + + memset(&ifr, 0, sizeof(ifr)); + pstrcpy(ifr.ifr_name, IF_NAMESIZE, ifa->ifa_name); + if (ioctl(sock, SIOCGIFHWADDR, &ifr) == -1) { + /* + * We can't get the hw addr of this interface, but that's not a + * fatal error. + */ + if (errno == EADDRNOTAVAIL) { + /* The interface doesn't have a hw addr (e.g. loopback). */ + g_debug("failed to get MAC address of %s: %s", + ifa->ifa_name, strerror(errno)); + } else{ + g_warning("failed to get MAC address of %s: %s", + ifa->ifa_name, strerror(errno)); + } + } else { +#ifdef CONFIG_SOLARIS + memcpy(buf, &ifr.ifr_addr.sa_data, ETHER_ADDR_LEN); +#else + memcpy(buf, &ifr.ifr_hwaddr.sa_data, ETHER_ADDR_LEN); +#endif + *obtained = true; + } + close(sock); + return true; +} +#endif /* __FreeBSD__ */ + /* * Build information about guest interfaces */ @@ -3144,9 +2952,8 @@ GuestNetworkInterfaceList *qmp_guest_network_get_interfaces(Error **errp) GuestNetworkInterfaceStat *interface_stat = NULL; char addr4[INET_ADDRSTRLEN]; char addr6[INET6_ADDRSTRLEN]; - int sock; - struct ifreq ifr; - unsigned char *mac_addr; + unsigned char mac_addr[ETHER_ADDR_LEN]; + bool obtained; void *p; g_debug("Processing %s interface", ifa->ifa_name); @@ -3161,45 +2968,17 @@ GuestNetworkInterfaceList *qmp_guest_network_get_interfaces(Error **errp) } if (!info->has_hardware_address) { - /* we haven't obtained HW address yet */ - sock = socket(PF_INET, SOCK_STREAM, 0); - if (sock == -1) { - error_setg_errno(errp, errno, "failed to create socket"); + if (!guest_get_hw_addr(ifa, mac_addr, &obtained, errp)) { goto error; } - - memset(&ifr, 0, sizeof(ifr)); - pstrcpy(ifr.ifr_name, IF_NAMESIZE, info->name); - if (ioctl(sock, SIOCGIFHWADDR, &ifr) == -1) { - /* - * We can't get the hw addr of this interface, but that's not a - * fatal error. Don't set info->hardware_address, but keep - * going. - */ - if (errno == EADDRNOTAVAIL) { - /* The interface doesn't have a hw addr (e.g. loopback). */ - g_debug("failed to get MAC address of %s: %s", - ifa->ifa_name, strerror(errno)); - } else{ - g_warning("failed to get MAC address of %s: %s", - ifa->ifa_name, strerror(errno)); - } - - } else { -#ifdef CONFIG_SOLARIS - mac_addr = (unsigned char *) &ifr.ifr_addr.sa_data; -#else - mac_addr = (unsigned char *) &ifr.ifr_hwaddr.sa_data; -#endif + if (obtained) { info->hardware_address = g_strdup_printf("%02x:%02x:%02x:%02x:%02x:%02x", (int) mac_addr[0], (int) mac_addr[1], (int) mac_addr[2], (int) mac_addr[3], (int) mac_addr[4], (int) mac_addr[5]); - info->has_hardware_address = true; } - close(sock); } if (ifa->ifa_addr && diff --git a/qga/main.c b/qga/main.c index 5a9d8252e0..b3580508fa 100644 --- a/qga/main.c +++ b/qga/main.c @@ -37,17 +37,16 @@ #include "qga/service-win32.h" #include "qga/vss-win32.h" #endif -#ifdef __linux__ -#include <linux/fs.h> -#ifdef FIFREEZE -#define CONFIG_FSFREEZE -#endif -#endif +#include "commands-common.h" #ifndef _WIN32 +#ifdef __FreeBSD__ +#define QGA_VIRTIO_PATH_DEFAULT "/dev/vtcon/org.qemu.guest_agent.0" +#else /* __FreeBSD__ */ #define QGA_VIRTIO_PATH_DEFAULT "/dev/virtio-ports/org.qemu.guest_agent.0" -#define QGA_STATE_RELATIVE_DIR "run" +#endif /* __FreeBSD__ */ #define QGA_SERIAL_PATH_DEFAULT "/dev/ttyS0" +#define QGA_STATE_RELATIVE_DIR "run" #else #define QGA_VIRTIO_PATH_DEFAULT "\\\\.\\Global\\org.qemu.guest_agent.0" #define QGA_STATE_RELATIVE_DIR "qemu-ga" diff --git a/qga/meson.build b/qga/meson.build index a0ffd6d268..3cfb9166e5 100644 --- a/qga/meson.build +++ b/qga/meson.build @@ -72,6 +72,12 @@ qga_ss.add(when: 'CONFIG_POSIX', if_true: files( 'commands-posix.c', 'commands-posix-ssh.c', )) +qga_ss.add(when: 'CONFIG_LINUX', if_true: files( + 'commands-linux.c', +)) +qga_ss.add(when: 'CONFIG_BSD', if_true: files( + 'commands-bsd.c', +)) qga_ss.add(when: 'CONFIG_WIN32', if_true: files( 'channel-win32.c', 'commands-win32.c', diff --git a/qom/object.c b/qom/object.c index d34608558e..e5cef30f6d 100644 --- a/qom/object.c +++ b/qom/object.c @@ -1383,7 +1383,8 @@ bool object_property_get(Object *obj, const char *name, Visitor *v, } if (!prop->get) { - error_setg(errp, QERR_PERMISSION_DENIED); + error_setg(errp, "Property '%s.%s' is not readable", + object_get_typename(obj), name); return false; } prop->get(obj, v, name, prop->opaque, &err); @@ -1402,7 +1403,8 @@ bool object_property_set(Object *obj, const char *name, Visitor *v, } if (!prop->set) { - error_setg(errp, QERR_PERMISSION_DENIED); + error_setg(errp, "Property '%s.%s' is not writable", + object_get_typename(obj), name); return false; } prop->set(obj, v, name, prop->opaque, errp); diff --git a/scripts/git-submodule.sh b/scripts/git-submodule.sh index e225d3a963..7be41f5948 100755 --- a/scripts/git-submodule.sh +++ b/scripts/git-submodule.sh @@ -51,6 +51,12 @@ validate_error() { exit 1 } +if test -n "$maybe_modules" && ! test -e ".git" +then + echo "$0: unexpectedly called with submodules but no git checkout exists" + exit 1 +fi + modules="" for m in $maybe_modules do @@ -63,12 +69,6 @@ do fi done -if test -n "$maybe_modules" && ! test -e ".git" -then - echo "$0: unexpectedly called with submodules but no git checkout exists" - exit 1 -fi - case "$command" in status|validate) if test -z "$maybe_modules" diff --git a/scripts/meson-buildoptions.sh b/scripts/meson-buildoptions.sh index eb3267bef5..2cb0de5601 100644 --- a/scripts/meson-buildoptions.sh +++ b/scripts/meson-buildoptions.sh @@ -67,6 +67,7 @@ meson_options_help() { printf "%s\n" ' auth-pam PAM access control' printf "%s\n" ' avx2 AVX2 optimizations' printf "%s\n" ' avx512f AVX512F optimizations' + printf "%s\n" ' blkio libblkio block device driver' printf "%s\n" ' bochs bochs image format support' printf "%s\n" ' bpf eBPF support' printf "%s\n" ' brlapi brlapi character device driver' @@ -198,6 +199,8 @@ _meson_option_parse() { --disable-gcov) printf "%s" -Db_coverage=false ;; --enable-lto) printf "%s" -Db_lto=true ;; --disable-lto) printf "%s" -Db_lto=false ;; + --enable-blkio) printf "%s" -Dblkio=enabled ;; + --disable-blkio) printf "%s" -Dblkio=disabled ;; --block-drv-ro-whitelist=*) quote_sh "-Dblock_drv_ro_whitelist=$2" ;; --block-drv-rw-whitelist=*) quote_sh "-Dblock_drv_rw_whitelist=$2" ;; --enable-block-drv-whitelist-in-tools) printf "%s" -Dblock_drv_whitelist_in_tools=true ;; diff --git a/softmmu/cpus.c b/softmmu/cpus.c index 61b27ff59d..5a584a8d57 100644 --- a/softmmu/cpus.c +++ b/softmmu/cpus.c @@ -354,7 +354,7 @@ static void qemu_init_sigbus(void) /* * ALERT: when modifying this, take care that SIGBUS forwarding in - * os_mem_prealloc() will continue working as expected. + * qemu_prealloc_mem() will continue working as expected. */ memset(&action, 0, sizeof(action)); action.sa_flags = SA_SIGINFO; @@ -437,18 +437,19 @@ void qemu_wait_io_event(CPUState *cpu) void cpus_kick_thread(CPUState *cpu) { -#ifndef _WIN32 - int err; - if (cpu->thread_kicked) { return; } cpu->thread_kicked = true; - err = pthread_kill(cpu->thread->thread, SIG_IPI); + +#ifndef _WIN32 + int err = pthread_kill(cpu->thread->thread, SIG_IPI); if (err && err != ESRCH) { fprintf(stderr, "qemu:%s: %s", __func__, strerror(err)); exit(1); } +#else + qemu_sem_post(&cpu->sem); #endif } diff --git a/softmmu/device_tree.c b/softmmu/device_tree.c index ce74f3d48d..30aa3aea9f 100644 --- a/softmmu/device_tree.c +++ b/softmmu/device_tree.c @@ -22,6 +22,7 @@ #include "qemu/option.h" #include "qemu/bswap.h" #include "qemu/cutils.h" +#include "qemu/guest-random.h" #include "sysemu/device_tree.h" #include "hw/loader.h" #include "hw/boards.h" @@ -680,3 +681,23 @@ void hmp_dumpdtb(Monitor *mon, const QDict *qdict) info_report("dtb dumped to %s", filename); } + +void qemu_fdt_randomize_seeds(void *fdt) +{ + int noffset, poffset, len; + const char *name; + uint8_t *data; + + for (noffset = fdt_next_node(fdt, 0, NULL); + noffset >= 0; + noffset = fdt_next_node(fdt, noffset, NULL)) { + for (poffset = fdt_first_property_offset(fdt, noffset); + poffset >= 0; + poffset = fdt_next_property_offset(fdt, poffset)) { + data = (uint8_t *)fdt_getprop_by_offset(fdt, poffset, &name, &len); + if (!data || strcmp(name, "rng-seed")) + continue; + qemu_guest_getrandom_nofail(data, len); + } + } +} diff --git a/softmmu/physmem.c b/softmmu/physmem.c index 56e03e07b5..d9578ccfd4 100644 --- a/softmmu/physmem.c +++ b/softmmu/physmem.c @@ -1748,6 +1748,11 @@ void qemu_ram_unset_migratable(RAMBlock *rb) rb->flags &= ~RAM_MIGRATABLE; } +int qemu_ram_get_fd(RAMBlock *rb) +{ + return rb->fd; +} + /* Called with iothread lock held. */ void qemu_ram_set_idstr(RAMBlock *new_block, const char *name, DeviceState *dev) { diff --git a/softmmu/qemu-seccomp.c b/softmmu/qemu-seccomp.c index deaf8a4ef5..d66a2a1226 100644 --- a/softmmu/qemu-seccomp.c +++ b/softmmu/qemu-seccomp.c @@ -312,6 +312,19 @@ static int seccomp_start(uint32_t seccomp_opts, Error **errp) goto seccomp_return; } +#if defined(CONFIG_SECCOMP_SYSRAWRC) + /* + * This must be the first seccomp_attr_set() call to have full + * error propagation from subsequent seccomp APIs. + */ + rc = seccomp_attr_set(ctx, SCMP_FLTATR_API_SYSRAWRC, 1); + if (rc != 0) { + error_setg_errno(errp, -rc, + "failed to set seccomp rawrc attribute"); + goto seccomp_return; + } +#endif + rc = seccomp_attr_set(ctx, SCMP_FLTATR_CTL_TSYNC, 1); if (rc != 0) { error_setg_errno(errp, -rc, diff --git a/softmmu/qtest.c b/softmmu/qtest.c index f8acef2628..afea7693d0 100644 --- a/softmmu/qtest.c +++ b/softmmu/qtest.c @@ -977,7 +977,7 @@ static void qtest_set_log(Object *obj, const char *value, Error **errp) QTest *q = QTEST(obj); if (qtest == q) { - error_setg(errp, QERR_PERMISSION_DENIED); + error_setg(errp, "Property 'log' can not be set now"); } else { g_free(q->log); q->log = g_strdup(value); @@ -997,7 +997,7 @@ static void qtest_set_chardev(Object *obj, const char *value, Error **errp) Chardev *chr; if (qtest == q) { - error_setg(errp, QERR_PERMISSION_DENIED); + error_setg(errp, "Property 'chardev' can not be set now"); return; } diff --git a/softmmu/runstate.c b/softmmu/runstate.c index 1e68680b9d..3dd83d5e5d 100644 --- a/softmmu/runstate.c +++ b/softmmu/runstate.c @@ -441,11 +441,16 @@ void qemu_system_reset(ShutdownCause reason) cpu_synchronize_all_states(); if (mc && mc->reset) { - mc->reset(current_machine); + mc->reset(current_machine, reason); } else { - qemu_devices_reset(); + qemu_devices_reset(reason); } - if (reason && reason != SHUTDOWN_CAUSE_SUBSYSTEM_RESET) { + switch (reason) { + case SHUTDOWN_CAUSE_NONE: + case SHUTDOWN_CAUSE_SUBSYSTEM_RESET: + case SHUTDOWN_CAUSE_SNAPSHOT_LOAD: + break; + default: qapi_event_send_reset(shutdown_caused_by_guest(reason), reason); } cpu_synchronize_all_post_reset(); diff --git a/softmmu/vl.c b/softmmu/vl.c index b464da25bc..5115221efe 100644 --- a/softmmu/vl.c +++ b/softmmu/vl.c @@ -1760,6 +1760,27 @@ static void object_option_parse(const char *optarg) } /* + * Very early object creation, before the sandbox options have been activated. + */ +static bool object_create_pre_sandbox(const char *type) +{ + /* + * Objects should in general not get initialized "too early" without + * a reason. If you add one, state the reason in a comment! + */ + + /* + * Reason: -sandbox on,resourcecontrol=deny disallows setting CPU + * affinity of threads. + */ + if (g_str_equal(type, "thread-context")) { + return true; + } + + return false; +} + +/* * Initial object creation happens before all other * QEMU data types are created. The majority of objects * can be created at this point. The rng-egd object @@ -1773,6 +1794,11 @@ static bool object_create_early(const char *type) * add one, state the reason in a comment! */ + /* Reason: already created. */ + if (object_create_pre_sandbox(type)) { + return false; + } + /* Reason: property "chardev" */ if (g_str_equal(type, "rng-egd") || g_str_equal(type, "qtest")) { @@ -1895,7 +1921,7 @@ static void qemu_create_early_backends(void) */ static bool object_create_late(const char *type) { - return !object_create_early(type); + return !object_create_early(type) && !object_create_pre_sandbox(type); } static void qemu_create_late_backends(void) @@ -1904,7 +1930,7 @@ static void qemu_create_late_backends(void) qtest_server_init(qtest_chrdev, qtest_log, &error_fatal); } - net_init_clients(&error_fatal); + net_init_clients(); object_option_foreach_add(object_create_late); @@ -2351,6 +2377,11 @@ static int process_runstate_actions(void *opaque, QemuOpts *opts, Error **errp) static void qemu_process_early_options(void) { + qemu_opts_foreach(qemu_find_opts("name"), + parse_name, NULL, &error_fatal); + + object_option_foreach_add(object_create_pre_sandbox); + #ifdef CONFIG_SECCOMP QemuOptsList *olist = qemu_find_opts_err("sandbox", NULL); if (olist) { @@ -2358,9 +2389,6 @@ static void qemu_process_early_options(void) } #endif - qemu_opts_foreach(qemu_find_opts("name"), - parse_name, NULL, &error_fatal); - if (qemu_opts_foreach(qemu_find_opts("action"), process_runstate_actions, NULL, &error_fatal)) { exit(1); @@ -2801,21 +2829,19 @@ void qemu_init(int argc, char **argv) break; case QEMU_OPTION_netdev: default_net = 0; - if (net_client_parse(qemu_find_opts("netdev"), optarg) == -1) { - exit(1); + if (netdev_is_modern(optarg)) { + netdev_parse_modern(optarg); + } else { + net_client_parse(qemu_find_opts("netdev"), optarg); } break; case QEMU_OPTION_nic: default_net = 0; - if (net_client_parse(qemu_find_opts("nic"), optarg) == -1) { - exit(1); - } + net_client_parse(qemu_find_opts("nic"), optarg); break; case QEMU_OPTION_net: default_net = 0; - if (net_client_parse(qemu_find_opts("net"), optarg) == -1) { - exit(1); - } + net_client_parse(qemu_find_opts("net"), optarg); break; #ifdef CONFIG_LIBISCSI case QEMU_OPTION_iscsi: diff --git a/stubs/meson.build b/stubs/meson.build index d8f3fd5c44..4314161f5f 100644 --- a/stubs/meson.build +++ b/stubs/meson.build @@ -29,6 +29,7 @@ stub_ss.add(files('migr-blocker.c')) stub_ss.add(files('module-opts.c')) stub_ss.add(files('monitor.c')) stub_ss.add(files('monitor-core.c')) +stub_ss.add(files('physmem.c')) stub_ss.add(files('qemu-timer-notify-cb.c')) stub_ss.add(files('qmp_memory_device.c')) stub_ss.add(files('qmp-command-available.c')) diff --git a/stubs/physmem.c b/stubs/physmem.c new file mode 100644 index 0000000000..1fc5f2df29 --- /dev/null +++ b/stubs/physmem.c @@ -0,0 +1,13 @@ +#include "qemu/osdep.h" +#include "exec/cpu-common.h" + +RAMBlock *qemu_ram_block_from_host(void *ptr, bool round_offset, + ram_addr_t *offset) +{ + return NULL; +} + +int qemu_ram_get_fd(RAMBlock *rb) +{ + return -1; +} diff --git a/target/arm/cpu.c b/target/arm/cpu.c index 0a7bfbf999..a021df9e9e 100644 --- a/target/arm/cpu.c +++ b/target/arm/cpu.c @@ -587,14 +587,24 @@ static inline bool arm_excp_unmasked(CPUState *cs, unsigned int excp_idx, if ((target_el > cur_el) && (target_el != 1)) { /* Exceptions targeting a higher EL may not be maskable */ if (arm_feature(env, ARM_FEATURE_AARCH64)) { - /* - * 64-bit masking rules are simple: exceptions to EL3 - * can't be masked, and exceptions to EL2 can only be - * masked from Secure state. The HCR and SCR settings - * don't affect the masking logic, only the interrupt routing. - */ - if (target_el == 3 || !secure || (env->cp15.scr_el3 & SCR_EEL2)) { + switch (target_el) { + case 2: + /* + * According to ARM DDI 0487H.a, an interrupt can be masked + * when HCR_E2H and HCR_TGE are both set regardless of the + * current Security state. Note that we need to revisit this + * part again once we need to support NMI. + */ + if ((hcr_el2 & (HCR_E2H | HCR_TGE)) != (HCR_E2H | HCR_TGE)) { + unmasked = true; + } + break; + case 3: + /* Interrupt cannot be masked when the target EL is 3 */ unmasked = true; + break; + default: + g_assert_not_reached(); } } else { /* diff --git a/target/arm/cpu.h b/target/arm/cpu.h index db9ec6a038..9aeed3c848 100644 --- a/target/arm/cpu.h +++ b/target/arm/cpu.h @@ -4147,6 +4147,21 @@ static inline bool isar_feature_aa64_lva(const ARMISARegisters *id) return FIELD_EX64(id->id_aa64mmfr2, ID_AA64MMFR2, VARANGE) != 0; } +static inline bool isar_feature_aa64_e0pd(const ARMISARegisters *id) +{ + return FIELD_EX64(id->id_aa64mmfr2, ID_AA64MMFR2, E0PD) != 0; +} + +static inline bool isar_feature_aa64_hafs(const ARMISARegisters *id) +{ + return FIELD_EX64(id->id_aa64mmfr1, ID_AA64MMFR1, HAFDBS) != 0; +} + +static inline bool isar_feature_aa64_hdbs(const ARMISARegisters *id) +{ + return FIELD_EX64(id->id_aa64mmfr1, ID_AA64MMFR1, HAFDBS) >= 2; +} + static inline bool isar_feature_aa64_tts2uxn(const ARMISARegisters *id) { return FIELD_EX64(id->id_aa64mmfr1, ID_AA64MMFR1, XNX) != 0; diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c index 85e0d1daf1..3d74f134f5 100644 --- a/target/arm/cpu64.c +++ b/target/arm/cpu64.c @@ -1165,6 +1165,7 @@ static void aarch64_max_initfn(Object *obj) cpu->isar.id_aa64mmfr0 = t; t = cpu->isar.id_aa64mmfr1; + t = FIELD_DP64(t, ID_AA64MMFR1, HAFDBS, 2); /* FEAT_HAFDBS */ t = FIELD_DP64(t, ID_AA64MMFR1, VMIDBITS, 2); /* FEAT_VMID16 */ t = FIELD_DP64(t, ID_AA64MMFR1, VH, 1); /* FEAT_VHE */ t = FIELD_DP64(t, ID_AA64MMFR1, HPDS, 1); /* FEAT_HPDS */ @@ -1185,6 +1186,7 @@ static void aarch64_max_initfn(Object *obj) t = FIELD_DP64(t, ID_AA64MMFR2, FWB, 1); /* FEAT_S2FWB */ t = FIELD_DP64(t, ID_AA64MMFR2, TTL, 1); /* FEAT_TTL */ t = FIELD_DP64(t, ID_AA64MMFR2, BBM, 2); /* FEAT_BBM at level 2 */ + t = FIELD_DP64(t, ID_AA64MMFR2, E0PD, 1); /* FEAT_E0PD */ cpu->isar.id_aa64mmfr2 = t; t = cpu->isar.id_aa64zfr0; diff --git a/target/arm/helper.c b/target/arm/helper.c index c672903f43..b070a20f1a 100644 --- a/target/arm/helper.c +++ b/target/arm/helper.c @@ -10352,7 +10352,7 @@ int aa64_va_parameter_tbi(uint64_t tcr, ARMMMUIdx mmu_idx) { if (regime_has_2_ranges(mmu_idx)) { return extract64(tcr, 37, 2); - } else if (mmu_idx == ARMMMUIdx_Stage2 || mmu_idx == ARMMMUIdx_Stage2_S) { + } else if (regime_is_stage2(mmu_idx)) { return 0; /* VTCR_EL2 */ } else { /* Replicate the single TBI bit so we always have 2 bits. */ @@ -10364,7 +10364,7 @@ int aa64_va_parameter_tbid(uint64_t tcr, ARMMMUIdx mmu_idx) { if (regime_has_2_ranges(mmu_idx)) { return extract64(tcr, 51, 2); - } else if (mmu_idx == ARMMMUIdx_Stage2 || mmu_idx == ARMMMUIdx_Stage2_S) { + } else if (regime_is_stage2(mmu_idx)) { return 0; /* VTCR_EL2 */ } else { /* Replicate the single TBID bit so we always have 2 bits. */ @@ -10470,11 +10470,11 @@ ARMVAParameters aa64_va_parameters(CPUARMState *env, uint64_t va, ARMMMUIdx mmu_idx, bool data) { uint64_t tcr = regime_tcr(env, mmu_idx); - bool epd, hpd, tsz_oob, ds; + bool epd, hpd, tsz_oob, ds, ha, hd; int select, tsz, tbi, max_tsz, min_tsz, ps, sh; ARMGranuleSize gran; ARMCPU *cpu = env_archcpu(env); - bool stage2 = mmu_idx == ARMMMUIdx_Stage2 || mmu_idx == ARMMMUIdx_Stage2_S; + bool stage2 = regime_is_stage2(mmu_idx); if (!regime_has_2_ranges(mmu_idx)) { select = 0; @@ -10489,8 +10489,12 @@ ARMVAParameters aa64_va_parameters(CPUARMState *env, uint64_t va, epd = false; sh = extract32(tcr, 12, 2); ps = extract32(tcr, 16, 3); + ha = extract32(tcr, 21, 1) && cpu_isar_feature(aa64_hafs, cpu); + hd = extract32(tcr, 22, 1) && cpu_isar_feature(aa64_hdbs, cpu); ds = extract64(tcr, 32, 1); } else { + bool e0pd; + /* * Bit 55 is always between the two regions, and is canonical for * determining if address tagging is enabled. @@ -10502,15 +10506,24 @@ ARMVAParameters aa64_va_parameters(CPUARMState *env, uint64_t va, epd = extract32(tcr, 7, 1); sh = extract32(tcr, 12, 2); hpd = extract64(tcr, 41, 1); + e0pd = extract64(tcr, 55, 1); } else { tsz = extract32(tcr, 16, 6); gran = tg1_to_gran_size(extract32(tcr, 30, 2)); epd = extract32(tcr, 23, 1); sh = extract32(tcr, 28, 2); hpd = extract64(tcr, 42, 1); + e0pd = extract64(tcr, 56, 1); } ps = extract64(tcr, 32, 3); + ha = extract64(tcr, 39, 1) && cpu_isar_feature(aa64_hafs, cpu); + hd = extract64(tcr, 40, 1) && cpu_isar_feature(aa64_hdbs, cpu); ds = extract64(tcr, 59, 1); + + if (e0pd && cpu_isar_feature(aa64_e0pd, cpu) && + regime_is_user(env, mmu_idx)) { + epd = true; + } } gran = sanitize_gran_size(cpu, gran, stage2); @@ -10532,22 +10545,18 @@ ARMVAParameters aa64_va_parameters(CPUARMState *env, uint64_t va, } ds = false; } else if (ds) { - switch (mmu_idx) { - case ARMMMUIdx_Stage2: - case ARMMMUIdx_Stage2_S: + if (regime_is_stage2(mmu_idx)) { if (gran == Gran16K) { ds = cpu_isar_feature(aa64_tgran16_2_lpa2, cpu); } else { ds = cpu_isar_feature(aa64_tgran4_2_lpa2, cpu); } - break; - default: + } else { if (gran == Gran16K) { ds = cpu_isar_feature(aa64_tgran16_lpa2, cpu); } else { ds = cpu_isar_feature(aa64_tgran4_lpa2, cpu); } - break; } if (ds) { min_tsz = 12; @@ -10581,6 +10590,8 @@ ARMVAParameters aa64_va_parameters(CPUARMState *env, uint64_t va, .hpd = hpd, .tsz_oob = tsz_oob, .ds = ds, + .ha = ha, + .hd = ha && hd, .gran = gran, }; } diff --git a/target/arm/internals.h b/target/arm/internals.h index b26c9ca17b..d9121d9ff8 100644 --- a/target/arm/internals.h +++ b/target/arm/internals.h @@ -338,6 +338,7 @@ typedef enum ARMFaultType { ARMFault_AsyncExternal, ARMFault_Debug, ARMFault_TLBConflict, + ARMFault_UnsuppAtomicUpdate, ARMFault_Lockdown, ARMFault_Exclusive, ARMFault_ICacheMaint, @@ -524,6 +525,9 @@ static inline uint32_t arm_fi_to_lfsc(ARMMMUFaultInfo *fi) case ARMFault_TLBConflict: fsc = 0x30; break; + case ARMFault_UnsuppAtomicUpdate: + fsc = 0x31; + break; case ARMFault_Lockdown: fsc = 0x34; break; @@ -673,6 +677,11 @@ static inline bool regime_is_pan(CPUARMState *env, ARMMMUIdx mmu_idx) } } +static inline bool regime_is_stage2(ARMMMUIdx mmu_idx) +{ + return mmu_idx == ARMMMUIdx_Stage2 || mmu_idx == ARMMMUIdx_Stage2_S; +} + /* Return the exception level which controls this address translation regime */ static inline uint32_t regime_el(CPUARMState *env, ARMMMUIdx mmu_idx) { @@ -707,6 +716,25 @@ static inline uint32_t regime_el(CPUARMState *env, ARMMMUIdx mmu_idx) } } +static inline bool regime_is_user(CPUARMState *env, ARMMMUIdx mmu_idx) +{ + switch (mmu_idx) { + case ARMMMUIdx_E20_0: + case ARMMMUIdx_Stage1_E0: + case ARMMMUIdx_MUser: + case ARMMMUIdx_MSUser: + case ARMMMUIdx_MUserNegPri: + case ARMMMUIdx_MSUserNegPri: + return true; + default: + return false; + case ARMMMUIdx_E10_0: + case ARMMMUIdx_E10_1: + case ARMMMUIdx_E10_1_PAN: + g_assert_not_reached(); + } +} + /* Return the SCTLR value which controls this address translation regime */ static inline uint64_t regime_sctlr(CPUARMState *env, ARMMMUIdx mmu_idx) { @@ -1041,6 +1069,8 @@ typedef struct ARMVAParameters { bool hpd : 1; bool tsz_oob : 1; /* tsz has been clamped to legal range */ bool ds : 1; + bool ha : 1; + bool hd : 1; ARMGranuleSize gran : 2; } ARMVAParameters; diff --git a/target/arm/ptw.c b/target/arm/ptw.c index 6c5ed56a10..58a7bbda50 100644 --- a/target/arm/ptw.c +++ b/target/arm/ptw.c @@ -9,6 +9,7 @@ #include "qemu/osdep.h" #include "qemu/log.h" #include "qemu/range.h" +#include "qemu/main-loop.h" #include "exec/exec-all.h" #include "cpu.h" #include "internals.h" @@ -17,10 +18,13 @@ typedef struct S1Translate { ARMMMUIdx in_mmu_idx; + ARMMMUIdx in_ptw_idx; bool in_secure; bool in_debug; bool out_secure; + bool out_rw; bool out_be; + hwaddr out_virt; hwaddr out_phys; void *out_host; } S1Translate; @@ -104,25 +108,6 @@ static bool regime_translation_big_endian(CPUARMState *env, ARMMMUIdx mmu_idx) return (regime_sctlr(env, mmu_idx) & SCTLR_EE) != 0; } -static bool regime_is_user(CPUARMState *env, ARMMMUIdx mmu_idx) -{ - switch (mmu_idx) { - case ARMMMUIdx_E20_0: - case ARMMMUIdx_Stage1_E0: - case ARMMMUIdx_MUser: - case ARMMMUIdx_MSUser: - case ARMMMUIdx_MUserNegPri: - case ARMMMUIdx_MSUserNegPri: - return true; - default: - return false; - case ARMMMUIdx_E10_0: - case ARMMMUIdx_E10_1: - case ARMMMUIdx_E10_1_PAN: - g_assert_not_reached(); - } -} - /* Return the TTBR associated with this translation regime */ static uint64_t regime_ttbr(CPUARMState *env, ARMMMUIdx mmu_idx, int ttbrn) { @@ -233,33 +218,26 @@ static bool S1_ptw_translate(CPUARMState *env, S1Translate *ptw, { bool is_secure = ptw->in_secure; ARMMMUIdx mmu_idx = ptw->in_mmu_idx; - ARMMMUIdx s2_mmu_idx = is_secure ? ARMMMUIdx_Stage2_S : ARMMMUIdx_Stage2; - bool s2_phys = false; + ARMMMUIdx s2_mmu_idx = ptw->in_ptw_idx; uint8_t pte_attrs; bool pte_secure; - if (!arm_mmu_idx_is_stage1_of_2(mmu_idx) - || regime_translation_disabled(env, s2_mmu_idx, is_secure)) { - s2_mmu_idx = is_secure ? ARMMMUIdx_Phys_S : ARMMMUIdx_Phys_NS; - s2_phys = true; - } + ptw->out_virt = addr; if (unlikely(ptw->in_debug)) { /* * From gdbstub, do not use softmmu so that we don't modify the * state of the cpu at all, including softmmu tlb contents. */ - if (s2_phys) { - ptw->out_phys = addr; - pte_attrs = 0; - pte_secure = is_secure; - } else { + if (regime_is_stage2(s2_mmu_idx)) { S1Translate s2ptw = { .in_mmu_idx = s2_mmu_idx, + .in_ptw_idx = is_secure ? ARMMMUIdx_Phys_S : ARMMMUIdx_Phys_NS, .in_secure = is_secure, .in_debug = true, }; GetPhysAddrResult s2 = { }; + if (!get_phys_addr_lpae(env, &s2ptw, addr, MMU_DATA_LOAD, false, &s2, fi)) { goto fail; @@ -267,8 +245,14 @@ static bool S1_ptw_translate(CPUARMState *env, S1Translate *ptw, ptw->out_phys = s2.f.phys_addr; pte_attrs = s2.cacheattrs.attrs; pte_secure = s2.f.attrs.secure; + } else { + /* Regime is physical. */ + ptw->out_phys = addr; + pte_attrs = 0; + pte_secure = is_secure; } ptw->out_host = NULL; + ptw->out_rw = false; } else { CPUTLBEntryFull *full; int flags; @@ -283,11 +267,12 @@ static bool S1_ptw_translate(CPUARMState *env, S1Translate *ptw, goto fail; } ptw->out_phys = full->phys_addr; + ptw->out_rw = full->prot & PAGE_WRITE; pte_attrs = full->pte_attrs; pte_secure = full->attrs.secure; } - if (!s2_phys) { + if (regime_is_stage2(s2_mmu_idx)) { uint64_t hcr = arm_hcr_el2_eff_secstate(env, is_secure); if ((hcr & HCR_PTW) && S2_attrs_are_device(hcr, pte_attrs)) { @@ -322,24 +307,20 @@ static bool S1_ptw_translate(CPUARMState *env, S1Translate *ptw, } /* All loads done in the course of a page table walk go through here. */ -static uint32_t arm_ldl_ptw(CPUARMState *env, S1Translate *ptw, hwaddr addr, +static uint32_t arm_ldl_ptw(CPUARMState *env, S1Translate *ptw, ARMMMUFaultInfo *fi) { CPUState *cs = env_cpu(env); + void *host = ptw->out_host; uint32_t data; - if (!S1_ptw_translate(env, ptw, addr, fi)) { - /* Failure. */ - assert(fi->s1ptw); - return 0; - } - - if (likely(ptw->out_host)) { + if (likely(host)) { /* Page tables are in RAM, and we have the host address. */ + data = qatomic_read((uint32_t *)host); if (ptw->out_be) { - data = ldl_be_p(ptw->out_host); + data = be32_to_cpu(data); } else { - data = ldl_le_p(ptw->out_host); + data = le32_to_cpu(data); } } else { /* Page tables are in MMIO. */ @@ -361,25 +342,29 @@ static uint32_t arm_ldl_ptw(CPUARMState *env, S1Translate *ptw, hwaddr addr, return data; } -static uint64_t arm_ldq_ptw(CPUARMState *env, S1Translate *ptw, hwaddr addr, +static uint64_t arm_ldq_ptw(CPUARMState *env, S1Translate *ptw, ARMMMUFaultInfo *fi) { CPUState *cs = env_cpu(env); + void *host = ptw->out_host; uint64_t data; - if (!S1_ptw_translate(env, ptw, addr, fi)) { - /* Failure. */ - assert(fi->s1ptw); - return 0; - } - - if (likely(ptw->out_host)) { + if (likely(host)) { /* Page tables are in RAM, and we have the host address. */ +#ifdef CONFIG_ATOMIC64 + data = qatomic_read__nocheck((uint64_t *)host); + if (ptw->out_be) { + data = be64_to_cpu(data); + } else { + data = le64_to_cpu(data); + } +#else if (ptw->out_be) { - data = ldq_be_p(ptw->out_host); + data = ldq_be_p(host); } else { - data = ldq_le_p(ptw->out_host); + data = ldq_le_p(host); } +#endif } else { /* Page tables are in MMIO. */ MemTxAttrs attrs = { .secure = ptw->out_secure }; @@ -400,6 +385,91 @@ static uint64_t arm_ldq_ptw(CPUARMState *env, S1Translate *ptw, hwaddr addr, return data; } +static uint64_t arm_casq_ptw(CPUARMState *env, uint64_t old_val, + uint64_t new_val, S1Translate *ptw, + ARMMMUFaultInfo *fi) +{ + uint64_t cur_val; + void *host = ptw->out_host; + + if (unlikely(!host)) { + fi->type = ARMFault_UnsuppAtomicUpdate; + fi->s1ptw = true; + return 0; + } + + /* + * Raising a stage2 Protection fault for an atomic update to a read-only + * page is delayed until it is certain that there is a change to make. + */ + if (unlikely(!ptw->out_rw)) { + int flags; + void *discard; + + env->tlb_fi = fi; + flags = probe_access_flags(env, ptw->out_virt, MMU_DATA_STORE, + arm_to_core_mmu_idx(ptw->in_ptw_idx), + true, &discard, 0); + env->tlb_fi = NULL; + + if (unlikely(flags & TLB_INVALID_MASK)) { + assert(fi->type != ARMFault_None); + fi->s2addr = ptw->out_virt; + fi->stage2 = true; + fi->s1ptw = true; + fi->s1ns = !ptw->in_secure; + return 0; + } + + /* In case CAS mismatches and we loop, remember writability. */ + ptw->out_rw = true; + } + +#ifdef CONFIG_ATOMIC64 + if (ptw->out_be) { + old_val = cpu_to_be64(old_val); + new_val = cpu_to_be64(new_val); + cur_val = qatomic_cmpxchg__nocheck((uint64_t *)host, old_val, new_val); + cur_val = be64_to_cpu(cur_val); + } else { + old_val = cpu_to_le64(old_val); + new_val = cpu_to_le64(new_val); + cur_val = qatomic_cmpxchg__nocheck((uint64_t *)host, old_val, new_val); + cur_val = le64_to_cpu(cur_val); + } +#else + /* + * We can't support the full 64-bit atomic cmpxchg on the host. + * Because this is only used for FEAT_HAFDBS, which is only for AA64, + * we know that TCG_OVERSIZED_GUEST is set, which means that we are + * running in round-robin mode and could only race with dma i/o. + */ +#ifndef TCG_OVERSIZED_GUEST +# error "Unexpected configuration" +#endif + bool locked = qemu_mutex_iothread_locked(); + if (!locked) { + qemu_mutex_lock_iothread(); + } + if (ptw->out_be) { + cur_val = ldq_be_p(host); + if (cur_val == old_val) { + stq_be_p(host, new_val); + } + } else { + cur_val = ldq_le_p(host); + if (cur_val == old_val) { + stq_le_p(host, new_val); + } + } + if (!locked) { + qemu_mutex_unlock_iothread(); + } +#endif + + return cur_val; +} + static bool get_level1_table_address(CPUARMState *env, ARMMMUIdx mmu_idx, uint32_t *table, uint32_t address) { @@ -529,7 +599,10 @@ static bool get_phys_addr_v5(CPUARMState *env, S1Translate *ptw, fi->type = ARMFault_Translation; goto do_fault; } - desc = arm_ldl_ptw(env, ptw, table, fi); + if (!S1_ptw_translate(env, ptw, table, fi)) { + goto do_fault; + } + desc = arm_ldl_ptw(env, ptw, fi); if (fi->type != ARMFault_None) { goto do_fault; } @@ -567,7 +640,10 @@ static bool get_phys_addr_v5(CPUARMState *env, S1Translate *ptw, /* Fine pagetable. */ table = (desc & 0xfffff000) | ((address >> 8) & 0xffc); } - desc = arm_ldl_ptw(env, ptw, table, fi); + if (!S1_ptw_translate(env, ptw, table, fi)) { + goto do_fault; + } + desc = arm_ldl_ptw(env, ptw, fi); if (fi->type != ARMFault_None) { goto do_fault; } @@ -652,7 +728,10 @@ static bool get_phys_addr_v6(CPUARMState *env, S1Translate *ptw, fi->type = ARMFault_Translation; goto do_fault; } - desc = arm_ldl_ptw(env, ptw, table, fi); + if (!S1_ptw_translate(env, ptw, table, fi)) { + goto do_fault; + } + desc = arm_ldl_ptw(env, ptw, fi); if (fi->type != ARMFault_None) { goto do_fault; } @@ -705,7 +784,10 @@ static bool get_phys_addr_v6(CPUARMState *env, S1Translate *ptw, ns = extract32(desc, 3, 1); /* Lookup l2 entry. */ table = (desc & 0xfffffc00) | ((address >> 10) & 0x3fc); - desc = arm_ldl_ptw(env, ptw, table, fi); + if (!S1_ptw_translate(env, ptw, table, fi)) { + goto do_fault; + } + desc = arm_ldl_ptw(env, ptw, fi); if (fi->type != ARMFault_None) { goto do_fault; } @@ -842,8 +924,7 @@ static int get_S1prot(CPUARMState *env, ARMMMUIdx mmu_idx, bool is_aa64, bool have_wxn; int wxn = 0; - assert(mmu_idx != ARMMMUIdx_Stage2); - assert(mmu_idx != ARMMMUIdx_Stage2_S); + assert(!regime_is_stage2(mmu_idx)); user_rw = simple_ap_to_rw_prot_is_user(ap, true); if (is_user) { @@ -1067,15 +1148,13 @@ static bool get_phys_addr_lpae(CPUARMState *env, S1Translate *ptw, ARMCPU *cpu = env_archcpu(env); ARMMMUIdx mmu_idx = ptw->in_mmu_idx; bool is_secure = ptw->in_secure; - /* Read an LPAE long-descriptor translation table. */ - ARMFaultType fault_type = ARMFault_Translation; uint32_t level; ARMVAParameters param; uint64_t ttbr; hwaddr descaddr, indexmask, indexmask_grainsize; uint32_t tableattrs; target_ulong page_size; - uint32_t attrs; + uint64_t attrs; int32_t stride; int addrsize, inputsize, outputsize; uint64_t tcr = regime_tcr(env, mmu_idx); @@ -1083,7 +1162,8 @@ static bool get_phys_addr_lpae(CPUARMState *env, S1Translate *ptw, uint32_t el = regime_el(env, mmu_idx); uint64_t descaddrmask; bool aarch64 = arm_el_is_aa64(env, el); - bool guarded = false; + uint64_t descriptor, new_descriptor; + bool nstable; /* TODO: This code does not support shareability levels. */ if (aarch64) { @@ -1103,8 +1183,7 @@ static bool get_phys_addr_lpae(CPUARMState *env, S1Translate *ptw, * so our choice is to always raise the fault. */ if (param.tsz_oob) { - fault_type = ARMFault_Translation; - goto do_fault; + goto do_translation_fault; } addrsize = 64 - 8 * param.tbi; @@ -1141,8 +1220,7 @@ static bool get_phys_addr_lpae(CPUARMState *env, S1Translate *ptw, addrsize - inputsize); if (-top_bits != param.select) { /* The gap between the two regions is a Translation fault */ - fault_type = ARMFault_Translation; - goto do_fault; + goto do_translation_fault; } } @@ -1168,10 +1246,10 @@ static bool get_phys_addr_lpae(CPUARMState *env, S1Translate *ptw, * Translation table walk disabled => Translation fault on TLB miss * Note: This is always 0 on 64-bit EL2 and EL3. */ - goto do_fault; + goto do_translation_fault; } - if (mmu_idx != ARMMMUIdx_Stage2 && mmu_idx != ARMMMUIdx_Stage2_S) { + if (!regime_is_stage2(mmu_idx)) { /* * The starting level depends on the virtual address size (which can * be up to 48 bits) and the translation granule size. It indicates @@ -1199,8 +1277,7 @@ static bool get_phys_addr_lpae(CPUARMState *env, S1Translate *ptw, if (param.ds && stride == 9 && sl2) { if (sl0 != 0) { level = 0; - fault_type = ARMFault_Translation; - goto do_fault; + goto do_translation_fault; } startlevel = -1; } else if (!aarch64 || stride == 9) { @@ -1219,8 +1296,7 @@ static bool get_phys_addr_lpae(CPUARMState *env, S1Translate *ptw, ok = check_s2_mmu_setup(cpu, aarch64, startlevel, inputsize, stride, outputsize); if (!ok) { - fault_type = ARMFault_Translation; - goto do_fault; + goto do_translation_fault; } level = startlevel; } @@ -1242,7 +1318,7 @@ static bool get_phys_addr_lpae(CPUARMState *env, S1Translate *ptw, descaddr |= extract64(ttbr, 2, 4) << 48; } else if (descaddr >> outputsize) { level = 0; - fault_type = ARMFault_AddressSize; + fi->type = ARMFault_AddressSize; goto do_fault; } @@ -1276,120 +1352,173 @@ static bool get_phys_addr_lpae(CPUARMState *env, S1Translate *ptw, * bits at each step. */ tableattrs = is_secure ? 0 : (1 << 4); - for (;;) { - uint64_t descriptor; - bool nstable; - - descaddr |= (address >> (stride * (4 - level))) & indexmask; - descaddr &= ~7ULL; - nstable = extract32(tableattrs, 4, 1); - ptw->in_secure = !nstable; - descriptor = arm_ldq_ptw(env, ptw, descaddr, fi); - if (fi->type != ARMFault_None) { - goto do_fault; - } - if (!(descriptor & 1) || - (!(descriptor & 2) && (level == 3))) { - /* Invalid, or the Reserved level 3 encoding */ - goto do_fault; + next_level: + descaddr |= (address >> (stride * (4 - level))) & indexmask; + descaddr &= ~7ULL; + nstable = extract32(tableattrs, 4, 1); + if (!nstable) { + /* + * Stage2_S -> Stage2 or Phys_S -> Phys_NS + * Assert that the non-secure idx are even, and relative order. + */ + QEMU_BUILD_BUG_ON((ARMMMUIdx_Phys_NS & 1) != 0); + QEMU_BUILD_BUG_ON((ARMMMUIdx_Stage2 & 1) != 0); + QEMU_BUILD_BUG_ON(ARMMMUIdx_Phys_NS + 1 != ARMMMUIdx_Phys_S); + QEMU_BUILD_BUG_ON(ARMMMUIdx_Stage2 + 1 != ARMMMUIdx_Stage2_S); + ptw->in_ptw_idx &= ~1; + ptw->in_secure = false; + } + if (!S1_ptw_translate(env, ptw, descaddr, fi)) { + goto do_fault; + } + descriptor = arm_ldq_ptw(env, ptw, fi); + if (fi->type != ARMFault_None) { + goto do_fault; + } + new_descriptor = descriptor; + + restart_atomic_update: + if (!(descriptor & 1) || (!(descriptor & 2) && (level == 3))) { + /* Invalid, or the Reserved level 3 encoding */ + goto do_translation_fault; + } + + descaddr = descriptor & descaddrmask; + + /* + * For FEAT_LPA and PS=6, bits [51:48] of descaddr are in [15:12] + * of descriptor. For FEAT_LPA2 and effective DS, bits [51:50] of + * descaddr are in [9:8]. Otherwise, if descaddr is out of range, + * raise AddressSizeFault. + */ + if (outputsize > 48) { + if (param.ds) { + descaddr |= extract64(descriptor, 8, 2) << 50; + } else { + descaddr |= extract64(descriptor, 12, 4) << 48; } + } else if (descaddr >> outputsize) { + fi->type = ARMFault_AddressSize; + goto do_fault; + } + + if ((descriptor & 2) && (level < 3)) { + /* + * Table entry. The top five bits are attributes which may + * propagate down through lower levels of the table (and + * which are all arranged so that 0 means "no effect", so + * we can gather them up by ORing in the bits at each level). + */ + tableattrs |= extract64(descriptor, 59, 5); + level++; + indexmask = indexmask_grainsize; + goto next_level; + } - descaddr = descriptor & descaddrmask; + /* + * Block entry at level 1 or 2, or page entry at level 3. + * These are basically the same thing, although the number + * of bits we pull in from the vaddr varies. Note that although + * descaddrmask masks enough of the low bits of the descriptor + * to give a correct page or table address, the address field + * in a block descriptor is smaller; so we need to explicitly + * clear the lower bits here before ORing in the low vaddr bits. + * + * Afterward, descaddr is the final physical address. + */ + page_size = (1ULL << ((stride * (4 - level)) + 3)); + descaddr &= ~(hwaddr)(page_size - 1); + descaddr |= (address & (page_size - 1)); + if (likely(!ptw->in_debug)) { /* - * For FEAT_LPA and PS=6, bits [51:48] of descaddr are in [15:12] - * of descriptor. For FEAT_LPA2 and effective DS, bits [51:50] of - * descaddr are in [9:8]. Otherwise, if descaddr is out of range, - * raise AddressSizeFault. + * Access flag. + * If HA is enabled, prepare to update the descriptor below. + * Otherwise, pass the access fault on to software. */ - if (outputsize > 48) { - if (param.ds) { - descaddr |= extract64(descriptor, 8, 2) << 50; + if (!(descriptor & (1 << 10))) { + if (param.ha) { + new_descriptor |= 1 << 10; /* AF */ } else { - descaddr |= extract64(descriptor, 12, 4) << 48; + fi->type = ARMFault_AccessFlag; + goto do_fault; } - } else if (descaddr >> outputsize) { - fault_type = ARMFault_AddressSize; - goto do_fault; } - if ((descriptor & 2) && (level < 3)) { - /* - * Table entry. The top five bits are attributes which may - * propagate down through lower levels of the table (and - * which are all arranged so that 0 means "no effect", so - * we can gather them up by ORing in the bits at each level). - */ - tableattrs |= extract64(descriptor, 59, 5); - level++; - indexmask = indexmask_grainsize; - continue; - } /* - * Block entry at level 1 or 2, or page entry at level 3. - * These are basically the same thing, although the number - * of bits we pull in from the vaddr varies. Note that although - * descaddrmask masks enough of the low bits of the descriptor - * to give a correct page or table address, the address field - * in a block descriptor is smaller; so we need to explicitly - * clear the lower bits here before ORing in the low vaddr bits. + * Dirty Bit. + * If HD is enabled, pre-emptively set/clear the appropriate AP/S2AP + * bit for writeback. The actual write protection test may still be + * overridden by tableattrs, to be merged below. */ - page_size = (1ULL << ((stride * (4 - level)) + 3)); - descaddr &= ~(hwaddr)(page_size - 1); - descaddr |= (address & (page_size - 1)); - /* Extract attributes from the descriptor */ - attrs = extract64(descriptor, 2, 10) - | (extract64(descriptor, 52, 12) << 10); - - if (mmu_idx == ARMMMUIdx_Stage2 || mmu_idx == ARMMMUIdx_Stage2_S) { - /* Stage 2 table descriptors do not include any attribute fields */ - break; - } - /* Merge in attributes from table descriptors */ - attrs |= nstable << 3; /* NS */ - guarded = extract64(descriptor, 50, 1); /* GP */ - if (param.hpd) { - /* HPD disables all the table attributes except NSTable. */ - break; + if (param.hd + && extract64(descriptor, 51, 1) /* DBM */ + && access_type == MMU_DATA_STORE) { + if (regime_is_stage2(mmu_idx)) { + new_descriptor |= 1ull << 7; /* set S2AP[1] */ + } else { + new_descriptor &= ~(1ull << 7); /* clear AP[2] */ + } } - attrs |= extract32(tableattrs, 0, 2) << 11; /* XN, PXN */ - /* - * The sense of AP[1] vs APTable[0] is reversed, as APTable[0] == 1 - * means "force PL1 access only", which means forcing AP[1] to 0. - */ - attrs &= ~(extract32(tableattrs, 2, 1) << 4); /* !APT[0] => AP[1] */ - attrs |= extract32(tableattrs, 3, 1) << 5; /* APT[1] => AP[2] */ - break; } + /* - * Here descaddr is the final physical address, and attributes - * are all in attrs. + * Extract attributes from the (modified) descriptor, and apply + * table descriptors. Stage 2 table descriptors do not include + * any attribute fields. HPD disables all the table attributes + * except NSTable. */ - fault_type = ARMFault_AccessFlag; - if ((attrs & (1 << 8)) == 0) { - /* Access flag */ - goto do_fault; + attrs = new_descriptor & (MAKE_64BIT_MASK(2, 10) | MAKE_64BIT_MASK(50, 14)); + if (!regime_is_stage2(mmu_idx)) { + attrs |= nstable << 5; /* NS */ + if (!param.hpd) { + attrs |= extract64(tableattrs, 0, 2) << 53; /* XN, PXN */ + /* + * The sense of AP[1] vs APTable[0] is reversed, as APTable[0] == 1 + * means "force PL1 access only", which means forcing AP[1] to 0. + */ + attrs &= ~(extract64(tableattrs, 2, 1) << 6); /* !APT[0] => AP[1] */ + attrs |= extract32(tableattrs, 3, 1) << 7; /* APT[1] => AP[2] */ + } } - ap = extract32(attrs, 4, 2); - - if (mmu_idx == ARMMMUIdx_Stage2 || mmu_idx == ARMMMUIdx_Stage2_S) { + ap = extract32(attrs, 6, 2); + if (regime_is_stage2(mmu_idx)) { ns = mmu_idx == ARMMMUIdx_Stage2; - xn = extract32(attrs, 11, 2); + xn = extract64(attrs, 53, 2); result->f.prot = get_S2prot(env, ap, xn, s1_is_el0); } else { - ns = extract32(attrs, 3, 1); - xn = extract32(attrs, 12, 1); - pxn = extract32(attrs, 11, 1); + ns = extract32(attrs, 5, 1); + xn = extract64(attrs, 54, 1); + pxn = extract64(attrs, 53, 1); result->f.prot = get_S1prot(env, mmu_idx, aarch64, ap, ns, xn, pxn); } - fault_type = ARMFault_Permission; if (!(result->f.prot & (1 << access_type))) { + fi->type = ARMFault_Permission; goto do_fault; } + /* If FEAT_HAFDBS has made changes, update the PTE. */ + if (new_descriptor != descriptor) { + new_descriptor = arm_casq_ptw(env, descriptor, new_descriptor, ptw, fi); + if (fi->type != ARMFault_None) { + goto do_fault; + } + /* + * I_YZSVV says that if the in-memory descriptor has changed, + * then we must use the information in that new value + * (which might include a different output address, different + * attributes, or generate a fault). + * Restart the handling of the descriptor value from scratch. + */ + if (new_descriptor != descriptor) { + descriptor = new_descriptor; + goto restart_atomic_update; + } + } + if (ns) { /* * The NS bit will (as required by the architecture) have no effect if @@ -1401,15 +1530,15 @@ static bool get_phys_addr_lpae(CPUARMState *env, S1Translate *ptw, /* When in aarch64 mode, and BTI is enabled, remember GP in the TLB. */ if (aarch64 && cpu_isar_feature(aa64_bti, cpu)) { - result->f.guarded = guarded; + result->f.guarded = extract64(attrs, 50, 1); /* GP */ } - if (mmu_idx == ARMMMUIdx_Stage2 || mmu_idx == ARMMMUIdx_Stage2_S) { + if (regime_is_stage2(mmu_idx)) { result->cacheattrs.is_s2_format = true; - result->cacheattrs.attrs = extract32(attrs, 0, 4); + result->cacheattrs.attrs = extract32(attrs, 2, 4); } else { /* Index into MAIR registers for cache attributes */ - uint8_t attrindx = extract32(attrs, 0, 3); + uint8_t attrindx = extract32(attrs, 2, 3); uint64_t mair = env->cp15.mair_el[regime_el(env, mmu_idx)]; assert(attrindx <= 7); result->cacheattrs.is_s2_format = false; @@ -1424,19 +1553,19 @@ static bool get_phys_addr_lpae(CPUARMState *env, S1Translate *ptw, if (param.ds) { result->cacheattrs.shareability = param.sh; } else { - result->cacheattrs.shareability = extract32(attrs, 6, 2); + result->cacheattrs.shareability = extract32(attrs, 8, 2); } result->f.phys_addr = descaddr; result->f.lg_page_size = ctz64(page_size); return false; -do_fault: - fi->type = fault_type; + do_translation_fault: + fi->type = ARMFault_Translation; + do_fault: fi->level = level; /* Tag the error as S2 for failed S1 PTW at S2 or ordinary S2. */ - fi->stage2 = fi->s1ptw || (mmu_idx == ARMMMUIdx_Stage2 || - mmu_idx == ARMMMUIdx_Stage2_S); + fi->stage2 = fi->s1ptw || regime_is_stage2(mmu_idx); fi->s1ns = mmu_idx == ARMMMUIdx_Stage2; return true; } @@ -2442,7 +2571,7 @@ static bool get_phys_addr_twostage(CPUARMState *env, S1Translate *ptw, ARMMMUFaultInfo *fi) { hwaddr ipa; - int s1_prot; + int s1_prot, s1_lgpgsz; bool is_secure = ptw->in_secure; bool ret, ipa_secure, s2walk_secure; ARMCacheAttrs cacheattrs1; @@ -2470,6 +2599,7 @@ static bool get_phys_addr_twostage(CPUARMState *env, S1Translate *ptw, is_el0 = ptw->in_mmu_idx == ARMMMUIdx_Stage1_E0; ptw->in_mmu_idx = s2walk_secure ? ARMMMUIdx_Stage2_S : ARMMMUIdx_Stage2; + ptw->in_ptw_idx = s2walk_secure ? ARMMMUIdx_Phys_S : ARMMMUIdx_Phys_NS; ptw->in_secure = s2walk_secure; /* @@ -2477,6 +2607,7 @@ static bool get_phys_addr_twostage(CPUARMState *env, S1Translate *ptw, * Save the stage1 results so that we may merge prot and cacheattrs later. */ s1_prot = result->f.prot; + s1_lgpgsz = result->f.lg_page_size; cacheattrs1 = result->cacheattrs; memset(result, 0, sizeof(*result)); @@ -2491,6 +2622,14 @@ static bool get_phys_addr_twostage(CPUARMState *env, S1Translate *ptw, return ret; } + /* + * Use the maximum of the S1 & S2 page size, so that invalidation + * of pages > TARGET_PAGE_SIZE works correctly. + */ + if (result->f.lg_page_size < s1_lgpgsz) { + result->f.lg_page_size = s1_lgpgsz; + } + /* Combine the S1 and S2 cache attributes. */ hcr = arm_hcr_el2_eff_secstate(env, is_secure); if (hcr & HCR_DC) { @@ -2529,10 +2668,32 @@ static bool get_phys_addr_with_struct(CPUARMState *env, S1Translate *ptw, ARMMMUFaultInfo *fi) { ARMMMUIdx mmu_idx = ptw->in_mmu_idx; - ARMMMUIdx s1_mmu_idx = stage_1_mmu_idx(mmu_idx); bool is_secure = ptw->in_secure; + ARMMMUIdx s1_mmu_idx; + + switch (mmu_idx) { + case ARMMMUIdx_Phys_S: + case ARMMMUIdx_Phys_NS: + /* Checking Phys early avoids special casing later vs regime_el. */ + return get_phys_addr_disabled(env, address, access_type, mmu_idx, + is_secure, result, fi); + + case ARMMMUIdx_Stage1_E0: + case ARMMMUIdx_Stage1_E1: + case ARMMMUIdx_Stage1_E1_PAN: + /* First stage lookup uses second stage for ptw. */ + ptw->in_ptw_idx = is_secure ? ARMMMUIdx_Stage2_S : ARMMMUIdx_Stage2; + break; - if (mmu_idx != s1_mmu_idx) { + case ARMMMUIdx_E10_0: + s1_mmu_idx = ARMMMUIdx_Stage1_E0; + goto do_twostage; + case ARMMMUIdx_E10_1: + s1_mmu_idx = ARMMMUIdx_Stage1_E1; + goto do_twostage; + case ARMMMUIdx_E10_1_PAN: + s1_mmu_idx = ARMMMUIdx_Stage1_E1_PAN; + do_twostage: /* * Call ourselves recursively to do the stage 1 and then stage 2 * translations if mmu_idx is a two-stage regime, and EL2 present. @@ -2543,6 +2704,12 @@ static bool get_phys_addr_with_struct(CPUARMState *env, S1Translate *ptw, return get_phys_addr_twostage(env, ptw, address, access_type, result, fi); } + /* fall through */ + + default: + /* Single stage and second stage uses physical for ptw. */ + ptw->in_ptw_idx = is_secure ? ARMMMUIdx_Phys_S : ARMMMUIdx_Phys_NS; + break; } /* diff --git a/target/ppc/cpu.c b/target/ppc/cpu.c index 0ebac04bc4..1a97b41c6b 100644 --- a/target/ppc/cpu.c +++ b/target/ppc/cpu.c @@ -73,6 +73,7 @@ void ppc_store_msr(CPUPPCState *env, target_ulong value) hreg_store_msr(env, value, 0); } +#if !defined(CONFIG_USER_ONLY) void ppc_store_lpcr(PowerPCCPU *cpu, target_ulong val) { PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cpu); @@ -81,7 +82,10 @@ void ppc_store_lpcr(PowerPCCPU *cpu, target_ulong val) env->spr[SPR_LPCR] = val & pcc->lpcr_mask; /* The gtse bit affects hflags */ hreg_compute_hflags(env); + + ppc_maybe_interrupt(env); } +#endif static inline void fpscr_set_rounding_mode(CPUPPCState *env) { diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h index cca6c4e51c..81d4263a07 100644 --- a/target/ppc/cpu.h +++ b/target/ppc/cpu.h @@ -696,7 +696,9 @@ enum { HFLAGS_PR = 14, /* MSR_PR */ HFLAGS_PMCC0 = 15, /* MMCR0 PMCC bit 0 */ HFLAGS_PMCC1 = 16, /* MMCR0 PMCC bit 1 */ - HFLAGS_INSN_CNT = 17, /* PMU instruction count enabled */ + HFLAGS_PMCJCE = 17, /* MMCR0 PMCjCE bit */ + HFLAGS_PMC_OTHER = 18, /* PMC other than PMC5-6 is enabled */ + HFLAGS_INSN_CNT = 19, /* PMU instruction count enabled */ HFLAGS_VSX = 23, /* MSR_VSX if cpu has VSX */ HFLAGS_VR = 25, /* MSR_VR if cpu has VRE */ @@ -1358,6 +1360,7 @@ int ppc64_cpu_write_elf64_note(WriteCoreDumpFunction f, CPUState *cs, int ppc32_cpu_write_elf32_note(WriteCoreDumpFunction f, CPUState *cs, int cpuid, DumpState *s); #ifndef CONFIG_USER_ONLY +void ppc_maybe_interrupt(CPUPPCState *env); void ppc_cpu_do_interrupt(CPUState *cpu); bool ppc_cpu_exec_interrupt(CPUState *cpu, int int_req); void ppc_cpu_do_system_reset(CPUState *cs); @@ -1370,9 +1373,9 @@ void ppc_translate_init(void); #if !defined(CONFIG_USER_ONLY) void ppc_store_sdr1(CPUPPCState *env, target_ulong value); +void ppc_store_lpcr(PowerPCCPU *cpu, target_ulong val); #endif /* !defined(CONFIG_USER_ONLY) */ void ppc_store_msr(CPUPPCState *env, target_ulong value); -void ppc_store_lpcr(PowerPCCPU *cpu, target_ulong val); void ppc_cpu_list(void); @@ -2416,27 +2419,27 @@ enum { /* Hardware exceptions definitions */ enum { /* External hardware exception sources */ - PPC_INTERRUPT_RESET = 0, /* Reset exception */ - PPC_INTERRUPT_WAKEUP, /* Wakeup exception */ - PPC_INTERRUPT_MCK, /* Machine check exception */ - PPC_INTERRUPT_EXT, /* External interrupt */ - PPC_INTERRUPT_SMI, /* System management interrupt */ - PPC_INTERRUPT_CEXT, /* Critical external interrupt */ - PPC_INTERRUPT_DEBUG, /* External debug exception */ - PPC_INTERRUPT_THERM, /* Thermal exception */ + PPC_INTERRUPT_RESET = 0x00001, /* Reset exception */ + PPC_INTERRUPT_WAKEUP = 0x00002, /* Wakeup exception */ + PPC_INTERRUPT_MCK = 0x00004, /* Machine check exception */ + PPC_INTERRUPT_EXT = 0x00008, /* External interrupt */ + PPC_INTERRUPT_SMI = 0x00010, /* System management interrupt */ + PPC_INTERRUPT_CEXT = 0x00020, /* Critical external interrupt */ + PPC_INTERRUPT_DEBUG = 0x00040, /* External debug exception */ + PPC_INTERRUPT_THERM = 0x00080, /* Thermal exception */ /* Internal hardware exception sources */ - PPC_INTERRUPT_DECR, /* Decrementer exception */ - PPC_INTERRUPT_HDECR, /* Hypervisor decrementer exception */ - PPC_INTERRUPT_PIT, /* Programmable interval timer interrupt */ - PPC_INTERRUPT_FIT, /* Fixed interval timer interrupt */ - PPC_INTERRUPT_WDT, /* Watchdog timer interrupt */ - PPC_INTERRUPT_CDOORBELL, /* Critical doorbell interrupt */ - PPC_INTERRUPT_DOORBELL, /* Doorbell interrupt */ - PPC_INTERRUPT_PERFM, /* Performance monitor interrupt */ - PPC_INTERRUPT_HMI, /* Hypervisor Maintenance interrupt */ - PPC_INTERRUPT_HDOORBELL, /* Hypervisor Doorbell interrupt */ - PPC_INTERRUPT_HVIRT, /* Hypervisor virtualization interrupt */ - PPC_INTERRUPT_EBB, /* Event-based Branch exception */ + PPC_INTERRUPT_DECR = 0x00100, /* Decrementer exception */ + PPC_INTERRUPT_HDECR = 0x00200, /* Hypervisor decrementer exception */ + PPC_INTERRUPT_PIT = 0x00400, /* Programmable interval timer int. */ + PPC_INTERRUPT_FIT = 0x00800, /* Fixed interval timer interrupt */ + PPC_INTERRUPT_WDT = 0x01000, /* Watchdog timer interrupt */ + PPC_INTERRUPT_CDOORBELL = 0x02000, /* Critical doorbell interrupt */ + PPC_INTERRUPT_DOORBELL = 0x04000, /* Doorbell interrupt */ + PPC_INTERRUPT_PERFM = 0x08000, /* Performance monitor interrupt */ + PPC_INTERRUPT_HMI = 0x10000, /* Hypervisor Maintenance interrupt */ + PPC_INTERRUPT_HDOORBELL = 0x20000, /* Hypervisor Doorbell interrupt */ + PPC_INTERRUPT_HVIRT = 0x40000, /* Hypervisor virtualization interrupt */ + PPC_INTERRUPT_EBB = 0x80000, /* Event-based Branch exception */ }; /* Processor Compatibility mask (PCR) */ diff --git a/target/ppc/cpu_init.c b/target/ppc/cpu_init.c index 335351c226..32e94153d1 100644 --- a/target/ppc/cpu_init.c +++ b/target/ppc/cpu_init.c @@ -5960,46 +5960,10 @@ static bool ppc_pvr_match_power7(PowerPCCPUClass *pcc, uint32_t pvr, bool best) return true; } -static bool cpu_has_work_POWER7(CPUState *cs) -{ - PowerPCCPU *cpu = POWERPC_CPU(cs); - CPUPPCState *env = &cpu->env; - - if (cs->halted) { - if (!(cs->interrupt_request & CPU_INTERRUPT_HARD)) { - return false; - } - if ((env->pending_interrupts & (1u << PPC_INTERRUPT_EXT)) && - (env->spr[SPR_LPCR] & LPCR_P7_PECE0)) { - return true; - } - if ((env->pending_interrupts & (1u << PPC_INTERRUPT_DECR)) && - (env->spr[SPR_LPCR] & LPCR_P7_PECE1)) { - return true; - } - if ((env->pending_interrupts & (1u << PPC_INTERRUPT_MCK)) && - (env->spr[SPR_LPCR] & LPCR_P7_PECE2)) { - return true; - } - if ((env->pending_interrupts & (1u << PPC_INTERRUPT_HMI)) && - (env->spr[SPR_LPCR] & LPCR_P7_PECE2)) { - return true; - } - if (env->pending_interrupts & (1u << PPC_INTERRUPT_RESET)) { - return true; - } - return false; - } else { - return FIELD_EX64(env->msr, MSR, EE) && - (cs->interrupt_request & CPU_INTERRUPT_HARD); - } -} - POWERPC_FAMILY(POWER7)(ObjectClass *oc, void *data) { DeviceClass *dc = DEVICE_CLASS(oc); PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc); - CPUClass *cc = CPU_CLASS(oc); dc->fw_name = "PowerPC,POWER7"; dc->desc = "POWER7"; @@ -6008,7 +5972,6 @@ POWERPC_FAMILY(POWER7)(ObjectClass *oc, void *data) pcc->pcr_supported = PCR_COMPAT_2_06 | PCR_COMPAT_2_05; pcc->init_proc = init_proc_POWER7; pcc->check_pow = check_pow_nocheck; - cc->has_work = cpu_has_work_POWER7; pcc->insns_flags = PPC_INSNS_BASE | PPC_ISEL | PPC_STRING | PPC_MFTB | PPC_FLOAT | PPC_FLOAT_FSEL | PPC_FLOAT_FRES | PPC_FLOAT_FSQRT | PPC_FLOAT_FRSQRTE | @@ -6133,54 +6096,10 @@ static bool ppc_pvr_match_power8(PowerPCCPUClass *pcc, uint32_t pvr, bool best) return true; } -static bool cpu_has_work_POWER8(CPUState *cs) -{ - PowerPCCPU *cpu = POWERPC_CPU(cs); - CPUPPCState *env = &cpu->env; - - if (cs->halted) { - if (!(cs->interrupt_request & CPU_INTERRUPT_HARD)) { - return false; - } - if ((env->pending_interrupts & (1u << PPC_INTERRUPT_EXT)) && - (env->spr[SPR_LPCR] & LPCR_P8_PECE2)) { - return true; - } - if ((env->pending_interrupts & (1u << PPC_INTERRUPT_DECR)) && - (env->spr[SPR_LPCR] & LPCR_P8_PECE3)) { - return true; - } - if ((env->pending_interrupts & (1u << PPC_INTERRUPT_MCK)) && - (env->spr[SPR_LPCR] & LPCR_P8_PECE4)) { - return true; - } - if ((env->pending_interrupts & (1u << PPC_INTERRUPT_HMI)) && - (env->spr[SPR_LPCR] & LPCR_P8_PECE4)) { - return true; - } - if ((env->pending_interrupts & (1u << PPC_INTERRUPT_DOORBELL)) && - (env->spr[SPR_LPCR] & LPCR_P8_PECE0)) { - return true; - } - if ((env->pending_interrupts & (1u << PPC_INTERRUPT_HDOORBELL)) && - (env->spr[SPR_LPCR] & LPCR_P8_PECE1)) { - return true; - } - if (env->pending_interrupts & (1u << PPC_INTERRUPT_RESET)) { - return true; - } - return false; - } else { - return FIELD_EX64(env->msr, MSR, EE) && - (cs->interrupt_request & CPU_INTERRUPT_HARD); - } -} - POWERPC_FAMILY(POWER8)(ObjectClass *oc, void *data) { DeviceClass *dc = DEVICE_CLASS(oc); PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc); - CPUClass *cc = CPU_CLASS(oc); dc->fw_name = "PowerPC,POWER8"; dc->desc = "POWER8"; @@ -6189,7 +6108,6 @@ POWERPC_FAMILY(POWER8)(ObjectClass *oc, void *data) pcc->pcr_supported = PCR_COMPAT_2_07 | PCR_COMPAT_2_06 | PCR_COMPAT_2_05; pcc->init_proc = init_proc_POWER8; pcc->check_pow = check_pow_nocheck; - cc->has_work = cpu_has_work_POWER8; pcc->insns_flags = PPC_INSNS_BASE | PPC_ISEL | PPC_STRING | PPC_MFTB | PPC_FLOAT | PPC_FLOAT_FSEL | PPC_FLOAT_FRES | PPC_FLOAT_FSQRT | PPC_FLOAT_FRSQRTE | @@ -6351,71 +6269,10 @@ static bool ppc_pvr_match_power9(PowerPCCPUClass *pcc, uint32_t pvr, bool best) return false; } -static bool cpu_has_work_POWER9(CPUState *cs) -{ - PowerPCCPU *cpu = POWERPC_CPU(cs); - CPUPPCState *env = &cpu->env; - - if (cs->halted) { - uint64_t psscr = env->spr[SPR_PSSCR]; - - if (!(cs->interrupt_request & CPU_INTERRUPT_HARD)) { - return false; - } - - /* If EC is clear, just return true on any pending interrupt */ - if (!(psscr & PSSCR_EC)) { - return true; - } - /* External Exception */ - if ((env->pending_interrupts & (1u << PPC_INTERRUPT_EXT)) && - (env->spr[SPR_LPCR] & LPCR_EEE)) { - bool heic = !!(env->spr[SPR_LPCR] & LPCR_HEIC); - if (!heic || !FIELD_EX64_HV(env->msr) || - FIELD_EX64(env->msr, MSR, PR)) { - return true; - } - } - /* Decrementer Exception */ - if ((env->pending_interrupts & (1u << PPC_INTERRUPT_DECR)) && - (env->spr[SPR_LPCR] & LPCR_DEE)) { - return true; - } - /* Machine Check or Hypervisor Maintenance Exception */ - if ((env->pending_interrupts & (1u << PPC_INTERRUPT_MCK | - 1u << PPC_INTERRUPT_HMI)) && (env->spr[SPR_LPCR] & LPCR_OEE)) { - return true; - } - /* Privileged Doorbell Exception */ - if ((env->pending_interrupts & (1u << PPC_INTERRUPT_DOORBELL)) && - (env->spr[SPR_LPCR] & LPCR_PDEE)) { - return true; - } - /* Hypervisor Doorbell Exception */ - if ((env->pending_interrupts & (1u << PPC_INTERRUPT_HDOORBELL)) && - (env->spr[SPR_LPCR] & LPCR_HDEE)) { - return true; - } - /* Hypervisor virtualization exception */ - if ((env->pending_interrupts & (1u << PPC_INTERRUPT_HVIRT)) && - (env->spr[SPR_LPCR] & LPCR_HVEE)) { - return true; - } - if (env->pending_interrupts & (1u << PPC_INTERRUPT_RESET)) { - return true; - } - return false; - } else { - return FIELD_EX64(env->msr, MSR, EE) && - (cs->interrupt_request & CPU_INTERRUPT_HARD); - } -} - POWERPC_FAMILY(POWER9)(ObjectClass *oc, void *data) { DeviceClass *dc = DEVICE_CLASS(oc); PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc); - CPUClass *cc = CPU_CLASS(oc); dc->fw_name = "PowerPC,POWER9"; dc->desc = "POWER9"; @@ -6425,7 +6282,6 @@ POWERPC_FAMILY(POWER9)(ObjectClass *oc, void *data) PCR_COMPAT_2_05; pcc->init_proc = init_proc_POWER9; pcc->check_pow = check_pow_nocheck; - cc->has_work = cpu_has_work_POWER9; pcc->insns_flags = PPC_INSNS_BASE | PPC_ISEL | PPC_STRING | PPC_MFTB | PPC_FLOAT | PPC_FLOAT_FSEL | PPC_FLOAT_FRES | PPC_FLOAT_FSQRT | PPC_FLOAT_FRSQRTE | @@ -6584,71 +6440,10 @@ static bool ppc_pvr_match_power10(PowerPCCPUClass *pcc, uint32_t pvr, bool best) return false; } -static bool cpu_has_work_POWER10(CPUState *cs) -{ - PowerPCCPU *cpu = POWERPC_CPU(cs); - CPUPPCState *env = &cpu->env; - - if (cs->halted) { - uint64_t psscr = env->spr[SPR_PSSCR]; - - if (!(cs->interrupt_request & CPU_INTERRUPT_HARD)) { - return false; - } - - /* If EC is clear, just return true on any pending interrupt */ - if (!(psscr & PSSCR_EC)) { - return true; - } - /* External Exception */ - if ((env->pending_interrupts & (1u << PPC_INTERRUPT_EXT)) && - (env->spr[SPR_LPCR] & LPCR_EEE)) { - bool heic = !!(env->spr[SPR_LPCR] & LPCR_HEIC); - if (!heic || !FIELD_EX64_HV(env->msr) || - FIELD_EX64(env->msr, MSR, PR)) { - return true; - } - } - /* Decrementer Exception */ - if ((env->pending_interrupts & (1u << PPC_INTERRUPT_DECR)) && - (env->spr[SPR_LPCR] & LPCR_DEE)) { - return true; - } - /* Machine Check or Hypervisor Maintenance Exception */ - if ((env->pending_interrupts & (1u << PPC_INTERRUPT_MCK | - 1u << PPC_INTERRUPT_HMI)) && (env->spr[SPR_LPCR] & LPCR_OEE)) { - return true; - } - /* Privileged Doorbell Exception */ - if ((env->pending_interrupts & (1u << PPC_INTERRUPT_DOORBELL)) && - (env->spr[SPR_LPCR] & LPCR_PDEE)) { - return true; - } - /* Hypervisor Doorbell Exception */ - if ((env->pending_interrupts & (1u << PPC_INTERRUPT_HDOORBELL)) && - (env->spr[SPR_LPCR] & LPCR_HDEE)) { - return true; - } - /* Hypervisor virtualization exception */ - if ((env->pending_interrupts & (1u << PPC_INTERRUPT_HVIRT)) && - (env->spr[SPR_LPCR] & LPCR_HVEE)) { - return true; - } - if (env->pending_interrupts & (1u << PPC_INTERRUPT_RESET)) { - return true; - } - return false; - } else { - return FIELD_EX64(env->msr, MSR, EE) && - (cs->interrupt_request & CPU_INTERRUPT_HARD); - } -} - POWERPC_FAMILY(POWER10)(ObjectClass *oc, void *data) { DeviceClass *dc = DEVICE_CLASS(oc); PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc); - CPUClass *cc = CPU_CLASS(oc); dc->fw_name = "PowerPC,POWER10"; dc->desc = "POWER10"; @@ -6659,7 +6454,6 @@ POWERPC_FAMILY(POWER10)(ObjectClass *oc, void *data) PCR_COMPAT_2_06 | PCR_COMPAT_2_05; pcc->init_proc = init_proc_POWER10; pcc->check_pow = check_pow_nocheck; - cc->has_work = cpu_has_work_POWER10; pcc->insns_flags = PPC_INSNS_BASE | PPC_ISEL | PPC_STRING | PPC_MFTB | PPC_FLOAT | PPC_FLOAT_FSEL | PPC_FLOAT_FRES | PPC_FLOAT_FSQRT | PPC_FLOAT_FRSQRTE | @@ -7232,11 +7026,7 @@ static void ppc_restore_state_to_opc(CPUState *cs, static bool ppc_cpu_has_work(CPUState *cs) { - PowerPCCPU *cpu = POWERPC_CPU(cs); - CPUPPCState *env = &cpu->env; - - return FIELD_EX64(env->msr, MSR, EE) && - (cs->interrupt_request & CPU_INTERRUPT_HARD); + return cs->interrupt_request & CPU_INTERRUPT_HARD; } static void ppc_cpu_reset(DeviceState *dev) diff --git a/target/ppc/excp_helper.c b/target/ppc/excp_helper.c index 43f2480e94..09a81561d4 100644 --- a/target/ppc/excp_helper.c +++ b/target/ppc/excp_helper.c @@ -23,6 +23,7 @@ #include "exec/exec-all.h" #include "internal.h" #include "helper_regs.h" +#include "hw/ppc/ppc.h" #include "trace.h" @@ -389,6 +390,7 @@ static void powerpc_set_excp_state(PowerPCCPU *cpu, target_ulong vector, env->nip = vector; env->msr = msr; hreg_compute_hflags(env); + ppc_maybe_interrupt(env); powerpc_reset_excp_state(cpu); @@ -1683,29 +1685,355 @@ void ppc_cpu_do_interrupt(CPUState *cs) powerpc_excp(cpu, cs->exception_index); } -static void ppc_hw_interrupt(CPUPPCState *env) +#if defined(TARGET_PPC64) +#define P7_UNUSED_INTERRUPTS \ + (PPC_INTERRUPT_RESET | PPC_INTERRUPT_HVIRT | PPC_INTERRUPT_CEXT | \ + PPC_INTERRUPT_WDT | PPC_INTERRUPT_CDOORBELL | PPC_INTERRUPT_FIT | \ + PPC_INTERRUPT_PIT | PPC_INTERRUPT_DOORBELL | PPC_INTERRUPT_HDOORBELL | \ + PPC_INTERRUPT_THERM | PPC_INTERRUPT_EBB) + +static int p7_interrupt_powersave(CPUPPCState *env) +{ + if ((env->pending_interrupts & PPC_INTERRUPT_EXT) && + (env->spr[SPR_LPCR] & LPCR_P7_PECE0)) { + return PPC_INTERRUPT_EXT; + } + if ((env->pending_interrupts & PPC_INTERRUPT_DECR) && + (env->spr[SPR_LPCR] & LPCR_P7_PECE1)) { + return PPC_INTERRUPT_DECR; + } + if ((env->pending_interrupts & PPC_INTERRUPT_MCK) && + (env->spr[SPR_LPCR] & LPCR_P7_PECE2)) { + return PPC_INTERRUPT_MCK; + } + if ((env->pending_interrupts & PPC_INTERRUPT_HMI) && + (env->spr[SPR_LPCR] & LPCR_P7_PECE2)) { + return PPC_INTERRUPT_HMI; + } + if (env->pending_interrupts & PPC_INTERRUPT_RESET) { + return PPC_INTERRUPT_RESET; + } + return 0; +} + +static int p7_next_unmasked_interrupt(CPUPPCState *env) { PowerPCCPU *cpu = env_archcpu(env); + CPUState *cs = CPU(cpu); + /* Ignore MSR[EE] when coming out of some power management states */ + bool msr_ee = FIELD_EX64(env->msr, MSR, EE) || env->resume_as_sreset; + + assert((env->pending_interrupts & P7_UNUSED_INTERRUPTS) == 0); + + if (cs->halted) { + /* LPCR[PECE] controls which interrupts can exit power-saving mode */ + return p7_interrupt_powersave(env); + } + + /* Machine check exception */ + if (env->pending_interrupts & PPC_INTERRUPT_MCK) { + return PPC_INTERRUPT_MCK; + } + + /* Hypervisor decrementer exception */ + if (env->pending_interrupts & PPC_INTERRUPT_HDECR) { + /* LPCR will be clear when not supported so this will work */ + bool hdice = !!(env->spr[SPR_LPCR] & LPCR_HDICE); + if ((msr_ee || !FIELD_EX64_HV(env->msr)) && hdice) { + /* HDEC clears on delivery */ + return PPC_INTERRUPT_HDECR; + } + } + + /* External interrupt can ignore MSR:EE under some circumstances */ + if (env->pending_interrupts & PPC_INTERRUPT_EXT) { + bool lpes0 = !!(env->spr[SPR_LPCR] & LPCR_LPES0); + bool heic = !!(env->spr[SPR_LPCR] & LPCR_HEIC); + /* HEIC blocks delivery to the hypervisor */ + if ((msr_ee && !(heic && FIELD_EX64_HV(env->msr) && + !FIELD_EX64(env->msr, MSR, PR))) || + (env->has_hv_mode && !FIELD_EX64_HV(env->msr) && !lpes0)) { + return PPC_INTERRUPT_EXT; + } + } + if (msr_ee != 0) { + /* Decrementer exception */ + if (env->pending_interrupts & PPC_INTERRUPT_DECR) { + return PPC_INTERRUPT_DECR; + } + if (env->pending_interrupts & PPC_INTERRUPT_PERFM) { + return PPC_INTERRUPT_PERFM; + } + } + + return 0; +} + +#define P8_UNUSED_INTERRUPTS \ + (PPC_INTERRUPT_RESET | PPC_INTERRUPT_DEBUG | PPC_INTERRUPT_HVIRT | \ + PPC_INTERRUPT_CEXT | PPC_INTERRUPT_WDT | PPC_INTERRUPT_CDOORBELL | \ + PPC_INTERRUPT_FIT | PPC_INTERRUPT_PIT | PPC_INTERRUPT_THERM) + +static int p8_interrupt_powersave(CPUPPCState *env) +{ + if ((env->pending_interrupts & PPC_INTERRUPT_EXT) && + (env->spr[SPR_LPCR] & LPCR_P8_PECE2)) { + return PPC_INTERRUPT_EXT; + } + if ((env->pending_interrupts & PPC_INTERRUPT_DECR) && + (env->spr[SPR_LPCR] & LPCR_P8_PECE3)) { + return PPC_INTERRUPT_DECR; + } + if ((env->pending_interrupts & PPC_INTERRUPT_MCK) && + (env->spr[SPR_LPCR] & LPCR_P8_PECE4)) { + return PPC_INTERRUPT_MCK; + } + if ((env->pending_interrupts & PPC_INTERRUPT_HMI) && + (env->spr[SPR_LPCR] & LPCR_P8_PECE4)) { + return PPC_INTERRUPT_HMI; + } + if ((env->pending_interrupts & PPC_INTERRUPT_DOORBELL) && + (env->spr[SPR_LPCR] & LPCR_P8_PECE0)) { + return PPC_INTERRUPT_DOORBELL; + } + if ((env->pending_interrupts & PPC_INTERRUPT_HDOORBELL) && + (env->spr[SPR_LPCR] & LPCR_P8_PECE1)) { + return PPC_INTERRUPT_HDOORBELL; + } + if (env->pending_interrupts & PPC_INTERRUPT_RESET) { + return PPC_INTERRUPT_RESET; + } + return 0; +} + +static int p8_next_unmasked_interrupt(CPUPPCState *env) +{ + PowerPCCPU *cpu = env_archcpu(env); + CPUState *cs = CPU(cpu); + /* Ignore MSR[EE] when coming out of some power management states */ + bool msr_ee = FIELD_EX64(env->msr, MSR, EE) || env->resume_as_sreset; + + assert((env->pending_interrupts & P8_UNUSED_INTERRUPTS) == 0); + + if (cs->halted) { + /* LPCR[PECE] controls which interrupts can exit power-saving mode */ + return p8_interrupt_powersave(env); + } + + /* Machine check exception */ + if (env->pending_interrupts & PPC_INTERRUPT_MCK) { + return PPC_INTERRUPT_MCK; + } + + /* Hypervisor decrementer exception */ + if (env->pending_interrupts & PPC_INTERRUPT_HDECR) { + /* LPCR will be clear when not supported so this will work */ + bool hdice = !!(env->spr[SPR_LPCR] & LPCR_HDICE); + if ((msr_ee || !FIELD_EX64_HV(env->msr)) && hdice) { + /* HDEC clears on delivery */ + return PPC_INTERRUPT_HDECR; + } + } + + /* External interrupt can ignore MSR:EE under some circumstances */ + if (env->pending_interrupts & PPC_INTERRUPT_EXT) { + bool lpes0 = !!(env->spr[SPR_LPCR] & LPCR_LPES0); + bool heic = !!(env->spr[SPR_LPCR] & LPCR_HEIC); + /* HEIC blocks delivery to the hypervisor */ + if ((msr_ee && !(heic && FIELD_EX64_HV(env->msr) && + !FIELD_EX64(env->msr, MSR, PR))) || + (env->has_hv_mode && !FIELD_EX64_HV(env->msr) && !lpes0)) { + return PPC_INTERRUPT_EXT; + } + } + if (msr_ee != 0) { + /* Decrementer exception */ + if (env->pending_interrupts & PPC_INTERRUPT_DECR) { + return PPC_INTERRUPT_DECR; + } + if (env->pending_interrupts & PPC_INTERRUPT_DOORBELL) { + return PPC_INTERRUPT_DOORBELL; + } + if (env->pending_interrupts & PPC_INTERRUPT_HDOORBELL) { + return PPC_INTERRUPT_HDOORBELL; + } + if (env->pending_interrupts & PPC_INTERRUPT_PERFM) { + return PPC_INTERRUPT_PERFM; + } + /* EBB exception */ + if (env->pending_interrupts & PPC_INTERRUPT_EBB) { + /* + * EBB exception must be taken in problem state and + * with BESCR_GE set. + */ + if (FIELD_EX64(env->msr, MSR, PR) && + (env->spr[SPR_BESCR] & BESCR_GE)) { + return PPC_INTERRUPT_EBB; + } + } + } + + return 0; +} + +#define P9_UNUSED_INTERRUPTS \ + (PPC_INTERRUPT_RESET | PPC_INTERRUPT_DEBUG | PPC_INTERRUPT_CEXT | \ + PPC_INTERRUPT_WDT | PPC_INTERRUPT_CDOORBELL | PPC_INTERRUPT_FIT | \ + PPC_INTERRUPT_PIT | PPC_INTERRUPT_THERM) + +static int p9_interrupt_powersave(CPUPPCState *env) +{ + /* External Exception */ + if ((env->pending_interrupts & PPC_INTERRUPT_EXT) && + (env->spr[SPR_LPCR] & LPCR_EEE)) { + bool heic = !!(env->spr[SPR_LPCR] & LPCR_HEIC); + if (!heic || !FIELD_EX64_HV(env->msr) || + FIELD_EX64(env->msr, MSR, PR)) { + return PPC_INTERRUPT_EXT; + } + } + /* Decrementer Exception */ + if ((env->pending_interrupts & PPC_INTERRUPT_DECR) && + (env->spr[SPR_LPCR] & LPCR_DEE)) { + return PPC_INTERRUPT_DECR; + } + /* Machine Check or Hypervisor Maintenance Exception */ + if (env->spr[SPR_LPCR] & LPCR_OEE) { + if (env->pending_interrupts & PPC_INTERRUPT_MCK) { + return PPC_INTERRUPT_MCK; + } + if (env->pending_interrupts & PPC_INTERRUPT_HMI) { + return PPC_INTERRUPT_HMI; + } + } + /* Privileged Doorbell Exception */ + if ((env->pending_interrupts & PPC_INTERRUPT_DOORBELL) && + (env->spr[SPR_LPCR] & LPCR_PDEE)) { + return PPC_INTERRUPT_DOORBELL; + } + /* Hypervisor Doorbell Exception */ + if ((env->pending_interrupts & PPC_INTERRUPT_HDOORBELL) && + (env->spr[SPR_LPCR] & LPCR_HDEE)) { + return PPC_INTERRUPT_HDOORBELL; + } + /* Hypervisor virtualization exception */ + if ((env->pending_interrupts & PPC_INTERRUPT_HVIRT) && + (env->spr[SPR_LPCR] & LPCR_HVEE)) { + return PPC_INTERRUPT_HVIRT; + } + if (env->pending_interrupts & PPC_INTERRUPT_RESET) { + return PPC_INTERRUPT_RESET; + } + return 0; +} + +static int p9_next_unmasked_interrupt(CPUPPCState *env) +{ + PowerPCCPU *cpu = env_archcpu(env); + CPUState *cs = CPU(cpu); + /* Ignore MSR[EE] when coming out of some power management states */ + bool msr_ee = FIELD_EX64(env->msr, MSR, EE) || env->resume_as_sreset; + + assert((env->pending_interrupts & P9_UNUSED_INTERRUPTS) == 0); + + if (cs->halted) { + if (env->spr[SPR_PSSCR] & PSSCR_EC) { + /* + * When PSSCR[EC] is set, LPCR[PECE] controls which interrupts can + * wakeup the processor + */ + return p9_interrupt_powersave(env); + } else { + /* + * When it's clear, any system-caused exception exits power-saving + * mode, even the ones that gate on MSR[EE]. + */ + msr_ee = true; + } + } + + /* Machine check exception */ + if (env->pending_interrupts & PPC_INTERRUPT_MCK) { + return PPC_INTERRUPT_MCK; + } + + /* Hypervisor decrementer exception */ + if (env->pending_interrupts & PPC_INTERRUPT_HDECR) { + /* LPCR will be clear when not supported so this will work */ + bool hdice = !!(env->spr[SPR_LPCR] & LPCR_HDICE); + if ((msr_ee || !FIELD_EX64_HV(env->msr)) && hdice) { + /* HDEC clears on delivery */ + return PPC_INTERRUPT_HDECR; + } + } + + /* Hypervisor virtualization interrupt */ + if (env->pending_interrupts & PPC_INTERRUPT_HVIRT) { + /* LPCR will be clear when not supported so this will work */ + bool hvice = !!(env->spr[SPR_LPCR] & LPCR_HVICE); + if ((msr_ee || !FIELD_EX64_HV(env->msr)) && hvice) { + return PPC_INTERRUPT_HVIRT; + } + } + + /* External interrupt can ignore MSR:EE under some circumstances */ + if (env->pending_interrupts & PPC_INTERRUPT_EXT) { + bool lpes0 = !!(env->spr[SPR_LPCR] & LPCR_LPES0); + bool heic = !!(env->spr[SPR_LPCR] & LPCR_HEIC); + /* HEIC blocks delivery to the hypervisor */ + if ((msr_ee && !(heic && FIELD_EX64_HV(env->msr) && + !FIELD_EX64(env->msr, MSR, PR))) || + (env->has_hv_mode && !FIELD_EX64_HV(env->msr) && !lpes0)) { + return PPC_INTERRUPT_EXT; + } + } + if (msr_ee != 0) { + /* Decrementer exception */ + if (env->pending_interrupts & PPC_INTERRUPT_DECR) { + return PPC_INTERRUPT_DECR; + } + if (env->pending_interrupts & PPC_INTERRUPT_DOORBELL) { + return PPC_INTERRUPT_DOORBELL; + } + if (env->pending_interrupts & PPC_INTERRUPT_HDOORBELL) { + return PPC_INTERRUPT_HDOORBELL; + } + if (env->pending_interrupts & PPC_INTERRUPT_PERFM) { + return PPC_INTERRUPT_PERFM; + } + /* EBB exception */ + if (env->pending_interrupts & PPC_INTERRUPT_EBB) { + /* + * EBB exception must be taken in problem state and + * with BESCR_GE set. + */ + if (FIELD_EX64(env->msr, MSR, PR) && + (env->spr[SPR_BESCR] & BESCR_GE)) { + return PPC_INTERRUPT_EBB; + } + } + } + + return 0; +} +#endif + +static int ppc_next_unmasked_interrupt_generic(CPUPPCState *env) +{ bool async_deliver; /* External reset */ - if (env->pending_interrupts & (1 << PPC_INTERRUPT_RESET)) { - env->pending_interrupts &= ~(1 << PPC_INTERRUPT_RESET); - powerpc_excp(cpu, POWERPC_EXCP_RESET); - return; + if (env->pending_interrupts & PPC_INTERRUPT_RESET) { + return PPC_INTERRUPT_RESET; } /* Machine check exception */ - if (env->pending_interrupts & (1 << PPC_INTERRUPT_MCK)) { - env->pending_interrupts &= ~(1 << PPC_INTERRUPT_MCK); - powerpc_excp(cpu, POWERPC_EXCP_MCHECK); - return; + if (env->pending_interrupts & PPC_INTERRUPT_MCK) { + return PPC_INTERRUPT_MCK; } #if 0 /* TODO */ /* External debug exception */ - if (env->pending_interrupts & (1 << PPC_INTERRUPT_DEBUG)) { - env->pending_interrupts &= ~(1 << PPC_INTERRUPT_DEBUG); - powerpc_excp(cpu, POWERPC_EXCP_DEBUG); - return; + if (env->pending_interrupts & PPC_INTERRUPT_DEBUG) { + return PPC_INTERRUPT_DEBUG; } #endif @@ -1718,129 +2046,246 @@ static void ppc_hw_interrupt(CPUPPCState *env) async_deliver = FIELD_EX64(env->msr, MSR, EE) || env->resume_as_sreset; /* Hypervisor decrementer exception */ - if (env->pending_interrupts & (1 << PPC_INTERRUPT_HDECR)) { + if (env->pending_interrupts & PPC_INTERRUPT_HDECR) { /* LPCR will be clear when not supported so this will work */ bool hdice = !!(env->spr[SPR_LPCR] & LPCR_HDICE); if ((async_deliver || !FIELD_EX64_HV(env->msr)) && hdice) { /* HDEC clears on delivery */ - env->pending_interrupts &= ~(1 << PPC_INTERRUPT_HDECR); - powerpc_excp(cpu, POWERPC_EXCP_HDECR); - return; + return PPC_INTERRUPT_HDECR; } } /* Hypervisor virtualization interrupt */ - if (env->pending_interrupts & (1 << PPC_INTERRUPT_HVIRT)) { + if (env->pending_interrupts & PPC_INTERRUPT_HVIRT) { /* LPCR will be clear when not supported so this will work */ bool hvice = !!(env->spr[SPR_LPCR] & LPCR_HVICE); if ((async_deliver || !FIELD_EX64_HV(env->msr)) && hvice) { - powerpc_excp(cpu, POWERPC_EXCP_HVIRT); - return; + return PPC_INTERRUPT_HVIRT; } } /* External interrupt can ignore MSR:EE under some circumstances */ - if (env->pending_interrupts & (1 << PPC_INTERRUPT_EXT)) { + if (env->pending_interrupts & PPC_INTERRUPT_EXT) { bool lpes0 = !!(env->spr[SPR_LPCR] & LPCR_LPES0); bool heic = !!(env->spr[SPR_LPCR] & LPCR_HEIC); /* HEIC blocks delivery to the hypervisor */ if ((async_deliver && !(heic && FIELD_EX64_HV(env->msr) && !FIELD_EX64(env->msr, MSR, PR))) || (env->has_hv_mode && !FIELD_EX64_HV(env->msr) && !lpes0)) { - if (books_vhyp_promotes_external_to_hvirt(cpu)) { - powerpc_excp(cpu, POWERPC_EXCP_HVIRT); - } else { - powerpc_excp(cpu, POWERPC_EXCP_EXTERNAL); - } - return; + return PPC_INTERRUPT_EXT; } } if (FIELD_EX64(env->msr, MSR, CE)) { /* External critical interrupt */ - if (env->pending_interrupts & (1 << PPC_INTERRUPT_CEXT)) { - powerpc_excp(cpu, POWERPC_EXCP_CRITICAL); - return; + if (env->pending_interrupts & PPC_INTERRUPT_CEXT) { + return PPC_INTERRUPT_CEXT; } } if (async_deliver != 0) { /* Watchdog timer on embedded PowerPC */ - if (env->pending_interrupts & (1 << PPC_INTERRUPT_WDT)) { - env->pending_interrupts &= ~(1 << PPC_INTERRUPT_WDT); - powerpc_excp(cpu, POWERPC_EXCP_WDT); - return; + if (env->pending_interrupts & PPC_INTERRUPT_WDT) { + return PPC_INTERRUPT_WDT; } - if (env->pending_interrupts & (1 << PPC_INTERRUPT_CDOORBELL)) { - env->pending_interrupts &= ~(1 << PPC_INTERRUPT_CDOORBELL); - powerpc_excp(cpu, POWERPC_EXCP_DOORCI); - return; + if (env->pending_interrupts & PPC_INTERRUPT_CDOORBELL) { + return PPC_INTERRUPT_CDOORBELL; } /* Fixed interval timer on embedded PowerPC */ - if (env->pending_interrupts & (1 << PPC_INTERRUPT_FIT)) { - env->pending_interrupts &= ~(1 << PPC_INTERRUPT_FIT); - powerpc_excp(cpu, POWERPC_EXCP_FIT); - return; + if (env->pending_interrupts & PPC_INTERRUPT_FIT) { + return PPC_INTERRUPT_FIT; } /* Programmable interval timer on embedded PowerPC */ - if (env->pending_interrupts & (1 << PPC_INTERRUPT_PIT)) { - env->pending_interrupts &= ~(1 << PPC_INTERRUPT_PIT); - powerpc_excp(cpu, POWERPC_EXCP_PIT); - return; + if (env->pending_interrupts & PPC_INTERRUPT_PIT) { + return PPC_INTERRUPT_PIT; } /* Decrementer exception */ - if (env->pending_interrupts & (1 << PPC_INTERRUPT_DECR)) { - if (ppc_decr_clear_on_delivery(env)) { - env->pending_interrupts &= ~(1 << PPC_INTERRUPT_DECR); - } - powerpc_excp(cpu, POWERPC_EXCP_DECR); - return; + if (env->pending_interrupts & PPC_INTERRUPT_DECR) { + return PPC_INTERRUPT_DECR; } - if (env->pending_interrupts & (1 << PPC_INTERRUPT_DOORBELL)) { - env->pending_interrupts &= ~(1 << PPC_INTERRUPT_DOORBELL); - if (is_book3s_arch2x(env)) { - powerpc_excp(cpu, POWERPC_EXCP_SDOOR); - } else { - powerpc_excp(cpu, POWERPC_EXCP_DOORI); - } - return; + if (env->pending_interrupts & PPC_INTERRUPT_DOORBELL) { + return PPC_INTERRUPT_DOORBELL; } - if (env->pending_interrupts & (1 << PPC_INTERRUPT_HDOORBELL)) { - env->pending_interrupts &= ~(1 << PPC_INTERRUPT_HDOORBELL); - powerpc_excp(cpu, POWERPC_EXCP_SDOOR_HV); - return; + if (env->pending_interrupts & PPC_INTERRUPT_HDOORBELL) { + return PPC_INTERRUPT_HDOORBELL; } - if (env->pending_interrupts & (1 << PPC_INTERRUPT_PERFM)) { - env->pending_interrupts &= ~(1 << PPC_INTERRUPT_PERFM); - powerpc_excp(cpu, POWERPC_EXCP_PERFM); - return; + if (env->pending_interrupts & PPC_INTERRUPT_PERFM) { + return PPC_INTERRUPT_PERFM; } /* Thermal interrupt */ - if (env->pending_interrupts & (1 << PPC_INTERRUPT_THERM)) { - env->pending_interrupts &= ~(1 << PPC_INTERRUPT_THERM); - powerpc_excp(cpu, POWERPC_EXCP_THERM); - return; + if (env->pending_interrupts & PPC_INTERRUPT_THERM) { + return PPC_INTERRUPT_THERM; } /* EBB exception */ - if (env->pending_interrupts & (1 << PPC_INTERRUPT_EBB)) { + if (env->pending_interrupts & PPC_INTERRUPT_EBB) { /* * EBB exception must be taken in problem state and * with BESCR_GE set. */ if (FIELD_EX64(env->msr, MSR, PR) && (env->spr[SPR_BESCR] & BESCR_GE)) { - env->pending_interrupts &= ~(1 << PPC_INTERRUPT_EBB); + return PPC_INTERRUPT_EBB; + } + } + } - if (env->spr[SPR_BESCR] & BESCR_PMEO) { - powerpc_excp(cpu, POWERPC_EXCP_PERFM_EBB); - } else if (env->spr[SPR_BESCR] & BESCR_EEO) { - powerpc_excp(cpu, POWERPC_EXCP_EXTERNAL_EBB); - } + return 0; +} - return; - } +static int ppc_next_unmasked_interrupt(CPUPPCState *env) +{ + switch (env->excp_model) { +#if defined(TARGET_PPC64) + case POWERPC_EXCP_POWER7: + return p7_next_unmasked_interrupt(env); + case POWERPC_EXCP_POWER8: + return p8_next_unmasked_interrupt(env); + case POWERPC_EXCP_POWER9: + case POWERPC_EXCP_POWER10: + return p9_next_unmasked_interrupt(env); +#endif + default: + return ppc_next_unmasked_interrupt_generic(env); + } +} + +/* + * Sets CPU_INTERRUPT_HARD if there is at least one unmasked interrupt to be + * delivered and clears CPU_INTERRUPT_HARD otherwise. + * + * This method is called by ppc_set_interrupt when an interrupt is raised or + * lowered, and should also be called whenever an interrupt masking condition + * is changed, e.g.: + * - When relevant bits of MSR are altered, like EE, HV, PR, etc.; + * - When relevant bits of LPCR are altered, like PECE, HDICE, HVICE, etc.; + * - When PSSCR[EC] or env->resume_as_sreset are changed; + * - When cs->halted is changed and the CPU has a different interrupt masking + * logic in power-saving mode (e.g., POWER7/8/9/10); + */ +void ppc_maybe_interrupt(CPUPPCState *env) +{ + CPUState *cs = env_cpu(env); + bool locked = false; + + if (!qemu_mutex_iothread_locked()) { + locked = true; + qemu_mutex_lock_iothread(); + } + + if (ppc_next_unmasked_interrupt(env)) { + cpu_interrupt(cs, CPU_INTERRUPT_HARD); + } else { + cpu_reset_interrupt(cs, CPU_INTERRUPT_HARD); + } + + if (locked) { + qemu_mutex_unlock_iothread(); + } +} + +#if defined(TARGET_PPC64) +static void p7_deliver_interrupt(CPUPPCState *env, int interrupt) +{ + PowerPCCPU *cpu = env_archcpu(env); + CPUState *cs = env_cpu(env); + + switch (interrupt) { + case PPC_INTERRUPT_MCK: /* Machine check exception */ + env->pending_interrupts &= ~PPC_INTERRUPT_MCK; + powerpc_excp(cpu, POWERPC_EXCP_MCHECK); + break; + + case PPC_INTERRUPT_HDECR: /* Hypervisor decrementer exception */ + /* HDEC clears on delivery */ + env->pending_interrupts &= ~PPC_INTERRUPT_HDECR; + powerpc_excp(cpu, POWERPC_EXCP_HDECR); + break; + + case PPC_INTERRUPT_EXT: + if (books_vhyp_promotes_external_to_hvirt(cpu)) { + powerpc_excp(cpu, POWERPC_EXCP_HVIRT); + } else { + powerpc_excp(cpu, POWERPC_EXCP_EXTERNAL); } + break; + + case PPC_INTERRUPT_DECR: /* Decrementer exception */ + powerpc_excp(cpu, POWERPC_EXCP_DECR); + break; + case PPC_INTERRUPT_PERFM: + env->pending_interrupts &= ~PPC_INTERRUPT_PERFM; + powerpc_excp(cpu, POWERPC_EXCP_PERFM); + break; + case 0: + /* + * This is a bug ! It means that has_work took us out of halt without + * anything to deliver while in a PM state that requires getting + * out via a 0x100 + * + * This means we will incorrectly execute past the power management + * instruction instead of triggering a reset. + * + * It generally means a discrepancy between the wakeup conditions in the + * processor has_work implementation and the logic in this function. + */ + assert(!env->resume_as_sreset); + break; + default: + cpu_abort(cs, "Invalid PowerPC interrupt %d. Aborting\n", interrupt); } +} - if (env->resume_as_sreset) { +static void p8_deliver_interrupt(CPUPPCState *env, int interrupt) +{ + PowerPCCPU *cpu = env_archcpu(env); + CPUState *cs = env_cpu(env); + + switch (interrupt) { + case PPC_INTERRUPT_MCK: /* Machine check exception */ + env->pending_interrupts &= ~PPC_INTERRUPT_MCK; + powerpc_excp(cpu, POWERPC_EXCP_MCHECK); + break; + + case PPC_INTERRUPT_HDECR: /* Hypervisor decrementer exception */ + /* HDEC clears on delivery */ + env->pending_interrupts &= ~PPC_INTERRUPT_HDECR; + powerpc_excp(cpu, POWERPC_EXCP_HDECR); + break; + + case PPC_INTERRUPT_EXT: + if (books_vhyp_promotes_external_to_hvirt(cpu)) { + powerpc_excp(cpu, POWERPC_EXCP_HVIRT); + } else { + powerpc_excp(cpu, POWERPC_EXCP_EXTERNAL); + } + break; + + case PPC_INTERRUPT_DECR: /* Decrementer exception */ + powerpc_excp(cpu, POWERPC_EXCP_DECR); + break; + case PPC_INTERRUPT_DOORBELL: + env->pending_interrupts &= ~PPC_INTERRUPT_DOORBELL; + if (is_book3s_arch2x(env)) { + powerpc_excp(cpu, POWERPC_EXCP_SDOOR); + } else { + powerpc_excp(cpu, POWERPC_EXCP_DOORI); + } + break; + case PPC_INTERRUPT_HDOORBELL: + env->pending_interrupts &= ~PPC_INTERRUPT_HDOORBELL; + powerpc_excp(cpu, POWERPC_EXCP_SDOOR_HV); + break; + case PPC_INTERRUPT_PERFM: + env->pending_interrupts &= ~PPC_INTERRUPT_PERFM; + powerpc_excp(cpu, POWERPC_EXCP_PERFM); + break; + case PPC_INTERRUPT_EBB: /* EBB exception */ + env->pending_interrupts &= ~PPC_INTERRUPT_EBB; + if (env->spr[SPR_BESCR] & BESCR_PMEO) { + powerpc_excp(cpu, POWERPC_EXCP_PERFM_EBB); + } else if (env->spr[SPR_BESCR] & BESCR_EEO) { + powerpc_excp(cpu, POWERPC_EXCP_EXTERNAL_EBB); + } + break; + case 0: /* * This is a bug ! It means that has_work took us out of halt without * anything to deliver while in a PM state that requires getting @@ -1852,8 +2297,214 @@ static void ppc_hw_interrupt(CPUPPCState *env) * It generally means a discrepancy between the wakeup conditions in the * processor has_work implementation and the logic in this function. */ - cpu_abort(env_cpu(env), - "Wakeup from PM state but interrupt Undelivered"); + assert(!env->resume_as_sreset); + break; + default: + cpu_abort(cs, "Invalid PowerPC interrupt %d. Aborting\n", interrupt); + } +} + +static void p9_deliver_interrupt(CPUPPCState *env, int interrupt) +{ + PowerPCCPU *cpu = env_archcpu(env); + CPUState *cs = env_cpu(env); + + if (cs->halted && !(env->spr[SPR_PSSCR] & PSSCR_EC) && + !FIELD_EX64(env->msr, MSR, EE)) { + /* + * A pending interrupt took us out of power-saving, but MSR[EE] says + * that we should return to NIP+4 instead of delivering it. + */ + return; + } + + switch (interrupt) { + case PPC_INTERRUPT_MCK: /* Machine check exception */ + env->pending_interrupts &= ~PPC_INTERRUPT_MCK; + powerpc_excp(cpu, POWERPC_EXCP_MCHECK); + break; + + case PPC_INTERRUPT_HDECR: /* Hypervisor decrementer exception */ + /* HDEC clears on delivery */ + env->pending_interrupts &= ~PPC_INTERRUPT_HDECR; + powerpc_excp(cpu, POWERPC_EXCP_HDECR); + break; + case PPC_INTERRUPT_HVIRT: /* Hypervisor virtualization interrupt */ + powerpc_excp(cpu, POWERPC_EXCP_HVIRT); + break; + + case PPC_INTERRUPT_EXT: + if (books_vhyp_promotes_external_to_hvirt(cpu)) { + powerpc_excp(cpu, POWERPC_EXCP_HVIRT); + } else { + powerpc_excp(cpu, POWERPC_EXCP_EXTERNAL); + } + break; + + case PPC_INTERRUPT_DECR: /* Decrementer exception */ + powerpc_excp(cpu, POWERPC_EXCP_DECR); + break; + case PPC_INTERRUPT_DOORBELL: + env->pending_interrupts &= ~PPC_INTERRUPT_DOORBELL; + powerpc_excp(cpu, POWERPC_EXCP_SDOOR); + break; + case PPC_INTERRUPT_HDOORBELL: + env->pending_interrupts &= ~PPC_INTERRUPT_HDOORBELL; + powerpc_excp(cpu, POWERPC_EXCP_SDOOR_HV); + break; + case PPC_INTERRUPT_PERFM: + env->pending_interrupts &= ~PPC_INTERRUPT_PERFM; + powerpc_excp(cpu, POWERPC_EXCP_PERFM); + break; + case PPC_INTERRUPT_EBB: /* EBB exception */ + env->pending_interrupts &= ~PPC_INTERRUPT_EBB; + if (env->spr[SPR_BESCR] & BESCR_PMEO) { + powerpc_excp(cpu, POWERPC_EXCP_PERFM_EBB); + } else if (env->spr[SPR_BESCR] & BESCR_EEO) { + powerpc_excp(cpu, POWERPC_EXCP_EXTERNAL_EBB); + } + break; + case 0: + /* + * This is a bug ! It means that has_work took us out of halt without + * anything to deliver while in a PM state that requires getting + * out via a 0x100 + * + * This means we will incorrectly execute past the power management + * instruction instead of triggering a reset. + * + * It generally means a discrepancy between the wakeup conditions in the + * processor has_work implementation and the logic in this function. + */ + assert(!env->resume_as_sreset); + break; + default: + cpu_abort(cs, "Invalid PowerPC interrupt %d. Aborting\n", interrupt); + } +} +#endif + +static void ppc_deliver_interrupt_generic(CPUPPCState *env, int interrupt) +{ + PowerPCCPU *cpu = env_archcpu(env); + CPUState *cs = env_cpu(env); + + switch (interrupt) { + case PPC_INTERRUPT_RESET: /* External reset */ + env->pending_interrupts &= ~PPC_INTERRUPT_RESET; + powerpc_excp(cpu, POWERPC_EXCP_RESET); + break; + case PPC_INTERRUPT_MCK: /* Machine check exception */ + env->pending_interrupts &= ~PPC_INTERRUPT_MCK; + powerpc_excp(cpu, POWERPC_EXCP_MCHECK); + break; + + case PPC_INTERRUPT_HDECR: /* Hypervisor decrementer exception */ + /* HDEC clears on delivery */ + env->pending_interrupts &= ~PPC_INTERRUPT_HDECR; + powerpc_excp(cpu, POWERPC_EXCP_HDECR); + break; + case PPC_INTERRUPT_HVIRT: /* Hypervisor virtualization interrupt */ + powerpc_excp(cpu, POWERPC_EXCP_HVIRT); + break; + + case PPC_INTERRUPT_EXT: + if (books_vhyp_promotes_external_to_hvirt(cpu)) { + powerpc_excp(cpu, POWERPC_EXCP_HVIRT); + } else { + powerpc_excp(cpu, POWERPC_EXCP_EXTERNAL); + } + break; + case PPC_INTERRUPT_CEXT: /* External critical interrupt */ + powerpc_excp(cpu, POWERPC_EXCP_CRITICAL); + break; + + case PPC_INTERRUPT_WDT: /* Watchdog timer on embedded PowerPC */ + env->pending_interrupts &= ~PPC_INTERRUPT_WDT; + powerpc_excp(cpu, POWERPC_EXCP_WDT); + break; + case PPC_INTERRUPT_CDOORBELL: + env->pending_interrupts &= ~PPC_INTERRUPT_CDOORBELL; + powerpc_excp(cpu, POWERPC_EXCP_DOORCI); + break; + case PPC_INTERRUPT_FIT: /* Fixed interval timer on embedded PowerPC */ + env->pending_interrupts &= ~PPC_INTERRUPT_FIT; + powerpc_excp(cpu, POWERPC_EXCP_FIT); + break; + case PPC_INTERRUPT_PIT: /* Programmable interval timer on embedded ppc */ + env->pending_interrupts &= ~PPC_INTERRUPT_PIT; + powerpc_excp(cpu, POWERPC_EXCP_PIT); + break; + case PPC_INTERRUPT_DECR: /* Decrementer exception */ + if (ppc_decr_clear_on_delivery(env)) { + env->pending_interrupts &= ~PPC_INTERRUPT_DECR; + } + powerpc_excp(cpu, POWERPC_EXCP_DECR); + break; + case PPC_INTERRUPT_DOORBELL: + env->pending_interrupts &= ~PPC_INTERRUPT_DOORBELL; + if (is_book3s_arch2x(env)) { + powerpc_excp(cpu, POWERPC_EXCP_SDOOR); + } else { + powerpc_excp(cpu, POWERPC_EXCP_DOORI); + } + break; + case PPC_INTERRUPT_HDOORBELL: + env->pending_interrupts &= ~PPC_INTERRUPT_HDOORBELL; + powerpc_excp(cpu, POWERPC_EXCP_SDOOR_HV); + break; + case PPC_INTERRUPT_PERFM: + env->pending_interrupts &= ~PPC_INTERRUPT_PERFM; + powerpc_excp(cpu, POWERPC_EXCP_PERFM); + break; + case PPC_INTERRUPT_THERM: /* Thermal interrupt */ + env->pending_interrupts &= ~PPC_INTERRUPT_THERM; + powerpc_excp(cpu, POWERPC_EXCP_THERM); + break; + case PPC_INTERRUPT_EBB: /* EBB exception */ + env->pending_interrupts &= ~PPC_INTERRUPT_EBB; + if (env->spr[SPR_BESCR] & BESCR_PMEO) { + powerpc_excp(cpu, POWERPC_EXCP_PERFM_EBB); + } else if (env->spr[SPR_BESCR] & BESCR_EEO) { + powerpc_excp(cpu, POWERPC_EXCP_EXTERNAL_EBB); + } + break; + case 0: + /* + * This is a bug ! It means that has_work took us out of halt without + * anything to deliver while in a PM state that requires getting + * out via a 0x100 + * + * This means we will incorrectly execute past the power management + * instruction instead of triggering a reset. + * + * It generally means a discrepancy between the wakeup conditions in the + * processor has_work implementation and the logic in this function. + */ + assert(!env->resume_as_sreset); + break; + default: + cpu_abort(cs, "Invalid PowerPC interrupt %d. Aborting\n", interrupt); + } +} + +static void ppc_deliver_interrupt(CPUPPCState *env, int interrupt) +{ + switch (env->excp_model) { +#if defined(TARGET_PPC64) + case POWERPC_EXCP_POWER7: + p7_deliver_interrupt(env, interrupt); + break; + case POWERPC_EXCP_POWER8: + p8_deliver_interrupt(env, interrupt); + break; + case POWERPC_EXCP_POWER9: + case POWERPC_EXCP_POWER10: + p9_deliver_interrupt(env, interrupt); + break; +#endif + default: + ppc_deliver_interrupt_generic(env, interrupt); } } @@ -1889,15 +2540,22 @@ bool ppc_cpu_exec_interrupt(CPUState *cs, int interrupt_request) { PowerPCCPU *cpu = POWERPC_CPU(cs); CPUPPCState *env = &cpu->env; + int interrupt; - if (interrupt_request & CPU_INTERRUPT_HARD) { - ppc_hw_interrupt(env); - if (env->pending_interrupts == 0) { - cs->interrupt_request &= ~CPU_INTERRUPT_HARD; - } - return true; + if ((interrupt_request & CPU_INTERRUPT_HARD) == 0) { + return false; } - return false; + + interrupt = ppc_next_unmasked_interrupt(env); + if (interrupt == 0) { + return false; + } + + ppc_deliver_interrupt(env, interrupt); + if (env->pending_interrupts == 0) { + cpu_reset_interrupt(cs, CPU_INTERRUPT_HARD); + } + return true; } #endif /* !CONFIG_USER_ONLY */ @@ -1958,6 +2616,11 @@ void helper_store_msr(CPUPPCState *env, target_ulong val) } } +void helper_ppc_maybe_interrupt(CPUPPCState *env) +{ + ppc_maybe_interrupt(env); +} + #if defined(TARGET_PPC64) void helper_scv(CPUPPCState *env, uint32_t lev) { @@ -1978,6 +2641,8 @@ void helper_pminsn(CPUPPCState *env, powerpc_pm_insn_t insn) /* Condition for waking up at 0x100 */ env->resume_as_sreset = (insn != PPC_PM_STOP) || (env->spr[SPR_PSSCR] & PSSCR_EC); + + ppc_maybe_interrupt(env); } #endif /* defined(TARGET_PPC64) */ @@ -2086,7 +2751,6 @@ void helper_rfebb(CPUPPCState *env, target_ulong s) static void do_ebb(CPUPPCState *env, int ebb_excp) { PowerPCCPU *cpu = env_archcpu(env); - CPUState *cs = CPU(cpu); /* * FSCR_EBB and FSCR_IC_EBB are the same bits used with @@ -2104,8 +2768,7 @@ static void do_ebb(CPUPPCState *env, int ebb_excp) if (FIELD_EX64(env->msr, MSR, PR)) { powerpc_excp(cpu, ebb_excp); } else { - env->pending_interrupts |= 1 << PPC_INTERRUPT_EBB; - cpu_interrupt(cs, CPU_INTERRUPT_HARD); + ppc_set_irq(cpu, PPC_INTERRUPT_EBB, 1); } } @@ -2298,7 +2961,7 @@ void helper_msgclr(CPUPPCState *env, target_ulong rb) return; } - env->pending_interrupts &= ~(1 << irq); + ppc_set_irq(env_archcpu(env), irq, 0); } void helper_msgsnd(target_ulong rb) @@ -2317,8 +2980,7 @@ void helper_msgsnd(target_ulong rb) CPUPPCState *cenv = &cpu->env; if ((rb & DBELL_BRDCAST) || (cenv->spr[SPR_BOOKE_PIR] == pir)) { - cenv->pending_interrupts |= 1 << irq; - cpu_interrupt(cs, CPU_INTERRUPT_HARD); + ppc_set_irq(cpu, irq, 1); } } qemu_mutex_unlock_iothread(); @@ -2342,7 +3004,7 @@ void helper_book3s_msgclr(CPUPPCState *env, target_ulong rb) return; } - env->pending_interrupts &= ~(1 << PPC_INTERRUPT_HDOORBELL); + ppc_set_irq(env_archcpu(env), PPC_INTERRUPT_HDOORBELL, 0); } static void book3s_msgsnd_common(int pir, int irq) @@ -2356,8 +3018,7 @@ static void book3s_msgsnd_common(int pir, int irq) /* TODO: broadcast message to all threads of the same processor */ if (cenv->spr_cb[SPR_PIR].default_value == pir) { - cenv->pending_interrupts |= 1 << irq; - cpu_interrupt(cs, CPU_INTERRUPT_HARD); + ppc_set_irq(cpu, irq, 1); } } qemu_mutex_unlock_iothread(); @@ -2383,7 +3044,7 @@ void helper_book3s_msgclrp(CPUPPCState *env, target_ulong rb) return; } - env->pending_interrupts &= ~(1 << PPC_INTERRUPT_DOORBELL); + ppc_set_irq(env_archcpu(env), PPC_INTERRUPT_HDOORBELL, 0); } /* diff --git a/target/ppc/fpu_helper.c b/target/ppc/fpu_helper.c index ae25f32d6e..a66e16c212 100644 --- a/target/ppc/fpu_helper.c +++ b/target/ppc/fpu_helper.c @@ -3241,93 +3241,82 @@ void helper_XVXSIGSP(ppc_vsr_t *xt, ppc_vsr_t *xb) *xt = t; } -/* - * VSX_TEST_DC - VSX floating point test data class - * op - instruction mnemonic - * nels - number of elements (1, 2 or 4) - * xbn - VSR register number - * tp - type (float32 or float64) - * fld - vsr_t field (VsrD(*) or VsrW(*)) - * tfld - target vsr_t field (VsrD(*) or VsrW(*)) - * fld_max - target field max - * scrf - set result in CR and FPCC - */ -#define VSX_TEST_DC(op, nels, xbn, tp, fld, tfld, fld_max, scrf) \ -void helper_##op(CPUPPCState *env, uint32_t opcode) \ +#define VSX_TSTDC(tp) \ +static int32_t tp##_tstdc(tp b, uint32_t dcmx) \ { \ - ppc_vsr_t *xt = &env->vsr[xT(opcode)]; \ - ppc_vsr_t *xb = &env->vsr[xbn]; \ - ppc_vsr_t t = { }; \ - uint32_t i, sign, dcmx; \ - uint32_t cc, match = 0; \ - \ - if (!scrf) { \ - dcmx = DCMX_XV(opcode); \ - } else { \ - t = *xt; \ - dcmx = DCMX(opcode); \ - } \ - \ - for (i = 0; i < nels; i++) { \ - sign = tp##_is_neg(xb->fld); \ - if (tp##_is_any_nan(xb->fld)) { \ - match = extract32(dcmx, 6, 1); \ - } else if (tp##_is_infinity(xb->fld)) { \ - match = extract32(dcmx, 4 + !sign, 1); \ - } else if (tp##_is_zero(xb->fld)) { \ - match = extract32(dcmx, 2 + !sign, 1); \ - } else if (tp##_is_zero_or_denormal(xb->fld)) { \ - match = extract32(dcmx, 0 + !sign, 1); \ - } \ - \ - if (scrf) { \ - cc = sign << CRF_LT_BIT | match << CRF_EQ_BIT; \ - env->fpscr &= ~FP_FPCC; \ - env->fpscr |= cc << FPSCR_FPCC; \ - env->crf[BF(opcode)] = cc; \ - } else { \ - t.tfld = match ? fld_max : 0; \ - } \ - match = 0; \ - } \ - if (!scrf) { \ - *xt = t; \ + uint32_t match = 0; \ + uint32_t sign = tp##_is_neg(b); \ + if (tp##_is_any_nan(b)) { \ + match = extract32(dcmx, 6, 1); \ + } else if (tp##_is_infinity(b)) { \ + match = extract32(dcmx, 4 + !sign, 1); \ + } else if (tp##_is_zero(b)) { \ + match = extract32(dcmx, 2 + !sign, 1); \ + } else if (tp##_is_zero_or_denormal(b)) { \ + match = extract32(dcmx, 0 + !sign, 1); \ } \ + return (match != 0); \ } -VSX_TEST_DC(xvtstdcdp, 2, xB(opcode), float64, VsrD(i), VsrD(i), UINT64_MAX, 0) -VSX_TEST_DC(xvtstdcsp, 4, xB(opcode), float32, VsrW(i), VsrW(i), UINT32_MAX, 0) -VSX_TEST_DC(xststdcdp, 1, xB(opcode), float64, VsrD(0), VsrD(0), 0, 1) -VSX_TEST_DC(xststdcqp, 1, (rB(opcode) + 32), float128, f128, VsrD(0), 0, 1) +VSX_TSTDC(float32) +VSX_TSTDC(float64) +VSX_TSTDC(float128) +#undef VSX_TSTDC -void helper_xststdcsp(CPUPPCState *env, uint32_t opcode, ppc_vsr_t *xb) +void helper_XVTSTDCDP(ppc_vsr_t *t, ppc_vsr_t *b, uint64_t dcmx, uint32_t v) { - uint32_t dcmx, sign, exp; - uint32_t cc, match = 0, not_sp = 0; - float64 arg = xb->VsrD(0); - float64 arg_sp; - - dcmx = DCMX(opcode); - exp = (arg >> 52) & 0x7FF; - sign = float64_is_neg(arg); + int i; + for (i = 0; i < 2; i++) { + t->s64[i] = (int64_t)-float64_tstdc(b->f64[i], dcmx); + } +} - if (float64_is_any_nan(arg)) { - match = extract32(dcmx, 6, 1); - } else if (float64_is_infinity(arg)) { - match = extract32(dcmx, 4 + !sign, 1); - } else if (float64_is_zero(arg)) { - match = extract32(dcmx, 2 + !sign, 1); - } else if (float64_is_zero_or_denormal(arg) || (exp > 0 && exp < 0x381)) { - match = extract32(dcmx, 0 + !sign, 1); +void helper_XVTSTDCSP(ppc_vsr_t *t, ppc_vsr_t *b, uint64_t dcmx, uint32_t v) +{ + int i; + for (i = 0; i < 4; i++) { + t->s32[i] = (int32_t)-float32_tstdc(b->f32[i], dcmx); } +} - arg_sp = helper_todouble(helper_tosingle(arg)); - not_sp = arg != arg_sp; +static bool not_SP_value(float64 val) +{ + return val != helper_todouble(helper_tosingle(val)); +} +/* + * VSX_XS_TSTDC - VSX Scalar Test Data Class + * NAME - instruction name + * FLD - vsr_t field (VsrD(0) or f128) + * TP - type (float64 or float128) + */ +#define VSX_XS_TSTDC(NAME, FLD, TP) \ + void helper_##NAME(CPUPPCState *env, uint32_t bf, \ + uint32_t dcmx, ppc_vsr_t *b) \ + { \ + uint32_t cc, match, sign = TP##_is_neg(b->FLD); \ + match = TP##_tstdc(b->FLD, dcmx); \ + cc = sign << CRF_LT_BIT | match << CRF_EQ_BIT; \ + env->fpscr &= ~FP_FPCC; \ + env->fpscr |= cc << FPSCR_FPCC; \ + env->crf[bf] = cc; \ + } + +VSX_XS_TSTDC(XSTSTDCDP, VsrD(0), float64) +VSX_XS_TSTDC(XSTSTDCQP, f128, float128) +#undef VSX_XS_TSTDC + +void helper_XSTSTDCSP(CPUPPCState *env, uint32_t bf, + uint32_t dcmx, ppc_vsr_t *b) +{ + uint32_t cc, match, sign = float64_is_neg(b->VsrD(0)); + uint32_t exp = (b->VsrD(0) >> 52) & 0x7FF; + int not_sp = (int)not_SP_value(b->VsrD(0)); + match = float64_tstdc(b->VsrD(0), dcmx) || (exp > 0 && exp < 0x381); cc = sign << CRF_LT_BIT | match << CRF_EQ_BIT | not_sp << CRF_SO_BIT; env->fpscr &= ~FP_FPCC; env->fpscr |= cc << FPSCR_FPCC; - env->crf[BF(opcode)] = cc; + env->crf[bf] = cc; } void helper_xsrqpi(CPUPPCState *env, uint32_t opcode, diff --git a/target/ppc/helper.h b/target/ppc/helper.h index 57eee07256..8dd22a35e4 100644 --- a/target/ppc/helper.h +++ b/target/ppc/helper.h @@ -10,6 +10,7 @@ DEF_HELPER_4(HASHSTP, void, env, tl, tl, tl) DEF_HELPER_4(HASHCHKP, void, env, tl, tl, tl) #if !defined(CONFIG_USER_ONLY) DEF_HELPER_2(store_msr, void, env, tl) +DEF_HELPER_1(ppc_maybe_interrupt, void, env) DEF_HELPER_1(rfi, void, env) DEF_HELPER_1(40x_rfci, void, env) DEF_HELPER_1(rfci, void, env) @@ -29,6 +30,7 @@ DEF_HELPER_2(store_mmcr1, void, env, tl) DEF_HELPER_3(store_pmc, void, env, i32, i64) DEF_HELPER_2(read_pmc, tl, env, i32) DEF_HELPER_2(insns_inc, void, env, i32) +DEF_HELPER_1(handle_pmc5_overflow, void, env) #endif DEF_HELPER_1(check_tlb_flush_local, void, env) DEF_HELPER_1(check_tlb_flush_global, void, env) @@ -143,15 +145,15 @@ DEF_HELPER_FLAGS_1(ftsqrt, TCG_CALL_NO_RWG_SE, i32, i64) #define dh_ctype_acc ppc_acc_t * #define dh_typecode_acc dh_typecode_ptr -DEF_HELPER_FLAGS_3(vavgub, TCG_CALL_NO_RWG, void, avr, avr, avr) -DEF_HELPER_FLAGS_3(vavguh, TCG_CALL_NO_RWG, void, avr, avr, avr) -DEF_HELPER_FLAGS_3(vavguw, TCG_CALL_NO_RWG, void, avr, avr, avr) -DEF_HELPER_FLAGS_3(vabsdub, TCG_CALL_NO_RWG, void, avr, avr, avr) -DEF_HELPER_FLAGS_3(vabsduh, TCG_CALL_NO_RWG, void, avr, avr, avr) -DEF_HELPER_FLAGS_3(vabsduw, TCG_CALL_NO_RWG, void, avr, avr, avr) -DEF_HELPER_FLAGS_3(vavgsb, TCG_CALL_NO_RWG, void, avr, avr, avr) -DEF_HELPER_FLAGS_3(vavgsh, TCG_CALL_NO_RWG, void, avr, avr, avr) -DEF_HELPER_FLAGS_3(vavgsw, TCG_CALL_NO_RWG, void, avr, avr, avr) +DEF_HELPER_FLAGS_4(VAVGUB, TCG_CALL_NO_RWG, void, avr, avr, avr, i32) +DEF_HELPER_FLAGS_4(VAVGUH, TCG_CALL_NO_RWG, void, avr, avr, avr, i32) +DEF_HELPER_FLAGS_4(VAVGUW, TCG_CALL_NO_RWG, void, avr, avr, avr, i32) +DEF_HELPER_FLAGS_4(VABSDUB, TCG_CALL_NO_RWG, void, avr, avr, avr, i32) +DEF_HELPER_FLAGS_4(VABSDUH, TCG_CALL_NO_RWG, void, avr, avr, avr, i32) +DEF_HELPER_FLAGS_4(VABSDUW, TCG_CALL_NO_RWG, void, avr, avr, avr, i32) +DEF_HELPER_FLAGS_4(VAVGSB, TCG_CALL_NO_RWG, void, avr, avr, avr, i32) +DEF_HELPER_FLAGS_4(VAVGSH, TCG_CALL_NO_RWG, void, avr, avr, avr, i32) +DEF_HELPER_FLAGS_4(VAVGSW, TCG_CALL_NO_RWG, void, avr, avr, avr, i32) DEF_HELPER_4(vcmpeqfp, void, env, avr, avr, avr) DEF_HELPER_4(vcmpgefp, void, env, avr, avr, avr) DEF_HELPER_4(vcmpgtfp, void, env, avr, avr, avr) @@ -193,11 +195,7 @@ DEF_HELPER_FLAGS_3(vslo, TCG_CALL_NO_RWG, void, avr, avr, avr) DEF_HELPER_FLAGS_3(vsro, TCG_CALL_NO_RWG, void, avr, avr, avr) DEF_HELPER_FLAGS_3(vsrv, TCG_CALL_NO_RWG, void, avr, avr, avr) DEF_HELPER_FLAGS_3(vslv, TCG_CALL_NO_RWG, void, avr, avr, avr) -DEF_HELPER_FLAGS_3(vaddcuw, TCG_CALL_NO_RWG, void, avr, avr, avr) -DEF_HELPER_FLAGS_2(vprtybw, TCG_CALL_NO_RWG, void, avr, avr) -DEF_HELPER_FLAGS_2(vprtybd, TCG_CALL_NO_RWG, void, avr, avr) -DEF_HELPER_FLAGS_2(vprtybq, TCG_CALL_NO_RWG, void, avr, avr) -DEF_HELPER_FLAGS_3(vsubcuw, TCG_CALL_NO_RWG, void, avr, avr, avr) +DEF_HELPER_FLAGS_3(VPRTYBQ, TCG_CALL_NO_RWG, void, avr, avr, i32) DEF_HELPER_FLAGS_5(vaddsbs, TCG_CALL_NO_RWG, void, avr, avr, avr, avr, i32) DEF_HELPER_FLAGS_5(vaddshs, TCG_CALL_NO_RWG, void, avr, avr, avr, avr, i32) DEF_HELPER_FLAGS_5(vaddsws, TCG_CALL_NO_RWG, void, avr, avr, avr, avr, i32) @@ -231,8 +229,6 @@ DEF_HELPER_FLAGS_2(VSTRIBL, TCG_CALL_NO_RWG, i32, avr, avr) DEF_HELPER_FLAGS_2(VSTRIBR, TCG_CALL_NO_RWG, i32, avr, avr) DEF_HELPER_FLAGS_2(VSTRIHL, TCG_CALL_NO_RWG, i32, avr, avr) DEF_HELPER_FLAGS_2(VSTRIHR, TCG_CALL_NO_RWG, i32, avr, avr) -DEF_HELPER_FLAGS_2(vnegw, TCG_CALL_NO_RWG, void, avr, avr) -DEF_HELPER_FLAGS_2(vnegd, TCG_CALL_NO_RWG, void, avr, avr) DEF_HELPER_FLAGS_2(vupkhpx, TCG_CALL_NO_RWG, void, avr, avr) DEF_HELPER_FLAGS_2(vupklpx, TCG_CALL_NO_RWG, void, avr, avr) DEF_HELPER_FLAGS_2(vupkhsb, TCG_CALL_NO_RWG, void, avr, avr) @@ -258,13 +254,13 @@ DEF_HELPER_4(vpkuhum, void, env, avr, avr, avr) DEF_HELPER_4(vpkuwum, void, env, avr, avr, avr) DEF_HELPER_4(vpkudum, void, env, avr, avr, avr) DEF_HELPER_FLAGS_3(vpkpx, TCG_CALL_NO_RWG, void, avr, avr, avr) -DEF_HELPER_5(vmhaddshs, void, env, avr, avr, avr, avr) -DEF_HELPER_5(vmhraddshs, void, env, avr, avr, avr, avr) +DEF_HELPER_5(VMHADDSHS, void, env, avr, avr, avr, avr) +DEF_HELPER_5(VMHRADDSHS, void, env, avr, avr, avr, avr) DEF_HELPER_FLAGS_4(VMSUMUHM, TCG_CALL_NO_RWG, void, avr, avr, avr, avr) DEF_HELPER_5(VMSUMUHS, void, env, avr, avr, avr, avr) DEF_HELPER_FLAGS_4(VMSUMSHM, TCG_CALL_NO_RWG, void, avr, avr, avr, avr) DEF_HELPER_5(VMSUMSHS, void, env, avr, avr, avr, avr) -DEF_HELPER_FLAGS_4(vmladduhm, TCG_CALL_NO_RWG, void, avr, avr, avr, avr) +DEF_HELPER_FLAGS_5(VMLADDUHM, TCG_CALL_NO_RWG, void, avr, avr, avr, avr, i32) DEF_HELPER_FLAGS_2(mtvscr, TCG_CALL_NO_RWG, void, env, i32) DEF_HELPER_FLAGS_1(mfvscr, TCG_CALL_NO_RWG, i32, env) DEF_HELPER_3(lvebx, void, env, avr, tl) @@ -423,9 +419,9 @@ DEF_HELPER_3(xscvuxdsp, void, env, vsr, vsr) DEF_HELPER_3(xscvsxdsp, void, env, vsr, vsr) DEF_HELPER_4(xscvudqp, void, env, i32, vsr, vsr) DEF_HELPER_3(xscvuxddp, void, env, vsr, vsr) -DEF_HELPER_3(xststdcsp, void, env, i32, vsr) -DEF_HELPER_2(xststdcdp, void, env, i32) -DEF_HELPER_2(xststdcqp, void, env, i32) +DEF_HELPER_4(XSTSTDCSP, void, env, i32, i32, vsr) +DEF_HELPER_4(XSTSTDCDP, void, env, i32, i32, vsr) +DEF_HELPER_4(XSTSTDCQP, void, env, i32, i32, vsr) DEF_HELPER_3(xsrdpi, void, env, vsr, vsr) DEF_HELPER_3(xsrdpic, void, env, vsr, vsr) DEF_HELPER_3(xsrdpim, void, env, vsr, vsr) @@ -523,8 +519,8 @@ DEF_HELPER_3(xvcvsxdsp, void, env, vsr, vsr) DEF_HELPER_3(xvcvuxdsp, void, env, vsr, vsr) DEF_HELPER_3(xvcvsxwsp, void, env, vsr, vsr) DEF_HELPER_3(xvcvuxwsp, void, env, vsr, vsr) -DEF_HELPER_2(xvtstdcsp, void, env, i32) -DEF_HELPER_2(xvtstdcdp, void, env, i32) +DEF_HELPER_FLAGS_4(XVTSTDCSP, TCG_CALL_NO_RWG, void, vsr, vsr, i64, i32) +DEF_HELPER_FLAGS_4(XVTSTDCDP, TCG_CALL_NO_RWG, void, vsr, vsr, i64, i32) DEF_HELPER_3(xvrspi, void, env, vsr, vsr) DEF_HELPER_3(xvrspic, void, env, vsr, vsr) DEF_HELPER_3(xvrspim, void, env, vsr, vsr) diff --git a/target/ppc/helper_regs.c b/target/ppc/helper_regs.c index 12235ea2e9..c0aee5855b 100644 --- a/target/ppc/helper_regs.c +++ b/target/ppc/helper_regs.c @@ -109,6 +109,9 @@ static uint32_t hreg_compute_hflags_value(CPUPPCState *env) if (env->spr[SPR_POWER_MMCR0] & MMCR0_PMCC1) { hflags |= 1 << HFLAGS_PMCC1; } + if (env->spr[SPR_POWER_MMCR0] & MMCR0_PMCjCE) { + hflags |= 1 << HFLAGS_PMCJCE; + } #ifndef CONFIG_USER_ONLY if (!env->has_hv_mode || (msr & (1ull << MSR_HV))) { @@ -119,6 +122,9 @@ static uint32_t hreg_compute_hflags_value(CPUPPCState *env) if (env->pmc_ins_cnt) { hflags |= 1 << HFLAGS_INSN_CNT; } + if (env->pmc_ins_cnt & 0x1e) { + hflags |= 1 << HFLAGS_PMC_OTHER; + } #endif /* @@ -260,6 +266,8 @@ int hreg_store_msr(CPUPPCState *env, target_ulong value, int alter_hv) env->msr = value; hreg_compute_hflags(env); #if !defined(CONFIG_USER_ONLY) + ppc_maybe_interrupt(env); + if (unlikely(FIELD_EX64(env->msr, MSR, POW))) { if (!env->pending_interrupts && (*env->check_pow)(env)) { cs->halted = 1; diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode index a5249ee32c..f8f589e9fd 100644 --- a/target/ppc/insn32.decode +++ b/target/ppc/insn32.decode @@ -199,6 +199,12 @@ @XX2_uim4 ...... ..... . uim:4 ..... ......... .. &XX2_uim xt=%xx_xt xb=%xx_xb +%xx_uim7 6:1 2:1 16:5 +@XX2_uim7 ...... ..... ..... ..... .... . ... . .. &XX2_uim xt=%xx_xt xb=%xx_xb uim=%xx_uim7 + +&XX2_bf_uim bf xb uim +@XX2_bf_uim ...... bf:3 uim:7 ..... ......... . . &XX2_bf_uim + &XX2_bf_xb bf xb @XX2_bf_xb ...... bf:3 .. ..... ..... ......... . . &XX2_bf_xb xb=%xx_xb @@ -519,6 +525,21 @@ VCMPNEZW 000100 ..... ..... ..... . 0110000111 @VC VCMPSQ 000100 ... -- ..... ..... 00101000001 @VX_bf VCMPUQ 000100 ... -- ..... ..... 00100000001 @VX_bf +## Vector Integer Average Instructions + +VAVGSB 000100 ..... ..... ..... 10100000010 @VX +VAVGSH 000100 ..... ..... ..... 10101000010 @VX +VAVGSW 000100 ..... ..... ..... 10110000010 @VX +VAVGUB 000100 ..... ..... ..... 10000000010 @VX +VAVGUH 000100 ..... ..... ..... 10001000010 @VX +VAVGUW 000100 ..... ..... ..... 10010000010 @VX + +## Vector Integer Absolute Difference Instructions + +VABSDUB 000100 ..... ..... ..... 10000000011 @VX +VABSDUH 000100 ..... ..... ..... 10001000011 @VX +VABSDUW 000100 ..... ..... ..... 10010000011 @VX + ## Vector Bit Manipulation Instruction VGNB 000100 ..... -- ... ..... 10011001100 @VX_n @@ -529,6 +550,10 @@ VCTZDM 000100 ..... ..... ..... 11111000100 @VX VPDEPD 000100 ..... ..... ..... 10111001101 @VX VPEXTD 000100 ..... ..... ..... 10110001101 @VX +VPRTYBD 000100 ..... 01001 ..... 11000000010 @VX_tb +VPRTYBQ 000100 ..... 01010 ..... 11000000010 @VX_tb +VPRTYBW 000100 ..... 01000 ..... 11000000010 @VX_tb + ## Vector Permute and Formatting Instruction VEXTDUBVLX 000100 ..... ..... ..... ..... 011000 @VA @@ -608,12 +633,14 @@ VRLQNM 000100 ..... ..... ..... 00101000101 @VX ## Vector Integer Arithmetic Instructions +VADDCUW 000100 ..... ..... ..... 00110000000 @VX VADDCUQ 000100 ..... ..... ..... 00101000000 @VX VADDUQM 000100 ..... ..... ..... 00100000000 @VX VADDEUQM 000100 ..... ..... ..... ..... 111100 @VA VADDECUQ 000100 ..... ..... ..... ..... 111101 @VA +VSUBCUW 000100 ..... ..... ..... 10110000000 @VX VSUBCUQ 000100 ..... ..... ..... 10101000000 @VX VSUBUQM 000100 ..... ..... ..... 10100000000 @VX @@ -627,6 +654,9 @@ VEXTSH2D 000100 ..... 11001 ..... 11000000010 @VX_tb VEXTSW2D 000100 ..... 11010 ..... 11000000010 @VX_tb VEXTSD2Q 000100 ..... 11011 ..... 11000000010 @VX_tb +VNEGD 000100 ..... 00111 ..... 11000000010 @VX_tb +VNEGW 000100 ..... 00110 ..... 11000000010 @VX_tb + ## Vector Mask Manipulation Instructions MTVSRBM 000100 ..... 10000 ..... 11001000010 @VX_tb @@ -693,6 +723,10 @@ VMSUMUHS 000100 ..... ..... ..... ..... 100111 @VA VMSUMCUD 000100 ..... ..... ..... ..... 010111 @VA VMSUMUDM 000100 ..... ..... ..... ..... 100011 @VA +VMLADDUHM 000100 ..... ..... ..... ..... 100010 @VA +VMHADDSHS 000100 ..... ..... ..... ..... 100000 @VA +VMHRADDSHS 000100 ..... ..... ..... ..... 100001 @VA + ## Vector String Instructions VSTRIBL 000100 ..... 00000 ..... . 0000001101 @VX_tb_rc @@ -726,6 +760,17 @@ STXVRHX 011111 ..... ..... ..... 0010101101 . @X_TSX STXVRWX 011111 ..... ..... ..... 0011001101 . @X_TSX STXVRDX 011111 ..... ..... ..... 0011101101 . @X_TSX +## VSX Vector Binary Floating-Point Sign Manipulation Instructions + +XVABSDP 111100 ..... 00000 ..... 111011001 .. @XX2 +XVABSSP 111100 ..... 00000 ..... 110011001 .. @XX2 +XVNABSDP 111100 ..... 00000 ..... 111101001 .. @XX2 +XVNABSSP 111100 ..... 00000 ..... 110101001 .. @XX2 +XVNEGDP 111100 ..... 00000 ..... 111111001 .. @XX2 +XVNEGSP 111100 ..... 00000 ..... 110111001 .. @XX2 +XVCPSGNDP 111100 ..... ..... ..... 11110000 ... @XX3 +XVCPSGNSP 111100 ..... ..... ..... 11010000 ... @XX3 + ## VSX Scalar Multiply-Add Instructions XSMADDADP 111100 ..... ..... ..... 00100001 . . . @XX3 @@ -809,6 +854,11 @@ XSCVSPDPN 111100 ..... ----- ..... 101001011 .. @XX2 ## VSX Binary Floating-Point Math Support Instructions XVXSIGSP 111100 ..... 01001 ..... 111011011 .. @XX2 +XVTSTDCDP 111100 ..... ..... ..... 1111 . 101 ... @XX2_uim7 +XVTSTDCSP 111100 ..... ..... ..... 1101 . 101 ... @XX2_uim7 +XSTSTDCSP 111100 ... ....... ..... 100101010 . - @XX2_bf_uim xb=%xx_xb +XSTSTDCDP 111100 ... ....... ..... 101101010 . - @XX2_bf_uim xb=%xx_xb +XSTSTDCQP 111111 ... ....... xb:5 1011000100 - @XX2_bf_uim ## VSX Vector Test Least-Significant Bit by Byte Instruction @@ -908,3 +958,11 @@ SLBSYNC 011111 ----- ----- ----- 0101010010 - TLBIE 011111 ..... - .. . . ..... 0100110010 - @X_tlbie TLBIEL 011111 ..... - .. . . ..... 0100010010 - @X_tlbie + +# Processor Control Instructions + +MSGCLR 011111 ----- ----- ..... 0011101110 - @X_rb +MSGSND 011111 ----- ----- ..... 0011001110 - @X_rb +MSGCLRP 011111 ----- ----- ..... 0010101110 - @X_rb +MSGSNDP 011111 ----- ----- ..... 0010001110 - @X_rb +MSGSYNC 011111 ----- ----- ----- 1101110110 - diff --git a/target/ppc/int_helper.c b/target/ppc/int_helper.c index 696096100b..d97a7f1f28 100644 --- a/target/ppc/int_helper.c +++ b/target/ppc/int_helper.c @@ -492,40 +492,8 @@ static inline void set_vscr_sat(CPUPPCState *env) env->vscr_sat.u32[0] = 1; } -void helper_vaddcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) -{ - int i; - - for (i = 0; i < ARRAY_SIZE(r->u32); i++) { - r->u32[i] = ~a->u32[i] < b->u32[i]; - } -} - -/* vprtybw */ -void helper_vprtybw(ppc_avr_t *r, ppc_avr_t *b) -{ - int i; - for (i = 0; i < ARRAY_SIZE(r->u32); i++) { - uint64_t res = b->u32[i] ^ (b->u32[i] >> 16); - res ^= res >> 8; - r->u32[i] = res & 1; - } -} - -/* vprtybd */ -void helper_vprtybd(ppc_avr_t *r, ppc_avr_t *b) -{ - int i; - for (i = 0; i < ARRAY_SIZE(r->u64); i++) { - uint64_t res = b->u64[i] ^ (b->u64[i] >> 32); - res ^= res >> 16; - res ^= res >> 8; - r->u64[i] = res & 1; - } -} - /* vprtybq */ -void helper_vprtybq(ppc_avr_t *r, ppc_avr_t *b) +void helper_VPRTYBQ(ppc_avr_t *r, ppc_avr_t *b, uint32_t v) { uint64_t res = b->u64[0] ^ b->u64[1]; res ^= res >> 32; @@ -602,29 +570,27 @@ VARITHSAT_UNSIGNED(w, u32, uint64_t, cvtsduw) #undef VARITHSAT_SIGNED #undef VARITHSAT_UNSIGNED -#define VAVG_DO(name, element, etype) \ - void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ - { \ - int i; \ - \ - for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ - etype x = (etype)a->element[i] + (etype)b->element[i] + 1; \ - r->element[i] = x >> 1; \ - } \ +#define VAVG(name, element, etype) \ + void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t v)\ + { \ + int i; \ + \ + for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ + etype x = (etype)a->element[i] + (etype)b->element[i] + 1; \ + r->element[i] = x >> 1; \ + } \ } -#define VAVG(type, signed_element, signed_type, unsigned_element, \ - unsigned_type) \ - VAVG_DO(avgs##type, signed_element, signed_type) \ - VAVG_DO(avgu##type, unsigned_element, unsigned_type) -VAVG(b, s8, int16_t, u8, uint16_t) -VAVG(h, s16, int32_t, u16, uint32_t) -VAVG(w, s32, int64_t, u32, uint64_t) -#undef VAVG_DO +VAVG(VAVGSB, s8, int16_t) +VAVG(VAVGUB, u8, uint16_t) +VAVG(VAVGSH, s16, int32_t) +VAVG(VAVGUH, u16, uint32_t) +VAVG(VAVGSW, s32, int64_t) +VAVG(VAVGUW, u32, uint64_t) #undef VAVG -#define VABSDU_DO(name, element) \ -void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ +#define VABSDU(name, element) \ +void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t v)\ { \ int i; \ \ @@ -640,12 +606,9 @@ void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ * name - instruction mnemonic suffix (b: byte, h: halfword, w: word) * element - element type to access from vector */ -#define VABSDU(type, element) \ - VABSDU_DO(absdu##type, element) -VABSDU(b, u8) -VABSDU(h, u16) -VABSDU(w, u32) -#undef VABSDU_DO +VABSDU(VABSDUB, u8) +VABSDU(VABSDUH, u16) +VABSDU(VABSDUW, u32) #undef VABSDU #define VCF(suffix, cvt, element) \ @@ -939,7 +902,7 @@ target_ulong helper_vctzlsbb(ppc_avr_t *r) return count; } -void helper_vmhaddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, +void helper_VMHADDSHS(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) { int sat = 0; @@ -957,7 +920,7 @@ void helper_vmhaddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, } } -void helper_vmhraddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, +void helper_VMHRADDSHS(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) { int sat = 0; @@ -974,7 +937,8 @@ void helper_vmhraddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, } } -void helper_vmladduhm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) +void helper_VMLADDUHM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c, + uint32_t v) { int i; @@ -1936,18 +1900,6 @@ XXBLEND(W, 32) XXBLEND(D, 64) #undef XXBLEND -#define VNEG(name, element) \ -void helper_##name(ppc_avr_t *r, ppc_avr_t *b) \ -{ \ - int i; \ - for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ - r->element[i] = -b->element[i]; \ - } \ -} -VNEG(vnegw, s32) -VNEG(vnegd, s64) -#undef VNEG - void helper_vsro(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) { int sh = (b->VsrB(0xf) >> 3) & 0xf; @@ -1961,15 +1913,6 @@ void helper_vsro(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) #endif } -void helper_vsubcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) -{ - int i; - - for (i = 0; i < ARRAY_SIZE(r->u32); i++) { - r->u32[i] = a->u32[i] >= b->u32[i]; - } -} - void helper_vsumsws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) { int64_t t; diff --git a/target/ppc/misc_helper.c b/target/ppc/misc_helper.c index b0a5e7ce76..a9bc1522e2 100644 --- a/target/ppc/misc_helper.c +++ b/target/ppc/misc_helper.c @@ -25,6 +25,7 @@ #include "qemu/error-report.h" #include "qemu/main-loop.h" #include "mmu-book3s-v3.h" +#include "hw/ppc/ppc.h" #include "helper_regs.h" @@ -163,7 +164,7 @@ target_ulong helper_load_dpdes(CPUPPCState *env) helper_hfscr_facility_check(env, HFSCR_MSGP, "load DPDES", HFSCR_IC_MSGP); /* TODO: TCG supports only one thread */ - if (env->pending_interrupts & (1 << PPC_INTERRUPT_DOORBELL)) { + if (env->pending_interrupts & PPC_INTERRUPT_DOORBELL) { dpdes = 1; } @@ -173,7 +174,6 @@ target_ulong helper_load_dpdes(CPUPPCState *env) void helper_store_dpdes(CPUPPCState *env, target_ulong val) { PowerPCCPU *cpu = env_archcpu(env); - CPUState *cs = CPU(cpu); helper_hfscr_facility_check(env, HFSCR_MSGP, "store DPDES", HFSCR_IC_MSGP); @@ -184,12 +184,7 @@ void helper_store_dpdes(CPUPPCState *env, target_ulong val) return; } - if (val & 0x1) { - env->pending_interrupts |= 1 << PPC_INTERRUPT_DOORBELL; - cpu_interrupt(cs, CPU_INTERRUPT_HARD); - } else { - env->pending_interrupts &= ~(1 << PPC_INTERRUPT_DOORBELL); - } + ppc_set_irq(cpu, PPC_INTERRUPT_DOORBELL, val & 0x1); } #endif /* defined(TARGET_PPC64) */ diff --git a/target/ppc/mmu-radix64.c b/target/ppc/mmu-radix64.c index 00f2e9fa2e..031efda0df 100644 --- a/target/ppc/mmu-radix64.c +++ b/target/ppc/mmu-radix64.c @@ -238,6 +238,8 @@ static void ppc_radix64_set_rc(PowerPCCPU *cpu, MMUAccessType access_type, static bool ppc_radix64_is_valid_level(int level, int psize, uint64_t nls) { + bool ret; + /* * Check if this is a valid level, according to POWER9 and POWER10 * Processor User's Manuals, sections 4.10.4.1 and 5.10.6.1, respectively: @@ -249,16 +251,25 @@ static bool ppc_radix64_is_valid_level(int level, int psize, uint64_t nls) */ switch (level) { case 0: /* Root Page Dir */ - return psize == 52 && nls == 13; + ret = psize == 52 && nls == 13; + break; case 1: case 2: - return nls == 9; + ret = nls == 9; + break; case 3: - return nls == 9 || nls == 5; + ret = nls == 9 || nls == 5; + break; default: - qemu_log_mask(LOG_GUEST_ERROR, "invalid radix level: %d\n", level); - return false; + ret = false; + } + + if (unlikely(!ret)) { + qemu_log_mask(LOG_GUEST_ERROR, "invalid radix configuration: " + "level %d size %d nls %"PRIu64"\n", + level, psize, nls); } + return ret; } static int ppc_radix64_next_level(AddressSpace *as, vaddr eaddr, @@ -519,11 +530,13 @@ static int ppc_radix64_process_scoped_xlate(PowerPCCPU *cpu, if (!ppc_radix64_is_valid_level(level++, *g_page_size, nls)) { fault_cause |= DSISR_R_BADCONFIG; - return 1; + ret = 1; + } else { + ret = ppc_radix64_next_level(cs->as, eaddr & R_EADDR_MASK, + &h_raddr, &nls, g_page_size, + &pte, &fault_cause); } - ret = ppc_radix64_next_level(cs->as, eaddr & R_EADDR_MASK, &h_raddr, - &nls, g_page_size, &pte, &fault_cause); if (ret) { /* No valid pte */ if (guest_visible) { diff --git a/target/ppc/power8-pmu.c b/target/ppc/power8-pmu.c index beeab5c494..1381072b9e 100644 --- a/target/ppc/power8-pmu.c +++ b/target/ppc/power8-pmu.c @@ -22,8 +22,6 @@ #if defined(TARGET_PPC64) && !defined(CONFIG_USER_ONLY) -#define PMC_COUNTER_NEGATIVE_VAL 0x80000000UL - static bool pmc_has_overflow_enabled(CPUPPCState *env, int sprn) { if (sprn == SPR_POWER_PMC1) { @@ -88,49 +86,47 @@ static bool pmu_increment_insns(CPUPPCState *env, uint32_t num_insns) bool overflow_triggered = false; target_ulong tmp; - if (unlikely(ins_cnt & 0x1e)) { - if (ins_cnt & (1 << 1)) { - tmp = env->spr[SPR_POWER_PMC1]; - tmp += num_insns; - if (tmp >= PMC_COUNTER_NEGATIVE_VAL && (mmcr0 & MMCR0_PMC1CE)) { - tmp = PMC_COUNTER_NEGATIVE_VAL; - overflow_triggered = true; - } - env->spr[SPR_POWER_PMC1] = tmp; + if (ins_cnt & (1 << 1)) { + tmp = env->spr[SPR_POWER_PMC1]; + tmp += num_insns; + if (tmp >= PMC_COUNTER_NEGATIVE_VAL && (mmcr0 & MMCR0_PMC1CE)) { + tmp = PMC_COUNTER_NEGATIVE_VAL; + overflow_triggered = true; } + env->spr[SPR_POWER_PMC1] = tmp; + } - if (ins_cnt & (1 << 2)) { - tmp = env->spr[SPR_POWER_PMC2]; - tmp += num_insns; - if (tmp >= PMC_COUNTER_NEGATIVE_VAL && (mmcr0 & MMCR0_PMCjCE)) { - tmp = PMC_COUNTER_NEGATIVE_VAL; - overflow_triggered = true; - } - env->spr[SPR_POWER_PMC2] = tmp; + if (ins_cnt & (1 << 2)) { + tmp = env->spr[SPR_POWER_PMC2]; + tmp += num_insns; + if (tmp >= PMC_COUNTER_NEGATIVE_VAL && (mmcr0 & MMCR0_PMCjCE)) { + tmp = PMC_COUNTER_NEGATIVE_VAL; + overflow_triggered = true; + } + env->spr[SPR_POWER_PMC2] = tmp; + } + + if (ins_cnt & (1 << 3)) { + tmp = env->spr[SPR_POWER_PMC3]; + tmp += num_insns; + if (tmp >= PMC_COUNTER_NEGATIVE_VAL && (mmcr0 & MMCR0_PMCjCE)) { + tmp = PMC_COUNTER_NEGATIVE_VAL; + overflow_triggered = true; } + env->spr[SPR_POWER_PMC3] = tmp; + } - if (ins_cnt & (1 << 3)) { - tmp = env->spr[SPR_POWER_PMC3]; + if (ins_cnt & (1 << 4)) { + target_ulong mmcr1 = env->spr[SPR_POWER_MMCR1]; + int sel = extract64(mmcr1, MMCR1_PMC4EVT_EXTR, MMCR1_EVT_SIZE); + if (sel == 0x02 || (env->spr[SPR_CTRL] & CTRL_RUN)) { + tmp = env->spr[SPR_POWER_PMC4]; tmp += num_insns; if (tmp >= PMC_COUNTER_NEGATIVE_VAL && (mmcr0 & MMCR0_PMCjCE)) { tmp = PMC_COUNTER_NEGATIVE_VAL; overflow_triggered = true; } - env->spr[SPR_POWER_PMC3] = tmp; - } - - if (ins_cnt & (1 << 4)) { - target_ulong mmcr1 = env->spr[SPR_POWER_MMCR1]; - int sel = extract64(mmcr1, MMCR1_PMC4EVT_EXTR, MMCR1_EVT_SIZE); - if (sel == 0x02 || (env->spr[SPR_CTRL] & CTRL_RUN)) { - tmp = env->spr[SPR_POWER_PMC4]; - tmp += num_insns; - if (tmp >= PMC_COUNTER_NEGATIVE_VAL && (mmcr0 & MMCR0_PMCjCE)) { - tmp = PMC_COUNTER_NEGATIVE_VAL; - overflow_triggered = true; - } - env->spr[SPR_POWER_PMC4] = tmp; - } + env->spr[SPR_POWER_PMC4] = tmp; } } @@ -310,6 +306,12 @@ static void fire_PMC_interrupt(PowerPCCPU *cpu) raise_ebb_perfm_exception(env); } +void helper_handle_pmc5_overflow(CPUPPCState *env) +{ + env->spr[SPR_POWER_PMC5] = PMC_COUNTER_NEGATIVE_VAL; + fire_PMC_interrupt(env_archcpu(env)); +} + /* This helper assumes that the PMC is running. */ void helper_insns_inc(CPUPPCState *env, uint32_t num_insns) { diff --git a/target/ppc/power8-pmu.h b/target/ppc/power8-pmu.h index 9692dd765e..c0093e2219 100644 --- a/target/ppc/power8-pmu.h +++ b/target/ppc/power8-pmu.h @@ -14,6 +14,9 @@ #define POWER8_PMU_H #if defined(TARGET_PPC64) && !defined(CONFIG_USER_ONLY) + +#define PMC_COUNTER_NEGATIVE_VAL 0x80000000UL + void cpu_ppc_pmu_init(CPUPPCState *env); void pmu_update_summaries(CPUPPCState *env); #else diff --git a/target/ppc/translate.c b/target/ppc/translate.c index 7228857e23..19c1d17cb0 100644 --- a/target/ppc/translate.c +++ b/target/ppc/translate.c @@ -36,6 +36,7 @@ #include "exec/log.h" #include "qemu/atomic128.h" #include "spr_common.h" +#include "power8-pmu.h" #include "qemu/qemu-print.h" #include "qapi/error.h" @@ -177,6 +178,8 @@ struct DisasContext { bool hr; bool mmcr0_pmcc0; bool mmcr0_pmcc1; + bool mmcr0_pmcjce; + bool pmc_other; bool pmu_insn_cnt; ppc_spr_t *spr_cb; /* Needed to check rights for mfspr/mtspr */ int singlestep_enabled; @@ -305,6 +308,14 @@ static void gen_icount_io_start(DisasContext *ctx) } } +#if !defined(CONFIG_USER_ONLY) +static void gen_ppc_maybe_interrupt(DisasContext *ctx) +{ + gen_icount_io_start(ctx); + gen_helper_ppc_maybe_interrupt(cpu_env); +} +#endif + /* * Tells the caller what is the appropriate exception to generate and prepares * SPR registers for this exception. @@ -4261,6 +4272,9 @@ static void pmu_count_insns(DisasContext *ctx) } #if !defined(CONFIG_USER_ONLY) + TCGLabel *l; + TCGv t0; + /* * The PMU insns_inc() helper stops the internal PMU timer if a * counter overflows happens. In that case, if the guest is @@ -4269,8 +4283,26 @@ static void pmu_count_insns(DisasContext *ctx) */ gen_icount_io_start(ctx); - gen_helper_insns_inc(cpu_env, tcg_constant_i32(ctx->base.num_insns)); -#else + /* Avoid helper calls when only PMC5-6 are enabled. */ + if (!ctx->pmc_other) { + l = gen_new_label(); + t0 = tcg_temp_new(); + + gen_load_spr(t0, SPR_POWER_PMC5); + tcg_gen_addi_tl(t0, t0, ctx->base.num_insns); + gen_store_spr(SPR_POWER_PMC5, t0); + /* Check for overflow, if it's enabled */ + if (ctx->mmcr0_pmcjce) { + tcg_gen_brcondi_tl(TCG_COND_LT, t0, PMC_COUNTER_NEGATIVE_VAL, l); + gen_helper_handle_pmc5_overflow(cpu_env); + } + + gen_set_label(l); + tcg_temp_free(t0); + } else { + gen_helper_insns_inc(cpu_env, tcg_constant_i32(ctx->base.num_insns)); + } + #else /* * User mode can read (but not write) PMC5 and start/stop * the PMU via MMCR0_FC. In this case just increment @@ -4283,7 +4315,7 @@ static void pmu_count_insns(DisasContext *ctx) gen_store_spr(SPR_POWER_PMC5, t0); tcg_temp_free(t0); -#endif /* #if !defined(CONFIG_USER_ONLY) */ + #endif /* #if !defined(CONFIG_USER_ONLY) */ } #else static void pmu_count_insns(DisasContext *ctx) @@ -6161,7 +6193,6 @@ static void gen_tlbilx_booke206(DisasContext *ctx) #endif /* defined(CONFIG_USER_ONLY) */ } - /* wrtee */ static void gen_wrtee(DisasContext *ctx) { @@ -6175,6 +6206,7 @@ static void gen_wrtee(DisasContext *ctx) tcg_gen_andi_tl(t0, cpu_gpr[rD(ctx->opcode)], (1 << MSR_EE)); tcg_gen_andi_tl(cpu_msr, cpu_msr, ~(1 << MSR_EE)); tcg_gen_or_tl(cpu_msr, cpu_msr, t0); + gen_ppc_maybe_interrupt(ctx); tcg_temp_free(t0); /* * Stop translation to have a chance to raise an exception if we @@ -6193,6 +6225,7 @@ static void gen_wrteei(DisasContext *ctx) CHK_SV(ctx); if (ctx->opcode & 0x00008000) { tcg_gen_ori_tl(cpu_msr, cpu_msr, (1 << MSR_EE)); + gen_ppc_maybe_interrupt(ctx); /* Stop translation to have a chance to raise an exception */ ctx->base.is_jmp = DISAS_EXIT_UPDATE; } else { @@ -6239,68 +6272,6 @@ static void gen_icbt_440(DisasContext *ctx) */ } -/* Embedded.Processor Control */ - -static void gen_msgclr(DisasContext *ctx) -{ -#if defined(CONFIG_USER_ONLY) - GEN_PRIV(ctx); -#else - CHK_HV(ctx); - if (is_book3s_arch2x(ctx)) { - gen_helper_book3s_msgclr(cpu_env, cpu_gpr[rB(ctx->opcode)]); - } else { - gen_helper_msgclr(cpu_env, cpu_gpr[rB(ctx->opcode)]); - } -#endif /* defined(CONFIG_USER_ONLY) */ -} - -static void gen_msgsnd(DisasContext *ctx) -{ -#if defined(CONFIG_USER_ONLY) - GEN_PRIV(ctx); -#else - CHK_HV(ctx); - if (is_book3s_arch2x(ctx)) { - gen_helper_book3s_msgsnd(cpu_gpr[rB(ctx->opcode)]); - } else { - gen_helper_msgsnd(cpu_gpr[rB(ctx->opcode)]); - } -#endif /* defined(CONFIG_USER_ONLY) */ -} - -#if defined(TARGET_PPC64) -static void gen_msgclrp(DisasContext *ctx) -{ -#if defined(CONFIG_USER_ONLY) - GEN_PRIV(ctx); -#else - CHK_SV(ctx); - gen_helper_book3s_msgclrp(cpu_env, cpu_gpr[rB(ctx->opcode)]); -#endif /* defined(CONFIG_USER_ONLY) */ -} - -static void gen_msgsndp(DisasContext *ctx) -{ -#if defined(CONFIG_USER_ONLY) - GEN_PRIV(ctx); -#else - CHK_SV(ctx); - gen_helper_book3s_msgsndp(cpu_env, cpu_gpr[rB(ctx->opcode)]); -#endif /* defined(CONFIG_USER_ONLY) */ -} -#endif - -static void gen_msgsync(DisasContext *ctx) -{ -#if defined(CONFIG_USER_ONLY) - GEN_PRIV(ctx); -#else - CHK_HV(ctx); -#endif /* defined(CONFIG_USER_ONLY) */ - /* interpreted as no-op */ -} - #if defined(TARGET_PPC64) static void gen_maddld(DisasContext *ctx) { @@ -6545,12 +6516,12 @@ static int64_t dw_compose_ea(DisasContext *ctx, int x) } \ } while (0) -#define REQUIRE_HV(CTX) \ - do { \ - if (unlikely((CTX)->pr || !(CTX)->hv)) \ - gen_priv_opc(CTX); \ - return true; \ - } \ +#define REQUIRE_HV(CTX) \ + do { \ + if (unlikely((CTX)->pr || !(CTX)->hv)) { \ + gen_priv_opc(CTX); \ + return true; \ + } \ } while (0) #else #define REQUIRE_SV(CTX) do { gen_priv_opc(CTX); return true; } while (0) @@ -6628,6 +6599,8 @@ static bool resolve_PLS_D(DisasContext *ctx, arg_D *d, arg_PLS_D *a) #include "translate/branch-impl.c.inc" +#include "translate/processor-ctrl-impl.c.inc" + #include "translate/storage-ctrl-impl.c.inc" /* Handles lfdp */ @@ -6901,12 +6874,6 @@ GEN_HANDLER2_E(tlbivax_booke206, "tlbivax", 0x1F, 0x12, 0x18, 0x00000001, PPC_NONE, PPC2_BOOKE206), GEN_HANDLER2_E(tlbilx_booke206, "tlbilx", 0x1F, 0x12, 0x00, 0x03800001, PPC_NONE, PPC2_BOOKE206), -GEN_HANDLER2_E(msgsnd, "msgsnd", 0x1F, 0x0E, 0x06, 0x03ff0001, - PPC_NONE, PPC2_PRCNTL), -GEN_HANDLER2_E(msgclr, "msgclr", 0x1F, 0x0E, 0x07, 0x03ff0001, - PPC_NONE, PPC2_PRCNTL), -GEN_HANDLER2_E(msgsync, "msgsync", 0x1F, 0x16, 0x1B, 0x00000000, - PPC_NONE, PPC2_PRCNTL), GEN_HANDLER(wrtee, 0x1F, 0x03, 0x04, 0x000FFC01, PPC_WRTEE), GEN_HANDLER(wrteei, 0x1F, 0x03, 0x05, 0x000E7C01, PPC_WRTEE), GEN_HANDLER(dlmzb, 0x1F, 0x0E, 0x02, 0x00000000, PPC_440_SPEC), @@ -6921,15 +6888,10 @@ GEN_HANDLER(lvsl, 0x1f, 0x06, 0x00, 0x00000001, PPC_ALTIVEC), GEN_HANDLER(lvsr, 0x1f, 0x06, 0x01, 0x00000001, PPC_ALTIVEC), GEN_HANDLER(mfvscr, 0x04, 0x2, 0x18, 0x001ff800, PPC_ALTIVEC), GEN_HANDLER(mtvscr, 0x04, 0x2, 0x19, 0x03ff0000, PPC_ALTIVEC), -GEN_HANDLER(vmladduhm, 0x04, 0x11, 0xFF, 0x00000000, PPC_ALTIVEC), #if defined(TARGET_PPC64) GEN_HANDLER_E(maddhd_maddhdu, 0x04, 0x18, 0xFF, 0x00000000, PPC_NONE, PPC2_ISA300), GEN_HANDLER_E(maddld, 0x04, 0x19, 0xFF, 0x00000000, PPC_NONE, PPC2_ISA300), -GEN_HANDLER2_E(msgsndp, "msgsndp", 0x1F, 0x0E, 0x04, 0x03ff0001, - PPC_NONE, PPC2_ISA207S), -GEN_HANDLER2_E(msgclrp, "msgclrp", 0x1F, 0x0E, 0x05, 0x03ff0001, - PPC_NONE, PPC2_ISA207S), #endif #undef GEN_INT_ARITH_ADD @@ -7574,6 +7536,8 @@ static void ppc_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs) ctx->hr = (hflags >> HFLAGS_HR) & 1; ctx->mmcr0_pmcc0 = (hflags >> HFLAGS_PMCC0) & 1; ctx->mmcr0_pmcc1 = (hflags >> HFLAGS_PMCC1) & 1; + ctx->mmcr0_pmcjce = (hflags >> HFLAGS_PMCJCE) & 1; + ctx->pmc_other = (hflags >> HFLAGS_PMC_OTHER) & 1; ctx->pmu_insn_cnt = (hflags >> HFLAGS_INSN_CNT) & 1; ctx->singlestep_enabled = 0; diff --git a/target/ppc/translate/processor-ctrl-impl.c.inc b/target/ppc/translate/processor-ctrl-impl.c.inc new file mode 100644 index 0000000000..cc7a50d579 --- /dev/null +++ b/target/ppc/translate/processor-ctrl-impl.c.inc @@ -0,0 +1,105 @@ +/* + * Power ISA decode for Storage Control instructions + * + * Copyright (c) 2022 Instituto de Pesquisas Eldorado (eldorado.org.br) + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. + */ + +/* + * Processor Control Instructions + */ + +static bool trans_MSGCLR(DisasContext *ctx, arg_X_rb *a) +{ + if (!(ctx->insns_flags2 & PPC2_ISA207S)) { + /* + * Before Power ISA 2.07, processor control instructions were only + * implemented in the "Embedded.Processor Control" category. + */ + REQUIRE_INSNS_FLAGS2(ctx, PRCNTL); + } + + REQUIRE_HV(ctx); + +#if !defined(CONFIG_USER_ONLY) + if (is_book3s_arch2x(ctx)) { + gen_helper_book3s_msgclr(cpu_env, cpu_gpr[a->rb]); + } else { + gen_helper_msgclr(cpu_env, cpu_gpr[a->rb]); + } +#else + qemu_build_not_reached(); +#endif + return true; +} + +static bool trans_MSGSND(DisasContext *ctx, arg_X_rb *a) +{ + if (!(ctx->insns_flags2 & PPC2_ISA207S)) { + /* + * Before Power ISA 2.07, processor control instructions were only + * implemented in the "Embedded.Processor Control" category. + */ + REQUIRE_INSNS_FLAGS2(ctx, PRCNTL); + } + + REQUIRE_HV(ctx); + +#if !defined(CONFIG_USER_ONLY) + if (is_book3s_arch2x(ctx)) { + gen_helper_book3s_msgsnd(cpu_gpr[a->rb]); + } else { + gen_helper_msgsnd(cpu_gpr[a->rb]); + } +#else + qemu_build_not_reached(); +#endif + return true; +} + +static bool trans_MSGCLRP(DisasContext *ctx, arg_X_rb *a) +{ + REQUIRE_64BIT(ctx); + REQUIRE_INSNS_FLAGS2(ctx, ISA207S); + REQUIRE_SV(ctx); +#if !defined(CONFIG_USER_ONLY) && defined(TARGET_PPC64) + gen_helper_book3s_msgclrp(cpu_env, cpu_gpr[a->rb]); +#else + qemu_build_not_reached(); +#endif + return true; +} + +static bool trans_MSGSNDP(DisasContext *ctx, arg_X_rb *a) +{ + REQUIRE_64BIT(ctx); + REQUIRE_INSNS_FLAGS2(ctx, ISA207S); + REQUIRE_SV(ctx); +#if !defined(CONFIG_USER_ONLY) && defined(TARGET_PPC64) + gen_helper_book3s_msgsndp(cpu_env, cpu_gpr[a->rb]); +#else + qemu_build_not_reached(); +#endif + return true; +} + +static bool trans_MSGSYNC(DisasContext *ctx, arg_MSGSYNC *a) +{ + REQUIRE_INSNS_FLAGS2(ctx, ISA300); + REQUIRE_HV(ctx); + + /* interpreted as no-op */ + return true; +} diff --git a/target/ppc/translate/vmx-impl.c.inc b/target/ppc/translate/vmx-impl.c.inc index e644ad3236..7741f2eb49 100644 --- a/target/ppc/translate/vmx-impl.c.inc +++ b/target/ppc/translate/vmx-impl.c.inc @@ -431,21 +431,6 @@ GEN_VXFORM_V(vminsb, MO_8, tcg_gen_gvec_smin, 1, 12); GEN_VXFORM_V(vminsh, MO_16, tcg_gen_gvec_smin, 1, 13); GEN_VXFORM_V(vminsw, MO_32, tcg_gen_gvec_smin, 1, 14); GEN_VXFORM_V(vminsd, MO_64, tcg_gen_gvec_smin, 1, 15); -GEN_VXFORM(vavgub, 1, 16); -GEN_VXFORM(vabsdub, 1, 16); -GEN_VXFORM_DUAL(vavgub, PPC_ALTIVEC, PPC_NONE, \ - vabsdub, PPC_NONE, PPC2_ISA300) -GEN_VXFORM(vavguh, 1, 17); -GEN_VXFORM(vabsduh, 1, 17); -GEN_VXFORM_DUAL(vavguh, PPC_ALTIVEC, PPC_NONE, \ - vabsduh, PPC_NONE, PPC2_ISA300) -GEN_VXFORM(vavguw, 1, 18); -GEN_VXFORM(vabsduw, 1, 18); -GEN_VXFORM_DUAL(vavguw, PPC_ALTIVEC, PPC_NONE, \ - vabsduw, PPC_NONE, PPC2_ISA300) -GEN_VXFORM(vavgsb, 1, 20); -GEN_VXFORM(vavgsh, 1, 21); -GEN_VXFORM(vavgsw, 1, 22); GEN_VXFORM(vmrghb, 6, 0); GEN_VXFORM(vmrghh, 6, 1); GEN_VXFORM(vmrghw, 6, 2); @@ -803,8 +788,6 @@ GEN_VXFORM(vsrv, 2, 28); GEN_VXFORM(vslv, 2, 29); GEN_VXFORM(vslo, 6, 16); GEN_VXFORM(vsro, 6, 17); -GEN_VXFORM(vaddcuw, 0, 6); -GEN_VXFORM(vsubcuw, 0, 22); static bool do_vector_gvec3_VX(DisasContext *ctx, arg_VX *a, int vece, void (*gen_gvec)(unsigned, uint32_t, uint32_t, @@ -1661,9 +1644,71 @@ GEN_VXFORM_NOA_ENV(vrfim, 5, 11); GEN_VXFORM_NOA_ENV(vrfin, 5, 8); GEN_VXFORM_NOA_ENV(vrfip, 5, 10); GEN_VXFORM_NOA_ENV(vrfiz, 5, 9); -GEN_VXFORM_NOA(vprtybw, 1, 24); -GEN_VXFORM_NOA(vprtybd, 1, 24); -GEN_VXFORM_NOA(vprtybq, 1, 24); + +static void gen_vprtyb_vec(unsigned vece, TCGv_vec t, TCGv_vec b) +{ + int i; + TCGv_vec tmp = tcg_temp_new_vec_matching(b); + /* MO_32 is 2, so 2 iteractions for MO_32 and 3 for MO_64 */ + for (i = 0; i < vece; i++) { + tcg_gen_shri_vec(vece, tmp, b, (4 << (vece - i))); + tcg_gen_xor_vec(vece, b, tmp, b); + } + tcg_gen_and_vec(vece, t, b, tcg_constant_vec_matching(t, vece, 1)); + tcg_temp_free_vec(tmp); +} + +/* vprtybw */ +static void gen_vprtyb_i32(TCGv_i32 t, TCGv_i32 b) +{ + tcg_gen_ctpop_i32(t, b); + tcg_gen_and_i32(t, t, tcg_constant_i32(1)); +} + +/* vprtybd */ +static void gen_vprtyb_i64(TCGv_i64 t, TCGv_i64 b) +{ + tcg_gen_ctpop_i64(t, b); + tcg_gen_and_i64(t, t, tcg_constant_i64(1)); +} + +static bool do_vx_vprtyb(DisasContext *ctx, arg_VX_tb *a, unsigned vece) +{ + static const TCGOpcode vecop_list[] = { + INDEX_op_shri_vec, 0 + }; + + static const GVecGen2 op[] = { + { + .fniv = gen_vprtyb_vec, + .fni4 = gen_vprtyb_i32, + .opt_opc = vecop_list, + .vece = MO_32 + }, + { + .fniv = gen_vprtyb_vec, + .fni8 = gen_vprtyb_i64, + .opt_opc = vecop_list, + .vece = MO_64 + }, + { + .fno = gen_helper_VPRTYBQ, + .vece = MO_128 + }, + }; + + REQUIRE_INSNS_FLAGS2(ctx, ISA300); + REQUIRE_VECTOR(ctx); + + tcg_gen_gvec_2(avr_full_offset(a->vrt), avr_full_offset(a->vrb), + 16, 16, &op[vece - MO_32]); + + return true; +} + +TRANS(VPRTYBW, do_vx_vprtyb, MO_32) +TRANS(VPRTYBD, do_vx_vprtyb, MO_64) +TRANS(VPRTYBQ, do_vx_vprtyb, MO_128) static void gen_vsplt(DisasContext *ctx, int vece) { @@ -2521,25 +2566,7 @@ static void glue(gen_, name0##_##name1)(DisasContext *ctx) \ tcg_temp_free_ptr(rd); \ } -GEN_VAFORM_PAIRED(vmhaddshs, vmhraddshs, 16) - -static void gen_vmladduhm(DisasContext *ctx) -{ - TCGv_ptr ra, rb, rc, rd; - if (unlikely(!ctx->altivec_enabled)) { - gen_exception(ctx, POWERPC_EXCP_VPU); - return; - } - ra = gen_avr_ptr(rA(ctx->opcode)); - rb = gen_avr_ptr(rB(ctx->opcode)); - rc = gen_avr_ptr(rC(ctx->opcode)); - rd = gen_avr_ptr(rD(ctx->opcode)); - gen_helper_vmladduhm(rd, ra, rb, rc); - tcg_temp_free_ptr(ra); - tcg_temp_free_ptr(rb); - tcg_temp_free_ptr(rc); - tcg_temp_free_ptr(rd); -} +GEN_VAFORM_PAIRED(vmaddfp, vnmsubfp, 23) static bool do_va_helper(DisasContext *ctx, arg_VA *a, void (*gen_helper)(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_ptr)) @@ -2569,6 +2596,36 @@ TRANS_FLAGS2(ALTIVEC_207, VSUBECUQ, do_va_helper, gen_helper_VSUBECUQ) TRANS_FLAGS(ALTIVEC, VPERM, do_va_helper, gen_helper_VPERM) TRANS_FLAGS2(ISA300, VPERMR, do_va_helper, gen_helper_VPERMR) +static void gen_vmladduhm_vec(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b, + TCGv_vec c) +{ + tcg_gen_mul_vec(vece, t, a, b); + tcg_gen_add_vec(vece, t, t, c); +} + +static bool trans_VMLADDUHM(DisasContext *ctx, arg_VA *a) +{ + static const TCGOpcode vecop_list[] = { + INDEX_op_add_vec, INDEX_op_mul_vec, 0 + }; + + static const GVecGen4 op = { + .fno = gen_helper_VMLADDUHM, + .fniv = gen_vmladduhm_vec, + .opt_opc = vecop_list, + .vece = MO_16 + }; + + REQUIRE_INSNS_FLAGS(ctx, ALTIVEC); + REQUIRE_VECTOR(ctx); + + tcg_gen_gvec_4(avr_full_offset(a->vrt), avr_full_offset(a->vra), + avr_full_offset(a->vrb), avr_full_offset(a->rc), + 16, 16, &op); + + return true; +} + static bool trans_VSEL(DisasContext *ctx, arg_VA *a) { REQUIRE_INSNS_FLAGS(ctx, ALTIVEC); @@ -2608,14 +2665,26 @@ static bool do_va_env_helper(DisasContext *ctx, arg_VA *a, TRANS_FLAGS(ALTIVEC, VMSUMUHS, do_va_env_helper, gen_helper_VMSUMUHS) TRANS_FLAGS(ALTIVEC, VMSUMSHS, do_va_env_helper, gen_helper_VMSUMSHS) -GEN_VAFORM_PAIRED(vmaddfp, vnmsubfp, 23) +TRANS_FLAGS(ALTIVEC, VMHADDSHS, do_va_env_helper, gen_helper_VMHADDSHS) +TRANS_FLAGS(ALTIVEC, VMHRADDSHS, do_va_env_helper, gen_helper_VMHRADDSHS) GEN_VXFORM_NOA(vclzb, 1, 28) GEN_VXFORM_NOA(vclzh, 1, 29) GEN_VXFORM_TRANS(vclzw, 1, 30) GEN_VXFORM_TRANS(vclzd, 1, 31) -GEN_VXFORM_NOA_2(vnegw, 1, 24, 6) -GEN_VXFORM_NOA_2(vnegd, 1, 24, 7) + +static bool do_vneg(DisasContext *ctx, arg_VX_tb *a, unsigned vece) +{ + REQUIRE_INSNS_FLAGS2(ctx, ISA300); + REQUIRE_VECTOR(ctx); + + tcg_gen_gvec_neg(vece, avr_full_offset(a->vrt), avr_full_offset(a->vrb), + 16, 16); + return true; +} + +TRANS(VNEGW, do_vneg, MO_32) +TRANS(VNEGD, do_vneg, MO_64) static void gen_vexts_i64(TCGv_i64 t, TCGv_i64 b, int64_t s) { @@ -2834,8 +2903,6 @@ static void gen_xpnd04_2(DisasContext *ctx) } -GEN_VXFORM_DUAL(vsubcuw, PPC_ALTIVEC, PPC_NONE, \ - xpnd04_1, PPC_NONE, PPC2_ISA300) GEN_VXFORM_DUAL(vsubsws, PPC_ALTIVEC, PPC_NONE, \ xpnd04_2, PPC_NONE, PPC2_ISA300) @@ -3097,6 +3164,63 @@ TRANS_FLAGS2(ALTIVEC_207, VPMSUMD, do_vx_helper, gen_helper_VPMSUMD) TRANS_FLAGS2(ALTIVEC_207, VSUBCUQ, do_vx_helper, gen_helper_VSUBCUQ) TRANS_FLAGS2(ALTIVEC_207, VSUBUQM, do_vx_helper, gen_helper_VSUBUQM) +static void gen_VADDCUW_vec(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b) +{ + tcg_gen_not_vec(vece, a, a); + tcg_gen_cmp_vec(TCG_COND_LTU, vece, t, a, b); + tcg_gen_and_vec(vece, t, t, tcg_constant_vec_matching(t, vece, 1)); +} + +static void gen_VADDCUW_i32(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b) +{ + tcg_gen_not_i32(a, a); + tcg_gen_setcond_i32(TCG_COND_LTU, t, a, b); +} + +static void gen_VSUBCUW_vec(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b) +{ + tcg_gen_cmp_vec(TCG_COND_GEU, vece, t, a, b); + tcg_gen_and_vec(vece, t, t, tcg_constant_vec_matching(t, vece, 1)); +} + +static void gen_VSUBCUW_i32(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b) +{ + tcg_gen_setcond_i32(TCG_COND_GEU, t, a, b); +} + +static bool do_vx_vaddsubcuw(DisasContext *ctx, arg_VX *a, int add) +{ + static const TCGOpcode vecop_list[] = { + INDEX_op_cmp_vec, 0 + }; + + static const GVecGen3 op[] = { + { + .fniv = gen_VSUBCUW_vec, + .fni4 = gen_VSUBCUW_i32, + .opt_opc = vecop_list, + .vece = MO_32 + }, + { + .fniv = gen_VADDCUW_vec, + .fni4 = gen_VADDCUW_i32, + .opt_opc = vecop_list, + .vece = MO_32 + }, + }; + + REQUIRE_INSNS_FLAGS(ctx, ALTIVEC); + REQUIRE_VECTOR(ctx); + + tcg_gen_gvec_3(avr_full_offset(a->vrt), avr_full_offset(a->vra), + avr_full_offset(a->vrb), 16, 16, &op[add]); + + return true; +} + +TRANS(VSUBCUW, do_vx_vaddsubcuw, 0) +TRANS(VADDCUW, do_vx_vaddsubcuw, 1) + static bool do_vx_vmuleo(DisasContext *ctx, arg_VX *a, bool even, void (*gen_mul)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_i64)) { @@ -3234,6 +3358,146 @@ TRANS(VMULHSD, do_vx_mulh, true , do_vx_vmulhd_i64) TRANS(VMULHUW, do_vx_mulh, false, do_vx_vmulhw_i64) TRANS(VMULHUD, do_vx_mulh, false, do_vx_vmulhd_i64) +static void do_vavg(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b, + void (*gen_shr_vec)(unsigned, TCGv_vec, TCGv_vec, int64_t)) +{ + TCGv_vec tmp = tcg_temp_new_vec_matching(t); + tcg_gen_or_vec(vece, tmp, a, b); + tcg_gen_and_vec(vece, tmp, tmp, tcg_constant_vec_matching(t, vece, 1)); + gen_shr_vec(vece, a, a, 1); + gen_shr_vec(vece, b, b, 1); + tcg_gen_add_vec(vece, t, a, b); + tcg_gen_add_vec(vece, t, t, tmp); + tcg_temp_free_vec(tmp); +} + +QEMU_FLATTEN +static void gen_vavgu(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b) +{ + do_vavg(vece, t, a, b, tcg_gen_shri_vec); +} + +QEMU_FLATTEN +static void gen_vavgs(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b) +{ + do_vavg(vece, t, a, b, tcg_gen_sari_vec); +} + +static bool do_vx_vavg(DisasContext *ctx, arg_VX *a, int sign, int vece) +{ + static const TCGOpcode vecop_list_s[] = { + INDEX_op_add_vec, INDEX_op_sari_vec, 0 + }; + static const TCGOpcode vecop_list_u[] = { + INDEX_op_add_vec, INDEX_op_shri_vec, 0 + }; + + static const GVecGen3 op[2][3] = { + { + { + .fniv = gen_vavgu, + .fno = gen_helper_VAVGUB, + .opt_opc = vecop_list_u, + .vece = MO_8 + }, + { + .fniv = gen_vavgu, + .fno = gen_helper_VAVGUH, + .opt_opc = vecop_list_u, + .vece = MO_16 + }, + { + .fniv = gen_vavgu, + .fno = gen_helper_VAVGUW, + .opt_opc = vecop_list_u, + .vece = MO_32 + }, + }, + { + { + .fniv = gen_vavgs, + .fno = gen_helper_VAVGSB, + .opt_opc = vecop_list_s, + .vece = MO_8 + }, + { + .fniv = gen_vavgs, + .fno = gen_helper_VAVGSH, + .opt_opc = vecop_list_s, + .vece = MO_16 + }, + { + .fniv = gen_vavgs, + .fno = gen_helper_VAVGSW, + .opt_opc = vecop_list_s, + .vece = MO_32 + }, + }, + }; + + REQUIRE_VECTOR(ctx); + + tcg_gen_gvec_3(avr_full_offset(a->vrt), avr_full_offset(a->vra), + avr_full_offset(a->vrb), 16, 16, &op[sign][vece]); + + + return true; +} + + +TRANS_FLAGS(ALTIVEC, VAVGSB, do_vx_vavg, 1, MO_8) +TRANS_FLAGS(ALTIVEC, VAVGSH, do_vx_vavg, 1, MO_16) +TRANS_FLAGS(ALTIVEC, VAVGSW, do_vx_vavg, 1, MO_32) +TRANS_FLAGS(ALTIVEC, VAVGUB, do_vx_vavg, 0, MO_8) +TRANS_FLAGS(ALTIVEC, VAVGUH, do_vx_vavg, 0, MO_16) +TRANS_FLAGS(ALTIVEC, VAVGUW, do_vx_vavg, 0, MO_32) + +static void gen_vabsdu(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b) +{ + tcg_gen_umax_vec(vece, t, a, b); + tcg_gen_umin_vec(vece, a, a, b); + tcg_gen_sub_vec(vece, t, t, a); +} + +static bool do_vabsdu(DisasContext *ctx, arg_VX *a, const int vece) +{ + static const TCGOpcode vecop_list[] = { + INDEX_op_umax_vec, INDEX_op_umin_vec, INDEX_op_sub_vec, 0 + }; + + static const GVecGen3 op[] = { + { + .fniv = gen_vabsdu, + .fno = gen_helper_VABSDUB, + .opt_opc = vecop_list, + .vece = MO_8 + }, + { + .fniv = gen_vabsdu, + .fno = gen_helper_VABSDUH, + .opt_opc = vecop_list, + .vece = MO_16 + }, + { + .fniv = gen_vabsdu, + .fno = gen_helper_VABSDUW, + .opt_opc = vecop_list, + .vece = MO_32 + }, + }; + + REQUIRE_VECTOR(ctx); + + tcg_gen_gvec_3(avr_full_offset(a->vrt), avr_full_offset(a->vra), + avr_full_offset(a->vrb), 16, 16, &op[vece]); + + return true; +} + +TRANS_FLAGS2(ISA300, VABSDUB, do_vabsdu, MO_8) +TRANS_FLAGS2(ISA300, VABSDUH, do_vabsdu, MO_16) +TRANS_FLAGS2(ISA300, VABSDUW, do_vabsdu, MO_32) + static bool do_vdiv_vmod(DisasContext *ctx, arg_VX *a, const int vece, void (*func_32)(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b), void (*func_64)(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)) diff --git a/target/ppc/translate/vmx-ops.c.inc b/target/ppc/translate/vmx-ops.c.inc index a3a0fd0650..33fec8aca4 100644 --- a/target/ppc/translate/vmx-ops.c.inc +++ b/target/ppc/translate/vmx-ops.c.inc @@ -83,12 +83,6 @@ GEN_VXFORM(vminsb, 1, 12), GEN_VXFORM(vminsh, 1, 13), GEN_VXFORM(vminsw, 1, 14), GEN_VXFORM_207(vminsd, 1, 15), -GEN_VXFORM_DUAL(vavgub, vabsdub, 1, 16, PPC_ALTIVEC, PPC_NONE), -GEN_VXFORM_DUAL(vavguh, vabsduh, 1, 17, PPC_ALTIVEC, PPC_NONE), -GEN_VXFORM_DUAL(vavguw, vabsduw, 1, 18, PPC_ALTIVEC, PPC_NONE), -GEN_VXFORM(vavgsb, 1, 20), -GEN_VXFORM(vavgsh, 1, 21), -GEN_VXFORM(vavgsw, 1, 22), GEN_VXFORM(vmrghb, 6, 0), GEN_VXFORM(vmrghh, 6, 1), GEN_VXFORM(vmrghw, 6, 2), @@ -106,12 +100,8 @@ GEN_VXFORM_300(vsrv, 2, 28), GEN_VXFORM_300(vslv, 2, 29), GEN_VXFORM(vslo, 6, 16), GEN_VXFORM(vsro, 6, 17), -GEN_VXFORM(vaddcuw, 0, 6), -GEN_HANDLER_E_2(vprtybw, 0x4, 0x1, 0x18, 8, 0, PPC_NONE, PPC2_ISA300), -GEN_HANDLER_E_2(vprtybd, 0x4, 0x1, 0x18, 9, 0, PPC_NONE, PPC2_ISA300), -GEN_HANDLER_E_2(vprtybq, 0x4, 0x1, 0x18, 10, 0, PPC_NONE, PPC2_ISA300), -GEN_VXFORM_DUAL(vsubcuw, xpnd04_1, 0, 22, PPC_ALTIVEC, PPC_NONE), +GEN_VXFORM(xpnd04_1, 0, 22), GEN_VXFORM_300(bcdsr, 0, 23), GEN_VXFORM_300(bcdsr, 0, 31), GEN_VXFORM_DUAL(vaddubs, vmul10uq, 0, 8, PPC_ALTIVEC, PPC_NONE), @@ -182,8 +172,6 @@ GEN_VXFORM_300_EXT(vextractd, 6, 11, 0x100000), GEN_VXFORM(vspltisb, 6, 12), GEN_VXFORM(vspltish, 6, 13), GEN_VXFORM(vspltisw, 6, 14), -GEN_VXFORM_300_EO(vnegw, 0x01, 0x18, 0x06), -GEN_VXFORM_300_EO(vnegd, 0x01, 0x18, 0x07), GEN_VXFORM_300_EO(vctzb, 0x01, 0x18, 0x1C), GEN_VXFORM_300_EO(vctzh, 0x01, 0x18, 0x1D), GEN_VXFORM_300_EO(vctzw, 0x01, 0x18, 0x1E), @@ -219,7 +207,6 @@ GEN_VXFORM_UIMM(vctsxs, 5, 15), #define GEN_VAFORM_PAIRED(name0, name1, opc2) \ GEN_HANDLER(name0##_##name1, 0x04, opc2, 0xFF, 0x00000000, PPC_ALTIVEC) -GEN_VAFORM_PAIRED(vmhaddshs, vmhraddshs, 16), GEN_VAFORM_PAIRED(vmaddfp, vnmsubfp, 23), GEN_VXFORM_DUAL(vclzb, vpopcntb, 1, 28, PPC_NONE, PPC2_ALTIVEC_207), diff --git a/target/ppc/translate/vsx-impl.c.inc b/target/ppc/translate/vsx-impl.c.inc index e6e5c45ffd..4deb29ee42 100644 --- a/target/ppc/translate/vsx-impl.c.inc +++ b/target/ppc/translate/vsx-impl.c.inc @@ -630,6 +630,10 @@ static void gen_mtvsrws(DisasContext *ctx) #define OP_CPSGN 4 #define SGN_MASK_DP 0x8000000000000000ull #define SGN_MASK_SP 0x8000000080000000ull +#define EXP_MASK_DP 0x7FF0000000000000ull +#define EXP_MASK_SP 0x7F8000007F800000ull +#define FRC_MASK_DP (~(SGN_MASK_DP | EXP_MASK_DP)) +#define FRC_MASK_SP (~(SGN_MASK_SP | EXP_MASK_SP)) #define VSX_SCALAR_MOVE(name, op, sgn_mask) \ static void glue(gen_, name)(DisasContext *ctx) \ @@ -729,67 +733,125 @@ VSX_SCALAR_MOVE_QP(xsnabsqp, OP_NABS, SGN_MASK_DP) VSX_SCALAR_MOVE_QP(xsnegqp, OP_NEG, SGN_MASK_DP) VSX_SCALAR_MOVE_QP(xscpsgnqp, OP_CPSGN, SGN_MASK_DP) -#define VSX_VECTOR_MOVE(name, op, sgn_mask) \ -static void glue(gen_, name)(DisasContext *ctx) \ - { \ - TCGv_i64 xbh, xbl, sgm; \ - if (unlikely(!ctx->vsx_enabled)) { \ - gen_exception(ctx, POWERPC_EXCP_VSXU); \ - return; \ - } \ - xbh = tcg_temp_new_i64(); \ - xbl = tcg_temp_new_i64(); \ - sgm = tcg_temp_new_i64(); \ - get_cpu_vsr(xbh, xB(ctx->opcode), true); \ - get_cpu_vsr(xbl, xB(ctx->opcode), false); \ - tcg_gen_movi_i64(sgm, sgn_mask); \ - switch (op) { \ - case OP_ABS: { \ - tcg_gen_andc_i64(xbh, xbh, sgm); \ - tcg_gen_andc_i64(xbl, xbl, sgm); \ - break; \ - } \ - case OP_NABS: { \ - tcg_gen_or_i64(xbh, xbh, sgm); \ - tcg_gen_or_i64(xbl, xbl, sgm); \ - break; \ - } \ - case OP_NEG: { \ - tcg_gen_xor_i64(xbh, xbh, sgm); \ - tcg_gen_xor_i64(xbl, xbl, sgm); \ - break; \ - } \ - case OP_CPSGN: { \ - TCGv_i64 xah = tcg_temp_new_i64(); \ - TCGv_i64 xal = tcg_temp_new_i64(); \ - get_cpu_vsr(xah, xA(ctx->opcode), true); \ - get_cpu_vsr(xal, xA(ctx->opcode), false); \ - tcg_gen_and_i64(xah, xah, sgm); \ - tcg_gen_and_i64(xal, xal, sgm); \ - tcg_gen_andc_i64(xbh, xbh, sgm); \ - tcg_gen_andc_i64(xbl, xbl, sgm); \ - tcg_gen_or_i64(xbh, xbh, xah); \ - tcg_gen_or_i64(xbl, xbl, xal); \ - tcg_temp_free_i64(xah); \ - tcg_temp_free_i64(xal); \ - break; \ - } \ - } \ - set_cpu_vsr(xT(ctx->opcode), xbh, true); \ - set_cpu_vsr(xT(ctx->opcode), xbl, false); \ - tcg_temp_free_i64(xbh); \ - tcg_temp_free_i64(xbl); \ - tcg_temp_free_i64(sgm); \ - } - -VSX_VECTOR_MOVE(xvabsdp, OP_ABS, SGN_MASK_DP) -VSX_VECTOR_MOVE(xvnabsdp, OP_NABS, SGN_MASK_DP) -VSX_VECTOR_MOVE(xvnegdp, OP_NEG, SGN_MASK_DP) -VSX_VECTOR_MOVE(xvcpsgndp, OP_CPSGN, SGN_MASK_DP) -VSX_VECTOR_MOVE(xvabssp, OP_ABS, SGN_MASK_SP) -VSX_VECTOR_MOVE(xvnabssp, OP_NABS, SGN_MASK_SP) -VSX_VECTOR_MOVE(xvnegsp, OP_NEG, SGN_MASK_SP) -VSX_VECTOR_MOVE(xvcpsgnsp, OP_CPSGN, SGN_MASK_SP) +#define TCG_OP_IMM_i64(FUNC, OP, IMM) \ + static void FUNC(TCGv_i64 t, TCGv_i64 b) \ + { \ + OP(t, b, IMM); \ + } + +TCG_OP_IMM_i64(do_xvabssp_i64, tcg_gen_andi_i64, ~SGN_MASK_SP) +TCG_OP_IMM_i64(do_xvnabssp_i64, tcg_gen_ori_i64, SGN_MASK_SP) +TCG_OP_IMM_i64(do_xvnegsp_i64, tcg_gen_xori_i64, SGN_MASK_SP) +TCG_OP_IMM_i64(do_xvabsdp_i64, tcg_gen_andi_i64, ~SGN_MASK_DP) +TCG_OP_IMM_i64(do_xvnabsdp_i64, tcg_gen_ori_i64, SGN_MASK_DP) +TCG_OP_IMM_i64(do_xvnegdp_i64, tcg_gen_xori_i64, SGN_MASK_DP) +#undef TCG_OP_IMM_i64 + +static void xv_msb_op1(unsigned vece, TCGv_vec t, TCGv_vec b, + void (*tcg_gen_op_vec)(unsigned, TCGv_vec, TCGv_vec, TCGv_vec)) +{ + uint64_t msb = (vece == MO_32) ? SGN_MASK_SP : SGN_MASK_DP; + tcg_gen_op_vec(vece, t, b, tcg_constant_vec_matching(t, vece, msb)); +} + +static void do_xvabs_vec(unsigned vece, TCGv_vec t, TCGv_vec b) +{ + xv_msb_op1(vece, t, b, tcg_gen_andc_vec); +} + +static void do_xvnabs_vec(unsigned vece, TCGv_vec t, TCGv_vec b) +{ + xv_msb_op1(vece, t, b, tcg_gen_or_vec); +} + +static void do_xvneg_vec(unsigned vece, TCGv_vec t, TCGv_vec b) +{ + xv_msb_op1(vece, t, b, tcg_gen_xor_vec); +} + +static bool do_vsx_msb_op(DisasContext *ctx, arg_XX2 *a, unsigned vece, + void (*vec)(unsigned, TCGv_vec, TCGv_vec), + void (*i64)(TCGv_i64, TCGv_i64)) +{ + static const TCGOpcode vecop_list[] = { + 0 + }; + + const GVecGen2 op = { + .fni8 = i64, + .fniv = vec, + .opt_opc = vecop_list, + .vece = vece + }; + + REQUIRE_INSNS_FLAGS2(ctx, VSX); + REQUIRE_VSX(ctx); + + tcg_gen_gvec_2(vsr_full_offset(a->xt), vsr_full_offset(a->xb), + 16, 16, &op); + + return true; +} + +TRANS(XVABSDP, do_vsx_msb_op, MO_64, do_xvabs_vec, do_xvabsdp_i64) +TRANS(XVNABSDP, do_vsx_msb_op, MO_64, do_xvnabs_vec, do_xvnabsdp_i64) +TRANS(XVNEGDP, do_vsx_msb_op, MO_64, do_xvneg_vec, do_xvnegdp_i64) +TRANS(XVABSSP, do_vsx_msb_op, MO_32, do_xvabs_vec, do_xvabssp_i64) +TRANS(XVNABSSP, do_vsx_msb_op, MO_32, do_xvnabs_vec, do_xvnabssp_i64) +TRANS(XVNEGSP, do_vsx_msb_op, MO_32, do_xvneg_vec, do_xvnegsp_i64) + +static void do_xvcpsgndp_i64(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b) +{ + tcg_gen_andi_i64(a, a, SGN_MASK_DP); + tcg_gen_andi_i64(b, b, ~SGN_MASK_DP); + tcg_gen_or_i64(t, a, b); +} + +static void do_xvcpsgnsp_i64(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b) +{ + tcg_gen_andi_i64(a, a, SGN_MASK_SP); + tcg_gen_andi_i64(b, b, ~SGN_MASK_SP); + tcg_gen_or_i64(t, a, b); +} + +static void do_xvcpsgn_vec(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b) +{ + uint64_t msb = (vece == MO_32) ? SGN_MASK_SP : SGN_MASK_DP; + tcg_gen_bitsel_vec(vece, t, tcg_constant_vec_matching(t, vece, msb), a, b); +} + +static bool do_xvcpsgn(DisasContext *ctx, arg_XX3 *a, unsigned vece) +{ + static const TCGOpcode vecop_list[] = { + 0 + }; + + static const GVecGen3 op[] = { + { + .fni8 = do_xvcpsgnsp_i64, + .fniv = do_xvcpsgn_vec, + .opt_opc = vecop_list, + .vece = MO_32 + }, + { + .fni8 = do_xvcpsgndp_i64, + .fniv = do_xvcpsgn_vec, + .opt_opc = vecop_list, + .vece = MO_64 + }, + }; + + REQUIRE_INSNS_FLAGS2(ctx, VSX); + REQUIRE_VSX(ctx); + + tcg_gen_gvec_3(vsr_full_offset(a->xt), vsr_full_offset(a->xa), + vsr_full_offset(a->xb), 16, 16, &op[vece - MO_32]); + + return true; +} + +TRANS(XVCPSGNSP, do_xvcpsgn, MO_32) +TRANS(XVCPSGNDP, do_xvcpsgn, MO_64) #define VSX_CMP(name, op1, op2, inval, type) \ static void gen_##name(DisasContext *ctx) \ @@ -1052,6 +1114,192 @@ GEN_VSX_HELPER_X2(xscvhpdp, 0x16, 0x15, 0x10, PPC2_ISA300) GEN_VSX_HELPER_R2(xscvsdqp, 0x04, 0x1A, 0x0A, PPC2_ISA300) GEN_VSX_HELPER_X2(xscvspdp, 0x12, 0x14, 0, PPC2_VSX) +/* test if +Inf */ +static void gen_is_pos_inf(unsigned vece, TCGv_vec t, TCGv_vec b, int64_t v) +{ + uint64_t exp_msk = (vece == MO_32) ? (uint32_t)EXP_MASK_SP : EXP_MASK_DP; + tcg_gen_cmp_vec(TCG_COND_EQ, vece, t, b, + tcg_constant_vec_matching(t, vece, exp_msk)); +} + +/* test if -Inf */ +static void gen_is_neg_inf(unsigned vece, TCGv_vec t, TCGv_vec b, int64_t v) +{ + uint64_t exp_msk = (vece == MO_32) ? (uint32_t)EXP_MASK_SP : EXP_MASK_DP; + uint64_t sgn_msk = (vece == MO_32) ? (uint32_t)SGN_MASK_SP : SGN_MASK_DP; + tcg_gen_cmp_vec(TCG_COND_EQ, vece, t, b, + tcg_constant_vec_matching(t, vece, sgn_msk | exp_msk)); +} + +/* test if +Inf or -Inf */ +static void gen_is_any_inf(unsigned vece, TCGv_vec t, TCGv_vec b, int64_t v) +{ + uint64_t exp_msk = (vece == MO_32) ? (uint32_t)EXP_MASK_SP : EXP_MASK_DP; + uint64_t sgn_msk = (vece == MO_32) ? (uint32_t)SGN_MASK_SP : SGN_MASK_DP; + tcg_gen_andc_vec(vece, b, b, tcg_constant_vec_matching(t, vece, sgn_msk)); + tcg_gen_cmp_vec(TCG_COND_EQ, vece, t, b, + tcg_constant_vec_matching(t, vece, exp_msk)); +} + +/* test if +0 */ +static void gen_is_pos_zero(unsigned vece, TCGv_vec t, TCGv_vec b, int64_t v) +{ + tcg_gen_cmp_vec(TCG_COND_EQ, vece, t, b, + tcg_constant_vec_matching(t, vece, 0)); +} + +/* test if -0 */ +static void gen_is_neg_zero(unsigned vece, TCGv_vec t, TCGv_vec b, int64_t v) +{ + uint64_t sgn_msk = (vece == MO_32) ? (uint32_t)SGN_MASK_SP : SGN_MASK_DP; + tcg_gen_cmp_vec(TCG_COND_EQ, vece, t, b, + tcg_constant_vec_matching(t, vece, sgn_msk)); +} + +/* test if +0 or -0 */ +static void gen_is_any_zero(unsigned vece, TCGv_vec t, TCGv_vec b, int64_t v) +{ + uint64_t sgn_msk = (vece == MO_32) ? (uint32_t)SGN_MASK_SP : SGN_MASK_DP; + tcg_gen_andc_vec(vece, b, b, tcg_constant_vec_matching(t, vece, sgn_msk)); + tcg_gen_cmp_vec(TCG_COND_EQ, vece, t, b, + tcg_constant_vec_matching(t, vece, 0)); +} + +/* test if +Denormal */ +static void gen_is_pos_denormal(unsigned vece, TCGv_vec t, + TCGv_vec b, int64_t v) +{ + uint64_t frc_msk = (vece == MO_32) ? (uint32_t)FRC_MASK_SP : FRC_MASK_DP; + tcg_gen_cmp_vec(TCG_COND_LEU, vece, t, b, + tcg_constant_vec_matching(t, vece, frc_msk)); + tcg_gen_cmp_vec(TCG_COND_NE, vece, b, b, + tcg_constant_vec_matching(t, vece, 0)); + tcg_gen_and_vec(vece, t, t, b); +} + +/* test if -Denormal */ +static void gen_is_neg_denormal(unsigned vece, TCGv_vec t, + TCGv_vec b, int64_t v) +{ + uint64_t sgn_msk = (vece == MO_32) ? (uint32_t)SGN_MASK_SP : SGN_MASK_DP; + uint64_t frc_msk = (vece == MO_32) ? (uint32_t)FRC_MASK_SP : FRC_MASK_DP; + tcg_gen_cmp_vec(TCG_COND_LEU, vece, t, b, + tcg_constant_vec_matching(t, vece, sgn_msk | frc_msk)); + tcg_gen_cmp_vec(TCG_COND_GTU, vece, b, b, + tcg_constant_vec_matching(t, vece, sgn_msk)); + tcg_gen_and_vec(vece, t, t, b); +} + +/* test if +Denormal or -Denormal */ +static void gen_is_any_denormal(unsigned vece, TCGv_vec t, + TCGv_vec b, int64_t v) +{ + uint64_t sgn_msk = (vece == MO_32) ? (uint32_t)SGN_MASK_SP : SGN_MASK_DP; + uint64_t frc_msk = (vece == MO_32) ? (uint32_t)FRC_MASK_SP : FRC_MASK_DP; + tcg_gen_andc_vec(vece, b, b, tcg_constant_vec_matching(t, vece, sgn_msk)); + tcg_gen_cmp_vec(TCG_COND_LE, vece, t, b, + tcg_constant_vec_matching(t, vece, frc_msk)); + tcg_gen_cmp_vec(TCG_COND_NE, vece, b, b, + tcg_constant_vec_matching(t, vece, 0)); + tcg_gen_and_vec(vece, t, t, b); +} + +/* test if NaN */ +static void gen_is_nan(unsigned vece, TCGv_vec t, TCGv_vec b, int64_t v) +{ + uint64_t exp_msk = (vece == MO_32) ? (uint32_t)EXP_MASK_SP : EXP_MASK_DP; + uint64_t sgn_msk = (vece == MO_32) ? (uint32_t)SGN_MASK_SP : SGN_MASK_DP; + tcg_gen_and_vec(vece, b, b, tcg_constant_vec_matching(t, vece, ~sgn_msk)); + tcg_gen_cmp_vec(TCG_COND_GT, vece, t, b, + tcg_constant_vec_matching(t, vece, exp_msk)); +} + +static bool do_xvtstdc(DisasContext *ctx, arg_XX2_uim *a, unsigned vece) +{ + static const TCGOpcode vecop_list[] = { + INDEX_op_cmp_vec, 0 + }; + + GVecGen2i op = { + .fnoi = (vece == MO_32) ? gen_helper_XVTSTDCSP : gen_helper_XVTSTDCDP, + .vece = vece, + .opt_opc = vecop_list + }; + + REQUIRE_VSX(ctx); + + switch (a->uim) { + case 0: + set_cpu_vsr(a->xt, tcg_constant_i64(0), true); + set_cpu_vsr(a->xt, tcg_constant_i64(0), false); + return true; + case ((1 << 0) | (1 << 1)): + /* test if +Denormal or -Denormal */ + op.fniv = gen_is_any_denormal; + break; + case (1 << 0): + /* test if -Denormal */ + op.fniv = gen_is_neg_denormal; + break; + case (1 << 1): + /* test if +Denormal */ + op.fniv = gen_is_pos_denormal; + break; + case ((1 << 2) | (1 << 3)): + /* test if +0 or -0 */ + op.fniv = gen_is_any_zero; + break; + case (1 << 2): + /* test if -0 */ + op.fniv = gen_is_neg_zero; + break; + case (1 << 3): + /* test if +0 */ + op.fniv = gen_is_pos_zero; + break; + case ((1 << 4) | (1 << 5)): + /* test if +Inf or -Inf */ + op.fniv = gen_is_any_inf; + break; + case (1 << 4): + /* test if -Inf */ + op.fniv = gen_is_neg_inf; + break; + case (1 << 5): + /* test if +Inf */ + op.fniv = gen_is_pos_inf; + break; + case (1 << 6): + /* test if NaN */ + op.fniv = gen_is_nan; + break; + } + tcg_gen_gvec_2i(vsr_full_offset(a->xt), vsr_full_offset(a->xb), + 16, 16, a->uim, &op); + + return true; +} + +TRANS_FLAGS2(VSX, XVTSTDCSP, do_xvtstdc, MO_32) +TRANS_FLAGS2(VSX, XVTSTDCDP, do_xvtstdc, MO_64) + +static bool do_XX2_bf_uim(DisasContext *ctx, arg_XX2_bf_uim *a, bool vsr, + void (*gen_helper)(TCGv_env, TCGv_i32, TCGv_i32, TCGv_ptr)) +{ + TCGv_ptr xb; + + REQUIRE_VSX(ctx); + xb = vsr ? gen_vsr_ptr(a->xb) : gen_avr_ptr(a->xb); + gen_helper(cpu_env, tcg_constant_i32(a->bf), tcg_constant_i32(a->uim), xb); + tcg_temp_free_ptr(xb); + + return true; +} + +TRANS_FLAGS2(ISA300, XSTSTDCSP, do_XX2_bf_uim, true, gen_helper_XSTSTDCSP) +TRANS_FLAGS2(ISA300, XSTSTDCDP, do_XX2_bf_uim, true, gen_helper_XSTSTDCDP) +TRANS_FLAGS2(ISA300, XSTSTDCQP, do_XX2_bf_uim, false, gen_helper_XSTSTDCQP) + bool trans_XSCVSPDPN(DisasContext *ctx, arg_XX2 *a) { TCGv_i64 tmp; @@ -1098,9 +1346,6 @@ GEN_VSX_HELPER_X2(xssqrtsp, 0x16, 0x00, 0, PPC2_VSX207) GEN_VSX_HELPER_X2(xsrsqrtesp, 0x14, 0x00, 0, PPC2_VSX207) GEN_VSX_HELPER_X2(xscvsxdsp, 0x10, 0x13, 0, PPC2_VSX207) GEN_VSX_HELPER_X2(xscvuxdsp, 0x10, 0x12, 0, PPC2_VSX207) -GEN_VSX_HELPER_X1(xststdcsp, 0x14, 0x12, 0, PPC2_ISA300) -GEN_VSX_HELPER_2(xststdcdp, 0x14, 0x16, 0, PPC2_ISA300) -GEN_VSX_HELPER_2(xststdcqp, 0x04, 0x16, 0, PPC2_ISA300) GEN_VSX_HELPER_X3(xvadddp, 0x00, 0x0C, 0, PPC2_VSX) GEN_VSX_HELPER_X3(xvsubdp, 0x00, 0x0D, 0, PPC2_VSX) @@ -1155,8 +1400,6 @@ GEN_VSX_HELPER_X2(xvrspic, 0x16, 0x0A, 0, PPC2_VSX) GEN_VSX_HELPER_X2(xvrspim, 0x12, 0x0B, 0, PPC2_VSX) GEN_VSX_HELPER_X2(xvrspip, 0x12, 0x0A, 0, PPC2_VSX) GEN_VSX_HELPER_X2(xvrspiz, 0x12, 0x09, 0, PPC2_VSX) -GEN_VSX_HELPER_2(xvtstdcsp, 0x14, 0x1A, 0, PPC2_VSX) -GEN_VSX_HELPER_2(xvtstdcdp, 0x14, 0x1E, 0, PPC2_VSX) static bool trans_XXPERM(DisasContext *ctx, arg_XX3 *a) { diff --git a/target/ppc/translate/vsx-ops.c.inc b/target/ppc/translate/vsx-ops.c.inc index bff14bbece..a3ba094d62 100644 --- a/target/ppc/translate/vsx-ops.c.inc +++ b/target/ppc/translate/vsx-ops.c.inc @@ -147,33 +147,12 @@ GEN_HANDLER_E(xsiexpdp, 0x3C, 0x16, 0x1C, 0, PPC_NONE, PPC2_ISA300), GEN_VSX_XFORM_300(xsiexpqp, 0x4, 0x1B, 0x00000001), #endif -GEN_XX2FORM(xststdcdp, 0x14, 0x16, PPC2_ISA300), -GEN_XX2FORM(xststdcsp, 0x14, 0x12, PPC2_ISA300), -GEN_VSX_XFORM_300(xststdcqp, 0x04, 0x16, 0x00000001), - GEN_XX3FORM(xviexpsp, 0x00, 0x1B, PPC2_ISA300), GEN_XX3FORM(xviexpdp, 0x00, 0x1F, PPC2_ISA300), GEN_XX2FORM_EO(xvxexpdp, 0x16, 0x1D, 0x00, PPC2_ISA300), GEN_XX2FORM_EO(xvxsigdp, 0x16, 0x1D, 0x01, PPC2_ISA300), GEN_XX2FORM_EO(xvxexpsp, 0x16, 0x1D, 0x08, PPC2_ISA300), -/* DCMX = bit[25] << 6 | bit[29] << 5 | bit[11:15] */ -#define GEN_XX2FORM_DCMX(name, opc2, opc3, fl2) \ -GEN_XX3FORM(name, opc2, opc3 | 0, fl2), \ -GEN_XX3FORM(name, opc2, opc3 | 1, fl2) - -GEN_XX2FORM_DCMX(xvtstdcdp, 0x14, 0x1E, PPC2_ISA300), -GEN_XX2FORM_DCMX(xvtstdcsp, 0x14, 0x1A, PPC2_ISA300), - -GEN_XX2FORM(xvabsdp, 0x12, 0x1D, PPC2_VSX), -GEN_XX2FORM(xvnabsdp, 0x12, 0x1E, PPC2_VSX), -GEN_XX2FORM(xvnegdp, 0x12, 0x1F, PPC2_VSX), -GEN_XX3FORM(xvcpsgndp, 0x00, 0x1E, PPC2_VSX), -GEN_XX2FORM(xvabssp, 0x12, 0x19, PPC2_VSX), -GEN_XX2FORM(xvnabssp, 0x12, 0x1A, PPC2_VSX), -GEN_XX2FORM(xvnegsp, 0x12, 0x1B, PPC2_VSX), -GEN_XX3FORM(xvcpsgnsp, 0x00, 0x1A, PPC2_VSX), - GEN_XX3FORM(xsadddp, 0x00, 0x04, PPC2_VSX), GEN_VSX_XFORM_300(xsaddqp, 0x04, 0x00, 0x0), GEN_XX3FORM(xssubdp, 0x00, 0x05, PPC2_VSX), diff --git a/target/s390x/tcg/translate_vx.c.inc b/target/s390x/tcg/translate_vx.c.inc index 3526ba3e3b..b69c1a111c 100644 --- a/target/s390x/tcg/translate_vx.c.inc +++ b/target/s390x/tcg/translate_vx.c.inc @@ -2723,7 +2723,7 @@ static DisasJumpType op_vfene(DisasContext *s, DisasOps *o) static DisasJumpType op_vistr(DisasContext *s, DisasOps *o) { - const uint8_t es = get_field(s, m4); + const uint8_t es = get_field(s, m3); const uint8_t m5 = get_field(s, m5); static gen_helper_gvec_2 * const g[3] = { gen_helper_gvec_vistr8, diff --git a/tests/docker/dockerfiles/alpine.docker b/tests/docker/dockerfiles/alpine.docker index 9b7541261a..094f66f4eb 100644 --- a/tests/docker/dockerfiles/alpine.docker +++ b/tests/docker/dockerfiles/alpine.docker @@ -94,6 +94,7 @@ RUN apk update && \ sdl2_image-dev \ sed \ snappy-dev \ + sndio-dev \ sparse \ spice-dev \ spice-protocol \ @@ -119,8 +120,8 @@ RUN apk update && \ ln -s /usr/bin/ccache /usr/libexec/ccache-wrappers/g++ && \ ln -s /usr/bin/ccache /usr/libexec/ccache-wrappers/gcc +ENV CCACHE_WRAPPERSDIR "/usr/libexec/ccache-wrappers" ENV LANG "en_US.UTF-8" ENV MAKE "/usr/bin/make" ENV NINJA "/usr/bin/ninja" ENV PYTHON "/usr/bin/python3" -ENV CCACHE_WRAPPERSDIR "/usr/libexec/ccache-wrappers" diff --git a/tests/docker/dockerfiles/centos8.docker b/tests/docker/dockerfiles/centos8.docker index d89113c0df..1f70d41aeb 100644 --- a/tests/docker/dockerfiles/centos8.docker +++ b/tests/docker/dockerfiles/centos8.docker @@ -130,8 +130,8 @@ RUN dnf distro-sync -y && \ ln -s /usr/bin/ccache /usr/libexec/ccache-wrappers/g++ && \ ln -s /usr/bin/ccache /usr/libexec/ccache-wrappers/gcc +ENV CCACHE_WRAPPERSDIR "/usr/libexec/ccache-wrappers" ENV LANG "en_US.UTF-8" ENV MAKE "/usr/bin/make" ENV NINJA "/usr/bin/ninja" ENV PYTHON "/usr/bin/python3" -ENV CCACHE_WRAPPERSDIR "/usr/libexec/ccache-wrappers" diff --git a/tests/docker/dockerfiles/debian-amd64-cross.docker b/tests/docker/dockerfiles/debian-amd64-cross.docker index 9047759e76..5e57309361 100644 --- a/tests/docker/dockerfiles/debian-amd64-cross.docker +++ b/tests/docker/dockerfiles/debian-amd64-cross.docker @@ -11,62 +11,63 @@ RUN export DEBIAN_FRONTEND=noninteractive && \ apt-get install -y eatmydata && \ eatmydata apt-get dist-upgrade -y && \ eatmydata apt-get install --no-install-recommends -y \ - bash \ - bc \ - bison \ - bsdextrautils \ - bzip2 \ - ca-certificates \ - ccache \ - dbus \ - debianutils \ - diffutils \ - exuberant-ctags \ - findutils \ - flex \ - gcovr \ - genisoimage \ - gettext \ - git \ - hostname \ - libglib2.0-dev \ - libpcre2-dev \ - libspice-protocol-dev \ - llvm \ - locales \ - make \ - meson \ - ncat \ - ninja-build \ - openssh-client \ - perl-base \ - pkgconf \ - python3 \ - python3-numpy \ - python3-opencv \ - python3-pillow \ - python3-pip \ - python3-sphinx \ - python3-sphinx-rtd-theme \ - python3-venv \ - python3-yaml \ - rpm2cpio \ - sed \ - sparse \ - tar \ - tesseract-ocr \ - tesseract-ocr-eng \ - texinfo && \ + bash \ + bc \ + bison \ + bsdextrautils \ + bzip2 \ + ca-certificates \ + ccache \ + dbus \ + debianutils \ + diffutils \ + exuberant-ctags \ + findutils \ + flex \ + gcovr \ + genisoimage \ + gettext \ + git \ + hostname \ + libglib2.0-dev \ + libpcre2-dev \ + libsndio-dev \ + libspice-protocol-dev \ + llvm \ + locales \ + make \ + meson \ + ncat \ + ninja-build \ + openssh-client \ + perl-base \ + pkgconf \ + python3 \ + python3-numpy \ + python3-opencv \ + python3-pillow \ + python3-pip \ + python3-sphinx \ + python3-sphinx-rtd-theme \ + python3-venv \ + python3-yaml \ + rpm2cpio \ + sed \ + sparse \ + tar \ + tesseract-ocr \ + tesseract-ocr-eng \ + texinfo && \ eatmydata apt-get autoremove -y && \ eatmydata apt-get autoclean -y && \ sed -Ei 's,^# (en_US\.UTF-8 .*)$,\1,' /etc/locale.gen && \ dpkg-reconfigure locales +ENV CCACHE_WRAPPERSDIR "/usr/libexec/ccache-wrappers" ENV LANG "en_US.UTF-8" ENV MAKE "/usr/bin/make" ENV NINJA "/usr/bin/ninja" ENV PYTHON "/usr/bin/python3" -ENV CCACHE_WRAPPERSDIR "/usr/libexec/ccache-wrappers" RUN export DEBIAN_FRONTEND=noninteractive && \ dpkg --add-architecture amd64 && \ @@ -74,76 +75,76 @@ RUN export DEBIAN_FRONTEND=noninteractive && \ eatmydata apt-get dist-upgrade -y && \ eatmydata apt-get install --no-install-recommends -y dpkg-dev && \ eatmydata apt-get install --no-install-recommends -y \ - g++-x86-64-linux-gnu \ - gcc-x86-64-linux-gnu \ - libaio-dev:amd64 \ - libasan5:amd64 \ - libasound2-dev:amd64 \ - libattr1-dev:amd64 \ - libbpf-dev:amd64 \ - libbrlapi-dev:amd64 \ - libbz2-dev:amd64 \ - libc6-dev:amd64 \ - libcacard-dev:amd64 \ - libcap-ng-dev:amd64 \ - libcapstone-dev:amd64 \ - libcmocka-dev:amd64 \ - libcurl4-gnutls-dev:amd64 \ - libdaxctl-dev:amd64 \ - libdrm-dev:amd64 \ - libepoxy-dev:amd64 \ - libfdt-dev:amd64 \ - libffi-dev:amd64 \ - libfuse3-dev:amd64 \ - libgbm-dev:amd64 \ - libgcrypt20-dev:amd64 \ - libglib2.0-dev:amd64 \ - libglusterfs-dev:amd64 \ - libgnutls28-dev:amd64 \ - libgtk-3-dev:amd64 \ - libibumad-dev:amd64 \ - libibverbs-dev:amd64 \ - libiscsi-dev:amd64 \ - libjemalloc-dev:amd64 \ - libjpeg62-turbo-dev:amd64 \ - libjson-c-dev:amd64 \ - liblttng-ust-dev:amd64 \ - liblzo2-dev:amd64 \ - libncursesw5-dev:amd64 \ - libnfs-dev:amd64 \ - libnuma-dev:amd64 \ - libpam0g-dev:amd64 \ - libpixman-1-dev:amd64 \ - libpmem-dev:amd64 \ - libpng-dev:amd64 \ - libpulse-dev:amd64 \ - librbd-dev:amd64 \ - librdmacm-dev:amd64 \ - libsasl2-dev:amd64 \ - libsdl2-dev:amd64 \ - libsdl2-image-dev:amd64 \ - libseccomp-dev:amd64 \ - libselinux1-dev:amd64 \ - libslirp-dev:amd64 \ - libsnappy-dev:amd64 \ - libspice-server-dev:amd64 \ - libssh-gcrypt-dev:amd64 \ - libsystemd-dev:amd64 \ - libtasn1-6-dev:amd64 \ - libubsan1:amd64 \ - libudev-dev:amd64 \ - liburing-dev:amd64 \ - libusb-1.0-0-dev:amd64 \ - libusbredirhost-dev:amd64 \ - libvdeplug-dev:amd64 \ - libvirglrenderer-dev:amd64 \ - libvte-2.91-dev:amd64 \ - libxen-dev:amd64 \ - libzstd-dev:amd64 \ - nettle-dev:amd64 \ - systemtap-sdt-dev:amd64 \ - xfslibs-dev:amd64 \ - zlib1g-dev:amd64 && \ + g++-x86-64-linux-gnu \ + gcc-x86-64-linux-gnu \ + libaio-dev:amd64 \ + libasan5:amd64 \ + libasound2-dev:amd64 \ + libattr1-dev:amd64 \ + libbpf-dev:amd64 \ + libbrlapi-dev:amd64 \ + libbz2-dev:amd64 \ + libc6-dev:amd64 \ + libcacard-dev:amd64 \ + libcap-ng-dev:amd64 \ + libcapstone-dev:amd64 \ + libcmocka-dev:amd64 \ + libcurl4-gnutls-dev:amd64 \ + libdaxctl-dev:amd64 \ + libdrm-dev:amd64 \ + libepoxy-dev:amd64 \ + libfdt-dev:amd64 \ + libffi-dev:amd64 \ + libfuse3-dev:amd64 \ + libgbm-dev:amd64 \ + libgcrypt20-dev:amd64 \ + libglib2.0-dev:amd64 \ + libglusterfs-dev:amd64 \ + libgnutls28-dev:amd64 \ + libgtk-3-dev:amd64 \ + libibumad-dev:amd64 \ + libibverbs-dev:amd64 \ + libiscsi-dev:amd64 \ + libjemalloc-dev:amd64 \ + libjpeg62-turbo-dev:amd64 \ + libjson-c-dev:amd64 \ + liblttng-ust-dev:amd64 \ + liblzo2-dev:amd64 \ + libncursesw5-dev:amd64 \ + libnfs-dev:amd64 \ + libnuma-dev:amd64 \ + libpam0g-dev:amd64 \ + libpixman-1-dev:amd64 \ + libpmem-dev:amd64 \ + libpng-dev:amd64 \ + libpulse-dev:amd64 \ + librbd-dev:amd64 \ + librdmacm-dev:amd64 \ + libsasl2-dev:amd64 \ + libsdl2-dev:amd64 \ + libsdl2-image-dev:amd64 \ + libseccomp-dev:amd64 \ + libselinux1-dev:amd64 \ + libslirp-dev:amd64 \ + libsnappy-dev:amd64 \ + libspice-server-dev:amd64 \ + libssh-gcrypt-dev:amd64 \ + libsystemd-dev:amd64 \ + libtasn1-6-dev:amd64 \ + libubsan1:amd64 \ + libudev-dev:amd64 \ + liburing-dev:amd64 \ + libusb-1.0-0-dev:amd64 \ + libusbredirhost-dev:amd64 \ + libvdeplug-dev:amd64 \ + libvirglrenderer-dev:amd64 \ + libvte-2.91-dev:amd64 \ + libxen-dev:amd64 \ + libzstd-dev:amd64 \ + nettle-dev:amd64 \ + systemtap-sdt-dev:amd64 \ + xfslibs-dev:amd64 \ + zlib1g-dev:amd64 && \ eatmydata apt-get autoremove -y && \ eatmydata apt-get autoclean -y && \ mkdir -p /usr/local/share/meson/cross && \ diff --git a/tests/docker/dockerfiles/debian-amd64.docker b/tests/docker/dockerfiles/debian-amd64.docker index a8b728ca64..bfeab01ee3 100644 --- a/tests/docker/dockerfiles/debian-amd64.docker +++ b/tests/docker/dockerfiles/debian-amd64.docker @@ -11,123 +11,124 @@ RUN export DEBIAN_FRONTEND=noninteractive && \ apt-get install -y eatmydata && \ eatmydata apt-get dist-upgrade -y && \ eatmydata apt-get install --no-install-recommends -y \ - bash \ - bc \ - bison \ - bsdextrautils \ - bzip2 \ - ca-certificates \ - ccache \ - clang \ - dbus \ - debianutils \ - diffutils \ - exuberant-ctags \ - findutils \ - flex \ - g++ \ - gcc \ - gcovr \ - genisoimage \ - gettext \ - git \ - hostname \ - libaio-dev \ - libasan5 \ - libasound2-dev \ - libattr1-dev \ - libbpf-dev \ - libbrlapi-dev \ - libbz2-dev \ - libc6-dev \ - libcacard-dev \ - libcap-ng-dev \ - libcapstone-dev \ - libcmocka-dev \ - libcurl4-gnutls-dev \ - libdaxctl-dev \ - libdrm-dev \ - libepoxy-dev \ - libfdt-dev \ - libffi-dev \ - libfuse3-dev \ - libgbm-dev \ - libgcrypt20-dev \ - libglib2.0-dev \ - libglusterfs-dev \ - libgnutls28-dev \ - libgtk-3-dev \ - libibumad-dev \ - libibverbs-dev \ - libiscsi-dev \ - libjemalloc-dev \ - libjpeg62-turbo-dev \ - libjson-c-dev \ - liblttng-ust-dev \ - liblzo2-dev \ - libncursesw5-dev \ - libnfs-dev \ - libnuma-dev \ - libpam0g-dev \ - libpcre2-dev \ - libpixman-1-dev \ - libpmem-dev \ - libpng-dev \ - libpulse-dev \ - librbd-dev \ - librdmacm-dev \ - libsasl2-dev \ - libsdl2-dev \ - libsdl2-image-dev \ - libseccomp-dev \ - libselinux1-dev \ - libslirp-dev \ - libsnappy-dev \ - libspice-protocol-dev \ - libspice-server-dev \ - libssh-gcrypt-dev \ - libsystemd-dev \ - libtasn1-6-dev \ - libubsan1 \ - libudev-dev \ - liburing-dev \ - libusb-1.0-0-dev \ - libusbredirhost-dev \ - libvdeplug-dev \ - libvirglrenderer-dev \ - libvte-2.91-dev \ - libxen-dev \ - libzstd-dev \ - llvm \ - locales \ - make \ - meson \ - multipath-tools \ - ncat \ - nettle-dev \ - ninja-build \ - openssh-client \ - perl-base \ - pkgconf \ - python3 \ - python3-numpy \ - python3-opencv \ - python3-pillow \ - python3-pip \ - python3-sphinx \ - python3-sphinx-rtd-theme \ - python3-venv \ - python3-yaml \ - rpm2cpio \ - sed \ - sparse \ - systemtap-sdt-dev \ - tar \ - tesseract-ocr \ - tesseract-ocr-eng \ - texinfo \ - xfslibs-dev \ - zlib1g-dev && \ + bash \ + bc \ + bison \ + bsdextrautils \ + bzip2 \ + ca-certificates \ + ccache \ + clang \ + dbus \ + debianutils \ + diffutils \ + exuberant-ctags \ + findutils \ + flex \ + g++ \ + gcc \ + gcovr \ + genisoimage \ + gettext \ + git \ + hostname \ + libaio-dev \ + libasan5 \ + libasound2-dev \ + libattr1-dev \ + libbpf-dev \ + libbrlapi-dev \ + libbz2-dev \ + libc6-dev \ + libcacard-dev \ + libcap-ng-dev \ + libcapstone-dev \ + libcmocka-dev \ + libcurl4-gnutls-dev \ + libdaxctl-dev \ + libdrm-dev \ + libepoxy-dev \ + libfdt-dev \ + libffi-dev \ + libfuse3-dev \ + libgbm-dev \ + libgcrypt20-dev \ + libglib2.0-dev \ + libglusterfs-dev \ + libgnutls28-dev \ + libgtk-3-dev \ + libibumad-dev \ + libibverbs-dev \ + libiscsi-dev \ + libjemalloc-dev \ + libjpeg62-turbo-dev \ + libjson-c-dev \ + liblttng-ust-dev \ + liblzo2-dev \ + libncursesw5-dev \ + libnfs-dev \ + libnuma-dev \ + libpam0g-dev \ + libpcre2-dev \ + libpixman-1-dev \ + libpmem-dev \ + libpng-dev \ + libpulse-dev \ + librbd-dev \ + librdmacm-dev \ + libsasl2-dev \ + libsdl2-dev \ + libsdl2-image-dev \ + libseccomp-dev \ + libselinux1-dev \ + libslirp-dev \ + libsnappy-dev \ + libsndio-dev \ + libspice-protocol-dev \ + libspice-server-dev \ + libssh-gcrypt-dev \ + libsystemd-dev \ + libtasn1-6-dev \ + libubsan1 \ + libudev-dev \ + liburing-dev \ + libusb-1.0-0-dev \ + libusbredirhost-dev \ + libvdeplug-dev \ + libvirglrenderer-dev \ + libvte-2.91-dev \ + libxen-dev \ + libzstd-dev \ + llvm \ + locales \ + make \ + meson \ + multipath-tools \ + ncat \ + nettle-dev \ + ninja-build \ + openssh-client \ + perl-base \ + pkgconf \ + python3 \ + python3-numpy \ + python3-opencv \ + python3-pillow \ + python3-pip \ + python3-sphinx \ + python3-sphinx-rtd-theme \ + python3-venv \ + python3-yaml \ + rpm2cpio \ + sed \ + sparse \ + systemtap-sdt-dev \ + tar \ + tesseract-ocr \ + tesseract-ocr-eng \ + texinfo \ + xfslibs-dev \ + zlib1g-dev && \ eatmydata apt-get autoremove -y && \ eatmydata apt-get autoclean -y && \ sed -Ei 's,^# (en_US\.UTF-8 .*)$,\1,' /etc/locale.gen && \ @@ -140,11 +141,11 @@ RUN export DEBIAN_FRONTEND=noninteractive && \ ln -s /usr/bin/ccache /usr/libexec/ccache-wrappers/g++ && \ ln -s /usr/bin/ccache /usr/libexec/ccache-wrappers/gcc +ENV CCACHE_WRAPPERSDIR "/usr/libexec/ccache-wrappers" ENV LANG "en_US.UTF-8" ENV MAKE "/usr/bin/make" ENV NINJA "/usr/bin/ninja" ENV PYTHON "/usr/bin/python3" -ENV CCACHE_WRAPPERSDIR "/usr/libexec/ccache-wrappers" # netmap/cscope/global RUN DEBIAN_FRONTEND=noninteractive eatmydata \ apt install -y --no-install-recommends \ diff --git a/tests/docker/dockerfiles/debian-arm64-cross.docker b/tests/docker/dockerfiles/debian-arm64-cross.docker index 17a5709245..98885bd0ee 100644 --- a/tests/docker/dockerfiles/debian-arm64-cross.docker +++ b/tests/docker/dockerfiles/debian-arm64-cross.docker @@ -11,62 +11,63 @@ RUN export DEBIAN_FRONTEND=noninteractive && \ apt-get install -y eatmydata && \ eatmydata apt-get dist-upgrade -y && \ eatmydata apt-get install --no-install-recommends -y \ - bash \ - bc \ - bison \ - bsdextrautils \ - bzip2 \ - ca-certificates \ - ccache \ - dbus \ - debianutils \ - diffutils \ - exuberant-ctags \ - findutils \ - flex \ - gcovr \ - genisoimage \ - gettext \ - git \ - hostname \ - libglib2.0-dev \ - libpcre2-dev \ - libspice-protocol-dev \ - llvm \ - locales \ - make \ - meson \ - ncat \ - ninja-build \ - openssh-client \ - perl-base \ - pkgconf \ - python3 \ - python3-numpy \ - python3-opencv \ - python3-pillow \ - python3-pip \ - python3-sphinx \ - python3-sphinx-rtd-theme \ - python3-venv \ - python3-yaml \ - rpm2cpio \ - sed \ - sparse \ - tar \ - tesseract-ocr \ - tesseract-ocr-eng \ - texinfo && \ + bash \ + bc \ + bison \ + bsdextrautils \ + bzip2 \ + ca-certificates \ + ccache \ + dbus \ + debianutils \ + diffutils \ + exuberant-ctags \ + findutils \ + flex \ + gcovr \ + genisoimage \ + gettext \ + git \ + hostname \ + libglib2.0-dev \ + libpcre2-dev \ + libsndio-dev \ + libspice-protocol-dev \ + llvm \ + locales \ + make \ + meson \ + ncat \ + ninja-build \ + openssh-client \ + perl-base \ + pkgconf \ + python3 \ + python3-numpy \ + python3-opencv \ + python3-pillow \ + python3-pip \ + python3-sphinx \ + python3-sphinx-rtd-theme \ + python3-venv \ + python3-yaml \ + rpm2cpio \ + sed \ + sparse \ + tar \ + tesseract-ocr \ + tesseract-ocr-eng \ + texinfo && \ eatmydata apt-get autoremove -y && \ eatmydata apt-get autoclean -y && \ sed -Ei 's,^# (en_US\.UTF-8 .*)$,\1,' /etc/locale.gen && \ dpkg-reconfigure locales +ENV CCACHE_WRAPPERSDIR "/usr/libexec/ccache-wrappers" ENV LANG "en_US.UTF-8" ENV MAKE "/usr/bin/make" ENV NINJA "/usr/bin/ninja" ENV PYTHON "/usr/bin/python3" -ENV CCACHE_WRAPPERSDIR "/usr/libexec/ccache-wrappers" RUN export DEBIAN_FRONTEND=noninteractive && \ dpkg --add-architecture arm64 && \ @@ -74,75 +75,75 @@ RUN export DEBIAN_FRONTEND=noninteractive && \ eatmydata apt-get dist-upgrade -y && \ eatmydata apt-get install --no-install-recommends -y dpkg-dev && \ eatmydata apt-get install --no-install-recommends -y \ - g++-aarch64-linux-gnu \ - gcc-aarch64-linux-gnu \ - libaio-dev:arm64 \ - libasan5:arm64 \ - libasound2-dev:arm64 \ - libattr1-dev:arm64 \ - libbpf-dev:arm64 \ - libbrlapi-dev:arm64 \ - libbz2-dev:arm64 \ - libc6-dev:arm64 \ - libcacard-dev:arm64 \ - libcap-ng-dev:arm64 \ - libcapstone-dev:arm64 \ - libcmocka-dev:arm64 \ - libcurl4-gnutls-dev:arm64 \ - libdaxctl-dev:arm64 \ - libdrm-dev:arm64 \ - libepoxy-dev:arm64 \ - libfdt-dev:arm64 \ - libffi-dev:arm64 \ - libfuse3-dev:arm64 \ - libgbm-dev:arm64 \ - libgcrypt20-dev:arm64 \ - libglib2.0-dev:arm64 \ - libglusterfs-dev:arm64 \ - libgnutls28-dev:arm64 \ - libgtk-3-dev:arm64 \ - libibumad-dev:arm64 \ - libibverbs-dev:arm64 \ - libiscsi-dev:arm64 \ - libjemalloc-dev:arm64 \ - libjpeg62-turbo-dev:arm64 \ - libjson-c-dev:arm64 \ - liblttng-ust-dev:arm64 \ - liblzo2-dev:arm64 \ - libncursesw5-dev:arm64 \ - libnfs-dev:arm64 \ - libnuma-dev:arm64 \ - libpam0g-dev:arm64 \ - libpixman-1-dev:arm64 \ - libpng-dev:arm64 \ - libpulse-dev:arm64 \ - librbd-dev:arm64 \ - librdmacm-dev:arm64 \ - libsasl2-dev:arm64 \ - libsdl2-dev:arm64 \ - libsdl2-image-dev:arm64 \ - libseccomp-dev:arm64 \ - libselinux1-dev:arm64 \ - libslirp-dev:arm64 \ - libsnappy-dev:arm64 \ - libspice-server-dev:arm64 \ - libssh-gcrypt-dev:arm64 \ - libsystemd-dev:arm64 \ - libtasn1-6-dev:arm64 \ - libubsan1:arm64 \ - libudev-dev:arm64 \ - liburing-dev:arm64 \ - libusb-1.0-0-dev:arm64 \ - libusbredirhost-dev:arm64 \ - libvdeplug-dev:arm64 \ - libvirglrenderer-dev:arm64 \ - libvte-2.91-dev:arm64 \ - libxen-dev:arm64 \ - libzstd-dev:arm64 \ - nettle-dev:arm64 \ - systemtap-sdt-dev:arm64 \ - xfslibs-dev:arm64 \ - zlib1g-dev:arm64 && \ + g++-aarch64-linux-gnu \ + gcc-aarch64-linux-gnu \ + libaio-dev:arm64 \ + libasan5:arm64 \ + libasound2-dev:arm64 \ + libattr1-dev:arm64 \ + libbpf-dev:arm64 \ + libbrlapi-dev:arm64 \ + libbz2-dev:arm64 \ + libc6-dev:arm64 \ + libcacard-dev:arm64 \ + libcap-ng-dev:arm64 \ + libcapstone-dev:arm64 \ + libcmocka-dev:arm64 \ + libcurl4-gnutls-dev:arm64 \ + libdaxctl-dev:arm64 \ + libdrm-dev:arm64 \ + libepoxy-dev:arm64 \ + libfdt-dev:arm64 \ + libffi-dev:arm64 \ + libfuse3-dev:arm64 \ + libgbm-dev:arm64 \ + libgcrypt20-dev:arm64 \ + libglib2.0-dev:arm64 \ + libglusterfs-dev:arm64 \ + libgnutls28-dev:arm64 \ + libgtk-3-dev:arm64 \ + libibumad-dev:arm64 \ + libibverbs-dev:arm64 \ + libiscsi-dev:arm64 \ + libjemalloc-dev:arm64 \ + libjpeg62-turbo-dev:arm64 \ + libjson-c-dev:arm64 \ + liblttng-ust-dev:arm64 \ + liblzo2-dev:arm64 \ + libncursesw5-dev:arm64 \ + libnfs-dev:arm64 \ + libnuma-dev:arm64 \ + libpam0g-dev:arm64 \ + libpixman-1-dev:arm64 \ + libpng-dev:arm64 \ + libpulse-dev:arm64 \ + librbd-dev:arm64 \ + librdmacm-dev:arm64 \ + libsasl2-dev:arm64 \ + libsdl2-dev:arm64 \ + libsdl2-image-dev:arm64 \ + libseccomp-dev:arm64 \ + libselinux1-dev:arm64 \ + libslirp-dev:arm64 \ + libsnappy-dev:arm64 \ + libspice-server-dev:arm64 \ + libssh-gcrypt-dev:arm64 \ + libsystemd-dev:arm64 \ + libtasn1-6-dev:arm64 \ + libubsan1:arm64 \ + libudev-dev:arm64 \ + liburing-dev:arm64 \ + libusb-1.0-0-dev:arm64 \ + libusbredirhost-dev:arm64 \ + libvdeplug-dev:arm64 \ + libvirglrenderer-dev:arm64 \ + libvte-2.91-dev:arm64 \ + libxen-dev:arm64 \ + libzstd-dev:arm64 \ + nettle-dev:arm64 \ + systemtap-sdt-dev:arm64 \ + xfslibs-dev:arm64 \ + zlib1g-dev:arm64 && \ eatmydata apt-get autoremove -y && \ eatmydata apt-get autoclean -y && \ mkdir -p /usr/local/share/meson/cross && \ diff --git a/tests/docker/dockerfiles/debian-armel-cross.docker b/tests/docker/dockerfiles/debian-armel-cross.docker index 701fc70db0..d5c08714e4 100644 --- a/tests/docker/dockerfiles/debian-armel-cross.docker +++ b/tests/docker/dockerfiles/debian-armel-cross.docker @@ -11,62 +11,63 @@ RUN export DEBIAN_FRONTEND=noninteractive && \ apt-get install -y eatmydata && \ eatmydata apt-get dist-upgrade -y && \ eatmydata apt-get install --no-install-recommends -y \ - bash \ - bc \ - bison \ - bsdextrautils \ - bzip2 \ - ca-certificates \ - ccache \ - dbus \ - debianutils \ - diffutils \ - exuberant-ctags \ - findutils \ - flex \ - gcovr \ - genisoimage \ - gettext \ - git \ - hostname \ - libglib2.0-dev \ - libpcre2-dev \ - libspice-protocol-dev \ - llvm \ - locales \ - make \ - meson \ - ncat \ - ninja-build \ - openssh-client \ - perl-base \ - pkgconf \ - python3 \ - python3-numpy \ - python3-opencv \ - python3-pillow \ - python3-pip \ - python3-sphinx \ - python3-sphinx-rtd-theme \ - python3-venv \ - python3-yaml \ - rpm2cpio \ - sed \ - sparse \ - tar \ - tesseract-ocr \ - tesseract-ocr-eng \ - texinfo && \ + bash \ + bc \ + bison \ + bsdextrautils \ + bzip2 \ + ca-certificates \ + ccache \ + dbus \ + debianutils \ + diffutils \ + exuberant-ctags \ + findutils \ + flex \ + gcovr \ + genisoimage \ + gettext \ + git \ + hostname \ + libglib2.0-dev \ + libpcre2-dev \ + libsndio-dev \ + libspice-protocol-dev \ + llvm \ + locales \ + make \ + meson \ + ncat \ + ninja-build \ + openssh-client \ + perl-base \ + pkgconf \ + python3 \ + python3-numpy \ + python3-opencv \ + python3-pillow \ + python3-pip \ + python3-sphinx \ + python3-sphinx-rtd-theme \ + python3-venv \ + python3-yaml \ + rpm2cpio \ + sed \ + sparse \ + tar \ + tesseract-ocr \ + tesseract-ocr-eng \ + texinfo && \ eatmydata apt-get autoremove -y && \ eatmydata apt-get autoclean -y && \ sed -Ei 's,^# (en_US\.UTF-8 .*)$,\1,' /etc/locale.gen && \ dpkg-reconfigure locales +ENV CCACHE_WRAPPERSDIR "/usr/libexec/ccache-wrappers" ENV LANG "en_US.UTF-8" ENV MAKE "/usr/bin/make" ENV NINJA "/usr/bin/ninja" ENV PYTHON "/usr/bin/python3" -ENV CCACHE_WRAPPERSDIR "/usr/libexec/ccache-wrappers" RUN export DEBIAN_FRONTEND=noninteractive && \ dpkg --add-architecture armel && \ @@ -74,74 +75,74 @@ RUN export DEBIAN_FRONTEND=noninteractive && \ eatmydata apt-get dist-upgrade -y && \ eatmydata apt-get install --no-install-recommends -y dpkg-dev && \ eatmydata apt-get install --no-install-recommends -y \ - g++-arm-linux-gnueabi \ - gcc-arm-linux-gnueabi \ - libaio-dev:armel \ - libasan5:armel \ - libasound2-dev:armel \ - libattr1-dev:armel \ - libbpf-dev:armel \ - libbrlapi-dev:armel \ - libbz2-dev:armel \ - libc6-dev:armel \ - libcacard-dev:armel \ - libcap-ng-dev:armel \ - libcapstone-dev:armel \ - libcmocka-dev:armel \ - libcurl4-gnutls-dev:armel \ - libdaxctl-dev:armel \ - libdrm-dev:armel \ - libepoxy-dev:armel \ - libfdt-dev:armel \ - libffi-dev:armel \ - libfuse3-dev:armel \ - libgbm-dev:armel \ - libgcrypt20-dev:armel \ - libglib2.0-dev:armel \ - libglusterfs-dev:armel \ - libgnutls28-dev:armel \ - libgtk-3-dev:armel \ - libibumad-dev:armel \ - libibverbs-dev:armel \ - libiscsi-dev:armel \ - libjemalloc-dev:armel \ - libjpeg62-turbo-dev:armel \ - libjson-c-dev:armel \ - liblttng-ust-dev:armel \ - liblzo2-dev:armel \ - libncursesw5-dev:armel \ - libnfs-dev:armel \ - libnuma-dev:armel \ - libpam0g-dev:armel \ - libpixman-1-dev:armel \ - libpng-dev:armel \ - libpulse-dev:armel \ - librbd-dev:armel \ - librdmacm-dev:armel \ - libsasl2-dev:armel \ - libsdl2-dev:armel \ - libsdl2-image-dev:armel \ - libseccomp-dev:armel \ - libselinux1-dev:armel \ - libslirp-dev:armel \ - libsnappy-dev:armel \ - libspice-server-dev:armel \ - libssh-gcrypt-dev:armel \ - libsystemd-dev:armel \ - libtasn1-6-dev:armel \ - libubsan1:armel \ - libudev-dev:armel \ - liburing-dev:armel \ - libusb-1.0-0-dev:armel \ - libusbredirhost-dev:armel \ - libvdeplug-dev:armel \ - libvirglrenderer-dev:armel \ - libvte-2.91-dev:armel \ - libzstd-dev:armel \ - nettle-dev:armel \ - systemtap-sdt-dev:armel \ - xfslibs-dev:armel \ - zlib1g-dev:armel && \ + g++-arm-linux-gnueabi \ + gcc-arm-linux-gnueabi \ + libaio-dev:armel \ + libasan5:armel \ + libasound2-dev:armel \ + libattr1-dev:armel \ + libbpf-dev:armel \ + libbrlapi-dev:armel \ + libbz2-dev:armel \ + libc6-dev:armel \ + libcacard-dev:armel \ + libcap-ng-dev:armel \ + libcapstone-dev:armel \ + libcmocka-dev:armel \ + libcurl4-gnutls-dev:armel \ + libdaxctl-dev:armel \ + libdrm-dev:armel \ + libepoxy-dev:armel \ + libfdt-dev:armel \ + libffi-dev:armel \ + libfuse3-dev:armel \ + libgbm-dev:armel \ + libgcrypt20-dev:armel \ + libglib2.0-dev:armel \ + libglusterfs-dev:armel \ + libgnutls28-dev:armel \ + libgtk-3-dev:armel \ + libibumad-dev:armel \ + libibverbs-dev:armel \ + libiscsi-dev:armel \ + libjemalloc-dev:armel \ + libjpeg62-turbo-dev:armel \ + libjson-c-dev:armel \ + liblttng-ust-dev:armel \ + liblzo2-dev:armel \ + libncursesw5-dev:armel \ + libnfs-dev:armel \ + libnuma-dev:armel \ + libpam0g-dev:armel \ + libpixman-1-dev:armel \ + libpng-dev:armel \ + libpulse-dev:armel \ + librbd-dev:armel \ + librdmacm-dev:armel \ + libsasl2-dev:armel \ + libsdl2-dev:armel \ + libsdl2-image-dev:armel \ + libseccomp-dev:armel \ + libselinux1-dev:armel \ + libslirp-dev:armel \ + libsnappy-dev:armel \ + libspice-server-dev:armel \ + libssh-gcrypt-dev:armel \ + libsystemd-dev:armel \ + libtasn1-6-dev:armel \ + libubsan1:armel \ + libudev-dev:armel \ + liburing-dev:armel \ + libusb-1.0-0-dev:armel \ + libusbredirhost-dev:armel \ + libvdeplug-dev:armel \ + libvirglrenderer-dev:armel \ + libvte-2.91-dev:armel \ + libzstd-dev:armel \ + nettle-dev:armel \ + systemtap-sdt-dev:armel \ + xfslibs-dev:armel \ + zlib1g-dev:armel && \ eatmydata apt-get autoremove -y && \ eatmydata apt-get autoclean -y && \ mkdir -p /usr/local/share/meson/cross && \ diff --git a/tests/docker/dockerfiles/debian-armhf-cross.docker b/tests/docker/dockerfiles/debian-armhf-cross.docker index 5a11fe3900..471444fcf4 100644 --- a/tests/docker/dockerfiles/debian-armhf-cross.docker +++ b/tests/docker/dockerfiles/debian-armhf-cross.docker @@ -11,62 +11,63 @@ RUN export DEBIAN_FRONTEND=noninteractive && \ apt-get install -y eatmydata && \ eatmydata apt-get dist-upgrade -y && \ eatmydata apt-get install --no-install-recommends -y \ - bash \ - bc \ - bison \ - bsdextrautils \ - bzip2 \ - ca-certificates \ - ccache \ - dbus \ - debianutils \ - diffutils \ - exuberant-ctags \ - findutils \ - flex \ - gcovr \ - genisoimage \ - gettext \ - git \ - hostname \ - libglib2.0-dev \ - libpcre2-dev \ - libspice-protocol-dev \ - llvm \ - locales \ - make \ - meson \ - ncat \ - ninja-build \ - openssh-client \ - perl-base \ - pkgconf \ - python3 \ - python3-numpy \ - python3-opencv \ - python3-pillow \ - python3-pip \ - python3-sphinx \ - python3-sphinx-rtd-theme \ - python3-venv \ - python3-yaml \ - rpm2cpio \ - sed \ - sparse \ - tar \ - tesseract-ocr \ - tesseract-ocr-eng \ - texinfo && \ + bash \ + bc \ + bison \ + bsdextrautils \ + bzip2 \ + ca-certificates \ + ccache \ + dbus \ + debianutils \ + diffutils \ + exuberant-ctags \ + findutils \ + flex \ + gcovr \ + genisoimage \ + gettext \ + git \ + hostname \ + libglib2.0-dev \ + libpcre2-dev \ + libsndio-dev \ + libspice-protocol-dev \ + llvm \ + locales \ + make \ + meson \ + ncat \ + ninja-build \ + openssh-client \ + perl-base \ + pkgconf \ + python3 \ + python3-numpy \ + python3-opencv \ + python3-pillow \ + python3-pip \ + python3-sphinx \ + python3-sphinx-rtd-theme \ + python3-venv \ + python3-yaml \ + rpm2cpio \ + sed \ + sparse \ + tar \ + tesseract-ocr \ + tesseract-ocr-eng \ + texinfo && \ eatmydata apt-get autoremove -y && \ eatmydata apt-get autoclean -y && \ sed -Ei 's,^# (en_US\.UTF-8 .*)$,\1,' /etc/locale.gen && \ dpkg-reconfigure locales +ENV CCACHE_WRAPPERSDIR "/usr/libexec/ccache-wrappers" ENV LANG "en_US.UTF-8" ENV MAKE "/usr/bin/make" ENV NINJA "/usr/bin/ninja" ENV PYTHON "/usr/bin/python3" -ENV CCACHE_WRAPPERSDIR "/usr/libexec/ccache-wrappers" RUN export DEBIAN_FRONTEND=noninteractive && \ dpkg --add-architecture armhf && \ @@ -74,75 +75,75 @@ RUN export DEBIAN_FRONTEND=noninteractive && \ eatmydata apt-get dist-upgrade -y && \ eatmydata apt-get install --no-install-recommends -y dpkg-dev && \ eatmydata apt-get install --no-install-recommends -y \ - g++-arm-linux-gnueabihf \ - gcc-arm-linux-gnueabihf \ - libaio-dev:armhf \ - libasan5:armhf \ - libasound2-dev:armhf \ - libattr1-dev:armhf \ - libbpf-dev:armhf \ - libbrlapi-dev:armhf \ - libbz2-dev:armhf \ - libc6-dev:armhf \ - libcacard-dev:armhf \ - libcap-ng-dev:armhf \ - libcapstone-dev:armhf \ - libcmocka-dev:armhf \ - libcurl4-gnutls-dev:armhf \ - libdaxctl-dev:armhf \ - libdrm-dev:armhf \ - libepoxy-dev:armhf \ - libfdt-dev:armhf \ - libffi-dev:armhf \ - libfuse3-dev:armhf \ - libgbm-dev:armhf \ - libgcrypt20-dev:armhf \ - libglib2.0-dev:armhf \ - libglusterfs-dev:armhf \ - libgnutls28-dev:armhf \ - libgtk-3-dev:armhf \ - libibumad-dev:armhf \ - libibverbs-dev:armhf \ - libiscsi-dev:armhf \ - libjemalloc-dev:armhf \ - libjpeg62-turbo-dev:armhf \ - libjson-c-dev:armhf \ - liblttng-ust-dev:armhf \ - liblzo2-dev:armhf \ - libncursesw5-dev:armhf \ - libnfs-dev:armhf \ - libnuma-dev:armhf \ - libpam0g-dev:armhf \ - libpixman-1-dev:armhf \ - libpng-dev:armhf \ - libpulse-dev:armhf \ - librbd-dev:armhf \ - librdmacm-dev:armhf \ - libsasl2-dev:armhf \ - libsdl2-dev:armhf \ - libsdl2-image-dev:armhf \ - libseccomp-dev:armhf \ - libselinux1-dev:armhf \ - libslirp-dev:armhf \ - libsnappy-dev:armhf \ - libspice-server-dev:armhf \ - libssh-gcrypt-dev:armhf \ - libsystemd-dev:armhf \ - libtasn1-6-dev:armhf \ - libubsan1:armhf \ - libudev-dev:armhf \ - liburing-dev:armhf \ - libusb-1.0-0-dev:armhf \ - libusbredirhost-dev:armhf \ - libvdeplug-dev:armhf \ - libvirglrenderer-dev:armhf \ - libvte-2.91-dev:armhf \ - libxen-dev:armhf \ - libzstd-dev:armhf \ - nettle-dev:armhf \ - systemtap-sdt-dev:armhf \ - xfslibs-dev:armhf \ - zlib1g-dev:armhf && \ + g++-arm-linux-gnueabihf \ + gcc-arm-linux-gnueabihf \ + libaio-dev:armhf \ + libasan5:armhf \ + libasound2-dev:armhf \ + libattr1-dev:armhf \ + libbpf-dev:armhf \ + libbrlapi-dev:armhf \ + libbz2-dev:armhf \ + libc6-dev:armhf \ + libcacard-dev:armhf \ + libcap-ng-dev:armhf \ + libcapstone-dev:armhf \ + libcmocka-dev:armhf \ + libcurl4-gnutls-dev:armhf \ + libdaxctl-dev:armhf \ + libdrm-dev:armhf \ + libepoxy-dev:armhf \ + libfdt-dev:armhf \ + libffi-dev:armhf \ + libfuse3-dev:armhf \ + libgbm-dev:armhf \ + libgcrypt20-dev:armhf \ + libglib2.0-dev:armhf \ + libglusterfs-dev:armhf \ + libgnutls28-dev:armhf \ + libgtk-3-dev:armhf \ + libibumad-dev:armhf \ + libibverbs-dev:armhf \ + libiscsi-dev:armhf \ + libjemalloc-dev:armhf \ + libjpeg62-turbo-dev:armhf \ + libjson-c-dev:armhf \ + liblttng-ust-dev:armhf \ + liblzo2-dev:armhf \ + libncursesw5-dev:armhf \ + libnfs-dev:armhf \ + libnuma-dev:armhf \ + libpam0g-dev:armhf \ + libpixman-1-dev:armhf \ + libpng-dev:armhf \ + libpulse-dev:armhf \ + librbd-dev:armhf \ + librdmacm-dev:armhf \ + libsasl2-dev:armhf \ + libsdl2-dev:armhf \ + libsdl2-image-dev:armhf \ + libseccomp-dev:armhf \ + libselinux1-dev:armhf \ + libslirp-dev:armhf \ + libsnappy-dev:armhf \ + libspice-server-dev:armhf \ + libssh-gcrypt-dev:armhf \ + libsystemd-dev:armhf \ + libtasn1-6-dev:armhf \ + libubsan1:armhf \ + libudev-dev:armhf \ + liburing-dev:armhf \ + libusb-1.0-0-dev:armhf \ + libusbredirhost-dev:armhf \ + libvdeplug-dev:armhf \ + libvirglrenderer-dev:armhf \ + libvte-2.91-dev:armhf \ + libxen-dev:armhf \ + libzstd-dev:armhf \ + nettle-dev:armhf \ + systemtap-sdt-dev:armhf \ + xfslibs-dev:armhf \ + zlib1g-dev:armhf && \ eatmydata apt-get autoremove -y && \ eatmydata apt-get autoclean -y && \ mkdir -p /usr/local/share/meson/cross && \ diff --git a/tests/docker/dockerfiles/debian-mips64el-cross.docker b/tests/docker/dockerfiles/debian-mips64el-cross.docker index 9b90a4d6ff..15b0224b76 100644 --- a/tests/docker/dockerfiles/debian-mips64el-cross.docker +++ b/tests/docker/dockerfiles/debian-mips64el-cross.docker @@ -11,62 +11,63 @@ RUN export DEBIAN_FRONTEND=noninteractive && \ apt-get install -y eatmydata && \ eatmydata apt-get dist-upgrade -y && \ eatmydata apt-get install --no-install-recommends -y \ - bash \ - bc \ - bison \ - bsdextrautils \ - bzip2 \ - ca-certificates \ - ccache \ - dbus \ - debianutils \ - diffutils \ - exuberant-ctags \ - findutils \ - flex \ - gcovr \ - genisoimage \ - gettext \ - git \ - hostname \ - libglib2.0-dev \ - libpcre2-dev \ - libspice-protocol-dev \ - llvm \ - locales \ - make \ - meson \ - ncat \ - ninja-build \ - openssh-client \ - perl-base \ - pkgconf \ - python3 \ - python3-numpy \ - python3-opencv \ - python3-pillow \ - python3-pip \ - python3-sphinx \ - python3-sphinx-rtd-theme \ - python3-venv \ - python3-yaml \ - rpm2cpio \ - sed \ - sparse \ - tar \ - tesseract-ocr \ - tesseract-ocr-eng \ - texinfo && \ + bash \ + bc \ + bison \ + bsdextrautils \ + bzip2 \ + ca-certificates \ + ccache \ + dbus \ + debianutils \ + diffutils \ + exuberant-ctags \ + findutils \ + flex \ + gcovr \ + genisoimage \ + gettext \ + git \ + hostname \ + libglib2.0-dev \ + libpcre2-dev \ + libsndio-dev \ + libspice-protocol-dev \ + llvm \ + locales \ + make \ + meson \ + ncat \ + ninja-build \ + openssh-client \ + perl-base \ + pkgconf \ + python3 \ + python3-numpy \ + python3-opencv \ + python3-pillow \ + python3-pip \ + python3-sphinx \ + python3-sphinx-rtd-theme \ + python3-venv \ + python3-yaml \ + rpm2cpio \ + sed \ + sparse \ + tar \ + tesseract-ocr \ + tesseract-ocr-eng \ + texinfo && \ eatmydata apt-get autoremove -y && \ eatmydata apt-get autoclean -y && \ sed -Ei 's,^# (en_US\.UTF-8 .*)$,\1,' /etc/locale.gen && \ dpkg-reconfigure locales +ENV CCACHE_WRAPPERSDIR "/usr/libexec/ccache-wrappers" ENV LANG "en_US.UTF-8" ENV MAKE "/usr/bin/make" ENV NINJA "/usr/bin/ninja" ENV PYTHON "/usr/bin/python3" -ENV CCACHE_WRAPPERSDIR "/usr/libexec/ccache-wrappers" RUN export DEBIAN_FRONTEND=noninteractive && \ dpkg --add-architecture mips64el && \ @@ -74,72 +75,72 @@ RUN export DEBIAN_FRONTEND=noninteractive && \ eatmydata apt-get dist-upgrade -y && \ eatmydata apt-get install --no-install-recommends -y dpkg-dev && \ eatmydata apt-get install --no-install-recommends -y \ - g++-mips64el-linux-gnuabi64 \ - gcc-mips64el-linux-gnuabi64 \ - libaio-dev:mips64el \ - libasound2-dev:mips64el \ - libattr1-dev:mips64el \ - libbpf-dev:mips64el \ - libbrlapi-dev:mips64el \ - libbz2-dev:mips64el \ - libc6-dev:mips64el \ - libcacard-dev:mips64el \ - libcap-ng-dev:mips64el \ - libcapstone-dev:mips64el \ - libcmocka-dev:mips64el \ - libcurl4-gnutls-dev:mips64el \ - libdaxctl-dev:mips64el \ - libdrm-dev:mips64el \ - libepoxy-dev:mips64el \ - libfdt-dev:mips64el \ - libffi-dev:mips64el \ - libfuse3-dev:mips64el \ - libgbm-dev:mips64el \ - libgcrypt20-dev:mips64el \ - libglib2.0-dev:mips64el \ - libglusterfs-dev:mips64el \ - libgnutls28-dev:mips64el \ - libgtk-3-dev:mips64el \ - libibumad-dev:mips64el \ - libibverbs-dev:mips64el \ - libiscsi-dev:mips64el \ - libjemalloc-dev:mips64el \ - libjpeg62-turbo-dev:mips64el \ - libjson-c-dev:mips64el \ - liblttng-ust-dev:mips64el \ - liblzo2-dev:mips64el \ - libncursesw5-dev:mips64el \ - libnfs-dev:mips64el \ - libnuma-dev:mips64el \ - libpam0g-dev:mips64el \ - libpixman-1-dev:mips64el \ - libpng-dev:mips64el \ - libpulse-dev:mips64el \ - librbd-dev:mips64el \ - librdmacm-dev:mips64el \ - libsasl2-dev:mips64el \ - libsdl2-dev:mips64el \ - libsdl2-image-dev:mips64el \ - libseccomp-dev:mips64el \ - libselinux1-dev:mips64el \ - libslirp-dev:mips64el \ - libsnappy-dev:mips64el \ - libspice-server-dev:mips64el \ - libssh-gcrypt-dev:mips64el \ - libsystemd-dev:mips64el \ - libtasn1-6-dev:mips64el \ - libudev-dev:mips64el \ - liburing-dev:mips64el \ - libusb-1.0-0-dev:mips64el \ - libusbredirhost-dev:mips64el \ - libvdeplug-dev:mips64el \ - libvirglrenderer-dev:mips64el \ - libvte-2.91-dev:mips64el \ - libzstd-dev:mips64el \ - nettle-dev:mips64el \ - systemtap-sdt-dev:mips64el \ - xfslibs-dev:mips64el \ - zlib1g-dev:mips64el && \ + g++-mips64el-linux-gnuabi64 \ + gcc-mips64el-linux-gnuabi64 \ + libaio-dev:mips64el \ + libasound2-dev:mips64el \ + libattr1-dev:mips64el \ + libbpf-dev:mips64el \ + libbrlapi-dev:mips64el \ + libbz2-dev:mips64el \ + libc6-dev:mips64el \ + libcacard-dev:mips64el \ + libcap-ng-dev:mips64el \ + libcapstone-dev:mips64el \ + libcmocka-dev:mips64el \ + libcurl4-gnutls-dev:mips64el \ + libdaxctl-dev:mips64el \ + libdrm-dev:mips64el \ + libepoxy-dev:mips64el \ + libfdt-dev:mips64el \ + libffi-dev:mips64el \ + libfuse3-dev:mips64el \ + libgbm-dev:mips64el \ + libgcrypt20-dev:mips64el \ + libglib2.0-dev:mips64el \ + libglusterfs-dev:mips64el \ + libgnutls28-dev:mips64el \ + libgtk-3-dev:mips64el \ + libibumad-dev:mips64el \ + libibverbs-dev:mips64el \ + libiscsi-dev:mips64el \ + libjemalloc-dev:mips64el \ + libjpeg62-turbo-dev:mips64el \ + libjson-c-dev:mips64el \ + liblttng-ust-dev:mips64el \ + liblzo2-dev:mips64el \ + libncursesw5-dev:mips64el \ + libnfs-dev:mips64el \ + libnuma-dev:mips64el \ + libpam0g-dev:mips64el \ + libpixman-1-dev:mips64el \ + libpng-dev:mips64el \ + libpulse-dev:mips64el \ + librbd-dev:mips64el \ + librdmacm-dev:mips64el \ + libsasl2-dev:mips64el \ + libsdl2-dev:mips64el \ + libsdl2-image-dev:mips64el \ + libseccomp-dev:mips64el \ + libselinux1-dev:mips64el \ + libslirp-dev:mips64el \ + libsnappy-dev:mips64el \ + libspice-server-dev:mips64el \ + libssh-gcrypt-dev:mips64el \ + libsystemd-dev:mips64el \ + libtasn1-6-dev:mips64el \ + libudev-dev:mips64el \ + liburing-dev:mips64el \ + libusb-1.0-0-dev:mips64el \ + libusbredirhost-dev:mips64el \ + libvdeplug-dev:mips64el \ + libvirglrenderer-dev:mips64el \ + libvte-2.91-dev:mips64el \ + libzstd-dev:mips64el \ + nettle-dev:mips64el \ + systemtap-sdt-dev:mips64el \ + xfslibs-dev:mips64el \ + zlib1g-dev:mips64el && \ eatmydata apt-get autoremove -y && \ eatmydata apt-get autoclean -y && \ mkdir -p /usr/local/share/meson/cross && \ diff --git a/tests/docker/dockerfiles/debian-mipsel-cross.docker b/tests/docker/dockerfiles/debian-mipsel-cross.docker index 02feaf26cb..a5d3ca6e2f 100644 --- a/tests/docker/dockerfiles/debian-mipsel-cross.docker +++ b/tests/docker/dockerfiles/debian-mipsel-cross.docker @@ -11,62 +11,63 @@ RUN export DEBIAN_FRONTEND=noninteractive && \ apt-get install -y eatmydata && \ eatmydata apt-get dist-upgrade -y && \ eatmydata apt-get install --no-install-recommends -y \ - bash \ - bc \ - bison \ - bsdextrautils \ - bzip2 \ - ca-certificates \ - ccache \ - dbus \ - debianutils \ - diffutils \ - exuberant-ctags \ - findutils \ - flex \ - gcovr \ - genisoimage \ - gettext \ - git \ - hostname \ - libglib2.0-dev \ - libpcre2-dev \ - libspice-protocol-dev \ - llvm \ - locales \ - make \ - meson \ - ncat \ - ninja-build \ - openssh-client \ - perl-base \ - pkgconf \ - python3 \ - python3-numpy \ - python3-opencv \ - python3-pillow \ - python3-pip \ - python3-sphinx \ - python3-sphinx-rtd-theme \ - python3-venv \ - python3-yaml \ - rpm2cpio \ - sed \ - sparse \ - tar \ - tesseract-ocr \ - tesseract-ocr-eng \ - texinfo && \ + bash \ + bc \ + bison \ + bsdextrautils \ + bzip2 \ + ca-certificates \ + ccache \ + dbus \ + debianutils \ + diffutils \ + exuberant-ctags \ + findutils \ + flex \ + gcovr \ + genisoimage \ + gettext \ + git \ + hostname \ + libglib2.0-dev \ + libpcre2-dev \ + libsndio-dev \ + libspice-protocol-dev \ + llvm \ + locales \ + make \ + meson \ + ncat \ + ninja-build \ + openssh-client \ + perl-base \ + pkgconf \ + python3 \ + python3-numpy \ + python3-opencv \ + python3-pillow \ + python3-pip \ + python3-sphinx \ + python3-sphinx-rtd-theme \ + python3-venv \ + python3-yaml \ + rpm2cpio \ + sed \ + sparse \ + tar \ + tesseract-ocr \ + tesseract-ocr-eng \ + texinfo && \ eatmydata apt-get autoremove -y && \ eatmydata apt-get autoclean -y && \ sed -Ei 's,^# (en_US\.UTF-8 .*)$,\1,' /etc/locale.gen && \ dpkg-reconfigure locales +ENV CCACHE_WRAPPERSDIR "/usr/libexec/ccache-wrappers" ENV LANG "en_US.UTF-8" ENV MAKE "/usr/bin/make" ENV NINJA "/usr/bin/ninja" ENV PYTHON "/usr/bin/python3" -ENV CCACHE_WRAPPERSDIR "/usr/libexec/ccache-wrappers" RUN export DEBIAN_FRONTEND=noninteractive && \ dpkg --add-architecture mipsel && \ @@ -74,72 +75,72 @@ RUN export DEBIAN_FRONTEND=noninteractive && \ eatmydata apt-get dist-upgrade -y && \ eatmydata apt-get install --no-install-recommends -y dpkg-dev && \ eatmydata apt-get install --no-install-recommends -y \ - g++-mipsel-linux-gnu \ - gcc-mipsel-linux-gnu \ - libaio-dev:mipsel \ - libasound2-dev:mipsel \ - libattr1-dev:mipsel \ - libbpf-dev:mipsel \ - libbrlapi-dev:mipsel \ - libbz2-dev:mipsel \ - libc6-dev:mipsel \ - libcacard-dev:mipsel \ - libcap-ng-dev:mipsel \ - libcapstone-dev:mipsel \ - libcmocka-dev:mipsel \ - libcurl4-gnutls-dev:mipsel \ - libdaxctl-dev:mipsel \ - libdrm-dev:mipsel \ - libepoxy-dev:mipsel \ - libfdt-dev:mipsel \ - libffi-dev:mipsel \ - libfuse3-dev:mipsel \ - libgbm-dev:mipsel \ - libgcrypt20-dev:mipsel \ - libglib2.0-dev:mipsel \ - libglusterfs-dev:mipsel \ - libgnutls28-dev:mipsel \ - libgtk-3-dev:mipsel \ - libibumad-dev:mipsel \ - libibverbs-dev:mipsel \ - libiscsi-dev:mipsel \ - libjemalloc-dev:mipsel \ - libjpeg62-turbo-dev:mipsel \ - libjson-c-dev:mipsel \ - liblttng-ust-dev:mipsel \ - liblzo2-dev:mipsel \ - libncursesw5-dev:mipsel \ - libnfs-dev:mipsel \ - libnuma-dev:mipsel \ - libpam0g-dev:mipsel \ - libpixman-1-dev:mipsel \ - libpng-dev:mipsel \ - libpulse-dev:mipsel \ - librbd-dev:mipsel \ - librdmacm-dev:mipsel \ - libsasl2-dev:mipsel \ - libsdl2-dev:mipsel \ - libsdl2-image-dev:mipsel \ - libseccomp-dev:mipsel \ - libselinux1-dev:mipsel \ - libslirp-dev:mipsel \ - libsnappy-dev:mipsel \ - libspice-server-dev:mipsel \ - libssh-gcrypt-dev:mipsel \ - libsystemd-dev:mipsel \ - libtasn1-6-dev:mipsel \ - libudev-dev:mipsel \ - liburing-dev:mipsel \ - libusb-1.0-0-dev:mipsel \ - libusbredirhost-dev:mipsel \ - libvdeplug-dev:mipsel \ - libvirglrenderer-dev:mipsel \ - libvte-2.91-dev:mipsel \ - libzstd-dev:mipsel \ - nettle-dev:mipsel \ - systemtap-sdt-dev:mipsel \ - xfslibs-dev:mipsel \ - zlib1g-dev:mipsel && \ + g++-mipsel-linux-gnu \ + gcc-mipsel-linux-gnu \ + libaio-dev:mipsel \ + libasound2-dev:mipsel \ + libattr1-dev:mipsel \ + libbpf-dev:mipsel \ + libbrlapi-dev:mipsel \ + libbz2-dev:mipsel \ + libc6-dev:mipsel \ + libcacard-dev:mipsel \ + libcap-ng-dev:mipsel \ + libcapstone-dev:mipsel \ + libcmocka-dev:mipsel \ + libcurl4-gnutls-dev:mipsel \ + libdaxctl-dev:mipsel \ + libdrm-dev:mipsel \ + libepoxy-dev:mipsel \ + libfdt-dev:mipsel \ + libffi-dev:mipsel \ + libfuse3-dev:mipsel \ + libgbm-dev:mipsel \ + libgcrypt20-dev:mipsel \ + libglib2.0-dev:mipsel \ + libglusterfs-dev:mipsel \ + libgnutls28-dev:mipsel \ + libgtk-3-dev:mipsel \ + libibumad-dev:mipsel \ + libibverbs-dev:mipsel \ + libiscsi-dev:mipsel \ + libjemalloc-dev:mipsel \ + libjpeg62-turbo-dev:mipsel \ + libjson-c-dev:mipsel \ + liblttng-ust-dev:mipsel \ + liblzo2-dev:mipsel \ + libncursesw5-dev:mipsel \ + libnfs-dev:mipsel \ + libnuma-dev:mipsel \ + libpam0g-dev:mipsel \ + libpixman-1-dev:mipsel \ + libpng-dev:mipsel \ + libpulse-dev:mipsel \ + librbd-dev:mipsel \ + librdmacm-dev:mipsel \ + libsasl2-dev:mipsel \ + libsdl2-dev:mipsel \ + libsdl2-image-dev:mipsel \ + libseccomp-dev:mipsel \ + libselinux1-dev:mipsel \ + libslirp-dev:mipsel \ + libsnappy-dev:mipsel \ + libspice-server-dev:mipsel \ + libssh-gcrypt-dev:mipsel \ + libsystemd-dev:mipsel \ + libtasn1-6-dev:mipsel \ + libudev-dev:mipsel \ + liburing-dev:mipsel \ + libusb-1.0-0-dev:mipsel \ + libusbredirhost-dev:mipsel \ + libvdeplug-dev:mipsel \ + libvirglrenderer-dev:mipsel \ + libvte-2.91-dev:mipsel \ + libzstd-dev:mipsel \ + nettle-dev:mipsel \ + systemtap-sdt-dev:mipsel \ + xfslibs-dev:mipsel \ + zlib1g-dev:mipsel && \ eatmydata apt-get autoremove -y && \ eatmydata apt-get autoclean -y && \ mkdir -p /usr/local/share/meson/cross && \ diff --git a/tests/docker/dockerfiles/debian-ppc64el-cross.docker b/tests/docker/dockerfiles/debian-ppc64el-cross.docker index 97d3872ee2..d2954e61f6 100644 --- a/tests/docker/dockerfiles/debian-ppc64el-cross.docker +++ b/tests/docker/dockerfiles/debian-ppc64el-cross.docker @@ -11,62 +11,63 @@ RUN export DEBIAN_FRONTEND=noninteractive && \ apt-get install -y eatmydata && \ eatmydata apt-get dist-upgrade -y && \ eatmydata apt-get install --no-install-recommends -y \ - bash \ - bc \ - bison \ - bsdextrautils \ - bzip2 \ - ca-certificates \ - ccache \ - dbus \ - debianutils \ - diffutils \ - exuberant-ctags \ - findutils \ - flex \ - gcovr \ - genisoimage \ - gettext \ - git \ - hostname \ - libglib2.0-dev \ - libpcre2-dev \ - libspice-protocol-dev \ - llvm \ - locales \ - make \ - meson \ - ncat \ - ninja-build \ - openssh-client \ - perl-base \ - pkgconf \ - python3 \ - python3-numpy \ - python3-opencv \ - python3-pillow \ - python3-pip \ - python3-sphinx \ - python3-sphinx-rtd-theme \ - python3-venv \ - python3-yaml \ - rpm2cpio \ - sed \ - sparse \ - tar \ - tesseract-ocr \ - tesseract-ocr-eng \ - texinfo && \ + bash \ + bc \ + bison \ + bsdextrautils \ + bzip2 \ + ca-certificates \ + ccache \ + dbus \ + debianutils \ + diffutils \ + exuberant-ctags \ + findutils \ + flex \ + gcovr \ + genisoimage \ + gettext \ + git \ + hostname \ + libglib2.0-dev \ + libpcre2-dev \ + libsndio-dev \ + libspice-protocol-dev \ + llvm \ + locales \ + make \ + meson \ + ncat \ + ninja-build \ + openssh-client \ + perl-base \ + pkgconf \ + python3 \ + python3-numpy \ + python3-opencv \ + python3-pillow \ + python3-pip \ + python3-sphinx \ + python3-sphinx-rtd-theme \ + python3-venv \ + python3-yaml \ + rpm2cpio \ + sed \ + sparse \ + tar \ + tesseract-ocr \ + tesseract-ocr-eng \ + texinfo && \ eatmydata apt-get autoremove -y && \ eatmydata apt-get autoclean -y && \ sed -Ei 's,^# (en_US\.UTF-8 .*)$,\1,' /etc/locale.gen && \ dpkg-reconfigure locales +ENV CCACHE_WRAPPERSDIR "/usr/libexec/ccache-wrappers" ENV LANG "en_US.UTF-8" ENV MAKE "/usr/bin/make" ENV NINJA "/usr/bin/ninja" ENV PYTHON "/usr/bin/python3" -ENV CCACHE_WRAPPERSDIR "/usr/libexec/ccache-wrappers" RUN export DEBIAN_FRONTEND=noninteractive && \ dpkg --add-architecture ppc64el && \ @@ -74,74 +75,74 @@ RUN export DEBIAN_FRONTEND=noninteractive && \ eatmydata apt-get dist-upgrade -y && \ eatmydata apt-get install --no-install-recommends -y dpkg-dev && \ eatmydata apt-get install --no-install-recommends -y \ - g++-powerpc64le-linux-gnu \ - gcc-powerpc64le-linux-gnu \ - libaio-dev:ppc64el \ - libasan5:ppc64el \ - libasound2-dev:ppc64el \ - libattr1-dev:ppc64el \ - libbpf-dev:ppc64el \ - libbrlapi-dev:ppc64el \ - libbz2-dev:ppc64el \ - libc6-dev:ppc64el \ - libcacard-dev:ppc64el \ - libcap-ng-dev:ppc64el \ - libcapstone-dev:ppc64el \ - libcmocka-dev:ppc64el \ - libcurl4-gnutls-dev:ppc64el \ - libdaxctl-dev:ppc64el \ - libdrm-dev:ppc64el \ - libepoxy-dev:ppc64el \ - libfdt-dev:ppc64el \ - libffi-dev:ppc64el \ - libfuse3-dev:ppc64el \ - libgbm-dev:ppc64el \ - libgcrypt20-dev:ppc64el \ - libglib2.0-dev:ppc64el \ - libglusterfs-dev:ppc64el \ - libgnutls28-dev:ppc64el \ - libgtk-3-dev:ppc64el \ - libibumad-dev:ppc64el \ - libibverbs-dev:ppc64el \ - libiscsi-dev:ppc64el \ - libjemalloc-dev:ppc64el \ - libjpeg62-turbo-dev:ppc64el \ - libjson-c-dev:ppc64el \ - liblttng-ust-dev:ppc64el \ - liblzo2-dev:ppc64el \ - libncursesw5-dev:ppc64el \ - libnfs-dev:ppc64el \ - libnuma-dev:ppc64el \ - libpam0g-dev:ppc64el \ - libpixman-1-dev:ppc64el \ - libpng-dev:ppc64el \ - libpulse-dev:ppc64el \ - librbd-dev:ppc64el \ - librdmacm-dev:ppc64el \ - libsasl2-dev:ppc64el \ - libsdl2-dev:ppc64el \ - libsdl2-image-dev:ppc64el \ - libseccomp-dev:ppc64el \ - libselinux1-dev:ppc64el \ - libslirp-dev:ppc64el \ - libsnappy-dev:ppc64el \ - libspice-server-dev:ppc64el \ - libssh-gcrypt-dev:ppc64el \ - libsystemd-dev:ppc64el \ - libtasn1-6-dev:ppc64el \ - libubsan1:ppc64el \ - libudev-dev:ppc64el \ - liburing-dev:ppc64el \ - libusb-1.0-0-dev:ppc64el \ - libusbredirhost-dev:ppc64el \ - libvdeplug-dev:ppc64el \ - libvirglrenderer-dev:ppc64el \ - libvte-2.91-dev:ppc64el \ - libzstd-dev:ppc64el \ - nettle-dev:ppc64el \ - systemtap-sdt-dev:ppc64el \ - xfslibs-dev:ppc64el \ - zlib1g-dev:ppc64el && \ + g++-powerpc64le-linux-gnu \ + gcc-powerpc64le-linux-gnu \ + libaio-dev:ppc64el \ + libasan5:ppc64el \ + libasound2-dev:ppc64el \ + libattr1-dev:ppc64el \ + libbpf-dev:ppc64el \ + libbrlapi-dev:ppc64el \ + libbz2-dev:ppc64el \ + libc6-dev:ppc64el \ + libcacard-dev:ppc64el \ + libcap-ng-dev:ppc64el \ + libcapstone-dev:ppc64el \ + libcmocka-dev:ppc64el \ + libcurl4-gnutls-dev:ppc64el \ + libdaxctl-dev:ppc64el \ + libdrm-dev:ppc64el \ + libepoxy-dev:ppc64el \ + libfdt-dev:ppc64el \ + libffi-dev:ppc64el \ + libfuse3-dev:ppc64el \ + libgbm-dev:ppc64el \ + libgcrypt20-dev:ppc64el \ + libglib2.0-dev:ppc64el \ + libglusterfs-dev:ppc64el \ + libgnutls28-dev:ppc64el \ + libgtk-3-dev:ppc64el \ + libibumad-dev:ppc64el \ + libibverbs-dev:ppc64el \ + libiscsi-dev:ppc64el \ + libjemalloc-dev:ppc64el \ + libjpeg62-turbo-dev:ppc64el \ + libjson-c-dev:ppc64el \ + liblttng-ust-dev:ppc64el \ + liblzo2-dev:ppc64el \ + libncursesw5-dev:ppc64el \ + libnfs-dev:ppc64el \ + libnuma-dev:ppc64el \ + libpam0g-dev:ppc64el \ + libpixman-1-dev:ppc64el \ + libpng-dev:ppc64el \ + libpulse-dev:ppc64el \ + librbd-dev:ppc64el \ + librdmacm-dev:ppc64el \ + libsasl2-dev:ppc64el \ + libsdl2-dev:ppc64el \ + libsdl2-image-dev:ppc64el \ + libseccomp-dev:ppc64el \ + libselinux1-dev:ppc64el \ + libslirp-dev:ppc64el \ + libsnappy-dev:ppc64el \ + libspice-server-dev:ppc64el \ + libssh-gcrypt-dev:ppc64el \ + libsystemd-dev:ppc64el \ + libtasn1-6-dev:ppc64el \ + libubsan1:ppc64el \ + libudev-dev:ppc64el \ + liburing-dev:ppc64el \ + libusb-1.0-0-dev:ppc64el \ + libusbredirhost-dev:ppc64el \ + libvdeplug-dev:ppc64el \ + libvirglrenderer-dev:ppc64el \ + libvte-2.91-dev:ppc64el \ + libzstd-dev:ppc64el \ + nettle-dev:ppc64el \ + systemtap-sdt-dev:ppc64el \ + xfslibs-dev:ppc64el \ + zlib1g-dev:ppc64el && \ eatmydata apt-get autoremove -y && \ eatmydata apt-get autoclean -y && \ mkdir -p /usr/local/share/meson/cross && \ diff --git a/tests/docker/dockerfiles/debian-s390x-cross.docker b/tests/docker/dockerfiles/debian-s390x-cross.docker index 95585e9e56..d43ce16317 100644 --- a/tests/docker/dockerfiles/debian-s390x-cross.docker +++ b/tests/docker/dockerfiles/debian-s390x-cross.docker @@ -11,62 +11,63 @@ RUN export DEBIAN_FRONTEND=noninteractive && \ apt-get install -y eatmydata && \ eatmydata apt-get dist-upgrade -y && \ eatmydata apt-get install --no-install-recommends -y \ - bash \ - bc \ - bison \ - bsdextrautils \ - bzip2 \ - ca-certificates \ - ccache \ - dbus \ - debianutils \ - diffutils \ - exuberant-ctags \ - findutils \ - flex \ - gcovr \ - genisoimage \ - gettext \ - git \ - hostname \ - libglib2.0-dev \ - libpcre2-dev \ - libspice-protocol-dev \ - llvm \ - locales \ - make \ - meson \ - ncat \ - ninja-build \ - openssh-client \ - perl-base \ - pkgconf \ - python3 \ - python3-numpy \ - python3-opencv \ - python3-pillow \ - python3-pip \ - python3-sphinx \ - python3-sphinx-rtd-theme \ - python3-venv \ - python3-yaml \ - rpm2cpio \ - sed \ - sparse \ - tar \ - tesseract-ocr \ - tesseract-ocr-eng \ - texinfo && \ + bash \ + bc \ + bison \ + bsdextrautils \ + bzip2 \ + ca-certificates \ + ccache \ + dbus \ + debianutils \ + diffutils \ + exuberant-ctags \ + findutils \ + flex \ + gcovr \ + genisoimage \ + gettext \ + git \ + hostname \ + libglib2.0-dev \ + libpcre2-dev \ + libsndio-dev \ + libspice-protocol-dev \ + llvm \ + locales \ + make \ + meson \ + ncat \ + ninja-build \ + openssh-client \ + perl-base \ + pkgconf \ + python3 \ + python3-numpy \ + python3-opencv \ + python3-pillow \ + python3-pip \ + python3-sphinx \ + python3-sphinx-rtd-theme \ + python3-venv \ + python3-yaml \ + rpm2cpio \ + sed \ + sparse \ + tar \ + tesseract-ocr \ + tesseract-ocr-eng \ + texinfo && \ eatmydata apt-get autoremove -y && \ eatmydata apt-get autoclean -y && \ sed -Ei 's,^# (en_US\.UTF-8 .*)$,\1,' /etc/locale.gen && \ dpkg-reconfigure locales +ENV CCACHE_WRAPPERSDIR "/usr/libexec/ccache-wrappers" ENV LANG "en_US.UTF-8" ENV MAKE "/usr/bin/make" ENV NINJA "/usr/bin/ninja" ENV PYTHON "/usr/bin/python3" -ENV CCACHE_WRAPPERSDIR "/usr/libexec/ccache-wrappers" RUN export DEBIAN_FRONTEND=noninteractive && \ dpkg --add-architecture s390x && \ @@ -74,73 +75,73 @@ RUN export DEBIAN_FRONTEND=noninteractive && \ eatmydata apt-get dist-upgrade -y && \ eatmydata apt-get install --no-install-recommends -y dpkg-dev && \ eatmydata apt-get install --no-install-recommends -y \ - g++-s390x-linux-gnu \ - gcc-s390x-linux-gnu \ - libaio-dev:s390x \ - libasan5:s390x \ - libasound2-dev:s390x \ - libattr1-dev:s390x \ - libbpf-dev:s390x \ - libbrlapi-dev:s390x \ - libbz2-dev:s390x \ - libc6-dev:s390x \ - libcacard-dev:s390x \ - libcap-ng-dev:s390x \ - libcapstone-dev:s390x \ - libcmocka-dev:s390x \ - libcurl4-gnutls-dev:s390x \ - libdaxctl-dev:s390x \ - libdrm-dev:s390x \ - libepoxy-dev:s390x \ - libfdt-dev:s390x \ - libffi-dev:s390x \ - libfuse3-dev:s390x \ - libgbm-dev:s390x \ - libgcrypt20-dev:s390x \ - libglib2.0-dev:s390x \ - libglusterfs-dev:s390x \ - libgnutls28-dev:s390x \ - libgtk-3-dev:s390x \ - libibumad-dev:s390x \ - libibverbs-dev:s390x \ - libiscsi-dev:s390x \ - libjemalloc-dev:s390x \ - libjpeg62-turbo-dev:s390x \ - libjson-c-dev:s390x \ - liblttng-ust-dev:s390x \ - liblzo2-dev:s390x \ - libncursesw5-dev:s390x \ - libnfs-dev:s390x \ - libnuma-dev:s390x \ - libpam0g-dev:s390x \ - libpixman-1-dev:s390x \ - libpng-dev:s390x \ - libpulse-dev:s390x \ - librbd-dev:s390x \ - librdmacm-dev:s390x \ - libsasl2-dev:s390x \ - libsdl2-dev:s390x \ - libsdl2-image-dev:s390x \ - libseccomp-dev:s390x \ - libselinux1-dev:s390x \ - libslirp-dev:s390x \ - libsnappy-dev:s390x \ - libssh-gcrypt-dev:s390x \ - libsystemd-dev:s390x \ - libtasn1-6-dev:s390x \ - libubsan1:s390x \ - libudev-dev:s390x \ - liburing-dev:s390x \ - libusb-1.0-0-dev:s390x \ - libusbredirhost-dev:s390x \ - libvdeplug-dev:s390x \ - libvirglrenderer-dev:s390x \ - libvte-2.91-dev:s390x \ - libzstd-dev:s390x \ - nettle-dev:s390x \ - systemtap-sdt-dev:s390x \ - xfslibs-dev:s390x \ - zlib1g-dev:s390x && \ + g++-s390x-linux-gnu \ + gcc-s390x-linux-gnu \ + libaio-dev:s390x \ + libasan5:s390x \ + libasound2-dev:s390x \ + libattr1-dev:s390x \ + libbpf-dev:s390x \ + libbrlapi-dev:s390x \ + libbz2-dev:s390x \ + libc6-dev:s390x \ + libcacard-dev:s390x \ + libcap-ng-dev:s390x \ + libcapstone-dev:s390x \ + libcmocka-dev:s390x \ + libcurl4-gnutls-dev:s390x \ + libdaxctl-dev:s390x \ + libdrm-dev:s390x \ + libepoxy-dev:s390x \ + libfdt-dev:s390x \ + libffi-dev:s390x \ + libfuse3-dev:s390x \ + libgbm-dev:s390x \ + libgcrypt20-dev:s390x \ + libglib2.0-dev:s390x \ + libglusterfs-dev:s390x \ + libgnutls28-dev:s390x \ + libgtk-3-dev:s390x \ + libibumad-dev:s390x \ + libibverbs-dev:s390x \ + libiscsi-dev:s390x \ + libjemalloc-dev:s390x \ + libjpeg62-turbo-dev:s390x \ + libjson-c-dev:s390x \ + liblttng-ust-dev:s390x \ + liblzo2-dev:s390x \ + libncursesw5-dev:s390x \ + libnfs-dev:s390x \ + libnuma-dev:s390x \ + libpam0g-dev:s390x \ + libpixman-1-dev:s390x \ + libpng-dev:s390x \ + libpulse-dev:s390x \ + librbd-dev:s390x \ + librdmacm-dev:s390x \ + libsasl2-dev:s390x \ + libsdl2-dev:s390x \ + libsdl2-image-dev:s390x \ + libseccomp-dev:s390x \ + libselinux1-dev:s390x \ + libslirp-dev:s390x \ + libsnappy-dev:s390x \ + libssh-gcrypt-dev:s390x \ + libsystemd-dev:s390x \ + libtasn1-6-dev:s390x \ + libubsan1:s390x \ + libudev-dev:s390x \ + liburing-dev:s390x \ + libusb-1.0-0-dev:s390x \ + libusbredirhost-dev:s390x \ + libvdeplug-dev:s390x \ + libvirglrenderer-dev:s390x \ + libvte-2.91-dev:s390x \ + libzstd-dev:s390x \ + nettle-dev:s390x \ + systemtap-sdt-dev:s390x \ + xfslibs-dev:s390x \ + zlib1g-dev:s390x && \ eatmydata apt-get autoremove -y && \ eatmydata apt-get autoclean -y && \ mkdir -p /usr/local/share/meson/cross && \ diff --git a/tests/docker/dockerfiles/fedora.docker b/tests/docker/dockerfiles/fedora.docker index fe84166ca1..d200c7fc10 100644 --- a/tests/docker/dockerfiles/fedora.docker +++ b/tests/docker/dockerfiles/fedora.docker @@ -18,120 +18,120 @@ exec "$@"' > /usr/bin/nosync && \ chmod +x /usr/bin/nosync && \ nosync dnf update -y && \ nosync dnf install -y \ - SDL2-devel \ - SDL2_image-devel \ - alsa-lib-devel \ - bash \ - bc \ - bison \ - brlapi-devel \ - bzip2 \ - bzip2-devel \ - ca-certificates \ - capstone-devel \ - ccache \ - clang \ - ctags \ - cyrus-sasl-devel \ - daxctl-devel \ - dbus-daemon \ - device-mapper-multipath-devel \ - diffutils \ - findutils \ - flex \ - fuse3-devel \ - gcc \ - gcc-c++ \ - gcovr \ - genisoimage \ - gettext \ - git \ - glib2-devel \ - glib2-static \ - glibc-langpack-en \ - glibc-static \ - glusterfs-api-devel \ - gnutls-devel \ - gtk3-devel \ - hostname \ - jemalloc-devel \ - json-c-devel \ - libaio-devel \ - libasan \ - libattr-devel \ - libbpf-devel \ - libcacard-devel \ - libcap-ng-devel \ - libcmocka-devel \ - libcurl-devel \ - libdrm-devel \ - libepoxy-devel \ - libfdt-devel \ - libffi-devel \ - libgcrypt-devel \ - libiscsi-devel \ - libjpeg-devel \ - libnfs-devel \ - libpmem-devel \ - libpng-devel \ - librbd-devel \ - libseccomp-devel \ - libselinux-devel \ - libslirp-devel \ - libssh-devel \ - libtasn1-devel \ - libubsan \ - liburing-devel \ - libusbx-devel \ - libzstd-devel \ - llvm \ - lttng-ust-devel \ - lzo-devel \ - make \ - mesa-libgbm-devel \ - meson \ - ncurses-devel \ - nettle-devel \ - ninja-build \ - nmap-ncat \ - numactl-devel \ - openssh-clients \ - pam-devel \ - pcre-static \ - perl-base \ - pixman-devel \ - pkgconfig \ - pulseaudio-libs-devel \ - python3 \ - python3-PyYAML \ - python3-numpy \ - python3-opencv \ - python3-pillow \ - python3-pip \ - python3-sphinx \ - python3-sphinx_rtd_theme \ - rdma-core-devel \ - rpm \ - sed \ - snappy-devel \ - sparse \ - spice-protocol \ - spice-server-devel \ - systemd-devel \ - systemtap-sdt-devel \ - tar \ - tesseract \ - tesseract-langpack-eng \ - texinfo \ - usbredir-devel \ - util-linux \ - virglrenderer-devel \ - vte291-devel \ - which \ - xen-devel \ - xfsprogs-devel \ - zlib-devel \ - zlib-static && \ + SDL2-devel \ + SDL2_image-devel \ + alsa-lib-devel \ + bash \ + bc \ + bison \ + brlapi-devel \ + bzip2 \ + bzip2-devel \ + ca-certificates \ + capstone-devel \ + ccache \ + clang \ + ctags \ + cyrus-sasl-devel \ + daxctl-devel \ + dbus-daemon \ + device-mapper-multipath-devel \ + diffutils \ + findutils \ + flex \ + fuse3-devel \ + gcc \ + gcc-c++ \ + gcovr \ + genisoimage \ + gettext \ + git \ + glib2-devel \ + glib2-static \ + glibc-langpack-en \ + glibc-static \ + glusterfs-api-devel \ + gnutls-devel \ + gtk3-devel \ + hostname \ + jemalloc-devel \ + json-c-devel \ + libaio-devel \ + libasan \ + libattr-devel \ + libbpf-devel \ + libcacard-devel \ + libcap-ng-devel \ + libcmocka-devel \ + libcurl-devel \ + libdrm-devel \ + libepoxy-devel \ + libfdt-devel \ + libffi-devel \ + libgcrypt-devel \ + libiscsi-devel \ + libjpeg-devel \ + libnfs-devel \ + libpmem-devel \ + libpng-devel \ + librbd-devel \ + libseccomp-devel \ + libselinux-devel \ + libslirp-devel \ + libssh-devel \ + libtasn1-devel \ + libubsan \ + liburing-devel \ + libusbx-devel \ + libzstd-devel \ + llvm \ + lttng-ust-devel \ + lzo-devel \ + make \ + mesa-libgbm-devel \ + meson \ + ncurses-devel \ + nettle-devel \ + ninja-build \ + nmap-ncat \ + numactl-devel \ + openssh-clients \ + pam-devel \ + pcre-static \ + perl-base \ + pixman-devel \ + pkgconfig \ + pulseaudio-libs-devel \ + python3 \ + python3-PyYAML \ + python3-numpy \ + python3-opencv \ + python3-pillow \ + python3-pip \ + python3-sphinx \ + python3-sphinx_rtd_theme \ + rdma-core-devel \ + rpm \ + sed \ + snappy-devel \ + sparse \ + spice-protocol \ + spice-server-devel \ + systemd-devel \ + systemtap-sdt-devel \ + tar \ + tesseract \ + tesseract-langpack-eng \ + texinfo \ + usbredir-devel \ + util-linux \ + virglrenderer-devel \ + vte291-devel \ + which \ + xen-devel \ + xfsprogs-devel \ + zlib-devel \ + zlib-static && \ nosync dnf autoremove -y && \ nosync dnf clean all -y && \ rpm -qa | sort > /packages.txt && \ @@ -142,8 +142,8 @@ exec "$@"' > /usr/bin/nosync && \ ln -s /usr/bin/ccache /usr/libexec/ccache-wrappers/g++ && \ ln -s /usr/bin/ccache /usr/libexec/ccache-wrappers/gcc +ENV CCACHE_WRAPPERSDIR "/usr/libexec/ccache-wrappers" ENV LANG "en_US.UTF-8" ENV MAKE "/usr/bin/make" ENV NINJA "/usr/bin/ninja" ENV PYTHON "/usr/bin/python3" -ENV CCACHE_WRAPPERSDIR "/usr/libexec/ccache-wrappers" diff --git a/tests/docker/dockerfiles/opensuse-leap.docker b/tests/docker/dockerfiles/opensuse-leap.docker index d80064756f..4361b01464 100644 --- a/tests/docker/dockerfiles/opensuse-leap.docker +++ b/tests/docker/dockerfiles/opensuse-leap.docker @@ -104,6 +104,7 @@ RUN zypper update -y && \ rpm \ sed \ snappy-devel \ + sndio-devel \ sparse \ spice-protocol-devel \ systemd-devel \ @@ -132,8 +133,8 @@ RUN zypper update -y && \ RUN /usr/bin/pip3 install meson==0.56.0 +ENV CCACHE_WRAPPERSDIR "/usr/libexec/ccache-wrappers" ENV LANG "en_US.UTF-8" ENV MAKE "/usr/bin/make" ENV NINJA "/usr/bin/ninja" ENV PYTHON "/usr/bin/python3" -ENV CCACHE_WRAPPERSDIR "/usr/libexec/ccache-wrappers" diff --git a/tests/docker/dockerfiles/ubuntu2004.docker b/tests/docker/dockerfiles/ubuntu2004.docker index 24594afc15..9417bca2fa 100644 --- a/tests/docker/dockerfiles/ubuntu2004.docker +++ b/tests/docker/dockerfiles/ubuntu2004.docker @@ -11,122 +11,123 @@ RUN export DEBIAN_FRONTEND=noninteractive && \ apt-get install -y eatmydata && \ eatmydata apt-get dist-upgrade -y && \ eatmydata apt-get install --no-install-recommends -y \ - bash \ - bc \ - bison \ - bsdmainutils \ - bzip2 \ - ca-certificates \ - ccache \ - clang \ - dbus \ - debianutils \ - diffutils \ - exuberant-ctags \ - findutils \ - flex \ - g++ \ - gcc \ - gcovr \ - genisoimage \ - gettext \ - git \ - hostname \ - libaio-dev \ - libasan5 \ - libasound2-dev \ - libattr1-dev \ - libbrlapi-dev \ - libbz2-dev \ - libc6-dev \ - libcacard-dev \ - libcap-ng-dev \ - libcapstone-dev \ - libcmocka-dev \ - libcurl4-gnutls-dev \ - libdaxctl-dev \ - libdrm-dev \ - libepoxy-dev \ - libfdt-dev \ - libffi-dev \ - libfuse3-dev \ - libgbm-dev \ - libgcrypt20-dev \ - libglib2.0-dev \ - libglusterfs-dev \ - libgnutls28-dev \ - libgtk-3-dev \ - libibumad-dev \ - libibverbs-dev \ - libiscsi-dev \ - libjemalloc-dev \ - libjpeg-turbo8-dev \ - libjson-c-dev \ - liblttng-ust-dev \ - liblzo2-dev \ - libncursesw5-dev \ - libnfs-dev \ - libnuma-dev \ - libpam0g-dev \ - libpcre2-dev \ - libpixman-1-dev \ - libpmem-dev \ - libpng-dev \ - libpulse-dev \ - librbd-dev \ - librdmacm-dev \ - libsasl2-dev \ - libsdl2-dev \ - libsdl2-image-dev \ - libseccomp-dev \ - libselinux1-dev \ - libslirp-dev \ - libsnappy-dev \ - libspice-protocol-dev \ - libspice-server-dev \ - libssh-dev \ - libsystemd-dev \ - libtasn1-6-dev \ - libubsan1 \ - libudev-dev \ - libusb-1.0-0-dev \ - libusbredirhost-dev \ - libvdeplug-dev \ - libvirglrenderer-dev \ - libvte-2.91-dev \ - libxen-dev \ - libzstd-dev \ - llvm \ - locales \ - make \ - multipath-tools \ - ncat \ - nettle-dev \ - ninja-build \ - openssh-client \ - perl-base \ - pkgconf \ - python3 \ - python3-numpy \ - python3-opencv \ - python3-pillow \ - python3-pip \ - python3-setuptools \ - python3-sphinx \ - python3-sphinx-rtd-theme \ - python3-venv \ - python3-wheel \ - python3-yaml \ - rpm2cpio \ - sed \ - sparse \ - systemtap-sdt-dev \ - tar \ - tesseract-ocr \ - tesseract-ocr-eng \ - texinfo \ - xfslibs-dev \ - zlib1g-dev && \ + bash \ + bc \ + bison \ + bsdmainutils \ + bzip2 \ + ca-certificates \ + ccache \ + clang \ + dbus \ + debianutils \ + diffutils \ + exuberant-ctags \ + findutils \ + flex \ + g++ \ + gcc \ + gcovr \ + genisoimage \ + gettext \ + git \ + hostname \ + libaio-dev \ + libasan5 \ + libasound2-dev \ + libattr1-dev \ + libbrlapi-dev \ + libbz2-dev \ + libc6-dev \ + libcacard-dev \ + libcap-ng-dev \ + libcapstone-dev \ + libcmocka-dev \ + libcurl4-gnutls-dev \ + libdaxctl-dev \ + libdrm-dev \ + libepoxy-dev \ + libfdt-dev \ + libffi-dev \ + libfuse3-dev \ + libgbm-dev \ + libgcrypt20-dev \ + libglib2.0-dev \ + libglusterfs-dev \ + libgnutls28-dev \ + libgtk-3-dev \ + libibumad-dev \ + libibverbs-dev \ + libiscsi-dev \ + libjemalloc-dev \ + libjpeg-turbo8-dev \ + libjson-c-dev \ + liblttng-ust-dev \ + liblzo2-dev \ + libncursesw5-dev \ + libnfs-dev \ + libnuma-dev \ + libpam0g-dev \ + libpcre2-dev \ + libpixman-1-dev \ + libpmem-dev \ + libpng-dev \ + libpulse-dev \ + librbd-dev \ + librdmacm-dev \ + libsasl2-dev \ + libsdl2-dev \ + libsdl2-image-dev \ + libseccomp-dev \ + libselinux1-dev \ + libslirp-dev \ + libsnappy-dev \ + libsndio-dev \ + libspice-protocol-dev \ + libspice-server-dev \ + libssh-dev \ + libsystemd-dev \ + libtasn1-6-dev \ + libubsan1 \ + libudev-dev \ + libusb-1.0-0-dev \ + libusbredirhost-dev \ + libvdeplug-dev \ + libvirglrenderer-dev \ + libvte-2.91-dev \ + libxen-dev \ + libzstd-dev \ + llvm \ + locales \ + make \ + multipath-tools \ + ncat \ + nettle-dev \ + ninja-build \ + openssh-client \ + perl-base \ + pkgconf \ + python3 \ + python3-numpy \ + python3-opencv \ + python3-pillow \ + python3-pip \ + python3-setuptools \ + python3-sphinx \ + python3-sphinx-rtd-theme \ + python3-venv \ + python3-wheel \ + python3-yaml \ + rpm2cpio \ + sed \ + sparse \ + systemtap-sdt-dev \ + tar \ + tesseract-ocr \ + tesseract-ocr-eng \ + texinfo \ + xfslibs-dev \ + zlib1g-dev && \ eatmydata apt-get autoremove -y && \ eatmydata apt-get autoclean -y && \ sed -Ei 's,^# (en_US\.UTF-8 .*)$,\1,' /etc/locale.gen && \ @@ -141,11 +142,11 @@ RUN export DEBIAN_FRONTEND=noninteractive && \ RUN /usr/bin/pip3 install meson==0.56.0 +ENV CCACHE_WRAPPERSDIR "/usr/libexec/ccache-wrappers" ENV LANG "en_US.UTF-8" ENV MAKE "/usr/bin/make" ENV NINJA "/usr/bin/ninja" ENV PYTHON "/usr/bin/python3" -ENV CCACHE_WRAPPERSDIR "/usr/libexec/ccache-wrappers" # Apply patch https://reviews.llvm.org/D75820 # This is required for TSan in clang-10 to compile with QEMU. RUN sed -i 's/^const/static const/g' /usr/lib/llvm-10/lib/clang/10.0.0/include/sanitizer/tsan_interface.h diff --git a/tests/lcitool/libvirt-ci b/tests/lcitool/libvirt-ci -Subproject e3712b79122180fdb3b7a7ea8cbee47ece253f9 +Subproject 79691a50a5f99bd7adda236f66c3c09371b01af diff --git a/tests/lcitool/projects/qemu.yml b/tests/lcitool/projects/qemu.yml index 0d92819249..c62dbc00f9 100644 --- a/tests/lcitool/projects/qemu.yml +++ b/tests/lcitool/projects/qemu.yml @@ -101,6 +101,7 @@ packages: - sdl2-image - sed - snappy + - sndio - sparse - spice-protocol - spice-server diff --git a/tests/qemu-iotests/051 b/tests/qemu-iotests/051 index f1a506518b..4c079b11e3 100755 --- a/tests/qemu-iotests/051 +++ b/tests/qemu-iotests/051 @@ -375,7 +375,8 @@ if [ "${VALGRIND_QEMU_VM}" == "y" ]; then _casenotrun "Valgrind needs a valid TMPDIR for itself" fi VALGRIND_QEMU_VM= \ -TMPDIR=/nonexistent run_qemu -drive driver=null-co,snapshot=on +TMPDIR=/nonexistent run_qemu -drive driver=null-co,snapshot=on | + sed -e "s#'[^']*/vl\.[A-Za-z0-9]\{6\}'#SNAPSHOT_PATH#g" # Using snapshot=on together with read-only=on echo "info block" | diff --git a/tests/qemu-iotests/051.out b/tests/qemu-iotests/051.out index 441f83e41a..e5ddb03bda 100644 --- a/tests/qemu-iotests/051.out +++ b/tests/qemu-iotests/051.out @@ -459,7 +459,7 @@ wrote 4096/4096 bytes at offset 0 read 4096/4096 bytes at offset 0 4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) Testing: -drive driver=null-co,snapshot=on -QEMU_PROG: -drive driver=null-co,snapshot=on: Could not get temporary filename: No such file or directory +QEMU_PROG: -drive driver=null-co,snapshot=on: Could not open temporary file SNAPSHOT_PATH: No such file or directory Testing: -drive file=TEST_DIR/t.qcow2,snapshot=on,read-only=on,if=none,id=drive0 QEMU X.Y.Z monitor - type 'help' for more information diff --git a/tests/qemu-iotests/051.pc.out b/tests/qemu-iotests/051.pc.out index 063e4fc584..bade1ff3b9 100644 --- a/tests/qemu-iotests/051.pc.out +++ b/tests/qemu-iotests/051.pc.out @@ -539,7 +539,7 @@ wrote 4096/4096 bytes at offset 0 read 4096/4096 bytes at offset 0 4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) Testing: -drive driver=null-co,snapshot=on -QEMU_PROG: -drive driver=null-co,snapshot=on: Could not get temporary filename: No such file or directory +QEMU_PROG: -drive driver=null-co,snapshot=on: Could not open temporary file SNAPSHOT_PATH: No such file or directory Testing: -drive file=TEST_DIR/t.qcow2,snapshot=on,read-only=on,if=none,id=drive0 QEMU X.Y.Z monitor - type 'help' for more information diff --git a/tests/qtest/cxl-test.c b/tests/qtest/cxl-test.c index cbe0fb549b..61f25a72b6 100644 --- a/tests/qtest/cxl-test.c +++ b/tests/qtest/cxl-test.c @@ -101,6 +101,7 @@ static void cxl_t3d(void) qtest_start(cmdline->str); qtest_end(); + rmdir(tmpfs); } static void cxl_1pxb_2rp_2t3d(void) @@ -115,6 +116,7 @@ static void cxl_1pxb_2rp_2t3d(void) qtest_start(cmdline->str); qtest_end(); + rmdir(tmpfs); } static void cxl_2pxb_4rp_4t3d(void) @@ -130,6 +132,7 @@ static void cxl_2pxb_4rp_4t3d(void) qtest_start(cmdline->str); qtest_end(); + rmdir(tmpfs); } #endif /* CONFIG_POSIX */ diff --git a/tests/qtest/dbus-vmstate-test.c b/tests/qtest/dbus-vmstate-test.c index 74ede651f6..6c990864e3 100644 --- a/tests/qtest/dbus-vmstate-test.c +++ b/tests/qtest/dbus-vmstate-test.c @@ -233,7 +233,7 @@ test_dbus_vmstate(Test *test) test->src_qemu = src_qemu; if (test->migrate_fail) { wait_for_migration_fail(src_qemu, true); - qtest_set_expected_status(dst_qemu, 1); + qtest_set_expected_status(dst_qemu, EXIT_FAILURE); } else { wait_for_migration_complete(src_qemu); } diff --git a/tests/qtest/device-plug-test.c b/tests/qtest/device-plug-test.c index 3f44f731d1..5a6afa2b57 100644 --- a/tests/qtest/device-plug-test.c +++ b/tests/qtest/device-plug-test.c @@ -112,16 +112,16 @@ static void test_pci_unplug_json_request(void) static void test_q35_pci_unplug_json_request(void) { - const char *port = "-device '{\"driver\": \"pcie-root-port\", " - "\"id\": \"p1\"}'"; + const char *port = "-device \"{'driver': 'pcie-root-port', " + "'id': 'p1'}\""; - const char *bridge = "-device '{\"driver\": \"pcie-pci-bridge\", " - "\"id\": \"b1\", " - "\"bus\": \"p1\"}'"; + const char *bridge = "-device \"{'driver': 'pcie-pci-bridge', " + "'id': 'b1', " + "'bus': 'p1'}\""; - const char *device = "-device '{\"driver\": \"virtio-mouse-pci\", " - "\"bus\": \"b1\", " - "\"id\": \"dev0\"}'"; + const char *device = "-device \"{'driver': 'virtio-mouse-pci', " + "'bus': 'b1', " + "'id': 'dev0'}\""; QTestState *qtest = qtest_initf("-machine q35 %s %s %s", port, bridge, device); diff --git a/tests/qtest/libqmp.c b/tests/qtest/libqmp.c index ade26c15f0..2b08382e5d 100644 --- a/tests/qtest/libqmp.c +++ b/tests/qtest/libqmp.c @@ -23,6 +23,7 @@ #endif #include "qemu/cutils.h" +#include "qemu/sockets.h" #include "qapi/error.h" #include "qapi/qmp/json-parser.h" #include "qapi/qmp/qjson.h" @@ -36,7 +37,7 @@ typedef struct { static void socket_send(int fd, const char *buf, size_t size) { - size_t res = qemu_write_full(fd, buf, size); + ssize_t res = qemu_send_full(fd, buf, size); assert(res == size); } @@ -69,7 +70,7 @@ QDict *qmp_fd_receive(int fd) ssize_t len; char c; - len = read(fd, &c, 1); + len = recv(fd, &c, 1, 0); if (len == -1 && errno == EINTR) { continue; } diff --git a/tests/qtest/libqos/e1000e.c b/tests/qtest/libqos/e1000e.c index fc14b07884..ed47e34044 100644 --- a/tests/qtest/libqos/e1000e.c +++ b/tests/qtest/libqos/e1000e.c @@ -17,6 +17,7 @@ */ #include "qemu/osdep.h" +#include "hw/net/e1000_regs.h" #include "../libqtest.h" #include "pci-pc.h" #include "qemu/sockets.h" @@ -27,49 +28,13 @@ #include "qgraph.h" #include "e1000e.h" -#define E1000E_IMS (0x00d0) +#define E1000E_IVAR_TEST_CFG \ + (E1000E_RX0_MSG_ID | E1000_IVAR_INT_ALLOC_VALID | \ + ((E1000E_TX0_MSG_ID | E1000_IVAR_INT_ALLOC_VALID) << 8) | \ + ((E1000E_OTHER_MSG_ID | E1000_IVAR_INT_ALLOC_VALID) << 16) | \ + E1000_IVAR_TX_INT_EVERY_WB) -#define E1000E_STATUS (0x0008) -#define E1000E_STATUS_LU BIT(1) -#define E1000E_STATUS_ASDV1000 BIT(9) - -#define E1000E_CTRL (0x0000) -#define E1000E_CTRL_RESET BIT(26) - -#define E1000E_RCTL (0x0100) -#define E1000E_RCTL_EN BIT(1) -#define E1000E_RCTL_UPE BIT(3) -#define E1000E_RCTL_MPE BIT(4) - -#define E1000E_RFCTL (0x5008) -#define E1000E_RFCTL_EXTEN BIT(15) - -#define E1000E_TCTL (0x0400) -#define E1000E_TCTL_EN BIT(1) - -#define E1000E_CTRL_EXT (0x0018) -#define E1000E_CTRL_EXT_DRV_LOAD BIT(28) -#define E1000E_CTRL_EXT_TXLSFLOW BIT(22) - -#define E1000E_IVAR (0x00E4) -#define E1000E_IVAR_TEST_CFG ((E1000E_RX0_MSG_ID << 0) | BIT(3) | \ - (E1000E_TX0_MSG_ID << 8) | BIT(11) | \ - (E1000E_OTHER_MSG_ID << 16) | BIT(19) | \ - BIT(31)) - -#define E1000E_RING_LEN (0x1000) - -#define E1000E_TDBAL (0x3800) - -#define E1000E_TDBAH (0x3804) -#define E1000E_TDH (0x3810) - -#define E1000E_RDBAL (0x2800) -#define E1000E_RDBAH (0x2804) -#define E1000E_RDH (0x2810) - -#define E1000E_TXD_LEN (16) -#define E1000E_RXD_LEN (16) +#define E1000E_RING_LEN (0x1000) static void e1000e_macreg_write(QE1000E *d, uint32_t reg, uint32_t val) { @@ -87,30 +52,34 @@ void e1000e_tx_ring_push(QE1000E *d, void *descr) { QE1000E_PCI *d_pci = container_of(d, QE1000E_PCI, e1000e); uint32_t tail = e1000e_macreg_read(d, E1000E_TDT); - uint32_t len = e1000e_macreg_read(d, E1000E_TDLEN) / E1000E_TXD_LEN; + uint32_t len = e1000e_macreg_read(d, E1000E_TDLEN) / E1000_RING_DESC_LEN; - qtest_memwrite(d_pci->pci_dev.bus->qts, d->tx_ring + tail * E1000E_TXD_LEN, - descr, E1000E_TXD_LEN); + qtest_memwrite(d_pci->pci_dev.bus->qts, + d->tx_ring + tail * E1000_RING_DESC_LEN, + descr, E1000_RING_DESC_LEN); e1000e_macreg_write(d, E1000E_TDT, (tail + 1) % len); /* Read WB data for the packet transmitted */ - qtest_memread(d_pci->pci_dev.bus->qts, d->tx_ring + tail * E1000E_TXD_LEN, - descr, E1000E_TXD_LEN); + qtest_memread(d_pci->pci_dev.bus->qts, + d->tx_ring + tail * E1000_RING_DESC_LEN, + descr, E1000_RING_DESC_LEN); } void e1000e_rx_ring_push(QE1000E *d, void *descr) { QE1000E_PCI *d_pci = container_of(d, QE1000E_PCI, e1000e); uint32_t tail = e1000e_macreg_read(d, E1000E_RDT); - uint32_t len = e1000e_macreg_read(d, E1000E_RDLEN) / E1000E_RXD_LEN; + uint32_t len = e1000e_macreg_read(d, E1000E_RDLEN) / E1000_RING_DESC_LEN; - qtest_memwrite(d_pci->pci_dev.bus->qts, d->rx_ring + tail * E1000E_RXD_LEN, - descr, E1000E_RXD_LEN); + qtest_memwrite(d_pci->pci_dev.bus->qts, + d->rx_ring + tail * E1000_RING_DESC_LEN, + descr, E1000_RING_DESC_LEN); e1000e_macreg_write(d, E1000E_RDT, (tail + 1) % len); /* Read WB data for the packet received */ - qtest_memread(d_pci->pci_dev.bus->qts, d->rx_ring + tail * E1000E_RXD_LEN, - descr, E1000E_RXD_LEN); + qtest_memread(d_pci->pci_dev.bus->qts, + d->rx_ring + tail * E1000_RING_DESC_LEN, + descr, E1000_RING_DESC_LEN); } static void e1000e_foreach_callback(QPCIDevice *dev, int devfn, void *data) @@ -151,53 +120,53 @@ static void e1000e_pci_start_hw(QOSGraphObject *obj) qpci_device_enable(&d->pci_dev); /* Reset the device */ - val = e1000e_macreg_read(&d->e1000e, E1000E_CTRL); - e1000e_macreg_write(&d->e1000e, E1000E_CTRL, val | E1000E_CTRL_RESET); + val = e1000e_macreg_read(&d->e1000e, E1000_CTRL); + e1000e_macreg_write(&d->e1000e, E1000_CTRL, val | E1000_CTRL_RST); /* Enable and configure MSI-X */ qpci_msix_enable(&d->pci_dev); - e1000e_macreg_write(&d->e1000e, E1000E_IVAR, E1000E_IVAR_TEST_CFG); + e1000e_macreg_write(&d->e1000e, E1000_IVAR, E1000E_IVAR_TEST_CFG); /* Check the device status - link and speed */ - val = e1000e_macreg_read(&d->e1000e, E1000E_STATUS); - g_assert_cmphex(val & (E1000E_STATUS_LU | E1000E_STATUS_ASDV1000), - ==, E1000E_STATUS_LU | E1000E_STATUS_ASDV1000); + val = e1000e_macreg_read(&d->e1000e, E1000_STATUS); + g_assert_cmphex(val & (E1000_STATUS_LU | E1000_STATUS_LAN_INIT_DONE), + ==, E1000_STATUS_LU | E1000_STATUS_LAN_INIT_DONE); /* Initialize TX/RX logic */ - e1000e_macreg_write(&d->e1000e, E1000E_RCTL, 0); - e1000e_macreg_write(&d->e1000e, E1000E_TCTL, 0); + e1000e_macreg_write(&d->e1000e, E1000_RCTL, 0); + e1000e_macreg_write(&d->e1000e, E1000_TCTL, 0); /* Notify the device that the driver is ready */ - val = e1000e_macreg_read(&d->e1000e, E1000E_CTRL_EXT); - e1000e_macreg_write(&d->e1000e, E1000E_CTRL_EXT, - val | E1000E_CTRL_EXT_DRV_LOAD | E1000E_CTRL_EXT_TXLSFLOW); + val = e1000e_macreg_read(&d->e1000e, E1000_CTRL_EXT); + e1000e_macreg_write(&d->e1000e, E1000_CTRL_EXT, + val | E1000_CTRL_EXT_DRV_LOAD); - e1000e_macreg_write(&d->e1000e, E1000E_TDBAL, + e1000e_macreg_write(&d->e1000e, E1000_TDBAL, (uint32_t) d->e1000e.tx_ring); - e1000e_macreg_write(&d->e1000e, E1000E_TDBAH, + e1000e_macreg_write(&d->e1000e, E1000_TDBAH, (uint32_t) (d->e1000e.tx_ring >> 32)); e1000e_macreg_write(&d->e1000e, E1000E_TDLEN, E1000E_RING_LEN); e1000e_macreg_write(&d->e1000e, E1000E_TDT, 0); - e1000e_macreg_write(&d->e1000e, E1000E_TDH, 0); + e1000e_macreg_write(&d->e1000e, E1000_TDH, 0); /* Enable transmit */ - e1000e_macreg_write(&d->e1000e, E1000E_TCTL, E1000E_TCTL_EN); - e1000e_macreg_write(&d->e1000e, E1000E_RDBAL, + e1000e_macreg_write(&d->e1000e, E1000_TCTL, E1000_TCTL_EN); + e1000e_macreg_write(&d->e1000e, E1000_RDBAL, (uint32_t)d->e1000e.rx_ring); - e1000e_macreg_write(&d->e1000e, E1000E_RDBAH, + e1000e_macreg_write(&d->e1000e, E1000_RDBAH, (uint32_t)(d->e1000e.rx_ring >> 32)); e1000e_macreg_write(&d->e1000e, E1000E_RDLEN, E1000E_RING_LEN); e1000e_macreg_write(&d->e1000e, E1000E_RDT, 0); - e1000e_macreg_write(&d->e1000e, E1000E_RDH, 0); + e1000e_macreg_write(&d->e1000e, E1000_RDH, 0); /* Enable receive */ - e1000e_macreg_write(&d->e1000e, E1000E_RFCTL, E1000E_RFCTL_EXTEN); - e1000e_macreg_write(&d->e1000e, E1000E_RCTL, E1000E_RCTL_EN | - E1000E_RCTL_UPE | - E1000E_RCTL_MPE); + e1000e_macreg_write(&d->e1000e, E1000_RFCTL, E1000_RFCTL_EXTEN); + e1000e_macreg_write(&d->e1000e, E1000_RCTL, E1000_RCTL_EN | + E1000_RCTL_UPE | + E1000_RCTL_MPE); /* Enable all interrupts */ - e1000e_macreg_write(&d->e1000e, E1000E_IMS, 0xFFFFFFFF); + e1000e_macreg_write(&d->e1000e, E1000_IMS, 0xFFFFFFFF); } diff --git a/tests/qtest/libqos/meson.build b/tests/qtest/libqos/meson.build index 113c80b4e4..32f028872c 100644 --- a/tests/qtest/libqos/meson.build +++ b/tests/qtest/libqos/meson.build @@ -33,8 +33,6 @@ libqos_srcs = files( 'sdhci.c', 'tpci200.c', 'virtio.c', - 'virtio-9p.c', - 'virtio-9p-client.c', 'virtio-balloon.c', 'virtio-blk.c', 'vhost-user-blk.c', @@ -62,6 +60,10 @@ libqos_srcs = files( 'x86_64_pc-machine.c', ) +if have_virtfs + libqos_srcs += files('virtio-9p.c', 'virtio-9p-client.c') +endif + libqos = static_library('qos', libqos_srcs + genh, name_suffix: 'fa', build_by_default: false) diff --git a/tests/qtest/libqtest.c b/tests/qtest/libqtest.c index b23eb3edc3..2fbc3b88f3 100644 --- a/tests/qtest/libqtest.c +++ b/tests/qtest/libqtest.c @@ -16,9 +16,11 @@ #include "qemu/osdep.h" +#ifndef _WIN32 #include <sys/socket.h> #include <sys/wait.h> #include <sys/un.h> +#endif /* _WIN32 */ #ifdef __linux__ #include <sys/prctl.h> #endif /* __linux__ */ @@ -27,13 +29,25 @@ #include "libqmp.h" #include "qemu/ctype.h" #include "qemu/cutils.h" +#include "qemu/sockets.h" #include "qapi/qmp/qdict.h" #include "qapi/qmp/qjson.h" #include "qapi/qmp/qlist.h" #include "qapi/qmp/qstring.h" #define MAX_IRQ 256 -#define SOCKET_TIMEOUT 50 + +#ifndef _WIN32 +# define SOCKET_TIMEOUT 50 +# define CMD_EXEC "exec " +# define DEV_STDERR "/dev/fd/2" +# define DEV_NULL "/dev/null" +#else +# define SOCKET_TIMEOUT 50000 +# define CMD_EXEC "" +# define DEV_STDERR "2" +# define DEV_NULL "nul" +#endif typedef void (*QTestSendFn)(QTestState *s, const char *buf); typedef void (*ExternalSendFn)(void *s, const char *buf); @@ -57,6 +71,9 @@ struct QTestState int qmp_fd; pid_t qemu_pid; /* our child QEMU process */ int wstatus; +#ifdef _WIN32 + DWORD exit_code; +#endif int expected_status; bool big_endian; bool irq_level[MAX_IRQ]; @@ -90,8 +107,16 @@ static int socket_accept(int sock) struct sockaddr_un addr; socklen_t addrlen; int ret; + /* + * timeout unit of blocking receive calls is different among platfoms. + * It's in seconds on non-Windows platforms but milliseconds on Windows. + */ +#ifndef _WIN32 struct timeval timeout = { .tv_sec = SOCKET_TIMEOUT, .tv_usec = 0 }; +#else + DWORD timeout = SOCKET_TIMEOUT; +#endif if (setsockopt(sock, SOL_SOCKET, SO_RCVTIMEO, (void *)&timeout, sizeof(timeout))) { @@ -118,10 +143,18 @@ bool qtest_probe_child(QTestState *s) pid_t pid = s->qemu_pid; if (pid != -1) { +#ifndef _WIN32 pid = waitpid(pid, &s->wstatus, WNOHANG); if (pid == 0) { return true; } +#else + GetExitCodeProcess((HANDLE)pid, &s->exit_code); + if (s->exit_code == STILL_ACTIVE) { + return true; + } + CloseHandle((HANDLE)pid); +#endif s->qemu_pid = -1; } return false; @@ -132,24 +165,14 @@ void qtest_set_expected_status(QTestState *s, int status) s->expected_status = status; } -void qtest_kill_qemu(QTestState *s) +static void qtest_check_status(QTestState *s) { - pid_t pid = s->qemu_pid; - int wstatus; - - /* Skip wait if qtest_probe_child already reaped. */ - if (pid != -1) { - kill(pid, SIGTERM); - TFR(pid = waitpid(s->qemu_pid, &s->wstatus, 0)); - assert(pid == s->qemu_pid); - s->qemu_pid = -1; - } - /* * Check whether qemu exited with expected exit status; anything else is * fishy and should be logged with as much detail as possible. */ - wstatus = s->wstatus; +#ifndef _WIN32 + int wstatus = s->wstatus; if (WIFEXITED(wstatus) && WEXITSTATUS(wstatus) != s->expected_status) { fprintf(stderr, "%s:%d: kill_qemu() tried to terminate QEMU " "process but encountered exit status %d (expected %d)\n", @@ -165,6 +188,50 @@ void qtest_kill_qemu(QTestState *s) __FILE__, __LINE__, sig, signame, dump); abort(); } +#else + if (s->exit_code != s->expected_status) { + fprintf(stderr, "%s:%d: kill_qemu() tried to terminate QEMU " + "process but encountered exit status %ld (expected %d)\n", + __FILE__, __LINE__, s->exit_code, s->expected_status); + abort(); + } +#endif +} + +void qtest_wait_qemu(QTestState *s) +{ +#ifndef _WIN32 + pid_t pid; + + TFR(pid = waitpid(s->qemu_pid, &s->wstatus, 0)); + assert(pid == s->qemu_pid); +#else + DWORD ret; + + ret = WaitForSingleObject((HANDLE)s->qemu_pid, INFINITE); + assert(ret == WAIT_OBJECT_0); + GetExitCodeProcess((HANDLE)s->qemu_pid, &s->exit_code); + CloseHandle((HANDLE)s->qemu_pid); +#endif + + qtest_check_status(s); +} + +void qtest_kill_qemu(QTestState *s) +{ + /* Skip wait if qtest_probe_child() already reaped */ + if (s->qemu_pid != -1) { +#ifndef _WIN32 + kill(s->qemu_pid, SIGTERM); +#else + TerminateProcess((HANDLE)s->qemu_pid, s->expected_status); +#endif + qtest_wait_qemu(s); + s->qemu_pid = -1; + return; + } + + qtest_check_status(s); } static void kill_qemu_hook_func(void *s) @@ -243,6 +310,38 @@ static const char *qtest_qemu_binary(void) return qemu_bin; } +#ifdef _WIN32 +static pid_t qtest_create_process(char *cmd) +{ + STARTUPINFO si; + PROCESS_INFORMATION pi; + BOOL ret; + + ZeroMemory(&si, sizeof(si)); + si.cb = sizeof(si); + ZeroMemory(&pi, sizeof(pi)); + + ret = CreateProcess(NULL, /* module name */ + cmd, /* command line */ + NULL, /* process handle not inheritable */ + NULL, /* thread handle not inheritable */ + FALSE, /* set handle inheritance to FALSE */ + 0, /* No creation flags */ + NULL, /* use parent's environment block */ + NULL, /* use parent's starting directory */ + &si, /* pointer to STARTUPINFO structure */ + &pi /* pointer to PROCESS_INFORMATION structure */ + ); + if (ret == 0) { + fprintf(stderr, "%s:%d: unable to create a new process (%s)\n", + __FILE__, __LINE__, strerror(GetLastError())); + abort(); + } + + return (pid_t)pi.hProcess; +} +#endif /* _WIN32 */ + QTestState *qtest_init_without_qmp_handshake(const char *extra_args) { QTestState *s; @@ -270,6 +369,7 @@ QTestState *qtest_init_without_qmp_handshake(const char *extra_args) unlink(socket_path); unlink(qmp_socket_path); + socket_init(); sock = init_socket(socket_path); qmpsock = init_socket(qmp_socket_path); @@ -278,7 +378,7 @@ QTestState *qtest_init_without_qmp_handshake(const char *extra_args) qtest_add_abrt_handler(kill_qemu_hook_func, s); - command = g_strdup_printf("exec %s %s" + command = g_strdup_printf(CMD_EXEC "%s %s" "-qtest unix:%s " "-qtest-log %s " "-chardev socket,path=%s,id=char0 " @@ -287,7 +387,7 @@ QTestState *qtest_init_without_qmp_handshake(const char *extra_args) "%s" " -accel qtest", qemu_binary, tracearg, socket_path, - getenv("QTEST_LOG") ? "/dev/fd/2" : "/dev/null", + getenv("QTEST_LOG") ? DEV_STDERR : DEV_NULL, qmp_socket_path, extra_args ?: ""); @@ -296,6 +396,7 @@ QTestState *qtest_init_without_qmp_handshake(const char *extra_args) s->pending_events = NULL; s->wstatus = 0; s->expected_status = 0; +#ifndef _WIN32 s->qemu_pid = fork(); if (s->qemu_pid == 0) { #ifdef __linux__ @@ -318,6 +419,9 @@ QTestState *qtest_init_without_qmp_handshake(const char *extra_args) execlp("/bin/sh", "sh", "-c", command, NULL); exit(1); } +#else + s->qemu_pid = qtest_create_process(command); +#endif /* _WIN32 */ g_free(command); s->fd = socket_accept(sock); @@ -336,9 +440,19 @@ QTestState *qtest_init_without_qmp_handshake(const char *extra_args) s->irq_level[i] = false; } + /* + * Stopping QEMU for debugging is not supported on Windows. + * + * Using DebugActiveProcess() API can suspend the QEMU process, + * but gdb cannot attach to the process. Using the undocumented + * NtSuspendProcess() can suspend the QEMU process and gdb can + * attach to the process, but gdb cannot resume it. + */ +#ifndef _WIN32 if (getenv("QTEST_STOP")) { kill(s->qemu_pid, SIGSTOP); } +#endif /* ask endianness of the target */ @@ -392,6 +506,7 @@ QTestState *qtest_init_with_serial(const char *extra_args, int *sock_fd) g_assert_true(sock_dir != NULL); sock_path = g_strdup_printf("%s/sock", sock_dir); + socket_init(); sock_fd_init = init_socket(sock_path); qts = qtest_initf("-chardev socket,id=s0,path=%s -serial chardev:s0 %s", @@ -428,7 +543,7 @@ void qtest_quit(QTestState *s) static void socket_send(int fd, const char *buf, size_t size) { - size_t res = qemu_write_full(fd, buf, size); + ssize_t res = qemu_send_full(fd, buf, size); assert(res == size); } @@ -460,7 +575,7 @@ static GString *qtest_client_socket_recv_line(QTestState *s) ssize_t len; char buffer[1024]; - len = read(s->fd, buffer, sizeof(buffer)); + len = recv(s->fd, buffer, sizeof(buffer), 0); if (len == -1 && errno == EINTR) { continue; } diff --git a/tests/qtest/libqtest.h b/tests/qtest/libqtest.h index 65c040e504..91a5f7edd9 100644 --- a/tests/qtest/libqtest.h +++ b/tests/qtest/libqtest.h @@ -76,6 +76,15 @@ QTestState *qtest_init_without_qmp_handshake(const char *extra_args); QTestState *qtest_init_with_serial(const char *extra_args, int *sock_fd); /** + * qtest_wait_qemu: + * @s: #QTestState instance to operate on. + * + * Wait for the QEMU process to terminate. It is safe to call this function + * multiple times. + */ +void qtest_wait_qemu(QTestState *s); + +/** * qtest_kill_qemu: * @s: #QTestState instance to operate on. * diff --git a/tests/qtest/migration-test.c b/tests/qtest/migration-test.c index aa1ba179fa..d2eb107f0c 100644 --- a/tests/qtest/migration-test.c +++ b/tests/qtest/migration-test.c @@ -1342,7 +1342,7 @@ static void test_precopy_common(MigrateCommon *args) wait_for_migration_fail(from, allow_active); if (args->result == MIG_TEST_FAIL_DEST_QUIT_ERR) { - qtest_set_expected_status(to, 1); + qtest_set_expected_status(to, EXIT_FAILURE); } } else { if (args->iterations) { @@ -1738,7 +1738,7 @@ static void do_test_validate_uuid(MigrateStart *args, bool should_fail) migrate_qmp(from, uri, "{}"); if (should_fail) { - qtest_set_expected_status(to, 1); + qtest_set_expected_status(to, EXIT_FAILURE); wait_for_migration_fail(from, true); } else { wait_for_migration_complete(from); @@ -2141,6 +2141,10 @@ static void test_multifd_tcp_cancel(void) migrate_cancel(from); + /* Make sure QEMU process "to" exited */ + qtest_set_expected_status(to, EXIT_FAILURE); + qtest_wait_qemu(to); + args = (MigrateStart){ .only_target = true, }; diff --git a/tests/qtest/modules-test.c b/tests/qtest/modules-test.c index 88217686e1..be2575ae6d 100644 --- a/tests/qtest/modules-test.c +++ b/tests/qtest/modules-test.c @@ -16,6 +16,9 @@ static void test_modules_load(const void *data) int main(int argc, char *argv[]) { const char *modules[] = { +#ifdef CONFIG_BLKIO + "block-", "blkio", +#endif #ifdef CONFIG_CURL "block-", "curl", #endif diff --git a/tests/qtest/tpm-crb-swtpm-test.c b/tests/qtest/tpm-crb-swtpm-test.c index 55fdb5657d..40254f762f 100644 --- a/tests/qtest/tpm-crb-swtpm-test.c +++ b/tests/qtest/tpm-crb-swtpm-test.c @@ -13,7 +13,6 @@ */ #include "qemu/osdep.h" -#include <glib/gstdio.h> #include "libqtest.h" #include "qemu/module.h" @@ -62,9 +61,9 @@ int main(int argc, char **argv) tpm_crb_swtpm_migration_test); ret = g_test_run(); - g_rmdir(ts.dst_tpm_path); + tpm_util_rmdir(ts.dst_tpm_path); g_free(ts.dst_tpm_path); - g_rmdir(ts.src_tpm_path); + tpm_util_rmdir(ts.src_tpm_path); g_free(ts.src_tpm_path); g_free(ts.uri); diff --git a/tests/qtest/tpm-tis-device-swtpm-test.c b/tests/qtest/tpm-tis-device-swtpm-test.c index 7b20035142..8c067fddd4 100644 --- a/tests/qtest/tpm-tis-device-swtpm-test.c +++ b/tests/qtest/tpm-tis-device-swtpm-test.c @@ -14,7 +14,6 @@ */ #include "qemu/osdep.h" -#include <glib/gstdio.h> #include "libqtest.h" #include "qemu/module.h" @@ -66,9 +65,9 @@ int main(int argc, char **argv) tpm_tis_swtpm_migration_test); ret = g_test_run(); - g_rmdir(ts.dst_tpm_path); + tpm_util_rmdir(ts.dst_tpm_path); g_free(ts.dst_tpm_path); - g_rmdir(ts.src_tpm_path); + tpm_util_rmdir(ts.src_tpm_path); g_free(ts.src_tpm_path); g_free(ts.uri); diff --git a/tests/qtest/tpm-tis-swtpm-test.c b/tests/qtest/tpm-tis-swtpm-test.c index 90131cb3c4..11539c0a52 100644 --- a/tests/qtest/tpm-tis-swtpm-test.c +++ b/tests/qtest/tpm-tis-swtpm-test.c @@ -13,7 +13,6 @@ */ #include "qemu/osdep.h" -#include <glib/gstdio.h> #include "libqtest.h" #include "qemu/module.h" @@ -61,9 +60,9 @@ int main(int argc, char **argv) tpm_tis_swtpm_migration_test); ret = g_test_run(); - g_rmdir(ts.dst_tpm_path); + tpm_util_rmdir(ts.dst_tpm_path); g_free(ts.dst_tpm_path); - g_rmdir(ts.src_tpm_path); + tpm_util_rmdir(ts.src_tpm_path); g_free(ts.src_tpm_path); g_free(ts.uri); diff --git a/tests/qtest/tpm-util.c b/tests/qtest/tpm-util.c index e0dc5da0af..a7efe2d0d2 100644 --- a/tests/qtest/tpm-util.c +++ b/tests/qtest/tpm-util.c @@ -13,6 +13,7 @@ */ #include "qemu/osdep.h" +#include <glib/gstdio.h> #include "hw/acpi/tpm.h" #include "libqtest.h" @@ -292,3 +293,21 @@ void tpm_util_migration_start_qemu(QTestState **src_qemu, g_free(src_qemu_args); g_free(dst_qemu_args); } + +/* Remove directory with remainders of swtpm */ +void tpm_util_rmdir(const char *path) +{ + char *filename; + int ret; + + filename = g_strdup_printf("%s/tpm2-00.permall", path); + g_unlink(filename); + g_free(filename); + + filename = g_strdup_printf("%s/.lock", path); + g_unlink(filename); + g_free(filename); + + ret = g_rmdir(path); + g_assert(!ret); +} diff --git a/tests/qtest/tpm-util.h b/tests/qtest/tpm-util.h index 3b97d69017..80720afac0 100644 --- a/tests/qtest/tpm-util.h +++ b/tests/qtest/tpm-util.h @@ -53,5 +53,6 @@ void tpm_util_migration_start_qemu(QTestState **src_qemu, const char *machine_options); void tpm_util_wait_for_migration_complete(QTestState *who); +void tpm_util_rmdir(const char *path); #endif /* TESTS_TPM_UTIL_H */ diff --git a/tests/tcg/s390x/Makefile.target b/tests/tcg/s390x/Makefile.target index c830313e67..07fcc6d0ce 100644 --- a/tests/tcg/s390x/Makefile.target +++ b/tests/tcg/s390x/Makefile.target @@ -1,6 +1,13 @@ S390X_SRC=$(SRC_PATH)/tests/tcg/s390x VPATH+=$(S390X_SRC) CFLAGS+=-march=zEC12 -m64 + +config-cc.mak: Makefile + $(quiet-@)( \ + $(call cc-option,-march=z14, CROSS_CC_HAS_Z14); \ + $(call cc-option,-march=z15, CROSS_CC_HAS_Z15)) 3> config-cc.mak +-include config-cc.mak + TESTS+=hello-s390x TESTS+=csst TESTS+=ipm @@ -18,20 +25,24 @@ TESTS+=signals-s390x TESTS+=branch-relative-long TESTS+=noexec +Z13_TESTS=vistr +$(Z13_TESTS): CFLAGS+=-march=z13 -O2 +TESTS+=$(Z13_TESTS) + +ifneq ($(CROSS_CC_HAS_Z14),) Z14_TESTS=vfminmax vfminmax: LDFLAGS+=-lm $(Z14_TESTS): CFLAGS+=-march=z14 -O2 +TESTS+=$(Z14_TESTS) +endif -TESTS+=$(if $(shell $(CC) -march=z14 -S -o /dev/null -xc /dev/null \ - >/dev/null 2>&1 && echo OK),$(Z14_TESTS)) - -VECTOR_TESTS=vxeh2_vs -VECTOR_TESTS+=vxeh2_vcvt -VECTOR_TESTS+=vxeh2_vlstr -$(VECTOR_TESTS): CFLAGS+=-march=z15 -O2 - -TESTS+=$(if $(shell $(CC) -march=z15 -S -o /dev/null -xc /dev/null \ - >/dev/null 2>&1 && echo OK),$(VECTOR_TESTS)) +ifneq ($(CROSS_CC_HAS_Z15),) +Z15_TESTS=vxeh2_vs +Z15_TESTS+=vxeh2_vcvt +Z15_TESTS+=vxeh2_vlstr +$(Z15_TESTS): CFLAGS+=-march=z15 -O2 +TESTS+=$(Z15_TESTS) +endif ifneq ($(HAVE_GDB_BIN),) GDB_SCRIPT=$(SRC_PATH)/tests/guest-debug/run-test.py diff --git a/tests/tcg/s390x/vistr.c b/tests/tcg/s390x/vistr.c new file mode 100644 index 0000000000..8e3e987d71 --- /dev/null +++ b/tests/tcg/s390x/vistr.c @@ -0,0 +1,45 @@ +/* + * Test the VECTOR ISOLATE STRING (vistr) instruction + */ +#include <stdint.h> +#include <stdio.h> +#include "vx.h" + +static inline void vistr(S390Vector *v1, S390Vector *v2, + const uint8_t m3, const uint8_t m5) +{ + asm volatile("vistr %[v1], %[v2], %[m3], %[m5]\n" + : [v1] "=v" (v1->v) + : [v2] "v" (v2->v) + , [m3] "i" (m3) + , [m5] "i" (m5) + : "cc"); +} + +int main(int argc, char *argv[]) +{ + S390Vector vd = {}; + S390Vector vs16 = { + .h[0] = 0x1234, .h[1] = 0x0056, .h[2] = 0x7800, .h[3] = 0x0000, + .h[4] = 0x0078, .h[5] = 0x0000, .h[6] = 0x6543, .h[7] = 0x2100 + }; + S390Vector vs32 = { + .w[0] = 0x12340000, .w[1] = 0x78654300, + .w[2] = 0x0, .w[3] = 0x12, + }; + + vistr(&vd, &vs16, 1, 0); + if (vd.h[0] != 0x1234 || vd.h[1] != 0x0056 || vd.h[2] != 0x7800 || + vd.h[3] || vd.h[4] || vd.h[5] || vd.h[6] || vd.h[7]) { + puts("ERROR: vitrh failed!"); + return 1; + } + + vistr(&vd, &vs32, 2, 0); + if (vd.w[0] != 0x12340000 || vd.w[1] != 0x78654300 || vd.w[2] || vd.w[3]) { + puts("ERROR: vitrf failed!"); + return 1; + } + + return 0; +} diff --git a/tests/unit/crypto-tls-psk-helpers.c b/tests/unit/crypto-tls-psk-helpers.c index 511e08cc9c..c6cc740772 100644 --- a/tests/unit/crypto-tls-psk-helpers.c +++ b/tests/unit/crypto-tls-psk-helpers.c @@ -27,15 +27,14 @@ static void test_tls_psk_init_common(const char *pskfile, const char *user, const char *key) { - FILE *fp; + g_autoptr(GError) gerr = NULL; + g_autofree char *line = g_strdup_printf("%s:%s\n", user, key); - fp = fopen(pskfile, "w"); - if (fp == NULL) { - g_critical("Failed to create pskfile %s: %s", pskfile, strerror(errno)); + g_file_set_contents(pskfile, line, strlen(line), &gerr); + if (gerr != NULL) { + g_critical("Failed to create pskfile %s: %s", pskfile, gerr->message); abort(); } - fprintf(fp, "%s:%s\n", user, key); - fclose(fp); } void test_tls_psk_init(const char *pskfile) diff --git a/tests/unit/test-bdrv-drain.c b/tests/unit/test-bdrv-drain.c index 4924ceb562..09dc4a4891 100644 --- a/tests/unit/test-bdrv-drain.c +++ b/tests/unit/test-bdrv-drain.c @@ -1538,16 +1538,16 @@ static void test_set_aio_context(void) &error_abort); bdrv_drained_begin(bs); - bdrv_try_set_aio_context(bs, ctx_a, &error_abort); + bdrv_try_change_aio_context(bs, ctx_a, NULL, &error_abort); aio_context_acquire(ctx_a); bdrv_drained_end(bs); bdrv_drained_begin(bs); - bdrv_try_set_aio_context(bs, ctx_b, &error_abort); + bdrv_try_change_aio_context(bs, ctx_b, NULL, &error_abort); aio_context_release(ctx_a); aio_context_acquire(ctx_b); - bdrv_try_set_aio_context(bs, qemu_get_aio_context(), &error_abort); + bdrv_try_change_aio_context(bs, qemu_get_aio_context(), NULL, &error_abort); aio_context_release(ctx_b); bdrv_drained_end(bs); @@ -1830,9 +1830,8 @@ static void test_drop_intermediate_poll(void) for (i = 0; i < 3; i++) { if (i) { /* Takes the reference to chain[i - 1] */ - chain[i]->backing = bdrv_attach_child(chain[i], chain[i - 1], - "chain", &chain_child_class, - BDRV_CHILD_COW, &error_abort); + bdrv_attach_child(chain[i], chain[i - 1], "chain", + &chain_child_class, BDRV_CHILD_COW, &error_abort); } } @@ -1970,6 +1969,7 @@ static void coroutine_fn bdrv_replace_test_co_drain_end(BlockDriverState *bs) static BlockDriver bdrv_replace_test = { .format_name = "replace_test", .instance_size = sizeof(BDRVReplaceTestState), + .supports_backing = true, .bdrv_close = bdrv_replace_test_close, .bdrv_co_preadv = bdrv_replace_test_co_preadv, @@ -2049,9 +2049,8 @@ static void do_test_replace_child_mid_drain(int old_drain_count, new_child_bs->total_sectors = 1; bdrv_ref(old_child_bs); - parent_bs->backing = bdrv_attach_child(parent_bs, old_child_bs, "child", - &child_of_bds, BDRV_CHILD_COW, - &error_abort); + bdrv_attach_child(parent_bs, old_child_bs, "child", &child_of_bds, + BDRV_CHILD_COW, &error_abort); for (i = 0; i < old_drain_count; i++) { bdrv_drained_begin(old_child_bs); diff --git a/tests/unit/test-bdrv-graph-mod.c b/tests/unit/test-bdrv-graph-mod.c index a6e3bb79be..c522591531 100644 --- a/tests/unit/test-bdrv-graph-mod.c +++ b/tests/unit/test-bdrv-graph-mod.c @@ -26,6 +26,8 @@ static BlockDriver bdrv_pass_through = { .format_name = "pass-through", + .is_filter = true, + .filtered_child_is_backing = true, .bdrv_child_perm = bdrv_default_perms, }; @@ -57,6 +59,8 @@ static void exclusive_write_perms(BlockDriverState *bs, BdrvChild *c, static BlockDriver bdrv_exclusive_writer = { .format_name = "exclusive-writer", + .is_filter = true, + .filtered_child_is_backing = true, .bdrv_child_perm = exclusive_write_perms, }; @@ -134,7 +138,7 @@ static void test_update_perm_tree(void) blk_insert_bs(root, bs, &error_abort); bdrv_attach_child(filter, bs, "child", &child_of_bds, - BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY, &error_abort); + BDRV_CHILD_DATA, &error_abort); ret = bdrv_append(filter, bs, NULL); g_assert_cmpint(ret, <, 0); @@ -228,11 +232,14 @@ static void test_parallel_exclusive_write(void) */ bdrv_ref(base); - bdrv_attach_child(top, fl1, "backing", &child_of_bds, BDRV_CHILD_DATA, + bdrv_attach_child(top, fl1, "backing", &child_of_bds, + BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY, &error_abort); - bdrv_attach_child(fl1, base, "backing", &child_of_bds, BDRV_CHILD_FILTERED, + bdrv_attach_child(fl1, base, "backing", &child_of_bds, + BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY, &error_abort); - bdrv_attach_child(fl2, base, "backing", &child_of_bds, BDRV_CHILD_FILTERED, + bdrv_attach_child(fl2, base, "backing", &child_of_bds, + BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY, &error_abort); bdrv_replace_node(fl1, fl2, &error_abort); @@ -241,13 +248,26 @@ static void test_parallel_exclusive_write(void) bdrv_unref(top); } -static void write_to_file_perms(BlockDriverState *bs, BdrvChild *c, - BdrvChildRole role, - BlockReopenQueue *reopen_queue, - uint64_t perm, uint64_t shared, - uint64_t *nperm, uint64_t *nshared) +/* + * write-to-selected node may have several DATA children, one of them may be + * "selected". Exclusive write permission is taken on selected child. + * + * We don't realize write handler itself, as we need only to test how permission + * update works. + */ +typedef struct BDRVWriteToSelectedState { + BdrvChild *selected; +} BDRVWriteToSelectedState; + +static void write_to_selected_perms(BlockDriverState *bs, BdrvChild *c, + BdrvChildRole role, + BlockReopenQueue *reopen_queue, + uint64_t perm, uint64_t shared, + uint64_t *nperm, uint64_t *nshared) { - if (bs->file && c == bs->file) { + BDRVWriteToSelectedState *s = bs->opaque; + + if (s->selected && c == s->selected) { *nperm = BLK_PERM_WRITE; *nshared = BLK_PERM_ALL & ~BLK_PERM_WRITE; } else { @@ -256,9 +276,10 @@ static void write_to_file_perms(BlockDriverState *bs, BdrvChild *c, } } -static BlockDriver bdrv_write_to_file = { - .format_name = "tricky-perm", - .bdrv_child_perm = write_to_file_perms, +static BlockDriver bdrv_write_to_selected = { + .format_name = "write-to-selected", + .instance_size = sizeof(BDRVWriteToSelectedState), + .bdrv_child_perm = write_to_selected_perms, }; @@ -266,15 +287,18 @@ static BlockDriver bdrv_write_to_file = { * The following test shows that topological-sort order is required for * permission update, simple DFS is not enough. * - * Consider the block driver which has two filter children: one active - * with exclusive write access and one inactive with no specific - * permissions. + * Consider the block driver (write-to-selected) which has two children: one is + * selected so we have exclusive write access to it and for the other one we + * don't need any specific permissions. * * And, these two children has a common base child, like this: + * (additional "top" on top is used in test just because the only public + * function to update permission should get a specific child to update. + * Making bdrv_refresh_perms() public just for this test isn't worth it) * - * ┌─────┐ ┌──────┐ - * │ fl2 │ ◀── │ top │ - * └─────┘ └──────┘ + * ┌─────┐ ┌───────────────────┐ ┌─────┐ + * │ fl2 │ ◀── │ write-to-selected │ ◀── │ top │ + * └─────┘ └───────────────────┘ └─────┘ * │ │ * │ │ w * │ ▼ @@ -290,14 +314,14 @@ static BlockDriver bdrv_write_to_file = { * * So, exclusive write is propagated. * - * Assume, we want to make fl2 active instead of fl1. - * So, we set some option for top driver and do permission update. + * Assume, we want to select fl2 instead of fl1. + * So, we set some option for write-to-selected driver and do permission update. * * With simple DFS, if permission update goes first through - * top->fl1->base branch it will succeed: it firstly drop exclusive write - * permissions and than apply them for another BdrvChildren. - * But if permission update goes first through top->fl2->base branch it - * will fail, as when we try to update fl2->base child, old not yet + * write-to-selected -> fl1 -> base branch it will succeed: it firstly drop + * exclusive write permissions and than apply them for another BdrvChildren. + * But if permission update goes first through write-to-selected -> fl2 -> base + * branch it will fail, as when we try to update fl2->base child, old not yet * updated fl1->base child will be in conflict. * * With topological-sort order we always update parents before children, so fl1 @@ -306,9 +330,10 @@ static BlockDriver bdrv_write_to_file = { static void test_parallel_perm_update(void) { BlockDriverState *top = no_perm_node("top"); - BlockDriverState *tricky = - bdrv_new_open_driver(&bdrv_write_to_file, "tricky", BDRV_O_RDWR, + BlockDriverState *ws = + bdrv_new_open_driver(&bdrv_write_to_selected, "ws", BDRV_O_RDWR, &error_abort); + BDRVWriteToSelectedState *s = ws->opaque; BlockDriverState *base = no_perm_node("base"); BlockDriverState *fl1 = pass_through_node("fl1"); BlockDriverState *fl2 = pass_through_node("fl2"); @@ -320,33 +345,35 @@ static void test_parallel_perm_update(void) */ bdrv_ref(base); - bdrv_attach_child(top, tricky, "file", &child_of_bds, BDRV_CHILD_DATA, + bdrv_attach_child(top, ws, "file", &child_of_bds, BDRV_CHILD_DATA, &error_abort); - c_fl1 = bdrv_attach_child(tricky, fl1, "first", &child_of_bds, - BDRV_CHILD_FILTERED, &error_abort); - c_fl2 = bdrv_attach_child(tricky, fl2, "second", &child_of_bds, - BDRV_CHILD_FILTERED, &error_abort); - bdrv_attach_child(fl1, base, "backing", &child_of_bds, BDRV_CHILD_FILTERED, + c_fl1 = bdrv_attach_child(ws, fl1, "first", &child_of_bds, + BDRV_CHILD_DATA, &error_abort); + c_fl2 = bdrv_attach_child(ws, fl2, "second", &child_of_bds, + BDRV_CHILD_DATA, &error_abort); + bdrv_attach_child(fl1, base, "backing", &child_of_bds, + BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY, &error_abort); - bdrv_attach_child(fl2, base, "backing", &child_of_bds, BDRV_CHILD_FILTERED, + bdrv_attach_child(fl2, base, "backing", &child_of_bds, + BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY, &error_abort); /* Select fl1 as first child to be active */ - tricky->file = c_fl1; + s->selected = c_fl1; bdrv_child_refresh_perms(top, top->children.lh_first, &error_abort); assert(c_fl1->perm & BLK_PERM_WRITE); assert(!(c_fl2->perm & BLK_PERM_WRITE)); /* Now, try to switch active child and update permissions */ - tricky->file = c_fl2; + s->selected = c_fl2; bdrv_child_refresh_perms(top, top->children.lh_first, &error_abort); assert(c_fl2->perm & BLK_PERM_WRITE); assert(!(c_fl1->perm & BLK_PERM_WRITE)); /* Switch once more, to not care about real child order in the list */ - tricky->file = c_fl1; + s->selected = c_fl1; bdrv_child_refresh_perms(top, top->children.lh_first, &error_abort); assert(c_fl1->perm & BLK_PERM_WRITE); @@ -379,7 +406,8 @@ static void test_append_greedy_filter(void) BlockDriverState *base = no_perm_node("base"); BlockDriverState *fl = exclusive_writer_node("fl1"); - bdrv_attach_child(top, base, "backing", &child_of_bds, BDRV_CHILD_COW, + bdrv_attach_child(top, base, "backing", &child_of_bds, + BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY, &error_abort); bdrv_append(fl, base, &error_abort); diff --git a/tests/unit/test-block-iothread.c b/tests/unit/test-block-iothread.c index def0709b2b..8ca5adec5e 100644 --- a/tests/unit/test-block-iothread.c +++ b/tests/unit/test-block-iothread.c @@ -765,7 +765,7 @@ static void test_propagate_mirror(void) filter = bdrv_find_node("filter_node"); /* Change the AioContext of src */ - bdrv_try_set_aio_context(src, ctx, &error_abort); + bdrv_try_change_aio_context(src, ctx, NULL, &error_abort); g_assert(bdrv_get_aio_context(src) == ctx); g_assert(bdrv_get_aio_context(target) == ctx); g_assert(bdrv_get_aio_context(filter) == ctx); @@ -773,7 +773,7 @@ static void test_propagate_mirror(void) /* Change the AioContext of target */ aio_context_acquire(ctx); - bdrv_try_set_aio_context(target, main_ctx, &error_abort); + bdrv_try_change_aio_context(target, main_ctx, NULL, &error_abort); aio_context_release(ctx); g_assert(bdrv_get_aio_context(src) == main_ctx); g_assert(bdrv_get_aio_context(target) == main_ctx); @@ -783,7 +783,7 @@ static void test_propagate_mirror(void) blk = blk_new(qemu_get_aio_context(), 0, BLK_PERM_ALL); blk_insert_bs(blk, src, &error_abort); - bdrv_try_set_aio_context(target, ctx, &local_err); + bdrv_try_change_aio_context(target, ctx, NULL, &local_err); error_free_or_abort(&local_err); g_assert(blk_get_aio_context(blk) == main_ctx); @@ -794,7 +794,7 @@ static void test_propagate_mirror(void) /* ...unless we explicitly allow it */ aio_context_acquire(ctx); blk_set_allow_aio_context_change(blk, true); - bdrv_try_set_aio_context(target, ctx, &error_abort); + bdrv_try_change_aio_context(target, ctx, NULL, &error_abort); aio_context_release(ctx); g_assert(blk_get_aio_context(blk) == ctx); @@ -806,7 +806,7 @@ static void test_propagate_mirror(void) aio_context_acquire(ctx); blk_set_aio_context(blk, main_ctx, &error_abort); - bdrv_try_set_aio_context(target, main_ctx, &error_abort); + bdrv_try_change_aio_context(target, main_ctx, NULL, &error_abort); aio_context_release(ctx); blk_unref(blk); diff --git a/tests/unit/test-crypto-block.c b/tests/unit/test-crypto-block.c index 3417b67be5..b629e240a9 100644 --- a/tests/unit/test-crypto-block.c +++ b/tests/unit/test-crypto-block.c @@ -22,6 +22,7 @@ #include "qapi/error.h" #include "crypto/init.h" #include "crypto/block.h" +#include "crypto/block-luks-priv.h" #include "qemu/buffer.h" #include "qemu/module.h" #include "crypto/secret.h" @@ -30,7 +31,8 @@ #endif #if (defined(_WIN32) || defined RUSAGE_THREAD) && \ - (defined(CONFIG_NETTLE) || defined(CONFIG_GCRYPT)) + (defined(CONFIG_NETTLE) || defined(CONFIG_GCRYPT) || \ + defined(CONFIG_GNUTLS_CRYPTO)) #define TEST_LUKS #else #undef TEST_LUKS @@ -344,6 +346,230 @@ static void test_block(gconstpointer opaque) } +#ifdef TEST_LUKS +typedef const char *(*LuksHeaderDoBadStuff)(QCryptoBlockLUKSHeader *hdr); + +static void +test_luks_bad_header(gconstpointer data) +{ + LuksHeaderDoBadStuff badstuff = data; + QCryptoBlock *blk; + Buffer buf; + Object *sec = test_block_secret(); + QCryptoBlockLUKSHeader hdr; + Error *err = NULL; + const char *msg; + + memset(&buf, 0, sizeof(buf)); + buffer_init(&buf, "header"); + + /* Correctly create the volume initially */ + blk = qcrypto_block_create(&luks_create_opts_default, NULL, + test_block_init_func, + test_block_write_func, + &buf, + &error_abort); + g_assert(blk); + + qcrypto_block_free(blk); + + /* Mangle it in some unpleasant way */ + g_assert(buf.offset >= sizeof(hdr)); + memcpy(&hdr, buf.buffer, sizeof(hdr)); + qcrypto_block_luks_to_disk_endian(&hdr); + + msg = badstuff(&hdr); + + qcrypto_block_luks_from_disk_endian(&hdr); + memcpy(buf.buffer, &hdr, sizeof(hdr)); + + /* Check that we fail to open it again */ + blk = qcrypto_block_open(&luks_open_opts, NULL, + test_block_read_func, + &buf, + 0, + 1, + &err); + g_assert(!blk); + g_assert(err); + + g_assert_cmpstr(error_get_pretty(err), ==, msg); + error_free(err); + + object_unparent(sec); + + buffer_free(&buf); +} + +static const char *luks_bad_null_term_cipher_name(QCryptoBlockLUKSHeader *hdr) +{ + /* Replace NUL termination with spaces */ + char *offset = hdr->cipher_name + strlen(hdr->cipher_name); + memset(offset, ' ', sizeof(hdr->cipher_name) - (offset - hdr->cipher_name)); + + return "LUKS header cipher name is not NUL terminated"; +} + +static const char *luks_bad_null_term_cipher_mode(QCryptoBlockLUKSHeader *hdr) +{ + /* Replace NUL termination with spaces */ + char *offset = hdr->cipher_mode + strlen(hdr->cipher_mode); + memset(offset, ' ', sizeof(hdr->cipher_mode) - (offset - hdr->cipher_mode)); + + return "LUKS header cipher mode is not NUL terminated"; +} + +static const char *luks_bad_null_term_hash_spec(QCryptoBlockLUKSHeader *hdr) +{ + /* Replace NUL termination with spaces */ + char *offset = hdr->hash_spec + strlen(hdr->hash_spec); + memset(offset, ' ', sizeof(hdr->hash_spec) - (offset - hdr->hash_spec)); + + return "LUKS header hash spec is not NUL terminated"; +} + +static const char *luks_bad_cipher_name_empty(QCryptoBlockLUKSHeader *hdr) +{ + memcpy(hdr->cipher_name, "", 1); + + return "Algorithm '' with key size 32 bytes not supported"; +} + +static const char *luks_bad_cipher_name_unknown(QCryptoBlockLUKSHeader *hdr) +{ + memcpy(hdr->cipher_name, "aess", 5); + + return "Algorithm 'aess' with key size 32 bytes not supported"; +} + +static const char *luks_bad_cipher_xts_size(QCryptoBlockLUKSHeader *hdr) +{ + hdr->master_key_len = 33; + + return "XTS cipher key length should be a multiple of 2"; +} + +static const char *luks_bad_cipher_cbc_size(QCryptoBlockLUKSHeader *hdr) +{ + hdr->master_key_len = 33; + memcpy(hdr->cipher_mode, "cbc-essiv", 10); + + return "Algorithm 'aes' with key size 33 bytes not supported"; +} + +static const char *luks_bad_cipher_mode_empty(QCryptoBlockLUKSHeader *hdr) +{ + memcpy(hdr->cipher_mode, "", 1); + + return "Unexpected cipher mode string format ''"; +} + +static const char *luks_bad_cipher_mode_unknown(QCryptoBlockLUKSHeader *hdr) +{ + memcpy(hdr->cipher_mode, "xfs", 4); + + return "Unexpected cipher mode string format 'xfs'"; +} + +static const char *luks_bad_ivgen_separator(QCryptoBlockLUKSHeader *hdr) +{ + memcpy(hdr->cipher_mode, "xts:plain64", 12); + + return "Unexpected cipher mode string format 'xts:plain64'"; +} + +static const char *luks_bad_ivgen_name_empty(QCryptoBlockLUKSHeader *hdr) +{ + memcpy(hdr->cipher_mode, "xts-", 5); + + return "IV generator '' not supported"; +} + +static const char *luks_bad_ivgen_name_unknown(QCryptoBlockLUKSHeader *hdr) +{ + memcpy(hdr->cipher_mode, "xts-plain65", 12); + + return "IV generator 'plain65' not supported"; +} + +static const char *luks_bad_ivgen_hash_empty(QCryptoBlockLUKSHeader *hdr) +{ + memcpy(hdr->cipher_mode, "xts-plain65:", 13); + + return "Hash algorithm '' not supported"; +} + +static const char *luks_bad_ivgen_hash_unknown(QCryptoBlockLUKSHeader *hdr) +{ + memcpy(hdr->cipher_mode, "xts-plain65:sha257", 19); + + return "Hash algorithm 'sha257' not supported"; +} + +static const char *luks_bad_hash_spec_empty(QCryptoBlockLUKSHeader *hdr) +{ + memcpy(hdr->hash_spec, "", 1); + + return "Hash algorithm '' not supported"; +} + +static const char *luks_bad_hash_spec_unknown(QCryptoBlockLUKSHeader *hdr) +{ + memcpy(hdr->hash_spec, "sha2566", 8); + + return "Hash algorithm 'sha2566' not supported"; +} + +static const char *luks_bad_stripes(QCryptoBlockLUKSHeader *hdr) +{ + hdr->key_slots[0].stripes = 3999; + + return "Keyslot 0 is corrupted (stripes 3999 != 4000)"; +} + +static const char *luks_bad_key_overlap_header(QCryptoBlockLUKSHeader *hdr) +{ + hdr->key_slots[0].key_offset_sector = 2; + + return "Keyslot 0 is overlapping with the LUKS header"; +} + +static const char *luks_bad_key_overlap_key(QCryptoBlockLUKSHeader *hdr) +{ + hdr->key_slots[0].key_offset_sector = hdr->key_slots[1].key_offset_sector; + + return "Keyslots 0 and 1 are overlapping in the header"; +} + +static const char *luks_bad_key_overlap_payload(QCryptoBlockLUKSHeader *hdr) +{ + hdr->key_slots[0].key_offset_sector = hdr->payload_offset_sector + 42; + + return "Keyslot 0 is overlapping with the encrypted payload"; +} + +static const char *luks_bad_payload_overlap_header(QCryptoBlockLUKSHeader *hdr) +{ + hdr->payload_offset_sector = 2; + + return "LUKS payload is overlapping with the header"; +} + +static const char *luks_bad_key_iterations(QCryptoBlockLUKSHeader *hdr) +{ + hdr->key_slots[0].iterations = 0; + + return "Keyslot 0 iteration count is zero"; +} + +static const char *luks_bad_iterations(QCryptoBlockLUKSHeader *hdr) +{ + hdr->master_key_iterations = 0; + + return "LUKS key iteration count is zero"; +} +#endif + int main(int argc, char **argv) { gsize i; @@ -364,5 +590,79 @@ int main(int argc, char **argv) } } +#ifdef TEST_LUKS + if (g_test_slow()) { + g_test_add_data_func("/crypto/block/luks/bad/cipher-name-nul-term", + luks_bad_null_term_cipher_name, + test_luks_bad_header); + g_test_add_data_func("/crypto/block/luks/bad/cipher-mode-nul-term", + luks_bad_null_term_cipher_mode, + test_luks_bad_header); + g_test_add_data_func("/crypto/block/luks/bad/hash-spec-nul-term", + luks_bad_null_term_hash_spec, + test_luks_bad_header); + g_test_add_data_func("/crypto/block/luks/bad/cipher-name-empty", + luks_bad_cipher_name_empty, + test_luks_bad_header); + g_test_add_data_func("/crypto/block/luks/bad/cipher-name-unknown", + luks_bad_cipher_name_unknown, + test_luks_bad_header); + g_test_add_data_func("/crypto/block/luks/bad/cipher-xts-size", + luks_bad_cipher_xts_size, + test_luks_bad_header); + g_test_add_data_func("/crypto/block/luks/bad/cipher-cbc-size", + luks_bad_cipher_cbc_size, + test_luks_bad_header); + g_test_add_data_func("/crypto/block/luks/bad/cipher-mode-empty", + luks_bad_cipher_mode_empty, + test_luks_bad_header); + g_test_add_data_func("/crypto/block/luks/bad/cipher-mode-unknown", + luks_bad_cipher_mode_unknown, + test_luks_bad_header); + g_test_add_data_func("/crypto/block/luks/bad/ivgen-separator", + luks_bad_ivgen_separator, + test_luks_bad_header); + g_test_add_data_func("/crypto/block/luks/bad/ivgen-name-empty", + luks_bad_ivgen_name_empty, + test_luks_bad_header); + g_test_add_data_func("/crypto/block/luks/bad/ivgen-name-unknown", + luks_bad_ivgen_name_unknown, + test_luks_bad_header); + g_test_add_data_func("/crypto/block/luks/bad/ivgen-hash-empty", + luks_bad_ivgen_hash_empty, + test_luks_bad_header); + g_test_add_data_func("/crypto/block/luks/bad/ivgen-hash-unknown", + luks_bad_ivgen_hash_unknown, + test_luks_bad_header); + g_test_add_data_func("/crypto/block/luks/bad/hash-spec-empty", + luks_bad_hash_spec_empty, + test_luks_bad_header); + g_test_add_data_func("/crypto/block/luks/bad/hash-spec-unknown", + luks_bad_hash_spec_unknown, + test_luks_bad_header); + g_test_add_data_func("/crypto/block/luks/bad/stripes", + luks_bad_stripes, + test_luks_bad_header); + g_test_add_data_func("/crypto/block/luks/bad/key-overlap-header", + luks_bad_key_overlap_header, + test_luks_bad_header); + g_test_add_data_func("/crypto/block/luks/bad/key-overlap-key", + luks_bad_key_overlap_key, + test_luks_bad_header); + g_test_add_data_func("/crypto/block/luks/bad/key-overlap-payload", + luks_bad_key_overlap_payload, + test_luks_bad_header); + g_test_add_data_func("/crypto/block/luks/bad/payload-overlap-header", + luks_bad_payload_overlap_header, + test_luks_bad_header); + g_test_add_data_func("/crypto/block/luks/bad/iterations", + luks_bad_iterations, + test_luks_bad_header); + g_test_add_data_func("/crypto/block/luks/bad/key-iterations", + luks_bad_key_iterations, + test_luks_bad_header); + } +#endif + return g_test_run(); } diff --git a/tests/vm/freebsd b/tests/vm/freebsd index 3643fe325d..d6ff4461ba 100755 --- a/tests/vm/freebsd +++ b/tests/vm/freebsd @@ -66,6 +66,9 @@ class FreeBSDVM(basevm.BaseVM): # libs: networking "libslirp", + + # libs: sndio + "sndio", ] BUILD_SCRIPT = """ diff --git a/tests/vm/openbsd b/tests/vm/openbsd index 6f1b6f5b98..eaeb201e91 100755 --- a/tests/vm/openbsd +++ b/tests/vm/openbsd @@ -22,8 +22,8 @@ class OpenBSDVM(basevm.BaseVM): name = "openbsd" arch = "x86_64" - link = "https://cdn.openbsd.org/pub/OpenBSD/7.1/amd64/install71.iso" - csum = "d3a7c5b9bf890bc404304a1c96f9ee72e1d9bbcf9cc849c1133bdb0d67843396" + link = "https://cdn.openbsd.org/pub/OpenBSD/7.2/amd64/install72.iso" + csum = "0369ef40a3329efcb978c578c7fdc7bda71e502aecec930a74b44160928c91d3" size = "20G" pkgs = [ # tools @@ -56,6 +56,9 @@ class OpenBSDVM(basevm.BaseVM): # libs: migration "zstd", + + # libs: networking + "libslirp", ] BUILD_SCRIPT = """ diff --git a/util/meson.build b/util/meson.build index 5e282130df..c0a7bc54d4 100644 --- a/util/meson.build +++ b/util/meson.build @@ -1,4 +1,5 @@ util_ss.add(files('osdep.c', 'cutils.c', 'unicode.c', 'qemu-timer-common.c')) +util_ss.add(files('thread-context.c'), numa) if not config_host_data.get('CONFIG_ATOMIC64') util_ss.add(files('atomic64.c')) endif diff --git a/util/osdep.c b/util/osdep.c index 746d5f7d71..77c1a6c562 100644 --- a/util/osdep.c +++ b/util/osdep.c @@ -502,6 +502,28 @@ int qemu_accept(int s, struct sockaddr *addr, socklen_t *addrlen) return ret; } +ssize_t qemu_send_full(int s, const void *buf, size_t count) +{ + ssize_t ret = 0; + ssize_t total = 0; + + while (count) { + ret = send(s, buf, count, 0); + if (ret < 0) { + if (errno == EINTR) { + continue; + } + break; + } + + count -= ret; + buf += ret; + total += ret; + } + + return total; +} + void qemu_set_hw_version(const char *version) { hw_version = version; diff --git a/util/oslib-posix.c b/util/oslib-posix.c index 827a7aadba..59a891b6a8 100644 --- a/util/oslib-posix.c +++ b/util/oslib-posix.c @@ -42,6 +42,7 @@ #include "qemu/cutils.h" #include "qemu/compiler.h" #include "qemu/units.h" +#include "qemu/thread-context.h" #ifdef CONFIG_LINUX #include <sys/syscall.h> @@ -329,7 +330,7 @@ static void sigbus_handler(int signal) return; } #endif /* CONFIG_LINUX */ - warn_report("os_mem_prealloc: unrelated SIGBUS detected and ignored"); + warn_report("qemu_prealloc_mem: unrelated SIGBUS detected and ignored"); } static void *do_touch_pages(void *arg) @@ -399,13 +400,13 @@ static void *do_madv_populate_write_pages(void *arg) } static inline int get_memset_num_threads(size_t hpagesize, size_t numpages, - int smp_cpus) + int max_threads) { long host_procs = sysconf(_SC_NPROCESSORS_ONLN); int ret = 1; if (host_procs > 0) { - ret = MIN(MIN(host_procs, MAX_MEM_PREALLOC_THREAD_COUNT), smp_cpus); + ret = MIN(MIN(host_procs, MAX_MEM_PREALLOC_THREAD_COUNT), max_threads); } /* Especially with gigantic pages, don't create more threads than pages. */ @@ -418,11 +419,12 @@ static inline int get_memset_num_threads(size_t hpagesize, size_t numpages, } static int touch_all_pages(char *area, size_t hpagesize, size_t numpages, - int smp_cpus, bool use_madv_populate_write) + int max_threads, ThreadContext *tc, + bool use_madv_populate_write) { static gsize initialized = 0; MemsetContext context = { - .num_threads = get_memset_num_threads(hpagesize, numpages, smp_cpus), + .num_threads = get_memset_num_threads(hpagesize, numpages, max_threads), }; size_t numpages_per_thread, leftover; void *(*touch_fn)(void *); @@ -457,9 +459,16 @@ static int touch_all_pages(char *area, size_t hpagesize, size_t numpages, context.threads[i].numpages = numpages_per_thread + (i < leftover); context.threads[i].hpagesize = hpagesize; context.threads[i].context = &context; - qemu_thread_create(&context.threads[i].pgthread, "touch_pages", - touch_fn, &context.threads[i], - QEMU_THREAD_JOINABLE); + if (tc) { + thread_context_create_thread(tc, &context.threads[i].pgthread, + "touch_pages", + touch_fn, &context.threads[i], + QEMU_THREAD_JOINABLE); + } else { + qemu_thread_create(&context.threads[i].pgthread, "touch_pages", + touch_fn, &context.threads[i], + QEMU_THREAD_JOINABLE); + } addr += context.threads[i].numpages * hpagesize; } @@ -494,13 +503,13 @@ static bool madv_populate_write_possible(char *area, size_t pagesize) errno != EINVAL; } -void os_mem_prealloc(int fd, char *area, size_t memory, int smp_cpus, - Error **errp) +void qemu_prealloc_mem(int fd, char *area, size_t sz, int max_threads, + ThreadContext *tc, Error **errp) { static gsize initialized; int ret; size_t hpagesize = qemu_fd_getpagesize(fd); - size_t numpages = DIV_ROUND_UP(memory, hpagesize); + size_t numpages = DIV_ROUND_UP(sz, hpagesize); bool use_madv_populate_write; struct sigaction act; @@ -530,24 +539,24 @@ void os_mem_prealloc(int fd, char *area, size_t memory, int smp_cpus, if (ret) { qemu_mutex_unlock(&sigbus_mutex); error_setg_errno(errp, errno, - "os_mem_prealloc: failed to install signal handler"); + "qemu_prealloc_mem: failed to install signal handler"); return; } } /* touch pages simultaneously */ - ret = touch_all_pages(area, hpagesize, numpages, smp_cpus, + ret = touch_all_pages(area, hpagesize, numpages, max_threads, tc, use_madv_populate_write); if (ret) { error_setg_errno(errp, -ret, - "os_mem_prealloc: preallocating memory failed"); + "qemu_prealloc_mem: preallocating memory failed"); } if (!use_madv_populate_write) { ret = sigaction(SIGBUS, &sigbus_oldact, NULL); if (ret) { /* Terminate QEMU since it can't recover from error */ - perror("os_mem_prealloc: failed to reinstall signal handler"); + perror("qemu_prealloc_mem: failed to reinstall signal handler"); exit(1); } qemu_mutex_unlock(&sigbus_mutex); diff --git a/util/oslib-win32.c b/util/oslib-win32.c index 5723d3eb4c..a67cb3822e 100644 --- a/util/oslib-win32.c +++ b/util/oslib-win32.c @@ -268,14 +268,14 @@ int getpagesize(void) return system_info.dwPageSize; } -void os_mem_prealloc(int fd, char *area, size_t memory, int smp_cpus, - Error **errp) +void qemu_prealloc_mem(int fd, char *area, size_t sz, int max_threads, + ThreadContext *tc, Error **errp) { int i; size_t pagesize = qemu_real_host_page_size(); - memory = (memory + pagesize - 1) & -pagesize; - for (i = 0; i < memory / pagesize; i++) { + sz = (sz + pagesize - 1) & -pagesize; + for (i = 0; i < sz / pagesize; i++) { memset(area + pagesize * i, 0, 1); } } diff --git a/util/qemu-coroutine-lock.c b/util/qemu-coroutine-lock.c index 15c82d9348..45c6b57374 100644 --- a/util/qemu-coroutine-lock.c +++ b/util/qemu-coroutine-lock.c @@ -39,10 +39,15 @@ void qemu_co_queue_init(CoQueue *queue) QSIMPLEQ_INIT(&queue->entries); } -void coroutine_fn qemu_co_queue_wait_impl(CoQueue *queue, QemuLockable *lock) +void coroutine_fn qemu_co_queue_wait_impl(CoQueue *queue, QemuLockable *lock, + CoQueueWaitFlags flags) { Coroutine *self = qemu_coroutine_self(); - QSIMPLEQ_INSERT_TAIL(&queue->entries, self, co_queue_next); + if (flags & CO_QUEUE_WAIT_FRONT) { + QSIMPLEQ_INSERT_HEAD(&queue->entries, self, co_queue_next); + } else { + QSIMPLEQ_INSERT_TAIL(&queue->entries, self, co_queue_next); + } if (lock) { qemu_lockable_unlock(lock); diff --git a/util/qemu-sockets.c b/util/qemu-sockets.c index 83f4bd6fd2..d185245023 100644 --- a/util/qemu-sockets.c +++ b/util/qemu-sockets.c @@ -919,9 +919,8 @@ static int unix_listen_saddr(UnixSocketAddress *saddr, if (saddr->path[0] || abstract) { path = saddr->path; } else { - const char *tmpdir = getenv("TMPDIR"); - tmpdir = tmpdir ? tmpdir : "/tmp"; - path = pathbuf = g_strdup_printf("%s/qemu-socket-XXXXXX", tmpdir); + path = pathbuf = g_strdup_printf("%s/qemu-socket-XXXXXX", + g_get_tmp_dir()); } pathlen = strlen(path); @@ -1077,6 +1076,26 @@ int unix_connect(const char *path, Error **errp) return sock; } +char *socket_uri(SocketAddress *addr) +{ + switch (addr->type) { + case SOCKET_ADDRESS_TYPE_INET: + return g_strdup_printf("tcp:%s:%s", + addr->u.inet.host, + addr->u.inet.port); + case SOCKET_ADDRESS_TYPE_UNIX: + return g_strdup_printf("unix:%s", + addr->u.q_unix.path); + case SOCKET_ADDRESS_TYPE_FD: + return g_strdup_printf("fd:%s", addr->u.fd.str); + case SOCKET_ADDRESS_TYPE_VSOCK: + return g_strdup_printf("vsock:%s:%s", + addr->u.vsock.cid, + addr->u.vsock.port); + default: + return g_strdup("unknown address type"); + } +} SocketAddress *socket_parse(const char *str, Error **errp) { @@ -1104,6 +1123,11 @@ SocketAddress *socket_parse(const char *str, Error **errp) if (vsock_parse(&addr->u.vsock, str + strlen("vsock:"), errp)) { goto fail; } + } else if (strstart(str, "tcp:", NULL)) { + addr->type = SOCKET_ADDRESS_TYPE_INET; + if (inet_parse(&addr->u.inet, str + strlen("tcp:"), errp)) { + goto fail; + } } else { addr->type = SOCKET_ADDRESS_TYPE_INET; if (inet_parse(&addr->u.inet, str, errp)) { diff --git a/util/qemu-thread-posix.c b/util/qemu-thread-posix.c index ac1d56e673..bae938c670 100644 --- a/util/qemu-thread-posix.c +++ b/util/qemu-thread-posix.c @@ -16,6 +16,7 @@ #include "qemu/notify.h" #include "qemu-thread-common.h" #include "qemu/tsan.h" +#include "qemu/bitmap.h" static bool name_threads; @@ -552,6 +553,75 @@ void qemu_thread_create(QemuThread *thread, const char *name, pthread_attr_destroy(&attr); } +int qemu_thread_set_affinity(QemuThread *thread, unsigned long *host_cpus, + unsigned long nbits) +{ +#if defined(CONFIG_PTHREAD_AFFINITY_NP) + const size_t setsize = CPU_ALLOC_SIZE(nbits); + unsigned long value; + cpu_set_t *cpuset; + int err; + + cpuset = CPU_ALLOC(nbits); + g_assert(cpuset); + + CPU_ZERO_S(setsize, cpuset); + value = find_first_bit(host_cpus, nbits); + while (value < nbits) { + CPU_SET_S(value, setsize, cpuset); + value = find_next_bit(host_cpus, nbits, value + 1); + } + + err = pthread_setaffinity_np(thread->thread, setsize, cpuset); + CPU_FREE(cpuset); + return err; +#else + return -ENOSYS; +#endif +} + +int qemu_thread_get_affinity(QemuThread *thread, unsigned long **host_cpus, + unsigned long *nbits) +{ +#if defined(CONFIG_PTHREAD_AFFINITY_NP) + unsigned long tmpbits; + cpu_set_t *cpuset; + size_t setsize; + int i, err; + + tmpbits = CPU_SETSIZE; + while (true) { + setsize = CPU_ALLOC_SIZE(tmpbits); + cpuset = CPU_ALLOC(tmpbits); + g_assert(cpuset); + + err = pthread_getaffinity_np(thread->thread, setsize, cpuset); + if (err) { + CPU_FREE(cpuset); + if (err != -EINVAL) { + return err; + } + tmpbits *= 2; + } else { + break; + } + } + + /* Convert the result into a proper bitmap. */ + *nbits = tmpbits; + *host_cpus = bitmap_new(tmpbits); + for (i = 0; i < tmpbits; i++) { + if (CPU_ISSET(i, cpuset)) { + set_bit(i, *host_cpus); + } + } + CPU_FREE(cpuset); + return 0; +#else + return -ENOSYS; +#endif +} + void qemu_thread_get_self(QemuThread *thread) { thread->thread = pthread_self(); diff --git a/util/qemu-thread-win32.c b/util/qemu-thread-win32.c index b9a467d7db..69db254ac7 100644 --- a/util/qemu-thread-win32.c +++ b/util/qemu-thread-win32.c @@ -477,6 +477,18 @@ void qemu_thread_create(QemuThread *thread, const char *name, thread->data = data; } +int qemu_thread_set_affinity(QemuThread *thread, unsigned long *host_cpus, + unsigned long nbits) +{ + return -ENOSYS; +} + +int qemu_thread_get_affinity(QemuThread *thread, unsigned long **host_cpus, + unsigned long *nbits) +{ + return -ENOSYS; +} + void qemu_thread_get_self(QemuThread *thread) { thread->data = qemu_thread_data; diff --git a/util/thread-context.c b/util/thread-context.c new file mode 100644 index 0000000000..4138245332 --- /dev/null +++ b/util/thread-context.c @@ -0,0 +1,362 @@ +/* + * QEMU Thread Context + * + * Copyright Red Hat Inc., 2022 + * + * Authors: + * David Hildenbrand <david@redhat.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#include "qemu/osdep.h" +#include "qemu/thread-context.h" +#include "qapi/error.h" +#include "qapi/qapi-builtin-visit.h" +#include "qapi/visitor.h" +#include "qemu/config-file.h" +#include "qapi/qapi-builtin-visit.h" +#include "qom/object_interfaces.h" +#include "qemu/module.h" +#include "qemu/bitmap.h" + +#ifdef CONFIG_NUMA +#include <numa.h> +#endif + +enum { + TC_CMD_NONE = 0, + TC_CMD_STOP, + TC_CMD_NEW, +}; + +typedef struct ThreadContextCmdNew { + QemuThread *thread; + const char *name; + void *(*start_routine)(void *); + void *arg; + int mode; +} ThreadContextCmdNew; + +static void *thread_context_run(void *opaque) +{ + ThreadContext *tc = opaque; + + tc->thread_id = qemu_get_thread_id(); + qemu_sem_post(&tc->sem); + + while (true) { + /* + * Threads inherit the CPU affinity of the creating thread. For this + * reason, we create new (especially short-lived) threads from our + * persistent context thread. + * + * Especially when QEMU is not allowed to set the affinity itself, + * management tools can simply set the affinity of the context thread + * after creating the context, to have new threads created via + * the context inherit the CPU affinity automatically. + */ + switch (tc->thread_cmd) { + case TC_CMD_NONE: + break; + case TC_CMD_STOP: + tc->thread_cmd = TC_CMD_NONE; + qemu_sem_post(&tc->sem); + return NULL; + case TC_CMD_NEW: { + ThreadContextCmdNew *cmd_new = tc->thread_cmd_data; + + qemu_thread_create(cmd_new->thread, cmd_new->name, + cmd_new->start_routine, cmd_new->arg, + cmd_new->mode); + tc->thread_cmd = TC_CMD_NONE; + tc->thread_cmd_data = NULL; + qemu_sem_post(&tc->sem); + break; + } + default: + g_assert_not_reached(); + } + qemu_sem_wait(&tc->sem_thread); + } +} + +static void thread_context_set_cpu_affinity(Object *obj, Visitor *v, + const char *name, void *opaque, + Error **errp) +{ + ThreadContext *tc = THREAD_CONTEXT(obj); + uint16List *l, *host_cpus = NULL; + unsigned long *bitmap = NULL; + int nbits = 0, ret; + Error *err = NULL; + + if (tc->init_cpu_bitmap) { + error_setg(errp, "Mixing CPU and node affinity not supported"); + return; + } + + visit_type_uint16List(v, name, &host_cpus, &err); + if (err) { + error_propagate(errp, err); + return; + } + + if (!host_cpus) { + error_setg(errp, "CPU list is empty"); + goto out; + } + + for (l = host_cpus; l; l = l->next) { + nbits = MAX(nbits, l->value + 1); + } + bitmap = bitmap_new(nbits); + for (l = host_cpus; l; l = l->next) { + set_bit(l->value, bitmap); + } + + if (tc->thread_id != -1) { + /* + * Note: we won't be adjusting the affinity of any thread that is still + * around, but only the affinity of the context thread. + */ + ret = qemu_thread_set_affinity(&tc->thread, bitmap, nbits); + if (ret) { + error_setg(errp, "Setting CPU affinity failed: %s", strerror(ret)); + } + } else { + tc->init_cpu_bitmap = bitmap; + bitmap = NULL; + tc->init_cpu_nbits = nbits; + } +out: + g_free(bitmap); + qapi_free_uint16List(host_cpus); +} + +static void thread_context_get_cpu_affinity(Object *obj, Visitor *v, + const char *name, void *opaque, + Error **errp) +{ + unsigned long *bitmap, nbits, value; + ThreadContext *tc = THREAD_CONTEXT(obj); + uint16List *host_cpus = NULL; + uint16List **tail = &host_cpus; + int ret; + + if (tc->thread_id == -1) { + error_setg(errp, "Object not initialized yet"); + return; + } + + ret = qemu_thread_get_affinity(&tc->thread, &bitmap, &nbits); + if (ret) { + error_setg(errp, "Getting CPU affinity failed: %s", strerror(ret)); + return; + } + + value = find_first_bit(bitmap, nbits); + while (value < nbits) { + QAPI_LIST_APPEND(tail, value); + + value = find_next_bit(bitmap, nbits, value + 1); + } + g_free(bitmap); + + visit_type_uint16List(v, name, &host_cpus, errp); + qapi_free_uint16List(host_cpus); +} + +static void thread_context_set_node_affinity(Object *obj, Visitor *v, + const char *name, void *opaque, + Error **errp) +{ +#ifdef CONFIG_NUMA + const int nbits = numa_num_possible_cpus(); + ThreadContext *tc = THREAD_CONTEXT(obj); + uint16List *l, *host_nodes = NULL; + unsigned long *bitmap = NULL; + struct bitmask *tmp_cpus; + Error *err = NULL; + int ret, i; + + if (tc->init_cpu_bitmap) { + error_setg(errp, "Mixing CPU and node affinity not supported"); + return; + } + + visit_type_uint16List(v, name, &host_nodes, &err); + if (err) { + error_propagate(errp, err); + return; + } + + if (!host_nodes) { + error_setg(errp, "Node list is empty"); + goto out; + } + + bitmap = bitmap_new(nbits); + tmp_cpus = numa_allocate_cpumask(); + for (l = host_nodes; l; l = l->next) { + numa_bitmask_clearall(tmp_cpus); + ret = numa_node_to_cpus(l->value, tmp_cpus); + if (ret) { + /* We ignore any errors, such as impossible nodes. */ + continue; + } + for (i = 0; i < nbits; i++) { + if (numa_bitmask_isbitset(tmp_cpus, i)) { + set_bit(i, bitmap); + } + } + } + numa_free_cpumask(tmp_cpus); + + if (bitmap_empty(bitmap, nbits)) { + error_setg(errp, "The nodes select no CPUs"); + goto out; + } + + if (tc->thread_id != -1) { + /* + * Note: we won't be adjusting the affinity of any thread that is still + * around for now, but only the affinity of the context thread. + */ + ret = qemu_thread_set_affinity(&tc->thread, bitmap, nbits); + if (ret) { + error_setg(errp, "Setting CPU affinity failed: %s", strerror(ret)); + } + } else { + tc->init_cpu_bitmap = bitmap; + bitmap = NULL; + tc->init_cpu_nbits = nbits; + } +out: + g_free(bitmap); + qapi_free_uint16List(host_nodes); +#else + error_setg(errp, "NUMA node affinity is not supported by this QEMU"); +#endif +} + +static void thread_context_get_thread_id(Object *obj, Visitor *v, + const char *name, void *opaque, + Error **errp) +{ + ThreadContext *tc = THREAD_CONTEXT(obj); + uint64_t value = tc->thread_id; + + visit_type_uint64(v, name, &value, errp); +} + +static void thread_context_instance_complete(UserCreatable *uc, Error **errp) +{ + ThreadContext *tc = THREAD_CONTEXT(uc); + char *thread_name; + int ret; + + thread_name = g_strdup_printf("TC %s", + object_get_canonical_path_component(OBJECT(uc))); + qemu_thread_create(&tc->thread, thread_name, thread_context_run, tc, + QEMU_THREAD_JOINABLE); + g_free(thread_name); + + /* Wait until initialization of the thread is done. */ + while (tc->thread_id == -1) { + qemu_sem_wait(&tc->sem); + } + + if (tc->init_cpu_bitmap) { + ret = qemu_thread_set_affinity(&tc->thread, tc->init_cpu_bitmap, + tc->init_cpu_nbits); + if (ret) { + error_setg(errp, "Setting CPU affinity failed: %s", strerror(ret)); + } + g_free(tc->init_cpu_bitmap); + tc->init_cpu_bitmap = NULL; + } +} + +static void thread_context_class_init(ObjectClass *oc, void *data) +{ + UserCreatableClass *ucc = USER_CREATABLE_CLASS(oc); + + ucc->complete = thread_context_instance_complete; + object_class_property_add(oc, "thread-id", "int", + thread_context_get_thread_id, NULL, NULL, + NULL); + object_class_property_add(oc, "cpu-affinity", "int", + thread_context_get_cpu_affinity, + thread_context_set_cpu_affinity, NULL, NULL); + object_class_property_add(oc, "node-affinity", "int", NULL, + thread_context_set_node_affinity, NULL, NULL); +} + +static void thread_context_instance_init(Object *obj) +{ + ThreadContext *tc = THREAD_CONTEXT(obj); + + tc->thread_id = -1; + qemu_sem_init(&tc->sem, 0); + qemu_sem_init(&tc->sem_thread, 0); + qemu_mutex_init(&tc->mutex); +} + +static void thread_context_instance_finalize(Object *obj) +{ + ThreadContext *tc = THREAD_CONTEXT(obj); + + if (tc->thread_id != -1) { + tc->thread_cmd = TC_CMD_STOP; + qemu_sem_post(&tc->sem_thread); + qemu_thread_join(&tc->thread); + } + qemu_sem_destroy(&tc->sem); + qemu_sem_destroy(&tc->sem_thread); + qemu_mutex_destroy(&tc->mutex); +} + +static const TypeInfo thread_context_info = { + .name = TYPE_THREAD_CONTEXT, + .parent = TYPE_OBJECT, + .class_init = thread_context_class_init, + .instance_size = sizeof(ThreadContext), + .instance_init = thread_context_instance_init, + .instance_finalize = thread_context_instance_finalize, + .interfaces = (InterfaceInfo[]) { + { TYPE_USER_CREATABLE }, + { } + } +}; + +static void thread_context_register_types(void) +{ + type_register_static(&thread_context_info); +} +type_init(thread_context_register_types) + +void thread_context_create_thread(ThreadContext *tc, QemuThread *thread, + const char *name, + void *(*start_routine)(void *), void *arg, + int mode) +{ + ThreadContextCmdNew data = { + .thread = thread, + .name = name, + .start_routine = start_routine, + .arg = arg, + .mode = mode, + }; + + qemu_mutex_lock(&tc->mutex); + tc->thread_cmd = TC_CMD_NEW; + tc->thread_cmd_data = &data; + qemu_sem_post(&tc->sem_thread); + + while (tc->thread_cmd != TC_CMD_NONE) { + qemu_sem_wait(&tc->sem); + } + qemu_mutex_unlock(&tc->mutex); +} diff --git a/util/vfio-helpers.c b/util/vfio-helpers.c index 5ba01177bf..0d1520caac 100644 --- a/util/vfio-helpers.c +++ b/util/vfio-helpers.c @@ -847,10 +847,13 @@ void qemu_vfio_close(QEMUVFIOState *s) if (!s) { return; } + + ram_block_notifier_remove(&s->ram_notifier); + for (i = 0; i < s->nr_mappings; ++i) { qemu_vfio_undo_mapping(s, &s->mappings[i], NULL); } - ram_block_notifier_remove(&s->ram_notifier); + g_free(s->usable_iova_ranges); s->nb_iova_ranges = 0; qemu_vfio_reset(s); |