diff options
92 files changed, 1233 insertions, 1079 deletions
diff --git a/MAINTAINERS b/MAINTAINERS index c98a61caee..6ccdec7f02 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1245,7 +1245,7 @@ F: hw/openrisc/openrisc_sim.c PowerPC Machines ---------------- -405 +405 (ref405ep and taihu) L: qemu-ppc@nongnu.org S: Orphan F: hw/ppc/ppc405_boards.c @@ -1281,6 +1281,7 @@ New World (mac99) M: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk> L: qemu-ppc@nongnu.org S: Odd Fixes +F: docs/system/ppc/powermac.rst F: hw/ppc/mac_newworld.c F: hw/pci-host/uninorth.c F: hw/pci-bridge/dec.[hc] @@ -1299,6 +1300,7 @@ Old World (g3beige) M: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk> L: qemu-ppc@nongnu.org S: Odd Fixes +F: docs/system/ppc/powermac.rst F: hw/ppc/mac_oldworld.c F: hw/pci-host/grackle.c F: hw/misc/macio/ @@ -1312,6 +1314,7 @@ PReP M: Hervé Poussineau <hpoussin@reactos.org> L: qemu-ppc@nongnu.org S: Maintained +F: docs/system/ppc/prep.rst F: hw/ppc/prep.c F: hw/ppc/prep_systemio.c F: hw/ppc/rs6000_mc.c @@ -1324,7 +1327,7 @@ F: include/hw/isa/pc87312.h F: include/hw/rtc/m48t59.h F: tests/avocado/ppc_prep_40p.py -sPAPR +sPAPR (pseries) M: Cédric Le Goater <clg@kaod.org> M: Daniel Henrique Barboza <danielhb413@gmail.com> R: David Gibson <david@gibson.dropbear.id.au> @@ -1336,8 +1339,8 @@ F: include/hw/*/spapr* F: hw/*/xics* F: include/hw/*/xics* F: pc-bios/slof.bin -F: docs/specs/ppc-spapr-hcalls.txt -F: docs/specs/ppc-spapr-hotplug.txt +F: docs/system/ppc/pseries.rst +F: docs/specs/ppc-spapr-* F: tests/qtest/spapr* F: tests/qtest/libqos/*spapr* F: tests/qtest/rtas* @@ -1348,6 +1351,7 @@ PowerNV (Non-Virtualized) M: Cédric Le Goater <clg@kaod.org> L: qemu-ppc@nongnu.org S: Maintained +F: docs/system/ppc/powernv.rst F: hw/ppc/pnv* F: hw/intc/pnv* F: hw/intc/xics_pnv.c @@ -206,14 +206,11 @@ recurse-clean: $(addsuffix /clean, $(ROM_DIRS)) clean: recurse-clean -$(quiet-@)test -f build.ninja && $(NINJA) $(NINJAFLAGS) -t clean || : -$(quiet-@)test -f build.ninja && $(NINJA) $(NINJAFLAGS) clean-ctlist || : -# avoid old build problems 
by removing potentially incorrect old files - rm -f config.mak op-i386.h opc-i386.h gen-op-i386.h op-arm.h opc-arm.h gen-op-arm.h find . \( -name '*.so' -o -name '*.dll' -o -name '*.[oda]' \) -type f \ ! -path ./roms/edk2/ArmPkg/Library/GccLto/liblto-aarch64.a \ ! -path ./roms/edk2/ArmPkg/Library/GccLto/liblto-arm.a \ -exec rm {} + - rm -f TAGS cscope.* *.pod *~ */*~ - rm -f fsdev/*.pod scsi/*.pod + rm -f TAGS cscope.* *~ */*~ VERSION = $(shell cat $(SRC_PATH)/VERSION) @@ -224,10 +221,10 @@ qemu-%.tar.bz2: distclean: clean -$(quiet-@)test -f build.ninja && $(NINJA) $(NINJAFLAGS) -t clean -g || : - rm -f config-host.mak config-host.h* config-poison.h + rm -f config-host.mak rm -f tests/tcg/config-*.mak - rm -f config-all-disas.mak config.status - rm -f roms/seabios/config.mak roms/vgabios/config.mak + rm -f config.status + rm -f roms/seabios/config.mak rm -f qemu-plugins-ld.symbols qemu-plugins-ld64.symbols rm -f *-config-target.h *-config-devices.mak *-config-devices.h rm -rf meson-private meson-logs meson-info compile_commands.json diff --git a/block/curl.c b/block/curl.c index 4a8ae2b269..6a6cd72975 100644 --- a/block/curl.c +++ b/block/curl.c @@ -125,7 +125,7 @@ static gboolean curl_drop_socket(void *key, void *value, void *opaque) BDRVCURLState *s = socket->s; aio_set_fd_handler(s->aio_context, socket->fd, false, - NULL, NULL, NULL, NULL); + NULL, NULL, NULL, NULL, NULL); return true; } @@ -173,19 +173,20 @@ static int curl_sock_cb(CURL *curl, curl_socket_t fd, int action, switch (action) { case CURL_POLL_IN: aio_set_fd_handler(s->aio_context, fd, false, - curl_multi_do, NULL, NULL, socket); + curl_multi_do, NULL, NULL, NULL, socket); break; case CURL_POLL_OUT: aio_set_fd_handler(s->aio_context, fd, false, - NULL, curl_multi_do, NULL, socket); + NULL, curl_multi_do, NULL, NULL, socket); break; case CURL_POLL_INOUT: aio_set_fd_handler(s->aio_context, fd, false, - curl_multi_do, curl_multi_do, NULL, socket); + curl_multi_do, curl_multi_do, + NULL, NULL, socket); 
break; case CURL_POLL_REMOVE: aio_set_fd_handler(s->aio_context, fd, false, - NULL, NULL, NULL, NULL); + NULL, NULL, NULL, NULL, NULL); break; } diff --git a/block/export/fuse.c b/block/export/fuse.c index 823c126d23..6710d8aed8 100644 --- a/block/export/fuse.c +++ b/block/export/fuse.c @@ -223,7 +223,7 @@ static int setup_fuse_export(FuseExport *exp, const char *mountpoint, aio_set_fd_handler(exp->common.ctx, fuse_session_fd(exp->fuse_session), true, - read_from_fuse_export, NULL, NULL, exp); + read_from_fuse_export, NULL, NULL, NULL, exp); exp->fd_handler_set_up = true; return 0; @@ -267,7 +267,7 @@ static void fuse_export_shutdown(BlockExport *blk_exp) if (exp->fd_handler_set_up) { aio_set_fd_handler(exp->common.ctx, fuse_session_fd(exp->fuse_session), true, - NULL, NULL, NULL, NULL); + NULL, NULL, NULL, NULL, NULL); exp->fd_handler_set_up = false; } } diff --git a/block/file-posix.c b/block/file-posix.c index b283093e5b..1f1756e192 100644 --- a/block/file-posix.c +++ b/block/file-posix.c @@ -106,10 +106,6 @@ #include <sys/diskslice.h> #endif -#ifdef CONFIG_XFS -#include <xfs/xfs.h> -#endif - /* OS X does not have O_DSYNC */ #ifndef O_DSYNC #ifdef O_SYNC @@ -156,9 +152,6 @@ typedef struct BDRVRawState { int perm_change_flags; BDRVReopenState *reopen_state; -#ifdef CONFIG_XFS - bool is_xfs:1; -#endif bool has_discard:1; bool has_write_zeroes:1; bool discard_zeroes:1; @@ -409,14 +402,22 @@ static void raw_probe_alignment(BlockDriverState *bs, int fd, Error **errp) if (probe_logical_blocksize(fd, &bs->bl.request_alignment) < 0) { bs->bl.request_alignment = 0; } -#ifdef CONFIG_XFS - if (s->is_xfs) { - struct dioattr da; - if (xfsctl(NULL, fd, XFS_IOC_DIOINFO, &da) >= 0) { - bs->bl.request_alignment = da.d_miniosz; - /* The kernel returns wrong information for d_mem */ - /* s->buf_align = da.d_mem; */ - } + +#ifdef __linux__ + /* + * The XFS ioctl definitions are shipped in extra packages that might + * not always be available. 
Since we just need the XFS_IOC_DIOINFO ioctl + * here, we simply use our own definition instead: + */ + struct xfs_dioattr { + uint32_t d_mem; + uint32_t d_miniosz; + uint32_t d_maxiosz; + } da; + if (ioctl(fd, _IOR('X', 30, struct xfs_dioattr), &da) >= 0) { + bs->bl.request_alignment = da.d_miniosz; + /* The kernel returns wrong information for d_mem */ + /* s->buf_align = da.d_mem; */ } #endif @@ -798,12 +799,6 @@ static int raw_open_common(BlockDriverState *bs, QDict *options, #endif s->needs_alignment = raw_needs_alignment(bs); -#ifdef CONFIG_XFS - if (platform_test_xfs_fd(s->fd)) { - s->is_xfs = true; - } -#endif - bs->supported_zero_flags = BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK; if (S_ISREG(st.st_mode)) { /* When extending regular files, we get zeros from the OS */ diff --git a/block/io_uring.c b/block/io_uring.c index dfa475cc87..782afdb433 100644 --- a/block/io_uring.c +++ b/block/io_uring.c @@ -292,12 +292,14 @@ static bool qemu_luring_poll_cb(void *opaque) { LuringState *s = opaque; - if (io_uring_cq_ready(&s->ring)) { - luring_process_completions_and_submit(s); - return true; - } + return io_uring_cq_ready(&s->ring); +} + +static void qemu_luring_poll_ready(void *opaque) +{ + LuringState *s = opaque; - return false; + luring_process_completions_and_submit(s); } static void ioq_init(LuringQueue *io_q) @@ -402,8 +404,8 @@ int coroutine_fn luring_co_submit(BlockDriverState *bs, LuringState *s, int fd, void luring_detach_aio_context(LuringState *s, AioContext *old_context) { - aio_set_fd_handler(old_context, s->ring.ring_fd, false, NULL, NULL, NULL, - s); + aio_set_fd_handler(old_context, s->ring.ring_fd, false, + NULL, NULL, NULL, NULL, s); qemu_bh_delete(s->completion_bh); s->aio_context = NULL; } @@ -413,7 +415,8 @@ void luring_attach_aio_context(LuringState *s, AioContext *new_context) s->aio_context = new_context; s->completion_bh = aio_bh_new(new_context, qemu_luring_completion_bh, s); aio_set_fd_handler(s->aio_context, s->ring.ring_fd, false, - 
qemu_luring_completion_cb, NULL, qemu_luring_poll_cb, s); + qemu_luring_completion_cb, NULL, + qemu_luring_poll_cb, qemu_luring_poll_ready, s); } LuringState *luring_init(Error **errp) diff --git a/block/iscsi.c b/block/iscsi.c index 57aa07a40d..51f2a5eeaa 100644 --- a/block/iscsi.c +++ b/block/iscsi.c @@ -363,7 +363,7 @@ iscsi_set_events(IscsiLun *iscsilun) false, (ev & POLLIN) ? iscsi_process_read : NULL, (ev & POLLOUT) ? iscsi_process_write : NULL, - NULL, + NULL, NULL, iscsilun); iscsilun->events = ev; } @@ -1534,7 +1534,7 @@ static void iscsi_detach_aio_context(BlockDriverState *bs) IscsiLun *iscsilun = bs->opaque; aio_set_fd_handler(iscsilun->aio_context, iscsi_get_fd(iscsilun->iscsi), - false, NULL, NULL, NULL, NULL); + false, NULL, NULL, NULL, NULL, NULL); iscsilun->events = 0; if (iscsilun->nop_timer) { diff --git a/block/linux-aio.c b/block/linux-aio.c index f53ae72e21..4c423fcccf 100644 --- a/block/linux-aio.c +++ b/block/linux-aio.c @@ -263,12 +263,15 @@ static bool qemu_laio_poll_cb(void *opaque) LinuxAioState *s = container_of(e, LinuxAioState, e); struct io_event *events; - if (!io_getevents_peek(s->ctx, &events)) { - return false; - } + return io_getevents_peek(s->ctx, &events); +} + +static void qemu_laio_poll_ready(EventNotifier *opaque) +{ + EventNotifier *e = opaque; + LinuxAioState *s = container_of(e, LinuxAioState, e); qemu_laio_process_completions_and_submit(s); - return true; } static void ioq_init(LaioQueue *io_q) @@ -427,7 +430,7 @@ int coroutine_fn laio_co_submit(BlockDriverState *bs, LinuxAioState *s, int fd, void laio_detach_aio_context(LinuxAioState *s, AioContext *old_context) { - aio_set_event_notifier(old_context, &s->e, false, NULL, NULL); + aio_set_event_notifier(old_context, &s->e, false, NULL, NULL, NULL); qemu_bh_delete(s->completion_bh); s->aio_context = NULL; } @@ -438,7 +441,8 @@ void laio_attach_aio_context(LinuxAioState *s, AioContext *new_context) s->completion_bh = aio_bh_new(new_context, qemu_laio_completion_bh, s); 
aio_set_event_notifier(new_context, &s->e, false, qemu_laio_completion_cb, - qemu_laio_poll_cb); + qemu_laio_poll_cb, + qemu_laio_poll_ready); } LinuxAioState *laio_init(Error **errp) diff --git a/block/nfs.c b/block/nfs.c index 577aea1d22..444c40b458 100644 --- a/block/nfs.c +++ b/block/nfs.c @@ -197,7 +197,7 @@ static void nfs_set_events(NFSClient *client) false, (ev & POLLIN) ? nfs_process_read : NULL, (ev & POLLOUT) ? nfs_process_write : NULL, - NULL, client); + NULL, NULL, client); } client->events = ev; @@ -372,7 +372,7 @@ static void nfs_detach_aio_context(BlockDriverState *bs) NFSClient *client = bs->opaque; aio_set_fd_handler(client->aio_context, nfs_get_fd(client->context), - false, NULL, NULL, NULL, NULL); + false, NULL, NULL, NULL, NULL, NULL); client->events = 0; } @@ -390,7 +390,7 @@ static void nfs_client_close(NFSClient *client) if (client->context) { qemu_mutex_lock(&client->mutex); aio_set_fd_handler(client->aio_context, nfs_get_fd(client->context), - false, NULL, NULL, NULL, NULL); + false, NULL, NULL, NULL, NULL, NULL); qemu_mutex_unlock(&client->mutex); if (client->fh) { nfs_close(client->context, client->fh); diff --git a/block/nvme.c b/block/nvme.c index fa360b9b3c..dd20de3865 100644 --- a/block/nvme.c +++ b/block/nvme.c @@ -605,10 +605,8 @@ out: return ret; } -static bool nvme_poll_queue(NVMeQueuePair *q) +static void nvme_poll_queue(NVMeQueuePair *q) { - bool progress = false; - const size_t cqe_offset = q->cq.head * NVME_CQ_ENTRY_BYTES; NvmeCqe *cqe = (NvmeCqe *)&q->cq.queue[cqe_offset]; @@ -619,30 +617,23 @@ static bool nvme_poll_queue(NVMeQueuePair *q) * cannot race with itself. 
*/ if ((le16_to_cpu(cqe->status) & 0x1) == q->cq_phase) { - return false; + return; } qemu_mutex_lock(&q->lock); while (nvme_process_completion(q)) { /* Keep polling */ - progress = true; } qemu_mutex_unlock(&q->lock); - - return progress; } -static bool nvme_poll_queues(BDRVNVMeState *s) +static void nvme_poll_queues(BDRVNVMeState *s) { - bool progress = false; int i; for (i = 0; i < s->queue_count; i++) { - if (nvme_poll_queue(s->queues[i])) { - progress = true; - } + nvme_poll_queue(s->queues[i]); } - return progress; } static void nvme_handle_event(EventNotifier *n) @@ -703,8 +694,30 @@ static bool nvme_poll_cb(void *opaque) EventNotifier *e = opaque; BDRVNVMeState *s = container_of(e, BDRVNVMeState, irq_notifier[MSIX_SHARED_IRQ_IDX]); + int i; - return nvme_poll_queues(s); + for (i = 0; i < s->queue_count; i++) { + NVMeQueuePair *q = s->queues[i]; + const size_t cqe_offset = q->cq.head * NVME_CQ_ENTRY_BYTES; + NvmeCqe *cqe = (NvmeCqe *)&q->cq.queue[cqe_offset]; + + /* + * q->lock isn't needed because nvme_process_completion() only runs in + * the event loop thread and cannot race with itself. 
+ */ + if ((le16_to_cpu(cqe->status) & 0x1) != q->cq_phase) { + return true; + } + } + return false; +} + +static void nvme_poll_ready(EventNotifier *e) +{ + BDRVNVMeState *s = container_of(e, BDRVNVMeState, + irq_notifier[MSIX_SHARED_IRQ_IDX]); + + nvme_poll_queues(s); } static int nvme_init(BlockDriverState *bs, const char *device, int namespace, @@ -839,7 +852,8 @@ static int nvme_init(BlockDriverState *bs, const char *device, int namespace, } aio_set_event_notifier(bdrv_get_aio_context(bs), &s->irq_notifier[MSIX_SHARED_IRQ_IDX], - false, nvme_handle_event, nvme_poll_cb); + false, nvme_handle_event, nvme_poll_cb, + nvme_poll_ready); if (!nvme_identify(bs, namespace, errp)) { ret = -EIO; @@ -924,7 +938,7 @@ static void nvme_close(BlockDriverState *bs) g_free(s->queues); aio_set_event_notifier(bdrv_get_aio_context(bs), &s->irq_notifier[MSIX_SHARED_IRQ_IDX], - false, NULL, NULL); + false, NULL, NULL, NULL); event_notifier_cleanup(&s->irq_notifier[MSIX_SHARED_IRQ_IDX]); qemu_vfio_pci_unmap_bar(s->vfio, 0, s->bar0_wo_map, 0, sizeof(NvmeBar) + NVME_DOORBELL_SIZE); @@ -1520,7 +1534,7 @@ static void nvme_detach_aio_context(BlockDriverState *bs) aio_set_event_notifier(bdrv_get_aio_context(bs), &s->irq_notifier[MSIX_SHARED_IRQ_IDX], - false, NULL, NULL); + false, NULL, NULL, NULL); } static void nvme_attach_aio_context(BlockDriverState *bs, @@ -1530,7 +1544,8 @@ static void nvme_attach_aio_context(BlockDriverState *bs, s->aio_context = new_context; aio_set_event_notifier(new_context, &s->irq_notifier[MSIX_SHARED_IRQ_IDX], - false, nvme_handle_event, nvme_poll_cb); + false, nvme_handle_event, nvme_poll_cb, + nvme_poll_ready); for (unsigned i = 0; i < s->queue_count; i++) { NVMeQueuePair *q = s->queues[i]; diff --git a/block/ssh.c b/block/ssh.c index e0fbb4934b..3b5bf34031 100644 --- a/block/ssh.c +++ b/block/ssh.c @@ -990,7 +990,7 @@ static void restart_coroutine(void *opaque) AioContext *ctx = bdrv_get_aio_context(bs); trace_ssh_restart_coroutine(restart->co); - 
aio_set_fd_handler(ctx, s->sock, false, NULL, NULL, NULL, NULL); + aio_set_fd_handler(ctx, s->sock, false, NULL, NULL, NULL, NULL, NULL); aio_co_wake(restart->co); } @@ -1020,7 +1020,7 @@ static coroutine_fn void co_yield(BDRVSSHState *s, BlockDriverState *bs) trace_ssh_co_yield(s->sock, rd_handler, wr_handler); aio_set_fd_handler(bdrv_get_aio_context(bs), s->sock, - false, rd_handler, wr_handler, NULL, &restart); + false, rd_handler, wr_handler, NULL, NULL, &restart); qemu_coroutine_yield(); trace_ssh_co_yield_back(s->sock); } diff --git a/block/win32-aio.c b/block/win32-aio.c index b7221a272f..c57e10c997 100644 --- a/block/win32-aio.c +++ b/block/win32-aio.c @@ -172,7 +172,7 @@ int win32_aio_attach(QEMUWin32AIOState *aio, HANDLE hfile) void win32_aio_detach_aio_context(QEMUWin32AIOState *aio, AioContext *old_context) { - aio_set_event_notifier(old_context, &aio->e, false, NULL, NULL); + aio_set_event_notifier(old_context, &aio->e, false, NULL, NULL, NULL); aio->aio_ctx = NULL; } @@ -181,7 +181,7 @@ void win32_aio_attach_aio_context(QEMUWin32AIOState *aio, { aio->aio_ctx = new_context; aio_set_event_notifier(new_context, &aio->e, false, - win32_aio_completion_cb, NULL); + win32_aio_completion_cb, NULL, NULL); } QEMUWin32AIOState *win32_aio_init(void) diff --git a/bsd-user/special-errno.h b/bsd-user/include/special-errno.h index 03599d9b5a..03599d9b5a 100644 --- a/bsd-user/special-errno.h +++ b/bsd-user/include/special-errno.h diff --git a/bsd-user/meson.build b/bsd-user/meson.build index 9fcb80c3fa..8380fa44c2 100644 --- a/bsd-user/meson.build +++ b/bsd-user/meson.build @@ -4,7 +4,7 @@ endif bsd_user_ss = ss.source_set() -common_user_inc += include_directories('.') +common_user_inc += include_directories('include') bsd_user_ss.add(files( 'bsdload.c', diff --git a/common-user/meson.build b/common-user/meson.build index 5cb42bc664..26212dda5c 100644 --- a/common-user/meson.build +++ b/common-user/meson.build @@ -1,6 +1,6 @@ common_user_inc += 
include_directories('host/' / host_arch) -common_user_ss.add(files( +user_ss.add(files( 'safe-syscall.S', 'safe-syscall-error.c', )) @@ -78,7 +78,6 @@ TMPC="${TMPDIR1}/${TMPB}.c" TMPO="${TMPDIR1}/${TMPB}.o" TMPCXX="${TMPDIR1}/${TMPB}.cxx" TMPE="${TMPDIR1}/${TMPB}.exe" -TMPTXT="${TMPDIR1}/${TMPB}.txt" rm -f config.log @@ -291,7 +290,6 @@ EXTRA_CXXFLAGS="" EXTRA_LDFLAGS="" xen_ctrl_version="$default_feature" -xfs="$default_feature" membarrier="$default_feature" vhost_kernel="$default_feature" vhost_net="$default_feature" @@ -309,21 +307,16 @@ debug="no" sanitizers="no" tsan="no" fortify_source="$default_feature" -strip_opt="yes" -mingw32="no" gcov="no" EXESUF="" modules="no" module_upgrades="no" prefix="/usr/local" qemu_suffix="qemu" -bsd="no" -linux="no" -solaris="no" profiler="no" softmmu="yes" -linux_user="no" -bsd_user="no" +linux_user="" +bsd_user="" pkgversion="" pie="" qom_cast_debug="yes" @@ -333,8 +326,6 @@ opengl="$default_feature" cpuid_h="no" avx2_opt="$default_feature" guest_agent="$default_feature" -guest_agent_with_vss="no" -guest_agent_ntddscsi="no" vss_win32_sdk="$default_feature" win_sdk="no" want_tools="$default_feature" @@ -529,6 +520,10 @@ fi # OS specific +mingw32="no" +bsd="no" +linux="no" +solaris="no" case $targetos in windows) mingw32="yes" @@ -540,7 +535,6 @@ gnu/kfreebsd) ;; freebsd) bsd="yes" - bsd_user="yes" make="${MAKE-gmake}" # needed for kinfo_getvmmap(3) in libutil.h ;; @@ -585,7 +579,6 @@ haiku) ;; linux) linux="yes" - linux_user="yes" vhost_user=${default_feature:-yes} ;; esac @@ -896,7 +889,6 @@ for opt do debug_tcg="yes" debug_mutex="yes" debug="yes" - strip_opt="no" fortify_source="no" ;; --enable-sanitizers) sanitizers="yes" @@ -907,8 +899,6 @@ for opt do ;; --disable-tsan) tsan="no" ;; - --disable-strip) strip_opt="no" - ;; --disable-slirp) slirp="disabled" ;; --enable-slirp) slirp="enabled" @@ -1021,10 +1011,6 @@ for opt do ;; --enable-opengl) opengl="yes" ;; - --disable-xfsctl) xfs="no" - ;; - --enable-xfsctl) xfs="yes" - 
;; --disable-zlib-test) ;; --enable-guest-agent) guest_agent="yes" @@ -1265,18 +1251,26 @@ if eval test -z "\${cross_cc_$cpu}"; then cross_cc_vars="$cross_cc_vars cross_cc_${cpu}" fi -# For user-mode emulation the host arch has to be one we explicitly -# support, even if we're using TCI. -if [ "$ARCH" = "unknown" ]; then - bsd_user="no" - linux_user="no" -fi - default_target_list="" deprecated_targets_list=ppc64abi32-linux-user deprecated_features="" mak_wilds="" +if [ "$linux_user" != no ]; then + if [ "$targetos" = linux ] && [ -d $source_path/linux-user/include/host/$cpu ]; then + linux_user=yes + elif [ "$linux_user" = yes ]; then + error_exit "linux-user not supported on this architecture" + fi +fi +if [ "$bsd_user" != no ]; then + if [ "$bsd_user" = "" ]; then + test $targetos = freebsd && bsd_user=yes + fi + if [ "$bsd_user" = yes ] && ! [ -d $source_path/bsd-user/$targetos ]; then + error_exit "bsd-user not supported on this host OS" + fi +fi if [ "$softmmu" = "yes" ]; then mak_wilds="${mak_wilds} $source_path/configs/targets/*-softmmu.mak" fi @@ -1367,7 +1361,6 @@ Advanced options (experts only): --enable-debug enable common debug build options --enable-sanitizers enable default sanitizers --enable-tsan enable thread sanitizer - --disable-strip disable stripping binaries --disable-werror disable compilation abort on warning --disable-stack-protector disable compiler-provided stack protection --audio-drv-list=LIST set audio drivers to try if -audiodev is not used @@ -1431,7 +1424,6 @@ cat << EOF avx512f AVX512F optimization support replication replication support opengl opengl support - xfsctl xfsctl support qom-cast-debug cast debugging support tools build qemu-io, qemu-nbd and qemu-img tools bochs bochs image format support @@ -1702,6 +1694,7 @@ if test "$static" = "yes" ; then plugins="no" fi fi +test "$plugins" = "" && plugins=yes cat > $TMPC << EOF @@ -2324,91 +2317,6 @@ EOF fi ########################################## -# xfsctl() probe, used for 
file-posix.c -if test "$xfs" != "no" ; then - cat > $TMPC << EOF -#include <stddef.h> /* NULL */ -#include <xfs/xfs.h> -int main(void) -{ - xfsctl(NULL, 0, 0, NULL); - return 0; -} -EOF - if compile_prog "" "" ; then - xfs="yes" - else - if test "$xfs" = "yes" ; then - feature_not_found "xfs" "Install xfsprogs/xfslibs devel" - fi - xfs=no - fi -fi - -########################################## -# plugin linker support probe - -if test "$plugins" != "no"; then - - ######################################### - # See if --dynamic-list is supported by the linker - - ld_dynamic_list="no" - cat > $TMPTXT <<EOF -{ - foo; -}; -EOF - - cat > $TMPC <<EOF -#include <stdio.h> -void foo(void); - -void foo(void) -{ - printf("foo\n"); -} - -int main(void) -{ - foo(); - return 0; -} -EOF - - if compile_prog "" "-Wl,--dynamic-list=$TMPTXT" ; then - ld_dynamic_list="yes" - fi - - ######################################### - # See if -exported_symbols_list is supported by the linker - - ld_exported_symbols_list="no" - cat > $TMPTXT <<EOF - _foo -EOF - - if compile_prog "" "-Wl,-exported_symbols_list,$TMPTXT" ; then - ld_exported_symbols_list="yes" - fi - - if test "$ld_dynamic_list" = "no" && - test "$ld_exported_symbols_list" = "no" ; then - if test "$plugins" = "yes"; then - error_exit \ - "Plugin support requires dynamic linking and specifying a set of symbols " \ - "that are exported to plugins. Unfortunately your linker doesn't " \ - "support the flag (--dynamic-list or -exported_symbols_list) used " \ - "for this purpose." 
- else - plugins="no" - fi - else - plugins="yes" - fi -fi - -########################################## # glib support probe glib_req_ver=2.56 @@ -2639,6 +2547,7 @@ fi ########################################## # check if we have VSS SDK headers for win +guest_agent_with_vss="no" if test "$mingw32" = "yes" && test "$guest_agent" != "no" && \ test "$vss_win32_sdk" != "no" ; then case "$vss_win32_sdk" in @@ -2669,7 +2578,6 @@ EOF echo "ERROR: The headers are extracted in the directory \`inc'." feature_not_found "VSS support" fi - guest_agent_with_vss="no" fi fi @@ -2696,6 +2604,7 @@ fi ########################################## # check if mingw environment provides a recent ntddscsi.h +guest_agent_ntddscsi="no" if test "$mingw32" = "yes" && test "$guest_agent" != "no"; then cat > $TMPC << EOF #include <windows.h> @@ -3400,9 +3309,6 @@ echo "GIT_SUBMODULES_ACTION=$git_submodules_action" >> $config_host_mak if test "$debug_tcg" = "yes" ; then echo "CONFIG_DEBUG_TCG=y" >> $config_host_mak fi -if test "$strip_opt" = "yes" ; then - echo "STRIP=${strip}" >> $config_host_mak -fi if test "$mingw32" = "yes" ; then echo "CONFIG_WIN32=y" >> $config_host_mak if test "$guest_agent_with_vss" = "yes" ; then @@ -3456,9 +3362,6 @@ echo "CONFIG_BDRV_RO_WHITELIST=$block_drv_ro_whitelist" >> $config_host_mak if test "$block_drv_whitelist_tools" = "yes" ; then echo "CONFIG_BDRV_WHITELIST_TOOLS=y" >> $config_host_mak fi -if test "$xfs" = "yes" ; then - echo "CONFIG_XFS=y" >> $config_host_mak -fi qemu_version=$(head $source_path/VERSION) echo "PKGVERSION=$pkgversion" >>$config_host_mak echo "SRC_PATH=$source_path" >> $config_host_mak @@ -3645,22 +3548,6 @@ fi if test "$plugins" = "yes" ; then echo "CONFIG_PLUGIN=y" >> $config_host_mak - # Copy the export object list to the build dir - if test "$ld_dynamic_list" = "yes" ; then - echo "CONFIG_HAS_LD_DYNAMIC_LIST=yes" >> $config_host_mak - ld_symbols=qemu-plugins-ld.symbols - cp "$source_path/plugins/qemu-plugins.symbols" $ld_symbols - elif 
test "$ld_exported_symbols_list" = "yes" ; then - echo "CONFIG_HAS_LD_EXPORTED_SYMBOLS_LIST=yes" >> $config_host_mak - ld64_symbols=qemu-plugins-ld64.symbols - echo "# Automatically generated by configure - do not modify" > $ld64_symbols - grep 'qemu_' "$source_path/plugins/qemu-plugins.symbols" | sed 's/;//g' | \ - sed -E 's/^[[:space:]]*(.*)/_\1/' >> $ld64_symbols - else - error_exit \ - "If \$plugins=yes, either \$ld_dynamic_list or " \ - "\$ld_exported_symbols_list should have been set to 'yes'." - fi fi if test -n "$gdb_bin"; then @@ -3699,6 +3586,7 @@ echo "GLIB_LIBS=$glib_libs" >> $config_host_mak echo "GLIB_VERSION=$(pkg-config --modversion glib-2.0)" >> $config_host_mak echo "QEMU_LDFLAGS=$QEMU_LDFLAGS" >> $config_host_mak echo "LD_I386_EMULATION=$ld_i386_emulation" >> $config_host_mak +echo "STRIP=$strip" >> $config_host_mak echo "EXESUF=$EXESUF" >> $config_host_mak echo "LIBS_QGA=$libs_qga" >> $config_host_mak @@ -3776,9 +3664,6 @@ fi # so the build tree will be missing the link back to the new file, and # tests might fail. Prefer to keep the relevant files in their own # directory and symlink the directory instead. -# UNLINK is used to remove symlinks from older development versions -# that might get into the way when doing "git update" without doing -# a "make distclean" in between. 
LINKS="Makefile" LINKS="$LINKS tests/tcg/Makefile.target" LINKS="$LINKS pc-bios/optionrom/Makefile" @@ -3790,7 +3675,6 @@ LINKS="$LINKS tests/avocado tests/data" LINKS="$LINKS tests/qemu-iotests/check" LINKS="$LINKS python" LINKS="$LINKS contrib/plugins/Makefile " -UNLINK="pc-bios/keymaps" for bios_file in \ $source_path/pc-bios/*.bin \ $source_path/pc-bios/*.elf \ @@ -3812,11 +3696,6 @@ for f in $LINKS ; do symlink "$source_path/$f" "$f" fi done -for f in $UNLINK ; do - if [ -L "$f" ]; then - rm -f "$f" - fi -done (for i in $cross_cc_vars; do export $i @@ -3825,7 +3704,8 @@ export target_list source_path use_containers cpu $source_path/tests/tcg/configure.sh) # temporary config to build submodules -for rom in seabios; do +if test -f $source_path/roms/seabios/Makefile; then + for rom in seabios; do config_mak=roms/$rom/config.mak echo "# Automatically generated by configure - do not modify" > $config_mak echo "SRC_PATH=$source_path/roms/$rom" >> $config_mak @@ -3838,7 +3718,8 @@ for rom in seabios; do echo "IASL=$iasl" >> $config_mak echo "LD=$ld" >> $config_mak echo "RANLIB=$ranlib" >> $config_mak -done + done +fi config_mak=pc-bios/optionrom/config.mak echo "# Automatically generated by configure - do not modify" > $config_mak @@ -3925,7 +3806,6 @@ if test "$skip_meson" = no; then -Doptimization=$(if test "$debug" = yes; then echo 0; else echo 2; fi) \ -Ddebug=$(if test "$debug_info" = yes; then echo true; else echo false; fi) \ -Dwerror=$(if test "$werror" = yes; then echo true; else echo false; fi) \ - -Dstrip=$(if test "$strip_opt" = yes; then echo true; else echo false; fi) \ -Db_pie=$(if test "$pie" = yes; then echo true; else echo false; fi) \ -Db_coverage=$(if test "$gcov" = yes; then echo true; else echo false; fi) \ -Db_lto=$lto -Dcfi=$cfi -Dtcg=$tcg -Dxen=$xen \ @@ -3956,17 +3836,6 @@ if test -n "${deprecated_features}"; then echo " features: ${deprecated_features}" fi -# Create list of config switches that should be poisoned in common code... 
-# but filter out CONFIG_TCG and CONFIG_USER_ONLY which are special. -target_configs_h=$(ls *-config-devices.h *-config-target.h 2>/dev/null) -if test -n "$target_configs_h" ; then - sed -n -e '/CONFIG_TCG/d' -e '/CONFIG_USER_ONLY/d' \ - -e '/^#define / { s///; s/ .*//; s/^/#pragma GCC poison /p; }' \ - $target_configs_h | sort -u > config-poison.h -else - :> config-poison.h -fi - # Save the configure command line for later reuse. cat <<EOD >config.status #!/bin/sh diff --git a/contrib/elf2dmp/meson.build b/contrib/elf2dmp/meson.build index 4d86cb390a..6707d43c4f 100644 --- a/contrib/elf2dmp/meson.build +++ b/contrib/elf2dmp/meson.build @@ -1,5 +1,5 @@ if curl.found() - executable('elf2dmp', files('main.c', 'addrspace.c', 'download.c', 'pdb.c', 'qemu_elf.c'), + executable('elf2dmp', files('main.c', 'addrspace.c', 'download.c', 'pdb.c', 'qemu_elf.c'), genh, dependencies: [glib, curl], install: true) endif diff --git a/contrib/ivshmem-client/meson.build b/contrib/ivshmem-client/meson.build index 1b171efb4f..ce8dcca84d 100644 --- a/contrib/ivshmem-client/meson.build +++ b/contrib/ivshmem-client/meson.build @@ -1,4 +1,4 @@ -executable('ivshmem-client', files('ivshmem-client.c', 'main.c'), +executable('ivshmem-client', files('ivshmem-client.c', 'main.c'), genh, dependencies: glib, build_by_default: targetos == 'linux', install: false) diff --git a/contrib/ivshmem-server/meson.build b/contrib/ivshmem-server/meson.build index 3a53942201..c6c3c82e89 100644 --- a/contrib/ivshmem-server/meson.build +++ b/contrib/ivshmem-server/meson.build @@ -1,4 +1,4 @@ -executable('ivshmem-server', files('ivshmem-server.c', 'main.c'), +executable('ivshmem-server', files('ivshmem-server.c', 'main.c'), genh, dependencies: [qemuutil, rt], build_by_default: targetos == 'linux', install: false) diff --git a/contrib/rdmacm-mux/meson.build b/contrib/rdmacm-mux/meson.build index 6cc5016747..7674f54cc5 100644 --- a/contrib/rdmacm-mux/meson.build +++ b/contrib/rdmacm-mux/meson.build @@ -2,7 +2,7 @@ 
if 'CONFIG_PVRDMA' in config_host # if not found, CONFIG_PVRDMA should not be set # FIXME: broken on big endian architectures libumad = cc.find_library('ibumad', required: true) - executable('rdmacm-mux', files('main.c'), + executable('rdmacm-mux', files('main.c'), genh, dependencies: [glib, libumad], build_by_default: false, install: false) diff --git a/docs/papr-pef.txt b/docs/papr-pef.txt deleted file mode 100644 index 72550e9bf8..0000000000 --- a/docs/papr-pef.txt +++ /dev/null @@ -1,30 +0,0 @@ -POWER (PAPR) Protected Execution Facility (PEF) -=============================================== - -Protected Execution Facility (PEF), also known as Secure Guest support -is a feature found on IBM POWER9 and POWER10 processors. - -If a suitable firmware including an Ultravisor is installed, it adds -an extra memory protection mode to the CPU. The ultravisor manages a -pool of secure memory which cannot be accessed by the hypervisor. - -When this feature is enabled in QEMU, a guest can use ultracalls to -enter "secure mode". This transfers most of its memory to secure -memory, where it cannot be eavesdropped by a compromised hypervisor. - -Launching ---------- - -To launch a guest which will be permitted to enter PEF secure mode: - -# ${QEMU} \ - -object pef-guest,id=pef0 \ - -machine confidential-guest-support=pef0 \ - ... - -Live Migration ----------------- - -Live migration is not yet implemented for PEF guests. For -consistency, we currently prevent migration if the PEF feature is -enabled, whether or not the guest has actually entered secure mode. diff --git a/docs/specs/ppc-spapr-hcalls.rst b/docs/specs/ppc-spapr-hcalls.rst index 28daf9734a..6cdcef2026 100644 --- a/docs/specs/ppc-spapr-hcalls.rst +++ b/docs/specs/ppc-spapr-hcalls.rst @@ -1,13 +1,12 @@ +====================== sPAPR hypervisor calls ----------------------- +====================== When used with the ``pseries`` machine type, ``qemu-system-ppc64`` implements -a set of hypervisor calls (a.k.a. 
hcalls) defined in the `Linux on Power -Architecture Reference document (LoPAR) -<https://cdn.openpowerfoundation.org/wp-content/uploads/2020/07/LoPAR-20200812.pdf>`_. -This document is a subset of the Power Architecture Platform Reference (PAPR+) -specification (IBM internal only), which is what PowerVM, the IBM proprietary -hypervisor, adheres to. +a set of hypervisor calls (a.k.a. hcalls) defined in the Linux on Power +Architecture Reference ([LoPAR]_) document. This document is a subset of the +Power Architecture Platform Reference (PAPR+) specification (IBM internal only), +which is what PowerVM, the IBM proprietary hypervisor, adheres to. The subset in LoPAR is selected based on the requirements of Linux as a guest. @@ -18,8 +17,8 @@ running in the guest and QEMU. All those hypercalls start at hcall number 0xf000 which correspond to an implementation specific range in PAPR. -H_RTAS (0xf000) -^^^^^^^^^^^^^^^ +``H_RTAS (0xf000)`` +=================== RTAS stands for Run-Time Abstraction Sercies and is a set of runtime services generally provided by the firmware inside the guest to the operating system. It @@ -44,8 +43,8 @@ Returns: ``H_PARAMETER``: Unknown token. -H_LOGICAL_MEMOP (0xf001) -^^^^^^^^^^^^^^^^^^^^^^^^ +``H_LOGICAL_MEMOP (0xf001)`` +============================ When the guest runs in "real mode" (in powerpc terminology this means with MMU disabled, i.e. guest effective address equals to guest physical address), it diff --git a/docs/system/ppc/pseries.rst b/docs/system/ppc/pseries.rst index 72e315eff6..1689324815 100644 --- a/docs/system/ppc/pseries.rst +++ b/docs/system/ppc/pseries.rst @@ -1,19 +1,18 @@ +=================================== pSeries family boards (``pseries``) =================================== -The Power machine para-virtualized environment described by the `Linux on Power -Architecture Reference document (LoPAR) -<https://openpowerfoundation.org/wp-content/uploads/2020/07/LoPAR-20200812.pdf>`_ -is called pSeries. 
This environment is also known as sPAPR, System p guests, or -simply Power Linux guests (although it is capable of running other operating -systems, such as AIX). +The Power machine para-virtualized environment described by the Linux on Power +Architecture Reference ([LoPAR]_) document is called pSeries. This environment +is also known as sPAPR, System p guests, or simply Power Linux guests (although +it is capable of running other operating systems, such as AIX). Even though pSeries is designed to behave as a guest environment, it is also capable of acting as a hypervisor OS, providing, on that role, nested virtualization capabilities. Supported devices ------------------ +================= * Multi processor support for many Power processors generations: POWER7, POWER7+, POWER8, POWER8NVL, POWER9, and Power10. Support for POWER5+ exists, @@ -26,12 +25,12 @@ Supported devices * PCIe device pass through. Missing devices ---------------- +=============== * SPICE support. Firmware --------- +======== `SLOF <https://github.com/aik/SLOF>`_ (Slimline Open Firmware) is an implementation of the `IEEE 1275-1994, Standard for Boot (Initialization @@ -42,14 +41,14 @@ QEMU includes a prebuilt image of SLOF which is updated when a more recent version is required. Build directions ----------------- +================ .. code-block:: bash ./configure --target-list=ppc64-softmmu && make Running instructions --------------------- +==================== Someone can select the pSeries machine type by running QEMU with the following options: @@ -59,7 +58,7 @@ options: qemu-system-ppc64 -M pseries <other QEMU arguments> sPAPR devices -------------- +============= The sPAPR specification defines a set of para-virtualized devices, which are also supported by the pSeries machine in QEMU and can be instantiated with the @@ -102,11 +101,9 @@ device, or specify one with an ID NVRAM device with ``-global spapr-nvram.drive=pfid``. 
sPAPR specification -^^^^^^^^^^^^^^^^^^^ +------------------- -The main source of documentation on the sPAPR standard is the `Linux on Power -Architecture Reference document (LoPAR) -<https://openpowerfoundation.org/wp-content/uploads/2020/07/LoPAR-20200812.pdf>`_. +The main source of documentation on the sPAPR standard is the [LoPAR]_ document. However, documentation specific to QEMU's implementation of the specification can also be found in QEMU documentation: @@ -124,7 +121,7 @@ Other documentation available in QEMU docs directory: (``/docs/specs/ppc-spapr-uv-hcalls.txt``). Switching between the KVM-PR and KVM-HV kernel module ------------------------------------------------------ +===================================================== Currently, there are two implementations of KVM on Power, ``kvm_hv.ko`` and ``kvm_pr.ko``. @@ -139,7 +136,7 @@ possible to switch between the two modes with the ``kvm-type`` parameter: instead. KVM-PR -^^^^^^ +------ KVM-PR uses the so-called **PR**\ oblem state of the PPC CPUs to run the guests, i.e. the virtual machine is run in user mode and all privileged instructions @@ -166,7 +163,7 @@ In order to run KVM-PR guests with POWER9 processors, someone will need to start QEMU with ``kernel_irqchip=off`` command line option. KVM-HV -^^^^^^ +------ KVM-HV uses the hypervisor mode of more recent Power processors, that allow access to the bare metal hardware directly. Although POWER7 had this capability, @@ -188,7 +185,7 @@ CPUs generations, e.g. you can run a POWER7 guest on a POWER8 host by using ``-cpu POWER8,compat=power7`` as parameter to QEMU. Modules support ---------------- +=============== As noticed in the sections above, each module can run in a different environment. The following table shows with which environment each module can @@ -230,9 +227,45 @@ nested. Combinations not shown in the table are not available. .. [3] Introduced on Power10 machines. 
+ +POWER (PAPR) Protected Execution Facility (PEF) +----------------------------------------------- + +Protected Execution Facility (PEF), also known as Secure Guest support +is a feature found on IBM POWER9 and POWER10 processors. + +If a suitable firmware including an Ultravisor is installed, it adds +an extra memory protection mode to the CPU. The ultravisor manages a +pool of secure memory which cannot be accessed by the hypervisor. + +When this feature is enabled in QEMU, a guest can use ultracalls to +enter "secure mode". This transfers most of its memory to secure +memory, where it cannot be eavesdropped by a compromised hypervisor. + +Launching +^^^^^^^^^ + +To launch a guest which will be permitted to enter PEF secure mode:: + + $ qemu-system-ppc64 \ + -object pef-guest,id=pef0 \ + -machine confidential-guest-support=pef0 \ + ... + +Live Migration +^^^^^^^^^^^^^^ + +Live migration is not yet implemented for PEF guests. For +consistency, QEMU currently prevents migration if the PEF feature is +enabled, whether or not the guest has actually entered secure mode. + + Maintainer contact information ------------------------------- +============================== Cédric Le Goater <clg@kaod.org> Daniel Henrique Barboza <danielhb413@gmail.com> + +.. [LoPAR] `Linux on Power Architecture Reference document (LoPAR) revision + 2.9 <https://openpowerfoundation.org/wp-content/uploads/2020/07/LoPAR-20200812.pdf>`_. 
diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c index ee5a5352dc..49276e46f2 100644 --- a/hw/block/dataplane/virtio-blk.c +++ b/hw/block/dataplane/virtio-blk.c @@ -154,17 +154,6 @@ void virtio_blk_data_plane_destroy(VirtIOBlockDataPlane *s) g_free(s); } -static bool virtio_blk_data_plane_handle_output(VirtIODevice *vdev, - VirtQueue *vq) -{ - VirtIOBlock *s = (VirtIOBlock *)vdev; - - assert(s->dataplane); - assert(s->dataplane_started); - - return virtio_blk_handle_vq(s, vq); -} - /* Context: QEMU global mutex held */ int virtio_blk_data_plane_start(VirtIODevice *vdev) { @@ -258,8 +247,7 @@ int virtio_blk_data_plane_start(VirtIODevice *vdev) for (i = 0; i < nvqs; i++) { VirtQueue *vq = virtio_get_queue(s->vdev, i); - virtio_queue_aio_set_host_notifier_handler(vq, s->ctx, - virtio_blk_data_plane_handle_output); + virtio_queue_aio_attach_host_notifier(vq, s->ctx); } aio_context_release(s->ctx); return 0; @@ -302,7 +290,7 @@ static void virtio_blk_data_plane_stop_bh(void *opaque) for (i = 0; i < s->conf->num_queues; i++) { VirtQueue *vq = virtio_get_queue(s->vdev, i); - virtio_queue_aio_set_host_notifier_handler(vq, s->ctx, NULL); + virtio_queue_aio_detach_host_notifier(vq, s->ctx); } } diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c index f139cd7cc9..82676cdd01 100644 --- a/hw/block/virtio-blk.c +++ b/hw/block/virtio-blk.c @@ -767,12 +767,11 @@ static int virtio_blk_handle_request(VirtIOBlockReq *req, MultiReqBuffer *mrb) return 0; } -bool virtio_blk_handle_vq(VirtIOBlock *s, VirtQueue *vq) +void virtio_blk_handle_vq(VirtIOBlock *s, VirtQueue *vq) { VirtIOBlockReq *req; MultiReqBuffer mrb = {}; bool suppress_notifications = virtio_queue_get_notification(vq); - bool progress = false; aio_context_acquire(blk_get_aio_context(s->blk)); blk_io_plug(s->blk); @@ -783,7 +782,6 @@ bool virtio_blk_handle_vq(VirtIOBlock *s, VirtQueue *vq) } while ((req = virtio_blk_get_request(s, vq))) { - progress = true; if 
(virtio_blk_handle_request(req, &mrb)) { virtqueue_detach_element(req->vq, &req->elem, 0); virtio_blk_free_request(req); @@ -802,19 +800,13 @@ bool virtio_blk_handle_vq(VirtIOBlock *s, VirtQueue *vq) blk_io_unplug(s->blk); aio_context_release(blk_get_aio_context(s->blk)); - return progress; -} - -static void virtio_blk_handle_output_do(VirtIOBlock *s, VirtQueue *vq) -{ - virtio_blk_handle_vq(s, vq); } static void virtio_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq) { VirtIOBlock *s = (VirtIOBlock *)vdev; - if (s->dataplane) { + if (s->dataplane && !s->dataplane_started) { /* Some guests kick before setting VIRTIO_CONFIG_S_DRIVER_OK so start * dataplane here instead of waiting for .set_status(). */ @@ -823,7 +815,7 @@ static void virtio_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq) return; } } - virtio_blk_handle_output_do(s, vq); + virtio_blk_handle_vq(s, vq); } void virtio_blk_process_queued_requests(VirtIOBlock *s, bool is_bh) diff --git a/hw/pci-host/pnv_phb3.c b/hw/pci-host/pnv_phb3.c index c78084cce7..7fb35dc031 100644 --- a/hw/pci-host/pnv_phb3.c +++ b/hw/pci-host/pnv_phb3.c @@ -19,6 +19,7 @@ #include "hw/irq.h" #include "hw/qdev-properties.h" #include "qom/object.h" +#include "sysemu/sysemu.h" #define phb3_error(phb, fmt, ...) 
\ qemu_log_mask(LOG_GUEST_ERROR, "phb3[%d:%d]: " fmt "\n", \ @@ -981,10 +982,6 @@ static void pnv_phb3_instance_init(Object *obj) /* Power Bus Common Queue */ object_initialize_child(obj, "pbcq", &phb->pbcq, TYPE_PNV_PBCQ); - /* Root Port */ - object_initialize_child(obj, "root", &phb->root, TYPE_PNV_PHB3_ROOT_PORT); - qdev_prop_set_int32(DEVICE(&phb->root), "addr", PCI_DEVFN(0, 0)); - qdev_prop_set_bit(DEVICE(&phb->root), "multifunction", false); } static void pnv_phb3_realize(DeviceState *dev, Error **errp) @@ -994,6 +991,30 @@ static void pnv_phb3_realize(DeviceState *dev, Error **errp) PnvMachineState *pnv = PNV_MACHINE(qdev_get_machine()); int i; + /* User created devices */ + if (!phb->chip) { + Error *local_err = NULL; + BusState *s; + + phb->chip = pnv_get_chip(pnv, phb->chip_id); + if (!phb->chip) { + error_setg(errp, "invalid chip id: %d", phb->chip_id); + return; + } + + /* + * Reparent user created devices to the chip to build + * correctly the device tree. + */ + pnv_chip_parent_fixup(phb->chip, OBJECT(phb), phb->phb_id); + + s = qdev_get_parent_bus(DEVICE(phb->chip)); + if (!qdev_set_parent_bus(DEVICE(phb), s, &local_err)) { + error_propagate(errp, local_err); + return; + } + } + if (phb->phb_id >= PNV_CHIP_GET_CLASS(phb->chip)->num_phbs) { error_setg(errp, "invalid PHB index: %d", phb->phb_id); return; @@ -1053,10 +1074,10 @@ static void pnv_phb3_realize(DeviceState *dev, Error **errp) pci_setup_iommu(pci->bus, pnv_phb3_dma_iommu, phb); - /* Add a single Root port */ - qdev_prop_set_uint8(DEVICE(&phb->root), "chassis", phb->chip_id); - qdev_prop_set_uint16(DEVICE(&phb->root), "slot", phb->phb_id); - qdev_realize(DEVICE(&phb->root), BUS(pci->bus), &error_fatal); + if (defaults_enabled()) { + pnv_phb_attach_root_port(PCI_HOST_BRIDGE(phb), + TYPE_PNV_PHB3_ROOT_PORT); + } } void pnv_phb3_update_regions(PnvPHB3 *phb) @@ -1107,7 +1128,7 @@ static void pnv_phb3_class_init(ObjectClass *klass, void *data) dc->realize = pnv_phb3_realize; 
device_class_set_props(dc, pnv_phb3_properties); set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories); - dc->user_creatable = false; + dc->user_creatable = true; } static const TypeInfo pnv_phb3_type_info = { @@ -1142,8 +1163,24 @@ static const TypeInfo pnv_phb3_root_bus_info = { static void pnv_phb3_root_port_realize(DeviceState *dev, Error **errp) { PCIERootPortClass *rpc = PCIE_ROOT_PORT_GET_CLASS(dev); + PCIDevice *pci = PCI_DEVICE(dev); + PCIBus *bus = pci_get_bus(pci); + PnvPHB3 *phb = NULL; Error *local_err = NULL; + phb = (PnvPHB3 *) object_dynamic_cast(OBJECT(bus->qbus.parent), + TYPE_PNV_PHB3); + + if (!phb) { + error_setg(errp, +"pnv_phb3_root_port devices must be connected to pnv-phb3 buses"); + return; + } + + /* Set unique chassis/slot values for the root port */ + qdev_prop_set_uint8(&pci->qdev, "chassis", phb->chip_id); + qdev_prop_set_uint16(&pci->qdev, "slot", phb->phb_id); + rpc->parent_realize(dev, &local_err); if (local_err) { error_propagate(errp, local_err); @@ -1161,7 +1198,7 @@ static void pnv_phb3_root_port_class_init(ObjectClass *klass, void *data) device_class_set_parent_realize(dc, pnv_phb3_root_port_realize, &rpc->parent_realize); - dc->user_creatable = false; + dc->user_creatable = true; k->vendor_id = PCI_VENDOR_ID_IBM; k->device_id = 0x03dc; diff --git a/hw/pci-host/pnv_phb4.c b/hw/pci-host/pnv_phb4.c index 5ba26e250a..a7b638831e 100644 --- a/hw/pci-host/pnv_phb4.c +++ b/hw/pci-host/pnv_phb4.c @@ -22,12 +22,17 @@ #include "hw/irq.h" #include "hw/qdev-properties.h" #include "qom/object.h" +#include "sysemu/sysemu.h" #include "trace.h" #define phb_error(phb, fmt, ...) \ qemu_log_mask(LOG_GUEST_ERROR, "phb4[%d:%d]: " fmt "\n", \ (phb)->chip_id, (phb)->phb_id, ## __VA_ARGS__) +#define phb_pec_error(pec, fmt, ...) 
\ + qemu_log_mask(LOG_GUEST_ERROR, "phb4_pec[%d:%d]: " fmt "\n", \ + (pec)->chip_id, (pec)->index, ## __VA_ARGS__) + /* * QEMU version of the GETFIELD/SETFIELD macros * @@ -151,7 +156,10 @@ static void pnv_phb4_rc_config_write(PnvPHB4 *phb, unsigned off, } pdev = pci_find_device(pci->bus, 0, 0); - assert(pdev); + if (!pdev) { + phb_error(phb, "rc_config_write device not found\n"); + return; + } pci_host_config_write_common(pdev, off, PHB_RC_CONFIG_SIZE, bswap32(val), 4); @@ -170,7 +178,10 @@ static uint64_t pnv_phb4_rc_config_read(PnvPHB4 *phb, unsigned off, } pdev = pci_find_device(pci->bus, 0, 0); - assert(pdev); + if (!pdev) { + phb_error(phb, "rc_config_read device not found\n"); + return ~0ull; + } val = pci_host_config_read_common(pdev, off, PHB_RC_CONFIG_SIZE, 4); return bswap32(val); @@ -847,6 +858,284 @@ const MemoryRegionOps pnv_phb4_xscom_ops = { .endianness = DEVICE_BIG_ENDIAN, }; +static uint64_t pnv_pec_stk_nest_xscom_read(void *opaque, hwaddr addr, + unsigned size) +{ + PnvPhb4PecStack *stack = PNV_PHB4_PEC_STACK(opaque); + uint32_t reg = addr >> 3; + + /* TODO: add list of allowed registers and error out if not */ + return stack->nest_regs[reg]; +} + +static void pnv_phb4_update_regions(PnvPhb4PecStack *stack) +{ + PnvPHB4 *phb = stack->phb; + + /* Unmap first always */ + if (memory_region_is_mapped(&phb->mr_regs)) { + memory_region_del_subregion(&stack->phbbar, &phb->mr_regs); + } + if (memory_region_is_mapped(&phb->xsrc.esb_mmio)) { + memory_region_del_subregion(&stack->intbar, &phb->xsrc.esb_mmio); + } + + /* Map registers if enabled */ + if (memory_region_is_mapped(&stack->phbbar)) { + memory_region_add_subregion(&stack->phbbar, 0, &phb->mr_regs); + } + + /* Map ESB if enabled */ + if (memory_region_is_mapped(&stack->intbar)) { + memory_region_add_subregion(&stack->intbar, 0, &phb->xsrc.esb_mmio); + } + + /* Check/update m32 */ + pnv_phb4_check_all_mbt(phb); +} + +static void pnv_pec_stk_update_map(PnvPhb4PecStack *stack) +{ + PnvPhb4PecState 
*pec = stack->pec; + MemoryRegion *sysmem = get_system_memory(); + uint64_t bar_en = stack->nest_regs[PEC_NEST_STK_BAR_EN]; + uint64_t bar, mask, size; + char name[64]; + + /* + * NOTE: This will really not work well if those are remapped + * after the PHB has created its sub regions. We could do better + * if we had a way to resize regions but we don't really care + * that much in practice as the stuff below really only happens + * once early during boot + */ + + /* Handle unmaps */ + if (memory_region_is_mapped(&stack->mmbar0) && + !(bar_en & PEC_NEST_STK_BAR_EN_MMIO0)) { + memory_region_del_subregion(sysmem, &stack->mmbar0); + } + if (memory_region_is_mapped(&stack->mmbar1) && + !(bar_en & PEC_NEST_STK_BAR_EN_MMIO1)) { + memory_region_del_subregion(sysmem, &stack->mmbar1); + } + if (memory_region_is_mapped(&stack->phbbar) && + !(bar_en & PEC_NEST_STK_BAR_EN_PHB)) { + memory_region_del_subregion(sysmem, &stack->phbbar); + } + if (memory_region_is_mapped(&stack->intbar) && + !(bar_en & PEC_NEST_STK_BAR_EN_INT)) { + memory_region_del_subregion(sysmem, &stack->intbar); + } + + /* Update PHB */ + pnv_phb4_update_regions(stack); + + /* Handle maps */ + if (!memory_region_is_mapped(&stack->mmbar0) && + (bar_en & PEC_NEST_STK_BAR_EN_MMIO0)) { + bar = stack->nest_regs[PEC_NEST_STK_MMIO_BAR0] >> 8; + mask = stack->nest_regs[PEC_NEST_STK_MMIO_BAR0_MASK]; + size = ((~mask) >> 8) + 1; + snprintf(name, sizeof(name), "pec-%d.%d-stack-%d-mmio0", + pec->chip_id, pec->index, stack->stack_no); + memory_region_init(&stack->mmbar0, OBJECT(stack), name, size); + memory_region_add_subregion(sysmem, bar, &stack->mmbar0); + stack->mmio0_base = bar; + stack->mmio0_size = size; + } + if (!memory_region_is_mapped(&stack->mmbar1) && + (bar_en & PEC_NEST_STK_BAR_EN_MMIO1)) { + bar = stack->nest_regs[PEC_NEST_STK_MMIO_BAR1] >> 8; + mask = stack->nest_regs[PEC_NEST_STK_MMIO_BAR1_MASK]; + size = ((~mask) >> 8) + 1; + snprintf(name, sizeof(name), "pec-%d.%d-stack-%d-mmio1", + pec->chip_id, 
pec->index, stack->stack_no); + memory_region_init(&stack->mmbar1, OBJECT(stack), name, size); + memory_region_add_subregion(sysmem, bar, &stack->mmbar1); + stack->mmio1_base = bar; + stack->mmio1_size = size; + } + if (!memory_region_is_mapped(&stack->phbbar) && + (bar_en & PEC_NEST_STK_BAR_EN_PHB)) { + bar = stack->nest_regs[PEC_NEST_STK_PHB_REGS_BAR] >> 8; + size = PNV_PHB4_NUM_REGS << 3; + snprintf(name, sizeof(name), "pec-%d.%d-stack-%d-phb", + pec->chip_id, pec->index, stack->stack_no); + memory_region_init(&stack->phbbar, OBJECT(stack), name, size); + memory_region_add_subregion(sysmem, bar, &stack->phbbar); + } + if (!memory_region_is_mapped(&stack->intbar) && + (bar_en & PEC_NEST_STK_BAR_EN_INT)) { + bar = stack->nest_regs[PEC_NEST_STK_INT_BAR] >> 8; + size = PNV_PHB4_MAX_INTs << 16; + snprintf(name, sizeof(name), "pec-%d.%d-stack-%d-int", + stack->pec->chip_id, stack->pec->index, stack->stack_no); + memory_region_init(&stack->intbar, OBJECT(stack), name, size); + memory_region_add_subregion(sysmem, bar, &stack->intbar); + } + + /* Update PHB */ + pnv_phb4_update_regions(stack); +} + +static void pnv_pec_stk_nest_xscom_write(void *opaque, hwaddr addr, + uint64_t val, unsigned size) +{ + PnvPhb4PecStack *stack = PNV_PHB4_PEC_STACK(opaque); + PnvPhb4PecState *pec = stack->pec; + uint32_t reg = addr >> 3; + + switch (reg) { + case PEC_NEST_STK_PCI_NEST_FIR: + stack->nest_regs[PEC_NEST_STK_PCI_NEST_FIR] = val; + break; + case PEC_NEST_STK_PCI_NEST_FIR_CLR: + stack->nest_regs[PEC_NEST_STK_PCI_NEST_FIR] &= val; + break; + case PEC_NEST_STK_PCI_NEST_FIR_SET: + stack->nest_regs[PEC_NEST_STK_PCI_NEST_FIR] |= val; + break; + case PEC_NEST_STK_PCI_NEST_FIR_MSK: + stack->nest_regs[PEC_NEST_STK_PCI_NEST_FIR_MSK] = val; + break; + case PEC_NEST_STK_PCI_NEST_FIR_MSKC: + stack->nest_regs[PEC_NEST_STK_PCI_NEST_FIR_MSK] &= val; + break; + case PEC_NEST_STK_PCI_NEST_FIR_MSKS: + stack->nest_regs[PEC_NEST_STK_PCI_NEST_FIR_MSK] |= val; + break; + case 
PEC_NEST_STK_PCI_NEST_FIR_ACT0: + case PEC_NEST_STK_PCI_NEST_FIR_ACT1: + stack->nest_regs[reg] = val; + break; + case PEC_NEST_STK_PCI_NEST_FIR_WOF: + stack->nest_regs[reg] = 0; + break; + case PEC_NEST_STK_ERR_REPORT_0: + case PEC_NEST_STK_ERR_REPORT_1: + case PEC_NEST_STK_PBCQ_GNRL_STATUS: + /* Flag error ? */ + break; + case PEC_NEST_STK_PBCQ_MODE: + stack->nest_regs[reg] = val & 0xff00000000000000ull; + break; + case PEC_NEST_STK_MMIO_BAR0: + case PEC_NEST_STK_MMIO_BAR0_MASK: + case PEC_NEST_STK_MMIO_BAR1: + case PEC_NEST_STK_MMIO_BAR1_MASK: + if (stack->nest_regs[PEC_NEST_STK_BAR_EN] & + (PEC_NEST_STK_BAR_EN_MMIO0 | + PEC_NEST_STK_BAR_EN_MMIO1)) { + phb_pec_error(pec, "Changing enabled BAR unsupported\n"); + } + stack->nest_regs[reg] = val & 0xffffffffff000000ull; + break; + case PEC_NEST_STK_PHB_REGS_BAR: + if (stack->nest_regs[PEC_NEST_STK_BAR_EN] & PEC_NEST_STK_BAR_EN_PHB) { + phb_pec_error(pec, "Changing enabled BAR unsupported\n"); + } + stack->nest_regs[reg] = val & 0xffffffffffc00000ull; + break; + case PEC_NEST_STK_INT_BAR: + if (stack->nest_regs[PEC_NEST_STK_BAR_EN] & PEC_NEST_STK_BAR_EN_INT) { + phb_pec_error(pec, "Changing enabled BAR unsupported\n"); + } + stack->nest_regs[reg] = val & 0xfffffff000000000ull; + break; + case PEC_NEST_STK_BAR_EN: + stack->nest_regs[reg] = val & 0xf000000000000000ull; + pnv_pec_stk_update_map(stack); + break; + case PEC_NEST_STK_DATA_FRZ_TYPE: + case PEC_NEST_STK_PBCQ_TUN_BAR: + /* Not used for now */ + stack->nest_regs[reg] = val; + break; + default: + qemu_log_mask(LOG_UNIMP, "phb4_pec: nest_xscom_write 0x%"HWADDR_PRIx + "=%"PRIx64"\n", addr, val); + } +} + +static const MemoryRegionOps pnv_pec_stk_nest_xscom_ops = { + .read = pnv_pec_stk_nest_xscom_read, + .write = pnv_pec_stk_nest_xscom_write, + .valid.min_access_size = 8, + .valid.max_access_size = 8, + .impl.min_access_size = 8, + .impl.max_access_size = 8, + .endianness = DEVICE_BIG_ENDIAN, +}; + +static uint64_t pnv_pec_stk_pci_xscom_read(void *opaque, hwaddr 
addr, + unsigned size) +{ + PnvPhb4PecStack *stack = PNV_PHB4_PEC_STACK(opaque); + uint32_t reg = addr >> 3; + + /* TODO: add list of allowed registers and error out if not */ + return stack->pci_regs[reg]; +} + +static void pnv_pec_stk_pci_xscom_write(void *opaque, hwaddr addr, + uint64_t val, unsigned size) +{ + PnvPhb4PecStack *stack = PNV_PHB4_PEC_STACK(opaque); + uint32_t reg = addr >> 3; + + switch (reg) { + case PEC_PCI_STK_PCI_FIR: + stack->pci_regs[reg] = val; + break; + case PEC_PCI_STK_PCI_FIR_CLR: + stack->pci_regs[PEC_PCI_STK_PCI_FIR] &= val; + break; + case PEC_PCI_STK_PCI_FIR_SET: + stack->pci_regs[PEC_PCI_STK_PCI_FIR] |= val; + break; + case PEC_PCI_STK_PCI_FIR_MSK: + stack->pci_regs[reg] = val; + break; + case PEC_PCI_STK_PCI_FIR_MSKC: + stack->pci_regs[PEC_PCI_STK_PCI_FIR_MSK] &= val; + break; + case PEC_PCI_STK_PCI_FIR_MSKS: + stack->pci_regs[PEC_PCI_STK_PCI_FIR_MSK] |= val; + break; + case PEC_PCI_STK_PCI_FIR_ACT0: + case PEC_PCI_STK_PCI_FIR_ACT1: + stack->pci_regs[reg] = val; + break; + case PEC_PCI_STK_PCI_FIR_WOF: + stack->pci_regs[reg] = 0; + break; + case PEC_PCI_STK_ETU_RESET: + stack->pci_regs[reg] = val & 0x8000000000000000ull; + /* TODO: Implement reset */ + break; + case PEC_PCI_STK_PBAIB_ERR_REPORT: + break; + case PEC_PCI_STK_PBAIB_TX_CMD_CRED: + case PEC_PCI_STK_PBAIB_TX_DAT_CRED: + stack->pci_regs[reg] = val; + break; + default: + qemu_log_mask(LOG_UNIMP, "phb4_pec_stk: pci_xscom_write 0x%"HWADDR_PRIx + "=%"PRIx64"\n", addr, val); + } +} + +static const MemoryRegionOps pnv_pec_stk_pci_xscom_ops = { + .read = pnv_pec_stk_pci_xscom_read, + .write = pnv_pec_stk_pci_xscom_write, + .valid.min_access_size = 8, + .valid.max_access_size = 8, + .impl.min_access_size = 8, + .impl.max_access_size = 8, + .endianness = DEVICE_BIG_ENDIAN, +}; + static int pnv_phb4_map_irq(PCIDevice *pci_dev, int irq_num) { /* Check that out properly ... 
*/ @@ -1063,6 +1352,23 @@ static const TypeInfo pnv_phb4_iommu_memory_region_info = { }; /* + * Return the index/phb-id of a PHB4 that belongs to a + * pec->stacks[stack_index] stack. + */ +int pnv_phb4_pec_get_phb_id(PnvPhb4PecState *pec, int stack_index) +{ + PnvPhb4PecClass *pecc = PNV_PHB4_PEC_GET_CLASS(pec); + int index = pec->index; + int offset = 0; + + while (index--) { + offset += pecc->num_stacks[index]; + } + + return offset + stack_index; +} + +/* * MSI/MSIX memory region implementation. * The handler handles both MSI and MSIX. */ @@ -1151,6 +1457,52 @@ static AddressSpace *pnv_phb4_dma_iommu(PCIBus *bus, void *opaque, int devfn) return &ds->dma_as; } +static void pnv_phb4_xscom_realize(PnvPHB4 *phb) +{ + PnvPhb4PecStack *stack = phb->stack; + PnvPhb4PecState *pec = stack->pec; + PnvPhb4PecClass *pecc = PNV_PHB4_PEC_GET_CLASS(pec); + uint32_t pec_nest_base; + uint32_t pec_pci_base; + char name[64]; + + assert(pec); + + /* Initialize the XSCOM regions for the stack registers */ + snprintf(name, sizeof(name), "xscom-pec-%d.%d-nest-stack-%d", + pec->chip_id, pec->index, stack->stack_no); + pnv_xscom_region_init(&stack->nest_regs_mr, OBJECT(stack), + &pnv_pec_stk_nest_xscom_ops, stack, name, + PHB4_PEC_NEST_STK_REGS_COUNT); + + snprintf(name, sizeof(name), "xscom-pec-%d.%d-pci-stack-%d", + pec->chip_id, pec->index, stack->stack_no); + pnv_xscom_region_init(&stack->pci_regs_mr, OBJECT(stack), + &pnv_pec_stk_pci_xscom_ops, stack, name, + PHB4_PEC_PCI_STK_REGS_COUNT); + + /* PHB pass-through */ + snprintf(name, sizeof(name), "xscom-pec-%d.%d-pci-stack-%d-phb", + pec->chip_id, pec->index, stack->stack_no); + pnv_xscom_region_init(&stack->phb_regs_mr, OBJECT(phb), + &pnv_phb4_xscom_ops, phb, name, 0x40); + + pec_nest_base = pecc->xscom_nest_base(pec); + pec_pci_base = pecc->xscom_pci_base(pec); + + /* Populate the XSCOM address space. 
*/ + pnv_xscom_add_subregion(pec->chip, + pec_nest_base + 0x40 * (stack->stack_no + 1), + &stack->nest_regs_mr); + pnv_xscom_add_subregion(pec->chip, + pec_pci_base + 0x40 * (stack->stack_no + 1), + &stack->pci_regs_mr); + pnv_xscom_add_subregion(pec->chip, + pec_pci_base + PNV9_XSCOM_PEC_PCI_STK0 + + 0x40 * stack->stack_no, + &stack->phb_regs_mr); +} + static void pnv_phb4_instance_init(Object *obj) { PnvPHB4 *phb = PNV_PHB4(obj); @@ -1159,12 +1511,35 @@ static void pnv_phb4_instance_init(Object *obj) /* XIVE interrupt source object */ object_initialize_child(obj, "source", &phb->xsrc, TYPE_XIVE_SOURCE); +} - /* Root Port */ - object_initialize_child(obj, "root", &phb->root, TYPE_PNV_PHB4_ROOT_PORT); +static PnvPhb4PecStack *pnv_phb4_get_stack(PnvChip *chip, PnvPHB4 *phb, + Error **errp) +{ + Pnv9Chip *chip9 = PNV9_CHIP(chip); + int chip_id = phb->chip_id; + int index = phb->phb_id; + int i, j; + + for (i = 0; i < chip->num_pecs; i++) { + /* + * For each PEC, check the amount of stacks it supports + * and see if the given phb4 index matches a stack. 
+ */ + PnvPhb4PecState *pec = &chip9->pecs[i]; - qdev_prop_set_int32(DEVICE(&phb->root), "addr", PCI_DEVFN(0, 0)); - qdev_prop_set_bit(DEVICE(&phb->root), "multifunction", false); + for (j = 0; j < pec->num_stacks; j++) { + if (index == pnv_phb4_pec_get_phb_id(pec, j)) { + return &pec->stacks[j]; + } + } + } + + error_setg(errp, + "pnv-phb4 chip-id %d index %d didn't match any existing PEC", + chip_id, index); + + return NULL; } static void pnv_phb4_realize(DeviceState *dev, Error **errp) @@ -1172,10 +1547,51 @@ static void pnv_phb4_realize(DeviceState *dev, Error **errp) PnvPHB4 *phb = PNV_PHB4(dev); PCIHostState *pci = PCI_HOST_BRIDGE(dev); XiveSource *xsrc = &phb->xsrc; + Error *local_err = NULL; int nr_irqs; char name[32]; - assert(phb->stack); + /* User created PHB */ + if (!phb->stack) { + PnvMachineState *pnv = PNV_MACHINE(qdev_get_machine()); + PnvChip *chip = pnv_get_chip(pnv, phb->chip_id); + PnvPhb4PecClass *pecc; + BusState *s; + + if (!chip) { + error_setg(errp, "invalid chip id: %d", phb->chip_id); + return; + } + + phb->stack = pnv_phb4_get_stack(chip, phb, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } + + /* All other phb properties but 'version' are already set */ + pecc = PNV_PHB4_PEC_GET_CLASS(phb->stack->pec); + object_property_set_int(OBJECT(phb), "version", pecc->version, + &error_fatal); + + /* + * Assign stack->phb since pnv_phb4_update_regions() uses it + * to access the phb. + */ + phb->stack->phb = phb; + + /* + * Reparent user created devices to the chip to build + * correctly the device tree. 
+ */ + pnv_chip_parent_fixup(chip, OBJECT(phb), phb->phb_id); + + s = qdev_get_parent_bus(DEVICE(chip)); + if (!qdev_set_parent_bus(DEVICE(phb), s, &local_err)) { + error_propagate(errp, local_err); + return; + } + } /* Set the "big_phb" flag */ phb->big_phb = phb->phb_id == 0 || phb->phb_id == 3; @@ -1208,10 +1624,11 @@ static void pnv_phb4_realize(DeviceState *dev, Error **errp) pci_setup_iommu(pci->bus, pnv_phb4_dma_iommu, phb); pci->bus->flags |= PCI_BUS_EXTENDED_CONFIG_SPACE; - /* Add a single Root port */ - qdev_prop_set_uint8(DEVICE(&phb->root), "chassis", phb->chip_id); - qdev_prop_set_uint16(DEVICE(&phb->root), "slot", phb->phb_id); - qdev_realize(DEVICE(&phb->root), BUS(pci->bus), &error_fatal); + /* Add a single Root port if running with defaults */ + if (defaults_enabled()) { + pnv_phb_attach_root_port(PCI_HOST_BRIDGE(phb), + TYPE_PNV_PHB4_ROOT_PORT); + } /* Setup XIVE Source */ if (phb->big_phb) { @@ -1228,6 +1645,8 @@ static void pnv_phb4_realize(DeviceState *dev, Error **errp) pnv_phb4_update_xsrc(phb); phb->qirqs = qemu_allocate_irqs(xive_source_set_irq, xsrc, xsrc->nr_irqs); + + pnv_phb4_xscom_realize(phb); } static const char *pnv_phb4_root_bus_path(PCIHostState *host_bridge, @@ -1277,7 +1696,7 @@ static void pnv_phb4_class_init(ObjectClass *klass, void *data) dc->realize = pnv_phb4_realize; device_class_set_props(dc, pnv_phb4_properties); set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories); - dc->user_creatable = false; + dc->user_creatable = true; xfc->notify = pnv_phb4_xive_notify; } @@ -1338,8 +1757,23 @@ static void pnv_phb4_root_port_reset(DeviceState *dev) static void pnv_phb4_root_port_realize(DeviceState *dev, Error **errp) { PCIERootPortClass *rpc = PCIE_ROOT_PORT_GET_CLASS(dev); + PCIDevice *pci = PCI_DEVICE(dev); + PCIBus *bus = pci_get_bus(pci); + PnvPHB4 *phb = NULL; Error *local_err = NULL; + phb = (PnvPHB4 *) object_dynamic_cast(OBJECT(bus->qbus.parent), + TYPE_PNV_PHB4); + + if (!phb) { + error_setg(errp, "%s must be connected to 
pnv-phb4 buses", dev->id); + return; + } + + /* Set unique chassis/slot values for the root port */ + qdev_prop_set_uint8(&pci->qdev, "chassis", phb->chip_id); + qdev_prop_set_uint16(&pci->qdev, "slot", phb->phb_id); + rpc->parent_realize(dev, &local_err); if (local_err) { error_propagate(errp, local_err); @@ -1354,7 +1788,7 @@ static void pnv_phb4_root_port_class_init(ObjectClass *klass, void *data) PCIERootPortClass *rpc = PCIE_ROOT_PORT_CLASS(klass); dc->desc = "IBM PHB4 PCIE Root Port"; - dc->user_creatable = false; + dc->user_creatable = true; device_class_set_parent_realize(dc, pnv_phb4_root_port_realize, &rpc->parent_realize); @@ -1388,32 +1822,6 @@ static void pnv_phb4_register_types(void) type_init(pnv_phb4_register_types); -void pnv_phb4_update_regions(PnvPhb4PecStack *stack) -{ - PnvPHB4 *phb = &stack->phb; - - /* Unmap first always */ - if (memory_region_is_mapped(&phb->mr_regs)) { - memory_region_del_subregion(&stack->phbbar, &phb->mr_regs); - } - if (memory_region_is_mapped(&phb->xsrc.esb_mmio)) { - memory_region_del_subregion(&stack->intbar, &phb->xsrc.esb_mmio); - } - - /* Map registers if enabled */ - if (memory_region_is_mapped(&stack->phbbar)) { - memory_region_add_subregion(&stack->phbbar, 0, &phb->mr_regs); - } - - /* Map ESB if enabled */ - if (memory_region_is_mapped(&stack->intbar)) { - memory_region_add_subregion(&stack->intbar, 0, &phb->xsrc.esb_mmio); - } - - /* Check/update m32 */ - pnv_phb4_check_all_mbt(phb); -} - void pnv_phb4_pic_print_info(PnvPHB4 *phb, Monitor *mon) { uint32_t offset = phb->regs[PHB_INT_NOTIFY_INDEX >> 3]; diff --git a/hw/pci-host/pnv_phb4_pec.c b/hw/pci-host/pnv_phb4_pec.c index f3e4fa0c82..7fe7f1f007 100644 --- a/hw/pci-host/pnv_phb4_pec.c +++ b/hw/pci-host/pnv_phb4_pec.c @@ -19,6 +19,7 @@ #include "hw/pci/pci_bus.h" #include "hw/ppc/pnv.h" #include "hw/qdev-properties.h" +#include "sysemu/sysemu.h" #include <libfdt.h> @@ -111,258 +112,6 @@ static const MemoryRegionOps pnv_pec_pci_xscom_ops = { .endianness = 
DEVICE_BIG_ENDIAN, }; -static uint64_t pnv_pec_stk_nest_xscom_read(void *opaque, hwaddr addr, - unsigned size) -{ - PnvPhb4PecStack *stack = PNV_PHB4_PEC_STACK(opaque); - uint32_t reg = addr >> 3; - - /* TODO: add list of allowed registers and error out if not */ - return stack->nest_regs[reg]; -} - -static void pnv_pec_stk_update_map(PnvPhb4PecStack *stack) -{ - PnvPhb4PecState *pec = stack->pec; - MemoryRegion *sysmem = get_system_memory(); - uint64_t bar_en = stack->nest_regs[PEC_NEST_STK_BAR_EN]; - uint64_t bar, mask, size; - char name[64]; - - /* - * NOTE: This will really not work well if those are remapped - * after the PHB has created its sub regions. We could do better - * if we had a way to resize regions but we don't really care - * that much in practice as the stuff below really only happens - * once early during boot - */ - - /* Handle unmaps */ - if (memory_region_is_mapped(&stack->mmbar0) && - !(bar_en & PEC_NEST_STK_BAR_EN_MMIO0)) { - memory_region_del_subregion(sysmem, &stack->mmbar0); - } - if (memory_region_is_mapped(&stack->mmbar1) && - !(bar_en & PEC_NEST_STK_BAR_EN_MMIO1)) { - memory_region_del_subregion(sysmem, &stack->mmbar1); - } - if (memory_region_is_mapped(&stack->phbbar) && - !(bar_en & PEC_NEST_STK_BAR_EN_PHB)) { - memory_region_del_subregion(sysmem, &stack->phbbar); - } - if (memory_region_is_mapped(&stack->intbar) && - !(bar_en & PEC_NEST_STK_BAR_EN_INT)) { - memory_region_del_subregion(sysmem, &stack->intbar); - } - - /* Update PHB */ - pnv_phb4_update_regions(stack); - - /* Handle maps */ - if (!memory_region_is_mapped(&stack->mmbar0) && - (bar_en & PEC_NEST_STK_BAR_EN_MMIO0)) { - bar = stack->nest_regs[PEC_NEST_STK_MMIO_BAR0] >> 8; - mask = stack->nest_regs[PEC_NEST_STK_MMIO_BAR0_MASK]; - size = ((~mask) >> 8) + 1; - snprintf(name, sizeof(name), "pec-%d.%d-stack-%d-mmio0", - pec->chip_id, pec->index, stack->stack_no); - memory_region_init(&stack->mmbar0, OBJECT(stack), name, size); - memory_region_add_subregion(sysmem, bar, 
&stack->mmbar0); - stack->mmio0_base = bar; - stack->mmio0_size = size; - } - if (!memory_region_is_mapped(&stack->mmbar1) && - (bar_en & PEC_NEST_STK_BAR_EN_MMIO1)) { - bar = stack->nest_regs[PEC_NEST_STK_MMIO_BAR1] >> 8; - mask = stack->nest_regs[PEC_NEST_STK_MMIO_BAR1_MASK]; - size = ((~mask) >> 8) + 1; - snprintf(name, sizeof(name), "pec-%d.%d-stack-%d-mmio1", - pec->chip_id, pec->index, stack->stack_no); - memory_region_init(&stack->mmbar1, OBJECT(stack), name, size); - memory_region_add_subregion(sysmem, bar, &stack->mmbar1); - stack->mmio1_base = bar; - stack->mmio1_size = size; - } - if (!memory_region_is_mapped(&stack->phbbar) && - (bar_en & PEC_NEST_STK_BAR_EN_PHB)) { - bar = stack->nest_regs[PEC_NEST_STK_PHB_REGS_BAR] >> 8; - size = PNV_PHB4_NUM_REGS << 3; - snprintf(name, sizeof(name), "pec-%d.%d-stack-%d-phb", - pec->chip_id, pec->index, stack->stack_no); - memory_region_init(&stack->phbbar, OBJECT(stack), name, size); - memory_region_add_subregion(sysmem, bar, &stack->phbbar); - } - if (!memory_region_is_mapped(&stack->intbar) && - (bar_en & PEC_NEST_STK_BAR_EN_INT)) { - bar = stack->nest_regs[PEC_NEST_STK_INT_BAR] >> 8; - size = PNV_PHB4_MAX_INTs << 16; - snprintf(name, sizeof(name), "pec-%d.%d-stack-%d-int", - stack->pec->chip_id, stack->pec->index, stack->stack_no); - memory_region_init(&stack->intbar, OBJECT(stack), name, size); - memory_region_add_subregion(sysmem, bar, &stack->intbar); - } - - /* Update PHB */ - pnv_phb4_update_regions(stack); -} - -static void pnv_pec_stk_nest_xscom_write(void *opaque, hwaddr addr, - uint64_t val, unsigned size) -{ - PnvPhb4PecStack *stack = PNV_PHB4_PEC_STACK(opaque); - PnvPhb4PecState *pec = stack->pec; - uint32_t reg = addr >> 3; - - switch (reg) { - case PEC_NEST_STK_PCI_NEST_FIR: - stack->nest_regs[PEC_NEST_STK_PCI_NEST_FIR] = val; - break; - case PEC_NEST_STK_PCI_NEST_FIR_CLR: - stack->nest_regs[PEC_NEST_STK_PCI_NEST_FIR] &= val; - break; - case PEC_NEST_STK_PCI_NEST_FIR_SET: - 
stack->nest_regs[PEC_NEST_STK_PCI_NEST_FIR] |= val; - break; - case PEC_NEST_STK_PCI_NEST_FIR_MSK: - stack->nest_regs[PEC_NEST_STK_PCI_NEST_FIR_MSK] = val; - break; - case PEC_NEST_STK_PCI_NEST_FIR_MSKC: - stack->nest_regs[PEC_NEST_STK_PCI_NEST_FIR_MSK] &= val; - break; - case PEC_NEST_STK_PCI_NEST_FIR_MSKS: - stack->nest_regs[PEC_NEST_STK_PCI_NEST_FIR_MSK] |= val; - break; - case PEC_NEST_STK_PCI_NEST_FIR_ACT0: - case PEC_NEST_STK_PCI_NEST_FIR_ACT1: - stack->nest_regs[reg] = val; - break; - case PEC_NEST_STK_PCI_NEST_FIR_WOF: - stack->nest_regs[reg] = 0; - break; - case PEC_NEST_STK_ERR_REPORT_0: - case PEC_NEST_STK_ERR_REPORT_1: - case PEC_NEST_STK_PBCQ_GNRL_STATUS: - /* Flag error ? */ - break; - case PEC_NEST_STK_PBCQ_MODE: - stack->nest_regs[reg] = val & 0xff00000000000000ull; - break; - case PEC_NEST_STK_MMIO_BAR0: - case PEC_NEST_STK_MMIO_BAR0_MASK: - case PEC_NEST_STK_MMIO_BAR1: - case PEC_NEST_STK_MMIO_BAR1_MASK: - if (stack->nest_regs[PEC_NEST_STK_BAR_EN] & - (PEC_NEST_STK_BAR_EN_MMIO0 | - PEC_NEST_STK_BAR_EN_MMIO1)) { - phb_pec_error(pec, "Changing enabled BAR unsupported\n"); - } - stack->nest_regs[reg] = val & 0xffffffffff000000ull; - break; - case PEC_NEST_STK_PHB_REGS_BAR: - if (stack->nest_regs[PEC_NEST_STK_BAR_EN] & PEC_NEST_STK_BAR_EN_PHB) { - phb_pec_error(pec, "Changing enabled BAR unsupported\n"); - } - stack->nest_regs[reg] = val & 0xffffffffffc00000ull; - break; - case PEC_NEST_STK_INT_BAR: - if (stack->nest_regs[PEC_NEST_STK_BAR_EN] & PEC_NEST_STK_BAR_EN_INT) { - phb_pec_error(pec, "Changing enabled BAR unsupported\n"); - } - stack->nest_regs[reg] = val & 0xfffffff000000000ull; - break; - case PEC_NEST_STK_BAR_EN: - stack->nest_regs[reg] = val & 0xf000000000000000ull; - pnv_pec_stk_update_map(stack); - break; - case PEC_NEST_STK_DATA_FRZ_TYPE: - case PEC_NEST_STK_PBCQ_TUN_BAR: - /* Not used for now */ - stack->nest_regs[reg] = val; - break; - default: - qemu_log_mask(LOG_UNIMP, "phb4_pec: nest_xscom_write 0x%"HWADDR_PRIx - "=%"PRIx64"\n", 
addr, val); - } -} - -static const MemoryRegionOps pnv_pec_stk_nest_xscom_ops = { - .read = pnv_pec_stk_nest_xscom_read, - .write = pnv_pec_stk_nest_xscom_write, - .valid.min_access_size = 8, - .valid.max_access_size = 8, - .impl.min_access_size = 8, - .impl.max_access_size = 8, - .endianness = DEVICE_BIG_ENDIAN, -}; - -static uint64_t pnv_pec_stk_pci_xscom_read(void *opaque, hwaddr addr, - unsigned size) -{ - PnvPhb4PecStack *stack = PNV_PHB4_PEC_STACK(opaque); - uint32_t reg = addr >> 3; - - /* TODO: add list of allowed registers and error out if not */ - return stack->pci_regs[reg]; -} - -static void pnv_pec_stk_pci_xscom_write(void *opaque, hwaddr addr, - uint64_t val, unsigned size) -{ - PnvPhb4PecStack *stack = PNV_PHB4_PEC_STACK(opaque); - uint32_t reg = addr >> 3; - - switch (reg) { - case PEC_PCI_STK_PCI_FIR: - stack->nest_regs[reg] = val; - break; - case PEC_PCI_STK_PCI_FIR_CLR: - stack->nest_regs[PEC_PCI_STK_PCI_FIR] &= val; - break; - case PEC_PCI_STK_PCI_FIR_SET: - stack->nest_regs[PEC_PCI_STK_PCI_FIR] |= val; - break; - case PEC_PCI_STK_PCI_FIR_MSK: - stack->nest_regs[reg] = val; - break; - case PEC_PCI_STK_PCI_FIR_MSKC: - stack->nest_regs[PEC_PCI_STK_PCI_FIR_MSK] &= val; - break; - case PEC_PCI_STK_PCI_FIR_MSKS: - stack->nest_regs[PEC_PCI_STK_PCI_FIR_MSK] |= val; - break; - case PEC_PCI_STK_PCI_FIR_ACT0: - case PEC_PCI_STK_PCI_FIR_ACT1: - stack->nest_regs[reg] = val; - break; - case PEC_PCI_STK_PCI_FIR_WOF: - stack->nest_regs[reg] = 0; - break; - case PEC_PCI_STK_ETU_RESET: - stack->nest_regs[reg] = val & 0x8000000000000000ull; - /* TODO: Implement reset */ - break; - case PEC_PCI_STK_PBAIB_ERR_REPORT: - break; - case PEC_PCI_STK_PBAIB_TX_CMD_CRED: - case PEC_PCI_STK_PBAIB_TX_DAT_CRED: - stack->nest_regs[reg] = val; - break; - default: - qemu_log_mask(LOG_UNIMP, "phb4_pec_stk: pci_xscom_write 0x%"HWADDR_PRIx - "=%"PRIx64"\n", addr, val); - } -} - -static const MemoryRegionOps pnv_pec_stk_pci_xscom_ops = { - .read = pnv_pec_stk_pci_xscom_read, - 
.write = pnv_pec_stk_pci_xscom_write, - .valid.min_access_size = 8, - .valid.max_access_size = 8, - .impl.min_access_size = 8, - .impl.max_access_size = 8, - .endianness = DEVICE_BIG_ENDIAN, -}; - static void pnv_pec_instance_init(Object *obj) { PnvPhb4PecState *pec = PNV_PHB4_PEC(obj); @@ -374,19 +123,6 @@ static void pnv_pec_instance_init(Object *obj) } } -static int pnv_pec_phb_offset(PnvPhb4PecState *pec) -{ - PnvPhb4PecClass *pecc = PNV_PHB4_PEC_GET_CLASS(pec); - int index = pec->index; - int offset = 0; - - while (index--) { - offset += pecc->num_stacks[index]; - } - - return offset; -} - static void pnv_pec_realize(DeviceState *dev, Error **errp) { PnvPhb4PecState *pec = PNV_PHB4_PEC(dev); @@ -405,10 +141,8 @@ static void pnv_pec_realize(DeviceState *dev, Error **errp) for (i = 0; i < pec->num_stacks; i++) { PnvPhb4PecStack *stack = &pec->stacks[i]; Object *stk_obj = OBJECT(stack); - int phb_id = pnv_pec_phb_offset(pec) + i; object_property_set_int(stk_obj, "stack-no", i, &error_abort); - object_property_set_int(stk_obj, "phb-id", phb_id, &error_abort); object_property_set_link(stk_obj, "pec", OBJECT(pec), &error_abort); if (!qdev_realize(DEVICE(stk_obj), NULL, errp)) { return; @@ -462,8 +196,7 @@ static int pnv_pec_dt_xscom(PnvXScomInterface *dev, void *fdt, pecc->compat_size))); for (i = 0; i < pec->num_stacks; i++) { - PnvPhb4PecStack *stack = &pec->stacks[i]; - PnvPHB4 *phb = &stack->phb; + int phb_id = pnv_phb4_pec_get_phb_id(pec, i); int stk_offset; name = g_strdup_printf("stack@%x", i); @@ -473,7 +206,7 @@ static int pnv_pec_dt_xscom(PnvXScomInterface *dev, void *fdt, _FDT((fdt_setprop(fdt, stk_offset, "compatible", pecc->stk_compat, pecc->stk_compat_size))); _FDT((fdt_setprop_cell(fdt, stk_offset, "reg", i))); - _FDT((fdt_setprop_cell(fdt, stk_offset, "ibm,phb-index", phb->phb_id))); + _FDT((fdt_setprop_cell(fdt, stk_offset, "ibm,phb-index", phb_id))); } return 0; @@ -543,69 +276,38 @@ static const TypeInfo pnv_pec_type_info = { } }; -static void 
pnv_pec_stk_instance_init(Object *obj) +static void pnv_pec_stk_default_phb_realize(PnvPhb4PecStack *stack, + Error **errp) { - PnvPhb4PecStack *stack = PNV_PHB4_PEC_STACK(obj); - - object_initialize_child(obj, "phb", &stack->phb, TYPE_PNV_PHB4); - object_property_add_alias(obj, "phb-id", OBJECT(&stack->phb), "index"); -} - -static void pnv_pec_stk_realize(DeviceState *dev, Error **errp) -{ - PnvPhb4PecStack *stack = PNV_PHB4_PEC_STACK(dev); PnvPhb4PecState *pec = stack->pec; PnvPhb4PecClass *pecc = PNV_PHB4_PEC_GET_CLASS(pec); - PnvChip *chip = pec->chip; - uint32_t pec_nest_base; - uint32_t pec_pci_base; - char name[64]; - - assert(pec); - - /* Initialize the XSCOM regions for the stack registers */ - snprintf(name, sizeof(name), "xscom-pec-%d.%d-nest-stack-%d", - pec->chip_id, pec->index, stack->stack_no); - pnv_xscom_region_init(&stack->nest_regs_mr, OBJECT(stack), - &pnv_pec_stk_nest_xscom_ops, stack, name, - PHB4_PEC_NEST_STK_REGS_COUNT); - - snprintf(name, sizeof(name), "xscom-pec-%d.%d-pci-stack-%d", - pec->chip_id, pec->index, stack->stack_no); - pnv_xscom_region_init(&stack->pci_regs_mr, OBJECT(stack), - &pnv_pec_stk_pci_xscom_ops, stack, name, - PHB4_PEC_PCI_STK_REGS_COUNT); + int phb_id = pnv_phb4_pec_get_phb_id(pec, stack->stack_no); - /* PHB pass-through */ - snprintf(name, sizeof(name), "xscom-pec-%d.%d-pci-stack-%d-phb", - pec->chip_id, pec->index, stack->stack_no); - pnv_xscom_region_init(&stack->phb_regs_mr, OBJECT(&stack->phb), - &pnv_phb4_xscom_ops, &stack->phb, name, 0x40); + stack->phb = PNV_PHB4(qdev_new(TYPE_PNV_PHB4)); - object_property_set_int(OBJECT(&stack->phb), "chip-id", pec->chip_id, + object_property_set_int(OBJECT(stack->phb), "chip-id", pec->chip_id, &error_fatal); - object_property_set_int(OBJECT(&stack->phb), "version", pecc->version, + object_property_set_int(OBJECT(stack->phb), "index", phb_id, &error_fatal); - object_property_set_link(OBJECT(&stack->phb), "stack", OBJECT(stack), + object_property_set_int(OBJECT(stack->phb), 
"version", pecc->version, + &error_fatal); + object_property_set_link(OBJECT(stack->phb), "stack", OBJECT(stack), &error_abort); - if (!sysbus_realize(SYS_BUS_DEVICE(&stack->phb), errp)) { + + if (!sysbus_realize(SYS_BUS_DEVICE(stack->phb), errp)) { + return; + } +} + +static void pnv_pec_stk_realize(DeviceState *dev, Error **errp) +{ + PnvPhb4PecStack *stack = PNV_PHB4_PEC_STACK(dev); + + if (!defaults_enabled()) { return; } - pec_nest_base = pecc->xscom_nest_base(pec); - pec_pci_base = pecc->xscom_pci_base(pec); - - /* Populate the XSCOM address space. */ - pnv_xscom_add_subregion(chip, - pec_nest_base + 0x40 * (stack->stack_no + 1), - &stack->nest_regs_mr); - pnv_xscom_add_subregion(chip, - pec_pci_base + 0x40 * (stack->stack_no + 1), - &stack->pci_regs_mr); - pnv_xscom_add_subregion(chip, - pec_pci_base + PNV9_XSCOM_PEC_PCI_STK0 + - 0x40 * stack->stack_no, - &stack->phb_regs_mr); + pnv_pec_stk_default_phb_realize(stack, errp); } static Property pnv_pec_stk_properties[] = { @@ -630,7 +332,6 @@ static const TypeInfo pnv_pec_stk_type_info = { .name = TYPE_PNV_PHB4_PEC_STACK, .parent = TYPE_DEVICE, .instance_size = sizeof(PnvPhb4PecStack), - .instance_init = pnv_pec_stk_instance_init, .class_init = pnv_pec_stk_class_init, .interfaces = (InterfaceInfo[]) { { TYPE_PNV_XSCOM_INTERFACE }, diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c index 9de8b83530..837146a2fb 100644 --- a/hw/ppc/pnv.c +++ b/hw/ppc/pnv.c @@ -1099,7 +1099,6 @@ static void pnv_chip_power10_intc_print_info(PnvChip *chip, PowerPCCPU *cpu, static void pnv_chip_power8_instance_init(Object *obj) { - PnvChip *chip = PNV_CHIP(obj); Pnv8Chip *chip8 = PNV8_CHIP(obj); PnvChipClass *pcc = PNV_CHIP_GET_CLASS(obj); int i; @@ -1117,14 +1116,14 @@ static void pnv_chip_power8_instance_init(Object *obj) object_initialize_child(obj, "homer", &chip8->homer, TYPE_PNV8_HOMER); - for (i = 0; i < pcc->num_phbs; i++) { + if (defaults_enabled()) { + chip8->num_phbs = pcc->num_phbs; + } + + for (i = 0; i < chip8->num_phbs; i++) { 
object_initialize_child(obj, "phb[*]", &chip8->phbs[i], TYPE_PNV_PHB3); } - /* - * Number of PHBs is the chip default - */ - chip->num_phbs = pcc->num_phbs; } static void pnv_chip_icp_realize(Pnv8Chip *chip8, Error **errp) @@ -1156,6 +1155,14 @@ static void pnv_chip_icp_realize(Pnv8Chip *chip8, Error **errp) } } +/* Attach a root port device */ +void pnv_phb_attach_root_port(PCIHostState *pci, const char *name) +{ + PCIDevice *root = pci_new(PCI_DEVFN(0, 0), name); + + pci_realize_and_unref(root, pci->bus, &error_fatal); +} + static void pnv_chip_power8_realize(DeviceState *dev, Error **errp) { PnvChipClass *pcc = PNV_CHIP_GET_CLASS(dev); @@ -1239,7 +1246,7 @@ static void pnv_chip_power8_realize(DeviceState *dev, Error **errp) &chip8->homer.regs); /* PHB3 controllers */ - for (i = 0; i < chip->num_phbs; i++) { + for (i = 0; i < chip8->num_phbs; i++) { PnvPHB3 *phb = &chip8->phbs[i]; object_property_set_int(OBJECT(phb), "index", i, &error_fatal); @@ -1806,6 +1813,36 @@ static ICSState *pnv_ics_get(XICSFabric *xi, int irq) return NULL; } +void pnv_chip_parent_fixup(PnvChip *chip, Object *obj, int index) +{ + Object *parent = OBJECT(chip); + g_autofree char *default_id = + g_strdup_printf("%s[%d]", object_get_typename(obj), index); + + if (obj->parent == parent) { + return; + } + + object_ref(obj); + object_unparent(obj); + object_property_add_child( + parent, DEVICE(obj)->id ? 
DEVICE(obj)->id : default_id, obj); + object_unref(obj); +} + +PnvChip *pnv_get_chip(PnvMachineState *pnv, uint32_t chip_id) +{ + int i; + + for (i = 0; i < pnv->num_chips; i++) { + PnvChip *chip = pnv->chips[i]; + if (chip->chip_id == chip_id) { + return chip; + } + } + return NULL; +} + static int pnv_ics_resend_child(Object *child, void *opaque) { PnvPHB3 *phb3 = (PnvPHB3 *) object_dynamic_cast(child, TYPE_PNV_PHB3); @@ -1903,6 +1940,8 @@ static void pnv_machine_power8_class_init(ObjectClass *oc, void *data) pmc->compat = compat; pmc->compat_size = sizeof(compat); + + machine_class_allow_dynamic_sysbus_dev(mc, TYPE_PNV_PHB3); } static void pnv_machine_power9_class_init(ObjectClass *oc, void *data) @@ -1921,6 +1960,8 @@ static void pnv_machine_power9_class_init(ObjectClass *oc, void *data) pmc->compat = compat; pmc->compat_size = sizeof(compat); pmc->dt_power_mgt = pnv_dt_power_mgt; + + machine_class_allow_dynamic_sysbus_dev(mc, TYPE_PNV_PHB4); } static void pnv_machine_power10_class_init(ObjectClass *oc, void *data) diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index 8373429325..72f5dce751 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -723,10 +723,12 @@ static void spapr_dt_cpu(CPUState *cs, void *fdt, int offset, * * Only CPUs for which we create core types in spapr_cpu_core.c * are possible, and all of those have VMX */ - if (spapr_get_cap(spapr, SPAPR_CAP_VSX) != 0) { - _FDT((fdt_setprop_cell(fdt, offset, "ibm,vmx", 2))); - } else { - _FDT((fdt_setprop_cell(fdt, offset, "ibm,vmx", 1))); + if (env->insns_flags & PPC_ALTIVEC) { + if (spapr_get_cap(spapr, SPAPR_CAP_VSX) != 0) { + _FDT((fdt_setprop_cell(fdt, offset, "ibm,vmx", 2))); + } else { + _FDT((fdt_setprop_cell(fdt, offset, "ibm,vmx", 1))); + } } /* Advertise DFP (Decimal Floating Point) if available diff --git a/hw/scsi/virtio-scsi-dataplane.c b/hw/scsi/virtio-scsi-dataplane.c index 18eb824c97..29575cbaf6 100644 --- a/hw/scsi/virtio-scsi-dataplane.c +++ b/hw/scsi/virtio-scsi-dataplane.c @@ -49,51 
+49,6 @@ void virtio_scsi_dataplane_setup(VirtIOSCSI *s, Error **errp) } } -static bool virtio_scsi_data_plane_handle_cmd(VirtIODevice *vdev, - VirtQueue *vq) -{ - bool progress = false; - VirtIOSCSI *s = VIRTIO_SCSI(vdev); - - virtio_scsi_acquire(s); - if (!s->dataplane_fenced) { - assert(s->ctx && s->dataplane_started); - progress = virtio_scsi_handle_cmd_vq(s, vq); - } - virtio_scsi_release(s); - return progress; -} - -static bool virtio_scsi_data_plane_handle_ctrl(VirtIODevice *vdev, - VirtQueue *vq) -{ - bool progress = false; - VirtIOSCSI *s = VIRTIO_SCSI(vdev); - - virtio_scsi_acquire(s); - if (!s->dataplane_fenced) { - assert(s->ctx && s->dataplane_started); - progress = virtio_scsi_handle_ctrl_vq(s, vq); - } - virtio_scsi_release(s); - return progress; -} - -static bool virtio_scsi_data_plane_handle_event(VirtIODevice *vdev, - VirtQueue *vq) -{ - bool progress = false; - VirtIOSCSI *s = VIRTIO_SCSI(vdev); - - virtio_scsi_acquire(s); - if (!s->dataplane_fenced) { - assert(s->ctx && s->dataplane_started); - progress = virtio_scsi_handle_event_vq(s, vq); - } - virtio_scsi_release(s); - return progress; -} - static int virtio_scsi_set_host_notifier(VirtIOSCSI *s, VirtQueue *vq, int n) { BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(s))); @@ -118,10 +73,10 @@ static void virtio_scsi_dataplane_stop_bh(void *opaque) VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(s); int i; - virtio_queue_aio_set_host_notifier_handler(vs->ctrl_vq, s->ctx, NULL); - virtio_queue_aio_set_host_notifier_handler(vs->event_vq, s->ctx, NULL); + virtio_queue_aio_detach_host_notifier(vs->ctrl_vq, s->ctx); + virtio_queue_aio_detach_host_notifier(vs->event_vq, s->ctx); for (i = 0; i < vs->conf.num_queues; i++) { - virtio_queue_aio_set_host_notifier_handler(vs->cmd_vqs[i], s->ctx, NULL); + virtio_queue_aio_detach_host_notifier(vs->cmd_vqs[i], s->ctx); } } @@ -182,14 +137,11 @@ int virtio_scsi_dataplane_start(VirtIODevice *vdev) memory_region_transaction_commit(); aio_context_acquire(s->ctx); - 
virtio_queue_aio_set_host_notifier_handler(vs->ctrl_vq, s->ctx, - virtio_scsi_data_plane_handle_ctrl); - virtio_queue_aio_set_host_notifier_handler(vs->event_vq, s->ctx, - virtio_scsi_data_plane_handle_event); + virtio_queue_aio_attach_host_notifier(vs->ctrl_vq, s->ctx); + virtio_queue_aio_attach_host_notifier(vs->event_vq, s->ctx); for (i = 0; i < vs->conf.num_queues; i++) { - virtio_queue_aio_set_host_notifier_handler(vs->cmd_vqs[i], s->ctx, - virtio_scsi_data_plane_handle_cmd); + virtio_queue_aio_attach_host_notifier(vs->cmd_vqs[i], s->ctx); } s->dataplane_starting = false; diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c index 51fd09522a..34a968ecfb 100644 --- a/hw/scsi/virtio-scsi.c +++ b/hw/scsi/virtio-scsi.c @@ -720,7 +720,7 @@ static void virtio_scsi_handle_cmd(VirtIODevice *vdev, VirtQueue *vq) /* use non-QOM casts in the data path */ VirtIOSCSI *s = (VirtIOSCSI *)vdev; - if (s->ctx) { + if (s->ctx && !s->dataplane_started) { virtio_device_start_ioeventfd(vdev); if (!s->dataplane_fenced) { return; diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index 5d18868d7d..aae72fb8b7 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -125,7 +125,6 @@ struct VirtQueue uint16_t vector; VirtIOHandleOutput handle_output; - VirtIOHandleAIOOutput handle_aio_output; VirtIODevice *vdev; EventNotifier guest_notifier; EventNotifier host_notifier; @@ -2303,24 +2302,6 @@ void virtio_queue_set_align(VirtIODevice *vdev, int n, int align) } } -static bool virtio_queue_notify_aio_vq(VirtQueue *vq) -{ - bool ret = false; - - if (vq->vring.desc && vq->handle_aio_output) { - VirtIODevice *vdev = vq->vdev; - - trace_virtio_queue_notify(vdev, vq - vdev->vq, vq); - ret = vq->handle_aio_output(vdev, vq); - - if (unlikely(vdev->start_on_kick)) { - virtio_set_started(vdev, true); - } - } - - return ret; -} - static void virtio_queue_notify_vq(VirtQueue *vq) { if (vq->vring.desc && vq->handle_output) { @@ -2399,7 +2380,6 @@ VirtQueue *virtio_add_queue(VirtIODevice 
*vdev, int queue_size, vdev->vq[i].vring.num_default = queue_size; vdev->vq[i].vring.align = VIRTIO_PCI_VRING_ALIGN; vdev->vq[i].handle_output = handle_output; - vdev->vq[i].handle_aio_output = NULL; vdev->vq[i].used_elems = g_malloc0(sizeof(VirtQueueElement) * queue_size); @@ -2411,7 +2391,6 @@ void virtio_delete_queue(VirtQueue *vq) vq->vring.num = 0; vq->vring.num_default = 0; vq->handle_output = NULL; - vq->handle_aio_output = NULL; g_free(vq->used_elems); vq->used_elems = NULL; virtio_virtqueue_reset_region_cache(vq); @@ -3516,14 +3495,6 @@ EventNotifier *virtio_queue_get_guest_notifier(VirtQueue *vq) return &vq->guest_notifier; } -static void virtio_queue_host_notifier_aio_read(EventNotifier *n) -{ - VirtQueue *vq = container_of(n, VirtQueue, host_notifier); - if (event_notifier_test_and_clear(n)) { - virtio_queue_notify_aio_vq(vq); - } -} - static void virtio_queue_host_notifier_aio_poll_begin(EventNotifier *n) { VirtQueue *vq = container_of(n, VirtQueue, host_notifier); @@ -3536,11 +3507,14 @@ static bool virtio_queue_host_notifier_aio_poll(void *opaque) EventNotifier *n = opaque; VirtQueue *vq = container_of(n, VirtQueue, host_notifier); - if (!vq->vring.desc || virtio_queue_empty(vq)) { - return false; - } + return vq->vring.desc && !virtio_queue_empty(vq); +} + +static void virtio_queue_host_notifier_aio_poll_ready(EventNotifier *n) +{ + VirtQueue *vq = container_of(n, VirtQueue, host_notifier); - return virtio_queue_notify_aio_vq(vq); + virtio_queue_notify_vq(vq); } static void virtio_queue_host_notifier_aio_poll_end(EventNotifier *n) @@ -3551,24 +3525,23 @@ static void virtio_queue_host_notifier_aio_poll_end(EventNotifier *n) virtio_queue_set_notification(vq, 1); } -void virtio_queue_aio_set_host_notifier_handler(VirtQueue *vq, AioContext *ctx, - VirtIOHandleAIOOutput handle_output) +void virtio_queue_aio_attach_host_notifier(VirtQueue *vq, AioContext *ctx) { - if (handle_output) { - vq->handle_aio_output = handle_output; - aio_set_event_notifier(ctx, 
&vq->host_notifier, true, - virtio_queue_host_notifier_aio_read, - virtio_queue_host_notifier_aio_poll); - aio_set_event_notifier_poll(ctx, &vq->host_notifier, - virtio_queue_host_notifier_aio_poll_begin, - virtio_queue_host_notifier_aio_poll_end); - } else { - aio_set_event_notifier(ctx, &vq->host_notifier, true, NULL, NULL); - /* Test and clear notifier before after disabling event, - * in case poll callback didn't have time to run. */ - virtio_queue_host_notifier_aio_read(&vq->host_notifier); - vq->handle_aio_output = NULL; - } + aio_set_event_notifier(ctx, &vq->host_notifier, true, + virtio_queue_host_notifier_read, + virtio_queue_host_notifier_aio_poll, + virtio_queue_host_notifier_aio_poll_ready); + aio_set_event_notifier_poll(ctx, &vq->host_notifier, + virtio_queue_host_notifier_aio_poll_begin, + virtio_queue_host_notifier_aio_poll_end); +} + +void virtio_queue_aio_detach_host_notifier(VirtQueue *vq, AioContext *ctx) +{ + aio_set_event_notifier(ctx, &vq->host_notifier, true, NULL, NULL, NULL); + /* Test and clear notifier before after disabling event, + * in case poll callback didn't have time to run. 
*/ + virtio_queue_host_notifier_read(&vq->host_notifier); } void virtio_queue_host_notifier_read(EventNotifier *n) diff --git a/hw/xen/xen-bus.c b/hw/xen/xen-bus.c index 416583f130..645a29a5a0 100644 --- a/hw/xen/xen-bus.c +++ b/hw/xen/xen-bus.c @@ -1115,11 +1115,11 @@ void xen_device_set_event_channel_context(XenDevice *xendev, if (channel->ctx) aio_set_fd_handler(channel->ctx, xenevtchn_fd(channel->xeh), true, - NULL, NULL, NULL, NULL); + NULL, NULL, NULL, NULL, NULL); channel->ctx = ctx; aio_set_fd_handler(channel->ctx, xenevtchn_fd(channel->xeh), true, - xen_device_event, NULL, xen_device_poll, channel); + xen_device_event, NULL, xen_device_poll, NULL, channel); } XenEventChannel *xen_device_bind_event_channel(XenDevice *xendev, @@ -1193,7 +1193,7 @@ void xen_device_unbind_event_channel(XenDevice *xendev, QLIST_REMOVE(channel, list); aio_set_fd_handler(channel->ctx, xenevtchn_fd(channel->xeh), true, - NULL, NULL, NULL, NULL); + NULL, NULL, NULL, NULL, NULL); if (xenevtchn_unbind(channel->xeh, channel->local_port) < 0) { error_setg_errno(errp, errno, "xenevtchn_unbind failed"); diff --git a/include/block/aio.h b/include/block/aio.h index 47fbe9d81f..5634173b12 100644 --- a/include/block/aio.h +++ b/include/block/aio.h @@ -469,6 +469,7 @@ void aio_set_fd_handler(AioContext *ctx, IOHandler *io_read, IOHandler *io_write, AioPollFn *io_poll, + IOHandler *io_poll_ready, void *opaque); /* Set polling begin/end callbacks for a file descriptor that has already been @@ -490,7 +491,8 @@ void aio_set_event_notifier(AioContext *ctx, EventNotifier *notifier, bool is_external, EventNotifierHandler *io_read, - AioPollFn *io_poll); + AioPollFn *io_poll, + EventNotifierHandler *io_poll_ready); /* Set polling begin/end callbacks for an event notifier that has already been * registered with aio_set_event_notifier. 
Do nothing if the event notifier is diff --git a/include/hw/pci-host/pnv_phb3.h b/include/hw/pci-host/pnv_phb3.h index e9c13e6bd8..af6ec83cf6 100644 --- a/include/hw/pci-host/pnv_phb3.h +++ b/include/hw/pci-host/pnv_phb3.h @@ -105,7 +105,7 @@ struct PnvPBCQState { /* * PHB3 PCIe Root port */ -#define TYPE_PNV_PHB3_ROOT_BUS "pnv-phb3-root-bus" +#define TYPE_PNV_PHB3_ROOT_BUS "pnv-phb3-root" #define TYPE_PNV_PHB3_ROOT_PORT "pnv-phb3-root-port" @@ -155,8 +155,6 @@ struct PnvPHB3 { PnvPBCQState pbcq; - PnvPHB3RootPort root; - QLIST_HEAD(, PnvPhb3DMASpace) dma_spaces; PnvChip *chip; diff --git a/include/hw/pci-host/pnv_phb4.h b/include/hw/pci-host/pnv_phb4.h index 4a19338db3..4b7ce8a723 100644 --- a/include/hw/pci-host/pnv_phb4.h +++ b/include/hw/pci-host/pnv_phb4.h @@ -15,6 +15,7 @@ #include "hw/ppc/xive.h" #include "qom/object.h" +typedef struct PnvPhb4PecState PnvPhb4PecState; typedef struct PnvPhb4PecStack PnvPhb4PecStack; typedef struct PnvPHB4 PnvPHB4; typedef struct PnvChip PnvChip; @@ -46,7 +47,7 @@ typedef struct PnvPhb4DMASpace { /* * PHB4 PCIe Root port */ -#define TYPE_PNV_PHB4_ROOT_BUS "pnv-phb4-root-bus" +#define TYPE_PNV_PHB4_ROOT_BUS "pnv-phb4-root" #define TYPE_PNV_PHB4_ROOT_PORT "pnv-phb4-root-port" typedef struct PnvPHB4RootPort { @@ -78,8 +79,6 @@ OBJECT_DECLARE_SIMPLE_TYPE(PnvPHB4, PNV_PHB4) struct PnvPHB4 { PCIExpressHost parent_obj; - PnvPHB4RootPort root; - uint32_t chip_id; uint32_t phb_id; @@ -132,7 +131,7 @@ struct PnvPHB4 { }; void pnv_phb4_pic_print_info(PnvPHB4 *phb, Monitor *mon); -void pnv_phb4_update_regions(PnvPhb4PecStack *stack); +int pnv_phb4_pec_get_phb_id(PnvPhb4PecState *pec, int stack_index); extern const MemoryRegionOps pnv_phb4_xscom_ops; /* @@ -177,8 +176,11 @@ struct PnvPhb4PecStack { /* The owner PEC */ PnvPhb4PecState *pec; - /* The actual PHB */ - PnvPHB4 phb; + /* + * PHB4 pointer. pnv_phb4_update_regions() needs to access + * the PHB4 via a PnvPhb4PecStack pointer. 
+ */ + PnvPHB4 *phb; }; struct PnvPhb4PecState { diff --git a/include/hw/ppc/pnv.h b/include/hw/ppc/pnv.h index c781525277..0e9e16544f 100644 --- a/include/hw/ppc/pnv.h +++ b/include/hw/ppc/pnv.h @@ -52,7 +52,6 @@ struct PnvChip { uint64_t cores_mask; PnvCore **cores; - uint32_t num_phbs; uint32_t num_pecs; MemoryRegion xscom_mmio; @@ -82,6 +81,7 @@ struct Pnv8Chip { #define PNV8_CHIP_PHB3_MAX 4 PnvPHB3 phbs[PNV8_CHIP_PHB3_MAX]; + uint32_t num_phbs; XICSFabric *xics; }; @@ -136,8 +136,8 @@ struct PnvChipClass { /*< public >*/ uint64_t chip_cfam_id; uint64_t cores_mask; - uint32_t num_phbs; uint32_t num_pecs; + uint32_t num_phbs; DeviceRealize parent_realize; @@ -177,6 +177,8 @@ DECLARE_INSTANCE_CHECKER(PnvChip, PNV_CHIP_POWER10, TYPE_PNV_CHIP_POWER10) PowerPCCPU *pnv_chip_find_cpu(PnvChip *chip, uint32_t pir); +void pnv_phb_attach_root_port(PCIHostState *pci, const char *name); +void pnv_chip_parent_fixup(PnvChip *chip, Object *obj, int index); #define TYPE_PNV_MACHINE MACHINE_TYPE_NAME("powernv") typedef struct PnvMachineClass PnvMachineClass; @@ -217,6 +219,8 @@ struct PnvMachineState { hwaddr fw_load_addr; }; +PnvChip *pnv_get_chip(PnvMachineState *pnv, uint32_t chip_id); + #define PNV_FDT_ADDR 0x01000000 #define PNV_TIMEBASE_FREQ 512000000ULL diff --git a/include/hw/virtio/virtio-blk.h b/include/hw/virtio/virtio-blk.h index 29655a406d..d311c57cca 100644 --- a/include/hw/virtio/virtio-blk.h +++ b/include/hw/virtio/virtio-blk.h @@ -90,7 +90,7 @@ typedef struct MultiReqBuffer { bool is_write; } MultiReqBuffer; -bool virtio_blk_handle_vq(VirtIOBlock *s, VirtQueue *vq); +void virtio_blk_handle_vq(VirtIOBlock *s, VirtQueue *vq); void virtio_blk_process_queued_requests(VirtIOBlock *s, bool is_bh); #endif diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h index 8bab9cfb75..f095637058 100644 --- a/include/hw/virtio/virtio.h +++ b/include/hw/virtio/virtio.h @@ -175,7 +175,6 @@ void virtio_error(VirtIODevice *vdev, const char *fmt, ...) 
GCC_FMT_ATTR(2, 3); void virtio_device_set_child_bus_name(VirtIODevice *vdev, char *bus_name); typedef void (*VirtIOHandleOutput)(VirtIODevice *, VirtQueue *); -typedef bool (*VirtIOHandleAIOOutput)(VirtIODevice *, VirtQueue *); VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size, VirtIOHandleOutput handle_output); @@ -317,8 +316,8 @@ bool virtio_device_ioeventfd_enabled(VirtIODevice *vdev); EventNotifier *virtio_queue_get_host_notifier(VirtQueue *vq); void virtio_queue_set_host_notifier_enabled(VirtQueue *vq, bool enabled); void virtio_queue_host_notifier_read(EventNotifier *n); -void virtio_queue_aio_set_host_notifier_handler(VirtQueue *vq, AioContext *ctx, - VirtIOHandleAIOOutput handle_output); +void virtio_queue_aio_attach_host_notifier(VirtQueue *vq, AioContext *ctx); +void virtio_queue_aio_detach_host_notifier(VirtQueue *vq, AioContext *ctx); VirtQueue *virtio_vector_first_queue(VirtIODevice *vdev, uint16_t vector); VirtQueue *virtio_vector_next_queue(VirtQueue *vq); diff --git a/io/channel-command.c b/io/channel-command.c index b2a9e27138..338da73ade 100644 --- a/io/channel-command.c +++ b/io/channel-command.c @@ -346,8 +346,10 @@ static void qio_channel_command_set_aio_fd_handler(QIOChannel *ioc, void *opaque) { QIOChannelCommand *cioc = QIO_CHANNEL_COMMAND(ioc); - aio_set_fd_handler(ctx, cioc->readfd, false, io_read, NULL, NULL, opaque); - aio_set_fd_handler(ctx, cioc->writefd, false, NULL, io_write, NULL, opaque); + aio_set_fd_handler(ctx, cioc->readfd, false, + io_read, NULL, NULL, NULL, opaque); + aio_set_fd_handler(ctx, cioc->writefd, false, + NULL, io_write, NULL, NULL, opaque); } diff --git a/io/channel-file.c b/io/channel-file.c index c4bf799a80..d7cf6d278f 100644 --- a/io/channel-file.c +++ b/io/channel-file.c @@ -191,7 +191,8 @@ static void qio_channel_file_set_aio_fd_handler(QIOChannel *ioc, void *opaque) { QIOChannelFile *fioc = QIO_CHANNEL_FILE(ioc); - aio_set_fd_handler(ctx, fioc->fd, false, io_read, io_write, NULL, opaque); + 
aio_set_fd_handler(ctx, fioc->fd, false, io_read, io_write, + NULL, NULL, opaque); } static GSource *qio_channel_file_create_watch(QIOChannel *ioc, diff --git a/io/channel-socket.c b/io/channel-socket.c index 606ec97cf7..459922c874 100644 --- a/io/channel-socket.c +++ b/io/channel-socket.c @@ -761,7 +761,8 @@ static void qio_channel_socket_set_aio_fd_handler(QIOChannel *ioc, void *opaque) { QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(ioc); - aio_set_fd_handler(ctx, sioc->fd, false, io_read, io_write, NULL, opaque); + aio_set_fd_handler(ctx, sioc->fd, false, + io_read, io_write, NULL, NULL, opaque); } static GSource *qio_channel_socket_create_watch(QIOChannel *ioc, diff --git a/linux-user/host/aarch64/host-signal.h b/linux-user/include/host/aarch64/host-signal.h index 9770b36dc1..9770b36dc1 100644 --- a/linux-user/host/aarch64/host-signal.h +++ b/linux-user/include/host/aarch64/host-signal.h diff --git a/linux-user/host/alpha/host-signal.h b/linux-user/include/host/alpha/host-signal.h index f4c942948a..f4c942948a 100644 --- a/linux-user/host/alpha/host-signal.h +++ b/linux-user/include/host/alpha/host-signal.h diff --git a/linux-user/host/arm/host-signal.h b/linux-user/include/host/arm/host-signal.h index 6c095773c0..6c095773c0 100644 --- a/linux-user/host/arm/host-signal.h +++ b/linux-user/include/host/arm/host-signal.h diff --git a/linux-user/host/i386/host-signal.h b/linux-user/include/host/i386/host-signal.h index abe1ece5c9..abe1ece5c9 100644 --- a/linux-user/host/i386/host-signal.h +++ b/linux-user/include/host/i386/host-signal.h diff --git a/linux-user/host/loongarch64/host-signal.h b/linux-user/include/host/loongarch64/host-signal.h index 7effa24251..7effa24251 100644 --- a/linux-user/host/loongarch64/host-signal.h +++ b/linux-user/include/host/loongarch64/host-signal.h diff --git a/linux-user/host/mips/host-signal.h b/linux-user/include/host/mips/host-signal.h index c666ed8c3f..c666ed8c3f 100644 --- a/linux-user/host/mips/host-signal.h +++ 
b/linux-user/include/host/mips/host-signal.h diff --git a/linux-user/host/ppc/host-signal.h b/linux-user/include/host/ppc/host-signal.h index 1d8e658ff7..1d8e658ff7 100644 --- a/linux-user/host/ppc/host-signal.h +++ b/linux-user/include/host/ppc/host-signal.h diff --git a/linux-user/host/ppc64/host-signal.h b/linux-user/include/host/ppc64/host-signal.h index a353c22a90..a353c22a90 100644 --- a/linux-user/host/ppc64/host-signal.h +++ b/linux-user/include/host/ppc64/host-signal.h diff --git a/linux-user/host/riscv/host-signal.h b/linux-user/include/host/riscv/host-signal.h index a4f170efb0..a4f170efb0 100644 --- a/linux-user/host/riscv/host-signal.h +++ b/linux-user/include/host/riscv/host-signal.h diff --git a/linux-user/host/s390/host-signal.h b/linux-user/include/host/s390/host-signal.h index a524f2ab00..a524f2ab00 100644 --- a/linux-user/host/s390/host-signal.h +++ b/linux-user/include/host/s390/host-signal.h diff --git a/linux-user/host/s390x/host-signal.h b/linux-user/include/host/s390x/host-signal.h index 0e83f9358d..0e83f9358d 100644 --- a/linux-user/host/s390x/host-signal.h +++ b/linux-user/include/host/s390x/host-signal.h diff --git a/linux-user/host/sparc/host-signal.h b/linux-user/include/host/sparc/host-signal.h index 7342936071..7342936071 100644 --- a/linux-user/host/sparc/host-signal.h +++ b/linux-user/include/host/sparc/host-signal.h diff --git a/linux-user/host/sparc64/host-signal.h b/linux-user/include/host/sparc64/host-signal.h index 1191fe2d40..1191fe2d40 100644 --- a/linux-user/host/sparc64/host-signal.h +++ b/linux-user/include/host/sparc64/host-signal.h diff --git a/linux-user/host/x32/host-signal.h b/linux-user/include/host/x32/host-signal.h index 26800591d3..26800591d3 100644 --- a/linux-user/host/x32/host-signal.h +++ b/linux-user/include/host/x32/host-signal.h diff --git a/linux-user/host/x86_64/host-signal.h b/linux-user/include/host/x86_64/host-signal.h index c71d597eb2..c71d597eb2 100644 --- a/linux-user/host/x86_64/host-signal.h +++ 
b/linux-user/include/host/x86_64/host-signal.h diff --git a/linux-user/special-errno.h b/linux-user/include/special-errno.h index 4120455baa..4120455baa 100644 --- a/linux-user/special-errno.h +++ b/linux-user/include/special-errno.h diff --git a/linux-user/meson.build b/linux-user/meson.build index b2f4afd5e7..de4320af05 100644 --- a/linux-user/meson.build +++ b/linux-user/meson.build @@ -4,8 +4,8 @@ endif linux_user_ss = ss.source_set() -common_user_inc += include_directories('host/' / host_arch) -common_user_inc += include_directories('.') +common_user_inc += include_directories('include/host/' / host_arch) +common_user_inc += include_directories('include') linux_user_ss.add(files( 'elfload.c', diff --git a/meson.build b/meson.build index c1b1db1e28..762d7cee85 100644 --- a/meson.build +++ b/meson.build @@ -238,6 +238,7 @@ endif # Target-specific checks and dependencies # ########################################### +# Fuzzing if get_option('fuzzing') and get_option('fuzzing_engine') == '' and \ not cc.links(''' #include <stdint.h> @@ -249,6 +250,7 @@ if get_option('fuzzing') and get_option('fuzzing_engine') == '' and \ error('Your compiler does not support -fsanitize=fuzzer') endif +# Tracing backends if 'ftrace' in get_option('trace_backends') and targetos != 'linux' error('ftrace is supported only on Linux') endif @@ -262,6 +264,7 @@ if 'syslog' in get_option('trace_backends') and not cc.compiles(''' error('syslog is not supported on this system') endif +# Miscellaneous Linux-only features if targetos != 'linux' and get_option('mpath').enabled() error('Multipath is supported only on Linux') endif @@ -271,6 +274,7 @@ if targetos != 'linux' and get_option('multiprocess').enabled() endif multiprocess_allowed = targetos == 'linux' and not get_option('multiprocess').disabled() +# Target-specific libraries and flags libm = cc.find_library('m', required: false) threads = dependency('threads') util = cc.find_library('util', required: false) @@ -311,6 +315,7 @@ elif 
targetos == 'openbsd' endif endif +# Target-specific configuration of accelerators accelerators = [] if not get_option('kvm').disabled() and targetos == 'linux' accelerators += 'CONFIG_KVM' @@ -2028,6 +2033,18 @@ config_all += { 'CONFIG_ALL': true, } +target_configs_h = [] +foreach target: target_dirs + target_configs_h += config_target_h[target] + target_configs_h += config_devices_h.get(target, []) +endforeach +genh += custom_target('config-poison.h', + input: [target_configs_h], + output: 'config-poison.h', + capture: true, + command: [find_program('scripts/make-config-poison.sh'), + target_configs_h]) + ############## # Submodules # ############## @@ -2399,7 +2416,6 @@ blockdev_ss = ss.source_set() block_ss = ss.source_set() chardev_ss = ss.source_set() common_ss = ss.source_set() -common_user_ss = ss.source_set() crypto_ss = ss.source_set() hwcore_ss = ss.source_set() io_ss = ss.source_set() @@ -2651,17 +2667,6 @@ subdir('common-user') subdir('bsd-user') subdir('linux-user') -common_user_ss = common_user_ss.apply(config_all, strict: false) -common_user = static_library('common-user', - sources: common_user_ss.sources(), - dependencies: common_user_ss.dependencies(), - include_directories: common_user_inc, - name_suffix: 'fa', - build_by_default: false) -common_user = declare_dependency(link_with: common_user) - -user_ss.add(common_user) - # needed for fuzzing binaries subdir('tests/qtest/libqos') subdir('tests/qtest/fuzz') @@ -2861,8 +2866,10 @@ common_ss.add(hwcore) # Targets # ########### +emulator_modules = [] foreach m : block_mods + softmmu_mods - shared_module(m.name(), + emulator_modules += shared_module(m.name(), + build_by_default: true, name_prefix: '', link_whole: m, install: true, @@ -2879,6 +2886,7 @@ common_all = common_ss.apply(config_all, strict: false) common_all = static_library('common', build_by_default: false, sources: common_all.sources() + genh, + include_directories: common_user_inc, implicit_include_directories: false, dependencies: 
common_all.dependencies(), name_suffix: 'fa') @@ -2919,6 +2927,7 @@ foreach target : target_dirs else abi = config_target['TARGET_ABI_DIR'] target_type='user' + target_inc += common_user_inc qemu_target_name = 'qemu-' + target_name if target_base_arch in target_user_arch t = target_user_arch[target_base_arch].apply(config_target, strict: false) @@ -2927,7 +2936,6 @@ foreach target : target_dirs endif if 'CONFIG_LINUX_USER' in config_target base_dir = 'linux-user' - target_inc += include_directories('linux-user/host/' / host_arch) endif if 'CONFIG_BSD_USER' in config_target base_dir = 'bsd-user' @@ -3456,7 +3464,6 @@ if spice_protocol.found() summary_info += {' spice server support': spice} endif summary_info += {'rbd support': rbd} -summary_info += {'xfsctl support': config_host.has_key('CONFIG_XFS')} summary_info += {'smartcard support': cacard} summary_info += {'U2F support': u2f} summary_info += {'libusb': libusb} diff --git a/migration/rdma.c b/migration/rdma.c index f5d3bbe7e9..c7c7a38487 100644 --- a/migration/rdma.c +++ b/migration/rdma.c @@ -3161,14 +3161,14 @@ static void qio_channel_rdma_set_aio_fd_handler(QIOChannel *ioc, QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(ioc); if (io_read) { aio_set_fd_handler(ctx, rioc->rdmain->recv_comp_channel->fd, - false, io_read, io_write, NULL, opaque); + false, io_read, io_write, NULL, NULL, opaque); aio_set_fd_handler(ctx, rioc->rdmain->send_comp_channel->fd, - false, io_read, io_write, NULL, opaque); + false, io_read, io_write, NULL, NULL, opaque); } else { aio_set_fd_handler(ctx, rioc->rdmaout->recv_comp_channel->fd, - false, io_read, io_write, NULL, opaque); + false, io_read, io_write, NULL, NULL, opaque); aio_set_fd_handler(ctx, rioc->rdmaout->send_comp_channel->fd, - false, io_read, io_write, NULL, opaque); + false, io_read, io_write, NULL, NULL, opaque); } } diff --git a/pc-bios/README b/pc-bios/README index c51ae58824..ba6c15e769 100644 --- a/pc-bios/README +++ b/pc-bios/README @@ -14,7 +14,7 @@ - SLOF (Slimline 
Open Firmware) is a free IEEE 1275 Open Firmware implementation for certain IBM POWER hardware. The sources are at https://github.com/aik/SLOF, and the image currently in qemu is - built from git tag qemu-slof-20211112. + built from git tag qemu-slof-20220110. - VOF (Virtual Open Firmware) is a minimalistic firmware to work with -machine pseries,x-vof=on. When enabled, the firmware acts as a slim shim and diff --git a/pc-bios/s390-ccw/Makefile b/pc-bios/s390-ccw/Makefile index cee9d2c63b..0eb68efc7b 100644 --- a/pc-bios/s390-ccw/Makefile +++ b/pc-bios/s390-ccw/Makefile @@ -44,8 +44,6 @@ build-all: s390-ccw.img s390-netboot.img s390-ccw.elf: $(OBJECTS) $(call quiet-command,$(CC) $(LDFLAGS) -o $@ $(OBJECTS),"BUILD","$(TARGET_DIR)$@") -STRIP ?= strip - s390-ccw.img: s390-ccw.elf $(call quiet-command,$(STRIP) --strip-unneeded $< -o $@,"STRIP","$(TARGET_DIR)$@") diff --git a/pc-bios/slof.bin b/pc-bios/slof.bin Binary files differ index 046ca63709..cbbe23e910 100644 --- a/pc-bios/slof.bin +++ b/pc-bios/slof.bin diff --git a/plugins/meson.build b/plugins/meson.build index b3de57853b..fa12047327 100644 --- a/plugins/meson.build +++ b/plugins/meson.build @@ -1,10 +1,15 @@ plugin_ldflags = [] # Modules need more symbols than just those in plugins/qemu-plugins.symbols if not enable_modules - if 'CONFIG_HAS_LD_DYNAMIC_LIST' in config_host - plugin_ldflags = ['-Wl,--dynamic-list=qemu-plugins-ld.symbols'] - elif 'CONFIG_HAS_LD_EXPORTED_SYMBOLS_LIST' in config_host - plugin_ldflags = ['-Wl,-exported_symbols_list,qemu-plugins-ld64.symbols'] + if targetos == 'darwin' + qemu_plugins_symbols_list = configure_file( + input: files('qemu-plugins.symbols'), + output: 'qemu-plugins-ld64.symbols', + capture: true, + command: ['sed', '-ne', 's/^[[:space:]]*\\(qemu_.*\\);/_\\1/p', '@INPUT@']) + plugin_ldflags = ['-Wl,-exported_symbols_list,plugins/qemu-plugins-ld64.symbols'] + else + plugin_ldflags = ['-Xlinker', '--dynamic-list=' + (meson.project_source_root() / 
'plugins/qemu-plugins.symbols')] endif endif diff --git a/roms/SLOF b/roms/SLOF -Subproject a6906b024c6cca5a86496f51eb4bfee3a0c3614 +Subproject 5b4c5acdcd552a4e1796aeca6bb700f6cbb0282 diff --git a/scripts/ci/org.centos/stream/8/x86_64/configure b/scripts/ci/org.centos/stream/8/x86_64/configure index 048e80dc49..e05f2fddcc 100755 --- a/scripts/ci/org.centos/stream/8/x86_64/configure +++ b/scripts/ci/org.centos/stream/8/x86_64/configure @@ -151,7 +151,6 @@ --disable-whpx \ --disable-xen \ --disable-xen-pci-passthrough \ ---disable-xfsctl \ --disable-xkbcommon \ --disable-zstd \ --enable-attr \ diff --git a/scripts/coverity-scan/run-coverity-scan b/scripts/coverity-scan/run-coverity-scan index 7395bbfad4..6d443250a9 100755 --- a/scripts/coverity-scan/run-coverity-scan +++ b/scripts/coverity-scan/run-coverity-scan @@ -398,7 +398,7 @@ echo "Configuring..." --enable-xen --enable-brlapi \ --enable-linux-aio --enable-attr \ --enable-cap-ng --enable-trace-backends=log --enable-spice --enable-rbd \ - --enable-xfsctl --enable-libusb --enable-usb-redir \ + --enable-libusb --enable-usb-redir \ --enable-libiscsi --enable-libnfs --enable-seccomp \ --enable-tpm --enable-libssh --enable-lzo --enable-snappy --enable-bzip2 \ --enable-numa --enable-rdma --enable-smartcard --enable-virglrenderer \ diff --git a/scripts/make-config-poison.sh b/scripts/make-config-poison.sh new file mode 100755 index 0000000000..d222a04304 --- /dev/null +++ b/scripts/make-config-poison.sh @@ -0,0 +1,16 @@ +#! /bin/sh + +if test $# = 0; then + exit 0 +fi + +# Create list of config switches that should be poisoned in common code... +# but filter out CONFIG_TCG and CONFIG_USER_ONLY which are special. 
+exec sed -n \ + -e' /CONFIG_TCG/d' \ + -e '/CONFIG_USER_ONLY/d' \ + -e '/^#define / {' \ + -e 's///' \ + -e 's/ .*//' \ + -e 's/^/#pragma GCC poison /p' \ + -e '}' "$@" diff --git a/scripts/meson-buildoptions.py b/scripts/meson-buildoptions.py index 96969d89ee..98ae944148 100755 --- a/scripts/meson-buildoptions.py +++ b/scripts/meson-buildoptions.py @@ -36,6 +36,10 @@ SKIP_OPTIONS = { "trace_file", } +BUILTIN_OPTIONS = { + "strip", +} + LINE_WIDTH = 76 @@ -90,14 +94,17 @@ def allow_arg(opt): return not (set(opt["choices"]) <= {"auto", "disabled", "enabled"}) +def filter_options(json): + if ":" in json["name"]: + return False + if json["section"] == "user": + return json["name"] not in SKIP_OPTIONS + else: + return json["name"] in BUILTIN_OPTIONS + + def load_options(json): - json = [ - x - for x in json - if x["section"] == "user" - and ":" not in x["name"] - and x["name"] not in SKIP_OPTIONS - ] + json = [x for x in json if filter_options(x)] return sorted(json, key=lambda x: x["name"]) diff --git a/scripts/meson-buildoptions.sh b/scripts/meson-buildoptions.sh index 50bd7bed4d..a4af02c527 100644 --- a/scripts/meson-buildoptions.sh +++ b/scripts/meson-buildoptions.sh @@ -13,6 +13,7 @@ meson_options_help() { printf "%s\n" ' jemalloc/system/tcmalloc)' printf "%s\n" ' --enable-slirp[=CHOICE] Whether and how to find the slirp library' printf "%s\n" ' (choices: auto/disabled/enabled/internal/system)' + printf "%s\n" ' --enable-strip Strip targets on install' printf "%s\n" ' --enable-tcg-interpreter TCG with bytecode interpreter (slow)' printf "%s\n" ' --enable-trace-backends=CHOICE' printf "%s\n" ' Set available tracing backends [log] (choices:' @@ -237,6 +238,8 @@ _meson_option_parse() { --disable-spice) printf "%s" -Dspice=disabled ;; --enable-spice-protocol) printf "%s" -Dspice_protocol=enabled ;; --disable-spice-protocol) printf "%s" -Dspice_protocol=disabled ;; + --enable-strip) printf "%s" -Dstrip=true ;; + --disable-strip) printf "%s" -Dstrip=false ;; 
--enable-tcg) printf "%s" -Dtcg=enabled ;; --disable-tcg) printf "%s" -Dtcg=disabled ;; --enable-tcg-interpreter) printf "%s" -Dtcg_interpreter=true ;; diff --git a/target/i386/cpu.h b/target/i386/cpu.h index 04f2b790c9..9911d7c871 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -1455,6 +1455,9 @@ typedef struct CPUX86State { SegmentCache idt; /* only base and limit are used */ target_ulong cr[5]; /* NOTE: cr1 is unused */ + + bool pdptrs_valid; + uint64_t pdptrs[4]; int32_t a20_mask; BNDReg bnd_regs[4]; diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c index 13f8e30c2a..2c8feb4a6f 100644 --- a/target/i386/kvm/kvm.c +++ b/target/i386/kvm/kvm.c @@ -124,6 +124,7 @@ static uint32_t num_architectural_pmu_fixed_counters; static int has_xsave; static int has_xcrs; static int has_pit_state2; +static int has_sregs2; static int has_exception_payload; static bool has_msr_mcg_ext_ctl; @@ -2324,6 +2325,7 @@ int kvm_arch_init(MachineState *ms, KVMState *s) has_xsave = kvm_check_extension(s, KVM_CAP_XSAVE); has_xcrs = kvm_check_extension(s, KVM_CAP_XCRS); has_pit_state2 = kvm_check_extension(s, KVM_CAP_PIT_STATE2); + has_sregs2 = kvm_check_extension(s, KVM_CAP_SREGS2) > 0; hv_vpindex_settable = kvm_check_extension(s, KVM_CAP_HYPERV_VP_INDEX); @@ -2605,11 +2607,11 @@ static int kvm_put_sregs(X86CPU *cpu) CPUX86State *env = &cpu->env; struct kvm_sregs sregs; + /* + * The interrupt_bitmap is ignored because KVM_SET_SREGS is + * always followed by KVM_SET_VCPU_EVENTS. 
+ */ memset(sregs.interrupt_bitmap, 0, sizeof(sregs.interrupt_bitmap)); - if (env->interrupt_injected >= 0) { - sregs.interrupt_bitmap[env->interrupt_injected / 64] |= - (uint64_t)1 << (env->interrupt_injected % 64); - } if ((env->eflags & VM_MASK)) { set_v8086_seg(&sregs.cs, &env->segs[R_CS]); @@ -2650,6 +2652,61 @@ static int kvm_put_sregs(X86CPU *cpu) return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_SREGS, &sregs); } +static int kvm_put_sregs2(X86CPU *cpu) +{ + CPUX86State *env = &cpu->env; + struct kvm_sregs2 sregs; + int i; + + sregs.flags = 0; + + if ((env->eflags & VM_MASK)) { + set_v8086_seg(&sregs.cs, &env->segs[R_CS]); + set_v8086_seg(&sregs.ds, &env->segs[R_DS]); + set_v8086_seg(&sregs.es, &env->segs[R_ES]); + set_v8086_seg(&sregs.fs, &env->segs[R_FS]); + set_v8086_seg(&sregs.gs, &env->segs[R_GS]); + set_v8086_seg(&sregs.ss, &env->segs[R_SS]); + } else { + set_seg(&sregs.cs, &env->segs[R_CS]); + set_seg(&sregs.ds, &env->segs[R_DS]); + set_seg(&sregs.es, &env->segs[R_ES]); + set_seg(&sregs.fs, &env->segs[R_FS]); + set_seg(&sregs.gs, &env->segs[R_GS]); + set_seg(&sregs.ss, &env->segs[R_SS]); + } + + set_seg(&sregs.tr, &env->tr); + set_seg(&sregs.ldt, &env->ldt); + + sregs.idt.limit = env->idt.limit; + sregs.idt.base = env->idt.base; + memset(sregs.idt.padding, 0, sizeof sregs.idt.padding); + sregs.gdt.limit = env->gdt.limit; + sregs.gdt.base = env->gdt.base; + memset(sregs.gdt.padding, 0, sizeof sregs.gdt.padding); + + sregs.cr0 = env->cr[0]; + sregs.cr2 = env->cr[2]; + sregs.cr3 = env->cr[3]; + sregs.cr4 = env->cr[4]; + + sregs.cr8 = cpu_get_apic_tpr(cpu->apic_state); + sregs.apic_base = cpu_get_apic_base(cpu->apic_state); + + sregs.efer = env->efer; + + if (env->pdptrs_valid) { + for (i = 0; i < 4; i++) { + sregs.pdptrs[i] = env->pdptrs[i]; + } + sregs.flags |= KVM_SREGS2_FLAGS_PDPTRS_VALID; + } + + return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_SREGS2, &sregs); +} + + static void kvm_msr_buf_reset(X86CPU *cpu) { memset(cpu->kvm_msr_buf, 0, MSR_BUF_SIZE); @@ -3284,22 
+3341,55 @@ static int kvm_get_sregs(X86CPU *cpu) { CPUX86State *env = &cpu->env; struct kvm_sregs sregs; - int bit, i, ret; + int ret; ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs); if (ret < 0) { return ret; } - /* There can only be one pending IRQ set in the bitmap at a time, so try - to find it and save its number instead (-1 for none). */ - env->interrupt_injected = -1; - for (i = 0; i < ARRAY_SIZE(sregs.interrupt_bitmap); i++) { - if (sregs.interrupt_bitmap[i]) { - bit = ctz64(sregs.interrupt_bitmap[i]); - env->interrupt_injected = i * 64 + bit; - break; - } + /* + * The interrupt_bitmap is ignored because KVM_GET_SREGS is + * always preceded by KVM_GET_VCPU_EVENTS. + */ + + get_seg(&env->segs[R_CS], &sregs.cs); + get_seg(&env->segs[R_DS], &sregs.ds); + get_seg(&env->segs[R_ES], &sregs.es); + get_seg(&env->segs[R_FS], &sregs.fs); + get_seg(&env->segs[R_GS], &sregs.gs); + get_seg(&env->segs[R_SS], &sregs.ss); + + get_seg(&env->tr, &sregs.tr); + get_seg(&env->ldt, &sregs.ldt); + + env->idt.limit = sregs.idt.limit; + env->idt.base = sregs.idt.base; + env->gdt.limit = sregs.gdt.limit; + env->gdt.base = sregs.gdt.base; + + env->cr[0] = sregs.cr0; + env->cr[2] = sregs.cr2; + env->cr[3] = sregs.cr3; + env->cr[4] = sregs.cr4; + + env->efer = sregs.efer; + + /* changes to apic base and cr8/tpr are read back via kvm_arch_post_run */ + x86_update_hflags(env); + + return 0; +} + +static int kvm_get_sregs2(X86CPU *cpu) +{ + CPUX86State *env = &cpu->env; + struct kvm_sregs2 sregs; + int i, ret; + + ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS2, &sregs); + if (ret < 0) { + return ret; } get_seg(&env->segs[R_CS], &sregs.cs); @@ -3324,6 +3414,14 @@ static int kvm_get_sregs(X86CPU *cpu) env->efer = sregs.efer; + env->pdptrs_valid = sregs.flags & KVM_SREGS2_FLAGS_PDPTRS_VALID; + + if (env->pdptrs_valid) { + for (i = 0; i < 4; i++) { + env->pdptrs[i] = sregs.pdptrs[i]; + } + } + /* changes to apic base and cr8/tpr are read back via kvm_arch_post_run */ 
x86_update_hflags(env); @@ -4173,7 +4271,7 @@ int kvm_arch_put_registers(CPUState *cpu, int level) assert(cpu_is_stopped(cpu) || qemu_cpu_is_self(cpu)); /* must be before kvm_put_nested_state so that EFER.SVME is set */ - ret = kvm_put_sregs(x86_cpu); + ret = has_sregs2 ? kvm_put_sregs2(x86_cpu) : kvm_put_sregs(x86_cpu); if (ret < 0) { return ret; } @@ -4278,7 +4376,7 @@ int kvm_arch_get_registers(CPUState *cs) if (ret < 0) { goto out; } - ret = kvm_get_sregs(cpu); + ret = has_sregs2 ? kvm_get_sregs2(cpu) : kvm_get_sregs(cpu); if (ret < 0) { goto out; } diff --git a/target/i386/machine.c b/target/i386/machine.c index 83c2b91529..6202f47793 100644 --- a/target/i386/machine.c +++ b/target/i386/machine.c @@ -1451,6 +1451,34 @@ static const VMStateDescription vmstate_msr_intel_sgx = { .needed = intel_sgx_msrs_needed, .fields = (VMStateField[]) { VMSTATE_UINT64_ARRAY(env.msr_ia32_sgxlepubkeyhash, X86CPU, 4), + VMSTATE_END_OF_LIST() + } + }; + +static bool pdptrs_needed(void *opaque) +{ + X86CPU *cpu = opaque; + CPUX86State *env = &cpu->env; + return env->pdptrs_valid; +} + +static int pdptrs_post_load(void *opaque, int version_id) +{ + X86CPU *cpu = opaque; + CPUX86State *env = &cpu->env; + env->pdptrs_valid = true; + return 0; +} + + +static const VMStateDescription vmstate_pdptrs = { + .name = "cpu/pdptrs", + .version_id = 1, + .minimum_version_id = 1, + .needed = pdptrs_needed, + .post_load = pdptrs_post_load, + .fields = (VMStateField[]) { + VMSTATE_UINT64_ARRAY(env.pdptrs, X86CPU, 4), VMSTATE_END_OF_LIST() } }; @@ -1593,6 +1621,7 @@ const VMStateDescription vmstate_x86_cpu = { #endif &vmstate_msr_tsx_ctrl, &vmstate_msr_intel_sgx, + &vmstate_pdptrs, NULL } }; diff --git a/target/ppc/arch_dump.c b/target/ppc/arch_dump.c index bb392f6d88..993740897d 100644 --- a/target/ppc/arch_dump.c +++ b/target/ppc/arch_dump.c @@ -237,7 +237,7 @@ int cpu_get_dump_info(ArchDumpInfo *info, info->d_machine = PPC_ELF_MACHINE; info->d_class = ELFCLASS; - if 
(ppc_interrupts_little_endian(cpu)) { + if (ppc_interrupts_little_endian(cpu, cpu->env.has_hv_mode)) { info->d_endian = ELFDATA2LSB; } else { info->d_endian = ELFDATA2MSB; diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h index f20d4ffa6d..f99cd0ea92 100644 --- a/target/ppc/cpu.h +++ b/target/ppc/cpu.h @@ -2728,20 +2728,29 @@ static inline bool ppc_has_spr(PowerPCCPU *cpu, int spr) return cpu->env.spr_cb[spr].name != NULL; } -static inline bool ppc_interrupts_little_endian(PowerPCCPU *cpu) +#if !defined(CONFIG_USER_ONLY) +static inline bool ppc_interrupts_little_endian(PowerPCCPU *cpu, bool hv) { PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cpu); + CPUPPCState *env = &cpu->env; + bool ile; + + if (hv && env->has_hv_mode) { + if (is_isa300(pcc)) { + ile = !!(env->spr[SPR_HID0] & HID0_POWER9_HILE); + } else { + ile = !!(env->spr[SPR_HID0] & HID0_HILE); + } - /* - * Only models that have an LPCR and know about LPCR_ILE can do little - * endian. - */ - if (pcc->lpcr_mask & LPCR_ILE) { - return !!(cpu->env.spr[SPR_LPCR] & LPCR_ILE); + } else if (pcc->lpcr_mask & LPCR_ILE) { + ile = !!(env->spr[SPR_LPCR] & LPCR_ILE); + } else { + ile = !!(msr_ile); } - return false; + return ile; } +#endif void dump_mmu(CPUPPCState *env); diff --git a/target/ppc/cpu_init.c b/target/ppc/cpu_init.c index cc93bff3fa..e30e86fe9d 100644 --- a/target/ppc/cpu_init.c +++ b/target/ppc/cpu_init.c @@ -6953,10 +6953,12 @@ POWERPC_FAMILY(POWER5P)(ObjectClass *oc, void *data) PPC_FLOAT | PPC_FLOAT_FSEL | PPC_FLOAT_FRES | PPC_FLOAT_FSQRT | PPC_FLOAT_FRSQRTE | PPC_FLOAT_STFIWX | + PPC_FLOAT_EXT | PPC_CACHE | PPC_CACHE_ICBI | PPC_CACHE_DCBZ | PPC_MEM_SYNC | PPC_MEM_EIEIO | PPC_MEM_TLBIE | PPC_MEM_TLBSYNC | PPC_64B | + PPC_POPCNTB | PPC_SEGMENT_64B | PPC_SLBI; pcc->insns_flags2 = PPC2_FP_CVT_S64; pcc->msr_mask = (1ull << MSR_SF) | diff --git a/target/ppc/excp_helper.c b/target/ppc/excp_helper.c index a779dc936a..bc646c67a0 100644 --- a/target/ppc/excp_helper.c +++ b/target/ppc/excp_helper.c @@ -30,8 +30,6 
@@ #include "exec/cpu_ldst.h" #endif -/* #define DEBUG_SOFTWARE_TLB */ - /*****************************************************************************/ /* Exception processing */ #if !defined(CONFIG_USER_ONLY) @@ -135,6 +133,39 @@ static void dump_hcall(CPUPPCState *env) env->nip); } +static void ppc_excp_debug_sw_tlb(CPUPPCState *env, int excp) +{ + const char *es; + target_ulong *miss, *cmp; + int en; + + if (!qemu_loglevel_mask(CPU_LOG_MMU)) { + return; + } + + if (excp == POWERPC_EXCP_IFTLB) { + es = "I"; + en = 'I'; + miss = &env->spr[SPR_IMISS]; + cmp = &env->spr[SPR_ICMP]; + } else { + if (excp == POWERPC_EXCP_DLTLB) { + es = "DL"; + } else { + es = "DS"; + } + en = 'D'; + miss = &env->spr[SPR_DMISS]; + cmp = &env->spr[SPR_DCMP]; + } + qemu_log("6xx %sTLB miss: %cM " TARGET_FMT_lx " %cC " + TARGET_FMT_lx " H1 " TARGET_FMT_lx " H2 " + TARGET_FMT_lx " %08x\n", es, en, *miss, en, *cmp, + env->spr[SPR_HASH1], env->spr[SPR_HASH2], + env->error_code); +} + + static int powerpc_reset_wakeup(CPUState *cs, CPUPPCState *env, int excp, target_ulong *msr) { @@ -365,7 +396,7 @@ static void powerpc_set_excp_state(PowerPCCPU *cpu, * Note that this function should be greatly optimized when called * with a constant excp, from ppc_hw_interrupt */ -static void powerpc_excp(PowerPCCPU *cpu, int excp) +static inline void powerpc_excp_legacy(PowerPCCPU *cpu, int excp) { CPUState *cs = CPU(cpu); CPUPPCState *env = &cpu->env; @@ -669,23 +700,6 @@ static void powerpc_excp(PowerPCCPU *cpu, int excp) case POWERPC_EXCP_SPEU: /* SPE/embedded floating-point unavailable/VPU */ env->spr[SPR_BOOKE_ESR] = ESR_SPV; break; - case POWERPC_EXCP_EFPDI: /* Embedded floating-point data interrupt */ - /* XXX: TODO */ - cpu_abort(cs, "Embedded floating point data exception " - "is not implemented yet !\n"); - env->spr[SPR_BOOKE_ESR] = ESR_SPV; - break; - case POWERPC_EXCP_EFPRI: /* Embedded floating-point round interrupt */ - /* XXX: TODO */ - cpu_abort(cs, "Embedded floating point round exception " 
- "is not implemented yet !\n"); - env->spr[SPR_BOOKE_ESR] = ESR_SPV; - break; - case POWERPC_EXCP_EPERFM: /* Embedded performance monitor interrupt */ - /* XXX: TODO */ - cpu_abort(cs, - "Performance counter exception is not implemented yet !\n"); - break; case POWERPC_EXCP_DOORI: /* Embedded doorbell interrupt */ break; case POWERPC_EXCP_DOORCI: /* Embedded doorbell critical interrupt */ @@ -750,19 +764,6 @@ static void powerpc_excp(PowerPCCPU *cpu, int excp) case POWERPC_EXCP_PIT: /* Programmable interval timer interrupt */ trace_ppc_excp_print("PIT"); break; - case POWERPC_EXCP_IO: /* IO error exception */ - /* XXX: TODO */ - cpu_abort(cs, "601 IO error exception is not implemented yet !\n"); - break; - case POWERPC_EXCP_RUNM: /* Run mode exception */ - /* XXX: TODO */ - cpu_abort(cs, "601 run mode exception is not implemented yet !\n"); - break; - case POWERPC_EXCP_EMUL: /* Emulation trap exception */ - /* XXX: TODO */ - cpu_abort(cs, "602 emulation trap exception " - "is not implemented yet !\n"); - break; case POWERPC_EXCP_IFTLB: /* Instruction fetch TLB error */ case POWERPC_EXCP_DLTLB: /* Data load TLB miss */ case POWERPC_EXCP_DSTLB: /* Data store TLB miss */ @@ -777,34 +778,8 @@ static void powerpc_excp(PowerPCCPU *cpu, int excp) } /* fall through */ case POWERPC_EXCP_7x5: -#if defined(DEBUG_SOFTWARE_TLB) - if (qemu_log_enabled()) { - const char *es; - target_ulong *miss, *cmp; - int en; - - if (excp == POWERPC_EXCP_IFTLB) { - es = "I"; - en = 'I'; - miss = &env->spr[SPR_IMISS]; - cmp = &env->spr[SPR_ICMP]; - } else { - if (excp == POWERPC_EXCP_DLTLB) { - es = "DL"; - } else { - es = "DS"; - } - en = 'D'; - miss = &env->spr[SPR_DMISS]; - cmp = &env->spr[SPR_DCMP]; - } - qemu_log("6xx %sTLB miss: %cM " TARGET_FMT_lx " %cC " - TARGET_FMT_lx " H1 " TARGET_FMT_lx " H2 " - TARGET_FMT_lx " %08x\n", es, en, *miss, en, *cmp, - env->spr[SPR_HASH1], env->spr[SPR_HASH2], - env->error_code); - } -#endif + ppc_excp_debug_sw_tlb(env, excp); + msr |= env->crf[0] << 28; 
msr |= env->error_code; /* key, D/I, S/L bits */ /* Set way using a LRU mechanism */ @@ -815,56 +790,25 @@ static void powerpc_excp(PowerPCCPU *cpu, int excp) break; } break; + case POWERPC_EXCP_EFPDI: /* Embedded floating-point data interrupt */ + case POWERPC_EXCP_EFPRI: /* Embedded floating-point round interrupt */ + case POWERPC_EXCP_EPERFM: /* Embedded performance monitor interrupt */ + case POWERPC_EXCP_IO: /* IO error exception */ + case POWERPC_EXCP_RUNM: /* Run mode exception */ + case POWERPC_EXCP_EMUL: /* Emulation trap exception */ case POWERPC_EXCP_FPA: /* Floating-point assist exception */ - /* XXX: TODO */ - cpu_abort(cs, "Floating point assist exception " - "is not implemented yet !\n"); - break; case POWERPC_EXCP_DABR: /* Data address breakpoint */ - /* XXX: TODO */ - cpu_abort(cs, "DABR exception is not implemented yet !\n"); - break; case POWERPC_EXCP_IABR: /* Instruction address breakpoint */ - /* XXX: TODO */ - cpu_abort(cs, "IABR exception is not implemented yet !\n"); - break; case POWERPC_EXCP_SMI: /* System management interrupt */ - /* XXX: TODO */ - cpu_abort(cs, "SMI exception is not implemented yet !\n"); - break; case POWERPC_EXCP_THERM: /* Thermal interrupt */ - /* XXX: TODO */ - cpu_abort(cs, "Thermal management exception " - "is not implemented yet !\n"); - break; case POWERPC_EXCP_PERFM: /* Embedded performance monitor interrupt */ - /* XXX: TODO */ - cpu_abort(cs, - "Performance counter exception is not implemented yet !\n"); - break; case POWERPC_EXCP_VPUA: /* Vector assist exception */ - /* XXX: TODO */ - cpu_abort(cs, "VPU assist exception is not implemented yet !\n"); - break; case POWERPC_EXCP_SOFTP: /* Soft patch exception */ - /* XXX: TODO */ - cpu_abort(cs, - "970 soft-patch exception is not implemented yet !\n"); - break; case POWERPC_EXCP_MAINT: /* Maintenance exception */ - /* XXX: TODO */ - cpu_abort(cs, - "970 maintenance exception is not implemented yet !\n"); - break; case POWERPC_EXCP_MEXTBR: /* Maskable external 
breakpoint */ - /* XXX: TODO */ - cpu_abort(cs, "Maskable external exception " - "is not implemented yet !\n"); - break; case POWERPC_EXCP_NMEXTBR: /* Non maskable external breakpoint */ - /* XXX: TODO */ - cpu_abort(cs, "Non maskable external exception " - "is not implemented yet !\n"); + cpu_abort(cs, "%s exception not implemented\n", + powerpc_excp_name(excp)); break; default: excp_invalid: @@ -888,36 +832,9 @@ static void powerpc_excp(PowerPCCPU *cpu, int excp) * Sort out endianness of interrupt, this differs depending on the * CPU, the HV mode, etc... */ -#ifdef TARGET_PPC64 - if (excp_model == POWERPC_EXCP_POWER7) { - if (!(new_msr & MSR_HVB) && (env->spr[SPR_LPCR] & LPCR_ILE)) { - new_msr |= (target_ulong)1 << MSR_LE; - } - } else if (excp_model == POWERPC_EXCP_POWER8) { - if (new_msr & MSR_HVB) { - if (env->spr[SPR_HID0] & HID0_HILE) { - new_msr |= (target_ulong)1 << MSR_LE; - } - } else if (env->spr[SPR_LPCR] & LPCR_ILE) { - new_msr |= (target_ulong)1 << MSR_LE; - } - } else if (excp_model == POWERPC_EXCP_POWER9 || - excp_model == POWERPC_EXCP_POWER10) { - if (new_msr & MSR_HVB) { - if (env->spr[SPR_HID0] & HID0_POWER9_HILE) { - new_msr |= (target_ulong)1 << MSR_LE; - } - } else if (env->spr[SPR_LPCR] & LPCR_ILE) { - new_msr |= (target_ulong)1 << MSR_LE; - } - } else if (msr_ile) { + if (ppc_interrupts_little_endian(cpu, !!(new_msr & MSR_HVB))) { new_msr |= (target_ulong)1 << MSR_LE; } -#else - if (msr_ile) { - new_msr |= (target_ulong)1 << MSR_LE; - } -#endif #if defined(TARGET_PPC64) if (excp_model == POWERPC_EXCP_BOOKE) { @@ -950,6 +867,16 @@ static void powerpc_excp(PowerPCCPU *cpu, int excp) powerpc_set_excp_state(cpu, vector, new_msr); } +static void powerpc_excp(PowerPCCPU *cpu, int excp) +{ + CPUPPCState *env = &cpu->env; + + switch (env->excp_model) { + default: + powerpc_excp_legacy(cpu, excp); + } +} + void ppc_cpu_do_interrupt(CPUState *cs) { PowerPCCPU *cpu = POWERPC_CPU(cs); @@ -1126,7 +1053,7 @@ void ppc_cpu_do_fwnmi_machine_check(CPUState 
*cs, target_ulong vector) */ msr = (1ULL << MSR_ME); msr |= env->msr & (1ULL << MSR_SF); - if (ppc_interrupts_little_endian(cpu)) { + if (ppc_interrupts_little_endian(cpu, false)) { msr |= (1ULL << MSR_LE); } diff --git a/tests/qtest/meson.build b/tests/qtest/meson.build index 37e1eaa449..26937deb6d 100644 --- a/tests/qtest/meson.build +++ b/tests/qtest/meson.build @@ -335,10 +335,9 @@ foreach dir : target_dirs test: executable(test, src, dependencies: deps) } endif - # FIXME: missing dependency on the emulator binary and qemu-img test('qtest-@0@/@1@'.format(target_base, test), qtest_executables[test], - depends: [test_deps, qtest_emulator], + depends: [test_deps, qtest_emulator, emulator_modules], env: qtest_env, args: ['--tap', '-k'], protocol: 'tap', diff --git a/tests/tcg/Makefile.target b/tests/tcg/Makefile.target index 0f8645f782..ae8004c76e 100644 --- a/tests/tcg/Makefile.target +++ b/tests/tcg/Makefile.target @@ -33,7 +33,7 @@ all: -include ../../../config-host.mak -include ../config-$(TARGET).mak ifeq ($(CONFIG_USER_ONLY),y) --include $(SRC_PATH)/default-configs/targets/$(TARGET).mak +-include $(SRC_PATH)/configs/targets/$(TARGET)/default.mak endif # for including , in command strings diff --git a/tests/unit/meson.build b/tests/unit/meson.build index 90acf5b0da..64a5e7bfde 100644 --- a/tests/unit/meson.build +++ b/tests/unit/meson.build @@ -129,7 +129,7 @@ if have_system 'test-vmstate': [migration, io], 'test-yank': ['socket-helpers.c', qom, io, chardev] } - if 'CONFIG_INOTIFY1' in config_host + if config_host_data.get('CONFIG_INOTIFY1') tests += {'test-util-filemonitor': []} endif diff --git a/tests/unit/test-aio.c b/tests/unit/test-aio.c index 6feeb9a4a9..178048d2f2 100644 --- a/tests/unit/test-aio.c +++ b/tests/unit/test-aio.c @@ -130,7 +130,7 @@ static void *test_acquire_thread(void *opaque) static void set_event_notifier(AioContext *ctx, EventNotifier *notifier, EventNotifierHandler *handler) { - aio_set_event_notifier(ctx, notifier, false, handler, 
NULL); + aio_set_event_notifier(ctx, notifier, false, handler, NULL, NULL); } static void dummy_notifier_read(EventNotifier *n) @@ -390,7 +390,7 @@ static void test_aio_external_client(void) for (i = 1; i < 3; i++) { EventNotifierTestData data = { .n = 0, .active = 10, .auto_set = true }; event_notifier_init(&data.e, false); - aio_set_event_notifier(ctx, &data.e, true, event_ready_cb, NULL); + aio_set_event_notifier(ctx, &data.e, true, event_ready_cb, NULL, NULL); event_notifier_set(&data.e); for (j = 0; j < i; j++) { aio_disable_external(ctx); diff --git a/tests/unit/test-fdmon-epoll.c b/tests/unit/test-fdmon-epoll.c index 11fd8a2fa9..ef5a856d09 100644 --- a/tests/unit/test-fdmon-epoll.c +++ b/tests/unit/test-fdmon-epoll.c @@ -22,14 +22,14 @@ static void add_event_notifiers(EventNotifier *notifiers, size_t n) for (size_t i = 0; i < n; i++) { event_notifier_init(&notifiers[i], false); aio_set_event_notifier(ctx, &notifiers[i], false, - dummy_fd_handler, NULL); + dummy_fd_handler, NULL, NULL); } } static void remove_event_notifiers(EventNotifier *notifiers, size_t n) { for (size_t i = 0; i < n; i++) { - aio_set_event_notifier(ctx, &notifiers[i], false, NULL, NULL); + aio_set_event_notifier(ctx, &notifiers[i], false, NULL, NULL, NULL); event_notifier_cleanup(&notifiers[i]); } } diff --git a/util/aio-posix.c b/util/aio-posix.c index 2b86777e91..7b9f629218 100644 --- a/util/aio-posix.c +++ b/util/aio-posix.c @@ -23,6 +23,15 @@ #include "trace.h" #include "aio-posix.h" +/* + * G_IO_IN and G_IO_OUT are not appropriate revents values for polling, since + * the handler may not need to access the file descriptor. For example, the + * handler doesn't need to read from an EventNotifier if it polled a memory + * location and a read syscall would be slow. Define our own unique revents + * value to indicate that polling determined this AioHandler is ready. 
+ */ +#define REVENTS_POLL_READY 0 + /* Stop userspace polling on a handler if it isn't active for some time */ #define POLL_IDLE_INTERVAL_NS (7 * NANOSECONDS_PER_SECOND) @@ -93,6 +102,7 @@ void aio_set_fd_handler(AioContext *ctx, IOHandler *io_read, IOHandler *io_write, AioPollFn *io_poll, + IOHandler *io_poll_ready, void *opaque) { AioHandler *node; @@ -101,6 +111,10 @@ void aio_set_fd_handler(AioContext *ctx, bool deleted = false; int poll_disable_change; + if (io_poll && !io_poll_ready) { + io_poll = NULL; /* polling only makes sense if there is a handler */ + } + qemu_lockcnt_lock(&ctx->list_lock); node = find_aio_handler(ctx, fd); @@ -127,6 +141,7 @@ void aio_set_fd_handler(AioContext *ctx, new_node->io_read = io_read; new_node->io_write = io_write; new_node->io_poll = io_poll; + new_node->io_poll_ready = io_poll_ready; new_node->opaque = opaque; new_node->is_external = is_external; @@ -182,10 +197,12 @@ void aio_set_event_notifier(AioContext *ctx, EventNotifier *notifier, bool is_external, EventNotifierHandler *io_read, - AioPollFn *io_poll) + AioPollFn *io_poll, + EventNotifierHandler *io_poll_ready) { aio_set_fd_handler(ctx, event_notifier_get_fd(notifier), is_external, - (IOHandler *)io_read, NULL, io_poll, notifier); + (IOHandler *)io_read, NULL, io_poll, + (IOHandler *)io_poll_ready, notifier); } void aio_set_event_notifier_poll(AioContext *ctx, @@ -198,7 +215,8 @@ void aio_set_event_notifier_poll(AioContext *ctx, (IOHandler *)io_poll_end); } -static bool poll_set_started(AioContext *ctx, bool started) +static bool poll_set_started(AioContext *ctx, AioHandlerList *ready_list, + bool started) { AioHandler *node; bool progress = false; @@ -228,8 +246,9 @@ static bool poll_set_started(AioContext *ctx, bool started) } /* Poll one last time in case ->io_poll_end() raced with the event */ - if (!started) { - progress = node->io_poll(node->opaque) || progress; + if (!started && node->io_poll(node->opaque)) { + aio_add_ready_handler(ready_list, node, 
REVENTS_POLL_READY); + progress = true; } } qemu_lockcnt_dec(&ctx->list_lock); @@ -240,8 +259,11 @@ static bool poll_set_started(AioContext *ctx, bool started) bool aio_prepare(AioContext *ctx) { + AioHandlerList ready_list = QLIST_HEAD_INITIALIZER(ready_list); + /* Poll mode cannot be used with glib's event loop, disable it. */ - poll_set_started(ctx, false); + poll_set_started(ctx, &ready_list, false); + /* TODO what to do with this list? */ return false; } @@ -321,6 +343,18 @@ static bool aio_dispatch_handler(AioContext *ctx, AioHandler *node) } QLIST_INSERT_HEAD(&ctx->poll_aio_handlers, node, node_poll); } + if (!QLIST_IS_INSERTED(node, node_deleted) && + revents == 0 && + aio_node_check(ctx, node->is_external) && + node->io_poll_ready) { + node->io_poll_ready(node->opaque); + + /* + * Return early since revents was zero. aio_notify() does not count as + * progress. + */ + return node->opaque != &ctx->notifier; + } if (!QLIST_IS_INSERTED(node, node_deleted) && (revents & (G_IO_IN | G_IO_HUP | G_IO_ERR)) && @@ -387,6 +421,7 @@ void aio_dispatch(AioContext *ctx) } static bool run_poll_handlers_once(AioContext *ctx, + AioHandlerList *ready_list, int64_t now, int64_t *timeout) { @@ -397,6 +432,8 @@ static bool run_poll_handlers_once(AioContext *ctx, QLIST_FOREACH_SAFE(node, &ctx->poll_aio_handlers, node_poll, tmp) { if (aio_node_check(ctx, node->is_external) && node->io_poll(node->opaque)) { + aio_add_ready_handler(ready_list, node, REVENTS_POLL_READY); + node->poll_idle_timeout = now + POLL_IDLE_INTERVAL_NS; /* @@ -420,7 +457,9 @@ static bool fdmon_supports_polling(AioContext *ctx) return ctx->fdmon_ops->need_wait != aio_poll_disabled; } -static bool remove_idle_poll_handlers(AioContext *ctx, int64_t now) +static bool remove_idle_poll_handlers(AioContext *ctx, + AioHandlerList *ready_list, + int64_t now) { AioHandler *node; AioHandler *tmp; @@ -451,7 +490,11 @@ static bool remove_idle_poll_handlers(AioContext *ctx, int64_t now) * Nevermind about re-adding the 
handler in the rare case where * this causes progress. */ - progress = node->io_poll(node->opaque) || progress; + if (node->io_poll(node->opaque)) { + aio_add_ready_handler(ready_list, node, + REVENTS_POLL_READY); + progress = true; + } } } } @@ -461,6 +504,7 @@ static bool remove_idle_poll_handlers(AioContext *ctx, int64_t now) /* run_poll_handlers: * @ctx: the AioContext + * @ready_list: the list to place ready handlers on * @max_ns: maximum time to poll for, in nanoseconds * * Polls for a given time. @@ -469,7 +513,8 @@ static bool remove_idle_poll_handlers(AioContext *ctx, int64_t now) * * Returns: true if progress was made, false otherwise */ -static bool run_poll_handlers(AioContext *ctx, int64_t max_ns, int64_t *timeout) +static bool run_poll_handlers(AioContext *ctx, AioHandlerList *ready_list, + int64_t max_ns, int64_t *timeout) { bool progress; int64_t start_time, elapsed_time; @@ -490,13 +535,15 @@ static bool run_poll_handlers(AioContext *ctx, int64_t max_ns, int64_t *timeout) start_time = qemu_clock_get_ns(QEMU_CLOCK_REALTIME); do { - progress = run_poll_handlers_once(ctx, start_time, timeout); + progress = run_poll_handlers_once(ctx, ready_list, + start_time, timeout); elapsed_time = qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - start_time; max_ns = qemu_soonest_timeout(*timeout, max_ns); assert(!(max_ns && progress)); } while (elapsed_time < max_ns && !ctx->fdmon_ops->need_wait(ctx)); - if (remove_idle_poll_handlers(ctx, start_time + elapsed_time)) { + if (remove_idle_poll_handlers(ctx, ready_list, + start_time + elapsed_time)) { *timeout = 0; progress = true; } @@ -514,6 +561,7 @@ static bool run_poll_handlers(AioContext *ctx, int64_t max_ns, int64_t *timeout) /* try_poll_mode: * @ctx: the AioContext + * @ready_list: list to add handlers that need to be run * @timeout: timeout for blocking wait, computed by the caller and updated if * polling succeeds. 
* @@ -521,7 +569,8 @@ static bool run_poll_handlers(AioContext *ctx, int64_t max_ns, int64_t *timeout) * * Returns: true if progress was made, false otherwise */ -static bool try_poll_mode(AioContext *ctx, int64_t *timeout) +static bool try_poll_mode(AioContext *ctx, AioHandlerList *ready_list, + int64_t *timeout) { int64_t max_ns; @@ -531,14 +580,14 @@ static bool try_poll_mode(AioContext *ctx, int64_t *timeout) max_ns = qemu_soonest_timeout(*timeout, ctx->poll_ns); if (max_ns && !ctx->fdmon_ops->need_wait(ctx)) { - poll_set_started(ctx, true); + poll_set_started(ctx, ready_list, true); - if (run_poll_handlers(ctx, max_ns, timeout)) { + if (run_poll_handlers(ctx, ready_list, max_ns, timeout)) { return true; } } - if (poll_set_started(ctx, false)) { + if (poll_set_started(ctx, ready_list, false)) { *timeout = 0; return true; } @@ -549,7 +598,6 @@ static bool try_poll_mode(AioContext *ctx, int64_t *timeout) bool aio_poll(AioContext *ctx, bool blocking) { AioHandlerList ready_list = QLIST_HEAD_INITIALIZER(ready_list); - int ret = 0; bool progress; bool use_notify_me; int64_t timeout; @@ -574,7 +622,7 @@ bool aio_poll(AioContext *ctx, bool blocking) } timeout = blocking ? aio_compute_timeout(ctx) : 0; - progress = try_poll_mode(ctx, &timeout); + progress = try_poll_mode(ctx, &ready_list, &timeout); assert(!(timeout && progress)); /* @@ -604,7 +652,7 @@ bool aio_poll(AioContext *ctx, bool blocking) * system call---a single round of run_poll_handlers_once suffices. 
*/ if (timeout || ctx->fdmon_ops->need_wait(ctx)) { - ret = ctx->fdmon_ops->wait(ctx, &ready_list, timeout); + ctx->fdmon_ops->wait(ctx, &ready_list, timeout); } if (use_notify_me) { @@ -657,10 +705,7 @@ bool aio_poll(AioContext *ctx, bool blocking) } progress |= aio_bh_poll(ctx); - - if (ret > 0) { - progress |= aio_dispatch_ready_handlers(ctx, &ready_list); - } + progress |= aio_dispatch_ready_handlers(ctx, &ready_list); aio_free_deleted_handlers(ctx); diff --git a/util/aio-posix.h b/util/aio-posix.h index c80c04506a..7f2c37a684 100644 --- a/util/aio-posix.h +++ b/util/aio-posix.h @@ -24,6 +24,7 @@ struct AioHandler { IOHandler *io_read; IOHandler *io_write; AioPollFn *io_poll; + IOHandler *io_poll_ready; IOHandler *io_poll_begin; IOHandler *io_poll_end; void *opaque; diff --git a/util/aio-win32.c b/util/aio-win32.c index d5b09a1193..7aac89df3a 100644 --- a/util/aio-win32.c +++ b/util/aio-win32.c @@ -68,6 +68,7 @@ void aio_set_fd_handler(AioContext *ctx, IOHandler *io_read, IOHandler *io_write, AioPollFn *io_poll, + IOHandler *io_poll_ready, void *opaque) { /* fd is a SOCKET in our case */ @@ -136,7 +137,8 @@ void aio_set_event_notifier(AioContext *ctx, EventNotifier *e, bool is_external, EventNotifierHandler *io_notify, - AioPollFn *io_poll) + AioPollFn *io_poll, + EventNotifierHandler *io_poll_ready) { AioHandler *node; diff --git a/util/async.c b/util/async.c index 6f6717a34b..08d25feef5 100644 --- a/util/async.c +++ b/util/async.c @@ -362,7 +362,7 @@ aio_ctx_finalize(GSource *source) g_free(bh); } - aio_set_event_notifier(ctx, &ctx->notifier, false, NULL, NULL); + aio_set_event_notifier(ctx, &ctx->notifier, false, NULL, NULL, NULL); event_notifier_cleanup(&ctx->notifier); qemu_rec_mutex_destroy(&ctx->lock); qemu_lockcnt_destroy(&ctx->list_lock); @@ -485,6 +485,11 @@ static bool aio_context_notifier_poll(void *opaque) return qatomic_read(&ctx->notified); } +static void aio_context_notifier_poll_ready(EventNotifier *e) +{ + /* Do nothing, we just wanted to kick 
the event loop */ +} + static void co_schedule_bh_cb(void *opaque) { AioContext *ctx = opaque; @@ -536,7 +541,8 @@ AioContext *aio_context_new(Error **errp) aio_set_event_notifier(ctx, &ctx->notifier, false, aio_context_notifier_cb, - aio_context_notifier_poll); + aio_context_notifier_poll, + aio_context_notifier_poll_ready); #ifdef CONFIG_LINUX_AIO ctx->linux_aio = NULL; #endif diff --git a/util/main-loop.c b/util/main-loop.c index 06b18b195c..4d5a5b9943 100644 --- a/util/main-loop.c +++ b/util/main-loop.c @@ -582,7 +582,7 @@ void qemu_set_fd_handler(int fd, { iohandler_init(); aio_set_fd_handler(iohandler_ctx, fd, false, - fd_read, fd_write, NULL, opaque); + fd_read, fd_write, NULL, NULL, opaque); } void event_notifier_set_handler(EventNotifier *e, @@ -590,5 +590,5 @@ void event_notifier_set_handler(EventNotifier *e, { iohandler_init(); aio_set_event_notifier(iohandler_ctx, e, false, - handler, NULL); + handler, NULL, NULL); } diff --git a/util/meson.build b/util/meson.build index e676b2f6c6..c9a9cc1cf5 100644 --- a/util/meson.build +++ b/util/meson.build @@ -84,7 +84,10 @@ if have_block util_ss.add(files('readline.c')) util_ss.add(files('throttle.c')) util_ss.add(files('timed-average.c')) - util_ss.add(when: 'CONFIG_INOTIFY1', if_true: files('filemonitor-inotify.c'), - if_false: files('filemonitor-stub.c')) + if config_host_data.get('CONFIG_INOTIFY1') + util_ss.add(files('filemonitor-inotify.c')) + else + util_ss.add(files('filemonitor-stub.c')) + endif util_ss.add(when: 'CONFIG_LINUX', if_true: files('vfio-helpers.c')) endif diff --git a/util/qemu-coroutine-io.c b/util/qemu-coroutine-io.c index 5b80bb416f..7f5839cb76 100644 --- a/util/qemu-coroutine-io.c +++ b/util/qemu-coroutine-io.c @@ -75,7 +75,8 @@ typedef struct { static void fd_coroutine_enter(void *opaque) { FDYieldUntilData *data = opaque; - aio_set_fd_handler(data->ctx, data->fd, false, NULL, NULL, NULL, NULL); + aio_set_fd_handler(data->ctx, data->fd, false, + NULL, NULL, NULL, NULL, NULL); 
qemu_coroutine_enter(data->co); } @@ -88,6 +89,6 @@ void coroutine_fn yield_until_fd_readable(int fd) data.co = qemu_coroutine_self(); data.fd = fd; aio_set_fd_handler( - data.ctx, fd, false, fd_coroutine_enter, NULL, NULL, &data); + data.ctx, fd, false, fd_coroutine_enter, NULL, NULL, NULL, &data); qemu_coroutine_yield(); } diff --git a/util/vhost-user-server.c b/util/vhost-user-server.c index 783d847a6d..f68287e811 100644 --- a/util/vhost-user-server.c +++ b/util/vhost-user-server.c @@ -250,7 +250,7 @@ set_watch(VuDev *vu_dev, int fd, int vu_evt, vu_fd_watch->cb = cb; qemu_set_nonblock(fd); aio_set_fd_handler(server->ioc->ctx, fd, true, kick_handler, - NULL, NULL, vu_fd_watch); + NULL, NULL, NULL, vu_fd_watch); vu_fd_watch->vu_dev = vu_dev; vu_fd_watch->pvt = pvt; } @@ -270,7 +270,8 @@ static void remove_watch(VuDev *vu_dev, int fd) if (!vu_fd_watch) { return; } - aio_set_fd_handler(server->ioc->ctx, fd, true, NULL, NULL, NULL, NULL); + aio_set_fd_handler(server->ioc->ctx, fd, true, + NULL, NULL, NULL, NULL, NULL); QTAILQ_REMOVE(&server->vu_fd_watches, vu_fd_watch, next); g_free(vu_fd_watch); @@ -334,7 +335,7 @@ void vhost_user_server_stop(VuServer *server) QTAILQ_FOREACH(vu_fd_watch, &server->vu_fd_watches, next) { aio_set_fd_handler(server->ctx, vu_fd_watch->fd, true, - NULL, NULL, NULL, vu_fd_watch); + NULL, NULL, NULL, NULL, vu_fd_watch); } qio_channel_shutdown(server->ioc, QIO_CHANNEL_SHUTDOWN_BOTH, NULL); @@ -377,7 +378,7 @@ void vhost_user_server_attach_aio_context(VuServer *server, AioContext *ctx) QTAILQ_FOREACH(vu_fd_watch, &server->vu_fd_watches, next) { aio_set_fd_handler(ctx, vu_fd_watch->fd, true, kick_handler, NULL, - NULL, vu_fd_watch); + NULL, NULL, vu_fd_watch); } aio_co_schedule(ctx, server->co_trip); @@ -391,7 +392,7 @@ void vhost_user_server_detach_aio_context(VuServer *server) QTAILQ_FOREACH(vu_fd_watch, &server->vu_fd_watches, next) { aio_set_fd_handler(server->ctx, vu_fd_watch->fd, true, - NULL, NULL, NULL, vu_fd_watch); + NULL, NULL, 
NULL, NULL, vu_fd_watch); } qio_channel_detach_aio_context(server->ioc); |