From de55d3b3815430c27b2e8fe36f85d8e3f2026c95 Mon Sep 17 00:00:00 2001 From: Lijun Pan Date: Thu, 23 Jul 2020 23:58:40 -0500 Subject: Update PowerPC AT_HWCAP2 definition Add PPC2_FEATURE2_ARCH_3_10 to the PowerPC AT_HWCAP2 definitions. Signed-off-by: Lijun Pan Message-Id: <20200724045845.89976-2-ljp@linux.ibm.com> Signed-off-by: David Gibson Reviewed-by: Richard Henderson --- include/elf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/elf.h b/include/elf.h index 5b06b55f28..c117a4d1ab 100644 --- a/include/elf.h +++ b/include/elf.h @@ -558,6 +558,7 @@ typedef struct { #define PPC_FEATURE2_HTM_NOSC 0x01000000 #define PPC_FEATURE2_ARCH_3_00 0x00800000 #define PPC_FEATURE2_HAS_IEEE128 0x00400000 +#define PPC_FEATURE2_ARCH_3_10 0x00040000 /* Bits present in AT_HWCAP for Sparc. */ -- cgit v1.2.3-55-g7522 From cf36e5b3760df0a1fdd38970294cf7b0968fcc5c Mon Sep 17 00:00:00 2001 From: Greg Kurz Date: Fri, 7 Aug 2020 13:32:06 +0200 Subject: ppc/xive: Rework setup of XiveSource::esb_mmio Depending on whether XIVE is emultated or backed with a KVM XIVE device, the ESB MMIOs of a XIVE source point to an I/O memory region or a mapped memory region. This is currently handled by checking kvm_irqchip_in_kernel() returns false in xive_source_realize(). This is a bit awkward as we usually need to do extra things when we're using the in-kernel backend, not less. But most important, we can do better: turn the existing "xive.esb" memory region into a plain container, introduce an "xive.esb-emulated" I/O subregion and rename the existing "xive.esb" subregion in the KVM code to "xive.esb-kvm". Since "xive.esb-kvm" is added with overlap and a higher priority, it prevails over "xive.esb-emulated" (ie. a guest using KVM XIVE will interact with "xive.esb-kvm" instead of the default "xive.esb-emulated" region. While here, consolidate the computation of the MMIO region size in a common helper. Suggested-by: Cédric Le Goater Signed-off-by: Greg Kurz Message-Id: <159679992680.876294.7520540158586170894.stgit@bahia.lan> Reviewed-by: Cédric Le Goater Signed-off-by: David Gibson --- hw/intc/spapr_xive_kvm.c | 4 ++-- hw/intc/xive.c | 11 ++++++----- include/hw/ppc/xive.h | 6 ++++++ 3 files changed, 14 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/hw/intc/spapr_xive_kvm.c b/hw/intc/spapr_xive_kvm.c index 893a1ee77e..6130882be6 100644 --- a/hw/intc/spapr_xive_kvm.c +++ b/hw/intc/spapr_xive_kvm.c @@ -742,7 +742,7 @@ int kvmppc_xive_connect(SpaprInterruptController *intc, uint32_t nr_servers, SpaprXive *xive = SPAPR_XIVE(intc); XiveSource *xsrc = &xive->source; Error *local_err = NULL; - size_t esb_len = (1ull << xsrc->esb_shift) * xsrc->nr_irqs; + size_t esb_len = xive_source_esb_len(xsrc); size_t tima_len = 4ull << TM_SHIFT; CPUState *cs; int fd; @@ -788,7 +788,7 @@ int kvmppc_xive_connect(SpaprInterruptController *intc, uint32_t nr_servers, } memory_region_init_ram_device_ptr(&xsrc->esb_mmio_kvm, OBJECT(xsrc), - "xive.esb", esb_len, xsrc->esb_mmap); + "xive.esb-kvm", esb_len, xsrc->esb_mmap); memory_region_add_subregion_overlap(&xsrc->esb_mmio, 0, &xsrc->esb_mmio_kvm, 1); diff --git a/hw/intc/xive.c b/hw/intc/xive.c index 9b55e0356c..561d746cd1 100644 --- a/hw/intc/xive.c +++ b/hw/intc/xive.c @@ -1128,6 +1128,7 @@ static void xive_source_reset(void *dev) static void xive_source_realize(DeviceState *dev, Error **errp) { XiveSource *xsrc = XIVE_SOURCE(dev); + size_t esb_len = xive_source_esb_len(xsrc); assert(xsrc->xive); @@ -1147,11 +1148,11 @@ static void xive_source_realize(DeviceState *dev, Error **errp) xsrc->status = g_malloc0(xsrc->nr_irqs); xsrc->lsi_map = bitmap_new(xsrc->nr_irqs); - if (!kvm_irqchip_in_kernel()) { - memory_region_init_io(&xsrc->esb_mmio, OBJECT(xsrc), - &xive_source_esb_ops, xsrc, "xive.esb", - (1ull << xsrc->esb_shift) * xsrc->nr_irqs); - } + memory_region_init(&xsrc->esb_mmio, OBJECT(xsrc), "xive.esb", esb_len); + memory_region_init_io(&xsrc->esb_mmio_emulated, OBJECT(xsrc), + &xive_source_esb_ops, xsrc, "xive.esb-emulated", + esb_len); + memory_region_add_subregion(&xsrc->esb_mmio, 0, &xsrc->esb_mmio_emulated); qemu_register_reset(xive_source_reset, dev); } diff --git a/include/hw/ppc/xive.h b/include/hw/ppc/xive.h index 705cf48176..82a61eaca7 100644 --- a/include/hw/ppc/xive.h +++ b/include/hw/ppc/xive.h @@ -191,6 +191,7 @@ typedef struct XiveSource { uint64_t esb_flags; uint32_t esb_shift; MemoryRegion esb_mmio; + MemoryRegion esb_mmio_emulated; /* KVM support */ void *esb_mmap; @@ -215,6 +216,11 @@ static inline bool xive_source_esb_has_2page(XiveSource *xsrc) xsrc->esb_shift == XIVE_ESB_4K_2PAGE; } +static inline size_t xive_source_esb_len(XiveSource *xsrc) +{ + return (1ull << xsrc->esb_shift) * xsrc->nr_irqs; +} + /* The trigger page is always the first/even page */ static inline hwaddr xive_source_esb_page(XiveSource *xsrc, uint32_t srcno) { -- cgit v1.2.3-55-g7522 From e519cdd9bc02990bddd395656bdaec821c94c8fe Mon Sep 17 00:00:00 2001 From: Greg Kurz Date: Fri, 7 Aug 2020 13:32:14 +0200 Subject: ppc/xive: Introduce dedicated kvm_irqchip_in_kernel() wrappers Calls to the KVM XIVE device are guarded by kvm_irqchip_in_kernel(). This ensures that QEMU won't try to use the device if KVM is disabled or if an in-kernel irqchip isn't required. When using ic-mode=dual with the pseries machine, we have two possible interrupt controllers: XIVE and XICS. The kvm_irqchip_in_kernel() helper will return true as soon as any of the KVM device is created. It might lure QEMU to think that the other one is also around, while it is not. This is exactly what happens with ic-mode=dual at machine init when claiming IRQ numbers, which must be done on all possible IRQ backends, eg. RTAS event sources or the PHB0 LSI table : only the KVM XICS device is active but we end up calling kvmppc_xive_source_reset_one() anyway, which fails. This doesn't cause any trouble because of another bug : kvmppc_xive_source_reset_one() lacks an error_setg() and callers don't see the failure. Most of the other kvmppc_xive_* functions have similar xive->fd checks to filter out the case when KVM XIVE isn't active. It might look safer to have idempotent functions but it doesn't really help to understand what's going on when debugging. Since we already have all the kvm_irqchip_in_kernel() in place, also have the callers to check xive->fd as well before calling KVM XIVE specific code. This is straight-forward for the spapr specific XIVE code. Some more care is needed for the platform agnostic XIVE code since it cannot access xive->fd directly. Introduce new in_kernel() methods in some base XIVE classes for this purpose and implement them only in spapr. In all cases, we still need to call kvm_irqchip_in_kernel() so that compilers can optimize the kvmppc_xive_* calls away when CONFIG_KVM isn't defined, thus avoiding the need for stubs. Signed-off-by: Greg Kurz Message-Id: <159679993438.876294.7285654331498605426.stgit@bahia.lan> Reviewed-by: Cédric Le Goater Signed-off-by: David Gibson --- hw/intc/spapr_xive.c | 45 +++++++++++++++++++++++++++++++-------------- hw/intc/xive.c | 25 +++++++++++++++++++------ include/hw/ppc/xive.h | 1 + 3 files changed, 51 insertions(+), 20 deletions(-) (limited to 'include') diff --git a/hw/intc/spapr_xive.c b/hw/intc/spapr_xive.c index 89c8cd9667..3c84f64dc4 100644 --- a/hw/intc/spapr_xive.c +++ b/hw/intc/spapr_xive.c @@ -148,12 +148,19 @@ static void spapr_xive_end_pic_print_info(SpaprXive *xive, XiveEND *end, xive_end_queue_pic_print_info(end, 6, mon); } +/* + * kvm_irqchip_in_kernel() will cause the compiler to turn this + * info a nop if CONFIG_KVM isn't defined. + */ +#define spapr_xive_in_kernel(xive) \ + (kvm_irqchip_in_kernel() && (xive)->fd != -1) + void spapr_xive_pic_print_info(SpaprXive *xive, Monitor *mon) { XiveSource *xsrc = &xive->source; int i; - if (kvm_irqchip_in_kernel()) { + if (spapr_xive_in_kernel(xive)) { Error *local_err = NULL; kvmppc_xive_synchronize_state(xive, &local_err); @@ -507,8 +514,10 @@ static const VMStateDescription vmstate_spapr_xive_eas = { static int vmstate_spapr_xive_pre_save(void *opaque) { - if (kvm_irqchip_in_kernel()) { - return kvmppc_xive_pre_save(SPAPR_XIVE(opaque)); + SpaprXive *xive = SPAPR_XIVE(opaque); + + if (spapr_xive_in_kernel(xive)) { + return kvmppc_xive_pre_save(xive); } return 0; @@ -520,8 +529,10 @@ static int vmstate_spapr_xive_pre_save(void *opaque) */ static int spapr_xive_post_load(SpaprInterruptController *intc, int version_id) { - if (kvm_irqchip_in_kernel()) { - return kvmppc_xive_post_load(SPAPR_XIVE(intc), version_id); + SpaprXive *xive = SPAPR_XIVE(intc); + + if (spapr_xive_in_kernel(xive)) { + return kvmppc_xive_post_load(xive, version_id); } return 0; @@ -564,7 +575,7 @@ static int spapr_xive_claim_irq(SpaprInterruptController *intc, int lisn, xive_source_irq_set_lsi(xsrc, lisn); } - if (kvm_irqchip_in_kernel()) { + if (spapr_xive_in_kernel(xive)) { return kvmppc_xive_source_reset_one(xsrc, lisn, errp); } @@ -641,7 +652,7 @@ static void spapr_xive_set_irq(SpaprInterruptController *intc, int irq, int val) { SpaprXive *xive = SPAPR_XIVE(intc); - if (kvm_irqchip_in_kernel()) { + if (spapr_xive_in_kernel(xive)) { kvmppc_xive_source_set_irq(&xive->source, irq, val); } else { xive_source_set_irq(&xive->source, irq, val); @@ -749,11 +760,16 @@ static void spapr_xive_deactivate(SpaprInterruptController *intc) spapr_xive_mmio_set_enabled(xive, false); - if (kvm_irqchip_in_kernel()) { + if (spapr_xive_in_kernel(xive)) { kvmppc_xive_disconnect(intc); } } +static bool spapr_xive_in_kernel_xptr(const XivePresenter *xptr) +{ + return spapr_xive_in_kernel(SPAPR_XIVE(xptr)); +} + static void spapr_xive_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); @@ -788,6 +804,7 @@ static void spapr_xive_class_init(ObjectClass *klass, void *data) sicc->post_load = spapr_xive_post_load; xpc->match_nvt = spapr_xive_match_nvt; + xpc->in_kernel = spapr_xive_in_kernel_xptr; } static const TypeInfo spapr_xive_info = { @@ -1058,7 +1075,7 @@ static target_ulong h_int_set_source_config(PowerPCCPU *cpu, new_eas.w = xive_set_field64(EAS_END_DATA, new_eas.w, eisn); } - if (kvm_irqchip_in_kernel()) { + if (spapr_xive_in_kernel(xive)) { Error *local_err = NULL; kvmppc_xive_set_source_config(xive, lisn, &new_eas, &local_err); @@ -1379,7 +1396,7 @@ static target_ulong h_int_set_queue_config(PowerPCCPU *cpu, */ out: - if (kvm_irqchip_in_kernel()) { + if (spapr_xive_in_kernel(xive)) { Error *local_err = NULL; kvmppc_xive_set_queue_config(xive, end_blk, end_idx, &end, &local_err); @@ -1480,7 +1497,7 @@ static target_ulong h_int_get_queue_config(PowerPCCPU *cpu, args[2] = 0; } - if (kvm_irqchip_in_kernel()) { + if (spapr_xive_in_kernel(xive)) { Error *local_err = NULL; kvmppc_xive_get_queue_config(xive, end_blk, end_idx, end, &local_err); @@ -1642,7 +1659,7 @@ static target_ulong h_int_esb(PowerPCCPU *cpu, return H_P3; } - if (kvm_irqchip_in_kernel()) { + if (spapr_xive_in_kernel(xive)) { args[0] = kvmppc_xive_esb_rw(xsrc, lisn, offset, data, flags & SPAPR_XIVE_ESB_STORE); } else { @@ -1717,7 +1734,7 @@ static target_ulong h_int_sync(PowerPCCPU *cpu, * under KVM */ - if (kvm_irqchip_in_kernel()) { + if (spapr_xive_in_kernel(xive)) { Error *local_err = NULL; kvmppc_xive_sync_source(xive, lisn, &local_err); @@ -1761,7 +1778,7 @@ static target_ulong h_int_reset(PowerPCCPU *cpu, device_legacy_reset(DEVICE(xive)); - if (kvm_irqchip_in_kernel()) { + if (spapr_xive_in_kernel(xive)) { Error *local_err = NULL; kvmppc_xive_reset(xive, &local_err); diff --git a/hw/intc/xive.c b/hw/intc/xive.c index 561d746cd1..a453e8f4dc 100644 --- a/hw/intc/xive.c +++ b/hw/intc/xive.c @@ -592,6 +592,17 @@ static const char * const xive_tctx_ring_names[] = { "USER", "OS", "POOL", "PHYS", }; +/* + * kvm_irqchip_in_kernel() will cause the compiler to turn this + * info a nop if CONFIG_KVM isn't defined. + */ +#define xive_in_kernel(xptr) \ + (kvm_irqchip_in_kernel() && \ + ({ \ + XivePresenterClass *xpc = XIVE_PRESENTER_GET_CLASS(xptr); \ + xpc->in_kernel ? xpc->in_kernel(xptr) : false; \ + })) + void xive_tctx_pic_print_info(XiveTCTX *tctx, Monitor *mon) { int cpu_index; @@ -606,7 +617,7 @@ void xive_tctx_pic_print_info(XiveTCTX *tctx, Monitor *mon) cpu_index = tctx->cs ? tctx->cs->cpu_index : -1; - if (kvm_irqchip_in_kernel()) { + if (xive_in_kernel(tctx->xptr)) { Error *local_err = NULL; kvmppc_xive_cpu_synchronize_state(tctx, &local_err); @@ -671,7 +682,7 @@ static void xive_tctx_realize(DeviceState *dev, Error **errp) } /* Connect the presenter to the VCPU (required for CPU hotplug) */ - if (kvm_irqchip_in_kernel()) { + if (xive_in_kernel(tctx->xptr)) { kvmppc_xive_cpu_connect(tctx, &local_err); if (local_err) { error_propagate(errp, local_err); @@ -682,10 +693,11 @@ static void xive_tctx_realize(DeviceState *dev, Error **errp) static int vmstate_xive_tctx_pre_save(void *opaque) { + XiveTCTX *tctx = XIVE_TCTX(opaque); Error *local_err = NULL; - if (kvm_irqchip_in_kernel()) { - kvmppc_xive_cpu_get_state(XIVE_TCTX(opaque), &local_err); + if (xive_in_kernel(tctx->xptr)) { + kvmppc_xive_cpu_get_state(tctx, &local_err); if (local_err) { error_report_err(local_err); return -1; @@ -697,14 +709,15 @@ static int vmstate_xive_tctx_pre_save(void *opaque) static int vmstate_xive_tctx_post_load(void *opaque, int version_id) { + XiveTCTX *tctx = XIVE_TCTX(opaque); Error *local_err = NULL; - if (kvm_irqchip_in_kernel()) { + if (xive_in_kernel(tctx->xptr)) { /* * Required for hotplugged CPU, for which the state comes * after all states of the machine. */ - kvmppc_xive_cpu_set_state(XIVE_TCTX(opaque), &local_err); + kvmppc_xive_cpu_set_state(tctx, &local_err); if (local_err) { error_report_err(local_err); return -1; diff --git a/include/hw/ppc/xive.h b/include/hw/ppc/xive.h index 82a61eaca7..2f3c5af810 100644 --- a/include/hw/ppc/xive.h +++ b/include/hw/ppc/xive.h @@ -402,6 +402,7 @@ typedef struct XivePresenterClass { uint8_t nvt_blk, uint32_t nvt_idx, bool cam_ignore, uint8_t priority, uint32_t logic_serv, XiveTCTXMatch *match); + bool (*in_kernel)(const XivePresenter *xptr); } XivePresenterClass; int xive_presenter_tctx_match(XivePresenter *xptr, XiveTCTX *tctx, -- cgit v1.2.3-55-g7522 From 3885ca66881f1d2568e169dcbf793fd493146d14 Mon Sep 17 00:00:00 2001 From: Greg Kurz Date: Mon, 10 Aug 2020 18:54:05 +0200 Subject: spapr/xive: Rework error handling of kvmppc_xive_cpu_connect() Use error_setg_errno() instead of error_setg(strerror()). While here, use -ret instead of errno since kvm_vcpu_enable_cap() returns a negative errno on failure. Use ERRP_GUARD() to ensure that errp can be passed to error_append_hint(), and get rid of the local_err boilerplate. Propagate the return value so that callers may use it as well to check failures. Signed-off-by: Greg Kurz Message-Id: <159707844549.1489912.4862921680328017645.stgit@bahia.lan> Signed-off-by: David Gibson --- hw/intc/spapr_xive_kvm.c | 21 ++++++++++----------- include/hw/ppc/xive.h | 2 +- 2 files changed, 11 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/hw/intc/spapr_xive_kvm.c b/hw/intc/spapr_xive_kvm.c index 82a6f99f02..aa1a2f9153 100644 --- a/hw/intc/spapr_xive_kvm.c +++ b/hw/intc/spapr_xive_kvm.c @@ -144,8 +144,9 @@ void kvmppc_xive_cpu_synchronize_state(XiveTCTX *tctx, Error **errp) } } -void kvmppc_xive_cpu_connect(XiveTCTX *tctx, Error **errp) +int kvmppc_xive_cpu_connect(XiveTCTX *tctx, Error **errp) { + ERRP_GUARD(); SpaprXive *xive = SPAPR_XIVE(tctx->xptr); unsigned long vcpu_id; int ret; @@ -154,7 +155,7 @@ void kvmppc_xive_cpu_connect(XiveTCTX *tctx, Error **errp) /* Check if CPU was hot unplugged and replugged. */ if (kvm_cpu_is_enabled(tctx->cs)) { - return; + return 0; } vcpu_id = kvm_arch_vcpu_id(tctx->cs); @@ -162,20 +163,18 @@ void kvmppc_xive_cpu_connect(XiveTCTX *tctx, Error **errp) ret = kvm_vcpu_enable_cap(tctx->cs, KVM_CAP_PPC_IRQ_XIVE, 0, xive->fd, vcpu_id, 0); if (ret < 0) { - Error *local_err = NULL; - - error_setg(&local_err, - "XIVE: unable to connect CPU%ld to KVM device: %s", - vcpu_id, strerror(errno)); - if (errno == ENOSPC) { - error_append_hint(&local_err, "Try -smp maxcpus=N with N < %u\n", + error_setg_errno(errp, -ret, + "XIVE: unable to connect CPU%ld to KVM device", + vcpu_id); + if (ret == -ENOSPC) { + error_append_hint(errp, "Try -smp maxcpus=N with N < %u\n", MACHINE(qdev_get_machine())->smp.max_cpus); } - error_propagate(errp, local_err); - return; + return ret; } kvm_cpu_enable(tctx->cs); + return 0; } /* diff --git a/include/hw/ppc/xive.h b/include/hw/ppc/xive.h index 2f3c5af810..2d87ed4372 100644 --- a/include/hw/ppc/xive.h +++ b/include/hw/ppc/xive.h @@ -487,7 +487,7 @@ void xive_tctx_ipb_update(XiveTCTX *tctx, uint8_t ring, uint8_t ipb); int kvmppc_xive_source_reset_one(XiveSource *xsrc, int srcno, Error **errp); void kvmppc_xive_source_set_irq(void *opaque, int srcno, int val); -void kvmppc_xive_cpu_connect(XiveTCTX *tctx, Error **errp); +int kvmppc_xive_cpu_connect(XiveTCTX *tctx, Error **errp); void kvmppc_xive_cpu_synchronize_state(XiveTCTX *tctx, Error **errp); void kvmppc_xive_cpu_get_state(XiveTCTX *tctx, Error **errp); void kvmppc_xive_cpu_set_state(XiveTCTX *tctx, Error **errp); -- cgit v1.2.3-55-g7522 From 5fa36b7ffbcb2056249929a7b1ee4e30c07dc67c Mon Sep 17 00:00:00 2001 From: Greg Kurz Date: Mon, 10 Aug 2020 18:54:26 +0200 Subject: spapr/xive: Rework error handling of kvmppc_xive_cpu_[gs]et_state() kvm_set_one_reg() returns a negative errno on failure, use that instead of errno. Also propagate it to callers so they can use it to check for failures and hopefully get rid of their local_err boilerplate. Signed-off-by: Greg Kurz Message-Id: <159707846665.1489912.14267225652103441921.stgit@bahia.lan> Signed-off-by: David Gibson --- hw/intc/spapr_xive_kvm.c | 15 ++++++++++----- include/hw/ppc/xive.h | 4 ++-- 2 files changed, 12 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/hw/intc/spapr_xive_kvm.c b/hw/intc/spapr_xive_kvm.c index b2a36fd59d..5e088ccbf8 100644 --- a/hw/intc/spapr_xive_kvm.c +++ b/hw/intc/spapr_xive_kvm.c @@ -73,7 +73,7 @@ static void kvm_cpu_disable_all(void) * XIVE Thread Interrupt Management context (KVM) */ -void kvmppc_xive_cpu_set_state(XiveTCTX *tctx, Error **errp) +int kvmppc_xive_cpu_set_state(XiveTCTX *tctx, Error **errp) { SpaprXive *xive = SPAPR_XIVE(tctx->xptr); uint64_t state[2]; @@ -86,13 +86,16 @@ void kvmppc_xive_cpu_set_state(XiveTCTX *tctx, Error **errp) ret = kvm_set_one_reg(tctx->cs, KVM_REG_PPC_VP_STATE, state); if (ret != 0) { - error_setg_errno(errp, errno, + error_setg_errno(errp, -ret, "XIVE: could not restore KVM state of CPU %ld", kvm_arch_vcpu_id(tctx->cs)); + return ret; } + + return 0; } -void kvmppc_xive_cpu_get_state(XiveTCTX *tctx, Error **errp) +int kvmppc_xive_cpu_get_state(XiveTCTX *tctx, Error **errp) { SpaprXive *xive = SPAPR_XIVE(tctx->xptr); uint64_t state[2] = { 0 }; @@ -102,14 +105,16 @@ void kvmppc_xive_cpu_get_state(XiveTCTX *tctx, Error **errp) ret = kvm_get_one_reg(tctx->cs, KVM_REG_PPC_VP_STATE, state); if (ret != 0) { - error_setg_errno(errp, errno, + error_setg_errno(errp, -ret, "XIVE: could not capture KVM state of CPU %ld", kvm_arch_vcpu_id(tctx->cs)); - return; + return ret; } /* word0 and word1 of the OS ring. */ *((uint64_t *) &tctx->regs[TM_QW1_OS]) = state[0]; + + return 0; } typedef struct { diff --git a/include/hw/ppc/xive.h b/include/hw/ppc/xive.h index 2d87ed4372..785c905357 100644 --- a/include/hw/ppc/xive.h +++ b/include/hw/ppc/xive.h @@ -489,7 +489,7 @@ int kvmppc_xive_source_reset_one(XiveSource *xsrc, int srcno, Error **errp); void kvmppc_xive_source_set_irq(void *opaque, int srcno, int val); int kvmppc_xive_cpu_connect(XiveTCTX *tctx, Error **errp); void kvmppc_xive_cpu_synchronize_state(XiveTCTX *tctx, Error **errp); -void kvmppc_xive_cpu_get_state(XiveTCTX *tctx, Error **errp); -void kvmppc_xive_cpu_set_state(XiveTCTX *tctx, Error **errp); +int kvmppc_xive_cpu_get_state(XiveTCTX *tctx, Error **errp); +int kvmppc_xive_cpu_set_state(XiveTCTX *tctx, Error **errp); #endif /* PPC_XIVE_H */ -- cgit v1.2.3-55-g7522 From f9a548edf2a54f59c37032dee3763f532e968fee Mon Sep 17 00:00:00 2001 From: Greg Kurz Date: Mon, 10 Aug 2020 18:54:33 +0200 Subject: spapr/xive: Rework error handling of kvmppc_xive_[gs]et_queue_config() Since kvm_device_access() returns a negative errno on failure, convert kvmppc_xive_get_queue_config() and kvmppc_xive_set_queue_config() to use it for error checking. This allows to get rid of the local_err boilerplate. Propagate the return value so that callers may use it as well to check failures. Signed-off-by: Greg Kurz Message-Id: <159707847357.1489912.2032291280645236480.stgit@bahia.lan> Signed-off-by: David Gibson --- hw/intc/spapr_xive_kvm.c | 35 ++++++++++++++++------------------- include/hw/ppc/spapr_xive.h | 4 ++-- 2 files changed, 18 insertions(+), 21 deletions(-) (limited to 'include') diff --git a/hw/intc/spapr_xive_kvm.c b/hw/intc/spapr_xive_kvm.c index 5e088ccbf8..696623f717 100644 --- a/hw/intc/spapr_xive_kvm.c +++ b/hw/intc/spapr_xive_kvm.c @@ -374,15 +374,15 @@ void kvmppc_xive_source_set_irq(void *opaque, int srcno, int val) /* * sPAPR XIVE interrupt controller (KVM) */ -void kvmppc_xive_get_queue_config(SpaprXive *xive, uint8_t end_blk, - uint32_t end_idx, XiveEND *end, - Error **errp) +int kvmppc_xive_get_queue_config(SpaprXive *xive, uint8_t end_blk, + uint32_t end_idx, XiveEND *end, + Error **errp) { struct kvm_ppc_xive_eq kvm_eq = { 0 }; uint64_t kvm_eq_idx; uint8_t priority; uint32_t server; - Error *local_err = NULL; + int ret; assert(xive_end_is_valid(end)); @@ -394,11 +394,10 @@ void kvmppc_xive_get_queue_config(SpaprXive *xive, uint8_t end_blk, kvm_eq_idx |= server << KVM_XIVE_EQ_SERVER_SHIFT & KVM_XIVE_EQ_SERVER_MASK; - kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_EQ_CONFIG, kvm_eq_idx, - &kvm_eq, false, &local_err); - if (local_err) { - error_propagate(errp, local_err); - return; + ret = kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_EQ_CONFIG, kvm_eq_idx, + &kvm_eq, false, errp); + if (ret < 0) { + return ret; } /* @@ -408,17 +407,18 @@ void kvmppc_xive_get_queue_config(SpaprXive *xive, uint8_t end_blk, */ end->w1 = xive_set_field32(END_W1_GENERATION, 0ul, kvm_eq.qtoggle) | xive_set_field32(END_W1_PAGE_OFF, 0ul, kvm_eq.qindex); + + return 0; } -void kvmppc_xive_set_queue_config(SpaprXive *xive, uint8_t end_blk, - uint32_t end_idx, XiveEND *end, - Error **errp) +int kvmppc_xive_set_queue_config(SpaprXive *xive, uint8_t end_blk, + uint32_t end_idx, XiveEND *end, + Error **errp) { struct kvm_ppc_xive_eq kvm_eq = { 0 }; uint64_t kvm_eq_idx; uint8_t priority; uint32_t server; - Error *local_err = NULL; /* * Build the KVM state from the local END structure. @@ -456,12 +456,9 @@ void kvmppc_xive_set_queue_config(SpaprXive *xive, uint8_t end_blk, kvm_eq_idx |= server << KVM_XIVE_EQ_SERVER_SHIFT & KVM_XIVE_EQ_SERVER_MASK; - kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_EQ_CONFIG, kvm_eq_idx, - &kvm_eq, true, &local_err); - if (local_err) { - error_propagate(errp, local_err); - return; - } + return + kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_EQ_CONFIG, kvm_eq_idx, + &kvm_eq, true, errp); } void kvmppc_xive_reset(SpaprXive *xive, Error **errp) diff --git a/include/hw/ppc/spapr_xive.h b/include/hw/ppc/spapr_xive.h index 93d09d68de..d0a08b618f 100644 --- a/include/hw/ppc/spapr_xive.h +++ b/include/hw/ppc/spapr_xive.h @@ -85,10 +85,10 @@ void kvmppc_xive_set_source_config(SpaprXive *xive, uint32_t lisn, XiveEAS *eas, void kvmppc_xive_sync_source(SpaprXive *xive, uint32_t lisn, Error **errp); uint64_t kvmppc_xive_esb_rw(XiveSource *xsrc, int srcno, uint32_t offset, uint64_t data, bool write); -void kvmppc_xive_set_queue_config(SpaprXive *xive, uint8_t end_blk, +int kvmppc_xive_set_queue_config(SpaprXive *xive, uint8_t end_blk, uint32_t end_idx, XiveEND *end, Error **errp); -void kvmppc_xive_get_queue_config(SpaprXive *xive, uint8_t end_blk, +int kvmppc_xive_get_queue_config(SpaprXive *xive, uint8_t end_blk, uint32_t end_idx, XiveEND *end, Error **errp); void kvmppc_xive_synchronize_state(SpaprXive *xive, Error **errp); -- cgit v1.2.3-55-g7522 From d55daadcb801f309556fdbab00b2653d20e26603 Mon Sep 17 00:00:00 2001 From: Greg Kurz Date: Mon, 10 Aug 2020 18:54:47 +0200 Subject: spapr/xive: Rework error handling of kvmppc_xive_set_source_config() Since kvm_device_access() returns a negative errno on failure, convert kvmppc_xive_set_source_config() to use it for error checking. This allows to get rid of the local_err boilerplate. Propagate the return value so that callers may use it as well to check failures. Signed-off-by: Greg Kurz Message-Id: <159707848764.1489912.17078842252160674523.stgit@bahia.lan> Signed-off-by: David Gibson --- hw/intc/spapr_xive_kvm.c | 13 ++++--------- include/hw/ppc/spapr_xive.h | 4 ++-- 2 files changed, 6 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/hw/intc/spapr_xive_kvm.c b/hw/intc/spapr_xive_kvm.c index 4142aaffff..f2dda69218 100644 --- a/hw/intc/spapr_xive_kvm.c +++ b/hw/intc/spapr_xive_kvm.c @@ -186,8 +186,8 @@ int kvmppc_xive_cpu_connect(XiveTCTX *tctx, Error **errp) * XIVE Interrupt Source (KVM) */ -void kvmppc_xive_set_source_config(SpaprXive *xive, uint32_t lisn, XiveEAS *eas, - Error **errp) +int kvmppc_xive_set_source_config(SpaprXive *xive, uint32_t lisn, XiveEAS *eas, + Error **errp) { uint32_t end_idx; uint32_t end_blk; @@ -196,7 +196,6 @@ void kvmppc_xive_set_source_config(SpaprXive *xive, uint32_t lisn, XiveEAS *eas, bool masked; uint32_t eisn; uint64_t kvm_src; - Error *local_err = NULL; assert(xive_eas_is_valid(eas)); @@ -216,12 +215,8 @@ void kvmppc_xive_set_source_config(SpaprXive *xive, uint32_t lisn, XiveEAS *eas, kvm_src |= ((uint64_t)eisn << KVM_XIVE_SOURCE_EISN_SHIFT) & KVM_XIVE_SOURCE_EISN_MASK; - kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_SOURCE_CONFIG, lisn, - &kvm_src, true, &local_err); - if (local_err) { - error_propagate(errp, local_err); - return; - } + return kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_SOURCE_CONFIG, lisn, + &kvm_src, true, errp); } void kvmppc_xive_sync_source(SpaprXive *xive, uint32_t lisn, Error **errp) diff --git a/include/hw/ppc/spapr_xive.h b/include/hw/ppc/spapr_xive.h index d0a08b618f..0ffbe0be02 100644 --- a/include/hw/ppc/spapr_xive.h +++ b/include/hw/ppc/spapr_xive.h @@ -80,8 +80,8 @@ int kvmppc_xive_connect(SpaprInterruptController *intc, uint32_t nr_servers, Error **errp); void kvmppc_xive_disconnect(SpaprInterruptController *intc); void kvmppc_xive_reset(SpaprXive *xive, Error **errp); -void kvmppc_xive_set_source_config(SpaprXive *xive, uint32_t lisn, XiveEAS *eas, - Error **errp); +int kvmppc_xive_set_source_config(SpaprXive *xive, uint32_t lisn, XiveEAS *eas, + Error **errp); void kvmppc_xive_sync_source(SpaprXive *xive, uint32_t lisn, Error **errp); uint64_t kvmppc_xive_esb_rw(XiveSource *xsrc, int srcno, uint32_t offset, uint64_t data, bool write); -- cgit v1.2.3-55-g7522 From 1118b6b72719ff83f2be1efc11d7248cf225074c Mon Sep 17 00:00:00 2001 From: Greg Kurz Date: Mon, 10 Aug 2020 18:55:29 +0200 Subject: spapr/xive: Simplify error handling of kvmppc_xive_cpu_synchronize_state() Now that kvmppc_xive_cpu_get_state() returns negative on error, use that and get rid of the temporary Error object and error_propagate(). Signed-off-by: Greg Kurz Message-Id: <159707852916.1489912.8376334685349668124.stgit@bahia.lan> Signed-off-by: David Gibson --- hw/intc/spapr_xive_kvm.c | 14 ++++++-------- include/hw/ppc/xive.h | 2 +- 2 files changed, 7 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/hw/intc/spapr_xive_kvm.c b/hw/intc/spapr_xive_kvm.c index e9a36115be..d871bb1a00 100644 --- a/hw/intc/spapr_xive_kvm.c +++ b/hw/intc/spapr_xive_kvm.c @@ -119,7 +119,8 @@ int kvmppc_xive_cpu_get_state(XiveTCTX *tctx, Error **errp) typedef struct { XiveTCTX *tctx; - Error *err; + Error **errp; + int ret; } XiveCpuGetState; static void kvmppc_xive_cpu_do_synchronize_state(CPUState *cpu, @@ -127,14 +128,14 @@ static void kvmppc_xive_cpu_do_synchronize_state(CPUState *cpu, { XiveCpuGetState *s = arg.host_ptr; - kvmppc_xive_cpu_get_state(s->tctx, &s->err); + s->ret = kvmppc_xive_cpu_get_state(s->tctx, s->errp); } -void kvmppc_xive_cpu_synchronize_state(XiveTCTX *tctx, Error **errp) +int kvmppc_xive_cpu_synchronize_state(XiveTCTX *tctx, Error **errp) { XiveCpuGetState s = { .tctx = tctx, - .err = NULL, + .errp = errp, }; /* @@ -143,10 +144,7 @@ void kvmppc_xive_cpu_synchronize_state(XiveTCTX *tctx, Error **errp) run_on_cpu(tctx->cs, kvmppc_xive_cpu_do_synchronize_state, RUN_ON_CPU_HOST_PTR(&s)); - if (s.err) { - error_propagate(errp, s.err); - return; - } + return s.ret; } int kvmppc_xive_cpu_connect(XiveTCTX *tctx, Error **errp) diff --git a/include/hw/ppc/xive.h b/include/hw/ppc/xive.h index 785c905357..2c42ae92d2 100644 --- a/include/hw/ppc/xive.h +++ b/include/hw/ppc/xive.h @@ -488,7 +488,7 @@ void xive_tctx_ipb_update(XiveTCTX *tctx, uint8_t ring, uint8_t ipb); int kvmppc_xive_source_reset_one(XiveSource *xsrc, int srcno, Error **errp); void kvmppc_xive_source_set_irq(void *opaque, int srcno, int val); int kvmppc_xive_cpu_connect(XiveTCTX *tctx, Error **errp); -void kvmppc_xive_cpu_synchronize_state(XiveTCTX *tctx, Error **errp); +int kvmppc_xive_cpu_synchronize_state(XiveTCTX *tctx, Error **errp); int kvmppc_xive_cpu_get_state(XiveTCTX *tctx, Error **errp); int kvmppc_xive_cpu_set_state(XiveTCTX *tctx, Error **errp); -- cgit v1.2.3-55-g7522 From 37035df51eaabb8d26b71da75b88a1c6727de8fa Mon Sep 17 00:00:00 2001 From: Greg Kurz Date: Fri, 14 Aug 2020 01:12:19 +0200 Subject: nvram: Exit QEMU if NVRAM cannot contain all -prom-env data Since commit 61f20b9dc5b7 ("spapr_nvram: Pre-initialize the NVRAM to support the -prom-env parameter"), pseries machines can pre-initialize the "system" partition in the NVRAM with the data passed to all -prom-env parameters on the QEMU command line. In this case it is assumed that all the data fits in 64 KiB, but the user can easily pass more and crash QEMU: $ qemu-system-ppc64 -M pseries $(for ((x=0;x<128;x++)); do \ echo -n " -prom-env " ; printf "%0.sx" {1..1024}; \ done) # this requires ~128 Kib malloc(): corrupted top size Aborted (core dumped) This happens because we don't check if all the prom-env data fits in the NVRAM and chrp_nvram_set_var() happily memcpy() it passed the buffer. This crash affects basically all ppc/ppc64 machine types that use -prom-env: - pseries (all versions) - g3beige - mac99 and also sparc/sparc64 machine types: - LX - SPARCClassic - SPARCbook - SS-10 - SS-20 - SS-4 - SS-5 - SS-600MP - Voyager - sun4u - sun4v Add a max_len argument to chrp_nvram_create_system_partition() so that it can check the available size before writing to memory. Since NVRAM is populated at machine init, it seems reasonable to consider this error as fatal. So, instead of reporting an error when we detect that the NVRAM is too small and adapt all machine types to handle it, we simply exit QEMU in all cases. This is still better than crashing. If someone wants another behavior, I guess this can be reworked later. Tested with: $ yes q | \ (for arch in ppc ppc64 sparc sparc64; do \ echo == $arch ==; \ qemu=${arch}-softmmu/qemu-system-$arch; \ for mach in $($qemu -M help | awk '! /^Supported/ { print $1 }'); do \ echo $mach; \ $qemu -M $mach -monitor stdio -nodefaults -nographic \ $(for ((x=0;x<128;x++)); do \ echo -n " -prom-env " ; printf "%0.sx" {1..1024}; \ done) >/dev/null; \ done; echo; \ done) Without the patch, affected machine types cause QEMU to report some memory corruption and crash: malloc(): corrupted top size free(): invalid size *** stack smashing detected ***: terminated With the patch, QEMU prints the following message and exits: NVRAM is too small. Try to pass less data to -prom-env It seems that the conditions for the crash have always existed, but it affects pseries, the machine type I care for, since commit 61f20b9dc5b7 only. Fixes: 61f20b9dc5b7 ("spapr_nvram: Pre-initialize the NVRAM to support the -prom-env parameter") RHBZ: https://bugzilla.redhat.com/show_bug.cgi?id=1867739 Reported-by: John Snow Reviewed-by: Laurent Vivier Signed-off-by: Greg Kurz Message-Id: <159736033937.350502.12402444542194031035.stgit@bahia.lan> Signed-off-by: David Gibson --- hw/nvram/chrp_nvram.c | 24 +++++++++++++++++++++--- hw/nvram/mac_nvram.c | 2 +- hw/nvram/spapr_nvram.c | 3 ++- hw/sparc/sun4m.c | 2 +- hw/sparc64/sun4u.c | 2 +- include/hw/nvram/chrp_nvram.h | 3 ++- 6 files changed, 28 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/hw/nvram/chrp_nvram.c b/hw/nvram/chrp_nvram.c index d969f26704..d4d10a7c03 100644 --- a/hw/nvram/chrp_nvram.c +++ b/hw/nvram/chrp_nvram.c @@ -21,14 +21,21 @@ #include "qemu/osdep.h" #include "qemu/cutils.h" +#include "qemu/error-report.h" #include "hw/nvram/chrp_nvram.h" #include "sysemu/sysemu.h" -static int chrp_nvram_set_var(uint8_t *nvram, int addr, const char *str) +static int chrp_nvram_set_var(uint8_t *nvram, int addr, const char *str, + int max_len) { int len; len = strlen(str) + 1; + + if (max_len < len) { + return -1; + } + memcpy(&nvram[addr], str, len); return addr + len; @@ -38,19 +45,26 @@ static int chrp_nvram_set_var(uint8_t *nvram, int addr, const char *str) * Create a "system partition", used for the Open Firmware * environment variables. */ -int chrp_nvram_create_system_partition(uint8_t *data, int min_len) +int chrp_nvram_create_system_partition(uint8_t *data, int min_len, int max_len) { ChrpNvramPartHdr *part_header; unsigned int i; int end; + if (max_len < sizeof(*part_header)) { + goto fail; + } + part_header = (ChrpNvramPartHdr *)data; part_header->signature = CHRP_NVPART_SYSTEM; pstrcpy(part_header->name, sizeof(part_header->name), "system"); end = sizeof(ChrpNvramPartHdr); for (i = 0; i < nb_prom_envs; i++) { - end = chrp_nvram_set_var(data, end, prom_envs[i]); + end = chrp_nvram_set_var(data, end, prom_envs[i], max_len - end); + if (end == -1) { + goto fail; + } } /* End marker */ @@ -65,6 +79,10 @@ int chrp_nvram_create_system_partition(uint8_t *data, int min_len) chrp_nvram_finish_partition(part_header, end); return end; + +fail: + error_report("NVRAM is too small. Try to pass less data to -prom-env"); + exit(EXIT_FAILURE); } /** diff --git a/hw/nvram/mac_nvram.c b/hw/nvram/mac_nvram.c index beec1c4e4d..11f2d31cdb 100644 --- a/hw/nvram/mac_nvram.c +++ b/hw/nvram/mac_nvram.c @@ -141,7 +141,7 @@ static void pmac_format_nvram_partition_of(MacIONVRAMState *nvr, int off, /* OpenBIOS nvram variables partition */ sysp_end = chrp_nvram_create_system_partition(&nvr->data[off], - DEF_SYSTEM_SIZE) + off; + DEF_SYSTEM_SIZE, len) + off; /* Free space partition */ chrp_nvram_create_free_partition(&nvr->data[sysp_end], len - sysp_end); diff --git a/hw/nvram/spapr_nvram.c b/hw/nvram/spapr_nvram.c index 15d08281d4..386513499f 100644 --- a/hw/nvram/spapr_nvram.c +++ b/hw/nvram/spapr_nvram.c @@ -188,7 +188,8 @@ static void spapr_nvram_realize(SpaprVioDevice *dev, Error **errp) } } else if (nb_prom_envs > 0) { /* Create a system partition to pass the -prom-env variables */ - chrp_nvram_create_system_partition(nvram->buf, MIN_NVRAM_SIZE / 4); + chrp_nvram_create_system_partition(nvram->buf, MIN_NVRAM_SIZE / 4, + nvram->size); chrp_nvram_create_free_partition(&nvram->buf[MIN_NVRAM_SIZE / 4], nvram->size - MIN_NVRAM_SIZE / 4); } diff --git a/hw/sparc/sun4m.c b/hw/sparc/sun4m.c index 9be930415f..cf7dfa4af5 100644 --- a/hw/sparc/sun4m.c +++ b/hw/sparc/sun4m.c @@ -143,7 +143,7 @@ static void nvram_init(Nvram *nvram, uint8_t *macaddr, memset(image, '\0', sizeof(image)); /* OpenBIOS nvram variables partition */ - sysp_end = chrp_nvram_create_system_partition(image, 0); + sysp_end = chrp_nvram_create_system_partition(image, 0, 0x1fd0); /* Free space partition */ chrp_nvram_create_free_partition(&image[sysp_end], 0x1fd0 - sysp_end); diff --git a/hw/sparc64/sun4u.c b/hw/sparc64/sun4u.c index 9e30203dcc..37310b73e6 100644 --- a/hw/sparc64/sun4u.c +++ b/hw/sparc64/sun4u.c @@ -136,7 +136,7 @@ static int sun4u_NVRAM_set_params(Nvram *nvram, uint16_t NVRAM_size, memset(image, '\0', sizeof(image)); /* OpenBIOS nvram variables partition */ - sysp_end = chrp_nvram_create_system_partition(image, 0); + sysp_end = chrp_nvram_create_system_partition(image, 0, 0x1fd0); /* Free space partition */ chrp_nvram_create_free_partition(&image[sysp_end], 0x1fd0 - sysp_end); diff --git a/include/hw/nvram/chrp_nvram.h b/include/hw/nvram/chrp_nvram.h index 09941a9be4..4a0f5c21b8 100644 --- a/include/hw/nvram/chrp_nvram.h +++ b/include/hw/nvram/chrp_nvram.h @@ -50,7 +50,8 @@ chrp_nvram_finish_partition(ChrpNvramPartHdr *header, uint32_t size) header->checksum = sum & 0xff; } -int chrp_nvram_create_system_partition(uint8_t *data, int min_len); +/* chrp_nvram_create_system_partition() failure is fatal */ +int chrp_nvram_create_system_partition(uint8_t *data, int min_len, int max_len); int chrp_nvram_create_free_partition(uint8_t *data, int len); #endif -- cgit v1.2.3-55-g7522