From 7381c5d11fe9a03ad3bf2e5700e96acc5cafe218 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Tue, 22 Jun 2021 17:03:36 +1000 Subject: spapr: tune rtas-size QEMU reserves space for RTAS via /rtas/rtas-size which tells the client how much space the RTAS requires to work which includes the RTAS binary blob implementing RTAS runtime. Because pseries supports FWNMI which requires plenty of space, QEMU reserves more than 2KB which is enough for the RTAS blob as it is just 20 bytes (under QEMU). Since FWNMI reset delivery was added, RTAS_SIZE macro is not used anymore. This replaces RTAS_SIZE with RTAS_MIN_SIZE and uses it in the /rtas/rtas-size calculation to account for the RTAS blob. Fixes: 0e236d347790 ("ppc/spapr: Implement FWNMI System Reset delivery") Signed-off-by: Alexey Kardashevskiy Message-Id: <20210622070336.1463250-1-aik@ozlabs.ru> Reviewed-by: Greg Kurz Signed-off-by: David Gibson --- include/hw/ppc/spapr.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h index f05219f75e..5697327e4c 100644 --- a/include/hw/ppc/spapr.h +++ b/include/hw/ppc/spapr.h @@ -770,7 +770,7 @@ void spapr_load_rtas(SpaprMachineState *spapr, void *fdt, hwaddr addr); #define SPAPR_IS_PCI_LIOBN(liobn) (!!((liobn) & 0x80000000)) #define SPAPR_PCI_DMA_WINDOW_NUM(liobn) ((liobn) & 0xff) -#define RTAS_SIZE 2048 +#define RTAS_MIN_SIZE 20 /* hv_rtas_size in SLOF */ #define RTAS_ERROR_LOG_MAX 2048 /* Offset from rtas-base where error log is placed */ -- cgit v1.2.3-55-g7522 From fc8c745d50150a63f6c5ba2cd0b83b430963b7e8 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Fri, 25 Jun 2021 15:51:55 +1000 Subject: spapr: Implement Open Firmware client interface The PAPR platform describes an OS environment that's presented by a combination of a hypervisor and firmware. The features it specifies require collaboration between the firmware and the hypervisor. Since the beginning, the runtime component of the firmware (RTAS) has been implemented as a 20 byte shim which simply forwards it to a hypercall implemented in qemu. The boot time firmware component is SLOF - but a build that's specific to qemu, and has always needed to be updated in sync with it. Even though we've managed to limit the amount of runtime communication we need between qemu and SLOF, there's some, and it has become increasingly awkward to handle as we've implemented new features. This implements a boot time OF client interface (CI) which is enabled by a new "x-vof" pseries machine option (stands for "Virtual Open Firmware). When enabled, QEMU implements the custom H_OF_CLIENT hcall which implements Open Firmware Client Interface (OF CI). This allows using a smaller stateless firmware which does not have to manage the device tree. The new "vof.bin" firmware image is included with source code under pc-bios/. It also includes RTAS blob. This implements a handful of CI methods just to get -kernel/-initrd working. In particular, this implements the device tree fetching and simple memory allocator - "claim" (an OF CI memory allocator) and updates "/memory@0/available" to report the client about available memory. This implements changing some device tree properties which we know how to deal with, the rest is ignored. To allow changes, this skips fdt_pack() when x-vof=on as not packing the blob leaves some room for appending. In absence of SLOF, this assigns phandles to device tree nodes to make device tree traversing work. When x-vof=on, this adds "/chosen" every time QEMU (re)builds a tree. This adds basic instances support which are managed by a hash map ihandle -> [phandle]. Before the guest started, the used memory is: 0..e60 - the initial firmware 8000..10000 - stack 400000.. - kernel 3ea0000.. - initramdisk This OF CI does not implement "interpret". Unlike SLOF, this does not format uninitialized nvram. Instead, this includes a disk image with pre-formatted nvram. With this basic support, this can only boot into kernel directly. However this is just enough for the petitboot kernel and initradmdisk to boot from any possible source. Note this requires reasonably recent guest kernel with: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=df5be5be8735 The immediate benefit is much faster booting time which especially crucial with fully emulated early CPU bring up environments. Also this may come handy when/if GRUB-in-the-userspace sees light of the day. This separates VOF and sPAPR in a hope that VOF bits may be reused by other POWERPC boards which do not support pSeries. This assumes potential support for booting from QEMU backends such as blockdev or netdev without devices/drivers used. Signed-off-by: Alexey Kardashevskiy Message-Id: <20210625055155.2252896-1-aik@ozlabs.ru> Reviewed-by: BALATON Zoltan [dwg: Adjusted some includes which broke compile in some more obscure compilation setups] Signed-off-by: David Gibson --- MAINTAINERS | 12 + hw/ppc/Kconfig | 4 + hw/ppc/meson.build | 3 + hw/ppc/spapr.c | 67 ++- hw/ppc/spapr_hcall.c | 25 +- hw/ppc/spapr_vof.c | 153 +++++++ hw/ppc/trace-events | 24 ++ hw/ppc/vof.c | 1049 +++++++++++++++++++++++++++++++++++++++++++++++ include/hw/ppc/spapr.h | 19 +- include/hw/ppc/vof.h | 58 +++ pc-bios/README | 4 + pc-bios/vof-nvram.bin | Bin 0 -> 16384 bytes pc-bios/vof.bin | Bin 0 -> 3784 bytes pc-bios/vof/Makefile | 23 ++ pc-bios/vof/bootmem.c | 14 + pc-bios/vof/ci.c | 91 ++++ pc-bios/vof/entry.S | 49 +++ pc-bios/vof/libc.c | 92 +++++ pc-bios/vof/main.c | 21 + pc-bios/vof/vof.h | 43 ++ pc-bios/vof/vof.lds | 48 +++ tests/qtest/rtas-test.c | 15 +- 22 files changed, 1801 insertions(+), 13 deletions(-) create mode 100644 hw/ppc/spapr_vof.c create mode 100644 hw/ppc/vof.c create mode 100644 include/hw/ppc/vof.h create mode 100644 pc-bios/vof-nvram.bin create mode 100755 pc-bios/vof.bin create mode 100644 pc-bios/vof/Makefile create mode 100644 pc-bios/vof/bootmem.c create mode 100644 pc-bios/vof/ci.c create mode 100644 pc-bios/vof/entry.S create mode 100644 pc-bios/vof/libc.c create mode 100644 pc-bios/vof/main.c create mode 100644 pc-bios/vof/vof.h create mode 100644 pc-bios/vof/vof.lds (limited to 'include') diff --git a/MAINTAINERS b/MAINTAINERS index 684142e12e..ce122eeced 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1360,6 +1360,18 @@ F: hw/pci-host/mv64361.c F: hw/pci-host/mv643xx.h F: include/hw/pci-host/mv64361.h +Virtual Open Firmware (VOF) +M: Alexey Kardashevskiy +M: David Gibson +M: Greg Kurz +L: qemu-ppc@nongnu.org +S: Maintained +F: hw/ppc/spapr_vof* +F: hw/ppc/vof* +F: include/hw/ppc/vof* +F: pc-bios/vof/* +F: pc-bios/vof* + RISC-V Machines --------------- OpenTitan diff --git a/hw/ppc/Kconfig b/hw/ppc/Kconfig index 66e0b15d9e..67630f80e1 100644 --- a/hw/ppc/Kconfig +++ b/hw/ppc/Kconfig @@ -13,6 +13,7 @@ config PSERIES select MSI_NONBROKEN select FDT_PPC select CHRP_NVRAM + select VOF config SPAPR_RNG bool @@ -144,3 +145,6 @@ config FW_CFG_PPC config FDT_PPC bool + +config VOF + bool diff --git a/hw/ppc/meson.build b/hw/ppc/meson.build index 597d974dd4..aa4c8e6a2e 100644 --- a/hw/ppc/meson.build +++ b/hw/ppc/meson.build @@ -84,4 +84,7 @@ ppc_ss.add(when: 'CONFIG_VIRTEX', if_true: files('virtex_ml507.c')) # Pegasos2 ppc_ss.add(when: 'CONFIG_PEGASOS2', if_true: files('pegasos2.c')) +ppc_ss.add(when: 'CONFIG_VOF', if_true: files('vof.c')) +ppc_ss.add(when: ['CONFIG_VOF', 'CONFIG_PSERIES'], if_true: files('spapr_vof.c')) + hw_arch += {'ppc': ppc_ss} diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index 9e19c57032..e9b6d0f587 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -101,6 +101,7 @@ #define FDT_MAX_ADDR 0x80000000 /* FDT must stay below that */ #define FW_MAX_SIZE 0x400000 #define FW_FILE_NAME "slof.bin" +#define FW_FILE_NAME_VOF "vof.bin" #define FW_OVERHEAD 0x2800000 #define KERNEL_LOAD_ADDR FW_MAX_SIZE @@ -1643,22 +1644,37 @@ static void spapr_machine_reset(MachineState *machine) fdt_addr = MIN(spapr->rma_size, FDT_MAX_ADDR) - FDT_MAX_SIZE; fdt = spapr_build_fdt(spapr, true, FDT_MAX_SIZE); + if (spapr->vof) { + target_ulong stack_ptr = 0; - rc = fdt_pack(fdt); + spapr_vof_reset(spapr, fdt, &stack_ptr, &error_fatal); - /* Should only fail if we've built a corrupted tree */ - assert(rc == 0); + spapr_cpu_set_entry_state(first_ppc_cpu, SPAPR_ENTRY_POINT, + stack_ptr, spapr->initrd_base, + spapr->initrd_size); + /* VOF is 32bit BE so enforce MSR here */ + first_ppc_cpu->env.msr &= ~((1ULL << MSR_SF) | (1ULL << MSR_LE)); + /* + * Do not pack the FDT as the client may change properties. + * VOF client does not expect the FDT so we do not load it to the VM. + */ + } else { + rc = fdt_pack(fdt); + /* Should only fail if we've built a corrupted tree */ + assert(rc == 0); - /* Load the fdt */ + spapr_cpu_set_entry_state(first_ppc_cpu, SPAPR_ENTRY_POINT, + 0, fdt_addr, 0); + cpu_physical_memory_write(fdt_addr, fdt, fdt_totalsize(fdt)); + } qemu_fdt_dumpdtb(fdt, fdt_totalsize(fdt)); - cpu_physical_memory_write(fdt_addr, fdt, fdt_totalsize(fdt)); + g_free(spapr->fdt_blob); spapr->fdt_size = fdt_totalsize(fdt); spapr->fdt_initial_size = spapr->fdt_size; spapr->fdt_blob = fdt; /* Set up the entry state */ - spapr_cpu_set_entry_state(first_ppc_cpu, SPAPR_ENTRY_POINT, 0, fdt_addr, 0); first_ppc_cpu->env.gpr[5] = 0; spapr->fwnmi_system_reset_addr = -1; @@ -2661,7 +2677,8 @@ static void spapr_machine_init(MachineState *machine) SpaprMachineState *spapr = SPAPR_MACHINE(machine); SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(machine); MachineClass *mc = MACHINE_GET_CLASS(machine); - const char *bios_name = machine->firmware ?: FW_FILE_NAME; + const char *bios_default = spapr->vof ? FW_FILE_NAME_VOF : FW_FILE_NAME; + const char *bios_name = machine->firmware ?: bios_default; const char *kernel_filename = machine->kernel_filename; const char *initrd_filename = machine->initrd_filename; PCIHostState *phb; @@ -3018,6 +3035,10 @@ static void spapr_machine_init(MachineState *machine) } qemu_cond_init(&spapr->fwnmi_machine_check_interlock_cond); + if (spapr->vof) { + spapr->vof->fw_size = fw_size; /* for claim() on itself */ + spapr_register_hypercall(KVMPPC_H_VOF_CLIENT, spapr_h_vof_client); + } } #define DEFAULT_KVM_TYPE "auto" @@ -3208,6 +3229,28 @@ static void spapr_set_resize_hpt(Object *obj, const char *value, Error **errp) } } +static bool spapr_get_vof(Object *obj, Error **errp) +{ + SpaprMachineState *spapr = SPAPR_MACHINE(obj); + + return spapr->vof != NULL; +} + +static void spapr_set_vof(Object *obj, bool value, Error **errp) +{ + SpaprMachineState *spapr = SPAPR_MACHINE(obj); + + if (spapr->vof) { + vof_cleanup(spapr->vof); + g_free(spapr->vof); + spapr->vof = NULL; + } + if (!value) { + return; + } + spapr->vof = g_malloc0(sizeof(*spapr->vof)); +} + static char *spapr_get_ic_mode(Object *obj, Error **errp) { SpaprMachineState *spapr = SPAPR_MACHINE(obj); @@ -3333,6 +3376,11 @@ static void spapr_instance_init(Object *obj) stringify(KERNEL_LOAD_ADDR) " for -kernel is the default"); spapr->kernel_addr = KERNEL_LOAD_ADDR; + + object_property_add_bool(obj, "x-vof", spapr_get_vof, spapr_set_vof); + object_property_set_description(obj, "x-vof", + "Enable Virtual Open Firmware (experimental)"); + /* The machine class defines the default interrupt controller mode */ spapr->irq = smc->irq; object_property_add_str(obj, "ic-mode", spapr_get_ic_mode, @@ -4496,6 +4544,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data) XICSFabricClass *xic = XICS_FABRIC_CLASS(oc); InterruptStatsProviderClass *ispc = INTERRUPT_STATS_PROVIDER_CLASS(oc); XiveFabricClass *xfc = XIVE_FABRIC_CLASS(oc); + VofMachineIfClass *vmc = VOF_MACHINE_CLASS(oc); mc->desc = "pSeries Logical Partition (PAPR compliant)"; mc->ignore_boot_device_suffixes = true; @@ -4584,6 +4633,9 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data) smc->smp_threads_vsmt = true; smc->nr_xirqs = SPAPR_NR_XIRQS; xfc->match_nvt = spapr_match_nvt; + vmc->client_architecture_support = spapr_vof_client_architecture_support; + vmc->quiesce = spapr_vof_quiesce; + vmc->setprop = spapr_vof_setprop; } static const TypeInfo spapr_machine_info = { @@ -4603,6 +4655,7 @@ static const TypeInfo spapr_machine_info = { { TYPE_XICS_FABRIC }, { TYPE_INTERRUPT_STATS_PROVIDER }, { TYPE_XIVE_FABRIC }, + { TYPE_VOF_MACHINE_IF }, { } }, }; diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c index f25014afda..03fc191599 100644 --- a/hw/ppc/spapr_hcall.c +++ b/hw/ppc/spapr_hcall.c @@ -1080,7 +1080,7 @@ target_ulong do_client_architecture_support(PowerPCCPU *cpu, SpaprOptionVector *ov1_guest, *ov5_guest; bool guest_radix; bool raw_mode_supported = false; - bool guest_xive; + bool guest_xive, reset_fdt = false; CPUState *cs; void *fdt; uint32_t max_compat = spapr->max_compat_pvr; @@ -1233,8 +1233,8 @@ target_ulong do_client_architecture_support(PowerPCCPU *cpu, spapr_setup_hpt(spapr); } - fdt = spapr_build_fdt(spapr, false, fdt_bufsize); - + reset_fdt = spapr->vof != NULL; + fdt = spapr_build_fdt(spapr, reset_fdt, fdt_bufsize); g_free(spapr->fdt_blob); spapr->fdt_size = fdt_totalsize(fdt); spapr->fdt_initial_size = spapr->fdt_size; @@ -1277,6 +1277,25 @@ static target_ulong h_client_architecture_support(PowerPCCPU *cpu, return ret; } +target_ulong spapr_vof_client_architecture_support(MachineState *ms, + CPUState *cs, + target_ulong ovec_addr) +{ + SpaprMachineState *spapr = SPAPR_MACHINE(ms); + + target_ulong ret = do_client_architecture_support(POWERPC_CPU(cs), spapr, + ovec_addr, FDT_MAX_SIZE); + + /* + * This adds stdout and generates phandles for boottime and CAS FDTs. + * It is alright to update the FDT here as do_client_architecture_support() + * does not pack it. + */ + spapr_vof_client_dt_finalize(spapr, spapr->fdt_blob); + + return ret; +} + static target_ulong h_get_cpu_characteristics(PowerPCCPU *cpu, SpaprMachineState *spapr, target_ulong opcode, diff --git a/hw/ppc/spapr_vof.c b/hw/ppc/spapr_vof.c new file mode 100644 index 0000000000..131a03fec0 --- /dev/null +++ b/hw/ppc/spapr_vof.c @@ -0,0 +1,153 @@ +/* + * SPAPR machine hooks to Virtual Open Firmware, + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ +#include "qemu/osdep.h" +#include "qemu-common.h" +#include "qapi/error.h" +#include "hw/ppc/spapr.h" +#include "hw/ppc/spapr_vio.h" +#include "hw/ppc/fdt.h" +#include "hw/ppc/vof.h" +#include "sysemu/sysemu.h" +#include "qom/qom-qobject.h" +#include "trace.h" + +target_ulong spapr_h_vof_client(PowerPCCPU *cpu, SpaprMachineState *spapr, + target_ulong opcode, target_ulong *_args) +{ + int ret = vof_client_call(MACHINE(spapr), spapr->vof, spapr->fdt_blob, + ppc64_phys_to_real(_args[0])); + + if (ret) { + return H_PARAMETER; + } + return H_SUCCESS; +} + +void spapr_vof_client_dt_finalize(SpaprMachineState *spapr, void *fdt) +{ + char *stdout_path = spapr_vio_stdout_path(spapr->vio_bus); + int chosen; + + vof_build_dt(fdt, spapr->vof); + + _FDT(chosen = fdt_path_offset(fdt, "/chosen")); + _FDT(fdt_setprop_string(fdt, chosen, "bootargs", + spapr->vof->bootargs ? : "")); + + /* + * SLOF-less setup requires an open instance of stdout for early + * kernel printk. By now all phandles are settled so we can open + * the default serial console. + */ + if (stdout_path) { + _FDT(vof_client_open_store(fdt, spapr->vof, "/chosen", "stdout", + stdout_path)); + } +} + +void spapr_vof_reset(SpaprMachineState *spapr, void *fdt, + target_ulong *stack_ptr, Error **errp) +{ + Vof *vof = spapr->vof; + + vof_init(vof, spapr->rma_size, errp); + + *stack_ptr = vof_claim(vof, 0, VOF_STACK_SIZE, VOF_STACK_SIZE); + if (*stack_ptr == -1) { + error_setg(errp, "Memory allocation for stack failed"); + return; + } + /* Stack grows downwards plus reserve space for the minimum stack frame */ + *stack_ptr += VOF_STACK_SIZE - 0x20; + + if (spapr->kernel_size && + vof_claim(vof, spapr->kernel_addr, spapr->kernel_size, 0) == -1) { + error_setg(errp, "Memory for kernel is in use"); + return; + } + + if (spapr->initrd_size && + vof_claim(vof, spapr->initrd_base, spapr->initrd_size, 0) == -1) { + error_setg(errp, "Memory for initramdisk is in use"); + return; + } + + spapr_vof_client_dt_finalize(spapr, fdt); + + /* + * At this point the expected allocation map is: + * + * 0..c38 - the initial firmware + * 8000..10000 - stack + * 400000.. - kernel + * 3ea0000.. - initramdisk + * + * We skip writing FDT as nothing expects it; OF client interface is + * going to be used for reading the device tree. + */ +} + +void spapr_vof_quiesce(MachineState *ms) +{ + SpaprMachineState *spapr = SPAPR_MACHINE(ms); + + spapr->fdt_size = fdt_totalsize(spapr->fdt_blob); + spapr->fdt_initial_size = spapr->fdt_size; +} + +bool spapr_vof_setprop(MachineState *ms, const char *path, const char *propname, + void *val, int vallen) +{ + SpaprMachineState *spapr = SPAPR_MACHINE(ms); + + /* + * We only allow changing properties which we know how to update in QEMU + * OR + * the ones which we know that they need to survive during "quiesce". + */ + + if (strcmp(path, "/rtas") == 0) { + if (strcmp(propname, "linux,rtas-base") == 0 || + strcmp(propname, "linux,rtas-entry") == 0) { + /* These need to survive quiesce so let them store in the FDT */ + return true; + } + } + + if (strcmp(path, "/chosen") == 0) { + if (strcmp(propname, "bootargs") == 0) { + Vof *vof = spapr->vof; + + g_free(vof->bootargs); + vof->bootargs = g_strndup(val, vallen); + return true; + } + if (strcmp(propname, "linux,initrd-start") == 0) { + if (vallen == sizeof(uint32_t)) { + spapr->initrd_base = ldl_be_p(val); + return true; + } + if (vallen == sizeof(uint64_t)) { + spapr->initrd_base = ldq_be_p(val); + return true; + } + return false; + } + if (strcmp(propname, "linux,initrd-end") == 0) { + if (vallen == sizeof(uint32_t)) { + spapr->initrd_size = ldl_be_p(val) - spapr->initrd_base; + return true; + } + if (vallen == sizeof(uint64_t)) { + spapr->initrd_size = ldq_be_p(val) - spapr->initrd_base; + return true; + } + return false; + } + } + + return true; +} diff --git a/hw/ppc/trace-events b/hw/ppc/trace-events index 0ba3e40353..6e90a01072 100644 --- a/hw/ppc/trace-events +++ b/hw/ppc/trace-events @@ -71,6 +71,30 @@ spapr_rtas_ibm_configure_connector_invalid(uint32_t index) "DRC index: 0x%"PRIx3 spapr_vio_h_reg_crq(uint64_t reg, uint64_t queue_addr, uint64_t queue_len) "CRQ for dev 0x%" PRIx64 " registered at 0x%" PRIx64 "/0x%" PRIx64 spapr_vio_free_crq(uint32_t reg) "CRQ for dev 0x%" PRIx32 " freed" +# vof.c +vof_error_str_truncated(const char *s, int len) "%s truncated to %d" +vof_error_param(const char *method, int nargscheck, int nretcheck, int nargs, int nret) "%s takes/returns %d/%d, not %d/%d" +vof_error_unknown_service(const char *service, int nargs, int nret) "\"%s\" args=%d rets=%d" +vof_error_unknown_method(const char *method) "\"%s\"" +vof_error_unknown_ihandle_close(uint32_t ih) "ih=0x%x" +vof_error_unknown_path(const char *path) "\"%s\"" +vof_error_write(uint32_t ih) "ih=0x%x" +vof_finddevice(const char *path, uint32_t ph) "\"%s\" => ph=0x%x" +vof_claim(uint32_t virt, uint32_t size, uint32_t align, uint32_t ret) "virt=0x%x size=0x%x align=0x%x => 0x%x" +vof_release(uint32_t virt, uint32_t size, uint32_t ret) "virt=0x%x size=0x%x => 0x%x" +vof_method(uint32_t ihandle, const char *method, uint32_t param, uint32_t ret, uint32_t ret2) "ih=0x%x \"%s\"(0x%x) => 0x%x 0x%x" +vof_getprop(uint32_t ph, const char *prop, uint32_t ret, const char *val) "ph=0x%x \"%s\" => len=%d [%s]" +vof_getproplen(uint32_t ph, const char *prop, uint32_t ret) "ph=0x%x \"%s\" => len=%d" +vof_setprop(uint32_t ph, const char *prop, const char *val, uint32_t vallen, uint32_t ret) "ph=0x%x \"%s\" [%s] len=%d => ret=%d" +vof_open(const char *path, uint32_t ph, uint32_t ih) "%s ph=0x%x => ih=0x%x" +vof_interpret(const char *cmd, uint32_t param1, uint32_t param2, uint32_t ret, uint32_t ret2) "[%s] 0x%x 0x%x => 0x%x 0x%x" +vof_package_to_path(uint32_t ph, const char *tmp, uint32_t ret) "ph=0x%x => %s len=%d" +vof_instance_to_path(uint32_t ih, uint32_t ph, const char *tmp, uint32_t ret) "ih=0x%x ph=0x%x => %s len=%d" +vof_instance_to_package(uint32_t ih, uint32_t ph) "ih=0x%x => ph=0x%x" +vof_write(uint32_t ih, unsigned cb, const char *msg) "ih=0x%x [%u] \"%s\"" +vof_avail(uint64_t start, uint64_t end, uint64_t size) "0x%"PRIx64"..0x%"PRIx64" size=0x%"PRIx64 +vof_claimed(uint64_t start, uint64_t end, uint64_t size) "0x%"PRIx64"..0x%"PRIx64" size=0x%"PRIx64 + # ppc.c ppc_tb_adjust(uint64_t offs1, uint64_t offs2, int64_t diff, int64_t seconds) "adjusted from 0x%"PRIx64" to 0x%"PRIx64", diff %"PRId64" (%"PRId64"s)" diff --git a/hw/ppc/vof.c b/hw/ppc/vof.c new file mode 100644 index 0000000000..47c86e394e --- /dev/null +++ b/hw/ppc/vof.c @@ -0,0 +1,1049 @@ +/* + * QEMU PowerPC Virtual Open Firmware. + * + * This implements client interface from OpenFirmware IEEE1275 on the QEMU + * side to leave only a very basic firmware in the VM. + * + * Copyright (c) 2021 IBM Corporation. + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include "qemu/osdep.h" +#include "qemu-common.h" +#include "qemu/timer.h" +#include "qemu/range.h" +#include "qemu/units.h" +#include "qemu/log.h" +#include "qapi/error.h" +#include "exec/ram_addr.h" +#include "exec/address-spaces.h" +#include "hw/ppc/vof.h" +#include "hw/ppc/fdt.h" +#include "sysemu/runstate.h" +#include "qom/qom-qobject.h" +#include "trace.h" + +#include + +/* + * OF 1275 "nextprop" description suggests is it 32 bytes max but + * LoPAPR defines "ibm,query-interrupt-source-number" which is 33 chars long. + */ +#define OF_PROPNAME_LEN_MAX 64 + +#define VOF_MAX_PATH 256 +#define VOF_MAX_SETPROPLEN 2048 +#define VOF_MAX_METHODLEN 256 +#define VOF_MAX_FORTHCODE 256 +#define VOF_VTY_BUF_SIZE 256 + +typedef struct { + uint64_t start; + uint64_t size; +} OfClaimed; + +typedef struct { + char *path; /* the path used to open the instance */ + uint32_t phandle; +} OfInstance; + +static int readstr(hwaddr pa, char *buf, int size) +{ + if (VOF_MEM_READ(pa, buf, size) != MEMTX_OK) { + return -1; + } + if (strnlen(buf, size) == size) { + buf[size - 1] = '\0'; + trace_vof_error_str_truncated(buf, size); + return -1; + } + return 0; +} + +static bool cmpservice(const char *s, unsigned nargs, unsigned nret, + const char *s1, unsigned nargscheck, unsigned nretcheck) +{ + if (strcmp(s, s1)) { + return false; + } + if ((nargscheck && (nargs != nargscheck)) || + (nretcheck && (nret != nretcheck))) { + trace_vof_error_param(s, nargscheck, nretcheck, nargs, nret); + return false; + } + + return true; +} + +static void prop_format(char *tval, int tlen, const void *prop, int len) +{ + int i; + const unsigned char *c; + char *t; + const char bin[] = "..."; + + for (i = 0, c = prop; i < len; ++i, ++c) { + if (*c == '\0' && i == len - 1) { + strncpy(tval, prop, tlen - 1); + return; + } + if (*c < 0x20 || *c >= 0x80) { + break; + } + } + + for (i = 0, c = prop, t = tval; i < len; ++i, ++c) { + if (t >= tval + tlen - sizeof(bin) - 1 - 2 - 1) { + strcpy(t, bin); + return; + } + if (i && i % 4 == 0 && i != len - 1) { + strcat(t, " "); + ++t; + } + t += sprintf(t, "%02X", *c & 0xFF); + } +} + +static int get_path(const void *fdt, int offset, char *buf, int len) +{ + int ret; + + ret = fdt_get_path(fdt, offset, buf, len - 1); + if (ret < 0) { + return ret; + } + + buf[len - 1] = '\0'; + + return strlen(buf) + 1; +} + +static int phandle_to_path(const void *fdt, uint32_t ph, char *buf, int len) +{ + int ret; + + ret = fdt_node_offset_by_phandle(fdt, ph); + if (ret < 0) { + return ret; + } + + return get_path(fdt, ret, buf, len); +} + +static int path_offset(const void *fdt, const char *path) +{ + g_autofree char *p = NULL; + char *at; + + /* + * https://www.devicetree.org/open-firmware/bindings/ppc/release/ppc-2_1.html#HDR16 + * + * "Conversion from numeric representation to text representation shall use + * the lower case forms of the hexadecimal digits in the range a..f, + * suppressing leading zeros". + */ + at = strchr(path, '@'); + if (!at) { + return fdt_path_offset(fdt, path); + } + + p = g_strdup(path); + for (at = at - path + p + 1; *at; ++at) { + *at = tolower(*at); + } + return fdt_path_offset(fdt, p); +} + +static uint32_t vof_finddevice(const void *fdt, uint32_t nodeaddr) +{ + char fullnode[VOF_MAX_PATH]; + uint32_t ret = -1; + int offset; + + if (readstr(nodeaddr, fullnode, sizeof(fullnode))) { + return (uint32_t) ret; + } + + offset = path_offset(fdt, fullnode); + if (offset >= 0) { + ret = fdt_get_phandle(fdt, offset); + } + trace_vof_finddevice(fullnode, ret); + return (uint32_t) ret; +} + +static const void *getprop(const void *fdt, int nodeoff, const char *propname, + int *proplen, bool *write0) +{ + const char *unit, *prop; + const void *ret = fdt_getprop(fdt, nodeoff, propname, proplen); + + if (ret) { + if (write0) { + *write0 = false; + } + return ret; + } + + if (strcmp(propname, "name")) { + return NULL; + } + /* + * We return a value for "name" from path if queried but property does not + * exist. @proplen does not include the unit part in this case. + */ + prop = fdt_get_name(fdt, nodeoff, proplen); + if (!prop) { + *proplen = 0; + return NULL; + } + + unit = memchr(prop, '@', *proplen); + if (unit) { + *proplen = unit - prop; + } + *proplen += 1; + + /* + * Since it might be cut at "@" and there will be no trailing zero + * in the prop buffer, tell the caller to write zero at the end. + */ + if (write0) { + *write0 = true; + } + return prop; +} + +static uint32_t vof_getprop(const void *fdt, uint32_t nodeph, uint32_t pname, + uint32_t valaddr, uint32_t vallen) +{ + char propname[OF_PROPNAME_LEN_MAX + 1]; + uint32_t ret = 0; + int proplen = 0; + const void *prop; + char trval[64] = ""; + int nodeoff = fdt_node_offset_by_phandle(fdt, nodeph); + bool write0; + + if (nodeoff < 0) { + return -1; + } + if (readstr(pname, propname, sizeof(propname))) { + return -1; + } + prop = getprop(fdt, nodeoff, propname, &proplen, &write0); + if (prop) { + const char zero = 0; + int cb = MIN(proplen, vallen); + + if (VOF_MEM_WRITE(valaddr, prop, cb) != MEMTX_OK || + /* if that was "name" with a unit address, overwrite '@' with '0' */ + (write0 && + cb == proplen && + VOF_MEM_WRITE(valaddr + cb - 1, &zero, 1) != MEMTX_OK)) { + ret = -1; + } else { + /* + * OF1275 says: + * "Size is either the actual size of the property, or -1 if name + * does not exist", hence returning proplen instead of cb. + */ + ret = proplen; + /* Do not format a value if tracepoint is silent, for performance */ + if (trace_event_get_state(TRACE_VOF_GETPROP) && + qemu_loglevel_mask(LOG_TRACE)) { + prop_format(trval, sizeof(trval), prop, ret); + } + } + } else { + ret = -1; + } + trace_vof_getprop(nodeph, propname, ret, trval); + + return ret; +} + +static uint32_t vof_getproplen(const void *fdt, uint32_t nodeph, uint32_t pname) +{ + char propname[OF_PROPNAME_LEN_MAX + 1]; + uint32_t ret = 0; + int proplen = 0; + const void *prop; + int nodeoff = fdt_node_offset_by_phandle(fdt, nodeph); + + if (nodeoff < 0) { + return -1; + } + if (readstr(pname, propname, sizeof(propname))) { + return -1; + } + prop = getprop(fdt, nodeoff, propname, &proplen, NULL); + if (prop) { + ret = proplen; + } else { + ret = -1; + } + trace_vof_getproplen(nodeph, propname, ret); + + return ret; +} + +static uint32_t vof_setprop(MachineState *ms, void *fdt, Vof *vof, + uint32_t nodeph, uint32_t pname, + uint32_t valaddr, uint32_t vallen) +{ + char propname[OF_PROPNAME_LEN_MAX + 1]; + uint32_t ret = -1; + int offset; + char trval[64] = ""; + char nodepath[VOF_MAX_PATH] = ""; + Object *vmo = object_dynamic_cast(OBJECT(ms), TYPE_VOF_MACHINE_IF); + g_autofree char *val = NULL; + + if (vallen > VOF_MAX_SETPROPLEN) { + goto trace_exit; + } + if (readstr(pname, propname, sizeof(propname))) { + goto trace_exit; + } + offset = fdt_node_offset_by_phandle(fdt, nodeph); + if (offset < 0) { + goto trace_exit; + } + ret = get_path(fdt, offset, nodepath, sizeof(nodepath)); + if (ret <= 0) { + goto trace_exit; + } + + val = g_malloc0(vallen); + if (VOF_MEM_READ(valaddr, val, vallen) != MEMTX_OK) { + goto trace_exit; + } + + if (vmo) { + VofMachineIfClass *vmc = VOF_MACHINE_GET_CLASS(vmo); + + if (vmc->setprop && + !vmc->setprop(ms, nodepath, propname, val, vallen)) { + goto trace_exit; + } + } + + ret = fdt_setprop(fdt, offset, propname, val, vallen); + if (ret) { + goto trace_exit; + } + + if (trace_event_get_state(TRACE_VOF_SETPROP) && + qemu_loglevel_mask(LOG_TRACE)) { + prop_format(trval, sizeof(trval), val, vallen); + } + ret = vallen; + +trace_exit: + trace_vof_setprop(nodeph, propname, trval, vallen, ret); + + return ret; +} + +static uint32_t vof_nextprop(const void *fdt, uint32_t phandle, + uint32_t prevaddr, uint32_t nameaddr) +{ + int offset, nodeoff = fdt_node_offset_by_phandle(fdt, phandle); + char prev[OF_PROPNAME_LEN_MAX + 1]; + const char *tmp; + + if (readstr(prevaddr, prev, sizeof(prev))) { + return -1; + } + + fdt_for_each_property_offset(offset, fdt, nodeoff) { + if (!fdt_getprop_by_offset(fdt, offset, &tmp, NULL)) { + return 0; + } + if (prev[0] == '\0' || strcmp(prev, tmp) == 0) { + if (prev[0] != '\0') { + offset = fdt_next_property_offset(fdt, offset); + if (offset < 0) { + return 0; + } + } + if (!fdt_getprop_by_offset(fdt, offset, &tmp, NULL)) { + return 0; + } + + if (VOF_MEM_WRITE(nameaddr, tmp, strlen(tmp) + 1) != MEMTX_OK) { + return -1; + } + return 1; + } + } + + return 0; +} + +static uint32_t vof_peer(const void *fdt, uint32_t phandle) +{ + int ret; + + if (phandle == 0) { + ret = fdt_path_offset(fdt, "/"); + } else { + ret = fdt_next_subnode(fdt, fdt_node_offset_by_phandle(fdt, phandle)); + } + + if (ret < 0) { + ret = 0; + } else { + ret = fdt_get_phandle(fdt, ret); + } + + return ret; +} + +static uint32_t vof_child(const void *fdt, uint32_t phandle) +{ + int ret = fdt_first_subnode(fdt, fdt_node_offset_by_phandle(fdt, phandle)); + + if (ret < 0) { + ret = 0; + } else { + ret = fdt_get_phandle(fdt, ret); + } + + return ret; +} + +static uint32_t vof_parent(const void *fdt, uint32_t phandle) +{ + int ret = fdt_parent_offset(fdt, fdt_node_offset_by_phandle(fdt, phandle)); + + if (ret < 0) { + ret = 0; + } else { + ret = fdt_get_phandle(fdt, ret); + } + + return ret; +} + +static uint32_t vof_do_open(void *fdt, Vof *vof, int offset, const char *path) +{ + uint32_t ret = -1; + OfInstance *inst = NULL; + + if (vof->of_instance_last == 0xFFFFFFFF) { + /* We do not recycle ihandles yet */ + goto trace_exit; + } + + inst = g_new0(OfInstance, 1); + inst->phandle = fdt_get_phandle(fdt, offset); + g_assert(inst->phandle); + ++vof->of_instance_last; + + inst->path = g_strdup(path); + g_hash_table_insert(vof->of_instances, + GINT_TO_POINTER(vof->of_instance_last), + inst); + ret = vof->of_instance_last; + +trace_exit: + trace_vof_open(path, inst ? inst->phandle : 0, ret); + + return ret; +} + +uint32_t vof_client_open_store(void *fdt, Vof *vof, const char *nodename, + const char *prop, const char *path) +{ + int node = fdt_path_offset(fdt, nodename); + int inst, offset; + + offset = fdt_path_offset(fdt, path); + if (offset < 0) { + trace_vof_error_unknown_path(path); + return offset; + } + + inst = vof_do_open(fdt, vof, offset, path); + + return fdt_setprop_cell(fdt, node, prop, inst); +} + +static uint32_t vof_open(void *fdt, Vof *vof, uint32_t pathaddr) +{ + char path[VOF_MAX_PATH]; + int offset; + + if (readstr(pathaddr, path, sizeof(path))) { + return -1; + } + + offset = path_offset(fdt, path); + if (offset < 0) { + trace_vof_error_unknown_path(path); + return offset; + } + + return vof_do_open(fdt, vof, offset, path); +} + +static void vof_close(Vof *vof, uint32_t ihandle) +{ + if (!g_hash_table_remove(vof->of_instances, GINT_TO_POINTER(ihandle))) { + trace_vof_error_unknown_ihandle_close(ihandle); + } +} + +static uint32_t vof_instance_to_package(Vof *vof, uint32_t ihandle) +{ + gpointer instp = g_hash_table_lookup(vof->of_instances, + GINT_TO_POINTER(ihandle)); + uint32_t ret = -1; + + if (instp) { + ret = ((OfInstance *)instp)->phandle; + } + trace_vof_instance_to_package(ihandle, ret); + + return ret; +} + +static uint32_t vof_package_to_path(const void *fdt, uint32_t phandle, + uint32_t buf, uint32_t len) +{ + uint32_t ret = -1; + char tmp[VOF_MAX_PATH] = ""; + + ret = phandle_to_path(fdt, phandle, tmp, sizeof(tmp)); + if (ret > 0) { + if (VOF_MEM_WRITE(buf, tmp, ret) != MEMTX_OK) { + ret = -1; + } + } + + trace_vof_package_to_path(phandle, tmp, ret); + + return ret; +} + +static uint32_t vof_instance_to_path(void *fdt, Vof *vof, uint32_t ihandle, + uint32_t buf, uint32_t len) +{ + uint32_t ret = -1; + uint32_t phandle = vof_instance_to_package(vof, ihandle); + char tmp[VOF_MAX_PATH] = ""; + + if (phandle != -1) { + ret = phandle_to_path(fdt, phandle, tmp, sizeof(tmp)); + if (ret > 0) { + if (VOF_MEM_WRITE(buf, tmp, ret) != MEMTX_OK) { + ret = -1; + } + } + } + trace_vof_instance_to_path(ihandle, phandle, tmp, ret); + + return ret; +} + +static uint32_t vof_write(Vof *vof, uint32_t ihandle, uint32_t buf, + uint32_t len) +{ + char tmp[VOF_VTY_BUF_SIZE]; + unsigned cb; + OfInstance *inst = (OfInstance *) + g_hash_table_lookup(vof->of_instances, GINT_TO_POINTER(ihandle)); + + if (!inst) { + trace_vof_error_write(ihandle); + return -1; + } + + for ( ; len > 0; len -= cb) { + cb = MIN(len, sizeof(tmp) - 1); + if (VOF_MEM_READ(buf, tmp, cb) != MEMTX_OK) { + return -1; + } + + /* FIXME: there is no backend(s) yet so just call a trace */ + if (trace_event_get_state(TRACE_VOF_WRITE) && + qemu_loglevel_mask(LOG_TRACE)) { + tmp[cb] = '\0'; + trace_vof_write(ihandle, cb, tmp); + } + } + + return len; +} + +static void vof_claimed_dump(GArray *claimed) +{ + int i; + OfClaimed c; + + if (trace_event_get_state(TRACE_VOF_CLAIMED) && + qemu_loglevel_mask(LOG_TRACE)) { + + for (i = 0; i < claimed->len; ++i) { + c = g_array_index(claimed, OfClaimed, i); + trace_vof_claimed(c.start, c.start + c.size, c.size); + } + } +} + +static bool vof_claim_avail(GArray *claimed, uint64_t virt, uint64_t size) +{ + int i; + OfClaimed c; + + for (i = 0; i < claimed->len; ++i) { + c = g_array_index(claimed, OfClaimed, i); + if (ranges_overlap(c.start, c.size, virt, size)) { + return false; + } + } + + return true; +} + +static void vof_claim_add(GArray *claimed, uint64_t virt, uint64_t size) +{ + OfClaimed newclaim; + + newclaim.start = virt; + newclaim.size = size; + g_array_append_val(claimed, newclaim); +} + +static gint of_claimed_compare_func(gconstpointer a, gconstpointer b) +{ + return ((OfClaimed *)a)->start - ((OfClaimed *)b)->start; +} + +static void vof_dt_memory_available(void *fdt, GArray *claimed, uint64_t base) +{ + int i, n, offset, proplen = 0, sc, ac; + target_ulong mem0_end; + const uint8_t *mem0_reg; + g_autofree uint8_t *avail = NULL; + uint8_t *availcur; + + if (!fdt || !claimed) { + return; + } + + offset = fdt_path_offset(fdt, "/"); + _FDT(offset); + ac = fdt_address_cells(fdt, offset); + g_assert(ac == 1 || ac == 2); + sc = fdt_size_cells(fdt, offset); + g_assert(sc == 1 || sc == 2); + + offset = fdt_path_offset(fdt, "/memory@0"); + _FDT(offset); + + mem0_reg = fdt_getprop(fdt, offset, "reg", &proplen); + g_assert(mem0_reg && proplen == sizeof(uint32_t) * (ac + sc)); + if (sc == 2) { + mem0_end = be64_to_cpu(*(uint64_t *)(mem0_reg + sizeof(uint32_t) * ac)); + } else { + mem0_end = be32_to_cpu(*(uint32_t *)(mem0_reg + sizeof(uint32_t) * ac)); + } + + g_array_sort(claimed, of_claimed_compare_func); + vof_claimed_dump(claimed); + + /* + * VOF resides in the first page so we do not need to check if there is + * available memory before the first claimed block + */ + g_assert(claimed->len && (g_array_index(claimed, OfClaimed, 0).start == 0)); + + avail = g_malloc0(sizeof(uint32_t) * (ac + sc) * claimed->len); + for (i = 0, n = 0, availcur = avail; i < claimed->len; ++i) { + OfClaimed c = g_array_index(claimed, OfClaimed, i); + uint64_t start, size; + + start = c.start + c.size; + if (i < claimed->len - 1) { + OfClaimed cn = g_array_index(claimed, OfClaimed, i + 1); + + size = cn.start - start; + } else { + size = mem0_end - start; + } + + if (ac == 2) { + *(uint64_t *) availcur = cpu_to_be64(start); + } else { + *(uint32_t *) availcur = cpu_to_be32(start); + } + availcur += sizeof(uint32_t) * ac; + if (sc == 2) { + *(uint64_t *) availcur = cpu_to_be64(size); + } else { + *(uint32_t *) availcur = cpu_to_be32(size); + } + availcur += sizeof(uint32_t) * sc; + + if (size) { + trace_vof_avail(c.start + c.size, c.start + c.size + size, size); + ++n; + } + } + _FDT((fdt_setprop(fdt, offset, "available", avail, availcur - avail))); +} + +/* + * OF1275: + * "Allocates size bytes of memory. If align is zero, the allocated range + * begins at the virtual address virt. Otherwise, an aligned address is + * automatically chosen and the input argument virt is ignored". + * + * In other words, exactly one of @virt and @align is non-zero. + */ +uint64_t vof_claim(Vof *vof, uint64_t virt, uint64_t size, + uint64_t align) +{ + uint64_t ret; + + if (size == 0) { + ret = -1; + } else if (align == 0) { + if (!vof_claim_avail(vof->claimed, virt, size)) { + ret = -1; + } else { + ret = virt; + } + } else { + vof->claimed_base = QEMU_ALIGN_UP(vof->claimed_base, align); + while (1) { + if (vof->claimed_base >= vof->top_addr) { + error_report("Out of RMA memory for the OF client"); + return -1; + } + if (vof_claim_avail(vof->claimed, vof->claimed_base, size)) { + break; + } + vof->claimed_base += size; + } + ret = vof->claimed_base; + } + + if (ret != -1) { + vof->claimed_base = MAX(vof->claimed_base, ret + size); + vof_claim_add(vof->claimed, ret, size); + } + trace_vof_claim(virt, size, align, ret); + + return ret; +} + +static uint32_t vof_release(Vof *vof, uint64_t virt, uint64_t size) +{ + uint32_t ret = -1; + int i; + GArray *claimed = vof->claimed; + OfClaimed c; + + for (i = 0; i < claimed->len; ++i) { + c = g_array_index(claimed, OfClaimed, i); + if (c.start == virt && c.size == size) { + g_array_remove_index(claimed, i); + ret = 0; + break; + } + } + + trace_vof_release(virt, size, ret); + + return ret; +} + +static void vof_instantiate_rtas(Error **errp) +{ + error_setg(errp, "The firmware should have instantiated RTAS"); +} + +static uint32_t vof_call_method(MachineState *ms, Vof *vof, uint32_t methodaddr, + uint32_t ihandle, uint32_t param1, + uint32_t param2, uint32_t param3, + uint32_t param4, uint32_t *ret2) +{ + uint32_t ret = -1; + char method[VOF_MAX_METHODLEN] = ""; + OfInstance *inst; + + if (!ihandle) { + goto trace_exit; + } + + inst = (OfInstance *)g_hash_table_lookup(vof->of_instances, + GINT_TO_POINTER(ihandle)); + if (!inst) { + goto trace_exit; + } + + if (readstr(methodaddr, method, sizeof(method))) { + goto trace_exit; + } + + if (strcmp(inst->path, "/") == 0) { + if (strcmp(method, "ibm,client-architecture-support") == 0) { + Object *vmo = object_dynamic_cast(OBJECT(ms), TYPE_VOF_MACHINE_IF); + + if (vmo) { + VofMachineIfClass *vmc = VOF_MACHINE_GET_CLASS(vmo); + + g_assert(vmc->client_architecture_support); + ret = vmc->client_architecture_support(ms, first_cpu, param1); + } + + *ret2 = 0; + } + } else if (strcmp(inst->path, "/rtas") == 0) { + if (strcmp(method, "instantiate-rtas") == 0) { + vof_instantiate_rtas(&error_fatal); + ret = 0; + *ret2 = param1; /* rtas-base */ + } + } else { + trace_vof_error_unknown_method(method); + } + +trace_exit: + trace_vof_method(ihandle, method, param1, ret, *ret2); + + return ret; +} + +static uint32_t vof_call_interpret(uint32_t cmdaddr, uint32_t param1, + uint32_t param2, uint32_t *ret2) +{ + uint32_t ret = -1; + char cmd[VOF_MAX_FORTHCODE] = ""; + + /* No interpret implemented so just call a trace */ + readstr(cmdaddr, cmd, sizeof(cmd)); + trace_vof_interpret(cmd, param1, param2, ret, *ret2); + + return ret; +} + +static void vof_quiesce(MachineState *ms, void *fdt, Vof *vof) +{ + Object *vmo = object_dynamic_cast(OBJECT(ms), TYPE_VOF_MACHINE_IF); + /* After "quiesce", no change is expected to the FDT, pack FDT to ensure */ + int rc = fdt_pack(fdt); + + assert(rc == 0); + + if (vmo) { + VofMachineIfClass *vmc = VOF_MACHINE_GET_CLASS(vmo); + + if (vmc->quiesce) { + vmc->quiesce(ms); + } + } + + vof_claimed_dump(vof->claimed); +} + +static uint32_t vof_client_handle(MachineState *ms, void *fdt, Vof *vof, + const char *service, + uint32_t *args, unsigned nargs, + uint32_t *rets, unsigned nrets) +{ + uint32_t ret = 0; + + /* @nrets includes the value which this function returns */ +#define cmpserv(s, a, r) \ + cmpservice(service, nargs, nrets, (s), (a), (r)) + + if (cmpserv("finddevice", 1, 1)) { + ret = vof_finddevice(fdt, args[0]); + } else if (cmpserv("getprop", 4, 1)) { + ret = vof_getprop(fdt, args[0], args[1], args[2], args[3]); + } else if (cmpserv("getproplen", 2, 1)) { + ret = vof_getproplen(fdt, args[0], args[1]); + } else if (cmpserv("setprop", 4, 1)) { + ret = vof_setprop(ms, fdt, vof, args[0], args[1], args[2], args[3]); + } else if (cmpserv("nextprop", 3, 1)) { + ret = vof_nextprop(fdt, args[0], args[1], args[2]); + } else if (cmpserv("peer", 1, 1)) { + ret = vof_peer(fdt, args[0]); + } else if (cmpserv("child", 1, 1)) { + ret = vof_child(fdt, args[0]); + } else if (cmpserv("parent", 1, 1)) { + ret = vof_parent(fdt, args[0]); + } else if (cmpserv("open", 1, 1)) { + ret = vof_open(fdt, vof, args[0]); + } else if (cmpserv("close", 1, 0)) { + vof_close(vof, args[0]); + } else if (cmpserv("instance-to-package", 1, 1)) { + ret = vof_instance_to_package(vof, args[0]); + } else if (cmpserv("package-to-path", 3, 1)) { + ret = vof_package_to_path(fdt, args[0], args[1], args[2]); + } else if (cmpserv("instance-to-path", 3, 1)) { + ret = vof_instance_to_path(fdt, vof, args[0], args[1], args[2]); + } else if (cmpserv("write", 3, 1)) { + ret = vof_write(vof, args[0], args[1], args[2]); + } else if (cmpserv("claim", 3, 1)) { + ret = vof_claim(vof, args[0], args[1], args[2]); + if (ret != -1) { + vof_dt_memory_available(fdt, vof->claimed, vof->claimed_base); + } + } else if (cmpserv("release", 2, 0)) { + ret = vof_release(vof, args[0], args[1]); + if (ret != -1) { + vof_dt_memory_available(fdt, vof->claimed, vof->claimed_base); + } + } else if (cmpserv("call-method", 0, 0)) { + ret = vof_call_method(ms, vof, args[0], args[1], args[2], args[3], + args[4], args[5], rets); + } else if (cmpserv("interpret", 0, 0)) { + ret = vof_call_interpret(args[0], args[1], args[2], rets); + } else if (cmpserv("milliseconds", 0, 1)) { + ret = qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL); + } else if (cmpserv("quiesce", 0, 0)) { + vof_quiesce(ms, fdt, vof); + } else if (cmpserv("exit", 0, 0)) { + error_report("Stopped as the VM requested \"exit\""); + vm_stop(RUN_STATE_PAUSED); + } else { + trace_vof_error_unknown_service(service, nargs, nrets); + ret = -1; + } + + return ret; +} + +/* Defined as Big Endian */ +struct prom_args { + uint32_t service; + uint32_t nargs; + uint32_t nret; + uint32_t args[10]; +} QEMU_PACKED; + +int vof_client_call(MachineState *ms, Vof *vof, void *fdt, + target_ulong args_real) +{ + struct prom_args args_be; + uint32_t args[ARRAY_SIZE(args_be.args)]; + uint32_t rets[ARRAY_SIZE(args_be.args)] = { 0 }, ret; + char service[64]; + unsigned nargs, nret, i; + + if (VOF_MEM_READ(args_real, &args_be, sizeof(args_be)) != MEMTX_OK) { + return -EINVAL; + } + nargs = be32_to_cpu(args_be.nargs); + if (nargs >= ARRAY_SIZE(args_be.args)) { + return -EINVAL; + } + + if (VOF_MEM_READ(be32_to_cpu(args_be.service), service, sizeof(service)) != + MEMTX_OK) { + return -EINVAL; + } + if (strnlen(service, sizeof(service)) == sizeof(service)) { + /* Too long service name */ + return -EINVAL; + } + + for (i = 0; i < nargs; ++i) { + args[i] = be32_to_cpu(args_be.args[i]); + } + + nret = be32_to_cpu(args_be.nret); + ret = vof_client_handle(ms, fdt, vof, service, args, nargs, rets, nret); + if (!nret) { + return 0; + } + + args_be.args[nargs] = cpu_to_be32(ret); + for (i = 1; i < nret; ++i) { + args_be.args[nargs + i] = cpu_to_be32(rets[i - 1]); + } + + if (VOF_MEM_WRITE(args_real + offsetof(struct prom_args, args[nargs]), + args_be.args + nargs, sizeof(args_be.args[0]) * nret) != + MEMTX_OK) { + return -EINVAL; + } + + return 0; +} + +static void vof_instance_free(gpointer data) +{ + OfInstance *inst = (OfInstance *)data; + + g_free(inst->path); + g_free(inst); +} + +void vof_init(Vof *vof, uint64_t top_addr, Error **errp) +{ + vof_cleanup(vof); + + vof->of_instances = g_hash_table_new_full(g_direct_hash, g_direct_equal, + NULL, vof_instance_free); + vof->claimed = g_array_new(false, false, sizeof(OfClaimed)); + + /* Keep allocations in 32bit as CLI ABI can only return cells==32bit */ + vof->top_addr = MIN(top_addr, 4 * GiB); + if (vof_claim(vof, 0, vof->fw_size, 0) == -1) { + error_setg(errp, "Memory for firmware is in use"); + } +} + +void vof_cleanup(Vof *vof) +{ + if (vof->claimed) { + g_array_unref(vof->claimed); + } + if (vof->of_instances) { + g_hash_table_unref(vof->of_instances); + } + vof->claimed = NULL; + vof->of_instances = NULL; +} + +void vof_build_dt(void *fdt, Vof *vof) +{ + uint32_t phandle = fdt_get_max_phandle(fdt); + int offset, proplen = 0; + const void *prop; + + /* Assign phandles to nodes without predefined phandles (like XICS/XIVE) */ + for (offset = fdt_next_node(fdt, -1, NULL); + offset >= 0; + offset = fdt_next_node(fdt, offset, NULL)) { + prop = fdt_getprop(fdt, offset, "phandle", &proplen); + if (prop) { + continue; + } + ++phandle; + _FDT(fdt_setprop_cell(fdt, offset, "phandle", phandle)); + } + + vof_dt_memory_available(fdt, vof->claimed, vof->claimed_base); +} + +static const TypeInfo vof_machine_if_info = { + .name = TYPE_VOF_MACHINE_IF, + .parent = TYPE_INTERFACE, + .class_size = sizeof(VofMachineIfClass), +}; + +static void vof_machine_if_register_types(void) +{ + type_register_static(&vof_machine_if_info); +} +type_init(vof_machine_if_register_types) diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h index 5697327e4c..1e63f33e9a 100644 --- a/include/hw/ppc/spapr.h +++ b/include/hw/ppc/spapr.h @@ -12,6 +12,7 @@ #include "hw/ppc/spapr_xive.h" /* For SpaprXive */ #include "hw/ppc/xics.h" /* For ICSState */ #include "hw/ppc/spapr_tpm_proxy.h" +#include "hw/ppc/vof.h" struct SpaprVioBus; struct SpaprPhbState; @@ -180,6 +181,7 @@ struct SpaprMachineState { uint64_t kernel_addr; uint32_t initrd_base; long initrd_size; + Vof *vof; uint64_t rtc_offset; /* Now used only during incoming migration */ struct PPCTimebase tb; bool has_graphics; @@ -558,7 +560,9 @@ struct SpaprMachineState { /* Client Architecture support */ #define KVMPPC_H_CAS (KVMPPC_HCALL_BASE + 0x2) #define KVMPPC_H_UPDATE_DT (KVMPPC_HCALL_BASE + 0x3) -#define KVMPPC_HCALL_MAX KVMPPC_H_UPDATE_DT +/* 0x4 was used for KVMPPC_H_UPDATE_PHANDLE in SLOF */ +#define KVMPPC_H_VOF_CLIENT (KVMPPC_HCALL_BASE + 0x5) +#define KVMPPC_HCALL_MAX KVMPPC_H_VOF_CLIENT /* * The hcall range 0xEF00 to 0xEF80 is reserved for use in facilitating @@ -956,4 +960,17 @@ bool spapr_check_pagesize(SpaprMachineState *spapr, hwaddr pagesize, void spapr_set_all_lpcrs(target_ulong value, target_ulong mask); hwaddr spapr_get_rtas_addr(void); bool spapr_memory_hot_unplug_supported(SpaprMachineState *spapr); + +void spapr_vof_reset(SpaprMachineState *spapr, void *fdt, + target_ulong *stack_ptr, Error **errp); +void spapr_vof_quiesce(MachineState *ms); +bool spapr_vof_setprop(MachineState *ms, const char *path, const char *propname, + void *val, int vallen); +target_ulong spapr_h_vof_client(PowerPCCPU *cpu, SpaprMachineState *spapr, + target_ulong opcode, target_ulong *args); +target_ulong spapr_vof_client_architecture_support(MachineState *ms, + CPUState *cs, + target_ulong ovec_addr); +void spapr_vof_client_dt_finalize(SpaprMachineState *spapr, void *fdt); + #endif /* HW_SPAPR_H */ diff --git a/include/hw/ppc/vof.h b/include/hw/ppc/vof.h new file mode 100644 index 0000000000..640be46163 --- /dev/null +++ b/include/hw/ppc/vof.h @@ -0,0 +1,58 @@ +/* + * Virtual Open Firmware + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ +#ifndef HW_VOF_H +#define HW_VOF_H + +typedef struct Vof { + uint64_t top_addr; /* copied from rma_size */ + GArray *claimed; /* array of SpaprOfClaimed */ + uint64_t claimed_base; + GHashTable *of_instances; /* ihandle -> SpaprOfInstance */ + uint32_t of_instance_last; + char *bootargs; + long fw_size; +} Vof; + +int vof_client_call(MachineState *ms, Vof *vof, void *fdt, + target_ulong args_real); +uint64_t vof_claim(Vof *vof, uint64_t virt, uint64_t size, uint64_t align); +void vof_init(Vof *vof, uint64_t top_addr, Error **errp); +void vof_cleanup(Vof *vof); +void vof_build_dt(void *fdt, Vof *vof); +uint32_t vof_client_open_store(void *fdt, Vof *vof, const char *nodename, + const char *prop, const char *path); + +#define TYPE_VOF_MACHINE_IF "vof-machine-if" + +typedef struct VofMachineIfClass VofMachineIfClass; +DECLARE_CLASS_CHECKERS(VofMachineIfClass, VOF_MACHINE, TYPE_VOF_MACHINE_IF) + +struct VofMachineIfClass { + InterfaceClass parent; + target_ulong (*client_architecture_support)(MachineState *ms, CPUState *cs, + target_ulong vec); + void (*quiesce)(MachineState *ms); + bool (*setprop)(MachineState *ms, const char *path, const char *propname, + void *val, int vallen); +}; + +/* + * Initial stack size is from + * https://www.devicetree.org/open-firmware/bindings/ppc/release/ppc-2_1.html#REF27292 + * + * "Client programs shall be invoked with a valid stack pointer (r1) with + * at least 32K bytes of memory available for stack growth". + */ +#define VOF_STACK_SIZE 0x8000 + +#define VOF_MEM_READ(pa, buf, size) \ + address_space_read(&address_space_memory, \ + (pa), MEMTXATTRS_UNSPECIFIED, (buf), (size)) +#define VOF_MEM_WRITE(pa, buf, size) \ + address_space_write(&address_space_memory, \ + (pa), MEMTXATTRS_UNSPECIFIED, (buf), (size)) + +#endif /* HW_VOF_H */ diff --git a/pc-bios/README b/pc-bios/README index c101c9a04f..d344e3bc1b 100644 --- a/pc-bios/README +++ b/pc-bios/README @@ -16,6 +16,10 @@ https://github.com/aik/SLOF, and the image currently in qemu is built from git tag qemu-slof-20210217. +- VOF (Virtual Open Firmware) is a minimalistic firmware to work with + -machine pseries,x-vof=on. When enabled, the firmware acts as a slim shim and + QEMU implements parts of the IEEE 1275 Open Firmware interface. + - sgabios (the Serial Graphics Adapter option ROM) provides a means for legacy x86 software to communicate with an attached serial console as if a video card were attached. The master sources reside in a subversion diff --git a/pc-bios/vof-nvram.bin b/pc-bios/vof-nvram.bin new file mode 100644 index 0000000000..d183901cf9 Binary files /dev/null and b/pc-bios/vof-nvram.bin differ diff --git a/pc-bios/vof.bin b/pc-bios/vof.bin new file mode 100755 index 0000000000..1ec670be82 Binary files /dev/null and b/pc-bios/vof.bin differ diff --git a/pc-bios/vof/Makefile b/pc-bios/vof/Makefile new file mode 100644 index 0000000000..aa1678c4d8 --- /dev/null +++ b/pc-bios/vof/Makefile @@ -0,0 +1,23 @@ +all: build-all + +build-all: vof.bin + +CROSS ?= +CC = $(CROSS)gcc +LD = $(CROSS)ld +OBJCOPY = $(CROSS)objcopy + +%.o: %.S + $(CC) -m32 -mbig-endian -mcpu=power4 -c -o $@ $< + +%.o: %.c + $(CC) -m32 -mbig-endian -mcpu=power4 -c -fno-stack-protector -o $@ $< + +vof.elf: entry.o main.o ci.o bootmem.o libc.o + $(LD) -nostdlib -e_start -Tvof.lds -EB -o $@ $^ + +%.bin: %.elf + $(OBJCOPY) -O binary -j .text -j .data -j .toc -j .got2 $^ $@ + +clean: + rm -f *.o vof.bin vof.elf *~ diff --git a/pc-bios/vof/bootmem.c b/pc-bios/vof/bootmem.c new file mode 100644 index 0000000000..771b9e95f9 --- /dev/null +++ b/pc-bios/vof/bootmem.c @@ -0,0 +1,14 @@ +#include "vof.h" + +void boot_from_memory(uint64_t initrd, uint64_t initrdsize) +{ + uint64_t kern[2]; + phandle chosen = ci_finddevice("/chosen"); + + if (ci_getprop(chosen, "qemu,boot-kernel", kern, sizeof(kern)) != + sizeof(kern)) { + return; + } + + do_boot(kern[0], initrd, initrdsize); +} diff --git a/pc-bios/vof/ci.c b/pc-bios/vof/ci.c new file mode 100644 index 0000000000..2b56050238 --- /dev/null +++ b/pc-bios/vof/ci.c @@ -0,0 +1,91 @@ +#include "vof.h" + +struct prom_args { + uint32_t service; + uint32_t nargs; + uint32_t nret; + uint32_t args[10]; +}; + +typedef unsigned long prom_arg_t; + +#define ADDR(x) (uint32_t)(x) + +static int prom_handle(struct prom_args *pargs) +{ + void *rtasbase; + uint32_t rtassize = 0; + phandle rtas; + + if (strcmp("call-method", (void *)(unsigned long)pargs->service)) { + return -1; + } + + if (strcmp("instantiate-rtas", (void *)(unsigned long)pargs->args[0])) { + return -1; + } + + rtas = ci_finddevice("/rtas"); + /* rtas-size is set by QEMU depending of FWNMI support */ + ci_getprop(rtas, "rtas-size", &rtassize, sizeof(rtassize)); + if (rtassize < hv_rtas_size) { + return -1; + } + + rtasbase = (void *)(unsigned long) pargs->args[2]; + + memcpy(rtasbase, hv_rtas, hv_rtas_size); + pargs->args[pargs->nargs] = 0; + pargs->args[pargs->nargs + 1] = pargs->args[2]; + + return 0; +} + +void prom_entry(uint32_t args) +{ + if (prom_handle((void *)(unsigned long) args)) { + ci_entry(args); + } +} + +static int call_ci(const char *service, int nargs, int nret, ...) +{ + int i; + struct prom_args args; + va_list list; + + args.service = ADDR(service); + args.nargs = nargs; + args.nret = nret; + + va_start(list, nret); + for (i = 0; i < nargs; i++) { + args.args[i] = va_arg(list, prom_arg_t); + } + va_end(list); + + for (i = 0; i < nret; i++) { + args.args[nargs + i] = 0; + } + + if (ci_entry((uint32_t)(&args)) < 0) { + return PROM_ERROR; + } + + return (nret > 0) ? args.args[nargs] : 0; +} + +void ci_panic(const char *str) +{ + call_ci("exit", 0, 0); +} + +phandle ci_finddevice(const char *path) +{ + return call_ci("finddevice", 1, 1, path); +} + +uint32_t ci_getprop(phandle ph, const char *propname, void *prop, int len) +{ + return call_ci("getprop", 4, 1, ph, propname, prop, len); +} diff --git a/pc-bios/vof/entry.S b/pc-bios/vof/entry.S new file mode 100644 index 0000000000..10a101fb6d --- /dev/null +++ b/pc-bios/vof/entry.S @@ -0,0 +1,49 @@ +#define LOAD32(rn, name) \ + lis rn,name##@h; \ + ori rn,rn,name##@l + +#define ENTRY(func_name) \ + .text; \ + .align 2; \ + .globl .func_name; \ + .func_name: \ + .globl func_name; \ + func_name: + +#define KVMPPC_HCALL_BASE 0xf000 +#define KVMPPC_H_RTAS (KVMPPC_HCALL_BASE + 0x0) +#define KVMPPC_H_VOF_CLIENT (KVMPPC_HCALL_BASE + 0x5) + + . = 0x100 /* Do exactly as SLOF does */ + +ENTRY(_start) + LOAD32(2, __toc_start) + b entry_c + +ENTRY(_prom_entry) + LOAD32(2, __toc_start) + stwu %r1,-112(%r1) + stw %r31,104(%r1) + mflr %r31 + bl prom_entry + nop + mtlr %r31 + lwz %r31,104(%r1) + addi %r1,%r1,112 + blr + +ENTRY(ci_entry) + mr 4,3 + LOAD32(3,KVMPPC_H_VOF_CLIENT) + sc 1 + blr + +/* This is the actual RTAS blob copied to the OS at instantiate-rtas */ +ENTRY(hv_rtas) + mr %r4,%r3 + LOAD32(3,KVMPPC_H_RTAS) + sc 1 + blr + .globl hv_rtas_size +hv_rtas_size: + .long . - hv_rtas; diff --git a/pc-bios/vof/libc.c b/pc-bios/vof/libc.c new file mode 100644 index 0000000000..00c10e6e7d --- /dev/null +++ b/pc-bios/vof/libc.c @@ -0,0 +1,92 @@ +#include "vof.h" + +int strlen(const char *s) +{ + int len = 0; + + while (*s != 0) { + len += 1; + s += 1; + } + + return len; +} + +int strcmp(const char *s1, const char *s2) +{ + while (*s1 != 0 && *s2 != 0) { + if (*s1 != *s2) { + break; + } + s1 += 1; + s2 += 1; + } + + return *s1 - *s2; +} + +void *memcpy(void *dest, const void *src, size_t n) +{ + char *cdest; + const char *csrc = src; + + cdest = dest; + while (n-- > 0) { + *cdest++ = *csrc++; + } + + return dest; +} + +int memcmp(const void *ptr1, const void *ptr2, size_t n) +{ + const unsigned char *p1 = ptr1; + const unsigned char *p2 = ptr2; + + while (n-- > 0) { + if (*p1 != *p2) { + return *p1 - *p2; + } + p1 += 1; + p2 += 1; + } + + return 0; +} + +void *memmove(void *dest, const void *src, size_t n) +{ + char *cdest; + const char *csrc; + int i; + + /* Do the buffers overlap in a bad way? */ + if (src < dest && src + n >= dest) { + /* Copy from end to start */ + cdest = dest + n - 1; + csrc = src + n - 1; + for (i = 0; i < n; i++) { + *cdest-- = *csrc--; + } + } else { + /* Normal copy is possible */ + cdest = dest; + csrc = src; + for (i = 0; i < n; i++) { + *cdest++ = *csrc++; + } + } + + return dest; +} + +void *memset(void *dest, int c, size_t size) +{ + unsigned char *d = (unsigned char *)dest; + + while (size-- > 0) { + *d++ = (unsigned char)c; + } + + return dest; +} diff --git a/pc-bios/vof/main.c b/pc-bios/vof/main.c new file mode 100644 index 0000000000..9fc30d2d09 --- /dev/null +++ b/pc-bios/vof/main.c @@ -0,0 +1,21 @@ +#include "vof.h" + +void do_boot(unsigned long addr, unsigned long _r3, unsigned long _r4) +{ + register unsigned long r3 __asm__("r3") = _r3; + register unsigned long r4 __asm__("r4") = _r4; + register unsigned long r5 __asm__("r5") = (unsigned long) _prom_entry; + + ((client *)(uint32_t)addr)(); +} + +void entry_c(void) +{ + register unsigned long r3 __asm__("r3"); + register unsigned long r4 __asm__("r4"); + register unsigned long r5 __asm__("r5"); + uint64_t initrd = r3, initrdsize = r4; + + boot_from_memory(initrd, initrdsize); + ci_panic("*** No boot target ***\n"); +} diff --git a/pc-bios/vof/vof.h b/pc-bios/vof/vof.h new file mode 100644 index 0000000000..2d89580769 --- /dev/null +++ b/pc-bios/vof/vof.h @@ -0,0 +1,43 @@ +/* + * Virtual Open Firmware + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ +#include + +typedef unsigned char uint8_t; +typedef unsigned short uint16_t; +typedef unsigned long uint32_t; +typedef unsigned long long uint64_t; +#define NULL (0) +#define PROM_ERROR (-1u) +typedef unsigned long ihandle; +typedef unsigned long phandle; +typedef int size_t; +typedef void client(void); + +/* globals */ +extern void _prom_entry(void); /* OF CI entry point (i.e. this firmware) */ + +void do_boot(unsigned long addr, unsigned long r3, unsigned long r4); + +/* libc */ +int strlen(const char *s); +int strcmp(const char *s1, const char *s2); +void *memcpy(void *dest, const void *src, size_t n); +int memcmp(const void *ptr1, const void *ptr2, size_t n); +void *memmove(void *dest, const void *src, size_t n); +void *memset(void *dest, int c, size_t size); + +/* CI wrappers */ +void ci_panic(const char *str); +phandle ci_finddevice(const char *path); +uint32_t ci_getprop(phandle ph, const char *propname, void *prop, int len); + +/* booting from -kernel */ +void boot_from_memory(uint64_t initrd, uint64_t initrdsize); + +/* Entry points for CI and RTAS */ +extern uint32_t ci_entry(uint32_t params); +extern unsigned long hv_rtas(unsigned long params); +extern unsigned int hv_rtas_size; diff --git a/pc-bios/vof/vof.lds b/pc-bios/vof/vof.lds new file mode 100644 index 0000000000..1506ab4b01 --- /dev/null +++ b/pc-bios/vof/vof.lds @@ -0,0 +1,48 @@ +OUTPUT_FORMAT("elf32-powerpc") +OUTPUT_ARCH(powerpc:common) + +/* set the entry point */ +ENTRY ( __start ) + +SECTIONS { + __executable_start = .; + + .text : { + *(.text) + } + + __etext = .; + + . = ALIGN(8); + + .data : { + *(.data) + *(.rodata .rodata.*) + *(.got1) + *(.sdata) + *(.opd) + } + + /* FIXME bss at end ??? */ + + . = ALIGN(8); + __bss_start = .; + .bss : { + *(.sbss) *(.scommon) + *(.dynbss) + *(.bss) + } + + . = ALIGN(8); + __bss_end = .; + __bss_size = (__bss_end - __bss_start); + + . = ALIGN(256); + __toc_start = DEFINED (.TOC.) ? .TOC. : ADDR (.got) + 0x8000; + .got : + { + *(.toc .got) + } + . = ALIGN(8); + __toc_end = .; +} diff --git a/tests/qtest/rtas-test.c b/tests/qtest/rtas-test.c index 16751dbd2f..5f1194a6eb 100644 --- a/tests/qtest/rtas-test.c +++ b/tests/qtest/rtas-test.c @@ -5,7 +5,7 @@ #include "libqos/libqos-spapr.h" #include "libqos/rtas.h" -static void test_rtas_get_time_of_day(void) +static void run_test_rtas_get_time_of_day(const char *machine) { QOSState *qs; struct tm tm; @@ -13,7 +13,7 @@ static void test_rtas_get_time_of_day(void) uint64_t ret; time_t t1, t2; - qs = qtest_spapr_boot("-machine pseries"); + qs = qtest_spapr_boot(machine); t1 = time(NULL); ret = qrtas_get_time_of_day(qs->qts, &qs->alloc, &tm, &ns); @@ -24,6 +24,16 @@ static void test_rtas_get_time_of_day(void) qtest_shutdown(qs); } +static void test_rtas_get_time_of_day(void) +{ + run_test_rtas_get_time_of_day("-machine pseries"); +} + +static void test_rtas_get_time_of_day_vof(void) +{ + run_test_rtas_get_time_of_day("-machine pseries,x-vof=on"); +} + int main(int argc, char *argv[]) { const char *arch = qtest_get_arch(); @@ -35,6 +45,7 @@ int main(int argc, char *argv[]) exit(EXIT_FAILURE); } qtest_add_func("rtas/get-time-of-day", test_rtas_get_time_of_day); + qtest_add_func("rtas/get-time-of-day-vof", test_rtas_get_time_of_day_vof); return g_test_run(); } -- cgit v1.2.3-55-g7522 From 17fd09c0212b1595377fd62ade033dcd4147f8b6 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Tue, 15 Jun 2021 14:41:07 +1000 Subject: target/ppc/spapr: Update H_GET_CPU_CHARACTERISTICS L1D cache flush bits There are several new L1D cache flush bits added to the hcall which reflect hardware security features for speculative cache access issues. These behaviours are now being specified as negative in order to simplify patched kernel compatibility with older firmware (a new problem found in existing systems would automatically be vulnerable). [dwg: Technically this changes behaviour for existing machine types. After discussion with Nick, we've determined this is safe, because the worst that will happen if a guest gets the wrong information due to a migration is that it will perform some unnecessary workarounds, but will remain correct and secure (well, as secure as it was going to be anyway). In addition the change only affects cap-cfpc=safe which is not enabled by default, and in fact is not possible to set on any current hardware (though it's expected it will be possible on POWER10)] Signed-off-by: Nicholas Piggin Message-Id: <20210615044107.1481608-1-npiggin@gmail.com> Signed-off-by: David Gibson --- hw/ppc/spapr_hcall.c | 2 ++ include/hw/ppc/spapr.h | 3 +++ 2 files changed, 5 insertions(+) (limited to 'include') diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c index 03fc191599..80ae8eaadd 100644 --- a/hw/ppc/spapr_hcall.c +++ b/hw/ppc/spapr_hcall.c @@ -1318,6 +1318,8 @@ static target_ulong h_get_cpu_characteristics(PowerPCCPU *cpu, behaviour |= H_CPU_BEHAV_L1D_FLUSH_PR; break; case SPAPR_CAP_FIXED: + behaviour |= H_CPU_BEHAV_NO_L1D_FLUSH_ENTRY; + behaviour |= H_CPU_BEHAV_NO_L1D_FLUSH_UACCESS; break; default: /* broken */ assert(safe_cache == SPAPR_CAP_BROKEN); diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h index 1e63f33e9a..a25e69fe4c 100644 --- a/include/hw/ppc/spapr.h +++ b/include/hw/ppc/spapr.h @@ -400,10 +400,13 @@ struct SpaprMachineState { #define H_CPU_CHAR_THR_RECONF_TRIG PPC_BIT(6) #define H_CPU_CHAR_CACHE_COUNT_DIS PPC_BIT(7) #define H_CPU_CHAR_BCCTR_FLUSH_ASSIST PPC_BIT(9) + #define H_CPU_BEHAV_FAVOUR_SECURITY PPC_BIT(0) #define H_CPU_BEHAV_L1D_FLUSH_PR PPC_BIT(1) #define H_CPU_BEHAV_BNDS_CHK_SPEC_BAR PPC_BIT(2) #define H_CPU_BEHAV_FLUSH_COUNT_CACHE PPC_BIT(5) +#define H_CPU_BEHAV_NO_L1D_FLUSH_ENTRY PPC_BIT(7) +#define H_CPU_BEHAV_NO_L1D_FLUSH_UACCESS PPC_BIT(8) /* Each control block has to be on a 4K boundary */ #define H_CB_ALIGNMENT 4096 -- cgit v1.2.3-55-g7522 From 21bde1ecb6cecba1d2f0219a1b79c240bed78749 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Thu, 8 Jul 2021 16:56:25 +1000 Subject: spapr: Fix implementation of Open Firmware client interface This addresses the comments from v22. The functional changes are (the VOF ones need retesting with Pegasos2): (VOF) setprop will start failing if the machine class callback did not handle it; (VOF) unit addresses are lowered in path_offset(); (SPAPR) /chosen/bootargs is initialized from kernel_cmdline if the client did not change it. Fixes: 5c991e5d4378 ("spapr: Implement Open Firmware client interface") Cc: BALATON Zoltan Signed-off-by: Alexey Kardashevskiy Message-Id: <20210708065625.548396-1-aik@ozlabs.ru> Tested-by: BALATON Zoltan Signed-off-by: David Gibson --- MAINTAINERS | 4 ++-- hw/ppc/spapr.c | 10 +--------- hw/ppc/spapr_hcall.c | 5 ++--- hw/ppc/spapr_vof.c | 32 +++++++++++++++++++++++--------- hw/ppc/vof.c | 30 +++++++++++++++++------------- include/hw/ppc/spapr.h | 3 +-- pc-bios/vof.bin | Bin 3784 -> 3456 bytes pc-bios/vof/ci.c | 2 +- pc-bios/vof/libc.c | 26 -------------------------- pc-bios/vof/main.c | 2 +- pc-bios/vof/vof.h | 2 -- 11 files changed, 48 insertions(+), 68 deletions(-) (limited to 'include') diff --git a/MAINTAINERS b/MAINTAINERS index ce122eeced..89d71b42b2 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1362,8 +1362,8 @@ F: include/hw/pci-host/mv64361.h Virtual Open Firmware (VOF) M: Alexey Kardashevskiy -M: David Gibson -M: Greg Kurz +R: David Gibson +R: Greg Kurz L: qemu-ppc@nongnu.org S: Maintained F: hw/ppc/spapr_vof* diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index e9b6d0f587..3808d47053 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -1645,15 +1645,7 @@ static void spapr_machine_reset(MachineState *machine) fdt = spapr_build_fdt(spapr, true, FDT_MAX_SIZE); if (spapr->vof) { - target_ulong stack_ptr = 0; - - spapr_vof_reset(spapr, fdt, &stack_ptr, &error_fatal); - - spapr_cpu_set_entry_state(first_ppc_cpu, SPAPR_ENTRY_POINT, - stack_ptr, spapr->initrd_base, - spapr->initrd_size); - /* VOF is 32bit BE so enforce MSR here */ - first_ppc_cpu->env.msr &= ~((1ULL << MSR_SF) | (1ULL << MSR_LE)); + spapr_vof_reset(spapr, fdt, &error_fatal); /* * Do not pack the FDT as the client may change properties. * VOF client does not expect the FDT so we do not load it to the VM. diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c index 80ae8eaadd..0e9a5b2e40 100644 --- a/hw/ppc/spapr_hcall.c +++ b/hw/ppc/spapr_hcall.c @@ -1080,7 +1080,7 @@ target_ulong do_client_architecture_support(PowerPCCPU *cpu, SpaprOptionVector *ov1_guest, *ov5_guest; bool guest_radix; bool raw_mode_supported = false; - bool guest_xive, reset_fdt = false; + bool guest_xive; CPUState *cs; void *fdt; uint32_t max_compat = spapr->max_compat_pvr; @@ -1233,8 +1233,7 @@ target_ulong do_client_architecture_support(PowerPCCPU *cpu, spapr_setup_hpt(spapr); } - reset_fdt = spapr->vof != NULL; - fdt = spapr_build_fdt(spapr, reset_fdt, fdt_bufsize); + fdt = spapr_build_fdt(spapr, spapr->vof != NULL, fdt_bufsize); g_free(spapr->fdt_blob); spapr->fdt_size = fdt_totalsize(fdt); spapr->fdt_initial_size = spapr->fdt_size; diff --git a/hw/ppc/spapr_vof.c b/hw/ppc/spapr_vof.c index 131a03fec0..40ce8fe003 100644 --- a/hw/ppc/spapr_vof.c +++ b/hw/ppc/spapr_vof.c @@ -8,6 +8,7 @@ #include "qapi/error.h" #include "hw/ppc/spapr.h" #include "hw/ppc/spapr_vio.h" +#include "hw/ppc/spapr_cpu_core.h" #include "hw/ppc/fdt.h" #include "hw/ppc/vof.h" #include "sysemu/sysemu.h" @@ -29,13 +30,19 @@ target_ulong spapr_h_vof_client(PowerPCCPU *cpu, SpaprMachineState *spapr, void spapr_vof_client_dt_finalize(SpaprMachineState *spapr, void *fdt) { char *stdout_path = spapr_vio_stdout_path(spapr->vio_bus); - int chosen; vof_build_dt(fdt, spapr->vof); - _FDT(chosen = fdt_path_offset(fdt, "/chosen")); - _FDT(fdt_setprop_string(fdt, chosen, "bootargs", - spapr->vof->bootargs ? : "")); + if (spapr->vof->bootargs) { + int chosen; + + _FDT(chosen = fdt_path_offset(fdt, "/chosen")); + /* + * If the client did not change "bootargs", spapr_dt_chosen() must have + * stored machine->kernel_cmdline in it before getting here. + */ + _FDT(fdt_setprop_string(fdt, chosen, "bootargs", spapr->vof->bootargs)); + } /* * SLOF-less setup requires an open instance of stdout for early @@ -48,20 +55,21 @@ void spapr_vof_client_dt_finalize(SpaprMachineState *spapr, void *fdt) } } -void spapr_vof_reset(SpaprMachineState *spapr, void *fdt, - target_ulong *stack_ptr, Error **errp) +void spapr_vof_reset(SpaprMachineState *spapr, void *fdt, Error **errp) { + target_ulong stack_ptr; Vof *vof = spapr->vof; + PowerPCCPU *first_ppc_cpu = POWERPC_CPU(first_cpu); vof_init(vof, spapr->rma_size, errp); - *stack_ptr = vof_claim(vof, 0, VOF_STACK_SIZE, VOF_STACK_SIZE); - if (*stack_ptr == -1) { + stack_ptr = vof_claim(vof, 0, VOF_STACK_SIZE, VOF_STACK_SIZE); + if (stack_ptr == -1) { error_setg(errp, "Memory allocation for stack failed"); return; } /* Stack grows downwards plus reserve space for the minimum stack frame */ - *stack_ptr += VOF_STACK_SIZE - 0x20; + stack_ptr += VOF_STACK_SIZE - 0x20; if (spapr->kernel_size && vof_claim(vof, spapr->kernel_addr, spapr->kernel_size, 0) == -1) { @@ -77,6 +85,12 @@ void spapr_vof_reset(SpaprMachineState *spapr, void *fdt, spapr_vof_client_dt_finalize(spapr, fdt); + spapr_cpu_set_entry_state(first_ppc_cpu, SPAPR_ENTRY_POINT, + stack_ptr, spapr->initrd_base, + spapr->initrd_size); + /* VOF is 32bit BE so enforce MSR here */ + first_ppc_cpu->env.msr &= ~((1ULL << MSR_SF) | (1ULL << MSR_LE)); + /* * At this point the expected allocation map is: * diff --git a/hw/ppc/vof.c b/hw/ppc/vof.c index 47c86e394e..81f6596215 100644 --- a/hw/ppc/vof.c +++ b/hw/ppc/vof.c @@ -144,15 +144,16 @@ static int path_offset(const void *fdt, const char *path) * the lower case forms of the hexadecimal digits in the range a..f, * suppressing leading zeros". */ - at = strchr(path, '@'); - if (!at) { - return fdt_path_offset(fdt, path); - } - p = g_strdup(path); - for (at = at - path + p + 1; *at; ++at) { - *at = tolower(*at); + for (at = strchr(p, '@'); at && *at; ) { + if (*at == '/') { + at = strchr(at, '@'); + } else { + *at = tolower(*at); + ++at; + } } + return fdt_path_offset(fdt, p); } @@ -300,6 +301,7 @@ static uint32_t vof_setprop(MachineState *ms, void *fdt, Vof *vof, char trval[64] = ""; char nodepath[VOF_MAX_PATH] = ""; Object *vmo = object_dynamic_cast(OBJECT(ms), TYPE_VOF_MACHINE_IF); + VofMachineIfClass *vmc; g_autofree char *val = NULL; if (vallen > VOF_MAX_SETPROPLEN) { @@ -322,13 +324,13 @@ static uint32_t vof_setprop(MachineState *ms, void *fdt, Vof *vof, goto trace_exit; } - if (vmo) { - VofMachineIfClass *vmc = VOF_MACHINE_GET_CLASS(vmo); + if (!vmo) { + goto trace_exit; + } - if (vmc->setprop && - !vmc->setprop(ms, nodepath, propname, val, vallen)) { - goto trace_exit; - } + vmc = VOF_MACHINE_GET_CLASS(vmo); + if (!vmc->setprop || !vmc->setprop(ms, nodepath, propname, val, vallen)) { + goto trace_exit; } ret = fdt_setprop(fdt, offset, propname, val, vallen); @@ -919,6 +921,8 @@ static uint32_t vof_client_handle(MachineState *ms, void *fdt, Vof *vof, ret = -1; } +#undef cmpserv + return ret; } diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h index a25e69fe4c..779f707fb8 100644 --- a/include/hw/ppc/spapr.h +++ b/include/hw/ppc/spapr.h @@ -964,8 +964,7 @@ void spapr_set_all_lpcrs(target_ulong value, target_ulong mask); hwaddr spapr_get_rtas_addr(void); bool spapr_memory_hot_unplug_supported(SpaprMachineState *spapr); -void spapr_vof_reset(SpaprMachineState *spapr, void *fdt, - target_ulong *stack_ptr, Error **errp); +void spapr_vof_reset(SpaprMachineState *spapr, void *fdt, Error **errp); void spapr_vof_quiesce(MachineState *ms); bool spapr_vof_setprop(MachineState *ms, const char *path, const char *propname, void *val, int vallen); diff --git a/pc-bios/vof.bin b/pc-bios/vof.bin index 1ec670be82..300cb7c7f9 100755 Binary files a/pc-bios/vof.bin and b/pc-bios/vof.bin differ diff --git a/pc-bios/vof/ci.c b/pc-bios/vof/ci.c index 2b56050238..fc4821b3e9 100644 --- a/pc-bios/vof/ci.c +++ b/pc-bios/vof/ci.c @@ -69,7 +69,7 @@ static int call_ci(const char *service, int nargs, int nret, ...) } if (ci_entry((uint32_t)(&args)) < 0) { - return PROM_ERROR; + return -1; } return (nret > 0) ? args.args[nargs] : 0; diff --git a/pc-bios/vof/libc.c b/pc-bios/vof/libc.c index 00c10e6e7d..fdbc30f777 100644 --- a/pc-bios/vof/libc.c +++ b/pc-bios/vof/libc.c @@ -54,32 +54,6 @@ int memcmp(const void *ptr1, const void *ptr2, size_t n) return 0; } -void *memmove(void *dest, const void *src, size_t n) -{ - char *cdest; - const char *csrc; - int i; - - /* Do the buffers overlap in a bad way? */ - if (src < dest && src + n >= dest) { - /* Copy from end to start */ - cdest = dest + n - 1; - csrc = src + n - 1; - for (i = 0; i < n; i++) { - *cdest-- = *csrc--; - } - } else { - /* Normal copy is possible */ - cdest = dest; - csrc = src; - for (i = 0; i < n; i++) { - *cdest++ = *csrc++; - } - } - - return dest; -} - void *memset(void *dest, int c, size_t size) { unsigned char *d = (unsigned char *)dest; diff --git a/pc-bios/vof/main.c b/pc-bios/vof/main.c index 9fc30d2d09..0f0f6b4cb1 100644 --- a/pc-bios/vof/main.c +++ b/pc-bios/vof/main.c @@ -6,7 +6,7 @@ void do_boot(unsigned long addr, unsigned long _r3, unsigned long _r4) register unsigned long r4 __asm__("r4") = _r4; register unsigned long r5 __asm__("r5") = (unsigned long) _prom_entry; - ((client *)(uint32_t)addr)(); + ((void (*)(void))(uint32_t)addr)(); } void entry_c(void) diff --git a/pc-bios/vof/vof.h b/pc-bios/vof/vof.h index 2d89580769..5f12c077f5 100644 --- a/pc-bios/vof/vof.h +++ b/pc-bios/vof/vof.h @@ -10,11 +10,9 @@ typedef unsigned short uint16_t; typedef unsigned long uint32_t; typedef unsigned long long uint64_t; #define NULL (0) -#define PROM_ERROR (-1u) typedef unsigned long ihandle; typedef unsigned long phandle; typedef int size_t; -typedef void client(void); /* globals */ extern void _prom_entry(void); /* OF CI entry point (i.e. this firmware) */ -- cgit v1.2.3-55-g7522 From 327d4b7f3f26eb19b8bc2b1b54afa6874612efdd Mon Sep 17 00:00:00 2001 From: Bharata B Rao Date: Tue, 6 Jul 2021 16:54:39 +0530 Subject: linux-headers: Update Update to mainline commit: 79160a603bdb ("Merge tag 'usb-5.14-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/usb" Signed-off-by: Bharata B Rao Message-Id: <20210706112440.1449562-2-bharata@linux.ibm.com> Signed-off-by: David Gibson --- include/standard-headers/asm-x86/kvm_para.h | 13 +++ include/standard-headers/drm/drm_fourcc.h | 7 ++ include/standard-headers/linux/ethtool.h | 4 +- include/standard-headers/linux/input-event-codes.h | 1 + include/standard-headers/linux/virtio_ids.h | 2 +- include/standard-headers/linux/virtio_vsock.h | 9 ++ linux-headers/asm-arm64/kvm.h | 11 +++ linux-headers/asm-generic/mman-common.h | 3 + linux-headers/asm-generic/unistd.h | 4 +- linux-headers/asm-mips/mman.h | 3 + linux-headers/asm-mips/unistd_n32.h | 1 + linux-headers/asm-mips/unistd_n64.h | 1 + linux-headers/asm-mips/unistd_o32.h | 1 + linux-headers/asm-powerpc/unistd_32.h | 1 + linux-headers/asm-powerpc/unistd_64.h | 1 + linux-headers/asm-s390/unistd_32.h | 1 + linux-headers/asm-s390/unistd_64.h | 1 + linux-headers/asm-x86/kvm.h | 13 +++ linux-headers/asm-x86/unistd_32.h | 7 +- linux-headers/asm-x86/unistd_64.h | 7 +- linux-headers/asm-x86/unistd_x32.h | 7 +- linux-headers/linux/kvm.h | 105 +++++++++++++++++++++ linux-headers/linux/userfaultfd.h | 11 ++- 23 files changed, 197 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/include/standard-headers/asm-x86/kvm_para.h b/include/standard-headers/asm-x86/kvm_para.h index 215d01b4ec..204cfb8640 100644 --- a/include/standard-headers/asm-x86/kvm_para.h +++ b/include/standard-headers/asm-x86/kvm_para.h @@ -33,6 +33,8 @@ #define KVM_FEATURE_PV_SCHED_YIELD 13 #define KVM_FEATURE_ASYNC_PF_INT 14 #define KVM_FEATURE_MSI_EXT_DEST_ID 15 +#define KVM_FEATURE_HC_MAP_GPA_RANGE 16 +#define KVM_FEATURE_MIGRATION_CONTROL 17 #define KVM_HINTS_REALTIME 0 @@ -54,6 +56,7 @@ #define MSR_KVM_POLL_CONTROL 0x4b564d05 #define MSR_KVM_ASYNC_PF_INT 0x4b564d06 #define MSR_KVM_ASYNC_PF_ACK 0x4b564d07 +#define MSR_KVM_MIGRATION_CONTROL 0x4b564d08 struct kvm_steal_time { uint64_t steal; @@ -90,6 +93,16 @@ struct kvm_clock_pairing { /* MSR_KVM_ASYNC_PF_INT */ #define KVM_ASYNC_PF_VEC_MASK GENMASK(7, 0) +/* MSR_KVM_MIGRATION_CONTROL */ +#define KVM_MIGRATION_READY (1 << 0) + +/* KVM_HC_MAP_GPA_RANGE */ +#define KVM_MAP_GPA_RANGE_PAGE_SZ_4K 0 +#define KVM_MAP_GPA_RANGE_PAGE_SZ_2M (1 << 0) +#define KVM_MAP_GPA_RANGE_PAGE_SZ_1G (1 << 1) +#define KVM_MAP_GPA_RANGE_ENC_STAT(n) (n << 4) +#define KVM_MAP_GPA_RANGE_ENCRYPTED KVM_MAP_GPA_RANGE_ENC_STAT(1) +#define KVM_MAP_GPA_RANGE_DECRYPTED KVM_MAP_GPA_RANGE_ENC_STAT(0) /* Operations for KVM_HC_MMU_OP */ #define KVM_MMU_OP_WRITE_PTE 1 diff --git a/include/standard-headers/drm/drm_fourcc.h b/include/standard-headers/drm/drm_fourcc.h index a61ae520c2..352b51fd0a 100644 --- a/include/standard-headers/drm/drm_fourcc.h +++ b/include/standard-headers/drm/drm_fourcc.h @@ -167,6 +167,13 @@ extern "C" { #define DRM_FORMAT_RGBA1010102 fourcc_code('R', 'A', '3', '0') /* [31:0] R:G:B:A 10:10:10:2 little endian */ #define DRM_FORMAT_BGRA1010102 fourcc_code('B', 'A', '3', '0') /* [31:0] B:G:R:A 10:10:10:2 little endian */ +/* 64 bpp RGB */ +#define DRM_FORMAT_XRGB16161616 fourcc_code('X', 'R', '4', '8') /* [63:0] x:R:G:B 16:16:16:16 little endian */ +#define DRM_FORMAT_XBGR16161616 fourcc_code('X', 'B', '4', '8') /* [63:0] x:B:G:R 16:16:16:16 little endian */ + +#define DRM_FORMAT_ARGB16161616 fourcc_code('A', 'R', '4', '8') /* [63:0] A:R:G:B 16:16:16:16 little endian */ +#define DRM_FORMAT_ABGR16161616 fourcc_code('A', 'B', '4', '8') /* [63:0] A:B:G:R 16:16:16:16 little endian */ + /* * Floating point 64bpp RGB * IEEE 754-2008 binary16 half-precision float diff --git a/include/standard-headers/linux/ethtool.h b/include/standard-headers/linux/ethtool.h index 218d944a17..053d3fafdf 100644 --- a/include/standard-headers/linux/ethtool.h +++ b/include/standard-headers/linux/ethtool.h @@ -233,7 +233,7 @@ enum tunable_id { ETHTOOL_PFC_PREVENTION_TOUT, /* timeout in msecs */ /* * Add your fresh new tunable attribute above and remember to update - * tunable_strings[] in net/core/ethtool.c + * tunable_strings[] in net/ethtool/common.c */ __ETHTOOL_TUNABLE_COUNT, }; @@ -297,7 +297,7 @@ enum phy_tunable_id { ETHTOOL_PHY_EDPD, /* * Add your fresh new phy tunable attribute above and remember to update - * phy_tunable_strings[] in net/core/ethtool.c + * phy_tunable_strings[] in net/ethtool/common.c */ __ETHTOOL_PHY_TUNABLE_COUNT, }; diff --git a/include/standard-headers/linux/input-event-codes.h b/include/standard-headers/linux/input-event-codes.h index c403b9cb0d..b5e86b40ab 100644 --- a/include/standard-headers/linux/input-event-codes.h +++ b/include/standard-headers/linux/input-event-codes.h @@ -611,6 +611,7 @@ #define KEY_VOICECOMMAND 0x246 /* Listening Voice Command */ #define KEY_ASSISTANT 0x247 /* AL Context-aware desktop assistant */ #define KEY_KBD_LAYOUT_NEXT 0x248 /* AC Next Keyboard Layout Select */ +#define KEY_EMOJI_PICKER 0x249 /* Show/hide emoji picker (HUTRR101) */ #define KEY_BRIGHTNESS_MIN 0x250 /* Set Brightness to Minimum */ #define KEY_BRIGHTNESS_MAX 0x251 /* Set Brightness to Maximum */ diff --git a/include/standard-headers/linux/virtio_ids.h b/include/standard-headers/linux/virtio_ids.h index f0c35ce862..4fe842c3a3 100644 --- a/include/standard-headers/linux/virtio_ids.h +++ b/include/standard-headers/linux/virtio_ids.h @@ -54,7 +54,7 @@ #define VIRTIO_ID_SOUND 25 /* virtio sound */ #define VIRTIO_ID_FS 26 /* virtio filesystem */ #define VIRTIO_ID_PMEM 27 /* virtio pmem */ -#define VIRTIO_ID_BT 28 /* virtio bluetooth */ #define VIRTIO_ID_MAC80211_HWSIM 29 /* virtio mac80211-hwsim */ +#define VIRTIO_ID_BT 40 /* virtio bluetooth */ #endif /* _LINUX_VIRTIO_IDS_H */ diff --git a/include/standard-headers/linux/virtio_vsock.h b/include/standard-headers/linux/virtio_vsock.h index be443211ce..3a23488e42 100644 --- a/include/standard-headers/linux/virtio_vsock.h +++ b/include/standard-headers/linux/virtio_vsock.h @@ -38,6 +38,9 @@ #include "standard-headers/linux/virtio_ids.h" #include "standard-headers/linux/virtio_config.h" +/* The feature bitmap for virtio vsock */ +#define VIRTIO_VSOCK_F_SEQPACKET 1 /* SOCK_SEQPACKET supported */ + struct virtio_vsock_config { uint64_t guest_cid; } QEMU_PACKED; @@ -65,6 +68,7 @@ struct virtio_vsock_hdr { enum virtio_vsock_type { VIRTIO_VSOCK_TYPE_STREAM = 1, + VIRTIO_VSOCK_TYPE_SEQPACKET = 2, }; enum virtio_vsock_op { @@ -91,4 +95,9 @@ enum virtio_vsock_shutdown { VIRTIO_VSOCK_SHUTDOWN_SEND = 2, }; +/* VIRTIO_VSOCK_OP_RW flags values */ +enum virtio_vsock_rw { + VIRTIO_VSOCK_SEQ_EOR = 1, +}; + #endif /* _LINUX_VIRTIO_VSOCK_H */ diff --git a/linux-headers/asm-arm64/kvm.h b/linux-headers/asm-arm64/kvm.h index b6a0eaa32a..3d2ce9912d 100644 --- a/linux-headers/asm-arm64/kvm.h +++ b/linux-headers/asm-arm64/kvm.h @@ -184,6 +184,17 @@ struct kvm_vcpu_events { __u32 reserved[12]; }; +struct kvm_arm_copy_mte_tags { + __u64 guest_ipa; + __u64 length; + void *addr; + __u64 flags; + __u64 reserved[2]; +}; + +#define KVM_ARM_TAGS_TO_GUEST 0 +#define KVM_ARM_TAGS_FROM_GUEST 1 + /* If you need to interpret the index values, here is the key: */ #define KVM_REG_ARM_COPROC_MASK 0x000000000FFF0000 #define KVM_REG_ARM_COPROC_SHIFT 16 diff --git a/linux-headers/asm-generic/mman-common.h b/linux-headers/asm-generic/mman-common.h index f94f65d429..1567a3294c 100644 --- a/linux-headers/asm-generic/mman-common.h +++ b/linux-headers/asm-generic/mman-common.h @@ -72,6 +72,9 @@ #define MADV_COLD 20 /* deactivate these pages */ #define MADV_PAGEOUT 21 /* reclaim these pages */ +#define MADV_POPULATE_READ 22 /* populate (prefault) page tables readable */ +#define MADV_POPULATE_WRITE 23 /* populate (prefault) page tables writable */ + /* compatibility flags */ #define MAP_FILE 0 diff --git a/linux-headers/asm-generic/unistd.h b/linux-headers/asm-generic/unistd.h index 6de5a7fc06..f211961ce1 100644 --- a/linux-headers/asm-generic/unistd.h +++ b/linux-headers/asm-generic/unistd.h @@ -863,8 +863,8 @@ __SYSCALL(__NR_process_madvise, sys_process_madvise) __SC_COMP(__NR_epoll_pwait2, sys_epoll_pwait2, compat_sys_epoll_pwait2) #define __NR_mount_setattr 442 __SYSCALL(__NR_mount_setattr, sys_mount_setattr) -#define __NR_quotactl_path 443 -__SYSCALL(__NR_quotactl_path, sys_quotactl_path) +#define __NR_quotactl_fd 443 +__SYSCALL(__NR_quotactl_fd, sys_quotactl_fd) #define __NR_landlock_create_ruleset 444 __SYSCALL(__NR_landlock_create_ruleset, sys_landlock_create_ruleset) diff --git a/linux-headers/asm-mips/mman.h b/linux-headers/asm-mips/mman.h index 57dc2ac4f8..40b210c65a 100644 --- a/linux-headers/asm-mips/mman.h +++ b/linux-headers/asm-mips/mman.h @@ -98,6 +98,9 @@ #define MADV_COLD 20 /* deactivate these pages */ #define MADV_PAGEOUT 21 /* reclaim these pages */ +#define MADV_POPULATE_READ 22 /* populate (prefault) page tables readable */ +#define MADV_POPULATE_WRITE 23 /* populate (prefault) page tables writable */ + /* compatibility flags */ #define MAP_FILE 0 diff --git a/linux-headers/asm-mips/unistd_n32.h b/linux-headers/asm-mips/unistd_n32.h index fce51fee09..09cd297698 100644 --- a/linux-headers/asm-mips/unistd_n32.h +++ b/linux-headers/asm-mips/unistd_n32.h @@ -372,6 +372,7 @@ #define __NR_process_madvise (__NR_Linux + 440) #define __NR_epoll_pwait2 (__NR_Linux + 441) #define __NR_mount_setattr (__NR_Linux + 442) +#define __NR_quotactl_fd (__NR_Linux + 443) #define __NR_landlock_create_ruleset (__NR_Linux + 444) #define __NR_landlock_add_rule (__NR_Linux + 445) #define __NR_landlock_restrict_self (__NR_Linux + 446) diff --git a/linux-headers/asm-mips/unistd_n64.h b/linux-headers/asm-mips/unistd_n64.h index 0996001802..780e0cead6 100644 --- a/linux-headers/asm-mips/unistd_n64.h +++ b/linux-headers/asm-mips/unistd_n64.h @@ -348,6 +348,7 @@ #define __NR_process_madvise (__NR_Linux + 440) #define __NR_epoll_pwait2 (__NR_Linux + 441) #define __NR_mount_setattr (__NR_Linux + 442) +#define __NR_quotactl_fd (__NR_Linux + 443) #define __NR_landlock_create_ruleset (__NR_Linux + 444) #define __NR_landlock_add_rule (__NR_Linux + 445) #define __NR_landlock_restrict_self (__NR_Linux + 446) diff --git a/linux-headers/asm-mips/unistd_o32.h b/linux-headers/asm-mips/unistd_o32.h index 954303ad69..06a2b3b55e 100644 --- a/linux-headers/asm-mips/unistd_o32.h +++ b/linux-headers/asm-mips/unistd_o32.h @@ -418,6 +418,7 @@ #define __NR_process_madvise (__NR_Linux + 440) #define __NR_epoll_pwait2 (__NR_Linux + 441) #define __NR_mount_setattr (__NR_Linux + 442) +#define __NR_quotactl_fd (__NR_Linux + 443) #define __NR_landlock_create_ruleset (__NR_Linux + 444) #define __NR_landlock_add_rule (__NR_Linux + 445) #define __NR_landlock_restrict_self (__NR_Linux + 446) diff --git a/linux-headers/asm-powerpc/unistd_32.h b/linux-headers/asm-powerpc/unistd_32.h index 9155778c19..cd5a8a41b2 100644 --- a/linux-headers/asm-powerpc/unistd_32.h +++ b/linux-headers/asm-powerpc/unistd_32.h @@ -425,6 +425,7 @@ #define __NR_process_madvise 440 #define __NR_epoll_pwait2 441 #define __NR_mount_setattr 442 +#define __NR_quotactl_fd 443 #define __NR_landlock_create_ruleset 444 #define __NR_landlock_add_rule 445 #define __NR_landlock_restrict_self 446 diff --git a/linux-headers/asm-powerpc/unistd_64.h b/linux-headers/asm-powerpc/unistd_64.h index 3cefa88932..8458effa8d 100644 --- a/linux-headers/asm-powerpc/unistd_64.h +++ b/linux-headers/asm-powerpc/unistd_64.h @@ -397,6 +397,7 @@ #define __NR_process_madvise 440 #define __NR_epoll_pwait2 441 #define __NR_mount_setattr 442 +#define __NR_quotactl_fd 443 #define __NR_landlock_create_ruleset 444 #define __NR_landlock_add_rule 445 #define __NR_landlock_restrict_self 446 diff --git a/linux-headers/asm-s390/unistd_32.h b/linux-headers/asm-s390/unistd_32.h index e8cd34334f..0c3cd299e4 100644 --- a/linux-headers/asm-s390/unistd_32.h +++ b/linux-headers/asm-s390/unistd_32.h @@ -415,6 +415,7 @@ #define __NR_process_madvise 440 #define __NR_epoll_pwait2 441 #define __NR_mount_setattr 442 +#define __NR_quotactl_fd 443 #define __NR_landlock_create_ruleset 444 #define __NR_landlock_add_rule 445 #define __NR_landlock_restrict_self 446 diff --git a/linux-headers/asm-s390/unistd_64.h b/linux-headers/asm-s390/unistd_64.h index 86830e1e83..8dfc08b5e6 100644 --- a/linux-headers/asm-s390/unistd_64.h +++ b/linux-headers/asm-s390/unistd_64.h @@ -363,6 +363,7 @@ #define __NR_process_madvise 440 #define __NR_epoll_pwait2 441 #define __NR_mount_setattr 442 +#define __NR_quotactl_fd 443 #define __NR_landlock_create_ruleset 444 #define __NR_landlock_add_rule 445 #define __NR_landlock_restrict_self 446 diff --git a/linux-headers/asm-x86/kvm.h b/linux-headers/asm-x86/kvm.h index 0662f644aa..a6c327f8ad 100644 --- a/linux-headers/asm-x86/kvm.h +++ b/linux-headers/asm-x86/kvm.h @@ -159,6 +159,19 @@ struct kvm_sregs { __u64 interrupt_bitmap[(KVM_NR_INTERRUPTS + 63) / 64]; }; +struct kvm_sregs2 { + /* out (KVM_GET_SREGS2) / in (KVM_SET_SREGS2) */ + struct kvm_segment cs, ds, es, fs, gs, ss; + struct kvm_segment tr, ldt; + struct kvm_dtable gdt, idt; + __u64 cr0, cr2, cr3, cr4, cr8; + __u64 efer; + __u64 apic_base; + __u64 flags; + __u64 pdptrs[4]; +}; +#define KVM_SREGS2_FLAGS_PDPTRS_VALID 1 + /* for KVM_GET_FPU and KVM_SET_FPU */ struct kvm_fpu { __u8 fpr[8][16]; diff --git a/linux-headers/asm-x86/unistd_32.h b/linux-headers/asm-x86/unistd_32.h index 8f6ac8c19f..66e96c0c68 100644 --- a/linux-headers/asm-x86/unistd_32.h +++ b/linux-headers/asm-x86/unistd_32.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_UNISTD_32_H -#define _ASM_X86_UNISTD_32_H 1 +#ifndef _ASM_UNISTD_32_H +#define _ASM_UNISTD_32_H #define __NR_restart_syscall 0 #define __NR_exit 1 @@ -433,9 +433,10 @@ #define __NR_process_madvise 440 #define __NR_epoll_pwait2 441 #define __NR_mount_setattr 442 +#define __NR_quotactl_fd 443 #define __NR_landlock_create_ruleset 444 #define __NR_landlock_add_rule 445 #define __NR_landlock_restrict_self 446 -#endif /* _ASM_X86_UNISTD_32_H */ +#endif /* _ASM_UNISTD_32_H */ diff --git a/linux-headers/asm-x86/unistd_64.h b/linux-headers/asm-x86/unistd_64.h index bb187a9268..b8ff6f14ee 100644 --- a/linux-headers/asm-x86/unistd_64.h +++ b/linux-headers/asm-x86/unistd_64.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_UNISTD_64_H -#define _ASM_X86_UNISTD_64_H 1 +#ifndef _ASM_UNISTD_64_H +#define _ASM_UNISTD_64_H #define __NR_read 0 #define __NR_write 1 @@ -355,9 +355,10 @@ #define __NR_process_madvise 440 #define __NR_epoll_pwait2 441 #define __NR_mount_setattr 442 +#define __NR_quotactl_fd 443 #define __NR_landlock_create_ruleset 444 #define __NR_landlock_add_rule 445 #define __NR_landlock_restrict_self 446 -#endif /* _ASM_X86_UNISTD_64_H */ +#endif /* _ASM_UNISTD_64_H */ diff --git a/linux-headers/asm-x86/unistd_x32.h b/linux-headers/asm-x86/unistd_x32.h index 4edd0103ac..06a1097c15 100644 --- a/linux-headers/asm-x86/unistd_x32.h +++ b/linux-headers/asm-x86/unistd_x32.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_UNISTD_X32_H -#define _ASM_X86_UNISTD_X32_H 1 +#ifndef _ASM_UNISTD_X32_H +#define _ASM_UNISTD_X32_H #define __NR_read (__X32_SYSCALL_BIT + 0) #define __NR_write (__X32_SYSCALL_BIT + 1) @@ -308,6 +308,7 @@ #define __NR_process_madvise (__X32_SYSCALL_BIT + 440) #define __NR_epoll_pwait2 (__X32_SYSCALL_BIT + 441) #define __NR_mount_setattr (__X32_SYSCALL_BIT + 442) +#define __NR_quotactl_fd (__X32_SYSCALL_BIT + 443) #define __NR_landlock_create_ruleset (__X32_SYSCALL_BIT + 444) #define __NR_landlock_add_rule (__X32_SYSCALL_BIT + 445) #define __NR_landlock_restrict_self (__X32_SYSCALL_BIT + 446) @@ -349,4 +350,4 @@ #define __NR_pwritev2 (__X32_SYSCALL_BIT + 547) -#endif /* _ASM_X86_UNISTD_X32_H */ +#endif /* _ASM_UNISTD_X32_H */ diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h index 20d6a263bb..bcaf66cc4d 100644 --- a/linux-headers/linux/kvm.h +++ b/linux-headers/linux/kvm.h @@ -280,6 +280,9 @@ struct kvm_xen_exit { /* Encounter unexpected vm-exit reason */ #define KVM_INTERNAL_ERROR_UNEXPECTED_EXIT_REASON 4 +/* Flags that describe what fields in emulation_failure hold valid data. */ +#define KVM_INTERNAL_ERROR_EMULATION_FLAG_INSTRUCTION_BYTES (1ULL << 0) + /* for KVM_RUN, returned by mmap(vcpu_fd, offset=0) */ struct kvm_run { /* in */ @@ -383,6 +386,25 @@ struct kvm_run { __u32 ndata; __u64 data[16]; } internal; + /* + * KVM_INTERNAL_ERROR_EMULATION + * + * "struct emulation_failure" is an overlay of "struct internal" + * that is used for the KVM_INTERNAL_ERROR_EMULATION sub-type of + * KVM_EXIT_INTERNAL_ERROR. Note, unlike other internal error + * sub-types, this struct is ABI! It also needs to be backwards + * compatible with "struct internal". Take special care that + * "ndata" is correct, that new fields are enumerated in "flags", + * and that each flag enumerates fields that are 64-bit aligned + * and sized (so that ndata+internal.data[] is valid/accurate). + */ + struct { + __u32 suberror; + __u32 ndata; + __u64 flags; + __u8 insn_size; + __u8 insn_bytes[15]; + } emulation_failure; /* KVM_EXIT_OSI */ struct { __u64 gprs[32]; @@ -1083,6 +1105,13 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_SGX_ATTRIBUTE 196 #define KVM_CAP_VM_COPY_ENC_CONTEXT_FROM 197 #define KVM_CAP_PTP_KVM 198 +#define KVM_CAP_HYPERV_ENFORCE_CPUID 199 +#define KVM_CAP_SREGS2 200 +#define KVM_CAP_EXIT_HYPERCALL 201 +#define KVM_CAP_PPC_RPT_INVALIDATE 202 +#define KVM_CAP_BINARY_STATS_FD 203 +#define KVM_CAP_EXIT_ON_EMULATION_FAILURE 204 +#define KVM_CAP_ARM_MTE 205 #ifdef KVM_CAP_IRQ_ROUTING @@ -1428,6 +1457,7 @@ struct kvm_s390_ucas_mapping { /* Available with KVM_CAP_PMU_EVENT_FILTER */ #define KVM_SET_PMU_EVENT_FILTER _IOW(KVMIO, 0xb2, struct kvm_pmu_event_filter) #define KVM_PPC_SVM_OFF _IO(KVMIO, 0xb3) +#define KVM_ARM_MTE_COPY_TAGS _IOR(KVMIO, 0xb4, struct kvm_arm_copy_mte_tags) /* ioctl for vm fd */ #define KVM_CREATE_DEVICE _IOWR(KVMIO, 0xe0, struct kvm_create_device) @@ -1621,6 +1651,9 @@ struct kvm_xen_hvm_attr { #define KVM_XEN_VCPU_GET_ATTR _IOWR(KVMIO, 0xca, struct kvm_xen_vcpu_attr) #define KVM_XEN_VCPU_SET_ATTR _IOW(KVMIO, 0xcb, struct kvm_xen_vcpu_attr) +#define KVM_GET_SREGS2 _IOR(KVMIO, 0xcc, struct kvm_sregs2) +#define KVM_SET_SREGS2 _IOW(KVMIO, 0xcd, struct kvm_sregs2) + struct kvm_xen_vcpu_attr { __u16 type; __u16 pad[3]; @@ -1899,4 +1932,76 @@ struct kvm_dirty_gfn { #define KVM_BUS_LOCK_DETECTION_OFF (1 << 0) #define KVM_BUS_LOCK_DETECTION_EXIT (1 << 1) +/** + * struct kvm_stats_header - Header of per vm/vcpu binary statistics data. + * @flags: Some extra information for header, always 0 for now. + * @name_size: The size in bytes of the memory which contains statistics + * name string including trailing '\0'. The memory is allocated + * at the send of statistics descriptor. + * @num_desc: The number of statistics the vm or vcpu has. + * @id_offset: The offset of the vm/vcpu stats' id string in the file pointed + * by vm/vcpu stats fd. + * @desc_offset: The offset of the vm/vcpu stats' descriptor block in the file + * pointd by vm/vcpu stats fd. + * @data_offset: The offset of the vm/vcpu stats' data block in the file + * pointed by vm/vcpu stats fd. + * + * This is the header userspace needs to read from stats fd before any other + * readings. It is used by userspace to discover all the information about the + * vm/vcpu's binary statistics. + * Userspace reads this header from the start of the vm/vcpu's stats fd. + */ +struct kvm_stats_header { + __u32 flags; + __u32 name_size; + __u32 num_desc; + __u32 id_offset; + __u32 desc_offset; + __u32 data_offset; +}; + +#define KVM_STATS_TYPE_SHIFT 0 +#define KVM_STATS_TYPE_MASK (0xF << KVM_STATS_TYPE_SHIFT) +#define KVM_STATS_TYPE_CUMULATIVE (0x0 << KVM_STATS_TYPE_SHIFT) +#define KVM_STATS_TYPE_INSTANT (0x1 << KVM_STATS_TYPE_SHIFT) +#define KVM_STATS_TYPE_PEAK (0x2 << KVM_STATS_TYPE_SHIFT) +#define KVM_STATS_TYPE_MAX KVM_STATS_TYPE_PEAK + +#define KVM_STATS_UNIT_SHIFT 4 +#define KVM_STATS_UNIT_MASK (0xF << KVM_STATS_UNIT_SHIFT) +#define KVM_STATS_UNIT_NONE (0x0 << KVM_STATS_UNIT_SHIFT) +#define KVM_STATS_UNIT_BYTES (0x1 << KVM_STATS_UNIT_SHIFT) +#define KVM_STATS_UNIT_SECONDS (0x2 << KVM_STATS_UNIT_SHIFT) +#define KVM_STATS_UNIT_CYCLES (0x3 << KVM_STATS_UNIT_SHIFT) +#define KVM_STATS_UNIT_MAX KVM_STATS_UNIT_CYCLES + +#define KVM_STATS_BASE_SHIFT 8 +#define KVM_STATS_BASE_MASK (0xF << KVM_STATS_BASE_SHIFT) +#define KVM_STATS_BASE_POW10 (0x0 << KVM_STATS_BASE_SHIFT) +#define KVM_STATS_BASE_POW2 (0x1 << KVM_STATS_BASE_SHIFT) +#define KVM_STATS_BASE_MAX KVM_STATS_BASE_POW2 + +/** + * struct kvm_stats_desc - Descriptor of a KVM statistics. + * @flags: Annotations of the stats, like type, unit, etc. + * @exponent: Used together with @flags to determine the unit. + * @size: The number of data items for this stats. + * Every data item is of type __u64. + * @offset: The offset of the stats to the start of stat structure in + * struture kvm or kvm_vcpu. + * @unused: Unused field for future usage. Always 0 for now. + * @name: The name string for the stats. Its size is indicated by the + * &kvm_stats_header->name_size. + */ +struct kvm_stats_desc { + __u32 flags; + __s16 exponent; + __u16 size; + __u32 offset; + __u32 unused; + char name[]; +}; + +#define KVM_GET_STATS_FD _IO(KVMIO, 0xce) + #endif /* __LINUX_KVM_H */ diff --git a/linux-headers/linux/userfaultfd.h b/linux-headers/linux/userfaultfd.h index b9ac97b70f..8479af5f4c 100644 --- a/linux-headers/linux/userfaultfd.h +++ b/linux-headers/linux/userfaultfd.h @@ -31,7 +31,8 @@ UFFD_FEATURE_MISSING_SHMEM | \ UFFD_FEATURE_SIGBUS | \ UFFD_FEATURE_THREAD_ID | \ - UFFD_FEATURE_MINOR_HUGETLBFS) + UFFD_FEATURE_MINOR_HUGETLBFS | \ + UFFD_FEATURE_MINOR_SHMEM) #define UFFD_API_IOCTLS \ ((__u64)1 << _UFFDIO_REGISTER | \ (__u64)1 << _UFFDIO_UNREGISTER | \ @@ -80,8 +81,8 @@ struct uffdio_zeropage) #define UFFDIO_WRITEPROTECT _IOWR(UFFDIO, _UFFDIO_WRITEPROTECT, \ struct uffdio_writeprotect) -#define UFFDIO_CONTINUE _IOR(UFFDIO, _UFFDIO_CONTINUE, \ - struct uffdio_continue) +#define UFFDIO_CONTINUE _IOWR(UFFDIO, _UFFDIO_CONTINUE, \ + struct uffdio_continue) /* read() structure */ struct uffd_msg { @@ -185,6 +186,9 @@ struct uffdio_api { * UFFD_FEATURE_MINOR_HUGETLBFS indicates that minor faults * can be intercepted (via REGISTER_MODE_MINOR) for * hugetlbfs-backed pages. + * + * UFFD_FEATURE_MINOR_SHMEM indicates the same support as + * UFFD_FEATURE_MINOR_HUGETLBFS, but for shmem-backed pages instead. */ #define UFFD_FEATURE_PAGEFAULT_FLAG_WP (1<<0) #define UFFD_FEATURE_EVENT_FORK (1<<1) @@ -196,6 +200,7 @@ struct uffdio_api { #define UFFD_FEATURE_SIGBUS (1<<7) #define UFFD_FEATURE_THREAD_ID (1<<8) #define UFFD_FEATURE_MINOR_HUGETLBFS (1<<9) +#define UFFD_FEATURE_MINOR_SHMEM (1<<10) __u64 features; __u64 ioctls; -- cgit v1.2.3-55-g7522 From 82123b756a1a2f1965350e5794aaa7b5c6a15282 Mon Sep 17 00:00:00 2001 From: Bharata B Rao Date: Tue, 6 Jul 2021 16:54:40 +0530 Subject: target/ppc: Support for H_RPT_INVALIDATE hcall If KVM_CAP_RPT_INVALIDATE KVM capability is enabled, then - indicate the availability of H_RPT_INVALIDATE hcall to the guest via ibm,hypertas-functions property. - Enable the hcall Both the above are done only if the new sPAPR machine capability cap-rpt-invalidate is set. Signed-off-by: Bharata B Rao Message-Id: <20210706112440.1449562-3-bharata@linux.ibm.com> Signed-off-by: David Gibson --- hw/ppc/spapr.c | 6 ++++++ hw/ppc/spapr_caps.c | 41 +++++++++++++++++++++++++++++++++++++++++ include/hw/ppc/spapr.h | 8 ++++++-- target/ppc/kvm.c | 12 ++++++++++++ target/ppc/kvm_ppc.h | 12 ++++++++++++ 5 files changed, 77 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index 3808d47053..a007be471e 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -881,6 +881,10 @@ static void spapr_dt_rtas(SpaprMachineState *spapr, void *fdt) add_str(hypertas, "hcall-copy"); add_str(hypertas, "hcall-debug"); add_str(hypertas, "hcall-vphn"); + if (spapr_get_cap(spapr, SPAPR_CAP_RPT_INVALIDATE) == SPAPR_CAP_ON) { + add_str(hypertas, "hcall-rpt-invalidate"); + } + add_str(qemu_hypertas, "hcall-memop1"); if (!kvm_enabled() || kvmppc_spapr_use_multitce()) { @@ -2030,6 +2034,7 @@ static const VMStateDescription vmstate_spapr = { &vmstate_spapr_cap_ccf_assist, &vmstate_spapr_cap_fwnmi, &vmstate_spapr_fwnmi, + &vmstate_spapr_cap_rpt_invalidate, NULL } }; @@ -4618,6 +4623,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data) smc->default_caps.caps[SPAPR_CAP_LARGE_DECREMENTER] = SPAPR_CAP_ON; smc->default_caps.caps[SPAPR_CAP_CCF_ASSIST] = SPAPR_CAP_ON; smc->default_caps.caps[SPAPR_CAP_FWNMI] = SPAPR_CAP_ON; + smc->default_caps.caps[SPAPR_CAP_RPT_INVALIDATE] = SPAPR_CAP_OFF; spapr_caps_add_properties(smc); smc->irq = &spapr_irq_dual; smc->dr_phb_enabled = true; diff --git a/hw/ppc/spapr_caps.c b/hw/ppc/spapr_caps.c index d0c419b392..ed7c077a0d 100644 --- a/hw/ppc/spapr_caps.c +++ b/hw/ppc/spapr_caps.c @@ -582,6 +582,37 @@ static void cap_fwnmi_apply(SpaprMachineState *spapr, uint8_t val, } } +static void cap_rpt_invalidate_apply(SpaprMachineState *spapr, + uint8_t val, Error **errp) +{ + ERRP_GUARD(); + + if (!val) { + /* capability disabled by default */ + return; + } + + if (tcg_enabled()) { + error_setg(errp, "No H_RPT_INVALIDATE support in TCG"); + error_append_hint(errp, + "Try appending -machine cap-rpt-invalidate=off\n"); + } else if (kvm_enabled()) { + if (!kvmppc_has_cap_mmu_radix()) { + error_setg(errp, "H_RPT_INVALIDATE only supported on Radix"); + return; + } + + if (!kvmppc_has_cap_rpt_invalidate()) { + error_setg(errp, + "KVM implementation does not support H_RPT_INVALIDATE"); + error_append_hint(errp, + "Try appending -machine cap-rpt-invalidate=off\n"); + } else { + kvmppc_enable_h_rpt_invalidate(); + } + } +} + SpaprCapabilityInfo capability_table[SPAPR_CAP_NUM] = { [SPAPR_CAP_HTM] = { .name = "htm", @@ -690,6 +721,15 @@ SpaprCapabilityInfo capability_table[SPAPR_CAP_NUM] = { .type = "bool", .apply = cap_fwnmi_apply, }, + [SPAPR_CAP_RPT_INVALIDATE] = { + .name = "rpt-invalidate", + .description = "Allow H_RPT_INVALIDATE", + .index = SPAPR_CAP_RPT_INVALIDATE, + .get = spapr_cap_get_bool, + .set = spapr_cap_set_bool, + .type = "bool", + .apply = cap_rpt_invalidate_apply, + }, }; static SpaprCapabilities default_caps_with_cpu(SpaprMachineState *spapr, @@ -830,6 +870,7 @@ SPAPR_CAP_MIG_STATE(nested_kvm_hv, SPAPR_CAP_NESTED_KVM_HV); SPAPR_CAP_MIG_STATE(large_decr, SPAPR_CAP_LARGE_DECREMENTER); SPAPR_CAP_MIG_STATE(ccf_assist, SPAPR_CAP_CCF_ASSIST); SPAPR_CAP_MIG_STATE(fwnmi, SPAPR_CAP_FWNMI); +SPAPR_CAP_MIG_STATE(rpt_invalidate, SPAPR_CAP_RPT_INVALIDATE); void spapr_caps_init(SpaprMachineState *spapr) { diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h index 779f707fb8..637652ad16 100644 --- a/include/hw/ppc/spapr.h +++ b/include/hw/ppc/spapr.h @@ -75,8 +75,10 @@ typedef enum { #define SPAPR_CAP_CCF_ASSIST 0x09 /* Implements PAPR FWNMI option */ #define SPAPR_CAP_FWNMI 0x0A +/* Support H_RPT_INVALIDATE */ +#define SPAPR_CAP_RPT_INVALIDATE 0x0B /* Num Caps */ -#define SPAPR_CAP_NUM (SPAPR_CAP_FWNMI + 1) +#define SPAPR_CAP_NUM (SPAPR_CAP_RPT_INVALIDATE + 1) /* * Capability Values @@ -547,8 +549,9 @@ struct SpaprMachineState { #define H_SCM_UNBIND_MEM 0x3F0 #define H_SCM_UNBIND_ALL 0x3FC #define H_SCM_HEALTH 0x400 +#define H_RPT_INVALIDATE 0x448 -#define MAX_HCALL_OPCODE H_SCM_HEALTH +#define MAX_HCALL_OPCODE H_RPT_INVALIDATE /* The hcalls above are standardized in PAPR and implemented by pHyp * as well. @@ -939,6 +942,7 @@ extern const VMStateDescription vmstate_spapr_cap_nested_kvm_hv; extern const VMStateDescription vmstate_spapr_cap_large_decr; extern const VMStateDescription vmstate_spapr_cap_ccf_assist; extern const VMStateDescription vmstate_spapr_cap_fwnmi; +extern const VMStateDescription vmstate_spapr_cap_rpt_invalidate; static inline uint8_t spapr_get_cap(SpaprMachineState *spapr, int cap) { diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c index 104a308abb..dc93b99189 100644 --- a/target/ppc/kvm.c +++ b/target/ppc/kvm.c @@ -89,6 +89,7 @@ static int cap_ppc_count_cache_flush_assist; static int cap_ppc_nested_kvm_hv; static int cap_large_decr; static int cap_fwnmi; +static int cap_rpt_invalidate; static uint32_t debug_inst_opcode; @@ -152,6 +153,7 @@ int kvm_arch_init(MachineState *ms, KVMState *s) exit(1); } + cap_rpt_invalidate = kvm_vm_check_extension(s, KVM_CAP_PPC_RPT_INVALIDATE); kvm_ppc_register_host_cpu_type(); return 0; @@ -2040,6 +2042,11 @@ void kvmppc_enable_h_page_init(void) kvmppc_enable_hcall(kvm_state, H_PAGE_INIT); } +void kvmppc_enable_h_rpt_invalidate(void) +{ + kvmppc_enable_hcall(kvm_state, H_RPT_INVALIDATE); +} + void kvmppc_set_papr(PowerPCCPU *cpu) { CPUState *cs = CPU(cpu); @@ -2551,6 +2558,11 @@ int kvmppc_enable_cap_large_decr(PowerPCCPU *cpu, int enable) return 0; } +int kvmppc_has_cap_rpt_invalidate(void) +{ + return cap_rpt_invalidate; +} + PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void) { uint32_t host_pvr = mfpvr(); diff --git a/target/ppc/kvm_ppc.h b/target/ppc/kvm_ppc.h index 989f61ace0..ee9325bf9a 100644 --- a/target/ppc/kvm_ppc.h +++ b/target/ppc/kvm_ppc.h @@ -24,6 +24,7 @@ void kvmppc_enable_logical_ci_hcalls(void); void kvmppc_enable_set_mode_hcall(void); void kvmppc_enable_clear_ref_mod_hcalls(void); void kvmppc_enable_h_page_init(void); +void kvmppc_enable_h_rpt_invalidate(void); void kvmppc_set_papr(PowerPCCPU *cpu); int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t compat_pvr); void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy); @@ -71,6 +72,7 @@ bool kvmppc_has_cap_nested_kvm_hv(void); int kvmppc_set_cap_nested_kvm_hv(int enable); int kvmppc_get_cap_large_decr(void); int kvmppc_enable_cap_large_decr(PowerPCCPU *cpu, int enable); +int kvmppc_has_cap_rpt_invalidate(void); int kvmppc_enable_hwrng(void); int kvmppc_put_books_sregs(PowerPCCPU *cpu); PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void); @@ -150,6 +152,11 @@ static inline void kvmppc_enable_h_page_init(void) { } +static inline void kvmppc_enable_h_rpt_invalidate(void) +{ + g_assert_not_reached(); +} + static inline void kvmppc_set_papr(PowerPCCPU *cpu) { } @@ -381,6 +388,11 @@ static inline int kvmppc_enable_cap_large_decr(PowerPCCPU *cpu, int enable) return -1; } +static inline int kvmppc_has_cap_rpt_invalidate(void) +{ + return false; +} + static inline int kvmppc_enable_hwrng(void) { return -1; -- cgit v1.2.3-55-g7522