From 1fc33bb9f05b2c263232ab01dd66d6e33d31cedc Mon Sep 17 00:00:00 2001 From: Claudio Fontana Date: Sat, 12 Dec 2020 16:55:09 +0100 Subject: i386: move whpx accel files into whpx/ Signed-off-by: Claudio Fontana Reviewed-by: Alex Bennée Reviewed-by: Richard Henderson Message-Id: <20201212155530.23098-3-cfontana@suse.de> Signed-off-by: Eduardo Habkost --- target/i386/meson.build | 6 +- target/i386/whp-dispatch.h | 75 -- target/i386/whpx-all.c | 1938 --------------------------------------- target/i386/whpx-apic.c | 274 ------ target/i386/whpx-cpus.c | 96 -- target/i386/whpx-cpus.h | 34 - target/i386/whpx/meson.build | 5 + target/i386/whpx/whp-dispatch.h | 75 ++ target/i386/whpx/whpx-all.c | 1938 +++++++++++++++++++++++++++++++++++++++ target/i386/whpx/whpx-apic.c | 274 ++++++ target/i386/whpx/whpx-cpus.c | 96 ++ target/i386/whpx/whpx-cpus.h | 34 + 12 files changed, 2423 insertions(+), 2422 deletions(-) delete mode 100644 target/i386/whp-dispatch.h delete mode 100644 target/i386/whpx-all.c delete mode 100644 target/i386/whpx-apic.c delete mode 100644 target/i386/whpx-cpus.c delete mode 100644 target/i386/whpx-cpus.h create mode 100644 target/i386/whpx/meson.build create mode 100644 target/i386/whpx/whp-dispatch.h create mode 100644 target/i386/whpx/whpx-all.c create mode 100644 target/i386/whpx/whpx-apic.c create mode 100644 target/i386/whpx/whpx-cpus.c create mode 100644 target/i386/whpx/whpx-cpus.h (limited to 'target') diff --git a/target/i386/meson.build b/target/i386/meson.build index 5363757131..62cd042915 100644 --- a/target/i386/meson.build +++ b/target/i386/meson.build @@ -27,11 +27,6 @@ i386_softmmu_ss.add(files( 'machine.c', 'monitor.c', )) -i386_softmmu_ss.add(when: 'CONFIG_WHPX', if_true: files( - 'whpx-all.c', - 'whpx-cpus.c', - 'whpx-apic.c', -)) i386_softmmu_ss.add(when: 'CONFIG_HAX', if_true: files( 'hax-all.c', 'hax-mem.c', @@ -41,6 +36,7 @@ i386_softmmu_ss.add(when: ['CONFIG_HAX', 'CONFIG_POSIX'], if_true: files('hax-po i386_softmmu_ss.add(when: 
['CONFIG_HAX', 'CONFIG_WIN32'], if_true: files('hax-windows.c')) subdir('kvm') +subdir('whpx') subdir('hvf') target_arch += {'i386': i386_ss} diff --git a/target/i386/whp-dispatch.h b/target/i386/whp-dispatch.h deleted file mode 100644 index cef5d848bd..0000000000 --- a/target/i386/whp-dispatch.h +++ /dev/null @@ -1,75 +0,0 @@ -#ifndef WHP_DISPATCH_H -#define WHP_DISPATCH_H - -#include <windows.h> -#include <WinHvPlatform.h> -#include <WinHvEmulation.h> - -#define WHV_E_UNKNOWN_CAPABILITY 0x80370300L - -#define LIST_WINHVPLATFORM_FUNCTIONS(X) \ - X(HRESULT, WHvGetCapability, (WHV_CAPABILITY_CODE CapabilityCode, VOID* CapabilityBuffer, UINT32 CapabilityBufferSizeInBytes, UINT32* WrittenSizeInBytes)) \ - X(HRESULT, WHvCreatePartition, (WHV_PARTITION_HANDLE* Partition)) \ - X(HRESULT, WHvSetupPartition, (WHV_PARTITION_HANDLE Partition)) \ - X(HRESULT, WHvDeletePartition, (WHV_PARTITION_HANDLE Partition)) \ - X(HRESULT, WHvGetPartitionProperty, (WHV_PARTITION_HANDLE Partition, WHV_PARTITION_PROPERTY_CODE PropertyCode, VOID* PropertyBuffer, UINT32 PropertyBufferSizeInBytes, UINT32* WrittenSizeInBytes)) \ - X(HRESULT, WHvSetPartitionProperty, (WHV_PARTITION_HANDLE Partition, WHV_PARTITION_PROPERTY_CODE PropertyCode, const VOID* PropertyBuffer, UINT32 PropertyBufferSizeInBytes)) \ - X(HRESULT, WHvMapGpaRange, (WHV_PARTITION_HANDLE Partition, VOID* SourceAddress, WHV_GUEST_PHYSICAL_ADDRESS GuestAddress, UINT64 SizeInBytes, WHV_MAP_GPA_RANGE_FLAGS Flags)) \ - X(HRESULT, WHvUnmapGpaRange, (WHV_PARTITION_HANDLE Partition, WHV_GUEST_PHYSICAL_ADDRESS GuestAddress, UINT64 SizeInBytes)) \ - X(HRESULT, WHvTranslateGva, (WHV_PARTITION_HANDLE Partition, UINT32 VpIndex, WHV_GUEST_VIRTUAL_ADDRESS Gva, WHV_TRANSLATE_GVA_FLAGS TranslateFlags, WHV_TRANSLATE_GVA_RESULT* TranslationResult, WHV_GUEST_PHYSICAL_ADDRESS* Gpa)) \ - X(HRESULT, WHvCreateVirtualProcessor, (WHV_PARTITION_HANDLE Partition, UINT32 VpIndex, UINT32 Flags)) \ - X(HRESULT, WHvDeleteVirtualProcessor, (WHV_PARTITION_HANDLE Partition, UINT32 VpIndex)) \ - X(HRESULT, 
WHvRunVirtualProcessor, (WHV_PARTITION_HANDLE Partition, UINT32 VpIndex, VOID* ExitContext, UINT32 ExitContextSizeInBytes)) \ - X(HRESULT, WHvCancelRunVirtualProcessor, (WHV_PARTITION_HANDLE Partition, UINT32 VpIndex, UINT32 Flags)) \ - X(HRESULT, WHvGetVirtualProcessorRegisters, (WHV_PARTITION_HANDLE Partition, UINT32 VpIndex, const WHV_REGISTER_NAME* RegisterNames, UINT32 RegisterCount, WHV_REGISTER_VALUE* RegisterValues)) \ - X(HRESULT, WHvSetVirtualProcessorRegisters, (WHV_PARTITION_HANDLE Partition, UINT32 VpIndex, const WHV_REGISTER_NAME* RegisterNames, UINT32 RegisterCount, const WHV_REGISTER_VALUE* RegisterValues)) \ - -/* - * These are supplemental functions that may not be present - * on all versions and are not critical for basic functionality. - */ -#define LIST_WINHVPLATFORM_FUNCTIONS_SUPPLEMENTAL(X) \ - X(HRESULT, WHvSuspendPartitionTime, (WHV_PARTITION_HANDLE Partition)) \ - X(HRESULT, WHvRequestInterrupt, (WHV_PARTITION_HANDLE Partition, \ - WHV_INTERRUPT_CONTROL* Interrupt, UINT32 InterruptControlSize)) \ - X(HRESULT, WHvGetVirtualProcessorInterruptControllerState2, \ - (WHV_PARTITION_HANDLE Partition, UINT32 VpIndex, PVOID State, \ - UINT32 StateSize, UINT32* WrittenSize)) \ - X(HRESULT, WHvSetVirtualProcessorInterruptControllerState2, \ - (WHV_PARTITION_HANDLE Partition, UINT32 VpIndex, PVOID State, \ - UINT32 StateSize)) \ - -#define LIST_WINHVEMULATION_FUNCTIONS(X) \ - X(HRESULT, WHvEmulatorCreateEmulator, (const WHV_EMULATOR_CALLBACKS* Callbacks, WHV_EMULATOR_HANDLE* Emulator)) \ - X(HRESULT, WHvEmulatorDestroyEmulator, (WHV_EMULATOR_HANDLE Emulator)) \ - X(HRESULT, WHvEmulatorTryIoEmulation, (WHV_EMULATOR_HANDLE Emulator, VOID* Context, const WHV_VP_EXIT_CONTEXT* VpContext, const WHV_X64_IO_PORT_ACCESS_CONTEXT* IoInstructionContext, WHV_EMULATOR_STATUS* EmulatorReturnStatus)) \ - X(HRESULT, WHvEmulatorTryMmioEmulation, (WHV_EMULATOR_HANDLE Emulator, VOID* Context, const WHV_VP_EXIT_CONTEXT* VpContext, const WHV_MEMORY_ACCESS_CONTEXT* 
MmioInstructionContext, WHV_EMULATOR_STATUS* EmulatorReturnStatus)) \ - -#define WHP_DEFINE_TYPE(return_type, function_name, signature) \ - typedef return_type (WINAPI *function_name ## _t) signature; - -#define WHP_DECLARE_MEMBER(return_type, function_name, signature) \ - function_name ## _t function_name; - -/* Define function typedef */ -LIST_WINHVPLATFORM_FUNCTIONS(WHP_DEFINE_TYPE) -LIST_WINHVEMULATION_FUNCTIONS(WHP_DEFINE_TYPE) -LIST_WINHVPLATFORM_FUNCTIONS_SUPPLEMENTAL(WHP_DEFINE_TYPE) - -struct WHPDispatch { - LIST_WINHVPLATFORM_FUNCTIONS(WHP_DECLARE_MEMBER) - LIST_WINHVEMULATION_FUNCTIONS(WHP_DECLARE_MEMBER) - LIST_WINHVPLATFORM_FUNCTIONS_SUPPLEMENTAL(WHP_DECLARE_MEMBER) -}; - -extern struct WHPDispatch whp_dispatch; - -bool init_whp_dispatch(void); - -typedef enum WHPFunctionList { - WINHV_PLATFORM_FNS_DEFAULT, - WINHV_EMULATION_FNS_DEFAULT, - WINHV_PLATFORM_FNS_SUPPLEMENTAL -} WHPFunctionList; - -#endif /* WHP_DISPATCH_H */ diff --git a/target/i386/whpx-all.c b/target/i386/whpx-all.c deleted file mode 100644 index 3b824fc9d7..0000000000 --- a/target/i386/whpx-all.c +++ /dev/null @@ -1,1938 +0,0 @@ -/* - * QEMU Windows Hypervisor Platform accelerator (WHPX) - * - * Copyright Microsoft Corp. 2017 - * - * This work is licensed under the terms of the GNU GPL, version 2 or later. - * See the COPYING file in the top-level directory. 
- * - */ - -#include "qemu/osdep.h" -#include "cpu.h" -#include "exec/address-spaces.h" -#include "exec/ioport.h" -#include "qemu-common.h" -#include "sysemu/accel.h" -#include "sysemu/whpx.h" -#include "sysemu/cpus.h" -#include "sysemu/runstate.h" -#include "qemu/main-loop.h" -#include "hw/boards.h" -#include "hw/i386/ioapic.h" -#include "hw/i386/apic_internal.h" -#include "qemu/error-report.h" -#include "qapi/error.h" -#include "qapi/qapi-types-common.h" -#include "qapi/qapi-visit-common.h" -#include "migration/blocker.h" -#include "whp-dispatch.h" -#include <winerror.h> - -#include "whpx-cpus.h" - -#include <WinHvPlatform.h> -#include <WinHvEmulation.h> - -#define HYPERV_APIC_BUS_FREQUENCY (200000000ULL) - -static const WHV_REGISTER_NAME whpx_register_names[] = { - - /* X64 General purpose registers */ - WHvX64RegisterRax, - WHvX64RegisterRcx, - WHvX64RegisterRdx, - WHvX64RegisterRbx, - WHvX64RegisterRsp, - WHvX64RegisterRbp, - WHvX64RegisterRsi, - WHvX64RegisterRdi, - WHvX64RegisterR8, - WHvX64RegisterR9, - WHvX64RegisterR10, - WHvX64RegisterR11, - WHvX64RegisterR12, - WHvX64RegisterR13, - WHvX64RegisterR14, - WHvX64RegisterR15, - WHvX64RegisterRip, - WHvX64RegisterRflags, - - /* X64 Segment registers */ - WHvX64RegisterEs, - WHvX64RegisterCs, - WHvX64RegisterSs, - WHvX64RegisterDs, - WHvX64RegisterFs, - WHvX64RegisterGs, - WHvX64RegisterLdtr, - WHvX64RegisterTr, - - /* X64 Table registers */ - WHvX64RegisterIdtr, - WHvX64RegisterGdtr, - - /* X64 Control Registers */ - WHvX64RegisterCr0, - WHvX64RegisterCr2, - WHvX64RegisterCr3, - WHvX64RegisterCr4, - WHvX64RegisterCr8, - - /* X64 Debug Registers */ - /* - * WHvX64RegisterDr0, - * WHvX64RegisterDr1, - * WHvX64RegisterDr2, - * WHvX64RegisterDr3, - * WHvX64RegisterDr6, - * WHvX64RegisterDr7, - */ - - /* X64 Floating Point and Vector Registers */ - WHvX64RegisterXmm0, - WHvX64RegisterXmm1, - WHvX64RegisterXmm2, - WHvX64RegisterXmm3, - WHvX64RegisterXmm4, - WHvX64RegisterXmm5, - WHvX64RegisterXmm6, - WHvX64RegisterXmm7, - WHvX64RegisterXmm8, - WHvX64RegisterXmm9, 
- WHvX64RegisterXmm10, - WHvX64RegisterXmm11, - WHvX64RegisterXmm12, - WHvX64RegisterXmm13, - WHvX64RegisterXmm14, - WHvX64RegisterXmm15, - WHvX64RegisterFpMmx0, - WHvX64RegisterFpMmx1, - WHvX64RegisterFpMmx2, - WHvX64RegisterFpMmx3, - WHvX64RegisterFpMmx4, - WHvX64RegisterFpMmx5, - WHvX64RegisterFpMmx6, - WHvX64RegisterFpMmx7, - WHvX64RegisterFpControlStatus, - WHvX64RegisterXmmControlStatus, - - /* X64 MSRs */ - WHvX64RegisterEfer, -#ifdef TARGET_X86_64 - WHvX64RegisterKernelGsBase, -#endif - WHvX64RegisterApicBase, - /* WHvX64RegisterPat, */ - WHvX64RegisterSysenterCs, - WHvX64RegisterSysenterEip, - WHvX64RegisterSysenterEsp, - WHvX64RegisterStar, -#ifdef TARGET_X86_64 - WHvX64RegisterLstar, - WHvX64RegisterCstar, - WHvX64RegisterSfmask, -#endif - - /* Interrupt / Event Registers */ - /* - * WHvRegisterPendingInterruption, - * WHvRegisterInterruptState, - * WHvRegisterPendingEvent0, - * WHvRegisterPendingEvent1 - * WHvX64RegisterDeliverabilityNotifications, - */ -}; - -struct whpx_register_set { - WHV_REGISTER_VALUE values[RTL_NUMBER_OF(whpx_register_names)]; -}; - -struct whpx_vcpu { - WHV_EMULATOR_HANDLE emulator; - bool window_registered; - bool interruptable; - bool ready_for_pic_interrupt; - uint64_t tpr; - uint64_t apic_base; - bool interruption_pending; - - /* Must be the last field as it may have a tail */ - WHV_RUN_VP_EXIT_CONTEXT exit_ctx; -}; - -static bool whpx_allowed; -static bool whp_dispatch_initialized; -static HMODULE hWinHvPlatform, hWinHvEmulation; -static uint32_t max_vcpu_index; -struct whpx_state whpx_global; -struct WHPDispatch whp_dispatch; - - -/* - * VP support - */ - -static struct whpx_vcpu *get_whpx_vcpu(CPUState *cpu) -{ - return (struct whpx_vcpu *)cpu->hax_vcpu; -} - -static WHV_X64_SEGMENT_REGISTER whpx_seg_q2h(const SegmentCache *qs, int v86, - int r86) -{ - WHV_X64_SEGMENT_REGISTER hs; - unsigned flags = qs->flags; - - hs.Base = qs->base; - hs.Limit = qs->limit; - hs.Selector = qs->selector; - - if (v86) { - hs.Attributes = 0; 
- hs.SegmentType = 3; - hs.Present = 1; - hs.DescriptorPrivilegeLevel = 3; - hs.NonSystemSegment = 1; - - } else { - hs.Attributes = (flags >> DESC_TYPE_SHIFT); - - if (r86) { - /* hs.Base &= 0xfffff; */ - } - } - - return hs; -} - -static SegmentCache whpx_seg_h2q(const WHV_X64_SEGMENT_REGISTER *hs) -{ - SegmentCache qs; - - qs.base = hs->Base; - qs.limit = hs->Limit; - qs.selector = hs->Selector; - - qs.flags = ((uint32_t)hs->Attributes) << DESC_TYPE_SHIFT; - - return qs; -} - -static int whpx_set_tsc(CPUState *cpu) -{ - struct CPUX86State *env = (CPUArchState *)(cpu->env_ptr); - WHV_REGISTER_NAME tsc_reg = WHvX64RegisterTsc; - WHV_REGISTER_VALUE tsc_val; - HRESULT hr; - struct whpx_state *whpx = &whpx_global; - - /* - * Suspend the partition prior to setting the TSC to reduce the variance - * in TSC across vCPUs. When the first vCPU runs post suspend, the - * partition is automatically resumed. - */ - if (whp_dispatch.WHvSuspendPartitionTime) { - - /* - * Unable to suspend partition while setting TSC is not a fatal - * error. It just increases the likelihood of TSC variance between - * vCPUs and some guest OS are able to handle that just fine. 
- */ - hr = whp_dispatch.WHvSuspendPartitionTime(whpx->partition); - if (FAILED(hr)) { - warn_report("WHPX: Failed to suspend partition, hr=%08lx", hr); - } - } - - tsc_val.Reg64 = env->tsc; - hr = whp_dispatch.WHvSetVirtualProcessorRegisters( - whpx->partition, cpu->cpu_index, &tsc_reg, 1, &tsc_val); - if (FAILED(hr)) { - error_report("WHPX: Failed to set TSC, hr=%08lx", hr); - return -1; - } - - return 0; -} - -static void whpx_set_registers(CPUState *cpu, int level) -{ - struct whpx_state *whpx = &whpx_global; - struct whpx_vcpu *vcpu = get_whpx_vcpu(cpu); - struct CPUX86State *env = (CPUArchState *)(cpu->env_ptr); - X86CPU *x86_cpu = X86_CPU(cpu); - struct whpx_register_set vcxt; - HRESULT hr; - int idx; - int idx_next; - int i; - int v86, r86; - - assert(cpu_is_stopped(cpu) || qemu_cpu_is_self(cpu)); - - /* - * Following MSRs have side effects on the guest or are too heavy for - * runtime. Limit them to full state update. - */ - if (level >= WHPX_SET_RESET_STATE) { - whpx_set_tsc(cpu); - } - - memset(&vcxt, 0, sizeof(struct whpx_register_set)); - - v86 = (env->eflags & VM_MASK); - r86 = !(env->cr[0] & CR0_PE_MASK); - - vcpu->tpr = cpu_get_apic_tpr(x86_cpu->apic_state); - vcpu->apic_base = cpu_get_apic_base(x86_cpu->apic_state); - - idx = 0; - - /* Indexes for first 16 registers match between HV and QEMU definitions */ - idx_next = 16; - for (idx = 0; idx < CPU_NB_REGS; idx += 1) { - vcxt.values[idx].Reg64 = (uint64_t)env->regs[idx]; - } - idx = idx_next; - - /* Same goes for RIP and RFLAGS */ - assert(whpx_register_names[idx] == WHvX64RegisterRip); - vcxt.values[idx++].Reg64 = env->eip; - - assert(whpx_register_names[idx] == WHvX64RegisterRflags); - vcxt.values[idx++].Reg64 = env->eflags; - - /* Translate 6+4 segment registers. 
HV and QEMU order matches */ - assert(idx == WHvX64RegisterEs); - for (i = 0; i < 6; i += 1, idx += 1) { - vcxt.values[idx].Segment = whpx_seg_q2h(&env->segs[i], v86, r86); - } - - assert(idx == WHvX64RegisterLdtr); - vcxt.values[idx++].Segment = whpx_seg_q2h(&env->ldt, 0, 0); - - assert(idx == WHvX64RegisterTr); - vcxt.values[idx++].Segment = whpx_seg_q2h(&env->tr, 0, 0); - - assert(idx == WHvX64RegisterIdtr); - vcxt.values[idx].Table.Base = env->idt.base; - vcxt.values[idx].Table.Limit = env->idt.limit; - idx += 1; - - assert(idx == WHvX64RegisterGdtr); - vcxt.values[idx].Table.Base = env->gdt.base; - vcxt.values[idx].Table.Limit = env->gdt.limit; - idx += 1; - - /* CR0, 2, 3, 4, 8 */ - assert(whpx_register_names[idx] == WHvX64RegisterCr0); - vcxt.values[idx++].Reg64 = env->cr[0]; - assert(whpx_register_names[idx] == WHvX64RegisterCr2); - vcxt.values[idx++].Reg64 = env->cr[2]; - assert(whpx_register_names[idx] == WHvX64RegisterCr3); - vcxt.values[idx++].Reg64 = env->cr[3]; - assert(whpx_register_names[idx] == WHvX64RegisterCr4); - vcxt.values[idx++].Reg64 = env->cr[4]; - assert(whpx_register_names[idx] == WHvX64RegisterCr8); - vcxt.values[idx++].Reg64 = vcpu->tpr; - - /* 8 Debug Registers - Skipped */ - - /* 16 XMM registers */ - assert(whpx_register_names[idx] == WHvX64RegisterXmm0); - idx_next = idx + 16; - for (i = 0; i < sizeof(env->xmm_regs) / sizeof(ZMMReg); i += 1, idx += 1) { - vcxt.values[idx].Reg128.Low64 = env->xmm_regs[i].ZMM_Q(0); - vcxt.values[idx].Reg128.High64 = env->xmm_regs[i].ZMM_Q(1); - } - idx = idx_next; - - /* 8 FP registers */ - assert(whpx_register_names[idx] == WHvX64RegisterFpMmx0); - for (i = 0; i < 8; i += 1, idx += 1) { - vcxt.values[idx].Fp.AsUINT128.Low64 = env->fpregs[i].mmx.MMX_Q(0); - /* vcxt.values[idx].Fp.AsUINT128.High64 = - env->fpregs[i].mmx.MMX_Q(1); - */ - } - - /* FP control status register */ - assert(whpx_register_names[idx] == WHvX64RegisterFpControlStatus); - vcxt.values[idx].FpControlStatus.FpControl = env->fpuc; - 
vcxt.values[idx].FpControlStatus.FpStatus = - (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11; - vcxt.values[idx].FpControlStatus.FpTag = 0; - for (i = 0; i < 8; ++i) { - vcxt.values[idx].FpControlStatus.FpTag |= (!env->fptags[i]) << i; - } - vcxt.values[idx].FpControlStatus.Reserved = 0; - vcxt.values[idx].FpControlStatus.LastFpOp = env->fpop; - vcxt.values[idx].FpControlStatus.LastFpRip = env->fpip; - idx += 1; - - /* XMM control status register */ - assert(whpx_register_names[idx] == WHvX64RegisterXmmControlStatus); - vcxt.values[idx].XmmControlStatus.LastFpRdp = 0; - vcxt.values[idx].XmmControlStatus.XmmStatusControl = env->mxcsr; - vcxt.values[idx].XmmControlStatus.XmmStatusControlMask = 0x0000ffff; - idx += 1; - - /* MSRs */ - assert(whpx_register_names[idx] == WHvX64RegisterEfer); - vcxt.values[idx++].Reg64 = env->efer; -#ifdef TARGET_X86_64 - assert(whpx_register_names[idx] == WHvX64RegisterKernelGsBase); - vcxt.values[idx++].Reg64 = env->kernelgsbase; -#endif - - assert(whpx_register_names[idx] == WHvX64RegisterApicBase); - vcxt.values[idx++].Reg64 = vcpu->apic_base; - - /* WHvX64RegisterPat - Skipped */ - - assert(whpx_register_names[idx] == WHvX64RegisterSysenterCs); - vcxt.values[idx++].Reg64 = env->sysenter_cs; - assert(whpx_register_names[idx] == WHvX64RegisterSysenterEip); - vcxt.values[idx++].Reg64 = env->sysenter_eip; - assert(whpx_register_names[idx] == WHvX64RegisterSysenterEsp); - vcxt.values[idx++].Reg64 = env->sysenter_esp; - assert(whpx_register_names[idx] == WHvX64RegisterStar); - vcxt.values[idx++].Reg64 = env->star; -#ifdef TARGET_X86_64 - assert(whpx_register_names[idx] == WHvX64RegisterLstar); - vcxt.values[idx++].Reg64 = env->lstar; - assert(whpx_register_names[idx] == WHvX64RegisterCstar); - vcxt.values[idx++].Reg64 = env->cstar; - assert(whpx_register_names[idx] == WHvX64RegisterSfmask); - vcxt.values[idx++].Reg64 = env->fmask; -#endif - - /* Interrupt / Event Registers - Skipped */ - - assert(idx == 
RTL_NUMBER_OF(whpx_register_names)); - - hr = whp_dispatch.WHvSetVirtualProcessorRegisters( - whpx->partition, cpu->cpu_index, - whpx_register_names, - RTL_NUMBER_OF(whpx_register_names), - &vcxt.values[0]); - - if (FAILED(hr)) { - error_report("WHPX: Failed to set virtual processor context, hr=%08lx", - hr); - } - - return; -} - -static int whpx_get_tsc(CPUState *cpu) -{ - struct CPUX86State *env = (CPUArchState *)(cpu->env_ptr); - WHV_REGISTER_NAME tsc_reg = WHvX64RegisterTsc; - WHV_REGISTER_VALUE tsc_val; - HRESULT hr; - struct whpx_state *whpx = &whpx_global; - - hr = whp_dispatch.WHvGetVirtualProcessorRegisters( - whpx->partition, cpu->cpu_index, &tsc_reg, 1, &tsc_val); - if (FAILED(hr)) { - error_report("WHPX: Failed to get TSC, hr=%08lx", hr); - return -1; - } - - env->tsc = tsc_val.Reg64; - return 0; -} - -static void whpx_get_registers(CPUState *cpu) -{ - struct whpx_state *whpx = &whpx_global; - struct whpx_vcpu *vcpu = get_whpx_vcpu(cpu); - struct CPUX86State *env = (CPUArchState *)(cpu->env_ptr); - X86CPU *x86_cpu = X86_CPU(cpu); - struct whpx_register_set vcxt; - uint64_t tpr, apic_base; - HRESULT hr; - int idx; - int idx_next; - int i; - - assert(cpu_is_stopped(cpu) || qemu_cpu_is_self(cpu)); - - if (!env->tsc_valid) { - whpx_get_tsc(cpu); - env->tsc_valid = !runstate_is_running(); - } - - hr = whp_dispatch.WHvGetVirtualProcessorRegisters( - whpx->partition, cpu->cpu_index, - whpx_register_names, - RTL_NUMBER_OF(whpx_register_names), - &vcxt.values[0]); - if (FAILED(hr)) { - error_report("WHPX: Failed to get virtual processor context, hr=%08lx", - hr); - } - - idx = 0; - - /* Indexes for first 16 registers match between HV and QEMU definitions */ - idx_next = 16; - for (idx = 0; idx < CPU_NB_REGS; idx += 1) { - env->regs[idx] = vcxt.values[idx].Reg64; - } - idx = idx_next; - - /* Same goes for RIP and RFLAGS */ - assert(whpx_register_names[idx] == WHvX64RegisterRip); - env->eip = vcxt.values[idx++].Reg64; - assert(whpx_register_names[idx] == 
WHvX64RegisterRflags); - env->eflags = vcxt.values[idx++].Reg64; - - /* Translate 6+4 segment registers. HV and QEMU order matches */ - assert(idx == WHvX64RegisterEs); - for (i = 0; i < 6; i += 1, idx += 1) { - env->segs[i] = whpx_seg_h2q(&vcxt.values[idx].Segment); - } - - assert(idx == WHvX64RegisterLdtr); - env->ldt = whpx_seg_h2q(&vcxt.values[idx++].Segment); - assert(idx == WHvX64RegisterTr); - env->tr = whpx_seg_h2q(&vcxt.values[idx++].Segment); - assert(idx == WHvX64RegisterIdtr); - env->idt.base = vcxt.values[idx].Table.Base; - env->idt.limit = vcxt.values[idx].Table.Limit; - idx += 1; - assert(idx == WHvX64RegisterGdtr); - env->gdt.base = vcxt.values[idx].Table.Base; - env->gdt.limit = vcxt.values[idx].Table.Limit; - idx += 1; - - /* CR0, 2, 3, 4, 8 */ - assert(whpx_register_names[idx] == WHvX64RegisterCr0); - env->cr[0] = vcxt.values[idx++].Reg64; - assert(whpx_register_names[idx] == WHvX64RegisterCr2); - env->cr[2] = vcxt.values[idx++].Reg64; - assert(whpx_register_names[idx] == WHvX64RegisterCr3); - env->cr[3] = vcxt.values[idx++].Reg64; - assert(whpx_register_names[idx] == WHvX64RegisterCr4); - env->cr[4] = vcxt.values[idx++].Reg64; - assert(whpx_register_names[idx] == WHvX64RegisterCr8); - tpr = vcxt.values[idx++].Reg64; - if (tpr != vcpu->tpr) { - vcpu->tpr = tpr; - cpu_set_apic_tpr(x86_cpu->apic_state, tpr); - } - - /* 8 Debug Registers - Skipped */ - - /* 16 XMM registers */ - assert(whpx_register_names[idx] == WHvX64RegisterXmm0); - idx_next = idx + 16; - for (i = 0; i < sizeof(env->xmm_regs) / sizeof(ZMMReg); i += 1, idx += 1) { - env->xmm_regs[i].ZMM_Q(0) = vcxt.values[idx].Reg128.Low64; - env->xmm_regs[i].ZMM_Q(1) = vcxt.values[idx].Reg128.High64; - } - idx = idx_next; - - /* 8 FP registers */ - assert(whpx_register_names[idx] == WHvX64RegisterFpMmx0); - for (i = 0; i < 8; i += 1, idx += 1) { - env->fpregs[i].mmx.MMX_Q(0) = vcxt.values[idx].Fp.AsUINT128.Low64; - /* env->fpregs[i].mmx.MMX_Q(1) = - vcxt.values[idx].Fp.AsUINT128.High64; - */ - } 
- - /* FP control status register */ - assert(whpx_register_names[idx] == WHvX64RegisterFpControlStatus); - env->fpuc = vcxt.values[idx].FpControlStatus.FpControl; - env->fpstt = (vcxt.values[idx].FpControlStatus.FpStatus >> 11) & 0x7; - env->fpus = vcxt.values[idx].FpControlStatus.FpStatus & ~0x3800; - for (i = 0; i < 8; ++i) { - env->fptags[i] = !((vcxt.values[idx].FpControlStatus.FpTag >> i) & 1); - } - env->fpop = vcxt.values[idx].FpControlStatus.LastFpOp; - env->fpip = vcxt.values[idx].FpControlStatus.LastFpRip; - idx += 1; - - /* XMM control status register */ - assert(whpx_register_names[idx] == WHvX64RegisterXmmControlStatus); - env->mxcsr = vcxt.values[idx].XmmControlStatus.XmmStatusControl; - idx += 1; - - /* MSRs */ - assert(whpx_register_names[idx] == WHvX64RegisterEfer); - env->efer = vcxt.values[idx++].Reg64; -#ifdef TARGET_X86_64 - assert(whpx_register_names[idx] == WHvX64RegisterKernelGsBase); - env->kernelgsbase = vcxt.values[idx++].Reg64; -#endif - - assert(whpx_register_names[idx] == WHvX64RegisterApicBase); - apic_base = vcxt.values[idx++].Reg64; - if (apic_base != vcpu->apic_base) { - vcpu->apic_base = apic_base; - cpu_set_apic_base(x86_cpu->apic_state, vcpu->apic_base); - } - - /* WHvX64RegisterPat - Skipped */ - - assert(whpx_register_names[idx] == WHvX64RegisterSysenterCs); - env->sysenter_cs = vcxt.values[idx++].Reg64; - assert(whpx_register_names[idx] == WHvX64RegisterSysenterEip); - env->sysenter_eip = vcxt.values[idx++].Reg64; - assert(whpx_register_names[idx] == WHvX64RegisterSysenterEsp); - env->sysenter_esp = vcxt.values[idx++].Reg64; - assert(whpx_register_names[idx] == WHvX64RegisterStar); - env->star = vcxt.values[idx++].Reg64; -#ifdef TARGET_X86_64 - assert(whpx_register_names[idx] == WHvX64RegisterLstar); - env->lstar = vcxt.values[idx++].Reg64; - assert(whpx_register_names[idx] == WHvX64RegisterCstar); - env->cstar = vcxt.values[idx++].Reg64; - assert(whpx_register_names[idx] == WHvX64RegisterSfmask); - env->fmask = 
vcxt.values[idx++].Reg64; -#endif - - /* Interrupt / Event Registers - Skipped */ - - assert(idx == RTL_NUMBER_OF(whpx_register_names)); - - if (whpx_apic_in_platform()) { - whpx_apic_get(x86_cpu->apic_state); - } - - return; -} - -static HRESULT CALLBACK whpx_emu_ioport_callback( - void *ctx, - WHV_EMULATOR_IO_ACCESS_INFO *IoAccess) -{ - MemTxAttrs attrs = { 0 }; - address_space_rw(&address_space_io, IoAccess->Port, attrs, - &IoAccess->Data, IoAccess->AccessSize, - IoAccess->Direction); - return S_OK; -} - -static HRESULT CALLBACK whpx_emu_mmio_callback( - void *ctx, - WHV_EMULATOR_MEMORY_ACCESS_INFO *ma) -{ - cpu_physical_memory_rw(ma->GpaAddress, ma->Data, ma->AccessSize, - ma->Direction); - return S_OK; -} - -static HRESULT CALLBACK whpx_emu_getreg_callback( - void *ctx, - const WHV_REGISTER_NAME *RegisterNames, - UINT32 RegisterCount, - WHV_REGISTER_VALUE *RegisterValues) -{ - HRESULT hr; - struct whpx_state *whpx = &whpx_global; - CPUState *cpu = (CPUState *)ctx; - - hr = whp_dispatch.WHvGetVirtualProcessorRegisters( - whpx->partition, cpu->cpu_index, - RegisterNames, RegisterCount, - RegisterValues); - if (FAILED(hr)) { - error_report("WHPX: Failed to get virtual processor registers," - " hr=%08lx", hr); - } - - return hr; -} - -static HRESULT CALLBACK whpx_emu_setreg_callback( - void *ctx, - const WHV_REGISTER_NAME *RegisterNames, - UINT32 RegisterCount, - const WHV_REGISTER_VALUE *RegisterValues) -{ - HRESULT hr; - struct whpx_state *whpx = &whpx_global; - CPUState *cpu = (CPUState *)ctx; - - hr = whp_dispatch.WHvSetVirtualProcessorRegisters( - whpx->partition, cpu->cpu_index, - RegisterNames, RegisterCount, - RegisterValues); - if (FAILED(hr)) { - error_report("WHPX: Failed to set virtual processor registers," - " hr=%08lx", hr); - } - - /* - * The emulator just successfully wrote the register state. We clear the - * dirty state so we avoid the double write on resume of the VP. 
- */ - cpu->vcpu_dirty = false; - - return hr; -} - -static HRESULT CALLBACK whpx_emu_translate_callback( - void *ctx, - WHV_GUEST_VIRTUAL_ADDRESS Gva, - WHV_TRANSLATE_GVA_FLAGS TranslateFlags, - WHV_TRANSLATE_GVA_RESULT_CODE *TranslationResult, - WHV_GUEST_PHYSICAL_ADDRESS *Gpa) -{ - HRESULT hr; - struct whpx_state *whpx = &whpx_global; - CPUState *cpu = (CPUState *)ctx; - WHV_TRANSLATE_GVA_RESULT res; - - hr = whp_dispatch.WHvTranslateGva(whpx->partition, cpu->cpu_index, - Gva, TranslateFlags, &res, Gpa); - if (FAILED(hr)) { - error_report("WHPX: Failed to translate GVA, hr=%08lx", hr); - } else { - *TranslationResult = res.ResultCode; - } - - return hr; -} - -static const WHV_EMULATOR_CALLBACKS whpx_emu_callbacks = { - .Size = sizeof(WHV_EMULATOR_CALLBACKS), - .WHvEmulatorIoPortCallback = whpx_emu_ioport_callback, - .WHvEmulatorMemoryCallback = whpx_emu_mmio_callback, - .WHvEmulatorGetVirtualProcessorRegisters = whpx_emu_getreg_callback, - .WHvEmulatorSetVirtualProcessorRegisters = whpx_emu_setreg_callback, - .WHvEmulatorTranslateGvaPage = whpx_emu_translate_callback, -}; - -static int whpx_handle_mmio(CPUState *cpu, WHV_MEMORY_ACCESS_CONTEXT *ctx) -{ - HRESULT hr; - struct whpx_vcpu *vcpu = get_whpx_vcpu(cpu); - WHV_EMULATOR_STATUS emu_status; - - hr = whp_dispatch.WHvEmulatorTryMmioEmulation( - vcpu->emulator, cpu, - &vcpu->exit_ctx.VpContext, ctx, - &emu_status); - if (FAILED(hr)) { - error_report("WHPX: Failed to parse MMIO access, hr=%08lx", hr); - return -1; - } - - if (!emu_status.EmulationSuccessful) { - error_report("WHPX: Failed to emulate MMIO access with" - " EmulatorReturnStatus: %u", emu_status.AsUINT32); - return -1; - } - - return 0; -} - -static int whpx_handle_portio(CPUState *cpu, - WHV_X64_IO_PORT_ACCESS_CONTEXT *ctx) -{ - HRESULT hr; - struct whpx_vcpu *vcpu = get_whpx_vcpu(cpu); - WHV_EMULATOR_STATUS emu_status; - - hr = whp_dispatch.WHvEmulatorTryIoEmulation( - vcpu->emulator, cpu, - &vcpu->exit_ctx.VpContext, ctx, - &emu_status); - if 
(FAILED(hr)) { - error_report("WHPX: Failed to parse PortIO access, hr=%08lx", hr); - return -1; - } - - if (!emu_status.EmulationSuccessful) { - error_report("WHPX: Failed to emulate PortIO access with" - " EmulatorReturnStatus: %u", emu_status.AsUINT32); - return -1; - } - - return 0; -} - -static int whpx_handle_halt(CPUState *cpu) -{ - struct CPUX86State *env = (CPUArchState *)(cpu->env_ptr); - int ret = 0; - - qemu_mutex_lock_iothread(); - if (!((cpu->interrupt_request & CPU_INTERRUPT_HARD) && - (env->eflags & IF_MASK)) && - !(cpu->interrupt_request & CPU_INTERRUPT_NMI)) { - cpu->exception_index = EXCP_HLT; - cpu->halted = true; - ret = 1; - } - qemu_mutex_unlock_iothread(); - - return ret; -} - -static void whpx_vcpu_pre_run(CPUState *cpu) -{ - HRESULT hr; - struct whpx_state *whpx = &whpx_global; - struct whpx_vcpu *vcpu = get_whpx_vcpu(cpu); - struct CPUX86State *env = (CPUArchState *)(cpu->env_ptr); - X86CPU *x86_cpu = X86_CPU(cpu); - int irq; - uint8_t tpr; - WHV_X64_PENDING_INTERRUPTION_REGISTER new_int; - UINT32 reg_count = 0; - WHV_REGISTER_VALUE reg_values[3]; - WHV_REGISTER_NAME reg_names[3]; - - memset(&new_int, 0, sizeof(new_int)); - memset(reg_values, 0, sizeof(reg_values)); - - qemu_mutex_lock_iothread(); - - /* Inject NMI */ - if (!vcpu->interruption_pending && - cpu->interrupt_request & (CPU_INTERRUPT_NMI | CPU_INTERRUPT_SMI)) { - if (cpu->interrupt_request & CPU_INTERRUPT_NMI) { - cpu->interrupt_request &= ~CPU_INTERRUPT_NMI; - vcpu->interruptable = false; - new_int.InterruptionType = WHvX64PendingNmi; - new_int.InterruptionPending = 1; - new_int.InterruptionVector = 2; - } - if (cpu->interrupt_request & CPU_INTERRUPT_SMI) { - cpu->interrupt_request &= ~CPU_INTERRUPT_SMI; - } - } - - /* - * Force the VCPU out of its inner loop to process any INIT requests or - * commit pending TPR access. 
- */ - if (cpu->interrupt_request & (CPU_INTERRUPT_INIT | CPU_INTERRUPT_TPR)) { - if ((cpu->interrupt_request & CPU_INTERRUPT_INIT) && - !(env->hflags & HF_SMM_MASK)) { - cpu->exit_request = 1; - } - if (cpu->interrupt_request & CPU_INTERRUPT_TPR) { - cpu->exit_request = 1; - } - } - - /* Get pending hard interruption or replay one that was overwritten */ - if (!whpx_apic_in_platform()) { - if (!vcpu->interruption_pending && - vcpu->interruptable && (env->eflags & IF_MASK)) { - assert(!new_int.InterruptionPending); - if (cpu->interrupt_request & CPU_INTERRUPT_HARD) { - cpu->interrupt_request &= ~CPU_INTERRUPT_HARD; - irq = cpu_get_pic_interrupt(env); - if (irq >= 0) { - new_int.InterruptionType = WHvX64PendingInterrupt; - new_int.InterruptionPending = 1; - new_int.InterruptionVector = irq; - } - } - } - - /* Setup interrupt state if new one was prepared */ - if (new_int.InterruptionPending) { - reg_values[reg_count].PendingInterruption = new_int; - reg_names[reg_count] = WHvRegisterPendingInterruption; - reg_count += 1; - } - } else if (vcpu->ready_for_pic_interrupt && - (cpu->interrupt_request & CPU_INTERRUPT_HARD)) { - cpu->interrupt_request &= ~CPU_INTERRUPT_HARD; - irq = cpu_get_pic_interrupt(env); - if (irq >= 0) { - reg_names[reg_count] = WHvRegisterPendingEvent; - reg_values[reg_count].ExtIntEvent = (WHV_X64_PENDING_EXT_INT_EVENT) - { - .EventPending = 1, - .EventType = WHvX64PendingEventExtInt, - .Vector = irq, - }; - reg_count += 1; - } - } - - /* Sync the TPR to the CR8 if was modified during the intercept */ - tpr = cpu_get_apic_tpr(x86_cpu->apic_state); - if (tpr != vcpu->tpr) { - vcpu->tpr = tpr; - reg_values[reg_count].Reg64 = tpr; - cpu->exit_request = 1; - reg_names[reg_count] = WHvX64RegisterCr8; - reg_count += 1; - } - - /* Update the state of the interrupt delivery notification */ - if (!vcpu->window_registered && - cpu->interrupt_request & CPU_INTERRUPT_HARD) { - reg_values[reg_count].DeliverabilityNotifications = - 
(WHV_X64_DELIVERABILITY_NOTIFICATIONS_REGISTER) { - .InterruptNotification = 1 - }; - vcpu->window_registered = 1; - reg_names[reg_count] = WHvX64RegisterDeliverabilityNotifications; - reg_count += 1; - } - - qemu_mutex_unlock_iothread(); - vcpu->ready_for_pic_interrupt = false; - - if (reg_count) { - hr = whp_dispatch.WHvSetVirtualProcessorRegisters( - whpx->partition, cpu->cpu_index, - reg_names, reg_count, reg_values); - if (FAILED(hr)) { - error_report("WHPX: Failed to set interrupt state registers," - " hr=%08lx", hr); - } - } - - return; -} - -static void whpx_vcpu_post_run(CPUState *cpu) -{ - struct whpx_vcpu *vcpu = get_whpx_vcpu(cpu); - struct CPUX86State *env = (CPUArchState *)(cpu->env_ptr); - X86CPU *x86_cpu = X86_CPU(cpu); - - env->eflags = vcpu->exit_ctx.VpContext.Rflags; - - uint64_t tpr = vcpu->exit_ctx.VpContext.Cr8; - if (vcpu->tpr != tpr) { - vcpu->tpr = tpr; - qemu_mutex_lock_iothread(); - cpu_set_apic_tpr(x86_cpu->apic_state, vcpu->tpr); - qemu_mutex_unlock_iothread(); - } - - vcpu->interruption_pending = - vcpu->exit_ctx.VpContext.ExecutionState.InterruptionPending; - - vcpu->interruptable = - !vcpu->exit_ctx.VpContext.ExecutionState.InterruptShadow; - - return; -} - -static void whpx_vcpu_process_async_events(CPUState *cpu) -{ - struct CPUX86State *env = (CPUArchState *)(cpu->env_ptr); - X86CPU *x86_cpu = X86_CPU(cpu); - struct whpx_vcpu *vcpu = get_whpx_vcpu(cpu); - - if ((cpu->interrupt_request & CPU_INTERRUPT_INIT) && - !(env->hflags & HF_SMM_MASK)) { - whpx_cpu_synchronize_state(cpu); - do_cpu_init(x86_cpu); - vcpu->interruptable = true; - } - - if (cpu->interrupt_request & CPU_INTERRUPT_POLL) { - cpu->interrupt_request &= ~CPU_INTERRUPT_POLL; - apic_poll_irq(x86_cpu->apic_state); - } - - if (((cpu->interrupt_request & CPU_INTERRUPT_HARD) && - (env->eflags & IF_MASK)) || - (cpu->interrupt_request & CPU_INTERRUPT_NMI)) { - cpu->halted = false; - } - - if (cpu->interrupt_request & CPU_INTERRUPT_SIPI) { - whpx_cpu_synchronize_state(cpu); - 
do_cpu_sipi(x86_cpu); - } - - if (cpu->interrupt_request & CPU_INTERRUPT_TPR) { - cpu->interrupt_request &= ~CPU_INTERRUPT_TPR; - whpx_cpu_synchronize_state(cpu); - apic_handle_tpr_access_report(x86_cpu->apic_state, env->eip, - env->tpr_access_type); - } - - return; -} - -static int whpx_vcpu_run(CPUState *cpu) -{ - HRESULT hr; - struct whpx_state *whpx = &whpx_global; - struct whpx_vcpu *vcpu = get_whpx_vcpu(cpu); - int ret; - - whpx_vcpu_process_async_events(cpu); - if (cpu->halted && !whpx_apic_in_platform()) { - cpu->exception_index = EXCP_HLT; - qatomic_set(&cpu->exit_request, false); - return 0; - } - - qemu_mutex_unlock_iothread(); - cpu_exec_start(cpu); - - do { - if (cpu->vcpu_dirty) { - whpx_set_registers(cpu, WHPX_SET_RUNTIME_STATE); - cpu->vcpu_dirty = false; - } - - whpx_vcpu_pre_run(cpu); - - if (qatomic_read(&cpu->exit_request)) { - whpx_vcpu_kick(cpu); - } - - hr = whp_dispatch.WHvRunVirtualProcessor( - whpx->partition, cpu->cpu_index, - &vcpu->exit_ctx, sizeof(vcpu->exit_ctx)); - - if (FAILED(hr)) { - error_report("WHPX: Failed to exec a virtual processor," - " hr=%08lx", hr); - ret = -1; - break; - } - - whpx_vcpu_post_run(cpu); - - switch (vcpu->exit_ctx.ExitReason) { - case WHvRunVpExitReasonMemoryAccess: - ret = whpx_handle_mmio(cpu, &vcpu->exit_ctx.MemoryAccess); - break; - - case WHvRunVpExitReasonX64IoPortAccess: - ret = whpx_handle_portio(cpu, &vcpu->exit_ctx.IoPortAccess); - break; - - case WHvRunVpExitReasonX64InterruptWindow: - vcpu->ready_for_pic_interrupt = 1; - vcpu->window_registered = 0; - ret = 0; - break; - - case WHvRunVpExitReasonX64ApicEoi: - assert(whpx_apic_in_platform()); - ioapic_eoi_broadcast(vcpu->exit_ctx.ApicEoi.InterruptVector); - break; - - case WHvRunVpExitReasonX64Halt: - ret = whpx_handle_halt(cpu); - break; - - case WHvRunVpExitReasonX64ApicInitSipiTrap: { - WHV_INTERRUPT_CONTROL ipi = {0}; - uint64_t icr = vcpu->exit_ctx.ApicInitSipi.ApicIcr; - uint32_t delivery_mode = - (icr & APIC_ICR_DELIV_MOD) >> 
APIC_ICR_DELIV_MOD_SHIFT; - int dest_shorthand = - (icr & APIC_ICR_DEST_SHORT) >> APIC_ICR_DEST_SHORT_SHIFT; - bool broadcast = false; - bool include_self = false; - uint32_t i; - - /* We only registered for INIT and SIPI exits. */ - if ((delivery_mode != APIC_DM_INIT) && - (delivery_mode != APIC_DM_SIPI)) { - error_report( - "WHPX: Unexpected APIC exit that is not a INIT or SIPI"); - break; - } - - if (delivery_mode == APIC_DM_INIT) { - ipi.Type = WHvX64InterruptTypeInit; - } else { - ipi.Type = WHvX64InterruptTypeSipi; - } - - ipi.DestinationMode = - ((icr & APIC_ICR_DEST_MOD) >> APIC_ICR_DEST_MOD_SHIFT) ? - WHvX64InterruptDestinationModeLogical : - WHvX64InterruptDestinationModePhysical; - - ipi.TriggerMode = - ((icr & APIC_ICR_TRIGGER_MOD) >> APIC_ICR_TRIGGER_MOD_SHIFT) ? - WHvX64InterruptTriggerModeLevel : - WHvX64InterruptTriggerModeEdge; - - ipi.Vector = icr & APIC_VECTOR_MASK; - switch (dest_shorthand) { - /* no shorthand. Bits 56-63 contain the destination. */ - case 0: - ipi.Destination = (icr >> 56) & APIC_VECTOR_MASK; - hr = whp_dispatch.WHvRequestInterrupt(whpx->partition, - &ipi, sizeof(ipi)); - if (FAILED(hr)) { - error_report("WHPX: Failed to request interrupt hr=%08lx", - hr); - } - - break; - - /* self */ - case 1: - include_self = true; - break; - - /* broadcast, including self */ - case 2: - broadcast = true; - include_self = true; - break; - - /* broadcast, excluding self */ - case 3: - broadcast = true; - break; - } - - if (!broadcast && !include_self) { - break; - } - - for (i = 0; i <= max_vcpu_index; i++) { - if (i == cpu->cpu_index && !include_self) { - continue; - } - - /* - * Assuming that APIC Ids are identity mapped since - * WHvX64RegisterApicId & WHvX64RegisterInitialApicId registers - * are not handled yet and the hypervisor doesn't allow the - * guest to modify the APIC ID. 
- */ - ipi.Destination = i; - hr = whp_dispatch.WHvRequestInterrupt(whpx->partition, - &ipi, sizeof(ipi)); - if (FAILED(hr)) { - error_report( - "WHPX: Failed to request SIPI for %d, hr=%08lx", - i, hr); - } - } - - break; - } - - case WHvRunVpExitReasonCanceled: - cpu->exception_index = EXCP_INTERRUPT; - ret = 1; - break; - - case WHvRunVpExitReasonX64MsrAccess: { - WHV_REGISTER_VALUE reg_values[3] = {0}; - WHV_REGISTER_NAME reg_names[3]; - UINT32 reg_count; - - reg_names[0] = WHvX64RegisterRip; - reg_names[1] = WHvX64RegisterRax; - reg_names[2] = WHvX64RegisterRdx; - - reg_values[0].Reg64 = - vcpu->exit_ctx.VpContext.Rip + - vcpu->exit_ctx.VpContext.InstructionLength; - - /* - * For all unsupported MSR access we: - * ignore writes - * return 0 on read. - */ - reg_count = vcpu->exit_ctx.MsrAccess.AccessInfo.IsWrite ? - 1 : 3; - - hr = whp_dispatch.WHvSetVirtualProcessorRegisters( - whpx->partition, - cpu->cpu_index, - reg_names, reg_count, - reg_values); - - if (FAILED(hr)) { - error_report("WHPX: Failed to set MsrAccess state " - " registers, hr=%08lx", hr); - } - ret = 0; - break; - } - case WHvRunVpExitReasonX64Cpuid: { - WHV_REGISTER_VALUE reg_values[5]; - WHV_REGISTER_NAME reg_names[5]; - UINT32 reg_count = 5; - UINT64 cpuid_fn, rip = 0, rax = 0, rcx = 0, rdx = 0, rbx = 0; - X86CPU *x86_cpu = X86_CPU(cpu); - CPUX86State *env = &x86_cpu->env; - - memset(reg_values, 0, sizeof(reg_values)); - - rip = vcpu->exit_ctx.VpContext.Rip + - vcpu->exit_ctx.VpContext.InstructionLength; - cpuid_fn = vcpu->exit_ctx.CpuidAccess.Rax; - - /* - * Ideally, these should be supplied to the hypervisor during VCPU - * initialization and it should be able to satisfy this request. - * But, currently, WHPX doesn't support setting CPUID values in the - * hypervisor once the partition has been setup, which is too late - * since VCPUs are realized later. 
For now, use the values from - * QEMU to satisfy these requests, until WHPX adds support for - * being able to set these values in the hypervisor at runtime. - */ - cpu_x86_cpuid(env, cpuid_fn, 0, (UINT32 *)&rax, (UINT32 *)&rbx, - (UINT32 *)&rcx, (UINT32 *)&rdx); - switch (cpuid_fn) { - case 0x40000000: - /* Expose the vmware cpu frequency cpuid leaf */ - rax = 0x40000010; - rbx = rcx = rdx = 0; - break; - - case 0x40000010: - rax = env->tsc_khz; - rbx = env->apic_bus_freq / 1000; /* Hz to KHz */ - rcx = rdx = 0; - break; - - case 0x80000001: - /* Remove any support of OSVW */ - rcx &= ~CPUID_EXT3_OSVW; - break; - } - - reg_names[0] = WHvX64RegisterRip; - reg_names[1] = WHvX64RegisterRax; - reg_names[2] = WHvX64RegisterRcx; - reg_names[3] = WHvX64RegisterRdx; - reg_names[4] = WHvX64RegisterRbx; - - reg_values[0].Reg64 = rip; - reg_values[1].Reg64 = rax; - reg_values[2].Reg64 = rcx; - reg_values[3].Reg64 = rdx; - reg_values[4].Reg64 = rbx; - - hr = whp_dispatch.WHvSetVirtualProcessorRegisters( - whpx->partition, cpu->cpu_index, - reg_names, - reg_count, - reg_values); - - if (FAILED(hr)) { - error_report("WHPX: Failed to set CpuidAccess state registers," - " hr=%08lx", hr); - } - ret = 0; - break; - } - case WHvRunVpExitReasonNone: - case WHvRunVpExitReasonUnrecoverableException: - case WHvRunVpExitReasonInvalidVpRegisterValue: - case WHvRunVpExitReasonUnsupportedFeature: - case WHvRunVpExitReasonException: - default: - error_report("WHPX: Unexpected VP exit code %d", - vcpu->exit_ctx.ExitReason); - whpx_get_registers(cpu); - qemu_mutex_lock_iothread(); - qemu_system_guest_panicked(cpu_get_crash_info(cpu)); - qemu_mutex_unlock_iothread(); - break; - } - - } while (!ret); - - cpu_exec_end(cpu); - qemu_mutex_lock_iothread(); - current_cpu = cpu; - - qatomic_set(&cpu->exit_request, false); - - return ret < 0; -} - -static void do_whpx_cpu_synchronize_state(CPUState *cpu, run_on_cpu_data arg) -{ - if (!cpu->vcpu_dirty) { - whpx_get_registers(cpu); - cpu->vcpu_dirty = 
true; - } -} - -static void do_whpx_cpu_synchronize_post_reset(CPUState *cpu, - run_on_cpu_data arg) -{ - whpx_set_registers(cpu, WHPX_SET_RESET_STATE); - cpu->vcpu_dirty = false; -} - -static void do_whpx_cpu_synchronize_post_init(CPUState *cpu, - run_on_cpu_data arg) -{ - whpx_set_registers(cpu, WHPX_SET_FULL_STATE); - cpu->vcpu_dirty = false; -} - -static void do_whpx_cpu_synchronize_pre_loadvm(CPUState *cpu, - run_on_cpu_data arg) -{ - cpu->vcpu_dirty = true; -} - -/* - * CPU support. - */ - -void whpx_cpu_synchronize_state(CPUState *cpu) -{ - if (!cpu->vcpu_dirty) { - run_on_cpu(cpu, do_whpx_cpu_synchronize_state, RUN_ON_CPU_NULL); - } -} - -void whpx_cpu_synchronize_post_reset(CPUState *cpu) -{ - run_on_cpu(cpu, do_whpx_cpu_synchronize_post_reset, RUN_ON_CPU_NULL); -} - -void whpx_cpu_synchronize_post_init(CPUState *cpu) -{ - run_on_cpu(cpu, do_whpx_cpu_synchronize_post_init, RUN_ON_CPU_NULL); -} - -void whpx_cpu_synchronize_pre_loadvm(CPUState *cpu) -{ - run_on_cpu(cpu, do_whpx_cpu_synchronize_pre_loadvm, RUN_ON_CPU_NULL); -} - -/* - * Vcpu support. 
- */ - -static Error *whpx_migration_blocker; - -static void whpx_cpu_update_state(void *opaque, int running, RunState state) -{ - CPUX86State *env = opaque; - - if (running) { - env->tsc_valid = false; - } -} - -int whpx_init_vcpu(CPUState *cpu) -{ - HRESULT hr; - struct whpx_state *whpx = &whpx_global; - struct whpx_vcpu *vcpu = NULL; - Error *local_error = NULL; - struct CPUX86State *env = (CPUArchState *)(cpu->env_ptr); - X86CPU *x86_cpu = X86_CPU(cpu); - UINT64 freq = 0; - int ret; - - /* Add migration blockers for all unsupported features of the - * Windows Hypervisor Platform - */ - if (whpx_migration_blocker == NULL) { - error_setg(&whpx_migration_blocker, - "State blocked due to non-migratable CPUID feature support," - "dirty memory tracking support, and XSAVE/XRSTOR support"); - - (void)migrate_add_blocker(whpx_migration_blocker, &local_error); - if (local_error) { - error_report_err(local_error); - migrate_del_blocker(whpx_migration_blocker); - error_free(whpx_migration_blocker); - ret = -EINVAL; - goto error; - } - } - - vcpu = g_malloc0(sizeof(struct whpx_vcpu)); - - if (!vcpu) { - error_report("WHPX: Failed to allocte VCPU context."); - ret = -ENOMEM; - goto error; - } - - hr = whp_dispatch.WHvEmulatorCreateEmulator( - &whpx_emu_callbacks, - &vcpu->emulator); - if (FAILED(hr)) { - error_report("WHPX: Failed to setup instruction completion support," - " hr=%08lx", hr); - ret = -EINVAL; - goto error; - } - - hr = whp_dispatch.WHvCreateVirtualProcessor( - whpx->partition, cpu->cpu_index, 0); - if (FAILED(hr)) { - error_report("WHPX: Failed to create a virtual processor," - " hr=%08lx", hr); - whp_dispatch.WHvEmulatorDestroyEmulator(vcpu->emulator); - ret = -EINVAL; - goto error; - } - - /* - * vcpu's TSC frequency is either specified by user, or use the value - * provided by Hyper-V if the former is not present. 
In the latter case, we - * query it from Hyper-V and record in env->tsc_khz, so that vcpu's TSC - * frequency can be migrated later via this field. - */ - if (!env->tsc_khz) { - hr = whp_dispatch.WHvGetCapability( - WHvCapabilityCodeProcessorClockFrequency, &freq, sizeof(freq), - NULL); - if (hr != WHV_E_UNKNOWN_CAPABILITY) { - if (FAILED(hr)) { - printf("WHPX: Failed to query tsc frequency, hr=0x%08lx\n", hr); - } else { - env->tsc_khz = freq / 1000; /* Hz to KHz */ - } - } - } - - env->apic_bus_freq = HYPERV_APIC_BUS_FREQUENCY; - hr = whp_dispatch.WHvGetCapability( - WHvCapabilityCodeInterruptClockFrequency, &freq, sizeof(freq), NULL); - if (hr != WHV_E_UNKNOWN_CAPABILITY) { - if (FAILED(hr)) { - printf("WHPX: Failed to query apic bus frequency hr=0x%08lx\n", hr); - } else { - env->apic_bus_freq = freq; - } - } - - /* - * If the vmware cpuid frequency leaf option is set, and we have a valid - * tsc value, trap the corresponding cpuid's. - */ - if (x86_cpu->vmware_cpuid_freq && env->tsc_khz) { - UINT32 cpuidExitList[] = {1, 0x80000001, 0x40000000, 0x40000010}; - - hr = whp_dispatch.WHvSetPartitionProperty( - whpx->partition, - WHvPartitionPropertyCodeCpuidExitList, - cpuidExitList, - RTL_NUMBER_OF(cpuidExitList) * sizeof(UINT32)); - - if (FAILED(hr)) { - error_report("WHPX: Failed to set partition CpuidExitList hr=%08lx", - hr); - ret = -EINVAL; - goto error; - } - } - - vcpu->interruptable = true; - cpu->vcpu_dirty = true; - cpu->hax_vcpu = (struct hax_vcpu_state *)vcpu; - max_vcpu_index = max(max_vcpu_index, cpu->cpu_index); - qemu_add_vm_change_state_handler(whpx_cpu_update_state, cpu->env_ptr); - - return 0; - -error: - g_free(vcpu); - - return ret; -} - -int whpx_vcpu_exec(CPUState *cpu) -{ - int ret; - int fatal; - - for (;;) { - if (cpu->exception_index >= EXCP_INTERRUPT) { - ret = cpu->exception_index; - cpu->exception_index = -1; - break; - } - - fatal = whpx_vcpu_run(cpu); - - if (fatal) { - error_report("WHPX: Failed to exec a virtual processor"); - 
abort(); - } - } - - return ret; -} - -void whpx_destroy_vcpu(CPUState *cpu) -{ - struct whpx_state *whpx = &whpx_global; - struct whpx_vcpu *vcpu = get_whpx_vcpu(cpu); - - whp_dispatch.WHvDeleteVirtualProcessor(whpx->partition, cpu->cpu_index); - whp_dispatch.WHvEmulatorDestroyEmulator(vcpu->emulator); - g_free(cpu->hax_vcpu); - return; -} - -void whpx_vcpu_kick(CPUState *cpu) -{ - struct whpx_state *whpx = &whpx_global; - whp_dispatch.WHvCancelRunVirtualProcessor( - whpx->partition, cpu->cpu_index, 0); -} - -/* - * Memory support. - */ - -static void whpx_update_mapping(hwaddr start_pa, ram_addr_t size, - void *host_va, int add, int rom, - const char *name) -{ - struct whpx_state *whpx = &whpx_global; - HRESULT hr; - - /* - if (add) { - printf("WHPX: ADD PA:%p Size:%p, Host:%p, %s, '%s'\n", - (void*)start_pa, (void*)size, host_va, - (rom ? "ROM" : "RAM"), name); - } else { - printf("WHPX: DEL PA:%p Size:%p, Host:%p, '%s'\n", - (void*)start_pa, (void*)size, host_va, name); - } - */ - - if (add) { - hr = whp_dispatch.WHvMapGpaRange(whpx->partition, - host_va, - start_pa, - size, - (WHvMapGpaRangeFlagRead | - WHvMapGpaRangeFlagExecute | - (rom ? 0 : WHvMapGpaRangeFlagWrite))); - } else { - hr = whp_dispatch.WHvUnmapGpaRange(whpx->partition, - start_pa, - size); - } - - if (FAILED(hr)) { - error_report("WHPX: Failed to %s GPA range '%s' PA:%p, Size:%p bytes," - " Host:%p, hr=%08lx", - (add ? 
"MAP" : "UNMAP"), name, - (void *)(uintptr_t)start_pa, (void *)size, host_va, hr); - } -} - -static void whpx_process_section(MemoryRegionSection *section, int add) -{ - MemoryRegion *mr = section->mr; - hwaddr start_pa = section->offset_within_address_space; - ram_addr_t size = int128_get64(section->size); - unsigned int delta; - uint64_t host_va; - - if (!memory_region_is_ram(mr)) { - return; - } - - delta = qemu_real_host_page_size - (start_pa & ~qemu_real_host_page_mask); - delta &= ~qemu_real_host_page_mask; - if (delta > size) { - return; - } - start_pa += delta; - size -= delta; - size &= qemu_real_host_page_mask; - if (!size || (start_pa & ~qemu_real_host_page_mask)) { - return; - } - - host_va = (uintptr_t)memory_region_get_ram_ptr(mr) - + section->offset_within_region + delta; - - whpx_update_mapping(start_pa, size, (void *)(uintptr_t)host_va, add, - memory_region_is_rom(mr), mr->name); -} - -static void whpx_region_add(MemoryListener *listener, - MemoryRegionSection *section) -{ - memory_region_ref(section->mr); - whpx_process_section(section, 1); -} - -static void whpx_region_del(MemoryListener *listener, - MemoryRegionSection *section) -{ - whpx_process_section(section, 0); - memory_region_unref(section->mr); -} - -static void whpx_transaction_begin(MemoryListener *listener) -{ -} - -static void whpx_transaction_commit(MemoryListener *listener) -{ -} - -static void whpx_log_sync(MemoryListener *listener, - MemoryRegionSection *section) -{ - MemoryRegion *mr = section->mr; - - if (!memory_region_is_ram(mr)) { - return; - } - - memory_region_set_dirty(mr, 0, int128_get64(section->size)); -} - -static MemoryListener whpx_memory_listener = { - .begin = whpx_transaction_begin, - .commit = whpx_transaction_commit, - .region_add = whpx_region_add, - .region_del = whpx_region_del, - .log_sync = whpx_log_sync, - .priority = 10, -}; - -static void whpx_memory_init(void) -{ - memory_listener_register(&whpx_memory_listener, &address_space_memory); -} - -/* - * 
Load the functions from the given library, using the given handle. If a - * handle is provided, it is used, otherwise the library is opened. The - * handle will be updated on return with the opened one. - */ -static bool load_whp_dispatch_fns(HMODULE *handle, - WHPFunctionList function_list) -{ - HMODULE hLib = *handle; - - #define WINHV_PLATFORM_DLL "WinHvPlatform.dll" - #define WINHV_EMULATION_DLL "WinHvEmulation.dll" - #define WHP_LOAD_FIELD_OPTIONAL(return_type, function_name, signature) \ - whp_dispatch.function_name = \ - (function_name ## _t)GetProcAddress(hLib, #function_name); \ - - #define WHP_LOAD_FIELD(return_type, function_name, signature) \ - whp_dispatch.function_name = \ - (function_name ## _t)GetProcAddress(hLib, #function_name); \ - if (!whp_dispatch.function_name) { \ - error_report("Could not load function %s", #function_name); \ - goto error; \ - } \ - - #define WHP_LOAD_LIB(lib_name, handle_lib) \ - if (!handle_lib) { \ - handle_lib = LoadLibrary(lib_name); \ - if (!handle_lib) { \ - error_report("Could not load library %s.", lib_name); \ - goto error; \ - } \ - } \ - - switch (function_list) { - case WINHV_PLATFORM_FNS_DEFAULT: - WHP_LOAD_LIB(WINHV_PLATFORM_DLL, hLib) - LIST_WINHVPLATFORM_FUNCTIONS(WHP_LOAD_FIELD) - break; - - case WINHV_EMULATION_FNS_DEFAULT: - WHP_LOAD_LIB(WINHV_EMULATION_DLL, hLib) - LIST_WINHVEMULATION_FUNCTIONS(WHP_LOAD_FIELD) - break; - - case WINHV_PLATFORM_FNS_SUPPLEMENTAL: - WHP_LOAD_LIB(WINHV_PLATFORM_DLL, hLib) - LIST_WINHVPLATFORM_FUNCTIONS_SUPPLEMENTAL(WHP_LOAD_FIELD_OPTIONAL) - break; - } - - *handle = hLib; - return true; - -error: - if (hLib) { - FreeLibrary(hLib); - } - - return false; -} - -static void whpx_set_kernel_irqchip(Object *obj, Visitor *v, - const char *name, void *opaque, - Error **errp) -{ - struct whpx_state *whpx = &whpx_global; - OnOffSplit mode; - - if (!visit_type_OnOffSplit(v, name, &mode, errp)) { - return; - } - - switch (mode) { - case ON_OFF_SPLIT_ON: - whpx->kernel_irqchip_allowed = 
true; - whpx->kernel_irqchip_required = true; - break; - - case ON_OFF_SPLIT_OFF: - whpx->kernel_irqchip_allowed = false; - whpx->kernel_irqchip_required = false; - break; - - case ON_OFF_SPLIT_SPLIT: - error_setg(errp, "WHPX: split irqchip currently not supported"); - error_append_hint(errp, - "Try without kernel-irqchip or with kernel-irqchip=on|off"); - break; - - default: - /* - * The value was checked in visit_type_OnOffSplit() above. If - * we get here, then something is wrong in QEMU. - */ - abort(); - } -} - -/* - * Partition support - */ - -static int whpx_accel_init(MachineState *ms) -{ - struct whpx_state *whpx; - int ret; - HRESULT hr; - WHV_CAPABILITY whpx_cap; - UINT32 whpx_cap_size; - WHV_PARTITION_PROPERTY prop; - UINT32 cpuidExitList[] = {1, 0x80000001}; - WHV_CAPABILITY_FEATURES features = {0}; - - whpx = &whpx_global; - - if (!init_whp_dispatch()) { - ret = -ENOSYS; - goto error; - } - - whpx->mem_quota = ms->ram_size; - - hr = whp_dispatch.WHvGetCapability( - WHvCapabilityCodeHypervisorPresent, &whpx_cap, - sizeof(whpx_cap), &whpx_cap_size); - if (FAILED(hr) || !whpx_cap.HypervisorPresent) { - error_report("WHPX: No accelerator found, hr=%08lx", hr); - ret = -ENOSPC; - goto error; - } - - hr = whp_dispatch.WHvGetCapability( - WHvCapabilityCodeFeatures, &features, sizeof(features), NULL); - if (FAILED(hr)) { - error_report("WHPX: Failed to query capabilities, hr=%08lx", hr); - ret = -EINVAL; - goto error; - } - - hr = whp_dispatch.WHvCreatePartition(&whpx->partition); - if (FAILED(hr)) { - error_report("WHPX: Failed to create partition, hr=%08lx", hr); - ret = -EINVAL; - goto error; - } - - memset(&prop, 0, sizeof(WHV_PARTITION_PROPERTY)); - prop.ProcessorCount = ms->smp.cpus; - hr = whp_dispatch.WHvSetPartitionProperty( - whpx->partition, - WHvPartitionPropertyCodeProcessorCount, - &prop, - sizeof(WHV_PARTITION_PROPERTY)); - - if (FAILED(hr)) { - error_report("WHPX: Failed to set partition core count to %d," - " hr=%08lx", ms->smp.cores, hr); - 
ret = -EINVAL; - goto error; - } - - /* - * Error out if WHP doesn't support apic emulation and user is requiring - * it. - */ - if (whpx->kernel_irqchip_required && (!features.LocalApicEmulation || - !whp_dispatch.WHvSetVirtualProcessorInterruptControllerState2)) { - error_report("WHPX: kernel irqchip requested, but unavailable. " - "Try without kernel-irqchip or with kernel-irqchip=off"); - ret = -EINVAL; - goto error; - } - - if (whpx->kernel_irqchip_allowed && features.LocalApicEmulation && - whp_dispatch.WHvSetVirtualProcessorInterruptControllerState2) { - WHV_X64_LOCAL_APIC_EMULATION_MODE mode = - WHvX64LocalApicEmulationModeXApic; - printf("WHPX: setting APIC emulation mode in the hypervisor\n"); - hr = whp_dispatch.WHvSetPartitionProperty( - whpx->partition, - WHvPartitionPropertyCodeLocalApicEmulationMode, - &mode, - sizeof(mode)); - if (FAILED(hr)) { - error_report("WHPX: Failed to enable kernel irqchip hr=%08lx", hr); - if (whpx->kernel_irqchip_required) { - error_report("WHPX: kernel irqchip requested, but unavailable"); - ret = -EINVAL; - goto error; - } - } else { - whpx->apic_in_platform = true; - } - } - - /* Register for MSR and CPUID exits */ - memset(&prop, 0, sizeof(WHV_PARTITION_PROPERTY)); - prop.ExtendedVmExits.X64MsrExit = 1; - prop.ExtendedVmExits.X64CpuidExit = 1; - if (whpx_apic_in_platform()) { - prop.ExtendedVmExits.X64ApicInitSipiExitTrap = 1; - } - - hr = whp_dispatch.WHvSetPartitionProperty( - whpx->partition, - WHvPartitionPropertyCodeExtendedVmExits, - &prop, - sizeof(WHV_PARTITION_PROPERTY)); - if (FAILED(hr)) { - error_report("WHPX: Failed to enable MSR & CPUIDexit, hr=%08lx", hr); - ret = -EINVAL; - goto error; - } - - hr = whp_dispatch.WHvSetPartitionProperty( - whpx->partition, - WHvPartitionPropertyCodeCpuidExitList, - cpuidExitList, - RTL_NUMBER_OF(cpuidExitList) * sizeof(UINT32)); - - if (FAILED(hr)) { - error_report("WHPX: Failed to set partition CpuidExitList hr=%08lx", - hr); - ret = -EINVAL; - goto error; - } - - hr = 
whp_dispatch.WHvSetupPartition(whpx->partition); - if (FAILED(hr)) { - error_report("WHPX: Failed to setup partition, hr=%08lx", hr); - ret = -EINVAL; - goto error; - } - - whpx_memory_init(); - - cpus_register_accel(&whpx_cpus); - - printf("Windows Hypervisor Platform accelerator is operational\n"); - return 0; - -error: - - if (NULL != whpx->partition) { - whp_dispatch.WHvDeletePartition(whpx->partition); - whpx->partition = NULL; - } - - return ret; -} - -int whpx_enabled(void) -{ - return whpx_allowed; -} - -static void whpx_accel_class_init(ObjectClass *oc, void *data) -{ - AccelClass *ac = ACCEL_CLASS(oc); - ac->name = "WHPX"; - ac->init_machine = whpx_accel_init; - ac->allowed = &whpx_allowed; - - object_class_property_add(oc, "kernel-irqchip", "on|off|split", - NULL, whpx_set_kernel_irqchip, - NULL, NULL); - object_class_property_set_description(oc, "kernel-irqchip", - "Configure WHPX in-kernel irqchip"); -} - -static void whpx_accel_instance_init(Object *obj) -{ - struct whpx_state *whpx = &whpx_global; - - memset(whpx, 0, sizeof(struct whpx_state)); - /* Turn on kernel-irqchip, by default */ - whpx->kernel_irqchip_allowed = true; -} - -static const TypeInfo whpx_accel_type = { - .name = ACCEL_CLASS_NAME("whpx"), - .parent = TYPE_ACCEL, - .instance_init = whpx_accel_instance_init, - .class_init = whpx_accel_class_init, -}; - -static void whpx_type_init(void) -{ - type_register_static(&whpx_accel_type); -} - -bool init_whp_dispatch(void) -{ - if (whp_dispatch_initialized) { - return true; - } - - if (!load_whp_dispatch_fns(&hWinHvPlatform, WINHV_PLATFORM_FNS_DEFAULT)) { - goto error; - } - - if (!load_whp_dispatch_fns(&hWinHvEmulation, WINHV_EMULATION_FNS_DEFAULT)) { - goto error; - } - - assert(load_whp_dispatch_fns(&hWinHvPlatform, - WINHV_PLATFORM_FNS_SUPPLEMENTAL)); - whp_dispatch_initialized = true; - - return true; -error: - if (hWinHvPlatform) { - FreeLibrary(hWinHvPlatform); - } - - if (hWinHvEmulation) { - FreeLibrary(hWinHvEmulation); - } - - 
return false; -} - -type_init(whpx_type_init); diff --git a/target/i386/whpx-apic.c b/target/i386/whpx-apic.c deleted file mode 100644 index b127a3cb8a..0000000000 --- a/target/i386/whpx-apic.c +++ /dev/null @@ -1,274 +0,0 @@ -/* - * WHPX platform APIC support - * - * Copyright (c) 2011 Siemens AG - * - * Authors: - * Jan Kiszka - * John Starks - * - * This work is licensed under the terms of the GNU GPL version 2. - * See the COPYING file in the top-level directory. - */ -#include "qemu/osdep.h" -#include "qemu-common.h" -#include "cpu.h" -#include "hw/i386/apic_internal.h" -#include "hw/i386/apic-msidef.h" -#include "hw/pci/msi.h" -#include "sysemu/hw_accel.h" -#include "sysemu/whpx.h" -#include "whp-dispatch.h" - -static void whpx_put_apic_state(APICCommonState *s, - struct whpx_lapic_state *kapic) -{ - int i; - - memset(kapic, 0, sizeof(*kapic)); - kapic->fields[0x2].data = s->id << 24; - kapic->fields[0x3].data = s->version | ((APIC_LVT_NB - 1) << 16); - kapic->fields[0x8].data = s->tpr; - kapic->fields[0xd].data = s->log_dest << 24; - kapic->fields[0xe].data = s->dest_mode << 28 | 0x0fffffff; - kapic->fields[0xf].data = s->spurious_vec; - for (i = 0; i < 8; i++) { - kapic->fields[0x10 + i].data = s->isr[i]; - kapic->fields[0x18 + i].data = s->tmr[i]; - kapic->fields[0x20 + i].data = s->irr[i]; - } - - kapic->fields[0x28].data = s->esr; - kapic->fields[0x30].data = s->icr[0]; - kapic->fields[0x31].data = s->icr[1]; - for (i = 0; i < APIC_LVT_NB; i++) { - kapic->fields[0x32 + i].data = s->lvt[i]; - } - - kapic->fields[0x38].data = s->initial_count; - kapic->fields[0x3e].data = s->divide_conf; -} - -static void whpx_get_apic_state(APICCommonState *s, - struct whpx_lapic_state *kapic) -{ - int i, v; - - s->id = kapic->fields[0x2].data >> 24; - s->tpr = kapic->fields[0x8].data; - s->arb_id = kapic->fields[0x9].data; - s->log_dest = kapic->fields[0xd].data >> 24; - s->dest_mode = kapic->fields[0xe].data >> 28; - s->spurious_vec = kapic->fields[0xf].data; - for (i = 
0; i < 8; i++) { - s->isr[i] = kapic->fields[0x10 + i].data; - s->tmr[i] = kapic->fields[0x18 + i].data; - s->irr[i] = kapic->fields[0x20 + i].data; - } - - s->esr = kapic->fields[0x28].data; - s->icr[0] = kapic->fields[0x30].data; - s->icr[1] = kapic->fields[0x31].data; - for (i = 0; i < APIC_LVT_NB; i++) { - s->lvt[i] = kapic->fields[0x32 + i].data; - } - - s->initial_count = kapic->fields[0x38].data; - s->divide_conf = kapic->fields[0x3e].data; - - v = (s->divide_conf & 3) | ((s->divide_conf >> 1) & 4); - s->count_shift = (v + 1) & 7; - - s->initial_count_load_time = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); - apic_next_timer(s, s->initial_count_load_time); -} - -static void whpx_apic_set_base(APICCommonState *s, uint64_t val) -{ - s->apicbase = val; -} - -static void whpx_put_apic_base(CPUState *cpu, uint64_t val) -{ - HRESULT hr; - WHV_REGISTER_VALUE reg_value = {.Reg64 = val}; - WHV_REGISTER_NAME reg_name = WHvX64RegisterApicBase; - - hr = whp_dispatch.WHvSetVirtualProcessorRegisters( - whpx_global.partition, - cpu->cpu_index, - ®_name, 1, - ®_value); - - if (FAILED(hr)) { - error_report("WHPX: Failed to set MSR APIC base, hr=%08lx", hr); - } -} - -static void whpx_apic_set_tpr(APICCommonState *s, uint8_t val) -{ - s->tpr = val; -} - -static uint8_t whpx_apic_get_tpr(APICCommonState *s) -{ - return s->tpr; -} - -static void whpx_apic_vapic_base_update(APICCommonState *s) -{ - /* not implemented yet */ -} - -static void whpx_apic_put(CPUState *cs, run_on_cpu_data data) -{ - APICCommonState *s = data.host_ptr; - struct whpx_lapic_state kapic; - HRESULT hr; - - whpx_put_apic_base(CPU(s->cpu), s->apicbase); - whpx_put_apic_state(s, &kapic); - - hr = whp_dispatch.WHvSetVirtualProcessorInterruptControllerState2( - whpx_global.partition, - cs->cpu_index, - &kapic, - sizeof(kapic)); - if (FAILED(hr)) { - fprintf(stderr, - "WHvSetVirtualProcessorInterruptControllerState failed: %08lx\n", - hr); - - abort(); - } -} - -void whpx_apic_get(DeviceState *dev) -{ - 
APICCommonState *s = APIC_COMMON(dev); - CPUState *cpu = CPU(s->cpu); - struct whpx_lapic_state kapic; - - HRESULT hr = whp_dispatch.WHvGetVirtualProcessorInterruptControllerState2( - whpx_global.partition, - cpu->cpu_index, - &kapic, - sizeof(kapic), - NULL); - if (FAILED(hr)) { - fprintf(stderr, - "WHvSetVirtualProcessorInterruptControllerState failed: %08lx\n", - hr); - - abort(); - } - - whpx_get_apic_state(s, &kapic); -} - -static void whpx_apic_post_load(APICCommonState *s) -{ - run_on_cpu(CPU(s->cpu), whpx_apic_put, RUN_ON_CPU_HOST_PTR(s)); -} - -static void whpx_apic_external_nmi(APICCommonState *s) -{ -} - -static void whpx_send_msi(MSIMessage *msg) -{ - uint64_t addr = msg->address; - uint32_t data = msg->data; - uint8_t dest = (addr & MSI_ADDR_DEST_ID_MASK) >> MSI_ADDR_DEST_ID_SHIFT; - uint8_t vector = (data & MSI_DATA_VECTOR_MASK) >> MSI_DATA_VECTOR_SHIFT; - uint8_t dest_mode = (addr >> MSI_ADDR_DEST_MODE_SHIFT) & 0x1; - uint8_t trigger_mode = (data >> MSI_DATA_TRIGGER_SHIFT) & 0x1; - uint8_t delivery = (data >> MSI_DATA_DELIVERY_MODE_SHIFT) & 0x7; - - WHV_INTERRUPT_CONTROL interrupt = { - /* Values correspond to delivery modes */ - .Type = delivery, - .DestinationMode = dest_mode ? - WHvX64InterruptDestinationModeLogical : - WHvX64InterruptDestinationModePhysical, - - .TriggerMode = trigger_mode ? 
- WHvX64InterruptTriggerModeLevel : WHvX64InterruptTriggerModeEdge, - .Reserved = 0, - .Vector = vector, - .Destination = dest, - }; - HRESULT hr = whp_dispatch.WHvRequestInterrupt(whpx_global.partition, - &interrupt, sizeof(interrupt)); - if (FAILED(hr)) { - fprintf(stderr, "whpx: injection failed, MSI (%llx, %x) delivery: %d, " - "dest_mode: %d, trigger mode: %d, vector: %d, lost (%08lx)\n", - addr, data, delivery, dest_mode, trigger_mode, vector, hr); - } -} - -static uint64_t whpx_apic_mem_read(void *opaque, hwaddr addr, - unsigned size) -{ - return ~(uint64_t)0; -} - -static void whpx_apic_mem_write(void *opaque, hwaddr addr, - uint64_t data, unsigned size) -{ - MSIMessage msg = { .address = addr, .data = data }; - whpx_send_msi(&msg); -} - -static const MemoryRegionOps whpx_apic_io_ops = { - .read = whpx_apic_mem_read, - .write = whpx_apic_mem_write, - .endianness = DEVICE_NATIVE_ENDIAN, -}; - -static void whpx_apic_reset(APICCommonState *s) -{ - /* Not used by WHPX. */ - s->wait_for_sipi = 0; - - run_on_cpu(CPU(s->cpu), whpx_apic_put, RUN_ON_CPU_HOST_PTR(s)); -} - -static void whpx_apic_realize(DeviceState *dev, Error **errp) -{ - APICCommonState *s = APIC_COMMON(dev); - - memory_region_init_io(&s->io_memory, OBJECT(s), &whpx_apic_io_ops, s, - "whpx-apic-msi", APIC_SPACE_SIZE); - - msi_nonbroken = true; -} - -static void whpx_apic_class_init(ObjectClass *klass, void *data) -{ - APICCommonClass *k = APIC_COMMON_CLASS(klass); - - k->realize = whpx_apic_realize; - k->reset = whpx_apic_reset; - k->set_base = whpx_apic_set_base; - k->set_tpr = whpx_apic_set_tpr; - k->get_tpr = whpx_apic_get_tpr; - k->post_load = whpx_apic_post_load; - k->vapic_base_update = whpx_apic_vapic_base_update; - k->external_nmi = whpx_apic_external_nmi; - k->send_msi = whpx_send_msi; -} - -static const TypeInfo whpx_apic_info = { - .name = "whpx-apic", - .parent = TYPE_APIC_COMMON, - .instance_size = sizeof(APICCommonState), - .class_init = whpx_apic_class_init, -}; - -static void 
whpx_apic_register_types(void) -{ - type_register_static(&whpx_apic_info); -} - -type_init(whpx_apic_register_types) diff --git a/target/i386/whpx-cpus.c b/target/i386/whpx-cpus.c deleted file mode 100644 index d9bd5a2d36..0000000000 --- a/target/i386/whpx-cpus.c +++ /dev/null @@ -1,96 +0,0 @@ -/* - * QEMU Windows Hypervisor Platform accelerator (WHPX) - * - * Copyright Microsoft Corp. 2017 - * - * This work is licensed under the terms of the GNU GPL, version 2 or later. - * See the COPYING file in the top-level directory. - * - */ - -#include "qemu/osdep.h" -#include "sysemu/kvm_int.h" -#include "qemu/main-loop.h" -#include "sysemu/cpus.h" -#include "qemu/guest-random.h" - -#include "sysemu/whpx.h" -#include "whpx-cpus.h" - -#include -#include - -static void *whpx_cpu_thread_fn(void *arg) -{ - CPUState *cpu = arg; - int r; - - rcu_register_thread(); - - qemu_mutex_lock_iothread(); - qemu_thread_get_self(cpu->thread); - cpu->thread_id = qemu_get_thread_id(); - current_cpu = cpu; - - r = whpx_init_vcpu(cpu); - if (r < 0) { - fprintf(stderr, "whpx_init_vcpu failed: %s\n", strerror(-r)); - exit(1); - } - - /* signal CPU creation */ - cpu_thread_signal_created(cpu); - qemu_guest_random_seed_thread_part2(cpu->random_seed); - - do { - if (cpu_can_run(cpu)) { - r = whpx_vcpu_exec(cpu); - if (r == EXCP_DEBUG) { - cpu_handle_guest_debug(cpu); - } - } - while (cpu_thread_is_idle(cpu)) { - qemu_cond_wait_iothread(cpu->halt_cond); - } - qemu_wait_io_event_common(cpu); - } while (!cpu->unplug || cpu_can_run(cpu)); - - whpx_destroy_vcpu(cpu); - cpu_thread_signal_destroyed(cpu); - qemu_mutex_unlock_iothread(); - rcu_unregister_thread(); - return NULL; -} - -static void whpx_start_vcpu_thread(CPUState *cpu) -{ - char thread_name[VCPU_THREAD_NAME_SIZE]; - - cpu->thread = g_malloc0(sizeof(QemuThread)); - cpu->halt_cond = g_malloc0(sizeof(QemuCond)); - qemu_cond_init(cpu->halt_cond); - snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/WHPX", - cpu->cpu_index); - 
qemu_thread_create(cpu->thread, thread_name, whpx_cpu_thread_fn, - cpu, QEMU_THREAD_JOINABLE); -#ifdef _WIN32 - cpu->hThread = qemu_thread_get_handle(cpu->thread); -#endif -} - -static void whpx_kick_vcpu_thread(CPUState *cpu) -{ - if (!qemu_cpu_is_self(cpu)) { - whpx_vcpu_kick(cpu); - } -} - -const CpusAccel whpx_cpus = { - .create_vcpu_thread = whpx_start_vcpu_thread, - .kick_vcpu_thread = whpx_kick_vcpu_thread, - - .synchronize_post_reset = whpx_cpu_synchronize_post_reset, - .synchronize_post_init = whpx_cpu_synchronize_post_init, - .synchronize_state = whpx_cpu_synchronize_state, - .synchronize_pre_loadvm = whpx_cpu_synchronize_pre_loadvm, -}; diff --git a/target/i386/whpx-cpus.h b/target/i386/whpx-cpus.h deleted file mode 100644 index bdb367d1d0..0000000000 --- a/target/i386/whpx-cpus.h +++ /dev/null @@ -1,34 +0,0 @@ -/* - * Accelerator CPUS Interface - * - * Copyright 2020 SUSE LLC - * - * This work is licensed under the terms of the GNU GPL, version 2 or later. - * See the COPYING file in the top-level directory. 
- */ - -#ifndef WHPX_CPUS_H -#define WHPX_CPUS_H - -#include "sysemu/cpus.h" - -extern const CpusAccel whpx_cpus; - -int whpx_init_vcpu(CPUState *cpu); -int whpx_vcpu_exec(CPUState *cpu); -void whpx_destroy_vcpu(CPUState *cpu); -void whpx_vcpu_kick(CPUState *cpu); - -void whpx_cpu_synchronize_state(CPUState *cpu); -void whpx_cpu_synchronize_post_reset(CPUState *cpu); -void whpx_cpu_synchronize_post_init(CPUState *cpu); -void whpx_cpu_synchronize_pre_loadvm(CPUState *cpu); - -/* state subset only touched by the VCPU itself during runtime */ -#define WHPX_SET_RUNTIME_STATE 1 -/* state subset modified during VCPU reset */ -#define WHPX_SET_RESET_STATE 2 -/* full state set, modified during initialization or on vmload */ -#define WHPX_SET_FULL_STATE 3 - -#endif /* WHPX_CPUS_H */ diff --git a/target/i386/whpx/meson.build b/target/i386/whpx/meson.build new file mode 100644 index 0000000000..d8aa683999 --- /dev/null +++ b/target/i386/whpx/meson.build @@ -0,0 +1,5 @@ +i386_softmmu_ss.add(when: 'CONFIG_WHPX', if_true: files( + 'whpx-all.c', + 'whpx-apic.c', + 'whpx-cpus.c', +)) diff --git a/target/i386/whpx/whp-dispatch.h b/target/i386/whpx/whp-dispatch.h new file mode 100644 index 0000000000..cef5d848bd --- /dev/null +++ b/target/i386/whpx/whp-dispatch.h @@ -0,0 +1,75 @@ +#ifndef WHP_DISPATCH_H +#define WHP_DISPATCH_H + +#include +#include +#include + +#define WHV_E_UNKNOWN_CAPABILITY 0x80370300L + +#define LIST_WINHVPLATFORM_FUNCTIONS(X) \ + X(HRESULT, WHvGetCapability, (WHV_CAPABILITY_CODE CapabilityCode, VOID* CapabilityBuffer, UINT32 CapabilityBufferSizeInBytes, UINT32* WrittenSizeInBytes)) \ + X(HRESULT, WHvCreatePartition, (WHV_PARTITION_HANDLE* Partition)) \ + X(HRESULT, WHvSetupPartition, (WHV_PARTITION_HANDLE Partition)) \ + X(HRESULT, WHvDeletePartition, (WHV_PARTITION_HANDLE Partition)) \ + X(HRESULT, WHvGetPartitionProperty, (WHV_PARTITION_HANDLE Partition, WHV_PARTITION_PROPERTY_CODE PropertyCode, VOID* PropertyBuffer, UINT32 PropertyBufferSizeInBytes, UINT32* 
WrittenSizeInBytes)) \ + X(HRESULT, WHvSetPartitionProperty, (WHV_PARTITION_HANDLE Partition, WHV_PARTITION_PROPERTY_CODE PropertyCode, const VOID* PropertyBuffer, UINT32 PropertyBufferSizeInBytes)) \ + X(HRESULT, WHvMapGpaRange, (WHV_PARTITION_HANDLE Partition, VOID* SourceAddress, WHV_GUEST_PHYSICAL_ADDRESS GuestAddress, UINT64 SizeInBytes, WHV_MAP_GPA_RANGE_FLAGS Flags)) \ + X(HRESULT, WHvUnmapGpaRange, (WHV_PARTITION_HANDLE Partition, WHV_GUEST_PHYSICAL_ADDRESS GuestAddress, UINT64 SizeInBytes)) \ + X(HRESULT, WHvTranslateGva, (WHV_PARTITION_HANDLE Partition, UINT32 VpIndex, WHV_GUEST_VIRTUAL_ADDRESS Gva, WHV_TRANSLATE_GVA_FLAGS TranslateFlags, WHV_TRANSLATE_GVA_RESULT* TranslationResult, WHV_GUEST_PHYSICAL_ADDRESS* Gpa)) \ + X(HRESULT, WHvCreateVirtualProcessor, (WHV_PARTITION_HANDLE Partition, UINT32 VpIndex, UINT32 Flags)) \ + X(HRESULT, WHvDeleteVirtualProcessor, (WHV_PARTITION_HANDLE Partition, UINT32 VpIndex)) \ + X(HRESULT, WHvRunVirtualProcessor, (WHV_PARTITION_HANDLE Partition, UINT32 VpIndex, VOID* ExitContext, UINT32 ExitContextSizeInBytes)) \ + X(HRESULT, WHvCancelRunVirtualProcessor, (WHV_PARTITION_HANDLE Partition, UINT32 VpIndex, UINT32 Flags)) \ + X(HRESULT, WHvGetVirtualProcessorRegisters, (WHV_PARTITION_HANDLE Partition, UINT32 VpIndex, const WHV_REGISTER_NAME* RegisterNames, UINT32 RegisterCount, WHV_REGISTER_VALUE* RegisterValues)) \ + X(HRESULT, WHvSetVirtualProcessorRegisters, (WHV_PARTITION_HANDLE Partition, UINT32 VpIndex, const WHV_REGISTER_NAME* RegisterNames, UINT32 RegisterCount, const WHV_REGISTER_VALUE* RegisterValues)) \ + +/* + * These are supplemental functions that may not be present + * on all versions and are not critical for basic functionality. 
+ */ +#define LIST_WINHVPLATFORM_FUNCTIONS_SUPPLEMENTAL(X) \ + X(HRESULT, WHvSuspendPartitionTime, (WHV_PARTITION_HANDLE Partition)) \ + X(HRESULT, WHvRequestInterrupt, (WHV_PARTITION_HANDLE Partition, \ + WHV_INTERRUPT_CONTROL* Interrupt, UINT32 InterruptControlSize)) \ + X(HRESULT, WHvGetVirtualProcessorInterruptControllerState2, \ + (WHV_PARTITION_HANDLE Partition, UINT32 VpIndex, PVOID State, \ + UINT32 StateSize, UINT32* WrittenSize)) \ + X(HRESULT, WHvSetVirtualProcessorInterruptControllerState2, \ + (WHV_PARTITION_HANDLE Partition, UINT32 VpIndex, PVOID State, \ + UINT32 StateSize)) \ + +#define LIST_WINHVEMULATION_FUNCTIONS(X) \ + X(HRESULT, WHvEmulatorCreateEmulator, (const WHV_EMULATOR_CALLBACKS* Callbacks, WHV_EMULATOR_HANDLE* Emulator)) \ + X(HRESULT, WHvEmulatorDestroyEmulator, (WHV_EMULATOR_HANDLE Emulator)) \ + X(HRESULT, WHvEmulatorTryIoEmulation, (WHV_EMULATOR_HANDLE Emulator, VOID* Context, const WHV_VP_EXIT_CONTEXT* VpContext, const WHV_X64_IO_PORT_ACCESS_CONTEXT* IoInstructionContext, WHV_EMULATOR_STATUS* EmulatorReturnStatus)) \ + X(HRESULT, WHvEmulatorTryMmioEmulation, (WHV_EMULATOR_HANDLE Emulator, VOID* Context, const WHV_VP_EXIT_CONTEXT* VpContext, const WHV_MEMORY_ACCESS_CONTEXT* MmioInstructionContext, WHV_EMULATOR_STATUS* EmulatorReturnStatus)) \ + +#define WHP_DEFINE_TYPE(return_type, function_name, signature) \ + typedef return_type (WINAPI *function_name ## _t) signature; + +#define WHP_DECLARE_MEMBER(return_type, function_name, signature) \ + function_name ## _t function_name; + +/* Define function typedef */ +LIST_WINHVPLATFORM_FUNCTIONS(WHP_DEFINE_TYPE) +LIST_WINHVEMULATION_FUNCTIONS(WHP_DEFINE_TYPE) +LIST_WINHVPLATFORM_FUNCTIONS_SUPPLEMENTAL(WHP_DEFINE_TYPE) + +struct WHPDispatch { + LIST_WINHVPLATFORM_FUNCTIONS(WHP_DECLARE_MEMBER) + LIST_WINHVEMULATION_FUNCTIONS(WHP_DECLARE_MEMBER) + LIST_WINHVPLATFORM_FUNCTIONS_SUPPLEMENTAL(WHP_DECLARE_MEMBER) +}; + +extern struct WHPDispatch whp_dispatch; + +bool init_whp_dispatch(void); + 
+typedef enum WHPFunctionList { + WINHV_PLATFORM_FNS_DEFAULT, + WINHV_EMULATION_FNS_DEFAULT, + WINHV_PLATFORM_FNS_SUPPLEMENTAL +} WHPFunctionList; + +#endif /* WHP_DISPATCH_H */ diff --git a/target/i386/whpx/whpx-all.c b/target/i386/whpx/whpx-all.c new file mode 100644 index 0000000000..3b824fc9d7 --- /dev/null +++ b/target/i386/whpx/whpx-all.c @@ -0,0 +1,1938 @@ +/* + * QEMU Windows Hypervisor Platform accelerator (WHPX) + * + * Copyright Microsoft Corp. 2017 + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +#include "qemu/osdep.h" +#include "cpu.h" +#include "exec/address-spaces.h" +#include "exec/ioport.h" +#include "qemu-common.h" +#include "sysemu/accel.h" +#include "sysemu/whpx.h" +#include "sysemu/cpus.h" +#include "sysemu/runstate.h" +#include "qemu/main-loop.h" +#include "hw/boards.h" +#include "hw/i386/ioapic.h" +#include "hw/i386/apic_internal.h" +#include "qemu/error-report.h" +#include "qapi/error.h" +#include "qapi/qapi-types-common.h" +#include "qapi/qapi-visit-common.h" +#include "migration/blocker.h" +#include "whp-dispatch.h" +#include + +#include "whpx-cpus.h" + +#include +#include + +#define HYPERV_APIC_BUS_FREQUENCY (200000000ULL) + +static const WHV_REGISTER_NAME whpx_register_names[] = { + + /* X64 General purpose registers */ + WHvX64RegisterRax, + WHvX64RegisterRcx, + WHvX64RegisterRdx, + WHvX64RegisterRbx, + WHvX64RegisterRsp, + WHvX64RegisterRbp, + WHvX64RegisterRsi, + WHvX64RegisterRdi, + WHvX64RegisterR8, + WHvX64RegisterR9, + WHvX64RegisterR10, + WHvX64RegisterR11, + WHvX64RegisterR12, + WHvX64RegisterR13, + WHvX64RegisterR14, + WHvX64RegisterR15, + WHvX64RegisterRip, + WHvX64RegisterRflags, + + /* X64 Segment registers */ + WHvX64RegisterEs, + WHvX64RegisterCs, + WHvX64RegisterSs, + WHvX64RegisterDs, + WHvX64RegisterFs, + WHvX64RegisterGs, + WHvX64RegisterLdtr, + WHvX64RegisterTr, + + /* X64 Table registers */ + WHvX64RegisterIdtr, + 
WHvX64RegisterGdtr, + + /* X64 Control Registers */ + WHvX64RegisterCr0, + WHvX64RegisterCr2, + WHvX64RegisterCr3, + WHvX64RegisterCr4, + WHvX64RegisterCr8, + + /* X64 Debug Registers */ + /* + * WHvX64RegisterDr0, + * WHvX64RegisterDr1, + * WHvX64RegisterDr2, + * WHvX64RegisterDr3, + * WHvX64RegisterDr6, + * WHvX64RegisterDr7, + */ + + /* X64 Floating Point and Vector Registers */ + WHvX64RegisterXmm0, + WHvX64RegisterXmm1, + WHvX64RegisterXmm2, + WHvX64RegisterXmm3, + WHvX64RegisterXmm4, + WHvX64RegisterXmm5, + WHvX64RegisterXmm6, + WHvX64RegisterXmm7, + WHvX64RegisterXmm8, + WHvX64RegisterXmm9, + WHvX64RegisterXmm10, + WHvX64RegisterXmm11, + WHvX64RegisterXmm12, + WHvX64RegisterXmm13, + WHvX64RegisterXmm14, + WHvX64RegisterXmm15, + WHvX64RegisterFpMmx0, + WHvX64RegisterFpMmx1, + WHvX64RegisterFpMmx2, + WHvX64RegisterFpMmx3, + WHvX64RegisterFpMmx4, + WHvX64RegisterFpMmx5, + WHvX64RegisterFpMmx6, + WHvX64RegisterFpMmx7, + WHvX64RegisterFpControlStatus, + WHvX64RegisterXmmControlStatus, + + /* X64 MSRs */ + WHvX64RegisterEfer, +#ifdef TARGET_X86_64 + WHvX64RegisterKernelGsBase, +#endif + WHvX64RegisterApicBase, + /* WHvX64RegisterPat, */ + WHvX64RegisterSysenterCs, + WHvX64RegisterSysenterEip, + WHvX64RegisterSysenterEsp, + WHvX64RegisterStar, +#ifdef TARGET_X86_64 + WHvX64RegisterLstar, + WHvX64RegisterCstar, + WHvX64RegisterSfmask, +#endif + + /* Interrupt / Event Registers */ + /* + * WHvRegisterPendingInterruption, + * WHvRegisterInterruptState, + * WHvRegisterPendingEvent0, + * WHvRegisterPendingEvent1 + * WHvX64RegisterDeliverabilityNotifications, + */ +}; + +struct whpx_register_set { + WHV_REGISTER_VALUE values[RTL_NUMBER_OF(whpx_register_names)]; +}; + +struct whpx_vcpu { + WHV_EMULATOR_HANDLE emulator; + bool window_registered; + bool interruptable; + bool ready_for_pic_interrupt; + uint64_t tpr; + uint64_t apic_base; + bool interruption_pending; + + /* Must be the last field as it may have a tail */ + WHV_RUN_VP_EXIT_CONTEXT exit_ctx; +}; + +static bool 
whpx_allowed; +static bool whp_dispatch_initialized; +static HMODULE hWinHvPlatform, hWinHvEmulation; +static uint32_t max_vcpu_index; +struct whpx_state whpx_global; +struct WHPDispatch whp_dispatch; + + +/* + * VP support + */ + +static struct whpx_vcpu *get_whpx_vcpu(CPUState *cpu) +{ + return (struct whpx_vcpu *)cpu->hax_vcpu; +} + +static WHV_X64_SEGMENT_REGISTER whpx_seg_q2h(const SegmentCache *qs, int v86, + int r86) +{ + WHV_X64_SEGMENT_REGISTER hs; + unsigned flags = qs->flags; + + hs.Base = qs->base; + hs.Limit = qs->limit; + hs.Selector = qs->selector; + + if (v86) { + hs.Attributes = 0; + hs.SegmentType = 3; + hs.Present = 1; + hs.DescriptorPrivilegeLevel = 3; + hs.NonSystemSegment = 1; + + } else { + hs.Attributes = (flags >> DESC_TYPE_SHIFT); + + if (r86) { + /* hs.Base &= 0xfffff; */ + } + } + + return hs; +} + +static SegmentCache whpx_seg_h2q(const WHV_X64_SEGMENT_REGISTER *hs) +{ + SegmentCache qs; + + qs.base = hs->Base; + qs.limit = hs->Limit; + qs.selector = hs->Selector; + + qs.flags = ((uint32_t)hs->Attributes) << DESC_TYPE_SHIFT; + + return qs; +} + +static int whpx_set_tsc(CPUState *cpu) +{ + struct CPUX86State *env = (CPUArchState *)(cpu->env_ptr); + WHV_REGISTER_NAME tsc_reg = WHvX64RegisterTsc; + WHV_REGISTER_VALUE tsc_val; + HRESULT hr; + struct whpx_state *whpx = &whpx_global; + + /* + * Suspend the partition prior to setting the TSC to reduce the variance + * in TSC across vCPUs. When the first vCPU runs post suspend, the + * partition is automatically resumed. + */ + if (whp_dispatch.WHvSuspendPartitionTime) { + + /* + * Unable to suspend partition while setting TSC is not a fatal + * error. It just increases the likelihood of TSC variance between + * vCPUs and some guest OS are able to handle that just fine. 
+ */ + hr = whp_dispatch.WHvSuspendPartitionTime(whpx->partition); + if (FAILED(hr)) { + warn_report("WHPX: Failed to suspend partition, hr=%08lx", hr); + } + } + + tsc_val.Reg64 = env->tsc; + hr = whp_dispatch.WHvSetVirtualProcessorRegisters( + whpx->partition, cpu->cpu_index, &tsc_reg, 1, &tsc_val); + if (FAILED(hr)) { + error_report("WHPX: Failed to set TSC, hr=%08lx", hr); + return -1; + } + + return 0; +} + +static void whpx_set_registers(CPUState *cpu, int level) +{ + struct whpx_state *whpx = &whpx_global; + struct whpx_vcpu *vcpu = get_whpx_vcpu(cpu); + struct CPUX86State *env = (CPUArchState *)(cpu->env_ptr); + X86CPU *x86_cpu = X86_CPU(cpu); + struct whpx_register_set vcxt; + HRESULT hr; + int idx; + int idx_next; + int i; + int v86, r86; + + assert(cpu_is_stopped(cpu) || qemu_cpu_is_self(cpu)); + + /* + * Following MSRs have side effects on the guest or are too heavy for + * runtime. Limit them to full state update. + */ + if (level >= WHPX_SET_RESET_STATE) { + whpx_set_tsc(cpu); + } + + memset(&vcxt, 0, sizeof(struct whpx_register_set)); + + v86 = (env->eflags & VM_MASK); + r86 = !(env->cr[0] & CR0_PE_MASK); + + vcpu->tpr = cpu_get_apic_tpr(x86_cpu->apic_state); + vcpu->apic_base = cpu_get_apic_base(x86_cpu->apic_state); + + idx = 0; + + /* Indexes for first 16 registers match between HV and QEMU definitions */ + idx_next = 16; + for (idx = 0; idx < CPU_NB_REGS; idx += 1) { + vcxt.values[idx].Reg64 = (uint64_t)env->regs[idx]; + } + idx = idx_next; + + /* Same goes for RIP and RFLAGS */ + assert(whpx_register_names[idx] == WHvX64RegisterRip); + vcxt.values[idx++].Reg64 = env->eip; + + assert(whpx_register_names[idx] == WHvX64RegisterRflags); + vcxt.values[idx++].Reg64 = env->eflags; + + /* Translate 6+4 segment registers. 
HV and QEMU order matches */ + assert(idx == WHvX64RegisterEs); + for (i = 0; i < 6; i += 1, idx += 1) { + vcxt.values[idx].Segment = whpx_seg_q2h(&env->segs[i], v86, r86); + } + + assert(idx == WHvX64RegisterLdtr); + vcxt.values[idx++].Segment = whpx_seg_q2h(&env->ldt, 0, 0); + + assert(idx == WHvX64RegisterTr); + vcxt.values[idx++].Segment = whpx_seg_q2h(&env->tr, 0, 0); + + assert(idx == WHvX64RegisterIdtr); + vcxt.values[idx].Table.Base = env->idt.base; + vcxt.values[idx].Table.Limit = env->idt.limit; + idx += 1; + + assert(idx == WHvX64RegisterGdtr); + vcxt.values[idx].Table.Base = env->gdt.base; + vcxt.values[idx].Table.Limit = env->gdt.limit; + idx += 1; + + /* CR0, 2, 3, 4, 8 */ + assert(whpx_register_names[idx] == WHvX64RegisterCr0); + vcxt.values[idx++].Reg64 = env->cr[0]; + assert(whpx_register_names[idx] == WHvX64RegisterCr2); + vcxt.values[idx++].Reg64 = env->cr[2]; + assert(whpx_register_names[idx] == WHvX64RegisterCr3); + vcxt.values[idx++].Reg64 = env->cr[3]; + assert(whpx_register_names[idx] == WHvX64RegisterCr4); + vcxt.values[idx++].Reg64 = env->cr[4]; + assert(whpx_register_names[idx] == WHvX64RegisterCr8); + vcxt.values[idx++].Reg64 = vcpu->tpr; + + /* 8 Debug Registers - Skipped */ + + /* 16 XMM registers */ + assert(whpx_register_names[idx] == WHvX64RegisterXmm0); + idx_next = idx + 16; + for (i = 0; i < sizeof(env->xmm_regs) / sizeof(ZMMReg); i += 1, idx += 1) { + vcxt.values[idx].Reg128.Low64 = env->xmm_regs[i].ZMM_Q(0); + vcxt.values[idx].Reg128.High64 = env->xmm_regs[i].ZMM_Q(1); + } + idx = idx_next; + + /* 8 FP registers */ + assert(whpx_register_names[idx] == WHvX64RegisterFpMmx0); + for (i = 0; i < 8; i += 1, idx += 1) { + vcxt.values[idx].Fp.AsUINT128.Low64 = env->fpregs[i].mmx.MMX_Q(0); + /* vcxt.values[idx].Fp.AsUINT128.High64 = + env->fpregs[i].mmx.MMX_Q(1); + */ + } + + /* FP control status register */ + assert(whpx_register_names[idx] == WHvX64RegisterFpControlStatus); + vcxt.values[idx].FpControlStatus.FpControl = env->fpuc; + 
vcxt.values[idx].FpControlStatus.FpStatus = + (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11; + vcxt.values[idx].FpControlStatus.FpTag = 0; + for (i = 0; i < 8; ++i) { + vcxt.values[idx].FpControlStatus.FpTag |= (!env->fptags[i]) << i; + } + vcxt.values[idx].FpControlStatus.Reserved = 0; + vcxt.values[idx].FpControlStatus.LastFpOp = env->fpop; + vcxt.values[idx].FpControlStatus.LastFpRip = env->fpip; + idx += 1; + + /* XMM control status register */ + assert(whpx_register_names[idx] == WHvX64RegisterXmmControlStatus); + vcxt.values[idx].XmmControlStatus.LastFpRdp = 0; + vcxt.values[idx].XmmControlStatus.XmmStatusControl = env->mxcsr; + vcxt.values[idx].XmmControlStatus.XmmStatusControlMask = 0x0000ffff; + idx += 1; + + /* MSRs */ + assert(whpx_register_names[idx] == WHvX64RegisterEfer); + vcxt.values[idx++].Reg64 = env->efer; +#ifdef TARGET_X86_64 + assert(whpx_register_names[idx] == WHvX64RegisterKernelGsBase); + vcxt.values[idx++].Reg64 = env->kernelgsbase; +#endif + + assert(whpx_register_names[idx] == WHvX64RegisterApicBase); + vcxt.values[idx++].Reg64 = vcpu->apic_base; + + /* WHvX64RegisterPat - Skipped */ + + assert(whpx_register_names[idx] == WHvX64RegisterSysenterCs); + vcxt.values[idx++].Reg64 = env->sysenter_cs; + assert(whpx_register_names[idx] == WHvX64RegisterSysenterEip); + vcxt.values[idx++].Reg64 = env->sysenter_eip; + assert(whpx_register_names[idx] == WHvX64RegisterSysenterEsp); + vcxt.values[idx++].Reg64 = env->sysenter_esp; + assert(whpx_register_names[idx] == WHvX64RegisterStar); + vcxt.values[idx++].Reg64 = env->star; +#ifdef TARGET_X86_64 + assert(whpx_register_names[idx] == WHvX64RegisterLstar); + vcxt.values[idx++].Reg64 = env->lstar; + assert(whpx_register_names[idx] == WHvX64RegisterCstar); + vcxt.values[idx++].Reg64 = env->cstar; + assert(whpx_register_names[idx] == WHvX64RegisterSfmask); + vcxt.values[idx++].Reg64 = env->fmask; +#endif + + /* Interrupt / Event Registers - Skipped */ + + assert(idx == 
RTL_NUMBER_OF(whpx_register_names)); + + hr = whp_dispatch.WHvSetVirtualProcessorRegisters( + whpx->partition, cpu->cpu_index, + whpx_register_names, + RTL_NUMBER_OF(whpx_register_names), + &vcxt.values[0]); + + if (FAILED(hr)) { + error_report("WHPX: Failed to set virtual processor context, hr=%08lx", + hr); + } + + return; +} + +static int whpx_get_tsc(CPUState *cpu) +{ + struct CPUX86State *env = (CPUArchState *)(cpu->env_ptr); + WHV_REGISTER_NAME tsc_reg = WHvX64RegisterTsc; + WHV_REGISTER_VALUE tsc_val; + HRESULT hr; + struct whpx_state *whpx = &whpx_global; + + hr = whp_dispatch.WHvGetVirtualProcessorRegisters( + whpx->partition, cpu->cpu_index, &tsc_reg, 1, &tsc_val); + if (FAILED(hr)) { + error_report("WHPX: Failed to get TSC, hr=%08lx", hr); + return -1; + } + + env->tsc = tsc_val.Reg64; + return 0; +} + +static void whpx_get_registers(CPUState *cpu) +{ + struct whpx_state *whpx = &whpx_global; + struct whpx_vcpu *vcpu = get_whpx_vcpu(cpu); + struct CPUX86State *env = (CPUArchState *)(cpu->env_ptr); + X86CPU *x86_cpu = X86_CPU(cpu); + struct whpx_register_set vcxt; + uint64_t tpr, apic_base; + HRESULT hr; + int idx; + int idx_next; + int i; + + assert(cpu_is_stopped(cpu) || qemu_cpu_is_self(cpu)); + + if (!env->tsc_valid) { + whpx_get_tsc(cpu); + env->tsc_valid = !runstate_is_running(); + } + + hr = whp_dispatch.WHvGetVirtualProcessorRegisters( + whpx->partition, cpu->cpu_index, + whpx_register_names, + RTL_NUMBER_OF(whpx_register_names), + &vcxt.values[0]); + if (FAILED(hr)) { + error_report("WHPX: Failed to get virtual processor context, hr=%08lx", + hr); + } + + idx = 0; + + /* Indexes for first 16 registers match between HV and QEMU definitions */ + idx_next = 16; + for (idx = 0; idx < CPU_NB_REGS; idx += 1) { + env->regs[idx] = vcxt.values[idx].Reg64; + } + idx = idx_next; + + /* Same goes for RIP and RFLAGS */ + assert(whpx_register_names[idx] == WHvX64RegisterRip); + env->eip = vcxt.values[idx++].Reg64; + assert(whpx_register_names[idx] == 
WHvX64RegisterRflags); + env->eflags = vcxt.values[idx++].Reg64; + + /* Translate 6+4 segment registers. HV and QEMU order matches */ + assert(idx == WHvX64RegisterEs); + for (i = 0; i < 6; i += 1, idx += 1) { + env->segs[i] = whpx_seg_h2q(&vcxt.values[idx].Segment); + } + + assert(idx == WHvX64RegisterLdtr); + env->ldt = whpx_seg_h2q(&vcxt.values[idx++].Segment); + assert(idx == WHvX64RegisterTr); + env->tr = whpx_seg_h2q(&vcxt.values[idx++].Segment); + assert(idx == WHvX64RegisterIdtr); + env->idt.base = vcxt.values[idx].Table.Base; + env->idt.limit = vcxt.values[idx].Table.Limit; + idx += 1; + assert(idx == WHvX64RegisterGdtr); + env->gdt.base = vcxt.values[idx].Table.Base; + env->gdt.limit = vcxt.values[idx].Table.Limit; + idx += 1; + + /* CR0, 2, 3, 4, 8 */ + assert(whpx_register_names[idx] == WHvX64RegisterCr0); + env->cr[0] = vcxt.values[idx++].Reg64; + assert(whpx_register_names[idx] == WHvX64RegisterCr2); + env->cr[2] = vcxt.values[idx++].Reg64; + assert(whpx_register_names[idx] == WHvX64RegisterCr3); + env->cr[3] = vcxt.values[idx++].Reg64; + assert(whpx_register_names[idx] == WHvX64RegisterCr4); + env->cr[4] = vcxt.values[idx++].Reg64; + assert(whpx_register_names[idx] == WHvX64RegisterCr8); + tpr = vcxt.values[idx++].Reg64; + if (tpr != vcpu->tpr) { + vcpu->tpr = tpr; + cpu_set_apic_tpr(x86_cpu->apic_state, tpr); + } + + /* 8 Debug Registers - Skipped */ + + /* 16 XMM registers */ + assert(whpx_register_names[idx] == WHvX64RegisterXmm0); + idx_next = idx + 16; + for (i = 0; i < sizeof(env->xmm_regs) / sizeof(ZMMReg); i += 1, idx += 1) { + env->xmm_regs[i].ZMM_Q(0) = vcxt.values[idx].Reg128.Low64; + env->xmm_regs[i].ZMM_Q(1) = vcxt.values[idx].Reg128.High64; + } + idx = idx_next; + + /* 8 FP registers */ + assert(whpx_register_names[idx] == WHvX64RegisterFpMmx0); + for (i = 0; i < 8; i += 1, idx += 1) { + env->fpregs[i].mmx.MMX_Q(0) = vcxt.values[idx].Fp.AsUINT128.Low64; + /* env->fpregs[i].mmx.MMX_Q(1) = + vcxt.values[idx].Fp.AsUINT128.High64; + */ + } 
+ + /* FP control status register */ + assert(whpx_register_names[idx] == WHvX64RegisterFpControlStatus); + env->fpuc = vcxt.values[idx].FpControlStatus.FpControl; + env->fpstt = (vcxt.values[idx].FpControlStatus.FpStatus >> 11) & 0x7; + env->fpus = vcxt.values[idx].FpControlStatus.FpStatus & ~0x3800; + for (i = 0; i < 8; ++i) { + env->fptags[i] = !((vcxt.values[idx].FpControlStatus.FpTag >> i) & 1); + } + env->fpop = vcxt.values[idx].FpControlStatus.LastFpOp; + env->fpip = vcxt.values[idx].FpControlStatus.LastFpRip; + idx += 1; + + /* XMM control status register */ + assert(whpx_register_names[idx] == WHvX64RegisterXmmControlStatus); + env->mxcsr = vcxt.values[idx].XmmControlStatus.XmmStatusControl; + idx += 1; + + /* MSRs */ + assert(whpx_register_names[idx] == WHvX64RegisterEfer); + env->efer = vcxt.values[idx++].Reg64; +#ifdef TARGET_X86_64 + assert(whpx_register_names[idx] == WHvX64RegisterKernelGsBase); + env->kernelgsbase = vcxt.values[idx++].Reg64; +#endif + + assert(whpx_register_names[idx] == WHvX64RegisterApicBase); + apic_base = vcxt.values[idx++].Reg64; + if (apic_base != vcpu->apic_base) { + vcpu->apic_base = apic_base; + cpu_set_apic_base(x86_cpu->apic_state, vcpu->apic_base); + } + + /* WHvX64RegisterPat - Skipped */ + + assert(whpx_register_names[idx] == WHvX64RegisterSysenterCs); + env->sysenter_cs = vcxt.values[idx++].Reg64; + assert(whpx_register_names[idx] == WHvX64RegisterSysenterEip); + env->sysenter_eip = vcxt.values[idx++].Reg64; + assert(whpx_register_names[idx] == WHvX64RegisterSysenterEsp); + env->sysenter_esp = vcxt.values[idx++].Reg64; + assert(whpx_register_names[idx] == WHvX64RegisterStar); + env->star = vcxt.values[idx++].Reg64; +#ifdef TARGET_X86_64 + assert(whpx_register_names[idx] == WHvX64RegisterLstar); + env->lstar = vcxt.values[idx++].Reg64; + assert(whpx_register_names[idx] == WHvX64RegisterCstar); + env->cstar = vcxt.values[idx++].Reg64; + assert(whpx_register_names[idx] == WHvX64RegisterSfmask); + env->fmask = 
vcxt.values[idx++].Reg64; +#endif + + /* Interrupt / Event Registers - Skipped */ + + assert(idx == RTL_NUMBER_OF(whpx_register_names)); + + if (whpx_apic_in_platform()) { + whpx_apic_get(x86_cpu->apic_state); + } + + return; +} + +static HRESULT CALLBACK whpx_emu_ioport_callback( + void *ctx, + WHV_EMULATOR_IO_ACCESS_INFO *IoAccess) +{ + MemTxAttrs attrs = { 0 }; + address_space_rw(&address_space_io, IoAccess->Port, attrs, + &IoAccess->Data, IoAccess->AccessSize, + IoAccess->Direction); + return S_OK; +} + +static HRESULT CALLBACK whpx_emu_mmio_callback( + void *ctx, + WHV_EMULATOR_MEMORY_ACCESS_INFO *ma) +{ + cpu_physical_memory_rw(ma->GpaAddress, ma->Data, ma->AccessSize, + ma->Direction); + return S_OK; +} + +static HRESULT CALLBACK whpx_emu_getreg_callback( + void *ctx, + const WHV_REGISTER_NAME *RegisterNames, + UINT32 RegisterCount, + WHV_REGISTER_VALUE *RegisterValues) +{ + HRESULT hr; + struct whpx_state *whpx = &whpx_global; + CPUState *cpu = (CPUState *)ctx; + + hr = whp_dispatch.WHvGetVirtualProcessorRegisters( + whpx->partition, cpu->cpu_index, + RegisterNames, RegisterCount, + RegisterValues); + if (FAILED(hr)) { + error_report("WHPX: Failed to get virtual processor registers," + " hr=%08lx", hr); + } + + return hr; +} + +static HRESULT CALLBACK whpx_emu_setreg_callback( + void *ctx, + const WHV_REGISTER_NAME *RegisterNames, + UINT32 RegisterCount, + const WHV_REGISTER_VALUE *RegisterValues) +{ + HRESULT hr; + struct whpx_state *whpx = &whpx_global; + CPUState *cpu = (CPUState *)ctx; + + hr = whp_dispatch.WHvSetVirtualProcessorRegisters( + whpx->partition, cpu->cpu_index, + RegisterNames, RegisterCount, + RegisterValues); + if (FAILED(hr)) { + error_report("WHPX: Failed to set virtual processor registers," + " hr=%08lx", hr); + } + + /* + * The emulator just successfully wrote the register state. We clear the + * dirty state so we avoid the double write on resume of the VP. 
+ */ + cpu->vcpu_dirty = false; + + return hr; +} + +static HRESULT CALLBACK whpx_emu_translate_callback( + void *ctx, + WHV_GUEST_VIRTUAL_ADDRESS Gva, + WHV_TRANSLATE_GVA_FLAGS TranslateFlags, + WHV_TRANSLATE_GVA_RESULT_CODE *TranslationResult, + WHV_GUEST_PHYSICAL_ADDRESS *Gpa) +{ + HRESULT hr; + struct whpx_state *whpx = &whpx_global; + CPUState *cpu = (CPUState *)ctx; + WHV_TRANSLATE_GVA_RESULT res; + + hr = whp_dispatch.WHvTranslateGva(whpx->partition, cpu->cpu_index, + Gva, TranslateFlags, &res, Gpa); + if (FAILED(hr)) { + error_report("WHPX: Failed to translate GVA, hr=%08lx", hr); + } else { + *TranslationResult = res.ResultCode; + } + + return hr; +} + +static const WHV_EMULATOR_CALLBACKS whpx_emu_callbacks = { + .Size = sizeof(WHV_EMULATOR_CALLBACKS), + .WHvEmulatorIoPortCallback = whpx_emu_ioport_callback, + .WHvEmulatorMemoryCallback = whpx_emu_mmio_callback, + .WHvEmulatorGetVirtualProcessorRegisters = whpx_emu_getreg_callback, + .WHvEmulatorSetVirtualProcessorRegisters = whpx_emu_setreg_callback, + .WHvEmulatorTranslateGvaPage = whpx_emu_translate_callback, +}; + +static int whpx_handle_mmio(CPUState *cpu, WHV_MEMORY_ACCESS_CONTEXT *ctx) +{ + HRESULT hr; + struct whpx_vcpu *vcpu = get_whpx_vcpu(cpu); + WHV_EMULATOR_STATUS emu_status; + + hr = whp_dispatch.WHvEmulatorTryMmioEmulation( + vcpu->emulator, cpu, + &vcpu->exit_ctx.VpContext, ctx, + &emu_status); + if (FAILED(hr)) { + error_report("WHPX: Failed to parse MMIO access, hr=%08lx", hr); + return -1; + } + + if (!emu_status.EmulationSuccessful) { + error_report("WHPX: Failed to emulate MMIO access with" + " EmulatorReturnStatus: %u", emu_status.AsUINT32); + return -1; + } + + return 0; +} + +static int whpx_handle_portio(CPUState *cpu, + WHV_X64_IO_PORT_ACCESS_CONTEXT *ctx) +{ + HRESULT hr; + struct whpx_vcpu *vcpu = get_whpx_vcpu(cpu); + WHV_EMULATOR_STATUS emu_status; + + hr = whp_dispatch.WHvEmulatorTryIoEmulation( + vcpu->emulator, cpu, + &vcpu->exit_ctx.VpContext, ctx, + &emu_status); + if 
(FAILED(hr)) { + error_report("WHPX: Failed to parse PortIO access, hr=%08lx", hr); + return -1; + } + + if (!emu_status.EmulationSuccessful) { + error_report("WHPX: Failed to emulate PortIO access with" + " EmulatorReturnStatus: %u", emu_status.AsUINT32); + return -1; + } + + return 0; +} + +static int whpx_handle_halt(CPUState *cpu) +{ + struct CPUX86State *env = (CPUArchState *)(cpu->env_ptr); + int ret = 0; + + qemu_mutex_lock_iothread(); + if (!((cpu->interrupt_request & CPU_INTERRUPT_HARD) && + (env->eflags & IF_MASK)) && + !(cpu->interrupt_request & CPU_INTERRUPT_NMI)) { + cpu->exception_index = EXCP_HLT; + cpu->halted = true; + ret = 1; + } + qemu_mutex_unlock_iothread(); + + return ret; +} + +static void whpx_vcpu_pre_run(CPUState *cpu) +{ + HRESULT hr; + struct whpx_state *whpx = &whpx_global; + struct whpx_vcpu *vcpu = get_whpx_vcpu(cpu); + struct CPUX86State *env = (CPUArchState *)(cpu->env_ptr); + X86CPU *x86_cpu = X86_CPU(cpu); + int irq; + uint8_t tpr; + WHV_X64_PENDING_INTERRUPTION_REGISTER new_int; + UINT32 reg_count = 0; + WHV_REGISTER_VALUE reg_values[3]; + WHV_REGISTER_NAME reg_names[3]; + + memset(&new_int, 0, sizeof(new_int)); + memset(reg_values, 0, sizeof(reg_values)); + + qemu_mutex_lock_iothread(); + + /* Inject NMI */ + if (!vcpu->interruption_pending && + cpu->interrupt_request & (CPU_INTERRUPT_NMI | CPU_INTERRUPT_SMI)) { + if (cpu->interrupt_request & CPU_INTERRUPT_NMI) { + cpu->interrupt_request &= ~CPU_INTERRUPT_NMI; + vcpu->interruptable = false; + new_int.InterruptionType = WHvX64PendingNmi; + new_int.InterruptionPending = 1; + new_int.InterruptionVector = 2; + } + if (cpu->interrupt_request & CPU_INTERRUPT_SMI) { + cpu->interrupt_request &= ~CPU_INTERRUPT_SMI; + } + } + + /* + * Force the VCPU out of its inner loop to process any INIT requests or + * commit pending TPR access. 
+ */ + if (cpu->interrupt_request & (CPU_INTERRUPT_INIT | CPU_INTERRUPT_TPR)) { + if ((cpu->interrupt_request & CPU_INTERRUPT_INIT) && + !(env->hflags & HF_SMM_MASK)) { + cpu->exit_request = 1; + } + if (cpu->interrupt_request & CPU_INTERRUPT_TPR) { + cpu->exit_request = 1; + } + } + + /* Get pending hard interruption or replay one that was overwritten */ + if (!whpx_apic_in_platform()) { + if (!vcpu->interruption_pending && + vcpu->interruptable && (env->eflags & IF_MASK)) { + assert(!new_int.InterruptionPending); + if (cpu->interrupt_request & CPU_INTERRUPT_HARD) { + cpu->interrupt_request &= ~CPU_INTERRUPT_HARD; + irq = cpu_get_pic_interrupt(env); + if (irq >= 0) { + new_int.InterruptionType = WHvX64PendingInterrupt; + new_int.InterruptionPending = 1; + new_int.InterruptionVector = irq; + } + } + } + + /* Setup interrupt state if new one was prepared */ + if (new_int.InterruptionPending) { + reg_values[reg_count].PendingInterruption = new_int; + reg_names[reg_count] = WHvRegisterPendingInterruption; + reg_count += 1; + } + } else if (vcpu->ready_for_pic_interrupt && + (cpu->interrupt_request & CPU_INTERRUPT_HARD)) { + cpu->interrupt_request &= ~CPU_INTERRUPT_HARD; + irq = cpu_get_pic_interrupt(env); + if (irq >= 0) { + reg_names[reg_count] = WHvRegisterPendingEvent; + reg_values[reg_count].ExtIntEvent = (WHV_X64_PENDING_EXT_INT_EVENT) + { + .EventPending = 1, + .EventType = WHvX64PendingEventExtInt, + .Vector = irq, + }; + reg_count += 1; + } + } + + /* Sync the TPR to the CR8 if was modified during the intercept */ + tpr = cpu_get_apic_tpr(x86_cpu->apic_state); + if (tpr != vcpu->tpr) { + vcpu->tpr = tpr; + reg_values[reg_count].Reg64 = tpr; + cpu->exit_request = 1; + reg_names[reg_count] = WHvX64RegisterCr8; + reg_count += 1; + } + + /* Update the state of the interrupt delivery notification */ + if (!vcpu->window_registered && + cpu->interrupt_request & CPU_INTERRUPT_HARD) { + reg_values[reg_count].DeliverabilityNotifications = + 
(WHV_X64_DELIVERABILITY_NOTIFICATIONS_REGISTER) { + .InterruptNotification = 1 + }; + vcpu->window_registered = 1; + reg_names[reg_count] = WHvX64RegisterDeliverabilityNotifications; + reg_count += 1; + } + + qemu_mutex_unlock_iothread(); + vcpu->ready_for_pic_interrupt = false; + + if (reg_count) { + hr = whp_dispatch.WHvSetVirtualProcessorRegisters( + whpx->partition, cpu->cpu_index, + reg_names, reg_count, reg_values); + if (FAILED(hr)) { + error_report("WHPX: Failed to set interrupt state registers," + " hr=%08lx", hr); + } + } + + return; +} + +static void whpx_vcpu_post_run(CPUState *cpu) +{ + struct whpx_vcpu *vcpu = get_whpx_vcpu(cpu); + struct CPUX86State *env = (CPUArchState *)(cpu->env_ptr); + X86CPU *x86_cpu = X86_CPU(cpu); + + env->eflags = vcpu->exit_ctx.VpContext.Rflags; + + uint64_t tpr = vcpu->exit_ctx.VpContext.Cr8; + if (vcpu->tpr != tpr) { + vcpu->tpr = tpr; + qemu_mutex_lock_iothread(); + cpu_set_apic_tpr(x86_cpu->apic_state, vcpu->tpr); + qemu_mutex_unlock_iothread(); + } + + vcpu->interruption_pending = + vcpu->exit_ctx.VpContext.ExecutionState.InterruptionPending; + + vcpu->interruptable = + !vcpu->exit_ctx.VpContext.ExecutionState.InterruptShadow; + + return; +} + +static void whpx_vcpu_process_async_events(CPUState *cpu) +{ + struct CPUX86State *env = (CPUArchState *)(cpu->env_ptr); + X86CPU *x86_cpu = X86_CPU(cpu); + struct whpx_vcpu *vcpu = get_whpx_vcpu(cpu); + + if ((cpu->interrupt_request & CPU_INTERRUPT_INIT) && + !(env->hflags & HF_SMM_MASK)) { + whpx_cpu_synchronize_state(cpu); + do_cpu_init(x86_cpu); + vcpu->interruptable = true; + } + + if (cpu->interrupt_request & CPU_INTERRUPT_POLL) { + cpu->interrupt_request &= ~CPU_INTERRUPT_POLL; + apic_poll_irq(x86_cpu->apic_state); + } + + if (((cpu->interrupt_request & CPU_INTERRUPT_HARD) && + (env->eflags & IF_MASK)) || + (cpu->interrupt_request & CPU_INTERRUPT_NMI)) { + cpu->halted = false; + } + + if (cpu->interrupt_request & CPU_INTERRUPT_SIPI) { + whpx_cpu_synchronize_state(cpu); + 
do_cpu_sipi(x86_cpu); + } + + if (cpu->interrupt_request & CPU_INTERRUPT_TPR) { + cpu->interrupt_request &= ~CPU_INTERRUPT_TPR; + whpx_cpu_synchronize_state(cpu); + apic_handle_tpr_access_report(x86_cpu->apic_state, env->eip, + env->tpr_access_type); + } + + return; +} + +static int whpx_vcpu_run(CPUState *cpu) +{ + HRESULT hr; + struct whpx_state *whpx = &whpx_global; + struct whpx_vcpu *vcpu = get_whpx_vcpu(cpu); + int ret; + + whpx_vcpu_process_async_events(cpu); + if (cpu->halted && !whpx_apic_in_platform()) { + cpu->exception_index = EXCP_HLT; + qatomic_set(&cpu->exit_request, false); + return 0; + } + + qemu_mutex_unlock_iothread(); + cpu_exec_start(cpu); + + do { + if (cpu->vcpu_dirty) { + whpx_set_registers(cpu, WHPX_SET_RUNTIME_STATE); + cpu->vcpu_dirty = false; + } + + whpx_vcpu_pre_run(cpu); + + if (qatomic_read(&cpu->exit_request)) { + whpx_vcpu_kick(cpu); + } + + hr = whp_dispatch.WHvRunVirtualProcessor( + whpx->partition, cpu->cpu_index, + &vcpu->exit_ctx, sizeof(vcpu->exit_ctx)); + + if (FAILED(hr)) { + error_report("WHPX: Failed to exec a virtual processor," + " hr=%08lx", hr); + ret = -1; + break; + } + + whpx_vcpu_post_run(cpu); + + switch (vcpu->exit_ctx.ExitReason) { + case WHvRunVpExitReasonMemoryAccess: + ret = whpx_handle_mmio(cpu, &vcpu->exit_ctx.MemoryAccess); + break; + + case WHvRunVpExitReasonX64IoPortAccess: + ret = whpx_handle_portio(cpu, &vcpu->exit_ctx.IoPortAccess); + break; + + case WHvRunVpExitReasonX64InterruptWindow: + vcpu->ready_for_pic_interrupt = 1; + vcpu->window_registered = 0; + ret = 0; + break; + + case WHvRunVpExitReasonX64ApicEoi: + assert(whpx_apic_in_platform()); + ioapic_eoi_broadcast(vcpu->exit_ctx.ApicEoi.InterruptVector); + break; + + case WHvRunVpExitReasonX64Halt: + ret = whpx_handle_halt(cpu); + break; + + case WHvRunVpExitReasonX64ApicInitSipiTrap: { + WHV_INTERRUPT_CONTROL ipi = {0}; + uint64_t icr = vcpu->exit_ctx.ApicInitSipi.ApicIcr; + uint32_t delivery_mode = + (icr & APIC_ICR_DELIV_MOD) >> 
APIC_ICR_DELIV_MOD_SHIFT; + int dest_shorthand = + (icr & APIC_ICR_DEST_SHORT) >> APIC_ICR_DEST_SHORT_SHIFT; + bool broadcast = false; + bool include_self = false; + uint32_t i; + + /* We only registered for INIT and SIPI exits. */ + if ((delivery_mode != APIC_DM_INIT) && + (delivery_mode != APIC_DM_SIPI)) { + error_report( + "WHPX: Unexpected APIC exit that is not a INIT or SIPI"); + break; + } + + if (delivery_mode == APIC_DM_INIT) { + ipi.Type = WHvX64InterruptTypeInit; + } else { + ipi.Type = WHvX64InterruptTypeSipi; + } + + ipi.DestinationMode = + ((icr & APIC_ICR_DEST_MOD) >> APIC_ICR_DEST_MOD_SHIFT) ? + WHvX64InterruptDestinationModeLogical : + WHvX64InterruptDestinationModePhysical; + + ipi.TriggerMode = + ((icr & APIC_ICR_TRIGGER_MOD) >> APIC_ICR_TRIGGER_MOD_SHIFT) ? + WHvX64InterruptTriggerModeLevel : + WHvX64InterruptTriggerModeEdge; + + ipi.Vector = icr & APIC_VECTOR_MASK; + switch (dest_shorthand) { + /* no shorthand. Bits 56-63 contain the destination. */ + case 0: + ipi.Destination = (icr >> 56) & APIC_VECTOR_MASK; + hr = whp_dispatch.WHvRequestInterrupt(whpx->partition, + &ipi, sizeof(ipi)); + if (FAILED(hr)) { + error_report("WHPX: Failed to request interrupt hr=%08lx", + hr); + } + + break; + + /* self */ + case 1: + include_self = true; + break; + + /* broadcast, including self */ + case 2: + broadcast = true; + include_self = true; + break; + + /* broadcast, excluding self */ + case 3: + broadcast = true; + break; + } + + if (!broadcast && !include_self) { + break; + } + + for (i = 0; i <= max_vcpu_index; i++) { + if (i == cpu->cpu_index && !include_self) { + continue; + } + + /* + * Assuming that APIC Ids are identity mapped since + * WHvX64RegisterApicId & WHvX64RegisterInitialApicId registers + * are not handled yet and the hypervisor doesn't allow the + * guest to modify the APIC ID. 
+ */ + ipi.Destination = i; + hr = whp_dispatch.WHvRequestInterrupt(whpx->partition, + &ipi, sizeof(ipi)); + if (FAILED(hr)) { + error_report( + "WHPX: Failed to request SIPI for %d, hr=%08lx", + i, hr); + } + } + + break; + } + + case WHvRunVpExitReasonCanceled: + cpu->exception_index = EXCP_INTERRUPT; + ret = 1; + break; + + case WHvRunVpExitReasonX64MsrAccess: { + WHV_REGISTER_VALUE reg_values[3] = {0}; + WHV_REGISTER_NAME reg_names[3]; + UINT32 reg_count; + + reg_names[0] = WHvX64RegisterRip; + reg_names[1] = WHvX64RegisterRax; + reg_names[2] = WHvX64RegisterRdx; + + reg_values[0].Reg64 = + vcpu->exit_ctx.VpContext.Rip + + vcpu->exit_ctx.VpContext.InstructionLength; + + /* + * For all unsupported MSR access we: + * ignore writes + * return 0 on read. + */ + reg_count = vcpu->exit_ctx.MsrAccess.AccessInfo.IsWrite ? + 1 : 3; + + hr = whp_dispatch.WHvSetVirtualProcessorRegisters( + whpx->partition, + cpu->cpu_index, + reg_names, reg_count, + reg_values); + + if (FAILED(hr)) { + error_report("WHPX: Failed to set MsrAccess state " + " registers, hr=%08lx", hr); + } + ret = 0; + break; + } + case WHvRunVpExitReasonX64Cpuid: { + WHV_REGISTER_VALUE reg_values[5]; + WHV_REGISTER_NAME reg_names[5]; + UINT32 reg_count = 5; + UINT64 cpuid_fn, rip = 0, rax = 0, rcx = 0, rdx = 0, rbx = 0; + X86CPU *x86_cpu = X86_CPU(cpu); + CPUX86State *env = &x86_cpu->env; + + memset(reg_values, 0, sizeof(reg_values)); + + rip = vcpu->exit_ctx.VpContext.Rip + + vcpu->exit_ctx.VpContext.InstructionLength; + cpuid_fn = vcpu->exit_ctx.CpuidAccess.Rax; + + /* + * Ideally, these should be supplied to the hypervisor during VCPU + * initialization and it should be able to satisfy this request. + * But, currently, WHPX doesn't support setting CPUID values in the + * hypervisor once the partition has been setup, which is too late + * since VCPUs are realized later. 
For now, use the values from + * QEMU to satisfy these requests, until WHPX adds support for + * being able to set these values in the hypervisor at runtime. + */ + cpu_x86_cpuid(env, cpuid_fn, 0, (UINT32 *)&rax, (UINT32 *)&rbx, + (UINT32 *)&rcx, (UINT32 *)&rdx); + switch (cpuid_fn) { + case 0x40000000: + /* Expose the vmware cpu frequency cpuid leaf */ + rax = 0x40000010; + rbx = rcx = rdx = 0; + break; + + case 0x40000010: + rax = env->tsc_khz; + rbx = env->apic_bus_freq / 1000; /* Hz to KHz */ + rcx = rdx = 0; + break; + + case 0x80000001: + /* Remove any support of OSVW */ + rcx &= ~CPUID_EXT3_OSVW; + break; + } + + reg_names[0] = WHvX64RegisterRip; + reg_names[1] = WHvX64RegisterRax; + reg_names[2] = WHvX64RegisterRcx; + reg_names[3] = WHvX64RegisterRdx; + reg_names[4] = WHvX64RegisterRbx; + + reg_values[0].Reg64 = rip; + reg_values[1].Reg64 = rax; + reg_values[2].Reg64 = rcx; + reg_values[3].Reg64 = rdx; + reg_values[4].Reg64 = rbx; + + hr = whp_dispatch.WHvSetVirtualProcessorRegisters( + whpx->partition, cpu->cpu_index, + reg_names, + reg_count, + reg_values); + + if (FAILED(hr)) { + error_report("WHPX: Failed to set CpuidAccess state registers," + " hr=%08lx", hr); + } + ret = 0; + break; + } + case WHvRunVpExitReasonNone: + case WHvRunVpExitReasonUnrecoverableException: + case WHvRunVpExitReasonInvalidVpRegisterValue: + case WHvRunVpExitReasonUnsupportedFeature: + case WHvRunVpExitReasonException: + default: + error_report("WHPX: Unexpected VP exit code %d", + vcpu->exit_ctx.ExitReason); + whpx_get_registers(cpu); + qemu_mutex_lock_iothread(); + qemu_system_guest_panicked(cpu_get_crash_info(cpu)); + qemu_mutex_unlock_iothread(); + break; + } + + } while (!ret); + + cpu_exec_end(cpu); + qemu_mutex_lock_iothread(); + current_cpu = cpu; + + qatomic_set(&cpu->exit_request, false); + + return ret < 0; +} + +static void do_whpx_cpu_synchronize_state(CPUState *cpu, run_on_cpu_data arg) +{ + if (!cpu->vcpu_dirty) { + whpx_get_registers(cpu); + cpu->vcpu_dirty = 
true; + } +} + +static void do_whpx_cpu_synchronize_post_reset(CPUState *cpu, + run_on_cpu_data arg) +{ + whpx_set_registers(cpu, WHPX_SET_RESET_STATE); + cpu->vcpu_dirty = false; +} + +static void do_whpx_cpu_synchronize_post_init(CPUState *cpu, + run_on_cpu_data arg) +{ + whpx_set_registers(cpu, WHPX_SET_FULL_STATE); + cpu->vcpu_dirty = false; +} + +static void do_whpx_cpu_synchronize_pre_loadvm(CPUState *cpu, + run_on_cpu_data arg) +{ + cpu->vcpu_dirty = true; +} + +/* + * CPU support. + */ + +void whpx_cpu_synchronize_state(CPUState *cpu) +{ + if (!cpu->vcpu_dirty) { + run_on_cpu(cpu, do_whpx_cpu_synchronize_state, RUN_ON_CPU_NULL); + } +} + +void whpx_cpu_synchronize_post_reset(CPUState *cpu) +{ + run_on_cpu(cpu, do_whpx_cpu_synchronize_post_reset, RUN_ON_CPU_NULL); +} + +void whpx_cpu_synchronize_post_init(CPUState *cpu) +{ + run_on_cpu(cpu, do_whpx_cpu_synchronize_post_init, RUN_ON_CPU_NULL); +} + +void whpx_cpu_synchronize_pre_loadvm(CPUState *cpu) +{ + run_on_cpu(cpu, do_whpx_cpu_synchronize_pre_loadvm, RUN_ON_CPU_NULL); +} + +/* + * Vcpu support. 
+ */ + +static Error *whpx_migration_blocker; + +static void whpx_cpu_update_state(void *opaque, int running, RunState state) +{ + CPUX86State *env = opaque; + + if (running) { + env->tsc_valid = false; + } +} + +int whpx_init_vcpu(CPUState *cpu) +{ + HRESULT hr; + struct whpx_state *whpx = &whpx_global; + struct whpx_vcpu *vcpu = NULL; + Error *local_error = NULL; + struct CPUX86State *env = (CPUArchState *)(cpu->env_ptr); + X86CPU *x86_cpu = X86_CPU(cpu); + UINT64 freq = 0; + int ret; + + /* Add migration blockers for all unsupported features of the + * Windows Hypervisor Platform + */ + if (whpx_migration_blocker == NULL) { + error_setg(&whpx_migration_blocker, + "State blocked due to non-migratable CPUID feature support," + "dirty memory tracking support, and XSAVE/XRSTOR support"); + + (void)migrate_add_blocker(whpx_migration_blocker, &local_error); + if (local_error) { + error_report_err(local_error); + migrate_del_blocker(whpx_migration_blocker); + error_free(whpx_migration_blocker); + ret = -EINVAL; + goto error; + } + } + + vcpu = g_malloc0(sizeof(struct whpx_vcpu)); + + if (!vcpu) { + error_report("WHPX: Failed to allocte VCPU context."); + ret = -ENOMEM; + goto error; + } + + hr = whp_dispatch.WHvEmulatorCreateEmulator( + &whpx_emu_callbacks, + &vcpu->emulator); + if (FAILED(hr)) { + error_report("WHPX: Failed to setup instruction completion support," + " hr=%08lx", hr); + ret = -EINVAL; + goto error; + } + + hr = whp_dispatch.WHvCreateVirtualProcessor( + whpx->partition, cpu->cpu_index, 0); + if (FAILED(hr)) { + error_report("WHPX: Failed to create a virtual processor," + " hr=%08lx", hr); + whp_dispatch.WHvEmulatorDestroyEmulator(vcpu->emulator); + ret = -EINVAL; + goto error; + } + + /* + * vcpu's TSC frequency is either specified by user, or use the value + * provided by Hyper-V if the former is not present. 
In the latter case, we + * query it from Hyper-V and record in env->tsc_khz, so that vcpu's TSC + * frequency can be migrated later via this field. + */ + if (!env->tsc_khz) { + hr = whp_dispatch.WHvGetCapability( + WHvCapabilityCodeProcessorClockFrequency, &freq, sizeof(freq), + NULL); + if (hr != WHV_E_UNKNOWN_CAPABILITY) { + if (FAILED(hr)) { + printf("WHPX: Failed to query tsc frequency, hr=0x%08lx\n", hr); + } else { + env->tsc_khz = freq / 1000; /* Hz to KHz */ + } + } + } + + env->apic_bus_freq = HYPERV_APIC_BUS_FREQUENCY; + hr = whp_dispatch.WHvGetCapability( + WHvCapabilityCodeInterruptClockFrequency, &freq, sizeof(freq), NULL); + if (hr != WHV_E_UNKNOWN_CAPABILITY) { + if (FAILED(hr)) { + printf("WHPX: Failed to query apic bus frequency hr=0x%08lx\n", hr); + } else { + env->apic_bus_freq = freq; + } + } + + /* + * If the vmware cpuid frequency leaf option is set, and we have a valid + * tsc value, trap the corresponding cpuid's. + */ + if (x86_cpu->vmware_cpuid_freq && env->tsc_khz) { + UINT32 cpuidExitList[] = {1, 0x80000001, 0x40000000, 0x40000010}; + + hr = whp_dispatch.WHvSetPartitionProperty( + whpx->partition, + WHvPartitionPropertyCodeCpuidExitList, + cpuidExitList, + RTL_NUMBER_OF(cpuidExitList) * sizeof(UINT32)); + + if (FAILED(hr)) { + error_report("WHPX: Failed to set partition CpuidExitList hr=%08lx", + hr); + ret = -EINVAL; + goto error; + } + } + + vcpu->interruptable = true; + cpu->vcpu_dirty = true; + cpu->hax_vcpu = (struct hax_vcpu_state *)vcpu; + max_vcpu_index = max(max_vcpu_index, cpu->cpu_index); + qemu_add_vm_change_state_handler(whpx_cpu_update_state, cpu->env_ptr); + + return 0; + +error: + g_free(vcpu); + + return ret; +} + +int whpx_vcpu_exec(CPUState *cpu) +{ + int ret; + int fatal; + + for (;;) { + if (cpu->exception_index >= EXCP_INTERRUPT) { + ret = cpu->exception_index; + cpu->exception_index = -1; + break; + } + + fatal = whpx_vcpu_run(cpu); + + if (fatal) { + error_report("WHPX: Failed to exec a virtual processor"); + 
abort(); + } + } + + return ret; +} + +void whpx_destroy_vcpu(CPUState *cpu) +{ + struct whpx_state *whpx = &whpx_global; + struct whpx_vcpu *vcpu = get_whpx_vcpu(cpu); + + whp_dispatch.WHvDeleteVirtualProcessor(whpx->partition, cpu->cpu_index); + whp_dispatch.WHvEmulatorDestroyEmulator(vcpu->emulator); + g_free(cpu->hax_vcpu); + return; +} + +void whpx_vcpu_kick(CPUState *cpu) +{ + struct whpx_state *whpx = &whpx_global; + whp_dispatch.WHvCancelRunVirtualProcessor( + whpx->partition, cpu->cpu_index, 0); +} + +/* + * Memory support. + */ + +static void whpx_update_mapping(hwaddr start_pa, ram_addr_t size, + void *host_va, int add, int rom, + const char *name) +{ + struct whpx_state *whpx = &whpx_global; + HRESULT hr; + + /* + if (add) { + printf("WHPX: ADD PA:%p Size:%p, Host:%p, %s, '%s'\n", + (void*)start_pa, (void*)size, host_va, + (rom ? "ROM" : "RAM"), name); + } else { + printf("WHPX: DEL PA:%p Size:%p, Host:%p, '%s'\n", + (void*)start_pa, (void*)size, host_va, name); + } + */ + + if (add) { + hr = whp_dispatch.WHvMapGpaRange(whpx->partition, + host_va, + start_pa, + size, + (WHvMapGpaRangeFlagRead | + WHvMapGpaRangeFlagExecute | + (rom ? 0 : WHvMapGpaRangeFlagWrite))); + } else { + hr = whp_dispatch.WHvUnmapGpaRange(whpx->partition, + start_pa, + size); + } + + if (FAILED(hr)) { + error_report("WHPX: Failed to %s GPA range '%s' PA:%p, Size:%p bytes," + " Host:%p, hr=%08lx", + (add ? 
"MAP" : "UNMAP"), name, + (void *)(uintptr_t)start_pa, (void *)size, host_va, hr); + } +} + +static void whpx_process_section(MemoryRegionSection *section, int add) +{ + MemoryRegion *mr = section->mr; + hwaddr start_pa = section->offset_within_address_space; + ram_addr_t size = int128_get64(section->size); + unsigned int delta; + uint64_t host_va; + + if (!memory_region_is_ram(mr)) { + return; + } + + delta = qemu_real_host_page_size - (start_pa & ~qemu_real_host_page_mask); + delta &= ~qemu_real_host_page_mask; + if (delta > size) { + return; + } + start_pa += delta; + size -= delta; + size &= qemu_real_host_page_mask; + if (!size || (start_pa & ~qemu_real_host_page_mask)) { + return; + } + + host_va = (uintptr_t)memory_region_get_ram_ptr(mr) + + section->offset_within_region + delta; + + whpx_update_mapping(start_pa, size, (void *)(uintptr_t)host_va, add, + memory_region_is_rom(mr), mr->name); +} + +static void whpx_region_add(MemoryListener *listener, + MemoryRegionSection *section) +{ + memory_region_ref(section->mr); + whpx_process_section(section, 1); +} + +static void whpx_region_del(MemoryListener *listener, + MemoryRegionSection *section) +{ + whpx_process_section(section, 0); + memory_region_unref(section->mr); +} + +static void whpx_transaction_begin(MemoryListener *listener) +{ +} + +static void whpx_transaction_commit(MemoryListener *listener) +{ +} + +static void whpx_log_sync(MemoryListener *listener, + MemoryRegionSection *section) +{ + MemoryRegion *mr = section->mr; + + if (!memory_region_is_ram(mr)) { + return; + } + + memory_region_set_dirty(mr, 0, int128_get64(section->size)); +} + +static MemoryListener whpx_memory_listener = { + .begin = whpx_transaction_begin, + .commit = whpx_transaction_commit, + .region_add = whpx_region_add, + .region_del = whpx_region_del, + .log_sync = whpx_log_sync, + .priority = 10, +}; + +static void whpx_memory_init(void) +{ + memory_listener_register(&whpx_memory_listener, &address_space_memory); +} + +/* + * 
Load the functions from the given library, using the given handle. If a + * handle is provided, it is used, otherwise the library is opened. The + * handle will be updated on return with the opened one. + */ +static bool load_whp_dispatch_fns(HMODULE *handle, + WHPFunctionList function_list) +{ + HMODULE hLib = *handle; + + #define WINHV_PLATFORM_DLL "WinHvPlatform.dll" + #define WINHV_EMULATION_DLL "WinHvEmulation.dll" + #define WHP_LOAD_FIELD_OPTIONAL(return_type, function_name, signature) \ + whp_dispatch.function_name = \ + (function_name ## _t)GetProcAddress(hLib, #function_name); \ + + #define WHP_LOAD_FIELD(return_type, function_name, signature) \ + whp_dispatch.function_name = \ + (function_name ## _t)GetProcAddress(hLib, #function_name); \ + if (!whp_dispatch.function_name) { \ + error_report("Could not load function %s", #function_name); \ + goto error; \ + } \ + + #define WHP_LOAD_LIB(lib_name, handle_lib) \ + if (!handle_lib) { \ + handle_lib = LoadLibrary(lib_name); \ + if (!handle_lib) { \ + error_report("Could not load library %s.", lib_name); \ + goto error; \ + } \ + } \ + + switch (function_list) { + case WINHV_PLATFORM_FNS_DEFAULT: + WHP_LOAD_LIB(WINHV_PLATFORM_DLL, hLib) + LIST_WINHVPLATFORM_FUNCTIONS(WHP_LOAD_FIELD) + break; + + case WINHV_EMULATION_FNS_DEFAULT: + WHP_LOAD_LIB(WINHV_EMULATION_DLL, hLib) + LIST_WINHVEMULATION_FUNCTIONS(WHP_LOAD_FIELD) + break; + + case WINHV_PLATFORM_FNS_SUPPLEMENTAL: + WHP_LOAD_LIB(WINHV_PLATFORM_DLL, hLib) + LIST_WINHVPLATFORM_FUNCTIONS_SUPPLEMENTAL(WHP_LOAD_FIELD_OPTIONAL) + break; + } + + *handle = hLib; + return true; + +error: + if (hLib) { + FreeLibrary(hLib); + } + + return false; +} + +static void whpx_set_kernel_irqchip(Object *obj, Visitor *v, + const char *name, void *opaque, + Error **errp) +{ + struct whpx_state *whpx = &whpx_global; + OnOffSplit mode; + + if (!visit_type_OnOffSplit(v, name, &mode, errp)) { + return; + } + + switch (mode) { + case ON_OFF_SPLIT_ON: + whpx->kernel_irqchip_allowed = 
true; + whpx->kernel_irqchip_required = true; + break; + + case ON_OFF_SPLIT_OFF: + whpx->kernel_irqchip_allowed = false; + whpx->kernel_irqchip_required = false; + break; + + case ON_OFF_SPLIT_SPLIT: + error_setg(errp, "WHPX: split irqchip currently not supported"); + error_append_hint(errp, + "Try without kernel-irqchip or with kernel-irqchip=on|off"); + break; + + default: + /* + * The value was checked in visit_type_OnOffSplit() above. If + * we get here, then something is wrong in QEMU. + */ + abort(); + } +} + +/* + * Partition support + */ + +static int whpx_accel_init(MachineState *ms) +{ + struct whpx_state *whpx; + int ret; + HRESULT hr; + WHV_CAPABILITY whpx_cap; + UINT32 whpx_cap_size; + WHV_PARTITION_PROPERTY prop; + UINT32 cpuidExitList[] = {1, 0x80000001}; + WHV_CAPABILITY_FEATURES features = {0}; + + whpx = &whpx_global; + + if (!init_whp_dispatch()) { + ret = -ENOSYS; + goto error; + } + + whpx->mem_quota = ms->ram_size; + + hr = whp_dispatch.WHvGetCapability( + WHvCapabilityCodeHypervisorPresent, &whpx_cap, + sizeof(whpx_cap), &whpx_cap_size); + if (FAILED(hr) || !whpx_cap.HypervisorPresent) { + error_report("WHPX: No accelerator found, hr=%08lx", hr); + ret = -ENOSPC; + goto error; + } + + hr = whp_dispatch.WHvGetCapability( + WHvCapabilityCodeFeatures, &features, sizeof(features), NULL); + if (FAILED(hr)) { + error_report("WHPX: Failed to query capabilities, hr=%08lx", hr); + ret = -EINVAL; + goto error; + } + + hr = whp_dispatch.WHvCreatePartition(&whpx->partition); + if (FAILED(hr)) { + error_report("WHPX: Failed to create partition, hr=%08lx", hr); + ret = -EINVAL; + goto error; + } + + memset(&prop, 0, sizeof(WHV_PARTITION_PROPERTY)); + prop.ProcessorCount = ms->smp.cpus; + hr = whp_dispatch.WHvSetPartitionProperty( + whpx->partition, + WHvPartitionPropertyCodeProcessorCount, + &prop, + sizeof(WHV_PARTITION_PROPERTY)); + + if (FAILED(hr)) { + error_report("WHPX: Failed to set partition core count to %d," + " hr=%08lx", ms->smp.cores, hr); + 
ret = -EINVAL; + goto error; + } + + /* + * Error out if WHP doesn't support apic emulation and user is requiring + * it. + */ + if (whpx->kernel_irqchip_required && (!features.LocalApicEmulation || + !whp_dispatch.WHvSetVirtualProcessorInterruptControllerState2)) { + error_report("WHPX: kernel irqchip requested, but unavailable. " + "Try without kernel-irqchip or with kernel-irqchip=off"); + ret = -EINVAL; + goto error; + } + + if (whpx->kernel_irqchip_allowed && features.LocalApicEmulation && + whp_dispatch.WHvSetVirtualProcessorInterruptControllerState2) { + WHV_X64_LOCAL_APIC_EMULATION_MODE mode = + WHvX64LocalApicEmulationModeXApic; + printf("WHPX: setting APIC emulation mode in the hypervisor\n"); + hr = whp_dispatch.WHvSetPartitionProperty( + whpx->partition, + WHvPartitionPropertyCodeLocalApicEmulationMode, + &mode, + sizeof(mode)); + if (FAILED(hr)) { + error_report("WHPX: Failed to enable kernel irqchip hr=%08lx", hr); + if (whpx->kernel_irqchip_required) { + error_report("WHPX: kernel irqchip requested, but unavailable"); + ret = -EINVAL; + goto error; + } + } else { + whpx->apic_in_platform = true; + } + } + + /* Register for MSR and CPUID exits */ + memset(&prop, 0, sizeof(WHV_PARTITION_PROPERTY)); + prop.ExtendedVmExits.X64MsrExit = 1; + prop.ExtendedVmExits.X64CpuidExit = 1; + if (whpx_apic_in_platform()) { + prop.ExtendedVmExits.X64ApicInitSipiExitTrap = 1; + } + + hr = whp_dispatch.WHvSetPartitionProperty( + whpx->partition, + WHvPartitionPropertyCodeExtendedVmExits, + &prop, + sizeof(WHV_PARTITION_PROPERTY)); + if (FAILED(hr)) { + error_report("WHPX: Failed to enable MSR & CPUIDexit, hr=%08lx", hr); + ret = -EINVAL; + goto error; + } + + hr = whp_dispatch.WHvSetPartitionProperty( + whpx->partition, + WHvPartitionPropertyCodeCpuidExitList, + cpuidExitList, + RTL_NUMBER_OF(cpuidExitList) * sizeof(UINT32)); + + if (FAILED(hr)) { + error_report("WHPX: Failed to set partition CpuidExitList hr=%08lx", + hr); + ret = -EINVAL; + goto error; + } + + hr = 
whp_dispatch.WHvSetupPartition(whpx->partition); + if (FAILED(hr)) { + error_report("WHPX: Failed to setup partition, hr=%08lx", hr); + ret = -EINVAL; + goto error; + } + + whpx_memory_init(); + + cpus_register_accel(&whpx_cpus); + + printf("Windows Hypervisor Platform accelerator is operational\n"); + return 0; + +error: + + if (NULL != whpx->partition) { + whp_dispatch.WHvDeletePartition(whpx->partition); + whpx->partition = NULL; + } + + return ret; +} + +int whpx_enabled(void) +{ + return whpx_allowed; +} + +static void whpx_accel_class_init(ObjectClass *oc, void *data) +{ + AccelClass *ac = ACCEL_CLASS(oc); + ac->name = "WHPX"; + ac->init_machine = whpx_accel_init; + ac->allowed = &whpx_allowed; + + object_class_property_add(oc, "kernel-irqchip", "on|off|split", + NULL, whpx_set_kernel_irqchip, + NULL, NULL); + object_class_property_set_description(oc, "kernel-irqchip", + "Configure WHPX in-kernel irqchip"); +} + +static void whpx_accel_instance_init(Object *obj) +{ + struct whpx_state *whpx = &whpx_global; + + memset(whpx, 0, sizeof(struct whpx_state)); + /* Turn on kernel-irqchip, by default */ + whpx->kernel_irqchip_allowed = true; +} + +static const TypeInfo whpx_accel_type = { + .name = ACCEL_CLASS_NAME("whpx"), + .parent = TYPE_ACCEL, + .instance_init = whpx_accel_instance_init, + .class_init = whpx_accel_class_init, +}; + +static void whpx_type_init(void) +{ + type_register_static(&whpx_accel_type); +} + +bool init_whp_dispatch(void) +{ + if (whp_dispatch_initialized) { + return true; + } + + if (!load_whp_dispatch_fns(&hWinHvPlatform, WINHV_PLATFORM_FNS_DEFAULT)) { + goto error; + } + + if (!load_whp_dispatch_fns(&hWinHvEmulation, WINHV_EMULATION_FNS_DEFAULT)) { + goto error; + } + + assert(load_whp_dispatch_fns(&hWinHvPlatform, + WINHV_PLATFORM_FNS_SUPPLEMENTAL)); + whp_dispatch_initialized = true; + + return true; +error: + if (hWinHvPlatform) { + FreeLibrary(hWinHvPlatform); + } + + if (hWinHvEmulation) { + FreeLibrary(hWinHvEmulation); + } + + 
return false; +} + +type_init(whpx_type_init); diff --git a/target/i386/whpx/whpx-apic.c b/target/i386/whpx/whpx-apic.c new file mode 100644 index 0000000000..b127a3cb8a --- /dev/null +++ b/target/i386/whpx/whpx-apic.c @@ -0,0 +1,274 @@ +/* + * WHPX platform APIC support + * + * Copyright (c) 2011 Siemens AG + * + * Authors: + * Jan Kiszka + * John Starks + * + * This work is licensed under the terms of the GNU GPL version 2. + * See the COPYING file in the top-level directory. + */ +#include "qemu/osdep.h" +#include "qemu-common.h" +#include "cpu.h" +#include "hw/i386/apic_internal.h" +#include "hw/i386/apic-msidef.h" +#include "hw/pci/msi.h" +#include "sysemu/hw_accel.h" +#include "sysemu/whpx.h" +#include "whp-dispatch.h" + +static void whpx_put_apic_state(APICCommonState *s, + struct whpx_lapic_state *kapic) +{ + int i; + + memset(kapic, 0, sizeof(*kapic)); + kapic->fields[0x2].data = s->id << 24; + kapic->fields[0x3].data = s->version | ((APIC_LVT_NB - 1) << 16); + kapic->fields[0x8].data = s->tpr; + kapic->fields[0xd].data = s->log_dest << 24; + kapic->fields[0xe].data = s->dest_mode << 28 | 0x0fffffff; + kapic->fields[0xf].data = s->spurious_vec; + for (i = 0; i < 8; i++) { + kapic->fields[0x10 + i].data = s->isr[i]; + kapic->fields[0x18 + i].data = s->tmr[i]; + kapic->fields[0x20 + i].data = s->irr[i]; + } + + kapic->fields[0x28].data = s->esr; + kapic->fields[0x30].data = s->icr[0]; + kapic->fields[0x31].data = s->icr[1]; + for (i = 0; i < APIC_LVT_NB; i++) { + kapic->fields[0x32 + i].data = s->lvt[i]; + } + + kapic->fields[0x38].data = s->initial_count; + kapic->fields[0x3e].data = s->divide_conf; +} + +static void whpx_get_apic_state(APICCommonState *s, + struct whpx_lapic_state *kapic) +{ + int i, v; + + s->id = kapic->fields[0x2].data >> 24; + s->tpr = kapic->fields[0x8].data; + s->arb_id = kapic->fields[0x9].data; + s->log_dest = kapic->fields[0xd].data >> 24; + s->dest_mode = kapic->fields[0xe].data >> 28; + s->spurious_vec = kapic->fields[0xf].data; 
+ for (i = 0; i < 8; i++) { + s->isr[i] = kapic->fields[0x10 + i].data; + s->tmr[i] = kapic->fields[0x18 + i].data; + s->irr[i] = kapic->fields[0x20 + i].data; + } + + s->esr = kapic->fields[0x28].data; + s->icr[0] = kapic->fields[0x30].data; + s->icr[1] = kapic->fields[0x31].data; + for (i = 0; i < APIC_LVT_NB; i++) { + s->lvt[i] = kapic->fields[0x32 + i].data; + } + + s->initial_count = kapic->fields[0x38].data; + s->divide_conf = kapic->fields[0x3e].data; + + v = (s->divide_conf & 3) | ((s->divide_conf >> 1) & 4); + s->count_shift = (v + 1) & 7; + + s->initial_count_load_time = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); + apic_next_timer(s, s->initial_count_load_time); +} + +static void whpx_apic_set_base(APICCommonState *s, uint64_t val) +{ + s->apicbase = val; +} + +static void whpx_put_apic_base(CPUState *cpu, uint64_t val) +{ + HRESULT hr; + WHV_REGISTER_VALUE reg_value = {.Reg64 = val}; + WHV_REGISTER_NAME reg_name = WHvX64RegisterApicBase; + + hr = whp_dispatch.WHvSetVirtualProcessorRegisters( + whpx_global.partition, + cpu->cpu_index, + ®_name, 1, + ®_value); + + if (FAILED(hr)) { + error_report("WHPX: Failed to set MSR APIC base, hr=%08lx", hr); + } +} + +static void whpx_apic_set_tpr(APICCommonState *s, uint8_t val) +{ + s->tpr = val; +} + +static uint8_t whpx_apic_get_tpr(APICCommonState *s) +{ + return s->tpr; +} + +static void whpx_apic_vapic_base_update(APICCommonState *s) +{ + /* not implemented yet */ +} + +static void whpx_apic_put(CPUState *cs, run_on_cpu_data data) +{ + APICCommonState *s = data.host_ptr; + struct whpx_lapic_state kapic; + HRESULT hr; + + whpx_put_apic_base(CPU(s->cpu), s->apicbase); + whpx_put_apic_state(s, &kapic); + + hr = whp_dispatch.WHvSetVirtualProcessorInterruptControllerState2( + whpx_global.partition, + cs->cpu_index, + &kapic, + sizeof(kapic)); + if (FAILED(hr)) { + fprintf(stderr, + "WHvSetVirtualProcessorInterruptControllerState failed: %08lx\n", + hr); + + abort(); + } +} + +void whpx_apic_get(DeviceState *dev) +{ + 
APICCommonState *s = APIC_COMMON(dev); + CPUState *cpu = CPU(s->cpu); + struct whpx_lapic_state kapic; + + HRESULT hr = whp_dispatch.WHvGetVirtualProcessorInterruptControllerState2( + whpx_global.partition, + cpu->cpu_index, + &kapic, + sizeof(kapic), + NULL); + if (FAILED(hr)) { + fprintf(stderr, + "WHvSetVirtualProcessorInterruptControllerState failed: %08lx\n", + hr); + + abort(); + } + + whpx_get_apic_state(s, &kapic); +} + +static void whpx_apic_post_load(APICCommonState *s) +{ + run_on_cpu(CPU(s->cpu), whpx_apic_put, RUN_ON_CPU_HOST_PTR(s)); +} + +static void whpx_apic_external_nmi(APICCommonState *s) +{ +} + +static void whpx_send_msi(MSIMessage *msg) +{ + uint64_t addr = msg->address; + uint32_t data = msg->data; + uint8_t dest = (addr & MSI_ADDR_DEST_ID_MASK) >> MSI_ADDR_DEST_ID_SHIFT; + uint8_t vector = (data & MSI_DATA_VECTOR_MASK) >> MSI_DATA_VECTOR_SHIFT; + uint8_t dest_mode = (addr >> MSI_ADDR_DEST_MODE_SHIFT) & 0x1; + uint8_t trigger_mode = (data >> MSI_DATA_TRIGGER_SHIFT) & 0x1; + uint8_t delivery = (data >> MSI_DATA_DELIVERY_MODE_SHIFT) & 0x7; + + WHV_INTERRUPT_CONTROL interrupt = { + /* Values correspond to delivery modes */ + .Type = delivery, + .DestinationMode = dest_mode ? + WHvX64InterruptDestinationModeLogical : + WHvX64InterruptDestinationModePhysical, + + .TriggerMode = trigger_mode ? 
+ WHvX64InterruptTriggerModeLevel : WHvX64InterruptTriggerModeEdge, + .Reserved = 0, + .Vector = vector, + .Destination = dest, + }; + HRESULT hr = whp_dispatch.WHvRequestInterrupt(whpx_global.partition, + &interrupt, sizeof(interrupt)); + if (FAILED(hr)) { + fprintf(stderr, "whpx: injection failed, MSI (%llx, %x) delivery: %d, " + "dest_mode: %d, trigger mode: %d, vector: %d, lost (%08lx)\n", + addr, data, delivery, dest_mode, trigger_mode, vector, hr); + } +} + +static uint64_t whpx_apic_mem_read(void *opaque, hwaddr addr, + unsigned size) +{ + return ~(uint64_t)0; +} + +static void whpx_apic_mem_write(void *opaque, hwaddr addr, + uint64_t data, unsigned size) +{ + MSIMessage msg = { .address = addr, .data = data }; + whpx_send_msi(&msg); +} + +static const MemoryRegionOps whpx_apic_io_ops = { + .read = whpx_apic_mem_read, + .write = whpx_apic_mem_write, + .endianness = DEVICE_NATIVE_ENDIAN, +}; + +static void whpx_apic_reset(APICCommonState *s) +{ + /* Not used by WHPX. */ + s->wait_for_sipi = 0; + + run_on_cpu(CPU(s->cpu), whpx_apic_put, RUN_ON_CPU_HOST_PTR(s)); +} + +static void whpx_apic_realize(DeviceState *dev, Error **errp) +{ + APICCommonState *s = APIC_COMMON(dev); + + memory_region_init_io(&s->io_memory, OBJECT(s), &whpx_apic_io_ops, s, + "whpx-apic-msi", APIC_SPACE_SIZE); + + msi_nonbroken = true; +} + +static void whpx_apic_class_init(ObjectClass *klass, void *data) +{ + APICCommonClass *k = APIC_COMMON_CLASS(klass); + + k->realize = whpx_apic_realize; + k->reset = whpx_apic_reset; + k->set_base = whpx_apic_set_base; + k->set_tpr = whpx_apic_set_tpr; + k->get_tpr = whpx_apic_get_tpr; + k->post_load = whpx_apic_post_load; + k->vapic_base_update = whpx_apic_vapic_base_update; + k->external_nmi = whpx_apic_external_nmi; + k->send_msi = whpx_send_msi; +} + +static const TypeInfo whpx_apic_info = { + .name = "whpx-apic", + .parent = TYPE_APIC_COMMON, + .instance_size = sizeof(APICCommonState), + .class_init = whpx_apic_class_init, +}; + +static void 
whpx_apic_register_types(void) +{ + type_register_static(&whpx_apic_info); +} + +type_init(whpx_apic_register_types) diff --git a/target/i386/whpx/whpx-cpus.c b/target/i386/whpx/whpx-cpus.c new file mode 100644 index 0000000000..d9bd5a2d36 --- /dev/null +++ b/target/i386/whpx/whpx-cpus.c @@ -0,0 +1,96 @@ +/* + * QEMU Windows Hypervisor Platform accelerator (WHPX) + * + * Copyright Microsoft Corp. 2017 + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +#include "qemu/osdep.h" +#include "sysemu/kvm_int.h" +#include "qemu/main-loop.h" +#include "sysemu/cpus.h" +#include "qemu/guest-random.h" + +#include "sysemu/whpx.h" +#include "whpx-cpus.h" + +#include +#include + +static void *whpx_cpu_thread_fn(void *arg) +{ + CPUState *cpu = arg; + int r; + + rcu_register_thread(); + + qemu_mutex_lock_iothread(); + qemu_thread_get_self(cpu->thread); + cpu->thread_id = qemu_get_thread_id(); + current_cpu = cpu; + + r = whpx_init_vcpu(cpu); + if (r < 0) { + fprintf(stderr, "whpx_init_vcpu failed: %s\n", strerror(-r)); + exit(1); + } + + /* signal CPU creation */ + cpu_thread_signal_created(cpu); + qemu_guest_random_seed_thread_part2(cpu->random_seed); + + do { + if (cpu_can_run(cpu)) { + r = whpx_vcpu_exec(cpu); + if (r == EXCP_DEBUG) { + cpu_handle_guest_debug(cpu); + } + } + while (cpu_thread_is_idle(cpu)) { + qemu_cond_wait_iothread(cpu->halt_cond); + } + qemu_wait_io_event_common(cpu); + } while (!cpu->unplug || cpu_can_run(cpu)); + + whpx_destroy_vcpu(cpu); + cpu_thread_signal_destroyed(cpu); + qemu_mutex_unlock_iothread(); + rcu_unregister_thread(); + return NULL; +} + +static void whpx_start_vcpu_thread(CPUState *cpu) +{ + char thread_name[VCPU_THREAD_NAME_SIZE]; + + cpu->thread = g_malloc0(sizeof(QemuThread)); + cpu->halt_cond = g_malloc0(sizeof(QemuCond)); + qemu_cond_init(cpu->halt_cond); + snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/WHPX", + cpu->cpu_index); + 
qemu_thread_create(cpu->thread, thread_name, whpx_cpu_thread_fn, + cpu, QEMU_THREAD_JOINABLE); +#ifdef _WIN32 + cpu->hThread = qemu_thread_get_handle(cpu->thread); +#endif +} + +static void whpx_kick_vcpu_thread(CPUState *cpu) +{ + if (!qemu_cpu_is_self(cpu)) { + whpx_vcpu_kick(cpu); + } +} + +const CpusAccel whpx_cpus = { + .create_vcpu_thread = whpx_start_vcpu_thread, + .kick_vcpu_thread = whpx_kick_vcpu_thread, + + .synchronize_post_reset = whpx_cpu_synchronize_post_reset, + .synchronize_post_init = whpx_cpu_synchronize_post_init, + .synchronize_state = whpx_cpu_synchronize_state, + .synchronize_pre_loadvm = whpx_cpu_synchronize_pre_loadvm, +}; diff --git a/target/i386/whpx/whpx-cpus.h b/target/i386/whpx/whpx-cpus.h new file mode 100644 index 0000000000..bdb367d1d0 --- /dev/null +++ b/target/i386/whpx/whpx-cpus.h @@ -0,0 +1,34 @@ +/* + * Accelerator CPUS Interface + * + * Copyright 2020 SUSE LLC + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef WHPX_CPUS_H +#define WHPX_CPUS_H + +#include "sysemu/cpus.h" + +extern const CpusAccel whpx_cpus; + +int whpx_init_vcpu(CPUState *cpu); +int whpx_vcpu_exec(CPUState *cpu); +void whpx_destroy_vcpu(CPUState *cpu); +void whpx_vcpu_kick(CPUState *cpu); + +void whpx_cpu_synchronize_state(CPUState *cpu); +void whpx_cpu_synchronize_post_reset(CPUState *cpu); +void whpx_cpu_synchronize_post_init(CPUState *cpu); +void whpx_cpu_synchronize_pre_loadvm(CPUState *cpu); + +/* state subset only touched by the VCPU itself during runtime */ +#define WHPX_SET_RUNTIME_STATE 1 +/* state subset modified during VCPU reset */ +#define WHPX_SET_RESET_STATE 2 +/* full state set, modified during initialization or on vmload */ +#define WHPX_SET_FULL_STATE 3 + +#endif /* WHPX_CPUS_H */ -- cgit v1.2.3-55-g7522