diff options
author | Peter Maydell | 2020-12-11 14:50:35 +0100 |
---|---|---|
committer | Peter Maydell | 2020-12-11 14:50:35 +0100 |
commit | b785d25e91718a660546a6550f64b3c543af7754 (patch) | |
tree | a69f546d903351e4db899b8f080468ff12fe14e0 /target/i386 | |
parent | Merge remote-tracking branch 'remotes/ehabkost/tags/machine-next-pull-request... (diff) | |
parent | scripts: kernel-doc: remove unnecessary change wrt Linux (diff) | |
download | qemu-b785d25e91718a660546a6550f64b3c543af7754.tar.gz qemu-b785d25e91718a660546a6550f64b3c543af7754.tar.xz qemu-b785d25e91718a660546a6550f64b3c543af7754.zip |
Merge remote-tracking branch 'remotes/bonzini-gitlab/tags/for-upstream' into staging
* Fix for NULL segments (Bin Meng)
* Support for 32768 CPUs on x86 without IOMMU (David)
* PDEP/PEXT fix and testcase (myself)
* Remove bios_name and ram_size globals (myself)
* qemu_init rationalization (myself)
* Update kernel-doc (myself + upstream patches)
* Propagate MemTxResult across DMA and PCI functions (Philippe)
* Remove master/slave when applicable (Philippe)
* WHPX support for in-kernel irqchip (Sunil)
# gpg: Signature made Thu 10 Dec 2020 17:21:50 GMT
# gpg: using RSA key F13338574B662389866C7682BFFBD25F78C7AE83
# gpg: issuer "pbonzini@redhat.com"
# gpg: Good signature from "Paolo Bonzini <bonzini@gnu.org>" [full]
# gpg: aka "Paolo Bonzini <pbonzini@redhat.com>" [full]
# Primary key fingerprint: 46F5 9FBD 57D6 12E7 BFD4 E2F7 7E15 100C CD36 69B1
# Subkey fingerprint: F133 3857 4B66 2389 866C 7682 BFFB D25F 78C7 AE83
* remotes/bonzini-gitlab/tags/for-upstream: (113 commits)
scripts: kernel-doc: remove unnecessary change wrt Linux
Revert "docs: temporarily disable the kernel-doc extension"
scripts: kernel-doc: use :c:union when needed
scripts: kernel-doc: split typedef complex regex
scripts: kernel-doc: fix typedef parsing
Revert "kernel-doc: Handle function typedefs that return pointers"
Revert "kernel-doc: Handle function typedefs without asterisks"
scripts: kernel-doc: try to use c:function if possible
scripts: kernel-doc: fix line number handling
scripts: kernel-doc: allow passing desired Sphinx C domain dialect
scripts: kernel-doc: don't mangle with parameter list
scripts: kernel-doc: fix typedef identification
scripts: kernel-doc: reimplement -nofunction argument
scripts: kernel-doc: fix troubles with line counts
scripts: kernel-doc: use a less pedantic markup for funcs on Sphinx 3.x
scripts: kernel-doc: make it more compatible with Sphinx 3.x
Revert "kernel-doc: Use c:struct for Sphinx 3.0 and later"
Revert "scripts/kerneldoc: For Sphinx 3 use c:macro for macros with arguments"
scripts: kernel-doc: add support for typedef enum
kernel-doc: add support for ____cacheline_aligned attribute
...
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Diffstat (limited to 'target/i386')
-rw-r--r-- | target/i386/cpu.c | 8 | ||||
-rw-r--r-- | target/i386/kvm.c | 77 | ||||
-rw-r--r-- | target/i386/kvm_i386.h | 2 | ||||
-rw-r--r-- | target/i386/meson.build | 1 | ||||
-rw-r--r-- | target/i386/seg_helper.c | 5 | ||||
-rw-r--r-- | target/i386/translate.c | 8 | ||||
-rw-r--r-- | target/i386/whp-dispatch.h | 9 | ||||
-rw-r--r-- | target/i386/whpx-all.c | 291 | ||||
-rw-r--r-- | target/i386/whpx-apic.c | 274 |
9 files changed, 615 insertions, 60 deletions
diff --git a/target/i386/cpu.c b/target/i386/cpu.c index 900ea08283..6c11feeb92 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -30,6 +30,7 @@ #include "sysemu/hvf.h" #include "sysemu/cpus.h" #include "sysemu/xen.h" +#include "sysemu/whpx.h" #include "kvm_i386.h" #include "sev_i386.h" @@ -800,7 +801,7 @@ static FeatureWordInfo feature_word_info[FEATURE_WORDS] = { "kvmclock", "kvm-nopiodelay", "kvm-mmu", "kvmclock", "kvm-asyncpf", "kvm-steal-time", "kvm-pv-eoi", "kvm-pv-unhalt", NULL, "kvm-pv-tlb-flush", NULL, "kvm-pv-ipi", - "kvm-poll-control", "kvm-pv-sched-yield", "kvm-asyncpf-int", NULL, + "kvm-poll-control", "kvm-pv-sched-yield", "kvm-asyncpf-int", "kvm-msi-ext-dest-id", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, "kvmclock-stable-bit", NULL, NULL, NULL, @@ -4139,6 +4140,7 @@ static PropValue kvm_default_props[] = { { "kvm-pv-eoi", "on" }, { "kvmclock-stable-bit", "on" }, { "x2apic", "on" }, + { "kvm-msi-ext-dest-id", "off" }, { "acpi", "off" }, { "monitor", "off" }, { "svm", "off" }, @@ -5165,6 +5167,8 @@ static void x86_cpu_load_model(X86CPU *cpu, X86CPUModel *model) if (kvm_enabled()) { if (!kvm_irqchip_in_kernel()) { x86_cpu_change_kvm_default("x2apic", "off"); + } else if (kvm_irqchip_is_split() && kvm_enable_x2apic()) { + x86_cpu_change_kvm_default("kvm-msi-ext-dest-id", "on"); } x86_cpu_apply_props(cpu, kvm_default_props); @@ -6173,6 +6177,8 @@ APICCommonClass *apic_get_class(void) apic_type = "kvm-apic"; } else if (xen_enabled()) { apic_type = "xen-apic"; + } else if (whpx_apic_in_platform()) { + apic_type = "whpx-apic"; } return APIC_COMMON_CLASS(object_class_by_name(apic_type)); diff --git a/target/i386/kvm.c b/target/i386/kvm.c index a2934dda02..bcfa4b03e0 100644 --- a/target/i386/kvm.c +++ b/target/i386/kvm.c @@ -416,6 +416,9 @@ uint32_t kvm_arch_get_supported_cpuid(KVMState *s, uint32_t function, if (!kvm_irqchip_in_kernel()) { ret &= ~(1U << KVM_FEATURE_PV_UNHALT); } + if (kvm_irqchip_is_split()) { + ret |= 1U << 
KVM_FEATURE_MSI_EXT_DEST_ID; + } } else if (function == KVM_CPUID_FEATURES && reg == R_EDX) { ret |= 1U << KVM_HINTS_REALTIME; } @@ -4589,38 +4592,74 @@ int kvm_arch_irqchip_create(KVMState *s) } } +uint64_t kvm_swizzle_msi_ext_dest_id(uint64_t address) +{ + CPUX86State *env; + uint64_t ext_id; + + if (!first_cpu) { + return address; + } + env = &X86_CPU(first_cpu)->env; + if (!(env->features[FEAT_KVM] & (1 << KVM_FEATURE_MSI_EXT_DEST_ID))) { + return address; + } + + /* + * If the remappable format bit is set, or the upper bits are + * already set in address_hi, or the low extended bits aren't + * there anyway, do nothing. + */ + ext_id = address & (0xff << MSI_ADDR_DEST_IDX_SHIFT); + if (!ext_id || (ext_id & (1 << MSI_ADDR_DEST_IDX_SHIFT)) || (address >> 32)) { + return address; + } + + address &= ~ext_id; + address |= ext_id << 35; + return address; +} + int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route, uint64_t address, uint32_t data, PCIDevice *dev) { X86IOMMUState *iommu = x86_iommu_get_default(); if (iommu) { - int ret; - MSIMessage src, dst; X86IOMMUClass *class = X86_IOMMU_DEVICE_GET_CLASS(iommu); - if (!class->int_remap) { - return 0; - } + if (class->int_remap) { + int ret; + MSIMessage src, dst; - src.address = route->u.msi.address_hi; - src.address <<= VTD_MSI_ADDR_HI_SHIFT; - src.address |= route->u.msi.address_lo; - src.data = route->u.msi.data; + src.address = route->u.msi.address_hi; + src.address <<= VTD_MSI_ADDR_HI_SHIFT; + src.address |= route->u.msi.address_lo; + src.data = route->u.msi.data; - ret = class->int_remap(iommu, &src, &dst, dev ? \ - pci_requester_id(dev) : \ - X86_IOMMU_SID_INVALID); - if (ret) { - trace_kvm_x86_fixup_msi_error(route->gsi); - return 1; - } + ret = class->int_remap(iommu, &src, &dst, dev ? 
\ + pci_requester_id(dev) : \ + X86_IOMMU_SID_INVALID); + if (ret) { + trace_kvm_x86_fixup_msi_error(route->gsi); + return 1; + } + + /* + * Handled untranslated compatibilty format interrupt with + * extended destination ID in the low bits 11-5. */ + dst.address = kvm_swizzle_msi_ext_dest_id(dst.address); - route->u.msi.address_hi = dst.address >> VTD_MSI_ADDR_HI_SHIFT; - route->u.msi.address_lo = dst.address & VTD_MSI_ADDR_LO_MASK; - route->u.msi.data = dst.data; + route->u.msi.address_hi = dst.address >> VTD_MSI_ADDR_HI_SHIFT; + route->u.msi.address_lo = dst.address & VTD_MSI_ADDR_LO_MASK; + route->u.msi.data = dst.data; + return 0; + } } + address = kvm_swizzle_msi_ext_dest_id(address); + route->u.msi.address_hi = address >> VTD_MSI_ADDR_HI_SHIFT; + route->u.msi.address_lo = address & VTD_MSI_ADDR_LO_MASK; return 0; } diff --git a/target/i386/kvm_i386.h b/target/i386/kvm_i386.h index a4a619cebb..dc72508389 100644 --- a/target/i386/kvm_i386.h +++ b/target/i386/kvm_i386.h @@ -48,4 +48,6 @@ bool kvm_has_waitpkg(void); bool kvm_hv_vpindex_settable(void); +uint64_t kvm_swizzle_msi_ext_dest_id(uint64_t address); + #endif diff --git a/target/i386/meson.build b/target/i386/meson.build index a1a02f3e99..fc3ee80386 100644 --- a/target/i386/meson.build +++ b/target/i386/meson.build @@ -33,6 +33,7 @@ i386_softmmu_ss.add(when: 'CONFIG_KVM', if_true: files('kvm.c')) i386_softmmu_ss.add(when: 'CONFIG_WHPX', if_true: files( 'whpx-all.c', 'whpx-cpus.c', + 'whpx-apic.c', )) i386_softmmu_ss.add(when: 'CONFIG_HAX', if_true: files( 'hax-all.c', diff --git a/target/i386/seg_helper.c b/target/i386/seg_helper.c index 09b6554660..e6ffa1f018 100644 --- a/target/i386/seg_helper.c +++ b/target/i386/seg_helper.c @@ -2108,7 +2108,10 @@ static inline void validate_seg(CPUX86State *env, int seg_reg, int cpl) if (!(e2 & DESC_CS_MASK) || !(e2 & DESC_C_MASK)) { /* data or non conforming code segment */ if (dpl < cpl) { - cpu_x86_load_seg_cache(env, seg_reg, 0, 0, 0, 0); + 
cpu_x86_load_seg_cache(env, seg_reg, 0, + env->segs[seg_reg].base, + env->segs[seg_reg].limit, + env->segs[seg_reg].flags & ~DESC_P_MASK); } } } diff --git a/target/i386/translate.c b/target/i386/translate.c index 4c57307e42..e8f5f5803a 100644 --- a/target/i386/translate.c +++ b/target/i386/translate.c @@ -3936,14 +3936,14 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, } ot = mo_64_32(s->dflag); gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0); - /* Note that by zero-extending the mask operand, we + /* Note that by zero-extending the source operand, we automatically handle zero-extending the result. */ if (ot == MO_64) { tcg_gen_mov_tl(s->T1, cpu_regs[s->vex_v]); } else { tcg_gen_ext32u_tl(s->T1, cpu_regs[s->vex_v]); } - gen_helper_pdep(cpu_regs[reg], s->T0, s->T1); + gen_helper_pdep(cpu_regs[reg], s->T1, s->T0); break; case 0x2f5: /* pext Gy, By, Ey */ @@ -3954,14 +3954,14 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, } ot = mo_64_32(s->dflag); gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0); - /* Note that by zero-extending the mask operand, we + /* Note that by zero-extending the source operand, we automatically handle zero-extending the result. 
*/ if (ot == MO_64) { tcg_gen_mov_tl(s->T1, cpu_regs[s->vex_v]); } else { tcg_gen_ext32u_tl(s->T1, cpu_regs[s->vex_v]); } - gen_helper_pext(cpu_regs[reg], s->T0, s->T1); + gen_helper_pext(cpu_regs[reg], s->T1, s->T0); break; case 0x1f6: /* adcx Gy, Ey */ diff --git a/target/i386/whp-dispatch.h b/target/i386/whp-dispatch.h index b18aba20ed..cef5d848bd 100644 --- a/target/i386/whp-dispatch.h +++ b/target/i386/whp-dispatch.h @@ -30,6 +30,14 @@ */ #define LIST_WINHVPLATFORM_FUNCTIONS_SUPPLEMENTAL(X) \ X(HRESULT, WHvSuspendPartitionTime, (WHV_PARTITION_HANDLE Partition)) \ + X(HRESULT, WHvRequestInterrupt, (WHV_PARTITION_HANDLE Partition, \ + WHV_INTERRUPT_CONTROL* Interrupt, UINT32 InterruptControlSize)) \ + X(HRESULT, WHvGetVirtualProcessorInterruptControllerState2, \ + (WHV_PARTITION_HANDLE Partition, UINT32 VpIndex, PVOID State, \ + UINT32 StateSize, UINT32* WrittenSize)) \ + X(HRESULT, WHvSetVirtualProcessorInterruptControllerState2, \ + (WHV_PARTITION_HANDLE Partition, UINT32 VpIndex, PVOID State, \ + UINT32 StateSize)) \ #define LIST_WINHVEMULATION_FUNCTIONS(X) \ X(HRESULT, WHvEmulatorCreateEmulator, (const WHV_EMULATOR_CALLBACKS* Callbacks, WHV_EMULATOR_HANDLE* Emulator)) \ @@ -37,7 +45,6 @@ X(HRESULT, WHvEmulatorTryIoEmulation, (WHV_EMULATOR_HANDLE Emulator, VOID* Context, const WHV_VP_EXIT_CONTEXT* VpContext, const WHV_X64_IO_PORT_ACCESS_CONTEXT* IoInstructionContext, WHV_EMULATOR_STATUS* EmulatorReturnStatus)) \ X(HRESULT, WHvEmulatorTryMmioEmulation, (WHV_EMULATOR_HANDLE Emulator, VOID* Context, const WHV_VP_EXIT_CONTEXT* VpContext, const WHV_MEMORY_ACCESS_CONTEXT* MmioInstructionContext, WHV_EMULATOR_STATUS* EmulatorReturnStatus)) \ - #define WHP_DEFINE_TYPE(return_type, function_name, signature) \ typedef return_type (WINAPI *function_name ## _t) signature; diff --git a/target/i386/whpx-all.c b/target/i386/whpx-all.c index f4f3e33eac..3b824fc9d7 100644 --- a/target/i386/whpx-all.c +++ b/target/i386/whpx-all.c @@ -19,10 +19,15 @@ #include 
"sysemu/runstate.h" #include "qemu/main-loop.h" #include "hw/boards.h" +#include "hw/i386/ioapic.h" +#include "hw/i386/apic_internal.h" #include "qemu/error-report.h" #include "qapi/error.h" +#include "qapi/qapi-types-common.h" +#include "qapi/qapi-visit-common.h" #include "migration/blocker.h" #include "whp-dispatch.h" +#include <winerror.h> #include "whpx-cpus.h" @@ -31,11 +36,6 @@ #define HYPERV_APIC_BUS_FREQUENCY (200000000ULL) -struct whpx_state { - uint64_t mem_quota; - WHV_PARTITION_HANDLE partition; -}; - static const WHV_REGISTER_NAME whpx_register_names[] = { /* X64 General purpose registers */ @@ -152,6 +152,7 @@ struct whpx_vcpu { WHV_EMULATOR_HANDLE emulator; bool window_registered; bool interruptable; + bool ready_for_pic_interrupt; uint64_t tpr; uint64_t apic_base; bool interruption_pending; @@ -163,7 +164,7 @@ struct whpx_vcpu { static bool whpx_allowed; static bool whp_dispatch_initialized; static HMODULE hWinHvPlatform, hWinHvEmulation; - +static uint32_t max_vcpu_index; struct whpx_state whpx_global; struct WHPDispatch whp_dispatch; @@ -599,6 +600,10 @@ static void whpx_get_registers(CPUState *cpu) assert(idx == RTL_NUMBER_OF(whpx_register_names)); + if (whpx_apic_in_platform()) { + whpx_apic_get(x86_cpu->apic_state); + } + return; } @@ -820,26 +825,42 @@ static void whpx_vcpu_pre_run(CPUState *cpu) } /* Get pending hard interruption or replay one that was overwritten */ - if (!vcpu->interruption_pending && - vcpu->interruptable && (env->eflags & IF_MASK)) { - assert(!new_int.InterruptionPending); - if (cpu->interrupt_request & CPU_INTERRUPT_HARD) { - cpu->interrupt_request &= ~CPU_INTERRUPT_HARD; - irq = cpu_get_pic_interrupt(env); - if (irq >= 0) { - new_int.InterruptionType = WHvX64PendingInterrupt; - new_int.InterruptionPending = 1; - new_int.InterruptionVector = irq; + if (!whpx_apic_in_platform()) { + if (!vcpu->interruption_pending && + vcpu->interruptable && (env->eflags & IF_MASK)) { + assert(!new_int.InterruptionPending); + if 
(cpu->interrupt_request & CPU_INTERRUPT_HARD) { + cpu->interrupt_request &= ~CPU_INTERRUPT_HARD; + irq = cpu_get_pic_interrupt(env); + if (irq >= 0) { + new_int.InterruptionType = WHvX64PendingInterrupt; + new_int.InterruptionPending = 1; + new_int.InterruptionVector = irq; + } } } - } - /* Setup interrupt state if new one was prepared */ - if (new_int.InterruptionPending) { - reg_values[reg_count].PendingInterruption = new_int; - reg_names[reg_count] = WHvRegisterPendingInterruption; - reg_count += 1; - } + /* Setup interrupt state if new one was prepared */ + if (new_int.InterruptionPending) { + reg_values[reg_count].PendingInterruption = new_int; + reg_names[reg_count] = WHvRegisterPendingInterruption; + reg_count += 1; + } + } else if (vcpu->ready_for_pic_interrupt && + (cpu->interrupt_request & CPU_INTERRUPT_HARD)) { + cpu->interrupt_request &= ~CPU_INTERRUPT_HARD; + irq = cpu_get_pic_interrupt(env); + if (irq >= 0) { + reg_names[reg_count] = WHvRegisterPendingEvent; + reg_values[reg_count].ExtIntEvent = (WHV_X64_PENDING_EXT_INT_EVENT) + { + .EventPending = 1, + .EventType = WHvX64PendingEventExtInt, + .Vector = irq, + }; + reg_count += 1; + } + } /* Sync the TPR to the CR8 if was modified during the intercept */ tpr = cpu_get_apic_tpr(x86_cpu->apic_state); @@ -854,14 +875,17 @@ static void whpx_vcpu_pre_run(CPUState *cpu) /* Update the state of the interrupt delivery notification */ if (!vcpu->window_registered && cpu->interrupt_request & CPU_INTERRUPT_HARD) { - reg_values[reg_count].DeliverabilityNotifications.InterruptNotification - = 1; + reg_values[reg_count].DeliverabilityNotifications = + (WHV_X64_DELIVERABILITY_NOTIFICATIONS_REGISTER) { + .InterruptNotification = 1 + }; vcpu->window_registered = 1; reg_names[reg_count] = WHvX64RegisterDeliverabilityNotifications; reg_count += 1; } qemu_mutex_unlock_iothread(); + vcpu->ready_for_pic_interrupt = false; if (reg_count) { hr = whp_dispatch.WHvSetVirtualProcessorRegisters( @@ -948,7 +972,7 @@ static int 
whpx_vcpu_run(CPUState *cpu) int ret; whpx_vcpu_process_async_events(cpu); - if (cpu->halted) { + if (cpu->halted && !whpx_apic_in_platform()) { cpu->exception_index = EXCP_HLT; qatomic_set(&cpu->exit_request, false); return 0; @@ -992,14 +1016,114 @@ static int whpx_vcpu_run(CPUState *cpu) break; case WHvRunVpExitReasonX64InterruptWindow: + vcpu->ready_for_pic_interrupt = 1; vcpu->window_registered = 0; ret = 0; break; + case WHvRunVpExitReasonX64ApicEoi: + assert(whpx_apic_in_platform()); + ioapic_eoi_broadcast(vcpu->exit_ctx.ApicEoi.InterruptVector); + break; + case WHvRunVpExitReasonX64Halt: ret = whpx_handle_halt(cpu); break; + case WHvRunVpExitReasonX64ApicInitSipiTrap: { + WHV_INTERRUPT_CONTROL ipi = {0}; + uint64_t icr = vcpu->exit_ctx.ApicInitSipi.ApicIcr; + uint32_t delivery_mode = + (icr & APIC_ICR_DELIV_MOD) >> APIC_ICR_DELIV_MOD_SHIFT; + int dest_shorthand = + (icr & APIC_ICR_DEST_SHORT) >> APIC_ICR_DEST_SHORT_SHIFT; + bool broadcast = false; + bool include_self = false; + uint32_t i; + + /* We only registered for INIT and SIPI exits. */ + if ((delivery_mode != APIC_DM_INIT) && + (delivery_mode != APIC_DM_SIPI)) { + error_report( + "WHPX: Unexpected APIC exit that is not a INIT or SIPI"); + break; + } + + if (delivery_mode == APIC_DM_INIT) { + ipi.Type = WHvX64InterruptTypeInit; + } else { + ipi.Type = WHvX64InterruptTypeSipi; + } + + ipi.DestinationMode = + ((icr & APIC_ICR_DEST_MOD) >> APIC_ICR_DEST_MOD_SHIFT) ? + WHvX64InterruptDestinationModeLogical : + WHvX64InterruptDestinationModePhysical; + + ipi.TriggerMode = + ((icr & APIC_ICR_TRIGGER_MOD) >> APIC_ICR_TRIGGER_MOD_SHIFT) ? + WHvX64InterruptTriggerModeLevel : + WHvX64InterruptTriggerModeEdge; + + ipi.Vector = icr & APIC_VECTOR_MASK; + switch (dest_shorthand) { + /* no shorthand. Bits 56-63 contain the destination. 
*/ + case 0: + ipi.Destination = (icr >> 56) & APIC_VECTOR_MASK; + hr = whp_dispatch.WHvRequestInterrupt(whpx->partition, + &ipi, sizeof(ipi)); + if (FAILED(hr)) { + error_report("WHPX: Failed to request interrupt hr=%08lx", + hr); + } + + break; + + /* self */ + case 1: + include_self = true; + break; + + /* broadcast, including self */ + case 2: + broadcast = true; + include_self = true; + break; + + /* broadcast, excluding self */ + case 3: + broadcast = true; + break; + } + + if (!broadcast && !include_self) { + break; + } + + for (i = 0; i <= max_vcpu_index; i++) { + if (i == cpu->cpu_index && !include_self) { + continue; + } + + /* + * Assuming that APIC Ids are identity mapped since + * WHvX64RegisterApicId & WHvX64RegisterInitialApicId registers + * are not handled yet and the hypervisor doesn't allow the + * guest to modify the APIC ID. + */ + ipi.Destination = i; + hr = whp_dispatch.WHvRequestInterrupt(whpx->partition, + &ipi, sizeof(ipi)); + if (FAILED(hr)) { + error_report( + "WHPX: Failed to request SIPI for %d, hr=%08lx", + i, hr); + } + } + + break; + } + case WHvRunVpExitReasonCanceled: cpu->exception_index = EXCP_INTERRUPT; ret = 1; @@ -1314,6 +1438,7 @@ int whpx_init_vcpu(CPUState *cpu) vcpu->interruptable = true; cpu->vcpu_dirty = true; cpu->hax_vcpu = (struct hax_vcpu_state *)vcpu; + max_vcpu_index = max(max_vcpu_index, cpu->cpu_index); qemu_add_vm_change_state_handler(whpx_cpu_update_state, cpu->env_ptr); return 0; @@ -1549,6 +1674,43 @@ error: return false; } +static void whpx_set_kernel_irqchip(Object *obj, Visitor *v, + const char *name, void *opaque, + Error **errp) +{ + struct whpx_state *whpx = &whpx_global; + OnOffSplit mode; + + if (!visit_type_OnOffSplit(v, name, &mode, errp)) { + return; + } + + switch (mode) { + case ON_OFF_SPLIT_ON: + whpx->kernel_irqchip_allowed = true; + whpx->kernel_irqchip_required = true; + break; + + case ON_OFF_SPLIT_OFF: + whpx->kernel_irqchip_allowed = false; + whpx->kernel_irqchip_required = false; + 
break; + + case ON_OFF_SPLIT_SPLIT: + error_setg(errp, "WHPX: split irqchip currently not supported"); + error_append_hint(errp, + "Try without kernel-irqchip or with kernel-irqchip=on|off"); + break; + + default: + /* + * The value was checked in visit_type_OnOffSplit() above. If + * we get here, then something is wrong in QEMU. + */ + abort(); + } +} + /* * Partition support */ @@ -1562,6 +1724,7 @@ static int whpx_accel_init(MachineState *ms) UINT32 whpx_cap_size; WHV_PARTITION_PROPERTY prop; UINT32 cpuidExitList[] = {1, 0x80000001}; + WHV_CAPABILITY_FEATURES features = {0}; whpx = &whpx_global; @@ -1570,7 +1733,6 @@ static int whpx_accel_init(MachineState *ms) goto error; } - memset(whpx, 0, sizeof(struct whpx_state)); whpx->mem_quota = ms->ram_size; hr = whp_dispatch.WHvGetCapability( @@ -1582,6 +1744,14 @@ static int whpx_accel_init(MachineState *ms) goto error; } + hr = whp_dispatch.WHvGetCapability( + WHvCapabilityCodeFeatures, &features, sizeof(features), NULL); + if (FAILED(hr)) { + error_report("WHPX: Failed to query capabilities, hr=%08lx", hr); + ret = -EINVAL; + goto error; + } + hr = whp_dispatch.WHvCreatePartition(&whpx->partition); if (FAILED(hr)) { error_report("WHPX: Failed to create partition, hr=%08lx", hr); @@ -1604,18 +1774,55 @@ static int whpx_accel_init(MachineState *ms) goto error; } + /* + * Error out if WHP doesn't support apic emulation and user is requiring + * it. + */ + if (whpx->kernel_irqchip_required && (!features.LocalApicEmulation || + !whp_dispatch.WHvSetVirtualProcessorInterruptControllerState2)) { + error_report("WHPX: kernel irqchip requested, but unavailable. 
" + "Try without kernel-irqchip or with kernel-irqchip=off"); + ret = -EINVAL; + goto error; + } + + if (whpx->kernel_irqchip_allowed && features.LocalApicEmulation && + whp_dispatch.WHvSetVirtualProcessorInterruptControllerState2) { + WHV_X64_LOCAL_APIC_EMULATION_MODE mode = + WHvX64LocalApicEmulationModeXApic; + printf("WHPX: setting APIC emulation mode in the hypervisor\n"); + hr = whp_dispatch.WHvSetPartitionProperty( + whpx->partition, + WHvPartitionPropertyCodeLocalApicEmulationMode, + &mode, + sizeof(mode)); + if (FAILED(hr)) { + error_report("WHPX: Failed to enable kernel irqchip hr=%08lx", hr); + if (whpx->kernel_irqchip_required) { + error_report("WHPX: kernel irqchip requested, but unavailable"); + ret = -EINVAL; + goto error; + } + } else { + whpx->apic_in_platform = true; + } + } + + /* Register for MSR and CPUID exits */ memset(&prop, 0, sizeof(WHV_PARTITION_PROPERTY)); prop.ExtendedVmExits.X64MsrExit = 1; prop.ExtendedVmExits.X64CpuidExit = 1; - hr = whp_dispatch.WHvSetPartitionProperty( - whpx->partition, - WHvPartitionPropertyCodeExtendedVmExits, - &prop, - sizeof(WHV_PARTITION_PROPERTY)); + if (whpx_apic_in_platform()) { + prop.ExtendedVmExits.X64ApicInitSipiExitTrap = 1; + } + hr = whp_dispatch.WHvSetPartitionProperty( + whpx->partition, + WHvPartitionPropertyCodeExtendedVmExits, + &prop, + sizeof(WHV_PARTITION_PROPERTY)); if (FAILED(hr)) { - error_report("WHPX: Failed to enable partition extended X64MsrExit and" - " X64CpuidExit hr=%08lx", hr); + error_report("WHPX: Failed to enable MSR & CPUIDexit, hr=%08lx", hr); ret = -EINVAL; goto error; } @@ -1668,11 +1875,27 @@ static void whpx_accel_class_init(ObjectClass *oc, void *data) ac->name = "WHPX"; ac->init_machine = whpx_accel_init; ac->allowed = &whpx_allowed; + + object_class_property_add(oc, "kernel-irqchip", "on|off|split", + NULL, whpx_set_kernel_irqchip, + NULL, NULL); + object_class_property_set_description(oc, "kernel-irqchip", + "Configure WHPX in-kernel irqchip"); +} + +static void 
whpx_accel_instance_init(Object *obj) +{ + struct whpx_state *whpx = &whpx_global; + + memset(whpx, 0, sizeof(struct whpx_state)); + /* Turn on kernel-irqchip, by default */ + whpx->kernel_irqchip_allowed = true; } static const TypeInfo whpx_accel_type = { .name = ACCEL_CLASS_NAME("whpx"), .parent = TYPE_ACCEL, + .instance_init = whpx_accel_instance_init, .class_init = whpx_accel_class_init, }; diff --git a/target/i386/whpx-apic.c b/target/i386/whpx-apic.c new file mode 100644 index 0000000000..b127a3cb8a --- /dev/null +++ b/target/i386/whpx-apic.c @@ -0,0 +1,274 @@ +/* + * WHPX platform APIC support + * + * Copyright (c) 2011 Siemens AG + * + * Authors: + * Jan Kiszka <jan.kiszka@siemens.com> + * John Starks <jostarks@microsoft.com> + * + * This work is licensed under the terms of the GNU GPL version 2. + * See the COPYING file in the top-level directory. + */ +#include "qemu/osdep.h" +#include "qemu-common.h" +#include "cpu.h" +#include "hw/i386/apic_internal.h" +#include "hw/i386/apic-msidef.h" +#include "hw/pci/msi.h" +#include "sysemu/hw_accel.h" +#include "sysemu/whpx.h" +#include "whp-dispatch.h" + +static void whpx_put_apic_state(APICCommonState *s, + struct whpx_lapic_state *kapic) +{ + int i; + + memset(kapic, 0, sizeof(*kapic)); + kapic->fields[0x2].data = s->id << 24; + kapic->fields[0x3].data = s->version | ((APIC_LVT_NB - 1) << 16); + kapic->fields[0x8].data = s->tpr; + kapic->fields[0xd].data = s->log_dest << 24; + kapic->fields[0xe].data = s->dest_mode << 28 | 0x0fffffff; + kapic->fields[0xf].data = s->spurious_vec; + for (i = 0; i < 8; i++) { + kapic->fields[0x10 + i].data = s->isr[i]; + kapic->fields[0x18 + i].data = s->tmr[i]; + kapic->fields[0x20 + i].data = s->irr[i]; + } + + kapic->fields[0x28].data = s->esr; + kapic->fields[0x30].data = s->icr[0]; + kapic->fields[0x31].data = s->icr[1]; + for (i = 0; i < APIC_LVT_NB; i++) { + kapic->fields[0x32 + i].data = s->lvt[i]; + } + + kapic->fields[0x38].data = s->initial_count; + 
kapic->fields[0x3e].data = s->divide_conf; +} + +static void whpx_get_apic_state(APICCommonState *s, + struct whpx_lapic_state *kapic) +{ + int i, v; + + s->id = kapic->fields[0x2].data >> 24; + s->tpr = kapic->fields[0x8].data; + s->arb_id = kapic->fields[0x9].data; + s->log_dest = kapic->fields[0xd].data >> 24; + s->dest_mode = kapic->fields[0xe].data >> 28; + s->spurious_vec = kapic->fields[0xf].data; + for (i = 0; i < 8; i++) { + s->isr[i] = kapic->fields[0x10 + i].data; + s->tmr[i] = kapic->fields[0x18 + i].data; + s->irr[i] = kapic->fields[0x20 + i].data; + } + + s->esr = kapic->fields[0x28].data; + s->icr[0] = kapic->fields[0x30].data; + s->icr[1] = kapic->fields[0x31].data; + for (i = 0; i < APIC_LVT_NB; i++) { + s->lvt[i] = kapic->fields[0x32 + i].data; + } + + s->initial_count = kapic->fields[0x38].data; + s->divide_conf = kapic->fields[0x3e].data; + + v = (s->divide_conf & 3) | ((s->divide_conf >> 1) & 4); + s->count_shift = (v + 1) & 7; + + s->initial_count_load_time = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); + apic_next_timer(s, s->initial_count_load_time); +} + +static void whpx_apic_set_base(APICCommonState *s, uint64_t val) +{ + s->apicbase = val; +} + +static void whpx_put_apic_base(CPUState *cpu, uint64_t val) +{ + HRESULT hr; + WHV_REGISTER_VALUE reg_value = {.Reg64 = val}; + WHV_REGISTER_NAME reg_name = WHvX64RegisterApicBase; + + hr = whp_dispatch.WHvSetVirtualProcessorRegisters( + whpx_global.partition, + cpu->cpu_index, + &reg_name, 1, + &reg_value); + + if (FAILED(hr)) { + error_report("WHPX: Failed to set MSR APIC base, hr=%08lx", hr); + } +} + +static void whpx_apic_set_tpr(APICCommonState *s, uint8_t val) +{ + s->tpr = val; +} + +static uint8_t whpx_apic_get_tpr(APICCommonState *s) +{ + return s->tpr; +} + +static void whpx_apic_vapic_base_update(APICCommonState *s) +{ + /* not implemented yet */ +} + +static void whpx_apic_put(CPUState *cs, run_on_cpu_data data) +{ + APICCommonState *s = data.host_ptr; + struct whpx_lapic_state kapic; + HRESULT 
hr; + + whpx_put_apic_base(CPU(s->cpu), s->apicbase); + whpx_put_apic_state(s, &kapic); + + hr = whp_dispatch.WHvSetVirtualProcessorInterruptControllerState2( + whpx_global.partition, + cs->cpu_index, + &kapic, + sizeof(kapic)); + if (FAILED(hr)) { + fprintf(stderr, + "WHvSetVirtualProcessorInterruptControllerState failed: %08lx\n", + hr); + + abort(); + } +} + +void whpx_apic_get(DeviceState *dev) +{ + APICCommonState *s = APIC_COMMON(dev); + CPUState *cpu = CPU(s->cpu); + struct whpx_lapic_state kapic; + + HRESULT hr = whp_dispatch.WHvGetVirtualProcessorInterruptControllerState2( + whpx_global.partition, + cpu->cpu_index, + &kapic, + sizeof(kapic), + NULL); + if (FAILED(hr)) { + fprintf(stderr, + "WHvSetVirtualProcessorInterruptControllerState failed: %08lx\n", + hr); + + abort(); + } + + whpx_get_apic_state(s, &kapic); +} + +static void whpx_apic_post_load(APICCommonState *s) +{ + run_on_cpu(CPU(s->cpu), whpx_apic_put, RUN_ON_CPU_HOST_PTR(s)); +} + +static void whpx_apic_external_nmi(APICCommonState *s) +{ +} + +static void whpx_send_msi(MSIMessage *msg) +{ + uint64_t addr = msg->address; + uint32_t data = msg->data; + uint8_t dest = (addr & MSI_ADDR_DEST_ID_MASK) >> MSI_ADDR_DEST_ID_SHIFT; + uint8_t vector = (data & MSI_DATA_VECTOR_MASK) >> MSI_DATA_VECTOR_SHIFT; + uint8_t dest_mode = (addr >> MSI_ADDR_DEST_MODE_SHIFT) & 0x1; + uint8_t trigger_mode = (data >> MSI_DATA_TRIGGER_SHIFT) & 0x1; + uint8_t delivery = (data >> MSI_DATA_DELIVERY_MODE_SHIFT) & 0x7; + + WHV_INTERRUPT_CONTROL interrupt = { + /* Values correspond to delivery modes */ + .Type = delivery, + .DestinationMode = dest_mode ? + WHvX64InterruptDestinationModeLogical : + WHvX64InterruptDestinationModePhysical, + + .TriggerMode = trigger_mode ? 
+ WHvX64InterruptTriggerModeLevel : WHvX64InterruptTriggerModeEdge, + .Reserved = 0, + .Vector = vector, + .Destination = dest, + }; + HRESULT hr = whp_dispatch.WHvRequestInterrupt(whpx_global.partition, + &interrupt, sizeof(interrupt)); + if (FAILED(hr)) { + fprintf(stderr, "whpx: injection failed, MSI (%llx, %x) delivery: %d, " + "dest_mode: %d, trigger mode: %d, vector: %d, lost (%08lx)\n", + addr, data, delivery, dest_mode, trigger_mode, vector, hr); + } +} + +static uint64_t whpx_apic_mem_read(void *opaque, hwaddr addr, + unsigned size) +{ + return ~(uint64_t)0; +} + +static void whpx_apic_mem_write(void *opaque, hwaddr addr, + uint64_t data, unsigned size) +{ + MSIMessage msg = { .address = addr, .data = data }; + whpx_send_msi(&msg); +} + +static const MemoryRegionOps whpx_apic_io_ops = { + .read = whpx_apic_mem_read, + .write = whpx_apic_mem_write, + .endianness = DEVICE_NATIVE_ENDIAN, +}; + +static void whpx_apic_reset(APICCommonState *s) +{ + /* Not used by WHPX. */ + s->wait_for_sipi = 0; + + run_on_cpu(CPU(s->cpu), whpx_apic_put, RUN_ON_CPU_HOST_PTR(s)); +} + +static void whpx_apic_realize(DeviceState *dev, Error **errp) +{ + APICCommonState *s = APIC_COMMON(dev); + + memory_region_init_io(&s->io_memory, OBJECT(s), &whpx_apic_io_ops, s, + "whpx-apic-msi", APIC_SPACE_SIZE); + + msi_nonbroken = true; +} + +static void whpx_apic_class_init(ObjectClass *klass, void *data) +{ + APICCommonClass *k = APIC_COMMON_CLASS(klass); + + k->realize = whpx_apic_realize; + k->reset = whpx_apic_reset; + k->set_base = whpx_apic_set_base; + k->set_tpr = whpx_apic_set_tpr; + k->get_tpr = whpx_apic_get_tpr; + k->post_load = whpx_apic_post_load; + k->vapic_base_update = whpx_apic_vapic_base_update; + k->external_nmi = whpx_apic_external_nmi; + k->send_msi = whpx_send_msi; +} + +static const TypeInfo whpx_apic_info = { + .name = "whpx-apic", + .parent = TYPE_APIC_COMMON, + .instance_size = sizeof(APICCommonState), + .class_init = whpx_apic_class_init, +}; + +static void 
whpx_apic_register_types(void) +{ + type_register_static(&whpx_apic_info); +} + +type_init(whpx_apic_register_types) |