From 869840d26c929b99694e31b1a18e83bdea6e97ca Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Wed, 25 May 2022 13:59:45 +0200 Subject: i386: Hyper-V Enlightened MSR bitmap feature The newly introduced enlightenment allows L0 (KVM) and L1 (Hyper-V) hypervisors to collaborate to avoid unnecessary updates to L2 MSR-Bitmap upon vmexits. Signed-off-by: Vitaly Kuznetsov Message-Id: <20220525115949.1294004-3-vkuznets@redhat.com> Signed-off-by: Paolo Bonzini --- docs/hyperv.txt | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'docs') diff --git a/docs/hyperv.txt b/docs/hyperv.txt index 33588a0396..5d85569b99 100644 --- a/docs/hyperv.txt +++ b/docs/hyperv.txt @@ -239,6 +239,15 @@ This enlightenment requires a VMBus device (-device vmbus-bridge,irq=15) and the follow enlightenments to work: hv-relaxed,hv_time,hv-vapic,hv-vpindex,hv-synic,hv-runtime,hv-stimer +3.22. hv-emsr-bitmap +===================== +The enlightenment is nested specific, it targets Hyper-V on KVM guests. When +enabled, it allows L0 (KVM) and L1 (Hyper-V) hypervisors to collaborate to +avoid unnecessary updates to L2 MSR-Bitmap upon vmexits. While the protocol is +supported for both VMX (Intel) and SVM (AMD), the VMX implementation requires +Enlightened VMCS ('hv-evmcs') feature to also be enabled. + +Recommended: hv-evmcs (Intel) 4. Supplementary features ========================= -- cgit v1.2.3-55-g7522 From 9411e8b6faeb1d88d4441c63c5ec072a01b2914e Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Wed, 25 May 2022 13:59:46 +0200 Subject: i386: Hyper-V XMM fast hypercall input feature Hyper-V specification allows passing parameters for certain hypercalls using XMM registers ("XMM Fast Hypercall Input"). When the feature is in use, it allows for faster hypercalls processing as KVM can avoid reading guest's memory. KVM supports the feature since v5.14. Rename HV_HYPERCALL_{PARAMS_XMM_AVAILABLE -> XMM_INPUT_AVAILABLE} to comply with KVM. 
Signed-off-by: Vitaly Kuznetsov Message-Id: <20220525115949.1294004-4-vkuznets@redhat.com> Signed-off-by: Paolo Bonzini --- docs/hyperv.txt | 6 ++++++ target/i386/cpu.c | 2 ++ target/i386/cpu.h | 1 + target/i386/kvm/hyperv-proto.h | 2 +- target/i386/kvm/kvm.c | 7 +++++++ 5 files changed, 17 insertions(+), 1 deletion(-) (limited to 'docs') diff --git a/docs/hyperv.txt b/docs/hyperv.txt index 5d85569b99..af1b10c0b3 100644 --- a/docs/hyperv.txt +++ b/docs/hyperv.txt @@ -249,6 +249,12 @@ Enlightened VMCS ('hv-evmcs') feature to also be enabled. Recommended: hv-evmcs (Intel) +3.23. hv-xmm-input +=================== +Hyper-V specification allows to pass parameters for certain hypercalls using XMM +registers ("XMM Fast Hypercall Input"). When the feature is in use, it allows +for faster hypercalls processing as KVM can avoid reading guest's memory. + 4. Supplementary features ========================= diff --git a/target/i386/cpu.c b/target/i386/cpu.c index 474e9b582e..63cec0ea68 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -6970,6 +6970,8 @@ static Property x86_cpu_properties[] = { HYPERV_FEAT_AVIC, 0), DEFINE_PROP_BIT64("hv-emsr-bitmap", X86CPU, hyperv_features, HYPERV_FEAT_MSR_BITMAP, 0), + DEFINE_PROP_BIT64("hv-xmm-input", X86CPU, hyperv_features, + HYPERV_FEAT_XMM_INPUT, 0), DEFINE_PROP_ON_OFF_AUTO("hv-no-nonarch-coresharing", X86CPU, hyperv_no_nonarch_cs, ON_OFF_AUTO_OFF), DEFINE_PROP_BIT64("hv-syndbg", X86CPU, hyperv_features, diff --git a/target/i386/cpu.h b/target/i386/cpu.h index c788285736..37e9553584 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -1107,6 +1107,7 @@ uint64_t x86_cpu_get_supported_feature_word(FeatureWord w, #define HYPERV_FEAT_AVIC 15 #define HYPERV_FEAT_SYNDBG 16 #define HYPERV_FEAT_MSR_BITMAP 17 +#define HYPERV_FEAT_XMM_INPUT 18 #ifndef HYPERV_SPINLOCK_NEVER_NOTIFY #define HYPERV_SPINLOCK_NEVER_NOTIFY 0xFFFFFFFF diff --git a/target/i386/kvm/hyperv-proto.h b/target/i386/kvm/hyperv-proto.h index cea18dbc0e..f5f16474fa 
100644 --- a/target/i386/kvm/hyperv-proto.h +++ b/target/i386/kvm/hyperv-proto.h @@ -54,7 +54,7 @@ #define HV_GUEST_DEBUGGING_AVAILABLE (1u << 1) #define HV_PERF_MONITOR_AVAILABLE (1u << 2) #define HV_CPU_DYNAMIC_PARTITIONING_AVAILABLE (1u << 3) -#define HV_HYPERCALL_PARAMS_XMM_AVAILABLE (1u << 4) +#define HV_HYPERCALL_XMM_INPUT_AVAILABLE (1u << 4) #define HV_GUEST_IDLE_STATE_AVAILABLE (1u << 5) #define HV_FREQUENCY_MSRS_AVAILABLE (1u << 8) #define HV_GUEST_CRASH_MSR_AVAILABLE (1u << 10) diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c index f389bbedf2..7e6f934eda 100644 --- a/target/i386/kvm/kvm.c +++ b/target/i386/kvm/kvm.c @@ -980,6 +980,13 @@ static struct { .bits = HV_NESTED_MSR_BITMAP} } }, + [HYPERV_FEAT_XMM_INPUT] = { + .desc = "XMM fast hypercall input (hv-xmm-input)", + .flags = { + {.func = HV_CPUID_FEATURES, .reg = R_EDX, + .bits = HV_HYPERCALL_XMM_INPUT_AVAILABLE} + } + }, }; static struct kvm_cpuid2 *try_get_hv_cpuid(CPUState *cs, int max, -- cgit v1.2.3-55-g7522 From aa6bb5fad58d049c6ea97448d4caba4499d60634 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Wed, 25 May 2022 13:59:47 +0200 Subject: i386: Hyper-V Support extended GVA ranges for TLB flush hypercalls KVM kind of supported "extended GVA ranges" (up to 4095 additional GFNs per hypercall) since the implementation of Hyper-V PV TLB flush feature (Linux-4.18) as regardless of the request, full TLB flush was always performed. "Extended GVA ranges for TLB flush hypercalls" feature bit wasn't exposed then. Now, as KVM gains support for fine-grained TLB flush handling, exposing this feature starts making sense. 
Signed-off-by: Vitaly Kuznetsov Message-Id: <20220525115949.1294004-5-vkuznets@redhat.com> Signed-off-by: Paolo Bonzini --- docs/hyperv.txt | 7 +++++++ target/i386/cpu.c | 2 ++ target/i386/cpu.h | 1 + target/i386/kvm/hyperv-proto.h | 1 + target/i386/kvm/kvm.c | 8 ++++++++ 5 files changed, 19 insertions(+) (limited to 'docs') diff --git a/docs/hyperv.txt b/docs/hyperv.txt index af1b10c0b3..4b132b1c94 100644 --- a/docs/hyperv.txt +++ b/docs/hyperv.txt @@ -255,6 +255,13 @@ Hyper-V specification allows to pass parameters for certain hypercalls using XMM registers ("XMM Fast Hypercall Input"). When the feature is in use, it allows for faster hypercalls processing as KVM can avoid reading guest's memory. +3.24. hv-tlbflush-ext +===================== +Allow for extended GVA ranges to be passed to Hyper-V TLB flush hypercalls +(HvFlushVirtualAddressList/HvFlushVirtualAddressListEx). + +Requires: hv-tlbflush + 4. Supplementary features ========================= diff --git a/target/i386/cpu.c b/target/i386/cpu.c index 63cec0ea68..3429a4e455 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -6972,6 +6972,8 @@ static Property x86_cpu_properties[] = { HYPERV_FEAT_MSR_BITMAP, 0), DEFINE_PROP_BIT64("hv-xmm-input", X86CPU, hyperv_features, HYPERV_FEAT_XMM_INPUT, 0), + DEFINE_PROP_BIT64("hv-tlbflush-ext", X86CPU, hyperv_features, + HYPERV_FEAT_TLBFLUSH_EXT, 0), DEFINE_PROP_ON_OFF_AUTO("hv-no-nonarch-coresharing", X86CPU, hyperv_no_nonarch_cs, ON_OFF_AUTO_OFF), DEFINE_PROP_BIT64("hv-syndbg", X86CPU, hyperv_features, diff --git a/target/i386/cpu.h b/target/i386/cpu.h index 37e9553584..5ff48257e5 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -1108,6 +1108,7 @@ uint64_t x86_cpu_get_supported_feature_word(FeatureWord w, #define HYPERV_FEAT_SYNDBG 16 #define HYPERV_FEAT_MSR_BITMAP 17 #define HYPERV_FEAT_XMM_INPUT 18 +#define HYPERV_FEAT_TLBFLUSH_EXT 19 #ifndef HYPERV_SPINLOCK_NEVER_NOTIFY #define HYPERV_SPINLOCK_NEVER_NOTIFY 0xFFFFFFFF diff --git 
a/target/i386/kvm/hyperv-proto.h b/target/i386/kvm/hyperv-proto.h index f5f16474fa..c7854ed6d3 100644 --- a/target/i386/kvm/hyperv-proto.h +++ b/target/i386/kvm/hyperv-proto.h @@ -59,6 +59,7 @@ #define HV_FREQUENCY_MSRS_AVAILABLE (1u << 8) #define HV_GUEST_CRASH_MSR_AVAILABLE (1u << 10) #define HV_FEATURE_DEBUG_MSRS_AVAILABLE (1u << 11) +#define HV_EXT_GVA_RANGES_FLUSH_AVAILABLE (1u << 14) #define HV_STIMER_DIRECT_MODE_AVAILABLE (1u << 19) /* diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c index 7e6f934eda..a11c8e88f6 100644 --- a/target/i386/kvm/kvm.c +++ b/target/i386/kvm/kvm.c @@ -987,6 +987,14 @@ static struct { .bits = HV_HYPERCALL_XMM_INPUT_AVAILABLE} } }, + [HYPERV_FEAT_TLBFLUSH_EXT] = { + .desc = "Extended gva ranges for TLB flush hypercalls (hv-tlbflush-ext)", + .flags = { + {.func = HV_CPUID_FEATURES, .reg = R_EDX, + .bits = HV_EXT_GVA_RANGES_FLUSH_AVAILABLE} + }, + .dependencies = BIT(HYPERV_FEAT_TLBFLUSH) + }, }; static struct kvm_cpuid2 *try_get_hv_cpuid(CPUState *cs, int max, -- cgit v1.2.3-55-g7522 From 3aae0854b26aff303202c6f9542445f58b2539fe Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Wed, 25 May 2022 13:59:48 +0200 Subject: i386: Hyper-V Direct TLB flush hypercall Hyper-V TLFS allows for L0 and L1 hypervisors to collaborate on L2's TLB flush hypercalls handling. With the correct setup, L2's TLB flush hypercalls can be handled by L0 directly, without the need to exit to L1. 
Signed-off-by: Vitaly Kuznetsov Message-Id: <20220525115949.1294004-6-vkuznets@redhat.com> Signed-off-by: Paolo Bonzini --- docs/hyperv.txt | 11 +++++++++++ target/i386/cpu.c | 2 ++ target/i386/cpu.h | 1 + target/i386/kvm/hyperv-proto.h | 1 + target/i386/kvm/kvm.c | 8 ++++++++ 5 files changed, 23 insertions(+) (limited to 'docs') diff --git a/docs/hyperv.txt b/docs/hyperv.txt index 4b132b1c94..14a7f449ea 100644 --- a/docs/hyperv.txt +++ b/docs/hyperv.txt @@ -262,6 +262,17 @@ Allow for extended GVA ranges to be passed to Hyper-V TLB flush hypercalls Requires: hv-tlbflush +3.25. hv-tlbflush-direct +========================= +The enlightenment is nested specific, it targets Hyper-V on KVM guests. When +enabled, it allows L0 (KVM) to directly handle TLB flush hypercalls from L2 +guest without the need to exit to L1 (Hyper-V) hypervisor. While the feature is +supported for both VMX (Intel) and SVM (AMD), the VMX implementation requires +Enlightened VMCS ('hv-evmcs') feature to also be enabled. + +Requires: hv-vapic +Recommended: hv-evmcs (Intel) + 4. 
Supplementary features ========================= diff --git a/target/i386/cpu.c b/target/i386/cpu.c index 3429a4e455..bb6a5dd498 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -6974,6 +6974,8 @@ static Property x86_cpu_properties[] = { HYPERV_FEAT_XMM_INPUT, 0), DEFINE_PROP_BIT64("hv-tlbflush-ext", X86CPU, hyperv_features, HYPERV_FEAT_TLBFLUSH_EXT, 0), + DEFINE_PROP_BIT64("hv-tlbflush-direct", X86CPU, hyperv_features, + HYPERV_FEAT_TLBFLUSH_DIRECT, 0), DEFINE_PROP_ON_OFF_AUTO("hv-no-nonarch-coresharing", X86CPU, hyperv_no_nonarch_cs, ON_OFF_AUTO_OFF), DEFINE_PROP_BIT64("hv-syndbg", X86CPU, hyperv_features, diff --git a/target/i386/cpu.h b/target/i386/cpu.h index 5ff48257e5..82004b65b9 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -1109,6 +1109,7 @@ uint64_t x86_cpu_get_supported_feature_word(FeatureWord w, #define HYPERV_FEAT_MSR_BITMAP 17 #define HYPERV_FEAT_XMM_INPUT 18 #define HYPERV_FEAT_TLBFLUSH_EXT 19 +#define HYPERV_FEAT_TLBFLUSH_DIRECT 20 #ifndef HYPERV_SPINLOCK_NEVER_NOTIFY #define HYPERV_SPINLOCK_NEVER_NOTIFY 0xFFFFFFFF diff --git a/target/i386/kvm/hyperv-proto.h b/target/i386/kvm/hyperv-proto.h index c7854ed6d3..464fbf09e3 100644 --- a/target/i386/kvm/hyperv-proto.h +++ b/target/i386/kvm/hyperv-proto.h @@ -90,6 +90,7 @@ /* * HV_CPUID_NESTED_FEATURES.EAX bits */ +#define HV_NESTED_DIRECT_FLUSH (1u << 17) #define HV_NESTED_MSR_BITMAP (1u << 19) /* diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c index a11c8e88f6..f148a6d52f 100644 --- a/target/i386/kvm/kvm.c +++ b/target/i386/kvm/kvm.c @@ -995,6 +995,14 @@ static struct { }, .dependencies = BIT(HYPERV_FEAT_TLBFLUSH) }, + [HYPERV_FEAT_TLBFLUSH_DIRECT] = { + .desc = "direct TLB flush (hv-tlbflush-direct)", + .flags = { + {.func = HV_CPUID_NESTED_FEATURES, .reg = R_EAX, + .bits = HV_NESTED_DIRECT_FLUSH} + }, + .dependencies = BIT(HYPERV_FEAT_VAPIC) + }, }; static struct kvm_cpuid2 *try_get_hv_cpuid(CPUState *cs, int max, -- cgit v1.2.3-55-g7522 From 
9ad6634ec956bcf3558059aae8c6b2b5ee985307 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Wed, 25 May 2022 13:59:49 +0200 Subject: i386: docs: Convert hyperv.txt to rST rSTify docs/hyperv.txt and link it from docs/system/target-i386.rst. Signed-off-by: Vitaly Kuznetsov Message-Id: <20220525115949.1294004-7-vkuznets@redhat.com> Signed-off-by: Paolo Bonzini --- docs/hyperv.txt | 303 -------------------------------------------- docs/system/i386/hyperv.rst | 288 +++++++++++++++++++++++++++++++++++++++++ docs/system/target-i386.rst | 1 + 3 files changed, 289 insertions(+), 303 deletions(-) delete mode 100644 docs/hyperv.txt create mode 100644 docs/system/i386/hyperv.rst (limited to 'docs') diff --git a/docs/hyperv.txt b/docs/hyperv.txt deleted file mode 100644 index 14a7f449ea..0000000000 --- a/docs/hyperv.txt +++ /dev/null @@ -1,303 +0,0 @@ -Hyper-V Enlightenments -====================== - - -1. Description -=============== -In some cases when implementing a hardware interface in software is slow, KVM -implements its own paravirtualized interfaces. This works well for Linux as -guest support for such features is added simultaneously with the feature itself. -It may, however, be hard-to-impossible to add support for these interfaces to -proprietary OSes, namely, Microsoft Windows. - -KVM on x86 implements Hyper-V Enlightenments for Windows guests. These features -make Windows and Hyper-V guests think they're running on top of a Hyper-V -compatible hypervisor and use Hyper-V specific features. - - -2. Setup -========= -No Hyper-V enlightenments are enabled by default by either KVM or QEMU. In -QEMU, individual enlightenments can be enabled through CPU flags, e.g: - - qemu-system-x86_64 --enable-kvm --cpu host,hv_relaxed,hv_vpindex,hv_time, ... - -Sometimes there are dependencies between enlightenments, QEMU is supposed to -check that the supplied configuration is sane. 
- -When any set of the Hyper-V enlightenments is enabled, QEMU changes hypervisor -identification (CPUID 0x40000000..0x4000000A) to Hyper-V. KVM identification -and features are kept in leaves 0x40000100..0x40000101. - - -3. Existing enlightenments -=========================== - -3.1. hv-relaxed -================ -This feature tells guest OS to disable watchdog timeouts as it is running on a -hypervisor. It is known that some Windows versions will do this even when they -see 'hypervisor' CPU flag. - -3.2. hv-vapic -============== -Provides so-called VP Assist page MSR to guest allowing it to work with APIC -more efficiently. In particular, this enlightenment allows paravirtualized -(exit-less) EOI processing. - -3.3. hv-spinlocks=xxx -====================== -Enables paravirtualized spinlocks. The parameter indicates how many times -spinlock acquisition should be attempted before indicating the situation to the -hypervisor. A special value 0xffffffff indicates "never notify". - -3.4. hv-vpindex -================ -Provides HV_X64_MSR_VP_INDEX (0x40000002) MSR to the guest which has Virtual -processor index information. This enlightenment makes sense in conjunction with -hv-synic, hv-stimer and other enlightenments which require the guest to know its -Virtual Processor indices (e.g. when VP index needs to be passed in a -hypercall). - -3.5. hv-runtime -================ -Provides HV_X64_MSR_VP_RUNTIME (0x40000010) MSR to the guest. The MSR keeps the -virtual processor run time in 100ns units. This gives guest operating system an -idea of how much time was 'stolen' from it (when the virtual CPU was preempted -to perform some other work). - -3.6. hv-crash -============== -Provides HV_X64_MSR_CRASH_P0..HV_X64_MSR_CRASH_P5 (0x40000100..0x40000105) and -HV_X64_MSR_CRASH_CTL (0x40000105) MSRs to the guest. These MSRs are written to -by the guest when it crashes, HV_X64_MSR_CRASH_P0..HV_X64_MSR_CRASH_P5 MSRs -contain additional crash information. 
This information is outputted in QEMU log -and through QAPI. -Note: unlike under genuine Hyper-V, write to HV_X64_MSR_CRASH_CTL causes guest -to shutdown. This effectively blocks crash dump generation by Windows. - -3.7. hv-time -============= -Enables two Hyper-V-specific clocksources available to the guest: MSR-based -Hyper-V clocksource (HV_X64_MSR_TIME_REF_COUNT, 0x40000020) and Reference TSC -page (enabled via MSR HV_X64_MSR_REFERENCE_TSC, 0x40000021). Both clocksources -are per-guest, Reference TSC page clocksource allows for exit-less time stamp -readings. Using this enlightenment leads to significant speedup of all timestamp -related operations. - -3.8. hv-synic -============== -Enables Hyper-V Synthetic interrupt controller - an extension of a local APIC. -When enabled, this enlightenment provides additional communication facilities -to the guest: SynIC messages and Events. This is a pre-requisite for -implementing VMBus devices (not yet in QEMU). Additionally, this enlightenment -is needed to enable Hyper-V synthetic timers. SynIC is controlled through MSRs -HV_X64_MSR_SCONTROL..HV_X64_MSR_EOM (0x40000080..0x40000084) and -HV_X64_MSR_SINT0..HV_X64_MSR_SINT15 (0x40000090..0x4000009F) - -Requires: hv-vpindex - -3.9. hv-stimer -=============== -Enables Hyper-V synthetic timers. There are four synthetic timers per virtual -CPU controlled through HV_X64_MSR_STIMER0_CONFIG..HV_X64_MSR_STIMER3_COUNT -(0x400000B0..0x400000B7) MSRs. These timers can work either in single-shot or -periodic mode. It is known that certain Windows versions revert to using HPET -(or even RTC when HPET is unavailable) extensively when this enlightenment is -not provided; this can lead to significant CPU consumption, even when virtual -CPU is idle. - -Requires: hv-vpindex, hv-synic, hv-time - -3.10. hv-tlbflush -================== -Enables paravirtualized TLB shoot-down mechanism. 
On x86 architecture, remote -TLB flush procedure requires sending IPIs and waiting for other CPUs to perform -local TLB flush. In virtualized environment some virtual CPUs may not even be -scheduled at the time of the call and may not require flushing (or, flushing -may be postponed until the virtual CPU is scheduled). hv-tlbflush enlightenment -implements TLB shoot-down through hypervisor enabling the optimization. - -Requires: hv-vpindex - -3.11. hv-ipi -============= -Enables paravirtualized IPI send mechanism. HvCallSendSyntheticClusterIpi -hypercall may target more than 64 virtual CPUs simultaneously, doing the same -through APIC requires more than one access (and thus exit to the hypervisor). - -Requires: hv-vpindex - -3.12. hv-vendor-id=xxx -======================= -This changes Hyper-V identification in CPUID 0x40000000.EBX-EDX from the default -"Microsoft Hv". The parameter should be no longer than 12 characters. According -to the specification, guests shouldn't use this information and it is unknown -if there is a Windows version which acts differently. -Note: hv-vendor-id is not an enlightenment and thus doesn't enable Hyper-V -identification when specified without some other enlightenment. - -3.13. hv-reset -=============== -Provides HV_X64_MSR_RESET (0x40000003) MSR to the guest allowing it to reset -itself by writing to it. Even when this MSR is enabled, it is not a recommended -way for Windows to perform system reboot and thus it may not be used. - -3.14. hv-frequencies -============================================ -Provides HV_X64_MSR_TSC_FREQUENCY (0x40000022) and HV_X64_MSR_APIC_FREQUENCY -(0x40000023) allowing the guest to get its TSC/APIC frequencies without doing -measurements. - -3.15 hv-reenlightenment -======================== -The enlightenment is nested specific, it targets Hyper-V on KVM guests. 
When -enabled, it provides HV_X64_MSR_REENLIGHTENMENT_CONTROL (0x40000106), -HV_X64_MSR_TSC_EMULATION_CONTROL (0x40000107)and HV_X64_MSR_TSC_EMULATION_STATUS -(0x40000108) MSRs allowing the guest to get notified when TSC frequency changes -(only happens on migration) and keep using old frequency (through emulation in -the hypervisor) until it is ready to switch to the new one. This, in conjunction -with hv-frequencies, allows Hyper-V on KVM to pass stable clocksource (Reference -TSC page) to its own guests. - -Note, KVM doesn't fully support re-enlightenment notifications and doesn't -emulate TSC accesses after migration so 'tsc-frequency=' CPU option also has to -be specified to make migration succeed. The destination host has to either have -the same TSC frequency or support TSC scaling CPU feature. - -Recommended: hv-frequencies - -3.16. hv-evmcs -=============== -The enlightenment is nested specific, it targets Hyper-V on KVM guests. When -enabled, it provides Enlightened VMCS version 1 feature to the guest. The feature -implements paravirtualized protocol between L0 (KVM) and L1 (Hyper-V) -hypervisors making L2 exits to the hypervisor faster. The feature is Intel-only. -Note: some virtualization features (e.g. Posted Interrupts) are disabled when -hv-evmcs is enabled. It may make sense to measure your nested workload with and -without the feature to find out if enabling it is beneficial. - -Requires: hv-vapic - -3.17. hv-stimer-direct -======================= -Hyper-V specification allows synthetic timer operation in two modes: "classic", -when expiration event is delivered as SynIC message and "direct", when the event -is delivered via normal interrupt. It is known that nested Hyper-V can only -use synthetic timers in direct mode and thus 'hv-stimer-direct' needs to be -enabled. - -Requires: hv-vpindex, hv-synic, hv-time, hv-stimer - -3.18. 
hv-avic (hv-apicv) -======================= -The enlightenment allows to use Hyper-V SynIC with hardware APICv/AVIC enabled. -Normally, Hyper-V SynIC disables these hardware feature and suggests the guest -to use paravirtualized AutoEOI feature. -Note: enabling this feature on old hardware (without APICv/AVIC support) may -have negative effect on guest's performance. - -3.19. hv-no-nonarch-coresharing=on/off/auto -=========================================== -This enlightenment tells guest OS that virtual processors will never share a -physical core unless they are reported as sibling SMT threads. This information -is required by Windows and Hyper-V guests to properly mitigate SMT related CPU -vulnerabilities. -When the option is set to 'auto' QEMU will enable the feature only when KVM -reports that non-architectural coresharing is impossible, this means that -hyper-threading is not supported or completely disabled on the host. This -setting also prevents migration as SMT settings on the destination may differ. -When the option is set to 'on' QEMU will always enable the feature, regardless -of host setup. To keep guests secure, this can only be used in conjunction with -exposing correct vCPU topology and vCPU pinning. - -3.20. hv-version-id-{build,major,minor,spack,sbranch,snumber} -============================================================= -This changes Hyper-V version identification in CPUID 0x40000002.EAX-EDX from the -default (WS2016). -- hv-version-id-build sets 'Build Number' (32 bits) -- hv-version-id-major sets 'Major Version' (16 bits) -- hv-version-id-minor sets 'Minor Version' (16 bits) -- hv-version-id-spack sets 'Service Pack' (32 bits) -- hv-version-id-sbranch sets 'Service Branch' (8 bits) -- hv-version-id-snumber sets 'Service Number' (24 bits) - -Note: hv-version-id-* are not enlightenments and thus don't enable Hyper-V -identification when specified without any other enlightenments. - -3.21. 
hv-syndbg -=============== -Enables Hyper-V synthetic debugger interface, this is a special interface used -by Windows Kernel debugger to send the packets through, rather than sending -them via serial/network . -When enabled, this enlightenment provides additional communication facilities -to the guest: SynDbg messages. -This new communication is used by Windows Kernel debugger rather than sending -packets via serial/network, adding significant performance boost over the other -comm channels. -This enlightenment requires a VMBus device (-device vmbus-bridge,irq=15) -and the follow enlightenments to work: -hv-relaxed,hv_time,hv-vapic,hv-vpindex,hv-synic,hv-runtime,hv-stimer - -3.22. hv-emsr-bitmap -===================== -The enlightenment is nested specific, it targets Hyper-V on KVM guests. When -enabled, it allows L0 (KVM) and L1 (Hyper-V) hypervisors to collaborate to -avoid unnecessary updates to L2 MSR-Bitmap upon vmexits. While the protocol is -supported for both VMX (Intel) and SVM (AMD), the VMX implementation requires -Enlightened VMCS ('hv-evmcs') feature to also be enabled. - -Recommended: hv-evmcs (Intel) - -3.23. hv-xmm-input -=================== -Hyper-V specification allows to pass parameters for certain hypercalls using XMM -registers ("XMM Fast Hypercall Input"). When the feature is in use, it allows -for faster hypercalls processing as KVM can avoid reading guest's memory. - -3.24. hv-tlbflush-ext -===================== -Allow for extended GVA ranges to be passed to Hyper-V TLB flush hypercalls -(HvFlushVirtualAddressList/HvFlushVirtualAddressListEx). - -Requires: hv-tlbflush - -3.25. hv-tlbflush-direct -========================= -The enlightenment is nested specific, it targets Hyper-V on KVM guests. When -enabled, it allows L0 (KVM) to directly handle TLB flush hypercalls from L2 -guest without the need to exit to L1 (Hyper-V) hypervisor. 
While the feature is -supported for both VMX (Intel) and SVM (AMD), the VMX implementation requires -Enlightened VMCS ('hv-evmcs') feature to also be enabled. - -Requires: hv-vapic -Recommended: hv-evmcs (Intel) - -4. Supplementary features -========================= - -4.1. hv-passthrough -=================== -In some cases (e.g. during development) it may make sense to use QEMU in -'pass-through' mode and give Windows guests all enlightenments currently -supported by KVM. This pass-through mode is enabled by "hv-passthrough" CPU -flag. -Note: "hv-passthrough" flag only enables enlightenments which are known to QEMU -(have corresponding "hv-*" flag) and copies "hv-spinlocks="/"hv-vendor-id=" -values from KVM to QEMU. "hv-passthrough" overrides all other "hv-*" settings on -the command line. Also, enabling this flag effectively prevents migration as the -list of enabled enlightenments may differ between target and destination hosts. - -4.2. hv-enforce-cpuid -===================== -By default, KVM allows the guest to use all currently supported Hyper-V -enlightenments when Hyper-V CPUID interface was exposed, regardless of if -some features were not announced in guest visible CPUIDs. 'hv-enforce-cpuid' -feature alters this behavior and only allows the guest to use exposed Hyper-V -enlightenments. - - -5. Useful links -================ -Hyper-V Top Level Functional specification and other information: -https://github.com/MicrosoftDocs/Virtualization-Documentation diff --git a/docs/system/i386/hyperv.rst b/docs/system/i386/hyperv.rst new file mode 100644 index 0000000000..2505dc4c86 --- /dev/null +++ b/docs/system/i386/hyperv.rst @@ -0,0 +1,288 @@ +Hyper-V Enlightenments +====================== + + +Description +----------- + +In some cases when implementing a hardware interface in software is slow, KVM +implements its own paravirtualized interfaces. This works well for Linux as +guest support for such features is added simultaneously with the feature itself. 
+It may, however, be hard-to-impossible to add support for these interfaces to +proprietary OSes, namely, Microsoft Windows. + +KVM on x86 implements Hyper-V Enlightenments for Windows guests. These features +make Windows and Hyper-V guests think they're running on top of a Hyper-V +compatible hypervisor and use Hyper-V specific features. + + +Setup +----- + +No Hyper-V enlightenments are enabled by default by either KVM or QEMU. In +QEMU, individual enlightenments can be enabled through CPU flags, e.g: + +.. parsed-literal:: + + |qemu_system| --enable-kvm --cpu host,hv_relaxed,hv_vpindex,hv_time, ... + +Sometimes there are dependencies between enlightenments, QEMU is supposed to +check that the supplied configuration is sane. + +When any set of the Hyper-V enlightenments is enabled, QEMU changes hypervisor +identification (CPUID 0x40000000..0x4000000A) to Hyper-V. KVM identification +and features are kept in leaves 0x40000100..0x40000101. + + +Existing enlightenments +----------------------- + +``hv-relaxed`` + This feature tells guest OS to disable watchdog timeouts as it is running on a + hypervisor. It is known that some Windows versions will do this even when they + see 'hypervisor' CPU flag. + +``hv-vapic`` + Provides so-called VP Assist page MSR to guest allowing it to work with APIC + more efficiently. In particular, this enlightenment allows paravirtualized + (exit-less) EOI processing. + +``hv-spinlocks`` = xxx + Enables paravirtualized spinlocks. The parameter indicates how many times + spinlock acquisition should be attempted before indicating the situation to the + hypervisor. A special value 0xffffffff indicates "never notify". + +``hv-vpindex`` + Provides HV_X64_MSR_VP_INDEX (0x40000002) MSR to the guest which has Virtual + processor index information. This enlightenment makes sense in conjunction with + hv-synic, hv-stimer and other enlightenments which require the guest to know its + Virtual Processor indices (e.g. 
when VP index needs to be passed in a + hypercall). + +``hv-runtime`` + Provides HV_X64_MSR_VP_RUNTIME (0x40000010) MSR to the guest. The MSR keeps the + virtual processor run time in 100ns units. This gives guest operating system an + idea of how much time was 'stolen' from it (when the virtual CPU was preempted + to perform some other work). + +``hv-crash`` + Provides HV_X64_MSR_CRASH_P0..HV_X64_MSR_CRASH_P4 (0x40000100..0x40000104) and + HV_X64_MSR_CRASH_CTL (0x40000105) MSRs to the guest. These MSRs are written to + by the guest when it crashes, HV_X64_MSR_CRASH_P0..HV_X64_MSR_CRASH_P4 MSRs + contain additional crash information. This information is output to the QEMU log + and through QAPI. + Note: unlike under genuine Hyper-V, write to HV_X64_MSR_CRASH_CTL causes guest + to shut down. This effectively blocks crash dump generation by Windows. + +``hv-time`` + Enables two Hyper-V-specific clocksources available to the guest: MSR-based + Hyper-V clocksource (HV_X64_MSR_TIME_REF_COUNT, 0x40000020) and Reference TSC + page (enabled via MSR HV_X64_MSR_REFERENCE_TSC, 0x40000021). Both clocksources + are per-guest, Reference TSC page clocksource allows for exit-less time stamp + readings. Using this enlightenment leads to significant speedup of all timestamp + related operations. + +``hv-synic`` + Enables Hyper-V Synthetic interrupt controller - an extension of a local APIC. + When enabled, this enlightenment provides additional communication facilities + to the guest: SynIC messages and Events. This is a pre-requisite for + implementing VMBus devices (not yet in QEMU). Additionally, this enlightenment + is needed to enable Hyper-V synthetic timers. SynIC is controlled through MSRs + HV_X64_MSR_SCONTROL..HV_X64_MSR_EOM (0x40000080..0x40000084) and + HV_X64_MSR_SINT0..HV_X64_MSR_SINT15 (0x40000090..0x4000009F). + + Requires: ``hv-vpindex`` + +``hv-stimer`` + Enables Hyper-V synthetic timers. 
There are four synthetic timers per virtual + CPU controlled through HV_X64_MSR_STIMER0_CONFIG..HV_X64_MSR_STIMER3_COUNT + (0x400000B0..0x400000B7) MSRs. These timers can work either in single-shot or + periodic mode. It is known that certain Windows versions revert to using HPET + (or even RTC when HPET is unavailable) extensively when this enlightenment is + not provided; this can lead to significant CPU consumption, even when virtual + CPU is idle. + + Requires: ``hv-vpindex``, ``hv-synic``, ``hv-time`` + +``hv-tlbflush`` + Enables paravirtualized TLB shoot-down mechanism. On x86 architecture, remote + TLB flush procedure requires sending IPIs and waiting for other CPUs to perform + local TLB flush. In virtualized environment some virtual CPUs may not even be + scheduled at the time of the call and may not require flushing (or, flushing + may be postponed until the virtual CPU is scheduled). hv-tlbflush enlightenment + implements TLB shoot-down through hypervisor enabling the optimization. + + Requires: ``hv-vpindex`` + +``hv-ipi`` + Enables paravirtualized IPI send mechanism. HvCallSendSyntheticClusterIpi + hypercall may target more than 64 virtual CPUs simultaneously, doing the same + through APIC requires more than one access (and thus exit to the hypervisor). + + Requires: ``hv-vpindex`` + +``hv-vendor-id`` = xxx + This changes Hyper-V identification in CPUID 0x40000000.EBX-EDX from the default + "Microsoft Hv". The parameter should be no longer than 12 characters. According + to the specification, guests shouldn't use this information and it is unknown + if there is a Windows version which acts differently. + Note: hv-vendor-id is not an enlightenment and thus doesn't enable Hyper-V + identification when specified without some other enlightenment. + +``hv-reset`` + Provides HV_X64_MSR_RESET (0x40000003) MSR to the guest allowing it to reset + itself by writing to it. 
Even when this MSR is enabled, it is not a recommended + way for Windows to perform system reboot and thus it may not be used. + +``hv-frequencies`` + Provides HV_X64_MSR_TSC_FREQUENCY (0x40000022) and HV_X64_MSR_APIC_FREQUENCY + (0x40000023) allowing the guest to get its TSC/APIC frequencies without doing + measurements. + +``hv-reenlightenment`` + The enlightenment is nested specific, it targets Hyper-V on KVM guests. When + enabled, it provides HV_X64_MSR_REENLIGHTENMENT_CONTROL (0x40000106), + HV_X64_MSR_TSC_EMULATION_CONTROL (0x40000107) and HV_X64_MSR_TSC_EMULATION_STATUS + (0x40000108) MSRs allowing the guest to get notified when TSC frequency changes + (only happens on migration) and keep using old frequency (through emulation in + the hypervisor) until it is ready to switch to the new one. This, in conjunction + with ``hv-frequencies``, allows Hyper-V on KVM to pass stable clocksource + (Reference TSC page) to its own guests. + + Note, KVM doesn't fully support re-enlightenment notifications and doesn't + emulate TSC accesses after migration so 'tsc-frequency=' CPU option also has to + be specified to make migration succeed. The destination host has to either have + the same TSC frequency or support TSC scaling CPU feature. + + Recommended: ``hv-frequencies`` + +``hv-evmcs`` + The enlightenment is nested specific, it targets Hyper-V on KVM guests. When + enabled, it provides Enlightened VMCS version 1 feature to the guest. The feature + implements paravirtualized protocol between L0 (KVM) and L1 (Hyper-V) + hypervisors making L2 exits to the hypervisor faster. The feature is Intel-only. + + Note: some virtualization features (e.g. Posted Interrupts) are disabled when + hv-evmcs is enabled. It may make sense to measure your nested workload with and + without the feature to find out if enabling it is beneficial. 
+ + Requires: ``hv-vapic`` + +``hv-stimer-direct`` + Hyper-V specification allows synthetic timer operation in two modes: "classic", + when expiration event is delivered as SynIC message and "direct", when the event + is delivered via normal interrupt. It is known that nested Hyper-V can only + use synthetic timers in direct mode and thus ``hv-stimer-direct`` needs to be + enabled. + + Requires: ``hv-vpindex``, ``hv-synic``, ``hv-time``, ``hv-stimer`` + +``hv-avic`` (``hv-apicv``) + The enlightenment allows using Hyper-V SynIC with hardware APICv/AVIC enabled. + Normally, Hyper-V SynIC disables these hardware features and suggests the guest + to use paravirtualized AutoEOI feature. + Note: enabling this feature on old hardware (without APICv/AVIC support) may + have negative effect on guest's performance. + +``hv-no-nonarch-coresharing`` = on/off/auto + This enlightenment tells guest OS that virtual processors will never share a + physical core unless they are reported as sibling SMT threads. This information + is required by Windows and Hyper-V guests to properly mitigate SMT related CPU + vulnerabilities. + + When the option is set to 'auto' QEMU will enable the feature only when KVM + reports that non-architectural coresharing is impossible, this means that + hyper-threading is not supported or completely disabled on the host. This + setting also prevents migration as SMT settings on the destination may differ. + When the option is set to 'on' QEMU will always enable the feature, regardless + of host setup. To keep guests secure, this can only be used in conjunction with + exposing correct vCPU topology and vCPU pinning. + +``hv-version-id-build``, ``hv-version-id-major``, ``hv-version-id-minor``, ``hv-version-id-spack``, ``hv-version-id-sbranch``, ``hv-version-id-snumber`` + This changes Hyper-V version identification in CPUID 0x40000002.EAX-EDX from the + default (WS2016). 
+ + - ``hv-version-id-build`` sets 'Build Number' (32 bits) + - ``hv-version-id-major`` sets 'Major Version' (16 bits) + - ``hv-version-id-minor`` sets 'Minor Version' (16 bits) + - ``hv-version-id-spack`` sets 'Service Pack' (32 bits) + - ``hv-version-id-sbranch`` sets 'Service Branch' (8 bits) + - ``hv-version-id-snumber`` sets 'Service Number' (24 bits) + + Note: hv-version-id-* are not enlightenments and thus don't enable Hyper-V + identification when specified without any other enlightenments. + +``hv-syndbg`` + Enables Hyper-V synthetic debugger interface, this is a special interface used + by Windows Kernel debugger to send the packets through, rather than sending + them via serial/network. + When enabled, this enlightenment provides additional communication facilities + to the guest: SynDbg messages. + This new communication is used by Windows Kernel debugger rather than sending + packets via serial/network, adding significant performance boost over the other + comm channels. + This enlightenment requires a VMBus device (-device vmbus-bridge,irq=15). + + Requires: ``hv-relaxed``, ``hv-time``, ``hv-vapic``, ``hv-vpindex``, ``hv-synic``, ``hv-runtime``, ``hv-stimer`` + +``hv-emsr-bitmap`` + The enlightenment is nested specific, it targets Hyper-V on KVM guests. When + enabled, it allows L0 (KVM) and L1 (Hyper-V) hypervisors to collaborate to + avoid unnecessary updates to L2 MSR-Bitmap upon vmexits. While the protocol is + supported for both VMX (Intel) and SVM (AMD), the VMX implementation requires + Enlightened VMCS (``hv-evmcs``) feature to also be enabled. + + Recommended: ``hv-evmcs`` (Intel) + +``hv-xmm-input`` + Hyper-V specification allows to pass parameters for certain hypercalls using XMM + registers ("XMM Fast Hypercall Input"). When the feature is in use, it allows + for faster hypercalls processing as KVM can avoid reading guest's memory. 
+ +``hv-tlbflush-ext`` + Allow for extended GVA ranges to be passed to Hyper-V TLB flush hypercalls + (HvFlushVirtualAddressList/HvFlushVirtualAddressListEx). + + Requires: ``hv-tlbflush`` + +``hv-tlbflush-direct`` + The enlightenment is nested specific, it targets Hyper-V on KVM guests. When + enabled, it allows L0 (KVM) to directly handle TLB flush hypercalls from L2 + guest without the need to exit to L1 (Hyper-V) hypervisor. While the feature is + supported for both VMX (Intel) and SVM (AMD), the VMX implementation requires + Enlightened VMCS (``hv-evmcs``) feature to also be enabled. + + Requires: ``hv-vapic`` + + Recommended: ``hv-evmcs`` (Intel) + +Supplementary features +---------------------- + +``hv-passthrough`` + In some cases (e.g. during development) it may make sense to use QEMU in + 'pass-through' mode and give Windows guests all enlightenments currently + supported by KVM. This pass-through mode is enabled by "hv-passthrough" CPU + flag. + + Note: ``hv-passthrough`` flag only enables enlightenments which are known to QEMU + (have corresponding 'hv-' flag) and copies ``hv-spinlocks`` and ``hv-vendor-id`` + values from KVM to QEMU. ``hv-passthrough`` overrides all other 'hv-' settings on + the command line. Also, enabling this flag effectively prevents migration as the + list of enabled enlightenments may differ between source and destination hosts. + +``hv-enforce-cpuid`` + By default, KVM allows the guest to use all currently supported Hyper-V + enlightenments when Hyper-V CPUID interface was exposed, even if + some features were not announced in guest visible CPUIDs. ``hv-enforce-cpuid`` + feature alters this behavior and only allows the guest to use exposed Hyper-V + enlightenments. 
+ + +Useful links +------------ +Hyper-V Top Level Functional specification and other information: + +- https://github.com/MicrosoftDocs/Virtualization-Documentation +- https://docs.microsoft.com/en-us/virtualization/hyper-v-on-windows/tlfs/tlfs + diff --git a/docs/system/target-i386.rst b/docs/system/target-i386.rst index 96bf54889a..e64c013077 100644 --- a/docs/system/target-i386.rst +++ b/docs/system/target-i386.rst @@ -26,6 +26,7 @@ Architectural features :maxdepth: 1 i386/cpu + i386/hyperv i386/kvm-pv i386/sgx i386/amd-memory-encryption -- cgit v1.2.3-55-g7522