From 0f062bfe36b63cd24f16afe2822d0df7c27904d9 Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Thu, 28 Feb 2019 18:33:00 +0000 Subject: KVM: arm/arm64: Add hook for arch-specific KVM initialisation This patch adds a kvm_arm_init_arch_resources() hook to perform subarch-specific initialisation when starting up KVM. This will be used in a subsequent patch for global SVE-related setup on arm64. No functional change. Signed-off-by: Dave Martin Reviewed-by: Julien Thierry Tested-by: zhang.lei Signed-off-by: Marc Zyngier --- virt/kvm/arm/arm.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'virt/kvm') diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index 99c37384ba7b..c69e1370a5dc 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c @@ -1664,6 +1664,10 @@ int kvm_arch_init(void *opaque) if (err) return err; + err = kvm_arm_init_arch_resources(); + if (err) + return err; + if (!in_hyp_mode) { err = init_hyp_mode(); if (err) -- cgit v1.2.3-55-g7522 From 7dd32a0d0103a5941efbb971f85a3e930cc5665e Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Wed, 19 Dec 2018 14:27:01 +0000 Subject: KVM: arm/arm64: Add KVM_ARM_VCPU_FINALIZE ioctl Some aspects of vcpu configuration may be too complex to be completed inside KVM_ARM_VCPU_INIT. Thus, there may be a requirement for userspace to do some additional configuration before various other ioctls will work in a consistent way. In particular this will be the case for SVE, where userspace will need to negotiate the set of vector lengths to be made available to the guest before the vcpu becomes fully usable. In order to provide an explicit way for userspace to confirm that it has finished setting up a particular vcpu feature, this patch adds a new ioctl KVM_ARM_VCPU_FINALIZE. When userspace has opted into a feature that requires finalization, typically by means of a feature flag passed to KVM_ARM_VCPU_INIT, a matching call to KVM_ARM_VCPU_FINALIZE is now required before KVM_RUN or KVM_GET_REG_LIST is allowed. Individual features may impose additional restrictions where appropriate. No existing vcpu features are affected by this, so current userspace implementations will continue to work exactly as before, with no need to issue KVM_ARM_VCPU_FINALIZE. As implemented in this patch, KVM_ARM_VCPU_FINALIZE is currently a placeholder: no finalizable features exist yet, so ioctl is not required and will always yield EINVAL. Subsequent patches will add the finalization logic to make use of this ioctl for SVE. No functional change for existing userspace. Signed-off-by: Dave Martin Reviewed-by: Julien Thierry Tested-by: zhang.lei Signed-off-by: Marc Zyngier --- arch/arm/include/asm/kvm_host.h | 4 ++++ arch/arm64/include/asm/kvm_host.h | 4 ++++ include/uapi/linux/kvm.h | 3 +++ virt/kvm/arm/arm.c | 18 ++++++++++++++++++ 4 files changed, 29 insertions(+) (limited to 'virt/kvm') diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index a49ee01242ff..e80cfc18412b 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -19,6 +19,7 @@ #ifndef __ARM_KVM_HOST_H__ #define __ARM_KVM_HOST_H__ +#include #include #include #include @@ -411,4 +412,7 @@ static inline int kvm_arm_setup_stage2(struct kvm *kvm, unsigned long type) return 0; } +#define kvm_arm_vcpu_finalize(vcpu, what) (-EINVAL) +#define kvm_arm_vcpu_is_finalized(vcpu) true + #endif /* __ARM_KVM_HOST_H__ */ diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 3e8950942591..98658f7dad68 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -23,6 +23,7 @@ #define __ARM64_KVM_HOST_H__ #include +#include #include #include #include @@ -625,4 +626,7 @@ void kvm_arch_free_vm(struct kvm *kvm); int kvm_arm_setup_stage2(struct kvm *kvm, unsigned long type); +#define kvm_arm_vcpu_finalize(vcpu, what) (-EINVAL) +#define kvm_arm_vcpu_is_finalized(vcpu) true + #endif /* __ARM64_KVM_HOST_H__ */ diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index dc77a5a3648d..c3b8e7a31315 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1441,6 +1441,9 @@ struct kvm_enc_region { /* Available with KVM_CAP_HYPERV_CPUID */ #define KVM_GET_SUPPORTED_HV_CPUID _IOWR(KVMIO, 0xc1, struct kvm_cpuid2) +/* Available with KVM_CAP_ARM_SVE */ +#define KVM_ARM_VCPU_FINALIZE _IOW(KVMIO, 0xc2, int) + /* Secure Encrypted Virtualization command */ enum sev_cmd_id { /* Guest initialization commands */ diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index c69e1370a5dc..9edbf0f676e7 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c @@ -545,6 +545,9 @@ static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu) if (likely(vcpu->arch.has_run_once)) return 0; + if (!kvm_arm_vcpu_is_finalized(vcpu)) + return -EPERM; + vcpu->arch.has_run_once = true; if (likely(irqchip_in_kernel(kvm))) { @@ -1116,6 +1119,10 @@ long kvm_arch_vcpu_ioctl(struct file *filp, if (unlikely(!kvm_vcpu_initialized(vcpu))) break; + r = -EPERM; + if (!kvm_arm_vcpu_is_finalized(vcpu)) + break; + r = -EFAULT; if (copy_from_user(®_list, user_list, sizeof(reg_list))) break; @@ -1169,6 +1176,17 @@ long kvm_arch_vcpu_ioctl(struct file *filp, return kvm_arm_vcpu_set_events(vcpu, &events); } + case KVM_ARM_VCPU_FINALIZE: { + int what; + + if (!kvm_vcpu_initialized(vcpu)) + return -ENOEXEC; + + if (get_user(what, (const int __user *)argp)) + return -EFAULT; + + return kvm_arm_vcpu_finalize(vcpu, what); + } default: r = -EINVAL; } -- cgit v1.2.3-55-g7522 From c110ae578ca0a10064dfbda3d786d6a733b9fe69 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 28 Mar 2019 17:24:03 +0100 Subject: kvm: move KVM_CAP_NR_MEMSLOTS to common code All architectures except MIPS were defining it in the same way, and memory slots are handled entirely by common code so there is no point in keeping the definition per-architecture. Signed-off-by: Paolo Bonzini --- Documentation/virtual/kvm/api.txt | 5 ++--- arch/powerpc/kvm/powerpc.c | 3 --- arch/s390/kvm/kvm-s390.c | 3 --- arch/x86/kvm/x86.c | 3 --- virt/kvm/arm/arm.c | 3 --- virt/kvm/kvm_main.c | 2 ++ 6 files changed, 4 insertions(+), 15 deletions(-) (limited to 'virt/kvm') diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 67068c47c591..b62ad0d94234 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -1117,9 +1117,8 @@ struct kvm_userspace_memory_region { This ioctl allows the user to create, modify or delete a guest physical memory slot. Bits 0-15 of "slot" specify the slot id and this value should be less than the maximum number of user memory slots supported per -VM. The maximum allowed slots can be queried using KVM_CAP_NR_MEMSLOTS, -if this capability is supported by the architecture. Slots may not -overlap in guest physical address space. +VM. The maximum allowed slots can be queried using KVM_CAP_NR_MEMSLOTS. +Slots may not overlap in guest physical address space. If KVM_CAP_MULTI_ADDRESS_SPACE is available, bits 16-31 of "slot" specifies the address space which is being modified. They must be diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 8885377ec3e0..92910b7c5bcc 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -644,9 +644,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) else r = num_online_cpus(); break; - case KVM_CAP_NR_MEMSLOTS: - r = KVM_USER_MEM_SLOTS; - break; case KVM_CAP_MAX_VCPUS: r = KVM_MAX_VCPUS; break; diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 4638303ba6a8..28f35d2b06cb 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -513,9 +513,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) else if (sclp.has_esca && sclp.has_64bscao) r = KVM_S390_ESCA_CPU_SLOTS; break; - case KVM_CAP_NR_MEMSLOTS: - r = KVM_USER_MEM_SLOTS; - break; case KVM_CAP_S390_COW: r = MACHINE_HAS_ESOP; break; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 38440316a806..6c27d224f744 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3093,9 +3093,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_MAX_VCPUS: r = KVM_MAX_VCPUS; break; - case KVM_CAP_NR_MEMSLOTS: - r = KVM_USER_MEM_SLOTS; - break; case KVM_CAP_PV_MMU: /* obsolete */ r = 0; break; diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index 99c37384ba7b..be4ec5f3ba5f 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c @@ -224,9 +224,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_MAX_VCPUS: r = KVM_MAX_VCPUS; break; - case KVM_CAP_NR_MEMSLOTS: - r = KVM_USER_MEM_SLOTS; - break; case KVM_CAP_MSI_DEVID: if (!kvm) r = -EINVAL; diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index dc8edc97ba85..684b67252cd5 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -3063,6 +3063,8 @@ static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg) #endif case KVM_CAP_MAX_VCPU_ID: return KVM_MAX_VCPU_ID; + case KVM_CAP_NR_MEMSLOTS: + return KVM_USER_MEM_SLOTS; default: break; } -- cgit v1.2.3-55-g7522 From a3be836df7cb777fa8ecbfd662224bfe0394f771 Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Fri, 12 Apr 2019 15:30:58 +0100 Subject: KVM: arm/arm64: Demote kvm_arm_init_arch_resources() to just set up SVE The introduction of kvm_arm_init_arch_resources() looks like premature factoring, since nothing else uses this hook yet and it is not clear what will use it in the future. For now, let's not pretend that this is a general thing: This patch simply renames the function to kvm_arm_init_sve(), retaining the arm stub version under the new name. Suggested-by: Andrew Jones Signed-off-by: Dave Martin Reviewed-by: Andrew Jones Signed-off-by: Marc Zyngier --- arch/arm/include/asm/kvm_host.h | 2 +- arch/arm64/include/asm/kvm_host.h | 2 +- arch/arm64/kvm/reset.c | 2 +- virt/kvm/arm/arm.c | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) (limited to 'virt/kvm') diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index e80cfc18412b..d95627393353 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -54,7 +54,7 @@ DECLARE_STATIC_KEY_FALSE(userspace_irqchip_in_use); -static inline int kvm_arm_init_arch_resources(void) { return 0; } +static inline int kvm_arm_init_sve(void) { return 0; } u32 *kvm_vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num, u32 mode); int __attribute_const__ kvm_target_cpu(void); diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 9d57cf8be879..6adf08ba9277 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -59,7 +59,7 @@ DECLARE_STATIC_KEY_FALSE(userspace_irqchip_in_use); extern unsigned int kvm_sve_max_vl; -int kvm_arm_init_arch_resources(void); +int kvm_arm_init_sve(void); int __attribute_const__ kvm_target_cpu(void); int kvm_reset_vcpu(struct kvm_vcpu *vcpu); diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c index f13378d0a0ad..8847f389f56d 100644 --- a/arch/arm64/kvm/reset.c +++ b/arch/arm64/kvm/reset.c @@ -110,7 +110,7 @@ int kvm_arch_vm_ioctl_check_extension(struct kvm *kvm, long ext) unsigned int kvm_sve_max_vl; -int kvm_arm_init_arch_resources(void) +int kvm_arm_init_sve(void) { if (system_supports_sve()) { kvm_sve_max_vl = sve_max_virtualisable_vl; diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index 9edbf0f676e7..7039c99cc217 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c @@ -1682,7 +1682,7 @@ int kvm_arch_init(void *opaque) if (err) return err; - err = kvm_arm_init_arch_resources(); + err = kvm_arm_init_sve(); if (err) return err; -- cgit v1.2.3-55-g7522 From 384b40caa8afae44a54e8f69bd37097c0279fdce Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Tue, 23 Apr 2019 10:12:35 +0530 Subject: KVM: arm/arm64: Context-switch ptrauth registers When pointer authentication is supported, a guest may wish to use it. This patch adds the necessary KVM infrastructure for this to work, with a semi-lazy context switch of the pointer auth state. Pointer authentication feature is only enabled when VHE is built in the kernel and present in the CPU implementation so only VHE code paths are modified. When we schedule a vcpu, we disable guest usage of pointer authentication instructions and accesses to the keys. While these are disabled, we avoid context-switching the keys. When we trap the guest trying to use pointer authentication functionality, we change to eagerly context-switching the keys, and enable the feature. The next time the vcpu is scheduled out/in, we start again. However the host key save is optimized and implemented inside ptrauth instruction/register access trap. Pointer authentication consists of address authentication and generic authentication, and CPUs in a system might have varied support for either. Where support for either feature is not uniform, it is hidden from guests via ID register emulation, as a result of the cpufeature framework in the host. Unfortunately, address authentication and generic authentication cannot be trapped separately, as the architecture provides a single EL2 trap covering both. If we wish to expose one without the other, we cannot prevent a (badly-written) guest from intermittently using a feature which is not uniformly supported (when scheduled on a physical CPU which supports the relevant feature). Hence, this patch expects both type of authentication to be present in a cpu. This switch of key is done from guest enter/exit assembly as preparation for the upcoming in-kernel pointer authentication support. Hence, these key switching routines are not implemented in C code as they may cause pointer authentication key signing error in some situations. Signed-off-by: Mark Rutland [Only VHE, key switch in full assembly, vcpu_has_ptrauth checks , save host key in ptrauth exception trap] Signed-off-by: Amit Daniel Kachhap Reviewed-by: Julien Thierry Cc: Christoffer Dall Cc: kvmarm@lists.cs.columbia.edu [maz: various fixups] Signed-off-by: Marc Zyngier --- arch/arm/include/asm/kvm_emulate.h | 2 + arch/arm64/Kconfig | 6 +- arch/arm64/include/asm/kvm_emulate.h | 16 +++++ arch/arm64/include/asm/kvm_host.h | 14 +++++ arch/arm64/include/asm/kvm_ptrauth.h | 111 +++++++++++++++++++++++++++++++++++ arch/arm64/kernel/asm-offsets.c | 6 ++ arch/arm64/kvm/handle_exit.c | 36 +++++++++--- arch/arm64/kvm/hyp/entry.S | 15 +++++ arch/arm64/kvm/sys_regs.c | 50 +++++++++++++--- virt/kvm/arm/arm.c | 2 + 10 files changed, 240 insertions(+), 18 deletions(-) create mode 100644 arch/arm64/include/asm/kvm_ptrauth.h (limited to 'virt/kvm') diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h index 8927cae7c966..efb0e2c0d84c 100644 --- a/arch/arm/include/asm/kvm_emulate.h +++ b/arch/arm/include/asm/kvm_emulate.h @@ -343,4 +343,6 @@ static inline unsigned long vcpu_data_host_to_guest(struct kvm_vcpu *vcpu, } } +static inline void vcpu_ptrauth_setup_lazy(struct kvm_vcpu *vcpu) {} + #endif /* __ARM_KVM_EMULATE_H__ */ diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 7e34b9eba5de..39470784a50c 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -1288,6 +1288,7 @@ menu "ARMv8.3 architectural features" config ARM64_PTR_AUTH bool "Enable support for pointer authentication" default y + depends on !KVM || ARM64_VHE help Pointer authentication (part of the ARMv8.3 Extensions) provides instructions for signing and authenticating pointers against secret @@ -1301,8 +1302,9 @@ config ARM64_PTR_AUTH context-switched along with the process. The feature is detected at runtime. If the feature is not present in - hardware it will not be advertised to userspace nor will it be - enabled. + hardware it will not be advertised to userspace/KVM guest nor will it + be enabled. However, KVM guest also require VHE mode and hence + CONFIG_ARM64_VHE=y option to use this feature. endmenu diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index d3842791e1c4..613427fafff9 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -98,6 +98,22 @@ static inline void vcpu_set_wfe_traps(struct kvm_vcpu *vcpu) vcpu->arch.hcr_el2 |= HCR_TWE; } +static inline void vcpu_ptrauth_enable(struct kvm_vcpu *vcpu) +{ + vcpu->arch.hcr_el2 |= (HCR_API | HCR_APK); +} + +static inline void vcpu_ptrauth_disable(struct kvm_vcpu *vcpu) +{ + vcpu->arch.hcr_el2 &= ~(HCR_API | HCR_APK); +} + +static inline void vcpu_ptrauth_setup_lazy(struct kvm_vcpu *vcpu) +{ + if (vcpu_has_ptrauth(vcpu)) + vcpu_ptrauth_disable(vcpu); +} + static inline unsigned long vcpu_get_vsesr(struct kvm_vcpu *vcpu) { return vcpu->arch.vsesr_el2; diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 7ccac42a91a6..7eebea7059c6 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -161,6 +161,18 @@ enum vcpu_sysreg { PMSWINC_EL0, /* Software Increment Register */ PMUSERENR_EL0, /* User Enable Register */ + /* Pointer Authentication Registers in a strict increasing order. */ + APIAKEYLO_EL1, + APIAKEYHI_EL1, + APIBKEYLO_EL1, + APIBKEYHI_EL1, + APDAKEYLO_EL1, + APDAKEYHI_EL1, + APDBKEYLO_EL1, + APDBKEYHI_EL1, + APGAKEYLO_EL1, + APGAKEYHI_EL1, + /* 32bit specific registers. Keep them at the end of the range */ DACR32_EL2, /* Domain Access Control Register */ IFSR32_EL2, /* Instruction Fault Status Register */ @@ -530,6 +542,8 @@ static inline bool kvm_arch_requires_vhe(void) return false; } +void kvm_arm_vcpu_ptrauth_trap(struct kvm_vcpu *vcpu); + static inline void kvm_arch_hardware_unsetup(void) {} static inline void kvm_arch_sync_events(struct kvm *kvm) {} static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {} diff --git a/arch/arm64/include/asm/kvm_ptrauth.h b/arch/arm64/include/asm/kvm_ptrauth.h new file mode 100644 index 000000000000..6301813dcace --- /dev/null +++ b/arch/arm64/include/asm/kvm_ptrauth.h @@ -0,0 +1,111 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* arch/arm64/include/asm/kvm_ptrauth.h: Guest/host ptrauth save/restore + * Copyright 2019 Arm Limited + * Authors: Mark Rutland + * Amit Daniel Kachhap + */ + +#ifndef __ASM_KVM_PTRAUTH_H +#define __ASM_KVM_PTRAUTH_H + +#ifdef __ASSEMBLY__ + +#include + +#ifdef CONFIG_ARM64_PTR_AUTH + +#define PTRAUTH_REG_OFFSET(x) (x - CPU_APIAKEYLO_EL1) + +/* + * CPU_AP*_EL1 values exceed immediate offset range (512) for stp + * instruction so below macros takes CPU_APIAKEYLO_EL1 as base and + * calculates the offset of the keys from this base to avoid an extra add + * instruction. These macros assumes the keys offsets follow the order of + * the sysreg enum in kvm_host.h. + */ +.macro ptrauth_save_state base, reg1, reg2 + mrs_s \reg1, SYS_APIAKEYLO_EL1 + mrs_s \reg2, SYS_APIAKEYHI_EL1 + stp \reg1, \reg2, [\base, #PTRAUTH_REG_OFFSET(CPU_APIAKEYLO_EL1)] + mrs_s \reg1, SYS_APIBKEYLO_EL1 + mrs_s \reg2, SYS_APIBKEYHI_EL1 + stp \reg1, \reg2, [\base, #PTRAUTH_REG_OFFSET(CPU_APIBKEYLO_EL1)] + mrs_s \reg1, SYS_APDAKEYLO_EL1 + mrs_s \reg2, SYS_APDAKEYHI_EL1 + stp \reg1, \reg2, [\base, #PTRAUTH_REG_OFFSET(CPU_APDAKEYLO_EL1)] + mrs_s \reg1, SYS_APDBKEYLO_EL1 + mrs_s \reg2, SYS_APDBKEYHI_EL1 + stp \reg1, \reg2, [\base, #PTRAUTH_REG_OFFSET(CPU_APDBKEYLO_EL1)] + mrs_s \reg1, SYS_APGAKEYLO_EL1 + mrs_s \reg2, SYS_APGAKEYHI_EL1 + stp \reg1, \reg2, [\base, #PTRAUTH_REG_OFFSET(CPU_APGAKEYLO_EL1)] +.endm + +.macro ptrauth_restore_state base, reg1, reg2 + ldp \reg1, \reg2, [\base, #PTRAUTH_REG_OFFSET(CPU_APIAKEYLO_EL1)] + msr_s SYS_APIAKEYLO_EL1, \reg1 + msr_s SYS_APIAKEYHI_EL1, \reg2 + ldp \reg1, \reg2, [\base, #PTRAUTH_REG_OFFSET(CPU_APIBKEYLO_EL1)] + msr_s SYS_APIBKEYLO_EL1, \reg1 + msr_s SYS_APIBKEYHI_EL1, \reg2 + ldp \reg1, \reg2, [\base, #PTRAUTH_REG_OFFSET(CPU_APDAKEYLO_EL1)] + msr_s SYS_APDAKEYLO_EL1, \reg1 + msr_s SYS_APDAKEYHI_EL1, \reg2 + ldp \reg1, \reg2, [\base, #PTRAUTH_REG_OFFSET(CPU_APDBKEYLO_EL1)] + msr_s SYS_APDBKEYLO_EL1, \reg1 + msr_s SYS_APDBKEYHI_EL1, \reg2 + ldp \reg1, \reg2, [\base, #PTRAUTH_REG_OFFSET(CPU_APGAKEYLO_EL1)] + msr_s SYS_APGAKEYLO_EL1, \reg1 + msr_s SYS_APGAKEYHI_EL1, \reg2 +.endm + +/* + * Both ptrauth_switch_to_guest and ptrauth_switch_to_host macros will + * check for the presence of one of the cpufeature flag + * ARM64_HAS_ADDRESS_AUTH_ARCH or ARM64_HAS_ADDRESS_AUTH_IMP_DEF and + * then proceed ahead with the save/restore of Pointer Authentication + * key registers. + */ +.macro ptrauth_switch_to_guest g_ctxt, reg1, reg2, reg3 +alternative_if ARM64_HAS_ADDRESS_AUTH_ARCH + b 1000f +alternative_else_nop_endif +alternative_if_not ARM64_HAS_ADDRESS_AUTH_IMP_DEF + b 1001f +alternative_else_nop_endif +1000: + ldr \reg1, [\g_ctxt, #(VCPU_HCR_EL2 - VCPU_CONTEXT)] + and \reg1, \reg1, #(HCR_API | HCR_APK) + cbz \reg1, 1001f + add \reg1, \g_ctxt, #CPU_APIAKEYLO_EL1 + ptrauth_restore_state \reg1, \reg2, \reg3 +1001: +.endm + +.macro ptrauth_switch_to_host g_ctxt, h_ctxt, reg1, reg2, reg3 +alternative_if ARM64_HAS_ADDRESS_AUTH_ARCH + b 2000f +alternative_else_nop_endif +alternative_if_not ARM64_HAS_ADDRESS_AUTH_IMP_DEF + b 2001f +alternative_else_nop_endif +2000: + ldr \reg1, [\g_ctxt, #(VCPU_HCR_EL2 - VCPU_CONTEXT)] + and \reg1, \reg1, #(HCR_API | HCR_APK) + cbz \reg1, 2001f + add \reg1, \g_ctxt, #CPU_APIAKEYLO_EL1 + ptrauth_save_state \reg1, \reg2, \reg3 + add \reg1, \h_ctxt, #CPU_APIAKEYLO_EL1 + ptrauth_restore_state \reg1, \reg2, \reg3 + isb +2001: +.endm + +#else /* !CONFIG_ARM64_PTR_AUTH */ +.macro ptrauth_switch_to_guest g_ctxt, reg1, reg2, reg3 +.endm +.macro ptrauth_switch_to_host g_ctxt, h_ctxt, reg1, reg2, reg3 +.endm +#endif /* CONFIG_ARM64_PTR_AUTH */ +#endif /* __ASSEMBLY__ */ +#endif /* __ASM_KVM_PTRAUTH_H */ diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index 7f40dcbdd51d..8178330a9f7a 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -125,7 +125,13 @@ int main(void) DEFINE(VCPU_CONTEXT, offsetof(struct kvm_vcpu, arch.ctxt)); DEFINE(VCPU_FAULT_DISR, offsetof(struct kvm_vcpu, arch.fault.disr_el1)); DEFINE(VCPU_WORKAROUND_FLAGS, offsetof(struct kvm_vcpu, arch.workaround_flags)); + DEFINE(VCPU_HCR_EL2, offsetof(struct kvm_vcpu, arch.hcr_el2)); DEFINE(CPU_GP_REGS, offsetof(struct kvm_cpu_context, gp_regs)); + DEFINE(CPU_APIAKEYLO_EL1, offsetof(struct kvm_cpu_context, sys_regs[APIAKEYLO_EL1])); + DEFINE(CPU_APIBKEYLO_EL1, offsetof(struct kvm_cpu_context, sys_regs[APIBKEYLO_EL1])); + DEFINE(CPU_APDAKEYLO_EL1, offsetof(struct kvm_cpu_context, sys_regs[APDAKEYLO_EL1])); + DEFINE(CPU_APDBKEYLO_EL1, offsetof(struct kvm_cpu_context, sys_regs[APDBKEYLO_EL1])); + DEFINE(CPU_APGAKEYLO_EL1, offsetof(struct kvm_cpu_context, sys_regs[APGAKEYLO_EL1])); DEFINE(CPU_USER_PT_REGS, offsetof(struct kvm_regs, regs)); DEFINE(HOST_CONTEXT_VCPU, offsetof(struct kvm_cpu_context, __hyp_running_vcpu)); #endif diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c index 0b7983442071..516aead3c2a9 100644 --- a/arch/arm64/kvm/handle_exit.c +++ b/arch/arm64/kvm/handle_exit.c @@ -173,20 +173,40 @@ static int handle_sve(struct kvm_vcpu *vcpu, struct kvm_run *run) return 1; } +#define __ptrauth_save_key(regs, key) \ +({ \ + regs[key ## KEYLO_EL1] = read_sysreg_s(SYS_ ## key ## KEYLO_EL1); \ + regs[key ## KEYHI_EL1] = read_sysreg_s(SYS_ ## key ## KEYHI_EL1); \ +}) + +/* + * Handle the guest trying to use a ptrauth instruction, or trying to access a + * ptrauth register. + */ +void kvm_arm_vcpu_ptrauth_trap(struct kvm_vcpu *vcpu) +{ + struct kvm_cpu_context *ctxt; + + if (vcpu_has_ptrauth(vcpu)) { + vcpu_ptrauth_enable(vcpu); + ctxt = vcpu->arch.host_cpu_context; + __ptrauth_save_key(ctxt->sys_regs, APIA); + __ptrauth_save_key(ctxt->sys_regs, APIB); + __ptrauth_save_key(ctxt->sys_regs, APDA); + __ptrauth_save_key(ctxt->sys_regs, APDB); + __ptrauth_save_key(ctxt->sys_regs, APGA); + } else { + kvm_inject_undefined(vcpu); + } +} + /* * Guest usage of a ptrauth instruction (which the guest EL1 did not turn into * a NOP). */ static int kvm_handle_ptrauth(struct kvm_vcpu *vcpu, struct kvm_run *run) { - /* - * We don't currently support ptrauth in a guest, and we mask the ID - * registers to prevent well-behaved guests from trying to make use of - * it. - * - * Inject an UNDEF, as if the feature really isn't present. - */ - kvm_inject_undefined(vcpu); + kvm_arm_vcpu_ptrauth_trap(vcpu); return 1; } diff --git a/arch/arm64/kvm/hyp/entry.S b/arch/arm64/kvm/hyp/entry.S index 675fdc186e3b..93ba3d7ef027 100644 --- a/arch/arm64/kvm/hyp/entry.S +++ b/arch/arm64/kvm/hyp/entry.S @@ -24,6 +24,7 @@ #include #include #include +#include #define CPU_GP_REG_OFFSET(x) (CPU_GP_REGS + x) #define CPU_XREG_OFFSET(x) CPU_GP_REG_OFFSET(CPU_USER_PT_REGS + 8*x) @@ -64,6 +65,13 @@ ENTRY(__guest_enter) add x18, x0, #VCPU_CONTEXT + // Macro ptrauth_switch_to_guest format: + // ptrauth_switch_to_guest(guest cxt, tmp1, tmp2, tmp3) + // The below macro to restore guest keys is not implemented in C code + // as it may cause Pointer Authentication key signing mismatch errors + // when this feature is enabled for kernel code. + ptrauth_switch_to_guest x18, x0, x1, x2 + // Restore guest regs x0-x17 ldp x0, x1, [x18, #CPU_XREG_OFFSET(0)] ldp x2, x3, [x18, #CPU_XREG_OFFSET(2)] @@ -118,6 +126,13 @@ ENTRY(__guest_exit) get_host_ctxt x2, x3 + // Macro ptrauth_switch_to_guest format: + // ptrauth_switch_to_host(guest cxt, host cxt, tmp1, tmp2, tmp3) + // The below macro to save/restore keys is not implemented in C code + // as it may cause Pointer Authentication key signing mismatch errors + // when this feature is enabled for kernel code. + ptrauth_switch_to_host x1, x2, x3, x4, x5 + // Now restore the host regs restore_callee_saved_regs x2 diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 7046c7686321..12bd72e42b91 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -1007,6 +1007,37 @@ static bool access_pmuserenr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, { SYS_DESC(SYS_PMEVTYPERn_EL0(n)), \ access_pmu_evtyper, reset_unknown, (PMEVTYPER0_EL0 + n), } +static bool trap_ptrauth(struct kvm_vcpu *vcpu, + struct sys_reg_params *p, + const struct sys_reg_desc *rd) +{ + kvm_arm_vcpu_ptrauth_trap(vcpu); + + /* + * Return false for both cases as we never skip the trapped + * instruction: + * + * - Either we re-execute the same key register access instruction + * after enabling ptrauth. + * - Or an UNDEF is injected as ptrauth is not supported/enabled. + */ + return false; +} + +static unsigned int ptrauth_visibility(const struct kvm_vcpu *vcpu, + const struct sys_reg_desc *rd) +{ + return vcpu_has_ptrauth(vcpu) ? 0 : REG_HIDDEN_USER | REG_HIDDEN_GUEST; +} + +#define __PTRAUTH_KEY(k) \ + { SYS_DESC(SYS_## k), trap_ptrauth, reset_unknown, k, \ + .visibility = ptrauth_visibility} + +#define PTRAUTH_KEY(k) \ + __PTRAUTH_KEY(k ## KEYLO_EL1), \ + __PTRAUTH_KEY(k ## KEYHI_EL1) + static bool access_arch_timer(struct kvm_vcpu *vcpu, struct sys_reg_params *p, const struct sys_reg_desc *r) @@ -1053,14 +1084,11 @@ static u64 read_id_reg(const struct kvm_vcpu *vcpu, if (id == SYS_ID_AA64PFR0_EL1 && !vcpu_has_sve(vcpu)) { val &= ~(0xfUL << ID_AA64PFR0_SVE_SHIFT); - } else if (id == SYS_ID_AA64ISAR1_EL1) { - const u64 ptrauth_mask = (0xfUL << ID_AA64ISAR1_APA_SHIFT) | - (0xfUL << ID_AA64ISAR1_API_SHIFT) | - (0xfUL << ID_AA64ISAR1_GPA_SHIFT) | - (0xfUL << ID_AA64ISAR1_GPI_SHIFT); - if (val & ptrauth_mask) - kvm_debug("ptrauth unsupported for guests, suppressing\n"); - val &= ~ptrauth_mask; + } else if (id == SYS_ID_AA64ISAR1_EL1 && !vcpu_has_ptrauth(vcpu)) { + val &= ~(0xfUL << ID_AA64ISAR1_APA_SHIFT) | + (0xfUL << ID_AA64ISAR1_API_SHIFT) | + (0xfUL << ID_AA64ISAR1_GPA_SHIFT) | + (0xfUL << ID_AA64ISAR1_GPI_SHIFT); } return val; @@ -1460,6 +1488,12 @@ static const struct sys_reg_desc sys_reg_descs[] = { { SYS_DESC(SYS_TTBR1_EL1), access_vm_reg, reset_unknown, TTBR1_EL1 }, { SYS_DESC(SYS_TCR_EL1), access_vm_reg, reset_val, TCR_EL1, 0 }, + PTRAUTH_KEY(APIA), + PTRAUTH_KEY(APIB), + PTRAUTH_KEY(APDA), + PTRAUTH_KEY(APDB), + PTRAUTH_KEY(APGA), + { SYS_DESC(SYS_AFSR0_EL1), access_vm_reg, reset_unknown, AFSR0_EL1 }, { SYS_DESC(SYS_AFSR1_EL1), access_vm_reg, reset_unknown, AFSR1_EL1 }, { SYS_DESC(SYS_ESR_EL1), access_vm_reg, reset_unknown, ESR_EL1 }, diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index 7039c99cc217..156c09da9e2b 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c @@ -385,6 +385,8 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) vcpu_clear_wfe_traps(vcpu); else vcpu_set_wfe_traps(vcpu); + + vcpu_ptrauth_setup_lazy(vcpu); } void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) -- cgit v1.2.3-55-g7522 From 630a16854d2d28d13e96ff27ab43cc5caa4609fc Mon Sep 17 00:00:00 2001 From: Andrew Murray Date: Tue, 9 Apr 2019 20:22:11 +0100 Subject: arm64: KVM: Encapsulate kvm_cpu_context in kvm_host_data The virt/arm core allocates a kvm_cpu_context_t percpu, at present this is a typedef to kvm_cpu_context and is used to store host cpu context. The kvm_cpu_context structure is also used elsewhere to hold vcpu context. In order to use the percpu to hold additional future host information we encapsulate kvm_cpu_context in a new structure and rename the typedef and percpu to match. Signed-off-by: Andrew Murray Reviewed-by: Suzuki K Poulose Signed-off-by: Marc Zyngier --- arch/arm/include/asm/kvm_host.h | 10 +++++++--- arch/arm64/include/asm/kvm_asm.h | 3 ++- arch/arm64/include/asm/kvm_host.h | 16 ++++++++++------ arch/arm64/kernel/asm-offsets.c | 1 + virt/kvm/arm/arm.c | 14 ++++++++------ 5 files changed, 28 insertions(+), 16 deletions(-) (limited to 'virt/kvm') diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index fe7754315e9c..2d721ab05925 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -153,9 +153,13 @@ struct kvm_cpu_context { u32 cp15[NR_CP15_REGS]; }; -typedef struct kvm_cpu_context kvm_cpu_context_t; +struct kvm_host_data { + struct kvm_cpu_context host_ctxt; +}; + +typedef struct kvm_host_data kvm_host_data_t; -static inline void kvm_init_host_cpu_context(kvm_cpu_context_t *cpu_ctxt, +static inline void kvm_init_host_cpu_context(struct kvm_cpu_context *cpu_ctxt, int cpu) { /* The host's MPIDR is immutable, so let's set it up at boot time */ @@ -185,7 +189,7 @@ struct kvm_vcpu_arch { struct kvm_vcpu_fault_info fault; /* Host FP context */ - kvm_cpu_context_t *host_cpu_context; + struct kvm_cpu_context *host_cpu_context; /* VGIC state */ struct vgic_cpu vgic_cpu; diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index f5b79e995f40..ff73f5462aca 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -108,7 +108,8 @@ extern u32 __kvm_get_mdcr_el2(void); .endm .macro get_host_ctxt reg, tmp - hyp_adr_this_cpu \reg, kvm_host_cpu_state, \tmp + hyp_adr_this_cpu \reg, kvm_host_data, \tmp + add \reg, \reg, #HOST_DATA_CONTEXT .endm .macro get_vcpu_ptr vcpu, ctxt diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index f772ac2fb3e9..9ba59832b71a 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -233,7 +233,11 @@ struct kvm_cpu_context { struct kvm_vcpu *__hyp_running_vcpu; }; -typedef struct kvm_cpu_context kvm_cpu_context_t; +struct kvm_host_data { + struct kvm_cpu_context host_ctxt; +}; + +typedef struct kvm_host_data kvm_host_data_t; struct vcpu_reset_state { unsigned long pc; @@ -278,7 +282,7 @@ struct kvm_vcpu_arch { struct kvm_guest_debug_arch external_debug_state; /* Pointer to host CPU context */ - kvm_cpu_context_t *host_cpu_context; + struct kvm_cpu_context *host_cpu_context; struct thread_info *host_thread_info; /* hyp VA */ struct user_fpsimd_state *host_fpsimd_state; /* hyp VA */ @@ -483,9 +487,9 @@ void kvm_set_sei_esr(struct kvm_vcpu *vcpu, u64 syndrome); struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr); -DECLARE_PER_CPU(kvm_cpu_context_t, kvm_host_cpu_state); +DECLARE_PER_CPU(kvm_host_data_t, kvm_host_data); -static inline void kvm_init_host_cpu_context(kvm_cpu_context_t *cpu_ctxt, +static inline void kvm_init_host_cpu_context(struct kvm_cpu_context *cpu_ctxt, int cpu) { /* The host's MPIDR is immutable, so let's set it up at boot time */ @@ -503,8 +507,8 @@ static inline void __cpu_init_hyp_mode(phys_addr_t pgd_ptr, * kernel's mapping to the linear mapping, and store it in tpidr_el2 * so that we can use adr_l to access per-cpu variables in EL2. */ - u64 tpidr_el2 = ((u64)this_cpu_ptr(&kvm_host_cpu_state) - - (u64)kvm_ksym_ref(kvm_host_cpu_state)); + u64 tpidr_el2 = ((u64)this_cpu_ptr(&kvm_host_data) - + (u64)kvm_ksym_ref(kvm_host_data)); /* * Call initialization code, and switch to the full blown HYP code. diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index 8178330a9f7a..768b23101ff0 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -134,6 +134,7 @@ int main(void) DEFINE(CPU_APGAKEYLO_EL1, offsetof(struct kvm_cpu_context, sys_regs[APGAKEYLO_EL1])); DEFINE(CPU_USER_PT_REGS, offsetof(struct kvm_regs, regs)); DEFINE(HOST_CONTEXT_VCPU, offsetof(struct kvm_cpu_context, __hyp_running_vcpu)); + DEFINE(HOST_DATA_CONTEXT, offsetof(struct kvm_host_data, host_ctxt)); #endif #ifdef CONFIG_CPU_PM DEFINE(CPU_CTX_SP, offsetof(struct cpu_suspend_ctx, sp)); diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index 156c09da9e2b..e960b91551d6 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c @@ -56,7 +56,7 @@ __asm__(".arch_extension virt"); #endif -DEFINE_PER_CPU(kvm_cpu_context_t, kvm_host_cpu_state); +DEFINE_PER_CPU(kvm_host_data_t, kvm_host_data); static DEFINE_PER_CPU(unsigned long, kvm_arm_hyp_stack_page); /* Per-CPU variable containing the currently running vcpu. */ @@ -360,8 +360,10 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { int *last_ran; + kvm_host_data_t *cpu_data; last_ran = this_cpu_ptr(vcpu->kvm->arch.last_vcpu_ran); + cpu_data = this_cpu_ptr(&kvm_host_data); /* * We might get preempted before the vCPU actually runs, but @@ -373,7 +375,7 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) } vcpu->cpu = cpu; - vcpu->arch.host_cpu_context = this_cpu_ptr(&kvm_host_cpu_state); + vcpu->arch.host_cpu_context = &cpu_data->host_ctxt; kvm_arm_set_running_vcpu(vcpu); kvm_vgic_load(vcpu); @@ -1569,11 +1571,11 @@ static int init_hyp_mode(void) } for_each_possible_cpu(cpu) { - kvm_cpu_context_t *cpu_ctxt; + kvm_host_data_t *cpu_data; - cpu_ctxt = per_cpu_ptr(&kvm_host_cpu_state, cpu); - kvm_init_host_cpu_context(cpu_ctxt, cpu); - err = create_hyp_mappings(cpu_ctxt, cpu_ctxt + 1, PAGE_HYP); + cpu_data = per_cpu_ptr(&kvm_host_data, cpu); + kvm_init_host_cpu_context(&cpu_data->host_ctxt, cpu); + err = create_hyp_mappings(cpu_data, cpu_data + 1, PAGE_HYP); if (err) { kvm_err("Cannot map host CPU state: %d\n", err); -- cgit v1.2.3-55-g7522 From 435e53fb5e21ad1820c5c69f208304c0e5623d01 Mon Sep 17 00:00:00 2001 From: Andrew Murray Date: Tue, 9 Apr 2019 20:22:15 +0100 Subject: arm64: KVM: Enable VHE support for :G/:H perf event modifiers With VHE different exception levels are used between the host (EL2) and guest (EL1) with a shared exception level for userpace (EL0). We can take advantage of this and use the PMU's exception level filtering to avoid enabling/disabling counters in the world-switch code. Instead we just modify the counter type to include or exclude EL0 at vcpu_{load,put} time. We also ensure that trapped PMU system register writes do not re-enable EL0 when reconfiguring the backing perf events. This approach completely avoids blackout windows seen with !VHE. Suggested-by: Christoffer Dall Signed-off-by: Andrew Murray Acked-by: Will Deacon Reviewed-by: Suzuki K Poulose Signed-off-by: Marc Zyngier --- arch/arm/include/asm/kvm_host.h | 3 ++ arch/arm64/include/asm/kvm_host.h | 5 ++- arch/arm64/kernel/perf_event.c | 6 ++- arch/arm64/kvm/pmu.c | 88 ++++++++++++++++++++++++++++++++++++++- arch/arm64/kvm/sys_regs.c | 3 ++ virt/kvm/arm/arm.c | 2 + 6 files changed, 103 insertions(+), 4 deletions(-) (limited to 'virt/kvm') diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index 2d721ab05925..075e1921fdd9 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -368,6 +368,9 @@ static inline void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu) {} static inline void kvm_arch_vcpu_ctxsync_fp(struct kvm_vcpu *vcpu) {} static inline void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu) {} +static inline void kvm_vcpu_pmu_restore_guest(struct kvm_vcpu *vcpu) {} +static inline void kvm_vcpu_pmu_restore_host(struct kvm_vcpu *vcpu) {} + static inline void kvm_arm_vhe_guest_enter(void) {} static inline void kvm_arm_vhe_guest_exit(void) {} diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 645d74c705d6..2a8d3f8ca22c 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -580,7 +580,7 @@ void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu); static inline bool kvm_pmu_counter_deferred(struct perf_event_attr *attr) { - return attr->exclude_host; + return (!has_vhe() && attr->exclude_host); } #ifdef CONFIG_KVM /* Avoid conflicts with core headers if CONFIG_KVM=n */ @@ -594,6 +594,9 @@ void kvm_clr_pmu_events(u32 clr); void __pmu_switch_to_host(struct kvm_cpu_context *host_ctxt); bool __pmu_switch_to_guest(struct kvm_cpu_context *host_ctxt); + +void kvm_vcpu_pmu_restore_guest(struct kvm_vcpu *vcpu); +void kvm_vcpu_pmu_restore_host(struct kvm_vcpu *vcpu); #else static inline void kvm_set_pmu_events(u32 set, struct perf_event_attr *attr) {} static inline void kvm_clr_pmu_events(u32 clr) {} diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index 6bb28aaf5aea..314b1adedf06 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -847,8 +847,12 @@ static int armv8pmu_set_event_filter(struct hw_perf_event *event, * with other architectures (x86 and Power). */ if (is_kernel_in_hyp_mode()) { - if (!attr->exclude_kernel) + if (!attr->exclude_kernel && !attr->exclude_host) config_base |= ARMV8_PMU_INCLUDE_EL2; + if (attr->exclude_guest) + config_base |= ARMV8_PMU_EXCLUDE_EL1; + if (attr->exclude_host) + config_base |= ARMV8_PMU_EXCLUDE_EL0; } else { if (!attr->exclude_hv && !attr->exclude_host) config_base |= ARMV8_PMU_INCLUDE_EL2; diff --git a/arch/arm64/kvm/pmu.c b/arch/arm64/kvm/pmu.c index 599e6d3f692e..3f99a095a1ff 100644 --- a/arch/arm64/kvm/pmu.c +++ b/arch/arm64/kvm/pmu.c @@ -8,11 +8,19 @@ #include /* - * Given the exclude_{host,guest} attributes, determine if we are going - * to need to switch counters at guest entry/exit. + * Given the perf event attributes and system type, determine + * if we are going to need to switch counters at guest entry/exit. */ static bool kvm_pmu_switch_needed(struct perf_event_attr *attr) { + /** + * With VHE the guest kernel runs at EL1 and the host at EL2, + * where user (EL0) is excluded then we have no reason to switch + * counters. + */ + if (has_vhe() && attr->exclude_user) + return false; + /* Only switch if attributes are different */ return (attr->exclude_host != attr->exclude_guest); } @@ -82,3 +90,79 @@ void __hyp_text __pmu_switch_to_host(struct kvm_cpu_context *host_ctxt) if (pmu->events_host) write_sysreg(pmu->events_host, pmcntenset_el0); } + +/* + * Modify ARMv8 PMU events to include EL0 counting + */ +static void kvm_vcpu_pmu_enable_el0(unsigned long events) +{ + u64 typer; + u32 counter; + + for_each_set_bit(counter, &events, 32) { + write_sysreg(counter, pmselr_el0); + isb(); + typer = read_sysreg(pmxevtyper_el0) & ~ARMV8_PMU_EXCLUDE_EL0; + write_sysreg(typer, pmxevtyper_el0); + isb(); + } +} + +/* + * Modify ARMv8 PMU events to exclude EL0 counting + */ +static void kvm_vcpu_pmu_disable_el0(unsigned long events) +{ + u64 typer; + u32 counter; + + for_each_set_bit(counter, &events, 32) { + write_sysreg(counter, pmselr_el0); + isb(); + typer = read_sysreg(pmxevtyper_el0) | ARMV8_PMU_EXCLUDE_EL0; + write_sysreg(typer, pmxevtyper_el0); + isb(); + } +} + +/* + * On VHE ensure that only guest events have EL0 counting enabled + */ +void kvm_vcpu_pmu_restore_guest(struct kvm_vcpu *vcpu) +{ + struct kvm_cpu_context *host_ctxt; + struct kvm_host_data *host; + u32 events_guest, events_host; + + if (!has_vhe()) + return; + + host_ctxt = vcpu->arch.host_cpu_context; + host = container_of(host_ctxt, struct kvm_host_data, host_ctxt); + events_guest = host->pmu_events.events_guest; + events_host = host->pmu_events.events_host; + + kvm_vcpu_pmu_enable_el0(events_guest); + kvm_vcpu_pmu_disable_el0(events_host); +} + +/* + * On VHE ensure that only host events have EL0 counting enabled + */ +void kvm_vcpu_pmu_restore_host(struct kvm_vcpu *vcpu) +{ + struct kvm_cpu_context *host_ctxt; + struct kvm_host_data *host; + u32 events_guest, events_host; + + if (!has_vhe()) + return; + + host_ctxt = vcpu->arch.host_cpu_context; + host = container_of(host_ctxt, struct kvm_host_data, host_ctxt); + events_guest = host->pmu_events.events_guest; + events_host = host->pmu_events.events_host; + + kvm_vcpu_pmu_enable_el0(events_host); + kvm_vcpu_pmu_disable_el0(events_guest); +} diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 12bd72e42b91..9d02643bc601 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -695,6 +695,7 @@ static bool access_pmcr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, val |= p->regval & ARMV8_PMU_PMCR_MASK; __vcpu_sys_reg(vcpu, PMCR_EL0) = val; kvm_pmu_handle_pmcr(vcpu, val); + kvm_vcpu_pmu_restore_guest(vcpu); } else { /* PMCR.P & PMCR.C are RAZ */ val = __vcpu_sys_reg(vcpu, PMCR_EL0) @@ -850,6 +851,7 @@ static bool access_pmu_evtyper(struct kvm_vcpu *vcpu, struct sys_reg_params *p, if (p->is_write) { kvm_pmu_set_counter_event_type(vcpu, p->regval, idx); __vcpu_sys_reg(vcpu, reg) = p->regval & ARMV8_PMU_EVTYPE_MASK; + kvm_vcpu_pmu_restore_guest(vcpu); } else { p->regval = __vcpu_sys_reg(vcpu, reg) & ARMV8_PMU_EVTYPE_MASK; } @@ -875,6 +877,7 @@ static bool access_pmcnten(struct kvm_vcpu *vcpu, struct sys_reg_params *p, /* accessing PMCNTENSET_EL0 */ __vcpu_sys_reg(vcpu, PMCNTENSET_EL0) |= val; kvm_pmu_enable_counter(vcpu, val); + kvm_vcpu_pmu_restore_guest(vcpu); } else { /* accessing PMCNTENCLR_EL0 */ __vcpu_sys_reg(vcpu, PMCNTENSET_EL0) &= ~val; diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index e960b91551d6..8b7ca101f0f7 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c @@ -382,6 +382,7 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) kvm_timer_vcpu_load(vcpu); kvm_vcpu_load_sysregs(vcpu); kvm_arch_vcpu_load_fp(vcpu); + kvm_vcpu_pmu_restore_guest(vcpu); if (single_task_running()) vcpu_clear_wfe_traps(vcpu); @@ -397,6 +398,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) kvm_vcpu_put_sysregs(vcpu); kvm_timer_vcpu_put(vcpu); kvm_vgic_put(vcpu); + kvm_vcpu_pmu_restore_host(vcpu); vcpu->cpu = -1; -- cgit v1.2.3-55-g7522 From cdd6ad3ac63d2fa320baefcf92a02a918375c30f Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Tue, 5 Mar 2019 05:30:01 -0500 Subject: KVM: polling: add architecture backend to disable polling There are cases where halt polling is unwanted. For example when running KVM on an over committed LPAR we rather want to give back the CPU to neighbour LPARs instead of polling. Let us provide a callback that allows architectures to disable polling. Signed-off-by: Christian Borntraeger Acked-by: Paolo Bonzini Reviewed-by: Cornelia Huck Signed-off-by: Christian Borntraeger --- include/linux/kvm_host.h | 10 ++++++++++ virt/kvm/Kconfig | 3 +++ virt/kvm/kvm_main.c | 2 +- 3 files changed, 14 insertions(+), 1 deletion(-) (limited to 'virt/kvm') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 9d55c63db09b..b3aff1a3f633 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1305,6 +1305,16 @@ static inline bool vcpu_valid_wakeup(struct kvm_vcpu *vcpu) } #endif /* CONFIG_HAVE_KVM_INVALID_WAKEUPS */ +#ifdef CONFIG_HAVE_KVM_NO_POLL +/* Callback that tells if we must not poll */ +bool kvm_arch_no_poll(struct kvm_vcpu *vcpu); +#else +static inline bool kvm_arch_no_poll(struct kvm_vcpu *vcpu) +{ + return false; +} +#endif /* CONFIG_HAVE_KVM_NO_POLL */ + #ifdef CONFIG_HAVE_KVM_VCPU_ASYNC_IOCTL long kvm_arch_vcpu_async_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg); diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig index ea434ddc8499..aad9284c043a 100644 --- a/virt/kvm/Kconfig +++ b/virt/kvm/Kconfig @@ -57,3 +57,6 @@ config HAVE_KVM_VCPU_ASYNC_IOCTL config HAVE_KVM_VCPU_RUN_PID_CHANGE bool + +config HAVE_KVM_NO_POLL + bool diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 55fe8e20d8fd..23aec2f4ba71 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2253,7 +2253,7 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu) u64 block_ns; start = cur = ktime_get(); - if (vcpu->halt_poll_ns) { + if (vcpu->halt_poll_ns && !kvm_arch_no_poll(vcpu)) { ktime_t stop = ktime_add_ns(ktime_get(), vcpu->halt_poll_ns); ++vcpu->stat.halt_attempted_poll; -- cgit v1.2.3-55-g7522 From a1cd3f0883f435e5f9ae6530d7e62b361c87a91a Mon Sep 17 00:00:00 2001 From: Cédric Le Goater Date: Thu, 18 Apr 2019 12:39:36 +0200 Subject: KVM: Introduce a 'mmap' method for KVM devices Some KVM devices will want to handle special mappings related to the underlying HW. For instance, the XIVE interrupt controller of the POWER9 processor has MMIO pages for thread interrupt management and for interrupt source control that need to be exposed to the guest when the OS has the required support. Cc: Paolo Bonzini Signed-off-by: Cédric Le Goater Reviewed-by: David Gibson Signed-off-by: Paul Mackerras --- include/linux/kvm_host.h | 1 + virt/kvm/kvm_main.c | 11 +++++++++++ 2 files changed, 12 insertions(+) (limited to 'virt/kvm') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 9d55c63db09b..831d963451d8 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1245,6 +1245,7 @@ struct kvm_device_ops { int (*has_attr)(struct kvm_device *dev, struct kvm_device_attr *attr); long (*ioctl)(struct kvm_device *dev, unsigned int ioctl, unsigned long arg); + int (*mmap)(struct kvm_device *dev, struct vm_area_struct *vma); }; void kvm_device_get(struct kvm_device *dev); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 55fe8e20d8fd..ea2018ae1cd7 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2884,6 +2884,16 @@ out: } #endif +static int kvm_device_mmap(struct file *filp, struct vm_area_struct *vma) +{ + struct kvm_device *dev = filp->private_data; + + if (dev->ops->mmap) + return dev->ops->mmap(dev, vma); + + return -ENODEV; +} + static int kvm_device_ioctl_attr(struct kvm_device *dev, int (*accessor)(struct kvm_device *dev, struct kvm_device_attr *attr), @@ -2936,6 +2946,7 @@ static const struct file_operations kvm_device_fops = { .unlocked_ioctl = kvm_device_ioctl, .release = kvm_device_release, KVM_COMPAT(kvm_device_ioctl), + .mmap = kvm_device_mmap, }; struct kvm_device *kvm_device_from_filp(struct file *filp) -- cgit v1.2.3-55-g7522 From 2bde9b3ec8bdf60788e9e2ce8c07a2f8d6003dbd Mon Sep 17 00:00:00 2001 From: Cédric Le Goater Date: Thu, 18 Apr 2019 12:39:41 +0200 Subject: KVM: Introduce a 'release' method for KVM devices When a P9 sPAPR VM boots, the CAS negotiation process determines which interrupt mode to use (XICS legacy or XIVE native) and invokes a machine reset to activate the chosen mode. To be able to switch from one interrupt mode to another, we introduce the capability to release a KVM device without destroying the VM. The KVM device interface is extended with a new 'release' method which is called when the file descriptor of the device is closed. Once 'release' is called, the 'destroy' method will not be called anymore as the device is removed from the device list of the VM. Cc: Paolo Bonzini Signed-off-by: Cédric Le Goater Reviewed-by: David Gibson Signed-off-by: Paul Mackerras --- include/linux/kvm_host.h | 9 +++++++++ virt/kvm/kvm_main.c | 13 +++++++++++++ 2 files changed, 22 insertions(+) (limited to 'virt/kvm') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 831d963451d8..722692e2f745 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1240,6 +1240,15 @@ struct kvm_device_ops { */ void (*destroy)(struct kvm_device *dev); + /* + * Release is an alternative method to free the device. It is + * called when the device file descriptor is closed. Once + * release is called, the destroy method will not be called + * anymore as the device is removed from the device list of + * the VM. kvm->lock is held. + */ + void (*release)(struct kvm_device *dev); + int (*set_attr)(struct kvm_device *dev, struct kvm_device_attr *attr); int (*get_attr)(struct kvm_device *dev, struct kvm_device_attr *attr); int (*has_attr)(struct kvm_device *dev, struct kvm_device_attr *attr); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index ea2018ae1cd7..ea2619d5ca98 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2938,6 +2938,19 @@ static int kvm_device_release(struct inode *inode, struct file *filp) struct kvm_device *dev = filp->private_data; struct kvm *kvm = dev->kvm; + if (!dev) + return -ENODEV; + + if (dev->kvm != kvm) + return -EPERM; + + if (dev->ops->release) { + mutex_lock(&kvm->lock); + list_del(&dev->vm_node); + dev->ops->release(dev); + mutex_unlock(&kvm->lock); + } + kvm_put_kvm(kvm); return 0; } -- cgit v1.2.3-55-g7522 From 65c4189de8c1d995f6bc2cc96b22206405466b53 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 17 Apr 2019 15:28:44 +0200 Subject: KVM: fix KVM_CLEAR_DIRTY_LOG for memory slots of unaligned size If a memory slot's size is not a multiple of 64 pages (256K), then the KVM_CLEAR_DIRTY_LOG API is unusable: clearing the final 64 pages either requires the requested page range to go beyond memslot->npages, or requires log->num_pages to be unaligned, and kvm_clear_dirty_log_protect requires log->num_pages to be both in range and aligned. To allow this case, allow log->num_pages not to be a multiple of 64 if it ends exactly on the last page of the slot. Reported-by: Peter Xu Fixes: 98938aa8edd6 ("KVM: validate userspace input in kvm_clear_dirty_log_protect()", 2019-01-02) Signed-off-by: Paolo Bonzini --- Documentation/virtual/kvm/api.txt | 5 +++-- tools/testing/selftests/kvm/dirty_log_test.c | 4 ++-- virt/kvm/kvm_main.c | 7 ++++--- 3 files changed, 9 insertions(+), 7 deletions(-) (limited to 'virt/kvm') diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 26dc1280b49b..675cb0bea903 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -3812,8 +3812,9 @@ The ioctl clears the dirty status of pages in a memory slot, according to the bitmap that is passed in struct kvm_clear_dirty_log's dirty_bitmap field. Bit 0 of the bitmap corresponds to page "first_page" in the memory slot, and num_pages is the size in bits of the input bitmap. -Both first_page and num_pages must be a multiple of 64. For each bit -that is set in the input bitmap, the corresponding page is marked "clean" +first_page must be a multiple of 64; num_pages must also be a multiple of +64 unless first_page + num_pages is the size of the memory slot. For each +bit that is set in the input bitmap, the corresponding page is marked "clean" in KVM's dirty bitmap, and dirty tracking is re-enabled for that page (for example via write-protection, or by clearing the dirty bit in a page table entry). diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c index 4715cfba20dc..052fb5856df4 100644 --- a/tools/testing/selftests/kvm/dirty_log_test.c +++ b/tools/testing/selftests/kvm/dirty_log_test.c @@ -289,7 +289,7 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations, max_gfn = (1ul << (guest_pa_bits - guest_page_shift)) - 1; guest_page_size = (1ul << guest_page_shift); /* 1G of guest page sized pages */ - guest_num_pages = (1ul << (30 - guest_page_shift)); + guest_num_pages = (1ul << (30 - guest_page_shift)) + 3; host_page_size = getpagesize(); host_num_pages = (guest_num_pages * guest_page_size) / host_page_size + !!((guest_num_pages * guest_page_size) % host_page_size); @@ -359,7 +359,7 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations, kvm_vm_get_dirty_log(vm, TEST_MEM_SLOT_INDEX, bmap); #ifdef USE_CLEAR_DIRTY_LOG kvm_vm_clear_dirty_log(vm, TEST_MEM_SLOT_INDEX, bmap, 0, - DIV_ROUND_UP(host_num_pages, 64) * 64); + host_num_pages); #endif vm_dirty_log_verify(bmap); iteration++; diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 71ac0de892dc..e9ca417b9ae9 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1240,7 +1240,7 @@ int kvm_clear_dirty_log_protect(struct kvm *kvm, if (as_id >= KVM_ADDRESS_SPACE_NUM || id >= KVM_USER_MEM_SLOTS) return -EINVAL; - if ((log->first_page & 63) || (log->num_pages & 63)) + if (log->first_page & 63) return -EINVAL; slots = __kvm_memslots(kvm, as_id); @@ -1253,8 +1253,9 @@ int kvm_clear_dirty_log_protect(struct kvm *kvm, n = kvm_dirty_bitmap_bytes(memslot); if (log->first_page > memslot->npages || - log->num_pages > memslot->npages - log->first_page) - return -EINVAL; + log->num_pages > memslot->npages - log->first_page || + (log->num_pages < memslot->npages - log->first_page && (log->num_pages & 63))) + return -EINVAL; *flush = false; dirty_bitmap_buffer = kvm_second_dirty_bitmap(memslot); -- cgit v1.2.3-55-g7522 From b8b002209c061273fd1ef7bb3c3c32301623a282 Mon Sep 17 00:00:00 2001 From: Jiang Biao Date: Tue, 23 Apr 2019 19:40:30 +0800 Subject: kvm_main: fix some comments is_dirty has been renamed to flush, but the comment for it is outdated. And the description about @flush parameter for kvm_clear_dirty_log_protect() is missing, add it in this patch as well. Signed-off-by: Jiang Biao Reviewed-by: Cornelia Huck Signed-off-by: Paolo Bonzini --- virt/kvm/kvm_main.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'virt/kvm') diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index e9ca417b9ae9..3194aa3d0b43 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1134,11 +1134,11 @@ EXPORT_SYMBOL_GPL(kvm_get_dirty_log); #ifdef CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT /** - * kvm_get_dirty_log_protect - get a snapshot of dirty pages, and if any pages + * kvm_get_dirty_log_protect - get a snapshot of dirty pages * and reenable dirty page tracking for the corresponding pages. * @kvm: pointer to kvm instance * @log: slot id and address to which we copy the log - * @is_dirty: flag set if any page is dirty + * @flush: true if TLB flush is needed by caller * * We need to keep it in mind that VCPU threads can write to the bitmap * concurrently. So, to avoid losing track of dirty pages we keep the @@ -1223,6 +1223,7 @@ EXPORT_SYMBOL_GPL(kvm_get_dirty_log_protect); * and reenable dirty page tracking for the corresponding pages. * @kvm: pointer to kvm instance * @log: slot id and address from which to fetch the bitmap of dirty pages + * @flush: true if TLB flush is needed by caller */ int kvm_clear_dirty_log_protect(struct kvm *kvm, struct kvm_clear_dirty_log *log, bool *flush) -- cgit v1.2.3-55-g7522 From e45adf665a53df0db37f784ed87c6b57ddd81885 Mon Sep 17 00:00:00 2001 From: KarimAllah Ahmed Date: Thu, 31 Jan 2019 21:24:34 +0100 Subject: KVM: Introduce a new guest mapping API In KVM, specially for nested guests, there is a dominant pattern of: => map guest memory -> do_something -> unmap guest memory In addition to all this unnecessarily noise in the code due to boiler plate code, most of the time the mapping function does not properly handle memory that is not backed by "struct page". This new guest mapping API encapsulate most of this boiler plate code and also handles guest memory that is not backed by "struct page". The current implementation of this API is using memremap for memory that is not backed by a "struct page" which would lead to a huge slow-down if it was used for high-frequency mapping operations. The API does not have any effect on current setups where guest memory is backed by a "struct page". Further patches are going to also introduce a pfn-cache which would significantly improve the performance of the memremap case. Signed-off-by: KarimAllah Ahmed Reviewed-by: Konrad Rzeszutek Wilk Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 28 +++++++++++++++++++++ virt/kvm/kvm_main.c | 64 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 92 insertions(+) (limited to 'virt/kvm') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 43f87ad83ff4..6f665c16e31d 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -227,6 +227,32 @@ enum { READING_SHADOW_PAGE_TABLES, }; +#define KVM_UNMAPPED_PAGE ((void *) 0x500 + POISON_POINTER_DELTA) + +struct kvm_host_map { + /* + * Only valid if the 'pfn' is managed by the host kernel (i.e. There is + * a 'struct page' for it. When using mem= kernel parameter some memory + * can be used as guest memory but they are not managed by host + * kernel). + * If 'pfn' is not managed by the host kernel, this field is + * initialized to KVM_UNMAPPED_PAGE. + */ + struct page *page; + void *hva; + kvm_pfn_t pfn; + kvm_pfn_t gfn; +}; + +/* + * Used to check if the mapping is valid or not. Never use 'kvm_host_map' + * directly to check for that. + */ +static inline bool kvm_vcpu_mapped(struct kvm_host_map *map) +{ + return !!map->hva; +} + /* * Sometimes a large or cross-page mmio needs to be broken up into separate * exits for userspace servicing. @@ -733,7 +759,9 @@ struct kvm_memslots *kvm_vcpu_memslots(struct kvm_vcpu *vcpu); struct kvm_memory_slot *kvm_vcpu_gfn_to_memslot(struct kvm_vcpu *vcpu, gfn_t gfn); kvm_pfn_t kvm_vcpu_gfn_to_pfn_atomic(struct kvm_vcpu *vcpu, gfn_t gfn); kvm_pfn_t kvm_vcpu_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn); +int kvm_vcpu_map(struct kvm_vcpu *vcpu, gpa_t gpa, struct kvm_host_map *map); struct page *kvm_vcpu_gfn_to_page(struct kvm_vcpu *vcpu, gfn_t gfn); +void kvm_vcpu_unmap(struct kvm_vcpu *vcpu, struct kvm_host_map *map, bool dirty); unsigned long kvm_vcpu_gfn_to_hva(struct kvm_vcpu *vcpu, gfn_t gfn); unsigned long kvm_vcpu_gfn_to_hva_prot(struct kvm_vcpu *vcpu, gfn_t gfn, bool *writable); int kvm_vcpu_read_guest_page(struct kvm_vcpu *vcpu, gfn_t gfn, void *data, int offset, diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 3194aa3d0b43..53de2f946f9e 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1742,6 +1742,70 @@ struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn) } EXPORT_SYMBOL_GPL(gfn_to_page); +static int __kvm_map_gfn(struct kvm_memory_slot *slot, gfn_t gfn, + struct kvm_host_map *map) +{ + kvm_pfn_t pfn; + void *hva = NULL; + struct page *page = KVM_UNMAPPED_PAGE; + + if (!map) + return -EINVAL; + + pfn = gfn_to_pfn_memslot(slot, gfn); + if (is_error_noslot_pfn(pfn)) + return -EINVAL; + + if (pfn_valid(pfn)) { + page = pfn_to_page(pfn); + hva = kmap(page); + } else { + hva = memremap(pfn_to_hpa(pfn), PAGE_SIZE, MEMREMAP_WB); + } + + if (!hva) + return -EFAULT; + + map->page = page; + map->hva = hva; + map->pfn = pfn; + map->gfn = gfn; + + return 0; +} + +int kvm_vcpu_map(struct kvm_vcpu *vcpu, gfn_t gfn, struct kvm_host_map *map) +{ + return __kvm_map_gfn(kvm_vcpu_gfn_to_memslot(vcpu, gfn), gfn, map); +} +EXPORT_SYMBOL_GPL(kvm_vcpu_map); + +void kvm_vcpu_unmap(struct kvm_vcpu *vcpu, struct kvm_host_map *map, + bool dirty) +{ + if (!map) + return; + + if (!map->hva) + return; + + if (map->page) + kunmap(map->page); + else + memunmap(map->hva); + + if (dirty) { + kvm_vcpu_mark_page_dirty(vcpu, map->gfn); + kvm_release_pfn_dirty(map->pfn); + } else { + kvm_release_pfn_clean(map->pfn); + } + + map->hva = NULL; + map->page = NULL; +} +EXPORT_SYMBOL_GPL(kvm_vcpu_unmap); + struct page *kvm_vcpu_gfn_to_page(struct kvm_vcpu *vcpu, gfn_t gfn) { kvm_pfn_t pfn; -- cgit v1.2.3-55-g7522 From 4ddc9204572c33f2eb91fbdb1d99d8078388b67d Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Wed, 8 May 2019 17:15:45 +0800 Subject: KVM: Fix the bitmap range to copy during clear dirty kvm_dirty_bitmap_bytes() will return the size of the dirty bitmap of the memslot rather than the size of bitmap passed over from the ioctl. Here for KVM_CLEAR_DIRTY_LOG we should only copy exactly the size of bitmap that covers kvm_clear_dirty_log.num_pages. Signed-off-by: Peter Xu Cc: stable@vger.kernel.org Fixes: 2a31b9db153530df4aa02dac8c32837bf5f47019 Signed-off-by: Paolo Bonzini --- virt/kvm/kvm_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'virt/kvm') diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 53de2f946f9e..ad39c57de82d 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1251,7 +1251,7 @@ int kvm_clear_dirty_log_protect(struct kvm *kvm, if (!dirty_bitmap) return -ENOENT; - n = kvm_dirty_bitmap_bytes(memslot); + n = ALIGN(log->num_pages, BITS_PER_LONG) / 8; if (log->first_page > memslot->npages || log->num_pages > memslot->npages - log->first_page || -- cgit v1.2.3-55-g7522 From 53eac7a8f8cf3d7dc5ecac1946f31442f5eee5f3 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Wed, 8 May 2019 17:15:46 +0800 Subject: KVM: Fix kvm_clear_dirty_log_protect off-by-(minus-)one Just imaging the case where num_pages < BITS_PER_LONG, then the loop will be skipped while it shouldn't. Signed-off-by: Peter Xu Fixes: 2a31b9db153530df4aa02dac8c32837bf5f47019 Signed-off-by: Paolo Bonzini --- virt/kvm/kvm_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'virt/kvm') diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index ad39c57de82d..7883e0ad07fe 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1264,8 +1264,8 @@ int kvm_clear_dirty_log_protect(struct kvm *kvm, return -EFAULT; spin_lock(&kvm->mmu_lock); - for (offset = log->first_page, - i = offset / BITS_PER_LONG, n = log->num_pages / BITS_PER_LONG; n--; + for (offset = log->first_page, i = offset / BITS_PER_LONG, + n = DIV_ROUND_UP(log->num_pages, BITS_PER_LONG); n--; i++, offset += BITS_PER_LONG) { unsigned long mask = *dirty_bitmap_buffer++; atomic_long_t *p = (atomic_long_t *) &dirty_bitmap[i]; -- cgit v1.2.3-55-g7522 From d7547c55cbe7471255ca51f14bcd4699f5eaabe5 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Wed, 8 May 2019 17:15:47 +0800 Subject: KVM: Introduce KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2 The previous KVM_CAP_MANUAL_DIRTY_LOG_PROTECT has some problem which blocks the correct usage from userspace. Obsolete the old one and introduce a new capability bit for it. Suggested-by: Paolo Bonzini Signed-off-by: Peter Xu Signed-off-by: Paolo Bonzini --- Documentation/virtual/kvm/api.txt | 15 ++++++++++----- include/uapi/linux/kvm.h | 5 +++-- tools/testing/selftests/kvm/dirty_log_test.c | 4 ++-- virt/kvm/kvm_main.c | 4 ++-- 4 files changed, 17 insertions(+), 11 deletions(-) (limited to 'virt/kvm') diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 675cb0bea903..47a5eb00bc53 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -330,7 +330,7 @@ They must be less than the value that KVM_CHECK_EXTENSION returns for the KVM_CAP_MULTI_ADDRESS_SPACE capability. The bits in the dirty bitmap are cleared before the ioctl returns, unless -KVM_CAP_MANUAL_DIRTY_LOG_PROTECT is enabled. For more information, +KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2 is enabled. For more information, see the description of the capability. 4.9 KVM_SET_MEMORY_ALIAS @@ -3791,7 +3791,7 @@ to I/O ports. 4.117 KVM_CLEAR_DIRTY_LOG (vm ioctl) -Capability: KVM_CAP_MANUAL_DIRTY_LOG_PROTECT +Capability: KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2 Architectures: x86 Type: vm ioctl Parameters: struct kvm_dirty_log (in) @@ -3824,10 +3824,10 @@ the address space for which you want to return the dirty bitmap. They must be less than the value that KVM_CHECK_EXTENSION returns for the KVM_CAP_MULTI_ADDRESS_SPACE capability. -This ioctl is mostly useful when KVM_CAP_MANUAL_DIRTY_LOG_PROTECT +This ioctl is mostly useful when KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2 is enabled; for more information, see the description of the capability. However, it can always be used as long as KVM_CHECK_EXTENSION confirms -that KVM_CAP_MANUAL_DIRTY_LOG_PROTECT is present. +that KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2 is present. 4.118 KVM_GET_SUPPORTED_HV_CPUID @@ -4780,7 +4780,7 @@ and injected exceptions. * For the new DR6 bits, note that bit 16 is set iff the #DB exception will clear DR6.RTM. -7.18 KVM_CAP_MANUAL_DIRTY_LOG_PROTECT +7.18 KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2 Architectures: all Parameters: args[0] whether feature should be enabled or not @@ -4803,6 +4803,11 @@ while userspace can see false reports of dirty pages. Manual reprotection helps reducing this time, improving guest performance and reducing the number of dirty log false positives. +KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2 was previously available under the name +KVM_CAP_MANUAL_DIRTY_LOG_PROTECT, but the implementation had bugs that make +it hard or impossible to use it correctly. The availability of +KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2 signals that those bugs are fixed. +Userspace should not try to use KVM_CAP_MANUAL_DIRTY_LOG_PROTECT. 8. Other capabilities. ---------------------- diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 6d4ea4b6c922..d673734c46cb 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -986,8 +986,9 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_HYPERV_ENLIGHTENED_VMCS 163 #define KVM_CAP_EXCEPTION_PAYLOAD 164 #define KVM_CAP_ARM_VM_IPA_SIZE 165 -#define KVM_CAP_MANUAL_DIRTY_LOG_PROTECT 166 +#define KVM_CAP_MANUAL_DIRTY_LOG_PROTECT 166 /* Obsolete */ #define KVM_CAP_HYPERV_CPUID 167 +#define KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2 168 #ifdef KVM_CAP_IRQ_ROUTING @@ -1434,7 +1435,7 @@ struct kvm_enc_region { #define KVM_GET_NESTED_STATE _IOWR(KVMIO, 0xbe, struct kvm_nested_state) #define KVM_SET_NESTED_STATE _IOW(KVMIO, 0xbf, struct kvm_nested_state) -/* Available with KVM_CAP_MANUAL_DIRTY_LOG_PROTECT */ +/* Available with KVM_CAP_MANUAL_DIRTY_LOG_PROTECT_2 */ #define KVM_CLEAR_DIRTY_LOG _IOWR(KVMIO, 0xc0, struct kvm_clear_dirty_log) /* Available with KVM_CAP_HYPERV_CPUID */ diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c index 052fb5856df4..a29d1119ccb3 100644 --- a/tools/testing/selftests/kvm/dirty_log_test.c +++ b/tools/testing/selftests/kvm/dirty_log_test.c @@ -311,7 +311,7 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations, #ifdef USE_CLEAR_DIRTY_LOG struct kvm_enable_cap cap = {}; - cap.cap = KVM_CAP_MANUAL_DIRTY_LOG_PROTECT; + cap.cap = KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2; cap.args[0] = 1; vm_enable_cap(vm, &cap); #endif @@ -427,7 +427,7 @@ int main(int argc, char *argv[]) int opt, i; #ifdef USE_CLEAR_DIRTY_LOG - if (!kvm_check_cap(KVM_CAP_MANUAL_DIRTY_LOG_PROTECT)) { + if (!kvm_check_cap(KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2)) { fprintf(stderr, "KVM_CLEAR_DIRTY_LOG not available, skipping tests\n"); exit(KSFT_SKIP); } diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 7883e0ad07fe..f4e02cd8fa43 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -3110,7 +3110,7 @@ static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg) case KVM_CAP_CHECK_EXTENSION_VM: case KVM_CAP_ENABLE_CAP_VM: #ifdef CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT - case KVM_CAP_MANUAL_DIRTY_LOG_PROTECT: + case KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2: #endif return 1; #ifdef CONFIG_KVM_MMIO @@ -3148,7 +3148,7 @@ static int kvm_vm_ioctl_enable_cap_generic(struct kvm *kvm, { switch (cap->cap) { #ifdef CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT - case KVM_CAP_MANUAL_DIRTY_LOG_PROTECT: + case KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2: if (cap->flags || (cap->args[0] & ~1)) return -EINVAL; kvm->manual_dirty_log_protect = cap->args[0]; -- cgit v1.2.3-55-g7522 From 4894fbcce856635c9ab79f44e50826e86bb92110 Mon Sep 17 00:00:00 2001 From: Cédric Le Goater Date: Thu, 9 May 2019 14:33:44 +0200 Subject: KVM: PPC: Book3S: Remove useless checks in 'release' method of KVM device There is no need to test for the device pointer validity when releasing a KVM device. The file descriptor should identify it safely. Fixes: 2bde9b3ec8bd ("KVM: Introduce a 'release' method for KVM devices") Signed-off-by: Cédric Le Goater Reviewed-by: Alexey Kardashevskiy Signed-off-by: Paul Mackerras --- virt/kvm/kvm_main.c | 6 ------ 1 file changed, 6 deletions(-) (limited to 'virt/kvm') diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index ea2619d5ca98..37149433c07a 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2938,12 +2938,6 @@ static int kvm_device_release(struct inode *inode, struct file *filp) struct kvm_device *dev = filp->private_data; struct kvm *kvm = dev->kvm; - if (!dev) - return -ENODEV; - - if (dev->kvm != kvm) - return -EPERM; - if (dev->ops->release) { mutex_lock(&kvm->lock); list_del(&dev->vm_node); -- cgit v1.2.3-55-g7522 From c011d23ba046826ccf8c4a4a6c1d01c9ccaa1403 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 17 May 2019 14:08:53 +0200 Subject: kvm: fix compilation on aarch64 Commit e45adf665a53 ("KVM: Introduce a new guest mapping API", 2019-01-31) introduced a build failure on aarch64 defconfig: $ make -j$(nproc) ARCH=arm64 CROSS_COMPILE=aarch64-linux-gnu- O=out defconfig \ Image.gz ... ../arch/arm64/kvm/../../../virt/kvm/kvm_main.c: In function '__kvm_map_gfn': ../arch/arm64/kvm/../../../virt/kvm/kvm_main.c:1763:9: error: implicit declaration of function 'memremap'; did you mean 'memset_p'? ../arch/arm64/kvm/../../../virt/kvm/kvm_main.c:1763:46: error: 'MEMREMAP_WB' undeclared (first use in this function) ../arch/arm64/kvm/../../../virt/kvm/kvm_main.c: In function 'kvm_vcpu_unmap': ../arch/arm64/kvm/../../../virt/kvm/kvm_main.c:1795:3: error: implicit declaration of function 'memunmap'; did you mean 'vm_munmap'? because these functions are declared in rather than , and the former was being pulled in already on x86 but not on aarch64. Reported-by: Nathan Chancellor Signed-off-by: Paolo Bonzini --- virt/kvm/kvm_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'virt/kvm') diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index d22b1f4bfa56..34afa94f0183 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -51,9 +51,9 @@ #include #include #include +#include #include -#include #include #include #include -- cgit v1.2.3-55-g7522