From 7aa8d14641651a61a0b8892314a0bcfaceebe158 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sat, 5 Jan 2019 15:49:50 +0000 Subject: arm/arm64: KVM: Introduce kvm_call_hyp_ret() Until now, we haven't differentiated between HYP calls that have a return value and those who don't. As we're about to change this, introduce kvm_call_hyp_ret(), and change all call sites that actually make use of a return value. Signed-off-by: Marc Zyngier Acked-by: Christoffer Dall Signed-off-by: Christoffer Dall --- virt/kvm/arm/arm.c | 2 +- virt/kvm/arm/vgic/vgic-v3.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'virt') diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index 9e350fd34504..4d55f98f97f7 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c @@ -765,7 +765,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) ret = kvm_vcpu_run_vhe(vcpu); kvm_arm_vhe_guest_exit(); } else { - ret = kvm_call_hyp(__kvm_vcpu_run_nvhe, vcpu); + ret = kvm_call_hyp_ret(__kvm_vcpu_run_nvhe, vcpu); } vcpu->mode = OUTSIDE_GUEST_MODE; diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c index 9c0dd234ebe8..67f98151c88d 100644 --- a/virt/kvm/arm/vgic/vgic-v3.c +++ b/virt/kvm/arm/vgic/vgic-v3.c @@ -589,7 +589,7 @@ early_param("kvm-arm.vgic_v4_enable", early_gicv4_enable); */ int vgic_v3_probe(const struct gic_kvm_info *info) { - u32 ich_vtr_el2 = kvm_call_hyp(__vgic_v3_get_ich_vtr_el2); + u32 ich_vtr_el2 = kvm_call_hyp_ret(__vgic_v3_get_ich_vtr_el2); int ret; /* @@ -679,7 +679,7 @@ void vgic_v3_put(struct kvm_vcpu *vcpu) struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3; if (likely(cpu_if->vgic_sre)) - cpu_if->vgic_vmcr = kvm_call_hyp(__vgic_v3_read_vmcr); + cpu_if->vgic_vmcr = kvm_call_hyp_ret(__vgic_v3_read_vmcr); kvm_call_hyp(__vgic_v3_save_aprs, vcpu); -- cgit v1.2.3-55-g7522 From 32f139551954512bfdf9d558341af453bb8b12b4 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sat, 19 Jan 2019 15:29:54 +0000 Subject: arm/arm64: KVM: Statically configure the host's view of MPIDR We currently eagerly save/restore MPIDR. It turns out to be slightly pointless: - On the host, this value is known as soon as we're scheduled on a physical CPU - In the guest, this value cannot change, as it is set by KVM (and this is a read-only register) The result of the above is that we can perfectly avoid the eager saving of MPIDR_EL1, and only keep the restore. We just have to setup the host contexts appropriately at boot time. Signed-off-by: Marc Zyngier Acked-by: Christoffer Dall Signed-off-by: Christoffer Dall --- arch/arm/include/asm/kvm_host.h | 8 ++++++++ arch/arm/kvm/hyp/cp15-sr.c | 1 - arch/arm64/include/asm/kvm_host.h | 8 ++++++++ arch/arm64/kvm/hyp/sysreg-sr.c | 1 - virt/kvm/arm/arm.c | 1 + 5 files changed, 17 insertions(+), 2 deletions(-) (limited to 'virt') diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index 4b6193f2f0f6..43e343e00fb8 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -26,6 +26,7 @@ #include #include #include +#include #include #define __KVM_HAVE_ARCH_INTC_INITIALIZED @@ -147,6 +148,13 @@ struct kvm_cpu_context { typedef struct kvm_cpu_context kvm_cpu_context_t; +static inline void kvm_init_host_cpu_context(kvm_cpu_context_t *cpu_ctxt, + int cpu) +{ + /* The host's MPIDR is immutable, so let's set it up at boot time */ + cpu_ctxt->cp15[c0_MPIDR] = cpu_logical_map(cpu); +} + struct kvm_vcpu_arch { struct kvm_cpu_context ctxt; diff --git a/arch/arm/kvm/hyp/cp15-sr.c b/arch/arm/kvm/hyp/cp15-sr.c index c4782812714c..8bf895ec6e04 100644 --- a/arch/arm/kvm/hyp/cp15-sr.c +++ b/arch/arm/kvm/hyp/cp15-sr.c @@ -27,7 +27,6 @@ static u64 *cp15_64(struct kvm_cpu_context *ctxt, int idx) void __hyp_text __sysreg_save_state(struct kvm_cpu_context *ctxt) { - ctxt->cp15[c0_MPIDR] = read_sysreg(VMPIDR); ctxt->cp15[c0_CSSELR] = read_sysreg(CSSELR); ctxt->cp15[c1_SCTLR] = read_sysreg(SCTLR); ctxt->cp15[c1_CPACR] = read_sysreg(CPACR); diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 8b7702bdb219..f497bb31031f 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -30,6 +30,7 @@ #include #include #include +#include #include #define __KVM_HAVE_ARCH_INTC_INITIALIZED @@ -418,6 +419,13 @@ struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr); DECLARE_PER_CPU(kvm_cpu_context_t, kvm_host_cpu_state); +static inline void kvm_init_host_cpu_context(kvm_cpu_context_t *cpu_ctxt, + int cpu) +{ + /* The host's MPIDR is immutable, so let's set it up at boot time */ + cpu_ctxt->sys_regs[MPIDR_EL1] = cpu_logical_map(cpu); +} + void __kvm_enable_ssbs(void); static inline void __cpu_init_hyp_mode(phys_addr_t pgd_ptr, diff --git a/arch/arm64/kvm/hyp/sysreg-sr.c b/arch/arm64/kvm/hyp/sysreg-sr.c index 68d6f7c3b237..2498f86defcb 100644 --- a/arch/arm64/kvm/hyp/sysreg-sr.c +++ b/arch/arm64/kvm/hyp/sysreg-sr.c @@ -52,7 +52,6 @@ static void __hyp_text __sysreg_save_user_state(struct kvm_cpu_context *ctxt) static void __hyp_text __sysreg_save_el1_state(struct kvm_cpu_context *ctxt) { - ctxt->sys_regs[MPIDR_EL1] = read_sysreg(vmpidr_el2); ctxt->sys_regs[CSSELR_EL1] = read_sysreg(csselr_el1); ctxt->sys_regs[SCTLR_EL1] = read_sysreg_el1(sctlr); ctxt->sys_regs[ACTLR_EL1] = read_sysreg(actlr_el1); diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index 4d55f98f97f7..3dd240ea9e76 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c @@ -1561,6 +1561,7 @@ static int init_hyp_mode(void) kvm_cpu_context_t *cpu_ctxt; cpu_ctxt = per_cpu_ptr(&kvm_host_cpu_state, cpu); + kvm_init_host_cpu_context(cpu_ctxt, cpu); err = create_hyp_mappings(cpu_ctxt, cpu_ctxt + 1, PAGE_HYP); if (err) { -- cgit v1.2.3-55-g7522 From e329fb75d519e3dc3eb11b22d5bb846516be3521 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Tue, 11 Dec 2018 15:26:31 +0100 Subject: KVM: arm/arm64: Factor out VMID into struct kvm_vmid In preparation for nested virtualization where we are going to have more than a single VMID per VM, let's factor out the VMID data into a separate VMID data structure and change the VMID allocator to operate on this new structure instead of using a struct kvm. This also means that udate_vttbr now becomes update_vmid, and that the vttbr itself is generated on the fly based on the stage 2 page table base address and the vmid. We cache the physical address of the pgd when allocating the pgd to avoid doing the calculation on every entry to the guest and to avoid calling into potentially non-hyp-mapped code from hyp/EL2. If we wanted to merge the VMID allocator with the arm64 ASID allocator at some point in the future, it should actually become easier to do that after this patch. Note that to avoid mapping the kvm_vmid_bits variable into hyp, we simply forego the masking of the vmid value in kvm_get_vttbr and rely on update_vmid to always assign a valid vmid value (within the supported range). Reviewed-by: Marc Zyngier [maz: minor cleanups] Reviewed-by: Julien Thierry Signed-off-by: Christoffer Dall Signed-off-by: Marc Zyngier --- arch/arm/include/asm/kvm_host.h | 13 +++++---- arch/arm/include/asm/kvm_mmu.h | 9 +++++-- arch/arm/kvm/hyp/switch.c | 2 +- arch/arm/kvm/hyp/tlb.c | 4 +-- arch/arm64/include/asm/kvm_host.h | 9 ++++--- arch/arm64/include/asm/kvm_hyp.h | 3 ++- arch/arm64/include/asm/kvm_mmu.h | 10 +++++-- virt/kvm/arm/arm.c | 57 ++++++++++++++------------------------- virt/kvm/arm/mmu.c | 7 +++++ 9 files changed, 61 insertions(+), 53 deletions(-) (limited to 'virt') diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index 43e343e00fb8..8073267dc4a0 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -57,10 +57,13 @@ int __attribute_const__ kvm_target_cpu(void); int kvm_reset_vcpu(struct kvm_vcpu *vcpu); void kvm_reset_coprocs(struct kvm_vcpu *vcpu); -struct kvm_arch { - /* VTTBR value associated with below pgd and vmid */ - u64 vttbr; +struct kvm_vmid { + /* The VMID generation used for the virt. memory system */ + u64 vmid_gen; + u32 vmid; +}; +struct kvm_arch { /* The last vcpu id that ran on each physical CPU */ int __percpu *last_vcpu_ran; @@ -70,11 +73,11 @@ struct kvm_arch { */ /* The VMID generation used for the virt. memory system */ - u64 vmid_gen; - u32 vmid; + struct kvm_vmid vmid; /* Stage-2 page table */ pgd_t *pgd; + phys_addr_t pgd_phys; /* Interrupt controller */ struct vgic_dist vgic; diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h index 3a875fc1b63c..2de96a180166 100644 --- a/arch/arm/include/asm/kvm_mmu.h +++ b/arch/arm/include/asm/kvm_mmu.h @@ -421,9 +421,14 @@ static inline int hyp_map_aux_data(void) static inline void kvm_set_ipa_limit(void) {} -static inline bool kvm_cpu_has_cnp(void) +static __always_inline u64 kvm_get_vttbr(struct kvm *kvm) { - return false; + struct kvm_vmid *vmid = &kvm->arch.vmid; + u64 vmid_field, baddr; + + baddr = kvm->arch.pgd_phys; + vmid_field = (u64)vmid->vmid << VTTBR_VMID_SHIFT; + return kvm_phys_to_vttbr(baddr) | vmid_field; } #endif /* !__ASSEMBLY__ */ diff --git a/arch/arm/kvm/hyp/switch.c b/arch/arm/kvm/hyp/switch.c index acf1c37fa49c..3b058a5d7c5f 100644 --- a/arch/arm/kvm/hyp/switch.c +++ b/arch/arm/kvm/hyp/switch.c @@ -77,7 +77,7 @@ static void __hyp_text __deactivate_traps(struct kvm_vcpu *vcpu) static void __hyp_text __activate_vm(struct kvm_vcpu *vcpu) { struct kvm *kvm = kern_hyp_va(vcpu->kvm); - write_sysreg(kvm->arch.vttbr, VTTBR); + write_sysreg(kvm_get_vttbr(kvm), VTTBR); write_sysreg(vcpu->arch.midr, VPIDR); } diff --git a/arch/arm/kvm/hyp/tlb.c b/arch/arm/kvm/hyp/tlb.c index c0edd450e104..8e4afba73635 100644 --- a/arch/arm/kvm/hyp/tlb.c +++ b/arch/arm/kvm/hyp/tlb.c @@ -41,7 +41,7 @@ void __hyp_text __kvm_tlb_flush_vmid(struct kvm *kvm) /* Switch to requested VMID */ kvm = kern_hyp_va(kvm); - write_sysreg(kvm->arch.vttbr, VTTBR); + write_sysreg(kvm_get_vttbr(kvm), VTTBR); isb(); write_sysreg(0, TLBIALLIS); @@ -61,7 +61,7 @@ void __hyp_text __kvm_tlb_flush_local_vmid(struct kvm_vcpu *vcpu) struct kvm *kvm = kern_hyp_va(kern_hyp_va(vcpu)->kvm); /* Switch to requested VMID */ - write_sysreg(kvm->arch.vttbr, VTTBR); + write_sysreg(kvm_get_vttbr(kvm), VTTBR); isb(); write_sysreg(0, TLBIALL); diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index f497bb31031f..444dd1cb1958 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -57,16 +57,19 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu); int kvm_arch_vm_ioctl_check_extension(struct kvm *kvm, long ext); void __extended_idmap_trampoline(phys_addr_t boot_pgd, phys_addr_t idmap_start); -struct kvm_arch { +struct kvm_vmid { /* The VMID generation used for the virt. memory system */ u64 vmid_gen; u32 vmid; +}; + +struct kvm_arch { + struct kvm_vmid vmid; /* stage2 entry level table */ pgd_t *pgd; + phys_addr_t pgd_phys; - /* VTTBR value associated with above pgd and vmid */ - u64 vttbr; /* VTCR_EL2 value for this VM */ u64 vtcr; diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h index a80a7ef57325..4da765f2cca5 100644 --- a/arch/arm64/include/asm/kvm_hyp.h +++ b/arch/arm64/include/asm/kvm_hyp.h @@ -21,6 +21,7 @@ #include #include #include +#include #include #define __hyp_text __section(.hyp.text) notrace @@ -163,7 +164,7 @@ void __noreturn __hyp_do_panic(unsigned long, ...); static __always_inline void __hyp_text __load_guest_stage2(struct kvm *kvm) { write_sysreg(kvm->arch.vtcr, vtcr_el2); - write_sysreg(kvm->arch.vttbr, vttbr_el2); + write_sysreg(kvm_get_vttbr(kvm), vttbr_el2); /* * ARM erratum 1165522 requires the actual execution of the above diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index 8af4b1befa42..c423c8c4fc39 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -591,9 +591,15 @@ static inline u64 kvm_vttbr_baddr_mask(struct kvm *kvm) return vttbr_baddr_mask(kvm_phys_shift(kvm), kvm_stage2_levels(kvm)); } -static inline bool kvm_cpu_has_cnp(void) +static __always_inline u64 kvm_get_vttbr(struct kvm *kvm) { - return system_supports_cnp(); + struct kvm_vmid *vmid = &kvm->arch.vmid; + u64 vmid_field, baddr; + u64 cnp = system_supports_cnp() ? VTTBR_CNP_BIT : 0; + + baddr = kvm->arch.pgd_phys; + vmid_field = (u64)vmid->vmid << VTTBR_VMID_SHIFT; + return kvm_phys_to_vttbr(baddr) | vmid_field | cnp; } #endif /* __ASSEMBLY__ */ diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index 3dd240ea9e76..b77db673bb03 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c @@ -65,7 +65,6 @@ static DEFINE_PER_CPU(struct kvm_vcpu *, kvm_arm_running_vcpu); /* The VMID used in the VTTBR */ static atomic64_t kvm_vmid_gen = ATOMIC64_INIT(1); static u32 kvm_next_vmid; -static unsigned int kvm_vmid_bits __read_mostly; static DEFINE_SPINLOCK(kvm_vmid_lock); static bool vgic_present; @@ -142,7 +141,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) kvm_vgic_early_init(kvm); /* Mark the initial VMID generation invalid */ - kvm->arch.vmid_gen = 0; + kvm->arch.vmid.vmid_gen = 0; /* The maximum number of VCPUs is limited by the host's GIC model */ kvm->arch.max_vcpus = vgic_present ? @@ -472,37 +471,31 @@ void force_vm_exit(const cpumask_t *mask) /** * need_new_vmid_gen - check that the VMID is still valid - * @kvm: The VM's VMID to check + * @vmid: The VMID to check * * return true if there is a new generation of VMIDs being used * - * The hardware supports only 256 values with the value zero reserved for the - * host, so we check if an assigned value belongs to a previous generation, - * which which requires us to assign a new value. If we're the first to use a - * VMID for the new generation, we must flush necessary caches and TLBs on all - * CPUs. + * The hardware supports a limited set of values with the value zero reserved + * for the host, so we check if an assigned value belongs to a previous + * generation, which which requires us to assign a new value. If we're the + * first to use a VMID for the new generation, we must flush necessary caches + * and TLBs on all CPUs. */ -static bool need_new_vmid_gen(struct kvm *kvm) +static bool need_new_vmid_gen(struct kvm_vmid *vmid) { u64 current_vmid_gen = atomic64_read(&kvm_vmid_gen); smp_rmb(); /* Orders read of kvm_vmid_gen and kvm->arch.vmid */ - return unlikely(READ_ONCE(kvm->arch.vmid_gen) != current_vmid_gen); + return unlikely(READ_ONCE(vmid->vmid_gen) != current_vmid_gen); } /** - * update_vttbr - Update the VTTBR with a valid VMID before the guest runs - * @kvm The guest that we are about to run - * - * Called from kvm_arch_vcpu_ioctl_run before entering the guest to ensure the - * VM has a valid VMID, otherwise assigns a new one and flushes corresponding - * caches and TLBs. + * update_vmid - Update the vmid with a valid VMID for the current generation + * @kvm: The guest that struct vmid belongs to + * @vmid: The stage-2 VMID information struct */ -static void update_vttbr(struct kvm *kvm) +static void update_vmid(struct kvm_vmid *vmid) { - phys_addr_t pgd_phys; - u64 vmid, cnp = kvm_cpu_has_cnp() ? VTTBR_CNP_BIT : 0; - - if (!need_new_vmid_gen(kvm)) + if (!need_new_vmid_gen(vmid)) return; spin_lock(&kvm_vmid_lock); @@ -512,7 +505,7 @@ static void update_vttbr(struct kvm *kvm) * already allocated a valid vmid for this vm, then this vcpu should * use the same vmid. */ - if (!need_new_vmid_gen(kvm)) { + if (!need_new_vmid_gen(vmid)) { spin_unlock(&kvm_vmid_lock); return; } @@ -536,18 +529,12 @@ static void update_vttbr(struct kvm *kvm) kvm_call_hyp(__kvm_flush_vm_context); } - kvm->arch.vmid = kvm_next_vmid; + vmid->vmid = kvm_next_vmid; kvm_next_vmid++; - kvm_next_vmid &= (1 << kvm_vmid_bits) - 1; - - /* update vttbr to be used with the new vmid */ - pgd_phys = virt_to_phys(kvm->arch.pgd); - BUG_ON(pgd_phys & ~kvm_vttbr_baddr_mask(kvm)); - vmid = ((u64)(kvm->arch.vmid) << VTTBR_VMID_SHIFT) & VTTBR_VMID_MASK(kvm_vmid_bits); - kvm->arch.vttbr = kvm_phys_to_vttbr(pgd_phys) | vmid | cnp; + kvm_next_vmid &= (1 << kvm_get_vmid_bits()) - 1; smp_wmb(); - WRITE_ONCE(kvm->arch.vmid_gen, atomic64_read(&kvm_vmid_gen)); + WRITE_ONCE(vmid->vmid_gen, atomic64_read(&kvm_vmid_gen)); spin_unlock(&kvm_vmid_lock); } @@ -690,7 +677,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) */ cond_resched(); - update_vttbr(vcpu->kvm); + update_vmid(&vcpu->kvm->arch.vmid); check_vcpu_requests(vcpu); @@ -739,7 +726,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) */ smp_store_mb(vcpu->mode, IN_GUEST_MODE); - if (ret <= 0 || need_new_vmid_gen(vcpu->kvm) || + if (ret <= 0 || need_new_vmid_gen(&vcpu->kvm->arch.vmid) || kvm_request_pending(vcpu)) { vcpu->mode = OUTSIDE_GUEST_MODE; isb(); /* Ensure work in x_flush_hwstate is committed */ @@ -1417,10 +1404,6 @@ static inline void hyp_cpu_pm_exit(void) static int init_common_resources(void) { - /* set size of VMID supported by CPU */ - kvm_vmid_bits = kvm_get_vmid_bits(); - kvm_info("%d-bit VMID\n", kvm_vmid_bits); - kvm_set_ipa_limit(); return 0; diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c index fbdf3ac2f001..f8dda452ea24 100644 --- a/virt/kvm/arm/mmu.c +++ b/virt/kvm/arm/mmu.c @@ -908,6 +908,7 @@ int create_hyp_exec_mappings(phys_addr_t phys_addr, size_t size, */ int kvm_alloc_stage2_pgd(struct kvm *kvm) { + phys_addr_t pgd_phys; pgd_t *pgd; if (kvm->arch.pgd != NULL) { @@ -920,7 +921,12 @@ int kvm_alloc_stage2_pgd(struct kvm *kvm) if (!pgd) return -ENOMEM; + pgd_phys = virt_to_phys(pgd); + if (WARN_ON(pgd_phys & ~kvm_vttbr_baddr_mask(kvm))) + return -EINVAL; + kvm->arch.pgd = pgd; + kvm->arch.pgd_phys = pgd_phys; return 0; } @@ -1008,6 +1014,7 @@ void kvm_free_stage2_pgd(struct kvm *kvm) unmap_stage2_range(kvm, 0, kvm_phys_size(kvm)); pgd = READ_ONCE(kvm->arch.pgd); kvm->arch.pgd = NULL; + kvm->arch.pgd_phys = 0; } spin_unlock(&kvm->mmu_lock); -- cgit v1.2.3-55-g7522 From accb99bcd0ca6d3ee412557b0c3f583a3abc0eb6 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Mon, 26 Nov 2018 18:21:22 +0100 Subject: KVM: arm/arm64: Simplify bg_timer programming Instead of calling into kvm_timer_[un]schedule from the main kvm blocking path, test if the VCPU is on the wait queue from the load/put path and perform the background timer setup/cancel in this path. This has the distinct advantage that we no longer race between load/put and schedule/unschedule and programming and canceling of the bg_timer always happens when the timer state is not loaded. Note that we must now remove the checks in kvm_timer_blocking that do not schedule a background timer if one of the timers can fire, because we no longer have a guarantee that kvm_vcpu_check_block() will be called before kvm_timer_blocking. Reported-by: Andre Przywara Signed-off-by: Christoffer Dall Signed-off-by: Marc Zyngier --- include/kvm/arm_arch_timer.h | 3 --- virt/kvm/arm/arch_timer.c | 35 ++++++++++++++--------------------- virt/kvm/arm/arm.c | 2 -- 3 files changed, 14 insertions(+), 26 deletions(-) (limited to 'virt') diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h index 33771352dcd6..d6e6a45d1d24 100644 --- a/include/kvm/arm_arch_timer.h +++ b/include/kvm/arm_arch_timer.h @@ -76,9 +76,6 @@ int kvm_arm_timer_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr); bool kvm_timer_is_pending(struct kvm_vcpu *vcpu); -void kvm_timer_schedule(struct kvm_vcpu *vcpu); -void kvm_timer_unschedule(struct kvm_vcpu *vcpu); - u64 kvm_phys_timer_read(void); void kvm_timer_vcpu_load(struct kvm_vcpu *vcpu); diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c index b07ac4614e1c..4986028d9829 100644 --- a/virt/kvm/arm/arch_timer.c +++ b/virt/kvm/arm/arch_timer.c @@ -349,22 +349,12 @@ out: * thread is removed from its waitqueue and made runnable when there's a timer * interrupt to handle. */ -void kvm_timer_schedule(struct kvm_vcpu *vcpu) +static void kvm_timer_blocking(struct kvm_vcpu *vcpu) { struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); struct arch_timer_context *ptimer = vcpu_ptimer(vcpu); - vtimer_save_state(vcpu); - - /* - * No need to schedule a background timer if any guest timer has - * already expired, because kvm_vcpu_block will return before putting - * the thread to sleep. - */ - if (kvm_timer_should_fire(vtimer) || kvm_timer_should_fire(ptimer)) - return; - /* * If both timers are not capable of raising interrupts (disabled or * masked), then there's no more work for us to do. @@ -373,12 +363,19 @@ void kvm_timer_schedule(struct kvm_vcpu *vcpu) return; /* - * The guest timers have not yet expired, schedule a background timer. + * At least one guest time will expire. Schedule a background timer. * Set the earliest expiration time among the guest timers. */ soft_timer_start(&timer->bg_timer, kvm_timer_earliest_exp(vcpu)); } +static void kvm_timer_unblocking(struct kvm_vcpu *vcpu) +{ + struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; + + soft_timer_cancel(&timer->bg_timer); +} + static void vtimer_restore_state(struct kvm_vcpu *vcpu) { struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; @@ -401,15 +398,6 @@ out: local_irq_restore(flags); } -void kvm_timer_unschedule(struct kvm_vcpu *vcpu) -{ - struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; - - vtimer_restore_state(vcpu); - - soft_timer_cancel(&timer->bg_timer); -} - static void set_cntvoff(u64 cntvoff) { u32 low = lower_32_bits(cntvoff); @@ -485,6 +473,8 @@ void kvm_timer_vcpu_load(struct kvm_vcpu *vcpu) /* Set the background timer for the physical timer emulation. */ phys_timer_emulate(vcpu); + kvm_timer_unblocking(vcpu); + /* If the timer fired while we weren't running, inject it now */ if (kvm_timer_should_fire(ptimer) != ptimer->irq.level) kvm_timer_update_irq(vcpu, !ptimer->irq.level, ptimer); @@ -527,6 +517,9 @@ void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu) */ soft_timer_cancel(&timer->phys_timer); + if (swait_active(kvm_arch_vcpu_wq(vcpu))) + kvm_timer_blocking(vcpu); + /* * The kernel may decide to run userspace after calling vcpu_put, so * we reset cntvoff to 0 to ensure a consistent read between user diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index b77db673bb03..9fbdb9e1c51f 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c @@ -335,13 +335,11 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu) void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) { - kvm_timer_schedule(vcpu); kvm_vgic_v4_enable_doorbell(vcpu); } void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) { - kvm_timer_unschedule(vcpu); kvm_vgic_v4_disable_doorbell(vcpu); } -- cgit v1.2.3-55-g7522 From b98c079ba480c606b13f6abf844187af09baeaab Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 4 Jan 2019 11:33:42 +0100 Subject: KVM: arm64: Fix ICH_ELRSR_EL2 sysreg naming We previously incorrectly named the define for this system register. Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall --- arch/arm/include/asm/arch_gicv3.h | 4 ++-- arch/arm64/include/asm/sysreg.h | 2 +- virt/kvm/arm/hyp/vgic-v3-sr.c | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) (limited to 'virt') diff --git a/arch/arm/include/asm/arch_gicv3.h b/arch/arm/include/asm/arch_gicv3.h index 0bd530702118..bdc87700def2 100644 --- a/arch/arm/include/asm/arch_gicv3.h +++ b/arch/arm/include/asm/arch_gicv3.h @@ -54,7 +54,7 @@ #define ICH_VTR __ACCESS_CP15(c12, 4, c11, 1) #define ICH_MISR __ACCESS_CP15(c12, 4, c11, 2) #define ICH_EISR __ACCESS_CP15(c12, 4, c11, 3) -#define ICH_ELSR __ACCESS_CP15(c12, 4, c11, 5) +#define ICH_ELRSR __ACCESS_CP15(c12, 4, c11, 5) #define ICH_VMCR __ACCESS_CP15(c12, 4, c11, 7) #define __LR0(x) __ACCESS_CP15(c12, 4, c12, x) @@ -151,7 +151,7 @@ CPUIF_MAP(ICH_HCR, ICH_HCR_EL2) CPUIF_MAP(ICH_VTR, ICH_VTR_EL2) CPUIF_MAP(ICH_MISR, ICH_MISR_EL2) CPUIF_MAP(ICH_EISR, ICH_EISR_EL2) -CPUIF_MAP(ICH_ELSR, ICH_ELSR_EL2) +CPUIF_MAP(ICH_ELRSR, ICH_ELRSR_EL2) CPUIF_MAP(ICH_VMCR, ICH_VMCR_EL2) CPUIF_MAP(ICH_AP0R3, ICH_AP0R3_EL2) CPUIF_MAP(ICH_AP0R2, ICH_AP0R2_EL2) diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 72dc4c011014..3e5650903d6d 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -426,7 +426,7 @@ #define SYS_ICH_VTR_EL2 sys_reg(3, 4, 12, 11, 1) #define SYS_ICH_MISR_EL2 sys_reg(3, 4, 12, 11, 2) #define SYS_ICH_EISR_EL2 sys_reg(3, 4, 12, 11, 3) -#define SYS_ICH_ELSR_EL2 sys_reg(3, 4, 12, 11, 5) +#define SYS_ICH_ELRSR_EL2 sys_reg(3, 4, 12, 11, 5) #define SYS_ICH_VMCR_EL2 sys_reg(3, 4, 12, 11, 7) #define __SYS__LR0_EL2(x) sys_reg(3, 4, 12, 12, x) diff --git a/virt/kvm/arm/hyp/vgic-v3-sr.c b/virt/kvm/arm/hyp/vgic-v3-sr.c index 9652c453480f..264d92da3240 100644 --- a/virt/kvm/arm/hyp/vgic-v3-sr.c +++ b/virt/kvm/arm/hyp/vgic-v3-sr.c @@ -226,7 +226,7 @@ void __hyp_text __vgic_v3_save_state(struct kvm_vcpu *vcpu) int i; u32 elrsr; - elrsr = read_gicreg(ICH_ELSR_EL2); + elrsr = read_gicreg(ICH_ELRSR_EL2); write_gicreg(cpu_if->vgic_hcr & ~ICH_HCR_EN, ICH_HCR_EL2); -- cgit v1.2.3-55-g7522 From 84135d3d18da2ff17d3ad1a609b2818cc3049552 Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Thu, 5 Jul 2018 16:48:23 +0100 Subject: KVM: arm/arm64: consolidate arch timer trap handlers At the moment we have separate system register emulation handlers for each timer register. Actually they are quite similar, and we rely on kvm_arm_timer_[gs]et_reg() for the actual emulation anyways, so let's just merge all of those handlers into one function, which just marshalls the arguments and then hands off to a set of common accessors. This makes extending the emulation to include EL2 timers much easier. Signed-off-by: Andre Przywara [Fixed 32-bit VM breakage and reduced to reworking existing code] Signed-off-by: Christoffer Dall [Fixed 32bit host, general cleanup] Signed-off-by: Marc Zyngier --- arch/arm/kvm/coproc.c | 23 ++++--- arch/arm64/include/asm/sysreg.h | 4 ++ arch/arm64/kvm/sys_regs.c | 73 ++++++++++------------ include/kvm/arm_arch_timer.h | 23 +++++++ virt/kvm/arm/arch_timer.c | 130 ++++++++++++++++++++++++++++++++++------ 5 files changed, 187 insertions(+), 66 deletions(-) (limited to 'virt') diff --git a/arch/arm/kvm/coproc.c b/arch/arm/kvm/coproc.c index 222c1635bc7a..51863364f8d1 100644 --- a/arch/arm/kvm/coproc.c +++ b/arch/arm/kvm/coproc.c @@ -293,15 +293,16 @@ static bool access_cntp_tval(struct kvm_vcpu *vcpu, const struct coproc_params *p, const struct coproc_reg *r) { - u64 now = kvm_phys_timer_read(); - u64 val; + u32 val; if (p->is_write) { val = *vcpu_reg(vcpu, p->Rt1); - kvm_arm_timer_set_reg(vcpu, KVM_REG_ARM_PTIMER_CVAL, val + now); + kvm_arm_timer_write_sysreg(vcpu, + TIMER_PTIMER, TIMER_REG_TVAL, val); } else { - val = kvm_arm_timer_get_reg(vcpu, KVM_REG_ARM_PTIMER_CVAL); - *vcpu_reg(vcpu, p->Rt1) = val - now; + val = kvm_arm_timer_read_sysreg(vcpu, + TIMER_PTIMER, TIMER_REG_TVAL); + *vcpu_reg(vcpu, p->Rt1) = val; } return true; @@ -315,9 +316,11 @@ static bool access_cntp_ctl(struct kvm_vcpu *vcpu, if (p->is_write) { val = *vcpu_reg(vcpu, p->Rt1); - kvm_arm_timer_set_reg(vcpu, KVM_REG_ARM_PTIMER_CTL, val); + kvm_arm_timer_write_sysreg(vcpu, + TIMER_PTIMER, TIMER_REG_CTL, val); } else { - val = kvm_arm_timer_get_reg(vcpu, KVM_REG_ARM_PTIMER_CTL); + val = kvm_arm_timer_read_sysreg(vcpu, + TIMER_PTIMER, TIMER_REG_CTL); *vcpu_reg(vcpu, p->Rt1) = val; } @@ -333,9 +336,11 @@ static bool access_cntp_cval(struct kvm_vcpu *vcpu, if (p->is_write) { val = (u64)*vcpu_reg(vcpu, p->Rt2) << 32; val |= *vcpu_reg(vcpu, p->Rt1); - kvm_arm_timer_set_reg(vcpu, KVM_REG_ARM_PTIMER_CVAL, val); + kvm_arm_timer_write_sysreg(vcpu, + TIMER_PTIMER, TIMER_REG_CVAL, val); } else { - val = kvm_arm_timer_get_reg(vcpu, KVM_REG_ARM_PTIMER_CVAL); + val = kvm_arm_timer_read_sysreg(vcpu, + TIMER_PTIMER, TIMER_REG_CVAL); *vcpu_reg(vcpu, p->Rt1) = val; *vcpu_reg(vcpu, p->Rt2) = val >> 32; } diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 3e5650903d6d..6482e8bcf1b8 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -392,6 +392,10 @@ #define SYS_CNTP_CTL_EL0 sys_reg(3, 3, 14, 2, 1) #define SYS_CNTP_CVAL_EL0 sys_reg(3, 3, 14, 2, 2) +#define SYS_AARCH32_CNTP_TVAL sys_reg(0, 0, 14, 2, 0) +#define SYS_AARCH32_CNTP_CTL sys_reg(0, 0, 14, 2, 1) +#define SYS_AARCH32_CNTP_CVAL sys_reg(0, 2, 0, 14, 0) + #define __PMEV_op2(n) ((n) & 0x7) #define __CNTR_CRm(n) (0x8 | (((n) >> 3) & 0x3)) #define SYS_PMEVCNTRn_EL0(n) sys_reg(3, 3, 14, __CNTR_CRm(n), __PMEV_op2(n)) diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 1a5bea4285e4..0d348500b24b 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -990,44 +990,38 @@ static bool access_pmuserenr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, { SYS_DESC(SYS_PMEVTYPERn_EL0(n)), \ access_pmu_evtyper, reset_unknown, (PMEVTYPER0_EL0 + n), } -static bool access_cntp_tval(struct kvm_vcpu *vcpu, - struct sys_reg_params *p, - const struct sys_reg_desc *r) +static bool access_arch_timer(struct kvm_vcpu *vcpu, + struct sys_reg_params *p, + const struct sys_reg_desc *r) { - u64 now = kvm_phys_timer_read(); - u64 cval; + enum kvm_arch_timers tmr; + enum kvm_arch_timer_regs treg; + u64 reg = reg_to_encoding(r); - if (p->is_write) { - kvm_arm_timer_set_reg(vcpu, KVM_REG_ARM_PTIMER_CVAL, - p->regval + now); - } else { - cval = kvm_arm_timer_get_reg(vcpu, KVM_REG_ARM_PTIMER_CVAL); - p->regval = cval - now; + switch (reg) { + case SYS_CNTP_TVAL_EL0: + case SYS_AARCH32_CNTP_TVAL: + tmr = TIMER_PTIMER; + treg = TIMER_REG_TVAL; + break; + case SYS_CNTP_CTL_EL0: + case SYS_AARCH32_CNTP_CTL: + tmr = TIMER_PTIMER; + treg = TIMER_REG_CTL; + break; + case SYS_CNTP_CVAL_EL0: + case SYS_AARCH32_CNTP_CVAL: + tmr = TIMER_PTIMER; + treg = TIMER_REG_CVAL; + break; + default: + BUG(); } - return true; -} - -static bool access_cntp_ctl(struct kvm_vcpu *vcpu, - struct sys_reg_params *p, - const struct sys_reg_desc *r) -{ - if (p->is_write) - kvm_arm_timer_set_reg(vcpu, KVM_REG_ARM_PTIMER_CTL, p->regval); - else - p->regval = kvm_arm_timer_get_reg(vcpu, KVM_REG_ARM_PTIMER_CTL); - - return true; -} - -static bool access_cntp_cval(struct kvm_vcpu *vcpu, - struct sys_reg_params *p, - const struct sys_reg_desc *r) -{ if (p->is_write) - kvm_arm_timer_set_reg(vcpu, KVM_REG_ARM_PTIMER_CVAL, p->regval); + kvm_arm_timer_write_sysreg(vcpu, tmr, treg, p->regval); else - p->regval = kvm_arm_timer_get_reg(vcpu, KVM_REG_ARM_PTIMER_CVAL); + p->regval = kvm_arm_timer_read_sysreg(vcpu, tmr, treg); return true; } @@ -1392,9 +1386,9 @@ static const struct sys_reg_desc sys_reg_descs[] = { { SYS_DESC(SYS_TPIDR_EL0), NULL, reset_unknown, TPIDR_EL0 }, { SYS_DESC(SYS_TPIDRRO_EL0), NULL, reset_unknown, TPIDRRO_EL0 }, - { SYS_DESC(SYS_CNTP_TVAL_EL0), access_cntp_tval }, - { SYS_DESC(SYS_CNTP_CTL_EL0), access_cntp_ctl }, - { SYS_DESC(SYS_CNTP_CVAL_EL0), access_cntp_cval }, + { SYS_DESC(SYS_CNTP_TVAL_EL0), access_arch_timer }, + { SYS_DESC(SYS_CNTP_CTL_EL0), access_arch_timer }, + { SYS_DESC(SYS_CNTP_CVAL_EL0), access_arch_timer }, /* PMEVCNTRn_EL0 */ PMU_PMEVCNTR_EL0(0), @@ -1715,10 +1709,9 @@ static const struct sys_reg_desc cp15_regs[] = { { Op1( 0), CRn(13), CRm( 0), Op2( 1), access_vm_reg, NULL, c13_CID }, - /* CNTP_TVAL */ - { Op1( 0), CRn(14), CRm( 2), Op2( 0), access_cntp_tval }, - /* CNTP_CTL */ - { Op1( 0), CRn(14), CRm( 2), Op2( 1), access_cntp_ctl }, + /* Arch Tmers */ + { SYS_DESC(SYS_AARCH32_CNTP_TVAL), access_arch_timer }, + { SYS_DESC(SYS_AARCH32_CNTP_CTL), access_arch_timer }, /* PMEVCNTRn */ PMU_PMEVCNTR(0), @@ -1795,7 +1788,7 @@ static const struct sys_reg_desc cp15_64_regs[] = { { Op1( 1), CRn( 0), CRm( 2), Op2( 0), access_vm_reg, NULL, c2_TTBR1 }, { Op1( 1), CRn( 0), CRm(12), Op2( 0), access_gic_sgi }, /* ICC_ASGI1R */ { Op1( 2), CRn( 0), CRm(12), Op2( 0), access_gic_sgi }, /* ICC_SGI0R */ - { Op1( 2), CRn( 0), CRm(14), Op2( 0), access_cntp_cval }, + { SYS_DESC(SYS_AARCH32_CNTP_CVAL), access_arch_timer }, }; /* Target specific emulation tables */ diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h index d6e6a45d1d24..d26b7fde9935 100644 --- a/include/kvm/arm_arch_timer.h +++ b/include/kvm/arm_arch_timer.h @@ -22,6 +22,19 @@ #include #include +enum kvm_arch_timers { + TIMER_PTIMER, + TIMER_VTIMER, + NR_KVM_TIMERS +}; + +enum kvm_arch_timer_regs { + TIMER_REG_CNT, + TIMER_REG_CVAL, + TIMER_REG_TVAL, + TIMER_REG_CTL, +}; + struct arch_timer_context { /* Registers: control register, timer value */ u32 cnt_ctl; @@ -87,5 +100,15 @@ bool kvm_arch_timer_get_input_level(int vintid); #define vcpu_vtimer(v) (&(v)->arch.timer_cpu.vtimer) #define vcpu_ptimer(v) (&(v)->arch.timer_cpu.ptimer) +#define vcpu_get_timer(v,t) \ + (t == TIMER_VTIMER ? vcpu_vtimer(v) : vcpu_ptimer(v)) + +u64 kvm_arm_timer_read_sysreg(struct kvm_vcpu *vcpu, + enum kvm_arch_timers tmr, + enum kvm_arch_timer_regs treg); +void kvm_arm_timer_write_sysreg(struct kvm_vcpu *vcpu, + enum kvm_arch_timers tmr, + enum kvm_arch_timer_regs treg, + u64 val); #endif diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c index 4986028d9829..f7d377448438 100644 --- a/virt/kvm/arm/arch_timer.c +++ b/virt/kvm/arm/arch_timer.c @@ -25,6 +25,7 @@ #include #include +#include #include #include @@ -52,6 +53,13 @@ static bool kvm_timer_irq_can_fire(struct arch_timer_context *timer_ctx); static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level, struct arch_timer_context *timer_ctx); static bool kvm_timer_should_fire(struct arch_timer_context *timer_ctx); +static void kvm_arm_timer_write(struct kvm_vcpu *vcpu, + struct arch_timer_context *timer, + enum kvm_arch_timer_regs treg, + u64 val); +static u64 kvm_arm_timer_read(struct kvm_vcpu *vcpu, + struct arch_timer_context *timer, + enum kvm_arch_timer_regs treg); u64 kvm_phys_timer_read(void) { @@ -628,24 +636,25 @@ static void kvm_timer_init_interrupt(void *info) int kvm_arm_timer_set_reg(struct kvm_vcpu *vcpu, u64 regid, u64 value) { - struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); - struct arch_timer_context *ptimer = vcpu_ptimer(vcpu); - switch (regid) { case KVM_REG_ARM_TIMER_CTL: - vtimer->cnt_ctl = value & ~ARCH_TIMER_CTRL_IT_STAT; + kvm_arm_timer_write(vcpu, + vcpu_vtimer(vcpu), TIMER_REG_CTL, value); break; case KVM_REG_ARM_TIMER_CNT: update_vtimer_cntvoff(vcpu, kvm_phys_timer_read() - value); break; case KVM_REG_ARM_TIMER_CVAL: - vtimer->cnt_cval = value; + kvm_arm_timer_write(vcpu, + vcpu_vtimer(vcpu), TIMER_REG_CVAL, value); break; case KVM_REG_ARM_PTIMER_CTL: - ptimer->cnt_ctl = value & ~ARCH_TIMER_CTRL_IT_STAT; + kvm_arm_timer_write(vcpu, + vcpu_ptimer(vcpu), TIMER_REG_CTL, value); break; case KVM_REG_ARM_PTIMER_CVAL: - ptimer->cnt_cval = value; + kvm_arm_timer_write(vcpu, + vcpu_ptimer(vcpu), TIMER_REG_CVAL, value); break; default: @@ -672,26 +681,113 @@ static u64 read_timer_ctl(struct arch_timer_context *timer) u64 kvm_arm_timer_get_reg(struct kvm_vcpu *vcpu, u64 regid) { - struct arch_timer_context *ptimer = vcpu_ptimer(vcpu); - struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); - switch (regid) { case KVM_REG_ARM_TIMER_CTL: - return read_timer_ctl(vtimer); + return kvm_arm_timer_read(vcpu, + vcpu_vtimer(vcpu), TIMER_REG_CTL); case KVM_REG_ARM_TIMER_CNT: - return kvm_phys_timer_read() - vtimer->cntvoff; + return kvm_arm_timer_read(vcpu, + vcpu_vtimer(vcpu), TIMER_REG_CNT); case KVM_REG_ARM_TIMER_CVAL: - return vtimer->cnt_cval; + return kvm_arm_timer_read(vcpu, + vcpu_vtimer(vcpu), TIMER_REG_CVAL); case KVM_REG_ARM_PTIMER_CTL: - return read_timer_ctl(ptimer); - case KVM_REG_ARM_PTIMER_CVAL: - return ptimer->cnt_cval; + return kvm_arm_timer_read(vcpu, + vcpu_ptimer(vcpu), TIMER_REG_CTL); case KVM_REG_ARM_PTIMER_CNT: - return kvm_phys_timer_read(); + return kvm_arm_timer_read(vcpu, + vcpu_vtimer(vcpu), TIMER_REG_CNT); + case KVM_REG_ARM_PTIMER_CVAL: + return kvm_arm_timer_read(vcpu, + vcpu_ptimer(vcpu), TIMER_REG_CVAL); } return (u64)-1; } +static u64 kvm_arm_timer_read(struct kvm_vcpu *vcpu, + struct arch_timer_context *timer, + enum kvm_arch_timer_regs treg) +{ + u64 val; + + switch (treg) { + case TIMER_REG_TVAL: + val = kvm_phys_timer_read() - timer->cntvoff - timer->cnt_cval; + break; + + case TIMER_REG_CTL: + val = read_timer_ctl(timer); + break; + + case TIMER_REG_CVAL: + val = timer->cnt_cval; + break; + + case TIMER_REG_CNT: + val = kvm_phys_timer_read() - timer->cntvoff; + break; + + default: + BUG(); + } + + return val; +} + +u64 kvm_arm_timer_read_sysreg(struct kvm_vcpu *vcpu, + enum kvm_arch_timers tmr, + enum kvm_arch_timer_regs treg) +{ + u64 val; + + preempt_disable(); + kvm_timer_vcpu_put(vcpu); + + val = kvm_arm_timer_read(vcpu, vcpu_get_timer(vcpu, tmr), treg); + + kvm_timer_vcpu_load(vcpu); + preempt_enable(); + + return val; +} + +static void kvm_arm_timer_write(struct kvm_vcpu *vcpu, + struct arch_timer_context *timer, + enum kvm_arch_timer_regs treg, + u64 val) +{ + switch (treg) { + case TIMER_REG_TVAL: + timer->cnt_cval = val - kvm_phys_timer_read() - timer->cntvoff; + break; + + case TIMER_REG_CTL: + timer->cnt_ctl = val & ~ARCH_TIMER_CTRL_IT_STAT; + break; + + case TIMER_REG_CVAL: + timer->cnt_cval = val; + break; + + default: + BUG(); + } +} + +void kvm_arm_timer_write_sysreg(struct kvm_vcpu *vcpu, + enum kvm_arch_timers tmr, + enum kvm_arch_timer_regs treg, + u64 val) +{ + preempt_disable(); + kvm_timer_vcpu_put(vcpu); + + kvm_arm_timer_write(vcpu, vcpu_get_timer(vcpu, tmr), treg, val); + + kvm_timer_vcpu_load(vcpu); + preempt_enable(); +} + static int kvm_timer_starting_cpu(unsigned int cpu) { kvm_timer_init_interrupt(NULL); -- cgit v1.2.3-55-g7522 From e604dd5d45c75c2112424dec74853efb708f4fa6 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Tue, 18 Sep 2018 10:08:18 -0700 Subject: KVM: arm/arm64: timer: Rework data structures for multiple timers Prepare for having 4 timer data structures (2 for now). Move loaded to the cpu data structure and not the individual timer structure, in preparation for assigning the EL1 phys timer as well. Signed-off-by: Christoffer Dall Signed-off-by: Marc Zyngier --- include/kvm/arm_arch_timer.h | 41 ++++++++++++++++--------------- virt/kvm/arm/arch_timer.c | 58 +++++++++++++++++++++++--------------------- 2 files changed, 51 insertions(+), 48 deletions(-) (limited to 'virt') diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h index d26b7fde9935..ab835112204d 100644 --- a/include/kvm/arm_arch_timer.h +++ b/include/kvm/arm_arch_timer.h @@ -36,6 +36,8 @@ enum kvm_arch_timer_regs { }; struct arch_timer_context { + struct kvm_vcpu *vcpu; + /* Registers: control register, timer value */ u32 cnt_ctl; u64 cnt_cval; @@ -43,32 +45,31 @@ struct arch_timer_context { /* Timer IRQ */ struct kvm_irq_level irq; - /* - * We have multiple paths which can save/restore the timer state - * onto the hardware, so we need some way of keeping track of - * where the latest state is. - * - * loaded == true: State is loaded on the hardware registers. - * loaded == false: State is stored in memory. - */ - bool loaded; - /* Virtual offset */ - u64 cntvoff; + u64 cntvoff; + + /* Emulated Timer (may be unused) */ + struct hrtimer hrtimer; }; struct arch_timer_cpu { - struct arch_timer_context vtimer; - struct arch_timer_context ptimer; + struct arch_timer_context timers[NR_KVM_TIMERS]; /* Background timer used when the guest is not running */ struct hrtimer bg_timer; - /* Physical timer emulation */ - struct hrtimer phys_timer; - /* Is the timer enabled */ bool enabled; + + /* + * We have multiple paths which can save/restore the timer state + * onto the hardware, so we need some way of keeping track of + * where the latest state is. + * + * loaded == true: State is loaded on the hardware registers. + * loaded == false: State is stored in memory. + */ + bool loaded; }; int kvm_timer_hyp_init(bool); @@ -98,10 +99,10 @@ void kvm_timer_init_vhe(void); bool kvm_arch_timer_get_input_level(int vintid); -#define vcpu_vtimer(v) (&(v)->arch.timer_cpu.vtimer) -#define vcpu_ptimer(v) (&(v)->arch.timer_cpu.ptimer) -#define vcpu_get_timer(v,t) \ - (t == TIMER_VTIMER ? vcpu_vtimer(v) : vcpu_ptimer(v)) +#define vcpu_timer(v) (&(v)->arch.timer_cpu) +#define vcpu_get_timer(v,t) (&vcpu_timer(v)->timers[(t)]) +#define vcpu_vtimer(v) (&(v)->arch.timer_cpu.timers[TIMER_VTIMER]) +#define vcpu_ptimer(v) (&(v)->arch.timer_cpu.timers[TIMER_PTIMER]) u64 kvm_arm_timer_read_sysreg(struct kvm_vcpu *vcpu, enum kvm_arch_timers tmr, diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c index f7d377448438..471f9fd004c9 100644 --- a/virt/kvm/arm/arch_timer.c +++ b/virt/kvm/arm/arch_timer.c @@ -184,13 +184,11 @@ static enum hrtimer_restart kvm_bg_timer_expire(struct hrtimer *hrt) static enum hrtimer_restart kvm_phys_timer_expire(struct hrtimer *hrt) { struct arch_timer_context *ptimer; - struct arch_timer_cpu *timer; struct kvm_vcpu *vcpu; u64 ns; - timer = container_of(hrt, struct arch_timer_cpu, phys_timer); - vcpu = container_of(timer, struct kvm_vcpu, arch.timer_cpu); - ptimer = vcpu_ptimer(vcpu); + ptimer = container_of(hrt, struct arch_timer_context, hrtimer); + vcpu = ptimer->vcpu; /* * Check that the timer has really expired from the guest's @@ -209,9 +207,10 @@ static enum hrtimer_restart kvm_phys_timer_expire(struct hrtimer *hrt) static bool kvm_timer_should_fire(struct arch_timer_context *timer_ctx) { + struct arch_timer_cpu *timer = vcpu_timer(timer_ctx->vcpu); u64 cval, now; - if (timer_ctx->loaded) { + if (timer->loaded) { u32 cnt_ctl; /* Only the virtual timer can be loaded so far */ @@ -280,7 +279,6 @@ static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level, /* Schedule the background timer for the emulated timer. */ static void phys_timer_emulate(struct kvm_vcpu *vcpu) { - struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; struct arch_timer_context *ptimer = vcpu_ptimer(vcpu); /* @@ -289,11 +287,11 @@ static void phys_timer_emulate(struct kvm_vcpu *vcpu) * then we also don't need a soft timer. */ if (kvm_timer_should_fire(ptimer) || !kvm_timer_irq_can_fire(ptimer)) { - soft_timer_cancel(&timer->phys_timer); + soft_timer_cancel(&ptimer->hrtimer); return; } - soft_timer_start(&timer->phys_timer, kvm_timer_compute_delta(ptimer)); + soft_timer_start(&ptimer->hrtimer, kvm_timer_compute_delta(ptimer)); } /* @@ -303,7 +301,7 @@ static void phys_timer_emulate(struct kvm_vcpu *vcpu) */ static void kvm_timer_update_state(struct kvm_vcpu *vcpu) { - struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; + struct arch_timer_cpu *timer = vcpu_timer(vcpu); struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); struct arch_timer_context *ptimer = vcpu_ptimer(vcpu); bool level; @@ -329,13 +327,13 @@ static void kvm_timer_update_state(struct kvm_vcpu *vcpu) static void vtimer_save_state(struct kvm_vcpu *vcpu) { - struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; + struct arch_timer_cpu *timer = vcpu_timer(vcpu); struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); unsigned long flags; local_irq_save(flags); - if (!vtimer->loaded) + if (!timer->loaded) goto out; if (timer->enabled) { @@ -347,7 +345,7 @@ static void vtimer_save_state(struct kvm_vcpu *vcpu) write_sysreg_el0(0, cntv_ctl); isb(); - vtimer->loaded = false; + timer->loaded = false; out: local_irq_restore(flags); } @@ -359,7 +357,7 @@ out: */ static void kvm_timer_blocking(struct kvm_vcpu *vcpu) { - struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; + struct arch_timer_cpu *timer = vcpu_timer(vcpu); struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); struct arch_timer_context *ptimer = vcpu_ptimer(vcpu); @@ -379,20 +377,20 @@ static void kvm_timer_blocking(struct kvm_vcpu *vcpu) static void kvm_timer_unblocking(struct kvm_vcpu *vcpu) { - struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; + struct arch_timer_cpu *timer = vcpu_timer(vcpu); soft_timer_cancel(&timer->bg_timer); } static void vtimer_restore_state(struct kvm_vcpu *vcpu) { - struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; + struct arch_timer_cpu *timer = vcpu_timer(vcpu); struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); unsigned long flags; local_irq_save(flags); - if (vtimer->loaded) + if (timer->loaded) goto out; if (timer->enabled) { @@ -401,7 +399,7 @@ static void vtimer_restore_state(struct kvm_vcpu *vcpu) write_sysreg_el0(vtimer->cnt_ctl, cntv_ctl); } - vtimer->loaded = true; + timer->loaded = true; out: local_irq_restore(flags); } @@ -462,7 +460,7 @@ static void kvm_timer_vcpu_load_nogic(struct kvm_vcpu *vcpu) void kvm_timer_vcpu_load(struct kvm_vcpu *vcpu) { - struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; + struct arch_timer_cpu *timer = vcpu_timer(vcpu); struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); struct arch_timer_context *ptimer = vcpu_ptimer(vcpu); @@ -507,7 +505,8 @@ bool kvm_timer_should_notify_user(struct kvm_vcpu *vcpu) void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu) { - struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; + struct arch_timer_cpu *timer = vcpu_timer(vcpu); + struct arch_timer_context *ptimer = vcpu_ptimer(vcpu); if (unlikely(!timer->enabled)) return; @@ -523,7 +522,7 @@ void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu) * In any case, we re-schedule the hrtimer for the physical timer when * coming back to the VCPU thread in kvm_timer_vcpu_load(). */ - soft_timer_cancel(&timer->phys_timer); + soft_timer_cancel(&ptimer->hrtimer); if (swait_active(kvm_arch_vcpu_wq(vcpu))) kvm_timer_blocking(vcpu); @@ -559,7 +558,7 @@ static void unmask_vtimer_irq_user(struct kvm_vcpu *vcpu) void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu) { - struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; + struct arch_timer_cpu *timer = vcpu_timer(vcpu); if (unlikely(!timer->enabled)) return; @@ -570,7 +569,7 @@ void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu) int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu) { - struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; + struct arch_timer_cpu *timer = vcpu_timer(vcpu); struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); struct arch_timer_context *ptimer = vcpu_ptimer(vcpu); @@ -611,22 +610,25 @@ static void update_vtimer_cntvoff(struct kvm_vcpu *vcpu, u64 cntvoff) void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu) { - struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; + struct arch_timer_cpu *timer = vcpu_timer(vcpu); struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); struct arch_timer_context *ptimer = vcpu_ptimer(vcpu); /* Synchronize cntvoff across all vtimers of a VM. */ update_vtimer_cntvoff(vcpu, kvm_phys_timer_read()); - vcpu_ptimer(vcpu)->cntvoff = 0; + ptimer->cntvoff = 0; hrtimer_init(&timer->bg_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); timer->bg_timer.function = kvm_bg_timer_expire; - hrtimer_init(&timer->phys_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); - timer->phys_timer.function = kvm_phys_timer_expire; + hrtimer_init(&ptimer->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); + ptimer->hrtimer.function = kvm_phys_timer_expire; vtimer->irq.irq = default_vtimer_irq.irq; ptimer->irq.irq = default_ptimer_irq.irq; + + vtimer->vcpu = vcpu; + ptimer->vcpu = vcpu; } static void kvm_timer_init_interrupt(void *info) @@ -860,7 +862,7 @@ out_free_irq: void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu) { - struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; + struct arch_timer_cpu *timer = vcpu_timer(vcpu); soft_timer_cancel(&timer->bg_timer); } @@ -904,7 +906,7 @@ bool kvm_arch_timer_get_input_level(int vintid) int kvm_timer_enable(struct kvm_vcpu *vcpu) { - struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; + struct arch_timer_cpu *timer = vcpu_timer(vcpu); struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); int ret; -- cgit v1.2.3-55-g7522 From 9e01dc76be6a3b5768cb02130d2ff0055a68809a Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Tue, 19 Feb 2019 14:04:30 +0100 Subject: KVM: arm/arm64: arch_timer: Assign the phys timer on VHE systems VHE systems don't have to emulate the physical timer, we can simply assign the EL1 physical timer directly to the VM as the host always uses the EL2 timers. In order to minimize the amount of cruft, AArch32 gets definitions for the physical timer too, but is should be generally unused on this architecture. Co-written with Marc Zyngier Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall --- arch/arm/include/asm/kvm_hyp.h | 4 + include/kvm/arm_arch_timer.h | 6 ++ virt/kvm/arm/arch_timer.c | 219 ++++++++++++++++++++++++++++++++--------- 3 files changed, 180 insertions(+), 49 deletions(-) (limited to 'virt') diff --git a/arch/arm/include/asm/kvm_hyp.h b/arch/arm/include/asm/kvm_hyp.h index e93a0cac9add..87bcd18df8d5 100644 --- a/arch/arm/include/asm/kvm_hyp.h +++ b/arch/arm/include/asm/kvm_hyp.h @@ -40,6 +40,7 @@ #define TTBR1 __ACCESS_CP15_64(1, c2) #define VTTBR __ACCESS_CP15_64(6, c2) #define PAR __ACCESS_CP15_64(0, c7) +#define CNTP_CVAL __ACCESS_CP15_64(2, c14) #define CNTV_CVAL __ACCESS_CP15_64(3, c14) #define CNTVOFF __ACCESS_CP15_64(4, c14) @@ -85,6 +86,7 @@ #define TID_PRIV __ACCESS_CP15(c13, 0, c0, 4) #define HTPIDR __ACCESS_CP15(c13, 4, c0, 2) #define CNTKCTL __ACCESS_CP15(c14, 0, c1, 0) +#define CNTP_CTL __ACCESS_CP15(c14, 0, c2, 1) #define CNTV_CTL __ACCESS_CP15(c14, 0, c3, 1) #define CNTHCTL __ACCESS_CP15(c14, 4, c1, 0) @@ -94,6 +96,8 @@ #define read_sysreg_el0(r) read_sysreg(r##_el0) #define write_sysreg_el0(v, r) write_sysreg(v, r##_el0) +#define cntp_ctl_el0 CNTP_CTL +#define cntp_cval_el0 CNTP_CVAL #define cntv_ctl_el0 CNTV_CTL #define cntv_cval_el0 CNTV_CVAL #define cntvoff_el2 CNTVOFF diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h index ab835112204d..6d4a33a9c45a 100644 --- a/include/kvm/arm_arch_timer.h +++ b/include/kvm/arm_arch_timer.h @@ -50,6 +50,10 @@ struct arch_timer_context { /* Emulated Timer (may be unused) */ struct hrtimer hrtimer; + + /* Duplicated state from arch_timer.c for convenience */ + u32 host_timer_irq; + u32 host_timer_irq_flags; }; struct arch_timer_cpu { @@ -104,6 +108,8 @@ bool kvm_arch_timer_get_input_level(int vintid); #define vcpu_vtimer(v) (&(v)->arch.timer_cpu.timers[TIMER_VTIMER]) #define vcpu_ptimer(v) (&(v)->arch.timer_cpu.timers[TIMER_PTIMER]) +#define arch_timer_ctx_index(ctx) ((ctx) - vcpu_timer((ctx)->vcpu)->timers) + u64 kvm_arm_timer_read_sysreg(struct kvm_vcpu *vcpu, enum kvm_arch_timers tmr, enum kvm_arch_timer_regs treg); diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c index 471f9fd004c9..10c15151c87e 100644 --- a/virt/kvm/arm/arch_timer.c +++ b/virt/kvm/arm/arch_timer.c @@ -35,7 +35,9 @@ static struct timecounter *timecounter; static unsigned int host_vtimer_irq; +static unsigned int host_ptimer_irq; static u32 host_vtimer_irq_flags; +static u32 host_ptimer_irq_flags; static DEFINE_STATIC_KEY_FALSE(has_gic_active_state); @@ -86,20 +88,24 @@ static void soft_timer_cancel(struct hrtimer *hrt) static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id) { struct kvm_vcpu *vcpu = *(struct kvm_vcpu **)dev_id; - struct arch_timer_context *vtimer; + struct arch_timer_context *ctx; /* * We may see a timer interrupt after vcpu_put() has been called which * sets the CPU's vcpu pointer to NULL, because even though the timer - * has been disabled in vtimer_save_state(), the hardware interrupt + * has been disabled in timer_save_state(), the hardware interrupt * signal may not have been retired from the interrupt controller yet. */ if (!vcpu) return IRQ_HANDLED; - vtimer = vcpu_vtimer(vcpu); - if (kvm_timer_should_fire(vtimer)) - kvm_timer_update_irq(vcpu, true, vtimer); + if (irq == host_vtimer_irq) + ctx = vcpu_vtimer(vcpu); + else + ctx = vcpu_ptimer(vcpu); + + if (kvm_timer_should_fire(ctx)) + kvm_timer_update_irq(vcpu, true, ctx); if (userspace_irqchip(vcpu->kvm) && !static_branch_unlikely(&has_gic_active_state)) @@ -208,13 +214,25 @@ static enum hrtimer_restart kvm_phys_timer_expire(struct hrtimer *hrt) static bool kvm_timer_should_fire(struct arch_timer_context *timer_ctx) { struct arch_timer_cpu *timer = vcpu_timer(timer_ctx->vcpu); + enum kvm_arch_timers index = arch_timer_ctx_index(timer_ctx); u64 cval, now; if (timer->loaded) { - u32 cnt_ctl; + u32 cnt_ctl = 0; + + switch (index) { + case TIMER_VTIMER: + cnt_ctl = read_sysreg_el0(cntv_ctl); + break; + case TIMER_PTIMER: + cnt_ctl = read_sysreg_el0(cntp_ctl); + break; + case NR_KVM_TIMERS: + /* GCC is braindead */ + cnt_ctl = 0; + break; + } - /* Only the virtual timer can be loaded so far */ - cnt_ctl = read_sysreg_el0(cntv_ctl); return (cnt_ctl & ARCH_TIMER_CTRL_ENABLE) && (cnt_ctl & ARCH_TIMER_CTRL_IT_STAT) && !(cnt_ctl & ARCH_TIMER_CTRL_IT_MASK); @@ -310,7 +328,7 @@ static void kvm_timer_update_state(struct kvm_vcpu *vcpu) return; /* - * The vtimer virtual interrupt is a 'mapped' interrupt, meaning part + * If the timer virtual interrupt is a 'mapped' interrupt, part * of its lifecycle is offloaded to the hardware, and we therefore may * not have lowered the irq.level value before having to signal a new * interrupt, but have to signal an interrupt every time the level is @@ -319,31 +337,55 @@ static void kvm_timer_update_state(struct kvm_vcpu *vcpu) level = kvm_timer_should_fire(vtimer); kvm_timer_update_irq(vcpu, level, vtimer); + if (has_vhe()) { + level = kvm_timer_should_fire(ptimer); + kvm_timer_update_irq(vcpu, level, ptimer); + + return; + } + phys_timer_emulate(vcpu); if (kvm_timer_should_fire(ptimer) != ptimer->irq.level) kvm_timer_update_irq(vcpu, !ptimer->irq.level, ptimer); } -static void vtimer_save_state(struct kvm_vcpu *vcpu) +static void timer_save_state(struct arch_timer_context *ctx) { - struct arch_timer_cpu *timer = vcpu_timer(vcpu); - struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); + struct arch_timer_cpu *timer = vcpu_timer(ctx->vcpu); + enum kvm_arch_timers index = arch_timer_ctx_index(ctx); unsigned long flags; + if (!timer->enabled) + return; + local_irq_save(flags); if (!timer->loaded) goto out; - if (timer->enabled) { - vtimer->cnt_ctl = read_sysreg_el0(cntv_ctl); - vtimer->cnt_cval = read_sysreg_el0(cntv_cval); - } + switch (index) { + case TIMER_VTIMER: + ctx->cnt_ctl = read_sysreg_el0(cntv_ctl); + ctx->cnt_cval = read_sysreg_el0(cntv_cval); - /* Disable the virtual timer */ - write_sysreg_el0(0, cntv_ctl); - isb(); + /* Disable the timer */ + write_sysreg_el0(0, cntv_ctl); + isb(); + + break; + case TIMER_PTIMER: + ctx->cnt_ctl = read_sysreg_el0(cntp_ctl); + ctx->cnt_cval = read_sysreg_el0(cntp_cval); + + /* Disable the timer */ + write_sysreg_el0(0, cntp_ctl); + isb(); + + break; + case NR_KVM_TIMERS: + break; /* GCC is braindead */ + } timer->loaded = false; out: @@ -382,21 +424,33 @@ static void kvm_timer_unblocking(struct kvm_vcpu *vcpu) soft_timer_cancel(&timer->bg_timer); } -static void vtimer_restore_state(struct kvm_vcpu *vcpu) +static void timer_restore_state(struct arch_timer_context *ctx) { - struct arch_timer_cpu *timer = vcpu_timer(vcpu); - struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); + struct arch_timer_cpu *timer = vcpu_timer(ctx->vcpu); + enum kvm_arch_timers index = arch_timer_ctx_index(ctx); unsigned long flags; + if (!timer->enabled) + return; + local_irq_save(flags); if (timer->loaded) goto out; - if (timer->enabled) { - write_sysreg_el0(vtimer->cnt_cval, cntv_cval); + switch (index) { + case TIMER_VTIMER: + write_sysreg_el0(ctx->cnt_cval, cntv_cval); + isb(); + write_sysreg_el0(ctx->cnt_ctl, cntv_ctl); + break; + case TIMER_PTIMER: + write_sysreg_el0(ctx->cnt_cval, cntp_cval); isb(); - write_sysreg_el0(vtimer->cnt_ctl, cntv_ctl); + write_sysreg_el0(ctx->cnt_ctl, cntp_ctl); + break; + case NR_KVM_TIMERS: + break; /* GCC is braindead */ } timer->loaded = true; @@ -419,23 +473,23 @@ static void set_cntvoff(u64 cntvoff) kvm_call_hyp(__kvm_timer_set_cntvoff, low, high); } -static inline void set_vtimer_irq_phys_active(struct kvm_vcpu *vcpu, bool active) +static inline void set_timer_irq_phys_active(struct arch_timer_context *ctx, bool active) { int r; - r = irq_set_irqchip_state(host_vtimer_irq, IRQCHIP_STATE_ACTIVE, active); + r = irq_set_irqchip_state(ctx->host_timer_irq, IRQCHIP_STATE_ACTIVE, active); WARN_ON(r); } -static void kvm_timer_vcpu_load_gic(struct kvm_vcpu *vcpu) +static void kvm_timer_vcpu_load_gic(struct arch_timer_context *ctx) { - struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); + struct kvm_vcpu *vcpu = ctx->vcpu; bool phys_active; if (irqchip_in_kernel(vcpu->kvm)) - phys_active = kvm_vgic_map_is_active(vcpu, vtimer->irq.irq); + phys_active = kvm_vgic_map_is_active(vcpu, ctx->irq.irq); else - phys_active = vtimer->irq.level; - set_vtimer_irq_phys_active(vcpu, phys_active); + phys_active = ctx->irq.level; + set_timer_irq_phys_active(ctx, phys_active); } static void kvm_timer_vcpu_load_nogic(struct kvm_vcpu *vcpu) @@ -467,14 +521,22 @@ void kvm_timer_vcpu_load(struct kvm_vcpu *vcpu) if (unlikely(!timer->enabled)) return; - if (static_branch_likely(&has_gic_active_state)) - kvm_timer_vcpu_load_gic(vcpu); - else + if (static_branch_likely(&has_gic_active_state)) { + kvm_timer_vcpu_load_gic(vtimer); + if (has_vhe()) + kvm_timer_vcpu_load_gic(ptimer); + } else { kvm_timer_vcpu_load_nogic(vcpu); + } set_cntvoff(vtimer->cntvoff); - vtimer_restore_state(vcpu); + timer_restore_state(vtimer); + + if (has_vhe()) { + timer_restore_state(ptimer); + return; + } /* Set the background timer for the physical timer emulation. */ phys_timer_emulate(vcpu); @@ -506,12 +568,17 @@ bool kvm_timer_should_notify_user(struct kvm_vcpu *vcpu) void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu) { struct arch_timer_cpu *timer = vcpu_timer(vcpu); + struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); struct arch_timer_context *ptimer = vcpu_ptimer(vcpu); if (unlikely(!timer->enabled)) return; - vtimer_save_state(vcpu); + timer_save_state(vtimer); + if (has_vhe()) { + timer_save_state(ptimer); + return; + } /* * Cancel the physical timer emulation, because the only case where we @@ -534,8 +601,7 @@ void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu) * counter of non-VHE case. For VHE, the virtual counter uses a fixed * virtual offset of zero, so no need to zero CNTVOFF_EL2 register. */ - if (!has_vhe()) - set_cntvoff(0); + set_cntvoff(0); } /* @@ -550,7 +616,7 @@ static void unmask_vtimer_irq_user(struct kvm_vcpu *vcpu) if (!kvm_timer_should_fire(vtimer)) { kvm_timer_update_irq(vcpu, false, vtimer); if (static_branch_likely(&has_gic_active_state)) - set_vtimer_irq_phys_active(vcpu, false); + set_timer_irq_phys_active(vtimer, false); else enable_percpu_irq(host_vtimer_irq, host_vtimer_irq_flags); } @@ -625,7 +691,12 @@ void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu) ptimer->hrtimer.function = kvm_phys_timer_expire; vtimer->irq.irq = default_vtimer_irq.irq; + vtimer->host_timer_irq = host_vtimer_irq; + vtimer->host_timer_irq_flags = host_vtimer_irq_flags; + ptimer->irq.irq = default_ptimer_irq.irq; + ptimer->host_timer_irq = host_ptimer_irq; + ptimer->host_timer_irq_flags = host_ptimer_irq_flags; vtimer->vcpu = vcpu; ptimer->vcpu = vcpu; @@ -634,6 +705,7 @@ void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu) static void kvm_timer_init_interrupt(void *info) { enable_percpu_irq(host_vtimer_irq, host_vtimer_irq_flags); + enable_percpu_irq(host_ptimer_irq, host_ptimer_irq_flags); } int kvm_arm_timer_set_reg(struct kvm_vcpu *vcpu, u64 regid, u64 value) @@ -815,6 +887,8 @@ int kvm_timer_hyp_init(bool has_gic) return -ENODEV; } + /* First, do the virtual EL1 timer irq */ + if (info->virtual_irq <= 0) { kvm_err("kvm_arch_timer: invalid virtual timer IRQ: %d\n", info->virtual_irq); @@ -825,15 +899,15 @@ int kvm_timer_hyp_init(bool has_gic) host_vtimer_irq_flags = irq_get_trigger_type(host_vtimer_irq); if (host_vtimer_irq_flags != IRQF_TRIGGER_HIGH && host_vtimer_irq_flags != IRQF_TRIGGER_LOW) { - kvm_err("Invalid trigger for IRQ%d, assuming level low\n", + kvm_err("Invalid trigger for vtimer IRQ%d, assuming level low\n", host_vtimer_irq); host_vtimer_irq_flags = IRQF_TRIGGER_LOW; } err = request_percpu_irq(host_vtimer_irq, kvm_arch_timer_handler, - "kvm guest timer", kvm_get_running_vcpus()); + "kvm guest vtimer", kvm_get_running_vcpus()); if (err) { - kvm_err("kvm_arch_timer: can't request interrupt %d (%d)\n", + kvm_err("kvm_arch_timer: can't request vtimer interrupt %d (%d)\n", host_vtimer_irq, err); return err; } @@ -851,6 +925,43 @@ int kvm_timer_hyp_init(bool has_gic) kvm_debug("virtual timer IRQ%d\n", host_vtimer_irq); + /* Now let's do the physical EL1 timer irq */ + + if (info->physical_irq > 0) { + host_ptimer_irq = info->physical_irq; + host_ptimer_irq_flags = irq_get_trigger_type(host_ptimer_irq); + if (host_ptimer_irq_flags != IRQF_TRIGGER_HIGH && + host_ptimer_irq_flags != IRQF_TRIGGER_LOW) { + kvm_err("Invalid trigger for ptimer IRQ%d, assuming level low\n", + host_ptimer_irq); + host_ptimer_irq_flags = IRQF_TRIGGER_LOW; + } + + err = request_percpu_irq(host_ptimer_irq, kvm_arch_timer_handler, + "kvm guest ptimer", kvm_get_running_vcpus()); + if (err) { + kvm_err("kvm_arch_timer: can't request ptimer interrupt %d (%d)\n", + host_ptimer_irq, err); + return err; + } + + if (has_gic) { + err = irq_set_vcpu_affinity(host_ptimer_irq, + kvm_get_running_vcpus()); + if (err) { + kvm_err("kvm_arch_timer: error setting vcpu affinity\n"); + goto out_free_irq; + } + } + + kvm_debug("physical timer IRQ%d\n", host_ptimer_irq); + } else if (has_vhe()) { + kvm_err("kvm_arch_timer: invalid physical timer IRQ: %d\n", + info->physical_irq); + err = -ENODEV; + goto out_free_irq; + } + cpuhp_setup_state(CPUHP_AP_KVM_ARM_TIMER_STARTING, "kvm/arm/timer:starting", kvm_timer_starting_cpu, kvm_timer_dying_cpu); @@ -898,8 +1009,10 @@ bool kvm_arch_timer_get_input_level(int vintid) if (vintid == vcpu_vtimer(vcpu)->irq.irq) timer = vcpu_vtimer(vcpu); + else if (vintid == vcpu_ptimer(vcpu)->irq.irq) + timer = vcpu_ptimer(vcpu); else - BUG(); /* We only map the vtimer so far */ + BUG(); return kvm_timer_should_fire(timer); } @@ -908,6 +1021,7 @@ int kvm_timer_enable(struct kvm_vcpu *vcpu) { struct arch_timer_cpu *timer = vcpu_timer(vcpu); struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); + struct arch_timer_context *ptimer = vcpu_ptimer(vcpu); int ret; if (timer->enabled) @@ -930,14 +1044,21 @@ int kvm_timer_enable(struct kvm_vcpu *vcpu) if (ret) return ret; + if (has_vhe()) { + ret = kvm_vgic_map_phys_irq(vcpu, host_ptimer_irq, ptimer->irq.irq, + kvm_arch_timer_get_input_level); + if (ret) + return ret; + } + no_vgic: timer->enabled = 1; return 0; } /* - * On VHE system, we only need to configure trap on physical timer and counter - * accesses in EL0 and EL1 once, not for every world switch. + * On VHE system, we only need to configure the EL2 timer trap register once, + * not for every world switch. * The host kernel runs at EL2 with HCR_EL2.TGE == 1, * and this makes those bits have no effect for the host kernel execution. */ @@ -948,11 +1069,11 @@ void kvm_timer_init_vhe(void) u64 val; /* - * Disallow physical timer access for the guest. - * Physical counter access is allowed. + * VHE systems allow the guest direct access to the EL1 physical + * timer/counter. */ val = read_sysreg(cnthctl_el2); - val &= ~(CNTHCTL_EL1PCEN << cnthctl_shift); + val |= (CNTHCTL_EL1PCEN << cnthctl_shift); val |= (CNTHCTL_EL1PCTEN << cnthctl_shift); write_sysreg(val, cnthctl_el2); } -- cgit v1.2.3-55-g7522 From bee038a67487598ebbe995f85bf60c3a5b2e9099 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Fri, 4 Jan 2019 13:31:22 +0100 Subject: KVM: arm/arm64: Rework the timer code to use a timer_map We are currently emulating two timers in two different ways. When we add support for nested virtualization in the future, we are going to be emulating either two timers in two diffferent ways, or four timers in a single way. We need a unified data structure to keep track of how we map virtual state to physical state and we need to cleanup some of the timer code to operate more independently on a struct arch_timer_context instead of trying to consider the global state of the VCPU and recomputing all state. Co-written with Marc Zyngier Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall --- include/kvm/arm_arch_timer.h | 23 ++-- virt/kvm/arm/arch_timer.c | 295 +++++++++++++++++++++++-------------------- virt/kvm/arm/trace.h | 105 +++++++++++++++ 3 files changed, 278 insertions(+), 145 deletions(-) (limited to 'virt') diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h index 6d4a33a9c45a..05a18dd265b5 100644 --- a/include/kvm/arm_arch_timer.h +++ b/include/kvm/arm_arch_timer.h @@ -51,11 +51,24 @@ struct arch_timer_context { /* Emulated Timer (may be unused) */ struct hrtimer hrtimer; + /* + * We have multiple paths which can save/restore the timer state onto + * the hardware, so we need some way of keeping track of where the + * latest state is. + */ + bool loaded; + /* Duplicated state from arch_timer.c for convenience */ u32 host_timer_irq; u32 host_timer_irq_flags; }; +struct timer_map { + struct arch_timer_context *direct_vtimer; + struct arch_timer_context *direct_ptimer; + struct arch_timer_context *emul_ptimer; +}; + struct arch_timer_cpu { struct arch_timer_context timers[NR_KVM_TIMERS]; @@ -64,16 +77,6 @@ struct arch_timer_cpu { /* Is the timer enabled */ bool enabled; - - /* - * We have multiple paths which can save/restore the timer state - * onto the hardware, so we need some way of keeping track of - * where the latest state is. - * - * loaded == true: State is loaded on the hardware registers. - * loaded == false: State is stored in memory. - */ - bool loaded; }; int kvm_timer_hyp_init(bool); diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c index 10c15151c87e..17f9de73cc8a 100644 --- a/virt/kvm/arm/arch_timer.c +++ b/virt/kvm/arm/arch_timer.c @@ -68,6 +68,21 @@ u64 kvm_phys_timer_read(void) return timecounter->cc->read(timecounter->cc); } +static void get_timer_map(struct kvm_vcpu *vcpu, struct timer_map *map) +{ + if (has_vhe()) { + map->direct_vtimer = vcpu_vtimer(vcpu); + map->direct_ptimer = vcpu_ptimer(vcpu); + map->emul_ptimer = NULL; + } else { + map->direct_vtimer = vcpu_vtimer(vcpu); + map->direct_ptimer = NULL; + map->emul_ptimer = vcpu_ptimer(vcpu); + } + + trace_kvm_get_timer_map(vcpu->vcpu_id, map); +} + static inline bool userspace_irqchip(struct kvm *kvm) { return static_branch_unlikely(&userspace_irqchip_in_use) && @@ -89,6 +104,7 @@ static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id) { struct kvm_vcpu *vcpu = *(struct kvm_vcpu **)dev_id; struct arch_timer_context *ctx; + struct timer_map map; /* * We may see a timer interrupt after vcpu_put() has been called which @@ -99,10 +115,12 @@ static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id) if (!vcpu) return IRQ_HANDLED; + get_timer_map(vcpu, &map); + if (irq == host_vtimer_irq) - ctx = vcpu_vtimer(vcpu); + ctx = map.direct_vtimer; else - ctx = vcpu_ptimer(vcpu); + ctx = map.direct_ptimer; if (kvm_timer_should_fire(ctx)) kvm_timer_update_irq(vcpu, true, ctx); @@ -136,7 +154,9 @@ static u64 kvm_timer_compute_delta(struct arch_timer_context *timer_ctx) static bool kvm_timer_irq_can_fire(struct arch_timer_context *timer_ctx) { - return !(timer_ctx->cnt_ctl & ARCH_TIMER_CTRL_IT_MASK) && + WARN_ON(timer_ctx && timer_ctx->loaded); + return timer_ctx && + !(timer_ctx->cnt_ctl & ARCH_TIMER_CTRL_IT_MASK) && (timer_ctx->cnt_ctl & ARCH_TIMER_CTRL_ENABLE); } @@ -146,21 +166,22 @@ static bool kvm_timer_irq_can_fire(struct arch_timer_context *timer_ctx) */ static u64 kvm_timer_earliest_exp(struct kvm_vcpu *vcpu) { - u64 min_virt = ULLONG_MAX, min_phys = ULLONG_MAX; - struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); - struct arch_timer_context *ptimer = vcpu_ptimer(vcpu); + u64 min_delta = ULLONG_MAX; + int i; - if (kvm_timer_irq_can_fire(vtimer)) - min_virt = kvm_timer_compute_delta(vtimer); + for (i = 0; i < NR_KVM_TIMERS; i++) { + struct arch_timer_context *ctx = &vcpu->arch.timer_cpu.timers[i]; - if (kvm_timer_irq_can_fire(ptimer)) - min_phys = kvm_timer_compute_delta(ptimer); + WARN(ctx->loaded, "timer %d loaded\n", i); + if (kvm_timer_irq_can_fire(ctx)) + min_delta = min(min_delta, kvm_timer_compute_delta(ctx)); + } /* If none of timers can fire, then return 0 */ - if ((min_virt == ULLONG_MAX) && (min_phys == ULLONG_MAX)) + if (min_delta == ULLONG_MAX) return 0; - return min(min_virt, min_phys); + return min_delta; } static enum hrtimer_restart kvm_bg_timer_expire(struct hrtimer *hrt) @@ -187,37 +208,45 @@ static enum hrtimer_restart kvm_bg_timer_expire(struct hrtimer *hrt) return HRTIMER_NORESTART; } -static enum hrtimer_restart kvm_phys_timer_expire(struct hrtimer *hrt) +static enum hrtimer_restart kvm_hrtimer_expire(struct hrtimer *hrt) { - struct arch_timer_context *ptimer; + struct arch_timer_context *ctx; struct kvm_vcpu *vcpu; u64 ns; - ptimer = container_of(hrt, struct arch_timer_context, hrtimer); - vcpu = ptimer->vcpu; + ctx = container_of(hrt, struct arch_timer_context, hrtimer); + vcpu = ctx->vcpu; + + trace_kvm_timer_hrtimer_expire(ctx); /* * Check that the timer has really expired from the guest's * PoV (NTP on the host may have forced it to expire * early). If not ready, schedule for a later time. */ - ns = kvm_timer_compute_delta(ptimer); + ns = kvm_timer_compute_delta(ctx); if (unlikely(ns)) { hrtimer_forward_now(hrt, ns_to_ktime(ns)); return HRTIMER_RESTART; } - kvm_timer_update_irq(vcpu, true, ptimer); + kvm_timer_update_irq(vcpu, true, ctx); return HRTIMER_NORESTART; } static bool kvm_timer_should_fire(struct arch_timer_context *timer_ctx) { - struct arch_timer_cpu *timer = vcpu_timer(timer_ctx->vcpu); - enum kvm_arch_timers index = arch_timer_ctx_index(timer_ctx); + struct arch_timer_cpu *timer; + enum kvm_arch_timers index; u64 cval, now; - if (timer->loaded) { + if (!timer_ctx) + return false; + + timer = vcpu_timer(timer_ctx->vcpu); + index = arch_timer_ctx_index(timer_ctx); + + if (timer_ctx->loaded) { u32 cnt_ctl = 0; switch (index) { @@ -249,13 +278,13 @@ static bool kvm_timer_should_fire(struct arch_timer_context *timer_ctx) bool kvm_timer_is_pending(struct kvm_vcpu *vcpu) { - struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); - struct arch_timer_context *ptimer = vcpu_ptimer(vcpu); + struct timer_map map; - if (kvm_timer_should_fire(vtimer)) - return true; + get_timer_map(vcpu, &map); - return kvm_timer_should_fire(ptimer); + return kvm_timer_should_fire(map.direct_vtimer) || + kvm_timer_should_fire(map.direct_ptimer) || + kvm_timer_should_fire(map.emul_ptimer); } /* @@ -294,60 +323,28 @@ static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level, } } -/* Schedule the background timer for the emulated timer. */ -static void phys_timer_emulate(struct kvm_vcpu *vcpu) +static void timer_emulate(struct arch_timer_context *ctx) { - struct arch_timer_context *ptimer = vcpu_ptimer(vcpu); + bool should_fire = kvm_timer_should_fire(ctx); - /* - * If the timer can fire now, we don't need to have a soft timer - * scheduled for the future. If the timer cannot fire at all, - * then we also don't need a soft timer. - */ - if (kvm_timer_should_fire(ptimer) || !kvm_timer_irq_can_fire(ptimer)) { - soft_timer_cancel(&ptimer->hrtimer); - return; - } - - soft_timer_start(&ptimer->hrtimer, kvm_timer_compute_delta(ptimer)); -} - -/* - * Check if there was a change in the timer state, so that we should either - * raise or lower the line level to the GIC or schedule a background timer to - * emulate the physical timer. - */ -static void kvm_timer_update_state(struct kvm_vcpu *vcpu) -{ - struct arch_timer_cpu *timer = vcpu_timer(vcpu); - struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); - struct arch_timer_context *ptimer = vcpu_ptimer(vcpu); - bool level; + trace_kvm_timer_emulate(ctx, should_fire); - if (unlikely(!timer->enabled)) + if (should_fire) { + kvm_timer_update_irq(ctx->vcpu, true, ctx); return; + } /* - * If the timer virtual interrupt is a 'mapped' interrupt, part - * of its lifecycle is offloaded to the hardware, and we therefore may - * not have lowered the irq.level value before having to signal a new - * interrupt, but have to signal an interrupt every time the level is - * asserted. + * If the timer can fire now, we don't need to have a soft timer + * scheduled for the future. If the timer cannot fire at all, + * then we also don't need a soft timer. */ - level = kvm_timer_should_fire(vtimer); - kvm_timer_update_irq(vcpu, level, vtimer); - - if (has_vhe()) { - level = kvm_timer_should_fire(ptimer); - kvm_timer_update_irq(vcpu, level, ptimer); - + if (!kvm_timer_irq_can_fire(ctx)) { + soft_timer_cancel(&ctx->hrtimer); return; } - phys_timer_emulate(vcpu); - - if (kvm_timer_should_fire(ptimer) != ptimer->irq.level) - kvm_timer_update_irq(vcpu, !ptimer->irq.level, ptimer); + soft_timer_start(&ctx->hrtimer, kvm_timer_compute_delta(ctx)); } static void timer_save_state(struct arch_timer_context *ctx) @@ -361,7 +358,7 @@ static void timer_save_state(struct arch_timer_context *ctx) local_irq_save(flags); - if (!timer->loaded) + if (!ctx->loaded) goto out; switch (index) { @@ -384,10 +381,12 @@ static void timer_save_state(struct arch_timer_context *ctx) break; case NR_KVM_TIMERS: - break; /* GCC is braindead */ + BUG(); } - timer->loaded = false; + trace_kvm_timer_save_state(ctx); + + ctx->loaded = false; out: local_irq_restore(flags); } @@ -400,14 +399,17 @@ out: static void kvm_timer_blocking(struct kvm_vcpu *vcpu) { struct arch_timer_cpu *timer = vcpu_timer(vcpu); - struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); - struct arch_timer_context *ptimer = vcpu_ptimer(vcpu); + struct timer_map map; + + get_timer_map(vcpu, &map); /* - * If both timers are not capable of raising interrupts (disabled or + * If no timers are capable of raising interrupts (disabled or * masked), then there's no more work for us to do. */ - if (!kvm_timer_irq_can_fire(vtimer) && !kvm_timer_irq_can_fire(ptimer)) + if (!kvm_timer_irq_can_fire(map.direct_vtimer) && + !kvm_timer_irq_can_fire(map.direct_ptimer) && + !kvm_timer_irq_can_fire(map.emul_ptimer)) return; /* @@ -435,7 +437,7 @@ static void timer_restore_state(struct arch_timer_context *ctx) local_irq_save(flags); - if (timer->loaded) + if (ctx->loaded) goto out; switch (index) { @@ -450,10 +452,12 @@ static void timer_restore_state(struct arch_timer_context *ctx) write_sysreg_el0(ctx->cnt_ctl, cntp_ctl); break; case NR_KVM_TIMERS: - break; /* GCC is braindead */ + BUG(); } - timer->loaded = true; + trace_kvm_timer_restore_state(ctx); + + ctx->loaded = true; out: local_irq_restore(flags); } @@ -515,37 +519,31 @@ static void kvm_timer_vcpu_load_nogic(struct kvm_vcpu *vcpu) void kvm_timer_vcpu_load(struct kvm_vcpu *vcpu) { struct arch_timer_cpu *timer = vcpu_timer(vcpu); - struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); - struct arch_timer_context *ptimer = vcpu_ptimer(vcpu); + struct timer_map map; if (unlikely(!timer->enabled)) return; + get_timer_map(vcpu, &map); + if (static_branch_likely(&has_gic_active_state)) { - kvm_timer_vcpu_load_gic(vtimer); - if (has_vhe()) - kvm_timer_vcpu_load_gic(ptimer); + kvm_timer_vcpu_load_gic(map.direct_vtimer); + if (map.direct_ptimer) + kvm_timer_vcpu_load_gic(map.direct_ptimer); } else { kvm_timer_vcpu_load_nogic(vcpu); } - set_cntvoff(vtimer->cntvoff); - - timer_restore_state(vtimer); - - if (has_vhe()) { - timer_restore_state(ptimer); - return; - } - - /* Set the background timer for the physical timer emulation. */ - phys_timer_emulate(vcpu); + set_cntvoff(map.direct_vtimer->cntvoff); kvm_timer_unblocking(vcpu); - /* If the timer fired while we weren't running, inject it now */ - if (kvm_timer_should_fire(ptimer) != ptimer->irq.level) - kvm_timer_update_irq(vcpu, !ptimer->irq.level, ptimer); + timer_restore_state(map.direct_vtimer); + if (map.direct_ptimer) + timer_restore_state(map.direct_ptimer); + + if (map.emul_ptimer) + timer_emulate(map.emul_ptimer); } bool kvm_timer_should_notify_user(struct kvm_vcpu *vcpu) @@ -568,20 +566,19 @@ bool kvm_timer_should_notify_user(struct kvm_vcpu *vcpu) void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu) { struct arch_timer_cpu *timer = vcpu_timer(vcpu); - struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); - struct arch_timer_context *ptimer = vcpu_ptimer(vcpu); + struct timer_map map; if (unlikely(!timer->enabled)) return; - timer_save_state(vtimer); - if (has_vhe()) { - timer_save_state(ptimer); - return; - } + get_timer_map(vcpu, &map); + + timer_save_state(map.direct_vtimer); + if (map.direct_ptimer) + timer_save_state(map.direct_ptimer); /* - * Cancel the physical timer emulation, because the only case where we + * Cancel soft timer emulation, because the only case where we * need it after a vcpu_put is in the context of a sleeping VCPU, and * in that case we already factor in the deadline for the physical * timer when scheduling the bg_timer. @@ -589,7 +586,8 @@ void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu) * In any case, we re-schedule the hrtimer for the physical timer when * coming back to the VCPU thread in kvm_timer_vcpu_load(). */ - soft_timer_cancel(&ptimer->hrtimer); + if (map.emul_ptimer) + soft_timer_cancel(&map.emul_ptimer->hrtimer); if (swait_active(kvm_arch_vcpu_wq(vcpu))) kvm_timer_blocking(vcpu); @@ -636,8 +634,9 @@ void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu) int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu) { struct arch_timer_cpu *timer = vcpu_timer(vcpu); - struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); - struct arch_timer_context *ptimer = vcpu_ptimer(vcpu); + struct timer_map map; + + get_timer_map(vcpu, &map); /* * The bits in CNTV_CTL are architecturally reset to UNKNOWN for ARMv8 @@ -645,12 +644,22 @@ int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu) * resets the timer to be disabled and unmasked and is compliant with * the ARMv7 architecture. */ - vtimer->cnt_ctl = 0; - ptimer->cnt_ctl = 0; - kvm_timer_update_state(vcpu); + vcpu_vtimer(vcpu)->cnt_ctl = 0; + vcpu_ptimer(vcpu)->cnt_ctl = 0; + + if (timer->enabled) { + kvm_timer_update_irq(vcpu, false, vcpu_vtimer(vcpu)); + kvm_timer_update_irq(vcpu, false, vcpu_ptimer(vcpu)); + + if (irqchip_in_kernel(vcpu->kvm)) { + kvm_vgic_reset_mapped_irq(vcpu, map.direct_vtimer->irq.irq); + if (map.direct_ptimer) + kvm_vgic_reset_mapped_irq(vcpu, map.direct_ptimer->irq.irq); + } + } - if (timer->enabled && irqchip_in_kernel(vcpu->kvm)) - kvm_vgic_reset_mapped_irq(vcpu, vtimer->irq.irq); + if (map.emul_ptimer) + soft_timer_cancel(&map.emul_ptimer->hrtimer); return 0; } @@ -687,15 +696,18 @@ void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu) hrtimer_init(&timer->bg_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); timer->bg_timer.function = kvm_bg_timer_expire; + hrtimer_init(&vtimer->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); hrtimer_init(&ptimer->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); - ptimer->hrtimer.function = kvm_phys_timer_expire; + vtimer->hrtimer.function = kvm_hrtimer_expire; + ptimer->hrtimer.function = kvm_hrtimer_expire; vtimer->irq.irq = default_vtimer_irq.irq; - vtimer->host_timer_irq = host_vtimer_irq; - vtimer->host_timer_irq_flags = host_vtimer_irq_flags; - ptimer->irq.irq = default_ptimer_irq.irq; + + vtimer->host_timer_irq = host_vtimer_irq; ptimer->host_timer_irq = host_ptimer_irq; + + vtimer->host_timer_irq_flags = host_vtimer_irq_flags; ptimer->host_timer_irq_flags = host_ptimer_irq_flags; vtimer->vcpu = vcpu; @@ -710,32 +722,39 @@ static void kvm_timer_init_interrupt(void *info) int kvm_arm_timer_set_reg(struct kvm_vcpu *vcpu, u64 regid, u64 value) { + struct arch_timer_context *timer; + bool level; + switch (regid) { case KVM_REG_ARM_TIMER_CTL: - kvm_arm_timer_write(vcpu, - vcpu_vtimer(vcpu), TIMER_REG_CTL, value); + timer = vcpu_vtimer(vcpu); + kvm_arm_timer_write(vcpu, timer, TIMER_REG_CTL, value); break; case KVM_REG_ARM_TIMER_CNT: + timer = vcpu_vtimer(vcpu); update_vtimer_cntvoff(vcpu, kvm_phys_timer_read() - value); break; case KVM_REG_ARM_TIMER_CVAL: - kvm_arm_timer_write(vcpu, - vcpu_vtimer(vcpu), TIMER_REG_CVAL, value); + timer = vcpu_vtimer(vcpu); + kvm_arm_timer_write(vcpu, timer, TIMER_REG_CVAL, value); break; case KVM_REG_ARM_PTIMER_CTL: - kvm_arm_timer_write(vcpu, - vcpu_ptimer(vcpu), TIMER_REG_CTL, value); + timer = vcpu_ptimer(vcpu); + kvm_arm_timer_write(vcpu, timer, TIMER_REG_CTL, value); break; case KVM_REG_ARM_PTIMER_CVAL: - kvm_arm_timer_write(vcpu, - vcpu_ptimer(vcpu), TIMER_REG_CVAL, value); + timer = vcpu_ptimer(vcpu); + kvm_arm_timer_write(vcpu, timer, TIMER_REG_CVAL, value); break; default: return -1; } - kvm_timer_update_state(vcpu); + level = kvm_timer_should_fire(timer); + kvm_timer_update_irq(vcpu, level, timer); + timer_emulate(timer); + return 0; } @@ -1020,8 +1039,7 @@ bool kvm_arch_timer_get_input_level(int vintid) int kvm_timer_enable(struct kvm_vcpu *vcpu) { struct arch_timer_cpu *timer = vcpu_timer(vcpu); - struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); - struct arch_timer_context *ptimer = vcpu_ptimer(vcpu); + struct timer_map map; int ret; if (timer->enabled) @@ -1039,18 +1057,25 @@ int kvm_timer_enable(struct kvm_vcpu *vcpu) return -EINVAL; } - ret = kvm_vgic_map_phys_irq(vcpu, host_vtimer_irq, vtimer->irq.irq, + get_timer_map(vcpu, &map); + + ret = kvm_vgic_map_phys_irq(vcpu, + map.direct_vtimer->host_timer_irq, + map.direct_vtimer->irq.irq, kvm_arch_timer_get_input_level); if (ret) return ret; - if (has_vhe()) { - ret = kvm_vgic_map_phys_irq(vcpu, host_ptimer_irq, ptimer->irq.irq, + if (map.direct_ptimer) { + ret = kvm_vgic_map_phys_irq(vcpu, + map.direct_ptimer->host_timer_irq, + map.direct_ptimer->irq.irq, kvm_arch_timer_get_input_level); - if (ret) - return ret; } + if (ret) + return ret; + no_vgic: timer->enabled = 1; return 0; diff --git a/virt/kvm/arm/trace.h b/virt/kvm/arm/trace.h index 3828beab93f2..54bb059243b9 100644 --- a/virt/kvm/arm/trace.h +++ b/virt/kvm/arm/trace.h @@ -2,6 +2,7 @@ #if !defined(_TRACE_KVM_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_KVM_H +#include #include #undef TRACE_SYSTEM @@ -262,6 +263,110 @@ TRACE_EVENT(kvm_timer_update_irq, __entry->vcpu_id, __entry->irq, __entry->level) ); +TRACE_EVENT(kvm_get_timer_map, + TP_PROTO(unsigned long vcpu_id, struct timer_map *map), + TP_ARGS(vcpu_id, map), + + TP_STRUCT__entry( + __field( unsigned long, vcpu_id ) + __field( int, direct_vtimer ) + __field( int, direct_ptimer ) + __field( int, emul_ptimer ) + ), + + TP_fast_assign( + __entry->vcpu_id = vcpu_id; + __entry->direct_vtimer = arch_timer_ctx_index(map->direct_vtimer); + __entry->direct_ptimer = + (map->direct_ptimer) ? arch_timer_ctx_index(map->direct_ptimer) : -1; + __entry->emul_ptimer = + (map->emul_ptimer) ? arch_timer_ctx_index(map->emul_ptimer) : -1; + ), + + TP_printk("VCPU: %ld, dv: %d, dp: %d, ep: %d", + __entry->vcpu_id, + __entry->direct_vtimer, + __entry->direct_ptimer, + __entry->emul_ptimer) +); + +TRACE_EVENT(kvm_timer_save_state, + TP_PROTO(struct arch_timer_context *ctx), + TP_ARGS(ctx), + + TP_STRUCT__entry( + __field( unsigned long, ctl ) + __field( unsigned long long, cval ) + __field( int, timer_idx ) + ), + + TP_fast_assign( + __entry->ctl = ctx->cnt_ctl; + __entry->cval = ctx->cnt_cval; + __entry->timer_idx = arch_timer_ctx_index(ctx); + ), + + TP_printk(" CTL: %#08lx CVAL: %#16llx arch_timer_ctx_index: %d", + __entry->ctl, + __entry->cval, + __entry->timer_idx) +); + +TRACE_EVENT(kvm_timer_restore_state, + TP_PROTO(struct arch_timer_context *ctx), + TP_ARGS(ctx), + + TP_STRUCT__entry( + __field( unsigned long, ctl ) + __field( unsigned long long, cval ) + __field( int, timer_idx ) + ), + + TP_fast_assign( + __entry->ctl = ctx->cnt_ctl; + __entry->cval = ctx->cnt_cval; + __entry->timer_idx = arch_timer_ctx_index(ctx); + ), + + TP_printk("CTL: %#08lx CVAL: %#16llx arch_timer_ctx_index: %d", + __entry->ctl, + __entry->cval, + __entry->timer_idx) +); + +TRACE_EVENT(kvm_timer_hrtimer_expire, + TP_PROTO(struct arch_timer_context *ctx), + TP_ARGS(ctx), + + TP_STRUCT__entry( + __field( int, timer_idx ) + ), + + TP_fast_assign( + __entry->timer_idx = arch_timer_ctx_index(ctx); + ), + + TP_printk("arch_timer_ctx_index: %d", __entry->timer_idx) +); + +TRACE_EVENT(kvm_timer_emulate, + TP_PROTO(struct arch_timer_context *ctx, bool should_fire), + TP_ARGS(ctx, should_fire), + + TP_STRUCT__entry( + __field( int, timer_idx ) + __field( bool, should_fire ) + ), + + TP_fast_assign( + __entry->timer_idx = arch_timer_ctx_index(ctx); + __entry->should_fire = should_fire; + ), + + TP_printk("arch_timer_ctx_index: %d (should_fire: %d)", + __entry->timer_idx, __entry->should_fire) +); + #endif /* _TRACE_KVM_H */ #undef TRACE_INCLUDE_PATH -- cgit v1.2.3-55-g7522 From 64cf98fa5544aee6c547786ee32f92b796b30635 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Sun, 1 May 2016 22:29:58 +0200 Subject: KVM: arm/arm64: Move kvm_is_write_fault to header file Move this little function to the header files for arm/arm64 so other code can make use of it directly. Signed-off-by: Christoffer Dall Signed-off-by: Marc Zyngier --- arch/arm/include/asm/kvm_emulate.h | 8 ++++++++ arch/arm64/include/asm/kvm_emulate.h | 8 ++++++++ virt/kvm/arm/mmu.c | 8 -------- 3 files changed, 16 insertions(+), 8 deletions(-) (limited to 'virt') diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h index 77121b713bef..8927cae7c966 100644 --- a/arch/arm/include/asm/kvm_emulate.h +++ b/arch/arm/include/asm/kvm_emulate.h @@ -265,6 +265,14 @@ static inline bool kvm_vcpu_dabt_isextabt(struct kvm_vcpu *vcpu) } } +static inline bool kvm_is_write_fault(struct kvm_vcpu *vcpu) +{ + if (kvm_vcpu_trap_is_iabt(vcpu)) + return false; + + return kvm_vcpu_dabt_iswrite(vcpu); +} + static inline u32 kvm_vcpu_hvc_get_imm(struct kvm_vcpu *vcpu) { return kvm_vcpu_get_hsr(vcpu) & HSR_HVC_IMM_MASK; diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index 506386a3edde..a0d1ce9ae12b 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -331,6 +331,14 @@ static inline int kvm_vcpu_sys_get_rt(struct kvm_vcpu *vcpu) return ESR_ELx_SYS64_ISS_RT(esr); } +static inline bool kvm_is_write_fault(struct kvm_vcpu *vcpu) +{ + if (kvm_vcpu_trap_is_iabt(vcpu)) + return false; + + return kvm_vcpu_dabt_iswrite(vcpu); +} + static inline unsigned long kvm_vcpu_get_mpidr_aff(struct kvm_vcpu *vcpu) { return vcpu_read_sys_reg(vcpu, MPIDR_EL1) & MPIDR_HWID_BITMASK; diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c index f8dda452ea24..e3e5a26b4845 100644 --- a/virt/kvm/arm/mmu.c +++ b/virt/kvm/arm/mmu.c @@ -1403,14 +1403,6 @@ static bool transparent_hugepage_adjust(kvm_pfn_t *pfnp, phys_addr_t *ipap) return false; } -static bool kvm_is_write_fault(struct kvm_vcpu *vcpu) -{ - if (kvm_vcpu_trap_is_iabt(vcpu)) - return false; - - return kvm_vcpu_dabt_iswrite(vcpu); -} - /** * stage2_wp_ptes - write protect PMD range * @pmd: pointer to pmd entry -- cgit v1.2.3-55-g7522 From bae561c0cff7e2cde99990ac3b5a58f815d6789d Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sun, 20 Jan 2019 20:32:31 +0000 Subject: KVM: arm/arm64: arch_timer: Mark physical interrupt active when a virtual interrupt is pending When a guest gets scheduled, KVM performs a "load" operation, which for the timer includes evaluating the virtual "active" state of the interrupt, and replicating it on the physical side. This ensures that the deactivation in the guest will also take place in the physical GIC distributor. If the interrupt is not yet active, we flag it as inactive on the physical side. This means that on restoring the timer registers, if the timer has expired, we'll immediately take an interrupt. That's absolutely fine, as the interrupt will then be flagged as active on the physical side. What this assumes though is that we'll enter the guest right after having taken the interrupt, and that the guest will quickly ACK the interrupt, making it active at on the virtual side. It turns out that quite often, this assumption doesn't really hold. The guest may be preempted on the back on this interrupt, either from kernel space or whilst running at EL1 when a host interrupt fires. When this happens, we repeat the whole sequence on the next load (interrupt marked as inactive, timer registers restored, interrupt fires). And if it takes a really long time for a guest to activate the interrupt (as it does with nested virt), we end-up with many such events in quick succession, leading to the guest only making very slow progress. This can also be seen with the number of virtual timer interrupt on the host being far greater than the same number in the guest. An easy way to fix this is to evaluate the timer state when performing the "load" operation, just like we do when the interrupt actually fires. If the timer has a pending virtual interrupt at this stage, then we can safely flag the physical interrupt as being active, which prevents spurious exits. Signed-off-by: Marc Zyngier --- virt/kvm/arm/arch_timer.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) (limited to 'virt') diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c index 17f9de73cc8a..af8f2f1d01cc 100644 --- a/virt/kvm/arm/arch_timer.c +++ b/virt/kvm/arm/arch_timer.c @@ -487,12 +487,21 @@ static inline void set_timer_irq_phys_active(struct arch_timer_context *ctx, boo static void kvm_timer_vcpu_load_gic(struct arch_timer_context *ctx) { struct kvm_vcpu *vcpu = ctx->vcpu; - bool phys_active; + bool phys_active = false; + + /* + * Update the timer output so that it is likely to match the + * state we're about to restore. If the timer expires between + * this point and the register restoration, we'll take the + * interrupt anyway. + */ + kvm_timer_update_irq(ctx->vcpu, kvm_timer_should_fire(ctx), ctx); if (irqchip_in_kernel(vcpu->kvm)) phys_active = kvm_vgic_map_is_active(vcpu, ctx->irq.irq); - else - phys_active = ctx->irq.level; + + phys_active |= ctx->irq.level; + set_timer_irq_phys_active(ctx, phys_active); } -- cgit v1.2.3-55-g7522 From 49dfe94fe5ad97d380f81544b6d3626b076a1ef6 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Fri, 25 Jan 2019 16:57:28 +0900 Subject: KVM: arm/arm64: Fix TRACE_INCLUDE_PATH As the comment block in include/trace/define_trace.h says, TRACE_INCLUDE_PATH should be a relative path to the define_trace.h ../../virt/kvm/arm is the correct relative path. ../../../virt/kvm/arm is working by coincidence because the top Makefile adds -I$(srctree)/arch/$(SRCARCH)/include as a header search path, but we should not rely on it. Acked-by: Christoffer Dall Signed-off-by: Masahiro Yamada Signed-off-by: Marc Zyngier --- virt/kvm/arm/trace.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'virt') diff --git a/virt/kvm/arm/trace.h b/virt/kvm/arm/trace.h index 54bb059243b9..204d210d01c2 100644 --- a/virt/kvm/arm/trace.h +++ b/virt/kvm/arm/trace.h @@ -370,7 +370,7 @@ TRACE_EVENT(kvm_timer_emulate, #endif /* _TRACE_KVM_H */ #undef TRACE_INCLUDE_PATH -#define TRACE_INCLUDE_PATH ../../../virt/kvm/arm +#define TRACE_INCLUDE_PATH ../../virt/kvm/arm #undef TRACE_INCLUDE_FILE #define TRACE_INCLUDE_FILE trace -- cgit v1.2.3-55-g7522 From a37f0c3c46d2d5f4e94bb1e80e3f6f1f8fd89b40 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Sun, 17 Feb 2019 22:36:26 +0000 Subject: KVM: arm/arm64: fix spelling mistake: "auxilary" -> "auxiliary" There is a spelling mistake in a kvm_err error message. Fix it. Signed-off-by: Colin Ian King Signed-off-by: Marc Zyngier --- virt/kvm/arm/arm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'virt') diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index 9fbdb9e1c51f..8de55041e7ba 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c @@ -1553,7 +1553,7 @@ static int init_hyp_mode(void) err = hyp_map_aux_data(); if (err) - kvm_err("Cannot map host auxilary data: %d\n", err); + kvm_err("Cannot map host auxiliary data: %d\n", err); return 0; -- cgit v1.2.3-55-g7522 From c2be79a0bcf34ed975787e03a129936df17ec0dc Mon Sep 17 00:00:00 2001 From: Shaokun Zhang Date: Tue, 19 Feb 2019 17:22:21 +0800 Subject: KVM: arm/arm64: Remove unused gpa_end variable The 'gpa_end' local variable is never used and let's remove it. Cc: Christoffer Dall Signed-off-by: Shaokun Zhang Signed-off-by: Marc Zyngier --- virt/kvm/arm/mmu.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'virt') diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c index e3e5a26b4845..aae68da2717f 100644 --- a/virt/kvm/arm/mmu.c +++ b/virt/kvm/arm/mmu.c @@ -1597,14 +1597,13 @@ static void kvm_send_hwpoison_signal(unsigned long address, static bool fault_supports_stage2_pmd_mappings(struct kvm_memory_slot *memslot, unsigned long hva) { - gpa_t gpa_start, gpa_end; + gpa_t gpa_start; hva_t uaddr_start, uaddr_end; size_t size; size = memslot->npages * PAGE_SIZE; gpa_start = memslot->base_gfn << PAGE_SHIFT; - gpa_end = gpa_start + size; uaddr_start = memslot->userspace_addr; uaddr_end = uaddr_start + size; -- cgit v1.2.3-55-g7522 From 90952cd388595317170ad22a6bcee3cb8cde4942 Mon Sep 17 00:00:00 2001 From: Gustavo A. R. Silva Date: Wed, 30 Jan 2019 17:07:47 +0100 Subject: kvm: Use struct_size() in kmalloc() One of the more common cases of allocation size calculations is finding the size of a structure that has a zero-sized array at the end, along with memory for some number of elements for that array. For example: struct foo { int stuff; void *entry[]; }; instance = kmalloc(sizeof(struct foo) + sizeof(void *) * count, GFP_KERNEL); Instead of leaving these open-coded and prone to type mistakes, we can now use the new struct_size() helper: instance = kmalloc(struct_size(instance, entry, count), GFP_KERNEL); This code was detected with the help of Coccinelle. Signed-off-by: Gustavo A. R. Silva Signed-off-by: Paolo Bonzini --- virt/kvm/kvm_main.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'virt') diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 585845203db8..791ced69dd0b 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -3714,8 +3714,8 @@ int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, if (bus->dev_count - bus->ioeventfd_count > NR_IOBUS_DEVS - 1) return -ENOSPC; - new_bus = kmalloc(sizeof(*bus) + ((bus->dev_count + 1) * - sizeof(struct kvm_io_range)), GFP_KERNEL); + new_bus = kmalloc(struct_size(bus, range, bus->dev_count + 1), + GFP_KERNEL); if (!new_bus) return -ENOMEM; @@ -3760,8 +3760,8 @@ void kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx, if (i == bus->dev_count) return; - new_bus = kmalloc(sizeof(*bus) + ((bus->dev_count - 1) * - sizeof(struct kvm_io_range)), GFP_KERNEL); + new_bus = kmalloc(struct_size(bus, range, bus->dev_count - 1), + GFP_KERNEL); if (!new_bus) { pr_err("kvm: failed to shrink bus, removing it completely\n"); goto broken; -- cgit v1.2.3-55-g7522 From b12ce36a43f29dbff0bca14c5a51c276aea5662f Mon Sep 17 00:00:00 2001 From: Ben Gardon Date: Mon, 11 Feb 2019 11:02:49 -0800 Subject: kvm: Add memcg accounting to KVM allocations There are many KVM kernel memory allocations which are tied to the life of the VM process and should be charged to the VM process's cgroup. If the allocations aren't tied to the process, the OOM killer will not know that killing the process will free the associated kernel memory. Add __GFP_ACCOUNT flags to many of the allocations which are not yet being charged to the VM process's cgroup. Tested: Ran all kvm-unit-tests on a 64 bit Haswell machine, the patch introduced no new failures. Ran a kernel memory accounting test which creates a VM to touch memory and then checks that the kernel memory allocated for the process is within certain bounds. With this patch we account for much more of the vmalloc and slab memory allocated for the VM. There remain a few allocations which should be charged to the VM's cgroup but are not. In they include: vcpu->run kvm->coalesced_mmio_ring There allocations are unaccounted in this patch because they are mapped to userspace, and accounting them to a cgroup causes problems. This should be addressed in a future patch. Signed-off-by: Ben Gardon Reviewed-by: Shakeel Butt Signed-off-by: Paolo Bonzini --- virt/kvm/coalesced_mmio.c | 3 ++- virt/kvm/eventfd.c | 7 ++++--- virt/kvm/irqchip.c | 4 ++-- virt/kvm/kvm_main.c | 29 +++++++++++++++-------------- virt/kvm/vfio.c | 4 ++-- 5 files changed, 25 insertions(+), 22 deletions(-) (limited to 'virt') diff --git a/virt/kvm/coalesced_mmio.c b/virt/kvm/coalesced_mmio.c index 6855cce3e528..5294abb3f178 100644 --- a/virt/kvm/coalesced_mmio.c +++ b/virt/kvm/coalesced_mmio.c @@ -144,7 +144,8 @@ int kvm_vm_ioctl_register_coalesced_mmio(struct kvm *kvm, if (zone->pio != 1 && zone->pio != 0) return -EINVAL; - dev = kzalloc(sizeof(struct kvm_coalesced_mmio_dev), GFP_KERNEL); + dev = kzalloc(sizeof(struct kvm_coalesced_mmio_dev), + GFP_KERNEL_ACCOUNT); if (!dev) return -ENOMEM; diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index b20b751286fc..4325250afd72 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -297,7 +297,7 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args) if (!kvm_arch_intc_initialized(kvm)) return -EAGAIN; - irqfd = kzalloc(sizeof(*irqfd), GFP_KERNEL); + irqfd = kzalloc(sizeof(*irqfd), GFP_KERNEL_ACCOUNT); if (!irqfd) return -ENOMEM; @@ -345,7 +345,8 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args) } if (!irqfd->resampler) { - resampler = kzalloc(sizeof(*resampler), GFP_KERNEL); + resampler = kzalloc(sizeof(*resampler), + GFP_KERNEL_ACCOUNT); if (!resampler) { ret = -ENOMEM; mutex_unlock(&kvm->irqfds.resampler_lock); @@ -797,7 +798,7 @@ static int kvm_assign_ioeventfd_idx(struct kvm *kvm, if (IS_ERR(eventfd)) return PTR_ERR(eventfd); - p = kzalloc(sizeof(*p), GFP_KERNEL); + p = kzalloc(sizeof(*p), GFP_KERNEL_ACCOUNT); if (!p) { ret = -ENOMEM; goto fail; diff --git a/virt/kvm/irqchip.c b/virt/kvm/irqchip.c index b1286c4e0712..3547b0d8c91e 100644 --- a/virt/kvm/irqchip.c +++ b/virt/kvm/irqchip.c @@ -196,7 +196,7 @@ int kvm_set_irq_routing(struct kvm *kvm, nr_rt_entries += 1; new = kzalloc(sizeof(*new) + (nr_rt_entries * sizeof(struct hlist_head)), - GFP_KERNEL); + GFP_KERNEL_ACCOUNT); if (!new) return -ENOMEM; @@ -208,7 +208,7 @@ int kvm_set_irq_routing(struct kvm *kvm, for (i = 0; i < nr; ++i) { r = -ENOMEM; - e = kzalloc(sizeof(*e), GFP_KERNEL); + e = kzalloc(sizeof(*e), GFP_KERNEL_ACCOUNT); if (!e) goto out; diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 791ced69dd0b..0a0ea8f4bb1b 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -525,7 +525,7 @@ static struct kvm_memslots *kvm_alloc_memslots(void) int i; struct kvm_memslots *slots; - slots = kvzalloc(sizeof(struct kvm_memslots), GFP_KERNEL); + slots = kvzalloc(sizeof(struct kvm_memslots), GFP_KERNEL_ACCOUNT); if (!slots) return NULL; @@ -601,12 +601,12 @@ static int kvm_create_vm_debugfs(struct kvm *kvm, int fd) kvm->debugfs_stat_data = kcalloc(kvm_debugfs_num_entries, sizeof(*kvm->debugfs_stat_data), - GFP_KERNEL); + GFP_KERNEL_ACCOUNT); if (!kvm->debugfs_stat_data) return -ENOMEM; for (p = debugfs_entries; p->name; p++) { - stat_data = kzalloc(sizeof(*stat_data), GFP_KERNEL); + stat_data = kzalloc(sizeof(*stat_data), GFP_KERNEL_ACCOUNT); if (!stat_data) return -ENOMEM; @@ -671,7 +671,7 @@ static struct kvm *kvm_create_vm(unsigned long type) goto out_err_no_irq_srcu; for (i = 0; i < KVM_NR_BUSES; i++) { rcu_assign_pointer(kvm->buses[i], - kzalloc(sizeof(struct kvm_io_bus), GFP_KERNEL)); + kzalloc(sizeof(struct kvm_io_bus), GFP_KERNEL_ACCOUNT)); if (!kvm->buses[i]) goto out_err; } @@ -789,7 +789,7 @@ static int kvm_create_dirty_bitmap(struct kvm_memory_slot *memslot) { unsigned long dirty_bytes = 2 * kvm_dirty_bitmap_bytes(memslot); - memslot->dirty_bitmap = kvzalloc(dirty_bytes, GFP_KERNEL); + memslot->dirty_bitmap = kvzalloc(dirty_bytes, GFP_KERNEL_ACCOUNT); if (!memslot->dirty_bitmap) return -ENOMEM; @@ -1018,7 +1018,7 @@ int __kvm_set_memory_region(struct kvm *kvm, goto out_free; } - slots = kvzalloc(sizeof(struct kvm_memslots), GFP_KERNEL); + slots = kvzalloc(sizeof(struct kvm_memslots), GFP_KERNEL_ACCOUNT); if (!slots) goto out_free; memcpy(slots, __kvm_memslots(kvm, as_id), sizeof(struct kvm_memslots)); @@ -2683,7 +2683,7 @@ static long kvm_vcpu_ioctl(struct file *filp, struct kvm_regs *kvm_regs; r = -ENOMEM; - kvm_regs = kzalloc(sizeof(struct kvm_regs), GFP_KERNEL); + kvm_regs = kzalloc(sizeof(struct kvm_regs), GFP_KERNEL_ACCOUNT); if (!kvm_regs) goto out; r = kvm_arch_vcpu_ioctl_get_regs(vcpu, kvm_regs); @@ -2711,7 +2711,8 @@ out_free1: break; } case KVM_GET_SREGS: { - kvm_sregs = kzalloc(sizeof(struct kvm_sregs), GFP_KERNEL); + kvm_sregs = kzalloc(sizeof(struct kvm_sregs), + GFP_KERNEL_ACCOUNT); r = -ENOMEM; if (!kvm_sregs) goto out; @@ -2803,7 +2804,7 @@ out_free1: break; } case KVM_GET_FPU: { - fpu = kzalloc(sizeof(struct kvm_fpu), GFP_KERNEL); + fpu = kzalloc(sizeof(struct kvm_fpu), GFP_KERNEL_ACCOUNT); r = -ENOMEM; if (!fpu) goto out; @@ -2980,7 +2981,7 @@ static int kvm_ioctl_create_device(struct kvm *kvm, if (test) return 0; - dev = kzalloc(sizeof(*dev), GFP_KERNEL); + dev = kzalloc(sizeof(*dev), GFP_KERNEL_ACCOUNT); if (!dev) return -ENOMEM; @@ -3715,7 +3716,7 @@ int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, return -ENOSPC; new_bus = kmalloc(struct_size(bus, range, bus->dev_count + 1), - GFP_KERNEL); + GFP_KERNEL_ACCOUNT); if (!new_bus) return -ENOMEM; @@ -3761,7 +3762,7 @@ void kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx, return; new_bus = kmalloc(struct_size(bus, range, bus->dev_count - 1), - GFP_KERNEL); + GFP_KERNEL_ACCOUNT); if (!new_bus) { pr_err("kvm: failed to shrink bus, removing it completely\n"); goto broken; @@ -4029,7 +4030,7 @@ static void kvm_uevent_notify_change(unsigned int type, struct kvm *kvm) active = kvm_active_vms; spin_unlock(&kvm_lock); - env = kzalloc(sizeof(*env), GFP_KERNEL); + env = kzalloc(sizeof(*env), GFP_KERNEL_ACCOUNT); if (!env) return; @@ -4045,7 +4046,7 @@ static void kvm_uevent_notify_change(unsigned int type, struct kvm *kvm) add_uevent_var(env, "PID=%d", kvm->userspace_pid); if (kvm->debugfs_dentry) { - char *tmp, *p = kmalloc(PATH_MAX, GFP_KERNEL); + char *tmp, *p = kmalloc(PATH_MAX, GFP_KERNEL_ACCOUNT); if (p) { tmp = dentry_path_raw(kvm->debugfs_dentry, p, PATH_MAX); diff --git a/virt/kvm/vfio.c b/virt/kvm/vfio.c index d99850c462a1..524cbd20379f 100644 --- a/virt/kvm/vfio.c +++ b/virt/kvm/vfio.c @@ -219,7 +219,7 @@ static int kvm_vfio_set_group(struct kvm_device *dev, long attr, u64 arg) } } - kvg = kzalloc(sizeof(*kvg), GFP_KERNEL); + kvg = kzalloc(sizeof(*kvg), GFP_KERNEL_ACCOUNT); if (!kvg) { mutex_unlock(&kv->lock); kvm_vfio_group_put_external_user(vfio_group); @@ -405,7 +405,7 @@ static int kvm_vfio_create(struct kvm_device *dev, u32 type) if (tmp->ops == &kvm_vfio_ops) return -EBUSY; - kv = kzalloc(sizeof(*kv), GFP_KERNEL); + kv = kzalloc(sizeof(*kv), GFP_KERNEL_ACCOUNT); if (!kv) return -ENOMEM; -- cgit v1.2.3-55-g7522 From 152482580a1b0accb60676063a1ac57b2d12daf6 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 5 Feb 2019 12:54:17 -0800 Subject: KVM: Call kvm_arch_memslots_updated() before updating memslots kvm_arch_memslots_updated() is at this point in time an x86-specific hook for handling MMIO generation wraparound. x86 stashes 19 bits of the memslots generation number in its MMIO sptes in order to avoid full page fault walks for repeat faults on emulated MMIO addresses. Because only 19 bits are used, wrapping the MMIO generation number is possible, if unlikely. kvm_arch_memslots_updated() alerts x86 that the generation has changed so that it can invalidate all MMIO sptes in case the effective MMIO generation has wrapped so as to avoid using a stale spte, e.g. a (very) old spte that was created with generation==0. Given that the purpose of kvm_arch_memslots_updated() is to prevent consuming stale entries, it needs to be called before the new generation is propagated to memslots. Invalidating the MMIO sptes after updating memslots means that there is a window where a vCPU could dereference the new memslots generation, e.g. 0, and incorrectly reuse an old MMIO spte that was created with (pre-wrap) generation==0. Fixes: e59dbe09f8e6 ("KVM: Introduce kvm_arch_memslots_updated()") Cc: Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- arch/mips/include/asm/kvm_host.h | 2 +- arch/powerpc/include/asm/kvm_host.h | 2 +- arch/s390/include/asm/kvm_host.h | 2 +- arch/x86/include/asm/kvm_host.h | 2 +- arch/x86/kvm/mmu.c | 4 ++-- arch/x86/kvm/x86.c | 4 ++-- include/linux/kvm_host.h | 2 +- virt/kvm/arm/mmu.c | 2 +- virt/kvm/kvm_main.c | 7 +++++-- 9 files changed, 15 insertions(+), 12 deletions(-) (limited to 'virt') diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h index d2abd98471e8..41204a49cf95 100644 --- a/arch/mips/include/asm/kvm_host.h +++ b/arch/mips/include/asm/kvm_host.h @@ -1134,7 +1134,7 @@ static inline void kvm_arch_hardware_unsetup(void) {} static inline void kvm_arch_sync_events(struct kvm *kvm) {} static inline void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, struct kvm_memory_slot *dont) {} -static inline void kvm_arch_memslots_updated(struct kvm *kvm, struct kvm_memslots *slots) {} +static inline void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen) {} static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {} static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {} static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {} diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 0f98f00da2ea..19693b8add93 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -837,7 +837,7 @@ struct kvm_vcpu_arch { static inline void kvm_arch_hardware_disable(void) {} static inline void kvm_arch_hardware_unsetup(void) {} static inline void kvm_arch_sync_events(struct kvm *kvm) {} -static inline void kvm_arch_memslots_updated(struct kvm *kvm, struct kvm_memslots *slots) {} +static inline void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen) {} static inline void kvm_arch_flush_shadow_all(struct kvm *kvm) {} static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {} static inline void kvm_arch_exit(void) {} diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index d5d24889c3bc..c2b8c8c6c9be 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -878,7 +878,7 @@ static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {} static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {} static inline void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, struct kvm_memory_slot *dont) {} -static inline void kvm_arch_memslots_updated(struct kvm *kvm, struct kvm_memslots *slots) {} +static inline void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen) {} static inline void kvm_arch_flush_shadow_all(struct kvm *kvm) {} static inline void kvm_arch_flush_shadow_memslot(struct kvm *kvm, struct kvm_memory_slot *slot) {} diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 0e2ef41efb9d..c4758e1a8843 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1254,7 +1254,7 @@ void kvm_mmu_clear_dirty_pt_masked(struct kvm *kvm, struct kvm_memory_slot *slot, gfn_t gfn_offset, unsigned long mask); void kvm_mmu_zap_all(struct kvm *kvm); -void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm, struct kvm_memslots *slots); +void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm, u64 gen); unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm); void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages); diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 415d0e62cb3e..a53a0e7ad9e6 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -5893,13 +5893,13 @@ static bool kvm_has_zapped_obsolete_pages(struct kvm *kvm) return unlikely(!list_empty_careful(&kvm->arch.zapped_obsolete_pages)); } -void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm, struct kvm_memslots *slots) +void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm, u64 gen) { /* * The very rare case: if the generation-number is round, * zap all shadow pages. */ - if (unlikely((slots->generation & MMIO_GEN_MASK) == 0)) { + if (unlikely((gen & MMIO_GEN_MASK) == 0)) { kvm_debug_ratelimited("kvm: zapping shadow pages for mmio generation wraparound\n"); kvm_mmu_invalidate_zap_all_pages(kvm); } diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 3de586f89730..03d26ffb29cd 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -9357,13 +9357,13 @@ out_free: return -ENOMEM; } -void kvm_arch_memslots_updated(struct kvm *kvm, struct kvm_memslots *slots) +void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen) { /* * memslots->generation has been incremented. * mmio generation may have reached its maximum value. */ - kvm_mmu_invalidate_mmio_sptes(kvm, slots); + kvm_mmu_invalidate_mmio_sptes(kvm, gen); } int kvm_arch_prepare_memory_region(struct kvm *kvm, diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index c38cc5eb7e73..cf761ff58224 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -634,7 +634,7 @@ void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, struct kvm_memory_slot *dont); int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, unsigned long npages); -void kvm_arch_memslots_updated(struct kvm *kvm, struct kvm_memslots *slots); +void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen); int kvm_arch_prepare_memory_region(struct kvm *kvm, struct kvm_memory_slot *memslot, const struct kvm_userspace_memory_region *mem, diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c index fbdf3ac2f001..e0355e0f8712 100644 --- a/virt/kvm/arm/mmu.c +++ b/virt/kvm/arm/mmu.c @@ -2350,7 +2350,7 @@ int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, return 0; } -void kvm_arch_memslots_updated(struct kvm *kvm, struct kvm_memslots *slots) +void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen) { } diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 0a0ea8f4bb1b..d54f6578a849 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -874,6 +874,7 @@ static struct kvm_memslots *install_new_memslots(struct kvm *kvm, int as_id, struct kvm_memslots *slots) { struct kvm_memslots *old_memslots = __kvm_memslots(kvm, as_id); + u64 gen; /* * Set the low bit in the generation, which disables SPTE caching @@ -896,9 +897,11 @@ static struct kvm_memslots *install_new_memslots(struct kvm *kvm, * space 0 will use generations 0, 4, 8, ... while * address space 1 will * use generations 2, 6, 10, 14, ... */ - slots->generation += KVM_ADDRESS_SPACE_NUM * 2 - 1; + gen = slots->generation + KVM_ADDRESS_SPACE_NUM * 2 - 1; - kvm_arch_memslots_updated(kvm, slots); + kvm_arch_memslots_updated(kvm, gen); + + slots->generation = gen; return old_memslots; } -- cgit v1.2.3-55-g7522 From 361209e054a2c9f34da090ee1ee4c1e8bfe76a64 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 5 Feb 2019 13:01:14 -0800 Subject: KVM: Explicitly define the "memslot update in-progress" bit KVM uses bit 0 of the memslots generation as an "update in-progress" flag, which is used by x86 to prevent caching MMIO access while the memslots are changing. Although the intended behavior is flag-like, e.g. MMIO sptes intentionally drop the in-progress bit so as to avoid caching data from in-flux memslots, the implementation oftentimes treats the bit as part of the generation number itself, e.g. incrementing the generation increments twice, once to set the flag and once to clear it. Prior to commit 4bd518f1598d ("KVM: use separate generations for each address space"), incorporating the "update in-progress" bit into the generation number largely made sense, e.g. "real" generations are even, "bogus" generations are odd, most code doesn't need to be aware of the bit, etc... Now that unique memslots generation numbers are assigned to each address space, stealthing the in-progress status into the generation number results in a wide variety of subtle code, e.g. kvm_create_vm() jumps over bit 0 when initializing the memslots generation without any hint as to why. Explicitly define the flag and convert as much code as possible (which isn't much) to actually treat it like a flag. This paves the way for eventually using a different bit for "update in-progress" so that it can be a flag in truth instead of a awkward extension to the generation number. Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.h | 2 +- include/linux/kvm_host.h | 21 +++++++++++++++++++++ virt/kvm/kvm_main.c | 26 +++++++++++++------------- 3 files changed, 35 insertions(+), 14 deletions(-) (limited to 'virt') diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index 20ede17202bf..28406aa1136d 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h @@ -183,7 +183,7 @@ static inline void vcpu_cache_mmio_info(struct kvm_vcpu *vcpu, { u64 gen = kvm_memslots(vcpu->kvm)->generation; - if (unlikely(gen & 1)) + if (unlikely(gen & KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS)) return; /* diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index cf761ff58224..5e1cb74922b3 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -48,6 +48,27 @@ */ #define KVM_MEMSLOT_INVALID (1UL << 16) +/* + * Bit 0 of the memslot generation number is an "update in-progress flag", + * e.g. is temporarily set for the duration of install_new_memslots(). + * This flag effectively creates a unique generation number that is used to + * mark cached memslot data, e.g. MMIO accesses, as potentially being stale, + * i.e. may (or may not) have come from the previous memslots generation. + * + * This is necessary because the actual memslots update is not atomic with + * respect to the generation number update. Updating the generation number + * first would allow a vCPU to cache a spte from the old memslots using the + * new generation number, and updating the generation number after switching + * to the new memslots would allow cache hits using the old generation number + * to reference the defunct memslots. + * + * This mechanism is used to prevent getting hits in KVM's caches while a + * memslot update is in-progress, and to prevent cache hits *after* updating + * the actual generation number against accesses that were inserted into the + * cache *before* the memslots were updated. + */ +#define KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS BIT_ULL(0) + /* Two fragments for cross MMIO pages. */ #define KVM_MAX_MMIO_FRAGMENTS 2 diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index d54f6578a849..0f1f1c7c7a36 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -874,30 +874,30 @@ static struct kvm_memslots *install_new_memslots(struct kvm *kvm, int as_id, struct kvm_memslots *slots) { struct kvm_memslots *old_memslots = __kvm_memslots(kvm, as_id); - u64 gen; + u64 gen = old_memslots->generation; - /* - * Set the low bit in the generation, which disables SPTE caching - * until the end of synchronize_srcu_expedited. - */ - WARN_ON(old_memslots->generation & 1); - slots->generation = old_memslots->generation + 1; + WARN_ON(gen & KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS); + slots->generation = gen | KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS; rcu_assign_pointer(kvm->memslots[as_id], slots); synchronize_srcu_expedited(&kvm->srcu); /* - * Increment the new memslot generation a second time. This prevents - * vm exits that race with memslot updates from caching a memslot - * generation that will (potentially) be valid forever. - * + * Increment the new memslot generation a second time, dropping the + * update in-progress flag and incrementing then generation based on + * the number of address spaces. This provides a unique and easily + * identifiable generation number while the memslots are in flux. + */ + gen = slots->generation & ~KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS; + + /* * Generations must be unique even across address spaces. We do not need * a global counter for that, instead the generation space is evenly split * across address spaces. For example, with two address spaces, address - * space 0 will use generations 0, 4, 8, ... while * address space 1 will + * space 0 will use generations 0, 4, 8, ... while address space 1 will * use generations 2, 6, 10, 14, ... */ - gen = slots->generation + KVM_ADDRESS_SPACE_NUM * 2 - 1; + gen += KVM_ADDRESS_SPACE_NUM * 2; kvm_arch_memslots_updated(kvm, gen); -- cgit v1.2.3-55-g7522 From 0e32958ec449a9bb63c031ed04ac7a494ea1bc1c Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 5 Feb 2019 13:01:17 -0800 Subject: KVM: Remove the hack to trigger memslot generation wraparound x86 captures a subset of the memslot generation (19 bits) in its MMIO sptes so that it can expedite emulated MMIO handling by checking only the releveant spte, i.e. doesn't need to do a full page fault walk. Because the MMIO sptes capture only 19 bits (due to limited space in the sptes), there is a non-zero probability that the MMIO generation could wrap, e.g. after 500k memslot updates. Since normal usage is extremely unlikely to result in 500k memslot updates, a hack was added by commit 69c9ea93eaea ("KVM: MMU: init kvm generation close to mmio wrap-around value") to offset the MMIO generation in order to trigger a wraparound, e.g. after 150 memslot updates. When separate memslot generation sequences were assigned to each address space, commit 00f034a12fdd ("KVM: do not bias the generation number in kvm_current_mmio_generation") moved the offset logic into the initialization of the memslot generation itself so that the per-address space bit(s) were not dropped/corrupted by the MMIO shenanigans. Remove the offset hack for three reasons: - While it does exercise x86's kvm_mmu_invalidate_mmio_sptes(), simply wrapping the generation doesn't actually test the interesting case of having stale MMIO sptes with the new generation number, e.g. old sptes with a generation number of 0. - Triggering kvm_mmu_invalidate_mmio_sptes() prematurely makes its performance rather important since the probability of invalidating MMIO sptes jumps from "effectively never" to "fairly likely". This limits what can be done in future patches, e.g. to simplify the invalidation code, as doing so without proper caution could lead to a noticeable performance regression. - Forcing the memslots generation, which is a 64-bit number, to wrap prevents KVM from assuming the memslots generation will never wrap. This in turn prevents KVM from using an arbitrary bit for the "update in-progress" flag, e.g. using bit 63 would immediately collide with using a large value as the starting generation number. The "update in-progress" flag is effectively forced into bit 0 so that it's (subtly) taken into account when incrementing the generation. Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- virt/kvm/kvm_main.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'virt') diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 0f1f1c7c7a36..5c2e7e173a46 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -656,12 +656,8 @@ static struct kvm *kvm_create_vm(unsigned long type) struct kvm_memslots *slots = kvm_alloc_memslots(); if (!slots) goto out_err_no_srcu; - /* - * Generations must be different for each address space. - * Init kvm generation close to the maximum to easily test the - * code of handling generation number wrap-around. - */ - slots->generation = i * 2 - 150; + /* Generations must be different for each address space. */ + slots->generation = i * 2; rcu_assign_pointer(kvm->memslots[i], slots); } -- cgit v1.2.3-55-g7522 From 164bf7e56c5a73f2f819c39ba7e0f20e0f97dc7b Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 5 Feb 2019 13:01:18 -0800 Subject: KVM: Move the memslot update in-progress flag to bit 63 ...now that KVM won't explode by moving it out of bit 0. Using bit 63 eliminates the need to jump over bit 0, e.g. when calculating a new memslots generation or when propagating the memslots generation to an MMIO spte. Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- Documentation/virtual/kvm/mmu.txt | 13 ++++++++----- arch/x86/kvm/mmu.c | 31 ++++++++++++------------------- include/linux/kvm_host.h | 4 ++-- virt/kvm/kvm_main.c | 8 ++++---- 4 files changed, 26 insertions(+), 30 deletions(-) (limited to 'virt') diff --git a/Documentation/virtual/kvm/mmu.txt b/Documentation/virtual/kvm/mmu.txt index e507a9e0421e..367a952f50ab 100644 --- a/Documentation/virtual/kvm/mmu.txt +++ b/Documentation/virtual/kvm/mmu.txt @@ -452,13 +452,16 @@ stored into the MMIO spte. Thus, the MMIO spte might be created based on out-of-date information, but with an up-to-date generation number. To avoid this, the generation number is incremented again after synchronize_srcu -returns; thus, the low bit of kvm_memslots(kvm)->generation is only 1 during a +returns; thus, bit 63 of kvm_memslots(kvm)->generation set to 1 only during a memslot update, while some SRCU readers might be using the old copy. We do not want to use an MMIO sptes created with an odd generation number, and we can do -this without losing a bit in the MMIO spte. The low bit of the generation -is not stored in MMIO spte, and presumed zero when it is extracted out of the -spte. If KVM is unlucky and creates an MMIO spte while the low bit is 1, -the next access to the spte will always be a cache miss. +this without losing a bit in the MMIO spte. The "update in-progress" bit of the +generation is not stored in MMIO spte, and is so is implicitly zero when the +generation is extracted out of the spte. If KVM is unlucky and creates an MMIO +spte while an update is in-progress, the next access to the spte will always be +a cache miss. For example, a subsequent access during the update window will +miss due to the in-progress flag diverging, while an access after the update +window closes will have a higher generation number (as compared to the spte). Further reading diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 364b2a737d94..bcf62e1e1ff7 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -335,18 +335,17 @@ static inline bool is_access_track_spte(u64 spte) * Due to limited space in PTEs, the MMIO generation is a 19 bit subset of * the memslots generation and is derived as follows: * - * Bits 1-9 of the memslot generation are propagated to spte bits 3-11 - * Bits 10-19 of the memslot generation are propagated to spte bits 52-61 + * Bits 0-8 of the MMIO generation are propagated to spte bits 3-11 + * Bits 9-18 of the MMIO generation are propagated to spte bits 52-61 * - * The MMIO generation starts at bit 1 of the memslots generation in order to - * skip over bit 0, the KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS flag. Including - * the flag would require stealing a bit from the "real" generation number and - * thus effectively halve the maximum number of MMIO generations that can be - * handled before encountering a wrap (which requires a full MMU zap). The - * flag is instead explicitly queried when checking for MMIO spte cache hits. + * The KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS flag is intentionally not included in + * the MMIO generation number, as doing so would require stealing a bit from + * the "real" generation number and thus effectively halve the maximum number + * of MMIO generations that can be handled before encountering a wrap (which + * requires a full MMU zap). The flag is instead explicitly queried when + * checking for MMIO spte cache hits. */ -#define MMIO_SPTE_GEN_MASK GENMASK_ULL(19, 1) -#define MMIO_SPTE_GEN_SHIFT 1 +#define MMIO_SPTE_GEN_MASK GENMASK_ULL(18, 0) #define MMIO_SPTE_GEN_LOW_START 3 #define MMIO_SPTE_GEN_LOW_END 11 @@ -363,8 +362,6 @@ static u64 generation_mmio_spte_mask(u64 gen) WARN_ON(gen & ~MMIO_SPTE_GEN_MASK); - gen >>= MMIO_SPTE_GEN_SHIFT; - mask = (gen << MMIO_SPTE_GEN_LOW_START) & MMIO_SPTE_GEN_LOW_MASK; mask |= (gen << MMIO_SPTE_GEN_HIGH_START) & MMIO_SPTE_GEN_HIGH_MASK; return mask; @@ -378,7 +375,7 @@ static u64 get_mmio_spte_generation(u64 spte) gen = (spte & MMIO_SPTE_GEN_LOW_MASK) >> MMIO_SPTE_GEN_LOW_START; gen |= (spte & MMIO_SPTE_GEN_HIGH_MASK) >> MMIO_SPTE_GEN_HIGH_START; - return gen << MMIO_SPTE_GEN_SHIFT; + return gen; } static void mark_mmio_spte(struct kvm_vcpu *vcpu, u64 *sptep, u64 gfn, @@ -5905,13 +5902,9 @@ static bool kvm_has_zapped_obsolete_pages(struct kvm *kvm) void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm, u64 gen) { - gen &= MMIO_SPTE_GEN_MASK; + WARN_ON(gen & KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS); - /* - * Shift to adjust for the "update in-progress" flag, which isn't - * included in the MMIO generation number. - */ - gen >>= MMIO_SPTE_GEN_SHIFT; + gen &= MMIO_SPTE_GEN_MASK; /* * Generation numbers are incremented in multiples of the number of diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 5e1cb74922b3..85c0c00d5159 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -49,7 +49,7 @@ #define KVM_MEMSLOT_INVALID (1UL << 16) /* - * Bit 0 of the memslot generation number is an "update in-progress flag", + * Bit 63 of the memslot generation number is an "update in-progress flag", * e.g. is temporarily set for the duration of install_new_memslots(). * This flag effectively creates a unique generation number that is used to * mark cached memslot data, e.g. MMIO accesses, as potentially being stale, @@ -67,7 +67,7 @@ * the actual generation number against accesses that were inserted into the * cache *before* the memslots were updated. */ -#define KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS BIT_ULL(0) +#define KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS BIT_ULL(63) /* Two fragments for cross MMIO pages. */ #define KVM_MAX_MMIO_FRAGMENTS 2 diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 5c2e7e173a46..c9d0bc01f8cb 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -657,7 +657,7 @@ static struct kvm *kvm_create_vm(unsigned long type) if (!slots) goto out_err_no_srcu; /* Generations must be different for each address space. */ - slots->generation = i * 2; + slots->generation = i; rcu_assign_pointer(kvm->memslots[i], slots); } @@ -890,10 +890,10 @@ static struct kvm_memslots *install_new_memslots(struct kvm *kvm, * Generations must be unique even across address spaces. We do not need * a global counter for that, instead the generation space is evenly split * across address spaces. For example, with two address spaces, address - * space 0 will use generations 0, 4, 8, ... while address space 1 will - * use generations 2, 6, 10, 14, ... + * space 0 will use generations 0, 2, 4, ... while address space 1 will + * use generations 1, 3, 5, ... */ - gen += KVM_ADDRESS_SPACE_NUM * 2; + gen += KVM_ADDRESS_SPACE_NUM; kvm_arch_memslots_updated(kvm, gen); -- cgit v1.2.3-55-g7522 From 7fa08e71b4a0591a518814fa78b32e124f90d587 Mon Sep 17 00:00:00 2001 From: Nir Weiner Date: Sun, 27 Jan 2019 12:17:14 +0200 Subject: KVM: grow_halt_poll_ns() should never shrink vCPU halt_poll_ns grow_halt_poll_ns() have a strange behavior in case (halt_poll_ns_grow == 0) && (vcpu->halt_poll_ns != 0). In this case, vcpu->halt_pol_ns will be set to zero. That results in shrinking instead of growing. Fix issue by changing grow_halt_poll_ns() to not modify vcpu->halt_poll_ns in case halt_poll_ns_grow is zero Reviewed-by: Boris Ostrovsky Reviewed-by: Liran Alon Signed-off-by: Nir Weiner Suggested-by: Liran Alon Signed-off-by: Paolo Bonzini --- arch/powerpc/kvm/book3s_hv.c | 5 ++++- virt/kvm/kvm_main.c | 6 +++++- 2 files changed, 9 insertions(+), 2 deletions(-) (limited to 'virt') diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 5a066fc299e1..e316a2ddb70b 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -3631,8 +3631,11 @@ static void kvmppc_wait_for_exec(struct kvmppc_vcore *vc, static void grow_halt_poll_ns(struct kvmppc_vcore *vc) { + if (!halt_poll_ns_grow) + return; + /* 10us base */ - if (vc->halt_poll_ns == 0 && halt_poll_ns_grow) + if (vc->halt_poll_ns == 0) vc->halt_poll_ns = 10000; else vc->halt_poll_ns *= halt_poll_ns_grow; diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index c9d0bc01f8cb..9c8a8bf6e686 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2188,8 +2188,11 @@ static void grow_halt_poll_ns(struct kvm_vcpu *vcpu) old = val = vcpu->halt_poll_ns; grow = READ_ONCE(halt_poll_ns_grow); + if (!grow) + goto out; + /* 10us base */ - if (val == 0 && grow) + if (val == 0) val = 10000; else val *= grow; @@ -2198,6 +2201,7 @@ static void grow_halt_poll_ns(struct kvm_vcpu *vcpu) val = halt_poll_ns; vcpu->halt_poll_ns = val; +out: trace_kvm_halt_poll_ns_grow(vcpu->vcpu_id, val, old); } -- cgit v1.2.3-55-g7522 From 49113d360bdeb4dd916fb6bffbcc3e157422b6fd Mon Sep 17 00:00:00 2001 From: Nir Weiner Date: Sun, 27 Jan 2019 12:17:15 +0200 Subject: KVM: Expose the initial start value in grow_halt_poll_ns() as a module parameter The hard-coded value 10000 in grow_halt_poll_ns() stands for the initial start value when raising up vcpu->halt_poll_ns. It actually sets the first timeout to the first polling session. This value has significant effect on how tolerant we are to outliers. On the standard case, higher value is better - we will spend more time in the polling busyloop, handle events/interrupts faster and result in better performance. But on outliers it puts us in a busy loop that does nothing. Even if the shrink factor is zero, we will still waste time on the first iteration. The optimal value changes between different workloads. It depends on outliers rate and polling sessions length. As this value has significant effect on the dynamic halt-polling algorithm, it should be configurable and exposed. Reviewed-by: Boris Ostrovsky Reviewed-by: Liran Alon Signed-off-by: Nir Weiner Signed-off-by: Paolo Bonzini --- Documentation/virtual/kvm/halt-polling.txt | 37 +++++++++++++++++++----------- arch/powerpc/kvm/book3s_hv.c | 3 +-- include/linux/kvm_host.h | 1 + virt/kvm/kvm_main.c | 8 +++++-- 4 files changed, 31 insertions(+), 18 deletions(-) (limited to 'virt') diff --git a/Documentation/virtual/kvm/halt-polling.txt b/Documentation/virtual/kvm/halt-polling.txt index 4a8418318769..4f791b128dd2 100644 --- a/Documentation/virtual/kvm/halt-polling.txt +++ b/Documentation/virtual/kvm/halt-polling.txt @@ -53,7 +53,8 @@ the global max polling interval then the polling interval can be increased in the hope that next time during the longer polling interval the wake up source will be received while the host is polling and the latency benefits will be received. The polling interval is grown in the function grow_halt_poll_ns() and -is multiplied by the module parameter halt_poll_ns_grow. +is multiplied by the module parameters halt_poll_ns_grow and +halt_poll_ns_grow_start. In the event that the total block time was greater than the global max polling interval then the host will never poll for long enough (limited by the global @@ -80,22 +81,30 @@ shrunk. These variables are defined in include/linux/kvm_host.h and as module parameters in virt/kvm/kvm_main.c, or arch/powerpc/kvm/book3s_hv.c in the powerpc kvm-hv case. -Module Parameter | Description | Default Value +Module Parameter | Description | Default Value -------------------------------------------------------------------------------- -halt_poll_ns | The global max polling interval | KVM_HALT_POLL_NS_DEFAULT - | which defines the ceiling value | - | of the polling interval for | (per arch value) - | each vcpu. | +halt_poll_ns | The global max polling | KVM_HALT_POLL_NS_DEFAULT + | interval which defines | + | the ceiling value of the | + | polling interval for | (per arch value) + | each vcpu. | -------------------------------------------------------------------------------- -halt_poll_ns_grow | The value by which the halt | 2 - | polling interval is multiplied | - | in the grow_halt_poll_ns() | - | function. | +halt_poll_ns_grow | The value by which the | 2 + | halt polling interval is | + | multiplied in the | + | grow_halt_poll_ns() | + | function. | -------------------------------------------------------------------------------- -halt_poll_ns_shrink | The value by which the halt | 0 - | polling interval is divided in | - | the shrink_halt_poll_ns() | - | function. | +halt_poll_ns_grow_start | The initial value to grow | 10000 + | to from zero in the | + | grow_halt_poll_ns() | + | function. | +-------------------------------------------------------------------------------- +halt_poll_ns_shrink | The value by which the | 0 + | halt polling interval is | + | divided in the | + | shrink_halt_poll_ns() | + | function. | -------------------------------------------------------------------------------- These module parameters can be set from the debugfs files in: diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index e316a2ddb70b..29ffc99bd79b 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -3634,9 +3634,8 @@ static void grow_halt_poll_ns(struct kvmppc_vcore *vc) if (!halt_poll_ns_grow) return; - /* 10us base */ if (vc->halt_poll_ns == 0) - vc->halt_poll_ns = 10000; + vc->halt_poll_ns = halt_poll_ns_grow_start; else vc->halt_poll_ns *= halt_poll_ns_grow; } diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 85c0c00d5159..9d55c63db09b 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1203,6 +1203,7 @@ extern bool kvm_rebooting; extern unsigned int halt_poll_ns; extern unsigned int halt_poll_ns_grow; +extern unsigned int halt_poll_ns_grow_start; extern unsigned int halt_poll_ns_shrink; struct kvm_device { diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 9c8a8bf6e686..ae818d27a1a4 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -81,6 +81,11 @@ unsigned int halt_poll_ns_grow = 2; module_param(halt_poll_ns_grow, uint, 0644); EXPORT_SYMBOL_GPL(halt_poll_ns_grow); +/* The start value to grow halt_poll_ns from */ +unsigned int halt_poll_ns_grow_start = 10000; /* 10us */ +module_param(halt_poll_ns_grow_start, uint, 0644); +EXPORT_SYMBOL_GPL(halt_poll_ns_grow_start); + /* Default resets per-vcpu halt_poll_ns . */ unsigned int halt_poll_ns_shrink; module_param(halt_poll_ns_shrink, uint, 0644); @@ -2191,9 +2196,8 @@ static void grow_halt_poll_ns(struct kvm_vcpu *vcpu) if (!grow) goto out; - /* 10us base */ if (val == 0) - val = 10000; + val = halt_poll_ns_grow_start; else val *= grow; -- cgit v1.2.3-55-g7522 From dee339b5c1da3e6fa139b97f74f99dc9f0b03ff6 Mon Sep 17 00:00:00 2001 From: Nir Weiner Date: Sun, 27 Jan 2019 12:17:16 +0200 Subject: KVM: Never start grow vCPU halt_poll_ns from value below halt_poll_ns_grow_start grow_halt_poll_ns() have a strange behaviour in case (vcpu->halt_poll_ns != 0) && (vcpu->halt_poll_ns < halt_poll_ns_grow_start). In this case, vcpu->halt_poll_ns will be multiplied by grow factor (halt_poll_ns_grow) which will require several grow iteration in order to reach a value bigger than halt_poll_ns_grow_start. This means that growing vcpu->halt_poll_ns from value of 0 is slower than growing it from a positive value less than halt_poll_ns_grow_start. Which is misleading and inaccurate. Fix issue by changing grow_halt_poll_ns() to set vcpu->halt_poll_ns to halt_poll_ns_grow_start in any case that (vcpu->halt_poll_ns < halt_poll_ns_grow_start). Regardless if vcpu->halt_poll_ns is 0. use READ_ONCE to get a consistent number for all cases. Reviewed-by: Boris Ostrovsky Reviewed-by: Liran Alon Signed-off-by: Nir Weiner Signed-off-by: Paolo Bonzini --- arch/powerpc/kvm/book3s_hv.c | 5 ++--- virt/kvm/kvm_main.c | 10 +++++----- 2 files changed, 7 insertions(+), 8 deletions(-) (limited to 'virt') diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 29ffc99bd79b..062f3c92c871 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -3634,10 +3634,9 @@ static void grow_halt_poll_ns(struct kvmppc_vcore *vc) if (!halt_poll_ns_grow) return; - if (vc->halt_poll_ns == 0) + vc->halt_poll_ns *= halt_poll_ns_grow; + if (vc->halt_poll_ns < halt_poll_ns_grow_start) vc->halt_poll_ns = halt_poll_ns_grow_start; - else - vc->halt_poll_ns *= halt_poll_ns_grow; } static void shrink_halt_poll_ns(struct kvmppc_vcore *vc) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index ae818d27a1a4..5087cf703ed1 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2189,17 +2189,17 @@ void kvm_sigset_deactivate(struct kvm_vcpu *vcpu) static void grow_halt_poll_ns(struct kvm_vcpu *vcpu) { - unsigned int old, val, grow; + unsigned int old, val, grow, grow_start; old = val = vcpu->halt_poll_ns; + grow_start = READ_ONCE(halt_poll_ns_grow_start); grow = READ_ONCE(halt_poll_ns_grow); if (!grow) goto out; - if (val == 0) - val = halt_poll_ns_grow_start; - else - val *= grow; + val *= grow; + if (val < grow_start) + val = grow_start; if (val > halt_poll_ns) val = halt_poll_ns; -- cgit v1.2.3-55-g7522 From a67794cafbc4594debf53dbe4e2a7708426f492e Mon Sep 17 00:00:00 2001 From: Lan Tianyu Date: Sat, 2 Feb 2019 17:20:27 +0800 Subject: Revert "KVM: Eliminate extra function calls in kvm_get_dirty_log_protect()" The value of "dirty_bitmap[i]" is already check before setting its value to mask. The following check of "mask" is redundant. The check of "mask" was introduced by commit 58d2930f4ee3 ("KVM: Eliminate extra function calls in kvm_get_dirty_log_protect()"), revert it. Signed-off-by: Lan Tianyu Signed-off-by: Paolo Bonzini --- virt/kvm/kvm_main.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'virt') diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 5087cf703ed1..276af92ace6c 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1205,11 +1205,9 @@ int kvm_get_dirty_log_protect(struct kvm *kvm, mask = xchg(&dirty_bitmap[i], 0); dirty_bitmap_buffer[i] = mask; - if (mask) { - offset = i * BITS_PER_LONG; - kvm_arch_mmu_enable_log_dirty_pt_masked(kvm, memslot, - offset, mask); - } + offset = i * BITS_PER_LONG; + kvm_arch_mmu_enable_log_dirty_pt_masked(kvm, memslot, + offset, mask); } spin_unlock(&kvm->mmu_lock); } -- cgit v1.2.3-55-g7522 From 7f5d9c1bc0e6c13ba01546e9c6fda083764a4e9c Mon Sep 17 00:00:00 2001 From: Shaokun Zhang Date: Fri, 22 Feb 2019 14:34:29 +0800 Subject: KVM: arm/arm64: Remove unused timer variable The 'timer' local variable became unused after commit bee038a67487 ("KVM: arm/arm64: Rework the timer code to use a timer_map"). Remove it to avoid [-Wunused-but-set-variable] warning. Cc: Christoffer Dall Cc: James Morse Cc: Suzuki K Pouloze Reviewed-by: Julien Thierry Signed-off-by: Shaokun Zhang Signed-off-by: Marc Zyngier --- virt/kvm/arm/arch_timer.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'virt') diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c index af8f2f1d01cc..3417f2dbc366 100644 --- a/virt/kvm/arm/arch_timer.c +++ b/virt/kvm/arm/arch_timer.c @@ -236,14 +236,12 @@ static enum hrtimer_restart kvm_hrtimer_expire(struct hrtimer *hrt) static bool kvm_timer_should_fire(struct arch_timer_context *timer_ctx) { - struct arch_timer_cpu *timer; enum kvm_arch_timers index; u64 cval, now; if (!timer_ctx) return false; - timer = vcpu_timer(timer_ctx->vcpu); index = arch_timer_ctx_index(timer_ctx); if (timer_ctx->loaded) { -- cgit v1.2.3-55-g7522 From a242010776f880ec0d8917aae0406e4a70cdc36c Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Fri, 22 Feb 2019 16:10:09 +0800 Subject: KVM: Minor cleanups for kvm_main.c This patch contains two minor cleanups: firstly it puts exported symbol for kvm_io_bus_write() by following the function definition; secondly it removes a redundant blank line. Signed-off-by: Leo Yan Signed-off-by: Paolo Bonzini --- virt/kvm/kvm_main.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'virt') diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 276af92ace6c..0fb0e9aa0935 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -3631,6 +3631,7 @@ int kvm_io_bus_write(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr, r = __kvm_io_bus_write(vcpu, bus, &range, val); return r < 0 ? r : 0; } +EXPORT_SYMBOL_GPL(kvm_io_bus_write); /* kvm_io_bus_write_cookie - called under kvm->slots_lock */ int kvm_io_bus_write_cookie(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, @@ -3681,7 +3682,6 @@ static int __kvm_io_bus_read(struct kvm_vcpu *vcpu, struct kvm_io_bus *bus, return -EOPNOTSUPP; } -EXPORT_SYMBOL_GPL(kvm_io_bus_write); /* kvm_io_bus_read - called under kvm->slots_lock */ int kvm_io_bus_read(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr, @@ -3703,7 +3703,6 @@ int kvm_io_bus_read(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr, return r < 0 ? r : 0; } - /* Caller must hold slots_lock. */ int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, int len, struct kvm_io_device *dev) -- cgit v1.2.3-55-g7522