From f257d6dcda0187693407e0c2e5dab69bdab3223f Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 19 Apr 2019 22:18:17 -0700 Subject: KVM: Directly return result from kvm_arch_check_processor_compat() Add a wrapper to invoke kvm_arch_check_processor_compat() so that the boilerplate ugliness of checking virtualization support on all CPUs is hidden from the arch specific code. x86's implementation in particular is quite heinous, as it unnecessarily propagates the out-param pattern into kvm_x86_ops. While the x86 specific issue could be resolved solely by changing kvm_x86_ops, make the change for all architectures as returning a value directly is prettier and technically more robust, e.g. s390 doesn't set the out param, which could lead to subtle breakage in the (highly unlikely) scenario where the out-param was not pre-initialized by the caller. Opportunistically annotate svm_check_processor_compat() with __init. Signed-off-by: Sean Christopherson Reviewed-by: Cornelia Huck Signed-off-by: Paolo Bonzini --- virt/kvm/arm/arm.c | 4 ++-- virt/kvm/kvm_main.c | 9 ++++++--- 2 files changed, 8 insertions(+), 5 deletions(-) (limited to 'virt') diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index 7eeebe5e9da2..d2389033e9d6 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c @@ -105,9 +105,9 @@ int kvm_arch_hardware_setup(void) return 0; } -void kvm_arch_check_processor_compat(void *rtn) +int kvm_arch_check_processor_compat(void) { - *(int *)rtn = 0; + return 0; } diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index ca54b09adf5b..b2579841263f 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -4224,6 +4224,11 @@ static void kvm_sched_out(struct preempt_notifier *pn, kvm_arch_vcpu_put(vcpu); } +static void check_processor_compat(void *rtn) +{ + *(int *)rtn = kvm_arch_check_processor_compat(); +} + int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align, struct module *module) { @@ -4255,9 +4260,7 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align, goto out_free_0a; for_each_online_cpu(cpu) { - smp_call_function_single(cpu, - kvm_arch_check_processor_compat, - &r, 1); + smp_call_function_single(cpu, check_processor_compat, &r, 1); if (r < 0) goto out_free_1; } -- cgit v1.2.3-55-g7522 From b3ffd74a2f6fbec131eff6d81bc7a6dbbac57bc7 Mon Sep 17 00:00:00 2001 From: Gustavo A. R. Silva Date: Fri, 31 May 2019 14:24:53 -0500 Subject: KVM: irqchip: Use struct_size() in kzalloc() One of the more common cases of allocation size calculations is finding the size of a structure that has a zero-sized array at the end, along with memory for some number of elements for that array. For example: struct foo { int stuff; struct boo entry[]; }; instance = kzalloc(sizeof(struct foo) + count * sizeof(struct boo), GFP_KERNEL); Instead of leaving these open-coded and prone to type mistakes, we can now use the new struct_size() helper: instance = kzalloc(struct_size(instance, entry, count), GFP_KERNEL); This code was detected with the help of Coccinelle. Signed-off-by: Gustavo A. R. Silva Signed-off-by: Paolo Bonzini --- virt/kvm/irqchip.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'virt') diff --git a/virt/kvm/irqchip.c b/virt/kvm/irqchip.c index 79e59e4fa3dc..f8be6a3d1aa6 100644 --- a/virt/kvm/irqchip.c +++ b/virt/kvm/irqchip.c @@ -196,9 +196,7 @@ int kvm_set_irq_routing(struct kvm *kvm, nr_rt_entries += 1; - new = kzalloc(sizeof(*new) + (nr_rt_entries * sizeof(struct hlist_head)), - GFP_KERNEL_ACCOUNT); - + new = kzalloc(struct_size(new, map, nr_rt_entries), GFP_KERNEL_ACCOUNT); if (!new) return -ENOMEM; -- cgit v1.2.3-55-g7522 From 0d9ce162cf46c99628cc5da9510b959c7976735b Mon Sep 17 00:00:00 2001 From: Junaid Shahid Date: Thu, 3 Jan 2019 17:14:28 -0800 Subject: kvm: Convert kvm_lock to a mutex It doesn't seem as if there is any particular need for kvm_lock to be a spinlock, so convert the lock to a mutex so that sleepable functions (in particular cond_resched()) can be called while holding it. Signed-off-by: Junaid Shahid Signed-off-by: Paolo Bonzini --- Documentation/virtual/kvm/locking.txt | 4 +--- arch/s390/kvm/kvm-s390.c | 4 ++-- arch/x86/kvm/mmu.c | 4 ++-- arch/x86/kvm/x86.c | 14 +++++++------- include/linux/kvm_host.h | 2 +- virt/kvm/kvm_main.c | 30 +++++++++++++++--------------- 6 files changed, 28 insertions(+), 30 deletions(-) (limited to 'virt') diff --git a/Documentation/virtual/kvm/locking.txt b/Documentation/virtual/kvm/locking.txt index 1bb8bcaf8497..635cd6eaf714 100644 --- a/Documentation/virtual/kvm/locking.txt +++ b/Documentation/virtual/kvm/locking.txt @@ -15,8 +15,6 @@ The acquisition orders for mutexes are as follows: On x86, vcpu->mutex is taken outside kvm->arch.hyperv.hv_lock. -For spinlocks, kvm_lock is taken outside kvm->mmu_lock. - Everything else is a leaf: no other lock is taken inside the critical sections. @@ -169,7 +167,7 @@ which time it will be set using the Dirty tracking mechanism described above. ------------ Name: kvm_lock -Type: spinlock_t +Type: mutex Arch: any Protects: - vm_list diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 7936af0a971f..0fef9192f6ac 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -2423,13 +2423,13 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags); if (!kvm->arch.sca) goto out_err; - spin_lock(&kvm_lock); + mutex_lock(&kvm_lock); sca_offset += 16; if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE) sca_offset = 0; kvm->arch.sca = (struct bsca_block *) ((char *) kvm->arch.sca + sca_offset); - spin_unlock(&kvm_lock); + mutex_unlock(&kvm_lock); sprintf(debug_name, "kvm-%u", current->pid); diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 95ac393e2959..3384c539d150 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -5956,7 +5956,7 @@ mmu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc) int nr_to_scan = sc->nr_to_scan; unsigned long freed = 0; - spin_lock(&kvm_lock); + mutex_lock(&kvm_lock); list_for_each_entry(kvm, &vm_list, vm_list) { int idx; @@ -5998,7 +5998,7 @@ mmu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc) break; } - spin_unlock(&kvm_lock); + mutex_unlock(&kvm_lock); return freed; } diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 10feed6a01eb..6200d5a51f13 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6719,7 +6719,7 @@ static void kvm_hyperv_tsc_notifier(void) struct kvm_vcpu *vcpu; int cpu; - spin_lock(&kvm_lock); + mutex_lock(&kvm_lock); list_for_each_entry(kvm, &vm_list, vm_list) kvm_make_mclock_inprogress_request(kvm); @@ -6745,7 +6745,7 @@ static void kvm_hyperv_tsc_notifier(void) spin_unlock(&ka->pvclock_gtod_sync_lock); } - spin_unlock(&kvm_lock); + mutex_unlock(&kvm_lock); } #endif @@ -6796,17 +6796,17 @@ static void __kvmclock_cpufreq_notifier(struct cpufreq_freqs *freq, int cpu) smp_call_function_single(cpu, tsc_khz_changed, freq, 1); - spin_lock(&kvm_lock); + mutex_lock(&kvm_lock); list_for_each_entry(kvm, &vm_list, vm_list) { kvm_for_each_vcpu(i, vcpu, kvm) { if (vcpu->cpu != cpu) continue; kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu); - if (vcpu->cpu != smp_processor_id()) + if (vcpu->cpu != raw_smp_processor_id()) send_ipi = 1; } } - spin_unlock(&kvm_lock); + mutex_unlock(&kvm_lock); if (freq->old < freq->new && send_ipi) { /* @@ -6929,12 +6929,12 @@ static void pvclock_gtod_update_fn(struct work_struct *work) struct kvm_vcpu *vcpu; int i; - spin_lock(&kvm_lock); + mutex_lock(&kvm_lock); list_for_each_entry(kvm, &vm_list, vm_list) kvm_for_each_vcpu(i, vcpu, kvm) kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu); atomic_set(&kvm_guest_has_master_clock, 0); - spin_unlock(&kvm_lock); + mutex_unlock(&kvm_lock); } static DECLARE_WORK(pvclock_gtod_work, pvclock_gtod_update_fn); diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 5e9fd7ad8018..abafddb9fe2c 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -162,7 +162,7 @@ static inline bool is_error_page(struct page *page) extern struct kmem_cache *kvm_vcpu_cache; -extern spinlock_t kvm_lock; +extern struct mutex kvm_lock; extern struct list_head vm_list; struct kvm_io_range { diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index b2579841263f..9613987ef4c8 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -98,7 +98,7 @@ EXPORT_SYMBOL_GPL(halt_poll_ns_shrink); * kvm->lock --> kvm->slots_lock --> kvm->irq_lock */ -DEFINE_SPINLOCK(kvm_lock); +DEFINE_MUTEX(kvm_lock); static DEFINE_RAW_SPINLOCK(kvm_count_lock); LIST_HEAD(vm_list); @@ -683,9 +683,9 @@ static struct kvm *kvm_create_vm(unsigned long type) if (r) goto out_err; - spin_lock(&kvm_lock); + mutex_lock(&kvm_lock); list_add(&kvm->vm_list, &vm_list); - spin_unlock(&kvm_lock); + mutex_unlock(&kvm_lock); preempt_notifier_inc(); @@ -731,9 +731,9 @@ static void kvm_destroy_vm(struct kvm *kvm) kvm_uevent_notify_change(KVM_EVENT_DESTROY_VM, kvm); kvm_destroy_vm_debugfs(kvm); kvm_arch_sync_events(kvm); - spin_lock(&kvm_lock); + mutex_lock(&kvm_lock); list_del(&kvm->vm_list); - spin_unlock(&kvm_lock); + mutex_unlock(&kvm_lock); kvm_free_irq_routing(kvm); for (i = 0; i < KVM_NR_BUSES; i++) { struct kvm_io_bus *bus = kvm_get_bus(kvm, i); @@ -4034,13 +4034,13 @@ static int vm_stat_get(void *_offset, u64 *val) u64 tmp_val; *val = 0; - spin_lock(&kvm_lock); + mutex_lock(&kvm_lock); list_for_each_entry(kvm, &vm_list, vm_list) { stat_tmp.kvm = kvm; vm_stat_get_per_vm((void *)&stat_tmp, &tmp_val); *val += tmp_val; } - spin_unlock(&kvm_lock); + mutex_unlock(&kvm_lock); return 0; } @@ -4053,12 +4053,12 @@ static int vm_stat_clear(void *_offset, u64 val) if (val) return -EINVAL; - spin_lock(&kvm_lock); + mutex_lock(&kvm_lock); list_for_each_entry(kvm, &vm_list, vm_list) { stat_tmp.kvm = kvm; vm_stat_clear_per_vm((void *)&stat_tmp, 0); } - spin_unlock(&kvm_lock); + mutex_unlock(&kvm_lock); return 0; } @@ -4073,13 +4073,13 @@ static int vcpu_stat_get(void *_offset, u64 *val) u64 tmp_val; *val = 0; - spin_lock(&kvm_lock); + mutex_lock(&kvm_lock); list_for_each_entry(kvm, &vm_list, vm_list) { stat_tmp.kvm = kvm; vcpu_stat_get_per_vm((void *)&stat_tmp, &tmp_val); *val += tmp_val; } - spin_unlock(&kvm_lock); + mutex_unlock(&kvm_lock); return 0; } @@ -4092,12 +4092,12 @@ static int vcpu_stat_clear(void *_offset, u64 val) if (val) return -EINVAL; - spin_lock(&kvm_lock); + mutex_lock(&kvm_lock); list_for_each_entry(kvm, &vm_list, vm_list) { stat_tmp.kvm = kvm; vcpu_stat_clear_per_vm((void *)&stat_tmp, 0); } - spin_unlock(&kvm_lock); + mutex_unlock(&kvm_lock); return 0; } @@ -4118,7 +4118,7 @@ static void kvm_uevent_notify_change(unsigned int type, struct kvm *kvm) if (!kvm_dev.this_device || !kvm) return; - spin_lock(&kvm_lock); + mutex_lock(&kvm_lock); if (type == KVM_EVENT_CREATE_VM) { kvm_createvm_count++; kvm_active_vms++; @@ -4127,7 +4127,7 @@ static void kvm_uevent_notify_change(unsigned int type, struct kvm *kvm) } created = kvm_createvm_count; active = kvm_active_vms; - spin_unlock(&kvm_lock); + mutex_unlock(&kvm_lock); env = kzalloc(sizeof(*env), GFP_KERNEL_ACCOUNT); if (!env) -- cgit v1.2.3-55-g7522 From 418e5ca88cc18b7e9eaafa40eac26397ccd66a71 Mon Sep 17 00:00:00 2001 From: Andrew Murray Date: Mon, 17 Jun 2019 20:01:01 +0100 Subject: KVM: arm/arm64: Rename kvm_pmu_{enable/disable}_counter functions The kvm_pmu_{enable/disable}_counter functions can enable/disable multiple counters at once as they operate on a bitmask. Let's make this clearer by renaming the function. Suggested-by: Suzuki K Poulose Signed-off-by: Andrew Murray Reviewed-by: Julien Thierry Reviewed-by: Suzuki K Poulose Signed-off-by: Marc Zyngier --- arch/arm64/kvm/sys_regs.c | 4 ++-- include/kvm/arm_pmu.h | 8 ++++---- virt/kvm/arm/pmu.c | 12 ++++++------ 3 files changed, 12 insertions(+), 12 deletions(-) (limited to 'virt') diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index ce933f296049..0a7665c189ff 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -865,12 +865,12 @@ static bool access_pmcnten(struct kvm_vcpu *vcpu, struct sys_reg_params *p, if (r->Op2 & 0x1) { /* accessing PMCNTENSET_EL0 */ __vcpu_sys_reg(vcpu, PMCNTENSET_EL0) |= val; - kvm_pmu_enable_counter(vcpu, val); + kvm_pmu_enable_counter_mask(vcpu, val); kvm_vcpu_pmu_restore_guest(vcpu); } else { /* accessing PMCNTENCLR_EL0 */ __vcpu_sys_reg(vcpu, PMCNTENSET_EL0) &= ~val; - kvm_pmu_disable_counter(vcpu, val); + kvm_pmu_disable_counter_mask(vcpu, val); } } else { p->regval = __vcpu_sys_reg(vcpu, PMCNTENSET_EL0) & mask; diff --git a/include/kvm/arm_pmu.h b/include/kvm/arm_pmu.h index 84a9db156be7..45e5205750b4 100644 --- a/include/kvm/arm_pmu.h +++ b/include/kvm/arm_pmu.h @@ -35,8 +35,8 @@ void kvm_pmu_set_counter_value(struct kvm_vcpu *vcpu, u64 select_idx, u64 val); u64 kvm_pmu_valid_counter_mask(struct kvm_vcpu *vcpu); void kvm_pmu_vcpu_reset(struct kvm_vcpu *vcpu); void kvm_pmu_vcpu_destroy(struct kvm_vcpu *vcpu); -void kvm_pmu_disable_counter(struct kvm_vcpu *vcpu, u64 val); -void kvm_pmu_enable_counter(struct kvm_vcpu *vcpu, u64 val); +void kvm_pmu_disable_counter_mask(struct kvm_vcpu *vcpu, u64 val); +void kvm_pmu_enable_counter_mask(struct kvm_vcpu *vcpu, u64 val); void kvm_pmu_flush_hwstate(struct kvm_vcpu *vcpu); void kvm_pmu_sync_hwstate(struct kvm_vcpu *vcpu); bool kvm_pmu_should_notify_user(struct kvm_vcpu *vcpu); @@ -72,8 +72,8 @@ static inline u64 kvm_pmu_valid_counter_mask(struct kvm_vcpu *vcpu) } static inline void kvm_pmu_vcpu_reset(struct kvm_vcpu *vcpu) {} static inline void kvm_pmu_vcpu_destroy(struct kvm_vcpu *vcpu) {} -static inline void kvm_pmu_disable_counter(struct kvm_vcpu *vcpu, u64 val) {} -static inline void kvm_pmu_enable_counter(struct kvm_vcpu *vcpu, u64 val) {} +static inline void kvm_pmu_disable_counter_mask(struct kvm_vcpu *vcpu, u64 val) {} +static inline void kvm_pmu_enable_counter_mask(struct kvm_vcpu *vcpu, u64 val) {} static inline void kvm_pmu_flush_hwstate(struct kvm_vcpu *vcpu) {} static inline void kvm_pmu_sync_hwstate(struct kvm_vcpu *vcpu) {} static inline bool kvm_pmu_should_notify_user(struct kvm_vcpu *vcpu) diff --git a/virt/kvm/arm/pmu.c b/virt/kvm/arm/pmu.c index da740764a7ee..99e51ee8fd9e 100644 --- a/virt/kvm/arm/pmu.c +++ b/virt/kvm/arm/pmu.c @@ -124,13 +124,13 @@ u64 kvm_pmu_valid_counter_mask(struct kvm_vcpu *vcpu) } /** - * kvm_pmu_enable_counter - enable selected PMU counter + * kvm_pmu_enable_counter_mask - enable selected PMU counters * @vcpu: The vcpu pointer * @val: the value guest writes to PMCNTENSET register * * Call perf_event_enable to start counting the perf event */ -void kvm_pmu_enable_counter(struct kvm_vcpu *vcpu, u64 val) +void kvm_pmu_enable_counter_mask(struct kvm_vcpu *vcpu, u64 val) { int i; struct kvm_pmu *pmu = &vcpu->arch.pmu; @@ -153,13 +153,13 @@ void kvm_pmu_enable_counter(struct kvm_vcpu *vcpu, u64 val) } /** - * kvm_pmu_disable_counter - disable selected PMU counter + * kvm_pmu_disable_counter_mask - disable selected PMU counters * @vcpu: The vcpu pointer * @val: the value guest writes to PMCNTENCLR register * * Call perf_event_disable to stop counting the perf event */ -void kvm_pmu_disable_counter(struct kvm_vcpu *vcpu, u64 val) +void kvm_pmu_disable_counter_mask(struct kvm_vcpu *vcpu, u64 val) { int i; struct kvm_pmu *pmu = &vcpu->arch.pmu; @@ -336,10 +336,10 @@ void kvm_pmu_handle_pmcr(struct kvm_vcpu *vcpu, u64 val) mask = kvm_pmu_valid_counter_mask(vcpu); if (val & ARMV8_PMU_PMCR_E) { - kvm_pmu_enable_counter(vcpu, + kvm_pmu_enable_counter_mask(vcpu, __vcpu_sys_reg(vcpu, PMCNTENSET_EL0) & mask); } else { - kvm_pmu_disable_counter(vcpu, mask); + kvm_pmu_disable_counter_mask(vcpu, mask); } if (val & ARMV8_PMU_PMCR_C) -- cgit v1.2.3-55-g7522 From 6f4d2a0b0b1e9a1f7594e666eebad98372901818 Mon Sep 17 00:00:00 2001 From: Andrew Murray Date: Mon, 17 Jun 2019 20:01:02 +0100 Subject: KVM: arm/arm64: Extract duplicated code to own function Let's reduce code duplication by extracting common code to its own function. Signed-off-by: Andrew Murray Reviewed-by: Suzuki K Poulose Signed-off-by: Marc Zyngier --- virt/kvm/arm/pmu.c | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) (limited to 'virt') diff --git a/virt/kvm/arm/pmu.c b/virt/kvm/arm/pmu.c index 99e51ee8fd9e..efdc7f6db6cd 100644 --- a/virt/kvm/arm/pmu.c +++ b/virt/kvm/arm/pmu.c @@ -53,6 +53,19 @@ void kvm_pmu_set_counter_value(struct kvm_vcpu *vcpu, u64 select_idx, u64 val) __vcpu_sys_reg(vcpu, reg) += (s64)val - kvm_pmu_get_counter_value(vcpu, select_idx); } +/** + * kvm_pmu_release_perf_event - remove the perf event + * @pmc: The PMU counter pointer + */ +static void kvm_pmu_release_perf_event(struct kvm_pmc *pmc) +{ + if (pmc->perf_event) { + perf_event_disable(pmc->perf_event); + perf_event_release_kernel(pmc->perf_event); + pmc->perf_event = NULL; + } +} + /** * kvm_pmu_stop_counter - stop PMU counter * @pmc: The PMU counter pointer @@ -68,9 +81,7 @@ static void kvm_pmu_stop_counter(struct kvm_vcpu *vcpu, struct kvm_pmc *pmc) reg = (pmc->idx == ARMV8_PMU_CYCLE_IDX) ? PMCCNTR_EL0 : PMEVCNTR0_EL0 + pmc->idx; __vcpu_sys_reg(vcpu, reg) = counter; - perf_event_disable(pmc->perf_event); - perf_event_release_kernel(pmc->perf_event); - pmc->perf_event = NULL; + kvm_pmu_release_perf_event(pmc); } } @@ -101,15 +112,8 @@ void kvm_pmu_vcpu_destroy(struct kvm_vcpu *vcpu) int i; struct kvm_pmu *pmu = &vcpu->arch.pmu; - for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++) { - struct kvm_pmc *pmc = &pmu->pmc[i]; - - if (pmc->perf_event) { - perf_event_disable(pmc->perf_event); - perf_event_release_kernel(pmc->perf_event); - pmc->perf_event = NULL; - } - } + for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++) + kvm_pmu_release_perf_event(&pmu->pmc[i]); } u64 kvm_pmu_valid_counter_mask(struct kvm_vcpu *vcpu) -- cgit v1.2.3-55-g7522 From 30d97754b2d1bc4fd20f27c25fed92fc7ce39ce3 Mon Sep 17 00:00:00 2001 From: Andrew Murray Date: Mon, 17 Jun 2019 20:01:03 +0100 Subject: KVM: arm/arm64: Re-create event when setting counter value The perf event sample_period is currently set based upon the current counter value, when PMXEVTYPER is written to and the perf event is created. However the user may choose to write the type before the counter value in which case sample_period will be set incorrectly. Let's instead decouple event creation from PMXEVTYPER and (re)create the event in either suitation. Signed-off-by: Andrew Murray Reviewed-by: Julien Thierry Reviewed-by: Suzuki K Poulose Signed-off-by: Marc Zyngier --- virt/kvm/arm/pmu.c | 42 +++++++++++++++++++++++++++++++++--------- 1 file changed, 33 insertions(+), 9 deletions(-) (limited to 'virt') diff --git a/virt/kvm/arm/pmu.c b/virt/kvm/arm/pmu.c index efdc7f6db6cd..f77643f4274c 100644 --- a/virt/kvm/arm/pmu.c +++ b/virt/kvm/arm/pmu.c @@ -13,6 +13,7 @@ #include #include +static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx); /** * kvm_pmu_get_counter_value - get PMU counter value * @vcpu: The vcpu pointer @@ -51,6 +52,9 @@ void kvm_pmu_set_counter_value(struct kvm_vcpu *vcpu, u64 select_idx, u64 val) reg = (select_idx == ARMV8_PMU_CYCLE_IDX) ? PMCCNTR_EL0 : PMEVCNTR0_EL0 + select_idx; __vcpu_sys_reg(vcpu, reg) += (s64)val - kvm_pmu_get_counter_value(vcpu, select_idx); + + /* Recreate the perf event to reflect the updated sample_period */ + kvm_pmu_create_perf_event(vcpu, select_idx); } /** @@ -367,23 +371,21 @@ static bool kvm_pmu_counter_is_enabled(struct kvm_vcpu *vcpu, u64 select_idx) } /** - * kvm_pmu_set_counter_event_type - set selected counter to monitor some event + * kvm_pmu_create_perf_event - create a perf event for a counter * @vcpu: The vcpu pointer - * @data: The data guest writes to PMXEVTYPER_EL0 * @select_idx: The number of selected counter - * - * When OS accesses PMXEVTYPER_EL0, that means it wants to set a PMC to count an - * event with given hardware event number. Here we call perf_event API to - * emulate this action and create a kernel perf event for it. */ -void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, u64 data, - u64 select_idx) +static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx) { struct kvm_pmu *pmu = &vcpu->arch.pmu; struct kvm_pmc *pmc = &pmu->pmc[select_idx]; struct perf_event *event; struct perf_event_attr attr; - u64 eventsel, counter; + u64 eventsel, counter, reg, data; + + reg = (select_idx == ARMV8_PMU_CYCLE_IDX) + ? PMCCFILTR_EL0 : PMEVTYPER0_EL0 + select_idx; + data = __vcpu_sys_reg(vcpu, reg); kvm_pmu_stop_counter(vcpu, pmc); eventsel = data & ARMV8_PMU_EVTYPE_EVENT; @@ -420,6 +422,28 @@ void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, u64 data, pmc->perf_event = event; } +/** + * kvm_pmu_set_counter_event_type - set selected counter to monitor some event + * @vcpu: The vcpu pointer + * @data: The data guest writes to PMXEVTYPER_EL0 + * @select_idx: The number of selected counter + * + * When OS accesses PMXEVTYPER_EL0, that means it wants to set a PMC to count an + * event with given hardware event number. Here we call perf_event API to + * emulate this action and create a kernel perf event for it. + */ +void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, u64 data, + u64 select_idx) +{ + u64 reg, event_type = data & ARMV8_PMU_EVTYPE_MASK; + + reg = (select_idx == ARMV8_PMU_CYCLE_IDX) + ? PMCCFILTR_EL0 : PMEVTYPER0_EL0 + select_idx; + + __vcpu_sys_reg(vcpu, reg) = event_type; + kvm_pmu_create_perf_event(vcpu, select_idx); +} + bool kvm_arm_support_pmu_v3(void) { /* -- cgit v1.2.3-55-g7522 From 218907cbc2b82419c70180610163c987d4764b27 Mon Sep 17 00:00:00 2001 From: Andrew Murray Date: Mon, 17 Jun 2019 20:01:04 +0100 Subject: KVM: arm/arm64: Remove pmc->bitmask We currently use pmc->bitmask to determine the width of the pmc - however it's superfluous as the pmc index already describes if the pmc is a cycle counter or event counter. The architecture clearly describes the widths of these counters. Let's remove the bitmask to simplify the code. Signed-off-by: Andrew Murray Signed-off-by: Marc Zyngier --- include/kvm/arm_pmu.h | 1 - virt/kvm/arm/pmu.c | 30 ++++++++++++++++++++---------- 2 files changed, 20 insertions(+), 11 deletions(-) (limited to 'virt') diff --git a/include/kvm/arm_pmu.h b/include/kvm/arm_pmu.h index 45e5205750b4..48a15d4b820e 100644 --- a/include/kvm/arm_pmu.h +++ b/include/kvm/arm_pmu.h @@ -17,7 +17,6 @@ struct kvm_pmc { u8 idx; /* index into the pmu->pmc array */ struct perf_event *perf_event; - u64 bitmask; }; struct kvm_pmu { diff --git a/virt/kvm/arm/pmu.c b/virt/kvm/arm/pmu.c index f77643f4274c..24c6cf869a16 100644 --- a/virt/kvm/arm/pmu.c +++ b/virt/kvm/arm/pmu.c @@ -14,6 +14,18 @@ #include static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx); + +/** + * kvm_pmu_idx_is_64bit - determine if select_idx is a 64bit counter + * @vcpu: The vcpu pointer + * @select_idx: The counter index + */ +static bool kvm_pmu_idx_is_64bit(struct kvm_vcpu *vcpu, u64 select_idx) +{ + return (select_idx == ARMV8_PMU_CYCLE_IDX && + __vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_LC); +} + /** * kvm_pmu_get_counter_value - get PMU counter value * @vcpu: The vcpu pointer @@ -36,7 +48,10 @@ u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, u64 select_idx) counter += perf_event_read_value(pmc->perf_event, &enabled, &running); - return counter & pmc->bitmask; + if (!kvm_pmu_idx_is_64bit(vcpu, select_idx)) + counter = lower_32_bits(counter); + + return counter; } /** @@ -102,7 +117,6 @@ void kvm_pmu_vcpu_reset(struct kvm_vcpu *vcpu) for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++) { kvm_pmu_stop_counter(vcpu, &pmu->pmc[i]); pmu->pmc[i].idx = i; - pmu->pmc[i].bitmask = 0xffffffffUL; } } @@ -337,8 +351,6 @@ void kvm_pmu_software_increment(struct kvm_vcpu *vcpu, u64 val) */ void kvm_pmu_handle_pmcr(struct kvm_vcpu *vcpu, u64 val) { - struct kvm_pmu *pmu = &vcpu->arch.pmu; - struct kvm_pmc *pmc; u64 mask; int i; @@ -357,11 +369,6 @@ void kvm_pmu_handle_pmcr(struct kvm_vcpu *vcpu, u64 val) for (i = 0; i < ARMV8_PMU_CYCLE_IDX; i++) kvm_pmu_set_counter_value(vcpu, i, 0); } - - if (val & ARMV8_PMU_PMCR_LC) { - pmc = &pmu->pmc[ARMV8_PMU_CYCLE_IDX]; - pmc->bitmask = 0xffffffffffffffffUL; - } } static bool kvm_pmu_counter_is_enabled(struct kvm_vcpu *vcpu, u64 select_idx) @@ -409,7 +416,10 @@ static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx) counter = kvm_pmu_get_counter_value(vcpu, select_idx); /* The initial sample period (overflow count) of an event. */ - attr.sample_period = (-counter) & pmc->bitmask; + if (kvm_pmu_idx_is_64bit(vcpu, pmc->idx)) + attr.sample_period = (-counter) & GENMASK(63, 0); + else + attr.sample_period = (-counter) & GENMASK(31, 0); event = perf_event_create_kernel_counter(&attr, -1, current, kvm_pmu_perf_overflow, pmc); -- cgit v1.2.3-55-g7522 From 80f393a23be68e2f8a0f74258d6155438c200bbd Mon Sep 17 00:00:00 2001 From: Andrew Murray Date: Mon, 17 Jun 2019 20:01:05 +0100 Subject: KVM: arm/arm64: Support chained PMU counters ARMv8 provides support for chained PMU counters, where an event type of 0x001E is set for odd-numbered counters, the event counter will increment by one for each overflow of the preceding even-numbered counter. Let's emulate this in KVM by creating a 64 bit perf counter when a user chains two emulated counters together. For chained events we only support generating an overflow interrupt on the high counter. We use the attributes of the low counter to determine the attributes of the perf event. Suggested-by: Marc Zyngier Signed-off-by: Andrew Murray Reviewed-by: Julien Thierry Reviewed-by: Suzuki K Poulose Signed-off-by: Marc Zyngier --- include/kvm/arm_pmu.h | 2 + virt/kvm/arm/pmu.c | 252 ++++++++++++++++++++++++++++++++++++++++++-------- 2 files changed, 217 insertions(+), 37 deletions(-) (limited to 'virt') diff --git a/include/kvm/arm_pmu.h b/include/kvm/arm_pmu.h index 48a15d4b820e..16c769a7f979 100644 --- a/include/kvm/arm_pmu.h +++ b/include/kvm/arm_pmu.h @@ -11,6 +11,7 @@ #include #define ARMV8_PMU_CYCLE_IDX (ARMV8_PMU_MAX_COUNTERS - 1) +#define ARMV8_PMU_MAX_COUNTER_PAIRS ((ARMV8_PMU_MAX_COUNTERS + 1) >> 1) #ifdef CONFIG_KVM_ARM_PMU @@ -22,6 +23,7 @@ struct kvm_pmc { struct kvm_pmu { int irq_num; struct kvm_pmc pmc[ARMV8_PMU_MAX_COUNTERS]; + DECLARE_BITMAP(chained, ARMV8_PMU_MAX_COUNTER_PAIRS); bool ready; bool created; bool irq_level; diff --git a/virt/kvm/arm/pmu.c b/virt/kvm/arm/pmu.c index 24c6cf869a16..3dd8238ed246 100644 --- a/virt/kvm/arm/pmu.c +++ b/virt/kvm/arm/pmu.c @@ -15,6 +15,8 @@ static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx); +#define PERF_ATTR_CFG1_KVM_PMU_CHAINED 0x1 + /** * kvm_pmu_idx_is_64bit - determine if select_idx is a 64bit counter * @vcpu: The vcpu pointer @@ -26,29 +28,126 @@ static bool kvm_pmu_idx_is_64bit(struct kvm_vcpu *vcpu, u64 select_idx) __vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_LC); } +static struct kvm_vcpu *kvm_pmc_to_vcpu(struct kvm_pmc *pmc) +{ + struct kvm_pmu *pmu; + struct kvm_vcpu_arch *vcpu_arch; + + pmc -= pmc->idx; + pmu = container_of(pmc, struct kvm_pmu, pmc[0]); + vcpu_arch = container_of(pmu, struct kvm_vcpu_arch, pmu); + return container_of(vcpu_arch, struct kvm_vcpu, arch); +} + /** - * kvm_pmu_get_counter_value - get PMU counter value + * kvm_pmu_pmc_is_chained - determine if the pmc is chained + * @pmc: The PMU counter pointer + */ +static bool kvm_pmu_pmc_is_chained(struct kvm_pmc *pmc) +{ + struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc); + + return test_bit(pmc->idx >> 1, vcpu->arch.pmu.chained); +} + +/** + * kvm_pmu_idx_is_high_counter - determine if select_idx is a high/low counter + * @select_idx: The counter index + */ +static bool kvm_pmu_idx_is_high_counter(u64 select_idx) +{ + return select_idx & 0x1; +} + +/** + * kvm_pmu_get_canonical_pmc - obtain the canonical pmc + * @pmc: The PMU counter pointer + * + * When a pair of PMCs are chained together we use the low counter (canonical) + * to hold the underlying perf event. + */ +static struct kvm_pmc *kvm_pmu_get_canonical_pmc(struct kvm_pmc *pmc) +{ + if (kvm_pmu_pmc_is_chained(pmc) && + kvm_pmu_idx_is_high_counter(pmc->idx)) + return pmc - 1; + + return pmc; +} + +/** + * kvm_pmu_idx_has_chain_evtype - determine if the event type is chain * @vcpu: The vcpu pointer * @select_idx: The counter index */ -u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, u64 select_idx) +static bool kvm_pmu_idx_has_chain_evtype(struct kvm_vcpu *vcpu, u64 select_idx) { - u64 counter, reg, enabled, running; - struct kvm_pmu *pmu = &vcpu->arch.pmu; - struct kvm_pmc *pmc = &pmu->pmc[select_idx]; + u64 eventsel, reg; - reg = (select_idx == ARMV8_PMU_CYCLE_IDX) - ? PMCCNTR_EL0 : PMEVCNTR0_EL0 + select_idx; - counter = __vcpu_sys_reg(vcpu, reg); + select_idx |= 0x1; + + if (select_idx == ARMV8_PMU_CYCLE_IDX) + return false; + + reg = PMEVTYPER0_EL0 + select_idx; + eventsel = __vcpu_sys_reg(vcpu, reg) & ARMV8_PMU_EVTYPE_EVENT; + + return eventsel == ARMV8_PMUV3_PERFCTR_CHAIN; +} + +/** + * kvm_pmu_get_pair_counter_value - get PMU counter value + * @vcpu: The vcpu pointer + * @pmc: The PMU counter pointer + */ +static u64 kvm_pmu_get_pair_counter_value(struct kvm_vcpu *vcpu, + struct kvm_pmc *pmc) +{ + u64 counter, counter_high, reg, enabled, running; - /* The real counter value is equal to the value of counter register plus + if (kvm_pmu_pmc_is_chained(pmc)) { + pmc = kvm_pmu_get_canonical_pmc(pmc); + reg = PMEVCNTR0_EL0 + pmc->idx; + + counter = __vcpu_sys_reg(vcpu, reg); + counter_high = __vcpu_sys_reg(vcpu, reg + 1); + + counter = lower_32_bits(counter) | (counter_high << 32); + } else { + reg = (pmc->idx == ARMV8_PMU_CYCLE_IDX) + ? PMCCNTR_EL0 : PMEVCNTR0_EL0 + pmc->idx; + counter = __vcpu_sys_reg(vcpu, reg); + } + + /* + * The real counter value is equal to the value of counter register plus * the value perf event counts. */ if (pmc->perf_event) counter += perf_event_read_value(pmc->perf_event, &enabled, &running); - if (!kvm_pmu_idx_is_64bit(vcpu, select_idx)) + return counter; +} + +/** + * kvm_pmu_get_counter_value - get PMU counter value + * @vcpu: The vcpu pointer + * @select_idx: The counter index + */ +u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, u64 select_idx) +{ + u64 counter; + struct kvm_pmu *pmu = &vcpu->arch.pmu; + struct kvm_pmc *pmc = &pmu->pmc[select_idx]; + + counter = kvm_pmu_get_pair_counter_value(vcpu, pmc); + + if (kvm_pmu_pmc_is_chained(pmc) && + kvm_pmu_idx_is_high_counter(select_idx)) + counter = upper_32_bits(counter); + + else if (!kvm_pmu_idx_is_64bit(vcpu, select_idx)) counter = lower_32_bits(counter); return counter; @@ -78,6 +177,7 @@ void kvm_pmu_set_counter_value(struct kvm_vcpu *vcpu, u64 select_idx, u64 val) */ static void kvm_pmu_release_perf_event(struct kvm_pmc *pmc) { + pmc = kvm_pmu_get_canonical_pmc(pmc); if (pmc->perf_event) { perf_event_disable(pmc->perf_event); perf_event_release_kernel(pmc->perf_event); @@ -95,13 +195,23 @@ static void kvm_pmu_stop_counter(struct kvm_vcpu *vcpu, struct kvm_pmc *pmc) { u64 counter, reg; - if (pmc->perf_event) { - counter = kvm_pmu_get_counter_value(vcpu, pmc->idx); + pmc = kvm_pmu_get_canonical_pmc(pmc); + if (!pmc->perf_event) + return; + + counter = kvm_pmu_get_pair_counter_value(vcpu, pmc); + + if (kvm_pmu_pmc_is_chained(pmc)) { + reg = PMEVCNTR0_EL0 + pmc->idx; + __vcpu_sys_reg(vcpu, reg) = lower_32_bits(counter); + __vcpu_sys_reg(vcpu, reg + 1) = upper_32_bits(counter); + } else { reg = (pmc->idx == ARMV8_PMU_CYCLE_IDX) ? PMCCNTR_EL0 : PMEVCNTR0_EL0 + pmc->idx; - __vcpu_sys_reg(vcpu, reg) = counter; - kvm_pmu_release_perf_event(pmc); + __vcpu_sys_reg(vcpu, reg) = lower_32_bits(counter); } + + kvm_pmu_release_perf_event(pmc); } /** @@ -118,6 +228,8 @@ void kvm_pmu_vcpu_reset(struct kvm_vcpu *vcpu) kvm_pmu_stop_counter(vcpu, &pmu->pmc[i]); pmu->pmc[i].idx = i; } + + bitmap_zero(vcpu->arch.pmu.chained, ARMV8_PMU_MAX_COUNTER_PAIRS); } /** @@ -166,6 +278,18 @@ void kvm_pmu_enable_counter_mask(struct kvm_vcpu *vcpu, u64 val) continue; pmc = &pmu->pmc[i]; + + /* + * For high counters of chained events we must recreate the + * perf event with the long (64bit) attribute set. + */ + if (kvm_pmu_pmc_is_chained(pmc) && + kvm_pmu_idx_is_high_counter(i)) { + kvm_pmu_create_perf_event(vcpu, i); + continue; + } + + /* At this point, pmc must be the canonical */ if (pmc->perf_event) { perf_event_enable(pmc->perf_event); if (pmc->perf_event->state != PERF_EVENT_STATE_ACTIVE) @@ -195,6 +319,18 @@ void kvm_pmu_disable_counter_mask(struct kvm_vcpu *vcpu, u64 val) continue; pmc = &pmu->pmc[i]; + + /* + * For high counters of chained events we must recreate the + * perf event with the long (64bit) attribute unset. + */ + if (kvm_pmu_pmc_is_chained(pmc) && + kvm_pmu_idx_is_high_counter(i)) { + kvm_pmu_create_perf_event(vcpu, i); + continue; + } + + /* At this point, pmc must be the canonical */ if (pmc->perf_event) perf_event_disable(pmc->perf_event); } @@ -284,17 +420,6 @@ void kvm_pmu_sync_hwstate(struct kvm_vcpu *vcpu) kvm_pmu_update_state(vcpu); } -static inline struct kvm_vcpu *kvm_pmc_to_vcpu(struct kvm_pmc *pmc) -{ - struct kvm_pmu *pmu; - struct kvm_vcpu_arch *vcpu_arch; - - pmc -= pmc->idx; - pmu = container_of(pmc, struct kvm_pmu, pmc[0]); - vcpu_arch = container_of(pmu, struct kvm_vcpu_arch, pmu); - return container_of(vcpu_arch, struct kvm_vcpu, arch); -} - /** * When the perf event overflows, set the overflow status and inform the vcpu. */ @@ -385,13 +510,20 @@ static bool kvm_pmu_counter_is_enabled(struct kvm_vcpu *vcpu, u64 select_idx) static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx) { struct kvm_pmu *pmu = &vcpu->arch.pmu; - struct kvm_pmc *pmc = &pmu->pmc[select_idx]; + struct kvm_pmc *pmc; struct perf_event *event; struct perf_event_attr attr; u64 eventsel, counter, reg, data; - reg = (select_idx == ARMV8_PMU_CYCLE_IDX) - ? PMCCFILTR_EL0 : PMEVTYPER0_EL0 + select_idx; + /* + * For chained counters the event type and filtering attributes are + * obtained from the low/even counter. We also use this counter to + * determine if the event is enabled/disabled. + */ + pmc = kvm_pmu_get_canonical_pmc(&pmu->pmc[select_idx]); + + reg = (pmc->idx == ARMV8_PMU_CYCLE_IDX) + ? PMCCFILTR_EL0 : PMEVTYPER0_EL0 + pmc->idx; data = __vcpu_sys_reg(vcpu, reg); kvm_pmu_stop_counter(vcpu, pmc); @@ -399,30 +531,47 @@ static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx) /* Software increment event does't need to be backed by a perf event */ if (eventsel == ARMV8_PMUV3_PERFCTR_SW_INCR && - select_idx != ARMV8_PMU_CYCLE_IDX) + pmc->idx != ARMV8_PMU_CYCLE_IDX) return; memset(&attr, 0, sizeof(struct perf_event_attr)); attr.type = PERF_TYPE_RAW; attr.size = sizeof(attr); attr.pinned = 1; - attr.disabled = !kvm_pmu_counter_is_enabled(vcpu, select_idx); + attr.disabled = !kvm_pmu_counter_is_enabled(vcpu, pmc->idx); attr.exclude_user = data & ARMV8_PMU_EXCLUDE_EL0 ? 1 : 0; attr.exclude_kernel = data & ARMV8_PMU_EXCLUDE_EL1 ? 1 : 0; attr.exclude_hv = 1; /* Don't count EL2 events */ attr.exclude_host = 1; /* Don't count host events */ - attr.config = (select_idx == ARMV8_PMU_CYCLE_IDX) ? + attr.config = (pmc->idx == ARMV8_PMU_CYCLE_IDX) ? ARMV8_PMUV3_PERFCTR_CPU_CYCLES : eventsel; - counter = kvm_pmu_get_counter_value(vcpu, select_idx); - /* The initial sample period (overflow count) of an event. */ - if (kvm_pmu_idx_is_64bit(vcpu, pmc->idx)) + counter = kvm_pmu_get_pair_counter_value(vcpu, pmc); + + if (kvm_pmu_idx_has_chain_evtype(vcpu, pmc->idx)) { + /** + * The initial sample period (overflow count) of an event. For + * chained counters we only support overflow interrupts on the + * high counter. + */ attr.sample_period = (-counter) & GENMASK(63, 0); - else - attr.sample_period = (-counter) & GENMASK(31, 0); + event = perf_event_create_kernel_counter(&attr, -1, current, + kvm_pmu_perf_overflow, + pmc + 1); + + if (kvm_pmu_counter_is_enabled(vcpu, pmc->idx + 1)) + attr.config1 |= PERF_ATTR_CFG1_KVM_PMU_CHAINED; + } else { + /* The initial sample period (overflow count) of an event. */ + if (kvm_pmu_idx_is_64bit(vcpu, pmc->idx)) + attr.sample_period = (-counter) & GENMASK(63, 0); + else + attr.sample_period = (-counter) & GENMASK(31, 0); - event = perf_event_create_kernel_counter(&attr, -1, current, + event = perf_event_create_kernel_counter(&attr, -1, current, kvm_pmu_perf_overflow, pmc); + } + if (IS_ERR(event)) { pr_err_once("kvm: pmu event creation failed %ld\n", PTR_ERR(event)); @@ -432,6 +581,33 @@ static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx) pmc->perf_event = event; } +/** + * kvm_pmu_update_pmc_chained - update chained bitmap + * @vcpu: The vcpu pointer + * @select_idx: The number of selected counter + * + * Update the chained bitmap based on the event type written in the + * typer register. + */ +static void kvm_pmu_update_pmc_chained(struct kvm_vcpu *vcpu, u64 select_idx) +{ + struct kvm_pmu *pmu = &vcpu->arch.pmu; + struct kvm_pmc *pmc = &pmu->pmc[select_idx]; + + if (kvm_pmu_idx_has_chain_evtype(vcpu, pmc->idx)) { + /* + * During promotion from !chained to chained we must ensure + * the adjacent counter is stopped and its event destroyed + */ + if (!kvm_pmu_pmc_is_chained(pmc)) + kvm_pmu_stop_counter(vcpu, pmc); + + set_bit(pmc->idx >> 1, vcpu->arch.pmu.chained); + } else { + clear_bit(pmc->idx >> 1, vcpu->arch.pmu.chained); + } +} + /** * kvm_pmu_set_counter_event_type - set selected counter to monitor some event * @vcpu: The vcpu pointer @@ -451,6 +627,8 @@ void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, u64 data, ? PMCCFILTR_EL0 : PMEVTYPER0_EL0 + select_idx; __vcpu_sys_reg(vcpu, reg) = event_type; + + kvm_pmu_update_pmc_chained(vcpu, select_idx); kvm_pmu_create_perf_event(vcpu, select_idx); } -- cgit v1.2.3-55-g7522 From c118bbb52743df70e6297671606c1c08edc659fe Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Fri, 3 May 2019 15:27:48 +0100 Subject: arm64: KVM: Propagate full Spectre v2 workaround state to KVM guests Recent commits added the explicit notion of "workaround not required" to the state of the Spectre v2 (aka. BP_HARDENING) workaround, where we just had "needed" and "unknown" before. Export this knowledge to the rest of the kernel and enhance the existing kvm_arm_harden_branch_predictor() to report this new state as well. Export this new state to guests when they use KVM's firmware interface emulation. Signed-off-by: Andre Przywara Reviewed-by: Steven Price Signed-off-by: Marc Zyngier --- arch/arm/include/asm/kvm_host.h | 12 +++++++++--- arch/arm64/include/asm/cpufeature.h | 6 ++++++ arch/arm64/include/asm/kvm_host.h | 16 ++++++++++++++-- arch/arm64/kernel/cpu_errata.c | 23 ++++++++++++++++++----- virt/kvm/arm/psci.c | 10 +++++++++- 5 files changed, 56 insertions(+), 11 deletions(-) (limited to 'virt') diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index f80418ddeb60..e74e8f408987 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -362,7 +362,11 @@ static inline void kvm_vcpu_pmu_restore_host(struct kvm_vcpu *vcpu) {} static inline void kvm_arm_vhe_guest_enter(void) {} static inline void kvm_arm_vhe_guest_exit(void) {} -static inline bool kvm_arm_harden_branch_predictor(void) +#define KVM_BP_HARDEN_UNKNOWN -1 +#define KVM_BP_HARDEN_WA_NEEDED 0 +#define KVM_BP_HARDEN_NOT_REQUIRED 1 + +static inline int kvm_arm_harden_branch_predictor(void) { switch(read_cpuid_part()) { #ifdef CONFIG_HARDEN_BRANCH_PREDICTOR @@ -370,10 +374,12 @@ static inline bool kvm_arm_harden_branch_predictor(void) case ARM_CPU_PART_CORTEX_A12: case ARM_CPU_PART_CORTEX_A15: case ARM_CPU_PART_CORTEX_A17: - return true; + return KVM_BP_HARDEN_WA_NEEDED; #endif + case ARM_CPU_PART_CORTEX_A7: + return KVM_BP_HARDEN_NOT_REQUIRED; default: - return false; + return KVM_BP_HARDEN_UNKNOWN; } } diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 373799b7982f..948427f6b3d9 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -614,6 +614,12 @@ static inline bool system_uses_irq_prio_masking(void) cpus_have_const_cap(ARM64_HAS_IRQ_PRIO_MASKING); } +#define ARM64_BP_HARDEN_UNKNOWN -1 +#define ARM64_BP_HARDEN_WA_NEEDED 0 +#define ARM64_BP_HARDEN_NOT_REQUIRED 1 + +int get_spectre_v2_workaround_state(void); + #define ARM64_SSBD_UNKNOWN -1 #define ARM64_SSBD_FORCE_DISABLE 0 #define ARM64_SSBD_KERNEL 1 diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index c328191aa202..d9770daf3d7d 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -620,9 +620,21 @@ static inline void kvm_arm_vhe_guest_exit(void) isb(); } -static inline bool kvm_arm_harden_branch_predictor(void) +#define KVM_BP_HARDEN_UNKNOWN -1 +#define KVM_BP_HARDEN_WA_NEEDED 0 +#define KVM_BP_HARDEN_NOT_REQUIRED 1 + +static inline int kvm_arm_harden_branch_predictor(void) { - return cpus_have_const_cap(ARM64_HARDEN_BRANCH_PREDICTOR); + switch (get_spectre_v2_workaround_state()) { + case ARM64_BP_HARDEN_WA_NEEDED: + return KVM_BP_HARDEN_WA_NEEDED; + case ARM64_BP_HARDEN_NOT_REQUIRED: + return KVM_BP_HARDEN_NOT_REQUIRED; + case ARM64_BP_HARDEN_UNKNOWN: + default: + return KVM_BP_HARDEN_UNKNOWN; + } } #define KVM_SSBD_UNKNOWN -1 diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index ca11ff7bf55e..1e43ba5c79b7 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -554,6 +554,17 @@ cpu_enable_cache_maint_trap(const struct arm64_cpu_capabilities *__unused) static bool __hardenbp_enab = true; static bool __spectrev2_safe = true; +int get_spectre_v2_workaround_state(void) +{ + if (__spectrev2_safe) + return ARM64_BP_HARDEN_NOT_REQUIRED; + + if (!__hardenbp_enab) + return ARM64_BP_HARDEN_UNKNOWN; + + return ARM64_BP_HARDEN_WA_NEEDED; +} + /* * List of CPUs that do not need any Spectre-v2 mitigation at all. */ @@ -854,13 +865,15 @@ ssize_t cpu_show_spectre_v1(struct device *dev, struct device_attribute *attr, ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr, char *buf) { - if (__spectrev2_safe) + switch (get_spectre_v2_workaround_state()) { + case ARM64_BP_HARDEN_NOT_REQUIRED: return sprintf(buf, "Not affected\n"); - - if (__hardenbp_enab) + case ARM64_BP_HARDEN_WA_NEEDED: return sprintf(buf, "Mitigation: Branch predictor hardening\n"); - - return sprintf(buf, "Vulnerable\n"); + case ARM64_BP_HARDEN_UNKNOWN: + default: + return sprintf(buf, "Vulnerable\n"); + } } ssize_t cpu_show_spec_store_bypass(struct device *dev, diff --git a/virt/kvm/arm/psci.c b/virt/kvm/arm/psci.c index be3c9cdca9f3..355b9e38a42d 100644 --- a/virt/kvm/arm/psci.c +++ b/virt/kvm/arm/psci.c @@ -401,8 +401,16 @@ int kvm_hvc_call_handler(struct kvm_vcpu *vcpu) feature = smccc_get_arg1(vcpu); switch(feature) { case ARM_SMCCC_ARCH_WORKAROUND_1: - if (kvm_arm_harden_branch_predictor()) + switch (kvm_arm_harden_branch_predictor()) { + case KVM_BP_HARDEN_UNKNOWN: + break; + case KVM_BP_HARDEN_WA_NEEDED: val = SMCCC_RET_SUCCESS; + break; + case KVM_BP_HARDEN_NOT_REQUIRED: + val = SMCCC_RET_NOT_REQUIRED; + break; + } break; case ARM_SMCCC_ARCH_WORKAROUND_2: switch (kvm_arm_have_ssbd()) { -- cgit v1.2.3-55-g7522 From 99adb567632b656a4a54a90adb2172cc725b6896 Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Fri, 3 May 2019 15:27:49 +0100 Subject: KVM: arm/arm64: Add save/restore support for firmware workaround state KVM implements the firmware interface for mitigating cache speculation vulnerabilities. Guests may use this interface to ensure mitigation is active. If we want to migrate such a guest to a host with a different support level for those workarounds, migration might need to fail, to ensure that critical guests don't loose their protection. Introduce a way for userland to save and restore the workarounds state. On restoring we do checks that make sure we don't downgrade our mitigation level. Signed-off-by: Andre Przywara Reviewed-by: Eric Auger Reviewed-by: Steven Price Signed-off-by: Marc Zyngier --- arch/arm/include/asm/kvm_emulate.h | 10 +++ arch/arm/include/uapi/asm/kvm.h | 12 +++ arch/arm64/include/asm/kvm_emulate.h | 14 ++++ arch/arm64/include/uapi/asm/kvm.h | 10 +++ virt/kvm/arm/psci.c | 139 +++++++++++++++++++++++++++++++---- 5 files changed, 170 insertions(+), 15 deletions(-) (limited to 'virt') diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h index 6b7644a383f6..40002416efec 100644 --- a/arch/arm/include/asm/kvm_emulate.h +++ b/arch/arm/include/asm/kvm_emulate.h @@ -271,6 +271,16 @@ static inline unsigned long kvm_vcpu_get_mpidr_aff(struct kvm_vcpu *vcpu) return vcpu_cp15(vcpu, c0_MPIDR) & MPIDR_HWID_BITMASK; } +static inline bool kvm_arm_get_vcpu_workaround_2_flag(struct kvm_vcpu *vcpu) +{ + return false; +} + +static inline void kvm_arm_set_vcpu_workaround_2_flag(struct kvm_vcpu *vcpu, + bool flag) +{ +} + static inline void kvm_vcpu_set_be(struct kvm_vcpu *vcpu) { *vcpu_cpsr(vcpu) |= PSR_E_BIT; diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h index 4602464ebdfb..a4217c1a5d01 100644 --- a/arch/arm/include/uapi/asm/kvm.h +++ b/arch/arm/include/uapi/asm/kvm.h @@ -214,6 +214,18 @@ struct kvm_vcpu_events { #define KVM_REG_ARM_FW_REG(r) (KVM_REG_ARM | KVM_REG_SIZE_U64 | \ KVM_REG_ARM_FW | ((r) & 0xffff)) #define KVM_REG_ARM_PSCI_VERSION KVM_REG_ARM_FW_REG(0) +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1 KVM_REG_ARM_FW_REG(1) + /* Higher values mean better protection. */ +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_AVAIL 0 +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_AVAIL 1 +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_REQUIRED 2 +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2 KVM_REG_ARM_FW_REG(2) + /* Higher values mean better protection. */ +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_AVAIL 0 +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_UNKNOWN 1 +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_AVAIL 2 +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_REQUIRED 3 +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_ENABLED (1U << 4) /* Device Control API: ARM VGIC */ #define KVM_DEV_ARM_VGIC_GRP_ADDR 0 diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index 034dadec7168..8abca5df01e5 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -353,6 +353,20 @@ static inline unsigned long kvm_vcpu_get_mpidr_aff(struct kvm_vcpu *vcpu) return vcpu_read_sys_reg(vcpu, MPIDR_EL1) & MPIDR_HWID_BITMASK; } +static inline bool kvm_arm_get_vcpu_workaround_2_flag(struct kvm_vcpu *vcpu) +{ + return vcpu->arch.workaround_flags & VCPU_WORKAROUND_2_FLAG; +} + +static inline void kvm_arm_set_vcpu_workaround_2_flag(struct kvm_vcpu *vcpu, + bool flag) +{ + if (flag) + vcpu->arch.workaround_flags |= VCPU_WORKAROUND_2_FLAG; + else + vcpu->arch.workaround_flags &= ~VCPU_WORKAROUND_2_FLAG; +} + static inline void kvm_vcpu_set_be(struct kvm_vcpu *vcpu) { if (vcpu_mode_is_32bit(vcpu)) { diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h index d819a3e8b552..9a507716ae2f 100644 --- a/arch/arm64/include/uapi/asm/kvm.h +++ b/arch/arm64/include/uapi/asm/kvm.h @@ -229,6 +229,16 @@ struct kvm_vcpu_events { #define KVM_REG_ARM_FW_REG(r) (KVM_REG_ARM64 | KVM_REG_SIZE_U64 | \ KVM_REG_ARM_FW | ((r) & 0xffff)) #define KVM_REG_ARM_PSCI_VERSION KVM_REG_ARM_FW_REG(0) +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1 KVM_REG_ARM_FW_REG(1) +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_AVAIL 0 +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_AVAIL 1 +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_REQUIRED 2 +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2 KVM_REG_ARM_FW_REG(2) +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_AVAIL 0 +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_UNKNOWN 1 +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_AVAIL 2 +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_REQUIRED 3 +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_ENABLED (1U << 4) /* SVE registers */ #define KVM_REG_ARM64_SVE (0x15 << KVM_REG_ARM_COPROC_SHIFT) diff --git a/virt/kvm/arm/psci.c b/virt/kvm/arm/psci.c index 355b9e38a42d..87927f7e1ee7 100644 --- a/virt/kvm/arm/psci.c +++ b/virt/kvm/arm/psci.c @@ -438,42 +438,103 @@ int kvm_hvc_call_handler(struct kvm_vcpu *vcpu) int kvm_arm_get_fw_num_regs(struct kvm_vcpu *vcpu) { - return 1; /* PSCI version */ + return 3; /* PSCI version and two workaround registers */ } int kvm_arm_copy_fw_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices) { - if (put_user(KVM_REG_ARM_PSCI_VERSION, uindices)) + if (put_user(KVM_REG_ARM_PSCI_VERSION, uindices++)) + return -EFAULT; + + if (put_user(KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1, uindices++)) + return -EFAULT; + + if (put_user(KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2, uindices++)) return -EFAULT; return 0; } +#define KVM_REG_FEATURE_LEVEL_WIDTH 4 +#define KVM_REG_FEATURE_LEVEL_MASK (BIT(KVM_REG_FEATURE_LEVEL_WIDTH) - 1) + +/* + * Convert the workaround level into an easy-to-compare number, where higher + * values mean better protection. + */ +static int get_kernel_wa_level(u64 regid) +{ + switch (regid) { + case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1: + switch (kvm_arm_harden_branch_predictor()) { + case KVM_BP_HARDEN_UNKNOWN: + return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_AVAIL; + case KVM_BP_HARDEN_WA_NEEDED: + return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_AVAIL; + case KVM_BP_HARDEN_NOT_REQUIRED: + return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_REQUIRED; + } + return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_AVAIL; + case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2: + switch (kvm_arm_have_ssbd()) { + case KVM_SSBD_FORCE_DISABLE: + return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_AVAIL; + case KVM_SSBD_KERNEL: + return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_AVAIL; + case KVM_SSBD_FORCE_ENABLE: + case KVM_SSBD_MITIGATED: + return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_REQUIRED; + case KVM_SSBD_UNKNOWN: + default: + return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_UNKNOWN; + } + } + + return -EINVAL; +} + int kvm_arm_get_fw_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) { - if (reg->id == KVM_REG_ARM_PSCI_VERSION) { - void __user *uaddr = (void __user *)(long)reg->addr; - u64 val; + void __user *uaddr = (void __user *)(long)reg->addr; + u64 val; + switch (reg->id) { + case KVM_REG_ARM_PSCI_VERSION: val = kvm_psci_version(vcpu, vcpu->kvm); - if (copy_to_user(uaddr, &val, KVM_REG_SIZE(reg->id))) - return -EFAULT; + break; + case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1: + val = get_kernel_wa_level(reg->id) & KVM_REG_FEATURE_LEVEL_MASK; + break; + case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2: + val = get_kernel_wa_level(reg->id) & KVM_REG_FEATURE_LEVEL_MASK; - return 0; + if (val == KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_AVAIL && + kvm_arm_get_vcpu_workaround_2_flag(vcpu)) + val |= KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_ENABLED; + break; + default: + return -ENOENT; } - return -EINVAL; + if (copy_to_user(uaddr, &val, KVM_REG_SIZE(reg->id))) + return -EFAULT; + + return 0; } int kvm_arm_set_fw_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) { - if (reg->id == KVM_REG_ARM_PSCI_VERSION) { - void __user *uaddr = (void __user *)(long)reg->addr; - bool wants_02; - u64 val; + void __user *uaddr = (void __user *)(long)reg->addr; + u64 val; + int wa_level; + + if (copy_from_user(&val, uaddr, KVM_REG_SIZE(reg->id))) + return -EFAULT; - if (copy_from_user(&val, uaddr, KVM_REG_SIZE(reg->id))) - return -EFAULT; + switch (reg->id) { + case KVM_REG_ARM_PSCI_VERSION: + { + bool wants_02; wants_02 = test_bit(KVM_ARM_VCPU_PSCI_0_2, vcpu->arch.features); @@ -490,6 +551,54 @@ int kvm_arm_set_fw_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) vcpu->kvm->arch.psci_version = val; return 0; } + break; + } + + case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1: + if (val & ~KVM_REG_FEATURE_LEVEL_MASK) + return -EINVAL; + + if (get_kernel_wa_level(reg->id) < val) + return -EINVAL; + + return 0; + + case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2: + if (val & ~(KVM_REG_FEATURE_LEVEL_MASK | + KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_ENABLED)) + return -EINVAL; + + wa_level = val & KVM_REG_FEATURE_LEVEL_MASK; + + if (get_kernel_wa_level(reg->id) < wa_level) + return -EINVAL; + + /* The enabled bit must not be set unless the level is AVAIL. */ + if (wa_level != KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_AVAIL && + wa_level != val) + return -EINVAL; + + /* Are we finished or do we need to check the enable bit ? */ + if (kvm_arm_have_ssbd() != KVM_SSBD_KERNEL) + return 0; + + /* + * If this kernel supports the workaround to be switched on + * or off, make sure it matches the requested setting. + */ + switch (wa_level) { + case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_AVAIL: + kvm_arm_set_vcpu_workaround_2_flag(vcpu, + val & KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_ENABLED); + break; + case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_REQUIRED: + kvm_arm_set_vcpu_workaround_2_flag(vcpu, true); + break; + } + + return 0; + default: + return -ENOENT; } return -EINVAL; -- cgit v1.2.3-55-g7522 From fdec2a9ef853172529baaa192673b4cdb9a44fac Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Sat, 6 Apr 2019 11:29:40 +0100 Subject: KVM: arm64: Migrate _elx sysreg accessors to msr_s/mrs_s Currently, the {read,write}_sysreg_el*() accessors for accessing particular ELs' sysregs in the presence of VHE rely on some local hacks and define their system register encodings in a way that is inconsistent with the core definitions in . As a result, it is necessary to add duplicate definitions for any system register that already needs a definition in sysreg.h for other reasons. This is a bit of a maintenance headache, and the reasons for the _el*() accessors working the way they do is a bit historical. This patch gets rid of the shadow sysreg definitions in , converts the _el*() accessors to use the core __msr_s/__mrs_s interface, and converts all call sites to use the standard sysreg #define names (i.e., upper case, with SYS_ prefix). This patch will conflict heavily anyway, so the opportunity to clean up some bad whitespace in the context of the changes is taken. The change exposes a few system registers that have no sysreg.h definition, due to msr_s/mrs_s being used in place of msr/mrs: additions are made in order to fill in the gaps. Signed-off-by: Dave Martin Cc: Catalin Marinas Cc: Christoffer Dall Cc: Mark Rutland Cc: Will Deacon Link: https://www.spinics.net/lists/kvm-arm/msg31717.html [Rebased to v4.21-rc1] Signed-off-by: Sudeep Holla [Rebased to v5.2-rc5, changelog updates] Signed-off-by: Marc Zyngier --- arch/arm/include/asm/kvm_hyp.h | 13 +++--- arch/arm64/include/asm/kvm_emulate.h | 16 +++---- arch/arm64/include/asm/kvm_hyp.h | 50 ++------------------ arch/arm64/include/asm/sysreg.h | 35 +++++++++++++- arch/arm64/kvm/hyp/switch.c | 14 +++--- arch/arm64/kvm/hyp/sysreg-sr.c | 78 ++++++++++++++++---------------- arch/arm64/kvm/hyp/tlb.c | 12 ++--- arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c | 2 +- arch/arm64/kvm/regmap.c | 4 +- arch/arm64/kvm/sys_regs.c | 56 +++++++++++------------ virt/kvm/arm/arch_timer.c | 24 +++++----- 11 files changed, 148 insertions(+), 156 deletions(-) (limited to 'virt') diff --git a/arch/arm/include/asm/kvm_hyp.h b/arch/arm/include/asm/kvm_hyp.h index 71ac1c8d101c..40e9034db601 100644 --- a/arch/arm/include/asm/kvm_hyp.h +++ b/arch/arm/include/asm/kvm_hyp.h @@ -82,13 +82,14 @@ #define VFP_FPEXC __ACCESS_VFP(FPEXC) /* AArch64 compatibility macros, only for the timer so far */ -#define read_sysreg_el0(r) read_sysreg(r##_el0) -#define write_sysreg_el0(v, r) write_sysreg(v, r##_el0) +#define read_sysreg_el0(r) read_sysreg(r##_EL0) +#define write_sysreg_el0(v, r) write_sysreg(v, r##_EL0) + +#define SYS_CNTP_CTL_EL0 CNTP_CTL +#define SYS_CNTP_CVAL_EL0 CNTP_CVAL +#define SYS_CNTV_CTL_EL0 CNTV_CTL +#define SYS_CNTV_CVAL_EL0 CNTV_CVAL -#define cntp_ctl_el0 CNTP_CTL -#define cntp_cval_el0 CNTP_CVAL -#define cntv_ctl_el0 CNTV_CTL -#define cntv_cval_el0 CNTV_CVAL #define cntvoff_el2 CNTVOFF #define cnthctl_el2 CNTHCTL diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index 8abca5df01e5..d69c1efc63e7 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -126,7 +126,7 @@ static inline unsigned long *__vcpu_elr_el1(const struct kvm_vcpu *vcpu) static inline unsigned long vcpu_read_elr_el1(const struct kvm_vcpu *vcpu) { if (vcpu->arch.sysregs_loaded_on_cpu) - return read_sysreg_el1(elr); + return read_sysreg_el1(SYS_ELR); else return *__vcpu_elr_el1(vcpu); } @@ -134,7 +134,7 @@ static inline unsigned long vcpu_read_elr_el1(const struct kvm_vcpu *vcpu) static inline void vcpu_write_elr_el1(const struct kvm_vcpu *vcpu, unsigned long v) { if (vcpu->arch.sysregs_loaded_on_cpu) - write_sysreg_el1(v, elr); + write_sysreg_el1(v, SYS_ELR); else *__vcpu_elr_el1(vcpu) = v; } @@ -186,7 +186,7 @@ static inline unsigned long vcpu_read_spsr(const struct kvm_vcpu *vcpu) return vcpu_read_spsr32(vcpu); if (vcpu->arch.sysregs_loaded_on_cpu) - return read_sysreg_el1(spsr); + return read_sysreg_el1(SYS_SPSR); else return vcpu_gp_regs(vcpu)->spsr[KVM_SPSR_EL1]; } @@ -199,7 +199,7 @@ static inline void vcpu_write_spsr(struct kvm_vcpu *vcpu, unsigned long v) } if (vcpu->arch.sysregs_loaded_on_cpu) - write_sysreg_el1(v, spsr); + write_sysreg_el1(v, SYS_SPSR); else vcpu_gp_regs(vcpu)->spsr[KVM_SPSR_EL1] = v; } @@ -465,13 +465,13 @@ static inline void kvm_skip_instr(struct kvm_vcpu *vcpu, bool is_wide_instr) */ static inline void __hyp_text __kvm_skip_instr(struct kvm_vcpu *vcpu) { - *vcpu_pc(vcpu) = read_sysreg_el2(elr); - vcpu->arch.ctxt.gp_regs.regs.pstate = read_sysreg_el2(spsr); + *vcpu_pc(vcpu) = read_sysreg_el2(SYS_ELR); + vcpu->arch.ctxt.gp_regs.regs.pstate = read_sysreg_el2(SYS_SPSR); kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu)); - write_sysreg_el2(vcpu->arch.ctxt.gp_regs.regs.pstate, spsr); - write_sysreg_el2(*vcpu_pc(vcpu), elr); + write_sysreg_el2(vcpu->arch.ctxt.gp_regs.regs.pstate, SYS_SPSR); + write_sysreg_el2(*vcpu_pc(vcpu), SYS_ELR); } #endif /* __ARM64_KVM_EMULATE_H__ */ diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h index 286f7e7e1be4..86825aa20852 100644 --- a/arch/arm64/include/asm/kvm_hyp.h +++ b/arch/arm64/include/asm/kvm_hyp.h @@ -18,7 +18,7 @@ #define read_sysreg_elx(r,nvh,vh) \ ({ \ u64 reg; \ - asm volatile(ALTERNATIVE("mrs %0, " __stringify(r##nvh),\ + asm volatile(ALTERNATIVE(__mrs_s("%0", r##nvh), \ __mrs_s("%0", r##vh), \ ARM64_HAS_VIRT_HOST_EXTN) \ : "=r" (reg)); \ @@ -28,7 +28,7 @@ #define write_sysreg_elx(v,r,nvh,vh) \ do { \ u64 __val = (u64)(v); \ - asm volatile(ALTERNATIVE("msr " __stringify(r##nvh) ", %x0",\ + asm volatile(ALTERNATIVE(__msr_s(r##nvh, "%x0"), \ __msr_s(r##vh, "%x0"), \ ARM64_HAS_VIRT_HOST_EXTN) \ : : "rZ" (__val)); \ @@ -37,55 +37,15 @@ /* * Unified accessors for registers that have a different encoding * between VHE and non-VHE. They must be specified without their "ELx" - * encoding. + * encoding, but with the SYS_ prefix, as defined in asm/sysreg.h. */ -#define read_sysreg_el2(r) \ - ({ \ - u64 reg; \ - asm volatile(ALTERNATIVE("mrs %0, " __stringify(r##_EL2),\ - "mrs %0, " __stringify(r##_EL1),\ - ARM64_HAS_VIRT_HOST_EXTN) \ - : "=r" (reg)); \ - reg; \ - }) - -#define write_sysreg_el2(v,r) \ - do { \ - u64 __val = (u64)(v); \ - asm volatile(ALTERNATIVE("msr " __stringify(r##_EL2) ", %x0",\ - "msr " __stringify(r##_EL1) ", %x0",\ - ARM64_HAS_VIRT_HOST_EXTN) \ - : : "rZ" (__val)); \ - } while (0) #define read_sysreg_el0(r) read_sysreg_elx(r, _EL0, _EL02) #define write_sysreg_el0(v,r) write_sysreg_elx(v, r, _EL0, _EL02) #define read_sysreg_el1(r) read_sysreg_elx(r, _EL1, _EL12) #define write_sysreg_el1(v,r) write_sysreg_elx(v, r, _EL1, _EL12) - -/* The VHE specific system registers and their encoding */ -#define sctlr_EL12 sys_reg(3, 5, 1, 0, 0) -#define cpacr_EL12 sys_reg(3, 5, 1, 0, 2) -#define ttbr0_EL12 sys_reg(3, 5, 2, 0, 0) -#define ttbr1_EL12 sys_reg(3, 5, 2, 0, 1) -#define tcr_EL12 sys_reg(3, 5, 2, 0, 2) -#define afsr0_EL12 sys_reg(3, 5, 5, 1, 0) -#define afsr1_EL12 sys_reg(3, 5, 5, 1, 1) -#define esr_EL12 sys_reg(3, 5, 5, 2, 0) -#define far_EL12 sys_reg(3, 5, 6, 0, 0) -#define mair_EL12 sys_reg(3, 5, 10, 2, 0) -#define amair_EL12 sys_reg(3, 5, 10, 3, 0) -#define vbar_EL12 sys_reg(3, 5, 12, 0, 0) -#define contextidr_EL12 sys_reg(3, 5, 13, 0, 1) -#define cntkctl_EL12 sys_reg(3, 5, 14, 1, 0) -#define cntp_tval_EL02 sys_reg(3, 5, 14, 2, 0) -#define cntp_ctl_EL02 sys_reg(3, 5, 14, 2, 1) -#define cntp_cval_EL02 sys_reg(3, 5, 14, 2, 2) -#define cntv_tval_EL02 sys_reg(3, 5, 14, 3, 0) -#define cntv_ctl_EL02 sys_reg(3, 5, 14, 3, 1) -#define cntv_cval_EL02 sys_reg(3, 5, 14, 3, 2) -#define spsr_EL12 sys_reg(3, 5, 4, 0, 0) -#define elr_EL12 sys_reg(3, 5, 4, 0, 1) +#define read_sysreg_el2(r) read_sysreg_elx(r, _EL2, _EL1) +#define write_sysreg_el2(v,r) write_sysreg_elx(v, r, _EL2, _EL1) /** * hyp_alternate_select - Generates patchable code sequences that are diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index cd7f7ce1a56a..852cc113de7c 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -191,6 +191,9 @@ #define SYS_APGAKEYLO_EL1 sys_reg(3, 0, 2, 3, 0) #define SYS_APGAKEYHI_EL1 sys_reg(3, 0, 2, 3, 1) +#define SYS_SPSR_EL1 sys_reg(3, 0, 4, 0, 0) +#define SYS_ELR_EL1 sys_reg(3, 0, 4, 0, 1) + #define SYS_ICC_PMR_EL1 sys_reg(3, 0, 4, 6, 0) #define SYS_AFSR0_EL1 sys_reg(3, 0, 5, 1, 0) @@ -382,6 +385,9 @@ #define SYS_CNTP_CTL_EL0 sys_reg(3, 3, 14, 2, 1) #define SYS_CNTP_CVAL_EL0 sys_reg(3, 3, 14, 2, 2) +#define SYS_CNTV_CTL_EL0 sys_reg(3, 3, 14, 3, 1) +#define SYS_CNTV_CVAL_EL0 sys_reg(3, 3, 14, 3, 2) + #define SYS_AARCH32_CNTP_TVAL sys_reg(0, 0, 14, 2, 0) #define SYS_AARCH32_CNTP_CTL sys_reg(0, 0, 14, 2, 1) #define SYS_AARCH32_CNTP_CVAL sys_reg(0, 2, 0, 14, 0) @@ -392,14 +398,17 @@ #define __TYPER_CRm(n) (0xc | (((n) >> 3) & 0x3)) #define SYS_PMEVTYPERn_EL0(n) sys_reg(3, 3, 14, __TYPER_CRm(n), __PMEV_op2(n)) -#define SYS_PMCCFILTR_EL0 sys_reg (3, 3, 14, 15, 7) +#define SYS_PMCCFILTR_EL0 sys_reg(3, 3, 14, 15, 7) #define SYS_ZCR_EL2 sys_reg(3, 4, 1, 2, 0) - #define SYS_DACR32_EL2 sys_reg(3, 4, 3, 0, 0) +#define SYS_SPSR_EL2 sys_reg(3, 4, 4, 0, 0) +#define SYS_ELR_EL2 sys_reg(3, 4, 4, 0, 1) #define SYS_IFSR32_EL2 sys_reg(3, 4, 5, 0, 1) +#define SYS_ESR_EL2 sys_reg(3, 4, 5, 2, 0) #define SYS_VSESR_EL2 sys_reg(3, 4, 5, 2, 3) #define SYS_FPEXC32_EL2 sys_reg(3, 4, 5, 3, 0) +#define SYS_FAR_EL2 sys_reg(3, 4, 6, 0, 0) #define SYS_VDISR_EL2 sys_reg(3, 4, 12, 1, 1) #define __SYS__AP0Rx_EL2(x) sys_reg(3, 4, 12, 8, x) @@ -444,7 +453,29 @@ #define SYS_ICH_LR15_EL2 __SYS__LR8_EL2(7) /* VHE encodings for architectural EL0/1 system registers */ +#define SYS_SCTLR_EL12 sys_reg(3, 5, 1, 0, 0) +#define SYS_CPACR_EL12 sys_reg(3, 5, 1, 0, 2) #define SYS_ZCR_EL12 sys_reg(3, 5, 1, 2, 0) +#define SYS_TTBR0_EL12 sys_reg(3, 5, 2, 0, 0) +#define SYS_TTBR1_EL12 sys_reg(3, 5, 2, 0, 1) +#define SYS_TCR_EL12 sys_reg(3, 5, 2, 0, 2) +#define SYS_SPSR_EL12 sys_reg(3, 5, 4, 0, 0) +#define SYS_ELR_EL12 sys_reg(3, 5, 4, 0, 1) +#define SYS_AFSR0_EL12 sys_reg(3, 5, 5, 1, 0) +#define SYS_AFSR1_EL12 sys_reg(3, 5, 5, 1, 1) +#define SYS_ESR_EL12 sys_reg(3, 5, 5, 2, 0) +#define SYS_FAR_EL12 sys_reg(3, 5, 6, 0, 0) +#define SYS_MAIR_EL12 sys_reg(3, 5, 10, 2, 0) +#define SYS_AMAIR_EL12 sys_reg(3, 5, 10, 3, 0) +#define SYS_VBAR_EL12 sys_reg(3, 5, 12, 0, 0) +#define SYS_CONTEXTIDR_EL12 sys_reg(3, 5, 13, 0, 1) +#define SYS_CNTKCTL_EL12 sys_reg(3, 5, 14, 1, 0) +#define SYS_CNTP_TVAL_EL02 sys_reg(3, 5, 14, 2, 0) +#define SYS_CNTP_CTL_EL02 sys_reg(3, 5, 14, 2, 1) +#define SYS_CNTP_CVAL_EL02 sys_reg(3, 5, 14, 2, 2) +#define SYS_CNTV_TVAL_EL02 sys_reg(3, 5, 14, 3, 0) +#define SYS_CNTV_CTL_EL02 sys_reg(3, 5, 14, 3, 1) +#define SYS_CNTV_CVAL_EL02 sys_reg(3, 5, 14, 3, 2) /* Common SCTLR_ELx flags. */ #define SCTLR_ELx_DSSBS (_BITUL(44)) diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c index b0041812bca9..80062f93769d 100644 --- a/arch/arm64/kvm/hyp/switch.c +++ b/arch/arm64/kvm/hyp/switch.c @@ -284,7 +284,7 @@ static bool __hyp_text __populate_fault_info(struct kvm_vcpu *vcpu) if (ec != ESR_ELx_EC_DABT_LOW && ec != ESR_ELx_EC_IABT_LOW) return true; - far = read_sysreg_el2(far); + far = read_sysreg_el2(SYS_FAR); /* * The HPFAR can be invalid if the stage 2 fault did not @@ -401,7 +401,7 @@ static bool __hyp_text __hyp_handle_fpsimd(struct kvm_vcpu *vcpu) static bool __hyp_text fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code) { if (ARM_EXCEPTION_CODE(*exit_code) != ARM_EXCEPTION_IRQ) - vcpu->arch.fault.esr_el2 = read_sysreg_el2(esr); + vcpu->arch.fault.esr_el2 = read_sysreg_el2(SYS_ESR); /* * We're using the raw exception code in order to only process @@ -697,8 +697,8 @@ static void __hyp_text __hyp_call_panic_nvhe(u64 spsr, u64 elr, u64 par, asm volatile("ldr %0, =__hyp_panic_string" : "=r" (str_va)); __hyp_do_panic(str_va, - spsr, elr, - read_sysreg(esr_el2), read_sysreg_el2(far), + spsr, elr, + read_sysreg(esr_el2), read_sysreg_el2(SYS_FAR), read_sysreg(hpfar_el2), par, vcpu); } @@ -713,15 +713,15 @@ static void __hyp_call_panic_vhe(u64 spsr, u64 elr, u64 par, panic(__hyp_panic_string, spsr, elr, - read_sysreg_el2(esr), read_sysreg_el2(far), + read_sysreg_el2(SYS_ESR), read_sysreg_el2(SYS_FAR), read_sysreg(hpfar_el2), par, vcpu); } NOKPROBE_SYMBOL(__hyp_call_panic_vhe); void __hyp_text __noreturn hyp_panic(struct kvm_cpu_context *host_ctxt) { - u64 spsr = read_sysreg_el2(spsr); - u64 elr = read_sysreg_el2(elr); + u64 spsr = read_sysreg_el2(SYS_SPSR); + u64 elr = read_sysreg_el2(SYS_ELR); u64 par = read_sysreg(par_el1); if (!has_vhe()) diff --git a/arch/arm64/kvm/hyp/sysreg-sr.c b/arch/arm64/kvm/hyp/sysreg-sr.c index c283f7cbc702..7ddbc849b580 100644 --- a/arch/arm64/kvm/hyp/sysreg-sr.c +++ b/arch/arm64/kvm/hyp/sysreg-sr.c @@ -43,33 +43,33 @@ static void __hyp_text __sysreg_save_user_state(struct kvm_cpu_context *ctxt) static void __hyp_text __sysreg_save_el1_state(struct kvm_cpu_context *ctxt) { ctxt->sys_regs[CSSELR_EL1] = read_sysreg(csselr_el1); - ctxt->sys_regs[SCTLR_EL1] = read_sysreg_el1(sctlr); + ctxt->sys_regs[SCTLR_EL1] = read_sysreg_el1(SYS_SCTLR); ctxt->sys_regs[ACTLR_EL1] = read_sysreg(actlr_el1); - ctxt->sys_regs[CPACR_EL1] = read_sysreg_el1(cpacr); - ctxt->sys_regs[TTBR0_EL1] = read_sysreg_el1(ttbr0); - ctxt->sys_regs[TTBR1_EL1] = read_sysreg_el1(ttbr1); - ctxt->sys_regs[TCR_EL1] = read_sysreg_el1(tcr); - ctxt->sys_regs[ESR_EL1] = read_sysreg_el1(esr); - ctxt->sys_regs[AFSR0_EL1] = read_sysreg_el1(afsr0); - ctxt->sys_regs[AFSR1_EL1] = read_sysreg_el1(afsr1); - ctxt->sys_regs[FAR_EL1] = read_sysreg_el1(far); - ctxt->sys_regs[MAIR_EL1] = read_sysreg_el1(mair); - ctxt->sys_regs[VBAR_EL1] = read_sysreg_el1(vbar); - ctxt->sys_regs[CONTEXTIDR_EL1] = read_sysreg_el1(contextidr); - ctxt->sys_regs[AMAIR_EL1] = read_sysreg_el1(amair); - ctxt->sys_regs[CNTKCTL_EL1] = read_sysreg_el1(cntkctl); + ctxt->sys_regs[CPACR_EL1] = read_sysreg_el1(SYS_CPACR); + ctxt->sys_regs[TTBR0_EL1] = read_sysreg_el1(SYS_TTBR0); + ctxt->sys_regs[TTBR1_EL1] = read_sysreg_el1(SYS_TTBR1); + ctxt->sys_regs[TCR_EL1] = read_sysreg_el1(SYS_TCR); + ctxt->sys_regs[ESR_EL1] = read_sysreg_el1(SYS_ESR); + ctxt->sys_regs[AFSR0_EL1] = read_sysreg_el1(SYS_AFSR0); + ctxt->sys_regs[AFSR1_EL1] = read_sysreg_el1(SYS_AFSR1); + ctxt->sys_regs[FAR_EL1] = read_sysreg_el1(SYS_FAR); + ctxt->sys_regs[MAIR_EL1] = read_sysreg_el1(SYS_MAIR); + ctxt->sys_regs[VBAR_EL1] = read_sysreg_el1(SYS_VBAR); + ctxt->sys_regs[CONTEXTIDR_EL1] = read_sysreg_el1(SYS_CONTEXTIDR); + ctxt->sys_regs[AMAIR_EL1] = read_sysreg_el1(SYS_AMAIR); + ctxt->sys_regs[CNTKCTL_EL1] = read_sysreg_el1(SYS_CNTKCTL); ctxt->sys_regs[PAR_EL1] = read_sysreg(par_el1); ctxt->sys_regs[TPIDR_EL1] = read_sysreg(tpidr_el1); ctxt->gp_regs.sp_el1 = read_sysreg(sp_el1); - ctxt->gp_regs.elr_el1 = read_sysreg_el1(elr); - ctxt->gp_regs.spsr[KVM_SPSR_EL1]= read_sysreg_el1(spsr); + ctxt->gp_regs.elr_el1 = read_sysreg_el1(SYS_ELR); + ctxt->gp_regs.spsr[KVM_SPSR_EL1]= read_sysreg_el1(SYS_SPSR); } static void __hyp_text __sysreg_save_el2_return_state(struct kvm_cpu_context *ctxt) { - ctxt->gp_regs.regs.pc = read_sysreg_el2(elr); - ctxt->gp_regs.regs.pstate = read_sysreg_el2(spsr); + ctxt->gp_regs.regs.pc = read_sysreg_el2(SYS_ELR); + ctxt->gp_regs.regs.pstate = read_sysreg_el2(SYS_SPSR); if (cpus_have_const_cap(ARM64_HAS_RAS_EXTN)) ctxt->sys_regs[DISR_EL1] = read_sysreg_s(SYS_VDISR_EL2); @@ -109,35 +109,35 @@ static void __hyp_text __sysreg_restore_common_state(struct kvm_cpu_context *ctx static void __hyp_text __sysreg_restore_user_state(struct kvm_cpu_context *ctxt) { - write_sysreg(ctxt->sys_regs[TPIDR_EL0], tpidr_el0); - write_sysreg(ctxt->sys_regs[TPIDRRO_EL0], tpidrro_el0); + write_sysreg(ctxt->sys_regs[TPIDR_EL0], tpidr_el0); + write_sysreg(ctxt->sys_regs[TPIDRRO_EL0], tpidrro_el0); } static void __hyp_text __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt) { write_sysreg(ctxt->sys_regs[MPIDR_EL1], vmpidr_el2); write_sysreg(ctxt->sys_regs[CSSELR_EL1], csselr_el1); - write_sysreg_el1(ctxt->sys_regs[SCTLR_EL1], sctlr); - write_sysreg(ctxt->sys_regs[ACTLR_EL1], actlr_el1); - write_sysreg_el1(ctxt->sys_regs[CPACR_EL1], cpacr); - write_sysreg_el1(ctxt->sys_regs[TTBR0_EL1], ttbr0); - write_sysreg_el1(ctxt->sys_regs[TTBR1_EL1], ttbr1); - write_sysreg_el1(ctxt->sys_regs[TCR_EL1], tcr); - write_sysreg_el1(ctxt->sys_regs[ESR_EL1], esr); - write_sysreg_el1(ctxt->sys_regs[AFSR0_EL1], afsr0); - write_sysreg_el1(ctxt->sys_regs[AFSR1_EL1], afsr1); - write_sysreg_el1(ctxt->sys_regs[FAR_EL1], far); - write_sysreg_el1(ctxt->sys_regs[MAIR_EL1], mair); - write_sysreg_el1(ctxt->sys_regs[VBAR_EL1], vbar); - write_sysreg_el1(ctxt->sys_regs[CONTEXTIDR_EL1],contextidr); - write_sysreg_el1(ctxt->sys_regs[AMAIR_EL1], amair); - write_sysreg_el1(ctxt->sys_regs[CNTKCTL_EL1], cntkctl); + write_sysreg_el1(ctxt->sys_regs[SCTLR_EL1], SYS_SCTLR); + write_sysreg(ctxt->sys_regs[ACTLR_EL1], actlr_el1); + write_sysreg_el1(ctxt->sys_regs[CPACR_EL1], SYS_CPACR); + write_sysreg_el1(ctxt->sys_regs[TTBR0_EL1], SYS_TTBR0); + write_sysreg_el1(ctxt->sys_regs[TTBR1_EL1], SYS_TTBR1); + write_sysreg_el1(ctxt->sys_regs[TCR_EL1], SYS_TCR); + write_sysreg_el1(ctxt->sys_regs[ESR_EL1], SYS_ESR); + write_sysreg_el1(ctxt->sys_regs[AFSR0_EL1], SYS_AFSR0); + write_sysreg_el1(ctxt->sys_regs[AFSR1_EL1], SYS_AFSR1); + write_sysreg_el1(ctxt->sys_regs[FAR_EL1], SYS_FAR); + write_sysreg_el1(ctxt->sys_regs[MAIR_EL1], SYS_MAIR); + write_sysreg_el1(ctxt->sys_regs[VBAR_EL1], SYS_VBAR); + write_sysreg_el1(ctxt->sys_regs[CONTEXTIDR_EL1],SYS_CONTEXTIDR); + write_sysreg_el1(ctxt->sys_regs[AMAIR_EL1], SYS_AMAIR); + write_sysreg_el1(ctxt->sys_regs[CNTKCTL_EL1], SYS_CNTKCTL); write_sysreg(ctxt->sys_regs[PAR_EL1], par_el1); write_sysreg(ctxt->sys_regs[TPIDR_EL1], tpidr_el1); write_sysreg(ctxt->gp_regs.sp_el1, sp_el1); - write_sysreg_el1(ctxt->gp_regs.elr_el1, elr); - write_sysreg_el1(ctxt->gp_regs.spsr[KVM_SPSR_EL1],spsr); + write_sysreg_el1(ctxt->gp_regs.elr_el1, SYS_ELR); + write_sysreg_el1(ctxt->gp_regs.spsr[KVM_SPSR_EL1],SYS_SPSR); } static void __hyp_text @@ -160,8 +160,8 @@ __sysreg_restore_el2_return_state(struct kvm_cpu_context *ctxt) if (!(mode & PSR_MODE32_BIT) && mode >= PSR_MODE_EL2t) pstate = PSR_MODE_EL2h | PSR_IL_BIT; - write_sysreg_el2(ctxt->gp_regs.regs.pc, elr); - write_sysreg_el2(pstate, spsr); + write_sysreg_el2(ctxt->gp_regs.regs.pc, SYS_ELR); + write_sysreg_el2(pstate, SYS_SPSR); if (cpus_have_const_cap(ARM64_HAS_RAS_EXTN)) write_sysreg_s(ctxt->sys_regs[DISR_EL1], SYS_VDISR_EL2); diff --git a/arch/arm64/kvm/hyp/tlb.c b/arch/arm64/kvm/hyp/tlb.c index 32078b767f63..d49a14497715 100644 --- a/arch/arm64/kvm/hyp/tlb.c +++ b/arch/arm64/kvm/hyp/tlb.c @@ -33,12 +33,12 @@ static void __hyp_text __tlb_switch_to_guest_vhe(struct kvm *kvm, * in the TCR_EL1 register. We also need to prevent it to * allocate IPA->PA walks, so we enable the S1 MMU... */ - val = cxt->tcr = read_sysreg_el1(tcr); + val = cxt->tcr = read_sysreg_el1(SYS_TCR); val |= TCR_EPD1_MASK | TCR_EPD0_MASK; - write_sysreg_el1(val, tcr); - val = cxt->sctlr = read_sysreg_el1(sctlr); + write_sysreg_el1(val, SYS_TCR); + val = cxt->sctlr = read_sysreg_el1(SYS_SCTLR); val |= SCTLR_ELx_M; - write_sysreg_el1(val, sctlr); + write_sysreg_el1(val, SYS_SCTLR); } /* @@ -85,8 +85,8 @@ static void __hyp_text __tlb_switch_to_host_vhe(struct kvm *kvm, if (cpus_have_const_cap(ARM64_WORKAROUND_1165522)) { /* Restore the registers to what they were */ - write_sysreg_el1(cxt->tcr, tcr); - write_sysreg_el1(cxt->sctlr, sctlr); + write_sysreg_el1(cxt->tcr, SYS_TCR); + write_sysreg_el1(cxt->sctlr, SYS_SCTLR); } local_irq_restore(cxt->flags); diff --git a/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c b/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c index ba2aaeb84c6c..29ee1feba4eb 100644 --- a/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c +++ b/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c @@ -16,7 +16,7 @@ static bool __hyp_text __is_be(struct kvm_vcpu *vcpu) { if (vcpu_mode_is_32bit(vcpu)) - return !!(read_sysreg_el2(spsr) & PSR_AA32_E_BIT); + return !!(read_sysreg_el2(SYS_SPSR) & PSR_AA32_E_BIT); return !!(read_sysreg(SCTLR_EL1) & SCTLR_ELx_EE); } diff --git a/arch/arm64/kvm/regmap.c b/arch/arm64/kvm/regmap.c index d66613e6ad08..0d60e4f0af66 100644 --- a/arch/arm64/kvm/regmap.c +++ b/arch/arm64/kvm/regmap.c @@ -152,7 +152,7 @@ unsigned long vcpu_read_spsr32(const struct kvm_vcpu *vcpu) switch (spsr_idx) { case KVM_SPSR_SVC: - return read_sysreg_el1(spsr); + return read_sysreg_el1(SYS_SPSR); case KVM_SPSR_ABT: return read_sysreg(spsr_abt); case KVM_SPSR_UND: @@ -177,7 +177,7 @@ void vcpu_write_spsr32(struct kvm_vcpu *vcpu, unsigned long v) switch (spsr_idx) { case KVM_SPSR_SVC: - write_sysreg_el1(v, spsr); + write_sysreg_el1(v, SYS_SPSR); case KVM_SPSR_ABT: write_sysreg(v, spsr_abt); case KVM_SPSR_UND: diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 0a7665c189ff..f26e181d881c 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -81,24 +81,24 @@ u64 vcpu_read_sys_reg(const struct kvm_vcpu *vcpu, int reg) */ switch (reg) { case CSSELR_EL1: return read_sysreg_s(SYS_CSSELR_EL1); - case SCTLR_EL1: return read_sysreg_s(sctlr_EL12); + case SCTLR_EL1: return read_sysreg_s(SYS_SCTLR_EL12); case ACTLR_EL1: return read_sysreg_s(SYS_ACTLR_EL1); - case CPACR_EL1: return read_sysreg_s(cpacr_EL12); - case TTBR0_EL1: return read_sysreg_s(ttbr0_EL12); - case TTBR1_EL1: return read_sysreg_s(ttbr1_EL12); - case TCR_EL1: return read_sysreg_s(tcr_EL12); - case ESR_EL1: return read_sysreg_s(esr_EL12); - case AFSR0_EL1: return read_sysreg_s(afsr0_EL12); - case AFSR1_EL1: return read_sysreg_s(afsr1_EL12); - case FAR_EL1: return read_sysreg_s(far_EL12); - case MAIR_EL1: return read_sysreg_s(mair_EL12); - case VBAR_EL1: return read_sysreg_s(vbar_EL12); - case CONTEXTIDR_EL1: return read_sysreg_s(contextidr_EL12); + case CPACR_EL1: return read_sysreg_s(SYS_CPACR_EL12); + case TTBR0_EL1: return read_sysreg_s(SYS_TTBR0_EL12); + case TTBR1_EL1: return read_sysreg_s(SYS_TTBR1_EL12); + case TCR_EL1: return read_sysreg_s(SYS_TCR_EL12); + case ESR_EL1: return read_sysreg_s(SYS_ESR_EL12); + case AFSR0_EL1: return read_sysreg_s(SYS_AFSR0_EL12); + case AFSR1_EL1: return read_sysreg_s(SYS_AFSR1_EL12); + case FAR_EL1: return read_sysreg_s(SYS_FAR_EL12); + case MAIR_EL1: return read_sysreg_s(SYS_MAIR_EL12); + case VBAR_EL1: return read_sysreg_s(SYS_VBAR_EL12); + case CONTEXTIDR_EL1: return read_sysreg_s(SYS_CONTEXTIDR_EL12); case TPIDR_EL0: return read_sysreg_s(SYS_TPIDR_EL0); case TPIDRRO_EL0: return read_sysreg_s(SYS_TPIDRRO_EL0); case TPIDR_EL1: return read_sysreg_s(SYS_TPIDR_EL1); - case AMAIR_EL1: return read_sysreg_s(amair_EL12); - case CNTKCTL_EL1: return read_sysreg_s(cntkctl_EL12); + case AMAIR_EL1: return read_sysreg_s(SYS_AMAIR_EL12); + case CNTKCTL_EL1: return read_sysreg_s(SYS_CNTKCTL_EL12); case PAR_EL1: return read_sysreg_s(SYS_PAR_EL1); case DACR32_EL2: return read_sysreg_s(SYS_DACR32_EL2); case IFSR32_EL2: return read_sysreg_s(SYS_IFSR32_EL2); @@ -124,24 +124,24 @@ void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg) */ switch (reg) { case CSSELR_EL1: write_sysreg_s(val, SYS_CSSELR_EL1); return; - case SCTLR_EL1: write_sysreg_s(val, sctlr_EL12); return; + case SCTLR_EL1: write_sysreg_s(val, SYS_SCTLR_EL12); return; case ACTLR_EL1: write_sysreg_s(val, SYS_ACTLR_EL1); return; - case CPACR_EL1: write_sysreg_s(val, cpacr_EL12); return; - case TTBR0_EL1: write_sysreg_s(val, ttbr0_EL12); return; - case TTBR1_EL1: write_sysreg_s(val, ttbr1_EL12); return; - case TCR_EL1: write_sysreg_s(val, tcr_EL12); return; - case ESR_EL1: write_sysreg_s(val, esr_EL12); return; - case AFSR0_EL1: write_sysreg_s(val, afsr0_EL12); return; - case AFSR1_EL1: write_sysreg_s(val, afsr1_EL12); return; - case FAR_EL1: write_sysreg_s(val, far_EL12); return; - case MAIR_EL1: write_sysreg_s(val, mair_EL12); return; - case VBAR_EL1: write_sysreg_s(val, vbar_EL12); return; - case CONTEXTIDR_EL1: write_sysreg_s(val, contextidr_EL12); return; + case CPACR_EL1: write_sysreg_s(val, SYS_CPACR_EL12); return; + case TTBR0_EL1: write_sysreg_s(val, SYS_TTBR0_EL12); return; + case TTBR1_EL1: write_sysreg_s(val, SYS_TTBR1_EL12); return; + case TCR_EL1: write_sysreg_s(val, SYS_TCR_EL12); return; + case ESR_EL1: write_sysreg_s(val, SYS_ESR_EL12); return; + case AFSR0_EL1: write_sysreg_s(val, SYS_AFSR0_EL12); return; + case AFSR1_EL1: write_sysreg_s(val, SYS_AFSR1_EL12); return; + case FAR_EL1: write_sysreg_s(val, SYS_FAR_EL12); return; + case MAIR_EL1: write_sysreg_s(val, SYS_MAIR_EL12); return; + case VBAR_EL1: write_sysreg_s(val, SYS_VBAR_EL12); return; + case CONTEXTIDR_EL1: write_sysreg_s(val, SYS_CONTEXTIDR_EL12); return; case TPIDR_EL0: write_sysreg_s(val, SYS_TPIDR_EL0); return; case TPIDRRO_EL0: write_sysreg_s(val, SYS_TPIDRRO_EL0); return; case TPIDR_EL1: write_sysreg_s(val, SYS_TPIDR_EL1); return; - case AMAIR_EL1: write_sysreg_s(val, amair_EL12); return; - case CNTKCTL_EL1: write_sysreg_s(val, cntkctl_EL12); return; + case AMAIR_EL1: write_sysreg_s(val, SYS_AMAIR_EL12); return; + case CNTKCTL_EL1: write_sysreg_s(val, SYS_CNTKCTL_EL12); return; case PAR_EL1: write_sysreg_s(val, SYS_PAR_EL1); return; case DACR32_EL2: write_sysreg_s(val, SYS_DACR32_EL2); return; case IFSR32_EL2: write_sysreg_s(val, SYS_IFSR32_EL2); return; diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c index 1be486d5d7cb..e2bb5bd60227 100644 --- a/virt/kvm/arm/arch_timer.c +++ b/virt/kvm/arm/arch_timer.c @@ -237,10 +237,10 @@ static bool kvm_timer_should_fire(struct arch_timer_context *timer_ctx) switch (index) { case TIMER_VTIMER: - cnt_ctl = read_sysreg_el0(cntv_ctl); + cnt_ctl = read_sysreg_el0(SYS_CNTV_CTL); break; case TIMER_PTIMER: - cnt_ctl = read_sysreg_el0(cntp_ctl); + cnt_ctl = read_sysreg_el0(SYS_CNTP_CTL); break; case NR_KVM_TIMERS: /* GCC is braindead */ @@ -350,20 +350,20 @@ static void timer_save_state(struct arch_timer_context *ctx) switch (index) { case TIMER_VTIMER: - ctx->cnt_ctl = read_sysreg_el0(cntv_ctl); - ctx->cnt_cval = read_sysreg_el0(cntv_cval); + ctx->cnt_ctl = read_sysreg_el0(SYS_CNTV_CTL); + ctx->cnt_cval = read_sysreg_el0(SYS_CNTV_CVAL); /* Disable the timer */ - write_sysreg_el0(0, cntv_ctl); + write_sysreg_el0(0, SYS_CNTV_CTL); isb(); break; case TIMER_PTIMER: - ctx->cnt_ctl = read_sysreg_el0(cntp_ctl); - ctx->cnt_cval = read_sysreg_el0(cntp_cval); + ctx->cnt_ctl = read_sysreg_el0(SYS_CNTP_CTL); + ctx->cnt_cval = read_sysreg_el0(SYS_CNTP_CVAL); /* Disable the timer */ - write_sysreg_el0(0, cntp_ctl); + write_sysreg_el0(0, SYS_CNTP_CTL); isb(); break; @@ -429,14 +429,14 @@ static void timer_restore_state(struct arch_timer_context *ctx) switch (index) { case TIMER_VTIMER: - write_sysreg_el0(ctx->cnt_cval, cntv_cval); + write_sysreg_el0(ctx->cnt_cval, SYS_CNTV_CVAL); isb(); - write_sysreg_el0(ctx->cnt_ctl, cntv_ctl); + write_sysreg_el0(ctx->cnt_ctl, SYS_CNTV_CTL); break; case TIMER_PTIMER: - write_sysreg_el0(ctx->cnt_cval, cntp_cval); + write_sysreg_el0(ctx->cnt_cval, SYS_CNTP_CVAL); isb(); - write_sysreg_el0(ctx->cnt_ctl, cntp_ctl); + write_sysreg_el0(ctx->cnt_ctl, SYS_CNTP_CTL); break; case NR_KVM_TIMERS: BUG(); -- cgit v1.2.3-55-g7522 From 1e0cf16cdad1ba53e9eeee8746fe57de42f20c97 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 5 Jul 2019 23:35:56 +0100 Subject: KVM: arm/arm64: Initialise host's MPIDRs by reading the actual register As part of setting up the host context, we populate its MPIDR by using cpu_logical_map(). It turns out that contrary to arm64, cpu_logical_map() on 32bit ARM doesn't return the *full* MPIDR, but a truncated version. This leaves the host MPIDR slightly corrupted after the first run of a VM, since we won't correctly restore the MPIDR on exit. Oops. Since we cannot trust cpu_logical_map(), let's adopt a different strategy. We move the initialization of the host CPU context as part of the per-CPU initialization (which, in retrospect, makes a lot of sense), and directly read the MPIDR from the HW. This is guaranteed to work on both arm and arm64. Reported-by: Andre Przywara Tested-by: Andre Przywara Fixes: 32f139551954 ("arm/arm64: KVM: Statically configure the host's view of MPIDR") Signed-off-by: Marc Zyngier --- arch/arm/include/asm/kvm_host.h | 6 ++---- arch/arm64/include/asm/kvm_host.h | 7 +++---- virt/kvm/arm/arm.c | 3 ++- 3 files changed, 7 insertions(+), 9 deletions(-) (limited to 'virt') diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index e74e8f408987..8a37c8e89777 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -15,7 +15,6 @@ #include #include #include -#include #include #define __KVM_HAVE_ARCH_INTC_INITIALIZED @@ -147,11 +146,10 @@ struct kvm_host_data { typedef struct kvm_host_data kvm_host_data_t; -static inline void kvm_init_host_cpu_context(struct kvm_cpu_context *cpu_ctxt, - int cpu) +static inline void kvm_init_host_cpu_context(struct kvm_cpu_context *cpu_ctxt) { /* The host's MPIDR is immutable, so let's set it up at boot time */ - cpu_ctxt->cp15[c0_MPIDR] = cpu_logical_map(cpu); + cpu_ctxt->cp15[c0_MPIDR] = read_cpuid_mpidr(); } struct vcpu_reset_state { diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index d9770daf3d7d..63a196c19fed 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -19,12 +19,12 @@ #include #include #include +#include #include #include #include #include #include -#include #include #define __KVM_HAVE_ARCH_INTC_INITIALIZED @@ -484,11 +484,10 @@ struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr); DECLARE_PER_CPU(kvm_host_data_t, kvm_host_data); -static inline void kvm_init_host_cpu_context(struct kvm_cpu_context *cpu_ctxt, - int cpu) +static inline void kvm_init_host_cpu_context(struct kvm_cpu_context *cpu_ctxt) { /* The host's MPIDR is immutable, so let's set it up at boot time */ - cpu_ctxt->sys_regs[MPIDR_EL1] = cpu_logical_map(cpu); + cpu_ctxt->sys_regs[MPIDR_EL1] = read_cpuid_mpidr(); } void __kvm_enable_ssbs(void); diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index bd5c55916d0d..f149c79fd6ef 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c @@ -1332,6 +1332,8 @@ static void cpu_hyp_reset(void) static void cpu_hyp_reinit(void) { + kvm_init_host_cpu_context(&this_cpu_ptr(&kvm_host_data)->host_ctxt); + cpu_hyp_reset(); if (is_kernel_in_hyp_mode()) @@ -1569,7 +1571,6 @@ static int init_hyp_mode(void) kvm_host_data_t *cpu_data; cpu_data = per_cpu_ptr(&kvm_host_data, cpu); - kvm_init_host_cpu_context(&cpu_data->host_ctxt, cpu); err = create_hyp_mappings(cpu_data, cpu_data + 1, PAGE_HYP); if (err) { -- cgit v1.2.3-55-g7522 From b614c6027896ff9ad6757122e84760d938cab15e Mon Sep 17 00:00:00 2001 From: KarimAllah Ahmed Date: Wed, 10 Jul 2019 11:13:13 +0200 Subject: KVM: Properly check if "page" is valid in kvm_vcpu_unmap The field "page" is initialized to KVM_UNMAPPED_PAGE when it is not used (i.e. when the memory lives outside kernel control). So this check will always end up using kunmap even for memremap regions. Fixes: e45adf665a53 ("KVM: Introduce a new guest mapping API") Cc: stable@vger.kernel.org Signed-off-by: KarimAllah Ahmed Signed-off-by: Paolo Bonzini --- virt/kvm/kvm_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'virt') diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 9613987ef4c8..49dd5cc73d95 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1793,7 +1793,7 @@ void kvm_vcpu_unmap(struct kvm_vcpu *vcpu, struct kvm_host_map *map, if (!map->hva) return; - if (map->page) + if (map->page != KVM_UNMAPPED_PAGE) kunmap(map->page); #ifdef CONFIG_HAS_IOMEM else -- cgit v1.2.3-55-g7522