From 4d3afc9bad2b67b118a0cc204dc94703f7a44e74 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Wed, 25 May 2016 15:26:33 +0100 Subject: KVM: arm/arm64: vgic-v2: Clear all dirty LRs When saving the state of the list registers, it is critical to reset them zero, as we could otherwise leave unexpected EOI interrupts pending for virtual level interrupts. Cc: stable@vger.kernel.org # v4.6+ Signed-off-by: Christoffer Dall Signed-off-by: Marc Zyngier --- virt/kvm/arm/hyp/vgic-v2-sr.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/virt/kvm/arm/hyp/vgic-v2-sr.c b/virt/kvm/arm/hyp/vgic-v2-sr.c index a3f12b3b277b..3a3a699b7489 100644 --- a/virt/kvm/arm/hyp/vgic-v2-sr.c +++ b/virt/kvm/arm/hyp/vgic-v2-sr.c @@ -100,12 +100,11 @@ static void __hyp_text save_lrs(struct kvm_vcpu *vcpu, void __iomem *base) if (!(vcpu->arch.vgic_cpu.live_lrs & (1UL << i))) continue; - if (cpu_if->vgic_elrsr & (1UL << i)) { + if (cpu_if->vgic_elrsr & (1UL << i)) cpu_if->vgic_lr[i] &= ~GICH_LR_STATE; - continue; - } + else + cpu_if->vgic_lr[i] = readl_relaxed(base + GICH_LR0 + (i * 4)); - cpu_if->vgic_lr[i] = readl_relaxed(base + GICH_LR0 + (i * 4)); writel_relaxed(0, base + GICH_LR0 + (i * 4)); } } -- cgit v1.2.3-55-g7522 From fa89c77e891917b5913f9be080f9131a9457bb3e Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Wed, 25 May 2016 15:26:34 +0100 Subject: KVM: arm/arm64: vgic-v3: Clear all dirty LRs When saving the state of the list registers, it is critical to reset them zero, as we could otherwise leave unexpected EOI interrupts pending for virtual level interrupts. Cc: stable@vger.kernel.org # v4.6+ Signed-off-by: Christoffer Dall Signed-off-by: Marc Zyngier --- arch/arm64/kvm/hyp/vgic-v3-sr.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/arch/arm64/kvm/hyp/vgic-v3-sr.c b/arch/arm64/kvm/hyp/vgic-v3-sr.c index fff7cd42b3a3..3129df9d3a73 100644 --- a/arch/arm64/kvm/hyp/vgic-v3-sr.c +++ b/arch/arm64/kvm/hyp/vgic-v3-sr.c @@ -190,12 +190,11 @@ void __hyp_text __vgic_v3_save_state(struct kvm_vcpu *vcpu) if (!(vcpu->arch.vgic_cpu.live_lrs & (1UL << i))) continue; - if (cpu_if->vgic_elrsr & (1 << i)) { + if (cpu_if->vgic_elrsr & (1 << i)) cpu_if->vgic_lr[i] &= ~ICH_LR_STATE; - continue; - } + else + cpu_if->vgic_lr[i] = __gic_v3_get_lr(i); - cpu_if->vgic_lr[i] = __gic_v3_get_lr(i); __gic_v3_set_lr(0, i); } -- cgit v1.2.3-55-g7522 From df7942d17e1623d7358fe895377293637de5521b Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 25 May 2016 15:26:35 +0100 Subject: KVM: arm/arm64: vgic-v2: Always resample level interrupts When reading back from the list registers, we need to perform two actions for level interrupts: 1) clear the soft-pending bit if the interrupt is not pending anymore *in the list register* 2) resample the line level and propagate it to the pending state But these two actions shouldn't be linked, and we should *always* resample the line level, no matter what state is in the list register. Otherwise, we may end-up injecting spurious interrupts that have been already retired. Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall --- virt/kvm/arm/vgic/vgic-v2.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/virt/kvm/arm/vgic/vgic-v2.c b/virt/kvm/arm/vgic/vgic-v2.c index 8ad42c217770..e31405ee5515 100644 --- a/virt/kvm/arm/vgic/vgic-v2.c +++ b/virt/kvm/arm/vgic/vgic-v2.c @@ -112,11 +112,15 @@ void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu) } } - /* Clear soft pending state when level IRQs have been acked */ - if (irq->config == VGIC_CONFIG_LEVEL && - !(val & GICH_LR_PENDING_BIT)) { - irq->soft_pending = false; - irq->pending = irq->line_level; + /* + * Clear soft pending state when level irqs have been acked. + * Always regenerate the pending state. + */ + if (irq->config == VGIC_CONFIG_LEVEL) { + if (!(val & GICH_LR_PENDING_BIT)) + irq->soft_pending = false; + + irq->pending = irq->line_level || irq->soft_pending; } spin_unlock(&irq->irq_lock); -- cgit v1.2.3-55-g7522 From 637d122baac7ff386c8e96df38ac88cee1551db9 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 25 May 2016 15:26:36 +0100 Subject: KVM: arm/arm64: vgic-v3: Always resample level interrupts When reading back from the list registers, we need to perform two actions for level interrupts: 1) clear the soft-pending bit if the interrupt is not pending anymore *in the list register* 2) resample the line level and propagate it to the pending state But these two actions shouldn't be linked, and we should *always* resample the line level, no matter what state is in the list register. Otherwise, we may end-up injecting spurious interrupts that have been already retired. Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall --- virt/kvm/arm/vgic/vgic-v3.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c index 336a46115937..346b4ad12b49 100644 --- a/virt/kvm/arm/vgic/vgic-v3.c +++ b/virt/kvm/arm/vgic/vgic-v3.c @@ -101,11 +101,15 @@ void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu) } } - /* Clear soft pending state when level irqs have been acked */ - if (irq->config == VGIC_CONFIG_LEVEL && - !(val & ICH_LR_PENDING_BIT)) { - irq->soft_pending = false; - irq->pending = irq->line_level; + /* + * Clear soft pending state when level irqs have been acked. + * Always regenerate the pending state. + */ + if (irq->config == VGIC_CONFIG_LEVEL) { + if (!(val & ICH_LR_PENDING_BIT)) + irq->soft_pending = false; + + irq->pending = irq->line_level || irq->soft_pending; } spin_unlock(&irq->irq_lock); -- cgit v1.2.3-55-g7522 From b34f2bcbf59fe2d27c37d6553c33611754677103 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 25 May 2016 15:26:37 +0100 Subject: arm64: KVM: Make ICC_SRE_EL1 access return the configured SRE value When we trap ICC_SRE_EL1, we handle it as RAZ/WI. It would be more correct to actual make it RO, and return the configured value when read. Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall --- arch/arm64/kvm/sys_regs.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 7bbe3ff02602..a57d650f552c 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -134,6 +134,17 @@ static bool access_gic_sgi(struct kvm_vcpu *vcpu, return true; } +static bool access_gic_sre(struct kvm_vcpu *vcpu, + struct sys_reg_params *p, + const struct sys_reg_desc *r) +{ + if (p->is_write) + return ignore_write(vcpu, p); + + p->regval = vcpu->arch.vgic_cpu.vgic_v3.vgic_sre; + return true; +} + static bool trap_raz_wi(struct kvm_vcpu *vcpu, struct sys_reg_params *p, const struct sys_reg_desc *r) @@ -958,7 +969,7 @@ static const struct sys_reg_desc sys_reg_descs[] = { access_gic_sgi }, /* ICC_SRE_EL1 */ { Op0(0b11), Op1(0b000), CRn(0b1100), CRm(0b1100), Op2(0b101), - trap_raz_wi }, + access_gic_sre }, /* CONTEXTIDR_EL1 */ { Op0(0b11), Op1(0b000), CRn(0b1101), CRm(0b0000), Op2(0b001), -- cgit v1.2.3-55-g7522 From a057001e9e446f2195c34bc55c57e5cf353c99d6 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 25 May 2016 15:26:38 +0100 Subject: arm64: KVM: vgic-v3: Prevent the guest from messing with ICC_SRE_EL1 Both our GIC emulations are "strict", in the sense that we either emulate a GICv2 or a GICv3, and not a GICv3 with GICv2 legacy support. But when running on a GICv3 host, we still allow the guest to tinker with the ICC_SRE_EL1 register during its time slice: it can switch SRE off, observe that it is off, and yet on the next world switch, find the SRE bit to be set again. Not very nice. An obvious solution is to always trap accesses to ICC_SRE_EL1 (by clearing ICC_SRE_EL2.Enable), and to let the handler return the programmed value on a read, or ignore the write. That way, the guest can always observe that our GICv3 is SRE==1 only. Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall --- arch/arm64/kvm/hyp/vgic-v3-sr.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/arch/arm64/kvm/hyp/vgic-v3-sr.c b/arch/arm64/kvm/hyp/vgic-v3-sr.c index 3129df9d3a73..40c3b4c3d125 100644 --- a/arch/arm64/kvm/hyp/vgic-v3-sr.c +++ b/arch/arm64/kvm/hyp/vgic-v3-sr.c @@ -313,10 +313,8 @@ void __hyp_text __vgic_v3_restore_state(struct kvm_vcpu *vcpu) * Prevent the guest from touching the GIC system registers if * SRE isn't enabled for GICv3 emulation. */ - if (!cpu_if->vgic_sre) { - write_gicreg(read_gicreg(ICC_SRE_EL2) & ~ICC_SRE_EL2_ENABLE, - ICC_SRE_EL2); - } + write_gicreg(read_gicreg(ICC_SRE_EL2) & ~ICC_SRE_EL2_ENABLE, + ICC_SRE_EL2); } void __hyp_text __vgic_v3_init_lrs(void) -- cgit v1.2.3-55-g7522 From c58513284029229842844929ddeaca44d013c128 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 25 May 2016 15:26:39 +0100 Subject: arm64: KVM: vgic-v3: Relax synchronization when SRE==1 The GICv3 backend of the vgic is quite barrier heavy, in order to ensure synchronization of the system registers and the memory mapped view for a potential GICv2 guest. But when the guest is using a GICv3 model, there is absolutely no need to execute all these heavy barriers, and it is actually beneficial to avoid them altogether. This patch makes the synchonization conditional, and ensures that we do not change the EL1 SRE settings if we do not need to. Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall --- arch/arm64/kvm/hyp/vgic-v3-sr.c | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/arch/arm64/kvm/hyp/vgic-v3-sr.c b/arch/arm64/kvm/hyp/vgic-v3-sr.c index 40c3b4c3d125..5f8f80b4a224 100644 --- a/arch/arm64/kvm/hyp/vgic-v3-sr.c +++ b/arch/arm64/kvm/hyp/vgic-v3-sr.c @@ -169,7 +169,8 @@ void __hyp_text __vgic_v3_save_state(struct kvm_vcpu *vcpu) * Make sure stores to the GIC via the memory mapped interface * are now visible to the system register interface. */ - dsb(st); + if (!cpu_if->vgic_sre) + dsb(st); cpu_if->vgic_vmcr = read_gicreg(ICH_VMCR_EL2); @@ -235,8 +236,12 @@ void __hyp_text __vgic_v3_save_state(struct kvm_vcpu *vcpu) val = read_gicreg(ICC_SRE_EL2); write_gicreg(val | ICC_SRE_EL2_ENABLE, ICC_SRE_EL2); - isb(); /* Make sure ENABLE is set at EL2 before setting SRE at EL1 */ - write_gicreg(1, ICC_SRE_EL1); + + if (!cpu_if->vgic_sre) { + /* Make sure ENABLE is set at EL2 before setting SRE at EL1 */ + isb(); + write_gicreg(1, ICC_SRE_EL1); + } } void __hyp_text __vgic_v3_restore_state(struct kvm_vcpu *vcpu) @@ -255,8 +260,10 @@ void __hyp_text __vgic_v3_restore_state(struct kvm_vcpu *vcpu) * been actually programmed with the value we want before * starting to mess with the rest of the GIC. */ - write_gicreg(cpu_if->vgic_sre, ICC_SRE_EL1); - isb(); + if (!cpu_if->vgic_sre) { + write_gicreg(0, ICC_SRE_EL1); + isb(); + } val = read_gicreg(ICH_VTR_EL2); max_lr_idx = vtr_to_max_lr_idx(val); @@ -305,8 +312,10 @@ void __hyp_text __vgic_v3_restore_state(struct kvm_vcpu *vcpu) * (re)distributors. This ensure the guest will read the * correct values from the memory-mapped interface. */ - isb(); - dsb(sy); + if (!cpu_if->vgic_sre) { + isb(); + dsb(sy); + } vcpu->arch.vgic_cpu.live_lrs = live_lrs; /* -- cgit v1.2.3-55-g7522 From 05fb05a6ca25e02ad8c31bc440b3c4996864f379 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 2 Jun 2016 09:24:06 +0100 Subject: KVM: arm/arm64: vgic-new: Removel harmful BUG_ON When changing the active bit from an MMIO trap, we decide to explode if the intid is that of a private interrupt. This flawed logic comes from the fact that we were assuming that kvm_vcpu_kick() as called by kvm_arm_halt_vcpu() would not return before the called vcpu responded, but this is not the case, so we need to perform this wait even for private interrupts. Dropping the BUG_ON seems like the right thing to do. [ Commit message tweaked by Christoffer ] Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall --- virt/kvm/arm/vgic/vgic-mmio.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/virt/kvm/arm/vgic/vgic-mmio.c b/virt/kvm/arm/vgic/vgic-mmio.c index 059595ec3da0..9f6fab74dce7 100644 --- a/virt/kvm/arm/vgic/vgic-mmio.c +++ b/virt/kvm/arm/vgic/vgic-mmio.c @@ -191,10 +191,8 @@ static void vgic_mmio_change_active(struct kvm_vcpu *vcpu, struct vgic_irq *irq, * other thread sync back the IRQ. */ while (irq->vcpu && /* IRQ may have state in an LR somewhere */ - irq->vcpu->cpu != -1) { /* VCPU thread is running */ - BUG_ON(irq->intid < VGIC_NR_PRIVATE_IRQS); + irq->vcpu->cpu != -1) /* VCPU thread is running */ cond_resched_lock(&irq->irq_lock); - } irq->active = new_active_state; if (new_active_state) -- cgit v1.2.3-55-g7522 From b19ee2ff3b287fea48a2896a381e31319394fe58 Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Wed, 11 May 2016 08:04:29 -0700 Subject: KVM: x86: avoid write-tearing of TDP In theory, nothing prevents the compiler from write-tearing PTEs, or split PTE writes. These partially-modified PTEs can be fetched by other cores and cause mayhem. I have not really encountered such case in real-life, but it does seem possible. For example, the compiler may try to do something creative for kvm_set_pte_rmapp() and perform multiple writes to the PTE. Signed-off-by: Nadav Amit Signed-off-by: Radim Krčmář --- arch/x86/kvm/mmu.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 24e800116ab4..def97b3a392b 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -336,12 +336,12 @@ static gfn_t pse36_gfn_delta(u32 gpte) #ifdef CONFIG_X86_64 static void __set_spte(u64 *sptep, u64 spte) { - *sptep = spte; + WRITE_ONCE(*sptep, spte); } static void __update_clear_spte_fast(u64 *sptep, u64 spte) { - *sptep = spte; + WRITE_ONCE(*sptep, spte); } static u64 __update_clear_spte_slow(u64 *sptep, u64 spte) @@ -390,7 +390,7 @@ static void __set_spte(u64 *sptep, u64 spte) */ smp_wmb(); - ssptep->spte_low = sspte.spte_low; + WRITE_ONCE(ssptep->spte_low, sspte.spte_low); } static void __update_clear_spte_fast(u64 *sptep, u64 spte) @@ -400,7 +400,7 @@ static void __update_clear_spte_fast(u64 *sptep, u64 spte) ssptep = (union split_spte *)sptep; sspte = (union split_spte)spte; - ssptep->spte_low = sspte.spte_low; + WRITE_ONCE(ssptep->spte_low, sspte.spte_low); /* * If we map the spte from present to nonpresent, we should clear -- cgit v1.2.3-55-g7522 From 0c2df2a1affd183ba9c114915f42a2d464b4f58f Mon Sep 17 00:00:00 2001 From: Dmitry Bilunov Date: Tue, 31 May 2016 17:38:24 +0300 Subject: KVM: Handle MSR_IA32_PERF_CTL Intel CPUs having Turbo Boost feature implement an MSR to provide a control interface via rdmsr/wrmsr instructions. One could detect the presence of this feature by issuing one of these instructions and handling the #GP exception which is generated in case the referenced MSR is not implemented by the CPU. KVM's vCPU model behaves exactly as a real CPU in this case by injecting a fault when MSR_IA32_PERF_CTL is called (which KVM does not support). However, some operating systems use this register during an early boot stage in which their kernel is not capable of handling #GP correctly, causing #DP and finally a triple fault effectively resetting the vCPU. This patch implements a dummy handler for MSR_IA32_PERF_CTL to avoid the crashes. Signed-off-by: Dmitry Bilunov Signed-off-by: Radim Krčmář --- arch/x86/kvm/x86.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index c805cf494154..d0a5b4b4e64d 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2314,6 +2314,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case MSR_AMD64_NB_CFG: case MSR_FAM10H_MMIO_CONF_BASE: case MSR_AMD64_BU_CFG2: + case MSR_IA32_PERF_CTL: msr_info->data = 0; break; case MSR_K7_EVNTSEL0 ... MSR_K7_EVNTSEL3: -- cgit v1.2.3-55-g7522 From b21629da120dd6145d14dbd6d028e1bba680a92b Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 1 Jun 2016 14:09:18 +0200 Subject: kvm: x86: avoid warning on repeated KVM_SET_TSS_ADDR Found by syzkaller: WARNING: CPU: 3 PID: 15175 at arch/x86/kvm/x86.c:7705 __x86_set_memory_region+0x1dc/0x1f0 [kvm]() CPU: 3 PID: 15175 Comm: a.out Tainted: G W 4.4.6-300.fc23.x86_64 #1 Hardware name: LENOVO 2325F51/2325F51, BIOS G2ET32WW (1.12 ) 05/30/2012 0000000000000286 00000000950899a7 ffff88011ab3fbf0 ffffffff813b542e 0000000000000000 ffffffffa0966496 ffff88011ab3fc28 ffffffff810a40f2 00000000000001fd 0000000000003000 ffff88014fc50000 0000000000000000 Call Trace: [] dump_stack+0x63/0x85 [] warn_slowpath_common+0x82/0xc0 [] warn_slowpath_null+0x1a/0x20 [] __x86_set_memory_region+0x1dc/0x1f0 [kvm] [] x86_set_memory_region+0x3b/0x60 [kvm] [] vmx_set_tss_addr+0x3c/0x150 [kvm_intel] [] kvm_arch_vm_ioctl+0x654/0xbc0 [kvm] [] kvm_vm_ioctl+0x9a/0x6f0 [kvm] [] do_vfs_ioctl+0x298/0x480 [] SyS_ioctl+0x79/0x90 [] entry_SYSCALL_64_fastpath+0x12/0x71 Testcase: #include #include #include #include #include long r[8]; int main() { memset(r, -1, sizeof(r)); r[2] = open("/dev/kvm", O_RDONLY|O_TRUNC); r[3] = ioctl(r[2], KVM_CREATE_VM, 0x0ul); r[5] = ioctl(r[3], KVM_SET_TSS_ADDR, 0x20000000ul); r[7] = ioctl(r[3], KVM_SET_TSS_ADDR, 0x20000000ul); return 0; } Reported-by: Dmitry Vyukov Signed-off-by: Paolo Bonzini Signed-off-by: Radim Krčmář --- arch/x86/kvm/x86.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index d0a5b4b4e64d..990929bbeb50 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -7816,7 +7816,7 @@ int __x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size) slot = id_to_memslot(slots, id); if (size) { - if (WARN_ON(slot->npages)) + if (slot->npages) return -EEXIST; /* -- cgit v1.2.3-55-g7522 From 83676e923895adf2af392cfd36a05709950aaeef Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 1 Jun 2016 14:09:19 +0200 Subject: KVM: x86: avoid vmalloc(0) in the KVM_SET_CPUID This causes an ugly dmesg splat. Beautified syzkaller testcase: #include #include #include #include #include long r[8]; int main() { struct kvm_cpuid2 c = { 0 }; r[2] = open("/dev/kvm", O_RDWR); r[3] = ioctl(r[2], KVM_CREATE_VM, 0); r[4] = ioctl(r[3], KVM_CREATE_VCPU, 0x8); r[7] = ioctl(r[4], KVM_SET_CPUID, &c); return 0; } Reported-by: Dmitry Vyukov Signed-off-by: Paolo Bonzini Signed-off-by: Radim Krčmář --- arch/x86/kvm/cpuid.c | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 769af907f824..7597b42a8a88 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -181,19 +181,22 @@ int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid_entry __user *entries) { int r, i; - struct kvm_cpuid_entry *cpuid_entries; + struct kvm_cpuid_entry *cpuid_entries = NULL; r = -E2BIG; if (cpuid->nent > KVM_MAX_CPUID_ENTRIES) goto out; r = -ENOMEM; - cpuid_entries = vmalloc(sizeof(struct kvm_cpuid_entry) * cpuid->nent); - if (!cpuid_entries) - goto out; - r = -EFAULT; - if (copy_from_user(cpuid_entries, entries, - cpuid->nent * sizeof(struct kvm_cpuid_entry))) - goto out_free; + if (cpuid->nent) { + cpuid_entries = vmalloc(sizeof(struct kvm_cpuid_entry) * + cpuid->nent); + if (!cpuid_entries) + goto out; + r = -EFAULT; + if (copy_from_user(cpuid_entries, entries, + cpuid->nent * sizeof(struct kvm_cpuid_entry))) + goto out; + } for (i = 0; i < cpuid->nent; i++) { vcpu->arch.cpuid_entries[i].function = cpuid_entries[i].function; vcpu->arch.cpuid_entries[i].eax = cpuid_entries[i].eax; @@ -212,9 +215,8 @@ int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu, kvm_x86_ops->cpuid_update(vcpu); r = kvm_update_cpuid(vcpu); -out_free: - vfree(cpuid_entries); out: + vfree(cpuid_entries); return r; } -- cgit v1.2.3-55-g7522 From 78e546c824fa8f96d323b7edd6f5cad5b74af057 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 1 Jun 2016 14:09:20 +0200 Subject: KVM: fail KVM_SET_VCPU_EVENTS with invalid exception number This cannot be returned by KVM_GET_VCPU_EVENTS, so it is okay to return EINVAL. It causes a WARN from exception_type: WARNING: CPU: 3 PID: 16732 at arch/x86/kvm/x86.c:345 exception_type+0x49/0x50 [kvm]() CPU: 3 PID: 16732 Comm: a.out Tainted: G W 4.4.6-300.fc23.x86_64 #1 Hardware name: LENOVO 2325F51/2325F51, BIOS G2ET32WW (1.12 ) 05/30/2012 0000000000000286 000000006308a48b ffff8800bec7fcf8 ffffffff813b542e 0000000000000000 ffffffffa0966496 ffff8800bec7fd30 ffffffff810a40f2 ffff8800552a8000 0000000000000000 00000000002c267c 0000000000000001 Call Trace: [] dump_stack+0x63/0x85 [] warn_slowpath_common+0x82/0xc0 [] warn_slowpath_null+0x1a/0x20 [] exception_type+0x49/0x50 [kvm] [] kvm_arch_vcpu_ioctl_run+0x10a2/0x14e0 [kvm] [] kvm_vcpu_ioctl+0x33d/0x620 [kvm] [] do_vfs_ioctl+0x298/0x480 [] SyS_ioctl+0x79/0x90 [] entry_SYSCALL_64_fastpath+0x12/0x71 ---[ end trace b1a0391266848f50 ]--- Testcase (beautified/reduced from syzkaller output): #include #include #include #include #include #include #include long r[31]; int main() { memset(r, -1, sizeof(r)); r[2] = open("/dev/kvm", O_RDONLY); r[3] = ioctl(r[2], KVM_CREATE_VM, 0); r[7] = ioctl(r[3], KVM_CREATE_VCPU, 0); struct kvm_vcpu_events ve = { .exception.injected = 1, .exception.nr = 0xd4 }; r[27] = ioctl(r[7], KVM_SET_VCPU_EVENTS, &ve); r[30] = ioctl(r[7], KVM_RUN, 0); return 0; } Reported-by: Dmitry Vyukov Signed-off-by: Paolo Bonzini Signed-off-by: Radim Krčmář --- arch/x86/kvm/x86.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 990929bbeb50..d9db2a486377 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2973,6 +2973,10 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, | KVM_VCPUEVENT_VALID_SMM)) return -EINVAL; + if (events->exception.injected && + (events->exception.nr > 31 || events->exception.nr == NMI_VECTOR)) + return -EINVAL; + process_nmi(vcpu); vcpu->arch.exception.pending = events->exception.injected; vcpu->arch.exception.nr = events->exception.nr; -- cgit v1.2.3-55-g7522 From c622a3c21ede892e370b56e1ceb9eb28f8bbda6b Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 1 Jun 2016 14:09:21 +0200 Subject: KVM: irqfd: fix NULL pointer dereference in kvm_irq_map_gsi Found by syzkaller: BUG: unable to handle kernel NULL pointer dereference at 0000000000000120 IP: [] kvm_irq_map_gsi+0x12/0x90 [kvm] PGD 6f80b067 PUD b6535067 PMD 0 Oops: 0000 [#1] SMP CPU: 3 PID: 4988 Comm: a.out Not tainted 4.4.9-300.fc23.x86_64 #1 [...] Call Trace: [] irqfd_update+0x32/0xc0 [kvm] [] kvm_irqfd+0x3dc/0x5b0 [kvm] [] kvm_vm_ioctl+0x164/0x6f0 [kvm] [] do_vfs_ioctl+0x298/0x480 [] SyS_ioctl+0x79/0x90 [] tracesys_phase2+0x84/0x89 Code: b5 71 a7 e0 5b 41 5c 41 5d 5d f3 c3 66 66 66 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 55 48 8b 8f 10 2e 00 00 31 c0 48 89 e5 <39> 91 20 01 00 00 76 6a 48 63 d2 48 8b 94 d1 28 01 00 00 48 85 RIP [] kvm_irq_map_gsi+0x12/0x90 [kvm] RSP CR2: 0000000000000120 Testcase: #include #include #include #include #include #include #include long r[26]; int main() { memset(r, -1, sizeof(r)); r[2] = open("/dev/kvm", 0); r[3] = ioctl(r[2], KVM_CREATE_VM, 0); struct kvm_irqfd ifd; ifd.fd = syscall(SYS_eventfd2, 5, 0); ifd.gsi = 3; ifd.flags = 2; ifd.resamplefd = ifd.fd; r[25] = ioctl(r[3], KVM_IRQFD, &ifd); return 0; } Reported-by: Dmitry Vyukov Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini Signed-off-by: Radim Krčmář --- virt/kvm/irqchip.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/virt/kvm/irqchip.c b/virt/kvm/irqchip.c index fe84e1a95dd5..8db197bb6c7a 100644 --- a/virt/kvm/irqchip.c +++ b/virt/kvm/irqchip.c @@ -40,7 +40,7 @@ int kvm_irq_map_gsi(struct kvm *kvm, irq_rt = srcu_dereference_check(kvm->irq_routing, &kvm->irq_srcu, lockdep_is_held(&kvm->irq_lock)); - if (gsi < irq_rt->nr_rt_entries) { + if (irq_rt && gsi < irq_rt->nr_rt_entries) { hlist_for_each_entry(e, &irq_rt->map[gsi], link) { entries[n] = *e; ++n; -- cgit v1.2.3-55-g7522 From f8c1b85b2523da59c8c03ea94e9d0fac04155943 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 1 Jun 2016 14:09:22 +0200 Subject: KVM: x86: avoid vmalloc(0) in the KVM_SET_CPUID This causes an ugly dmesg splat. Beautified syzkaller testcase: #include #include #include #include #include long r[8]; int main() { struct kvm_irq_routing ir = { 0 }; r[2] = open("/dev/kvm", O_RDWR); r[3] = ioctl(r[2], KVM_CREATE_VM, 0); r[4] = ioctl(r[3], KVM_SET_GSI_ROUTING, &ir); return 0; } Reported-by: Dmitry Vyukov Signed-off-by: Paolo Bonzini Signed-off-by: Radim Krčmář --- virt/kvm/kvm_main.c | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 37af23052470..02e98f3131bd 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2935,7 +2935,7 @@ static long kvm_vm_ioctl(struct file *filp, case KVM_SET_GSI_ROUTING: { struct kvm_irq_routing routing; struct kvm_irq_routing __user *urouting; - struct kvm_irq_routing_entry *entries; + struct kvm_irq_routing_entry *entries = NULL; r = -EFAULT; if (copy_from_user(&routing, argp, sizeof(routing))) @@ -2945,15 +2945,17 @@ static long kvm_vm_ioctl(struct file *filp, goto out; if (routing.flags) goto out; - r = -ENOMEM; - entries = vmalloc(routing.nr * sizeof(*entries)); - if (!entries) - goto out; - r = -EFAULT; - urouting = argp; - if (copy_from_user(entries, urouting->entries, - routing.nr * sizeof(*entries))) - goto out_free_irq_routing; + if (routing.nr) { + r = -ENOMEM; + entries = vmalloc(routing.nr * sizeof(*entries)); + if (!entries) + goto out; + r = -EFAULT; + urouting = argp; + if (copy_from_user(entries, urouting->entries, + routing.nr * sizeof(*entries))) + goto out_free_irq_routing; + } r = kvm_set_irq_routing(kvm, entries, routing.nr, routing.flags); out_free_irq_routing: -- cgit v1.2.3-55-g7522 From d14bdb553f9196169f003058ae1cdabe514470e6 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 1 Jun 2016 14:09:23 +0200 Subject: KVM: x86: fix OOPS after invalid KVM_SET_DEBUGREGS MOV to DR6 or DR7 causes a #GP if an attempt is made to write a 1 to any of bits 63:32. However, this is not detected at KVM_SET_DEBUGREGS time, and the next KVM_RUN oopses: general protection fault: 0000 [#1] SMP CPU: 2 PID: 14987 Comm: a.out Not tainted 4.4.9-300.fc23.x86_64 #1 Hardware name: LENOVO 2325F51/2325F51, BIOS G2ET32WW (1.12 ) 05/30/2012 [...] Call Trace: [] kvm_arch_vcpu_ioctl_run+0x141d/0x14e0 [kvm] [] kvm_vcpu_ioctl+0x33d/0x620 [kvm] [] do_vfs_ioctl+0x298/0x480 [] SyS_ioctl+0x79/0x90 [] entry_SYSCALL_64_fastpath+0x12/0x71 Code: 55 83 ff 07 48 89 e5 77 27 89 ff ff 24 fd 90 87 80 81 0f 23 fe 5d c3 0f 23 c6 5d c3 0f 23 ce 5d c3 0f 23 d6 5d c3 0f 23 de 5d c3 <0f> 23 f6 5d c3 0f 0b 66 66 66 66 66 2e 0f 1f 84 00 00 00 00 00 RIP [] native_set_debugreg+0x2b/0x40 RSP Testcase (beautified/reduced from syzkaller output): #include #include #include #include #include #include #include long r[8]; int main() { struct kvm_debugregs dr = { 0 }; r[2] = open("/dev/kvm", O_RDONLY); r[3] = ioctl(r[2], KVM_CREATE_VM, 0); r[4] = ioctl(r[3], KVM_CREATE_VCPU, 7); memcpy(&dr, "\x5d\x6a\x6b\xe8\x57\x3b\x4b\x7e\xcf\x0d\xa1\x72" "\xa3\x4a\x29\x0c\xfc\x6d\x44\x00\xa7\x52\xc7\xd8" "\x00\xdb\x89\x9d\x78\xb5\x54\x6b\x6b\x13\x1c\xe9" "\x5e\xd3\x0e\x40\x6f\xb4\x66\xf7\x5b\xe3\x36\xcb", 48); r[7] = ioctl(r[4], KVM_SET_DEBUGREGS, &dr); r[6] = ioctl(r[4], KVM_RUN, 0); } Reported-by: Dmitry Vyukov Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini Signed-off-by: Radim Krčmář --- arch/x86/kvm/x86.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index d9db2a486377..902d9da12392 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3041,6 +3041,11 @@ static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu, if (dbgregs->flags) return -EINVAL; + if (dbgregs->dr6 & ~0xffffffffull) + return -EINVAL; + if (dbgregs->dr7 & ~0xffffffffull) + return -EINVAL; + memcpy(vcpu->arch.db, dbgregs->db, sizeof(vcpu->arch.db)); kvm_update_dr0123(vcpu); vcpu->arch.dr6 = dbgregs->dr6; -- cgit v1.2.3-55-g7522