diff options
Diffstat (limited to 'arch/powerpc/kvm')
58 files changed, 2312 insertions, 848 deletions
diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile index 3223aec88b2c..4c67cc79de7c 100644 --- a/arch/powerpc/kvm/Makefile +++ b/arch/powerpc/kvm/Makefile @@ -94,7 +94,7 @@ endif kvm-book3s_64-objs-$(CONFIG_KVM_XICS) += \ book3s_xics.o -kvm-book3s_64-objs-$(CONFIG_KVM_XIVE) += book3s_xive.o +kvm-book3s_64-objs-$(CONFIG_KVM_XIVE) += book3s_xive.o book3s_xive_native.o kvm-book3s_64-objs-$(CONFIG_SPAPR_TCE_IOMMU) += book3s_64_vio.o kvm-book3s_64-module-objs := \ diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index 10c5579d20ce..9524d92bc45d 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 2009. SUSE Linux Products GmbH. All rights reserved. * @@ -8,10 +9,6 @@ * Description: * This file is derived from arch/powerpc/kvm/44x.c, * by Hollis Blanchard <hollisb@us.ibm.com>. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License, version 2, as - * published by the Free Software Foundation. */ #include <linux/kvm_host.h> @@ -651,6 +648,18 @@ int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id, *val = get_reg_val(id, kvmppc_xics_get_icp(vcpu)); break; #endif /* CONFIG_KVM_XICS */ +#ifdef CONFIG_KVM_XIVE + case KVM_REG_PPC_VP_STATE: + if (!vcpu->arch.xive_vcpu) { + r = -ENXIO; + break; + } + if (xive_enabled()) + r = kvmppc_xive_native_get_vp(vcpu, val); + else + r = -ENXIO; + break; +#endif /* CONFIG_KVM_XIVE */ case KVM_REG_PPC_FSCR: *val = get_reg_val(id, vcpu->arch.fscr); break; @@ -724,6 +733,18 @@ int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, r = kvmppc_xics_set_icp(vcpu, set_reg_val(id, *val)); break; #endif /* CONFIG_KVM_XICS */ +#ifdef CONFIG_KVM_XIVE + case KVM_REG_PPC_VP_STATE: + if (!vcpu->arch.xive_vcpu) { + r = -ENXIO; + break; + } + if (xive_enabled()) + r = kvmppc_xive_native_set_vp(vcpu, val); + else + r = -ENXIO; + break; +#endif /* CONFIG_KVM_XIVE */ case KVM_REG_PPC_FSCR: vcpu->arch.fscr = set_reg_val(id, *val); break; @@ -878,6 +899,7 @@ int kvmppc_core_init_vm(struct kvm *kvm) #ifdef CONFIG_PPC64 INIT_LIST_HEAD_RCU(&kvm->arch.spapr_tce_tables); INIT_LIST_HEAD(&kvm->arch.rtas_tokens); + mutex_init(&kvm->arch.rtas_token_lock); #endif return kvm->arch.kvm_ops->init_vm(kvm); @@ -891,6 +913,17 @@ void kvmppc_core_destroy_vm(struct kvm *kvm) kvmppc_rtas_tokens_free(kvm); WARN_ON(!list_empty(&kvm->arch.spapr_tce_tables)); #endif + +#ifdef CONFIG_KVM_XICS + /* + * Free the XIVE devices which are not directly freed by the + * device 'release' method + */ + kfree(kvm->arch.xive_devices.native); + kvm->arch.xive_devices.native = NULL; + kfree(kvm->arch.xive_devices.xics_on_xive); + kvm->arch.xive_devices.xics_on_xive = NULL; +#endif /* CONFIG_KVM_XICS */ } int kvmppc_h_logical_ci_load(struct kvm_vcpu *vcpu) @@ -1050,6 +1083,9 @@ static int kvmppc_book3s_init(void) if (xics_on_xive()) { kvmppc_xive_init_module(); kvm_register_device_ops(&kvm_xive_ops, KVM_DEV_TYPE_XICS); + kvmppc_xive_native_init_module(); + kvm_register_device_ops(&kvm_xive_native_ops, + KVM_DEV_TYPE_XIVE); } else #endif kvm_register_device_ops(&kvm_xics_ops, KVM_DEV_TYPE_XICS); @@ -1060,8 +1096,10 @@ static int kvmppc_book3s_init(void) static void kvmppc_book3s_exit(void) { #ifdef CONFIG_KVM_XICS - if (xics_on_xive()) + if (xics_on_xive()) { kvmppc_xive_exit_module(); + kvmppc_xive_native_exit_module(); + } #endif #ifdef CONFIG_KVM_BOOK3S_32_HANDLER kvmppc_book3s_exit_pr(); diff --git a/arch/powerpc/kvm/book3s.h b/arch/powerpc/kvm/book3s.h index 14ef03501d21..2ef1311a2a13 100644 --- a/arch/powerpc/kvm/book3s.h +++ b/arch/powerpc/kvm/book3s.h @@ -1,12 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ /* * Copyright IBM Corporation, 2013 * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of the - * License or (at your optional) any later version of the license. - * */ #ifndef __POWERPC_KVM_BOOK3S_H__ diff --git a/arch/powerpc/kvm/book3s_32_mmu.c b/arch/powerpc/kvm/book3s_32_mmu.c index 6f789f674048..653936177857 100644 --- a/arch/powerpc/kvm/book3s_32_mmu.c +++ b/arch/powerpc/kvm/book3s_32_mmu.c @@ -1,16 +1,5 @@ +// SPDX-License-Identifier: GPL-2.0-only /* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License, version 2, as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. * * Copyright SUSE Linux Products GmbH 2009 * diff --git a/arch/powerpc/kvm/book3s_32_mmu_host.c b/arch/powerpc/kvm/book3s_32_mmu_host.c index 999106991a76..d4cb3bcf41b6 100644 --- a/arch/powerpc/kvm/book3s_32_mmu_host.c +++ b/arch/powerpc/kvm/book3s_32_mmu_host.c @@ -1,21 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 2010 SUSE Linux Products GmbH. All rights reserved. * * Authors: * Alexander Graf <agraf@suse.de> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License, version 2, as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ #include <linux/kvm_host.h> diff --git a/arch/powerpc/kvm/book3s_32_sr.S b/arch/powerpc/kvm/book3s_32_sr.S index 7e06a6fc8d07..e3ab9df6cf19 100644 --- a/arch/powerpc/kvm/book3s_32_sr.S +++ b/arch/powerpc/kvm/book3s_32_sr.S @@ -1,16 +1,5 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License, version 2, as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. * * Copyright SUSE Linux Products GmbH 2009 * diff --git a/arch/powerpc/kvm/book3s_64_mmu.c b/arch/powerpc/kvm/book3s_64_mmu.c index d4b967f0e8d4..5f63a5f7f24f 100644 --- a/arch/powerpc/kvm/book3s_64_mmu.c +++ b/arch/powerpc/kvm/book3s_64_mmu.c @@ -1,16 +1,5 @@ +// SPDX-License-Identifier: GPL-2.0-only /* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License, version 2, as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. * * Copyright SUSE Linux Products GmbH 2009 * diff --git a/arch/powerpc/kvm/book3s_64_mmu_host.c b/arch/powerpc/kvm/book3s_64_mmu_host.c index 9a4614cd0e53..044dd49eeb9d 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_host.c +++ b/arch/powerpc/kvm/book3s_64_mmu_host.c @@ -1,22 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 2009 SUSE Linux Products GmbH. All rights reserved. * * Authors: * Alexander Graf <agraf@suse.de> * Kevin Wolf <mail@kevin-wolf.de> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License, version 2, as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ #include <linux/kvm_host.h> diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index be7bc070eae5..9a75f0e1933b 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -1,16 +1,5 @@ +// SPDX-License-Identifier: GPL-2.0-only /* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License, version 2, as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. * * Copyright 2010 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com> */ @@ -63,7 +52,7 @@ struct kvm_resize_hpt { struct work_struct work; u32 order; - /* These fields protected by kvm->lock */ + /* These fields protected by kvm->arch.mmu_setup_lock */ /* Possible values and their usage: * <0 an error occurred during allocation, @@ -73,7 +62,7 @@ struct kvm_resize_hpt { int error; /* Private to the work thread, until error != -EBUSY, - * then protected by kvm->lock. + * then protected by kvm->arch.mmu_setup_lock. */ struct kvm_hpt_info hpt; }; @@ -139,7 +128,7 @@ long kvmppc_alloc_reset_hpt(struct kvm *kvm, int order) long err = -EBUSY; struct kvm_hpt_info info; - mutex_lock(&kvm->lock); + mutex_lock(&kvm->arch.mmu_setup_lock); if (kvm->arch.mmu_ready) { kvm->arch.mmu_ready = 0; /* order mmu_ready vs. vcpus_running */ @@ -183,7 +172,7 @@ out: /* Ensure that each vcpu will flush its TLB on next entry. */ cpumask_setall(&kvm->arch.need_tlb_flush); - mutex_unlock(&kvm->lock); + mutex_unlock(&kvm->arch.mmu_setup_lock); return err; } @@ -600,7 +589,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, /* If writing != 0, then the HPTE must allow writing, if we get here */ write_ok = writing; hva = gfn_to_hva_memslot(memslot, gfn); - npages = get_user_pages_fast(hva, 1, writing, pages); + npages = get_user_pages_fast(hva, 1, writing ? FOLL_WRITE : 0, pages); if (npages < 1) { /* Check if it's an I/O mapping */ down_read(¤t->mm->mmap_sem); @@ -1193,7 +1182,7 @@ void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long gpa, if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID)) goto err; hva = gfn_to_hva_memslot(memslot, gfn); - npages = get_user_pages_fast(hva, 1, 1, pages); + npages = get_user_pages_fast(hva, 1, FOLL_WRITE, pages); if (npages < 1) goto err; page = pages[0]; @@ -1447,7 +1436,7 @@ static void resize_hpt_pivot(struct kvm_resize_hpt *resize) static void resize_hpt_release(struct kvm *kvm, struct kvm_resize_hpt *resize) { - if (WARN_ON(!mutex_is_locked(&kvm->lock))) + if (WARN_ON(!mutex_is_locked(&kvm->arch.mmu_setup_lock))) return; if (!resize) @@ -1474,14 +1463,14 @@ static void resize_hpt_prepare_work(struct work_struct *work) if (WARN_ON(resize->error != -EBUSY)) return; - mutex_lock(&kvm->lock); + mutex_lock(&kvm->arch.mmu_setup_lock); /* Request is still current? */ if (kvm->arch.resize_hpt == resize) { /* We may request large allocations here: - * do not sleep with kvm->lock held for a while. + * do not sleep with kvm->arch.mmu_setup_lock held for a while. */ - mutex_unlock(&kvm->lock); + mutex_unlock(&kvm->arch.mmu_setup_lock); resize_hpt_debug(resize, "resize_hpt_prepare_work(): order = %d\n", resize->order); @@ -1494,9 +1483,9 @@ static void resize_hpt_prepare_work(struct work_struct *work) if (WARN_ON(err == -EBUSY)) err = -EINPROGRESS; - mutex_lock(&kvm->lock); + mutex_lock(&kvm->arch.mmu_setup_lock); /* It is possible that kvm->arch.resize_hpt != resize - * after we grab kvm->lock again. + * after we grab kvm->arch.mmu_setup_lock again. */ } @@ -1505,7 +1494,7 @@ static void resize_hpt_prepare_work(struct work_struct *work) if (kvm->arch.resize_hpt != resize) resize_hpt_release(kvm, resize); - mutex_unlock(&kvm->lock); + mutex_unlock(&kvm->arch.mmu_setup_lock); } long kvm_vm_ioctl_resize_hpt_prepare(struct kvm *kvm, @@ -1522,7 +1511,7 @@ long kvm_vm_ioctl_resize_hpt_prepare(struct kvm *kvm, if (shift && ((shift < 18) || (shift > 46))) return -EINVAL; - mutex_lock(&kvm->lock); + mutex_lock(&kvm->arch.mmu_setup_lock); resize = kvm->arch.resize_hpt; @@ -1565,7 +1554,7 @@ long kvm_vm_ioctl_resize_hpt_prepare(struct kvm *kvm, ret = 100; /* estimated time in ms */ out: - mutex_unlock(&kvm->lock); + mutex_unlock(&kvm->arch.mmu_setup_lock); return ret; } @@ -1588,7 +1577,7 @@ long kvm_vm_ioctl_resize_hpt_commit(struct kvm *kvm, if (shift && ((shift < 18) || (shift > 46))) return -EINVAL; - mutex_lock(&kvm->lock); + mutex_lock(&kvm->arch.mmu_setup_lock); resize = kvm->arch.resize_hpt; @@ -1625,7 +1614,7 @@ out: smp_mb(); out_no_hpt: resize_hpt_release(kvm, resize); - mutex_unlock(&kvm->lock); + mutex_unlock(&kvm->arch.mmu_setup_lock); return ret; } @@ -1868,7 +1857,7 @@ static ssize_t kvm_htab_write(struct file *file, const char __user *buf, return -EINVAL; /* lock out vcpus from running while we're doing this */ - mutex_lock(&kvm->lock); + mutex_lock(&kvm->arch.mmu_setup_lock); mmu_ready = kvm->arch.mmu_ready; if (mmu_ready) { kvm->arch.mmu_ready = 0; /* temporarily */ @@ -1876,7 +1865,7 @@ static ssize_t kvm_htab_write(struct file *file, const char __user *buf, smp_mb(); if (atomic_read(&kvm->arch.vcpus_running)) { kvm->arch.mmu_ready = 1; - mutex_unlock(&kvm->lock); + mutex_unlock(&kvm->arch.mmu_setup_lock); return -EBUSY; } } @@ -1963,7 +1952,7 @@ static ssize_t kvm_htab_write(struct file *file, const char __user *buf, /* Order HPTE updates vs. mmu_ready */ smp_wmb(); kvm->arch.mmu_ready = mmu_ready; - mutex_unlock(&kvm->lock); + mutex_unlock(&kvm->arch.mmu_setup_lock); if (err) return err; diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c index f55ef071883f..08b2dfbc5305 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_radix.c +++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c @@ -1,7 +1,5 @@ +// SPDX-License-Identifier: GPL-2.0-only /* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License, version 2, as - * published by the Free Software Foundation. * * Copyright 2016 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com> */ diff --git a/arch/powerpc/kvm/book3s_64_slb.S b/arch/powerpc/kvm/book3s_64_slb.S index 066c665dc86f..4d958dd21e59 100644 --- a/arch/powerpc/kvm/book3s_64_slb.S +++ b/arch/powerpc/kvm/book3s_64_slb.S @@ -1,16 +1,5 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License, version 2, as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. * * Copyright SUSE Linux Products GmbH 2009 * diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c index f100e331e69b..5bf05cc774e2 100644 --- a/arch/powerpc/kvm/book3s_64_vio.c +++ b/arch/powerpc/kvm/book3s_64_vio.c @@ -1,16 +1,5 @@ +// SPDX-License-Identifier: GPL-2.0-only /* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License, version 2, as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. * * Copyright 2010 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com> * Copyright 2011 David Gibson, IBM Corporation <dwg@au1.ibm.com> @@ -228,11 +217,33 @@ static void release_spapr_tce_table(struct rcu_head *head) unsigned long i, npages = kvmppc_tce_pages(stt->size); for (i = 0; i < npages; i++) - __free_page(stt->pages[i]); + if (stt->pages[i]) + __free_page(stt->pages[i]); kfree(stt); } +static struct page *kvm_spapr_get_tce_page(struct kvmppc_spapr_tce_table *stt, + unsigned long sttpage) +{ + struct page *page = stt->pages[sttpage]; + + if (page) + return page; + + mutex_lock(&stt->alloc_lock); + page = stt->pages[sttpage]; + if (!page) { + page = alloc_page(GFP_KERNEL | __GFP_ZERO); + WARN_ON_ONCE(!page); + if (page) + stt->pages[sttpage] = page; + } + mutex_unlock(&stt->alloc_lock); + + return page; +} + static vm_fault_t kvm_spapr_tce_fault(struct vm_fault *vmf) { struct kvmppc_spapr_tce_table *stt = vmf->vma->vm_file->private_data; @@ -241,7 +252,10 @@ static vm_fault_t kvm_spapr_tce_fault(struct vm_fault *vmf) if (vmf->pgoff >= kvmppc_tce_pages(stt->size)) return VM_FAULT_SIGBUS; - page = stt->pages[vmf->pgoff]; + page = kvm_spapr_get_tce_page(stt, vmf->pgoff); + if (!page) + return VM_FAULT_OOM; + get_page(page); vmf->page = page; return 0; @@ -296,7 +310,6 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, struct kvmppc_spapr_tce_table *siter; unsigned long npages, size = args->size; int ret = -ENOMEM; - int i; if (!args->size || args->page_shift < 12 || args->page_shift > 34 || (args->offset + args->size > (ULLONG_MAX >> args->page_shift))) @@ -318,14 +331,9 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, stt->offset = args->offset; stt->size = size; stt->kvm = kvm; + mutex_init(&stt->alloc_lock); INIT_LIST_HEAD_RCU(&stt->iommu_tables); - for (i = 0; i < npages; i++) { - stt->pages[i] = alloc_page(GFP_KERNEL | __GFP_ZERO); - if (!stt->pages[i]) - goto fail; - } - mutex_lock(&kvm->lock); /* Check this LIOBN hasn't been previously allocated */ @@ -352,17 +360,28 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, if (ret >= 0) return ret; - fail: - for (i = 0; i < npages; i++) - if (stt->pages[i]) - __free_page(stt->pages[i]); - kfree(stt); fail_acct: kvmppc_account_memlimit(kvmppc_stt_pages(npages), false); return ret; } +static long kvmppc_tce_to_ua(struct kvm *kvm, unsigned long tce, + unsigned long *ua) +{ + unsigned long gfn = tce >> PAGE_SHIFT; + struct kvm_memory_slot *memslot; + + memslot = search_memslots(kvm_memslots(kvm), gfn); + if (!memslot) + return -EINVAL; + + *ua = __gfn_to_hva_memslot(memslot, gfn) | + (tce & ~(PAGE_MASK | TCE_PCI_READ | TCE_PCI_WRITE)); + + return 0; +} + static long kvmppc_tce_validate(struct kvmppc_spapr_tce_table *stt, unsigned long tce) { @@ -378,7 +397,7 @@ static long kvmppc_tce_validate(struct kvmppc_spapr_tce_table *stt, if (iommu_tce_check_gpa(stt->page_shift, gpa)) return H_TOO_HARD; - if (kvmppc_tce_to_ua(stt->kvm, tce, &ua, NULL)) + if (kvmppc_tce_to_ua(stt->kvm, tce, &ua)) return H_TOO_HARD; list_for_each_entry_rcu(stit, &stt->iommu_tables, next) { @@ -397,6 +416,36 @@ static long kvmppc_tce_validate(struct kvmppc_spapr_tce_table *stt, return H_SUCCESS; } +/* + * Handles TCE requests for emulated devices. + * Puts guest TCE values to the table and expects user space to convert them. + * Cannot fail so kvmppc_tce_validate must be called before it. + */ +static void kvmppc_tce_put(struct kvmppc_spapr_tce_table *stt, + unsigned long idx, unsigned long tce) +{ + struct page *page; + u64 *tbl; + unsigned long sttpage; + + idx -= stt->offset; + sttpage = idx / TCES_PER_PAGE; + page = stt->pages[sttpage]; + + if (!page) { + /* We allow any TCE, not just with read|write permissions */ + if (!tce) + return; + + page = kvm_spapr_get_tce_page(stt, sttpage); + if (!page) + return; + } + tbl = page_to_virt(page); + + tbl[idx % TCES_PER_PAGE] = tce; +} + static void kvmppc_clear_tce(struct mm_struct *mm, struct iommu_table *tbl, unsigned long entry) { @@ -551,7 +600,7 @@ long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn, dir = iommu_tce_direction(tce); - if ((dir != DMA_NONE) && kvmppc_tce_to_ua(vcpu->kvm, tce, &ua, NULL)) { + if ((dir != DMA_NONE) && kvmppc_tce_to_ua(vcpu->kvm, tce, &ua)) { ret = H_PARAMETER; goto unlock_exit; } @@ -612,7 +661,7 @@ long kvmppc_h_put_tce_indirect(struct kvm_vcpu *vcpu, return ret; idx = srcu_read_lock(&vcpu->kvm->srcu); - if (kvmppc_tce_to_ua(vcpu->kvm, tce_list, &ua, NULL)) { + if (kvmppc_tce_to_ua(vcpu->kvm, tce_list, &ua)) { ret = H_TOO_HARD; goto unlock_exit; } @@ -647,7 +696,7 @@ long kvmppc_h_put_tce_indirect(struct kvm_vcpu *vcpu, } tce = be64_to_cpu(tce); - if (kvmppc_tce_to_ua(vcpu->kvm, tce, &ua, NULL)) + if (kvmppc_tce_to_ua(vcpu->kvm, tce, &ua)) return H_PARAMETER; list_for_each_entry_lockless(stit, &stt->iommu_tables, next) { diff --git a/arch/powerpc/kvm/book3s_64_vio_hv.c b/arch/powerpc/kvm/book3s_64_vio_hv.c index 2206bc729b9a..f50bbeedfc66 100644 --- a/arch/powerpc/kvm/book3s_64_vio_hv.c +++ b/arch/powerpc/kvm/book3s_64_vio_hv.c @@ -1,16 +1,5 @@ +// SPDX-License-Identifier: GPL-2.0-only /* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License, version 2, as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. * * Copyright 2010 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com> * Copyright 2011 David Gibson, IBM Corporation <dwg@au1.ibm.com> @@ -66,8 +55,6 @@ #endif -#define TCES_PER_PAGE (PAGE_SIZE / sizeof(u64)) - /* * Finds a TCE table descriptor by LIOBN. * @@ -88,6 +75,25 @@ struct kvmppc_spapr_tce_table *kvmppc_find_table(struct kvm *kvm, EXPORT_SYMBOL_GPL(kvmppc_find_table); #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE +static long kvmppc_rm_tce_to_ua(struct kvm *kvm, unsigned long tce, + unsigned long *ua, unsigned long **prmap) +{ + unsigned long gfn = tce >> PAGE_SHIFT; + struct kvm_memory_slot *memslot; + + memslot = search_memslots(kvm_memslots_raw(kvm), gfn); + if (!memslot) + return -EINVAL; + + *ua = __gfn_to_hva_memslot(memslot, gfn) | + (tce & ~(PAGE_MASK | TCE_PCI_READ | TCE_PCI_WRITE)); + + if (prmap) + *prmap = &memslot->arch.rmap[gfn - memslot->base_gfn]; + + return 0; +} + /* * Validates TCE address. * At the moment flags and page mask are validated. @@ -111,7 +117,7 @@ static long kvmppc_rm_tce_validate(struct kvmppc_spapr_tce_table *stt, if (iommu_tce_check_gpa(stt->page_shift, gpa)) return H_PARAMETER; - if (kvmppc_tce_to_ua(stt->kvm, tce, &ua, NULL)) + if (kvmppc_rm_tce_to_ua(stt->kvm, tce, &ua, NULL)) return H_TOO_HARD; list_for_each_entry_lockless(stit, &stt->iommu_tables, next) { @@ -129,7 +135,6 @@ static long kvmppc_rm_tce_validate(struct kvmppc_spapr_tce_table *stt, return H_SUCCESS; } -#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */ /* Note on the use of page_address() in real mode, * @@ -161,13 +166,9 @@ static u64 *kvmppc_page_address(struct page *page) /* * Handles TCE requests for emulated devices. * Puts guest TCE values to the table and expects user space to convert them. - * Called in both real and virtual modes. - * Cannot fail so kvmppc_tce_validate must be called before it. - * - * WARNING: This will be called in real-mode on HV KVM and virtual - * mode on PR KVM + * Cannot fail so kvmppc_rm_tce_validate must be called before it. */ -void kvmppc_tce_put(struct kvmppc_spapr_tce_table *stt, +static void kvmppc_rm_tce_put(struct kvmppc_spapr_tce_table *stt, unsigned long idx, unsigned long tce) { struct page *page; @@ -175,35 +176,48 @@ void kvmppc_tce_put(struct kvmppc_spapr_tce_table *stt, idx -= stt->offset; page = stt->pages[idx / TCES_PER_PAGE]; + /* + * page must not be NULL in real mode, + * kvmppc_rm_ioba_validate() must have taken care of this. + */ + WARN_ON_ONCE_RM(!page); tbl = kvmppc_page_address(page); tbl[idx % TCES_PER_PAGE] = tce; } -EXPORT_SYMBOL_GPL(kvmppc_tce_put); -long kvmppc_tce_to_ua(struct kvm *kvm, unsigned long tce, - unsigned long *ua, unsigned long **prmap) +/* + * TCEs pages are allocated in kvmppc_rm_tce_put() which won't be able to do so + * in real mode. + * Check if kvmppc_rm_tce_put() can succeed in real mode, i.e. a TCEs page is + * allocated or not required (when clearing a tce entry). + */ +static long kvmppc_rm_ioba_validate(struct kvmppc_spapr_tce_table *stt, + unsigned long ioba, unsigned long npages, bool clearing) { - unsigned long gfn = tce >> PAGE_SHIFT; - struct kvm_memory_slot *memslot; - - memslot = search_memslots(kvm_memslots(kvm), gfn); - if (!memslot) - return -EINVAL; + unsigned long i, idx, sttpage, sttpages; + unsigned long ret = kvmppc_ioba_validate(stt, ioba, npages); - *ua = __gfn_to_hva_memslot(memslot, gfn) | - (tce & ~(PAGE_MASK | TCE_PCI_READ | TCE_PCI_WRITE)); + if (ret) + return ret; + /* + * clearing==true says kvmppc_rm_tce_put won't be allocating pages + * for empty tces. + */ + if (clearing) + return H_SUCCESS; -#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE - if (prmap) - *prmap = &memslot->arch.rmap[gfn - memslot->base_gfn]; -#endif + idx = (ioba >> stt->page_shift) - stt->offset; + sttpage = idx / TCES_PER_PAGE; + sttpages = _ALIGN_UP(idx % TCES_PER_PAGE + npages, TCES_PER_PAGE) / + TCES_PER_PAGE; + for (i = sttpage; i < sttpage + sttpages; ++i) + if (!stt->pages[i]) + return H_TOO_HARD; - return 0; + return H_SUCCESS; } -EXPORT_SYMBOL_GPL(kvmppc_tce_to_ua); -#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE static long iommu_tce_xchg_rm(struct mm_struct *mm, struct iommu_table *tbl, unsigned long entry, unsigned long *hpa, enum dma_data_direction *direction) @@ -381,7 +395,7 @@ long kvmppc_rm_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn, if (!stt) return H_TOO_HARD; - ret = kvmppc_ioba_validate(stt, ioba, 1); + ret = kvmppc_rm_ioba_validate(stt, ioba, 1, tce == 0); if (ret != H_SUCCESS) return ret; @@ -390,7 +404,7 @@ long kvmppc_rm_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn, return ret; dir = iommu_tce_direction(tce); - if ((dir != DMA_NONE) && kvmppc_tce_to_ua(vcpu->kvm, tce, &ua, NULL)) + if ((dir != DMA_NONE) && kvmppc_rm_tce_to_ua(vcpu->kvm, tce, &ua, NULL)) return H_PARAMETER; entry = ioba >> stt->page_shift; @@ -409,7 +423,7 @@ long kvmppc_rm_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn, } } - kvmppc_tce_put(stt, entry, tce); + kvmppc_rm_tce_put(stt, entry, tce); return H_SUCCESS; } @@ -480,7 +494,7 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu, if (tce_list & (SZ_4K - 1)) return H_PARAMETER; - ret = kvmppc_ioba_validate(stt, ioba, npages); + ret = kvmppc_rm_ioba_validate(stt, ioba, npages, false); if (ret != H_SUCCESS) return ret; @@ -492,7 +506,7 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu, */ struct mm_iommu_table_group_mem_t *mem; - if (kvmppc_tce_to_ua(vcpu->kvm, tce_list, &ua, NULL)) + if (kvmppc_rm_tce_to_ua(vcpu->kvm, tce_list, &ua, NULL)) return H_TOO_HARD; mem = mm_iommu_lookup_rm(vcpu->kvm->mm, ua, IOMMU_PAGE_SIZE_4K); @@ -508,7 +522,7 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu, * We do not require memory to be preregistered in this case * so lock rmap and do __find_linux_pte_or_hugepte(). */ - if (kvmppc_tce_to_ua(vcpu->kvm, tce_list, &ua, &rmap)) + if (kvmppc_rm_tce_to_ua(vcpu->kvm, tce_list, &ua, &rmap)) return H_TOO_HARD; rmap = (void *) vmalloc_to_phys(rmap); @@ -542,7 +556,7 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu, unsigned long tce = be64_to_cpu(((u64 *)tces)[i]); ua = 0; - if (kvmppc_tce_to_ua(vcpu->kvm, tce, &ua, NULL)) + if (kvmppc_rm_tce_to_ua(vcpu->kvm, tce, &ua, NULL)) return H_PARAMETER; list_for_each_entry_lockless(stit, &stt->iommu_tables, next) { @@ -557,7 +571,7 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu, } } - kvmppc_tce_put(stt, entry + i, tce); + kvmppc_rm_tce_put(stt, entry + i, tce); } unlock_exit: @@ -583,7 +597,7 @@ long kvmppc_rm_h_stuff_tce(struct kvm_vcpu *vcpu, if (!stt) return H_TOO_HARD; - ret = kvmppc_ioba_validate(stt, ioba, npages); + ret = kvmppc_rm_ioba_validate(stt, ioba, npages, tce_value == 0); if (ret != H_SUCCESS) return ret; @@ -610,7 +624,7 @@ long kvmppc_rm_h_stuff_tce(struct kvm_vcpu *vcpu, } for (i = 0; i < npages; ++i, ioba += (1ULL << stt->page_shift)) - kvmppc_tce_put(stt, ioba >> stt->page_shift, tce_value); + kvmppc_rm_tce_put(stt, ioba >> stt->page_shift, tce_value); return H_SUCCESS; } @@ -635,6 +649,10 @@ long kvmppc_h_get_tce(struct kvm_vcpu *vcpu, unsigned long liobn, idx = (ioba >> stt->page_shift) - stt->offset; page = stt->pages[idx / TCES_PER_PAGE]; + if (!page) { + vcpu->arch.regs.gpr[4] = 0; + return H_SUCCESS; + } tbl = (u64 *)page_address(page); vcpu->arch.regs.gpr[4] = tbl[idx % TCES_PER_PAGE]; diff --git a/arch/powerpc/kvm/book3s_emulate.c b/arch/powerpc/kvm/book3s_emulate.c index 6ef7c5f00a49..dad71d276b91 100644 --- a/arch/powerpc/kvm/book3s_emulate.c +++ b/arch/powerpc/kvm/book3s_emulate.c @@ -1,16 +1,5 @@ +// SPDX-License-Identifier: GPL-2.0-only /* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License, version 2, as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. * * Copyright SUSE Linux Products GmbH 2009 * diff --git a/arch/powerpc/kvm/book3s_exports.c b/arch/powerpc/kvm/book3s_exports.c index 0d013fbc2e13..f08565885ddf 100644 --- a/arch/powerpc/kvm/book3s_exports.c +++ b/arch/powerpc/kvm/book3s_exports.c @@ -1,16 +1,5 @@ +// SPDX-License-Identifier: GPL-2.0-only /* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License, version 2, as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. * * Copyright SUSE Linux Products GmbH 2009 * diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index b2b29d4f9842..76b1801aa44a 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright 2011 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com> * Copyright (C) 2009. SUSE Linux Products GmbH. All rights reserved. @@ -12,10 +13,6 @@ * * This file is derived from arch/powerpc/kvm/book3s.c, * by Alexander Graf <agraf@suse.de>. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License, version 2, as - * published by the Free Software Foundation. */ #include <linux/kvm_host.h> @@ -74,6 +71,7 @@ #include <asm/opal.h> #include <asm/xics.h> #include <asm/xive.h> +#include <asm/hw_breakpoint.h> #include "book3s.h" @@ -445,12 +443,7 @@ static void kvmppc_dump_regs(struct kvm_vcpu *vcpu) static struct kvm_vcpu *kvmppc_find_vcpu(struct kvm *kvm, int id) { - struct kvm_vcpu *ret; - - mutex_lock(&kvm->lock); - ret = kvm_get_vcpu_by_id(kvm, id); - mutex_unlock(&kvm->lock); - return ret; + return kvm_get_vcpu_by_id(kvm, id); } static void init_vpa(struct kvm_vcpu *vcpu, struct lppaca *vpa) @@ -749,7 +742,7 @@ static bool kvmppc_doorbell_pending(struct kvm_vcpu *vcpu) /* * Ensure that the read of vcore->dpdes comes after the read * of vcpu->doorbell_request. This barrier matches the - * smb_wmb() in kvmppc_guest_entry_inject(). + * smp_wmb() in kvmppc_guest_entry_inject(). */ smp_rmb(); vc = vcpu->arch.vcore; @@ -801,6 +794,80 @@ static int kvmppc_h_set_mode(struct kvm_vcpu *vcpu, unsigned long mflags, } } +/* Copy guest memory in place - must reside within a single memslot */ +static int kvmppc_copy_guest(struct kvm *kvm, gpa_t to, gpa_t from, + unsigned long len) +{ + struct kvm_memory_slot *to_memslot = NULL; + struct kvm_memory_slot *from_memslot = NULL; + unsigned long to_addr, from_addr; + int r; + + /* Get HPA for from address */ + from_memslot = gfn_to_memslot(kvm, from >> PAGE_SHIFT); + if (!from_memslot) + return -EFAULT; + if ((from + len) >= ((from_memslot->base_gfn + from_memslot->npages) + << PAGE_SHIFT)) + return -EINVAL; + from_addr = gfn_to_hva_memslot(from_memslot, from >> PAGE_SHIFT); + if (kvm_is_error_hva(from_addr)) + return -EFAULT; + from_addr |= (from & (PAGE_SIZE - 1)); + + /* Get HPA for to address */ + to_memslot = gfn_to_memslot(kvm, to >> PAGE_SHIFT); + if (!to_memslot) + return -EFAULT; + if ((to + len) >= ((to_memslot->base_gfn + to_memslot->npages) + << PAGE_SHIFT)) + return -EINVAL; + to_addr = gfn_to_hva_memslot(to_memslot, to >> PAGE_SHIFT); + if (kvm_is_error_hva(to_addr)) + return -EFAULT; + to_addr |= (to & (PAGE_SIZE - 1)); + + /* Perform copy */ + r = raw_copy_in_user((void __user *)to_addr, (void __user *)from_addr, + len); + if (r) + return -EFAULT; + mark_page_dirty(kvm, to >> PAGE_SHIFT); + return 0; +} + +static long kvmppc_h_page_init(struct kvm_vcpu *vcpu, unsigned long flags, + unsigned long dest, unsigned long src) +{ + u64 pg_sz = SZ_4K; /* 4K page size */ + u64 pg_mask = SZ_4K - 1; + int ret; + + /* Check for invalid flags (H_PAGE_SET_LOANED covers all CMO flags) */ + if (flags & ~(H_ICACHE_INVALIDATE | H_ICACHE_SYNCHRONIZE | + H_ZERO_PAGE | H_COPY_PAGE | H_PAGE_SET_LOANED)) + return H_PARAMETER; + + /* dest (and src if copy_page flag set) must be page aligned */ + if ((dest & pg_mask) || ((flags & H_COPY_PAGE) && (src & pg_mask))) + return H_PARAMETER; + + /* zero and/or copy the page as determined by the flags */ + if (flags & H_COPY_PAGE) { + ret = kvmppc_copy_guest(vcpu->kvm, dest, src, pg_sz); + if (ret < 0) + return H_PARAMETER; + } else if (flags & H_ZERO_PAGE) { + ret = kvm_clear_guest(vcpu->kvm, dest, pg_sz); + if (ret < 0) + return H_PARAMETER; + } + + /* We can ignore the remaining flags */ + + return H_SUCCESS; +} + static int kvm_arch_vcpu_yield_to(struct kvm_vcpu *target) { struct kvmppc_vcore *vcore = target->arch.vcore; @@ -1003,6 +1070,11 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu) if (nesting_enabled(vcpu->kvm)) ret = kvmhv_copy_tofrom_guest_nested(vcpu); break; + case H_PAGE_INIT: + ret = kvmppc_h_page_init(vcpu, kvmppc_get_gpr(vcpu, 4), + kvmppc_get_gpr(vcpu, 5), + kvmppc_get_gpr(vcpu, 6)); + break; default: return RESUME_HOST; } @@ -1047,6 +1119,7 @@ static int kvmppc_hcall_impl_hv(unsigned long cmd) case H_IPOLL: case H_XIRR_X: #endif + case H_PAGE_INIT: return 1; } @@ -1502,7 +1575,6 @@ static void kvmppc_set_lpcr(struct kvm_vcpu *vcpu, u64 new_lpcr, struct kvmppc_vcore *vc = vcpu->arch.vcore; u64 mask; - mutex_lock(&kvm->lock); spin_lock(&vc->lock); /* * If ILE (interrupt little-endian) has changed, update the @@ -1542,7 +1614,6 @@ static void kvmppc_set_lpcr(struct kvm_vcpu *vcpu, u64 new_lpcr, mask &= 0xFFFFFFFF; vc->lpcr = (vc->lpcr & ~mask) | (new_lpcr & mask); spin_unlock(&vc->lock); - mutex_unlock(&kvm->lock); } static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, @@ -2257,11 +2328,17 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm, pr_devel("KVM: collision on id %u", id); vcore = NULL; } else if (!vcore) { + /* + * Take mmu_setup_lock for mutual exclusion + * with kvmppc_update_lpcr(). + */ err = -ENOMEM; vcore = kvmppc_vcore_create(kvm, id & ~(kvm->arch.smt_mode - 1)); + mutex_lock(&kvm->arch.mmu_setup_lock); kvm->arch.vcores[core] = vcore; kvm->arch.online_vcores++; + mutex_unlock(&kvm->arch.mmu_setup_lock); } } mutex_unlock(&kvm->lock); @@ -2504,37 +2581,6 @@ static void kvmppc_prepare_radix_vcpu(struct kvm_vcpu *vcpu, int pcpu) } } -static void kvmppc_radix_check_need_tlb_flush(struct kvm *kvm, int pcpu, - struct kvm_nested_guest *nested) -{ - cpumask_t *need_tlb_flush; - int lpid; - - if (!cpu_has_feature(CPU_FTR_HVMODE)) - return; - - if (cpu_has_feature(CPU_FTR_ARCH_300)) - pcpu &= ~0x3UL; - - if (nested) { - lpid = nested->shadow_lpid; - need_tlb_flush = &nested->need_tlb_flush; - } else { - lpid = kvm->arch.lpid; - need_tlb_flush = &kvm->arch.need_tlb_flush; - } - - mtspr(SPRN_LPID, lpid); - isync(); - smp_mb(); - - if (cpumask_test_cpu(pcpu, need_tlb_flush)) { - radix__local_flush_tlb_lpid_guest(lpid); - /* Clear the bit after the TLB flush */ - cpumask_clear_cpu(pcpu, need_tlb_flush); - } -} - static void kvmppc_start_thread(struct kvm_vcpu *vcpu, struct kvmppc_vcore *vc) { int cpu; @@ -3228,19 +3274,11 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc) for (sub = 0; sub < core_info.n_subcores; ++sub) spin_unlock(&core_info.vc[sub]->lock); - if (kvm_is_radix(vc->kvm)) { - /* - * Do we need to flush the process scoped TLB for the LPAR? - * - * On POWER9, individual threads can come in here, but the - * TLB is shared between the 4 threads in a core, hence - * invalidating on one thread invalidates for all. - * Thus we make all 4 threads use the same bit here. - * - * Hash must be flushed in realmode in order to use tlbiel. - */ - kvmppc_radix_check_need_tlb_flush(vc->kvm, pcpu, NULL); - } + guest_enter_irqoff(); + + srcu_idx = srcu_read_lock(&vc->kvm->srcu); + + this_cpu_disable_ftrace(); /* * Interrupts will be enabled once we get into the guest, @@ -3248,19 +3286,14 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc) */ trace_hardirqs_on(); - guest_enter_irqoff(); - - srcu_idx = srcu_read_lock(&vc->kvm->srcu); - - this_cpu_disable_ftrace(); - trap = __kvmppc_vcore_entry(); + trace_hardirqs_off(); + this_cpu_enable_ftrace(); srcu_read_unlock(&vc->kvm->srcu, srcu_idx); - trace_hardirqs_off(); set_irq_happened(trap); spin_lock(&vc->lock); @@ -3374,7 +3407,7 @@ static int kvmhv_load_hv_regs_and_go(struct kvm_vcpu *vcpu, u64 time_limit, mtspr(SPRN_PURR, vcpu->arch.purr); mtspr(SPRN_SPURR, vcpu->arch.spurr); - if (cpu_has_feature(CPU_FTR_DAWR)) { + if (dawr_enabled()) { mtspr(SPRN_DAWR, vcpu->arch.dawr); mtspr(SPRN_DAWRX, vcpu->arch.dawrx); } @@ -3513,6 +3546,7 @@ int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit, #ifdef CONFIG_ALTIVEC load_vr_state(&vcpu->arch.vr); #endif + mtspr(SPRN_VRSAVE, vcpu->arch.vrsave); mtspr(SPRN_DSCR, vcpu->arch.dscr); mtspr(SPRN_IAMR, vcpu->arch.iamr); @@ -3604,6 +3638,7 @@ int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit, #ifdef CONFIG_ALTIVEC store_vr_state(&vcpu->arch.vr); #endif + vcpu->arch.vrsave = mfspr(SPRN_VRSAVE); if (cpu_has_feature(CPU_FTR_TM) || cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST)) @@ -3624,6 +3659,7 @@ int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit, vc->in_guest = 0; mtspr(SPRN_DEC, local_paca->kvm_hstate.dec_expires - mftb()); + mtspr(SPRN_SPRG_VDSO_WRITE, local_paca->sprg_vdso); kvmhv_load_host_pmu(); @@ -3820,7 +3856,7 @@ static int kvmhv_setup_mmu(struct kvm_vcpu *vcpu) int r = 0; struct kvm *kvm = vcpu->kvm; - mutex_lock(&kvm->lock); + mutex_lock(&kvm->arch.mmu_setup_lock); if (!kvm->arch.mmu_ready) { if (!kvm_is_radix(kvm)) r = kvmppc_hv_setup_htab_rma(vcpu); @@ -3830,7 +3866,7 @@ static int kvmhv_setup_mmu(struct kvm_vcpu *vcpu) kvm->arch.mmu_ready = 1; } } - mutex_unlock(&kvm->lock); + mutex_unlock(&kvm->arch.mmu_setup_lock); return r; } @@ -3969,7 +4005,7 @@ int kvmhv_run_single_vcpu(struct kvm_run *kvm_run, unsigned long lpcr) { int trap, r, pcpu; - int srcu_idx; + int srcu_idx, lpid; struct kvmppc_vcore *vc; struct kvm *kvm = vcpu->kvm; struct kvm_nested_guest *nested = vcpu->arch.nested; @@ -4045,19 +4081,27 @@ int kvmhv_run_single_vcpu(struct kvm_run *kvm_run, vc->vcore_state = VCORE_RUNNING; trace_kvmppc_run_core(vc, 0); - if (cpu_has_feature(CPU_FTR_HVMODE)) - kvmppc_radix_check_need_tlb_flush(kvm, pcpu, nested); + if (cpu_has_feature(CPU_FTR_HVMODE)) { + lpid = nested ? nested->shadow_lpid : kvm->arch.lpid; + mtspr(SPRN_LPID, lpid); + isync(); + kvmppc_check_need_tlb_flush(kvm, pcpu, nested); + } - trace_hardirqs_on(); guest_enter_irqoff(); srcu_idx = srcu_read_lock(&kvm->srcu); this_cpu_disable_ftrace(); + /* Tell lockdep that we're about to enable interrupts */ + trace_hardirqs_on(); + trap = kvmhv_p9_guest_entry(vcpu, time_limit, lpcr); vcpu->arch.trap = trap; + trace_hardirqs_off(); + this_cpu_enable_ftrace(); srcu_read_unlock(&kvm->srcu, srcu_idx); @@ -4067,7 +4111,6 @@ int kvmhv_run_single_vcpu(struct kvm_run *kvm_run, isync(); } - trace_hardirqs_off(); set_irq_happened(trap); kvmppc_set_host_core(pcpu); @@ -4435,7 +4478,8 @@ static void kvmppc_core_commit_memory_region_hv(struct kvm *kvm, /* * Update LPCR values in kvm->arch and in vcores. - * Caller must hold kvm->lock. + * Caller must hold kvm->arch.mmu_setup_lock (for mutual exclusion + * of kvm->arch.lpcr update). */ void kvmppc_update_lpcr(struct kvm *kvm, unsigned long lpcr, unsigned long mask) { @@ -4487,7 +4531,7 @@ void kvmppc_setup_partition_table(struct kvm *kvm) /* * Set up HPT (hashed page table) and RMA (real-mode area). - * Must be called with kvm->lock held. + * Must be called with kvm->arch.mmu_setup_lock held. */ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu) { @@ -4575,7 +4619,10 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu) goto out_srcu; } -/* Must be called with kvm->lock held and mmu_ready = 0 and no vcpus running */ +/* + * Must be called with kvm->arch.mmu_setup_lock held and + * mmu_ready = 0 and no vcpus running. + */ int kvmppc_switch_mmu_to_hpt(struct kvm *kvm) { if (nesting_enabled(kvm)) @@ -4592,7 +4639,10 @@ int kvmppc_switch_mmu_to_hpt(struct kvm *kvm) return 0; } -/* Must be called with kvm->lock held and mmu_ready = 0 and no vcpus running */ +/* + * Must be called with kvm->arch.mmu_setup_lock held and + * mmu_ready = 0 and no vcpus running. + */ int kvmppc_switch_mmu_to_radix(struct kvm *kvm) { int err; @@ -4697,6 +4747,8 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm) char buf[32]; int ret; + mutex_init(&kvm->arch.mmu_setup_lock); + /* Allocate the guest's logical partition ID */ lpid = kvmppc_alloc_lpid(); @@ -5222,7 +5274,7 @@ static int kvmhv_configure_mmu(struct kvm *kvm, struct kvm_ppc_mmuv3_cfg *cfg) if (kvmhv_on_pseries() && !radix) return -EINVAL; - mutex_lock(&kvm->lock); + mutex_lock(&kvm->arch.mmu_setup_lock); if (radix != kvm_is_radix(kvm)) { if (kvm->arch.mmu_ready) { kvm->arch.mmu_ready = 0; @@ -5250,7 +5302,7 @@ static int kvmhv_configure_mmu(struct kvm *kvm, struct kvm_ppc_mmuv3_cfg *cfg) err = 0; out_unlock: - mutex_unlock(&kvm->lock); + mutex_unlock(&kvm->arch.mmu_setup_lock); return err; } diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c index b0cf22477e87..cb05ccc8bc6a 100644 --- a/arch/powerpc/kvm/book3s_hv_builtin.c +++ b/arch/powerpc/kvm/book3s_hv_builtin.c @@ -1,9 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright 2011 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License, version 2, as - * published by the Free Software Foundation. */ #include <linux/cpu.h> @@ -805,3 +802,61 @@ void kvmppc_guest_entry_inject_int(struct kvm_vcpu *vcpu) vcpu->arch.doorbell_request = 0; } } + +static void flush_guest_tlb(struct kvm *kvm) +{ + unsigned long rb, set; + + rb = PPC_BIT(52); /* IS = 2 */ + if (kvm_is_radix(kvm)) { + /* R=1 PRS=1 RIC=2 */ + asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1) + : : "r" (rb), "i" (1), "i" (1), "i" (2), + "r" (0) : "memory"); + for (set = 1; set < kvm->arch.tlb_sets; ++set) { + rb += PPC_BIT(51); /* increment set number */ + /* R=1 PRS=1 RIC=0 */ + asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1) + : : "r" (rb), "i" (1), "i" (1), "i" (0), + "r" (0) : "memory"); + } + } else { + for (set = 0; set < kvm->arch.tlb_sets; ++set) { + /* R=0 PRS=0 RIC=0 */ + asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1) + : : "r" (rb), "i" (0), "i" (0), "i" (0), + "r" (0) : "memory"); + rb += PPC_BIT(51); /* increment set number */ + } + } + asm volatile("ptesync": : :"memory"); + asm volatile(PPC_INVALIDATE_ERAT : : :"memory"); +} + +void kvmppc_check_need_tlb_flush(struct kvm *kvm, int pcpu, + struct kvm_nested_guest *nested) +{ + cpumask_t *need_tlb_flush; + + /* + * On POWER9, individual threads can come in here, but the + * TLB is shared between the 4 threads in a core, hence + * invalidating on one thread invalidates for all. + * Thus we make all 4 threads use the same bit. + */ + if (cpu_has_feature(CPU_FTR_ARCH_300)) + pcpu = cpu_first_thread_sibling(pcpu); + + if (nested) + need_tlb_flush = &nested->need_tlb_flush; + else + need_tlb_flush = &kvm->arch.need_tlb_flush; + + if (cpumask_test_cpu(pcpu, need_tlb_flush)) { + flush_guest_tlb(kvm); + + /* Clear the bit after the TLB flush */ + cpumask_clear_cpu(pcpu, need_tlb_flush); + } +} +EXPORT_SYMBOL_GPL(kvmppc_check_need_tlb_flush); diff --git a/arch/powerpc/kvm/book3s_hv_hmi.c b/arch/powerpc/kvm/book3s_hv_hmi.c index 64b5011475c7..9af660476314 100644 --- a/arch/powerpc/kvm/book3s_hv_hmi.c +++ b/arch/powerpc/kvm/book3s_hv_hmi.c @@ -1,19 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * Hypervisor Maintenance Interrupt (HMI) handling. * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. - * * Copyright 2015 IBM Corporation * Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com> */ diff --git a/arch/powerpc/kvm/book3s_hv_interrupts.S b/arch/powerpc/kvm/book3s_hv_interrupts.S index a6d10010d9e8..63fd81f3039d 100644 --- a/arch/powerpc/kvm/book3s_hv_interrupts.S +++ b/arch/powerpc/kvm/book3s_hv_interrupts.S @@ -1,16 +1,5 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License, version 2, as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. * * Copyright 2011 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com> * diff --git a/arch/powerpc/kvm/book3s_hv_ras.c b/arch/powerpc/kvm/book3s_hv_ras.c index 8c24c3bea0bf..79f7d07ef674 100644 --- a/arch/powerpc/kvm/book3s_hv_ras.c +++ b/arch/powerpc/kvm/book3s_hv_ras.c @@ -1,7 +1,5 @@ +// SPDX-License-Identifier: GPL-2.0-only /* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License, version 2, as - * published by the Free Software Foundation. * * Copyright 2012 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com> */ diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c index 3b3791ed74a6..63e0ce91e29d 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c @@ -1,7 +1,5 @@ +// SPDX-License-Identifier: GPL-2.0-only /* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License, version 2, as - * published by the Free Software Foundation. * * Copyright 2010-2011 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com> */ @@ -13,6 +11,7 @@ #include <linux/hugetlb.h> #include <linux/module.h> #include <linux/log2.h> +#include <linux/sizes.h> #include <asm/trace.h> #include <asm/kvm_ppc.h> @@ -867,6 +866,149 @@ long kvmppc_h_clear_mod(struct kvm_vcpu *vcpu, unsigned long flags, return ret; } +static int kvmppc_get_hpa(struct kvm_vcpu *vcpu, unsigned long gpa, + int writing, unsigned long *hpa, + struct kvm_memory_slot **memslot_p) +{ + struct kvm *kvm = vcpu->kvm; + struct kvm_memory_slot *memslot; + unsigned long gfn, hva, pa, psize = PAGE_SHIFT; + unsigned int shift; + pte_t *ptep, pte; + + /* Find the memslot for this address */ + gfn = gpa >> PAGE_SHIFT; + memslot = __gfn_to_memslot(kvm_memslots_raw(kvm), gfn); + if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID)) + return H_PARAMETER; + + /* Translate to host virtual address */ + hva = __gfn_to_hva_memslot(memslot, gfn); + + /* Try to find the host pte for that virtual address */ + ptep = __find_linux_pte(vcpu->arch.pgdir, hva, NULL, &shift); + if (!ptep) + return H_TOO_HARD; + pte = kvmppc_read_update_linux_pte(ptep, writing); + if (!pte_present(pte)) + return H_TOO_HARD; + + /* Convert to a physical address */ + if (shift) + psize = 1UL << shift; + pa = pte_pfn(pte) << PAGE_SHIFT; + pa |= hva & (psize - 1); + pa |= gpa & ~PAGE_MASK; + + if (hpa) + *hpa = pa; + if (memslot_p) + *memslot_p = memslot; + + return H_SUCCESS; +} + +static long kvmppc_do_h_page_init_zero(struct kvm_vcpu *vcpu, + unsigned long dest) +{ + struct kvm_memory_slot *memslot; + struct kvm *kvm = vcpu->kvm; + unsigned long pa, mmu_seq; + long ret = H_SUCCESS; + int i; + + /* Used later to detect if we might have been invalidated */ + mmu_seq = kvm->mmu_notifier_seq; + smp_rmb(); + + ret = kvmppc_get_hpa(vcpu, dest, 1, &pa, &memslot); + if (ret != H_SUCCESS) + return ret; + + /* Check if we've been invalidated */ + raw_spin_lock(&kvm->mmu_lock.rlock); + if (mmu_notifier_retry(kvm, mmu_seq)) { + ret = H_TOO_HARD; + goto out_unlock; + } + + /* Zero the page */ + for (i = 0; i < SZ_4K; i += L1_CACHE_BYTES, pa += L1_CACHE_BYTES) + dcbz((void *)pa); + kvmppc_update_dirty_map(memslot, dest >> PAGE_SHIFT, PAGE_SIZE); + +out_unlock: + raw_spin_unlock(&kvm->mmu_lock.rlock); + return ret; +} + +static long kvmppc_do_h_page_init_copy(struct kvm_vcpu *vcpu, + unsigned long dest, unsigned long src) +{ + unsigned long dest_pa, src_pa, mmu_seq; + struct kvm_memory_slot *dest_memslot; + struct kvm *kvm = vcpu->kvm; + long ret = H_SUCCESS; + + /* Used later to detect if we might have been invalidated */ + mmu_seq = kvm->mmu_notifier_seq; + smp_rmb(); + + ret = kvmppc_get_hpa(vcpu, dest, 1, &dest_pa, &dest_memslot); + if (ret != H_SUCCESS) + return ret; + ret = kvmppc_get_hpa(vcpu, src, 0, &src_pa, NULL); + if (ret != H_SUCCESS) + return ret; + + /* Check if we've been invalidated */ + raw_spin_lock(&kvm->mmu_lock.rlock); + if (mmu_notifier_retry(kvm, mmu_seq)) { + ret = H_TOO_HARD; + goto out_unlock; + } + + /* Copy the page */ + memcpy((void *)dest_pa, (void *)src_pa, SZ_4K); + + kvmppc_update_dirty_map(dest_memslot, dest >> PAGE_SHIFT, PAGE_SIZE); + +out_unlock: + raw_spin_unlock(&kvm->mmu_lock.rlock); + return ret; +} + +long kvmppc_rm_h_page_init(struct kvm_vcpu *vcpu, unsigned long flags, + unsigned long dest, unsigned long src) +{ + struct kvm *kvm = vcpu->kvm; + u64 pg_mask = SZ_4K - 1; /* 4K page size */ + long ret = H_SUCCESS; + + /* Don't handle radix mode here, go up to the virtual mode handler */ + if (kvm_is_radix(kvm)) + return H_TOO_HARD; + + /* Check for invalid flags (H_PAGE_SET_LOANED covers all CMO flags) */ + if (flags & ~(H_ICACHE_INVALIDATE | H_ICACHE_SYNCHRONIZE | + H_ZERO_PAGE | H_COPY_PAGE | H_PAGE_SET_LOANED)) + return H_PARAMETER; + + /* dest (and src if copy_page flag set) must be page aligned */ + if ((dest & pg_mask) || ((flags & H_COPY_PAGE) && (src & pg_mask))) + return H_PARAMETER; + + /* zero and/or copy the page as determined by the flags */ + if (flags & H_COPY_PAGE) + ret = kvmppc_do_h_page_init_copy(vcpu, dest, src); + else if (flags & H_ZERO_PAGE) + ret = kvmppc_do_h_page_init_zero(vcpu, dest); + + /* We can ignore the other flags */ + + return ret; +} + void kvmppc_invalidate_hpte(struct kvm *kvm, __be64 *hptep, unsigned long pte_index) { diff --git a/arch/powerpc/kvm/book3s_hv_rm_xics.c b/arch/powerpc/kvm/book3s_hv_rm_xics.c index 3b9662a4207e..4d2ec77d806c 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_xics.c +++ b/arch/powerpc/kvm/book3s_hv_rm_xics.c @@ -1,10 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright 2012 Michael Ellerman, IBM Corporation. * Copyright 2012 Benjamin Herrenschmidt, IBM Corporation - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License, version 2, as - * published by the Free Software Foundation. */ #include <linux/kernel.h> @@ -822,7 +819,7 @@ static inline void this_cpu_inc_rm(unsigned int __percpu *addr) raddr = per_cpu_ptr(addr, cpu); l = (unsigned long)raddr; - if (REGION_ID(l) == VMALLOC_REGION_ID) { + if (get_region_id(l) == VMALLOC_REGION_ID) { l = vmalloc_to_phys(raddr); raddr = (unsigned int *)l; } diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 3a5e719ef032..337e64468d78 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -1,12 +1,5 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License, version 2, as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. * * Copyright 2011 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com> * @@ -35,6 +28,7 @@ #include <asm/thread_info.h> #include <asm/asm-compat.h> #include <asm/feature-fixups.h> +#include <asm/cpuidle.h> /* Sign-extend HDEC if not on POWER9 */ #define EXTEND_HDEC(reg) \ @@ -45,6 +39,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300) /* Values in HSTATE_NAPPING(r13) */ #define NAPPING_CEDE 1 #define NAPPING_NOVCPU 2 +#define NAPPING_UNSPLIT 3 /* Stack frame offsets for kvmppc_hv_entry */ #define SFS 208 @@ -290,17 +285,19 @@ kvm_novcpu_exit: b kvmhv_switch_to_host /* - * We come in here when wakened from nap mode. - * Relocation is off and most register values are lost. - * r13 points to the PACA. + * We come in here when wakened from Linux offline idle code. + * Relocation is off * r3 contains the SRR1 wakeup value, SRR1 is trashed. */ - .globl kvm_start_guest -kvm_start_guest: - /* Set runlatch bit the minute you wake up from nap */ - mfspr r0, SPRN_CTRLF - ori r0, r0, 1 - mtspr SPRN_CTRLT, r0 +_GLOBAL(idle_kvm_start_guest) + ld r4,PACAEMERGSP(r13) + mfcr r5 + mflr r0 + std r1,0(r4) + std r5,8(r4) + std r0,16(r4) + subi r1,r4,STACK_FRAME_OVERHEAD + SAVE_NVGPRS(r1) /* * Could avoid this and pass it through in r3. For now, @@ -308,27 +305,23 @@ kvm_start_guest: */ mtspr SPRN_SRR1,r3 - ld r2,PACATOC(r13) - li r0,0 stb r0,PACA_FTRACE_ENABLED(r13) li r0,KVM_HWTHREAD_IN_KVM stb r0,HSTATE_HWTHREAD_STATE(r13) - /* NV GPR values from power7_idle() will no longer be valid */ - li r0,1 - stb r0,PACA_NAPSTATELOST(r13) - - /* were we napping due to cede? */ + /* kvm cede / napping does not come through here */ lbz r0,HSTATE_NAPPING(r13) - cmpwi r0,NAPPING_CEDE - beq kvm_end_cede - cmpwi r0,NAPPING_NOVCPU - beq kvm_novcpu_wakeup + twnei r0,0 - ld r1,PACAEMERGSP(r13) - subi r1,r1,STACK_FRAME_OVERHEAD + b 1f + +kvm_unsplit_wakeup: + li r0, 0 + stb r0, HSTATE_NAPPING(r13) + +1: /* * We weren't napping due to cede, so this must be a secondary @@ -437,19 +430,25 @@ kvm_no_guest: lbz r3, HSTATE_HWTHREAD_REQ(r13) cmpwi r3, 0 bne 54f -/* - * We jump to pnv_wakeup_loss, which will return to the caller - * of power7_nap in the powernv cpu offline loop. The value we - * put in r3 becomes the return value for power7_nap. pnv_wakeup_loss - * requires SRR1 in r12. - */ + + /* + * Jump to idle_return_gpr_loss, which returns to the + * idle_kvm_start_guest caller. + */ li r3, LPCR_PECE0 mfspr r4, SPRN_LPCR rlwimi r4, r3, 0, LPCR_PECE0 | LPCR_PECE1 mtspr SPRN_LPCR, r4 - li r3, 0 - mfspr r12,SPRN_SRR1 - b pnv_wakeup_loss + /* set up r3 for return */ + mfspr r3,SPRN_SRR1 + REST_NVGPRS(r1) + addi r1, r1, STACK_FRAME_OVERHEAD + ld r0, 16(r1) + ld r5, 8(r1) + ld r1, 0(r1) + mtlr r0 + mtcr r5 + blr 53: HMT_LOW ld r5, HSTATE_KVM_VCORE(r13) @@ -534,6 +533,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) lbz r0, KVM_SPLIT_DO_NAP(r3) cmpwi r0, 0 beq 57f + li r3, NAPPING_UNSPLIT + stb r3, HSTATE_NAPPING(r13) li r3, (LPCR_PECEDH | LPCR_PECE0) >> 4 mfspr r5, SPRN_LPCR rlwimi r5, r3, 4, (LPCR_PECEDP | LPCR_PECEDH | LPCR_PECE0 | LPCR_PECE1) @@ -581,11 +582,8 @@ kvmppc_hv_entry: 1: #endif - /* Use cr7 as an indication of radix mode */ ld r5, HSTATE_KVM_VCORE(r13) ld r9, VCORE_KVM(r5) /* pointer to struct kvm */ - lbz r0, KVM_RADIX(r9) - cmpwi cr7, r0, 0 /* * POWER7/POWER8 host -> guest partition switch code. @@ -608,9 +606,6 @@ kvmppc_hv_entry: cmpwi r6,0 bne 10f - /* Radix has already switched LPID and flushed core TLB */ - bne cr7, 22f - lwz r7,KVM_LPID(r9) BEGIN_FTR_SECTION ld r6,KVM_SDR1(r9) @@ -622,41 +617,13 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300) mtspr SPRN_LPID,r7 isync - /* See if we need to flush the TLB. Hash has to be done in RM */ - lhz r6,PACAPACAINDEX(r13) /* test_bit(cpu, need_tlb_flush) */ -BEGIN_FTR_SECTION - /* - * On POWER9, individual threads can come in here, but the - * TLB is shared between the 4 threads in a core, hence - * invalidating on one thread invalidates for all. - * Thus we make all 4 threads use the same bit here. - */ - clrrdi r6,r6,2 -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) - clrldi r7,r6,64-6 /* extract bit number (6 bits) */ - srdi r6,r6,6 /* doubleword number */ - sldi r6,r6,3 /* address offset */ - add r6,r6,r9 - addi r6,r6,KVM_NEED_FLUSH /* dword in kvm->arch.need_tlb_flush */ - li r8,1 - sld r8,r8,r7 - ld r7,0(r6) - and. r7,r7,r8 - beq 22f - /* Flush the TLB of any entries for this LPID */ - lwz r0,KVM_TLB_SETS(r9) - mtctr r0 - li r7,0x800 /* IS field = 0b10 */ - ptesync - li r0,0 /* RS for P9 version of tlbiel */ -28: tlbiel r7 /* On P9, rs=0, RIC=0, PRS=0, R=0 */ - addi r7,r7,0x1000 - bdnz 28b - ptesync -23: ldarx r7,0,r6 /* clear the bit after TLB flushed */ - andc r7,r7,r8 - stdcx. r7,0,r6 - bne 23b + /* See if we need to flush the TLB. */ + mr r3, r9 /* kvm pointer */ + lhz r4, PACAPACAINDEX(r13) /* physical cpu number */ + li r5, 0 /* nested vcpu pointer */ + bl kvmppc_check_need_tlb_flush + nop + ld r5, HSTATE_KVM_VCORE(r13) /* Add timebase offset onto timebase */ 22: ld r8,VCORE_TB_OFFSET(r5) @@ -822,18 +789,21 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) mtspr SPRN_IAMR, r5 mtspr SPRN_PSPB, r6 mtspr SPRN_FSCR, r7 - ld r5, VCPU_DAWR(r4) - ld r6, VCPU_DAWRX(r4) - ld r7, VCPU_CIABR(r4) - ld r8, VCPU_TAR(r4) /* * Handle broken DAWR case by not writing it. This means we * can still store the DAWR register for migration. */ -BEGIN_FTR_SECTION + LOAD_REG_ADDR(r5, dawr_force_enable) + lbz r5, 0(r5) + cmpdi r5, 0 + beq 1f + ld r5, VCPU_DAWR(r4) + ld r6, VCPU_DAWRX(r4) mtspr SPRN_DAWR, r5 mtspr SPRN_DAWRX, r6 -END_FTR_SECTION_IFSET(CPU_FTR_DAWR) +1: + ld r7, VCPU_CIABR(r4) + ld r8, VCPU_TAR(r4) mtspr SPRN_CIABR, r7 mtspr SPRN_TAR, r8 ld r5, VCPU_IC(r4) @@ -969,17 +939,27 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300) #ifdef CONFIG_KVM_XICS /* We are entering the guest on that thread, push VCPU to XIVE */ - ld r10, HSTATE_XIVE_TIMA_PHYS(r13) - cmpldi cr0, r10, 0 - beq no_xive ld r11, VCPU_XIVE_SAVED_STATE(r4) li r9, TM_QW1_OS + lwz r8, VCPU_XIVE_CAM_WORD(r4) + li r7, TM_QW1_OS + TM_WORD2 + mfmsr r0 + andi. r0, r0, MSR_DR /* in real mode? */ + beq 2f + ld r10, HSTATE_XIVE_TIMA_VIRT(r13) + cmpldi cr1, r10, 0 + beq cr1, no_xive + eieio + stdx r11,r9,r10 + stwx r8,r7,r10 + b 3f +2: ld r10, HSTATE_XIVE_TIMA_PHYS(r13) + cmpldi cr1, r10, 0 + beq cr1, no_xive eieio stdcix r11,r9,r10 - lwz r11, VCPU_XIVE_CAM_WORD(r4) - li r9, TM_QW1_OS + TM_WORD2 - stwcix r11,r9,r10 - li r9, 1 + stwcix r8,r7,r10 +3: li r9, 1 stb r9, VCPU_XIVE_PUSHED(r4) eieio @@ -998,12 +978,16 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300) * on, we mask it. */ lbz r0, VCPU_XIVE_ESC_ON(r4) - cmpwi r0,0 - beq 1f - ld r10, VCPU_XIVE_ESC_RADDR(r4) + cmpwi cr1, r0,0 + beq cr1, 1f li r9, XIVE_ESB_SET_PQ_01 + beq 4f /* in real mode? */ + ld r10, VCPU_XIVE_ESC_VADDR(r4) + ldx r0, r10, r9 + b 5f +4: ld r10, VCPU_XIVE_ESC_RADDR(r4) ldcix r0, r10, r9 - sync +5: sync /* We have a possible subtle race here: The escalation interrupt might * have fired and be on its way to the host queue while we mask it, @@ -2281,7 +2265,7 @@ hcall_real_table: #endif .long 0 /* 0x24 - H_SET_SPRG0 */ .long DOTSYM(kvmppc_h_set_dabr) - hcall_real_table - .long 0 /* 0x2c */ + .long DOTSYM(kvmppc_rm_h_page_init) - hcall_real_table .long 0 /* 0x30 */ .long 0 /* 0x34 */ .long 0 /* 0x38 */ @@ -2513,20 +2497,31 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) blr 2: -BEGIN_FTR_SECTION - /* POWER9 with disabled DAWR */ + LOAD_REG_ADDR(r11, dawr_force_enable) + lbz r11, 0(r11) + cmpdi r11, 0 + bne 3f li r3, H_HARDWARE blr -END_FTR_SECTION_IFCLR(CPU_FTR_DAWR) +3: /* Emulate H_SET_DABR/X on P8 for the sake of compat mode guests */ rlwimi r5, r4, 5, DAWRX_DR | DAWRX_DW rlwimi r5, r4, 2, DAWRX_WT clrrdi r4, r4, 3 std r4, VCPU_DAWR(r3) std r5, VCPU_DAWRX(r3) + /* + * If came in through the real mode hcall handler then it is necessary + * to write the registers since the return path won't. Otherwise it is + * sufficient to store then in the vcpu struct as they will be loaded + * next time the vcpu is run. + */ + mfmsr r6 + andi. r6, r6, MSR_DR /* in real mode? */ + bne 4f mtspr SPRN_DAWR, r4 mtspr SPRN_DAWRX, r5 - li r3, 0 +4: li r3, 0 blr _GLOBAL(kvmppc_h_cede) /* r3 = vcpu pointer, r11 = msr, r13 = paca */ @@ -2654,6 +2649,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) lis r3, LPCR_PECEDP@h /* Do wake on privileged doorbell */ + /* Go back to host stack */ + ld r1, HSTATE_HOST_R1(r13) + /* * Take a nap until a decrementer or external or doobell interrupt * occurs, with PECE1 and PECE0 set in LPCR. @@ -2682,26 +2680,42 @@ BEGIN_FTR_SECTION * requested level = 0 (just stop dispatching) */ lis r3, (PSSCR_EC | PSSCR_ESL)@h - mtspr SPRN_PSSCR, r3 /* Set LPCR_PECE_HVEE bit to enable wakeup by HV interrupts */ li r4, LPCR_PECE_HVEE@higher sldi r4, r4, 32 or r5, r5, r4 -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) +FTR_SECTION_ELSE + li r3, PNV_THREAD_NAP +ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_300) mtspr SPRN_LPCR,r5 isync - li r0, 0 - std r0, HSTATE_SCRATCH0(r13) - ptesync - ld r0, HSTATE_SCRATCH0(r13) -1: cmpd r0, r0 - bne 1b + BEGIN_FTR_SECTION - nap + bl isa300_idle_stop_mayloss FTR_SECTION_ELSE - PPC_STOP -ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300) - b . + bl isa206_idle_insn_mayloss +ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_300) + + mfspr r0, SPRN_CTRLF + ori r0, r0, 1 + mtspr SPRN_CTRLT, r0 + + mtspr SPRN_SRR1, r3 + + li r0, 0 + stb r0, PACA_FTRACE_ENABLED(r13) + + li r0, KVM_HWTHREAD_IN_KVM + stb r0, HSTATE_HWTHREAD_STATE(r13) + + lbz r0, HSTATE_NAPPING(r13) + cmpwi r0, NAPPING_CEDE + beq kvm_end_cede + cmpwi r0, NAPPING_NOVCPU + beq kvm_novcpu_wakeup + cmpwi r0, NAPPING_UNSPLIT + beq kvm_unsplit_wakeup + twi 31,0,0 /* Nap state must not be zero */ 33: mr r4, r3 li r3, 0 @@ -2709,12 +2723,11 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300) b 34f kvm_end_cede: + /* Woken by external or decrementer interrupt */ + /* get vcpu pointer */ ld r4, HSTATE_KVM_VCPU(r13) - /* Woken by external or decrementer interrupt */ - ld r1, HSTATE_HOST_R1(r13) - #ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING addi r3, r4, VCPU_TB_RMINTR bl kvmhv_accumulate_time diff --git a/arch/powerpc/kvm/book3s_hv_tm.c b/arch/powerpc/kvm/book3s_hv_tm.c index 888e2609e3f1..229496e2652e 100644 --- a/arch/powerpc/kvm/book3s_hv_tm.c +++ b/arch/powerpc/kvm/book3s_hv_tm.c @@ -1,9 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright 2017 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License, version 2, as - * published by the Free Software Foundation. */ #include <linux/kvm_host.h> diff --git a/arch/powerpc/kvm/book3s_hv_tm_builtin.c b/arch/powerpc/kvm/book3s_hv_tm_builtin.c index 3cf5863bc06e..217246279dfa 100644 --- a/arch/powerpc/kvm/book3s_hv_tm_builtin.c +++ b/arch/powerpc/kvm/book3s_hv_tm_builtin.c @@ -1,9 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright 2017 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License, version 2, as - * published by the Free Software Foundation. */ #include <linux/kvm_host.h> diff --git a/arch/powerpc/kvm/book3s_interrupts.S b/arch/powerpc/kvm/book3s_interrupts.S index d71dab16dc6f..f7ad99d972ce 100644 --- a/arch/powerpc/kvm/book3s_interrupts.S +++ b/arch/powerpc/kvm/book3s_interrupts.S @@ -1,16 +1,5 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License, version 2, as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. * * Copyright SUSE Linux Products GmbH 2009 * diff --git a/arch/powerpc/kvm/book3s_mmu_hpte.c b/arch/powerpc/kvm/book3s_mmu_hpte.c index 905a934c1ef4..ce79ac33e8d3 100644 --- a/arch/powerpc/kvm/book3s_mmu_hpte.c +++ b/arch/powerpc/kvm/book3s_mmu_hpte.c @@ -1,21 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 2010 SUSE Linux Products GmbH. All rights reserved. * * Authors: * Alexander Graf <agraf@suse.de> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License, version 2, as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ #include <linux/kvm_host.h> diff --git a/arch/powerpc/kvm/book3s_paired_singles.c b/arch/powerpc/kvm/book3s_paired_singles.c index eab96cfe82fa..bf0282775e37 100644 --- a/arch/powerpc/kvm/book3s_paired_singles.c +++ b/arch/powerpc/kvm/book3s_paired_singles.c @@ -1,16 +1,5 @@ +// SPDX-License-Identifier: GPL-2.0-only /* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License, version 2, as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. * * Copyright Novell Inc 2010 * diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index 811a3c2fb0e9..cc65af8fe6f7 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 2009. SUSE Linux Products GmbH. All rights reserved. * @@ -13,10 +14,6 @@ * * This file is derived from arch/powerpc/kvm/44x.c, * by Hollis Blanchard <hollisb@us.ibm.com>. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License, version 2, as - * published by the Free Software Foundation. */ #include <linux/kvm_host.h> diff --git a/arch/powerpc/kvm/book3s_pr_papr.c b/arch/powerpc/kvm/book3s_pr_papr.c index dae3be5ff42b..031c8015864a 100644 --- a/arch/powerpc/kvm/book3s_pr_papr.c +++ b/arch/powerpc/kvm/book3s_pr_papr.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 2011. Freescale Inc. All rights reserved. * @@ -9,10 +10,6 @@ * * Hypercall handling for running PAPR guests in PR KVM on Book 3S * processors. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License, version 2, as - * published by the Free Software Foundation. */ #include <linux/anon_inodes.h> diff --git a/arch/powerpc/kvm/book3s_rmhandlers.S b/arch/powerpc/kvm/book3s_rmhandlers.S index b0089e04c8c8..3dc129a254b5 100644 --- a/arch/powerpc/kvm/book3s_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_rmhandlers.S @@ -1,16 +1,5 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License, version 2, as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. * * Copyright SUSE Linux Products GmbH 2009 * diff --git a/arch/powerpc/kvm/book3s_rtas.c b/arch/powerpc/kvm/book3s_rtas.c index 4e178c4c1ea5..26b25994c969 100644 --- a/arch/powerpc/kvm/book3s_rtas.c +++ b/arch/powerpc/kvm/book3s_rtas.c @@ -1,9 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright 2012 Michael Ellerman, IBM Corporation. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License, version 2, as - * published by the Free Software Foundation. */ #include <linux/kernel.h> @@ -146,7 +143,7 @@ static int rtas_token_undefine(struct kvm *kvm, char *name) { struct rtas_token_definition *d, *tmp; - lockdep_assert_held(&kvm->lock); + lockdep_assert_held(&kvm->arch.rtas_token_lock); list_for_each_entry_safe(d, tmp, &kvm->arch.rtas_tokens, list) { if (rtas_name_matches(d->handler->name, name)) { @@ -167,7 +164,7 @@ static int rtas_token_define(struct kvm *kvm, char *name, u64 token) bool found; int i; - lockdep_assert_held(&kvm->lock); + lockdep_assert_held(&kvm->arch.rtas_token_lock); list_for_each_entry(d, &kvm->arch.rtas_tokens, list) { if (d->token == token) @@ -206,14 +203,14 @@ int kvm_vm_ioctl_rtas_define_token(struct kvm *kvm, void __user *argp) if (copy_from_user(&args, argp, sizeof(args))) return -EFAULT; - mutex_lock(&kvm->lock); + mutex_lock(&kvm->arch.rtas_token_lock); if (args.token) rc = rtas_token_define(kvm, args.name, args.token); else rc = rtas_token_undefine(kvm, args.name); - mutex_unlock(&kvm->lock); + mutex_unlock(&kvm->arch.rtas_token_lock); return rc; } @@ -245,7 +242,7 @@ int kvmppc_rtas_hcall(struct kvm_vcpu *vcpu) orig_rets = args.rets; args.rets = &args.args[be32_to_cpu(args.nargs)]; - mutex_lock(&vcpu->kvm->lock); + mutex_lock(&vcpu->kvm->arch.rtas_token_lock); rc = -ENOENT; list_for_each_entry(d, &vcpu->kvm->arch.rtas_tokens, list) { @@ -256,7 +253,7 @@ int kvmppc_rtas_hcall(struct kvm_vcpu *vcpu) } } - mutex_unlock(&vcpu->kvm->lock); + mutex_unlock(&vcpu->kvm->arch.rtas_token_lock); if (rc == 0) { args.rets = orig_rets; @@ -282,8 +279,6 @@ void kvmppc_rtas_tokens_free(struct kvm *kvm) { struct rtas_token_definition *d, *tmp; - lockdep_assert_held(&kvm->lock); - list_for_each_entry_safe(d, tmp, &kvm->arch.rtas_tokens, list) { list_del(&d->list); kfree(d); diff --git a/arch/powerpc/kvm/book3s_segment.S b/arch/powerpc/kvm/book3s_segment.S index e5c542a7c5ac..0169bab544dd 100644 --- a/arch/powerpc/kvm/book3s_segment.S +++ b/arch/powerpc/kvm/book3s_segment.S @@ -1,16 +1,5 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License, version 2, as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. * * Copyright SUSE Linux Products GmbH 2010 * diff --git a/arch/powerpc/kvm/book3s_xics.c b/arch/powerpc/kvm/book3s_xics.c index f27ee57ab46e..e8276161872e 100644 --- a/arch/powerpc/kvm/book3s_xics.c +++ b/arch/powerpc/kvm/book3s_xics.c @@ -1,10 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright 2012 Michael Ellerman, IBM Corporation. * Copyright 2012 Benjamin Herrenschmidt, IBM Corporation. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License, version 2, as - * published by the Free Software Foundation. */ #include <linux/kernel.h> diff --git a/arch/powerpc/kvm/book3s_xics.h b/arch/powerpc/kvm/book3s_xics.h index 453c9e518c19..6231f76bdd66 100644 --- a/arch/powerpc/kvm/book3s_xics.h +++ b/arch/powerpc/kvm/book3s_xics.h @@ -1,10 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright 2012 Michael Ellerman, IBM Corporation. * Copyright 2012 Benjamin Herrenschmidt, IBM Corporation - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License, version 2, as - * published by the Free Software Foundation. */ #ifndef _KVM_PPC_BOOK3S_XICS_H diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c index f78d002f0fe0..6ca0d7376a9f 100644 --- a/arch/powerpc/kvm/book3s_xive.c +++ b/arch/powerpc/kvm/book3s_xive.c @@ -1,9 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright 2017 Benjamin Herrenschmidt, IBM Corporation. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License, version 2, as - * published by the Free Software Foundation. */ #define pr_fmt(fmt) "xive-kvm: " fmt @@ -166,7 +163,8 @@ static irqreturn_t xive_esc_irq(int irq, void *data) return IRQ_HANDLED; } -static int xive_attach_escalation(struct kvm_vcpu *vcpu, u8 prio) +int kvmppc_xive_attach_escalation(struct kvm_vcpu *vcpu, u8 prio, + bool single_escalation) { struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu; struct xive_q *q = &xc->queues[prio]; @@ -185,7 +183,7 @@ static int xive_attach_escalation(struct kvm_vcpu *vcpu, u8 prio) return -EIO; } - if (xc->xive->single_escalation) + if (single_escalation) name = kasprintf(GFP_KERNEL, "kvm-%d-%d", vcpu->kvm->arch.lpid, xc->server_num); else @@ -217,7 +215,7 @@ static int xive_attach_escalation(struct kvm_vcpu *vcpu, u8 prio) * interrupt, thus leaving it effectively masked after * it fires once. */ - if (xc->xive->single_escalation) { + if (single_escalation) { struct irq_data *d = irq_get_irq_data(xc->esc_virq[prio]); struct xive_irq_data *xd = irq_data_get_irq_handler_data(d); @@ -270,14 +268,14 @@ static int xive_provision_queue(struct kvm_vcpu *vcpu, u8 prio) return rc; } -/* Called with kvm_lock held */ +/* Called with xive->lock held */ static int xive_check_provisioning(struct kvm *kvm, u8 prio) { struct kvmppc_xive *xive = kvm->arch.xive; struct kvm_vcpu *vcpu; int i, rc; - lockdep_assert_held(&kvm->lock); + lockdep_assert_held(&xive->lock); /* Already provisioned ? */ if (xive->qmap & (1 << prio)) @@ -291,7 +289,8 @@ static int xive_check_provisioning(struct kvm *kvm, u8 prio) continue; rc = xive_provision_queue(vcpu, prio); if (rc == 0 && !xive->single_escalation) - xive_attach_escalation(vcpu, prio); + kvmppc_xive_attach_escalation(vcpu, prio, + xive->single_escalation); if (rc) return rc; } @@ -342,7 +341,7 @@ static int xive_try_pick_queue(struct kvm_vcpu *vcpu, u8 prio) return atomic_add_unless(&q->count, 1, max) ? 0 : -EBUSY; } -static int xive_select_target(struct kvm *kvm, u32 *server, u8 prio) +int kvmppc_xive_select_target(struct kvm *kvm, u32 *server, u8 prio) { struct kvm_vcpu *vcpu; int i, rc; @@ -380,11 +379,6 @@ static int xive_select_target(struct kvm *kvm, u32 *server, u8 prio) return -EBUSY; } -static u32 xive_vp(struct kvmppc_xive *xive, u32 server) -{ - return xive->vp_base + kvmppc_pack_vcpu_id(xive->kvm, server); -} - static u8 xive_lock_and_mask(struct kvmppc_xive *xive, struct kvmppc_xive_src_block *sb, struct kvmppc_xive_irq_state *state) @@ -430,8 +424,8 @@ static u8 xive_lock_and_mask(struct kvmppc_xive *xive, */ if (xd->flags & OPAL_XIVE_IRQ_MASK_VIA_FW) { xive_native_configure_irq(hw_num, - xive_vp(xive, state->act_server), - MASKED, state->number); + kvmppc_xive_vp(xive, state->act_server), + MASKED, state->number); /* set old_p so we can track if an H_EOI was done */ state->old_p = true; state->old_q = false; @@ -486,8 +480,8 @@ static void xive_finish_unmask(struct kvmppc_xive *xive, */ if (xd->flags & OPAL_XIVE_IRQ_MASK_VIA_FW) { xive_native_configure_irq(hw_num, - xive_vp(xive, state->act_server), - state->act_priority, state->number); + kvmppc_xive_vp(xive, state->act_server), + state->act_priority, state->number); /* If an EOI is needed, do it here */ if (!state->old_p) xive_vm_source_eoi(hw_num, xd); @@ -535,7 +529,7 @@ static int xive_target_interrupt(struct kvm *kvm, * priority. The count for that new target will have * already been incremented. */ - rc = xive_select_target(kvm, &server, prio); + rc = kvmppc_xive_select_target(kvm, &server, prio); /* * We failed to find a target ? Not much we can do @@ -563,7 +557,7 @@ static int xive_target_interrupt(struct kvm *kvm, kvmppc_xive_select_irq(state, &hw_num, NULL); return xive_native_configure_irq(hw_num, - xive_vp(xive, server), + kvmppc_xive_vp(xive, server), prio, state->number); } @@ -624,9 +618,12 @@ int kvmppc_xive_set_xive(struct kvm *kvm, u32 irq, u32 server, irq, server, priority); /* First, check provisioning of queues */ - if (priority != MASKED) + if (priority != MASKED) { + mutex_lock(&xive->lock); rc = xive_check_provisioning(xive->kvm, xive_prio_from_guest(priority)); + mutex_unlock(&xive->lock); + } if (rc) { pr_devel(" provisioning failure %d !\n", rc); return rc; @@ -849,7 +846,8 @@ int kvmppc_xive_set_icp(struct kvm_vcpu *vcpu, u64 icpval) /* * We can't update the state of a "pushed" VCPU, but that - * shouldn't happen. + * shouldn't happen because the vcpu->mutex makes running a + * vcpu mutually exclusive with doing one_reg get/set on it. */ if (WARN_ON(vcpu->arch.xive_pushed)) return -EIO; @@ -940,6 +938,13 @@ int kvmppc_xive_set_mapped(struct kvm *kvm, unsigned long guest_irq, /* Turn the IPI hard off */ xive_vm_esb_load(&state->ipi_data, XIVE_ESB_SET_PQ_01); + /* + * Reset ESB guest mapping. Needed when ESB pages are exposed + * to the guest in XIVE native mode + */ + if (xive->ops && xive->ops->reset_mapped) + xive->ops->reset_mapped(kvm, guest_irq); + /* Grab info about irq */ state->pt_number = hw_irq; state->pt_data = irq_data_get_irq_handler_data(host_data); @@ -951,7 +956,7 @@ int kvmppc_xive_set_mapped(struct kvm *kvm, unsigned long guest_irq, * which is fine for a never started interrupt. */ xive_native_configure_irq(hw_irq, - xive_vp(xive, state->act_server), + kvmppc_xive_vp(xive, state->act_server), state->act_priority, state->number); /* @@ -1025,9 +1030,17 @@ int kvmppc_xive_clr_mapped(struct kvm *kvm, unsigned long guest_irq, state->pt_number = 0; state->pt_data = NULL; + /* + * Reset ESB guest mapping. Needed when ESB pages are exposed + * to the guest in XIVE native mode + */ + if (xive->ops && xive->ops->reset_mapped) { + xive->ops->reset_mapped(kvm, guest_irq); + } + /* Reconfigure the IPI */ xive_native_configure_irq(state->ipi_number, - xive_vp(xive, state->act_server), + kvmppc_xive_vp(xive, state->act_server), state->act_priority, state->number); /* @@ -1049,7 +1062,7 @@ int kvmppc_xive_clr_mapped(struct kvm *kvm, unsigned long guest_irq, } EXPORT_SYMBOL_GPL(kvmppc_xive_clr_mapped); -static void kvmppc_xive_disable_vcpu_interrupts(struct kvm_vcpu *vcpu) +void kvmppc_xive_disable_vcpu_interrupts(struct kvm_vcpu *vcpu) { struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu; struct kvm *kvm = vcpu->kvm; @@ -1083,14 +1096,35 @@ static void kvmppc_xive_disable_vcpu_interrupts(struct kvm_vcpu *vcpu) arch_spin_unlock(&sb->lock); } } + + /* Disable vcpu's escalation interrupt */ + if (vcpu->arch.xive_esc_on) { + __raw_readq((void __iomem *)(vcpu->arch.xive_esc_vaddr + + XIVE_ESB_SET_PQ_01)); + vcpu->arch.xive_esc_on = false; + } + + /* + * Clear pointers to escalation interrupt ESB. + * This is safe because the vcpu->mutex is held, preventing + * any other CPU from concurrently executing a KVM_RUN ioctl. + */ + vcpu->arch.xive_esc_vaddr = 0; + vcpu->arch.xive_esc_raddr = 0; } void kvmppc_xive_cleanup_vcpu(struct kvm_vcpu *vcpu) { struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu; - struct kvmppc_xive *xive = xc->xive; + struct kvmppc_xive *xive = vcpu->kvm->arch.xive; int i; + if (!kvmppc_xics_enabled(vcpu)) + return; + + if (!xc) + return; + pr_devel("cleanup_vcpu(cpu=%d)\n", xc->server_num); /* Ensure no interrupt is still routed to that VP */ @@ -1129,6 +1163,10 @@ void kvmppc_xive_cleanup_vcpu(struct kvm_vcpu *vcpu) } /* Free the VP */ kfree(xc); + + /* Cleanup the vcpu */ + vcpu->arch.irq_type = KVMPPC_IRQ_DEFAULT; + vcpu->arch.xive_vcpu = NULL; } int kvmppc_xive_connect_vcpu(struct kvm_device *dev, @@ -1146,7 +1184,7 @@ int kvmppc_xive_connect_vcpu(struct kvm_device *dev, } if (xive->kvm != vcpu->kvm) return -EPERM; - if (vcpu->arch.irq_type) + if (vcpu->arch.irq_type != KVMPPC_IRQ_DEFAULT) return -EBUSY; if (kvmppc_xive_find_server(vcpu->kvm, cpu)) { pr_devel("Duplicate !\n"); @@ -1161,12 +1199,12 @@ int kvmppc_xive_connect_vcpu(struct kvm_device *dev, return -ENOMEM; /* We need to synchronize with queue provisioning */ - mutex_lock(&vcpu->kvm->lock); + mutex_lock(&xive->lock); vcpu->arch.xive_vcpu = xc; xc->xive = xive; xc->vcpu = vcpu; xc->server_num = cpu; - xc->vp_id = xive_vp(xive, cpu); + xc->vp_id = kvmppc_xive_vp(xive, cpu); xc->mfrr = 0xff; xc->valid = true; @@ -1219,7 +1257,8 @@ int kvmppc_xive_connect_vcpu(struct kvm_device *dev, if (xive->qmap & (1 << i)) { r = xive_provision_queue(vcpu, i); if (r == 0 && !xive->single_escalation) - xive_attach_escalation(vcpu, i); + kvmppc_xive_attach_escalation( + vcpu, i, xive->single_escalation); if (r) goto bail; } else { @@ -1234,7 +1273,7 @@ int kvmppc_xive_connect_vcpu(struct kvm_device *dev, } /* If not done above, attach priority 0 escalation */ - r = xive_attach_escalation(vcpu, 0); + r = kvmppc_xive_attach_escalation(vcpu, 0, xive->single_escalation); if (r) goto bail; @@ -1244,7 +1283,7 @@ int kvmppc_xive_connect_vcpu(struct kvm_device *dev, xive_vm_esb_load(&xc->vp_ipi_data, XIVE_ESB_SET_PQ_00); bail: - mutex_unlock(&vcpu->kvm->lock); + mutex_unlock(&xive->lock); if (r) { kvmppc_xive_cleanup_vcpu(vcpu); return r; @@ -1485,16 +1524,15 @@ static int xive_get_source(struct kvmppc_xive *xive, long irq, u64 addr) return 0; } -static struct kvmppc_xive_src_block *xive_create_src_block(struct kvmppc_xive *xive, - int irq) +struct kvmppc_xive_src_block *kvmppc_xive_create_src_block( + struct kvmppc_xive *xive, int irq) { - struct kvm *kvm = xive->kvm; struct kvmppc_xive_src_block *sb; int i, bid; bid = irq >> KVMPPC_XICS_ICS_SHIFT; - mutex_lock(&kvm->lock); + mutex_lock(&xive->lock); /* block already exists - somebody else got here first */ if (xive->src_blocks[bid]) @@ -1509,6 +1547,7 @@ static struct kvmppc_xive_src_block *xive_create_src_block(struct kvmppc_xive *x for (i = 0; i < KVMPPC_XICS_IRQ_PER_ICS; i++) { sb->irq_state[i].number = (bid << KVMPPC_XICS_ICS_SHIFT) | i; + sb->irq_state[i].eisn = 0; sb->irq_state[i].guest_priority = MASKED; sb->irq_state[i].saved_priority = MASKED; sb->irq_state[i].act_priority = MASKED; @@ -1520,7 +1559,7 @@ static struct kvmppc_xive_src_block *xive_create_src_block(struct kvmppc_xive *x xive->max_sbid = bid; out: - mutex_unlock(&kvm->lock); + mutex_unlock(&xive->lock); return xive->src_blocks[bid]; } @@ -1565,7 +1604,7 @@ static int xive_set_source(struct kvmppc_xive *xive, long irq, u64 addr) sb = kvmppc_xive_find_source(xive, irq, &idx); if (!sb) { pr_devel("No source, creating source block...\n"); - sb = xive_create_src_block(xive, irq); + sb = kvmppc_xive_create_src_block(xive, irq); if (!sb) { pr_devel("Failed to create block...\n"); return -ENOMEM; @@ -1630,9 +1669,9 @@ static int xive_set_source(struct kvmppc_xive *xive, long irq, u64 addr) /* If we have a priority target the interrupt */ if (act_prio != MASKED) { /* First, check provisioning of queues */ - mutex_lock(&xive->kvm->lock); + mutex_lock(&xive->lock); rc = xive_check_provisioning(xive->kvm, act_prio); - mutex_unlock(&xive->kvm->lock); + mutex_unlock(&xive->lock); /* Target interrupt */ if (rc == 0) @@ -1786,10 +1825,9 @@ static void kvmppc_xive_cleanup_irq(u32 hw_num, struct xive_irq_data *xd) { xive_vm_esb_load(xd, XIVE_ESB_SET_PQ_01); xive_native_configure_irq(hw_num, 0, MASKED, 0); - xive_cleanup_irq_data(xd); } -static void kvmppc_xive_free_sources(struct kvmppc_xive_src_block *sb) +void kvmppc_xive_free_sources(struct kvmppc_xive_src_block *sb) { int i; @@ -1800,9 +1838,10 @@ static void kvmppc_xive_free_sources(struct kvmppc_xive_src_block *sb) continue; kvmppc_xive_cleanup_irq(state->ipi_number, &state->ipi_data); + xive_cleanup_irq_data(&state->ipi_data); xive_native_free_irq(state->ipi_number); - /* Pass-through, cleanup too */ + /* Pass-through, cleanup too but keep IRQ hw data */ if (state->pt_number) kvmppc_xive_cleanup_irq(state->pt_number, state->pt_data); @@ -1810,16 +1849,53 @@ static void kvmppc_xive_free_sources(struct kvmppc_xive_src_block *sb) } } -static void kvmppc_xive_free(struct kvm_device *dev) +/* + * Called when device fd is closed. kvm->lock is held. + */ +static void kvmppc_xive_release(struct kvm_device *dev) { struct kvmppc_xive *xive = dev->private; struct kvm *kvm = xive->kvm; + struct kvm_vcpu *vcpu; int i; + pr_devel("Releasing xive device\n"); + + /* + * Since this is the device release function, we know that + * userspace does not have any open fd referring to the + * device. Therefore there can not be any of the device + * attribute set/get functions being executed concurrently, + * and similarly, the connect_vcpu and set/clr_mapped + * functions also cannot be being executed. + */ + debugfs_remove(xive->dentry); - if (kvm) - kvm->arch.xive = NULL; + /* + * We should clean up the vCPU interrupt presenters first. + */ + kvm_for_each_vcpu(i, vcpu, kvm) { + /* + * Take vcpu->mutex to ensure that no one_reg get/set ioctl + * (i.e. kvmppc_xive_[gs]et_icp) can be done concurrently. + * Holding the vcpu->mutex also means that the vcpu cannot + * be executing the KVM_RUN ioctl, and therefore it cannot + * be executing the XIVE push or pull code or accessing + * the XIVE MMIO regions. + */ + mutex_lock(&vcpu->mutex); + kvmppc_xive_cleanup_vcpu(vcpu); + mutex_unlock(&vcpu->mutex); + } + + /* + * Now that we have cleared vcpu->arch.xive_vcpu, vcpu->arch.irq_type + * and vcpu->arch.xive_esc_[vr]addr on each vcpu, we are safe + * against xive code getting called during vcpu execution or + * set/get one_reg operations. + */ + kvm->arch.xive = NULL; /* Mask and free interrupts */ for (i = 0; i <= xive->max_sbid; i++) { @@ -1832,11 +1908,45 @@ static void kvmppc_xive_free(struct kvm_device *dev) if (xive->vp_base != XIVE_INVALID_VP) xive_native_free_vp_block(xive->vp_base); + /* + * A reference of the kvmppc_xive pointer is now kept under + * the xive_devices struct of the machine for reuse. It is + * freed when the VM is destroyed for now until we fix all the + * execution paths. + */ - kfree(xive); kfree(dev); } +/* + * When the guest chooses the interrupt mode (XICS legacy or XIVE + * native), the VM will switch of KVM device. The previous device will + * be "released" before the new one is created. + * + * Until we are sure all execution paths are well protected, provide a + * fail safe (transitional) method for device destruction, in which + * the XIVE device pointer is recycled and not directly freed. + */ +struct kvmppc_xive *kvmppc_xive_get_device(struct kvm *kvm, u32 type) +{ + struct kvmppc_xive **kvm_xive_device = type == KVM_DEV_TYPE_XIVE ? + &kvm->arch.xive_devices.native : + &kvm->arch.xive_devices.xics_on_xive; + struct kvmppc_xive *xive = *kvm_xive_device; + + if (!xive) { + xive = kzalloc(sizeof(*xive), GFP_KERNEL); + *kvm_xive_device = xive; + } else { + memset(xive, 0, sizeof(*xive)); + } + + return xive; +} + +/* + * Create a XICS device with XIVE backend. kvm->lock is held. + */ static int kvmppc_xive_create(struct kvm_device *dev, u32 type) { struct kvmppc_xive *xive; @@ -1845,13 +1955,14 @@ static int kvmppc_xive_create(struct kvm_device *dev, u32 type) pr_devel("Creating xive for partition\n"); - xive = kzalloc(sizeof(*xive), GFP_KERNEL); + xive = kvmppc_xive_get_device(kvm, type); if (!xive) return -ENOMEM; dev->private = xive; xive->dev = dev; xive->kvm = kvm; + mutex_init(&xive->lock); /* Already there ? */ if (kvm->arch.xive) @@ -1883,6 +1994,43 @@ static int kvmppc_xive_create(struct kvm_device *dev, u32 type) return 0; } +int kvmppc_xive_debug_show_queues(struct seq_file *m, struct kvm_vcpu *vcpu) +{ + struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu; + unsigned int i; + + for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) { + struct xive_q *q = &xc->queues[i]; + u32 i0, i1, idx; + + if (!q->qpage && !xc->esc_virq[i]) + continue; + + seq_printf(m, " [q%d]: ", i); + + if (q->qpage) { + idx = q->idx; + i0 = be32_to_cpup(q->qpage + idx); + idx = (idx + 1) & q->msk; + i1 = be32_to_cpup(q->qpage + idx); + seq_printf(m, "T=%d %08x %08x...\n", q->toggle, + i0, i1); + } + if (xc->esc_virq[i]) { + struct irq_data *d = irq_get_irq_data(xc->esc_virq[i]); + struct xive_irq_data *xd = + irq_data_get_irq_handler_data(d); + u64 pq = xive_vm_esb_load(xd, XIVE_ESB_GET); + + seq_printf(m, "E:%c%c I(%d:%llx:%llx)", + (pq & XIVE_ESB_VAL_P) ? 'P' : 'p', + (pq & XIVE_ESB_VAL_Q) ? 'Q' : 'q', + xc->esc_virq[i], pq, xd->eoi_page); + seq_puts(m, "\n"); + } + } + return 0; +} static int xive_debug_show(struct seq_file *m, void *private) { @@ -1908,7 +2056,6 @@ static int xive_debug_show(struct seq_file *m, void *private) kvm_for_each_vcpu(i, vcpu, kvm) { struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu; - unsigned int i; if (!xc) continue; @@ -1918,33 +2065,8 @@ static int xive_debug_show(struct seq_file *m, void *private) xc->server_num, xc->cppr, xc->hw_cppr, xc->mfrr, xc->pending, xc->stat_rm_h_xirr, xc->stat_vm_h_xirr); - for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) { - struct xive_q *q = &xc->queues[i]; - u32 i0, i1, idx; - - if (!q->qpage && !xc->esc_virq[i]) - continue; - - seq_printf(m, " [q%d]: ", i); - if (q->qpage) { - idx = q->idx; - i0 = be32_to_cpup(q->qpage + idx); - idx = (idx + 1) & q->msk; - i1 = be32_to_cpup(q->qpage + idx); - seq_printf(m, "T=%d %08x %08x... \n", q->toggle, i0, i1); - } - if (xc->esc_virq[i]) { - struct irq_data *d = irq_get_irq_data(xc->esc_virq[i]); - struct xive_irq_data *xd = irq_data_get_irq_handler_data(d); - u64 pq = xive_vm_esb_load(xd, XIVE_ESB_GET); - seq_printf(m, "E:%c%c I(%d:%llx:%llx)", - (pq & XIVE_ESB_VAL_P) ? 'P' : 'p', - (pq & XIVE_ESB_VAL_Q) ? 'Q' : 'q', - xc->esc_virq[i], pq, xd->eoi_page); - seq_printf(m, "\n"); - } - } + kvmppc_xive_debug_show_queues(m, vcpu); t_rm_h_xirr += xc->stat_rm_h_xirr; t_rm_h_ipoll += xc->stat_rm_h_ipoll; @@ -1999,7 +2121,7 @@ struct kvm_device_ops kvm_xive_ops = { .name = "kvm-xive", .create = kvmppc_xive_create, .init = kvmppc_xive_init, - .destroy = kvmppc_xive_free, + .release = kvmppc_xive_release, .set_attr = xive_set_attr, .get_attr = xive_get_attr, .has_attr = xive_has_attr, diff --git a/arch/powerpc/kvm/book3s_xive.h b/arch/powerpc/kvm/book3s_xive.h index a08ae6fd4c51..50494d0ee375 100644 --- a/arch/powerpc/kvm/book3s_xive.h +++ b/arch/powerpc/kvm/book3s_xive.h @@ -1,9 +1,6 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright 2017 Benjamin Herrenschmidt, IBM Corporation - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License, version 2, as - * published by the Free Software Foundation. */ #ifndef _KVM_PPC_BOOK3S_XIVE_H @@ -13,6 +10,13 @@ #include "book3s_xics.h" /* + * The XIVE Interrupt source numbers are within the range 0 to + * KVMPPC_XICS_NR_IRQS. + */ +#define KVMPPC_XIVE_FIRST_IRQ 0 +#define KVMPPC_XIVE_NR_IRQS KVMPPC_XICS_NR_IRQS + +/* * State for one guest irq source. * * For each guest source we allocate a HW interrupt in the XIVE @@ -54,6 +58,9 @@ struct kvmppc_xive_irq_state { bool saved_p; bool saved_q; u8 saved_scan_prio; + + /* Xive native */ + u32 eisn; /* Guest Effective IRQ number */ }; /* Select the "right" interrupt (IPI vs. passthrough) */ @@ -84,6 +91,11 @@ struct kvmppc_xive_src_block { struct kvmppc_xive_irq_state irq_state[KVMPPC_XICS_IRQ_PER_ICS]; }; +struct kvmppc_xive; + +struct kvmppc_xive_ops { + int (*reset_mapped)(struct kvm *kvm, unsigned long guest_irq); +}; struct kvmppc_xive { struct kvm *kvm; @@ -122,6 +134,11 @@ struct kvmppc_xive { /* Flags */ u8 single_escalation; + + struct kvmppc_xive_ops *ops; + struct address_space *mapping; + struct mutex mapping_lock; + struct mutex lock; }; #define KVMPPC_XIVE_Q_COUNT 8 @@ -198,6 +215,11 @@ static inline struct kvmppc_xive_src_block *kvmppc_xive_find_source(struct kvmpp return xive->src_blocks[bid]; } +static inline u32 kvmppc_xive_vp(struct kvmppc_xive *xive, u32 server) +{ + return xive->vp_base + kvmppc_pack_vcpu_id(xive->kvm, server); +} + /* * Mapping between guest priorities and host priorities * is as follow. @@ -248,5 +270,18 @@ extern int (*__xive_vm_h_ipi)(struct kvm_vcpu *vcpu, unsigned long server, extern int (*__xive_vm_h_cppr)(struct kvm_vcpu *vcpu, unsigned long cppr); extern int (*__xive_vm_h_eoi)(struct kvm_vcpu *vcpu, unsigned long xirr); +/* + * Common Xive routines for XICS-over-XIVE and XIVE native + */ +void kvmppc_xive_disable_vcpu_interrupts(struct kvm_vcpu *vcpu); +int kvmppc_xive_debug_show_queues(struct seq_file *m, struct kvm_vcpu *vcpu); +struct kvmppc_xive_src_block *kvmppc_xive_create_src_block( + struct kvmppc_xive *xive, int irq); +void kvmppc_xive_free_sources(struct kvmppc_xive_src_block *sb); +int kvmppc_xive_select_target(struct kvm *kvm, u32 *server, u8 prio); +int kvmppc_xive_attach_escalation(struct kvm_vcpu *vcpu, u8 prio, + bool single_escalation); +struct kvmppc_xive *kvmppc_xive_get_device(struct kvm *kvm, u32 type); + #endif /* CONFIG_KVM_XICS */ #endif /* _KVM_PPC_BOOK3S_XICS_H */ diff --git a/arch/powerpc/kvm/book3s_xive_native.c b/arch/powerpc/kvm/book3s_xive_native.c new file mode 100644 index 000000000000..5596c8ec221a --- /dev/null +++ b/arch/powerpc/kvm/book3s_xive_native.c @@ -0,0 +1,1263 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2017-2019, IBM Corporation. + */ + +#define pr_fmt(fmt) "xive-kvm: " fmt + +#include <linux/kernel.h> +#include <linux/kvm_host.h> +#include <linux/err.h> +#include <linux/gfp.h> +#include <linux/spinlock.h> +#include <linux/delay.h> +#include <linux/file.h> +#include <asm/uaccess.h> +#include <asm/kvm_book3s.h> +#include <asm/kvm_ppc.h> +#include <asm/hvcall.h> +#include <asm/xive.h> +#include <asm/xive-regs.h> +#include <asm/debug.h> +#include <asm/debugfs.h> +#include <asm/opal.h> + +#include <linux/debugfs.h> +#include <linux/seq_file.h> + +#include "book3s_xive.h" + +static u8 xive_vm_esb_load(struct xive_irq_data *xd, u32 offset) +{ + u64 val; + + if (xd->flags & XIVE_IRQ_FLAG_SHIFT_BUG) + offset |= offset << 4; + + val = in_be64(xd->eoi_mmio + offset); + return (u8)val; +} + +static void kvmppc_xive_native_cleanup_queue(struct kvm_vcpu *vcpu, int prio) +{ + struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu; + struct xive_q *q = &xc->queues[prio]; + + xive_native_disable_queue(xc->vp_id, q, prio); + if (q->qpage) { + put_page(virt_to_page(q->qpage)); + q->qpage = NULL; + } +} + +void kvmppc_xive_native_cleanup_vcpu(struct kvm_vcpu *vcpu) +{ + struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu; + int i; + + if (!kvmppc_xive_enabled(vcpu)) + return; + + if (!xc) + return; + + pr_devel("native_cleanup_vcpu(cpu=%d)\n", xc->server_num); + + /* Ensure no interrupt is still routed to that VP */ + xc->valid = false; + kvmppc_xive_disable_vcpu_interrupts(vcpu); + + /* Disable the VP */ + xive_native_disable_vp(xc->vp_id); + + /* Free the queues & associated interrupts */ + for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) { + /* Free the escalation irq */ + if (xc->esc_virq[i]) { + free_irq(xc->esc_virq[i], vcpu); + irq_dispose_mapping(xc->esc_virq[i]); + kfree(xc->esc_virq_names[i]); + xc->esc_virq[i] = 0; + } + + /* Free the queue */ + kvmppc_xive_native_cleanup_queue(vcpu, i); + } + + /* Free the VP */ + kfree(xc); + + /* Cleanup the vcpu */ + vcpu->arch.irq_type = KVMPPC_IRQ_DEFAULT; + vcpu->arch.xive_vcpu = NULL; +} + +int kvmppc_xive_native_connect_vcpu(struct kvm_device *dev, + struct kvm_vcpu *vcpu, u32 server_num) +{ + struct kvmppc_xive *xive = dev->private; + struct kvmppc_xive_vcpu *xc = NULL; + int rc; + + pr_devel("native_connect_vcpu(server=%d)\n", server_num); + + if (dev->ops != &kvm_xive_native_ops) { + pr_devel("Wrong ops !\n"); + return -EPERM; + } + if (xive->kvm != vcpu->kvm) + return -EPERM; + if (vcpu->arch.irq_type != KVMPPC_IRQ_DEFAULT) + return -EBUSY; + if (server_num >= (KVM_MAX_VCPUS * vcpu->kvm->arch.emul_smt_mode)) { + pr_devel("Out of bounds !\n"); + return -EINVAL; + } + + mutex_lock(&xive->lock); + + if (kvmppc_xive_find_server(vcpu->kvm, server_num)) { + pr_devel("Duplicate !\n"); + rc = -EEXIST; + goto bail; + } + + xc = kzalloc(sizeof(*xc), GFP_KERNEL); + if (!xc) { + rc = -ENOMEM; + goto bail; + } + + vcpu->arch.xive_vcpu = xc; + xc->xive = xive; + xc->vcpu = vcpu; + xc->server_num = server_num; + + xc->vp_id = kvmppc_xive_vp(xive, server_num); + xc->valid = true; + vcpu->arch.irq_type = KVMPPC_IRQ_XIVE; + + rc = xive_native_get_vp_info(xc->vp_id, &xc->vp_cam, &xc->vp_chip_id); + if (rc) { + pr_err("Failed to get VP info from OPAL: %d\n", rc); + goto bail; + } + + /* + * Enable the VP first as the single escalation mode will + * affect escalation interrupts numbering + */ + rc = xive_native_enable_vp(xc->vp_id, xive->single_escalation); + if (rc) { + pr_err("Failed to enable VP in OPAL: %d\n", rc); + goto bail; + } + + /* Configure VCPU fields for use by assembly push/pull */ + vcpu->arch.xive_saved_state.w01 = cpu_to_be64(0xff000000); + vcpu->arch.xive_cam_word = cpu_to_be32(xc->vp_cam | TM_QW1W2_VO); + + /* TODO: reset all queues to a clean state ? */ +bail: + mutex_unlock(&xive->lock); + if (rc) + kvmppc_xive_native_cleanup_vcpu(vcpu); + + return rc; +} + +/* + * Device passthrough support + */ +static int kvmppc_xive_native_reset_mapped(struct kvm *kvm, unsigned long irq) +{ + struct kvmppc_xive *xive = kvm->arch.xive; + pgoff_t esb_pgoff = KVM_XIVE_ESB_PAGE_OFFSET + irq * 2; + + if (irq >= KVMPPC_XIVE_NR_IRQS) + return -EINVAL; + + /* + * Clear the ESB pages of the IRQ number being mapped (or + * unmapped) into the guest and let the the VM fault handler + * repopulate with the appropriate ESB pages (device or IC) + */ + pr_debug("clearing esb pages for girq 0x%lx\n", irq); + mutex_lock(&xive->mapping_lock); + if (xive->mapping) + unmap_mapping_range(xive->mapping, + esb_pgoff << PAGE_SHIFT, + 2ull << PAGE_SHIFT, 1); + mutex_unlock(&xive->mapping_lock); + return 0; +} + +static struct kvmppc_xive_ops kvmppc_xive_native_ops = { + .reset_mapped = kvmppc_xive_native_reset_mapped, +}; + +static vm_fault_t xive_native_esb_fault(struct vm_fault *vmf) +{ + struct vm_area_struct *vma = vmf->vma; + struct kvm_device *dev = vma->vm_file->private_data; + struct kvmppc_xive *xive = dev->private; + struct kvmppc_xive_src_block *sb; + struct kvmppc_xive_irq_state *state; + struct xive_irq_data *xd; + u32 hw_num; + u16 src; + u64 page; + unsigned long irq; + u64 page_offset; + + /* + * Linux/KVM uses a two pages ESB setting, one for trigger and + * one for EOI + */ + page_offset = vmf->pgoff - vma->vm_pgoff; + irq = page_offset / 2; + + sb = kvmppc_xive_find_source(xive, irq, &src); + if (!sb) { + pr_devel("%s: source %lx not found !\n", __func__, irq); + return VM_FAULT_SIGBUS; + } + + state = &sb->irq_state[src]; + kvmppc_xive_select_irq(state, &hw_num, &xd); + + arch_spin_lock(&sb->lock); + + /* + * first/even page is for trigger + * second/odd page is for EOI and management. + */ + page = page_offset % 2 ? xd->eoi_page : xd->trig_page; + arch_spin_unlock(&sb->lock); + + if (WARN_ON(!page)) { + pr_err("%s: accessing invalid ESB page for source %lx !\n", + __func__, irq); + return VM_FAULT_SIGBUS; + } + + vmf_insert_pfn(vma, vmf->address, page >> PAGE_SHIFT); + return VM_FAULT_NOPAGE; +} + +static const struct vm_operations_struct xive_native_esb_vmops = { + .fault = xive_native_esb_fault, +}; + +static vm_fault_t xive_native_tima_fault(struct vm_fault *vmf) +{ + struct vm_area_struct *vma = vmf->vma; + + switch (vmf->pgoff - vma->vm_pgoff) { + case 0: /* HW - forbid access */ + case 1: /* HV - forbid access */ + return VM_FAULT_SIGBUS; + case 2: /* OS */ + vmf_insert_pfn(vma, vmf->address, xive_tima_os >> PAGE_SHIFT); + return VM_FAULT_NOPAGE; + case 3: /* USER - TODO */ + default: + return VM_FAULT_SIGBUS; + } +} + +static const struct vm_operations_struct xive_native_tima_vmops = { + .fault = xive_native_tima_fault, +}; + +static int kvmppc_xive_native_mmap(struct kvm_device *dev, + struct vm_area_struct *vma) +{ + struct kvmppc_xive *xive = dev->private; + + /* We only allow mappings at fixed offset for now */ + if (vma->vm_pgoff == KVM_XIVE_TIMA_PAGE_OFFSET) { + if (vma_pages(vma) > 4) + return -EINVAL; + vma->vm_ops = &xive_native_tima_vmops; + } else if (vma->vm_pgoff == KVM_XIVE_ESB_PAGE_OFFSET) { + if (vma_pages(vma) > KVMPPC_XIVE_NR_IRQS * 2) + return -EINVAL; + vma->vm_ops = &xive_native_esb_vmops; + } else { + return -EINVAL; + } + + vma->vm_flags |= VM_IO | VM_PFNMAP; + vma->vm_page_prot = pgprot_noncached_wc(vma->vm_page_prot); + + /* + * Grab the KVM device file address_space to be able to clear + * the ESB pages mapping when a device is passed-through into + * the guest. + */ + xive->mapping = vma->vm_file->f_mapping; + return 0; +} + +static int kvmppc_xive_native_set_source(struct kvmppc_xive *xive, long irq, + u64 addr) +{ + struct kvmppc_xive_src_block *sb; + struct kvmppc_xive_irq_state *state; + u64 __user *ubufp = (u64 __user *) addr; + u64 val; + u16 idx; + int rc; + + pr_devel("%s irq=0x%lx\n", __func__, irq); + + if (irq < KVMPPC_XIVE_FIRST_IRQ || irq >= KVMPPC_XIVE_NR_IRQS) + return -E2BIG; + + sb = kvmppc_xive_find_source(xive, irq, &idx); + if (!sb) { + pr_debug("No source, creating source block...\n"); + sb = kvmppc_xive_create_src_block(xive, irq); + if (!sb) { + pr_err("Failed to create block...\n"); + return -ENOMEM; + } + } + state = &sb->irq_state[idx]; + + if (get_user(val, ubufp)) { + pr_err("fault getting user info !\n"); + return -EFAULT; + } + + arch_spin_lock(&sb->lock); + + /* + * If the source doesn't already have an IPI, allocate + * one and get the corresponding data + */ + if (!state->ipi_number) { + state->ipi_number = xive_native_alloc_irq(); + if (state->ipi_number == 0) { + pr_err("Failed to allocate IRQ !\n"); + rc = -ENXIO; + goto unlock; + } + xive_native_populate_irq_data(state->ipi_number, + &state->ipi_data); + pr_debug("%s allocated hw_irq=0x%x for irq=0x%lx\n", __func__, + state->ipi_number, irq); + } + + /* Restore LSI state */ + if (val & KVM_XIVE_LEVEL_SENSITIVE) { + state->lsi = true; + if (val & KVM_XIVE_LEVEL_ASSERTED) + state->asserted = true; + pr_devel(" LSI ! Asserted=%d\n", state->asserted); + } + + /* Mask IRQ to start with */ + state->act_server = 0; + state->act_priority = MASKED; + xive_vm_esb_load(&state->ipi_data, XIVE_ESB_SET_PQ_01); + xive_native_configure_irq(state->ipi_number, 0, MASKED, 0); + + /* Increment the number of valid sources and mark this one valid */ + if (!state->valid) + xive->src_count++; + state->valid = true; + + rc = 0; + +unlock: + arch_spin_unlock(&sb->lock); + + return rc; +} + +static int kvmppc_xive_native_update_source_config(struct kvmppc_xive *xive, + struct kvmppc_xive_src_block *sb, + struct kvmppc_xive_irq_state *state, + u32 server, u8 priority, bool masked, + u32 eisn) +{ + struct kvm *kvm = xive->kvm; + u32 hw_num; + int rc = 0; + + arch_spin_lock(&sb->lock); + + if (state->act_server == server && state->act_priority == priority && + state->eisn == eisn) + goto unlock; + + pr_devel("new_act_prio=%d new_act_server=%d mask=%d act_server=%d act_prio=%d\n", + priority, server, masked, state->act_server, + state->act_priority); + + kvmppc_xive_select_irq(state, &hw_num, NULL); + + if (priority != MASKED && !masked) { + rc = kvmppc_xive_select_target(kvm, &server, priority); + if (rc) + goto unlock; + + state->act_priority = priority; + state->act_server = server; + state->eisn = eisn; + + rc = xive_native_configure_irq(hw_num, + kvmppc_xive_vp(xive, server), + priority, eisn); + } else { + state->act_priority = MASKED; + state->act_server = 0; + state->eisn = 0; + + rc = xive_native_configure_irq(hw_num, 0, MASKED, 0); + } + +unlock: + arch_spin_unlock(&sb->lock); + return rc; +} + +static int kvmppc_xive_native_set_source_config(struct kvmppc_xive *xive, + long irq, u64 addr) +{ + struct kvmppc_xive_src_block *sb; + struct kvmppc_xive_irq_state *state; + u64 __user *ubufp = (u64 __user *) addr; + u16 src; + u64 kvm_cfg; + u32 server; + u8 priority; + bool masked; + u32 eisn; + + sb = kvmppc_xive_find_source(xive, irq, &src); + if (!sb) + return -ENOENT; + + state = &sb->irq_state[src]; + + if (!state->valid) + return -EINVAL; + + if (get_user(kvm_cfg, ubufp)) + return -EFAULT; + + pr_devel("%s irq=0x%lx cfg=%016llx\n", __func__, irq, kvm_cfg); + + priority = (kvm_cfg & KVM_XIVE_SOURCE_PRIORITY_MASK) >> + KVM_XIVE_SOURCE_PRIORITY_SHIFT; + server = (kvm_cfg & KVM_XIVE_SOURCE_SERVER_MASK) >> + KVM_XIVE_SOURCE_SERVER_SHIFT; + masked = (kvm_cfg & KVM_XIVE_SOURCE_MASKED_MASK) >> + KVM_XIVE_SOURCE_MASKED_SHIFT; + eisn = (kvm_cfg & KVM_XIVE_SOURCE_EISN_MASK) >> + KVM_XIVE_SOURCE_EISN_SHIFT; + + if (priority != xive_prio_from_guest(priority)) { + pr_err("invalid priority for queue %d for VCPU %d\n", + priority, server); + return -EINVAL; + } + + return kvmppc_xive_native_update_source_config(xive, sb, state, server, + priority, masked, eisn); +} + +static int kvmppc_xive_native_sync_source(struct kvmppc_xive *xive, + long irq, u64 addr) +{ + struct kvmppc_xive_src_block *sb; + struct kvmppc_xive_irq_state *state; + struct xive_irq_data *xd; + u32 hw_num; + u16 src; + int rc = 0; + + pr_devel("%s irq=0x%lx", __func__, irq); + + sb = kvmppc_xive_find_source(xive, irq, &src); + if (!sb) + return -ENOENT; + + state = &sb->irq_state[src]; + + rc = -EINVAL; + + arch_spin_lock(&sb->lock); + + if (state->valid) { + kvmppc_xive_select_irq(state, &hw_num, &xd); + xive_native_sync_source(hw_num); + rc = 0; + } + + arch_spin_unlock(&sb->lock); + return rc; +} + +static int xive_native_validate_queue_size(u32 qshift) +{ + /* + * We only support 64K pages for the moment. This is also + * advertised in the DT property "ibm,xive-eq-sizes" + */ + switch (qshift) { + case 0: /* EQ reset */ + case 16: + return 0; + case 12: + case 21: + case 24: + default: + return -EINVAL; + } +} + +static int kvmppc_xive_native_set_queue_config(struct kvmppc_xive *xive, + long eq_idx, u64 addr) +{ + struct kvm *kvm = xive->kvm; + struct kvm_vcpu *vcpu; + struct kvmppc_xive_vcpu *xc; + void __user *ubufp = (void __user *) addr; + u32 server; + u8 priority; + struct kvm_ppc_xive_eq kvm_eq; + int rc; + __be32 *qaddr = 0; + struct page *page; + struct xive_q *q; + gfn_t gfn; + unsigned long page_size; + int srcu_idx; + + /* + * Demangle priority/server tuple from the EQ identifier + */ + priority = (eq_idx & KVM_XIVE_EQ_PRIORITY_MASK) >> + KVM_XIVE_EQ_PRIORITY_SHIFT; + server = (eq_idx & KVM_XIVE_EQ_SERVER_MASK) >> + KVM_XIVE_EQ_SERVER_SHIFT; + + if (copy_from_user(&kvm_eq, ubufp, sizeof(kvm_eq))) + return -EFAULT; + + vcpu = kvmppc_xive_find_server(kvm, server); + if (!vcpu) { + pr_err("Can't find server %d\n", server); + return -ENOENT; + } + xc = vcpu->arch.xive_vcpu; + + if (priority != xive_prio_from_guest(priority)) { + pr_err("Trying to restore invalid queue %d for VCPU %d\n", + priority, server); + return -EINVAL; + } + q = &xc->queues[priority]; + + pr_devel("%s VCPU %d priority %d fl:%x shift:%d addr:%llx g:%d idx:%d\n", + __func__, server, priority, kvm_eq.flags, + kvm_eq.qshift, kvm_eq.qaddr, kvm_eq.qtoggle, kvm_eq.qindex); + + /* reset queue and disable queueing */ + if (!kvm_eq.qshift) { + q->guest_qaddr = 0; + q->guest_qshift = 0; + + rc = xive_native_configure_queue(xc->vp_id, q, priority, + NULL, 0, true); + if (rc) { + pr_err("Failed to reset queue %d for VCPU %d: %d\n", + priority, xc->server_num, rc); + return rc; + } + + if (q->qpage) { + put_page(virt_to_page(q->qpage)); + q->qpage = NULL; + } + + return 0; + } + + /* + * sPAPR specifies a "Unconditional Notify (n) flag" for the + * H_INT_SET_QUEUE_CONFIG hcall which forces notification + * without using the coalescing mechanisms provided by the + * XIVE END ESBs. This is required on KVM as notification + * using the END ESBs is not supported. + */ + if (kvm_eq.flags != KVM_XIVE_EQ_ALWAYS_NOTIFY) { + pr_err("invalid flags %d\n", kvm_eq.flags); + return -EINVAL; + } + + rc = xive_native_validate_queue_size(kvm_eq.qshift); + if (rc) { + pr_err("invalid queue size %d\n", kvm_eq.qshift); + return rc; + } + + if (kvm_eq.qaddr & ((1ull << kvm_eq.qshift) - 1)) { + pr_err("queue page is not aligned %llx/%llx\n", kvm_eq.qaddr, + 1ull << kvm_eq.qshift); + return -EINVAL; + } + + srcu_idx = srcu_read_lock(&kvm->srcu); + gfn = gpa_to_gfn(kvm_eq.qaddr); + page = gfn_to_page(kvm, gfn); + if (is_error_page(page)) { + srcu_read_unlock(&kvm->srcu, srcu_idx); + pr_err("Couldn't get queue page %llx!\n", kvm_eq.qaddr); + return -EINVAL; + } + + page_size = kvm_host_page_size(kvm, gfn); + if (1ull << kvm_eq.qshift > page_size) { + srcu_read_unlock(&kvm->srcu, srcu_idx); + pr_warn("Incompatible host page size %lx!\n", page_size); + return -EINVAL; + } + + qaddr = page_to_virt(page) + (kvm_eq.qaddr & ~PAGE_MASK); + srcu_read_unlock(&kvm->srcu, srcu_idx); + + /* + * Backup the queue page guest address to the mark EQ page + * dirty for migration. + */ + q->guest_qaddr = kvm_eq.qaddr; + q->guest_qshift = kvm_eq.qshift; + + /* + * Unconditional Notification is forced by default at the + * OPAL level because the use of END ESBs is not supported by + * Linux. + */ + rc = xive_native_configure_queue(xc->vp_id, q, priority, + (__be32 *) qaddr, kvm_eq.qshift, true); + if (rc) { + pr_err("Failed to configure queue %d for VCPU %d: %d\n", + priority, xc->server_num, rc); + put_page(page); + return rc; + } + + /* + * Only restore the queue state when needed. When doing the + * H_INT_SET_SOURCE_CONFIG hcall, it should not. + */ + if (kvm_eq.qtoggle != 1 || kvm_eq.qindex != 0) { + rc = xive_native_set_queue_state(xc->vp_id, priority, + kvm_eq.qtoggle, + kvm_eq.qindex); + if (rc) + goto error; + } + + rc = kvmppc_xive_attach_escalation(vcpu, priority, + xive->single_escalation); +error: + if (rc) + kvmppc_xive_native_cleanup_queue(vcpu, priority); + return rc; +} + +static int kvmppc_xive_native_get_queue_config(struct kvmppc_xive *xive, + long eq_idx, u64 addr) +{ + struct kvm *kvm = xive->kvm; + struct kvm_vcpu *vcpu; + struct kvmppc_xive_vcpu *xc; + struct xive_q *q; + void __user *ubufp = (u64 __user *) addr; + u32 server; + u8 priority; + struct kvm_ppc_xive_eq kvm_eq; + u64 qaddr; + u64 qshift; + u64 qeoi_page; + u32 escalate_irq; + u64 qflags; + int rc; + + /* + * Demangle priority/server tuple from the EQ identifier + */ + priority = (eq_idx & KVM_XIVE_EQ_PRIORITY_MASK) >> + KVM_XIVE_EQ_PRIORITY_SHIFT; + server = (eq_idx & KVM_XIVE_EQ_SERVER_MASK) >> + KVM_XIVE_EQ_SERVER_SHIFT; + + vcpu = kvmppc_xive_find_server(kvm, server); + if (!vcpu) { + pr_err("Can't find server %d\n", server); + return -ENOENT; + } + xc = vcpu->arch.xive_vcpu; + + if (priority != xive_prio_from_guest(priority)) { + pr_err("invalid priority for queue %d for VCPU %d\n", + priority, server); + return -EINVAL; + } + q = &xc->queues[priority]; + + memset(&kvm_eq, 0, sizeof(kvm_eq)); + + if (!q->qpage) + return 0; + + rc = xive_native_get_queue_info(xc->vp_id, priority, &qaddr, &qshift, + &qeoi_page, &escalate_irq, &qflags); + if (rc) + return rc; + + kvm_eq.flags = 0; + if (qflags & OPAL_XIVE_EQ_ALWAYS_NOTIFY) + kvm_eq.flags |= KVM_XIVE_EQ_ALWAYS_NOTIFY; + + kvm_eq.qshift = q->guest_qshift; + kvm_eq.qaddr = q->guest_qaddr; + + rc = xive_native_get_queue_state(xc->vp_id, priority, &kvm_eq.qtoggle, + &kvm_eq.qindex); + if (rc) + return rc; + + pr_devel("%s VCPU %d priority %d fl:%x shift:%d addr:%llx g:%d idx:%d\n", + __func__, server, priority, kvm_eq.flags, + kvm_eq.qshift, kvm_eq.qaddr, kvm_eq.qtoggle, kvm_eq.qindex); + + if (copy_to_user(ubufp, &kvm_eq, sizeof(kvm_eq))) + return -EFAULT; + + return 0; +} + +static void kvmppc_xive_reset_sources(struct kvmppc_xive_src_block *sb) +{ + int i; + + for (i = 0; i < KVMPPC_XICS_IRQ_PER_ICS; i++) { + struct kvmppc_xive_irq_state *state = &sb->irq_state[i]; + + if (!state->valid) + continue; + + if (state->act_priority == MASKED) + continue; + + state->eisn = 0; + state->act_server = 0; + state->act_priority = MASKED; + xive_vm_esb_load(&state->ipi_data, XIVE_ESB_SET_PQ_01); + xive_native_configure_irq(state->ipi_number, 0, MASKED, 0); + if (state->pt_number) { + xive_vm_esb_load(state->pt_data, XIVE_ESB_SET_PQ_01); + xive_native_configure_irq(state->pt_number, + 0, MASKED, 0); + } + } +} + +static int kvmppc_xive_reset(struct kvmppc_xive *xive) +{ + struct kvm *kvm = xive->kvm; + struct kvm_vcpu *vcpu; + unsigned int i; + + pr_devel("%s\n", __func__); + + mutex_lock(&xive->lock); + + kvm_for_each_vcpu(i, vcpu, kvm) { + struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu; + unsigned int prio; + + if (!xc) + continue; + + kvmppc_xive_disable_vcpu_interrupts(vcpu); + + for (prio = 0; prio < KVMPPC_XIVE_Q_COUNT; prio++) { + + /* Single escalation, no queue 7 */ + if (prio == 7 && xive->single_escalation) + break; + + if (xc->esc_virq[prio]) { + free_irq(xc->esc_virq[prio], vcpu); + irq_dispose_mapping(xc->esc_virq[prio]); + kfree(xc->esc_virq_names[prio]); + xc->esc_virq[prio] = 0; + } + + kvmppc_xive_native_cleanup_queue(vcpu, prio); + } + } + + for (i = 0; i <= xive->max_sbid; i++) { + struct kvmppc_xive_src_block *sb = xive->src_blocks[i]; + + if (sb) { + arch_spin_lock(&sb->lock); + kvmppc_xive_reset_sources(sb); + arch_spin_unlock(&sb->lock); + } + } + + mutex_unlock(&xive->lock); + + return 0; +} + +static void kvmppc_xive_native_sync_sources(struct kvmppc_xive_src_block *sb) +{ + int j; + + for (j = 0; j < KVMPPC_XICS_IRQ_PER_ICS; j++) { + struct kvmppc_xive_irq_state *state = &sb->irq_state[j]; + struct xive_irq_data *xd; + u32 hw_num; + + if (!state->valid) + continue; + + /* + * The struct kvmppc_xive_irq_state reflects the state + * of the EAS configuration and not the state of the + * source. The source is masked setting the PQ bits to + * '-Q', which is what is being done before calling + * the KVM_DEV_XIVE_EQ_SYNC control. + * + * If a source EAS is configured, OPAL syncs the XIVE + * IC of the source and the XIVE IC of the previous + * target if any. + * + * So it should be fine ignoring MASKED sources as + * they have been synced already. + */ + if (state->act_priority == MASKED) + continue; + + kvmppc_xive_select_irq(state, &hw_num, &xd); + xive_native_sync_source(hw_num); + xive_native_sync_queue(hw_num); + } +} + +static int kvmppc_xive_native_vcpu_eq_sync(struct kvm_vcpu *vcpu) +{ + struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu; + unsigned int prio; + int srcu_idx; + + if (!xc) + return -ENOENT; + + for (prio = 0; prio < KVMPPC_XIVE_Q_COUNT; prio++) { + struct xive_q *q = &xc->queues[prio]; + + if (!q->qpage) + continue; + + /* Mark EQ page dirty for migration */ + srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); + mark_page_dirty(vcpu->kvm, gpa_to_gfn(q->guest_qaddr)); + srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx); + } + return 0; +} + +static int kvmppc_xive_native_eq_sync(struct kvmppc_xive *xive) +{ + struct kvm *kvm = xive->kvm; + struct kvm_vcpu *vcpu; + unsigned int i; + + pr_devel("%s\n", __func__); + + mutex_lock(&xive->lock); + for (i = 0; i <= xive->max_sbid; i++) { + struct kvmppc_xive_src_block *sb = xive->src_blocks[i]; + + if (sb) { + arch_spin_lock(&sb->lock); + kvmppc_xive_native_sync_sources(sb); + arch_spin_unlock(&sb->lock); + } + } + + kvm_for_each_vcpu(i, vcpu, kvm) { + kvmppc_xive_native_vcpu_eq_sync(vcpu); + } + mutex_unlock(&xive->lock); + + return 0; +} + +static int kvmppc_xive_native_set_attr(struct kvm_device *dev, + struct kvm_device_attr *attr) +{ + struct kvmppc_xive *xive = dev->private; + + switch (attr->group) { + case KVM_DEV_XIVE_GRP_CTRL: + switch (attr->attr) { + case KVM_DEV_XIVE_RESET: + return kvmppc_xive_reset(xive); + case KVM_DEV_XIVE_EQ_SYNC: + return kvmppc_xive_native_eq_sync(xive); + } + break; + case KVM_DEV_XIVE_GRP_SOURCE: + return kvmppc_xive_native_set_source(xive, attr->attr, + attr->addr); + case KVM_DEV_XIVE_GRP_SOURCE_CONFIG: + return kvmppc_xive_native_set_source_config(xive, attr->attr, + attr->addr); + case KVM_DEV_XIVE_GRP_EQ_CONFIG: + return kvmppc_xive_native_set_queue_config(xive, attr->attr, + attr->addr); + case KVM_DEV_XIVE_GRP_SOURCE_SYNC: + return kvmppc_xive_native_sync_source(xive, attr->attr, + attr->addr); + } + return -ENXIO; +} + +static int kvmppc_xive_native_get_attr(struct kvm_device *dev, + struct kvm_device_attr *attr) +{ + struct kvmppc_xive *xive = dev->private; + + switch (attr->group) { + case KVM_DEV_XIVE_GRP_EQ_CONFIG: + return kvmppc_xive_native_get_queue_config(xive, attr->attr, + attr->addr); + } + return -ENXIO; +} + +static int kvmppc_xive_native_has_attr(struct kvm_device *dev, + struct kvm_device_attr *attr) +{ + switch (attr->group) { + case KVM_DEV_XIVE_GRP_CTRL: + switch (attr->attr) { + case KVM_DEV_XIVE_RESET: + case KVM_DEV_XIVE_EQ_SYNC: + return 0; + } + break; + case KVM_DEV_XIVE_GRP_SOURCE: + case KVM_DEV_XIVE_GRP_SOURCE_CONFIG: + case KVM_DEV_XIVE_GRP_SOURCE_SYNC: + if (attr->attr >= KVMPPC_XIVE_FIRST_IRQ && + attr->attr < KVMPPC_XIVE_NR_IRQS) + return 0; + break; + case KVM_DEV_XIVE_GRP_EQ_CONFIG: + return 0; + } + return -ENXIO; +} + +/* + * Called when device fd is closed. kvm->lock is held. + */ +static void kvmppc_xive_native_release(struct kvm_device *dev) +{ + struct kvmppc_xive *xive = dev->private; + struct kvm *kvm = xive->kvm; + struct kvm_vcpu *vcpu; + int i; + + pr_devel("Releasing xive native device\n"); + + /* + * Clear the KVM device file address_space which is used to + * unmap the ESB pages when a device is passed-through. + */ + mutex_lock(&xive->mapping_lock); + xive->mapping = NULL; + mutex_unlock(&xive->mapping_lock); + + /* + * Since this is the device release function, we know that + * userspace does not have any open fd or mmap referring to + * the device. Therefore there can not be any of the + * device attribute set/get, mmap, or page fault functions + * being executed concurrently, and similarly, the + * connect_vcpu and set/clr_mapped functions also cannot + * be being executed. + */ + + debugfs_remove(xive->dentry); + + /* + * We should clean up the vCPU interrupt presenters first. + */ + kvm_for_each_vcpu(i, vcpu, kvm) { + /* + * Take vcpu->mutex to ensure that no one_reg get/set ioctl + * (i.e. kvmppc_xive_native_[gs]et_vp) can be being done. + * Holding the vcpu->mutex also means that the vcpu cannot + * be executing the KVM_RUN ioctl, and therefore it cannot + * be executing the XIVE push or pull code or accessing + * the XIVE MMIO regions. + */ + mutex_lock(&vcpu->mutex); + kvmppc_xive_native_cleanup_vcpu(vcpu); + mutex_unlock(&vcpu->mutex); + } + + /* + * Now that we have cleared vcpu->arch.xive_vcpu, vcpu->arch.irq_type + * and vcpu->arch.xive_esc_[vr]addr on each vcpu, we are safe + * against xive code getting called during vcpu execution or + * set/get one_reg operations. + */ + kvm->arch.xive = NULL; + + for (i = 0; i <= xive->max_sbid; i++) { + if (xive->src_blocks[i]) + kvmppc_xive_free_sources(xive->src_blocks[i]); + kfree(xive->src_blocks[i]); + xive->src_blocks[i] = NULL; + } + + if (xive->vp_base != XIVE_INVALID_VP) + xive_native_free_vp_block(xive->vp_base); + + /* + * A reference of the kvmppc_xive pointer is now kept under + * the xive_devices struct of the machine for reuse. It is + * freed when the VM is destroyed for now until we fix all the + * execution paths. + */ + + kfree(dev); +} + +/* + * Create a XIVE device. kvm->lock is held. + */ +static int kvmppc_xive_native_create(struct kvm_device *dev, u32 type) +{ + struct kvmppc_xive *xive; + struct kvm *kvm = dev->kvm; + int ret = 0; + + pr_devel("Creating xive native device\n"); + + if (kvm->arch.xive) + return -EEXIST; + + xive = kvmppc_xive_get_device(kvm, type); + if (!xive) + return -ENOMEM; + + dev->private = xive; + xive->dev = dev; + xive->kvm = kvm; + kvm->arch.xive = xive; + mutex_init(&xive->mapping_lock); + mutex_init(&xive->lock); + + /* + * Allocate a bunch of VPs. KVM_MAX_VCPUS is a large value for + * a default. Getting the max number of CPUs the VM was + * configured with would improve our usage of the XIVE VP space. + */ + xive->vp_base = xive_native_alloc_vp_block(KVM_MAX_VCPUS); + pr_devel("VP_Base=%x\n", xive->vp_base); + + if (xive->vp_base == XIVE_INVALID_VP) + ret = -ENXIO; + + xive->single_escalation = xive_native_has_single_escalation(); + xive->ops = &kvmppc_xive_native_ops; + + if (ret) + kfree(xive); + + return ret; +} + +/* + * Interrupt Pending Buffer (IPB) offset + */ +#define TM_IPB_SHIFT 40 +#define TM_IPB_MASK (((u64) 0xFF) << TM_IPB_SHIFT) + +int kvmppc_xive_native_get_vp(struct kvm_vcpu *vcpu, union kvmppc_one_reg *val) +{ + struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu; + u64 opal_state; + int rc; + + if (!kvmppc_xive_enabled(vcpu)) + return -EPERM; + + if (!xc) + return -ENOENT; + + /* Thread context registers. We only care about IPB and CPPR */ + val->xive_timaval[0] = vcpu->arch.xive_saved_state.w01; + + /* Get the VP state from OPAL */ + rc = xive_native_get_vp_state(xc->vp_id, &opal_state); + if (rc) + return rc; + + /* + * Capture the backup of IPB register in the NVT structure and + * merge it in our KVM VP state. + */ + val->xive_timaval[0] |= cpu_to_be64(opal_state & TM_IPB_MASK); + + pr_devel("%s NSR=%02x CPPR=%02x IBP=%02x PIPR=%02x w01=%016llx w2=%08x opal=%016llx\n", + __func__, + vcpu->arch.xive_saved_state.nsr, + vcpu->arch.xive_saved_state.cppr, + vcpu->arch.xive_saved_state.ipb, + vcpu->arch.xive_saved_state.pipr, + vcpu->arch.xive_saved_state.w01, + (u32) vcpu->arch.xive_cam_word, opal_state); + + return 0; +} + +int kvmppc_xive_native_set_vp(struct kvm_vcpu *vcpu, union kvmppc_one_reg *val) +{ + struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu; + struct kvmppc_xive *xive = vcpu->kvm->arch.xive; + + pr_devel("%s w01=%016llx vp=%016llx\n", __func__, + val->xive_timaval[0], val->xive_timaval[1]); + + if (!kvmppc_xive_enabled(vcpu)) + return -EPERM; + + if (!xc || !xive) + return -ENOENT; + + /* We can't update the state of a "pushed" VCPU */ + if (WARN_ON(vcpu->arch.xive_pushed)) + return -EBUSY; + + /* + * Restore the thread context registers. IPB and CPPR should + * be the only ones that matter. + */ + vcpu->arch.xive_saved_state.w01 = val->xive_timaval[0]; + + /* + * There is no need to restore the XIVE internal state (IPB + * stored in the NVT) as the IPB register was merged in KVM VP + * state when captured. + */ + return 0; +} + +static int xive_native_debug_show(struct seq_file *m, void *private) +{ + struct kvmppc_xive *xive = m->private; + struct kvm *kvm = xive->kvm; + struct kvm_vcpu *vcpu; + unsigned int i; + + if (!kvm) + return 0; + + seq_puts(m, "=========\nVCPU state\n=========\n"); + + kvm_for_each_vcpu(i, vcpu, kvm) { + struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu; + + if (!xc) + continue; + + seq_printf(m, "cpu server %#x NSR=%02x CPPR=%02x IBP=%02x PIPR=%02x w01=%016llx w2=%08x\n", + xc->server_num, + vcpu->arch.xive_saved_state.nsr, + vcpu->arch.xive_saved_state.cppr, + vcpu->arch.xive_saved_state.ipb, + vcpu->arch.xive_saved_state.pipr, + vcpu->arch.xive_saved_state.w01, + (u32) vcpu->arch.xive_cam_word); + + kvmppc_xive_debug_show_queues(m, vcpu); + } + + return 0; +} + +static int xive_native_debug_open(struct inode *inode, struct file *file) +{ + return single_open(file, xive_native_debug_show, inode->i_private); +} + +static const struct file_operations xive_native_debug_fops = { + .open = xive_native_debug_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static void xive_native_debugfs_init(struct kvmppc_xive *xive) +{ + char *name; + + name = kasprintf(GFP_KERNEL, "kvm-xive-%p", xive); + if (!name) { + pr_err("%s: no memory for name\n", __func__); + return; + } + + xive->dentry = debugfs_create_file(name, 0444, powerpc_debugfs_root, + xive, &xive_native_debug_fops); + + pr_debug("%s: created %s\n", __func__, name); + kfree(name); +} + +static void kvmppc_xive_native_init(struct kvm_device *dev) +{ + struct kvmppc_xive *xive = (struct kvmppc_xive *)dev->private; + + /* Register some debug interfaces */ + xive_native_debugfs_init(xive); +} + +struct kvm_device_ops kvm_xive_native_ops = { + .name = "kvm-xive-native", + .create = kvmppc_xive_native_create, + .init = kvmppc_xive_native_init, + .release = kvmppc_xive_native_release, + .set_attr = kvmppc_xive_native_set_attr, + .get_attr = kvmppc_xive_native_get_attr, + .has_attr = kvmppc_xive_native_has_attr, + .mmap = kvmppc_xive_native_mmap, +}; + +void kvmppc_xive_native_init_module(void) +{ + ; +} + +void kvmppc_xive_native_exit_module(void) +{ + ; +} diff --git a/arch/powerpc/kvm/book3s_xive_template.c b/arch/powerpc/kvm/book3s_xive_template.c index 033363d6e764..a8a900ace1e6 100644 --- a/arch/powerpc/kvm/book3s_xive_template.c +++ b/arch/powerpc/kvm/book3s_xive_template.c @@ -1,9 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright 2017 Benjamin Herrenschmidt, IBM Corporation - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License, version 2, as - * published by the Free Software Foundation. */ /* File to be included by other .c files */ @@ -130,24 +127,14 @@ static u32 GLUE(X_PFX,scan_interrupts)(struct kvmppc_xive_vcpu *xc, */ prio = ffs(pending) - 1; - /* - * If the most favoured prio we found pending is less - * favored (or equal) than a pending IPI, we return - * the IPI instead. - * - * Note: If pending was 0 and mfrr is 0xff, we will - * not spurriously take an IPI because mfrr cannot - * then be smaller than cppr. - */ - if (prio >= xc->mfrr && xc->mfrr < xc->cppr) { - prio = xc->mfrr; - hirq = XICS_IPI; - break; - } - /* Don't scan past the guest cppr */ - if (prio >= xc->cppr || prio > 7) + if (prio >= xc->cppr || prio > 7) { + if (xc->mfrr < xc->cppr) { + prio = xc->mfrr; + hirq = XICS_IPI; + } break; + } /* Grab queue and pointers */ q = &xc->queues[prio]; @@ -184,9 +171,12 @@ skip_ipi: * been set and another occurrence of the IPI will trigger. */ if (hirq == XICS_IPI || (prio == 0 && !qpage)) { - if (scan_type == scan_fetch) + if (scan_type == scan_fetch) { GLUE(X_PFX,source_eoi)(xc->vp_ipi, &xc->vp_ipi_data); + q->idx = idx; + q->toggle = toggle; + } /* Loop back on same queue with updated idx/toggle */ #ifdef XIVE_RUNTIME_CHECKS WARN_ON(hirq && hirq != XICS_IPI); @@ -199,32 +189,41 @@ skip_ipi: if (hirq == XICS_DUMMY) goto skip_ipi; - /* If fetching, update queue pointers */ - if (scan_type == scan_fetch) { - q->idx = idx; - q->toggle = toggle; - } - - /* Something found, stop searching */ - if (hirq) - break; - - /* Clear the pending bit on the now empty queue */ - pending &= ~(1 << prio); + /* Clear the pending bit if the queue is now empty */ + if (!hirq) { + pending &= ~(1 << prio); - /* - * Check if the queue count needs adjusting due to - * interrupts being moved away. - */ - if (atomic_read(&q->pending_count)) { - int p = atomic_xchg(&q->pending_count, 0); - if (p) { + /* + * Check if the queue count needs adjusting due to + * interrupts being moved away. + */ + if (atomic_read(&q->pending_count)) { + int p = atomic_xchg(&q->pending_count, 0); + if (p) { #ifdef XIVE_RUNTIME_CHECKS - WARN_ON(p > atomic_read(&q->count)); + WARN_ON(p > atomic_read(&q->count)); #endif - atomic_sub(p, &q->count); + atomic_sub(p, &q->count); + } } } + + /* + * If the most favoured prio we found pending is less + * favored (or equal) than a pending IPI, we return + * the IPI instead. + */ + if (prio >= xc->mfrr && xc->mfrr < xc->cppr) { + prio = xc->mfrr; + hirq = XICS_IPI; + break; + } + + /* If fetching, update queue pointers */ + if (scan_type == scan_fetch) { + q->idx = idx; + q->toggle = toggle; + } } /* If we are just taking a "peek", do nothing else */ diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index dbec4128bb51..be9a45874194 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -1,16 +1,5 @@ +// SPDX-License-Identifier: GPL-2.0-only /* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License, version 2, as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. * * Copyright IBM Corp. 2007 * Copyright 2010-2011 Freescale Semiconductor, Inc. diff --git a/arch/powerpc/kvm/booke.h b/arch/powerpc/kvm/booke.h index 22ba08ea68e9..9d3169fbce55 100644 --- a/arch/powerpc/kvm/booke.h +++ b/arch/powerpc/kvm/booke.h @@ -1,16 +1,5 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License, version 2, as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. * * Copyright IBM Corp. 2008 * diff --git a/arch/powerpc/kvm/booke_emulate.c b/arch/powerpc/kvm/booke_emulate.c index d23e582f0fee..689ff5f90e9e 100644 --- a/arch/powerpc/kvm/booke_emulate.c +++ b/arch/powerpc/kvm/booke_emulate.c @@ -1,16 +1,5 @@ +// SPDX-License-Identifier: GPL-2.0-only /* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License, version 2, as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. * * Copyright IBM Corp. 2008 * Copyright 2011 Freescale Semiconductor, Inc. diff --git a/arch/powerpc/kvm/booke_interrupts.S b/arch/powerpc/kvm/booke_interrupts.S index 84c308a9a371..2e56ab5a5f55 100644 --- a/arch/powerpc/kvm/booke_interrupts.S +++ b/arch/powerpc/kvm/booke_interrupts.S @@ -1,16 +1,5 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License, version 2, as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. * * Copyright IBM Corp. 2007 * Copyright 2011 Freescale Semiconductor, Inc. diff --git a/arch/powerpc/kvm/bookehv_interrupts.S b/arch/powerpc/kvm/bookehv_interrupts.S index 4e5081e58409..c577ba4b3169 100644 --- a/arch/powerpc/kvm/bookehv_interrupts.S +++ b/arch/powerpc/kvm/bookehv_interrupts.S @@ -1,16 +1,5 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License, version 2, as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. * * Copyright (C) 2010-2011 Freescale Semiconductor, Inc. * diff --git a/arch/powerpc/kvm/e500.c b/arch/powerpc/kvm/e500.c index afd3c255a427..b5a848a55504 100644 --- a/arch/powerpc/kvm/e500.c +++ b/arch/powerpc/kvm/e500.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 2008-2011 Freescale Semiconductor, Inc. All rights reserved. * @@ -6,10 +7,6 @@ * Description: * This file is derived from arch/powerpc/kvm/44x.c, * by Hollis Blanchard <hollisb@us.ibm.com>. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License, version 2, as - * published by the Free Software Foundation. */ #include <linux/kvm_host.h> diff --git a/arch/powerpc/kvm/e500.h b/arch/powerpc/kvm/e500.h index 962ee90a0dfe..c3ef751465fb 100644 --- a/arch/powerpc/kvm/e500.h +++ b/arch/powerpc/kvm/e500.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (C) 2008-2011 Freescale Semiconductor, Inc. All rights reserved. * @@ -10,10 +11,6 @@ * This file is based on arch/powerpc/kvm/44x_tlb.h and * arch/powerpc/include/asm/kvm_44x.h by Hollis Blanchard <hollisb@us.ibm.com>, * Copyright IBM Corp. 2007-2008 - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License, version 2, as - * published by the Free Software Foundation. */ #ifndef KVM_E500_H diff --git a/arch/powerpc/kvm/e500_emulate.c b/arch/powerpc/kvm/e500_emulate.c index fde1de08b4d7..3d0d3ec5be96 100644 --- a/arch/powerpc/kvm/e500_emulate.c +++ b/arch/powerpc/kvm/e500_emulate.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 2008-2011 Freescale Semiconductor, Inc. All rights reserved. * @@ -6,10 +7,6 @@ * Description: * This file is derived from arch/powerpc/kvm/44x_emulate.c, * by Hollis Blanchard <hollisb@us.ibm.com>. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License, version 2, as - * published by the Free Software Foundation. */ #include <asm/kvm_ppc.h> diff --git a/arch/powerpc/kvm/e500_mmu.c b/arch/powerpc/kvm/e500_mmu.c index 24296f4cadc6..2d910b87e441 100644 --- a/arch/powerpc/kvm/e500_mmu.c +++ b/arch/powerpc/kvm/e500_mmu.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 2008-2013 Freescale Semiconductor, Inc. All rights reserved. * @@ -10,10 +11,6 @@ * Description: * This file is based on arch/powerpc/kvm/44x_tlb.c, * by Hollis Blanchard <hollisb@us.ibm.com>. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License, version 2, as - * published by the Free Software Foundation. */ #include <linux/kernel.h> @@ -783,7 +780,7 @@ int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu, if (!pages) return -ENOMEM; - ret = get_user_pages_fast(cfg->array, num_pages, 1, pages); + ret = get_user_pages_fast(cfg->array, num_pages, FOLL_WRITE, pages); if (ret < 0) goto free_pages; diff --git a/arch/powerpc/kvm/e500_mmu_host.c b/arch/powerpc/kvm/e500_mmu_host.c index c3f312b2bcb3..321db0fdb9db 100644 --- a/arch/powerpc/kvm/e500_mmu_host.c +++ b/arch/powerpc/kvm/e500_mmu_host.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 2008-2013 Freescale Semiconductor, Inc. All rights reserved. * @@ -10,10 +11,6 @@ * Description: * This file is based on arch/powerpc/kvm/44x_tlb.c, * by Hollis Blanchard <hollisb@us.ibm.com>. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License, version 2, as - * published by the Free Software Foundation. */ #include <linux/kernel.h> diff --git a/arch/powerpc/kvm/e500_mmu_host.h b/arch/powerpc/kvm/e500_mmu_host.h index 7624835b76c7..d8178cc86b30 100644 --- a/arch/powerpc/kvm/e500_mmu_host.h +++ b/arch/powerpc/kvm/e500_mmu_host.h @@ -1,9 +1,6 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (C) 2008-2013 Freescale Semiconductor, Inc. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License, version 2, as - * published by the Free Software Foundation. */ #ifndef KVM_E500_MMU_HOST_H diff --git a/arch/powerpc/kvm/e500mc.c b/arch/powerpc/kvm/e500mc.c index d31645491a93..318e65c65999 100644 --- a/arch/powerpc/kvm/e500mc.c +++ b/arch/powerpc/kvm/e500mc.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 2010,2012 Freescale Semiconductor, Inc. All rights reserved. * @@ -6,10 +7,6 @@ * Description: * This file is derived from arch/powerpc/kvm/e500.c, * by Yu Liu <yu.liu@freescale.com>. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License, version 2, as - * published by the Free Software Foundation. */ #include <linux/kvm_host.h> diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c index 9f5b8c01c4e1..bb4d09c1ad56 100644 --- a/arch/powerpc/kvm/emulate.c +++ b/arch/powerpc/kvm/emulate.c @@ -1,16 +1,5 @@ +// SPDX-License-Identifier: GPL-2.0-only /* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License, version 2, as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. * * Copyright IBM Corp. 2007 * Copyright 2011 Freescale Semiconductor, Inc. diff --git a/arch/powerpc/kvm/emulate_loadstore.c b/arch/powerpc/kvm/emulate_loadstore.c index f91b1309a0a8..9208c82ed08d 100644 --- a/arch/powerpc/kvm/emulate_loadstore.c +++ b/arch/powerpc/kvm/emulate_loadstore.c @@ -1,16 +1,5 @@ +// SPDX-License-Identifier: GPL-2.0-only /* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License, version 2, as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. * * Copyright IBM Corp. 2007 * Copyright 2011 Freescale Semiconductor, Inc. diff --git a/arch/powerpc/kvm/fpu.S b/arch/powerpc/kvm/fpu.S index bf68d597549e..3dfae0cb6228 100644 --- a/arch/powerpc/kvm/fpu.S +++ b/arch/powerpc/kvm/fpu.S @@ -1,13 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ /* * FPU helper code to use FPU operations from inside the kernel * * Copyright (C) 2010 Alexander Graf (agraf@suse.de) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * */ #include <asm/reg.h> diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 8885377ec3e0..6d704ad2472b 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -1,16 +1,5 @@ +// SPDX-License-Identifier: GPL-2.0-only /* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License, version 2, as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. * * Copyright IBM Corp. 2007 * @@ -570,6 +559,16 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_PPC_GET_CPU_CHAR: r = 1; break; +#ifdef CONFIG_KVM_XIVE + case KVM_CAP_PPC_IRQ_XIVE: + /* + * We need XIVE to be enabled on the platform (implies + * a POWER9 processor) and the PowerNV platform, as + * nested is not yet supported. + */ + r = xive_enabled() && !!cpu_has_feature(CPU_FTR_HVMODE); + break; +#endif case KVM_CAP_PPC_ALLOC_HTAB: r = hv_enabled; @@ -644,12 +643,12 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) else r = num_online_cpus(); break; - case KVM_CAP_NR_MEMSLOTS: - r = KVM_USER_MEM_SLOTS; - break; case KVM_CAP_MAX_VCPUS: r = KVM_MAX_VCPUS; break; + case KVM_CAP_MAX_VCPU_ID: + r = KVM_MAX_VCPU_ID; + break; #ifdef CONFIG_PPC_BOOK3S_64 case KVM_CAP_PPC_GET_SMMU_INFO: r = 1; @@ -753,6 +752,9 @@ void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu) else kvmppc_xics_free_icp(vcpu); break; + case KVMPPC_IRQ_XIVE: + kvmppc_xive_native_cleanup_vcpu(vcpu); + break; } kvmppc_core_vcpu_free(vcpu); @@ -1941,6 +1943,30 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu, break; } #endif /* CONFIG_KVM_XICS */ +#ifdef CONFIG_KVM_XIVE + case KVM_CAP_PPC_IRQ_XIVE: { + struct fd f; + struct kvm_device *dev; + + r = -EBADF; + f = fdget(cap->args[0]); + if (!f.file) + break; + + r = -ENXIO; + if (!xive_enabled()) + break; + + r = -EPERM; + dev = kvm_device_from_filp(f.file); + if (dev) + r = kvmppc_xive_native_connect_vcpu(dev, vcpu, + cap->args[1]); + + fdput(f); + break; + } +#endif /* CONFIG_KVM_XIVE */ #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE case KVM_CAP_PPC_FWNMI: r = -EINVAL; diff --git a/arch/powerpc/kvm/timing.c b/arch/powerpc/kvm/timing.c index 1c03c978eb18..bfe4f106cffc 100644 --- a/arch/powerpc/kvm/timing.c +++ b/arch/powerpc/kvm/timing.c @@ -1,16 +1,5 @@ +// SPDX-License-Identifier: GPL-2.0-only /* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License, version 2, as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. * * Copyright IBM Corp. 2008 * diff --git a/arch/powerpc/kvm/timing.h b/arch/powerpc/kvm/timing.h index 3123690c82dc..ace65f9fed30 100644 --- a/arch/powerpc/kvm/timing.h +++ b/arch/powerpc/kvm/timing.h @@ -1,16 +1,5 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License, version 2, as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. * * Copyright IBM Corp. 2008 * diff --git a/arch/powerpc/kvm/tm.S b/arch/powerpc/kvm/tm.S index 0531a1492fdf..3bf17c854be4 100644 --- a/arch/powerpc/kvm/tm.S +++ b/arch/powerpc/kvm/tm.S @@ -1,17 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License, version 2, as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. * * Derived from book3s_hv_rmhandlers.S, which is: * * Copyright 2011 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com> - * */ #include <asm/reg.h> |