diff options
Diffstat (limited to 'arch/powerpc/platforms/powernv')
-rw-r--r-- | arch/powerpc/platforms/powernv/eeh-powernv.c | 4 | ||||
-rw-r--r-- | arch/powerpc/platforms/powernv/idle.c | 8 | ||||
-rw-r--r-- | arch/powerpc/platforms/powernv/npu-dma.c | 571 | ||||
-rw-r--r-- | arch/powerpc/platforms/powernv/opal-call.c | 1 | ||||
-rw-r--r-- | arch/powerpc/platforms/powernv/opal-hmi.c | 40 | ||||
-rw-r--r-- | arch/powerpc/platforms/powernv/opal.c | 23 | ||||
-rw-r--r-- | arch/powerpc/platforms/powernv/pci-ioda.c | 14 | ||||
-rw-r--r-- | arch/powerpc/platforms/powernv/pci.c | 145 | ||||
-rw-r--r-- | arch/powerpc/platforms/powernv/pci.h | 6 | ||||
-rw-r--r-- | arch/powerpc/platforms/powernv/vas-window.c | 19 | ||||
-rw-r--r-- | arch/powerpc/platforms/powernv/vas.h | 20 |
11 files changed, 87 insertions, 764 deletions
diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c index 9ade4489f415..620a986209f5 100644 --- a/arch/powerpc/platforms/powernv/eeh-powernv.c +++ b/arch/powerpc/platforms/powernv/eeh-powernv.c @@ -1,8 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-or-later /* - * The file intends to implement the platform dependent EEH operations on - * powernv platform. Actually, the powernv was created in order to fully - * hypervisor support. + * PowerNV Platform dependent EEH operations * * Copyright Benjamin Herrenschmidt & Gavin Shan, IBM Corporation 2013. */ diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c index 2f4479b94ac3..09f49eed7fb8 100644 --- a/arch/powerpc/platforms/powernv/idle.c +++ b/arch/powerpc/platforms/powernv/idle.c @@ -716,7 +716,7 @@ static unsigned long power9_idle_stop(unsigned long psscr, bool mmu_on) * to reload MMCR0 (see mmcr0 comment above). */ if (!cpu_has_feature(CPU_FTR_POWER9_DD2_1)) { - asm volatile(PPC_INVALIDATE_ERAT); + asm volatile(PPC_ISA_3_0_INVALIDATE_ERAT); mtspr(SPRN_MMCR0, mmcr0); } @@ -758,7 +758,6 @@ static unsigned long power9_idle_stop(unsigned long psscr, bool mmu_on) mtspr(SPRN_PTCR, sprs.ptcr); mtspr(SPRN_RPR, sprs.rpr); mtspr(SPRN_TSCR, sprs.tscr); - mtspr(SPRN_LDBAR, sprs.ldbar); if (pls >= pnv_first_tb_loss_level) { /* TB loss */ @@ -790,6 +789,7 @@ core_woken: mtspr(SPRN_MMCR0, sprs.mmcr0); mtspr(SPRN_MMCR1, sprs.mmcr1); mtspr(SPRN_MMCR2, sprs.mmcr2); + mtspr(SPRN_LDBAR, sprs.ldbar); mtspr(SPRN_SPRG3, local_paca->sprg_vdso); @@ -1155,10 +1155,10 @@ static void __init pnv_power9_idle_init(void) pnv_deepest_stop_psscr_mask); } - pr_info("cpuidle-powernv: First stop level that may lose SPRs = 0x%lld\n", + pr_info("cpuidle-powernv: First stop level that may lose SPRs = 0x%llx\n", pnv_first_spr_loss_level); - pr_info("cpuidle-powernv: First stop level that may lose timebase = 0x%lld\n", + pr_info("cpuidle-powernv: First stop level that may lose timebase = 0x%llx\n", pnv_first_tb_loss_level); } diff --git a/arch/powerpc/platforms/powernv/npu-dma.c b/arch/powerpc/platforms/powernv/npu-dma.c index c321fdbc2200..c16249d251f1 100644 --- a/arch/powerpc/platforms/powernv/npu-dma.c +++ b/arch/powerpc/platforms/powernv/npu-dma.c @@ -19,18 +19,25 @@ #include "pci.h" -/* - * spinlock to protect initialisation of an npu_context for a particular - * mm_struct. - */ -static DEFINE_SPINLOCK(npu_context_lock); - static struct pci_dev *get_pci_dev(struct device_node *dn) { struct pci_dn *pdn = PCI_DN(dn); + struct pci_dev *pdev; - return pci_get_domain_bus_and_slot(pci_domain_nr(pdn->phb->bus), + pdev = pci_get_domain_bus_and_slot(pci_domain_nr(pdn->phb->bus), pdn->busno, pdn->devfn); + + /* + * pci_get_domain_bus_and_slot() increased the reference count of + * the PCI device, but callers don't need that actually as the PE + * already holds a reference to the device. Since callers aren't + * aware of the reference count change, call pci_dev_put() now to + * avoid leaks. + */ + if (pdev) + pci_dev_put(pdev); + + return pdev; } /* Given a NPU device get the associated PCI device. */ @@ -359,15 +366,6 @@ struct npu_comp { /* An NPU descriptor, valid for POWER9 only */ struct npu { int index; - __be64 *mmio_atsd_regs[NV_NMMU_ATSD_REGS]; - unsigned int mmio_atsd_count; - - /* Bitmask for MMIO register usage */ - unsigned long mmio_atsd_usage; - - /* Do we need to explicitly flush the nest mmu? */ - bool nmmu_flush; - struct npu_comp npucomp; }; @@ -624,534 +622,8 @@ struct iommu_table_group *pnv_npu_compound_attach(struct pnv_ioda_pe *pe) } #endif /* CONFIG_IOMMU_API */ -/* Maximum number of nvlinks per npu */ -#define NV_MAX_LINKS 6 - -/* Maximum index of npu2 hosts in the system. Always < NV_MAX_NPUS */ -static int max_npu2_index; - -struct npu_context { - struct mm_struct *mm; - struct pci_dev *npdev[NV_MAX_NPUS][NV_MAX_LINKS]; - struct mmu_notifier mn; - struct kref kref; - bool nmmu_flush; - - /* Callback to stop translation requests on a given GPU */ - void (*release_cb)(struct npu_context *context, void *priv); - - /* - * Private pointer passed to the above callback for usage by - * device drivers. - */ - void *priv; -}; - -struct mmio_atsd_reg { - struct npu *npu; - int reg; -}; - -/* - * Find a free MMIO ATSD register and mark it in use. Return -ENOSPC - * if none are available. - */ -static int get_mmio_atsd_reg(struct npu *npu) -{ - int i; - - for (i = 0; i < npu->mmio_atsd_count; i++) { - if (!test_bit(i, &npu->mmio_atsd_usage)) - if (!test_and_set_bit_lock(i, &npu->mmio_atsd_usage)) - return i; - } - - return -ENOSPC; -} - -static void put_mmio_atsd_reg(struct npu *npu, int reg) -{ - clear_bit_unlock(reg, &npu->mmio_atsd_usage); -} - -/* MMIO ATSD register offsets */ -#define XTS_ATSD_LAUNCH 0 -#define XTS_ATSD_AVA 1 -#define XTS_ATSD_STAT 2 - -static unsigned long get_atsd_launch_val(unsigned long pid, unsigned long psize) -{ - unsigned long launch = 0; - - if (psize == MMU_PAGE_COUNT) { - /* IS set to invalidate entire matching PID */ - launch |= PPC_BIT(12); - } else { - /* AP set to invalidate region of psize */ - launch |= (u64)mmu_get_ap(psize) << PPC_BITLSHIFT(17); - } - - /* PRS set to process-scoped */ - launch |= PPC_BIT(13); - - /* PID */ - launch |= pid << PPC_BITLSHIFT(38); - - /* Leave "No flush" (bit 39) 0 so every ATSD performs a flush */ - - return launch; -} - -static void mmio_atsd_regs_write(struct mmio_atsd_reg - mmio_atsd_reg[NV_MAX_NPUS], unsigned long offset, - unsigned long val) -{ - struct npu *npu; - int i, reg; - - for (i = 0; i <= max_npu2_index; i++) { - reg = mmio_atsd_reg[i].reg; - if (reg < 0) - continue; - - npu = mmio_atsd_reg[i].npu; - __raw_writeq_be(val, npu->mmio_atsd_regs[reg] + offset); - } -} - -static void mmio_invalidate_pid(struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS], - unsigned long pid) -{ - unsigned long launch = get_atsd_launch_val(pid, MMU_PAGE_COUNT); - - /* Invalidating the entire process doesn't use a va */ - mmio_atsd_regs_write(mmio_atsd_reg, XTS_ATSD_LAUNCH, launch); -} - -static void mmio_invalidate_range(struct mmio_atsd_reg - mmio_atsd_reg[NV_MAX_NPUS], unsigned long pid, - unsigned long start, unsigned long psize) -{ - unsigned long launch = get_atsd_launch_val(pid, psize); - - /* Write all VAs first */ - mmio_atsd_regs_write(mmio_atsd_reg, XTS_ATSD_AVA, start); - - /* Issue one barrier for all address writes */ - eieio(); - - /* Launch */ - mmio_atsd_regs_write(mmio_atsd_reg, XTS_ATSD_LAUNCH, launch); -} - -#define mn_to_npu_context(x) container_of(x, struct npu_context, mn) - -static void mmio_invalidate_wait( - struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS]) -{ - struct npu *npu; - int i, reg; - - /* Wait for all invalidations to complete */ - for (i = 0; i <= max_npu2_index; i++) { - if (mmio_atsd_reg[i].reg < 0) - continue; - - /* Wait for completion */ - npu = mmio_atsd_reg[i].npu; - reg = mmio_atsd_reg[i].reg; - while (__raw_readq(npu->mmio_atsd_regs[reg] + XTS_ATSD_STAT)) - cpu_relax(); - } -} - -/* - * Acquires all the address translation shootdown (ATSD) registers required to - * launch an ATSD on all links this npu_context is active on. - */ -static void acquire_atsd_reg(struct npu_context *npu_context, - struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS]) -{ - int i, j; - struct npu *npu; - struct pci_dev *npdev; - - for (i = 0; i <= max_npu2_index; i++) { - mmio_atsd_reg[i].reg = -1; - for (j = 0; j < NV_MAX_LINKS; j++) { - /* - * There are no ordering requirements with respect to - * the setup of struct npu_context, but to ensure - * consistent behaviour we need to ensure npdev[][] is - * only read once. - */ - npdev = READ_ONCE(npu_context->npdev[i][j]); - if (!npdev) - continue; - - npu = pci_bus_to_host(npdev->bus)->npu; - if (!npu) - continue; - - mmio_atsd_reg[i].npu = npu; - mmio_atsd_reg[i].reg = get_mmio_atsd_reg(npu); - while (mmio_atsd_reg[i].reg < 0) { - mmio_atsd_reg[i].reg = get_mmio_atsd_reg(npu); - cpu_relax(); - } - break; - } - } -} - -/* - * Release previously acquired ATSD registers. To avoid deadlocks the registers - * must be released in the same order they were acquired above in - * acquire_atsd_reg. - */ -static void release_atsd_reg(struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS]) -{ - int i; - - for (i = 0; i <= max_npu2_index; i++) { - /* - * We can't rely on npu_context->npdev[][] being the same here - * as when acquire_atsd_reg() was called, hence we use the - * values stored in mmio_atsd_reg during the acquire phase - * rather than re-reading npdev[][]. - */ - if (mmio_atsd_reg[i].reg < 0) - continue; - - put_mmio_atsd_reg(mmio_atsd_reg[i].npu, mmio_atsd_reg[i].reg); - } -} - -/* - * Invalidate a virtual address range - */ -static void mmio_invalidate(struct npu_context *npu_context, - unsigned long start, unsigned long size) -{ - struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS]; - unsigned long pid = npu_context->mm->context.id; - unsigned long atsd_start = 0; - unsigned long end = start + size - 1; - int atsd_psize = MMU_PAGE_COUNT; - - /* - * Convert the input range into one of the supported sizes. If the range - * doesn't fit, use the next larger supported size. Invalidation latency - * is high, so over-invalidation is preferred to issuing multiple - * invalidates. - * - * A 4K page size isn't supported by NPU/GPU ATS, so that case is - * ignored. - */ - if (size == SZ_64K) { - atsd_start = start; - atsd_psize = MMU_PAGE_64K; - } else if (ALIGN_DOWN(start, SZ_2M) == ALIGN_DOWN(end, SZ_2M)) { - atsd_start = ALIGN_DOWN(start, SZ_2M); - atsd_psize = MMU_PAGE_2M; - } else if (ALIGN_DOWN(start, SZ_1G) == ALIGN_DOWN(end, SZ_1G)) { - atsd_start = ALIGN_DOWN(start, SZ_1G); - atsd_psize = MMU_PAGE_1G; - } - - if (npu_context->nmmu_flush) - /* - * Unfortunately the nest mmu does not support flushing specific - * addresses so we have to flush the whole mm once before - * shooting down the GPU translation. - */ - flush_all_mm(npu_context->mm); - - /* - * Loop over all the NPUs this process is active on and launch - * an invalidate. - */ - acquire_atsd_reg(npu_context, mmio_atsd_reg); - - if (atsd_psize == MMU_PAGE_COUNT) - mmio_invalidate_pid(mmio_atsd_reg, pid); - else - mmio_invalidate_range(mmio_atsd_reg, pid, atsd_start, - atsd_psize); - - mmio_invalidate_wait(mmio_atsd_reg); - - /* - * The GPU requires two flush ATSDs to ensure all entries have been - * flushed. We use PID 0 as it will never be used for a process on the - * GPU. - */ - mmio_invalidate_pid(mmio_atsd_reg, 0); - mmio_invalidate_wait(mmio_atsd_reg); - mmio_invalidate_pid(mmio_atsd_reg, 0); - mmio_invalidate_wait(mmio_atsd_reg); - - release_atsd_reg(mmio_atsd_reg); -} - -static void pnv_npu2_mn_release(struct mmu_notifier *mn, - struct mm_struct *mm) -{ - struct npu_context *npu_context = mn_to_npu_context(mn); - - /* Call into device driver to stop requests to the NMMU */ - if (npu_context->release_cb) - npu_context->release_cb(npu_context, npu_context->priv); - - /* - * There should be no more translation requests for this PID, but we - * need to ensure any entries for it are removed from the TLB. - */ - mmio_invalidate(npu_context, 0, ~0UL); -} - -static void pnv_npu2_mn_invalidate_range(struct mmu_notifier *mn, - struct mm_struct *mm, - unsigned long start, unsigned long end) -{ - struct npu_context *npu_context = mn_to_npu_context(mn); - mmio_invalidate(npu_context, start, end - start); -} - -static const struct mmu_notifier_ops nv_nmmu_notifier_ops = { - .release = pnv_npu2_mn_release, - .invalidate_range = pnv_npu2_mn_invalidate_range, -}; - -/* - * Call into OPAL to setup the nmmu context for the current task in - * the NPU. This must be called to setup the context tables before the - * GPU issues ATRs. pdev should be a pointed to PCIe GPU device. - * - * A release callback should be registered to allow a device driver to - * be notified that it should not launch any new translation requests - * as the final TLB invalidate is about to occur. - * - * Returns an error if there no contexts are currently available or a - * npu_context which should be passed to pnv_npu2_handle_fault(). - * - * mmap_sem must be held in write mode and must not be called from interrupt - * context. - */ -struct npu_context *pnv_npu2_init_context(struct pci_dev *gpdev, - unsigned long flags, - void (*cb)(struct npu_context *, void *), - void *priv) -{ - int rc; - u32 nvlink_index; - struct device_node *nvlink_dn; - struct mm_struct *mm = current->mm; - struct npu *npu; - struct npu_context *npu_context; - struct pci_controller *hose; - - /* - * At present we don't support GPUs connected to multiple NPUs and I'm - * not sure the hardware does either. - */ - struct pci_dev *npdev = pnv_pci_get_npu_dev(gpdev, 0); - - if (!npdev) - /* No nvlink associated with this GPU device */ - return ERR_PTR(-ENODEV); - - /* We only support DR/PR/HV in pnv_npu2_map_lpar_dev() */ - if (flags & ~(MSR_DR | MSR_PR | MSR_HV)) - return ERR_PTR(-EINVAL); - - nvlink_dn = of_parse_phandle(npdev->dev.of_node, "ibm,nvlink", 0); - if (WARN_ON(of_property_read_u32(nvlink_dn, "ibm,npu-link-index", - &nvlink_index))) - return ERR_PTR(-ENODEV); - - if (!mm || mm->context.id == 0) { - /* - * Kernel thread contexts are not supported and context id 0 is - * reserved on the GPU. - */ - return ERR_PTR(-EINVAL); - } - - hose = pci_bus_to_host(npdev->bus); - npu = hose->npu; - if (!npu) - return ERR_PTR(-ENODEV); - - /* - * We store the npu pci device so we can more easily get at the - * associated npus. - */ - spin_lock(&npu_context_lock); - npu_context = mm->context.npu_context; - if (npu_context) { - if (npu_context->release_cb != cb || - npu_context->priv != priv) { - spin_unlock(&npu_context_lock); - return ERR_PTR(-EINVAL); - } - - WARN_ON(!kref_get_unless_zero(&npu_context->kref)); - } - spin_unlock(&npu_context_lock); - - if (!npu_context) { - /* - * We can set up these fields without holding the - * npu_context_lock as the npu_context hasn't been returned to - * the caller meaning it can't be destroyed. Parallel allocation - * is protected against by mmap_sem. - */ - rc = -ENOMEM; - npu_context = kzalloc(sizeof(struct npu_context), GFP_KERNEL); - if (npu_context) { - kref_init(&npu_context->kref); - npu_context->mm = mm; - npu_context->mn.ops = &nv_nmmu_notifier_ops; - rc = __mmu_notifier_register(&npu_context->mn, mm); - } - - if (rc) { - kfree(npu_context); - return ERR_PTR(rc); - } - - mm->context.npu_context = npu_context; - } - - npu_context->release_cb = cb; - npu_context->priv = priv; - - /* - * npdev is a pci_dev pointer setup by the PCI code. We assign it to - * npdev[][] to indicate to the mmu notifiers that an invalidation - * should also be sent over this nvlink. The notifiers don't use any - * other fields in npu_context, so we just need to ensure that when they - * deference npu_context->npdev[][] it is either a valid pointer or - * NULL. - */ - WRITE_ONCE(npu_context->npdev[npu->index][nvlink_index], npdev); - - if (!npu->nmmu_flush) { - /* - * If we're not explicitly flushing ourselves we need to mark - * the thread for global flushes - */ - npu_context->nmmu_flush = false; - mm_context_add_copro(mm); - } else - npu_context->nmmu_flush = true; - - return npu_context; -} -EXPORT_SYMBOL(pnv_npu2_init_context); - -static void pnv_npu2_release_context(struct kref *kref) -{ - struct npu_context *npu_context = - container_of(kref, struct npu_context, kref); - - if (!npu_context->nmmu_flush) - mm_context_remove_copro(npu_context->mm); - - npu_context->mm->context.npu_context = NULL; -} - -/* - * Destroy a context on the given GPU. May free the npu_context if it is no - * longer active on any GPUs. Must not be called from interrupt context. - */ -void pnv_npu2_destroy_context(struct npu_context *npu_context, - struct pci_dev *gpdev) -{ - int removed; - struct npu *npu; - struct pci_dev *npdev = pnv_pci_get_npu_dev(gpdev, 0); - struct device_node *nvlink_dn; - u32 nvlink_index; - struct pci_controller *hose; - - if (WARN_ON(!npdev)) - return; - - hose = pci_bus_to_host(npdev->bus); - npu = hose->npu; - if (!npu) - return; - nvlink_dn = of_parse_phandle(npdev->dev.of_node, "ibm,nvlink", 0); - if (WARN_ON(of_property_read_u32(nvlink_dn, "ibm,npu-link-index", - &nvlink_index))) - return; - WRITE_ONCE(npu_context->npdev[npu->index][nvlink_index], NULL); - spin_lock(&npu_context_lock); - removed = kref_put(&npu_context->kref, pnv_npu2_release_context); - spin_unlock(&npu_context_lock); - - /* - * We need to do this outside of pnv_npu2_release_context so that it is - * outside the spinlock as mmu_notifier_destroy uses SRCU. - */ - if (removed) { - mmu_notifier_unregister(&npu_context->mn, - npu_context->mm); - - kfree(npu_context); - } - -} -EXPORT_SYMBOL(pnv_npu2_destroy_context); - -/* - * Assumes mmap_sem is held for the contexts associated mm. - */ -int pnv_npu2_handle_fault(struct npu_context *context, uintptr_t *ea, - unsigned long *flags, unsigned long *status, int count) -{ - u64 rc = 0, result = 0; - int i, is_write; - struct page *page[1]; - const char __user *u; - char c; - - /* mmap_sem should be held so the struct_mm must be present */ - struct mm_struct *mm = context->mm; - - WARN_ON(!rwsem_is_locked(&mm->mmap_sem)); - - for (i = 0; i < count; i++) { - is_write = flags[i] & NPU2_WRITE; - rc = get_user_pages_remote(NULL, mm, ea[i], 1, - is_write ? FOLL_WRITE : 0, - page, NULL, NULL); - - if (rc != 1) { - status[i] = rc; - result = -EFAULT; - continue; - } - - /* Make sure partition scoped tree gets a pte */ - u = page_address(page[0]); - if (__get_user(c, u)) - result = -EFAULT; - - status[i] = 0; - put_page(page[0]); - } - - return result; -} -EXPORT_SYMBOL(pnv_npu2_handle_fault); - int pnv_npu2_init(struct pci_controller *hose) { - unsigned int i; - u64 mmio_atsd; static int npu_index; struct npu *npu; int ret; @@ -1160,33 +632,18 @@ int pnv_npu2_init(struct pci_controller *hose) if (!npu) return -ENOMEM; - npu->nmmu_flush = of_property_read_bool(hose->dn, "ibm,nmmu-flush"); - - for (i = 0; i < ARRAY_SIZE(npu->mmio_atsd_regs) && - !of_property_read_u64_index(hose->dn, "ibm,mmio-atsd", - i, &mmio_atsd); i++) - npu->mmio_atsd_regs[i] = ioremap(mmio_atsd, 32); - - pr_info("NPU%d: Found %d MMIO ATSD registers", hose->global_number, i); - npu->mmio_atsd_count = i; - npu->mmio_atsd_usage = 0; npu_index++; if (WARN_ON(npu_index >= NV_MAX_NPUS)) { ret = -ENOSPC; goto fail_exit; } - max_npu2_index = npu_index; npu->index = npu_index; hose->npu = npu; return 0; fail_exit: - for (i = 0; i < npu->mmio_atsd_count; ++i) - iounmap(npu->mmio_atsd_regs[i]); - kfree(npu); - return ret; } diff --git a/arch/powerpc/platforms/powernv/opal-call.c b/arch/powerpc/platforms/powernv/opal-call.c index 36c8fa3647a2..29ca523c1c79 100644 --- a/arch/powerpc/platforms/powernv/opal-call.c +++ b/arch/powerpc/platforms/powernv/opal-call.c @@ -273,7 +273,6 @@ OPAL_CALL(opal_npu_map_lpar, OPAL_NPU_MAP_LPAR); OPAL_CALL(opal_imc_counters_init, OPAL_IMC_COUNTERS_INIT); OPAL_CALL(opal_imc_counters_start, OPAL_IMC_COUNTERS_START); OPAL_CALL(opal_imc_counters_stop, OPAL_IMC_COUNTERS_STOP); -OPAL_CALL(opal_pci_set_p2p, OPAL_PCI_SET_P2P); OPAL_CALL(opal_get_powercap, OPAL_GET_POWERCAP); OPAL_CALL(opal_set_powercap, OPAL_SET_POWERCAP); OPAL_CALL(opal_get_power_shift_ratio, OPAL_GET_POWER_SHIFT_RATIO); diff --git a/arch/powerpc/platforms/powernv/opal-hmi.c b/arch/powerpc/platforms/powernv/opal-hmi.c index 5cae375525d0..3e1f064a18db 100644 --- a/arch/powerpc/platforms/powernv/opal-hmi.c +++ b/arch/powerpc/platforms/powernv/opal-hmi.c @@ -137,6 +137,43 @@ static void print_nx_checkstop_reason(const char *level, xstop_reason[i].description); } +static void print_npu_checkstop_reason(const char *level, + struct OpalHMIEvent *hmi_evt) +{ + uint8_t reason, reason_count, i; + + /* + * We may not have a checkstop reason on some combination of + * hardware and/or skiboot version + */ + if (!hmi_evt->u.xstop_error.xstop_reason) { + printk("%s NPU checkstop on chip %x\n", level, + be32_to_cpu(hmi_evt->u.xstop_error.u.chip_id)); + return; + } + + /* + * NPU2 has 3 FIRs. Reason encoded on a byte as: + * 2 bits for the FIR number + * 6 bits for the bit number + * It may be possible to find several reasons. + * + * We don't display a specific message per FIR bit as there + * are too many and most are meaningless without the workbook + * and/or hw team help anyway. + */ + reason_count = sizeof(hmi_evt->u.xstop_error.xstop_reason) / + sizeof(reason); + for (i = 0; i < reason_count; i++) { + reason = (hmi_evt->u.xstop_error.xstop_reason >> (8 * i)) & 0xFF; + if (reason) + printk("%s NPU checkstop on chip %x: FIR%d bit %d is set\n", + level, + be32_to_cpu(hmi_evt->u.xstop_error.u.chip_id), + reason >> 6, reason & 0x3F); + } +} + static void print_checkstop_reason(const char *level, struct OpalHMIEvent *hmi_evt) { @@ -148,6 +185,9 @@ static void print_checkstop_reason(const char *level, case CHECKSTOP_TYPE_NX: print_nx_checkstop_reason(level, hmi_evt); break; + case CHECKSTOP_TYPE_NPU: + print_npu_checkstop_reason(level, hmi_evt); + break; default: printk("%s Unknown Malfunction Alert of type %d\n", level, type); diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c index 98c5d94b17fb..aba443be7daa 100644 --- a/arch/powerpc/platforms/powernv/opal.c +++ b/arch/powerpc/platforms/powernv/opal.c @@ -202,16 +202,18 @@ static int __init opal_register_exception_handlers(void) glue = 0x7000; /* - * Check if we are running on newer firmware that exports - * OPAL_HANDLE_HMI token. If yes, then don't ask OPAL to patch - * the HMI interrupt and we catch it directly in Linux. + * Only ancient OPAL firmware requires this. + * Specifically, firmware from FW810.00 (released June 2014) + * through FW810.20 (Released October 2014). * - * For older firmware (i.e currently released POWER8 System Firmware - * as of today <= SV810_087), we fallback to old behavior and let OPAL - * patch the HMI vector and handle it inside OPAL firmware. + * Check if we are running on newer (post Oct 2014) firmware that + * exports the OPAL_HANDLE_HMI token. If yes, then don't ask OPAL to + * patch the HMI interrupt and we catch it directly in Linux. * - * For newer firmware (in development/yet to be released) we will - * start catching/handling HMI directly in Linux. + * For older firmware (i.e < FW810.20), we fallback to old behavior and + * let OPAL patch the HMI vector and handle it inside OPAL firmware. + * + * For newer firmware we catch/handle the HMI directly in Linux. */ if (!opal_check_token(OPAL_HANDLE_HMI)) { pr_info("Old firmware detected, OPAL handles HMIs.\n"); @@ -221,6 +223,11 @@ static int __init opal_register_exception_handlers(void) glue += 128; } + /* + * Only applicable to ancient firmware, all modern + * (post March 2015/skiboot 5.0) firmware will just return + * OPAL_UNSUPPORTED. + */ opal_register_exception_handler(OPAL_SOFTPATCH_HANDLER, 0, glue); #endif diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 10cc42b9e541..d8080558d020 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -50,6 +50,8 @@ static const char * const pnv_phb_names[] = { "IODA1", "IODA2", "NPU_NVLINK", "NPU_OCAPI" }; +static void pnv_pci_ioda2_set_bypass(struct pnv_ioda_pe *pe, bool enable); + void pe_level_printk(const struct pnv_ioda_pe *pe, const char *level, const char *fmt, ...) { @@ -2356,7 +2358,7 @@ static long pnv_pci_ioda2_set_window(struct iommu_table_group *table_group, return 0; } -void pnv_pci_ioda2_set_bypass(struct pnv_ioda_pe *pe, bool enable) +static void pnv_pci_ioda2_set_bypass(struct pnv_ioda_pe *pe, bool enable) { uint16_t window_id = (pe->pe_number << 1 ) + 1; int64_t rc; @@ -2456,6 +2458,14 @@ static long pnv_pci_ioda2_setup_default_config(struct pnv_ioda_pe *pe) if (!pnv_iommu_bypass_disabled) pnv_pci_ioda2_set_bypass(pe, true); + /* + * Set table base for the case of IOMMU DMA use. Usually this is done + * from dma_dev_setup() which is not called when a device is returned + * from VFIO so do it here. + */ + if (pe->pdev) + set_iommu_table_base(&pe->pdev->dev, tbl); + return 0; } @@ -2543,6 +2553,8 @@ static void pnv_ioda2_take_ownership(struct iommu_table_group *table_group) pnv_pci_ioda2_unset_window(&pe->table_group, 0); if (pe->pbus) pnv_ioda_setup_bus_dma(pe, pe->pbus); + else if (pe->pdev) + set_iommu_table_base(&pe->pdev->dev, NULL); iommu_tce_table_put(tbl); } diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c index ff1a33fee8e6..6104418c9ad5 100644 --- a/arch/powerpc/platforms/powernv/pci.c +++ b/arch/powerpc/platforms/powernv/pci.c @@ -34,7 +34,6 @@ #include "powernv.h" #include "pci.h" -static DEFINE_MUTEX(p2p_mutex); static DEFINE_MUTEX(tunnel_mutex); int pnv_pci_get_slot_id(struct device_node *np, uint64_t *id) @@ -857,79 +856,6 @@ void pnv_pci_dma_bus_setup(struct pci_bus *bus) } } -int pnv_pci_set_p2p(struct pci_dev *initiator, struct pci_dev *target, u64 desc) -{ - struct pci_controller *hose; - struct pnv_phb *phb_init, *phb_target; - struct pnv_ioda_pe *pe_init; - int rc; - - if (!opal_check_token(OPAL_PCI_SET_P2P)) - return -ENXIO; - - hose = pci_bus_to_host(initiator->bus); - phb_init = hose->private_data; - - hose = pci_bus_to_host(target->bus); - phb_target = hose->private_data; - - pe_init = pnv_ioda_get_pe(initiator); - if (!pe_init) - return -ENODEV; - - /* - * Configuring the initiator's PHB requires to adjust its - * TVE#1 setting. Since the same device can be an initiator - * several times for different target devices, we need to keep - * a reference count to know when we can restore the default - * bypass setting on its TVE#1 when disabling. Opal is not - * tracking PE states, so we add a reference count on the PE - * in linux. - * - * For the target, the configuration is per PHB, so we keep a - * target reference count on the PHB. - */ - mutex_lock(&p2p_mutex); - - if (desc & OPAL_PCI_P2P_ENABLE) { - /* always go to opal to validate the configuration */ - rc = opal_pci_set_p2p(phb_init->opal_id, phb_target->opal_id, - desc, pe_init->pe_number); - - if (rc != OPAL_SUCCESS) { - rc = -EIO; - goto out; - } - - pe_init->p2p_initiator_count++; - phb_target->p2p_target_count++; - } else { - if (!pe_init->p2p_initiator_count || - !phb_target->p2p_target_count) { - rc = -EINVAL; - goto out; - } - - if (--pe_init->p2p_initiator_count == 0) - pnv_pci_ioda2_set_bypass(pe_init, true); - - if (--phb_target->p2p_target_count == 0) { - rc = opal_pci_set_p2p(phb_init->opal_id, - phb_target->opal_id, desc, - pe_init->pe_number); - if (rc != OPAL_SUCCESS) { - rc = -EIO; - goto out; - } - } - } - rc = 0; -out: - mutex_unlock(&p2p_mutex); - return rc; -} -EXPORT_SYMBOL_GPL(pnv_pci_set_p2p); - struct device_node *pnv_pci_get_phb_node(struct pci_dev *dev) { struct pci_controller *hose = pci_bus_to_host(dev->bus); @@ -938,54 +864,6 @@ struct device_node *pnv_pci_get_phb_node(struct pci_dev *dev) } EXPORT_SYMBOL(pnv_pci_get_phb_node); -int pnv_pci_enable_tunnel(struct pci_dev *dev, u64 *asnind) -{ - struct device_node *np; - const __be32 *prop; - struct pnv_ioda_pe *pe; - uint16_t window_id; - int rc; - - if (!radix_enabled()) - return -ENXIO; - - if (!(np = pnv_pci_get_phb_node(dev))) - return -ENXIO; - - prop = of_get_property(np, "ibm,phb-indications", NULL); - of_node_put(np); - - if (!prop || !prop[1]) - return -ENXIO; - - *asnind = (u64)be32_to_cpu(prop[1]); - pe = pnv_ioda_get_pe(dev); - if (!pe) - return -ENODEV; - - /* Increase real window size to accept as_notify messages. */ - window_id = (pe->pe_number << 1 ) + 1; - rc = opal_pci_map_pe_dma_window_real(pe->phb->opal_id, pe->pe_number, - window_id, pe->tce_bypass_base, - (uint64_t)1 << 48); - return opal_error_code(rc); -} -EXPORT_SYMBOL_GPL(pnv_pci_enable_tunnel); - -int pnv_pci_disable_tunnel(struct pci_dev *dev) -{ - struct pnv_ioda_pe *pe; - - pe = pnv_ioda_get_pe(dev); - if (!pe) - return -ENODEV; - - /* Restore default real window size. */ - pnv_pci_ioda2_set_bypass(pe, true); - return 0; -} -EXPORT_SYMBOL_GPL(pnv_pci_disable_tunnel); - int pnv_pci_set_tunnel_bar(struct pci_dev *dev, u64 addr, int enable) { __be64 val; @@ -1040,29 +918,6 @@ out: } EXPORT_SYMBOL_GPL(pnv_pci_set_tunnel_bar); -#ifdef CONFIG_PPC64 /* for thread.tidr */ -int pnv_pci_get_as_notify_info(struct task_struct *task, u32 *lpid, u32 *pid, - u32 *tid) -{ - struct mm_struct *mm = NULL; - - if (task == NULL) - return -EINVAL; - - mm = get_task_mm(task); - if (mm == NULL) - return -EINVAL; - - *pid = mm->context.id; - mmput(mm); - - *tid = task->thread.tidr; - *lpid = mfspr(SPRN_LPID); - return 0; -} -EXPORT_SYMBOL_GPL(pnv_pci_get_as_notify_info); -#endif - void pnv_pci_shutdown(void) { struct pci_controller *hose; diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h index be26ab3d99e0..469c24463247 100644 --- a/arch/powerpc/platforms/powernv/pci.h +++ b/arch/powerpc/platforms/powernv/pci.h @@ -79,9 +79,6 @@ struct pnv_ioda_pe { struct pnv_ioda_pe *master; struct list_head slaves; - /* PCI peer-to-peer*/ - int p2p_initiator_count; - /* Link in list of PE#s */ struct list_head list; }; @@ -172,8 +169,6 @@ struct pnv_phb { /* PHB and hub diagnostics */ unsigned int diag_data_size; u8 *diag_data; - - int p2p_target_count; }; extern struct pci_ops pnv_pci_ops; @@ -200,7 +195,6 @@ extern int pnv_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type); extern void pnv_teardown_msi_irqs(struct pci_dev *pdev); extern struct pnv_ioda_pe *pnv_ioda_get_pe(struct pci_dev *dev); extern void pnv_set_msi_irq_chip(struct pnv_phb *phb, unsigned int virq); -extern void pnv_pci_ioda2_set_bypass(struct pnv_ioda_pe *pe, bool enable); extern unsigned long pnv_pci_ioda2_get_table_size(__u32 page_shift, __u64 window_size, __u32 levels); extern int pnv_eeh_post_init(void); diff --git a/arch/powerpc/platforms/powernv/vas-window.c b/arch/powerpc/platforms/powernv/vas-window.c index ea5ca0201da8..0c0d27d17976 100644 --- a/arch/powerpc/platforms/powernv/vas-window.c +++ b/arch/powerpc/platforms/powernv/vas-window.c @@ -40,16 +40,6 @@ static void compute_paste_address(struct vas_window *window, u64 *addr, int *len pr_debug("Txwin #%d: Paste addr 0x%llx\n", winid, *addr); } -u64 vas_win_paste_addr(struct vas_window *win) -{ - u64 addr; - - compute_paste_address(win, &addr, NULL); - - return addr; -} -EXPORT_SYMBOL(vas_win_paste_addr); - static inline void get_hvwc_mmio_bar(struct vas_window *window, u64 *start, int *len) { @@ -1264,12 +1254,3 @@ int vas_win_close(struct vas_window *window) return 0; } EXPORT_SYMBOL_GPL(vas_win_close); - -/* - * Return a system-wide unique window id for the window @win. - */ -u32 vas_win_id(struct vas_window *win) -{ - return encode_pswid(win->vinst->vas_id, win->winid); -} -EXPORT_SYMBOL_GPL(vas_win_id); diff --git a/arch/powerpc/platforms/powernv/vas.h b/arch/powerpc/platforms/powernv/vas.h index 9cc5251816db..5574aec9ee88 100644 --- a/arch/powerpc/platforms/powernv/vas.h +++ b/arch/powerpc/platforms/powernv/vas.h @@ -444,26 +444,6 @@ static inline u64 read_hvwc_reg(struct vas_window *win, return in_be64(win->hvwc_map+reg); } -/* - * Encode/decode the Partition Send Window ID (PSWID) for a window in - * a way that we can uniquely identify any window in the system. i.e. - * we should be able to locate the 'struct vas_window' given the PSWID. - * - * Bits Usage - * 0:7 VAS id (8 bits) - * 8:15 Unused, 0 (3 bits) - * 16:31 Window id (16 bits) - */ -static inline u32 encode_pswid(int vasid, int winid) -{ - u32 pswid = 0; - - pswid |= vasid << (31 - 7); - pswid |= winid; - - return pswid; -} - static inline void decode_pswid(u32 pswid, int *vasid, int *winid) { if (vasid) |