diff options
author | Jason Gunthorpe | 2019-07-02 19:34:43 +0200 |
---|---|---|
committer | Jason Gunthorpe | 2019-07-02 19:34:43 +0200 |
commit | 9ec3f4cb35bc8278f0582fed9f9229c9315c2ffb (patch) | |
tree | 3ff6a17a33913268a2ed06e7d9a1a95e29eeee97 /drivers/pci | |
parent | mm/swap: fix release_pages() when releasing devmap pages (diff) | |
parent | Linux 5.2-rc7 (diff) | |
download | kernel-qcow2-linux-9ec3f4cb35bc8278f0582fed9f9229c9315c2ffb.tar.gz kernel-qcow2-linux-9ec3f4cb35bc8278f0582fed9f9229c9315c2ffb.tar.xz kernel-qcow2-linux-9ec3f4cb35bc8278f0582fed9f9229c9315c2ffb.zip |
Merge tag 'v5.2-rc7' into rdma.git hmm
Required for dependencies in the next patches.
Diffstat (limited to 'drivers/pci')
-rw-r--r-- | drivers/pci/p2pdma.c | 119 | ||||
-rw-r--r-- | drivers/pci/pci-driver.c | 47 |
2 files changed, 111 insertions, 55 deletions
diff --git a/drivers/pci/p2pdma.c b/drivers/pci/p2pdma.c
index 742928d0053e..a4994aa3acc0 100644
--- a/drivers/pci/p2pdma.c
+++ b/drivers/pci/p2pdma.c
@@ -18,14 +18,19 @@
 #include <linux/percpu-refcount.h>
 #include <linux/random.h>
 #include <linux/seq_buf.h>
+#include <linux/iommu.h>

 struct pci_p2pdma {
-	struct percpu_ref devmap_ref;
-	struct completion devmap_ref_done;
 	struct gen_pool *pool;
 	bool p2pmem_published;
 };

+struct p2pdma_pagemap {
+	struct dev_pagemap pgmap;
+	struct percpu_ref ref;
+	struct completion ref_done;
+};
+
 static ssize_t size_show(struct device *dev, struct device_attribute *attr,
 			 char *buf)
 {
@@ -74,41 +79,45 @@ static const struct attribute_group p2pmem_group = {
 	.name = "p2pmem",
 };

+static struct p2pdma_pagemap *to_p2p_pgmap(struct percpu_ref *ref)
+{
+	return container_of(ref, struct p2pdma_pagemap, ref);
+}
+
 static void pci_p2pdma_percpu_release(struct percpu_ref *ref)
 {
-	struct pci_p2pdma *p2p =
-		container_of(ref, struct pci_p2pdma, devmap_ref);
+	struct p2pdma_pagemap *p2p_pgmap = to_p2p_pgmap(ref);

-	complete_all(&p2p->devmap_ref_done);
+	complete(&p2p_pgmap->ref_done);
 }

 static void pci_p2pdma_percpu_kill(struct percpu_ref *ref)
 {
-	/*
-	 * pci_p2pdma_add_resource() may be called multiple times
-	 * by a driver and may register the percpu_kill devm action multiple
-	 * times. We only want the first action to actually kill the
-	 * percpu_ref.
-	 */
-	if (percpu_ref_is_dying(ref))
-		return;
-
 	percpu_ref_kill(ref);
 }

+static void pci_p2pdma_percpu_cleanup(struct percpu_ref *ref)
+{
+	struct p2pdma_pagemap *p2p_pgmap = to_p2p_pgmap(ref);
+
+	wait_for_completion(&p2p_pgmap->ref_done);
+	percpu_ref_exit(&p2p_pgmap->ref);
+}
+
 static void pci_p2pdma_release(void *data)
 {
 	struct pci_dev *pdev = data;
+	struct pci_p2pdma *p2pdma = pdev->p2pdma;

-	if (!pdev->p2pdma)
+	if (!p2pdma)
 		return;

-	wait_for_completion(&pdev->p2pdma->devmap_ref_done);
-	percpu_ref_exit(&pdev->p2pdma->devmap_ref);
+	/* Flush and disable pci_alloc_p2p_mem() */
+	pdev->p2pdma = NULL;
+	synchronize_rcu();

-	gen_pool_destroy(pdev->p2pdma->pool);
+	gen_pool_destroy(p2pdma->pool);
 	sysfs_remove_group(&pdev->dev.kobj, &p2pmem_group);
-	pdev->p2pdma = NULL;
 }

 static int pci_p2pdma_setup(struct pci_dev *pdev)
@@ -124,12 +133,6 @@ static int pci_p2pdma_setup(struct pci_dev *pdev)
 	if (!p2p->pool)
 		goto out;

-	init_completion(&p2p->devmap_ref_done);
-	error = percpu_ref_init(&p2p->devmap_ref,
-			pci_p2pdma_percpu_release, 0, GFP_KERNEL);
-	if (error)
-		goto out_pool_destroy;
-
 	error = devm_add_action_or_reset(&pdev->dev, pci_p2pdma_release, pdev);
 	if (error)
 		goto out_pool_destroy;
@@ -163,6 +166,7 @@ out:
 int pci_p2pdma_add_resource(struct pci_dev *pdev, int bar, size_t size,
 			    u64 offset)
 {
+	struct p2pdma_pagemap *p2p_pgmap;
 	struct dev_pagemap *pgmap;
 	void *addr;
 	int error;
@@ -185,18 +189,27 @@ int pci_p2pdma_add_resource(struct pci_dev *pdev, int bar, size_t size,
 		return error;
 	}

-	pgmap = devm_kzalloc(&pdev->dev, sizeof(*pgmap), GFP_KERNEL);
-	if (!pgmap)
+	p2p_pgmap = devm_kzalloc(&pdev->dev, sizeof(*p2p_pgmap), GFP_KERNEL);
+	if (!p2p_pgmap)
 		return -ENOMEM;

+	init_completion(&p2p_pgmap->ref_done);
+	error = percpu_ref_init(&p2p_pgmap->ref,
+			pci_p2pdma_percpu_release, 0, GFP_KERNEL);
+	if (error)
+		goto pgmap_free;
+
+	pgmap = &p2p_pgmap->pgmap;
+
 	pgmap->res.start = pci_resource_start(pdev, bar) + offset;
 	pgmap->res.end = pgmap->res.start + size - 1;
 	pgmap->res.flags = pci_resource_flags(pdev, bar);
-	pgmap->ref = &pdev->p2pdma->devmap_ref;
+	pgmap->ref = &p2p_pgmap->ref;
 	pgmap->type = MEMORY_DEVICE_PCI_P2PDMA;
 	pgmap->pci_p2pdma_bus_offset = pci_bus_address(pdev, bar) -
 		pci_resource_start(pdev, bar);
 	pgmap->kill = pci_p2pdma_percpu_kill;
+	pgmap->cleanup = pci_p2pdma_percpu_cleanup;

 	addr = devm_memremap_pages(&pdev->dev, pgmap);
 	if (IS_ERR(addr)) {
@@ -204,19 +217,22 @@ int pci_p2pdma_add_resource(struct pci_dev *pdev, int bar, size_t size,
 		goto pgmap_free;
 	}

-	error = gen_pool_add_virt(pdev->p2pdma->pool, (unsigned long)addr,
+	error = gen_pool_add_owner(pdev->p2pdma->pool, (unsigned long)addr,
 			pci_bus_address(pdev, bar) + offset,
-			resource_size(&pgmap->res), dev_to_node(&pdev->dev));
+			resource_size(&pgmap->res), dev_to_node(&pdev->dev),
+			&p2p_pgmap->ref);
 	if (error)
-		goto pgmap_free;
+		goto pages_free;

 	pci_info(pdev, "added peer-to-peer DMA memory %pR\n",
 		 &pgmap->res);

 	return 0;

+pages_free:
+	devm_memunmap_pages(&pdev->dev, pgmap);
 pgmap_free:
-	devm_kfree(&pdev->dev, pgmap);
+	devm_kfree(&pdev->dev, p2p_pgmap);
 	return error;
 }
 EXPORT_SYMBOL_GPL(pci_p2pdma_add_resource);
@@ -284,6 +300,9 @@ static bool root_complex_whitelist(struct pci_dev *dev)
 	struct pci_dev *root = pci_get_slot(host->bus, PCI_DEVFN(0, 0));
 	unsigned short vendor, device;

+	if (iommu_present(dev->dev.bus))
+		return false;
+
 	if (!root)
 		return false;

@@ -585,19 +604,30 @@ EXPORT_SYMBOL_GPL(pci_p2pmem_find_many);
  */
 void *pci_alloc_p2pmem(struct pci_dev *pdev, size_t size)
 {
-	void *ret;
+	void *ret = NULL;
+	struct percpu_ref *ref;

+	/*
+	 * Pairs with synchronize_rcu() in pci_p2pdma_release() to
+	 * ensure pdev->p2pdma is non-NULL for the duration of the
+	 * read-lock.
+	 */
+	rcu_read_lock();
 	if (unlikely(!pdev->p2pdma))
-		return NULL;
-
-	if (unlikely(!percpu_ref_tryget_live(&pdev->p2pdma->devmap_ref)))
-		return NULL;
-
-	ret = (void *)gen_pool_alloc(pdev->p2pdma->pool, size);
+		goto out;

-	if (unlikely(!ret))
-		percpu_ref_put(&pdev->p2pdma->devmap_ref);
+	ret = (void *)gen_pool_alloc_owner(pdev->p2pdma->pool, size,
+			(void **) &ref);
+	if (!ret)
+		goto out;

+	if (unlikely(!percpu_ref_tryget_live(ref))) {
+		gen_pool_free(pdev->p2pdma->pool, (unsigned long) ret, size);
+		ret = NULL;
+		goto out;
+	}
+out:
+	rcu_read_unlock();
 	return ret;
 }
 EXPORT_SYMBOL_GPL(pci_alloc_p2pmem);
@@ -610,8 +640,11 @@ EXPORT_SYMBOL_GPL(pci_alloc_p2pmem);
  */
 void pci_free_p2pmem(struct pci_dev *pdev, void *addr, size_t size)
 {
-	gen_pool_free(pdev->p2pdma->pool, (uintptr_t)addr, size);
-	percpu_ref_put(&pdev->p2pdma->devmap_ref);
+	struct percpu_ref *ref;
+
+	gen_pool_free_owner(pdev->p2pdma->pool, (uintptr_t)addr, size,
+			(void **) &ref);
+	percpu_ref_put(ref);
 }
 EXPORT_SYMBOL_GPL(pci_free_p2pmem);

diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c
index 5eadbc3d0969..ca3793002e2f 100644
--- a/drivers/pci/pci-driver.c
+++ b/drivers/pci/pci-driver.c
@@ -524,7 +524,6 @@ static void pci_pm_default_resume_early(struct pci_dev *pci_dev)
 	pci_power_up(pci_dev);
 	pci_restore_state(pci_dev);
 	pci_pme_restore(pci_dev);
-	pci_fixup_device(pci_fixup_resume_early, pci_dev);
 }

 /*
@@ -831,18 +830,16 @@ static int pci_pm_suspend_noirq(struct device *dev)

 	if (pci_dev->skip_bus_pm) {
 		/*
-		 * The function is running for the second time in a row without
+		 * Either the device is a bridge with a child in D0 below it, or
+		 * the function is running for the second time in a row without
 		 * going through full resume, which is possible only during
-		 * suspend-to-idle in a spurious wakeup case. Moreover, the
-		 * device was originally left in D0, so its power state should
-		 * not be changed here and the device register values saved
-		 * originally should be restored on resume again.
+		 * suspend-to-idle in a spurious wakeup case. The device should
+		 * be in D0 at this point, but if it is a bridge, it may be
+		 * necessary to save its state.
 		 */
-		pci_dev->state_saved = true;
-	} else if (pci_dev->state_saved) {
-		if (pci_dev->current_state == PCI_D0)
-			pci_dev->skip_bus_pm = true;
-	} else {
+		if (!pci_dev->state_saved)
+			pci_save_state(pci_dev);
+	} else if (!pci_dev->state_saved) {
 		pci_save_state(pci_dev);
 		if (pci_power_manageable(pci_dev))
 			pci_prepare_to_sleep(pci_dev);
@@ -851,6 +848,22 @@ static int pci_pm_suspend_noirq(struct device *dev)
 	dev_dbg(dev, "PCI PM: Suspend power state: %s\n",
 		pci_power_name(pci_dev->current_state));

+	if (pci_dev->current_state == PCI_D0) {
+		pci_dev->skip_bus_pm = true;
+		/*
+		 * Per PCI PM r1.2, table 6-1, a bridge must be in D0 if any
+		 * downstream device is in D0, so avoid changing the power state
+		 * of the parent bridge by setting the skip_bus_pm flag for it.
+		 */
+		if (pci_dev->bus->self)
+			pci_dev->bus->self->skip_bus_pm = true;
+	}
+
+	if (pci_dev->skip_bus_pm && pm_suspend_no_platform()) {
+		dev_dbg(dev, "PCI PM: Skipped\n");
+		goto Fixup;
+	}
+
 	pci_pm_set_unknown_state(pci_dev);

 	/*
@@ -898,7 +911,16 @@ static int pci_pm_resume_noirq(struct device *dev)
 	if (dev_pm_smart_suspend_and_suspended(dev))
 		pm_runtime_set_active(dev);

-	pci_pm_default_resume_early(pci_dev);
+	/*
+	 * In the suspend-to-idle case, devices left in D0 during suspend will
+	 * stay in D0, so it is not necessary to restore or update their
+	 * configuration here and attempting to put them into D0 again is
+	 * pointless, so avoid doing that.
+	 */
+	if (!(pci_dev->skip_bus_pm && pm_suspend_no_platform()))
+		pci_pm_default_resume_early(pci_dev);
+
+	pci_fixup_device(pci_fixup_resume_early, pci_dev);

 	if (pci_has_legacy_pm_support(pci_dev))
 		return pci_legacy_resume_early(dev);
@@ -1194,6 +1216,7 @@ static int pci_pm_restore_noirq(struct device *dev)
 	}

 	pci_pm_default_resume_early(pci_dev);
+	pci_fixup_device(pci_fixup_resume_early, pci_dev);

 	if (pci_has_legacy_pm_support(pci_dev))
 		return pci_legacy_resume_early(dev);