summaryrefslogtreecommitdiffstats
path: root/mm
diff options
context:
space:
mode:
authorJason Gunthorpe2019-07-02 20:07:52 +0200
committerJason Gunthorpe2019-07-02 20:10:45 +0200
commitcc5dfd59e375f4d0f2b64643723d16b38b2f2d78 (patch)
tree0a8f526169ee889d6af4e7679122c946773ec33a /mm
parentMerge tag 'v5.2-rc7' into rdma.git hmm (diff)
parentmm: don't select MIGRATE_VMA_HELPER from HMM_MIRROR (diff)
downloadkernel-qcow2-linux-cc5dfd59e375f4d0f2b64643723d16b38b2f2d78.tar.gz
kernel-qcow2-linux-cc5dfd59e375f4d0f2b64643723d16b38b2f2d78.tar.xz
kernel-qcow2-linux-cc5dfd59e375f4d0f2b64643723d16b38b2f2d78.zip
Merge branch 'hmm-devmem-cleanup.4' into rdma.git hmm
Christoph Hellwig says: ==================== Below is a series that cleans up the dev_pagemap interface so that it is more easily usable, which removes the need to wrap it in hmm and thus allowing to kill a lot of code Changes since v3: - pull in "mm/swap: Fix release_pages() when releasing devmap pages" and rebase the other patches on top of that - fold the hmm_devmem_add_resource into the DEVICE_PUBLIC memory removal patch - remove _vm_normal_page as it isn't needed without DEVICE_PUBLIC memory - pick up various ACKs Changes since v2: - fix nvdimm kunit build - add a new memory type for device dax - fix a few issues in intermediate patches that didn't show up in the end result - incorporate feedback from Michal Hocko, including killing of the DEVICE_PUBLIC memory type entirely Changes since v1: - rebase - also switch p2pdma to the internal refcount - add type checking for pgmap->type - rename the migrate method to migrate_to_ram - cleanup the altmap_valid flag - various tidbits from the reviews ==================== Conflicts resolved by: - Keeping Ira's version of the code in swap.c - Using the delete for the section in hmm.rst - Using the delete for the devmap code in hmm.c and .h * branch 'hmm-devmem-cleanup.4': (24 commits) mm: don't select MIGRATE_VMA_HELPER from HMM_MIRROR mm: remove the HMM config option mm: sort out the DEVICE_PRIVATE Kconfig mess mm: simplify ZONE_DEVICE page private data mm: remove hmm_devmem_add mm: remove hmm_vma_alloc_locked_page nouveau: use devm_memremap_pages directly nouveau: use alloc_page_vma directly PCI/P2PDMA: use the dev_pagemap internal refcount device-dax: use the dev_pagemap internal refcount memremap: provide an optional internal refcount in struct dev_pagemap memremap: replace the altmap_valid field with a PGMAP_ALTMAP_VALID flag memremap: remove the data field in struct dev_pagemap memremap: add a migrate_to_ram method to struct dev_pagemap_ops memremap: lift the devmap_enable manipulation into devm_memremap_pages memremap: pass a struct dev_pagemap to ->kill and ->cleanup memremap: move dev_pagemap callbacks into a separate structure memremap: validate the pagemap type passed to devm_memremap_pages mm: factor out a devm_request_free_mem_region helper mm: export alloc_pages_vma ... Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
Diffstat (limited to 'mm')
-rw-r--r--mm/Kconfig50
-rw-r--r--mm/Makefile2
-rw-r--r--mm/gup.c7
-rw-r--r--mm/hmm.c284
-rw-r--r--mm/madvise.c2
-rw-r--r--mm/memcontrol.c13
-rw-r--r--mm/memory-failure.c6
-rw-r--r--mm/memory.c49
-rw-r--r--mm/memory_hotplug.c6
-rw-r--r--mm/mempolicy.c1
-rw-r--r--mm/migrate.c28
-rw-r--r--mm/page_alloc.c13
12 files changed, 31 insertions, 430 deletions
diff --git a/mm/Kconfig b/mm/Kconfig
index f0c76ba47695..40cf0562412d 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -669,47 +669,17 @@ config ZONE_DEVICE
If FS_DAX is enabled, then say Y.
-config ARCH_HAS_HMM_MIRROR
- bool
- default y
- depends on (X86_64 || PPC64)
- depends on MMU && 64BIT
-
-config ARCH_HAS_HMM_DEVICE
- bool
- default y
- depends on (X86_64 || PPC64)
- depends on MEMORY_HOTPLUG
- depends on MEMORY_HOTREMOVE
- depends on SPARSEMEM_VMEMMAP
- depends on ARCH_HAS_ZONE_DEVICE
- select XARRAY_MULTI
-
-config ARCH_HAS_HMM
- bool
- default y
- depends on (X86_64 || PPC64)
- depends on ZONE_DEVICE
- depends on MMU && 64BIT
- depends on MEMORY_HOTPLUG
- depends on MEMORY_HOTREMOVE
- depends on SPARSEMEM_VMEMMAP
-
config MIGRATE_VMA_HELPER
bool
config DEV_PAGEMAP_OPS
bool
-config HMM
- bool
- select MMU_NOTIFIER
- select MIGRATE_VMA_HELPER
-
config HMM_MIRROR
bool "HMM mirror CPU page table into a device page table"
- depends on ARCH_HAS_HMM
- select HMM
+ depends on (X86_64 || PPC64)
+ depends on MMU && 64BIT
+ select MMU_NOTIFIER
help
Select HMM_MIRROR if you want to mirror range of the CPU page table of a
process into a device page table. Here, mirror means "keep synchronized".
@@ -719,8 +689,7 @@ config HMM_MIRROR
config DEVICE_PRIVATE
bool "Unaddressable device memory (GPU memory, ...)"
- depends on ARCH_HAS_HMM
- select HMM
+ depends on ZONE_DEVICE
select DEV_PAGEMAP_OPS
help
@@ -728,17 +697,6 @@ config DEVICE_PRIVATE
memory; i.e., memory that is only accessible from the device (or
group of devices). You likely also want to select HMM_MIRROR.
-config DEVICE_PUBLIC
- bool "Addressable device memory (like GPU memory)"
- depends on ARCH_HAS_HMM
- select HMM
- select DEV_PAGEMAP_OPS
-
- help
- Allows creation of struct pages to represent addressable device
- memory; i.e., memory that is accessible from both the device and
- the CPU
-
config FRAME_VECTOR
bool
diff --git a/mm/Makefile b/mm/Makefile
index ac5e5ba78874..91c99040065c 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -102,5 +102,5 @@ obj-$(CONFIG_FRAME_VECTOR) += frame_vector.o
obj-$(CONFIG_DEBUG_PAGE_REF) += debug_page_ref.o
obj-$(CONFIG_HARDENED_USERCOPY) += usercopy.o
obj-$(CONFIG_PERCPU_STATS) += percpu-stats.o
-obj-$(CONFIG_HMM) += hmm.o
+obj-$(CONFIG_HMM_MIRROR) += hmm.o
obj-$(CONFIG_MEMFD_CREATE) += memfd.o
diff --git a/mm/gup.c b/mm/gup.c
index ddde097cf9e4..fe131d879c70 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -605,13 +605,6 @@ static int get_gate_page(struct mm_struct *mm, unsigned long address,
if ((gup_flags & FOLL_DUMP) || !is_zero_pfn(pte_pfn(*pte)))
goto unmap;
*page = pte_page(*pte);
-
- /*
- * This should never happen (a device public page in the gate
- * area).
- */
- if (is_device_public_page(*page))
- goto unmap;
}
if (unlikely(!try_get_page(*page))) {
ret = -ENOMEM;
diff --git a/mm/hmm.c b/mm/hmm.c
index c1bdcef403ee..d48b9283725a 100644
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -26,9 +26,6 @@
#include <linux/mmu_notifier.h>
#include <linux/memory_hotplug.h>
-#define PA_SECTION_SIZE (1UL << PA_SECTION_SHIFT)
-
-#if IS_ENABLED(CONFIG_HMM_MIRROR)
static const struct mmu_notifier_ops hmm_mmu_notifier_ops;
/**
@@ -1287,284 +1284,3 @@ long hmm_range_dma_unmap(struct hmm_range *range,
return cpages;
}
EXPORT_SYMBOL(hmm_range_dma_unmap);
-#endif /* IS_ENABLED(CONFIG_HMM_MIRROR) */
-
-
-#if IS_ENABLED(CONFIG_DEVICE_PRIVATE) || IS_ENABLED(CONFIG_DEVICE_PUBLIC)
-struct page *hmm_vma_alloc_locked_page(struct vm_area_struct *vma,
- unsigned long addr)
-{
- struct page *page;
-
- page = alloc_page_vma(GFP_HIGHUSER, vma, addr);
- if (!page)
- return NULL;
- lock_page(page);
- return page;
-}
-EXPORT_SYMBOL(hmm_vma_alloc_locked_page);
-
-
-static void hmm_devmem_ref_release(struct percpu_ref *ref)
-{
- struct hmm_devmem *devmem;
-
- devmem = container_of(ref, struct hmm_devmem, ref);
- complete(&devmem->completion);
-}
-
-static void hmm_devmem_ref_exit(struct percpu_ref *ref)
-{
- struct hmm_devmem *devmem;
-
- devmem = container_of(ref, struct hmm_devmem, ref);
- wait_for_completion(&devmem->completion);
- percpu_ref_exit(ref);
-}
-
-static void hmm_devmem_ref_kill(struct percpu_ref *ref)
-{
- percpu_ref_kill(ref);
-}
-
-static vm_fault_t hmm_devmem_fault(struct vm_area_struct *vma,
- unsigned long addr,
- const struct page *page,
- unsigned int flags,
- pmd_t *pmdp)
-{
- struct hmm_devmem *devmem = page->pgmap->data;
-
- return devmem->ops->fault(devmem, vma, addr, page, flags, pmdp);
-}
-
-static void hmm_devmem_free(struct page *page, void *data)
-{
- struct hmm_devmem *devmem = data;
-
- page->mapping = NULL;
-
- devmem->ops->free(devmem, page);
-}
-
-/*
- * hmm_devmem_add() - hotplug ZONE_DEVICE memory for device memory
- *
- * @ops: memory event device driver callback (see struct hmm_devmem_ops)
- * @device: device struct to bind the resource too
- * @size: size in bytes of the device memory to add
- * Return: pointer to new hmm_devmem struct ERR_PTR otherwise
- *
- * This function first finds an empty range of physical address big enough to
- * contain the new resource, and then hotplugs it as ZONE_DEVICE memory, which
- * in turn allocates struct pages. It does not do anything beyond that; all
- * events affecting the memory will go through the various callbacks provided
- * by hmm_devmem_ops struct.
- *
- * Device driver should call this function during device initialization and
- * is then responsible of memory management. HMM only provides helpers.
- */
-struct hmm_devmem *hmm_devmem_add(const struct hmm_devmem_ops *ops,
- struct device *device,
- unsigned long size)
-{
- struct hmm_devmem *devmem;
- resource_size_t addr;
- void *result;
- int ret;
-
- dev_pagemap_get_ops();
-
- devmem = devm_kzalloc(device, sizeof(*devmem), GFP_KERNEL);
- if (!devmem)
- return ERR_PTR(-ENOMEM);
-
- init_completion(&devmem->completion);
- devmem->pfn_first = -1UL;
- devmem->pfn_last = -1UL;
- devmem->resource = NULL;
- devmem->device = device;
- devmem->ops = ops;
-
- ret = percpu_ref_init(&devmem->ref, &hmm_devmem_ref_release,
- 0, GFP_KERNEL);
- if (ret)
- return ERR_PTR(ret);
-
- size = ALIGN(size, PA_SECTION_SIZE);
- addr = min((unsigned long)iomem_resource.end,
- (1UL << MAX_PHYSMEM_BITS) - 1);
- addr = addr - size + 1UL;
-
- /*
- * FIXME add a new helper to quickly walk resource tree and find free
- * range
- *
- * FIXME what about ioport_resource resource ?
- */
- for (; addr > size && addr >= iomem_resource.start; addr -= size) {
- ret = region_intersects(addr, size, 0, IORES_DESC_NONE);
- if (ret != REGION_DISJOINT)
- continue;
-
- devmem->resource = devm_request_mem_region(device, addr, size,
- dev_name(device));
- if (!devmem->resource)
- return ERR_PTR(-ENOMEM);
- break;
- }
- if (!devmem->resource)
- return ERR_PTR(-ERANGE);
-
- devmem->resource->desc = IORES_DESC_DEVICE_PRIVATE_MEMORY;
- devmem->pfn_first = devmem->resource->start >> PAGE_SHIFT;
- devmem->pfn_last = devmem->pfn_first +
- (resource_size(devmem->resource) >> PAGE_SHIFT);
- devmem->page_fault = hmm_devmem_fault;
-
- devmem->pagemap.type = MEMORY_DEVICE_PRIVATE;
- devmem->pagemap.res = *devmem->resource;
- devmem->pagemap.page_free = hmm_devmem_free;
- devmem->pagemap.altmap_valid = false;
- devmem->pagemap.ref = &devmem->ref;
- devmem->pagemap.data = devmem;
- devmem->pagemap.kill = hmm_devmem_ref_kill;
- devmem->pagemap.cleanup = hmm_devmem_ref_exit;
-
- result = devm_memremap_pages(devmem->device, &devmem->pagemap);
- if (IS_ERR(result))
- return result;
- return devmem;
-}
-EXPORT_SYMBOL_GPL(hmm_devmem_add);
-
-struct hmm_devmem *hmm_devmem_add_resource(const struct hmm_devmem_ops *ops,
- struct device *device,
- struct resource *res)
-{
- struct hmm_devmem *devmem;
- void *result;
- int ret;
-
- if (res->desc != IORES_DESC_DEVICE_PUBLIC_MEMORY)
- return ERR_PTR(-EINVAL);
-
- dev_pagemap_get_ops();
-
- devmem = devm_kzalloc(device, sizeof(*devmem), GFP_KERNEL);
- if (!devmem)
- return ERR_PTR(-ENOMEM);
-
- init_completion(&devmem->completion);
- devmem->pfn_first = -1UL;
- devmem->pfn_last = -1UL;
- devmem->resource = res;
- devmem->device = device;
- devmem->ops = ops;
-
- ret = percpu_ref_init(&devmem->ref, &hmm_devmem_ref_release,
- 0, GFP_KERNEL);
- if (ret)
- return ERR_PTR(ret);
-
- devmem->pfn_first = devmem->resource->start >> PAGE_SHIFT;
- devmem->pfn_last = devmem->pfn_first +
- (resource_size(devmem->resource) >> PAGE_SHIFT);
- devmem->page_fault = hmm_devmem_fault;
-
- devmem->pagemap.type = MEMORY_DEVICE_PUBLIC;
- devmem->pagemap.res = *devmem->resource;
- devmem->pagemap.page_free = hmm_devmem_free;
- devmem->pagemap.altmap_valid = false;
- devmem->pagemap.ref = &devmem->ref;
- devmem->pagemap.data = devmem;
- devmem->pagemap.kill = hmm_devmem_ref_kill;
- devmem->pagemap.cleanup = hmm_devmem_ref_exit;
-
- result = devm_memremap_pages(devmem->device, &devmem->pagemap);
- if (IS_ERR(result))
- return result;
- return devmem;
-}
-EXPORT_SYMBOL_GPL(hmm_devmem_add_resource);
-
-/*
- * A device driver that wants to handle multiple devices memory through a
- * single fake device can use hmm_device to do so. This is purely a helper
- * and it is not needed to make use of any HMM functionality.
- */
-#define HMM_DEVICE_MAX 256
-
-static DECLARE_BITMAP(hmm_device_mask, HMM_DEVICE_MAX);
-static DEFINE_SPINLOCK(hmm_device_lock);
-static struct class *hmm_device_class;
-static dev_t hmm_device_devt;
-
-static void hmm_device_release(struct device *device)
-{
- struct hmm_device *hmm_device;
-
- hmm_device = container_of(device, struct hmm_device, device);
- spin_lock(&hmm_device_lock);
- clear_bit(hmm_device->minor, hmm_device_mask);
- spin_unlock(&hmm_device_lock);
-
- kfree(hmm_device);
-}
-
-struct hmm_device *hmm_device_new(void *drvdata)
-{
- struct hmm_device *hmm_device;
-
- hmm_device = kzalloc(sizeof(*hmm_device), GFP_KERNEL);
- if (!hmm_device)
- return ERR_PTR(-ENOMEM);
-
- spin_lock(&hmm_device_lock);
- hmm_device->minor = find_first_zero_bit(hmm_device_mask, HMM_DEVICE_MAX);
- if (hmm_device->minor >= HMM_DEVICE_MAX) {
- spin_unlock(&hmm_device_lock);
- kfree(hmm_device);
- return ERR_PTR(-EBUSY);
- }
- set_bit(hmm_device->minor, hmm_device_mask);
- spin_unlock(&hmm_device_lock);
-
- dev_set_name(&hmm_device->device, "hmm_device%d", hmm_device->minor);
- hmm_device->device.devt = MKDEV(MAJOR(hmm_device_devt),
- hmm_device->minor);
- hmm_device->device.release = hmm_device_release;
- dev_set_drvdata(&hmm_device->device, drvdata);
- hmm_device->device.class = hmm_device_class;
- device_initialize(&hmm_device->device);
-
- return hmm_device;
-}
-EXPORT_SYMBOL(hmm_device_new);
-
-void hmm_device_put(struct hmm_device *hmm_device)
-{
- put_device(&hmm_device->device);
-}
-EXPORT_SYMBOL(hmm_device_put);
-
-static int __init hmm_init(void)
-{
- int ret;
-
- ret = alloc_chrdev_region(&hmm_device_devt, 0,
- HMM_DEVICE_MAX,
- "hmm_device");
- if (ret)
- return ret;
-
- hmm_device_class = class_create(THIS_MODULE, "hmm_device");
- if (IS_ERR(hmm_device_class)) {
- unregister_chrdev_region(hmm_device_devt, HMM_DEVICE_MAX);
- return PTR_ERR(hmm_device_class);
- }
- return 0;
-}
-
-device_initcall(hmm_init);
-#endif /* CONFIG_DEVICE_PRIVATE || CONFIG_DEVICE_PUBLIC */
diff --git a/mm/madvise.c b/mm/madvise.c
index 628022e674a7..968df3aa069f 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -354,7 +354,7 @@ static int madvise_free_pte_range(pmd_t *pmd, unsigned long addr,
continue;
}
- page = _vm_normal_page(vma, addr, ptent, true);
+ page = vm_normal_page(vma, addr, ptent);
if (!page)
continue;
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index ba9138a4a1de..d2a6454fa0bd 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -4793,7 +4793,7 @@ enum mc_target_type {
static struct page *mc_handle_present_pte(struct vm_area_struct *vma,
unsigned long addr, pte_t ptent)
{
- struct page *page = _vm_normal_page(vma, addr, ptent, true);
+ struct page *page = vm_normal_page(vma, addr, ptent);
if (!page || !page_mapped(page))
return NULL;
@@ -4994,8 +4994,8 @@ out:
* 2(MC_TARGET_SWAP): if the swap entry corresponding to this pte is a
* target for charge migration. if @target is not NULL, the entry is stored
* in target->ent.
- * 3(MC_TARGET_DEVICE): like MC_TARGET_PAGE but page is MEMORY_DEVICE_PUBLIC
- * or MEMORY_DEVICE_PRIVATE (so ZONE_DEVICE page and thus not on the lru).
+ * 3(MC_TARGET_DEVICE): like MC_TARGET_PAGE but page is MEMORY_DEVICE_PRIVATE
+ * (so ZONE_DEVICE page and thus not on the lru).
* For now we such page is charge like a regular page would be as for all
* intent and purposes it is just special memory taking the place of a
* regular page.
@@ -5029,8 +5029,7 @@ static enum mc_target_type get_mctgt_type(struct vm_area_struct *vma,
*/
if (page->mem_cgroup == mc.from) {
ret = MC_TARGET_PAGE;
- if (is_device_private_page(page) ||
- is_device_public_page(page))
+ if (is_device_private_page(page))
ret = MC_TARGET_DEVICE;
if (target)
target->page = page;
@@ -5101,8 +5100,8 @@ static int mem_cgroup_count_precharge_pte_range(pmd_t *pmd,
if (ptl) {
/*
* Note their can not be MC_TARGET_DEVICE for now as we do not
- * support transparent huge page with MEMORY_DEVICE_PUBLIC or
- * MEMORY_DEVICE_PRIVATE but this might change.
+ * support transparent huge page with MEMORY_DEVICE_PRIVATE but
+ * this might change.
*/
if (get_mctgt_type_thp(vma, addr, *pmd, NULL) == MC_TARGET_PAGE)
mc.precharge += HPAGE_PMD_NR;
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index d9cc6606f409..31e7c7b424a1 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -1177,16 +1177,12 @@ static int memory_failure_dev_pagemap(unsigned long pfn, int flags,
goto unlock;
}
- switch (pgmap->type) {
- case MEMORY_DEVICE_PRIVATE:
- case MEMORY_DEVICE_PUBLIC:
+ if (pgmap->type == MEMORY_DEVICE_PRIVATE) {
/*
* TODO: Handle HMM pages which may need coordination
* with device-side memory.
*/
goto unlock;
- default:
- break;
}
/*
diff --git a/mm/memory.c b/mm/memory.c
index ddf20bd0c317..d437ccdb210c 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -571,8 +571,8 @@ static void print_bad_pte(struct vm_area_struct *vma, unsigned long addr,
* PFNMAP mappings in order to support COWable mappings.
*
*/
-struct page *_vm_normal_page(struct vm_area_struct *vma, unsigned long addr,
- pte_t pte, bool with_public_device)
+struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr,
+ pte_t pte)
{
unsigned long pfn = pte_pfn(pte);
@@ -585,29 +585,6 @@ struct page *_vm_normal_page(struct vm_area_struct *vma, unsigned long addr,
return NULL;
if (is_zero_pfn(pfn))
return NULL;
-
- /*
- * Device public pages are special pages (they are ZONE_DEVICE
- * pages but different from persistent memory). They behave
- * allmost like normal pages. The difference is that they are
- * not on the lru and thus should never be involve with any-
- * thing that involve lru manipulation (mlock, numa balancing,
- * ...).
- *
- * This is why we still want to return NULL for such page from
- * vm_normal_page() so that we do not have to special case all
- * call site of vm_normal_page().
- */
- if (likely(pfn <= highest_memmap_pfn)) {
- struct page *page = pfn_to_page(pfn);
-
- if (is_device_public_page(page)) {
- if (with_public_device)
- return page;
- return NULL;
- }
- }
-
if (pte_devmap(pte))
return NULL;
@@ -797,17 +774,6 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
rss[mm_counter(page)]++;
} else if (pte_devmap(pte)) {
page = pte_page(pte);
-
- /*
- * Cache coherent device memory behave like regular page and
- * not like persistent memory page. For more informations see
- * MEMORY_DEVICE_CACHE_COHERENT in memory_hotplug.h
- */
- if (is_device_public_page(page)) {
- get_page(page);
- page_dup_rmap(page, false);
- rss[mm_counter(page)]++;
- }
}
out_set_pte:
@@ -1063,7 +1029,7 @@ again:
if (pte_present(ptent)) {
struct page *page;
- page = _vm_normal_page(vma, addr, ptent, true);
+ page = vm_normal_page(vma, addr, ptent);
if (unlikely(details) && page) {
/*
* unmap_shared_mapping_pages() wants to
@@ -2782,13 +2748,8 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
migration_entry_wait(vma->vm_mm, vmf->pmd,
vmf->address);
} else if (is_device_private_entry(entry)) {
- /*
- * For un-addressable device memory we call the pgmap
- * fault handler callback. The callback must migrate
- * the page back to some CPU accessible page.
- */
- ret = device_private_entry_fault(vma, vmf->address, entry,
- vmf->flags, vmf->pmd);
+ vmf->page = device_private_entry_to_page(entry);
+ ret = vmf->page->pgmap->ops->migrate_to_ram(vmf);
} else if (is_hwpoison_entry(entry)) {
ret = VM_FAULT_HWPOISON;
} else {
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index e096c987d261..6166ba5a15f3 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -557,10 +557,8 @@ void __remove_pages(struct zone *zone, unsigned long phys_start_pfn,
int sections_to_remove;
/* In the ZONE_DEVICE case device driver owns the memory region */
- if (is_dev_zone(zone)) {
- if (altmap)
- map_offset = vmem_altmap_offset(altmap);
- }
+ if (is_dev_zone(zone))
+ map_offset = vmem_altmap_offset(altmap);
clear_zone_contiguous(zone);
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index fdcb73536319..f48693f75b37 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -2098,6 +2098,7 @@ alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma,
out:
return page;
}
+EXPORT_SYMBOL(alloc_pages_vma);
/**
* alloc_pages_current - Allocate pages.
diff --git a/mm/migrate.c b/mm/migrate.c
index f2ecc2855a12..78d45e184457 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -246,8 +246,6 @@ static bool remove_migration_pte(struct page *page, struct vm_area_struct *vma,
if (is_device_private_page(new)) {
entry = make_device_private_entry(new, pte_write(pte));
pte = swp_entry_to_pte(entry);
- } else if (is_device_public_page(new)) {
- pte = pte_mkdevmap(pte);
}
}
@@ -381,7 +379,6 @@ static int expected_page_refs(struct address_space *mapping, struct page *page)
* ZONE_DEVICE pages.
*/
expected_count += is_device_private_page(page);
- expected_count += is_device_public_page(page);
if (mapping)
expected_count += hpage_nr_pages(page) + page_has_private(page);
@@ -994,10 +991,7 @@ static int move_to_new_page(struct page *newpage, struct page *page,
if (!PageMappingFlags(page))
page->mapping = NULL;
- if (unlikely(is_zone_device_page(newpage))) {
- if (is_device_public_page(newpage))
- flush_dcache_page(newpage);
- } else
+ if (likely(!is_zone_device_page(newpage)))
flush_dcache_page(newpage);
}
@@ -2265,7 +2259,7 @@ again:
pfn = 0;
goto next;
}
- page = _vm_normal_page(migrate->vma, addr, pte, true);
+ page = vm_normal_page(migrate->vma, addr, pte);
mpfn = migrate_pfn(pfn) | MIGRATE_PFN_MIGRATE;
mpfn |= pte_write(pte) ? MIGRATE_PFN_WRITE : 0;
}
@@ -2406,16 +2400,7 @@ static bool migrate_vma_check_page(struct page *page)
* FIXME proper solution is to rework migration_entry_wait() so
* it does not need to take a reference on page.
*/
- if (is_device_private_page(page))
- return true;
-
- /*
- * Only allow device public page to be migrated and account for
- * the extra reference count imply by ZONE_DEVICE pages.
- */
- if (!is_device_public_page(page))
- return false;
- extra++;
+ return is_device_private_page(page);
}
/* For file back page */
@@ -2665,11 +2650,6 @@ static void migrate_vma_insert_page(struct migrate_vma *migrate,
swp_entry = make_device_private_entry(page, vma->vm_flags & VM_WRITE);
entry = swp_entry_to_pte(swp_entry);
- } else if (is_device_public_page(page)) {
- entry = pte_mkold(mk_pte(page, READ_ONCE(vma->vm_page_prot)));
- if (vma->vm_flags & VM_WRITE)
- entry = pte_mkwrite(pte_mkdirty(entry));
- entry = pte_mkdevmap(entry);
}
} else {
entry = mk_pte(page, vma->vm_page_prot);
@@ -2789,7 +2769,7 @@ static void migrate_vma_pages(struct migrate_vma *migrate)
migrate->src[i] &= ~MIGRATE_PFN_MIGRATE;
continue;
}
- } else if (!is_device_public_page(newpage)) {
+ } else {
/*
* Other types of ZONE_DEVICE page are not
* supported.
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index d66bc8abe0af..c0e031c52db5 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -5853,6 +5853,7 @@ void __ref memmap_init_zone_device(struct zone *zone,
{
unsigned long pfn, end_pfn = start_pfn + size;
struct pglist_data *pgdat = zone->zone_pgdat;
+ struct vmem_altmap *altmap = pgmap_altmap(pgmap);
unsigned long zone_idx = zone_idx(zone);
unsigned long start = jiffies;
int nid = pgdat->node_id;
@@ -5865,9 +5866,7 @@ void __ref memmap_init_zone_device(struct zone *zone,
* of the pages reserved for the memmap, so we can just jump to
* the end of that region and start processing the device pages.
*/
- if (pgmap->altmap_valid) {
- struct vmem_altmap *altmap = &pgmap->altmap;
-
+ if (altmap) {
start_pfn = altmap->base_pfn + vmem_altmap_offset(altmap);
size = end_pfn - start_pfn;
}
@@ -5887,12 +5886,12 @@ void __ref memmap_init_zone_device(struct zone *zone,
__SetPageReserved(page);
/*
- * ZONE_DEVICE pages union ->lru with a ->pgmap back
- * pointer and hmm_data. It is a bug if a ZONE_DEVICE
- * page is ever freed or placed on a driver-private list.
+ * ZONE_DEVICE pages union ->lru with a ->pgmap back pointer
+ * and zone_device_data. It is a bug if a ZONE_DEVICE page is
+ * ever freed or placed on a driver-private list.
*/
page->pgmap = pgmap;
- page->hmm_data = 0;
+ page->zone_device_data = NULL;
/*
* Mark the block movable so that blocks are reserved for