summary refs log tree commit diff stats
path: root/mm/memory.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/memory.c')
-rw-r--r-- mm/memory.c 60
1 files changed, 50 insertions, 10 deletions
diff --git a/mm/memory.c b/mm/memory.c
index 0a72f821ccdc..32e9b7aec366 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2935,6 +2935,19 @@ static inline bool transhuge_vma_suitable(struct vm_area_struct *vma,
return true;
}
+static void deposit_prealloc_pte(struct fault_env *fe)
+{
+ struct vm_area_struct *vma = fe->vma;
+
+ pgtable_trans_huge_deposit(vma->vm_mm, fe->pmd, fe->prealloc_pte);
+ /*
+ * The preallocated page table in fe->prealloc_pte has just been
+ * passed to pgtable_trans_huge_deposit() for fe->pmd, so it is
+ * consumed from here on: account for it in the mm's nr_ptes.
+ *
+ * The caller in do_set_pmd() invokes this helper after taking
+ * pmd_lock(), i.e. the deposit happens with the pmd lock held.
+ */
+ atomic_long_inc(&vma->vm_mm->nr_ptes);
+ fe->prealloc_pte = 0; /* consumed: stops the fault_handled cleanup in alloc_set_pte() from freeing it; NOTE(review): if prealloc_pte is a pointer type, NULL would be the idiomatic value - confirm */
+}
+
static int do_set_pmd(struct fault_env *fe, struct page *page)
{
struct vm_area_struct *vma = fe->vma;
@@ -2949,6 +2962,17 @@ static int do_set_pmd(struct fault_env *fe, struct page *page)
ret = VM_FAULT_FALLBACK;
page = compound_head(page);
+ /*
+ * Archs like ppc64 need additional space to store information
+ * related to pte entry. Use the preallocated table for that.
+ */
+ if (arch_needs_pgtable_deposit() && !fe->prealloc_pte) {
+ fe->prealloc_pte = pte_alloc_one(vma->vm_mm, fe->address);
+ if (!fe->prealloc_pte)
+ return VM_FAULT_OOM;
+ smp_wmb(); /* See comment in __pte_alloc() */
+ }
+
fe->ptl = pmd_lock(vma->vm_mm, fe->pmd);
if (unlikely(!pmd_none(*fe->pmd)))
goto out;
@@ -2962,6 +2986,11 @@ static int do_set_pmd(struct fault_env *fe, struct page *page)
add_mm_counter(vma->vm_mm, MM_FILEPAGES, HPAGE_PMD_NR);
page_add_file_rmap(page, true);
+ /*
+ * deposit and withdraw with pmd lock held
+ */
+ if (arch_needs_pgtable_deposit())
+ deposit_prealloc_pte(fe);
set_pmd_at(vma->vm_mm, haddr, fe->pmd, entry);
@@ -2971,6 +3000,13 @@ static int do_set_pmd(struct fault_env *fe, struct page *page)
ret = 0;
count_vm_event(THP_FILE_MAPPED);
out:
+ /*
+ * If we are going to fall back to pte mapping, do a
+ * withdraw with pmd lock held.
+ */
+ if (arch_needs_pgtable_deposit() && ret == VM_FAULT_FALLBACK)
+ fe->prealloc_pte = pgtable_trans_huge_withdraw(vma->vm_mm,
+ fe->pmd);
spin_unlock(fe->ptl);
return ret;
}
@@ -3010,18 +3046,20 @@ int alloc_set_pte(struct fault_env *fe, struct mem_cgroup *memcg,
ret = do_set_pmd(fe, page);
if (ret != VM_FAULT_FALLBACK)
- return ret;
+ goto fault_handled;
}
if (!fe->pte) {
ret = pte_alloc_one_map(fe);
if (ret)
- return ret;
+ goto fault_handled;
}
/* Re-check under ptl */
- if (unlikely(!pte_none(*fe->pte)))
- return VM_FAULT_NOPAGE;
+ if (unlikely(!pte_none(*fe->pte))) {
+ ret = VM_FAULT_NOPAGE;
+ goto fault_handled;
+ }
flush_icache_page(vma, page);
entry = mk_pte(page, vma->vm_page_prot);
@@ -3041,8 +3079,15 @@ int alloc_set_pte(struct fault_env *fe, struct mem_cgroup *memcg,
/* no need to invalidate: a not-present page won't be cached */
update_mmu_cache(vma, fe->address, fe->pte);
+ ret = 0;
- return 0;
+fault_handled:
+ /* preallocated pagetable is unused: free it */
+ if (fe->prealloc_pte) {
+ pte_free(fe->vma->vm_mm, fe->prealloc_pte);
+ fe->prealloc_pte = 0;
+ }
+ return ret;
}
static unsigned long fault_around_bytes __read_mostly =
@@ -3141,11 +3186,6 @@ static int do_fault_around(struct fault_env *fe, pgoff_t start_pgoff)
fe->vma->vm_ops->map_pages(fe, start_pgoff, end_pgoff);
- /* preallocated pagetable is unused: free it */
- if (fe->prealloc_pte) {
- pte_free(fe->vma->vm_mm, fe->prealloc_pte);
- fe->prealloc_pte = 0;
- }
/* Huge page is mapped? Page fault is solved */
if (pmd_trans_huge(*fe->pmd)) {
ret = VM_FAULT_NOPAGE;