summaryrefslogtreecommitdiffstats
path: root/mm/memory.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/memory.c')
-rw-r--r--mm/memory.c189
1 files changed, 124 insertions, 65 deletions
diff --git a/mm/memory.c b/mm/memory.c
index a728bed16c20..53373b7a1512 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -400,10 +400,17 @@ void tlb_remove_table(struct mmu_gather *tlb, void *table)
#endif /* CONFIG_HAVE_RCU_TABLE_FREE */
-/* tlb_gather_mmu
- * Called to initialize an (on-stack) mmu_gather structure for page-table
- * tear-down from @mm. The @fullmm argument is used when @mm is without
- * users and we're going to destroy the full address space (exit/execve).
+/**
+ * tlb_gather_mmu - initialize an mmu_gather structure for page-table tear-down
+ * @tlb: the mmu_gather structure to initialize
+ * @mm: the mm_struct of the target address space
+ * @start: start of the region that will be removed from the page-table
+ * @end: end of the region that will be removed from the page-table
+ *
+ * Called to initialize an (on-stack) mmu_gather structure for page-table
+ * tear-down from @mm. The @start and @end are set to 0 and -1
+ * respectively when @mm is without users and we're going to destroy
+ * the full address space (exit/execve).
*/
void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm,
unsigned long start, unsigned long end)
@@ -438,7 +445,7 @@ static void free_pte_range(struct mmu_gather *tlb, pmd_t *pmd,
pgtable_t token = pmd_pgtable(*pmd);
pmd_clear(pmd);
pte_free_tlb(tlb, token, addr);
- atomic_long_dec(&tlb->mm->nr_ptes);
+ mm_dec_nr_ptes(tlb->mm);
}
static inline void free_pmd_range(struct mmu_gather *tlb, pud_t *pud,
@@ -506,6 +513,7 @@ static inline void free_pud_range(struct mmu_gather *tlb, p4d_t *p4d,
pud = pud_offset(p4d, start);
p4d_clear(p4d);
pud_free_tlb(tlb, pud, start);
+ mm_dec_nr_puds(tlb->mm);
}
static inline void free_p4d_range(struct mmu_gather *tlb, pgd_t *pgd,
@@ -665,7 +673,7 @@ int __pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address)
ptl = pmd_lock(mm, pmd);
if (likely(pmd_none(*pmd))) { /* Has another populated it ? */
- atomic_long_inc(&mm->nr_ptes);
+ mm_inc_nr_ptes(mm);
pmd_populate(mm, pmd, new);
new = NULL;
}
@@ -2554,7 +2562,11 @@ static int wp_page_copy(struct vm_fault *vmf)
put_page(new_page);
pte_unmap_unlock(vmf->pte, vmf->ptl);
- mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
+ /*
+ * No need to double call mmu_notifier->invalidate_range() callback as
+ * the above ptep_clear_flush_notify() did already call it.
+ */
+ mmu_notifier_invalidate_range_only_end(mm, mmun_start, mmun_end);
if (old_page) {
/*
* Don't let another task, with possibly unlocked vma,
@@ -2787,8 +2799,37 @@ static inline void unmap_mapping_range_tree(struct rb_root_cached *root,
}
/**
+ * unmap_mapping_pages() - Unmap pages from processes.
+ * @mapping: The address space containing pages to be unmapped.
+ * @start: Index of first page to be unmapped.
+ * @nr: Number of pages to be unmapped. 0 to unmap to end of file.
+ * @even_cows: Whether to unmap even private COWed pages.
+ *
+ * Unmap the pages in this address space from any userspace process which
+ * has them mmaped. Generally, you want to remove COWed pages as well when
+ * a file is being truncated, but not when invalidating pages from the page
+ * cache.
+ */
+void unmap_mapping_pages(struct address_space *mapping, pgoff_t start,
+ pgoff_t nr, bool even_cows)
+{
+ struct zap_details details = { };
+
+ details.check_mapping = even_cows ? NULL : mapping;
+ details.first_index = start;
+ details.last_index = start + nr - 1;
+ if (details.last_index < details.first_index)
+ details.last_index = ULONG_MAX;
+
+ i_mmap_lock_write(mapping);
+ if (unlikely(!RB_EMPTY_ROOT(&mapping->i_mmap.rb_root)))
+ unmap_mapping_range_tree(&mapping->i_mmap, &details);
+ i_mmap_unlock_write(mapping);
+}
+
+/**
* unmap_mapping_range - unmap the portion of all mmaps in the specified
- * address_space corresponding to the specified page range in the underlying
+ * address_space corresponding to the specified byte range in the underlying
* file.
*
* @mapping: the address space containing mmaps to be unmapped.
@@ -2806,7 +2847,6 @@ static inline void unmap_mapping_range_tree(struct rb_root_cached *root,
void unmap_mapping_range(struct address_space *mapping,
loff_t const holebegin, loff_t const holelen, int even_cows)
{
- struct zap_details details = { };
pgoff_t hba = holebegin >> PAGE_SHIFT;
pgoff_t hlen = (holelen + PAGE_SIZE - 1) >> PAGE_SHIFT;
@@ -2818,16 +2858,7 @@ void unmap_mapping_range(struct address_space *mapping,
hlen = ULONG_MAX - hba + 1;
}
- details.check_mapping = even_cows ? NULL : mapping;
- details.first_index = hba;
- details.last_index = hba + hlen - 1;
- if (details.last_index < details.first_index)
- details.last_index = ULONG_MAX;
-
- i_mmap_lock_write(mapping);
- if (unlikely(!RB_EMPTY_ROOT(&mapping->i_mmap.rb_root)))
- unmap_mapping_range_tree(&mapping->i_mmap, &details);
- i_mmap_unlock_write(mapping);
+ unmap_mapping_pages(mapping, hba, hlen, even_cows);
}
EXPORT_SYMBOL(unmap_mapping_range);
@@ -2842,7 +2873,7 @@ EXPORT_SYMBOL(unmap_mapping_range);
int do_swap_page(struct vm_fault *vmf)
{
struct vm_area_struct *vma = vmf->vma;
- struct page *page = NULL, *swapcache;
+ struct page *page = NULL, *swapcache = NULL;
struct mem_cgroup *memcg;
struct vma_swap_readahead swap_ra;
swp_entry_t entry;
@@ -2852,8 +2883,11 @@ int do_swap_page(struct vm_fault *vmf)
int ret = 0;
bool vma_readahead = swap_use_vma_readahead();
- if (vma_readahead)
+ if (vma_readahead) {
page = swap_readahead_detect(vmf, &swap_ra);
+ swapcache = page;
+ }
+
if (!pte_unmap_same(vma->vm_mm, vmf->pmd, vmf->pte, vmf->orig_pte)) {
if (page)
put_page(page);
@@ -2881,17 +2915,39 @@ int do_swap_page(struct vm_fault *vmf)
}
goto out;
}
+
+
delayacct_set_flag(DELAYACCT_PF_SWAPIN);
- if (!page)
+ if (!page) {
page = lookup_swap_cache(entry, vma_readahead ? vma : NULL,
vmf->address);
+ swapcache = page;
+ }
+
if (!page) {
- if (vma_readahead)
- page = do_swap_page_readahead(entry,
- GFP_HIGHUSER_MOVABLE, vmf, &swap_ra);
- else
- page = swapin_readahead(entry,
- GFP_HIGHUSER_MOVABLE, vma, vmf->address);
+ struct swap_info_struct *si = swp_swap_info(entry);
+
+ if (si->flags & SWP_SYNCHRONOUS_IO &&
+ __swap_count(si, entry) == 1) {
+ /* skip swapcache */
+ page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, vmf->address);
+ if (page) {
+ __SetPageLocked(page);
+ __SetPageSwapBacked(page);
+ set_page_private(page, entry.val);
+ lru_cache_add_anon(page);
+ swap_readpage(page, true);
+ }
+ } else {
+ if (vma_readahead)
+ page = do_swap_page_readahead(entry,
+ GFP_HIGHUSER_MOVABLE, vmf, &swap_ra);
+ else
+ page = swapin_readahead(entry,
+ GFP_HIGHUSER_MOVABLE, vma, vmf->address);
+ swapcache = page;
+ }
+
if (!page) {
/*
* Back out if somebody else faulted in this pte
@@ -2920,7 +2976,6 @@ int do_swap_page(struct vm_fault *vmf)
goto out_release;
}
- swapcache = page;
locked = lock_page_or_retry(page, vma->vm_mm, vmf->flags);
delayacct_clear_flag(DELAYACCT_PF_SWAPIN);
@@ -2935,7 +2990,8 @@ int do_swap_page(struct vm_fault *vmf)
* test below, are not enough to exclude that. Even if it is still
* swapcache, we need to check that the page's swap has not changed.
*/
- if (unlikely(!PageSwapCache(page) || page_private(page) != entry.val))
+ if (unlikely((!PageSwapCache(page) ||
+ page_private(page) != entry.val)) && swapcache)
goto out_page;
page = ksm_might_need_to_copy(page, vma, vmf->address);
@@ -2988,14 +3044,16 @@ int do_swap_page(struct vm_fault *vmf)
pte = pte_mksoft_dirty(pte);
set_pte_at(vma->vm_mm, vmf->address, vmf->pte, pte);
vmf->orig_pte = pte;
- if (page == swapcache) {
- do_page_add_anon_rmap(page, vma, vmf->address, exclusive);
- mem_cgroup_commit_charge(page, memcg, true, false);
- activate_page(page);
- } else { /* ksm created a completely new copy */
+
+ /* ksm created a completely new copy */
+ if (unlikely(page != swapcache && swapcache)) {
page_add_new_anon_rmap(page, vma, vmf->address, false);
mem_cgroup_commit_charge(page, memcg, false, false);
lru_cache_add_active_or_unevictable(page, vma);
+ } else {
+ do_page_add_anon_rmap(page, vma, vmf->address, exclusive);
+ mem_cgroup_commit_charge(page, memcg, true, false);
+ activate_page(page);
}
swap_free(entry);
@@ -3003,7 +3061,7 @@ int do_swap_page(struct vm_fault *vmf)
(vma->vm_flags & VM_LOCKED) || PageMlocked(page))
try_to_free_swap(page);
unlock_page(page);
- if (page != swapcache) {
+ if (page != swapcache && swapcache) {
/*
* Hold the lock to avoid the swap entry to be reused
* until we take the PT lock for the pte_same() check
@@ -3036,7 +3094,7 @@ out_page:
unlock_page(page);
out_release:
put_page(page);
- if (page != swapcache) {
+ if (page != swapcache && swapcache) {
unlock_page(swapcache);
put_page(swapcache);
}
@@ -3212,7 +3270,7 @@ static int pte_alloc_one_map(struct vm_fault *vmf)
goto map_pte;
}
- atomic_long_inc(&vma->vm_mm->nr_ptes);
+ mm_inc_nr_ptes(vma->vm_mm);
pmd_populate(vma->vm_mm, vmf->pmd, vmf->prealloc_pte);
spin_unlock(vmf->ptl);
vmf->prealloc_pte = NULL;
@@ -3271,7 +3329,7 @@ static void deposit_prealloc_pte(struct vm_fault *vmf)
* We are going to consume the prealloc table,
* count that as nr_ptes.
*/
- atomic_long_inc(&vma->vm_mm->nr_ptes);
+ mm_inc_nr_ptes(vma->vm_mm);
vmf->prealloc_pte = NULL;
}
@@ -3453,9 +3511,8 @@ static int fault_around_bytes_get(void *data, u64 *val)
}
/*
- * fault_around_pages() and fault_around_mask() expects fault_around_bytes
- * rounded down to nearest page order. It's what do_fault_around() expects to
- * see.
+ * fault_around_bytes must be rounded down to the nearest page order as it's
+ * what do_fault_around() expects to see.
*/
static int fault_around_bytes_set(void *data, u64 val)
{
@@ -3498,13 +3555,14 @@ late_initcall(fault_around_debugfs);
* This function doesn't cross the VMA boundaries, in order to call map_pages()
* only once.
*
- * fault_around_pages() defines how many pages we'll try to map.
- * do_fault_around() expects it to return a power of two less than or equal to
- * PTRS_PER_PTE.
+ * fault_around_bytes defines how many bytes we'll try to map.
+ * do_fault_around() expects it to be set to a power of two less than or equal
+ * to PTRS_PER_PTE.
*
- * The virtual address of the area that we map is naturally aligned to the
- * fault_around_pages() value (and therefore to page order). This way it's
- * easier to guarantee that we don't cross page table boundaries.
+ * The virtual address of the area that we map is naturally aligned to
+ * fault_around_bytes rounded down to the machine page size
+ * (and therefore to page order). This way it's easier to guarantee
+ * that we don't cross page table boundaries.
*/
static int do_fault_around(struct vm_fault *vmf)
{
@@ -3521,8 +3579,8 @@ static int do_fault_around(struct vm_fault *vmf)
start_pgoff -= off;
/*
- * end_pgoff is either end of page table or end of vma
- * or fault_around_pages() from start_pgoff, depending what is nearest.
+ * end_pgoff is either the end of the page table, the end of
+ * the vma or nr_pages from start_pgoff, depending what is nearest.
*/
end_pgoff = start_pgoff -
((vmf->address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) +
@@ -3805,7 +3863,8 @@ static inline int create_huge_pmd(struct vm_fault *vmf)
return VM_FAULT_FALLBACK;
}
-static int wp_huge_pmd(struct vm_fault *vmf, pmd_t orig_pmd)
+/* `inline' is required to avoid gcc 4.1.2 build error */
+static inline int wp_huge_pmd(struct vm_fault *vmf, pmd_t orig_pmd)
{
if (vma_is_anonymous(vmf->vma))
return do_huge_pmd_wp_page(vmf, orig_pmd);
@@ -3891,9 +3950,9 @@ static int handle_pte_fault(struct vm_fault *vmf)
/*
* some architectures can have larger ptes than wordsize,
* e.g.ppc44x-defconfig has CONFIG_PTE_64BIT=y and
- * CONFIG_32BIT=y, so READ_ONCE or ACCESS_ONCE cannot guarantee
- * atomic accesses. The code below just needs a consistent
- * view for the ifs and we later double check anyway with the
+ * CONFIG_32BIT=y, so READ_ONCE cannot guarantee atomic
+ * accesses. The code below just needs a consistent view
+ * for the ifs and we later double check anyway with the
* ptl lock held. So here a barrier will do.
*/
barrier();
@@ -4124,15 +4183,17 @@ int __pud_alloc(struct mm_struct *mm, p4d_t *p4d, unsigned long address)
spin_lock(&mm->page_table_lock);
#ifndef __ARCH_HAS_5LEVEL_HACK
- if (p4d_present(*p4d)) /* Another has populated it */
- pud_free(mm, new);
- else
+ if (!p4d_present(*p4d)) {
+ mm_inc_nr_puds(mm);
p4d_populate(mm, p4d, new);
-#else
- if (pgd_present(*p4d)) /* Another has populated it */
+ } else /* Another has populated it */
pud_free(mm, new);
- else
+#else
+ if (!pgd_present(*p4d)) {
+ mm_inc_nr_puds(mm);
pgd_populate(mm, p4d, new);
+ } else /* Another has populated it */
+ pud_free(mm, new);
#endif /* __ARCH_HAS_5LEVEL_HACK */
spin_unlock(&mm->page_table_lock);
return 0;
@@ -4457,17 +4518,15 @@ void print_vma_addr(char *prefix, unsigned long ip)
struct vm_area_struct *vma;
/*
- * Do not print if we are in atomic
- * contexts (in exception stacks, etc.):
+ * we might be running from an atomic context so we cannot sleep
*/
- if (preempt_count())
+ if (!down_read_trylock(&mm->mmap_sem))
return;
- down_read(&mm->mmap_sem);
vma = find_vma(mm, ip);
if (vma && vma->vm_file) {
struct file *f = vma->vm_file;
- char *buf = (char *)__get_free_page(GFP_KERNEL);
+ char *buf = (char *)__get_free_page(GFP_NOWAIT);
if (buf) {
char *p;