diff options
Diffstat (limited to 'mm/shmem.c')
-rw-r--r-- | mm/shmem.c | 165 |
1 files changed, 151 insertions, 14 deletions
diff --git a/mm/shmem.c b/mm/shmem.c index bb53285a1d99..a26649a6633f 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -34,6 +34,8 @@ #include <linux/uio.h> #include <linux/khugepaged.h> +#include <asm/tlbflush.h> /* for arch/microblaze update_mmu_cache() */ + static struct vfsmount *shm_mnt; #ifdef CONFIG_SHMEM @@ -70,6 +72,8 @@ static struct vfsmount *shm_mnt; #include <linux/syscalls.h> #include <linux/fcntl.h> #include <uapi/linux/memfd.h> +#include <linux/userfaultfd_k.h> +#include <linux/rmap.h> #include <linux/uaccess.h> #include <asm/pgtable.h> @@ -115,13 +119,14 @@ static int shmem_replace_page(struct page **pagep, gfp_t gfp, struct shmem_inode_info *info, pgoff_t index); static int shmem_getpage_gfp(struct inode *inode, pgoff_t index, struct page **pagep, enum sgp_type sgp, - gfp_t gfp, struct mm_struct *fault_mm, int *fault_type); + gfp_t gfp, struct vm_area_struct *vma, + struct vm_fault *vmf, int *fault_type); int shmem_getpage(struct inode *inode, pgoff_t index, struct page **pagep, enum sgp_type sgp) { return shmem_getpage_gfp(inode, index, pagep, sgp, - mapping_gfp_mask(inode->i_mapping), NULL, NULL); + mapping_gfp_mask(inode->i_mapping), NULL, NULL, NULL); } static inline struct shmem_sb_info *SHMEM_SB(struct super_block *sb) @@ -190,6 +195,11 @@ static const struct inode_operations shmem_special_inode_operations; static const struct vm_operations_struct shmem_vm_ops; static struct file_system_type shmem_fs_type; +bool vma_is_shmem(struct vm_area_struct *vma) +{ + return vma->vm_ops == &shmem_vm_ops; +} + static LIST_HEAD(shmem_swaplist); static DEFINE_MUTEX(shmem_swaplist_mutex); @@ -415,6 +425,7 @@ static unsigned long shmem_unused_huge_shrink(struct shmem_sb_info *sbinfo, struct shrink_control *sc, unsigned long nr_to_split) { LIST_HEAD(list), *pos, *next; + LIST_HEAD(to_remove); struct inode *inode; struct shmem_inode_info *info; struct page *page; @@ -441,9 +452,8 @@ static unsigned long shmem_unused_huge_shrink(struct shmem_sb_info *sbinfo, /* Check if there's anything to gain */ if (round_up(inode->i_size, PAGE_SIZE) == round_up(inode->i_size, HPAGE_PMD_SIZE)) { - list_del_init(&info->shrinklist); + list_move(&info->shrinklist, &to_remove); removed++; - iput(inode); goto next; } @@ -454,6 +464,13 @@ next: } spin_unlock(&sbinfo->shrinklist_lock); + list_for_each_safe(pos, next, &to_remove) { + info = list_entry(pos, struct shmem_inode_info, shrinklist); + inode = &info->vfs_inode; + list_del_init(&info->shrinklist); + iput(inode); + } + list_for_each_safe(pos, next, &list) { int ret; @@ -1563,7 +1580,7 @@ static int shmem_replace_page(struct page **pagep, gfp_t gfp, */ static int shmem_getpage_gfp(struct inode *inode, pgoff_t index, struct page **pagep, enum sgp_type sgp, gfp_t gfp, - struct mm_struct *fault_mm, int *fault_type) + struct vm_area_struct *vma, struct vm_fault *vmf, int *fault_type) { struct address_space *mapping = inode->i_mapping; struct shmem_inode_info *info = SHMEM_I(inode); @@ -1617,7 +1634,7 @@ repeat: * bring it back from swap or allocate. */ sbinfo = SHMEM_SB(inode->i_sb); - charge_mm = fault_mm ? : current->mm; + charge_mm = vma ? vma->vm_mm : current->mm; if (swap.val) { /* Look it up and read it in.. */ @@ -1627,7 +1644,8 @@ repeat: if (fault_type) { *fault_type |= VM_FAULT_MAJOR; count_vm_event(PGMAJFAULT); - mem_cgroup_count_vm_event(fault_mm, PGMAJFAULT); + mem_cgroup_count_vm_event(charge_mm, + PGMAJFAULT); } /* Here we actually start the io */ page = shmem_swapin(swap, gfp, info, index); @@ -1696,6 +1714,11 @@ repeat: swap_free(swap); } else { + if (vma && userfaultfd_missing(vma)) { + *fault_type = handle_userfault(vmf, VM_UFFD_MISSING); + return 0; + } + /* shmem_symlink() */ if (mapping->a_ops != &shmem_aops) goto alloc_nohuge; @@ -1885,8 +1908,9 @@ static int synchronous_wake_function(wait_queue_t *wait, unsigned mode, int sync return ret; } -static int shmem_fault(struct vm_area_struct *vma, struct vm_fault *vmf) +static int shmem_fault(struct vm_fault *vmf) { + struct vm_area_struct *vma = vmf->vma; struct inode *inode = file_inode(vma->vm_file); gfp_t gfp = mapping_gfp_mask(inode->i_mapping); enum sgp_type sgp; @@ -1958,7 +1982,7 @@ static int shmem_fault(struct vm_area_struct *vma, struct vm_fault *vmf) sgp = SGP_NOHUGE; error = shmem_getpage_gfp(inode, vmf->pgoff, &vmf->page, sgp, - gfp, vma->vm_mm, &ret); + gfp, vma, vmf, &ret); if (error) return ((error == -ENOMEM) ? VM_FAULT_OOM : VM_FAULT_SIGBUS); return ret; @@ -2168,10 +2192,123 @@ static struct inode *shmem_get_inode(struct super_block *sb, const struct inode bool shmem_mapping(struct address_space *mapping) { - if (!mapping->host) - return false; + return mapping->a_ops == &shmem_aops; +} - return mapping->host->i_sb->s_op == &shmem_ops; +int shmem_mcopy_atomic_pte(struct mm_struct *dst_mm, + pmd_t *dst_pmd, + struct vm_area_struct *dst_vma, + unsigned long dst_addr, + unsigned long src_addr, + struct page **pagep) +{ + struct inode *inode = file_inode(dst_vma->vm_file); + struct shmem_inode_info *info = SHMEM_I(inode); + struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb); + struct address_space *mapping = inode->i_mapping; + gfp_t gfp = mapping_gfp_mask(mapping); + pgoff_t pgoff = linear_page_index(dst_vma, dst_addr); + struct mem_cgroup *memcg; + spinlock_t *ptl; + void *page_kaddr; + struct page *page; + pte_t _dst_pte, *dst_pte; + int ret; + + ret = -ENOMEM; + if (shmem_acct_block(info->flags, 1)) + goto out; + if (sbinfo->max_blocks) { + if (percpu_counter_compare(&sbinfo->used_blocks, + sbinfo->max_blocks) >= 0) + goto out_unacct_blocks; + percpu_counter_inc(&sbinfo->used_blocks); + } + + if (!*pagep) { + page = shmem_alloc_page(gfp, info, pgoff); + if (!page) + goto out_dec_used_blocks; + + page_kaddr = kmap_atomic(page); + ret = copy_from_user(page_kaddr, (const void __user *)src_addr, + PAGE_SIZE); + kunmap_atomic(page_kaddr); + + /* fallback to copy_from_user outside mmap_sem */ + if (unlikely(ret)) { + *pagep = page; + if (sbinfo->max_blocks) + percpu_counter_add(&sbinfo->used_blocks, -1); + shmem_unacct_blocks(info->flags, 1); + /* don't free the page */ + return -EFAULT; + } + } else { + page = *pagep; + *pagep = NULL; + } + + VM_BUG_ON(PageLocked(page) || PageSwapBacked(page)); + __SetPageLocked(page); + __SetPageSwapBacked(page); + __SetPageUptodate(page); + + ret = mem_cgroup_try_charge(page, dst_mm, gfp, &memcg, false); + if (ret) + goto out_release; + + ret = radix_tree_maybe_preload(gfp & GFP_RECLAIM_MASK); + if (!ret) { + ret = shmem_add_to_page_cache(page, mapping, pgoff, NULL); + radix_tree_preload_end(); + } + if (ret) + goto out_release_uncharge; + + mem_cgroup_commit_charge(page, memcg, false, false); + + _dst_pte = mk_pte(page, dst_vma->vm_page_prot); + if (dst_vma->vm_flags & VM_WRITE) + _dst_pte = pte_mkwrite(pte_mkdirty(_dst_pte)); + + ret = -EEXIST; + dst_pte = pte_offset_map_lock(dst_mm, dst_pmd, dst_addr, &ptl); + if (!pte_none(*dst_pte)) + goto out_release_uncharge_unlock; + + lru_cache_add_anon(page); + + spin_lock(&info->lock); + info->alloced++; + inode->i_blocks += BLOCKS_PER_PAGE; + shmem_recalc_inode(inode); + spin_unlock(&info->lock); + + inc_mm_counter(dst_mm, mm_counter_file(page)); + page_add_file_rmap(page, false); + set_pte_at(dst_mm, dst_addr, dst_pte, _dst_pte); + + /* No need to invalidate - it was non-present before */ + update_mmu_cache(dst_vma, dst_addr, dst_pte); + unlock_page(page); + pte_unmap_unlock(dst_pte, ptl); + ret = 0; +out: + return ret; +out_release_uncharge_unlock: + pte_unmap_unlock(dst_pte, ptl); +out_release_uncharge: + mem_cgroup_cancel_charge(page, memcg, false); +out_release: + unlock_page(page); + put_page(page); +out_dec_used_blocks: + if (sbinfo->max_blocks) + percpu_counter_add(&sbinfo->used_blocks, -1); +out_unacct_blocks: + shmem_unacct_blocks(info->flags, 1); + goto out; } #ifdef CONFIG_TMPFS @@ -2194,7 +2331,7 @@ shmem_write_begin(struct file *file, struct address_space *mapping, pgoff_t index = pos >> PAGE_SHIFT; /* i_mutex is held by caller */ - if (unlikely(info->seals)) { + if (unlikely(info->seals & (F_SEAL_WRITE | F_SEAL_GROW))) { if (info->seals & F_SEAL_WRITE) return -EPERM; if ((info->seals & F_SEAL_GROW) && pos + len > inode->i_size) @@ -4133,7 +4270,7 @@ struct page *shmem_read_mapping_page_gfp(struct address_space *mapping, BUG_ON(mapping->a_ops != &shmem_aops); error = shmem_getpage_gfp(inode, index, &page, SGP_CACHE, - gfp, NULL, NULL); + gfp, NULL, NULL, NULL); if (error) page = ERR_PTR(error); else |