diff options
Diffstat (limited to 'mm/gup.c')
-rw-r--r-- | mm/gup.c | 207 |
1 files changed, 137 insertions, 70 deletions
@@ -848,7 +848,7 @@ static __always_inline long __get_user_pages_locked(struct task_struct *tsk, unsigned long nr_pages, struct page **pages, struct vm_area_struct **vmas, - int *locked, bool notify_drop, + int *locked, unsigned int flags) { long ret, pages_done; @@ -922,7 +922,7 @@ static __always_inline long __get_user_pages_locked(struct task_struct *tsk, pages++; start += PAGE_SIZE; } - if (notify_drop && lock_dropped && *locked) { + if (lock_dropped && *locked) { /* * We must let the caller know we temporarily dropped the lock * and so the critical section protected by it was lost. @@ -959,36 +959,12 @@ long get_user_pages_locked(unsigned long start, unsigned long nr_pages, int *locked) { return __get_user_pages_locked(current, current->mm, start, nr_pages, - pages, NULL, locked, true, + pages, NULL, locked, gup_flags | FOLL_TOUCH); } EXPORT_SYMBOL(get_user_pages_locked); /* - * Same as get_user_pages_unlocked(...., FOLL_TOUCH) but it allows for - * tsk, mm to be specified. - * - * NOTE: here FOLL_TOUCH is not set implicitly and must be set by the - * caller if required (just like with __get_user_pages). "FOLL_GET" - * is set implicitly if "pages" is non-NULL. - */ -static __always_inline long __get_user_pages_unlocked(struct task_struct *tsk, - struct mm_struct *mm, unsigned long start, - unsigned long nr_pages, struct page **pages, - unsigned int gup_flags) -{ - long ret; - int locked = 1; - - down_read(&mm->mmap_sem); - ret = __get_user_pages_locked(tsk, mm, start, nr_pages, pages, NULL, - &locked, false, gup_flags); - if (locked) - up_read(&mm->mmap_sem); - return ret; -} - -/* * get_user_pages_unlocked() is suitable to replace the form: * * down_read(&mm->mmap_sem); @@ -1006,8 +982,16 @@ static __always_inline long __get_user_pages_unlocked(struct task_struct *tsk, long get_user_pages_unlocked(unsigned long start, unsigned long nr_pages, struct page **pages, unsigned int gup_flags) { - return __get_user_pages_unlocked(current, current->mm, start, nr_pages, - pages, gup_flags | FOLL_TOUCH); + struct mm_struct *mm = current->mm; + int locked = 1; + long ret; + + down_read(&mm->mmap_sem); + ret = __get_user_pages_locked(current, mm, start, nr_pages, pages, NULL, + &locked, gup_flags | FOLL_TOUCH); + if (locked) + up_read(&mm->mmap_sem); + return ret; } EXPORT_SYMBOL(get_user_pages_unlocked); @@ -1073,7 +1057,7 @@ long get_user_pages_remote(struct task_struct *tsk, struct mm_struct *mm, struct vm_area_struct **vmas, int *locked) { return __get_user_pages_locked(tsk, mm, start, nr_pages, pages, vmas, - locked, true, + locked, gup_flags | FOLL_TOUCH | FOLL_REMOTE); } EXPORT_SYMBOL(get_user_pages_remote); @@ -1090,11 +1074,75 @@ long get_user_pages(unsigned long start, unsigned long nr_pages, struct vm_area_struct **vmas) { return __get_user_pages_locked(current, current->mm, start, nr_pages, - pages, vmas, NULL, false, + pages, vmas, NULL, gup_flags | FOLL_TOUCH); } EXPORT_SYMBOL(get_user_pages); +#ifdef CONFIG_FS_DAX +/* + * This is the same as get_user_pages() in that it assumes we are + * operating on the current task's mm, but it goes further to validate + * that the vmas associated with the address range are suitable for + * longterm elevated page reference counts. For example, filesystem-dax + * mappings are subject to the lifetime enforced by the filesystem and + * we need guarantees that longterm users like RDMA and V4L2 only + * establish mappings that have a kernel enforced revocation mechanism. + * + * "longterm" == userspace controlled elevated page count lifetime. + * Contrast this to iov_iter_get_pages() usages which are transient. + */ +long get_user_pages_longterm(unsigned long start, unsigned long nr_pages, + unsigned int gup_flags, struct page **pages, + struct vm_area_struct **vmas_arg) +{ + struct vm_area_struct **vmas = vmas_arg; + struct vm_area_struct *vma_prev = NULL; + long rc, i; + + if (!pages) + return -EINVAL; + + if (!vmas) { + vmas = kcalloc(nr_pages, sizeof(struct vm_area_struct *), + GFP_KERNEL); + if (!vmas) + return -ENOMEM; + } + + rc = get_user_pages(start, nr_pages, gup_flags, pages, vmas); + + for (i = 0; i < rc; i++) { + struct vm_area_struct *vma = vmas[i]; + + if (vma == vma_prev) + continue; + + vma_prev = vma; + + if (vma_is_fsdax(vma)) + break; + } + + /* + * Either get_user_pages() failed, or the vma validation + * succeeded, in either case we don't need to put_page() before + * returning. + */ + if (i >= rc) + goto out; + + for (i = 0; i < rc; i++) + put_page(pages[i]); + rc = -EOPNOTSUPP; +out: + if (vmas != vmas_arg) + kfree(vmas); + return rc; +} +EXPORT_SYMBOL(get_user_pages_longterm); +#endif /* CONFIG_FS_DAX */ + /** * populate_vma_page_range() - populate a range of pages in the vma. * @vma: target vma @@ -1643,6 +1691,47 @@ static int gup_p4d_range(pgd_t pgd, unsigned long addr, unsigned long end, return 1; } +static void gup_pgd_range(unsigned long addr, unsigned long end, + int write, struct page **pages, int *nr) +{ + unsigned long next; + pgd_t *pgdp; + + pgdp = pgd_offset(current->mm, addr); + do { + pgd_t pgd = READ_ONCE(*pgdp); + + next = pgd_addr_end(addr, end); + if (pgd_none(pgd)) + return; + if (unlikely(pgd_huge(pgd))) { + if (!gup_huge_pgd(pgd, pgdp, addr, next, write, + pages, nr)) + return; + } else if (unlikely(is_hugepd(__hugepd(pgd_val(pgd))))) { + if (!gup_huge_pd(__hugepd(pgd_val(pgd)), addr, + PGDIR_SHIFT, next, write, pages, nr)) + return; + } else if (!gup_p4d_range(pgd, addr, next, write, pages, nr)) + return; + } while (pgdp++, addr = next, addr != end); +} + +#ifndef gup_fast_permitted +/* + * Check if it's allowed to use __get_user_pages_fast() for the range, or + * we need to fall back to the slow version: + */ +bool gup_fast_permitted(unsigned long start, int nr_pages, int write) +{ + unsigned long len, end; + + len = (unsigned long) nr_pages << PAGE_SHIFT; + end = start + len; + return end >= start; +} +#endif + /* * Like get_user_pages_fast() except it's IRQ-safe in that it won't fall back to * the regular GUP. It will only return non-negative values. @@ -1650,10 +1739,8 @@ static int gup_p4d_range(pgd_t pgd, unsigned long addr, unsigned long end, int __get_user_pages_fast(unsigned long start, int nr_pages, int write, struct page **pages) { - struct mm_struct *mm = current->mm; unsigned long addr, len, end; - unsigned long next, flags; - pgd_t *pgdp; + unsigned long flags; int nr = 0; start &= PAGE_MASK; @@ -1677,45 +1764,15 @@ int __get_user_pages_fast(unsigned long start, int nr_pages, int write, * block IPIs that come from THPs splitting. */ - local_irq_save(flags); - pgdp = pgd_offset(mm, addr); - do { - pgd_t pgd = READ_ONCE(*pgdp); - - next = pgd_addr_end(addr, end); - if (pgd_none(pgd)) - break; - if (unlikely(pgd_huge(pgd))) { - if (!gup_huge_pgd(pgd, pgdp, addr, next, write, - pages, &nr)) - break; - } else if (unlikely(is_hugepd(__hugepd(pgd_val(pgd))))) { - if (!gup_huge_pd(__hugepd(pgd_val(pgd)), addr, - PGDIR_SHIFT, next, write, pages, &nr)) - break; - } else if (!gup_p4d_range(pgd, addr, next, write, pages, &nr)) - break; - } while (pgdp++, addr = next, addr != end); - local_irq_restore(flags); + if (gup_fast_permitted(start, nr_pages, write)) { + local_irq_save(flags); + gup_pgd_range(addr, end, write, pages, &nr); + local_irq_restore(flags); + } return nr; } -#ifndef gup_fast_permitted -/* - * Check if it's allowed to use __get_user_pages_fast() for the range, or - * we need to fall back to the slow version: - */ -bool gup_fast_permitted(unsigned long start, int nr_pages, int write) -{ - unsigned long len, end; - - len = (unsigned long) nr_pages << PAGE_SHIFT; - end = start + len; - return end >= start; -} -#endif - /** * get_user_pages_fast() - pin user pages in memory * @start: starting user address @@ -1735,12 +1792,22 @@ bool gup_fast_permitted(unsigned long start, int nr_pages, int write) int get_user_pages_fast(unsigned long start, int nr_pages, int write, struct page **pages) { + unsigned long addr, len, end; int nr = 0, ret = 0; start &= PAGE_MASK; + addr = start; + len = (unsigned long) nr_pages << PAGE_SHIFT; + end = start + len; + + if (unlikely(!access_ok(write ? VERIFY_WRITE : VERIFY_READ, + (void __user *)start, len))) + return 0; if (gup_fast_permitted(start, nr_pages, write)) { - nr = __get_user_pages_fast(start, nr_pages, write, pages); + local_irq_disable(); + gup_pgd_range(addr, end, write, pages, &nr); + local_irq_enable(); ret = nr; } |