summary refs log tree commit diff stats
path: root/fs/userfaultfd.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/userfaultfd.c')
-rw-r--r-- fs/userfaultfd.c 109
1 files changed, 38 insertions, 71 deletions
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index 1c713fd5b3e6..87a13a7c8270 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -294,10 +294,13 @@ static inline bool userfaultfd_must_wait(struct userfaultfd_ctx *ctx,
* pmd_trans_unstable) of the pmd.
*/
_pmd = READ_ONCE(*pmd);
- if (!pmd_present(_pmd))
+ if (pmd_none(_pmd))
goto out;
ret = false;
+ if (!pmd_present(_pmd))
+ goto out;
+
if (pmd_trans_huge(_pmd))
goto out;
@@ -381,7 +384,7 @@ int handle_userfault(struct vm_fault *vmf, unsigned long reason)
* in __get_user_pages if userfaultfd_release waits on the
* caller of handle_userfault to release the mmap_sem.
*/
- if (unlikely(ACCESS_ONCE(ctx->released))) {
+ if (unlikely(READ_ONCE(ctx->released))) {
/*
* Don't return VM_FAULT_SIGBUS in this case, so a non
* cooperative manager can close the uffd after the
@@ -477,7 +480,7 @@ int handle_userfault(struct vm_fault *vmf, unsigned long reason)
vmf->flags, reason);
up_read(&mm->mmap_sem);
- if (likely(must_wait && !ACCESS_ONCE(ctx->released) &&
+ if (likely(must_wait && !READ_ONCE(ctx->released) &&
(return_to_userland ? !signal_pending(current) :
!fatal_signal_pending(current)))) {
wake_up_poll(&ctx->fd_wqh, POLLIN);
@@ -570,11 +573,14 @@ out:
static void userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx,
struct userfaultfd_wait_queue *ewq)
{
+ struct userfaultfd_ctx *release_new_ctx;
+
if (WARN_ON_ONCE(current->flags & PF_EXITING))
goto out;
ewq->ctx = ctx;
init_waitqueue_entry(&ewq->wq, current);
+ release_new_ctx = NULL;
spin_lock(&ctx->event_wqh.lock);
/*
@@ -586,7 +592,7 @@ static void userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx,
set_current_state(TASK_KILLABLE);
if (ewq->msg.event == 0)
break;
- if (ACCESS_ONCE(ctx->released) ||
+ if (READ_ONCE(ctx->released) ||
fatal_signal_pending(current)) {
/*
* &ewq->wq may be queued in fork_event, but
@@ -601,8 +607,7 @@ static void userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx,
new = (struct userfaultfd_ctx *)
(unsigned long)
ewq->msg.arg.reserved.reserved1;
-
- userfaultfd_ctx_put(new);
+ release_new_ctx = new;
}
break;
}
@@ -617,6 +622,20 @@ static void userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx,
__set_current_state(TASK_RUNNING);
spin_unlock(&ctx->event_wqh.lock);
+ if (release_new_ctx) {
+ struct vm_area_struct *vma;
+ struct mm_struct *mm = release_new_ctx->mm;
+
+ /* the various vma->vm_userfaultfd_ctx still points to it */
+ down_write(&mm->mmap_sem);
+ for (vma = mm->mmap; vma; vma = vma->vm_next)
+ if (vma->vm_userfaultfd_ctx.ctx == release_new_ctx)
+ vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX;
+ up_write(&mm->mmap_sem);
+
+ userfaultfd_ctx_put(release_new_ctx);
+ }
+
/*
* ctx may go away after this if the userfault pseudo fd is
* already released.
@@ -668,7 +687,7 @@ int dup_userfaultfd(struct vm_area_struct *vma, struct list_head *fcs)
ctx->features = octx->features;
ctx->released = false;
ctx->mm = vma->vm_mm;
- atomic_inc(&ctx->mm->mm_count);
+ mmgrab(ctx->mm);
userfaultfd_ctx_get(octx);
fctx->orig = octx;
@@ -833,7 +852,7 @@ static int userfaultfd_release(struct inode *inode, struct file *file)
struct userfaultfd_wake_range range = { .len = 0, };
unsigned long new_flags;
- ACCESS_ONCE(ctx->released) = true;
+ WRITE_ONCE(ctx->released, true);
if (!mmget_not_zero(mm))
goto wakeup;
@@ -921,10 +940,10 @@ static inline struct userfaultfd_wait_queue *find_userfault_evt(
return find_userfault_in(&ctx->event_wqh);
}
-static unsigned int userfaultfd_poll(struct file *file, poll_table *wait)
+static __poll_t userfaultfd_poll(struct file *file, poll_table *wait)
{
struct userfaultfd_ctx *ctx = file->private_data;
- unsigned int ret;
+ __poll_t ret;
poll_wait(file, &ctx->fd_wqh, wait);
@@ -969,24 +988,14 @@ static int resolve_userfault_fork(struct userfaultfd_ctx *ctx,
struct uffd_msg *msg)
{
int fd;
- struct file *file;
- unsigned int flags = new->flags & UFFD_SHARED_FCNTL_FLAGS;
- fd = get_unused_fd_flags(flags);
+ fd = anon_inode_getfd("[userfaultfd]", &userfaultfd_fops, new,
+ O_RDWR | (new->flags & UFFD_SHARED_FCNTL_FLAGS));
if (fd < 0)
return fd;
- file = anon_inode_getfile("[userfaultfd]", &userfaultfd_fops, new,
- O_RDWR | flags);
- if (IS_ERR(file)) {
- put_unused_fd(fd);
- return PTR_ERR(file);
- }
-
- fd_install(fd, file);
msg->arg.reserved.reserved1 = 0;
msg->arg.fork.ufd = fd;
-
return 0;
}
@@ -1868,24 +1877,10 @@ static void init_once_userfaultfd_ctx(void *mem)
seqcount_init(&ctx->refile_seq);
}
-/**
- * userfaultfd_file_create - Creates a userfaultfd file pointer.
- * @flags: Flags for the userfaultfd file.
- *
- * This function creates a userfaultfd file pointer, w/out installing
- * it into the fd table. This is useful when the userfaultfd file is
- * used during the initialization of data structures that require
- * extra setup after the userfaultfd creation. So the userfaultfd
- * creation is split into the file pointer creation phase, and the
- * file descriptor installation phase. In this way races with
- * userspace closing the newly installed file descriptor can be
- * avoided. Returns a userfaultfd file pointer, or a proper error
- * pointer.
- */
-static struct file *userfaultfd_file_create(int flags)
+SYSCALL_DEFINE1(userfaultfd, int, flags)
{
- struct file *file;
struct userfaultfd_ctx *ctx;
+ int fd;
BUG_ON(!current->mm);
@@ -1893,14 +1888,12 @@ static struct file *userfaultfd_file_create(int flags)
BUILD_BUG_ON(UFFD_CLOEXEC != O_CLOEXEC);
BUILD_BUG_ON(UFFD_NONBLOCK != O_NONBLOCK);
- file = ERR_PTR(-EINVAL);
if (flags & ~UFFD_SHARED_FCNTL_FLAGS)
- goto out;
+ return -EINVAL;
- file = ERR_PTR(-ENOMEM);
ctx = kmem_cache_alloc(userfaultfd_ctx_cachep, GFP_KERNEL);
if (!ctx)
- goto out;
+ return -ENOMEM;
atomic_set(&ctx->refcount, 1);
ctx->flags = flags;
@@ -1911,39 +1904,13 @@ static struct file *userfaultfd_file_create(int flags)
/* prevent the mm struct to be freed */
mmgrab(ctx->mm);
- file = anon_inode_getfile("[userfaultfd]", &userfaultfd_fops, ctx,
- O_RDWR | (flags & UFFD_SHARED_FCNTL_FLAGS));
- if (IS_ERR(file)) {
+ fd = anon_inode_getfd("[userfaultfd]", &userfaultfd_fops, ctx,
+ O_RDWR | (flags & UFFD_SHARED_FCNTL_FLAGS));
+ if (fd < 0) {
mmdrop(ctx->mm);
kmem_cache_free(userfaultfd_ctx_cachep, ctx);
}
-out:
- return file;
-}
-
-SYSCALL_DEFINE1(userfaultfd, int, flags)
-{
- int fd, error;
- struct file *file;
-
- error = get_unused_fd_flags(flags & UFFD_SHARED_FCNTL_FLAGS);
- if (error < 0)
- return error;
- fd = error;
-
- file = userfaultfd_file_create(flags);
- if (IS_ERR(file)) {
- error = PTR_ERR(file);
- goto err_put_unused_fd;
- }
- fd_install(fd, file);
-
return fd;
-
-err_put_unused_fd:
- put_unused_fd(fd);
-
- return error;
}
static int __init userfaultfd_init(void)