summaryrefslogtreecommitdiffstats
path: root/kernel/fork.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/fork.c')
-rw-r--r--kernel/fork.c98
1 files changed, 71 insertions, 27 deletions
diff --git a/kernel/fork.c b/kernel/fork.c
index 3c31e874afad..4133876d8cd2 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -146,7 +146,7 @@ void __weak arch_release_thread_info(struct thread_info *ti)
static struct thread_info *alloc_thread_info_node(struct task_struct *tsk,
int node)
{
- struct page *page = alloc_pages_node(node, THREADINFO_GFP,
+ struct page *page = alloc_pages_node(node, THREADINFO_GFP_ACCOUNTED,
THREAD_SIZE_ORDER);
return page ? page_address(page) : NULL;
@@ -154,7 +154,7 @@ static struct thread_info *alloc_thread_info_node(struct task_struct *tsk,
static inline void free_thread_info(struct thread_info *ti)
{
- free_pages((unsigned long)ti, THREAD_SIZE_ORDER);
+ free_memcg_kmem_pages((unsigned long)ti, THREAD_SIZE_ORDER);
}
# else
static struct kmem_cache *thread_info_cache;
@@ -823,6 +823,9 @@ struct mm_struct *dup_mm(struct task_struct *tsk)
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
mm->pmd_huge_pte = NULL;
#endif
+#ifdef CONFIG_NUMA_BALANCING
+ mm->first_nid = NUMA_PTE_SCAN_INIT;
+#endif
if (!mm_init(mm, tsk))
goto fail_nomem;
@@ -1041,8 +1044,6 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
atomic_set(&sig->live, 1);
atomic_set(&sig->sigcnt, 1);
init_waitqueue_head(&sig->wait_chldexit);
- if (clone_flags & CLONE_NEWPID)
- sig->flags |= SIGNAL_UNKILLABLE;
sig->curr_target = tsk;
init_sigpending(&sig->shared_pending);
INIT_LIST_HEAD(&sig->posix_timers);
@@ -1165,6 +1166,14 @@ static struct task_struct *copy_process(unsigned long clone_flags,
current->signal->flags & SIGNAL_UNKILLABLE)
return ERR_PTR(-EINVAL);
+ /*
+ * If the new process will be in a different pid namespace
+ * don't allow the creation of threads.
+ */
+ if ((clone_flags & (CLONE_VM|CLONE_NEWPID)) &&
+ (task_active_pid_ns(current) != current->nsproxy->pid_ns))
+ return ERR_PTR(-EINVAL);
+
retval = security_task_create(clone_flags);
if (retval)
goto fork_out;
@@ -1224,6 +1233,12 @@ static struct task_struct *copy_process(unsigned long clone_flags,
#ifndef CONFIG_VIRT_CPU_ACCOUNTING
p->prev_cputime.utime = p->prev_cputime.stime = 0;
#endif
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
+ seqlock_init(&p->vtime_seqlock);
+ p->vtime_snap = 0;
+ p->vtime_snap_whence = VTIME_SLEEPING;
+#endif
+
#if defined(SPLIT_RSS_COUNTING)
memset(&p->rss_stat, 0, sizeof(p->rss_stat));
#endif
@@ -1435,8 +1450,10 @@ static struct task_struct *copy_process(unsigned long clone_flags,
ptrace_init_task(p, (clone_flags & CLONE_PTRACE) || trace);
if (thread_group_leader(p)) {
- if (is_child_reaper(pid))
- p->nsproxy->pid_ns->child_reaper = p;
+ if (is_child_reaper(pid)) {
+ ns_of_pid(pid)->child_reaper = p;
+ p->signal->flags |= SIGNAL_UNKILLABLE;
+ }
p->signal->leader_pid = pid;
p->signal->tty = tty_kref_get(current->signal->tty);
@@ -1470,8 +1487,6 @@ bad_fork_cleanup_io:
if (p->io_context)
exit_io_context(p);
bad_fork_cleanup_namespaces:
- if (unlikely(clone_flags & CLONE_NEWPID))
- pid_ns_release_proc(p->nsproxy->pid_ns);
exit_task_namespaces(p);
bad_fork_cleanup_mm:
if (p->mm)
@@ -1551,15 +1566,9 @@ long do_fork(unsigned long clone_flags,
* Do some preliminary argument and permissions checking before we
* actually start allocating stuff
*/
- if (clone_flags & CLONE_NEWUSER) {
- if (clone_flags & CLONE_THREAD)
+ if (clone_flags & (CLONE_NEWUSER | CLONE_NEWPID)) {
+ if (clone_flags & (CLONE_THREAD|CLONE_PARENT))
return -EINVAL;
- /* hopefully this check will go away when userns support is
- * complete
- */
- if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SETUID) ||
- !capable(CAP_SETGID))
- return -EPERM;
}
/*
@@ -1618,7 +1627,6 @@ long do_fork(unsigned long clone_flags,
return nr;
}
-#ifdef CONFIG_GENERIC_KERNEL_THREAD
/*
* Create a kernel thread.
*/
@@ -1627,7 +1635,6 @@ pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
return do_fork(flags|CLONE_VM|CLONE_UNTRACED, (unsigned long)fn,
(unsigned long)arg, NULL, NULL);
}
-#endif
#ifdef __ARCH_WANT_SYS_FORK
SYSCALL_DEFINE0(fork)
@@ -1667,8 +1674,10 @@ SYSCALL_DEFINE5(clone, unsigned long, clone_flags, unsigned long, newsp,
int, tls_val)
#endif
{
- return do_fork(clone_flags, newsp, 0,
- parent_tidptr, child_tidptr);
+ long ret = do_fork(clone_flags, newsp, 0, parent_tidptr, child_tidptr);
+ asmlinkage_protect(5, ret, clone_flags, newsp,
+ parent_tidptr, child_tidptr, tls_val);
+ return ret;
}
#endif
@@ -1721,7 +1730,8 @@ static int check_unshare_flags(unsigned long unshare_flags)
{
if (unshare_flags & ~(CLONE_THREAD|CLONE_FS|CLONE_NEWNS|CLONE_SIGHAND|
CLONE_VM|CLONE_FILES|CLONE_SYSVSEM|
- CLONE_NEWUTS|CLONE_NEWIPC|CLONE_NEWNET))
+ CLONE_NEWUTS|CLONE_NEWIPC|CLONE_NEWNET|
+ CLONE_NEWUSER|CLONE_NEWPID))
return -EINVAL;
/*
* Not implemented, but pretend it works if there is nothing to
@@ -1788,19 +1798,40 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags)
{
struct fs_struct *fs, *new_fs = NULL;
struct files_struct *fd, *new_fd = NULL;
+ struct cred *new_cred = NULL;
struct nsproxy *new_nsproxy = NULL;
int do_sysvsem = 0;
int err;
- err = check_unshare_flags(unshare_flags);
- if (err)
- goto bad_unshare_out;
-
+ /*
+ * If unsharing a user namespace must also unshare the thread.
+ */
+ if (unshare_flags & CLONE_NEWUSER)
+ unshare_flags |= CLONE_THREAD;
+ /*
+ * If unsharing a pid namespace must also unshare the thread.
+ */
+ if (unshare_flags & CLONE_NEWPID)
+ unshare_flags |= CLONE_THREAD;
+ /*
+ * If unsharing a thread from a thread group, must also unshare vm.
+ */
+ if (unshare_flags & CLONE_THREAD)
+ unshare_flags |= CLONE_VM;
+ /*
+ * If unsharing vm, must also unshare signal handlers.
+ */
+ if (unshare_flags & CLONE_VM)
+ unshare_flags |= CLONE_SIGHAND;
/*
* If unsharing namespace, must also unshare filesystem information.
*/
if (unshare_flags & CLONE_NEWNS)
unshare_flags |= CLONE_FS;
+
+ err = check_unshare_flags(unshare_flags);
+ if (err)
+ goto bad_unshare_out;
/*
* CLONE_NEWIPC must also detach from the undolist: after switching
* to a new ipc namespace, the semaphore arrays from the old
@@ -1814,11 +1845,15 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags)
err = unshare_fd(unshare_flags, &new_fd);
if (err)
goto bad_unshare_cleanup_fs;
- err = unshare_nsproxy_namespaces(unshare_flags, &new_nsproxy, new_fs);
+ err = unshare_userns(unshare_flags, &new_cred);
if (err)
goto bad_unshare_cleanup_fd;
+ err = unshare_nsproxy_namespaces(unshare_flags, &new_nsproxy,
+ new_cred, new_fs);
+ if (err)
+ goto bad_unshare_cleanup_cred;
- if (new_fs || new_fd || do_sysvsem || new_nsproxy) {
+ if (new_fs || new_fd || do_sysvsem || new_cred || new_nsproxy) {
if (do_sysvsem) {
/*
* CLONE_SYSVSEM is equivalent to sys_exit().
@@ -1851,11 +1886,20 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags)
}
task_unlock(current);
+
+ if (new_cred) {
+ /* Install the new user namespace */
+ commit_creds(new_cred);
+ new_cred = NULL;
+ }
}
if (new_nsproxy)
put_nsproxy(new_nsproxy);
+bad_unshare_cleanup_cred:
+ if (new_cred)
+ put_cred(new_cred);
bad_unshare_cleanup_fd:
if (new_fd)
put_files_struct(new_fd);