sched/fair: Don't free p->numa_faults with concurrent readers

commit 16d51a590a8ce3befb1308e0e7ab77f3b661af33 upstream.

When going through execve(), zero out the NUMA fault statistics instead of
freeing them.

During execve, the task is reachable through procfs and the scheduler. A
concurrent /proc/*/sched reader can read data from a freed ->numa_faults
allocation (confirmed by KASAN) and write it back to userspace.

I believe that it would also be possible for a use-after-free read to occur
through a race between a NUMA fault and execve(): task_numa_fault() can
lead to task_numa_compare(), which invokes task_weight() on the currently
running task of a different CPU.

Another way to fix this would be to make ->numa_faults RCU-managed or add
extra locking, but it seems easier to wipe the NUMA fault statistics on
execve.

Signed-off-by: Jann Horn <jannh@google.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Petr Mladek <pmladek@suse.com>
Cc: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Will Deacon <will@kernel.org>
Fixes: 82727018b0d3 ("sched/numa: Call task_numa_free() from do_execve()")
Link: https://lkml.kernel.org/r/20190716152047.14424-1-jannh@google.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
author: Jann Horn 2019-07-16 17:20:45 +0200
committer: Greg Kroah-Hartman 2019-08-04 09:30:56 +0200
commit: 48046e092ad557a01d7daf53205624944793b19d (patch)
tree: 56c87ea911ed99c4e42161ab98a8f60d03f8fe2c /kernel
parent: vhost: scsi: add weight support (diff)
download: kernel-qcow2-linux-48046e092ad557a01d7daf53205624944793b19d.tar.gz
kernel-qcow2-linux-48046e092ad557a01d7daf53205624944793b19d.tar.xz
kernel-qcow2-linux-48046e092ad557a01d7daf53205624944793b19d.zip
2 files changed, 21 insertions, 5 deletions
diff --git a/kernel/fork.c b/kernel/fork.c
index 69874db3fba8..e76ce81c9c75 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -679,7 +679,7 @@ void __put_task_struct(struct task_struct *tsk)
 	WARN_ON(tsk == current);
 
 	cgroup_free(tsk);
-	task_numa_free(tsk);
+	task_numa_free(tsk, true);
 	security_task_free(tsk);
 	exit_creds(tsk);
 	delayacct_tsk_free(tsk);
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 4a433608ba74..34b998678b97 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -2345,13 +2345,23 @@ no_join:
 	return;
 }
 
-void task_numa_free(struct task_struct *p)
+/*
+ * Get rid of NUMA statistics associated with a task (either current or dead).
+ * If @final is set, the task is dead and has reached refcount zero, so we can
+ * safely free all relevant data structures. Otherwise, there might be
+ * concurrent reads from places like load balancing and procfs, and we should
+ * reset the data back to default state without freeing ->numa_faults.
+ */
+void task_numa_free(struct task_struct *p, bool final)
 {
 	struct numa_group *grp = p->numa_group;
-	void *numa_faults = p->numa_faults;
+	unsigned long *numa_faults = p->numa_faults;
 	unsigned long flags;
 	int i;
 
+	if (!numa_faults)
+		return;
+
 	if (grp) {
 		spin_lock_irqsave(&grp->lock, flags);
 		for (i = 0; i < NR_NUMA_HINT_FAULT_STATS * nr_node_ids; i++)
@@ -2364,8 +2374,14 @@ void task_numa_free(struct task_struct *p)
 		put_numa_group(grp);
 	}
 
-	p->numa_faults = NULL;
-	kfree(numa_faults);
+	if (final) {
+		p->numa_faults = NULL;
+		kfree(numa_faults);
+	} else {
+		p->total_numa_faults = 0;
+		for (i = 0; i < NR_NUMA_HINT_FAULT_STATS * nr_node_ids; i++)
+			numa_faults[i] = 0;
+	}
 }
 
 /*
author	Jann Horn	2019-07-16 17:20:45 +0200
committer	Greg Kroah-Hartman	2019-08-04 09:30:56 +0200
commit	48046e092ad557a01d7daf53205624944793b19d (patch)
tree	56c87ea911ed99c4e42161ab98a8f60d03f8fe2c /kernel
parent	vhost: scsi: add weight support (diff)
download	kernel-qcow2-linux-48046e092ad557a01d7daf53205624944793b19d.tar.gz kernel-qcow2-linux-48046e092ad557a01d7daf53205624944793b19d.tar.xz kernel-qcow2-linux-48046e092ad557a01d7daf53205624944793b19d.zip