diff options
author | Mathieu Desnoyers | 2018-01-29 21:20:17 +0100 |
---|---|---|
committer | Ingo Molnar | 2018-02-05 21:35:03 +0100 |
commit | 70216e18e519a54a2f13569e8caff99a092a92d6 (patch) | |
tree | 000536088ca3d1e7869e28b56bb5ad2ac6a49740 /kernel/sched | |
parent | lockin/x86: Implement sync_core_before_usermode() (diff) | |
download | kernel-qcow2-linux-70216e18e519a54a2f13569e8caff99a092a92d6.tar.gz kernel-qcow2-linux-70216e18e519a54a2f13569e8caff99a092a92d6.tar.xz kernel-qcow2-linux-70216e18e519a54a2f13569e8caff99a092a92d6.zip |
membarrier: Provide core serializing command, *_SYNC_CORE
Provide core serializing membarrier command to support memory reclaim
by JIT.
Each architecture needs to explicitly opt into that support by
documenting in their architecture code how they provide the core
serializing instructions required when returning from the membarrier
IPI, and after the scheduler has updated the curr->mm pointer (before
going back to user-space). They should then select
ARCH_HAS_MEMBARRIER_SYNC_CORE to enable support for that command on
their architecture.
Architectures selecting this feature need to either document that
they issue core serializing instructions when returning to user-space,
or implement their architecture-specific sync_core_before_usermode().
Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andrea Parri <parri.andrea@gmail.com>
Cc: Andrew Hunter <ahh@google.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Avi Kivity <avi@scylladb.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: Dave Watson <davejwatson@fb.com>
Cc: David Sehr <sehr@google.com>
Cc: Greg Hackmann <ghackmann@google.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Maged Michael <maged.michael@gmail.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Russell King <linux@armlinux.org.uk>
Cc: Will Deacon <will.deacon@arm.com>
Cc: linux-api@vger.kernel.org
Cc: linux-arch@vger.kernel.org
Link: http://lkml.kernel.org/r/20180129202020.8515-9-mathieu.desnoyers@efficios.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'kernel/sched')
-rw-r--r-- | kernel/sched/core.c | 18 | ||||
-rw-r--r-- | kernel/sched/membarrier.c | 53 |
2 files changed, 54 insertions, 17 deletions
diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 11bf4d48d2d3..ee420d78e674 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -2704,13 +2704,21 @@ static struct rq *finish_task_switch(struct task_struct *prev) fire_sched_in_preempt_notifiers(current); /* - * When transitioning from a kernel thread to a userspace - * thread, mmdrop()'s implicit full barrier is required by the - * membarrier system call, because the current ->active_mm can - * become the current mm without going through switch_mm(). + * When switching through a kernel thread, the loop in + * membarrier_{private,global}_expedited() may have observed that + * kernel thread and not issued an IPI. It is therefore possible to + * schedule between user->kernel->user threads without passing though + * switch_mm(). Membarrier requires a barrier after storing to + * rq->curr, before returning to userspace, so provide them here: + * + * - a full memory barrier for {PRIVATE,GLOBAL}_EXPEDITED, implicitly + * provided by mmdrop(), + * - a sync_core for SYNC_CORE. */ - if (mm) + if (mm) { + membarrier_mm_sync_core_before_usermode(mm); mmdrop(mm); + } if (unlikely(prev_state == TASK_DEAD)) { if (prev->sched_class->task_dead) prev->sched_class->task_dead(prev); diff --git a/kernel/sched/membarrier.c b/kernel/sched/membarrier.c index d2087d5f9837..5d0762633639 100644 --- a/kernel/sched/membarrier.c +++ b/kernel/sched/membarrier.c @@ -26,11 +26,20 @@ * Bitmask made from a "or" of all commands within enum membarrier_cmd, * except MEMBARRIER_CMD_QUERY. 
*/ +#ifdef CONFIG_ARCH_HAS_MEMBARRIER_SYNC_CORE +#define MEMBARRIER_PRIVATE_EXPEDITED_SYNC_CORE_BITMASK \ + (MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE \ + | MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE) +#else +#define MEMBARRIER_PRIVATE_EXPEDITED_SYNC_CORE_BITMASK 0 +#endif + #define MEMBARRIER_CMD_BITMASK \ (MEMBARRIER_CMD_GLOBAL | MEMBARRIER_CMD_GLOBAL_EXPEDITED \ | MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED \ | MEMBARRIER_CMD_PRIVATE_EXPEDITED \ - | MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED) + | MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED \ + | MEMBARRIER_PRIVATE_EXPEDITED_SYNC_CORE_BITMASK) static void ipi_mb(void *info) { @@ -104,15 +113,23 @@ static int membarrier_global_expedited(void) return 0; } -static int membarrier_private_expedited(void) +static int membarrier_private_expedited(int flags) { int cpu; bool fallback = false; cpumask_var_t tmpmask; - if (!(atomic_read(&current->mm->membarrier_state) - & MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY)) - return -EPERM; + if (flags & MEMBARRIER_FLAG_SYNC_CORE) { + if (!IS_ENABLED(CONFIG_ARCH_HAS_MEMBARRIER_SYNC_CORE)) + return -EINVAL; + if (!(atomic_read(&current->mm->membarrier_state) & + MEMBARRIER_STATE_PRIVATE_EXPEDITED_SYNC_CORE_READY)) + return -EPERM; + } else { + if (!(atomic_read(&current->mm->membarrier_state) & + MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY)) + return -EPERM; + } if (num_online_cpus() == 1) return 0; @@ -205,20 +222,29 @@ static int membarrier_register_global_expedited(void) return 0; } -static int membarrier_register_private_expedited(void) +static int membarrier_register_private_expedited(int flags) { struct task_struct *p = current; struct mm_struct *mm = p->mm; + int state = MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY; + + if (flags & MEMBARRIER_FLAG_SYNC_CORE) { + if (!IS_ENABLED(CONFIG_ARCH_HAS_MEMBARRIER_SYNC_CORE)) + return -EINVAL; + state = MEMBARRIER_STATE_PRIVATE_EXPEDITED_SYNC_CORE_READY; + } /* * We need to consider threads belonging to different thread * groups, which use the same mm. 
(CLONE_VM but not * CLONE_THREAD). */ - if (atomic_read(&mm->membarrier_state) - & MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY) + if (atomic_read(&mm->membarrier_state) & state) return 0; atomic_or(MEMBARRIER_STATE_PRIVATE_EXPEDITED, &mm->membarrier_state); + if (flags & MEMBARRIER_FLAG_SYNC_CORE) + atomic_or(MEMBARRIER_STATE_PRIVATE_EXPEDITED_SYNC_CORE, + &mm->membarrier_state); if (!(atomic_read(&mm->mm_users) == 1 && get_nr_threads(p) == 1)) { /* * Ensure all future scheduler executions will observe the @@ -226,8 +252,7 @@ static int membarrier_register_private_expedited(void) */ synchronize_sched(); } - atomic_or(MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY, - &mm->membarrier_state); + atomic_or(state, &mm->membarrier_state); return 0; } @@ -283,9 +308,13 @@ SYSCALL_DEFINE2(membarrier, int, cmd, int, flags) case MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED: return membarrier_register_global_expedited(); case MEMBARRIER_CMD_PRIVATE_EXPEDITED: - return membarrier_private_expedited(); + return membarrier_private_expedited(0); case MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED: - return membarrier_register_private_expedited(); + return membarrier_register_private_expedited(0); + case MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE: + return membarrier_private_expedited(MEMBARRIER_FLAG_SYNC_CORE); + case MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE: + return membarrier_register_private_expedited(MEMBARRIER_FLAG_SYNC_CORE); default: return -EINVAL; } |