s390/mm,tlb: race of lazy TLB flush vs. recreation of TLB entries

Git commit 050eef364ad70059 "[S390] fix tlb flushing vs. concurrent /proc accesses" introduced the attach counter to avoid using the mm_users value to decide between IPTE for every PTE and lazy TLB flushing with IDTE. That fixed the problem with mm_users but it introduced another subtle race, fortunately one that is very hard to hit.

The background is the requirement of the architecture that a valid PTE may not be changed while it can be used concurrently by another cpu. The decision between IPTE and lazy TLB flushing needs to be done while the PTE is still valid. Now if the virtual cpu is temporarily stopped after the decision to use lazy TLB flushing but before the invalid bit of the PTE has been set, another cpu can attach the mm, find that flush_mm is set, do the IDTE, return to userspace, and recreate a TLB entry that uses the PTE in question. When the first, stopped cpu continues it will change the PTE while the mm is attached on another cpu. The first cpu will do another IDTE shortly after the modification of the PTE, which makes the race window quite short.

To fix this race the CPU that wants to attach the address space of a user space thread needs to wait for the end of the PTE modification. The number of concurrent TLB flushers for an mm is tracked in the upper 16 bits of the attach_count, and finish_arch_post_lock_switch is used to wait for the end of the flush operation if required.

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
author: Martin Schwidefsky 2012-09-10 13:00:09 +0200
committer: Martin Schwidefsky 2014-02-21 08:50:18 +0100
commit: 53e857f30867918b3618d8e18902e63291946ef4 (patch)
tree: 53b7875848deb77a0012aaa04169d6d041b52cf1 /arch/s390/include/asm/mmu_context.h
parent: sched/mm: call finish_arch_post_lock_switch in idle_task_exit and use_mm (diff)
download: kernel-qcow2-linux-53e857f30867918b3618d8e18902e63291946ef4.tar.gz
kernel-qcow2-linux-53e857f30867918b3618d8e18902e63291946ef4.tar.xz
kernel-qcow2-linux-53e857f30867918b3618d8e18902e63291946ef4.zip
1 files changed, 34 insertions, 5 deletions
diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h
index 5d1f950704dc..38149b63dc44 100644
--- a/arch/s390/include/asm/mmu_context.h
+++ b/arch/s390/include/asm/mmu_context.h
@@ -48,13 +48,42 @@ static inline void update_mm(struct mm_struct *mm, struct task_struct *tsk)
 static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
 			     struct task_struct *tsk)
 {
-	cpumask_set_cpu(smp_processor_id(), mm_cpumask(next));
-	update_mm(next, tsk);
+	int cpu = smp_processor_id();
+
+	if (prev == next)
+		return;
+	if (atomic_inc_return(&next->context.attach_count) >> 16) {
+		/* Delay update_mm until all TLB flushes are done. */
+		set_tsk_thread_flag(tsk, TIF_TLB_WAIT);
+	} else {
+		cpumask_set_cpu(cpu, mm_cpumask(next));
+		update_mm(next, tsk);
+		if (next->context.flush_mm)
+			/* Flush pending TLBs */
+			__tlb_flush_mm(next);
+	}
 	atomic_dec(&prev->context.attach_count);
 	WARN_ON(atomic_read(&prev->context.attach_count) < 0);
-	atomic_inc(&next->context.attach_count);
-	/* Check for TLBs not flushed yet */
-	__tlb_flush_mm_lazy(next);
+}
+
+#define finish_arch_post_lock_switch finish_arch_post_lock_switch
+static inline void finish_arch_post_lock_switch(void)
+{
+	struct task_struct *tsk = current;
+	struct mm_struct *mm = tsk->mm;
+
+	if (!test_tsk_thread_flag(tsk, TIF_TLB_WAIT))
+		return;
+	preempt_disable();
+	clear_tsk_thread_flag(tsk, TIF_TLB_WAIT);
+	while (atomic_read(&mm->context.attach_count) >> 16)
+		cpu_relax();
+
+	cpumask_set_cpu(smp_processor_id(), mm_cpumask(mm));
+	update_mm(mm, tsk);
+	if (mm->context.flush_mm)
+		__tlb_flush_mm(mm);
+	preempt_enable();
 }
 
 #define enter_lazy_tlb(mm,tsk)	do { } while (0)
author	Martin Schwidefsky	2012-09-10 13:00:09 +0200
committer	Martin Schwidefsky	2014-02-21 08:50:18 +0100
commit	53e857f30867918b3618d8e18902e63291946ef4 (patch)
tree	53b7875848deb77a0012aaa04169d6d041b52cf1 /arch/s390/include/asm/mmu_context.h
parent	sched/mm: call finish_arch_post_lock_switch in idle_task_exit and use_mm (diff)
download	kernel-qcow2-linux-53e857f30867918b3618d8e18902e63291946ef4.tar.gz kernel-qcow2-linux-53e857f30867918b3618d8e18902e63291946ef4.tar.xz kernel-qcow2-linux-53e857f30867918b3618d8e18902e63291946ef4.zip