summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--include/linux/sched.h7
-rw-r--r--mm/internal.h4
-rw-r--r--mm/rmap.c28
-rw-r--r--mm/vmscan.c7
4 files changed, 38 insertions, 8 deletions
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 3c602c20c717..a4ab9daa387c 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1354,6 +1354,13 @@ struct tlbflush_unmap_batch {
/* True if any bit in cpumask is set */
bool flush_required;
+
+ /*
+ * If true then the PTE was dirty when unmapped. The entry must be
+ * flushed before IO is initiated or a stale TLB entry potentially
+ * allows an update without redirtying the page.
+ */
+ bool writable;
};
struct task_struct {
diff --git a/mm/internal.h b/mm/internal.h
index bd6372ac5f7f..1195dd2d6a2b 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -431,10 +431,14 @@ struct tlbflush_unmap_batch;
#ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
void try_to_unmap_flush(void);
+void try_to_unmap_flush_dirty(void);
#else
static inline void try_to_unmap_flush(void)
{
}
+static inline void try_to_unmap_flush_dirty(void)
+{
+}
#endif /* CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH */
#endif /* __MM_INTERNAL_H */
diff --git a/mm/rmap.c b/mm/rmap.c
index 326d5d89e45c..0db38e7d0a72 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -626,16 +626,34 @@ void try_to_unmap_flush(void)
}
cpumask_clear(&tlb_ubc->cpumask);
tlb_ubc->flush_required = false;
+ tlb_ubc->writable = false;
put_cpu();
}
+/* Flush iff there are potentially writable TLB entries that can race with IO */
+void try_to_unmap_flush_dirty(void)
+{
+ struct tlbflush_unmap_batch *tlb_ubc = &current->tlb_ubc;
+
+ if (tlb_ubc->writable)
+ try_to_unmap_flush();
+}
+
static void set_tlb_ubc_flush_pending(struct mm_struct *mm,
- struct page *page)
+ struct page *page, bool writable)
{
struct tlbflush_unmap_batch *tlb_ubc = &current->tlb_ubc;
cpumask_or(&tlb_ubc->cpumask, &tlb_ubc->cpumask, mm_cpumask(mm));
tlb_ubc->flush_required = true;
+
+ /*
+ * If the PTE was dirty then it's best to assume it's writable. The
+ * caller must use try_to_unmap_flush_dirty() or try_to_unmap_flush()
+ * before the page is queued for IO.
+ */
+ if (writable)
+ tlb_ubc->writable = true;
}
/*
@@ -658,7 +676,7 @@ static bool should_defer_flush(struct mm_struct *mm, enum ttu_flags flags)
}
#else
static void set_tlb_ubc_flush_pending(struct mm_struct *mm,
- struct page *page)
+ struct page *page, bool writable)
{
}
@@ -1315,11 +1333,7 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
*/
pteval = ptep_get_and_clear(mm, address, pte);
- /* Potentially writable TLBs must be flushed before IO */
- if (pte_dirty(pteval))
- flush_tlb_page(vma, address);
- else
- set_tlb_ubc_flush_pending(mm, page);
+ set_tlb_ubc_flush_pending(mm, page, pte_dirty(pteval));
} else {
pteval = ptep_clear_flush(vma, address, pte);
}
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 99ec00d6a5dd..b1139039122a 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1098,7 +1098,12 @@ static unsigned long shrink_page_list(struct list_head *page_list,
if (!sc->may_writepage)
goto keep_locked;
- /* Page is dirty, try to write it out here */
+ /*
+ * Page is dirty. Flush the TLB if a writable entry
+ * potentially exists to avoid CPU writes after IO
+ * starts and then write it out here.
+ */
+ try_to_unmap_flush_dirty();
switch (pageout(page, mapping, sc)) {
case PAGE_KEEP:
goto keep_locked;