summaryrefslogtreecommitdiffstats
path: root/arch/x86/mm
diff options
context:
space:
mode:
authorIngo Molnar2017-03-01 09:02:26 +0100
committerIngo Molnar2017-03-01 09:02:26 +0100
commit0871d5a66da5c41151e0896a90298b163e42f2e0 (patch)
tree1ba71fab9016cb28bb9d18ffd62b6b744f2f761c /arch/x86/mm
parentx86/boot: Fix pr_debug() API braindamage (diff)
parentMerge tag 'for-linus-4.11' of git://git.code.sf.net/p/openipmi/linux-ipmi (diff)
downloadkernel-qcow2-linux-0871d5a66da5c41151e0896a90298b163e42f2e0.tar.gz
kernel-qcow2-linux-0871d5a66da5c41151e0896a90298b163e42f2e0.tar.xz
kernel-qcow2-linux-0871d5a66da5c41151e0896a90298b163e42f2e0.zip
Merge branch 'linus' into WIP.x86/boot, to fix up conflicts and to pick up updates
Conflicts: arch/x86/xen/setup.c Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'arch/x86/mm')
-rw-r--r--arch/x86/mm/dump_pagetables.c27
-rw-r--r--arch/x86/mm/gup.c28
-rw-r--r--arch/x86/mm/init_32.c4
-rw-r--r--arch/x86/mm/init_64.c7
-rw-r--r--arch/x86/mm/mpx.c6
-rw-r--r--arch/x86/mm/pageattr.c13
-rw-r--r--arch/x86/mm/pat_rbtree.c12
-rw-r--r--arch/x86/mm/pgtable.c31
8 files changed, 104 insertions, 24 deletions
diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c
index ea9c49adaa1f..58b5bee7ea27 100644
--- a/arch/x86/mm/dump_pagetables.c
+++ b/arch/x86/mm/dump_pagetables.c
@@ -15,8 +15,10 @@
#include <linux/debugfs.h>
#include <linux/mm.h>
#include <linux/init.h>
+#include <linux/sched.h>
#include <linux/seq_file.h>
+#include <asm/kasan.h>
#include <asm/pgtable.h>
/*
@@ -50,6 +52,10 @@ enum address_markers_idx {
LOW_KERNEL_NR,
VMALLOC_START_NR,
VMEMMAP_START_NR,
+#ifdef CONFIG_KASAN
+ KASAN_SHADOW_START_NR,
+ KASAN_SHADOW_END_NR,
+#endif
# ifdef CONFIG_X86_ESPFIX64
ESPFIX_START_NR,
# endif
@@ -75,6 +81,10 @@ static struct addr_marker address_markers[] = {
{ 0/* PAGE_OFFSET */, "Low Kernel Mapping" },
{ 0/* VMALLOC_START */, "vmalloc() Area" },
{ 0/* VMEMMAP_START */, "Vmemmap" },
+#ifdef CONFIG_KASAN
+ { KASAN_SHADOW_START, "KASAN shadow" },
+ { KASAN_SHADOW_END, "KASAN shadow end" },
+#endif
# ifdef CONFIG_X86_ESPFIX64
{ ESPFIX_BASE_ADDR, "ESPfix Area", 16 },
# endif
@@ -326,18 +336,31 @@ static void walk_pmd_level(struct seq_file *m, struct pg_state *st, pud_t addr,
#if PTRS_PER_PUD > 1
+/*
+ * This is an optimization for CONFIG_DEBUG_WX=y + CONFIG_KASAN=y
+ * KASAN fills page tables with the same values. Since there is no
+ * point in checking page table more than once we just skip repeated
+ * entries. This saves us dozens of seconds during boot.
+ */
+static bool pud_already_checked(pud_t *prev_pud, pud_t *pud, bool checkwx)
+{
+ return checkwx && prev_pud && (pud_val(*prev_pud) == pud_val(*pud));
+}
+
static void walk_pud_level(struct seq_file *m, struct pg_state *st, pgd_t addr,
unsigned long P)
{
int i;
pud_t *start;
pgprotval_t prot;
+ pud_t *prev_pud = NULL;
start = (pud_t *) pgd_page_vaddr(addr);
for (i = 0; i < PTRS_PER_PUD; i++) {
st->current_address = normalize_addr(P + i * PUD_LEVEL_MULT);
- if (!pud_none(*start)) {
+ if (!pud_none(*start) &&
+ !pud_already_checked(prev_pud, start, st->check_wx)) {
if (pud_large(*start) || !pud_present(*start)) {
prot = pud_flags(*start);
note_page(m, st, __pgprot(prot), 2);
@@ -348,6 +371,7 @@ static void walk_pud_level(struct seq_file *m, struct pg_state *st, pgd_t addr,
} else
note_page(m, st, __pgprot(0), 2);
+ prev_pud = start;
start++;
}
}
@@ -406,6 +430,7 @@ static void ptdump_walk_pgd_level_core(struct seq_file *m, pgd_t *pgd,
} else
note_page(m, &st, __pgprot(0), 1);
+ cond_resched();
start++;
}
diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c
index 0d4fb3ebbbac..99c7805a9693 100644
--- a/arch/x86/mm/gup.c
+++ b/arch/x86/mm/gup.c
@@ -154,14 +154,12 @@ static inline void get_head_page_multiple(struct page *page, int nr)
SetPageReferenced(page);
}
-static int __gup_device_huge_pmd(pmd_t pmd, unsigned long addr,
+static int __gup_device_huge(unsigned long pfn, unsigned long addr,
unsigned long end, struct page **pages, int *nr)
{
int nr_start = *nr;
- unsigned long pfn = pmd_pfn(pmd);
struct dev_pagemap *pgmap = NULL;
- pfn += (addr & ~PMD_MASK) >> PAGE_SHIFT;
do {
struct page *page = pfn_to_page(pfn);
@@ -180,6 +178,24 @@ static int __gup_device_huge_pmd(pmd_t pmd, unsigned long addr,
return 1;
}
+static int __gup_device_huge_pmd(pmd_t pmd, unsigned long addr,
+ unsigned long end, struct page **pages, int *nr)
+{
+ unsigned long fault_pfn;
+
+ fault_pfn = pmd_pfn(pmd) + ((addr & ~PMD_MASK) >> PAGE_SHIFT);
+ return __gup_device_huge(fault_pfn, addr, end, pages, nr);
+}
+
+static int __gup_device_huge_pud(pud_t pud, unsigned long addr,
+ unsigned long end, struct page **pages, int *nr)
+{
+ unsigned long fault_pfn;
+
+ fault_pfn = pud_pfn(pud) + ((addr & ~PUD_MASK) >> PAGE_SHIFT);
+ return __gup_device_huge(fault_pfn, addr, end, pages, nr);
+}
+
static noinline int gup_huge_pmd(pmd_t pmd, unsigned long addr,
unsigned long end, int write, struct page **pages, int *nr)
{
@@ -251,9 +267,13 @@ static noinline int gup_huge_pud(pud_t pud, unsigned long addr,
if (!pte_allows_gup(pud_val(pud), write))
return 0;
+
+ VM_BUG_ON(!pfn_valid(pud_pfn(pud)));
+ if (pud_devmap(pud))
+ return __gup_device_huge_pud(pud, addr, end, pages, nr);
+
/* hugepages are never "special" */
VM_BUG_ON(pud_flags(pud) & _PAGE_SPECIAL);
- VM_BUG_ON(!pfn_valid(pud_pfn(pud)));
refs = 0;
head = pud_page(pud);
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 70e1ad66d34a..1fa97c941abe 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -864,9 +864,6 @@ static noinline int do_test_wp_bit(void)
return flag;
}
-const int rodata_test_data = 0xC3;
-EXPORT_SYMBOL_GPL(rodata_test_data);
-
int kernel_set_to_readonly __read_mostly;
void set_kernel_text_rw(void)
@@ -939,7 +936,6 @@ void mark_rodata_ro(void)
set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT);
printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n",
size >> 10);
- rodata_test();
#ifdef CONFIG_CPA_DEBUG
printk(KERN_INFO "Testing CPA: undo %lx-%lx\n", start, start + size);
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 94e033085f60..f6da869810a8 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -679,7 +679,7 @@ static void __meminit free_pagetable(struct page *page, int order)
if (PageReserved(page)) {
__ClearPageReserved(page);
- magic = (unsigned long)page->lru.next;
+ magic = (unsigned long)page->freelist;
if (magic == SECTION_INFO || magic == MIX_SECTION_INFO) {
while (nr_pages--)
put_page_bootmem(page++);
@@ -1000,9 +1000,6 @@ void __init mem_init(void)
mem_init_print_info(NULL);
}
-const int rodata_test_data = 0xC3;
-EXPORT_SYMBOL_GPL(rodata_test_data);
-
int kernel_set_to_readonly;
void set_kernel_text_rw(void)
@@ -1071,8 +1068,6 @@ void mark_rodata_ro(void)
all_end = roundup((unsigned long)_brk_end, PMD_SIZE);
set_memory_nx(text_end, (all_end - text_end) >> PAGE_SHIFT);
- rodata_test();
-
#ifdef CONFIG_CPA_DEBUG
printk(KERN_INFO "Testing CPA: undo %lx-%lx\n", start, end);
set_memory_rw(start, (end-start) >> PAGE_SHIFT);
diff --git a/arch/x86/mm/mpx.c b/arch/x86/mm/mpx.c
index af59f808742f..c98079684bdb 100644
--- a/arch/x86/mm/mpx.c
+++ b/arch/x86/mm/mpx.c
@@ -51,7 +51,7 @@ static unsigned long mpx_mmap(unsigned long len)
down_write(&mm->mmap_sem);
addr = do_mmap(NULL, 0, len, PROT_READ | PROT_WRITE,
- MAP_ANONYMOUS | MAP_PRIVATE, VM_MPX, 0, &populate);
+ MAP_ANONYMOUS | MAP_PRIVATE, VM_MPX, 0, &populate, NULL);
up_write(&mm->mmap_sem);
if (populate)
mm_populate(addr, populate);
@@ -796,7 +796,7 @@ static noinline int zap_bt_entries_mapping(struct mm_struct *mm,
return -EINVAL;
len = min(vma->vm_end, end) - addr;
- zap_page_range(vma, addr, len, NULL);
+ zap_page_range(vma, addr, len);
trace_mpx_unmap_zap(addr, addr+len);
vma = vma->vm_next;
@@ -893,7 +893,7 @@ static int unmap_entire_bt(struct mm_struct *mm,
* avoid recursion, do_munmap() will check whether it comes
* from one bounds table through VM_MPX flag.
*/
- return do_munmap(mm, bt_addr, mpx_bt_size_bytes(mm));
+ return do_munmap(mm, bt_addr, mpx_bt_size_bytes(mm), NULL);
}
static int try_unmap_single_bt(struct mm_struct *mm,
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 3e1577622cb0..a57e8e02f457 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -214,7 +214,20 @@ static void cpa_flush_array(unsigned long *start, int numpages, int cache,
int in_flags, struct page **pages)
{
unsigned int i, level;
+#ifdef CONFIG_PREEMPT
+ /*
+ * Avoid wbinvd() because it causes latencies on all CPUs,
+ * regardless of any CPU isolation that may be in effect.
+ *
+ * This should be extended for CAT enabled systems independent of
+ * PREEMPT because wbinvd() does not respect the CAT partitions and
+ * this is exposed to unpriviledged users through the graphics
+ * subsystem.
+ */
+ unsigned long do_wbinvd = 0;
+#else
unsigned long do_wbinvd = cache && numpages >= 1024; /* 4M threshold */
+#endif
BUG_ON(irqs_disabled());
diff --git a/arch/x86/mm/pat_rbtree.c b/arch/x86/mm/pat_rbtree.c
index 159b52ccd600..d76485b22824 100644
--- a/arch/x86/mm/pat_rbtree.c
+++ b/arch/x86/mm/pat_rbtree.c
@@ -47,7 +47,7 @@ static u64 get_subtree_max_end(struct rb_node *node)
{
u64 ret = 0;
if (node) {
- struct memtype *data = container_of(node, struct memtype, rb);
+ struct memtype *data = rb_entry(node, struct memtype, rb);
ret = data->subtree_max_end;
}
return ret;
@@ -79,7 +79,7 @@ static struct memtype *memtype_rb_lowest_match(struct rb_root *root,
struct memtype *last_lower = NULL;
while (node) {
- struct memtype *data = container_of(node, struct memtype, rb);
+ struct memtype *data = rb_entry(node, struct memtype, rb);
if (get_subtree_max_end(node->rb_left) > start) {
/* Lowest overlap if any must be on left side */
@@ -121,7 +121,7 @@ static struct memtype *memtype_rb_match(struct rb_root *root,
node = rb_next(&match->rb);
if (node)
- match = container_of(node, struct memtype, rb);
+ match = rb_entry(node, struct memtype, rb);
else
match = NULL;
}
@@ -150,7 +150,7 @@ static int memtype_rb_check_conflict(struct rb_root *root,
node = rb_next(&match->rb);
while (node) {
- match = container_of(node, struct memtype, rb);
+ match = rb_entry(node, struct memtype, rb);
if (match->start >= end) /* Checked all possible matches */
goto success;
@@ -181,7 +181,7 @@ static void memtype_rb_insert(struct rb_root *root, struct memtype *newdata)
struct rb_node *parent = NULL;
while (*node) {
- struct memtype *data = container_of(*node, struct memtype, rb);
+ struct memtype *data = rb_entry(*node, struct memtype, rb);
parent = *node;
if (data->subtree_max_end < newdata->end)
@@ -270,7 +270,7 @@ int rbt_memtype_copy_nth_element(struct memtype *out, loff_t pos)
}
if (node) { /* pos == i */
- struct memtype *this = container_of(node, struct memtype, rb);
+ struct memtype *this = rb_entry(node, struct memtype, rb);
*out = *this;
return 0;
} else {
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index 3feec5af4e67..6cbdff26bb96 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -445,6 +445,26 @@ int pmdp_set_access_flags(struct vm_area_struct *vma,
return changed;
}
+
+int pudp_set_access_flags(struct vm_area_struct *vma, unsigned long address,
+ pud_t *pudp, pud_t entry, int dirty)
+{
+ int changed = !pud_same(*pudp, entry);
+
+ VM_BUG_ON(address & ~HPAGE_PUD_MASK);
+
+ if (changed && dirty) {
+ *pudp = entry;
+ /*
+ * We had a write-protection fault here and changed the pud
+ * to to more permissive. No need to flush the TLB for that,
+ * #PF is architecturally guaranteed to do that and in the
+ * worst-case we'll generate a spurious fault.
+ */
+ }
+
+ return changed;
+}
#endif
int ptep_test_and_clear_young(struct vm_area_struct *vma,
@@ -474,6 +494,17 @@ int pmdp_test_and_clear_young(struct vm_area_struct *vma,
return ret;
}
+int pudp_test_and_clear_young(struct vm_area_struct *vma,
+ unsigned long addr, pud_t *pudp)
+{
+ int ret = 0;
+
+ if (pud_young(*pudp))
+ ret = test_and_clear_bit(_PAGE_BIT_ACCESSED,
+ (unsigned long *)pudp);
+
+ return ret;
+}
#endif
int ptep_clear_flush_young(struct vm_area_struct *vma,