summaryrefslogtreecommitdiffstats
path: root/arch/x86/mm
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/mm')
-rw-r--r--arch/x86/mm/Makefile2
-rw-r--r--arch/x86/mm/cpu_entry_area.c64
-rw-r--r--arch/x86/mm/debug_pagetables.c1
-rw-r--r--arch/x86/mm/dump_pagetables.c10
-rw-r--r--arch/x86/mm/extable.c5
-rw-r--r--arch/x86/mm/fault.c58
-rw-r--r--arch/x86/mm/highmem_32.c1
-rw-r--r--arch/x86/mm/hugetlbpage.c2
-rw-r--r--arch/x86/mm/init.c37
-rw-r--r--arch/x86/mm/init_32.c12
-rw-r--r--arch/x86/mm/init_64.c165
-rw-r--r--arch/x86/mm/iomap_32.c15
-rw-r--r--arch/x86/mm/ioremap.c1
-rw-r--r--arch/x86/mm/kasan_init_64.c2
-rw-r--r--arch/x86/mm/kaslr.c105
-rw-r--r--arch/x86/mm/mem_encrypt.c10
-rw-r--r--arch/x86/mm/mm_internal.h3
-rw-r--r--arch/x86/mm/mmap.c15
-rw-r--r--arch/x86/mm/mmio-mod.c14
-rw-r--r--arch/x86/mm/mpx.c16
-rw-r--r--arch/x86/mm/numa.c1
-rw-r--r--arch/x86/mm/pageattr.c17
-rw-r--r--arch/x86/mm/pat.c1
-rw-r--r--arch/x86/mm/pf_in.c17
-rw-r--r--arch/x86/mm/pf_in.h17
-rw-r--r--arch/x86/mm/pgtable.c14
-rw-r--r--arch/x86/mm/pkeys.c31
-rw-r--r--arch/x86/mm/pti.c16
-rw-r--r--arch/x86/mm/testmmiotrace.c1
-rw-r--r--arch/x86/mm/tlb.c117
30 files changed, 425 insertions, 345 deletions
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index 4b101dd6e52f..84373dc9b341 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -21,7 +21,7 @@ CFLAGS_physaddr.o := $(nostackp)
CFLAGS_setup_nx.o := $(nostackp)
CFLAGS_mem_encrypt_identity.o := $(nostackp)
-CFLAGS_fault.o := -I$(src)/../include/asm/trace
+CFLAGS_fault.o := -I $(srctree)/$(src)/../include/asm/trace
obj-$(CONFIG_X86_PAT) += pat_rbtree.o
diff --git a/arch/x86/mm/cpu_entry_area.c b/arch/x86/mm/cpu_entry_area.c
index 19c6abf9ea31..752ad11d6868 100644
--- a/arch/x86/mm/cpu_entry_area.c
+++ b/arch/x86/mm/cpu_entry_area.c
@@ -13,8 +13,8 @@
static DEFINE_PER_CPU_PAGE_ALIGNED(struct entry_stack_page, entry_stack_storage);
#ifdef CONFIG_X86_64
-static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks
- [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]);
+static DEFINE_PER_CPU_PAGE_ALIGNED(struct exception_stacks, exception_stacks);
+DEFINE_PER_CPU(struct cea_exception_stacks*, cea_exception_stacks);
#endif
struct cpu_entry_area *get_cpu_entry_area(int cpu)
@@ -52,10 +52,10 @@ cea_map_percpu_pages(void *cea_vaddr, void *ptr, int pages, pgprot_t prot)
cea_set_pte(cea_vaddr, per_cpu_ptr_to_phys(ptr), prot);
}
-static void __init percpu_setup_debug_store(int cpu)
+static void __init percpu_setup_debug_store(unsigned int cpu)
{
#ifdef CONFIG_CPU_SUP_INTEL
- int npages;
+ unsigned int npages;
void *cea;
if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
@@ -78,9 +78,43 @@ static void __init percpu_setup_debug_store(int cpu)
#endif
}
+#ifdef CONFIG_X86_64
+
+#define cea_map_stack(name) do { \
+ npages = sizeof(estacks->name## _stack) / PAGE_SIZE; \
+ cea_map_percpu_pages(cea->estacks.name## _stack, \
+ estacks->name## _stack, npages, PAGE_KERNEL); \
+ } while (0)
+
+static void __init percpu_setup_exception_stacks(unsigned int cpu)
+{
+ struct exception_stacks *estacks = per_cpu_ptr(&exception_stacks, cpu);
+ struct cpu_entry_area *cea = get_cpu_entry_area(cpu);
+ unsigned int npages;
+
+ BUILD_BUG_ON(sizeof(exception_stacks) % PAGE_SIZE != 0);
+
+ per_cpu(cea_exception_stacks, cpu) = &cea->estacks;
+
+ /*
+ * The exceptions stack mappings in the per cpu area are protected
+ * by guard pages so each stack must be mapped separately. DB2 is
+ * not mapped; it just exists to catch triple nesting of #DB.
+ */
+ cea_map_stack(DF);
+ cea_map_stack(NMI);
+ cea_map_stack(DB1);
+ cea_map_stack(DB);
+ cea_map_stack(MCE);
+}
+#else
+static inline void percpu_setup_exception_stacks(unsigned int cpu) {}
+#endif
+
/* Setup the fixmap mappings only once per-processor */
-static void __init setup_cpu_entry_area(int cpu)
+static void __init setup_cpu_entry_area(unsigned int cpu)
{
+ struct cpu_entry_area *cea = get_cpu_entry_area(cpu);
#ifdef CONFIG_X86_64
/* On 64-bit systems, we use a read-only fixmap GDT and TSS. */
pgprot_t gdt_prot = PAGE_KERNEL_RO;
@@ -101,10 +135,9 @@ static void __init setup_cpu_entry_area(int cpu)
pgprot_t tss_prot = PAGE_KERNEL;
#endif
- cea_set_pte(&get_cpu_entry_area(cpu)->gdt, get_cpu_gdt_paddr(cpu),
- gdt_prot);
+ cea_set_pte(&cea->gdt, get_cpu_gdt_paddr(cpu), gdt_prot);
- cea_map_percpu_pages(&get_cpu_entry_area(cpu)->entry_stack_page,
+ cea_map_percpu_pages(&cea->entry_stack_page,
per_cpu_ptr(&entry_stack_storage, cpu), 1,
PAGE_KERNEL);
@@ -128,22 +161,15 @@ static void __init setup_cpu_entry_area(int cpu)
BUILD_BUG_ON((offsetof(struct tss_struct, x86_tss) ^
offsetofend(struct tss_struct, x86_tss)) & PAGE_MASK);
BUILD_BUG_ON(sizeof(struct tss_struct) % PAGE_SIZE != 0);
- cea_map_percpu_pages(&get_cpu_entry_area(cpu)->tss,
- &per_cpu(cpu_tss_rw, cpu),
+ cea_map_percpu_pages(&cea->tss, &per_cpu(cpu_tss_rw, cpu),
sizeof(struct tss_struct) / PAGE_SIZE, tss_prot);
#ifdef CONFIG_X86_32
- per_cpu(cpu_entry_area, cpu) = get_cpu_entry_area(cpu);
+ per_cpu(cpu_entry_area, cpu) = cea;
#endif
-#ifdef CONFIG_X86_64
- BUILD_BUG_ON(sizeof(exception_stacks) % PAGE_SIZE != 0);
- BUILD_BUG_ON(sizeof(exception_stacks) !=
- sizeof(((struct cpu_entry_area *)0)->exception_stacks));
- cea_map_percpu_pages(&get_cpu_entry_area(cpu)->exception_stacks,
- &per_cpu(exception_stacks, cpu),
- sizeof(exception_stacks) / PAGE_SIZE, PAGE_KERNEL);
-#endif
+ percpu_setup_exception_stacks(cpu);
+
percpu_setup_debug_store(cpu);
}
diff --git a/arch/x86/mm/debug_pagetables.c b/arch/x86/mm/debug_pagetables.c
index cd84f067e41d..c6f4982d5401 100644
--- a/arch/x86/mm/debug_pagetables.c
+++ b/arch/x86/mm/debug_pagetables.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
#include <linux/debugfs.h>
#include <linux/efi.h>
#include <linux/module.h>
diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c
index c0309ea9abee..ab67822fd2f4 100644
--- a/arch/x86/mm/dump_pagetables.c
+++ b/arch/x86/mm/dump_pagetables.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Debug helper to dump the current kernel pagetables of the system
* so that we can see what the various memory ranges are set to.
@@ -5,11 +6,6 @@
* (C) Copyright 2008 Intel Corporation
*
* Author: Arjan van de Ven <arjan@linux.intel.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; version 2
- * of the License.
*/
#include <linux/debugfs.h>
@@ -578,7 +574,7 @@ void ptdump_walk_pgd_level(struct seq_file *m, pgd_t *pgd)
void ptdump_walk_pgd_level_debugfs(struct seq_file *m, pgd_t *pgd, bool user)
{
#ifdef CONFIG_PAGE_TABLE_ISOLATION
- if (user && static_cpu_has(X86_FEATURE_PTI))
+ if (user && boot_cpu_has(X86_FEATURE_PTI))
pgd = kernel_to_user_pgdp(pgd);
#endif
ptdump_walk_pgd_level_core(m, pgd, false, false);
@@ -591,7 +587,7 @@ void ptdump_walk_user_pgd_level_checkwx(void)
pgd_t *pgd = INIT_PGD;
if (!(__supported_pte_mask & _PAGE_NX) ||
- !static_cpu_has(X86_FEATURE_PTI))
+ !boot_cpu_has(X86_FEATURE_PTI))
return;
pr_info("x86/mm: Checking user space page tables\n");
diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c
index 3c4568f8fb28..4d75bc656f97 100644
--- a/arch/x86/mm/extable.c
+++ b/arch/x86/mm/extable.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
#include <linux/extable.h>
#include <linux/uaccess.h>
#include <linux/sched/debug.h>
@@ -145,7 +146,7 @@ __visible bool ex_handler_rdmsr_unsafe(const struct exception_table_entry *fixup
unsigned long error_code,
unsigned long fault_addr)
{
- if (pr_warn_once("unchecked MSR access error: RDMSR from 0x%x at rIP: 0x%lx (%pF)\n",
+ if (pr_warn_once("unchecked MSR access error: RDMSR from 0x%x at rIP: 0x%lx (%pS)\n",
(unsigned int)regs->cx, regs->ip, (void *)regs->ip))
show_stack_regs(regs);
@@ -162,7 +163,7 @@ __visible bool ex_handler_wrmsr_unsafe(const struct exception_table_entry *fixup
unsigned long error_code,
unsigned long fault_addr)
{
- if (pr_warn_once("unchecked MSR access error: WRMSR to 0x%x (tried to write 0x%08x%08x) at rIP: 0x%lx (%pF)\n",
+ if (pr_warn_once("unchecked MSR access error: WRMSR to 0x%x (tried to write 0x%08x%08x) at rIP: 0x%lx (%pS)\n",
(unsigned int)regs->cx, (unsigned int)regs->dx,
(unsigned int)regs->ax, regs->ip, (void *)regs->ip))
show_stack_regs(regs);
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 667f1da36208..46df4c6aae46 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -28,6 +28,7 @@
#include <asm/mmu_context.h> /* vma_pkey() */
#include <asm/efi.h> /* efi_recover_from_page_fault()*/
#include <asm/desc.h> /* store_idt(), ... */
+#include <asm/cpu_entry_area.h> /* exception stack */
#define CREATE_TRACE_POINTS
#include <asm/trace/exceptions.h>
@@ -359,8 +360,6 @@ static noinline int vmalloc_fault(unsigned long address)
if (!(address >= VMALLOC_START && address < VMALLOC_END))
return -1;
- WARN_ON_ONCE(in_nmi());
-
/*
* Copy kernel mappings over when needed. This can also
* happen within a race in page table update. In the later
@@ -603,24 +602,9 @@ static void show_ldttss(const struct desc_ptr *gdt, const char *name, u16 index)
name, index, addr, (desc.limit0 | (desc.limit1 << 16)));
}
-/*
- * This helper function transforms the #PF error_code bits into
- * "[PROT] [USER]" type of descriptive, almost human-readable error strings:
- */
-static void err_str_append(unsigned long error_code, char *buf, unsigned long mask, const char *txt)
-{
- if (error_code & mask) {
- if (buf[0])
- strcat(buf, " ");
- strcat(buf, txt);
- }
-}
-
static void
show_fault_oops(struct pt_regs *regs, unsigned long error_code, unsigned long address)
{
- char err_txt[64];
-
if (!oops_may_print())
return;
@@ -644,31 +628,29 @@ show_fault_oops(struct pt_regs *regs, unsigned long error_code, unsigned long ad
from_kuid(&init_user_ns, current_uid()));
}
- pr_alert("BUG: unable to handle kernel %s at %px\n",
- address < PAGE_SIZE ? "NULL pointer dereference" : "paging request",
- (void *)address);
-
- err_txt[0] = 0;
-
- /*
- * Note: length of these appended strings including the separation space and the
- * zero delimiter must fit into err_txt[].
- */
- err_str_append(error_code, err_txt, X86_PF_PROT, "[PROT]" );
- err_str_append(error_code, err_txt, X86_PF_WRITE, "[WRITE]");
- err_str_append(error_code, err_txt, X86_PF_USER, "[USER]" );
- err_str_append(error_code, err_txt, X86_PF_RSVD, "[RSVD]" );
- err_str_append(error_code, err_txt, X86_PF_INSTR, "[INSTR]");
- err_str_append(error_code, err_txt, X86_PF_PK, "[PK]" );
-
- pr_alert("#PF error: %s\n", error_code ? err_txt : "[normal kernel read fault]");
+ if (address < PAGE_SIZE && !user_mode(regs))
+ pr_alert("BUG: kernel NULL pointer dereference, address: %px\n",
+ (void *)address);
+ else
+ pr_alert("BUG: unable to handle page fault for address: %px\n",
+ (void *)address);
+
+ pr_alert("#PF: %s %s in %s mode\n",
+ (error_code & X86_PF_USER) ? "user" : "supervisor",
+ (error_code & X86_PF_INSTR) ? "instruction fetch" :
+ (error_code & X86_PF_WRITE) ? "write access" :
+ "read access",
+ user_mode(regs) ? "user" : "kernel");
+ pr_alert("#PF: error_code(0x%04lx) - %s\n", error_code,
+ !(error_code & X86_PF_PROT) ? "not-present page" :
+ (error_code & X86_PF_RSVD) ? "reserved bit violation" :
+ (error_code & X86_PF_PK) ? "protection keys violation" :
+ "permissions violation");
if (!(error_code & X86_PF_USER) && user_mode(regs)) {
struct desc_ptr idt, gdt;
u16 ldtr, tr;
- pr_alert("This was a system access from user code\n");
-
/*
* This can happen for quite a few reasons. The more obvious
* ones are faults accessing the GDT, or LDT. Perhaps
@@ -793,7 +775,7 @@ no_context(struct pt_regs *regs, unsigned long error_code,
if (is_vmalloc_addr((void *)address) &&
(((unsigned long)tsk->stack - 1 - address < PAGE_SIZE) ||
address - ((unsigned long)tsk->stack + THREAD_SIZE) < PAGE_SIZE)) {
- unsigned long stack = this_cpu_read(orig_ist.ist[DOUBLEFAULT_STACK]) - sizeof(void *);
+ unsigned long stack = __this_cpu_ist_top_va(DF) - sizeof(void *);
/*
* We're likely to be running with very little stack space
* left. It's plausible that we'd hit this condition but
diff --git a/arch/x86/mm/highmem_32.c b/arch/x86/mm/highmem_32.c
index 0d4bdcb84da5..0a1898b8552e 100644
--- a/arch/x86/mm/highmem_32.c
+++ b/arch/x86/mm/highmem_32.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
#include <linux/highmem.h>
#include <linux/export.h>
#include <linux/swap.h> /* for totalram_pages */
diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c
index 92e4c4b85bba..fab095362c50 100644
--- a/arch/x86/mm/hugetlbpage.c
+++ b/arch/x86/mm/hugetlbpage.c
@@ -203,7 +203,7 @@ static __init int setup_hugepagesz(char *opt)
}
__setup("hugepagesz=", setup_hugepagesz);
-#if (defined(CONFIG_MEMORY_ISOLATION) && defined(CONFIG_COMPACTION)) || defined(CONFIG_CMA)
+#ifdef CONFIG_CONTIG_ALLOC
static __init int gigantic_pages_init(void)
{
/* With compaction or CMA we can allocate gigantic pages at runtime */
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 8dacdb96899e..fd10d91a6115 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -6,6 +6,7 @@
#include <linux/swapfile.h>
#include <linux/swapops.h>
#include <linux/kmemleak.h>
+#include <linux/sched/task.h>
#include <asm/set_memory.h>
#include <asm/e820/api.h>
@@ -23,6 +24,7 @@
#include <asm/hypervisor.h>
#include <asm/cpufeature.h>
#include <asm/pti.h>
+#include <asm/text-patching.h>
/*
* We need to define the tracepoints somewhere, and tlb.c
@@ -702,6 +704,41 @@ void __init init_mem_mapping(void)
}
/*
+ * Initialize an mm_struct to be used during poking and a pointer to be used
+ * during patching.
+ */
+void __init poking_init(void)
+{
+ spinlock_t *ptl;
+ pte_t *ptep;
+
+ poking_mm = copy_init_mm();
+ BUG_ON(!poking_mm);
+
+ /*
+ * Randomize the poking address, but make sure that the following page
+ * will be mapped at the same PMD. We need 2 pages, so find space for 3,
+ * and adjust the address if the PMD ends after the first one.
+ */
+ poking_addr = TASK_UNMAPPED_BASE;
+ if (IS_ENABLED(CONFIG_RANDOMIZE_BASE))
+ poking_addr += (kaslr_get_random_long("Poking") & PAGE_MASK) %
+ (TASK_SIZE - TASK_UNMAPPED_BASE - 3 * PAGE_SIZE);
+
+ if (((poking_addr + PAGE_SIZE) & ~PMD_MASK) == 0)
+ poking_addr += PAGE_SIZE;
+
+ /*
+ * We need to trigger the allocation of the page-tables that will be
+ * needed for poking now. Later, poking may be performed in an atomic
+ * section, which might cause allocation to fail.
+ */
+ ptep = get_locked_pte(poking_mm, poking_addr, &ptl);
+ BUG_ON(!ptep);
+ pte_unmap_unlock(ptep, ptl);
+}
+
+/*
* devmem_is_allowed() checks to see if /dev/mem access to a certain address
* is valid. The argument is a physical page number.
*
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 85c94f9a87f8..f265a4316179 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
*
* Copyright (C) 1995 Linus Torvalds
@@ -850,24 +851,25 @@ void __init mem_init(void)
}
#ifdef CONFIG_MEMORY_HOTPLUG
-int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap,
- bool want_memblock)
+int arch_add_memory(int nid, u64 start, u64 size,
+ struct mhp_restrictions *restrictions)
{
unsigned long start_pfn = start >> PAGE_SHIFT;
unsigned long nr_pages = size >> PAGE_SHIFT;
- return __add_pages(nid, start_pfn, nr_pages, altmap, want_memblock);
+ return __add_pages(nid, start_pfn, nr_pages, restrictions);
}
#ifdef CONFIG_MEMORY_HOTREMOVE
-int arch_remove_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap)
+void arch_remove_memory(int nid, u64 start, u64 size,
+ struct vmem_altmap *altmap)
{
unsigned long start_pfn = start >> PAGE_SHIFT;
unsigned long nr_pages = size >> PAGE_SHIFT;
struct zone *zone;
zone = page_zone(pfn_to_page(start_pfn));
- return __remove_pages(zone, start_pfn, nr_pages, altmap);
+ __remove_pages(zone, start_pfn, nr_pages, altmap);
}
#endif
#endif
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index bccff68e3267..693aaf28d5fe 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* linux/arch/x86_64/mm/init.c
*
@@ -58,6 +59,37 @@
#include "ident_map.c"
+#define DEFINE_POPULATE(fname, type1, type2, init) \
+static inline void fname##_init(struct mm_struct *mm, \
+ type1##_t *arg1, type2##_t *arg2, bool init) \
+{ \
+ if (init) \
+ fname##_safe(mm, arg1, arg2); \
+ else \
+ fname(mm, arg1, arg2); \
+}
+
+DEFINE_POPULATE(p4d_populate, p4d, pud, init)
+DEFINE_POPULATE(pgd_populate, pgd, p4d, init)
+DEFINE_POPULATE(pud_populate, pud, pmd, init)
+DEFINE_POPULATE(pmd_populate_kernel, pmd, pte, init)
+
+#define DEFINE_ENTRY(type1, type2, init) \
+static inline void set_##type1##_init(type1##_t *arg1, \
+ type2##_t arg2, bool init) \
+{ \
+ if (init) \
+ set_##type1##_safe(arg1, arg2); \
+ else \
+ set_##type1(arg1, arg2); \
+}
+
+DEFINE_ENTRY(p4d, p4d, init)
+DEFINE_ENTRY(pud, pud, init)
+DEFINE_ENTRY(pmd, pmd, init)
+DEFINE_ENTRY(pte, pte, init)
+
+
/*
* NOTE: pagetable_init alloc all the fixmap pagetables contiguous on the
* physical space so we can cache the place of the first one and move
@@ -414,7 +446,7 @@ void __init cleanup_highmap(void)
*/
static unsigned long __meminit
phys_pte_init(pte_t *pte_page, unsigned long paddr, unsigned long paddr_end,
- pgprot_t prot)
+ pgprot_t prot, bool init)
{
unsigned long pages = 0, paddr_next;
unsigned long paddr_last = paddr_end;
@@ -432,7 +464,7 @@ phys_pte_init(pte_t *pte_page, unsigned long paddr, unsigned long paddr_end,
E820_TYPE_RAM) &&
!e820__mapped_any(paddr & PAGE_MASK, paddr_next,
E820_TYPE_RESERVED_KERN))
- set_pte_safe(pte, __pte(0));
+ set_pte_init(pte, __pte(0), init);
continue;
}
@@ -452,7 +484,7 @@ phys_pte_init(pte_t *pte_page, unsigned long paddr, unsigned long paddr_end,
pr_info(" pte=%p addr=%lx pte=%016lx\n", pte, paddr,
pfn_pte(paddr >> PAGE_SHIFT, PAGE_KERNEL).pte);
pages++;
- set_pte_safe(pte, pfn_pte(paddr >> PAGE_SHIFT, prot));
+ set_pte_init(pte, pfn_pte(paddr >> PAGE_SHIFT, prot), init);
paddr_last = (paddr & PAGE_MASK) + PAGE_SIZE;
}
@@ -468,7 +500,7 @@ phys_pte_init(pte_t *pte_page, unsigned long paddr, unsigned long paddr_end,
*/
static unsigned long __meminit
phys_pmd_init(pmd_t *pmd_page, unsigned long paddr, unsigned long paddr_end,
- unsigned long page_size_mask, pgprot_t prot)
+ unsigned long page_size_mask, pgprot_t prot, bool init)
{
unsigned long pages = 0, paddr_next;
unsigned long paddr_last = paddr_end;
@@ -487,7 +519,7 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long paddr, unsigned long paddr_end,
E820_TYPE_RAM) &&
!e820__mapped_any(paddr & PMD_MASK, paddr_next,
E820_TYPE_RESERVED_KERN))
- set_pmd_safe(pmd, __pmd(0));
+ set_pmd_init(pmd, __pmd(0), init);
continue;
}
@@ -496,7 +528,8 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long paddr, unsigned long paddr_end,
spin_lock(&init_mm.page_table_lock);
pte = (pte_t *)pmd_page_vaddr(*pmd);
paddr_last = phys_pte_init(pte, paddr,
- paddr_end, prot);
+ paddr_end, prot,
+ init);
spin_unlock(&init_mm.page_table_lock);
continue;
}
@@ -524,19 +557,20 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long paddr, unsigned long paddr_end,
if (page_size_mask & (1<<PG_LEVEL_2M)) {
pages++;
spin_lock(&init_mm.page_table_lock);
- set_pte_safe((pte_t *)pmd,
- pfn_pte((paddr & PMD_MASK) >> PAGE_SHIFT,
- __pgprot(pgprot_val(prot) | _PAGE_PSE)));
+ set_pte_init((pte_t *)pmd,
+ pfn_pte((paddr & PMD_MASK) >> PAGE_SHIFT,
+ __pgprot(pgprot_val(prot) | _PAGE_PSE)),
+ init);
spin_unlock(&init_mm.page_table_lock);
paddr_last = paddr_next;
continue;
}
pte = alloc_low_page();
- paddr_last = phys_pte_init(pte, paddr, paddr_end, new_prot);
+ paddr_last = phys_pte_init(pte, paddr, paddr_end, new_prot, init);
spin_lock(&init_mm.page_table_lock);
- pmd_populate_kernel_safe(&init_mm, pmd, pte);
+ pmd_populate_kernel_init(&init_mm, pmd, pte, init);
spin_unlock(&init_mm.page_table_lock);
}
update_page_count(PG_LEVEL_2M, pages);
@@ -551,7 +585,7 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long paddr, unsigned long paddr_end,
*/
static unsigned long __meminit
phys_pud_init(pud_t *pud_page, unsigned long paddr, unsigned long paddr_end,
- unsigned long page_size_mask)
+ unsigned long page_size_mask, bool init)
{
unsigned long pages = 0, paddr_next;
unsigned long paddr_last = paddr_end;
@@ -573,7 +607,7 @@ phys_pud_init(pud_t *pud_page, unsigned long paddr, unsigned long paddr_end,
E820_TYPE_RAM) &&
!e820__mapped_any(paddr & PUD_MASK, paddr_next,
E820_TYPE_RESERVED_KERN))
- set_pud_safe(pud, __pud(0));
+ set_pud_init(pud, __pud(0), init);
continue;
}
@@ -583,7 +617,7 @@ phys_pud_init(pud_t *pud_page, unsigned long paddr, unsigned long paddr_end,
paddr_last = phys_pmd_init(pmd, paddr,
paddr_end,
page_size_mask,
- prot);
+ prot, init);
continue;
}
/*
@@ -610,9 +644,10 @@ phys_pud_init(pud_t *pud_page, unsigned long paddr, unsigned long paddr_end,
if (page_size_mask & (1<<PG_LEVEL_1G)) {
pages++;
spin_lock(&init_mm.page_table_lock);
- set_pte_safe((pte_t *)pud,
- pfn_pte((paddr & PUD_MASK) >> PAGE_SHIFT,
- PAGE_KERNEL_LARGE));
+ set_pte_init((pte_t *)pud,
+ pfn_pte((paddr & PUD_MASK) >> PAGE_SHIFT,
+ PAGE_KERNEL_LARGE),
+ init);
spin_unlock(&init_mm.page_table_lock);
paddr_last = paddr_next;
continue;
@@ -620,10 +655,10 @@ phys_pud_init(pud_t *pud_page, unsigned long paddr, unsigned long paddr_end,
pmd = alloc_low_page();
paddr_last = phys_pmd_init(pmd, paddr, paddr_end,
- page_size_mask, prot);
+ page_size_mask, prot, init);
spin_lock(&init_mm.page_table_lock);
- pud_populate_safe(&init_mm, pud, pmd);
+ pud_populate_init(&init_mm, pud, pmd, init);
spin_unlock(&init_mm.page_table_lock);
}
@@ -634,14 +669,15 @@ phys_pud_init(pud_t *pud_page, unsigned long paddr, unsigned long paddr_end,
static unsigned long __meminit
phys_p4d_init(p4d_t *p4d_page, unsigned long paddr, unsigned long paddr_end,
- unsigned long page_size_mask)
+ unsigned long page_size_mask, bool init)
{
unsigned long paddr_next, paddr_last = paddr_end;
unsigned long vaddr = (unsigned long)__va(paddr);
int i = p4d_index(vaddr);
if (!pgtable_l5_enabled())
- return phys_pud_init((pud_t *) p4d_page, paddr, paddr_end, page_size_mask);
+ return phys_pud_init((pud_t *) p4d_page, paddr, paddr_end,
+ page_size_mask, init);
for (; i < PTRS_PER_P4D; i++, paddr = paddr_next) {
p4d_t *p4d;
@@ -657,39 +693,34 @@ phys_p4d_init(p4d_t *p4d_page, unsigned long paddr, unsigned long paddr_end,
E820_TYPE_RAM) &&
!e820__mapped_any(paddr & P4D_MASK, paddr_next,
E820_TYPE_RESERVED_KERN))
- set_p4d_safe(p4d, __p4d(0));
+ set_p4d_init(p4d, __p4d(0), init);
continue;
}
if (!p4d_none(*p4d)) {
pud = pud_offset(p4d, 0);
- paddr_last = phys_pud_init(pud, paddr,
- paddr_end,
- page_size_mask);
+ paddr_last = phys_pud_init(pud, paddr, paddr_end,
+ page_size_mask, init);
continue;
}
pud = alloc_low_page();
paddr_last = phys_pud_init(pud, paddr, paddr_end,
- page_size_mask);
+ page_size_mask, init);
spin_lock(&init_mm.page_table_lock);
- p4d_populate_safe(&init_mm, p4d, pud);
+ p4d_populate_init(&init_mm, p4d, pud, init);
spin_unlock(&init_mm.page_table_lock);
}
return paddr_last;
}
-/*
- * Create page table mapping for the physical memory for specific physical
- * addresses. The virtual and physical addresses have to be aligned on PMD level
- * down. It returns the last physical address mapped.
- */
-unsigned long __meminit
-kernel_physical_mapping_init(unsigned long paddr_start,
- unsigned long paddr_end,
- unsigned long page_size_mask)
+static unsigned long __meminit
+__kernel_physical_mapping_init(unsigned long paddr_start,
+ unsigned long paddr_end,
+ unsigned long page_size_mask,
+ bool init)
{
bool pgd_changed = false;
unsigned long vaddr, vaddr_start, vaddr_end, vaddr_next, paddr_last;
@@ -709,19 +740,22 @@ kernel_physical_mapping_init(unsigned long paddr_start,
p4d = (p4d_t *)pgd_page_vaddr(*pgd);
paddr_last = phys_p4d_init(p4d, __pa(vaddr),
__pa(vaddr_end),
- page_size_mask);
+ page_size_mask,
+ init);
continue;
}
p4d = alloc_low_page();
paddr_last = phys_p4d_init(p4d, __pa(vaddr), __pa(vaddr_end),
- page_size_mask);
+ page_size_mask, init);
spin_lock(&init_mm.page_table_lock);
if (pgtable_l5_enabled())
- pgd_populate_safe(&init_mm, pgd, p4d);
+ pgd_populate_init(&init_mm, pgd, p4d, init);
else
- p4d_populate_safe(&init_mm, p4d_offset(pgd, vaddr), (pud_t *) p4d);
+ p4d_populate_init(&init_mm, p4d_offset(pgd, vaddr),
+ (pud_t *) p4d, init);
+
spin_unlock(&init_mm.page_table_lock);
pgd_changed = true;
}
@@ -732,6 +766,37 @@ kernel_physical_mapping_init(unsigned long paddr_start,
return paddr_last;
}
+
+/*
+ * Create page table mapping for the physical memory for specific physical
+ * addresses. Note that it can only be used to populate non-present entries.
+ * The virtual and physical addresses have to be aligned on PMD level
+ * down. It returns the last physical address mapped.
+ */
+unsigned long __meminit
+kernel_physical_mapping_init(unsigned long paddr_start,
+ unsigned long paddr_end,
+ unsigned long page_size_mask)
+{
+ return __kernel_physical_mapping_init(paddr_start, paddr_end,
+ page_size_mask, true);
+}
+
+/*
+ * This function is similar to kernel_physical_mapping_init() above with the
+ * exception that it uses set_{pud,pmd}() instead of the set_{pud,pte}_safe()
+ * when updating the mapping. The caller is responsible to flush the TLBs after
+ * the function returns.
+ */
+unsigned long __meminit
+kernel_physical_mapping_change(unsigned long paddr_start,
+ unsigned long paddr_end,
+ unsigned long page_size_mask)
+{
+ return __kernel_physical_mapping_init(paddr_start, paddr_end,
+ page_size_mask, false);
+}
+
#ifndef CONFIG_NUMA
void __init initmem_init(void)
{
@@ -777,11 +842,11 @@ static void update_end_of_memory_vars(u64 start, u64 size)
}
int add_pages(int nid, unsigned long start_pfn, unsigned long nr_pages,
- struct vmem_altmap *altmap, bool want_memblock)
+ struct mhp_restrictions *restrictions)
{
int ret;
- ret = __add_pages(nid, start_pfn, nr_pages, altmap, want_memblock);
+ ret = __add_pages(nid, start_pfn, nr_pages, restrictions);
WARN_ON_ONCE(ret);
/* update max_pfn, max_low_pfn and high_memory */
@@ -791,15 +856,15 @@ int add_pages(int nid, unsigned long start_pfn, unsigned long nr_pages,
return ret;
}
-int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap,
- bool want_memblock)
+int arch_add_memory(int nid, u64 start, u64 size,
+ struct mhp_restrictions *restrictions)
{
unsigned long start_pfn = start >> PAGE_SHIFT;
unsigned long nr_pages = size >> PAGE_SHIFT;
init_memory_mapping(start, start + size);
- return add_pages(nid, start_pfn, nr_pages, altmap, want_memblock);
+ return add_pages(nid, start_pfn, nr_pages, restrictions);
}
#define PAGE_INUSE 0xFD
@@ -1141,24 +1206,20 @@ kernel_physical_mapping_remove(unsigned long start, unsigned long end)
remove_pagetable(start, end, true, NULL);
}
-int __ref arch_remove_memory(int nid, u64 start, u64 size,
- struct vmem_altmap *altmap)
+void __ref arch_remove_memory(int nid, u64 start, u64 size,
+ struct vmem_altmap *altmap)
{
unsigned long start_pfn = start >> PAGE_SHIFT;
unsigned long nr_pages = size >> PAGE_SHIFT;
struct page *page = pfn_to_page(start_pfn);
struct zone *zone;
- int ret;
/* With altmap the first mapped page is offset from @start */
if (altmap)
page += vmem_altmap_offset(altmap);
zone = page_zone(page);
- ret = __remove_pages(zone, start_pfn, nr_pages, altmap);
- WARN_ON_ONCE(ret);
+ __remove_pages(zone, start_pfn, nr_pages, altmap);
kernel_physical_mapping_remove(start, start + size);
-
- return ret;
}
#endif
#endif /* CONFIG_MEMORY_HOTPLUG */
diff --git a/arch/x86/mm/iomap_32.c b/arch/x86/mm/iomap_32.c
index b3294d36769d..6748b4c2baff 100644
--- a/arch/x86/mm/iomap_32.c
+++ b/arch/x86/mm/iomap_32.c
@@ -1,19 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Copyright © 2008 Ingo Molnar
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
*/
#include <asm/iomap.h>
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index dd73d5d74393..4b6423e7bd21 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Re-map IO memory to kernel address space so that we can access it.
* This is needed for high PCI addresses that aren't mapped in the
diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c
index 8dc0fc0b1382..296da58f3013 100644
--- a/arch/x86/mm/kasan_init_64.c
+++ b/arch/x86/mm/kasan_init_64.c
@@ -199,7 +199,7 @@ static inline p4d_t *early_p4d_offset(pgd_t *pgd, unsigned long addr)
if (!pgtable_l5_enabled())
return (p4d_t *)pgd;
- p4d = __pa_nodebug(pgd_val(*pgd)) & PTE_PFN_MASK;
+ p4d = pgd_val(*pgd) & PTE_PFN_MASK;
p4d += __START_KERNEL_map - phys_base;
return (p4d_t *)p4d + p4d_index(addr);
}
diff --git a/arch/x86/mm/kaslr.c b/arch/x86/mm/kaslr.c
index d669c5e797e0..dc6182eecefa 100644
--- a/arch/x86/mm/kaslr.c
+++ b/arch/x86/mm/kaslr.c
@@ -52,7 +52,7 @@ static __initdata struct kaslr_memory_region {
} kaslr_regions[] = {
{ &page_offset_base, 0 },
{ &vmalloc_base, 0 },
- { &vmemmap_base, 1 },
+ { &vmemmap_base, 0 },
};
/* Get size in bytes used by the memory region */
@@ -78,6 +78,7 @@ void __init kernel_randomize_memory(void)
unsigned long rand, memory_tb;
struct rnd_state rand_state;
unsigned long remain_entropy;
+ unsigned long vmemmap_size;
vaddr_start = pgtable_l5_enabled() ? __PAGE_OFFSET_BASE_L5 : __PAGE_OFFSET_BASE_L4;
vaddr = vaddr_start;
@@ -109,6 +110,14 @@ void __init kernel_randomize_memory(void)
if (memory_tb < kaslr_regions[0].size_tb)
kaslr_regions[0].size_tb = memory_tb;
+ /*
+ * Calculate the vmemmap region size in TBs, aligned to a TB
+ * boundary.
+ */
+ vmemmap_size = (kaslr_regions[0].size_tb << (TB_SHIFT - PAGE_SHIFT)) *
+ sizeof(struct page);
+ kaslr_regions[2].size_tb = DIV_ROUND_UP(vmemmap_size, 1UL << TB_SHIFT);
+
/* Calculate entropy available between regions */
remain_entropy = vaddr_end - vaddr_start;
for (i = 0; i < ARRAY_SIZE(kaslr_regions); i++)
@@ -125,10 +134,7 @@ void __init kernel_randomize_memory(void)
*/
entropy = remain_entropy / (ARRAY_SIZE(kaslr_regions) - i);
prandom_bytes_state(&rand_state, &rand, sizeof(rand));
- if (pgtable_l5_enabled())
- entropy = (rand % (entropy + 1)) & P4D_MASK;
- else
- entropy = (rand % (entropy + 1)) & PUD_MASK;
+ entropy = (rand % (entropy + 1)) & PUD_MASK;
vaddr += entropy;
*kaslr_regions[i].base = vaddr;
@@ -137,84 +143,71 @@ void __init kernel_randomize_memory(void)
* randomization alignment.
*/
vaddr += get_padding(&kaslr_regions[i]);
- if (pgtable_l5_enabled())
- vaddr = round_up(vaddr + 1, P4D_SIZE);
- else
- vaddr = round_up(vaddr + 1, PUD_SIZE);
+ vaddr = round_up(vaddr + 1, PUD_SIZE);
remain_entropy -= entropy;
}
}
static void __meminit init_trampoline_pud(void)
{
- unsigned long paddr, paddr_next;
+ pud_t *pud_page_tramp, *pud, *pud_tramp;
+ p4d_t *p4d_page_tramp, *p4d, *p4d_tramp;
+ unsigned long paddr, vaddr;
pgd_t *pgd;
- pud_t *pud_page, *pud_page_tramp;
- int i;
pud_page_tramp = alloc_low_page();
+ /*
+ * There are two mappings for the low 1MB area, the direct mapping
+ * and the 1:1 mapping for the real mode trampoline:
+ *
+ * Direct mapping: virt_addr = phys_addr + PAGE_OFFSET
+ * 1:1 mapping: virt_addr = phys_addr
+ */
paddr = 0;
- pgd = pgd_offset_k((unsigned long)__va(paddr));
- pud_page = (pud_t *) pgd_page_vaddr(*pgd);
-
- for (i = pud_index(paddr); i < PTRS_PER_PUD; i++, paddr = paddr_next) {
- pud_t *pud, *pud_tramp;
- unsigned long vaddr = (unsigned long)__va(paddr);
-
- pud_tramp = pud_page_tramp + pud_index(paddr);
- pud = pud_page + pud_index(vaddr);
- paddr_next = (paddr & PUD_MASK) + PUD_SIZE;
-
- *pud_tramp = *pud;
- }
-
- set_pgd(&trampoline_pgd_entry,
- __pgd(_KERNPG_TABLE | __pa(pud_page_tramp)));
-}
+ vaddr = (unsigned long)__va(paddr);
+ pgd = pgd_offset_k(vaddr);
-static void __meminit init_trampoline_p4d(void)
-{
- unsigned long paddr, paddr_next;
- pgd_t *pgd;
- p4d_t *p4d_page, *p4d_page_tramp;
- int i;
+ p4d = p4d_offset(pgd, vaddr);
+ pud = pud_offset(p4d, vaddr);
- p4d_page_tramp = alloc_low_page();
-
- paddr = 0;
- pgd = pgd_offset_k((unsigned long)__va(paddr));
- p4d_page = (p4d_t *) pgd_page_vaddr(*pgd);
+ pud_tramp = pud_page_tramp + pud_index(paddr);
+ *pud_tramp = *pud;
- for (i = p4d_index(paddr); i < PTRS_PER_P4D; i++, paddr = paddr_next) {
- p4d_t *p4d, *p4d_tramp;
- unsigned long vaddr = (unsigned long)__va(paddr);
+ if (pgtable_l5_enabled()) {
+ p4d_page_tramp = alloc_low_page();
p4d_tramp = p4d_page_tramp + p4d_index(paddr);
- p4d = p4d_page + p4d_index(vaddr);
- paddr_next = (paddr & P4D_MASK) + P4D_SIZE;
- *p4d_tramp = *p4d;
- }
+ set_p4d(p4d_tramp,
+ __p4d(_KERNPG_TABLE | __pa(pud_page_tramp)));
- set_pgd(&trampoline_pgd_entry,
- __pgd(_KERNPG_TABLE | __pa(p4d_page_tramp)));
+ set_pgd(&trampoline_pgd_entry,
+ __pgd(_KERNPG_TABLE | __pa(p4d_page_tramp)));
+ } else {
+ set_pgd(&trampoline_pgd_entry,
+ __pgd(_KERNPG_TABLE | __pa(pud_page_tramp)));
+ }
}
/*
- * Create PGD aligned trampoline table to allow real mode initialization
- * of additional CPUs. Consume only 1 low memory page.
+ * The real mode trampoline, which is required for bootstrapping CPUs
+ * occupies only a small area under the low 1MB. See reserve_real_mode()
+ * for details.
+ *
+ * If KASLR is disabled the first PGD entry of the direct mapping is copied
+ * to map the real mode trampoline.
+ *
+ * If KASLR is enabled, copy only the PUD which covers the low 1MB
+ * area. This limits the randomization granularity to 1GB for both 4-level
+ * and 5-level paging.
*/
void __meminit init_trampoline(void)
{
-
if (!kaslr_memory_enabled()) {
init_trampoline_default();
return;
}
- if (pgtable_l5_enabled())
- init_trampoline_p4d();
- else
- init_trampoline_pud();
+ init_trampoline_pud();
}
diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c
index 385afa2b9e17..51f50a7a07ef 100644
--- a/arch/x86/mm/mem_encrypt.c
+++ b/arch/x86/mm/mem_encrypt.c
@@ -301,9 +301,13 @@ static int __init early_set_memory_enc_dec(unsigned long vaddr,
else
split_page_size_mask = 1 << PG_LEVEL_2M;
- kernel_physical_mapping_init(__pa(vaddr & pmask),
- __pa((vaddr_end & pmask) + psize),
- split_page_size_mask);
+ /*
+ * kernel_physical_mapping_change() does not flush the TLBs, so
+ * a TLB flush is required after we exit from the for loop.
+ */
+ kernel_physical_mapping_change(__pa(vaddr & pmask),
+ __pa((vaddr_end & pmask) + psize),
+ split_page_size_mask);
}
ret = 0;
diff --git a/arch/x86/mm/mm_internal.h b/arch/x86/mm/mm_internal.h
index 319bde386d5f..eeae142062ed 100644
--- a/arch/x86/mm/mm_internal.h
+++ b/arch/x86/mm/mm_internal.h
@@ -13,6 +13,9 @@ void early_ioremap_page_table_range_init(void);
unsigned long kernel_physical_mapping_init(unsigned long start,
unsigned long end,
unsigned long page_size_mask);
+unsigned long kernel_physical_mapping_change(unsigned long start,
+ unsigned long end,
+ unsigned long page_size_mask);
void zone_sizes_init(void);
extern int after_bootmem;
diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c
index dc726e07d8ba..aae9a933dfd4 100644
--- a/arch/x86/mm/mmap.c
+++ b/arch/x86/mm/mmap.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Flexible mmap layout support
*
@@ -8,20 +9,6 @@
* All Rights Reserved.
* Copyright 2005 Andi Kleen, SUSE Labs.
* Copyright 2007 Jiri Kosina, SUSE Labs.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include <linux/personality.h>
diff --git a/arch/x86/mm/mmio-mod.c b/arch/x86/mm/mmio-mod.c
index 2c1ecf4763c4..b8ef8557d4b3 100644
--- a/arch/x86/mm/mmio-mod.c
+++ b/arch/x86/mm/mmio-mod.c
@@ -1,17 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*
* Copyright (C) IBM Corporation, 2005
* Jeff Muizelaar, 2006, 2007
diff --git a/arch/x86/mm/mpx.c b/arch/x86/mm/mpx.c
index c805db6236b4..0d1c47cbbdd6 100644
--- a/arch/x86/mm/mpx.c
+++ b/arch/x86/mm/mpx.c
@@ -142,7 +142,7 @@ int mpx_fault_info(struct mpx_fault_info *info, struct pt_regs *regs)
goto err_out;
}
/* get bndregs field from current task's xsave area */
- bndregs = get_xsave_field_ptr(XFEATURE_MASK_BNDREGS);
+ bndregs = get_xsave_field_ptr(XFEATURE_BNDREGS);
if (!bndregs) {
err = -EINVAL;
goto err_out;
@@ -190,7 +190,7 @@ static __user void *mpx_get_bounds_dir(void)
* The bounds directory pointer is stored in a register
* only accessible if we first do an xsave.
*/
- bndcsr = get_xsave_field_ptr(XFEATURE_MASK_BNDCSR);
+ bndcsr = get_xsave_field_ptr(XFEATURE_BNDCSR);
if (!bndcsr)
return MPX_INVALID_BOUNDS_DIR;
@@ -376,7 +376,7 @@ static int do_mpx_bt_fault(void)
const struct mpx_bndcsr *bndcsr;
struct mm_struct *mm = current->mm;
- bndcsr = get_xsave_field_ptr(XFEATURE_MASK_BNDCSR);
+ bndcsr = get_xsave_field_ptr(XFEATURE_BNDCSR);
if (!bndcsr)
return -EINVAL;
/*
@@ -881,9 +881,10 @@ static int mpx_unmap_tables(struct mm_struct *mm,
* the virtual address region start...end have already been split if
* necessary, and the 'vma' is the first vma in this range (start -> end).
*/
-void mpx_notify_unmap(struct mm_struct *mm, struct vm_area_struct *vma,
- unsigned long start, unsigned long end)
+void mpx_notify_unmap(struct mm_struct *mm, unsigned long start,
+ unsigned long end)
{
+ struct vm_area_struct *vma;
int ret;
/*
@@ -902,11 +903,12 @@ void mpx_notify_unmap(struct mm_struct *mm, struct vm_area_struct *vma,
* which should not occur normally. Being strict about it here
* helps ensure that we do not have an exploitable stack overflow.
*/
- do {
+ vma = find_vma(mm, start);
+ while (vma && vma->vm_start < end) {
if (vma->vm_flags & VM_MPX)
return;
vma = vma->vm_next;
- } while (vma && vma->vm_start < end);
+ }
ret = mpx_unmap_tables(mm, start, end);
if (ret)
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index dfb6c4df639a..e6dad600614c 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/* Common code for 32 and 64-bit NUMA */
#include <linux/acpi.h>
#include <linux/kernel.h>
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 4c570612e24e..6a9a77a403c9 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright 2002 Andi Kleen, SuSE Labs.
* Thanks to Ben LaHaise for precious feedback.
@@ -2209,8 +2210,6 @@ int set_pages_rw(struct page *page, int numpages)
return set_memory_rw(addr, numpages);
}
-#ifdef CONFIG_DEBUG_PAGEALLOC
-
static int __set_pages_p(struct page *page, int numpages)
{
unsigned long tempaddr = (unsigned long) page_address(page);
@@ -2249,6 +2248,16 @@ static int __set_pages_np(struct page *page, int numpages)
return __change_page_attr_set_clr(&cpa, 0);
}
+int set_direct_map_invalid_noflush(struct page *page)
+{
+ return __set_pages_np(page, 1);
+}
+
+int set_direct_map_default_noflush(struct page *page)
+{
+ return __set_pages_p(page, 1);
+}
+
void __kernel_map_pages(struct page *page, int numpages, int enable)
{
if (PageHighMem(page))
@@ -2282,7 +2291,6 @@ void __kernel_map_pages(struct page *page, int numpages, int enable)
}
#ifdef CONFIG_HIBERNATION
-
bool kernel_page_present(struct page *page)
{
unsigned int level;
@@ -2294,11 +2302,8 @@ bool kernel_page_present(struct page *page)
pte = lookup_address((unsigned long)page_address(page), &level);
return (pte_val(*pte) & _PAGE_PRESENT);
}
-
#endif /* CONFIG_HIBERNATION */
-#endif /* CONFIG_DEBUG_PAGEALLOC */
-
int __init kernel_map_pages_in_pgd(pgd_t *pgd, u64 pfn, unsigned long address,
unsigned numpages, unsigned long page_flags)
{
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index 4fe956a63b25..d9fbd4f69920 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Handle caching attributes in page tables (PAT)
*
diff --git a/arch/x86/mm/pf_in.c b/arch/x86/mm/pf_in.c
index a235869532bc..3f83e31b3a93 100644
--- a/arch/x86/mm/pf_in.c
+++ b/arch/x86/mm/pf_in.c
@@ -1,22 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Fault Injection Test harness (FI)
* Copyright (C) Intel Crop.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
- * USA.
- *
*/
/* Id: pf_in.c,v 1.1.1.1 2002/11/12 05:56:32 brlock Exp
diff --git a/arch/x86/mm/pf_in.h b/arch/x86/mm/pf_in.h
index e05341a51a27..e2a13dce0e13 100644
--- a/arch/x86/mm/pf_in.h
+++ b/arch/x86/mm/pf_in.h
@@ -1,22 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* Fault Injection Test harness (FI)
* Copyright (C) Intel Crop.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
- * USA.
- *
*/
#ifndef __PF_H_
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index 7bd01709a091..1f67b1e15bf6 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -190,7 +190,7 @@ static void pgd_dtor(pgd_t *pgd)
* when PTI is enabled. We need them to map the per-process LDT into the
* user-space page-table.
*/
-#define PREALLOCATED_USER_PMDS (static_cpu_has(X86_FEATURE_PTI) ? \
+#define PREALLOCATED_USER_PMDS (boot_cpu_has(X86_FEATURE_PTI) ? \
KERNEL_PGD_PTRS : 0)
#define MAX_PREALLOCATED_USER_PMDS KERNEL_PGD_PTRS
@@ -292,7 +292,7 @@ static void pgd_mop_up_pmds(struct mm_struct *mm, pgd_t *pgdp)
#ifdef CONFIG_PAGE_TABLE_ISOLATION
- if (!static_cpu_has(X86_FEATURE_PTI))
+ if (!boot_cpu_has(X86_FEATURE_PTI))
return;
pgdp = kernel_to_user_pgdp(pgdp);
@@ -373,14 +373,14 @@ static void pgd_prepopulate_user_pmd(struct mm_struct *mm,
static struct kmem_cache *pgd_cache;
-static int __init pgd_cache_init(void)
+void __init pgd_cache_init(void)
{
/*
* When PAE kernel is running as a Xen domain, it does not use
* shared kernel pmd. And this requires a whole page for pgd.
*/
if (!SHARED_KERNEL_PMD)
- return 0;
+ return;
/*
* when PAE kernel is not running as a Xen domain, it uses
@@ -390,9 +390,7 @@ static int __init pgd_cache_init(void)
*/
pgd_cache = kmem_cache_create("pgd_cache", PGD_SIZE, PGD_ALIGN,
SLAB_PANIC, NULL);
- return 0;
}
-core_initcall(pgd_cache_init);
static inline pgd_t *_pgd_alloc(void)
{
@@ -420,6 +418,10 @@ static inline void _pgd_free(pgd_t *pgd)
}
#else
+void __init pgd_cache_init(void)
+{
+}
+
static inline pgd_t *_pgd_alloc(void)
{
return (pgd_t *)__get_free_pages(PGALLOC_GFP, PGD_ALLOCATION_ORDER);
diff --git a/arch/x86/mm/pkeys.c b/arch/x86/mm/pkeys.c
index 047a77f6a10c..c6f84c0b5d7a 100644
--- a/arch/x86/mm/pkeys.c
+++ b/arch/x86/mm/pkeys.c
@@ -1,15 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Intel Memory Protection Keys management
* Copyright (c) 2015, Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
*/
#include <linux/debugfs.h> /* debugfs_create_u32() */
#include <linux/mm_types.h> /* mm_struct, vma, etc... */
@@ -18,6 +10,7 @@
#include <asm/cpufeature.h> /* boot_cpu_has, ... */
#include <asm/mmu_context.h> /* vma_pkey() */
+#include <asm/fpu/internal.h> /* init_fpstate */
int __execute_only_pkey(struct mm_struct *mm)
{
@@ -39,17 +32,12 @@ int __execute_only_pkey(struct mm_struct *mm)
* dance to set PKRU if we do not need to. Check it
* first and assume that if the execute-only pkey is
* write-disabled that we do not have to set it
- * ourselves. We need preempt off so that nobody
- * can make fpregs inactive.
+ * ourselves.
*/
- preempt_disable();
if (!need_to_set_mm_pkey &&
- current->thread.fpu.initialized &&
!__pkru_allows_read(read_pkru(), execute_only_pkey)) {
- preempt_enable();
return execute_only_pkey;
}
- preempt_enable();
/*
* Set up PKRU so that it denies access for everything
@@ -131,7 +119,6 @@ int __arch_override_mprotect_pkey(struct vm_area_struct *vma, int prot, int pkey
* in the process's lifetime will not accidentally get access
* to data which is pkey-protected later on.
*/
-static
u32 init_pkru_value = PKRU_AD_KEY( 1) | PKRU_AD_KEY( 2) | PKRU_AD_KEY( 3) |
PKRU_AD_KEY( 4) | PKRU_AD_KEY( 5) | PKRU_AD_KEY( 6) |
PKRU_AD_KEY( 7) | PKRU_AD_KEY( 8) | PKRU_AD_KEY( 9) |
@@ -148,13 +135,6 @@ void copy_init_pkru_to_fpregs(void)
{
u32 init_pkru_value_snapshot = READ_ONCE(init_pkru_value);
/*
- * Any write to PKRU takes it out of the XSAVE 'init
- * state' which increases context switch cost. Avoid
- * writing 0 when PKRU was already 0.
- */
- if (!init_pkru_value_snapshot && !read_pkru())
- return;
- /*
* Override the PKRU state that came from 'init_fpstate'
* with the baseline from the process.
*/
@@ -174,6 +154,7 @@ static ssize_t init_pkru_read_file(struct file *file, char __user *user_buf,
static ssize_t init_pkru_write_file(struct file *file,
const char __user *user_buf, size_t count, loff_t *ppos)
{
+ struct pkru_state *pk;
char buf[32];
ssize_t len;
u32 new_init_pkru;
@@ -196,6 +177,10 @@ static ssize_t init_pkru_write_file(struct file *file,
return -EINVAL;
WRITE_ONCE(init_pkru_value, new_init_pkru);
+ pk = get_xsave_addr(&init_fpstate.xsave, XFEATURE_PKRU);
+ if (!pk)
+ return -EINVAL;
+ pk->pkru = new_init_pkru;
return count;
}
diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c
index 139b28a01ce4..b196524759ec 100644
--- a/arch/x86/mm/pti.c
+++ b/arch/x86/mm/pti.c
@@ -1,15 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright(c) 2017 Intel Corporation. All rights reserved.
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
* This code is based in part on work published here:
*
* https://github.com/IAIK/KAISER
@@ -35,6 +27,7 @@
#include <linux/spinlock.h>
#include <linux/mm.h>
#include <linux/uaccess.h>
+#include <linux/cpu.h>
#include <asm/cpufeature.h>
#include <asm/hypervisor.h>
@@ -115,7 +108,8 @@ void __init pti_check_boottime_disable(void)
}
}
- if (cmdline_find_option_bool(boot_command_line, "nopti")) {
+ if (cmdline_find_option_bool(boot_command_line, "nopti") ||
+ cpu_mitigations_off()) {
pti_mode = PTI_FORCE_OFF;
pti_print_if_insecure("disabled on command line.");
return;
@@ -626,7 +620,7 @@ static void pti_set_kernel_image_nonglobal(void)
*/
void __init pti_init(void)
{
- if (!static_cpu_has(X86_FEATURE_PTI))
+ if (!boot_cpu_has(X86_FEATURE_PTI))
return;
pr_info("enabled\n");
diff --git a/arch/x86/mm/testmmiotrace.c b/arch/x86/mm/testmmiotrace.c
index f6ae6830b341..0881e1ff1e58 100644
--- a/arch/x86/mm/testmmiotrace.c
+++ b/arch/x86/mm/testmmiotrace.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Written by Pekka Paalanen, 2008-2009 <pq@iki.fi>
*/
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 487b8474c01c..91f6db92554c 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
#include <linux/init.h>
#include <linux/mm.h>
@@ -634,7 +635,7 @@ static void flush_tlb_func_common(const struct flush_tlb_info *f,
this_cpu_write(cpu_tlbstate.ctxs[loaded_mm_asid].tlb_gen, mm_tlb_gen);
}
-static void flush_tlb_func_local(void *info, enum tlb_flush_reason reason)
+static void flush_tlb_func_local(const void *info, enum tlb_flush_reason reason)
{
const struct flush_tlb_info *f = info;
@@ -722,43 +723,81 @@ void native_flush_tlb_others(const struct cpumask *cpumask,
*/
unsigned long tlb_single_page_flush_ceiling __read_mostly = 33;
+static DEFINE_PER_CPU_SHARED_ALIGNED(struct flush_tlb_info, flush_tlb_info);
+
+#ifdef CONFIG_DEBUG_VM
+static DEFINE_PER_CPU(unsigned int, flush_tlb_info_idx);
+#endif
+
+static inline struct flush_tlb_info *get_flush_tlb_info(struct mm_struct *mm,
+ unsigned long start, unsigned long end,
+ unsigned int stride_shift, bool freed_tables,
+ u64 new_tlb_gen)
+{
+ struct flush_tlb_info *info = this_cpu_ptr(&flush_tlb_info);
+
+#ifdef CONFIG_DEBUG_VM
+ /*
+ * Ensure that the following code is non-reentrant and flush_tlb_info
+ * is not overwritten. This means no TLB flushing is initiated by
+ * interrupt handlers and machine-check exception handlers.
+ */
+ BUG_ON(this_cpu_inc_return(flush_tlb_info_idx) != 1);
+#endif
+
+ info->start = start;
+ info->end = end;
+ info->mm = mm;
+ info->stride_shift = stride_shift;
+ info->freed_tables = freed_tables;
+ info->new_tlb_gen = new_tlb_gen;
+
+ return info;
+}
+
+static inline void put_flush_tlb_info(void)
+{
+#ifdef CONFIG_DEBUG_VM
+ /* Complete reentrency prevention checks */
+ barrier();
+ this_cpu_dec(flush_tlb_info_idx);
+#endif
+}
+
void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
unsigned long end, unsigned int stride_shift,
bool freed_tables)
{
+ struct flush_tlb_info *info;
+ u64 new_tlb_gen;
int cpu;
- struct flush_tlb_info info = {
- .mm = mm,
- .stride_shift = stride_shift,
- .freed_tables = freed_tables,
- };
-
cpu = get_cpu();
- /* This is also a barrier that synchronizes with switch_mm(). */
- info.new_tlb_gen = inc_mm_tlb_gen(mm);
-
/* Should we flush just the requested range? */
- if ((end != TLB_FLUSH_ALL) &&
- ((end - start) >> stride_shift) <= tlb_single_page_flush_ceiling) {
- info.start = start;
- info.end = end;
- } else {
- info.start = 0UL;
- info.end = TLB_FLUSH_ALL;
+ if ((end == TLB_FLUSH_ALL) ||
+ ((end - start) >> stride_shift) > tlb_single_page_flush_ceiling) {
+ start = 0;
+ end = TLB_FLUSH_ALL;
}
+ /* This is also a barrier that synchronizes with switch_mm(). */
+ new_tlb_gen = inc_mm_tlb_gen(mm);
+
+ info = get_flush_tlb_info(mm, start, end, stride_shift, freed_tables,
+ new_tlb_gen);
+
if (mm == this_cpu_read(cpu_tlbstate.loaded_mm)) {
- VM_WARN_ON(irqs_disabled());
+ lockdep_assert_irqs_enabled();
local_irq_disable();
- flush_tlb_func_local(&info, TLB_LOCAL_MM_SHOOTDOWN);
+ flush_tlb_func_local(info, TLB_LOCAL_MM_SHOOTDOWN);
local_irq_enable();
}
if (cpumask_any_but(mm_cpumask(mm), cpu) < nr_cpu_ids)
- flush_tlb_others(mm_cpumask(mm), &info);
+ flush_tlb_others(mm_cpumask(mm), info);
+ put_flush_tlb_info();
put_cpu();
}
@@ -787,38 +826,48 @@ static void do_kernel_range_flush(void *info)
void flush_tlb_kernel_range(unsigned long start, unsigned long end)
{
-
/* Balance as user space task's flush, a bit conservative */
if (end == TLB_FLUSH_ALL ||
(end - start) > tlb_single_page_flush_ceiling << PAGE_SHIFT) {
on_each_cpu(do_flush_tlb_all, NULL, 1);
} else {
- struct flush_tlb_info info;
- info.start = start;
- info.end = end;
- on_each_cpu(do_kernel_range_flush, &info, 1);
+ struct flush_tlb_info *info;
+
+ preempt_disable();
+ info = get_flush_tlb_info(NULL, start, end, 0, false, 0);
+
+ on_each_cpu(do_kernel_range_flush, info, 1);
+
+ put_flush_tlb_info();
+ preempt_enable();
}
}
+/*
+ * arch_tlbbatch_flush() performs a full TLB flush regardless of the active mm.
+ * This means that the 'struct flush_tlb_info' that describes which mappings to
+ * flush is actually fixed. We therefore set a single fixed struct and use it in
+ * arch_tlbbatch_flush().
+ */
+static const struct flush_tlb_info full_flush_tlb_info = {
+ .mm = NULL,
+ .start = 0,
+ .end = TLB_FLUSH_ALL,
+};
+
void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch)
{
- struct flush_tlb_info info = {
- .mm = NULL,
- .start = 0UL,
- .end = TLB_FLUSH_ALL,
- };
-
int cpu = get_cpu();
if (cpumask_test_cpu(cpu, &batch->cpumask)) {
- VM_WARN_ON(irqs_disabled());
+ lockdep_assert_irqs_enabled();
local_irq_disable();
- flush_tlb_func_local(&info, TLB_LOCAL_SHOOTDOWN);
+ flush_tlb_func_local(&full_flush_tlb_info, TLB_LOCAL_SHOOTDOWN);
local_irq_enable();
}
if (cpumask_any_but(&batch->cpumask, cpu) < nr_cpu_ids)
- flush_tlb_others(&batch->cpumask, &info);
+ flush_tlb_others(&batch->cpumask, &full_flush_tlb_info);
cpumask_clear(&batch->cpumask);