summaryrefslogtreecommitdiffstats
path: root/arch/x86/mm
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/mm')
-rw-r--r--arch/x86/mm/extable.c31
-rw-r--r--arch/x86/mm/fault.c13
-rw-r--r--arch/x86/mm/gup.c2
-rw-r--r--arch/x86/mm/init.c11
-rw-r--r--arch/x86/mm/init_32.c28
-rw-r--r--arch/x86/mm/init_64.c63
-rw-r--r--arch/x86/mm/ioremap.c91
-rw-r--r--arch/x86/mm/k8topology_64.c101
-rw-r--r--arch/x86/mm/kmemcheck/error.c19
-rw-r--r--arch/x86/mm/kmemcheck/kmemcheck.c2
-rw-r--r--arch/x86/mm/kmemcheck/shadow.c16
-rw-r--r--arch/x86/mm/kmemcheck/shadow.h2
-rw-r--r--arch/x86/mm/kmmio.c57
-rw-r--r--arch/x86/mm/mmap.c4
-rw-r--r--arch/x86/mm/mmio-mod.c71
-rw-r--r--arch/x86/mm/numa_32.c7
-rw-r--r--arch/x86/mm/numa_64.c506
-rw-r--r--arch/x86/mm/pageattr.c43
-rw-r--r--arch/x86/mm/pat.c23
-rw-r--r--arch/x86/mm/pgtable.c31
-rw-r--r--arch/x86/mm/setup_nx.c59
-rw-r--r--arch/x86/mm/srat_32.c2
-rw-r--r--arch/x86/mm/srat_64.c41
-rw-r--r--arch/x86/mm/testmmiotrace.c29
-rw-r--r--arch/x86/mm/tlb.c11
25 files changed, 734 insertions, 529 deletions
diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c
index 61b41ca3b5a2..d0474ad2a6e5 100644
--- a/arch/x86/mm/extable.c
+++ b/arch/x86/mm/extable.c
@@ -35,34 +35,3 @@ int fixup_exception(struct pt_regs *regs)
return 0;
}
-
-#ifdef CONFIG_X86_64
-/*
- * Need to defined our own search_extable on X86_64 to work around
- * a B stepping K8 bug.
- */
-const struct exception_table_entry *
-search_extable(const struct exception_table_entry *first,
- const struct exception_table_entry *last,
- unsigned long value)
-{
- /* B stepping K8 bug */
- if ((value >> 32) == 0)
- value |= 0xffffffffUL << 32;
-
- while (first <= last) {
- const struct exception_table_entry *mid;
- long diff;
-
- mid = (last - first) / 2 + first;
- diff = mid->insn - value;
- if (diff == 0)
- return mid;
- else if (diff < 0)
- first = mid+1;
- else
- last = mid-1;
- }
- return NULL;
-}
-#endif
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index f4cee9028cf0..f62777940dfb 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -38,7 +38,8 @@ enum x86_pf_error_code {
* Returns 0 if mmiotrace is disabled, or if the fault is not
* handled by mmiotrace:
*/
-static inline int kmmio_fault(struct pt_regs *regs, unsigned long addr)
+static inline int __kprobes
+kmmio_fault(struct pt_regs *regs, unsigned long addr)
{
if (unlikely(is_kmmio_active()))
if (kmmio_handler(regs, addr) == 1)
@@ -46,7 +47,7 @@ static inline int kmmio_fault(struct pt_regs *regs, unsigned long addr)
return 0;
}
-static inline int notify_page_fault(struct pt_regs *regs)
+static inline int __kprobes notify_page_fault(struct pt_regs *regs)
{
int ret = 0;
@@ -240,7 +241,7 @@ void vmalloc_sync_all(void)
*
* Handle a fault on the vmalloc or module mapping area
*/
-static noinline int vmalloc_fault(unsigned long address)
+static noinline __kprobes int vmalloc_fault(unsigned long address)
{
unsigned long pgd_paddr;
pmd_t *pmd_k;
@@ -357,7 +358,7 @@ void vmalloc_sync_all(void)
*
* This assumes no large pages in there.
*/
-static noinline int vmalloc_fault(unsigned long address)
+static noinline __kprobes int vmalloc_fault(unsigned long address)
{
pgd_t *pgd, *pgd_ref;
pud_t *pud, *pud_ref;
@@ -658,7 +659,7 @@ no_context(struct pt_regs *regs, unsigned long error_code,
show_fault_oops(regs, error_code, address);
stackend = end_of_stack(tsk);
- if (*stackend != STACK_END_MAGIC)
+ if (tsk != &init_task && *stackend != STACK_END_MAGIC)
printk(KERN_ALERT "Thread overran stack, or stack corrupted\n");
tsk->thread.cr2 = address;
@@ -860,7 +861,7 @@ static int spurious_fault_check(unsigned long error_code, pte_t *pte)
* There are no security implications to leaving a stale TLB when
* increasing the permissions on a page.
*/
-static noinline int
+static noinline __kprobes int
spurious_fault(unsigned long error_code, unsigned long address)
{
pgd_t *pgd;
diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c
index 71da1bca13cb..738e6593799d 100644
--- a/arch/x86/mm/gup.c
+++ b/arch/x86/mm/gup.c
@@ -18,7 +18,7 @@ static inline pte_t gup_get_pte(pte_t *ptep)
#else
/*
* With get_user_pages_fast, we walk down the pagetables without taking
- * any locks. For this we would like to load the pointers atoimcally,
+ * any locks. For this we would like to load the pointers atomically,
* but that is not possible (without expensive cmpxchg8b) on PAE. What
* we do have is the guarantee that a pte will only either go from not
* present to present, or present to not present or both -- it will not
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 73ffd5536f62..e71c5cbc8f35 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -146,10 +146,6 @@ unsigned long __init_refok init_memory_mapping(unsigned long start,
use_gbpages = direct_gbpages;
#endif
- set_nx();
- if (nx_enabled)
- printk(KERN_INFO "NX (Execute Disable) protection: active\n");
-
/* Enable PSE if available */
if (cpu_has_pse)
set_in_cr4(X86_CR4_PSE);
@@ -270,16 +266,9 @@ unsigned long __init_refok init_memory_mapping(unsigned long start,
if (!after_bootmem)
find_early_table_space(end, use_pse, use_gbpages);
-#ifdef CONFIG_X86_32
- for (i = 0; i < nr_range; i++)
- kernel_physical_mapping_init(mr[i].start, mr[i].end,
- mr[i].page_size_mask);
- ret = end;
-#else /* CONFIG_X86_64 */
for (i = 0; i < nr_range; i++)
ret = kernel_physical_mapping_init(mr[i].start, mr[i].end,
mr[i].page_size_mask);
-#endif
#ifdef CONFIG_X86_32
early_ioremap_page_table_range_init();
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 30938c1d8d5d..5cb3f0f54f47 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -241,6 +241,7 @@ kernel_physical_mapping_init(unsigned long start,
unsigned long page_size_mask)
{
int use_pse = page_size_mask == (1<<PG_LEVEL_2M);
+ unsigned long last_map_addr = end;
unsigned long start_pfn, end_pfn;
pgd_t *pgd_base = swapper_pg_dir;
int pgd_idx, pmd_idx, pte_ofs;
@@ -341,9 +342,10 @@ repeat:
prot = PAGE_KERNEL_EXEC;
pages_4k++;
- if (mapping_iter == 1)
+ if (mapping_iter == 1) {
set_pte(pte, pfn_pte(pfn, init_prot));
- else
+ last_map_addr = (pfn << PAGE_SHIFT) + PAGE_SIZE;
+ } else
set_pte(pte, pfn_pte(pfn, prot));
}
}
@@ -368,7 +370,7 @@ repeat:
mapping_iter = 2;
goto repeat;
}
- return 0;
+ return last_map_addr;
}
pte_t *kmap_pte;
@@ -412,7 +414,7 @@ static void __init permanent_kmaps_init(pgd_t *pgd_base)
pkmap_page_table = pte;
}
-static void __init add_one_highpage_init(struct page *page, int pfn)
+static void __init add_one_highpage_init(struct page *page)
{
ClearPageReserved(page);
init_page_count(page);
@@ -445,7 +447,7 @@ static int __init add_highpages_work_fn(unsigned long start_pfn,
if (!pfn_valid(node_pfn))
continue;
page = pfn_to_page(node_pfn);
- add_one_highpage_init(page, node_pfn);
+ add_one_highpage_init(page);
}
return 0;
@@ -703,8 +705,8 @@ void __init find_low_pfn_range(void)
}
#ifndef CONFIG_NEED_MULTIPLE_NODES
-void __init initmem_init(unsigned long start_pfn,
- unsigned long end_pfn)
+void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn,
+ int acpi, int k8)
{
#ifdef CONFIG_HIGHMEM
highstart_pfn = highend_pfn = max_pfn;
@@ -748,6 +750,7 @@ static void __init zone_sizes_init(void)
free_area_init_nodes(max_zone_pfns);
}
+#ifndef CONFIG_NO_BOOTMEM
static unsigned long __init setup_node_bootmem(int nodeid,
unsigned long start_pfn,
unsigned long end_pfn,
@@ -764,13 +767,14 @@ static unsigned long __init setup_node_bootmem(int nodeid,
printk(KERN_INFO " node %d bootmap %08lx - %08lx\n",
nodeid, bootmap, bootmap + bootmap_size);
free_bootmem_with_active_regions(nodeid, end_pfn);
- early_res_to_bootmem(start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT);
return bootmap + bootmap_size;
}
+#endif
void __init setup_bootmem_allocator(void)
{
+#ifndef CONFIG_NO_BOOTMEM
int nodeid;
unsigned long bootmap_size, bootmap;
/*
@@ -782,11 +786,13 @@ void __init setup_bootmem_allocator(void)
if (bootmap == -1L)
panic("Cannot find bootmem map of size %ld\n", bootmap_size);
reserve_early(bootmap, bootmap + bootmap_size, "BOOTMAP");
+#endif
printk(KERN_INFO " mapped low ram: 0 - %08lx\n",
max_pfn_mapped<<PAGE_SHIFT);
printk(KERN_INFO " low ram: 0 - %08lx\n", max_low_pfn<<PAGE_SHIFT);
+#ifndef CONFIG_NO_BOOTMEM
for_each_online_node(nodeid) {
unsigned long start_pfn, end_pfn;
@@ -804,6 +810,7 @@ void __init setup_bootmem_allocator(void)
bootmap = setup_node_bootmem(nodeid, start_pfn, end_pfn,
bootmap);
}
+#endif
after_bootmem = 1;
}
@@ -892,8 +899,7 @@ void __init mem_init(void)
reservedpages << (PAGE_SHIFT-10),
datasize >> 10,
initsize >> 10,
- (unsigned long) (totalhigh_pages << (PAGE_SHIFT-10))
- );
+ totalhigh_pages << (PAGE_SHIFT-10));
printk(KERN_INFO "virtual kernel memory layout:\n"
" fixmap : 0x%08lx - 0x%08lx (%4ld kB)\n"
@@ -997,7 +1003,7 @@ static noinline int do_test_wp_bit(void)
const int rodata_test_data = 0xC3;
EXPORT_SYMBOL_GPL(rodata_test_data);
-static int kernel_set_to_readonly;
+int kernel_set_to_readonly __read_mostly;
void set_kernel_text_rw(void)
{
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 5a4398a6006b..e9b040e1cde5 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -49,6 +49,7 @@
#include <asm/numa.h>
#include <asm/cacheflush.h>
#include <asm/init.h>
+#include <linux/bootmem.h>
static unsigned long dma_reserve __initdata;
@@ -568,8 +569,10 @@ kernel_physical_mapping_init(unsigned long start,
}
#ifndef CONFIG_NUMA
-void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn)
+void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn,
+ int acpi, int k8)
{
+#ifndef CONFIG_NO_BOOTMEM
unsigned long bootmap_size, bootmap;
bootmap_size = bootmem_bootmap_pages(end_pfn)<<PAGE_SHIFT;
@@ -577,13 +580,15 @@ void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn)
PAGE_SIZE);
if (bootmap == -1L)
panic("Cannot find bootmem map of size %ld\n", bootmap_size);
+ reserve_early(bootmap, bootmap + bootmap_size, "BOOTMAP");
/* don't touch min_low_pfn */
bootmap_size = init_bootmem_node(NODE_DATA(0), bootmap >> PAGE_SHIFT,
0, end_pfn);
e820_register_active_regions(0, start_pfn, end_pfn);
free_bootmem_with_active_regions(0, end_pfn);
- early_res_to_bootmem(0, end_pfn<<PAGE_SHIFT);
- reserve_bootmem(bootmap, bootmap_size, BOOTMEM_DEFAULT);
+#else
+ e820_register_active_regions(0, start_pfn, end_pfn);
+#endif
}
#endif
@@ -615,6 +620,21 @@ void __init paging_init(void)
*/
#ifdef CONFIG_MEMORY_HOTPLUG
/*
+ * After memory hotplug the variables max_pfn, max_low_pfn and high_memory need
+ * updating.
+ */
+static void update_end_of_memory_vars(u64 start, u64 size)
+{
+ unsigned long end_pfn = PFN_UP(start + size);
+
+ if (end_pfn > max_pfn) {
+ max_pfn = end_pfn;
+ max_low_pfn = end_pfn;
+ high_memory = (void *)__va(max_pfn * PAGE_SIZE - 1) + 1;
+ }
+}
+
+/*
* Memory is added always to NORMAL zone. This means you will never get
* additional DMA/DMA32 memory.
*/
@@ -633,6 +653,9 @@ int arch_add_memory(int nid, u64 start, u64 size)
ret = __add_pages(nid, zone, start_pfn, nr_pages);
WARN_ON_ONCE(ret);
+ /* update max_pfn, max_low_pfn and high_memory */
+ update_end_of_memory_vars(start, size);
+
return ret;
}
EXPORT_SYMBOL_GPL(arch_add_memory);
@@ -694,12 +717,12 @@ void __init mem_init(void)
const int rodata_test_data = 0xC3;
EXPORT_SYMBOL_GPL(rodata_test_data);
-static int kernel_set_to_readonly;
+int kernel_set_to_readonly;
void set_kernel_text_rw(void)
{
- unsigned long start = PFN_ALIGN(_stext);
- unsigned long end = PFN_ALIGN(__start_rodata);
+ unsigned long start = PFN_ALIGN(_text);
+ unsigned long end = PFN_ALIGN(__stop___ex_table);
if (!kernel_set_to_readonly)
return;
@@ -707,13 +730,18 @@ void set_kernel_text_rw(void)
pr_debug("Set kernel text: %lx - %lx for read write\n",
start, end);
+ /*
+ * Make the kernel identity mapping for text RW. Kernel text
+ * mapping will always be RO. Refer to the comment in
+ * static_protections() in pageattr.c
+ */
set_memory_rw(start, (end - start) >> PAGE_SHIFT);
}
void set_kernel_text_ro(void)
{
- unsigned long start = PFN_ALIGN(_stext);
- unsigned long end = PFN_ALIGN(__start_rodata);
+ unsigned long start = PFN_ALIGN(_text);
+ unsigned long end = PFN_ALIGN(__stop___ex_table);
if (!kernel_set_to_readonly)
return;
@@ -721,14 +749,21 @@ void set_kernel_text_ro(void)
pr_debug("Set kernel text: %lx - %lx for read only\n",
start, end);
+ /*
+ * Set the kernel identity mapping for text RO.
+ */
set_memory_ro(start, (end - start) >> PAGE_SHIFT);
}
void mark_rodata_ro(void)
{
- unsigned long start = PFN_ALIGN(_stext), end = PFN_ALIGN(__end_rodata);
+ unsigned long start = PFN_ALIGN(_text);
unsigned long rodata_start =
((unsigned long)__start_rodata + PAGE_SIZE - 1) & PAGE_MASK;
+ unsigned long end = (unsigned long) &__end_rodata_hpage_align;
+ unsigned long text_end = PAGE_ALIGN((unsigned long) &__stop___ex_table);
+ unsigned long rodata_end = PAGE_ALIGN((unsigned long) &__end_rodata);
+ unsigned long data_start = (unsigned long) &_sdata;
printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n",
(end - start) >> 10);
@@ -751,6 +786,14 @@ void mark_rodata_ro(void)
printk(KERN_INFO "Testing CPA: again\n");
set_memory_ro(start, (end-start) >> PAGE_SHIFT);
#endif
+
+ free_init_pages("unused kernel memory",
+ (unsigned long) page_address(virt_to_page(text_end)),
+ (unsigned long)
+ page_address(virt_to_page(rodata_start)));
+ free_init_pages("unused kernel memory",
+ (unsigned long) page_address(virt_to_page(rodata_end)),
+ (unsigned long) page_address(virt_to_page(data_start)));
}
#endif
@@ -934,7 +977,7 @@ vmemmap_populate(struct page *start_page, unsigned long size, int node)
if (pmd_none(*pmd)) {
pte_t entry;
- p = vmemmap_alloc_block(PMD_SIZE, node);
+ p = vmemmap_alloc_block_buf(PMD_SIZE, node);
if (!p)
return -ENOMEM;
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index 334e63ca7b2b..5eb1ba74a3a9 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -24,43 +24,6 @@
#include "physaddr.h"
-int page_is_ram(unsigned long pagenr)
-{
- resource_size_t addr, end;
- int i;
-
- /*
- * A special case is the first 4Kb of memory;
- * This is a BIOS owned area, not kernel ram, but generally
- * not listed as such in the E820 table.
- */
- if (pagenr == 0)
- return 0;
-
- /*
- * Second special case: Some BIOSen report the PC BIOS
- * area (640->1Mb) as ram even though it is not.
- */
- if (pagenr >= (BIOS_BEGIN >> PAGE_SHIFT) &&
- pagenr < (BIOS_END >> PAGE_SHIFT))
- return 0;
-
- for (i = 0; i < e820.nr_map; i++) {
- /*
- * Not usable memory:
- */
- if (e820.map[i].type != E820_RAM)
- continue;
- addr = (e820.map[i].addr + PAGE_SIZE-1) >> PAGE_SHIFT;
- end = (e820.map[i].addr + e820.map[i].size) >> PAGE_SHIFT;
-
-
- if ((pagenr >= addr) && (pagenr < end))
- return 1;
- }
- return 0;
-}
-
/*
* Fix up the linear direct mapping of the kernel to avoid cache attribute
* conflicts.
@@ -170,8 +133,7 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr,
(unsigned long long)phys_addr,
(unsigned long long)(phys_addr + size),
prot_val, new_prot_val);
- free_memtype(phys_addr, phys_addr + size);
- return NULL;
+ goto err_free_memtype;
}
prot_val = new_prot_val;
}
@@ -197,26 +159,25 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr,
*/
area = get_vm_area_caller(size, VM_IOREMAP, caller);
if (!area)
- return NULL;
+ goto err_free_memtype;
area->phys_addr = phys_addr;
vaddr = (unsigned long) area->addr;
- if (kernel_map_sync_memtype(phys_addr, size, prot_val)) {
- free_memtype(phys_addr, phys_addr + size);
- free_vm_area(area);
- return NULL;
- }
+ if (kernel_map_sync_memtype(phys_addr, size, prot_val))
+ goto err_free_area;
- if (ioremap_page_range(vaddr, vaddr + size, phys_addr, prot)) {
- free_memtype(phys_addr, phys_addr + size);
- free_vm_area(area);
- return NULL;
- }
+ if (ioremap_page_range(vaddr, vaddr + size, phys_addr, prot))
+ goto err_free_area;
ret_addr = (void __iomem *) (vaddr + offset);
mmiotrace_ioremap(unaligned_phys_addr, unaligned_size, ret_addr);
return ret_addr;
+err_free_area:
+ free_vm_area(area);
+err_free_memtype:
+ free_memtype(phys_addr, phys_addr + size);
+ return NULL;
}
/**
@@ -283,30 +244,6 @@ void __iomem *ioremap_cache(resource_size_t phys_addr, unsigned long size)
}
EXPORT_SYMBOL(ioremap_cache);
-static void __iomem *ioremap_default(resource_size_t phys_addr,
- unsigned long size)
-{
- unsigned long flags;
- void __iomem *ret;
- int err;
-
- /*
- * - WB for WB-able memory and no other conflicting mappings
- * - UC_MINUS for non-WB-able memory with no other conflicting mappings
- * - Inherit from confliting mappings otherwise
- */
- err = reserve_memtype(phys_addr, phys_addr + size,
- _PAGE_CACHE_WB, &flags);
- if (err < 0)
- return NULL;
-
- ret = __ioremap_caller(phys_addr, size, flags,
- __builtin_return_address(0));
-
- free_memtype(phys_addr, phys_addr + size);
- return ret;
-}
-
void __iomem *ioremap_prot(resource_size_t phys_addr, unsigned long size,
unsigned long prot_val)
{
@@ -382,7 +319,7 @@ void *xlate_dev_mem_ptr(unsigned long phys)
if (page_is_ram(start >> PAGE_SHIFT))
return __va(phys);
- addr = (void __force *)ioremap_default(start, PAGE_SIZE);
+ addr = (void __force *)ioremap_cache(start, PAGE_SIZE);
if (addr)
addr = (void *)((unsigned long)addr | (phys & ~PAGE_MASK));
@@ -448,6 +385,10 @@ void __init early_ioremap_init(void)
* The boot-ioremap range spans multiple pmds, for which
* we are not prepared:
*/
+#define __FIXADDR_TOP (-PAGE_SIZE)
+ BUILD_BUG_ON((__fix_to_virt(FIX_BTMAP_BEGIN) >> PMD_SHIFT)
+ != (__fix_to_virt(FIX_BTMAP_END) >> PMD_SHIFT));
+#undef __FIXADDR_TOP
if (pmd != early_ioremap_pmd(fix_to_virt(FIX_BTMAP_END))) {
WARN_ON(1);
printk(KERN_WARNING "pmd %p != %p\n",
diff --git a/arch/x86/mm/k8topology_64.c b/arch/x86/mm/k8topology_64.c
index 268f8255280f..970ed579d4e4 100644
--- a/arch/x86/mm/k8topology_64.c
+++ b/arch/x86/mm/k8topology_64.c
@@ -24,6 +24,9 @@
#include <asm/apic.h>
#include <asm/k8.h>
+static struct bootnode __initdata nodes[8];
+static nodemask_t __initdata nodes_parsed = NODE_MASK_NONE;
+
static __init int find_northbridge(void)
{
int num;
@@ -54,18 +57,6 @@ static __init void early_get_boot_cpu_id(void)
* need to get boot_cpu_id so can use that to create apicid_to_node
* in k8_scan_nodes()
*/
- /*
- * Find possible boot-time SMP configuration:
- */
-#ifdef CONFIG_X86_MPPARSE
- early_find_smp_config();
-#endif
-#ifdef CONFIG_ACPI
- /*
- * Read APIC information from ACPI tables.
- */
- early_acpi_boot_init();
-#endif
#ifdef CONFIG_X86_MPPARSE
/*
* get boot-time SMP configuration:
@@ -76,12 +67,26 @@ static __init void early_get_boot_cpu_id(void)
early_init_lapic_mapping();
}
-int __init k8_scan_nodes(unsigned long start, unsigned long end)
+int __init k8_get_nodes(struct bootnode *physnodes)
{
- unsigned numnodes, cores, bits, apicid_base;
+ int i;
+ int ret = 0;
+
+ for_each_node_mask(i, nodes_parsed) {
+ physnodes[ret].start = nodes[i].start;
+ physnodes[ret].end = nodes[i].end;
+ ret++;
+ }
+ return ret;
+}
+
+int __init k8_numa_init(unsigned long start_pfn, unsigned long end_pfn)
+{
+ unsigned long start = PFN_PHYS(start_pfn);
+ unsigned long end = PFN_PHYS(end_pfn);
+ unsigned numnodes;
unsigned long prevbase;
- struct bootnode nodes[8];
- int i, j, nb, found = 0;
+ int i, nb, found = 0;
u32 nodeid, reg;
if (!early_pci_allowed())
@@ -91,16 +96,15 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end)
if (nb < 0)
return nb;
- printk(KERN_INFO "Scanning NUMA topology in Northbridge %d\n", nb);
+ pr_info("Scanning NUMA topology in Northbridge %d\n", nb);
reg = read_pci_config(0, nb, 0, 0x60);
numnodes = ((reg >> 4) & 0xF) + 1;
if (numnodes <= 1)
return -1;
- printk(KERN_INFO "Number of nodes %d\n", numnodes);
+ pr_info("Number of physical nodes %d\n", numnodes);
- memset(&nodes, 0, sizeof(nodes));
prevbase = 0;
for (i = 0; i < 8; i++) {
unsigned long base, limit;
@@ -111,28 +115,28 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end)
nodeid = limit & 7;
if ((base & 3) == 0) {
if (i < numnodes)
- printk("Skipping disabled node %d\n", i);
+ pr_info("Skipping disabled node %d\n", i);
continue;
}
if (nodeid >= numnodes) {
- printk("Ignoring excess node %d (%lx:%lx)\n", nodeid,
- base, limit);
+ pr_info("Ignoring excess node %d (%lx:%lx)\n", nodeid,
+ base, limit);
continue;
}
if (!limit) {
- printk(KERN_INFO "Skipping node entry %d (base %lx)\n",
- i, base);
+ pr_info("Skipping node entry %d (base %lx)\n",
+ i, base);
continue;
}
if ((base >> 8) & 3 || (limit >> 8) & 3) {
- printk(KERN_ERR "Node %d using interleaving mode %lx/%lx\n",
- nodeid, (base>>8)&3, (limit>>8) & 3);
+ pr_err("Node %d using interleaving mode %lx/%lx\n",
+ nodeid, (base >> 8) & 3, (limit >> 8) & 3);
return -1;
}
- if (node_isset(nodeid, node_possible_map)) {
- printk(KERN_INFO "Node %d already present. Skipping\n",
- nodeid);
+ if (node_isset(nodeid, nodes_parsed)) {
+ pr_info("Node %d already present, skipping\n",
+ nodeid);
continue;
}
@@ -141,8 +145,8 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end)
limit |= (1<<24)-1;
limit++;
- if (limit > max_pfn << PAGE_SHIFT)
- limit = max_pfn << PAGE_SHIFT;
+ if (limit > end)
+ limit = end;
if (limit <= base)
continue;
@@ -154,24 +158,24 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end)
if (limit > end)
limit = end;
if (limit == base) {
- printk(KERN_ERR "Empty node %d\n", nodeid);
+ pr_err("Empty node %d\n", nodeid);
continue;
}
if (limit < base) {
- printk(KERN_ERR "Node %d bogus settings %lx-%lx.\n",
+ pr_err("Node %d bogus settings %lx-%lx.\n",
nodeid, base, limit);
continue;
}
/* Could sort here, but pun for now. Should not happen anyroads. */
if (prevbase > base) {
- printk(KERN_ERR "Node map not sorted %lx,%lx\n",
+ pr_err("Node map not sorted %lx,%lx\n",
prevbase, base);
return -1;
}
- printk(KERN_INFO "Node %d MemBase %016lx Limit %016lx\n",
- nodeid, base, limit);
+ pr_info("Node %d MemBase %016lx Limit %016lx\n",
+ nodeid, base, limit);
found++;
@@ -180,18 +184,29 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end)
prevbase = base;
- node_set(nodeid, node_possible_map);
+ node_set(nodeid, nodes_parsed);
}
if (!found)
return -1;
+ return 0;
+}
+
+int __init k8_scan_nodes(void)
+{
+ unsigned int bits;
+ unsigned int cores;
+ unsigned int apicid_base;
+ int i;
+ BUG_ON(nodes_empty(nodes_parsed));
+ node_possible_map = nodes_parsed;
memnode_shift = compute_hash_shift(nodes, 8, NULL);
if (memnode_shift < 0) {
- printk(KERN_ERR "No NUMA node hash function found. Contact maintainer\n");
+ pr_err("No NUMA node hash function found. Contact maintainer\n");
return -1;
}
- printk(KERN_INFO "Using node hash shift of %d\n", memnode_shift);
+ pr_info("Using node hash shift of %d\n", memnode_shift);
/* use the coreid bits from early_identify_cpu */
bits = boot_cpu_data.x86_coreid_bits;
@@ -200,14 +215,12 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end)
/* need to get boot_cpu_id early for system with apicid lifting */
early_get_boot_cpu_id();
if (boot_cpu_physical_apicid > 0) {
- printk(KERN_INFO "BSP APIC ID: %02x\n",
- boot_cpu_physical_apicid);
+ pr_info("BSP APIC ID: %02x\n", boot_cpu_physical_apicid);
apicid_base = boot_cpu_physical_apicid;
}
- for (i = 0; i < 8; i++) {
- if (nodes[i].start == nodes[i].end)
- continue;
+ for_each_node_mask(i, node_possible_map) {
+ int j;
e820_register_active_regions(i,
nodes[i].start >> PAGE_SHIFT,
diff --git a/arch/x86/mm/kmemcheck/error.c b/arch/x86/mm/kmemcheck/error.c
index 4901d0dafda6..af3b6c8a436f 100644
--- a/arch/x86/mm/kmemcheck/error.c
+++ b/arch/x86/mm/kmemcheck/error.c
@@ -106,26 +106,25 @@ void kmemcheck_error_recall(void)
switch (e->type) {
case KMEMCHECK_ERROR_INVALID_ACCESS:
- printk(KERN_ERR "WARNING: kmemcheck: Caught %d-bit read "
- "from %s memory (%p)\n",
+ printk(KERN_WARNING "WARNING: kmemcheck: Caught %d-bit read from %s memory (%p)\n",
8 * e->size, e->state < ARRAY_SIZE(desc) ?
desc[e->state] : "(invalid shadow state)",
(void *) e->address);
- printk(KERN_INFO);
+ printk(KERN_WARNING);
for (i = 0; i < SHADOW_COPY_SIZE; ++i)
- printk("%02x", e->memory_copy[i]);
- printk("\n");
+ printk(KERN_CONT "%02x", e->memory_copy[i]);
+ printk(KERN_CONT "\n");
- printk(KERN_INFO);
+ printk(KERN_WARNING);
for (i = 0; i < SHADOW_COPY_SIZE; ++i) {
if (e->shadow_copy[i] < ARRAY_SIZE(short_desc))
- printk(" %c", short_desc[e->shadow_copy[i]]);
+ printk(KERN_CONT " %c", short_desc[e->shadow_copy[i]]);
else
- printk(" ?");
+ printk(KERN_CONT " ?");
}
- printk("\n");
- printk(KERN_INFO "%*c\n", 2 + 2
+ printk(KERN_CONT "\n");
+ printk(KERN_WARNING "%*c\n", 2 + 2
* (int) (e->address & (SHADOW_COPY_SIZE - 1)), '^');
break;
case KMEMCHECK_ERROR_BUG:
diff --git a/arch/x86/mm/kmemcheck/kmemcheck.c b/arch/x86/mm/kmemcheck/kmemcheck.c
index 8cc183344140..b3b531a4f8e5 100644
--- a/arch/x86/mm/kmemcheck/kmemcheck.c
+++ b/arch/x86/mm/kmemcheck/kmemcheck.c
@@ -337,7 +337,7 @@ bool kmemcheck_is_obj_initialized(unsigned long addr, size_t size)
if (!shadow)
return true;
- status = kmemcheck_shadow_test(shadow, size);
+ status = kmemcheck_shadow_test_all(shadow, size);
return status == KMEMCHECK_SHADOW_INITIALIZED;
}
diff --git a/arch/x86/mm/kmemcheck/shadow.c b/arch/x86/mm/kmemcheck/shadow.c
index 3f66b82076a3..aec124214d97 100644
--- a/arch/x86/mm/kmemcheck/shadow.c
+++ b/arch/x86/mm/kmemcheck/shadow.c
@@ -125,12 +125,12 @@ void kmemcheck_mark_initialized_pages(struct page *p, unsigned int n)
enum kmemcheck_shadow kmemcheck_shadow_test(void *shadow, unsigned int size)
{
+#ifdef CONFIG_KMEMCHECK_PARTIAL_OK
uint8_t *x;
unsigned int i;
x = shadow;
-#ifdef CONFIG_KMEMCHECK_PARTIAL_OK
/*
* Make sure _some_ bytes are initialized. Gcc frequently generates
* code to access neighboring bytes.
@@ -139,13 +139,25 @@ enum kmemcheck_shadow kmemcheck_shadow_test(void *shadow, unsigned int size)
if (x[i] == KMEMCHECK_SHADOW_INITIALIZED)
return x[i];
}
+
+ return x[0];
#else
+ return kmemcheck_shadow_test_all(shadow, size);
+#endif
+}
+
+enum kmemcheck_shadow kmemcheck_shadow_test_all(void *shadow, unsigned int size)
+{
+ uint8_t *x;
+ unsigned int i;
+
+ x = shadow;
+
/* All bytes must be initialized. */
for (i = 0; i < size; ++i) {
if (x[i] != KMEMCHECK_SHADOW_INITIALIZED)
return x[i];
}
-#endif
return x[0];
}
diff --git a/arch/x86/mm/kmemcheck/shadow.h b/arch/x86/mm/kmemcheck/shadow.h
index af46d9ab9d86..ff0b2f70fbcb 100644
--- a/arch/x86/mm/kmemcheck/shadow.h
+++ b/arch/x86/mm/kmemcheck/shadow.h
@@ -11,6 +11,8 @@ enum kmemcheck_shadow {
void *kmemcheck_shadow_lookup(unsigned long address);
enum kmemcheck_shadow kmemcheck_shadow_test(void *shadow, unsigned int size);
+enum kmemcheck_shadow kmemcheck_shadow_test_all(void *shadow,
+ unsigned int size);
void kmemcheck_shadow_set(void *shadow, unsigned int size);
#endif
diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c
index 16ccbd77917f..536fb6823366 100644
--- a/arch/x86/mm/kmmio.c
+++ b/arch/x86/mm/kmmio.c
@@ -5,6 +5,8 @@
* 2008 Pekka Paalanen <pq@iki.fi>
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/list.h>
#include <linux/rculist.h>
#include <linux/spinlock.h>
@@ -136,7 +138,7 @@ static int clear_page_presence(struct kmmio_fault_page *f, bool clear)
pte_t *pte = lookup_address(f->page, &level);
if (!pte) {
- pr_err("kmmio: no pte for page 0x%08lx\n", f->page);
+ pr_err("no pte for page 0x%08lx\n", f->page);
return -1;
}
@@ -148,7 +150,7 @@ static int clear_page_presence(struct kmmio_fault_page *f, bool clear)
clear_pte_presence(pte, clear, &f->old_presence);
break;
default:
- pr_err("kmmio: unexpected page level 0x%x.\n", level);
+ pr_err("unexpected page level 0x%x.\n", level);
return -1;
}
@@ -170,13 +172,14 @@ static int clear_page_presence(struct kmmio_fault_page *f, bool clear)
static int arm_kmmio_fault_page(struct kmmio_fault_page *f)
{
int ret;
- WARN_ONCE(f->armed, KERN_ERR "kmmio page already armed.\n");
+ WARN_ONCE(f->armed, KERN_ERR pr_fmt("kmmio page already armed.\n"));
if (f->armed) {
- pr_warning("kmmio double-arm: page 0x%08lx, ref %d, old %d\n",
- f->page, f->count, !!f->old_presence);
+ pr_warning("double-arm: page 0x%08lx, ref %d, old %d\n",
+ f->page, f->count, !!f->old_presence);
}
ret = clear_page_presence(f, true);
- WARN_ONCE(ret < 0, KERN_ERR "kmmio arming 0x%08lx failed.\n", f->page);
+ WARN_ONCE(ret < 0, KERN_ERR pr_fmt("arming 0x%08lx failed.\n"),
+ f->page);
f->armed = true;
return ret;
}
@@ -203,7 +206,7 @@ static void disarm_kmmio_fault_page(struct kmmio_fault_page *f)
*/
/*
* Interrupts are disabled on entry as trap3 is an interrupt gate
- * and they remain disabled thorough out this function.
+ * and they remain disabled throughout this function.
*/
int kmmio_handler(struct pt_regs *regs, unsigned long addr)
{
@@ -240,24 +243,21 @@ int kmmio_handler(struct pt_regs *regs, unsigned long addr)
* condition needs handling by do_page_fault(), the
* page really not being present is the most common.
*/
- pr_debug("kmmio: secondary hit for 0x%08lx CPU %d.\n",
- addr, smp_processor_id());
+ pr_debug("secondary hit for 0x%08lx CPU %d.\n",
+ addr, smp_processor_id());
if (!faultpage->old_presence)
- pr_info("kmmio: unexpected secondary hit for "
- "address 0x%08lx on CPU %d.\n", addr,
- smp_processor_id());
+ pr_info("unexpected secondary hit for address 0x%08lx on CPU %d.\n",
+ addr, smp_processor_id());
} else {
/*
* Prevent overwriting already in-flight context.
* This should not happen, let's hope disarming at
* least prevents a panic.
*/
- pr_emerg("kmmio: recursive probe hit on CPU %d, "
- "for address 0x%08lx. Ignoring.\n",
- smp_processor_id(), addr);
- pr_emerg("kmmio: previous hit was at 0x%08lx.\n",
- ctx->addr);
+ pr_emerg("recursive probe hit on CPU %d, for address 0x%08lx. Ignoring.\n",
+ smp_processor_id(), addr);
+ pr_emerg("previous hit was at 0x%08lx.\n", ctx->addr);
disarm_kmmio_fault_page(faultpage);
}
goto no_kmmio_ctx;
@@ -302,7 +302,7 @@ no_kmmio:
/*
* Interrupts are disabled on entry as trap1 is an interrupt gate
- * and they remain disabled thorough out this function.
+ * and they remain disabled throughout this function.
* This must always get called as the pair to kmmio_handler().
*/
static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs)
@@ -316,8 +316,8 @@ static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs)
* something external causing them (f.e. using a debugger while
* mmio tracing enabled), or erroneous behaviour
*/
- pr_warning("kmmio: unexpected debug trap on CPU %d.\n",
- smp_processor_id());
+ pr_warning("unexpected debug trap on CPU %d.\n",
+ smp_processor_id());
goto out;
}
@@ -425,7 +425,7 @@ int register_kmmio_probe(struct kmmio_probe *p)
list_add_rcu(&p->list, &kmmio_probes);
while (size < size_lim) {
if (add_kmmio_fault_page(p->addr + size))
- pr_err("kmmio: Unable to set page fault.\n");
+ pr_err("Unable to set page fault.\n");
size += PAGE_SIZE;
}
out:
@@ -490,7 +490,7 @@ static void remove_kmmio_fault_pages(struct rcu_head *head)
* 2. remove_kmmio_fault_pages()
* Remove the pages from kmmio_page_table.
* 3. rcu_free_kmmio_fault_pages()
- * Actally free the kmmio_fault_page structs as with RCU.
+ * Actually free the kmmio_fault_page structs as with RCU.
*/
void unregister_kmmio_probe(struct kmmio_probe *p)
{
@@ -511,7 +511,7 @@ void unregister_kmmio_probe(struct kmmio_probe *p)
drelease = kmalloc(sizeof(*drelease), GFP_ATOMIC);
if (!drelease) {
- pr_crit("kmmio: leaking kmmio_fault_page objects.\n");
+ pr_crit("leaking kmmio_fault_page objects.\n");
return;
}
drelease->release_list = release_list;
@@ -538,10 +538,17 @@ static int
kmmio_die_notifier(struct notifier_block *nb, unsigned long val, void *args)
{
struct die_args *arg = args;
+ unsigned long* dr6_p = (unsigned long *)ERR_PTR(arg->err);
- if (val == DIE_DEBUG && (arg->err & DR_STEP))
- if (post_kmmio_handler(arg->err, arg->regs) == 1)
+ if (val == DIE_DEBUG && (*dr6_p & DR_STEP))
+ if (post_kmmio_handler(*dr6_p, arg->regs) == 1) {
+ /*
+ * Reset the BS bit in dr6 (pointed by args->err) to
+ * denote completion of processing
+ */
+ *dr6_p &= ~DR_STEP;
return NOTIFY_STOP;
+ }
return NOTIFY_DONE;
}
diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c
index c8191defc38a..1dab5194fd9d 100644
--- a/arch/x86/mm/mmap.c
+++ b/arch/x86/mm/mmap.c
@@ -71,7 +71,7 @@ static int mmap_is_legacy(void)
if (current->personality & ADDR_COMPAT_LAYOUT)
return 1;
- if (current->signal->rlim[RLIMIT_STACK].rlim_cur == RLIM_INFINITY)
+ if (rlimit(RLIMIT_STACK) == RLIM_INFINITY)
return 1;
return sysctl_legacy_va_layout;
@@ -96,7 +96,7 @@ static unsigned long mmap_rnd(void)
static unsigned long mmap_base(void)
{
- unsigned long gap = current->signal->rlim[RLIMIT_STACK].rlim_cur;
+ unsigned long gap = rlimit(RLIMIT_STACK);
if (gap < MIN_GAP)
gap = MIN_GAP;
diff --git a/arch/x86/mm/mmio-mod.c b/arch/x86/mm/mmio-mod.c
index 132772a8ec57..34a3291ca103 100644
--- a/arch/x86/mm/mmio-mod.c
+++ b/arch/x86/mm/mmio-mod.c
@@ -19,6 +19,9 @@
*
* Derived from the read-mod example from relay-examples by Tom Zanussi.
*/
+
+#define pr_fmt(fmt) "mmiotrace: " fmt
+
#define DEBUG 1
#include <linux/module.h>
@@ -36,8 +39,6 @@
#include "pf_in.h"
-#define NAME "mmiotrace: "
-
struct trap_reason {
unsigned long addr;
unsigned long ip;
@@ -96,17 +97,18 @@ static void print_pte(unsigned long address)
pte_t *pte = lookup_address(address, &level);
if (!pte) {
- pr_err(NAME "Error in %s: no pte for page 0x%08lx\n",
- __func__, address);
+ pr_err("Error in %s: no pte for page 0x%08lx\n",
+ __func__, address);
return;
}
if (level == PG_LEVEL_2M) {
- pr_emerg(NAME "4MB pages are not currently supported: "
- "0x%08lx\n", address);
+ pr_emerg("4MB pages are not currently supported: 0x%08lx\n",
+ address);
BUG();
}
- pr_info(NAME "pte for 0x%lx: 0x%llx 0x%llx\n", address,
+ pr_info("pte for 0x%lx: 0x%llx 0x%llx\n",
+ address,
(unsigned long long)pte_val(*pte),
(unsigned long long)pte_val(*pte) & _PAGE_PRESENT);
}
@@ -118,22 +120,21 @@ static void print_pte(unsigned long address)
static void die_kmmio_nesting_error(struct pt_regs *regs, unsigned long addr)
{
const struct trap_reason *my_reason = &get_cpu_var(pf_reason);
- pr_emerg(NAME "unexpected fault for address: 0x%08lx, "
- "last fault for address: 0x%08lx\n",
- addr, my_reason->addr);
+ pr_emerg("unexpected fault for address: 0x%08lx, last fault for address: 0x%08lx\n",
+ addr, my_reason->addr);
print_pte(addr);
print_symbol(KERN_EMERG "faulting IP is at %s\n", regs->ip);
print_symbol(KERN_EMERG "last faulting IP was at %s\n", my_reason->ip);
#ifdef __i386__
pr_emerg("eax: %08lx ebx: %08lx ecx: %08lx edx: %08lx\n",
- regs->ax, regs->bx, regs->cx, regs->dx);
+ regs->ax, regs->bx, regs->cx, regs->dx);
pr_emerg("esi: %08lx edi: %08lx ebp: %08lx esp: %08lx\n",
- regs->si, regs->di, regs->bp, regs->sp);
+ regs->si, regs->di, regs->bp, regs->sp);
#else
pr_emerg("rax: %016lx rcx: %016lx rdx: %016lx\n",
- regs->ax, regs->cx, regs->dx);
+ regs->ax, regs->cx, regs->dx);
pr_emerg("rsi: %016lx rdi: %016lx rbp: %016lx rsp: %016lx\n",
- regs->si, regs->di, regs->bp, regs->sp);
+ regs->si, regs->di, regs->bp, regs->sp);
#endif
put_cpu_var(pf_reason);
BUG();
@@ -213,7 +214,7 @@ static void post(struct kmmio_probe *p, unsigned long condition,
/* this should always return the active_trace count to 0 */
my_reason->active_traces--;
if (my_reason->active_traces) {
- pr_emerg(NAME "unexpected post handler");
+ pr_emerg("unexpected post handler");
BUG();
}
@@ -244,7 +245,7 @@ static void ioremap_trace_core(resource_size_t offset, unsigned long size,
};
if (!trace) {
- pr_err(NAME "kmalloc failed in ioremap\n");
+ pr_err("kmalloc failed in ioremap\n");
return;
}
@@ -282,8 +283,8 @@ void mmiotrace_ioremap(resource_size_t offset, unsigned long size,
if (!is_enabled()) /* recheck and proper locking in *_core() */
return;
- pr_debug(NAME "ioremap_*(0x%llx, 0x%lx) = %p\n",
- (unsigned long long)offset, size, addr);
+ pr_debug("ioremap_*(0x%llx, 0x%lx) = %p\n",
+ (unsigned long long)offset, size, addr);
if ((filter_offset) && (offset != filter_offset))
return;
ioremap_trace_core(offset, size, addr);
@@ -301,7 +302,7 @@ static void iounmap_trace_core(volatile void __iomem *addr)
struct remap_trace *tmp;
struct remap_trace *found_trace = NULL;
- pr_debug(NAME "Unmapping %p.\n", addr);
+ pr_debug("Unmapping %p.\n", addr);
spin_lock_irq(&trace_lock);
if (!is_enabled())
@@ -363,9 +364,8 @@ static void clear_trace_list(void)
* Caller also ensures is_enabled() cannot change.
*/
list_for_each_entry(trace, &trace_list, list) {
- pr_notice(NAME "purging non-iounmapped "
- "trace @0x%08lx, size 0x%lx.\n",
- trace->probe.addr, trace->probe.len);
+ pr_notice("purging non-iounmapped trace @0x%08lx, size 0x%lx.\n",
+ trace->probe.addr, trace->probe.len);
if (!nommiotrace)
unregister_kmmio_probe(&trace->probe);
}
@@ -387,7 +387,7 @@ static void enter_uniprocessor(void)
if (downed_cpus == NULL &&
!alloc_cpumask_var(&downed_cpus, GFP_KERNEL)) {
- pr_notice(NAME "Failed to allocate mask\n");
+ pr_notice("Failed to allocate mask\n");
goto out;
}
@@ -395,20 +395,19 @@ static void enter_uniprocessor(void)
cpumask_copy(downed_cpus, cpu_online_mask);
cpumask_clear_cpu(cpumask_first(cpu_online_mask), downed_cpus);
if (num_online_cpus() > 1)
- pr_notice(NAME "Disabling non-boot CPUs...\n");
+ pr_notice("Disabling non-boot CPUs...\n");
put_online_cpus();
for_each_cpu(cpu, downed_cpus) {
err = cpu_down(cpu);
if (!err)
- pr_info(NAME "CPU%d is down.\n", cpu);
+ pr_info("CPU%d is down.\n", cpu);
else
- pr_err(NAME "Error taking CPU%d down: %d\n", cpu, err);
+ pr_err("Error taking CPU%d down: %d\n", cpu, err);
}
out:
if (num_online_cpus() > 1)
- pr_warning(NAME "multiple CPUs still online, "
- "may miss events.\n");
+ pr_warning("multiple CPUs still online, may miss events.\n");
}
/* __ref because leave_uniprocessor calls cpu_up which is __cpuinit,
@@ -420,13 +419,13 @@ static void __ref leave_uniprocessor(void)
if (downed_cpus == NULL || cpumask_weight(downed_cpus) == 0)
return;
- pr_notice(NAME "Re-enabling CPUs...\n");
+ pr_notice("Re-enabling CPUs...\n");
for_each_cpu(cpu, downed_cpus) {
err = cpu_up(cpu);
if (!err)
- pr_info(NAME "enabled CPU%d.\n", cpu);
+ pr_info("enabled CPU%d.\n", cpu);
else
- pr_err(NAME "cannot re-enable CPU%d: %d\n", cpu, err);
+ pr_err("cannot re-enable CPU%d: %d\n", cpu, err);
}
}
@@ -434,8 +433,8 @@ static void __ref leave_uniprocessor(void)
static void enter_uniprocessor(void)
{
if (num_online_cpus() > 1)
- pr_warning(NAME "multiple CPUs are online, may miss events. "
- "Suggest booting with maxcpus=1 kernel argument.\n");
+ pr_warning("multiple CPUs are online, may miss events. "
+ "Suggest booting with maxcpus=1 kernel argument.\n");
}
static void leave_uniprocessor(void)
@@ -450,13 +449,13 @@ void enable_mmiotrace(void)
goto out;
if (nommiotrace)
- pr_info(NAME "MMIO tracing disabled.\n");
+ pr_info("MMIO tracing disabled.\n");
kmmio_init();
enter_uniprocessor();
spin_lock_irq(&trace_lock);
atomic_inc(&mmiotrace_enabled);
spin_unlock_irq(&trace_lock);
- pr_info(NAME "enabled.\n");
+ pr_info("enabled.\n");
out:
mutex_unlock(&mmiotrace_mutex);
}
@@ -475,7 +474,7 @@ void disable_mmiotrace(void)
clear_trace_list(); /* guarantees: no more kmmio callbacks */
leave_uniprocessor();
kmmio_cleanup();
- pr_info(NAME "disabled.\n");
+ pr_info("disabled.\n");
out:
mutex_unlock(&mmiotrace_mutex);
}
diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c
index d2530062fe00..809baaaf48b1 100644
--- a/arch/x86/mm/numa_32.c
+++ b/arch/x86/mm/numa_32.c
@@ -347,8 +347,8 @@ static void init_remap_allocator(int nid)
(ulong) node_remap_end_vaddr[nid]);
}
-void __init initmem_init(unsigned long start_pfn,
- unsigned long end_pfn)
+void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn,
+ int acpi, int k8)
{
int nid;
long kva_target_pfn;
@@ -418,7 +418,10 @@ void __init initmem_init(unsigned long start_pfn,
for_each_online_node(nid) {
memset(NODE_DATA(nid), 0, sizeof(struct pglist_data));
+ NODE_DATA(nid)->node_id = nid;
+#ifndef CONFIG_NO_BOOTMEM
NODE_DATA(nid)->bdata = &bootmem_node_data[nid];
+#endif
}
setup_bootmem_allocator();
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index 459913beac71..8948f47fde05 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -163,30 +163,48 @@ static void * __init early_node_mem(int nodeid, unsigned long start,
unsigned long end, unsigned long size,
unsigned long align)
{
- unsigned long mem = find_e820_area(start, end, size, align);
- void *ptr;
+ unsigned long mem;
+ /*
+ * put it on high as possible
+ * something will go with NODE_DATA
+ */
+ if (start < (MAX_DMA_PFN<<PAGE_SHIFT))
+ start = MAX_DMA_PFN<<PAGE_SHIFT;
+ if (start < (MAX_DMA32_PFN<<PAGE_SHIFT) &&
+ end > (MAX_DMA32_PFN<<PAGE_SHIFT))
+ start = MAX_DMA32_PFN<<PAGE_SHIFT;
+ mem = find_e820_area(start, end, size, align);
+ if (mem != -1L)
+ return __va(mem);
+
+ /* extend the search scope */
+ end = max_pfn_mapped << PAGE_SHIFT;
+ if (end > (MAX_DMA32_PFN<<PAGE_SHIFT))
+ start = MAX_DMA32_PFN<<PAGE_SHIFT;
+ else
+ start = MAX_DMA_PFN<<PAGE_SHIFT;
+ mem = find_e820_area(start, end, size, align);
if (mem != -1L)
return __va(mem);
- ptr = __alloc_bootmem_nopanic(size, align, __pa(MAX_DMA_ADDRESS));
- if (ptr == NULL) {
- printk(KERN_ERR "Cannot find %lu bytes in node %d\n",
+ printk(KERN_ERR "Cannot find %lu bytes in node %d\n",
size, nodeid);
- return NULL;
- }
- return ptr;
+
+ return NULL;
}
/* Initialize bootmem allocator for a node */
void __init
setup_node_bootmem(int nodeid, unsigned long start, unsigned long end)
{
- unsigned long start_pfn, last_pfn, bootmap_pages, bootmap_size;
+ unsigned long start_pfn, last_pfn, nodedata_phys;
const int pgdat_size = roundup(sizeof(pg_data_t), PAGE_SIZE);
- unsigned long bootmap_start, nodedata_phys;
- void *bootmap;
int nid;
+#ifndef CONFIG_NO_BOOTMEM
+ unsigned long bootmap_start, bootmap_pages, bootmap_size;
+ void *bootmap;
+#endif
if (!end)
return;
@@ -200,7 +218,7 @@ setup_node_bootmem(int nodeid, unsigned long start, unsigned long end)
start = roundup(start, ZONE_ALIGN);
- printk(KERN_INFO "Bootmem setup node %d %016lx-%016lx\n", nodeid,
+ printk(KERN_INFO "Initmem setup node %d %016lx-%016lx\n", nodeid,
start, end);
start_pfn = start >> PAGE_SHIFT;
@@ -211,14 +229,21 @@ setup_node_bootmem(int nodeid, unsigned long start, unsigned long end)
if (node_data[nodeid] == NULL)
return;
nodedata_phys = __pa(node_data[nodeid]);
+ reserve_early(nodedata_phys, nodedata_phys + pgdat_size, "NODE_DATA");
printk(KERN_INFO " NODE_DATA [%016lx - %016lx]\n", nodedata_phys,
nodedata_phys + pgdat_size - 1);
+ nid = phys_to_nid(nodedata_phys);
+ if (nid != nodeid)
+ printk(KERN_INFO " NODE_DATA(%d) on node %d\n", nodeid, nid);
memset(NODE_DATA(nodeid), 0, sizeof(pg_data_t));
- NODE_DATA(nodeid)->bdata = &bootmem_node_data[nodeid];
+ NODE_DATA(nodeid)->node_id = nodeid;
NODE_DATA(nodeid)->node_start_pfn = start_pfn;
NODE_DATA(nodeid)->node_spanned_pages = last_pfn - start_pfn;
+#ifndef CONFIG_NO_BOOTMEM
+ NODE_DATA(nodeid)->bdata = &bootmem_node_data[nodeid];
+
/*
* Find a place for the bootmem map
* nodedata_phys could be on other nodes by alloc_bootmem,
@@ -227,11 +252,7 @@ setup_node_bootmem(int nodeid, unsigned long start, unsigned long end)
* of alloc_bootmem, that could clash with reserved range
*/
bootmap_pages = bootmem_bootmap_pages(last_pfn - start_pfn);
- nid = phys_to_nid(nodedata_phys);
- if (nid == nodeid)
- bootmap_start = roundup(nodedata_phys + pgdat_size, PAGE_SIZE);
- else
- bootmap_start = roundup(start, PAGE_SIZE);
+ bootmap_start = roundup(nodedata_phys + pgdat_size, PAGE_SIZE);
/*
* SMP_CACHE_BYTES could be enough, but init_bootmem_node like
* to use that to align to PAGE_SIZE
@@ -239,12 +260,13 @@ setup_node_bootmem(int nodeid, unsigned long start, unsigned long end)
bootmap = early_node_mem(nodeid, bootmap_start, end,
bootmap_pages<<PAGE_SHIFT, PAGE_SIZE);
if (bootmap == NULL) {
- if (nodedata_phys < start || nodedata_phys >= end)
- free_bootmem(nodedata_phys, pgdat_size);
+ free_early(nodedata_phys, nodedata_phys + pgdat_size);
node_data[nodeid] = NULL;
return;
}
bootmap_start = __pa(bootmap);
+ reserve_early(bootmap_start, bootmap_start+(bootmap_pages<<PAGE_SHIFT),
+ "BOOTMAP");
bootmap_size = init_bootmem_node(NODE_DATA(nodeid),
bootmap_start >> PAGE_SHIFT,
@@ -253,31 +275,12 @@ setup_node_bootmem(int nodeid, unsigned long start, unsigned long end)
printk(KERN_INFO " bootmap [%016lx - %016lx] pages %lx\n",
bootmap_start, bootmap_start + bootmap_size - 1,
bootmap_pages);
-
- free_bootmem_with_active_regions(nodeid, end);
-
- /*
- * convert early reserve to bootmem reserve earlier
- * otherwise early_node_mem could use early reserved mem
- * on previous node
- */
- early_res_to_bootmem(start, end);
-
- /*
- * in some case early_node_mem could use alloc_bootmem
- * to get range on other node, don't reserve that again
- */
- if (nid != nodeid)
- printk(KERN_INFO " NODE_DATA(%d) on node %d\n", nodeid, nid);
- else
- reserve_bootmem_node(NODE_DATA(nodeid), nodedata_phys,
- pgdat_size, BOOTMEM_DEFAULT);
nid = phys_to_nid(bootmap_start);
if (nid != nodeid)
printk(KERN_INFO " bootmap(%d) on node %d\n", nodeid, nid);
- else
- reserve_bootmem_node(NODE_DATA(nodeid), bootmap_start,
- bootmap_pages<<PAGE_SHIFT, BOOTMEM_DEFAULT);
+
+ free_bootmem_with_active_regions(nodeid, end);
+#endif
node_set_online(nodeid);
}
@@ -306,8 +309,71 @@ void __init numa_init_array(void)
#ifdef CONFIG_NUMA_EMU
/* Numa emulation */
+static struct bootnode nodes[MAX_NUMNODES] __initdata;
+static struct bootnode physnodes[MAX_NUMNODES] __initdata;
static char *cmdline __initdata;
+static int __init setup_physnodes(unsigned long start, unsigned long end,
+ int acpi, int k8)
+{
+ int nr_nodes = 0;
+ int ret = 0;
+ int i;
+
+#ifdef CONFIG_ACPI_NUMA
+ if (acpi)
+ nr_nodes = acpi_get_nodes(physnodes);
+#endif
+#ifdef CONFIG_K8_NUMA
+ if (k8)
+ nr_nodes = k8_get_nodes(physnodes);
+#endif
+ /*
+ * Basic sanity checking on the physical node map: there may be errors
+ * if the SRAT or K8 incorrectly reported the topology or the mem=
+ * kernel parameter is used.
+ */
+ for (i = 0; i < nr_nodes; i++) {
+ if (physnodes[i].start == physnodes[i].end)
+ continue;
+ if (physnodes[i].start > end) {
+ physnodes[i].end = physnodes[i].start;
+ continue;
+ }
+ if (physnodes[i].end < start) {
+ physnodes[i].start = physnodes[i].end;
+ continue;
+ }
+ if (physnodes[i].start < start)
+ physnodes[i].start = start;
+ if (physnodes[i].end > end)
+ physnodes[i].end = end;
+ }
+
+ /*
+ * Remove all nodes that have no memory or were truncated because of the
+ * limited address range.
+ */
+ for (i = 0; i < nr_nodes; i++) {
+ if (physnodes[i].start == physnodes[i].end)
+ continue;
+ physnodes[ret].start = physnodes[i].start;
+ physnodes[ret].end = physnodes[i].end;
+ ret++;
+ }
+
+ /*
+ * If no physical topology was detected, a single node is faked to cover
+ * the entire address space.
+ */
+ if (!ret) {
+ physnodes[ret].start = start;
+ physnodes[ret].end = end;
+ ret = 1;
+ }
+ return ret;
+}
+
/*
* Setups up nid to range from addr to addr + size. If the end
* boundary is greater than max_addr, then max_addr is used instead.
@@ -315,11 +381,9 @@ static char *cmdline __initdata;
* allocation past addr and -1 otherwise. addr is adjusted to be at
* the end of the node.
*/
-static int __init setup_node_range(int nid, struct bootnode *nodes, u64 *addr,
- u64 size, u64 max_addr)
+static int __init setup_node_range(int nid, u64 *addr, u64 size, u64 max_addr)
{
int ret = 0;
-
nodes[nid].start = *addr;
*addr += size;
if (*addr >= max_addr) {
@@ -335,167 +399,234 @@ static int __init setup_node_range(int nid, struct bootnode *nodes, u64 *addr,
}
/*
- * Splits num_nodes nodes up equally starting at node_start. The return value
- * is the number of nodes split up and addr is adjusted to be at the end of the
- * last node allocated.
+ * Sets up nr_nodes fake nodes interleaved over physical nodes ranging from addr
+ * to max_addr. The return value is the number of nodes allocated.
*/
-static int __init split_nodes_equally(struct bootnode *nodes, u64 *addr,
- u64 max_addr, int node_start,
- int num_nodes)
+static int __init split_nodes_interleave(u64 addr, u64 max_addr,
+ int nr_phys_nodes, int nr_nodes)
{
- unsigned int big;
+ nodemask_t physnode_mask = NODE_MASK_NONE;
u64 size;
+ int big;
+ int ret = 0;
int i;
- if (num_nodes <= 0)
+ if (nr_nodes <= 0)
return -1;
- if (num_nodes > MAX_NUMNODES)
- num_nodes = MAX_NUMNODES;
- size = (max_addr - *addr - e820_hole_size(*addr, max_addr)) /
- num_nodes;
+ if (nr_nodes > MAX_NUMNODES) {
+ pr_info("numa=fake=%d too large, reducing to %d\n",
+ nr_nodes, MAX_NUMNODES);
+ nr_nodes = MAX_NUMNODES;
+ }
+
+ size = (max_addr - addr - e820_hole_size(addr, max_addr)) / nr_nodes;
/*
* Calculate the number of big nodes that can be allocated as a result
- * of consolidating the leftovers.
+ * of consolidating the remainder.
*/
- big = ((size & ~FAKE_NODE_MIN_HASH_MASK) * num_nodes) /
- FAKE_NODE_MIN_SIZE;
+ big = ((size & ~FAKE_NODE_MIN_HASH_MASK) * nr_nodes) /
+ FAKE_NODE_MIN_SIZE;
- /* Round down to nearest FAKE_NODE_MIN_SIZE. */
size &= FAKE_NODE_MIN_HASH_MASK;
if (!size) {
- printk(KERN_ERR "Not enough memory for each node. "
- "NUMA emulation disabled.\n");
+ pr_err("Not enough memory for each node. "
+ "NUMA emulation disabled.\n");
return -1;
}
- for (i = node_start; i < num_nodes + node_start; i++) {
- u64 end = *addr + size;
+ for (i = 0; i < nr_phys_nodes; i++)
+ if (physnodes[i].start != physnodes[i].end)
+ node_set(i, physnode_mask);
- if (i < big)
- end += FAKE_NODE_MIN_SIZE;
- /*
- * The final node can have the remaining system RAM. Other
- * nodes receive roughly the same amount of available pages.
- */
- if (i == num_nodes + node_start - 1)
- end = max_addr;
- else
- while (end - *addr - e820_hole_size(*addr, end) <
- size) {
+ /*
+ * Continue to fill physical nodes with fake nodes until there is no
+ * memory left on any of them.
+ */
+ while (nodes_weight(physnode_mask)) {
+ for_each_node_mask(i, physnode_mask) {
+ u64 end = physnodes[i].start + size;
+ u64 dma32_end = PFN_PHYS(MAX_DMA32_PFN);
+
+ if (ret < big)
+ end += FAKE_NODE_MIN_SIZE;
+
+ /*
+ * Continue to add memory to this fake node if its
+ * non-reserved memory is less than the per-node size.
+ */
+ while (end - physnodes[i].start -
+ e820_hole_size(physnodes[i].start, end) < size) {
end += FAKE_NODE_MIN_SIZE;
- if (end > max_addr) {
- end = max_addr;
+ if (end > physnodes[i].end) {
+ end = physnodes[i].end;
break;
}
}
- if (setup_node_range(i, nodes, addr, end - *addr, max_addr) < 0)
- break;
+
+ /*
+ * If there won't be at least FAKE_NODE_MIN_SIZE of
+ * non-reserved memory in ZONE_DMA32 for the next node,
+ * this one must extend to the boundary.
+ */
+ if (end < dma32_end && dma32_end - end -
+ e820_hole_size(end, dma32_end) < FAKE_NODE_MIN_SIZE)
+ end = dma32_end;
+
+ /*
+ * If there won't be enough non-reserved memory for the
+ * next node, this one must extend to the end of the
+ * physical node.
+ */
+ if (physnodes[i].end - end -
+ e820_hole_size(end, physnodes[i].end) < size)
+ end = physnodes[i].end;
+
+ /*
+ * Avoid allocating more nodes than requested, which can
+ * happen as a result of rounding down each node's size
+ * to FAKE_NODE_MIN_SIZE.
+ */
+ if (nodes_weight(physnode_mask) + ret >= nr_nodes)
+ end = physnodes[i].end;
+
+ if (setup_node_range(ret++, &physnodes[i].start,
+ end - physnodes[i].start,
+ physnodes[i].end) < 0)
+ node_clear(i, physnode_mask);
+ }
}
- return i - node_start + 1;
+ return ret;
}
/*
- * Splits the remaining system RAM into chunks of size. The remaining memory is
- * always assigned to a final node and can be asymmetric. Returns the number of
- * nodes split.
+ * Returns the end address of a node so that there is at least `size' amount of
+ * non-reserved memory or `max_addr' is reached.
*/
-static int __init split_nodes_by_size(struct bootnode *nodes, u64 *addr,
- u64 max_addr, int node_start, u64 size)
+static u64 __init find_end_of_node(u64 start, u64 max_addr, u64 size)
{
- int i = node_start;
- size = (size << 20) & FAKE_NODE_MIN_HASH_MASK;
- while (!setup_node_range(i++, nodes, addr, size, max_addr))
- ;
- return i - node_start;
+ u64 end = start + size;
+
+ while (end - start - e820_hole_size(start, end) < size) {
+ end += FAKE_NODE_MIN_SIZE;
+ if (end > max_addr) {
+ end = max_addr;
+ break;
+ }
+ }
+ return end;
}
/*
- * Sets up the system RAM area from start_pfn to last_pfn according to the
- * numa=fake command-line option.
+ * Sets up fake nodes of `size' interleaved over physical nodes ranging from
+ * `addr' to `max_addr'. The return value is the number of nodes allocated.
*/
-static struct bootnode nodes[MAX_NUMNODES] __initdata;
-
-static int __init numa_emulation(unsigned long start_pfn, unsigned long last_pfn)
+static int __init split_nodes_size_interleave(u64 addr, u64 max_addr, u64 size)
{
- u64 size, addr = start_pfn << PAGE_SHIFT;
- u64 max_addr = last_pfn << PAGE_SHIFT;
- int num_nodes = 0, num = 0, coeff_flag, coeff = -1, i;
+ nodemask_t physnode_mask = NODE_MASK_NONE;
+ u64 min_size;
+ int ret = 0;
+ int i;
- memset(&nodes, 0, sizeof(nodes));
+ if (!size)
+ return -1;
/*
- * If the numa=fake command-line is just a single number N, split the
- * system RAM into N fake nodes.
+ * The limit on emulated nodes is MAX_NUMNODES, so the size per node is
+ * increased accordingly if the requested size is too small. This
+ * creates a uniform distribution of node sizes across the entire
+ * machine (but not necessarily over physical nodes).
*/
- if (!strchr(cmdline, '*') && !strchr(cmdline, ',')) {
- long n = simple_strtol(cmdline, NULL, 0);
-
- num_nodes = split_nodes_equally(nodes, &addr, max_addr, 0, n);
- if (num_nodes < 0)
- return num_nodes;
- goto out;
+ min_size = (max_addr - addr - e820_hole_size(addr, max_addr)) /
+ MAX_NUMNODES;
+ min_size = max(min_size, FAKE_NODE_MIN_SIZE);
+ if ((min_size & FAKE_NODE_MIN_HASH_MASK) < min_size)
+ min_size = (min_size + FAKE_NODE_MIN_SIZE) &
+ FAKE_NODE_MIN_HASH_MASK;
+ if (size < min_size) {
+ pr_err("Fake node size %LuMB too small, increasing to %LuMB\n",
+ size >> 20, min_size >> 20);
+ size = min_size;
}
+ size &= FAKE_NODE_MIN_HASH_MASK;
- /* Parse the command line. */
- for (coeff_flag = 0; ; cmdline++) {
- if (*cmdline && isdigit(*cmdline)) {
- num = num * 10 + *cmdline - '0';
- continue;
- }
- if (*cmdline == '*') {
- if (num > 0)
- coeff = num;
- coeff_flag = 1;
- }
- if (!*cmdline || *cmdline == ',') {
- if (!coeff_flag)
- coeff = 1;
+ for (i = 0; i < MAX_NUMNODES; i++)
+ if (physnodes[i].start != physnodes[i].end)
+ node_set(i, physnode_mask);
+ /*
+ * Fill physical nodes with fake nodes of size until there is no memory
+ * left on any of them.
+ */
+ while (nodes_weight(physnode_mask)) {
+ for_each_node_mask(i, physnode_mask) {
+ u64 dma32_end = MAX_DMA32_PFN << PAGE_SHIFT;
+ u64 end;
+
+ end = find_end_of_node(physnodes[i].start,
+ physnodes[i].end, size);
/*
- * Round down to the nearest FAKE_NODE_MIN_SIZE.
- * Command-line coefficients are in megabytes.
+ * If there won't be at least FAKE_NODE_MIN_SIZE of
+ * non-reserved memory in ZONE_DMA32 for the next node,
+ * this one must extend to the boundary.
*/
- size = ((u64)num << 20) & FAKE_NODE_MIN_HASH_MASK;
- if (size)
- for (i = 0; i < coeff; i++, num_nodes++)
- if (setup_node_range(num_nodes, nodes,
- &addr, size, max_addr) < 0)
- goto done;
- if (!*cmdline)
- break;
- coeff_flag = 0;
- coeff = -1;
+ if (end < dma32_end && dma32_end - end -
+ e820_hole_size(end, dma32_end) < FAKE_NODE_MIN_SIZE)
+ end = dma32_end;
+
+ /*
+ * If there won't be enough non-reserved memory for the
+ * next node, this one must extend to the end of the
+ * physical node.
+ */
+ if (physnodes[i].end - end -
+ e820_hole_size(end, physnodes[i].end) < size)
+ end = physnodes[i].end;
+
+ /*
+ * Setup the fake node that will be allocated as bootmem
+ * later. If setup_node_range() returns non-zero, there
+ * is no more memory available on this physical node.
+ */
+ if (setup_node_range(ret++, &physnodes[i].start,
+ end - physnodes[i].start,
+ physnodes[i].end) < 0)
+ node_clear(i, physnode_mask);
}
- num = 0;
}
-done:
- if (!num_nodes)
- return -1;
- /* Fill remainder of system RAM, if appropriate. */
- if (addr < max_addr) {
- if (coeff_flag && coeff < 0) {
- /* Split remaining nodes into num-sized chunks */
- num_nodes += split_nodes_by_size(nodes, &addr, max_addr,
- num_nodes, num);
- goto out;
- }
- switch (*(cmdline - 1)) {
- case '*':
- /* Split remaining nodes into coeff chunks */
- if (coeff <= 0)
- break;
- num_nodes += split_nodes_equally(nodes, &addr, max_addr,
- num_nodes, coeff);
- break;
- case ',':
- /* Do not allocate remaining system RAM */
- break;
- default:
- /* Give one final node */
- setup_node_range(num_nodes, nodes, &addr,
- max_addr - addr, max_addr);
- num_nodes++;
- }
+ return ret;
+}
+
+/*
+ * Sets up the system RAM area from start_pfn to last_pfn according to the
+ * numa=fake command-line option.
+ */
+static int __init numa_emulation(unsigned long start_pfn,
+ unsigned long last_pfn, int acpi, int k8)
+{
+ u64 addr = start_pfn << PAGE_SHIFT;
+ u64 max_addr = last_pfn << PAGE_SHIFT;
+ int num_phys_nodes;
+ int num_nodes;
+ int i;
+
+ num_phys_nodes = setup_physnodes(addr, max_addr, acpi, k8);
+ /*
+ * If the numa=fake command-line contains a 'M' or 'G', it represents
+ * the fixed node size. Otherwise, if it is just a single number N,
+ * split the system RAM into N fake nodes.
+ */
+ if (strchr(cmdline, 'M') || strchr(cmdline, 'G')) {
+ u64 size;
+
+ size = memparse(cmdline, &cmdline);
+ num_nodes = split_nodes_size_interleave(addr, max_addr, size);
+ } else {
+ unsigned long n;
+
+ n = simple_strtoul(cmdline, NULL, 0);
+ num_nodes = split_nodes_interleave(addr, max_addr, num_phys_nodes, n);
}
-out:
+
+ if (num_nodes < 0)
+ return num_nodes;
memnode_shift = compute_hash_shift(nodes, num_nodes, NULL);
if (memnode_shift < 0) {
memnode_shift = 0;
@@ -505,14 +636,10 @@ out:
}
/*
- * We need to vacate all active ranges that may have been registered by
- * SRAT and set acpi_numa to -1 so that srat_disabled() always returns
- * true. NUMA emulation has succeeded so we will not scan ACPI nodes.
+ * We need to vacate all active ranges that may have been registered for
+ * the e820 memory map.
*/
remove_all_active_ranges();
-#ifdef CONFIG_ACPI_NUMA
- acpi_numa = -1;
-#endif
for_each_node_mask(i, node_possible_map) {
e820_register_active_regions(i, nodes[i].start >> PAGE_SHIFT,
nodes[i].end >> PAGE_SHIFT);
@@ -524,7 +651,8 @@ out:
}
#endif /* CONFIG_NUMA_EMU */
-void __init initmem_init(unsigned long start_pfn, unsigned long last_pfn)
+void __init initmem_init(unsigned long start_pfn, unsigned long last_pfn,
+ int acpi, int k8)
{
int i;
@@ -532,23 +660,22 @@ void __init initmem_init(unsigned long start_pfn, unsigned long last_pfn)
nodes_clear(node_online_map);
#ifdef CONFIG_NUMA_EMU
- if (cmdline && !numa_emulation(start_pfn, last_pfn))
+ if (cmdline && !numa_emulation(start_pfn, last_pfn, acpi, k8))
return;
nodes_clear(node_possible_map);
nodes_clear(node_online_map);
#endif
#ifdef CONFIG_ACPI_NUMA
- if (!numa_off && !acpi_scan_nodes(start_pfn << PAGE_SHIFT,
- last_pfn << PAGE_SHIFT))
+ if (!numa_off && acpi && !acpi_scan_nodes(start_pfn << PAGE_SHIFT,
+ last_pfn << PAGE_SHIFT))
return;
nodes_clear(node_possible_map);
nodes_clear(node_online_map);
#endif
#ifdef CONFIG_K8_NUMA
- if (!numa_off && !k8_scan_nodes(start_pfn<<PAGE_SHIFT,
- last_pfn<<PAGE_SHIFT))
+ if (!numa_off && k8 && !k8_scan_nodes())
return;
nodes_clear(node_possible_map);
nodes_clear(node_online_map);
@@ -579,6 +706,10 @@ unsigned long __init numa_free_all_bootmem(void)
for_each_online_node(i)
pages += free_all_bootmem_node(NODE_DATA(i));
+#ifdef CONFIG_NO_BOOTMEM
+ pages += free_all_memory_core_early(MAX_NUMNODES);
+#endif
+
return pages;
}
@@ -601,6 +732,25 @@ static __init int numa_setup(char *opt)
early_param("numa", numa_setup);
#ifdef CONFIG_NUMA
+
+static __init int find_near_online_node(int node)
+{
+ int n, val;
+ int min_val = INT_MAX;
+ int best_node = -1;
+
+ for_each_online_node(n) {
+ val = node_distance(node, n);
+
+ if (val < min_val) {
+ min_val = val;
+ best_node = n;
+ }
+ }
+
+ return best_node;
+}
+
/*
* Setup early cpu_to_node.
*
@@ -632,7 +782,7 @@ void __init init_cpu_to_node(void)
if (node == NUMA_NO_NODE)
continue;
if (!node_online(node))
- continue;
+ node = find_near_online_node(node);
numa_set_node(cpu, node);
}
}
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index dd38bfbefd1f..cf07c26d9a4a 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -279,6 +279,43 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address,
__pa((unsigned long)__end_rodata) >> PAGE_SHIFT))
pgprot_val(forbidden) |= _PAGE_RW;
+#if defined(CONFIG_X86_64) && defined(CONFIG_DEBUG_RODATA)
+ /*
+ * Once the kernel maps the text as RO (kernel_set_to_readonly is set),
+ * kernel text mappings for the large page aligned text, rodata sections
+ * will be always read-only. For the kernel identity mappings covering
+ * the holes caused by this alignment can be anything that user asks.
+ *
+ * This will preserve the large page mappings for kernel text/data
+ * at no extra cost.
+ */
+ if (kernel_set_to_readonly &&
+ within(address, (unsigned long)_text,
+ (unsigned long)__end_rodata_hpage_align)) {
+ unsigned int level;
+
+ /*
+ * Don't enforce the !RW mapping for the kernel text mapping,
+ * if the current mapping is already using small page mapping.
+ * No need to work hard to preserve large page mappings in this
+ * case.
+ *
+ * This also fixes the Linux Xen paravirt guest boot failure
+ * (because of unexpected read-only mappings for kernel identity
+ * mappings). In this paravirt guest case, the kernel text
+ * mapping and the kernel identity mapping share the same
+ * page-table pages. Thus we can't really use different
+ * protections for the kernel text and identity mappings. Also,
+ * these shared mappings are made of small page mappings.
+ * Thus this don't enforce !RW mapping for small page kernel
+ * text mapping logic will help Linux Xen parvirt guest boot
+ * aswell.
+ */
+ if (lookup_address(address, &level) && (level != PG_LEVEL_4K))
+ pgprot_val(forbidden) |= _PAGE_RW;
+ }
+#endif
+
prot = __pgprot(pgprot_val(prot) & ~pgprot_val(forbidden));
return prot;
@@ -1069,12 +1106,18 @@ EXPORT_SYMBOL(set_memory_array_wb);
int set_memory_x(unsigned long addr, int numpages)
{
+ if (!(__supported_pte_mask & _PAGE_NX))
+ return 0;
+
return change_page_attr_clear(&addr, numpages, __pgprot(_PAGE_NX), 0);
}
EXPORT_SYMBOL(set_memory_x);
int set_memory_nx(unsigned long addr, int numpages)
{
+ if (!(__supported_pte_mask & _PAGE_NX))
+ return 0;
+
return change_page_attr_set(&addr, numpages, __pgprot(_PAGE_NX), 0);
}
EXPORT_SYMBOL(set_memory_nx);
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index e78cd0ec2bcf..ae9648eb1c7f 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -20,6 +20,7 @@
#include <asm/cacheflush.h>
#include <asm/processor.h>
#include <asm/tlbflush.h>
+#include <asm/x86_init.h>
#include <asm/pgtable.h>
#include <asm/fcntl.h>
#include <asm/e820.h>
@@ -355,9 +356,6 @@ static int free_ram_pages_type(u64 start, u64 end)
* - _PAGE_CACHE_UC_MINUS
* - _PAGE_CACHE_UC
*
- * req_type will have a special case value '-1', when requester want to inherit
- * the memory type from mtrr (if WB), existing PAT, defaulting to UC_MINUS.
- *
* If new_type is NULL, function will return an error if it cannot reserve the
* region with req_type. If new_type is non-NULL, function will return
* available type in new_type in case of no error. In case of any error
@@ -377,9 +375,7 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type,
if (!pat_enabled) {
/* This is identical to page table setting without PAT */
if (new_type) {
- if (req_type == -1)
- *new_type = _PAGE_CACHE_WB;
- else if (req_type == _PAGE_CACHE_WC)
+ if (req_type == _PAGE_CACHE_WC)
*new_type = _PAGE_CACHE_UC_MINUS;
else
*new_type = req_type & _PAGE_CACHE_MASK;
@@ -388,7 +384,7 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type,
}
/* Low ISA region is always mapped WB in page table. No need to track */
- if (is_ISA_range(start, end - 1)) {
+ if (x86_platform.is_untracked_pat_range(start, end)) {
if (new_type)
*new_type = _PAGE_CACHE_WB;
return 0;
@@ -499,7 +495,7 @@ int free_memtype(u64 start, u64 end)
return 0;
/* Low ISA region is always mapped WB. No need to track */
- if (is_ISA_range(start, end - 1))
+ if (x86_platform.is_untracked_pat_range(start, end))
return 0;
is_range_ram = pat_pagerange_is_ram(start, end);
@@ -582,7 +578,7 @@ static unsigned long lookup_memtype(u64 paddr)
int rettype = _PAGE_CACHE_WB;
struct memtype *entry;
- if (is_ISA_range(paddr, paddr + PAGE_SIZE - 1))
+ if (x86_platform.is_untracked_pat_range(paddr, paddr + PAGE_SIZE))
return rettype;
if (pat_pagerange_is_ram(paddr, paddr + PAGE_SIZE)) {
@@ -708,9 +704,8 @@ int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn,
if (!range_is_allowed(pfn, size))
return 0;
- if (file->f_flags & O_SYNC) {
+ if (file->f_flags & O_DSYNC)
flags = _PAGE_CACHE_UC_MINUS;
- }
#ifdef CONFIG_X86_32
/*
@@ -1018,8 +1013,10 @@ static const struct file_operations memtype_fops = {
static int __init pat_memtype_list_init(void)
{
- debugfs_create_file("pat_memtype_list", S_IRUSR, arch_debugfs_dir,
- NULL, &memtype_fops);
+ if (pat_enabled) {
+ debugfs_create_file("pat_memtype_list", S_IRUSR,
+ arch_debugfs_dir, NULL, &memtype_fops);
+ }
return 0;
}
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index ed34f5e35999..c9ba9deafe83 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -6,6 +6,14 @@
#define PGALLOC_GFP GFP_KERNEL | __GFP_NOTRACK | __GFP_REPEAT | __GFP_ZERO
+#ifdef CONFIG_HIGHPTE
+#define PGALLOC_USER_GFP __GFP_HIGHMEM
+#else
+#define PGALLOC_USER_GFP 0
+#endif
+
+gfp_t __userpte_alloc_gfp = PGALLOC_GFP | PGALLOC_USER_GFP;
+
pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
{
return (pte_t *)__get_free_page(PGALLOC_GFP);
@@ -15,16 +23,29 @@ pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
{
struct page *pte;
-#ifdef CONFIG_HIGHPTE
- pte = alloc_pages(PGALLOC_GFP | __GFP_HIGHMEM, 0);
-#else
- pte = alloc_pages(PGALLOC_GFP, 0);
-#endif
+ pte = alloc_pages(__userpte_alloc_gfp, 0);
if (pte)
pgtable_page_ctor(pte);
return pte;
}
+static int __init setup_userpte(char *arg)
+{
+ if (!arg)
+ return -EINVAL;
+
+ /*
+ * "userpte=nohigh" disables allocation of user pagetables in
+ * high memory.
+ */
+ if (strcmp(arg, "nohigh") == 0)
+ __userpte_alloc_gfp &= ~__GFP_HIGHMEM;
+ else
+ return -EINVAL;
+ return 0;
+}
+early_param("userpte", setup_userpte);
+
void ___pte_free_tlb(struct mmu_gather *tlb, struct page *pte)
{
pgtable_page_dtor(pte);
diff --git a/arch/x86/mm/setup_nx.c b/arch/x86/mm/setup_nx.c
index 513d8ed5d2ec..a3250aa34086 100644
--- a/arch/x86/mm/setup_nx.c
+++ b/arch/x86/mm/setup_nx.c
@@ -3,10 +3,8 @@
#include <linux/init.h>
#include <asm/pgtable.h>
+#include <asm/proto.h>
-int nx_enabled;
-
-#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
static int disable_nx __cpuinitdata;
/*
@@ -22,48 +20,41 @@ static int __init noexec_setup(char *str)
if (!str)
return -EINVAL;
if (!strncmp(str, "on", 2)) {
- __supported_pte_mask |= _PAGE_NX;
disable_nx = 0;
} else if (!strncmp(str, "off", 3)) {
disable_nx = 1;
- __supported_pte_mask &= ~_PAGE_NX;
}
+ x86_configure_nx();
return 0;
}
early_param("noexec", noexec_setup);
-#endif
-#ifdef CONFIG_X86_PAE
-void __init set_nx(void)
+void __cpuinit x86_configure_nx(void)
{
- unsigned int v[4], l, h;
-
- if (cpu_has_pae && (cpuid_eax(0x80000000) > 0x80000001)) {
- cpuid(0x80000001, &v[0], &v[1], &v[2], &v[3]);
+ if (cpu_has_nx && !disable_nx)
+ __supported_pte_mask |= _PAGE_NX;
+ else
+ __supported_pte_mask &= ~_PAGE_NX;
+}
- if ((v[3] & (1 << 20)) && !disable_nx) {
- rdmsr(MSR_EFER, l, h);
- l |= EFER_NX;
- wrmsr(MSR_EFER, l, h);
- nx_enabled = 1;
- __supported_pte_mask |= _PAGE_NX;
+void __init x86_report_nx(void)
+{
+ if (!cpu_has_nx) {
+ printk(KERN_NOTICE "Notice: NX (Execute Disable) protection "
+ "missing in CPU or disabled in BIOS!\n");
+ } else {
+#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
+ if (disable_nx) {
+ printk(KERN_INFO "NX (Execute Disable) protection: "
+ "disabled by kernel command line option\n");
+ } else {
+ printk(KERN_INFO "NX (Execute Disable) protection: "
+ "active\n");
}
- }
-}
#else
-void set_nx(void)
-{
-}
+ /* 32bit non-PAE kernel, NX cannot be used */
+ printk(KERN_NOTICE "Notice: NX (Execute Disable) protection "
+ "cannot be enabled: non-PAE kernel!\n");
#endif
-
-#ifdef CONFIG_X86_64
-void __cpuinit check_efer(void)
-{
- unsigned long efer;
-
- rdmsrl(MSR_EFER, efer);
- if (!(efer & EFER_NX) || disable_nx)
- __supported_pte_mask &= ~_PAGE_NX;
+ }
}
-#endif
-
diff --git a/arch/x86/mm/srat_32.c b/arch/x86/mm/srat_32.c
index 6f8aa33031c7..9324f13492d5 100644
--- a/arch/x86/mm/srat_32.c
+++ b/arch/x86/mm/srat_32.c
@@ -267,6 +267,8 @@ int __init get_memcfg_from_srat(void)
e820_register_active_regions(chunk->nid, chunk->start_pfn,
min(chunk->end_pfn, max_pfn));
}
+ /* for out of order entries in SRAT */
+ sort_node_map();
for_each_online_node(nid) {
unsigned long start = node_start_pfn[nid];
diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c
index dbb5381f7b3b..28c68762648f 100644
--- a/arch/x86/mm/srat_64.c
+++ b/arch/x86/mm/srat_64.c
@@ -136,7 +136,7 @@ acpi_numa_x2apic_affinity_init(struct acpi_srat_x2apic_cpu_affinity *pa)
apicid_to_node[apic_id] = node;
node_set(node, cpu_nodes_parsed);
acpi_numa = 1;
- printk(KERN_INFO "SRAT: PXM %u -> APIC %u -> Node %u\n",
+ printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%04x -> Node %u\n",
pxm, apic_id, node);
}
@@ -170,7 +170,7 @@ acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa)
apicid_to_node[apic_id] = node;
node_set(node, cpu_nodes_parsed);
acpi_numa = 1;
- printk(KERN_INFO "SRAT: PXM %u -> APIC %u -> Node %u\n",
+ printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%02x -> Node %u\n",
pxm, apic_id, node);
}
@@ -229,9 +229,11 @@ update_nodes_add(int node, unsigned long start, unsigned long end)
printk(KERN_ERR "SRAT: Hotplug zone not continuous. Partly ignored\n");
}
- if (changed)
+ if (changed) {
+ node_set(node, cpu_nodes_parsed);
printk(KERN_INFO "SRAT: hot plug zone found %Lx - %Lx\n",
nd->start, nd->end);
+ }
}
/* Callback for parsing of the Proximity Domain <-> Memory Area mappings */
@@ -290,8 +292,6 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
printk(KERN_INFO "SRAT: Node %u PXM %u %lx-%lx\n", node, pxm,
start, end);
- e820_register_active_regions(node, start >> PAGE_SHIFT,
- end >> PAGE_SHIFT);
if (ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) {
update_nodes_add(node, start, end);
@@ -319,7 +319,7 @@ static int __init nodes_cover_memory(const struct bootnode *nodes)
unsigned long s = nodes[i].start >> PAGE_SHIFT;
unsigned long e = nodes[i].end >> PAGE_SHIFT;
pxmram += e - s;
- pxmram -= absent_pages_in_range(s, e);
+ pxmram -= __absent_pages_in_range(i, s, e);
if ((long)pxmram < 0)
pxmram = 0;
}
@@ -338,6 +338,19 @@ static int __init nodes_cover_memory(const struct bootnode *nodes)
void __init acpi_numa_arch_fixup(void) {}
+int __init acpi_get_nodes(struct bootnode *physnodes)
+{
+ int i;
+ int ret = 0;
+
+ for_each_node_mask(i, nodes_parsed) {
+ physnodes[ret].start = nodes[i].start;
+ physnodes[ret].end = nodes[i].end;
+ ret++;
+ }
+ return ret;
+}
+
/* Use the information discovered above to actually set up the nodes. */
int __init acpi_scan_nodes(unsigned long start, unsigned long end)
{
@@ -350,11 +363,6 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end)
for (i = 0; i < MAX_NUMNODES; i++)
cutoff_node(i, start, end);
- if (!nodes_cover_memory(nodes)) {
- bad_srat();
- return -1;
- }
-
memnode_shift = compute_hash_shift(node_memblk_range, num_node_memblks,
memblk_nodeid);
if (memnode_shift < 0) {
@@ -364,6 +372,16 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end)
return -1;
}
+ for_each_node_mask(i, nodes_parsed)
+ e820_register_active_regions(i, nodes[i].start >> PAGE_SHIFT,
+ nodes[i].end >> PAGE_SHIFT);
+ /* for out of order entries in SRAT */
+ sort_node_map();
+ if (!nodes_cover_memory(nodes)) {
+ bad_srat();
+ return -1;
+ }
+
/* Account for nodes with cpus and no memory */
nodes_or(node_possible_map, nodes_parsed, cpu_nodes_parsed);
@@ -454,7 +472,6 @@ void __init acpi_fake_nodes(const struct bootnode *fake_nodes, int num_nodes)
for (i = 0; i < num_nodes; i++)
if (fake_nodes[i].start != fake_nodes[i].end)
node_set(i, nodes_parsed);
- WARN_ON(!nodes_cover_memory(fake_nodes));
}
static int null_slit_node_compare(int a, int b)
diff --git a/arch/x86/mm/testmmiotrace.c b/arch/x86/mm/testmmiotrace.c
index 427fd1b56df5..8565d944f7cf 100644
--- a/arch/x86/mm/testmmiotrace.c
+++ b/arch/x86/mm/testmmiotrace.c
@@ -1,12 +1,13 @@
/*
* Written by Pekka Paalanen, 2008-2009 <pq@iki.fi>
*/
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/module.h>
#include <linux/io.h>
#include <linux/mmiotrace.h>
-#define MODULE_NAME "testmmiotrace"
-
static unsigned long mmio_address;
module_param(mmio_address, ulong, 0);
MODULE_PARM_DESC(mmio_address, " Start address of the mapping of 16 kB "
@@ -30,7 +31,7 @@ static unsigned v32(unsigned i)
static void do_write_test(void __iomem *p)
{
unsigned int i;
- pr_info(MODULE_NAME ": write test.\n");
+ pr_info("write test.\n");
mmiotrace_printk("Write test.\n");
for (i = 0; i < 256; i++)
@@ -47,7 +48,7 @@ static void do_read_test(void __iomem *p)
{
unsigned int i;
unsigned errs[3] = { 0 };
- pr_info(MODULE_NAME ": read test.\n");
+ pr_info("read test.\n");
mmiotrace_printk("Read test.\n");
for (i = 0; i < 256; i++)
@@ -68,7 +69,7 @@ static void do_read_test(void __iomem *p)
static void do_read_far_test(void __iomem *p)
{
- pr_info(MODULE_NAME ": read far test.\n");
+ pr_info("read far test.\n");
mmiotrace_printk("Read far test.\n");
ioread32(p + read_far);
@@ -78,7 +79,7 @@ static void do_test(unsigned long size)
{
void __iomem *p = ioremap_nocache(mmio_address, size);
if (!p) {
- pr_err(MODULE_NAME ": could not ioremap, aborting.\n");
+ pr_err("could not ioremap, aborting.\n");
return;
}
mmiotrace_printk("ioremap returned %p.\n", p);
@@ -94,24 +95,22 @@ static int __init init(void)
unsigned long size = (read_far) ? (8 << 20) : (16 << 10);
if (mmio_address == 0) {
- pr_err(MODULE_NAME ": you have to use the module argument "
- "mmio_address.\n");
- pr_err(MODULE_NAME ": DO NOT LOAD THIS MODULE UNLESS"
- " YOU REALLY KNOW WHAT YOU ARE DOING!\n");
+ pr_err("you have to use the module argument mmio_address.\n");
+ pr_err("DO NOT LOAD THIS MODULE UNLESS YOU REALLY KNOW WHAT YOU ARE DOING!\n");
return -ENXIO;
}
- pr_warning(MODULE_NAME ": WARNING: mapping %lu kB @ 0x%08lx in PCI "
- "address space, and writing 16 kB of rubbish in there.\n",
- size >> 10, mmio_address);
+ pr_warning("WARNING: mapping %lu kB @ 0x%08lx in PCI address space, "
+ "and writing 16 kB of rubbish in there.\n",
+ size >> 10, mmio_address);
do_test(size);
- pr_info(MODULE_NAME ": All done.\n");
+ pr_info("All done.\n");
return 0;
}
static void __exit cleanup(void)
{
- pr_debug(MODULE_NAME ": unloaded.\n");
+ pr_debug("unloaded.\n");
}
module_init(init);
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 36fe08eeb5c3..426f3a1a64d3 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -8,6 +8,7 @@
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>
+#include <asm/cache.h>
#include <asm/apic.h>
#include <asm/uv/uv.h>
@@ -40,10 +41,10 @@ union smp_flush_state {
struct {
struct mm_struct *flush_mm;
unsigned long flush_va;
- spinlock_t tlbstate_lock;
+ raw_spinlock_t tlbstate_lock;
DECLARE_BITMAP(flush_cpumask, NR_CPUS);
};
- char pad[CONFIG_X86_INTERNODE_CACHE_BYTES];
+ char pad[INTERNODE_CACHE_BYTES];
} ____cacheline_internodealigned_in_smp;
/* State is put into the per CPU data section, but padded
@@ -180,7 +181,7 @@ static void flush_tlb_others_ipi(const struct cpumask *cpumask,
* num_online_cpus() <= NUM_INVALIDATE_TLB_VECTORS, but it is
* probably not worth checking this for a cache-hot lock.
*/
- spin_lock(&f->tlbstate_lock);
+ raw_spin_lock(&f->tlbstate_lock);
f->flush_mm = mm;
f->flush_va = va;
@@ -198,7 +199,7 @@ static void flush_tlb_others_ipi(const struct cpumask *cpumask,
f->flush_mm = NULL;
f->flush_va = 0;
- spin_unlock(&f->tlbstate_lock);
+ raw_spin_unlock(&f->tlbstate_lock);
}
void native_flush_tlb_others(const struct cpumask *cpumask,
@@ -222,7 +223,7 @@ static int __cpuinit init_smp_flush(void)
int i;
for (i = 0; i < ARRAY_SIZE(flush_state); i++)
- spin_lock_init(&flush_state[i].tlbstate_lock);
+ raw_spin_lock_init(&flush_state[i].tlbstate_lock);
return 0;
}