From c898faf91b3ec6b0f6efa35831b3984fa3331db0 Mon Sep 17 00:00:00 2001 From: Rik van Riel Date: Tue, 5 May 2009 17:28:56 -0400 Subject: x86: 46 bit physical address support on 64 bits Extend the maximum addressable memory on x86-64 from 2^44 to 2^46 bytes. This requires some shuffling around of the vmalloc and virtual memmap memory areas, to keep them away from the direct mapping of up to 64TB of physical memory. This patch also introduces a guard hole between the vmalloc area and the virtual memory map space. There's really no good reason why we wouldn't have a guard hole there. [ Impact: future hardware enablement ] Signed-off-by: Rik van Riel LKML-Reference: <20090505172856.6820db22@cuia.bos.redhat.com> Signed-off-by: H. Peter Anvin --- Documentation/x86/x86_64/mm.txt | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'Documentation') diff --git a/Documentation/x86/x86_64/mm.txt b/Documentation/x86/x86_64/mm.txt index 29b52b14d0b4..539413235845 100644 --- a/Documentation/x86/x86_64/mm.txt +++ b/Documentation/x86/x86_64/mm.txt @@ -6,10 +6,11 @@ Virtual memory map with 4 level page tables: 0000000000000000 - 00007fffffffffff (=47 bits) user space, different per mm hole caused by [48:63] sign extension ffff800000000000 - ffff80ffffffffff (=40 bits) guard hole -ffff880000000000 - ffffc0ffffffffff (=57 TB) direct mapping of all phys. memory -ffffc10000000000 - ffffc1ffffffffff (=40 bits) hole -ffffc20000000000 - ffffe1ffffffffff (=45 bits) vmalloc/ioremap space -ffffe20000000000 - ffffe2ffffffffff (=40 bits) virtual memory map (1TB) +ffff880000000000 - ffffc8ffffffffff (=64 TB) direct mapping of all phys. memory +ffffc80000000000 - ffffc8ffffffffff (=40 bits) hole +ffffc90000000000 - ffffe8ffffffffff (=45 bits) vmalloc/ioremap space +ffffe90000000000 - ffffe9ffffffffff (=40 bits) hole +ffffea0000000000 - ffffeaffffffffff (=40 bits) virtual memory map (1TB) ... unused hole ... ffffffff80000000 - ffffffffa0000000 (=512 MB) kernel text mapping, from phys 0 ffffffffa0000000 - fffffffffff00000 (=1536 MB) module mapping space -- cgit v1.2.3-55-g7522 From 2feceeff1e771850e49f9074307f071964fd9e3e Mon Sep 17 00:00:00 2001 From: H. Peter Anvin Date: Tue, 5 May 2009 19:07:07 -0700 Subject: x86: fix typo in address space documentation Fix a trivial typo in Documentation/x86/x86_64/mm.txt. [ Impact: documentation only ] Signed-off-by: H. Peter Anvin Cc: Rik van Riel --- Documentation/x86/x86_64/mm.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'Documentation') diff --git a/Documentation/x86/x86_64/mm.txt b/Documentation/x86/x86_64/mm.txt index 539413235845..d6498e3cd713 100644 --- a/Documentation/x86/x86_64/mm.txt +++ b/Documentation/x86/x86_64/mm.txt @@ -6,7 +6,7 @@ Virtual memory map with 4 level page tables: 0000000000000000 - 00007fffffffffff (=47 bits) user space, different per mm hole caused by [48:63] sign extension ffff800000000000 - ffff80ffffffffff (=40 bits) guard hole -ffff880000000000 - ffffc8ffffffffff (=64 TB) direct mapping of all phys. memory +ffff880000000000 - ffffc7ffffffffff (=64 TB) direct mapping of all phys. memory ffffc80000000000 - ffffc8ffffffffff (=40 bits) hole ffffc90000000000 - ffffe8ffffffffff (=45 bits) vmalloc/ioremap space ffffe90000000000 - ffffe9ffffffffff (=40 bits) hole -- cgit v1.2.3-55-g7522 From 888a589f6be07d624e21e2174d98375e9f95911b Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Fri, 15 May 2009 13:59:37 -0700 Subject: mm, x86: remove MEMORY_HOTPLUG_RESERVE related code after: | commit b263295dbffd33b0fbff670720fa178c30e3392a | Author: Christoph Lameter | Date: Wed Jan 30 13:30:47 2008 +0100 | | x86: 64-bit, make sparsemem vmemmap the only memory model we don't have MEMORY_HOTPLUG_RESERVE anymore. Historically, x86-64 had an architecture-specific method for memory hotplug whereby it scanned the SRAT for physical memory ranges that could be potentially used for memory hot-add later. By reserving those ranges without physical memory, the memmap would be allocated and left dormant until needed. This depended on the DISCONTIG memory model which has been removed so the code implementing HOTPLUG_RESERVE is now dead. This patch removes the dead code used by MEMORY_HOTPLUG_RESERVE. (Changelog authored by Mel.) v2: updated changelog, and remove hotadd= in doc [ Impact: remove dead code ] Signed-off-by: Yinghai Lu Reviewed-by: Christoph Lameter Reviewed-by: Mel Gorman Workflow-found-OK-by: Andrew Morton LKML-Reference: <4A0C4910.7090508@kernel.org> Signed-off-by: Ingo Molnar --- Documentation/x86/x86_64/boot-options.txt | 5 --- arch/x86/include/asm/numa_64.h | 3 -- arch/x86/mm/numa_64.c | 5 --- arch/x86/mm/srat_64.c | 63 ++++++---------------------- include/linux/mm.h | 2 - mm/page_alloc.c | 69 ------------------------------- 6 files changed, 12 insertions(+), 135 deletions(-) (limited to 'Documentation') diff --git a/Documentation/x86/x86_64/boot-options.txt b/Documentation/x86/x86_64/boot-options.txt index 34c13040a718..2db5893d6c97 100644 --- a/Documentation/x86/x86_64/boot-options.txt +++ b/Documentation/x86/x86_64/boot-options.txt @@ -150,11 +150,6 @@ NUMA Otherwise, the remaining system RAM is allocated to an additional node. - numa=hotadd=percent - Only allow hotadd memory to preallocate page structures upto - percent of already available memory. - numa=hotadd=0 will disable hotadd memory. - ACPI acpi=off Don't enable ACPI diff --git a/arch/x86/include/asm/numa_64.h b/arch/x86/include/asm/numa_64.h index 064ed6df4cbe..7feff0648d74 100644 --- a/arch/x86/include/asm/numa_64.h +++ b/arch/x86/include/asm/numa_64.h @@ -17,9 +17,6 @@ extern int compute_hash_shift(struct bootnode *nodes, int numblks, extern void numa_init_array(void); extern int numa_off; -extern void srat_reserve_add_area(int nodeid); -extern int hotadd_percent; - extern s16 apicid_to_node[MAX_LOCAL_APIC]; extern unsigned long numa_free_all_bootmem(void); diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c index fb61d81a656f..a6a93c395231 100644 --- a/arch/x86/mm/numa_64.c +++ b/arch/x86/mm/numa_64.c @@ -272,9 +272,6 @@ void __init setup_node_bootmem(int nodeid, unsigned long start, reserve_bootmem_node(NODE_DATA(nodeid), bootmap_start, bootmap_pages<> PAGE_SHIFT; unsigned long e_pfn = end >> PAGE_SHIFT; - int ret = 0, changed = 0; + int changed = 0; struct bootnode *nd = &nodes_add[node]; /* I had some trouble with strange memory hotadd regions breaking @@ -210,7 +201,7 @@ reserve_hotadd(int node, unsigned long start, unsigned long end) mistakes */ if ((signed long)(end - start) < NODE_MIN_SIZE) { printk(KERN_ERR "SRAT: Hotplug area too small\n"); - return -1; + return; } /* This check might be a bit too strict, but I'm keeping it for now. */ @@ -218,12 +209,7 @@ reserve_hotadd(int node, unsigned long start, unsigned long end) printk(KERN_ERR "SRAT: Hotplug area %lu -> %lu has existing memory\n", s_pfn, e_pfn); - return -1; - } - - if (!hotadd_enough_memory(&nodes_add[node])) { - printk(KERN_ERR "SRAT: Hotplug area too large\n"); - return -1; + return; } /* Looks good */ @@ -245,11 +231,9 @@ reserve_hotadd(int node, unsigned long start, unsigned long end) printk(KERN_ERR "SRAT: Hotplug zone not continuous. Partly ignored\n"); } - ret = update_end_of_memory(nd->end); - if (changed) - printk(KERN_INFO "SRAT: hot plug zone found %Lx - %Lx\n", nd->start, nd->end); - return ret; + printk(KERN_INFO "SRAT: hot plug zone found %Lx - %Lx\n", + nd->start, nd->end); } /* Callback for parsing of the Proximity Domain <-> Memory Area mappings */ @@ -310,13 +294,10 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma) start, end); e820_register_active_regions(node, start >> PAGE_SHIFT, end >> PAGE_SHIFT); - push_node_boundaries(node, nd->start >> PAGE_SHIFT, - nd->end >> PAGE_SHIFT); - if ((ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) && - (reserve_hotadd(node, start, end) < 0)) { - /* Ignore hotadd region. Undo damage */ - printk(KERN_NOTICE "SRAT: Hotplug region ignored\n"); + if (ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) { + update_nodes_add(node, start, end); + /* restore nodes[node] */ *nd = oldnode; if ((nd->start | nd->end) == 0) node_clear(node, nodes_parsed); @@ -510,26 +491,6 @@ static int null_slit_node_compare(int a, int b) } #endif /* CONFIG_NUMA_EMU */ -void __init srat_reserve_add_area(int nodeid) -{ - if (found_add_area && nodes_add[nodeid].end) { - u64 total_mb; - - printk(KERN_INFO "SRAT: Reserving hot-add memory space " - "for node %d at %Lx-%Lx\n", - nodeid, nodes_add[nodeid].start, nodes_add[nodeid].end); - total_mb = (nodes_add[nodeid].end - nodes_add[nodeid].start) - >> PAGE_SHIFT; - total_mb *= sizeof(struct page); - total_mb >>= 20; - printk(KERN_INFO "SRAT: This will cost you %Lu MB of " - "pre-allocated memory.\n", (unsigned long long)total_mb); - reserve_bootmem_node(NODE_DATA(nodeid), nodes_add[nodeid].start, - nodes_add[nodeid].end - nodes_add[nodeid].start, - BOOTMEM_DEFAULT); - } -} - int __node_distance(int a, int b) { int index; diff --git a/include/linux/mm.h b/include/linux/mm.h index bff1f0d475c7..511b09867096 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1031,8 +1031,6 @@ extern void add_active_range(unsigned int nid, unsigned long start_pfn, unsigned long end_pfn); extern void remove_active_range(unsigned int nid, unsigned long start_pfn, unsigned long end_pfn); -extern void push_node_boundaries(unsigned int nid, unsigned long start_pfn, - unsigned long end_pfn); extern void remove_all_active_ranges(void); extern unsigned long absent_pages_in_range(unsigned long start_pfn, unsigned long end_pfn); diff --git a/mm/page_alloc.c b/mm/page_alloc.c index fe753ecf2aa5..474c7e9dd51a 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -149,10 +149,6 @@ static unsigned long __meminitdata dma_reserve; static int __meminitdata nr_nodemap_entries; static unsigned long __meminitdata arch_zone_lowest_possible_pfn[MAX_NR_ZONES]; static unsigned long __meminitdata arch_zone_highest_possible_pfn[MAX_NR_ZONES]; -#ifdef CONFIG_MEMORY_HOTPLUG_RESERVE - static unsigned long __meminitdata node_boundary_start_pfn[MAX_NUMNODES]; - static unsigned long __meminitdata node_boundary_end_pfn[MAX_NUMNODES]; -#endif /* CONFIG_MEMORY_HOTPLUG_RESERVE */ static unsigned long __initdata required_kernelcore; static unsigned long __initdata required_movablecore; static unsigned long __meminitdata zone_movable_pfn[MAX_NUMNODES]; @@ -3102,64 +3098,6 @@ void __init sparse_memory_present_with_active_regions(int nid) early_node_map[i].end_pfn); } -/** - * push_node_boundaries - Push node boundaries to at least the requested boundary - * @nid: The nid of the node to push the boundary for - * @start_pfn: The start pfn of the node - * @end_pfn: The end pfn of the node - * - * In reserve-based hot-add, mem_map is allocated that is unused until hotadd - * time. Specifically, on x86_64, SRAT will report ranges that can potentially - * be hotplugged even though no physical memory exists. This function allows - * an arch to push out the node boundaries so mem_map is allocated that can - * be used later. - */ -#ifdef CONFIG_MEMORY_HOTPLUG_RESERVE -void __init push_node_boundaries(unsigned int nid, - unsigned long start_pfn, unsigned long end_pfn) -{ - mminit_dprintk(MMINIT_TRACE, "zoneboundary", - "Entering push_node_boundaries(%u, %lu, %lu)\n", - nid, start_pfn, end_pfn); - - /* Initialise the boundary for this node if necessary */ - if (node_boundary_end_pfn[nid] == 0) - node_boundary_start_pfn[nid] = -1UL; - - /* Update the boundaries */ - if (node_boundary_start_pfn[nid] > start_pfn) - node_boundary_start_pfn[nid] = start_pfn; - if (node_boundary_end_pfn[nid] < end_pfn) - node_boundary_end_pfn[nid] = end_pfn; -} - -/* If necessary, push the node boundary out for reserve hotadd */ -static void __meminit account_node_boundary(unsigned int nid, - unsigned long *start_pfn, unsigned long *end_pfn) -{ - mminit_dprintk(MMINIT_TRACE, "zoneboundary", - "Entering account_node_boundary(%u, %lu, %lu)\n", - nid, *start_pfn, *end_pfn); - - /* Return if boundary information has not been provided */ - if (node_boundary_end_pfn[nid] == 0) - return; - - /* Check the boundaries and update if necessary */ - if (node_boundary_start_pfn[nid] < *start_pfn) - *start_pfn = node_boundary_start_pfn[nid]; - if (node_boundary_end_pfn[nid] > *end_pfn) - *end_pfn = node_boundary_end_pfn[nid]; -} -#else -void __init push_node_boundaries(unsigned int nid, - unsigned long start_pfn, unsigned long end_pfn) {} - -static void __meminit account_node_boundary(unsigned int nid, - unsigned long *start_pfn, unsigned long *end_pfn) {} -#endif - - /** * get_pfn_range_for_nid - Return the start and end page frames for a node * @nid: The nid to return the range for. If MAX_NUMNODES, the min and max PFN are returned. @@ -3185,9 +3123,6 @@ void __meminit get_pfn_range_for_nid(unsigned int nid, if (*start_pfn == -1UL) *start_pfn = 0; - - /* Push the node boundaries out if requested */ - account_node_boundary(nid, start_pfn, end_pfn); } /* @@ -3793,10 +3728,6 @@ void __init remove_all_active_ranges(void) { memset(early_node_map, 0, sizeof(early_node_map)); nr_nodemap_entries = 0; -#ifdef CONFIG_MEMORY_HOTPLUG_RESERVE - memset(node_boundary_start_pfn, 0, sizeof(node_boundary_start_pfn)); - memset(node_boundary_end_pfn, 0, sizeof(node_boundary_end_pfn)); -#endif /* CONFIG_MEMORY_HOTPLUG_RESERVE */ } /* Compare two active node_active_regions */ -- cgit v1.2.3-55-g7522