From 69111bac42f5ceacdd22e30947837ceb2c4493ed Mon Sep 17 00:00:00 2001
From: Christoph Lameter
Date: Tue, 21 Oct 2014 15:23:25 -0500
Subject: powerpc: Replace __get_cpu_var uses

This still has not been merged and now powerpc is the only arch that does not have this change. Sorry about missing linuxppc-dev before.

V1->V2 - Fix up to work against 3.18-rc1

__get_cpu_var() is used for multiple purposes in the kernel source. One of them is address calculation via the form &__get_cpu_var(x). This calculates the address for the instance of the percpu variable of the current processor based on an offset.

Other use cases are for storing and retrieving data from the current processor's percpu area. __get_cpu_var() can be used as an lvalue when writing data or on the right side of an assignment.

__get_cpu_var() is defined as:

	#define __get_cpu_var(var) (*this_cpu_ptr(&(var)))

__get_cpu_var() always only does an address determination. However, store and retrieve operations could use a segment prefix (or global register on other platforms) to avoid the address calculation.

this_cpu_write() and this_cpu_read() can directly take an offset into a percpu area and use optimized assembly code to read and write per cpu variables.

This patch converts __get_cpu_var into either an explicit address calculation using this_cpu_ptr() or into a use of this_cpu operations that use the offset. Thereby address calculations are avoided and fewer registers are used when code is generated.

At the end of the patch set all uses of __get_cpu_var have been removed so the macro is removed too.

The patch set includes passes over all arches as well. Once these operations are used throughout then specialized macros can be defined in non-x86 arches as well in order to optimize per cpu access, e.g. by using a global register that may be set to the per cpu base.

Transformations done to __get_cpu_var()

1. Determine the address of the percpu instance of the current processor.

	DEFINE_PER_CPU(int, y);
	int *x = &__get_cpu_var(y);

   Converts to

	int *x = this_cpu_ptr(&y);

2. Same as #1 but this time an array structure is involved.

	DEFINE_PER_CPU(int, y[20]);
	int *x = __get_cpu_var(y);

   Converts to

	int *x = this_cpu_ptr(y);

3. Retrieve the content of the current processor's instance of a per cpu variable.

	DEFINE_PER_CPU(int, y);
	int x = __get_cpu_var(y);

   Converts to

	int x = __this_cpu_read(y);

4. Retrieve the content of a percpu struct

	DEFINE_PER_CPU(struct mystruct, y);
	struct mystruct x = __get_cpu_var(y);

   Converts to

	memcpy(&x, this_cpu_ptr(&y), sizeof(x));

5. Assignment to a per cpu variable

	DEFINE_PER_CPU(int, y);
	__get_cpu_var(y) = x;

   Converts to

	__this_cpu_write(y, x);

6. Increment/decrement etc. of a per cpu variable

	DEFINE_PER_CPU(int, y);
	__get_cpu_var(y)++;

   Converts to

	__this_cpu_inc(y);

Cc: Benjamin Herrenschmidt
Cc: Paul Mackerras
Signed-off-by: Christoph Lameter
[mpe: Fix build errors caused by set/or_softirq_pending(), and rework assignment in __set_breakpoint() to use memcpy().]
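To see the six patterns side by side, they collapse into one compilable sketch (my_counter is a hypothetical percpu variable used purely for illustration; the __this_cpu_* forms assume preemption is already disabled):

	#include <linux/percpu.h>

	static DEFINE_PER_CPU(int, my_counter);

	static void percpu_conversion_examples(void)
	{
		/* was: int *p = &__get_cpu_var(my_counter); */
		int *p = this_cpu_ptr(&my_counter);

		/* was: int v = __get_cpu_var(my_counter); */
		int v = __this_cpu_read(my_counter);

		/* was: __get_cpu_var(my_counter) = v + 1; */
		__this_cpu_write(my_counter, v + 1);

		/* was: __get_cpu_var(my_counter)++; */
		__this_cpu_inc(my_counter);

		(void)p;
	}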
Signed-off-by: Michael Ellerman
--- arch/powerpc/mm/hash_native_64.c | 2 +- arch/powerpc/mm/hash_utils_64.c | 2 +- arch/powerpc/mm/hugetlbpage-book3e.c | 6 +++--- arch/powerpc/mm/hugetlbpage.c | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c index ae4962a06476..d53288a08c37 100644 --- a/arch/powerpc/mm/hash_native_64.c +++ b/arch/powerpc/mm/hash_native_64.c @@ -629,7 +629,7 @@ static void native_flush_hash_range(unsigned long number, int local) unsigned long want_v; unsigned long flags; real_pte_t pte; - struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch); + struct ppc64_tlb_batch *batch = this_cpu_ptr(&ppc64_tlb_batch); unsigned long psize = batch->psize; int ssize = batch->ssize; int i; diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index d5339a3b9945..f01027731e23 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -1322,7 +1322,7 @@ void flush_hash_range(unsigned long number, int local) else { int i; struct ppc64_tlb_batch *batch = - &__get_cpu_var(ppc64_tlb_batch); + this_cpu_ptr(&ppc64_tlb_batch); for (i = 0; i < number; i++) flush_hash_page(batch->vpn[i], batch->pte[i], diff --git a/arch/powerpc/mm/hugetlbpage-book3e.c b/arch/powerpc/mm/hugetlbpage-book3e.c index 5e4ee2573903..ba47aaf33a4b 100644 --- a/arch/powerpc/mm/hugetlbpage-book3e.c +++ b/arch/powerpc/mm/hugetlbpage-book3e.c @@ -33,13 +33,13 @@ static inline int tlb1_next(void) ncams = mfspr(SPRN_TLB1CFG) & TLBnCFG_N_ENTRY; - index = __get_cpu_var(next_tlbcam_idx); + index = this_cpu_read(next_tlbcam_idx); /* Just round-robin the entries and wrap when we hit the end */ if (unlikely(index == ncams - 1)) - __get_cpu_var(next_tlbcam_idx) = tlbcam_index; + __this_cpu_write(next_tlbcam_idx, tlbcam_index); else - __get_cpu_var(next_tlbcam_idx)++; + __this_cpu_inc(next_tlbcam_idx); return index; } diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index 7e70ae968e5f..8aa04f03fd31 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -462,7 +462,7 @@ static void hugepd_free(struct mmu_gather *tlb, void *hugepte) { struct hugepd_freelist **batchp; - batchp = &get_cpu_var(hugepd_freelist_cur); + batchp = this_cpu_ptr(&hugepd_freelist_cur); if (atomic_read(&tlb->mm->mm_users) < 2 || cpumask_equal(mm_cpumask(tlb->mm), -- cgit v1.2.3-55-g7522

From 16d0f5c4af76b0c3424290937bf1ac22adf439b1 Mon Sep 17 00:00:00 2001
From: Anton Blanchard
Date: Tue, 14 Oct 2014 22:17:47 +1100
Subject: powerpc: Remove ppc_md.remove_memory

We have an extra level of indirection on memory hot remove which is not matched on memory hot add. Memory hotplug is book3s only, so there is no need for it.

This also means remove_memory() (i.e. memory hot unplug) now works on powernv.
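A condensed sketch of the resulting hot-remove path (the diff below has the exact hunks; __remove_pages() and its error handling are elided, and vstart is a local introduced only for this sketch):

	int arch_remove_memory(u64 start, u64 size)
	{
		unsigned long vstart = (unsigned long)__va(start);
		int ret;

		/* ... __remove_pages() on the section, exactly as before ... */

		/* What pseries_remove_memory() used to do, now done for
		 * every platform: tear down the htab bolted mappings ... */
		ret = remove_section_mapping(vstart, vstart + size);

		/* ... and flush any vmalloc aliases into that range. */
		vm_unmap_aliases();

		return ret;
	}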
Signed-off-by: Anton Blanchard
Signed-off-by: Michael Ellerman
--- arch/powerpc/include/asm/machdep.h | 4 ---- arch/powerpc/mm/mem.c | 14 ++++++++++++-- arch/powerpc/platforms/pseries/hotplug-memory.c | 21 --------------------- 3 files changed, 12 insertions(+), 27 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h index 4a6511f94258..15c9150a58cc 100644 --- a/arch/powerpc/include/asm/machdep.h +++ b/arch/powerpc/include/asm/machdep.h @@ -291,10 +291,6 @@ struct machdep_calls { #ifdef CONFIG_ARCH_RANDOM int (*get_random_long)(unsigned long *v); #endif - -#ifdef CONFIG_MEMORY_HOTREMOVE - int (*remove_memory)(u64, u64); -#endif }; extern void e500_idle(void); diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 8ebaac75c940..2add0b7b9f6d 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #include @@ -144,8 +145,17 @@ int arch_remove_memory(u64 start, u64 size) zone = page_zone(pfn_to_page(start_pfn)); ret = __remove_pages(zone, start_pfn, nr_pages); - if (!ret && (ppc_md.remove_memory)) - ret = ppc_md.remove_memory(start, size); + if (ret) + return ret; + + /* Remove htab bolted mappings for this section of memory */ + start = (unsigned long)__va(start); + ret = remove_section_mapping(start, start + size); + + /* Ensure all vmalloc mappings are flushed in case they also + * hit that section of memory + */ + vm_unmap_aliases(); return ret; } diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c index 3c4c0dcd90d3..3cb256c2138e 100644 --- a/arch/powerpc/platforms/pseries/hotplug-memory.c +++ b/arch/powerpc/platforms/pseries/hotplug-memory.c @@ -12,7 +12,6 @@ #include #include #include -#include #include #include @@ -66,22 +65,6 @@ unsigned long pseries_memory_block_size(void) } #ifdef CONFIG_MEMORY_HOTREMOVE -static int pseries_remove_memory(u64 start, u64 size) -{ - int ret; - - /* Remove htab bolted mappings for this section of memory */ - start = (unsigned long)__va(start); - ret = remove_section_mapping(start, start + size); - - /* Ensure all vmalloc mappings are flushed in case they also - * hit that section of memory - */ - vm_unmap_aliases(); - - return ret; -} - static int pseries_remove_memblock(unsigned long base, unsigned int memblock_size) { unsigned long block_sz, start_pfn; @@ -262,10 +245,6 @@ static int __init pseries_memory_hotplug_init(void) if (firmware_has_feature(FW_FEATURE_LPAR)) of_reconfig_notifier_register(&pseries_mem_nb); -#ifdef CONFIG_MEMORY_HOTREMOVE - ppc_md.remove_memory = pseries_remove_memory; -#endif - return 0; } machine_device_initcall(pseries, pseries_memory_hotplug_init); -- cgit v1.2.3-55-g7522

From c51a6821bdbc9068adda93b3b8ee65df8e4642a6 Mon Sep 17 00:00:00 2001
From: LEROY Christophe
Date: Fri, 19 Sep 2014 10:36:10 +0200
Subject: powerpc/8xx: Invalidate non present TLB as early as possible

The 8xx sometimes needs to load invalid/non-present TLBs in its DTLB asm handler. These must be invalidated separately, as the Linux mm doesn't do it.

Commit 5efab4a02c89c252fb4cce097aafde5f8208dbfe invalidated them in arch/powerpc/mm/fault.c. This patch does the invalidation earlier in order to free the TLB as soon as possible.
This also has the advantage of removing some 8xx-specific code from fault.c.

Signed-off-by: Christophe Leroy
Signed-off-by: Scott Wood
--- arch/powerpc/kernel/head_8xx.S | 15 ++++++++++----- arch/powerpc/mm/fault.c | 7 ------- 2 files changed, 10 insertions(+), 12 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index acf6d7eab6d5..d99aac0d69f1 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -475,8 +475,11 @@ InstructionTLBError1: EXCEPTION_PROLOG_2 mr r4,r12 mr r5,r9 + andis. r10,r5,0x4000 + beq+ 1f + tlbie r4 /* 0x400 is InstructionAccess exception, needed by bad_page_fault() */ - EXC_XFER_LITE(0x400, handle_page_fault) +1: EXC_XFER_LITE(0x400, handle_page_fault) /* This is the data TLB error on the MPC8xx. This could be due to * many reasons, including a dirty update to a pte. We bail out to @@ -492,11 +495,13 @@ DataTLBError: DARFixed:/* Return from dcbx instruction bug workaround */ EXCEPTION_PROLOG_1 EXCEPTION_PROLOG_2 - mfspr r10,SPRN_DSISR - stw r10,_DSISR(r11) - mr r5,r10 + mfspr r5,SPRN_DSISR + stw r5,_DSISR(r11) mfspr r4,SPRN_DAR - li r10,RPN_PATTERN + andis. r10,r5,0x4000 + beq+ 1f + tlbie r4 +1: li r10,RPN_PATTERN mtspr SPRN_DAR,r10 /* Tag DAR, to be used in DTLB Error */ /* 0x300 is DataAccess exception, needed by bad_page_fault() */ EXC_XFER_LITE(0x300, handle_page_fault) diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index 08d659a9fcdb..eb79907f34fa 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -43,7 +43,6 @@ #include #include #include -#include #include "icswx.h" @@ -380,12 +379,6 @@ good_area: goto bad_area; #endif /* CONFIG_6xx */ #if defined(CONFIG_8xx) - /* 8xx sometimes need to load a invalid/non-present TLBs. - * These must be invalidated separately as linux mm don't. - */ - if (error_code & 0x40000000) /* no translation? */ - _tlbil_va(address, 0, 0, 0); - /* The MPC8xx seems to always set 0x80000000, which is * "undefined". Of those that can be set, this is the only * one which seems bad. -- cgit v1.2.3-55-g7522

From 10239733ee8617bac3f1c1769af43a88ed979324 Mon Sep 17 00:00:00 2001
From: Anton Blanchard
Date: Wed, 17 Sep 2014 22:15:33 +1000
Subject: powerpc: Remove bootmem allocator

At the moment we transition from the memblock allocator to the bootmem allocator. Getting rid of the bootmem allocator removes a bunch of complicated code (most of which I owe the dubious honour of being responsible for writing).
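The replacement pattern used throughout is worth spelling out once. A minimal sketch, assuming the 3.18-era memblock_alloc() that returns a physical address and panics internally on failure (early_alloc_zeroed is a hypothetical helper name):

	#include <linux/memblock.h>
	#include <linux/string.h>

	static void * __init early_alloc_zeroed(unsigned long size)
	{
		/* No NULL check needed: memblock panics if it can't allocate. */
		void *p = __va(memblock_alloc(size, size));

		memset(p, 0, size);
		return p;
	}

This is the same shape as the pgtable_32.c and pgtable_64.c hunks below.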
Signed-off-by: Anton Blanchard Tested-by: Emil Medve Signed-off-by: Michael Ellerman --- arch/powerpc/Kconfig | 1 + arch/powerpc/include/asm/setup.h | 3 +- arch/powerpc/kernel/setup_32.c | 5 +- arch/powerpc/kernel/setup_64.c | 3 +- arch/powerpc/mm/init_32.c | 9 -- arch/powerpc/mm/mem.c | 62 +------- arch/powerpc/mm/numa.c | 224 ++++----------------------- arch/powerpc/mm/pgtable_32.c | 3 +- arch/powerpc/mm/pgtable_64.c | 6 +- arch/powerpc/platforms/512x/mpc512x_shared.c | 9 +- 10 files changed, 47 insertions(+), 278 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 88eace4e28c3..98e9c548bd75 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -148,6 +148,7 @@ config PPC select HAVE_ARCH_AUDITSYSCALL select ARCH_SUPPORTS_ATOMIC_RMW select DCACHE_WORD_ACCESS if PPC64 && CPU_LITTLE_ENDIAN + select NO_BOOTMEM config GENERIC_CSUM def_bool CPU_LITTLE_ENDIAN diff --git a/arch/powerpc/include/asm/setup.h b/arch/powerpc/include/asm/setup.h index 11ba86e17631..fbdf18cf954c 100644 --- a/arch/powerpc/include/asm/setup.h +++ b/arch/powerpc/include/asm/setup.h @@ -8,7 +8,6 @@ extern void ppc_printk_progress(char *s, unsigned short hex); extern unsigned int rtas_data; extern int mem_init_done; /* set on boot once kmalloc can be called */ -extern int init_bootmem_done; /* set once bootmem is available */ extern unsigned long long memory_limit; extern unsigned long klimit; extern void *zalloc_maybe_bootmem(size_t size, gfp_t mask); @@ -24,7 +23,7 @@ extern void reloc_got2(unsigned long); #define PTRRELOC(x) ((typeof(x)) add_reloc_offset((unsigned long)(x))) void check_for_initrd(void); -void do_init_bootmem(void); +void initmem_init(void); void setup_panic(void); #define ARCH_PANIC_TIMEOUT 180 diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index 07831ed0d9ef..88a36e64bf9b 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -311,9 +311,8 @@ void __init setup_arch(char **cmdline_p) irqstack_early_init(); - /* set up the bootmem stuff with available memory */ - do_init_bootmem(); - if ( ppc_md.progress ) ppc_md.progress("setup_arch: bootmem", 0x3eab); + initmem_init(); + if ( ppc_md.progress ) ppc_md.progress("setup_arch: initmem", 0x3eab); #ifdef CONFIG_DUMMY_CONSOLE conswitchp = &dummy_con; diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 615aa904b216..91d9cfaffe6c 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -689,8 +689,7 @@ void __init setup_arch(char **cmdline_p) exc_lvl_early_init(); emergency_stack_init(); - /* set up the bootmem stuff with available memory */ - do_init_bootmem(); + initmem_init(); sparse_init(); #ifdef CONFIG_DUMMY_CONSOLE diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c index 415a51b028b9..3b3100433c18 100644 --- a/arch/powerpc/mm/init_32.c +++ b/arch/powerpc/mm/init_32.c @@ -195,15 +195,6 @@ void __init MMU_init(void) memblock_set_current_limit(lowmem_end_addr); } -/* This is only called until mem_init is done. */ -void __init *early_get_page(void) -{ - if (init_bootmem_done) - return alloc_bootmem_pages(PAGE_SIZE); - else - return __va(memblock_alloc(PAGE_SIZE, PAGE_SIZE)); -} - #ifdef CONFIG_8xx /* No 8xx specific .c file to put that in ... 
*/ void setup_initial_memory_limit(phys_addr_t first_memblock_base, phys_addr_t first_memblock_size) diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 2add0b7b9f6d..e076d19c095b 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -61,7 +61,6 @@ #define CPU_FTR_NOEXECUTE 0 #endif -int init_bootmem_done; int mem_init_done; unsigned long long memory_limit; @@ -190,70 +189,22 @@ walk_system_ram_range(unsigned long start_pfn, unsigned long nr_pages, } EXPORT_SYMBOL_GPL(walk_system_ram_range); -/* - * Initialize the bootmem system and give it all the memory we - * have available. If we are using highmem, we only put the - * lowmem into the bootmem system. - */ #ifndef CONFIG_NEED_MULTIPLE_NODES -void __init do_init_bootmem(void) +void __init initmem_init(void) { - unsigned long start, bootmap_pages; - unsigned long total_pages; - struct memblock_region *reg; - int boot_mapsize; - max_low_pfn = max_pfn = memblock_end_of_DRAM() >> PAGE_SHIFT; - total_pages = (memblock_end_of_DRAM() - memstart_addr) >> PAGE_SHIFT; + min_low_pfn = MEMORY_START >> PAGE_SHIFT; #ifdef CONFIG_HIGHMEM - total_pages = total_lowmem >> PAGE_SHIFT; max_low_pfn = lowmem_end_addr >> PAGE_SHIFT; #endif - /* - * Find an area to use for the bootmem bitmap. Calculate the size of - * bitmap required as (Total Memory) / PAGE_SIZE / BITS_PER_BYTE. - * Add 1 additional page in case the address isn't page-aligned. - */ - bootmap_pages = bootmem_bootmap_pages(total_pages); - - start = memblock_alloc(bootmap_pages << PAGE_SHIFT, PAGE_SIZE); - - min_low_pfn = MEMORY_START >> PAGE_SHIFT; - boot_mapsize = init_bootmem_node(NODE_DATA(0), start >> PAGE_SHIFT, min_low_pfn, max_low_pfn); - /* Place all memblock_regions in the same node and merge contiguous * memblock_regions */ memblock_set_node(0, (phys_addr_t)ULLONG_MAX, &memblock.memory, 0); - /* Add all physical memory to the bootmem map, mark each area - * present. - */ -#ifdef CONFIG_HIGHMEM - free_bootmem_with_active_regions(0, lowmem_end_addr >> PAGE_SHIFT); - - /* reserve the sections we're already using */ - for_each_memblock(reserved, reg) { - unsigned long top = reg->base + reg->size - 1; - if (top < lowmem_end_addr) - reserve_bootmem(reg->base, reg->size, BOOTMEM_DEFAULT); - else if (reg->base < lowmem_end_addr) { - unsigned long trunc_size = lowmem_end_addr - reg->base; - reserve_bootmem(reg->base, trunc_size, BOOTMEM_DEFAULT); - } - } -#else - free_bootmem_with_active_regions(0, max_pfn); - - /* reserve the sections we're already using */ - for_each_memblock(reserved, reg) - reserve_bootmem(reg->base, reg->size, BOOTMEM_DEFAULT); -#endif /* XXX need to clip this if using highmem? 
*/ sparse_memory_present_with_active_regions(0); - - init_bootmem_done = 1; } /* mark pages that don't exist as nosave */ @@ -369,14 +320,6 @@ void __init paging_init(void) mark_nonram_nosave(); } -static void __init register_page_bootmem_info(void) -{ - int i; - - for_each_online_node(i) - register_page_bootmem_info_node(NODE_DATA(i)); -} - void __init mem_init(void) { /* @@ -389,7 +332,6 @@ void __init mem_init(void) swiotlb_init(0); #endif - register_page_bootmem_info(); high_memory = (void *) __va(max_low_pfn * PAGE_SIZE); set_max_mapnr(max_pfn); free_all_bootmem(); diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index b9d1dfdbe5bb..2e9ad2d76b75 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -134,28 +134,6 @@ static int __init fake_numa_create_new_node(unsigned long end_pfn, return 0; } -/* - * get_node_active_region - Return active region containing pfn - * Active range returned is empty if none found. - * @pfn: The page to return the region for - * @node_ar: Returned set to the active region containing @pfn - */ -static void __init get_node_active_region(unsigned long pfn, - struct node_active_region *node_ar) -{ - unsigned long start_pfn, end_pfn; - int i, nid; - - for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, &nid) { - if (pfn >= start_pfn && pfn < end_pfn) { - node_ar->nid = nid; - node_ar->start_pfn = start_pfn; - node_ar->end_pfn = end_pfn; - break; - } - } -} - static void reset_numa_cpu_lookup_table(void) { unsigned int cpu; @@ -928,134 +906,48 @@ static void __init dump_numa_memory_topology(void) } } -/* - * Allocate some memory, satisfying the memblock or bootmem allocator where - * required. nid is the preferred node and end is the physical address of - * the highest address in the node. - * - * Returns the virtual address of the memory. - */ -static void __init *careful_zallocation(int nid, unsigned long size, - unsigned long align, - unsigned long end_pfn) -{ - void *ret; - int new_nid; - unsigned long ret_paddr; - - ret_paddr = __memblock_alloc_base(size, align, end_pfn << PAGE_SHIFT); - - /* retry over all memory */ - if (!ret_paddr) - ret_paddr = __memblock_alloc_base(size, align, memblock_end_of_DRAM()); - - if (!ret_paddr) - panic("numa.c: cannot allocate %lu bytes for node %d", - size, nid); - - ret = __va(ret_paddr); - - /* - * We initialize the nodes in numeric order: 0, 1, 2... - * and hand over control from the MEMBLOCK allocator to the - * bootmem allocator. If this function is called for - * node 5, then we know that all nodes <5 are using the - * bootmem allocator instead of the MEMBLOCK allocator. - * - * So, check the nid from which this allocation came - * and double check to see if we need to use bootmem - * instead of the MEMBLOCK. We don't free the MEMBLOCK memory - * since it would be useless. - */ - new_nid = early_pfn_to_nid(ret_paddr >> PAGE_SHIFT); - if (new_nid < nid) { - ret = __alloc_bootmem_node(NODE_DATA(new_nid), - size, align, 0); - - dbg("alloc_bootmem %p %lx\n", ret, size); - } - - memset(ret, 0, size); - return ret; -} - static struct notifier_block ppc64_numa_nb = { .notifier_call = cpu_numa_callback, .priority = 1 /* Must run before sched domains notifier. 
*/ }; -static void __init mark_reserved_regions_for_nid(int nid) +/* Initialize NODE_DATA for a node on the local memory */ +static void __init setup_node_data(int nid, u64 start_pfn, u64 end_pfn) { - struct pglist_data *node = NODE_DATA(nid); - struct memblock_region *reg; - - for_each_memblock(reserved, reg) { - unsigned long physbase = reg->base; - unsigned long size = reg->size; - unsigned long start_pfn = physbase >> PAGE_SHIFT; - unsigned long end_pfn = PFN_UP(physbase + size); - struct node_active_region node_ar; - unsigned long node_end_pfn = pgdat_end_pfn(node); - - /* - * Check to make sure that this memblock.reserved area is - * within the bounds of the node that we care about. - * Checking the nid of the start and end points is not - * sufficient because the reserved area could span the - * entire node. - */ - if (end_pfn <= node->node_start_pfn || - start_pfn >= node_end_pfn) - continue; - - get_node_active_region(start_pfn, &node_ar); - while (start_pfn < end_pfn && - node_ar.start_pfn < node_ar.end_pfn) { - unsigned long reserve_size = size; - /* - * if reserved region extends past active region - * then trim size to active region - */ - if (end_pfn > node_ar.end_pfn) - reserve_size = (node_ar.end_pfn << PAGE_SHIFT) - - physbase; - /* - * Only worry about *this* node, others may not - * yet have valid NODE_DATA(). - */ - if (node_ar.nid == nid) { - dbg("reserve_bootmem %lx %lx nid=%d\n", - physbase, reserve_size, node_ar.nid); - reserve_bootmem_node(NODE_DATA(node_ar.nid), - physbase, reserve_size, - BOOTMEM_DEFAULT); - } - /* - * if reserved region is contained in the active region - * then done. - */ - if (end_pfn <= node_ar.end_pfn) - break; - - /* - * reserved region extends past the active region - * get next active region that contains this - * reserved region - */ - start_pfn = node_ar.end_pfn; - physbase = start_pfn << PAGE_SHIFT; - size = size - reserve_size; - get_node_active_region(start_pfn, &node_ar); - } - } + u64 spanned_pages = end_pfn - start_pfn; + const size_t nd_size = roundup(sizeof(pg_data_t), SMP_CACHE_BYTES); + u64 nd_pa; + void *nd; + int tnid; + + if (spanned_pages) + pr_info("Initmem setup node %d [mem %#010Lx-%#010Lx]\n", + nid, start_pfn << PAGE_SHIFT, + (end_pfn << PAGE_SHIFT) - 1); + else + pr_info("Initmem setup node %d\n", nid); + + nd_pa = memblock_alloc_try_nid(nd_size, SMP_CACHE_BYTES, nid); + nd = __va(nd_pa); + + /* report and initialize */ + pr_info(" NODE_DATA [mem %#010Lx-%#010Lx]\n", + nd_pa, nd_pa + nd_size - 1); + tnid = early_pfn_to_nid(nd_pa >> PAGE_SHIFT); + if (tnid != nid) + pr_info(" NODE_DATA(%d) on node %d\n", nid, tnid); + + node_data[nid] = nd; + memset(NODE_DATA(nid), 0, sizeof(pg_data_t)); + NODE_DATA(nid)->node_id = nid; + NODE_DATA(nid)->node_start_pfn = start_pfn; + NODE_DATA(nid)->node_spanned_pages = spanned_pages; } - -void __init do_init_bootmem(void) +void __init initmem_init(void) { int nid, cpu; - min_low_pfn = 0; max_low_pfn = memblock_end_of_DRAM() >> PAGE_SHIFT; max_pfn = max_low_pfn; @@ -1064,64 +956,16 @@ void __init do_init_bootmem(void) else dump_numa_memory_topology(); + memblock_dump_all(); + for_each_online_node(nid) { unsigned long start_pfn, end_pfn; - void *bootmem_vaddr; - unsigned long bootmap_pages; get_pfn_range_for_nid(nid, &start_pfn, &end_pfn); - - /* - * Allocate the node structure node local if possible - * - * Be careful moving this around, as it relies on all - * previous nodes' bootmem to be initialized and have - * all reserved areas marked. 
- */ - NODE_DATA(nid) = careful_zallocation(nid, - sizeof(struct pglist_data), - SMP_CACHE_BYTES, end_pfn); - - dbg("node %d\n", nid); - dbg("NODE_DATA() = %p\n", NODE_DATA(nid)); - - NODE_DATA(nid)->bdata = &bootmem_node_data[nid]; - NODE_DATA(nid)->node_start_pfn = start_pfn; - NODE_DATA(nid)->node_spanned_pages = end_pfn - start_pfn; - - if (NODE_DATA(nid)->node_spanned_pages == 0) - continue; - - dbg("start_paddr = %lx\n", start_pfn << PAGE_SHIFT); - dbg("end_paddr = %lx\n", end_pfn << PAGE_SHIFT); - - bootmap_pages = bootmem_bootmap_pages(end_pfn - start_pfn); - bootmem_vaddr = careful_zallocation(nid, - bootmap_pages << PAGE_SHIFT, - PAGE_SIZE, end_pfn); - - dbg("bootmap_vaddr = %p\n", bootmem_vaddr); - - init_bootmem_node(NODE_DATA(nid), - __pa(bootmem_vaddr) >> PAGE_SHIFT, - start_pfn, end_pfn); - - free_bootmem_with_active_regions(nid, end_pfn); - /* - * Be very careful about moving this around. Future - * calls to careful_zallocation() depend on this getting - * done correctly. - */ - mark_reserved_regions_for_nid(nid); + setup_node_data(nid, start_pfn, end_pfn); sparse_memory_present_with_active_regions(nid); } - init_bootmem_done = 1; - - /* - * Now bootmem is initialised we can create the node to cpumask - * lookup tables and setup the cpu callback to populate them. - */ setup_node_to_cpumask_map(); reset_numa_cpu_lookup_table(); diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c index cf11342bf519..d545b1231594 100644 --- a/arch/powerpc/mm/pgtable_32.c +++ b/arch/powerpc/mm/pgtable_32.c @@ -100,12 +100,11 @@ __init_refok pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long add { pte_t *pte; extern int mem_init_done; - extern void *early_get_page(void); if (mem_init_done) { pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO); } else { - pte = (pte_t *)early_get_page(); + pte = __va(memblock_alloc(PAGE_SIZE, PAGE_SIZE)); if (pte) clear_page(pte); } diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c index c8d709ab489d..cdb19ab859d3 100644 --- a/arch/powerpc/mm/pgtable_64.c +++ b/arch/powerpc/mm/pgtable_64.c @@ -75,11 +75,7 @@ static __ref void *early_alloc_pgtable(unsigned long size) { void *pt; - if (init_bootmem_done) - pt = __alloc_bootmem(size, size, __pa(MAX_DMA_ADDRESS)); - else - pt = __va(memblock_alloc_base(size, size, - __pa(MAX_DMA_ADDRESS))); + pt = __va(memblock_alloc_base(size, size, __pa(MAX_DMA_ADDRESS))); memset(pt, 0, size); return pt; diff --git a/arch/powerpc/platforms/512x/mpc512x_shared.c b/arch/powerpc/platforms/512x/mpc512x_shared.c index e996e007bc44..711f3d352af7 100644 --- a/arch/powerpc/platforms/512x/mpc512x_shared.c +++ b/arch/powerpc/platforms/512x/mpc512x_shared.c @@ -18,7 +18,7 @@ #include #include #include -#include +#include #include #include @@ -297,14 +297,13 @@ static void __init mpc512x_setup_diu(void) * and so negatively affect boot time. Instead we reserve the * already configured frame buffer area so that it won't be * destroyed. The starting address of the area to reserve and - * also it's length is passed to reserve_bootmem(). It will be + * also it's length is passed to memblock_reserve(). It will be * freed later on first open of fbdev, when splash image is not * needed any more. 
*/ if (diu_shared_fb.in_use) { - ret = reserve_bootmem(diu_shared_fb.fb_phys, - diu_shared_fb.fb_len, - BOOTMEM_EXCLUSIVE); + ret = memblock_reserve(diu_shared_fb.fb_phys, + diu_shared_fb.fb_len); if (ret) { pr_err("%s: reserve bootmem failed\n", __func__); diu_shared_fb.in_use = false; -- cgit v1.2.3-55-g7522 From 14ed740957704e8768523899e0fa31972577bf65 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Wed, 17 Sep 2014 22:15:34 +1000 Subject: powerpc: Remove some old bootmem related comments Now bootmem is gone from powerpc we can remove comments mentioning it. Signed-off-by: Anton Blanchard Tested-by: Emil Medve Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/prom.c | 5 +---- arch/powerpc/kernel/rtas.c | 4 ++-- arch/powerpc/kvm/book3s_hv_builtin.c | 2 +- arch/powerpc/mm/hugetlbpage.c | 4 ++-- arch/powerpc/mm/pgtable_64.c | 4 ---- 5 files changed, 6 insertions(+), 13 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index 099f27e6d1b0..6af05fc1dec9 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -696,10 +696,7 @@ void __init early_init_devtree(void *params) reserve_crashkernel(); early_reserve_mem(); - /* - * Ensure that total memory size is page-aligned, because otherwise - * mark_bootmem() gets upset. - */ + /* Ensure that total memory size is page-aligned. */ limit = ALIGN(memory_limit ?: memblock_phys_mem_size(), PAGE_SIZE); memblock_enforce_memory_limit(limit); diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index 8b4c857c1421..4af905e81ab0 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -1091,8 +1091,8 @@ asmlinkage int ppc_rtas(struct rtas_args __user *uargs) } /* - * Call early during boot, before mem init or bootmem, to retrieve the RTAS - * informations from the device-tree and allocate the RMO buffer for userland + * Call early during boot, before mem init, to retrieve the RTAS + * information from the device-tree and allocate the RMO buffer for userland * accesses. */ void __init rtas_initialize(void) diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c index 4fdc27c80f4c..e64868c0b8de 100644 --- a/arch/powerpc/kvm/book3s_hv_builtin.c +++ b/arch/powerpc/kvm/book3s_hv_builtin.c @@ -154,7 +154,7 @@ EXPORT_SYMBOL_GPL(kvm_release_hpt); * kvm_cma_reserve() - reserve area for kvm hash pagetable * * This function reserves memory from early allocator. It should be - * called by arch specific code once the early allocator (memblock or bootmem) + * called by arch specific code once the memblock allocator * has been activated and all other subsystems have already allocated/reserved * memory. */ diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index 8aa04f03fd31..b460e723f0ec 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -276,7 +276,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz #ifdef CONFIG_PPC_FSL_BOOK3E /* Build list of addresses of gigantic pages. This function is used in early - * boot before the buddy or bootmem allocator is setup. + * boot before the buddy allocator is setup. */ void add_gpage(u64 addr, u64 page_size, unsigned long number_of_pages) { @@ -399,7 +399,7 @@ void __init reserve_hugetlb_gpages(void) #else /* !PPC_FSL_BOOK3E */ /* Build list of addresses of gigantic pages. This function is used in early - * boot before the buddy or bootmem allocator is setup. 
+ * boot before the buddy allocator is setup. */ void add_gpage(u64 addr, u64 page_size, unsigned long number_of_pages) { diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c index cdb19ab859d3..aa9173745cf4 100644 --- a/arch/powerpc/mm/pgtable_64.c +++ b/arch/powerpc/mm/pgtable_64.c @@ -109,10 +109,6 @@ int map_kernel_page(unsigned long ea, unsigned long pa, int flags) __pgprot(flags))); } else { #ifdef CONFIG_PPC_MMU_NOHASH - /* Warning ! This will blow up if bootmem is not initialized - * which our ppc64 code is keen to do that, we'll need to - * fix it and/or be more careful - */ pgdp = pgd_offset_k(ea); #ifdef PUD_TABLE_SIZE if (pgd_none(*pgdp)) { -- cgit v1.2.3-55-g7522 From 68cf0d642f62267b960f947370539ff3582c4935 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Wed, 17 Sep 2014 22:15:35 +1000 Subject: powerpc: Remove superfluous bootmem includes Lots of places included bootmem.h even when not using bootmem. Signed-off-by: Anton Blanchard Tested-by: Emil Medve Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/crash_dump.c | 1 - arch/powerpc/kernel/irq.c | 1 - arch/powerpc/kernel/pci_64.c | 1 - arch/powerpc/kernel/rtas_pci.c | 1 - arch/powerpc/kernel/setup_32.c | 1 - arch/powerpc/kernel/vdso.c | 1 - arch/powerpc/kvm/book3s_hv_builtin.c | 1 - arch/powerpc/mm/init_32.c | 1 - arch/powerpc/mm/init_64.c | 1 - arch/powerpc/mm/pgtable_64.c | 2 +- arch/powerpc/platforms/cell/celleb_scc_epci.c | 1 - arch/powerpc/platforms/cell/celleb_scc_pciex.c | 1 - arch/powerpc/platforms/maple/pci.c | 1 - arch/powerpc/platforms/powermac/pci.c | 1 - arch/powerpc/platforms/powernv/eeh-ioda.c | 1 - arch/powerpc/platforms/powernv/pci.c | 1 - arch/powerpc/sysdev/fsl_msi.c | 1 - arch/powerpc/sysdev/ipic.c | 1 - arch/powerpc/sysdev/mpic.c | 1 - arch/powerpc/sysdev/mpic_pasemi_msi.c | 1 - arch/powerpc/sysdev/mpic_u3msi.c | 1 - arch/powerpc/sysdev/ppc4xx_msi.c | 1 - arch/powerpc/sysdev/ppc4xx_pci.c | 1 - arch/powerpc/sysdev/qe_lib/qe.c | 1 - arch/powerpc/sysdev/qe_lib/qe_ic.c | 1 - arch/powerpc/sysdev/uic.c | 1 - 26 files changed, 1 insertion(+), 26 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/kernel/crash_dump.c b/arch/powerpc/kernel/crash_dump.c index c78e6dac4d7d..cfa0f81a5bb0 100644 --- a/arch/powerpc/kernel/crash_dump.c +++ b/arch/powerpc/kernel/crash_dump.c @@ -12,7 +12,6 @@ #undef DEBUG #include -#include #include #include #include diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 8d87bb162ecd..45096033d37b 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -50,7 +50,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/powerpc/kernel/pci_64.c b/arch/powerpc/kernel/pci_64.c index 155013da27e0..ba0f2d6fc27f 100644 --- a/arch/powerpc/kernel/pci_64.c +++ b/arch/powerpc/kernel/pci_64.c @@ -17,7 +17,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/powerpc/kernel/rtas_pci.c b/arch/powerpc/kernel/rtas_pci.c index 7c55b86206b3..ce230da2c015 100644 --- a/arch/powerpc/kernel/rtas_pci.c +++ b/arch/powerpc/kernel/rtas_pci.c @@ -26,7 +26,6 @@ #include #include #include -#include #include #include diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index 88a36e64bf9b..ba3a3c3b8c66 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -11,7 +11,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c index 
f174351842cf..305eb0d9b768 100644 --- a/arch/powerpc/kernel/vdso.c +++ b/arch/powerpc/kernel/vdso.c @@ -20,7 +20,6 @@ #include #include #include -#include #include #include diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c index e64868c0b8de..3f1bb5a36c27 100644 --- a/arch/powerpc/kvm/book3s_hv_builtin.c +++ b/arch/powerpc/kvm/book3s_hv_builtin.c @@ -12,7 +12,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c index 3b3100433c18..a10be665b645 100644 --- a/arch/powerpc/mm/init_32.c +++ b/arch/powerpc/mm/init_32.c @@ -26,7 +26,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c index 3481556a1880..10471f9bb63f 100644 --- a/arch/powerpc/mm/init_64.c +++ b/arch/powerpc/mm/init_64.c @@ -34,7 +34,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c index aa9173745cf4..e0c718543174 100644 --- a/arch/powerpc/mm/pgtable_64.c +++ b/arch/powerpc/mm/pgtable_64.c @@ -33,7 +33,6 @@ #include #include #include -#include #include #include @@ -51,6 +50,7 @@ #include #include #include +#include #include "mmu_decl.h" diff --git a/arch/powerpc/platforms/cell/celleb_scc_epci.c b/arch/powerpc/platforms/cell/celleb_scc_epci.c index 844c0facb4f7..9438bbed402f 100644 --- a/arch/powerpc/platforms/cell/celleb_scc_epci.c +++ b/arch/powerpc/platforms/cell/celleb_scc_epci.c @@ -25,7 +25,6 @@ #include #include #include -#include #include #include diff --git a/arch/powerpc/platforms/cell/celleb_scc_pciex.c b/arch/powerpc/platforms/cell/celleb_scc_pciex.c index 4278acfa2ede..f22387598040 100644 --- a/arch/powerpc/platforms/cell/celleb_scc_pciex.c +++ b/arch/powerpc/platforms/cell/celleb_scc_pciex.c @@ -25,7 +25,6 @@ #include #include #include -#include #include #include diff --git a/arch/powerpc/platforms/maple/pci.c b/arch/powerpc/platforms/maple/pci.c index f7136aae8bbf..d3a13067ec42 100644 --- a/arch/powerpc/platforms/maple/pci.c +++ b/arch/powerpc/platforms/maple/pci.c @@ -15,7 +15,6 @@ #include #include #include -#include #include #include diff --git a/arch/powerpc/platforms/powermac/pci.c b/arch/powerpc/platforms/powermac/pci.c index 7e868ccf3b0d..04702db35d45 100644 --- a/arch/powerpc/platforms/powermac/pci.c +++ b/arch/powerpc/platforms/powermac/pci.c @@ -15,7 +15,6 @@ #include #include #include -#include #include #include diff --git a/arch/powerpc/platforms/powernv/eeh-ioda.c b/arch/powerpc/platforms/powernv/eeh-ioda.c index eba9cb10619c..db3803e21483 100644 --- a/arch/powerpc/platforms/powernv/eeh-ioda.c +++ b/arch/powerpc/platforms/powernv/eeh-ioda.c @@ -11,7 +11,6 @@ * (at your option) any later version. 
*/ -#include #include #include #include diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c index b2187d0068b8..ba7452708db3 100644 --- a/arch/powerpc/platforms/powernv/pci.c +++ b/arch/powerpc/platforms/powernv/pci.c @@ -16,7 +16,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/powerpc/sysdev/fsl_msi.c b/arch/powerpc/sysdev/fsl_msi.c index de40b48b460e..8f37204bf94f 100644 --- a/arch/powerpc/sysdev/fsl_msi.c +++ b/arch/powerpc/sysdev/fsl_msi.c @@ -13,7 +13,6 @@ * */ #include -#include #include #include #include diff --git a/arch/powerpc/sysdev/ipic.c b/arch/powerpc/sysdev/ipic.c index b50f97811c25..b28733727ed3 100644 --- a/arch/powerpc/sysdev/ipic.c +++ b/arch/powerpc/sysdev/ipic.c @@ -20,7 +20,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/powerpc/sysdev/mpic.c b/arch/powerpc/sysdev/mpic.c index 89cec0ed6a58..c4648ad5c1f3 100644 --- a/arch/powerpc/sysdev/mpic.c +++ b/arch/powerpc/sysdev/mpic.c @@ -24,7 +24,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/powerpc/sysdev/mpic_pasemi_msi.c b/arch/powerpc/sysdev/mpic_pasemi_msi.c index 15dccd35fa11..428de9d23120 100644 --- a/arch/powerpc/sysdev/mpic_pasemi_msi.c +++ b/arch/powerpc/sysdev/mpic_pasemi_msi.c @@ -16,7 +16,6 @@ #undef DEBUG #include -#include #include #include #include diff --git a/arch/powerpc/sysdev/mpic_u3msi.c b/arch/powerpc/sysdev/mpic_u3msi.c index 623d7fba15b4..4ce8f54f234c 100644 --- a/arch/powerpc/sysdev/mpic_u3msi.c +++ b/arch/powerpc/sysdev/mpic_u3msi.c @@ -10,7 +10,6 @@ */ #include -#include #include #include #include diff --git a/arch/powerpc/sysdev/ppc4xx_msi.c b/arch/powerpc/sysdev/ppc4xx_msi.c index 22b5200636e7..a59f2890897a 100644 --- a/arch/powerpc/sysdev/ppc4xx_msi.c +++ b/arch/powerpc/sysdev/ppc4xx_msi.c @@ -22,7 +22,6 @@ */ #include -#include #include #include #include diff --git a/arch/powerpc/sysdev/ppc4xx_pci.c b/arch/powerpc/sysdev/ppc4xx_pci.c index df6e2fc4ff92..086aca69ecae 100644 --- a/arch/powerpc/sysdev/ppc4xx_pci.c +++ b/arch/powerpc/sysdev/ppc4xx_pci.c @@ -22,7 +22,6 @@ #include #include #include -#include #include #include diff --git a/arch/powerpc/sysdev/qe_lib/qe.c b/arch/powerpc/sysdev/qe_lib/qe.c index 238a07b97f2c..b584debbcd9c 100644 --- a/arch/powerpc/sysdev/qe_lib/qe.c +++ b/arch/powerpc/sysdev/qe_lib/qe.c @@ -22,7 +22,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/powerpc/sysdev/qe_lib/qe_ic.c b/arch/powerpc/sysdev/qe_lib/qe_ic.c index b2b87c30e266..543765e1ef14 100644 --- a/arch/powerpc/sysdev/qe_lib/qe_ic.c +++ b/arch/powerpc/sysdev/qe_lib/qe_ic.c @@ -23,7 +23,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/powerpc/sysdev/uic.c b/arch/powerpc/sysdev/uic.c index 92033936a8f7..7c37157d4c24 100644 --- a/arch/powerpc/sysdev/uic.c +++ b/arch/powerpc/sysdev/uic.c @@ -19,7 +19,6 @@ #include #include #include -#include #include #include #include -- cgit v1.2.3-55-g7522 From 21098b9e07476deb3f40acd7e51cffbffb4ef865 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Wed, 17 Sep 2014 22:15:36 +1000 Subject: powerpc: Move sparse_init() into initmem_init We did part of sparse initialisation in setup_arch and part in initmem_init. Put them together. 
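The combined shape, sketched from the mem.c hunk below (the numa.c variant does the same after its per-node loop):

	void __init initmem_init(void)
	{
		/* ... memblock and max_pfn setup ... */
		sparse_memory_present_with_active_regions(0);
		sparse_init();	/* previously a separate call in setup_arch() */
	}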
Signed-off-by: Anton Blanchard
Tested-by: Emil Medve
Signed-off-by: Michael Ellerman
--- arch/powerpc/kernel/setup_64.c | 1 - arch/powerpc/mm/mem.c | 1 + arch/powerpc/mm/numa.c | 2 ++ 3 files changed, 3 insertions(+), 1 deletion(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 91d9cfaffe6c..6e5310ddf8c7 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -690,7 +690,6 @@ void __init setup_arch(char **cmdline_p) emergency_stack_init(); initmem_init(); - sparse_init(); #ifdef CONFIG_DUMMY_CONSOLE conswitchp = &dummy_con; diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index e076d19c095b..b7285a5870f8 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -205,6 +205,7 @@ void __init initmem_init(void) /* XXX need to clip this if using highmem? */ sparse_memory_present_with_active_regions(0); + sparse_init(); } /* mark pages that don't exist as nosave */ diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index 2e9ad2d76b75..417b0a523a47 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -966,6 +966,8 @@ void __init initmem_init(void) sparse_memory_present_with_active_regions(nid); } + sparse_init(); + setup_node_to_cpumask_map(); reset_numa_cpu_lookup_table(); -- cgit v1.2.3-55-g7522

From d1d5304fcbc519f5fb12cbcca9c4fdd1d94447d7 Mon Sep 17 00:00:00 2001
From: Gavin Shan
Date: Wed, 12 Nov 2014 13:29:28 +1100
Subject: powerpc/mm: Use PAGE_FACTOR

PAGE_FACTOR was defined to reflect the difference between the configured page size and the fixed 4KB page size. Replace (PAGE_SHIFT - HW_PAGE_SHIFT) with PAGE_FACTOR.

Signed-off-by: Gavin Shan
Signed-off-by: Michael Ellerman
--- arch/powerpc/mm/hash_low_64.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/hash_low_64.S b/arch/powerpc/mm/hash_low_64.S index 057cbbb4c576..5094f32b706e 100644 --- a/arch/powerpc/mm/hash_low_64.S +++ b/arch/powerpc/mm/hash_low_64.S @@ -514,7 +514,7 @@ htab_insert_pte: andis. r0,r31,_PAGE_4K_PFN@h srdi r5,r31,PTE_RPN_SHIFT bne- htab_special_pfn - sldi r5,r5,PAGE_SHIFT-HW_PAGE_SHIFT + sldi r5,r5,PAGE_FACTOR add r5,r5,r25 htab_special_pfn: sldi r5,r5,HW_PAGE_SHIFT @@ -544,7 +544,7 @@ htab_call_hpte_insert1: andis. r0,r31,_PAGE_4K_PFN@h srdi r5,r31,PTE_RPN_SHIFT bne- 3f - sldi r5,r5,PAGE_SHIFT-HW_PAGE_SHIFT + sldi r5,r5,PAGE_FACTOR add r5,r5,r25 3: sldi r5,r5,HW_PAGE_SHIFT -- cgit v1.2.3-55-g7522

From 06743521d0eae1263a09bccb1a92a9fbb94660b3 Mon Sep 17 00:00:00 2001
From: Aneesh Kumar K.V
Date: Wed, 5 Nov 2014 21:57:39 +0530
Subject: powerpc/mm: Add missing pmd accessors

This patch adds documentation and missing accessors.
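One detail worth noting in the diff below: pmd_large() moves out of the CONFIG_TRANSPARENT_HUGEPAGE block, so leaf detection now covers explicit hugepages as well as THP. The test it performs, as a standalone sketch (is_leaf_pmd is a hypothetical name for illustration; the real function is pmd_large()):

	static inline int is_leaf_pmd(pmd_t pmd)
	{
		/* A huge-page leaf entry has its bottom two bits != 00,
		 * whether it came from THP or an explicit hugepage. */
		return (pmd_val(pmd) & 0x3) != 0x0;
	}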
Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/pgtable-ppc64-4k.h | 16 ++++++++- arch/powerpc/include/asm/pgtable-ppc64-64k.h | 3 ++ arch/powerpc/include/asm/pgtable-ppc64.h | 51 +++++++++++++++++++++------- arch/powerpc/mm/hugetlbpage.c | 3 ++ arch/powerpc/mm/pgtable_64.c | 22 ++++++++++-- 5 files changed, 78 insertions(+), 17 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/include/asm/pgtable-ppc64-4k.h b/arch/powerpc/include/asm/pgtable-ppc64-4k.h index 7b935683f268..132ee1d482c2 100644 --- a/arch/powerpc/include/asm/pgtable-ppc64-4k.h +++ b/arch/powerpc/include/asm/pgtable-ppc64-4k.h @@ -57,7 +57,21 @@ #define pgd_present(pgd) (pgd_val(pgd) != 0) #define pgd_clear(pgdp) (pgd_val(*(pgdp)) = 0) #define pgd_page_vaddr(pgd) (pgd_val(pgd) & ~PGD_MASKED_BITS) -#define pgd_page(pgd) virt_to_page(pgd_page_vaddr(pgd)) + +#ifndef __ASSEMBLY__ + +static inline pte_t pgd_pte(pgd_t pgd) +{ + return __pte(pgd_val(pgd)); +} + +static inline pgd_t pte_pgd(pte_t pte) +{ + return __pgd(pte_val(pte)); +} +extern struct page *pgd_page(pgd_t pgd); + +#endif /* !__ASSEMBLY__ */ #define pud_offset(pgdp, addr) \ (((pud_t *) pgd_page_vaddr(*(pgdp))) + \ diff --git a/arch/powerpc/include/asm/pgtable-ppc64-64k.h b/arch/powerpc/include/asm/pgtable-ppc64-64k.h index a56b82fb0609..1de35bbd02a6 100644 --- a/arch/powerpc/include/asm/pgtable-ppc64-64k.h +++ b/arch/powerpc/include/asm/pgtable-ppc64-64k.h @@ -38,4 +38,7 @@ /* Bits to mask out from a PGD/PUD to get to the PMD page */ #define PUD_MASKED_BITS 0x1ff +#define pgd_pte(pgd) (pud_pte(((pud_t){ pgd }))) +#define pte_pgd(pte) ((pgd_t)pte_pud(pte)) + #endif /* _ASM_POWERPC_PGTABLE_PPC64_64K_H */ diff --git a/arch/powerpc/include/asm/pgtable-ppc64.h b/arch/powerpc/include/asm/pgtable-ppc64.h index ae153c40ab7c..d12092420560 100644 --- a/arch/powerpc/include/asm/pgtable-ppc64.h +++ b/arch/powerpc/include/asm/pgtable-ppc64.h @@ -152,7 +152,7 @@ #define pmd_none(pmd) (!pmd_val(pmd)) #define pmd_bad(pmd) (!is_kernel_addr(pmd_val(pmd)) \ || (pmd_val(pmd) & PMD_BAD_BITS)) -#define pmd_present(pmd) (pmd_val(pmd) != 0) +#define pmd_present(pmd) (!pmd_none(pmd)) #define pmd_clear(pmdp) (pmd_val(*(pmdp)) = 0) #define pmd_page_vaddr(pmd) (pmd_val(pmd) & ~PMD_MASKED_BITS) extern struct page *pmd_page(pmd_t pmd); @@ -164,9 +164,21 @@ extern struct page *pmd_page(pmd_t pmd); #define pud_present(pud) (pud_val(pud) != 0) #define pud_clear(pudp) (pud_val(*(pudp)) = 0) #define pud_page_vaddr(pud) (pud_val(pud) & ~PUD_MASKED_BITS) -#define pud_page(pud) virt_to_page(pud_page_vaddr(pud)) +extern struct page *pud_page(pud_t pud); + +static inline pte_t pud_pte(pud_t pud) +{ + return __pte(pud_val(pud)); +} + +static inline pud_t pte_pud(pte_t pte) +{ + return __pud(pte_val(pte)); +} +#define pud_write(pud) pte_write(pud_pte(pud)) #define pgd_set(pgdp, pudp) ({pgd_val(*(pgdp)) = (unsigned long)(pudp);}) +#define pgd_write(pgd) pte_write(pgd_pte(pgd)) /* * Find an entry in a page-table-directory. We combine the address region @@ -422,7 +434,22 @@ extern void set_pmd_at(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp, pmd_t pmd); extern void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr, pmd_t *pmd); - +/* + * + * For core kernel code by design pmd_trans_huge is never run on any hugetlbfs + * page. 
The hugetlbfs page table walking and mangling paths are totally + * separated form the core VM paths and they're differentiated by + * VM_HUGETLB being set on vm_flags well before any pmd_trans_huge could run. + * + * pmd_trans_huge() is defined as false at build time if + * CONFIG_TRANSPARENT_HUGEPAGE=n to optimize away code blocks at build + * time in such case. + * + * For ppc64 we need to differntiate from explicit hugepages from THP, because + * for THP we also track the subpage details at the pmd level. We don't do + * that for explicit huge pages. + * + */ static inline int pmd_trans_huge(pmd_t pmd) { /* @@ -431,16 +458,6 @@ static inline int pmd_trans_huge(pmd_t pmd) return (pmd_val(pmd) & 0x3) && (pmd_val(pmd) & _PAGE_THP_HUGE); } -static inline int pmd_large(pmd_t pmd) -{ - /* - * leaf pte for huge page, bottom two bits != 00 - */ - if (pmd_trans_huge(pmd)) - return pmd_val(pmd) & _PAGE_PRESENT; - return 0; -} - static inline int pmd_trans_splitting(pmd_t pmd) { if (pmd_trans_huge(pmd)) @@ -451,6 +468,14 @@ static inline int pmd_trans_splitting(pmd_t pmd) extern int has_transparent_hugepage(void); #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ +static inline int pmd_large(pmd_t pmd) +{ + /* + * leaf pte for huge page, bottom two bits != 00 + */ + return ((pmd_val(pmd) & 0x3) != 0x0); +} + static inline pte_t pmd_pte(pmd_t pmd) { return __pte(pmd_val(pmd)); diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index b460e723f0ec..2b8e5ed28831 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -62,6 +62,9 @@ static unsigned nr_gpages; /* * We have PGD_INDEX_SIZ = 12 and PTE_INDEX_SIZE = 8, so that we can have * 16GB hugepage pte in PGD and 16MB hugepage pte at PMD; + * + * Defined in such a way that we can optimize away code block at build time + * if CONFIG_HUGETLB_PAGE=n. */ int pmd_huge(pmd_t pmd) { diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c index e0c718543174..87ff0c1908a9 100644 --- a/arch/powerpc/mm/pgtable_64.c +++ b/arch/powerpc/mm/pgtable_64.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #include @@ -344,16 +345,31 @@ EXPORT_SYMBOL(iounmap); EXPORT_SYMBOL(__iounmap); EXPORT_SYMBOL(__iounmap_at); +#ifndef __PAGETABLE_PUD_FOLDED +/* 4 level page table */ +struct page *pgd_page(pgd_t pgd) +{ + if (pgd_huge(pgd)) + return pte_page(pgd_pte(pgd)); + return virt_to_page(pgd_page_vaddr(pgd)); +} +#endif + +struct page *pud_page(pud_t pud) +{ + if (pud_huge(pud)) + return pte_page(pud_pte(pud)); + return virt_to_page(pud_page_vaddr(pud)); +} + /* * For hugepage we have pfn in the pmd, we use PTE_RPN_SHIFT bits for flags * For PTE page, we have a PTE_FRAG_SIZE (4K) aligned virtual address. */ struct page *pmd_page(pmd_t pmd) { -#ifdef CONFIG_TRANSPARENT_HUGEPAGE - if (pmd_trans_huge(pmd)) + if (pmd_trans_huge(pmd) || pmd_huge(pmd)) return pfn_to_page(pmd_pfn(pmd)); -#endif return virt_to_page(pmd_page_vaddr(pmd)); } -- cgit v1.2.3-55-g7522

From b30e759072c182538abb6908681cfd49978ba5e2 Mon Sep 17 00:00:00 2001
From: Aneesh Kumar K.V
Date: Wed, 5 Nov 2014 21:57:41 +0530
Subject: powerpc/mm: Switch to generic RCU get_user_pages_fast

This patch switches the ppc arch to use the generic RCU based gup implementation.
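The convention the generic walker relies on is reading each page table entry once into a local snapshot and only ever testing the snapshot, which is why hugepte_offset() and gup_huge_pd() now take a hugepd_t by value. A fragmentary sketch of the pattern, using the gup_huge_pd() signature added by this patch:

	pgd_t pgd = ACCESS_ONCE(*pgdp);	/* snapshot: the entry may change under us */

	/* test and walk the snapshot, never *pgdp itself */
	if (is_hugepd(__hugepd(pgd_val(pgd))))
		gup_huge_pd(__hugepd(pgd_val(pgd)), addr, PGDIR_SHIFT,
			    next, write, pages, &nr);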
Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman --- arch/powerpc/Kconfig | 1 + arch/powerpc/include/asm/hugetlb.h | 8 +- arch/powerpc/include/asm/page.h | 3 +- arch/powerpc/include/asm/pgtable-ppc64.h | 1 - arch/powerpc/include/asm/pgtable.h | 6 +- arch/powerpc/mm/Makefile | 2 +- arch/powerpc/mm/gup.c | 235 ------------------------------- arch/powerpc/mm/hugetlbpage.c | 33 ++--- 8 files changed, 22 insertions(+), 267 deletions(-) delete mode 100644 arch/powerpc/mm/gup.c (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 98e9c548bd75..46227336aacb 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -149,6 +149,7 @@ config PPC select ARCH_SUPPORTS_ATOMIC_RMW select DCACHE_WORD_ACCESS if PPC64 && CPU_LITTLE_ENDIAN select NO_BOOTMEM + select HAVE_GENERIC_RCU_GUP config GENERIC_CSUM def_bool CPU_LITTLE_ENDIAN diff --git a/arch/powerpc/include/asm/hugetlb.h b/arch/powerpc/include/asm/hugetlb.h index 766b77d527ac..1d53a65b4ec1 100644 --- a/arch/powerpc/include/asm/hugetlb.h +++ b/arch/powerpc/include/asm/hugetlb.h @@ -48,7 +48,7 @@ static inline unsigned int hugepd_shift(hugepd_t hpd) #endif /* CONFIG_PPC_BOOK3S_64 */ -static inline pte_t *hugepte_offset(hugepd_t *hpdp, unsigned long addr, +static inline pte_t *hugepte_offset(hugepd_t hpd, unsigned long addr, unsigned pdshift) { /* @@ -58,9 +58,9 @@ static inline pte_t *hugepte_offset(hugepd_t *hpdp, unsigned long addr, */ unsigned long idx = 0; - pte_t *dir = hugepd_page(*hpdp); + pte_t *dir = hugepd_page(hpd); #ifndef CONFIG_PPC_FSL_BOOK3E - idx = (addr & ((1UL << pdshift) - 1)) >> hugepd_shift(*hpdp); + idx = (addr & ((1UL << pdshift) - 1)) >> hugepd_shift(hpd); #endif return dir + idx; @@ -193,7 +193,7 @@ static inline void flush_hugetlb_page(struct vm_area_struct *vma, } #define hugepd_shift(x) 0 -static inline pte_t *hugepte_offset(hugepd_t *hpdp, unsigned long addr, +static inline pte_t *hugepte_offset(hugepd_t hpd, unsigned long addr, unsigned pdshift) { return 0; diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h index f973fce73a43..69c059887a2c 100644 --- a/arch/powerpc/include/asm/page.h +++ b/arch/powerpc/include/asm/page.h @@ -379,13 +379,14 @@ static inline int hugepd_ok(hugepd_t hpd) } #endif -#define is_hugepd(pdep) (hugepd_ok(*((hugepd_t *)(pdep)))) +#define is_hugepd(hpd) (hugepd_ok(hpd)) #define pgd_huge pgd_huge int pgd_huge(pgd_t pgd); #else /* CONFIG_HUGETLB_PAGE */ #define is_hugepd(pdep) 0 #define pgd_huge(pgd) 0 #endif /* CONFIG_HUGETLB_PAGE */ +#define __hugepd(x) ((hugepd_t) { (x) }) struct page; extern void clear_user_page(void *page, unsigned long vaddr, struct page *pg); diff --git a/arch/powerpc/include/asm/pgtable-ppc64.h b/arch/powerpc/include/asm/pgtable-ppc64.h index d12092420560..5600e434332f 100644 --- a/arch/powerpc/include/asm/pgtable-ppc64.h +++ b/arch/powerpc/include/asm/pgtable-ppc64.h @@ -600,6 +600,5 @@ static inline int pmd_move_must_withdraw(struct spinlock *new_pmd_ptl, */ return true; } - #endif /* __ASSEMBLY__ */ #endif /* _ASM_POWERPC_PGTABLE_PPC64_H_ */ diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h index 316f9a5da173..a8805fee0df9 100644 --- a/arch/powerpc/include/asm/pgtable.h +++ b/arch/powerpc/include/asm/pgtable.h @@ -274,11 +274,9 @@ extern void paging_init(void); */ extern void update_mmu_cache(struct vm_area_struct *, unsigned long, pte_t *); -extern int gup_hugepd(hugepd_t *hugepd, unsigned pdshift, unsigned long addr, - unsigned long end, int write, 
struct page **pages, int *nr); - extern int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr, - unsigned long end, int write, struct page **pages, int *nr); + unsigned long end, int write, + struct page **pages, int *nr); #ifndef CONFIG_TRANSPARENT_HUGEPAGE #define pmd_large(pmd) 0 #define has_transparent_hugepage() 0 diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile index 325e861616a1..438dcd3fd0d1 100644 --- a/arch/powerpc/mm/Makefile +++ b/arch/powerpc/mm/Makefile @@ -6,7 +6,7 @@ subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC) -obj-y := fault.o mem.o pgtable.o gup.o mmap.o \ +obj-y := fault.o mem.o pgtable.o mmap.o \ init_$(CONFIG_WORD_SIZE).o \ pgtable_$(CONFIG_WORD_SIZE).o obj-$(CONFIG_PPC_MMU_NOHASH) += mmu_context_nohash.o tlb_nohash.o \ diff --git a/arch/powerpc/mm/gup.c b/arch/powerpc/mm/gup.c deleted file mode 100644 index d8746684f606..000000000000 --- a/arch/powerpc/mm/gup.c +++ /dev/null @@ -1,235 +0,0 @@ -/* - * Lockless get_user_pages_fast for powerpc - * - * Copyright (C) 2008 Nick Piggin - * Copyright (C) 2008 Novell Inc. - */ -#undef DEBUG - -#include -#include -#include -#include -#include -#include -#include - -#ifdef __HAVE_ARCH_PTE_SPECIAL - -/* - * The performance critical leaf functions are made noinline otherwise gcc - * inlines everything into a single function which results in too much - * register pressure. - */ -static noinline int gup_pte_range(pmd_t pmd, unsigned long addr, - unsigned long end, int write, struct page **pages, int *nr) -{ - unsigned long mask, result; - pte_t *ptep; - - result = _PAGE_PRESENT|_PAGE_USER; - if (write) - result |= _PAGE_RW; - mask = result | _PAGE_SPECIAL; - - ptep = pte_offset_kernel(&pmd, addr); - do { - pte_t pte = ACCESS_ONCE(*ptep); - struct page *page; - /* - * Similar to the PMD case, NUMA hinting must take slow path - */ - if (pte_numa(pte)) - return 0; - - if ((pte_val(pte) & mask) != result) - return 0; - VM_BUG_ON(!pfn_valid(pte_pfn(pte))); - page = pte_page(pte); - if (!page_cache_get_speculative(page)) - return 0; - if (unlikely(pte_val(pte) != pte_val(*ptep))) { - put_page(page); - return 0; - } - pages[*nr] = page; - (*nr)++; - - } while (ptep++, addr += PAGE_SIZE, addr != end); - - return 1; -} - -static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end, - int write, struct page **pages, int *nr) -{ - unsigned long next; - pmd_t *pmdp; - - pmdp = pmd_offset(&pud, addr); - do { - pmd_t pmd = ACCESS_ONCE(*pmdp); - - next = pmd_addr_end(addr, end); - /* - * If we find a splitting transparent hugepage we - * return zero. That will result in taking the slow - * path which will call wait_split_huge_page() - * if the pmd is still in splitting state - */ - if (pmd_none(pmd) || pmd_trans_splitting(pmd)) - return 0; - if (pmd_huge(pmd) || pmd_large(pmd)) { - /* - * NUMA hinting faults need to be handled in the GUP - * slowpath for accounting purposes and so that they - * can be serialised against THP migration. 
- */ - if (pmd_numa(pmd)) - return 0; - - if (!gup_hugepte((pte_t *)pmdp, PMD_SIZE, addr, next, - write, pages, nr)) - return 0; - } else if (is_hugepd(pmdp)) { - if (!gup_hugepd((hugepd_t *)pmdp, PMD_SHIFT, - addr, next, write, pages, nr)) - return 0; - } else if (!gup_pte_range(pmd, addr, next, write, pages, nr)) - return 0; - } while (pmdp++, addr = next, addr != end); - - return 1; -} - -static int gup_pud_range(pgd_t pgd, unsigned long addr, unsigned long end, - int write, struct page **pages, int *nr) -{ - unsigned long next; - pud_t *pudp; - - pudp = pud_offset(&pgd, addr); - do { - pud_t pud = ACCESS_ONCE(*pudp); - - next = pud_addr_end(addr, end); - if (pud_none(pud)) - return 0; - if (pud_huge(pud)) { - if (!gup_hugepte((pte_t *)pudp, PUD_SIZE, addr, next, - write, pages, nr)) - return 0; - } else if (is_hugepd(pudp)) { - if (!gup_hugepd((hugepd_t *)pudp, PUD_SHIFT, - addr, next, write, pages, nr)) - return 0; - } else if (!gup_pmd_range(pud, addr, next, write, pages, nr)) - return 0; - } while (pudp++, addr = next, addr != end); - - return 1; -} - -int __get_user_pages_fast(unsigned long start, int nr_pages, int write, - struct page **pages) -{ - struct mm_struct *mm = current->mm; - unsigned long addr, len, end; - unsigned long next; - unsigned long flags; - pgd_t *pgdp; - int nr = 0; - - pr_devel("%s(%lx,%x,%s)\n", __func__, start, nr_pages, write ? "write" : "read"); - - start &= PAGE_MASK; - addr = start; - len = (unsigned long) nr_pages << PAGE_SHIFT; - end = start + len; - - if (unlikely(!access_ok(write ? VERIFY_WRITE : VERIFY_READ, - start, len))) - return 0; - - pr_devel(" aligned: %lx .. %lx\n", start, end); - - /* - * XXX: batch / limit 'nr', to avoid large irq off latency - * needs some instrumenting to determine the common sizes used by - * important workloads (eg. DB2), and whether limiting the batch size - * will decrease performance. - * - * It seems like we're in the clear for the moment. Direct-IO is - * the main guy that batches up lots of get_user_pages, and even - * they are limited to 64-at-a-time which is not so many. - */ - /* - * This doesn't prevent pagetable teardown, but does prevent - * the pagetables from being freed on powerpc. - * - * So long as we atomically load page table pointers versus teardown, - * we can follow the address down to the the page and take a ref on it. - */ - local_irq_save(flags); - - pgdp = pgd_offset(mm, addr); - do { - pgd_t pgd = ACCESS_ONCE(*pgdp); - - pr_devel(" %016lx: normal pgd %p\n", addr, - (void *)pgd_val(pgd)); - next = pgd_addr_end(addr, end); - if (pgd_none(pgd)) - break; - if (pgd_huge(pgd)) { - if (!gup_hugepte((pte_t *)pgdp, PGDIR_SIZE, addr, next, - write, pages, &nr)) - break; - } else if (is_hugepd(pgdp)) { - if (!gup_hugepd((hugepd_t *)pgdp, PGDIR_SHIFT, - addr, next, write, pages, &nr)) - break; - } else if (!gup_pud_range(pgd, addr, next, write, pages, &nr)) - break; - } while (pgdp++, addr = next, addr != end); - - local_irq_restore(flags); - - return nr; -} - -int get_user_pages_fast(unsigned long start, int nr_pages, int write, - struct page **pages) -{ - struct mm_struct *mm = current->mm; - int nr, ret; - - start &= PAGE_MASK; - nr = __get_user_pages_fast(start, nr_pages, write, pages); - ret = nr; - - if (nr < nr_pages) { - pr_devel(" slow path ! 
nr = %d\n", nr); - - /* Try to get the remaining pages with get_user_pages */ - start += nr << PAGE_SHIFT; - pages += nr; - - down_read(&mm->mmap_sem); - ret = get_user_pages(current, mm, start, - nr_pages - nr, write, 0, pages, NULL); - up_read(&mm->mmap_sem); - - /* Have to be a bit careful with return values */ - if (nr > 0) { - if (ret < 0) - ret = nr; - else - ret += nr; - } - } - - return ret; -} - -#endif /* __HAVE_ARCH_PTE_SPECIAL */ diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index 2b8e5ed28831..af56de82375d 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -233,7 +233,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz if (hugepd_none(*hpdp) && __hugepte_alloc(mm, hpdp, addr, pdshift, pshift)) return NULL; - return hugepte_offset(hpdp, addr, pdshift); + return hugepte_offset(*hpdp, addr, pdshift); } #else @@ -273,7 +273,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz if (hugepd_none(*hpdp) && __hugepte_alloc(mm, hpdp, addr, pdshift, pshift)) return NULL; - return hugepte_offset(hpdp, addr, pdshift); + return hugepte_offset(*hpdp, addr, pdshift); } #endif @@ -541,7 +541,7 @@ static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud, do { pmd = pmd_offset(pud, addr); next = pmd_addr_end(addr, end); - if (!is_hugepd(pmd)) { + if (!is_hugepd(__hugepd(pmd_val(*pmd)))) { /* * if it is not hugepd pointer, we should already find * it cleared. @@ -590,7 +590,7 @@ static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd, do { pud = pud_offset(pgd, addr); next = pud_addr_end(addr, end); - if (!is_hugepd(pud)) { + if (!is_hugepd(__hugepd(pud_val(*pud)))) { if (pud_none_or_clear_bad(pud)) continue; hugetlb_free_pmd_range(tlb, pud, addr, next, floor, @@ -656,7 +656,7 @@ void hugetlb_free_pgd_range(struct mmu_gather *tlb, do { next = pgd_addr_end(addr, end); pgd = pgd_offset(tlb->mm, addr); - if (!is_hugepd(pgd)) { + if (!is_hugepd(__hugepd(pgd_val(*pgd)))) { if (pgd_none_or_clear_bad(pgd)) continue; hugetlb_free_pud_range(tlb, pgd, addr, next, floor, ceiling); @@ -716,12 +716,11 @@ static unsigned long hugepte_addr_end(unsigned long addr, unsigned long end, return (__boundary - 1 < end - 1) ? 
__boundary : end; } -int gup_hugepd(hugepd_t *hugepd, unsigned pdshift, - unsigned long addr, unsigned long end, - int write, struct page **pages, int *nr) +int gup_huge_pd(hugepd_t hugepd, unsigned long addr, unsigned pdshift, + unsigned long end, int write, struct page **pages, int *nr) { pte_t *ptep; - unsigned long sz = 1UL << hugepd_shift(*hugepd); + unsigned long sz = 1UL << hugepd_shift(hugepd); unsigned long next; ptep = hugepte_offset(hugepd, addr, pdshift); @@ -964,7 +963,7 @@ pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, unsigned *shift else if (pgd_huge(pgd)) { ret_pte = (pte_t *) pgdp; goto out; - } else if (is_hugepd(&pgd)) + } else if (is_hugepd(__hugepd(pgd_val(pgd)))) hpdp = (hugepd_t *)&pgd; else { /* @@ -981,7 +980,7 @@ pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, unsigned *shift else if (pud_huge(pud)) { ret_pte = (pte_t *) pudp; goto out; - } else if (is_hugepd(&pud)) + } else if (is_hugepd(__hugepd(pud_val(pud)))) hpdp = (hugepd_t *)&pud; else { pdshift = PMD_SHIFT; @@ -1002,7 +1001,7 @@ pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, unsigned *shift if (pmd_huge(pmd) || pmd_large(pmd)) { ret_pte = (pte_t *) pmdp; goto out; - } else if (is_hugepd(&pmd)) + } else if (is_hugepd(__hugepd(pmd_val(pmd)))) hpdp = (hugepd_t *)&pmd; else return pte_offset_kernel(&pmd, ea); @@ -1011,7 +1010,7 @@ pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, unsigned *shift if (!hpdp) return NULL; - ret_pte = hugepte_offset(hpdp, ea, pdshift); + ret_pte = hugepte_offset(*hpdp, ea, pdshift); pdshift = hugepd_shift(*hpdp); out: if (shift) @@ -1041,14 +1040,6 @@ int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr, if ((pte_val(pte) & mask) != mask) return 0; -#ifdef CONFIG_TRANSPARENT_HUGEPAGE - /* - * check for splitting here - */ - if (pmd_trans_splitting(pte_pmd(pte))) - return 0; -#endif - /* hugepages are never "special" */ VM_BUG_ON(!pfn_valid(pte_pfn(pte))); -- cgit v1.2.3-55-g7522 From e39f223fc93580c86ccf6b3422033e349f57f0dd Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 18 Nov 2014 16:47:35 +1100 Subject: powerpc: Remove more traces of bootmem Although we are now selecting NO_BOOTMEM, we still have some traces of bootmem lying around. That is because even with NO_BOOTMEM there is still a shim that converts bootmem calls into memblock calls, but ultimately we want to remove all traces of bootmem. Most of the patch is conversions from alloc_bootmem() to memblock_virt_alloc(). In general a call such as: p = (struct foo *)alloc_bootmem(x); Becomes: p = memblock_virt_alloc(x, 0); We don't need the cast because memblock_virt_alloc() returns a void *. The alignment value of zero tells memblock to use the default alignment, which is SMP_CACHE_BYTES, the same value alloc_bootmem() uses. We remove a number of NULL checks on the result of memblock_virt_alloc(). That is because memblock_virt_alloc() will panic if it can't allocate, in exactly the same way as alloc_bootmem(), so the NULL checks are and always have been redundant. The memory returned by memblock_virt_alloc() is already zeroed, so we remove several memsets of the result of memblock_virt_alloc(). Finally we convert a few uses of __alloc_bootmem(x, y, MAX_DMA_ADDRESS) to just plain memblock_virt_alloc(). We don't use memblock_alloc_base() because MAX_DMA_ADDRESS is ~0ul on powerpc, so limiting the allocation to that is pointless, 16XB ought to be enough for anyone. 
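To make the conversion pattern concrete: a minimal before/after sketch in C, where struct foo and foo_setup() are hypothetical names invented for illustration, not code from this series.

/* Before: alloc_bootmem() panics on failure, so the NULL check below
 * is dead code, and callers often zero the result themselves.
 */
static struct foo *foo_setup(void)
{
	struct foo *p;

	p = (struct foo *)alloc_bootmem(sizeof(*p));
	if (!p)			/* never taken: alloc_bootmem() panics */
		return NULL;
	memset(p, 0, sizeof(*p));
	return p;
}

/* After: memblock_virt_alloc() also panics on failure, returns zeroed
 * memory, and returns void *, so the cast, the NULL check and the
 * memset all go away. Alignment 0 selects the default, SMP_CACHE_BYTES.
 */
static struct foo *foo_setup(void)
{
	return memblock_virt_alloc(sizeof(struct foo), 0);
}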
Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/pci-common.c | 1 - arch/powerpc/kernel/pci_32.c | 4 +--- arch/powerpc/kernel/setup_64.c | 2 +- arch/powerpc/lib/alloc.c | 4 +--- arch/powerpc/mm/hugetlbpage.c | 2 +- arch/powerpc/mm/mmu_context_nohash.c | 8 ++++---- arch/powerpc/platforms/cell/celleb_pci.c | 6 +++--- arch/powerpc/platforms/powermac/nvram.c | 6 +----- arch/powerpc/platforms/powernv/pci-ioda.c | 12 +++--------- arch/powerpc/platforms/powernv/pci-p5ioc2.c | 16 ++++------------ arch/powerpc/platforms/ps3/setup.c | 7 +------ arch/powerpc/sysdev/fsl_pci.c | 1 - 12 files changed, 20 insertions(+), 49 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index bc2dab52a991..37d512d35943 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -20,7 +20,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/powerpc/kernel/pci_32.c b/arch/powerpc/kernel/pci_32.c index 432459c817fa..1f7930037cb7 100644 --- a/arch/powerpc/kernel/pci_32.c +++ b/arch/powerpc/kernel/pci_32.c @@ -199,9 +199,7 @@ pci_create_OF_bus_map(void) struct property* of_prop; struct device_node *dn; - of_prop = (struct property*) alloc_bootmem(sizeof(struct property) + 256); - if (!of_prop) - return; + of_prop = memblock_virt_alloc(sizeof(struct property) + 256, 0); dn = of_find_node_by_path("/"); if (dn) { memset(of_prop, -1, sizeof(struct property) + 256); diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 6e5310ddf8c7..49f553bbb360 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -660,7 +660,7 @@ static void __init emergency_stack_init(void) } /* - * Called into from start_kernel this initializes bootmem, which is used + * Called into from start_kernel this initializes memblock, which is used * to manage page allocation until mem_init is called. 
*/ void __init setup_arch(char **cmdline_p) diff --git a/arch/powerpc/lib/alloc.c b/arch/powerpc/lib/alloc.c index da22c84a8fed..4a6c2cf890d9 100644 --- a/arch/powerpc/lib/alloc.c +++ b/arch/powerpc/lib/alloc.c @@ -13,9 +13,7 @@ void * __init_refok zalloc_maybe_bootmem(size_t size, gfp_t mask) if (mem_init_done) p = kzalloc(size, mask); else { - p = alloc_bootmem(size); - if (p) - memset(p, 0, size); + p = memblock_virt_alloc(size, 0); } return p; } diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index af56de82375d..8c9b8115867c 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -315,7 +315,7 @@ int alloc_bootmem_huge_page(struct hstate *hstate) * If gpages can be in highmem we can't use the trick of storing the * data structure in the page; allocate space for this */ - m = alloc_bootmem(sizeof(struct huge_bootmem_page)); + m = memblock_virt_alloc(sizeof(struct huge_bootmem_page), 0); m->phys = gpage_freearray[idx].gpage_list[--nr_gpages]; #else m = phys_to_virt(gpage_freearray[idx].gpage_list[--nr_gpages]); diff --git a/arch/powerpc/mm/mmu_context_nohash.c b/arch/powerpc/mm/mmu_context_nohash.c index 928ebe79668b..9cba6cba2e50 100644 --- a/arch/powerpc/mm/mmu_context_nohash.c +++ b/arch/powerpc/mm/mmu_context_nohash.c @@ -421,12 +421,12 @@ void __init mmu_context_init(void) /* * Allocate the maps used by context management */ - context_map = alloc_bootmem(CTX_MAP_SIZE); - context_mm = alloc_bootmem(sizeof(void *) * (last_context + 1)); + context_map = memblock_virt_alloc(CTX_MAP_SIZE, 0); + context_mm = memblock_virt_alloc(sizeof(void *) * (last_context + 1), 0); #ifndef CONFIG_SMP - stale_map[0] = alloc_bootmem(CTX_MAP_SIZE); + stale_map[0] = memblock_virt_alloc(CTX_MAP_SIZE, 0); #else - stale_map[boot_cpuid] = alloc_bootmem(CTX_MAP_SIZE); + stale_map[boot_cpuid] = memblock_virt_alloc(CTX_MAP_SIZE, 0); register_cpu_notifier(&mmu_context_cpu_nb); #endif diff --git a/arch/powerpc/platforms/cell/celleb_pci.c b/arch/powerpc/platforms/cell/celleb_pci.c index 2b98a36ef8fb..3ce70ded2d6a 100644 --- a/arch/powerpc/platforms/cell/celleb_pci.c +++ b/arch/powerpc/platforms/cell/celleb_pci.c @@ -29,7 +29,7 @@ #include #include #include -#include +#include #include #include #include @@ -401,11 +401,11 @@ error: } else { if (config && *config) { size = 256; - free_bootmem(__pa(*config), size); + memblock_free(__pa(*config), size); } if (res && *res) { size = sizeof(struct celleb_pci_resource); - free_bootmem(__pa(*res), size); + memblock_free(__pa(*res), size); } } diff --git a/arch/powerpc/platforms/powermac/nvram.c b/arch/powerpc/platforms/powermac/nvram.c index 014d06e6d46b..60b03a1703d1 100644 --- a/arch/powerpc/platforms/powermac/nvram.c +++ b/arch/powerpc/platforms/powermac/nvram.c @@ -513,11 +513,7 @@ static int __init core99_nvram_setup(struct device_node *dp, unsigned long addr) printk(KERN_ERR "nvram: no address\n"); return -EINVAL; } - nvram_image = alloc_bootmem(NVRAM_SIZE); - if (nvram_image == NULL) { - printk(KERN_ERR "nvram: can't allocate ram image\n"); - return -ENOMEM; - } + nvram_image = memblock_virt_alloc(NVRAM_SIZE, 0); nvram_data = ioremap(addr, NVRAM_SIZE*2); nvram_naddrs = 1; /* Make sure we get the correct case */ diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index d03503515692..cc861c14840d 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -1940,19 +1940,14 @@ static void __init pnv_pci_init_ioda_phb(struct 
device_node *np, phb_id = be64_to_cpup(prop64); pr_debug(" PHB-ID : 0x%016llx\n", phb_id); - phb = alloc_bootmem(sizeof(struct pnv_phb)); - if (!phb) { - pr_err(" Out of memory !\n"); - return; - } + phb = memblock_virt_alloc(sizeof(struct pnv_phb), 0); /* Allocate PCI controller */ - memset(phb, 0, sizeof(struct pnv_phb)); phb->hose = hose = pcibios_alloc_controller(np); if (!phb->hose) { pr_err(" Can't allocate PCI controller for %s\n", np->full_name); - free_bootmem((unsigned long)phb, sizeof(struct pnv_phb)); + memblock_free(__pa(phb), sizeof(struct pnv_phb)); return; } @@ -2019,8 +2014,7 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np, } pemap_off = size; size += phb->ioda.total_pe * sizeof(struct pnv_ioda_pe); - aux = alloc_bootmem(size); - memset(aux, 0, size); + aux = memblock_virt_alloc(size, 0); phb->ioda.pe_alloc = aux; phb->ioda.m32_segmap = aux + m32map_off; if (phb->type == PNV_PHB_IODA1) diff --git a/arch/powerpc/platforms/powernv/pci-p5ioc2.c b/arch/powerpc/platforms/powernv/pci-p5ioc2.c index 3336fcbdd08a..6ef6d4d8e7e2 100644 --- a/arch/powerpc/platforms/powernv/pci-p5ioc2.c +++ b/arch/powerpc/platforms/powernv/pci-p5ioc2.c @@ -122,12 +122,9 @@ static void __init pnv_pci_init_p5ioc2_phb(struct device_node *np, u64 hub_id, return; } - phb = alloc_bootmem(sizeof(struct pnv_phb)); - if (phb) { - memset(phb, 0, sizeof(struct pnv_phb)); - phb->hose = pcibios_alloc_controller(np); - } - if (!phb || !phb->hose) { + phb = memblock_virt_alloc(sizeof(struct pnv_phb), 0); + phb->hose = pcibios_alloc_controller(np); + if (!phb->hose) { pr_err(" Failed to allocate PCI controller\n"); return; } @@ -216,12 +213,7 @@ void __init pnv_pci_init_p5ioc2_hub(struct device_node *np) * * XXX TODO: Make it chip local if possible */ - tce_mem = __alloc_bootmem(P5IOC2_TCE_MEMORY, P5IOC2_TCE_MEMORY, - __pa(MAX_DMA_ADDRESS)); - if (!tce_mem) { - pr_err(" Failed to allocate TCE Memory !\n"); - return; - } + tce_mem = memblock_virt_alloc(P5IOC2_TCE_MEMORY, P5IOC2_TCE_MEMORY); pr_debug(" TCE : 0x%016lx..0x%016lx\n", __pa(tce_mem), __pa(tce_mem) + P5IOC2_TCE_MEMORY - 1); rc = opal_pci_set_hub_tce_memory(hub_id, __pa(tce_mem), diff --git a/arch/powerpc/platforms/ps3/setup.c b/arch/powerpc/platforms/ps3/setup.c index 009a2004b876..799c8580ab09 100644 --- a/arch/powerpc/platforms/ps3/setup.c +++ b/arch/powerpc/platforms/ps3/setup.c @@ -125,12 +125,7 @@ static void __init prealloc(struct ps3_prealloc *p) if (!p->size) return; - p->address = __alloc_bootmem(p->size, p->align, __pa(MAX_DMA_ADDRESS)); - if (!p->address) { - printk(KERN_ERR "%s: Cannot allocate %s\n", __func__, - p->name); - return; - } + p->address = memblock_virt_alloc(p->size, p->align); printk(KERN_INFO "%s: %lu bytes at %p\n", p->name, p->size, p->address); diff --git a/arch/powerpc/sysdev/fsl_pci.c b/arch/powerpc/sysdev/fsl_pci.c index d8484d7cffaa..6455c1eada1a 100644 --- a/arch/powerpc/sysdev/fsl_pci.c +++ b/arch/powerpc/sysdev/fsl_pci.c @@ -23,7 +23,6 @@ #include #include #include -#include #include #include #include -- cgit v1.2.3-55-g7522 From 0ec2698fe1c451606c16e21dbfbd29efce694668 Mon Sep 17 00:00:00 2001 From: Aneesh Kumar K.V Date: Mon, 3 Nov 2014 20:21:34 +0530 Subject: powerpc/mm: Check for matching hpte without taking hpte lock With a smaller hash page table config we can end up replacing hash page table slots frequently. In such a config we will find that the hpte does not match, and we can do that check without holding the hpte lock. We do need to recheck the hpte after taking the lock.
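In skeleton form the new native_hpte_updatepp() (see the hash_native_64.c hunk below) is the classic check/lock/recheck shape. A simplified sketch follows; hpte_matches() and update_pp_bits() are hypothetical stand-ins for the HPTE_V_COMPARE()/HPTE_V_VALID test and the pp-bit rewrite, and the unconditional tlbie is omitted:

	hpte_v = be64_to_cpu(hptep->v);
	if (!hpte_matches(hpte_v, want_v)) {	/* unlocked pre-check */
		ret = -1;		/* common case: miss, no lock taken */
	} else {
		native_lock_hpte(hptep);
		hpte_v = be64_to_cpu(hptep->v);	/* re-read under the lock */
		if (!hpte_matches(hpte_v, want_v))
			ret = -1;	/* entry changed while we were unlocked */
		else
			update_pp_bits(hptep, newpp);	/* safe: lock held */
		native_unlock_hpte(hptep);
	}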
Signed-off-by: Aneesh Kumar K.V Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/mm/hash_native_64.c | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c index d53288a08c37..558e50bac6f7 100644 --- a/arch/powerpc/mm/hash_native_64.c +++ b/arch/powerpc/mm/hash_native_64.c @@ -294,8 +294,6 @@ static long native_hpte_updatepp(unsigned long slot, unsigned long newpp, DBG_LOW(" update(vpn=%016lx, avpnv=%016lx, group=%lx, newpp=%lx)", vpn, want_v & HPTE_V_AVPN, slot, newpp); - native_lock_hpte(hptep); - hpte_v = be64_to_cpu(hptep->v); /* * We need to invalidate the TLB always because hpte_remove doesn't do @@ -308,16 +306,24 @@ static long native_hpte_updatepp(unsigned long slot, unsigned long newpp, DBG_LOW(" -> miss\n"); ret = -1; } else { - DBG_LOW(" -> hit\n"); - /* Update the HPTE */ - hptep->r = cpu_to_be64((be64_to_cpu(hptep->r) & ~(HPTE_R_PP | HPTE_R_N)) | - (newpp & (HPTE_R_PP | HPTE_R_N | HPTE_R_C))); + native_lock_hpte(hptep); + /* recheck with locks held */ + hpte_v = be64_to_cpu(hptep->v); + if (unlikely(!HPTE_V_COMPARE(hpte_v, want_v) || + !(hpte_v & HPTE_V_VALID))) { + ret = -1; + } else { + DBG_LOW(" -> hit\n"); + /* Update the HPTE */ + hptep->r = cpu_to_be64((be64_to_cpu(hptep->r) & + ~(HPTE_R_PP | HPTE_R_N)) | + (newpp & (HPTE_R_PP | HPTE_R_N | + HPTE_R_C))); + } + native_unlock_hpte(hptep); } - native_unlock_hpte(hptep); - /* Ensure it is out of the tlb too. */ tlbie(vpn, bpsize, apsize, ssize, local); - return ret; } -- cgit v1.2.3-55-g7522 From c4f3eb5fc527b749d6fb0d47ffdcfe83706dbe2f Mon Sep 17 00:00:00 2001 From: James Yang Date: Fri, 14 Nov 2014 12:32:24 -0600 Subject: powerpc/mm/hugetlb: Sanity check gigantic hugepage count Limit the number of gigantic hugepages specified by the hugepages= parameter to MAX_NUMBER_GPAGES. Signed-off-by: James Yang Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/mm/hugetlbpage.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index af56de82375d..747e0c616526 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -355,6 +355,13 @@ static int __init do_gpage_early_setup(char *param, char *val, if (size != 0) { if (sscanf(val, "%lu", &npages) <= 0) npages = 0; + if (npages > MAX_NUMBER_GPAGES) { + pr_warn("MMU: %lu pages requested for page " + "size %llu KB, limiting to " + __stringify(MAX_NUMBER_GPAGES) "\n", + npages, size / 1024); + npages = MAX_NUMBER_GPAGES; + } gpage_npages[shift_to_mmu_psize(__ffs(size))] = npages; size = 0; } -- cgit v1.2.3-55-g7522 From f1581bf14bc40b4a14bf10358eac0b22173b3313 Mon Sep 17 00:00:00 2001 From: Aneesh Kumar K.V Date: Sun, 2 Nov 2014 21:15:27 +0530 Subject: powerpc/mm/thp: Remove code duplication Rename invalidate_old_hpte to flush_hash_hugepage and use that in other places.
Signed-off-by: Aneesh Kumar K.V Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/tlbflush.h | 3 +- arch/powerpc/mm/hash_utils_64.c | 52 ++++++++++++++++++++++++++++++ arch/powerpc/mm/hugepage-hash64.c | 54 ++----------------------------- arch/powerpc/mm/pgtable_64.c | 64 ++++++------------------------------- 4 files changed, 65 insertions(+), 108 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/include/asm/tlbflush.h b/arch/powerpc/include/asm/tlbflush.h index cd7c2719d3ef..19550d346fea 100644 --- a/arch/powerpc/include/asm/tlbflush.h +++ b/arch/powerpc/include/asm/tlbflush.h @@ -127,7 +127,8 @@ static inline void arch_leave_lazy_mmu_mode(void) extern void flush_hash_page(unsigned long vpn, real_pte_t pte, int psize, int ssize, int local); extern void flush_hash_range(unsigned long number, int local); - +extern void flush_hash_hugepage(unsigned long vsid, unsigned long addr, + pmd_t *pmdp, unsigned int psize, int ssize); static inline void local_flush_tlb_mm(struct mm_struct *mm) { diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index f01027731e23..6c2076c65d7c 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -1315,6 +1315,58 @@ void flush_hash_page(unsigned long vpn, real_pte_t pte, int psize, int ssize, #endif } +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +void flush_hash_hugepage(unsigned long vsid, unsigned long addr, + pmd_t *pmdp, unsigned int psize, int ssize) +{ + int i, max_hpte_count, valid; + unsigned long s_addr; + unsigned char *hpte_slot_array; + unsigned long hidx, shift, vpn, hash, slot; + + s_addr = addr & HPAGE_PMD_MASK; + hpte_slot_array = get_hpte_slot_array(pmdp); + /* + * IF we try to do a HUGE PTE update after a withdraw is done. + * we will find the below NULL. 
This happens when we do + * split_huge_page_pmd + */ + if (!hpte_slot_array) + return; + + if (ppc_md.hugepage_invalidate) + return ppc_md.hugepage_invalidate(vsid, s_addr, hpte_slot_array, + psize, ssize); + /* + * No bluk hpte removal support, invalidate each entry + */ + shift = mmu_psize_defs[psize].shift; + max_hpte_count = HPAGE_PMD_SIZE >> shift; + for (i = 0; i < max_hpte_count; i++) { + /* + * 8 bits per each hpte entries + * 000| [ secondary group (one bit) | hidx (3 bits) | valid bit] + */ + valid = hpte_valid(hpte_slot_array, i); + if (!valid) + continue; + hidx = hpte_hash_index(hpte_slot_array, i); + + /* get the vpn */ + addr = s_addr + (i * (1ul << shift)); + vpn = hpt_vpn(addr, vsid, ssize); + hash = hpt_hash(vpn, shift, ssize); + if (hidx & _PTEIDX_SECONDARY) + hash = ~hash; + + slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; + slot += hidx & _PTEIDX_GROUP_IX; + ppc_md.hpte_invalidate(slot, vpn, psize, + MMU_PAGE_16M, ssize, 0); + } +} +#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ + void flush_hash_range(unsigned long number, int local) { if (ppc_md.flush_hash_range) diff --git a/arch/powerpc/mm/hugepage-hash64.c b/arch/powerpc/mm/hugepage-hash64.c index 5f5e6328c21c..1b3ad46a71b5 100644 --- a/arch/powerpc/mm/hugepage-hash64.c +++ b/arch/powerpc/mm/hugepage-hash64.c @@ -18,57 +18,6 @@ #include #include -static void invalidate_old_hpte(unsigned long vsid, unsigned long addr, - pmd_t *pmdp, unsigned int psize, int ssize) -{ - int i, max_hpte_count, valid; - unsigned long s_addr; - unsigned char *hpte_slot_array; - unsigned long hidx, shift, vpn, hash, slot; - - s_addr = addr & HPAGE_PMD_MASK; - hpte_slot_array = get_hpte_slot_array(pmdp); - /* - * IF we try to do a HUGE PTE update after a withdraw is done. - * we will find the below NULL. This happens when we do - * split_huge_page_pmd - */ - if (!hpte_slot_array) - return; - - if (ppc_md.hugepage_invalidate) - return ppc_md.hugepage_invalidate(vsid, s_addr, hpte_slot_array, - psize, ssize); - /* - * No bluk hpte removal support, invalidate each entry - */ - shift = mmu_psize_defs[psize].shift; - max_hpte_count = HPAGE_PMD_SIZE >> shift; - for (i = 0; i < max_hpte_count; i++) { - /* - * 8 bits per each hpte entries - * 000| [ secondary group (one bit) | hidx (3 bits) | valid bit] - */ - valid = hpte_valid(hpte_slot_array, i); - if (!valid) - continue; - hidx = hpte_hash_index(hpte_slot_array, i); - - /* get the vpn */ - addr = s_addr + (i * (1ul << shift)); - vpn = hpt_vpn(addr, vsid, ssize); - hash = hpt_hash(vpn, shift, ssize); - if (hidx & _PTEIDX_SECONDARY) - hash = ~hash; - - slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; - slot += hidx & _PTEIDX_GROUP_IX; - ppc_md.hpte_invalidate(slot, vpn, psize, - MMU_PAGE_16M, ssize, 0); - } -} - - int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid, pmd_t *pmdp, unsigned long trap, int local, int ssize, unsigned int psize) @@ -145,7 +94,8 @@ int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid, * hash page table entries. 
*/ if ((old_pmd & _PAGE_HASHPTE) && !(old_pmd & _PAGE_COMBO)) - invalidate_old_hpte(vsid, ea, pmdp, MMU_PAGE_64K, ssize); + flush_hash_hugepage(vsid, ea, pmdp, MMU_PAGE_64K, + ssize); } valid = hpte_valid(hpte_slot_array, index); diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c index 87ff0c1908a9..c175c990580e 100644 --- a/arch/powerpc/mm/pgtable_64.c +++ b/arch/powerpc/mm/pgtable_64.c @@ -739,29 +739,13 @@ void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address, void hpte_do_hugepage_flush(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp, unsigned long old_pmd) { - int ssize, i; - unsigned long s_addr; - int max_hpte_count; - unsigned int psize, valid; - unsigned char *hpte_slot_array; - unsigned long hidx, vpn, vsid, hash, shift, slot; - - /* - * Flush all the hptes mapping this hugepage - */ - s_addr = addr & HPAGE_PMD_MASK; - hpte_slot_array = get_hpte_slot_array(pmdp); - /* - * IF we try to do a HUGE PTE update after a withdraw is done. - * we will find the below NULL. This happens when we do - * split_huge_page_pmd - */ - if (!hpte_slot_array) - return; + int ssize; + unsigned int psize; + unsigned long vsid; /* get the base page size,vsid and segment size */ #ifdef CONFIG_DEBUG_VM - psize = get_slice_psize(mm, s_addr); + psize = get_slice_psize(mm, addr); BUG_ON(psize == MMU_PAGE_16M); #endif if (old_pmd & _PAGE_COMBO) @@ -769,46 +753,16 @@ void hpte_do_hugepage_flush(struct mm_struct *mm, unsigned long addr, else psize = MMU_PAGE_64K; - if (!is_kernel_addr(s_addr)) { - ssize = user_segment_size(s_addr); - vsid = get_vsid(mm->context.id, s_addr, ssize); + if (!is_kernel_addr(addr)) { + ssize = user_segment_size(addr); + vsid = get_vsid(mm->context.id, addr, ssize); WARN_ON(vsid == 0); } else { - vsid = get_kernel_vsid(s_addr, mmu_kernel_ssize); + vsid = get_kernel_vsid(addr, mmu_kernel_ssize); ssize = mmu_kernel_ssize; } - if (ppc_md.hugepage_invalidate) - return ppc_md.hugepage_invalidate(vsid, s_addr, - hpte_slot_array, - psize, ssize); - /* - * No bluk hpte removal support, invalidate each entry - */ - shift = mmu_psize_defs[psize].shift; - max_hpte_count = HPAGE_PMD_SIZE >> shift; - for (i = 0; i < max_hpte_count; i++) { - /* - * 8 bits per each hpte entries - * 000| [ secondary group (one bit) | hidx (3 bits) | valid bit] - */ - valid = hpte_valid(hpte_slot_array, i); - if (!valid) - continue; - hidx = hpte_hash_index(hpte_slot_array, i); - - /* get the vpn */ - addr = s_addr + (i * (1ul << shift)); - vpn = hpt_vpn(addr, vsid, ssize); - hash = hpt_hash(vpn, shift, ssize); - if (hidx & _PTEIDX_SECONDARY) - hash = ~hash; - - slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; - slot += hidx & _PTEIDX_GROUP_IX; - ppc_md.hpte_invalidate(slot, vpn, psize, - MMU_PAGE_16M, ssize, 0); - } + return flush_hash_hugepage(vsid, addr, pmdp, psize, ssize); } static pmd_t pmd_set_protbits(pmd_t pmd, pgprot_t pgprot) -- cgit v1.2.3-55-g7522 From d557b09800dab5dd6804e5b79324069abcf0be11 Mon Sep 17 00:00:00 2001 From: Aneesh Kumar K.V Date: Sun, 2 Nov 2014 21:15:28 +0530 Subject: powerpc/mm/thp: Use tlbiel if possible If we know that user address space has never executed on other cpus we could use tlbiel. 
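"Has never executed on other cpus" is decided with a cpumask comparison; the sketch below mirrors the hpte_do_hugepage_flush() hunk in this patch:

	int local = 0;
	const struct cpumask *tmp = cpumask_of(smp_processor_id());

	/*
	 * If this CPU is the only one the mm has ever run on, no other
	 * CPU can hold a stale translation, so the cheaper tlbiel
	 * (local form) is sufficient; otherwise we must broadcast tlbie.
	 */
	if (cpumask_equal(mm_cpumask(mm), tmp))
		local = 1;

	flush_hash_hugepage(vsid, addr, pmdp, psize, ssize, local);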
Signed-off-by: Aneesh Kumar K.V Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/machdep.h | 2 +- arch/powerpc/include/asm/tlbflush.h | 3 ++- arch/powerpc/mm/hash_native_64.c | 4 ++-- arch/powerpc/mm/hash_utils_64.c | 28 +++++++++++++++++++++++----- arch/powerpc/mm/hugepage-hash64.c | 2 +- arch/powerpc/mm/pgtable_64.c | 9 +++++++-- arch/powerpc/platforms/pseries/lpar.c | 2 +- 7 files changed, 37 insertions(+), 13 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h index 15c9150a58cc..e5c0919acca4 100644 --- a/arch/powerpc/include/asm/machdep.h +++ b/arch/powerpc/include/asm/machdep.h @@ -60,7 +60,7 @@ struct machdep_calls { void (*hugepage_invalidate)(unsigned long vsid, unsigned long addr, unsigned char *hpte_slot_array, - int psize, int ssize); + int psize, int ssize, int local); /* special for kexec, to be called in real mode, linear mapping is * destroyed as well */ void (*hpte_clear_all)(void); diff --git a/arch/powerpc/include/asm/tlbflush.h b/arch/powerpc/include/asm/tlbflush.h index 19550d346fea..4d3ecd8d8929 100644 --- a/arch/powerpc/include/asm/tlbflush.h +++ b/arch/powerpc/include/asm/tlbflush.h @@ -128,7 +128,8 @@ extern void flush_hash_page(unsigned long vpn, real_pte_t pte, int psize, int ssize, int local); extern void flush_hash_range(unsigned long number, int local); extern void flush_hash_hugepage(unsigned long vsid, unsigned long addr, - pmd_t *pmdp, unsigned int psize, int ssize); + pmd_t *pmdp, unsigned int psize, int ssize, + int local); static inline void local_flush_tlb_mm(struct mm_struct *mm) { diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c index 558e50bac6f7..13700911b522 100644 --- a/arch/powerpc/mm/hash_native_64.c +++ b/arch/powerpc/mm/hash_native_64.c @@ -425,7 +425,7 @@ static void native_hpte_invalidate(unsigned long slot, unsigned long vpn, static void native_hugepage_invalidate(unsigned long vsid, unsigned long addr, unsigned char *hpte_slot_array, - int psize, int ssize) + int psize, int ssize, int local) { int i; struct hash_pte *hptep; @@ -471,7 +471,7 @@ static void native_hugepage_invalidate(unsigned long vsid, * instruction compares entry_VA in tlb with the VA specified * here */ - tlbie(vpn, psize, actual_psize, ssize, 0); + tlbie(vpn, psize, actual_psize, ssize, local); } local_irq_restore(flags); } diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index 6c2076c65d7c..68211d398fdb 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -1317,7 +1317,7 @@ void flush_hash_page(unsigned long vpn, real_pte_t pte, int psize, int ssize, #ifdef CONFIG_TRANSPARENT_HUGEPAGE void flush_hash_hugepage(unsigned long vsid, unsigned long addr, - pmd_t *pmdp, unsigned int psize, int ssize) + pmd_t *pmdp, unsigned int psize, int ssize, int local) { int i, max_hpte_count, valid; unsigned long s_addr; @@ -1334,9 +1334,11 @@ void flush_hash_hugepage(unsigned long vsid, unsigned long addr, if (!hpte_slot_array) return; - if (ppc_md.hugepage_invalidate) - return ppc_md.hugepage_invalidate(vsid, s_addr, hpte_slot_array, - psize, ssize); + if (ppc_md.hugepage_invalidate) { + ppc_md.hugepage_invalidate(vsid, s_addr, hpte_slot_array, + psize, ssize, local); + goto tm_abort; + } /* * No bluk hpte removal support, invalidate each entry */ @@ -1362,8 +1364,24 @@ void flush_hash_hugepage(unsigned long vsid, unsigned long addr, slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; slot += hidx & 
_PTEIDX_GROUP_IX; ppc_md.hpte_invalidate(slot, vpn, psize, - MMU_PAGE_16M, ssize, 0); + MMU_PAGE_16M, ssize, local); + } +tm_abort: +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + /* Transactions are not aborted by tlbiel, only tlbie. + * Without, syncing a page back to a block device w/ PIO could pick up + * transactional data (bad!) so we force an abort here. Before the + * sync the page will be made read-only, which will flush_hash_page. + * BIG ISSUE here: if the kernel uses a page from userspace without + * unmapping it first, it may see the speculated version. + */ + if (local && cpu_has_feature(CPU_FTR_TM) && + current->thread.regs && + MSR_TM_ACTIVE(current->thread.regs->msr)) { + tm_enable(); + tm_abort(TM_CAUSE_TLBI); } +#endif } #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ diff --git a/arch/powerpc/mm/hugepage-hash64.c b/arch/powerpc/mm/hugepage-hash64.c index 1b3ad46a71b5..3a648cd363ae 100644 --- a/arch/powerpc/mm/hugepage-hash64.c +++ b/arch/powerpc/mm/hugepage-hash64.c @@ -95,7 +95,7 @@ int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid, */ if ((old_pmd & _PAGE_HASHPTE) && !(old_pmd & _PAGE_COMBO)) flush_hash_hugepage(vsid, ea, pmdp, MMU_PAGE_64K, - ssize); + ssize, local); } valid = hpte_valid(hpte_slot_array, index); diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c index c175c990580e..eea9fa1f8ae7 100644 --- a/arch/powerpc/mm/pgtable_64.c +++ b/arch/powerpc/mm/pgtable_64.c @@ -739,9 +739,10 @@ void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address, void hpte_do_hugepage_flush(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp, unsigned long old_pmd) { - int ssize; + int ssize, local = 0; unsigned int psize; unsigned long vsid; + const struct cpumask *tmp; /* get the base page size,vsid and segment size */ #ifdef CONFIG_DEBUG_VM @@ -762,7 +763,11 @@ void hpte_do_hugepage_flush(struct mm_struct *mm, unsigned long addr, ssize = mmu_kernel_ssize; } - return flush_hash_hugepage(vsid, addr, pmdp, psize, ssize); + tmp = cpumask_of(smp_processor_id()); + if (cpumask_equal(mm_cpumask(mm), tmp)) + local = 1; + + return flush_hash_hugepage(vsid, addr, pmdp, psize, ssize, local); } static pmd_t pmd_set_protbits(pmd_t pmd, pgprot_t pgprot) diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index d214a012b026..832f221840f2 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -442,7 +442,7 @@ static void __pSeries_lpar_hugepage_invalidate(unsigned long *slot, static void pSeries_lpar_hugepage_invalidate(unsigned long vsid, unsigned long addr, unsigned char *hpte_slot_array, - int psize, int ssize) + int psize, int ssize, int local) { int i, index = 0; unsigned long s_addr = addr; -- cgit v1.2.3-55-g7522 From aefa5688c070727b8729de1aef85cad7b9933fc7 Mon Sep 17 00:00:00 2001 From: Aneesh Kumar K.V Date: Thu, 4 Dec 2014 11:00:14 +0530 Subject: powerpc/mm: don't do tlbie for updatepp request with NO HPTE fault updatepp can get called for a nohpte fault when we find from the linux page table that the translation was hashed before. In that case we are sure that there is no existing translation, hence we could avoid doing tlbie. We could possibly race with a parallel fault filling the TLB. But that should be ok because updatepp is only ever relaxing permissions. We also look at linux pte permission bits when filling hash pte permission bits.
We also hold the linux pte busy bits while inserting/updating a hashpte entry, hence a parallel update of the linux pte is not possible. On the other hand mprotect involves ptep_modify_prot_start, which causes an hpte invalidate and not updatepp. Performance numbers: we use random_access_bench written by Anton, on a kernel with THP disabled and a smaller hash page table size.
Without the fix:
86.60% random_access_b [kernel.kallsyms] [k] .native_hpte_updatepp
2.10% random_access_b random_access_bench [.] doit
1.99% random_access_b [kernel.kallsyms] [k] .do_raw_spin_lock
1.85% random_access_b [kernel.kallsyms] [k] .native_hpte_insert
1.26% random_access_b [kernel.kallsyms] [k] .native_flush_hash_range
1.18% random_access_b [kernel.kallsyms] [k] .__delay
0.69% random_access_b [kernel.kallsyms] [k] .native_hpte_remove
0.37% random_access_b [kernel.kallsyms] [k] .clear_user_page
0.34% random_access_b [kernel.kallsyms] [k] .__hash_page_64K
0.32% random_access_b [kernel.kallsyms] [k] fast_exception_return
0.30% random_access_b [kernel.kallsyms] [k] .hash_page_mm
With the fix:
27.54% random_access_b random_access_bench [.] doit
22.90% random_access_b [kernel.kallsyms] [k] .native_hpte_insert
5.76% random_access_b [kernel.kallsyms] [k] .native_hpte_remove
5.20% random_access_b [kernel.kallsyms] [k] fast_exception_return
5.12% random_access_b [kernel.kallsyms] [k] .__hash_page_64K
4.80% random_access_b [kernel.kallsyms] [k] .hash_page_mm
3.31% random_access_b [kernel.kallsyms] [k] data_access_common
1.84% random_access_b [kernel.kallsyms] [k] .trace_hardirqs_on_caller
Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/machdep.h | 2 +- arch/powerpc/include/asm/mmu-hash64.h | 22 ++++++++++------ arch/powerpc/include/asm/tlbflush.h | 4 +-- arch/powerpc/kernel/exceptions-64s.S | 2 ++ arch/powerpc/mm/hash_low_64.S | 15 ++++++----- arch/powerpc/mm/hash_native_64.c | 15 ++++++++--- arch/powerpc/mm/hash_utils_64.c | 44 ++++++++++++++++++++----------- arch/powerpc/mm/hugepage-hash64.c | 8 +++--- arch/powerpc/mm/hugetlbpage-hash64.c | 6 ++--- arch/powerpc/mm/pgtable_64.c | 7 ++--- arch/powerpc/platforms/cell/beat_htab.c | 4 +-- arch/powerpc/platforms/cell/spu_base.c | 5 ++-- arch/powerpc/platforms/cell/spufs/fault.c | 2 +- arch/powerpc/platforms/ps3/htab.c | 2 +- arch/powerpc/platforms/pseries/lpar.c | 2 +- drivers/misc/cxl/fault.c | 8 ++++-- 16 files changed, 91 insertions(+), 57 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h index e5c0919acca4..c8175a3fe560 100644 --- a/arch/powerpc/include/asm/machdep.h +++ b/arch/powerpc/include/asm/machdep.h @@ -42,7 +42,7 @@ struct machdep_calls { unsigned long newpp, unsigned long vpn, int bpsize, int apsize, - int ssize, int local); + int ssize, unsigned long flags); void (*hpte_updateboltedpp)(unsigned long newpp, unsigned long ea, int psize, int ssize); diff --git a/arch/powerpc/include/asm/mmu-hash64.h b/arch/powerpc/include/asm/mmu-hash64.h index aeebc94b2bce..4f13c3ed7acf 100644 --- a/arch/powerpc/include/asm/mmu-hash64.h +++ b/arch/powerpc/include/asm/mmu-hash64.h @@ -316,27 +316,33 @@ static inline unsigned long hpt_hash(unsigned long vpn, return hash & 0x7fffffffffUL; } +#define HPTE_LOCAL_UPDATE 0x1 +#define HPTE_NOHPTE_UPDATE 0x2 + extern int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid, pte_t *ptep, unsigned long trap, - unsigned int local, int ssize, int subpage_prot); + unsigned long flags, int ssize, int subpage_prot); extern int
__hash_page_64K(unsigned long ea, unsigned long access, unsigned long vsid, pte_t *ptep, unsigned long trap, - unsigned int local, int ssize); + unsigned long flags, int ssize); struct mm_struct; unsigned int hash_page_do_lazy_icache(unsigned int pp, pte_t pte, int trap); -extern int hash_page_mm(struct mm_struct *mm, unsigned long ea, unsigned long access, unsigned long trap); -extern int hash_page(unsigned long ea, unsigned long access, unsigned long trap); +extern int hash_page_mm(struct mm_struct *mm, unsigned long ea, + unsigned long access, unsigned long trap, + unsigned long flags); +extern int hash_page(unsigned long ea, unsigned long access, unsigned long trap, + unsigned long dsisr); int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid, - pte_t *ptep, unsigned long trap, int local, int ssize, - unsigned int shift, unsigned int mmu_psize); + pte_t *ptep, unsigned long trap, unsigned long flags, + int ssize, unsigned int shift, unsigned int mmu_psize); #ifdef CONFIG_TRANSPARENT_HUGEPAGE extern int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid, pmd_t *pmdp, unsigned long trap, - int local, int ssize, unsigned int psize); + unsigned long flags, int ssize, unsigned int psize); #else static inline int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid, pmd_t *pmdp, - unsigned long trap, int local, + unsigned long trap, unsigned long flags, int ssize, unsigned int psize) { BUG(); diff --git a/arch/powerpc/include/asm/tlbflush.h b/arch/powerpc/include/asm/tlbflush.h index 4d3ecd8d8929..23d351ca0303 100644 --- a/arch/powerpc/include/asm/tlbflush.h +++ b/arch/powerpc/include/asm/tlbflush.h @@ -125,11 +125,11 @@ static inline void arch_leave_lazy_mmu_mode(void) extern void flush_hash_page(unsigned long vpn, real_pte_t pte, int psize, - int ssize, int local); + int ssize, unsigned long flags); extern void flush_hash_range(unsigned long number, int local); extern void flush_hash_hugepage(unsigned long vsid, unsigned long addr, pmd_t *pmdp, unsigned int psize, int ssize, - int local); + unsigned long flags); static inline void local_flush_tlb_mm(struct mm_struct *mm) { diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index ad62f4d6ce31..6213f494f40b 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -1565,9 +1565,11 @@ do_hash_page: * r3 contains the faulting address * r4 contains the required access permissions * r5 contains the trap number + * r6 contains dsisr * * at return r3 = 0 for success, 1 for page fault, negative for error */ + ld r6,_DSISR(r1) bl hash_page /* build HPTE if possible */ cmpdi r3,0 /* see if hash_page succeeded */ diff --git a/arch/powerpc/mm/hash_low_64.S b/arch/powerpc/mm/hash_low_64.S index 5094f32b706e..463174a4a647 100644 --- a/arch/powerpc/mm/hash_low_64.S +++ b/arch/powerpc/mm/hash_low_64.S @@ -46,7 +46,8 @@ /* * _hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid, - * pte_t *ptep, unsigned long trap, int local, int ssize) + * pte_t *ptep, unsigned long trap, unsigned long flags, + * int ssize) * * Adds a 4K page to the hash table in a segment of 4K pages only */ @@ -298,7 +299,7 @@ htab_modify_pte: li r6,MMU_PAGE_4K /* base page size */ li r7,MMU_PAGE_4K /* actual page size */ ld r8,STK_PARAM(R9)(r1) /* segment size */ - ld r9,STK_PARAM(R8)(r1) /* get "local" param */ + ld r9,STK_PARAM(R8)(r1) /* get "flags" param */ .globl htab_call_hpte_updatepp htab_call_hpte_updatepp: bl . 
/* Patched by htab_finish_init() */ @@ -338,8 +339,8 @@ htab_pte_insert_failure: *****************************************************************************/ /* _hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid, - * pte_t *ptep, unsigned long trap, int local, int ssize, - * int subpg_prot) + * pte_t *ptep, unsigned long trap, unsigned local flags, + * int ssize, int subpg_prot) */ /* @@ -594,7 +595,7 @@ htab_inval_old_hpte: li r5,0 /* PTE.hidx */ li r6,MMU_PAGE_64K /* psize */ ld r7,STK_PARAM(R9)(r1) /* ssize */ - ld r8,STK_PARAM(R8)(r1) /* local */ + ld r8,STK_PARAM(R8)(r1) /* flags */ bl flush_hash_page /* Clear out _PAGE_HPTE_SUB bits in the new linux PTE */ lis r0,_PAGE_HPTE_SUB@h @@ -666,7 +667,7 @@ htab_modify_pte: li r6,MMU_PAGE_4K /* base page size */ li r7,MMU_PAGE_4K /* actual page size */ ld r8,STK_PARAM(R9)(r1) /* segment size */ - ld r9,STK_PARAM(R8)(r1) /* get "local" param */ + ld r9,STK_PARAM(R8)(r1) /* get "flags" param */ .globl htab_call_hpte_updatepp htab_call_hpte_updatepp: bl . /* patched by htab_finish_init() */ @@ -962,7 +963,7 @@ ht64_modify_pte: li r6,MMU_PAGE_64K /* base page size */ li r7,MMU_PAGE_64K /* actual page size */ ld r8,STK_PARAM(R9)(r1) /* segment size */ - ld r9,STK_PARAM(R8)(r1) /* get "local" param */ + ld r9,STK_PARAM(R8)(r1) /* get "flags" param */ .globl ht64_call_hpte_updatepp ht64_call_hpte_updatepp: bl . /* patched by htab_finish_init() */ diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c index 13700911b522..9c4880ddecd6 100644 --- a/arch/powerpc/mm/hash_native_64.c +++ b/arch/powerpc/mm/hash_native_64.c @@ -283,11 +283,11 @@ static long native_hpte_remove(unsigned long hpte_group) static long native_hpte_updatepp(unsigned long slot, unsigned long newpp, unsigned long vpn, int bpsize, - int apsize, int ssize, int local) + int apsize, int ssize, unsigned long flags) { struct hash_pte *hptep = htab_address + slot; unsigned long hpte_v, want_v; - int ret = 0; + int ret = 0, local = 0; want_v = hpte_encode_avpn(vpn, bpsize, ssize); @@ -322,8 +322,15 @@ static long native_hpte_updatepp(unsigned long slot, unsigned long newpp, } native_unlock_hpte(hptep); } - /* Ensure it is out of the tlb too. 
*/ - tlbie(vpn, bpsize, apsize, ssize, local); + + if (flags & HPTE_LOCAL_UPDATE) + local = 1; + /* + * Ensure it is out of the tlb too if it is not a nohpte fault + */ + if (!(flags & HPTE_NOHPTE_UPDATE)) + tlbie(vpn, bpsize, apsize, ssize, local); + return ret; } diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index 68211d398fdb..e56a307bc676 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -989,7 +989,9 @@ static void check_paca_psize(unsigned long ea, struct mm_struct *mm, * -1 - critical hash insertion error * -2 - access not permitted by subpage protection mechanism */ -int hash_page_mm(struct mm_struct *mm, unsigned long ea, unsigned long access, unsigned long trap) +int hash_page_mm(struct mm_struct *mm, unsigned long ea, + unsigned long access, unsigned long trap, + unsigned long flags) { enum ctx_state prev_state = exception_enter(); pgd_t *pgdir; @@ -997,7 +999,7 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea, unsigned long access, u pte_t *ptep; unsigned hugeshift; const struct cpumask *tmp; - int rc, user_region = 0, local = 0; + int rc, user_region = 0; int psize, ssize; DBG_LOW("hash_page(ea=%016lx, access=%lx, trap=%lx\n", @@ -1049,7 +1051,7 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea, unsigned long access, u /* Check CPU locality */ tmp = cpumask_of(smp_processor_id()); if (user_region && cpumask_equal(mm_cpumask(mm), tmp)) - local = 1; + flags |= HPTE_LOCAL_UPDATE; #ifndef CONFIG_PPC_64K_PAGES /* If we use 4K pages and our psize is not 4K, then we might @@ -1086,11 +1088,11 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea, unsigned long access, u if (hugeshift) { if (pmd_trans_huge(*(pmd_t *)ptep)) rc = __hash_page_thp(ea, access, vsid, (pmd_t *)ptep, - trap, local, ssize, psize); + trap, flags, ssize, psize); #ifdef CONFIG_HUGETLB_PAGE else rc = __hash_page_huge(ea, access, vsid, ptep, trap, - local, ssize, hugeshift, psize); + flags, ssize, hugeshift, psize); #else else { /* @@ -1149,7 +1151,8 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea, unsigned long access, u #ifdef CONFIG_PPC_HAS_HASH_64K if (psize == MMU_PAGE_64K) - rc = __hash_page_64K(ea, access, vsid, ptep, trap, local, ssize); + rc = __hash_page_64K(ea, access, vsid, ptep, trap, + flags, ssize); else #endif /* CONFIG_PPC_HAS_HASH_64K */ { @@ -1158,7 +1161,7 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea, unsigned long access, u rc = -2; else rc = __hash_page_4K(ea, access, vsid, ptep, trap, - local, ssize, spp); + flags, ssize, spp); } /* Dump some info in case of hash insertion failure, they should @@ -1181,14 +1184,19 @@ bail: } EXPORT_SYMBOL_GPL(hash_page_mm); -int hash_page(unsigned long ea, unsigned long access, unsigned long trap) +int hash_page(unsigned long ea, unsigned long access, unsigned long trap, + unsigned long dsisr) { + unsigned long flags = 0; struct mm_struct *mm = current->mm; if (REGION_ID(ea) == VMALLOC_REGION_ID) mm = &init_mm; - return hash_page_mm(mm, ea, access, trap); + if (dsisr & DSISR_NOHPTE) + flags |= HPTE_NOHPTE_UPDATE; + + return hash_page_mm(mm, ea, access, trap, flags); } EXPORT_SYMBOL_GPL(hash_page); @@ -1200,7 +1208,7 @@ void hash_preload(struct mm_struct *mm, unsigned long ea, pgd_t *pgdir; pte_t *ptep; unsigned long flags; - int rc, ssize, local = 0; + int rc, ssize, update_flags = 0; BUG_ON(REGION_ID(ea) != USER_REGION_ID); @@ -1251,16 +1259,17 @@ void hash_preload(struct mm_struct *mm, unsigned long ea, /* Is that local to this CPU ? 
*/ if (cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) - local = 1; + update_flags |= HPTE_LOCAL_UPDATE; /* Hash it in */ #ifdef CONFIG_PPC_HAS_HASH_64K if (mm->context.user_psize == MMU_PAGE_64K) - rc = __hash_page_64K(ea, access, vsid, ptep, trap, local, ssize); + rc = __hash_page_64K(ea, access, vsid, ptep, trap, + update_flags, ssize); else #endif /* CONFIG_PPC_HAS_HASH_64K */ - rc = __hash_page_4K(ea, access, vsid, ptep, trap, local, ssize, - subpage_protection(mm, ea)); + rc = __hash_page_4K(ea, access, vsid, ptep, trap, update_flags, + ssize, subpage_protection(mm, ea)); /* Dump some info in case of hash insertion failure, they should * never happen so it is really useful to know if/when they do @@ -1278,9 +1287,10 @@ out_exit: * do not forget to update the assembly call site ! */ void flush_hash_page(unsigned long vpn, real_pte_t pte, int psize, int ssize, - int local) + unsigned long flags) { unsigned long hash, index, shift, hidx, slot; + int local = flags & HPTE_LOCAL_UPDATE; DBG_LOW("flush_hash_page(vpn=%016lx)\n", vpn); pte_iterate_hashed_subpages(pte, psize, vpn, index, shift) { @@ -1317,12 +1327,14 @@ void flush_hash_page(unsigned long vpn, real_pte_t pte, int psize, int ssize, #ifdef CONFIG_TRANSPARENT_HUGEPAGE void flush_hash_hugepage(unsigned long vsid, unsigned long addr, - pmd_t *pmdp, unsigned int psize, int ssize, int local) + pmd_t *pmdp, unsigned int psize, int ssize, + unsigned long flags) { int i, max_hpte_count, valid; unsigned long s_addr; unsigned char *hpte_slot_array; unsigned long hidx, shift, vpn, hash, slot; + int local = flags & HPTE_LOCAL_UPDATE; s_addr = addr & HPAGE_PMD_MASK; hpte_slot_array = get_hpte_slot_array(pmdp); diff --git a/arch/powerpc/mm/hugepage-hash64.c b/arch/powerpc/mm/hugepage-hash64.c index 3a648cd363ae..86686514ae13 100644 --- a/arch/powerpc/mm/hugepage-hash64.c +++ b/arch/powerpc/mm/hugepage-hash64.c @@ -19,8 +19,8 @@ #include int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid, - pmd_t *pmdp, unsigned long trap, int local, int ssize, - unsigned int psize) + pmd_t *pmdp, unsigned long trap, unsigned long flags, + int ssize, unsigned int psize) { unsigned int index, valid; unsigned char *hpte_slot_array; @@ -95,7 +95,7 @@ int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid, */ if ((old_pmd & _PAGE_HASHPTE) && !(old_pmd & _PAGE_COMBO)) flush_hash_hugepage(vsid, ea, pmdp, MMU_PAGE_64K, - ssize, local); + ssize, flags); } valid = hpte_valid(hpte_slot_array, index); @@ -108,7 +108,7 @@ int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid, slot += hidx & _PTEIDX_GROUP_IX; ret = ppc_md.hpte_updatepp(slot, rflags, vpn, - psize, lpsize, ssize, local); + psize, lpsize, ssize, flags); /* * We failed to update, try to insert a new entry. 
*/ diff --git a/arch/powerpc/mm/hugetlbpage-hash64.c b/arch/powerpc/mm/hugetlbpage-hash64.c index a5bcf9301196..d94b1af53a93 100644 --- a/arch/powerpc/mm/hugetlbpage-hash64.c +++ b/arch/powerpc/mm/hugetlbpage-hash64.c @@ -19,8 +19,8 @@ extern long hpte_insert_repeating(unsigned long hash, unsigned long vpn, unsigned long vflags, int psize, int ssize); int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid, - pte_t *ptep, unsigned long trap, int local, int ssize, - unsigned int shift, unsigned int mmu_psize) + pte_t *ptep, unsigned long trap, unsigned long flags, + int ssize, unsigned int shift, unsigned int mmu_psize) { unsigned long vpn; unsigned long old_pte, new_pte; @@ -81,7 +81,7 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid, slot += (old_pte & _PAGE_F_GIX) >> 12; if (ppc_md.hpte_updatepp(slot, rflags, vpn, mmu_psize, - mmu_psize, ssize, local) == -1) + mmu_psize, ssize, flags) == -1) old_pte &= ~_PAGE_HPTEFLAGS; } diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c index eea9fa1f8ae7..4fe5f64cc179 100644 --- a/arch/powerpc/mm/pgtable_64.c +++ b/arch/powerpc/mm/pgtable_64.c @@ -739,9 +739,10 @@ void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address, void hpte_do_hugepage_flush(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp, unsigned long old_pmd) { - int ssize, local = 0; + int ssize; unsigned int psize; unsigned long vsid; + unsigned long flags = 0; const struct cpumask *tmp; /* get the base page size,vsid and segment size */ @@ -765,9 +766,9 @@ void hpte_do_hugepage_flush(struct mm_struct *mm, unsigned long addr, tmp = cpumask_of(smp_processor_id()); if (cpumask_equal(mm_cpumask(mm), tmp)) - local = 1; + flags |= HPTE_LOCAL_UPDATE; - return flush_hash_hugepage(vsid, addr, pmdp, psize, ssize, local); + return flush_hash_hugepage(vsid, addr, pmdp, psize, ssize, flags); } static pmd_t pmd_set_protbits(pmd_t pmd, pgprot_t pgprot) diff --git a/arch/powerpc/platforms/cell/beat_htab.c b/arch/powerpc/platforms/cell/beat_htab.c index d4d245c0d787..bee9232fe619 100644 --- a/arch/powerpc/platforms/cell/beat_htab.c +++ b/arch/powerpc/platforms/cell/beat_htab.c @@ -186,7 +186,7 @@ static long beat_lpar_hpte_updatepp(unsigned long slot, unsigned long newpp, unsigned long vpn, int psize, int apsize, - int ssize, int local) + int ssize, unsigned long flags) { unsigned long lpar_rc; u64 dummy0, dummy1; @@ -369,7 +369,7 @@ static long beat_lpar_hpte_updatepp_v3(unsigned long slot, unsigned long newpp, unsigned long vpn, int psize, int apsize, - int ssize, int local) + int ssize, unsigned long flags) { unsigned long lpar_rc; unsigned long want_v; diff --git a/arch/powerpc/platforms/cell/spu_base.c b/arch/powerpc/platforms/cell/spu_base.c index ffcbd242e669..f7af74f83693 100644 --- a/arch/powerpc/platforms/cell/spu_base.c +++ b/arch/powerpc/platforms/cell/spu_base.c @@ -181,7 +181,8 @@ static int __spu_trap_data_seg(struct spu *spu, unsigned long ea) return 0; } -extern int hash_page(unsigned long ea, unsigned long access, unsigned long trap); //XXX +extern int hash_page(unsigned long ea, unsigned long access, + unsigned long trap, unsigned long dsisr); //XXX static int __spu_trap_data_map(struct spu *spu, unsigned long ea, u64 dsisr) { int ret; @@ -196,7 +197,7 @@ static int __spu_trap_data_map(struct spu *spu, unsigned long ea, u64 dsisr) (REGION_ID(ea) != USER_REGION_ID)) { spin_unlock(&spu->register_lock); - ret = hash_page(ea, _PAGE_PRESENT, 0x300); + ret = hash_page(ea, _PAGE_PRESENT, 0x300, 
dsisr); spin_lock(&spu->register_lock); if (!ret) { diff --git a/arch/powerpc/platforms/cell/spufs/fault.c b/arch/powerpc/platforms/cell/spufs/fault.c index e45894a08118..d98f845ac777 100644 --- a/arch/powerpc/platforms/cell/spufs/fault.c +++ b/arch/powerpc/platforms/cell/spufs/fault.c @@ -144,7 +144,7 @@ int spufs_handle_class1(struct spu_context *ctx) access = (_PAGE_PRESENT | _PAGE_USER); access |= (dsisr & MFC_DSISR_ACCESS_PUT) ? _PAGE_RW : 0UL; local_irq_save(flags); - ret = hash_page(ea, access, 0x300); + ret = hash_page(ea, access, 0x300, dsisr); local_irq_restore(flags); /* hashing failed, so try the actual fault handler */ diff --git a/arch/powerpc/platforms/ps3/htab.c b/arch/powerpc/platforms/ps3/htab.c index 3e270e3412ae..2f95d33cf34a 100644 --- a/arch/powerpc/platforms/ps3/htab.c +++ b/arch/powerpc/platforms/ps3/htab.c @@ -110,7 +110,7 @@ static long ps3_hpte_remove(unsigned long hpte_group) static long ps3_hpte_updatepp(unsigned long slot, unsigned long newpp, unsigned long vpn, int psize, int apsize, - int ssize, int local) + int ssize, unsigned long inv_flags) { int result; u64 hpte_v, want_v, hpte_rs; diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index 832f221840f2..469751d92004 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -284,7 +284,7 @@ static long pSeries_lpar_hpte_updatepp(unsigned long slot, unsigned long newpp, unsigned long vpn, int psize, int apsize, - int ssize, int local) + int ssize, unsigned long inv_flags) { unsigned long lpar_rc; unsigned long flags = (newpp & 7) | H_AVPN; diff --git a/drivers/misc/cxl/fault.c b/drivers/misc/cxl/fault.c index c99e896604ee..f8684bca2d79 100644 --- a/drivers/misc/cxl/fault.c +++ b/drivers/misc/cxl/fault.c @@ -133,7 +133,7 @@ static void cxl_handle_page_fault(struct cxl_context *ctx, { unsigned flt = 0; int result; - unsigned long access, flags; + unsigned long access, flags, inv_flags = 0; if ((result = copro_handle_mm_fault(mm, dar, dsisr, &flt))) { pr_devel("copro_handle_mm_fault failed: %#x\n", result); @@ -149,8 +149,12 @@ static void cxl_handle_page_fault(struct cxl_context *ctx, access |= _PAGE_RW; if ((!ctx->kernel) || ~(dar & (1ULL << 63))) access |= _PAGE_USER; + + if (dsisr & DSISR_NOHPTE) + inv_flags |= HPTE_NOHPTE_UPDATE; + local_irq_save(flags); - hash_page_mm(mm, dar, access, 0x300); + hash_page_mm(mm, dar, access, 0x300, inv_flags); local_irq_restore(flags); pr_devel("Page fault successfully handled for pe: %i!\n", ctx->pe); -- cgit v1.2.3-55-g7522
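As a condensed recap of the final patch above: the new flags word travels from the fault entry down to the updatepp backend roughly as follows. This paraphrases the hash_page(), hash_page_mm() and native_hpte_updatepp() hunks rather than quoting them:

	/* Fault entry: derive the flags from the DSISR the hardware gave us. */
	unsigned long flags = 0;

	if (dsisr & DSISR_NOHPTE)	/* no HPTE existed for this EA */
		flags |= HPTE_NOHPTE_UPDATE;
	if (user_region && cpumask_equal(mm_cpumask(mm), tmp))
		flags |= HPTE_LOCAL_UPDATE;	/* mm only ever ran here */

	/*
	 * Backend: a nohpte fault means no stale translation can exist,
	 * so the (expensive, broadcast) tlbie can be skipped entirely;
	 * otherwise prefer the local form when the mm never ran elsewhere.
	 */
	int local = !!(flags & HPTE_LOCAL_UPDATE);

	if (!(flags & HPTE_NOHPTE_UPDATE))
		tlbie(vpn, bpsize, apsize, ssize, local);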