author    | Tony Lindgren | 2017-03-06 17:37:53 +0100
committer | Tony Lindgren | 2017-03-06 17:37:53 +0100
commit    | e24bce8fb4c26bd0d8eca74cbbee1ad049246be3
tree      | c219c2ec183633aa15841fca5b055a09d2d0b980 /arch/powerpc/mm
parent    | ARM: OMAP2+: Release device node after it is no longer needed.
parent    | Linux 4.11-rc1
Merge tag 'v4.11-rc1' into omap-for-v4.11/fixes
Linux 4.11-rc1
Diffstat (limited to 'arch/powerpc/mm')
-rw-r--r-- | arch/powerpc/mm/copro_fault.c         |  10
-rw-r--r-- | arch/powerpc/mm/fault.c               |  44
-rw-r--r-- | arch/powerpc/mm/hash_utils_64.c       |  64
-rw-r--r-- | arch/powerpc/mm/hugetlbpage-hash64.c  |  21
-rw-r--r-- | arch/powerpc/mm/init-common.c         |   3
-rw-r--r-- | arch/powerpc/mm/init_64.c             |  35
-rw-r--r-- | arch/powerpc/mm/mem.c                 |   4
-rw-r--r-- | arch/powerpc/mm/mmap.c                |   3
-rw-r--r-- | arch/powerpc/mm/mmu_context_iommu.c   |   4
-rw-r--r-- | arch/powerpc/mm/numa.c                |  15
-rw-r--r-- | arch/powerpc/mm/pgtable-book3s64.c    |   6
-rw-r--r-- | arch/powerpc/mm/pgtable-hash64.c      |   2
-rw-r--r-- | arch/powerpc/mm/pgtable-radix.c       | 263
-rw-r--r-- | arch/powerpc/mm/pgtable.c             |   4
-rw-r--r-- | arch/powerpc/mm/pgtable_64.c          |  22
-rw-r--r-- | arch/powerpc/mm/slb.c                 |   2
-rw-r--r-- | arch/powerpc/mm/slb_low.S             |  24
-rw-r--r-- | arch/powerpc/mm/subpage-prot.c        |   4
18 files changed, 420 insertions, 110 deletions
diff --git a/arch/powerpc/mm/copro_fault.c b/arch/powerpc/mm/copro_fault.c index aaa7ec6788b9..697b70ad1195 100644 --- a/arch/powerpc/mm/copro_fault.c +++ b/arch/powerpc/mm/copro_fault.c @@ -67,11 +67,13 @@ int copro_handle_mm_fault(struct mm_struct *mm, unsigned long ea, if (!(vma->vm_flags & (VM_READ | VM_EXEC))) goto out_unlock; /* - * protfault should only happen due to us - * mapping a region readonly temporarily. PROT_NONE - * is also covered by the VMA check above. + * PROT_NONE is covered by the VMA check above. + * and hash should get a NOHPTE fault instead of + * a PROTFAULT in case fixup is needed for things + * like autonuma. */ - WARN_ON_ONCE(dsisr & DSISR_PROTFAULT); + if (!radix_enabled()) + WARN_ON_ONCE(dsisr & DSISR_PROTFAULT); } ret = 0; diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index 62a50d6d1053..51def8a515be 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -17,6 +17,7 @@ #include <linux/signal.h> #include <linux/sched.h> +#include <linux/sched/task_stack.h> #include <linux/kernel.h> #include <linux/errno.h> #include <linux/string.h> @@ -407,15 +408,6 @@ good_area: (cpu_has_feature(CPU_FTR_NOEXECUTE) || !(vma->vm_flags & (VM_READ | VM_WRITE)))) goto bad_area; - -#ifdef CONFIG_PPC_STD_MMU - /* - * protfault should only happen due to us - * mapping a region readonly temporarily. PROT_NONE - * is also covered by the VMA check above. - */ - WARN_ON_ONCE(error_code & DSISR_PROTFAULT); -#endif /* CONFIG_PPC_STD_MMU */ /* a write */ } else if (is_write) { if (!(vma->vm_flags & VM_WRITE)) @@ -425,8 +417,40 @@ good_area: } else { if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE))) goto bad_area; - WARN_ON_ONCE(error_code & DSISR_PROTFAULT); } +#ifdef CONFIG_PPC_STD_MMU + /* + * For hash translation mode, we should never get a + * PROTFAULT. Any update to pte to reduce access will result in us + * removing the hash page table entry, thus resulting in a DSISR_NOHPTE + * fault instead of DSISR_PROTFAULT. + * + * A pte update to relax the access will not result in a hash page table + * entry invalidate and hence can result in DSISR_PROTFAULT. + * ptep_set_access_flags() doesn't do a hpte flush. This is why we have + * the special !is_write in the below conditional. + * + * For platforms that doesn't supports coherent icache and do support + * per page noexec bit, we do setup things such that we do the + * sync between D/I cache via fault. But that is handled via low level + * hash fault code (hash_page_do_lazy_icache()) and we should not reach + * here in such case. + * + * For wrong access that can result in PROTFAULT, the above vma->vm_flags + * check should handle those and hence we should fall to the bad_area + * handling correctly. + * + * For embedded with per page exec support that doesn't support coherent + * icache we do get PROTFAULT and we handle that D/I cache sync in + * set_pte_at while taking the noexec/prot fault. Hence this is WARN_ON + * is conditional for server MMU. + * + * For radix, we can get prot fault for autonuma case, because radix + * page table will have them marked noaccess for user. 
+ */ + if (!radix_enabled() && !is_write) + WARN_ON_ONCE(error_code & DSISR_PROTFAULT); +#endif /* CONFIG_PPC_STD_MMU */ /* * If for any reason at all we couldn't handle the fault, diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index 67e19a0821be..c554768b1fa2 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -23,7 +23,7 @@ #include <linux/spinlock.h> #include <linux/errno.h> -#include <linux/sched.h> +#include <linux/sched/mm.h> #include <linux/proc_fs.h> #include <linux/stat.h> #include <linux/sysctl.h> @@ -35,7 +35,9 @@ #include <linux/memblock.h> #include <linux/context_tracking.h> #include <linux/libfdt.h> +#include <linux/debugfs.h> +#include <asm/debug.h> #include <asm/processor.h> #include <asm/pgtable.h> #include <asm/mmu.h> @@ -747,6 +749,35 @@ static unsigned long __init htab_get_table_size(void) } #ifdef CONFIG_MEMORY_HOTPLUG +void resize_hpt_for_hotplug(unsigned long new_mem_size) +{ + unsigned target_hpt_shift; + + if (!mmu_hash_ops.resize_hpt) + return; + + target_hpt_shift = htab_shift_for_mem_size(new_mem_size); + + /* + * To avoid lots of HPT resizes if memory size is fluctuating + * across a boundary, we deliberately have some hysterisis + * here: we immediately increase the HPT size if the target + * shift exceeds the current shift, but we won't attempt to + * reduce unless the target shift is at least 2 below the + * current shift + */ + if ((target_hpt_shift > ppc64_pft_size) + || (target_hpt_shift < (ppc64_pft_size - 1))) { + int rc; + + rc = mmu_hash_ops.resize_hpt(target_hpt_shift); + if (rc) + printk(KERN_WARNING + "Unable to resize hash page table to target order %d: %d\n", + target_hpt_shift, rc); + } +} + int hash__create_section_mapping(unsigned long start, unsigned long end) { int rc = htab_bolt_mapping(start, end, __pa(start), @@ -1795,3 +1826,34 @@ void hash__setup_initial_memory_limit(phys_addr_t first_memblock_base, /* Finally limit subsequent allocations */ memblock_set_current_limit(ppc64_rma_size); } + +#ifdef CONFIG_DEBUG_FS + +static int hpt_order_get(void *data, u64 *val) +{ + *val = ppc64_pft_size; + return 0; +} + +static int hpt_order_set(void *data, u64 val) +{ + if (!mmu_hash_ops.resize_hpt) + return -ENODEV; + + return mmu_hash_ops.resize_hpt(val); +} + +DEFINE_SIMPLE_ATTRIBUTE(fops_hpt_order, hpt_order_get, hpt_order_set, "%llu\n"); + +static int __init hash64_debugfs(void) +{ + if (!debugfs_create_file("hpt_order", 0600, powerpc_debugfs_root, + NULL, &fops_hpt_order)) { + pr_err("lpar: unable to create hpt_order debugsfs file\n"); + } + + return 0; +} +machine_device_initcall(pseries, hash64_debugfs); + +#endif /* CONFIG_DEBUG_FS */ diff --git a/arch/powerpc/mm/hugetlbpage-hash64.c b/arch/powerpc/mm/hugetlbpage-hash64.c index 37b5f91e381b..a84bb44497f9 100644 --- a/arch/powerpc/mm/hugetlbpage-hash64.c +++ b/arch/powerpc/mm/hugetlbpage-hash64.c @@ -116,24 +116,3 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid, *ptep = __pte(new_pte & ~H_PAGE_BUSY); return 0; } - -#if defined(CONFIG_PPC_64K_PAGES) && defined(CONFIG_DEBUG_VM) -/* - * This enables us to catch the wrong page directory format - * Moved here so that we can use WARN() in the call. - */ -int hugepd_ok(hugepd_t hpd) -{ - bool is_hugepd; - unsigned long hpdval; - - hpdval = hpd_val(hpd); - - /* - * We should not find this format in page directory, warn otherwise. 
- */ - is_hugepd = (((hpdval & 0x3) == 0x0) && ((hpdval & HUGEPD_SHIFT_MASK) != 0)); - WARN(is_hugepd, "Found wrong page directory format\n"); - return 0; -} -#endif diff --git a/arch/powerpc/mm/init-common.c b/arch/powerpc/mm/init-common.c index f2108c40e697..eb8c6c8c4851 100644 --- a/arch/powerpc/mm/init-common.c +++ b/arch/powerpc/mm/init-common.c @@ -41,6 +41,7 @@ static void pmd_ctor(void *addr) } struct kmem_cache *pgtable_cache[MAX_PGTABLE_INDEX_SIZE]; +EXPORT_SYMBOL_GPL(pgtable_cache); /* used by kvm_hv module */ /* * Create a kmem_cache() for pagetables. This is not used for PTE @@ -86,7 +87,7 @@ void pgtable_cache_add(unsigned shift, void (*ctor)(void *)) pr_debug("Allocated pgtable cache for order %d\n", shift); } - +EXPORT_SYMBOL_GPL(pgtable_cache_add); /* used by kvm_hv module */ void pgtable_cache_init(void) { diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c index 8e1588021d1c..6aa3b76aa0d6 100644 --- a/arch/powerpc/mm/init_64.c +++ b/arch/powerpc/mm/init_64.c @@ -42,6 +42,8 @@ #include <linux/memblock.h> #include <linux/hugetlb.h> #include <linux/slab.h> +#include <linux/of_fdt.h> +#include <linux/libfdt.h> #include <asm/pgalloc.h> #include <asm/page.h> @@ -344,6 +346,30 @@ static int __init parse_disable_radix(char *p) } early_param("disable_radix", parse_disable_radix); +/* + * If we're running under a hypervisor, we need to check the contents of + * /chosen/ibm,architecture-vec-5 to see if the hypervisor is willing to do + * radix. If not, we clear the radix feature bit so we fall back to hash. + */ +static void early_check_vec5(void) +{ + unsigned long root, chosen; + int size; + const u8 *vec5; + + root = of_get_flat_dt_root(); + chosen = of_get_flat_dt_subnode_by_name(root, "chosen"); + if (chosen == -FDT_ERR_NOTFOUND) + return; + vec5 = of_get_flat_dt_prop(chosen, "ibm,architecture-vec-5", &size); + if (!vec5) + return; + if (size <= OV5_INDX(OV5_MMU_RADIX_300) || + !(vec5[OV5_INDX(OV5_MMU_RADIX_300)] & OV5_FEAT(OV5_MMU_RADIX_300))) + /* Hypervisor doesn't support radix */ + cur_cpu_spec->mmu_features &= ~MMU_FTR_TYPE_RADIX; +} + void __init mmu_early_init_devtree(void) { /* Disable radix mode based on kernel command line. */ @@ -351,6 +377,15 @@ void __init mmu_early_init_devtree(void) if (disable_radix || !(mfmsr() & MSR_HV)) cur_cpu_spec->mmu_features &= ~MMU_FTR_TYPE_RADIX; + /* + * Check /chosen/ibm,architecture-vec-5 if running as a guest. + * When running bare-metal, we can use radix if we like + * even though the ibm,architecture-vec-5 property created by + * skiboot doesn't have the necessary bits set. 
+ */ + if (early_radix_enabled() && !(mfmsr() & MSR_HV)) + early_check_vec5(); + if (early_radix_enabled()) radix__early_init_devtree(); else diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 5f844337de21..9ee536ec0739 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -134,6 +134,8 @@ int arch_add_memory(int nid, u64 start, u64 size, bool for_device) unsigned long nr_pages = size >> PAGE_SHIFT; int rc; + resize_hpt_for_hotplug(memblock_phys_mem_size()); + pgdata = NODE_DATA(nid); start = (unsigned long)__va(start); @@ -174,6 +176,8 @@ int arch_remove_memory(u64 start, u64 size) */ vm_unmap_aliases(); + resize_hpt_for_hotplug(memblock_phys_mem_size()); + return ret; } #endif diff --git a/arch/powerpc/mm/mmap.c b/arch/powerpc/mm/mmap.c index 2f1e44362198..a5d9ef59debe 100644 --- a/arch/powerpc/mm/mmap.c +++ b/arch/powerpc/mm/mmap.c @@ -25,7 +25,8 @@ #include <linux/personality.h> #include <linux/mm.h> #include <linux/random.h> -#include <linux/sched.h> +#include <linux/sched/signal.h> +#include <linux/sched/mm.h> #include <linux/elf-randomize.h> #include <linux/security.h> #include <linux/mman.h> diff --git a/arch/powerpc/mm/mmu_context_iommu.c b/arch/powerpc/mm/mmu_context_iommu.c index 104bad029ce9..497130c5c742 100644 --- a/arch/powerpc/mm/mmu_context_iommu.c +++ b/arch/powerpc/mm/mmu_context_iommu.c @@ -10,7 +10,7 @@ * */ -#include <linux/sched.h> +#include <linux/sched/signal.h> #include <linux/slab.h> #include <linux/rculist.h> #include <linux/vmalloc.h> @@ -184,7 +184,7 @@ long mm_iommu_get(struct mm_struct *mm, unsigned long ua, unsigned long entries, * of the CMA zone if possible. NOTE: faulting in + migration * can be expensive. Batching can be considered later */ - if (get_pageblock_migratetype(page) == MIGRATE_CMA) { + if (is_migrate_cma_page(page)) { if (mm_iommu_move_page_from_cma(page)) goto populate; if (1 != get_user_pages_fast(ua + (i << PAGE_SHIFT), diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index b1099cb2f393..9befaee237d6 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -290,7 +290,7 @@ int of_node_to_nid(struct device_node *device) return nid; } -EXPORT_SYMBOL_GPL(of_node_to_nid); +EXPORT_SYMBOL(of_node_to_nid); static int __init find_min_common_depth(void) { @@ -786,14 +786,9 @@ new_range: fake_numa_create_new_node(((start + size) >> PAGE_SHIFT), &nid); node_set_online(nid); - if (!(size = numa_enforce_memory_limit(start, size))) { - if (--ranges) - goto new_range; - else - continue; - } - - memblock_set_node(start, size, &memblock.memory, nid); + size = numa_enforce_memory_limit(start, size); + if (size) + memblock_set_node(start, size, &memblock.memory, nid); if (--ranges) goto new_range; @@ -1098,7 +1093,7 @@ int hot_add_scn_to_nid(unsigned long scn_addr) nid = hot_add_node_scn_to_nid(scn_addr); } - if (nid < 0 || !node_online(nid)) + if (nid < 0 || !node_possible(nid)) nid = first_online_node; return nid; diff --git a/arch/powerpc/mm/pgtable-book3s64.c b/arch/powerpc/mm/pgtable-book3s64.c index 653ff6c74ebe..5fcb3dd74c13 100644 --- a/arch/powerpc/mm/pgtable-book3s64.c +++ b/arch/powerpc/mm/pgtable-book3s64.c @@ -8,6 +8,8 @@ */ #include <linux/sched.h> +#include <linux/mm_types.h> + #include <asm/pgalloc.h> #include <asm/tlb.h> @@ -131,7 +133,7 @@ void mmu_cleanup_all(void) int create_section_mapping(unsigned long start, unsigned long end) { if (radix_enabled()) - return -ENODEV; + return radix__create_section_mapping(start, end); return hash__create_section_mapping(start, end); } @@ -139,7 +141,7 
@@ int create_section_mapping(unsigned long start, unsigned long end) int remove_section_mapping(unsigned long start, unsigned long end) { if (radix_enabled()) - return -ENODEV; + return radix__remove_section_mapping(start, end); return hash__remove_section_mapping(start, end); } diff --git a/arch/powerpc/mm/pgtable-hash64.c b/arch/powerpc/mm/pgtable-hash64.c index c23e286a6b8f..8b85a14b08ea 100644 --- a/arch/powerpc/mm/pgtable-hash64.c +++ b/arch/powerpc/mm/pgtable-hash64.c @@ -10,6 +10,8 @@ */ #include <linux/sched.h> +#include <linux/mm_types.h> + #include <asm/pgalloc.h> #include <asm/tlb.h> diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c index 34f1a0dbc898..2a590a98e652 100644 --- a/arch/powerpc/mm/pgtable-radix.c +++ b/arch/powerpc/mm/pgtable-radix.c @@ -8,7 +8,7 @@ * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. */ -#include <linux/sched.h> +#include <linux/sched/mm.h> #include <linux/memblock.h> #include <linux/of_fdt.h> @@ -18,6 +18,7 @@ #include <asm/machdep.h> #include <asm/mmu.h> #include <asm/firmware.h> +#include <asm/powernv.h> #include <trace/events/thp.h> @@ -107,54 +108,66 @@ set_the_pte: return 0; } +static inline void __meminit print_mapping(unsigned long start, + unsigned long end, + unsigned long size) +{ + if (end <= start) + return; + + pr_info("Mapped range 0x%lx - 0x%lx with 0x%lx\n", start, end, size); +} + +static int __meminit create_physical_mapping(unsigned long start, + unsigned long end) +{ + unsigned long addr, mapping_size = 0; + + start = _ALIGN_UP(start, PAGE_SIZE); + for (addr = start; addr < end; addr += mapping_size) { + unsigned long gap, previous_size; + int rc; + + gap = end - addr; + previous_size = mapping_size; + + if (IS_ALIGNED(addr, PUD_SIZE) && gap >= PUD_SIZE && + mmu_psize_defs[MMU_PAGE_1G].shift) + mapping_size = PUD_SIZE; + else if (IS_ALIGNED(addr, PMD_SIZE) && gap >= PMD_SIZE && + mmu_psize_defs[MMU_PAGE_2M].shift) + mapping_size = PMD_SIZE; + else + mapping_size = PAGE_SIZE; + + if (mapping_size != previous_size) { + print_mapping(start, addr, previous_size); + start = addr; + } + + rc = radix__map_kernel_page((unsigned long)__va(addr), addr, + PAGE_KERNEL_X, mapping_size); + if (rc) + return rc; + } + + print_mapping(start, addr, mapping_size); + return 0; +} + static void __init radix_init_pgtable(void) { - int loop_count; - u64 base, end, start_addr; unsigned long rts_field; struct memblock_region *reg; - unsigned long linear_page_size; /* We don't support slb for radix */ mmu_slb_size = 0; /* * Create the linear mapping, using standard page size for now */ - loop_count = 0; - for_each_memblock(memory, reg) { - - start_addr = reg->base; - -redo: - if (loop_count < 1 && mmu_psize_defs[MMU_PAGE_1G].shift) - linear_page_size = PUD_SIZE; - else if (loop_count < 2 && mmu_psize_defs[MMU_PAGE_2M].shift) - linear_page_size = PMD_SIZE; - else - linear_page_size = PAGE_SIZE; - - base = _ALIGN_UP(start_addr, linear_page_size); - end = _ALIGN_DOWN(reg->base + reg->size, linear_page_size); - - pr_info("Mapping range 0x%lx - 0x%lx with 0x%lx\n", - (unsigned long)base, (unsigned long)end, - linear_page_size); - - while (base < end) { - radix__map_kernel_page((unsigned long)__va(base), - base, PAGE_KERNEL_X, - linear_page_size); - base += linear_page_size; - } - /* - * map the rest using lower page size - */ - if (end < reg->base + reg->size) { - start_addr = end; - loop_count++; - goto redo; - } - } + for_each_memblock(memory, reg) + 
WARN_ON(create_physical_mapping(reg->base, + reg->base + reg->size)); /* * Allocate Partition table and process table for the * host. @@ -401,6 +414,8 @@ void __init radix__early_init_mmu(void) mtspr(SPRN_LPCR, lpcr | LPCR_UPRT | LPCR_HR); radix_init_partition_table(); radix_init_amor(); + } else { + radix_init_pseries(); } memblock_set_current_limit(MEMBLOCK_ALLOC_ANYWHERE); @@ -438,6 +453,7 @@ void radix__mmu_cleanup_all(void) lpcr = mfspr(SPRN_LPCR); mtspr(SPRN_LPCR, lpcr & ~LPCR_UPRT); mtspr(SPRN_PTCR, 0); + powernv_set_nmmu_ptcr(0); radix__flush_tlb_all(); } } @@ -467,6 +483,173 @@ void radix__setup_initial_memory_limit(phys_addr_t first_memblock_base, memblock_set_current_limit(first_memblock_base + first_memblock_size); } +#ifdef CONFIG_MEMORY_HOTPLUG +static void free_pte_table(pte_t *pte_start, pmd_t *pmd) +{ + pte_t *pte; + int i; + + for (i = 0; i < PTRS_PER_PTE; i++) { + pte = pte_start + i; + if (!pte_none(*pte)) + return; + } + + pte_free_kernel(&init_mm, pte_start); + pmd_clear(pmd); +} + +static void free_pmd_table(pmd_t *pmd_start, pud_t *pud) +{ + pmd_t *pmd; + int i; + + for (i = 0; i < PTRS_PER_PMD; i++) { + pmd = pmd_start + i; + if (!pmd_none(*pmd)) + return; + } + + pmd_free(&init_mm, pmd_start); + pud_clear(pud); +} + +static void remove_pte_table(pte_t *pte_start, unsigned long addr, + unsigned long end) +{ + unsigned long next; + pte_t *pte; + + pte = pte_start + pte_index(addr); + for (; addr < end; addr = next, pte++) { + next = (addr + PAGE_SIZE) & PAGE_MASK; + if (next > end) + next = end; + + if (!pte_present(*pte)) + continue; + + if (!PAGE_ALIGNED(addr) || !PAGE_ALIGNED(next)) { + /* + * The vmemmap_free() and remove_section_mapping() + * codepaths call us with aligned addresses. + */ + WARN_ONCE(1, "%s: unaligned range\n", __func__); + continue; + } + + pte_clear(&init_mm, addr, pte); + } +} + +static void remove_pmd_table(pmd_t *pmd_start, unsigned long addr, + unsigned long end) +{ + unsigned long next; + pte_t *pte_base; + pmd_t *pmd; + + pmd = pmd_start + pmd_index(addr); + for (; addr < end; addr = next, pmd++) { + next = pmd_addr_end(addr, end); + + if (!pmd_present(*pmd)) + continue; + + if (pmd_huge(*pmd)) { + if (!IS_ALIGNED(addr, PMD_SIZE) || + !IS_ALIGNED(next, PMD_SIZE)) { + WARN_ONCE(1, "%s: unaligned range\n", __func__); + continue; + } + + pte_clear(&init_mm, addr, (pte_t *)pmd); + continue; + } + + pte_base = (pte_t *)pmd_page_vaddr(*pmd); + remove_pte_table(pte_base, addr, next); + free_pte_table(pte_base, pmd); + } +} + +static void remove_pud_table(pud_t *pud_start, unsigned long addr, + unsigned long end) +{ + unsigned long next; + pmd_t *pmd_base; + pud_t *pud; + + pud = pud_start + pud_index(addr); + for (; addr < end; addr = next, pud++) { + next = pud_addr_end(addr, end); + + if (!pud_present(*pud)) + continue; + + if (pud_huge(*pud)) { + if (!IS_ALIGNED(addr, PUD_SIZE) || + !IS_ALIGNED(next, PUD_SIZE)) { + WARN_ONCE(1, "%s: unaligned range\n", __func__); + continue; + } + + pte_clear(&init_mm, addr, (pte_t *)pud); + continue; + } + + pmd_base = (pmd_t *)pud_page_vaddr(*pud); + remove_pmd_table(pmd_base, addr, next); + free_pmd_table(pmd_base, pud); + } +} + +static void remove_pagetable(unsigned long start, unsigned long end) +{ + unsigned long addr, next; + pud_t *pud_base; + pgd_t *pgd; + + spin_lock(&init_mm.page_table_lock); + + for (addr = start; addr < end; addr = next) { + next = pgd_addr_end(addr, end); + + pgd = pgd_offset_k(addr); + if (!pgd_present(*pgd)) + continue; + + if (pgd_huge(*pgd)) { + if (!IS_ALIGNED(addr, 
PGDIR_SIZE) || + !IS_ALIGNED(next, PGDIR_SIZE)) { + WARN_ONCE(1, "%s: unaligned range\n", __func__); + continue; + } + + pte_clear(&init_mm, addr, (pte_t *)pgd); + continue; + } + + pud_base = (pud_t *)pgd_page_vaddr(*pgd); + remove_pud_table(pud_base, addr, next); + } + + spin_unlock(&init_mm.page_table_lock); + radix__flush_tlb_kernel_range(start, end); +} + +int __ref radix__create_section_mapping(unsigned long start, unsigned long end) +{ + return create_physical_mapping(start, end); +} + +int radix__remove_section_mapping(unsigned long start, unsigned long end) +{ + remove_pagetable(start, end); + return 0; +} +#endif /* CONFIG_MEMORY_HOTPLUG */ + #ifdef CONFIG_SPARSEMEM_VMEMMAP int __meminit radix__vmemmap_create_mapping(unsigned long start, unsigned long page_size, @@ -482,7 +665,7 @@ int __meminit radix__vmemmap_create_mapping(unsigned long start, #ifdef CONFIG_MEMORY_HOTPLUG void radix__vmemmap_remove_mapping(unsigned long start, unsigned long page_size) { - /* FIXME!! intel does more. We should free page tables mapping vmemmap ? */ + remove_pagetable(start, start + page_size); } #endif #endif diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c index cb39c8bd2436..a03ff3d99e0c 100644 --- a/arch/powerpc/mm/pgtable.c +++ b/arch/powerpc/mm/pgtable.c @@ -193,9 +193,7 @@ void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, */ VM_WARN_ON(pte_present(*ptep) && !pte_protnone(*ptep)); - /* - * Add the pte bit when tryint set a pte - */ + /* Add the pte bit when trying to set a pte */ pte = __pte(pte_val(pte) | _PAGE_PTE); /* Note: mm->context.id might not yet have been assigned as diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c index 8bca7f58afc4..db93cf747a03 100644 --- a/arch/powerpc/mm/pgtable_64.c +++ b/arch/powerpc/mm/pgtable_64.c @@ -52,6 +52,7 @@ #include <asm/sections.h> #include <asm/firmware.h> #include <asm/dma.h> +#include <asm/powernv.h> #include "mmu_decl.h" @@ -436,6 +437,7 @@ void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift) void __init mmu_partition_table_init(void) { unsigned long patb_size = 1UL << PATB_SIZE_SHIFT; + unsigned long ptcr; BUILD_BUG_ON_MSG((PATB_SIZE_SHIFT > 36), "Partition table size too large."); partition_tb = __va(memblock_alloc_base(patb_size, patb_size, @@ -448,19 +450,31 @@ void __init mmu_partition_table_init(void) * update partition table control register, * 64 K size. */ - mtspr(SPRN_PTCR, __pa(partition_tb) | (PATB_SIZE_SHIFT - 12)); + ptcr = __pa(partition_tb) | (PATB_SIZE_SHIFT - 12); + mtspr(SPRN_PTCR, ptcr); + powernv_set_nmmu_ptcr(ptcr); } void mmu_partition_table_set_entry(unsigned int lpid, unsigned long dw0, unsigned long dw1) { + unsigned long old = be64_to_cpu(partition_tb[lpid].patb0); + partition_tb[lpid].patb0 = cpu_to_be64(dw0); partition_tb[lpid].patb1 = cpu_to_be64(dw1); - /* Global flush of TLBs and partition table caches for this lpid */ + /* + * Global flush of TLBs and partition table caches for this lpid. + * The type of flush (hash or radix) depends on what the previous + * use of this partition ID was, not the new use. 
+ */ asm volatile("ptesync" : : : "memory"); - asm volatile(PPC_TLBIE_5(%0,%1,2,0,0) : : - "r" (TLBIEL_INVAL_SET_LPID), "r" (lpid)); + if (old & PATB_HR) + asm volatile(PPC_TLBIE_5(%0,%1,2,0,1) : : + "r" (TLBIEL_INVAL_SET_LPID), "r" (lpid)); + else + asm volatile(PPC_TLBIE_5(%0,%1,2,0,0) : : + "r" (TLBIEL_INVAL_SET_LPID), "r" (lpid)); asm volatile("eieio; tlbsync; ptesync" : : : "memory"); } EXPORT_SYMBOL_GPL(mmu_partition_table_set_entry); diff --git a/arch/powerpc/mm/slb.c b/arch/powerpc/mm/slb.c index 48fc28bab544..5e01b2ece1d0 100644 --- a/arch/powerpc/mm/slb.c +++ b/arch/powerpc/mm/slb.c @@ -22,6 +22,8 @@ #include <asm/cacheflush.h> #include <asm/smp.h> #include <linux/compiler.h> +#include <linux/mm_types.h> + #include <asm/udbg.h> #include <asm/code-patching.h> diff --git a/arch/powerpc/mm/slb_low.S b/arch/powerpc/mm/slb_low.S index e2974fcd20f1..a85e06ea6c20 100644 --- a/arch/powerpc/mm/slb_low.S +++ b/arch/powerpc/mm/slb_low.S @@ -71,9 +71,9 @@ slb_miss_kernel_load_linear: BEGIN_FTR_SECTION - b slb_finish_load + b .Lslb_finish_load END_MMU_FTR_SECTION_IFCLR(MMU_FTR_1T_SEGMENT) - b slb_finish_load_1T + b .Lslb_finish_load_1T 1: #ifdef CONFIG_SPARSEMEM_VMEMMAP @@ -109,9 +109,9 @@ slb_miss_kernel_load_io: addi r9,r9,(MAX_USER_CONTEXT - 0xc + 1)@l BEGIN_FTR_SECTION - b slb_finish_load + b .Lslb_finish_load END_MMU_FTR_SECTION_IFCLR(MMU_FTR_1T_SEGMENT) - b slb_finish_load_1T + b .Lslb_finish_load_1T 0: /* * For userspace addresses, make sure this is region 0. @@ -174,9 +174,9 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_1T_SEGMENT) ld r9,PACACONTEXTID(r13) BEGIN_FTR_SECTION cmpldi r10,0x1000 - bge slb_finish_load_1T + bge .Lslb_finish_load_1T END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT) - b slb_finish_load + b .Lslb_finish_load 8: /* invalid EA - return an error indication */ crset 4*cr0+eq /* indicate failure */ @@ -187,7 +187,7 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT) * * r3 = EA, r9 = context, r10 = ESID, r11 = flags, clobbers r9, cr7 = <> PAGE_OFFSET */ -slb_finish_load: +.Lslb_finish_load: rldimi r10,r9,ESID_BITS,0 ASM_VSID_SCRAMBLE(r10,r9,256M) /* @@ -256,7 +256,7 @@ slb_compare_rr_to_size: * * r3 = EA, r9 = context, r10 = ESID(256MB), r11 = flags, clobbers r9 */ -slb_finish_load_1T: +.Lslb_finish_load_1T: srdi r10,r10,(SID_SHIFT_1T - SID_SHIFT) /* get 1T ESID */ rldimi r10,r9,ESID_BITS_1T,0 ASM_VSID_SCRAMBLE(r10,r9,1T) @@ -272,3 +272,11 @@ slb_finish_load_1T: clrrdi r3,r3,SID_SHIFT_1T /* clear out non-ESID bits */ b 7b + +_ASM_NOKPROBE_SYMBOL(slb_allocate_realmode) +_ASM_NOKPROBE_SYMBOL(slb_miss_kernel_load_linear) +_ASM_NOKPROBE_SYMBOL(slb_miss_kernel_load_io) +_ASM_NOKPROBE_SYMBOL(slb_compare_rr_to_size) +#ifdef CONFIG_SPARSEMEM_VMEMMAP +_ASM_NOKPROBE_SYMBOL(slb_miss_kernel_load_vmemmap) +#endif diff --git a/arch/powerpc/mm/subpage-prot.c b/arch/powerpc/mm/subpage-prot.c index 5c096c01e8bd..94210940112f 100644 --- a/arch/powerpc/mm/subpage-prot.c +++ b/arch/powerpc/mm/subpage-prot.c @@ -248,9 +248,8 @@ long sys_subpage_prot(unsigned long addr, unsigned long len, u32 __user *map) nw = (next - addr) >> PAGE_SHIFT; up_write(&mm->mmap_sem); - err = -EFAULT; if (__copy_from_user(spp, map, nw * sizeof(u32))) - goto out2; + return -EFAULT; map += nw; down_write(&mm->mmap_sem); @@ -262,6 +261,5 @@ long sys_subpage_prot(unsigned long addr, unsigned long len, u32 __user *map) err = 0; out: up_write(&mm->mmap_sem); - out2: return err; } |
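
Note on the pgtable-radix.c hunks above: the new create_physical_mapping() walks each memblock region and, at every step, picks the largest page size that the current address alignment and the remaining gap permit (1G, then 2M, then the base page size), printing a summary line whenever the chosen size changes. The following is a minimal standalone sketch of just that size-selection policy; it is not the kernel code itself, pick_mapping_size() and the SZ_* constants are illustrative stand-ins for PUD_SIZE/PMD_SIZE/PAGE_SIZE, and it assumes both large page sizes are available (which the real code checks via mmu_psize_defs[]).

/*
 * Standalone illustration (not the kernel code) of the page-size
 * selection used by create_physical_mapping() in the patch above.
 * SZ_1G/SZ_2M/SZ_4K stand in for PUD_SIZE/PMD_SIZE/PAGE_SIZE.
 */
#include <stdio.h>

#define SZ_1G (1UL << 30)
#define SZ_2M (1UL << 21)
#define SZ_4K (1UL << 12)

/* Largest mapping size permitted by alignment and the remaining gap. */
static unsigned long pick_mapping_size(unsigned long addr, unsigned long end)
{
	unsigned long gap = end - addr;

	if ((addr % SZ_1G) == 0 && gap >= SZ_1G)
		return SZ_1G;
	if ((addr % SZ_2M) == 0 && gap >= SZ_2M)
		return SZ_2M;
	return SZ_4K;
}

int main(void)
{
	/* Walk a 4K-aligned range the same way create_physical_mapping() does. */
	unsigned long start = 0, end = 3 * SZ_1G + 5 * SZ_2M + 3 * SZ_4K;
	unsigned long addr, size;

	for (addr = start; addr < end; addr += size) {
		size = pick_mapping_size(addr, end);
		printf("map 0x%lx size 0x%lx\n", addr, size);
	}
	return 0;
}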