author     Tony Lindgren  2017-03-06 17:37:53 +0100
committer  Tony Lindgren  2017-03-06 17:37:53 +0100
commit     e24bce8fb4c26bd0d8eca74cbbee1ad049246be3 (patch)
tree       c219c2ec183633aa15841fca5b055a09d2d0b980 /arch/powerpc/mm
parent     ARM: OMAP2+: Release device node after it is no longer needed. (diff)
parent     Linux 4.11-rc1 (diff)
Merge tag 'v4.11-rc1' into omap-for-v4.11/fixes
Linux 4.11-rc1
Diffstat (limited to 'arch/powerpc/mm')
-rw-r--r--  arch/powerpc/mm/copro_fault.c          |  10
-rw-r--r--  arch/powerpc/mm/fault.c                |  44
-rw-r--r--  arch/powerpc/mm/hash_utils_64.c        |  64
-rw-r--r--  arch/powerpc/mm/hugetlbpage-hash64.c   |  21
-rw-r--r--  arch/powerpc/mm/init-common.c          |   3
-rw-r--r--  arch/powerpc/mm/init_64.c              |  35
-rw-r--r--  arch/powerpc/mm/mem.c                  |   4
-rw-r--r--  arch/powerpc/mm/mmap.c                 |   3
-rw-r--r--  arch/powerpc/mm/mmu_context_iommu.c    |   4
-rw-r--r--  arch/powerpc/mm/numa.c                 |  15
-rw-r--r--  arch/powerpc/mm/pgtable-book3s64.c     |   6
-rw-r--r--  arch/powerpc/mm/pgtable-hash64.c       |   2
-rw-r--r--  arch/powerpc/mm/pgtable-radix.c        | 263
-rw-r--r--  arch/powerpc/mm/pgtable.c              |   4
-rw-r--r--  arch/powerpc/mm/pgtable_64.c           |  22
-rw-r--r--  arch/powerpc/mm/slb.c                  |   2
-rw-r--r--  arch/powerpc/mm/slb_low.S              |  24
-rw-r--r--  arch/powerpc/mm/subpage-prot.c         |   4
18 files changed, 420 insertions, 110 deletions
diff --git a/arch/powerpc/mm/copro_fault.c b/arch/powerpc/mm/copro_fault.c
index aaa7ec6788b9..697b70ad1195 100644
--- a/arch/powerpc/mm/copro_fault.c
+++ b/arch/powerpc/mm/copro_fault.c
@@ -67,11 +67,13 @@ int copro_handle_mm_fault(struct mm_struct *mm, unsigned long ea,
if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
goto out_unlock;
/*
- * protfault should only happen due to us
- * mapping a region readonly temporarily. PROT_NONE
- * is also covered by the VMA check above.
+ * PROT_NONE is covered by the VMA check above,
+ * and hash should get a NOHPTE fault instead of
+ * a PROTFAULT in case a fixup is needed for
+ * things like autonuma.
*/
- WARN_ON_ONCE(dsisr & DSISR_PROTFAULT);
+ if (!radix_enabled())
+ WARN_ON_ONCE(dsisr & DSISR_PROTFAULT);
}
ret = 0;
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index 62a50d6d1053..51def8a515be 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -17,6 +17,7 @@
#include <linux/signal.h>
#include <linux/sched.h>
+#include <linux/sched/task_stack.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/string.h>
@@ -407,15 +408,6 @@ good_area:
(cpu_has_feature(CPU_FTR_NOEXECUTE) ||
!(vma->vm_flags & (VM_READ | VM_WRITE))))
goto bad_area;
-
-#ifdef CONFIG_PPC_STD_MMU
- /*
- * protfault should only happen due to us
- * mapping a region readonly temporarily. PROT_NONE
- * is also covered by the VMA check above.
- */
- WARN_ON_ONCE(error_code & DSISR_PROTFAULT);
-#endif /* CONFIG_PPC_STD_MMU */
/* a write */
} else if (is_write) {
if (!(vma->vm_flags & VM_WRITE))
@@ -425,8 +417,40 @@ good_area:
} else {
if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)))
goto bad_area;
- WARN_ON_ONCE(error_code & DSISR_PROTFAULT);
}
+#ifdef CONFIG_PPC_STD_MMU
+ /*
+ * For hash translation mode, we should never get a
+ * PROTFAULT. Any update to pte to reduce access will result in us
+ * removing the hash page table entry, thus resulting in a DSISR_NOHPTE
+ * fault instead of DSISR_PROTFAULT.
+ *
+ * A pte update to relax the access will not result in a hash page table
+ * entry invalidate and hence can result in DSISR_PROTFAULT.
+ * ptep_set_access_flags() doesn't do a hpte flush. This is why we have
+ * the special !is_write in the below conditional.
+ *
+ * For platforms that don't support a coherent icache but do support a
+ * per-page noexec bit, we set things up so that the D/I cache
+ * sync happens via a fault. But that is handled by the low level
+ * hash fault code (hash_page_do_lazy_icache()) and we should not
+ * reach here in that case.
+ *
+ * For a wrong access that can result in a PROTFAULT, the above
+ * vma->vm_flags check should handle it, and hence we should fall
+ * through to the bad_area handling correctly.
+ *
+ * For embedded MMUs with per-page exec support that don't have a
+ * coherent icache, we do get a PROTFAULT and we handle the D/I cache
+ * sync in set_pte_at() while taking the noexec/prot fault. Hence this
+ * WARN_ON is conditional on the server MMU.
+ *
+ * For radix, we can get a prot fault in the autonuma case, because the
+ * radix page table will have the pages marked no-access for user.
+ */
+ if (!radix_enabled() && !is_write)
+ WARN_ON_ONCE(error_code & DSISR_PROTFAULT);
+#endif /* CONFIG_PPC_STD_MMU */
/*
* If for any reason at all we couldn't handle the fault,
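The comment block added to do_page_fault() above boils down to a simple condition on when the protection-fault warning should fire. A standalone sketch of just that condition is below (the DSISR_PROTFAULT value here is a placeholder, not the architected DSISR bit):

/*
 * Sketch of the warning policy from the fault.c hunk above: on a hash
 * (non-radix) MMU a protection fault is only expected when a write hits
 * a pte whose access was relaxed without an hpte flush, so the WARN only
 * fires for non-write PROTFAULTs with radix disabled.  DSISR_PROTFAULT
 * below is a placeholder value for the sketch.
 */
#include <stdbool.h>
#include <stdio.h>

#define DSISR_PROTFAULT	0x08000000UL	/* placeholder bit for the sketch */

static bool warn_on_protfault(bool radix_enabled, bool is_write,
			      unsigned long error_code)
{
	return !radix_enabled && !is_write && (error_code & DSISR_PROTFAULT);
}

int main(void)
{
	printf("hash,  read  protfault -> warn=%d\n",
	       warn_on_protfault(false, false, DSISR_PROTFAULT));
	printf("hash,  write protfault -> warn=%d\n",
	       warn_on_protfault(false, true, DSISR_PROTFAULT));
	printf("radix, read  protfault -> warn=%d\n",
	       warn_on_protfault(true, false, DSISR_PROTFAULT));
	return 0;
}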
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 67e19a0821be..c554768b1fa2 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -23,7 +23,7 @@
#include <linux/spinlock.h>
#include <linux/errno.h>
-#include <linux/sched.h>
+#include <linux/sched/mm.h>
#include <linux/proc_fs.h>
#include <linux/stat.h>
#include <linux/sysctl.h>
@@ -35,7 +35,9 @@
#include <linux/memblock.h>
#include <linux/context_tracking.h>
#include <linux/libfdt.h>
+#include <linux/debugfs.h>
+#include <asm/debug.h>
#include <asm/processor.h>
#include <asm/pgtable.h>
#include <asm/mmu.h>
@@ -747,6 +749,35 @@ static unsigned long __init htab_get_table_size(void)
}
#ifdef CONFIG_MEMORY_HOTPLUG
+void resize_hpt_for_hotplug(unsigned long new_mem_size)
+{
+ unsigned target_hpt_shift;
+
+ if (!mmu_hash_ops.resize_hpt)
+ return;
+
+ target_hpt_shift = htab_shift_for_mem_size(new_mem_size);
+
+ /*
+ * To avoid lots of HPT resizes if memory size is fluctuating
+ * across a boundary, we deliberately have some hysteresis
+ * here: we immediately increase the HPT size if the target
+ * shift exceeds the current shift, but we won't attempt to
+ * reduce unless the target shift is at least 2 below the
+ * current shift.
+ */
+ if ((target_hpt_shift > ppc64_pft_size)
+ || (target_hpt_shift < (ppc64_pft_size - 1))) {
+ int rc;
+
+ rc = mmu_hash_ops.resize_hpt(target_hpt_shift);
+ if (rc)
+ printk(KERN_WARNING
+ "Unable to resize hash page table to target order %d: %d\n",
+ target_hpt_shift, rc);
+ }
+}
+
int hash__create_section_mapping(unsigned long start, unsigned long end)
{
int rc = htab_bolt_mapping(start, end, __pa(start),
@@ -1795,3 +1826,34 @@ void hash__setup_initial_memory_limit(phys_addr_t first_memblock_base,
/* Finally limit subsequent allocations */
memblock_set_current_limit(ppc64_rma_size);
}
+
+#ifdef CONFIG_DEBUG_FS
+
+static int hpt_order_get(void *data, u64 *val)
+{
+ *val = ppc64_pft_size;
+ return 0;
+}
+
+static int hpt_order_set(void *data, u64 val)
+{
+ if (!mmu_hash_ops.resize_hpt)
+ return -ENODEV;
+
+ return mmu_hash_ops.resize_hpt(val);
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(fops_hpt_order, hpt_order_get, hpt_order_set, "%llu\n");
+
+static int __init hash64_debugfs(void)
+{
+ if (!debugfs_create_file("hpt_order", 0600, powerpc_debugfs_root,
+ NULL, &fops_hpt_order)) {
+ pr_err("lpar: unable to create hpt_order debugfs file\n");
+ }
+
+ return 0;
+}
+machine_device_initcall(pseries, hash64_debugfs);
+
+#endif /* CONFIG_DEBUG_FS */
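The resize_hpt_for_hotplug() hunk above grows the HPT immediately once the target order exceeds the current one, but shrinks only when the target drops at least two orders below it; the hpt_order debugfs file added in the second hunk exposes the same resize hook for manual use. A minimal userspace sketch of that hysteresis rule (approx_hpt_shift() is an assumed stand-in for htab_shift_for_mem_size(), not the kernel's actual heuristic):

/*
 * Sketch of the HPT resize hysteresis: grow as soon as the target order
 * exceeds the current one, shrink only when it is at least 2 below it.
 * approx_hpt_shift() just assumes an "HPT ~ memory/128" sizing with an
 * arbitrary floor; the real kernel heuristic may differ.
 */
#include <stdio.h>

static unsigned int approx_hpt_shift(unsigned long long mem_size)
{
	unsigned long long target = mem_size >> 7;	/* memory / 128 */
	unsigned int shift = 18;			/* assumed floor */

	while ((1ULL << shift) < target)
		shift++;
	return shift;
}

static int should_resize(unsigned int cur_shift, unsigned int target_shift)
{
	return target_shift > cur_shift || target_shift < cur_shift - 1;
}

int main(void)
{
	unsigned long long sizes[] = { 1ULL << 30, 2ULL << 30, 128ULL << 30 };
	unsigned int cur_shift = 25;

	for (int i = 0; i < 3; i++) {
		unsigned int target = approx_hpt_shift(sizes[i]);

		printf("mem=%6llu MB  target_shift=%2u  resize=%d\n",
		       sizes[i] >> 20, target,
		       should_resize(cur_shift, target));
	}
	return 0;
}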
diff --git a/arch/powerpc/mm/hugetlbpage-hash64.c b/arch/powerpc/mm/hugetlbpage-hash64.c
index 37b5f91e381b..a84bb44497f9 100644
--- a/arch/powerpc/mm/hugetlbpage-hash64.c
+++ b/arch/powerpc/mm/hugetlbpage-hash64.c
@@ -116,24 +116,3 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid,
*ptep = __pte(new_pte & ~H_PAGE_BUSY);
return 0;
}
-
-#if defined(CONFIG_PPC_64K_PAGES) && defined(CONFIG_DEBUG_VM)
-/*
- * This enables us to catch the wrong page directory format
- * Moved here so that we can use WARN() in the call.
- */
-int hugepd_ok(hugepd_t hpd)
-{
- bool is_hugepd;
- unsigned long hpdval;
-
- hpdval = hpd_val(hpd);
-
- /*
- * We should not find this format in page directory, warn otherwise.
- */
- is_hugepd = (((hpdval & 0x3) == 0x0) && ((hpdval & HUGEPD_SHIFT_MASK) != 0));
- WARN(is_hugepd, "Found wrong page directory format\n");
- return 0;
-}
-#endif
diff --git a/arch/powerpc/mm/init-common.c b/arch/powerpc/mm/init-common.c
index f2108c40e697..eb8c6c8c4851 100644
--- a/arch/powerpc/mm/init-common.c
+++ b/arch/powerpc/mm/init-common.c
@@ -41,6 +41,7 @@ static void pmd_ctor(void *addr)
}
struct kmem_cache *pgtable_cache[MAX_PGTABLE_INDEX_SIZE];
+EXPORT_SYMBOL_GPL(pgtable_cache); /* used by kvm_hv module */
/*
* Create a kmem_cache() for pagetables. This is not used for PTE
@@ -86,7 +87,7 @@ void pgtable_cache_add(unsigned shift, void (*ctor)(void *))
pr_debug("Allocated pgtable cache for order %d\n", shift);
}
-
+EXPORT_SYMBOL_GPL(pgtable_cache_add); /* used by kvm_hv module */
void pgtable_cache_init(void)
{
diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
index 8e1588021d1c..6aa3b76aa0d6 100644
--- a/arch/powerpc/mm/init_64.c
+++ b/arch/powerpc/mm/init_64.c
@@ -42,6 +42,8 @@
#include <linux/memblock.h>
#include <linux/hugetlb.h>
#include <linux/slab.h>
+#include <linux/of_fdt.h>
+#include <linux/libfdt.h>
#include <asm/pgalloc.h>
#include <asm/page.h>
@@ -344,6 +346,30 @@ static int __init parse_disable_radix(char *p)
}
early_param("disable_radix", parse_disable_radix);
+/*
+ * If we're running under a hypervisor, we need to check the contents of
+ * /chosen/ibm,architecture-vec-5 to see if the hypervisor is willing to do
+ * radix. If not, we clear the radix feature bit so we fall back to hash.
+ */
+static void early_check_vec5(void)
+{
+ unsigned long root, chosen;
+ int size;
+ const u8 *vec5;
+
+ root = of_get_flat_dt_root();
+ chosen = of_get_flat_dt_subnode_by_name(root, "chosen");
+ if (chosen == -FDT_ERR_NOTFOUND)
+ return;
+ vec5 = of_get_flat_dt_prop(chosen, "ibm,architecture-vec-5", &size);
+ if (!vec5)
+ return;
+ if (size <= OV5_INDX(OV5_MMU_RADIX_300) ||
+ !(vec5[OV5_INDX(OV5_MMU_RADIX_300)] & OV5_FEAT(OV5_MMU_RADIX_300)))
+ /* Hypervisor doesn't support radix */
+ cur_cpu_spec->mmu_features &= ~MMU_FTR_TYPE_RADIX;
+}
+
void __init mmu_early_init_devtree(void)
{
/* Disable radix mode based on kernel command line. */
@@ -351,6 +377,15 @@ void __init mmu_early_init_devtree(void)
if (disable_radix || !(mfmsr() & MSR_HV))
cur_cpu_spec->mmu_features &= ~MMU_FTR_TYPE_RADIX;
+ /*
+ * Check /chosen/ibm,architecture-vec-5 if running as a guest.
+ * When running bare-metal, we can use radix if we like
+ * even though the ibm,architecture-vec-5 property created by
+ * skiboot doesn't have the necessary bits set.
+ */
+ if (early_radix_enabled() && !(mfmsr() & MSR_HV))
+ early_check_vec5();
+
if (early_radix_enabled())
radix__early_init_devtree();
else
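The early_check_vec5() addition above only keeps radix enabled when a specific byte/bit of the guest's /chosen/ibm,architecture-vec-5 property is set. A hedged standalone sketch of that test; RADIX_BYTE and RADIX_BIT are illustrative stand-ins for whatever OV5_INDX(OV5_MMU_RADIX_300) and OV5_FEAT(OV5_MMU_RADIX_300) actually expand to:

/*
 * Sketch of the vec5 feature test: the property is a byte array, and a
 * feature constant encodes which byte to look at (OV5_INDX) and which
 * bit within it (OV5_FEAT).  RADIX_BYTE/RADIX_BIT are made-up values
 * for illustration, not the real OV5_MMU_RADIX_300 encoding.
 */
#include <stdbool.h>
#include <stdio.h>

#define RADIX_BYTE	24	/* assumed byte index for the sketch */
#define RADIX_BIT	0x40	/* assumed bit mask for the sketch */

static bool vec5_allows_radix(const unsigned char *vec5, int size)
{
	/* Property too short, or bit clear: hypervisor won't do radix. */
	if (size <= RADIX_BYTE || !(vec5[RADIX_BYTE] & RADIX_BIT))
		return false;
	return true;
}

int main(void)
{
	unsigned char vec5[32] = { 0 };

	printf("bit clear: radix=%d\n", vec5_allows_radix(vec5, sizeof(vec5)));
	vec5[RADIX_BYTE] |= RADIX_BIT;
	printf("bit set:   radix=%d\n", vec5_allows_radix(vec5, sizeof(vec5)));
	printf("too short: radix=%d\n", vec5_allows_radix(vec5, RADIX_BYTE));
	return 0;
}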
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index 5f844337de21..9ee536ec0739 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -134,6 +134,8 @@ int arch_add_memory(int nid, u64 start, u64 size, bool for_device)
unsigned long nr_pages = size >> PAGE_SHIFT;
int rc;
+ resize_hpt_for_hotplug(memblock_phys_mem_size());
+
pgdata = NODE_DATA(nid);
start = (unsigned long)__va(start);
@@ -174,6 +176,8 @@ int arch_remove_memory(u64 start, u64 size)
*/
vm_unmap_aliases();
+ resize_hpt_for_hotplug(memblock_phys_mem_size());
+
return ret;
}
#endif
diff --git a/arch/powerpc/mm/mmap.c b/arch/powerpc/mm/mmap.c
index 2f1e44362198..a5d9ef59debe 100644
--- a/arch/powerpc/mm/mmap.c
+++ b/arch/powerpc/mm/mmap.c
@@ -25,7 +25,8 @@
#include <linux/personality.h>
#include <linux/mm.h>
#include <linux/random.h>
-#include <linux/sched.h>
+#include <linux/sched/signal.h>
+#include <linux/sched/mm.h>
#include <linux/elf-randomize.h>
#include <linux/security.h>
#include <linux/mman.h>
diff --git a/arch/powerpc/mm/mmu_context_iommu.c b/arch/powerpc/mm/mmu_context_iommu.c
index 104bad029ce9..497130c5c742 100644
--- a/arch/powerpc/mm/mmu_context_iommu.c
+++ b/arch/powerpc/mm/mmu_context_iommu.c
@@ -10,7 +10,7 @@
*
*/
-#include <linux/sched.h>
+#include <linux/sched/signal.h>
#include <linux/slab.h>
#include <linux/rculist.h>
#include <linux/vmalloc.h>
@@ -184,7 +184,7 @@ long mm_iommu_get(struct mm_struct *mm, unsigned long ua, unsigned long entries,
* of the CMA zone if possible. NOTE: faulting in + migration
* can be expensive. Batching can be considered later
*/
- if (get_pageblock_migratetype(page) == MIGRATE_CMA) {
+ if (is_migrate_cma_page(page)) {
if (mm_iommu_move_page_from_cma(page))
goto populate;
if (1 != get_user_pages_fast(ua + (i << PAGE_SHIFT),
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index b1099cb2f393..9befaee237d6 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -290,7 +290,7 @@ int of_node_to_nid(struct device_node *device)
return nid;
}
-EXPORT_SYMBOL_GPL(of_node_to_nid);
+EXPORT_SYMBOL(of_node_to_nid);
static int __init find_min_common_depth(void)
{
@@ -786,14 +786,9 @@ new_range:
fake_numa_create_new_node(((start + size) >> PAGE_SHIFT), &nid);
node_set_online(nid);
- if (!(size = numa_enforce_memory_limit(start, size))) {
- if (--ranges)
- goto new_range;
- else
- continue;
- }
-
- memblock_set_node(start, size, &memblock.memory, nid);
+ size = numa_enforce_memory_limit(start, size);
+ if (size)
+ memblock_set_node(start, size, &memblock.memory, nid);
if (--ranges)
goto new_range;
@@ -1098,7 +1093,7 @@ int hot_add_scn_to_nid(unsigned long scn_addr)
nid = hot_add_node_scn_to_nid(scn_addr);
}
- if (nid < 0 || !node_online(nid))
+ if (nid < 0 || !node_possible(nid))
nid = first_online_node;
return nid;
diff --git a/arch/powerpc/mm/pgtable-book3s64.c b/arch/powerpc/mm/pgtable-book3s64.c
index 653ff6c74ebe..5fcb3dd74c13 100644
--- a/arch/powerpc/mm/pgtable-book3s64.c
+++ b/arch/powerpc/mm/pgtable-book3s64.c
@@ -8,6 +8,8 @@
*/
#include <linux/sched.h>
+#include <linux/mm_types.h>
+
#include <asm/pgalloc.h>
#include <asm/tlb.h>
@@ -131,7 +133,7 @@ void mmu_cleanup_all(void)
int create_section_mapping(unsigned long start, unsigned long end)
{
if (radix_enabled())
- return -ENODEV;
+ return radix__create_section_mapping(start, end);
return hash__create_section_mapping(start, end);
}
@@ -139,7 +141,7 @@ int create_section_mapping(unsigned long start, unsigned long end)
int remove_section_mapping(unsigned long start, unsigned long end)
{
if (radix_enabled())
- return -ENODEV;
+ return radix__remove_section_mapping(start, end);
return hash__remove_section_mapping(start, end);
}
diff --git a/arch/powerpc/mm/pgtable-hash64.c b/arch/powerpc/mm/pgtable-hash64.c
index c23e286a6b8f..8b85a14b08ea 100644
--- a/arch/powerpc/mm/pgtable-hash64.c
+++ b/arch/powerpc/mm/pgtable-hash64.c
@@ -10,6 +10,8 @@
*/
#include <linux/sched.h>
+#include <linux/mm_types.h>
+
#include <asm/pgalloc.h>
#include <asm/tlb.h>
diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c
index 34f1a0dbc898..2a590a98e652 100644
--- a/arch/powerpc/mm/pgtable-radix.c
+++ b/arch/powerpc/mm/pgtable-radix.c
@@ -8,7 +8,7 @@
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
-#include <linux/sched.h>
+#include <linux/sched/mm.h>
#include <linux/memblock.h>
#include <linux/of_fdt.h>
@@ -18,6 +18,7 @@
#include <asm/machdep.h>
#include <asm/mmu.h>
#include <asm/firmware.h>
+#include <asm/powernv.h>
#include <trace/events/thp.h>
@@ -107,54 +108,66 @@ set_the_pte:
return 0;
}
+static inline void __meminit print_mapping(unsigned long start,
+ unsigned long end,
+ unsigned long size)
+{
+ if (end <= start)
+ return;
+
+ pr_info("Mapped range 0x%lx - 0x%lx with 0x%lx\n", start, end, size);
+}
+
+static int __meminit create_physical_mapping(unsigned long start,
+ unsigned long end)
+{
+ unsigned long addr, mapping_size = 0;
+
+ start = _ALIGN_UP(start, PAGE_SIZE);
+ for (addr = start; addr < end; addr += mapping_size) {
+ unsigned long gap, previous_size;
+ int rc;
+
+ gap = end - addr;
+ previous_size = mapping_size;
+
+ if (IS_ALIGNED(addr, PUD_SIZE) && gap >= PUD_SIZE &&
+ mmu_psize_defs[MMU_PAGE_1G].shift)
+ mapping_size = PUD_SIZE;
+ else if (IS_ALIGNED(addr, PMD_SIZE) && gap >= PMD_SIZE &&
+ mmu_psize_defs[MMU_PAGE_2M].shift)
+ mapping_size = PMD_SIZE;
+ else
+ mapping_size = PAGE_SIZE;
+
+ if (mapping_size != previous_size) {
+ print_mapping(start, addr, previous_size);
+ start = addr;
+ }
+
+ rc = radix__map_kernel_page((unsigned long)__va(addr), addr,
+ PAGE_KERNEL_X, mapping_size);
+ if (rc)
+ return rc;
+ }
+
+ print_mapping(start, addr, mapping_size);
+ return 0;
+}
+
static void __init radix_init_pgtable(void)
{
- int loop_count;
- u64 base, end, start_addr;
unsigned long rts_field;
struct memblock_region *reg;
- unsigned long linear_page_size;
/* We don't support slb for radix */
mmu_slb_size = 0;
/*
* Create the linear mapping, using standard page size for now
*/
- loop_count = 0;
- for_each_memblock(memory, reg) {
-
- start_addr = reg->base;
-
-redo:
- if (loop_count < 1 && mmu_psize_defs[MMU_PAGE_1G].shift)
- linear_page_size = PUD_SIZE;
- else if (loop_count < 2 && mmu_psize_defs[MMU_PAGE_2M].shift)
- linear_page_size = PMD_SIZE;
- else
- linear_page_size = PAGE_SIZE;
-
- base = _ALIGN_UP(start_addr, linear_page_size);
- end = _ALIGN_DOWN(reg->base + reg->size, linear_page_size);
-
- pr_info("Mapping range 0x%lx - 0x%lx with 0x%lx\n",
- (unsigned long)base, (unsigned long)end,
- linear_page_size);
-
- while (base < end) {
- radix__map_kernel_page((unsigned long)__va(base),
- base, PAGE_KERNEL_X,
- linear_page_size);
- base += linear_page_size;
- }
- /*
- * map the rest using lower page size
- */
- if (end < reg->base + reg->size) {
- start_addr = end;
- loop_count++;
- goto redo;
- }
- }
+ for_each_memblock(memory, reg)
+ WARN_ON(create_physical_mapping(reg->base,
+ reg->base + reg->size));
/*
* Allocate Partition table and process table for the
* host.
@@ -401,6 +414,8 @@ void __init radix__early_init_mmu(void)
mtspr(SPRN_LPCR, lpcr | LPCR_UPRT | LPCR_HR);
radix_init_partition_table();
radix_init_amor();
+ } else {
+ radix_init_pseries();
}
memblock_set_current_limit(MEMBLOCK_ALLOC_ANYWHERE);
@@ -438,6 +453,7 @@ void radix__mmu_cleanup_all(void)
lpcr = mfspr(SPRN_LPCR);
mtspr(SPRN_LPCR, lpcr & ~LPCR_UPRT);
mtspr(SPRN_PTCR, 0);
+ powernv_set_nmmu_ptcr(0);
radix__flush_tlb_all();
}
}
@@ -467,6 +483,173 @@ void radix__setup_initial_memory_limit(phys_addr_t first_memblock_base,
memblock_set_current_limit(first_memblock_base + first_memblock_size);
}
+#ifdef CONFIG_MEMORY_HOTPLUG
+static void free_pte_table(pte_t *pte_start, pmd_t *pmd)
+{
+ pte_t *pte;
+ int i;
+
+ for (i = 0; i < PTRS_PER_PTE; i++) {
+ pte = pte_start + i;
+ if (!pte_none(*pte))
+ return;
+ }
+
+ pte_free_kernel(&init_mm, pte_start);
+ pmd_clear(pmd);
+}
+
+static void free_pmd_table(pmd_t *pmd_start, pud_t *pud)
+{
+ pmd_t *pmd;
+ int i;
+
+ for (i = 0; i < PTRS_PER_PMD; i++) {
+ pmd = pmd_start + i;
+ if (!pmd_none(*pmd))
+ return;
+ }
+
+ pmd_free(&init_mm, pmd_start);
+ pud_clear(pud);
+}
+
+static void remove_pte_table(pte_t *pte_start, unsigned long addr,
+ unsigned long end)
+{
+ unsigned long next;
+ pte_t *pte;
+
+ pte = pte_start + pte_index(addr);
+ for (; addr < end; addr = next, pte++) {
+ next = (addr + PAGE_SIZE) & PAGE_MASK;
+ if (next > end)
+ next = end;
+
+ if (!pte_present(*pte))
+ continue;
+
+ if (!PAGE_ALIGNED(addr) || !PAGE_ALIGNED(next)) {
+ /*
+ * The vmemmap_free() and remove_section_mapping()
+ * codepaths call us with aligned addresses.
+ */
+ WARN_ONCE(1, "%s: unaligned range\n", __func__);
+ continue;
+ }
+
+ pte_clear(&init_mm, addr, pte);
+ }
+}
+
+static void remove_pmd_table(pmd_t *pmd_start, unsigned long addr,
+ unsigned long end)
+{
+ unsigned long next;
+ pte_t *pte_base;
+ pmd_t *pmd;
+
+ pmd = pmd_start + pmd_index(addr);
+ for (; addr < end; addr = next, pmd++) {
+ next = pmd_addr_end(addr, end);
+
+ if (!pmd_present(*pmd))
+ continue;
+
+ if (pmd_huge(*pmd)) {
+ if (!IS_ALIGNED(addr, PMD_SIZE) ||
+ !IS_ALIGNED(next, PMD_SIZE)) {
+ WARN_ONCE(1, "%s: unaligned range\n", __func__);
+ continue;
+ }
+
+ pte_clear(&init_mm, addr, (pte_t *)pmd);
+ continue;
+ }
+
+ pte_base = (pte_t *)pmd_page_vaddr(*pmd);
+ remove_pte_table(pte_base, addr, next);
+ free_pte_table(pte_base, pmd);
+ }
+}
+
+static void remove_pud_table(pud_t *pud_start, unsigned long addr,
+ unsigned long end)
+{
+ unsigned long next;
+ pmd_t *pmd_base;
+ pud_t *pud;
+
+ pud = pud_start + pud_index(addr);
+ for (; addr < end; addr = next, pud++) {
+ next = pud_addr_end(addr, end);
+
+ if (!pud_present(*pud))
+ continue;
+
+ if (pud_huge(*pud)) {
+ if (!IS_ALIGNED(addr, PUD_SIZE) ||
+ !IS_ALIGNED(next, PUD_SIZE)) {
+ WARN_ONCE(1, "%s: unaligned range\n", __func__);
+ continue;
+ }
+
+ pte_clear(&init_mm, addr, (pte_t *)pud);
+ continue;
+ }
+
+ pmd_base = (pmd_t *)pud_page_vaddr(*pud);
+ remove_pmd_table(pmd_base, addr, next);
+ free_pmd_table(pmd_base, pud);
+ }
+}
+
+static void remove_pagetable(unsigned long start, unsigned long end)
+{
+ unsigned long addr, next;
+ pud_t *pud_base;
+ pgd_t *pgd;
+
+ spin_lock(&init_mm.page_table_lock);
+
+ for (addr = start; addr < end; addr = next) {
+ next = pgd_addr_end(addr, end);
+
+ pgd = pgd_offset_k(addr);
+ if (!pgd_present(*pgd))
+ continue;
+
+ if (pgd_huge(*pgd)) {
+ if (!IS_ALIGNED(addr, PGDIR_SIZE) ||
+ !IS_ALIGNED(next, PGDIR_SIZE)) {
+ WARN_ONCE(1, "%s: unaligned range\n", __func__);
+ continue;
+ }
+
+ pte_clear(&init_mm, addr, (pte_t *)pgd);
+ continue;
+ }
+
+ pud_base = (pud_t *)pgd_page_vaddr(*pgd);
+ remove_pud_table(pud_base, addr, next);
+ }
+
+ spin_unlock(&init_mm.page_table_lock);
+ radix__flush_tlb_kernel_range(start, end);
+}
+
+int __ref radix__create_section_mapping(unsigned long start, unsigned long end)
+{
+ return create_physical_mapping(start, end);
+}
+
+int radix__remove_section_mapping(unsigned long start, unsigned long end)
+{
+ remove_pagetable(start, end);
+ return 0;
+}
+#endif /* CONFIG_MEMORY_HOTPLUG */
+
#ifdef CONFIG_SPARSEMEM_VMEMMAP
int __meminit radix__vmemmap_create_mapping(unsigned long start,
unsigned long page_size,
@@ -482,7 +665,7 @@ int __meminit radix__vmemmap_create_mapping(unsigned long start,
#ifdef CONFIG_MEMORY_HOTPLUG
void radix__vmemmap_remove_mapping(unsigned long start, unsigned long page_size)
{
- /* FIXME!! intel does more. We should free page tables mapping vmemmap ? */
+ remove_pagetable(start, start + page_size);
}
#endif
#endif
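The create_physical_mapping() rewrite earlier in this file's diff picks, at each address, the largest page size (1G, 2M or 4K) that is both aligned there and fits in the remaining gap. A standalone sketch of just that size-selection loop (the availability checks against mmu_psize_defs[] are assumed to pass here):

/*
 * Sketch of the linear-mapping size selection in create_physical_mapping():
 * use a 1G mapping when the address is 1G-aligned and at least 1G remains,
 * else a 2M mapping under the same conditions, else 4K.  The real code
 * also requires the page size to exist in mmu_psize_defs[].
 */
#include <stdio.h>

#define SZ_4K	(1UL << 12)
#define SZ_2M	(1UL << 21)
#define SZ_1G	(1UL << 30)
#define IS_ALIGNED(x, a)	(((x) & ((a) - 1)) == 0)

int main(void)
{
	unsigned long start = 0x200000;			/* 2M-aligned example */
	unsigned long end = (1UL << 30) + 0x5000;	/* 1G + 20K example */
	unsigned long addr, step, n1g = 0, n2m = 0, n4k = 0;

	for (addr = start; addr < end; addr += step) {
		unsigned long gap = end - addr;

		if (IS_ALIGNED(addr, SZ_1G) && gap >= SZ_1G) {
			step = SZ_1G; n1g++;
		} else if (IS_ALIGNED(addr, SZ_2M) && gap >= SZ_2M) {
			step = SZ_2M; n2m++;
		} else {
			step = SZ_4K; n4k++;
		}
	}
	printf("1G mappings: %lu, 2M mappings: %lu, 4K mappings: %lu\n",
	       n1g, n2m, n4k);
	return 0;
}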
diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c
index cb39c8bd2436..a03ff3d99e0c 100644
--- a/arch/powerpc/mm/pgtable.c
+++ b/arch/powerpc/mm/pgtable.c
@@ -193,9 +193,7 @@ void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep,
*/
VM_WARN_ON(pte_present(*ptep) && !pte_protnone(*ptep));
- /*
- * Add the pte bit when tryint set a pte
- */
+ /* Add the pte bit when trying to set a pte */
pte = __pte(pte_val(pte) | _PAGE_PTE);
/* Note: mm->context.id might not yet have been assigned as
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index 8bca7f58afc4..db93cf747a03 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -52,6 +52,7 @@
#include <asm/sections.h>
#include <asm/firmware.h>
#include <asm/dma.h>
+#include <asm/powernv.h>
#include "mmu_decl.h"
@@ -436,6 +437,7 @@ void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift)
void __init mmu_partition_table_init(void)
{
unsigned long patb_size = 1UL << PATB_SIZE_SHIFT;
+ unsigned long ptcr;
BUILD_BUG_ON_MSG((PATB_SIZE_SHIFT > 36), "Partition table size too large.");
partition_tb = __va(memblock_alloc_base(patb_size, patb_size,
@@ -448,19 +450,31 @@ void __init mmu_partition_table_init(void)
* update partition table control register,
* 64 K size.
*/
- mtspr(SPRN_PTCR, __pa(partition_tb) | (PATB_SIZE_SHIFT - 12));
+ ptcr = __pa(partition_tb) | (PATB_SIZE_SHIFT - 12);
+ mtspr(SPRN_PTCR, ptcr);
+ powernv_set_nmmu_ptcr(ptcr);
}
void mmu_partition_table_set_entry(unsigned int lpid, unsigned long dw0,
unsigned long dw1)
{
+ unsigned long old = be64_to_cpu(partition_tb[lpid].patb0);
+
partition_tb[lpid].patb0 = cpu_to_be64(dw0);
partition_tb[lpid].patb1 = cpu_to_be64(dw1);
- /* Global flush of TLBs and partition table caches for this lpid */
+ /*
+ * Global flush of TLBs and partition table caches for this lpid.
+ * The type of flush (hash or radix) depends on what the previous
+ * use of this partition ID was, not the new use.
+ */
asm volatile("ptesync" : : : "memory");
- asm volatile(PPC_TLBIE_5(%0,%1,2,0,0) : :
- "r" (TLBIEL_INVAL_SET_LPID), "r" (lpid));
+ if (old & PATB_HR)
+ asm volatile(PPC_TLBIE_5(%0,%1,2,0,1) : :
+ "r" (TLBIEL_INVAL_SET_LPID), "r" (lpid));
+ else
+ asm volatile(PPC_TLBIE_5(%0,%1,2,0,0) : :
+ "r" (TLBIEL_INVAL_SET_LPID), "r" (lpid));
asm volatile("eieio; tlbsync; ptesync" : : : "memory");
}
EXPORT_SYMBOL_GPL(mmu_partition_table_set_entry);
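The mmu_partition_table_set_entry() change above selects the tlbie variant from the partition-table entry being replaced rather than the new one, since it is the previous user's stale translations that must be flushed. A small sketch of that decision; PATB_HR's value here is an assumption about the "host radix" bit, not taken from the real register layout:

/*
 * Sketch: after updating a partition-table entry, flush according to how
 * that LPID was previously used (the old patb0 dword), not the new use.
 * PATB_HR below is an assumed placeholder for the "host radix" flag.
 */
#include <stdio.h>

#define PATB_HR		(1ULL << 63)	/* assumed "host radix" bit */

static const char *flush_variant(unsigned long long old_patb0)
{
	return (old_patb0 & PATB_HR) ? "radix flush (tlbie ...,1)"
				     : "hash flush  (tlbie ...,0)";
}

int main(void)
{
	printf("previous use radix -> %s\n", flush_variant(PATB_HR));
	printf("previous use hash  -> %s\n", flush_variant(0));
	return 0;
}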
diff --git a/arch/powerpc/mm/slb.c b/arch/powerpc/mm/slb.c
index 48fc28bab544..5e01b2ece1d0 100644
--- a/arch/powerpc/mm/slb.c
+++ b/arch/powerpc/mm/slb.c
@@ -22,6 +22,8 @@
#include <asm/cacheflush.h>
#include <asm/smp.h>
#include <linux/compiler.h>
+#include <linux/mm_types.h>
+
#include <asm/udbg.h>
#include <asm/code-patching.h>
diff --git a/arch/powerpc/mm/slb_low.S b/arch/powerpc/mm/slb_low.S
index e2974fcd20f1..a85e06ea6c20 100644
--- a/arch/powerpc/mm/slb_low.S
+++ b/arch/powerpc/mm/slb_low.S
@@ -71,9 +71,9 @@ slb_miss_kernel_load_linear:
BEGIN_FTR_SECTION
- b slb_finish_load
+ b .Lslb_finish_load
END_MMU_FTR_SECTION_IFCLR(MMU_FTR_1T_SEGMENT)
- b slb_finish_load_1T
+ b .Lslb_finish_load_1T
1:
#ifdef CONFIG_SPARSEMEM_VMEMMAP
@@ -109,9 +109,9 @@ slb_miss_kernel_load_io:
addi r9,r9,(MAX_USER_CONTEXT - 0xc + 1)@l
BEGIN_FTR_SECTION
- b slb_finish_load
+ b .Lslb_finish_load
END_MMU_FTR_SECTION_IFCLR(MMU_FTR_1T_SEGMENT)
- b slb_finish_load_1T
+ b .Lslb_finish_load_1T
0: /*
* For userspace addresses, make sure this is region 0.
@@ -174,9 +174,9 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_1T_SEGMENT)
ld r9,PACACONTEXTID(r13)
BEGIN_FTR_SECTION
cmpldi r10,0x1000
- bge slb_finish_load_1T
+ bge .Lslb_finish_load_1T
END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT)
- b slb_finish_load
+ b .Lslb_finish_load
8: /* invalid EA - return an error indication */
crset 4*cr0+eq /* indicate failure */
@@ -187,7 +187,7 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT)
*
* r3 = EA, r9 = context, r10 = ESID, r11 = flags, clobbers r9, cr7 = <> PAGE_OFFSET
*/
-slb_finish_load:
+.Lslb_finish_load:
rldimi r10,r9,ESID_BITS,0
ASM_VSID_SCRAMBLE(r10,r9,256M)
/*
@@ -256,7 +256,7 @@ slb_compare_rr_to_size:
*
* r3 = EA, r9 = context, r10 = ESID(256MB), r11 = flags, clobbers r9
*/
-slb_finish_load_1T:
+.Lslb_finish_load_1T:
srdi r10,r10,(SID_SHIFT_1T - SID_SHIFT) /* get 1T ESID */
rldimi r10,r9,ESID_BITS_1T,0
ASM_VSID_SCRAMBLE(r10,r9,1T)
@@ -272,3 +272,11 @@ slb_finish_load_1T:
clrrdi r3,r3,SID_SHIFT_1T /* clear out non-ESID bits */
b 7b
+
+_ASM_NOKPROBE_SYMBOL(slb_allocate_realmode)
+_ASM_NOKPROBE_SYMBOL(slb_miss_kernel_load_linear)
+_ASM_NOKPROBE_SYMBOL(slb_miss_kernel_load_io)
+_ASM_NOKPROBE_SYMBOL(slb_compare_rr_to_size)
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+_ASM_NOKPROBE_SYMBOL(slb_miss_kernel_load_vmemmap)
+#endif
diff --git a/arch/powerpc/mm/subpage-prot.c b/arch/powerpc/mm/subpage-prot.c
index 5c096c01e8bd..94210940112f 100644
--- a/arch/powerpc/mm/subpage-prot.c
+++ b/arch/powerpc/mm/subpage-prot.c
@@ -248,9 +248,8 @@ long sys_subpage_prot(unsigned long addr, unsigned long len, u32 __user *map)
nw = (next - addr) >> PAGE_SHIFT;
up_write(&mm->mmap_sem);
- err = -EFAULT;
if (__copy_from_user(spp, map, nw * sizeof(u32)))
- goto out2;
+ return -EFAULT;
map += nw;
down_write(&mm->mmap_sem);
@@ -262,6 +261,5 @@ long sys_subpage_prot(unsigned long addr, unsigned long len, u32 __user *map)
err = 0;
out:
up_write(&mm->mmap_sem);
- out2:
return err;
}