From 27e23b5f5f6f22292347901303aab2a1d458bcb5 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 29 Mar 2019 10:00:02 +0000 Subject: powerpc/mm: Move nohash specifics in subdirectory mm/nohash Many files in arch/powerpc/mm are only for nohash. This patch creates a subdirectory for them. Signed-off-by: Christophe Leroy [mpe: Shorten new filenames] Signed-off-by: Michael Ellerman --- arch/powerpc/mm/40x_mmu.c | 159 ---- arch/powerpc/mm/44x_mmu.c | 246 ----- arch/powerpc/mm/8xx_mmu.c | 239 ----- arch/powerpc/mm/Makefile | 17 +- arch/powerpc/mm/fsl_booke_mmu.c | 326 ------- arch/powerpc/mm/hugetlbpage-book3e.c | 206 ----- arch/powerpc/mm/mmu_context_nohash.c | 497 ----------- arch/powerpc/mm/nohash/40x.c | 159 ++++ arch/powerpc/mm/nohash/44x.c | 246 +++++ arch/powerpc/mm/nohash/8xx.c | 239 +++++ arch/powerpc/mm/nohash/Makefile | 18 + arch/powerpc/mm/nohash/book3e_hugetlbpage.c | 206 +++++ arch/powerpc/mm/nohash/book3e_pgtable.c | 120 +++ arch/powerpc/mm/nohash/fsl_booke.c | 326 +++++++ arch/powerpc/mm/nohash/mmu_context.c | 497 +++++++++++ arch/powerpc/mm/nohash/tlb.c | 810 +++++++++++++++++ arch/powerpc/mm/nohash/tlb_low.S | 491 ++++++++++ arch/powerpc/mm/nohash/tlb_low_64e.S | 1280 +++++++++++++++++++++++++++ arch/powerpc/mm/pgtable-book3e.c | 120 --- arch/powerpc/mm/tlb_low_64e.S | 1280 --------------------------- arch/powerpc/mm/tlb_nohash.c | 810 ----------------- arch/powerpc/mm/tlb_nohash_low.S | 491 ---------- 22 files changed, 4393 insertions(+), 4390 deletions(-) delete mode 100644 arch/powerpc/mm/40x_mmu.c delete mode 100644 arch/powerpc/mm/44x_mmu.c delete mode 100644 arch/powerpc/mm/8xx_mmu.c delete mode 100644 arch/powerpc/mm/fsl_booke_mmu.c delete mode 100644 arch/powerpc/mm/hugetlbpage-book3e.c delete mode 100644 arch/powerpc/mm/mmu_context_nohash.c create mode 100644 arch/powerpc/mm/nohash/40x.c create mode 100644 arch/powerpc/mm/nohash/44x.c create mode 100644 arch/powerpc/mm/nohash/8xx.c create mode 100644 arch/powerpc/mm/nohash/Makefile create mode 100644 arch/powerpc/mm/nohash/book3e_hugetlbpage.c create mode 100644 arch/powerpc/mm/nohash/book3e_pgtable.c create mode 100644 arch/powerpc/mm/nohash/fsl_booke.c create mode 100644 arch/powerpc/mm/nohash/mmu_context.c create mode 100644 arch/powerpc/mm/nohash/tlb.c create mode 100644 arch/powerpc/mm/nohash/tlb_low.S create mode 100644 arch/powerpc/mm/nohash/tlb_low_64e.S delete mode 100644 arch/powerpc/mm/pgtable-book3e.c delete mode 100644 arch/powerpc/mm/tlb_low_64e.S delete mode 100644 arch/powerpc/mm/tlb_nohash.c delete mode 100644 arch/powerpc/mm/tlb_nohash_low.S diff --git a/arch/powerpc/mm/40x_mmu.c b/arch/powerpc/mm/40x_mmu.c deleted file mode 100644 index 460459b6f53e..000000000000 --- a/arch/powerpc/mm/40x_mmu.c +++ /dev/null @@ -1,159 +0,0 @@ -/* - * This file contains the routines for initializing the MMU - * on the 4xx series of chips. - * -- paulus - * - * Derived from arch/ppc/mm/init.c: - * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) - * - * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au) - * and Cort Dougan (PReP) (cort@cs.nmt.edu) - * Copyright (C) 1996 Paul Mackerras - * - * Derived from "arch/i386/mm/init.c" - * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -extern int __map_without_ltlbs; -/* - * MMU_init_hw does the chip-specific initialization of the MMU hardware. - */ -void __init MMU_init_hw(void) -{ - /* - * The Zone Protection Register (ZPR) defines how protection will - * be applied to every page which is a member of a given zone. At - * present, we utilize only two of the 4xx's zones. - * The zone index bits (of ZSEL) in the PTE are used for software - * indicators, except the LSB. For user access, zone 1 is used, - * for kernel access, zone 0 is used. We set all but zone 1 - * to zero, allowing only kernel access as indicated in the PTE. - * For zone 1, we set a 01 binary (a value of 10 will not work) - * to allow user access as indicated in the PTE. This also allows - * kernel access as indicated in the PTE. - */ - - mtspr(SPRN_ZPR, 0x10000000); - - flush_instruction_cache(); - - /* - * Set up the real-mode cache parameters for the exception vector - * handlers (which are run in real-mode). - */ - - mtspr(SPRN_DCWR, 0x00000000); /* All caching is write-back */ - - /* - * Cache instruction and data space where the exception - * vectors and the kernel live in real-mode. - */ - - mtspr(SPRN_DCCR, 0xFFFF0000); /* 2GByte of data space at 0x0. */ - mtspr(SPRN_ICCR, 0xFFFF0000); /* 2GByte of instr. space at 0x0. */ -} - -#define LARGE_PAGE_SIZE_16M (1<<24) -#define LARGE_PAGE_SIZE_4M (1<<22) - -unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top) -{ - unsigned long v, s, mapped; - phys_addr_t p; - - v = KERNELBASE; - p = 0; - s = total_lowmem; - - if (__map_without_ltlbs) - return 0; - - while (s >= LARGE_PAGE_SIZE_16M) { - pmd_t *pmdp; - unsigned long val = p | _PMD_SIZE_16M | _PAGE_EXEC | _PAGE_HWWRITE; - - pmdp = pmd_offset(pud_offset(pgd_offset_k(v), v), v); - *pmdp++ = __pmd(val); - *pmdp++ = __pmd(val); - *pmdp++ = __pmd(val); - *pmdp++ = __pmd(val); - - v += LARGE_PAGE_SIZE_16M; - p += LARGE_PAGE_SIZE_16M; - s -= LARGE_PAGE_SIZE_16M; - } - - while (s >= LARGE_PAGE_SIZE_4M) { - pmd_t *pmdp; - unsigned long val = p | _PMD_SIZE_4M | _PAGE_EXEC | _PAGE_HWWRITE; - - pmdp = pmd_offset(pud_offset(pgd_offset_k(v), v), v); - *pmdp = __pmd(val); - - v += LARGE_PAGE_SIZE_4M; - p += LARGE_PAGE_SIZE_4M; - s -= LARGE_PAGE_SIZE_4M; - } - - mapped = total_lowmem - s; - - /* If the size of RAM is not an exact power of two, we may not - * have covered RAM in its entirety with 16 and 4 MiB - * pages. Consequently, restrict the top end of RAM currently - * allocable so that calls to the MEMBLOCK to allocate PTEs for "tail" - * coverage with normal-sized pages (or other reasons) do not - * attempt to allocate outside the allowed range. - */ - memblock_set_current_limit(mapped); - - return mapped; -} - -void setup_initial_memory_limit(phys_addr_t first_memblock_base, - phys_addr_t first_memblock_size) -{ - /* We don't currently support the first MEMBLOCK not mapping 0 - * physical on those processors - */ - BUG_ON(first_memblock_base != 0); - - /* 40x can only access 16MB at the moment (see head_40x.S) */ - memblock_set_current_limit(min_t(u64, first_memblock_size, 0x00800000)); -} diff --git a/arch/powerpc/mm/44x_mmu.c b/arch/powerpc/mm/44x_mmu.c deleted file mode 100644 index c07983ebc02e..000000000000 --- a/arch/powerpc/mm/44x_mmu.c +++ /dev/null @@ -1,246 +0,0 @@ -/* - * Modifications by Matt Porter (mporter@mvista.com) to support - * PPC44x Book E processors. - * - * This file contains the routines for initializing the MMU - * on the 4xx series of chips. - * -- paulus - * - * Derived from arch/ppc/mm/init.c: - * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) - * - * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au) - * and Cort Dougan (PReP) (cort@cs.nmt.edu) - * Copyright (C) 1996 Paul Mackerras - * - * Derived from "arch/i386/mm/init.c" - * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * - */ - -#include -#include - -#include -#include -#include -#include - -#include - -/* Used by the 44x TLB replacement exception handler. - * Just needed it declared someplace. - */ -unsigned int tlb_44x_index; /* = 0 */ -unsigned int tlb_44x_hwater = PPC44x_TLB_SIZE - 1 - PPC44x_EARLY_TLBS; -int icache_44x_need_flush; - -unsigned long tlb_47x_boltmap[1024/8]; - -static void ppc44x_update_tlb_hwater(void) -{ - /* The TLB miss handlers hard codes the watermark in a cmpli - * instruction to improve performances rather than loading it - * from the global variable. Thus, we patch the instructions - * in the 2 TLB miss handlers when updating the value - */ - modify_instruction_site(&patch__tlb_44x_hwater_D, 0xffff, tlb_44x_hwater); - modify_instruction_site(&patch__tlb_44x_hwater_I, 0xffff, tlb_44x_hwater); -} - -/* - * "Pins" a 256MB TLB entry in AS0 for kernel lowmem for 44x type MMU - */ -static void __init ppc44x_pin_tlb(unsigned int virt, unsigned int phys) -{ - unsigned int entry = tlb_44x_hwater--; - - ppc44x_update_tlb_hwater(); - - mtspr(SPRN_MMUCR, 0); - - __asm__ __volatile__( - "tlbwe %2,%3,%4\n" - "tlbwe %1,%3,%5\n" - "tlbwe %0,%3,%6\n" - : - : "r" (PPC44x_TLB_SW | PPC44x_TLB_SR | PPC44x_TLB_SX | PPC44x_TLB_G), - "r" (phys), - "r" (virt | PPC44x_TLB_VALID | PPC44x_TLB_256M), - "r" (entry), - "i" (PPC44x_TLB_PAGEID), - "i" (PPC44x_TLB_XLAT), - "i" (PPC44x_TLB_ATTRIB)); -} - -static int __init ppc47x_find_free_bolted(void) -{ - unsigned int mmube0 = mfspr(SPRN_MMUBE0); - unsigned int mmube1 = mfspr(SPRN_MMUBE1); - - if (!(mmube0 & MMUBE0_VBE0)) - return 0; - if (!(mmube0 & MMUBE0_VBE1)) - return 1; - if (!(mmube0 & MMUBE0_VBE2)) - return 2; - if (!(mmube1 & MMUBE1_VBE3)) - return 3; - if (!(mmube1 & MMUBE1_VBE4)) - return 4; - if (!(mmube1 & MMUBE1_VBE5)) - return 5; - return -1; -} - -static void __init ppc47x_update_boltmap(void) -{ - unsigned int mmube0 = mfspr(SPRN_MMUBE0); - unsigned int mmube1 = mfspr(SPRN_MMUBE1); - - if (mmube0 & MMUBE0_VBE0) - __set_bit((mmube0 >> MMUBE0_IBE0_SHIFT) & 0xff, - tlb_47x_boltmap); - if (mmube0 & MMUBE0_VBE1) - __set_bit((mmube0 >> MMUBE0_IBE1_SHIFT) & 0xff, - tlb_47x_boltmap); - if (mmube0 & MMUBE0_VBE2) - __set_bit((mmube0 >> MMUBE0_IBE2_SHIFT) & 0xff, - tlb_47x_boltmap); - if (mmube1 & MMUBE1_VBE3) - __set_bit((mmube1 >> MMUBE1_IBE3_SHIFT) & 0xff, - tlb_47x_boltmap); - if (mmube1 & MMUBE1_VBE4) - __set_bit((mmube1 >> MMUBE1_IBE4_SHIFT) & 0xff, - tlb_47x_boltmap); - if (mmube1 & MMUBE1_VBE5) - __set_bit((mmube1 >> MMUBE1_IBE5_SHIFT) & 0xff, - tlb_47x_boltmap); -} - -/* - * "Pins" a 256MB TLB entry in AS0 for kernel lowmem for 47x type MMU - */ -static void ppc47x_pin_tlb(unsigned int virt, unsigned int phys) -{ - unsigned int rA; - int bolted; - - /* Base rA is HW way select, way 0, bolted bit set */ - rA = 0x88000000; - - /* Look for a bolted entry slot */ - bolted = ppc47x_find_free_bolted(); - BUG_ON(bolted < 0); - - /* Insert bolted slot number */ - rA |= bolted << 24; - - pr_debug("256M TLB entry for 0x%08x->0x%08x in bolt slot %d\n", - virt, phys, bolted); - - mtspr(SPRN_MMUCR, 0); - - __asm__ __volatile__( - "tlbwe %2,%3,0\n" - "tlbwe %1,%3,1\n" - "tlbwe %0,%3,2\n" - : - : "r" (PPC47x_TLB2_SW | PPC47x_TLB2_SR | - PPC47x_TLB2_SX -#ifdef CONFIG_SMP - | PPC47x_TLB2_M -#endif - ), - "r" (phys), - "r" (virt | PPC47x_TLB0_VALID | PPC47x_TLB0_256M), - "r" (rA)); -} - -void __init MMU_init_hw(void) -{ - /* This is not useful on 47x but won't hurt either */ - ppc44x_update_tlb_hwater(); - - flush_instruction_cache(); -} - -unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top) -{ - unsigned long addr; - unsigned long memstart = memstart_addr & ~(PPC_PIN_SIZE - 1); - - /* Pin in enough TLBs to cover any lowmem not covered by the - * initial 256M mapping established in head_44x.S */ - for (addr = memstart + PPC_PIN_SIZE; addr < lowmem_end_addr; - addr += PPC_PIN_SIZE) { - if (mmu_has_feature(MMU_FTR_TYPE_47x)) - ppc47x_pin_tlb(addr + PAGE_OFFSET, addr); - else - ppc44x_pin_tlb(addr + PAGE_OFFSET, addr); - } - if (mmu_has_feature(MMU_FTR_TYPE_47x)) { - ppc47x_update_boltmap(); - -#ifdef DEBUG - { - int i; - - printk(KERN_DEBUG "bolted entries: "); - for (i = 0; i < 255; i++) { - if (test_bit(i, tlb_47x_boltmap)) - printk("%d ", i); - } - printk("\n"); - } -#endif /* DEBUG */ - } - return total_lowmem; -} - -void setup_initial_memory_limit(phys_addr_t first_memblock_base, - phys_addr_t first_memblock_size) -{ - u64 size; - -#ifndef CONFIG_NONSTATIC_KERNEL - /* We don't currently support the first MEMBLOCK not mapping 0 - * physical on those processors - */ - BUG_ON(first_memblock_base != 0); -#endif - - /* 44x has a 256M TLB entry pinned at boot */ - size = (min_t(u64, first_memblock_size, PPC_PIN_SIZE)); - memblock_set_current_limit(first_memblock_base + size); -} - -#ifdef CONFIG_SMP -void __init mmu_init_secondary(int cpu) -{ - unsigned long addr; - unsigned long memstart = memstart_addr & ~(PPC_PIN_SIZE - 1); - - /* Pin in enough TLBs to cover any lowmem not covered by the - * initial 256M mapping established in head_44x.S - * - * WARNING: This is called with only the first 256M of the - * linear mapping in the TLB and we can't take faults yet - * so beware of what this code uses. It runs off a temporary - * stack. current (r2) isn't initialized, smp_processor_id() - * will not work, current thread info isn't accessible, ... - */ - for (addr = memstart + PPC_PIN_SIZE; addr < lowmem_end_addr; - addr += PPC_PIN_SIZE) { - if (mmu_has_feature(MMU_FTR_TYPE_47x)) - ppc47x_pin_tlb(addr + PAGE_OFFSET, addr); - else - ppc44x_pin_tlb(addr + PAGE_OFFSET, addr); - } -} -#endif /* CONFIG_SMP */ diff --git a/arch/powerpc/mm/8xx_mmu.c b/arch/powerpc/mm/8xx_mmu.c deleted file mode 100644 index 70d55b615b62..000000000000 --- a/arch/powerpc/mm/8xx_mmu.c +++ /dev/null @@ -1,239 +0,0 @@ -/* - * This file contains the routines for initializing the MMU - * on the 8xx series of chips. - * -- christophe - * - * Derived from arch/powerpc/mm/40x_mmu.c: - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * - */ - -#include -#include -#include -#include - -#include - -#define IMMR_SIZE (FIX_IMMR_SIZE << PAGE_SHIFT) - -extern int __map_without_ltlbs; - -static unsigned long block_mapped_ram; - -/* - * Return PA for this VA if it is in an area mapped with LTLBs. - * Otherwise, returns 0 - */ -phys_addr_t v_block_mapped(unsigned long va) -{ - unsigned long p = PHYS_IMMR_BASE; - - if (__map_without_ltlbs) - return 0; - if (va >= VIRT_IMMR_BASE && va < VIRT_IMMR_BASE + IMMR_SIZE) - return p + va - VIRT_IMMR_BASE; - if (va >= PAGE_OFFSET && va < PAGE_OFFSET + block_mapped_ram) - return __pa(va); - return 0; -} - -/* - * Return VA for a given PA mapped with LTLBs or 0 if not mapped - */ -unsigned long p_block_mapped(phys_addr_t pa) -{ - unsigned long p = PHYS_IMMR_BASE; - - if (__map_without_ltlbs) - return 0; - if (pa >= p && pa < p + IMMR_SIZE) - return VIRT_IMMR_BASE + pa - p; - if (pa < block_mapped_ram) - return (unsigned long)__va(pa); - return 0; -} - -#define LARGE_PAGE_SIZE_8M (1<<23) - -/* - * MMU_init_hw does the chip-specific initialization of the MMU hardware. - */ -void __init MMU_init_hw(void) -{ - /* PIN up to the 3 first 8Mb after IMMR in DTLB table */ - if (IS_ENABLED(CONFIG_PIN_TLB_DATA)) { - unsigned long ctr = mfspr(SPRN_MD_CTR) & 0xfe000000; - unsigned long flags = 0xf0 | MD_SPS16K | _PAGE_SH | _PAGE_DIRTY; - int i = IS_ENABLED(CONFIG_PIN_TLB_IMMR) ? 29 : 28; - unsigned long addr = 0; - unsigned long mem = total_lowmem; - - for (; i < 32 && mem >= LARGE_PAGE_SIZE_8M; i++) { - mtspr(SPRN_MD_CTR, ctr | (i << 8)); - mtspr(SPRN_MD_EPN, (unsigned long)__va(addr) | MD_EVALID); - mtspr(SPRN_MD_TWC, MD_PS8MEG | MD_SVALID); - mtspr(SPRN_MD_RPN, addr | flags | _PAGE_PRESENT); - addr += LARGE_PAGE_SIZE_8M; - mem -= LARGE_PAGE_SIZE_8M; - } - } -} - -static void __init mmu_mapin_immr(void) -{ - unsigned long p = PHYS_IMMR_BASE; - unsigned long v = VIRT_IMMR_BASE; - int offset; - - for (offset = 0; offset < IMMR_SIZE; offset += PAGE_SIZE) - map_kernel_page(v + offset, p + offset, PAGE_KERNEL_NCG); -} - -static void mmu_patch_cmp_limit(s32 *site, unsigned long mapped) -{ - modify_instruction_site(site, 0xffff, (unsigned long)__va(mapped) >> 16); -} - -static void mmu_patch_addis(s32 *site, long simm) -{ - unsigned int instr = *(unsigned int *)patch_site_addr(site); - - instr &= 0xffff0000; - instr |= ((unsigned long)simm) >> 16; - patch_instruction_site(site, instr); -} - -unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top) -{ - unsigned long mapped; - - if (__map_without_ltlbs) { - mapped = 0; - mmu_mapin_immr(); - if (!IS_ENABLED(CONFIG_PIN_TLB_IMMR)) - patch_instruction_site(&patch__dtlbmiss_immr_jmp, PPC_INST_NOP); - if (!IS_ENABLED(CONFIG_PIN_TLB_TEXT)) - mmu_patch_cmp_limit(&patch__itlbmiss_linmem_top, 0); - } else { - mapped = top & ~(LARGE_PAGE_SIZE_8M - 1); - if (!IS_ENABLED(CONFIG_PIN_TLB_TEXT)) - mmu_patch_cmp_limit(&patch__itlbmiss_linmem_top, - _ALIGN(__pa(_einittext), 8 << 20)); - } - - mmu_patch_cmp_limit(&patch__dtlbmiss_linmem_top, mapped); - mmu_patch_cmp_limit(&patch__fixupdar_linmem_top, mapped); - - /* If the size of RAM is not an exact power of two, we may not - * have covered RAM in its entirety with 8 MiB - * pages. Consequently, restrict the top end of RAM currently - * allocable so that calls to the MEMBLOCK to allocate PTEs for "tail" - * coverage with normal-sized pages (or other reasons) do not - * attempt to allocate outside the allowed range. - */ - if (mapped) - memblock_set_current_limit(mapped); - - block_mapped_ram = mapped; - - return mapped; -} - -void mmu_mark_initmem_nx(void) -{ - if (IS_ENABLED(CONFIG_STRICT_KERNEL_RWX) && CONFIG_ETEXT_SHIFT < 23) - mmu_patch_addis(&patch__itlbmiss_linmem_top8, - -((long)_etext & ~(LARGE_PAGE_SIZE_8M - 1))); - if (!IS_ENABLED(CONFIG_PIN_TLB_TEXT)) - mmu_patch_cmp_limit(&patch__itlbmiss_linmem_top, __pa(_etext)); -} - -#ifdef CONFIG_STRICT_KERNEL_RWX -void mmu_mark_rodata_ro(void) -{ - if (CONFIG_DATA_SHIFT < 23) - mmu_patch_addis(&patch__dtlbmiss_romem_top8, - -__pa(((unsigned long)_sinittext) & - ~(LARGE_PAGE_SIZE_8M - 1))); - mmu_patch_addis(&patch__dtlbmiss_romem_top, -__pa(_sinittext)); -} -#endif - -void __init setup_initial_memory_limit(phys_addr_t first_memblock_base, - phys_addr_t first_memblock_size) -{ - /* We don't currently support the first MEMBLOCK not mapping 0 - * physical on those processors - */ - BUG_ON(first_memblock_base != 0); - - /* 8xx can only access 32MB at the moment */ - memblock_set_current_limit(min_t(u64, first_memblock_size, 0x02000000)); -} - -/* - * Set up to use a given MMU context. - * id is context number, pgd is PGD pointer. - * - * We place the physical address of the new task page directory loaded - * into the MMU base register, and set the ASID compare register with - * the new "context." - */ -void set_context(unsigned long id, pgd_t *pgd) -{ - s16 offset = (s16)(__pa(swapper_pg_dir)); - - /* Context switch the PTE pointer for the Abatron BDI2000. - * The PGDIR is passed as second argument. - */ - if (IS_ENABLED(CONFIG_BDI_SWITCH)) - abatron_pteptrs[1] = pgd; - - /* Register M_TWB will contain base address of level 1 table minus the - * lower part of the kernel PGDIR base address, so that all accesses to - * level 1 table are done relative to lower part of kernel PGDIR base - * address. - */ - mtspr(SPRN_M_TWB, __pa(pgd) - offset); - - /* Update context */ - mtspr(SPRN_M_CASID, id - 1); - /* sync */ - mb(); -} - -void flush_instruction_cache(void) -{ - isync(); - mtspr(SPRN_IC_CST, IDC_INVALL); - isync(); -} - -#ifdef CONFIG_PPC_KUEP -void __init setup_kuep(bool disabled) -{ - if (disabled) - return; - - pr_info("Activating Kernel Userspace Execution Prevention\n"); - - mtspr(SPRN_MI_AP, MI_APG_KUEP); -} -#endif - -#ifdef CONFIG_PPC_KUAP -void __init setup_kuap(bool disabled) -{ - pr_info("Activating Kernel Userspace Access Protection\n"); - - if (disabled) - pr_warn("KUAP cannot be disabled yet on 8xx when compiled in\n"); - - mtspr(SPRN_MD_AP, MD_APG_KUAP); -} -#endif diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile index 68cb1e840b5e..08557bae6fa1 100644 --- a/arch/powerpc/mm/Makefile +++ b/arch/powerpc/mm/Makefile @@ -8,30 +8,15 @@ ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC) obj-y := fault.o mem.o pgtable.o mmap.o \ init_$(BITS).o pgtable_$(BITS).o \ init-common.o mmu_context.o drmem.o -obj-$(CONFIG_PPC_MMU_NOHASH) += mmu_context_nohash.o tlb_nohash.o \ - tlb_nohash_low.o -obj-$(CONFIG_PPC_BOOK3E) += tlb_low_$(BITS)e.o -obj-$(CONFIG_PPC_BOOK3E_64) += pgtable-book3e.o +obj-$(CONFIG_PPC_MMU_NOHASH) += nohash/ obj-$(CONFIG_PPC_BOOK3S_32) += book3s32/ obj-$(CONFIG_PPC_BOOK3S_64) += book3s64/ obj-$(CONFIG_PPC_BOOK3S_64) += pgtable-frag.o obj-$(CONFIG_PPC32) += pgtable-frag.o -obj-$(CONFIG_40x) += 40x_mmu.o -obj-$(CONFIG_44x) += 44x_mmu.o -obj-$(CONFIG_PPC_8xx) += 8xx_mmu.o -obj-$(CONFIG_PPC_FSL_BOOK3E) += fsl_booke_mmu.o obj-$(CONFIG_NEED_MULTIPLE_NODES) += numa.o obj-$(CONFIG_PPC_MM_SLICES) += slice.o obj-y += hugetlbpage.o -ifdef CONFIG_HUGETLB_PAGE -obj-$(CONFIG_PPC_BOOK3E_MMU) += hugetlbpage-book3e.o -endif obj-$(CONFIG_NOT_COHERENT_CACHE) += dma-noncoherent.o obj-$(CONFIG_HIGHMEM) += highmem.o obj-$(CONFIG_PPC_COPRO_BASE) += copro_fault.o obj-$(CONFIG_PPC_PTDUMP) += ptdump/ - -# Disable kcov instrumentation on sensitive code -# This is necessary for booting with kcov enabled on book3e machines -KCOV_INSTRUMENT_tlb_nohash.o := n -KCOV_INSTRUMENT_fsl_booke_mmu.o := n diff --git a/arch/powerpc/mm/fsl_booke_mmu.c b/arch/powerpc/mm/fsl_booke_mmu.c deleted file mode 100644 index 71a1a36751dd..000000000000 --- a/arch/powerpc/mm/fsl_booke_mmu.c +++ /dev/null @@ -1,326 +0,0 @@ -/* - * Modifications by Kumar Gala (galak@kernel.crashing.org) to support - * E500 Book E processors. - * - * Copyright 2004,2010 Freescale Semiconductor, Inc. - * - * This file contains the routines for initializing the MMU - * on the 4xx series of chips. - * -- paulus - * - * Derived from arch/ppc/mm/init.c: - * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) - * - * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au) - * and Cort Dougan (PReP) (cort@cs.nmt.edu) - * Copyright (C) 1996 Paul Mackerras - * - * Derived from "arch/i386/mm/init.c" - * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -unsigned int tlbcam_index; - -#define NUM_TLBCAMS (64) -struct tlbcam TLBCAM[NUM_TLBCAMS]; - -struct tlbcamrange { - unsigned long start; - unsigned long limit; - phys_addr_t phys; -} tlbcam_addrs[NUM_TLBCAMS]; - -unsigned long tlbcam_sz(int idx) -{ - return tlbcam_addrs[idx].limit - tlbcam_addrs[idx].start + 1; -} - -#ifdef CONFIG_FSL_BOOKE -/* - * Return PA for this VA if it is mapped by a CAM, or 0 - */ -phys_addr_t v_block_mapped(unsigned long va) -{ - int b; - for (b = 0; b < tlbcam_index; ++b) - if (va >= tlbcam_addrs[b].start && va < tlbcam_addrs[b].limit) - return tlbcam_addrs[b].phys + (va - tlbcam_addrs[b].start); - return 0; -} - -/* - * Return VA for a given PA or 0 if not mapped - */ -unsigned long p_block_mapped(phys_addr_t pa) -{ - int b; - for (b = 0; b < tlbcam_index; ++b) - if (pa >= tlbcam_addrs[b].phys - && pa < (tlbcam_addrs[b].limit-tlbcam_addrs[b].start) - +tlbcam_addrs[b].phys) - return tlbcam_addrs[b].start+(pa-tlbcam_addrs[b].phys); - return 0; -} -#endif - -/* - * Set up a variable-size TLB entry (tlbcam). The parameters are not checked; - * in particular size must be a power of 4 between 4k and the max supported by - * an implementation; max may further be limited by what can be represented in - * an unsigned long (for example, 32-bit implementations cannot support a 4GB - * size). - */ -static void settlbcam(int index, unsigned long virt, phys_addr_t phys, - unsigned long size, unsigned long flags, unsigned int pid) -{ - unsigned int tsize; - - tsize = __ilog2(size) - 10; - -#if defined(CONFIG_SMP) || defined(CONFIG_PPC_E500MC) - if ((flags & _PAGE_NO_CACHE) == 0) - flags |= _PAGE_COHERENT; -#endif - - TLBCAM[index].MAS0 = MAS0_TLBSEL(1) | MAS0_ESEL(index) | MAS0_NV(index+1); - TLBCAM[index].MAS1 = MAS1_VALID | MAS1_IPROT | MAS1_TSIZE(tsize) | MAS1_TID(pid); - TLBCAM[index].MAS2 = virt & PAGE_MASK; - - TLBCAM[index].MAS2 |= (flags & _PAGE_WRITETHRU) ? MAS2_W : 0; - TLBCAM[index].MAS2 |= (flags & _PAGE_NO_CACHE) ? MAS2_I : 0; - TLBCAM[index].MAS2 |= (flags & _PAGE_COHERENT) ? MAS2_M : 0; - TLBCAM[index].MAS2 |= (flags & _PAGE_GUARDED) ? MAS2_G : 0; - TLBCAM[index].MAS2 |= (flags & _PAGE_ENDIAN) ? MAS2_E : 0; - - TLBCAM[index].MAS3 = (phys & MAS3_RPN) | MAS3_SX | MAS3_SR; - TLBCAM[index].MAS3 |= ((flags & _PAGE_RW) ? MAS3_SW : 0); - if (mmu_has_feature(MMU_FTR_BIG_PHYS)) - TLBCAM[index].MAS7 = (u64)phys >> 32; - - /* Below is unlikely -- only for large user pages or similar */ - if (pte_user(__pte(flags))) { - TLBCAM[index].MAS3 |= MAS3_UX | MAS3_UR; - TLBCAM[index].MAS3 |= ((flags & _PAGE_RW) ? MAS3_UW : 0); - } - - tlbcam_addrs[index].start = virt; - tlbcam_addrs[index].limit = virt + size - 1; - tlbcam_addrs[index].phys = phys; -} - -unsigned long calc_cam_sz(unsigned long ram, unsigned long virt, - phys_addr_t phys) -{ - unsigned int camsize = __ilog2(ram); - unsigned int align = __ffs(virt | phys); - unsigned long max_cam; - - if ((mfspr(SPRN_MMUCFG) & MMUCFG_MAVN) == MMUCFG_MAVN_V1) { - /* Convert (4^max) kB to (2^max) bytes */ - max_cam = ((mfspr(SPRN_TLB1CFG) >> 16) & 0xf) * 2 + 10; - camsize &= ~1U; - align &= ~1U; - } else { - /* Convert (2^max) kB to (2^max) bytes */ - max_cam = __ilog2(mfspr(SPRN_TLB1PS)) + 10; - } - - if (camsize > align) - camsize = align; - if (camsize > max_cam) - camsize = max_cam; - - return 1UL << camsize; -} - -static unsigned long map_mem_in_cams_addr(phys_addr_t phys, unsigned long virt, - unsigned long ram, int max_cam_idx, - bool dryrun) -{ - int i; - unsigned long amount_mapped = 0; - - /* Calculate CAM values */ - for (i = 0; ram && i < max_cam_idx; i++) { - unsigned long cam_sz; - - cam_sz = calc_cam_sz(ram, virt, phys); - if (!dryrun) - settlbcam(i, virt, phys, cam_sz, - pgprot_val(PAGE_KERNEL_X), 0); - - ram -= cam_sz; - amount_mapped += cam_sz; - virt += cam_sz; - phys += cam_sz; - } - - if (dryrun) - return amount_mapped; - - loadcam_multi(0, i, max_cam_idx); - tlbcam_index = i; - -#ifdef CONFIG_PPC64 - get_paca()->tcd.esel_next = i; - get_paca()->tcd.esel_max = mfspr(SPRN_TLB1CFG) & TLBnCFG_N_ENTRY; - get_paca()->tcd.esel_first = i; -#endif - - return amount_mapped; -} - -unsigned long map_mem_in_cams(unsigned long ram, int max_cam_idx, bool dryrun) -{ - unsigned long virt = PAGE_OFFSET; - phys_addr_t phys = memstart_addr; - - return map_mem_in_cams_addr(phys, virt, ram, max_cam_idx, dryrun); -} - -#ifdef CONFIG_PPC32 - -#if defined(CONFIG_LOWMEM_CAM_NUM_BOOL) && (CONFIG_LOWMEM_CAM_NUM >= NUM_TLBCAMS) -#error "LOWMEM_CAM_NUM must be less than NUM_TLBCAMS" -#endif - -unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top) -{ - return tlbcam_addrs[tlbcam_index - 1].limit - PAGE_OFFSET + 1; -} - -/* - * MMU_init_hw does the chip-specific initialization of the MMU hardware. - */ -void __init MMU_init_hw(void) -{ - flush_instruction_cache(); -} - -void __init adjust_total_lowmem(void) -{ - unsigned long ram; - int i; - - /* adjust lowmem size to __max_low_memory */ - ram = min((phys_addr_t)__max_low_memory, (phys_addr_t)total_lowmem); - - i = switch_to_as1(); - __max_low_memory = map_mem_in_cams(ram, CONFIG_LOWMEM_CAM_NUM, false); - restore_to_as0(i, 0, 0, 1); - - pr_info("Memory CAM mapping: "); - for (i = 0; i < tlbcam_index - 1; i++) - pr_cont("%lu/", tlbcam_sz(i) >> 20); - pr_cont("%lu Mb, residual: %dMb\n", tlbcam_sz(tlbcam_index - 1) >> 20, - (unsigned int)((total_lowmem - __max_low_memory) >> 20)); - - memblock_set_current_limit(memstart_addr + __max_low_memory); -} - -void setup_initial_memory_limit(phys_addr_t first_memblock_base, - phys_addr_t first_memblock_size) -{ - phys_addr_t limit = first_memblock_base + first_memblock_size; - - /* 64M mapped initially according to head_fsl_booke.S */ - memblock_set_current_limit(min_t(u64, limit, 0x04000000)); -} - -#ifdef CONFIG_RELOCATABLE -int __initdata is_second_reloc; -notrace void __init relocate_init(u64 dt_ptr, phys_addr_t start) -{ - unsigned long base = KERNELBASE; - - kernstart_addr = start; - if (is_second_reloc) { - virt_phys_offset = PAGE_OFFSET - memstart_addr; - return; - } - - /* - * Relocatable kernel support based on processing of dynamic - * relocation entries. Before we get the real memstart_addr, - * We will compute the virt_phys_offset like this: - * virt_phys_offset = stext.run - kernstart_addr - * - * stext.run = (KERNELBASE & ~0x3ffffff) + - * (kernstart_addr & 0x3ffffff) - * When we relocate, we have : - * - * (kernstart_addr & 0x3ffffff) = (stext.run & 0x3ffffff) - * - * hence: - * virt_phys_offset = (KERNELBASE & ~0x3ffffff) - - * (kernstart_addr & ~0x3ffffff) - * - */ - start &= ~0x3ffffff; - base &= ~0x3ffffff; - virt_phys_offset = base - start; - early_get_first_memblock_info(__va(dt_ptr), NULL); - /* - * We now get the memstart_addr, then we should check if this - * address is the same as what the PAGE_OFFSET map to now. If - * not we have to change the map of PAGE_OFFSET to memstart_addr - * and do a second relocation. - */ - if (start != memstart_addr) { - int n; - long offset = start - memstart_addr; - - is_second_reloc = 1; - n = switch_to_as1(); - /* map a 64M area for the second relocation */ - if (memstart_addr > start) - map_mem_in_cams(0x4000000, CONFIG_LOWMEM_CAM_NUM, - false); - else - map_mem_in_cams_addr(start, PAGE_OFFSET + offset, - 0x4000000, CONFIG_LOWMEM_CAM_NUM, - false); - restore_to_as0(n, offset, __va(dt_ptr), 1); - /* We should never reach here */ - panic("Relocation error"); - } -} -#endif -#endif diff --git a/arch/powerpc/mm/hugetlbpage-book3e.c b/arch/powerpc/mm/hugetlbpage-book3e.c deleted file mode 100644 index f84ec46cdb26..000000000000 --- a/arch/powerpc/mm/hugetlbpage-book3e.c +++ /dev/null @@ -1,206 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * PPC Huge TLB Page Support for Book3E MMU - * - * Copyright (C) 2009 David Gibson, IBM Corporation. - * Copyright (C) 2011 Becky Bruce, Freescale Semiconductor - * - */ -#include -#include - -#include - -#ifdef CONFIG_PPC_FSL_BOOK3E -#ifdef CONFIG_PPC64 -static inline int tlb1_next(void) -{ - struct paca_struct *paca = get_paca(); - struct tlb_core_data *tcd; - int this, next; - - tcd = paca->tcd_ptr; - this = tcd->esel_next; - - next = this + 1; - if (next >= tcd->esel_max) - next = tcd->esel_first; - - tcd->esel_next = next; - return this; -} -#else -static inline int tlb1_next(void) -{ - int index, ncams; - - ncams = mfspr(SPRN_TLB1CFG) & TLBnCFG_N_ENTRY; - - index = this_cpu_read(next_tlbcam_idx); - - /* Just round-robin the entries and wrap when we hit the end */ - if (unlikely(index == ncams - 1)) - __this_cpu_write(next_tlbcam_idx, tlbcam_index); - else - __this_cpu_inc(next_tlbcam_idx); - - return index; -} -#endif /* !PPC64 */ -#endif /* FSL */ - -static inline int mmu_get_tsize(int psize) -{ - return mmu_psize_defs[psize].enc; -} - -#if defined(CONFIG_PPC_FSL_BOOK3E) && defined(CONFIG_PPC64) -#include - -static inline void book3e_tlb_lock(void) -{ - struct paca_struct *paca = get_paca(); - unsigned long tmp; - int token = smp_processor_id() + 1; - - /* - * Besides being unnecessary in the absence of SMT, this - * check prevents trying to do lbarx/stbcx. on e5500 which - * doesn't implement either feature. - */ - if (!cpu_has_feature(CPU_FTR_SMT)) - return; - - asm volatile("1: lbarx %0, 0, %1;" - "cmpwi %0, 0;" - "bne 2f;" - "stbcx. %2, 0, %1;" - "bne 1b;" - "b 3f;" - "2: lbzx %0, 0, %1;" - "cmpwi %0, 0;" - "bne 2b;" - "b 1b;" - "3:" - : "=&r" (tmp) - : "r" (&paca->tcd_ptr->lock), "r" (token) - : "memory"); -} - -static inline void book3e_tlb_unlock(void) -{ - struct paca_struct *paca = get_paca(); - - if (!cpu_has_feature(CPU_FTR_SMT)) - return; - - isync(); - paca->tcd_ptr->lock = 0; -} -#else -static inline void book3e_tlb_lock(void) -{ -} - -static inline void book3e_tlb_unlock(void) -{ -} -#endif - -static inline int book3e_tlb_exists(unsigned long ea, unsigned long pid) -{ - int found = 0; - - mtspr(SPRN_MAS6, pid << 16); - if (mmu_has_feature(MMU_FTR_USE_TLBRSRV)) { - asm volatile( - "li %0,0\n" - "tlbsx. 0,%1\n" - "bne 1f\n" - "li %0,1\n" - "1:\n" - : "=&r"(found) : "r"(ea)); - } else { - asm volatile( - "tlbsx 0,%1\n" - "mfspr %0,0x271\n" - "srwi %0,%0,31\n" - : "=&r"(found) : "r"(ea)); - } - - return found; -} - -void book3e_hugetlb_preload(struct vm_area_struct *vma, unsigned long ea, - pte_t pte) -{ - unsigned long mas1, mas2; - u64 mas7_3; - unsigned long psize, tsize, shift; - unsigned long flags; - struct mm_struct *mm; - -#ifdef CONFIG_PPC_FSL_BOOK3E - int index; -#endif - - if (unlikely(is_kernel_addr(ea))) - return; - - mm = vma->vm_mm; - - psize = vma_mmu_pagesize(vma); - shift = __ilog2(psize); - tsize = shift - 10; - /* - * We can't be interrupted while we're setting up the MAS - * regusters or after we've confirmed that no tlb exists. - */ - local_irq_save(flags); - - book3e_tlb_lock(); - - if (unlikely(book3e_tlb_exists(ea, mm->context.id))) { - book3e_tlb_unlock(); - local_irq_restore(flags); - return; - } - -#ifdef CONFIG_PPC_FSL_BOOK3E - /* We have to use the CAM(TLB1) on FSL parts for hugepages */ - index = tlb1_next(); - mtspr(SPRN_MAS0, MAS0_ESEL(index) | MAS0_TLBSEL(1)); -#endif - - mas1 = MAS1_VALID | MAS1_TID(mm->context.id) | MAS1_TSIZE(tsize); - mas2 = ea & ~((1UL << shift) - 1); - mas2 |= (pte_val(pte) >> PTE_WIMGE_SHIFT) & MAS2_WIMGE_MASK; - mas7_3 = (u64)pte_pfn(pte) << PAGE_SHIFT; - mas7_3 |= (pte_val(pte) >> PTE_BAP_SHIFT) & MAS3_BAP_MASK; - if (!pte_dirty(pte)) - mas7_3 &= ~(MAS3_SW|MAS3_UW); - - mtspr(SPRN_MAS1, mas1); - mtspr(SPRN_MAS2, mas2); - - if (mmu_has_feature(MMU_FTR_USE_PAIRED_MAS)) { - mtspr(SPRN_MAS7_MAS3, mas7_3); - } else { - if (mmu_has_feature(MMU_FTR_BIG_PHYS)) - mtspr(SPRN_MAS7, upper_32_bits(mas7_3)); - mtspr(SPRN_MAS3, lower_32_bits(mas7_3)); - } - - asm volatile ("tlbwe"); - - book3e_tlb_unlock(); - local_irq_restore(flags); -} - -void flush_hugetlb_page(struct vm_area_struct *vma, unsigned long vmaddr) -{ - struct hstate *hstate = hstate_file(vma->vm_file); - unsigned long tsize = huge_page_shift(hstate) - 10; - - __flush_tlb_page(vma->vm_mm, vmaddr, tsize, 0); -} diff --git a/arch/powerpc/mm/mmu_context_nohash.c b/arch/powerpc/mm/mmu_context_nohash.c deleted file mode 100644 index ae4505d5b4b8..000000000000 --- a/arch/powerpc/mm/mmu_context_nohash.c +++ /dev/null @@ -1,497 +0,0 @@ -/* - * This file contains the routines for handling the MMU on those - * PowerPC implementations where the MMU is not using the hash - * table, such as 8xx, 4xx, BookE's etc... - * - * Copyright 2008 Ben Herrenschmidt - * IBM Corp. - * - * Derived from previous arch/powerpc/mm/mmu_context.c - * and arch/powerpc/include/asm/mmu_context.h - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * - * TODO: - * - * - The global context lock will not scale very well - * - The maps should be dynamically allocated to allow for processors - * that support more PID bits at runtime - * - Implement flush_tlb_mm() by making the context stale and picking - * a new one - * - More aggressively clear stale map bits and maybe find some way to - * also clear mm->cpu_vm_mask bits when processes are migrated - */ - -//#define DEBUG_MAP_CONSISTENCY -//#define DEBUG_CLAMP_LAST_CONTEXT 31 -//#define DEBUG_HARDER - -/* We don't use DEBUG because it tends to be compiled in always nowadays - * and this would generate way too much output - */ -#ifdef DEBUG_HARDER -#define pr_hard(args...) printk(KERN_DEBUG args) -#define pr_hardcont(args...) printk(KERN_CONT args) -#else -#define pr_hard(args...) do { } while(0) -#define pr_hardcont(args...) do { } while(0) -#endif - -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -#include - -/* - * The MPC8xx has only 16 contexts. We rotate through them on each task switch. - * A better way would be to keep track of tasks that own contexts, and implement - * an LRU usage. That way very active tasks don't always have to pay the TLB - * reload overhead. The kernel pages are mapped shared, so the kernel can run on - * behalf of any task that makes a kernel entry. Shared does not mean they are - * not protected, just that the ASID comparison is not performed. -- Dan - * - * The IBM4xx has 256 contexts, so we can just rotate through these as a way of - * "switching" contexts. If the TID of the TLB is zero, the PID/TID comparison - * is disabled, so we can use a TID of zero to represent all kernel pages as - * shared among all contexts. -- Dan - * - * The IBM 47x core supports 16-bit PIDs, thus 65535 contexts. We should - * normally never have to steal though the facility is present if needed. - * -- BenH - */ -#define FIRST_CONTEXT 1 -#ifdef DEBUG_CLAMP_LAST_CONTEXT -#define LAST_CONTEXT DEBUG_CLAMP_LAST_CONTEXT -#elif defined(CONFIG_PPC_8xx) -#define LAST_CONTEXT 16 -#elif defined(CONFIG_PPC_47x) -#define LAST_CONTEXT 65535 -#else -#define LAST_CONTEXT 255 -#endif - -static unsigned int next_context, nr_free_contexts; -static unsigned long *context_map; -#ifdef CONFIG_SMP -static unsigned long *stale_map[NR_CPUS]; -#endif -static struct mm_struct **context_mm; -static DEFINE_RAW_SPINLOCK(context_lock); - -#define CTX_MAP_SIZE \ - (sizeof(unsigned long) * (LAST_CONTEXT / BITS_PER_LONG + 1)) - - -/* Steal a context from a task that has one at the moment. - * - * This is used when we are running out of available PID numbers - * on the processors. - * - * This isn't an LRU system, it just frees up each context in - * turn (sort-of pseudo-random replacement :). This would be the - * place to implement an LRU scheme if anyone was motivated to do it. - * -- paulus - * - * For context stealing, we use a slightly different approach for - * SMP and UP. Basically, the UP one is simpler and doesn't use - * the stale map as we can just flush the local CPU - * -- benh - */ -#ifdef CONFIG_SMP -static unsigned int steal_context_smp(unsigned int id) -{ - struct mm_struct *mm; - unsigned int cpu, max, i; - - max = LAST_CONTEXT - FIRST_CONTEXT; - - /* Attempt to free next_context first and then loop until we manage */ - while (max--) { - /* Pick up the victim mm */ - mm = context_mm[id]; - - /* We have a candidate victim, check if it's active, on SMP - * we cannot steal active contexts - */ - if (mm->context.active) { - id++; - if (id > LAST_CONTEXT) - id = FIRST_CONTEXT; - continue; - } - pr_hardcont(" | steal %d from 0x%p", id, mm); - - /* Mark this mm has having no context anymore */ - mm->context.id = MMU_NO_CONTEXT; - - /* Mark it stale on all CPUs that used this mm. For threaded - * implementations, we set it on all threads on each core - * represented in the mask. A future implementation will use - * a core map instead but this will do for now. - */ - for_each_cpu(cpu, mm_cpumask(mm)) { - for (i = cpu_first_thread_sibling(cpu); - i <= cpu_last_thread_sibling(cpu); i++) { - if (stale_map[i]) - __set_bit(id, stale_map[i]); - } - cpu = i - 1; - } - return id; - } - - /* This will happen if you have more CPUs than available contexts, - * all we can do here is wait a bit and try again - */ - raw_spin_unlock(&context_lock); - cpu_relax(); - raw_spin_lock(&context_lock); - - /* This will cause the caller to try again */ - return MMU_NO_CONTEXT; -} -#endif /* CONFIG_SMP */ - -static unsigned int steal_all_contexts(void) -{ - struct mm_struct *mm; -#ifdef CONFIG_SMP - int cpu = smp_processor_id(); -#endif - unsigned int id; - - for (id = FIRST_CONTEXT; id <= LAST_CONTEXT; id++) { - /* Pick up the victim mm */ - mm = context_mm[id]; - - pr_hardcont(" | steal %d from 0x%p", id, mm); - - /* Mark this mm as having no context anymore */ - mm->context.id = MMU_NO_CONTEXT; - if (id != FIRST_CONTEXT) { - context_mm[id] = NULL; - __clear_bit(id, context_map); -#ifdef DEBUG_MAP_CONSISTENCY - mm->context.active = 0; -#endif - } -#ifdef CONFIG_SMP - __clear_bit(id, stale_map[cpu]); -#endif - } - - /* Flush the TLB for all contexts (not to be used on SMP) */ - _tlbil_all(); - - nr_free_contexts = LAST_CONTEXT - FIRST_CONTEXT; - - return FIRST_CONTEXT; -} - -/* Note that this will also be called on SMP if all other CPUs are - * offlined, which means that it may be called for cpu != 0. For - * this to work, we somewhat assume that CPUs that are onlined - * come up with a fully clean TLB (or are cleaned when offlined) - */ -static unsigned int steal_context_up(unsigned int id) -{ - struct mm_struct *mm; -#ifdef CONFIG_SMP - int cpu = smp_processor_id(); -#endif - - /* Pick up the victim mm */ - mm = context_mm[id]; - - pr_hardcont(" | steal %d from 0x%p", id, mm); - - /* Flush the TLB for that context */ - local_flush_tlb_mm(mm); - - /* Mark this mm has having no context anymore */ - mm->context.id = MMU_NO_CONTEXT; - - /* XXX This clear should ultimately be part of local_flush_tlb_mm */ -#ifdef CONFIG_SMP - __clear_bit(id, stale_map[cpu]); -#endif - - return id; -} - -#ifdef DEBUG_MAP_CONSISTENCY -static void context_check_map(void) -{ - unsigned int id, nrf, nact; - - nrf = nact = 0; - for (id = FIRST_CONTEXT; id <= LAST_CONTEXT; id++) { - int used = test_bit(id, context_map); - if (!used) - nrf++; - if (used != (context_mm[id] != NULL)) - pr_err("MMU: Context %d is %s and MM is %p !\n", - id, used ? "used" : "free", context_mm[id]); - if (context_mm[id] != NULL) - nact += context_mm[id]->context.active; - } - if (nrf != nr_free_contexts) { - pr_err("MMU: Free context count out of sync ! (%d vs %d)\n", - nr_free_contexts, nrf); - nr_free_contexts = nrf; - } - if (nact > num_online_cpus()) - pr_err("MMU: More active contexts than CPUs ! (%d vs %d)\n", - nact, num_online_cpus()); - if (FIRST_CONTEXT > 0 && !test_bit(0, context_map)) - pr_err("MMU: Context 0 has been freed !!!\n"); -} -#else -static void context_check_map(void) { } -#endif - -void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next, - struct task_struct *tsk) -{ - unsigned int id; -#ifdef CONFIG_SMP - unsigned int i, cpu = smp_processor_id(); -#endif - unsigned long *map; - - /* No lockless fast path .. yet */ - raw_spin_lock(&context_lock); - - pr_hard("[%d] activating context for mm @%p, active=%d, id=%d", - cpu, next, next->context.active, next->context.id); - -#ifdef CONFIG_SMP - /* Mark us active and the previous one not anymore */ - next->context.active++; - if (prev) { - pr_hardcont(" (old=0x%p a=%d)", prev, prev->context.active); - WARN_ON(prev->context.active < 1); - prev->context.active--; - } - - again: -#endif /* CONFIG_SMP */ - - /* If we already have a valid assigned context, skip all that */ - id = next->context.id; - if (likely(id != MMU_NO_CONTEXT)) { -#ifdef DEBUG_MAP_CONSISTENCY - if (context_mm[id] != next) - pr_err("MMU: mm 0x%p has id %d but context_mm[%d] says 0x%p\n", - next, id, id, context_mm[id]); -#endif - goto ctxt_ok; - } - - /* We really don't have a context, let's try to acquire one */ - id = next_context; - if (id > LAST_CONTEXT) - id = FIRST_CONTEXT; - map = context_map; - - /* No more free contexts, let's try to steal one */ - if (nr_free_contexts == 0) { -#ifdef CONFIG_SMP - if (num_online_cpus() > 1) { - id = steal_context_smp(id); - if (id == MMU_NO_CONTEXT) - goto again; - goto stolen; - } -#endif /* CONFIG_SMP */ - if (IS_ENABLED(CONFIG_PPC_8xx)) - id = steal_all_contexts(); - else - id = steal_context_up(id); - goto stolen; - } - nr_free_contexts--; - - /* We know there's at least one free context, try to find it */ - while (__test_and_set_bit(id, map)) { - id = find_next_zero_bit(map, LAST_CONTEXT+1, id); - if (id > LAST_CONTEXT) - id = FIRST_CONTEXT; - } - stolen: - next_context = id + 1; - context_mm[id] = next; - next->context.id = id; - pr_hardcont(" | new id=%d,nrf=%d", id, nr_free_contexts); - - context_check_map(); - ctxt_ok: - - /* If that context got marked stale on this CPU, then flush the - * local TLB for it and unmark it before we use it - */ -#ifdef CONFIG_SMP - if (test_bit(id, stale_map[cpu])) { - pr_hardcont(" | stale flush %d [%d..%d]", - id, cpu_first_thread_sibling(cpu), - cpu_last_thread_sibling(cpu)); - - local_flush_tlb_mm(next); - - /* XXX This clear should ultimately be part of local_flush_tlb_mm */ - for (i = cpu_first_thread_sibling(cpu); - i <= cpu_last_thread_sibling(cpu); i++) { - if (stale_map[i]) - __clear_bit(id, stale_map[i]); - } - } -#endif - - /* Flick the MMU and release lock */ - pr_hardcont(" -> %d\n", id); - set_context(id, next->pgd); - raw_spin_unlock(&context_lock); -} - -/* - * Set up the context for a new address space. - */ -int init_new_context(struct task_struct *t, struct mm_struct *mm) -{ - pr_hard("initing context for mm @%p\n", mm); - - /* - * We have MMU_NO_CONTEXT set to be ~0. Hence check - * explicitly against context.id == 0. This ensures that we properly - * initialize context slice details for newly allocated mm's (which will - * have id == 0) and don't alter context slice inherited via fork (which - * will have id != 0). - */ - if (mm->context.id == 0) - slice_init_new_context_exec(mm); - mm->context.id = MMU_NO_CONTEXT; - mm->context.active = 0; - pte_frag_set(&mm->context, NULL); - return 0; -} - -/* - * We're finished using the context for an address space. - */ -void destroy_context(struct mm_struct *mm) -{ - unsigned long flags; - unsigned int id; - - if (mm->context.id == MMU_NO_CONTEXT) - return; - - WARN_ON(mm->context.active != 0); - - raw_spin_lock_irqsave(&context_lock, flags); - id = mm->context.id; - if (id != MMU_NO_CONTEXT) { - __clear_bit(id, context_map); - mm->context.id = MMU_NO_CONTEXT; -#ifdef DEBUG_MAP_CONSISTENCY - mm->context.active = 0; -#endif - context_mm[id] = NULL; - nr_free_contexts++; - } - raw_spin_unlock_irqrestore(&context_lock, flags); -} - -#ifdef CONFIG_SMP -static int mmu_ctx_cpu_prepare(unsigned int cpu) -{ - /* We don't touch CPU 0 map, it's allocated at aboot and kept - * around forever - */ - if (cpu == boot_cpuid) - return 0; - - pr_devel("MMU: Allocating stale context map for CPU %d\n", cpu); - stale_map[cpu] = kzalloc(CTX_MAP_SIZE, GFP_KERNEL); - return 0; -} - -static int mmu_ctx_cpu_dead(unsigned int cpu) -{ -#ifdef CONFIG_HOTPLUG_CPU - if (cpu == boot_cpuid) - return 0; - - pr_devel("MMU: Freeing stale context map for CPU %d\n", cpu); - kfree(stale_map[cpu]); - stale_map[cpu] = NULL; - - /* We also clear the cpu_vm_mask bits of CPUs going away */ - clear_tasks_mm_cpumask(cpu); -#endif - return 0; -} - -#endif /* CONFIG_SMP */ - -/* - * Initialize the context management stuff. - */ -void __init mmu_context_init(void) -{ - /* Mark init_mm as being active on all possible CPUs since - * we'll get called with prev == init_mm the first time - * we schedule on a given CPU - */ - init_mm.context.active = NR_CPUS; - - /* - * Allocate the maps used by context management - */ - context_map = memblock_alloc(CTX_MAP_SIZE, SMP_CACHE_BYTES); - if (!context_map) - panic("%s: Failed to allocate %zu bytes\n", __func__, - CTX_MAP_SIZE); - context_mm = memblock_alloc(sizeof(void *) * (LAST_CONTEXT + 1), - SMP_CACHE_BYTES); - if (!context_mm) - panic("%s: Failed to allocate %zu bytes\n", __func__, - sizeof(void *) * (LAST_CONTEXT + 1)); -#ifdef CONFIG_SMP - stale_map[boot_cpuid] = memblock_alloc(CTX_MAP_SIZE, SMP_CACHE_BYTES); - if (!stale_map[boot_cpuid]) - panic("%s: Failed to allocate %zu bytes\n", __func__, - CTX_MAP_SIZE); - - cpuhp_setup_state_nocalls(CPUHP_POWERPC_MMU_CTX_PREPARE, - "powerpc/mmu/ctx:prepare", - mmu_ctx_cpu_prepare, mmu_ctx_cpu_dead); -#endif - - printk(KERN_INFO - "MMU: Allocated %zu bytes of context maps for %d contexts\n", - 2 * CTX_MAP_SIZE + (sizeof(void *) * (LAST_CONTEXT + 1)), - LAST_CONTEXT - FIRST_CONTEXT + 1); - - /* - * Some processors have too few contexts to reserve one for - * init_mm, and require using context 0 for a normal task. - * Other processors reserve the use of context zero for the kernel. - * This code assumes FIRST_CONTEXT < 32. - */ - context_map[0] = (1 << FIRST_CONTEXT) - 1; - next_context = FIRST_CONTEXT; - nr_free_contexts = LAST_CONTEXT - FIRST_CONTEXT + 1; -} diff --git a/arch/powerpc/mm/nohash/40x.c b/arch/powerpc/mm/nohash/40x.c new file mode 100644 index 000000000000..460459b6f53e --- /dev/null +++ b/arch/powerpc/mm/nohash/40x.c @@ -0,0 +1,159 @@ +/* + * This file contains the routines for initializing the MMU + * on the 4xx series of chips. + * -- paulus + * + * Derived from arch/ppc/mm/init.c: + * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) + * + * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au) + * and Cort Dougan (PReP) (cort@cs.nmt.edu) + * Copyright (C) 1996 Paul Mackerras + * + * Derived from "arch/i386/mm/init.c" + * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +extern int __map_without_ltlbs; +/* + * MMU_init_hw does the chip-specific initialization of the MMU hardware. + */ +void __init MMU_init_hw(void) +{ + /* + * The Zone Protection Register (ZPR) defines how protection will + * be applied to every page which is a member of a given zone. At + * present, we utilize only two of the 4xx's zones. + * The zone index bits (of ZSEL) in the PTE are used for software + * indicators, except the LSB. For user access, zone 1 is used, + * for kernel access, zone 0 is used. We set all but zone 1 + * to zero, allowing only kernel access as indicated in the PTE. + * For zone 1, we set a 01 binary (a value of 10 will not work) + * to allow user access as indicated in the PTE. This also allows + * kernel access as indicated in the PTE. + */ + + mtspr(SPRN_ZPR, 0x10000000); + + flush_instruction_cache(); + + /* + * Set up the real-mode cache parameters for the exception vector + * handlers (which are run in real-mode). + */ + + mtspr(SPRN_DCWR, 0x00000000); /* All caching is write-back */ + + /* + * Cache instruction and data space where the exception + * vectors and the kernel live in real-mode. + */ + + mtspr(SPRN_DCCR, 0xFFFF0000); /* 2GByte of data space at 0x0. */ + mtspr(SPRN_ICCR, 0xFFFF0000); /* 2GByte of instr. space at 0x0. */ +} + +#define LARGE_PAGE_SIZE_16M (1<<24) +#define LARGE_PAGE_SIZE_4M (1<<22) + +unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top) +{ + unsigned long v, s, mapped; + phys_addr_t p; + + v = KERNELBASE; + p = 0; + s = total_lowmem; + + if (__map_without_ltlbs) + return 0; + + while (s >= LARGE_PAGE_SIZE_16M) { + pmd_t *pmdp; + unsigned long val = p | _PMD_SIZE_16M | _PAGE_EXEC | _PAGE_HWWRITE; + + pmdp = pmd_offset(pud_offset(pgd_offset_k(v), v), v); + *pmdp++ = __pmd(val); + *pmdp++ = __pmd(val); + *pmdp++ = __pmd(val); + *pmdp++ = __pmd(val); + + v += LARGE_PAGE_SIZE_16M; + p += LARGE_PAGE_SIZE_16M; + s -= LARGE_PAGE_SIZE_16M; + } + + while (s >= LARGE_PAGE_SIZE_4M) { + pmd_t *pmdp; + unsigned long val = p | _PMD_SIZE_4M | _PAGE_EXEC | _PAGE_HWWRITE; + + pmdp = pmd_offset(pud_offset(pgd_offset_k(v), v), v); + *pmdp = __pmd(val); + + v += LARGE_PAGE_SIZE_4M; + p += LARGE_PAGE_SIZE_4M; + s -= LARGE_PAGE_SIZE_4M; + } + + mapped = total_lowmem - s; + + /* If the size of RAM is not an exact power of two, we may not + * have covered RAM in its entirety with 16 and 4 MiB + * pages. Consequently, restrict the top end of RAM currently + * allocable so that calls to the MEMBLOCK to allocate PTEs for "tail" + * coverage with normal-sized pages (or other reasons) do not + * attempt to allocate outside the allowed range. + */ + memblock_set_current_limit(mapped); + + return mapped; +} + +void setup_initial_memory_limit(phys_addr_t first_memblock_base, + phys_addr_t first_memblock_size) +{ + /* We don't currently support the first MEMBLOCK not mapping 0 + * physical on those processors + */ + BUG_ON(first_memblock_base != 0); + + /* 40x can only access 16MB at the moment (see head_40x.S) */ + memblock_set_current_limit(min_t(u64, first_memblock_size, 0x00800000)); +} diff --git a/arch/powerpc/mm/nohash/44x.c b/arch/powerpc/mm/nohash/44x.c new file mode 100644 index 000000000000..c07983ebc02e --- /dev/null +++ b/arch/powerpc/mm/nohash/44x.c @@ -0,0 +1,246 @@ +/* + * Modifications by Matt Porter (mporter@mvista.com) to support + * PPC44x Book E processors. + * + * This file contains the routines for initializing the MMU + * on the 4xx series of chips. + * -- paulus + * + * Derived from arch/ppc/mm/init.c: + * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) + * + * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au) + * and Cort Dougan (PReP) (cort@cs.nmt.edu) + * Copyright (C) 1996 Paul Mackerras + * + * Derived from "arch/i386/mm/init.c" + * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#include +#include + +#include +#include +#include +#include + +#include + +/* Used by the 44x TLB replacement exception handler. + * Just needed it declared someplace. + */ +unsigned int tlb_44x_index; /* = 0 */ +unsigned int tlb_44x_hwater = PPC44x_TLB_SIZE - 1 - PPC44x_EARLY_TLBS; +int icache_44x_need_flush; + +unsigned long tlb_47x_boltmap[1024/8]; + +static void ppc44x_update_tlb_hwater(void) +{ + /* The TLB miss handlers hard codes the watermark in a cmpli + * instruction to improve performances rather than loading it + * from the global variable. Thus, we patch the instructions + * in the 2 TLB miss handlers when updating the value + */ + modify_instruction_site(&patch__tlb_44x_hwater_D, 0xffff, tlb_44x_hwater); + modify_instruction_site(&patch__tlb_44x_hwater_I, 0xffff, tlb_44x_hwater); +} + +/* + * "Pins" a 256MB TLB entry in AS0 for kernel lowmem for 44x type MMU + */ +static void __init ppc44x_pin_tlb(unsigned int virt, unsigned int phys) +{ + unsigned int entry = tlb_44x_hwater--; + + ppc44x_update_tlb_hwater(); + + mtspr(SPRN_MMUCR, 0); + + __asm__ __volatile__( + "tlbwe %2,%3,%4\n" + "tlbwe %1,%3,%5\n" + "tlbwe %0,%3,%6\n" + : + : "r" (PPC44x_TLB_SW | PPC44x_TLB_SR | PPC44x_TLB_SX | PPC44x_TLB_G), + "r" (phys), + "r" (virt | PPC44x_TLB_VALID | PPC44x_TLB_256M), + "r" (entry), + "i" (PPC44x_TLB_PAGEID), + "i" (PPC44x_TLB_XLAT), + "i" (PPC44x_TLB_ATTRIB)); +} + +static int __init ppc47x_find_free_bolted(void) +{ + unsigned int mmube0 = mfspr(SPRN_MMUBE0); + unsigned int mmube1 = mfspr(SPRN_MMUBE1); + + if (!(mmube0 & MMUBE0_VBE0)) + return 0; + if (!(mmube0 & MMUBE0_VBE1)) + return 1; + if (!(mmube0 & MMUBE0_VBE2)) + return 2; + if (!(mmube1 & MMUBE1_VBE3)) + return 3; + if (!(mmube1 & MMUBE1_VBE4)) + return 4; + if (!(mmube1 & MMUBE1_VBE5)) + return 5; + return -1; +} + +static void __init ppc47x_update_boltmap(void) +{ + unsigned int mmube0 = mfspr(SPRN_MMUBE0); + unsigned int mmube1 = mfspr(SPRN_MMUBE1); + + if (mmube0 & MMUBE0_VBE0) + __set_bit((mmube0 >> MMUBE0_IBE0_SHIFT) & 0xff, + tlb_47x_boltmap); + if (mmube0 & MMUBE0_VBE1) + __set_bit((mmube0 >> MMUBE0_IBE1_SHIFT) & 0xff, + tlb_47x_boltmap); + if (mmube0 & MMUBE0_VBE2) + __set_bit((mmube0 >> MMUBE0_IBE2_SHIFT) & 0xff, + tlb_47x_boltmap); + if (mmube1 & MMUBE1_VBE3) + __set_bit((mmube1 >> MMUBE1_IBE3_SHIFT) & 0xff, + tlb_47x_boltmap); + if (mmube1 & MMUBE1_VBE4) + __set_bit((mmube1 >> MMUBE1_IBE4_SHIFT) & 0xff, + tlb_47x_boltmap); + if (mmube1 & MMUBE1_VBE5) + __set_bit((mmube1 >> MMUBE1_IBE5_SHIFT) & 0xff, + tlb_47x_boltmap); +} + +/* + * "Pins" a 256MB TLB entry in AS0 for kernel lowmem for 47x type MMU + */ +static void ppc47x_pin_tlb(unsigned int virt, unsigned int phys) +{ + unsigned int rA; + int bolted; + + /* Base rA is HW way select, way 0, bolted bit set */ + rA = 0x88000000; + + /* Look for a bolted entry slot */ + bolted = ppc47x_find_free_bolted(); + BUG_ON(bolted < 0); + + /* Insert bolted slot number */ + rA |= bolted << 24; + + pr_debug("256M TLB entry for 0x%08x->0x%08x in bolt slot %d\n", + virt, phys, bolted); + + mtspr(SPRN_MMUCR, 0); + + __asm__ __volatile__( + "tlbwe %2,%3,0\n" + "tlbwe %1,%3,1\n" + "tlbwe %0,%3,2\n" + : + : "r" (PPC47x_TLB2_SW | PPC47x_TLB2_SR | + PPC47x_TLB2_SX +#ifdef CONFIG_SMP + | PPC47x_TLB2_M +#endif + ), + "r" (phys), + "r" (virt | PPC47x_TLB0_VALID | PPC47x_TLB0_256M), + "r" (rA)); +} + +void __init MMU_init_hw(void) +{ + /* This is not useful on 47x but won't hurt either */ + ppc44x_update_tlb_hwater(); + + flush_instruction_cache(); +} + +unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top) +{ + unsigned long addr; + unsigned long memstart = memstart_addr & ~(PPC_PIN_SIZE - 1); + + /* Pin in enough TLBs to cover any lowmem not covered by the + * initial 256M mapping established in head_44x.S */ + for (addr = memstart + PPC_PIN_SIZE; addr < lowmem_end_addr; + addr += PPC_PIN_SIZE) { + if (mmu_has_feature(MMU_FTR_TYPE_47x)) + ppc47x_pin_tlb(addr + PAGE_OFFSET, addr); + else + ppc44x_pin_tlb(addr + PAGE_OFFSET, addr); + } + if (mmu_has_feature(MMU_FTR_TYPE_47x)) { + ppc47x_update_boltmap(); + +#ifdef DEBUG + { + int i; + + printk(KERN_DEBUG "bolted entries: "); + for (i = 0; i < 255; i++) { + if (test_bit(i, tlb_47x_boltmap)) + printk("%d ", i); + } + printk("\n"); + } +#endif /* DEBUG */ + } + return total_lowmem; +} + +void setup_initial_memory_limit(phys_addr_t first_memblock_base, + phys_addr_t first_memblock_size) +{ + u64 size; + +#ifndef CONFIG_NONSTATIC_KERNEL + /* We don't currently support the first MEMBLOCK not mapping 0 + * physical on those processors + */ + BUG_ON(first_memblock_base != 0); +#endif + + /* 44x has a 256M TLB entry pinned at boot */ + size = (min_t(u64, first_memblock_size, PPC_PIN_SIZE)); + memblock_set_current_limit(first_memblock_base + size); +} + +#ifdef CONFIG_SMP +void __init mmu_init_secondary(int cpu) +{ + unsigned long addr; + unsigned long memstart = memstart_addr & ~(PPC_PIN_SIZE - 1); + + /* Pin in enough TLBs to cover any lowmem not covered by the + * initial 256M mapping established in head_44x.S + * + * WARNING: This is called with only the first 256M of the + * linear mapping in the TLB and we can't take faults yet + * so beware of what this code uses. It runs off a temporary + * stack. current (r2) isn't initialized, smp_processor_id() + * will not work, current thread info isn't accessible, ... + */ + for (addr = memstart + PPC_PIN_SIZE; addr < lowmem_end_addr; + addr += PPC_PIN_SIZE) { + if (mmu_has_feature(MMU_FTR_TYPE_47x)) + ppc47x_pin_tlb(addr + PAGE_OFFSET, addr); + else + ppc44x_pin_tlb(addr + PAGE_OFFSET, addr); + } +} +#endif /* CONFIG_SMP */ diff --git a/arch/powerpc/mm/nohash/8xx.c b/arch/powerpc/mm/nohash/8xx.c new file mode 100644 index 000000000000..70d55b615b62 --- /dev/null +++ b/arch/powerpc/mm/nohash/8xx.c @@ -0,0 +1,239 @@ +/* + * This file contains the routines for initializing the MMU + * on the 8xx series of chips. + * -- christophe + * + * Derived from arch/powerpc/mm/40x_mmu.c: + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#include +#include +#include +#include + +#include + +#define IMMR_SIZE (FIX_IMMR_SIZE << PAGE_SHIFT) + +extern int __map_without_ltlbs; + +static unsigned long block_mapped_ram; + +/* + * Return PA for this VA if it is in an area mapped with LTLBs. + * Otherwise, returns 0 + */ +phys_addr_t v_block_mapped(unsigned long va) +{ + unsigned long p = PHYS_IMMR_BASE; + + if (__map_without_ltlbs) + return 0; + if (va >= VIRT_IMMR_BASE && va < VIRT_IMMR_BASE + IMMR_SIZE) + return p + va - VIRT_IMMR_BASE; + if (va >= PAGE_OFFSET && va < PAGE_OFFSET + block_mapped_ram) + return __pa(va); + return 0; +} + +/* + * Return VA for a given PA mapped with LTLBs or 0 if not mapped + */ +unsigned long p_block_mapped(phys_addr_t pa) +{ + unsigned long p = PHYS_IMMR_BASE; + + if (__map_without_ltlbs) + return 0; + if (pa >= p && pa < p + IMMR_SIZE) + return VIRT_IMMR_BASE + pa - p; + if (pa < block_mapped_ram) + return (unsigned long)__va(pa); + return 0; +} + +#define LARGE_PAGE_SIZE_8M (1<<23) + +/* + * MMU_init_hw does the chip-specific initialization of the MMU hardware. + */ +void __init MMU_init_hw(void) +{ + /* PIN up to the 3 first 8Mb after IMMR in DTLB table */ + if (IS_ENABLED(CONFIG_PIN_TLB_DATA)) { + unsigned long ctr = mfspr(SPRN_MD_CTR) & 0xfe000000; + unsigned long flags = 0xf0 | MD_SPS16K | _PAGE_SH | _PAGE_DIRTY; + int i = IS_ENABLED(CONFIG_PIN_TLB_IMMR) ? 29 : 28; + unsigned long addr = 0; + unsigned long mem = total_lowmem; + + for (; i < 32 && mem >= LARGE_PAGE_SIZE_8M; i++) { + mtspr(SPRN_MD_CTR, ctr | (i << 8)); + mtspr(SPRN_MD_EPN, (unsigned long)__va(addr) | MD_EVALID); + mtspr(SPRN_MD_TWC, MD_PS8MEG | MD_SVALID); + mtspr(SPRN_MD_RPN, addr | flags | _PAGE_PRESENT); + addr += LARGE_PAGE_SIZE_8M; + mem -= LARGE_PAGE_SIZE_8M; + } + } +} + +static void __init mmu_mapin_immr(void) +{ + unsigned long p = PHYS_IMMR_BASE; + unsigned long v = VIRT_IMMR_BASE; + int offset; + + for (offset = 0; offset < IMMR_SIZE; offset += PAGE_SIZE) + map_kernel_page(v + offset, p + offset, PAGE_KERNEL_NCG); +} + +static void mmu_patch_cmp_limit(s32 *site, unsigned long mapped) +{ + modify_instruction_site(site, 0xffff, (unsigned long)__va(mapped) >> 16); +} + +static void mmu_patch_addis(s32 *site, long simm) +{ + unsigned int instr = *(unsigned int *)patch_site_addr(site); + + instr &= 0xffff0000; + instr |= ((unsigned long)simm) >> 16; + patch_instruction_site(site, instr); +} + +unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top) +{ + unsigned long mapped; + + if (__map_without_ltlbs) { + mapped = 0; + mmu_mapin_immr(); + if (!IS_ENABLED(CONFIG_PIN_TLB_IMMR)) + patch_instruction_site(&patch__dtlbmiss_immr_jmp, PPC_INST_NOP); + if (!IS_ENABLED(CONFIG_PIN_TLB_TEXT)) + mmu_patch_cmp_limit(&patch__itlbmiss_linmem_top, 0); + } else { + mapped = top & ~(LARGE_PAGE_SIZE_8M - 1); + if (!IS_ENABLED(CONFIG_PIN_TLB_TEXT)) + mmu_patch_cmp_limit(&patch__itlbmiss_linmem_top, + _ALIGN(__pa(_einittext), 8 << 20)); + } + + mmu_patch_cmp_limit(&patch__dtlbmiss_linmem_top, mapped); + mmu_patch_cmp_limit(&patch__fixupdar_linmem_top, mapped); + + /* If the size of RAM is not an exact power of two, we may not + * have covered RAM in its entirety with 8 MiB + * pages. Consequently, restrict the top end of RAM currently + * allocable so that calls to the MEMBLOCK to allocate PTEs for "tail" + * coverage with normal-sized pages (or other reasons) do not + * attempt to allocate outside the allowed range. + */ + if (mapped) + memblock_set_current_limit(mapped); + + block_mapped_ram = mapped; + + return mapped; +} + +void mmu_mark_initmem_nx(void) +{ + if (IS_ENABLED(CONFIG_STRICT_KERNEL_RWX) && CONFIG_ETEXT_SHIFT < 23) + mmu_patch_addis(&patch__itlbmiss_linmem_top8, + -((long)_etext & ~(LARGE_PAGE_SIZE_8M - 1))); + if (!IS_ENABLED(CONFIG_PIN_TLB_TEXT)) + mmu_patch_cmp_limit(&patch__itlbmiss_linmem_top, __pa(_etext)); +} + +#ifdef CONFIG_STRICT_KERNEL_RWX +void mmu_mark_rodata_ro(void) +{ + if (CONFIG_DATA_SHIFT < 23) + mmu_patch_addis(&patch__dtlbmiss_romem_top8, + -__pa(((unsigned long)_sinittext) & + ~(LARGE_PAGE_SIZE_8M - 1))); + mmu_patch_addis(&patch__dtlbmiss_romem_top, -__pa(_sinittext)); +} +#endif + +void __init setup_initial_memory_limit(phys_addr_t first_memblock_base, + phys_addr_t first_memblock_size) +{ + /* We don't currently support the first MEMBLOCK not mapping 0 + * physical on those processors + */ + BUG_ON(first_memblock_base != 0); + + /* 8xx can only access 32MB at the moment */ + memblock_set_current_limit(min_t(u64, first_memblock_size, 0x02000000)); +} + +/* + * Set up to use a given MMU context. + * id is context number, pgd is PGD pointer. + * + * We place the physical address of the new task page directory loaded + * into the MMU base register, and set the ASID compare register with + * the new "context." + */ +void set_context(unsigned long id, pgd_t *pgd) +{ + s16 offset = (s16)(__pa(swapper_pg_dir)); + + /* Context switch the PTE pointer for the Abatron BDI2000. + * The PGDIR is passed as second argument. + */ + if (IS_ENABLED(CONFIG_BDI_SWITCH)) + abatron_pteptrs[1] = pgd; + + /* Register M_TWB will contain base address of level 1 table minus the + * lower part of the kernel PGDIR base address, so that all accesses to + * level 1 table are done relative to lower part of kernel PGDIR base + * address. + */ + mtspr(SPRN_M_TWB, __pa(pgd) - offset); + + /* Update context */ + mtspr(SPRN_M_CASID, id - 1); + /* sync */ + mb(); +} + +void flush_instruction_cache(void) +{ + isync(); + mtspr(SPRN_IC_CST, IDC_INVALL); + isync(); +} + +#ifdef CONFIG_PPC_KUEP +void __init setup_kuep(bool disabled) +{ + if (disabled) + return; + + pr_info("Activating Kernel Userspace Execution Prevention\n"); + + mtspr(SPRN_MI_AP, MI_APG_KUEP); +} +#endif + +#ifdef CONFIG_PPC_KUAP +void __init setup_kuap(bool disabled) +{ + pr_info("Activating Kernel Userspace Access Protection\n"); + + if (disabled) + pr_warn("KUAP cannot be disabled yet on 8xx when compiled in\n"); + + mtspr(SPRN_MD_AP, MD_APG_KUAP); +} +#endif diff --git a/arch/powerpc/mm/nohash/Makefile b/arch/powerpc/mm/nohash/Makefile new file mode 100644 index 000000000000..b2228ff81b8a --- /dev/null +++ b/arch/powerpc/mm/nohash/Makefile @@ -0,0 +1,18 @@ +# SPDX-License-Identifier: GPL-2.0 + +ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC) + +obj-y += mmu_context.o tlb.o tlb_low.o +obj-$(CONFIG_PPC_BOOK3E_64) += tlb_low_64e.o book3e_pgtable.o +obj-$(CONFIG_40x) += 40x.o +obj-$(CONFIG_44x) += 44x.o +obj-$(CONFIG_PPC_8xx) += 8xx.o +obj-$(CONFIG_PPC_FSL_BOOK3E) += fsl_booke.o +ifdef CONFIG_HUGETLB_PAGE +obj-$(CONFIG_PPC_BOOK3E_MMU) += book3e_hugetlbpage.o +endif + +# Disable kcov instrumentation on sensitive code +# This is necessary for booting with kcov enabled on book3e machines +KCOV_INSTRUMENT_tlb.o := n +KCOV_INSTRUMENT_fsl_booke.o := n diff --git a/arch/powerpc/mm/nohash/book3e_hugetlbpage.c b/arch/powerpc/mm/nohash/book3e_hugetlbpage.c new file mode 100644 index 000000000000..f84ec46cdb26 --- /dev/null +++ b/arch/powerpc/mm/nohash/book3e_hugetlbpage.c @@ -0,0 +1,206 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * PPC Huge TLB Page Support for Book3E MMU + * + * Copyright (C) 2009 David Gibson, IBM Corporation. + * Copyright (C) 2011 Becky Bruce, Freescale Semiconductor + * + */ +#include +#include + +#include + +#ifdef CONFIG_PPC_FSL_BOOK3E +#ifdef CONFIG_PPC64 +static inline int tlb1_next(void) +{ + struct paca_struct *paca = get_paca(); + struct tlb_core_data *tcd; + int this, next; + + tcd = paca->tcd_ptr; + this = tcd->esel_next; + + next = this + 1; + if (next >= tcd->esel_max) + next = tcd->esel_first; + + tcd->esel_next = next; + return this; +} +#else +static inline int tlb1_next(void) +{ + int index, ncams; + + ncams = mfspr(SPRN_TLB1CFG) & TLBnCFG_N_ENTRY; + + index = this_cpu_read(next_tlbcam_idx); + + /* Just round-robin the entries and wrap when we hit the end */ + if (unlikely(index == ncams - 1)) + __this_cpu_write(next_tlbcam_idx, tlbcam_index); + else + __this_cpu_inc(next_tlbcam_idx); + + return index; +} +#endif /* !PPC64 */ +#endif /* FSL */ + +static inline int mmu_get_tsize(int psize) +{ + return mmu_psize_defs[psize].enc; +} + +#if defined(CONFIG_PPC_FSL_BOOK3E) && defined(CONFIG_PPC64) +#include + +static inline void book3e_tlb_lock(void) +{ + struct paca_struct *paca = get_paca(); + unsigned long tmp; + int token = smp_processor_id() + 1; + + /* + * Besides being unnecessary in the absence of SMT, this + * check prevents trying to do lbarx/stbcx. on e5500 which + * doesn't implement either feature. + */ + if (!cpu_has_feature(CPU_FTR_SMT)) + return; + + asm volatile("1: lbarx %0, 0, %1;" + "cmpwi %0, 0;" + "bne 2f;" + "stbcx. %2, 0, %1;" + "bne 1b;" + "b 3f;" + "2: lbzx %0, 0, %1;" + "cmpwi %0, 0;" + "bne 2b;" + "b 1b;" + "3:" + : "=&r" (tmp) + : "r" (&paca->tcd_ptr->lock), "r" (token) + : "memory"); +} + +static inline void book3e_tlb_unlock(void) +{ + struct paca_struct *paca = get_paca(); + + if (!cpu_has_feature(CPU_FTR_SMT)) + return; + + isync(); + paca->tcd_ptr->lock = 0; +} +#else +static inline void book3e_tlb_lock(void) +{ +} + +static inline void book3e_tlb_unlock(void) +{ +} +#endif + +static inline int book3e_tlb_exists(unsigned long ea, unsigned long pid) +{ + int found = 0; + + mtspr(SPRN_MAS6, pid << 16); + if (mmu_has_feature(MMU_FTR_USE_TLBRSRV)) { + asm volatile( + "li %0,0\n" + "tlbsx. 0,%1\n" + "bne 1f\n" + "li %0,1\n" + "1:\n" + : "=&r"(found) : "r"(ea)); + } else { + asm volatile( + "tlbsx 0,%1\n" + "mfspr %0,0x271\n" + "srwi %0,%0,31\n" + : "=&r"(found) : "r"(ea)); + } + + return found; +} + +void book3e_hugetlb_preload(struct vm_area_struct *vma, unsigned long ea, + pte_t pte) +{ + unsigned long mas1, mas2; + u64 mas7_3; + unsigned long psize, tsize, shift; + unsigned long flags; + struct mm_struct *mm; + +#ifdef CONFIG_PPC_FSL_BOOK3E + int index; +#endif + + if (unlikely(is_kernel_addr(ea))) + return; + + mm = vma->vm_mm; + + psize = vma_mmu_pagesize(vma); + shift = __ilog2(psize); + tsize = shift - 10; + /* + * We can't be interrupted while we're setting up the MAS + * regusters or after we've confirmed that no tlb exists. + */ + local_irq_save(flags); + + book3e_tlb_lock(); + + if (unlikely(book3e_tlb_exists(ea, mm->context.id))) { + book3e_tlb_unlock(); + local_irq_restore(flags); + return; + } + +#ifdef CONFIG_PPC_FSL_BOOK3E + /* We have to use the CAM(TLB1) on FSL parts for hugepages */ + index = tlb1_next(); + mtspr(SPRN_MAS0, MAS0_ESEL(index) | MAS0_TLBSEL(1)); +#endif + + mas1 = MAS1_VALID | MAS1_TID(mm->context.id) | MAS1_TSIZE(tsize); + mas2 = ea & ~((1UL << shift) - 1); + mas2 |= (pte_val(pte) >> PTE_WIMGE_SHIFT) & MAS2_WIMGE_MASK; + mas7_3 = (u64)pte_pfn(pte) << PAGE_SHIFT; + mas7_3 |= (pte_val(pte) >> PTE_BAP_SHIFT) & MAS3_BAP_MASK; + if (!pte_dirty(pte)) + mas7_3 &= ~(MAS3_SW|MAS3_UW); + + mtspr(SPRN_MAS1, mas1); + mtspr(SPRN_MAS2, mas2); + + if (mmu_has_feature(MMU_FTR_USE_PAIRED_MAS)) { + mtspr(SPRN_MAS7_MAS3, mas7_3); + } else { + if (mmu_has_feature(MMU_FTR_BIG_PHYS)) + mtspr(SPRN_MAS7, upper_32_bits(mas7_3)); + mtspr(SPRN_MAS3, lower_32_bits(mas7_3)); + } + + asm volatile ("tlbwe"); + + book3e_tlb_unlock(); + local_irq_restore(flags); +} + +void flush_hugetlb_page(struct vm_area_struct *vma, unsigned long vmaddr) +{ + struct hstate *hstate = hstate_file(vma->vm_file); + unsigned long tsize = huge_page_shift(hstate) - 10; + + __flush_tlb_page(vma->vm_mm, vmaddr, tsize, 0); +} diff --git a/arch/powerpc/mm/nohash/book3e_pgtable.c b/arch/powerpc/mm/nohash/book3e_pgtable.c new file mode 100644 index 000000000000..f296c2e88b09 --- /dev/null +++ b/arch/powerpc/mm/nohash/book3e_pgtable.c @@ -0,0 +1,120 @@ +/* + * Copyright 2005, Paul Mackerras, IBM Corporation. + * Copyright 2009, Benjamin Herrenschmidt, IBM Corporation. + * Copyright 2015-2016, Aneesh Kumar K.V, IBM Corporation. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include + +#include + +#ifdef CONFIG_SPARSEMEM_VMEMMAP +/* + * On Book3E CPUs, the vmemmap is currently mapped in the top half of + * the vmalloc space using normal page tables, though the size of + * pages encoded in the PTEs can be different + */ +int __meminit vmemmap_create_mapping(unsigned long start, + unsigned long page_size, + unsigned long phys) +{ + /* Create a PTE encoding without page size */ + unsigned long i, flags = _PAGE_PRESENT | _PAGE_ACCESSED | + _PAGE_KERNEL_RW; + + /* PTEs only contain page size encodings up to 32M */ + BUG_ON(mmu_psize_defs[mmu_vmemmap_psize].enc > 0xf); + + /* Encode the size in the PTE */ + flags |= mmu_psize_defs[mmu_vmemmap_psize].enc << 8; + + /* For each PTE for that area, map things. Note that we don't + * increment phys because all PTEs are of the large size and + * thus must have the low bits clear + */ + for (i = 0; i < page_size; i += PAGE_SIZE) + BUG_ON(map_kernel_page(start + i, phys, __pgprot(flags))); + + return 0; +} + +#ifdef CONFIG_MEMORY_HOTPLUG +void vmemmap_remove_mapping(unsigned long start, + unsigned long page_size) +{ +} +#endif +#endif /* CONFIG_SPARSEMEM_VMEMMAP */ + +static __ref void *early_alloc_pgtable(unsigned long size) +{ + void *ptr; + + ptr = memblock_alloc_try_nid(size, size, MEMBLOCK_LOW_LIMIT, + __pa(MAX_DMA_ADDRESS), NUMA_NO_NODE); + + if (!ptr) + panic("%s: Failed to allocate %lu bytes align=0x%lx max_addr=%lx\n", + __func__, size, size, __pa(MAX_DMA_ADDRESS)); + + return ptr; +} + +/* + * map_kernel_page currently only called by __ioremap + * map_kernel_page adds an entry to the ioremap page table + * and adds an entry to the HPT, possibly bolting it + */ +int map_kernel_page(unsigned long ea, unsigned long pa, pgprot_t prot) +{ + pgd_t *pgdp; + pud_t *pudp; + pmd_t *pmdp; + pte_t *ptep; + + BUILD_BUG_ON(TASK_SIZE_USER64 > PGTABLE_RANGE); + if (slab_is_available()) { + pgdp = pgd_offset_k(ea); + pudp = pud_alloc(&init_mm, pgdp, ea); + if (!pudp) + return -ENOMEM; + pmdp = pmd_alloc(&init_mm, pudp, ea); + if (!pmdp) + return -ENOMEM; + ptep = pte_alloc_kernel(pmdp, ea); + if (!ptep) + return -ENOMEM; + } else { + pgdp = pgd_offset_k(ea); +#ifndef __PAGETABLE_PUD_FOLDED + if (pgd_none(*pgdp)) { + pudp = early_alloc_pgtable(PUD_TABLE_SIZE); + pgd_populate(&init_mm, pgdp, pudp); + } +#endif /* !__PAGETABLE_PUD_FOLDED */ + pudp = pud_offset(pgdp, ea); + if (pud_none(*pudp)) { + pmdp = early_alloc_pgtable(PMD_TABLE_SIZE); + pud_populate(&init_mm, pudp, pmdp); + } + pmdp = pmd_offset(pudp, ea); + if (!pmd_present(*pmdp)) { + ptep = early_alloc_pgtable(PAGE_SIZE); + pmd_populate_kernel(&init_mm, pmdp, ptep); + } + ptep = pte_offset_kernel(pmdp, ea); + } + set_pte_at(&init_mm, ea, ptep, pfn_pte(pa >> PAGE_SHIFT, prot)); + + smp_wmb(); + return 0; +} diff --git a/arch/powerpc/mm/nohash/fsl_booke.c b/arch/powerpc/mm/nohash/fsl_booke.c new file mode 100644 index 000000000000..71a1a36751dd --- /dev/null +++ b/arch/powerpc/mm/nohash/fsl_booke.c @@ -0,0 +1,326 @@ +/* + * Modifications by Kumar Gala (galak@kernel.crashing.org) to support + * E500 Book E processors. + * + * Copyright 2004,2010 Freescale Semiconductor, Inc. + * + * This file contains the routines for initializing the MMU + * on the 4xx series of chips. + * -- paulus + * + * Derived from arch/ppc/mm/init.c: + * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) + * + * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au) + * and Cort Dougan (PReP) (cort@cs.nmt.edu) + * Copyright (C) 1996 Paul Mackerras + * + * Derived from "arch/i386/mm/init.c" + * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +unsigned int tlbcam_index; + +#define NUM_TLBCAMS (64) +struct tlbcam TLBCAM[NUM_TLBCAMS]; + +struct tlbcamrange { + unsigned long start; + unsigned long limit; + phys_addr_t phys; +} tlbcam_addrs[NUM_TLBCAMS]; + +unsigned long tlbcam_sz(int idx) +{ + return tlbcam_addrs[idx].limit - tlbcam_addrs[idx].start + 1; +} + +#ifdef CONFIG_FSL_BOOKE +/* + * Return PA for this VA if it is mapped by a CAM, or 0 + */ +phys_addr_t v_block_mapped(unsigned long va) +{ + int b; + for (b = 0; b < tlbcam_index; ++b) + if (va >= tlbcam_addrs[b].start && va < tlbcam_addrs[b].limit) + return tlbcam_addrs[b].phys + (va - tlbcam_addrs[b].start); + return 0; +} + +/* + * Return VA for a given PA or 0 if not mapped + */ +unsigned long p_block_mapped(phys_addr_t pa) +{ + int b; + for (b = 0; b < tlbcam_index; ++b) + if (pa >= tlbcam_addrs[b].phys + && pa < (tlbcam_addrs[b].limit-tlbcam_addrs[b].start) + +tlbcam_addrs[b].phys) + return tlbcam_addrs[b].start+(pa-tlbcam_addrs[b].phys); + return 0; +} +#endif + +/* + * Set up a variable-size TLB entry (tlbcam). The parameters are not checked; + * in particular size must be a power of 4 between 4k and the max supported by + * an implementation; max may further be limited by what can be represented in + * an unsigned long (for example, 32-bit implementations cannot support a 4GB + * size). + */ +static void settlbcam(int index, unsigned long virt, phys_addr_t phys, + unsigned long size, unsigned long flags, unsigned int pid) +{ + unsigned int tsize; + + tsize = __ilog2(size) - 10; + +#if defined(CONFIG_SMP) || defined(CONFIG_PPC_E500MC) + if ((flags & _PAGE_NO_CACHE) == 0) + flags |= _PAGE_COHERENT; +#endif + + TLBCAM[index].MAS0 = MAS0_TLBSEL(1) | MAS0_ESEL(index) | MAS0_NV(index+1); + TLBCAM[index].MAS1 = MAS1_VALID | MAS1_IPROT | MAS1_TSIZE(tsize) | MAS1_TID(pid); + TLBCAM[index].MAS2 = virt & PAGE_MASK; + + TLBCAM[index].MAS2 |= (flags & _PAGE_WRITETHRU) ? MAS2_W : 0; + TLBCAM[index].MAS2 |= (flags & _PAGE_NO_CACHE) ? MAS2_I : 0; + TLBCAM[index].MAS2 |= (flags & _PAGE_COHERENT) ? MAS2_M : 0; + TLBCAM[index].MAS2 |= (flags & _PAGE_GUARDED) ? MAS2_G : 0; + TLBCAM[index].MAS2 |= (flags & _PAGE_ENDIAN) ? MAS2_E : 0; + + TLBCAM[index].MAS3 = (phys & MAS3_RPN) | MAS3_SX | MAS3_SR; + TLBCAM[index].MAS3 |= ((flags & _PAGE_RW) ? MAS3_SW : 0); + if (mmu_has_feature(MMU_FTR_BIG_PHYS)) + TLBCAM[index].MAS7 = (u64)phys >> 32; + + /* Below is unlikely -- only for large user pages or similar */ + if (pte_user(__pte(flags))) { + TLBCAM[index].MAS3 |= MAS3_UX | MAS3_UR; + TLBCAM[index].MAS3 |= ((flags & _PAGE_RW) ? MAS3_UW : 0); + } + + tlbcam_addrs[index].start = virt; + tlbcam_addrs[index].limit = virt + size - 1; + tlbcam_addrs[index].phys = phys; +} + +unsigned long calc_cam_sz(unsigned long ram, unsigned long virt, + phys_addr_t phys) +{ + unsigned int camsize = __ilog2(ram); + unsigned int align = __ffs(virt | phys); + unsigned long max_cam; + + if ((mfspr(SPRN_MMUCFG) & MMUCFG_MAVN) == MMUCFG_MAVN_V1) { + /* Convert (4^max) kB to (2^max) bytes */ + max_cam = ((mfspr(SPRN_TLB1CFG) >> 16) & 0xf) * 2 + 10; + camsize &= ~1U; + align &= ~1U; + } else { + /* Convert (2^max) kB to (2^max) bytes */ + max_cam = __ilog2(mfspr(SPRN_TLB1PS)) + 10; + } + + if (camsize > align) + camsize = align; + if (camsize > max_cam) + camsize = max_cam; + + return 1UL << camsize; +} + +static unsigned long map_mem_in_cams_addr(phys_addr_t phys, unsigned long virt, + unsigned long ram, int max_cam_idx, + bool dryrun) +{ + int i; + unsigned long amount_mapped = 0; + + /* Calculate CAM values */ + for (i = 0; ram && i < max_cam_idx; i++) { + unsigned long cam_sz; + + cam_sz = calc_cam_sz(ram, virt, phys); + if (!dryrun) + settlbcam(i, virt, phys, cam_sz, + pgprot_val(PAGE_KERNEL_X), 0); + + ram -= cam_sz; + amount_mapped += cam_sz; + virt += cam_sz; + phys += cam_sz; + } + + if (dryrun) + return amount_mapped; + + loadcam_multi(0, i, max_cam_idx); + tlbcam_index = i; + +#ifdef CONFIG_PPC64 + get_paca()->tcd.esel_next = i; + get_paca()->tcd.esel_max = mfspr(SPRN_TLB1CFG) & TLBnCFG_N_ENTRY; + get_paca()->tcd.esel_first = i; +#endif + + return amount_mapped; +} + +unsigned long map_mem_in_cams(unsigned long ram, int max_cam_idx, bool dryrun) +{ + unsigned long virt = PAGE_OFFSET; + phys_addr_t phys = memstart_addr; + + return map_mem_in_cams_addr(phys, virt, ram, max_cam_idx, dryrun); +} + +#ifdef CONFIG_PPC32 + +#if defined(CONFIG_LOWMEM_CAM_NUM_BOOL) && (CONFIG_LOWMEM_CAM_NUM >= NUM_TLBCAMS) +#error "LOWMEM_CAM_NUM must be less than NUM_TLBCAMS" +#endif + +unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top) +{ + return tlbcam_addrs[tlbcam_index - 1].limit - PAGE_OFFSET + 1; +} + +/* + * MMU_init_hw does the chip-specific initialization of the MMU hardware. + */ +void __init MMU_init_hw(void) +{ + flush_instruction_cache(); +} + +void __init adjust_total_lowmem(void) +{ + unsigned long ram; + int i; + + /* adjust lowmem size to __max_low_memory */ + ram = min((phys_addr_t)__max_low_memory, (phys_addr_t)total_lowmem); + + i = switch_to_as1(); + __max_low_memory = map_mem_in_cams(ram, CONFIG_LOWMEM_CAM_NUM, false); + restore_to_as0(i, 0, 0, 1); + + pr_info("Memory CAM mapping: "); + for (i = 0; i < tlbcam_index - 1; i++) + pr_cont("%lu/", tlbcam_sz(i) >> 20); + pr_cont("%lu Mb, residual: %dMb\n", tlbcam_sz(tlbcam_index - 1) >> 20, + (unsigned int)((total_lowmem - __max_low_memory) >> 20)); + + memblock_set_current_limit(memstart_addr + __max_low_memory); +} + +void setup_initial_memory_limit(phys_addr_t first_memblock_base, + phys_addr_t first_memblock_size) +{ + phys_addr_t limit = first_memblock_base + first_memblock_size; + + /* 64M mapped initially according to head_fsl_booke.S */ + memblock_set_current_limit(min_t(u64, limit, 0x04000000)); +} + +#ifdef CONFIG_RELOCATABLE +int __initdata is_second_reloc; +notrace void __init relocate_init(u64 dt_ptr, phys_addr_t start) +{ + unsigned long base = KERNELBASE; + + kernstart_addr = start; + if (is_second_reloc) { + virt_phys_offset = PAGE_OFFSET - memstart_addr; + return; + } + + /* + * Relocatable kernel support based on processing of dynamic + * relocation entries. Before we get the real memstart_addr, + * We will compute the virt_phys_offset like this: + * virt_phys_offset = stext.run - kernstart_addr + * + * stext.run = (KERNELBASE & ~0x3ffffff) + + * (kernstart_addr & 0x3ffffff) + * When we relocate, we have : + * + * (kernstart_addr & 0x3ffffff) = (stext.run & 0x3ffffff) + * + * hence: + * virt_phys_offset = (KERNELBASE & ~0x3ffffff) - + * (kernstart_addr & ~0x3ffffff) + * + */ + start &= ~0x3ffffff; + base &= ~0x3ffffff; + virt_phys_offset = base - start; + early_get_first_memblock_info(__va(dt_ptr), NULL); + /* + * We now get the memstart_addr, then we should check if this + * address is the same as what the PAGE_OFFSET map to now. If + * not we have to change the map of PAGE_OFFSET to memstart_addr + * and do a second relocation. + */ + if (start != memstart_addr) { + int n; + long offset = start - memstart_addr; + + is_second_reloc = 1; + n = switch_to_as1(); + /* map a 64M area for the second relocation */ + if (memstart_addr > start) + map_mem_in_cams(0x4000000, CONFIG_LOWMEM_CAM_NUM, + false); + else + map_mem_in_cams_addr(start, PAGE_OFFSET + offset, + 0x4000000, CONFIG_LOWMEM_CAM_NUM, + false); + restore_to_as0(n, offset, __va(dt_ptr), 1); + /* We should never reach here */ + panic("Relocation error"); + } +} +#endif +#endif diff --git a/arch/powerpc/mm/nohash/mmu_context.c b/arch/powerpc/mm/nohash/mmu_context.c new file mode 100644 index 000000000000..ae4505d5b4b8 --- /dev/null +++ b/arch/powerpc/mm/nohash/mmu_context.c @@ -0,0 +1,497 @@ +/* + * This file contains the routines for handling the MMU on those + * PowerPC implementations where the MMU is not using the hash + * table, such as 8xx, 4xx, BookE's etc... + * + * Copyright 2008 Ben Herrenschmidt + * IBM Corp. + * + * Derived from previous arch/powerpc/mm/mmu_context.c + * and arch/powerpc/include/asm/mmu_context.h + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * TODO: + * + * - The global context lock will not scale very well + * - The maps should be dynamically allocated to allow for processors + * that support more PID bits at runtime + * - Implement flush_tlb_mm() by making the context stale and picking + * a new one + * - More aggressively clear stale map bits and maybe find some way to + * also clear mm->cpu_vm_mask bits when processes are migrated + */ + +//#define DEBUG_MAP_CONSISTENCY +//#define DEBUG_CLAMP_LAST_CONTEXT 31 +//#define DEBUG_HARDER + +/* We don't use DEBUG because it tends to be compiled in always nowadays + * and this would generate way too much output + */ +#ifdef DEBUG_HARDER +#define pr_hard(args...) printk(KERN_DEBUG args) +#define pr_hardcont(args...) printk(KERN_CONT args) +#else +#define pr_hard(args...) do { } while(0) +#define pr_hardcont(args...) do { } while(0) +#endif + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include + +/* + * The MPC8xx has only 16 contexts. We rotate through them on each task switch. + * A better way would be to keep track of tasks that own contexts, and implement + * an LRU usage. That way very active tasks don't always have to pay the TLB + * reload overhead. The kernel pages are mapped shared, so the kernel can run on + * behalf of any task that makes a kernel entry. Shared does not mean they are + * not protected, just that the ASID comparison is not performed. -- Dan + * + * The IBM4xx has 256 contexts, so we can just rotate through these as a way of + * "switching" contexts. If the TID of the TLB is zero, the PID/TID comparison + * is disabled, so we can use a TID of zero to represent all kernel pages as + * shared among all contexts. -- Dan + * + * The IBM 47x core supports 16-bit PIDs, thus 65535 contexts. We should + * normally never have to steal though the facility is present if needed. + * -- BenH + */ +#define FIRST_CONTEXT 1 +#ifdef DEBUG_CLAMP_LAST_CONTEXT +#define LAST_CONTEXT DEBUG_CLAMP_LAST_CONTEXT +#elif defined(CONFIG_PPC_8xx) +#define LAST_CONTEXT 16 +#elif defined(CONFIG_PPC_47x) +#define LAST_CONTEXT 65535 +#else +#define LAST_CONTEXT 255 +#endif + +static unsigned int next_context, nr_free_contexts; +static unsigned long *context_map; +#ifdef CONFIG_SMP +static unsigned long *stale_map[NR_CPUS]; +#endif +static struct mm_struct **context_mm; +static DEFINE_RAW_SPINLOCK(context_lock); + +#define CTX_MAP_SIZE \ + (sizeof(unsigned long) * (LAST_CONTEXT / BITS_PER_LONG + 1)) + + +/* Steal a context from a task that has one at the moment. + * + * This is used when we are running out of available PID numbers + * on the processors. + * + * This isn't an LRU system, it just frees up each context in + * turn (sort-of pseudo-random replacement :). This would be the + * place to implement an LRU scheme if anyone was motivated to do it. + * -- paulus + * + * For context stealing, we use a slightly different approach for + * SMP and UP. Basically, the UP one is simpler and doesn't use + * the stale map as we can just flush the local CPU + * -- benh + */ +#ifdef CONFIG_SMP +static unsigned int steal_context_smp(unsigned int id) +{ + struct mm_struct *mm; + unsigned int cpu, max, i; + + max = LAST_CONTEXT - FIRST_CONTEXT; + + /* Attempt to free next_context first and then loop until we manage */ + while (max--) { + /* Pick up the victim mm */ + mm = context_mm[id]; + + /* We have a candidate victim, check if it's active, on SMP + * we cannot steal active contexts + */ + if (mm->context.active) { + id++; + if (id > LAST_CONTEXT) + id = FIRST_CONTEXT; + continue; + } + pr_hardcont(" | steal %d from 0x%p", id, mm); + + /* Mark this mm has having no context anymore */ + mm->context.id = MMU_NO_CONTEXT; + + /* Mark it stale on all CPUs that used this mm. For threaded + * implementations, we set it on all threads on each core + * represented in the mask. A future implementation will use + * a core map instead but this will do for now. + */ + for_each_cpu(cpu, mm_cpumask(mm)) { + for (i = cpu_first_thread_sibling(cpu); + i <= cpu_last_thread_sibling(cpu); i++) { + if (stale_map[i]) + __set_bit(id, stale_map[i]); + } + cpu = i - 1; + } + return id; + } + + /* This will happen if you have more CPUs than available contexts, + * all we can do here is wait a bit and try again + */ + raw_spin_unlock(&context_lock); + cpu_relax(); + raw_spin_lock(&context_lock); + + /* This will cause the caller to try again */ + return MMU_NO_CONTEXT; +} +#endif /* CONFIG_SMP */ + +static unsigned int steal_all_contexts(void) +{ + struct mm_struct *mm; +#ifdef CONFIG_SMP + int cpu = smp_processor_id(); +#endif + unsigned int id; + + for (id = FIRST_CONTEXT; id <= LAST_CONTEXT; id++) { + /* Pick up the victim mm */ + mm = context_mm[id]; + + pr_hardcont(" | steal %d from 0x%p", id, mm); + + /* Mark this mm as having no context anymore */ + mm->context.id = MMU_NO_CONTEXT; + if (id != FIRST_CONTEXT) { + context_mm[id] = NULL; + __clear_bit(id, context_map); +#ifdef DEBUG_MAP_CONSISTENCY + mm->context.active = 0; +#endif + } +#ifdef CONFIG_SMP + __clear_bit(id, stale_map[cpu]); +#endif + } + + /* Flush the TLB for all contexts (not to be used on SMP) */ + _tlbil_all(); + + nr_free_contexts = LAST_CONTEXT - FIRST_CONTEXT; + + return FIRST_CONTEXT; +} + +/* Note that this will also be called on SMP if all other CPUs are + * offlined, which means that it may be called for cpu != 0. For + * this to work, we somewhat assume that CPUs that are onlined + * come up with a fully clean TLB (or are cleaned when offlined) + */ +static unsigned int steal_context_up(unsigned int id) +{ + struct mm_struct *mm; +#ifdef CONFIG_SMP + int cpu = smp_processor_id(); +#endif + + /* Pick up the victim mm */ + mm = context_mm[id]; + + pr_hardcont(" | steal %d from 0x%p", id, mm); + + /* Flush the TLB for that context */ + local_flush_tlb_mm(mm); + + /* Mark this mm has having no context anymore */ + mm->context.id = MMU_NO_CONTEXT; + + /* XXX This clear should ultimately be part of local_flush_tlb_mm */ +#ifdef CONFIG_SMP + __clear_bit(id, stale_map[cpu]); +#endif + + return id; +} + +#ifdef DEBUG_MAP_CONSISTENCY +static void context_check_map(void) +{ + unsigned int id, nrf, nact; + + nrf = nact = 0; + for (id = FIRST_CONTEXT; id <= LAST_CONTEXT; id++) { + int used = test_bit(id, context_map); + if (!used) + nrf++; + if (used != (context_mm[id] != NULL)) + pr_err("MMU: Context %d is %s and MM is %p !\n", + id, used ? "used" : "free", context_mm[id]); + if (context_mm[id] != NULL) + nact += context_mm[id]->context.active; + } + if (nrf != nr_free_contexts) { + pr_err("MMU: Free context count out of sync ! (%d vs %d)\n", + nr_free_contexts, nrf); + nr_free_contexts = nrf; + } + if (nact > num_online_cpus()) + pr_err("MMU: More active contexts than CPUs ! (%d vs %d)\n", + nact, num_online_cpus()); + if (FIRST_CONTEXT > 0 && !test_bit(0, context_map)) + pr_err("MMU: Context 0 has been freed !!!\n"); +} +#else +static void context_check_map(void) { } +#endif + +void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next, + struct task_struct *tsk) +{ + unsigned int id; +#ifdef CONFIG_SMP + unsigned int i, cpu = smp_processor_id(); +#endif + unsigned long *map; + + /* No lockless fast path .. yet */ + raw_spin_lock(&context_lock); + + pr_hard("[%d] activating context for mm @%p, active=%d, id=%d", + cpu, next, next->context.active, next->context.id); + +#ifdef CONFIG_SMP + /* Mark us active and the previous one not anymore */ + next->context.active++; + if (prev) { + pr_hardcont(" (old=0x%p a=%d)", prev, prev->context.active); + WARN_ON(prev->context.active < 1); + prev->context.active--; + } + + again: +#endif /* CONFIG_SMP */ + + /* If we already have a valid assigned context, skip all that */ + id = next->context.id; + if (likely(id != MMU_NO_CONTEXT)) { +#ifdef DEBUG_MAP_CONSISTENCY + if (context_mm[id] != next) + pr_err("MMU: mm 0x%p has id %d but context_mm[%d] says 0x%p\n", + next, id, id, context_mm[id]); +#endif + goto ctxt_ok; + } + + /* We really don't have a context, let's try to acquire one */ + id = next_context; + if (id > LAST_CONTEXT) + id = FIRST_CONTEXT; + map = context_map; + + /* No more free contexts, let's try to steal one */ + if (nr_free_contexts == 0) { +#ifdef CONFIG_SMP + if (num_online_cpus() > 1) { + id = steal_context_smp(id); + if (id == MMU_NO_CONTEXT) + goto again; + goto stolen; + } +#endif /* CONFIG_SMP */ + if (IS_ENABLED(CONFIG_PPC_8xx)) + id = steal_all_contexts(); + else + id = steal_context_up(id); + goto stolen; + } + nr_free_contexts--; + + /* We know there's at least one free context, try to find it */ + while (__test_and_set_bit(id, map)) { + id = find_next_zero_bit(map, LAST_CONTEXT+1, id); + if (id > LAST_CONTEXT) + id = FIRST_CONTEXT; + } + stolen: + next_context = id + 1; + context_mm[id] = next; + next->context.id = id; + pr_hardcont(" | new id=%d,nrf=%d", id, nr_free_contexts); + + context_check_map(); + ctxt_ok: + + /* If that context got marked stale on this CPU, then flush the + * local TLB for it and unmark it before we use it + */ +#ifdef CONFIG_SMP + if (test_bit(id, stale_map[cpu])) { + pr_hardcont(" | stale flush %d [%d..%d]", + id, cpu_first_thread_sibling(cpu), + cpu_last_thread_sibling(cpu)); + + local_flush_tlb_mm(next); + + /* XXX This clear should ultimately be part of local_flush_tlb_mm */ + for (i = cpu_first_thread_sibling(cpu); + i <= cpu_last_thread_sibling(cpu); i++) { + if (stale_map[i]) + __clear_bit(id, stale_map[i]); + } + } +#endif + + /* Flick the MMU and release lock */ + pr_hardcont(" -> %d\n", id); + set_context(id, next->pgd); + raw_spin_unlock(&context_lock); +} + +/* + * Set up the context for a new address space. + */ +int init_new_context(struct task_struct *t, struct mm_struct *mm) +{ + pr_hard("initing context for mm @%p\n", mm); + + /* + * We have MMU_NO_CONTEXT set to be ~0. Hence check + * explicitly against context.id == 0. This ensures that we properly + * initialize context slice details for newly allocated mm's (which will + * have id == 0) and don't alter context slice inherited via fork (which + * will have id != 0). + */ + if (mm->context.id == 0) + slice_init_new_context_exec(mm); + mm->context.id = MMU_NO_CONTEXT; + mm->context.active = 0; + pte_frag_set(&mm->context, NULL); + return 0; +} + +/* + * We're finished using the context for an address space. + */ +void destroy_context(struct mm_struct *mm) +{ + unsigned long flags; + unsigned int id; + + if (mm->context.id == MMU_NO_CONTEXT) + return; + + WARN_ON(mm->context.active != 0); + + raw_spin_lock_irqsave(&context_lock, flags); + id = mm->context.id; + if (id != MMU_NO_CONTEXT) { + __clear_bit(id, context_map); + mm->context.id = MMU_NO_CONTEXT; +#ifdef DEBUG_MAP_CONSISTENCY + mm->context.active = 0; +#endif + context_mm[id] = NULL; + nr_free_contexts++; + } + raw_spin_unlock_irqrestore(&context_lock, flags); +} + +#ifdef CONFIG_SMP +static int mmu_ctx_cpu_prepare(unsigned int cpu) +{ + /* We don't touch CPU 0 map, it's allocated at aboot and kept + * around forever + */ + if (cpu == boot_cpuid) + return 0; + + pr_devel("MMU: Allocating stale context map for CPU %d\n", cpu); + stale_map[cpu] = kzalloc(CTX_MAP_SIZE, GFP_KERNEL); + return 0; +} + +static int mmu_ctx_cpu_dead(unsigned int cpu) +{ +#ifdef CONFIG_HOTPLUG_CPU + if (cpu == boot_cpuid) + return 0; + + pr_devel("MMU: Freeing stale context map for CPU %d\n", cpu); + kfree(stale_map[cpu]); + stale_map[cpu] = NULL; + + /* We also clear the cpu_vm_mask bits of CPUs going away */ + clear_tasks_mm_cpumask(cpu); +#endif + return 0; +} + +#endif /* CONFIG_SMP */ + +/* + * Initialize the context management stuff. + */ +void __init mmu_context_init(void) +{ + /* Mark init_mm as being active on all possible CPUs since + * we'll get called with prev == init_mm the first time + * we schedule on a given CPU + */ + init_mm.context.active = NR_CPUS; + + /* + * Allocate the maps used by context management + */ + context_map = memblock_alloc(CTX_MAP_SIZE, SMP_CACHE_BYTES); + if (!context_map) + panic("%s: Failed to allocate %zu bytes\n", __func__, + CTX_MAP_SIZE); + context_mm = memblock_alloc(sizeof(void *) * (LAST_CONTEXT + 1), + SMP_CACHE_BYTES); + if (!context_mm) + panic("%s: Failed to allocate %zu bytes\n", __func__, + sizeof(void *) * (LAST_CONTEXT + 1)); +#ifdef CONFIG_SMP + stale_map[boot_cpuid] = memblock_alloc(CTX_MAP_SIZE, SMP_CACHE_BYTES); + if (!stale_map[boot_cpuid]) + panic("%s: Failed to allocate %zu bytes\n", __func__, + CTX_MAP_SIZE); + + cpuhp_setup_state_nocalls(CPUHP_POWERPC_MMU_CTX_PREPARE, + "powerpc/mmu/ctx:prepare", + mmu_ctx_cpu_prepare, mmu_ctx_cpu_dead); +#endif + + printk(KERN_INFO + "MMU: Allocated %zu bytes of context maps for %d contexts\n", + 2 * CTX_MAP_SIZE + (sizeof(void *) * (LAST_CONTEXT + 1)), + LAST_CONTEXT - FIRST_CONTEXT + 1); + + /* + * Some processors have too few contexts to reserve one for + * init_mm, and require using context 0 for a normal task. + * Other processors reserve the use of context zero for the kernel. + * This code assumes FIRST_CONTEXT < 32. + */ + context_map[0] = (1 << FIRST_CONTEXT) - 1; + next_context = FIRST_CONTEXT; + nr_free_contexts = LAST_CONTEXT - FIRST_CONTEXT + 1; +} diff --git a/arch/powerpc/mm/nohash/tlb.c b/arch/powerpc/mm/nohash/tlb.c new file mode 100644 index 000000000000..704e613a0b14 --- /dev/null +++ b/arch/powerpc/mm/nohash/tlb.c @@ -0,0 +1,810 @@ +/* + * This file contains the routines for TLB flushing. + * On machines where the MMU does not use a hash table to store virtual to + * physical translations (ie, SW loaded TLBs or Book3E compilant processors, + * this does -not- include 603 however which shares the implementation with + * hash based processors) + * + * -- BenH + * + * Copyright 2008,2009 Ben Herrenschmidt + * IBM Corp. + * + * Derived from arch/ppc/mm/init.c: + * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) + * + * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au) + * and Cort Dougan (PReP) (cort@cs.nmt.edu) + * Copyright (C) 1996 Paul Mackerras + * + * Derived from "arch/i386/mm/init.c" + * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include + +/* + * This struct lists the sw-supported page sizes. The hardawre MMU may support + * other sizes not listed here. The .ind field is only used on MMUs that have + * indirect page table entries. + */ +#if defined(CONFIG_PPC_BOOK3E_MMU) || defined(CONFIG_PPC_8xx) +#ifdef CONFIG_PPC_FSL_BOOK3E +struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT] = { + [MMU_PAGE_4K] = { + .shift = 12, + .enc = BOOK3E_PAGESZ_4K, + }, + [MMU_PAGE_2M] = { + .shift = 21, + .enc = BOOK3E_PAGESZ_2M, + }, + [MMU_PAGE_4M] = { + .shift = 22, + .enc = BOOK3E_PAGESZ_4M, + }, + [MMU_PAGE_16M] = { + .shift = 24, + .enc = BOOK3E_PAGESZ_16M, + }, + [MMU_PAGE_64M] = { + .shift = 26, + .enc = BOOK3E_PAGESZ_64M, + }, + [MMU_PAGE_256M] = { + .shift = 28, + .enc = BOOK3E_PAGESZ_256M, + }, + [MMU_PAGE_1G] = { + .shift = 30, + .enc = BOOK3E_PAGESZ_1GB, + }, +}; +#elif defined(CONFIG_PPC_8xx) +struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT] = { + /* we only manage 4k and 16k pages as normal pages */ +#ifdef CONFIG_PPC_4K_PAGES + [MMU_PAGE_4K] = { + .shift = 12, + }, +#else + [MMU_PAGE_16K] = { + .shift = 14, + }, +#endif + [MMU_PAGE_512K] = { + .shift = 19, + }, + [MMU_PAGE_8M] = { + .shift = 23, + }, +}; +#else +struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT] = { + [MMU_PAGE_4K] = { + .shift = 12, + .ind = 20, + .enc = BOOK3E_PAGESZ_4K, + }, + [MMU_PAGE_16K] = { + .shift = 14, + .enc = BOOK3E_PAGESZ_16K, + }, + [MMU_PAGE_64K] = { + .shift = 16, + .ind = 28, + .enc = BOOK3E_PAGESZ_64K, + }, + [MMU_PAGE_1M] = { + .shift = 20, + .enc = BOOK3E_PAGESZ_1M, + }, + [MMU_PAGE_16M] = { + .shift = 24, + .ind = 36, + .enc = BOOK3E_PAGESZ_16M, + }, + [MMU_PAGE_256M] = { + .shift = 28, + .enc = BOOK3E_PAGESZ_256M, + }, + [MMU_PAGE_1G] = { + .shift = 30, + .enc = BOOK3E_PAGESZ_1GB, + }, +}; +#endif /* CONFIG_FSL_BOOKE */ + +static inline int mmu_get_tsize(int psize) +{ + return mmu_psize_defs[psize].enc; +} +#else +static inline int mmu_get_tsize(int psize) +{ + /* This isn't used on !Book3E for now */ + return 0; +} +#endif /* CONFIG_PPC_BOOK3E_MMU */ + +/* The variables below are currently only used on 64-bit Book3E + * though this will probably be made common with other nohash + * implementations at some point + */ +#ifdef CONFIG_PPC64 + +int mmu_linear_psize; /* Page size used for the linear mapping */ +int mmu_pte_psize; /* Page size used for PTE pages */ +int mmu_vmemmap_psize; /* Page size used for the virtual mem map */ +int book3e_htw_mode; /* HW tablewalk? Value is PPC_HTW_* */ +unsigned long linear_map_top; /* Top of linear mapping */ + + +/* + * Number of bytes to add to SPRN_SPRG_TLB_EXFRAME on crit/mcheck/debug + * exceptions. This is used for bolted and e6500 TLB miss handlers which + * do not modify this SPRG in the TLB miss code; for other TLB miss handlers, + * this is set to zero. + */ +int extlb_level_exc; + +#endif /* CONFIG_PPC64 */ + +#ifdef CONFIG_PPC_FSL_BOOK3E +/* next_tlbcam_idx is used to round-robin tlbcam entry assignment */ +DEFINE_PER_CPU(int, next_tlbcam_idx); +EXPORT_PER_CPU_SYMBOL(next_tlbcam_idx); +#endif + +/* + * Base TLB flushing operations: + * + * - flush_tlb_mm(mm) flushes the specified mm context TLB's + * - flush_tlb_page(vma, vmaddr) flushes one page + * - flush_tlb_range(vma, start, end) flushes a range of pages + * - flush_tlb_kernel_range(start, end) flushes kernel pages + * + * - local_* variants of page and mm only apply to the current + * processor + */ + +/* + * These are the base non-SMP variants of page and mm flushing + */ +void local_flush_tlb_mm(struct mm_struct *mm) +{ + unsigned int pid; + + preempt_disable(); + pid = mm->context.id; + if (pid != MMU_NO_CONTEXT) + _tlbil_pid(pid); + preempt_enable(); +} +EXPORT_SYMBOL(local_flush_tlb_mm); + +void __local_flush_tlb_page(struct mm_struct *mm, unsigned long vmaddr, + int tsize, int ind) +{ + unsigned int pid; + + preempt_disable(); + pid = mm ? mm->context.id : 0; + if (pid != MMU_NO_CONTEXT) + _tlbil_va(vmaddr, pid, tsize, ind); + preempt_enable(); +} + +void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr) +{ + __local_flush_tlb_page(vma ? vma->vm_mm : NULL, vmaddr, + mmu_get_tsize(mmu_virtual_psize), 0); +} +EXPORT_SYMBOL(local_flush_tlb_page); + +/* + * And here are the SMP non-local implementations + */ +#ifdef CONFIG_SMP + +static DEFINE_RAW_SPINLOCK(tlbivax_lock); + +struct tlb_flush_param { + unsigned long addr; + unsigned int pid; + unsigned int tsize; + unsigned int ind; +}; + +static void do_flush_tlb_mm_ipi(void *param) +{ + struct tlb_flush_param *p = param; + + _tlbil_pid(p ? p->pid : 0); +} + +static void do_flush_tlb_page_ipi(void *param) +{ + struct tlb_flush_param *p = param; + + _tlbil_va(p->addr, p->pid, p->tsize, p->ind); +} + + +/* Note on invalidations and PID: + * + * We snapshot the PID with preempt disabled. At this point, it can still + * change either because: + * - our context is being stolen (PID -> NO_CONTEXT) on another CPU + * - we are invaliating some target that isn't currently running here + * and is concurrently acquiring a new PID on another CPU + * - some other CPU is re-acquiring a lost PID for this mm + * etc... + * + * However, this shouldn't be a problem as we only guarantee + * invalidation of TLB entries present prior to this call, so we + * don't care about the PID changing, and invalidating a stale PID + * is generally harmless. + */ + +void flush_tlb_mm(struct mm_struct *mm) +{ + unsigned int pid; + + preempt_disable(); + pid = mm->context.id; + if (unlikely(pid == MMU_NO_CONTEXT)) + goto no_context; + if (!mm_is_core_local(mm)) { + struct tlb_flush_param p = { .pid = pid }; + /* Ignores smp_processor_id() even if set. */ + smp_call_function_many(mm_cpumask(mm), + do_flush_tlb_mm_ipi, &p, 1); + } + _tlbil_pid(pid); + no_context: + preempt_enable(); +} +EXPORT_SYMBOL(flush_tlb_mm); + +void __flush_tlb_page(struct mm_struct *mm, unsigned long vmaddr, + int tsize, int ind) +{ + struct cpumask *cpu_mask; + unsigned int pid; + + /* + * This function as well as __local_flush_tlb_page() must only be called + * for user contexts. + */ + if (WARN_ON(!mm)) + return; + + preempt_disable(); + pid = mm->context.id; + if (unlikely(pid == MMU_NO_CONTEXT)) + goto bail; + cpu_mask = mm_cpumask(mm); + if (!mm_is_core_local(mm)) { + /* If broadcast tlbivax is supported, use it */ + if (mmu_has_feature(MMU_FTR_USE_TLBIVAX_BCAST)) { + int lock = mmu_has_feature(MMU_FTR_LOCK_BCAST_INVAL); + if (lock) + raw_spin_lock(&tlbivax_lock); + _tlbivax_bcast(vmaddr, pid, tsize, ind); + if (lock) + raw_spin_unlock(&tlbivax_lock); + goto bail; + } else { + struct tlb_flush_param p = { + .pid = pid, + .addr = vmaddr, + .tsize = tsize, + .ind = ind, + }; + /* Ignores smp_processor_id() even if set in cpu_mask */ + smp_call_function_many(cpu_mask, + do_flush_tlb_page_ipi, &p, 1); + } + } + _tlbil_va(vmaddr, pid, tsize, ind); + bail: + preempt_enable(); +} + +void flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr) +{ +#ifdef CONFIG_HUGETLB_PAGE + if (vma && is_vm_hugetlb_page(vma)) + flush_hugetlb_page(vma, vmaddr); +#endif + + __flush_tlb_page(vma ? vma->vm_mm : NULL, vmaddr, + mmu_get_tsize(mmu_virtual_psize), 0); +} +EXPORT_SYMBOL(flush_tlb_page); + +#endif /* CONFIG_SMP */ + +#ifdef CONFIG_PPC_47x +void __init early_init_mmu_47x(void) +{ +#ifdef CONFIG_SMP + unsigned long root = of_get_flat_dt_root(); + if (of_get_flat_dt_prop(root, "cooperative-partition", NULL)) + mmu_clear_feature(MMU_FTR_USE_TLBIVAX_BCAST); +#endif /* CONFIG_SMP */ +} +#endif /* CONFIG_PPC_47x */ + +/* + * Flush kernel TLB entries in the given range + */ +void flush_tlb_kernel_range(unsigned long start, unsigned long end) +{ +#ifdef CONFIG_SMP + preempt_disable(); + smp_call_function(do_flush_tlb_mm_ipi, NULL, 1); + _tlbil_pid(0); + preempt_enable(); +#else + _tlbil_pid(0); +#endif +} +EXPORT_SYMBOL(flush_tlb_kernel_range); + +/* + * Currently, for range flushing, we just do a full mm flush. This should + * be optimized based on a threshold on the size of the range, since + * some implementation can stack multiple tlbivax before a tlbsync but + * for now, we keep it that way + */ +void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, + unsigned long end) + +{ + if (end - start == PAGE_SIZE && !(start & ~PAGE_MASK)) + flush_tlb_page(vma, start); + else + flush_tlb_mm(vma->vm_mm); +} +EXPORT_SYMBOL(flush_tlb_range); + +void tlb_flush(struct mmu_gather *tlb) +{ + flush_tlb_mm(tlb->mm); +} + +/* + * Below are functions specific to the 64-bit variant of Book3E though that + * may change in the future + */ + +#ifdef CONFIG_PPC64 + +/* + * Handling of virtual linear page tables or indirect TLB entries + * flushing when PTE pages are freed + */ +void tlb_flush_pgtable(struct mmu_gather *tlb, unsigned long address) +{ + int tsize = mmu_psize_defs[mmu_pte_psize].enc; + + if (book3e_htw_mode != PPC_HTW_NONE) { + unsigned long start = address & PMD_MASK; + unsigned long end = address + PMD_SIZE; + unsigned long size = 1UL << mmu_psize_defs[mmu_pte_psize].shift; + + /* This isn't the most optimal, ideally we would factor out the + * while preempt & CPU mask mucking around, or even the IPI but + * it will do for now + */ + while (start < end) { + __flush_tlb_page(tlb->mm, start, tsize, 1); + start += size; + } + } else { + unsigned long rmask = 0xf000000000000000ul; + unsigned long rid = (address & rmask) | 0x1000000000000000ul; + unsigned long vpte = address & ~rmask; + +#ifdef CONFIG_PPC_64K_PAGES + vpte = (vpte >> (PAGE_SHIFT - 4)) & ~0xfffful; +#else + vpte = (vpte >> (PAGE_SHIFT - 3)) & ~0xffful; +#endif + vpte |= rid; + __flush_tlb_page(tlb->mm, vpte, tsize, 0); + } +} + +static void setup_page_sizes(void) +{ + unsigned int tlb0cfg; + unsigned int tlb0ps; + unsigned int eptcfg; + int i, psize; + +#ifdef CONFIG_PPC_FSL_BOOK3E + unsigned int mmucfg = mfspr(SPRN_MMUCFG); + int fsl_mmu = mmu_has_feature(MMU_FTR_TYPE_FSL_E); + + if (fsl_mmu && (mmucfg & MMUCFG_MAVN) == MMUCFG_MAVN_V1) { + unsigned int tlb1cfg = mfspr(SPRN_TLB1CFG); + unsigned int min_pg, max_pg; + + min_pg = (tlb1cfg & TLBnCFG_MINSIZE) >> TLBnCFG_MINSIZE_SHIFT; + max_pg = (tlb1cfg & TLBnCFG_MAXSIZE) >> TLBnCFG_MAXSIZE_SHIFT; + + for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) { + struct mmu_psize_def *def; + unsigned int shift; + + def = &mmu_psize_defs[psize]; + shift = def->shift; + + if (shift == 0 || shift & 1) + continue; + + /* adjust to be in terms of 4^shift Kb */ + shift = (shift - 10) >> 1; + + if ((shift >= min_pg) && (shift <= max_pg)) + def->flags |= MMU_PAGE_SIZE_DIRECT; + } + + goto out; + } + + if (fsl_mmu && (mmucfg & MMUCFG_MAVN) == MMUCFG_MAVN_V2) { + u32 tlb1cfg, tlb1ps; + + tlb0cfg = mfspr(SPRN_TLB0CFG); + tlb1cfg = mfspr(SPRN_TLB1CFG); + tlb1ps = mfspr(SPRN_TLB1PS); + eptcfg = mfspr(SPRN_EPTCFG); + + if ((tlb1cfg & TLBnCFG_IND) && (tlb0cfg & TLBnCFG_PT)) + book3e_htw_mode = PPC_HTW_E6500; + + /* + * We expect 4K subpage size and unrestricted indirect size. + * The lack of a restriction on indirect size is a Freescale + * extension, indicated by PSn = 0 but SPSn != 0. + */ + if (eptcfg != 2) + book3e_htw_mode = PPC_HTW_NONE; + + for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) { + struct mmu_psize_def *def = &mmu_psize_defs[psize]; + + if (!def->shift) + continue; + + if (tlb1ps & (1U << (def->shift - 10))) { + def->flags |= MMU_PAGE_SIZE_DIRECT; + + if (book3e_htw_mode && psize == MMU_PAGE_2M) + def->flags |= MMU_PAGE_SIZE_INDIRECT; + } + } + + goto out; + } +#endif + + tlb0cfg = mfspr(SPRN_TLB0CFG); + tlb0ps = mfspr(SPRN_TLB0PS); + eptcfg = mfspr(SPRN_EPTCFG); + + /* Look for supported direct sizes */ + for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) { + struct mmu_psize_def *def = &mmu_psize_defs[psize]; + + if (tlb0ps & (1U << (def->shift - 10))) + def->flags |= MMU_PAGE_SIZE_DIRECT; + } + + /* Indirect page sizes supported ? */ + if ((tlb0cfg & TLBnCFG_IND) == 0 || + (tlb0cfg & TLBnCFG_PT) == 0) + goto out; + + book3e_htw_mode = PPC_HTW_IBM; + + /* Now, we only deal with one IND page size for each + * direct size. Hopefully all implementations today are + * unambiguous, but we might want to be careful in the + * future. + */ + for (i = 0; i < 3; i++) { + unsigned int ps, sps; + + sps = eptcfg & 0x1f; + eptcfg >>= 5; + ps = eptcfg & 0x1f; + eptcfg >>= 5; + if (!ps || !sps) + continue; + for (psize = 0; psize < MMU_PAGE_COUNT; psize++) { + struct mmu_psize_def *def = &mmu_psize_defs[psize]; + + if (ps == (def->shift - 10)) + def->flags |= MMU_PAGE_SIZE_INDIRECT; + if (sps == (def->shift - 10)) + def->ind = ps + 10; + } + } + +out: + /* Cleanup array and print summary */ + pr_info("MMU: Supported page sizes\n"); + for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) { + struct mmu_psize_def *def = &mmu_psize_defs[psize]; + const char *__page_type_names[] = { + "unsupported", + "direct", + "indirect", + "direct & indirect" + }; + if (def->flags == 0) { + def->shift = 0; + continue; + } + pr_info(" %8ld KB as %s\n", 1ul << (def->shift - 10), + __page_type_names[def->flags & 0x3]); + } +} + +static void setup_mmu_htw(void) +{ + /* + * If we want to use HW tablewalk, enable it by patching the TLB miss + * handlers to branch to the one dedicated to it. + */ + + switch (book3e_htw_mode) { + case PPC_HTW_IBM: + patch_exception(0x1c0, exc_data_tlb_miss_htw_book3e); + patch_exception(0x1e0, exc_instruction_tlb_miss_htw_book3e); + break; +#ifdef CONFIG_PPC_FSL_BOOK3E + case PPC_HTW_E6500: + extlb_level_exc = EX_TLB_SIZE; + patch_exception(0x1c0, exc_data_tlb_miss_e6500_book3e); + patch_exception(0x1e0, exc_instruction_tlb_miss_e6500_book3e); + break; +#endif + } + pr_info("MMU: Book3E HW tablewalk %s\n", + book3e_htw_mode != PPC_HTW_NONE ? "enabled" : "not supported"); +} + +/* + * Early initialization of the MMU TLB code + */ +static void early_init_this_mmu(void) +{ + unsigned int mas4; + + /* Set MAS4 based on page table setting */ + + mas4 = 0x4 << MAS4_WIMGED_SHIFT; + switch (book3e_htw_mode) { + case PPC_HTW_E6500: + mas4 |= MAS4_INDD; + mas4 |= BOOK3E_PAGESZ_2M << MAS4_TSIZED_SHIFT; + mas4 |= MAS4_TLBSELD(1); + mmu_pte_psize = MMU_PAGE_2M; + break; + + case PPC_HTW_IBM: + mas4 |= MAS4_INDD; +#ifdef CONFIG_PPC_64K_PAGES + mas4 |= BOOK3E_PAGESZ_256M << MAS4_TSIZED_SHIFT; + mmu_pte_psize = MMU_PAGE_256M; +#else + mas4 |= BOOK3E_PAGESZ_1M << MAS4_TSIZED_SHIFT; + mmu_pte_psize = MMU_PAGE_1M; +#endif + break; + + case PPC_HTW_NONE: +#ifdef CONFIG_PPC_64K_PAGES + mas4 |= BOOK3E_PAGESZ_64K << MAS4_TSIZED_SHIFT; +#else + mas4 |= BOOK3E_PAGESZ_4K << MAS4_TSIZED_SHIFT; +#endif + mmu_pte_psize = mmu_virtual_psize; + break; + } + mtspr(SPRN_MAS4, mas4); + +#ifdef CONFIG_PPC_FSL_BOOK3E + if (mmu_has_feature(MMU_FTR_TYPE_FSL_E)) { + unsigned int num_cams; + int __maybe_unused cpu = smp_processor_id(); + bool map = true; + + /* use a quarter of the TLBCAM for bolted linear map */ + num_cams = (mfspr(SPRN_TLB1CFG) & TLBnCFG_N_ENTRY) / 4; + + /* + * Only do the mapping once per core, or else the + * transient mapping would cause problems. + */ +#ifdef CONFIG_SMP + if (hweight32(get_tensr()) > 1) + map = false; +#endif + + if (map) + linear_map_top = map_mem_in_cams(linear_map_top, + num_cams, false); + } +#endif + + /* A sync won't hurt us after mucking around with + * the MMU configuration + */ + mb(); +} + +static void __init early_init_mmu_global(void) +{ + /* XXX This will have to be decided at runtime, but right + * now our boot and TLB miss code hard wires it. Ideally + * we should find out a suitable page size and patch the + * TLB miss code (either that or use the PACA to store + * the value we want) + */ + mmu_linear_psize = MMU_PAGE_1G; + + /* XXX This should be decided at runtime based on supported + * page sizes in the TLB, but for now let's assume 16M is + * always there and a good fit (which it probably is) + * + * Freescale booke only supports 4K pages in TLB0, so use that. + */ + if (mmu_has_feature(MMU_FTR_TYPE_FSL_E)) + mmu_vmemmap_psize = MMU_PAGE_4K; + else + mmu_vmemmap_psize = MMU_PAGE_16M; + + /* XXX This code only checks for TLB 0 capabilities and doesn't + * check what page size combos are supported by the HW. It + * also doesn't handle the case where a separate array holds + * the IND entries from the array loaded by the PT. + */ + /* Look for supported page sizes */ + setup_page_sizes(); + + /* Look for HW tablewalk support */ + setup_mmu_htw(); + +#ifdef CONFIG_PPC_FSL_BOOK3E + if (mmu_has_feature(MMU_FTR_TYPE_FSL_E)) { + if (book3e_htw_mode == PPC_HTW_NONE) { + extlb_level_exc = EX_TLB_SIZE; + patch_exception(0x1c0, exc_data_tlb_miss_bolted_book3e); + patch_exception(0x1e0, + exc_instruction_tlb_miss_bolted_book3e); + } + } +#endif + + /* Set the global containing the top of the linear mapping + * for use by the TLB miss code + */ + linear_map_top = memblock_end_of_DRAM(); +} + +static void __init early_mmu_set_memory_limit(void) +{ +#ifdef CONFIG_PPC_FSL_BOOK3E + if (mmu_has_feature(MMU_FTR_TYPE_FSL_E)) { + /* + * Limit memory so we dont have linear faults. + * Unlike memblock_set_current_limit, which limits + * memory available during early boot, this permanently + * reduces the memory available to Linux. We need to + * do this because highmem is not supported on 64-bit. + */ + memblock_enforce_memory_limit(linear_map_top); + } +#endif + + memblock_set_current_limit(linear_map_top); +} + +/* boot cpu only */ +void __init early_init_mmu(void) +{ + early_init_mmu_global(); + early_init_this_mmu(); + early_mmu_set_memory_limit(); +} + +void early_init_mmu_secondary(void) +{ + early_init_this_mmu(); +} + +void setup_initial_memory_limit(phys_addr_t first_memblock_base, + phys_addr_t first_memblock_size) +{ + /* On non-FSL Embedded 64-bit, we adjust the RMA size to match + * the bolted TLB entry. We know for now that only 1G + * entries are supported though that may eventually + * change. + * + * on FSL Embedded 64-bit, usually all RAM is bolted, but with + * unusual memory sizes it's possible for some RAM to not be mapped + * (such RAM is not used at all by Linux, since we don't support + * highmem on 64-bit). We limit ppc64_rma_size to what would be + * mappable if this memblock is the only one. Additional memblocks + * can only increase, not decrease, the amount that ends up getting + * mapped. We still limit max to 1G even if we'll eventually map + * more. This is due to what the early init code is set up to do. + * + * We crop it to the size of the first MEMBLOCK to + * avoid going over total available memory just in case... + */ +#ifdef CONFIG_PPC_FSL_BOOK3E + if (early_mmu_has_feature(MMU_FTR_TYPE_FSL_E)) { + unsigned long linear_sz; + unsigned int num_cams; + + /* use a quarter of the TLBCAM for bolted linear map */ + num_cams = (mfspr(SPRN_TLB1CFG) & TLBnCFG_N_ENTRY) / 4; + + linear_sz = map_mem_in_cams(first_memblock_size, num_cams, + true); + + ppc64_rma_size = min_t(u64, linear_sz, 0x40000000); + } else +#endif + ppc64_rma_size = min_t(u64, first_memblock_size, 0x40000000); + + /* Finally limit subsequent allocations */ + memblock_set_current_limit(first_memblock_base + ppc64_rma_size); +} +#else /* ! CONFIG_PPC64 */ +void __init early_init_mmu(void) +{ +#ifdef CONFIG_PPC_47x + early_init_mmu_47x(); +#endif + +#ifdef CONFIG_PPC_MM_SLICES +#if defined(CONFIG_PPC_8xx) + init_mm.context.slb_addr_limit = DEFAULT_MAP_WINDOW; +#endif +#endif +} +#endif /* CONFIG_PPC64 */ diff --git a/arch/powerpc/mm/nohash/tlb_low.S b/arch/powerpc/mm/nohash/tlb_low.S new file mode 100644 index 000000000000..e066a658acac --- /dev/null +++ b/arch/powerpc/mm/nohash/tlb_low.S @@ -0,0 +1,491 @@ +/* + * This file contains low-level functions for performing various + * types of TLB invalidations on various processors with no hash + * table. + * + * This file implements the following functions for all no-hash + * processors. Some aren't implemented for some variants. Some + * are inline in tlbflush.h + * + * - tlbil_va + * - tlbil_pid + * - tlbil_all + * - tlbivax_bcast + * + * Code mostly moved over from misc_32.S + * + * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) + * + * Partially rewritten by Cort Dougan (cort@cs.nmt.edu) + * Paul Mackerras, Kumar Gala and Benjamin Herrenschmidt. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#if defined(CONFIG_40x) + +/* + * 40x implementation needs only tlbil_va + */ +_GLOBAL(__tlbil_va) + /* We run the search with interrupts disabled because we have to change + * the PID and I don't want to preempt when that happens. + */ + mfmsr r5 + mfspr r6,SPRN_PID + wrteei 0 + mtspr SPRN_PID,r4 + tlbsx. r3, 0, r3 + mtspr SPRN_PID,r6 + wrtee r5 + bne 1f + sync + /* There are only 64 TLB entries, so r3 < 64, which means bit 25 is + * clear. Since 25 is the V bit in the TLB_TAG, loading this value + * will invalidate the TLB entry. */ + tlbwe r3, r3, TLB_TAG + isync +1: blr + +#elif defined(CONFIG_PPC_8xx) + +/* + * Nothing to do for 8xx, everything is inline + */ + +#elif defined(CONFIG_44x) /* Includes 47x */ + +/* + * 440 implementation uses tlbsx/we for tlbil_va and a full sweep + * of the TLB for everything else. + */ +_GLOBAL(__tlbil_va) + mfspr r5,SPRN_MMUCR + mfmsr r10 + + /* + * We write 16 bits of STID since 47x supports that much, we + * will never be passed out of bounds values on 440 (hopefully) + */ + rlwimi r5,r4,0,16,31 + + /* We have to run the search with interrupts disabled, otherwise + * an interrupt which causes a TLB miss can clobber the MMUCR + * between the mtspr and the tlbsx. + * + * Critical and Machine Check interrupts take care of saving + * and restoring MMUCR, so only normal interrupts have to be + * taken care of. + */ + wrteei 0 + mtspr SPRN_MMUCR,r5 + tlbsx. r6,0,r3 + bne 10f + sync +BEGIN_MMU_FTR_SECTION + b 2f +END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_47x) + /* On 440 There are only 64 TLB entries, so r3 < 64, which means bit + * 22, is clear. Since 22 is the V bit in the TLB_PAGEID, loading this + * value will invalidate the TLB entry. + */ + tlbwe r6,r6,PPC44x_TLB_PAGEID + isync +10: wrtee r10 + blr +2: +#ifdef CONFIG_PPC_47x + oris r7,r6,0x8000 /* specify way explicitly */ + clrrwi r4,r3,12 /* get an EPN for the hashing with V = 0 */ + ori r4,r4,PPC47x_TLBE_SIZE + tlbwe r4,r7,0 /* write it */ + isync + wrtee r10 + blr +#else /* CONFIG_PPC_47x */ +1: trap + EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,0; +#endif /* !CONFIG_PPC_47x */ + +_GLOBAL(_tlbil_all) +_GLOBAL(_tlbil_pid) +BEGIN_MMU_FTR_SECTION + b 2f +END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_47x) + li r3,0 + sync + + /* Load high watermark */ + lis r4,tlb_44x_hwater@ha + lwz r5,tlb_44x_hwater@l(r4) + +1: tlbwe r3,r3,PPC44x_TLB_PAGEID + addi r3,r3,1 + cmpw 0,r3,r5 + ble 1b + + isync + blr +2: +#ifdef CONFIG_PPC_47x + /* 476 variant. There's not simple way to do this, hopefully we'll + * try to limit the amount of such full invalidates + */ + mfmsr r11 /* Interrupts off */ + wrteei 0 + li r3,-1 /* Current set */ + lis r10,tlb_47x_boltmap@h + ori r10,r10,tlb_47x_boltmap@l + lis r7,0x8000 /* Specify way explicitly */ + + b 9f /* For each set */ + +1: li r9,4 /* Number of ways */ + li r4,0 /* Current way */ + li r6,0 /* Default entry value 0 */ + andi. r0,r8,1 /* Check if way 0 is bolted */ + mtctr r9 /* Load way counter */ + bne- 3f /* Bolted, skip loading it */ + +2: /* For each way */ + or r5,r3,r4 /* Make way|index for tlbre */ + rlwimi r5,r5,16,8,15 /* Copy index into position */ + tlbre r6,r5,0 /* Read entry */ +3: addis r4,r4,0x2000 /* Next way */ + andi. r0,r6,PPC47x_TLB0_VALID /* Valid entry ? */ + beq 4f /* Nope, skip it */ + rlwimi r7,r5,0,1,2 /* Insert way number */ + rlwinm r6,r6,0,21,19 /* Clear V */ + tlbwe r6,r7,0 /* Write it */ +4: bdnz 2b /* Loop for each way */ + srwi r8,r8,1 /* Next boltmap bit */ +9: cmpwi cr1,r3,255 /* Last set done ? */ + addi r3,r3,1 /* Next set */ + beq cr1,1f /* End of loop */ + andi. r0,r3,0x1f /* Need to load a new boltmap word ? */ + bne 1b /* No, loop */ + lwz r8,0(r10) /* Load boltmap entry */ + addi r10,r10,4 /* Next word */ + b 1b /* Then loop */ +1: isync /* Sync shadows */ + wrtee r11 +#else /* CONFIG_PPC_47x */ +1: trap + EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,0; +#endif /* !CONFIG_PPC_47x */ + blr + +#ifdef CONFIG_PPC_47x + +/* + * _tlbivax_bcast is only on 47x. We don't bother doing a runtime + * check though, it will blow up soon enough if we mistakenly try + * to use it on a 440. + */ +_GLOBAL(_tlbivax_bcast) + mfspr r5,SPRN_MMUCR + mfmsr r10 + rlwimi r5,r4,0,16,31 + wrteei 0 + mtspr SPRN_MMUCR,r5 + isync + PPC_TLBIVAX(0, R3) + isync + eieio + tlbsync +BEGIN_FTR_SECTION + b 1f +END_FTR_SECTION_IFSET(CPU_FTR_476_DD2) + sync + wrtee r10 + blr +/* + * DD2 HW could hang if in instruction fetch happens before msync completes. + * Touch enough instruction cache lines to ensure cache hits + */ +1: mflr r9 + bl 2f +2: mflr r6 + li r7,32 + PPC_ICBT(0,R6,R7) /* touch next cache line */ + add r6,r6,r7 + PPC_ICBT(0,R6,R7) /* touch next cache line */ + add r6,r6,r7 + PPC_ICBT(0,R6,R7) /* touch next cache line */ + sync + nop + nop + nop + nop + nop + nop + nop + nop + mtlr r9 + wrtee r10 + blr +#endif /* CONFIG_PPC_47x */ + +#elif defined(CONFIG_FSL_BOOKE) +/* + * FSL BookE implementations. + * + * Since feature sections are using _SECTION_ELSE we need + * to have the larger code path before the _SECTION_ELSE + */ + +/* + * Flush MMU TLB on the local processor + */ +_GLOBAL(_tlbil_all) +BEGIN_MMU_FTR_SECTION + li r3,(MMUCSR0_TLBFI)@l + mtspr SPRN_MMUCSR0, r3 +1: + mfspr r3,SPRN_MMUCSR0 + andi. r3,r3,MMUCSR0_TLBFI@l + bne 1b +MMU_FTR_SECTION_ELSE + PPC_TLBILX_ALL(0,R0) +ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_USE_TLBILX) + msync + isync + blr + +_GLOBAL(_tlbil_pid) +BEGIN_MMU_FTR_SECTION + slwi r3,r3,16 + mfmsr r10 + wrteei 0 + mfspr r4,SPRN_MAS6 /* save MAS6 */ + mtspr SPRN_MAS6,r3 + PPC_TLBILX_PID(0,R0) + mtspr SPRN_MAS6,r4 /* restore MAS6 */ + wrtee r10 +MMU_FTR_SECTION_ELSE + li r3,(MMUCSR0_TLBFI)@l + mtspr SPRN_MMUCSR0, r3 +1: + mfspr r3,SPRN_MMUCSR0 + andi. r3,r3,MMUCSR0_TLBFI@l + bne 1b +ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_USE_TLBILX) + msync + isync + blr + +/* + * Flush MMU TLB for a particular address, but only on the local processor + * (no broadcast) + */ +_GLOBAL(__tlbil_va) + mfmsr r10 + wrteei 0 + slwi r4,r4,16 + ori r4,r4,(MAS6_ISIZE(BOOK3E_PAGESZ_4K))@l + mtspr SPRN_MAS6,r4 /* assume AS=0 for now */ +BEGIN_MMU_FTR_SECTION + tlbsx 0,r3 + mfspr r4,SPRN_MAS1 /* check valid */ + andis. r3,r4,MAS1_VALID@h + beq 1f + rlwinm r4,r4,0,1,31 + mtspr SPRN_MAS1,r4 + tlbwe +MMU_FTR_SECTION_ELSE + PPC_TLBILX_VA(0,R3) +ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_USE_TLBILX) + msync + isync +1: wrtee r10 + blr +#elif defined(CONFIG_PPC_BOOK3E) +/* + * New Book3E (>= 2.06) implementation + * + * Note: We may be able to get away without the interrupt masking stuff + * if we save/restore MAS6 on exceptions that might modify it + */ +_GLOBAL(_tlbil_pid) + slwi r4,r3,MAS6_SPID_SHIFT + mfmsr r10 + wrteei 0 + mtspr SPRN_MAS6,r4 + PPC_TLBILX_PID(0,R0) + wrtee r10 + msync + isync + blr + +_GLOBAL(_tlbil_pid_noind) + slwi r4,r3,MAS6_SPID_SHIFT + mfmsr r10 + ori r4,r4,MAS6_SIND + wrteei 0 + mtspr SPRN_MAS6,r4 + PPC_TLBILX_PID(0,R0) + wrtee r10 + msync + isync + blr + +_GLOBAL(_tlbil_all) + PPC_TLBILX_ALL(0,R0) + msync + isync + blr + +_GLOBAL(_tlbil_va) + mfmsr r10 + wrteei 0 + cmpwi cr0,r6,0 + slwi r4,r4,MAS6_SPID_SHIFT + rlwimi r4,r5,MAS6_ISIZE_SHIFT,MAS6_ISIZE_MASK + beq 1f + rlwimi r4,r6,MAS6_SIND_SHIFT,MAS6_SIND +1: mtspr SPRN_MAS6,r4 /* assume AS=0 for now */ + PPC_TLBILX_VA(0,R3) + msync + isync + wrtee r10 + blr + +_GLOBAL(_tlbivax_bcast) + mfmsr r10 + wrteei 0 + cmpwi cr0,r6,0 + slwi r4,r4,MAS6_SPID_SHIFT + rlwimi r4,r5,MAS6_ISIZE_SHIFT,MAS6_ISIZE_MASK + beq 1f + rlwimi r4,r6,MAS6_SIND_SHIFT,MAS6_SIND +1: mtspr SPRN_MAS6,r4 /* assume AS=0 for now */ + PPC_TLBIVAX(0,R3) + eieio + tlbsync + sync + wrtee r10 + blr + +_GLOBAL(set_context) +#ifdef CONFIG_BDI_SWITCH + /* Context switch the PTE pointer for the Abatron BDI2000. + * The PGDIR is the second parameter. + */ + lis r5, abatron_pteptrs@h + ori r5, r5, abatron_pteptrs@l + stw r4, 0x4(r5) +#endif + mtspr SPRN_PID,r3 + isync /* Force context change */ + blr +#else +#error Unsupported processor type ! +#endif + +#if defined(CONFIG_PPC_FSL_BOOK3E) +/* + * extern void loadcam_entry(unsigned int index) + * + * Load TLBCAM[index] entry in to the L2 CAM MMU + * Must preserve r7, r8, r9, and r10 + */ +_GLOBAL(loadcam_entry) + mflr r5 + LOAD_REG_ADDR_PIC(r4, TLBCAM) + mtlr r5 + mulli r5,r3,TLBCAM_SIZE + add r3,r5,r4 + lwz r4,TLBCAM_MAS0(r3) + mtspr SPRN_MAS0,r4 + lwz r4,TLBCAM_MAS1(r3) + mtspr SPRN_MAS1,r4 + PPC_LL r4,TLBCAM_MAS2(r3) + mtspr SPRN_MAS2,r4 + lwz r4,TLBCAM_MAS3(r3) + mtspr SPRN_MAS3,r4 +BEGIN_MMU_FTR_SECTION + lwz r4,TLBCAM_MAS7(r3) + mtspr SPRN_MAS7,r4 +END_MMU_FTR_SECTION_IFSET(MMU_FTR_BIG_PHYS) + isync + tlbwe + isync + blr + +/* + * Load multiple TLB entries at once, using an alternate-space + * trampoline so that we don't have to care about whether the same + * TLB entry maps us before and after. + * + * r3 = first entry to write + * r4 = number of entries to write + * r5 = temporary tlb entry + */ +_GLOBAL(loadcam_multi) + mflr r8 + + /* + * Set up temporary TLB entry that is the same as what we're + * running from, but in AS=1. + */ + bl 1f +1: mflr r6 + tlbsx 0,r8 + mfspr r6,SPRN_MAS1 + ori r6,r6,MAS1_TS + mtspr SPRN_MAS1,r6 + mfspr r6,SPRN_MAS0 + rlwimi r6,r5,MAS0_ESEL_SHIFT,MAS0_ESEL_MASK + mr r7,r5 + mtspr SPRN_MAS0,r6 + isync + tlbwe + isync + + /* Switch to AS=1 */ + mfmsr r6 + ori r6,r6,MSR_IS|MSR_DS + mtmsr r6 + isync + + mr r9,r3 + add r10,r3,r4 +2: bl loadcam_entry + addi r9,r9,1 + cmpw r9,r10 + mr r3,r9 + blt 2b + + /* Return to AS=0 and clear the temporary entry */ + mfmsr r6 + rlwinm. r6,r6,0,~(MSR_IS|MSR_DS) + mtmsr r6 + isync + + li r6,0 + mtspr SPRN_MAS1,r6 + rlwinm r6,r7,MAS0_ESEL_SHIFT,MAS0_ESEL_MASK + oris r6,r6,MAS0_TLBSEL(1)@h + mtspr SPRN_MAS0,r6 + isync + tlbwe + isync + + mtlr r8 + blr +#endif diff --git a/arch/powerpc/mm/nohash/tlb_low_64e.S b/arch/powerpc/mm/nohash/tlb_low_64e.S new file mode 100644 index 000000000000..9ed90064f542 --- /dev/null +++ b/arch/powerpc/mm/nohash/tlb_low_64e.S @@ -0,0 +1,1280 @@ +/* + * Low level TLB miss handlers for Book3E + * + * Copyright (C) 2008-2009 + * Ben. Herrenschmidt (benh@kernel.crashing.org), IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef CONFIG_PPC_64K_PAGES +#define VPTE_PMD_SHIFT (PTE_INDEX_SIZE+1) +#else +#define VPTE_PMD_SHIFT (PTE_INDEX_SIZE) +#endif +#define VPTE_PUD_SHIFT (VPTE_PMD_SHIFT + PMD_INDEX_SIZE) +#define VPTE_PGD_SHIFT (VPTE_PUD_SHIFT + PUD_INDEX_SIZE) +#define VPTE_INDEX_SIZE (VPTE_PGD_SHIFT + PGD_INDEX_SIZE) + +/********************************************************************** + * * + * TLB miss handling for Book3E with a bolted linear mapping * + * No virtual page table, no nested TLB misses * + * * + **********************************************************************/ + +/* + * Note that, unlike non-bolted handlers, TLB_EXFRAME is not + * modified by the TLB miss handlers themselves, since the TLB miss + * handler code will not itself cause a recursive TLB miss. + * + * TLB_EXFRAME will be modified when crit/mc/debug exceptions are + * entered/exited. + */ +.macro tlb_prolog_bolted intnum addr + mtspr SPRN_SPRG_GEN_SCRATCH,r12 + mfspr r12,SPRN_SPRG_TLB_EXFRAME + std r13,EX_TLB_R13(r12) + std r10,EX_TLB_R10(r12) + mfspr r13,SPRN_SPRG_PACA + + mfcr r10 + std r11,EX_TLB_R11(r12) +#ifdef CONFIG_KVM_BOOKE_HV +BEGIN_FTR_SECTION + mfspr r11, SPRN_SRR1 +END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV) +#endif + DO_KVM \intnum, SPRN_SRR1 + std r16,EX_TLB_R16(r12) + mfspr r16,\addr /* get faulting address */ + std r14,EX_TLB_R14(r12) + ld r14,PACAPGD(r13) + std r15,EX_TLB_R15(r12) + std r10,EX_TLB_CR(r12) +#ifdef CONFIG_PPC_FSL_BOOK3E +START_BTB_FLUSH_SECTION + mfspr r11, SPRN_SRR1 + andi. r10,r11,MSR_PR + beq 1f + BTB_FLUSH(r10) +1: +END_BTB_FLUSH_SECTION + std r7,EX_TLB_R7(r12) +#endif + TLB_MISS_PROLOG_STATS +.endm + +.macro tlb_epilog_bolted + ld r14,EX_TLB_CR(r12) +#ifdef CONFIG_PPC_FSL_BOOK3E + ld r7,EX_TLB_R7(r12) +#endif + ld r10,EX_TLB_R10(r12) + ld r11,EX_TLB_R11(r12) + ld r13,EX_TLB_R13(r12) + mtcr r14 + ld r14,EX_TLB_R14(r12) + ld r15,EX_TLB_R15(r12) + TLB_MISS_RESTORE_STATS + ld r16,EX_TLB_R16(r12) + mfspr r12,SPRN_SPRG_GEN_SCRATCH +.endm + +/* Data TLB miss */ + START_EXCEPTION(data_tlb_miss_bolted) + tlb_prolog_bolted BOOKE_INTERRUPT_DTLB_MISS SPRN_DEAR + + /* We need _PAGE_PRESENT and _PAGE_ACCESSED set */ + + /* We do the user/kernel test for the PID here along with the RW test + */ + /* We pre-test some combination of permissions to avoid double + * faults: + * + * We move the ESR:ST bit into the position of _PAGE_BAP_SW in the PTE + * ESR_ST is 0x00800000 + * _PAGE_BAP_SW is 0x00000010 + * So the shift is >> 19. This tests for supervisor writeability. + * If the page happens to be supervisor writeable and not user + * writeable, we will take a new fault later, but that should be + * a rare enough case. + * + * We also move ESR_ST in _PAGE_DIRTY position + * _PAGE_DIRTY is 0x00001000 so the shift is >> 11 + * + * MAS1 is preset for all we need except for TID that needs to + * be cleared for kernel translations + */ + + mfspr r11,SPRN_ESR + + srdi r15,r16,60 /* get region */ + rldicl. r10,r16,64-PGTABLE_EADDR_SIZE,PGTABLE_EADDR_SIZE+4 + bne- dtlb_miss_fault_bolted /* Bail if fault addr is invalid */ + + rlwinm r10,r11,32-19,27,27 + rlwimi r10,r11,32-16,19,19 + cmpwi r15,0 /* user vs kernel check */ + ori r10,r10,_PAGE_PRESENT + oris r11,r10,_PAGE_ACCESSED@h + + TLB_MISS_STATS_SAVE_INFO_BOLTED + bne tlb_miss_kernel_bolted + +tlb_miss_common_bolted: +/* + * This is the guts of the TLB miss handler for bolted-linear. + * We are entered with: + * + * r16 = faulting address + * r15 = crap (free to use) + * r14 = page table base + * r13 = PACA + * r11 = PTE permission mask + * r10 = crap (free to use) + */ + rldicl r15,r16,64-PGDIR_SHIFT+3,64-PGD_INDEX_SIZE-3 + cmpldi cr0,r14,0 + clrrdi r15,r15,3 + beq tlb_miss_fault_bolted /* No PGDIR, bail */ + +BEGIN_MMU_FTR_SECTION + /* Set the TLB reservation and search for existing entry. Then load + * the entry. + */ + PPC_TLBSRX_DOT(0,R16) + ldx r14,r14,r15 /* grab pgd entry */ + beq tlb_miss_done_bolted /* tlb exists already, bail */ +MMU_FTR_SECTION_ELSE + ldx r14,r14,r15 /* grab pgd entry */ +ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_USE_TLBRSRV) + +#ifndef CONFIG_PPC_64K_PAGES + rldicl r15,r16,64-PUD_SHIFT+3,64-PUD_INDEX_SIZE-3 + clrrdi r15,r15,3 + cmpdi cr0,r14,0 + bge tlb_miss_fault_bolted /* Bad pgd entry or hugepage; bail */ + ldx r14,r14,r15 /* grab pud entry */ +#endif /* CONFIG_PPC_64K_PAGES */ + + rldicl r15,r16,64-PMD_SHIFT+3,64-PMD_INDEX_SIZE-3 + clrrdi r15,r15,3 + cmpdi cr0,r14,0 + bge tlb_miss_fault_bolted + ldx r14,r14,r15 /* Grab pmd entry */ + + rldicl r15,r16,64-PAGE_SHIFT+3,64-PTE_INDEX_SIZE-3 + clrrdi r15,r15,3 + cmpdi cr0,r14,0 + bge tlb_miss_fault_bolted + ldx r14,r14,r15 /* Grab PTE, normal (!huge) page */ + + /* Check if required permissions are met */ + andc. r15,r11,r14 + rldicr r15,r14,64-(PTE_RPN_SHIFT-PAGE_SHIFT),63-PAGE_SHIFT + bne- tlb_miss_fault_bolted + + /* Now we build the MAS: + * + * MAS 0 : Fully setup with defaults in MAS4 and TLBnCFG + * MAS 1 : Almost fully setup + * - PID already updated by caller if necessary + * - TSIZE need change if !base page size, not + * yet implemented for now + * MAS 2 : Defaults not useful, need to be redone + * MAS 3+7 : Needs to be done + */ + clrrdi r11,r16,12 /* Clear low crap in EA */ + clrldi r15,r15,12 /* Clear crap at the top */ + rlwimi r11,r14,32-19,27,31 /* Insert WIMGE */ + rlwimi r15,r14,32-8,22,25 /* Move in U bits */ + mtspr SPRN_MAS2,r11 + andi. r11,r14,_PAGE_DIRTY + rlwimi r15,r14,32-2,26,31 /* Move in BAP bits */ + + /* Mask out SW and UW if !DIRTY (XXX optimize this !) */ + bne 1f + li r11,MAS3_SW|MAS3_UW + andc r15,r15,r11 +1: + mtspr SPRN_MAS7_MAS3,r15 + tlbwe + +tlb_miss_done_bolted: + TLB_MISS_STATS_X(MMSTAT_TLB_MISS_NORM_OK) + tlb_epilog_bolted + rfi + +itlb_miss_kernel_bolted: + li r11,_PAGE_PRESENT|_PAGE_BAP_SX /* Base perm */ + oris r11,r11,_PAGE_ACCESSED@h +tlb_miss_kernel_bolted: + mfspr r10,SPRN_MAS1 + ld r14,PACA_KERNELPGD(r13) + cmpldi cr0,r15,8 /* Check for vmalloc region */ + rlwinm r10,r10,0,16,1 /* Clear TID */ + mtspr SPRN_MAS1,r10 + beq+ tlb_miss_common_bolted + +tlb_miss_fault_bolted: + /* We need to check if it was an instruction miss */ + andi. r10,r11,_PAGE_EXEC|_PAGE_BAP_SX + bne itlb_miss_fault_bolted +dtlb_miss_fault_bolted: + TLB_MISS_STATS_D(MMSTAT_TLB_MISS_NORM_FAULT) + tlb_epilog_bolted + b exc_data_storage_book3e +itlb_miss_fault_bolted: + TLB_MISS_STATS_I(MMSTAT_TLB_MISS_NORM_FAULT) + tlb_epilog_bolted + b exc_instruction_storage_book3e + +/* Instruction TLB miss */ + START_EXCEPTION(instruction_tlb_miss_bolted) + tlb_prolog_bolted BOOKE_INTERRUPT_ITLB_MISS SPRN_SRR0 + + rldicl. r10,r16,64-PGTABLE_EADDR_SIZE,PGTABLE_EADDR_SIZE+4 + srdi r15,r16,60 /* get region */ + TLB_MISS_STATS_SAVE_INFO_BOLTED + bne- itlb_miss_fault_bolted + + li r11,_PAGE_PRESENT|_PAGE_EXEC /* Base perm */ + + /* We do the user/kernel test for the PID here along with the RW test + */ + + cmpldi cr0,r15,0 /* Check for user region */ + oris r11,r11,_PAGE_ACCESSED@h + beq tlb_miss_common_bolted + b itlb_miss_kernel_bolted + +#ifdef CONFIG_PPC_FSL_BOOK3E +/* + * TLB miss handling for e6500 and derivatives, using hardware tablewalk. + * + * Linear mapping is bolted: no virtual page table or nested TLB misses + * Indirect entries in TLB1, hardware loads resulting direct entries + * into TLB0 + * No HES or NV hint on TLB1, so we need to do software round-robin + * No tlbsrx. so we need a spinlock, and we have to deal + * with MAS-damage caused by tlbsx + * 4K pages only + */ + + START_EXCEPTION(instruction_tlb_miss_e6500) + tlb_prolog_bolted BOOKE_INTERRUPT_ITLB_MISS SPRN_SRR0 + + ld r11,PACA_TCD_PTR(r13) + srdi. r15,r16,60 /* get region */ + ori r16,r16,1 + + TLB_MISS_STATS_SAVE_INFO_BOLTED + bne tlb_miss_kernel_e6500 /* user/kernel test */ + + b tlb_miss_common_e6500 + + START_EXCEPTION(data_tlb_miss_e6500) + tlb_prolog_bolted BOOKE_INTERRUPT_DTLB_MISS SPRN_DEAR + + ld r11,PACA_TCD_PTR(r13) + srdi. r15,r16,60 /* get region */ + rldicr r16,r16,0,62 + + TLB_MISS_STATS_SAVE_INFO_BOLTED + bne tlb_miss_kernel_e6500 /* user vs kernel check */ + +/* + * This is the guts of the TLB miss handler for e6500 and derivatives. + * We are entered with: + * + * r16 = page of faulting address (low bit 0 if data, 1 if instruction) + * r15 = crap (free to use) + * r14 = page table base + * r13 = PACA + * r11 = tlb_per_core ptr + * r10 = crap (free to use) + * r7 = esel_next + */ +tlb_miss_common_e6500: + crmove cr2*4+2,cr0*4+2 /* cr2.eq != 0 if kernel address */ + +BEGIN_FTR_SECTION /* CPU_FTR_SMT */ + /* + * Search if we already have an indirect entry for that virtual + * address, and if we do, bail out. + * + * MAS6:IND should be already set based on MAS4 + */ + lhz r10,PACAPACAINDEX(r13) + addi r10,r10,1 + crclr cr1*4+eq /* set cr1.eq = 0 for non-recursive */ +1: lbarx r15,0,r11 + cmpdi r15,0 + bne 2f + stbcx. r10,0,r11 + bne 1b +3: + .subsection 1 +2: cmpd cr1,r15,r10 /* recursive lock due to mcheck/crit/etc? */ + beq cr1,3b /* unlock will happen if cr1.eq = 0 */ +10: lbz r15,0(r11) + cmpdi r15,0 + bne 10b + b 1b + .previous +END_FTR_SECTION_IFSET(CPU_FTR_SMT) + + lbz r7,TCD_ESEL_NEXT(r11) + +BEGIN_FTR_SECTION /* CPU_FTR_SMT */ + /* + * Erratum A-008139 says that we can't use tlbwe to change + * an indirect entry in any way (including replacing or + * invalidating) if the other thread could be in the process + * of a lookup. The workaround is to invalidate the entry + * with tlbilx before overwriting. + */ + + rlwinm r10,r7,16,0xff0000 + oris r10,r10,MAS0_TLBSEL(1)@h + mtspr SPRN_MAS0,r10 + isync + tlbre + mfspr r15,SPRN_MAS1 + andis. r15,r15,MAS1_VALID@h + beq 5f + +BEGIN_FTR_SECTION_NESTED(532) + mfspr r10,SPRN_MAS8 + rlwinm r10,r10,0,0x80000fff /* tgs,tlpid -> sgs,slpid */ + mtspr SPRN_MAS5,r10 +END_FTR_SECTION_NESTED(CPU_FTR_EMB_HV,CPU_FTR_EMB_HV,532) + + mfspr r10,SPRN_MAS1 + rlwinm r15,r10,0,0x3fff0000 /* tid -> spid */ + rlwimi r15,r10,20,0x00000003 /* ind,ts -> sind,sas */ + mfspr r10,SPRN_MAS6 + mtspr SPRN_MAS6,r15 + + mfspr r15,SPRN_MAS2 + isync + tlbilxva 0,r15 + isync + + mtspr SPRN_MAS6,r10 + +5: +BEGIN_FTR_SECTION_NESTED(532) + li r10,0 + mtspr SPRN_MAS8,r10 + mtspr SPRN_MAS5,r10 +END_FTR_SECTION_NESTED(CPU_FTR_EMB_HV,CPU_FTR_EMB_HV,532) + + tlbsx 0,r16 + mfspr r10,SPRN_MAS1 + andis. r15,r10,MAS1_VALID@h + bne tlb_miss_done_e6500 +FTR_SECTION_ELSE + mfspr r10,SPRN_MAS1 +ALT_FTR_SECTION_END_IFSET(CPU_FTR_SMT) + + oris r10,r10,MAS1_VALID@h + beq cr2,4f + rlwinm r10,r10,0,16,1 /* Clear TID */ +4: mtspr SPRN_MAS1,r10 + + /* Now, we need to walk the page tables. First check if we are in + * range. + */ + rldicl. r10,r16,64-PGTABLE_EADDR_SIZE,PGTABLE_EADDR_SIZE+4 + bne- tlb_miss_fault_e6500 + + rldicl r15,r16,64-PGDIR_SHIFT+3,64-PGD_INDEX_SIZE-3 + cmpldi cr0,r14,0 + clrrdi r15,r15,3 + beq- tlb_miss_fault_e6500 /* No PGDIR, bail */ + ldx r14,r14,r15 /* grab pgd entry */ + + rldicl r15,r16,64-PUD_SHIFT+3,64-PUD_INDEX_SIZE-3 + clrrdi r15,r15,3 + cmpdi cr0,r14,0 + bge tlb_miss_huge_e6500 /* Bad pgd entry or hugepage; bail */ + ldx r14,r14,r15 /* grab pud entry */ + + rldicl r15,r16,64-PMD_SHIFT+3,64-PMD_INDEX_SIZE-3 + clrrdi r15,r15,3 + cmpdi cr0,r14,0 + bge tlb_miss_huge_e6500 + ldx r14,r14,r15 /* Grab pmd entry */ + + mfspr r10,SPRN_MAS0 + cmpdi cr0,r14,0 + bge tlb_miss_huge_e6500 + + /* Now we build the MAS for a 2M indirect page: + * + * MAS 0 : ESEL needs to be filled by software round-robin + * MAS 1 : Fully set up + * - PID already updated by caller if necessary + * - TSIZE for now is base ind page size always + * - TID already cleared if necessary + * MAS 2 : Default not 2M-aligned, need to be redone + * MAS 3+7 : Needs to be done + */ + + ori r14,r14,(BOOK3E_PAGESZ_4K << MAS3_SPSIZE_SHIFT) + mtspr SPRN_MAS7_MAS3,r14 + + clrrdi r15,r16,21 /* make EA 2M-aligned */ + mtspr SPRN_MAS2,r15 + +tlb_miss_huge_done_e6500: + lbz r16,TCD_ESEL_MAX(r11) + lbz r14,TCD_ESEL_FIRST(r11) + rlwimi r10,r7,16,0x00ff0000 /* insert esel_next into MAS0 */ + addi r7,r7,1 /* increment esel_next */ + mtspr SPRN_MAS0,r10 + cmpw r7,r16 + iseleq r7,r14,r7 /* if next == last use first */ + stb r7,TCD_ESEL_NEXT(r11) + + tlbwe + +tlb_miss_done_e6500: + .macro tlb_unlock_e6500 +BEGIN_FTR_SECTION + beq cr1,1f /* no unlock if lock was recursively grabbed */ + li r15,0 + isync + stb r15,0(r11) +1: +END_FTR_SECTION_IFSET(CPU_FTR_SMT) + .endm + + tlb_unlock_e6500 + TLB_MISS_STATS_X(MMSTAT_TLB_MISS_NORM_OK) + tlb_epilog_bolted + rfi + +tlb_miss_huge_e6500: + beq tlb_miss_fault_e6500 + li r10,1 + andi. r15,r14,HUGEPD_SHIFT_MASK@l /* r15 = psize */ + rldimi r14,r10,63,0 /* Set PD_HUGE */ + xor r14,r14,r15 /* Clear size bits */ + ldx r14,0,r14 + + /* + * Now we build the MAS for a huge page. + * + * MAS 0 : ESEL needs to be filled by software round-robin + * - can be handled by indirect code + * MAS 1 : Need to clear IND and set TSIZE + * MAS 2,3+7: Needs to be redone similar to non-tablewalk handler + */ + + subi r15,r15,10 /* Convert psize to tsize */ + mfspr r10,SPRN_MAS1 + rlwinm r10,r10,0,~MAS1_IND + rlwimi r10,r15,MAS1_TSIZE_SHIFT,MAS1_TSIZE_MASK + mtspr SPRN_MAS1,r10 + + li r10,-0x400 + sld r15,r10,r15 /* Generate mask based on size */ + and r10,r16,r15 + rldicr r15,r14,64-(PTE_RPN_SHIFT-PAGE_SHIFT),63-PAGE_SHIFT + rlwimi r10,r14,32-19,27,31 /* Insert WIMGE */ + clrldi r15,r15,PAGE_SHIFT /* Clear crap at the top */ + rlwimi r15,r14,32-8,22,25 /* Move in U bits */ + mtspr SPRN_MAS2,r10 + andi. r10,r14,_PAGE_DIRTY + rlwimi r15,r14,32-2,26,31 /* Move in BAP bits */ + + /* Mask out SW and UW if !DIRTY (XXX optimize this !) */ + bne 1f + li r10,MAS3_SW|MAS3_UW + andc r15,r15,r10 +1: + mtspr SPRN_MAS7_MAS3,r15 + + mfspr r10,SPRN_MAS0 + b tlb_miss_huge_done_e6500 + +tlb_miss_kernel_e6500: + ld r14,PACA_KERNELPGD(r13) + cmpldi cr1,r15,8 /* Check for vmalloc region */ + beq+ cr1,tlb_miss_common_e6500 + +tlb_miss_fault_e6500: + tlb_unlock_e6500 + /* We need to check if it was an instruction miss */ + andi. r16,r16,1 + bne itlb_miss_fault_e6500 +dtlb_miss_fault_e6500: + TLB_MISS_STATS_D(MMSTAT_TLB_MISS_NORM_FAULT) + tlb_epilog_bolted + b exc_data_storage_book3e +itlb_miss_fault_e6500: + TLB_MISS_STATS_I(MMSTAT_TLB_MISS_NORM_FAULT) + tlb_epilog_bolted + b exc_instruction_storage_book3e +#endif /* CONFIG_PPC_FSL_BOOK3E */ + +/********************************************************************** + * * + * TLB miss handling for Book3E with TLB reservation and HES support * + * * + **********************************************************************/ + + +/* Data TLB miss */ + START_EXCEPTION(data_tlb_miss) + TLB_MISS_PROLOG + + /* Now we handle the fault proper. We only save DEAR in normal + * fault case since that's the only interesting values here. + * We could probably also optimize by not saving SRR0/1 in the + * linear mapping case but I'll leave that for later + */ + mfspr r14,SPRN_ESR + mfspr r16,SPRN_DEAR /* get faulting address */ + srdi r15,r16,60 /* get region */ + cmpldi cr0,r15,0xc /* linear mapping ? */ + TLB_MISS_STATS_SAVE_INFO + beq tlb_load_linear /* yes -> go to linear map load */ + + /* The page tables are mapped virtually linear. At this point, though, + * we don't know whether we are trying to fault in a first level + * virtual address or a virtual page table address. We can get that + * from bit 0x1 of the region ID which we have set for a page table + */ + andi. r10,r15,0x1 + bne- virt_page_table_tlb_miss + + std r14,EX_TLB_ESR(r12); /* save ESR */ + std r16,EX_TLB_DEAR(r12); /* save DEAR */ + + /* We need _PAGE_PRESENT and _PAGE_ACCESSED set */ + li r11,_PAGE_PRESENT + oris r11,r11,_PAGE_ACCESSED@h + + /* We do the user/kernel test for the PID here along with the RW test + */ + cmpldi cr0,r15,0 /* Check for user region */ + + /* We pre-test some combination of permissions to avoid double + * faults: + * + * We move the ESR:ST bit into the position of _PAGE_BAP_SW in the PTE + * ESR_ST is 0x00800000 + * _PAGE_BAP_SW is 0x00000010 + * So the shift is >> 19. This tests for supervisor writeability. + * If the page happens to be supervisor writeable and not user + * writeable, we will take a new fault later, but that should be + * a rare enough case. + * + * We also move ESR_ST in _PAGE_DIRTY position + * _PAGE_DIRTY is 0x00001000 so the shift is >> 11 + * + * MAS1 is preset for all we need except for TID that needs to + * be cleared for kernel translations + */ + rlwimi r11,r14,32-19,27,27 + rlwimi r11,r14,32-16,19,19 + beq normal_tlb_miss + /* XXX replace the RMW cycles with immediate loads + writes */ +1: mfspr r10,SPRN_MAS1 + cmpldi cr0,r15,8 /* Check for vmalloc region */ + rlwinm r10,r10,0,16,1 /* Clear TID */ + mtspr SPRN_MAS1,r10 + beq+ normal_tlb_miss + + /* We got a crappy address, just fault with whatever DEAR and ESR + * are here + */ + TLB_MISS_STATS_D(MMSTAT_TLB_MISS_NORM_FAULT) + TLB_MISS_EPILOG_ERROR + b exc_data_storage_book3e + +/* Instruction TLB miss */ + START_EXCEPTION(instruction_tlb_miss) + TLB_MISS_PROLOG + + /* If we take a recursive fault, the second level handler may need + * to know whether we are handling a data or instruction fault in + * order to get to the right store fault handler. We provide that + * info by writing a crazy value in ESR in our exception frame + */ + li r14,-1 /* store to exception frame is done later */ + + /* Now we handle the fault proper. We only save DEAR in the non + * linear mapping case since we know the linear mapping case will + * not re-enter. We could indeed optimize and also not save SRR0/1 + * in the linear mapping case but I'll leave that for later + * + * Faulting address is SRR0 which is already in r16 + */ + srdi r15,r16,60 /* get region */ + cmpldi cr0,r15,0xc /* linear mapping ? */ + TLB_MISS_STATS_SAVE_INFO + beq tlb_load_linear /* yes -> go to linear map load */ + + /* We do the user/kernel test for the PID here along with the RW test + */ + li r11,_PAGE_PRESENT|_PAGE_EXEC /* Base perm */ + oris r11,r11,_PAGE_ACCESSED@h + + cmpldi cr0,r15,0 /* Check for user region */ + std r14,EX_TLB_ESR(r12) /* write crazy -1 to frame */ + beq normal_tlb_miss + + li r11,_PAGE_PRESENT|_PAGE_BAP_SX /* Base perm */ + oris r11,r11,_PAGE_ACCESSED@h + /* XXX replace the RMW cycles with immediate loads + writes */ + mfspr r10,SPRN_MAS1 + cmpldi cr0,r15,8 /* Check for vmalloc region */ + rlwinm r10,r10,0,16,1 /* Clear TID */ + mtspr SPRN_MAS1,r10 + beq+ normal_tlb_miss + + /* We got a crappy address, just fault */ + TLB_MISS_STATS_I(MMSTAT_TLB_MISS_NORM_FAULT) + TLB_MISS_EPILOG_ERROR + b exc_instruction_storage_book3e + +/* + * This is the guts of the first-level TLB miss handler for direct + * misses. We are entered with: + * + * r16 = faulting address + * r15 = region ID + * r14 = crap (free to use) + * r13 = PACA + * r12 = TLB exception frame in PACA + * r11 = PTE permission mask + * r10 = crap (free to use) + */ +normal_tlb_miss: + /* So we first construct the page table address. We do that by + * shifting the bottom of the address (not the region ID) by + * PAGE_SHIFT-3, clearing the bottom 3 bits (get a PTE ptr) and + * or'ing the fourth high bit. + * + * NOTE: For 64K pages, we do things slightly differently in + * order to handle the weird page table format used by linux + */ + ori r10,r15,0x1 +#ifdef CONFIG_PPC_64K_PAGES + /* For the top bits, 16 bytes per PTE */ + rldicl r14,r16,64-(PAGE_SHIFT-4),PAGE_SHIFT-4+4 + /* Now create the bottom bits as 0 in position 0x8000 and + * the rest calculated for 8 bytes per PTE + */ + rldicl r15,r16,64-(PAGE_SHIFT-3),64-15 + /* Insert the bottom bits in */ + rlwimi r14,r15,0,16,31 +#else + rldicl r14,r16,64-(PAGE_SHIFT-3),PAGE_SHIFT-3+4 +#endif + sldi r15,r10,60 + clrrdi r14,r14,3 + or r10,r15,r14 + +BEGIN_MMU_FTR_SECTION + /* Set the TLB reservation and search for existing entry. Then load + * the entry. + */ + PPC_TLBSRX_DOT(0,R16) + ld r14,0(r10) + beq normal_tlb_miss_done +MMU_FTR_SECTION_ELSE + ld r14,0(r10) +ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_USE_TLBRSRV) + +finish_normal_tlb_miss: + /* Check if required permissions are met */ + andc. r15,r11,r14 + bne- normal_tlb_miss_access_fault + + /* Now we build the MAS: + * + * MAS 0 : Fully setup with defaults in MAS4 and TLBnCFG + * MAS 1 : Almost fully setup + * - PID already updated by caller if necessary + * - TSIZE need change if !base page size, not + * yet implemented for now + * MAS 2 : Defaults not useful, need to be redone + * MAS 3+7 : Needs to be done + * + * TODO: mix up code below for better scheduling + */ + clrrdi r11,r16,12 /* Clear low crap in EA */ + rlwimi r11,r14,32-19,27,31 /* Insert WIMGE */ + mtspr SPRN_MAS2,r11 + + /* Check page size, if not standard, update MAS1 */ + rldicl r11,r14,64-8,64-8 +#ifdef CONFIG_PPC_64K_PAGES + cmpldi cr0,r11,BOOK3E_PAGESZ_64K +#else + cmpldi cr0,r11,BOOK3E_PAGESZ_4K +#endif + beq- 1f + mfspr r11,SPRN_MAS1 + rlwimi r11,r14,31,21,24 + rlwinm r11,r11,0,21,19 + mtspr SPRN_MAS1,r11 +1: + /* Move RPN in position */ + rldicr r11,r14,64-(PTE_RPN_SHIFT-PAGE_SHIFT),63-PAGE_SHIFT + clrldi r15,r11,12 /* Clear crap at the top */ + rlwimi r15,r14,32-8,22,25 /* Move in U bits */ + rlwimi r15,r14,32-2,26,31 /* Move in BAP bits */ + + /* Mask out SW and UW if !DIRTY (XXX optimize this !) */ + andi. r11,r14,_PAGE_DIRTY + bne 1f + li r11,MAS3_SW|MAS3_UW + andc r15,r15,r11 +1: +BEGIN_MMU_FTR_SECTION + srdi r16,r15,32 + mtspr SPRN_MAS3,r15 + mtspr SPRN_MAS7,r16 +MMU_FTR_SECTION_ELSE + mtspr SPRN_MAS7_MAS3,r15 +ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_USE_PAIRED_MAS) + + tlbwe + +normal_tlb_miss_done: + /* We don't bother with restoring DEAR or ESR since we know we are + * level 0 and just going back to userland. They are only needed + * if you are going to take an access fault + */ + TLB_MISS_STATS_X(MMSTAT_TLB_MISS_NORM_OK) + TLB_MISS_EPILOG_SUCCESS + rfi + +normal_tlb_miss_access_fault: + /* We need to check if it was an instruction miss */ + andi. r10,r11,_PAGE_EXEC + bne 1f + ld r14,EX_TLB_DEAR(r12) + ld r15,EX_TLB_ESR(r12) + mtspr SPRN_DEAR,r14 + mtspr SPRN_ESR,r15 + TLB_MISS_STATS_D(MMSTAT_TLB_MISS_NORM_FAULT) + TLB_MISS_EPILOG_ERROR + b exc_data_storage_book3e +1: TLB_MISS_STATS_I(MMSTAT_TLB_MISS_NORM_FAULT) + TLB_MISS_EPILOG_ERROR + b exc_instruction_storage_book3e + + +/* + * This is the guts of the second-level TLB miss handler for direct + * misses. We are entered with: + * + * r16 = virtual page table faulting address + * r15 = region (top 4 bits of address) + * r14 = crap (free to use) + * r13 = PACA + * r12 = TLB exception frame in PACA + * r11 = crap (free to use) + * r10 = crap (free to use) + * + * Note that this should only ever be called as a second level handler + * with the current scheme when using SW load. + * That means we can always get the original fault DEAR at + * EX_TLB_DEAR-EX_TLB_SIZE(r12) + * + * It can be re-entered by the linear mapping miss handler. However, to + * avoid too much complication, it will restart the whole fault at level + * 0 so we don't care too much about clobbers + * + * XXX That code was written back when we couldn't clobber r14. We can now, + * so we could probably optimize things a bit + */ +virt_page_table_tlb_miss: + /* Are we hitting a kernel page table ? */ + andi. r10,r15,0x8 + + /* The cool thing now is that r10 contains 0 for user and 8 for kernel, + * and we happen to have the swapper_pg_dir at offset 8 from the user + * pgdir in the PACA :-). + */ + add r11,r10,r13 + + /* If kernel, we need to clear MAS1 TID */ + beq 1f + /* XXX replace the RMW cycles with immediate loads + writes */ + mfspr r10,SPRN_MAS1 + rlwinm r10,r10,0,16,1 /* Clear TID */ + mtspr SPRN_MAS1,r10 +1: +BEGIN_MMU_FTR_SECTION + /* Search if we already have a TLB entry for that virtual address, and + * if we do, bail out. + */ + PPC_TLBSRX_DOT(0,R16) + beq virt_page_table_tlb_miss_done +END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_TLBRSRV) + + /* Now, we need to walk the page tables. First check if we are in + * range. + */ + rldicl. r10,r16,64-(VPTE_INDEX_SIZE+3),VPTE_INDEX_SIZE+3+4 + bne- virt_page_table_tlb_miss_fault + + /* Get the PGD pointer */ + ld r15,PACAPGD(r11) + cmpldi cr0,r15,0 + beq- virt_page_table_tlb_miss_fault + + /* Get to PGD entry */ + rldicl r11,r16,64-VPTE_PGD_SHIFT,64-PGD_INDEX_SIZE-3 + clrrdi r10,r11,3 + ldx r15,r10,r15 + cmpdi cr0,r15,0 + bge virt_page_table_tlb_miss_fault + +#ifndef CONFIG_PPC_64K_PAGES + /* Get to PUD entry */ + rldicl r11,r16,64-VPTE_PUD_SHIFT,64-PUD_INDEX_SIZE-3 + clrrdi r10,r11,3 + ldx r15,r10,r15 + cmpdi cr0,r15,0 + bge virt_page_table_tlb_miss_fault +#endif /* CONFIG_PPC_64K_PAGES */ + + /* Get to PMD entry */ + rldicl r11,r16,64-VPTE_PMD_SHIFT,64-PMD_INDEX_SIZE-3 + clrrdi r10,r11,3 + ldx r15,r10,r15 + cmpdi cr0,r15,0 + bge virt_page_table_tlb_miss_fault + + /* Ok, we're all right, we can now create a kernel translation for + * a 4K or 64K page from r16 -> r15. + */ + /* Now we build the MAS: + * + * MAS 0 : Fully setup with defaults in MAS4 and TLBnCFG + * MAS 1 : Almost fully setup + * - PID already updated by caller if necessary + * - TSIZE for now is base page size always + * MAS 2 : Use defaults + * MAS 3+7 : Needs to be done + * + * So we only do MAS 2 and 3 for now... + */ + clrldi r11,r15,4 /* remove region ID from RPN */ + ori r10,r11,1 /* Or-in SR */ + +BEGIN_MMU_FTR_SECTION + srdi r16,r10,32 + mtspr SPRN_MAS3,r10 + mtspr SPRN_MAS7,r16 +MMU_FTR_SECTION_ELSE + mtspr SPRN_MAS7_MAS3,r10 +ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_USE_PAIRED_MAS) + + tlbwe + +BEGIN_MMU_FTR_SECTION +virt_page_table_tlb_miss_done: + + /* We have overridden MAS2:EPN but currently our primary TLB miss + * handler will always restore it so that should not be an issue, + * if we ever optimize the primary handler to not write MAS2 on + * some cases, we'll have to restore MAS2:EPN here based on the + * original fault's DEAR. If we do that we have to modify the + * ITLB miss handler to also store SRR0 in the exception frame + * as DEAR. + * + * However, one nasty thing we did is we cleared the reservation + * (well, potentially we did). We do a trick here thus if we + * are not a level 0 exception (we interrupted the TLB miss) we + * offset the return address by -4 in order to replay the tlbsrx + * instruction there + */ + subf r10,r13,r12 + cmpldi cr0,r10,PACA_EXTLB+EX_TLB_SIZE + bne- 1f + ld r11,PACA_EXTLB+EX_TLB_SIZE+EX_TLB_SRR0(r13) + addi r10,r11,-4 + std r10,PACA_EXTLB+EX_TLB_SIZE+EX_TLB_SRR0(r13) +1: +END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_TLBRSRV) + /* Return to caller, normal case */ + TLB_MISS_STATS_X(MMSTAT_TLB_MISS_PT_OK); + TLB_MISS_EPILOG_SUCCESS + rfi + +virt_page_table_tlb_miss_fault: + /* If we fault here, things are a little bit tricky. We need to call + * either data or instruction store fault, and we need to retrieve + * the original fault address and ESR (for data). + * + * The thing is, we know that in normal circumstances, this is + * always called as a second level tlb miss for SW load or as a first + * level TLB miss for HW load, so we should be able to peek at the + * relevant information in the first exception frame in the PACA. + * + * However, we do need to double check that, because we may just hit + * a stray kernel pointer or a userland attack trying to hit those + * areas. If that is the case, we do a data fault. (We can't get here + * from an instruction tlb miss anyway). + * + * Note also that when going to a fault, we must unwind the previous + * level as well. Since we are doing that, we don't need to clear or + * restore the TLB reservation neither. + */ + subf r10,r13,r12 + cmpldi cr0,r10,PACA_EXTLB+EX_TLB_SIZE + bne- virt_page_table_tlb_miss_whacko_fault + + /* We dig the original DEAR and ESR from slot 0 */ + ld r15,EX_TLB_DEAR+PACA_EXTLB(r13) + ld r16,EX_TLB_ESR+PACA_EXTLB(r13) + + /* We check for the "special" ESR value for instruction faults */ + cmpdi cr0,r16,-1 + beq 1f + mtspr SPRN_DEAR,r15 + mtspr SPRN_ESR,r16 + TLB_MISS_STATS_D(MMSTAT_TLB_MISS_PT_FAULT); + TLB_MISS_EPILOG_ERROR + b exc_data_storage_book3e +1: TLB_MISS_STATS_I(MMSTAT_TLB_MISS_PT_FAULT); + TLB_MISS_EPILOG_ERROR + b exc_instruction_storage_book3e + +virt_page_table_tlb_miss_whacko_fault: + /* The linear fault will restart everything so ESR and DEAR will + * not have been clobbered, let's just fault with what we have + */ + TLB_MISS_STATS_X(MMSTAT_TLB_MISS_PT_FAULT); + TLB_MISS_EPILOG_ERROR + b exc_data_storage_book3e + + +/************************************************************** + * * + * TLB miss handling for Book3E with hw page table support * + * * + **************************************************************/ + + +/* Data TLB miss */ + START_EXCEPTION(data_tlb_miss_htw) + TLB_MISS_PROLOG + + /* Now we handle the fault proper. We only save DEAR in normal + * fault case since that's the only interesting values here. + * We could probably also optimize by not saving SRR0/1 in the + * linear mapping case but I'll leave that for later + */ + mfspr r14,SPRN_ESR + mfspr r16,SPRN_DEAR /* get faulting address */ + srdi r11,r16,60 /* get region */ + cmpldi cr0,r11,0xc /* linear mapping ? */ + TLB_MISS_STATS_SAVE_INFO + beq tlb_load_linear /* yes -> go to linear map load */ + + /* We do the user/kernel test for the PID here along with the RW test + */ + cmpldi cr0,r11,0 /* Check for user region */ + ld r15,PACAPGD(r13) /* Load user pgdir */ + beq htw_tlb_miss + + /* XXX replace the RMW cycles with immediate loads + writes */ +1: mfspr r10,SPRN_MAS1 + cmpldi cr0,r11,8 /* Check for vmalloc region */ + rlwinm r10,r10,0,16,1 /* Clear TID */ + mtspr SPRN_MAS1,r10 + ld r15,PACA_KERNELPGD(r13) /* Load kernel pgdir */ + beq+ htw_tlb_miss + + /* We got a crappy address, just fault with whatever DEAR and ESR + * are here + */ + TLB_MISS_STATS_D(MMSTAT_TLB_MISS_NORM_FAULT) + TLB_MISS_EPILOG_ERROR + b exc_data_storage_book3e + +/* Instruction TLB miss */ + START_EXCEPTION(instruction_tlb_miss_htw) + TLB_MISS_PROLOG + + /* If we take a recursive fault, the second level handler may need + * to know whether we are handling a data or instruction fault in + * order to get to the right store fault handler. We provide that + * info by keeping a crazy value for ESR in r14 + */ + li r14,-1 /* store to exception frame is done later */ + + /* Now we handle the fault proper. We only save DEAR in the non + * linear mapping case since we know the linear mapping case will + * not re-enter. We could indeed optimize and also not save SRR0/1 + * in the linear mapping case but I'll leave that for later + * + * Faulting address is SRR0 which is already in r16 + */ + srdi r11,r16,60 /* get region */ + cmpldi cr0,r11,0xc /* linear mapping ? */ + TLB_MISS_STATS_SAVE_INFO + beq tlb_load_linear /* yes -> go to linear map load */ + + /* We do the user/kernel test for the PID here along with the RW test + */ + cmpldi cr0,r11,0 /* Check for user region */ + ld r15,PACAPGD(r13) /* Load user pgdir */ + beq htw_tlb_miss + + /* XXX replace the RMW cycles with immediate loads + writes */ +1: mfspr r10,SPRN_MAS1 + cmpldi cr0,r11,8 /* Check for vmalloc region */ + rlwinm r10,r10,0,16,1 /* Clear TID */ + mtspr SPRN_MAS1,r10 + ld r15,PACA_KERNELPGD(r13) /* Load kernel pgdir */ + beq+ htw_tlb_miss + + /* We got a crappy address, just fault */ + TLB_MISS_STATS_I(MMSTAT_TLB_MISS_NORM_FAULT) + TLB_MISS_EPILOG_ERROR + b exc_instruction_storage_book3e + + +/* + * This is the guts of the second-level TLB miss handler for direct + * misses. We are entered with: + * + * r16 = virtual page table faulting address + * r15 = PGD pointer + * r14 = ESR + * r13 = PACA + * r12 = TLB exception frame in PACA + * r11 = crap (free to use) + * r10 = crap (free to use) + * + * It can be re-entered by the linear mapping miss handler. However, to + * avoid too much complication, it will save/restore things for us + */ +htw_tlb_miss: + /* Search if we already have a TLB entry for that virtual address, and + * if we do, bail out. + * + * MAS1:IND should be already set based on MAS4 + */ + PPC_TLBSRX_DOT(0,R16) + beq htw_tlb_miss_done + + /* Now, we need to walk the page tables. First check if we are in + * range. + */ + rldicl. r10,r16,64-PGTABLE_EADDR_SIZE,PGTABLE_EADDR_SIZE+4 + bne- htw_tlb_miss_fault + + /* Get the PGD pointer */ + cmpldi cr0,r15,0 + beq- htw_tlb_miss_fault + + /* Get to PGD entry */ + rldicl r11,r16,64-(PGDIR_SHIFT-3),64-PGD_INDEX_SIZE-3 + clrrdi r10,r11,3 + ldx r15,r10,r15 + cmpdi cr0,r15,0 + bge htw_tlb_miss_fault + +#ifndef CONFIG_PPC_64K_PAGES + /* Get to PUD entry */ + rldicl r11,r16,64-(PUD_SHIFT-3),64-PUD_INDEX_SIZE-3 + clrrdi r10,r11,3 + ldx r15,r10,r15 + cmpdi cr0,r15,0 + bge htw_tlb_miss_fault +#endif /* CONFIG_PPC_64K_PAGES */ + + /* Get to PMD entry */ + rldicl r11,r16,64-(PMD_SHIFT-3),64-PMD_INDEX_SIZE-3 + clrrdi r10,r11,3 + ldx r15,r10,r15 + cmpdi cr0,r15,0 + bge htw_tlb_miss_fault + + /* Ok, we're all right, we can now create an indirect entry for + * a 1M or 256M page. + * + * The last trick is now that because we use "half" pages for + * the HTW (1M IND is 2K and 256M IND is 32K) we need to account + * for an added LSB bit to the RPN. For 64K pages, there is no + * problem as we already use 32K arrays (half PTE pages), but for + * 4K page we need to extract a bit from the virtual address and + * insert it into the "PA52" bit of the RPN. + */ +#ifndef CONFIG_PPC_64K_PAGES + rlwimi r15,r16,32-9,20,20 +#endif + /* Now we build the MAS: + * + * MAS 0 : Fully setup with defaults in MAS4 and TLBnCFG + * MAS 1 : Almost fully setup + * - PID already updated by caller if necessary + * - TSIZE for now is base ind page size always + * MAS 2 : Use defaults + * MAS 3+7 : Needs to be done + */ +#ifdef CONFIG_PPC_64K_PAGES + ori r10,r15,(BOOK3E_PAGESZ_64K << MAS3_SPSIZE_SHIFT) +#else + ori r10,r15,(BOOK3E_PAGESZ_4K << MAS3_SPSIZE_SHIFT) +#endif + +BEGIN_MMU_FTR_SECTION + srdi r16,r10,32 + mtspr SPRN_MAS3,r10 + mtspr SPRN_MAS7,r16 +MMU_FTR_SECTION_ELSE + mtspr SPRN_MAS7_MAS3,r10 +ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_USE_PAIRED_MAS) + + tlbwe + +htw_tlb_miss_done: + /* We don't bother with restoring DEAR or ESR since we know we are + * level 0 and just going back to userland. They are only needed + * if you are going to take an access fault + */ + TLB_MISS_STATS_X(MMSTAT_TLB_MISS_PT_OK) + TLB_MISS_EPILOG_SUCCESS + rfi + +htw_tlb_miss_fault: + /* We need to check if it was an instruction miss. We know this + * though because r14 would contain -1 + */ + cmpdi cr0,r14,-1 + beq 1f + mtspr SPRN_DEAR,r16 + mtspr SPRN_ESR,r14 + TLB_MISS_STATS_D(MMSTAT_TLB_MISS_PT_FAULT) + TLB_MISS_EPILOG_ERROR + b exc_data_storage_book3e +1: TLB_MISS_STATS_I(MMSTAT_TLB_MISS_PT_FAULT) + TLB_MISS_EPILOG_ERROR + b exc_instruction_storage_book3e + +/* + * This is the guts of "any" level TLB miss handler for kernel linear + * mapping misses. We are entered with: + * + * + * r16 = faulting address + * r15 = crap (free to use) + * r14 = ESR (data) or -1 (instruction) + * r13 = PACA + * r12 = TLB exception frame in PACA + * r11 = crap (free to use) + * r10 = crap (free to use) + * + * In addition we know that we will not re-enter, so in theory, we could + * use a simpler epilog not restoring SRR0/1 etc.. but we'll do that later. + * + * We also need to be careful about MAS registers here & TLB reservation, + * as we know we'll have clobbered them if we interrupt the main TLB miss + * handlers in which case we probably want to do a full restart at level + * 0 rather than saving / restoring the MAS. + * + * Note: If we care about performance of that core, we can easily shuffle + * a few things around + */ +tlb_load_linear: + /* For now, we assume the linear mapping is contiguous and stops at + * linear_map_top. We also assume the size is a multiple of 1G, thus + * we only use 1G pages for now. That might have to be changed in a + * final implementation, especially when dealing with hypervisors + */ + ld r11,PACATOC(r13) + ld r11,linear_map_top@got(r11) + ld r10,0(r11) + tovirt(10,10) + cmpld cr0,r16,r10 + bge tlb_load_linear_fault + + /* MAS1 need whole new setup. */ + li r15,(BOOK3E_PAGESZ_1GB< -#include -#include -#include -#include - -#include - -#ifdef CONFIG_SPARSEMEM_VMEMMAP -/* - * On Book3E CPUs, the vmemmap is currently mapped in the top half of - * the vmalloc space using normal page tables, though the size of - * pages encoded in the PTEs can be different - */ -int __meminit vmemmap_create_mapping(unsigned long start, - unsigned long page_size, - unsigned long phys) -{ - /* Create a PTE encoding without page size */ - unsigned long i, flags = _PAGE_PRESENT | _PAGE_ACCESSED | - _PAGE_KERNEL_RW; - - /* PTEs only contain page size encodings up to 32M */ - BUG_ON(mmu_psize_defs[mmu_vmemmap_psize].enc > 0xf); - - /* Encode the size in the PTE */ - flags |= mmu_psize_defs[mmu_vmemmap_psize].enc << 8; - - /* For each PTE for that area, map things. Note that we don't - * increment phys because all PTEs are of the large size and - * thus must have the low bits clear - */ - for (i = 0; i < page_size; i += PAGE_SIZE) - BUG_ON(map_kernel_page(start + i, phys, __pgprot(flags))); - - return 0; -} - -#ifdef CONFIG_MEMORY_HOTPLUG -void vmemmap_remove_mapping(unsigned long start, - unsigned long page_size) -{ -} -#endif -#endif /* CONFIG_SPARSEMEM_VMEMMAP */ - -static __ref void *early_alloc_pgtable(unsigned long size) -{ - void *ptr; - - ptr = memblock_alloc_try_nid(size, size, MEMBLOCK_LOW_LIMIT, - __pa(MAX_DMA_ADDRESS), NUMA_NO_NODE); - - if (!ptr) - panic("%s: Failed to allocate %lu bytes align=0x%lx max_addr=%lx\n", - __func__, size, size, __pa(MAX_DMA_ADDRESS)); - - return ptr; -} - -/* - * map_kernel_page currently only called by __ioremap - * map_kernel_page adds an entry to the ioremap page table - * and adds an entry to the HPT, possibly bolting it - */ -int map_kernel_page(unsigned long ea, unsigned long pa, pgprot_t prot) -{ - pgd_t *pgdp; - pud_t *pudp; - pmd_t *pmdp; - pte_t *ptep; - - BUILD_BUG_ON(TASK_SIZE_USER64 > PGTABLE_RANGE); - if (slab_is_available()) { - pgdp = pgd_offset_k(ea); - pudp = pud_alloc(&init_mm, pgdp, ea); - if (!pudp) - return -ENOMEM; - pmdp = pmd_alloc(&init_mm, pudp, ea); - if (!pmdp) - return -ENOMEM; - ptep = pte_alloc_kernel(pmdp, ea); - if (!ptep) - return -ENOMEM; - } else { - pgdp = pgd_offset_k(ea); -#ifndef __PAGETABLE_PUD_FOLDED - if (pgd_none(*pgdp)) { - pudp = early_alloc_pgtable(PUD_TABLE_SIZE); - pgd_populate(&init_mm, pgdp, pudp); - } -#endif /* !__PAGETABLE_PUD_FOLDED */ - pudp = pud_offset(pgdp, ea); - if (pud_none(*pudp)) { - pmdp = early_alloc_pgtable(PMD_TABLE_SIZE); - pud_populate(&init_mm, pudp, pmdp); - } - pmdp = pmd_offset(pudp, ea); - if (!pmd_present(*pmdp)) { - ptep = early_alloc_pgtable(PAGE_SIZE); - pmd_populate_kernel(&init_mm, pmdp, ptep); - } - ptep = pte_offset_kernel(pmdp, ea); - } - set_pte_at(&init_mm, ea, ptep, pfn_pte(pa >> PAGE_SHIFT, prot)); - - smp_wmb(); - return 0; -} diff --git a/arch/powerpc/mm/tlb_low_64e.S b/arch/powerpc/mm/tlb_low_64e.S deleted file mode 100644 index 9ed90064f542..000000000000 --- a/arch/powerpc/mm/tlb_low_64e.S +++ /dev/null @@ -1,1280 +0,0 @@ -/* - * Low level TLB miss handlers for Book3E - * - * Copyright (C) 2008-2009 - * Ben. Herrenschmidt (benh@kernel.crashing.org), IBM Corp. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#ifdef CONFIG_PPC_64K_PAGES -#define VPTE_PMD_SHIFT (PTE_INDEX_SIZE+1) -#else -#define VPTE_PMD_SHIFT (PTE_INDEX_SIZE) -#endif -#define VPTE_PUD_SHIFT (VPTE_PMD_SHIFT + PMD_INDEX_SIZE) -#define VPTE_PGD_SHIFT (VPTE_PUD_SHIFT + PUD_INDEX_SIZE) -#define VPTE_INDEX_SIZE (VPTE_PGD_SHIFT + PGD_INDEX_SIZE) - -/********************************************************************** - * * - * TLB miss handling for Book3E with a bolted linear mapping * - * No virtual page table, no nested TLB misses * - * * - **********************************************************************/ - -/* - * Note that, unlike non-bolted handlers, TLB_EXFRAME is not - * modified by the TLB miss handlers themselves, since the TLB miss - * handler code will not itself cause a recursive TLB miss. - * - * TLB_EXFRAME will be modified when crit/mc/debug exceptions are - * entered/exited. - */ -.macro tlb_prolog_bolted intnum addr - mtspr SPRN_SPRG_GEN_SCRATCH,r12 - mfspr r12,SPRN_SPRG_TLB_EXFRAME - std r13,EX_TLB_R13(r12) - std r10,EX_TLB_R10(r12) - mfspr r13,SPRN_SPRG_PACA - - mfcr r10 - std r11,EX_TLB_R11(r12) -#ifdef CONFIG_KVM_BOOKE_HV -BEGIN_FTR_SECTION - mfspr r11, SPRN_SRR1 -END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV) -#endif - DO_KVM \intnum, SPRN_SRR1 - std r16,EX_TLB_R16(r12) - mfspr r16,\addr /* get faulting address */ - std r14,EX_TLB_R14(r12) - ld r14,PACAPGD(r13) - std r15,EX_TLB_R15(r12) - std r10,EX_TLB_CR(r12) -#ifdef CONFIG_PPC_FSL_BOOK3E -START_BTB_FLUSH_SECTION - mfspr r11, SPRN_SRR1 - andi. r10,r11,MSR_PR - beq 1f - BTB_FLUSH(r10) -1: -END_BTB_FLUSH_SECTION - std r7,EX_TLB_R7(r12) -#endif - TLB_MISS_PROLOG_STATS -.endm - -.macro tlb_epilog_bolted - ld r14,EX_TLB_CR(r12) -#ifdef CONFIG_PPC_FSL_BOOK3E - ld r7,EX_TLB_R7(r12) -#endif - ld r10,EX_TLB_R10(r12) - ld r11,EX_TLB_R11(r12) - ld r13,EX_TLB_R13(r12) - mtcr r14 - ld r14,EX_TLB_R14(r12) - ld r15,EX_TLB_R15(r12) - TLB_MISS_RESTORE_STATS - ld r16,EX_TLB_R16(r12) - mfspr r12,SPRN_SPRG_GEN_SCRATCH -.endm - -/* Data TLB miss */ - START_EXCEPTION(data_tlb_miss_bolted) - tlb_prolog_bolted BOOKE_INTERRUPT_DTLB_MISS SPRN_DEAR - - /* We need _PAGE_PRESENT and _PAGE_ACCESSED set */ - - /* We do the user/kernel test for the PID here along with the RW test - */ - /* We pre-test some combination of permissions to avoid double - * faults: - * - * We move the ESR:ST bit into the position of _PAGE_BAP_SW in the PTE - * ESR_ST is 0x00800000 - * _PAGE_BAP_SW is 0x00000010 - * So the shift is >> 19. This tests for supervisor writeability. - * If the page happens to be supervisor writeable and not user - * writeable, we will take a new fault later, but that should be - * a rare enough case. - * - * We also move ESR_ST in _PAGE_DIRTY position - * _PAGE_DIRTY is 0x00001000 so the shift is >> 11 - * - * MAS1 is preset for all we need except for TID that needs to - * be cleared for kernel translations - */ - - mfspr r11,SPRN_ESR - - srdi r15,r16,60 /* get region */ - rldicl. r10,r16,64-PGTABLE_EADDR_SIZE,PGTABLE_EADDR_SIZE+4 - bne- dtlb_miss_fault_bolted /* Bail if fault addr is invalid */ - - rlwinm r10,r11,32-19,27,27 - rlwimi r10,r11,32-16,19,19 - cmpwi r15,0 /* user vs kernel check */ - ori r10,r10,_PAGE_PRESENT - oris r11,r10,_PAGE_ACCESSED@h - - TLB_MISS_STATS_SAVE_INFO_BOLTED - bne tlb_miss_kernel_bolted - -tlb_miss_common_bolted: -/* - * This is the guts of the TLB miss handler for bolted-linear. - * We are entered with: - * - * r16 = faulting address - * r15 = crap (free to use) - * r14 = page table base - * r13 = PACA - * r11 = PTE permission mask - * r10 = crap (free to use) - */ - rldicl r15,r16,64-PGDIR_SHIFT+3,64-PGD_INDEX_SIZE-3 - cmpldi cr0,r14,0 - clrrdi r15,r15,3 - beq tlb_miss_fault_bolted /* No PGDIR, bail */ - -BEGIN_MMU_FTR_SECTION - /* Set the TLB reservation and search for existing entry. Then load - * the entry. - */ - PPC_TLBSRX_DOT(0,R16) - ldx r14,r14,r15 /* grab pgd entry */ - beq tlb_miss_done_bolted /* tlb exists already, bail */ -MMU_FTR_SECTION_ELSE - ldx r14,r14,r15 /* grab pgd entry */ -ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_USE_TLBRSRV) - -#ifndef CONFIG_PPC_64K_PAGES - rldicl r15,r16,64-PUD_SHIFT+3,64-PUD_INDEX_SIZE-3 - clrrdi r15,r15,3 - cmpdi cr0,r14,0 - bge tlb_miss_fault_bolted /* Bad pgd entry or hugepage; bail */ - ldx r14,r14,r15 /* grab pud entry */ -#endif /* CONFIG_PPC_64K_PAGES */ - - rldicl r15,r16,64-PMD_SHIFT+3,64-PMD_INDEX_SIZE-3 - clrrdi r15,r15,3 - cmpdi cr0,r14,0 - bge tlb_miss_fault_bolted - ldx r14,r14,r15 /* Grab pmd entry */ - - rldicl r15,r16,64-PAGE_SHIFT+3,64-PTE_INDEX_SIZE-3 - clrrdi r15,r15,3 - cmpdi cr0,r14,0 - bge tlb_miss_fault_bolted - ldx r14,r14,r15 /* Grab PTE, normal (!huge) page */ - - /* Check if required permissions are met */ - andc. r15,r11,r14 - rldicr r15,r14,64-(PTE_RPN_SHIFT-PAGE_SHIFT),63-PAGE_SHIFT - bne- tlb_miss_fault_bolted - - /* Now we build the MAS: - * - * MAS 0 : Fully setup with defaults in MAS4 and TLBnCFG - * MAS 1 : Almost fully setup - * - PID already updated by caller if necessary - * - TSIZE need change if !base page size, not - * yet implemented for now - * MAS 2 : Defaults not useful, need to be redone - * MAS 3+7 : Needs to be done - */ - clrrdi r11,r16,12 /* Clear low crap in EA */ - clrldi r15,r15,12 /* Clear crap at the top */ - rlwimi r11,r14,32-19,27,31 /* Insert WIMGE */ - rlwimi r15,r14,32-8,22,25 /* Move in U bits */ - mtspr SPRN_MAS2,r11 - andi. r11,r14,_PAGE_DIRTY - rlwimi r15,r14,32-2,26,31 /* Move in BAP bits */ - - /* Mask out SW and UW if !DIRTY (XXX optimize this !) */ - bne 1f - li r11,MAS3_SW|MAS3_UW - andc r15,r15,r11 -1: - mtspr SPRN_MAS7_MAS3,r15 - tlbwe - -tlb_miss_done_bolted: - TLB_MISS_STATS_X(MMSTAT_TLB_MISS_NORM_OK) - tlb_epilog_bolted - rfi - -itlb_miss_kernel_bolted: - li r11,_PAGE_PRESENT|_PAGE_BAP_SX /* Base perm */ - oris r11,r11,_PAGE_ACCESSED@h -tlb_miss_kernel_bolted: - mfspr r10,SPRN_MAS1 - ld r14,PACA_KERNELPGD(r13) - cmpldi cr0,r15,8 /* Check for vmalloc region */ - rlwinm r10,r10,0,16,1 /* Clear TID */ - mtspr SPRN_MAS1,r10 - beq+ tlb_miss_common_bolted - -tlb_miss_fault_bolted: - /* We need to check if it was an instruction miss */ - andi. r10,r11,_PAGE_EXEC|_PAGE_BAP_SX - bne itlb_miss_fault_bolted -dtlb_miss_fault_bolted: - TLB_MISS_STATS_D(MMSTAT_TLB_MISS_NORM_FAULT) - tlb_epilog_bolted - b exc_data_storage_book3e -itlb_miss_fault_bolted: - TLB_MISS_STATS_I(MMSTAT_TLB_MISS_NORM_FAULT) - tlb_epilog_bolted - b exc_instruction_storage_book3e - -/* Instruction TLB miss */ - START_EXCEPTION(instruction_tlb_miss_bolted) - tlb_prolog_bolted BOOKE_INTERRUPT_ITLB_MISS SPRN_SRR0 - - rldicl. r10,r16,64-PGTABLE_EADDR_SIZE,PGTABLE_EADDR_SIZE+4 - srdi r15,r16,60 /* get region */ - TLB_MISS_STATS_SAVE_INFO_BOLTED - bne- itlb_miss_fault_bolted - - li r11,_PAGE_PRESENT|_PAGE_EXEC /* Base perm */ - - /* We do the user/kernel test for the PID here along with the RW test - */ - - cmpldi cr0,r15,0 /* Check for user region */ - oris r11,r11,_PAGE_ACCESSED@h - beq tlb_miss_common_bolted - b itlb_miss_kernel_bolted - -#ifdef CONFIG_PPC_FSL_BOOK3E -/* - * TLB miss handling for e6500 and derivatives, using hardware tablewalk. - * - * Linear mapping is bolted: no virtual page table or nested TLB misses - * Indirect entries in TLB1, hardware loads resulting direct entries - * into TLB0 - * No HES or NV hint on TLB1, so we need to do software round-robin - * No tlbsrx. so we need a spinlock, and we have to deal - * with MAS-damage caused by tlbsx - * 4K pages only - */ - - START_EXCEPTION(instruction_tlb_miss_e6500) - tlb_prolog_bolted BOOKE_INTERRUPT_ITLB_MISS SPRN_SRR0 - - ld r11,PACA_TCD_PTR(r13) - srdi. r15,r16,60 /* get region */ - ori r16,r16,1 - - TLB_MISS_STATS_SAVE_INFO_BOLTED - bne tlb_miss_kernel_e6500 /* user/kernel test */ - - b tlb_miss_common_e6500 - - START_EXCEPTION(data_tlb_miss_e6500) - tlb_prolog_bolted BOOKE_INTERRUPT_DTLB_MISS SPRN_DEAR - - ld r11,PACA_TCD_PTR(r13) - srdi. r15,r16,60 /* get region */ - rldicr r16,r16,0,62 - - TLB_MISS_STATS_SAVE_INFO_BOLTED - bne tlb_miss_kernel_e6500 /* user vs kernel check */ - -/* - * This is the guts of the TLB miss handler for e6500 and derivatives. - * We are entered with: - * - * r16 = page of faulting address (low bit 0 if data, 1 if instruction) - * r15 = crap (free to use) - * r14 = page table base - * r13 = PACA - * r11 = tlb_per_core ptr - * r10 = crap (free to use) - * r7 = esel_next - */ -tlb_miss_common_e6500: - crmove cr2*4+2,cr0*4+2 /* cr2.eq != 0 if kernel address */ - -BEGIN_FTR_SECTION /* CPU_FTR_SMT */ - /* - * Search if we already have an indirect entry for that virtual - * address, and if we do, bail out. - * - * MAS6:IND should be already set based on MAS4 - */ - lhz r10,PACAPACAINDEX(r13) - addi r10,r10,1 - crclr cr1*4+eq /* set cr1.eq = 0 for non-recursive */ -1: lbarx r15,0,r11 - cmpdi r15,0 - bne 2f - stbcx. r10,0,r11 - bne 1b -3: - .subsection 1 -2: cmpd cr1,r15,r10 /* recursive lock due to mcheck/crit/etc? */ - beq cr1,3b /* unlock will happen if cr1.eq = 0 */ -10: lbz r15,0(r11) - cmpdi r15,0 - bne 10b - b 1b - .previous -END_FTR_SECTION_IFSET(CPU_FTR_SMT) - - lbz r7,TCD_ESEL_NEXT(r11) - -BEGIN_FTR_SECTION /* CPU_FTR_SMT */ - /* - * Erratum A-008139 says that we can't use tlbwe to change - * an indirect entry in any way (including replacing or - * invalidating) if the other thread could be in the process - * of a lookup. The workaround is to invalidate the entry - * with tlbilx before overwriting. - */ - - rlwinm r10,r7,16,0xff0000 - oris r10,r10,MAS0_TLBSEL(1)@h - mtspr SPRN_MAS0,r10 - isync - tlbre - mfspr r15,SPRN_MAS1 - andis. r15,r15,MAS1_VALID@h - beq 5f - -BEGIN_FTR_SECTION_NESTED(532) - mfspr r10,SPRN_MAS8 - rlwinm r10,r10,0,0x80000fff /* tgs,tlpid -> sgs,slpid */ - mtspr SPRN_MAS5,r10 -END_FTR_SECTION_NESTED(CPU_FTR_EMB_HV,CPU_FTR_EMB_HV,532) - - mfspr r10,SPRN_MAS1 - rlwinm r15,r10,0,0x3fff0000 /* tid -> spid */ - rlwimi r15,r10,20,0x00000003 /* ind,ts -> sind,sas */ - mfspr r10,SPRN_MAS6 - mtspr SPRN_MAS6,r15 - - mfspr r15,SPRN_MAS2 - isync - tlbilxva 0,r15 - isync - - mtspr SPRN_MAS6,r10 - -5: -BEGIN_FTR_SECTION_NESTED(532) - li r10,0 - mtspr SPRN_MAS8,r10 - mtspr SPRN_MAS5,r10 -END_FTR_SECTION_NESTED(CPU_FTR_EMB_HV,CPU_FTR_EMB_HV,532) - - tlbsx 0,r16 - mfspr r10,SPRN_MAS1 - andis. r15,r10,MAS1_VALID@h - bne tlb_miss_done_e6500 -FTR_SECTION_ELSE - mfspr r10,SPRN_MAS1 -ALT_FTR_SECTION_END_IFSET(CPU_FTR_SMT) - - oris r10,r10,MAS1_VALID@h - beq cr2,4f - rlwinm r10,r10,0,16,1 /* Clear TID */ -4: mtspr SPRN_MAS1,r10 - - /* Now, we need to walk the page tables. First check if we are in - * range. - */ - rldicl. r10,r16,64-PGTABLE_EADDR_SIZE,PGTABLE_EADDR_SIZE+4 - bne- tlb_miss_fault_e6500 - - rldicl r15,r16,64-PGDIR_SHIFT+3,64-PGD_INDEX_SIZE-3 - cmpldi cr0,r14,0 - clrrdi r15,r15,3 - beq- tlb_miss_fault_e6500 /* No PGDIR, bail */ - ldx r14,r14,r15 /* grab pgd entry */ - - rldicl r15,r16,64-PUD_SHIFT+3,64-PUD_INDEX_SIZE-3 - clrrdi r15,r15,3 - cmpdi cr0,r14,0 - bge tlb_miss_huge_e6500 /* Bad pgd entry or hugepage; bail */ - ldx r14,r14,r15 /* grab pud entry */ - - rldicl r15,r16,64-PMD_SHIFT+3,64-PMD_INDEX_SIZE-3 - clrrdi r15,r15,3 - cmpdi cr0,r14,0 - bge tlb_miss_huge_e6500 - ldx r14,r14,r15 /* Grab pmd entry */ - - mfspr r10,SPRN_MAS0 - cmpdi cr0,r14,0 - bge tlb_miss_huge_e6500 - - /* Now we build the MAS for a 2M indirect page: - * - * MAS 0 : ESEL needs to be filled by software round-robin - * MAS 1 : Fully set up - * - PID already updated by caller if necessary - * - TSIZE for now is base ind page size always - * - TID already cleared if necessary - * MAS 2 : Default not 2M-aligned, need to be redone - * MAS 3+7 : Needs to be done - */ - - ori r14,r14,(BOOK3E_PAGESZ_4K << MAS3_SPSIZE_SHIFT) - mtspr SPRN_MAS7_MAS3,r14 - - clrrdi r15,r16,21 /* make EA 2M-aligned */ - mtspr SPRN_MAS2,r15 - -tlb_miss_huge_done_e6500: - lbz r16,TCD_ESEL_MAX(r11) - lbz r14,TCD_ESEL_FIRST(r11) - rlwimi r10,r7,16,0x00ff0000 /* insert esel_next into MAS0 */ - addi r7,r7,1 /* increment esel_next */ - mtspr SPRN_MAS0,r10 - cmpw r7,r16 - iseleq r7,r14,r7 /* if next == last use first */ - stb r7,TCD_ESEL_NEXT(r11) - - tlbwe - -tlb_miss_done_e6500: - .macro tlb_unlock_e6500 -BEGIN_FTR_SECTION - beq cr1,1f /* no unlock if lock was recursively grabbed */ - li r15,0 - isync - stb r15,0(r11) -1: -END_FTR_SECTION_IFSET(CPU_FTR_SMT) - .endm - - tlb_unlock_e6500 - TLB_MISS_STATS_X(MMSTAT_TLB_MISS_NORM_OK) - tlb_epilog_bolted - rfi - -tlb_miss_huge_e6500: - beq tlb_miss_fault_e6500 - li r10,1 - andi. r15,r14,HUGEPD_SHIFT_MASK@l /* r15 = psize */ - rldimi r14,r10,63,0 /* Set PD_HUGE */ - xor r14,r14,r15 /* Clear size bits */ - ldx r14,0,r14 - - /* - * Now we build the MAS for a huge page. - * - * MAS 0 : ESEL needs to be filled by software round-robin - * - can be handled by indirect code - * MAS 1 : Need to clear IND and set TSIZE - * MAS 2,3+7: Needs to be redone similar to non-tablewalk handler - */ - - subi r15,r15,10 /* Convert psize to tsize */ - mfspr r10,SPRN_MAS1 - rlwinm r10,r10,0,~MAS1_IND - rlwimi r10,r15,MAS1_TSIZE_SHIFT,MAS1_TSIZE_MASK - mtspr SPRN_MAS1,r10 - - li r10,-0x400 - sld r15,r10,r15 /* Generate mask based on size */ - and r10,r16,r15 - rldicr r15,r14,64-(PTE_RPN_SHIFT-PAGE_SHIFT),63-PAGE_SHIFT - rlwimi r10,r14,32-19,27,31 /* Insert WIMGE */ - clrldi r15,r15,PAGE_SHIFT /* Clear crap at the top */ - rlwimi r15,r14,32-8,22,25 /* Move in U bits */ - mtspr SPRN_MAS2,r10 - andi. r10,r14,_PAGE_DIRTY - rlwimi r15,r14,32-2,26,31 /* Move in BAP bits */ - - /* Mask out SW and UW if !DIRTY (XXX optimize this !) */ - bne 1f - li r10,MAS3_SW|MAS3_UW - andc r15,r15,r10 -1: - mtspr SPRN_MAS7_MAS3,r15 - - mfspr r10,SPRN_MAS0 - b tlb_miss_huge_done_e6500 - -tlb_miss_kernel_e6500: - ld r14,PACA_KERNELPGD(r13) - cmpldi cr1,r15,8 /* Check for vmalloc region */ - beq+ cr1,tlb_miss_common_e6500 - -tlb_miss_fault_e6500: - tlb_unlock_e6500 - /* We need to check if it was an instruction miss */ - andi. r16,r16,1 - bne itlb_miss_fault_e6500 -dtlb_miss_fault_e6500: - TLB_MISS_STATS_D(MMSTAT_TLB_MISS_NORM_FAULT) - tlb_epilog_bolted - b exc_data_storage_book3e -itlb_miss_fault_e6500: - TLB_MISS_STATS_I(MMSTAT_TLB_MISS_NORM_FAULT) - tlb_epilog_bolted - b exc_instruction_storage_book3e -#endif /* CONFIG_PPC_FSL_BOOK3E */ - -/********************************************************************** - * * - * TLB miss handling for Book3E with TLB reservation and HES support * - * * - **********************************************************************/ - - -/* Data TLB miss */ - START_EXCEPTION(data_tlb_miss) - TLB_MISS_PROLOG - - /* Now we handle the fault proper. We only save DEAR in normal - * fault case since that's the only interesting values here. - * We could probably also optimize by not saving SRR0/1 in the - * linear mapping case but I'll leave that for later - */ - mfspr r14,SPRN_ESR - mfspr r16,SPRN_DEAR /* get faulting address */ - srdi r15,r16,60 /* get region */ - cmpldi cr0,r15,0xc /* linear mapping ? */ - TLB_MISS_STATS_SAVE_INFO - beq tlb_load_linear /* yes -> go to linear map load */ - - /* The page tables are mapped virtually linear. At this point, though, - * we don't know whether we are trying to fault in a first level - * virtual address or a virtual page table address. We can get that - * from bit 0x1 of the region ID which we have set for a page table - */ - andi. r10,r15,0x1 - bne- virt_page_table_tlb_miss - - std r14,EX_TLB_ESR(r12); /* save ESR */ - std r16,EX_TLB_DEAR(r12); /* save DEAR */ - - /* We need _PAGE_PRESENT and _PAGE_ACCESSED set */ - li r11,_PAGE_PRESENT - oris r11,r11,_PAGE_ACCESSED@h - - /* We do the user/kernel test for the PID here along with the RW test - */ - cmpldi cr0,r15,0 /* Check for user region */ - - /* We pre-test some combination of permissions to avoid double - * faults: - * - * We move the ESR:ST bit into the position of _PAGE_BAP_SW in the PTE - * ESR_ST is 0x00800000 - * _PAGE_BAP_SW is 0x00000010 - * So the shift is >> 19. This tests for supervisor writeability. - * If the page happens to be supervisor writeable and not user - * writeable, we will take a new fault later, but that should be - * a rare enough case. - * - * We also move ESR_ST in _PAGE_DIRTY position - * _PAGE_DIRTY is 0x00001000 so the shift is >> 11 - * - * MAS1 is preset for all we need except for TID that needs to - * be cleared for kernel translations - */ - rlwimi r11,r14,32-19,27,27 - rlwimi r11,r14,32-16,19,19 - beq normal_tlb_miss - /* XXX replace the RMW cycles with immediate loads + writes */ -1: mfspr r10,SPRN_MAS1 - cmpldi cr0,r15,8 /* Check for vmalloc region */ - rlwinm r10,r10,0,16,1 /* Clear TID */ - mtspr SPRN_MAS1,r10 - beq+ normal_tlb_miss - - /* We got a crappy address, just fault with whatever DEAR and ESR - * are here - */ - TLB_MISS_STATS_D(MMSTAT_TLB_MISS_NORM_FAULT) - TLB_MISS_EPILOG_ERROR - b exc_data_storage_book3e - -/* Instruction TLB miss */ - START_EXCEPTION(instruction_tlb_miss) - TLB_MISS_PROLOG - - /* If we take a recursive fault, the second level handler may need - * to know whether we are handling a data or instruction fault in - * order to get to the right store fault handler. We provide that - * info by writing a crazy value in ESR in our exception frame - */ - li r14,-1 /* store to exception frame is done later */ - - /* Now we handle the fault proper. We only save DEAR in the non - * linear mapping case since we know the linear mapping case will - * not re-enter. We could indeed optimize and also not save SRR0/1 - * in the linear mapping case but I'll leave that for later - * - * Faulting address is SRR0 which is already in r16 - */ - srdi r15,r16,60 /* get region */ - cmpldi cr0,r15,0xc /* linear mapping ? */ - TLB_MISS_STATS_SAVE_INFO - beq tlb_load_linear /* yes -> go to linear map load */ - - /* We do the user/kernel test for the PID here along with the RW test - */ - li r11,_PAGE_PRESENT|_PAGE_EXEC /* Base perm */ - oris r11,r11,_PAGE_ACCESSED@h - - cmpldi cr0,r15,0 /* Check for user region */ - std r14,EX_TLB_ESR(r12) /* write crazy -1 to frame */ - beq normal_tlb_miss - - li r11,_PAGE_PRESENT|_PAGE_BAP_SX /* Base perm */ - oris r11,r11,_PAGE_ACCESSED@h - /* XXX replace the RMW cycles with immediate loads + writes */ - mfspr r10,SPRN_MAS1 - cmpldi cr0,r15,8 /* Check for vmalloc region */ - rlwinm r10,r10,0,16,1 /* Clear TID */ - mtspr SPRN_MAS1,r10 - beq+ normal_tlb_miss - - /* We got a crappy address, just fault */ - TLB_MISS_STATS_I(MMSTAT_TLB_MISS_NORM_FAULT) - TLB_MISS_EPILOG_ERROR - b exc_instruction_storage_book3e - -/* - * This is the guts of the first-level TLB miss handler for direct - * misses. We are entered with: - * - * r16 = faulting address - * r15 = region ID - * r14 = crap (free to use) - * r13 = PACA - * r12 = TLB exception frame in PACA - * r11 = PTE permission mask - * r10 = crap (free to use) - */ -normal_tlb_miss: - /* So we first construct the page table address. We do that by - * shifting the bottom of the address (not the region ID) by - * PAGE_SHIFT-3, clearing the bottom 3 bits (get a PTE ptr) and - * or'ing the fourth high bit. - * - * NOTE: For 64K pages, we do things slightly differently in - * order to handle the weird page table format used by linux - */ - ori r10,r15,0x1 -#ifdef CONFIG_PPC_64K_PAGES - /* For the top bits, 16 bytes per PTE */ - rldicl r14,r16,64-(PAGE_SHIFT-4),PAGE_SHIFT-4+4 - /* Now create the bottom bits as 0 in position 0x8000 and - * the rest calculated for 8 bytes per PTE - */ - rldicl r15,r16,64-(PAGE_SHIFT-3),64-15 - /* Insert the bottom bits in */ - rlwimi r14,r15,0,16,31 -#else - rldicl r14,r16,64-(PAGE_SHIFT-3),PAGE_SHIFT-3+4 -#endif - sldi r15,r10,60 - clrrdi r14,r14,3 - or r10,r15,r14 - -BEGIN_MMU_FTR_SECTION - /* Set the TLB reservation and search for existing entry. Then load - * the entry. - */ - PPC_TLBSRX_DOT(0,R16) - ld r14,0(r10) - beq normal_tlb_miss_done -MMU_FTR_SECTION_ELSE - ld r14,0(r10) -ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_USE_TLBRSRV) - -finish_normal_tlb_miss: - /* Check if required permissions are met */ - andc. r15,r11,r14 - bne- normal_tlb_miss_access_fault - - /* Now we build the MAS: - * - * MAS 0 : Fully setup with defaults in MAS4 and TLBnCFG - * MAS 1 : Almost fully setup - * - PID already updated by caller if necessary - * - TSIZE need change if !base page size, not - * yet implemented for now - * MAS 2 : Defaults not useful, need to be redone - * MAS 3+7 : Needs to be done - * - * TODO: mix up code below for better scheduling - */ - clrrdi r11,r16,12 /* Clear low crap in EA */ - rlwimi r11,r14,32-19,27,31 /* Insert WIMGE */ - mtspr SPRN_MAS2,r11 - - /* Check page size, if not standard, update MAS1 */ - rldicl r11,r14,64-8,64-8 -#ifdef CONFIG_PPC_64K_PAGES - cmpldi cr0,r11,BOOK3E_PAGESZ_64K -#else - cmpldi cr0,r11,BOOK3E_PAGESZ_4K -#endif - beq- 1f - mfspr r11,SPRN_MAS1 - rlwimi r11,r14,31,21,24 - rlwinm r11,r11,0,21,19 - mtspr SPRN_MAS1,r11 -1: - /* Move RPN in position */ - rldicr r11,r14,64-(PTE_RPN_SHIFT-PAGE_SHIFT),63-PAGE_SHIFT - clrldi r15,r11,12 /* Clear crap at the top */ - rlwimi r15,r14,32-8,22,25 /* Move in U bits */ - rlwimi r15,r14,32-2,26,31 /* Move in BAP bits */ - - /* Mask out SW and UW if !DIRTY (XXX optimize this !) */ - andi. r11,r14,_PAGE_DIRTY - bne 1f - li r11,MAS3_SW|MAS3_UW - andc r15,r15,r11 -1: -BEGIN_MMU_FTR_SECTION - srdi r16,r15,32 - mtspr SPRN_MAS3,r15 - mtspr SPRN_MAS7,r16 -MMU_FTR_SECTION_ELSE - mtspr SPRN_MAS7_MAS3,r15 -ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_USE_PAIRED_MAS) - - tlbwe - -normal_tlb_miss_done: - /* We don't bother with restoring DEAR or ESR since we know we are - * level 0 and just going back to userland. They are only needed - * if you are going to take an access fault - */ - TLB_MISS_STATS_X(MMSTAT_TLB_MISS_NORM_OK) - TLB_MISS_EPILOG_SUCCESS - rfi - -normal_tlb_miss_access_fault: - /* We need to check if it was an instruction miss */ - andi. r10,r11,_PAGE_EXEC - bne 1f - ld r14,EX_TLB_DEAR(r12) - ld r15,EX_TLB_ESR(r12) - mtspr SPRN_DEAR,r14 - mtspr SPRN_ESR,r15 - TLB_MISS_STATS_D(MMSTAT_TLB_MISS_NORM_FAULT) - TLB_MISS_EPILOG_ERROR - b exc_data_storage_book3e -1: TLB_MISS_STATS_I(MMSTAT_TLB_MISS_NORM_FAULT) - TLB_MISS_EPILOG_ERROR - b exc_instruction_storage_book3e - - -/* - * This is the guts of the second-level TLB miss handler for direct - * misses. We are entered with: - * - * r16 = virtual page table faulting address - * r15 = region (top 4 bits of address) - * r14 = crap (free to use) - * r13 = PACA - * r12 = TLB exception frame in PACA - * r11 = crap (free to use) - * r10 = crap (free to use) - * - * Note that this should only ever be called as a second level handler - * with the current scheme when using SW load. - * That means we can always get the original fault DEAR at - * EX_TLB_DEAR-EX_TLB_SIZE(r12) - * - * It can be re-entered by the linear mapping miss handler. However, to - * avoid too much complication, it will restart the whole fault at level - * 0 so we don't care too much about clobbers - * - * XXX That code was written back when we couldn't clobber r14. We can now, - * so we could probably optimize things a bit - */ -virt_page_table_tlb_miss: - /* Are we hitting a kernel page table ? */ - andi. r10,r15,0x8 - - /* The cool thing now is that r10 contains 0 for user and 8 for kernel, - * and we happen to have the swapper_pg_dir at offset 8 from the user - * pgdir in the PACA :-). - */ - add r11,r10,r13 - - /* If kernel, we need to clear MAS1 TID */ - beq 1f - /* XXX replace the RMW cycles with immediate loads + writes */ - mfspr r10,SPRN_MAS1 - rlwinm r10,r10,0,16,1 /* Clear TID */ - mtspr SPRN_MAS1,r10 -1: -BEGIN_MMU_FTR_SECTION - /* Search if we already have a TLB entry for that virtual address, and - * if we do, bail out. - */ - PPC_TLBSRX_DOT(0,R16) - beq virt_page_table_tlb_miss_done -END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_TLBRSRV) - - /* Now, we need to walk the page tables. First check if we are in - * range. - */ - rldicl. r10,r16,64-(VPTE_INDEX_SIZE+3),VPTE_INDEX_SIZE+3+4 - bne- virt_page_table_tlb_miss_fault - - /* Get the PGD pointer */ - ld r15,PACAPGD(r11) - cmpldi cr0,r15,0 - beq- virt_page_table_tlb_miss_fault - - /* Get to PGD entry */ - rldicl r11,r16,64-VPTE_PGD_SHIFT,64-PGD_INDEX_SIZE-3 - clrrdi r10,r11,3 - ldx r15,r10,r15 - cmpdi cr0,r15,0 - bge virt_page_table_tlb_miss_fault - -#ifndef CONFIG_PPC_64K_PAGES - /* Get to PUD entry */ - rldicl r11,r16,64-VPTE_PUD_SHIFT,64-PUD_INDEX_SIZE-3 - clrrdi r10,r11,3 - ldx r15,r10,r15 - cmpdi cr0,r15,0 - bge virt_page_table_tlb_miss_fault -#endif /* CONFIG_PPC_64K_PAGES */ - - /* Get to PMD entry */ - rldicl r11,r16,64-VPTE_PMD_SHIFT,64-PMD_INDEX_SIZE-3 - clrrdi r10,r11,3 - ldx r15,r10,r15 - cmpdi cr0,r15,0 - bge virt_page_table_tlb_miss_fault - - /* Ok, we're all right, we can now create a kernel translation for - * a 4K or 64K page from r16 -> r15. - */ - /* Now we build the MAS: - * - * MAS 0 : Fully setup with defaults in MAS4 and TLBnCFG - * MAS 1 : Almost fully setup - * - PID already updated by caller if necessary - * - TSIZE for now is base page size always - * MAS 2 : Use defaults - * MAS 3+7 : Needs to be done - * - * So we only do MAS 2 and 3 for now... - */ - clrldi r11,r15,4 /* remove region ID from RPN */ - ori r10,r11,1 /* Or-in SR */ - -BEGIN_MMU_FTR_SECTION - srdi r16,r10,32 - mtspr SPRN_MAS3,r10 - mtspr SPRN_MAS7,r16 -MMU_FTR_SECTION_ELSE - mtspr SPRN_MAS7_MAS3,r10 -ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_USE_PAIRED_MAS) - - tlbwe - -BEGIN_MMU_FTR_SECTION -virt_page_table_tlb_miss_done: - - /* We have overridden MAS2:EPN but currently our primary TLB miss - * handler will always restore it so that should not be an issue, - * if we ever optimize the primary handler to not write MAS2 on - * some cases, we'll have to restore MAS2:EPN here based on the - * original fault's DEAR. If we do that we have to modify the - * ITLB miss handler to also store SRR0 in the exception frame - * as DEAR. - * - * However, one nasty thing we did is we cleared the reservation - * (well, potentially we did). We do a trick here thus if we - * are not a level 0 exception (we interrupted the TLB miss) we - * offset the return address by -4 in order to replay the tlbsrx - * instruction there - */ - subf r10,r13,r12 - cmpldi cr0,r10,PACA_EXTLB+EX_TLB_SIZE - bne- 1f - ld r11,PACA_EXTLB+EX_TLB_SIZE+EX_TLB_SRR0(r13) - addi r10,r11,-4 - std r10,PACA_EXTLB+EX_TLB_SIZE+EX_TLB_SRR0(r13) -1: -END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_TLBRSRV) - /* Return to caller, normal case */ - TLB_MISS_STATS_X(MMSTAT_TLB_MISS_PT_OK); - TLB_MISS_EPILOG_SUCCESS - rfi - -virt_page_table_tlb_miss_fault: - /* If we fault here, things are a little bit tricky. We need to call - * either data or instruction store fault, and we need to retrieve - * the original fault address and ESR (for data). - * - * The thing is, we know that in normal circumstances, this is - * always called as a second level tlb miss for SW load or as a first - * level TLB miss for HW load, so we should be able to peek at the - * relevant information in the first exception frame in the PACA. - * - * However, we do need to double check that, because we may just hit - * a stray kernel pointer or a userland attack trying to hit those - * areas. If that is the case, we do a data fault. (We can't get here - * from an instruction tlb miss anyway). - * - * Note also that when going to a fault, we must unwind the previous - * level as well. Since we are doing that, we don't need to clear or - * restore the TLB reservation neither. - */ - subf r10,r13,r12 - cmpldi cr0,r10,PACA_EXTLB+EX_TLB_SIZE - bne- virt_page_table_tlb_miss_whacko_fault - - /* We dig the original DEAR and ESR from slot 0 */ - ld r15,EX_TLB_DEAR+PACA_EXTLB(r13) - ld r16,EX_TLB_ESR+PACA_EXTLB(r13) - - /* We check for the "special" ESR value for instruction faults */ - cmpdi cr0,r16,-1 - beq 1f - mtspr SPRN_DEAR,r15 - mtspr SPRN_ESR,r16 - TLB_MISS_STATS_D(MMSTAT_TLB_MISS_PT_FAULT); - TLB_MISS_EPILOG_ERROR - b exc_data_storage_book3e -1: TLB_MISS_STATS_I(MMSTAT_TLB_MISS_PT_FAULT); - TLB_MISS_EPILOG_ERROR - b exc_instruction_storage_book3e - -virt_page_table_tlb_miss_whacko_fault: - /* The linear fault will restart everything so ESR and DEAR will - * not have been clobbered, let's just fault with what we have - */ - TLB_MISS_STATS_X(MMSTAT_TLB_MISS_PT_FAULT); - TLB_MISS_EPILOG_ERROR - b exc_data_storage_book3e - - -/************************************************************** - * * - * TLB miss handling for Book3E with hw page table support * - * * - **************************************************************/ - - -/* Data TLB miss */ - START_EXCEPTION(data_tlb_miss_htw) - TLB_MISS_PROLOG - - /* Now we handle the fault proper. We only save DEAR in normal - * fault case since that's the only interesting values here. - * We could probably also optimize by not saving SRR0/1 in the - * linear mapping case but I'll leave that for later - */ - mfspr r14,SPRN_ESR - mfspr r16,SPRN_DEAR /* get faulting address */ - srdi r11,r16,60 /* get region */ - cmpldi cr0,r11,0xc /* linear mapping ? */ - TLB_MISS_STATS_SAVE_INFO - beq tlb_load_linear /* yes -> go to linear map load */ - - /* We do the user/kernel test for the PID here along with the RW test - */ - cmpldi cr0,r11,0 /* Check for user region */ - ld r15,PACAPGD(r13) /* Load user pgdir */ - beq htw_tlb_miss - - /* XXX replace the RMW cycles with immediate loads + writes */ -1: mfspr r10,SPRN_MAS1 - cmpldi cr0,r11,8 /* Check for vmalloc region */ - rlwinm r10,r10,0,16,1 /* Clear TID */ - mtspr SPRN_MAS1,r10 - ld r15,PACA_KERNELPGD(r13) /* Load kernel pgdir */ - beq+ htw_tlb_miss - - /* We got a crappy address, just fault with whatever DEAR and ESR - * are here - */ - TLB_MISS_STATS_D(MMSTAT_TLB_MISS_NORM_FAULT) - TLB_MISS_EPILOG_ERROR - b exc_data_storage_book3e - -/* Instruction TLB miss */ - START_EXCEPTION(instruction_tlb_miss_htw) - TLB_MISS_PROLOG - - /* If we take a recursive fault, the second level handler may need - * to know whether we are handling a data or instruction fault in - * order to get to the right store fault handler. We provide that - * info by keeping a crazy value for ESR in r14 - */ - li r14,-1 /* store to exception frame is done later */ - - /* Now we handle the fault proper. We only save DEAR in the non - * linear mapping case since we know the linear mapping case will - * not re-enter. We could indeed optimize and also not save SRR0/1 - * in the linear mapping case but I'll leave that for later - * - * Faulting address is SRR0 which is already in r16 - */ - srdi r11,r16,60 /* get region */ - cmpldi cr0,r11,0xc /* linear mapping ? */ - TLB_MISS_STATS_SAVE_INFO - beq tlb_load_linear /* yes -> go to linear map load */ - - /* We do the user/kernel test for the PID here along with the RW test - */ - cmpldi cr0,r11,0 /* Check for user region */ - ld r15,PACAPGD(r13) /* Load user pgdir */ - beq htw_tlb_miss - - /* XXX replace the RMW cycles with immediate loads + writes */ -1: mfspr r10,SPRN_MAS1 - cmpldi cr0,r11,8 /* Check for vmalloc region */ - rlwinm r10,r10,0,16,1 /* Clear TID */ - mtspr SPRN_MAS1,r10 - ld r15,PACA_KERNELPGD(r13) /* Load kernel pgdir */ - beq+ htw_tlb_miss - - /* We got a crappy address, just fault */ - TLB_MISS_STATS_I(MMSTAT_TLB_MISS_NORM_FAULT) - TLB_MISS_EPILOG_ERROR - b exc_instruction_storage_book3e - - -/* - * This is the guts of the second-level TLB miss handler for direct - * misses. We are entered with: - * - * r16 = virtual page table faulting address - * r15 = PGD pointer - * r14 = ESR - * r13 = PACA - * r12 = TLB exception frame in PACA - * r11 = crap (free to use) - * r10 = crap (free to use) - * - * It can be re-entered by the linear mapping miss handler. However, to - * avoid too much complication, it will save/restore things for us - */ -htw_tlb_miss: - /* Search if we already have a TLB entry for that virtual address, and - * if we do, bail out. - * - * MAS1:IND should be already set based on MAS4 - */ - PPC_TLBSRX_DOT(0,R16) - beq htw_tlb_miss_done - - /* Now, we need to walk the page tables. First check if we are in - * range. - */ - rldicl. r10,r16,64-PGTABLE_EADDR_SIZE,PGTABLE_EADDR_SIZE+4 - bne- htw_tlb_miss_fault - - /* Get the PGD pointer */ - cmpldi cr0,r15,0 - beq- htw_tlb_miss_fault - - /* Get to PGD entry */ - rldicl r11,r16,64-(PGDIR_SHIFT-3),64-PGD_INDEX_SIZE-3 - clrrdi r10,r11,3 - ldx r15,r10,r15 - cmpdi cr0,r15,0 - bge htw_tlb_miss_fault - -#ifndef CONFIG_PPC_64K_PAGES - /* Get to PUD entry */ - rldicl r11,r16,64-(PUD_SHIFT-3),64-PUD_INDEX_SIZE-3 - clrrdi r10,r11,3 - ldx r15,r10,r15 - cmpdi cr0,r15,0 - bge htw_tlb_miss_fault -#endif /* CONFIG_PPC_64K_PAGES */ - - /* Get to PMD entry */ - rldicl r11,r16,64-(PMD_SHIFT-3),64-PMD_INDEX_SIZE-3 - clrrdi r10,r11,3 - ldx r15,r10,r15 - cmpdi cr0,r15,0 - bge htw_tlb_miss_fault - - /* Ok, we're all right, we can now create an indirect entry for - * a 1M or 256M page. - * - * The last trick is now that because we use "half" pages for - * the HTW (1M IND is 2K and 256M IND is 32K) we need to account - * for an added LSB bit to the RPN. For 64K pages, there is no - * problem as we already use 32K arrays (half PTE pages), but for - * 4K page we need to extract a bit from the virtual address and - * insert it into the "PA52" bit of the RPN. - */ -#ifndef CONFIG_PPC_64K_PAGES - rlwimi r15,r16,32-9,20,20 -#endif - /* Now we build the MAS: - * - * MAS 0 : Fully setup with defaults in MAS4 and TLBnCFG - * MAS 1 : Almost fully setup - * - PID already updated by caller if necessary - * - TSIZE for now is base ind page size always - * MAS 2 : Use defaults - * MAS 3+7 : Needs to be done - */ -#ifdef CONFIG_PPC_64K_PAGES - ori r10,r15,(BOOK3E_PAGESZ_64K << MAS3_SPSIZE_SHIFT) -#else - ori r10,r15,(BOOK3E_PAGESZ_4K << MAS3_SPSIZE_SHIFT) -#endif - -BEGIN_MMU_FTR_SECTION - srdi r16,r10,32 - mtspr SPRN_MAS3,r10 - mtspr SPRN_MAS7,r16 -MMU_FTR_SECTION_ELSE - mtspr SPRN_MAS7_MAS3,r10 -ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_USE_PAIRED_MAS) - - tlbwe - -htw_tlb_miss_done: - /* We don't bother with restoring DEAR or ESR since we know we are - * level 0 and just going back to userland. They are only needed - * if you are going to take an access fault - */ - TLB_MISS_STATS_X(MMSTAT_TLB_MISS_PT_OK) - TLB_MISS_EPILOG_SUCCESS - rfi - -htw_tlb_miss_fault: - /* We need to check if it was an instruction miss. We know this - * though because r14 would contain -1 - */ - cmpdi cr0,r14,-1 - beq 1f - mtspr SPRN_DEAR,r16 - mtspr SPRN_ESR,r14 - TLB_MISS_STATS_D(MMSTAT_TLB_MISS_PT_FAULT) - TLB_MISS_EPILOG_ERROR - b exc_data_storage_book3e -1: TLB_MISS_STATS_I(MMSTAT_TLB_MISS_PT_FAULT) - TLB_MISS_EPILOG_ERROR - b exc_instruction_storage_book3e - -/* - * This is the guts of "any" level TLB miss handler for kernel linear - * mapping misses. We are entered with: - * - * - * r16 = faulting address - * r15 = crap (free to use) - * r14 = ESR (data) or -1 (instruction) - * r13 = PACA - * r12 = TLB exception frame in PACA - * r11 = crap (free to use) - * r10 = crap (free to use) - * - * In addition we know that we will not re-enter, so in theory, we could - * use a simpler epilog not restoring SRR0/1 etc.. but we'll do that later. - * - * We also need to be careful about MAS registers here & TLB reservation, - * as we know we'll have clobbered them if we interrupt the main TLB miss - * handlers in which case we probably want to do a full restart at level - * 0 rather than saving / restoring the MAS. - * - * Note: If we care about performance of that core, we can easily shuffle - * a few things around - */ -tlb_load_linear: - /* For now, we assume the linear mapping is contiguous and stops at - * linear_map_top. We also assume the size is a multiple of 1G, thus - * we only use 1G pages for now. That might have to be changed in a - * final implementation, especially when dealing with hypervisors - */ - ld r11,PACATOC(r13) - ld r11,linear_map_top@got(r11) - ld r10,0(r11) - tovirt(10,10) - cmpld cr0,r16,r10 - bge tlb_load_linear_fault - - /* MAS1 need whole new setup. */ - li r15,(BOOK3E_PAGESZ_1GB< - * IBM Corp. - * - * Derived from arch/ppc/mm/init.c: - * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) - * - * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au) - * and Cort Dougan (PReP) (cort@cs.nmt.edu) - * Copyright (C) 1996 Paul Mackerras - * - * Derived from "arch/i386/mm/init.c" - * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include - -#include - -/* - * This struct lists the sw-supported page sizes. The hardawre MMU may support - * other sizes not listed here. The .ind field is only used on MMUs that have - * indirect page table entries. - */ -#if defined(CONFIG_PPC_BOOK3E_MMU) || defined(CONFIG_PPC_8xx) -#ifdef CONFIG_PPC_FSL_BOOK3E -struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT] = { - [MMU_PAGE_4K] = { - .shift = 12, - .enc = BOOK3E_PAGESZ_4K, - }, - [MMU_PAGE_2M] = { - .shift = 21, - .enc = BOOK3E_PAGESZ_2M, - }, - [MMU_PAGE_4M] = { - .shift = 22, - .enc = BOOK3E_PAGESZ_4M, - }, - [MMU_PAGE_16M] = { - .shift = 24, - .enc = BOOK3E_PAGESZ_16M, - }, - [MMU_PAGE_64M] = { - .shift = 26, - .enc = BOOK3E_PAGESZ_64M, - }, - [MMU_PAGE_256M] = { - .shift = 28, - .enc = BOOK3E_PAGESZ_256M, - }, - [MMU_PAGE_1G] = { - .shift = 30, - .enc = BOOK3E_PAGESZ_1GB, - }, -}; -#elif defined(CONFIG_PPC_8xx) -struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT] = { - /* we only manage 4k and 16k pages as normal pages */ -#ifdef CONFIG_PPC_4K_PAGES - [MMU_PAGE_4K] = { - .shift = 12, - }, -#else - [MMU_PAGE_16K] = { - .shift = 14, - }, -#endif - [MMU_PAGE_512K] = { - .shift = 19, - }, - [MMU_PAGE_8M] = { - .shift = 23, - }, -}; -#else -struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT] = { - [MMU_PAGE_4K] = { - .shift = 12, - .ind = 20, - .enc = BOOK3E_PAGESZ_4K, - }, - [MMU_PAGE_16K] = { - .shift = 14, - .enc = BOOK3E_PAGESZ_16K, - }, - [MMU_PAGE_64K] = { - .shift = 16, - .ind = 28, - .enc = BOOK3E_PAGESZ_64K, - }, - [MMU_PAGE_1M] = { - .shift = 20, - .enc = BOOK3E_PAGESZ_1M, - }, - [MMU_PAGE_16M] = { - .shift = 24, - .ind = 36, - .enc = BOOK3E_PAGESZ_16M, - }, - [MMU_PAGE_256M] = { - .shift = 28, - .enc = BOOK3E_PAGESZ_256M, - }, - [MMU_PAGE_1G] = { - .shift = 30, - .enc = BOOK3E_PAGESZ_1GB, - }, -}; -#endif /* CONFIG_FSL_BOOKE */ - -static inline int mmu_get_tsize(int psize) -{ - return mmu_psize_defs[psize].enc; -} -#else -static inline int mmu_get_tsize(int psize) -{ - /* This isn't used on !Book3E for now */ - return 0; -} -#endif /* CONFIG_PPC_BOOK3E_MMU */ - -/* The variables below are currently only used on 64-bit Book3E - * though this will probably be made common with other nohash - * implementations at some point - */ -#ifdef CONFIG_PPC64 - -int mmu_linear_psize; /* Page size used for the linear mapping */ -int mmu_pte_psize; /* Page size used for PTE pages */ -int mmu_vmemmap_psize; /* Page size used for the virtual mem map */ -int book3e_htw_mode; /* HW tablewalk? Value is PPC_HTW_* */ -unsigned long linear_map_top; /* Top of linear mapping */ - - -/* - * Number of bytes to add to SPRN_SPRG_TLB_EXFRAME on crit/mcheck/debug - * exceptions. This is used for bolted and e6500 TLB miss handlers which - * do not modify this SPRG in the TLB miss code; for other TLB miss handlers, - * this is set to zero. - */ -int extlb_level_exc; - -#endif /* CONFIG_PPC64 */ - -#ifdef CONFIG_PPC_FSL_BOOK3E -/* next_tlbcam_idx is used to round-robin tlbcam entry assignment */ -DEFINE_PER_CPU(int, next_tlbcam_idx); -EXPORT_PER_CPU_SYMBOL(next_tlbcam_idx); -#endif - -/* - * Base TLB flushing operations: - * - * - flush_tlb_mm(mm) flushes the specified mm context TLB's - * - flush_tlb_page(vma, vmaddr) flushes one page - * - flush_tlb_range(vma, start, end) flushes a range of pages - * - flush_tlb_kernel_range(start, end) flushes kernel pages - * - * - local_* variants of page and mm only apply to the current - * processor - */ - -/* - * These are the base non-SMP variants of page and mm flushing - */ -void local_flush_tlb_mm(struct mm_struct *mm) -{ - unsigned int pid; - - preempt_disable(); - pid = mm->context.id; - if (pid != MMU_NO_CONTEXT) - _tlbil_pid(pid); - preempt_enable(); -} -EXPORT_SYMBOL(local_flush_tlb_mm); - -void __local_flush_tlb_page(struct mm_struct *mm, unsigned long vmaddr, - int tsize, int ind) -{ - unsigned int pid; - - preempt_disable(); - pid = mm ? mm->context.id : 0; - if (pid != MMU_NO_CONTEXT) - _tlbil_va(vmaddr, pid, tsize, ind); - preempt_enable(); -} - -void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr) -{ - __local_flush_tlb_page(vma ? vma->vm_mm : NULL, vmaddr, - mmu_get_tsize(mmu_virtual_psize), 0); -} -EXPORT_SYMBOL(local_flush_tlb_page); - -/* - * And here are the SMP non-local implementations - */ -#ifdef CONFIG_SMP - -static DEFINE_RAW_SPINLOCK(tlbivax_lock); - -struct tlb_flush_param { - unsigned long addr; - unsigned int pid; - unsigned int tsize; - unsigned int ind; -}; - -static void do_flush_tlb_mm_ipi(void *param) -{ - struct tlb_flush_param *p = param; - - _tlbil_pid(p ? p->pid : 0); -} - -static void do_flush_tlb_page_ipi(void *param) -{ - struct tlb_flush_param *p = param; - - _tlbil_va(p->addr, p->pid, p->tsize, p->ind); -} - - -/* Note on invalidations and PID: - * - * We snapshot the PID with preempt disabled. At this point, it can still - * change either because: - * - our context is being stolen (PID -> NO_CONTEXT) on another CPU - * - we are invaliating some target that isn't currently running here - * and is concurrently acquiring a new PID on another CPU - * - some other CPU is re-acquiring a lost PID for this mm - * etc... - * - * However, this shouldn't be a problem as we only guarantee - * invalidation of TLB entries present prior to this call, so we - * don't care about the PID changing, and invalidating a stale PID - * is generally harmless. - */ - -void flush_tlb_mm(struct mm_struct *mm) -{ - unsigned int pid; - - preempt_disable(); - pid = mm->context.id; - if (unlikely(pid == MMU_NO_CONTEXT)) - goto no_context; - if (!mm_is_core_local(mm)) { - struct tlb_flush_param p = { .pid = pid }; - /* Ignores smp_processor_id() even if set. */ - smp_call_function_many(mm_cpumask(mm), - do_flush_tlb_mm_ipi, &p, 1); - } - _tlbil_pid(pid); - no_context: - preempt_enable(); -} -EXPORT_SYMBOL(flush_tlb_mm); - -void __flush_tlb_page(struct mm_struct *mm, unsigned long vmaddr, - int tsize, int ind) -{ - struct cpumask *cpu_mask; - unsigned int pid; - - /* - * This function as well as __local_flush_tlb_page() must only be called - * for user contexts. - */ - if (WARN_ON(!mm)) - return; - - preempt_disable(); - pid = mm->context.id; - if (unlikely(pid == MMU_NO_CONTEXT)) - goto bail; - cpu_mask = mm_cpumask(mm); - if (!mm_is_core_local(mm)) { - /* If broadcast tlbivax is supported, use it */ - if (mmu_has_feature(MMU_FTR_USE_TLBIVAX_BCAST)) { - int lock = mmu_has_feature(MMU_FTR_LOCK_BCAST_INVAL); - if (lock) - raw_spin_lock(&tlbivax_lock); - _tlbivax_bcast(vmaddr, pid, tsize, ind); - if (lock) - raw_spin_unlock(&tlbivax_lock); - goto bail; - } else { - struct tlb_flush_param p = { - .pid = pid, - .addr = vmaddr, - .tsize = tsize, - .ind = ind, - }; - /* Ignores smp_processor_id() even if set in cpu_mask */ - smp_call_function_many(cpu_mask, - do_flush_tlb_page_ipi, &p, 1); - } - } - _tlbil_va(vmaddr, pid, tsize, ind); - bail: - preempt_enable(); -} - -void flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr) -{ -#ifdef CONFIG_HUGETLB_PAGE - if (vma && is_vm_hugetlb_page(vma)) - flush_hugetlb_page(vma, vmaddr); -#endif - - __flush_tlb_page(vma ? vma->vm_mm : NULL, vmaddr, - mmu_get_tsize(mmu_virtual_psize), 0); -} -EXPORT_SYMBOL(flush_tlb_page); - -#endif /* CONFIG_SMP */ - -#ifdef CONFIG_PPC_47x -void __init early_init_mmu_47x(void) -{ -#ifdef CONFIG_SMP - unsigned long root = of_get_flat_dt_root(); - if (of_get_flat_dt_prop(root, "cooperative-partition", NULL)) - mmu_clear_feature(MMU_FTR_USE_TLBIVAX_BCAST); -#endif /* CONFIG_SMP */ -} -#endif /* CONFIG_PPC_47x */ - -/* - * Flush kernel TLB entries in the given range - */ -void flush_tlb_kernel_range(unsigned long start, unsigned long end) -{ -#ifdef CONFIG_SMP - preempt_disable(); - smp_call_function(do_flush_tlb_mm_ipi, NULL, 1); - _tlbil_pid(0); - preempt_enable(); -#else - _tlbil_pid(0); -#endif -} -EXPORT_SYMBOL(flush_tlb_kernel_range); - -/* - * Currently, for range flushing, we just do a full mm flush. This should - * be optimized based on a threshold on the size of the range, since - * some implementation can stack multiple tlbivax before a tlbsync but - * for now, we keep it that way - */ -void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, - unsigned long end) - -{ - if (end - start == PAGE_SIZE && !(start & ~PAGE_MASK)) - flush_tlb_page(vma, start); - else - flush_tlb_mm(vma->vm_mm); -} -EXPORT_SYMBOL(flush_tlb_range); - -void tlb_flush(struct mmu_gather *tlb) -{ - flush_tlb_mm(tlb->mm); -} - -/* - * Below are functions specific to the 64-bit variant of Book3E though that - * may change in the future - */ - -#ifdef CONFIG_PPC64 - -/* - * Handling of virtual linear page tables or indirect TLB entries - * flushing when PTE pages are freed - */ -void tlb_flush_pgtable(struct mmu_gather *tlb, unsigned long address) -{ - int tsize = mmu_psize_defs[mmu_pte_psize].enc; - - if (book3e_htw_mode != PPC_HTW_NONE) { - unsigned long start = address & PMD_MASK; - unsigned long end = address + PMD_SIZE; - unsigned long size = 1UL << mmu_psize_defs[mmu_pte_psize].shift; - - /* This isn't the most optimal, ideally we would factor out the - * while preempt & CPU mask mucking around, or even the IPI but - * it will do for now - */ - while (start < end) { - __flush_tlb_page(tlb->mm, start, tsize, 1); - start += size; - } - } else { - unsigned long rmask = 0xf000000000000000ul; - unsigned long rid = (address & rmask) | 0x1000000000000000ul; - unsigned long vpte = address & ~rmask; - -#ifdef CONFIG_PPC_64K_PAGES - vpte = (vpte >> (PAGE_SHIFT - 4)) & ~0xfffful; -#else - vpte = (vpte >> (PAGE_SHIFT - 3)) & ~0xffful; -#endif - vpte |= rid; - __flush_tlb_page(tlb->mm, vpte, tsize, 0); - } -} - -static void setup_page_sizes(void) -{ - unsigned int tlb0cfg; - unsigned int tlb0ps; - unsigned int eptcfg; - int i, psize; - -#ifdef CONFIG_PPC_FSL_BOOK3E - unsigned int mmucfg = mfspr(SPRN_MMUCFG); - int fsl_mmu = mmu_has_feature(MMU_FTR_TYPE_FSL_E); - - if (fsl_mmu && (mmucfg & MMUCFG_MAVN) == MMUCFG_MAVN_V1) { - unsigned int tlb1cfg = mfspr(SPRN_TLB1CFG); - unsigned int min_pg, max_pg; - - min_pg = (tlb1cfg & TLBnCFG_MINSIZE) >> TLBnCFG_MINSIZE_SHIFT; - max_pg = (tlb1cfg & TLBnCFG_MAXSIZE) >> TLBnCFG_MAXSIZE_SHIFT; - - for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) { - struct mmu_psize_def *def; - unsigned int shift; - - def = &mmu_psize_defs[psize]; - shift = def->shift; - - if (shift == 0 || shift & 1) - continue; - - /* adjust to be in terms of 4^shift Kb */ - shift = (shift - 10) >> 1; - - if ((shift >= min_pg) && (shift <= max_pg)) - def->flags |= MMU_PAGE_SIZE_DIRECT; - } - - goto out; - } - - if (fsl_mmu && (mmucfg & MMUCFG_MAVN) == MMUCFG_MAVN_V2) { - u32 tlb1cfg, tlb1ps; - - tlb0cfg = mfspr(SPRN_TLB0CFG); - tlb1cfg = mfspr(SPRN_TLB1CFG); - tlb1ps = mfspr(SPRN_TLB1PS); - eptcfg = mfspr(SPRN_EPTCFG); - - if ((tlb1cfg & TLBnCFG_IND) && (tlb0cfg & TLBnCFG_PT)) - book3e_htw_mode = PPC_HTW_E6500; - - /* - * We expect 4K subpage size and unrestricted indirect size. - * The lack of a restriction on indirect size is a Freescale - * extension, indicated by PSn = 0 but SPSn != 0. - */ - if (eptcfg != 2) - book3e_htw_mode = PPC_HTW_NONE; - - for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) { - struct mmu_psize_def *def = &mmu_psize_defs[psize]; - - if (!def->shift) - continue; - - if (tlb1ps & (1U << (def->shift - 10))) { - def->flags |= MMU_PAGE_SIZE_DIRECT; - - if (book3e_htw_mode && psize == MMU_PAGE_2M) - def->flags |= MMU_PAGE_SIZE_INDIRECT; - } - } - - goto out; - } -#endif - - tlb0cfg = mfspr(SPRN_TLB0CFG); - tlb0ps = mfspr(SPRN_TLB0PS); - eptcfg = mfspr(SPRN_EPTCFG); - - /* Look for supported direct sizes */ - for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) { - struct mmu_psize_def *def = &mmu_psize_defs[psize]; - - if (tlb0ps & (1U << (def->shift - 10))) - def->flags |= MMU_PAGE_SIZE_DIRECT; - } - - /* Indirect page sizes supported ? */ - if ((tlb0cfg & TLBnCFG_IND) == 0 || - (tlb0cfg & TLBnCFG_PT) == 0) - goto out; - - book3e_htw_mode = PPC_HTW_IBM; - - /* Now, we only deal with one IND page size for each - * direct size. Hopefully all implementations today are - * unambiguous, but we might want to be careful in the - * future. - */ - for (i = 0; i < 3; i++) { - unsigned int ps, sps; - - sps = eptcfg & 0x1f; - eptcfg >>= 5; - ps = eptcfg & 0x1f; - eptcfg >>= 5; - if (!ps || !sps) - continue; - for (psize = 0; psize < MMU_PAGE_COUNT; psize++) { - struct mmu_psize_def *def = &mmu_psize_defs[psize]; - - if (ps == (def->shift - 10)) - def->flags |= MMU_PAGE_SIZE_INDIRECT; - if (sps == (def->shift - 10)) - def->ind = ps + 10; - } - } - -out: - /* Cleanup array and print summary */ - pr_info("MMU: Supported page sizes\n"); - for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) { - struct mmu_psize_def *def = &mmu_psize_defs[psize]; - const char *__page_type_names[] = { - "unsupported", - "direct", - "indirect", - "direct & indirect" - }; - if (def->flags == 0) { - def->shift = 0; - continue; - } - pr_info(" %8ld KB as %s\n", 1ul << (def->shift - 10), - __page_type_names[def->flags & 0x3]); - } -} - -static void setup_mmu_htw(void) -{ - /* - * If we want to use HW tablewalk, enable it by patching the TLB miss - * handlers to branch to the one dedicated to it. - */ - - switch (book3e_htw_mode) { - case PPC_HTW_IBM: - patch_exception(0x1c0, exc_data_tlb_miss_htw_book3e); - patch_exception(0x1e0, exc_instruction_tlb_miss_htw_book3e); - break; -#ifdef CONFIG_PPC_FSL_BOOK3E - case PPC_HTW_E6500: - extlb_level_exc = EX_TLB_SIZE; - patch_exception(0x1c0, exc_data_tlb_miss_e6500_book3e); - patch_exception(0x1e0, exc_instruction_tlb_miss_e6500_book3e); - break; -#endif - } - pr_info("MMU: Book3E HW tablewalk %s\n", - book3e_htw_mode != PPC_HTW_NONE ? "enabled" : "not supported"); -} - -/* - * Early initialization of the MMU TLB code - */ -static void early_init_this_mmu(void) -{ - unsigned int mas4; - - /* Set MAS4 based on page table setting */ - - mas4 = 0x4 << MAS4_WIMGED_SHIFT; - switch (book3e_htw_mode) { - case PPC_HTW_E6500: - mas4 |= MAS4_INDD; - mas4 |= BOOK3E_PAGESZ_2M << MAS4_TSIZED_SHIFT; - mas4 |= MAS4_TLBSELD(1); - mmu_pte_psize = MMU_PAGE_2M; - break; - - case PPC_HTW_IBM: - mas4 |= MAS4_INDD; -#ifdef CONFIG_PPC_64K_PAGES - mas4 |= BOOK3E_PAGESZ_256M << MAS4_TSIZED_SHIFT; - mmu_pte_psize = MMU_PAGE_256M; -#else - mas4 |= BOOK3E_PAGESZ_1M << MAS4_TSIZED_SHIFT; - mmu_pte_psize = MMU_PAGE_1M; -#endif - break; - - case PPC_HTW_NONE: -#ifdef CONFIG_PPC_64K_PAGES - mas4 |= BOOK3E_PAGESZ_64K << MAS4_TSIZED_SHIFT; -#else - mas4 |= BOOK3E_PAGESZ_4K << MAS4_TSIZED_SHIFT; -#endif - mmu_pte_psize = mmu_virtual_psize; - break; - } - mtspr(SPRN_MAS4, mas4); - -#ifdef CONFIG_PPC_FSL_BOOK3E - if (mmu_has_feature(MMU_FTR_TYPE_FSL_E)) { - unsigned int num_cams; - int __maybe_unused cpu = smp_processor_id(); - bool map = true; - - /* use a quarter of the TLBCAM for bolted linear map */ - num_cams = (mfspr(SPRN_TLB1CFG) & TLBnCFG_N_ENTRY) / 4; - - /* - * Only do the mapping once per core, or else the - * transient mapping would cause problems. - */ -#ifdef CONFIG_SMP - if (hweight32(get_tensr()) > 1) - map = false; -#endif - - if (map) - linear_map_top = map_mem_in_cams(linear_map_top, - num_cams, false); - } -#endif - - /* A sync won't hurt us after mucking around with - * the MMU configuration - */ - mb(); -} - -static void __init early_init_mmu_global(void) -{ - /* XXX This will have to be decided at runtime, but right - * now our boot and TLB miss code hard wires it. Ideally - * we should find out a suitable page size and patch the - * TLB miss code (either that or use the PACA to store - * the value we want) - */ - mmu_linear_psize = MMU_PAGE_1G; - - /* XXX This should be decided at runtime based on supported - * page sizes in the TLB, but for now let's assume 16M is - * always there and a good fit (which it probably is) - * - * Freescale booke only supports 4K pages in TLB0, so use that. - */ - if (mmu_has_feature(MMU_FTR_TYPE_FSL_E)) - mmu_vmemmap_psize = MMU_PAGE_4K; - else - mmu_vmemmap_psize = MMU_PAGE_16M; - - /* XXX This code only checks for TLB 0 capabilities and doesn't - * check what page size combos are supported by the HW. It - * also doesn't handle the case where a separate array holds - * the IND entries from the array loaded by the PT. - */ - /* Look for supported page sizes */ - setup_page_sizes(); - - /* Look for HW tablewalk support */ - setup_mmu_htw(); - -#ifdef CONFIG_PPC_FSL_BOOK3E - if (mmu_has_feature(MMU_FTR_TYPE_FSL_E)) { - if (book3e_htw_mode == PPC_HTW_NONE) { - extlb_level_exc = EX_TLB_SIZE; - patch_exception(0x1c0, exc_data_tlb_miss_bolted_book3e); - patch_exception(0x1e0, - exc_instruction_tlb_miss_bolted_book3e); - } - } -#endif - - /* Set the global containing the top of the linear mapping - * for use by the TLB miss code - */ - linear_map_top = memblock_end_of_DRAM(); -} - -static void __init early_mmu_set_memory_limit(void) -{ -#ifdef CONFIG_PPC_FSL_BOOK3E - if (mmu_has_feature(MMU_FTR_TYPE_FSL_E)) { - /* - * Limit memory so we dont have linear faults. - * Unlike memblock_set_current_limit, which limits - * memory available during early boot, this permanently - * reduces the memory available to Linux. We need to - * do this because highmem is not supported on 64-bit. - */ - memblock_enforce_memory_limit(linear_map_top); - } -#endif - - memblock_set_current_limit(linear_map_top); -} - -/* boot cpu only */ -void __init early_init_mmu(void) -{ - early_init_mmu_global(); - early_init_this_mmu(); - early_mmu_set_memory_limit(); -} - -void early_init_mmu_secondary(void) -{ - early_init_this_mmu(); -} - -void setup_initial_memory_limit(phys_addr_t first_memblock_base, - phys_addr_t first_memblock_size) -{ - /* On non-FSL Embedded 64-bit, we adjust the RMA size to match - * the bolted TLB entry. We know for now that only 1G - * entries are supported though that may eventually - * change. - * - * on FSL Embedded 64-bit, usually all RAM is bolted, but with - * unusual memory sizes it's possible for some RAM to not be mapped - * (such RAM is not used at all by Linux, since we don't support - * highmem on 64-bit). We limit ppc64_rma_size to what would be - * mappable if this memblock is the only one. Additional memblocks - * can only increase, not decrease, the amount that ends up getting - * mapped. We still limit max to 1G even if we'll eventually map - * more. This is due to what the early init code is set up to do. - * - * We crop it to the size of the first MEMBLOCK to - * avoid going over total available memory just in case... - */ -#ifdef CONFIG_PPC_FSL_BOOK3E - if (early_mmu_has_feature(MMU_FTR_TYPE_FSL_E)) { - unsigned long linear_sz; - unsigned int num_cams; - - /* use a quarter of the TLBCAM for bolted linear map */ - num_cams = (mfspr(SPRN_TLB1CFG) & TLBnCFG_N_ENTRY) / 4; - - linear_sz = map_mem_in_cams(first_memblock_size, num_cams, - true); - - ppc64_rma_size = min_t(u64, linear_sz, 0x40000000); - } else -#endif - ppc64_rma_size = min_t(u64, first_memblock_size, 0x40000000); - - /* Finally limit subsequent allocations */ - memblock_set_current_limit(first_memblock_base + ppc64_rma_size); -} -#else /* ! CONFIG_PPC64 */ -void __init early_init_mmu(void) -{ -#ifdef CONFIG_PPC_47x - early_init_mmu_47x(); -#endif - -#ifdef CONFIG_PPC_MM_SLICES -#if defined(CONFIG_PPC_8xx) - init_mm.context.slb_addr_limit = DEFAULT_MAP_WINDOW; -#endif -#endif -} -#endif /* CONFIG_PPC64 */ diff --git a/arch/powerpc/mm/tlb_nohash_low.S b/arch/powerpc/mm/tlb_nohash_low.S deleted file mode 100644 index e066a658acac..000000000000 --- a/arch/powerpc/mm/tlb_nohash_low.S +++ /dev/null @@ -1,491 +0,0 @@ -/* - * This file contains low-level functions for performing various - * types of TLB invalidations on various processors with no hash - * table. - * - * This file implements the following functions for all no-hash - * processors. Some aren't implemented for some variants. Some - * are inline in tlbflush.h - * - * - tlbil_va - * - tlbil_pid - * - tlbil_all - * - tlbivax_bcast - * - * Code mostly moved over from misc_32.S - * - * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) - * - * Partially rewritten by Cort Dougan (cort@cs.nmt.edu) - * Paul Mackerras, Kumar Gala and Benjamin Herrenschmidt. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#if defined(CONFIG_40x) - -/* - * 40x implementation needs only tlbil_va - */ -_GLOBAL(__tlbil_va) - /* We run the search with interrupts disabled because we have to change - * the PID and I don't want to preempt when that happens. - */ - mfmsr r5 - mfspr r6,SPRN_PID - wrteei 0 - mtspr SPRN_PID,r4 - tlbsx. r3, 0, r3 - mtspr SPRN_PID,r6 - wrtee r5 - bne 1f - sync - /* There are only 64 TLB entries, so r3 < 64, which means bit 25 is - * clear. Since 25 is the V bit in the TLB_TAG, loading this value - * will invalidate the TLB entry. */ - tlbwe r3, r3, TLB_TAG - isync -1: blr - -#elif defined(CONFIG_PPC_8xx) - -/* - * Nothing to do for 8xx, everything is inline - */ - -#elif defined(CONFIG_44x) /* Includes 47x */ - -/* - * 440 implementation uses tlbsx/we for tlbil_va and a full sweep - * of the TLB for everything else. - */ -_GLOBAL(__tlbil_va) - mfspr r5,SPRN_MMUCR - mfmsr r10 - - /* - * We write 16 bits of STID since 47x supports that much, we - * will never be passed out of bounds values on 440 (hopefully) - */ - rlwimi r5,r4,0,16,31 - - /* We have to run the search with interrupts disabled, otherwise - * an interrupt which causes a TLB miss can clobber the MMUCR - * between the mtspr and the tlbsx. - * - * Critical and Machine Check interrupts take care of saving - * and restoring MMUCR, so only normal interrupts have to be - * taken care of. - */ - wrteei 0 - mtspr SPRN_MMUCR,r5 - tlbsx. r6,0,r3 - bne 10f - sync -BEGIN_MMU_FTR_SECTION - b 2f -END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_47x) - /* On 440 There are only 64 TLB entries, so r3 < 64, which means bit - * 22, is clear. Since 22 is the V bit in the TLB_PAGEID, loading this - * value will invalidate the TLB entry. - */ - tlbwe r6,r6,PPC44x_TLB_PAGEID - isync -10: wrtee r10 - blr -2: -#ifdef CONFIG_PPC_47x - oris r7,r6,0x8000 /* specify way explicitly */ - clrrwi r4,r3,12 /* get an EPN for the hashing with V = 0 */ - ori r4,r4,PPC47x_TLBE_SIZE - tlbwe r4,r7,0 /* write it */ - isync - wrtee r10 - blr -#else /* CONFIG_PPC_47x */ -1: trap - EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,0; -#endif /* !CONFIG_PPC_47x */ - -_GLOBAL(_tlbil_all) -_GLOBAL(_tlbil_pid) -BEGIN_MMU_FTR_SECTION - b 2f -END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_47x) - li r3,0 - sync - - /* Load high watermark */ - lis r4,tlb_44x_hwater@ha - lwz r5,tlb_44x_hwater@l(r4) - -1: tlbwe r3,r3,PPC44x_TLB_PAGEID - addi r3,r3,1 - cmpw 0,r3,r5 - ble 1b - - isync - blr -2: -#ifdef CONFIG_PPC_47x - /* 476 variant. There's not simple way to do this, hopefully we'll - * try to limit the amount of such full invalidates - */ - mfmsr r11 /* Interrupts off */ - wrteei 0 - li r3,-1 /* Current set */ - lis r10,tlb_47x_boltmap@h - ori r10,r10,tlb_47x_boltmap@l - lis r7,0x8000 /* Specify way explicitly */ - - b 9f /* For each set */ - -1: li r9,4 /* Number of ways */ - li r4,0 /* Current way */ - li r6,0 /* Default entry value 0 */ - andi. r0,r8,1 /* Check if way 0 is bolted */ - mtctr r9 /* Load way counter */ - bne- 3f /* Bolted, skip loading it */ - -2: /* For each way */ - or r5,r3,r4 /* Make way|index for tlbre */ - rlwimi r5,r5,16,8,15 /* Copy index into position */ - tlbre r6,r5,0 /* Read entry */ -3: addis r4,r4,0x2000 /* Next way */ - andi. r0,r6,PPC47x_TLB0_VALID /* Valid entry ? */ - beq 4f /* Nope, skip it */ - rlwimi r7,r5,0,1,2 /* Insert way number */ - rlwinm r6,r6,0,21,19 /* Clear V */ - tlbwe r6,r7,0 /* Write it */ -4: bdnz 2b /* Loop for each way */ - srwi r8,r8,1 /* Next boltmap bit */ -9: cmpwi cr1,r3,255 /* Last set done ? */ - addi r3,r3,1 /* Next set */ - beq cr1,1f /* End of loop */ - andi. r0,r3,0x1f /* Need to load a new boltmap word ? */ - bne 1b /* No, loop */ - lwz r8,0(r10) /* Load boltmap entry */ - addi r10,r10,4 /* Next word */ - b 1b /* Then loop */ -1: isync /* Sync shadows */ - wrtee r11 -#else /* CONFIG_PPC_47x */ -1: trap - EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,0; -#endif /* !CONFIG_PPC_47x */ - blr - -#ifdef CONFIG_PPC_47x - -/* - * _tlbivax_bcast is only on 47x. We don't bother doing a runtime - * check though, it will blow up soon enough if we mistakenly try - * to use it on a 440. - */ -_GLOBAL(_tlbivax_bcast) - mfspr r5,SPRN_MMUCR - mfmsr r10 - rlwimi r5,r4,0,16,31 - wrteei 0 - mtspr SPRN_MMUCR,r5 - isync - PPC_TLBIVAX(0, R3) - isync - eieio - tlbsync -BEGIN_FTR_SECTION - b 1f -END_FTR_SECTION_IFSET(CPU_FTR_476_DD2) - sync - wrtee r10 - blr -/* - * DD2 HW could hang if in instruction fetch happens before msync completes. - * Touch enough instruction cache lines to ensure cache hits - */ -1: mflr r9 - bl 2f -2: mflr r6 - li r7,32 - PPC_ICBT(0,R6,R7) /* touch next cache line */ - add r6,r6,r7 - PPC_ICBT(0,R6,R7) /* touch next cache line */ - add r6,r6,r7 - PPC_ICBT(0,R6,R7) /* touch next cache line */ - sync - nop - nop - nop - nop - nop - nop - nop - nop - mtlr r9 - wrtee r10 - blr -#endif /* CONFIG_PPC_47x */ - -#elif defined(CONFIG_FSL_BOOKE) -/* - * FSL BookE implementations. - * - * Since feature sections are using _SECTION_ELSE we need - * to have the larger code path before the _SECTION_ELSE - */ - -/* - * Flush MMU TLB on the local processor - */ -_GLOBAL(_tlbil_all) -BEGIN_MMU_FTR_SECTION - li r3,(MMUCSR0_TLBFI)@l - mtspr SPRN_MMUCSR0, r3 -1: - mfspr r3,SPRN_MMUCSR0 - andi. r3,r3,MMUCSR0_TLBFI@l - bne 1b -MMU_FTR_SECTION_ELSE - PPC_TLBILX_ALL(0,R0) -ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_USE_TLBILX) - msync - isync - blr - -_GLOBAL(_tlbil_pid) -BEGIN_MMU_FTR_SECTION - slwi r3,r3,16 - mfmsr r10 - wrteei 0 - mfspr r4,SPRN_MAS6 /* save MAS6 */ - mtspr SPRN_MAS6,r3 - PPC_TLBILX_PID(0,R0) - mtspr SPRN_MAS6,r4 /* restore MAS6 */ - wrtee r10 -MMU_FTR_SECTION_ELSE - li r3,(MMUCSR0_TLBFI)@l - mtspr SPRN_MMUCSR0, r3 -1: - mfspr r3,SPRN_MMUCSR0 - andi. r3,r3,MMUCSR0_TLBFI@l - bne 1b -ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_USE_TLBILX) - msync - isync - blr - -/* - * Flush MMU TLB for a particular address, but only on the local processor - * (no broadcast) - */ -_GLOBAL(__tlbil_va) - mfmsr r10 - wrteei 0 - slwi r4,r4,16 - ori r4,r4,(MAS6_ISIZE(BOOK3E_PAGESZ_4K))@l - mtspr SPRN_MAS6,r4 /* assume AS=0 for now */ -BEGIN_MMU_FTR_SECTION - tlbsx 0,r3 - mfspr r4,SPRN_MAS1 /* check valid */ - andis. r3,r4,MAS1_VALID@h - beq 1f - rlwinm r4,r4,0,1,31 - mtspr SPRN_MAS1,r4 - tlbwe -MMU_FTR_SECTION_ELSE - PPC_TLBILX_VA(0,R3) -ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_USE_TLBILX) - msync - isync -1: wrtee r10 - blr -#elif defined(CONFIG_PPC_BOOK3E) -/* - * New Book3E (>= 2.06) implementation - * - * Note: We may be able to get away without the interrupt masking stuff - * if we save/restore MAS6 on exceptions that might modify it - */ -_GLOBAL(_tlbil_pid) - slwi r4,r3,MAS6_SPID_SHIFT - mfmsr r10 - wrteei 0 - mtspr SPRN_MAS6,r4 - PPC_TLBILX_PID(0,R0) - wrtee r10 - msync - isync - blr - -_GLOBAL(_tlbil_pid_noind) - slwi r4,r3,MAS6_SPID_SHIFT - mfmsr r10 - ori r4,r4,MAS6_SIND - wrteei 0 - mtspr SPRN_MAS6,r4 - PPC_TLBILX_PID(0,R0) - wrtee r10 - msync - isync - blr - -_GLOBAL(_tlbil_all) - PPC_TLBILX_ALL(0,R0) - msync - isync - blr - -_GLOBAL(_tlbil_va) - mfmsr r10 - wrteei 0 - cmpwi cr0,r6,0 - slwi r4,r4,MAS6_SPID_SHIFT - rlwimi r4,r5,MAS6_ISIZE_SHIFT,MAS6_ISIZE_MASK - beq 1f - rlwimi r4,r6,MAS6_SIND_SHIFT,MAS6_SIND -1: mtspr SPRN_MAS6,r4 /* assume AS=0 for now */ - PPC_TLBILX_VA(0,R3) - msync - isync - wrtee r10 - blr - -_GLOBAL(_tlbivax_bcast) - mfmsr r10 - wrteei 0 - cmpwi cr0,r6,0 - slwi r4,r4,MAS6_SPID_SHIFT - rlwimi r4,r5,MAS6_ISIZE_SHIFT,MAS6_ISIZE_MASK - beq 1f - rlwimi r4,r6,MAS6_SIND_SHIFT,MAS6_SIND -1: mtspr SPRN_MAS6,r4 /* assume AS=0 for now */ - PPC_TLBIVAX(0,R3) - eieio - tlbsync - sync - wrtee r10 - blr - -_GLOBAL(set_context) -#ifdef CONFIG_BDI_SWITCH - /* Context switch the PTE pointer for the Abatron BDI2000. - * The PGDIR is the second parameter. - */ - lis r5, abatron_pteptrs@h - ori r5, r5, abatron_pteptrs@l - stw r4, 0x4(r5) -#endif - mtspr SPRN_PID,r3 - isync /* Force context change */ - blr -#else -#error Unsupported processor type ! -#endif - -#if defined(CONFIG_PPC_FSL_BOOK3E) -/* - * extern void loadcam_entry(unsigned int index) - * - * Load TLBCAM[index] entry in to the L2 CAM MMU - * Must preserve r7, r8, r9, and r10 - */ -_GLOBAL(loadcam_entry) - mflr r5 - LOAD_REG_ADDR_PIC(r4, TLBCAM) - mtlr r5 - mulli r5,r3,TLBCAM_SIZE - add r3,r5,r4 - lwz r4,TLBCAM_MAS0(r3) - mtspr SPRN_MAS0,r4 - lwz r4,TLBCAM_MAS1(r3) - mtspr SPRN_MAS1,r4 - PPC_LL r4,TLBCAM_MAS2(r3) - mtspr SPRN_MAS2,r4 - lwz r4,TLBCAM_MAS3(r3) - mtspr SPRN_MAS3,r4 -BEGIN_MMU_FTR_SECTION - lwz r4,TLBCAM_MAS7(r3) - mtspr SPRN_MAS7,r4 -END_MMU_FTR_SECTION_IFSET(MMU_FTR_BIG_PHYS) - isync - tlbwe - isync - blr - -/* - * Load multiple TLB entries at once, using an alternate-space - * trampoline so that we don't have to care about whether the same - * TLB entry maps us before and after. - * - * r3 = first entry to write - * r4 = number of entries to write - * r5 = temporary tlb entry - */ -_GLOBAL(loadcam_multi) - mflr r8 - - /* - * Set up temporary TLB entry that is the same as what we're - * running from, but in AS=1. - */ - bl 1f -1: mflr r6 - tlbsx 0,r8 - mfspr r6,SPRN_MAS1 - ori r6,r6,MAS1_TS - mtspr SPRN_MAS1,r6 - mfspr r6,SPRN_MAS0 - rlwimi r6,r5,MAS0_ESEL_SHIFT,MAS0_ESEL_MASK - mr r7,r5 - mtspr SPRN_MAS0,r6 - isync - tlbwe - isync - - /* Switch to AS=1 */ - mfmsr r6 - ori r6,r6,MSR_IS|MSR_DS - mtmsr r6 - isync - - mr r9,r3 - add r10,r3,r4 -2: bl loadcam_entry - addi r9,r9,1 - cmpw r9,r10 - mr r3,r9 - blt 2b - - /* Return to AS=0 and clear the temporary entry */ - mfmsr r6 - rlwinm. r6,r6,0,~(MSR_IS|MSR_DS) - mtmsr r6 - isync - - li r6,0 - mtspr SPRN_MAS1,r6 - rlwinm r6,r7,MAS0_ESEL_SHIFT,MAS0_ESEL_MASK - oris r6,r6,MAS0_TLBSEL(1)@h - mtspr SPRN_MAS0,r6 - isync - tlbwe - isync - - mtlr r8 - blr -#endif -- cgit v1.2.3-55-g7522