From 6248d0602f9932a437070dda598b7973b8770384 Mon Sep 17 00:00:00 2001 From: Gregory CLEMENT Date: Mon, 1 Oct 2012 10:56:42 +0100 Subject: ARM: 7545/1: cache-l2x0: make outer_cache_fns a field of l2x0_of_data Instead of having multiple functions belonging to outer_cache and filling this structure on the fly, use a outer_cache_fns field inside l2x0_of_data and just memcopy it into outer_cache depending of the type of the l2x0 cache. For non DT case, the former code was kept. [rmk: fixed a style issue] Tested-and-Reviewed-by: Yehuda Yitschak Tested-and-Reviewed-by: Lior Amsalem Signed-off-by: Gregory CLEMENT Signed-off-by: Russell King --- arch/arm/mm/cache-l2x0.c | 55 +++++++++++++++++++++++++++++++++++------------- 1 file changed, 40 insertions(+), 15 deletions(-) (limited to 'arch/arm/mm') diff --git a/arch/arm/mm/cache-l2x0.c b/arch/arm/mm/cache-l2x0.c index 8a97e6443c62..db55d18691ed 100644 --- a/arch/arm/mm/cache-l2x0.c +++ b/arch/arm/mm/cache-l2x0.c @@ -39,9 +39,11 @@ struct l2x0_regs l2x0_saved_regs; struct l2x0_of_data { void (*setup)(const struct device_node *, u32 *, u32 *); void (*save)(void); - void (*resume)(void); + struct outer_cache_fns outer_cache; }; +static bool of_init = false; + static inline void cache_wait_way(void __iomem *reg, unsigned long mask) { /* wait for cache operation by line or way to complete */ @@ -380,13 +382,15 @@ void __init l2x0_init(void __iomem *base, u32 aux_val, u32 aux_mask) /* Save the value for resuming. */ l2x0_saved_regs.aux_ctrl = aux; - outer_cache.inv_range = l2x0_inv_range; - outer_cache.clean_range = l2x0_clean_range; - outer_cache.flush_range = l2x0_flush_range; - outer_cache.sync = l2x0_cache_sync; - outer_cache.flush_all = l2x0_flush_all; - outer_cache.inv_all = l2x0_inv_all; - outer_cache.disable = l2x0_disable; + if (!of_init) { + outer_cache.inv_range = l2x0_inv_range; + outer_cache.clean_range = l2x0_clean_range; + outer_cache.flush_range = l2x0_flush_range; + outer_cache.sync = l2x0_cache_sync; + outer_cache.flush_all = l2x0_flush_all; + outer_cache.inv_all = l2x0_inv_all; + outer_cache.disable = l2x0_disable; + } printk(KERN_INFO "%s cache controller enabled\n", type); printk(KERN_INFO "l2x0: %d ways, CACHE_ID 0x%08x, AUX_CTRL 0x%08x, Cache size: %d B\n", @@ -537,15 +541,34 @@ static void pl310_resume(void) } static const struct l2x0_of_data pl310_data = { - pl310_of_setup, - pl310_save, - pl310_resume, + .setup = pl310_of_setup, + .save = pl310_save, + .outer_cache = { + .resume = pl310_resume, + .inv_range = l2x0_inv_range, + .clean_range = l2x0_clean_range, + .flush_range = l2x0_flush_range, + .sync = l2x0_cache_sync, + .flush_all = l2x0_flush_all, + .inv_all = l2x0_inv_all, + .disable = l2x0_disable, + .set_debug = pl310_set_debug, + }, }; static const struct l2x0_of_data l2x0_data = { - l2x0_of_setup, - NULL, - l2x0_resume, + .setup = l2x0_of_setup, + .save = NULL, + .outer_cache = { + .resume = l2x0_resume, + .inv_range = l2x0_inv_range, + .clean_range = l2x0_clean_range, + .flush_range = l2x0_flush_range, + .sync = l2x0_cache_sync, + .flush_all = l2x0_flush_all, + .inv_all = l2x0_inv_all, + .disable = l2x0_disable, + }, }; static const struct of_device_id l2x0_ids[] __initconst = { @@ -585,9 +608,11 @@ int __init l2x0_of_init(u32 aux_val, u32 aux_mask) if (data->save) data->save(); + of_init = true; l2x0_init(l2x0_base, aux_val, aux_mask); - outer_cache.resume = data->resume; + memcpy(&outer_cache, &data->outer_cache, sizeof(outer_cache)); + return 0; } #endif -- cgit v1.2.3-55-g7522 From 153cd8e839b5729358d4e5c3371e7509ee5ac96a Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Tue, 16 Oct 2012 11:54:00 +0100 Subject: ARM: 7553/1: proc-v7: Ensure correct instruction set after cpu_reset Because mov pc, never switches instruction set when executed in Thumb code, Thumb-2 kernels will silently execute the target code after cpu_reset as Thumb code, even if the passed code pointer denotes ARM (bit 0 clear). This patch uses bx instead, ensuring the correct instruction set for the target code. Thumb code in the kernel is not supported prior to ARMv7, so other CPUs are not affected. Signed-off-by: Dave Martin Acked-by: Will Deacon Acked-by: Nicolas Pitre Signed-off-by: Russell King --- arch/arm/mm/proc-v7.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/arm/mm') diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S index 846d279f3176..42cc833aa02f 100644 --- a/arch/arm/mm/proc-v7.S +++ b/arch/arm/mm/proc-v7.S @@ -57,7 +57,7 @@ ENTRY(cpu_v7_reset) THUMB( bic r1, r1, #1 << 30 ) @ SCTLR.TE (Thumb exceptions) mcr p15, 0, r1, c1, c0, 0 @ disable MMU isb - mov pc, r0 + bx r0 ENDPROC(cpu_v7_reset) .popsection -- cgit v1.2.3-55-g7522 From b5466f8728527a05a493cc4abe9e6f034a1bbaab Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 15 Jun 2012 14:47:31 +0100 Subject: ARM: mm: remove IPI broadcasting on ASID rollover ASIDs are allocated to MMU contexts based on a rolling counter. This means that after 255 allocations we must invalidate all existing ASIDs via an expensive IPI mechanism to synchronise all of the online CPUs and ensure that all tasks execute with an ASID from the new generation. This patch changes the rollover behaviour so that we rely instead on the hardware broadcasting of the TLB invalidation to avoid the IPI calls. This works by keeping track of the active ASID on each core, which is then reserved in the case of a rollover so that currently scheduled tasks can continue to run. For cores without hardware TLB broadcasting, we keep track of pending flushes in a cpumask, so cores can flush their local TLB before scheduling a new mm. Reviewed-by: Catalin Marinas Tested-by: Marc Zyngier Signed-off-by: Will Deacon --- arch/arm/include/asm/mmu.h | 11 +-- arch/arm/include/asm/mmu_context.h | 82 +--------------- arch/arm/mm/context.c | 186 +++++++++++++++++-------------------- 3 files changed, 93 insertions(+), 186 deletions(-) (limited to 'arch/arm/mm') diff --git a/arch/arm/include/asm/mmu.h b/arch/arm/include/asm/mmu.h index 14965658a923..5b53b53ab5cf 100644 --- a/arch/arm/include/asm/mmu.h +++ b/arch/arm/include/asm/mmu.h @@ -5,18 +5,15 @@ typedef struct { #ifdef CONFIG_CPU_HAS_ASID - unsigned int id; - raw_spinlock_t id_lock; + u64 id; #endif unsigned int kvm_seq; } mm_context_t; #ifdef CONFIG_CPU_HAS_ASID -#define ASID(mm) ((mm)->context.id & 255) - -/* init_mm.context.id_lock should be initialized. */ -#define INIT_MM_CONTEXT(name) \ - .context.id_lock = __RAW_SPIN_LOCK_UNLOCKED(name.context.id_lock), +#define ASID_BITS 8 +#define ASID_MASK ((~0ULL) << ASID_BITS) +#define ASID(mm) ((mm)->context.id & ~ASID_MASK) #else #define ASID(mm) (0) #endif diff --git a/arch/arm/include/asm/mmu_context.h b/arch/arm/include/asm/mmu_context.h index 0306bc642c0d..a64f61cb23d1 100644 --- a/arch/arm/include/asm/mmu_context.h +++ b/arch/arm/include/asm/mmu_context.h @@ -24,84 +24,8 @@ void __check_kvm_seq(struct mm_struct *mm); #ifdef CONFIG_CPU_HAS_ASID -/* - * On ARMv6, we have the following structure in the Context ID: - * - * 31 7 0 - * +-------------------------+-----------+ - * | process ID | ASID | - * +-------------------------+-----------+ - * | context ID | - * +-------------------------------------+ - * - * The ASID is used to tag entries in the CPU caches and TLBs. - * The context ID is used by debuggers and trace logic, and - * should be unique within all running processes. - */ -#define ASID_BITS 8 -#define ASID_MASK ((~0) << ASID_BITS) -#define ASID_FIRST_VERSION (1 << ASID_BITS) - -extern unsigned int cpu_last_asid; - -void __init_new_context(struct task_struct *tsk, struct mm_struct *mm); -void __new_context(struct mm_struct *mm); -void cpu_set_reserved_ttbr0(void); - -static inline void switch_new_context(struct mm_struct *mm) -{ - unsigned long flags; - - __new_context(mm); - - local_irq_save(flags); - cpu_switch_mm(mm->pgd, mm); - local_irq_restore(flags); -} - -static inline void check_and_switch_context(struct mm_struct *mm, - struct task_struct *tsk) -{ - if (unlikely(mm->context.kvm_seq != init_mm.context.kvm_seq)) - __check_kvm_seq(mm); - - /* - * Required during context switch to avoid speculative page table - * walking with the wrong TTBR. - */ - cpu_set_reserved_ttbr0(); - - if (!((mm->context.id ^ cpu_last_asid) >> ASID_BITS)) - /* - * The ASID is from the current generation, just switch to the - * new pgd. This condition is only true for calls from - * context_switch() and interrupts are already disabled. - */ - cpu_switch_mm(mm->pgd, mm); - else if (irqs_disabled()) - /* - * Defer the new ASID allocation until after the context - * switch critical region since __new_context() cannot be - * called with interrupts disabled (it sends IPIs). - */ - set_ti_thread_flag(task_thread_info(tsk), TIF_SWITCH_MM); - else - /* - * That is a direct call to switch_mm() or activate_mm() with - * interrupts enabled and a new context. - */ - switch_new_context(mm); -} - -#define init_new_context(tsk,mm) (__init_new_context(tsk,mm),0) - -#define finish_arch_post_lock_switch \ - finish_arch_post_lock_switch -static inline void finish_arch_post_lock_switch(void) -{ - if (test_and_clear_thread_flag(TIF_SWITCH_MM)) - switch_new_context(current->mm); -} +void check_and_switch_context(struct mm_struct *mm, struct task_struct *tsk); +#define init_new_context(tsk,mm) ({ mm->context.id = 0; }) #else /* !CONFIG_CPU_HAS_ASID */ @@ -143,6 +67,7 @@ static inline void finish_arch_post_lock_switch(void) #endif /* CONFIG_CPU_HAS_ASID */ #define destroy_context(mm) do { } while(0) +#define activate_mm(prev,next) switch_mm(prev, next, NULL) /* * This is called when "tsk" is about to enter lazy TLB mode. @@ -186,6 +111,5 @@ switch_mm(struct mm_struct *prev, struct mm_struct *next, } #define deactivate_mm(tsk,mm) do { } while (0) -#define activate_mm(prev,next) switch_mm(prev, next, NULL) #endif diff --git a/arch/arm/mm/context.c b/arch/arm/mm/context.c index 4e07eec1270d..3172781a8e2e 100644 --- a/arch/arm/mm/context.c +++ b/arch/arm/mm/context.c @@ -2,6 +2,9 @@ * linux/arch/arm/mm/context.c * * Copyright (C) 2002-2003 Deep Blue Solutions Ltd, all rights reserved. + * Copyright (C) 2012 ARM Limited + * + * Author: Will Deacon * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -14,14 +17,35 @@ #include #include +#include #include #include +/* + * On ARMv6, we have the following structure in the Context ID: + * + * 31 7 0 + * +-------------------------+-----------+ + * | process ID | ASID | + * +-------------------------+-----------+ + * | context ID | + * +-------------------------------------+ + * + * The ASID is used to tag entries in the CPU caches and TLBs. + * The context ID is used by debuggers and trace logic, and + * should be unique within all running processes. + */ +#define ASID_FIRST_VERSION (1ULL << ASID_BITS) + static DEFINE_RAW_SPINLOCK(cpu_asid_lock); -unsigned int cpu_last_asid = ASID_FIRST_VERSION; +static u64 cpu_last_asid = ASID_FIRST_VERSION; + +static DEFINE_PER_CPU(u64, active_asids); +static DEFINE_PER_CPU(u64, reserved_asids); +static cpumask_t tlb_flush_pending; #ifdef CONFIG_ARM_LPAE -void cpu_set_reserved_ttbr0(void) +static void cpu_set_reserved_ttbr0(void) { unsigned long ttbl = __pa(swapper_pg_dir); unsigned long ttbh = 0; @@ -37,7 +61,7 @@ void cpu_set_reserved_ttbr0(void) isb(); } #else -void cpu_set_reserved_ttbr0(void) +static void cpu_set_reserved_ttbr0(void) { u32 ttb; /* Copy TTBR1 into TTBR0 */ @@ -84,124 +108,86 @@ static int __init contextidr_notifier_init(void) arch_initcall(contextidr_notifier_init); #endif -/* - * We fork()ed a process, and we need a new context for the child - * to run in. - */ -void __init_new_context(struct task_struct *tsk, struct mm_struct *mm) +static void flush_context(unsigned int cpu) { - mm->context.id = 0; - raw_spin_lock_init(&mm->context.id_lock); -} + int i; -static void flush_context(void) -{ - cpu_set_reserved_ttbr0(); - local_flush_tlb_all(); - if (icache_is_vivt_asid_tagged()) { + /* Update the list of reserved ASIDs. */ + per_cpu(active_asids, cpu) = 0; + for_each_possible_cpu(i) + per_cpu(reserved_asids, i) = per_cpu(active_asids, i); + + /* Queue a TLB invalidate and flush the I-cache if necessary. */ + if (!tlb_ops_need_broadcast()) + cpumask_set_cpu(cpu, &tlb_flush_pending); + else + cpumask_setall(&tlb_flush_pending); + + if (icache_is_vivt_asid_tagged()) __flush_icache_all(); - dsb(); - } } -#ifdef CONFIG_SMP +static int is_reserved_asid(u64 asid, u64 mask) +{ + int cpu; + for_each_possible_cpu(cpu) + if ((per_cpu(reserved_asids, cpu) & mask) == (asid & mask)) + return 1; + return 0; +} -static void set_mm_context(struct mm_struct *mm, unsigned int asid) +static void new_context(struct mm_struct *mm, unsigned int cpu) { - unsigned long flags; + u64 asid = mm->context.id; - /* - * Locking needed for multi-threaded applications where the - * same mm->context.id could be set from different CPUs during - * the broadcast. This function is also called via IPI so the - * mm->context.id_lock has to be IRQ-safe. - */ - raw_spin_lock_irqsave(&mm->context.id_lock, flags); - if (likely((mm->context.id ^ cpu_last_asid) >> ASID_BITS)) { + if (asid != 0 && is_reserved_asid(asid, ULLONG_MAX)) { /* - * Old version of ASID found. Set the new one and - * reset mm_cpumask(mm). + * Our current ASID was active during a rollover, we can + * continue to use it and this was just a false alarm. */ - mm->context.id = asid; + asid = (cpu_last_asid & ASID_MASK) | (asid & ~ASID_MASK); + } else { + /* + * Allocate a free ASID. If we can't find one, take a + * note of the currently active ASIDs and mark the TLBs + * as requiring flushes. + */ + do { + asid = ++cpu_last_asid; + if ((asid & ~ASID_MASK) == 0) + flush_context(cpu); + } while (is_reserved_asid(asid, ~ASID_MASK)); cpumask_clear(mm_cpumask(mm)); } - raw_spin_unlock_irqrestore(&mm->context.id_lock, flags); - /* - * Set the mm_cpumask(mm) bit for the current CPU. - */ - cpumask_set_cpu(smp_processor_id(), mm_cpumask(mm)); + mm->context.id = asid; } -/* - * Reset the ASID on the current CPU. This function call is broadcast - * from the CPU handling the ASID rollover and holding cpu_asid_lock. - */ -static void reset_context(void *info) +void check_and_switch_context(struct mm_struct *mm, struct task_struct *tsk) { - unsigned int asid; + unsigned long flags; unsigned int cpu = smp_processor_id(); - struct mm_struct *mm = current->active_mm; - - smp_rmb(); - asid = cpu_last_asid + cpu + 1; - - flush_context(); - set_mm_context(mm, asid); - - /* set the new ASID */ - cpu_switch_mm(mm->pgd, mm); -} - -#else -static inline void set_mm_context(struct mm_struct *mm, unsigned int asid) -{ - mm->context.id = asid; - cpumask_copy(mm_cpumask(mm), cpumask_of(smp_processor_id())); -} + if (unlikely(mm->context.kvm_seq != init_mm.context.kvm_seq)) + __check_kvm_seq(mm); -#endif - -void __new_context(struct mm_struct *mm) -{ - unsigned int asid; - - raw_spin_lock(&cpu_asid_lock); -#ifdef CONFIG_SMP /* - * Check the ASID again, in case the change was broadcast from - * another CPU before we acquired the lock. + * Required during context switch to avoid speculative page table + * walking with the wrong TTBR. */ - if (unlikely(((mm->context.id ^ cpu_last_asid) >> ASID_BITS) == 0)) { - cpumask_set_cpu(smp_processor_id(), mm_cpumask(mm)); - raw_spin_unlock(&cpu_asid_lock); - return; - } -#endif - /* - * At this point, it is guaranteed that the current mm (with - * an old ASID) isn't active on any other CPU since the ASIDs - * are changed simultaneously via IPI. - */ - asid = ++cpu_last_asid; - if (asid == 0) - asid = cpu_last_asid = ASID_FIRST_VERSION; + cpu_set_reserved_ttbr0(); - /* - * If we've used up all our ASIDs, we need - * to start a new version and flush the TLB. - */ - if (unlikely((asid & ~ASID_MASK) == 0)) { - asid = cpu_last_asid + smp_processor_id() + 1; - flush_context(); -#ifdef CONFIG_SMP - smp_wmb(); - smp_call_function(reset_context, NULL, 1); -#endif - cpu_last_asid += NR_CPUS; - } + raw_spin_lock_irqsave(&cpu_asid_lock, flags); + /* Check that our ASID belongs to the current generation. */ + if ((mm->context.id ^ cpu_last_asid) >> ASID_BITS) + new_context(mm, cpu); - set_mm_context(mm, asid); - raw_spin_unlock(&cpu_asid_lock); + *this_cpu_ptr(&active_asids) = mm->context.id; + cpumask_set_cpu(cpu, mm_cpumask(mm)); + + if (cpumask_test_and_clear_cpu(cpu, &tlb_flush_pending)) + local_flush_tlb_all(); + raw_spin_unlock_irqrestore(&cpu_asid_lock, flags); + + cpu_switch_mm(mm->pgd, mm); } -- cgit v1.2.3-55-g7522 From 4b883160835faf38c9356f0885cf491a1e661e88 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 27 Jul 2012 12:31:35 +0100 Subject: ARM: mm: avoid taking ASID spinlock on fastpath When scheduling a new mm, we take a spinlock so that we can: 1. Safely allocate a new ASID, if required 2. Update our active_asids field without worrying about parallel updates to reserved_asids 3. Ensure that we flush our local TLB, if required However, this has the nasty affect of serialising context-switch across all CPUs in the system. The usual (fast) case is where the next mm has a valid ASID for the current generation. In such a scenario, we can avoid taking the lock and instead use atomic64_xchg to update the active_asids variable for the current CPU. If a rollover occurs on another CPU (which would take the lock), when copying the active_asids into the reserved_asids another atomic64_xchg is used to replace each active_asids with 0. The fast path can then detect this case and fall back to spinning on the lock. Tested-by: Marc Zyngier Signed-off-by: Will Deacon --- arch/arm/mm/context.c | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) (limited to 'arch/arm/mm') diff --git a/arch/arm/mm/context.c b/arch/arm/mm/context.c index 3172781a8e2e..5ac09e8b4030 100644 --- a/arch/arm/mm/context.c +++ b/arch/arm/mm/context.c @@ -38,9 +38,9 @@ #define ASID_FIRST_VERSION (1ULL << ASID_BITS) static DEFINE_RAW_SPINLOCK(cpu_asid_lock); -static u64 cpu_last_asid = ASID_FIRST_VERSION; +static atomic64_t cpu_last_asid = ATOMIC64_INIT(ASID_FIRST_VERSION); -static DEFINE_PER_CPU(u64, active_asids); +static DEFINE_PER_CPU(atomic64_t, active_asids); static DEFINE_PER_CPU(u64, reserved_asids); static cpumask_t tlb_flush_pending; @@ -113,9 +113,10 @@ static void flush_context(unsigned int cpu) int i; /* Update the list of reserved ASIDs. */ - per_cpu(active_asids, cpu) = 0; for_each_possible_cpu(i) - per_cpu(reserved_asids, i) = per_cpu(active_asids, i); + per_cpu(reserved_asids, i) = + atomic64_xchg(&per_cpu(active_asids, i), 0); + per_cpu(reserved_asids, cpu) = 0; /* Queue a TLB invalidate and flush the I-cache if necessary. */ if (!tlb_ops_need_broadcast()) @@ -145,7 +146,8 @@ static void new_context(struct mm_struct *mm, unsigned int cpu) * Our current ASID was active during a rollover, we can * continue to use it and this was just a false alarm. */ - asid = (cpu_last_asid & ASID_MASK) | (asid & ~ASID_MASK); + asid = (atomic64_read(&cpu_last_asid) & ASID_MASK) | \ + (asid & ~ASID_MASK); } else { /* * Allocate a free ASID. If we can't find one, take a @@ -153,7 +155,7 @@ static void new_context(struct mm_struct *mm, unsigned int cpu) * as requiring flushes. */ do { - asid = ++cpu_last_asid; + asid = atomic64_inc_return(&cpu_last_asid); if ((asid & ~ASID_MASK) == 0) flush_context(cpu); } while (is_reserved_asid(asid, ~ASID_MASK)); @@ -177,17 +179,22 @@ void check_and_switch_context(struct mm_struct *mm, struct task_struct *tsk) */ cpu_set_reserved_ttbr0(); + if (!((mm->context.id ^ atomic64_read(&cpu_last_asid)) >> ASID_BITS) + && atomic64_xchg(&per_cpu(active_asids, cpu), mm->context.id)) + goto switch_mm_fastpath; + raw_spin_lock_irqsave(&cpu_asid_lock, flags); /* Check that our ASID belongs to the current generation. */ - if ((mm->context.id ^ cpu_last_asid) >> ASID_BITS) + if ((mm->context.id ^ atomic64_read(&cpu_last_asid)) >> ASID_BITS) new_context(mm, cpu); - *this_cpu_ptr(&active_asids) = mm->context.id; + atomic64_set(&per_cpu(active_asids, cpu), mm->context.id); cpumask_set_cpu(cpu, mm_cpumask(mm)); if (cpumask_test_and_clear_cpu(cpu, &tlb_flush_pending)) local_flush_tlb_all(); raw_spin_unlock_irqrestore(&cpu_asid_lock, flags); +switch_mm_fastpath: cpu_switch_mm(mm->pgd, mm); } -- cgit v1.2.3-55-g7522 From bf51bb82ccd9a74e9702d06107b23e54b27a5707 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 1 Aug 2012 14:57:49 +0100 Subject: ARM: mm: use bitmap operations when allocating new ASIDs When allocating a new ASID, we must take care not to re-assign a reserved ASID-value to a new mm. This requires us to check each candidate ASID against those currently reserved by other cores before assigning a new ASID to the current mm. This patch improves the ASID allocation algorithm by using a bitmap-based approach. Rather than iterating over the reserved ASID array for each candidate ASID, we simply find the first zero bit, ensuring that those indices corresponding to reserved ASIDs are set when flushing during a rollover event. Tested-by: Marc Zyngier Signed-off-by: Will Deacon --- arch/arm/mm/context.c | 54 +++++++++++++++++++++++++++++++++------------------ 1 file changed, 35 insertions(+), 19 deletions(-) (limited to 'arch/arm/mm') diff --git a/arch/arm/mm/context.c b/arch/arm/mm/context.c index 5ac09e8b4030..7a27d7363be2 100644 --- a/arch/arm/mm/context.c +++ b/arch/arm/mm/context.c @@ -36,9 +36,14 @@ * should be unique within all running processes. */ #define ASID_FIRST_VERSION (1ULL << ASID_BITS) +#define NUM_USER_ASIDS (ASID_FIRST_VERSION - 1) + +#define ASID_TO_IDX(asid) ((asid & ~ASID_MASK) - 1) +#define IDX_TO_ASID(idx) ((idx + 1) & ~ASID_MASK) static DEFINE_RAW_SPINLOCK(cpu_asid_lock); -static atomic64_t cpu_last_asid = ATOMIC64_INIT(ASID_FIRST_VERSION); +static atomic64_t asid_generation = ATOMIC64_INIT(ASID_FIRST_VERSION); +static DECLARE_BITMAP(asid_map, NUM_USER_ASIDS); static DEFINE_PER_CPU(atomic64_t, active_asids); static DEFINE_PER_CPU(u64, reserved_asids); @@ -111,12 +116,19 @@ arch_initcall(contextidr_notifier_init); static void flush_context(unsigned int cpu) { int i; - - /* Update the list of reserved ASIDs. */ - for_each_possible_cpu(i) - per_cpu(reserved_asids, i) = - atomic64_xchg(&per_cpu(active_asids, i), 0); - per_cpu(reserved_asids, cpu) = 0; + u64 asid; + + /* Update the list of reserved ASIDs and the ASID bitmap. */ + bitmap_clear(asid_map, 0, NUM_USER_ASIDS); + for_each_possible_cpu(i) { + if (i == cpu) { + asid = 0; + } else { + asid = atomic64_xchg(&per_cpu(active_asids, i), 0); + __set_bit(ASID_TO_IDX(asid), asid_map); + } + per_cpu(reserved_asids, i) = asid; + } /* Queue a TLB invalidate and flush the I-cache if necessary. */ if (!tlb_ops_need_broadcast()) @@ -128,11 +140,11 @@ static void flush_context(unsigned int cpu) __flush_icache_all(); } -static int is_reserved_asid(u64 asid, u64 mask) +static int is_reserved_asid(u64 asid) { int cpu; for_each_possible_cpu(cpu) - if ((per_cpu(reserved_asids, cpu) & mask) == (asid & mask)) + if (per_cpu(reserved_asids, cpu) == asid) return 1; return 0; } @@ -140,25 +152,29 @@ static int is_reserved_asid(u64 asid, u64 mask) static void new_context(struct mm_struct *mm, unsigned int cpu) { u64 asid = mm->context.id; + u64 generation = atomic64_read(&asid_generation); - if (asid != 0 && is_reserved_asid(asid, ULLONG_MAX)) { + if (asid != 0 && is_reserved_asid(asid)) { /* * Our current ASID was active during a rollover, we can * continue to use it and this was just a false alarm. */ - asid = (atomic64_read(&cpu_last_asid) & ASID_MASK) | \ - (asid & ~ASID_MASK); + asid = generation | (asid & ~ASID_MASK); } else { /* * Allocate a free ASID. If we can't find one, take a * note of the currently active ASIDs and mark the TLBs * as requiring flushes. */ - do { - asid = atomic64_inc_return(&cpu_last_asid); - if ((asid & ~ASID_MASK) == 0) - flush_context(cpu); - } while (is_reserved_asid(asid, ~ASID_MASK)); + asid = find_first_zero_bit(asid_map, NUM_USER_ASIDS); + if (asid == NUM_USER_ASIDS) { + generation = atomic64_add_return(ASID_FIRST_VERSION, + &asid_generation); + flush_context(cpu); + asid = find_first_zero_bit(asid_map, NUM_USER_ASIDS); + } + __set_bit(asid, asid_map); + asid = generation | IDX_TO_ASID(asid); cpumask_clear(mm_cpumask(mm)); } @@ -179,13 +195,13 @@ void check_and_switch_context(struct mm_struct *mm, struct task_struct *tsk) */ cpu_set_reserved_ttbr0(); - if (!((mm->context.id ^ atomic64_read(&cpu_last_asid)) >> ASID_BITS) + if (!((mm->context.id ^ atomic64_read(&asid_generation)) >> ASID_BITS) && atomic64_xchg(&per_cpu(active_asids, cpu), mm->context.id)) goto switch_mm_fastpath; raw_spin_lock_irqsave(&cpu_asid_lock, flags); /* Check that our ASID belongs to the current generation. */ - if ((mm->context.id ^ atomic64_read(&cpu_last_asid)) >> ASID_BITS) + if ((mm->context.id ^ atomic64_read(&asid_generation)) >> ASID_BITS) new_context(mm, cpu); atomic64_set(&per_cpu(active_asids, cpu), mm->context.id); -- cgit v1.2.3-55-g7522 From b8db6b886a1fecd6a5b1d13b190f3149247305ef Mon Sep 17 00:00:00 2001 From: Gregory CLEMENT Date: Tue, 6 Nov 2012 01:58:07 +0100 Subject: ARM: 7547/4: cache-l2x0: add support for Aurora L2 cache ctrl Aurora Cache Controller was designed to be compatible with the ARM L2 Cache Controller. It comes with some difference or improvement such as: - no cache id part number available through hardware (need to get it by the DT). - always write through mode available. - two flavors of the controller outer cache and system cache (meaning maintenance operations on L1 are broadcasted to the L2 and L2 performs the same operation). - in outer cache mode, the cache maintenance operations are improved and can be done on a range inside a page and are not limited to a cache line. Tested-and-Reviewed-by: Lior Amsalem Signed-off-by: Gregory CLEMENT Signed-off-by: Yehuda Yitschak Reviewed-by: Will Deacon Signed-off-by: Russell King --- arch/arm/include/asm/hardware/cache-l2x0.h | 4 + arch/arm/mm/cache-aurora-l2.h | 55 +++++++ arch/arm/mm/cache-l2x0.c | 223 +++++++++++++++++++++++++++-- 3 files changed, 269 insertions(+), 13 deletions(-) create mode 100644 arch/arm/mm/cache-aurora-l2.h (limited to 'arch/arm/mm') diff --git a/arch/arm/include/asm/hardware/cache-l2x0.h b/arch/arm/include/asm/hardware/cache-l2x0.h index 5f2c7b44fdaf..3b2c40b5bfa2 100644 --- a/arch/arm/include/asm/hardware/cache-l2x0.h +++ b/arch/arm/include/asm/hardware/cache-l2x0.h @@ -102,6 +102,10 @@ #define L2X0_ADDR_FILTER_EN 1 +#define L2X0_CTRL_EN 1 + +#define L2X0_WAY_SIZE_SHIFT 3 + #ifndef __ASSEMBLY__ extern void __init l2x0_init(void __iomem *base, u32 aux_val, u32 aux_mask); #if defined(CONFIG_CACHE_L2X0) && defined(CONFIG_OF) diff --git a/arch/arm/mm/cache-aurora-l2.h b/arch/arm/mm/cache-aurora-l2.h new file mode 100644 index 000000000000..c86124769831 --- /dev/null +++ b/arch/arm/mm/cache-aurora-l2.h @@ -0,0 +1,55 @@ +/* + * AURORA shared L2 cache controller support + * + * Copyright (C) 2012 Marvell + * + * Yehuda Yitschak + * Gregory CLEMENT + * + * This file is licensed under the terms of the GNU General Public + * License version 2. This program is licensed "as is" without any + * warranty of any kind, whether express or implied. + */ + +#ifndef __ASM_ARM_HARDWARE_AURORA_L2_H +#define __ASM_ARM_HARDWARE_AURORA_L2_H + +#define AURORA_SYNC_REG 0x700 +#define AURORA_RANGE_BASE_ADDR_REG 0x720 +#define AURORA_FLUSH_PHY_ADDR_REG 0x7f0 +#define AURORA_INVAL_RANGE_REG 0x774 +#define AURORA_CLEAN_RANGE_REG 0x7b4 +#define AURORA_FLUSH_RANGE_REG 0x7f4 + +#define AURORA_ACR_REPLACEMENT_OFFSET 27 +#define AURORA_ACR_REPLACEMENT_MASK \ + (0x3 << AURORA_ACR_REPLACEMENT_OFFSET) +#define AURORA_ACR_REPLACEMENT_TYPE_WAYRR \ + (0 << AURORA_ACR_REPLACEMENT_OFFSET) +#define AURORA_ACR_REPLACEMENT_TYPE_LFSR \ + (1 << AURORA_ACR_REPLACEMENT_OFFSET) +#define AURORA_ACR_REPLACEMENT_TYPE_SEMIPLRU \ + (3 << AURORA_ACR_REPLACEMENT_OFFSET) + +#define AURORA_ACR_FORCE_WRITE_POLICY_OFFSET 0 +#define AURORA_ACR_FORCE_WRITE_POLICY_MASK \ + (0x3 << AURORA_ACR_FORCE_WRITE_POLICY_OFFSET) +#define AURORA_ACR_FORCE_WRITE_POLICY_DIS \ + (0 << AURORA_ACR_FORCE_WRITE_POLICY_OFFSET) +#define AURORA_ACR_FORCE_WRITE_BACK_POLICY \ + (1 << AURORA_ACR_FORCE_WRITE_POLICY_OFFSET) +#define AURORA_ACR_FORCE_WRITE_THRO_POLICY \ + (2 << AURORA_ACR_FORCE_WRITE_POLICY_OFFSET) + +#define MAX_RANGE_SIZE 1024 + +#define AURORA_WAY_SIZE_SHIFT 2 + +#define AURORA_CTRL_FW 0x100 + +/* chose a number outside L2X0_CACHE_ID_PART_MASK to be sure to make + * the distinction between a number coming from hardware and a number + * coming from the device tree */ +#define AURORA_CACHE_ID 0x100 + +#endif /* __ASM_ARM_HARDWARE_AURORA_L2_H */ diff --git a/arch/arm/mm/cache-l2x0.c b/arch/arm/mm/cache-l2x0.c index db55d18691ed..6911b8b2745c 100644 --- a/arch/arm/mm/cache-l2x0.c +++ b/arch/arm/mm/cache-l2x0.c @@ -25,6 +25,7 @@ #include #include +#include "cache-aurora-l2.h" #define CACHE_LINE_SIZE 32 @@ -34,6 +35,10 @@ static u32 l2x0_way_mask; /* Bitmask of active ways */ static u32 l2x0_size; static unsigned long sync_reg_offset = L2X0_CACHE_SYNC; +/* Aurora don't have the cache ID register available, so we have to + * pass it though the device tree */ +static u32 cache_id_part_number_from_dt; + struct l2x0_regs l2x0_saved_regs; struct l2x0_of_data { @@ -170,7 +175,7 @@ static void l2x0_inv_all(void) /* invalidate all ways */ raw_spin_lock_irqsave(&l2x0_lock, flags); /* Invalidating when L2 is enabled is a nono */ - BUG_ON(readl(l2x0_base + L2X0_CTRL) & 1); + BUG_ON(readl(l2x0_base + L2X0_CTRL) & L2X0_CTRL_EN); writel_relaxed(l2x0_way_mask, l2x0_base + L2X0_INV_WAY); cache_wait_way(l2x0_base + L2X0_INV_WAY, l2x0_way_mask); cache_sync(); @@ -294,11 +299,18 @@ static void l2x0_unlock(u32 cache_id) int lockregs; int i; - if (cache_id == L2X0_CACHE_ID_PART_L310) + switch (cache_id) { + case L2X0_CACHE_ID_PART_L310: lockregs = 8; - else + break; + case AURORA_CACHE_ID: + lockregs = 4; + break; + default: /* L210 and unknown types */ lockregs = 1; + break; + } for (i = 0; i < lockregs; i++) { writel_relaxed(0x0, l2x0_base + L2X0_LOCKDOWN_WAY_D_BASE + @@ -314,18 +326,22 @@ void __init l2x0_init(void __iomem *base, u32 aux_val, u32 aux_mask) u32 cache_id; u32 way_size = 0; int ways; + int way_size_shift = L2X0_WAY_SIZE_SHIFT; const char *type; l2x0_base = base; - - cache_id = readl_relaxed(l2x0_base + L2X0_CACHE_ID); + if (cache_id_part_number_from_dt) + cache_id = cache_id_part_number_from_dt; + else + cache_id = readl_relaxed(l2x0_base + L2X0_CACHE_ID) + & L2X0_CACHE_ID_PART_MASK; aux = readl_relaxed(l2x0_base + L2X0_AUX_CTRL); aux &= aux_mask; aux |= aux_val; /* Determine the number of ways */ - switch (cache_id & L2X0_CACHE_ID_PART_MASK) { + switch (cache_id) { case L2X0_CACHE_ID_PART_L310: if (aux & (1 << 16)) ways = 16; @@ -342,6 +358,14 @@ void __init l2x0_init(void __iomem *base, u32 aux_val, u32 aux_mask) ways = (aux >> 13) & 0xf; type = "L210"; break; + + case AURORA_CACHE_ID: + sync_reg_offset = AURORA_SYNC_REG; + ways = (aux >> 13) & 0xf; + ways = 2 << ((ways + 1) >> 2); + way_size_shift = AURORA_WAY_SIZE_SHIFT; + type = "Aurora"; + break; default: /* Assume unknown chips have 8 ways */ ways = 8; @@ -355,7 +379,8 @@ void __init l2x0_init(void __iomem *base, u32 aux_val, u32 aux_mask) * L2 cache Size = Way size * Number of ways */ way_size = (aux & L2X0_AUX_CTRL_WAY_SIZE_MASK) >> 17; - way_size = 1 << (way_size + 3); + way_size = 1 << (way_size + way_size_shift); + l2x0_size = ways * way_size * SZ_1K; /* @@ -363,7 +388,7 @@ void __init l2x0_init(void __iomem *base, u32 aux_val, u32 aux_mask) * If you are booting from non-secure mode * accessing the below registers will fault. */ - if (!(readl_relaxed(l2x0_base + L2X0_CTRL) & 1)) { + if (!(readl_relaxed(l2x0_base + L2X0_CTRL) & L2X0_CTRL_EN)) { /* Make sure that I&D is not locked down when starting */ l2x0_unlock(cache_id); @@ -373,7 +398,7 @@ void __init l2x0_init(void __iomem *base, u32 aux_val, u32 aux_mask) l2x0_inv_all(); /* enable L2X0 */ - writel_relaxed(1, l2x0_base + L2X0_CTRL); + writel_relaxed(L2X0_CTRL_EN, l2x0_base + L2X0_CTRL); } /* Re-read it in case some bits are reserved. */ @@ -398,6 +423,100 @@ void __init l2x0_init(void __iomem *base, u32 aux_val, u32 aux_mask) } #ifdef CONFIG_OF +static int l2_wt_override; + +/* + * Note that the end addresses passed to Linux primitives are + * noninclusive, while the hardware cache range operations use + * inclusive start and end addresses. + */ +static unsigned long calc_range_end(unsigned long start, unsigned long end) +{ + /* + * Limit the number of cache lines processed at once, + * since cache range operations stall the CPU pipeline + * until completion. + */ + if (end > start + MAX_RANGE_SIZE) + end = start + MAX_RANGE_SIZE; + + /* + * Cache range operations can't straddle a page boundary. + */ + if (end > PAGE_ALIGN(start+1)) + end = PAGE_ALIGN(start+1); + + return end; +} + +/* + * Make sure 'start' and 'end' reference the same page, as L2 is PIPT + * and range operations only do a TLB lookup on the start address. + */ +static void aurora_pa_range(unsigned long start, unsigned long end, + unsigned long offset) +{ + unsigned long flags; + + raw_spin_lock_irqsave(&l2x0_lock, flags); + writel(start, l2x0_base + AURORA_RANGE_BASE_ADDR_REG); + writel(end, l2x0_base + offset); + raw_spin_unlock_irqrestore(&l2x0_lock, flags); + + cache_sync(); +} + +static void aurora_inv_range(unsigned long start, unsigned long end) +{ + /* + * round start and end adresses up to cache line size + */ + start &= ~(CACHE_LINE_SIZE - 1); + end = ALIGN(end, CACHE_LINE_SIZE); + + /* + * Invalidate all full cache lines between 'start' and 'end'. + */ + while (start < end) { + unsigned long range_end = calc_range_end(start, end); + aurora_pa_range(start, range_end - CACHE_LINE_SIZE, + AURORA_INVAL_RANGE_REG); + start = range_end; + } +} + +static void aurora_clean_range(unsigned long start, unsigned long end) +{ + /* + * If L2 is forced to WT, the L2 will always be clean and we + * don't need to do anything here. + */ + if (!l2_wt_override) { + start &= ~(CACHE_LINE_SIZE - 1); + end = ALIGN(end, CACHE_LINE_SIZE); + while (start != end) { + unsigned long range_end = calc_range_end(start, end); + aurora_pa_range(start, range_end - CACHE_LINE_SIZE, + AURORA_CLEAN_RANGE_REG); + start = range_end; + } + } +} + +static void aurora_flush_range(unsigned long start, unsigned long end) +{ + if (!l2_wt_override) { + start &= ~(CACHE_LINE_SIZE - 1); + end = ALIGN(end, CACHE_LINE_SIZE); + while (start != end) { + unsigned long range_end = calc_range_end(start, end); + aurora_pa_range(start, range_end - CACHE_LINE_SIZE, + AURORA_FLUSH_RANGE_REG); + start = range_end; + } + } +} + static void __init l2x0_of_setup(const struct device_node *np, u32 *aux_val, u32 *aux_mask) { @@ -495,9 +614,15 @@ static void __init pl310_save(void) } } +static void aurora_save(void) +{ + l2x0_saved_regs.ctrl = readl_relaxed(l2x0_base + L2X0_CTRL); + l2x0_saved_regs.aux_ctrl = readl_relaxed(l2x0_base + L2X0_AUX_CTRL); +} + static void l2x0_resume(void) { - if (!(readl_relaxed(l2x0_base + L2X0_CTRL) & 1)) { + if (!(readl_relaxed(l2x0_base + L2X0_CTRL) & L2X0_CTRL_EN)) { /* restore aux ctrl and enable l2 */ l2x0_unlock(readl_relaxed(l2x0_base + L2X0_CACHE_ID)); @@ -506,7 +631,7 @@ static void l2x0_resume(void) l2x0_inv_all(); - writel_relaxed(1, l2x0_base + L2X0_CTRL); + writel_relaxed(L2X0_CTRL_EN, l2x0_base + L2X0_CTRL); } } @@ -514,7 +639,7 @@ static void pl310_resume(void) { u32 l2x0_revision; - if (!(readl_relaxed(l2x0_base + L2X0_CTRL) & 1)) { + if (!(readl_relaxed(l2x0_base + L2X0_CTRL) & L2X0_CTRL_EN)) { /* restore pl310 setup */ writel_relaxed(l2x0_saved_regs.tag_latency, l2x0_base + L2X0_TAG_LATENCY_CTRL); @@ -540,6 +665,46 @@ static void pl310_resume(void) l2x0_resume(); } +static void aurora_resume(void) +{ + if (!(readl(l2x0_base + L2X0_CTRL) & L2X0_CTRL_EN)) { + writel(l2x0_saved_regs.aux_ctrl, l2x0_base + L2X0_AUX_CTRL); + writel(l2x0_saved_regs.ctrl, l2x0_base + L2X0_CTRL); + } +} + +static void __init aurora_broadcast_l2_commands(void) +{ + __u32 u; + /* Enable Broadcasting of cache commands to L2*/ + __asm__ __volatile__("mrc p15, 1, %0, c15, c2, 0" : "=r"(u)); + u |= AURORA_CTRL_FW; /* Set the FW bit */ + __asm__ __volatile__("mcr p15, 1, %0, c15, c2, 0\n" : : "r"(u)); + isb(); +} + +static void __init aurora_of_setup(const struct device_node *np, + u32 *aux_val, u32 *aux_mask) +{ + u32 val = AURORA_ACR_REPLACEMENT_TYPE_SEMIPLRU; + u32 mask = AURORA_ACR_REPLACEMENT_MASK; + + of_property_read_u32(np, "cache-id-part", + &cache_id_part_number_from_dt); + + /* Determine and save the write policy */ + l2_wt_override = of_property_read_bool(np, "wt-override"); + + if (l2_wt_override) { + val |= AURORA_ACR_FORCE_WRITE_THRO_POLICY; + mask |= AURORA_ACR_FORCE_WRITE_POLICY_MASK; + } + + *aux_val &= ~mask; + *aux_val |= val; + *aux_mask &= ~mask; +} + static const struct l2x0_of_data pl310_data = { .setup = pl310_of_setup, .save = pl310_save, @@ -571,10 +736,37 @@ static const struct l2x0_of_data l2x0_data = { }, }; +static const struct l2x0_of_data aurora_with_outer_data = { + .setup = aurora_of_setup, + .save = aurora_save, + .outer_cache = { + .resume = aurora_resume, + .inv_range = aurora_inv_range, + .clean_range = aurora_clean_range, + .flush_range = aurora_flush_range, + .sync = l2x0_cache_sync, + .flush_all = l2x0_flush_all, + .inv_all = l2x0_inv_all, + .disable = l2x0_disable, + }, +}; + +static const struct l2x0_of_data aurora_no_outer_data = { + .setup = aurora_of_setup, + .save = aurora_save, + .outer_cache = { + .resume = aurora_resume, + }, +}; + static const struct of_device_id l2x0_ids[] __initconst = { { .compatible = "arm,pl310-cache", .data = (void *)&pl310_data }, { .compatible = "arm,l220-cache", .data = (void *)&l2x0_data }, { .compatible = "arm,l210-cache", .data = (void *)&l2x0_data }, + { .compatible = "marvell,aurora-system-cache", + .data = (void *)&aurora_no_outer_data}, + { .compatible = "marvell,aurora-outer-cache", + .data = (void *)&aurora_with_outer_data}, {} }; @@ -600,9 +792,14 @@ int __init l2x0_of_init(u32 aux_val, u32 aux_mask) data = of_match_node(l2x0_ids, np)->data; /* L2 configuration can only be changed if the cache is disabled */ - if (!(readl_relaxed(l2x0_base + L2X0_CTRL) & 1)) { + if (!(readl_relaxed(l2x0_base + L2X0_CTRL) & L2X0_CTRL_EN)) { if (data->setup) data->setup(np, &aux_val, &aux_mask); + + /* For aurora cache in no outer mode select the + * correct mode using the coprocessor*/ + if (data == &aurora_no_outer_data) + aurora_broadcast_l2_commands(); } if (data->save) -- cgit v1.2.3-55-g7522 From 864aa04cd02979c2c755cb28b5f4fe56039171c0 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 18 Sep 2012 19:18:35 +0100 Subject: ARM: mm: use pteval_t to represent page protection values When updating the page protection map after calculating the user_pgprot value, the base protection map is temporarily stored in an unsigned long type, causing truncation of the protection bits when LPAE is enabled. This effectively means that calls to mprotect() will corrupt the upper page attributes, clearing the XN bit unconditionally. This patch uses pteval_t to store the intermediate protection values, preserving the upper bits for 64-bit descriptors. Cc: stable@vger.kernel.org Acked-by: Nicolas Pitre Acked-by: Catalin Marinas Signed-off-by: Will Deacon --- arch/arm/mm/mmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/arm/mm') diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index 941dfb9e9a78..99b47b950efc 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -488,7 +488,7 @@ static void __init build_mem_type_table(void) #endif for (i = 0; i < 16; i++) { - unsigned long v = pgprot_val(protection_map[i]); + pteval_t v = pgprot_val(protection_map[i]); protection_map[i] = __pgprot(v | user_pgprot); } -- cgit v1.2.3-55-g7522 From 0cbbbad63179652272cc5e18a68d69bfc8dd25ce Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 31 Aug 2012 00:57:03 +0100 Subject: ARM: mm: don't use the access flag permissions mechanism for classic MMU The simplified access permissions model is not used for the classic MMU translation regime, so ensure that it is turned off in the sctlr prior to turning on address translation for ARMv7. Acked-by: Catalin Marinas Signed-off-by: Will Deacon --- arch/arm/mm/proc-v7-2level.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/arm/mm') diff --git a/arch/arm/mm/proc-v7-2level.S b/arch/arm/mm/proc-v7-2level.S index fd045e706390..e37600b91b25 100644 --- a/arch/arm/mm/proc-v7-2level.S +++ b/arch/arm/mm/proc-v7-2level.S @@ -161,11 +161,11 @@ ENDPROC(cpu_v7_set_pte_ext) * TFR EV X F I D LR S * .EEE ..EE PUI. .T.T 4RVI ZWRS BLDP WCAM * rxxx rrxx xxx0 0101 xxxx xxxx x111 xxxx < forced - * 1 0 110 0011 1100 .111 1101 < we want + * 01 0 110 0011 1100 .111 1101 < we want */ .align 2 .type v7_crval, #object v7_crval: - crval clear=0x0120c302, mmuset=0x10c03c7d, ucset=0x00c01c7c + crval clear=0x2120c302, mmuset=0x10c03c7d, ucset=0x00c01c7c .previous -- cgit v1.2.3-55-g7522 From dbf62d50067e55a782583fe53c3d2a3d98b1f6f3 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 19 Jul 2012 11:51:05 +0100 Subject: ARM: mm: introduce L_PTE_VALID for page table entries For long-descriptor translation table formats, the ARMv7 architecture defines the last two bits of the second- and third-level descriptors to be: x0b - Invalid 01b - Block (second-level), Reserved (third-level) 11b - Table (second-level), Page (third-level) This allows us to define L_PTE_PRESENT as (3 << 0) and use this value to create ptes directly. However, when determining whether a given pte value is present in the low-level page table accessors, we only need to check the least significant bit of the descriptor, allowing us to write faulting, present entries which are required for PROT_NONE mappings. This patch introduces L_PTE_VALID, which can be used to test whether a pte should fault, and updates the low-level page table accessors accordingly. Signed-off-by: Will Deacon --- arch/arm/include/asm/pgtable-2level.h | 1 + arch/arm/include/asm/pgtable-3level.h | 3 ++- arch/arm/include/asm/pgtable.h | 4 +--- arch/arm/mm/proc-v7-2level.S | 2 +- arch/arm/mm/proc-v7-3level.S | 2 +- 5 files changed, 6 insertions(+), 6 deletions(-) (limited to 'arch/arm/mm') diff --git a/arch/arm/include/asm/pgtable-2level.h b/arch/arm/include/asm/pgtable-2level.h index 2317a71c8f8e..c44a1ecfc28a 100644 --- a/arch/arm/include/asm/pgtable-2level.h +++ b/arch/arm/include/asm/pgtable-2level.h @@ -115,6 +115,7 @@ * The PTE table pointer refers to the hardware entries; the "Linux" * entries are stored 1024 bytes below. */ +#define L_PTE_VALID (_AT(pteval_t, 1) << 0) /* Valid */ #define L_PTE_PRESENT (_AT(pteval_t, 1) << 0) #define L_PTE_YOUNG (_AT(pteval_t, 1) << 1) #define L_PTE_FILE (_AT(pteval_t, 1) << 2) /* only when !PRESENT */ diff --git a/arch/arm/include/asm/pgtable-3level.h b/arch/arm/include/asm/pgtable-3level.h index b24903549d1c..e32311a9abca 100644 --- a/arch/arm/include/asm/pgtable-3level.h +++ b/arch/arm/include/asm/pgtable-3level.h @@ -67,7 +67,8 @@ * These bits overlap with the hardware bits but the naming is preserved for * consistency with the classic page table format. */ -#define L_PTE_PRESENT (_AT(pteval_t, 3) << 0) /* Valid */ +#define L_PTE_VALID (_AT(pteval_t, 1) << 0) /* Valid */ +#define L_PTE_PRESENT (_AT(pteval_t, 3) << 0) /* Present */ #define L_PTE_FILE (_AT(pteval_t, 1) << 2) /* only when !PRESENT */ #define L_PTE_USER (_AT(pteval_t, 1) << 6) /* AP[1] */ #define L_PTE_RDONLY (_AT(pteval_t, 1) << 7) /* AP[2] */ diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h index 08c12312a1f9..ccf34b6e9903 100644 --- a/arch/arm/include/asm/pgtable.h +++ b/arch/arm/include/asm/pgtable.h @@ -203,9 +203,7 @@ static inline pte_t *pmd_page_vaddr(pmd_t pmd) #define pte_exec(pte) (!(pte_val(pte) & L_PTE_XN)) #define pte_special(pte) (0) -#define pte_present_user(pte) \ - ((pte_val(pte) & (L_PTE_PRESENT | L_PTE_USER)) == \ - (L_PTE_PRESENT | L_PTE_USER)) +#define pte_present_user(pte) (pte_present(pte) && (pte_val(pte) & L_PTE_USER)) #if __LINUX_ARM_ARCH__ < 6 static inline void __sync_icache_dcache(pte_t pteval) diff --git a/arch/arm/mm/proc-v7-2level.S b/arch/arm/mm/proc-v7-2level.S index e37600b91b25..e755e9f8d1b4 100644 --- a/arch/arm/mm/proc-v7-2level.S +++ b/arch/arm/mm/proc-v7-2level.S @@ -100,7 +100,7 @@ ENTRY(cpu_v7_set_pte_ext) orrne r3, r3, #PTE_EXT_XN tst r1, #L_PTE_YOUNG - tstne r1, #L_PTE_PRESENT + tstne r1, #L_PTE_VALID moveq r3, #0 ARM( str r3, [r0, #2048]! ) diff --git a/arch/arm/mm/proc-v7-3level.S b/arch/arm/mm/proc-v7-3level.S index 8de0f1dd1549..d23d067e1904 100644 --- a/arch/arm/mm/proc-v7-3level.S +++ b/arch/arm/mm/proc-v7-3level.S @@ -65,7 +65,7 @@ ENDPROC(cpu_v7_switch_mm) */ ENTRY(cpu_v7_set_pte_ext) #ifdef CONFIG_MMU - tst r2, #L_PTE_PRESENT + tst r2, #L_PTE_VALID beq 1f tst r3, #1 << (55 - 32) @ L_PTE_DIRTY orreq r2, #L_PTE_RDONLY -- cgit v1.2.3-55-g7522 From 26ffd0d43b186b0d5186354da8714a1c2d360df0 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Sat, 1 Sep 2012 05:22:12 +0100 Subject: ARM: mm: introduce present, faulting entries for PAGE_NONE PROT_NONE mappings apply the page protection attributes defined by _P000 which translate to PAGE_NONE for ARM. These attributes specify an XN, RDONLY pte that is inaccessible to userspace. However, on kernels configured without support for domains, such a pte *is* accessible to the kernel and can be read via get_user, allowing tasks to read PROT_NONE pages via syscalls such as read/write over a pipe. This patch introduces a new software pte flag, L_PTE_NONE, that is set to identify faulting, present entries. Signed-off-by: Will Deacon --- arch/arm/include/asm/pgtable-2level.h | 1 + arch/arm/include/asm/pgtable-3level.h | 1 + arch/arm/include/asm/pgtable.h | 6 +++--- arch/arm/mm/proc-macros.S | 4 ++++ arch/arm/mm/proc-v7-2level.S | 4 ++++ arch/arm/mm/proc-v7-3level.S | 3 +++ 6 files changed, 16 insertions(+), 3 deletions(-) (limited to 'arch/arm/mm') diff --git a/arch/arm/include/asm/pgtable-2level.h b/arch/arm/include/asm/pgtable-2level.h index c44a1ecfc28a..f97ee02386ee 100644 --- a/arch/arm/include/asm/pgtable-2level.h +++ b/arch/arm/include/asm/pgtable-2level.h @@ -124,6 +124,7 @@ #define L_PTE_USER (_AT(pteval_t, 1) << 8) #define L_PTE_XN (_AT(pteval_t, 1) << 9) #define L_PTE_SHARED (_AT(pteval_t, 1) << 10) /* shared(v6), coherent(xsc3) */ +#define L_PTE_NONE (_AT(pteval_t, 1) << 11) /* * These are the memory types, defined to be compatible with diff --git a/arch/arm/include/asm/pgtable-3level.h b/arch/arm/include/asm/pgtable-3level.h index e32311a9abca..a3f37929940a 100644 --- a/arch/arm/include/asm/pgtable-3level.h +++ b/arch/arm/include/asm/pgtable-3level.h @@ -77,6 +77,7 @@ #define L_PTE_XN (_AT(pteval_t, 1) << 54) /* XN */ #define L_PTE_DIRTY (_AT(pteval_t, 1) << 55) /* unused */ #define L_PTE_SPECIAL (_AT(pteval_t, 1) << 56) /* unused */ +#define L_PTE_NONE (_AT(pteval_t, 1) << 57) /* PROT_NONE */ /* * To be used in assembly code with the upper page attributes. diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h index ccf34b6e9903..9c82f988c0e3 100644 --- a/arch/arm/include/asm/pgtable.h +++ b/arch/arm/include/asm/pgtable.h @@ -73,7 +73,7 @@ extern pgprot_t pgprot_kernel; #define _MOD_PROT(p, b) __pgprot(pgprot_val(p) | (b)) -#define PAGE_NONE _MOD_PROT(pgprot_user, L_PTE_XN | L_PTE_RDONLY) +#define PAGE_NONE _MOD_PROT(pgprot_user, L_PTE_XN | L_PTE_RDONLY | L_PTE_NONE) #define PAGE_SHARED _MOD_PROT(pgprot_user, L_PTE_USER | L_PTE_XN) #define PAGE_SHARED_EXEC _MOD_PROT(pgprot_user, L_PTE_USER) #define PAGE_COPY _MOD_PROT(pgprot_user, L_PTE_USER | L_PTE_RDONLY | L_PTE_XN) @@ -83,7 +83,7 @@ extern pgprot_t pgprot_kernel; #define PAGE_KERNEL _MOD_PROT(pgprot_kernel, L_PTE_XN) #define PAGE_KERNEL_EXEC pgprot_kernel -#define __PAGE_NONE __pgprot(_L_PTE_DEFAULT | L_PTE_RDONLY | L_PTE_XN) +#define __PAGE_NONE __pgprot(_L_PTE_DEFAULT | L_PTE_RDONLY | L_PTE_XN | L_PTE_NONE) #define __PAGE_SHARED __pgprot(_L_PTE_DEFAULT | L_PTE_USER | L_PTE_XN) #define __PAGE_SHARED_EXEC __pgprot(_L_PTE_DEFAULT | L_PTE_USER) #define __PAGE_COPY __pgprot(_L_PTE_DEFAULT | L_PTE_USER | L_PTE_RDONLY | L_PTE_XN) @@ -240,7 +240,7 @@ static inline pte_t pte_mkspecial(pte_t pte) { return pte; } static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) { - const pteval_t mask = L_PTE_XN | L_PTE_RDONLY | L_PTE_USER; + const pteval_t mask = L_PTE_XN | L_PTE_RDONLY | L_PTE_USER | L_PTE_NONE; pte_val(pte) = (pte_val(pte) & ~mask) | (pgprot_val(newprot) & mask); return pte; } diff --git a/arch/arm/mm/proc-macros.S b/arch/arm/mm/proc-macros.S index b29a2265af01..eb6aa73bc8b7 100644 --- a/arch/arm/mm/proc-macros.S +++ b/arch/arm/mm/proc-macros.S @@ -167,6 +167,10 @@ tst r1, #L_PTE_YOUNG tstne r1, #L_PTE_PRESENT moveq r3, #0 +#ifndef CONFIG_CPU_USE_DOMAINS + tstne r1, #L_PTE_NONE + movne r3, #0 +#endif str r3, [r0] mcr p15, 0, r0, c7, c10, 1 @ flush_pte diff --git a/arch/arm/mm/proc-v7-2level.S b/arch/arm/mm/proc-v7-2level.S index e755e9f8d1b4..6d98c13ab827 100644 --- a/arch/arm/mm/proc-v7-2level.S +++ b/arch/arm/mm/proc-v7-2level.S @@ -101,6 +101,10 @@ ENTRY(cpu_v7_set_pte_ext) tst r1, #L_PTE_YOUNG tstne r1, #L_PTE_VALID +#ifndef CONFIG_CPU_USE_DOMAINS + eorne r1, r1, #L_PTE_NONE + tstne r1, #L_PTE_NONE +#endif moveq r3, #0 ARM( str r3, [r0, #2048]! ) diff --git a/arch/arm/mm/proc-v7-3level.S b/arch/arm/mm/proc-v7-3level.S index d23d067e1904..7b56386f9496 100644 --- a/arch/arm/mm/proc-v7-3level.S +++ b/arch/arm/mm/proc-v7-3level.S @@ -67,6 +67,9 @@ ENTRY(cpu_v7_set_pte_ext) #ifdef CONFIG_MMU tst r2, #L_PTE_VALID beq 1f + tst r3, #1 << (57 - 32) @ L_PTE_NONE + bicne r2, #L_PTE_VALID + bne 1f tst r3, #1 << (55 - 32) @ L_PTE_DIRTY orreq r2, #L_PTE_RDONLY 1: strd r2, r3, [r0] -- cgit v1.2.3-55-g7522 From e40678559fdf3f56ce9a349365fbf39e1f63ecc0 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Thu, 8 Nov 2012 19:46:07 +0100 Subject: ARM: 7573/1: idmap: use flush_cache_louis() and flush TLBs only when necessary Flushing the cache is needed for the hardware to see the idmap table and therefore can be done at init time. On ARMv7 it is not necessary to flush L2 so flush_cache_louis() is used here instead. There is no point flushing the cache in setup_mm_for_reboot() as the caller should, and already is, taking care of this. If switching the memory map requires a cache flush, then cpu_switch_mm() already includes that operation. What is not done by cpu_switch_mm() on ASID capable CPUs is TLB flushing as the whole point of the ASID is to tag the TLBs and avoid flushing them on a context switch. Since we don't have a clean ASID for the identity mapping, we need to flush the TLB explicitly in that case. Otherwise this is already performed by cpu_switch_mm(). Signed-off-by: Nicolas Pitre Acked-by: Will Deacon Signed-off-by: Russell King --- arch/arm/mm/idmap.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) (limited to 'arch/arm/mm') diff --git a/arch/arm/mm/idmap.c b/arch/arm/mm/idmap.c index ab88ed4f8e08..99db769307ec 100644 --- a/arch/arm/mm/idmap.c +++ b/arch/arm/mm/idmap.c @@ -92,6 +92,9 @@ static int __init init_static_idmap(void) (long long)idmap_start, (long long)idmap_end); identity_mapping_add(idmap_pgd, idmap_start, idmap_end); + /* Flush L1 for the hardware to see this page table content */ + flush_cache_louis(); + return 0; } early_initcall(init_static_idmap); @@ -103,12 +106,15 @@ early_initcall(init_static_idmap); */ void setup_mm_for_reboot(void) { - /* Clean and invalidate L1. */ - flush_cache_all(); - /* Switch to the identity mapping. */ cpu_switch_mm(idmap_pgd, &init_mm); - /* Flush the TLB. */ +#ifdef CONFIG_CPU_HAS_ASID + /* + * We don't have a clean ASID for the identity mapping, which + * may clash with virtual addresses of the previous page tables + * and therefore potentially in the TLB. + */ local_flush_tlb_all(); +#endif } -- cgit v1.2.3-55-g7522 From 3e99675af1b25a191c467700499b1cbe5585a778 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Sun, 25 Nov 2012 03:24:32 +0100 Subject: ARM: 7582/2: rename kvm_seq to vmalloc_seq so to avoid confusion with KVM The kvm_seq value has nothing to do what so ever with this other KVM. Given that KVM support on ARM is imminent, it's best to rename kvm_seq into something else to clearly identify what it is about i.e. a sequence number for vmalloc section mappings. Signed-off-by: Nicolas Pitre Signed-off-by: Russell King --- arch/arm/include/asm/mmu.h | 2 +- arch/arm/include/asm/mmu_context.h | 6 +++--- arch/arm/mm/context.c | 4 ++-- arch/arm/mm/ioremap.c | 16 ++++++++-------- 4 files changed, 14 insertions(+), 14 deletions(-) (limited to 'arch/arm/mm') diff --git a/arch/arm/include/asm/mmu.h b/arch/arm/include/asm/mmu.h index 5b53b53ab5cf..9f77e7804f3b 100644 --- a/arch/arm/include/asm/mmu.h +++ b/arch/arm/include/asm/mmu.h @@ -7,7 +7,7 @@ typedef struct { #ifdef CONFIG_CPU_HAS_ASID u64 id; #endif - unsigned int kvm_seq; + unsigned int vmalloc_seq; } mm_context_t; #ifdef CONFIG_CPU_HAS_ASID diff --git a/arch/arm/include/asm/mmu_context.h b/arch/arm/include/asm/mmu_context.h index a64f61cb23d1..e1f644bc7cc5 100644 --- a/arch/arm/include/asm/mmu_context.h +++ b/arch/arm/include/asm/mmu_context.h @@ -20,7 +20,7 @@ #include #include -void __check_kvm_seq(struct mm_struct *mm); +void __check_vmalloc_seq(struct mm_struct *mm); #ifdef CONFIG_CPU_HAS_ASID @@ -34,8 +34,8 @@ void check_and_switch_context(struct mm_struct *mm, struct task_struct *tsk); static inline void check_and_switch_context(struct mm_struct *mm, struct task_struct *tsk) { - if (unlikely(mm->context.kvm_seq != init_mm.context.kvm_seq)) - __check_kvm_seq(mm); + if (unlikely(mm->context.vmalloc_seq != init_mm.context.vmalloc_seq)) + __check_vmalloc_seq(mm); if (irqs_disabled()) /* diff --git a/arch/arm/mm/context.c b/arch/arm/mm/context.c index 7a27d7363be2..bc4a5e9ebb78 100644 --- a/arch/arm/mm/context.c +++ b/arch/arm/mm/context.c @@ -186,8 +186,8 @@ void check_and_switch_context(struct mm_struct *mm, struct task_struct *tsk) unsigned long flags; unsigned int cpu = smp_processor_id(); - if (unlikely(mm->context.kvm_seq != init_mm.context.kvm_seq)) - __check_kvm_seq(mm); + if (unlikely(mm->context.vmalloc_seq != init_mm.context.vmalloc_seq)) + __check_vmalloc_seq(mm); /* * Required during context switch to avoid speculative page table diff --git a/arch/arm/mm/ioremap.c b/arch/arm/mm/ioremap.c index 5dcc2fd46c46..88fd86cf3d9a 100644 --- a/arch/arm/mm/ioremap.c +++ b/arch/arm/mm/ioremap.c @@ -47,18 +47,18 @@ int ioremap_page(unsigned long virt, unsigned long phys, } EXPORT_SYMBOL(ioremap_page); -void __check_kvm_seq(struct mm_struct *mm) +void __check_vmalloc_seq(struct mm_struct *mm) { unsigned int seq; do { - seq = init_mm.context.kvm_seq; + seq = init_mm.context.vmalloc_seq; memcpy(pgd_offset(mm, VMALLOC_START), pgd_offset_k(VMALLOC_START), sizeof(pgd_t) * (pgd_index(VMALLOC_END) - pgd_index(VMALLOC_START))); - mm->context.kvm_seq = seq; - } while (seq != init_mm.context.kvm_seq); + mm->context.vmalloc_seq = seq; + } while (seq != init_mm.context.vmalloc_seq); } #if !defined(CONFIG_SMP) && !defined(CONFIG_ARM_LPAE) @@ -89,13 +89,13 @@ static void unmap_area_sections(unsigned long virt, unsigned long size) if (!pmd_none(pmd)) { /* * Clear the PMD from the page table, and - * increment the kvm sequence so others + * increment the vmalloc sequence so others * notice this change. * * Note: this is still racy on SMP machines. */ pmd_clear(pmdp); - init_mm.context.kvm_seq++; + init_mm.context.vmalloc_seq++; /* * Free the page table, if there was one. @@ -112,8 +112,8 @@ static void unmap_area_sections(unsigned long virt, unsigned long size) * Ensure that the active_mm is up to date - we want to * catch any use-after-iounmap cases. */ - if (current->active_mm->context.kvm_seq != init_mm.context.kvm_seq) - __check_kvm_seq(current->active_mm); + if (current->active_mm->context.vmalloc_seq != init_mm.context.vmalloc_seq) + __check_vmalloc_seq(current->active_mm); flush_tlb_kernel_range(virt, end); } -- cgit v1.2.3-55-g7522