From 57853e8906a04a86c32fb96d8421b923c6d64162 Mon Sep 17 00:00:00 2001
From: Will Deacon
Date: Tue, 7 Jul 2015 18:19:38 +0100
Subject: ARM: 8403/1: kbuild: don't use generic mcs_spinlock.h header

We provide our own implementation of asm/mcs_spinlock.h, so there's
no need to ask for the (empty) generic version.

Signed-off-by: Will Deacon
Signed-off-by: Russell King
---
 arch/arm/include/asm/Kbuild | 1 -
 1 file changed, 1 deletion(-)

(limited to 'arch/arm/include/asm')

diff --git a/arch/arm/include/asm/Kbuild b/arch/arm/include/asm/Kbuild
index 83c50193626c..517ef6dd22b9 100644
--- a/arch/arm/include/asm/Kbuild
+++ b/arch/arm/include/asm/Kbuild
@@ -12,7 +12,6 @@ generic-y += irq_regs.h
 generic-y += kdebug.h
 generic-y += local.h
 generic-y += local64.h
-generic-y += mcs_spinlock.h
 generic-y += msgbuf.h
 generic-y += param.h
 generic-y += parport.h
-- 
cgit v1.2.3-55-g7522


From f81309067ff2d84788316c513a415f6bb8c9171f Mon Sep 17 00:00:00 2001
From: Russell King
Date: Mon, 1 Jun 2015 23:44:46 +0100
Subject: ARM: move heavy barrier support out of line

The existing memory barrier macro causes a significant amount of code
to be inserted inline at every call site. For example, in
gpio_set_irq_type(), we have this for mb():

	c0344c08:	f57ff04e	dsb	st
	c0344c0c:	e59f8190	ldr	r8, [pc, #400]	; c0344da4
	c0344c10:	e3590004	cmp	r9, #4
	c0344c14:	e5983014	ldr	r3, [r8, #20]
	c0344c18:	0a000054	beq	c0344d70
	c0344c1c:	e3530000	cmp	r3, #0
	c0344c20:	0a000004	beq	c0344c38
	c0344c24:	e50b2030	str	r2, [fp, #-48]	; 0xffffffd0
	c0344c28:	e50bc034	str	ip, [fp, #-52]	; 0xffffffcc
	c0344c2c:	e12fff33	blx	r3
	c0344c30:	e51bc034	ldr	ip, [fp, #-52]	; 0xffffffcc
	c0344c34:	e51b2030	ldr	r2, [fp, #-48]	; 0xffffffd0
	c0344c38:	e5963004	ldr	r3, [r6, #4]

Moving the outer_cache_sync() call out of line reduces the impact of
the barrier:

	c0344968:	f57ff04e	dsb	st
	c034496c:	e35a0004	cmp	sl, #4
	c0344970:	e50b2030	str	r2, [fp, #-48]	; 0xffffffd0
	c0344974:	0a000044	beq	c0344a8c
	c0344978:	ebf363dd	bl	c001d8f4
	c034497c:	e5953004	ldr	r3, [r5, #4]

This should reduce the cache footprint of this code. Overall, this
results in a reduction of around 20K in the kernel size:

    text	  data	     bss	 dec	    hex	filename
10773970	667392	10369656	21811018	14ccf4a	../build/imx6/vmlinux-old
10754219	667392	10369656	21791267	14c8223	../build/imx6/vmlinux-new

Another advantage to this approach is that we can finally resolve the
issue of SoCs which have their own memory barrier requirements within
multiplatform kernels (such as OMAP.) Here, the bus interconnects need
additional handling to ensure that writes become visible in the correct
order (eg, between dma_map() operations, writes to DMA coherent memory,
and MMIO accesses.)
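
[Editor's note: condensed at source level, the split this series converges
on looks roughly like the sketch below. This is an illustration only -- the
authoritative definitions are in the barrier.h and flush.c hunks of the
diff that follows -- but it shows why each call site shrinks: the cheap dsb
stays inline, and the rarely-needed L2 drain becomes one out-of-line call.]

	/* Illustrative sketch only; see the real patch hunks below. */

	/* Defined once, out of line (arch/arm/mm/flush.c in the patch): */
	void arm_heavy_mb(void)
	{
		if (outer_cache.sync)	/* drain the outer (L2) write buffer */
			outer_cache.sync();
	}

	/*
	 * Inlined at every mb()/wmb() call site: one dsb plus one bl,
	 * instead of the whole outer-cache sync sequence expanded inline.
	 */
	#define mb()	do { dsb(); arm_heavy_mb(); } while (0)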
Acked-by: Tony Lindgren
Acked-by: Richard Woodruff
Signed-off-by: Russell King
---
 arch/arm/include/asm/barrier.h    | 12 +++++++++---
 arch/arm/include/asm/outercache.h | 17 -----------------
 arch/arm/kernel/irq.c             |  1 +
 arch/arm/mach-mmp/pm-pxa910.c     |  1 +
 arch/arm/mach-prima2/pm.c         |  1 +
 arch/arm/mach-ux500/cache-l2x0.c  |  1 +
 arch/arm/mm/Kconfig               |  4 ++++
 arch/arm/mm/flush.c               | 11 +++++++++++
 8 files changed, 28 insertions(+), 20 deletions(-)

(limited to 'arch/arm/include/asm')

diff --git a/arch/arm/include/asm/barrier.h b/arch/arm/include/asm/barrier.h
index 6c2327e1c732..fea99b0e2087 100644
--- a/arch/arm/include/asm/barrier.h
+++ b/arch/arm/include/asm/barrier.h
@@ -2,7 +2,6 @@
 #define __ASM_BARRIER_H

 #ifndef __ASSEMBLY__
-#include

 #define nop() __asm__ __volatile__("mov\tr0,r0\t@ nop\n\t");

@@ -37,12 +36,19 @@
 #define dmb(x) __asm__ __volatile__ ("" : : : "memory")
 #endif

+#ifdef CONFIG_ARM_HEAVY_MB
+extern void arm_heavy_mb(void);
+#define __arm_heavy_mb(x...) do { dsb(x); arm_heavy_mb(); } while (0)
+#else
+#define __arm_heavy_mb(x...) dsb(x)
+#endif
+
 #ifdef CONFIG_ARCH_HAS_BARRIERS
 #include
 #elif defined(CONFIG_ARM_DMA_MEM_BUFFERABLE) || defined(CONFIG_SMP)
-#define mb()		do { dsb(); outer_sync(); } while (0)
+#define mb()		__arm_heavy_mb()
 #define rmb()		dsb()
-#define wmb()		do { dsb(st); outer_sync(); } while (0)
+#define wmb()		__arm_heavy_mb(st)
 #define dma_rmb()	dmb(osh)
 #define dma_wmb()	dmb(oshst)
 #else
diff --git a/arch/arm/include/asm/outercache.h b/arch/arm/include/asm/outercache.h
index 563b92fc2f41..c2bf24f40177 100644
--- a/arch/arm/include/asm/outercache.h
+++ b/arch/arm/include/asm/outercache.h
@@ -129,21 +129,4 @@ static inline void outer_resume(void) { }

 #endif

-#ifdef CONFIG_OUTER_CACHE_SYNC
-/**
- * outer_sync - perform a sync point for outer cache
- *
- * Ensure that all outer cache operations are complete and any store
- * buffers are drained.
- */
-static inline void outer_sync(void)
-{
-	if (outer_cache.sync)
-		outer_cache.sync();
-}
-#else
-static inline void outer_sync(void)
-{ }
-#endif
-
 #endif	/* __ASM_OUTERCACHE_H */
diff --git a/arch/arm/kernel/irq.c b/arch/arm/kernel/irq.c
index 350f188c92d2..b96c8ed1723a 100644
--- a/arch/arm/kernel/irq.c
+++ b/arch/arm/kernel/irq.c
@@ -39,6 +39,7 @@
 #include

 #include
+#include
 #include
 #include
 #include
diff --git a/arch/arm/mach-mmp/pm-pxa910.c b/arch/arm/mach-mmp/pm-pxa910.c
index 04c9daf9f8d7..7db5870d127f 100644
--- a/arch/arm/mach-mmp/pm-pxa910.c
+++ b/arch/arm/mach-mmp/pm-pxa910.c
@@ -18,6 +18,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
diff --git a/arch/arm/mach-prima2/pm.c b/arch/arm/mach-prima2/pm.c
index d99d08eeb966..83e94c95e314 100644
--- a/arch/arm/mach-prima2/pm.c
+++ b/arch/arm/mach-prima2/pm.c
@@ -16,6 +16,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
diff --git a/arch/arm/mach-ux500/cache-l2x0.c b/arch/arm/mach-ux500/cache-l2x0.c
index 7557bede7ae6..780bd13cd7e3 100644
--- a/arch/arm/mach-ux500/cache-l2x0.c
+++ b/arch/arm/mach-ux500/cache-l2x0.c
@@ -8,6 +8,7 @@
 #include
 #include
+#include
 #include

 #include "db8500-regs.h"
diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig
index 7c6b976ab8d3..df7537f12469 100644
--- a/arch/arm/mm/Kconfig
+++ b/arch/arm/mm/Kconfig
@@ -883,6 +883,7 @@ config OUTER_CACHE

 config OUTER_CACHE_SYNC
 	bool
+	select ARM_HEAVY_MB
 	help
 	  The outer cache has a outer_cache_fns.sync function pointer that
 	  can be used to drain the write buffer of the outer cache.
@@ -1031,6 +1032,9 @@ config ARCH_HAS_BARRIERS
 	  This option allows the use of custom mandatory barriers
 	  included via the mach/barriers.h file.

+config ARM_HEAVY_MB
+	bool
+
 config ARCH_SUPPORTS_BIG_ENDIAN
 	bool
 	help
diff --git a/arch/arm/mm/flush.c b/arch/arm/mm/flush.c
index 34b66af516ea..ce6c2960d5ac 100644
--- a/arch/arm/mm/flush.c
+++ b/arch/arm/mm/flush.c
@@ -21,6 +21,17 @@

 #include "mm.h"

+#ifdef CONFIG_ARM_HEAVY_MB
+void arm_heavy_mb(void)
+{
+#ifdef CONFIG_OUTER_CACHE_SYNC
+	if (outer_cache.sync)
+		outer_cache.sync();
+#endif
+}
+EXPORT_SYMBOL(arm_heavy_mb);
+#endif
+
 #ifdef CONFIG_CPU_CACHE_VIPT
 static void flush_pfn_alias(unsigned long pfn, unsigned long vaddr)
-- 
cgit v1.2.3-55-g7522


From 4e1f8a6f1d978f033f1751e2887b3a69fab3f878 Mon Sep 17 00:00:00 2001
From: Russell King
Date: Wed, 3 Jun 2015 13:10:16 +0100
Subject: ARM: add soc memory barrier extension

Add an extension to the heavy barrier code to allow a SoC specific
memory barrier function to be provided. This is needed for platforms
where the interconnect has weak ordering, and thus needs assistance
to ensure that memory writes are properly visible in the correct order
to other parts of the system.

Acked-by: Tony Lindgren
Acked-by: Richard Woodruff
Signed-off-by: Russell King
---
 arch/arm/include/asm/barrier.h | 1 +
 arch/arm/mm/flush.c            | 4 ++++
 2 files changed, 5 insertions(+)

(limited to 'arch/arm/include/asm')

diff --git a/arch/arm/include/asm/barrier.h b/arch/arm/include/asm/barrier.h
index fea99b0e2087..3d8f1d3ad9a7 100644
--- a/arch/arm/include/asm/barrier.h
+++ b/arch/arm/include/asm/barrier.h
@@ -37,6 +37,7 @@
 #endif

 #ifdef CONFIG_ARM_HEAVY_MB
+extern void (*soc_mb)(void);
 extern void arm_heavy_mb(void);
 #define __arm_heavy_mb(x...) do { dsb(x); arm_heavy_mb(); } while (0)
 #else
diff --git a/arch/arm/mm/flush.c b/arch/arm/mm/flush.c
index ce6c2960d5ac..1ec8e7590fc6 100644
--- a/arch/arm/mm/flush.c
+++ b/arch/arm/mm/flush.c
@@ -22,12 +22,16 @@
 #include "mm.h"

 #ifdef CONFIG_ARM_HEAVY_MB
+void (*soc_mb)(void);
+
 void arm_heavy_mb(void)
 {
 #ifdef CONFIG_OUTER_CACHE_SYNC
 	if (outer_cache.sync)
 		outer_cache.sync();
 #endif
+	if (soc_mb)
+		soc_mb();
 }
 EXPORT_SYMBOL(arm_heavy_mb);
 #endif
-- 
cgit v1.2.3-55-g7522

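[Editor's note: to illustrate the intended use of the hook added above, a
platform with a weakly ordered interconnect would install its drain routine
once at init time, after which every heavy barrier (mb()/wmb()) reaches it
via arm_heavy_mb(). The sketch below is hypothetical -- the function names,
register offsets and init hook are invented for illustration and are not
taken from these patches; the real SoC users land separately.]

	/* Hypothetical SoC code, sketched against the soc_mb hook above. */
	#include <linux/init.h>
	#include <linux/io.h>
	#include <asm/barrier.h>
	#include <asm/processor.h>

	static void __iomem *interconnect_base;	/* mapped elsewhere at init */

	static void example_soc_mb(void)
	{
		/* Register offsets are made up for the example. */
		writel_relaxed(1, interconnect_base + 0x20);	/* trigger drain */
		while (readl_relaxed(interconnect_base + 0x24) & 1)
			cpu_relax();			/* wait for completion */
	}

	static int __init example_soc_barrier_init(void)
	{
		soc_mb = example_soc_mb;	/* arm_heavy_mb() now calls this */
		return 0;
	}
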
From fa8ad7889d83bcf0a6cdbf6d3622f3ec019cde14 Mon Sep 17 00:00:00 2001
From: Mark Rutland
Date: Mon, 6 Jul 2015 12:23:53 +0100
Subject: arm: perf: factor arm_pmu core out to drivers

To enable sharing of the arm_pmu code with arm64, this patch factors it
out to drivers/perf/. A new drivers/perf directory is added for
performance monitor drivers to live under.

MAINTAINERS is updated accordingly. Files added previously without a
corresponding MAINTAINERS update (perf_regs.c, perf_callchain.c, and
perf_event.h) are also added.

Cc: Arnaldo Carvalho de Melo
Cc: Greg Kroah-Hartman
Cc: Ingo Molnar
Cc: Linus Walleij
Cc: Paul Mackerras
Cc: Peter Zijlstra
Cc: Russell King
Cc: Will Deacon
Signed-off-by: Mark Rutland
[will: augmented Kconfig help slightly]
Signed-off-by: Will Deacon
---
 MAINTAINERS                         |   6 +-
 arch/arm/Kconfig                    |   8 +-
 arch/arm/include/asm/pmu.h          | 154 ------
 arch/arm/kernel/Makefile            |   3 +-
 arch/arm/kernel/perf_event.c        | 921 ------------------------------------
 arch/arm/kernel/perf_event_v6.c     |   2 +-
 arch/arm/kernel/perf_event_v7.c     |   2 +-
 arch/arm/kernel/perf_event_xscale.c |   2 +-
 arch/arm/mach-ux500/cpu-db8500.c    |   2 +-
 drivers/Kconfig                     |   2 +
 drivers/Makefile                    |   1 +
 drivers/perf/Kconfig                |  15 +
 drivers/perf/Makefile               |   1 +
 drivers/perf/arm_pmu.c              | 921 ++++++++++++++++++++++++++++++++++++
 include/linux/perf/arm_pmu.h        | 154 ++++++
 15 files changed, 1105 insertions(+), 1089 deletions(-)
 delete mode 100644 arch/arm/include/asm/pmu.h
 delete mode 100644 arch/arm/kernel/perf_event.c
 create mode 100644 drivers/perf/Kconfig
 create mode 100644 drivers/perf/Makefile
 create mode 100644 drivers/perf/arm_pmu.c
 create mode 100644 include/linux/perf/arm_pmu.h

(limited to 'arch/arm/include/asm')

diff --git a/MAINTAINERS b/MAINTAINERS
index fd6078443083..485c92ced47d 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -799,11 +799,13 @@ F:	arch/arm/include/asm/floppy.h
 ARM PMU PROFILING AND DEBUGGING
 M:	Will Deacon
 S:	Maintained
-F:	arch/arm/kernel/perf_event*
+F:	arch/arm/kernel/perf_*
 F:	arch/arm/oprofile/common.c
-F:	arch/arm/include/asm/pmu.h
 F:	arch/arm/kernel/hw_breakpoint.c
 F:	arch/arm/include/asm/hw_breakpoint.h
+F:	arch/arm/include/asm/perf_event.h
+F:	drivers/perf/arm_pmu.c
+F:	include/linux/perf/arm_pmu.h

 ARM PORT
 M:	Russell King
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 1c5021002fe4..4f7bc3d4b186 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -1701,12 +1701,8 @@ config HIGHPTE
 	  user-space 2nd level page tables to reside in high memory.

 config HW_PERF_EVENTS
-	bool "Enable hardware performance counter support for perf events"
-	depends on PERF_EVENTS
-	default y
-	help
-	  Enable hardware performance counter support for perf events. If
-	  disabled, perf events will use software events only.
+	def_bool y
+	depends on ARM_PMU

 config SYS_SUPPORTS_HUGETLBFS
 	def_bool y
diff --git a/arch/arm/include/asm/pmu.h b/arch/arm/include/asm/pmu.h
deleted file mode 100644
index 3fc87dfd77e6..000000000000
--- a/arch/arm/include/asm/pmu.h
+++ /dev/null
@@ -1,154 +0,0 @@
-/*
- * linux/arch/arm/include/asm/pmu.h
- *
- * Copyright (C) 2009 picoChip Designs Ltd, Jamie Iles
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- */
-
-#ifndef __ARM_PMU_H__
-#define __ARM_PMU_H__
-
-#include
-#include
-
-#include
-
-/*
- * struct arm_pmu_platdata - ARM PMU platform data
- *
- * @handle_irq: an optional handler which will be called from the
- *	interrupt and passed the address of the low level handler,
- *	and can be used to implement any platform specific handling
- *	before or after calling it.
- */
-struct arm_pmu_platdata {
-	irqreturn_t (*handle_irq)(int irq, void *dev,
-				  irq_handler_t pmu_handler);
-};
-
-#ifdef CONFIG_HW_PERF_EVENTS
-
-/*
- * The ARMv7 CPU PMU supports up to 32 event counters.
- */
-#define ARMPMU_MAX_HWEVENTS		32
-
-#define HW_OP_UNSUPPORTED		0xFFFF
-#define C(_x)				PERF_COUNT_HW_CACHE_##_x
-#define CACHE_OP_UNSUPPORTED		0xFFFF
-
-#define PERF_MAP_ALL_UNSUPPORTED					\
-	[0 ... PERF_COUNT_HW_MAX - 1] = HW_OP_UNSUPPORTED
-
-#define PERF_CACHE_MAP_ALL_UNSUPPORTED					\
-[0 ... C(MAX) - 1] = {						\
-	[0 ... C(OP_MAX) - 1] = {					\
-		[0 ... C(RESULT_MAX) - 1] = CACHE_OP_UNSUPPORTED,	\
-	},								\
-}
-
-/* The events for a given PMU register set. */
-struct pmu_hw_events {
-	/*
-	 * The events that are active on the PMU for the given index.
-	 */
-	struct perf_event	*events[ARMPMU_MAX_HWEVENTS];
-
-	/*
-	 * A 1 bit for an index indicates that the counter is being used for
-	 * an event. A 0 means that the counter can be used.
-	 */
-	DECLARE_BITMAP(used_mask, ARMPMU_MAX_HWEVENTS);
-
-	/*
-	 * Hardware lock to serialize accesses to PMU registers. Needed for the
-	 * read/modify/write sequences.
-	 */
-	raw_spinlock_t		pmu_lock;
-
-	/*
-	 * When using percpu IRQs, we need a percpu dev_id. Place it here as we
-	 * already have to allocate this struct per cpu.
-	 */
-	struct arm_pmu		*percpu_pmu;
-};
-
-struct arm_pmu {
-	struct pmu	pmu;
-	cpumask_t	active_irqs;
-	cpumask_t	supported_cpus;
-	int		*irq_affinity;
-	char		*name;
-	irqreturn_t	(*handle_irq)(int irq_num, void *dev);
-	void		(*enable)(struct perf_event *event);
-	void		(*disable)(struct perf_event *event);
-	int		(*get_event_idx)(struct pmu_hw_events *hw_events,
-					 struct perf_event *event);
-	void		(*clear_event_idx)(struct pmu_hw_events *hw_events,
-					   struct perf_event *event);
-	int		(*set_event_filter)(struct hw_perf_event *evt,
-					    struct perf_event_attr *attr);
-	u32		(*read_counter)(struct perf_event *event);
-	void		(*write_counter)(struct perf_event *event, u32 val);
-	void		(*start)(struct arm_pmu *);
-	void		(*stop)(struct arm_pmu *);
-	void		(*reset)(void *);
-	int		(*request_irq)(struct arm_pmu *, irq_handler_t handler);
-	void		(*free_irq)(struct arm_pmu *);
-	int		(*map_event)(struct perf_event *event);
-	int		num_events;
-	atomic_t	active_events;
-	struct mutex	reserve_mutex;
-	u64		max_period;
-	struct platform_device	*plat_device;
-	struct pmu_hw_events	__percpu *hw_events;
-	struct notifier_block	hotplug_nb;
-};
-
-#define to_arm_pmu(p) (container_of(p, struct arm_pmu, pmu))
-
-int armpmu_register(struct arm_pmu *armpmu, int type);
-
-u64 armpmu_event_update(struct perf_event *event);
-
-int armpmu_event_set_period(struct perf_event *event);
-
-int armpmu_map_event(struct perf_event *event,
-		     const unsigned (*event_map)[PERF_COUNT_HW_MAX],
-		     const unsigned (*cache_map)[PERF_COUNT_HW_CACHE_MAX]
-						[PERF_COUNT_HW_CACHE_OP_MAX]
-						[PERF_COUNT_HW_CACHE_RESULT_MAX],
-		     u32 raw_event_mask);
-
-struct pmu_probe_info {
-	unsigned int cpuid;
-	unsigned int mask;
-	int (*init)(struct arm_pmu *);
-};
-
-#define PMU_PROBE(_cpuid, _mask, _fn)	\
-{					\
-	.cpuid = (_cpuid),		\
-	.mask = (_mask),		\
-	.init = (_fn),			\
-}
-
-#define ARM_PMU_PROBE(_cpuid, _fn) \
-	PMU_PROBE(_cpuid, ARM_CPU_PART_MASK, _fn)
-
-#define ARM_PMU_XSCALE_MASK	((0xff << 24) | ARM_CPU_XSCALE_ARCH_MASK)
-
-#define XSCALE_PMU_PROBE(_version, _fn) \
-	PMU_PROBE(ARM_CPU_IMP_INTEL << 24 | _version, ARM_PMU_XSCALE_MASK, _fn)
-
-int arm_pmu_device_probe(struct platform_device *pdev,
-			 const struct of_device_id *of_table,
-			 const struct pmu_probe_info *probe_table);
-
-#endif /* CONFIG_HW_PERF_EVENTS */
-
-#endif /* __ARM_PMU_H__ */
diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile
index e69f7a19735d..fcb25c1c5c21 100644
--- a/arch/arm/kernel/Makefile
+++ b/arch/arm/kernel/Makefile
@@ -71,8 +71,7 @@ obj-$(CONFIG_CPU_PJ4)		+= pj4-cp0.o
 obj-$(CONFIG_CPU_PJ4B)		+= pj4-cp0.o
 obj-$(CONFIG_IWMMXT)		+= iwmmxt.o
 obj-$(CONFIG_PERF_EVENTS)	+= perf_regs.o perf_callchain.o
-obj-$(CONFIG_HW_PERF_EVENTS)	+= perf_event.o \
-				   perf_event_xscale.o perf_event_v6.o \
+obj-$(CONFIG_HW_PERF_EVENTS)	+= perf_event_xscale.o perf_event_v6.o \
 				   perf_event_v7.o
 CFLAGS_pj4-cp0.o		:= -marm
 AFLAGS_iwmmxt.o			:= -Wa,-mcpu=iwmmxt
diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c
deleted file mode 100644
index 1cb40651d783..000000000000
--- a/arch/arm/kernel/perf_event.c
+++ /dev/null
@@ -1,921 +0,0 @@
-#undef DEBUG
-
-/*
- * ARM performance counter support.
- *
- * Copyright (C) 2009 picoChip Designs, Ltd., Jamie Iles
- * Copyright (C) 2010 ARM Ltd., Will Deacon
- *
- * This code is based on the sparc64 perf event code, which is in turn based
- * on the x86 code.
- */
-#define pr_fmt(fmt) "hw perfevents: " fmt
-
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-
-#include
-#include
-#include
-
-static int
-armpmu_map_cache_event(const unsigned (*cache_map)
-				      [PERF_COUNT_HW_CACHE_MAX]
-				      [PERF_COUNT_HW_CACHE_OP_MAX]
-				      [PERF_COUNT_HW_CACHE_RESULT_MAX],
-		       u64 config)
-{
-	unsigned int cache_type, cache_op, cache_result, ret;
-
-	cache_type = (config >> 0) & 0xff;
-	if (cache_type >= PERF_COUNT_HW_CACHE_MAX)
-		return -EINVAL;
-
-	cache_op = (config >> 8) & 0xff;
-	if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX)
-		return -EINVAL;
-
-	cache_result = (config >> 16) & 0xff;
-	if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
-		return -EINVAL;
-
-	ret = (int)(*cache_map)[cache_type][cache_op][cache_result];
-
-	if (ret == CACHE_OP_UNSUPPORTED)
-		return -ENOENT;
-
-	return ret;
-}
-
-static int
-armpmu_map_hw_event(const unsigned (*event_map)[PERF_COUNT_HW_MAX], u64 config)
-{
-	int mapping;
-
-	if (config >= PERF_COUNT_HW_MAX)
-		return -EINVAL;
-
-	mapping = (*event_map)[config];
-	return mapping == HW_OP_UNSUPPORTED ? -ENOENT : mapping;
-}
-
-static int
-armpmu_map_raw_event(u32 raw_event_mask, u64 config)
-{
-	return (int)(config & raw_event_mask);
-}
-
-int
-armpmu_map_event(struct perf_event *event,
-		 const unsigned (*event_map)[PERF_COUNT_HW_MAX],
-		 const unsigned (*cache_map)
-				[PERF_COUNT_HW_CACHE_MAX]
-				[PERF_COUNT_HW_CACHE_OP_MAX]
-				[PERF_COUNT_HW_CACHE_RESULT_MAX],
-		 u32 raw_event_mask)
-{
-	u64 config = event->attr.config;
-	int type = event->attr.type;
-
-	if (type == event->pmu->type)
-		return armpmu_map_raw_event(raw_event_mask, config);
-
-	switch (type) {
-	case PERF_TYPE_HARDWARE:
-		return armpmu_map_hw_event(event_map, config);
-	case PERF_TYPE_HW_CACHE:
-		return armpmu_map_cache_event(cache_map, config);
-	case PERF_TYPE_RAW:
-		return armpmu_map_raw_event(raw_event_mask, config);
-	}
-
-	return -ENOENT;
-}
-
-int armpmu_event_set_period(struct perf_event *event)
-{
-	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
-	struct hw_perf_event *hwc = &event->hw;
-	s64 left = local64_read(&hwc->period_left);
-	s64 period = hwc->sample_period;
-	int ret = 0;
-
-	if (unlikely(left <= -period)) {
-		left = period;
-		local64_set(&hwc->period_left, left);
-		hwc->last_period = period;
-		ret = 1;
-	}
-
-	if (unlikely(left <= 0)) {
-		left += period;
-		local64_set(&hwc->period_left, left);
-		hwc->last_period = period;
-		ret = 1;
-	}
-
-	/*
-	 * Limit the maximum period to prevent the counter value
-	 * from overtaking the one we are about to program. In
-	 * effect we are reducing max_period to account for
-	 * interrupt latency (and we are being very conservative).
-	 */
-	if (left > (armpmu->max_period >> 1))
-		left = armpmu->max_period >> 1;
-
-	local64_set(&hwc->prev_count, (u64)-left);
-
-	armpmu->write_counter(event, (u64)(-left) & 0xffffffff);
-
-	perf_event_update_userpage(event);
-
-	return ret;
-}
-
-u64 armpmu_event_update(struct perf_event *event)
-{
-	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
-	struct hw_perf_event *hwc = &event->hw;
-	u64 delta, prev_raw_count, new_raw_count;
-
-again:
-	prev_raw_count = local64_read(&hwc->prev_count);
-	new_raw_count = armpmu->read_counter(event);
-
-	if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
-			    new_raw_count) != prev_raw_count)
-		goto again;
-
-	delta = (new_raw_count - prev_raw_count) & armpmu->max_period;
-
-	local64_add(delta, &event->count);
-	local64_sub(delta, &hwc->period_left);
-
-	return new_raw_count;
-}
-
-static void
-armpmu_read(struct perf_event *event)
-{
-	armpmu_event_update(event);
-}
-
-static void
-armpmu_stop(struct perf_event *event, int flags)
-{
-	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
-	struct hw_perf_event *hwc = &event->hw;
-
-	/*
-	 * ARM pmu always has to update the counter, so ignore
-	 * PERF_EF_UPDATE, see comments in armpmu_start().
-	 */
-	if (!(hwc->state & PERF_HES_STOPPED)) {
-		armpmu->disable(event);
-		armpmu_event_update(event);
-		hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
-	}
-}
-
-static void armpmu_start(struct perf_event *event, int flags)
-{
-	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
-	struct hw_perf_event *hwc = &event->hw;
-
-	/*
-	 * ARM pmu always has to reprogram the period, so ignore
-	 * PERF_EF_RELOAD, see the comment below.
-	 */
-	if (flags & PERF_EF_RELOAD)
-		WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
-
-	hwc->state = 0;
-	/*
-	 * Set the period again. Some counters can't be stopped, so when we
-	 * were stopped we simply disabled the IRQ source and the counter
-	 * may have been left counting. If we don't do this step then we may
-	 * get an interrupt too soon or *way* too late if the overflow has
-	 * happened since disabling.
-	 */
-	armpmu_event_set_period(event);
-	armpmu->enable(event);
-}
-
-static void
-armpmu_del(struct perf_event *event, int flags)
-{
-	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
-	struct pmu_hw_events *hw_events = this_cpu_ptr(armpmu->hw_events);
-	struct hw_perf_event *hwc = &event->hw;
-	int idx = hwc->idx;
-
-	armpmu_stop(event, PERF_EF_UPDATE);
-	hw_events->events[idx] = NULL;
-	clear_bit(idx, hw_events->used_mask);
-	if (armpmu->clear_event_idx)
-		armpmu->clear_event_idx(hw_events, event);
-
-	perf_event_update_userpage(event);
-}
-
-static int
-armpmu_add(struct perf_event *event, int flags)
-{
-	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
-	struct pmu_hw_events *hw_events = this_cpu_ptr(armpmu->hw_events);
-	struct hw_perf_event *hwc = &event->hw;
-	int idx;
-	int err = 0;
-
-	/* An event following a process won't be stopped earlier */
-	if (!cpumask_test_cpu(smp_processor_id(), &armpmu->supported_cpus))
-		return -ENOENT;
-
-	perf_pmu_disable(event->pmu);
-
-	/* If we don't have a space for the counter then finish early. */
-	idx = armpmu->get_event_idx(hw_events, event);
-	if (idx < 0) {
-		err = idx;
-		goto out;
-	}
-
-	/*
-	 * If there is an event in the counter we are going to use then make
-	 * sure it is disabled.
-	 */
-	event->hw.idx = idx;
-	armpmu->disable(event);
-	hw_events->events[idx] = event;
-
-	hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
-	if (flags & PERF_EF_START)
-		armpmu_start(event, PERF_EF_RELOAD);
-
-	/* Propagate our changes to the userspace mapping. */
-	perf_event_update_userpage(event);
-
-out:
-	perf_pmu_enable(event->pmu);
-	return err;
-}
-
-static int
-validate_event(struct pmu *pmu, struct pmu_hw_events *hw_events,
-			       struct perf_event *event)
-{
-	struct arm_pmu *armpmu;
-
-	if (is_software_event(event))
-		return 1;
-
-	/*
-	 * Reject groups spanning multiple HW PMUs (e.g. CPU + CCI). The
-	 * core perf code won't check that the pmu->ctx == leader->ctx
-	 * until after pmu->event_init(event).
-	 */
-	if (event->pmu != pmu)
-		return 0;
-
-	if (event->state < PERF_EVENT_STATE_OFF)
-		return 1;
-
-	if (event->state == PERF_EVENT_STATE_OFF && !event->attr.enable_on_exec)
-		return 1;
-
-	armpmu = to_arm_pmu(event->pmu);
-	return armpmu->get_event_idx(hw_events, event) >= 0;
-}
-
-static int
-validate_group(struct perf_event *event)
-{
-	struct perf_event *sibling, *leader = event->group_leader;
-	struct pmu_hw_events fake_pmu;
-
-	/*
-	 * Initialise the fake PMU. We only need to populate the
-	 * used_mask for the purposes of validation.
-	 */
-	memset(&fake_pmu.used_mask, 0, sizeof(fake_pmu.used_mask));
-
-	if (!validate_event(event->pmu, &fake_pmu, leader))
-		return -EINVAL;
-
-	list_for_each_entry(sibling, &leader->sibling_list, group_entry) {
-		if (!validate_event(event->pmu, &fake_pmu, sibling))
-			return -EINVAL;
-	}
-
-	if (!validate_event(event->pmu, &fake_pmu, event))
-		return -EINVAL;
-
-	return 0;
-}
-
-static irqreturn_t armpmu_dispatch_irq(int irq, void *dev)
-{
-	struct arm_pmu *armpmu;
-	struct platform_device *plat_device;
-	struct arm_pmu_platdata *plat;
-	int ret;
-	u64 start_clock, finish_clock;
-
-	/*
-	 * we request the IRQ with a (possibly percpu) struct arm_pmu**, but
-	 * the handlers expect a struct arm_pmu*. The percpu_irq framework will
-	 * do any necessary shifting, we just need to perform the first
-	 * dereference.
-	 */
-	armpmu = *(void **)dev;
-	plat_device = armpmu->plat_device;
-	plat = dev_get_platdata(&plat_device->dev);
-
-	start_clock = sched_clock();
-	if (plat && plat->handle_irq)
-		ret = plat->handle_irq(irq, armpmu, armpmu->handle_irq);
-	else
-		ret = armpmu->handle_irq(irq, armpmu);
-	finish_clock = sched_clock();
-
-	perf_sample_event_took(finish_clock - start_clock);
-	return ret;
-}
-
-static void
-armpmu_release_hardware(struct arm_pmu *armpmu)
-{
-	armpmu->free_irq(armpmu);
-}
-
-static int
-armpmu_reserve_hardware(struct arm_pmu *armpmu)
-{
-	int err = armpmu->request_irq(armpmu, armpmu_dispatch_irq);
-	if (err) {
-		armpmu_release_hardware(armpmu);
-		return err;
-	}
-
-	return 0;
-}
-
-static void
-hw_perf_event_destroy(struct perf_event *event)
-{
-	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
-	atomic_t *active_events = &armpmu->active_events;
-	struct mutex *pmu_reserve_mutex = &armpmu->reserve_mutex;
-
-	if (atomic_dec_and_mutex_lock(active_events, pmu_reserve_mutex)) {
-		armpmu_release_hardware(armpmu);
-		mutex_unlock(pmu_reserve_mutex);
-	}
-}
-
-static int
-event_requires_mode_exclusion(struct perf_event_attr *attr)
-{
-	return attr->exclude_idle || attr->exclude_user ||
-	       attr->exclude_kernel || attr->exclude_hv;
-}
-
-static int
-__hw_perf_event_init(struct perf_event *event)
-{
-	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
-	struct hw_perf_event *hwc = &event->hw;
-	int mapping;
-
-	mapping = armpmu->map_event(event);
-
-	if (mapping < 0) {
-		pr_debug("event %x:%llx not supported\n", event->attr.type,
-			 event->attr.config);
-		return mapping;
-	}
-
-	/*
-	 * We don't assign an index until we actually place the event onto
-	 * hardware. Use -1 to signify that we haven't decided where to put it
-	 * yet. For SMP systems, each core has it's own PMU so we can't do any
-	 * clever allocation or constraints checking at this point.
-	 */
-	hwc->idx		= -1;
-	hwc->config_base	= 0;
-	hwc->config		= 0;
-	hwc->event_base		= 0;
-
-	/*
-	 * Check whether we need to exclude the counter from certain modes.
-	 */
-	if ((!armpmu->set_event_filter ||
-	     armpmu->set_event_filter(hwc, &event->attr)) &&
-	     event_requires_mode_exclusion(&event->attr)) {
-		pr_debug("ARM performance counters do not support "
-			 "mode exclusion\n");
-		return -EOPNOTSUPP;
-	}
-
-	/*
-	 * Store the event encoding into the config_base field.
-	 */
-	hwc->config_base	    |= (unsigned long)mapping;
-
-	if (!is_sampling_event(event)) {
-		/*
-		 * For non-sampling runs, limit the sample_period to half
-		 * of the counter width. That way, the new counter value
-		 * is far less likely to overtake the previous one unless
-		 * you have some serious IRQ latency issues.
-		 */
-		hwc->sample_period  = armpmu->max_period >> 1;
-		hwc->last_period    = hwc->sample_period;
-		local64_set(&hwc->period_left, hwc->sample_period);
-	}
-
-	if (event->group_leader != event) {
-		if (validate_group(event) != 0)
-			return -EINVAL;
-	}
-
-	return 0;
-}
-
-static int armpmu_event_init(struct perf_event *event)
-{
-	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
-	int err = 0;
-	atomic_t *active_events = &armpmu->active_events;
-
-	/*
-	 * Reject CPU-affine events for CPUs that are of a different class to
-	 * that which this PMU handles. Process-following events (where
-	 * event->cpu == -1) can be migrated between CPUs, and thus we have to
-	 * reject them later (in armpmu_add) if they're scheduled on a
-	 * different class of CPU.
-	 */
-	if (event->cpu != -1 &&
-		!cpumask_test_cpu(event->cpu, &armpmu->supported_cpus))
-		return -ENOENT;
-
-	/* does not support taken branch sampling */
-	if (has_branch_stack(event))
-		return -EOPNOTSUPP;
-
-	if (armpmu->map_event(event) == -ENOENT)
-		return -ENOENT;
-
-	event->destroy = hw_perf_event_destroy;
-
-	if (!atomic_inc_not_zero(active_events)) {
-		mutex_lock(&armpmu->reserve_mutex);
-		if (atomic_read(active_events) == 0)
-			err = armpmu_reserve_hardware(armpmu);
-
-		if (!err)
-			atomic_inc(active_events);
-		mutex_unlock(&armpmu->reserve_mutex);
-	}
-
-	if (err)
-		return err;
-
-	err = __hw_perf_event_init(event);
-	if (err)
-		hw_perf_event_destroy(event);
-
-	return err;
-}
-
-static void armpmu_enable(struct pmu *pmu)
-{
-	struct arm_pmu *armpmu = to_arm_pmu(pmu);
-	struct pmu_hw_events *hw_events = this_cpu_ptr(armpmu->hw_events);
-	int enabled = bitmap_weight(hw_events->used_mask, armpmu->num_events);
-
-	/* For task-bound events we may be called on other CPUs */
-	if (!cpumask_test_cpu(smp_processor_id(), &armpmu->supported_cpus))
-		return;
-
-	if (enabled)
-		armpmu->start(armpmu);
-}
-
-static void armpmu_disable(struct pmu *pmu)
-{
-	struct arm_pmu *armpmu = to_arm_pmu(pmu);
-
-	/* For task-bound events we may be called on other CPUs */
-	if (!cpumask_test_cpu(smp_processor_id(), &armpmu->supported_cpus))
-		return;
-
-	armpmu->stop(armpmu);
-}
-
-/*
- * In heterogeneous systems, events are specific to a particular
- * microarchitecture, and aren't suitable for another. Thus, only match CPUs of
- * the same microarchitecture.
- */
-static int armpmu_filter_match(struct perf_event *event)
-{
-	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
-	unsigned int cpu = smp_processor_id();
-	return cpumask_test_cpu(cpu, &armpmu->supported_cpus);
-}
-
-static void armpmu_init(struct arm_pmu *armpmu)
-{
-	atomic_set(&armpmu->active_events, 0);
-	mutex_init(&armpmu->reserve_mutex);
-
-	armpmu->pmu = (struct pmu) {
-		.pmu_enable	= armpmu_enable,
-		.pmu_disable	= armpmu_disable,
-		.event_init	= armpmu_event_init,
-		.add		= armpmu_add,
-		.del		= armpmu_del,
-		.start		= armpmu_start,
-		.stop		= armpmu_stop,
-		.read		= armpmu_read,
-		.filter_match	= armpmu_filter_match,
-	};
-}
-
-int armpmu_register(struct arm_pmu *armpmu, int type)
-{
-	armpmu_init(armpmu);
-	pr_info("enabled with %s PMU driver, %d counters available\n",
-			armpmu->name, armpmu->num_events);
-	return perf_pmu_register(&armpmu->pmu, armpmu->name, type);
-}
-
-/* Set at runtime when we know what CPU type we are. */
-static struct arm_pmu *__oprofile_cpu_pmu;
-
-/*
- * Despite the names, these two functions are CPU-specific and are used
- * by the OProfile/perf code.
- */
-const char *perf_pmu_name(void)
-{
-	if (!__oprofile_cpu_pmu)
-		return NULL;
-
-	return __oprofile_cpu_pmu->name;
-}
-EXPORT_SYMBOL_GPL(perf_pmu_name);
-
-int perf_num_counters(void)
-{
-	int max_events = 0;
-
-	if (__oprofile_cpu_pmu != NULL)
-		max_events = __oprofile_cpu_pmu->num_events;
-
-	return max_events;
-}
-EXPORT_SYMBOL_GPL(perf_num_counters);
-
-static void cpu_pmu_enable_percpu_irq(void *data)
-{
-	int irq = *(int *)data;
-
-	enable_percpu_irq(irq, IRQ_TYPE_NONE);
-}
-
-static void cpu_pmu_disable_percpu_irq(void *data)
-{
-	int irq = *(int *)data;
-
-	disable_percpu_irq(irq);
-}
-
-static void cpu_pmu_free_irq(struct arm_pmu *cpu_pmu)
-{
-	int i, irq, irqs;
-	struct platform_device *pmu_device = cpu_pmu->plat_device;
-	struct pmu_hw_events __percpu *hw_events = cpu_pmu->hw_events;
-
-	irqs = min(pmu_device->num_resources, num_possible_cpus());
-
-	irq = platform_get_irq(pmu_device, 0);
-	if (irq >= 0 && irq_is_percpu(irq)) {
-		on_each_cpu(cpu_pmu_disable_percpu_irq, &irq, 1);
-		free_percpu_irq(irq, &hw_events->percpu_pmu);
-	} else {
-		for (i = 0; i < irqs; ++i) {
-			int cpu = i;
-
-			if (cpu_pmu->irq_affinity)
-				cpu = cpu_pmu->irq_affinity[i];
-
-			if (!cpumask_test_and_clear_cpu(cpu, &cpu_pmu->active_irqs))
-				continue;
-			irq = platform_get_irq(pmu_device, i);
-			if (irq >= 0)
-				free_irq(irq, per_cpu_ptr(&hw_events->percpu_pmu, cpu));
-		}
-	}
-}
-
-static int cpu_pmu_request_irq(struct arm_pmu *cpu_pmu, irq_handler_t handler)
-{
-	int i, err, irq, irqs;
-	struct platform_device *pmu_device = cpu_pmu->plat_device;
-	struct pmu_hw_events __percpu *hw_events = cpu_pmu->hw_events;
-
-	if (!pmu_device)
-		return -ENODEV;
-
-	irqs = min(pmu_device->num_resources, num_possible_cpus());
-	if (irqs < 1) {
-		pr_warn_once("perf/ARM: No irqs for PMU defined, sampling events not supported\n");
-		return 0;
-	}
-
-	irq = platform_get_irq(pmu_device, 0);
-	if (irq >= 0 && irq_is_percpu(irq)) {
-		err = request_percpu_irq(irq, handler, "arm-pmu",
-					 &hw_events->percpu_pmu);
-		if (err) {
-			pr_err("unable to request IRQ%d for ARM PMU counters\n",
-				irq);
-			return err;
-		}
-		on_each_cpu(cpu_pmu_enable_percpu_irq, &irq, 1);
-	} else {
-		for (i = 0; i < irqs; ++i) {
-			int cpu = i;
-
-			err = 0;
-			irq = platform_get_irq(pmu_device, i);
-			if (irq < 0)
-				continue;
-
-			if (cpu_pmu->irq_affinity)
-				cpu = cpu_pmu->irq_affinity[i];
-
-			/*
-			 * If we have a single PMU interrupt that we can't shift,
-			 * assume that we're running on a uniprocessor machine and
-			 * continue. Otherwise, continue without this interrupt.
-			 */
-			if (irq_set_affinity(irq, cpumask_of(cpu)) && irqs > 1) {
-				pr_warn("unable to set irq affinity (irq=%d, cpu=%u)\n",
-					irq, cpu);
-				continue;
-			}
-
-			err = request_irq(irq, handler,
-					  IRQF_NOBALANCING | IRQF_NO_THREAD, "arm-pmu",
-					  per_cpu_ptr(&hw_events->percpu_pmu, cpu));
-			if (err) {
-				pr_err("unable to request IRQ%d for ARM PMU counters\n",
-					irq);
-				return err;
-			}
-
-			cpumask_set_cpu(cpu, &cpu_pmu->active_irqs);
-		}
-	}
-
-	return 0;
-}
-
-/*
- * PMU hardware loses all context when a CPU goes offline.
- * When a CPU is hotplugged back in, since some hardware registers are
- * UNKNOWN at reset, the PMU must be explicitly reset to avoid reading
- * junk values out of them.
- */
-static int cpu_pmu_notify(struct notifier_block *b, unsigned long action,
-			  void *hcpu)
-{
-	int cpu = (unsigned long)hcpu;
-	struct arm_pmu *pmu = container_of(b, struct arm_pmu, hotplug_nb);
-
-	if ((action & ~CPU_TASKS_FROZEN) != CPU_STARTING)
-		return NOTIFY_DONE;
-
-	if (!cpumask_test_cpu(cpu, &pmu->supported_cpus))
-		return NOTIFY_DONE;
-
-	if (pmu->reset)
-		pmu->reset(pmu);
-	else
-		return NOTIFY_DONE;
-
-	return NOTIFY_OK;
-}
-
-static int cpu_pmu_init(struct arm_pmu *cpu_pmu)
-{
-	int err;
-	int cpu;
-	struct pmu_hw_events __percpu *cpu_hw_events;
-
-	cpu_hw_events = alloc_percpu(struct pmu_hw_events);
-	if (!cpu_hw_events)
-		return -ENOMEM;
-
-	cpu_pmu->hotplug_nb.notifier_call = cpu_pmu_notify;
-	err = register_cpu_notifier(&cpu_pmu->hotplug_nb);
-	if (err)
-		goto out_hw_events;
-
-	for_each_possible_cpu(cpu) {
-		struct pmu_hw_events *events = per_cpu_ptr(cpu_hw_events, cpu);
-		raw_spin_lock_init(&events->pmu_lock);
-		events->percpu_pmu = cpu_pmu;
-	}
-
-	cpu_pmu->hw_events	= cpu_hw_events;
-	cpu_pmu->request_irq	= cpu_pmu_request_irq;
-	cpu_pmu->free_irq	= cpu_pmu_free_irq;
-
-	/* Ensure the PMU has sane values out of reset. */
-	if (cpu_pmu->reset)
-		on_each_cpu_mask(&cpu_pmu->supported_cpus, cpu_pmu->reset,
-			 cpu_pmu, 1);
-
-	/* If no interrupts available, set the corresponding capability flag */
-	if (!platform_get_irq(cpu_pmu->plat_device, 0))
-		cpu_pmu->pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT;
-
-	return 0;
-
-out_hw_events:
-	free_percpu(cpu_hw_events);
-	return err;
-}
-
-static void cpu_pmu_destroy(struct arm_pmu *cpu_pmu)
-{
-	unregister_cpu_notifier(&cpu_pmu->hotplug_nb);
-	free_percpu(cpu_pmu->hw_events);
-}
-
-/*
- * CPU PMU identification and probing.
- */
-static int probe_current_pmu(struct arm_pmu *pmu,
-			     const struct pmu_probe_info *info)
-{
-	int cpu = get_cpu();
-	unsigned int cpuid = read_cpuid_id();
-	int ret = -ENODEV;
-
-	pr_info("probing PMU on CPU %d\n", cpu);
-
-	for (; info->init != NULL; info++) {
-		if ((cpuid & info->mask) != info->cpuid)
-			continue;
-		ret = info->init(pmu);
-		break;
-	}
-
-	put_cpu();
-	return ret;
-}
-
-static int of_pmu_irq_cfg(struct arm_pmu *pmu)
-{
-	int *irqs, i = 0;
-	bool using_spi = false;
-	struct platform_device *pdev = pmu->plat_device;
-
-	irqs = kcalloc(pdev->num_resources, sizeof(*irqs), GFP_KERNEL);
-	if (!irqs)
-		return -ENOMEM;
-
-	do {
-		struct device_node *dn;
-		int cpu, irq;
-
-		/* See if we have an affinity entry */
-		dn = of_parse_phandle(pdev->dev.of_node, "interrupt-affinity", i);
-		if (!dn)
-			break;
-
-		/* Check the IRQ type and prohibit a mix of PPIs and SPIs */
-		irq = platform_get_irq(pdev, i);
-		if (irq >= 0) {
-			bool spi = !irq_is_percpu(irq);
-
-			if (i > 0 && spi != using_spi) {
-				pr_err("PPI/SPI IRQ type mismatch for %s!\n",
-					dn->name);
-				kfree(irqs);
-				return -EINVAL;
-			}
-
-			using_spi = spi;
-		}
-
-		/* Now look up the logical CPU number */
-		for_each_possible_cpu(cpu)
-			if (dn == of_cpu_device_node_get(cpu))
-				break;
-
-		if (cpu >= nr_cpu_ids) {
-			pr_warn("Failed to find logical CPU for %s\n",
-				dn->name);
-			of_node_put(dn);
-			cpumask_setall(&pmu->supported_cpus);
-			break;
-		}
-		of_node_put(dn);
-
-		/* For SPIs, we need to track the affinity per IRQ */
-		if (using_spi) {
-			if (i >= pdev->num_resources) {
-				of_node_put(dn);
-				break;
-			}
-
-			irqs[i] = cpu;
-		}
-
-		/* Keep track of the CPUs containing this PMU type */
-		cpumask_set_cpu(cpu, &pmu->supported_cpus);
-		of_node_put(dn);
-		i++;
-	} while (1);
-
-	/* If we didn't manage to parse anything, claim to support all CPUs */
-	if (cpumask_weight(&pmu->supported_cpus) == 0)
-		cpumask_setall(&pmu->supported_cpus);
-
-	/* If we matched up the IRQ affinities, use them to route the SPIs */
-	if (using_spi && i == pdev->num_resources)
-		pmu->irq_affinity = irqs;
-	else
-		kfree(irqs);
-
-	return 0;
-}
-
-int arm_pmu_device_probe(struct platform_device *pdev,
-			 const struct of_device_id *of_table,
-			 const struct pmu_probe_info *probe_table)
-{
-	const struct of_device_id *of_id;
-	const int (*init_fn)(struct arm_pmu *);
-	struct device_node *node = pdev->dev.of_node;
-	struct arm_pmu *pmu;
-	int ret = -ENODEV;
-
-	pmu = kzalloc(sizeof(struct arm_pmu), GFP_KERNEL);
-	if (!pmu) {
-		pr_info("failed to allocate PMU device!\n");
-		return -ENOMEM;
-	}
-
-	if (!__oprofile_cpu_pmu)
-		__oprofile_cpu_pmu = pmu;
-
-	pmu->plat_device = pdev;
-
-	if (node && (of_id = of_match_node(of_table, pdev->dev.of_node))) {
-		init_fn = of_id->data;
-
-		ret = of_pmu_irq_cfg(pmu);
-		if (!ret)
-			ret = init_fn(pmu);
-	} else {
-		ret = probe_current_pmu(pmu, probe_table);
-		cpumask_setall(&pmu->supported_cpus);
-	}
-
-	if (ret) {
-		pr_info("failed to probe PMU!\n");
-		goto out_free;
-	}
-
-	ret = cpu_pmu_init(pmu);
-	if (ret)
-		goto out_free;
-
-	ret = armpmu_register(pmu, -1);
-	if (ret)
-		goto out_destroy;
-
-	return 0;
-
-out_destroy:
-	cpu_pmu_destroy(pmu);
-out_free:
-	pr_info("failed to register PMU devices!\n");
-	kfree(pmu);
-	return ret;
-}
diff --git a/arch/arm/kernel/perf_event_v6.c b/arch/arm/kernel/perf_event_v6.c
index 09f83e414a72..09413e7b49aa 100644
--- a/arch/arm/kernel/perf_event_v6.c
+++ b/arch/arm/kernel/perf_event_v6.c
@@ -34,9 +34,9 @@
 #include
 #include
-#include

 #include
+#include
 #include

 enum armv6_perf_types {
diff --git a/arch/arm/kernel/perf_event_v7.c b/arch/arm/kernel/perf_event_v7.c
index f9b37f876e20..126dc679b230 100644
--- a/arch/arm/kernel/perf_event_v7.c
+++ b/arch/arm/kernel/perf_event_v7.c
@@ -21,11 +21,11 @@
 #include
 #include
 #include
-#include

 #include
 #include "../vfp/vfpinstr.h"

 #include
+#include
 #include

 /*
diff --git a/arch/arm/kernel/perf_event_xscale.c b/arch/arm/kernel/perf_event_xscale.c
index 304d056d5b25..aa0499e2eef7 100644
--- a/arch/arm/kernel/perf_event_xscale.c
+++ b/arch/arm/kernel/perf_event_xscale.c
@@ -16,9 +16,9 @@
 #include
 #include
-#include

 #include
+#include
 #include

 enum xscale_perf_types {
diff --git a/arch/arm/mach-ux500/cpu-db8500.c b/arch/arm/mach-ux500/cpu-db8500.c
index 16913800bbf9..5578dc1ab52b 100644
--- a/arch/arm/mach-ux500/cpu-db8500.c
+++ b/arch/arm/mach-ux500/cpu-db8500.c
@@ -20,10 +20,10 @@
 #include
 #include
 #include
+#include
 #include
 #include
-#include
 #include

 #include "setup.h"
diff --git a/drivers/Kconfig b/drivers/Kconfig
index 6e973b8e3a3b..3497485f5eab 100644
--- a/drivers/Kconfig
+++ b/drivers/Kconfig
@@ -176,6 +176,8 @@ source "drivers/powercap/Kconfig"

 source "drivers/mcb/Kconfig"

+source "drivers/perf/Kconfig"
+
 source "drivers/ras/Kconfig"

 source "drivers/thunderbolt/Kconfig"
diff --git a/drivers/Makefile b/drivers/Makefile
index b64b49f6e01b..f245f2291b8a 100644
--- a/drivers/Makefile
+++ b/drivers/Makefile
@@ -161,6 +161,7 @@ obj-$(CONFIG_NTB)		+= ntb/
 obj-$(CONFIG_FMC)		+= fmc/
 obj-$(CONFIG_POWERCAP)		+= powercap/
 obj-$(CONFIG_MCB)		+= mcb/
+obj-$(CONFIG_PERF_EVENTS)	+= perf/
 obj-$(CONFIG_RAS)		+= ras/
 obj-$(CONFIG_THUNDERBOLT)	+= thunderbolt/
 obj-$(CONFIG_CORESIGHT)		+= hwtracing/coresight/
diff --git a/drivers/perf/Kconfig b/drivers/perf/Kconfig
new file mode 100644
index 000000000000..d9de36ee165d
--- /dev/null
+++ b/drivers/perf/Kconfig
@@ -0,0 +1,15 @@
+#
+# Performance Monitor Drivers
+#
+
+menu "Performance monitor support"
+
+config ARM_PMU
+	depends on PERF_EVENTS && ARM
+	bool "ARM PMU framework"
+	default y
+	help
+	  Say y if you want to use CPU performance monitors on ARM-based
+	  systems.
+
+endmenu
diff --git a/drivers/perf/Makefile b/drivers/perf/Makefile
new file mode 100644
index 000000000000..acd2397ded94
--- /dev/null
+++ b/drivers/perf/Makefile
@@ -0,0 +1 @@
+obj-$(CONFIG_ARM_PMU) += arm_pmu.o
diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c
new file mode 100644
index 000000000000..2365a32a595e
--- /dev/null
+++ b/drivers/perf/arm_pmu.c
@@ -0,0 +1,921 @@
+#undef DEBUG
+
+/*
+ * ARM performance counter support.
+ *
+ * Copyright (C) 2009 picoChip Designs, Ltd., Jamie Iles
+ * Copyright (C) 2010 ARM Ltd., Will Deacon
+ *
+ * This code is based on the sparc64 perf event code, which is in turn based
+ * on the x86 code.
+ */
+#define pr_fmt(fmt) "hw perfevents: " fmt
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+
+static int
+armpmu_map_cache_event(const unsigned (*cache_map)
+				      [PERF_COUNT_HW_CACHE_MAX]
+				      [PERF_COUNT_HW_CACHE_OP_MAX]
+				      [PERF_COUNT_HW_CACHE_RESULT_MAX],
+		       u64 config)
+{
+	unsigned int cache_type, cache_op, cache_result, ret;
+
+	cache_type = (config >> 0) & 0xff;
+	if (cache_type >= PERF_COUNT_HW_CACHE_MAX)
+		return -EINVAL;
+
+	cache_op = (config >> 8) & 0xff;
+	if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX)
+		return -EINVAL;
+
+	cache_result = (config >> 16) & 0xff;
+	if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
+		return -EINVAL;
+
+	ret = (int)(*cache_map)[cache_type][cache_op][cache_result];
+
+	if (ret == CACHE_OP_UNSUPPORTED)
+		return -ENOENT;
+
+	return ret;
+}
+
+static int
+armpmu_map_hw_event(const unsigned (*event_map)[PERF_COUNT_HW_MAX], u64 config)
+{
+	int mapping;
+
+	if (config >= PERF_COUNT_HW_MAX)
+		return -EINVAL;
+
+	mapping = (*event_map)[config];
+	return mapping == HW_OP_UNSUPPORTED ? -ENOENT : mapping;
+}
+
+static int
+armpmu_map_raw_event(u32 raw_event_mask, u64 config)
+{
+	return (int)(config & raw_event_mask);
+}
+
+int
+armpmu_map_event(struct perf_event *event,
+		 const unsigned (*event_map)[PERF_COUNT_HW_MAX],
+		 const unsigned (*cache_map)
+				[PERF_COUNT_HW_CACHE_MAX]
+				[PERF_COUNT_HW_CACHE_OP_MAX]
+				[PERF_COUNT_HW_CACHE_RESULT_MAX],
+		 u32 raw_event_mask)
+{
+	u64 config = event->attr.config;
+	int type = event->attr.type;
+
+	if (type == event->pmu->type)
+		return armpmu_map_raw_event(raw_event_mask, config);
+
+	switch (type) {
+	case PERF_TYPE_HARDWARE:
+		return armpmu_map_hw_event(event_map, config);
+	case PERF_TYPE_HW_CACHE:
+		return armpmu_map_cache_event(cache_map, config);
+	case PERF_TYPE_RAW:
+		return armpmu_map_raw_event(raw_event_mask, config);
+	}
+
+	return -ENOENT;
+}
+
+int armpmu_event_set_period(struct perf_event *event)
+{
+	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+	s64 left = local64_read(&hwc->period_left);
+	s64 period = hwc->sample_period;
+	int ret = 0;
+
+	if (unlikely(left <= -period)) {
+		left = period;
+		local64_set(&hwc->period_left, left);
+		hwc->last_period = period;
+		ret = 1;
+	}
+
+	if (unlikely(left <= 0)) {
+		left += period;
+		local64_set(&hwc->period_left, left);
+		hwc->last_period = period;
+		ret = 1;
+	}
+
+	/*
+	 * Limit the maximum period to prevent the counter value
+	 * from overtaking the one we are about to program. In
+	 * effect we are reducing max_period to account for
+	 * interrupt latency (and we are being very conservative).
+	 */
+	if (left > (armpmu->max_period >> 1))
+		left = armpmu->max_period >> 1;
+
+	local64_set(&hwc->prev_count, (u64)-left);
+
+	armpmu->write_counter(event, (u64)(-left) & 0xffffffff);
+
+	perf_event_update_userpage(event);
+
+	return ret;
+}
+
+u64 armpmu_event_update(struct perf_event *event)
+{
+	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+	u64 delta, prev_raw_count, new_raw_count;
+
+again:
+	prev_raw_count = local64_read(&hwc->prev_count);
+	new_raw_count = armpmu->read_counter(event);
+
+	if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
+			    new_raw_count) != prev_raw_count)
+		goto again;
+
+	delta = (new_raw_count - prev_raw_count) & armpmu->max_period;
+
+	local64_add(delta, &event->count);
+	local64_sub(delta, &hwc->period_left);
+
+	return new_raw_count;
+}
+
+static void
+armpmu_read(struct perf_event *event)
+{
+	armpmu_event_update(event);
+}
+
+static void
+armpmu_stop(struct perf_event *event, int flags)
+{
+	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+
+	/*
+	 * ARM pmu always has to update the counter, so ignore
+	 * PERF_EF_UPDATE, see comments in armpmu_start().
+	 */
+	if (!(hwc->state & PERF_HES_STOPPED)) {
+		armpmu->disable(event);
+		armpmu_event_update(event);
+		hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
+	}
+}
+
+static void armpmu_start(struct perf_event *event, int flags)
+{
+	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+
+	/*
+	 * ARM pmu always has to reprogram the period, so ignore
+	 * PERF_EF_RELOAD, see the comment below.
+	 */
+	if (flags & PERF_EF_RELOAD)
+		WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
+
+	hwc->state = 0;
+	/*
+	 * Set the period again. Some counters can't be stopped, so when we
+	 * were stopped we simply disabled the IRQ source and the counter
+	 * may have been left counting. If we don't do this step then we may
+	 * get an interrupt too soon or *way* too late if the overflow has
+	 * happened since disabling.
+	 */
+	armpmu_event_set_period(event);
+	armpmu->enable(event);
+}
+
+static void
+armpmu_del(struct perf_event *event, int flags)
+{
+	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
+	struct pmu_hw_events *hw_events = this_cpu_ptr(armpmu->hw_events);
+	struct hw_perf_event *hwc = &event->hw;
+	int idx = hwc->idx;
+
+	armpmu_stop(event, PERF_EF_UPDATE);
+	hw_events->events[idx] = NULL;
+	clear_bit(idx, hw_events->used_mask);
+	if (armpmu->clear_event_idx)
+		armpmu->clear_event_idx(hw_events, event);
+
+	perf_event_update_userpage(event);
+}
+
+static int
+armpmu_add(struct perf_event *event, int flags)
+{
+	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
+	struct pmu_hw_events *hw_events = this_cpu_ptr(armpmu->hw_events);
+	struct hw_perf_event *hwc = &event->hw;
+	int idx;
+	int err = 0;
+
+	/* An event following a process won't be stopped earlier */
+	if (!cpumask_test_cpu(smp_processor_id(), &armpmu->supported_cpus))
+		return -ENOENT;
+
+	perf_pmu_disable(event->pmu);
+
+	/* If we don't have a space for the counter then finish early. */
+	idx = armpmu->get_event_idx(hw_events, event);
+	if (idx < 0) {
+		err = idx;
+		goto out;
+	}
+
+	/*
+	 * If there is an event in the counter we are going to use then make
+	 * sure it is disabled.
+	 */
+	event->hw.idx = idx;
+	armpmu->disable(event);
+	hw_events->events[idx] = event;
+
+	hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
+	if (flags & PERF_EF_START)
+		armpmu_start(event, PERF_EF_RELOAD);
+
+	/* Propagate our changes to the userspace mapping. */
+	perf_event_update_userpage(event);
+
+out:
+	perf_pmu_enable(event->pmu);
+	return err;
+}
+
+static int
+validate_event(struct pmu *pmu, struct pmu_hw_events *hw_events,
+			       struct perf_event *event)
+{
+	struct arm_pmu *armpmu;
+
+	if (is_software_event(event))
+		return 1;
+
+	/*
+	 * Reject groups spanning multiple HW PMUs (e.g. CPU + CCI). The
+	 * core perf code won't check that the pmu->ctx == leader->ctx
+	 * until after pmu->event_init(event).
+	 */
+	if (event->pmu != pmu)
+		return 0;
+
+	if (event->state < PERF_EVENT_STATE_OFF)
+		return 1;
+
+	if (event->state == PERF_EVENT_STATE_OFF && !event->attr.enable_on_exec)
+		return 1;
+
+	armpmu = to_arm_pmu(event->pmu);
+	return armpmu->get_event_idx(hw_events, event) >= 0;
+}
+
+static int
+validate_group(struct perf_event *event)
+{
+	struct perf_event *sibling, *leader = event->group_leader;
+	struct pmu_hw_events fake_pmu;
+
+	/*
+	 * Initialise the fake PMU. We only need to populate the
+	 * used_mask for the purposes of validation.
+	 */
+	memset(&fake_pmu.used_mask, 0, sizeof(fake_pmu.used_mask));
+
+	if (!validate_event(event->pmu, &fake_pmu, leader))
+		return -EINVAL;
+
+	list_for_each_entry(sibling, &leader->sibling_list, group_entry) {
+		if (!validate_event(event->pmu, &fake_pmu, sibling))
+			return -EINVAL;
+	}
+
+	if (!validate_event(event->pmu, &fake_pmu, event))
+		return -EINVAL;
+
+	return 0;
+}
+
+static irqreturn_t armpmu_dispatch_irq(int irq, void *dev)
+{
+	struct arm_pmu *armpmu;
+	struct platform_device *plat_device;
+	struct arm_pmu_platdata *plat;
+	int ret;
+	u64 start_clock, finish_clock;
+
+	/*
+	 * we request the IRQ with a (possibly percpu) struct arm_pmu**, but
+	 * the handlers expect a struct arm_pmu*. The percpu_irq framework will
+	 * do any necessary shifting, we just need to perform the first
+	 * dereference.
+	 */
+	armpmu = *(void **)dev;
+	plat_device = armpmu->plat_device;
+	plat = dev_get_platdata(&plat_device->dev);
+
+	start_clock = sched_clock();
+	if (plat && plat->handle_irq)
+		ret = plat->handle_irq(irq, armpmu, armpmu->handle_irq);
+	else
+		ret = armpmu->handle_irq(irq, armpmu);
+	finish_clock = sched_clock();
+
+	perf_sample_event_took(finish_clock - start_clock);
+	return ret;
+}
+
+static void
+armpmu_release_hardware(struct arm_pmu *armpmu)
+{
+	armpmu->free_irq(armpmu);
+}
+
+static int
+armpmu_reserve_hardware(struct arm_pmu *armpmu)
+{
+	int err = armpmu->request_irq(armpmu, armpmu_dispatch_irq);
+	if (err) {
+		armpmu_release_hardware(armpmu);
+		return err;
+	}
+
+	return 0;
+}
+
+static void
+hw_perf_event_destroy(struct perf_event *event)
+{
+	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
+	atomic_t *active_events = &armpmu->active_events;
+	struct mutex *pmu_reserve_mutex = &armpmu->reserve_mutex;
+
+	if (atomic_dec_and_mutex_lock(active_events, pmu_reserve_mutex)) {
+		armpmu_release_hardware(armpmu);
+		mutex_unlock(pmu_reserve_mutex);
+	}
+}
+
+static int
+event_requires_mode_exclusion(struct perf_event_attr *attr)
+{
+	return attr->exclude_idle || attr->exclude_user ||
+	       attr->exclude_kernel || attr->exclude_hv;
+}
+
+static int
+__hw_perf_event_init(struct perf_event *event)
+{
+	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+	int mapping;
+
+	mapping = armpmu->map_event(event);
+
+	if (mapping < 0) {
+		pr_debug("event %x:%llx not supported\n", event->attr.type,
+			 event->attr.config);
+		return mapping;
+	}
+
+	/*
+	 * We don't assign an index until we actually place the event onto
+	 * hardware. Use -1 to signify that we haven't decided where to put it
+	 * yet. For SMP systems, each core has it's own PMU so we can't do any
+	 * clever allocation or constraints checking at this point.
+	 */
+	hwc->idx		= -1;
+	hwc->config_base	= 0;
+	hwc->config		= 0;
+	hwc->event_base		= 0;
+
+	/*
+	 * Check whether we need to exclude the counter from certain modes.
+	 */
+	if ((!armpmu->set_event_filter ||
+	     armpmu->set_event_filter(hwc, &event->attr)) &&
+	     event_requires_mode_exclusion(&event->attr)) {
+		pr_debug("ARM performance counters do not support "
+			 "mode exclusion\n");
+		return -EOPNOTSUPP;
+	}
+
+	/*
+	 * Store the event encoding into the config_base field.
+	 */
+	hwc->config_base	    |= (unsigned long)mapping;
+
+	if (!is_sampling_event(event)) {
+		/*
+		 * For non-sampling runs, limit the sample_period to half
+		 * of the counter width. That way, the new counter value
+		 * is far less likely to overtake the previous one unless
+		 * you have some serious IRQ latency issues.
+		 */
+		hwc->sample_period  = armpmu->max_period >> 1;
+		hwc->last_period    = hwc->sample_period;
+		local64_set(&hwc->period_left, hwc->sample_period);
+	}
+
+	if (event->group_leader != event) {
+		if (validate_group(event) != 0)
+			return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int armpmu_event_init(struct perf_event *event)
+{
+	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
+	int err = 0;
+	atomic_t *active_events = &armpmu->active_events;
+
+	/*
+	 * Reject CPU-affine events for CPUs that are of a different class to
+	 * that which this PMU handles. Process-following events (where
+	 * event->cpu == -1) can be migrated between CPUs, and thus we have to
+	 * reject them later (in armpmu_add) if they're scheduled on a
+	 * different class of CPU.
+	 */
+	if (event->cpu != -1 &&
+		!cpumask_test_cpu(event->cpu, &armpmu->supported_cpus))
+		return -ENOENT;
+
+	/* does not support taken branch sampling */
+	if (has_branch_stack(event))
+		return -EOPNOTSUPP;
+
+	if (armpmu->map_event(event) == -ENOENT)
+		return -ENOENT;
+
+	event->destroy = hw_perf_event_destroy;
+
+	if (!atomic_inc_not_zero(active_events)) {
+		mutex_lock(&armpmu->reserve_mutex);
+		if (atomic_read(active_events) == 0)
+			err = armpmu_reserve_hardware(armpmu);
+
+		if (!err)
+			atomic_inc(active_events);
+		mutex_unlock(&armpmu->reserve_mutex);
+	}
+
+	if (err)
+		return err;
+
+	err = __hw_perf_event_init(event);
+	if (err)
+		hw_perf_event_destroy(event);
+
+	return err;
+}
+
+static void armpmu_enable(struct pmu *pmu)
+{
+	struct arm_pmu *armpmu = to_arm_pmu(pmu);
+	struct pmu_hw_events *hw_events = this_cpu_ptr(armpmu->hw_events);
+	int enabled = bitmap_weight(hw_events->used_mask, armpmu->num_events);
+
+	/* For task-bound events we may be called on other CPUs */
+	if (!cpumask_test_cpu(smp_processor_id(), &armpmu->supported_cpus))
+		return;
+
+	if (enabled)
+		armpmu->start(armpmu);
+}
+
+static void armpmu_disable(struct pmu *pmu)
+{
+	struct arm_pmu *armpmu = to_arm_pmu(pmu);
+
+	/* For task-bound events we may be called on other CPUs */
+	if (!cpumask_test_cpu(smp_processor_id(), &armpmu->supported_cpus))
+		return;
+
+	armpmu->stop(armpmu);
+}
+
+/*
+ * In heterogeneous systems, events are specific to a particular
+ * microarchitecture, and aren't suitable for another. Thus, only match CPUs of
+ * the same microarchitecture.
+ */
+static int armpmu_filter_match(struct perf_event *event)
+{
+	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
+	unsigned int cpu = smp_processor_id();
+	return cpumask_test_cpu(cpu, &armpmu->supported_cpus);
+}
+
+static void armpmu_init(struct arm_pmu *armpmu)
+{
+	atomic_set(&armpmu->active_events, 0);
+	mutex_init(&armpmu->reserve_mutex);
+
+	armpmu->pmu = (struct pmu) {
+		.pmu_enable	= armpmu_enable,
+		.pmu_disable	= armpmu_disable,
+		.event_init	= armpmu_event_init,
+		.add		= armpmu_add,
+		.del		= armpmu_del,
+		.start		= armpmu_start,
+		.stop		= armpmu_stop,
+		.read		= armpmu_read,
+		.filter_match	= armpmu_filter_match,
+	};
+}
+
+int armpmu_register(struct arm_pmu *armpmu, int type)
+{
+	armpmu_init(armpmu);
+	pr_info("enabled with %s PMU driver, %d counters available\n",
+			armpmu->name, armpmu->num_events);
+	return perf_pmu_register(&armpmu->pmu, armpmu->name, type);
+}
+
+/* Set at runtime when we know what CPU type we are. */
+static struct arm_pmu *__oprofile_cpu_pmu;
+
+/*
+ * Despite the names, these two functions are CPU-specific and are used
+ * by the OProfile/perf code.
+ */
+const char *perf_pmu_name(void)
+{
+	if (!__oprofile_cpu_pmu)
+		return NULL;
+
+	return __oprofile_cpu_pmu->name;
+}
+EXPORT_SYMBOL_GPL(perf_pmu_name);
+
+int perf_num_counters(void)
+{
+	int max_events = 0;
+
+	if (__oprofile_cpu_pmu != NULL)
+		max_events = __oprofile_cpu_pmu->num_events;
+
+	return max_events;
+}
+EXPORT_SYMBOL_GPL(perf_num_counters);
+
+static void cpu_pmu_enable_percpu_irq(void *data)
+{
+	int irq = *(int *)data;
+
+	enable_percpu_irq(irq, IRQ_TYPE_NONE);
+}
+
+static void cpu_pmu_disable_percpu_irq(void *data)
+{
+	int irq = *(int *)data;
+
+	disable_percpu_irq(irq);
+}
+
+static void cpu_pmu_free_irq(struct arm_pmu *cpu_pmu)
+{
+	int i, irq, irqs;
+	struct platform_device *pmu_device = cpu_pmu->plat_device;
+	struct pmu_hw_events __percpu *hw_events = cpu_pmu->hw_events;
+
+	irqs = min(pmu_device->num_resources, num_possible_cpus());
+
+	irq = platform_get_irq(pmu_device, 0);
+	if (irq >= 0 && irq_is_percpu(irq)) {
+		on_each_cpu(cpu_pmu_disable_percpu_irq, &irq, 1);
+		free_percpu_irq(irq, &hw_events->percpu_pmu);
+	} else {
+		for (i = 0; i < irqs; ++i) {
+			int cpu = i;
+
+			if (cpu_pmu->irq_affinity)
+				cpu = cpu_pmu->irq_affinity[i];
+
+			if (!cpumask_test_and_clear_cpu(cpu, &cpu_pmu->active_irqs))
+				continue;
+			irq = platform_get_irq(pmu_device, i);
+			if (irq >= 0)
+				free_irq(irq, per_cpu_ptr(&hw_events->percpu_pmu, cpu));
+		}
+	}
+}
+
+static int cpu_pmu_request_irq(struct arm_pmu *cpu_pmu, irq_handler_t handler)
+{
+	int i, err, irq, irqs;
+	struct platform_device *pmu_device = cpu_pmu->plat_device;
+	struct pmu_hw_events __percpu *hw_events = cpu_pmu->hw_events;
+
+	if (!pmu_device)
+		return -ENODEV;
+
+	irqs = min(pmu_device->num_resources, num_possible_cpus());
+	if (irqs < 1) {
+		pr_warn_once("perf/ARM: No irqs for PMU defined, sampling events not supported\n");
+		return 0;
+	}
+
+	irq = platform_get_irq(pmu_device, 0);
+	if (irq >= 0 && irq_is_percpu(irq)) {
+		err = request_percpu_irq(irq, handler, "arm-pmu",
+					 &hw_events->percpu_pmu);
+		if (err) {
+			pr_err("unable to request IRQ%d for ARM PMU counters\n",
+				irq);
+			return err;
+		}
+		on_each_cpu(cpu_pmu_enable_percpu_irq, &irq, 1);
+	} else {
+		for (i = 0; i < irqs; ++i) {
+			int cpu = i;
+
+			err = 0;
+			irq = platform_get_irq(pmu_device, i);
+			if (irq < 0)
+				continue;
+
+			if (cpu_pmu->irq_affinity)
+				cpu = cpu_pmu->irq_affinity[i];
+
+			/*
+			 * If we have a single PMU interrupt that we can't shift,
+			 * assume that we're running on a uniprocessor machine and
+			 * continue. Otherwise, continue without this interrupt.
+			 */
+			if (irq_set_affinity(irq, cpumask_of(cpu)) && irqs > 1) {
+				pr_warn("unable to set irq affinity (irq=%d, cpu=%u)\n",
+					irq, cpu);
+				continue;
+			}
+
+			err = request_irq(irq, handler,
+					  IRQF_NOBALANCING | IRQF_NO_THREAD, "arm-pmu",
+					  per_cpu_ptr(&hw_events->percpu_pmu, cpu));
+			if (err) {
+				pr_err("unable to request IRQ%d for ARM PMU counters\n",
+					irq);
+				return err;
+			}
+
+			cpumask_set_cpu(cpu, &cpu_pmu->active_irqs);
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * PMU hardware loses all context when a CPU goes offline.
+ * When a CPU is hotplugged back in, since some hardware registers are
+ * UNKNOWN at reset, the PMU must be explicitly reset to avoid reading
+ * junk values out of them.
+ */ +static int cpu_pmu_notify(struct notifier_block *b, unsigned long action, + void *hcpu) +{ + int cpu = (unsigned long)hcpu; + struct arm_pmu *pmu = container_of(b, struct arm_pmu, hotplug_nb); + + if ((action & ~CPU_TASKS_FROZEN) != CPU_STARTING) + return NOTIFY_DONE; + + if (!cpumask_test_cpu(cpu, &pmu->supported_cpus)) + return NOTIFY_DONE; + + if (pmu->reset) + pmu->reset(pmu); + else + return NOTIFY_DONE; + + return NOTIFY_OK; +} + +static int cpu_pmu_init(struct arm_pmu *cpu_pmu) +{ + int err; + int cpu; + struct pmu_hw_events __percpu *cpu_hw_events; + + cpu_hw_events = alloc_percpu(struct pmu_hw_events); + if (!cpu_hw_events) + return -ENOMEM; + + cpu_pmu->hotplug_nb.notifier_call = cpu_pmu_notify; + err = register_cpu_notifier(&cpu_pmu->hotplug_nb); + if (err) + goto out_hw_events; + + for_each_possible_cpu(cpu) { + struct pmu_hw_events *events = per_cpu_ptr(cpu_hw_events, cpu); + raw_spin_lock_init(&events->pmu_lock); + events->percpu_pmu = cpu_pmu; + } + + cpu_pmu->hw_events = cpu_hw_events; + cpu_pmu->request_irq = cpu_pmu_request_irq; + cpu_pmu->free_irq = cpu_pmu_free_irq; + + /* Ensure the PMU has sane values out of reset. */ + if (cpu_pmu->reset) + on_each_cpu_mask(&cpu_pmu->supported_cpus, cpu_pmu->reset, + cpu_pmu, 1); + + /* If no interrupts available, set the corresponding capability flag */ + if (!platform_get_irq(cpu_pmu->plat_device, 0)) + cpu_pmu->pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT; + + return 0; + +out_hw_events: + free_percpu(cpu_hw_events); + return err; +} + +static void cpu_pmu_destroy(struct arm_pmu *cpu_pmu) +{ + unregister_cpu_notifier(&cpu_pmu->hotplug_nb); + free_percpu(cpu_pmu->hw_events); +} + +/* + * CPU PMU identification and probing. + */ +static int probe_current_pmu(struct arm_pmu *pmu, + const struct pmu_probe_info *info) +{ + int cpu = get_cpu(); + unsigned int cpuid = read_cpuid_id(); + int ret = -ENODEV; + + pr_info("probing PMU on CPU %d\n", cpu); + + for (; info->init != NULL; info++) { + if ((cpuid & info->mask) != info->cpuid) + continue; + ret = info->init(pmu); + break; + } + + put_cpu(); + return ret; +} + +static int of_pmu_irq_cfg(struct arm_pmu *pmu) +{ + int *irqs, i = 0; + bool using_spi = false; + struct platform_device *pdev = pmu->plat_device; + + irqs = kcalloc(pdev->num_resources, sizeof(*irqs), GFP_KERNEL); + if (!irqs) + return -ENOMEM; + + do { + struct device_node *dn; + int cpu, irq; + + /* See if we have an affinity entry */ + dn = of_parse_phandle(pdev->dev.of_node, "interrupt-affinity", i); + if (!dn) + break; + + /* Check the IRQ type and prohibit a mix of PPIs and SPIs */ + irq = platform_get_irq(pdev, i); + if (irq >= 0) { + bool spi = !irq_is_percpu(irq); + + if (i > 0 && spi != using_spi) { + pr_err("PPI/SPI IRQ type mismatch for %s!\n", + dn->name); + kfree(irqs); + return -EINVAL; + } + + using_spi = spi; + } + + /* Now look up the logical CPU number */ + for_each_possible_cpu(cpu) + if (dn == of_cpu_device_node_get(cpu)) + break; + + if (cpu >= nr_cpu_ids) { + pr_warn("Failed to find logical CPU for %s\n", + dn->name); + of_node_put(dn); + cpumask_setall(&pmu->supported_cpus); + break; + } + of_node_put(dn); + + /* For SPIs, we need to track the affinity per IRQ */ + if (using_spi) { + if (i >= pdev->num_resources) { + of_node_put(dn); + break; + } + + irqs[i] = cpu; + } + + /* Keep track of the CPUs containing this PMU type */ + cpumask_set_cpu(cpu, &pmu->supported_cpus); + of_node_put(dn); + i++; + } while (1); + + /* If we didn't manage to parse anything, claim to support all CPUs */ + if 
(cpumask_weight(&pmu->supported_cpus) == 0) + cpumask_setall(&pmu->supported_cpus); + + /* If we matched up the IRQ affinities, use them to route the SPIs */ + if (using_spi && i == pdev->num_resources) + pmu->irq_affinity = irqs; + else + kfree(irqs); + + return 0; +} + +int arm_pmu_device_probe(struct platform_device *pdev, + const struct of_device_id *of_table, + const struct pmu_probe_info *probe_table) +{ + const struct of_device_id *of_id; + const int (*init_fn)(struct arm_pmu *); + struct device_node *node = pdev->dev.of_node; + struct arm_pmu *pmu; + int ret = -ENODEV; + + pmu = kzalloc(sizeof(struct arm_pmu), GFP_KERNEL); + if (!pmu) { + pr_info("failed to allocate PMU device!\n"); + return -ENOMEM; + } + + if (!__oprofile_cpu_pmu) + __oprofile_cpu_pmu = pmu; + + pmu->plat_device = pdev; + + if (node && (of_id = of_match_node(of_table, pdev->dev.of_node))) { + init_fn = of_id->data; + + ret = of_pmu_irq_cfg(pmu); + if (!ret) + ret = init_fn(pmu); + } else { + ret = probe_current_pmu(pmu, probe_table); + cpumask_setall(&pmu->supported_cpus); + } + + if (ret) { + pr_info("failed to probe PMU!\n"); + goto out_free; + } + + ret = cpu_pmu_init(pmu); + if (ret) + goto out_free; + + ret = armpmu_register(pmu, -1); + if (ret) + goto out_destroy; + + return 0; + +out_destroy: + cpu_pmu_destroy(pmu); +out_free: + pr_info("failed to register PMU devices!\n"); + kfree(pmu); + return ret; +} diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h new file mode 100644 index 000000000000..bfa673bb822d --- /dev/null +++ b/include/linux/perf/arm_pmu.h @@ -0,0 +1,154 @@ +/* + * linux/arch/arm/include/asm/pmu.h + * + * Copyright (C) 2009 picoChip Designs Ltd, Jamie Iles + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +#ifndef __ARM_PMU_H__ +#define __ARM_PMU_H__ + +#include +#include + +#include + +/* + * struct arm_pmu_platdata - ARM PMU platform data + * + * @handle_irq: an optional handler which will be called from the + * interrupt and passed the address of the low level handler, + * and can be used to implement any platform specific handling + * before or after calling it. + */ +struct arm_pmu_platdata { + irqreturn_t (*handle_irq)(int irq, void *dev, + irq_handler_t pmu_handler); +}; + +#ifdef CONFIG_ARM_PMU + +/* + * The ARMv7 CPU PMU supports up to 32 event counters. + */ +#define ARMPMU_MAX_HWEVENTS 32 + +#define HW_OP_UNSUPPORTED 0xFFFF +#define C(_x) PERF_COUNT_HW_CACHE_##_x +#define CACHE_OP_UNSUPPORTED 0xFFFF + +#define PERF_MAP_ALL_UNSUPPORTED \ + [0 ... PERF_COUNT_HW_MAX - 1] = HW_OP_UNSUPPORTED + +#define PERF_CACHE_MAP_ALL_UNSUPPORTED \ +[0 ... C(MAX) - 1] = { \ + [0 ... C(OP_MAX) - 1] = { \ + [0 ... C(RESULT_MAX) - 1] = CACHE_OP_UNSUPPORTED, \ + }, \ +} + +/* The events for a given PMU register set. */ +struct pmu_hw_events { + /* + * The events that are active on the PMU for the given index. + */ + struct perf_event *events[ARMPMU_MAX_HWEVENTS]; + + /* + * A 1 bit for an index indicates that the counter is being used for + * an event. A 0 means that the counter can be used. + */ + DECLARE_BITMAP(used_mask, ARMPMU_MAX_HWEVENTS); + + /* + * Hardware lock to serialize accesses to PMU registers. Needed for the + * read/modify/write sequences. + */ + raw_spinlock_t pmu_lock; + + /* + * When using percpu IRQs, we need a percpu dev_id. Place it here as we + * already have to allocate this struct per cpu. 
+ */ + struct arm_pmu *percpu_pmu; +}; + +struct arm_pmu { + struct pmu pmu; + cpumask_t active_irqs; + cpumask_t supported_cpus; + int *irq_affinity; + char *name; + irqreturn_t (*handle_irq)(int irq_num, void *dev); + void (*enable)(struct perf_event *event); + void (*disable)(struct perf_event *event); + int (*get_event_idx)(struct pmu_hw_events *hw_events, + struct perf_event *event); + void (*clear_event_idx)(struct pmu_hw_events *hw_events, + struct perf_event *event); + int (*set_event_filter)(struct hw_perf_event *evt, + struct perf_event_attr *attr); + u32 (*read_counter)(struct perf_event *event); + void (*write_counter)(struct perf_event *event, u32 val); + void (*start)(struct arm_pmu *); + void (*stop)(struct arm_pmu *); + void (*reset)(void *); + int (*request_irq)(struct arm_pmu *, irq_handler_t handler); + void (*free_irq)(struct arm_pmu *); + int (*map_event)(struct perf_event *event); + int num_events; + atomic_t active_events; + struct mutex reserve_mutex; + u64 max_period; + struct platform_device *plat_device; + struct pmu_hw_events __percpu *hw_events; + struct notifier_block hotplug_nb; +}; + +#define to_arm_pmu(p) (container_of(p, struct arm_pmu, pmu)) + +int armpmu_register(struct arm_pmu *armpmu, int type); + +u64 armpmu_event_update(struct perf_event *event); + +int armpmu_event_set_period(struct perf_event *event); + +int armpmu_map_event(struct perf_event *event, + const unsigned (*event_map)[PERF_COUNT_HW_MAX], + const unsigned (*cache_map)[PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX], + u32 raw_event_mask); + +struct pmu_probe_info { + unsigned int cpuid; + unsigned int mask; + int (*init)(struct arm_pmu *); +}; + +#define PMU_PROBE(_cpuid, _mask, _fn) \ +{ \ + .cpuid = (_cpuid), \ + .mask = (_mask), \ + .init = (_fn), \ +} + +#define ARM_PMU_PROBE(_cpuid, _fn) \ + PMU_PROBE(_cpuid, ARM_CPU_PART_MASK, _fn) + +#define ARM_PMU_XSCALE_MASK ((0xff << 24) | ARM_CPU_XSCALE_ARCH_MASK) + +#define XSCALE_PMU_PROBE(_version, _fn) \ + PMU_PROBE(ARM_CPU_IMP_INTEL << 24 | _version, ARM_PMU_XSCALE_MASK, _fn) + +int arm_pmu_device_probe(struct platform_device *pdev, + const struct of_device_id *of_table, + const struct pmu_probe_info *probe_table); + +#endif /* CONFIG_ARM_PMU */ + +#endif /* __ARM_PMU_H__ */ -- cgit v1.2.3-55-g7522 From 787047eea24a2443c366679ae6b5a3873a33b64e Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Wed, 29 Jul 2015 00:34:48 +0100 Subject: ARM: 8392/3: smp: Only expose /sys/.../cpuX/online if hotpluggable Writes to /sys/.../cpuX/online fail if we determine the platform doesn't support hotplug for that CPU. Furthermore, if the cpu_die op isn't specified the system hangs when we try to offline a CPU and it comes right back online unexpectedly. Let's figure this stuff out before we make the sysfs nodes so that the online file doesn't even exist if it isn't (at least sometimes) possible to hotplug the CPU. Add a new 'cpu_can_disable' op and repoint all 'cpu_disable' implementations at it because all implementers use the op to indicate if a CPU can be hotplugged or not in a static fashion. With PSCI we may need to add a 'cpu_disable' op so that the secure OS can be migrated off the CPU we're trying to hotplug. In this case, the 'cpu_can_disable' op will indicate that all CPUs are hotpluggable by returning true, but the 'cpu_disable' op will make a PSCI migration call and occasionally fail, denying the hotplug of a CPU. 
This shouldn't be any worse than x86 where we may indicate that all CPUs are hotpluggable but occasionally we can't offline a CPU due to check_irq_vectors_for_cpu_disable() failing to find a CPU to move vectors to. Cc: Mark Rutland Cc: Nicolas Pitre Cc: Dave Martin Acked-by: Simon Horman [shmobile portion] Tested-by: Simon Horman Cc: Magnus Damm Cc: Tested-by: Tyler Baker Cc: Geert Uytterhoeven Signed-off-by: Stephen Boyd Signed-off-by: Russell King --- arch/arm/common/mcpm_platsmp.c | 12 ++++-------- arch/arm/include/asm/smp.h | 1 + arch/arm/include/asm/smp_plat.h | 9 +++++++++ arch/arm/kernel/setup.c | 2 +- arch/arm/kernel/smp.c | 15 ++++++++++++++- arch/arm/mach-shmobile/common.h | 2 +- arch/arm/mach-shmobile/platsmp.c | 4 ++-- arch/arm/mach-shmobile/smp-r8a7790.c | 2 +- arch/arm/mach-shmobile/smp-r8a7791.c | 2 +- arch/arm/mach-shmobile/smp-sh73a0.c | 2 +- 10 files changed, 35 insertions(+), 16 deletions(-) (limited to 'arch/arm/include/asm') diff --git a/arch/arm/common/mcpm_platsmp.c b/arch/arm/common/mcpm_platsmp.c index 92e54d7c6f46..2b25b6038f66 100644 --- a/arch/arm/common/mcpm_platsmp.c +++ b/arch/arm/common/mcpm_platsmp.c @@ -65,14 +65,10 @@ static int mcpm_cpu_kill(unsigned int cpu) return !mcpm_wait_for_cpu_powerdown(pcpu, pcluster); } -static int mcpm_cpu_disable(unsigned int cpu) +static bool mcpm_cpu_can_disable(unsigned int cpu) { - /* - * We assume all CPUs may be shut down. - * This would be the hook to use for eventual Secure - * OS migration requests as described in the PSCI spec. - */ - return 0; + /* We assume all CPUs may be shut down. */ + return true; } static void mcpm_cpu_die(unsigned int cpu) @@ -92,7 +88,7 @@ static struct smp_operations __initdata mcpm_smp_ops = { .smp_secondary_init = mcpm_secondary_init, #ifdef CONFIG_HOTPLUG_CPU .cpu_kill = mcpm_cpu_kill, - .cpu_disable = mcpm_cpu_disable, + .cpu_can_disable = mcpm_cpu_can_disable, .cpu_die = mcpm_cpu_die, #endif }; diff --git a/arch/arm/include/asm/smp.h b/arch/arm/include/asm/smp.h index 2f3ac1ba6fb4..318ce89eeff7 100644 --- a/arch/arm/include/asm/smp.h +++ b/arch/arm/include/asm/smp.h @@ -105,6 +105,7 @@ struct smp_operations { #ifdef CONFIG_HOTPLUG_CPU int (*cpu_kill)(unsigned int cpu); void (*cpu_die)(unsigned int cpu); + bool (*cpu_can_disable)(unsigned int cpu); int (*cpu_disable)(unsigned int cpu); #endif #endif diff --git a/arch/arm/include/asm/smp_plat.h b/arch/arm/include/asm/smp_plat.h index 993e5224d8f7..f9080717fc88 100644 --- a/arch/arm/include/asm/smp_plat.h +++ b/arch/arm/include/asm/smp_plat.h @@ -107,4 +107,13 @@ static inline u32 mpidr_hash_size(void) extern int platform_can_secondary_boot(void); extern int platform_can_cpu_hotplug(void); +#ifdef CONFIG_HOTPLUG_CPU +extern int platform_can_hotplug_cpu(unsigned int cpu); +#else +static inline int platform_can_hotplug_cpu(unsigned int cpu) +{ + return 0; +} +#endif + #endif diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index 36c18b73c1f4..6bbec6042052 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -1015,7 +1015,7 @@ static int __init topology_init(void) for_each_possible_cpu(cpu) { struct cpuinfo_arm *cpuinfo = &per_cpu(cpu_data, cpu); - cpuinfo->cpu.hotpluggable = 1; + cpuinfo->cpu.hotpluggable = platform_can_hotplug_cpu(cpu); register_cpu(&cpuinfo->cpu, cpu); } diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index 90dfbedfbfb8..3cd846f48eaf 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -175,13 +175,26 @@ static int platform_cpu_disable(unsigned int cpu) if 
(smp_ops.cpu_disable) return smp_ops.cpu_disable(cpu); + return 0; +} + +int platform_can_hotplug_cpu(unsigned int cpu) +{ + /* cpu_die must be specified to support hotplug */ + if (!smp_ops.cpu_die) + return 0; + + if (smp_ops.cpu_can_disable) + return smp_ops.cpu_can_disable(cpu); + /* * By default, allow disabling all CPUs except the first one, * since this is special on a lot of platforms, e.g. because * of clock tick interrupts. */ - return cpu == 0 ? -EPERM : 0; + return cpu != 0; } + /* * __cpu_disable runs on the processor to be shutdown. */ diff --git a/arch/arm/mach-shmobile/common.h b/arch/arm/mach-shmobile/common.h index 476092b86c6e..8d27ec546a35 100644 --- a/arch/arm/mach-shmobile/common.h +++ b/arch/arm/mach-shmobile/common.h @@ -13,7 +13,7 @@ extern void shmobile_smp_boot(void); extern void shmobile_smp_sleep(void); extern void shmobile_smp_hook(unsigned int cpu, unsigned long fn, unsigned long arg); -extern int shmobile_smp_cpu_disable(unsigned int cpu); +extern bool shmobile_smp_cpu_can_disable(unsigned int cpu); extern void shmobile_boot_scu(void); extern void shmobile_smp_scu_prepare_cpus(unsigned int max_cpus); extern void shmobile_smp_scu_cpu_die(unsigned int cpu); diff --git a/arch/arm/mach-shmobile/platsmp.c b/arch/arm/mach-shmobile/platsmp.c index 3923e09e966d..b23378f3d7e1 100644 --- a/arch/arm/mach-shmobile/platsmp.c +++ b/arch/arm/mach-shmobile/platsmp.c @@ -31,8 +31,8 @@ void shmobile_smp_hook(unsigned int cpu, unsigned long fn, unsigned long arg) } #ifdef CONFIG_HOTPLUG_CPU -int shmobile_smp_cpu_disable(unsigned int cpu) +bool shmobile_smp_cpu_can_disable(unsigned int cpu) { - return 0; /* Hotplug of any CPU is supported */ + return true; /* Hotplug of any CPU is supported */ } #endif diff --git a/arch/arm/mach-shmobile/smp-r8a7790.c b/arch/arm/mach-shmobile/smp-r8a7790.c index 930f45cbc08a..947e437cab68 100644 --- a/arch/arm/mach-shmobile/smp-r8a7790.c +++ b/arch/arm/mach-shmobile/smp-r8a7790.c @@ -64,7 +64,7 @@ struct smp_operations r8a7790_smp_ops __initdata = { .smp_prepare_cpus = r8a7790_smp_prepare_cpus, .smp_boot_secondary = shmobile_smp_apmu_boot_secondary, #ifdef CONFIG_HOTPLUG_CPU - .cpu_disable = shmobile_smp_cpu_disable, + .cpu_can_disable = shmobile_smp_cpu_can_disable, .cpu_die = shmobile_smp_apmu_cpu_die, .cpu_kill = shmobile_smp_apmu_cpu_kill, #endif diff --git a/arch/arm/mach-shmobile/smp-r8a7791.c b/arch/arm/mach-shmobile/smp-r8a7791.c index 5e2d1db79afa..b2508c0d276b 100644 --- a/arch/arm/mach-shmobile/smp-r8a7791.c +++ b/arch/arm/mach-shmobile/smp-r8a7791.c @@ -58,7 +58,7 @@ struct smp_operations r8a7791_smp_ops __initdata = { .smp_prepare_cpus = r8a7791_smp_prepare_cpus, .smp_boot_secondary = r8a7791_smp_boot_secondary, #ifdef CONFIG_HOTPLUG_CPU - .cpu_disable = shmobile_smp_cpu_disable, + .cpu_can_disable = shmobile_smp_cpu_can_disable, .cpu_die = shmobile_smp_apmu_cpu_die, .cpu_kill = shmobile_smp_apmu_cpu_kill, #endif diff --git a/arch/arm/mach-shmobile/smp-sh73a0.c b/arch/arm/mach-shmobile/smp-sh73a0.c index 2106d6b76a06..ae7c764fd6b4 100644 --- a/arch/arm/mach-shmobile/smp-sh73a0.c +++ b/arch/arm/mach-shmobile/smp-sh73a0.c @@ -68,7 +68,7 @@ struct smp_operations sh73a0_smp_ops __initdata = { .smp_prepare_cpus = sh73a0_smp_prepare_cpus, .smp_boot_secondary = sh73a0_boot_secondary, #ifdef CONFIG_HOTPLUG_CPU - .cpu_disable = shmobile_smp_cpu_disable, + .cpu_can_disable = shmobile_smp_cpu_can_disable, .cpu_die = shmobile_smp_scu_cpu_die, .cpu_kill = shmobile_smp_scu_cpu_kill, #endif -- cgit v1.2.3-55-g7522 From 
9ac87c5a0b19417f925dc61cac7198d872159a2c Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 31 Jul 2015 11:28:54 +0100 Subject: ARM: 8407/1: switch_to: Remove finish_arch_switch Fold finish_arch_switch() into switch_to(), in preparation for the removal of the finish_arch_switch call from core sched code. Signed-off-by: Will Deacon Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Russell King --- arch/arm/include/asm/switch_to.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'arch/arm/include/asm') diff --git a/arch/arm/include/asm/switch_to.h b/arch/arm/include/asm/switch_to.h index c99e259469f7..12ebfcc1d539 100644 --- a/arch/arm/include/asm/switch_to.h +++ b/arch/arm/include/asm/switch_to.h @@ -10,7 +10,9 @@ * CPU. */ #if defined(CONFIG_PREEMPT) && defined(CONFIG_SMP) && defined(CONFIG_CPU_V7) -#define finish_arch_switch(prev) dsb(ish) +#define __complete_pending_tlbi() dsb(ish) +#else +#define __complete_pending_tlbi() #endif /* @@ -22,6 +24,7 @@ extern struct task_struct *__switch_to(struct task_struct *, struct thread_info #define switch_to(prev,next,last) \ do { \ + __complete_pending_tlbi(); \ last = __switch_to(prev,task_thread_info(prev), task_thread_info(next)); \ } while (0) -- cgit v1.2.3-55-g7522 From 1234e3fda9aa24b2d650bbcd9ef09d5f6a12dc86 Mon Sep 17 00:00:00 2001 From: Russell King Date: Fri, 24 Jul 2015 09:10:55 +0100 Subject: ARM: reduce visibility of dmac_* functions The dmac_* functions are private to the ARM DMA API implementation, and should not be used by drivers. In order to discourage their use, remove their prototypes and macros from asm/*.h. We have to leave dmac_flush_range() behind as Exynos and MSM IOMMU code use these; once these sites are fixed, this can be moved also. Signed-off-by: Russell King --- arch/arm/include/asm/cacheflush.h | 4 ---- arch/arm/include/asm/glue-cache.h | 2 -- arch/arm/mm/dma-mapping.c | 1 + arch/arm/mm/dma.h | 32 ++++++++++++++++++++++++++++++++ 4 files changed, 33 insertions(+), 6 deletions(-) create mode 100644 arch/arm/mm/dma.h (limited to 'arch/arm/include/asm') diff --git a/arch/arm/include/asm/cacheflush.h b/arch/arm/include/asm/cacheflush.h index 4812cda8fd17..c5230a44eeca 100644 --- a/arch/arm/include/asm/cacheflush.h +++ b/arch/arm/include/asm/cacheflush.h @@ -140,8 +140,6 @@ extern struct cpu_cache_fns cpu_cache; * is visible to DMA, or data written by DMA to system memory is * visible to the CPU. */ -#define dmac_map_area cpu_cache.dma_map_area -#define dmac_unmap_area cpu_cache.dma_unmap_area #define dmac_flush_range cpu_cache.dma_flush_range #else @@ -161,8 +159,6 @@ extern void __cpuc_flush_dcache_area(void *, size_t); * is visible to DMA, or data written by DMA to system memory is * visible to the CPU. 
*/ -extern void dmac_map_area(const void *, size_t, int); -extern void dmac_unmap_area(const void *, size_t, int); extern void dmac_flush_range(const void *, const void *); #endif diff --git a/arch/arm/include/asm/glue-cache.h b/arch/arm/include/asm/glue-cache.h index a3c24cd5b7c8..cab07f69382d 100644 --- a/arch/arm/include/asm/glue-cache.h +++ b/arch/arm/include/asm/glue-cache.h @@ -158,8 +158,6 @@ static inline void nop_dma_unmap_area(const void *s, size_t l, int f) { } #define __cpuc_coherent_user_range __glue(_CACHE,_coherent_user_range) #define __cpuc_flush_dcache_area __glue(_CACHE,_flush_kern_dcache_area) -#define dmac_map_area __glue(_CACHE,_dma_map_area) -#define dmac_unmap_area __glue(_CACHE,_dma_unmap_area) #define dmac_flush_range __glue(_CACHE,_dma_flush_range) #endif diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index 1ced8a0f7a52..5edf17cf043d 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -39,6 +39,7 @@ #include #include +#include "dma.h" #include "mm.h" /* diff --git a/arch/arm/mm/dma.h b/arch/arm/mm/dma.h new file mode 100644 index 000000000000..70ea6852f94e --- /dev/null +++ b/arch/arm/mm/dma.h @@ -0,0 +1,32 @@ +#ifndef DMA_H +#define DMA_H + +#include + +#ifndef MULTI_CACHE +#define dmac_map_area __glue(_CACHE,_dma_map_area) +#define dmac_unmap_area __glue(_CACHE,_dma_unmap_area) + +/* + * These are private to the dma-mapping API. Do not use directly. + * Their sole purpose is to ensure that data held in the cache + * is visible to DMA, or data written by DMA to system memory is + * visible to the CPU. + */ +extern void dmac_map_area(const void *, size_t, int); +extern void dmac_unmap_area(const void *, size_t, int); + +#else + +/* + * These are private to the dma-mapping API. Do not use directly. + * Their sole purpose is to ensure that data held in the cache + * is visible to DMA, or data written by DMA to system memory is + * visible to the CPU. + */ +#define dmac_map_area cpu_cache.dma_map_area +#define dmac_unmap_area cpu_cache.dma_unmap_area + +#endif + +#endif -- cgit v1.2.3-55-g7522 From be120397e7709d9d5ed88317a385ce864a2603bc Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Fri, 31 Jul 2015 15:46:19 +0100 Subject: ARM: migrate to common PSCI client code Now that the common PSCI client code has been factored out to drivers/firmware, and made safe for 32-bit use, move the 32-bit ARM code over to it. This results in a moderate reduction of duplicated lines, and will prevent further duplication as the PSCI client code is updated for PSCI 1.0 and beyond. The two legacy platform users of the PSCI invocation code are updated to account for interface changes. In both cases the power state parameter (which is constant) is now generated using macros, so that the pack/unpack logic can be killed in preparation for PSCI 1.0 power state changes. 
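As a sketch of the macro-based encoding referred to above (this simply mirrors the psci_power_state_pack() helper that the diff below deletes; the function name here is illustrative only):

    /* Pack the three PSCI 0.2 power_state fields into a u32. */
    static u32 psci_0_2_pack_state(u32 id, u32 type, u32 affinity_level)
    {
            return ((id << PSCI_0_2_POWER_STATE_ID_SHIFT) &
                    PSCI_0_2_POWER_STATE_ID_MASK) |
                   ((type << PSCI_0_2_POWER_STATE_TYPE_SHIFT) &
                    PSCI_0_2_POWER_STATE_TYPE_MASK) |
                   ((affinity_level << PSCI_0_2_POWER_STATE_AFFL_SHIFT) &
                    PSCI_0_2_POWER_STATE_AFFL_MASK);
    }

With constant fields the whole expression folds to a compile-time literal, which is what allows the HIGHBANK_SUSPEND_PARAM and CALXEDA_IDLE_PARAM macros below to be plain constant expressions.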
Signed-off-by: Mark Rutland Acked-by: Rob Herring Cc: Catalin Marinas Cc: Ashwin Chaugule Cc: Lorenzo Pieralisi Cc: Russell King Cc: Will Deacon Signed-off-by: Will Deacon --- arch/arm/Kconfig | 1 + arch/arm/include/asm/psci.h | 23 --- arch/arm/kernel/Makefile | 2 +- arch/arm/kernel/psci.c | 299 -------------------------------------- arch/arm/kernel/psci_smp.c | 29 +++- arch/arm/kernel/setup.c | 3 +- arch/arm/mach-highbank/highbank.c | 2 +- arch/arm/mach-highbank/pm.c | 16 +- drivers/cpuidle/cpuidle-calxeda.c | 15 +- 9 files changed, 46 insertions(+), 344 deletions(-) delete mode 100644 arch/arm/kernel/psci.c (limited to 'arch/arm/include/asm') diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 1c5021002fe4..f5cd68425d8d 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -1496,6 +1496,7 @@ config HOTPLUG_CPU config ARM_PSCI bool "Support for the ARM Power State Coordination Interface (PSCI)" depends on CPU_V7 + select ARM_PSCI_FW help Say Y here if you want Linux to communicate with system firmware implementing the PSCI specification for CPU-centric power diff --git a/arch/arm/include/asm/psci.h b/arch/arm/include/asm/psci.h index c25ef3ec6d1f..68ee3ce17b82 100644 --- a/arch/arm/include/asm/psci.h +++ b/arch/arm/include/asm/psci.h @@ -14,34 +14,11 @@ #ifndef __ASM_ARM_PSCI_H #define __ASM_ARM_PSCI_H -#define PSCI_POWER_STATE_TYPE_STANDBY 0 -#define PSCI_POWER_STATE_TYPE_POWER_DOWN 1 - -struct psci_power_state { - u16 id; - u8 type; - u8 affinity_level; -}; - -struct psci_operations { - int (*cpu_suspend)(struct psci_power_state state, - unsigned long entry_point); - int (*cpu_off)(struct psci_power_state state); - int (*cpu_on)(unsigned long cpuid, unsigned long entry_point); - int (*migrate)(unsigned long cpuid); - int (*affinity_info)(unsigned long target_affinity, - unsigned long lowest_affinity_level); - int (*migrate_info_type)(void); -}; - -extern struct psci_operations psci_ops; extern struct smp_operations psci_smp_ops; #ifdef CONFIG_ARM_PSCI -int psci_init(void); bool psci_smp_available(void); #else -static inline int psci_init(void) { return 0; } static inline bool psci_smp_available(void) { return false; } #endif diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile index e69f7a19735d..3b995f5c524d 100644 --- a/arch/arm/kernel/Makefile +++ b/arch/arm/kernel/Makefile @@ -89,7 +89,7 @@ obj-$(CONFIG_EARLY_PRINTK) += early_printk.o obj-$(CONFIG_ARM_VIRT_EXT) += hyp-stub.o ifeq ($(CONFIG_ARM_PSCI),y) -obj-y += psci.o psci-call.o +obj-y += psci-call.o obj-$(CONFIG_SMP) += psci_smp.o endif diff --git a/arch/arm/kernel/psci.c b/arch/arm/kernel/psci.c deleted file mode 100644 index f90fdf4ce7c7..000000000000 --- a/arch/arm/kernel/psci.c +++ /dev/null @@ -1,299 +0,0 @@ -/* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * Copyright (C) 2012 ARM Limited - * - * Author: Will Deacon - */ - -#define pr_fmt(fmt) "psci: " fmt - -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -struct psci_operations psci_ops; - -static int (*invoke_psci_fn)(u32, u32, u32, u32); -typedef int (*psci_initcall_t)(const struct device_node *); - -asmlinkage int __invoke_psci_fn_hvc(u32, u32, u32, u32); -asmlinkage int __invoke_psci_fn_smc(u32, u32, u32, u32); - -enum psci_function { - PSCI_FN_CPU_SUSPEND, - PSCI_FN_CPU_ON, - PSCI_FN_CPU_OFF, - PSCI_FN_MIGRATE, - PSCI_FN_AFFINITY_INFO, - PSCI_FN_MIGRATE_INFO_TYPE, - PSCI_FN_MAX, -}; - -static u32 psci_function_id[PSCI_FN_MAX]; - -static int psci_to_linux_errno(int errno) -{ - switch (errno) { - case PSCI_RET_SUCCESS: - return 0; - case PSCI_RET_NOT_SUPPORTED: - return -EOPNOTSUPP; - case PSCI_RET_INVALID_PARAMS: - return -EINVAL; - case PSCI_RET_DENIED: - return -EPERM; - }; - - return -EINVAL; -} - -static u32 psci_power_state_pack(struct psci_power_state state) -{ - return ((state.id << PSCI_0_2_POWER_STATE_ID_SHIFT) - & PSCI_0_2_POWER_STATE_ID_MASK) | - ((state.type << PSCI_0_2_POWER_STATE_TYPE_SHIFT) - & PSCI_0_2_POWER_STATE_TYPE_MASK) | - ((state.affinity_level << PSCI_0_2_POWER_STATE_AFFL_SHIFT) - & PSCI_0_2_POWER_STATE_AFFL_MASK); -} - -static int psci_get_version(void) -{ - int err; - - err = invoke_psci_fn(PSCI_0_2_FN_PSCI_VERSION, 0, 0, 0); - return err; -} - -static int psci_cpu_suspend(struct psci_power_state state, - unsigned long entry_point) -{ - int err; - u32 fn, power_state; - - fn = psci_function_id[PSCI_FN_CPU_SUSPEND]; - power_state = psci_power_state_pack(state); - err = invoke_psci_fn(fn, power_state, entry_point, 0); - return psci_to_linux_errno(err); -} - -static int psci_cpu_off(struct psci_power_state state) -{ - int err; - u32 fn, power_state; - - fn = psci_function_id[PSCI_FN_CPU_OFF]; - power_state = psci_power_state_pack(state); - err = invoke_psci_fn(fn, power_state, 0, 0); - return psci_to_linux_errno(err); -} - -static int psci_cpu_on(unsigned long cpuid, unsigned long entry_point) -{ - int err; - u32 fn; - - fn = psci_function_id[PSCI_FN_CPU_ON]; - err = invoke_psci_fn(fn, cpuid, entry_point, 0); - return psci_to_linux_errno(err); -} - -static int psci_migrate(unsigned long cpuid) -{ - int err; - u32 fn; - - fn = psci_function_id[PSCI_FN_MIGRATE]; - err = invoke_psci_fn(fn, cpuid, 0, 0); - return psci_to_linux_errno(err); -} - -static int psci_affinity_info(unsigned long target_affinity, - unsigned long lowest_affinity_level) -{ - int err; - u32 fn; - - fn = psci_function_id[PSCI_FN_AFFINITY_INFO]; - err = invoke_psci_fn(fn, target_affinity, lowest_affinity_level, 0); - return err; -} - -static int psci_migrate_info_type(void) -{ - int err; - u32 fn; - - fn = psci_function_id[PSCI_FN_MIGRATE_INFO_TYPE]; - err = invoke_psci_fn(fn, 0, 0, 0); - return err; -} - -static int get_set_conduit_method(struct device_node *np) -{ - const char *method; - - pr_info("probing for conduit method from DT.\n"); - - if (of_property_read_string(np, "method", &method)) { - pr_warn("missing \"method\" property\n"); - return -ENXIO; - } - - if (!strcmp("hvc", method)) { - invoke_psci_fn = __invoke_psci_fn_hvc; - } else if (!strcmp("smc", method)) { - invoke_psci_fn = __invoke_psci_fn_smc; - } else { - pr_warn("invalid \"method\" property: %s\n", method); - return -EINVAL; - } - return 0; -} - -static void psci_sys_reset(enum reboot_mode reboot_mode, const char *cmd) -{ - invoke_psci_fn(PSCI_0_2_FN_SYSTEM_RESET, 0, 0, 0); 
-} - -static void psci_sys_poweroff(void) -{ - invoke_psci_fn(PSCI_0_2_FN_SYSTEM_OFF, 0, 0, 0); -} - -/* - * PSCI Function IDs for v0.2+ are well defined so use - * standard values. - */ -static int psci_0_2_init(struct device_node *np) -{ - int err, ver; - - err = get_set_conduit_method(np); - - if (err) - goto out_put_node; - - ver = psci_get_version(); - - if (ver == PSCI_RET_NOT_SUPPORTED) { - /* PSCI v0.2 mandates implementation of PSCI_ID_VERSION. */ - pr_err("PSCI firmware does not comply with the v0.2 spec.\n"); - err = -EOPNOTSUPP; - goto out_put_node; - } else { - pr_info("PSCIv%d.%d detected in firmware.\n", - PSCI_VERSION_MAJOR(ver), - PSCI_VERSION_MINOR(ver)); - - if (PSCI_VERSION_MAJOR(ver) == 0 && - PSCI_VERSION_MINOR(ver) < 2) { - err = -EINVAL; - pr_err("Conflicting PSCI version detected.\n"); - goto out_put_node; - } - } - - pr_info("Using standard PSCI v0.2 function IDs\n"); - psci_function_id[PSCI_FN_CPU_SUSPEND] = PSCI_0_2_FN_CPU_SUSPEND; - psci_ops.cpu_suspend = psci_cpu_suspend; - - psci_function_id[PSCI_FN_CPU_OFF] = PSCI_0_2_FN_CPU_OFF; - psci_ops.cpu_off = psci_cpu_off; - - psci_function_id[PSCI_FN_CPU_ON] = PSCI_0_2_FN_CPU_ON; - psci_ops.cpu_on = psci_cpu_on; - - psci_function_id[PSCI_FN_MIGRATE] = PSCI_0_2_FN_MIGRATE; - psci_ops.migrate = psci_migrate; - - psci_function_id[PSCI_FN_AFFINITY_INFO] = PSCI_0_2_FN_AFFINITY_INFO; - psci_ops.affinity_info = psci_affinity_info; - - psci_function_id[PSCI_FN_MIGRATE_INFO_TYPE] = - PSCI_0_2_FN_MIGRATE_INFO_TYPE; - psci_ops.migrate_info_type = psci_migrate_info_type; - - arm_pm_restart = psci_sys_reset; - - pm_power_off = psci_sys_poweroff; - -out_put_node: - of_node_put(np); - return err; -} - -/* - * PSCI < v0.2 get PSCI Function IDs via DT. - */ -static int psci_0_1_init(struct device_node *np) -{ - u32 id; - int err; - - err = get_set_conduit_method(np); - - if (err) - goto out_put_node; - - pr_info("Using PSCI v0.1 Function IDs from DT\n"); - - if (!of_property_read_u32(np, "cpu_suspend", &id)) { - psci_function_id[PSCI_FN_CPU_SUSPEND] = id; - psci_ops.cpu_suspend = psci_cpu_suspend; - } - - if (!of_property_read_u32(np, "cpu_off", &id)) { - psci_function_id[PSCI_FN_CPU_OFF] = id; - psci_ops.cpu_off = psci_cpu_off; - } - - if (!of_property_read_u32(np, "cpu_on", &id)) { - psci_function_id[PSCI_FN_CPU_ON] = id; - psci_ops.cpu_on = psci_cpu_on; - } - - if (!of_property_read_u32(np, "migrate", &id)) { - psci_function_id[PSCI_FN_MIGRATE] = id; - psci_ops.migrate = psci_migrate; - } - -out_put_node: - of_node_put(np); - return err; -} - -static const struct of_device_id psci_of_match[] __initconst = { - { .compatible = "arm,psci", .data = psci_0_1_init}, - { .compatible = "arm,psci-0.2", .data = psci_0_2_init}, - {}, -}; - -int __init psci_init(void) -{ - struct device_node *np; - const struct of_device_id *matched_np; - psci_initcall_t init_fn; - - np = of_find_matching_node_and_match(NULL, psci_of_match, &matched_np); - if (!np) - return -ENODEV; - - init_fn = (psci_initcall_t)matched_np->data; - return init_fn(np); -} diff --git a/arch/arm/kernel/psci_smp.c b/arch/arm/kernel/psci_smp.c index 244aaddfbfda..61c04b02faeb 100644 --- a/arch/arm/kernel/psci_smp.c +++ b/arch/arm/kernel/psci_smp.c @@ -17,6 +17,8 @@ #include #include #include +#include + #include #include @@ -56,17 +58,29 @@ static int psci_boot_secondary(unsigned int cpu, struct task_struct *idle) } #ifdef CONFIG_HOTPLUG_CPU +int psci_cpu_disable(unsigned int cpu) +{ + /* Fail early if we don't have CPU_OFF support */ + if (!psci_ops.cpu_off) + return 
-EOPNOTSUPP; + + /* Trusted OS will deny CPU_OFF */ + if (psci_tos_resident_on(cpu)) + return -EPERM; + + return 0; +} + void __ref psci_cpu_die(unsigned int cpu) { - const struct psci_power_state ps = { - .type = PSCI_POWER_STATE_TYPE_POWER_DOWN, - }; + u32 state = PSCI_POWER_STATE_TYPE_POWER_DOWN << + PSCI_0_2_POWER_STATE_TYPE_SHIFT; - if (psci_ops.cpu_off) - psci_ops.cpu_off(ps); + if (psci_ops.cpu_off) + psci_ops.cpu_off(state); - /* We should never return */ - panic("psci: cpu %d failed to shutdown\n", cpu); + /* We should never return */ + panic("psci: cpu %d failed to shutdown\n", cpu); } int __ref psci_cpu_kill(unsigned int cpu) @@ -109,6 +123,7 @@ bool __init psci_smp_available(void) struct smp_operations __initdata psci_smp_ops = { .smp_boot_secondary = psci_boot_secondary, #ifdef CONFIG_HOTPLUG_CPU + .cpu_disable = psci_cpu_disable, .cpu_die = psci_cpu_die, .cpu_kill = psci_cpu_kill, #endif diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index 36c18b73c1f4..9c38bd42f04b 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include @@ -972,7 +973,7 @@ void __init setup_arch(char **cmdline_p) unflatten_device_tree(); arm_dt_init_cpu_maps(); - psci_init(); + psci_dt_init(); xen_early_init(); #ifdef CONFIG_SMP if (is_smp()) { diff --git a/arch/arm/mach-highbank/highbank.c b/arch/arm/mach-highbank/highbank.c index 231fba0d03e5..6050a14faee6 100644 --- a/arch/arm/mach-highbank/highbank.c +++ b/arch/arm/mach-highbank/highbank.c @@ -28,8 +28,8 @@ #include #include #include +#include -#include #include #include #include diff --git a/arch/arm/mach-highbank/pm.c b/arch/arm/mach-highbank/pm.c index 7f2bd85eb935..400311695548 100644 --- a/arch/arm/mach-highbank/pm.c +++ b/arch/arm/mach-highbank/pm.c @@ -16,19 +16,21 @@ #include #include +#include #include #include -#include + +#include + +#define HIGHBANK_SUSPEND_PARAM \ + ((0 << PSCI_0_2_POWER_STATE_ID_SHIFT) | \ + (1 << PSCI_0_2_POWER_STATE_AFFL_SHIFT) | \ + (PSCI_POWER_STATE_TYPE_POWER_DOWN << PSCI_0_2_POWER_STATE_TYPE_SHIFT)) static int highbank_suspend_finish(unsigned long val) { - const struct psci_power_state ps = { - .type = PSCI_POWER_STATE_TYPE_POWER_DOWN, - .affinity_level = 1, - }; - - return psci_ops.cpu_suspend(ps, __pa(cpu_resume)); + return psci_ops.cpu_suspend(HIGHBANK_SUSPEND_PARAM, __pa(cpu_resume)); } static int highbank_pm_enter(suspend_state_t state) diff --git a/drivers/cpuidle/cpuidle-calxeda.c b/drivers/cpuidle/cpuidle-calxeda.c index c13feec89ea1..ea9728fde9b3 100644 --- a/drivers/cpuidle/cpuidle-calxeda.c +++ b/drivers/cpuidle/cpuidle-calxeda.c @@ -25,16 +25,21 @@ #include #include #include +#include + #include #include -#include + +#include + +#define CALXEDA_IDLE_PARAM \ + ((0 << PSCI_0_2_POWER_STATE_ID_SHIFT) | \ + (0 << PSCI_0_2_POWER_STATE_AFFL_SHIFT) | \ + (PSCI_POWER_STATE_TYPE_POWER_DOWN << PSCI_0_2_POWER_STATE_TYPE_SHIFT)) static int calxeda_idle_finish(unsigned long val) { - const struct psci_power_state ps = { - .type = PSCI_POWER_STATE_TYPE_POWER_DOWN, - }; - return psci_ops.cpu_suspend(ps, __pa(cpu_resume)); + return psci_ops.cpu_suspend(CALXEDA_IDLE_PARAM, __pa(cpu_resume)); } static int calxeda_pwrdown_idle(struct cpuidle_device *dev, -- cgit v1.2.3-55-g7522 From efaa6e266ba70439da00e7f1c8a218e243ae140a Mon Sep 17 00:00:00 2001 From: Russell King Date: Fri, 24 Jul 2015 10:21:02 +0100 Subject: firmware: qcom_scm-32: replace open-coded call to __cpuc_flush_dcache_area() Rather than directly accessing
architecture internal functions, provide a "method"-centric wrapper for qcom_scm-32 to do what's necessary to ensure that the secure monitor can see the data. This is called "secure_flush_area" and ensures that the specified memory area is coherent across the secure boundary. Acked-by: Andy Gross Reviewed-by: Stephen Boyd Signed-off-by: Russell King --- arch/arm/include/asm/cacheflush.h | 17 +++++++++++++++++ drivers/firmware/qcom_scm-32.c | 4 +--- 2 files changed, 18 insertions(+), 3 deletions(-) (limited to 'arch/arm/include/asm') diff --git a/arch/arm/include/asm/cacheflush.h b/arch/arm/include/asm/cacheflush.h index c5230a44eeca..d5525bfc7e3e 100644 --- a/arch/arm/include/asm/cacheflush.h +++ b/arch/arm/include/asm/cacheflush.h @@ -502,4 +502,21 @@ static inline void set_kernel_text_ro(void) { } void flush_uprobe_xol_access(struct page *page, unsigned long uaddr, void *kaddr, unsigned long len); +/** + * secure_flush_area - ensure coherency across the secure boundary + * @addr: virtual address + * @size: size of region + * + * Ensure that the specified area of memory is coherent across the secure + * boundary from the non-secure side. This is used when calling secure + * firmware where the secure firmware does not ensure coherency. + */ +static inline void secure_flush_area(const void *addr, size_t size) +{ + phys_addr_t phys = __pa(addr); + + __cpuc_flush_dcache_area((void *)addr, size); + outer_flush_range(phys, phys + size); +} + #endif diff --git a/drivers/firmware/qcom_scm-32.c b/drivers/firmware/qcom_scm-32.c index 1bd6f9c34331..29e6850665eb 100644 --- a/drivers/firmware/qcom_scm-32.c +++ b/drivers/firmware/qcom_scm-32.c @@ -24,7 +24,6 @@ #include #include -#include #include #include "qcom_scm.h" @@ -219,8 +218,7 @@ static int __qcom_scm_call(const struct qcom_scm_command *cmd) * Flush the command buffer so that the secure world sees * the correct data. */ - __cpuc_flush_dcache_area((void *)cmd, cmd->len); - outer_flush_range(cmd_addr, cmd_addr + cmd->len); + secure_flush_area(cmd, cmd->len); ret = smc(cmd_addr); if (ret < 0) -- cgit v1.2.3-55-g7522 From a5f4c561b3b19a9bc43a81da6382b0098ebbc1fb Mon Sep 17 00:00:00 2001 From: Stefan Agner Date: Thu, 13 Aug 2015 00:01:52 +0100 Subject: ARM: 8415/1: early fixmap support for earlycon Add early fixmap support, initially to provide a permanent, fixed mapping for the early console. A temporary, early pte is created, which is migrated to a permanent mapping in paging_init. This is also needed since the attributes may change as the memory types are initialized. The 3MiB range of fixmap spans two pte tables, but currently only one pte is created for early fixmap support. Re-add FIX_KMAP_BEGIN to the index calculation in highmem.c since the index for kmap does not start at zero anymore. This reverts 4221e2e6b316 ("ARM: 8031/1: fixmap: remove FIX_KMAP_BEGIN and FIX_KMAP_END") to some extent.
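For context, a minimal sketch of how an early console is expected to consume the new permanent slot, assuming the generic set_fixmap_io() helper (which applies the FIXMAP_PAGE_IO protection defined in the hunk below); the UART address is a placeholder:

    /* Map a hypothetical early UART through the fixed slot. */
    static void __init map_early_uart(unsigned long uart_phys)
    {
            void __iomem *uart_base;

            set_fixmap_io(FIX_EARLYCON_MEM_BASE, uart_phys & PAGE_MASK);
            uart_base = (void __iomem *)(__fix_to_virt(FIX_EARLYCON_MEM_BASE) +
                                         (uart_phys & ~PAGE_MASK));
            /* uart_base can now be used with readl()/writel() */
    }

Because the slot is permanent, the mapping survives into paging_init(), where early_fixmap_shutdown() re-creates it via create_mapping() as shown in the mmu.c hunk below.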
Cc: Mark Salter Cc: Kees Cook Cc: Laura Abbott Cc: Arnd Bergmann Cc: Ard Biesheuvel Signed-off-by: Rob Herring Signed-off-by: Stefan Agner Signed-off-by: Russell King --- arch/arm/Kconfig | 3 ++ arch/arm/include/asm/fixmap.h | 15 +++++++- arch/arm/kernel/setup.c | 4 ++ arch/arm/mm/highmem.c | 6 +-- arch/arm/mm/mmu.c | 88 +++++++++++++++++++++++++++++++++++++++---- 5 files changed, 105 insertions(+), 11 deletions(-) (limited to 'arch/arm/include/asm') diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index a750c1425c3a..1bcda7cb2e04 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -188,6 +188,9 @@ config ARCH_HAS_ILOG2_U64 config ARCH_HAS_BANDGAP bool +config FIX_EARLYCON_MEM + def_bool y if MMU + config GENERIC_HWEIGHT bool default y diff --git a/arch/arm/include/asm/fixmap.h b/arch/arm/include/asm/fixmap.h index 0415eae1df27..58cfe9f1a687 100644 --- a/arch/arm/include/asm/fixmap.h +++ b/arch/arm/include/asm/fixmap.h @@ -6,9 +6,13 @@ #define FIXADDR_TOP (FIXADDR_END - PAGE_SIZE) #include +#include enum fixed_addresses { - FIX_KMAP_BEGIN, + FIX_EARLYCON_MEM_BASE, + __end_of_permanent_fixed_addresses, + + FIX_KMAP_BEGIN = __end_of_permanent_fixed_addresses, FIX_KMAP_END = FIX_KMAP_BEGIN + (KM_TYPE_NR * NR_CPUS) - 1, /* Support writing RO kernel text via kprobes, jump labels, etc. */ @@ -18,7 +22,16 @@ enum fixed_addresses { __end_of_fixed_addresses }; +#define FIXMAP_PAGE_COMMON (L_PTE_YOUNG | L_PTE_PRESENT | L_PTE_XN | L_PTE_DIRTY) + +#define FIXMAP_PAGE_NORMAL (FIXMAP_PAGE_COMMON | L_PTE_MT_WRITEBACK) + +/* Used by set_fixmap_(io|nocache), both meant for mapping a device */ +#define FIXMAP_PAGE_IO (FIXMAP_PAGE_COMMON | L_PTE_MT_DEV_SHARED | L_PTE_SHARED) +#define FIXMAP_PAGE_NOCACHE FIXMAP_PAGE_IO + void __set_fixmap(enum fixed_addresses idx, phys_addr_t phys, pgprot_t prot); +void __init early_fixmap_init(void); #include diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index 6bbec6042052..e2ecee6b70ca 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -37,6 +37,7 @@ #include #include #include +#include #include #include #include @@ -954,6 +955,9 @@ void __init setup_arch(char **cmdline_p) strlcpy(cmd_line, boot_command_line, COMMAND_LINE_SIZE); *cmdline_p = cmd_line; + if (IS_ENABLED(CONFIG_FIX_EARLYCON_MEM)) + early_fixmap_init(); + parse_early_param(); #ifdef CONFIG_MMU diff --git a/arch/arm/mm/highmem.c b/arch/arm/mm/highmem.c index ee8dfa793989..9df5f09585ca 100644 --- a/arch/arm/mm/highmem.c +++ b/arch/arm/mm/highmem.c @@ -79,7 +79,7 @@ void *kmap_atomic(struct page *page) type = kmap_atomic_idx_push(); - idx = type + KM_TYPE_NR * smp_processor_id(); + idx = FIX_KMAP_BEGIN + type + KM_TYPE_NR * smp_processor_id(); vaddr = __fix_to_virt(idx); #ifdef CONFIG_DEBUG_HIGHMEM /* @@ -106,7 +106,7 @@ void __kunmap_atomic(void *kvaddr) if (kvaddr >= (void *)FIXADDR_START) { type = kmap_atomic_idx(); - idx = type + KM_TYPE_NR * smp_processor_id(); + idx = FIX_KMAP_BEGIN + type + KM_TYPE_NR * smp_processor_id(); if (cache_is_vivt()) __cpuc_flush_dcache_area((void *)vaddr, PAGE_SIZE); @@ -138,7 +138,7 @@ void *kmap_atomic_pfn(unsigned long pfn) return page_address(page); type = kmap_atomic_idx_push(); - idx = type + KM_TYPE_NR * smp_processor_id(); + idx = FIX_KMAP_BEGIN + type + KM_TYPE_NR * smp_processor_id(); vaddr = __fix_to_virt(idx); #ifdef CONFIG_DEBUG_HIGHMEM BUG_ON(!pte_none(get_fixmap_pte(vaddr))); diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index 6ca7d9aa896f..fb9e817d08bb 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -357,6 
+357,47 @@ const struct mem_type *get_mem_type(unsigned int type) } EXPORT_SYMBOL(get_mem_type); +static pte_t *(*pte_offset_fixmap)(pmd_t *dir, unsigned long addr); + +static pte_t bm_pte[PTRS_PER_PTE + PTE_HWTABLE_PTRS] + __aligned(PTE_HWTABLE_OFF + PTE_HWTABLE_SIZE) __initdata; + +static pte_t * __init pte_offset_early_fixmap(pmd_t *dir, unsigned long addr) +{ + return &bm_pte[pte_index(addr)]; +} + +static pte_t *pte_offset_late_fixmap(pmd_t *dir, unsigned long addr) +{ + return pte_offset_kernel(dir, addr); +} + +static inline pmd_t * __init fixmap_pmd(unsigned long addr) +{ + pgd_t *pgd = pgd_offset_k(addr); + pud_t *pud = pud_offset(pgd, addr); + pmd_t *pmd = pmd_offset(pud, addr); + + return pmd; +} + +void __init early_fixmap_init(void) +{ + pmd_t *pmd; + + /* + * The early fixmap range spans multiple pmds, for which + * we are not prepared: + */ + BUILD_BUG_ON((__fix_to_virt(__end_of_permanent_fixed_addresses) >> PMD_SHIFT) + != FIXADDR_TOP >> PMD_SHIFT); + + pmd = fixmap_pmd(FIXADDR_TOP); + pmd_populate_kernel(&init_mm, pmd, bm_pte); + + pte_offset_fixmap = pte_offset_early_fixmap; +} + /* * To avoid TLB flush broadcasts, this uses local_flush_tlb_kernel_range(). * As a result, this can only be called with preemption disabled, as under @@ -365,7 +406,7 @@ EXPORT_SYMBOL(get_mem_type); void __set_fixmap(enum fixed_addresses idx, phys_addr_t phys, pgprot_t prot) { unsigned long vaddr = __fix_to_virt(idx); - pte_t *pte = pte_offset_kernel(pmd_off_k(vaddr), vaddr); + pte_t *pte = pte_offset_fixmap(pmd_off_k(vaddr), vaddr); /* Make sure fixmap region does not exceed available allocation. */ BUILD_BUG_ON(FIXADDR_START + (__end_of_fixed_addresses * PAGE_SIZE) > @@ -855,7 +896,7 @@ static void __init create_mapping(struct map_desc *md) } if ((md->type == MT_DEVICE || md->type == MT_ROM) && - md->virtual >= PAGE_OFFSET && + md->virtual >= PAGE_OFFSET && md->virtual < FIXADDR_START && (md->virtual < VMALLOC_START || md->virtual >= VMALLOC_END)) { pr_warn("BUG: mapping for 0x%08llx at 0x%08lx out of vmalloc space\n", (long long)__pfn_to_phys((u64)md->pfn), md->virtual); @@ -1213,10 +1254,10 @@ void __init arm_mm_memblock_reserve(void) /* * Set up the device mappings. Since we clear out the page tables for all - * mappings above VMALLOC_START, we will remove any debug device mappings. - * This means you have to be careful how you debug this function, or any - * called function. This means you can't use any function or debugging - * method which may touch any device, otherwise the kernel _will_ crash. + * mappings above VMALLOC_START, except early fixmap, we might remove debug + * device mappings. This means earlycon can be used to debug this function + * Any other function or debugging method which may touch any device _will_ + * crash the kernel. 
*/ static void __init devicemaps_init(const struct machine_desc *mdesc) { @@ -1231,7 +1272,10 @@ static void __init devicemaps_init(const struct machine_desc *mdesc) early_trap_init(vectors); - for (addr = VMALLOC_START; addr; addr += PMD_SIZE) + /* + * Clear page table except top pmd used by early fixmaps + */ + for (addr = VMALLOC_START; addr < (FIXADDR_TOP & PMD_MASK); addr += PMD_SIZE) pmd_clear(pmd_off_k(addr)); /* @@ -1483,6 +1527,35 @@ void __init early_paging_init(const struct machine_desc *mdesc) #endif +static void __init early_fixmap_shutdown(void) +{ + int i; + unsigned long va = fix_to_virt(__end_of_permanent_fixed_addresses - 1); + + pte_offset_fixmap = pte_offset_late_fixmap; + pmd_clear(fixmap_pmd(va)); + local_flush_tlb_kernel_page(va); + + for (i = 0; i < __end_of_permanent_fixed_addresses; i++) { + pte_t *pte; + struct map_desc map; + + map.virtual = fix_to_virt(i); + pte = pte_offset_early_fixmap(pmd_off_k(map.virtual), map.virtual); + + /* Only i/o device mappings are supported ATM */ + if (pte_none(*pte) || + (pte_val(*pte) & L_PTE_MT_MASK) != L_PTE_MT_DEV_SHARED) + continue; + + map.pfn = pte_pfn(*pte); + map.type = MT_DEVICE; + map.length = PAGE_SIZE; + + create_mapping(&map); + } +} + /* * paging_init() sets up the page tables, initialises the zone memory * maps, and sets up the zero page, bad page and bad page tables. @@ -1495,6 +1568,7 @@ void __init paging_init(const struct machine_desc *mdesc) prepare_page_table(); map_lowmem(); dma_contiguous_remap(); + early_fixmap_shutdown(); devicemaps_init(mdesc); kmap_init(); tcm_init(); -- cgit v1.2.3-55-g7522 From 8901925d32232adb57ba1b4c26d0c0f9d521a78c Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Mon, 17 Aug 2015 03:59:52 +0100 Subject: ARM: 8417/1: refactor bitops functions with BIT_MASK() and BIT_WORD() Use BIT_MASK() and BIT_WORD() rather than hard-coding the size of the "long" type. 
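For reference, the two helpers come from include/linux/bitops.h and, with BITS_PER_LONG == 32, expand to exactly the open-coded forms being replaced:

    #define BIT_MASK(nr)    (1UL << ((nr) % BITS_PER_LONG))
    #define BIT_WORD(nr)    ((nr) / BITS_PER_LONG)

    unsigned long mask = 1UL << (bit & 31); /* == BIT_MASK(bit) */
    p += bit >> 5;                          /* == p += BIT_WORD(bit) */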
Signed-off-by: Masahiro Yamada Signed-off-by: Russell King --- arch/arm/include/asm/bitops.h | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) (limited to 'arch/arm/include/asm') diff --git a/arch/arm/include/asm/bitops.h b/arch/arm/include/asm/bitops.h index 56380995f4c3..e943e6cee254 100644 --- a/arch/arm/include/asm/bitops.h +++ b/arch/arm/include/asm/bitops.h @@ -35,9 +35,9 @@ static inline void ____atomic_set_bit(unsigned int bit, volatile unsigned long *p) { unsigned long flags; - unsigned long mask = 1UL << (bit & 31); + unsigned long mask = BIT_MASK(bit); - p += bit >> 5; + p += BIT_WORD(bit); raw_local_irq_save(flags); *p |= mask; @@ -47,9 +47,9 @@ static inline void ____atomic_set_bit(unsigned int bit, volatile unsigned long * static inline void ____atomic_clear_bit(unsigned int bit, volatile unsigned long *p) { unsigned long flags; - unsigned long mask = 1UL << (bit & 31); + unsigned long mask = BIT_MASK(bit); - p += bit >> 5; + p += BIT_WORD(bit); raw_local_irq_save(flags); *p &= ~mask; @@ -59,9 +59,9 @@ static inline void ____atomic_clear_bit(unsigned int bit, volatile unsigned long static inline void ____atomic_change_bit(unsigned int bit, volatile unsigned long *p) { unsigned long flags; - unsigned long mask = 1UL << (bit & 31); + unsigned long mask = BIT_MASK(bit); - p += bit >> 5; + p += BIT_WORD(bit); raw_local_irq_save(flags); *p ^= mask; @@ -73,9 +73,9 @@ ____atomic_test_and_set_bit(unsigned int bit, volatile unsigned long *p) { unsigned long flags; unsigned int res; - unsigned long mask = 1UL << (bit & 31); + unsigned long mask = BIT_MASK(bit); - p += bit >> 5; + p += BIT_WORD(bit); raw_local_irq_save(flags); res = *p; @@ -90,9 +90,9 @@ ____atomic_test_and_clear_bit(unsigned int bit, volatile unsigned long *p) { unsigned long flags; unsigned int res; - unsigned long mask = 1UL << (bit & 31); + unsigned long mask = BIT_MASK(bit); - p += bit >> 5; + p += BIT_WORD(bit); raw_local_irq_save(flags); res = *p; @@ -107,9 +107,9 @@ ____atomic_test_and_change_bit(unsigned int bit, volatile unsigned long *p) { unsigned long flags; unsigned int res; - unsigned long mask = 1UL << (bit & 31); + unsigned long mask = BIT_MASK(bit); - p += bit >> 5; + p += BIT_WORD(bit); raw_local_irq_save(flags); res = *p; -- cgit v1.2.3-55-g7522 From 96231b2686b53f71838a335bdc404cb5285d1a01 Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Tue, 18 Aug 2015 09:14:15 +0100 Subject: ARM: 8419/1: dma-mapping: harmonize definition of DMA_ERROR_CODE All architectures except arm that define DMA_ERROR_CODE cast it to (dma_addr_t); as it is always compared against a dma_addr_t on arm as well, the definition can be harmonized.
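The definition only matters at the comparison site; a minimal sketch of that pattern, modelled loosely on the ARM dma_mapping_error() implementation (the function name here is illustrative):

    static inline int dma_addr_is_error(dma_addr_t dma_addr)
    {
            /*
             * dma_addr_t is 64-bit when CONFIG_ARM_LPAE is enabled, so the
             * explicit (dma_addr_t) cast keeps the all-ones error value
             * unambiguous regardless of the type's width.
             */
            return dma_addr == DMA_ERROR_CODE;
    }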
Signed-off-by: Nicholas Mc Guire Acked-by: Marek Szyprowski Signed-off-by: Russell King --- arch/arm/include/asm/dma-mapping.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/arm/include/asm') diff --git a/arch/arm/include/asm/dma-mapping.h b/arch/arm/include/asm/dma-mapping.h index b52101d37ec7..a68b9d8a71fe 100644 --- a/arch/arm/include/asm/dma-mapping.h +++ b/arch/arm/include/asm/dma-mapping.h @@ -14,7 +14,7 @@ #include #include -#define DMA_ERROR_CODE (~0) +#define DMA_ERROR_CODE (~(dma_addr_t)0x0) extern struct dma_map_ops arm_dma_ops; extern struct dma_map_ops arm_coherent_dma_ops; -- cgit v1.2.3-55-g7522 From 1eef5d2f1b461c120bcd82077edee5ec706ac53b Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 19 Aug 2015 21:23:48 +0100 Subject: ARM: domains: switch to keeping domain value in register Rather than modifying both the domain access control register and our per-thread copy, modify only the domain access control register, and use the per-thread copy to save and restore the register over context switches. We can also avoid the explicit initialisation of the init thread_info structure. This allows us to avoid needing to gain access to the thread information at the uaccess control sites. Signed-off-by: Russell King --- arch/arm/include/asm/domain.h | 20 +++++++++++++++----- arch/arm/include/asm/thread_info.h | 3 --- arch/arm/kernel/entry-armv.S | 2 ++ arch/arm/kernel/process.c | 13 ++++++++++--- 4 files changed, 27 insertions(+), 11 deletions(-) (limited to 'arch/arm/include/asm') diff --git a/arch/arm/include/asm/domain.h b/arch/arm/include/asm/domain.h index 6ddbe446425e..7f2941905714 100644 --- a/arch/arm/include/asm/domain.h +++ b/arch/arm/include/asm/domain.h @@ -59,6 +59,17 @@ #ifndef __ASSEMBLY__ +static inline unsigned int get_domain(void) +{ + unsigned int domain; + + asm( + "mrc p15, 0, %0, c3, c0 @ get domain" + : "=r" (domain)); + + return domain; +} + #ifdef CONFIG_CPU_USE_DOMAINS static inline void set_domain(unsigned val) { @@ -70,11 +81,10 @@ static inline void set_domain(unsigned val) #define modify_domain(dom,type) \ do { \ - struct thread_info *thread = current_thread_info(); \ - unsigned int domain = thread->cpu_domain; \ - domain &= ~domain_val(dom, DOMAIN_MANAGER); \ - thread->cpu_domain = domain | domain_val(dom, type); \ - set_domain(thread->cpu_domain); \ + unsigned int domain = get_domain(); \ + domain &= ~domain_val(dom, DOMAIN_MANAGER); \ + domain = domain | domain_val(dom, type); \ + set_domain(domain); \ } while (0) #else diff --git a/arch/arm/include/asm/thread_info.h b/arch/arm/include/asm/thread_info.h index bd32eded3e50..0a0aec410d8c 100644 --- a/arch/arm/include/asm/thread_info.h +++ b/arch/arm/include/asm/thread_info.h @@ -74,9 +74,6 @@ struct thread_info { .flags = 0, \ .preempt_count = INIT_PREEMPT_COUNT, \ .addr_limit = KERNEL_DS, \ - .cpu_domain = domain_val(DOMAIN_USER, DOMAIN_MANAGER) | \ - domain_val(DOMAIN_KERNEL, DOMAIN_MANAGER) | \ - domain_val(DOMAIN_IO, DOMAIN_CLIENT), \ } #define init_thread_info (init_thread_union.thread_info) diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S index 7dac3086e361..d19adcf6c580 100644 --- a/arch/arm/kernel/entry-armv.S +++ b/arch/arm/kernel/entry-armv.S @@ -770,6 +770,8 @@ ENTRY(__switch_to) ldr r4, [r2, #TI_TP_VALUE] ldr r5, [r2, #TI_TP_VALUE + 4] #ifdef CONFIG_CPU_USE_DOMAINS + mrc p15, 0, r6, c3, c0, 0 @ Get domain register + str r6, [r1, #TI_CPU_DOMAIN] @ Save old domain register ldr r6, [r2, #TI_CPU_DOMAIN] #endif switch_tls r1, r4, r5, r3, r7 diff 
--git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c index f192a2a41719..e722f9b3c9b1 100644 --- a/arch/arm/kernel/process.c +++ b/arch/arm/kernel/process.c @@ -146,10 +146,9 @@ void __show_regs(struct pt_regs *regs) buf[0] = '\0'; #ifdef CONFIG_CPU_CP15_MMU { - unsigned int transbase, dac; + unsigned int transbase, dac = get_domain(); asm("mrc p15, 0, %0, c2, c0\n\t" - "mrc p15, 0, %1, c3, c0\n" - : "=r" (transbase), "=r" (dac)); + : "=r" (transbase)); snprintf(buf, sizeof(buf), " Table: %08x DAC: %08x", transbase, dac); } @@ -210,6 +209,14 @@ copy_thread(unsigned long clone_flags, unsigned long stack_start, memset(&thread->cpu_context, 0, sizeof(struct cpu_context_save)); + /* + * Copy the initial value of the domain access control register + * from the current thread: thread->addr_limit will have been + * copied from the current thread via setup_thread_stack() in + * kernel/fork.c + */ + thread->cpu_domain = get_domain(); + if (likely(!(p->flags & PF_KTHREAD))) { *childregs = *current_pt_regs(); childregs->ARM_r0 = 0; -- cgit v1.2.3-55-g7522 From 8e798706f7e9cd7f096aa194de90269dde83773e Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 19 Aug 2015 22:36:24 +0100 Subject: ARM: domains: provide domain_mask() Provide a macro to generate the mask for a domain, rather than using domain_val(, DOMAIN_MANAGER) which won't work when CPU_USE_DOMAINS is turned off. Signed-off-by: Russell King --- arch/arm/include/asm/domain.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'arch/arm/include/asm') diff --git a/arch/arm/include/asm/domain.h b/arch/arm/include/asm/domain.h index 7f2941905714..045b9b453bcd 100644 --- a/arch/arm/include/asm/domain.h +++ b/arch/arm/include/asm/domain.h @@ -55,7 +55,8 @@ #define DOMAIN_MANAGER 1 #endif -#define domain_val(dom,type) ((type) << (2*(dom))) +#define domain_mask(dom) ((3) << (2 * (dom))) +#define domain_val(dom,type) ((type) << (2 * (dom))) #ifndef __ASSEMBLY__ @@ -82,7 +83,7 @@ static inline void set_domain(unsigned val) #define modify_domain(dom,type) \ do { \ unsigned int domain = get_domain(); \ - domain &= ~domain_val(dom, DOMAIN_MANAGER); \ + domain &= ~domain_mask(dom); \ domain = domain | domain_val(dom, type); \ set_domain(domain); \ } while (0) -- cgit v1.2.3-55-g7522 From 0171356a7708af01ad3224702b7f0aaa5b7a1399 Mon Sep 17 00:00:00 2001 From: Russell King Date: Fri, 21 Aug 2015 09:23:26 +0100 Subject: ARM: domains: move initial domain setting value to asm/domains.h Signed-off-by: Russell King --- arch/arm/include/asm/domain.h | 6 ++++++ arch/arm/kernel/head.S | 5 +---- 2 files changed, 7 insertions(+), 4 deletions(-) (limited to 'arch/arm/include/asm') diff --git a/arch/arm/include/asm/domain.h b/arch/arm/include/asm/domain.h index 045b9b453bcd..4218f88e8f7e 100644 --- a/arch/arm/include/asm/domain.h +++ b/arch/arm/include/asm/domain.h @@ -58,6 +58,12 @@ #define domain_mask(dom) ((3) << (2 * (dom))) #define domain_val(dom,type) ((type) << (2 * (dom))) +#define DACR_INIT \ + (domain_val(DOMAIN_USER, DOMAIN_MANAGER) | \ + domain_val(DOMAIN_KERNEL, DOMAIN_MANAGER) | \ + domain_val(DOMAIN_TABLE, DOMAIN_MANAGER) | \ + domain_val(DOMAIN_IO, DOMAIN_CLIENT)) + #ifndef __ASSEMBLY__ static inline unsigned int get_domain(void) diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S index bd755d97e459..d56e5e9a9e1e 100644 --- a/arch/arm/kernel/head.S +++ b/arch/arm/kernel/head.S @@ -461,10 +461,7 @@ __enable_mmu: #ifdef CONFIG_ARM_LPAE mcrr p15, 0, r4, r5, c2 @ load TTBR0 #else - mov r5, #(domain_val(DOMAIN_USER, 
DOMAIN_MANAGER) | \ - domain_val(DOMAIN_KERNEL, DOMAIN_MANAGER) | \ - domain_val(DOMAIN_TABLE, DOMAIN_MANAGER) | \ - domain_val(DOMAIN_IO, DOMAIN_CLIENT)) + mov r5, #DACR_INIT mcr p15, 0, r5, c3, c0, 0 @ load domain access register mcr p15, 0, r4, c2, c0, 0 @ load page table pointer #endif -- cgit v1.2.3-55-g7522 From 3c2aed5b28819564e1a07b4686bd89802bcc4d6b Mon Sep 17 00:00:00 2001 From: Russell King Date: Fri, 21 Aug 2015 09:30:16 +0100 Subject: ARM: domains: get rid of manager mode for user domain Since we switched to early trap initialisation in 94e5a85b3be0 ("ARM: earlier initialization of vectors page") we haven't been writing directly to the vectors page, and so there's no need for this domain to be in manager mode. Switch it to client mode. Signed-off-by: Russell King --- arch/arm/include/asm/domain.h | 2 +- arch/arm/kernel/traps.c | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) (limited to 'arch/arm/include/asm') diff --git a/arch/arm/include/asm/domain.h b/arch/arm/include/asm/domain.h index 4218f88e8f7e..08b601e69ddc 100644 --- a/arch/arm/include/asm/domain.h +++ b/arch/arm/include/asm/domain.h @@ -59,7 +59,7 @@ #define domain_val(dom,type) ((type) << (2 * (dom))) #define DACR_INIT \ - (domain_val(DOMAIN_USER, DOMAIN_MANAGER) | \ + (domain_val(DOMAIN_USER, DOMAIN_CLIENT) | \ domain_val(DOMAIN_KERNEL, DOMAIN_MANAGER) | \ domain_val(DOMAIN_TABLE, DOMAIN_MANAGER) | \ domain_val(DOMAIN_IO, DOMAIN_CLIENT)) diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c index d358226236f2..969f9d9e665f 100644 --- a/arch/arm/kernel/traps.c +++ b/arch/arm/kernel/traps.c @@ -870,7 +870,6 @@ void __init early_trap_init(void *vectors_base) kuser_init(vectors_base); flush_icache_range(vectors, vectors + PAGE_SIZE * 2); - modify_domain(DOMAIN_USER, DOMAIN_CLIENT); #else /* ifndef CONFIG_CPU_V7M */ /* * on V7-M there is no need to copy the vector table to a dedicated -- cgit v1.2.3-55-g7522 From a02d8dfd54cdf3b1b0464ccc2c1c4afe2c003a35 Mon Sep 17 00:00:00 2001 From: Russell King Date: Fri, 21 Aug 2015 09:38:31 +0100 Subject: ARM: domains: keep vectors in separate domain Keep the machine vectors in their own domain to prevent software-based user access control from making the vector code inaccessible, and thereby deadlocking the machine.
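An illustrative aside, not part of the patch series: the DACR holds one 2-bit access field per domain (architecturally 0 = no access, 1 = client, 3 = manager), so once the vectors live in their own domain, the user domain's field can be cleared without touching the field that keeps the vectors mapped. A minimal user-space sketch of that arithmetic, assuming the !CONFIG_IO_36 domain numbering from asm/domain.h:

#include <stdio.h>

/* Domain numbers (asm/domain.h, !CONFIG_IO_36 case). */
#define DOMAIN_KERNEL   0
#define DOMAIN_USER     1
#define DOMAIN_IO       2
#define DOMAIN_VECTORS  3

/* Architectural 2-bit access encodings. */
#define DOMAIN_NOACCESS 0
#define DOMAIN_CLIENT   1
#define DOMAIN_MANAGER  3

#define domain_mask(dom)      (3 << (2 * (dom)))
#define domain_val(dom, type) ((type) << (2 * (dom)))

int main(void)
{
	unsigned int dacr = domain_val(DOMAIN_USER,    DOMAIN_CLIENT)  |
			    domain_val(DOMAIN_KERNEL,  DOMAIN_MANAGER) |
			    domain_val(DOMAIN_IO,      DOMAIN_CLIENT)  |
			    domain_val(DOMAIN_VECTORS, DOMAIN_CLIENT);

	/* Deny the user domain, as software-based user access control would... */
	dacr = (dacr & ~domain_mask(DOMAIN_USER)) |
	       domain_val(DOMAIN_USER, DOMAIN_NOACCESS);

	/* ...the vectors field is untouched, so exception entry keeps working. */
	printf("DACR = %08x, vectors field = %u\n", dacr,
	       (dacr & domain_mask(DOMAIN_VECTORS)) >> (2 * DOMAIN_VECTORS));
	return 0;
}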
Signed-off-by: Russell King --- arch/arm/include/asm/domain.h | 4 +++- arch/arm/include/asm/pgtable-2level-hwdef.h | 1 + arch/arm/mm/mmu.c | 4 ++-- arch/arm/mm/pgd.c | 10 ++++++++++ 4 files changed, 16 insertions(+), 3 deletions(-) (limited to 'arch/arm/include/asm') diff --git a/arch/arm/include/asm/domain.h b/arch/arm/include/asm/domain.h index 08b601e69ddc..396a12e486fe 100644 --- a/arch/arm/include/asm/domain.h +++ b/arch/arm/include/asm/domain.h @@ -43,6 +43,7 @@ #define DOMAIN_USER 1 #define DOMAIN_IO 0 #endif +#define DOMAIN_VECTORS 3 /* * Domain types @@ -62,7 +63,8 @@ (domain_val(DOMAIN_USER, DOMAIN_CLIENT) | \ domain_val(DOMAIN_KERNEL, DOMAIN_MANAGER) | \ domain_val(DOMAIN_TABLE, DOMAIN_MANAGER) | \ - domain_val(DOMAIN_IO, DOMAIN_CLIENT)) + domain_val(DOMAIN_IO, DOMAIN_CLIENT) | \ + domain_val(DOMAIN_VECTORS, DOMAIN_CLIENT)) #ifndef __ASSEMBLY__ diff --git a/arch/arm/include/asm/pgtable-2level-hwdef.h b/arch/arm/include/asm/pgtable-2level-hwdef.h index 5e68278e953e..d0131ee6f6af 100644 --- a/arch/arm/include/asm/pgtable-2level-hwdef.h +++ b/arch/arm/include/asm/pgtable-2level-hwdef.h @@ -23,6 +23,7 @@ #define PMD_PXNTABLE (_AT(pmdval_t, 1) << 2) /* v7 */ #define PMD_BIT4 (_AT(pmdval_t, 1) << 4) #define PMD_DOMAIN(x) (_AT(pmdval_t, (x)) << 5) +#define PMD_DOMAIN_MASK PMD_DOMAIN(0x0f) #define PMD_PROTECTION (_AT(pmdval_t, 1) << 9) /* v5 */ /* * - section diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index 6ca7d9aa896f..a016de248034 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -291,13 +291,13 @@ static struct mem_type mem_types[] = { .prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY | L_PTE_RDONLY, .prot_l1 = PMD_TYPE_TABLE, - .domain = DOMAIN_USER, + .domain = DOMAIN_VECTORS, }, [MT_HIGH_VECTORS] = { .prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY | L_PTE_USER | L_PTE_RDONLY, .prot_l1 = PMD_TYPE_TABLE, - .domain = DOMAIN_USER, + .domain = DOMAIN_VECTORS, }, [MT_MEMORY_RWX] = { .prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY, diff --git a/arch/arm/mm/pgd.c b/arch/arm/mm/pgd.c index a3681f11dd9f..e683db1b90a3 100644 --- a/arch/arm/mm/pgd.c +++ b/arch/arm/mm/pgd.c @@ -84,6 +84,16 @@ pgd_t *pgd_alloc(struct mm_struct *mm) if (!new_pte) goto no_pte; +#ifndef CONFIG_ARM_LPAE + /* + * Modify the PTE pointer to have the correct domain. This + * needs to be the vectors domain to avoid the low vectors + * being unmapped. + */ + pmd_val(*new_pmd) &= ~PMD_DOMAIN_MASK; + pmd_val(*new_pmd) |= PMD_DOMAIN(DOMAIN_VECTORS); +#endif + init_pud = pud_offset(init_pgd, 0); init_pmd = pmd_offset(init_pud, 0); init_pte = pte_offset_map(init_pmd, 0); -- cgit v1.2.3-55-g7522 From 1fb6755f16872ad256c18cce2830f9087502dffd Mon Sep 17 00:00:00 2001 From: Russell King Date: Fri, 21 Aug 2015 09:42:10 +0100 Subject: ARM: domains: remove DOMAIN_TABLE DOMAIN_TABLE is not used; in any case, it aliases to the kernel domain. Remove this definition. 
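A brief aside on why this is safe: DOMAIN_TABLE shares DOMAIN_KERNEL's domain number, so its domain_val() term sets exactly the same DACR bits, and dropping it cannot change DACR_INIT. A stand-alone check of that claim (domain numbers for the !CONFIG_IO_36 case; manager encoding assumed to be the architectural 3):

#include <assert.h>

#define DOMAIN_KERNEL  0
#define DOMAIN_TABLE   0  /* alias of DOMAIN_KERNEL; removed by this patch */
#define DOMAIN_MANAGER 3

#define domain_val(dom, type) ((type) << (2 * (dom)))

int main(void)
{
	/* ORing in the DOMAIN_TABLE term is a no-op: identical bit positions. */
	assert((domain_val(DOMAIN_KERNEL, DOMAIN_MANAGER) |
		domain_val(DOMAIN_TABLE, DOMAIN_MANAGER)) ==
	       domain_val(DOMAIN_KERNEL, DOMAIN_MANAGER));
	return 0;
}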
Signed-off-by: Russell King --- arch/arm/include/asm/domain.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'arch/arm/include/asm') diff --git a/arch/arm/include/asm/domain.h b/arch/arm/include/asm/domain.h index 396a12e486fe..2be929549938 100644 --- a/arch/arm/include/asm/domain.h +++ b/arch/arm/include/asm/domain.h @@ -34,12 +34,10 @@ */ #ifndef CONFIG_IO_36 #define DOMAIN_KERNEL 0 -#define DOMAIN_TABLE 0 #define DOMAIN_USER 1 #define DOMAIN_IO 2 #else #define DOMAIN_KERNEL 2 -#define DOMAIN_TABLE 2 #define DOMAIN_USER 1 #define DOMAIN_IO 0 #endif @@ -62,7 +60,6 @@ #define DACR_INIT \ (domain_val(DOMAIN_USER, DOMAIN_CLIENT) | \ domain_val(DOMAIN_KERNEL, DOMAIN_MANAGER) | \ - domain_val(DOMAIN_TABLE, DOMAIN_MANAGER) | \ domain_val(DOMAIN_IO, DOMAIN_CLIENT) | \ domain_val(DOMAIN_VECTORS, DOMAIN_CLIENT)) -- cgit v1.2.3-55-g7522 From b64d1f66517a89b9b0f6bd0bca86b05a55a5e742 Mon Sep 17 00:00:00 2001 From: Russell King Date: Tue, 18 Aug 2015 23:06:25 +0100 Subject: ARM: uaccess: simplify user access assembly The user assembly for byte and word accesses was virtually identical. Rather than duplicating this, use a macro instead. Acked-by: Will Deacon Signed-off-by: Russell King --- arch/arm/include/asm/uaccess.h | 47 +++++++++++------------------------------- 1 file changed, 12 insertions(+), 35 deletions(-) (limited to 'arch/arm/include/asm') diff --git a/arch/arm/include/asm/uaccess.h b/arch/arm/include/asm/uaccess.h index 74b17d09ef7a..4cf54ebe408a 100644 --- a/arch/arm/include/asm/uaccess.h +++ b/arch/arm/include/asm/uaccess.h @@ -311,9 +311,9 @@ do { \ (x) = (__typeof__(*(ptr)))__gu_val; \ } while (0) -#define __get_user_asm_byte(x, addr, err) \ +#define __get_user_asm(x, addr, err, instr) \ __asm__ __volatile__( \ - "1: " TUSER(ldrb) " %1,[%2],#0\n" \ + "1: " TUSER(instr) " %1, [%2], #0\n" \ "2:\n" \ " .pushsection .text.fixup,\"ax\"\n" \ " .align 2\n" \ @@ -329,6 +329,9 @@ do { \ : "r" (addr), "i" (-EFAULT) \ : "cc") +#define __get_user_asm_byte(x, addr, err) \ + __get_user_asm(x, addr, err, ldrb) + #ifndef __ARMEB__ #define __get_user_asm_half(x, __gu_addr, err) \ ({ \ @@ -348,22 +351,7 @@ do { \ #endif #define __get_user_asm_word(x, addr, err) \ - __asm__ __volatile__( \ - "1: " TUSER(ldr) " %1,[%2],#0\n" \ - "2:\n" \ - " .pushsection .text.fixup,\"ax\"\n" \ - " .align 2\n" \ - "3: mov %0, %3\n" \ - " mov %1, #0\n" \ - " b 2b\n" \ - " .popsection\n" \ - " .pushsection __ex_table,\"a\"\n" \ - " .align 3\n" \ - " .long 1b, 3b\n" \ - " .popsection" \ - : "+r" (err), "=&r" (x) \ - : "r" (addr), "i" (-EFAULT) \ - : "cc") + __get_user_asm(x, addr, err, ldr) #define __put_user(x, ptr) \ ({ \ @@ -393,9 +381,9 @@ do { \ } \ } while (0) -#define __put_user_asm_byte(x, __pu_addr, err) \ +#define __put_user_asm(x, __pu_addr, err, instr) \ __asm__ __volatile__( \ - "1: " TUSER(strb) " %1,[%2],#0\n" \ + "1: " TUSER(instr) " %1, [%2], #0\n" \ "2:\n" \ " .pushsection .text.fixup,\"ax\"\n" \ " .align 2\n" \ @@ -410,6 +398,9 @@ do { \ : "r" (x), "r" (__pu_addr), "i" (-EFAULT) \ : "cc") +#define __put_user_asm_byte(x, __pu_addr, err) \ + __put_user_asm(x, __pu_addr, err, strb) + #ifndef __ARMEB__ #define __put_user_asm_half(x, __pu_addr, err) \ ({ \ @@ -427,21 +418,7 @@ do { \ #endif #define __put_user_asm_word(x, __pu_addr, err) \ - __asm__ __volatile__( \ - "1: " TUSER(str) " %1,[%2],#0\n" \ - "2:\n" \ - " .pushsection .text.fixup,\"ax\"\n" \ - " .align 2\n" \ - "3: mov %0, %3\n" \ - " b 2b\n" \ - " .popsection\n" \ - " .pushsection __ex_table,\"a\"\n" \ - " .align 3\n" \ - " .long 1b, 3b\n" \ - " 
.popsection" \ - : "+r" (err) \ - : "r" (x), "r" (__pu_addr), "i" (-EFAULT) \ - : "cc") + __put_user_asm(x, __pu_addr, err, str) #ifndef __ARMEB__ #define __reg_oper0 "%R2" -- cgit v1.2.3-55-g7522 From 01e09a28167c338684606b70797422da3bbb6650 Mon Sep 17 00:00:00 2001 From: Russell King Date: Thu, 20 Aug 2015 14:22:48 +0100 Subject: ARM: entry: get rid of asm_trace_hardirqs_on_cond There's no need for this macro, it can use a default for the condition argument. Acked-by: Will Deacon Signed-off-by: Russell King --- arch/arm/include/asm/assembler.h | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'arch/arm/include/asm') diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h index 4abe57279c66..742495eb5526 100644 --- a/arch/arm/include/asm/assembler.h +++ b/arch/arm/include/asm/assembler.h @@ -116,7 +116,7 @@ #endif .endm - .macro asm_trace_hardirqs_on_cond, cond + .macro asm_trace_hardirqs_on, cond=al #if defined(CONFIG_TRACE_IRQFLAGS) /* * actually the registers should be pushed and pop'd conditionally, but @@ -128,10 +128,6 @@ #endif .endm - .macro asm_trace_hardirqs_on - asm_trace_hardirqs_on_cond al - .endm - .macro disable_irq disable_irq_notrace asm_trace_hardirqs_off @@ -173,7 +169,7 @@ .macro restore_irqs, oldcpsr tst \oldcpsr, #PSR_I_BIT - asm_trace_hardirqs_on_cond eq + asm_trace_hardirqs_on cond=eq restore_irqs_notrace \oldcpsr .endm -- cgit v1.2.3-55-g7522 From 3302caddf10ad50710dbb7a94ccbdb3ad5bf1412 Mon Sep 17 00:00:00 2001 From: Russell King Date: Thu, 20 Aug 2015 16:13:37 +0100 Subject: ARM: entry: efficiency cleanups Make the "fast" syscall return path fast again. The addition of IRQ tracing and context tracking has made this path grossly inefficient. We can do much better if these options are enabled if we save the syscall return code on the stack - we then don't need to save a bunch of registers around every single callout to C code. 
Acked-by: Will Deacon Signed-off-by: Russell King --- arch/arm/include/asm/assembler.h | 16 +++++++--- arch/arm/include/asm/thread_info.h | 20 +++++-------- arch/arm/kernel/entry-common.S | 61 ++++++++++++++++++++++++++++---------- arch/arm/kernel/signal.c | 6 ++++ 4 files changed, 71 insertions(+), 32 deletions(-) (limited to 'arch/arm/include/asm') diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h index 742495eb5526..5a5504f90d5f 100644 --- a/arch/arm/include/asm/assembler.h +++ b/arch/arm/include/asm/assembler.h @@ -108,29 +108,37 @@ .endm #endif - .macro asm_trace_hardirqs_off + .macro asm_trace_hardirqs_off, save=1 #if defined(CONFIG_TRACE_IRQFLAGS) + .if \save stmdb sp!, {r0-r3, ip, lr} + .endif bl trace_hardirqs_off + .if \save ldmia sp!, {r0-r3, ip, lr} + .endif #endif .endm - .macro asm_trace_hardirqs_on, cond=al + .macro asm_trace_hardirqs_on, cond=al, save=1 #if defined(CONFIG_TRACE_IRQFLAGS) /* * actually the registers should be pushed and pop'd conditionally, but * after bl the flags are certainly clobbered */ + .if \save stmdb sp!, {r0-r3, ip, lr} + .endif bl\cond trace_hardirqs_on + .if \save ldmia sp!, {r0-r3, ip, lr} + .endif #endif .endm - .macro disable_irq + .macro disable_irq, save=1 disable_irq_notrace - asm_trace_hardirqs_off + asm_trace_hardirqs_off \save .endm .macro enable_irq diff --git a/arch/arm/include/asm/thread_info.h b/arch/arm/include/asm/thread_info.h index bd32eded3e50..71e0ffcedf8e 100644 --- a/arch/arm/include/asm/thread_info.h +++ b/arch/arm/include/asm/thread_info.h @@ -136,22 +136,18 @@ extern int vfp_restore_user_hwstate(struct user_vfp __user *, /* * thread information flags: - * TIF_SYSCALL_TRACE - syscall trace active - * TIF_SYSCAL_AUDIT - syscall auditing active - * TIF_SIGPENDING - signal pending - * TIF_NEED_RESCHED - rescheduling necessary - * TIF_NOTIFY_RESUME - callback before returning to user * TIF_USEDFPU - FPU was used by this task this quantum (SMP) * TIF_POLLING_NRFLAG - true if poll_idle() is polling TIF_NEED_RESCHED */ -#define TIF_SIGPENDING 0 -#define TIF_NEED_RESCHED 1 +#define TIF_SIGPENDING 0 /* signal pending */ +#define TIF_NEED_RESCHED 1 /* rescheduling necessary */ #define TIF_NOTIFY_RESUME 2 /* callback before returning to user */ -#define TIF_UPROBE 7 -#define TIF_SYSCALL_TRACE 8 -#define TIF_SYSCALL_AUDIT 9 -#define TIF_SYSCALL_TRACEPOINT 10 -#define TIF_SECCOMP 11 /* seccomp syscall filtering active */ +#define TIF_UPROBE 3 /* breakpointed or singlestepping */ +#define TIF_SYSCALL_TRACE 4 /* syscall trace active */ +#define TIF_SYSCALL_AUDIT 5 /* syscall auditing active */ +#define TIF_SYSCALL_TRACEPOINT 6 /* syscall tracepoint instrumentation */ +#define TIF_SECCOMP 7 /* seccomp syscall filtering active */ + #define TIF_NOHZ 12 /* in adaptive nohz mode */ #define TIF_USING_IWMMXT 17 #define TIF_MEMDIE 18 /* is terminating due to OOM killer */ diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S index 92828a1dec80..dd3721d1185e 100644 --- a/arch/arm/kernel/entry-common.S +++ b/arch/arm/kernel/entry-common.S @@ -24,35 +24,55 @@ .align 5 +#if !(IS_ENABLED(CONFIG_TRACE_IRQFLAGS) || IS_ENABLED(CONFIG_CONTEXT_TRACKING)) /* - * This is the fast syscall return path. We do as little as - * possible here, and this includes saving r0 back into the SVC - * stack. + * This is the fast syscall return path. We do as little as possible here, + * such as avoiding writing r0 to the stack. 
We only use this path if we + * have tracing and context tracking disabled - the overheads from those + * features make this path too inefficient. */ ret_fast_syscall: UNWIND(.fnstart ) UNWIND(.cantunwind ) - disable_irq @ disable interrupts + disable_irq_notrace @ disable interrupts ldr r1, [tsk, #TI_FLAGS] @ re-check for syscall tracing - tst r1, #_TIF_SYSCALL_WORK - bne __sys_trace_return - tst r1, #_TIF_WORK_MASK + tst r1, #_TIF_SYSCALL_WORK | _TIF_WORK_MASK bne fast_work_pending - asm_trace_hardirqs_on /* perform architecture specific actions before user return */ arch_ret_to_user r1, lr - ct_user_enter restore_user_regs fast = 1, offset = S_OFF UNWIND(.fnend ) +ENDPROC(ret_fast_syscall) -/* - * Ok, we need to do extra processing, enter the slow path. - */ + /* Ok, we need to do extra processing, enter the slow path. */ fast_work_pending: str r0, [sp, #S_R0+S_OFF]! @ returned r0 -work_pending: + /* fall through to work_pending */ +#else +/* + * The "replacement" ret_fast_syscall for when tracing or context tracking + * is enabled. As we will need to call out to some C functions, we save + * r0 first to avoid needing to save registers around each C function call. + */ +ret_fast_syscall: + UNWIND(.fnstart ) + UNWIND(.cantunwind ) + str r0, [sp, #S_R0 + S_OFF]! @ save returned r0 + disable_irq_notrace @ disable interrupts + ldr r1, [tsk, #TI_FLAGS] @ re-check for syscall tracing + tst r1, #_TIF_SYSCALL_WORK | _TIF_WORK_MASK + beq no_work_pending + UNWIND(.fnend ) +ENDPROC(ret_fast_syscall) + + /* Slower path - fall through to work_pending */ +#endif + + tst r1, #_TIF_SYSCALL_WORK + bne __sys_trace_return_nosave +slow_work_pending: mov r0, sp @ 'regs' mov r2, why @ 'syscall' bl do_work_pending @@ -64,16 +84,19 @@ work_pending: /* * "slow" syscall return path. "why" tells us if this was a real syscall. + * IRQs may be enabled here, so always disable them. Note that we use the + * "notrace" version to avoid calling into the tracing code unnecessarily. + * do_work_pending() will update this state if necessary. */ ENTRY(ret_to_user) ret_slow_syscall: - disable_irq @ disable interrupts + disable_irq_notrace @ disable interrupts ENTRY(ret_to_user_from_irq) ldr r1, [tsk, #TI_FLAGS] tst r1, #_TIF_WORK_MASK - bne work_pending + bne slow_work_pending no_work_pending: - asm_trace_hardirqs_on + asm_trace_hardirqs_on save = 0 /* perform architecture specific actions before user return */ arch_ret_to_user r1, lr @@ -251,6 +274,12 @@ __sys_trace_return: bl syscall_trace_exit b ret_slow_syscall +__sys_trace_return_nosave: + asm_trace_hardirqs_off save=0 + mov r0, sp + bl syscall_trace_exit + b ret_slow_syscall + .align 5 #ifdef CONFIG_ALIGNMENT_TRAP .type __cr_alignment, #object diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c index 423663e23791..b6cda06b455f 100644 --- a/arch/arm/kernel/signal.c +++ b/arch/arm/kernel/signal.c @@ -562,6 +562,12 @@ static int do_signal(struct pt_regs *regs, int syscall) asmlinkage int do_work_pending(struct pt_regs *regs, unsigned int thread_flags, int syscall) { + /* + * The assembly code enters us with IRQs off, but it hasn't + * informed the tracing code of that for efficiency reasons. + * Update the trace code with the current status. 
+ */ + trace_hardirqs_off(); do { if (likely(thread_flags & _TIF_NEED_RESCHED)) { schedule(); -- cgit v1.2.3-55-g7522 From 3fba7e23f754a9a6e639b640fa2a393712ffe1b8 Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 19 Aug 2015 11:02:28 +0100 Subject: ARM: uaccess: provide uaccess_save_and_enable() and uaccess_restore() Provide uaccess_save_and_enable() and uaccess_restore() to permit control of userspace visibility to the kernel, and hook these into the appropriate places in the kernel where we need to access userspace. Signed-off-by: Russell King --- arch/arm/include/asm/futex.h | 19 ++++++++-- arch/arm/include/asm/uaccess.h | 71 +++++++++++++++++++++++++++++++++++--- arch/arm/kernel/armksyms.c | 6 ++-- arch/arm/lib/clear_user.S | 6 ++-- arch/arm/lib/copy_from_user.S | 6 ++-- arch/arm/lib/copy_to_user.S | 6 ++-- arch/arm/lib/uaccess_with_memcpy.c | 4 +-- 7 files changed, 97 insertions(+), 21 deletions(-) (limited to 'arch/arm/include/asm') diff --git a/arch/arm/include/asm/futex.h b/arch/arm/include/asm/futex.h index 5eed82809d82..6795368ad023 100644 --- a/arch/arm/include/asm/futex.h +++ b/arch/arm/include/asm/futex.h @@ -22,8 +22,11 @@ #ifdef CONFIG_SMP #define __futex_atomic_op(insn, ret, oldval, tmp, uaddr, oparg) \ +({ \ + unsigned int __ua_flags; \ smp_mb(); \ prefetchw(uaddr); \ + __ua_flags = uaccess_save_and_enable(); \ __asm__ __volatile__( \ "1: ldrex %1, [%3]\n" \ " " insn "\n" \ @@ -34,12 +37,15 @@ __futex_atomic_ex_table("%5") \ : "=&r" (ret), "=&r" (oldval), "=&r" (tmp) \ : "r" (uaddr), "r" (oparg), "Ir" (-EFAULT) \ - : "cc", "memory") + : "cc", "memory"); \ + uaccess_restore(__ua_flags); \ +}) static inline int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, u32 oldval, u32 newval) { + unsigned int __ua_flags; int ret; u32 val; @@ -49,6 +55,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, smp_mb(); /* Prefetching cannot fault */ prefetchw(uaddr); + __ua_flags = uaccess_save_and_enable(); __asm__ __volatile__("@futex_atomic_cmpxchg_inatomic\n" "1: ldrex %1, [%4]\n" " teq %1, %2\n" @@ -61,6 +68,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, : "=&r" (ret), "=&r" (val) : "r" (oldval), "r" (newval), "r" (uaddr), "Ir" (-EFAULT) : "cc", "memory"); + uaccess_restore(__ua_flags); smp_mb(); *uval = val; @@ -73,6 +81,8 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, #include #define __futex_atomic_op(insn, ret, oldval, tmp, uaddr, oparg) \ +({ \ + unsigned int __ua_flags = uaccess_save_and_enable(); \ __asm__ __volatile__( \ "1: " TUSER(ldr) " %1, [%3]\n" \ " " insn "\n" \ @@ -81,12 +91,15 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, __futex_atomic_ex_table("%5") \ : "=&r" (ret), "=&r" (oldval), "=&r" (tmp) \ : "r" (uaddr), "r" (oparg), "Ir" (-EFAULT) \ - : "cc", "memory") + : "cc", "memory"); \ + uaccess_restore(__ua_flags); \ +}) static inline int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, u32 oldval, u32 newval) { + unsigned int __ua_flags; int ret = 0; u32 val; @@ -94,6 +107,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, return -EFAULT; preempt_disable(); + __ua_flags = uaccess_save_and_enable(); __asm__ __volatile__("@futex_atomic_cmpxchg_inatomic\n" "1: " TUSER(ldr) " %1, [%4]\n" " teq %1, %2\n" @@ -103,6 +117,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, : "+r" (ret), "=&r" (val) : "r" (oldval), "r" (newval), "r" (uaddr), "Ir" (-EFAULT) : "cc", "memory"); + uaccess_restore(__ua_flags); *uval = val; preempt_enable(); diff --git 
a/arch/arm/include/asm/uaccess.h b/arch/arm/include/asm/uaccess.h index 74b17d09ef7a..82880132f941 100644 --- a/arch/arm/include/asm/uaccess.h +++ b/arch/arm/include/asm/uaccess.h @@ -49,6 +49,21 @@ struct exception_table_entry extern int fixup_exception(struct pt_regs *regs); +/* + * These two functions allow hooking accesses to userspace to increase + * system integrity by ensuring that the kernel can not inadvertently + * perform such accesses (eg, via list poison values) which could then + * be exploited for privilege escalation. + */ +static inline unsigned int uaccess_save_and_enable(void) +{ + return 0; +} + +static inline void uaccess_restore(unsigned int flags) +{ +} + /* * These two are intentionally not defined anywhere - if the kernel * code generates any references to them, that's a bug. @@ -165,6 +180,7 @@ extern int __get_user_64t_4(void *); register typeof(x) __r2 asm("r2"); \ register unsigned long __l asm("r1") = __limit; \ register int __e asm("r0"); \ + unsigned int __ua_flags = uaccess_save_and_enable(); \ switch (sizeof(*(__p))) { \ case 1: \ if (sizeof((x)) >= 8) \ @@ -192,6 +208,7 @@ extern int __get_user_64t_4(void *); break; \ default: __e = __get_user_bad(); break; \ } \ + uaccess_restore(__ua_flags); \ x = (typeof(*(p))) __r2; \ __e; \ }) @@ -224,6 +241,7 @@ extern int __put_user_8(void *, unsigned long long); register const typeof(*(p)) __user *__p asm("r0") = __tmp_p; \ register unsigned long __l asm("r1") = __limit; \ register int __e asm("r0"); \ + unsigned int __ua_flags = uaccess_save_and_enable(); \ switch (sizeof(*(__p))) { \ case 1: \ __put_user_x(__r2, __p, __e, __l, 1); \ @@ -239,6 +257,7 @@ extern int __put_user_8(void *, unsigned long long); break; \ default: __e = __put_user_bad(); break; \ } \ + uaccess_restore(__ua_flags); \ __e; \ }) @@ -300,14 +319,17 @@ static inline void set_fs(mm_segment_t fs) do { \ unsigned long __gu_addr = (unsigned long)(ptr); \ unsigned long __gu_val; \ + unsigned int __ua_flags; \ __chk_user_ptr(ptr); \ might_fault(); \ + __ua_flags = uaccess_save_and_enable(); \ switch (sizeof(*(ptr))) { \ case 1: __get_user_asm_byte(__gu_val, __gu_addr, err); break; \ case 2: __get_user_asm_half(__gu_val, __gu_addr, err); break; \ case 4: __get_user_asm_word(__gu_val, __gu_addr, err); break; \ default: (__gu_val) = __get_user_bad(); \ } \ + uaccess_restore(__ua_flags); \ (x) = (__typeof__(*(ptr)))__gu_val; \ } while (0) @@ -381,9 +403,11 @@ do { \ #define __put_user_err(x, ptr, err) \ do { \ unsigned long __pu_addr = (unsigned long)(ptr); \ + unsigned int __ua_flags; \ __typeof__(*(ptr)) __pu_val = (x); \ __chk_user_ptr(ptr); \ might_fault(); \ + __ua_flags = uaccess_save_and_enable(); \ switch (sizeof(*(ptr))) { \ case 1: __put_user_asm_byte(__pu_val, __pu_addr, err); break; \ case 2: __put_user_asm_half(__pu_val, __pu_addr, err); break; \ case 4: __put_user_asm_word(__pu_val, __pu_addr, err); break; \ case 8: __put_user_asm_dword(__pu_val, __pu_addr, err); break; \ default: __put_user_bad(); \ } \ + uaccess_restore(__ua_flags); \ } while (0) #define __put_user_asm_byte(x, __pu_addr, err) \ @@ -474,11 +499,46 @@ do { \ #ifdef CONFIG_MMU -extern unsigned long __must_check __copy_from_user(void *to, const void __user *from, unsigned long n); -extern unsigned long __must_check __copy_to_user(void __user *to, const void *from, unsigned long n); -extern unsigned long __must_check __copy_to_user_std(void __user *to, const void *from, unsigned long n); -extern unsigned long __must_check __clear_user(void __user *addr, unsigned long n); -extern unsigned long __must_check
__clear_user_std(void __user *addr, unsigned long n); +extern unsigned long __must_check +arm_copy_from_user(void *to, const void __user *from, unsigned long n); + +static inline unsigned long __must_check +__copy_from_user(void *to, const void __user *from, unsigned long n) +{ + unsigned int __ua_flags = uaccess_save_and_enable(); + n = arm_copy_from_user(to, from, n); + uaccess_restore(__ua_flags); + return n; +} + +extern unsigned long __must_check +arm_copy_to_user(void __user *to, const void *from, unsigned long n); +extern unsigned long __must_check +__copy_to_user_std(void __user *to, const void *from, unsigned long n); + +static inline unsigned long __must_check +__copy_to_user(void __user *to, const void *from, unsigned long n) +{ + unsigned int __ua_flags = uaccess_save_and_enable(); + n = arm_copy_to_user(to, from, n); + uaccess_restore(__ua_flags); + return n; +} + +extern unsigned long __must_check +arm_clear_user(void __user *addr, unsigned long n); +extern unsigned long __must_check +__clear_user_std(void __user *addr, unsigned long n); + +static inline unsigned long __must_check +__clear_user(void __user *addr, unsigned long n) +{ + unsigned int __ua_flags = uaccess_save_and_enable(); + n = arm_clear_user(addr, n); + uaccess_restore(__ua_flags); + return n; +} + #else #define __copy_from_user(to, from, n) (memcpy(to, (void __force *)from, n), 0) #define __copy_to_user(to, from, n) (memcpy((void __force *)to, from, n), 0) @@ -511,6 +571,7 @@ static inline unsigned long __must_check clear_user(void __user *to, unsigned lo return n; } +/* These are from lib/ code, and use __get_user() and friends */ extern long strncpy_from_user(char *dest, const char __user *src, long count); extern __must_check long strlen_user(const char __user *str); diff --git a/arch/arm/kernel/armksyms.c b/arch/arm/kernel/armksyms.c index a88671cfe1ff..a35d72d30b56 100644 --- a/arch/arm/kernel/armksyms.c +++ b/arch/arm/kernel/armksyms.c @@ -91,9 +91,9 @@ EXPORT_SYMBOL(__memzero); #ifdef CONFIG_MMU EXPORT_SYMBOL(copy_page); -EXPORT_SYMBOL(__copy_from_user); -EXPORT_SYMBOL(__copy_to_user); -EXPORT_SYMBOL(__clear_user); +EXPORT_SYMBOL(arm_copy_from_user); +EXPORT_SYMBOL(arm_copy_to_user); +EXPORT_SYMBOL(arm_clear_user); EXPORT_SYMBOL(__get_user_1); EXPORT_SYMBOL(__get_user_2); diff --git a/arch/arm/lib/clear_user.S b/arch/arm/lib/clear_user.S index 1710fd7db2d5..970d6c043774 100644 --- a/arch/arm/lib/clear_user.S +++ b/arch/arm/lib/clear_user.S @@ -12,14 +12,14 @@ .text -/* Prototype: int __clear_user(void *addr, size_t sz) +/* Prototype: unsigned long arm_clear_user(void *addr, size_t sz) * Purpose : clear some user memory * Params : addr - user memory address to clear * : sz - number of bytes to clear * Returns : number of bytes NOT cleared */ ENTRY(__clear_user_std) -WEAK(__clear_user) +WEAK(arm_clear_user) stmfd sp!, {r1, lr} mov r2, #0 cmp r1, #4 @@ -44,7 +44,7 @@ WEAK(__clear_user) USER( strnebt r2, [r0]) mov r0, #0 ldmfd sp!, {r1, pc} -ENDPROC(__clear_user) +ENDPROC(arm_clear_user) ENDPROC(__clear_user_std) .pushsection .text.fixup,"ax" diff --git a/arch/arm/lib/copy_from_user.S b/arch/arm/lib/copy_from_user.S index 7a235b9952be..1512bebfbf1b 100644 --- a/arch/arm/lib/copy_from_user.S +++ b/arch/arm/lib/copy_from_user.S @@ -17,7 +17,7 @@ /* * Prototype: * - * size_t __copy_from_user(void *to, const void *from, size_t n) + * size_t arm_copy_from_user(void *to, const void *from, size_t n) * * Purpose: * @@ -89,11 +89,11 @@ .text -ENTRY(__copy_from_user) +ENTRY(arm_copy_from_user) #include 
"copy_template.S" -ENDPROC(__copy_from_user) +ENDPROC(arm_copy_from_user) .pushsection .fixup,"ax" .align 0 diff --git a/arch/arm/lib/copy_to_user.S b/arch/arm/lib/copy_to_user.S index 9648b0675a3e..caf5019d8161 100644 --- a/arch/arm/lib/copy_to_user.S +++ b/arch/arm/lib/copy_to_user.S @@ -17,7 +17,7 @@ /* * Prototype: * - * size_t __copy_to_user(void *to, const void *from, size_t n) + * size_t arm_copy_to_user(void *to, const void *from, size_t n) * * Purpose: * @@ -93,11 +93,11 @@ .text ENTRY(__copy_to_user_std) -WEAK(__copy_to_user) +WEAK(arm_copy_to_user) #include "copy_template.S" -ENDPROC(__copy_to_user) +ENDPROC(arm_copy_to_user) ENDPROC(__copy_to_user_std) .pushsection .text.fixup,"ax" diff --git a/arch/arm/lib/uaccess_with_memcpy.c b/arch/arm/lib/uaccess_with_memcpy.c index 3e58d710013c..77f020e75ccd 100644 --- a/arch/arm/lib/uaccess_with_memcpy.c +++ b/arch/arm/lib/uaccess_with_memcpy.c @@ -136,7 +136,7 @@ out: } unsigned long -__copy_to_user(void __user *to, const void *from, unsigned long n) +arm_copy_to_user(void __user *to, const void *from, unsigned long n) { /* * This test is stubbed out of the main function above to keep @@ -190,7 +190,7 @@ out: return n; } -unsigned long __clear_user(void __user *addr, unsigned long n) +unsigned long arm_clear_user(void __user *addr, unsigned long n) { /* See rational for this in __copy_to_user() above. */ if (n < 64) -- cgit v1.2.3-55-g7522 From 9205b797dbe519a629267ec8c5766cd973d35063 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Mon, 24 Aug 2015 21:49:30 +0100 Subject: ARM: 8421/1: smp: Collapse arch_cpu_idle_dead() into cpu_die() The only caller of cpu_die() on ARM is arch_cpu_idle_dead(), so let's simplify the code by renaming cpu_die() to arch_cpu_idle_dead(). While were here, drop the __ref annotation because __cpuinit is gone nowadays. Signed-off-by: Stephen Boyd Signed-off-by: Russell King --- arch/arm/include/asm/smp.h | 1 - arch/arm/kernel/process.c | 7 ------- arch/arm/kernel/smp.c | 2 +- 3 files changed, 1 insertion(+), 9 deletions(-) (limited to 'arch/arm/include/asm') diff --git a/arch/arm/include/asm/smp.h b/arch/arm/include/asm/smp.h index 318ce89eeff7..ef356659b4f4 100644 --- a/arch/arm/include/asm/smp.h +++ b/arch/arm/include/asm/smp.h @@ -74,7 +74,6 @@ extern void secondary_startup_arm(void); extern int __cpu_disable(void); extern void __cpu_die(unsigned int cpu); -extern void cpu_die(void); extern void arch_send_call_function_single_ipi(int cpu); extern void arch_send_call_function_ipi_mask(const struct cpumask *mask); diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c index f192a2a41719..358984b7f249 100644 --- a/arch/arm/kernel/process.c +++ b/arch/arm/kernel/process.c @@ -91,13 +91,6 @@ void arch_cpu_idle_exit(void) ledtrig_cpu(CPU_LED_IDLE_END); } -#ifdef CONFIG_HOTPLUG_CPU -void arch_cpu_idle_dead(void) -{ - cpu_die(); -} -#endif - void __show_regs(struct pt_regs *regs) { unsigned long flags; diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index 3cd846f48eaf..0aad7cdf2e58 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -266,7 +266,7 @@ void __cpu_die(unsigned int cpu) * of the other hotplug-cpu capable cores, so presumably coming * out of idle fixes this. 
*/ -void __ref cpu_die(void) +void arch_cpu_idle_dead(void) { unsigned int cpu = smp_processor_id(); -- cgit v1.2.3-55-g7522 From 2190fed67ba6f3e8129513929f2395843645e928 Mon Sep 17 00:00:00 2001 From: Russell King Date: Thu, 20 Aug 2015 10:32:02 +0100 Subject: ARM: entry: provide uaccess assembly macro hooks Provide hooks into the kernel entry and exit paths to permit control of userspace visibility to the kernel. The intended use is: - on entry to kernel from user, uaccess_disable will be called to disable userspace visibility - on exit from kernel to user, uaccess_enable will be called to enable userspace visibility - on entry from a kernel exception, uaccess_save_and_disable will be called to save the current userspace visibility setting, and disable access - on exit from a kernel exception, uaccess_restore will be called to restore the userspace visibility as it was before the exception occurred. These hooks allow us to keep userspace visibility disabled for the vast majority of the kernel, except for localised regions where we want to explicitly access userspace. Signed-off-by: Russell King --- arch/arm/include/asm/assembler.h | 17 +++++++++++++++++ arch/arm/kernel/entry-armv.S | 30 ++++++++++++++++++++++-------- arch/arm/kernel/entry-common.S | 2 ++ arch/arm/kernel/entry-header.S | 3 +++ arch/arm/mm/abort-ev4.S | 1 + arch/arm/mm/abort-ev5t.S | 1 + arch/arm/mm/abort-ev5tj.S | 1 + arch/arm/mm/abort-ev6.S | 7 ++++--- arch/arm/mm/abort-ev7.S | 1 + arch/arm/mm/abort-lv4t.S | 2 ++ arch/arm/mm/abort-macro.S | 1 + 11 files changed, 55 insertions(+), 11 deletions(-) (limited to 'arch/arm/include/asm') diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h index 4abe57279c66..a91177043467 100644 --- a/arch/arm/include/asm/assembler.h +++ b/arch/arm/include/asm/assembler.h @@ -445,6 +445,23 @@ THUMB( orr \reg , \reg , #PSR_T_BIT ) #endif .endm + .macro uaccess_disable, tmp, isb=1 + .endm + + .macro uaccess_enable, tmp, isb=1 + .endm + + .macro uaccess_save, tmp + .endm + + .macro uaccess_restore + .endm + + .macro uaccess_save_and_disable, tmp + uaccess_save \tmp + uaccess_disable \tmp + .endm + .irp c,,eq,ne,cs,cc,mi,pl,vs,vc,hi,ls,ge,lt,gt,le,hs,lo .macro ret\c, reg #if __LINUX_ARM_ARCH__ < 6 diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S index d19adcf6c580..61f00a3f3047 100644 --- a/arch/arm/kernel/entry-armv.S +++ b/arch/arm/kernel/entry-armv.S @@ -149,10 +149,10 @@ ENDPROC(__und_invalid) #define SPFIX(code...) #endif - .macro svc_entry, stack_hole=0, trace=1 + .macro svc_entry, stack_hole=0, trace=1, uaccess=1 UNWIND(.fnstart ) UNWIND(.save {r0 - pc} ) - sub sp, sp, #(S_FRAME_SIZE + \stack_hole - 4) + sub sp, sp, #(S_FRAME_SIZE + 8 + \stack_hole - 4) #ifdef CONFIG_THUMB2_KERNEL SPFIX( str r0, [sp] ) @ temporarily saved SPFIX( mov r0, sp ) @@ -167,7 +167,7 @@ ENDPROC(__und_invalid) ldmia r0, {r3 - r5} add r7, sp, #S_SP - 4 @ here for interlock avoidance mov r6, #-1 @ "" "" "" "" - add r2, sp, #(S_FRAME_SIZE + \stack_hole - 4) + add r2, sp, #(S_FRAME_SIZE + 8 + \stack_hole - 4) SPFIX( addeq r2, r2, #4 ) str r3, [sp, #-4]!
@ save the "real" r0 copied @ from the exception stack @@ -185,6 +185,11 @@ ENDPROC(__und_invalid) @ stmia r7, {r2 - r6} + uaccess_save r0 + .if \uaccess + uaccess_disable r0 + .endif + .if \trace #ifdef CONFIG_TRACE_IRQFLAGS bl trace_hardirqs_off @@ -194,7 +199,7 @@ ENDPROC(__und_invalid) .align 5 __dabt_svc: - svc_entry + svc_entry uaccess=0 mov r2, sp dabt_helper THUMB( ldr r5, [sp, #S_PSR] ) @ potentially updated CPSR @@ -368,7 +373,7 @@ ENDPROC(__fiq_abt) #error "sizeof(struct pt_regs) must be a multiple of 8" #endif - .macro usr_entry, trace=1 + .macro usr_entry, trace=1, uaccess=1 UNWIND(.fnstart ) UNWIND(.cantunwind ) @ don't unwind the user space sub sp, sp, #S_FRAME_SIZE @@ -400,6 +405,10 @@ ENDPROC(__fiq_abt) ARM( stmdb r0, {sp, lr}^ ) THUMB( store_user_sp_lr r0, r1, S_SP - S_PC ) + .if \uaccess + uaccess_disable ip + .endif + @ Enable the alignment trap while in kernel mode ATRAP( teq r8, r7) ATRAP( mcrne p15, 0, r8, c1, c0, 0) @@ -435,7 +444,7 @@ ENDPROC(__fiq_abt) .align 5 __dabt_usr: - usr_entry + usr_entry uaccess=0 kuser_cmpxchg_check mov r2, sp dabt_helper @@ -458,7 +467,7 @@ ENDPROC(__irq_usr) .align 5 __und_usr: - usr_entry + usr_entry uaccess=0 mov r2, r4 mov r3, r5 @@ -484,6 +493,8 @@ __und_usr: 1: ldrt r0, [r4] ARM_BE8(rev r0, r0) @ little endian instruction + uaccess_disable ip + @ r0 = 32-bit ARM instruction which caused the exception @ r2 = PC value for the following instruction (:= regs->ARM_pc) @ r4 = PC value for the faulting instruction @@ -518,9 +529,10 @@ __und_usr_thumb: 2: ldrht r5, [r4] ARM_BE8(rev16 r5, r5) @ little endian instruction cmp r5, #0xe800 @ 32bit instruction if xx != 0 - blo __und_usr_fault_16 @ 16bit undefined instruction + blo __und_usr_fault_16_pan @ 16bit undefined instruction 3: ldrht r0, [r2] ARM_BE8(rev16 r0, r0) @ little endian instruction + uaccess_disable ip add r2, r2, #2 @ r2 is PC + 2, make it PC + 4 str r2, [sp, #S_PC] @ it's a 2x16bit instr, update orr r0, r0, r5, lsl #16 @@ -715,6 +727,8 @@ ENDPROC(no_fp) __und_usr_fault_32: mov r1, #4 b 1f +__und_usr_fault_16_pan: + uaccess_disable ip __und_usr_fault_16: mov r1, #2 1: mov r0, sp diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S index 92828a1dec80..189154980703 100644 --- a/arch/arm/kernel/entry-common.S +++ b/arch/arm/kernel/entry-common.S @@ -173,6 +173,8 @@ ENTRY(vector_swi) USER( ldr scno, [lr, #-4] ) @ get SWI instruction #endif + uaccess_disable tbl + adr tbl, sys_call_table @ load syscall table pointer #if defined(CONFIG_OABI_COMPAT) diff --git a/arch/arm/kernel/entry-header.S b/arch/arm/kernel/entry-header.S index d47b5161b029..0d22ad206d52 100644 --- a/arch/arm/kernel/entry-header.S +++ b/arch/arm/kernel/entry-header.S @@ -215,6 +215,7 @@ blne trace_hardirqs_off #endif .endif + uaccess_restore #ifndef CONFIG_THUMB2_KERNEL @ ARM mode SVC restore @@ -258,6 +259,7 @@ @ on the stack remains correct). 
@ .macro svc_exit_via_fiq + uaccess_restore #ifndef CONFIG_THUMB2_KERNEL @ ARM mode restore mov r0, sp @@ -287,6 +289,7 @@ .macro restore_user_regs, fast = 0, offset = 0 + uaccess_enable r1, isb=0 #ifndef CONFIG_THUMB2_KERNEL @ ARM mode restore mov r2, sp diff --git a/arch/arm/mm/abort-ev4.S b/arch/arm/mm/abort-ev4.S index 54473cd4aba9..b3b31e30cadd 100644 --- a/arch/arm/mm/abort-ev4.S +++ b/arch/arm/mm/abort-ev4.S @@ -19,6 +19,7 @@ ENTRY(v4_early_abort) mrc p15, 0, r1, c5, c0, 0 @ get FSR mrc p15, 0, r0, c6, c0, 0 @ get FAR ldr r3, [r4] @ read aborted ARM instruction + uaccess_disable ip @ disable userspace access bic r1, r1, #1 << 11 | 1 << 10 @ clear bits 11 and 10 of FSR tst r3, #1 << 20 @ L = 1 -> write? orreq r1, r1, #1 << 11 @ yes. diff --git a/arch/arm/mm/abort-ev5t.S b/arch/arm/mm/abort-ev5t.S index c913031b79cc..a6a381a6caa5 100644 --- a/arch/arm/mm/abort-ev5t.S +++ b/arch/arm/mm/abort-ev5t.S @@ -21,6 +21,7 @@ ENTRY(v5t_early_abort) mrc p15, 0, r0, c6, c0, 0 @ get FAR do_thumb_abort fsr=r1, pc=r4, psr=r5, tmp=r3 ldreq r3, [r4] @ read aborted ARM instruction + uaccess_disable ip @ disable user access bic r1, r1, #1 << 11 @ clear bits 11 of FSR teq_ldrd tmp=ip, insn=r3 @ insn was LDRD? beq do_DataAbort @ yes diff --git a/arch/arm/mm/abort-ev5tj.S b/arch/arm/mm/abort-ev5tj.S index 1b80d71adb0f..00ab011bef58 100644 --- a/arch/arm/mm/abort-ev5tj.S +++ b/arch/arm/mm/abort-ev5tj.S @@ -24,6 +24,7 @@ ENTRY(v5tj_early_abort) bne do_DataAbort do_thumb_abort fsr=r1, pc=r4, psr=r5, tmp=r3 ldreq r3, [r4] @ read aborted ARM instruction + uaccess_disable ip @ disable userspace access teq_ldrd tmp=ip, insn=r3 @ insn was LDRD? beq do_DataAbort @ yes tst r3, #1 << 20 @ L = 0 -> write diff --git a/arch/arm/mm/abort-ev6.S b/arch/arm/mm/abort-ev6.S index 113704f30e9f..8801a15aa105 100644 --- a/arch/arm/mm/abort-ev6.S +++ b/arch/arm/mm/abort-ev6.S @@ -26,17 +26,18 @@ ENTRY(v6_early_abort) ldr ip, =0x4107b36 mrc p15, 0, r3, c0, c0, 0 @ get processor id teq ip, r3, lsr #4 @ r0 ARM1136? - bne do_DataAbort + bne 1f tst r5, #PSR_J_BIT @ Java? tsteq r5, #PSR_T_BIT @ Thumb? - bne do_DataAbort + bne 1f bic r1, r1, #1 << 11 @ clear bit 11 of FSR ldr r3, [r4] @ read aborted ARM instruction ARM_BE8(rev r3, r3) teq_ldrd tmp=ip, insn=r3 @ insn was LDRD? - beq do_DataAbort @ yes + beq 1f @ yes tst r3, #1 << 20 @ L = 0 -> write orreq r1, r1, #1 << 11 @ yes. #endif +1: uaccess_disable ip @ disable userspace access b do_DataAbort diff --git a/arch/arm/mm/abort-ev7.S b/arch/arm/mm/abort-ev7.S index 4812ad054214..e8d0e08c227f 100644 --- a/arch/arm/mm/abort-ev7.S +++ b/arch/arm/mm/abort-ev7.S @@ -15,6 +15,7 @@ ENTRY(v7_early_abort) mrc p15, 0, r1, c5, c0, 0 @ get FSR mrc p15, 0, r0, c6, c0, 0 @ get FAR + uaccess_disable ip @ disable userspace access /* * V6 code adjusts the returned DFSR. diff --git a/arch/arm/mm/abort-lv4t.S b/arch/arm/mm/abort-lv4t.S index f3982580c273..6d8e8e3365d1 100644 --- a/arch/arm/mm/abort-lv4t.S +++ b/arch/arm/mm/abort-lv4t.S @@ -26,6 +26,7 @@ ENTRY(v4t_late_abort) #endif bne .data_thumb_abort ldr r8, [r4] @ read arm instruction + uaccess_disable ip @ disable userspace access tst r8, #1 << 20 @ L = 1 -> write? orreq r1, r1, #1 << 11 @ yes. and r7, r8, #15 << 24 @@ -155,6 +156,7 @@ ENTRY(v4t_late_abort) .data_thumb_abort: ldrh r8, [r4] @ read instruction + uaccess_disable ip @ disable userspace access tst r8, #1 << 11 @ L = 1 -> write? 
orreq r1, r1, #1 << 8 @ yes and r7, r8, #15 << 12 diff --git a/arch/arm/mm/abort-macro.S b/arch/arm/mm/abort-macro.S index 50d6c0a900b1..4509bee4e081 100644 --- a/arch/arm/mm/abort-macro.S +++ b/arch/arm/mm/abort-macro.S @@ -13,6 +13,7 @@ tst \psr, #PSR_T_BIT beq not_thumb ldrh \tmp, [\pc] @ Read aborted Thumb instruction + uaccess_disable ip @ disable userspace access and \tmp, \tmp, # 0xfe00 @ Mask opcode field cmp \tmp, # 0x5600 @ Is it ldrsb? orreq \tmp, \tmp, #1 << 11 @ Set L-bit if yes -- cgit v1.2.3-55-g7522 From a5e090acbf545c0a3b04080f8a488b17ec41fe02 Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 19 Aug 2015 20:40:41 +0100 Subject: ARM: software-based privileged-no-access support Provide a software-based implementation of the privileged no-access support found in ARMv8.1. Userspace pages are mapped using a different domain number from the kernel and IO mappings. If we switch the user domain to "no access" when we enter the kernel, we can prevent the kernel from touching userspace. However, the kernel needs to be able to access userspace via the various user accessor functions. With the wrapping in the previous patch, we can temporarily enable access when the kernel needs user access, and re-disable it afterwards. This allows us to trap non-intended accesses to userspace, eg, caused by an inadvertent dereference of the LIST_POISON* values, which, with appropriate user mappings setup, can be made to succeed. This in turn can allow use-after-free bugs to be further exploited than would otherwise be possible. Signed-off-by: Russell King --- arch/arm/Kconfig | 15 +++++++++++++++ arch/arm/include/asm/assembler.h | 30 ++++++++++++++++++++++++++++++ arch/arm/include/asm/domain.h | 21 +++++++++++++++++++-- arch/arm/include/asm/uaccess.h | 14 ++++++++++++++ arch/arm/kernel/process.c | 36 ++++++++++++++++++++++++++++++------ arch/arm/kernel/swp_emulate.c | 3 +++ arch/arm/lib/csumpartialcopyuser.S | 14 ++++++++++++++ 7 files changed, 125 insertions(+), 8 deletions(-) (limited to 'arch/arm/include/asm') diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index a750c1425c3a..e15d5ed4d5f1 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -1694,6 +1694,21 @@ config HIGHPTE bool "Allocate 2nd-level pagetables from highmem" depends on HIGHMEM +config CPU_SW_DOMAIN_PAN + bool "Enable use of CPU domains to implement privileged no-access" + depends on MMU && !ARM_LPAE + default y + help + Increase kernel security by ensuring that normal kernel accesses + are unable to access userspace addresses. This can help prevent + use-after-free bugs becoming an exploitable privilege escalation + by ensuring that magic values (such as LIST_POISON) will always + fault when dereferenced. + + CPUs with low-vector mappings use a best-efforts implementation. + Their lower 1MB needs to remain accessible for the vectors, but + the remainder of userspace will become appropriately inaccessible. + config HW_PERF_EVENTS bool "Enable hardware performance counter support for perf events" depends on PERF_EVENTS diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h index a91177043467..3ae0eda5e64f 100644 --- a/arch/arm/include/asm/assembler.h +++ b/arch/arm/include/asm/assembler.h @@ -446,15 +446,45 @@ THUMB( orr \reg , \reg , #PSR_T_BIT ) .endm .macro uaccess_disable, tmp, isb=1 +#ifdef CONFIG_CPU_SW_DOMAIN_PAN + /* + * Whenever we re-enter userspace, the domains should always be + * set appropriately.
+ */ + mov \tmp, #DACR_UACCESS_DISABLE + mcr p15, 0, \tmp, c3, c0, 0 @ Set domain register + .if \isb + instr_sync + .endif +#endif .endm .macro uaccess_enable, tmp, isb=1 +#ifdef CONFIG_CPU_SW_DOMAIN_PAN + /* + * Whenever we re-enter userspace, the domains should always be + * set appropriately. + */ + mov \tmp, #DACR_UACCESS_ENABLE + mcr p15, 0, \tmp, c3, c0, 0 + .if \isb + instr_sync + .endif +#endif .endm .macro uaccess_save, tmp +#ifdef CONFIG_CPU_SW_DOMAIN_PAN + mrc p15, 0, \tmp, c3, c0, 0 + str \tmp, [sp, #S_FRAME_SIZE] +#endif .endm .macro uaccess_restore +#ifdef CONFIG_CPU_SW_DOMAIN_PAN + ldr r0, [sp, #S_FRAME_SIZE] + mcr p15, 0, r0, c3, c0, 0 +#endif .endm .macro uaccess_save_and_disable, tmp diff --git a/arch/arm/include/asm/domain.h b/arch/arm/include/asm/domain.h index 2be929549938..e878129f2fee 100644 --- a/arch/arm/include/asm/domain.h +++ b/arch/arm/include/asm/domain.h @@ -57,11 +57,29 @@ #define domain_mask(dom) ((3) << (2 * (dom))) #define domain_val(dom,type) ((type) << (2 * (dom))) +#ifdef CONFIG_CPU_SW_DOMAIN_PAN +#define DACR_INIT \ + (domain_val(DOMAIN_USER, DOMAIN_NOACCESS) | \ + domain_val(DOMAIN_KERNEL, DOMAIN_MANAGER) | \ + domain_val(DOMAIN_IO, DOMAIN_CLIENT) | \ + domain_val(DOMAIN_VECTORS, DOMAIN_CLIENT)) +#else #define DACR_INIT \ (domain_val(DOMAIN_USER, DOMAIN_CLIENT) | \ domain_val(DOMAIN_KERNEL, DOMAIN_MANAGER) | \ domain_val(DOMAIN_IO, DOMAIN_CLIENT) | \ domain_val(DOMAIN_VECTORS, DOMAIN_CLIENT)) +#endif + +#define __DACR_DEFAULT \ + domain_val(DOMAIN_KERNEL, DOMAIN_CLIENT) | \ + domain_val(DOMAIN_IO, DOMAIN_CLIENT) | \ + domain_val(DOMAIN_VECTORS, DOMAIN_CLIENT) + +#define DACR_UACCESS_DISABLE \ + (__DACR_DEFAULT | domain_val(DOMAIN_USER, DOMAIN_NOACCESS)) +#define DACR_UACCESS_ENABLE \ + (__DACR_DEFAULT | domain_val(DOMAIN_USER, DOMAIN_CLIENT)) #ifndef __ASSEMBLY__ @@ -76,7 +94,6 @@ static inline unsigned int get_domain(void) return domain; } -#ifdef CONFIG_CPU_USE_DOMAINS static inline void set_domain(unsigned val) { asm volatile( @@ -85,6 +102,7 @@ static inline void set_domain(unsigned val) isb(); } +#ifdef CONFIG_CPU_USE_DOMAINS #define modify_domain(dom,type) \ do { \ unsigned int domain = get_domain(); \ @@ -94,7 +112,6 @@ static inline void set_domain(unsigned val) } while (0) #else -static inline void set_domain(unsigned val) { } static inline void modify_domain(unsigned dom, unsigned type) { } #endif diff --git a/arch/arm/include/asm/uaccess.h b/arch/arm/include/asm/uaccess.h index 82880132f941..01bae13b2cea 100644 --- a/arch/arm/include/asm/uaccess.h +++ b/arch/arm/include/asm/uaccess.h @@ -57,11 +57,25 @@ extern int fixup_exception(struct pt_regs *regs); */ static inline unsigned int uaccess_save_and_enable(void) { +#ifdef CONFIG_CPU_SW_DOMAIN_PAN + unsigned int old_domain = get_domain(); + + /* Set the current domain access to permit user accesses */ + set_domain((old_domain & ~domain_mask(DOMAIN_USER)) | + domain_val(DOMAIN_USER, DOMAIN_CLIENT)); + + return old_domain; +#else return 0; +#endif } static inline void uaccess_restore(unsigned int flags) { +#ifdef CONFIG_CPU_SW_DOMAIN_PAN + /* Restore the user access mask */ + set_domain(flags); +#endif } /* diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c index e722f9b3c9b1..3f18098dfd08 100644 --- a/arch/arm/kernel/process.c +++ b/arch/arm/kernel/process.c @@ -129,12 +129,36 @@ void __show_regs(struct pt_regs *regs) buf[4] = '\0'; #ifndef CONFIG_CPU_V7M - printk("Flags: %s IRQs o%s FIQs o%s Mode %s ISA %s Segment %s\n", - buf, interrupts_enabled(regs) ? 
"n" : "ff", - fast_interrupts_enabled(regs) ? "n" : "ff", - processor_modes[processor_mode(regs)], - isa_modes[isa_mode(regs)], - get_fs() == get_ds() ? "kernel" : "user"); + { + unsigned int domain = get_domain(); + const char *segment; + +#ifdef CONFIG_CPU_SW_DOMAIN_PAN + /* + * Get the domain register for the parent context. In user + * mode, we don't save the DACR, so lets use what it should + * be. For other modes, we place it after the pt_regs struct. + */ + if (user_mode(regs)) + domain = DACR_UACCESS_ENABLE; + else + domain = *(unsigned int *)(regs + 1); +#endif + + if ((domain & domain_mask(DOMAIN_USER)) == + domain_val(DOMAIN_USER, DOMAIN_NOACCESS)) + segment = "none"; + else if (get_fs() == get_ds()) + segment = "kernel"; + else + segment = "user"; + + printk("Flags: %s IRQs o%s FIQs o%s Mode %s ISA %s Segment %s\n", + buf, interrupts_enabled(regs) ? "n" : "ff", + fast_interrupts_enabled(regs) ? "n" : "ff", + processor_modes[processor_mode(regs)], + isa_modes[isa_mode(regs)], segment); + } #else printk("xPSR: %08lx\n", regs->ARM_cpsr); #endif diff --git a/arch/arm/kernel/swp_emulate.c b/arch/arm/kernel/swp_emulate.c index 1361756782c7..5b26e7efa9ea 100644 --- a/arch/arm/kernel/swp_emulate.c +++ b/arch/arm/kernel/swp_emulate.c @@ -141,11 +141,14 @@ static int emulate_swpX(unsigned int address, unsigned int *data, while (1) { unsigned long temp; + unsigned int __ua_flags; + __ua_flags = uaccess_save_and_enable(); if (type == TYPE_SWPB) __user_swpb_asm(*data, address, res, temp); else __user_swp_asm(*data, address, res, temp); + uaccess_restore(__ua_flags); if (likely(res != -EAGAIN) || signal_pending(current)) break; diff --git a/arch/arm/lib/csumpartialcopyuser.S b/arch/arm/lib/csumpartialcopyuser.S index 1d0957e61f89..1712f132b80d 100644 --- a/arch/arm/lib/csumpartialcopyuser.S +++ b/arch/arm/lib/csumpartialcopyuser.S @@ -17,6 +17,19 @@ .text +#ifdef CONFIG_CPU_SW_DOMAIN_PAN + .macro save_regs + mrc p15, 0, ip, c3, c0, 0 + stmfd sp!, {r1, r2, r4 - r8, ip, lr} + uaccess_enable ip + .endm + + .macro load_regs + ldmfd sp!, {r1, r2, r4 - r8, ip, lr} + mcr p15, 0, ip, c3, c0, 0 + ret lr + .endm +#else .macro save_regs stmfd sp!, {r1, r2, r4 - r8, lr} .endm @@ -24,6 +37,7 @@ .macro load_regs ldmfd sp!, {r1, r2, r4 - r8, pc} .endm +#endif .macro load1b, reg1 ldrusr \reg1, r0, 1 -- cgit v1.2.3-55-g7522