From 57853e8906a04a86c32fb96d8421b923c6d64162 Mon Sep 17 00:00:00 2001
From: Will Deacon
Date: Tue, 7 Jul 2015 18:19:38 +0100
Subject: ARM: 8403/1: kbuild: don't use generic mcs_spinlock.h header

We provide our own implementation of asm/mcs_spinlock.h, so there's
no need to ask for the (empty) generic version.

Signed-off-by: Will Deacon
Signed-off-by: Russell King
---
 arch/arm/include/asm/Kbuild | 1 -
 1 file changed, 1 deletion(-)

(limited to 'arch/arm/include/asm')

diff --git a/arch/arm/include/asm/Kbuild b/arch/arm/include/asm/Kbuild
index 83c50193626c..517ef6dd22b9 100644
--- a/arch/arm/include/asm/Kbuild
+++ b/arch/arm/include/asm/Kbuild
@@ -12,7 +12,6 @@ generic-y += irq_regs.h
 generic-y += kdebug.h
 generic-y += local.h
 generic-y += local64.h
-generic-y += mcs_spinlock.h
 generic-y += msgbuf.h
 generic-y += param.h
 generic-y += parport.h
-- 
cgit v1.2.3-55-g7522


From f81309067ff2d84788316c513a415f6bb8c9171f Mon Sep 17 00:00:00 2001
From: Russell King
Date: Mon, 1 Jun 2015 23:44:46 +0100
Subject: ARM: move heavy barrier support out of line

The existing memory barrier macro causes a significant amount of code
to be inserted inline at every call site. For example, in
gpio_set_irq_type(), we have this for mb():

	c0344c08:	f57ff04e	dsb	st
	c0344c0c:	e59f8190	ldr	r8, [pc, #400]	; c0344da4
	c0344c10:	e3590004	cmp	r9, #4
	c0344c14:	e5983014	ldr	r3, [r8, #20]
	c0344c18:	0a000054	beq	c0344d70
	c0344c1c:	e3530000	cmp	r3, #0
	c0344c20:	0a000004	beq	c0344c38
	c0344c24:	e50b2030	str	r2, [fp, #-48]	; 0xffffffd0
	c0344c28:	e50bc034	str	ip, [fp, #-52]	; 0xffffffcc
	c0344c2c:	e12fff33	blx	r3
	c0344c30:	e51bc034	ldr	ip, [fp, #-52]	; 0xffffffcc
	c0344c34:	e51b2030	ldr	r2, [fp, #-48]	; 0xffffffd0
	c0344c38:	e5963004	ldr	r3, [r6, #4]

Moving the outer_cache_sync() call out of line reduces the impact of
the barrier:

	c0344968:	f57ff04e	dsb	st
	c034496c:	e35a0004	cmp	sl, #4
	c0344970:	e50b2030	str	r2, [fp, #-48]	; 0xffffffd0
	c0344974:	0a000044	beq	c0344a8c
	c0344978:	ebf363dd	bl	c001d8f4
	c034497c:	e5953004	ldr	r3, [r5, #4]

This should reduce the cache footprint of this code. Overall, this
results in a reduction of around 20K in the kernel size:

    text	  data	     bss	 dec	    hex	filename
10773970	667392	10369656	21811018	14ccf4a	../build/imx6/vmlinux-old
10754219	667392	10369656	21791267	14c8223	../build/imx6/vmlinux-new

Another advantage to this approach is that we can finally resolve the
issue of SoCs which have their own memory barrier requirements within
multiplatform kernels (such as OMAP.) Here, the bus interconnects need
additional handling to ensure that writes become visible in the correct
order (eg, between dma_map() operations, writes to DMA coherent memory,
and MMIO accesses.)
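
[Editor's note: condensed at source level, the split this series converges
on looks roughly like the sketch below. This is an illustration only -- the
authoritative definitions are in the barrier.h and flush.c hunks of the
diff that follows -- but it shows why each call site shrinks: the cheap dsb
stays inline, and the rarely-needed L2 drain becomes one out-of-line call.]

	/* Illustrative sketch only; see the real patch hunks below. */

	/* Defined once, out of line (arch/arm/mm/flush.c in the patch): */
	void arm_heavy_mb(void)
	{
		if (outer_cache.sync)	/* drain the outer (L2) write buffer */
			outer_cache.sync();
	}

	/*
	 * Inlined at every mb()/wmb() call site: one dsb plus one bl,
	 * instead of the whole outer-cache sync sequence expanded inline.
	 */
	#define mb()	do { dsb(); arm_heavy_mb(); } while (0)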
Acked-by: Tony Lindgren
Acked-by: Richard Woodruff
Signed-off-by: Russell King
---
 arch/arm/include/asm/barrier.h    | 12 +++++++++---
 arch/arm/include/asm/outercache.h | 17 -----------------
 arch/arm/kernel/irq.c             |  1 +
 arch/arm/mach-mmp/pm-pxa910.c     |  1 +
 arch/arm/mach-prima2/pm.c         |  1 +
 arch/arm/mach-ux500/cache-l2x0.c  |  1 +
 arch/arm/mm/Kconfig               |  4 ++++
 arch/arm/mm/flush.c               | 11 +++++++++++
 8 files changed, 28 insertions(+), 20 deletions(-)

(limited to 'arch/arm/include/asm')

diff --git a/arch/arm/include/asm/barrier.h b/arch/arm/include/asm/barrier.h
index 6c2327e1c732..fea99b0e2087 100644
--- a/arch/arm/include/asm/barrier.h
+++ b/arch/arm/include/asm/barrier.h
@@ -2,7 +2,6 @@
 #define __ASM_BARRIER_H

 #ifndef __ASSEMBLY__
-#include

 #define nop() __asm__ __volatile__("mov\tr0,r0\t@ nop\n\t");

@@ -37,12 +36,19 @@
 #define dmb(x) __asm__ __volatile__ ("" : : : "memory")
 #endif

+#ifdef CONFIG_ARM_HEAVY_MB
+extern void arm_heavy_mb(void);
+#define __arm_heavy_mb(x...) do { dsb(x); arm_heavy_mb(); } while (0)
+#else
+#define __arm_heavy_mb(x...) dsb(x)
+#endif
+
 #ifdef CONFIG_ARCH_HAS_BARRIERS
 #include
 #elif defined(CONFIG_ARM_DMA_MEM_BUFFERABLE) || defined(CONFIG_SMP)
-#define mb()		do { dsb(); outer_sync(); } while (0)
+#define mb()		__arm_heavy_mb()
 #define rmb()		dsb()
-#define wmb()		do { dsb(st); outer_sync(); } while (0)
+#define wmb()		__arm_heavy_mb(st)
 #define dma_rmb()	dmb(osh)
 #define dma_wmb()	dmb(oshst)
 #else
diff --git a/arch/arm/include/asm/outercache.h b/arch/arm/include/asm/outercache.h
index 563b92fc2f41..c2bf24f40177 100644
--- a/arch/arm/include/asm/outercache.h
+++ b/arch/arm/include/asm/outercache.h
@@ -129,21 +129,4 @@ static inline void outer_resume(void) { }

 #endif

-#ifdef CONFIG_OUTER_CACHE_SYNC
-/**
- * outer_sync - perform a sync point for outer cache
- *
- * Ensure that all outer cache operations are complete and any store
- * buffers are drained.
- */
-static inline void outer_sync(void)
-{
-	if (outer_cache.sync)
-		outer_cache.sync();
-}
-#else
-static inline void outer_sync(void)
-{ }
-#endif
-
 #endif	/* __ASM_OUTERCACHE_H */
diff --git a/arch/arm/kernel/irq.c b/arch/arm/kernel/irq.c
index 350f188c92d2..b96c8ed1723a 100644
--- a/arch/arm/kernel/irq.c
+++ b/arch/arm/kernel/irq.c
@@ -39,6 +39,7 @@
 #include

 #include
+#include
 #include
 #include
 #include
diff --git a/arch/arm/mach-mmp/pm-pxa910.c b/arch/arm/mach-mmp/pm-pxa910.c
index 04c9daf9f8d7..7db5870d127f 100644
--- a/arch/arm/mach-mmp/pm-pxa910.c
+++ b/arch/arm/mach-mmp/pm-pxa910.c
@@ -18,6 +18,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
diff --git a/arch/arm/mach-prima2/pm.c b/arch/arm/mach-prima2/pm.c
index d99d08eeb966..83e94c95e314 100644
--- a/arch/arm/mach-prima2/pm.c
+++ b/arch/arm/mach-prima2/pm.c
@@ -16,6 +16,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
diff --git a/arch/arm/mach-ux500/cache-l2x0.c b/arch/arm/mach-ux500/cache-l2x0.c
index 7557bede7ae6..780bd13cd7e3 100644
--- a/arch/arm/mach-ux500/cache-l2x0.c
+++ b/arch/arm/mach-ux500/cache-l2x0.c
@@ -8,6 +8,7 @@
 #include
 #include
+#include
 #include

 #include "db8500-regs.h"
diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig
index 7c6b976ab8d3..df7537f12469 100644
--- a/arch/arm/mm/Kconfig
+++ b/arch/arm/mm/Kconfig
@@ -883,6 +883,7 @@ config OUTER_CACHE

 config OUTER_CACHE_SYNC
 	bool
+	select ARM_HEAVY_MB
 	help
 	  The outer cache has a outer_cache_fns.sync function pointer that
 	  can be used to drain the write buffer of the outer cache.
@@ -1031,6 +1032,9 @@ config ARCH_HAS_BARRIERS
 	  This option allows the use of custom mandatory barriers
 	  included via the mach/barriers.h file.

+config ARM_HEAVY_MB
+	bool
+
 config ARCH_SUPPORTS_BIG_ENDIAN
 	bool
 	help
diff --git a/arch/arm/mm/flush.c b/arch/arm/mm/flush.c
index 34b66af516ea..ce6c2960d5ac 100644
--- a/arch/arm/mm/flush.c
+++ b/arch/arm/mm/flush.c
@@ -21,6 +21,17 @@

 #include "mm.h"

+#ifdef CONFIG_ARM_HEAVY_MB
+void arm_heavy_mb(void)
+{
+#ifdef CONFIG_OUTER_CACHE_SYNC
+	if (outer_cache.sync)
+		outer_cache.sync();
+#endif
+}
+EXPORT_SYMBOL(arm_heavy_mb);
+#endif
+
 #ifdef CONFIG_CPU_CACHE_VIPT
 static void flush_pfn_alias(unsigned long pfn, unsigned long vaddr)
-- 
cgit v1.2.3-55-g7522


From 4e1f8a6f1d978f033f1751e2887b3a69fab3f878 Mon Sep 17 00:00:00 2001
From: Russell King
Date: Wed, 3 Jun 2015 13:10:16 +0100
Subject: ARM: add soc memory barrier extension

Add an extension to the heavy barrier code to allow a SoC specific
memory barrier function to be provided. This is needed for platforms
where the interconnect has weak ordering, and thus needs assistance
to ensure that memory writes are properly visible in the correct order
to other parts of the system.

Acked-by: Tony Lindgren
Acked-by: Richard Woodruff
Signed-off-by: Russell King
---
 arch/arm/include/asm/barrier.h | 1 +
 arch/arm/mm/flush.c            | 4 ++++
 2 files changed, 5 insertions(+)

(limited to 'arch/arm/include/asm')

diff --git a/arch/arm/include/asm/barrier.h b/arch/arm/include/asm/barrier.h
index fea99b0e2087..3d8f1d3ad9a7 100644
--- a/arch/arm/include/asm/barrier.h
+++ b/arch/arm/include/asm/barrier.h
@@ -37,6 +37,7 @@
 #endif

 #ifdef CONFIG_ARM_HEAVY_MB
+extern void (*soc_mb)(void);
 extern void arm_heavy_mb(void);
 #define __arm_heavy_mb(x...) do { dsb(x); arm_heavy_mb(); } while (0)
 #else
diff --git a/arch/arm/mm/flush.c b/arch/arm/mm/flush.c
index ce6c2960d5ac..1ec8e7590fc6 100644
--- a/arch/arm/mm/flush.c
+++ b/arch/arm/mm/flush.c
@@ -22,12 +22,16 @@
 #include "mm.h"

 #ifdef CONFIG_ARM_HEAVY_MB
+void (*soc_mb)(void);
+
 void arm_heavy_mb(void)
 {
 #ifdef CONFIG_OUTER_CACHE_SYNC
 	if (outer_cache.sync)
 		outer_cache.sync();
 #endif
+	if (soc_mb)
+		soc_mb();
 }
 EXPORT_SYMBOL(arm_heavy_mb);
 #endif
-- 
cgit v1.2.3-55-g7522

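[Editor's note: to illustrate the intended use of the hook added above, a
platform with a weakly ordered interconnect would install its drain routine
once at init time, after which every heavy barrier (mb()/wmb()) reaches it
via arm_heavy_mb(). The sketch below is hypothetical -- the function names,
register offsets and init hook are invented for illustration and are not
taken from these patches; the real SoC users land separately.]

	/* Hypothetical SoC code, sketched against the soc_mb hook above. */
	#include <linux/init.h>
	#include <linux/io.h>
	#include <asm/barrier.h>
	#include <asm/processor.h>

	static void __iomem *interconnect_base;	/* mapped elsewhere at init */

	static void example_soc_mb(void)
	{
		/* Register offsets are made up for the example. */
		writel_relaxed(1, interconnect_base + 0x20);	/* trigger drain */
		while (readl_relaxed(interconnect_base + 0x24) & 1)
			cpu_relax();			/* wait for completion */
	}

	static int __init example_soc_barrier_init(void)
	{
		soc_mb = example_soc_mb;	/* arm_heavy_mb() now calls this */
		return 0;
	}
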
From fa8ad7889d83bcf0a6cdbf6d3622f3ec019cde14 Mon Sep 17 00:00:00 2001
From: Mark Rutland
Date: Mon, 6 Jul 2015 12:23:53 +0100
Subject: arm: perf: factor arm_pmu core out to drivers

To enable sharing of the arm_pmu code with arm64, this patch factors it
out to drivers/perf/. A new drivers/perf directory is added for
performance monitor drivers to live under.

MAINTAINERS is updated accordingly. Files added previously without a
corresponding MAINTAINERS update (perf_regs.c, perf_callchain.c, and
perf_event.h) are also added.

Cc: Arnaldo Carvalho de Melo
Cc: Greg Kroah-Hartman
Cc: Ingo Molnar
Cc: Linus Walleij
Cc: Paul Mackerras
Cc: Peter Zijlstra
Cc: Russell King
Cc: Will Deacon
Signed-off-by: Mark Rutland
[will: augmented Kconfig help slightly]
Signed-off-by: Will Deacon
---
 MAINTAINERS                         |   6 +-
 arch/arm/Kconfig                    |   8 +-
 arch/arm/include/asm/pmu.h          | 154 ------
 arch/arm/kernel/Makefile            |   3 +-
 arch/arm/kernel/perf_event.c        | 921 ------------------------------------
 arch/arm/kernel/perf_event_v6.c     |   2 +-
 arch/arm/kernel/perf_event_v7.c     |   2 +-
 arch/arm/kernel/perf_event_xscale.c |   2 +-
 arch/arm/mach-ux500/cpu-db8500.c    |   2 +-
 drivers/Kconfig                     |   2 +
 drivers/Makefile                    |   1 +
 drivers/perf/Kconfig                |  15 +
 drivers/perf/Makefile               |   1 +
 drivers/perf/arm_pmu.c              | 921 ++++++++++++++++++++++++++++++++++++
 include/linux/perf/arm_pmu.h        | 154 ++++++
 15 files changed, 1105 insertions(+), 1089 deletions(-)
 delete mode 100644 arch/arm/include/asm/pmu.h
 delete mode 100644 arch/arm/kernel/perf_event.c
 create mode 100644 drivers/perf/Kconfig
 create mode 100644 drivers/perf/Makefile
 create mode 100644 drivers/perf/arm_pmu.c
 create mode 100644 include/linux/perf/arm_pmu.h

(limited to 'arch/arm/include/asm')

diff --git a/MAINTAINERS b/MAINTAINERS
index fd6078443083..485c92ced47d 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -799,11 +799,13 @@ F:	arch/arm/include/asm/floppy.h
 ARM PMU PROFILING AND DEBUGGING
 M:	Will Deacon
 S:	Maintained
-F:	arch/arm/kernel/perf_event*
+F:	arch/arm/kernel/perf_*
 F:	arch/arm/oprofile/common.c
-F:	arch/arm/include/asm/pmu.h
 F:	arch/arm/kernel/hw_breakpoint.c
 F:	arch/arm/include/asm/hw_breakpoint.h
+F:	arch/arm/include/asm/perf_event.h
+F:	drivers/perf/arm_pmu.c
+F:	include/linux/perf/arm_pmu.h

 ARM PORT
 M:	Russell King
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 1c5021002fe4..4f7bc3d4b186 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -1701,12 +1701,8 @@ config HIGHPTE
 	  user-space 2nd level page tables to reside in high memory.

 config HW_PERF_EVENTS
-	bool "Enable hardware performance counter support for perf events"
-	depends on PERF_EVENTS
-	default y
-	help
-	  Enable hardware performance counter support for perf events. If
-	  disabled, perf events will use software events only.
+	def_bool y
+	depends on ARM_PMU

 config SYS_SUPPORTS_HUGETLBFS
 	def_bool y
diff --git a/arch/arm/include/asm/pmu.h b/arch/arm/include/asm/pmu.h
deleted file mode 100644
index 3fc87dfd77e6..000000000000
--- a/arch/arm/include/asm/pmu.h
+++ /dev/null
@@ -1,154 +0,0 @@
-/*
- * linux/arch/arm/include/asm/pmu.h
- *
- * Copyright (C) 2009 picoChip Designs Ltd, Jamie Iles
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- */
-
-#ifndef __ARM_PMU_H__
-#define __ARM_PMU_H__
-
-#include
-#include
-
-#include
-
-/*
- * struct arm_pmu_platdata - ARM PMU platform data
- *
- * @handle_irq: an optional handler which will be called from the
- *	interrupt and passed the address of the low level handler,
- *	and can be used to implement any platform specific handling
- *	before or after calling it.
- */
-struct arm_pmu_platdata {
-	irqreturn_t (*handle_irq)(int irq, void *dev,
-				  irq_handler_t pmu_handler);
-};
-
-#ifdef CONFIG_HW_PERF_EVENTS
-
-/*
- * The ARMv7 CPU PMU supports up to 32 event counters.
- */
-#define ARMPMU_MAX_HWEVENTS		32
-
-#define HW_OP_UNSUPPORTED		0xFFFF
-#define C(_x)				PERF_COUNT_HW_CACHE_##_x
-#define CACHE_OP_UNSUPPORTED		0xFFFF
-
-#define PERF_MAP_ALL_UNSUPPORTED					\
-	[0 ... PERF_COUNT_HW_MAX - 1] = HW_OP_UNSUPPORTED
-
-#define PERF_CACHE_MAP_ALL_UNSUPPORTED					\
-[0 ... C(MAX) - 1] = {						\
-	[0 ... C(OP_MAX) - 1] = {					\
-		[0 ... C(RESULT_MAX) - 1] = CACHE_OP_UNSUPPORTED,	\
-	},								\
-}
-
-/* The events for a given PMU register set. */
-struct pmu_hw_events {
-	/*
-	 * The events that are active on the PMU for the given index.
-	 */
-	struct perf_event	*events[ARMPMU_MAX_HWEVENTS];
-
-	/*
-	 * A 1 bit for an index indicates that the counter is being used for
-	 * an event. A 0 means that the counter can be used.
-	 */
-	DECLARE_BITMAP(used_mask, ARMPMU_MAX_HWEVENTS);
-
-	/*
-	 * Hardware lock to serialize accesses to PMU registers. Needed for the
-	 * read/modify/write sequences.
-	 */
-	raw_spinlock_t		pmu_lock;
-
-	/*
-	 * When using percpu IRQs, we need a percpu dev_id. Place it here as we
-	 * already have to allocate this struct per cpu.
-	 */
-	struct arm_pmu		*percpu_pmu;
-};
-
-struct arm_pmu {
-	struct pmu	pmu;
-	cpumask_t	active_irqs;
-	cpumask_t	supported_cpus;
-	int		*irq_affinity;
-	char		*name;
-	irqreturn_t	(*handle_irq)(int irq_num, void *dev);
-	void		(*enable)(struct perf_event *event);
-	void		(*disable)(struct perf_event *event);
-	int		(*get_event_idx)(struct pmu_hw_events *hw_events,
-					 struct perf_event *event);
-	void		(*clear_event_idx)(struct pmu_hw_events *hw_events,
-					   struct perf_event *event);
-	int		(*set_event_filter)(struct hw_perf_event *evt,
-					    struct perf_event_attr *attr);
-	u32		(*read_counter)(struct perf_event *event);
-	void		(*write_counter)(struct perf_event *event, u32 val);
-	void		(*start)(struct arm_pmu *);
-	void		(*stop)(struct arm_pmu *);
-	void		(*reset)(void *);
-	int		(*request_irq)(struct arm_pmu *, irq_handler_t handler);
-	void		(*free_irq)(struct arm_pmu *);
-	int		(*map_event)(struct perf_event *event);
-	int		num_events;
-	atomic_t	active_events;
-	struct mutex	reserve_mutex;
-	u64		max_period;
-	struct platform_device	*plat_device;
-	struct pmu_hw_events	__percpu *hw_events;
-	struct notifier_block	hotplug_nb;
-};
-
-#define to_arm_pmu(p) (container_of(p, struct arm_pmu, pmu))
-
-int armpmu_register(struct arm_pmu *armpmu, int type);
-
-u64 armpmu_event_update(struct perf_event *event);
-
-int armpmu_event_set_period(struct perf_event *event);
-
-int armpmu_map_event(struct perf_event *event,
-		     const unsigned (*event_map)[PERF_COUNT_HW_MAX],
-		     const unsigned (*cache_map)[PERF_COUNT_HW_CACHE_MAX]
-						[PERF_COUNT_HW_CACHE_OP_MAX]
-						[PERF_COUNT_HW_CACHE_RESULT_MAX],
-		     u32 raw_event_mask);
-
-struct pmu_probe_info {
-	unsigned int cpuid;
-	unsigned int mask;
-	int (*init)(struct arm_pmu *);
-};
-
-#define PMU_PROBE(_cpuid, _mask, _fn)	\
-{					\
-	.cpuid = (_cpuid),		\
-	.mask = (_mask),		\
-	.init = (_fn),			\
-}
-
-#define ARM_PMU_PROBE(_cpuid, _fn) \
-	PMU_PROBE(_cpuid, ARM_CPU_PART_MASK, _fn)
-
-#define ARM_PMU_XSCALE_MASK	((0xff << 24) | ARM_CPU_XSCALE_ARCH_MASK)
-
-#define XSCALE_PMU_PROBE(_version, _fn) \
-	PMU_PROBE(ARM_CPU_IMP_INTEL << 24 | _version, ARM_PMU_XSCALE_MASK, _fn)
-
-int arm_pmu_device_probe(struct platform_device *pdev,
-			 const struct of_device_id *of_table,
-			 const struct pmu_probe_info *probe_table);
-
-#endif /* CONFIG_HW_PERF_EVENTS */
-
-#endif /* __ARM_PMU_H__ */
diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile
index e69f7a19735d..fcb25c1c5c21 100644
--- a/arch/arm/kernel/Makefile
+++ b/arch/arm/kernel/Makefile
@@ -71,8 +71,7 @@ obj-$(CONFIG_CPU_PJ4)		+= pj4-cp0.o
 obj-$(CONFIG_CPU_PJ4B)		+= pj4-cp0.o
 obj-$(CONFIG_IWMMXT)		+= iwmmxt.o
 obj-$(CONFIG_PERF_EVENTS)	+= perf_regs.o perf_callchain.o
-obj-$(CONFIG_HW_PERF_EVENTS)	+= perf_event.o \
-				   perf_event_xscale.o perf_event_v6.o \
+obj-$(CONFIG_HW_PERF_EVENTS)	+= perf_event_xscale.o perf_event_v6.o \
 				   perf_event_v7.o
 CFLAGS_pj4-cp0.o		:= -marm
 AFLAGS_iwmmxt.o			:= -Wa,-mcpu=iwmmxt
diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c
deleted file mode 100644
index 1cb40651d783..000000000000
--- a/arch/arm/kernel/perf_event.c
+++ /dev/null
@@ -1,921 +0,0 @@
-#undef DEBUG
-
-/*
- * ARM performance counter support.
- *
- * Copyright (C) 2009 picoChip Designs, Ltd., Jamie Iles
- * Copyright (C) 2010 ARM Ltd., Will Deacon
- *
- * This code is based on the sparc64 perf event code, which is in turn based
- * on the x86 code.
- */
-#define pr_fmt(fmt) "hw perfevents: " fmt
-
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-
-#include
-#include
-#include
-
-static int
-armpmu_map_cache_event(const unsigned (*cache_map)
-				      [PERF_COUNT_HW_CACHE_MAX]
-				      [PERF_COUNT_HW_CACHE_OP_MAX]
-				      [PERF_COUNT_HW_CACHE_RESULT_MAX],
-		       u64 config)
-{
-	unsigned int cache_type, cache_op, cache_result, ret;
-
-	cache_type = (config >> 0) & 0xff;
-	if (cache_type >= PERF_COUNT_HW_CACHE_MAX)
-		return -EINVAL;
-
-	cache_op = (config >> 8) & 0xff;
-	if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX)
-		return -EINVAL;
-
-	cache_result = (config >> 16) & 0xff;
-	if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
-		return -EINVAL;
-
-	ret = (int)(*cache_map)[cache_type][cache_op][cache_result];
-
-	if (ret == CACHE_OP_UNSUPPORTED)
-		return -ENOENT;
-
-	return ret;
-}
-
-static int
-armpmu_map_hw_event(const unsigned (*event_map)[PERF_COUNT_HW_MAX], u64 config)
-{
-	int mapping;
-
-	if (config >= PERF_COUNT_HW_MAX)
-		return -EINVAL;
-
-	mapping = (*event_map)[config];
-	return mapping == HW_OP_UNSUPPORTED ? -ENOENT : mapping;
-}
-
-static int
-armpmu_map_raw_event(u32 raw_event_mask, u64 config)
-{
-	return (int)(config & raw_event_mask);
-}
-
-int
-armpmu_map_event(struct perf_event *event,
-		 const unsigned (*event_map)[PERF_COUNT_HW_MAX],
-		 const unsigned (*cache_map)
-				[PERF_COUNT_HW_CACHE_MAX]
-				[PERF_COUNT_HW_CACHE_OP_MAX]
-				[PERF_COUNT_HW_CACHE_RESULT_MAX],
-		 u32 raw_event_mask)
-{
-	u64 config = event->attr.config;
-	int type = event->attr.type;
-
-	if (type == event->pmu->type)
-		return armpmu_map_raw_event(raw_event_mask, config);
-
-	switch (type) {
-	case PERF_TYPE_HARDWARE:
-		return armpmu_map_hw_event(event_map, config);
-	case PERF_TYPE_HW_CACHE:
-		return armpmu_map_cache_event(cache_map, config);
-	case PERF_TYPE_RAW:
-		return armpmu_map_raw_event(raw_event_mask, config);
-	}
-
-	return -ENOENT;
-}
-
-int armpmu_event_set_period(struct perf_event *event)
-{
-	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
-	struct hw_perf_event *hwc = &event->hw;
-	s64 left = local64_read(&hwc->period_left);
-	s64 period = hwc->sample_period;
-	int ret = 0;
-
-	if (unlikely(left <= -period)) {
-		left = period;
-		local64_set(&hwc->period_left, left);
-		hwc->last_period = period;
-		ret = 1;
-	}
-
-	if (unlikely(left <= 0)) {
-		left += period;
-		local64_set(&hwc->period_left, left);
-		hwc->last_period = period;
-		ret = 1;
-	}
-
-	/*
-	 * Limit the maximum period to prevent the counter value
-	 * from overtaking the one we are about to program. In
-	 * effect we are reducing max_period to account for
-	 * interrupt latency (and we are being very conservative).
-	 */
-	if (left > (armpmu->max_period >> 1))
-		left = armpmu->max_period >> 1;
-
-	local64_set(&hwc->prev_count, (u64)-left);
-
-	armpmu->write_counter(event, (u64)(-left) & 0xffffffff);
-
-	perf_event_update_userpage(event);
-
-	return ret;
-}
-
-u64 armpmu_event_update(struct perf_event *event)
-{
-	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
-	struct hw_perf_event *hwc = &event->hw;
-	u64 delta, prev_raw_count, new_raw_count;
-
-again:
-	prev_raw_count = local64_read(&hwc->prev_count);
-	new_raw_count = armpmu->read_counter(event);
-
-	if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
-			    new_raw_count) != prev_raw_count)
-		goto again;
-
-	delta = (new_raw_count - prev_raw_count) & armpmu->max_period;
-
-	local64_add(delta, &event->count);
-	local64_sub(delta, &hwc->period_left);
-
-	return new_raw_count;
-}
-
-static void
-armpmu_read(struct perf_event *event)
-{
-	armpmu_event_update(event);
-}
-
-static void
-armpmu_stop(struct perf_event *event, int flags)
-{
-	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
-	struct hw_perf_event *hwc = &event->hw;
-
-	/*
-	 * ARM pmu always has to update the counter, so ignore
-	 * PERF_EF_UPDATE, see comments in armpmu_start().
-	 */
-	if (!(hwc->state & PERF_HES_STOPPED)) {
-		armpmu->disable(event);
-		armpmu_event_update(event);
-		hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
-	}
-}
-
-static void armpmu_start(struct perf_event *event, int flags)
-{
-	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
-	struct hw_perf_event *hwc = &event->hw;
-
-	/*
-	 * ARM pmu always has to reprogram the period, so ignore
-	 * PERF_EF_RELOAD, see the comment below.
-	 */
-	if (flags & PERF_EF_RELOAD)
-		WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
-
-	hwc->state = 0;
-	/*
-	 * Set the period again. Some counters can't be stopped, so when we
-	 * were stopped we simply disabled the IRQ source and the counter
-	 * may have been left counting. If we don't do this step then we may
-	 * get an interrupt too soon or *way* too late if the overflow has
-	 * happened since disabling.
-	 */
-	armpmu_event_set_period(event);
-	armpmu->enable(event);
-}
-
-static void
-armpmu_del(struct perf_event *event, int flags)
-{
-	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
-	struct pmu_hw_events *hw_events = this_cpu_ptr(armpmu->hw_events);
-	struct hw_perf_event *hwc = &event->hw;
-	int idx = hwc->idx;
-
-	armpmu_stop(event, PERF_EF_UPDATE);
-	hw_events->events[idx] = NULL;
-	clear_bit(idx, hw_events->used_mask);
-	if (armpmu->clear_event_idx)
-		armpmu->clear_event_idx(hw_events, event);
-
-	perf_event_update_userpage(event);
-}
-
-static int
-armpmu_add(struct perf_event *event, int flags)
-{
-	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
-	struct pmu_hw_events *hw_events = this_cpu_ptr(armpmu->hw_events);
-	struct hw_perf_event *hwc = &event->hw;
-	int idx;
-	int err = 0;
-
-	/* An event following a process won't be stopped earlier */
-	if (!cpumask_test_cpu(smp_processor_id(), &armpmu->supported_cpus))
-		return -ENOENT;
-
-	perf_pmu_disable(event->pmu);
-
-	/* If we don't have a space for the counter then finish early. */
-	idx = armpmu->get_event_idx(hw_events, event);
-	if (idx < 0) {
-		err = idx;
-		goto out;
-	}
-
-	/*
-	 * If there is an event in the counter we are going to use then make
-	 * sure it is disabled.
-	 */
-	event->hw.idx = idx;
-	armpmu->disable(event);
-	hw_events->events[idx] = event;
-
-	hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
-	if (flags & PERF_EF_START)
-		armpmu_start(event, PERF_EF_RELOAD);
-
-	/* Propagate our changes to the userspace mapping. */
-	perf_event_update_userpage(event);
-
-out:
-	perf_pmu_enable(event->pmu);
-	return err;
-}
-
-static int
-validate_event(struct pmu *pmu, struct pmu_hw_events *hw_events,
-			       struct perf_event *event)
-{
-	struct arm_pmu *armpmu;
-
-	if (is_software_event(event))
-		return 1;
-
-	/*
-	 * Reject groups spanning multiple HW PMUs (e.g. CPU + CCI). The
-	 * core perf code won't check that the pmu->ctx == leader->ctx
-	 * until after pmu->event_init(event).
-	 */
-	if (event->pmu != pmu)
-		return 0;
-
-	if (event->state < PERF_EVENT_STATE_OFF)
-		return 1;
-
-	if (event->state == PERF_EVENT_STATE_OFF && !event->attr.enable_on_exec)
-		return 1;
-
-	armpmu = to_arm_pmu(event->pmu);
-	return armpmu->get_event_idx(hw_events, event) >= 0;
-}
-
-static int
-validate_group(struct perf_event *event)
-{
-	struct perf_event *sibling, *leader = event->group_leader;
-	struct pmu_hw_events fake_pmu;
-
-	/*
-	 * Initialise the fake PMU. We only need to populate the
-	 * used_mask for the purposes of validation.
-	 */
-	memset(&fake_pmu.used_mask, 0, sizeof(fake_pmu.used_mask));
-
-	if (!validate_event(event->pmu, &fake_pmu, leader))
-		return -EINVAL;
-
-	list_for_each_entry(sibling, &leader->sibling_list, group_entry) {
-		if (!validate_event(event->pmu, &fake_pmu, sibling))
-			return -EINVAL;
-	}
-
-	if (!validate_event(event->pmu, &fake_pmu, event))
-		return -EINVAL;
-
-	return 0;
-}
-
-static irqreturn_t armpmu_dispatch_irq(int irq, void *dev)
-{
-	struct arm_pmu *armpmu;
-	struct platform_device *plat_device;
-	struct arm_pmu_platdata *plat;
-	int ret;
-	u64 start_clock, finish_clock;
-
-	/*
-	 * we request the IRQ with a (possibly percpu) struct arm_pmu**, but
-	 * the handlers expect a struct arm_pmu*. The percpu_irq framework will
-	 * do any necessary shifting, we just need to perform the first
-	 * dereference.
-	 */
-	armpmu = *(void **)dev;
-	plat_device = armpmu->plat_device;
-	plat = dev_get_platdata(&plat_device->dev);
-
-	start_clock = sched_clock();
-	if (plat && plat->handle_irq)
-		ret = plat->handle_irq(irq, armpmu, armpmu->handle_irq);
-	else
-		ret = armpmu->handle_irq(irq, armpmu);
-	finish_clock = sched_clock();
-
-	perf_sample_event_took(finish_clock - start_clock);
-	return ret;
-}
-
-static void
-armpmu_release_hardware(struct arm_pmu *armpmu)
-{
-	armpmu->free_irq(armpmu);
-}
-
-static int
-armpmu_reserve_hardware(struct arm_pmu *armpmu)
-{
-	int err = armpmu->request_irq(armpmu, armpmu_dispatch_irq);
-	if (err) {
-		armpmu_release_hardware(armpmu);
-		return err;
-	}
-
-	return 0;
-}
-
-static void
-hw_perf_event_destroy(struct perf_event *event)
-{
-	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
-	atomic_t *active_events = &armpmu->active_events;
-	struct mutex *pmu_reserve_mutex = &armpmu->reserve_mutex;
-
-	if (atomic_dec_and_mutex_lock(active_events, pmu_reserve_mutex)) {
-		armpmu_release_hardware(armpmu);
-		mutex_unlock(pmu_reserve_mutex);
-	}
-}
-
-static int
-event_requires_mode_exclusion(struct perf_event_attr *attr)
-{
-	return attr->exclude_idle || attr->exclude_user ||
-	       attr->exclude_kernel || attr->exclude_hv;
-}
-
-static int
-__hw_perf_event_init(struct perf_event *event)
-{
-	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
-	struct hw_perf_event *hwc = &event->hw;
-	int mapping;
-
-	mapping = armpmu->map_event(event);
-
-	if (mapping < 0) {
-		pr_debug("event %x:%llx not supported\n", event->attr.type,
-			 event->attr.config);
-		return mapping;
-	}
-
-	/*
-	 * We don't assign an index until we actually place the event onto
-	 * hardware. Use -1 to signify that we haven't decided where to put it
-	 * yet. For SMP systems, each core has it's own PMU so we can't do any
-	 * clever allocation or constraints checking at this point.
-	 */
-	hwc->idx		= -1;
-	hwc->config_base	= 0;
-	hwc->config		= 0;
-	hwc->event_base		= 0;
-
-	/*
-	 * Check whether we need to exclude the counter from certain modes.
-	 */
-	if ((!armpmu->set_event_filter ||
-	     armpmu->set_event_filter(hwc, &event->attr)) &&
-	     event_requires_mode_exclusion(&event->attr)) {
-		pr_debug("ARM performance counters do not support "
-			 "mode exclusion\n");
-		return -EOPNOTSUPP;
-	}
-
-	/*
-	 * Store the event encoding into the config_base field.
-	 */
-	hwc->config_base	    |= (unsigned long)mapping;
-
-	if (!is_sampling_event(event)) {
-		/*
-		 * For non-sampling runs, limit the sample_period to half
-		 * of the counter width. That way, the new counter value
-		 * is far less likely to overtake the previous one unless
-		 * you have some serious IRQ latency issues.
-		 */
-		hwc->sample_period  = armpmu->max_period >> 1;
-		hwc->last_period    = hwc->sample_period;
-		local64_set(&hwc->period_left, hwc->sample_period);
-	}
-
-	if (event->group_leader != event) {
-		if (validate_group(event) != 0)
-			return -EINVAL;
-	}
-
-	return 0;
-}
-
-static int armpmu_event_init(struct perf_event *event)
-{
-	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
-	int err = 0;
-	atomic_t *active_events = &armpmu->active_events;
-
-	/*
-	 * Reject CPU-affine events for CPUs that are of a different class to
-	 * that which this PMU handles. Process-following events (where
-	 * event->cpu == -1) can be migrated between CPUs, and thus we have to
-	 * reject them later (in armpmu_add) if they're scheduled on a
-	 * different class of CPU.
-	 */
-	if (event->cpu != -1 &&
-		!cpumask_test_cpu(event->cpu, &armpmu->supported_cpus))
-		return -ENOENT;
-
-	/* does not support taken branch sampling */
-	if (has_branch_stack(event))
-		return -EOPNOTSUPP;
-
-	if (armpmu->map_event(event) == -ENOENT)
-		return -ENOENT;
-
-	event->destroy = hw_perf_event_destroy;
-
-	if (!atomic_inc_not_zero(active_events)) {
-		mutex_lock(&armpmu->reserve_mutex);
-		if (atomic_read(active_events) == 0)
-			err = armpmu_reserve_hardware(armpmu);
-
-		if (!err)
-			atomic_inc(active_events);
-		mutex_unlock(&armpmu->reserve_mutex);
-	}
-
-	if (err)
-		return err;
-
-	err = __hw_perf_event_init(event);
-	if (err)
-		hw_perf_event_destroy(event);
-
-	return err;
-}
-
-static void armpmu_enable(struct pmu *pmu)
-{
-	struct arm_pmu *armpmu = to_arm_pmu(pmu);
-	struct pmu_hw_events *hw_events = this_cpu_ptr(armpmu->hw_events);
-	int enabled = bitmap_weight(hw_events->used_mask, armpmu->num_events);
-
-	/* For task-bound events we may be called on other CPUs */
-	if (!cpumask_test_cpu(smp_processor_id(), &armpmu->supported_cpus))
-		return;
-
-	if (enabled)
-		armpmu->start(armpmu);
-}
-
-static void armpmu_disable(struct pmu *pmu)
-{
-	struct arm_pmu *armpmu = to_arm_pmu(pmu);
-
-	/* For task-bound events we may be called on other CPUs */
-	if (!cpumask_test_cpu(smp_processor_id(), &armpmu->supported_cpus))
-		return;
-
-	armpmu->stop(armpmu);
-}
-
-/*
- * In heterogeneous systems, events are specific to a particular
- * microarchitecture, and aren't suitable for another. Thus, only match CPUs of
- * the same microarchitecture.
- */
-static int armpmu_filter_match(struct perf_event *event)
-{
-	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
-	unsigned int cpu = smp_processor_id();
-	return cpumask_test_cpu(cpu, &armpmu->supported_cpus);
-}
-
-static void armpmu_init(struct arm_pmu *armpmu)
-{
-	atomic_set(&armpmu->active_events, 0);
-	mutex_init(&armpmu->reserve_mutex);
-
-	armpmu->pmu = (struct pmu) {
-		.pmu_enable	= armpmu_enable,
-		.pmu_disable	= armpmu_disable,
-		.event_init	= armpmu_event_init,
-		.add		= armpmu_add,
-		.del		= armpmu_del,
-		.start		= armpmu_start,
-		.stop		= armpmu_stop,
-		.read		= armpmu_read,
-		.filter_match	= armpmu_filter_match,
-	};
-}
-
-int armpmu_register(struct arm_pmu *armpmu, int type)
-{
-	armpmu_init(armpmu);
-	pr_info("enabled with %s PMU driver, %d counters available\n",
-			armpmu->name, armpmu->num_events);
-	return perf_pmu_register(&armpmu->pmu, armpmu->name, type);
-}
-
-/* Set at runtime when we know what CPU type we are. */
-static struct arm_pmu *__oprofile_cpu_pmu;
-
-/*
- * Despite the names, these two functions are CPU-specific and are used
- * by the OProfile/perf code.
- */
-const char *perf_pmu_name(void)
-{
-	if (!__oprofile_cpu_pmu)
-		return NULL;
-
-	return __oprofile_cpu_pmu->name;
-}
-EXPORT_SYMBOL_GPL(perf_pmu_name);
-
-int perf_num_counters(void)
-{
-	int max_events = 0;
-
-	if (__oprofile_cpu_pmu != NULL)
-		max_events = __oprofile_cpu_pmu->num_events;
-
-	return max_events;
-}
-EXPORT_SYMBOL_GPL(perf_num_counters);
-
-static void cpu_pmu_enable_percpu_irq(void *data)
-{
-	int irq = *(int *)data;
-
-	enable_percpu_irq(irq, IRQ_TYPE_NONE);
-}
-
-static void cpu_pmu_disable_percpu_irq(void *data)
-{
-	int irq = *(int *)data;
-
-	disable_percpu_irq(irq);
-}
-
-static void cpu_pmu_free_irq(struct arm_pmu *cpu_pmu)
-{
-	int i, irq, irqs;
-	struct platform_device *pmu_device = cpu_pmu->plat_device;
-	struct pmu_hw_events __percpu *hw_events = cpu_pmu->hw_events;
-
-	irqs = min(pmu_device->num_resources, num_possible_cpus());
-
-	irq = platform_get_irq(pmu_device, 0);
-	if (irq >= 0 && irq_is_percpu(irq)) {
-		on_each_cpu(cpu_pmu_disable_percpu_irq, &irq, 1);
-		free_percpu_irq(irq, &hw_events->percpu_pmu);
-	} else {
-		for (i = 0; i < irqs; ++i) {
-			int cpu = i;
-
-			if (cpu_pmu->irq_affinity)
-				cpu = cpu_pmu->irq_affinity[i];
-
-			if (!cpumask_test_and_clear_cpu(cpu, &cpu_pmu->active_irqs))
-				continue;
-			irq = platform_get_irq(pmu_device, i);
-			if (irq >= 0)
-				free_irq(irq, per_cpu_ptr(&hw_events->percpu_pmu, cpu));
-		}
-	}
-}
-
-static int cpu_pmu_request_irq(struct arm_pmu *cpu_pmu, irq_handler_t handler)
-{
-	int i, err, irq, irqs;
-	struct platform_device *pmu_device = cpu_pmu->plat_device;
-	struct pmu_hw_events __percpu *hw_events = cpu_pmu->hw_events;
-
-	if (!pmu_device)
-		return -ENODEV;
-
-	irqs = min(pmu_device->num_resources, num_possible_cpus());
-	if (irqs < 1) {
-		pr_warn_once("perf/ARM: No irqs for PMU defined, sampling events not supported\n");
-		return 0;
-	}
-
-	irq = platform_get_irq(pmu_device, 0);
-	if (irq >= 0 && irq_is_percpu(irq)) {
-		err = request_percpu_irq(irq, handler, "arm-pmu",
-					 &hw_events->percpu_pmu);
-		if (err) {
-			pr_err("unable to request IRQ%d for ARM PMU counters\n",
-				irq);
-			return err;
-		}
-		on_each_cpu(cpu_pmu_enable_percpu_irq, &irq, 1);
-	} else {
-		for (i = 0; i < irqs; ++i) {
-			int cpu = i;
-
-			err = 0;
-			irq = platform_get_irq(pmu_device, i);
-			if (irq < 0)
-				continue;
-
-			if (cpu_pmu->irq_affinity)
-				cpu = cpu_pmu->irq_affinity[i];
-
-			/*
-			 * If we have a single PMU interrupt that we can't shift,
-			 * assume that we're running on a uniprocessor machine and
-			 * continue. Otherwise, continue without this interrupt.
-			 */
-			if (irq_set_affinity(irq, cpumask_of(cpu)) && irqs > 1) {
-				pr_warn("unable to set irq affinity (irq=%d, cpu=%u)\n",
-					irq, cpu);
-				continue;
-			}
-
-			err = request_irq(irq, handler,
-					  IRQF_NOBALANCING | IRQF_NO_THREAD, "arm-pmu",
-					  per_cpu_ptr(&hw_events->percpu_pmu, cpu));
-			if (err) {
-				pr_err("unable to request IRQ%d for ARM PMU counters\n",
-					irq);
-				return err;
-			}
-
-			cpumask_set_cpu(cpu, &cpu_pmu->active_irqs);
-		}
-	}
-
-	return 0;
-}
-
-/*
- * PMU hardware loses all context when a CPU goes offline.
- * When a CPU is hotplugged back in, since some hardware registers are
- * UNKNOWN at reset, the PMU must be explicitly reset to avoid reading
- * junk values out of them.
- */
-static int cpu_pmu_notify(struct notifier_block *b, unsigned long action,
-			  void *hcpu)
-{
-	int cpu = (unsigned long)hcpu;
-	struct arm_pmu *pmu = container_of(b, struct arm_pmu, hotplug_nb);
-
-	if ((action & ~CPU_TASKS_FROZEN) != CPU_STARTING)
-		return NOTIFY_DONE;
-
-	if (!cpumask_test_cpu(cpu, &pmu->supported_cpus))
-		return NOTIFY_DONE;
-
-	if (pmu->reset)
-		pmu->reset(pmu);
-	else
-		return NOTIFY_DONE;
-
-	return NOTIFY_OK;
-}
-
-static int cpu_pmu_init(struct arm_pmu *cpu_pmu)
-{
-	int err;
-	int cpu;
-	struct pmu_hw_events __percpu *cpu_hw_events;
-
-	cpu_hw_events = alloc_percpu(struct pmu_hw_events);
-	if (!cpu_hw_events)
-		return -ENOMEM;
-
-	cpu_pmu->hotplug_nb.notifier_call = cpu_pmu_notify;
-	err = register_cpu_notifier(&cpu_pmu->hotplug_nb);
-	if (err)
-		goto out_hw_events;
-
-	for_each_possible_cpu(cpu) {
-		struct pmu_hw_events *events = per_cpu_ptr(cpu_hw_events, cpu);
-		raw_spin_lock_init(&events->pmu_lock);
-		events->percpu_pmu = cpu_pmu;
-	}
-
-	cpu_pmu->hw_events	= cpu_hw_events;
-	cpu_pmu->request_irq	= cpu_pmu_request_irq;
-	cpu_pmu->free_irq	= cpu_pmu_free_irq;
-
-	/* Ensure the PMU has sane values out of reset. */
-	if (cpu_pmu->reset)
-		on_each_cpu_mask(&cpu_pmu->supported_cpus, cpu_pmu->reset,
-			 cpu_pmu, 1);
-
-	/* If no interrupts available, set the corresponding capability flag */
-	if (!platform_get_irq(cpu_pmu->plat_device, 0))
-		cpu_pmu->pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT;
-
-	return 0;
-
-out_hw_events:
-	free_percpu(cpu_hw_events);
-	return err;
-}
-
-static void cpu_pmu_destroy(struct arm_pmu *cpu_pmu)
-{
-	unregister_cpu_notifier(&cpu_pmu->hotplug_nb);
-	free_percpu(cpu_pmu->hw_events);
-}
-
-/*
- * CPU PMU identification and probing.
- */
-static int probe_current_pmu(struct arm_pmu *pmu,
-			     const struct pmu_probe_info *info)
-{
-	int cpu = get_cpu();
-	unsigned int cpuid = read_cpuid_id();
-	int ret = -ENODEV;
-
-	pr_info("probing PMU on CPU %d\n", cpu);
-
-	for (; info->init != NULL; info++) {
-		if ((cpuid & info->mask) != info->cpuid)
-			continue;
-		ret = info->init(pmu);
-		break;
-	}
-
-	put_cpu();
-	return ret;
-}
-
-static int of_pmu_irq_cfg(struct arm_pmu *pmu)
-{
-	int *irqs, i = 0;
-	bool using_spi = false;
-	struct platform_device *pdev = pmu->plat_device;
-
-	irqs = kcalloc(pdev->num_resources, sizeof(*irqs), GFP_KERNEL);
-	if (!irqs)
-		return -ENOMEM;
-
-	do {
-		struct device_node *dn;
-		int cpu, irq;
-
-		/* See if we have an affinity entry */
-		dn = of_parse_phandle(pdev->dev.of_node, "interrupt-affinity", i);
-		if (!dn)
-			break;
-
-		/* Check the IRQ type and prohibit a mix of PPIs and SPIs */
-		irq = platform_get_irq(pdev, i);
-		if (irq >= 0) {
-			bool spi = !irq_is_percpu(irq);
-
-			if (i > 0 && spi != using_spi) {
-				pr_err("PPI/SPI IRQ type mismatch for %s!\n",
-					dn->name);
-				kfree(irqs);
-				return -EINVAL;
-			}
-
-			using_spi = spi;
-		}
-
-		/* Now look up the logical CPU number */
-		for_each_possible_cpu(cpu)
-			if (dn == of_cpu_device_node_get(cpu))
-				break;
-
-		if (cpu >= nr_cpu_ids) {
-			pr_warn("Failed to find logical CPU for %s\n",
-				dn->name);
-			of_node_put(dn);
-			cpumask_setall(&pmu->supported_cpus);
-			break;
-		}
-		of_node_put(dn);
-
-		/* For SPIs, we need to track the affinity per IRQ */
-		if (using_spi) {
-			if (i >= pdev->num_resources) {
-				of_node_put(dn);
-				break;
-			}
-
-			irqs[i] = cpu;
-		}
-
-		/* Keep track of the CPUs containing this PMU type */
-		cpumask_set_cpu(cpu, &pmu->supported_cpus);
-		of_node_put(dn);
-		i++;
-	} while (1);
-
-	/* If we didn't manage to parse anything, claim to support all CPUs */
-	if (cpumask_weight(&pmu->supported_cpus) == 0)
-		cpumask_setall(&pmu->supported_cpus);
-
-	/* If we matched up the IRQ affinities, use them to route the SPIs */
-	if (using_spi && i == pdev->num_resources)
-		pmu->irq_affinity = irqs;
-	else
-		kfree(irqs);
-
-	return 0;
-}
-
-int arm_pmu_device_probe(struct platform_device *pdev,
-			 const struct of_device_id *of_table,
-			 const struct pmu_probe_info *probe_table)
-{
-	const struct of_device_id *of_id;
-	const int (*init_fn)(struct arm_pmu *);
-	struct device_node *node = pdev->dev.of_node;
-	struct arm_pmu *pmu;
-	int ret = -ENODEV;
-
-	pmu = kzalloc(sizeof(struct arm_pmu), GFP_KERNEL);
-	if (!pmu) {
-		pr_info("failed to allocate PMU device!\n");
-		return -ENOMEM;
-	}
-
-	if (!__oprofile_cpu_pmu)
-		__oprofile_cpu_pmu = pmu;
-
-	pmu->plat_device = pdev;
-
-	if (node && (of_id = of_match_node(of_table, pdev->dev.of_node))) {
-		init_fn = of_id->data;
-
-		ret = of_pmu_irq_cfg(pmu);
-		if (!ret)
-			ret = init_fn(pmu);
-	} else {
-		ret = probe_current_pmu(pmu, probe_table);
-		cpumask_setall(&pmu->supported_cpus);
-	}
-
-	if (ret) {
-		pr_info("failed to probe PMU!\n");
-		goto out_free;
-	}
-
-	ret = cpu_pmu_init(pmu);
-	if (ret)
-		goto out_free;
-
-	ret = armpmu_register(pmu, -1);
-	if (ret)
-		goto out_destroy;
-
-	return 0;
-
-out_destroy:
-	cpu_pmu_destroy(pmu);
-out_free:
-	pr_info("failed to register PMU devices!\n");
-	kfree(pmu);
-	return ret;
-}
diff --git a/arch/arm/kernel/perf_event_v6.c b/arch/arm/kernel/perf_event_v6.c
index 09f83e414a72..09413e7b49aa 100644
--- a/arch/arm/kernel/perf_event_v6.c
+++ b/arch/arm/kernel/perf_event_v6.c
@@ -34,9 +34,9 @@
 #include
 #include
-#include

 #include
+#include
 #include

 enum armv6_perf_types {
diff --git a/arch/arm/kernel/perf_event_v7.c b/arch/arm/kernel/perf_event_v7.c
index f9b37f876e20..126dc679b230 100644
--- a/arch/arm/kernel/perf_event_v7.c
+++ b/arch/arm/kernel/perf_event_v7.c
@@ -21,11 +21,11 @@
 #include
 #include
 #include
-#include

 #include
 #include "../vfp/vfpinstr.h"

 #include
+#include
 #include

 /*
diff --git a/arch/arm/kernel/perf_event_xscale.c b/arch/arm/kernel/perf_event_xscale.c
index 304d056d5b25..aa0499e2eef7 100644
--- a/arch/arm/kernel/perf_event_xscale.c
+++ b/arch/arm/kernel/perf_event_xscale.c
@@ -16,9 +16,9 @@
 #include
 #include
-#include

 #include
+#include
 #include

 enum xscale_perf_types {
diff --git a/arch/arm/mach-ux500/cpu-db8500.c b/arch/arm/mach-ux500/cpu-db8500.c
index 16913800bbf9..5578dc1ab52b 100644
--- a/arch/arm/mach-ux500/cpu-db8500.c
+++ b/arch/arm/mach-ux500/cpu-db8500.c
@@ -20,10 +20,10 @@
 #include
 #include
 #include
+#include
 #include
 #include
-#include
 #include

 #include "setup.h"
diff --git a/drivers/Kconfig b/drivers/Kconfig
index 6e973b8e3a3b..3497485f5eab 100644
--- a/drivers/Kconfig
+++ b/drivers/Kconfig
@@ -176,6 +176,8 @@ source "drivers/powercap/Kconfig"

 source "drivers/mcb/Kconfig"

+source "drivers/perf/Kconfig"
+
 source "drivers/ras/Kconfig"

 source "drivers/thunderbolt/Kconfig"
diff --git a/drivers/Makefile b/drivers/Makefile
index b64b49f6e01b..f245f2291b8a 100644
--- a/drivers/Makefile
+++ b/drivers/Makefile
@@ -161,6 +161,7 @@ obj-$(CONFIG_NTB)		+= ntb/
 obj-$(CONFIG_FMC)		+= fmc/
 obj-$(CONFIG_POWERCAP)		+= powercap/
 obj-$(CONFIG_MCB)		+= mcb/
+obj-$(CONFIG_PERF_EVENTS)	+= perf/
 obj-$(CONFIG_RAS)		+= ras/
 obj-$(CONFIG_THUNDERBOLT)	+= thunderbolt/
 obj-$(CONFIG_CORESIGHT)		+= hwtracing/coresight/
diff --git a/drivers/perf/Kconfig b/drivers/perf/Kconfig
new file mode 100644
index 000000000000..d9de36ee165d
--- /dev/null
+++ b/drivers/perf/Kconfig
@@ -0,0 +1,15 @@
+#
+# Performance Monitor Drivers
+#
+
+menu "Performance monitor support"
+
+config ARM_PMU
+	depends on PERF_EVENTS && ARM
+	bool "ARM PMU framework"
+	default y
+	help
+	  Say y if you want to use CPU performance monitors on ARM-based
+	  systems.
+
+endmenu
diff --git a/drivers/perf/Makefile b/drivers/perf/Makefile
new file mode 100644
index 000000000000..acd2397ded94
--- /dev/null
+++ b/drivers/perf/Makefile
@@ -0,0 +1 @@
+obj-$(CONFIG_ARM_PMU) += arm_pmu.o
diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c
new file mode 100644
index 000000000000..2365a32a595e
--- /dev/null
+++ b/drivers/perf/arm_pmu.c
@@ -0,0 +1,921 @@
+#undef DEBUG
+
+/*
+ * ARM performance counter support.
+ *
+ * Copyright (C) 2009 picoChip Designs, Ltd., Jamie Iles
+ * Copyright (C) 2010 ARM Ltd., Will Deacon
+ *
+ * This code is based on the sparc64 perf event code, which is in turn based
+ * on the x86 code.
+ */
+#define pr_fmt(fmt) "hw perfevents: " fmt
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+
+static int
+armpmu_map_cache_event(const unsigned (*cache_map)
+				      [PERF_COUNT_HW_CACHE_MAX]
+				      [PERF_COUNT_HW_CACHE_OP_MAX]
+				      [PERF_COUNT_HW_CACHE_RESULT_MAX],
+		       u64 config)
+{
+	unsigned int cache_type, cache_op, cache_result, ret;
+
+	cache_type = (config >> 0) & 0xff;
+	if (cache_type >= PERF_COUNT_HW_CACHE_MAX)
+		return -EINVAL;
+
+	cache_op = (config >> 8) & 0xff;
+	if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX)
+		return -EINVAL;
+
+	cache_result = (config >> 16) & 0xff;
+	if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
+		return -EINVAL;
+
+	ret = (int)(*cache_map)[cache_type][cache_op][cache_result];
+
+	if (ret == CACHE_OP_UNSUPPORTED)
+		return -ENOENT;
+
+	return ret;
+}
+
+static int
+armpmu_map_hw_event(const unsigned (*event_map)[PERF_COUNT_HW_MAX], u64 config)
+{
+	int mapping;
+
+	if (config >= PERF_COUNT_HW_MAX)
+		return -EINVAL;
+
+	mapping = (*event_map)[config];
+	return mapping == HW_OP_UNSUPPORTED ? -ENOENT : mapping;
+}
+
+static int
+armpmu_map_raw_event(u32 raw_event_mask, u64 config)
+{
+	return (int)(config & raw_event_mask);
+}
+
+int
+armpmu_map_event(struct perf_event *event,
+		 const unsigned (*event_map)[PERF_COUNT_HW_MAX],
+		 const unsigned (*cache_map)
+				[PERF_COUNT_HW_CACHE_MAX]
+				[PERF_COUNT_HW_CACHE_OP_MAX]
+				[PERF_COUNT_HW_CACHE_RESULT_MAX],
+		 u32 raw_event_mask)
+{
+	u64 config = event->attr.config;
+	int type = event->attr.type;
+
+	if (type == event->pmu->type)
+		return armpmu_map_raw_event(raw_event_mask, config);
+
+	switch (type) {
+	case PERF_TYPE_HARDWARE:
+		return armpmu_map_hw_event(event_map, config);
+	case PERF_TYPE_HW_CACHE:
+		return armpmu_map_cache_event(cache_map, config);
+	case PERF_TYPE_RAW:
+		return armpmu_map_raw_event(raw_event_mask, config);
+	}
+
+	return -ENOENT;
+}
+
+int armpmu_event_set_period(struct perf_event *event)
+{
+	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+	s64 left = local64_read(&hwc->period_left);
+	s64 period = hwc->sample_period;
+	int ret = 0;
+
+	if (unlikely(left <= -period)) {
+		left = period;
+		local64_set(&hwc->period_left, left);
+		hwc->last_period = period;
+		ret = 1;
+	}
+
+	if (unlikely(left <= 0)) {
+		left += period;
+		local64_set(&hwc->period_left, left);
+		hwc->last_period = period;
+		ret = 1;
+	}
+
+	/*
+	 * Limit the maximum period to prevent the counter value
+	 * from overtaking the one we are about to program. In
+	 * effect we are reducing max_period to account for
+	 * interrupt latency (and we are being very conservative).
+	 */
+	if (left > (armpmu->max_period >> 1))
+		left = armpmu->max_period >> 1;
+
+	local64_set(&hwc->prev_count, (u64)-left);
+
+	armpmu->write_counter(event, (u64)(-left) & 0xffffffff);
+
+	perf_event_update_userpage(event);
+
+	return ret;
+}
+
+u64 armpmu_event_update(struct perf_event *event)
+{
+	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+	u64 delta, prev_raw_count, new_raw_count;
+
+again:
+	prev_raw_count = local64_read(&hwc->prev_count);
+	new_raw_count = armpmu->read_counter(event);
+
+	if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
+			    new_raw_count) != prev_raw_count)
+		goto again;
+
+	delta = (new_raw_count - prev_raw_count) & armpmu->max_period;
+
+	local64_add(delta, &event->count);
+	local64_sub(delta, &hwc->period_left);
+
+	return new_raw_count;
+}
+
+static void
+armpmu_read(struct perf_event *event)
+{
+	armpmu_event_update(event);
+}
+
+static void
+armpmu_stop(struct perf_event *event, int flags)
+{
+	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+
+	/*
+	 * ARM pmu always has to update the counter, so ignore
+	 * PERF_EF_UPDATE, see comments in armpmu_start().
+	 */
+	if (!(hwc->state & PERF_HES_STOPPED)) {
+		armpmu->disable(event);
+		armpmu_event_update(event);
+		hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
+	}
+}
+
+static void armpmu_start(struct perf_event *event, int flags)
+{
+	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+
+	/*
+	 * ARM pmu always has to reprogram the period, so ignore
+	 * PERF_EF_RELOAD, see the comment below.
+	 */
+	if (flags & PERF_EF_RELOAD)
+		WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
+
+	hwc->state = 0;
+	/*
+	 * Set the period again. Some counters can't be stopped, so when we
+	 * were stopped we simply disabled the IRQ source and the counter
+	 * may have been left counting. If we don't do this step then we may
+	 * get an interrupt too soon or *way* too late if the overflow has
+	 * happened since disabling.
+	 */
+	armpmu_event_set_period(event);
+	armpmu->enable(event);
+}
+
+static void
+armpmu_del(struct perf_event *event, int flags)
+{
+	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
+	struct pmu_hw_events *hw_events = this_cpu_ptr(armpmu->hw_events);
+	struct hw_perf_event *hwc = &event->hw;
+	int idx = hwc->idx;
+
+	armpmu_stop(event, PERF_EF_UPDATE);
+	hw_events->events[idx] = NULL;
+	clear_bit(idx, hw_events->used_mask);
+	if (armpmu->clear_event_idx)
+		armpmu->clear_event_idx(hw_events, event);
+
+	perf_event_update_userpage(event);
+}
+
+static int
+armpmu_add(struct perf_event *event, int flags)
+{
+	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
+	struct pmu_hw_events *hw_events = this_cpu_ptr(armpmu->hw_events);
+	struct hw_perf_event *hwc = &event->hw;
+	int idx;
+	int err = 0;
+
+	/* An event following a process won't be stopped earlier */
+	if (!cpumask_test_cpu(smp_processor_id(), &armpmu->supported_cpus))
+		return -ENOENT;
+
+	perf_pmu_disable(event->pmu);
+
+	/* If we don't have a space for the counter then finish early. */
+	idx = armpmu->get_event_idx(hw_events, event);
+	if (idx < 0) {
+		err = idx;
+		goto out;
+	}
+
+	/*
+	 * If there is an event in the counter we are going to use then make
+	 * sure it is disabled.
+	 */
+	event->hw.idx = idx;
+	armpmu->disable(event);
+	hw_events->events[idx] = event;
+
+	hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
+	if (flags & PERF_EF_START)
+		armpmu_start(event, PERF_EF_RELOAD);
+
+	/* Propagate our changes to the userspace mapping. */
+	perf_event_update_userpage(event);
+
+out:
+	perf_pmu_enable(event->pmu);
+	return err;
+}
+
+static int
+validate_event(struct pmu *pmu, struct pmu_hw_events *hw_events,
+			       struct perf_event *event)
+{
+	struct arm_pmu *armpmu;
+
+	if (is_software_event(event))
+		return 1;
+
+	/*
+	 * Reject groups spanning multiple HW PMUs (e.g. CPU + CCI). The
+	 * core perf code won't check that the pmu->ctx == leader->ctx
+	 * until after pmu->event_init(event).
+	 */
+	if (event->pmu != pmu)
+		return 0;
+
+	if (event->state < PERF_EVENT_STATE_OFF)
+		return 1;
+
+	if (event->state == PERF_EVENT_STATE_OFF && !event->attr.enable_on_exec)
+		return 1;
+
+	armpmu = to_arm_pmu(event->pmu);
+	return armpmu->get_event_idx(hw_events, event) >= 0;
+}
+
+static int
+validate_group(struct perf_event *event)
+{
+	struct perf_event *sibling, *leader = event->group_leader;
+	struct pmu_hw_events fake_pmu;
+
+	/*
+	 * Initialise the fake PMU. We only need to populate the
+	 * used_mask for the purposes of validation.
+	 */
+	memset(&fake_pmu.used_mask, 0, sizeof(fake_pmu.used_mask));
+
+	if (!validate_event(event->pmu, &fake_pmu, leader))
+		return -EINVAL;
+
+	list_for_each_entry(sibling, &leader->sibling_list, group_entry) {
+		if (!validate_event(event->pmu, &fake_pmu, sibling))
+			return -EINVAL;
+	}
+
+	if (!validate_event(event->pmu, &fake_pmu, event))
+		return -EINVAL;
+
+	return 0;
+}
+
+static irqreturn_t armpmu_dispatch_irq(int irq, void *dev)
+{
+	struct arm_pmu *armpmu;
+	struct platform_device *plat_device;
+	struct arm_pmu_platdata *plat;
+	int ret;
+	u64 start_clock, finish_clock;
+
+	/*
+	 * we request the IRQ with a (possibly percpu) struct arm_pmu**, but
+	 * the handlers expect a struct arm_pmu*. The percpu_irq framework will
+	 * do any necessary shifting, we just need to perform the first
+	 * dereference.
+	 */
+	armpmu = *(void **)dev;
+	plat_device = armpmu->plat_device;
+	plat = dev_get_platdata(&plat_device->dev);
+
+	start_clock = sched_clock();
+	if (plat && plat->handle_irq)
+		ret = plat->handle_irq(irq, armpmu, armpmu->handle_irq);
+	else
+		ret = armpmu->handle_irq(irq, armpmu);
+	finish_clock = sched_clock();
+
+	perf_sample_event_took(finish_clock - start_clock);
+	return ret;
+}
+
+static void
+armpmu_release_hardware(struct arm_pmu *armpmu)
+{
+	armpmu->free_irq(armpmu);
+}
+
+static int
+armpmu_reserve_hardware(struct arm_pmu *armpmu)
+{
+	int err = armpmu->request_irq(armpmu, armpmu_dispatch_irq);
+	if (err) {
+		armpmu_release_hardware(armpmu);
+		return err;
+	}
+
+	return 0;
+}
+
+static void
+hw_perf_event_destroy(struct perf_event *event)
+{
+	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
+	atomic_t *active_events = &armpmu->active_events;
+	struct mutex *pmu_reserve_mutex = &armpmu->reserve_mutex;
+
+	if (atomic_dec_and_mutex_lock(active_events, pmu_reserve_mutex)) {
+		armpmu_release_hardware(armpmu);
+		mutex_unlock(pmu_reserve_mutex);
+	}
+}
+
+static int
+event_requires_mode_exclusion(struct perf_event_attr *attr)
+{
+	return attr->exclude_idle || attr->exclude_user ||
+	       attr->exclude_kernel || attr->exclude_hv;
+}
+
+static int
+__hw_perf_event_init(struct perf_event *event)
+{
+	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+	int mapping;
+
+	mapping = armpmu->map_event(event);
+
+	if (mapping < 0) {
+		pr_debug("event %x:%llx not supported\n", event->attr.type,
+			 event->attr.config);
+		return mapping;
+	}
+
+	/*
+	 * We don't assign an index until we actually place the event onto
+	 * hardware. Use -1 to signify that we haven't decided where to put it
+	 * yet. For SMP systems, each core has it's own PMU so we can't do any
+	 * clever allocation or constraints checking at this point.
+	 */
+	hwc->idx		= -1;
+	hwc->config_base	= 0;
+	hwc->config		= 0;
+	hwc->event_base		= 0;
+
+	/*
+	 * Check whether we need to exclude the counter from certain modes.
+	 */
+	if ((!armpmu->set_event_filter ||
+	     armpmu->set_event_filter(hwc, &event->attr)) &&
+	     event_requires_mode_exclusion(&event->attr)) {
+		pr_debug("ARM performance counters do not support "
+			 "mode exclusion\n");
+		return -EOPNOTSUPP;
+	}
+
+	/*
+	 * Store the event encoding into the config_base field.
+	 */
+	hwc->config_base	    |= (unsigned long)mapping;
+
+	if (!is_sampling_event(event)) {
+		/*
+		 * For non-sampling runs, limit the sample_period to half
+		 * of the counter width. That way, the new counter value
+		 * is far less likely to overtake the previous one unless
+		 * you have some serious IRQ latency issues.
+		 */
+		hwc->sample_period  = armpmu->max_period >> 1;
+		hwc->last_period    = hwc->sample_period;
+		local64_set(&hwc->period_left, hwc->sample_period);
+	}
+
+	if (event->group_leader != event) {
+		if (validate_group(event) != 0)
+			return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int armpmu_event_init(struct perf_event *event)
+{
+	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
+	int err = 0;
+	atomic_t *active_events = &armpmu->active_events;
+
+	/*
+	 * Reject CPU-affine events for CPUs that are of a different class to
+	 * that which this PMU handles. Process-following events (where
+	 * event->cpu == -1) can be migrated between CPUs, and thus we have to
+	 * reject them later (in armpmu_add) if they're scheduled on a
+	 * different class of CPU.
+	 */
+	if (event->cpu != -1 &&
+		!cpumask_test_cpu(event->cpu, &armpmu->supported_cpus))
+		return -ENOENT;
+
+	/* does not support taken branch sampling */
+	if (has_branch_stack(event))
+		return -EOPNOTSUPP;
+
+	if (armpmu->map_event(event) == -ENOENT)
+		return -ENOENT;
+
+	event->destroy = hw_perf_event_destroy;
+
+	if (!atomic_inc_not_zero(active_events)) {
+		mutex_lock(&armpmu->reserve_mutex);
+		if (atomic_read(active_events) == 0)
+			err = armpmu_reserve_hardware(armpmu);
+
+		if (!err)
+			atomic_inc(active_events);
+		mutex_unlock(&armpmu->reserve_mutex);
+	}
+
+	if (err)
+		return err;
+
+	err = __hw_perf_event_init(event);
+	if (err)
+		hw_perf_event_destroy(event);
+
+	return err;
+}
+
+static void armpmu_enable(struct pmu *pmu)
+{
+	struct arm_pmu *armpmu = to_arm_pmu(pmu);
+	struct pmu_hw_events *hw_events = this_cpu_ptr(armpmu->hw_events);
+	int enabled = bitmap_weight(hw_events->used_mask, armpmu->num_events);
+
+	/* For task-bound events we may be called on other CPUs */
+	if (!cpumask_test_cpu(smp_processor_id(), &armpmu->supported_cpus))
+		return;
+
+	if (enabled)
+		armpmu->start(armpmu);
+}
+
+static void armpmu_disable(struct pmu *pmu)
+{
+	struct arm_pmu *armpmu = to_arm_pmu(pmu);
+
+	/* For task-bound events we may be called on other CPUs */
+	if (!cpumask_test_cpu(smp_processor_id(), &armpmu->supported_cpus))
+		return;
+
+	armpmu->stop(armpmu);
+}
+
+/*
+ * In heterogeneous systems, events are specific to a particular
+ * microarchitecture, and aren't suitable for another. Thus, only match CPUs of
+ * the same microarchitecture.
+ */
+static int armpmu_filter_match(struct perf_event *event)
+{
+	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
+	unsigned int cpu = smp_processor_id();
+	return cpumask_test_cpu(cpu, &armpmu->supported_cpus);
+}
+
+static void armpmu_init(struct arm_pmu *armpmu)
+{
+	atomic_set(&armpmu->active_events, 0);
+	mutex_init(&armpmu->reserve_mutex);
+
+	armpmu->pmu = (struct pmu) {
+		.pmu_enable	= armpmu_enable,
+		.pmu_disable	= armpmu_disable,
+		.event_init	= armpmu_event_init,
+		.add		= armpmu_add,
+		.del		= armpmu_del,
+		.start		= armpmu_start,
+		.stop		= armpmu_stop,
+		.read		= armpmu_read,
+		.filter_match	= armpmu_filter_match,
+	};
+}
+
+int armpmu_register(struct arm_pmu *armpmu, int type)
+{
+	armpmu_init(armpmu);
+	pr_info("enabled with %s PMU driver, %d counters available\n",
+			armpmu->name, armpmu->num_events);
+	return perf_pmu_register(&armpmu->pmu, armpmu->name, type);
+}
+
+/* Set at runtime when we know what CPU type we are. */
+static struct arm_pmu *__oprofile_cpu_pmu;
+
+/*
+ * Despite the names, these two functions are CPU-specific and are used
+ * by the OProfile/perf code.
+ */
+const char *perf_pmu_name(void)
+{
+	if (!__oprofile_cpu_pmu)
+		return NULL;
+
+	return __oprofile_cpu_pmu->name;
+}
+EXPORT_SYMBOL_GPL(perf_pmu_name);
+
+int perf_num_counters(void)
+{
+	int max_events = 0;
+
+	if (__oprofile_cpu_pmu != NULL)
+		max_events = __oprofile_cpu_pmu->num_events;
+
+	return max_events;
+}
+EXPORT_SYMBOL_GPL(perf_num_counters);
+
+static void cpu_pmu_enable_percpu_irq(void *data)
+{
+	int irq = *(int *)data;
+
+	enable_percpu_irq(irq, IRQ_TYPE_NONE);
+}
+
+static void cpu_pmu_disable_percpu_irq(void *data)
+{
+	int irq = *(int *)data;
+
+	disable_percpu_irq(irq);
+}
+
+static void cpu_pmu_free_irq(struct arm_pmu *cpu_pmu)
+{
+	int i, irq, irqs;
+	struct platform_device *pmu_device = cpu_pmu->plat_device;
+	struct pmu_hw_events __percpu *hw_events = cpu_pmu->hw_events;
+
+	irqs = min(pmu_device->num_resources, num_possible_cpus());
+
+	irq = platform_get_irq(pmu_device, 0);
+	if (irq >= 0 && irq_is_percpu(irq)) {
+		on_each_cpu(cpu_pmu_disable_percpu_irq, &irq, 1);
+		free_percpu_irq(irq, &hw_events->percpu_pmu);
+	} else {
+		for (i = 0; i < irqs; ++i) {
+			int cpu = i;
+
+			if (cpu_pmu->irq_affinity)
+				cpu = cpu_pmu->irq_affinity[i];
+
+			if (!cpumask_test_and_clear_cpu(cpu, &cpu_pmu->active_irqs))
+				continue;
+			irq = platform_get_irq(pmu_device, i);
+			if (irq >= 0)
+				free_irq(irq, per_cpu_ptr(&hw_events->percpu_pmu, cpu));
+		}
+	}
+}
+
+static int cpu_pmu_request_irq(struct arm_pmu *cpu_pmu, irq_handler_t handler)
+{
+	int i, err, irq, irqs;
+	struct platform_device *pmu_device = cpu_pmu->plat_device;
+	struct pmu_hw_events __percpu *hw_events = cpu_pmu->hw_events;
+
+	if (!pmu_device)
+		return -ENODEV;
+
+	irqs = min(pmu_device->num_resources, num_possible_cpus());
+	if (irqs < 1) {
+		pr_warn_once("perf/ARM: No irqs for PMU defined, sampling events not supported\n");
+		return 0;
+	}
+
+	irq = platform_get_irq(pmu_device, 0);
+	if (irq >= 0 && irq_is_percpu(irq)) {
+		err = request_percpu_irq(irq, handler, "arm-pmu",
+					 &hw_events->percpu_pmu);
+		if (err) {
+			pr_err("unable to request IRQ%d for ARM PMU counters\n",
+				irq);
+			return err;
+		}
+		on_each_cpu(cpu_pmu_enable_percpu_irq, &irq, 1);
+	} else {
+		for (i = 0; i < irqs; ++i) {
+			int cpu = i;
+
+			err = 0;
+			irq = platform_get_irq(pmu_device, i);
+			if (irq < 0)
+				continue;
+
+			if (cpu_pmu->irq_affinity)
+				cpu = cpu_pmu->irq_affinity[i];
+
+			/*
+			 * If we have a single PMU interrupt that we can't shift,
+			 * assume that we're running on a uniprocessor machine and
+			 * continue. Otherwise, continue without this interrupt.
+			 */
+			if (irq_set_affinity(irq, cpumask_of(cpu)) && irqs > 1) {
+				pr_warn("unable to set irq affinity (irq=%d, cpu=%u)\n",
+					irq, cpu);
+				continue;
+			}
+
+			err = request_irq(irq, handler,
+					  IRQF_NOBALANCING | IRQF_NO_THREAD, "arm-pmu",
+					  per_cpu_ptr(&hw_events->percpu_pmu, cpu));
+			if (err) {
+				pr_err("unable to request IRQ%d for ARM PMU counters\n",
+					irq);
+				return err;
+			}
+
+			cpumask_set_cpu(cpu, &cpu_pmu->active_irqs);
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * PMU hardware loses all context when a CPU goes offline.
+ * When a CPU is hotplugged back in, since some hardware registers are
+ * UNKNOWN at reset, the PMU must be explicitly reset to avoid reading
+ * junk values out of them.
+ */ +static int cpu_pmu_notify(struct notifier_block *b, unsigned long action, + void *hcpu) +{ + int cpu = (unsigned long)hcpu; + struct arm_pmu *pmu = container_of(b, struct arm_pmu, hotplug_nb); + + if ((action & ~CPU_TASKS_FROZEN) != CPU_STARTING) + return NOTIFY_DONE; + + if (!cpumask_test_cpu(cpu, &pmu->supported_cpus)) + return NOTIFY_DONE; + + if (pmu->reset) + pmu->reset(pmu); + else + return NOTIFY_DONE; + + return NOTIFY_OK; +} + +static int cpu_pmu_init(struct arm_pmu *cpu_pmu) +{ + int err; + int cpu; + struct pmu_hw_events __percpu *cpu_hw_events; + + cpu_hw_events = alloc_percpu(struct pmu_hw_events); + if (!cpu_hw_events) + return -ENOMEM; + + cpu_pmu->hotplug_nb.notifier_call = cpu_pmu_notify; + err = register_cpu_notifier(&cpu_pmu->hotplug_nb); + if (err) + goto out_hw_events; + + for_each_possible_cpu(cpu) { + struct pmu_hw_events *events = per_cpu_ptr(cpu_hw_events, cpu); + raw_spin_lock_init(&events->pmu_lock); + events->percpu_pmu = cpu_pmu; + } + + cpu_pmu->hw_events = cpu_hw_events; + cpu_pmu->request_irq = cpu_pmu_request_irq; + cpu_pmu->free_irq = cpu_pmu_free_irq; + + /* Ensure the PMU has sane values out of reset. */ + if (cpu_pmu->reset) + on_each_cpu_mask(&cpu_pmu->supported_cpus, cpu_pmu->reset, + cpu_pmu, 1); + + /* If no interrupts available, set the corresponding capability flag */ + if (!platform_get_irq(cpu_pmu->plat_device, 0)) + cpu_pmu->pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT; + + return 0; + +out_hw_events: + free_percpu(cpu_hw_events); + return err; +} + +static void cpu_pmu_destroy(struct arm_pmu *cpu_pmu) +{ + unregister_cpu_notifier(&cpu_pmu->hotplug_nb); + free_percpu(cpu_pmu->hw_events); +} + +/* + * CPU PMU identification and probing. + */ +static int probe_current_pmu(struct arm_pmu *pmu, + const struct pmu_probe_info *info) +{ + int cpu = get_cpu(); + unsigned int cpuid = read_cpuid_id(); + int ret = -ENODEV; + + pr_info("probing PMU on CPU %d\n", cpu); + + for (; info->init != NULL; info++) { + if ((cpuid & info->mask) != info->cpuid) + continue; + ret = info->init(pmu); + break; + } + + put_cpu(); + return ret; +} + +static int of_pmu_irq_cfg(struct arm_pmu *pmu) +{ + int *irqs, i = 0; + bool using_spi = false; + struct platform_device *pdev = pmu->plat_device; + + irqs = kcalloc(pdev->num_resources, sizeof(*irqs), GFP_KERNEL); + if (!irqs) + return -ENOMEM; + + do { + struct device_node *dn; + int cpu, irq; + + /* See if we have an affinity entry */ + dn = of_parse_phandle(pdev->dev.of_node, "interrupt-affinity", i); + if (!dn) + break; + + /* Check the IRQ type and prohibit a mix of PPIs and SPIs */ + irq = platform_get_irq(pdev, i); + if (irq >= 0) { + bool spi = !irq_is_percpu(irq); + + if (i > 0 && spi != using_spi) { + pr_err("PPI/SPI IRQ type mismatch for %s!\n", + dn->name); + kfree(irqs); + return -EINVAL; + } + + using_spi = spi; + } + + /* Now look up the logical CPU number */ + for_each_possible_cpu(cpu) + if (dn == of_cpu_device_node_get(cpu)) + break; + + if (cpu >= nr_cpu_ids) { + pr_warn("Failed to find logical CPU for %s\n", + dn->name); + of_node_put(dn); + cpumask_setall(&pmu->supported_cpus); + break; + } + of_node_put(dn); + + /* For SPIs, we need to track the affinity per IRQ */ + if (using_spi) { + if (i >= pdev->num_resources) { + of_node_put(dn); + break; + } + + irqs[i] = cpu; + } + + /* Keep track of the CPUs containing this PMU type */ + cpumask_set_cpu(cpu, &pmu->supported_cpus); + of_node_put(dn); + i++; + } while (1); + + /* If we didn't manage to parse anything, claim to support all CPUs */ + if 
(cpumask_weight(&pmu->supported_cpus) == 0) + cpumask_setall(&pmu->supported_cpus); + + /* If we matched up the IRQ affinities, use them to route the SPIs */ + if (using_spi && i == pdev->num_resources) + pmu->irq_affinity = irqs; + else + kfree(irqs); + + return 0; +} + +int arm_pmu_device_probe(struct platform_device *pdev, + const struct of_device_id *of_table, + const struct pmu_probe_info *probe_table) +{ + const struct of_device_id *of_id; + const int (*init_fn)(struct arm_pmu *); + struct device_node *node = pdev->dev.of_node; + struct arm_pmu *pmu; + int ret = -ENODEV; + + pmu = kzalloc(sizeof(struct arm_pmu), GFP_KERNEL); + if (!pmu) { + pr_info("failed to allocate PMU device!\n"); + return -ENOMEM; + } + + if (!__oprofile_cpu_pmu) + __oprofile_cpu_pmu = pmu; + + pmu->plat_device = pdev; + + if (node && (of_id = of_match_node(of_table, pdev->dev.of_node))) { + init_fn = of_id->data; + + ret = of_pmu_irq_cfg(pmu); + if (!ret) + ret = init_fn(pmu); + } else { + ret = probe_current_pmu(pmu, probe_table); + cpumask_setall(&pmu->supported_cpus); + } + + if (ret) { + pr_info("failed to probe PMU!\n"); + goto out_free; + } + + ret = cpu_pmu_init(pmu); + if (ret) + goto out_free; + + ret = armpmu_register(pmu, -1); + if (ret) + goto out_destroy; + + return 0; + +out_destroy: + cpu_pmu_destroy(pmu); +out_free: + pr_info("failed to register PMU devices!\n"); + kfree(pmu); + return ret; +} diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h new file mode 100644 index 000000000000..bfa673bb822d --- /dev/null +++ b/include/linux/perf/arm_pmu.h @@ -0,0 +1,154 @@ +/* + * linux/arch/arm/include/asm/pmu.h + * + * Copyright (C) 2009 picoChip Designs Ltd, Jamie Iles + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +#ifndef __ARM_PMU_H__ +#define __ARM_PMU_H__ + +#include +#include + +#include + +/* + * struct arm_pmu_platdata - ARM PMU platform data + * + * @handle_irq: an optional handler which will be called from the + * interrupt and passed the address of the low level handler, + * and can be used to implement any platform specific handling + * before or after calling it. + */ +struct arm_pmu_platdata { + irqreturn_t (*handle_irq)(int irq, void *dev, + irq_handler_t pmu_handler); +}; + +#ifdef CONFIG_ARM_PMU + +/* + * The ARMv7 CPU PMU supports up to 32 event counters. + */ +#define ARMPMU_MAX_HWEVENTS 32 + +#define HW_OP_UNSUPPORTED 0xFFFF +#define C(_x) PERF_COUNT_HW_CACHE_##_x +#define CACHE_OP_UNSUPPORTED 0xFFFF + +#define PERF_MAP_ALL_UNSUPPORTED \ + [0 ... PERF_COUNT_HW_MAX - 1] = HW_OP_UNSUPPORTED + +#define PERF_CACHE_MAP_ALL_UNSUPPORTED \ +[0 ... C(MAX) - 1] = { \ + [0 ... C(OP_MAX) - 1] = { \ + [0 ... C(RESULT_MAX) - 1] = CACHE_OP_UNSUPPORTED, \ + }, \ +} + +/* The events for a given PMU register set. */ +struct pmu_hw_events { + /* + * The events that are active on the PMU for the given index. + */ + struct perf_event *events[ARMPMU_MAX_HWEVENTS]; + + /* + * A 1 bit for an index indicates that the counter is being used for + * an event. A 0 means that the counter can be used. + */ + DECLARE_BITMAP(used_mask, ARMPMU_MAX_HWEVENTS); + + /* + * Hardware lock to serialize accesses to PMU registers. Needed for the + * read/modify/write sequences. + */ + raw_spinlock_t pmu_lock; + + /* + * When using percpu IRQs, we need a percpu dev_id. Place it here as we + * already have to allocate this struct per cpu. 
+ */ + struct arm_pmu *percpu_pmu; +}; + +struct arm_pmu { + struct pmu pmu; + cpumask_t active_irqs; + cpumask_t supported_cpus; + int *irq_affinity; + char *name; + irqreturn_t (*handle_irq)(int irq_num, void *dev); + void (*enable)(struct perf_event *event); + void (*disable)(struct perf_event *event); + int (*get_event_idx)(struct pmu_hw_events *hw_events, + struct perf_event *event); + void (*clear_event_idx)(struct pmu_hw_events *hw_events, + struct perf_event *event); + int (*set_event_filter)(struct hw_perf_event *evt, + struct perf_event_attr *attr); + u32 (*read_counter)(struct perf_event *event); + void (*write_counter)(struct perf_event *event, u32 val); + void (*start)(struct arm_pmu *); + void (*stop)(struct arm_pmu *); + void (*reset)(void *); + int (*request_irq)(struct arm_pmu *, irq_handler_t handler); + void (*free_irq)(struct arm_pmu *); + int (*map_event)(struct perf_event *event); + int num_events; + atomic_t active_events; + struct mutex reserve_mutex; + u64 max_period; + struct platform_device *plat_device; + struct pmu_hw_events __percpu *hw_events; + struct notifier_block hotplug_nb; +}; + +#define to_arm_pmu(p) (container_of(p, struct arm_pmu, pmu)) + +int armpmu_register(struct arm_pmu *armpmu, int type); + +u64 armpmu_event_update(struct perf_event *event); + +int armpmu_event_set_period(struct perf_event *event); + +int armpmu_map_event(struct perf_event *event, + const unsigned (*event_map)[PERF_COUNT_HW_MAX], + const unsigned (*cache_map)[PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX], + u32 raw_event_mask); + +struct pmu_probe_info { + unsigned int cpuid; + unsigned int mask; + int (*init)(struct arm_pmu *); +}; + +#define PMU_PROBE(_cpuid, _mask, _fn) \ +{ \ + .cpuid = (_cpuid), \ + .mask = (_mask), \ + .init = (_fn), \ +} + +#define ARM_PMU_PROBE(_cpuid, _fn) \ + PMU_PROBE(_cpuid, ARM_CPU_PART_MASK, _fn) + +#define ARM_PMU_XSCALE_MASK ((0xff << 24) | ARM_CPU_XSCALE_ARCH_MASK) + +#define XSCALE_PMU_PROBE(_version, _fn) \ + PMU_PROBE(ARM_CPU_IMP_INTEL << 24 | _version, ARM_PMU_XSCALE_MASK, _fn) + +int arm_pmu_device_probe(struct platform_device *pdev, + const struct of_device_id *of_table, + const struct pmu_probe_info *probe_table); + +#endif /* CONFIG_ARM_PMU */ + +#endif /* __ARM_PMU_H__ */ -- cgit v1.2.3-55-g7522 From 787047eea24a2443c366679ae6b5a3873a33b64e Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Wed, 29 Jul 2015 00:34:48 +0100 Subject: ARM: 8392/3: smp: Only expose /sys/.../cpuX/online if hotpluggable Writes to /sys/.../cpuX/online fail if we determine the platform doesn't support hotplug for that CPU. Furthermore, if the cpu_die op isn't specified the system hangs when we try to offline a CPU and it comes right back online unexpectedly. Let's figure this stuff out before we make the sysfs nodes so that the online file doesn't even exist if it isn't (at least sometimes) possible to hotplug the CPU. Add a new 'cpu_can_disable' op and repoint all 'cpu_disable' implementations at it because all implementers use the op to indicate if a CPU can be hotplugged or not in a static fashion. With PSCI we may need to add a 'cpu_disable' op so that the secure OS can be migrated off the CPU we're trying to hotplug. In this case, the 'cpu_can_disable' op will indicate that all CPUs are hotpluggable by returning true, but the 'cpu_disable' op will make a PSCI migration call and occasionally fail, denying the hotplug of a CPU. 
This shouldn't be any worse than x86 where we may indicate that all CPUs are hotpluggable but occasionally we can't offline a CPU due to check_irq_vectors_for_cpu_disable() failing to find a CPU to move vectors to. Cc: Mark Rutland Cc: Nicolas Pitre Cc: Dave Martin Acked-by: Simon Horman [shmobile portion] Tested-by: Simon Horman Cc: Magnus Damm Cc: Tested-by: Tyler Baker Cc: Geert Uytterhoeven Signed-off-by: Stephen Boyd Signed-off-by: Russell King --- arch/arm/common/mcpm_platsmp.c | 12 ++++-------- arch/arm/include/asm/smp.h | 1 + arch/arm/include/asm/smp_plat.h | 9 +++++++++ arch/arm/kernel/setup.c | 2 +- arch/arm/kernel/smp.c | 15 ++++++++++++++- arch/arm/mach-shmobile/common.h | 2 +- arch/arm/mach-shmobile/platsmp.c | 4 ++-- arch/arm/mach-shmobile/smp-r8a7790.c | 2 +- arch/arm/mach-shmobile/smp-r8a7791.c | 2 +- arch/arm/mach-shmobile/smp-sh73a0.c | 2 +- 10 files changed, 35 insertions(+), 16 deletions(-) (limited to 'arch/arm/include/asm') diff --git a/arch/arm/common/mcpm_platsmp.c b/arch/arm/common/mcpm_platsmp.c index 92e54d7c6f46..2b25b6038f66 100644 --- a/arch/arm/common/mcpm_platsmp.c +++ b/arch/arm/common/mcpm_platsmp.c @@ -65,14 +65,10 @@ static int mcpm_cpu_kill(unsigned int cpu) return !mcpm_wait_for_cpu_powerdown(pcpu, pcluster); } -static int mcpm_cpu_disable(unsigned int cpu) +static bool mcpm_cpu_can_disable(unsigned int cpu) { - /* - * We assume all CPUs may be shut down. - * This would be the hook to use for eventual Secure - * OS migration requests as described in the PSCI spec. - */ - return 0; + /* We assume all CPUs may be shut down. */ + return true; } static void mcpm_cpu_die(unsigned int cpu) @@ -92,7 +88,7 @@ static struct smp_operations __initdata mcpm_smp_ops = { .smp_secondary_init = mcpm_secondary_init, #ifdef CONFIG_HOTPLUG_CPU .cpu_kill = mcpm_cpu_kill, - .cpu_disable = mcpm_cpu_disable, + .cpu_can_disable = mcpm_cpu_can_disable, .cpu_die = mcpm_cpu_die, #endif }; diff --git a/arch/arm/include/asm/smp.h b/arch/arm/include/asm/smp.h index 2f3ac1ba6fb4..318ce89eeff7 100644 --- a/arch/arm/include/asm/smp.h +++ b/arch/arm/include/asm/smp.h @@ -105,6 +105,7 @@ struct smp_operations { #ifdef CONFIG_HOTPLUG_CPU int (*cpu_kill)(unsigned int cpu); void (*cpu_die)(unsigned int cpu); + bool (*cpu_can_disable)(unsigned int cpu); int (*cpu_disable)(unsigned int cpu); #endif #endif diff --git a/arch/arm/include/asm/smp_plat.h b/arch/arm/include/asm/smp_plat.h index 993e5224d8f7..f9080717fc88 100644 --- a/arch/arm/include/asm/smp_plat.h +++ b/arch/arm/include/asm/smp_plat.h @@ -107,4 +107,13 @@ static inline u32 mpidr_hash_size(void) extern int platform_can_secondary_boot(void); extern int platform_can_cpu_hotplug(void); +#ifdef CONFIG_HOTPLUG_CPU +extern int platform_can_hotplug_cpu(unsigned int cpu); +#else +static inline int platform_can_hotplug_cpu(unsigned int cpu) +{ + return 0; +} +#endif + #endif diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index 36c18b73c1f4..6bbec6042052 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -1015,7 +1015,7 @@ static int __init topology_init(void) for_each_possible_cpu(cpu) { struct cpuinfo_arm *cpuinfo = &per_cpu(cpu_data, cpu); - cpuinfo->cpu.hotpluggable = 1; + cpuinfo->cpu.hotpluggable = platform_can_hotplug_cpu(cpu); register_cpu(&cpuinfo->cpu, cpu); } diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index 90dfbedfbfb8..3cd846f48eaf 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -175,13 +175,26 @@ static int platform_cpu_disable(unsigned int cpu) if 
(smp_ops.cpu_disable) return smp_ops.cpu_disable(cpu); + return 0; +} + +int platform_can_hotplug_cpu(unsigned int cpu) +{ + /* cpu_die must be specified to support hotplug */ + if (!smp_ops.cpu_die) + return 0; + + if (smp_ops.cpu_can_disable) + return smp_ops.cpu_can_disable(cpu); + /* * By default, allow disabling all CPUs except the first one, * since this is special on a lot of platforms, e.g. because * of clock tick interrupts. */ - return cpu == 0 ? -EPERM : 0; + return cpu != 0; } + /* * __cpu_disable runs on the processor to be shutdown. */ diff --git a/arch/arm/mach-shmobile/common.h b/arch/arm/mach-shmobile/common.h index 476092b86c6e..8d27ec546a35 100644 --- a/arch/arm/mach-shmobile/common.h +++ b/arch/arm/mach-shmobile/common.h @@ -13,7 +13,7 @@ extern void shmobile_smp_boot(void); extern void shmobile_smp_sleep(void); extern void shmobile_smp_hook(unsigned int cpu, unsigned long fn, unsigned long arg); -extern int shmobile_smp_cpu_disable(unsigned int cpu); +extern bool shmobile_smp_cpu_can_disable(unsigned int cpu); extern void shmobile_boot_scu(void); extern void shmobile_smp_scu_prepare_cpus(unsigned int max_cpus); extern void shmobile_smp_scu_cpu_die(unsigned int cpu); diff --git a/arch/arm/mach-shmobile/platsmp.c b/arch/arm/mach-shmobile/platsmp.c index 3923e09e966d..b23378f3d7e1 100644 --- a/arch/arm/mach-shmobile/platsmp.c +++ b/arch/arm/mach-shmobile/platsmp.c @@ -31,8 +31,8 @@ void shmobile_smp_hook(unsigned int cpu, unsigned long fn, unsigned long arg) } #ifdef CONFIG_HOTPLUG_CPU -int shmobile_smp_cpu_disable(unsigned int cpu) +bool shmobile_smp_cpu_can_disable(unsigned int cpu) { - return 0; /* Hotplug of any CPU is supported */ + return true; /* Hotplug of any CPU is supported */ } #endif diff --git a/arch/arm/mach-shmobile/smp-r8a7790.c b/arch/arm/mach-shmobile/smp-r8a7790.c index 930f45cbc08a..947e437cab68 100644 --- a/arch/arm/mach-shmobile/smp-r8a7790.c +++ b/arch/arm/mach-shmobile/smp-r8a7790.c @@ -64,7 +64,7 @@ struct smp_operations r8a7790_smp_ops __initdata = { .smp_prepare_cpus = r8a7790_smp_prepare_cpus, .smp_boot_secondary = shmobile_smp_apmu_boot_secondary, #ifdef CONFIG_HOTPLUG_CPU - .cpu_disable = shmobile_smp_cpu_disable, + .cpu_can_disable = shmobile_smp_cpu_can_disable, .cpu_die = shmobile_smp_apmu_cpu_die, .cpu_kill = shmobile_smp_apmu_cpu_kill, #endif diff --git a/arch/arm/mach-shmobile/smp-r8a7791.c b/arch/arm/mach-shmobile/smp-r8a7791.c index 5e2d1db79afa..b2508c0d276b 100644 --- a/arch/arm/mach-shmobile/smp-r8a7791.c +++ b/arch/arm/mach-shmobile/smp-r8a7791.c @@ -58,7 +58,7 @@ struct smp_operations r8a7791_smp_ops __initdata = { .smp_prepare_cpus = r8a7791_smp_prepare_cpus, .smp_boot_secondary = r8a7791_smp_boot_secondary, #ifdef CONFIG_HOTPLUG_CPU - .cpu_disable = shmobile_smp_cpu_disable, + .cpu_can_disable = shmobile_smp_cpu_can_disable, .cpu_die = shmobile_smp_apmu_cpu_die, .cpu_kill = shmobile_smp_apmu_cpu_kill, #endif diff --git a/arch/arm/mach-shmobile/smp-sh73a0.c b/arch/arm/mach-shmobile/smp-sh73a0.c index 2106d6b76a06..ae7c764fd6b4 100644 --- a/arch/arm/mach-shmobile/smp-sh73a0.c +++ b/arch/arm/mach-shmobile/smp-sh73a0.c @@ -68,7 +68,7 @@ struct smp_operations sh73a0_smp_ops __initdata = { .smp_prepare_cpus = sh73a0_smp_prepare_cpus, .smp_boot_secondary = sh73a0_boot_secondary, #ifdef CONFIG_HOTPLUG_CPU - .cpu_disable = shmobile_smp_cpu_disable, + .cpu_can_disable = shmobile_smp_cpu_can_disable, .cpu_die = shmobile_smp_scu_cpu_die, .cpu_kill = shmobile_smp_scu_cpu_kill, #endif -- cgit v1.2.3-55-g7522 From 
9ac87c5a0b19417f925dc61cac7198d872159a2c Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 31 Jul 2015 11:28:54 +0100 Subject: ARM: 8407/1: switch_to: Remove finish_arch_switch Fold finish_arch_switch() into switch_to(), in preparation for the removal of the finish_arch_switch call from core sched code. Signed-off-by: Will Deacon Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Russell King --- arch/arm/include/asm/switch_to.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'arch/arm/include/asm') diff --git a/arch/arm/include/asm/switch_to.h b/arch/arm/include/asm/switch_to.h index c99e259469f7..12ebfcc1d539 100644 --- a/arch/arm/include/asm/switch_to.h +++ b/arch/arm/include/asm/switch_to.h @@ -10,7 +10,9 @@ * CPU. */ #if defined(CONFIG_PREEMPT) && defined(CONFIG_SMP) && defined(CONFIG_CPU_V7) -#define finish_arch_switch(prev) dsb(ish) +#define __complete_pending_tlbi() dsb(ish) +#else +#define __complete_pending_tlbi() #endif /* @@ -22,6 +24,7 @@ extern struct task_struct *__switch_to(struct task_struct *, struct thread_info #define switch_to(prev,next,last) \ do { \ + __complete_pending_tlbi(); \ last = __switch_to(prev,task_thread_info(prev), task_thread_info(next)); \ } while (0) -- cgit v1.2.3-55-g7522 From 1234e3fda9aa24b2d650bbcd9ef09d5f6a12dc86 Mon Sep 17 00:00:00 2001 From: Russell King Date: Fri, 24 Jul 2015 09:10:55 +0100 Subject: ARM: reduce visibility of dmac_* functions The dmac_* functions are private to the ARM DMA API implementation, and should not be used by drivers. In order to discourage their use, remove their prototypes and macros from asm/*.h. We have to leave dmac_flush_range() behind as Exynos and MSM IOMMU code use these; once these sites are fixed, this can be moved also. Signed-off-by: Russell King --- arch/arm/include/asm/cacheflush.h | 4 ---- arch/arm/include/asm/glue-cache.h | 2 -- arch/arm/mm/dma-mapping.c | 1 + arch/arm/mm/dma.h | 32 ++++++++++++++++++++++++++++++++ 4 files changed, 33 insertions(+), 6 deletions(-) create mode 100644 arch/arm/mm/dma.h (limited to 'arch/arm/include/asm') diff --git a/arch/arm/include/asm/cacheflush.h b/arch/arm/include/asm/cacheflush.h index 4812cda8fd17..c5230a44eeca 100644 --- a/arch/arm/include/asm/cacheflush.h +++ b/arch/arm/include/asm/cacheflush.h @@ -140,8 +140,6 @@ extern struct cpu_cache_fns cpu_cache; * is visible to DMA, or data written by DMA to system memory is * visible to the CPU. */ -#define dmac_map_area cpu_cache.dma_map_area -#define dmac_unmap_area cpu_cache.dma_unmap_area #define dmac_flush_range cpu_cache.dma_flush_range #else @@ -161,8 +159,6 @@ extern void __cpuc_flush_dcache_area(void *, size_t); * is visible to DMA, or data written by DMA to system memory is * visible to the CPU. 
*/ -extern void dmac_map_area(const void *, size_t, int); -extern void dmac_unmap_area(const void *, size_t, int); extern void dmac_flush_range(const void *, const void *); #endif diff --git a/arch/arm/include/asm/glue-cache.h b/arch/arm/include/asm/glue-cache.h index a3c24cd5b7c8..cab07f69382d 100644 --- a/arch/arm/include/asm/glue-cache.h +++ b/arch/arm/include/asm/glue-cache.h @@ -158,8 +158,6 @@ static inline void nop_dma_unmap_area(const void *s, size_t l, int f) { } #define __cpuc_coherent_user_range __glue(_CACHE,_coherent_user_range) #define __cpuc_flush_dcache_area __glue(_CACHE,_flush_kern_dcache_area) -#define dmac_map_area __glue(_CACHE,_dma_map_area) -#define dmac_unmap_area __glue(_CACHE,_dma_unmap_area) #define dmac_flush_range __glue(_CACHE,_dma_flush_range) #endif diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index 1ced8a0f7a52..5edf17cf043d 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -39,6 +39,7 @@ #include #include +#include "dma.h" #include "mm.h" /* diff --git a/arch/arm/mm/dma.h b/arch/arm/mm/dma.h new file mode 100644 index 000000000000..70ea6852f94e --- /dev/null +++ b/arch/arm/mm/dma.h @@ -0,0 +1,32 @@ +#ifndef DMA_H +#define DMA_H + +#include + +#ifndef MULTI_CACHE +#define dmac_map_area __glue(_CACHE,_dma_map_area) +#define dmac_unmap_area __glue(_CACHE,_dma_unmap_area) + +/* + * These are private to the dma-mapping API. Do not use directly. + * Their sole purpose is to ensure that data held in the cache + * is visible to DMA, or data written by DMA to system memory is + * visible to the CPU. + */ +extern void dmac_map_area(const void *, size_t, int); +extern void dmac_unmap_area(const void *, size_t, int); + +#else + +/* + * These are private to the dma-mapping API. Do not use directly. + * Their sole purpose is to ensure that data held in the cache + * is visible to DMA, or data written by DMA to system memory is + * visible to the CPU. + */ +#define dmac_map_area cpu_cache.dma_map_area +#define dmac_unmap_area cpu_cache.dma_unmap_area + +#endif + +#endif -- cgit v1.2.3-55-g7522 From be120397e7709d9d5ed88317a385ce864a2603bc Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Fri, 31 Jul 2015 15:46:19 +0100 Subject: ARM: migrate to common PSCI client code Now that the common PSCI client code has been factored out to drivers/firmware, and made safe for 32-bit use, move the 32-bit ARM code over to it. This results in a moderate reduction of duplicated lines, and will prevent further duplication as the PSCI client code is updated for PSCI 1.0 and beyond. The two legacy platform users of the PSCI invocation code are updated to account for interface changes. In both cases the power state parameter (which is constant) is now generated using macros, so that the pack/unpack logic can be killed in preparation for PSCI 1.0 power state changes. 
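As a sketch of the macro-based encoding referred to above (this simply mirrors the psci_power_state_pack() helper that the diff below deletes; the function name here is illustrative only):

    /* Pack the three PSCI 0.2 power_state fields into a u32. */
    static u32 psci_0_2_pack_state(u32 id, u32 type, u32 affinity_level)
    {
            return ((id << PSCI_0_2_POWER_STATE_ID_SHIFT) &
                    PSCI_0_2_POWER_STATE_ID_MASK) |
                   ((type << PSCI_0_2_POWER_STATE_TYPE_SHIFT) &
                    PSCI_0_2_POWER_STATE_TYPE_MASK) |
                   ((affinity_level << PSCI_0_2_POWER_STATE_AFFL_SHIFT) &
                    PSCI_0_2_POWER_STATE_AFFL_MASK);
    }

With constant fields the whole expression folds to a compile-time literal, which is what allows the HIGHBANK_SUSPEND_PARAM and CALXEDA_IDLE_PARAM macros below to be plain constant expressions.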
Signed-off-by: Mark Rutland Acked-by: Rob Herring Cc: Catalin Marinas Cc: Ashwin Chaugule Cc: Lorenzo Pieralisi Cc: Russell King Cc: Will Deacon Signed-off-by: Will Deacon --- arch/arm/Kconfig | 1 + arch/arm/include/asm/psci.h | 23 --- arch/arm/kernel/Makefile | 2 +- arch/arm/kernel/psci.c | 299 -------------------------------------- arch/arm/kernel/psci_smp.c | 29 +++- arch/arm/kernel/setup.c | 3 +- arch/arm/mach-highbank/highbank.c | 2 +- arch/arm/mach-highbank/pm.c | 16 +- drivers/cpuidle/cpuidle-calxeda.c | 15 +- 9 files changed, 46 insertions(+), 344 deletions(-) delete mode 100644 arch/arm/kernel/psci.c (limited to 'arch/arm/include/asm') diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 1c5021002fe4..f5cd68425d8d 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -1496,6 +1496,7 @@ config HOTPLUG_CPU config ARM_PSCI bool "Support for the ARM Power State Coordination Interface (PSCI)" depends on CPU_V7 + select ARM_PSCI_FW help Say Y here if you want Linux to communicate with system firmware implementing the PSCI specification for CPU-centric power diff --git a/arch/arm/include/asm/psci.h b/arch/arm/include/asm/psci.h index c25ef3ec6d1f..68ee3ce17b82 100644 --- a/arch/arm/include/asm/psci.h +++ b/arch/arm/include/asm/psci.h @@ -14,34 +14,11 @@ #ifndef __ASM_ARM_PSCI_H #define __ASM_ARM_PSCI_H -#define PSCI_POWER_STATE_TYPE_STANDBY 0 -#define PSCI_POWER_STATE_TYPE_POWER_DOWN 1 - -struct psci_power_state { - u16 id; - u8 type; - u8 affinity_level; -}; - -struct psci_operations { - int (*cpu_suspend)(struct psci_power_state state, - unsigned long entry_point); - int (*cpu_off)(struct psci_power_state state); - int (*cpu_on)(unsigned long cpuid, unsigned long entry_point); - int (*migrate)(unsigned long cpuid); - int (*affinity_info)(unsigned long target_affinity, - unsigned long lowest_affinity_level); - int (*migrate_info_type)(void); -}; - -extern struct psci_operations psci_ops; extern struct smp_operations psci_smp_ops; #ifdef CONFIG_ARM_PSCI -int psci_init(void); bool psci_smp_available(void); #else -static inline int psci_init(void) { return 0; } static inline bool psci_smp_available(void) { return false; } #endif diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile index e69f7a19735d..3b995f5c524d 100644 --- a/arch/arm/kernel/Makefile +++ b/arch/arm/kernel/Makefile @@ -89,7 +89,7 @@ obj-$(CONFIG_EARLY_PRINTK) += early_printk.o obj-$(CONFIG_ARM_VIRT_EXT) += hyp-stub.o ifeq ($(CONFIG_ARM_PSCI),y) -obj-y += psci.o psci-call.o +obj-y += psci-call.o obj-$(CONFIG_SMP) += psci_smp.o endif diff --git a/arch/arm/kernel/psci.c b/arch/arm/kernel/psci.c deleted file mode 100644 index f90fdf4ce7c7..000000000000 --- a/arch/arm/kernel/psci.c +++ /dev/null @@ -1,299 +0,0 @@ -/* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * Copyright (C) 2012 ARM Limited - * - * Author: Will Deacon - */ - -#define pr_fmt(fmt) "psci: " fmt - -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -struct psci_operations psci_ops; - -static int (*invoke_psci_fn)(u32, u32, u32, u32); -typedef int (*psci_initcall_t)(const struct device_node *); - -asmlinkage int __invoke_psci_fn_hvc(u32, u32, u32, u32); -asmlinkage int __invoke_psci_fn_smc(u32, u32, u32, u32); - -enum psci_function { - PSCI_FN_CPU_SUSPEND, - PSCI_FN_CPU_ON, - PSCI_FN_CPU_OFF, - PSCI_FN_MIGRATE, - PSCI_FN_AFFINITY_INFO, - PSCI_FN_MIGRATE_INFO_TYPE, - PSCI_FN_MAX, -}; - -static u32 psci_function_id[PSCI_FN_MAX]; - -static int psci_to_linux_errno(int errno) -{ - switch (errno) { - case PSCI_RET_SUCCESS: - return 0; - case PSCI_RET_NOT_SUPPORTED: - return -EOPNOTSUPP; - case PSCI_RET_INVALID_PARAMS: - return -EINVAL; - case PSCI_RET_DENIED: - return -EPERM; - }; - - return -EINVAL; -} - -static u32 psci_power_state_pack(struct psci_power_state state) -{ - return ((state.id << PSCI_0_2_POWER_STATE_ID_SHIFT) - & PSCI_0_2_POWER_STATE_ID_MASK) | - ((state.type << PSCI_0_2_POWER_STATE_TYPE_SHIFT) - & PSCI_0_2_POWER_STATE_TYPE_MASK) | - ((state.affinity_level << PSCI_0_2_POWER_STATE_AFFL_SHIFT) - & PSCI_0_2_POWER_STATE_AFFL_MASK); -} - -static int psci_get_version(void) -{ - int err; - - err = invoke_psci_fn(PSCI_0_2_FN_PSCI_VERSION, 0, 0, 0); - return err; -} - -static int psci_cpu_suspend(struct psci_power_state state, - unsigned long entry_point) -{ - int err; - u32 fn, power_state; - - fn = psci_function_id[PSCI_FN_CPU_SUSPEND]; - power_state = psci_power_state_pack(state); - err = invoke_psci_fn(fn, power_state, entry_point, 0); - return psci_to_linux_errno(err); -} - -static int psci_cpu_off(struct psci_power_state state) -{ - int err; - u32 fn, power_state; - - fn = psci_function_id[PSCI_FN_CPU_OFF]; - power_state = psci_power_state_pack(state); - err = invoke_psci_fn(fn, power_state, 0, 0); - return psci_to_linux_errno(err); -} - -static int psci_cpu_on(unsigned long cpuid, unsigned long entry_point) -{ - int err; - u32 fn; - - fn = psci_function_id[PSCI_FN_CPU_ON]; - err = invoke_psci_fn(fn, cpuid, entry_point, 0); - return psci_to_linux_errno(err); -} - -static int psci_migrate(unsigned long cpuid) -{ - int err; - u32 fn; - - fn = psci_function_id[PSCI_FN_MIGRATE]; - err = invoke_psci_fn(fn, cpuid, 0, 0); - return psci_to_linux_errno(err); -} - -static int psci_affinity_info(unsigned long target_affinity, - unsigned long lowest_affinity_level) -{ - int err; - u32 fn; - - fn = psci_function_id[PSCI_FN_AFFINITY_INFO]; - err = invoke_psci_fn(fn, target_affinity, lowest_affinity_level, 0); - return err; -} - -static int psci_migrate_info_type(void) -{ - int err; - u32 fn; - - fn = psci_function_id[PSCI_FN_MIGRATE_INFO_TYPE]; - err = invoke_psci_fn(fn, 0, 0, 0); - return err; -} - -static int get_set_conduit_method(struct device_node *np) -{ - const char *method; - - pr_info("probing for conduit method from DT.\n"); - - if (of_property_read_string(np, "method", &method)) { - pr_warn("missing \"method\" property\n"); - return -ENXIO; - } - - if (!strcmp("hvc", method)) { - invoke_psci_fn = __invoke_psci_fn_hvc; - } else if (!strcmp("smc", method)) { - invoke_psci_fn = __invoke_psci_fn_smc; - } else { - pr_warn("invalid \"method\" property: %s\n", method); - return -EINVAL; - } - return 0; -} - -static void psci_sys_reset(enum reboot_mode reboot_mode, const char *cmd) -{ - invoke_psci_fn(PSCI_0_2_FN_SYSTEM_RESET, 0, 0, 0); 
-} - -static void psci_sys_poweroff(void) -{ - invoke_psci_fn(PSCI_0_2_FN_SYSTEM_OFF, 0, 0, 0); -} - -/* - * PSCI Function IDs for v0.2+ are well defined so use - * standard values. - */ -static int psci_0_2_init(struct device_node *np) -{ - int err, ver; - - err = get_set_conduit_method(np); - - if (err) - goto out_put_node; - - ver = psci_get_version(); - - if (ver == PSCI_RET_NOT_SUPPORTED) { - /* PSCI v0.2 mandates implementation of PSCI_ID_VERSION. */ - pr_err("PSCI firmware does not comply with the v0.2 spec.\n"); - err = -EOPNOTSUPP; - goto out_put_node; - } else { - pr_info("PSCIv%d.%d detected in firmware.\n", - PSCI_VERSION_MAJOR(ver), - PSCI_VERSION_MINOR(ver)); - - if (PSCI_VERSION_MAJOR(ver) == 0 && - PSCI_VERSION_MINOR(ver) < 2) { - err = -EINVAL; - pr_err("Conflicting PSCI version detected.\n"); - goto out_put_node; - } - } - - pr_info("Using standard PSCI v0.2 function IDs\n"); - psci_function_id[PSCI_FN_CPU_SUSPEND] = PSCI_0_2_FN_CPU_SUSPEND; - psci_ops.cpu_suspend = psci_cpu_suspend; - - psci_function_id[PSCI_FN_CPU_OFF] = PSCI_0_2_FN_CPU_OFF; - psci_ops.cpu_off = psci_cpu_off; - - psci_function_id[PSCI_FN_CPU_ON] = PSCI_0_2_FN_CPU_ON; - psci_ops.cpu_on = psci_cpu_on; - - psci_function_id[PSCI_FN_MIGRATE] = PSCI_0_2_FN_MIGRATE; - psci_ops.migrate = psci_migrate; - - psci_function_id[PSCI_FN_AFFINITY_INFO] = PSCI_0_2_FN_AFFINITY_INFO; - psci_ops.affinity_info = psci_affinity_info; - - psci_function_id[PSCI_FN_MIGRATE_INFO_TYPE] = - PSCI_0_2_FN_MIGRATE_INFO_TYPE; - psci_ops.migrate_info_type = psci_migrate_info_type; - - arm_pm_restart = psci_sys_reset; - - pm_power_off = psci_sys_poweroff; - -out_put_node: - of_node_put(np); - return err; -} - -/* - * PSCI < v0.2 get PSCI Function IDs via DT. - */ -static int psci_0_1_init(struct device_node *np) -{ - u32 id; - int err; - - err = get_set_conduit_method(np); - - if (err) - goto out_put_node; - - pr_info("Using PSCI v0.1 Function IDs from DT\n"); - - if (!of_property_read_u32(np, "cpu_suspend", &id)) { - psci_function_id[PSCI_FN_CPU_SUSPEND] = id; - psci_ops.cpu_suspend = psci_cpu_suspend; - } - - if (!of_property_read_u32(np, "cpu_off", &id)) { - psci_function_id[PSCI_FN_CPU_OFF] = id; - psci_ops.cpu_off = psci_cpu_off; - } - - if (!of_property_read_u32(np, "cpu_on", &id)) { - psci_function_id[PSCI_FN_CPU_ON] = id; - psci_ops.cpu_on = psci_cpu_on; - } - - if (!of_property_read_u32(np, "migrate", &id)) { - psci_function_id[PSCI_FN_MIGRATE] = id; - psci_ops.migrate = psci_migrate; - } - -out_put_node: - of_node_put(np); - return err; -} - -static const struct of_device_id psci_of_match[] __initconst = { - { .compatible = "arm,psci", .data = psci_0_1_init}, - { .compatible = "arm,psci-0.2", .data = psci_0_2_init}, - {}, -}; - -int __init psci_init(void) -{ - struct device_node *np; - const struct of_device_id *matched_np; - psci_initcall_t init_fn; - - np = of_find_matching_node_and_match(NULL, psci_of_match, &matched_np); - if (!np) - return -ENODEV; - - init_fn = (psci_initcall_t)matched_np->data; - return init_fn(np); -} diff --git a/arch/arm/kernel/psci_smp.c b/arch/arm/kernel/psci_smp.c index 244aaddfbfda..61c04b02faeb 100644 --- a/arch/arm/kernel/psci_smp.c +++ b/arch/arm/kernel/psci_smp.c @@ -17,6 +17,8 @@ #include #include #include +#include + #include #include @@ -56,17 +58,29 @@ static int psci_boot_secondary(unsigned int cpu, struct task_struct *idle) } #ifdef CONFIG_HOTPLUG_CPU +int psci_cpu_disable(unsigned int cpu) +{ + /* Fail early if we don't have CPU_OFF support */ + if (!psci_ops.cpu_off) + return 
-EOPNOTSUPP; + + /* Trusted OS will deny CPU_OFF */ + if (psci_tos_resident_on(cpu)) + return -EPERM; + + return 0; +} + void __ref psci_cpu_die(unsigned int cpu) { - const struct psci_power_state ps = { - .type = PSCI_POWER_STATE_TYPE_POWER_DOWN, - }; + u32 state = PSCI_POWER_STATE_TYPE_POWER_DOWN << + PSCI_0_2_POWER_STATE_TYPE_SHIFT; - if (psci_ops.cpu_off) - psci_ops.cpu_off(ps); + if (psci_ops.cpu_off) + psci_ops.cpu_off(state); - /* We should never return */ - panic("psci: cpu %d failed to shutdown\n", cpu); + /* We should never return */ + panic("psci: cpu %d failed to shutdown\n", cpu); } int __ref psci_cpu_kill(unsigned int cpu) @@ -109,6 +123,7 @@ bool __init psci_smp_available(void) struct smp_operations __initdata psci_smp_ops = { .smp_boot_secondary = psci_boot_secondary, #ifdef CONFIG_HOTPLUG_CPU + .cpu_disable = psci_cpu_disable, .cpu_die = psci_cpu_die, .cpu_kill = psci_cpu_kill, #endif diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index 36c18b73c1f4..9c38bd42f04b 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include @@ -972,7 +973,7 @@ void __init setup_arch(char **cmdline_p) unflatten_device_tree(); arm_dt_init_cpu_maps(); - psci_init(); + psci_dt_init(); xen_early_init(); #ifdef CONFIG_SMP if (is_smp()) { diff --git a/arch/arm/mach-highbank/highbank.c b/arch/arm/mach-highbank/highbank.c index 231fba0d03e5..6050a14faee6 100644 --- a/arch/arm/mach-highbank/highbank.c +++ b/arch/arm/mach-highbank/highbank.c @@ -28,8 +28,8 @@ #include #include #include +#include -#include #include #include #include diff --git a/arch/arm/mach-highbank/pm.c b/arch/arm/mach-highbank/pm.c index 7f2bd85eb935..400311695548 100644 --- a/arch/arm/mach-highbank/pm.c +++ b/arch/arm/mach-highbank/pm.c @@ -16,19 +16,21 @@ #include #include +#include #include #include -#include + +#include + +#define HIGHBANK_SUSPEND_PARAM \ + ((0 << PSCI_0_2_POWER_STATE_ID_SHIFT) | \ + (1 << PSCI_0_2_POWER_STATE_AFFL_SHIFT) | \ + (PSCI_POWER_STATE_TYPE_POWER_DOWN << PSCI_0_2_POWER_STATE_TYPE_SHIFT)) static int highbank_suspend_finish(unsigned long val) { - const struct psci_power_state ps = { - .type = PSCI_POWER_STATE_TYPE_POWER_DOWN, - .affinity_level = 1, - }; - - return psci_ops.cpu_suspend(ps, __pa(cpu_resume)); + return psci_ops.cpu_suspend(HIGHBANK_SUSPEND_PARAM, __pa(cpu_resume)); } static int highbank_pm_enter(suspend_state_t state) diff --git a/drivers/cpuidle/cpuidle-calxeda.c b/drivers/cpuidle/cpuidle-calxeda.c index c13feec89ea1..ea9728fde9b3 100644 --- a/drivers/cpuidle/cpuidle-calxeda.c +++ b/drivers/cpuidle/cpuidle-calxeda.c @@ -25,16 +25,21 @@ #include #include #include +#include + #include #include -#include + +#include + +#define CALXEDA_IDLE_PARAM \ + ((0 << PSCI_0_2_POWER_STATE_ID_SHIFT) | \ + (0 << PSCI_0_2_POWER_STATE_AFFL_SHIFT) | \ + (PSCI_POWER_STATE_TYPE_POWER_DOWN << PSCI_0_2_POWER_STATE_TYPE_SHIFT)) static int calxeda_idle_finish(unsigned long val) { - const struct psci_power_state ps = { - .type = PSCI_POWER_STATE_TYPE_POWER_DOWN, - }; - return psci_ops.cpu_suspend(ps, __pa(cpu_resume)); + return psci_ops.cpu_suspend(CALXEDA_IDLE_PARAM, __pa(cpu_resume)); } static int calxeda_pwrdown_idle(struct cpuidle_device *dev, -- cgit v1.2.3-55-g7522 From efaa6e266ba70439da00e7f1c8a218e243ae140a Mon Sep 17 00:00:00 2001 From: Russell King Date: Fri, 24 Jul 2015 10:21:02 +0100 Subject: firmware: qcom_scm-32: replace open-coded call to __cpuc_flush_dcache_area() Rather than directly accessing
architecture internal functions, provide a "method"-centric wrapper for qcom_scm-32 to do what's necessary to ensure that the secure monitor can see the data. This is called "secure_flush_area" and ensures that the specified memory area is coherent across the secure boundary. Acked-by: Andy Gross Reviewed-by: Stephen Boyd Signed-off-by: Russell King --- arch/arm/include/asm/cacheflush.h | 17 +++++++++++++++++ drivers/firmware/qcom_scm-32.c | 4 +--- 2 files changed, 18 insertions(+), 3 deletions(-) (limited to 'arch/arm/include/asm') diff --git a/arch/arm/include/asm/cacheflush.h b/arch/arm/include/asm/cacheflush.h index c5230a44eeca..d5525bfc7e3e 100644 --- a/arch/arm/include/asm/cacheflush.h +++ b/arch/arm/include/asm/cacheflush.h @@ -502,4 +502,21 @@ static inline void set_kernel_text_ro(void) { } void flush_uprobe_xol_access(struct page *page, unsigned long uaddr, void *kaddr, unsigned long len); +/** + * secure_flush_area - ensure coherency across the secure boundary + * @addr: virtual address + * @size: size of region + * + * Ensure that the specified area of memory is coherent across the secure + * boundary from the non-secure side. This is used when calling secure + * firmware where the secure firmware does not ensure coherency. + */ +static inline void secure_flush_area(const void *addr, size_t size) +{ + phys_addr_t phys = __pa(addr); + + __cpuc_flush_dcache_area((void *)addr, size); + outer_flush_range(phys, phys + size); +} + #endif diff --git a/drivers/firmware/qcom_scm-32.c b/drivers/firmware/qcom_scm-32.c index 1bd6f9c34331..29e6850665eb 100644 --- a/drivers/firmware/qcom_scm-32.c +++ b/drivers/firmware/qcom_scm-32.c @@ -24,7 +24,6 @@ #include #include -#include #include #include "qcom_scm.h" @@ -219,8 +218,7 @@ static int __qcom_scm_call(const struct qcom_scm_command *cmd) * Flush the command buffer so that the secure world sees * the correct data. */ - __cpuc_flush_dcache_area((void *)cmd, cmd->len); - outer_flush_range(cmd_addr, cmd_addr + cmd->len); + secure_flush_area(cmd, cmd->len); ret = smc(cmd_addr); if (ret < 0) -- cgit v1.2.3-55-g7522 From a5f4c561b3b19a9bc43a81da6382b0098ebbc1fb Mon Sep 17 00:00:00 2001 From: Stefan Agner Date: Thu, 13 Aug 2015 00:01:52 +0100 Subject: ARM: 8415/1: early fixmap support for earlycon Add early fixmap support, initially to provide a permanent, fixed mapping for the early console. A temporary, early pte is created, which is migrated to a permanent mapping in paging_init. This is also needed since the attributes may change as the memory types are initialized. The 3MiB range of fixmap spans two pte tables, but currently only one pte is created for early fixmap support. Re-add FIX_KMAP_BEGIN to the index calculation in highmem.c since the index for kmap does not start at zero anymore. This reverts 4221e2e6b316 ("ARM: 8031/1: fixmap: remove FIX_KMAP_BEGIN and FIX_KMAP_END") to some extent.
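For context, a minimal sketch of how an early console is expected to consume the new permanent slot, assuming the generic set_fixmap_io() helper (which applies the FIXMAP_PAGE_IO protection defined in the hunk below); the UART address is a placeholder:

    /* Map a hypothetical early UART through the fixed slot. */
    static void __init map_early_uart(unsigned long uart_phys)
    {
            void __iomem *uart_base;

            set_fixmap_io(FIX_EARLYCON_MEM_BASE, uart_phys & PAGE_MASK);
            uart_base = (void __iomem *)(__fix_to_virt(FIX_EARLYCON_MEM_BASE) +
                                         (uart_phys & ~PAGE_MASK));
            /* uart_base can now be used with readl()/writel() */
    }

Because the slot is permanent, the mapping survives into paging_init(), where early_fixmap_shutdown() re-creates it via create_mapping() as shown in the mmu.c hunk below.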
Cc: Mark Salter Cc: Kees Cook Cc: Laura Abbott Cc: Arnd Bergmann Cc: Ard Biesheuvel Signed-off-by: Rob Herring Signed-off-by: Stefan Agner Signed-off-by: Russell King --- arch/arm/Kconfig | 3 ++ arch/arm/include/asm/fixmap.h | 15 +++++++- arch/arm/kernel/setup.c | 4 ++ arch/arm/mm/highmem.c | 6 +-- arch/arm/mm/mmu.c | 88 +++++++++++++++++++++++++++++++++++++++---- 5 files changed, 105 insertions(+), 11 deletions(-) (limited to 'arch/arm/include/asm') diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index a750c1425c3a..1bcda7cb2e04 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -188,6 +188,9 @@ config ARCH_HAS_ILOG2_U64 config ARCH_HAS_BANDGAP bool +config FIX_EARLYCON_MEM + def_bool y if MMU + config GENERIC_HWEIGHT bool default y diff --git a/arch/arm/include/asm/fixmap.h b/arch/arm/include/asm/fixmap.h index 0415eae1df27..58cfe9f1a687 100644 --- a/arch/arm/include/asm/fixmap.h +++ b/arch/arm/include/asm/fixmap.h @@ -6,9 +6,13 @@ #define FIXADDR_TOP (FIXADDR_END - PAGE_SIZE) #include +#include enum fixed_addresses { - FIX_KMAP_BEGIN, + FIX_EARLYCON_MEM_BASE, + __end_of_permanent_fixed_addresses, + + FIX_KMAP_BEGIN = __end_of_permanent_fixed_addresses, FIX_KMAP_END = FIX_KMAP_BEGIN + (KM_TYPE_NR * NR_CPUS) - 1, /* Support writing RO kernel text via kprobes, jump labels, etc. */ @@ -18,7 +22,16 @@ enum fixed_addresses { __end_of_fixed_addresses }; +#define FIXMAP_PAGE_COMMON (L_PTE_YOUNG | L_PTE_PRESENT | L_PTE_XN | L_PTE_DIRTY) + +#define FIXMAP_PAGE_NORMAL (FIXMAP_PAGE_COMMON | L_PTE_MT_WRITEBACK) + +/* Used by set_fixmap_(io|nocache), both meant for mapping a device */ +#define FIXMAP_PAGE_IO (FIXMAP_PAGE_COMMON | L_PTE_MT_DEV_SHARED | L_PTE_SHARED) +#define FIXMAP_PAGE_NOCACHE FIXMAP_PAGE_IO + void __set_fixmap(enum fixed_addresses idx, phys_addr_t phys, pgprot_t prot); +void __init early_fixmap_init(void); #include diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index 6bbec6042052..e2ecee6b70ca 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -37,6 +37,7 @@ #include #include #include +#include #include #include #include @@ -954,6 +955,9 @@ void __init setup_arch(char **cmdline_p) strlcpy(cmd_line, boot_command_line, COMMAND_LINE_SIZE); *cmdline_p = cmd_line; + if (IS_ENABLED(CONFIG_FIX_EARLYCON_MEM)) + early_fixmap_init(); + parse_early_param(); #ifdef CONFIG_MMU diff --git a/arch/arm/mm/highmem.c b/arch/arm/mm/highmem.c index ee8dfa793989..9df5f09585ca 100644 --- a/arch/arm/mm/highmem.c +++ b/arch/arm/mm/highmem.c @@ -79,7 +79,7 @@ void *kmap_atomic(struct page *page) type = kmap_atomic_idx_push(); - idx = type + KM_TYPE_NR * smp_processor_id(); + idx = FIX_KMAP_BEGIN + type + KM_TYPE_NR * smp_processor_id(); vaddr = __fix_to_virt(idx); #ifdef CONFIG_DEBUG_HIGHMEM /* @@ -106,7 +106,7 @@ void __kunmap_atomic(void *kvaddr) if (kvaddr >= (void *)FIXADDR_START) { type = kmap_atomic_idx(); - idx = type + KM_TYPE_NR * smp_processor_id(); + idx = FIX_KMAP_BEGIN + type + KM_TYPE_NR * smp_processor_id(); if (cache_is_vivt()) __cpuc_flush_dcache_area((void *)vaddr, PAGE_SIZE); @@ -138,7 +138,7 @@ void *kmap_atomic_pfn(unsigned long pfn) return page_address(page); type = kmap_atomic_idx_push(); - idx = type + KM_TYPE_NR * smp_processor_id(); + idx = FIX_KMAP_BEGIN + type + KM_TYPE_NR * smp_processor_id(); vaddr = __fix_to_virt(idx); #ifdef CONFIG_DEBUG_HIGHMEM BUG_ON(!pte_none(get_fixmap_pte(vaddr))); diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index 6ca7d9aa896f..fb9e817d08bb 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -357,6 
+357,47 @@ const struct mem_type *get_mem_type(unsigned int type) } EXPORT_SYMBOL(get_mem_type); +static pte_t *(*pte_offset_fixmap)(pmd_t *dir, unsigned long addr); + +static pte_t bm_pte[PTRS_PER_PTE + PTE_HWTABLE_PTRS] + __aligned(PTE_HWTABLE_OFF + PTE_HWTABLE_SIZE) __initdata; + +static pte_t * __init pte_offset_early_fixmap(pmd_t *dir, unsigned long addr) +{ + return &bm_pte[pte_index(addr)]; +} + +static pte_t *pte_offset_late_fixmap(pmd_t *dir, unsigned long addr) +{ + return pte_offset_kernel(dir, addr); +} + +static inline pmd_t * __init fixmap_pmd(unsigned long addr) +{ + pgd_t *pgd = pgd_offset_k(addr); + pud_t *pud = pud_offset(pgd, addr); + pmd_t *pmd = pmd_offset(pud, addr); + + return pmd; +} + +void __init early_fixmap_init(void) +{ + pmd_t *pmd; + + /* + * The early fixmap range spans multiple pmds, for which + * we are not prepared: + */ + BUILD_BUG_ON((__fix_to_virt(__end_of_permanent_fixed_addresses) >> PMD_SHIFT) + != FIXADDR_TOP >> PMD_SHIFT); + + pmd = fixmap_pmd(FIXADDR_TOP); + pmd_populate_kernel(&init_mm, pmd, bm_pte); + + pte_offset_fixmap = pte_offset_early_fixmap; +} + /* * To avoid TLB flush broadcasts, this uses local_flush_tlb_kernel_range(). * As a result, this can only be called with preemption disabled, as under @@ -365,7 +406,7 @@ EXPORT_SYMBOL(get_mem_type); void __set_fixmap(enum fixed_addresses idx, phys_addr_t phys, pgprot_t prot) { unsigned long vaddr = __fix_to_virt(idx); - pte_t *pte = pte_offset_kernel(pmd_off_k(vaddr), vaddr); + pte_t *pte = pte_offset_fixmap(pmd_off_k(vaddr), vaddr); /* Make sure fixmap region does not exceed available allocation. */ BUILD_BUG_ON(FIXADDR_START + (__end_of_fixed_addresses * PAGE_SIZE) > @@ -855,7 +896,7 @@ static void __init create_mapping(struct map_desc *md) } if ((md->type == MT_DEVICE || md->type == MT_ROM) && - md->virtual >= PAGE_OFFSET && + md->virtual >= PAGE_OFFSET && md->virtual < FIXADDR_START && (md->virtual < VMALLOC_START || md->virtual >= VMALLOC_END)) { pr_warn("BUG: mapping for 0x%08llx at 0x%08lx out of vmalloc space\n", (long long)__pfn_to_phys((u64)md->pfn), md->virtual); @@ -1213,10 +1254,10 @@ void __init arm_mm_memblock_reserve(void) /* * Set up the device mappings. Since we clear out the page tables for all - * mappings above VMALLOC_START, we will remove any debug device mappings. - * This means you have to be careful how you debug this function, or any - * called function. This means you can't use any function or debugging - * method which may touch any device, otherwise the kernel _will_ crash. + * mappings above VMALLOC_START, except early fixmap, we might remove debug + * device mappings. This means earlycon can be used to debug this function + * Any other function or debugging method which may touch any device _will_ + * crash the kernel. 
*/ static void __init devicemaps_init(const struct machine_desc *mdesc) { @@ -1231,7 +1272,10 @@ static void __init devicemaps_init(const struct machine_desc *mdesc) early_trap_init(vectors); - for (addr = VMALLOC_START; addr; addr += PMD_SIZE) + /* + * Clear page table except top pmd used by early fixmaps + */ + for (addr = VMALLOC_START; addr < (FIXADDR_TOP & PMD_MASK); addr += PMD_SIZE) pmd_clear(pmd_off_k(addr)); /* @@ -1483,6 +1527,35 @@ void __init early_paging_init(const struct machine_desc *mdesc) #endif +static void __init early_fixmap_shutdown(void) +{ + int i; + unsigned long va = fix_to_virt(__end_of_permanent_fixed_addresses - 1); + + pte_offset_fixmap = pte_offset_late_fixmap; + pmd_clear(fixmap_pmd(va)); + local_flush_tlb_kernel_page(va); + + for (i = 0; i < __end_of_permanent_fixed_addresses; i++) { + pte_t *pte; + struct map_desc map; + + map.virtual = fix_to_virt(i); + pte = pte_offset_early_fixmap(pmd_off_k(map.virtual), map.virtual); + + /* Only i/o device mappings are supported ATM */ + if (pte_none(*pte) || + (pte_val(*pte) & L_PTE_MT_MASK) != L_PTE_MT_DEV_SHARED) + continue; + + map.pfn = pte_pfn(*pte); + map.type = MT_DEVICE; + map.length = PAGE_SIZE; + + create_mapping(&map); + } +} + /* * paging_init() sets up the page tables, initialises the zone memory * maps, and sets up the zero page, bad page and bad page tables. @@ -1495,6 +1568,7 @@ void __init paging_init(const struct machine_desc *mdesc) prepare_page_table(); map_lowmem(); dma_contiguous_remap(); + early_fixmap_shutdown(); devicemaps_init(mdesc); kmap_init(); tcm_init(); -- cgit v1.2.3-55-g7522 From 8901925d32232adb57ba1b4c26d0c0f9d521a78c Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Mon, 17 Aug 2015 03:59:52 +0100 Subject: ARM: 8417/1: refactor bitops functions with BIT_MASK() and BIT_WORD() Use BIT_MASK() and BIT_WORD() rather than hard-coding the size of the "long" type. 
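For reference, the two helpers come from include/linux/bitops.h and, with BITS_PER_LONG == 32, expand to exactly the open-coded forms being replaced:

    #define BIT_MASK(nr)    (1UL << ((nr) % BITS_PER_LONG))
    #define BIT_WORD(nr)    ((nr) / BITS_PER_LONG)

    unsigned long mask = 1UL << (bit & 31); /* == BIT_MASK(bit) */
    p += bit >> 5;                          /* == p += BIT_WORD(bit) */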
Signed-off-by: Masahiro Yamada Signed-off-by: Russell King --- arch/arm/include/asm/bitops.h | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) (limited to 'arch/arm/include/asm') diff --git a/arch/arm/include/asm/bitops.h b/arch/arm/include/asm/bitops.h index 56380995f4c3..e943e6cee254 100644 --- a/arch/arm/include/asm/bitops.h +++ b/arch/arm/include/asm/bitops.h @@ -35,9 +35,9 @@ static inline void ____atomic_set_bit(unsigned int bit, volatile unsigned long *p) { unsigned long flags; - unsigned long mask = 1UL << (bit & 31); + unsigned long mask = BIT_MASK(bit); - p += bit >> 5; + p += BIT_WORD(bit); raw_local_irq_save(flags); *p |= mask; @@ -47,9 +47,9 @@ static inline void ____atomic_set_bit(unsigned int bit, volatile unsigned long * static inline void ____atomic_clear_bit(unsigned int bit, volatile unsigned long *p) { unsigned long flags; - unsigned long mask = 1UL << (bit & 31); + unsigned long mask = BIT_MASK(bit); - p += bit >> 5; + p += BIT_WORD(bit); raw_local_irq_save(flags); *p &= ~mask; @@ -59,9 +59,9 @@ static inline void ____atomic_clear_bit(unsigned int bit, volatile unsigned long static inline void ____atomic_change_bit(unsigned int bit, volatile unsigned long *p) { unsigned long flags; - unsigned long mask = 1UL << (bit & 31); + unsigned long mask = BIT_MASK(bit); - p += bit >> 5; + p += BIT_WORD(bit); raw_local_irq_save(flags); *p ^= mask; @@ -73,9 +73,9 @@ ____atomic_test_and_set_bit(unsigned int bit, volatile unsigned long *p) { unsigned long flags; unsigned int res; - unsigned long mask = 1UL << (bit & 31); + unsigned long mask = BIT_MASK(bit); - p += bit >> 5; + p += BIT_WORD(bit); raw_local_irq_save(flags); res = *p; @@ -90,9 +90,9 @@ ____atomic_test_and_clear_bit(unsigned int bit, volatile unsigned long *p) { unsigned long flags; unsigned int res; - unsigned long mask = 1UL << (bit & 31); + unsigned long mask = BIT_MASK(bit); - p += bit >> 5; + p += BIT_WORD(bit); raw_local_irq_save(flags); res = *p; @@ -107,9 +107,9 @@ ____atomic_test_and_change_bit(unsigned int bit, volatile unsigned long *p) { unsigned long flags; unsigned int res; - unsigned long mask = 1UL << (bit & 31); + unsigned long mask = BIT_MASK(bit); - p += bit >> 5; + p += BIT_WORD(bit); raw_local_irq_save(flags); res = *p; -- cgit v1.2.3-55-g7522 From 96231b2686b53f71838a335bdc404cb5285d1a01 Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Tue, 18 Aug 2015 09:14:15 +0100 Subject: ARM: 8419/1: dma-mapping: harmonize definition of DMA_ERROR_CODE All architectures except arm that define DMA_ERROR_CODE cast it to (dma_addr_t); as it is always compared against a dma_addr_t on arm as well, the definition can be harmonized.
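The definition only matters at the comparison site; a minimal sketch of that pattern, modelled loosely on the ARM dma_mapping_error() implementation (the function name here is illustrative):

    static inline int dma_addr_is_error(dma_addr_t dma_addr)
    {
            /*
             * dma_addr_t is 64-bit when CONFIG_ARM_LPAE is enabled, so the
             * explicit (dma_addr_t) cast keeps the all-ones error value
             * unambiguous regardless of the type's width.
             */
            return dma_addr == DMA_ERROR_CODE;
    }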
Signed-off-by: Nicholas Mc Guire Acked-by: Marek Szyprowski Signed-off-by: Russell King --- arch/arm/include/asm/dma-mapping.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/arm/include/asm') diff --git a/arch/arm/include/asm/dma-mapping.h b/arch/arm/include/asm/dma-mapping.h index b52101d37ec7..a68b9d8a71fe 100644 --- a/arch/arm/include/asm/dma-mapping.h +++ b/arch/arm/include/asm/dma-mapping.h @@ -14,7 +14,7 @@ #include #include -#define DMA_ERROR_CODE (~0) +#define DMA_ERROR_CODE (~(dma_addr_t)0x0) extern struct dma_map_ops arm_dma_ops; extern struct dma_map_ops arm_coherent_dma_ops; -- cgit v1.2.3-55-g7522 From 1eef5d2f1b461c120bcd82077edee5ec706ac53b Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 19 Aug 2015 21:23:48 +0100 Subject: ARM: domains: switch to keeping domain value in register Rather than modifying both the domain access control register and our per-thread copy, modify only the domain access control register, and use the per-thread copy to save and restore the register over context switches. We can also avoid the explicit initialisation of the init thread_info structure. This allows us to avoid needing to gain access to the thread information at the uaccess control sites. Signed-off-by: Russell King --- arch/arm/include/asm/domain.h | 20 +++++++++++++++----- arch/arm/include/asm/thread_info.h | 3 --- arch/arm/kernel/entry-armv.S | 2 ++ arch/arm/kernel/process.c | 13 ++++++++++--- 4 files changed, 27 insertions(+), 11 deletions(-) (limited to 'arch/arm/include/asm') diff --git a/arch/arm/include/asm/domain.h b/arch/arm/include/asm/domain.h index 6ddbe446425e..7f2941905714 100644 --- a/arch/arm/include/asm/domain.h +++ b/arch/arm/include/asm/domain.h @@ -59,6 +59,17 @@ #ifndef __ASSEMBLY__ +static inline unsigned int get_domain(void) +{ + unsigned int domain; + + asm( + "mrc p15, 0, %0, c3, c0 @ get domain" + : "=r" (domain)); + + return domain; +} + #ifdef CONFIG_CPU_USE_DOMAINS static inline void set_domain(unsigned val) { @@ -70,11 +81,10 @@ static inline void set_domain(unsigned val) #define modify_domain(dom,type) \ do { \ - struct thread_info *thread = current_thread_info(); \ - unsigned int domain = thread->cpu_domain; \ - domain &= ~domain_val(dom, DOMAIN_MANAGER); \ - thread->cpu_domain = domain | domain_val(dom, type); \ - set_domain(thread->cpu_domain); \ + unsigned int domain = get_domain(); \ + domain &= ~domain_val(dom, DOMAIN_MANAGER); \ + domain = domain | domain_val(dom, type); \ + set_domain(domain); \ } while (0) #else diff --git a/arch/arm/include/asm/thread_info.h b/arch/arm/include/asm/thread_info.h index bd32eded3e50..0a0aec410d8c 100644 --- a/arch/arm/include/asm/thread_info.h +++ b/arch/arm/include/asm/thread_info.h @@ -74,9 +74,6 @@ struct thread_info { .flags = 0, \ .preempt_count = INIT_PREEMPT_COUNT, \ .addr_limit = KERNEL_DS, \ - .cpu_domain = domain_val(DOMAIN_USER, DOMAIN_MANAGER) | \ - domain_val(DOMAIN_KERNEL, DOMAIN_MANAGER) | \ - domain_val(DOMAIN_IO, DOMAIN_CLIENT), \ } #define init_thread_info (init_thread_union.thread_info) diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S index 7dac3086e361..d19adcf6c580 100644 --- a/arch/arm/kernel/entry-armv.S +++ b/arch/arm/kernel/entry-armv.S @@ -770,6 +770,8 @@ ENTRY(__switch_to) ldr r4, [r2, #TI_TP_VALUE] ldr r5, [r2, #TI_TP_VALUE + 4] #ifdef CONFIG_CPU_USE_DOMAINS + mrc p15, 0, r6, c3, c0, 0 @ Get domain register + str r6, [r1, #TI_CPU_DOMAIN] @ Save old domain register ldr r6, [r2, #TI_CPU_DOMAIN] #endif switch_tls r1, r4, r5, r3, r7 diff 
--git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c index f192a2a41719..e722f9b3c9b1 100644 --- a/arch/arm/kernel/process.c +++ b/arch/arm/kernel/process.c @@ -146,10 +146,9 @@ void __show_regs(struct pt_regs *regs) buf[0] = '\0'; #ifdef CONFIG_CPU_CP15_MMU { - unsigned int transbase, dac; + unsigned int transbase, dac = get_domain(); asm("mrc p15, 0, %0, c2, c0\n\t" - "mrc p15, 0, %1, c3, c0\n" - : "=r" (transbase), "=r" (dac)); + : "=r" (transbase)); snprintf(buf, sizeof(buf), " Table: %08x DAC: %08x", transbase, dac); } @@ -210,6 +209,14 @@ copy_thread(unsigned long clone_flags, unsigned long stack_start, memset(&thread->cpu_context, 0, sizeof(struct cpu_context_save)); + /* + * Copy the initial value of the domain access control register + * from the current thread: thread->addr_limit will have been + * copied from the current thread via setup_thread_stack() in + * kernel/fork.c + */ + thread->cpu_domain = get_domain(); + if (likely(!(p->flags & PF_KTHREAD))) { *childregs = *current_pt_regs(); childregs->ARM_r0 = 0; -- cgit v1.2.3-55-g7522 From 8e798706f7e9cd7f096aa194de90269dde83773e Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 19 Aug 2015 22:36:24 +0100 Subject: ARM: domains: provide domain_mask() Provide a macro to generate the mask for a domain, rather than using domain_val(, DOMAIN_MANAGER) which won't work when CPU_USE_DOMAINS is turned off. Signed-off-by: Russell King --- arch/arm/include/asm/domain.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'arch/arm/include/asm') diff --git a/arch/arm/include/asm/domain.h b/arch/arm/include/asm/domain.h index 7f2941905714..045b9b453bcd 100644 --- a/arch/arm/include/asm/domain.h +++ b/arch/arm/include/asm/domain.h @@ -55,7 +55,8 @@ #define DOMAIN_MANAGER 1 #endif -#define domain_val(dom,type) ((type) << (2*(dom))) +#define domain_mask(dom) ((3) << (2 * (dom))) +#define domain_val(dom,type) ((type) << (2 * (dom))) #ifndef __ASSEMBLY__ @@ -82,7 +83,7 @@ static inline void set_domain(unsigned val) #define modify_domain(dom,type) \ do { \ unsigned int domain = get_domain(); \ - domain &= ~domain_val(dom, DOMAIN_MANAGER); \ + domain &= ~domain_mask(dom); \ domain = domain | domain_val(dom, type); \ set_domain(domain); \ } while (0) -- cgit v1.2.3-55-g7522 From 0171356a7708af01ad3224702b7f0aaa5b7a1399 Mon Sep 17 00:00:00 2001 From: Russell King Date: Fri, 21 Aug 2015 09:23:26 +0100 Subject: ARM: domains: move initial domain setting value to asm/domains.h Signed-off-by: Russell King --- arch/arm/include/asm/domain.h | 6 ++++++ arch/arm/kernel/head.S | 5 +---- 2 files changed, 7 insertions(+), 4 deletions(-) (limited to 'arch/arm/include/asm') diff --git a/arch/arm/include/asm/domain.h b/arch/arm/include/asm/domain.h index 045b9b453bcd..4218f88e8f7e 100644 --- a/arch/arm/include/asm/domain.h +++ b/arch/arm/include/asm/domain.h @@ -58,6 +58,12 @@ #define domain_mask(dom) ((3) << (2 * (dom))) #define domain_val(dom,type) ((type) << (2 * (dom))) +#define DACR_INIT \ + (domain_val(DOMAIN_USER, DOMAIN_MANAGER) | \ + domain_val(DOMAIN_KERNEL, DOMAIN_MANAGER) | \ + domain_val(DOMAIN_TABLE, DOMAIN_MANAGER) | \ + domain_val(DOMAIN_IO, DOMAIN_CLIENT)) + #ifndef __ASSEMBLY__ static inline unsigned int get_domain(void) diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S index bd755d97e459..d56e5e9a9e1e 100644 --- a/arch/arm/kernel/head.S +++ b/arch/arm/kernel/head.S @@ -461,10 +461,7 @@ __enable_mmu: #ifdef CONFIG_ARM_LPAE mcrr p15, 0, r4, r5, c2 @ load TTBR0 #else - mov r5, #(domain_val(DOMAIN_USER, 
DOMAIN_MANAGER) | \ - domain_val(DOMAIN_KERNEL, DOMAIN_MANAGER) | \ - domain_val(DOMAIN_TABLE, DOMAIN_MANAGER) | \ - domain_val(DOMAIN_IO, DOMAIN_CLIENT)) + mov r5, #DACR_INIT mcr p15, 0, r5, c3, c0, 0 @ load domain access register mcr p15, 0, r4, c2, c0, 0 @ load page table pointer #endif -- cgit v1.2.3-55-g7522 From 3c2aed5b28819564e1a07b4686bd89802bcc4d6b Mon Sep 17 00:00:00 2001 From: Russell King Date: Fri, 21 Aug 2015 09:30:16 +0100 Subject: ARM: domains: get rid of manager mode for user domain Since we switched to early trap initialisation in 94e5a85b3be0 ("ARM: earlier initialization of vectors page") we haven't been writing directly to the vectors page, and so there's no need for this domain to be in manager mode. Switch it to client mode. Signed-off-by: Russell King --- arch/arm/include/asm/domain.h | 2 +- arch/arm/kernel/traps.c | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) (limited to 'arch/arm/include/asm') diff --git a/arch/arm/include/asm/domain.h b/arch/arm/include/asm/domain.h index 4218f88e8f7e..08b601e69ddc 100644 --- a/arch/arm/include/asm/domain.h +++ b/arch/arm/include/asm/domain.h @@ -59,7 +59,7 @@ #define domain_val(dom,type) ((type) << (2 * (dom))) #define DACR_INIT \ - (domain_val(DOMAIN_USER, DOMAIN_MANAGER) | \ + (domain_val(DOMAIN_USER, DOMAIN_CLIENT) | \ domain_val(DOMAIN_KERNEL, DOMAIN_MANAGER) | \ domain_val(DOMAIN_TABLE, DOMAIN_MANAGER) | \ domain_val(DOMAIN_IO, DOMAIN_CLIENT)) diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c index d358226236f2..969f9d9e665f 100644 --- a/arch/arm/kernel/traps.c +++ b/arch/arm/kernel/traps.c @@ -870,7 +870,6 @@ void __init early_trap_init(void *vectors_base) kuser_init(vectors_base); flush_icache_range(vectors, vectors + PAGE_SIZE * 2); - modify_domain(DOMAIN_USER, DOMAIN_CLIENT); #else /* ifndef CONFIG_CPU_V7M */ /* * on V7-M there is no need to copy the vector table to a dedicated -- cgit v1.2.3-55-g7522 From a02d8dfd54cdf3b1b0464ccc2c1c4afe2c003a35 Mon Sep 17 00:00:00 2001 From: Russell King Date: Fri, 21 Aug 2015 09:38:31 +0100 Subject: ARM: domains: keep vectors in separate domain Keep the machine vectors in their own domain to prevent software-based user access control from making the vector code inaccessible, and thereby deadlocking the machine.
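An illustrative aside, not part of the patch series: the DACR holds one 2-bit access field per domain (architecturally 0 = no access, 1 = client, 3 = manager), so once the vectors live in their own domain, the user domain's field can be cleared without touching the field that keeps the vectors mapped. A minimal user-space sketch of that arithmetic, assuming the !CONFIG_IO_36 domain numbering from asm/domain.h:

#include <stdio.h>

/* Domain numbers (asm/domain.h, !CONFIG_IO_36 case). */
#define DOMAIN_KERNEL   0
#define DOMAIN_USER     1
#define DOMAIN_IO       2
#define DOMAIN_VECTORS  3

/* Architectural 2-bit access encodings. */
#define DOMAIN_NOACCESS 0
#define DOMAIN_CLIENT   1
#define DOMAIN_MANAGER  3

#define domain_mask(dom)      (3 << (2 * (dom)))
#define domain_val(dom, type) ((type) << (2 * (dom)))

int main(void)
{
	unsigned int dacr = domain_val(DOMAIN_USER,    DOMAIN_CLIENT)  |
			    domain_val(DOMAIN_KERNEL,  DOMAIN_MANAGER) |
			    domain_val(DOMAIN_IO,      DOMAIN_CLIENT)  |
			    domain_val(DOMAIN_VECTORS, DOMAIN_CLIENT);

	/* Deny the user domain, as software-based user access control would... */
	dacr = (dacr & ~domain_mask(DOMAIN_USER)) |
	       domain_val(DOMAIN_USER, DOMAIN_NOACCESS);

	/* ...the vectors field is untouched, so exception entry keeps working. */
	printf("DACR = %08x, vectors field = %u\n", dacr,
	       (dacr & domain_mask(DOMAIN_VECTORS)) >> (2 * DOMAIN_VECTORS));
	return 0;
}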
Signed-off-by: Russell King --- arch/arm/include/asm/domain.h | 4 +++- arch/arm/include/asm/pgtable-2level-hwdef.h | 1 + arch/arm/mm/mmu.c | 4 ++-- arch/arm/mm/pgd.c | 10 ++++++++++ 4 files changed, 16 insertions(+), 3 deletions(-) (limited to 'arch/arm/include/asm') diff --git a/arch/arm/include/asm/domain.h b/arch/arm/include/asm/domain.h index 08b601e69ddc..396a12e486fe 100644 --- a/arch/arm/include/asm/domain.h +++ b/arch/arm/include/asm/domain.h @@ -43,6 +43,7 @@ #define DOMAIN_USER 1 #define DOMAIN_IO 0 #endif +#define DOMAIN_VECTORS 3 /* * Domain types @@ -62,7 +63,8 @@ (domain_val(DOMAIN_USER, DOMAIN_CLIENT) | \ domain_val(DOMAIN_KERNEL, DOMAIN_MANAGER) | \ domain_val(DOMAIN_TABLE, DOMAIN_MANAGER) | \ - domain_val(DOMAIN_IO, DOMAIN_CLIENT)) + domain_val(DOMAIN_IO, DOMAIN_CLIENT) | \ + domain_val(DOMAIN_VECTORS, DOMAIN_CLIENT)) #ifndef __ASSEMBLY__ diff --git a/arch/arm/include/asm/pgtable-2level-hwdef.h b/arch/arm/include/asm/pgtable-2level-hwdef.h index 5e68278e953e..d0131ee6f6af 100644 --- a/arch/arm/include/asm/pgtable-2level-hwdef.h +++ b/arch/arm/include/asm/pgtable-2level-hwdef.h @@ -23,6 +23,7 @@ #define PMD_PXNTABLE (_AT(pmdval_t, 1) << 2) /* v7 */ #define PMD_BIT4 (_AT(pmdval_t, 1) << 4) #define PMD_DOMAIN(x) (_AT(pmdval_t, (x)) << 5) +#define PMD_DOMAIN_MASK PMD_DOMAIN(0x0f) #define PMD_PROTECTION (_AT(pmdval_t, 1) << 9) /* v5 */ /* * - section diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index 6ca7d9aa896f..a016de248034 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -291,13 +291,13 @@ static struct mem_type mem_types[] = { .prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY | L_PTE_RDONLY, .prot_l1 = PMD_TYPE_TABLE, - .domain = DOMAIN_USER, + .domain = DOMAIN_VECTORS, }, [MT_HIGH_VECTORS] = { .prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY | L_PTE_USER | L_PTE_RDONLY, .prot_l1 = PMD_TYPE_TABLE, - .domain = DOMAIN_USER, + .domain = DOMAIN_VECTORS, }, [MT_MEMORY_RWX] = { .prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY, diff --git a/arch/arm/mm/pgd.c b/arch/arm/mm/pgd.c index a3681f11dd9f..e683db1b90a3 100644 --- a/arch/arm/mm/pgd.c +++ b/arch/arm/mm/pgd.c @@ -84,6 +84,16 @@ pgd_t *pgd_alloc(struct mm_struct *mm) if (!new_pte) goto no_pte; +#ifndef CONFIG_ARM_LPAE + /* + * Modify the PTE pointer to have the correct domain. This + * needs to be the vectors domain to avoid the low vectors + * being unmapped. + */ + pmd_val(*new_pmd) &= ~PMD_DOMAIN_MASK; + pmd_val(*new_pmd) |= PMD_DOMAIN(DOMAIN_VECTORS); +#endif + init_pud = pud_offset(init_pgd, 0); init_pmd = pmd_offset(init_pud, 0); init_pte = pte_offset_map(init_pmd, 0); -- cgit v1.2.3-55-g7522 From 1fb6755f16872ad256c18cce2830f9087502dffd Mon Sep 17 00:00:00 2001 From: Russell King Date: Fri, 21 Aug 2015 09:42:10 +0100 Subject: ARM: domains: remove DOMAIN_TABLE DOMAIN_TABLE is not used; in any case, it aliases to the kernel domain. Remove this definition. 
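A brief aside on why this is safe: DOMAIN_TABLE shares DOMAIN_KERNEL's domain number, so its domain_val() term sets exactly the same DACR bits, and dropping it cannot change DACR_INIT. A stand-alone check of that claim (domain numbers for the !CONFIG_IO_36 case; manager encoding assumed to be the architectural 3):

#include <assert.h>

#define DOMAIN_KERNEL  0
#define DOMAIN_TABLE   0  /* alias of DOMAIN_KERNEL; removed by this patch */
#define DOMAIN_MANAGER 3

#define domain_val(dom, type) ((type) << (2 * (dom)))

int main(void)
{
	/* ORing in the DOMAIN_TABLE term is a no-op: identical bit positions. */
	assert((domain_val(DOMAIN_KERNEL, DOMAIN_MANAGER) |
		domain_val(DOMAIN_TABLE, DOMAIN_MANAGER)) ==
	       domain_val(DOMAIN_KERNEL, DOMAIN_MANAGER));
	return 0;
}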
Signed-off-by: Russell King --- arch/arm/include/asm/domain.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'arch/arm/include/asm') diff --git a/arch/arm/include/asm/domain.h b/arch/arm/include/asm/domain.h index 396a12e486fe..2be929549938 100644 --- a/arch/arm/include/asm/domain.h +++ b/arch/arm/include/asm/domain.h @@ -34,12 +34,10 @@ */ #ifndef CONFIG_IO_36 #define DOMAIN_KERNEL 0 -#define DOMAIN_TABLE 0 #define DOMAIN_USER 1 #define DOMAIN_IO 2 #else #define DOMAIN_KERNEL 2 -#define DOMAIN_TABLE 2 #define DOMAIN_USER 1 #define DOMAIN_IO 0 #endif @@ -62,7 +60,6 @@ #define DACR_INIT \ (domain_val(DOMAIN_USER, DOMAIN_CLIENT) | \ domain_val(DOMAIN_KERNEL, DOMAIN_MANAGER) | \ - domain_val(DOMAIN_TABLE, DOMAIN_MANAGER) | \ domain_val(DOMAIN_IO, DOMAIN_CLIENT) | \ domain_val(DOMAIN_VECTORS, DOMAIN_CLIENT)) -- cgit v1.2.3-55-g7522 From b64d1f66517a89b9b0f6bd0bca86b05a55a5e742 Mon Sep 17 00:00:00 2001 From: Russell King Date: Tue, 18 Aug 2015 23:06:25 +0100 Subject: ARM: uaccess: simplify user access assembly The user assembly for byte and word accesses was virtually identical. Rather than duplicating this, use a macro instead. Acked-by: Will Deacon Signed-off-by: Russell King --- arch/arm/include/asm/uaccess.h | 47 +++++++++++------------------------------- 1 file changed, 12 insertions(+), 35 deletions(-) (limited to 'arch/arm/include/asm') diff --git a/arch/arm/include/asm/uaccess.h b/arch/arm/include/asm/uaccess.h index 74b17d09ef7a..4cf54ebe408a 100644 --- a/arch/arm/include/asm/uaccess.h +++ b/arch/arm/include/asm/uaccess.h @@ -311,9 +311,9 @@ do { \ (x) = (__typeof__(*(ptr)))__gu_val; \ } while (0) -#define __get_user_asm_byte(x, addr, err) \ +#define __get_user_asm(x, addr, err, instr) \ __asm__ __volatile__( \ - "1: " TUSER(ldrb) " %1,[%2],#0\n" \ + "1: " TUSER(instr) " %1, [%2], #0\n" \ "2:\n" \ " .pushsection .text.fixup,\"ax\"\n" \ " .align 2\n" \ @@ -329,6 +329,9 @@ do { \ : "r" (addr), "i" (-EFAULT) \ : "cc") +#define __get_user_asm_byte(x, addr, err) \ + __get_user_asm(x, addr, err, ldrb) + #ifndef __ARMEB__ #define __get_user_asm_half(x, __gu_addr, err) \ ({ \ @@ -348,22 +351,7 @@ do { \ #endif #define __get_user_asm_word(x, addr, err) \ - __asm__ __volatile__( \ - "1: " TUSER(ldr) " %1,[%2],#0\n" \ - "2:\n" \ - " .pushsection .text.fixup,\"ax\"\n" \ - " .align 2\n" \ - "3: mov %0, %3\n" \ - " mov %1, #0\n" \ - " b 2b\n" \ - " .popsection\n" \ - " .pushsection __ex_table,\"a\"\n" \ - " .align 3\n" \ - " .long 1b, 3b\n" \ - " .popsection" \ - : "+r" (err), "=&r" (x) \ - : "r" (addr), "i" (-EFAULT) \ - : "cc") + __get_user_asm(x, addr, err, ldr) #define __put_user(x, ptr) \ ({ \ @@ -393,9 +381,9 @@ do { \ } \ } while (0) -#define __put_user_asm_byte(x, __pu_addr, err) \ +#define __put_user_asm(x, __pu_addr, err, instr) \ __asm__ __volatile__( \ - "1: " TUSER(strb) " %1,[%2],#0\n" \ + "1: " TUSER(instr) " %1, [%2], #0\n" \ "2:\n" \ " .pushsection .text.fixup,\"ax\"\n" \ " .align 2\n" \ @@ -410,6 +398,9 @@ do { \ : "r" (x), "r" (__pu_addr), "i" (-EFAULT) \ : "cc") +#define __put_user_asm_byte(x, __pu_addr, err) \ + __put_user_asm(x, __pu_addr, err, strb) + #ifndef __ARMEB__ #define __put_user_asm_half(x, __pu_addr, err) \ ({ \ @@ -427,21 +418,7 @@ do { \ #endif #define __put_user_asm_word(x, __pu_addr, err) \ - __asm__ __volatile__( \ - "1: " TUSER(str) " %1,[%2],#0\n" \ - "2:\n" \ - " .pushsection .text.fixup,\"ax\"\n" \ - " .align 2\n" \ - "3: mov %0, %3\n" \ - " b 2b\n" \ - " .popsection\n" \ - " .pushsection __ex_table,\"a\"\n" \ - " .align 3\n" \ - " .long 1b, 3b\n" \ - " 
.popsection" \ - : "+r" (err) \ - : "r" (x), "r" (__pu_addr), "i" (-EFAULT) \ - : "cc") + __put_user_asm(x, __pu_addr, err, str) #ifndef __ARMEB__ #define __reg_oper0 "%R2" -- cgit v1.2.3-55-g7522 From 01e09a28167c338684606b70797422da3bbb6650 Mon Sep 17 00:00:00 2001 From: Russell King Date: Thu, 20 Aug 2015 14:22:48 +0100 Subject: ARM: entry: get rid of asm_trace_hardirqs_on_cond There's no need for this macro, it can use a default for the condition argument. Acked-by: Will Deacon Signed-off-by: Russell King --- arch/arm/include/asm/assembler.h | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'arch/arm/include/asm') diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h index 4abe57279c66..742495eb5526 100644 --- a/arch/arm/include/asm/assembler.h +++ b/arch/arm/include/asm/assembler.h @@ -116,7 +116,7 @@ #endif .endm - .macro asm_trace_hardirqs_on_cond, cond + .macro asm_trace_hardirqs_on, cond=al #if defined(CONFIG_TRACE_IRQFLAGS) /* * actually the registers should be pushed and pop'd conditionally, but @@ -128,10 +128,6 @@ #endif .endm - .macro asm_trace_hardirqs_on - asm_trace_hardirqs_on_cond al - .endm - .macro disable_irq disable_irq_notrace asm_trace_hardirqs_off @@ -173,7 +169,7 @@ .macro restore_irqs, oldcpsr tst \oldcpsr, #PSR_I_BIT - asm_trace_hardirqs_on_cond eq + asm_trace_hardirqs_on cond=eq restore_irqs_notrace \oldcpsr .endm -- cgit v1.2.3-55-g7522 From 3302caddf10ad50710dbb7a94ccbdb3ad5bf1412 Mon Sep 17 00:00:00 2001 From: Russell King Date: Thu, 20 Aug 2015 16:13:37 +0100 Subject: ARM: entry: efficiency cleanups Make the "fast" syscall return path fast again. The addition of IRQ tracing and context tracking has made this path grossly inefficient. We can do much better if these options are enabled if we save the syscall return code on the stack - we then don't need to save a bunch of registers around every single callout to C code. 
Acked-by: Will Deacon Signed-off-by: Russell King --- arch/arm/include/asm/assembler.h | 16 +++++++--- arch/arm/include/asm/thread_info.h | 20 +++++-------- arch/arm/kernel/entry-common.S | 61 ++++++++++++++++++++++++++++---------- arch/arm/kernel/signal.c | 6 ++++ 4 files changed, 71 insertions(+), 32 deletions(-) (limited to 'arch/arm/include/asm') diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h index 742495eb5526..5a5504f90d5f 100644 --- a/arch/arm/include/asm/assembler.h +++ b/arch/arm/include/asm/assembler.h @@ -108,29 +108,37 @@ .endm #endif - .macro asm_trace_hardirqs_off + .macro asm_trace_hardirqs_off, save=1 #if defined(CONFIG_TRACE_IRQFLAGS) + .if \save stmdb sp!, {r0-r3, ip, lr} + .endif bl trace_hardirqs_off + .if \save ldmia sp!, {r0-r3, ip, lr} + .endif #endif .endm - .macro asm_trace_hardirqs_on, cond=al + .macro asm_trace_hardirqs_on, cond=al, save=1 #if defined(CONFIG_TRACE_IRQFLAGS) /* * actually the registers should be pushed and pop'd conditionally, but * after bl the flags are certainly clobbered */ + .if \save stmdb sp!, {r0-r3, ip, lr} + .endif bl\cond trace_hardirqs_on + .if \save ldmia sp!, {r0-r3, ip, lr} + .endif #endif .endm - .macro disable_irq + .macro disable_irq, save=1 disable_irq_notrace - asm_trace_hardirqs_off + asm_trace_hardirqs_off \save .endm .macro enable_irq diff --git a/arch/arm/include/asm/thread_info.h b/arch/arm/include/asm/thread_info.h index bd32eded3e50..71e0ffcedf8e 100644 --- a/arch/arm/include/asm/thread_info.h +++ b/arch/arm/include/asm/thread_info.h @@ -136,22 +136,18 @@ extern int vfp_restore_user_hwstate(struct user_vfp __user *, /* * thread information flags: - * TIF_SYSCALL_TRACE - syscall trace active - * TIF_SYSCAL_AUDIT - syscall auditing active - * TIF_SIGPENDING - signal pending - * TIF_NEED_RESCHED - rescheduling necessary - * TIF_NOTIFY_RESUME - callback before returning to user * TIF_USEDFPU - FPU was used by this task this quantum (SMP) * TIF_POLLING_NRFLAG - true if poll_idle() is polling TIF_NEED_RESCHED */ -#define TIF_SIGPENDING 0 -#define TIF_NEED_RESCHED 1 +#define TIF_SIGPENDING 0 /* signal pending */ +#define TIF_NEED_RESCHED 1 /* rescheduling necessary */ #define TIF_NOTIFY_RESUME 2 /* callback before returning to user */ -#define TIF_UPROBE 7 -#define TIF_SYSCALL_TRACE 8 -#define TIF_SYSCALL_AUDIT 9 -#define TIF_SYSCALL_TRACEPOINT 10 -#define TIF_SECCOMP 11 /* seccomp syscall filtering active */ +#define TIF_UPROBE 3 /* breakpointed or singlestepping */ +#define TIF_SYSCALL_TRACE 4 /* syscall trace active */ +#define TIF_SYSCALL_AUDIT 5 /* syscall auditing active */ +#define TIF_SYSCALL_TRACEPOINT 6 /* syscall tracepoint instrumentation */ +#define TIF_SECCOMP 7 /* seccomp syscall filtering active */ + #define TIF_NOHZ 12 /* in adaptive nohz mode */ #define TIF_USING_IWMMXT 17 #define TIF_MEMDIE 18 /* is terminating due to OOM killer */ diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S index 92828a1dec80..dd3721d1185e 100644 --- a/arch/arm/kernel/entry-common.S +++ b/arch/arm/kernel/entry-common.S @@ -24,35 +24,55 @@ .align 5 +#if !(IS_ENABLED(CONFIG_TRACE_IRQFLAGS) || IS_ENABLED(CONFIG_CONTEXT_TRACKING)) /* - * This is the fast syscall return path. We do as little as - * possible here, and this includes saving r0 back into the SVC - * stack. + * This is the fast syscall return path. We do as little as possible here, + * such as avoiding writing r0 to the stack. 
We only use this path if we + * have tracing and context tracking disabled - the overheads from those + * features make this path too inefficient. */ ret_fast_syscall: UNWIND(.fnstart ) UNWIND(.cantunwind ) - disable_irq @ disable interrupts + disable_irq_notrace @ disable interrupts ldr r1, [tsk, #TI_FLAGS] @ re-check for syscall tracing - tst r1, #_TIF_SYSCALL_WORK - bne __sys_trace_return - tst r1, #_TIF_WORK_MASK + tst r1, #_TIF_SYSCALL_WORK | _TIF_WORK_MASK bne fast_work_pending - asm_trace_hardirqs_on /* perform architecture specific actions before user return */ arch_ret_to_user r1, lr - ct_user_enter restore_user_regs fast = 1, offset = S_OFF UNWIND(.fnend ) +ENDPROC(ret_fast_syscall) -/* - * Ok, we need to do extra processing, enter the slow path. - */ + /* Ok, we need to do extra processing, enter the slow path. */ fast_work_pending: str r0, [sp, #S_R0+S_OFF]! @ returned r0 -work_pending: + /* fall through to work_pending */ +#else +/* + * The "replacement" ret_fast_syscall for when tracing or context tracking + * is enabled. As we will need to call out to some C functions, we save + * r0 first to avoid needing to save registers around each C function call. + */ +ret_fast_syscall: + UNWIND(.fnstart ) + UNWIND(.cantunwind ) + str r0, [sp, #S_R0 + S_OFF]! @ save returned r0 + disable_irq_notrace @ disable interrupts + ldr r1, [tsk, #TI_FLAGS] @ re-check for syscall tracing + tst r1, #_TIF_SYSCALL_WORK | _TIF_WORK_MASK + beq no_work_pending + UNWIND(.fnend ) +ENDPROC(ret_fast_syscall) + + /* Slower path - fall through to work_pending */ +#endif + + tst r1, #_TIF_SYSCALL_WORK + bne __sys_trace_return_nosave +slow_work_pending: mov r0, sp @ 'regs' mov r2, why @ 'syscall' bl do_work_pending @@ -64,16 +84,19 @@ work_pending: /* * "slow" syscall return path. "why" tells us if this was a real syscall. + * IRQs may be enabled here, so always disable them. Note that we use the + * "notrace" version to avoid calling into the tracing code unnecessarily. + * do_work_pending() will update this state if necessary. */ ENTRY(ret_to_user) ret_slow_syscall: - disable_irq @ disable interrupts + disable_irq_notrace @ disable interrupts ENTRY(ret_to_user_from_irq) ldr r1, [tsk, #TI_FLAGS] tst r1, #_TIF_WORK_MASK - bne work_pending + bne slow_work_pending no_work_pending: - asm_trace_hardirqs_on + asm_trace_hardirqs_on save = 0 /* perform architecture specific actions before user return */ arch_ret_to_user r1, lr @@ -251,6 +274,12 @@ __sys_trace_return: bl syscall_trace_exit b ret_slow_syscall +__sys_trace_return_nosave: + asm_trace_hardirqs_off save=0 + mov r0, sp + bl syscall_trace_exit + b ret_slow_syscall + .align 5 #ifdef CONFIG_ALIGNMENT_TRAP .type __cr_alignment, #object diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c index 423663e23791..b6cda06b455f 100644 --- a/arch/arm/kernel/signal.c +++ b/arch/arm/kernel/signal.c @@ -562,6 +562,12 @@ static int do_signal(struct pt_regs *regs, int syscall) asmlinkage int do_work_pending(struct pt_regs *regs, unsigned int thread_flags, int syscall) { + /* + * The assembly code enters us with IRQs off, but it hasn't + * informed the tracing code of that for efficiency reasons. + * Update the trace code with the current status. 
+ */ + trace_hardirqs_off(); do { if (likely(thread_flags & _TIF_NEED_RESCHED)) { schedule(); -- cgit v1.2.3-55-g7522 From 3fba7e23f754a9a6e639b640fa2a393712ffe1b8 Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 19 Aug 2015 11:02:28 +0100 Subject: ARM: uaccess: provide uaccess_save_and_enable() and uaccess_restore() Provide uaccess_save_and_enable() and uaccess_restore() to permit control of userspace visibility to the kernel, and hook these into the appropriate places in the kernel where we need to access userspace. Signed-off-by: Russell King --- arch/arm/include/asm/futex.h | 19 ++++++++-- arch/arm/include/asm/uaccess.h | 71 +++++++++++++++++++++++++++++++++++--- arch/arm/kernel/armksyms.c | 6 ++-- arch/arm/lib/clear_user.S | 6 ++-- arch/arm/lib/copy_from_user.S | 6 ++-- arch/arm/lib/copy_to_user.S | 6 ++-- arch/arm/lib/uaccess_with_memcpy.c | 4 +-- 7 files changed, 97 insertions(+), 21 deletions(-) (limited to 'arch/arm/include/asm') diff --git a/arch/arm/include/asm/futex.h b/arch/arm/include/asm/futex.h index 5eed82809d82..6795368ad023 100644 --- a/arch/arm/include/asm/futex.h +++ b/arch/arm/include/asm/futex.h @@ -22,8 +22,11 @@ #ifdef CONFIG_SMP #define __futex_atomic_op(insn, ret, oldval, tmp, uaddr, oparg) \ +({ \ + unsigned int __ua_flags; \ smp_mb(); \ prefetchw(uaddr); \ + __ua_flags = uaccess_save_and_enable(); \ __asm__ __volatile__( \ "1: ldrex %1, [%3]\n" \ " " insn "\n" \ @@ -34,12 +37,15 @@ __futex_atomic_ex_table("%5") \ : "=&r" (ret), "=&r" (oldval), "=&r" (tmp) \ : "r" (uaddr), "r" (oparg), "Ir" (-EFAULT) \ - : "cc", "memory") + : "cc", "memory"); \ + uaccess_restore(__ua_flags); \ +}) static inline int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, u32 oldval, u32 newval) { + unsigned int __ua_flags; int ret; u32 val; @@ -49,6 +55,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, smp_mb(); /* Prefetching cannot fault */ prefetchw(uaddr); + __ua_flags = uaccess_save_and_enable(); __asm__ __volatile__("@futex_atomic_cmpxchg_inatomic\n" "1: ldrex %1, [%4]\n" " teq %1, %2\n" @@ -61,6 +68,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, : "=&r" (ret), "=&r" (val) : "r" (oldval), "r" (newval), "r" (uaddr), "Ir" (-EFAULT) : "cc", "memory"); + uaccess_restore(__ua_flags); smp_mb(); *uval = val; @@ -73,6 +81,8 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, #include #define __futex_atomic_op(insn, ret, oldval, tmp, uaddr, oparg) \ +({ \ + unsigned int __ua_flags = uaccess_save_and_enable(); \ __asm__ __volatile__( \ "1: " TUSER(ldr) " %1, [%3]\n" \ " " insn "\n" \ @@ -81,12 +91,15 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, __futex_atomic_ex_table("%5") \ : "=&r" (ret), "=&r" (oldval), "=&r" (tmp) \ : "r" (uaddr), "r" (oparg), "Ir" (-EFAULT) \ - : "cc", "memory") + : "cc", "memory"); \ + uaccess_restore(__ua_flags); \ +}) static inline int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, u32 oldval, u32 newval) { + unsigned int __ua_flags; int ret = 0; u32 val; @@ -94,6 +107,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, return -EFAULT; preempt_disable(); + __ua_flags = uaccess_save_and_enable(); __asm__ __volatile__("@futex_atomic_cmpxchg_inatomic\n" "1: " TUSER(ldr) " %1, [%4]\n" " teq %1, %2\n" @@ -103,6 +117,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, : "+r" (ret), "=&r" (val) : "r" (oldval), "r" (newval), "r" (uaddr), "Ir" (-EFAULT) : "cc", "memory"); + uaccess_restore(__ua_flags); *uval = val; preempt_enable(); diff --git 
a/arch/arm/include/asm/uaccess.h b/arch/arm/include/asm/uaccess.h index 74b17d09ef7a..82880132f941 100644 --- a/arch/arm/include/asm/uaccess.h +++ b/arch/arm/include/asm/uaccess.h @@ -49,6 +49,21 @@ struct exception_table_entry extern int fixup_exception(struct pt_regs *regs); +/* + * These two functions allow hooking accesses to userspace to increase + * system integrity by ensuring that the kernel can not inadvertently + * perform such accesses (eg, via list poison values) which could then + * be exploited for privilege escalation. + */ +static inline unsigned int uaccess_save_and_enable(void) +{ + return 0; +} + +static inline void uaccess_restore(unsigned int flags) +{ +} + /* * These two are intentionally not defined anywhere - if the kernel * code generates any references to them, that's a bug. @@ -165,6 +180,7 @@ extern int __get_user_64t_4(void *); register typeof(x) __r2 asm("r2"); \ register unsigned long __l asm("r1") = __limit; \ register int __e asm("r0"); \ + unsigned int __ua_flags = uaccess_save_and_enable(); \ switch (sizeof(*(__p))) { \ case 1: \ if (sizeof((x)) >= 8) \ @@ -192,6 +208,7 @@ extern int __get_user_64t_4(void *); break; \ default: __e = __get_user_bad(); break; \ } \ + uaccess_restore(__ua_flags); \ x = (typeof(*(p))) __r2; \ __e; \ }) @@ -224,6 +241,7 @@ extern int __put_user_8(void *, unsigned long long); register const typeof(*(p)) __user *__p asm("r0") = __tmp_p; \ register unsigned long __l asm("r1") = __limit; \ register int __e asm("r0"); \ + unsigned int __ua_flags = uaccess_save_and_enable(); \ switch (sizeof(*(__p))) { \ case 1: \ __put_user_x(__r2, __p, __e, __l, 1); \ @@ -239,6 +257,7 @@ extern int __put_user_8(void *, unsigned long long); break; \ default: __e = __put_user_bad(); break; \ } \ + uaccess_restore(__ua_flags); \ __e; \ }) @@ -300,14 +319,17 @@ static inline void set_fs(mm_segment_t fs) do { \ unsigned long __gu_addr = (unsigned long)(ptr); \ unsigned long __gu_val; \ + unsigned int __ua_flags; \ __chk_user_ptr(ptr); \ might_fault(); \ + __ua_flags = uaccess_save_and_enable(); \ switch (sizeof(*(ptr))) { \ case 1: __get_user_asm_byte(__gu_val, __gu_addr, err); break; \ case 2: __get_user_asm_half(__gu_val, __gu_addr, err); break; \ case 4: __get_user_asm_word(__gu_val, __gu_addr, err); break; \ default: (__gu_val) = __get_user_bad(); \ } \ + uaccess_restore(__ua_flags); \ (x) = (__typeof__(*(ptr)))__gu_val; \ } while (0) @@ -381,9 +403,11 @@ do { \ #define __put_user_err(x, ptr, err) \ do { \ unsigned long __pu_addr = (unsigned long)(ptr); \ + unsigned int __ua_flags; \ __typeof__(*(ptr)) __pu_val = (x); \ __chk_user_ptr(ptr); \ might_fault(); \ + __ua_flags = uaccess_save_and_enable(); \ switch (sizeof(*(ptr))) { \ case 1: __put_user_asm_byte(__pu_val, __pu_addr, err); break; \ case 2: __put_user_asm_half(__pu_val, __pu_addr, err); break; \ case 4: __put_user_asm_word(__pu_val, __pu_addr, err); break; \ case 8: __put_user_asm_dword(__pu_val, __pu_addr, err); break; \ default: __put_user_bad(); \ } \ + uaccess_restore(__ua_flags); \ } while (0) #define __put_user_asm_byte(x, __pu_addr, err) \ @@ -474,11 +499,46 @@ do { \ #ifdef CONFIG_MMU -extern unsigned long __must_check __copy_from_user(void *to, const void __user *from, unsigned long n); -extern unsigned long __must_check __copy_to_user(void __user *to, const void *from, unsigned long n); -extern unsigned long __must_check __copy_to_user_std(void __user *to, const void *from, unsigned long n); -extern unsigned long __must_check __clear_user(void __user *addr, unsigned long n); -extern unsigned long __must_check
__clear_user_std(void __user *addr, unsigned long n); +extern unsigned long __must_check +arm_copy_from_user(void *to, const void __user *from, unsigned long n); + +static inline unsigned long __must_check +__copy_from_user(void *to, const void __user *from, unsigned long n) +{ + unsigned int __ua_flags = uaccess_save_and_enable(); + n = arm_copy_from_user(to, from, n); + uaccess_restore(__ua_flags); + return n; +} + +extern unsigned long __must_check +arm_copy_to_user(void __user *to, const void *from, unsigned long n); +extern unsigned long __must_check +__copy_to_user_std(void __user *to, const void *from, unsigned long n); + +static inline unsigned long __must_check +__copy_to_user(void __user *to, const void *from, unsigned long n) +{ + unsigned int __ua_flags = uaccess_save_and_enable(); + n = arm_copy_to_user(to, from, n); + uaccess_restore(__ua_flags); + return n; +} + +extern unsigned long __must_check +arm_clear_user(void __user *addr, unsigned long n); +extern unsigned long __must_check +__clear_user_std(void __user *addr, unsigned long n); + +static inline unsigned long __must_check +__clear_user(void __user *addr, unsigned long n) +{ + unsigned int __ua_flags = uaccess_save_and_enable(); + n = arm_clear_user(addr, n); + uaccess_restore(__ua_flags); + return n; +} + #else #define __copy_from_user(to, from, n) (memcpy(to, (void __force *)from, n), 0) #define __copy_to_user(to, from, n) (memcpy((void __force *)to, from, n), 0) @@ -511,6 +571,7 @@ static inline unsigned long __must_check clear_user(void __user *to, unsigned lo return n; } +/* These are from lib/ code, and use __get_user() and friends */ extern long strncpy_from_user(char *dest, const char __user *src, long count); extern __must_check long strlen_user(const char __user *str); diff --git a/arch/arm/kernel/armksyms.c b/arch/arm/kernel/armksyms.c index a88671cfe1ff..a35d72d30b56 100644 --- a/arch/arm/kernel/armksyms.c +++ b/arch/arm/kernel/armksyms.c @@ -91,9 +91,9 @@ EXPORT_SYMBOL(__memzero); #ifdef CONFIG_MMU EXPORT_SYMBOL(copy_page); -EXPORT_SYMBOL(__copy_from_user); -EXPORT_SYMBOL(__copy_to_user); -EXPORT_SYMBOL(__clear_user); +EXPORT_SYMBOL(arm_copy_from_user); +EXPORT_SYMBOL(arm_copy_to_user); +EXPORT_SYMBOL(arm_clear_user); EXPORT_SYMBOL(__get_user_1); EXPORT_SYMBOL(__get_user_2); diff --git a/arch/arm/lib/clear_user.S b/arch/arm/lib/clear_user.S index 1710fd7db2d5..970d6c043774 100644 --- a/arch/arm/lib/clear_user.S +++ b/arch/arm/lib/clear_user.S @@ -12,14 +12,14 @@ .text -/* Prototype: int __clear_user(void *addr, size_t sz) +/* Prototype: unsigned long arm_clear_user(void *addr, size_t sz) * Purpose : clear some user memory * Params : addr - user memory address to clear * : sz - number of bytes to clear * Returns : number of bytes NOT cleared */ ENTRY(__clear_user_std) -WEAK(__clear_user) +WEAK(arm_clear_user) stmfd sp!, {r1, lr} mov r2, #0 cmp r1, #4 @@ -44,7 +44,7 @@ WEAK(__clear_user) USER( strnebt r2, [r0]) mov r0, #0 ldmfd sp!, {r1, pc} -ENDPROC(__clear_user) +ENDPROC(arm_clear_user) ENDPROC(__clear_user_std) .pushsection .text.fixup,"ax" diff --git a/arch/arm/lib/copy_from_user.S b/arch/arm/lib/copy_from_user.S index 7a235b9952be..1512bebfbf1b 100644 --- a/arch/arm/lib/copy_from_user.S +++ b/arch/arm/lib/copy_from_user.S @@ -17,7 +17,7 @@ /* * Prototype: * - * size_t __copy_from_user(void *to, const void *from, size_t n) + * size_t arm_copy_from_user(void *to, const void *from, size_t n) * * Purpose: * @@ -89,11 +89,11 @@ .text -ENTRY(__copy_from_user) +ENTRY(arm_copy_from_user) #include 
"copy_template.S" -ENDPROC(__copy_from_user) +ENDPROC(arm_copy_from_user) .pushsection .fixup,"ax" .align 0 diff --git a/arch/arm/lib/copy_to_user.S b/arch/arm/lib/copy_to_user.S index 9648b0675a3e..caf5019d8161 100644 --- a/arch/arm/lib/copy_to_user.S +++ b/arch/arm/lib/copy_to_user.S @@ -17,7 +17,7 @@ /* * Prototype: * - * size_t __copy_to_user(void *to, const void *from, size_t n) + * size_t arm_copy_to_user(void *to, const void *from, size_t n) * * Purpose: * @@ -93,11 +93,11 @@ .text ENTRY(__copy_to_user_std) -WEAK(__copy_to_user) +WEAK(arm_copy_to_user) #include "copy_template.S" -ENDPROC(__copy_to_user) +ENDPROC(arm_copy_to_user) ENDPROC(__copy_to_user_std) .pushsection .text.fixup,"ax" diff --git a/arch/arm/lib/uaccess_with_memcpy.c b/arch/arm/lib/uaccess_with_memcpy.c index 3e58d710013c..77f020e75ccd 100644 --- a/arch/arm/lib/uaccess_with_memcpy.c +++ b/arch/arm/lib/uaccess_with_memcpy.c @@ -136,7 +136,7 @@ out: } unsigned long -__copy_to_user(void __user *to, const void *from, unsigned long n) +arm_copy_to_user(void __user *to, const void *from, unsigned long n) { /* * This test is stubbed out of the main function above to keep @@ -190,7 +190,7 @@ out: return n; } -unsigned long __clear_user(void __user *addr, unsigned long n) +unsigned long arm_clear_user(void __user *addr, unsigned long n) { /* See rational for this in __copy_to_user() above. */ if (n < 64) -- cgit v1.2.3-55-g7522 From 9205b797dbe519a629267ec8c5766cd973d35063 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Mon, 24 Aug 2015 21:49:30 +0100 Subject: ARM: 8421/1: smp: Collapse arch_cpu_idle_dead() into cpu_die() The only caller of cpu_die() on ARM is arch_cpu_idle_dead(), so let's simplify the code by renaming cpu_die() to arch_cpu_idle_dead(). While were here, drop the __ref annotation because __cpuinit is gone nowadays. Signed-off-by: Stephen Boyd Signed-off-by: Russell King --- arch/arm/include/asm/smp.h | 1 - arch/arm/kernel/process.c | 7 ------- arch/arm/kernel/smp.c | 2 +- 3 files changed, 1 insertion(+), 9 deletions(-) (limited to 'arch/arm/include/asm') diff --git a/arch/arm/include/asm/smp.h b/arch/arm/include/asm/smp.h index 318ce89eeff7..ef356659b4f4 100644 --- a/arch/arm/include/asm/smp.h +++ b/arch/arm/include/asm/smp.h @@ -74,7 +74,6 @@ extern void secondary_startup_arm(void); extern int __cpu_disable(void); extern void __cpu_die(unsigned int cpu); -extern void cpu_die(void); extern void arch_send_call_function_single_ipi(int cpu); extern void arch_send_call_function_ipi_mask(const struct cpumask *mask); diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c index f192a2a41719..358984b7f249 100644 --- a/arch/arm/kernel/process.c +++ b/arch/arm/kernel/process.c @@ -91,13 +91,6 @@ void arch_cpu_idle_exit(void) ledtrig_cpu(CPU_LED_IDLE_END); } -#ifdef CONFIG_HOTPLUG_CPU -void arch_cpu_idle_dead(void) -{ - cpu_die(); -} -#endif - void __show_regs(struct pt_regs *regs) { unsigned long flags; diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index 3cd846f48eaf..0aad7cdf2e58 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -266,7 +266,7 @@ void __cpu_die(unsigned int cpu) * of the other hotplug-cpu capable cores, so presumably coming * out of idle fixes this. 
*/ -void __ref cpu_die(void) +void arch_cpu_idle_dead(void) { unsigned int cpu = smp_processor_id(); -- cgit v1.2.3-55-g7522 From 2190fed67ba6f3e8129513929f2395843645e928 Mon Sep 17 00:00:00 2001 From: Russell King Date: Thu, 20 Aug 2015 10:32:02 +0100 Subject: ARM: entry: provide uaccess assembly macro hooks Provide hooks into the kernel entry and exit paths to permit control of userspace visibility to the kernel. The intended use is: - on entry to kernel from user, uaccess_disable will be called to disable userspace visibility - on exit from kernel to user, uaccess_enable will be called to enable userspace visibility - on entry from a kernel exception, uaccess_save_and_disable will be called to save the current userspace visibility setting, and disable access - on exit from a kernel exception, uaccess_restore will be called to restore the userspace visibility as it was before the exception occurred. These hooks allow us to keep userspace visibility disabled for the vast majority of the kernel, except for localised regions where we want to explicitly access userspace. Signed-off-by: Russell King --- arch/arm/include/asm/assembler.h | 17 +++++++++++++++++ arch/arm/kernel/entry-armv.S | 30 ++++++++++++++++++++++-------- arch/arm/kernel/entry-common.S | 2 ++ arch/arm/kernel/entry-header.S | 3 +++ arch/arm/mm/abort-ev4.S | 1 + arch/arm/mm/abort-ev5t.S | 1 + arch/arm/mm/abort-ev5tj.S | 1 + arch/arm/mm/abort-ev6.S | 7 ++++--- arch/arm/mm/abort-ev7.S | 1 + arch/arm/mm/abort-lv4t.S | 2 ++ arch/arm/mm/abort-macro.S | 1 + 11 files changed, 55 insertions(+), 11 deletions(-) (limited to 'arch/arm/include/asm') diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h index 4abe57279c66..a91177043467 100644 --- a/arch/arm/include/asm/assembler.h +++ b/arch/arm/include/asm/assembler.h @@ -445,6 +445,23 @@ THUMB( orr \reg , \reg , #PSR_T_BIT ) #endif .endm + .macro uaccess_disable, tmp, isb=1 + .endm + + .macro uaccess_enable, tmp, isb=1 + .endm + + .macro uaccess_save, tmp + .endm + + .macro uaccess_restore + .endm + + .macro uaccess_save_and_disable, tmp + uaccess_save \tmp + uaccess_disable \tmp + .endm + .irp c,,eq,ne,cs,cc,mi,pl,vs,vc,hi,ls,ge,lt,gt,le,hs,lo .macro ret\c, reg #if __LINUX_ARM_ARCH__ < 6 diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S index d19adcf6c580..61f00a3f3047 100644 --- a/arch/arm/kernel/entry-armv.S +++ b/arch/arm/kernel/entry-armv.S @@ -149,10 +149,10 @@ ENDPROC(__und_invalid) #define SPFIX(code...) #endif - .macro svc_entry, stack_hole=0, trace=1 + .macro svc_entry, stack_hole=0, trace=1, uaccess=1 UNWIND(.fnstart ) UNWIND(.save {r0 - pc} ) - sub sp, sp, #(S_FRAME_SIZE + \stack_hole - 4) + sub sp, sp, #(S_FRAME_SIZE + 8 + \stack_hole - 4) #ifdef CONFIG_THUMB2_KERNEL SPFIX( str r0, [sp] ) @ temporarily saved SPFIX( mov r0, sp ) @@ -167,7 +167,7 @@ ENDPROC(__und_invalid) ldmia r0, {r3 - r5} add r7, sp, #S_SP - 4 @ here for interlock avoidance mov r6, #-1 @ "" "" "" "" - add r2, sp, #(S_FRAME_SIZE + \stack_hole - 4) + add r2, sp, #(S_FRAME_SIZE + 8 + \stack_hole - 4) SPFIX( addeq r2, r2, #4 ) str r3, [sp, #-4]!
@ save the "real" r0 copied @ from the exception stack @@ -185,6 +185,11 @@ ENDPROC(__und_invalid) @ stmia r7, {r2 - r6} + uaccess_save r0 + .if \uaccess + uaccess_disable r0 + .endif + .if \trace #ifdef CONFIG_TRACE_IRQFLAGS bl trace_hardirqs_off @@ -194,7 +199,7 @@ ENDPROC(__und_invalid) .align 5 __dabt_svc: - svc_entry + svc_entry uaccess=0 mov r2, sp dabt_helper THUMB( ldr r5, [sp, #S_PSR] ) @ potentially updated CPSR @@ -368,7 +373,7 @@ ENDPROC(__fiq_abt) #error "sizeof(struct pt_regs) must be a multiple of 8" #endif - .macro usr_entry, trace=1 + .macro usr_entry, trace=1, uaccess=1 UNWIND(.fnstart ) UNWIND(.cantunwind ) @ don't unwind the user space sub sp, sp, #S_FRAME_SIZE @@ -400,6 +405,10 @@ ENDPROC(__fiq_abt) ARM( stmdb r0, {sp, lr}^ ) THUMB( store_user_sp_lr r0, r1, S_SP - S_PC ) + .if \uaccess + uaccess_disable ip + .endif + @ Enable the alignment trap while in kernel mode ATRAP( teq r8, r7) ATRAP( mcrne p15, 0, r8, c1, c0, 0) @@ -435,7 +444,7 @@ ENDPROC(__fiq_abt) .align 5 __dabt_usr: - usr_entry + usr_entry uaccess=0 kuser_cmpxchg_check mov r2, sp dabt_helper @@ -458,7 +467,7 @@ ENDPROC(__irq_usr) .align 5 __und_usr: - usr_entry + usr_entry uaccess=0 mov r2, r4 mov r3, r5 @@ -484,6 +493,8 @@ __und_usr: 1: ldrt r0, [r4] ARM_BE8(rev r0, r0) @ little endian instruction + uaccess_disable ip + @ r0 = 32-bit ARM instruction which caused the exception @ r2 = PC value for the following instruction (:= regs->ARM_pc) @ r4 = PC value for the faulting instruction @@ -518,9 +529,10 @@ __und_usr_thumb: 2: ldrht r5, [r4] ARM_BE8(rev16 r5, r5) @ little endian instruction cmp r5, #0xe800 @ 32bit instruction if xx != 0 - blo __und_usr_fault_16 @ 16bit undefined instruction + blo __und_usr_fault_16_pan @ 16bit undefined instruction 3: ldrht r0, [r2] ARM_BE8(rev16 r0, r0) @ little endian instruction + uaccess_disable ip add r2, r2, #2 @ r2 is PC + 2, make it PC + 4 str r2, [sp, #S_PC] @ it's a 2x16bit instr, update orr r0, r0, r5, lsl #16 @@ -715,6 +727,8 @@ ENDPROC(no_fp) __und_usr_fault_32: mov r1, #4 b 1f +__und_usr_fault_16_pan: + uaccess_disable ip __und_usr_fault_16: mov r1, #2 1: mov r0, sp diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S index 92828a1dec80..189154980703 100644 --- a/arch/arm/kernel/entry-common.S +++ b/arch/arm/kernel/entry-common.S @@ -173,6 +173,8 @@ ENTRY(vector_swi) USER( ldr scno, [lr, #-4] ) @ get SWI instruction #endif + uaccess_disable tbl + adr tbl, sys_call_table @ load syscall table pointer #if defined(CONFIG_OABI_COMPAT) diff --git a/arch/arm/kernel/entry-header.S b/arch/arm/kernel/entry-header.S index d47b5161b029..0d22ad206d52 100644 --- a/arch/arm/kernel/entry-header.S +++ b/arch/arm/kernel/entry-header.S @@ -215,6 +215,7 @@ blne trace_hardirqs_off #endif .endif + uaccess_restore #ifndef CONFIG_THUMB2_KERNEL @ ARM mode SVC restore @@ -258,6 +259,7 @@ @ on the stack remains correct). 
@ .macro svc_exit_via_fiq + uaccess_restore #ifndef CONFIG_THUMB2_KERNEL @ ARM mode restore mov r0, sp @@ -287,6 +289,7 @@ .macro restore_user_regs, fast = 0, offset = 0 + uaccess_enable r1, isb=0 #ifndef CONFIG_THUMB2_KERNEL @ ARM mode restore mov r2, sp diff --git a/arch/arm/mm/abort-ev4.S b/arch/arm/mm/abort-ev4.S index 54473cd4aba9..b3b31e30cadd 100644 --- a/arch/arm/mm/abort-ev4.S +++ b/arch/arm/mm/abort-ev4.S @@ -19,6 +19,7 @@ ENTRY(v4_early_abort) mrc p15, 0, r1, c5, c0, 0 @ get FSR mrc p15, 0, r0, c6, c0, 0 @ get FAR ldr r3, [r4] @ read aborted ARM instruction + uaccess_disable ip @ disable userspace access bic r1, r1, #1 << 11 | 1 << 10 @ clear bits 11 and 10 of FSR tst r3, #1 << 20 @ L = 1 -> write? orreq r1, r1, #1 << 11 @ yes. diff --git a/arch/arm/mm/abort-ev5t.S b/arch/arm/mm/abort-ev5t.S index c913031b79cc..a6a381a6caa5 100644 --- a/arch/arm/mm/abort-ev5t.S +++ b/arch/arm/mm/abort-ev5t.S @@ -21,6 +21,7 @@ ENTRY(v5t_early_abort) mrc p15, 0, r0, c6, c0, 0 @ get FAR do_thumb_abort fsr=r1, pc=r4, psr=r5, tmp=r3 ldreq r3, [r4] @ read aborted ARM instruction + uaccess_disable ip @ disable user access bic r1, r1, #1 << 11 @ clear bits 11 of FSR teq_ldrd tmp=ip, insn=r3 @ insn was LDRD? beq do_DataAbort @ yes diff --git a/arch/arm/mm/abort-ev5tj.S b/arch/arm/mm/abort-ev5tj.S index 1b80d71adb0f..00ab011bef58 100644 --- a/arch/arm/mm/abort-ev5tj.S +++ b/arch/arm/mm/abort-ev5tj.S @@ -24,6 +24,7 @@ ENTRY(v5tj_early_abort) bne do_DataAbort do_thumb_abort fsr=r1, pc=r4, psr=r5, tmp=r3 ldreq r3, [r4] @ read aborted ARM instruction + uaccess_disable ip @ disable userspace access teq_ldrd tmp=ip, insn=r3 @ insn was LDRD? beq do_DataAbort @ yes tst r3, #1 << 20 @ L = 0 -> write diff --git a/arch/arm/mm/abort-ev6.S b/arch/arm/mm/abort-ev6.S index 113704f30e9f..8801a15aa105 100644 --- a/arch/arm/mm/abort-ev6.S +++ b/arch/arm/mm/abort-ev6.S @@ -26,17 +26,18 @@ ENTRY(v6_early_abort) ldr ip, =0x4107b36 mrc p15, 0, r3, c0, c0, 0 @ get processor id teq ip, r3, lsr #4 @ r0 ARM1136? - bne do_DataAbort + bne 1f tst r5, #PSR_J_BIT @ Java? tsteq r5, #PSR_T_BIT @ Thumb? - bne do_DataAbort + bne 1f bic r1, r1, #1 << 11 @ clear bit 11 of FSR ldr r3, [r4] @ read aborted ARM instruction ARM_BE8(rev r3, r3) teq_ldrd tmp=ip, insn=r3 @ insn was LDRD? - beq do_DataAbort @ yes + beq 1f @ yes tst r3, #1 << 20 @ L = 0 -> write orreq r1, r1, #1 << 11 @ yes. #endif +1: uaccess_disable ip @ disable userspace access b do_DataAbort diff --git a/arch/arm/mm/abort-ev7.S b/arch/arm/mm/abort-ev7.S index 4812ad054214..e8d0e08c227f 100644 --- a/arch/arm/mm/abort-ev7.S +++ b/arch/arm/mm/abort-ev7.S @@ -15,6 +15,7 @@ ENTRY(v7_early_abort) mrc p15, 0, r1, c5, c0, 0 @ get FSR mrc p15, 0, r0, c6, c0, 0 @ get FAR + uaccess_disable ip @ disable userspace access /* * V6 code adjusts the returned DFSR. diff --git a/arch/arm/mm/abort-lv4t.S b/arch/arm/mm/abort-lv4t.S index f3982580c273..6d8e8e3365d1 100644 --- a/arch/arm/mm/abort-lv4t.S +++ b/arch/arm/mm/abort-lv4t.S @@ -26,6 +26,7 @@ ENTRY(v4t_late_abort) #endif bne .data_thumb_abort ldr r8, [r4] @ read arm instruction + uaccess_disable ip @ disable userspace access tst r8, #1 << 20 @ L = 1 -> write? orreq r1, r1, #1 << 11 @ yes. and r7, r8, #15 << 24 @@ -155,6 +156,7 @@ ENTRY(v4t_late_abort) .data_thumb_abort: ldrh r8, [r4] @ read instruction + uaccess_disable ip @ disable userspace access tst r8, #1 << 11 @ L = 1 -> write? 
orreq r1, r1, #1 << 8 @ yes and r7, r8, #15 << 12 diff --git a/arch/arm/mm/abort-macro.S b/arch/arm/mm/abort-macro.S index 50d6c0a900b1..4509bee4e081 100644 --- a/arch/arm/mm/abort-macro.S +++ b/arch/arm/mm/abort-macro.S @@ -13,6 +13,7 @@ tst \psr, #PSR_T_BIT beq not_thumb ldrh \tmp, [\pc] @ Read aborted Thumb instruction + uaccess_disable ip @ disable userspace access and \tmp, \tmp, # 0xfe00 @ Mask opcode field cmp \tmp, # 0x5600 @ Is it ldrsb? orreq \tmp, \tmp, #1 << 11 @ Set L-bit if yes -- cgit v1.2.3-55-g7522 From a5e090acbf545c0a3b04080f8a488b17ec41fe02 Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 19 Aug 2015 20:40:41 +0100 Subject: ARM: software-based privileged-no-access support Provide a software-based implementation of the privileged no-access support found in ARMv8.1. Userspace pages are mapped using a different domain number from the kernel and IO mappings. If we switch the user domain to "no access" when we enter the kernel, we can prevent the kernel from touching userspace. However, the kernel needs to be able to access userspace via the various user accessor functions. With the wrapping in the previous patch, we can temporarily enable access when the kernel needs user access, and re-disable it afterwards. This allows us to trap non-intended accesses to userspace, eg, caused by an inadvertent dereference of the LIST_POISON* values, which, with appropriate user mappings setup, can be made to succeed. This in turn can allow use-after-free bugs to be further exploited than would otherwise be possible. Signed-off-by: Russell King --- arch/arm/Kconfig | 15 +++++++++++++++ arch/arm/include/asm/assembler.h | 30 ++++++++++++++++++++++++++++++ arch/arm/include/asm/domain.h | 21 +++++++++++++++++++-- arch/arm/include/asm/uaccess.h | 14 ++++++++++++++ arch/arm/kernel/process.c | 36 ++++++++++++++++++++++++++++++------ arch/arm/kernel/swp_emulate.c | 3 +++ arch/arm/lib/csumpartialcopyuser.S | 14 ++++++++++++++ 7 files changed, 125 insertions(+), 8 deletions(-) (limited to 'arch/arm/include/asm') diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index a750c1425c3a..e15d5ed4d5f1 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -1694,6 +1694,21 @@ config HIGHPTE bool "Allocate 2nd-level pagetables from highmem" depends on HIGHMEM +config CPU_SW_DOMAIN_PAN + bool "Enable use of CPU domains to implement privileged no-access" + depends on MMU && !ARM_LPAE + default y + help + Increase kernel security by ensuring that normal kernel accesses + are unable to access userspace addresses. This can help prevent + use-after-free bugs becoming an exploitable privilege escalation + by ensuring that magic values (such as LIST_POISON) will always + fault when dereferenced. + + CPUs with low-vector mappings use a best-efforts implementation. + Their lower 1MB needs to remain accessible for the vectors, but + the remainder of userspace will become appropriately inaccessible. + config HW_PERF_EVENTS bool "Enable hardware performance counter support for perf events" depends on PERF_EVENTS diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h index a91177043467..3ae0eda5e64f 100644 --- a/arch/arm/include/asm/assembler.h +++ b/arch/arm/include/asm/assembler.h @@ -446,15 +446,45 @@ THUMB( orr \reg , \reg , #PSR_T_BIT ) .endm .macro uaccess_disable, tmp, isb=1 +#ifdef CONFIG_CPU_SW_DOMAIN_PAN + /* + * Whenever we re-enter userspace, the domains should always be + * set appropriately.
+ */ + mov \tmp, #DACR_UACCESS_DISABLE + mcr p15, 0, \tmp, c3, c0, 0 @ Set domain register + .if \isb + instr_sync + .endif +#endif .endm .macro uaccess_enable, tmp, isb=1 +#ifdef CONFIG_CPU_SW_DOMAIN_PAN + /* + * Whenever we re-enter userspace, the domains should always be + * set appropriately. + */ + mov \tmp, #DACR_UACCESS_ENABLE + mcr p15, 0, \tmp, c3, c0, 0 + .if \isb + instr_sync + .endif +#endif .endm .macro uaccess_save, tmp +#ifdef CONFIG_CPU_SW_DOMAIN_PAN + mrc p15, 0, \tmp, c3, c0, 0 + str \tmp, [sp, #S_FRAME_SIZE] +#endif .endm .macro uaccess_restore +#ifdef CONFIG_CPU_SW_DOMAIN_PAN + ldr r0, [sp, #S_FRAME_SIZE] + mcr p15, 0, r0, c3, c0, 0 +#endif .endm .macro uaccess_save_and_disable, tmp diff --git a/arch/arm/include/asm/domain.h b/arch/arm/include/asm/domain.h index 2be929549938..e878129f2fee 100644 --- a/arch/arm/include/asm/domain.h +++ b/arch/arm/include/asm/domain.h @@ -57,11 +57,29 @@ #define domain_mask(dom) ((3) << (2 * (dom))) #define domain_val(dom,type) ((type) << (2 * (dom))) +#ifdef CONFIG_CPU_SW_DOMAIN_PAN +#define DACR_INIT \ + (domain_val(DOMAIN_USER, DOMAIN_NOACCESS) | \ + domain_val(DOMAIN_KERNEL, DOMAIN_MANAGER) | \ + domain_val(DOMAIN_IO, DOMAIN_CLIENT) | \ + domain_val(DOMAIN_VECTORS, DOMAIN_CLIENT)) +#else #define DACR_INIT \ (domain_val(DOMAIN_USER, DOMAIN_CLIENT) | \ domain_val(DOMAIN_KERNEL, DOMAIN_MANAGER) | \ domain_val(DOMAIN_IO, DOMAIN_CLIENT) | \ domain_val(DOMAIN_VECTORS, DOMAIN_CLIENT)) +#endif + +#define __DACR_DEFAULT \ + domain_val(DOMAIN_KERNEL, DOMAIN_CLIENT) | \ + domain_val(DOMAIN_IO, DOMAIN_CLIENT) | \ + domain_val(DOMAIN_VECTORS, DOMAIN_CLIENT) + +#define DACR_UACCESS_DISABLE \ + (__DACR_DEFAULT | domain_val(DOMAIN_USER, DOMAIN_NOACCESS)) +#define DACR_UACCESS_ENABLE \ + (__DACR_DEFAULT | domain_val(DOMAIN_USER, DOMAIN_CLIENT)) #ifndef __ASSEMBLY__ @@ -76,7 +94,6 @@ static inline unsigned int get_domain(void) return domain; } -#ifdef CONFIG_CPU_USE_DOMAINS static inline void set_domain(unsigned val) { asm volatile( @@ -85,6 +102,7 @@ static inline void set_domain(unsigned val) isb(); } +#ifdef CONFIG_CPU_USE_DOMAINS #define modify_domain(dom,type) \ do { \ unsigned int domain = get_domain(); \ @@ -94,7 +112,6 @@ static inline void set_domain(unsigned val) } while (0) #else -static inline void set_domain(unsigned val) { } static inline void modify_domain(unsigned dom, unsigned type) { } #endif diff --git a/arch/arm/include/asm/uaccess.h b/arch/arm/include/asm/uaccess.h index 82880132f941..01bae13b2cea 100644 --- a/arch/arm/include/asm/uaccess.h +++ b/arch/arm/include/asm/uaccess.h @@ -57,11 +57,25 @@ extern int fixup_exception(struct pt_regs *regs); */ static inline unsigned int uaccess_save_and_enable(void) { +#ifdef CONFIG_CPU_SW_DOMAIN_PAN + unsigned int old_domain = get_domain(); + + /* Set the current domain access to permit user accesses */ + set_domain((old_domain & ~domain_mask(DOMAIN_USER)) | + domain_val(DOMAIN_USER, DOMAIN_CLIENT)); + + return old_domain; +#else return 0; +#endif } static inline void uaccess_restore(unsigned int flags) { +#ifdef CONFIG_CPU_SW_DOMAIN_PAN + /* Restore the user access mask */ + set_domain(flags); +#endif } /* diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c index e722f9b3c9b1..3f18098dfd08 100644 --- a/arch/arm/kernel/process.c +++ b/arch/arm/kernel/process.c @@ -129,12 +129,36 @@ void __show_regs(struct pt_regs *regs) buf[4] = '\0'; #ifndef CONFIG_CPU_V7M - printk("Flags: %s IRQs o%s FIQs o%s Mode %s ISA %s Segment %s\n", - buf, interrupts_enabled(regs) ? 
"n" : "ff", - fast_interrupts_enabled(regs) ? "n" : "ff", - processor_modes[processor_mode(regs)], - isa_modes[isa_mode(regs)], - get_fs() == get_ds() ? "kernel" : "user"); + { + unsigned int domain = get_domain(); + const char *segment; + +#ifdef CONFIG_CPU_SW_DOMAIN_PAN + /* + * Get the domain register for the parent context. In user + * mode, we don't save the DACR, so lets use what it should + * be. For other modes, we place it after the pt_regs struct. + */ + if (user_mode(regs)) + domain = DACR_UACCESS_ENABLE; + else + domain = *(unsigned int *)(regs + 1); +#endif + + if ((domain & domain_mask(DOMAIN_USER)) == + domain_val(DOMAIN_USER, DOMAIN_NOACCESS)) + segment = "none"; + else if (get_fs() == get_ds()) + segment = "kernel"; + else + segment = "user"; + + printk("Flags: %s IRQs o%s FIQs o%s Mode %s ISA %s Segment %s\n", + buf, interrupts_enabled(regs) ? "n" : "ff", + fast_interrupts_enabled(regs) ? "n" : "ff", + processor_modes[processor_mode(regs)], + isa_modes[isa_mode(regs)], segment); + } #else printk("xPSR: %08lx\n", regs->ARM_cpsr); #endif diff --git a/arch/arm/kernel/swp_emulate.c b/arch/arm/kernel/swp_emulate.c index 1361756782c7..5b26e7efa9ea 100644 --- a/arch/arm/kernel/swp_emulate.c +++ b/arch/arm/kernel/swp_emulate.c @@ -141,11 +141,14 @@ static int emulate_swpX(unsigned int address, unsigned int *data, while (1) { unsigned long temp; + unsigned int __ua_flags; + __ua_flags = uaccess_save_and_enable(); if (type == TYPE_SWPB) __user_swpb_asm(*data, address, res, temp); else __user_swp_asm(*data, address, res, temp); + uaccess_restore(__ua_flags); if (likely(res != -EAGAIN) || signal_pending(current)) break; diff --git a/arch/arm/lib/csumpartialcopyuser.S b/arch/arm/lib/csumpartialcopyuser.S index 1d0957e61f89..1712f132b80d 100644 --- a/arch/arm/lib/csumpartialcopyuser.S +++ b/arch/arm/lib/csumpartialcopyuser.S @@ -17,6 +17,19 @@ .text +#ifdef CONFIG_CPU_SW_DOMAIN_PAN + .macro save_regs + mrc p15, 0, ip, c3, c0, 0 + stmfd sp!, {r1, r2, r4 - r8, ip, lr} + uaccess_enable ip + .endm + + .macro load_regs + ldmfd sp!, {r1, r2, r4 - r8, ip, lr} + mcr p15, 0, ip, c3, c0, 0 + ret lr + .endm +#else .macro save_regs stmfd sp!, {r1, r2, r4 - r8, lr} .endm @@ -24,6 +37,7 @@ .macro load_regs ldmfd sp!, {r1, r2, r4 - r8, pc} .endm +#endif .macro load1b, reg1 ldrusr \reg1, r0, 1 -- cgit v1.2.3-55-g7522