Merge tag 'signed-kvm-ppc-next' of git://github.com/agraf/linux-2.6 into kvm-queue

Patch queue for ppc - 2015-08-22 Highlights for KVM PPC this time around: - Book3S: A few bug fixes - Book3S: Allow micro-threading on POWER8
author: Paolo Bonzini 2015-08-22 23:57:59 +0200
committer: Paolo Bonzini 2015-08-22 23:57:59 +0200
commit: e3dbc572fe11a5231568e106fa3dcedd1d1bec0f (patch)
tree: 5e15ee35c77e75d51cb550cb3cb0f6ecafa7b508 /arch
parent: Merge tag 'kvm-arm-for-4.3' of git://git.kernel.org/pub/scm/linux/kernel/git/... (diff)
parent: KVM: PPC: Book3S: correct width in XER handling (diff)
download: kernel-qcow2-linux-e3dbc572fe11a5231568e106fa3dcedd1d1bec0f.tar.gz
kernel-qcow2-linux-e3dbc572fe11a5231568e106fa3dcedd1d1bec0f.tar.xz
kernel-qcow2-linux-e3dbc572fe11a5231568e106fa3dcedd1d1bec0f.zip
247 files changed, 3114 insertions, 1926 deletions
diff --git a/arch/Kconfig b/arch/Kconfig
index bec6666a3cc4..8a8ea7110de8 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -221,6 +221,10 @@ config ARCH_TASK_STRUCT_ALLOCATOR
 config ARCH_THREAD_INFO_ALLOCATOR
 	bool
 
+# Select if arch wants to size task_struct dynamically via arch_task_struct_size:
+config ARCH_WANTS_DYNAMIC_TASK_STRUCT
+	bool
+
 config HAVE_REGS_AND_STACK_ACCESS_API
 	bool
 	help
diff --git a/arch/alpha/include/asm/Kbuild b/arch/alpha/include/asm/Kbuild
index cde23cd03609..ffd9cf5ec8c4 100644
--- a/arch/alpha/include/asm/Kbuild
+++ b/arch/alpha/include/asm/Kbuild
@@ -5,6 +5,7 @@ generic-y += cputime.h
 generic-y += exec.h
 generic-y += irq_work.h
 generic-y += mcs_spinlock.h
+generic-y += mm-arch-hooks.h
 generic-y += preempt.h
 generic-y += sections.h
 generic-y += trace_clock.h
diff --git a/arch/alpha/include/asm/mm-arch-hooks.h b/arch/alpha/include/asm/mm-arch-hooks.h
deleted file mode 100644
index b07fd862fec3..000000000000
--- a/arch/alpha/include/asm/mm-arch-hooks.h
+++ /dev/null
@@ -1,15 +0,0 @@
-/*
- * Architecture specific mm hooks
- *
- * Copyright (C) 2015, IBM Corporation
- * Author: Laurent Dufour <ldufour@linux.vnet.ibm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#ifndef _ASM_ALPHA_MM_ARCH_HOOKS_H
-#define _ASM_ALPHA_MM_ARCH_HOOKS_H
-
-#endif /* _ASM_ALPHA_MM_ARCH_HOOKS_H */
diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig
index e7cee0a5c56d..91cf4055acab 100644
--- a/arch/arc/Kconfig
+++ b/arch/arc/Kconfig
@@ -115,6 +115,7 @@ if ISA_ARCOMPACT
 
 config ARC_CPU_750D
 	bool "ARC750D"
+	select ARC_CANT_LLSC
 	help
 	  Support for ARC750 core
 
@@ -362,7 +363,7 @@ config ARC_CANT_LLSC
 config ARC_HAS_LLSC
 	bool "Insn: LLOCK/SCOND (efficient atomic ops)"
 	default y
-	depends on !ARC_CPU_750D && !ARC_CANT_LLSC
+	depends on !ARC_CANT_LLSC
 
 config ARC_HAS_SWAPE
 	bool "Insn: SWAPE (endian-swap)"
diff --git a/arch/arc/Makefile b/arch/arc/Makefile
index 6107062c0111..46d87310220d 100644
--- a/arch/arc/Makefile
+++ b/arch/arc/Makefile
@@ -49,7 +49,8 @@ endif
 
 ifndef CONFIG_CC_OPTIMIZE_FOR_SIZE
 # Generic build system uses -O2, we want -O3
-cflags-y  += -O3
+# Note: No need to add to cflags-y as that happens anyways
+ARCH_CFLAGS += -O3
 endif
 
 # small data is default for elf32 tool-chain. If not usable, disable it
diff --git a/arch/arc/boot/dts/axc003.dtsi b/arch/arc/boot/dts/axc003.dtsi
index 15c8d6226c9d..1cd5e82f5dc2 100644
--- a/arch/arc/boot/dts/axc003.dtsi
+++ b/arch/arc/boot/dts/axc003.dtsi
@@ -12,7 +12,7 @@
 
 / {
 	compatible = "snps,arc";
-	clock-frequency = <75000000>;
+	clock-frequency = <90000000>;
 	#address-cells = <1>;
 	#size-cells = <1>;
 
diff --git a/arch/arc/boot/dts/axc003_idu.dtsi b/arch/arc/boot/dts/axc003_idu.dtsi
index 199d42820eca..2f0b33257db2 100644
--- a/arch/arc/boot/dts/axc003_idu.dtsi
+++ b/arch/arc/boot/dts/axc003_idu.dtsi
@@ -12,7 +12,7 @@
 
 / {
 	compatible = "snps,arc";
-	clock-frequency = <75000000>;
+	clock-frequency = <90000000>;
 	#address-cells = <1>;
 	#size-cells = <1>;
 
diff --git a/arch/arc/include/asm/Kbuild b/arch/arc/include/asm/Kbuild
index 1a80cc91a03b..7611b10a2d23 100644
--- a/arch/arc/include/asm/Kbuild
+++ b/arch/arc/include/asm/Kbuild
@@ -22,6 +22,7 @@ generic-y += kvm_para.h
 generic-y += local.h
 generic-y += local64.h
 generic-y += mcs_spinlock.h
+generic-y += mm-arch-hooks.h
 generic-y += mman.h
 generic-y += msgbuf.h
 generic-y += param.h
diff --git a/arch/arc/include/asm/bitops.h b/arch/arc/include/asm/bitops.h
index 99fe118d3730..57c1f33844d4 100644
--- a/arch/arc/include/asm/bitops.h
+++ b/arch/arc/include/asm/bitops.h
@@ -50,8 +50,7 @@ static inline void op##_bit(unsigned long nr, volatile unsigned long *m)\
 	 * done for const @nr, but no code is generated due to gcc	\
 	 * const prop.							\
 	 */								\
-	if (__builtin_constant_p(nr))					\
-		nr &= 0x1f;						\
+	nr &= 0x1f;							\
 									\
 	__asm__ __volatile__(						\
 	"1:	llock       %0, [%1]		\n"			\
@@ -82,8 +81,7 @@ static inline int test_and_##op##_bit(unsigned long nr, volatile unsigned long *
 									\
 	m += nr >> 5;							\
 									\
-	if (__builtin_constant_p(nr))					\
-		nr &= 0x1f;						\
+	nr &= 0x1f;							\
 									\
 	/*								\
 	 * Explicit full memory barrier needed before/after as		\
@@ -129,16 +127,13 @@ static inline void op##_bit(unsigned long nr, volatile unsigned long *m)\
 	unsigned long temp, flags;					\
 	m += nr >> 5;							\
 									\
-	if (__builtin_constant_p(nr))					\
-		nr &= 0x1f;						\
-									\
 	/*								\
 	 * spin lock/unlock provide the needed smp_mb() before/after	\
 	 */								\
 	bitops_lock(flags);						\
 									\
 	temp = *m;							\
-	*m = temp c_op (1UL << nr);					\
+	*m = temp c_op (1UL << (nr & 0x1f));					\
 									\
 	bitops_unlock(flags);						\
 }
@@ -149,17 +144,14 @@ static inline int test_and_##op##_bit(unsigned long nr, volatile unsigned long *
 	unsigned long old, flags;					\
 	m += nr >> 5;							\
 									\
-	if (__builtin_constant_p(nr))					\
-		nr &= 0x1f;						\
-									\
 	bitops_lock(flags);						\
 									\
 	old = *m;							\
-	*m = old c_op (1 << nr);					\
+	*m = old c_op (1UL << (nr & 0x1f));				\
 									\
 	bitops_unlock(flags);						\
 									\
-	return (old & (1 << nr)) != 0;					\
+	return (old & (1UL << (nr & 0x1f))) != 0;			\
 }
 
 #endif /* CONFIG_ARC_HAS_LLSC */
@@ -174,11 +166,8 @@ static inline void __##op##_bit(unsigned long nr, volatile unsigned long *m)	\
 	unsigned long temp;						\
 	m += nr >> 5;							\
 									\
-	if (__builtin_constant_p(nr))					\
-		nr &= 0x1f;						\
-									\
 	temp = *m;							\
-	*m = temp c_op (1UL << nr);					\
+	*m = temp c_op (1UL << (nr & 0x1f));				\
 }
 
 #define __TEST_N_BIT_OP(op, c_op, asm_op)				\
@@ -187,13 +176,10 @@ static inline int __test_and_##op##_bit(unsigned long nr, volatile unsigned long
 	unsigned long old;						\
 	m += nr >> 5;							\
 									\
-	if (__builtin_constant_p(nr))					\
-		nr &= 0x1f;						\
-									\
 	old = *m;							\
-	*m = old c_op (1 << nr);					\
+	*m = old c_op (1UL << (nr & 0x1f));				\
 									\
-	return (old & (1 << nr)) != 0;					\
+	return (old & (1UL << (nr & 0x1f))) != 0;			\
 }
 
 #define BIT_OPS(op, c_op, asm_op)					\
@@ -224,10 +210,7 @@ test_bit(unsigned int nr, const volatile unsigned long *addr)
 
 	addr += nr >> 5;
 
-	if (__builtin_constant_p(nr))
-		nr &= 0x1f;
-
-	mask = 1 << nr;
+	mask = 1UL << (nr & 0x1f);
 
 	return ((mask & *addr) != 0);
 }
diff --git a/arch/arc/include/asm/futex.h b/arch/arc/include/asm/futex.h
index 05b5aaf5b0f9..70cfe16b742d 100644
--- a/arch/arc/include/asm/futex.h
+++ b/arch/arc/include/asm/futex.h
@@ -16,12 +16,40 @@
 #include <linux/uaccess.h>
 #include <asm/errno.h>
 
+#ifdef CONFIG_ARC_HAS_LLSC
+
+#define __futex_atomic_op(insn, ret, oldval, uaddr, oparg)\
+							\
+	__asm__ __volatile__(				\
+	"1:	llock	%1, [%2]		\n"	\
+		insn				"\n"	\
+	"2:	scond	%0, [%2]		\n"	\
+	"	bnz	1b			\n"	\
+	"	mov %0, 0			\n"	\
+	"3:					\n"	\
+	"	.section .fixup,\"ax\"		\n"	\
+	"	.align  4			\n"	\
+	"4:	mov %0, %4			\n"	\
+	"	b   3b				\n"	\
+	"	.previous			\n"	\
+	"	.section __ex_table,\"a\"	\n"	\
+	"	.align  4			\n"	\
+	"	.word   1b, 4b			\n"	\
+	"	.word   2b, 4b			\n"	\
+	"	.previous			\n"	\
+							\
+	: "=&r" (ret), "=&r" (oldval)			\
+	: "r" (uaddr), "r" (oparg), "ir" (-EFAULT)	\
+	: "cc", "memory")
+
+#else	/* !CONFIG_ARC_HAS_LLSC */
+
 #define __futex_atomic_op(insn, ret, oldval, uaddr, oparg)\
 							\
 	__asm__ __volatile__(				\
-	"1:	ld  %1, [%2]			\n"	\
+	"1:	ld	%1, [%2]		\n"	\
 		insn				"\n"	\
-	"2:	st  %0, [%2]			\n"	\
+	"2:	st	%0, [%2]		\n"	\
 	"	mov %0, 0			\n"	\
 	"3:					\n"	\
 	"	.section .fixup,\"ax\"		\n"	\
@@ -39,6 +67,8 @@
 	: "r" (uaddr), "r" (oparg), "ir" (-EFAULT)	\
 	: "cc", "memory")
 
+#endif
+
 static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
 {
 	int op = (encoded_op >> 28) & 7;
@@ -123,11 +153,17 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, u32 oldval,
 
 	pagefault_disable();
 
-	/* TBD : can use llock/scond */
 	__asm__ __volatile__(
-	"1:	ld    %0, [%3]	\n"
-	"	brne  %0, %1, 3f	\n"
-	"2:	st    %2, [%3]	\n"
+#ifdef CONFIG_ARC_HAS_LLSC
+	"1:	llock	%0, [%3]		\n"
+	"	brne	%0, %1, 3f		\n"
+	"2:	scond	%2, [%3]		\n"
+	"	bnz	1b			\n"
+#else
+	"1:	ld	%0, [%3]		\n"
+	"	brne	%0, %1, 3f		\n"
+	"2:	st	%2, [%3]		\n"
+#endif
 	"3:	\n"
 	"	.section .fixup,\"ax\"	\n"
 	"4:	mov %0, %4	\n"
diff --git a/arch/arc/include/asm/mm-arch-hooks.h b/arch/arc/include/asm/mm-arch-hooks.h
deleted file mode 100644
index c37541c5f8ba..000000000000
--- a/arch/arc/include/asm/mm-arch-hooks.h
+++ /dev/null
@@ -1,15 +0,0 @@
-/*
- * Architecture specific mm hooks
- *
- * Copyright (C) 2015, IBM Corporation
- * Author: Laurent Dufour <ldufour@linux.vnet.ibm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#ifndef _ASM_ARC_MM_ARCH_HOOKS_H
-#define _ASM_ARC_MM_ARCH_HOOKS_H
-
-#endif /* _ASM_ARC_MM_ARCH_HOOKS_H */
diff --git a/arch/arc/include/asm/ptrace.h b/arch/arc/include/asm/ptrace.h
index 91755972b9a2..91694ec1ce95 100644
--- a/arch/arc/include/asm/ptrace.h
+++ b/arch/arc/include/asm/ptrace.h
@@ -106,7 +106,7 @@ struct callee_regs {
 	long r25, r24, r23, r22, r21, r20, r19, r18, r17, r16, r15, r14, r13;
 };
 
-#define instruction_pointer(regs)	((regs)->ret)
+#define instruction_pointer(regs)	(unsigned long)((regs)->ret)
 #define profile_pc(regs)		instruction_pointer(regs)
 
 /* return 1 if user mode or 0 if kernel mode */
diff --git a/arch/arc/kernel/intc-arcv2.c b/arch/arc/kernel/intc-arcv2.c
index 6208c630abed..26c156827479 100644
--- a/arch/arc/kernel/intc-arcv2.c
+++ b/arch/arc/kernel/intc-arcv2.c
@@ -12,7 +12,6 @@
 #include <linux/of.h>
 #include <linux/irqdomain.h>
 #include <linux/irqchip.h>
-#include "../../drivers/irqchip/irqchip.h"
 #include <asm/irq.h>
 
 /*
diff --git a/arch/arc/kernel/intc-compact.c b/arch/arc/kernel/intc-compact.c
index fcdddb631766..039fac30b5c1 100644
--- a/arch/arc/kernel/intc-compact.c
+++ b/arch/arc/kernel/intc-compact.c
@@ -12,7 +12,6 @@
 #include <linux/of.h>
 #include <linux/irqdomain.h>
 #include <linux/irqchip.h>
-#include "../../drivers/irqchip/irqchip.h"
 #include <asm/irq.h>
 
 /*
diff --git a/arch/arc/kernel/mcip.c b/arch/arc/kernel/mcip.c
index 30284e8de6ff..2fb86589054d 100644
--- a/arch/arc/kernel/mcip.c
+++ b/arch/arc/kernel/mcip.c
@@ -175,7 +175,6 @@ void mcip_init_early_smp(void)
 #include <linux/irqchip.h>
 #include <linux/of.h>
 #include <linux/of_irq.h>
-#include "../../drivers/irqchip/irqchip.h"
 
 /*
  * Set the DEST for @cmn_irq to @cpu_mask (1 bit per core)
@@ -218,11 +217,28 @@ static void idu_irq_unmask(struct irq_data *data)
 	raw_spin_unlock_irqrestore(&mcip_lock, flags);
 }
 
+#ifdef CONFIG_SMP
 static int
-idu_irq_set_affinity(struct irq_data *d, const struct cpumask *cpumask, bool f)
+idu_irq_set_affinity(struct irq_data *data, const struct cpumask *cpumask,
+		     bool force)
 {
+	unsigned long flags;
+	cpumask_t online;
+
+	/* errout if no online cpu per @cpumask */
+	if (!cpumask_and(&online, cpumask, cpu_online_mask))
+		return -EINVAL;
+
+	raw_spin_lock_irqsave(&mcip_lock, flags);
+
+	idu_set_dest(data->hwirq, cpumask_bits(&online)[0]);
+	idu_set_mode(data->hwirq, IDU_M_TRIG_LEVEL, IDU_M_DISTRI_RR);
+
+	raw_spin_unlock_irqrestore(&mcip_lock, flags);
+
 	return IRQ_SET_MASK_OK;
 }
+#endif
 
 static struct irq_chip idu_irq_chip = {
 	.name			= "MCIP IDU Intc",
@@ -330,8 +346,7 @@ idu_of_init(struct device_node *intc, struct device_node *parent)
 		if (!i)
 			idu_first_irq = irq;
 
-		irq_set_handler_data(irq, domain);
-		irq_set_chained_handler(irq, idu_cascade_isr);
+		irq_set_chained_handler_and_data(irq, idu_cascade_isr, domain);
 	}
 
 	__mcip_cmd(CMD_IDU_ENABLE, 0);
diff --git a/arch/arc/kernel/setup.c b/arch/arc/kernel/setup.c
index a3d186211ed3..18cc01591c96 100644
--- a/arch/arc/kernel/setup.c
+++ b/arch/arc/kernel/setup.c
@@ -142,17 +142,22 @@ static void read_arc_build_cfg_regs(void)
 }
 
 static const struct cpuinfo_data arc_cpu_tbl[] = {
+#ifdef CONFIG_ISA_ARCOMPACT
 	{ {0x20, "ARC 600"      }, 0x2F},
 	{ {0x30, "ARC 700"      }, 0x33},
 	{ {0x34, "ARC 700 R4.10"}, 0x34},
 	{ {0x35, "ARC 700 R4.11"}, 0x35},
-	{ {0x50, "ARC HS38"	}, 0x51},
+#else
+	{ {0x50, "ARC HS38 R2.0"}, 0x51},
+	{ {0x52, "ARC HS38 R2.1"}, 0x52},
+#endif
 	{ {0x00, NULL		} }
 };
 
-#define IS_AVAIL1(v, str)	((v) ? str : "")
-#define IS_USED(cfg)		(IS_ENABLED(cfg) ? "" : "(not used) ")
-#define IS_AVAIL2(v, str, cfg)  IS_AVAIL1(v, str), IS_AVAIL1(v, IS_USED(cfg))
+#define IS_AVAIL1(v, s)		((v) ? s : "")
+#define IS_USED_RUN(v)		((v) ? "" : "(not used) ")
+#define IS_USED_CFG(cfg)	IS_USED_RUN(IS_ENABLED(cfg))
+#define IS_AVAIL2(v, s, cfg)	IS_AVAIL1(v, s), IS_AVAIL1(v, IS_USED_CFG(cfg))
 
 static char *arc_cpu_mumbojumbo(int cpu_id, char *buf, int len)
 {
@@ -226,7 +231,7 @@ static char *arc_cpu_mumbojumbo(int cpu_id, char *buf, int len)
 			n += scnprintf(buf + n, len - n, "mpy[opt %d] ", opt);
 		}
 		n += scnprintf(buf + n, len - n, "%s",
-			       IS_USED(CONFIG_ARC_HAS_HW_MPY));
+			       IS_USED_CFG(CONFIG_ARC_HAS_HW_MPY));
 	}
 
 	n += scnprintf(buf + n, len - n, "%s%s%s%s%s%s%s%s\n",
diff --git a/arch/arc/kernel/troubleshoot.c b/arch/arc/kernel/troubleshoot.c
index 807f7d61d7a7..a6f91e88ce36 100644
--- a/arch/arc/kernel/troubleshoot.c
+++ b/arch/arc/kernel/troubleshoot.c
@@ -58,7 +58,6 @@ static void show_callee_regs(struct callee_regs *cregs)
 
 static void print_task_path_n_nm(struct task_struct *tsk, char *buf)
 {
-	struct path path;
 	char *path_nm = NULL;
 	struct mm_struct *mm;
 	struct file *exe_file;
diff --git a/arch/arc/mm/cache.c b/arch/arc/mm/cache.c
index b29d62ed4f7e..1cd6695b6ab5 100644
--- a/arch/arc/mm/cache.c
+++ b/arch/arc/mm/cache.c
@@ -468,10 +468,18 @@ static void __ic_line_inv_vaddr(unsigned long paddr, unsigned long vaddr,
 noinline void slc_op(unsigned long paddr, unsigned long sz, const int op)
 {
 #ifdef CONFIG_ISA_ARCV2
+	/*
+	 * SLC is shared between all cores and concurrent aux operations from
+	 * multiple cores need to be serialized using a spinlock
+	 * A concurrent operation can be silently ignored and/or the old/new
+	 * operation can remain incomplete forever (lockup in SLC_CTRL_BUSY loop
+	 * below)
+	 */
+	static DEFINE_SPINLOCK(lock);
 	unsigned long flags;
 	unsigned int ctrl;
 
-	local_irq_save(flags);
+	spin_lock_irqsave(&lock, flags);
 
 	/*
 	 * The Region Flush operation is specified by CTRL.RGN_OP[11..9]
@@ -504,7 +512,7 @@ noinline void slc_op(unsigned long paddr, unsigned long sz, const int op)
 
 	while (read_aux_reg(ARC_REG_SLC_CTRL) & SLC_CTRL_BUSY);
 
-	local_irq_restore(flags);
+	spin_unlock_irqrestore(&lock, flags);
 #endif
 }
 
diff --git a/arch/arc/mm/dma.c b/arch/arc/mm/dma.c
index 74a637a1cfc4..57706a9c6948 100644
--- a/arch/arc/mm/dma.c
+++ b/arch/arc/mm/dma.c
@@ -60,8 +60,8 @@ void *dma_alloc_coherent(struct device *dev, size_t size,
 
 	/* This is kernel Virtual address (0x7000_0000 based) */
 	kvaddr = ioremap_nocache((unsigned long)paddr, size);
-	if (kvaddr != NULL)
-		memset(kvaddr, 0, size);
+	if (kvaddr == NULL)
+		return NULL;
 
 	/* This is bus address, platform dependent */
 	*dma_handle = (dma_addr_t)paddr;
diff --git a/arch/arm/boot/dts/am335x-pepper.dts b/arch/arm/boot/dts/am335x-pepper.dts
index 0d35ab64641c..7106114c7464 100644
--- a/arch/arm/boot/dts/am335x-pepper.dts
+++ b/arch/arm/boot/dts/am335x-pepper.dts
@@ -74,6 +74,7 @@
 	audio_codec: tlv320aic3106@1b {
 		compatible = "ti,tlv320aic3106";
 		reg = <0x1b>;
+		ai3x-micbias-vg = <0x2>;
 	};
 
 	accel: lis331dlh@1d {
@@ -153,7 +154,7 @@
 	ti,audio-routing =
 		"Headphone Jack",	"HPLOUT",
 		"Headphone Jack",	"HPROUT",
-		"LINE1L",		"Line In";
+		"MIC3L",		"Mic3L Switch";
 };
 
 &mcasp0 {
@@ -438,41 +439,50 @@
 	regulators {
 		dcdc1_reg: regulator@0 {
 			/* VDD_1V8 system supply */
+			regulator-always-on;
 		};
 
 		dcdc2_reg: regulator@1 {
 			/* VDD_CORE voltage limits 0.95V - 1.26V with +/-4% tolerance */
 			regulator-name = "vdd_core";
 			regulator-min-microvolt = <925000>;
-			regulator-max-microvolt = <1325000>;
+			regulator-max-microvolt = <1150000>;
 			regulator-boot-on;
+			regulator-always-on;
 		};
 
 		dcdc3_reg: regulator@2 {
 			/* VDD_MPU voltage limits 0.95V - 1.1V with +/-4% tolerance */
 			regulator-name = "vdd_mpu";
 			regulator-min-microvolt = <925000>;
-			regulator-max-microvolt = <1150000>;
+			regulator-max-microvolt = <1325000>;
 			regulator-boot-on;
+			regulator-always-on;
 		};
 
 		ldo1_reg: regulator@3 {
 			/* VRTC 1.8V always-on supply */
+			regulator-name = "vrtc,vdds";
 			regulator-always-on;
 		};
 
 		ldo2_reg: regulator@4 {
 			/* 3.3V rail */
+			regulator-name = "vdd_3v3aux";
+			regulator-always-on;
 		};
 
 		ldo3_reg: regulator@5 {
 			/* VDD_3V3A 3.3V rail */
+			regulator-name = "vdd_3v3a";
 			regulator-min-microvolt = <3300000>;
 			regulator-max-microvolt = <3300000>;
 		};
 
 		ldo4_reg: regulator@6 {
 			/* VDD_3V3B 3.3V rail */
+			regulator-name = "vdd_3v3b";
+			regulator-always-on;
 		};
 	};
 };
diff --git a/arch/arm/boot/dts/cros-ec-keyboard.dtsi b/arch/arm/boot/dts/cros-ec-keyboard.dtsi
index 9c7fb0acae79..4e42f30cb318 100644
--- a/arch/arm/boot/dts/cros-ec-keyboard.dtsi
+++ b/arch/arm/boot/dts/cros-ec-keyboard.dtsi
@@ -22,6 +22,7 @@
 			MATRIX_KEY(0x00, 0x02, KEY_F1)
 			MATRIX_KEY(0x00, 0x03, KEY_B)
 			MATRIX_KEY(0x00, 0x04, KEY_F10)
+			MATRIX_KEY(0x00, 0x05, KEY_RO)
 			MATRIX_KEY(0x00, 0x06, KEY_N)
 			MATRIX_KEY(0x00, 0x08, KEY_EQUAL)
 			MATRIX_KEY(0x00, 0x0a, KEY_RIGHTALT)
@@ -34,6 +35,7 @@
 			MATRIX_KEY(0x01, 0x08, KEY_APOSTROPHE)
 			MATRIX_KEY(0x01, 0x09, KEY_F9)
 			MATRIX_KEY(0x01, 0x0b, KEY_BACKSPACE)
+			MATRIX_KEY(0x01, 0x0c, KEY_HENKAN)
 
 			MATRIX_KEY(0x02, 0x00, KEY_LEFTCTRL)
 			MATRIX_KEY(0x02, 0x01, KEY_TAB)
@@ -45,6 +47,7 @@
 			MATRIX_KEY(0x02, 0x07, KEY_102ND)
 			MATRIX_KEY(0x02, 0x08, KEY_LEFTBRACE)
 			MATRIX_KEY(0x02, 0x09, KEY_F8)
+			MATRIX_KEY(0x02, 0x0a, KEY_YEN)
 
 			MATRIX_KEY(0x03, 0x01, KEY_GRAVE)
 			MATRIX_KEY(0x03, 0x02, KEY_F2)
@@ -53,6 +56,7 @@
 			MATRIX_KEY(0x03, 0x06, KEY_6)
 			MATRIX_KEY(0x03, 0x08, KEY_MINUS)
 			MATRIX_KEY(0x03, 0x0b, KEY_BACKSLASH)
+			MATRIX_KEY(0x03, 0x0c, KEY_MUHENKAN)
 
 			MATRIX_KEY(0x04, 0x00, KEY_RIGHTCTRL)
 			MATRIX_KEY(0x04, 0x01, KEY_A)
diff --git a/arch/arm/boot/dts/dra7-evm.dts b/arch/arm/boot/dts/dra7-evm.dts
index aa465904f6cc..096f68be99e2 100644
--- a/arch/arm/boot/dts/dra7-evm.dts
+++ b/arch/arm/boot/dts/dra7-evm.dts
@@ -686,7 +686,8 @@
 
 &dcan1 {
 	status = "ok";
-	pinctrl-names = "default", "sleep";
-	pinctrl-0 = <&dcan1_pins_default>;
+	pinctrl-names = "default", "sleep", "active";
+	pinctrl-0 = <&dcan1_pins_sleep>;
 	pinctrl-1 = <&dcan1_pins_sleep>;
+	pinctrl-2 = <&dcan1_pins_default>;
 };
diff --git a/arch/arm/boot/dts/dra72-evm.dts b/arch/arm/boot/dts/dra72-evm.dts
index 4e1b60581782..803738414086 100644
--- a/arch/arm/boot/dts/dra72-evm.dts
+++ b/arch/arm/boot/dts/dra72-evm.dts
@@ -587,9 +587,10 @@
 
 &dcan1 {
 	status = "ok";
-	pinctrl-names = "default", "sleep";
-	pinctrl-0 = <&dcan1_pins_default>;
+	pinctrl-names = "default", "sleep", "active";
+	pinctrl-0 = <&dcan1_pins_sleep>;
 	pinctrl-1 = <&dcan1_pins_sleep>;
+	pinctrl-2 = <&dcan1_pins_default>;
 };
 
 &qspi {
diff --git a/arch/arm/boot/dts/imx23.dtsi b/arch/arm/boot/dts/imx23.dtsi
index c892d58e8dad..b995333ea22b 100644
--- a/arch/arm/boot/dts/imx23.dtsi
+++ b/arch/arm/boot/dts/imx23.dtsi
@@ -468,6 +468,7 @@
 				interrupts = <36 37 38 39 40 41 42 43 44>;
 				status = "disabled";
 				clocks = <&clks 26>;
+				#io-channel-cells = <1>;
 			};
 
 			spdif@80054000 {
diff --git a/arch/arm/boot/dts/imx27.dtsi b/arch/arm/boot/dts/imx27.dtsi
index bc215e4b75fd..b69be5c499cf 100644
--- a/arch/arm/boot/dts/imx27.dtsi
+++ b/arch/arm/boot/dts/imx27.dtsi
@@ -108,7 +108,7 @@
 			};
 
 			gpt1: timer@10003000 {
-				compatible = "fsl,imx27-gpt", "fsl,imx1-gpt";
+				compatible = "fsl,imx27-gpt", "fsl,imx21-gpt";
 				reg = <0x10003000 0x1000>;
 				interrupts = <26>;
 				clocks = <&clks IMX27_CLK_GPT1_IPG_GATE>,
@@ -117,7 +117,7 @@
 			};
 
 			gpt2: timer@10004000 {
-				compatible = "fsl,imx27-gpt", "fsl,imx1-gpt";
+				compatible = "fsl,imx27-gpt", "fsl,imx21-gpt";
 				reg = <0x10004000 0x1000>;
 				interrupts = <25>;
 				clocks = <&clks IMX27_CLK_GPT2_IPG_GATE>,
@@ -126,7 +126,7 @@
 			};
 
 			gpt3: timer@10005000 {
-				compatible = "fsl,imx27-gpt", "fsl,imx1-gpt";
+				compatible = "fsl,imx27-gpt", "fsl,imx21-gpt";
 				reg = <0x10005000 0x1000>;
 				interrupts = <24>;
 				clocks = <&clks IMX27_CLK_GPT3_IPG_GATE>,
@@ -376,7 +376,7 @@
 			};
 
 			gpt4: timer@10019000 {
-				compatible = "fsl,imx27-gpt", "fsl,imx1-gpt";
+				compatible = "fsl,imx27-gpt", "fsl,imx21-gpt";
 				reg = <0x10019000 0x1000>;
 				interrupts = <4>;
 				clocks = <&clks IMX27_CLK_GPT4_IPG_GATE>,
@@ -385,7 +385,7 @@
 			};
 
 			gpt5: timer@1001a000 {
-				compatible = "fsl,imx27-gpt", "fsl,imx1-gpt";
+				compatible = "fsl,imx27-gpt", "fsl,imx21-gpt";
 				reg = <0x1001a000 0x1000>;
 				interrupts = <3>;
 				clocks = <&clks IMX27_CLK_GPT5_IPG_GATE>,
@@ -436,7 +436,7 @@
 			};
 
 			gpt6: timer@1001f000 {
-				compatible = "fsl,imx27-gpt", "fsl,imx1-gpt";
+				compatible = "fsl,imx27-gpt", "fsl,imx21-gpt";
 				reg = <0x1001f000 0x1000>;
 				interrupts = <2>;
 				clocks = <&clks IMX27_CLK_GPT6_IPG_GATE>,
diff --git a/arch/arm/boot/dts/imx53-qsb-common.dtsi b/arch/arm/boot/dts/imx53-qsb-common.dtsi
index 181ae5ebf23f..ab4ba39f2ed9 100644
--- a/arch/arm/boot/dts/imx53-qsb-common.dtsi
+++ b/arch/arm/boot/dts/imx53-qsb-common.dtsi
@@ -295,9 +295,10 @@
 &tve {
 	pinctrl-names = "default";
 	pinctrl-0 = <&pinctrl_vga_sync>;
+	ddc-i2c-bus = <&i2c2>;
 	fsl,tve-mode = "vga";
-	fsl,hsync-pin = <4>;
-	fsl,vsync-pin = <6>;
+	fsl,hsync-pin = <7>;	/* IPU DI1 PIN7 via EIM_OE */
+	fsl,vsync-pin = <8>;	/* IPU DI1 PIN8 via EIM_RW */
 	status = "okay";
 };
 
diff --git a/arch/arm/boot/dts/k2e.dtsi b/arch/arm/boot/dts/k2e.dtsi
index 50e555eab50d..1b6494fbdb91 100644
--- a/arch/arm/boot/dts/k2e.dtsi
+++ b/arch/arm/boot/dts/k2e.dtsi
@@ -86,7 +86,7 @@
 			gpio,syscon-dev = <&devctrl 0x240>;
 		};
 
-		pcie@21020000 {
+		pcie1: pcie@21020000 {
 			compatible = "ti,keystone-pcie","snps,dw-pcie";
 			clocks = <&clkpcie1>;
 			clock-names = "pcie";
@@ -96,6 +96,7 @@
 			ranges = <0x81000000 0 0 0x23260000 0x4000 0x4000
 				0x82000000 0 0x60000000 0x60000000 0 0x10000000>;
 
+			status = "disabled";
 			device_type = "pci";
 			num-lanes = <2>;
 
diff --git a/arch/arm/boot/dts/keystone.dtsi b/arch/arm/boot/dts/keystone.dtsi
index c06542b2c954..e7a6f6deabb6 100644
--- a/arch/arm/boot/dts/keystone.dtsi
+++ b/arch/arm/boot/dts/keystone.dtsi
@@ -286,7 +286,7 @@
 			ti,syscon-dev = <&devctrl 0x2a0>;
 		};
 
-		pcie@21800000 {
+		pcie0: pcie@21800000 {
 			compatible = "ti,keystone-pcie", "snps,dw-pcie";
 			clocks = <&clkpcie>;
 			clock-names = "pcie";
@@ -296,6 +296,7 @@
 			ranges = <0x81000000 0 0 0x23250000 0 0x4000
 				0x82000000 0 0x50000000 0x50000000 0 0x10000000>;
 
+			status = "disabled";
 			device_type = "pci";
 			num-lanes = <2>;
 
diff --git a/arch/arm/boot/dts/omap3-overo-common-lcd35.dtsi b/arch/arm/boot/dts/omap3-overo-common-lcd35.dtsi
index 233c69e50ae3..df8908adb0cb 100644
--- a/arch/arm/boot/dts/omap3-overo-common-lcd35.dtsi
+++ b/arch/arm/boot/dts/omap3-overo-common-lcd35.dtsi
@@ -120,7 +120,7 @@
 
 	lcd0: display@0 {
 		compatible = "lgphilips,lb035q02";
-		label = "lcd";
+		label = "lcd35";
 
 		reg = <1>;					/* CS1 */
 		spi-max-frequency = <10000000>;
diff --git a/arch/arm/boot/dts/omap3-overo-common-lcd43.dtsi b/arch/arm/boot/dts/omap3-overo-common-lcd43.dtsi
index f5395b7da912..048fd216970a 100644
--- a/arch/arm/boot/dts/omap3-overo-common-lcd43.dtsi
+++ b/arch/arm/boot/dts/omap3-overo-common-lcd43.dtsi
@@ -98,7 +98,7 @@
 
 	lcd0: display@0 {
 		compatible = "samsung,lte430wq-f0c", "panel-dpi";
-		label = "lcd";
+		label = "lcd43";
 
 		pinctrl-names = "default";
 		pinctrl-0 = <&lte430_pins>;
diff --git a/arch/arm/boot/dts/omap4.dtsi b/arch/arm/boot/dts/omap4.dtsi
index f884d6adb71e..7d31c6ff246f 100644
--- a/arch/arm/boot/dts/omap4.dtsi
+++ b/arch/arm/boot/dts/omap4.dtsi
@@ -551,6 +551,7 @@
 			reg = <0x4a066000 0x100>;
 			interrupts = <GIC_SPI 28 IRQ_TYPE_LEVEL_HIGH>;
 			ti,hwmods = "mmu_dsp";
+			#iommu-cells = <0>;
 		};
 
 		mmu_ipu: mmu@55082000 {
@@ -558,6 +559,7 @@
 			reg = <0x55082000 0x100>;
 			interrupts = <GIC_SPI 100 IRQ_TYPE_LEVEL_HIGH>;
 			ti,hwmods = "mmu_ipu";
+			#iommu-cells = <0>;
 			ti,iommu-bus-err-back;
 		};
 
diff --git a/arch/arm/boot/dts/omap5.dtsi b/arch/arm/boot/dts/omap5.dtsi
index 7d24ae0306b5..c8fd648a7108 100644
--- a/arch/arm/boot/dts/omap5.dtsi
+++ b/arch/arm/boot/dts/omap5.dtsi
@@ -612,6 +612,7 @@
 			reg = <0x4a066000 0x100>;
 			interrupts = <GIC_SPI 28 IRQ_TYPE_LEVEL_HIGH>;
 			ti,hwmods = "mmu_dsp";
+			#iommu-cells = <0>;
 		};
 
 		mmu_ipu: mmu@55082000 {
@@ -619,6 +620,7 @@
 			reg = <0x55082000 0x100>;
 			interrupts = <GIC_SPI 100 IRQ_TYPE_LEVEL_HIGH>;
 			ti,hwmods = "mmu_ipu";
+			#iommu-cells = <0>;
 			ti,iommu-bus-err-back;
 		};
 
diff --git a/arch/arm/boot/dts/socfpga_cyclone5_sockit.dts b/arch/arm/boot/dts/socfpga_cyclone5_sockit.dts
index 71468a7eb28f..5e17fd147728 100644
--- a/arch/arm/boot/dts/socfpga_cyclone5_sockit.dts
+++ b/arch/arm/boot/dts/socfpga_cyclone5_sockit.dts
@@ -60,27 +60,27 @@
 	rxc-skew-ps = <2000>;
 };
 
-&mmc0 {
-	vmmc-supply = <&regulator_3_3v>;
-	vqmmc-supply = <&regulator_3_3v>;
-};
-
-&usb1 {
-	status = "okay";
-};
-
 &gpio2 {
 	status = "okay";
 };
 
-&i2c1{
+&i2c1 {
 	status = "okay";
 
-	accel1: accel1@53{
-		compatible = "adxl34x";
+	accel1: accelerometer@53 {
+		compatible = "adi,adxl345";
 		reg = <0x53>;
 
-		interrupt-parent = < &portc >;
+		interrupt-parent = <&portc>;
 		interrupts = <3 2>;
 	};
 };
+
+&mmc0 {
+	vmmc-supply = <&regulator_3_3v>;
+	vqmmc-supply = <&regulator_3_3v>;
+};
+
+&usb1 {
+	status = "okay";
+};
diff --git a/arch/arm/boot/dts/spear1310-evb.dts b/arch/arm/boot/dts/spear1310-evb.dts
index d42c84b1df8d..e48857249ce7 100644
--- a/arch/arm/boot/dts/spear1310-evb.dts
+++ b/arch/arm/boot/dts/spear1310-evb.dts
@@ -1,7 +1,7 @@
 /*
  * DTS file for SPEAr1310 Evaluation Baord
  *
- * Copyright 2012 Viresh Kumar <viresh.linux@gmail.com>
+ * Copyright 2012 Viresh Kumar <vireshk@kernel.org>
  *
  * The code contained herein is licensed under the GNU General Public
  * License. You may obtain a copy of the GNU General Public License
diff --git a/arch/arm/boot/dts/spear1310.dtsi b/arch/arm/boot/dts/spear1310.dtsi
index 9d342920695a..54bc6d3cf290 100644
--- a/arch/arm/boot/dts/spear1310.dtsi
+++ b/arch/arm/boot/dts/spear1310.dtsi
@@ -1,7 +1,7 @@
 /*
  * DTS file for all SPEAr1310 SoCs
  *
- * Copyright 2012 Viresh Kumar <viresh.linux@gmail.com>
+ * Copyright 2012 Viresh Kumar <vireshk@kernel.org>
  *
  * The code contained herein is licensed under the GNU General Public
  * License. You may obtain a copy of the GNU General Public License
diff --git a/arch/arm/boot/dts/spear1340-evb.dts b/arch/arm/boot/dts/spear1340-evb.dts
index b23e05ed1d60..c611f5606dfe 100644
--- a/arch/arm/boot/dts/spear1340-evb.dts
+++ b/arch/arm/boot/dts/spear1340-evb.dts
@@ -1,7 +1,7 @@
 /*
  * DTS file for SPEAr1340 Evaluation Baord
  *
- * Copyright 2012 Viresh Kumar <viresh.linux@gmail.com>
+ * Copyright 2012 Viresh Kumar <vireshk@kernel.org>
  *
  * The code contained herein is licensed under the GNU General Public
  * License. You may obtain a copy of the GNU General Public License
diff --git a/arch/arm/boot/dts/spear1340.dtsi b/arch/arm/boot/dts/spear1340.dtsi
index 13e1aa33daa2..df2232d767ed 100644
--- a/arch/arm/boot/dts/spear1340.dtsi
+++ b/arch/arm/boot/dts/spear1340.dtsi
@@ -1,7 +1,7 @@
 /*
  * DTS file for all SPEAr1340 SoCs
  *
- * Copyright 2012 Viresh Kumar <viresh.linux@gmail.com>
+ * Copyright 2012 Viresh Kumar <vireshk@kernel.org>
  *
  * The code contained herein is licensed under the GNU General Public
  * License. You may obtain a copy of the GNU General Public License
diff --git a/arch/arm/boot/dts/spear13xx.dtsi b/arch/arm/boot/dts/spear13xx.dtsi
index 40accc87e3a2..14594ce8c18a 100644
--- a/arch/arm/boot/dts/spear13xx.dtsi
+++ b/arch/arm/boot/dts/spear13xx.dtsi
@@ -1,7 +1,7 @@
 /*
  * DTS file for all SPEAr13xx SoCs
  *
- * Copyright 2012 Viresh Kumar <viresh.linux@gmail.com>
+ * Copyright 2012 Viresh Kumar <vireshk@kernel.org>
  *
  * The code contained herein is licensed under the GNU General Public
  * License. You may obtain a copy of the GNU General Public License
diff --git a/arch/arm/boot/dts/spear300-evb.dts b/arch/arm/boot/dts/spear300-evb.dts
index 5de1431653e4..e859e8288bcd 100644
--- a/arch/arm/boot/dts/spear300-evb.dts
+++ b/arch/arm/boot/dts/spear300-evb.dts
@@ -1,7 +1,7 @@
 /*
  * DTS file for SPEAr300 Evaluation Baord
  *
- * Copyright 2012 Viresh Kumar <viresh.linux@gmail.com>
+ * Copyright 2012 Viresh Kumar <vireshk@kernel.org>
  *
  * The code contained herein is licensed under the GNU General Public
  * License. You may obtain a copy of the GNU General Public License
diff --git a/arch/arm/boot/dts/spear300.dtsi b/arch/arm/boot/dts/spear300.dtsi
index f79b3dfaabe6..f4e92e599729 100644
--- a/arch/arm/boot/dts/spear300.dtsi
+++ b/arch/arm/boot/dts/spear300.dtsi
@@ -1,7 +1,7 @@
 /*
  * DTS file for SPEAr300 SoC
  *
- * Copyright 2012 Viresh Kumar <viresh.linux@gmail.com>
+ * Copyright 2012 Viresh Kumar <vireshk@kernel.org>
  *
  * The code contained herein is licensed under the GNU General Public
  * License. You may obtain a copy of the GNU General Public License
diff --git a/arch/arm/boot/dts/spear310-evb.dts b/arch/arm/boot/dts/spear310-evb.dts
index b09632963d15..070f2c1b7851 100644
--- a/arch/arm/boot/dts/spear310-evb.dts
+++ b/arch/arm/boot/dts/spear310-evb.dts
@@ -1,7 +1,7 @@
 /*
  * DTS file for SPEAr310 Evaluation Baord
  *
- * Copyright 2012 Viresh Kumar <viresh.linux@gmail.com>
+ * Copyright 2012 Viresh Kumar <vireshk@kernel.org>
  *
  * The code contained herein is licensed under the GNU General Public
  * License. You may obtain a copy of the GNU General Public License
diff --git a/arch/arm/boot/dts/spear310.dtsi b/arch/arm/boot/dts/spear310.dtsi
index 95372080eea6..da210b454753 100644
--- a/arch/arm/boot/dts/spear310.dtsi
+++ b/arch/arm/boot/dts/spear310.dtsi
@@ -1,7 +1,7 @@
 /*
  * DTS file for SPEAr310 SoC
  *
- * Copyright 2012 Viresh Kumar <viresh.linux@gmail.com>
+ * Copyright 2012 Viresh Kumar <vireshk@kernel.org>
  *
  * The code contained herein is licensed under the GNU General Public
  * License. You may obtain a copy of the GNU General Public License
diff --git a/arch/arm/boot/dts/spear320-evb.dts b/arch/arm/boot/dts/spear320-evb.dts
index fdedbb514102..1b1034477923 100644
--- a/arch/arm/boot/dts/spear320-evb.dts
+++ b/arch/arm/boot/dts/spear320-evb.dts
@@ -1,7 +1,7 @@
 /*
  * DTS file for SPEAr320 Evaluation Baord
  *
- * Copyright 2012 Viresh Kumar <viresh.linux@gmail.com>
+ * Copyright 2012 Viresh Kumar <vireshk@kernel.org>
  *
  * The code contained herein is licensed under the GNU General Public
  * License. You may obtain a copy of the GNU General Public License
diff --git a/arch/arm/boot/dts/spear320.dtsi b/arch/arm/boot/dts/spear320.dtsi
index ffea342aeec9..22be6e5edaac 100644
--- a/arch/arm/boot/dts/spear320.dtsi
+++ b/arch/arm/boot/dts/spear320.dtsi
@@ -1,7 +1,7 @@
 /*
  * DTS file for SPEAr320 SoC
  *
- * Copyright 2012 Viresh Kumar <viresh.linux@gmail.com>
+ * Copyright 2012 Viresh Kumar <vireshk@kernel.org>
  *
  * The code contained herein is licensed under the GNU General Public
  * License. You may obtain a copy of the GNU General Public License
diff --git a/arch/arm/boot/dts/spear3xx.dtsi b/arch/arm/boot/dts/spear3xx.dtsi
index f0e3fcf8e323..118135d75899 100644
--- a/arch/arm/boot/dts/spear3xx.dtsi
+++ b/arch/arm/boot/dts/spear3xx.dtsi
@@ -1,7 +1,7 @@
 /*
  * DTS file for all SPEAr3xx SoCs
  *
- * Copyright 2012 Viresh Kumar <viresh.linux@gmail.com>
+ * Copyright 2012 Viresh Kumar <vireshk@kernel.org>
  *
  * The code contained herein is licensed under the GNU General Public
  * License. You may obtain a copy of the GNU General Public License
diff --git a/arch/arm/boot/dts/ste-ccu8540.dts b/arch/arm/boot/dts/ste-ccu8540.dts
index 32dd55e5f4e6..6eaaf638e52e 100644
--- a/arch/arm/boot/dts/ste-ccu8540.dts
+++ b/arch/arm/boot/dts/ste-ccu8540.dts
@@ -17,6 +17,13 @@
 	model = "ST-Ericsson U8540 platform with Device Tree";
 	compatible = "st-ericsson,ccu8540", "st-ericsson,u8540";
 
+	/* This stablilizes the serial port enumeration */
+	aliases {
+		serial0 = &ux500_serial0;
+		serial1 = &ux500_serial1;
+		serial2 = &ux500_serial2;
+	};
+
 	memory@0 {
 		device_type = "memory";
 		reg = <0x20000000 0x1f000000>, <0xc0000000 0x3f000000>;
diff --git a/arch/arm/boot/dts/ste-ccu9540.dts b/arch/arm/boot/dts/ste-ccu9540.dts
index 651c56d400a4..c8b815819cfe 100644
--- a/arch/arm/boot/dts/ste-ccu9540.dts
+++ b/arch/arm/boot/dts/ste-ccu9540.dts
@@ -16,6 +16,13 @@
 	model = "ST-Ericsson CCU9540 platform with Device Tree";
 	compatible = "st-ericsson,ccu9540", "st-ericsson,u9540";
 
+	/* This stablilizes the serial port enumeration */
+	aliases {
+		serial0 = &ux500_serial0;
+		serial1 = &ux500_serial1;
+		serial2 = &ux500_serial2;
+	};
+
 	memory {
 		reg = <0x00000000 0x20000000>;
 	};
diff --git a/arch/arm/boot/dts/ste-dbx5x0.dtsi b/arch/arm/boot/dts/ste-dbx5x0.dtsi
index 853684ad7773..a75f3289e653 100644
--- a/arch/arm/boot/dts/ste-dbx5x0.dtsi
+++ b/arch/arm/boot/dts/ste-dbx5x0.dtsi
@@ -971,7 +971,7 @@
 			power-domains = <&pm_domains DOMAIN_VAPE>;
 		};
 
-		uart@80120000 {
+		ux500_serial0: uart@80120000 {
 			compatible = "arm,pl011", "arm,primecell";
 			reg = <0x80120000 0x1000>;
 			interrupts = <0 11 IRQ_TYPE_LEVEL_HIGH>;
@@ -986,7 +986,7 @@
 			status = "disabled";
 		};
 
-		uart@80121000 {
+		ux500_serial1: uart@80121000 {
 			compatible = "arm,pl011", "arm,primecell";
 			reg = <0x80121000 0x1000>;
 			interrupts = <0 19 IRQ_TYPE_LEVEL_HIGH>;
@@ -1001,7 +1001,7 @@
 			status = "disabled";
 		};
 
-		uart@80007000 {
+		ux500_serial2: uart@80007000 {
 			compatible = "arm,pl011", "arm,primecell";
 			reg = <0x80007000 0x1000>;
 			interrupts = <0 26 IRQ_TYPE_LEVEL_HIGH>;
diff --git a/arch/arm/boot/dts/ste-href.dtsi b/arch/arm/boot/dts/ste-href.dtsi
index 744c1e3a744d..6d8ce154347e 100644
--- a/arch/arm/boot/dts/ste-href.dtsi
+++ b/arch/arm/boot/dts/ste-href.dtsi
@@ -32,11 +32,11 @@
 			status = "okay";
 		};
 
+		/* This UART is unused and thus left disabled */
 		uart@80121000 {
 			pinctrl-names = "default", "sleep";
 			pinctrl-0 = <&uart1_default_mode>;
 			pinctrl-1 = <&uart1_sleep_mode>;
-			status = "okay";
 		};
 
 		uart@80007000 {
diff --git a/arch/arm/boot/dts/ste-hrefprev60-stuib.dts b/arch/arm/boot/dts/ste-hrefprev60-stuib.dts
index 2b1cb5b584b6..18e9795a94f9 100644
--- a/arch/arm/boot/dts/ste-hrefprev60-stuib.dts
+++ b/arch/arm/boot/dts/ste-hrefprev60-stuib.dts
@@ -17,6 +17,13 @@
 	model = "ST-Ericsson HREF (pre-v60) and ST UIB";
 	compatible = "st-ericsson,mop500", "st-ericsson,u8500";
 
+	/* This stablilizes the serial port enumeration */
+	aliases {
+		serial0 = &ux500_serial0;
+		serial1 = &ux500_serial1;
+		serial2 = &ux500_serial2;
+	};
+
 	soc {
 		/* Reset line for the BU21013 touchscreen */
 		i2c@80110000 {
diff --git a/arch/arm/boot/dts/ste-hrefprev60-tvk.dts b/arch/arm/boot/dts/ste-hrefprev60-tvk.dts
index 59523f866812..24739914e689 100644
--- a/arch/arm/boot/dts/ste-hrefprev60-tvk.dts
+++ b/arch/arm/boot/dts/ste-hrefprev60-tvk.dts
@@ -16,4 +16,11 @@
 / {
 	model = "ST-Ericsson HREF (pre-v60) and TVK1281618 UIB";
 	compatible = "st-ericsson,mop500", "st-ericsson,u8500";
+
+	/* This stablilizes the serial port enumeration */
+	aliases {
+		serial0 = &ux500_serial0;
+		serial1 = &ux500_serial1;
+		serial2 = &ux500_serial2;
+	};
 };
diff --git a/arch/arm/boot/dts/ste-hrefprev60.dtsi b/arch/arm/boot/dts/ste-hrefprev60.dtsi
index 7f3975b58d16..b0278f4c486c 100644
--- a/arch/arm/boot/dts/ste-hrefprev60.dtsi
+++ b/arch/arm/boot/dts/ste-hrefprev60.dtsi
@@ -23,6 +23,11 @@
 	};
 
 	soc {
+		/* Enable UART1 on this board */
+		uart@80121000 {
+			status = "okay";
+		};
+
 		i2c@80004000 {
 			tps61052@33 {
 				compatible = "tps61052";
diff --git a/arch/arm/boot/dts/ste-hrefv60plus-stuib.dts b/arch/arm/boot/dts/ste-hrefv60plus-stuib.dts
index 8c6a2de56cf1..c2e1ba019a2f 100644
--- a/arch/arm/boot/dts/ste-hrefv60plus-stuib.dts
+++ b/arch/arm/boot/dts/ste-hrefv60plus-stuib.dts
@@ -19,6 +19,13 @@
 	model = "ST-Ericsson HREF (v60+) and ST UIB";
 	compatible = "st-ericsson,hrefv60+", "st-ericsson,u8500";
 
+	/* This stablilizes the serial port enumeration */
+	aliases {
+		serial0 = &ux500_serial0;
+		serial1 = &ux500_serial1;
+		serial2 = &ux500_serial2;
+	};
+
 	soc {
 		/* Reset line for the BU21013 touchscreen */
 		i2c@80110000 {
diff --git a/arch/arm/boot/dts/ste-hrefv60plus-tvk.dts b/arch/arm/boot/dts/ste-hrefv60plus-tvk.dts
index d53cccdce776..ebd8547e98f1 100644
--- a/arch/arm/boot/dts/ste-hrefv60plus-tvk.dts
+++ b/arch/arm/boot/dts/ste-hrefv60plus-tvk.dts
@@ -18,4 +18,11 @@
 / {
 	model = "ST-Ericsson HREF (v60+) and TVK1281618 UIB";
 	compatible = "st-ericsson,hrefv60+", "st-ericsson,u8500";
+
+	/* This stablilizes the serial port enumeration */
+	aliases {
+		serial0 = &ux500_serial0;
+		serial1 = &ux500_serial1;
+		serial2 = &ux500_serial2;
+	};
 };
diff --git a/arch/arm/boot/dts/ste-hrefv60plus.dtsi b/arch/arm/boot/dts/ste-hrefv60plus.dtsi
index a4bc9e77d640..810cda743b6d 100644
--- a/arch/arm/boot/dts/ste-hrefv60plus.dtsi
+++ b/arch/arm/boot/dts/ste-hrefv60plus.dtsi
@@ -43,15 +43,26 @@
 				  <&vaudio_hf_hrefv60_mode>,
 				  <&gbf_hrefv60_mode>,
 				  <&hdtv_hrefv60_mode>,
-				  <&touch_hrefv60_mode>;
+				  <&touch_hrefv60_mode>,
+				  <&gpios_hrefv60_mode>;
 
 			sdi0 {
-				/* SD card detect GPIO pin, extend default state */
 				sdi0_default_mode: sdi0_default {
+					/* SD card detect GPIO pin, extend default state */
 					default_hrefv60_cfg1 {
 						pins = "GPIO95_E8";
 						ste,config = <&gpio_in_pu>;
 					};
+					/* VMMCI level-shifter enable */
+					default_hrefv60_cfg2 {
+						pins = "GPIO169_D22";
+						ste,config = <&gpio_out_lo>;
+					};
+					/* VMMCI level-shifter voltage select */
+					default_hrefv60_cfg3 {
+						pins = "GPIO5_AG6";
+						ste,config = <&gpio_out_hi>;
+					};
 				};
 			};
 			ipgpio {
@@ -213,6 +224,16 @@
 					};
 				};
 			};
+			gpios {
+				/* Dangling GPIO pins */
+				gpios_hrefv60_mode: gpios_hrefv60 {
+					default_cfg1 {
+						/* Normally UART1 RXD, now dangling */
+						pins = "GPIO4_AH6";
+						ste,config = <&in_pu>;
+					};
+				};
+			};
 		};
 	};
 };
diff --git a/arch/arm/boot/dts/ste-snowball.dts b/arch/arm/boot/dts/ste-snowball.dts
index 9edadc37719f..32a5ccb14e7e 100644
--- a/arch/arm/boot/dts/ste-snowball.dts
+++ b/arch/arm/boot/dts/ste-snowball.dts
@@ -18,6 +18,13 @@
 	model = "Calao Systems Snowball platform with device tree";
 	compatible = "calaosystems,snowball-a9500", "st-ericsson,u9500";
 
+	/* This stablilizes the serial port enumeration */
+	aliases {
+		serial0 = &ux500_serial0;
+		serial1 = &ux500_serial1;
+		serial2 = &ux500_serial2;
+	};
+
 	memory {
 		reg = <0x00000000 0x20000000>;
 	};
@@ -223,11 +230,11 @@
 			status = "okay";
 		};
 
+		/* This UART is unused and thus left disabled */
 		uart@80121000 {
 			pinctrl-names = "default", "sleep";
 			pinctrl-0 = <&uart1_default_mode>;
 			pinctrl-1 = <&uart1_sleep_mode>;
-			status = "okay";
 		};
 
 		uart@80007000 {
@@ -452,7 +459,21 @@
 						pins = "GPIO21_AB3"; /* DAT31DIR */
 						ste,config = <&out_hi>;
 					};
-
+					/* SD card detect GPIO pin, extend default state */
+					snowball_cfg2 {
+						pins = "GPIO218_AH11";
+						ste,config = <&gpio_in_pu>;
+					};
+					/* VMMCI level-shifter enable */
+					snowball_cfg3 {
+						pins = "GPIO217_AH12";
+						ste,config = <&gpio_out_lo>;
+					};
+					/* VMMCI level-shifter voltage select */
+					snowball_cfg4 {
+						pins = "GPIO228_AJ6";
+						ste,config = <&gpio_out_hi>;
+					};
 				};
 			};
 			ssp0 {
diff --git a/arch/arm/include/asm/Kbuild b/arch/arm/include/asm/Kbuild
index 83c50193626c..30b3bc1666d2 100644
--- a/arch/arm/include/asm/Kbuild
+++ b/arch/arm/include/asm/Kbuild
@@ -13,6 +13,7 @@ generic-y += kdebug.h
 generic-y += local.h
 generic-y += local64.h
 generic-y += mcs_spinlock.h
+generic-y += mm-arch-hooks.h
 generic-y += msgbuf.h
 generic-y += param.h
 generic-y += parport.h
diff --git a/arch/arm/include/asm/memory.h b/arch/arm/include/asm/memory.h
index 6f225acc07c5..b7f6fb462ea0 100644
--- a/arch/arm/include/asm/memory.h
+++ b/arch/arm/include/asm/memory.h
@@ -286,7 +286,7 @@ extern phys_addr_t (*arch_virt_to_idmap)(unsigned long x);
  */
 static inline phys_addr_t __virt_to_idmap(unsigned long x)
 {
-	if (arch_virt_to_idmap)
+	if (IS_ENABLED(CONFIG_MMU) && arch_virt_to_idmap)
 		return arch_virt_to_idmap(x);
 	else
 		return __virt_to_phys(x);
diff --git a/arch/arm/include/asm/mm-arch-hooks.h b/arch/arm/include/asm/mm-arch-hooks.h
deleted file mode 100644
index 7056660c7cc4..000000000000
--- a/arch/arm/include/asm/mm-arch-hooks.h
+++ /dev/null
@@ -1,15 +0,0 @@
-/*
- * Architecture specific mm hooks
- *
- * Copyright (C) 2015, IBM Corporation
- * Author: Laurent Dufour <ldufour@linux.vnet.ibm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#ifndef _ASM_ARM_MM_ARCH_HOOKS_H
-#define _ASM_ARM_MM_ARCH_HOOKS_H
-
-#endif /* _ASM_ARM_MM_ARCH_HOOKS_H */
diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c
index 357f57ea83f4..54272e0be713 100644
--- a/arch/arm/kernel/perf_event.c
+++ b/arch/arm/kernel/perf_event.c
@@ -818,12 +818,13 @@ static int of_pmu_irq_cfg(struct arm_pmu *pmu)
 			if (arch_find_n_match_cpu_physical_id(dn, cpu, NULL))
 				break;
 
-		of_node_put(dn);
 		if (cpu >= nr_cpu_ids) {
 			pr_warn("Failed to find logical CPU for %s\n",
 				dn->name);
+			of_node_put(dn);
 			break;
 		}
+		of_node_put(dn);
 
 		irqs[i] = cpu;
 		cpumask_set_cpu(cpu, &pmu->supported_cpus);
diff --git a/arch/arm/kernel/reboot.c b/arch/arm/kernel/reboot.c
index 1a4d232796be..38269358fd25 100644
--- a/arch/arm/kernel/reboot.c
+++ b/arch/arm/kernel/reboot.c
@@ -50,7 +50,7 @@ static void __soft_restart(void *addr)
 	flush_cache_all();
 
 	/* Switch to the identity mapping. */
-	phys_reset = (phys_reset_t)(unsigned long)virt_to_phys(cpu_reset);
+	phys_reset = (phys_reset_t)(unsigned long)virt_to_idmap(cpu_reset);
 	phys_reset((unsigned long)addr);
 
 	/* Should never get here. */
diff --git a/arch/arm/mach-imx/gpc.c b/arch/arm/mach-imx/gpc.c
index 80bad29d609a..8c4467fad837 100644
--- a/arch/arm/mach-imx/gpc.c
+++ b/arch/arm/mach-imx/gpc.c
@@ -291,8 +291,6 @@ void __init imx_gpc_check_dt(void)
 	}
 }
 
-#ifdef CONFIG_PM_GENERIC_DOMAINS
-
 static void _imx6q_pm_pu_power_off(struct generic_pm_domain *genpd)
 {
 	int iso, iso2sw;
@@ -399,7 +397,6 @@ static struct genpd_onecell_data imx_gpc_onecell_data = {
 static int imx_gpc_genpd_init(struct device *dev, struct regulator *pu_reg)
 {
 	struct clk *clk;
-	bool is_off;
 	int i;
 
 	imx6q_pu_domain.reg = pu_reg;
@@ -416,18 +413,13 @@ static int imx_gpc_genpd_init(struct device *dev, struct regulator *pu_reg)
 	}
 	imx6q_pu_domain.num_clks = i;
 
-	is_off = IS_ENABLED(CONFIG_PM);
-	if (is_off) {
-		_imx6q_pm_pu_power_off(&imx6q_pu_domain.base);
-	} else {
-		/*
-		 * Enable power if compiled without CONFIG_PM in case the
-		 * bootloader disabled it.
-		 */
-		imx6q_pm_pu_power_on(&imx6q_pu_domain.base);
-	}
+	/* Enable power always in case bootloader disabled it. */
+	imx6q_pm_pu_power_on(&imx6q_pu_domain.base);
+
+	if (!IS_ENABLED(CONFIG_PM_GENERIC_DOMAINS))
+		return 0;
 
-	pm_genpd_init(&imx6q_pu_domain.base, NULL, is_off);
+	pm_genpd_init(&imx6q_pu_domain.base, NULL, false);
 	return of_genpd_add_provider_onecell(dev->of_node,
 					     &imx_gpc_onecell_data);
 
@@ -437,13 +429,6 @@ clk_err:
 	return -EINVAL;
 }
 
-#else
-static inline int imx_gpc_genpd_init(struct device *dev, struct regulator *reg)
-{
-	return 0;
-}
-#endif /* CONFIG_PM_GENERIC_DOMAINS */
-
 static int imx_gpc_probe(struct platform_device *pdev)
 {
 	struct regulator *pu_reg;
diff --git a/arch/arm/mach-omap2/Kconfig b/arch/arm/mach-omap2/Kconfig
index ecc04ff13e95..4a023e8d1bdb 100644
--- a/arch/arm/mach-omap2/Kconfig
+++ b/arch/arm/mach-omap2/Kconfig
@@ -60,6 +60,7 @@ config SOC_AM43XX
 	select ARM_GIC
 	select MACH_OMAP_GENERIC
 	select MIGHT_HAVE_CACHE_L2X0
+	select HAVE_ARM_SCU
 
 config SOC_DRA7XX
 	bool "TI DRA7XX"
diff --git a/arch/arm/mach-pxa/capc7117.c b/arch/arm/mach-pxa/capc7117.c
index c092730749b9..bf366b39fa61 100644
--- a/arch/arm/mach-pxa/capc7117.c
+++ b/arch/arm/mach-pxa/capc7117.c
@@ -24,6 +24,7 @@
 #include <linux/ata_platform.h>
 #include <linux/serial_8250.h>
 #include <linux/gpio.h>
+#include <linux/regulator/machine.h>
 
 #include <asm/mach-types.h>
 #include <asm/mach/arch.h>
@@ -144,6 +145,8 @@ static void __init capc7117_init(void)
 
 	capc7117_uarts_init();
 	capc7117_ide_init();
+
+	regulator_has_full_constraints();
 }
 
 MACHINE_START(CAPC7117,
diff --git a/arch/arm/mach-pxa/cm-x2xx.c b/arch/arm/mach-pxa/cm-x2xx.c
index bb99f59a36d8..a17a91eb8e9a 100644
--- a/arch/arm/mach-pxa/cm-x2xx.c
+++ b/arch/arm/mach-pxa/cm-x2xx.c
@@ -13,6 +13,7 @@
 #include <linux/syscore_ops.h>
 #include <linux/irq.h>
 #include <linux/gpio.h>
+#include <linux/regulator/machine.h>
 
 #include <linux/dm9000.h>
 #include <linux/leds.h>
@@ -466,6 +467,8 @@ static void __init cmx2xx_init(void)
 	cmx2xx_init_ac97();
 	cmx2xx_init_touchscreen();
 	cmx2xx_init_leds();
+
+	regulator_has_full_constraints();
 }
 
 static void __init cmx2xx_init_irq(void)
diff --git a/arch/arm/mach-pxa/cm-x300.c b/arch/arm/mach-pxa/cm-x300.c
index 4d3588d26c2a..5851f4c254c1 100644
--- a/arch/arm/mach-pxa/cm-x300.c
+++ b/arch/arm/mach-pxa/cm-x300.c
@@ -835,6 +835,8 @@ static void __init cm_x300_init(void)
 	cm_x300_init_ac97();
 	cm_x300_init_wi2wi();
 	cm_x300_init_bl();
+
+	regulator_has_full_constraints();
 }
 
 static void __init cm_x300_fixup(struct tag *tags, char **cmdline)
diff --git a/arch/arm/mach-pxa/colibri-pxa270.c b/arch/arm/mach-pxa/colibri-pxa270.c
index 5f9d9303b346..3503826333c7 100644
--- a/arch/arm/mach-pxa/colibri-pxa270.c
+++ b/arch/arm/mach-pxa/colibri-pxa270.c
@@ -18,6 +18,7 @@
 #include <linux/mtd/partitions.h>
 #include <linux/mtd/physmap.h>
 #include <linux/platform_device.h>
+#include <linux/regulator/machine.h>
 #include <linux/ucb1400.h>
 
 #include <asm/mach/arch.h>
@@ -294,6 +295,8 @@ static void __init colibri_pxa270_init(void)
 		printk(KERN_ERR "Illegal colibri_pxa270_baseboard type %d\n",
 				colibri_pxa270_baseboard);
 	}
+
+	regulator_has_full_constraints();
 }
 
 /* The "Income s.r.o. SH-Dmaster PXA270 SBC" board can be booted either
diff --git a/arch/arm/mach-pxa/em-x270.c b/arch/arm/mach-pxa/em-x270.c
index 51531ecffca8..9d7072b04045 100644
--- a/arch/arm/mach-pxa/em-x270.c
+++ b/arch/arm/mach-pxa/em-x270.c
@@ -1306,6 +1306,8 @@ static void __init em_x270_init(void)
 	em_x270_init_i2c();
 	em_x270_init_camera();
 	em_x270_userspace_consumers_init();
+
+	regulator_has_full_constraints();
 }
 
 MACHINE_START(EM_X270, "Compulab EM-X270")
diff --git a/arch/arm/mach-pxa/icontrol.c b/arch/arm/mach-pxa/icontrol.c
index c98511c5abd1..9b0eb0252af6 100644
--- a/arch/arm/mach-pxa/icontrol.c
+++ b/arch/arm/mach-pxa/icontrol.c
@@ -26,6 +26,7 @@
 #include <linux/spi/spi.h>
 #include <linux/spi/pxa2xx_spi.h>
 #include <linux/can/platform/mcp251x.h>
+#include <linux/regulator/machine.h>
 
 #include "generic.h"
 
@@ -185,6 +186,8 @@ static void __init icontrol_init(void)
 	mxm_8x10_mmc_init();
 
 	icontrol_can_init();
+
+	regulator_has_full_constraints();
 }
 
 MACHINE_START(ICONTROL, "iControl/SafeTcam boards using Embedian MXM-8x10 CoM")
diff --git a/arch/arm/mach-pxa/trizeps4.c b/arch/arm/mach-pxa/trizeps4.c
index 872dcb20e757..066e3a250ee0 100644
--- a/arch/arm/mach-pxa/trizeps4.c
+++ b/arch/arm/mach-pxa/trizeps4.c
@@ -26,6 +26,7 @@
 #include <linux/dm9000.h>
 #include <linux/mtd/physmap.h>
 #include <linux/mtd/partitions.h>
+#include <linux/regulator/machine.h>
 #include <linux/i2c/pxa-i2c.h>
 
 #include <asm/types.h>
@@ -534,6 +535,8 @@ static void __init trizeps4_init(void)
 
 	BCR_writew(trizeps_conxs_bcr);
 	board_backlight_power(1);
+
+	regulator_has_full_constraints();
 }
 
 static void __init trizeps4_map_io(void)
diff --git a/arch/arm/mach-pxa/vpac270.c b/arch/arm/mach-pxa/vpac270.c
index aa89488f961e..54122a983ae3 100644
--- a/arch/arm/mach-pxa/vpac270.c
+++ b/arch/arm/mach-pxa/vpac270.c
@@ -24,6 +24,7 @@
 #include <linux/dm9000.h>
 #include <linux/ucb1400.h>
 #include <linux/ata_platform.h>
+#include <linux/regulator/machine.h>
 #include <linux/regulator/max1586.h>
 #include <linux/i2c/pxa-i2c.h>
 
@@ -711,6 +712,8 @@ static void __init vpac270_init(void)
 	vpac270_ts_init();
 	vpac270_rtc_init();
 	vpac270_ide_init();
+
+	regulator_has_full_constraints();
 }
 
 MACHINE_START(VPAC270, "Voipac PXA270")
diff --git a/arch/arm/mach-pxa/zeus.c b/arch/arm/mach-pxa/zeus.c
index ac2ae5c71ab4..6158566fa0f7 100644
--- a/arch/arm/mach-pxa/zeus.c
+++ b/arch/arm/mach-pxa/zeus.c
@@ -868,6 +868,8 @@ static void __init zeus_init(void)
 	i2c_register_board_info(0, ARRAY_AND_SIZE(zeus_i2c_devices));
 	pxa2xx_set_spi_info(3, &pxa2xx_spi_ssp3_master_info);
 	spi_register_board_info(zeus_spi_board_info, ARRAY_SIZE(zeus_spi_board_info));
+
+	regulator_has_full_constraints();
 }
 
 static struct map_desc zeus_io_desc[] __initdata = {
diff --git a/arch/arm/mach-spear/generic.h b/arch/arm/mach-spear/generic.h
index a99d90a4d09c..06640914d9a0 100644
--- a/arch/arm/mach-spear/generic.h
+++ b/arch/arm/mach-spear/generic.h
@@ -3,7 +3,7 @@
  *
  * Copyright (C) 2009-2012 ST Microelectronics
  * Rajeev Kumar <rajeev-dlh.kumar@st.com>
- * Viresh Kumar <viresh.linux@gmail.com>
+ * Viresh Kumar <vireshk@kernel.org>
  *
  * This file is licensed under the terms of the GNU General Public
  * License version 2. This program is licensed "as is" without any
diff --git a/arch/arm/mach-spear/include/mach/irqs.h b/arch/arm/mach-spear/include/mach/irqs.h
index 92da0a8c6bce..7058720c5278 100644
--- a/arch/arm/mach-spear/include/mach/irqs.h
+++ b/arch/arm/mach-spear/include/mach/irqs.h
@@ -3,7 +3,7 @@
  *
  * Copyright (C) 2009-2012 ST Microelectronics
  * Rajeev Kumar <rajeev-dlh.kumar@st.com>
- * Viresh Kumar <viresh.linux@gmail.com>
+ * Viresh Kumar <vireshk@kernel.org>
  *
  * This file is licensed under the terms of the GNU General Public
  * License version 2. This program is licensed "as is" without any
diff --git a/arch/arm/mach-spear/include/mach/misc_regs.h b/arch/arm/mach-spear/include/mach/misc_regs.h
index 935639ce59ba..cfaf7c665b58 100644
--- a/arch/arm/mach-spear/include/mach/misc_regs.h
+++ b/arch/arm/mach-spear/include/mach/misc_regs.h
@@ -4,7 +4,7 @@
  * Miscellaneous registers definitions for SPEAr3xx machine family
  *
  * Copyright (C) 2009 ST Microelectronics
- * Viresh Kumar <viresh.linux@gmail.com>
+ * Viresh Kumar <vireshk@kernel.org>
  *
  * This file is licensed under the terms of the GNU General Public
  * License version 2. This program is licensed "as is" without any
diff --git a/arch/arm/mach-spear/include/mach/spear.h b/arch/arm/mach-spear/include/mach/spear.h
index f2d6a0176575..5ed841ccf8a3 100644
--- a/arch/arm/mach-spear/include/mach/spear.h
+++ b/arch/arm/mach-spear/include/mach/spear.h
@@ -3,7 +3,7 @@
  *
  * Copyright (C) 2009,2012 ST Microelectronics
  * Rajeev Kumar<rajeev-dlh.kumar@st.com>
- * Viresh Kumar <viresh.linux@gmail.com>
+ * Viresh Kumar <vireshk@kernel.org>
  *
  * This file is licensed under the terms of the GNU General Public
  * License version 2. This program is licensed "as is" without any
diff --git a/arch/arm/mach-spear/include/mach/uncompress.h b/arch/arm/mach-spear/include/mach/uncompress.h
index 51b2dc93e4da..8439b9c12edb 100644
--- a/arch/arm/mach-spear/include/mach/uncompress.h
+++ b/arch/arm/mach-spear/include/mach/uncompress.h
@@ -4,7 +4,7 @@
  * Serial port stubs for kernel decompress status messages
  *
  * Copyright (C) 2009 ST Microelectronics
- * Viresh Kumar <viresh.linux@gmail.com>
+ * Viresh Kumar <vireshk@kernel.org>
  *
  * This file is licensed under the terms of the GNU General Public
  * License version 2. This program is licensed "as is" without any
diff --git a/arch/arm/mach-spear/pl080.c b/arch/arm/mach-spear/pl080.c
index cfa1199d0f4a..b4529f3e0ee9 100644
--- a/arch/arm/mach-spear/pl080.c
+++ b/arch/arm/mach-spear/pl080.c
@@ -4,7 +4,7 @@
  * DMAC pl080 definitions for SPEAr platform
  *
  * Copyright (C) 2012 ST Microelectronics
- * Viresh Kumar <viresh.linux@gmail.com>
+ * Viresh Kumar <vireshk@kernel.org>
  *
  * This file is licensed under the terms of the GNU General Public
  * License version 2. This program is licensed "as is" without any
diff --git a/arch/arm/mach-spear/pl080.h b/arch/arm/mach-spear/pl080.h
index eb6590ded40d..608dec6725ae 100644
--- a/arch/arm/mach-spear/pl080.h
+++ b/arch/arm/mach-spear/pl080.h
@@ -4,7 +4,7 @@
  * DMAC pl080 definitions for SPEAr platform
  *
  * Copyright (C) 2012 ST Microelectronics
- * Viresh Kumar <viresh.linux@gmail.com>
+ * Viresh Kumar <vireshk@kernel.org>
  *
  * This file is licensed under the terms of the GNU General Public
  * License version 2. This program is licensed "as is" without any
diff --git a/arch/arm/mach-spear/restart.c b/arch/arm/mach-spear/restart.c
index ce5e098c4888..b4342155a783 100644
--- a/arch/arm/mach-spear/restart.c
+++ b/arch/arm/mach-spear/restart.c
@@ -4,7 +4,7 @@
  * SPEAr platform specific restart functions
  *
  * Copyright (C) 2009 ST Microelectronics
- * Viresh Kumar <viresh.linux@gmail.com>
+ * Viresh Kumar <vireshk@kernel.org>
  *
  * This file is licensed under the terms of the GNU General Public
  * License version 2. This program is licensed "as is" without any
diff --git a/arch/arm/mach-spear/spear1310.c b/arch/arm/mach-spear/spear1310.c
index d9ce4d8000f0..cd5d375d91f0 100644
--- a/arch/arm/mach-spear/spear1310.c
+++ b/arch/arm/mach-spear/spear1310.c
@@ -4,7 +4,7 @@
  * SPEAr1310 machine source file
  *
  * Copyright (C) 2012 ST Microelectronics
- * Viresh Kumar <viresh.linux@gmail.com>
+ * Viresh Kumar <vireshk@kernel.org>
  *
  * This file is licensed under the terms of the GNU General Public
  * License version 2. This program is licensed "as is" without any
diff --git a/arch/arm/mach-spear/spear1340.c b/arch/arm/mach-spear/spear1340.c
index 3f3c0f124bd3..94594d5a446c 100644
--- a/arch/arm/mach-spear/spear1340.c
+++ b/arch/arm/mach-spear/spear1340.c
@@ -4,7 +4,7 @@
  * SPEAr1340 machine source file
  *
  * Copyright (C) 2012 ST Microelectronics
- * Viresh Kumar <viresh.linux@gmail.com>
+ * Viresh Kumar <vireshk@kernel.org>
  *
  * This file is licensed under the terms of the GNU General Public
  * License version 2. This program is licensed "as is" without any
diff --git a/arch/arm/mach-spear/spear13xx.c b/arch/arm/mach-spear/spear13xx.c
index 2e463a93468d..b7afce6795f4 100644
--- a/arch/arm/mach-spear/spear13xx.c
+++ b/arch/arm/mach-spear/spear13xx.c
@@ -4,7 +4,7 @@
  * SPEAr13XX machines common source file
  *
  * Copyright (C) 2012 ST Microelectronics
- * Viresh Kumar <viresh.linux@gmail.com>
+ * Viresh Kumar <vireshk@kernel.org>
  *
  * This file is licensed under the terms of the GNU General Public
  * License version 2. This program is licensed "as is" without any
diff --git a/arch/arm/mach-spear/spear300.c b/arch/arm/mach-spear/spear300.c
index b52e48f342f4..5b32edda2276 100644
--- a/arch/arm/mach-spear/spear300.c
+++ b/arch/arm/mach-spear/spear300.c
@@ -4,7 +4,7 @@
  * SPEAr300 machine source file
  *
  * Copyright (C) 2009-2012 ST Microelectronics
- * Viresh Kumar <viresh.linux@gmail.com>
+ * Viresh Kumar <vireshk@kernel.org>
  *
  * This file is licensed under the terms of the GNU General Public
  * License version 2. This program is licensed "as is" without any
diff --git a/arch/arm/mach-spear/spear310.c b/arch/arm/mach-spear/spear310.c
index ed2029db391f..86a44ac7ff67 100644
--- a/arch/arm/mach-spear/spear310.c
+++ b/arch/arm/mach-spear/spear310.c
@@ -4,7 +4,7 @@
  * SPEAr310 machine source file
  *
  * Copyright (C) 2009-2012 ST Microelectronics
- * Viresh Kumar <viresh.linux@gmail.com>
+ * Viresh Kumar <vireshk@kernel.org>
  *
  * This file is licensed under the terms of the GNU General Public
  * License version 2. This program is licensed "as is" without any
diff --git a/arch/arm/mach-spear/spear320.c b/arch/arm/mach-spear/spear320.c
index bf634b32a930..d45d751926c5 100644
--- a/arch/arm/mach-spear/spear320.c
+++ b/arch/arm/mach-spear/spear320.c
@@ -4,7 +4,7 @@
  * SPEAr320 machine source file
  *
  * Copyright (C) 2009-2012 ST Microelectronics
- * Viresh Kumar <viresh.linux@gmail.com>
+ * Viresh Kumar <vireshk@kernel.org>
  *
  * This file is licensed under the terms of the GNU General Public
  * License version 2. This program is licensed "as is" without any
diff --git a/arch/arm/mach-spear/spear3xx.c b/arch/arm/mach-spear/spear3xx.c
index bf3b1fd8cb23..23394ac76cf2 100644
--- a/arch/arm/mach-spear/spear3xx.c
+++ b/arch/arm/mach-spear/spear3xx.c
@@ -4,7 +4,7 @@
  * SPEAr3XX machines common source file
  *
  * Copyright (C) 2009-2012 ST Microelectronics
- * Viresh Kumar <viresh.linux@gmail.com>
+ * Viresh Kumar <vireshk@kernel.org>
  *
  * This file is licensed under the terms of the GNU General Public
  * License version 2. This program is licensed "as is" without any
diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index 1ced8a0f7a52..cba12f34ff77 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -1971,7 +1971,7 @@ static int extend_iommu_mapping(struct dma_iommu_mapping *mapping)
 {
 	int next_bitmap;
 
-	if (mapping->nr_bitmaps > mapping->extensions)
+	if (mapping->nr_bitmaps >= mapping->extensions)
 		return -EINVAL;
 
 	next_bitmap = mapping->nr_bitmaps;
diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S
index 0716bbe19872..de2b246fed38 100644
--- a/arch/arm/mm/proc-v7.S
+++ b/arch/arm/mm/proc-v7.S
@@ -274,7 +274,10 @@ __v7_ca15mp_setup:
 __v7_b15mp_setup:
 __v7_ca17mp_setup:
 	mov	r10, #0
-1:
+1:	adr	r12, __v7_setup_stack		@ the local stack
+	stmia	r12, {r0-r5, lr}		@ v7_invalidate_l1 touches r0-r6
+	bl      v7_invalidate_l1
+	ldmia	r12, {r0-r5, lr}
 #ifdef CONFIG_SMP
 	ALT_SMP(mrc	p15, 0, r0, c1, c0, 1)
 	ALT_UP(mov	r0, #(1 << 6))		@ fake it for UP
@@ -283,7 +286,7 @@ __v7_ca17mp_setup:
 	orreq	r0, r0, r10			@ Enable CPU-specific SMP bits
 	mcreq	p15, 0, r0, c1, c0, 1
 #endif
-	b	__v7_setup
+	b	__v7_setup_cont
 
 /*
  * Errata:
@@ -413,10 +416,11 @@ __v7_pj4b_setup:
 
 __v7_setup:
 	adr	r12, __v7_setup_stack		@ the local stack
-	stmia	r12, {r0-r5, r7, r9, r11, lr}
+	stmia	r12, {r0-r5, lr}		@ v7_invalidate_l1 touches r0-r6
 	bl      v7_invalidate_l1
-	ldmia	r12, {r0-r5, r7, r9, r11, lr}
+	ldmia	r12, {r0-r5, lr}
 
+__v7_setup_cont:
 	and	r0, r9, #0xff000000		@ ARM?
 	teq	r0, #0x41000000
 	bne	__errata_finish
@@ -480,7 +484,7 @@ ENDPROC(__v7_setup)
 
 	.align	2
 __v7_setup_stack:
-	.space	4 * 11				@ 11 registers
+	.space	4 * 7				@ 12 registers
 
 	__INITDATA
 
diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c
index 4550d247e308..c011e2296cb1 100644
--- a/arch/arm/net/bpf_jit_32.c
+++ b/arch/arm/net/bpf_jit_32.c
@@ -74,32 +74,52 @@ struct jit_ctx {
 
 int bpf_jit_enable __read_mostly;
 
-static u64 jit_get_skb_b(struct sk_buff *skb, unsigned offset)
+static inline int call_neg_helper(struct sk_buff *skb, int offset, void *ret,
+		      unsigned int size)
+{
+	void *ptr = bpf_internal_load_pointer_neg_helper(skb, offset, size);
+
+	if (!ptr)
+		return -EFAULT;
+	memcpy(ret, ptr, size);
+	return 0;
+}
+
+static u64 jit_get_skb_b(struct sk_buff *skb, int offset)
 {
 	u8 ret;
 	int err;
 
-	err = skb_copy_bits(skb, offset, &ret, 1);
+	if (offset < 0)
+		err = call_neg_helper(skb, offset, &ret, 1);
+	else
+		err = skb_copy_bits(skb, offset, &ret, 1);
 
 	return (u64)err << 32 | ret;
 }
 
-static u64 jit_get_skb_h(struct sk_buff *skb, unsigned offset)
+static u64 jit_get_skb_h(struct sk_buff *skb, int offset)
 {
 	u16 ret;
 	int err;
 
-	err = skb_copy_bits(skb, offset, &ret, 2);
+	if (offset < 0)
+		err = call_neg_helper(skb, offset, &ret, 2);
+	else
+		err = skb_copy_bits(skb, offset, &ret, 2);
 
 	return (u64)err << 32 | ntohs(ret);
 }
 
-static u64 jit_get_skb_w(struct sk_buff *skb, unsigned offset)
+static u64 jit_get_skb_w(struct sk_buff *skb, int offset)
 {
 	u32 ret;
 	int err;
 
-	err = skb_copy_bits(skb, offset, &ret, 4);
+	if (offset < 0)
+		err = call_neg_helper(skb, offset, &ret, 4);
+	else
+		err = skb_copy_bits(skb, offset, &ret, 4);
 
 	return (u64)err << 32 | ntohl(ret);
 }
@@ -536,9 +556,6 @@ static int build_body(struct jit_ctx *ctx)
 		case BPF_LD | BPF_B | BPF_ABS:
 			load_order = 0;
 load:
-			/* the interpreter will deal with the negative K */
-			if ((int)k < 0)
-				return -ENOTSUPP;
 			emit_mov_i(r_off, k, ctx);
 load_common:
 			ctx->seen |= SEEN_DATA | SEEN_CALL;
@@ -547,12 +564,24 @@ load_common:
 				emit(ARM_SUB_I(r_scratch, r_skb_hl,
 					       1 << load_order), ctx);
 				emit(ARM_CMP_R(r_scratch, r_off), ctx);
-				condt = ARM_COND_HS;
+				condt = ARM_COND_GE;
 			} else {
 				emit(ARM_CMP_R(r_skb_hl, r_off), ctx);
 				condt = ARM_COND_HI;
 			}
 
+			/*
+			 * test for negative offset, only if we are
+			 * currently scheduled to take the fast
+			 * path. this will update the flags so that
+			 * the slowpath instruction are ignored if the
+			 * offset is negative.
+			 *
+			 * for loard_order == 0 the HI condition will
+			 * make loads at offset 0 take the slow path too.
+			 */
+			_emit(condt, ARM_CMP_I(r_off, 0), ctx);
+
 			_emit(condt, ARM_ADD_R(r_scratch, r_off, r_skb_data),
 			      ctx);
 
@@ -860,9 +889,11 @@ b_epilogue:
 			off = offsetof(struct sk_buff, vlan_tci);
 			emit(ARM_LDRH_I(r_A, r_skb, off), ctx);
 			if (code == (BPF_ANC | SKF_AD_VLAN_TAG))
-				OP_IMM3(ARM_AND, r_A, r_A, VLAN_VID_MASK, ctx);
-			else
-				OP_IMM3(ARM_AND, r_A, r_A, VLAN_TAG_PRESENT, ctx);
+				OP_IMM3(ARM_AND, r_A, r_A, ~VLAN_TAG_PRESENT, ctx);
+			else {
+				OP_IMM3(ARM_LSR, r_A, r_A, 12, ctx);
+				OP_IMM3(ARM_AND, r_A, r_A, 0x1, ctx);
+			}
 			break;
 		case BPF_ANC | SKF_AD_QUEUE:
 			ctx->seen |= SEEN_SKB;
diff --git a/arch/arm64/include/asm/Kbuild b/arch/arm64/include/asm/Kbuild
index b112a39834d0..70fd9ffb58cf 100644
--- a/arch/arm64/include/asm/Kbuild
+++ b/arch/arm64/include/asm/Kbuild
@@ -25,6 +25,7 @@ generic-y += kvm_para.h
 generic-y += local.h
 generic-y += local64.h
 generic-y += mcs_spinlock.h
+generic-y += mm-arch-hooks.h
 generic-y += mman.h
 generic-y += msgbuf.h
 generic-y += msi.h
diff --git a/arch/arm64/include/asm/mm-arch-hooks.h b/arch/arm64/include/asm/mm-arch-hooks.h
deleted file mode 100644
index 562b655f5ba9..000000000000
--- a/arch/arm64/include/asm/mm-arch-hooks.h
+++ /dev/null
@@ -1,15 +0,0 @@
-/*
- * Architecture specific mm hooks
- *
- * Copyright (C) 2015, IBM Corporation
- * Author: Laurent Dufour <ldufour@linux.vnet.ibm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#ifndef _ASM_ARM64_MM_ARCH_HOOKS_H
-#define _ASM_ARM64_MM_ARCH_HOOKS_H
-
-#endif /* _ASM_ARM64_MM_ARCH_HOOKS_H */
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index f860bfda454a..e16351819fed 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -585,7 +585,8 @@ ENDPROC(el0_irq)
  *
  */
 ENTRY(cpu_switch_to)
-	add	x8, x0, #THREAD_CPU_CONTEXT
+	mov	x10, #THREAD_CPU_CONTEXT
+	add	x8, x0, x10
 	mov	x9, sp
 	stp	x19, x20, [x8], #16		// store callee-saved registers
 	stp	x21, x22, [x8], #16
@@ -594,7 +595,7 @@ ENTRY(cpu_switch_to)
 	stp	x27, x28, [x8], #16
 	stp	x29, x9, [x8], #16
 	str	lr, [x8]
-	add	x8, x1, #THREAD_CPU_CONTEXT
+	add	x8, x1, x10
 	ldp	x19, x20, [x8], #16		// restore callee-saved registers
 	ldp	x21, x22, [x8], #16
 	ldp	x23, x24, [x8], #16
diff --git a/arch/arm64/kernel/irq.c b/arch/arm64/kernel/irq.c
index 240b75c0e94f..463fa2e7e34c 100644
--- a/arch/arm64/kernel/irq.c
+++ b/arch/arm64/kernel/irq.c
@@ -61,7 +61,7 @@ void __init init_IRQ(void)
 static bool migrate_one_irq(struct irq_desc *desc)
 {
 	struct irq_data *d = irq_desc_get_irq_data(desc);
-	const struct cpumask *affinity = d->affinity;
+	const struct cpumask *affinity = irq_data_get_affinity_mask(d);
 	struct irq_chip *c;
 	bool ret = false;
 
@@ -81,7 +81,7 @@ static bool migrate_one_irq(struct irq_desc *desc)
 	if (!c->irq_set_affinity)
 		pr_debug("IRQ%u: unable to set affinity\n", d->irq);
 	else if (c->irq_set_affinity(d, affinity, false) == IRQ_SET_MASK_OK && ret)
-		cpumask_copy(d->affinity, affinity);
+		cpumask_copy(irq_data_get_affinity_mask(d), affinity);
 
 	return ret;
 }
diff --git a/arch/avr32/include/asm/Kbuild b/arch/avr32/include/asm/Kbuild
index 1d66afdfac07..f61f2dd67464 100644
--- a/arch/avr32/include/asm/Kbuild
+++ b/arch/avr32/include/asm/Kbuild
@@ -12,6 +12,7 @@ generic-y += irq_work.h
 generic-y += local.h
 generic-y += local64.h
 generic-y += mcs_spinlock.h
+generic-y += mm-arch-hooks.h
 generic-y += param.h
 generic-y += percpu.h
 generic-y += preempt.h
diff --git a/arch/avr32/include/asm/mm-arch-hooks.h b/arch/avr32/include/asm/mm-arch-hooks.h
deleted file mode 100644
index 145452ffbdad..000000000000
--- a/arch/avr32/include/asm/mm-arch-hooks.h
+++ /dev/null
@@ -1,15 +0,0 @@
-/*
- * Architecture specific mm hooks
- *
- * Copyright (C) 2015, IBM Corporation
- * Author: Laurent Dufour <ldufour@linux.vnet.ibm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#ifndef _ASM_AVR32_MM_ARCH_HOOKS_H
-#define _ASM_AVR32_MM_ARCH_HOOKS_H
-
-#endif /* _ASM_AVR32_MM_ARCH_HOOKS_H */
diff --git a/arch/avr32/kernel/time.c b/arch/avr32/kernel/time.c
index d0f771be9e96..a124c55733db 100644
--- a/arch/avr32/kernel/time.c
+++ b/arch/avr32/kernel/time.c
@@ -18,6 +18,7 @@
 
 #include <mach/pm.h>
 
+static bool disable_cpu_idle_poll;
 
 static cycle_t read_cycle_count(struct clocksource *cs)
 {
@@ -80,45 +81,45 @@ static int comparator_next_event(unsigned long delta,
 	return 0;
 }
 
-static void comparator_mode(enum clock_event_mode mode,
-		struct clock_event_device *evdev)
+static int comparator_shutdown(struct clock_event_device *evdev)
 {
-	switch (mode) {
-	case CLOCK_EVT_MODE_ONESHOT:
-		pr_debug("%s: start\n", evdev->name);
-		/* FALLTHROUGH */
-	case CLOCK_EVT_MODE_RESUME:
+	pr_debug("%s: %s\n", __func__, evdev->name);
+	sysreg_write(COMPARE, 0);
+
+	if (disable_cpu_idle_poll) {
+		disable_cpu_idle_poll = false;
 		/*
-		 * If we're using the COUNT and COMPARE registers we
-		 * need to force idle poll.
+		 * Only disable idle poll if we have forced that
+		 * in a previous call.
 		 */
-		cpu_idle_poll_ctrl(true);
-		break;
-	case CLOCK_EVT_MODE_UNUSED:
-	case CLOCK_EVT_MODE_SHUTDOWN:
-		sysreg_write(COMPARE, 0);
-		pr_debug("%s: stop\n", evdev->name);
-		if (evdev->mode == CLOCK_EVT_MODE_ONESHOT ||
-		    evdev->mode == CLOCK_EVT_MODE_RESUME) {
-			/*
-			 * Only disable idle poll if we have forced that
-			 * in a previous call.
-			 */
-			cpu_idle_poll_ctrl(false);
-		}
-		break;
-	default:
-		BUG();
+		cpu_idle_poll_ctrl(false);
 	}
+	return 0;
+}
+
+static int comparator_set_oneshot(struct clock_event_device *evdev)
+{
+	pr_debug("%s: %s\n", __func__, evdev->name);
+
+	disable_cpu_idle_poll = true;
+	/*
+	 * If we're using the COUNT and COMPARE registers we
+	 * need to force idle poll.
+	 */
+	cpu_idle_poll_ctrl(true);
+
+	return 0;
 }
 
 static struct clock_event_device comparator = {
-	.name		= "avr32_comparator",
-	.features	= CLOCK_EVT_FEAT_ONESHOT,
-	.shift		= 16,
-	.rating		= 50,
-	.set_next_event	= comparator_next_event,
-	.set_mode	= comparator_mode,
+	.name			= "avr32_comparator",
+	.features		= CLOCK_EVT_FEAT_ONESHOT,
+	.shift			= 16,
+	.rating			= 50,
+	.set_next_event		= comparator_next_event,
+	.set_state_shutdown	= comparator_shutdown,
+	.set_state_oneshot	= comparator_set_oneshot,
+	.tick_resume		= comparator_set_oneshot,
 };
 
 void read_persistent_clock(struct timespec *ts)
diff --git a/arch/blackfin/include/asm/Kbuild b/arch/blackfin/include/asm/Kbuild
index 07051a63415d..61cd1e786a14 100644
--- a/arch/blackfin/include/asm/Kbuild
+++ b/arch/blackfin/include/asm/Kbuild
@@ -21,6 +21,7 @@ generic-y += kvm_para.h
 generic-y += local.h
 generic-y += local64.h
 generic-y += mcs_spinlock.h
+generic-y += mm-arch-hooks.h
 generic-y += mman.h
 generic-y += msgbuf.h
 generic-y += mutex.h
diff --git a/arch/blackfin/include/asm/mm-arch-hooks.h b/arch/blackfin/include/asm/mm-arch-hooks.h
deleted file mode 100644
index 1c5211ec338f..000000000000
--- a/arch/blackfin/include/asm/mm-arch-hooks.h
+++ /dev/null
@@ -1,15 +0,0 @@
-/*
- * Architecture specific mm hooks
- *
- * Copyright (C) 2015, IBM Corporation
- * Author: Laurent Dufour <ldufour@linux.vnet.ibm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#ifndef _ASM_BLACKFIN_MM_ARCH_HOOKS_H
-#define _ASM_BLACKFIN_MM_ARCH_HOOKS_H
-
-#endif /* _ASM_BLACKFIN_MM_ARCH_HOOKS_H */
diff --git a/arch/c6x/include/asm/Kbuild b/arch/c6x/include/asm/Kbuild
index 7aeb32272975..f17c4dc6050c 100644
--- a/arch/c6x/include/asm/Kbuild
+++ b/arch/c6x/include/asm/Kbuild
@@ -26,6 +26,7 @@ generic-y += kdebug.h
 generic-y += kmap_types.h
 generic-y += local.h
 generic-y += mcs_spinlock.h
+generic-y += mm-arch-hooks.h
 generic-y += mman.h
 generic-y += mmu.h
 generic-y += mmu_context.h
diff --git a/arch/c6x/include/asm/mm-arch-hooks.h b/arch/c6x/include/asm/mm-arch-hooks.h
deleted file mode 100644
index bb3c4a6ce8e9..000000000000
--- a/arch/c6x/include/asm/mm-arch-hooks.h
+++ /dev/null
@@ -1,15 +0,0 @@
-/*
- * Architecture specific mm hooks
- *
- * Copyright (C) 2015, IBM Corporation
- * Author: Laurent Dufour <ldufour@linux.vnet.ibm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#ifndef _ASM_C6X_MM_ARCH_HOOKS_H
-#define _ASM_C6X_MM_ARCH_HOOKS_H
-
-#endif /* _ASM_C6X_MM_ARCH_HOOKS_H */
diff --git a/arch/cris/include/asm/Kbuild b/arch/cris/include/asm/Kbuild
index d294f6aaff1d..ad2244f35bca 100644
--- a/arch/cris/include/asm/Kbuild
+++ b/arch/cris/include/asm/Kbuild
@@ -18,6 +18,7 @@ generic-y += linkage.h
 generic-y += local.h
 generic-y += local64.h
 generic-y += mcs_spinlock.h
+generic-y += mm-arch-hooks.h
 generic-y += module.h
 generic-y += percpu.h
 generic-y += preempt.h
diff --git a/arch/cris/include/asm/mm-arch-hooks.h b/arch/cris/include/asm/mm-arch-hooks.h
deleted file mode 100644
index 314f774db2b0..000000000000
--- a/arch/cris/include/asm/mm-arch-hooks.h
+++ /dev/null
@@ -1,15 +0,0 @@
-/*
- * Architecture specific mm hooks
- *
- * Copyright (C) 2015, IBM Corporation
- * Author: Laurent Dufour <ldufour@linux.vnet.ibm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#ifndef _ASM_CRIS_MM_ARCH_HOOKS_H
-#define _ASM_CRIS_MM_ARCH_HOOKS_H
-
-#endif /* _ASM_CRIS_MM_ARCH_HOOKS_H */
diff --git a/arch/frv/include/asm/Kbuild b/arch/frv/include/asm/Kbuild
index 30edce31e5c2..8e47b832cc76 100644
--- a/arch/frv/include/asm/Kbuild
+++ b/arch/frv/include/asm/Kbuild
@@ -4,5 +4,6 @@ generic-y += cputime.h
 generic-y += exec.h
 generic-y += irq_work.h
 generic-y += mcs_spinlock.h
+generic-y += mm-arch-hooks.h
 generic-y += preempt.h
 generic-y += trace_clock.h
diff --git a/arch/frv/include/asm/mm-arch-hooks.h b/arch/frv/include/asm/mm-arch-hooks.h
deleted file mode 100644
index 51d13a870404..000000000000
--- a/arch/frv/include/asm/mm-arch-hooks.h
+++ /dev/null
@@ -1,15 +0,0 @@
-/*
- * Architecture specific mm hooks
- *
- * Copyright (C) 2015, IBM Corporation
- * Author: Laurent Dufour <ldufour@linux.vnet.ibm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#ifndef _ASM_FRV_MM_ARCH_HOOKS_H
-#define _ASM_FRV_MM_ARCH_HOOKS_H
-
-#endif /* _ASM_FRV_MM_ARCH_HOOKS_H */
diff --git a/arch/h8300/include/asm/Kbuild b/arch/h8300/include/asm/Kbuild
index 00379d64f707..70e6ae1e7006 100644
--- a/arch/h8300/include/asm/Kbuild
+++ b/arch/h8300/include/asm/Kbuild
@@ -33,6 +33,7 @@ generic-y += linkage.h
 generic-y += local.h
 generic-y += local64.h
 generic-y += mcs_spinlock.h
+generic-y += mm-arch-hooks.h
 generic-y += mman.h
 generic-y += mmu.h
 generic-y += mmu_context.h
diff --git a/arch/hexagon/include/asm/Kbuild b/arch/hexagon/include/asm/Kbuild
index 5ade4a163558..daee37bd0999 100644
--- a/arch/hexagon/include/asm/Kbuild
+++ b/arch/hexagon/include/asm/Kbuild
@@ -28,6 +28,7 @@ generic-y += kmap_types.h
 generic-y += local.h
 generic-y += local64.h
 generic-y += mcs_spinlock.h
+generic-y += mm-arch-hooks.h
 generic-y += mman.h
 generic-y += msgbuf.h
 generic-y += pci.h
diff --git a/arch/hexagon/include/asm/mm-arch-hooks.h b/arch/hexagon/include/asm/mm-arch-hooks.h
deleted file mode 100644
index 05e8b939e416..000000000000
--- a/arch/hexagon/include/asm/mm-arch-hooks.h
+++ /dev/null
@@ -1,15 +0,0 @@
-/*
- * Architecture specific mm hooks
- *
- * Copyright (C) 2015, IBM Corporation
- * Author: Laurent Dufour <ldufour@linux.vnet.ibm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#ifndef _ASM_HEXAGON_MM_ARCH_HOOKS_H
-#define _ASM_HEXAGON_MM_ARCH_HOOKS_H
-
-#endif /* _ASM_HEXAGON_MM_ARCH_HOOKS_H */
diff --git a/arch/ia64/include/asm/Kbuild b/arch/ia64/include/asm/Kbuild
index ccff13d33fa2..9de3ba12f6b9 100644
--- a/arch/ia64/include/asm/Kbuild
+++ b/arch/ia64/include/asm/Kbuild
@@ -4,6 +4,7 @@ generic-y += exec.h
 generic-y += irq_work.h
 generic-y += kvm_para.h
 generic-y += mcs_spinlock.h
+generic-y += mm-arch-hooks.h
 generic-y += preempt.h
 generic-y += trace_clock.h
 generic-y += vtime.h
diff --git a/arch/ia64/include/asm/mm-arch-hooks.h b/arch/ia64/include/asm/mm-arch-hooks.h
deleted file mode 100644
index ab4b5c698322..000000000000
--- a/arch/ia64/include/asm/mm-arch-hooks.h
+++ /dev/null
@@ -1,15 +0,0 @@
-/*
- * Architecture specific mm hooks
- *
- * Copyright (C) 2015, IBM Corporation
- * Author: Laurent Dufour <ldufour@linux.vnet.ibm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#ifndef _ASM_IA64_MM_ARCH_HOOKS_H
-#define _ASM_IA64_MM_ARCH_HOOKS_H
-
-#endif /* _ASM_IA64_MM_ARCH_HOOKS_H */
diff --git a/arch/m32r/include/asm/Kbuild b/arch/m32r/include/asm/Kbuild
index ba1cdc018731..e0eb704ca1fa 100644
--- a/arch/m32r/include/asm/Kbuild
+++ b/arch/m32r/include/asm/Kbuild
@@ -4,6 +4,7 @@ generic-y += cputime.h
 generic-y += exec.h
 generic-y += irq_work.h
 generic-y += mcs_spinlock.h
+generic-y += mm-arch-hooks.h
 generic-y += module.h
 generic-y += preempt.h
 generic-y += sections.h
diff --git a/arch/m32r/include/asm/mm-arch-hooks.h b/arch/m32r/include/asm/mm-arch-hooks.h
deleted file mode 100644
index 6d60b4750f41..000000000000
--- a/arch/m32r/include/asm/mm-arch-hooks.h
+++ /dev/null
@@ -1,15 +0,0 @@
-/*
- * Architecture specific mm hooks
- *
- * Copyright (C) 2015, IBM Corporation
- * Author: Laurent Dufour <ldufour@linux.vnet.ibm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#ifndef _ASM_M32R_MM_ARCH_HOOKS_H
-#define _ASM_M32R_MM_ARCH_HOOKS_H
-
-#endif /* _ASM_M32R_MM_ARCH_HOOKS_H */
diff --git a/arch/m68k/Kconfig.cpu b/arch/m68k/Kconfig.cpu
index 33013dfcd3e1..c496d48a8c8d 100644
--- a/arch/m68k/Kconfig.cpu
+++ b/arch/m68k/Kconfig.cpu
@@ -125,6 +125,13 @@ endif # M68KCLASSIC
 
 if COLDFIRE
 
+choice
+	prompt "ColdFire SoC type"
+	default M520x
+	help
+	  Select the type of ColdFire System-on-Chip (SoC) that you want
+	  to build for.
+
 config M5206
 	bool "MCF5206"
 	depends on !MMU
@@ -174,9 +181,6 @@ config M525x
 	help
 	  Freescale (Motorola) Coldfire 5251/5253 processor support.
 
-config M527x
-	bool
-
 config M5271
 	bool "MCF5271"
 	depends on !MMU
@@ -223,9 +227,6 @@ config M5307
 	help
 	  Motorola ColdFire 5307 processor support.
 
-config M53xx
-	bool
-
 config M532x
 	bool "MCF532x"
 	depends on !MMU
@@ -251,9 +252,6 @@ config M5407
 	help
 	  Motorola ColdFire 5407 processor support.
 
-config M54xx
-	bool
-
 config M547x
 	bool "MCF547x"
 	select M54xx
@@ -280,6 +278,17 @@ config M5441x
 	help
 	  Freescale Coldfire 54410/54415/54416/54417/54418 processor support.
 
+endchoice
+
+config M527x
+	bool
+
+config M53xx
+	bool
+
+config M54xx
+	bool
+
 endif # COLDFIRE
 
 
@@ -416,22 +425,18 @@ config HAVE_MBAR
 config HAVE_IPSBAR
 	bool
 
-config CLOCK_SET
-	bool "Enable setting the CPU clock frequency"
-	depends on COLDFIRE
-	default n
-	help
-	  On some CPU's you do not need to know what the core CPU clock
-	  frequency is. On these you can disable clock setting. On some
-	  traditional 68K parts, and on all ColdFire parts you need to set
-	  the appropriate CPU clock frequency. On these devices many of the
-	  onboard peripherals derive their timing from the master CPU clock
-	  frequency.
-
 config CLOCK_FREQ
 	int "Set the core clock frequency"
+	default "25000000" if M5206
+	default "54000000" if M5206e
+	default "166666666" if M520x
+	default "140000000" if M5249
+	default "150000000" if M527x || M523x
+	default "90000000" if M5307
+	default "50000000" if M5407
+	default "266000000" if M54xx
 	default "66666666"
-	depends on CLOCK_SET
+	depends on COLDFIRE
 	help
 	  Define the CPU clock frequency in use. This is the core clock
 	  frequency, it may or may not be the same as the external clock
diff --git a/arch/m68k/configs/m5208evb_defconfig b/arch/m68k/configs/m5208evb_defconfig
index e7292f460af4..4c7b7938d53a 100644
--- a/arch/m68k/configs/m5208evb_defconfig
+++ b/arch/m68k/configs/m5208evb_defconfig
@@ -1,10 +1,6 @@
-# CONFIG_MMU is not set
-CONFIG_EXPERIMENTAL=y
 CONFIG_LOG_BUF_SHIFT=14
-# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
 CONFIG_EXPERT=y
 # CONFIG_KALLSYMS is not set
-# CONFIG_HOTPLUG is not set
 # CONFIG_FUTEX is not set
 # CONFIG_EPOLL is not set
 # CONFIG_SIGNALFD is not set
@@ -16,17 +12,12 @@ CONFIG_EXPERT=y
 # CONFIG_BLK_DEV_BSG is not set
 # CONFIG_IOSCHED_DEADLINE is not set
 # CONFIG_IOSCHED_CFQ is not set
-CONFIG_M520x=y
-CONFIG_CLOCK_SET=y
-CONFIG_CLOCK_FREQ=166666666
-CONFIG_CLOCK_DIV=2
-CONFIG_M5208EVB=y
+# CONFIG_MMU is not set
 # CONFIG_4KSTACKS is not set
 CONFIG_RAMBASE=0x40000000
 CONFIG_RAMSIZE=0x2000000
 CONFIG_VECTORBASE=0x40000000
 CONFIG_KERNELBASE=0x40020000
-CONFIG_RAM16BIT=y
 CONFIG_BINFMT_FLAT=y
 CONFIG_NET=y
 CONFIG_PACKET=y
@@ -40,24 +31,19 @@ CONFIG_INET=y
 # CONFIG_IPV6 is not set
 # CONFIG_FW_LOADER is not set
 CONFIG_MTD=y
-CONFIG_MTD_CHAR=y
 CONFIG_MTD_BLOCK=y
 CONFIG_MTD_RAM=y
 CONFIG_MTD_UCLINUX=y
 CONFIG_BLK_DEV_RAM=y
-# CONFIG_MISC_DEVICES is not set
 CONFIG_NETDEVICES=y
-CONFIG_NET_ETHERNET=y
 CONFIG_FEC=y
-# CONFIG_NETDEV_1000 is not set
-# CONFIG_NETDEV_10000 is not set
 # CONFIG_INPUT is not set
 # CONFIG_SERIO is not set
 # CONFIG_VT is not set
+# CONFIG_UNIX98_PTYS is not set
 CONFIG_SERIAL_MCF=y
 CONFIG_SERIAL_MCF_BAUDRATE=115200
 CONFIG_SERIAL_MCF_CONSOLE=y
-# CONFIG_UNIX98_PTYS is not set
 # CONFIG_HW_RANDOM is not set
 # CONFIG_HWMON is not set
 # CONFIG_USB_SUPPORT is not set
@@ -68,8 +54,6 @@ CONFIG_EXT2_FS=y
 CONFIG_ROMFS_FS=y
 CONFIG_ROMFS_BACKED_BY_MTD=y
 # CONFIG_NETWORK_FILESYSTEMS is not set
-# CONFIG_RCU_CPU_STALL_DETECTOR is not set
-CONFIG_SYSCTL_SYSCALL_CHECK=y
-CONFIG_FULLDEBUG=y
 CONFIG_BOOTPARAM=y
 CONFIG_BOOTPARAM_STRING="root=/dev/mtdblock0"
+CONFIG_FULLDEBUG=y
diff --git a/arch/m68k/configs/m5249evb_defconfig b/arch/m68k/configs/m5249evb_defconfig
index 0cd4b39f325b..a782f368650f 100644
--- a/arch/m68k/configs/m5249evb_defconfig
+++ b/arch/m68k/configs/m5249evb_defconfig
@@ -1,10 +1,6 @@
-# CONFIG_MMU is not set
-CONFIG_EXPERIMENTAL=y
 CONFIG_LOG_BUF_SHIFT=14
-# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
 CONFIG_EXPERT=y
 # CONFIG_KALLSYMS is not set
-# CONFIG_HOTPLUG is not set
 # CONFIG_FUTEX is not set
 # CONFIG_EPOLL is not set
 # CONFIG_SIGNALFD is not set
@@ -16,10 +12,8 @@ CONFIG_EXPERT=y
 # CONFIG_BLK_DEV_BSG is not set
 # CONFIG_IOSCHED_DEADLINE is not set
 # CONFIG_IOSCHED_CFQ is not set
+# CONFIG_MMU is not set
 CONFIG_M5249=y
-CONFIG_CLOCK_SET=y
-CONFIG_CLOCK_FREQ=140000000
-CONFIG_CLOCK_DIV=2
 CONFIG_M5249C3=y
 CONFIG_RAMBASE=0x00000000
 CONFIG_RAMSIZE=0x00800000
@@ -38,23 +32,18 @@ CONFIG_INET=y
 # CONFIG_IPV6 is not set
 # CONFIG_FW_LOADER is not set
 CONFIG_MTD=y
-CONFIG_MTD_CHAR=y
 CONFIG_MTD_BLOCK=y
 CONFIG_MTD_RAM=y
 CONFIG_MTD_UCLINUX=y
 CONFIG_BLK_DEV_RAM=y
-# CONFIG_MISC_DEVICES is not set
 CONFIG_NETDEVICES=y
-CONFIG_NET_ETHERNET=y
-# CONFIG_NETDEV_1000 is not set
-# CONFIG_NETDEV_10000 is not set
 CONFIG_PPP=y
 # CONFIG_INPUT is not set
 # CONFIG_SERIO is not set
 # CONFIG_VT is not set
+# CONFIG_UNIX98_PTYS is not set
 CONFIG_SERIAL_MCF=y
 CONFIG_SERIAL_MCF_CONSOLE=y
-# CONFIG_UNIX98_PTYS is not set
 # CONFIG_HWMON is not set
 # CONFIG_USB_SUPPORT is not set
 CONFIG_EXT2_FS=y
@@ -62,7 +51,5 @@ CONFIG_EXT2_FS=y
 CONFIG_ROMFS_FS=y
 CONFIG_ROMFS_BACKED_BY_MTD=y
 # CONFIG_NETWORK_FILESYSTEMS is not set
-# CONFIG_RCU_CPU_STALL_DETECTOR is not set
 CONFIG_BOOTPARAM=y
 CONFIG_BOOTPARAM_STRING="root=/dev/mtdblock0"
-# CONFIG_CRC32 is not set
diff --git a/arch/m68k/configs/m5272c3_defconfig b/arch/m68k/configs/m5272c3_defconfig
index a60cb3509135..6f5fb92f5cbf 100644
--- a/arch/m68k/configs/m5272c3_defconfig
+++ b/arch/m68k/configs/m5272c3_defconfig
@@ -1,10 +1,6 @@
-# CONFIG_MMU is not set
-CONFIG_EXPERIMENTAL=y
 CONFIG_LOG_BUF_SHIFT=14
-# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
 CONFIG_EXPERT=y
 # CONFIG_KALLSYMS is not set
-# CONFIG_HOTPLUG is not set
 # CONFIG_FUTEX is not set
 # CONFIG_EPOLL is not set
 # CONFIG_SIGNALFD is not set
@@ -16,8 +12,8 @@ CONFIG_EXPERT=y
 # CONFIG_BLK_DEV_BSG is not set
 # CONFIG_IOSCHED_DEADLINE is not set
 # CONFIG_IOSCHED_CFQ is not set
+# CONFIG_MMU is not set
 CONFIG_M5272=y
-CONFIG_CLOCK_SET=y
 CONFIG_M5272C3=y
 CONFIG_RAMBASE=0x00000000
 CONFIG_RAMSIZE=0x00800000
@@ -36,23 +32,18 @@ CONFIG_INET=y
 # CONFIG_IPV6 is not set
 # CONFIG_FW_LOADER is not set
 CONFIG_MTD=y
-CONFIG_MTD_CHAR=y
 CONFIG_MTD_BLOCK=y
 CONFIG_MTD_RAM=y
 CONFIG_MTD_UCLINUX=y
 CONFIG_BLK_DEV_RAM=y
-# CONFIG_MISC_DEVICES is not set
 CONFIG_NETDEVICES=y
-CONFIG_NET_ETHERNET=y
 CONFIG_FEC=y
-# CONFIG_NETDEV_1000 is not set
-# CONFIG_NETDEV_10000 is not set
 # CONFIG_INPUT is not set
 # CONFIG_SERIO is not set
 # CONFIG_VT is not set
+# CONFIG_UNIX98_PTYS is not set
 CONFIG_SERIAL_MCF=y
 CONFIG_SERIAL_MCF_CONSOLE=y
-# CONFIG_UNIX98_PTYS is not set
 # CONFIG_HWMON is not set
 # CONFIG_USB_SUPPORT is not set
 CONFIG_EXT2_FS=y
@@ -61,6 +52,5 @@ CONFIG_EXT2_FS=y
 CONFIG_ROMFS_FS=y
 CONFIG_ROMFS_BACKED_BY_MTD=y
 # CONFIG_NETWORK_FILESYSTEMS is not set
-# CONFIG_RCU_CPU_STALL_DETECTOR is not set
 CONFIG_BOOTPARAM=y
 CONFIG_BOOTPARAM_STRING="root=/dev/mtdblock0"
diff --git a/arch/m68k/configs/m5275evb_defconfig b/arch/m68k/configs/m5275evb_defconfig
index e6502ab7cb2f..b5d7cd1ce856 100644
--- a/arch/m68k/configs/m5275evb_defconfig
+++ b/arch/m68k/configs/m5275evb_defconfig
@@ -1,10 +1,6 @@
-# CONFIG_MMU is not set
-CONFIG_EXPERIMENTAL=y
 CONFIG_LOG_BUF_SHIFT=14
-# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
 CONFIG_EXPERT=y
 # CONFIG_KALLSYMS is not set
-# CONFIG_HOTPLUG is not set
 # CONFIG_FUTEX is not set
 # CONFIG_EPOLL is not set
 # CONFIG_SIGNALFD is not set
@@ -16,11 +12,8 @@ CONFIG_EXPERT=y
 # CONFIG_BLK_DEV_BSG is not set
 # CONFIG_IOSCHED_DEADLINE is not set
 # CONFIG_IOSCHED_CFQ is not set
+# CONFIG_MMU is not set
 CONFIG_M5275=y
-CONFIG_CLOCK_SET=y
-CONFIG_CLOCK_FREQ=150000000
-CONFIG_CLOCK_DIV=2
-CONFIG_M5275EVB=y
 # CONFIG_4KSTACKS is not set
 CONFIG_RAMBASE=0x00000000
 CONFIG_RAMSIZE=0x00000000
@@ -39,24 +32,19 @@ CONFIG_INET=y
 # CONFIG_IPV6 is not set
 # CONFIG_FW_LOADER is not set
 CONFIG_MTD=y
-CONFIG_MTD_CHAR=y
 CONFIG_MTD_BLOCK=y
 CONFIG_MTD_RAM=y
 CONFIG_MTD_UCLINUX=y
 CONFIG_BLK_DEV_RAM=y
-# CONFIG_MISC_DEVICES is not set
 CONFIG_NETDEVICES=y
-CONFIG_NET_ETHERNET=y
 CONFIG_FEC=y
-# CONFIG_NETDEV_1000 is not set
-# CONFIG_NETDEV_10000 is not set
 CONFIG_PPP=y
 # CONFIG_INPUT is not set
 # CONFIG_SERIO is not set
 # CONFIG_VT is not set
+# CONFIG_UNIX98_PTYS is not set
 CONFIG_SERIAL_MCF=y
 CONFIG_SERIAL_MCF_CONSOLE=y
-# CONFIG_UNIX98_PTYS is not set
 # CONFIG_HWMON is not set
 # CONFIG_USB_SUPPORT is not set
 CONFIG_EXT2_FS=y
@@ -65,8 +53,5 @@ CONFIG_EXT2_FS=y
 CONFIG_ROMFS_FS=y
 CONFIG_ROMFS_BACKED_BY_MTD=y
 # CONFIG_NETWORK_FILESYSTEMS is not set
-# CONFIG_RCU_CPU_STALL_DETECTOR is not set
-CONFIG_SYSCTL_SYSCALL_CHECK=y
 CONFIG_BOOTPARAM=y
 CONFIG_BOOTPARAM_STRING="root=/dev/mtdblock0"
-# CONFIG_CRC32 is not set
diff --git a/arch/m68k/configs/m5307c3_defconfig b/arch/m68k/configs/m5307c3_defconfig
index 023812abd2e6..1b4c09461c40 100644
--- a/arch/m68k/configs/m5307c3_defconfig
+++ b/arch/m68k/configs/m5307c3_defconfig
@@ -1,10 +1,6 @@
-# CONFIG_MMU is not set
-CONFIG_EXPERIMENTAL=y
 CONFIG_LOG_BUF_SHIFT=14
-# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
 CONFIG_EXPERT=y
 # CONFIG_KALLSYMS is not set
-# CONFIG_HOTPLUG is not set
 # CONFIG_FUTEX is not set
 # CONFIG_EPOLL is not set
 # CONFIG_SIGNALFD is not set
@@ -16,10 +12,8 @@ CONFIG_EXPERT=y
 # CONFIG_BLK_DEV_BSG is not set
 # CONFIG_IOSCHED_DEADLINE is not set
 # CONFIG_IOSCHED_CFQ is not set
+# CONFIG_MMU is not set
 CONFIG_M5307=y
-CONFIG_CLOCK_SET=y
-CONFIG_CLOCK_FREQ=90000000
-CONFIG_CLOCK_DIV=2
 CONFIG_M5307C3=y
 CONFIG_RAMBASE=0x00000000
 CONFIG_RAMSIZE=0x00800000
@@ -38,16 +32,11 @@ CONFIG_INET=y
 # CONFIG_IPV6 is not set
 # CONFIG_FW_LOADER is not set
 CONFIG_MTD=y
-CONFIG_MTD_CHAR=y
 CONFIG_MTD_BLOCK=y
 CONFIG_MTD_RAM=y
 CONFIG_MTD_UCLINUX=y
 CONFIG_BLK_DEV_RAM=y
-# CONFIG_MISC_DEVICES is not set
 CONFIG_NETDEVICES=y
-CONFIG_NET_ETHERNET=y
-# CONFIG_NETDEV_1000 is not set
-# CONFIG_NETDEV_10000 is not set
 CONFIG_PPP=y
 CONFIG_SLIP=y
 CONFIG_SLIP_COMPRESSED=y
@@ -56,21 +45,17 @@ CONFIG_SLIP_COMPRESSED=y
 # CONFIG_INPUT_MOUSE is not set
 # CONFIG_SERIO is not set
 # CONFIG_VT is not set
+# CONFIG_LEGACY_PTYS is not set
 CONFIG_SERIAL_MCF=y
 CONFIG_SERIAL_MCF_CONSOLE=y
-# CONFIG_LEGACY_PTYS is not set
 # CONFIG_HW_RANDOM is not set
 # CONFIG_HWMON is not set
-# CONFIG_HID_SUPPORT is not set
 # CONFIG_USB_SUPPORT is not set
 CONFIG_EXT2_FS=y
 # CONFIG_DNOTIFY is not set
 CONFIG_ROMFS_FS=y
 CONFIG_ROMFS_BACKED_BY_MTD=y
 # CONFIG_NETWORK_FILESYSTEMS is not set
-# CONFIG_RCU_CPU_STALL_DETECTOR is not set
-CONFIG_SYSCTL_SYSCALL_CHECK=y
-CONFIG_FULLDEBUG=y
 CONFIG_BOOTPARAM=y
 CONFIG_BOOTPARAM_STRING="root=/dev/mtdblock0"
-# CONFIG_CRC32 is not set
+CONFIG_FULLDEBUG=y
diff --git a/arch/m68k/configs/m5407c3_defconfig b/arch/m68k/configs/m5407c3_defconfig
index 557b39f3be90..275ad543d4bc 100644
--- a/arch/m68k/configs/m5407c3_defconfig
+++ b/arch/m68k/configs/m5407c3_defconfig
@@ -1,10 +1,6 @@
-# CONFIG_MMU is not set
-CONFIG_EXPERIMENTAL=y
 CONFIG_LOG_BUF_SHIFT=14
-# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
 CONFIG_EXPERT=y
 # CONFIG_KALLSYMS is not set
-# CONFIG_HOTPLUG is not set
 # CONFIG_FUTEX is not set
 # CONFIG_EPOLL is not set
 # CONFIG_SIGNALFD is not set
@@ -17,9 +13,8 @@ CONFIG_MODULE_UNLOAD=y
 # CONFIG_BLK_DEV_BSG is not set
 # CONFIG_IOSCHED_DEADLINE is not set
 # CONFIG_IOSCHED_CFQ is not set
+# CONFIG_MMU is not set
 CONFIG_M5407=y
-CONFIG_CLOCK_SET=y
-CONFIG_CLOCK_FREQ=50000000
 CONFIG_M5407C3=y
 CONFIG_RAMBASE=0x00000000
 CONFIG_RAMSIZE=0x00000000
@@ -38,22 +33,17 @@ CONFIG_INET=y
 # CONFIG_IPV6 is not set
 # CONFIG_FW_LOADER is not set
 CONFIG_MTD=y
-CONFIG_MTD_CHAR=y
 CONFIG_MTD_BLOCK=y
 CONFIG_MTD_RAM=y
 CONFIG_MTD_UCLINUX=y
 CONFIG_BLK_DEV_RAM=y
-# CONFIG_MISC_DEVICES is not set
 CONFIG_NETDEVICES=y
-CONFIG_NET_ETHERNET=y
-# CONFIG_NETDEV_1000 is not set
-# CONFIG_NETDEV_10000 is not set
 CONFIG_PPP=y
 # CONFIG_INPUT is not set
 # CONFIG_VT is not set
+# CONFIG_UNIX98_PTYS is not set
 CONFIG_SERIAL_MCF=y
 CONFIG_SERIAL_MCF_CONSOLE=y
-# CONFIG_UNIX98_PTYS is not set
 # CONFIG_HW_RANDOM is not set
 # CONFIG_HWMON is not set
 # CONFIG_USB_SUPPORT is not set
@@ -63,8 +53,5 @@ CONFIG_EXT2_FS=y
 CONFIG_ROMFS_FS=y
 CONFIG_ROMFS_BACKED_BY_MTD=y
 # CONFIG_NETWORK_FILESYSTEMS is not set
-# CONFIG_RCU_CPU_STALL_DETECTOR is not set
-CONFIG_SYSCTL_SYSCALL_CHECK=y
 CONFIG_BOOTPARAM=y
 CONFIG_BOOTPARAM_STRING="root=/dev/mtdblock0"
-# CONFIG_CRC32 is not set
diff --git a/arch/m68k/configs/m5475evb_defconfig b/arch/m68k/configs/m5475evb_defconfig
index c5018a68819b..4f4ccd13c11b 100644
--- a/arch/m68k/configs/m5475evb_defconfig
+++ b/arch/m68k/configs/m5475evb_defconfig
@@ -1,11 +1,7 @@
-CONFIG_EXPERIMENTAL=y
 # CONFIG_SWAP is not set
 CONFIG_LOG_BUF_SHIFT=14
-CONFIG_SYSFS_DEPRECATED=y
-CONFIG_SYSFS_DEPRECATED_V2=y
 CONFIG_SYSCTL_SYSCALL=y
 # CONFIG_KALLSYMS is not set
-# CONFIG_HOTPLUG is not set
 # CONFIG_FUTEX is not set
 # CONFIG_EPOLL is not set
 # CONFIG_SIGNALFD is not set
@@ -20,19 +16,16 @@ CONFIG_MODULES=y
 # CONFIG_IOSCHED_DEADLINE is not set
 # CONFIG_IOSCHED_CFQ is not set
 CONFIG_COLDFIRE=y
-CONFIG_M547x=y
-CONFIG_CLOCK_SET=y
-CONFIG_CLOCK_FREQ=266000000
 # CONFIG_4KSTACKS is not set
 CONFIG_RAMBASE=0x0
 CONFIG_RAMSIZE=0x2000000
 CONFIG_VECTORBASE=0x0
 CONFIG_MBAR=0xff000000
 CONFIG_KERNELBASE=0x20000
+CONFIG_PCI=y
 # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
 # CONFIG_FW_LOADER is not set
 CONFIG_MTD=y
-CONFIG_MTD_CHAR=y
 CONFIG_MTD_BLOCK=y
 CONFIG_MTD_CFI=y
 CONFIG_MTD_JEDECPROBE=y
diff --git a/arch/m68k/include/asm/Kbuild b/arch/m68k/include/asm/Kbuild
index 1555bc189c7d..eb85bd9c6180 100644
--- a/arch/m68k/include/asm/Kbuild
+++ b/arch/m68k/include/asm/Kbuild
@@ -18,6 +18,7 @@ generic-y += kvm_para.h
 generic-y += local.h
 generic-y += local64.h
 generic-y += mcs_spinlock.h
+generic-y += mm-arch-hooks.h
 generic-y += mman.h
 generic-y += mutex.h
 generic-y += percpu.h
diff --git a/arch/m68k/include/asm/coldfire.h b/arch/m68k/include/asm/coldfire.h
index c94557b91448..50aa4dac9ca2 100644
--- a/arch/m68k/include/asm/coldfire.h
+++ b/arch/m68k/include/asm/coldfire.h
@@ -19,7 +19,7 @@
  *	in any case new boards come along from time to time that have yet
  *	another different clocking frequency.
  */
-#ifdef CONFIG_CLOCK_SET
+#ifdef CONFIG_CLOCK_FREQ
 #define	MCF_CLK		CONFIG_CLOCK_FREQ
 #else
 #error "Don't know what your ColdFire CPU clock frequency is??"
diff --git a/arch/m68k/include/asm/io_mm.h b/arch/m68k/include/asm/io_mm.h
index 618c85d3c786..f55cad529400 100644
--- a/arch/m68k/include/asm/io_mm.h
+++ b/arch/m68k/include/asm/io_mm.h
@@ -413,7 +413,8 @@ static inline void isa_delay(void)
 #define writew(val, addr)	out_le16((addr), (val))
 #endif /* CONFIG_ATARI_ROM_ISA */
 
-#if !defined(CONFIG_ISA) && !defined(CONFIG_ATARI_ROM_ISA)
+#if !defined(CONFIG_ISA) && !defined(CONFIG_ATARI_ROM_ISA) && \
+    !(defined(CONFIG_PCI) && defined(CONFIG_COLDFIRE))
 /*
  * We need to define dummy functions for GENERIC_IOMAP support.
  */
diff --git a/arch/m68k/include/asm/mm-arch-hooks.h b/arch/m68k/include/asm/mm-arch-hooks.h
deleted file mode 100644
index 7e8709bc90ae..000000000000
--- a/arch/m68k/include/asm/mm-arch-hooks.h
+++ /dev/null
@@ -1,15 +0,0 @@
-/*
- * Architecture specific mm hooks
- *
- * Copyright (C) 2015, IBM Corporation
- * Author: Laurent Dufour <ldufour@linux.vnet.ibm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#ifndef _ASM_M68K_MM_ARCH_HOOKS_H
-#define _ASM_M68K_MM_ARCH_HOOKS_H
-
-#endif /* _ASM_M68K_MM_ARCH_HOOKS_H */
diff --git a/arch/metag/include/asm/Kbuild b/arch/metag/include/asm/Kbuild
index 199320f3c345..df31353fd200 100644
--- a/arch/metag/include/asm/Kbuild
+++ b/arch/metag/include/asm/Kbuild
@@ -25,6 +25,7 @@ generic-y += kvm_para.h
 generic-y += local.h
 generic-y += local64.h
 generic-y += mcs_spinlock.h
+generic-y += mm-arch-hooks.h
 generic-y += msgbuf.h
 generic-y += mutex.h
 generic-y += param.h
diff --git a/arch/metag/include/asm/mm-arch-hooks.h b/arch/metag/include/asm/mm-arch-hooks.h
deleted file mode 100644
index b0072b2eb0de..000000000000
--- a/arch/metag/include/asm/mm-arch-hooks.h
+++ /dev/null
@@ -1,15 +0,0 @@
-/*
- * Architecture specific mm hooks
- *
- * Copyright (C) 2015, IBM Corporation
- * Author: Laurent Dufour <ldufour@linux.vnet.ibm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#ifndef _ASM_METAG_MM_ARCH_HOOKS_H
-#define _ASM_METAG_MM_ARCH_HOOKS_H
-
-#endif /* _ASM_METAG_MM_ARCH_HOOKS_H */
diff --git a/arch/microblaze/include/asm/Kbuild b/arch/microblaze/include/asm/Kbuild
index 9989ddb169ca..2f222f355c4b 100644
--- a/arch/microblaze/include/asm/Kbuild
+++ b/arch/microblaze/include/asm/Kbuild
@@ -6,6 +6,7 @@ generic-y += device.h
 generic-y += exec.h
 generic-y += irq_work.h
 generic-y += mcs_spinlock.h
+generic-y += mm-arch-hooks.h
 generic-y += preempt.h
 generic-y += syscalls.h
 generic-y += trace_clock.h
diff --git a/arch/microblaze/include/asm/mm-arch-hooks.h b/arch/microblaze/include/asm/mm-arch-hooks.h
deleted file mode 100644
index 5c4065911bda..000000000000
--- a/arch/microblaze/include/asm/mm-arch-hooks.h
+++ /dev/null
@@ -1,15 +0,0 @@
-/*
- * Architecture specific mm hooks
- *
- * Copyright (C) 2015, IBM Corporation
- * Author: Laurent Dufour <ldufour@linux.vnet.ibm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#ifndef _ASM_MICROBLAZE_MM_ARCH_HOOKS_H
-#define _ASM_MICROBLAZE_MM_ARCH_HOOKS_H
-
-#endif /* _ASM_MICROBLAZE_MM_ARCH_HOOKS_H */
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index aab7e46cadd5..cee5f93e5712 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -1427,6 +1427,7 @@ config CPU_MIPS64_R6
 	select CPU_SUPPORTS_HIGHMEM
 	select CPU_SUPPORTS_MSA
 	select GENERIC_CSUM
+	select MIPS_O32_FP64_SUPPORT if MIPS32_O32
 	help
 	  Choose this option to build a kernel for release 6 or later of the
 	  MIPS64 architecture.  New MIPS processors, starting with the Warrior
@@ -2262,11 +2263,6 @@ config MIPS_CM
 config MIPS_CPC
 	bool
 
-config SB1_PASS_1_WORKAROUNDS
-	bool
-	depends on CPU_SB1_PASS_1
-	default y
-
 config SB1_PASS_2_WORKAROUNDS
 	bool
 	depends on CPU_SB1 && (CPU_SB1_PASS_2_2 || CPU_SB1_PASS_2)
diff --git a/arch/mips/Makefile b/arch/mips/Makefile
index ae2dd59050f7..252e347958f3 100644
--- a/arch/mips/Makefile
+++ b/arch/mips/Makefile
@@ -181,13 +181,6 @@ cflags-$(CONFIG_CPU_R4000_WORKAROUNDS)	+= $(call cc-option,-mfix-r4000,)
 cflags-$(CONFIG_CPU_R4400_WORKAROUNDS)	+= $(call cc-option,-mfix-r4400,)
 cflags-$(CONFIG_CPU_DADDI_WORKAROUNDS)	+= $(call cc-option,-mno-daddi,)
 
-ifdef CONFIG_CPU_SB1
-ifdef CONFIG_SB1_PASS_1_WORKAROUNDS
-KBUILD_AFLAGS_MODULE += -msb1-pass1-workarounds
-KBUILD_CFLAGS_MODULE += -msb1-pass1-workarounds
-endif
-endif
-
 # For smartmips configurations, there are hundreds of warnings due to ISA overrides
 # in assembly and header files. smartmips is only supported for MIPS32r1 onwards
 # and there is no support for 64-bit. Various '.set mips2' or '.set mips3' or
diff --git a/arch/mips/include/asm/Kbuild b/arch/mips/include/asm/Kbuild
index 7fe5c61a3cb8..1f8546081d20 100644
--- a/arch/mips/include/asm/Kbuild
+++ b/arch/mips/include/asm/Kbuild
@@ -7,6 +7,7 @@ generic-y += emergency-restart.h
 generic-y += irq_work.h
 generic-y += local64.h
 generic-y += mcs_spinlock.h
+generic-y += mm-arch-hooks.h
 generic-y += mutex.h
 generic-y += parport.h
 generic-y += percpu.h
diff --git a/arch/mips/include/asm/fpu.h b/arch/mips/include/asm/fpu.h
index 084780b355aa..1b0625189835 100644
--- a/arch/mips/include/asm/fpu.h
+++ b/arch/mips/include/asm/fpu.h
@@ -74,7 +74,7 @@ static inline int __enable_fpu(enum fpu_mode mode)
 		goto fr_common;
 
 	case FPU_64BIT:
-#if !(defined(CONFIG_CPU_MIPS32_R2) || defined(CONFIG_CPU_MIPS32_R6) \
+#if !(defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR6) \
       || defined(CONFIG_64BIT))
 		/* we only have a 32-bit FPU */
 		return SIGFPE;
diff --git a/arch/mips/include/asm/mach-sibyte/war.h b/arch/mips/include/asm/mach-sibyte/war.h
index 0a227d426b9c..520f8fc2c806 100644
--- a/arch/mips/include/asm/mach-sibyte/war.h
+++ b/arch/mips/include/asm/mach-sibyte/war.h
@@ -13,8 +13,7 @@
 #define R4600_V2_HIT_CACHEOP_WAR	0
 #define R5432_CP0_INTERRUPT_WAR		0
 
-#if defined(CONFIG_SB1_PASS_1_WORKAROUNDS) || \
-    defined(CONFIG_SB1_PASS_2_WORKAROUNDS)
+#if defined(CONFIG_SB1_PASS_2_WORKAROUNDS)
 
 #ifndef __ASSEMBLY__
 extern int sb1250_m3_workaround_needed(void);
diff --git a/arch/mips/include/asm/mm-arch-hooks.h b/arch/mips/include/asm/mm-arch-hooks.h
deleted file mode 100644
index b5609fe8e475..000000000000
--- a/arch/mips/include/asm/mm-arch-hooks.h
+++ /dev/null
@@ -1,15 +0,0 @@
-/*
- * Architecture specific mm hooks
- *
- * Copyright (C) 2015, IBM Corporation
- * Author: Laurent Dufour <ldufour@linux.vnet.ibm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#ifndef _ASM_MIPS_MM_ARCH_HOOKS_H
-#define _ASM_MIPS_MM_ARCH_HOOKS_H
-
-#endif /* _ASM_MIPS_MM_ARCH_HOOKS_H */
diff --git a/arch/mips/include/uapi/asm/sigcontext.h b/arch/mips/include/uapi/asm/sigcontext.h
index 6c9906f59c6e..9081d88ae44f 100644
--- a/arch/mips/include/uapi/asm/sigcontext.h
+++ b/arch/mips/include/uapi/asm/sigcontext.h
@@ -16,7 +16,7 @@
 
 /*
  * Keep this struct definition in sync with the sigcontext fragment
- * in arch/mips/tools/offset.c
+ * in arch/mips/kernel/asm-offsets.c
  */
 struct sigcontext {
 	unsigned int		sc_regmask;	/* Unused */
@@ -46,7 +46,7 @@ struct sigcontext {
 #include <linux/posix_types.h>
 /*
  * Keep this struct definition in sync with the sigcontext fragment
- * in arch/mips/tools/offset.c
+ * in arch/mips/kernel/asm-offsets.c
  *
  * Warning: this structure illdefined with sc_badvaddr being just an unsigned
  * int so it was changed to unsigned long in 2.6.0-test1.  This may break
diff --git a/arch/mips/kernel/asm-offsets.c b/arch/mips/kernel/asm-offsets.c
index beabe19ff8e5..072fab13645d 100644
--- a/arch/mips/kernel/asm-offsets.c
+++ b/arch/mips/kernel/asm-offsets.c
@@ -1,5 +1,5 @@
 /*
- * offset.c: Calculate pt_regs and task_struct offsets.
+ * asm-offsets.c: Calculate pt_regs and task_struct offsets.
  *
  * Copyright (C) 1996 David S. Miller
  * Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003 Ralf Baechle
diff --git a/arch/mips/sibyte/Kconfig b/arch/mips/sibyte/Kconfig
index a8bb972fd9fd..cb9a095f5c5e 100644
--- a/arch/mips/sibyte/Kconfig
+++ b/arch/mips/sibyte/Kconfig
@@ -81,11 +81,6 @@ choice
 	prompt "SiByte SOC Stepping"
 	depends on SIBYTE_SB1xxx_SOC
 
-config CPU_SB1_PASS_1
-	bool "1250 Pass1"
-	depends on SIBYTE_SB1250
-	select CPU_HAS_PREFETCH
-
 config CPU_SB1_PASS_2_1250
 	bool "1250 An"
 	depends on SIBYTE_SB1250
diff --git a/arch/mips/sibyte/common/bus_watcher.c b/arch/mips/sibyte/common/bus_watcher.c
index 5581844c9194..41a1d2242211 100644
--- a/arch/mips/sibyte/common/bus_watcher.c
+++ b/arch/mips/sibyte/common/bus_watcher.c
@@ -81,10 +81,7 @@ void check_bus_watcher(void)
 {
 	u32 status, l2_err, memio_err;
 
-#ifdef CONFIG_SB1_PASS_1_WORKAROUNDS
-	/* Destructive read, clears register and interrupt */
-	status = csr_in32(IOADDR(A_SCD_BUS_ERR_STATUS));
-#elif defined(CONFIG_SIBYTE_BCM112X) || defined(CONFIG_SIBYTE_SB1250)
+#if defined(CONFIG_SIBYTE_BCM112X) || defined(CONFIG_SIBYTE_SB1250)
 	/* Use non-destructive register */
 	status = csr_in32(IOADDR(A_SCD_BUS_ERR_STATUS_DEBUG));
 #elif defined(CONFIG_SIBYTE_BCM1x55) || defined(CONFIG_SIBYTE_BCM1x80)
diff --git a/arch/mips/sibyte/sb1250/setup.c b/arch/mips/sibyte/sb1250/setup.c
index 3c02b2a77ae9..9d3c24efdf4a 100644
--- a/arch/mips/sibyte/sb1250/setup.c
+++ b/arch/mips/sibyte/sb1250/setup.c
@@ -202,12 +202,10 @@ void __init sb1250_setup(void)
 
 	switch (war_pass) {
 	case K_SYS_REVISION_BCM1250_PASS1:
-#ifndef CONFIG_SB1_PASS_1_WORKAROUNDS
 		printk("@@@@ This is a BCM1250 A0-A2 (Pass 1) board, "
 			    "and the kernel doesn't have the proper "
 			    "workarounds compiled in. @@@@\n");
 		bad_config = 1;
-#endif
 		break;
 	case K_SYS_REVISION_BCM1250_PASS2:
 		/* Pass 2 - easiest as default for now - so many numbers */
diff --git a/arch/mn10300/include/asm/Kbuild b/arch/mn10300/include/asm/Kbuild
index de30b0c88796..6edb9ee6128e 100644
--- a/arch/mn10300/include/asm/Kbuild
+++ b/arch/mn10300/include/asm/Kbuild
@@ -5,6 +5,7 @@ generic-y += cputime.h
 generic-y += exec.h
 generic-y += irq_work.h
 generic-y += mcs_spinlock.h
+generic-y += mm-arch-hooks.h
 generic-y += preempt.h
 generic-y += sections.h
 generic-y += trace_clock.h
diff --git a/arch/mn10300/include/asm/mm-arch-hooks.h b/arch/mn10300/include/asm/mm-arch-hooks.h
deleted file mode 100644
index e2029a652f4c..000000000000
--- a/arch/mn10300/include/asm/mm-arch-hooks.h
+++ /dev/null
@@ -1,15 +0,0 @@
-/*
- * Architecture specific mm hooks
- *
- * Copyright (C) 2015, IBM Corporation
- * Author: Laurent Dufour <ldufour@linux.vnet.ibm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#ifndef _ASM_MN10300_MM_ARCH_HOOKS_H
-#define _ASM_MN10300_MM_ARCH_HOOKS_H
-
-#endif /* _ASM_MN10300_MM_ARCH_HOOKS_H */
diff --git a/arch/nios2/include/asm/Kbuild b/arch/nios2/include/asm/Kbuild
index 434639d510b3..914864eb5a25 100644
--- a/arch/nios2/include/asm/Kbuild
+++ b/arch/nios2/include/asm/Kbuild
@@ -30,6 +30,7 @@ generic-y += kmap_types.h
 generic-y += kvm_para.h
 generic-y += local.h
 generic-y += mcs_spinlock.h
+generic-y += mm-arch-hooks.h
 generic-y += mman.h
 generic-y += module.h
 generic-y += msgbuf.h
diff --git a/arch/nios2/include/asm/mm-arch-hooks.h b/arch/nios2/include/asm/mm-arch-hooks.h
deleted file mode 100644
index d7290dc68558..000000000000
--- a/arch/nios2/include/asm/mm-arch-hooks.h
+++ /dev/null
@@ -1,15 +0,0 @@
-/*
- * Architecture specific mm hooks
- *
- * Copyright (C) 2015, IBM Corporation
- * Author: Laurent Dufour <ldufour@linux.vnet.ibm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#ifndef _ASM_NIOS2_MM_ARCH_HOOKS_H
-#define _ASM_NIOS2_MM_ARCH_HOOKS_H
-
-#endif /* _ASM_NIOS2_MM_ARCH_HOOKS_H */
diff --git a/arch/openrisc/Kconfig b/arch/openrisc/Kconfig
index e5a693b16da2..443f44de1020 100644
--- a/arch/openrisc/Kconfig
+++ b/arch/openrisc/Kconfig
@@ -17,6 +17,7 @@ config OPENRISC
 	select GENERIC_IRQ_SHOW
 	select GENERIC_IOMAP
 	select GENERIC_CPU_DEVICES
+	select HAVE_UID16
 	select GENERIC_ATOMIC64
 	select GENERIC_CLOCKEVENTS
 	select GENERIC_STRNCPY_FROM_USER
@@ -31,9 +32,6 @@ config MMU
 config HAVE_DMA_ATTRS
 	def_bool y
 
-config UID16
-	def_bool y
-
 config RWSEM_GENERIC_SPINLOCK
 	def_bool y
 
diff --git a/arch/openrisc/include/asm/Kbuild b/arch/openrisc/include/asm/Kbuild
index 2a2e39b8109a..2832f031fb11 100644
--- a/arch/openrisc/include/asm/Kbuild
+++ b/arch/openrisc/include/asm/Kbuild
@@ -36,6 +36,7 @@ generic-y += kmap_types.h
 generic-y += kvm_para.h
 generic-y += local.h
 generic-y += mcs_spinlock.h
+generic-y += mm-arch-hooks.h
 generic-y += mman.h
 generic-y += module.h
 generic-y += msgbuf.h
diff --git a/arch/openrisc/include/asm/mm-arch-hooks.h b/arch/openrisc/include/asm/mm-arch-hooks.h
deleted file mode 100644
index 6d33cb555fe1..000000000000
--- a/arch/openrisc/include/asm/mm-arch-hooks.h
+++ /dev/null
@@ -1,15 +0,0 @@
-/*
- * Architecture specific mm hooks
- *
- * Copyright (C) 2015, IBM Corporation
- * Author: Laurent Dufour <ldufour@linux.vnet.ibm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#ifndef _ASM_OPENRISC_MM_ARCH_HOOKS_H
-#define _ASM_OPENRISC_MM_ARCH_HOOKS_H
-
-#endif /* _ASM_OPENRISC_MM_ARCH_HOOKS_H */
diff --git a/arch/parisc/include/asm/Kbuild b/arch/parisc/include/asm/Kbuild
index 12b341d04f88..f9b3a81aefcd 100644
--- a/arch/parisc/include/asm/Kbuild
+++ b/arch/parisc/include/asm/Kbuild
@@ -15,6 +15,7 @@ generic-y += kvm_para.h
 generic-y += local.h
 generic-y += local64.h
 generic-y += mcs_spinlock.h
+generic-y += mm-arch-hooks.h
 generic-y += mutex.h
 generic-y += param.h
 generic-y += percpu.h
diff --git a/arch/parisc/include/asm/mm-arch-hooks.h b/arch/parisc/include/asm/mm-arch-hooks.h
deleted file mode 100644
index 654ec63b0ee9..000000000000
--- a/arch/parisc/include/asm/mm-arch-hooks.h
+++ /dev/null
@@ -1,15 +0,0 @@
-/*
- * Architecture specific mm hooks
- *
- * Copyright (C) 2015, IBM Corporation
- * Author: Laurent Dufour <ldufour@linux.vnet.ibm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#ifndef _ASM_PARISC_MM_ARCH_HOOKS_H
-#define _ASM_PARISC_MM_ARCH_HOOKS_H
-
-#endif /* _ASM_PARISC_MM_ARCH_HOOKS_H */
diff --git a/arch/parisc/include/asm/pgalloc.h b/arch/parisc/include/asm/pgalloc.h
index 3a08eae3318f..3edbb9fc91b4 100644
--- a/arch/parisc/include/asm/pgalloc.h
+++ b/arch/parisc/include/asm/pgalloc.h
@@ -72,7 +72,7 @@ static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address)
 
 static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
 {
-	if(pmd_flag(*pmd) & PxD_FLAG_ATTACHED)
+	if (pmd_flag(*pmd) & PxD_FLAG_ATTACHED) {
 		/*
 		 * This is the permanent pmd attached to the pgd;
 		 * cannot free it.
@@ -81,6 +81,7 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
 		 */
 		mm_inc_nr_pmds(mm);
 		return;
+	}
 	free_pages((unsigned long)pmd, PMD_ORDER);
 }
 
diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
index b91e74a817d8..9fac01cb89c1 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -158,6 +158,7 @@ extern pfn_t kvmppc_gpa_to_pfn(struct kvm_vcpu *vcpu, gpa_t gpa, bool writing,
 			bool *writable);
 extern void kvmppc_add_revmap_chain(struct kvm *kvm, struct revmap_entry *rev,
 			unsigned long *rmap, long pte_index, int realmode);
+extern void kvmppc_update_rmap_change(unsigned long *rmap, unsigned long psize);
 extern void kvmppc_invalidate_hpte(struct kvm *kvm, __be64 *hptep,
 			unsigned long pte_index);
 void kvmppc_clear_ref_hpte(struct kvm *kvm, __be64 *hptep,
@@ -225,12 +226,12 @@ static inline u32 kvmppc_get_cr(struct kvm_vcpu *vcpu)
 	return vcpu->arch.cr;
 }
 
-static inline void kvmppc_set_xer(struct kvm_vcpu *vcpu, u32 val)
+static inline void kvmppc_set_xer(struct kvm_vcpu *vcpu, ulong val)
 {
 	vcpu->arch.xer = val;
 }
 
-static inline u32 kvmppc_get_xer(struct kvm_vcpu *vcpu)
+static inline ulong kvmppc_get_xer(struct kvm_vcpu *vcpu)
 {
 	return vcpu->arch.xer;
 }
diff --git a/arch/powerpc/include/asm/kvm_book3s_asm.h b/arch/powerpc/include/asm/kvm_book3s_asm.h
index 5bdfb5dd3400..72b6225aca73 100644
--- a/arch/powerpc/include/asm/kvm_book3s_asm.h
+++ b/arch/powerpc/include/asm/kvm_book3s_asm.h
@@ -25,6 +25,12 @@
 #define XICS_MFRR		0xc
 #define XICS_IPI		2	/* interrupt source # for IPIs */
 
+/* Maximum number of threads per physical core */
+#define MAX_SMT_THREADS		8
+
+/* Maximum number of subcores per physical core */
+#define MAX_SUBCORES		4
+
 #ifdef __ASSEMBLY__
 
 #ifdef CONFIG_KVM_BOOK3S_HANDLER
@@ -65,6 +71,19 @@ kvmppc_resume_\intno:
 
 #else  /*__ASSEMBLY__ */
 
+struct kvmppc_vcore;
+
+/* Struct used for coordinating micro-threading (split-core) mode changes */
+struct kvm_split_mode {
+	unsigned long	rpr;
+	unsigned long	pmmar;
+	unsigned long	ldbar;
+	u8		subcore_size;
+	u8		do_nap;
+	u8		napped[MAX_SMT_THREADS];
+	struct kvmppc_vcore *master_vcs[MAX_SUBCORES];
+};
+
 /*
  * This struct goes in the PACA on 64-bit processors.  It is used
  * to store host state that needs to be saved when we enter a guest
@@ -100,6 +119,7 @@ struct kvmppc_host_state {
 	u64 host_spurr;
 	u64 host_dscr;
 	u64 dec_expires;
+	struct kvm_split_mode *kvm_split_mode;
 #endif
 #ifdef CONFIG_PPC_BOOK3S_64
 	u64 cfar;
@@ -112,7 +132,7 @@ struct kvmppc_book3s_shadow_vcpu {
 	bool in_use;
 	ulong gpr[14];
 	u32 cr;
-	u32 xer;
+	ulong xer;
 	ulong ctr;
 	ulong lr;
 	ulong pc;
diff --git a/arch/powerpc/include/asm/kvm_booke.h b/arch/powerpc/include/asm/kvm_booke.h
index 3286f0d6a86c..bc6e29e4dfd4 100644
--- a/arch/powerpc/include/asm/kvm_booke.h
+++ b/arch/powerpc/include/asm/kvm_booke.h
@@ -54,12 +54,12 @@ static inline u32 kvmppc_get_cr(struct kvm_vcpu *vcpu)
 	return vcpu->arch.cr;
 }
 
-static inline void kvmppc_set_xer(struct kvm_vcpu *vcpu, u32 val)
+static inline void kvmppc_set_xer(struct kvm_vcpu *vcpu, ulong val)
 {
 	vcpu->arch.xer = val;
 }
 
-static inline u32 kvmppc_get_xer(struct kvm_vcpu *vcpu)
+static inline ulong kvmppc_get_xer(struct kvm_vcpu *vcpu)
 {
 	return vcpu->arch.xer;
 }
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index d91f65b28e32..e187b6a56e7e 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -205,8 +205,10 @@ struct revmap_entry {
  */
 #define KVMPPC_RMAP_LOCK_BIT	63
 #define KVMPPC_RMAP_RC_SHIFT	32
+#define KVMPPC_RMAP_CHG_SHIFT	48
 #define KVMPPC_RMAP_REFERENCED	(HPTE_R_R << KVMPPC_RMAP_RC_SHIFT)
 #define KVMPPC_RMAP_CHANGED	(HPTE_R_C << KVMPPC_RMAP_RC_SHIFT)
+#define KVMPPC_RMAP_CHG_ORDER	(0x3ful << KVMPPC_RMAP_CHG_SHIFT)
 #define KVMPPC_RMAP_PRESENT	0x100000000ul
 #define KVMPPC_RMAP_INDEX	0xfffffffful
 
@@ -278,7 +280,9 @@ struct kvmppc_vcore {
 	u16 last_cpu;
 	u8 vcore_state;
 	u8 in_guest;
+	struct kvmppc_vcore *master_vcore;
 	struct list_head runnable_threads;
+	struct list_head preempt_list;
 	spinlock_t lock;
 	wait_queue_head_t wq;
 	spinlock_t stoltb_lock;	/* protects stolen_tb and preempt_tb */
@@ -300,12 +304,21 @@ struct kvmppc_vcore {
 #define VCORE_EXIT_MAP(vc)	((vc)->entry_exit_map >> 8)
 #define VCORE_IS_EXITING(vc)	(VCORE_EXIT_MAP(vc) != 0)
 
-/* Values for vcore_state */
+/* This bit is used when a vcore exit is triggered from outside the vcore */
+#define VCORE_EXIT_REQ		0x10000
+
+/*
+ * Values for vcore_state.
+ * Note that these are arranged such that lower values
+ * (< VCORE_SLEEPING) don't require stolen time accounting
+ * on load/unload, and higher values do.
+ */
 #define VCORE_INACTIVE	0
-#define VCORE_SLEEPING	1
-#define VCORE_PREEMPT	2
-#define VCORE_RUNNING	3
-#define VCORE_EXITING	4
+#define VCORE_PREEMPT	1
+#define VCORE_PIGGYBACK	2
+#define VCORE_SLEEPING	3
+#define VCORE_RUNNING	4
+#define VCORE_EXITING	5
 
 /*
  * Struct used to manage memory for a virtual processor area
@@ -619,6 +632,7 @@ struct kvm_vcpu_arch {
 	int trap;
 	int state;
 	int ptid;
+	int thread_cpu;
 	bool timer_running;
 	wait_queue_head_t cpu_run;
 
diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h
index 8452335661a5..790f5d1d9a46 100644
--- a/arch/powerpc/include/asm/ppc-opcode.h
+++ b/arch/powerpc/include/asm/ppc-opcode.h
@@ -287,7 +287,7 @@
 
 /* POWER8 Micro Partition Prefetch (MPP) parameters */
 /* Address mask is common for LOGMPP instruction and MPPR SPR */
-#define PPC_MPPE_ADDRESS_MASK 0xffffffffc000
+#define PPC_MPPE_ADDRESS_MASK 0xffffffffc000ULL
 
 /* Bits 60 and 61 of MPP SPR should be set to one of the following */
 /* Aborting the fetch is indeed setting 00 in the table size bits */
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 98230579d99c..de62392f093c 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -512,6 +512,8 @@ int main(void)
 	DEFINE(VCPU_VPA, offsetof(struct kvm_vcpu, arch.vpa.pinned_addr));
 	DEFINE(VCPU_VPA_DIRTY, offsetof(struct kvm_vcpu, arch.vpa.dirty));
 	DEFINE(VCPU_HEIR, offsetof(struct kvm_vcpu, arch.emul_inst));
+	DEFINE(VCPU_CPU, offsetof(struct kvm_vcpu, cpu));
+	DEFINE(VCPU_THREAD_CPU, offsetof(struct kvm_vcpu, arch.thread_cpu));
 #endif
 #ifdef CONFIG_PPC_BOOK3S
 	DEFINE(VCPU_VCPUID, offsetof(struct kvm_vcpu, vcpu_id));
@@ -674,7 +676,14 @@ int main(void)
 	HSTATE_FIELD(HSTATE_DSCR, host_dscr);
 	HSTATE_FIELD(HSTATE_DABR, dabr);
 	HSTATE_FIELD(HSTATE_DECEXP, dec_expires);
+	HSTATE_FIELD(HSTATE_SPLIT_MODE, kvm_split_mode);
 	DEFINE(IPI_PRIORITY, IPI_PRIORITY);
+	DEFINE(KVM_SPLIT_RPR, offsetof(struct kvm_split_mode, rpr));
+	DEFINE(KVM_SPLIT_PMMAR, offsetof(struct kvm_split_mode, pmmar));
+	DEFINE(KVM_SPLIT_LDBAR, offsetof(struct kvm_split_mode, ldbar));
+	DEFINE(KVM_SPLIT_SIZE, offsetof(struct kvm_split_mode, subcore_size));
+	DEFINE(KVM_SPLIT_DO_NAP, offsetof(struct kvm_split_mode, do_nap));
+	DEFINE(KVM_SPLIT_NAPPED, offsetof(struct kvm_split_mode, napped));
 #endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
 
 #ifdef CONFIG_PPC_BOOK3S_64
diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
index 3caec2c42105..c2024ac9d4e8 100644
--- a/arch/powerpc/kvm/Kconfig
+++ b/arch/powerpc/kvm/Kconfig
@@ -74,14 +74,14 @@ config KVM_BOOK3S_64
 	  If unsure, say N.
 
 config KVM_BOOK3S_64_HV
-	tristate "KVM support for POWER7 and PPC970 using hypervisor mode in host"
+	tristate "KVM for POWER7 and later using hypervisor mode in host"
 	depends on KVM_BOOK3S_64 && PPC_POWERNV
 	select KVM_BOOK3S_HV_POSSIBLE
 	select MMU_NOTIFIER
 	select CMA
 	---help---
 	  Support running unmodified book3s_64 guest kernels in
-	  virtual machines on POWER7 and PPC970 processors that have
+	  virtual machines on POWER7 and newer processors that have
 	  hypervisor mode available to the host.
 
 	  If you say Y here, KVM will use the hardware virtualization
@@ -89,8 +89,8 @@ config KVM_BOOK3S_64_HV
 	  guest operating systems will run at full hardware speed
 	  using supervisor and user modes.  However, this also means
 	  that KVM is not usable under PowerVM (pHyp), is only usable
-	  on POWER7 (or later) processors and PPC970-family processors,
-	  and cannot emulate a different processor from the host processor.
+	  on POWER7 or later processors, and cannot emulate a
+	  different processor from the host processor.
 
 	  If unsure, say N.
 
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index 05ea8fc7f829..53285d52e381 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -240,7 +240,8 @@ void kvmppc_core_queue_inst_storage(struct kvm_vcpu *vcpu, ulong flags)
 	kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_INST_STORAGE);
 }
 
-int kvmppc_book3s_irqprio_deliver(struct kvm_vcpu *vcpu, unsigned int priority)
+static int kvmppc_book3s_irqprio_deliver(struct kvm_vcpu *vcpu,
+					 unsigned int priority)
 {
 	int deliver = 1;
 	int vec = 0;
diff --git a/arch/powerpc/kvm/book3s_32_mmu_host.c b/arch/powerpc/kvm/book3s_32_mmu_host.c
index 2035d16a9262..d5c9bfeb0c9c 100644
--- a/arch/powerpc/kvm/book3s_32_mmu_host.c
+++ b/arch/powerpc/kvm/book3s_32_mmu_host.c
@@ -26,6 +26,7 @@
 #include <asm/machdep.h>
 #include <asm/mmu_context.h>
 #include <asm/hw_irq.h>
+#include "book3s.h"
 
 /* #define DEBUG_MMU */
 /* #define DEBUG_SR */
diff --git a/arch/powerpc/kvm/book3s_64_mmu_host.c b/arch/powerpc/kvm/book3s_64_mmu_host.c
index b982d925c710..79ad35abd196 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_host.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_host.c
@@ -28,6 +28,7 @@
 #include <asm/mmu_context.h>
 #include <asm/hw_irq.h>
 #include "trace_pr.h"
+#include "book3s.h"
 
 #define PTE_SIZE 12
 
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index dab68b7af3f2..1f9c0a17f445 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -761,6 +761,8 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp,
 			/* Harvest R and C */
 			rcbits = be64_to_cpu(hptep[1]) & (HPTE_R_R | HPTE_R_C);
 			*rmapp |= rcbits << KVMPPC_RMAP_RC_SHIFT;
+			if (rcbits & HPTE_R_C)
+				kvmppc_update_rmap_change(rmapp, psize);
 			if (rcbits & ~rev[i].guest_rpte) {
 				rev[i].guest_rpte = ptel | rcbits;
 				note_hpte_modification(kvm, &rev[i]);
@@ -927,8 +929,12 @@ static int kvm_test_clear_dirty_npages(struct kvm *kvm, unsigned long *rmapp)
  retry:
 	lock_rmap(rmapp);
 	if (*rmapp & KVMPPC_RMAP_CHANGED) {
-		*rmapp &= ~KVMPPC_RMAP_CHANGED;
+		long change_order = (*rmapp & KVMPPC_RMAP_CHG_ORDER)
+			>> KVMPPC_RMAP_CHG_SHIFT;
+		*rmapp &= ~(KVMPPC_RMAP_CHANGED | KVMPPC_RMAP_CHG_ORDER);
 		npages_dirty = 1;
+		if (change_order > PAGE_SHIFT)
+			npages_dirty = 1ul << (change_order - PAGE_SHIFT);
 	}
 	if (!(*rmapp & KVMPPC_RMAP_PRESENT)) {
 		unlock_rmap(rmapp);
diff --git a/arch/powerpc/kvm/book3s_emulate.c b/arch/powerpc/kvm/book3s_emulate.c
index 5a2bc4b0dfe5..2afdb9c0937d 100644
--- a/arch/powerpc/kvm/book3s_emulate.c
+++ b/arch/powerpc/kvm/book3s_emulate.c
@@ -23,6 +23,7 @@
 #include <asm/reg.h>
 #include <asm/switch_to.h>
 #include <asm/time.h>
+#include "book3s.h"
 
 #define OP_19_XOP_RFID		18
 #define OP_19_XOP_RFI		50
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 68d067ad4222..fad52f226c12 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -81,6 +81,12 @@ static DECLARE_BITMAP(default_enabled_hcalls, MAX_HCALL_OPCODE/4 + 1);
 #define MPP_BUFFER_ORDER	3
 #endif
 
+static int dynamic_mt_modes = 6;
+module_param(dynamic_mt_modes, int, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(dynamic_mt_modes, "Set of allowed dynamic micro-threading modes: 0 (= none), 2, 4, or 6 (= 2 or 4)");
+static int target_smt_mode;
+module_param(target_smt_mode, int, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(target_smt_mode, "Target threads per core (0 = max)");
 
 static void kvmppc_end_cede(struct kvm_vcpu *vcpu);
 static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu);
@@ -114,7 +120,7 @@ static bool kvmppc_ipi_thread(int cpu)
 
 static void kvmppc_fast_vcpu_kick_hv(struct kvm_vcpu *vcpu)
 {
-	int cpu = vcpu->cpu;
+	int cpu;
 	wait_queue_head_t *wqp;
 
 	wqp = kvm_arch_vcpu_wq(vcpu);
@@ -123,10 +129,11 @@ static void kvmppc_fast_vcpu_kick_hv(struct kvm_vcpu *vcpu)
 		++vcpu->stat.halt_wakeup;
 	}
 
-	if (kvmppc_ipi_thread(cpu + vcpu->arch.ptid))
+	if (kvmppc_ipi_thread(vcpu->arch.thread_cpu))
 		return;
 
 	/* CPU points to the first thread of the core */
+	cpu = vcpu->cpu;
 	if (cpu >= 0 && cpu < nr_cpu_ids && cpu_online(cpu))
 		smp_send_reschedule(cpu);
 }
@@ -164,6 +171,27 @@ static void kvmppc_fast_vcpu_kick_hv(struct kvm_vcpu *vcpu)
  * they should never fail.)
  */
 
+static void kvmppc_core_start_stolen(struct kvmppc_vcore *vc)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&vc->stoltb_lock, flags);
+	vc->preempt_tb = mftb();
+	spin_unlock_irqrestore(&vc->stoltb_lock, flags);
+}
+
+static void kvmppc_core_end_stolen(struct kvmppc_vcore *vc)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&vc->stoltb_lock, flags);
+	if (vc->preempt_tb != TB_NIL) {
+		vc->stolen_tb += mftb() - vc->preempt_tb;
+		vc->preempt_tb = TB_NIL;
+	}
+	spin_unlock_irqrestore(&vc->stoltb_lock, flags);
+}
+
 static void kvmppc_core_vcpu_load_hv(struct kvm_vcpu *vcpu, int cpu)
 {
 	struct kvmppc_vcore *vc = vcpu->arch.vcore;
@@ -175,14 +203,9 @@ static void kvmppc_core_vcpu_load_hv(struct kvm_vcpu *vcpu, int cpu)
 	 * vcpu, and once it is set to this vcpu, only this task
 	 * ever sets it to NULL.
 	 */
-	if (vc->runner == vcpu && vc->vcore_state != VCORE_INACTIVE) {
-		spin_lock_irqsave(&vc->stoltb_lock, flags);
-		if (vc->preempt_tb != TB_NIL) {
-			vc->stolen_tb += mftb() - vc->preempt_tb;
-			vc->preempt_tb = TB_NIL;
-		}
-		spin_unlock_irqrestore(&vc->stoltb_lock, flags);
-	}
+	if (vc->runner == vcpu && vc->vcore_state >= VCORE_SLEEPING)
+		kvmppc_core_end_stolen(vc);
+
 	spin_lock_irqsave(&vcpu->arch.tbacct_lock, flags);
 	if (vcpu->arch.state == KVMPPC_VCPU_BUSY_IN_HOST &&
 	    vcpu->arch.busy_preempt != TB_NIL) {
@@ -197,11 +220,9 @@ static void kvmppc_core_vcpu_put_hv(struct kvm_vcpu *vcpu)
 	struct kvmppc_vcore *vc = vcpu->arch.vcore;
 	unsigned long flags;
 
-	if (vc->runner == vcpu && vc->vcore_state != VCORE_INACTIVE) {
-		spin_lock_irqsave(&vc->stoltb_lock, flags);
-		vc->preempt_tb = mftb();
-		spin_unlock_irqrestore(&vc->stoltb_lock, flags);
-	}
+	if (vc->runner == vcpu && vc->vcore_state >= VCORE_SLEEPING)
+		kvmppc_core_start_stolen(vc);
+
 	spin_lock_irqsave(&vcpu->arch.tbacct_lock, flags);
 	if (vcpu->arch.state == KVMPPC_VCPU_BUSY_IN_HOST)
 		vcpu->arch.busy_preempt = mftb();
@@ -214,12 +235,12 @@ static void kvmppc_set_msr_hv(struct kvm_vcpu *vcpu, u64 msr)
 	kvmppc_end_cede(vcpu);
 }
 
-void kvmppc_set_pvr_hv(struct kvm_vcpu *vcpu, u32 pvr)
+static void kvmppc_set_pvr_hv(struct kvm_vcpu *vcpu, u32 pvr)
 {
 	vcpu->arch.pvr = pvr;
 }
 
-int kvmppc_set_arch_compat(struct kvm_vcpu *vcpu, u32 arch_compat)
+static int kvmppc_set_arch_compat(struct kvm_vcpu *vcpu, u32 arch_compat)
 {
 	unsigned long pcr = 0;
 	struct kvmppc_vcore *vc = vcpu->arch.vcore;
@@ -259,7 +280,7 @@ int kvmppc_set_arch_compat(struct kvm_vcpu *vcpu, u32 arch_compat)
 	return 0;
 }
 
-void kvmppc_dump_regs(struct kvm_vcpu *vcpu)
+static void kvmppc_dump_regs(struct kvm_vcpu *vcpu)
 {
 	int r;
 
@@ -292,7 +313,7 @@ void kvmppc_dump_regs(struct kvm_vcpu *vcpu)
 	       vcpu->arch.last_inst);
 }
 
-struct kvm_vcpu *kvmppc_find_vcpu(struct kvm *kvm, int id)
+static struct kvm_vcpu *kvmppc_find_vcpu(struct kvm *kvm, int id)
 {
 	int r;
 	struct kvm_vcpu *v, *ret = NULL;
@@ -641,7 +662,8 @@ static int kvm_arch_vcpu_yield_to(struct kvm_vcpu *target)
 
 	spin_lock(&vcore->lock);
 	if (target->arch.state == KVMPPC_VCPU_RUNNABLE &&
-	    vcore->vcore_state != VCORE_INACTIVE)
+	    vcore->vcore_state != VCORE_INACTIVE &&
+	    vcore->runner)
 		target = vcore->runner;
 	spin_unlock(&vcore->lock);
 
@@ -1431,6 +1453,7 @@ static struct kvmppc_vcore *kvmppc_vcore_create(struct kvm *kvm, int core)
 	vcore->lpcr = kvm->arch.lpcr;
 	vcore->first_vcpuid = core * threads_per_subcore;
 	vcore->kvm = kvm;
+	INIT_LIST_HEAD(&vcore->preempt_list);
 
 	vcore->mpp_buffer_is_valid = false;
 
@@ -1655,6 +1678,7 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm,
 	spin_unlock(&vcore->lock);
 	vcpu->arch.vcore = vcore;
 	vcpu->arch.ptid = vcpu->vcpu_id - vcore->first_vcpuid;
+	vcpu->arch.thread_cpu = -1;
 
 	vcpu->arch.cpu_type = KVM_CPU_3S_64;
 	kvmppc_sanity_check(vcpu);
@@ -1749,6 +1773,7 @@ static int kvmppc_grab_hwthread(int cpu)
 
 	/* Ensure the thread won't go into the kernel if it wakes */
 	tpaca->kvm_hstate.kvm_vcpu = NULL;
+	tpaca->kvm_hstate.kvm_vcore = NULL;
 	tpaca->kvm_hstate.napping = 0;
 	smp_wmb();
 	tpaca->kvm_hstate.hwthread_req = 1;
@@ -1780,26 +1805,32 @@ static void kvmppc_release_hwthread(int cpu)
 	tpaca = &paca[cpu];
 	tpaca->kvm_hstate.hwthread_req = 0;
 	tpaca->kvm_hstate.kvm_vcpu = NULL;
+	tpaca->kvm_hstate.kvm_vcore = NULL;
+	tpaca->kvm_hstate.kvm_split_mode = NULL;
 }
 
-static void kvmppc_start_thread(struct kvm_vcpu *vcpu)
+static void kvmppc_start_thread(struct kvm_vcpu *vcpu, struct kvmppc_vcore *vc)
 {
 	int cpu;
 	struct paca_struct *tpaca;
-	struct kvmppc_vcore *vc = vcpu->arch.vcore;
+	struct kvmppc_vcore *mvc = vc->master_vcore;
 
-	if (vcpu->arch.timer_running) {
-		hrtimer_try_to_cancel(&vcpu->arch.dec_timer);
-		vcpu->arch.timer_running = 0;
+	cpu = vc->pcpu;
+	if (vcpu) {
+		if (vcpu->arch.timer_running) {
+			hrtimer_try_to_cancel(&vcpu->arch.dec_timer);
+			vcpu->arch.timer_running = 0;
+		}
+		cpu += vcpu->arch.ptid;
+		vcpu->cpu = mvc->pcpu;
+		vcpu->arch.thread_cpu = cpu;
 	}
-	cpu = vc->pcpu + vcpu->arch.ptid;
 	tpaca = &paca[cpu];
-	tpaca->kvm_hstate.kvm_vcore = vc;
-	tpaca->kvm_hstate.ptid = vcpu->arch.ptid;
-	vcpu->cpu = vc->pcpu;
-	/* Order stores to hstate.kvm_vcore etc. before store to kvm_vcpu */
-	smp_wmb();
 	tpaca->kvm_hstate.kvm_vcpu = vcpu;
+	tpaca->kvm_hstate.ptid = cpu - mvc->pcpu;
+	/* Order stores to hstate.kvm_vcpu etc. before store to kvm_vcore */
+	smp_wmb();
+	tpaca->kvm_hstate.kvm_vcore = mvc;
 	if (cpu != smp_processor_id())
 		kvmppc_ipi_thread(cpu);
 }
@@ -1812,12 +1843,12 @@ static void kvmppc_wait_for_nap(void)
 	for (loops = 0; loops < 1000000; ++loops) {
 		/*
 		 * Check if all threads are finished.
-		 * We set the vcpu pointer when starting a thread
+		 * We set the vcore pointer when starting a thread
 		 * and the thread clears it when finished, so we look
-		 * for any threads that still have a non-NULL vcpu ptr.
+		 * for any threads that still have a non-NULL vcore ptr.
 		 */
 		for (i = 1; i < threads_per_subcore; ++i)
-			if (paca[cpu + i].kvm_hstate.kvm_vcpu)
+			if (paca[cpu + i].kvm_hstate.kvm_vcore)
 				break;
 		if (i == threads_per_subcore) {
 			HMT_medium();
@@ -1827,7 +1858,7 @@ static void kvmppc_wait_for_nap(void)
 	}
 	HMT_medium();
 	for (i = 1; i < threads_per_subcore; ++i)
-		if (paca[cpu + i].kvm_hstate.kvm_vcpu)
+		if (paca[cpu + i].kvm_hstate.kvm_vcore)
 			pr_err("KVM: CPU %d seems to be stuck\n", cpu + i);
 }
 
@@ -1890,6 +1921,278 @@ static void kvmppc_start_restoring_l2_cache(const struct kvmppc_vcore *vc)
 	mtspr(SPRN_MPPR, mpp_addr | PPC_MPPR_FETCH_WHOLE_TABLE);
 }
 
+/*
+ * A list of virtual cores for each physical CPU.
+ * These are vcores that could run but their runner VCPU tasks are
+ * (or may be) preempted.
+ */
+struct preempted_vcore_list {
+	struct list_head	list;
+	spinlock_t		lock;
+};
+
+static DEFINE_PER_CPU(struct preempted_vcore_list, preempted_vcores);
+
+static void init_vcore_lists(void)
+{
+	int cpu;
+
+	for_each_possible_cpu(cpu) {
+		struct preempted_vcore_list *lp = &per_cpu(preempted_vcores, cpu);
+		spin_lock_init(&lp->lock);
+		INIT_LIST_HEAD(&lp->list);
+	}
+}
+
+static void kvmppc_vcore_preempt(struct kvmppc_vcore *vc)
+{
+	struct preempted_vcore_list *lp = this_cpu_ptr(&preempted_vcores);
+
+	vc->vcore_state = VCORE_PREEMPT;
+	vc->pcpu = smp_processor_id();
+	if (vc->num_threads < threads_per_subcore) {
+		spin_lock(&lp->lock);
+		list_add_tail(&vc->preempt_list, &lp->list);
+		spin_unlock(&lp->lock);
+	}
+
+	/* Start accumulating stolen time */
+	kvmppc_core_start_stolen(vc);
+}
+
+static void kvmppc_vcore_end_preempt(struct kvmppc_vcore *vc)
+{
+	struct preempted_vcore_list *lp;
+
+	kvmppc_core_end_stolen(vc);
+	if (!list_empty(&vc->preempt_list)) {
+		lp = &per_cpu(preempted_vcores, vc->pcpu);
+		spin_lock(&lp->lock);
+		list_del_init(&vc->preempt_list);
+		spin_unlock(&lp->lock);
+	}
+	vc->vcore_state = VCORE_INACTIVE;
+}
+
+/*
+ * This stores information about the virtual cores currently
+ * assigned to a physical core.
+ */
+struct core_info {
+	int		n_subcores;
+	int		max_subcore_threads;
+	int		total_threads;
+	int		subcore_threads[MAX_SUBCORES];
+	struct kvm	*subcore_vm[MAX_SUBCORES];
+	struct list_head vcs[MAX_SUBCORES];
+};
+
+/*
+ * This mapping means subcores 0 and 1 can use threads 0-3 and 4-7
+ * respectively in 2-way micro-threading (split-core) mode.
+ */
+static int subcore_thread_map[MAX_SUBCORES] = { 0, 4, 2, 6 };
+
+static void init_core_info(struct core_info *cip, struct kvmppc_vcore *vc)
+{
+	int sub;
+
+	memset(cip, 0, sizeof(*cip));
+	cip->n_subcores = 1;
+	cip->max_subcore_threads = vc->num_threads;
+	cip->total_threads = vc->num_threads;
+	cip->subcore_threads[0] = vc->num_threads;
+	cip->subcore_vm[0] = vc->kvm;
+	for (sub = 0; sub < MAX_SUBCORES; ++sub)
+		INIT_LIST_HEAD(&cip->vcs[sub]);
+	list_add_tail(&vc->preempt_list, &cip->vcs[0]);
+}
+
+static bool subcore_config_ok(int n_subcores, int n_threads)
+{
+	/* Can only dynamically split if unsplit to begin with */
+	if (n_subcores > 1 && threads_per_subcore < MAX_SMT_THREADS)
+		return false;
+	if (n_subcores > MAX_SUBCORES)
+		return false;
+	if (n_subcores > 1) {
+		if (!(dynamic_mt_modes & 2))
+			n_subcores = 4;
+		if (n_subcores > 2 && !(dynamic_mt_modes & 4))
+			return false;
+	}
+
+	return n_subcores * roundup_pow_of_two(n_threads) <= MAX_SMT_THREADS;
+}
+
+static void init_master_vcore(struct kvmppc_vcore *vc)
+{
+	vc->master_vcore = vc;
+	vc->entry_exit_map = 0;
+	vc->in_guest = 0;
+	vc->napping_threads = 0;
+	vc->conferring_threads = 0;
+}
+
+/*
+ * See if the existing subcores can be split into 3 (or fewer) subcores
+ * of at most two threads each, so we can fit in another vcore.  This
+ * assumes there are at most two subcores and at most 6 threads in total.
+ */
+static bool can_split_piggybacked_subcores(struct core_info *cip)
+{
+	int sub, new_sub;
+	int large_sub = -1;
+	int thr;
+	int n_subcores = cip->n_subcores;
+	struct kvmppc_vcore *vc, *vcnext;
+	struct kvmppc_vcore *master_vc = NULL;
+
+	for (sub = 0; sub < cip->n_subcores; ++sub) {
+		if (cip->subcore_threads[sub] <= 2)
+			continue;
+		if (large_sub >= 0)
+			return false;
+		large_sub = sub;
+		vc = list_first_entry(&cip->vcs[sub], struct kvmppc_vcore,
+				      preempt_list);
+		if (vc->num_threads > 2)
+			return false;
+		n_subcores += (cip->subcore_threads[sub] - 1) >> 1;
+	}
+	if (n_subcores > 3 || large_sub < 0)
+		return false;
+
+	/*
+	 * Seems feasible, so go through and move vcores to new subcores.
+	 * Note that when we have two or more vcores in one subcore,
+	 * all those vcores must have only one thread each.
+	 */
+	new_sub = cip->n_subcores;
+	thr = 0;
+	sub = large_sub;
+	list_for_each_entry_safe(vc, vcnext, &cip->vcs[sub], preempt_list) {
+		if (thr >= 2) {
+			list_del(&vc->preempt_list);
+			list_add_tail(&vc->preempt_list, &cip->vcs[new_sub]);
+			/* vc->num_threads must be 1 */
+			if (++cip->subcore_threads[new_sub] == 1) {
+				cip->subcore_vm[new_sub] = vc->kvm;
+				init_master_vcore(vc);
+				master_vc = vc;
+				++cip->n_subcores;
+			} else {
+				vc->master_vcore = master_vc;
+				++new_sub;
+			}
+		}
+		thr += vc->num_threads;
+	}
+	cip->subcore_threads[large_sub] = 2;
+	cip->max_subcore_threads = 2;
+
+	return true;
+}
+
+static bool can_dynamic_split(struct kvmppc_vcore *vc, struct core_info *cip)
+{
+	int n_threads = vc->num_threads;
+	int sub;
+
+	if (!cpu_has_feature(CPU_FTR_ARCH_207S))
+		return false;
+
+	if (n_threads < cip->max_subcore_threads)
+		n_threads = cip->max_subcore_threads;
+	if (subcore_config_ok(cip->n_subcores + 1, n_threads)) {
+		cip->max_subcore_threads = n_threads;
+	} else if (cip->n_subcores <= 2 && cip->total_threads <= 6 &&
+		   vc->num_threads <= 2) {
+		/*
+		 * We may be able to fit another subcore in by
+		 * splitting an existing subcore with 3 or 4
+		 * threads into two 2-thread subcores, or one
+		 * with 5 or 6 threads into three subcores.
+		 * We can only do this if those subcores have
+		 * piggybacked virtual cores.
+		 */
+		if (!can_split_piggybacked_subcores(cip))
+			return false;
+	} else {
+		return false;
+	}
+
+	sub = cip->n_subcores;
+	++cip->n_subcores;
+	cip->total_threads += vc->num_threads;
+	cip->subcore_threads[sub] = vc->num_threads;
+	cip->subcore_vm[sub] = vc->kvm;
+	init_master_vcore(vc);
+	list_del(&vc->preempt_list);
+	list_add_tail(&vc->preempt_list, &cip->vcs[sub]);
+
+	return true;
+}
+
+static bool can_piggyback_subcore(struct kvmppc_vcore *pvc,
+				  struct core_info *cip, int sub)
+{
+	struct kvmppc_vcore *vc;
+	int n_thr;
+
+	vc = list_first_entry(&cip->vcs[sub], struct kvmppc_vcore,
+			      preempt_list);
+
+	/* require same VM and same per-core reg values */
+	if (pvc->kvm != vc->kvm ||
+	    pvc->tb_offset != vc->tb_offset ||
+	    pvc->pcr != vc->pcr ||
+	    pvc->lpcr != vc->lpcr)
+		return false;
+
+	/* P8 guest with > 1 thread per core would see wrong TIR value */
+	if (cpu_has_feature(CPU_FTR_ARCH_207S) &&
+	    (vc->num_threads > 1 || pvc->num_threads > 1))
+		return false;
+
+	n_thr = cip->subcore_threads[sub] + pvc->num_threads;
+	if (n_thr > cip->max_subcore_threads) {
+		if (!subcore_config_ok(cip->n_subcores, n_thr))
+			return false;
+		cip->max_subcore_threads = n_thr;
+	}
+
+	cip->total_threads += pvc->num_threads;
+	cip->subcore_threads[sub] = n_thr;
+	pvc->master_vcore = vc;
+	list_del(&pvc->preempt_list);
+	list_add_tail(&pvc->preempt_list, &cip->vcs[sub]);
+
+	return true;
+}
+
+/*
+ * Work out whether it is possible to piggyback the execution of
+ * vcore *pvc onto the execution of the other vcores described in *cip.
+ */
+static bool can_piggyback(struct kvmppc_vcore *pvc, struct core_info *cip,
+			  int target_threads)
+{
+	int sub;
+
+	if (cip->total_threads + pvc->num_threads > target_threads)
+		return false;
+	for (sub = 0; sub < cip->n_subcores; ++sub)
+		if (cip->subcore_threads[sub] &&
+		    can_piggyback_subcore(pvc, cip, sub))
+			return true;
+
+	if (can_dynamic_split(pvc, cip))
+		return true;
+
+	return false;
+}
+
 static void prepare_threads(struct kvmppc_vcore *vc)
 {
 	struct kvm_vcpu *vcpu, *vnext;
@@ -1909,12 +2212,45 @@ static void prepare_threads(struct kvmppc_vcore *vc)
 	}
 }
 
-static void post_guest_process(struct kvmppc_vcore *vc)
+static void collect_piggybacks(struct core_info *cip, int target_threads)
 {
+	struct preempted_vcore_list *lp = this_cpu_ptr(&preempted_vcores);
+	struct kvmppc_vcore *pvc, *vcnext;
+
+	spin_lock(&lp->lock);
+	list_for_each_entry_safe(pvc, vcnext, &lp->list, preempt_list) {
+		if (!spin_trylock(&pvc->lock))
+			continue;
+		prepare_threads(pvc);
+		if (!pvc->n_runnable) {
+			list_del_init(&pvc->preempt_list);
+			if (pvc->runner == NULL) {
+				pvc->vcore_state = VCORE_INACTIVE;
+				kvmppc_core_end_stolen(pvc);
+			}
+			spin_unlock(&pvc->lock);
+			continue;
+		}
+		if (!can_piggyback(pvc, cip, target_threads)) {
+			spin_unlock(&pvc->lock);
+			continue;
+		}
+		kvmppc_core_end_stolen(pvc);
+		pvc->vcore_state = VCORE_PIGGYBACK;
+		if (cip->total_threads >= target_threads)
+			break;
+	}
+	spin_unlock(&lp->lock);
+}
+
+static void post_guest_process(struct kvmppc_vcore *vc, bool is_master)
+{
+	int still_running = 0;
 	u64 now;
 	long ret;
 	struct kvm_vcpu *vcpu, *vnext;
 
+	spin_lock(&vc->lock);
 	now = get_tb();
 	list_for_each_entry_safe(vcpu, vnext, &vc->runnable_threads,
 				 arch.run_list) {
@@ -1933,17 +2269,36 @@ static void post_guest_process(struct kvmppc_vcore *vc)
 		vcpu->arch.ret = ret;
 		vcpu->arch.trap = 0;
 
-		if (vcpu->arch.ceded) {
-			if (!is_kvmppc_resume_guest(ret))
-				kvmppc_end_cede(vcpu);
-			else
+		if (is_kvmppc_resume_guest(vcpu->arch.ret)) {
+			if (vcpu->arch.pending_exceptions)
+				kvmppc_core_prepare_to_enter(vcpu);
+			if (vcpu->arch.ceded)
 				kvmppc_set_timer(vcpu);
-		}
-		if (!is_kvmppc_resume_guest(vcpu->arch.ret)) {
+			else
+				++still_running;
+		} else {
 			kvmppc_remove_runnable(vc, vcpu);
 			wake_up(&vcpu->arch.cpu_run);
 		}
 	}
+	list_del_init(&vc->preempt_list);
+	if (!is_master) {
+		if (still_running > 0) {
+			kvmppc_vcore_preempt(vc);
+		} else if (vc->runner) {
+			vc->vcore_state = VCORE_PREEMPT;
+			kvmppc_core_start_stolen(vc);
+		} else {
+			vc->vcore_state = VCORE_INACTIVE;
+		}
+		if (vc->n_runnable > 0 && vc->runner == NULL) {
+			/* make sure there's a candidate runner awake */
+			vcpu = list_first_entry(&vc->runnable_threads,
+						struct kvm_vcpu, arch.run_list);
+			wake_up(&vcpu->arch.cpu_run);
+		}
+	}
+	spin_unlock(&vc->lock);
 }
 
 /*
@@ -1955,6 +2310,15 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
 	struct kvm_vcpu *vcpu, *vnext;
 	int i;
 	int srcu_idx;
+	struct core_info core_info;
+	struct kvmppc_vcore *pvc, *vcnext;
+	struct kvm_split_mode split_info, *sip;
+	int split, subcore_size, active;
+	int sub;
+	bool thr0_done;
+	unsigned long cmd_bit, stat_bit;
+	int pcpu, thr;
+	int target_threads;
 
 	/*
 	 * Remove from the list any threads that have a signal pending
@@ -1969,11 +2333,8 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
 	/*
 	 * Initialize *vc.
 	 */
-	vc->entry_exit_map = 0;
+	init_master_vcore(vc);
 	vc->preempt_tb = TB_NIL;
-	vc->in_guest = 0;
-	vc->napping_threads = 0;
-	vc->conferring_threads = 0;
 
 	/*
 	 * Make sure we are running on primary threads, and that secondary
@@ -1991,24 +2352,112 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
 		goto out;
 	}
 
-
-	vc->pcpu = smp_processor_id();
-	list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) {
-		kvmppc_start_thread(vcpu);
-		kvmppc_create_dtl_entry(vcpu, vc);
-		trace_kvm_guest_enter(vcpu);
+	/*
+	 * See if we could run any other vcores on the physical core
+	 * along with this one.
+	 */
+	init_core_info(&core_info, vc);
+	pcpu = smp_processor_id();
+	target_threads = threads_per_subcore;
+	if (target_smt_mode && target_smt_mode < target_threads)
+		target_threads = target_smt_mode;
+	if (vc->num_threads < target_threads)
+		collect_piggybacks(&core_info, target_threads);
+
+	/* Decide on micro-threading (split-core) mode */
+	subcore_size = threads_per_subcore;
+	cmd_bit = stat_bit = 0;
+	split = core_info.n_subcores;
+	sip = NULL;
+	if (split > 1) {
+		/* threads_per_subcore must be MAX_SMT_THREADS (8) here */
+		if (split == 2 && (dynamic_mt_modes & 2)) {
+			cmd_bit = HID0_POWER8_1TO2LPAR;
+			stat_bit = HID0_POWER8_2LPARMODE;
+		} else {
+			split = 4;
+			cmd_bit = HID0_POWER8_1TO4LPAR;
+			stat_bit = HID0_POWER8_4LPARMODE;
+		}
+		subcore_size = MAX_SMT_THREADS / split;
+		sip = &split_info;
+		memset(&split_info, 0, sizeof(split_info));
+		split_info.rpr = mfspr(SPRN_RPR);
+		split_info.pmmar = mfspr(SPRN_PMMAR);
+		split_info.ldbar = mfspr(SPRN_LDBAR);
+		split_info.subcore_size = subcore_size;
+		for (sub = 0; sub < core_info.n_subcores; ++sub)
+			split_info.master_vcs[sub] =
+				list_first_entry(&core_info.vcs[sub],
+					struct kvmppc_vcore, preempt_list);
+		/* order writes to split_info before kvm_split_mode pointer */
+		smp_wmb();
+	}
+	pcpu = smp_processor_id();
+	for (thr = 0; thr < threads_per_subcore; ++thr)
+		paca[pcpu + thr].kvm_hstate.kvm_split_mode = sip;
+
+	/* Initiate micro-threading (split-core) if required */
+	if (cmd_bit) {
+		unsigned long hid0 = mfspr(SPRN_HID0);
+
+		hid0 |= cmd_bit | HID0_POWER8_DYNLPARDIS;
+		mb();
+		mtspr(SPRN_HID0, hid0);
+		isync();
+		for (;;) {
+			hid0 = mfspr(SPRN_HID0);
+			if (hid0 & stat_bit)
+				break;
+			cpu_relax();
+		}
+		split_info.do_nap = 1;	/* ask secondaries to nap when done */
+	}
+
+	/* Start all the threads */
+	active = 0;
+	for (sub = 0; sub < core_info.n_subcores; ++sub) {
+		thr = subcore_thread_map[sub];
+		thr0_done = false;
+		active |= 1 << thr;
+		list_for_each_entry(pvc, &core_info.vcs[sub], preempt_list) {
+			pvc->pcpu = pcpu + thr;
+			list_for_each_entry(vcpu, &pvc->runnable_threads,
+					    arch.run_list) {
+				kvmppc_start_thread(vcpu, pvc);
+				kvmppc_create_dtl_entry(vcpu, pvc);
+				trace_kvm_guest_enter(vcpu);
+				if (!vcpu->arch.ptid)
+					thr0_done = true;
+				active |= 1 << (thr + vcpu->arch.ptid);
+			}
+			/*
+			 * We need to start the first thread of each subcore
+			 * even if it doesn't have a vcpu.
+			 */
+			if (pvc->master_vcore == pvc && !thr0_done)
+				kvmppc_start_thread(NULL, pvc);
+			thr += pvc->num_threads;
+		}
 	}
-
-	/* Set this explicitly in case thread 0 doesn't have a vcpu */
-	get_paca()->kvm_hstate.kvm_vcore = vc;
-	get_paca()->kvm_hstate.ptid = 0;
+	/*
+	 * When doing micro-threading, poke the inactive threads as well.
+	 * This gets them to the nap instruction after kvm_do_nap,
+	 * which reduces the time taken to unsplit later.
+	 */
+	if (split > 1)
+		for (thr = 1; thr < threads_per_subcore; ++thr)
+			if (!(active & (1 << thr)))
+				kvmppc_ipi_thread(pcpu + thr);
 
 	vc->vcore_state = VCORE_RUNNING;
 	preempt_disable();
 
 	trace_kvmppc_run_core(vc, 0);
 
-	spin_unlock(&vc->lock);
+	for (sub = 0; sub < core_info.n_subcores; ++sub)
+		list_for_each_entry(pvc, &core_info.vcs[sub], preempt_list)
+			spin_unlock(&pvc->lock);
 
 	kvm_guest_enter();
 
@@ -2019,32 +2468,58 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
 
 	__kvmppc_vcore_entry();
 
-	spin_lock(&vc->lock);
-
 	if (vc->mpp_buffer)
 		kvmppc_start_saving_l2_cache(vc);
 
-	/* disable sending of IPIs on virtual external irqs */
-	list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list)
-		vcpu->cpu = -1;
-	/* wait for secondary threads to finish writing their state to memory */
-	kvmppc_wait_for_nap();
-	for (i = 0; i < threads_per_subcore; ++i)
-		kvmppc_release_hwthread(vc->pcpu + i);
+	srcu_read_unlock(&vc->kvm->srcu, srcu_idx);
+
+	spin_lock(&vc->lock);
 	/* prevent other vcpu threads from doing kvmppc_start_thread() now */
 	vc->vcore_state = VCORE_EXITING;
-	spin_unlock(&vc->lock);
 
-	srcu_read_unlock(&vc->kvm->srcu, srcu_idx);
+	/* wait for secondary threads to finish writing their state to memory */
+	kvmppc_wait_for_nap();
+
+	/* Return to whole-core mode if we split the core earlier */
+	if (split > 1) {
+		unsigned long hid0 = mfspr(SPRN_HID0);
+		unsigned long loops = 0;
+
+		hid0 &= ~HID0_POWER8_DYNLPARDIS;
+		stat_bit = HID0_POWER8_2LPARMODE | HID0_POWER8_4LPARMODE;
+		mb();
+		mtspr(SPRN_HID0, hid0);
+		isync();
+		for (;;) {
+			hid0 = mfspr(SPRN_HID0);
+			if (!(hid0 & stat_bit))
+				break;
+			cpu_relax();
+			++loops;
+		}
+		split_info.do_nap = 0;
+	}
+
+	/* Let secondaries go back to the offline loop */
+	for (i = 0; i < threads_per_subcore; ++i) {
+		kvmppc_release_hwthread(pcpu + i);
+		if (sip && sip->napped[i])
+			kvmppc_ipi_thread(pcpu + i);
+	}
+
+	spin_unlock(&vc->lock);
 
 	/* make sure updates to secondary vcpu structs are visible now */
 	smp_mb();
 	kvm_guest_exit();
 
-	preempt_enable();
+	for (sub = 0; sub < core_info.n_subcores; ++sub)
+		list_for_each_entry_safe(pvc, vcnext, &core_info.vcs[sub],
+					 preempt_list)
+			post_guest_process(pvc, pvc == vc);
 
 	spin_lock(&vc->lock);
-	post_guest_process(vc);
+	preempt_enable();
 
  out:
 	vc->vcore_state = VCORE_INACTIVE;
@@ -2055,13 +2530,17 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
  * Wait for some other vcpu thread to execute us, and
  * wake us up when we need to handle something in the host.
  */
-static void kvmppc_wait_for_exec(struct kvm_vcpu *vcpu, int wait_state)
+static void kvmppc_wait_for_exec(struct kvmppc_vcore *vc,
+				 struct kvm_vcpu *vcpu, int wait_state)
 {
 	DEFINE_WAIT(wait);
 
 	prepare_to_wait(&vcpu->arch.cpu_run, &wait, wait_state);
-	if (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE)
+	if (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE) {
+		spin_unlock(&vc->lock);
 		schedule();
+		spin_lock(&vc->lock);
+	}
 	finish_wait(&vcpu->arch.cpu_run, &wait);
 }
 
@@ -2137,9 +2616,21 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 	 * this thread straight away and have it join in.
 	 */
 	if (!signal_pending(current)) {
-		if (vc->vcore_state == VCORE_RUNNING && !VCORE_IS_EXITING(vc)) {
+		if (vc->vcore_state == VCORE_PIGGYBACK) {
+			struct kvmppc_vcore *mvc = vc->master_vcore;
+			if (spin_trylock(&mvc->lock)) {
+				if (mvc->vcore_state == VCORE_RUNNING &&
+				    !VCORE_IS_EXITING(mvc)) {
+					kvmppc_create_dtl_entry(vcpu, vc);
+					kvmppc_start_thread(vcpu, vc);
+					trace_kvm_guest_enter(vcpu);
+				}
+				spin_unlock(&mvc->lock);
+			}
+		} else if (vc->vcore_state == VCORE_RUNNING &&
+			   !VCORE_IS_EXITING(vc)) {
 			kvmppc_create_dtl_entry(vcpu, vc);
-			kvmppc_start_thread(vcpu);
+			kvmppc_start_thread(vcpu, vc);
 			trace_kvm_guest_enter(vcpu);
 		} else if (vc->vcore_state == VCORE_SLEEPING) {
 			wake_up(&vc->wq);
@@ -2149,10 +2640,11 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 
 	while (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE &&
 	       !signal_pending(current)) {
+		if (vc->vcore_state == VCORE_PREEMPT && vc->runner == NULL)
+			kvmppc_vcore_end_preempt(vc);
+
 		if (vc->vcore_state != VCORE_INACTIVE) {
-			spin_unlock(&vc->lock);
-			kvmppc_wait_for_exec(vcpu, TASK_INTERRUPTIBLE);
-			spin_lock(&vc->lock);
+			kvmppc_wait_for_exec(vc, vcpu, TASK_INTERRUPTIBLE);
 			continue;
 		}
 		list_for_each_entry_safe(v, vn, &vc->runnable_threads,
@@ -2179,10 +2671,11 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 		if (n_ceded == vc->n_runnable) {
 			kvmppc_vcore_blocked(vc);
 		} else if (should_resched()) {
-			vc->vcore_state = VCORE_PREEMPT;
+			kvmppc_vcore_preempt(vc);
 			/* Let something else run */
 			cond_resched_lock(&vc->lock);
-			vc->vcore_state = VCORE_INACTIVE;
+			if (vc->vcore_state == VCORE_PREEMPT)
+				kvmppc_vcore_end_preempt(vc);
 		} else {
 			kvmppc_run_core(vc);
 		}
@@ -2191,11 +2684,8 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 
 	while (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE &&
 	       (vc->vcore_state == VCORE_RUNNING ||
-		vc->vcore_state == VCORE_EXITING)) {
-		spin_unlock(&vc->lock);
-		kvmppc_wait_for_exec(vcpu, TASK_UNINTERRUPTIBLE);
-		spin_lock(&vc->lock);
-	}
+		vc->vcore_state == VCORE_EXITING))
+		kvmppc_wait_for_exec(vc, vcpu, TASK_UNINTERRUPTIBLE);
 
 	if (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE) {
 		kvmppc_remove_runnable(vc, vcpu);
@@ -2755,6 +3245,8 @@ static int kvmppc_book3s_init_hv(void)
 
 	init_default_hcalls();
 
+	init_vcore_lists();
+
 	r = kvmppc_mmu_hv_init();
 	return r;
 }
diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c
index ed2589d4593f..fd7006bf6b1a 100644
--- a/arch/powerpc/kvm/book3s_hv_builtin.c
+++ b/arch/powerpc/kvm/book3s_hv_builtin.c
@@ -110,14 +110,15 @@ void __init kvm_cma_reserve(void)
 long int kvmppc_rm_h_confer(struct kvm_vcpu *vcpu, int target,
 			    unsigned int yield_count)
 {
-	struct kvmppc_vcore *vc = vcpu->arch.vcore;
+	struct kvmppc_vcore *vc = local_paca->kvm_hstate.kvm_vcore;
+	int ptid = local_paca->kvm_hstate.ptid;
 	int threads_running;
 	int threads_ceded;
 	int threads_conferring;
 	u64 stop = get_tb() + 10 * tb_ticks_per_usec;
 	int rv = H_SUCCESS; /* => don't yield */
 
-	set_bit(vcpu->arch.ptid, &vc->conferring_threads);
+	set_bit(ptid, &vc->conferring_threads);
 	while ((get_tb() < stop) && !VCORE_IS_EXITING(vc)) {
 		threads_running = VCORE_ENTRY_MAP(vc);
 		threads_ceded = vc->napping_threads;
@@ -127,7 +128,7 @@ long int kvmppc_rm_h_confer(struct kvm_vcpu *vcpu, int target,
 			break;
 		}
 	}
-	clear_bit(vcpu->arch.ptid, &vc->conferring_threads);
+	clear_bit(ptid, &vc->conferring_threads);
 	return rv;
 }
 
@@ -238,7 +239,8 @@ void kvmhv_commence_exit(int trap)
 {
 	struct kvmppc_vcore *vc = local_paca->kvm_hstate.kvm_vcore;
 	int ptid = local_paca->kvm_hstate.ptid;
-	int me, ee;
+	struct kvm_split_mode *sip = local_paca->kvm_hstate.kvm_split_mode;
+	int me, ee, i;
 
 	/* Set our bit in the threads-exiting-guest map in the 0xff00
 	   bits of vcore->entry_exit_map */
@@ -258,4 +260,26 @@ void kvmhv_commence_exit(int trap)
 	 */
 	if (trap != BOOK3S_INTERRUPT_HV_DECREMENTER)
 		kvmhv_interrupt_vcore(vc, ee & ~(1 << ptid));
+
+	/*
+	 * If we are doing dynamic micro-threading, interrupt the other
+	 * subcores to pull them out of their guests too.
+	 */
+	if (!sip)
+		return;
+
+	for (i = 0; i < MAX_SUBCORES; ++i) {
+		vc = sip->master_vcs[i];
+		if (!vc)
+			break;
+		do {
+			ee = vc->entry_exit_map;
+			/* Already asked to exit? */
+			if ((ee >> 8) != 0)
+				break;
+		} while (cmpxchg(&vc->entry_exit_map, ee,
+				 ee | VCORE_EXIT_REQ) != ee);
+		if ((ee >> 8) == 0)
+			kvmhv_interrupt_vcore(vc, ee);
+	}
 }
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
index b027a89737b6..c1df9bb1e413 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -12,6 +12,7 @@
 #include <linux/kvm_host.h>
 #include <linux/hugetlb.h>
 #include <linux/module.h>
+#include <linux/log2.h>
 
 #include <asm/tlbflush.h>
 #include <asm/kvm_ppc.h>
@@ -97,25 +98,52 @@ void kvmppc_add_revmap_chain(struct kvm *kvm, struct revmap_entry *rev,
 }
 EXPORT_SYMBOL_GPL(kvmppc_add_revmap_chain);
 
+/* Update the changed page order field of an rmap entry */
+void kvmppc_update_rmap_change(unsigned long *rmap, unsigned long psize)
+{
+	unsigned long order;
+
+	if (!psize)
+		return;
+	order = ilog2(psize);
+	order <<= KVMPPC_RMAP_CHG_SHIFT;
+	if (order > (*rmap & KVMPPC_RMAP_CHG_ORDER))
+		*rmap = (*rmap & ~KVMPPC_RMAP_CHG_ORDER) | order;
+}
+EXPORT_SYMBOL_GPL(kvmppc_update_rmap_change);
+
+/* Returns a pointer to the revmap entry for the page mapped by a HPTE */
+static unsigned long *revmap_for_hpte(struct kvm *kvm, unsigned long hpte_v,
+				      unsigned long hpte_gr)
+{
+	struct kvm_memory_slot *memslot;
+	unsigned long *rmap;
+	unsigned long gfn;
+
+	gfn = hpte_rpn(hpte_gr, hpte_page_size(hpte_v, hpte_gr));
+	memslot = __gfn_to_memslot(kvm_memslots_raw(kvm), gfn);
+	if (!memslot)
+		return NULL;
+
+	rmap = real_vmalloc_addr(&memslot->arch.rmap[gfn - memslot->base_gfn]);
+	return rmap;
+}
+
 /* Remove this HPTE from the chain for a real page */
 static void remove_revmap_chain(struct kvm *kvm, long pte_index,
 				struct revmap_entry *rev,
 				unsigned long hpte_v, unsigned long hpte_r)
 {
 	struct revmap_entry *next, *prev;
-	unsigned long gfn, ptel, head;
-	struct kvm_memory_slot *memslot;
+	unsigned long ptel, head;
 	unsigned long *rmap;
 	unsigned long rcbits;
 
 	rcbits = hpte_r & (HPTE_R_R | HPTE_R_C);
 	ptel = rev->guest_rpte |= rcbits;
-	gfn = hpte_rpn(ptel, hpte_page_size(hpte_v, ptel));
-	memslot = __gfn_to_memslot(kvm_memslots_raw(kvm), gfn);
-	if (!memslot)
+	rmap = revmap_for_hpte(kvm, hpte_v, ptel);
+	if (!rmap)
 		return;
-
-	rmap = real_vmalloc_addr(&memslot->arch.rmap[gfn - memslot->base_gfn]);
 	lock_rmap(rmap);
 
 	head = *rmap & KVMPPC_RMAP_INDEX;
@@ -131,6 +159,8 @@ static void remove_revmap_chain(struct kvm *kvm, long pte_index,
 			*rmap = (*rmap & ~KVMPPC_RMAP_INDEX) | head;
 	}
 	*rmap |= rcbits << KVMPPC_RMAP_RC_SHIFT;
+	if (rcbits & HPTE_R_C)
+		kvmppc_update_rmap_change(rmap, hpte_page_size(hpte_v, hpte_r));
 	unlock_rmap(rmap);
 }
 
@@ -421,14 +451,20 @@ long kvmppc_do_h_remove(struct kvm *kvm, unsigned long flags,
 	rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]);
 	v = pte & ~HPTE_V_HVLOCK;
 	if (v & HPTE_V_VALID) {
-		u64 pte1;
-
-		pte1 = be64_to_cpu(hpte[1]);
 		hpte[0] &= ~cpu_to_be64(HPTE_V_VALID);
-		rb = compute_tlbie_rb(v, pte1, pte_index);
+		rb = compute_tlbie_rb(v, be64_to_cpu(hpte[1]), pte_index);
 		do_tlbies(kvm, &rb, 1, global_invalidates(kvm, flags), true);
-		/* Read PTE low word after tlbie to get final R/C values */
-		remove_revmap_chain(kvm, pte_index, rev, v, pte1);
+		/*
+		 * The reference (R) and change (C) bits in a HPT
+		 * entry can be set by hardware at any time up until
+		 * the HPTE is invalidated and the TLB invalidation
+		 * sequence has completed.  This means that when
+		 * removing a HPTE, we need to re-read the HPTE after
+		 * the invalidation sequence has completed in order to
+		 * obtain reliable values of R and C.
+		 */
+		remove_revmap_chain(kvm, pte_index, rev, v,
+				    be64_to_cpu(hpte[1]));
 	}
 	r = rev->guest_rpte & ~HPTE_GR_RESERVED;
 	note_hpte_modification(kvm, rev);
@@ -655,6 +691,105 @@ long kvmppc_h_read(struct kvm_vcpu *vcpu, unsigned long flags,
 	return H_SUCCESS;
 }
 
+long kvmppc_h_clear_ref(struct kvm_vcpu *vcpu, unsigned long flags,
+			unsigned long pte_index)
+{
+	struct kvm *kvm = vcpu->kvm;
+	__be64 *hpte;
+	unsigned long v, r, gr;
+	struct revmap_entry *rev;
+	unsigned long *rmap;
+	long ret = H_NOT_FOUND;
+
+	if (pte_index >= kvm->arch.hpt_npte)
+		return H_PARAMETER;
+
+	rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]);
+	hpte = (__be64 *)(kvm->arch.hpt_virt + (pte_index << 4));
+	while (!try_lock_hpte(hpte, HPTE_V_HVLOCK))
+		cpu_relax();
+	v = be64_to_cpu(hpte[0]);
+	r = be64_to_cpu(hpte[1]);
+	if (!(v & (HPTE_V_VALID | HPTE_V_ABSENT)))
+		goto out;
+
+	gr = rev->guest_rpte;
+	if (rev->guest_rpte & HPTE_R_R) {
+		rev->guest_rpte &= ~HPTE_R_R;
+		note_hpte_modification(kvm, rev);
+	}
+	if (v & HPTE_V_VALID) {
+		gr |= r & (HPTE_R_R | HPTE_R_C);
+		if (r & HPTE_R_R) {
+			kvmppc_clear_ref_hpte(kvm, hpte, pte_index);
+			rmap = revmap_for_hpte(kvm, v, gr);
+			if (rmap) {
+				lock_rmap(rmap);
+				*rmap |= KVMPPC_RMAP_REFERENCED;
+				unlock_rmap(rmap);
+			}
+		}
+	}
+	vcpu->arch.gpr[4] = gr;
+	ret = H_SUCCESS;
+ out:
+	unlock_hpte(hpte, v & ~HPTE_V_HVLOCK);
+	return ret;
+}
+
+long kvmppc_h_clear_mod(struct kvm_vcpu *vcpu, unsigned long flags,
+			unsigned long pte_index)
+{
+	struct kvm *kvm = vcpu->kvm;
+	__be64 *hpte;
+	unsigned long v, r, gr;
+	struct revmap_entry *rev;
+	unsigned long *rmap;
+	long ret = H_NOT_FOUND;
+
+	if (pte_index >= kvm->arch.hpt_npte)
+		return H_PARAMETER;
+
+	rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]);
+	hpte = (__be64 *)(kvm->arch.hpt_virt + (pte_index << 4));
+	while (!try_lock_hpte(hpte, HPTE_V_HVLOCK))
+		cpu_relax();
+	v = be64_to_cpu(hpte[0]);
+	r = be64_to_cpu(hpte[1]);
+	if (!(v & (HPTE_V_VALID | HPTE_V_ABSENT)))
+		goto out;
+
+	gr = rev->guest_rpte;
+	if (gr & HPTE_R_C) {
+		rev->guest_rpte &= ~HPTE_R_C;
+		note_hpte_modification(kvm, rev);
+	}
+	if (v & HPTE_V_VALID) {
+		/* need to make it temporarily absent so C is stable */
+		hpte[0] |= cpu_to_be64(HPTE_V_ABSENT);
+		kvmppc_invalidate_hpte(kvm, hpte, pte_index);
+		r = be64_to_cpu(hpte[1]);
+		gr |= r & (HPTE_R_R | HPTE_R_C);
+		if (r & HPTE_R_C) {
+			unsigned long psize = hpte_page_size(v, r);
+			hpte[1] = cpu_to_be64(r & ~HPTE_R_C);
+			eieio();
+			rmap = revmap_for_hpte(kvm, v, gr);
+			if (rmap) {
+				lock_rmap(rmap);
+				*rmap |= KVMPPC_RMAP_CHANGED;
+				kvmppc_update_rmap_change(rmap, psize);
+				unlock_rmap(rmap);
+			}
+		}
+	}
+	vcpu->arch.gpr[4] = gr;
+	ret = H_SUCCESS;
+ out:
+	unlock_hpte(hpte, v & ~HPTE_V_HVLOCK);
+	return ret;
+}
+
 void kvmppc_invalidate_hpte(struct kvm *kvm, __be64 *hptep,
 			unsigned long pte_index)
 {
diff --git a/arch/powerpc/kvm/book3s_hv_rm_xics.c b/arch/powerpc/kvm/book3s_hv_rm_xics.c
index 00e45b6d4f24..24f58076d49e 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_xics.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_xics.c
@@ -67,14 +67,12 @@ static void icp_rm_set_vcpu_irq(struct kvm_vcpu *vcpu,
 	}
 
 	/* Check if the core is loaded, if not, too hard */
-	cpu = vcpu->cpu;
+	cpu = vcpu->arch.thread_cpu;
 	if (cpu < 0 || cpu >= nr_cpu_ids) {
 		this_icp->rm_action |= XICS_RM_KICK_VCPU;
 		this_icp->rm_kick_target = vcpu;
 		return;
 	}
-	/* In SMT cpu will always point to thread 0, we adjust it */
-	cpu += vcpu->arch.ptid;
 
 	smp_mb();
 	kvmhv_rm_send_ipi(cpu);
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index faa86e9c0551..472680f47d09 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -128,6 +128,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 	subf	r4, r4, r3
 	mtspr	SPRN_DEC, r4
 
+	/* hwthread_req may have got set by cede or no vcpu, so clear it */
+	li	r0, 0
+	stb	r0, HSTATE_HWTHREAD_REQ(r13)
+
 	/*
 	 * For external and machine check interrupts, we need
 	 * to call the Linux handler to process the interrupt.
@@ -215,7 +219,6 @@ kvm_novcpu_wakeup:
 	ld	r5, HSTATE_KVM_VCORE(r13)
 	li	r0, 0
 	stb	r0, HSTATE_NAPPING(r13)
-	stb	r0, HSTATE_HWTHREAD_REQ(r13)
 
 	/* check the wake reason */
 	bl	kvmppc_check_wake_reason
@@ -315,10 +318,10 @@ kvm_start_guest:
 	cmpdi	r3, 0
 	bge	kvm_no_guest
 
-	/* get vcpu pointer, NULL if we have no vcpu to run */
-	ld	r4,HSTATE_KVM_VCPU(r13)
-	cmpdi	r4,0
-	/* if we have no vcpu to run, go back to sleep */
+	/* get vcore pointer, NULL if we have nothing to run */
+	ld	r5,HSTATE_KVM_VCORE(r13)
+	cmpdi	r5,0
+	/* if we have no vcore to run, go back to sleep */
 	beq	kvm_no_guest
 
 kvm_secondary_got_guest:
@@ -327,21 +330,42 @@ kvm_secondary_got_guest:
 	ld	r6, PACA_DSCR_DEFAULT(r13)
 	std	r6, HSTATE_DSCR(r13)
 
-	/* Order load of vcore, ptid etc. after load of vcpu */
+	/* On thread 0 of a subcore, set HDEC to max */
+	lbz	r4, HSTATE_PTID(r13)
+	cmpwi	r4, 0
+	bne	63f
+	lis	r6, 0x7fff
+	ori	r6, r6, 0xffff
+	mtspr	SPRN_HDEC, r6
+	/* and set per-LPAR registers, if doing dynamic micro-threading */
+	ld	r6, HSTATE_SPLIT_MODE(r13)
+	cmpdi	r6, 0
+	beq	63f
+	ld	r0, KVM_SPLIT_RPR(r6)
+	mtspr	SPRN_RPR, r0
+	ld	r0, KVM_SPLIT_PMMAR(r6)
+	mtspr	SPRN_PMMAR, r0
+	ld	r0, KVM_SPLIT_LDBAR(r6)
+	mtspr	SPRN_LDBAR, r0
+	isync
+63:
+	/* Order load of vcpu after load of vcore */
 	lwsync
+	ld	r4, HSTATE_KVM_VCPU(r13)
 	bl	kvmppc_hv_entry
 
 	/* Back from the guest, go back to nap */
-	/* Clear our vcpu pointer so we don't come back in early */
+	/* Clear our vcpu and vcore pointers so we don't come back in early */
 	li	r0, 0
+	std	r0, HSTATE_KVM_VCPU(r13)
 	/*
-	 * Once we clear HSTATE_KVM_VCPU(r13), the code in
+	 * Once we clear HSTATE_KVM_VCORE(r13), the code in
 	 * kvmppc_run_core() is going to assume that all our vcpu
 	 * state is visible in memory.  This lwsync makes sure
 	 * that that is true.
 	 */
 	lwsync
-	std	r0, HSTATE_KVM_VCPU(r13)
+	std	r0, HSTATE_KVM_VCORE(r13)
 
 /*
  * At this point we have finished executing in the guest.
@@ -374,16 +398,63 @@ kvm_no_guest:
 	b	power7_wakeup_loss
 
 53:	HMT_LOW
-	ld	r4, HSTATE_KVM_VCPU(r13)
-	cmpdi	r4, 0
+	ld	r5, HSTATE_KVM_VCORE(r13)
+	cmpdi	r5, 0
+	bne	60f
+	ld	r3, HSTATE_SPLIT_MODE(r13)
+	cmpdi	r3, 0
+	beq	kvm_no_guest
+	lbz	r0, KVM_SPLIT_DO_NAP(r3)
+	cmpwi	r0, 0
 	beq	kvm_no_guest
 	HMT_MEDIUM
+	b	kvm_unsplit_nap
+60:	HMT_MEDIUM
 	b	kvm_secondary_got_guest
 
 54:	li	r0, KVM_HWTHREAD_IN_KVM
 	stb	r0, HSTATE_HWTHREAD_STATE(r13)
 	b	kvm_no_guest
 
+/*
+ * Here the primary thread is trying to return the core to
+ * whole-core mode, so we need to nap.
+ */
+kvm_unsplit_nap:
+	/* clear any pending message */
+BEGIN_FTR_SECTION
+	lis	r6, (PPC_DBELL_SERVER << (63-36))@h
+	PPC_MSGCLR(6)
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
+	/* Set kvm_split_mode.napped[tid] = 1 */
+	ld	r3, HSTATE_SPLIT_MODE(r13)
+	li	r0, 1
+	lhz	r4, PACAPACAINDEX(r13)
+	clrldi	r4, r4, 61	/* micro-threading => P8 => 8 threads/core */
+	addi	r4, r4, KVM_SPLIT_NAPPED
+	stbx	r0, r3, r4
+	/* Check the do_nap flag again after setting napped[] */
+	sync
+	lbz	r0, KVM_SPLIT_DO_NAP(r3)
+	cmpwi	r0, 0
+	beq	57f
+	li	r3, (LPCR_PECEDH | LPCR_PECE0) >> 4
+	mfspr	r4, SPRN_LPCR
+	rlwimi	r4, r3, 4, (LPCR_PECEDP | LPCR_PECEDH | LPCR_PECE0 | LPCR_PECE1)
+	mtspr	SPRN_LPCR, r4
+	isync
+	std	r0, HSTATE_SCRATCH0(r13)
+	ptesync
+	ld	r0, HSTATE_SCRATCH0(r13)
+1:	cmpd	r0, r0
+	bne	1b
+	nap
+	b	.
+
+57:	li	r0, 0
+	stbx	r0, r3, r4
+	b	kvm_no_guest
+
 /******************************************************************************
  *                                                                            *
  *                               Entry code                                   *
@@ -854,7 +925,10 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
 	cmpwi	r0, 0
 	bne	21f
 	HMT_LOW
-20:	lbz	r0, VCORE_IN_GUEST(r5)
+20:	lwz	r3, VCORE_ENTRY_EXIT(r5)
+	cmpwi	r3, 0x100
+	bge	no_switch_exit
+	lbz	r0, VCORE_IN_GUEST(r5)
 	cmpwi	r0, 0
 	beq	20b
 	HMT_MEDIUM
@@ -870,7 +944,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
 	blt	hdec_soon
 
 	ld	r6, VCPU_CTR(r4)
-	lwz	r7, VCPU_XER(r4)
+	ld	r7, VCPU_XER(r4)
 
 	mtctr	r6
 	mtxer	r7
@@ -985,9 +1059,13 @@ secondary_too_late:
 #endif
 11:	b	kvmhv_switch_to_host
 
+no_switch_exit:
+	HMT_MEDIUM
+	li	r12, 0
+	b	12f
 hdec_soon:
 	li	r12, BOOK3S_INTERRUPT_HV_DECREMENTER
-	stw	r12, VCPU_TRAP(r4)
+12:	stw	r12, VCPU_TRAP(r4)
 	mr	r9, r4
 #ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
 	addi	r3, r4, VCPU_TB_RMEXIT
@@ -1103,7 +1181,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
 	mfctr	r3
 	mfxer	r4
 	std	r3, VCPU_CTR(r9)
-	stw	r4, VCPU_XER(r9)
+	std	r4, VCPU_XER(r9)
 
 	/* If this is a page table miss then see if it's theirs or ours */
 	cmpwi	r12, BOOK3S_INTERRUPT_H_DATA_STORAGE
@@ -1176,6 +1254,11 @@ mc_cont:
 	ld	r9, HSTATE_KVM_VCPU(r13)
 	lwz	r12, VCPU_TRAP(r9)
 
+	/* Stop others sending VCPU interrupts to this physical CPU */
+	li	r0, -1
+	stw	r0, VCPU_CPU(r9)
+	stw	r0, VCPU_THREAD_CPU(r9)
+
 	/* Save guest CTRL register, set runlatch to 1 */
 	mfspr	r6,SPRN_CTRLF
 	stw	r6,VCPU_CTRL(r9)
@@ -1540,12 +1623,17 @@ kvmhv_switch_to_host:
 
 	/* Primary thread waits for all the secondaries to exit guest */
 15:	lwz	r3,VCORE_ENTRY_EXIT(r5)
-	srwi	r0,r3,8
+	rlwinm	r0,r3,32-8,0xff
 	clrldi	r3,r3,56
 	cmpw	r3,r0
 	bne	15b
 	isync
 
+	/* Did we actually switch to the guest at all? */
+	lbz	r6, VCORE_IN_GUEST(r5)
+	cmpwi	r6, 0
+	beq	19f
+
 	/* Primary thread switches back to host partition */
 	ld	r6,KVM_HOST_SDR1(r4)
 	lwz	r7,KVM_HOST_LPID(r4)
@@ -1589,7 +1677,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 18:
 	/* Signal secondary CPUs to continue */
 	stb	r0,VCORE_IN_GUEST(r5)
-	lis	r8,0x7fff		/* MAX_INT@h */
+19:	lis	r8,0x7fff		/* MAX_INT@h */
 	mtspr	SPRN_HDEC,r8
 
 16:	ld	r8,KVM_HOST_LPCR(r4)
@@ -1675,7 +1763,7 @@ kvmppc_hdsi:
 	bl	kvmppc_msr_interrupt
 fast_interrupt_c_return:
 6:	ld	r7, VCPU_CTR(r9)
-	lwz	r8, VCPU_XER(r9)
+	ld	r8, VCPU_XER(r9)
 	mtctr	r7
 	mtxer	r8
 	mr	r4, r9
@@ -1816,8 +1904,8 @@ hcall_real_table:
 	.long	DOTSYM(kvmppc_h_remove) - hcall_real_table
 	.long	DOTSYM(kvmppc_h_enter) - hcall_real_table
 	.long	DOTSYM(kvmppc_h_read) - hcall_real_table
-	.long	0		/* 0x10 - H_CLEAR_MOD */
-	.long	0		/* 0x14 - H_CLEAR_REF */
+	.long	DOTSYM(kvmppc_h_clear_mod) - hcall_real_table
+	.long	DOTSYM(kvmppc_h_clear_ref) - hcall_real_table
 	.long	DOTSYM(kvmppc_h_protect) - hcall_real_table
 	.long	DOTSYM(kvmppc_h_get_tce) - hcall_real_table
 	.long	DOTSYM(kvmppc_h_put_tce) - hcall_real_table
diff --git a/arch/powerpc/kvm/book3s_paired_singles.c b/arch/powerpc/kvm/book3s_paired_singles.c
index bd6ab1672ae6..a759d9adb0b6 100644
--- a/arch/powerpc/kvm/book3s_paired_singles.c
+++ b/arch/powerpc/kvm/book3s_paired_singles.c
@@ -352,7 +352,7 @@ static inline u32 inst_get_field(u32 inst, int msb, int lsb)
 	return kvmppc_get_field(inst, msb + 32, lsb + 32);
 }
 
-bool kvmppc_inst_is_paired_single(struct kvm_vcpu *vcpu, u32 inst)
+static bool kvmppc_inst_is_paired_single(struct kvm_vcpu *vcpu, u32 inst)
 {
 	if (!(vcpu->arch.hflags & BOOK3S_HFLAG_PAIRED_SINGLE))
 		return false;
diff --git a/arch/powerpc/kvm/book3s_segment.S b/arch/powerpc/kvm/book3s_segment.S
index acee37cde840..ca8f174289bb 100644
--- a/arch/powerpc/kvm/book3s_segment.S
+++ b/arch/powerpc/kvm/book3s_segment.S
@@ -123,7 +123,7 @@ no_dcbz32_on:
 	PPC_LL	r8, SVCPU_CTR(r3)
 	PPC_LL	r9, SVCPU_LR(r3)
 	lwz	r10, SVCPU_CR(r3)
-	lwz	r11, SVCPU_XER(r3)
+	PPC_LL	r11, SVCPU_XER(r3)
 
 	mtctr	r8
 	mtlr	r9
@@ -237,7 +237,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
 	mfctr	r8
 	mflr	r9
 
-	stw	r5, SVCPU_XER(r13)
+	PPC_STL	r5, SVCPU_XER(r13)
 	PPC_STL	r6, SVCPU_FAULT_DAR(r13)
 	stw	r7, SVCPU_FAULT_DSISR(r13)
 	PPC_STL	r8, SVCPU_CTR(r13)
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index cc5842657161..ae458f0fd061 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -933,6 +933,7 @@ static void kvmppc_restart_interrupt(struct kvm_vcpu *vcpu,
 #endif
 		break;
 	case BOOKE_INTERRUPT_CRITICAL:
+		kvmppc_fill_pt_regs(&regs);
 		unknown_exception(&regs);
 		break;
 	case BOOKE_INTERRUPT_DEBUG:
diff --git a/arch/powerpc/kvm/e500_mmu.c b/arch/powerpc/kvm/e500_mmu.c
index 50860e919cb8..29911a07bcdb 100644
--- a/arch/powerpc/kvm/e500_mmu.c
+++ b/arch/powerpc/kvm/e500_mmu.c
@@ -377,7 +377,7 @@ int kvmppc_e500_emul_tlbsx(struct kvm_vcpu *vcpu, gva_t ea)
 			| MAS0_NV(vcpu_e500->gtlb_nv[tlbsel]);
 		vcpu->arch.shared->mas1 =
 			  (vcpu->arch.shared->mas6 & MAS6_SPID0)
-			| (vcpu->arch.shared->mas6 & (MAS6_SAS ? MAS1_TS : 0))
+			| ((vcpu->arch.shared->mas6 & MAS6_SAS) ? MAS1_TS : 0)
 			| (vcpu->arch.shared->mas4 & MAS4_TSIZED(~0));
 		vcpu->arch.shared->mas2 &= MAS2_EPN;
 		vcpu->arch.shared->mas2 |= vcpu->arch.shared->mas4 &
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index e5dde32fe71f..2e51289610e4 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -660,7 +660,7 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
 	return kvmppc_core_pending_dec(vcpu);
 }
 
-enum hrtimer_restart kvmppc_decrementer_wakeup(struct hrtimer *timer)
+static enum hrtimer_restart kvmppc_decrementer_wakeup(struct hrtimer *timer)
 {
 	struct kvm_vcpu *vcpu;
 
diff --git a/arch/s390/include/asm/Kbuild b/arch/s390/include/asm/Kbuild
index dc5385ebb071..5ad26dd94d77 100644
--- a/arch/s390/include/asm/Kbuild
+++ b/arch/s390/include/asm/Kbuild
@@ -3,5 +3,6 @@
 generic-y += clkdev.h
 generic-y += irq_work.h
 generic-y += mcs_spinlock.h
+generic-y += mm-arch-hooks.h
 generic-y += preempt.h
 generic-y += trace_clock.h
diff --git a/arch/s390/include/asm/ctl_reg.h b/arch/s390/include/asm/ctl_reg.h
index cfad7fca01d6..d7697ab802f6 100644
--- a/arch/s390/include/asm/ctl_reg.h
+++ b/arch/s390/include/asm/ctl_reg.h
@@ -57,7 +57,10 @@ union ctlreg0 {
 		unsigned long lap  : 1; /* Low-address-protection control */
 		unsigned long	   : 4;
 		unsigned long edat : 1; /* Enhanced-DAT-enablement control */
-		unsigned long	   : 23;
+		unsigned long	   : 4;
+		unsigned long afp  : 1; /* AFP-register control */
+		unsigned long vx   : 1; /* Vector enablement control */
+		unsigned long	   : 17;
 	};
 };
 
diff --git a/arch/s390/include/asm/etr.h b/arch/s390/include/asm/etr.h
index 629b79a93165..f7e5c36688c3 100644
--- a/arch/s390/include/asm/etr.h
+++ b/arch/s390/include/asm/etr.h
@@ -214,6 +214,9 @@ static inline int etr_ptff(void *ptff_block, unsigned int func)
 void etr_switch_to_local(void);
 void etr_sync_check(void);
 
+/* notifier for syncs */
+extern struct atomic_notifier_head s390_epoch_delta_notifier;
+
 /* STP interruption parameter */
 struct stp_irq_parm {
 	unsigned int _pad0	: 14;
diff --git a/arch/s390/include/asm/hugetlb.h b/arch/s390/include/asm/hugetlb.h
index 0130d0379edd..d9be7c0c1291 100644
--- a/arch/s390/include/asm/hugetlb.h
+++ b/arch/s390/include/asm/hugetlb.h
@@ -14,6 +14,7 @@
 
 #define is_hugepage_only_range(mm, addr, len)	0
 #define hugetlb_free_pgd_range			free_pgd_range
+#define hugepages_supported()			(MACHINE_HAS_HPAGE)
 
 void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
 		     pte_t *ptep, pte_t pte);
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index 3024acbe1f9d..df4db81254d3 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -258,6 +258,9 @@ struct kvm_vcpu_stat {
 	u32 diagnose_10;
 	u32 diagnose_44;
 	u32 diagnose_9c;
+	u32 diagnose_258;
+	u32 diagnose_308;
+	u32 diagnose_500;
 };
 
 #define PGM_OPERATION			0x01
@@ -630,7 +633,6 @@ extern char sie_exit;
 
 static inline void kvm_arch_hardware_disable(void) {}
 static inline void kvm_arch_check_processor_compat(void *rtn) {}
-static inline void kvm_arch_exit(void) {}
 static inline void kvm_arch_sync_events(struct kvm *kvm) {}
 static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {}
 static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
diff --git a/arch/s390/include/asm/mm-arch-hooks.h b/arch/s390/include/asm/mm-arch-hooks.h
deleted file mode 100644
index 07680b2f3c59..000000000000
--- a/arch/s390/include/asm/mm-arch-hooks.h
+++ /dev/null
@@ -1,15 +0,0 @@
-/*
- * Architecture specific mm hooks
- *
- * Copyright (C) 2015, IBM Corporation
- * Author: Laurent Dufour <ldufour@linux.vnet.ibm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#ifndef _ASM_S390_MM_ARCH_HOOKS_H
-#define _ASM_S390_MM_ARCH_HOOKS_H
-
-#endif /* _ASM_S390_MM_ARCH_HOOKS_H */
diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h
index dd345238d9a7..53eacbd4f09b 100644
--- a/arch/s390/include/asm/page.h
+++ b/arch/s390/include/asm/page.h
@@ -17,10 +17,7 @@
 #define PAGE_DEFAULT_ACC	0
 #define PAGE_DEFAULT_KEY	(PAGE_DEFAULT_ACC << 4)
 
-#include <asm/setup.h>
-#ifndef __ASSEMBLY__
-
-extern int HPAGE_SHIFT;
+#define HPAGE_SHIFT	20
 #define HPAGE_SIZE	(1UL << HPAGE_SHIFT)
 #define HPAGE_MASK	(~(HPAGE_SIZE - 1))
 #define HUGETLB_PAGE_ORDER	(HPAGE_SHIFT - PAGE_SHIFT)
@@ -30,6 +27,9 @@ extern int HPAGE_SHIFT;
 #define ARCH_HAS_PREPARE_HUGEPAGE
 #define ARCH_HAS_HUGEPAGE_CLEAR_FLUSH
 
+#include <asm/setup.h>
+#ifndef __ASSEMBLY__
+
 static inline void storage_key_init_range(unsigned long start, unsigned long end)
 {
 #if PAGE_DEFAULT_KEY
diff --git a/arch/s390/include/asm/perf_event.h b/arch/s390/include/asm/perf_event.h
index 4cb19fe76dd9..f897ec73dc8c 100644
--- a/arch/s390/include/asm/perf_event.h
+++ b/arch/s390/include/asm/perf_event.h
@@ -87,7 +87,15 @@ struct sf_raw_sample {
 } __packed;
 
 /* Perf hardware reserve and release functions */
+#ifdef CONFIG_PERF_EVENTS
 int perf_reserve_sampling(void);
 void perf_release_sampling(void);
+#else /* CONFIG_PERF_EVENTS */
+static inline int perf_reserve_sampling(void)
+{
+	return 0;
+}
+static inline void perf_release_sampling(void) {}
+#endif /* CONFIG_PERF_EVENTS */
 
 #endif /* _ASM_S390_PERF_EVENT_H */
diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c
index c7d1b9d09011..a2da259d9327 100644
--- a/arch/s390/kernel/asm-offsets.c
+++ b/arch/s390/kernel/asm-offsets.c
@@ -23,15 +23,15 @@
 
 int main(void)
 {
-	DEFINE(__THREAD_info, offsetof(struct task_struct, stack));
-	DEFINE(__THREAD_ksp, offsetof(struct task_struct, thread.ksp));
-	DEFINE(__THREAD_mm_segment, offsetof(struct task_struct, thread.mm_segment));
-	BLANK();
+	DEFINE(__TASK_thread_info, offsetof(struct task_struct, stack));
+	DEFINE(__TASK_thread, offsetof(struct task_struct, thread));
 	DEFINE(__TASK_pid, offsetof(struct task_struct, pid));
 	BLANK();
-	DEFINE(__THREAD_per_cause, offsetof(struct task_struct, thread.per_event.cause));
-	DEFINE(__THREAD_per_address, offsetof(struct task_struct, thread.per_event.address));
-	DEFINE(__THREAD_per_paid, offsetof(struct task_struct, thread.per_event.paid));
+	DEFINE(__THREAD_ksp, offsetof(struct thread_struct, ksp));
+	DEFINE(__THREAD_per_cause, offsetof(struct thread_struct, per_event.cause));
+	DEFINE(__THREAD_per_address, offsetof(struct thread_struct, per_event.address));
+	DEFINE(__THREAD_per_paid, offsetof(struct thread_struct, per_event.paid));
+	DEFINE(__THREAD_trap_tdb, offsetof(struct thread_struct, trap_tdb));
 	BLANK();
 	DEFINE(__TI_task, offsetof(struct thread_info, task));
 	DEFINE(__TI_flags, offsetof(struct thread_info, flags));
@@ -176,7 +176,6 @@ int main(void)
 	DEFINE(__LC_VDSO_PER_CPU, offsetof(struct _lowcore, vdso_per_cpu_data));
 	DEFINE(__LC_GMAP, offsetof(struct _lowcore, gmap));
 	DEFINE(__LC_PGM_TDB, offsetof(struct _lowcore, pgm_tdb));
-	DEFINE(__THREAD_trap_tdb, offsetof(struct task_struct, thread.trap_tdb));
 	DEFINE(__GMAP_ASCE, offsetof(struct gmap, asce));
 	DEFINE(__SIE_PROG0C, offsetof(struct kvm_s390_sie_block, prog0c));
 	DEFINE(__SIE_PROG20, offsetof(struct kvm_s390_sie_block, prog20));
diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index 3238893c9d4f..84062e7a77da 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -178,17 +178,21 @@ _PIF_WORK	= (_PIF_PER_TRAP)
  */
 ENTRY(__switch_to)
 	stmg	%r6,%r15,__SF_GPRS(%r15)	# store gprs of prev task
-	stg	%r15,__THREAD_ksp(%r2)		# store kernel stack of prev
-	lg	%r4,__THREAD_info(%r2)		# get thread_info of prev
-	lg	%r5,__THREAD_info(%r3)		# get thread_info of next
+	lgr	%r1,%r2
+	aghi	%r1,__TASK_thread		# thread_struct of prev task
+	lg	%r4,__TASK_thread_info(%r2)	# get thread_info of prev
+	lg	%r5,__TASK_thread_info(%r3)	# get thread_info of next
+	stg	%r15,__THREAD_ksp(%r1)		# store kernel stack of prev
+	lgr	%r1,%r3
+	aghi	%r1,__TASK_thread		# thread_struct of next task
 	lgr	%r15,%r5
 	aghi	%r15,STACK_INIT			# end of kernel stack of next
 	stg	%r3,__LC_CURRENT		# store task struct of next
 	stg	%r5,__LC_THREAD_INFO		# store thread info of next
 	stg	%r15,__LC_KERNEL_STACK		# store end of kernel stack
+	lg	%r15,__THREAD_ksp(%r1)		# load kernel stack of next
 	lctl	%c4,%c4,__TASK_pid(%r3)		# load pid to control reg. 4
 	mvc	__LC_CURRENT_PID+4(4,%r0),__TASK_pid(%r3) # store pid of next
-	lg	%r15,__THREAD_ksp(%r3)		# load kernel stack of next
 	lmg	%r6,%r15,__SF_GPRS(%r15)	# load gprs of next task
 	br	%r14
 
@@ -417,6 +421,7 @@ ENTRY(pgm_check_handler)
 	LAST_BREAK %r14
 	lg	%r15,__LC_KERNEL_STACK
 	lg	%r14,__TI_task(%r12)
+	aghi	%r14,__TASK_thread	# pointer to thread_struct
 	lghi	%r13,__LC_PGM_TDB
 	tm	__LC_PGM_ILC+2,0x02	# check for transaction abort
 	jz	2f
diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c
index 505c17c0ae1a..56b550893593 100644
--- a/arch/s390/kernel/nmi.c
+++ b/arch/s390/kernel/nmi.c
@@ -21,6 +21,7 @@
 #include <asm/nmi.h>
 #include <asm/crw.h>
 #include <asm/switch_to.h>
+#include <asm/ctl_reg.h>
 
 struct mcck_struct {
 	int kill_task;
@@ -129,26 +130,30 @@ static int notrace s390_revalidate_registers(struct mci *mci)
 	} else
 		asm volatile("lfpc 0(%0)" : : "a" (fpt_creg_save_area));
 
-	asm volatile(
-		"	ld	0,0(%0)\n"
-		"	ld	1,8(%0)\n"
-		"	ld	2,16(%0)\n"
-		"	ld	3,24(%0)\n"
-		"	ld	4,32(%0)\n"
-		"	ld	5,40(%0)\n"
-		"	ld	6,48(%0)\n"
-		"	ld	7,56(%0)\n"
-		"	ld	8,64(%0)\n"
-		"	ld	9,72(%0)\n"
-		"	ld	10,80(%0)\n"
-		"	ld	11,88(%0)\n"
-		"	ld	12,96(%0)\n"
-		"	ld	13,104(%0)\n"
-		"	ld	14,112(%0)\n"
-		"	ld	15,120(%0)\n"
-		: : "a" (fpt_save_area));
-	/* Revalidate vector registers */
-	if (MACHINE_HAS_VX && current->thread.vxrs) {
+	if (!MACHINE_HAS_VX) {
+		/* Revalidate floating point registers */
+		asm volatile(
+			"	ld	0,0(%0)\n"
+			"	ld	1,8(%0)\n"
+			"	ld	2,16(%0)\n"
+			"	ld	3,24(%0)\n"
+			"	ld	4,32(%0)\n"
+			"	ld	5,40(%0)\n"
+			"	ld	6,48(%0)\n"
+			"	ld	7,56(%0)\n"
+			"	ld	8,64(%0)\n"
+			"	ld	9,72(%0)\n"
+			"	ld	10,80(%0)\n"
+			"	ld	11,88(%0)\n"
+			"	ld	12,96(%0)\n"
+			"	ld	13,104(%0)\n"
+			"	ld	14,112(%0)\n"
+			"	ld	15,120(%0)\n"
+			: : "a" (fpt_save_area));
+	} else {
+		/* Revalidate vector registers */
+		union ctlreg0 cr0;
+
 		if (!mci->vr) {
 			/*
 			 * Vector registers can't be restored and therefore
@@ -156,8 +161,12 @@ static int notrace s390_revalidate_registers(struct mci *mci)
 			 */
 			kill_task = 1;
 		}
+		cr0.val = S390_lowcore.cregs_save_area[0];
+		cr0.afp = cr0.vx = 1;
+		__ctl_load(cr0.val, 0, 0);
 		restore_vx_regs((__vector128 *)
-				S390_lowcore.vector_save_area_addr);
+				&S390_lowcore.vector_save_area);
+		__ctl_load(S390_lowcore.cregs_save_area[0], 0, 0);
 	}
 	/* Revalidate access registers */
 	asm volatile(
diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c
index dc5edc29b73a..8f587d871b9f 100644
--- a/arch/s390/kernel/process.c
+++ b/arch/s390/kernel/process.c
@@ -163,7 +163,7 @@ int copy_thread(unsigned long clone_flags, unsigned long new_stackp,
 asmlinkage void execve_tail(void)
 {
 	current->thread.fp_regs.fpc = 0;
-	asm volatile("sfpc %0,%0" : : "d" (0));
+	asm volatile("sfpc %0" : : "d" (0));
 }
 
 /*
diff --git a/arch/s390/kernel/sclp.S b/arch/s390/kernel/sclp.S
index 43c3169ea49c..ada0c07fe1a8 100644
--- a/arch/s390/kernel/sclp.S
+++ b/arch/s390/kernel/sclp.S
@@ -270,6 +270,8 @@ ENTRY(_sclp_print_early)
 	jno	.Lesa2
 	ahi	%r15,-80
 	stmh	%r6,%r15,96(%r15)		# store upper register halves
+	basr	%r13,0
+	lmh	%r0,%r15,.Lzeroes-.(%r13)	# clear upper register halves
 .Lesa2:
 	lr	%r10,%r2			# save string pointer
 	lhi	%r2,0
@@ -291,6 +293,8 @@ ENTRY(_sclp_print_early)
 .Lesa3:
 	lm	%r6,%r15,120(%r15)		# restore registers
 	br	%r14
+.Lzeroes:
+	.fill	64,4,0
 
 .LwritedataS4:
 	.long	0x00760005			# SCLP command for write data
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index f7f027caaaaa..ca070d260af2 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -885,8 +885,6 @@ void __init setup_arch(char **cmdline_p)
 	 */
 	setup_hwcaps();
 
-	HPAGE_SHIFT = MACHINE_HAS_HPAGE ? 20 : 0;
-
 	/*
 	 * Create kernel page tables and switch to virtual addressing.
 	 */
diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c
index 9e733d965e08..627887b075a7 100644
--- a/arch/s390/kernel/time.c
+++ b/arch/s390/kernel/time.c
@@ -58,6 +58,9 @@ EXPORT_SYMBOL_GPL(sched_clock_base_cc);
 
 static DEFINE_PER_CPU(struct clock_event_device, comparators);
 
+ATOMIC_NOTIFIER_HEAD(s390_epoch_delta_notifier);
+EXPORT_SYMBOL(s390_epoch_delta_notifier);
+
 /*
  * Scheduler clock - returns current time in nanosec units.
  */
@@ -752,7 +755,7 @@ static void clock_sync_cpu(struct clock_sync_data *sync)
 static int etr_sync_clock(void *data)
 {
 	static int first;
-	unsigned long long clock, old_clock, delay, delta;
+	unsigned long long clock, old_clock, clock_delta, delay, delta;
 	struct clock_sync_data *etr_sync;
 	struct etr_aib *sync_port, *aib;
 	int port;
@@ -789,6 +792,9 @@ static int etr_sync_clock(void *data)
 		delay = (unsigned long long)
 			(aib->edf2.etv - sync_port->edf2.etv) << 32;
 		delta = adjust_time(old_clock, clock, delay);
+		clock_delta = clock - old_clock;
+		atomic_notifier_call_chain(&s390_epoch_delta_notifier, 0,
+					   &clock_delta);
 		etr_sync->fixup_cc = delta;
 		fixup_clock_comparator(delta);
 		/* Verify that the clock is properly set. */
@@ -1526,7 +1532,7 @@ void stp_island_check(void)
 static int stp_sync_clock(void *data)
 {
 	static int first;
-	unsigned long long old_clock, delta;
+	unsigned long long old_clock, delta, new_clock, clock_delta;
 	struct clock_sync_data *stp_sync;
 	int rc;
 
@@ -1551,7 +1557,11 @@ static int stp_sync_clock(void *data)
 		old_clock = get_tod_clock();
 		rc = chsc_sstpc(stp_page, STP_OP_SYNC, 0);
 		if (rc == 0) {
-			delta = adjust_time(old_clock, get_tod_clock(), 0);
+			new_clock = get_tod_clock();
+			delta = adjust_time(old_clock, new_clock, 0);
+			clock_delta = new_clock - old_clock;
+			atomic_notifier_call_chain(&s390_epoch_delta_notifier,
+						   0, &clock_delta);
 			fixup_clock_comparator(delta);
 			rc = chsc_sstpi(stp_page, &stp_info,
 					sizeof(struct stp_sstpi));
diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c
index 4d96c9f53455..7bea81d8a363 100644
--- a/arch/s390/kernel/traps.c
+++ b/arch/s390/kernel/traps.c
@@ -259,7 +259,7 @@ void vector_exception(struct pt_regs *regs)
 	}
 
 	/* get vector interrupt code from fpc */
-	asm volatile("stfpc %0" : "=m" (current->thread.fp_regs.fpc));
+	asm volatile("stfpc %0" : "=Q" (current->thread.fp_regs.fpc));
 	vic = (current->thread.fp_regs.fpc & 0xf00) >> 8;
 	switch (vic) {
 	case 1: /* invalid vector operation */
@@ -297,7 +297,7 @@ void data_exception(struct pt_regs *regs)
 
 	location = get_trap_ip(regs);
 
-	asm volatile("stfpc %0" : "=m" (current->thread.fp_regs.fpc));
+	asm volatile("stfpc %0" : "=Q" (current->thread.fp_regs.fpc));
 	/* Check for vector register enablement */
 	if (MACHINE_HAS_VX && !current->thread.vxrs &&
 	    (current->thread.fp_regs.fpc & FPC_DXC_MASK) == 0xfe00) {
diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c
index fc7ec95848c3..5fbfb88f8477 100644
--- a/arch/s390/kvm/diag.c
+++ b/arch/s390/kvm/diag.c
@@ -27,13 +27,13 @@ static int diag_release_pages(struct kvm_vcpu *vcpu)
 
 	start = vcpu->run->s.regs.gprs[(vcpu->arch.sie_block->ipa & 0xf0) >> 4];
 	end = vcpu->run->s.regs.gprs[vcpu->arch.sie_block->ipa & 0xf] + 4096;
+	vcpu->stat.diagnose_10++;
 
 	if (start & ~PAGE_MASK || end & ~PAGE_MASK || start >= end
 	    || start < 2 * PAGE_SIZE)
 		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
 
 	VCPU_EVENT(vcpu, 5, "diag release pages %lX %lX", start, end);
-	vcpu->stat.diagnose_10++;
 
 	/*
 	 * We checked for start >= end above, so lets check for the
@@ -75,6 +75,9 @@ static int __diag_page_ref_service(struct kvm_vcpu *vcpu)
 	u16 rx = (vcpu->arch.sie_block->ipa & 0xf0) >> 4;
 	u16 ry = (vcpu->arch.sie_block->ipa & 0x0f);
 
+	VCPU_EVENT(vcpu, 3, "diag page reference parameter block at 0x%llx",
+		   vcpu->run->s.regs.gprs[rx]);
+	vcpu->stat.diagnose_258++;
 	if (vcpu->run->s.regs.gprs[rx] & 7)
 		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
 	rc = read_guest(vcpu, vcpu->run->s.regs.gprs[rx], rx, &parm, sizeof(parm));
@@ -85,6 +88,9 @@ static int __diag_page_ref_service(struct kvm_vcpu *vcpu)
 
 	switch (parm.subcode) {
 	case 0: /* TOKEN */
+		VCPU_EVENT(vcpu, 3, "pageref token addr 0x%llx "
+			   "select mask 0x%llx compare mask 0x%llx",
+			   parm.token_addr, parm.select_mask, parm.compare_mask);
 		if (vcpu->arch.pfault_token != KVM_S390_PFAULT_TOKEN_INVALID) {
 			/*
 			 * If the pagefault handshake is already activated,
@@ -114,6 +120,7 @@ static int __diag_page_ref_service(struct kvm_vcpu *vcpu)
 		 * the cancel, therefore to reduce code complexity, we assume
 		 * all outstanding tokens are already pending.
 		 */
+		VCPU_EVENT(vcpu, 3, "pageref cancel addr 0x%llx", parm.token_addr);
 		if (parm.token_addr || parm.select_mask ||
 		    parm.compare_mask || parm.zarch)
 			return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
@@ -174,7 +181,8 @@ static int __diag_ipl_functions(struct kvm_vcpu *vcpu)
 	unsigned int reg = vcpu->arch.sie_block->ipa & 0xf;
 	unsigned long subcode = vcpu->run->s.regs.gprs[reg] & 0xffff;
 
-	VCPU_EVENT(vcpu, 5, "diag ipl functions, subcode %lx", subcode);
+	VCPU_EVENT(vcpu, 3, "diag ipl functions, subcode %lx", subcode);
+	vcpu->stat.diagnose_308++;
 	switch (subcode) {
 	case 3:
 		vcpu->run->s390_reset_flags = KVM_S390_RESET_CLEAR;
@@ -202,6 +210,7 @@ static int __diag_virtio_hypercall(struct kvm_vcpu *vcpu)
 {
 	int ret;
 
+	vcpu->stat.diagnose_500++;
 	/* No virtio-ccw notification? Get out quickly. */
 	if (!vcpu->kvm->arch.css_support ||
 	    (vcpu->run->s.regs.gprs[1] != KVM_S390_VIRTIO_CCW_NOTIFY))
diff --git a/arch/s390/kvm/guestdbg.c b/arch/s390/kvm/guestdbg.c
index e97b3455d7e6..47518a324d75 100644
--- a/arch/s390/kvm/guestdbg.c
+++ b/arch/s390/kvm/guestdbg.c
@@ -473,10 +473,45 @@ static void filter_guest_per_event(struct kvm_vcpu *vcpu)
 		vcpu->arch.sie_block->iprcc &= ~PGM_PER;
 }
 
+#define pssec(vcpu) (vcpu->arch.sie_block->gcr[1] & _ASCE_SPACE_SWITCH)
+#define hssec(vcpu) (vcpu->arch.sie_block->gcr[13] & _ASCE_SPACE_SWITCH)
+#define old_ssec(vcpu) ((vcpu->arch.sie_block->tecmc >> 31) & 0x1)
+#define old_as_is_home(vcpu) !(vcpu->arch.sie_block->tecmc & 0xffff)
+
 void kvm_s390_handle_per_event(struct kvm_vcpu *vcpu)
 {
+	int new_as;
+
 	if (debug_exit_required(vcpu))
 		vcpu->guest_debug |= KVM_GUESTDBG_EXIT_PENDING;
 
 	filter_guest_per_event(vcpu);
+
+	/*
+	 * Only RP, SAC, SACF, PT, PTI, PR, PC instructions can trigger
+	 * a space-switch event. PER events enforce space-switch events
+	 * for these instructions. So if no PER event for the guest is left,
+	 * we might have to filter the space-switch element out, too.
+	 */
+	if (vcpu->arch.sie_block->iprcc == PGM_SPACE_SWITCH) {
+		vcpu->arch.sie_block->iprcc = 0;
+		new_as = psw_bits(vcpu->arch.sie_block->gpsw).as;
+
+		/*
+		 * If the AS changed from / to home, we had RP, SAC or SACF
+		 * instruction. Check primary and home space-switch-event
+		 * controls. (theoretically home -> home produced no event)
+		 */
+		if (((new_as == PSW_AS_HOME) ^ old_as_is_home(vcpu)) &&
+		     (pssec(vcpu) || hssec(vcpu)))
+			vcpu->arch.sie_block->iprcc = PGM_SPACE_SWITCH;
+
+		/*
+		 * PT, PTI, PR, PC instruction operate on primary AS only. Check
+		 * if the primary-space-switch-event control was or got set.
+		 */
+		if (new_as == PSW_AS_PRIMARY && !old_as_is_home(vcpu) &&
+		    (pssec(vcpu) || old_ssec(vcpu)))
+			vcpu->arch.sie_block->iprcc = PGM_SPACE_SWITCH;
+	}
 }
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index c98d89708e99..b277d50dcf76 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -30,7 +30,6 @@
 #define IOINT_SCHID_MASK 0x0000ffff
 #define IOINT_SSID_MASK 0x00030000
 #define IOINT_CSSID_MASK 0x03fc0000
-#define IOINT_AI_MASK 0x04000000
 #define PFAULT_INIT 0x0600
 #define PFAULT_DONE 0x0680
 #define VIRTIO_PARAM 0x0d00
@@ -72,9 +71,13 @@ static int ckc_interrupts_enabled(struct kvm_vcpu *vcpu)
 
 static int ckc_irq_pending(struct kvm_vcpu *vcpu)
 {
+	preempt_disable();
 	if (!(vcpu->arch.sie_block->ckc <
-	      get_tod_clock_fast() + vcpu->arch.sie_block->epoch))
+	      get_tod_clock_fast() + vcpu->arch.sie_block->epoch)) {
+		preempt_enable();
 		return 0;
+	}
+	preempt_enable();
 	return ckc_interrupts_enabled(vcpu);
 }
 
@@ -311,8 +314,8 @@ static int __must_check __deliver_pfault_init(struct kvm_vcpu *vcpu)
 	li->irq.ext.ext_params2 = 0;
 	spin_unlock(&li->lock);
 
-	VCPU_EVENT(vcpu, 4, "interrupt: pfault init parm:%x,parm64:%llx",
-		   0, ext.ext_params2);
+	VCPU_EVENT(vcpu, 4, "deliver: pfault init token 0x%llx",
+		   ext.ext_params2);
 	trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id,
 					 KVM_S390_INT_PFAULT_INIT,
 					 0, ext.ext_params2);
@@ -368,7 +371,7 @@ static int __must_check __deliver_machine_check(struct kvm_vcpu *vcpu)
 	spin_unlock(&fi->lock);
 
 	if (deliver) {
-		VCPU_EVENT(vcpu, 4, "interrupt: machine check mcic=%llx",
+		VCPU_EVENT(vcpu, 3, "deliver: machine check mcic 0x%llx",
 			   mchk.mcic);
 		trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id,
 						 KVM_S390_MCHK,
@@ -403,7 +406,7 @@ static int __must_check __deliver_restart(struct kvm_vcpu *vcpu)
 	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
 	int rc;
 
-	VCPU_EVENT(vcpu, 4, "%s", "interrupt: cpu restart");
+	VCPU_EVENT(vcpu, 3, "%s", "deliver: cpu restart");
 	vcpu->stat.deliver_restart_signal++;
 	trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_RESTART, 0, 0);
 
@@ -427,7 +430,6 @@ static int __must_check __deliver_set_prefix(struct kvm_vcpu *vcpu)
 	clear_bit(IRQ_PEND_SET_PREFIX, &li->pending_irqs);
 	spin_unlock(&li->lock);
 
-	VCPU_EVENT(vcpu, 4, "interrupt: set prefix to %x", prefix.address);
 	vcpu->stat.deliver_prefix_signal++;
 	trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id,
 					 KVM_S390_SIGP_SET_PREFIX,
@@ -450,7 +452,7 @@ static int __must_check __deliver_emergency_signal(struct kvm_vcpu *vcpu)
 		clear_bit(IRQ_PEND_EXT_EMERGENCY, &li->pending_irqs);
 	spin_unlock(&li->lock);
 
-	VCPU_EVENT(vcpu, 4, "%s", "interrupt: sigp emerg");
+	VCPU_EVENT(vcpu, 4, "%s", "deliver: sigp emerg");
 	vcpu->stat.deliver_emergency_signal++;
 	trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_INT_EMERGENCY,
 					 cpu_addr, 0);
@@ -477,7 +479,7 @@ static int __must_check __deliver_external_call(struct kvm_vcpu *vcpu)
 	clear_bit(IRQ_PEND_EXT_EXTERNAL, &li->pending_irqs);
 	spin_unlock(&li->lock);
 
-	VCPU_EVENT(vcpu, 4, "%s", "interrupt: sigp ext call");
+	VCPU_EVENT(vcpu, 4, "%s", "deliver: sigp ext call");
 	vcpu->stat.deliver_external_call++;
 	trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id,
 					 KVM_S390_INT_EXTERNAL_CALL,
@@ -506,7 +508,7 @@ static int __must_check __deliver_prog(struct kvm_vcpu *vcpu)
 	memset(&li->irq.pgm, 0, sizeof(pgm_info));
 	spin_unlock(&li->lock);
 
-	VCPU_EVENT(vcpu, 4, "interrupt: pgm check code:%x, ilc:%x",
+	VCPU_EVENT(vcpu, 3, "deliver: program irq code 0x%x, ilc:%d",
 		   pgm_info.code, ilc);
 	vcpu->stat.deliver_program_int++;
 	trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_PROGRAM_INT,
@@ -622,7 +624,7 @@ static int __must_check __deliver_service(struct kvm_vcpu *vcpu)
 	clear_bit(IRQ_PEND_EXT_SERVICE, &fi->pending_irqs);
 	spin_unlock(&fi->lock);
 
-	VCPU_EVENT(vcpu, 4, "interrupt: sclp parm:%x",
+	VCPU_EVENT(vcpu, 4, "deliver: sclp parameter 0x%x",
 		   ext.ext_params);
 	vcpu->stat.deliver_service_signal++;
 	trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_INT_SERVICE,
@@ -651,9 +653,6 @@ static int __must_check __deliver_pfault_done(struct kvm_vcpu *vcpu)
 					struct kvm_s390_interrupt_info,
 					list);
 	if (inti) {
-		trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id,
-				KVM_S390_INT_PFAULT_DONE, 0,
-				inti->ext.ext_params2);
 		list_del(&inti->list);
 		fi->counters[FIRQ_CNTR_PFAULT] -= 1;
 	}
@@ -662,6 +661,12 @@ static int __must_check __deliver_pfault_done(struct kvm_vcpu *vcpu)
 	spin_unlock(&fi->lock);
 
 	if (inti) {
+		trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id,
+						 KVM_S390_INT_PFAULT_DONE, 0,
+						 inti->ext.ext_params2);
+		VCPU_EVENT(vcpu, 4, "deliver: pfault done token 0x%llx",
+			   inti->ext.ext_params2);
+
 		rc  = put_guest_lc(vcpu, EXT_IRQ_CP_SERVICE,
 				(u16 *)__LC_EXT_INT_CODE);
 		rc |= put_guest_lc(vcpu, PFAULT_DONE,
@@ -691,7 +696,7 @@ static int __must_check __deliver_virtio(struct kvm_vcpu *vcpu)
 					list);
 	if (inti) {
 		VCPU_EVENT(vcpu, 4,
-			   "interrupt: virtio parm:%x,parm64:%llx",
+			   "deliver: virtio parm: 0x%x,parm64: 0x%llx",
 			   inti->ext.ext_params, inti->ext.ext_params2);
 		vcpu->stat.deliver_virtio_interrupt++;
 		trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id,
@@ -741,7 +746,7 @@ static int __must_check __deliver_io(struct kvm_vcpu *vcpu,
 					struct kvm_s390_interrupt_info,
 					list);
 	if (inti) {
-		VCPU_EVENT(vcpu, 4, "interrupt: I/O %llx", inti->type);
+		VCPU_EVENT(vcpu, 4, "deliver: I/O 0x%llx", inti->type);
 		vcpu->stat.deliver_io_int++;
 		trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id,
 				inti->type,
@@ -855,7 +860,9 @@ int kvm_s390_handle_wait(struct kvm_vcpu *vcpu)
 		goto no_timer;
 	}
 
+	preempt_disable();
 	now = get_tod_clock_fast() + vcpu->arch.sie_block->epoch;
+	preempt_enable();
 	sltime = tod_to_ns(vcpu->arch.sie_block->ckc - now);
 
 	/* underflow */
@@ -864,7 +871,7 @@ int kvm_s390_handle_wait(struct kvm_vcpu *vcpu)
 
 	__set_cpu_idle(vcpu);
 	hrtimer_start(&vcpu->arch.ckc_timer, ktime_set (0, sltime) , HRTIMER_MODE_REL);
-	VCPU_EVENT(vcpu, 5, "enabled wait via clock comparator: %llx ns", sltime);
+	VCPU_EVENT(vcpu, 4, "enabled wait via clock comparator: %llu ns", sltime);
 no_timer:
 	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
 	kvm_vcpu_block(vcpu);
@@ -894,7 +901,9 @@ enum hrtimer_restart kvm_s390_idle_wakeup(struct hrtimer *timer)
 	u64 now, sltime;
 
 	vcpu = container_of(timer, struct kvm_vcpu, arch.ckc_timer);
+	preempt_disable();
 	now = get_tod_clock_fast() + vcpu->arch.sie_block->epoch;
+	preempt_enable();
 	sltime = tod_to_ns(vcpu->arch.sie_block->ckc - now);
 
 	/*
@@ -968,6 +977,10 @@ static int __inject_prog(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
 {
 	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
 
+	VCPU_EVENT(vcpu, 3, "inject: program irq code 0x%x", irq->u.pgm.code);
+	trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_PROGRAM_INT,
+				   irq->u.pgm.code, 0);
+
 	li->irq.pgm = irq->u.pgm;
 	set_bit(IRQ_PEND_PROG, &li->pending_irqs);
 	return 0;
@@ -978,9 +991,6 @@ int kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code)
 	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
 	struct kvm_s390_irq irq;
 
-	VCPU_EVENT(vcpu, 3, "inject: program check %d (from kernel)", code);
-	trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_PROGRAM_INT, code,
-				   0, 1);
 	spin_lock(&li->lock);
 	irq.u.pgm.code = code;
 	__inject_prog(vcpu, &irq);
@@ -996,10 +1006,6 @@ int kvm_s390_inject_prog_irq(struct kvm_vcpu *vcpu,
 	struct kvm_s390_irq irq;
 	int rc;
 
-	VCPU_EVENT(vcpu, 3, "inject: prog irq %d (from kernel)",
-		   pgm_info->code);
-	trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_PROGRAM_INT,
-				   pgm_info->code, 0, 1);
 	spin_lock(&li->lock);
 	irq.u.pgm = *pgm_info;
 	rc = __inject_prog(vcpu, &irq);
@@ -1012,11 +1018,11 @@ static int __inject_pfault_init(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
 {
 	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
 
-	VCPU_EVENT(vcpu, 3, "inject: external irq params:%x, params2:%llx",
-		   irq->u.ext.ext_params, irq->u.ext.ext_params2);
+	VCPU_EVENT(vcpu, 4, "inject: pfault init parameter block at 0x%llx",
+		   irq->u.ext.ext_params2);
 	trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_INT_PFAULT_INIT,
 				   irq->u.ext.ext_params,
-				   irq->u.ext.ext_params2, 2);
+				   irq->u.ext.ext_params2);
 
 	li->irq.ext = irq->u.ext;
 	set_bit(IRQ_PEND_PFAULT_INIT, &li->pending_irqs);
@@ -1045,10 +1051,10 @@ static int __inject_extcall(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
 	struct kvm_s390_extcall_info *extcall = &li->irq.extcall;
 	uint16_t src_id = irq->u.extcall.code;
 
-	VCPU_EVENT(vcpu, 3, "inject: external call source-cpu:%u",
+	VCPU_EVENT(vcpu, 4, "inject: external call source-cpu:%u",
 		   src_id);
 	trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_INT_EXTERNAL_CALL,
-				   src_id, 0, 2);
+				   src_id, 0);
 
 	/* sending vcpu invalid */
 	if (src_id >= KVM_MAX_VCPUS ||
@@ -1070,10 +1076,10 @@ static int __inject_set_prefix(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
 	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
 	struct kvm_s390_prefix_info *prefix = &li->irq.prefix;
 
-	VCPU_EVENT(vcpu, 3, "inject: set prefix to %x (from user)",
+	VCPU_EVENT(vcpu, 3, "inject: set prefix to %x",
 		   irq->u.prefix.address);
 	trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_SIGP_SET_PREFIX,
-				   irq->u.prefix.address, 0, 2);
+				   irq->u.prefix.address, 0);
 
 	if (!is_vcpu_stopped(vcpu))
 		return -EBUSY;
@@ -1090,7 +1096,7 @@ static int __inject_sigp_stop(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
 	struct kvm_s390_stop_info *stop = &li->irq.stop;
 	int rc = 0;
 
-	trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_SIGP_STOP, 0, 0, 2);
+	trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_SIGP_STOP, 0, 0);
 
 	if (irq->u.stop.flags & ~KVM_S390_STOP_SUPP_FLAGS)
 		return -EINVAL;
@@ -1114,8 +1120,8 @@ static int __inject_sigp_restart(struct kvm_vcpu *vcpu,
 {
 	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
 
-	VCPU_EVENT(vcpu, 3, "inject: restart type %llx", irq->type);
-	trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_RESTART, 0, 0, 2);
+	VCPU_EVENT(vcpu, 3, "%s", "inject: restart int");
+	trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_RESTART, 0, 0);
 
 	set_bit(IRQ_PEND_RESTART, &li->pending_irqs);
 	return 0;
@@ -1126,10 +1132,10 @@ static int __inject_sigp_emergency(struct kvm_vcpu *vcpu,
 {
 	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
 
-	VCPU_EVENT(vcpu, 3, "inject: emergency %u\n",
+	VCPU_EVENT(vcpu, 4, "inject: emergency from cpu %u",
 		   irq->u.emerg.code);
 	trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_INT_EMERGENCY,
-				   irq->u.emerg.code, 0, 2);
+				   irq->u.emerg.code, 0);
 
 	set_bit(irq->u.emerg.code, li->sigp_emerg_pending);
 	set_bit(IRQ_PEND_EXT_EMERGENCY, &li->pending_irqs);
@@ -1142,10 +1148,10 @@ static int __inject_mchk(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
 	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
 	struct kvm_s390_mchk_info *mchk = &li->irq.mchk;
 
-	VCPU_EVENT(vcpu, 5, "inject: machine check parm64:%llx",
+	VCPU_EVENT(vcpu, 3, "inject: machine check mcic 0x%llx",
 		   irq->u.mchk.mcic);
 	trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_MCHK, 0,
-				   irq->u.mchk.mcic, 2);
+				   irq->u.mchk.mcic);
 
 	/*
 	 * Because repressible machine checks can be indicated along with
@@ -1172,9 +1178,9 @@ static int __inject_ckc(struct kvm_vcpu *vcpu)
 {
 	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
 
-	VCPU_EVENT(vcpu, 3, "inject: type %x", KVM_S390_INT_CLOCK_COMP);
+	VCPU_EVENT(vcpu, 3, "%s", "inject: clock comparator external");
 	trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_INT_CLOCK_COMP,
-				   0, 0, 2);
+				   0, 0);
 
 	set_bit(IRQ_PEND_EXT_CLOCK_COMP, &li->pending_irqs);
 	atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
@@ -1185,9 +1191,9 @@ static int __inject_cpu_timer(struct kvm_vcpu *vcpu)
 {
 	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
 
-	VCPU_EVENT(vcpu, 3, "inject: type %x", KVM_S390_INT_CPU_TIMER);
+	VCPU_EVENT(vcpu, 3, "%s", "inject: cpu timer external");
 	trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_INT_CPU_TIMER,
-				   0, 0, 2);
+				   0, 0);
 
 	set_bit(IRQ_PEND_EXT_CPU_TIMER, &li->pending_irqs);
 	atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
@@ -1435,20 +1441,20 @@ int kvm_s390_inject_vm(struct kvm *kvm,
 		inti->ext.ext_params2 = s390int->parm64;
 		break;
 	case KVM_S390_INT_SERVICE:
-		VM_EVENT(kvm, 5, "inject: sclp parm:%x", s390int->parm);
+		VM_EVENT(kvm, 4, "inject: sclp parm:%x", s390int->parm);
 		inti->ext.ext_params = s390int->parm;
 		break;
 	case KVM_S390_INT_PFAULT_DONE:
 		inti->ext.ext_params2 = s390int->parm64;
 		break;
 	case KVM_S390_MCHK:
-		VM_EVENT(kvm, 5, "inject: machine check parm64:%llx",
+		VM_EVENT(kvm, 3, "inject: machine check mcic 0x%llx",
 			 s390int->parm64);
 		inti->mchk.cr14 = s390int->parm; /* upper bits are not used */
 		inti->mchk.mcic = s390int->parm64;
 		break;
 	case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX:
-		if (inti->type & IOINT_AI_MASK)
+		if (inti->type & KVM_S390_INT_IO_AI_MASK)
 			VM_EVENT(kvm, 5, "%s", "inject: I/O (AI)");
 		else
 			VM_EVENT(kvm, 5, "inject: I/O css %x ss %x schid %04x",
@@ -1535,8 +1541,6 @@ static int do_inject_vcpu(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
 
 	switch (irq->type) {
 	case KVM_S390_PROGRAM_INT:
-		VCPU_EVENT(vcpu, 3, "inject: program check %d (from user)",
-			   irq->u.pgm.code);
 		rc = __inject_prog(vcpu, irq);
 		break;
 	case KVM_S390_SIGP_SET_PREFIX:
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 2078f92d15ac..397b88dc1b2c 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -28,6 +28,7 @@
 #include <linux/vmalloc.h>
 #include <asm/asm-offsets.h>
 #include <asm/lowcore.h>
+#include <asm/etr.h>
 #include <asm/pgtable.h>
 #include <asm/nmi.h>
 #include <asm/switch_to.h>
@@ -108,6 +109,9 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
 	{ "diagnose_10", VCPU_STAT(diagnose_10) },
 	{ "diagnose_44", VCPU_STAT(diagnose_44) },
 	{ "diagnose_9c", VCPU_STAT(diagnose_9c) },
+	{ "diagnose_258", VCPU_STAT(diagnose_258) },
+	{ "diagnose_308", VCPU_STAT(diagnose_308) },
+	{ "diagnose_500", VCPU_STAT(diagnose_500) },
 	{ NULL }
 };
 
@@ -124,6 +128,7 @@ unsigned long kvm_s390_fac_list_mask_size(void)
 }
 
 static struct gmap_notifier gmap_notifier;
+debug_info_t *kvm_s390_dbf;
 
 /* Section: not file related */
 int kvm_arch_hardware_enable(void)
@@ -134,24 +139,69 @@ int kvm_arch_hardware_enable(void)
 
 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address);
 
+/*
+ * This callback is executed during stop_machine(). All CPUs are therefore
+ * temporarily stopped. In order not to change guest behavior, we have to
+ * disable preemption whenever we touch the epoch of kvm and the VCPUs,
+ * so a CPU won't be stopped while calculating with the epoch.
+ */
+static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
+			  void *v)
+{
+	struct kvm *kvm;
+	struct kvm_vcpu *vcpu;
+	int i;
+	unsigned long long *delta = v;
+
+	list_for_each_entry(kvm, &vm_list, vm_list) {
+		kvm->arch.epoch -= *delta;
+		kvm_for_each_vcpu(i, vcpu, kvm) {
+			vcpu->arch.sie_block->epoch -= *delta;
+		}
+	}
+	return NOTIFY_OK;
+}
+
+static struct notifier_block kvm_clock_notifier = {
+	.notifier_call = kvm_clock_sync,
+};
+
 int kvm_arch_hardware_setup(void)
 {
 	gmap_notifier.notifier_call = kvm_gmap_notifier;
 	gmap_register_ipte_notifier(&gmap_notifier);
+	atomic_notifier_chain_register(&s390_epoch_delta_notifier,
+				       &kvm_clock_notifier);
 	return 0;
 }
 
 void kvm_arch_hardware_unsetup(void)
 {
 	gmap_unregister_ipte_notifier(&gmap_notifier);
+	atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
+					 &kvm_clock_notifier);
 }
 
 int kvm_arch_init(void *opaque)
 {
+	kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
+	if (!kvm_s390_dbf)
+		return -ENOMEM;
+
+	if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
+		debug_unregister(kvm_s390_dbf);
+		return -ENOMEM;
+	}
+
 	/* Register floating interrupt controller interface. */
 	return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
 }
 
+void kvm_arch_exit(void)
+{
+	debug_unregister(kvm_s390_dbf);
+}
+
 /* Section: device related */
 long kvm_arch_dev_ioctl(struct file *filp,
 			unsigned int ioctl, unsigned long arg)
@@ -281,10 +331,12 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
 
 	switch (cap->cap) {
 	case KVM_CAP_S390_IRQCHIP:
+		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
 		kvm->arch.use_irqchip = 1;
 		r = 0;
 		break;
 	case KVM_CAP_S390_USER_SIGP:
+		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
 		kvm->arch.user_sigp = 1;
 		r = 0;
 		break;
@@ -295,8 +347,11 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
 			r = 0;
 		} else
 			r = -EINVAL;
+		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
+			 r ? "(not available)" : "(success)");
 		break;
 	case KVM_CAP_S390_USER_STSI:
+		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
 		kvm->arch.user_stsi = 1;
 		r = 0;
 		break;
@@ -314,6 +369,8 @@ static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *att
 	switch (attr->attr) {
 	case KVM_S390_VM_MEM_LIMIT_SIZE:
 		ret = 0;
+		VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
+			 kvm->arch.gmap->asce_end);
 		if (put_user(kvm->arch.gmap->asce_end, (u64 __user *)attr->addr))
 			ret = -EFAULT;
 		break;
@@ -330,7 +387,13 @@ static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *att
 	unsigned int idx;
 	switch (attr->attr) {
 	case KVM_S390_VM_MEM_ENABLE_CMMA:
+		/* enable CMMA only for z10 and later (EDAT_1) */
+		ret = -EINVAL;
+		if (!MACHINE_IS_LPAR || !MACHINE_HAS_EDAT1)
+			break;
+
 		ret = -EBUSY;
+		VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
 		mutex_lock(&kvm->lock);
 		if (atomic_read(&kvm->online_vcpus) == 0) {
 			kvm->arch.use_cmma = 1;
@@ -339,6 +402,11 @@ static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *att
 		mutex_unlock(&kvm->lock);
 		break;
 	case KVM_S390_VM_MEM_CLR_CMMA:
+		ret = -EINVAL;
+		if (!kvm->arch.use_cmma)
+			break;
+
+		VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
 		mutex_lock(&kvm->lock);
 		idx = srcu_read_lock(&kvm->srcu);
 		s390_reset_cmma(kvm->arch.gmap->mm);
@@ -374,6 +442,7 @@ static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *att
 			}
 		}
 		mutex_unlock(&kvm->lock);
+		VM_EVENT(kvm, 3, "SET: max guest memory: %lu bytes", new_limit);
 		break;
 	}
 	default:
@@ -400,22 +469,26 @@ static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
 			kvm->arch.crypto.crycb->aes_wrapping_key_mask,
 			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
 		kvm->arch.crypto.aes_kw = 1;
+		VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
 		break;
 	case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
 		get_random_bytes(
 			kvm->arch.crypto.crycb->dea_wrapping_key_mask,
 			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
 		kvm->arch.crypto.dea_kw = 1;
+		VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
 		break;
 	case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
 		kvm->arch.crypto.aes_kw = 0;
 		memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
 			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
+		VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
 		break;
 	case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
 		kvm->arch.crypto.dea_kw = 0;
 		memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
 			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
+		VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
 		break;
 	default:
 		mutex_unlock(&kvm->lock);
@@ -440,6 +513,7 @@ static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
 
 	if (gtod_high != 0)
 		return -EINVAL;
+	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x\n", gtod_high);
 
 	return 0;
 }
@@ -459,12 +533,15 @@ static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
 		return r;
 
 	mutex_lock(&kvm->lock);
+	preempt_disable();
 	kvm->arch.epoch = gtod - host_tod;
 	kvm_s390_vcpu_block_all(kvm);
 	kvm_for_each_vcpu(vcpu_idx, cur_vcpu, kvm)
 		cur_vcpu->arch.sie_block->epoch = kvm->arch.epoch;
 	kvm_s390_vcpu_unblock_all(kvm);
+	preempt_enable();
 	mutex_unlock(&kvm->lock);
+	VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx\n", gtod);
 	return 0;
 }
 
@@ -496,6 +573,7 @@ static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
 	if (copy_to_user((void __user *)attr->addr, &gtod_high,
 					 sizeof(gtod_high)))
 		return -EFAULT;
+	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x\n", gtod_high);
 
 	return 0;
 }
@@ -509,9 +587,12 @@ static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
 	if (r)
 		return r;
 
+	preempt_disable();
 	gtod = host_tod + kvm->arch.epoch;
+	preempt_enable();
 	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
 		return -EFAULT;
+	VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx\n", gtod);
 
 	return 0;
 }
@@ -821,7 +902,9 @@ static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
 	}
 
 	/* Enable storage key handling for the guest */
-	s390_enable_skey();
+	r = s390_enable_skey();
+	if (r)
+		goto out;
 
 	for (i = 0; i < args->count; i++) {
 		hva = gfn_to_hva(kvm, args->start_gfn + i);
@@ -879,8 +962,7 @@ long kvm_arch_vm_ioctl(struct file *filp,
 		if (kvm->arch.use_irqchip) {
 			/* Set up dummy routing. */
 			memset(&routing, 0, sizeof(routing));
-			kvm_set_irq_routing(kvm, &routing, 0, 0);
-			r = 0;
+			r = kvm_set_irq_routing(kvm, &routing, 0, 0);
 		}
 		break;
 	}
@@ -1043,7 +1125,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 
 	sprintf(debug_name, "kvm-%u", current->pid);
 
-	kvm->arch.dbf = debug_register(debug_name, 8, 2, 8 * sizeof(long));
+	kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
 	if (!kvm->arch.dbf)
 		goto out_err;
 
@@ -1086,7 +1168,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 	mutex_init(&kvm->arch.ipte_mutex);
 
 	debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
-	VM_EVENT(kvm, 3, "%s", "vm created");
+	VM_EVENT(kvm, 3, "vm created with type %lu", type);
 
 	if (type & KVM_VM_S390_UCONTROL) {
 		kvm->arch.gmap = NULL;
@@ -1103,6 +1185,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 	kvm->arch.epoch = 0;
 
 	spin_lock_init(&kvm->arch.start_stop_lock);
+	KVM_EVENT(3, "vm 0x%p created by pid %u", kvm, current->pid);
 
 	return 0;
 out_err:
@@ -1110,6 +1193,7 @@ out_err:
 	free_page((unsigned long)kvm->arch.model.fac);
 	debug_unregister(kvm->arch.dbf);
 	free_page((unsigned long)(kvm->arch.sca));
+	KVM_EVENT(3, "creation of vm failed: %d", rc);
 	return rc;
 }
 
@@ -1131,7 +1215,7 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
 	if (kvm_is_ucontrol(vcpu->kvm))
 		gmap_free(vcpu->arch.gmap);
 
-	if (kvm_s390_cmma_enabled(vcpu->kvm))
+	if (vcpu->kvm->arch.use_cmma)
 		kvm_s390_vcpu_unsetup_cmma(vcpu);
 	free_page((unsigned long)(vcpu->arch.sie_block));
 
@@ -1166,6 +1250,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
 		gmap_free(kvm->arch.gmap);
 	kvm_s390_destroy_adapters(kvm);
 	kvm_s390_clear_float_irqs(kvm);
+	KVM_EVENT(3, "vm 0x%p destroyed", kvm);
 }
 
 /* Section: vcpu related */
@@ -1264,7 +1349,9 @@ static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
 {
 	mutex_lock(&vcpu->kvm->lock);
+	preempt_disable();
 	vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
+	preempt_enable();
 	mutex_unlock(&vcpu->kvm->lock);
 	if (!kvm_is_ucontrol(vcpu->kvm))
 		vcpu->arch.gmap = vcpu->kvm->arch.gmap;
@@ -1342,7 +1429,7 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
 	}
 	vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
 
-	if (kvm_s390_cmma_enabled(vcpu->kvm)) {
+	if (vcpu->kvm->arch.use_cmma) {
 		rc = kvm_s390_vcpu_setup_cmma(vcpu);
 		if (rc)
 			return rc;
@@ -1723,18 +1810,6 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
 	return rc;
 }
 
-bool kvm_s390_cmma_enabled(struct kvm *kvm)
-{
-	if (!MACHINE_IS_LPAR)
-		return false;
-	/* only enable for z10 and later */
-	if (!MACHINE_HAS_EDAT1)
-		return false;
-	if (!kvm->arch.use_cmma)
-		return false;
-	return true;
-}
-
 static bool ibs_enabled(struct kvm_vcpu *vcpu)
 {
 	return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_IBS;
@@ -2340,6 +2415,7 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
 	case KVM_CAP_S390_CSS_SUPPORT:
 		if (!vcpu->kvm->arch.css_support) {
 			vcpu->kvm->arch.css_support = 1;
+			VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
 			trace_kvm_s390_enable_css(vcpu->kvm);
 		}
 		r = 0;
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
index c5704786e473..c446aabf60d3 100644
--- a/arch/s390/kvm/kvm-s390.h
+++ b/arch/s390/kvm/kvm-s390.h
@@ -27,6 +27,13 @@ typedef int (*intercept_handler_t)(struct kvm_vcpu *vcpu);
 #define TDB_FORMAT1		1
 #define IS_ITDB_VALID(vcpu)	((*(char *)vcpu->arch.sie_block->itdba == TDB_FORMAT1))
 
+extern debug_info_t *kvm_s390_dbf;
+#define KVM_EVENT(d_loglevel, d_string, d_args...)\
+do { \
+	debug_sprintf_event(kvm_s390_dbf, d_loglevel, d_string "\n", \
+	  d_args); \
+} while (0)
+
 #define VM_EVENT(d_kvm, d_loglevel, d_string, d_args...)\
 do { \
 	debug_sprintf_event(d_kvm->arch.dbf, d_loglevel, d_string "\n", \
@@ -65,6 +72,8 @@ static inline u32 kvm_s390_get_prefix(struct kvm_vcpu *vcpu)
 
 static inline void kvm_s390_set_prefix(struct kvm_vcpu *vcpu, u32 prefix)
 {
+	VCPU_EVENT(vcpu, 3, "set prefix of cpu %03u to 0x%x", vcpu->vcpu_id,
+		   prefix);
 	vcpu->arch.sie_block->prefix = prefix >> GUEST_PREFIX_SHIFT;
 	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
 	kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
@@ -217,8 +226,6 @@ void exit_sie(struct kvm_vcpu *vcpu);
 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu);
 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu);
 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu);
-/* is cmma enabled */
-bool kvm_s390_cmma_enabled(struct kvm *kvm);
 unsigned long kvm_s390_fac_list_mask_size(void);
 extern unsigned long kvm_s390_fac_list_mask[];
 
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index ad4242245771..4d21dc4d1a84 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -53,11 +53,14 @@ static int handle_set_clock(struct kvm_vcpu *vcpu)
 		kvm_s390_set_psw_cc(vcpu, 3);
 		return 0;
 	}
+	VCPU_EVENT(vcpu, 3, "SCK: setting guest TOD to 0x%llx", val);
 	val = (val - hostclk) & ~0x3fUL;
 
 	mutex_lock(&vcpu->kvm->lock);
+	preempt_disable();
 	kvm_for_each_vcpu(i, cpup, vcpu->kvm)
 		cpup->arch.sie_block->epoch = val;
+	preempt_enable();
 	mutex_unlock(&vcpu->kvm->lock);
 
 	kvm_s390_set_psw_cc(vcpu, 0);
@@ -98,8 +101,6 @@ static int handle_set_prefix(struct kvm_vcpu *vcpu)
 		return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
 
 	kvm_s390_set_prefix(vcpu, address);
-
-	VCPU_EVENT(vcpu, 5, "setting prefix to %x", address);
 	trace_kvm_s390_handle_prefix(vcpu, 1, address);
 	return 0;
 }
@@ -129,7 +130,7 @@ static int handle_store_prefix(struct kvm_vcpu *vcpu)
 	if (rc)
 		return kvm_s390_inject_prog_cond(vcpu, rc);
 
-	VCPU_EVENT(vcpu, 5, "storing prefix to %x", address);
+	VCPU_EVENT(vcpu, 3, "STPX: storing prefix 0x%x into 0x%llx", address, operand2);
 	trace_kvm_s390_handle_prefix(vcpu, 0, address);
 	return 0;
 }
@@ -155,7 +156,7 @@ static int handle_store_cpu_address(struct kvm_vcpu *vcpu)
 	if (rc)
 		return kvm_s390_inject_prog_cond(vcpu, rc);
 
-	VCPU_EVENT(vcpu, 5, "storing cpu address to %llx", ga);
+	VCPU_EVENT(vcpu, 3, "STAP: storing cpu address (%u) to 0x%llx", vcpu_id, ga);
 	trace_kvm_s390_handle_stap(vcpu, ga);
 	return 0;
 }
@@ -167,6 +168,7 @@ static int __skey_check_enable(struct kvm_vcpu *vcpu)
 		return rc;
 
 	rc = s390_enable_skey();
+	VCPU_EVENT(vcpu, 3, "%s", "enabling storage keys for guest");
 	trace_kvm_s390_skey_related_inst(vcpu);
 	vcpu->arch.sie_block->ictl &= ~(ICTL_ISKE | ICTL_SSKE | ICTL_RRBE);
 	return rc;
@@ -370,7 +372,7 @@ static int handle_stfl(struct kvm_vcpu *vcpu)
 			    &fac, sizeof(fac));
 	if (rc)
 		return rc;
-	VCPU_EVENT(vcpu, 5, "store facility list value %x", fac);
+	VCPU_EVENT(vcpu, 3, "STFL: store facility list 0x%x", fac);
 	trace_kvm_s390_handle_stfl(vcpu, fac);
 	return 0;
 }
@@ -468,7 +470,7 @@ static int handle_stidp(struct kvm_vcpu *vcpu)
 	if (rc)
 		return kvm_s390_inject_prog_cond(vcpu, rc);
 
-	VCPU_EVENT(vcpu, 5, "%s", "store cpu id");
+	VCPU_EVENT(vcpu, 3, "STIDP: store cpu id 0x%llx", stidp_data);
 	return 0;
 }
 
@@ -521,7 +523,7 @@ static int handle_stsi(struct kvm_vcpu *vcpu)
 	ar_t ar;
 
 	vcpu->stat.instruction_stsi++;
-	VCPU_EVENT(vcpu, 4, "stsi: fc: %x sel1: %x sel2: %x", fc, sel1, sel2);
+	VCPU_EVENT(vcpu, 3, "STSI: fc: %u sel1: %u sel2: %u", fc, sel1, sel2);
 
 	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
 		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
@@ -758,10 +760,10 @@ static int handle_essa(struct kvm_vcpu *vcpu)
 	struct gmap *gmap;
 	int i;
 
-	VCPU_EVENT(vcpu, 5, "cmma release %d pages", entries);
+	VCPU_EVENT(vcpu, 4, "ESSA: release %d pages", entries);
 	gmap = vcpu->arch.gmap;
 	vcpu->stat.instruction_essa++;
-	if (!kvm_s390_cmma_enabled(vcpu->kvm))
+	if (!vcpu->kvm->arch.use_cmma)
 		return kvm_s390_inject_program_int(vcpu, PGM_OPERATION);
 
 	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
@@ -829,7 +831,7 @@ int kvm_s390_handle_lctl(struct kvm_vcpu *vcpu)
 	if (ga & 3)
 		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
 
-	VCPU_EVENT(vcpu, 5, "lctl r1:%x, r3:%x, addr:%llx", reg1, reg3, ga);
+	VCPU_EVENT(vcpu, 4, "LCTL: r1:%d, r3:%d, addr: 0x%llx", reg1, reg3, ga);
 	trace_kvm_s390_handle_lctl(vcpu, 0, reg1, reg3, ga);
 
 	nr_regs = ((reg3 - reg1) & 0xf) + 1;
@@ -868,7 +870,7 @@ int kvm_s390_handle_stctl(struct kvm_vcpu *vcpu)
 	if (ga & 3)
 		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
 
-	VCPU_EVENT(vcpu, 5, "stctl r1:%x, r3:%x, addr:%llx", reg1, reg3, ga);
+	VCPU_EVENT(vcpu, 4, "STCTL r1:%d, r3:%d, addr: 0x%llx", reg1, reg3, ga);
 	trace_kvm_s390_handle_stctl(vcpu, 0, reg1, reg3, ga);
 
 	reg = reg1;
@@ -902,7 +904,7 @@ static int handle_lctlg(struct kvm_vcpu *vcpu)
 	if (ga & 7)
 		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
 
-	VCPU_EVENT(vcpu, 5, "lctlg r1:%x, r3:%x, addr:%llx", reg1, reg3, ga);
+	VCPU_EVENT(vcpu, 4, "LCTLG: r1:%d, r3:%d, addr: 0x%llx", reg1, reg3, ga);
 	trace_kvm_s390_handle_lctl(vcpu, 1, reg1, reg3, ga);
 
 	nr_regs = ((reg3 - reg1) & 0xf) + 1;
@@ -940,7 +942,7 @@ static int handle_stctg(struct kvm_vcpu *vcpu)
 	if (ga & 7)
 		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
 
-	VCPU_EVENT(vcpu, 5, "stctg r1:%x, r3:%x, addr:%llx", reg1, reg3, ga);
+	VCPU_EVENT(vcpu, 4, "STCTG r1:%d, r3:%d, addr: 0x%llx", reg1, reg3, ga);
 	trace_kvm_s390_handle_stctl(vcpu, 1, reg1, reg3, ga);
 
 	reg = reg1;
diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c
index 72e58bd2bee7..da690b69f9fe 100644
--- a/arch/s390/kvm/sigp.c
+++ b/arch/s390/kvm/sigp.c
@@ -205,9 +205,6 @@ static int __sigp_set_prefix(struct kvm_vcpu *vcpu, struct kvm_vcpu *dst_vcpu,
 		*reg &= 0xffffffff00000000UL;
 		*reg |= SIGP_STATUS_INCORRECT_STATE;
 		return SIGP_CC_STATUS_STORED;
-	} else if (rc == 0) {
-		VCPU_EVENT(vcpu, 4, "set prefix of cpu %02x to %x",
-			   dst_vcpu->vcpu_id, irq.u.prefix.address);
 	}
 
 	return rc;
@@ -371,7 +368,8 @@ static int handle_sigp_dst(struct kvm_vcpu *vcpu, u8 order_code,
 	return rc;
 }
 
-static int handle_sigp_order_in_user_space(struct kvm_vcpu *vcpu, u8 order_code)
+static int handle_sigp_order_in_user_space(struct kvm_vcpu *vcpu, u8 order_code,
+					   u16 cpu_addr)
 {
 	if (!vcpu->kvm->arch.user_sigp)
 		return 0;
@@ -414,9 +412,8 @@ static int handle_sigp_order_in_user_space(struct kvm_vcpu *vcpu, u8 order_code)
 	default:
 		vcpu->stat.instruction_sigp_unknown++;
 	}
-
-	VCPU_EVENT(vcpu, 4, "sigp order %u: completely handled in user space",
-		   order_code);
+	VCPU_EVENT(vcpu, 3, "SIGP: order %u for CPU %d handled in userspace",
+		   order_code, cpu_addr);
 
 	return 1;
 }
@@ -435,7 +432,7 @@ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu)
 		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
 
 	order_code = kvm_s390_get_base_disp_rs(vcpu, NULL);
-	if (handle_sigp_order_in_user_space(vcpu, order_code))
+	if (handle_sigp_order_in_user_space(vcpu, order_code, cpu_addr))
 		return -EOPNOTSUPP;
 
 	if (r1 % 2)
diff --git a/arch/s390/kvm/trace-s390.h b/arch/s390/kvm/trace-s390.h
index 3208d33a48cb..cc1d6c68356f 100644
--- a/arch/s390/kvm/trace-s390.h
+++ b/arch/s390/kvm/trace-s390.h
@@ -105,11 +105,22 @@ TRACE_EVENT(kvm_s390_vcpu_start_stop,
 	{KVM_S390_PROGRAM_INT, "program interrupt"},			\
 	{KVM_S390_SIGP_SET_PREFIX, "sigp set prefix"},			\
 	{KVM_S390_RESTART, "sigp restart"},				\
+	{KVM_S390_INT_PFAULT_INIT, "pfault init"},			\
+	{KVM_S390_INT_PFAULT_DONE, "pfault done"},			\
+	{KVM_S390_MCHK, "machine check"},				\
+	{KVM_S390_INT_CLOCK_COMP, "clock comparator"},			\
+	{KVM_S390_INT_CPU_TIMER, "cpu timer"},				\
 	{KVM_S390_INT_VIRTIO, "virtio interrupt"},			\
 	{KVM_S390_INT_SERVICE, "sclp interrupt"},			\
 	{KVM_S390_INT_EMERGENCY, "sigp emergency"},			\
 	{KVM_S390_INT_EXTERNAL_CALL, "sigp ext call"}
 
+#define get_irq_name(__type) \
+	(__type > KVM_S390_INT_IO_MAX ? \
+	__print_symbolic(__type, kvm_s390_int_type) : \
+		(__type & KVM_S390_INT_IO_AI_MASK ? \
+		 "adapter I/O interrupt" : "subchannel I/O interrupt"))
+
 TRACE_EVENT(kvm_s390_inject_vm,
 	    TP_PROTO(__u64 type, __u32 parm, __u64 parm64, int who),
 	    TP_ARGS(type, parm, parm64, who),
@@ -131,22 +142,19 @@ TRACE_EVENT(kvm_s390_inject_vm,
 	    TP_printk("inject%s: type:%x (%s) parm:%x parm64:%llx",
 		      (__entry->who == 1) ? " (from kernel)" :
 		      (__entry->who == 2) ? " (from user)" : "",
-		      __entry->inttype,
-		      __print_symbolic(__entry->inttype, kvm_s390_int_type),
+		      __entry->inttype, get_irq_name(__entry->inttype),
 		      __entry->parm, __entry->parm64)
 	);
 
 TRACE_EVENT(kvm_s390_inject_vcpu,
-	    TP_PROTO(unsigned int id, __u64 type, __u32 parm, __u64 parm64, \
-		     int who),
-	    TP_ARGS(id, type, parm, parm64, who),
+	    TP_PROTO(unsigned int id, __u64 type, __u32 parm, __u64 parm64),
+	    TP_ARGS(id, type, parm, parm64),
 
 	    TP_STRUCT__entry(
 		    __field(int, id)
 		    __field(__u32, inttype)
 		    __field(__u32, parm)
 		    __field(__u64, parm64)
-		    __field(int, who)
 		    ),
 
 	    TP_fast_assign(
@@ -154,15 +162,12 @@ TRACE_EVENT(kvm_s390_inject_vcpu,
 		    __entry->inttype = type & 0x00000000ffffffff;
 		    __entry->parm = parm;
 		    __entry->parm64 = parm64;
-		    __entry->who = who;
 		    ),
 
-	    TP_printk("inject%s (vcpu %d): type:%x (%s) parm:%x parm64:%llx",
-		      (__entry->who == 1) ? " (from kernel)" :
-		      (__entry->who == 2) ? " (from user)" : "",
+	    TP_printk("inject (vcpu %d): type:%x (%s) parm:%x parm64:%llx",
 		      __entry->id, __entry->inttype,
-		      __print_symbolic(__entry->inttype, kvm_s390_int_type),
-		      __entry->parm, __entry->parm64)
+		      get_irq_name(__entry->inttype), __entry->parm,
+		      __entry->parm64)
 	);
 
 /*
@@ -189,8 +194,8 @@ TRACE_EVENT(kvm_s390_deliver_interrupt,
 	    TP_printk("deliver interrupt (vcpu %d): type:%x (%s) "	\
 		      "data:%08llx %016llx",
 		      __entry->id, __entry->inttype,
-		      __print_symbolic(__entry->inttype, kvm_s390_int_type),
-		      __entry->data0, __entry->data1)
+		      get_irq_name(__entry->inttype), __entry->data0,
+		      __entry->data1)
 	);
 
 /*
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index 33082d0d101b..b33f66110ca9 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -31,8 +31,6 @@
 #define ALLOC_ORDER	2
 #define FRAG_MASK	0x03
 
-int HPAGE_SHIFT;
-
 unsigned long *crst_table_alloc(struct mm_struct *mm)
 {
 	struct page *page = alloc_pages(GFP_KERNEL, ALLOC_ORDER);
diff --git a/arch/s390/oprofile/init.c b/arch/s390/oprofile/init.c
index bc927a09a172..9cfa2ffaa9d6 100644
--- a/arch/s390/oprofile/init.c
+++ b/arch/s390/oprofile/init.c
@@ -16,6 +16,7 @@
 #include <linux/fs.h>
 #include <linux/module.h>
 #include <asm/processor.h>
+#include <asm/perf_event.h>
 
 #include "../../../drivers/oprofile/oprof.h"
 
diff --git a/arch/score/include/asm/Kbuild b/arch/score/include/asm/Kbuild
index 138fb3db45ba..92ffe397b893 100644
--- a/arch/score/include/asm/Kbuild
+++ b/arch/score/include/asm/Kbuild
@@ -7,6 +7,7 @@ generic-y += clkdev.h
 generic-y += cputime.h
 generic-y += irq_work.h
 generic-y += mcs_spinlock.h
+generic-y += mm-arch-hooks.h
 generic-y += preempt.h
 generic-y += sections.h
 generic-y += trace_clock.h
diff --git a/arch/score/include/asm/mm-arch-hooks.h b/arch/score/include/asm/mm-arch-hooks.h
deleted file mode 100644
index 5e38689f189a..000000000000
--- a/arch/score/include/asm/mm-arch-hooks.h
+++ /dev/null
@@ -1,15 +0,0 @@
-/*
- * Architecture specific mm hooks
- *
- * Copyright (C) 2015, IBM Corporation
- * Author: Laurent Dufour <ldufour@linux.vnet.ibm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#ifndef _ASM_SCORE_MM_ARCH_HOOKS_H
-#define _ASM_SCORE_MM_ARCH_HOOKS_H
-
-#endif /* _ASM_SCORE_MM_ARCH_HOOKS_H */
diff --git a/arch/sh/include/asm/Kbuild b/arch/sh/include/asm/Kbuild
index 9ac4626e7284..aac452b26aa8 100644
--- a/arch/sh/include/asm/Kbuild
+++ b/arch/sh/include/asm/Kbuild
@@ -16,6 +16,7 @@ generic-y += kvm_para.h
 generic-y += local.h
 generic-y += local64.h
 generic-y += mcs_spinlock.h
+generic-y += mm-arch-hooks.h
 generic-y += mman.h
 generic-y += msgbuf.h
 generic-y += param.h
diff --git a/arch/sh/include/asm/mm-arch-hooks.h b/arch/sh/include/asm/mm-arch-hooks.h
deleted file mode 100644
index 18087298b728..000000000000
--- a/arch/sh/include/asm/mm-arch-hooks.h
+++ /dev/null
@@ -1,15 +0,0 @@
-/*
- * Architecture specific mm hooks
- *
- * Copyright (C) 2015, IBM Corporation
- * Author: Laurent Dufour <ldufour@linux.vnet.ibm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#ifndef _ASM_SH_MM_ARCH_HOOKS_H
-#define _ASM_SH_MM_ARCH_HOOKS_H
-
-#endif /* _ASM_SH_MM_ARCH_HOOKS_H */
diff --git a/arch/sparc/include/asm/Kbuild b/arch/sparc/include/asm/Kbuild
index 2b2a69dcc467..e928618838bc 100644
--- a/arch/sparc/include/asm/Kbuild
+++ b/arch/sparc/include/asm/Kbuild
@@ -12,6 +12,7 @@ generic-y += linkage.h
 generic-y += local.h
 generic-y += local64.h
 generic-y += mcs_spinlock.h
+generic-y += mm-arch-hooks.h
 generic-y += module.h
 generic-y += mutex.h
 generic-y += preempt.h
diff --git a/arch/sparc/include/asm/mm-arch-hooks.h b/arch/sparc/include/asm/mm-arch-hooks.h
deleted file mode 100644
index b89ba44c16f1..000000000000
--- a/arch/sparc/include/asm/mm-arch-hooks.h
+++ /dev/null
@@ -1,15 +0,0 @@
-/*
- * Architecture specific mm hooks
- *
- * Copyright (C) 2015, IBM Corporation
- * Author: Laurent Dufour <ldufour@linux.vnet.ibm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#ifndef _ASM_SPARC_MM_ARCH_HOOKS_H
-#define _ASM_SPARC_MM_ARCH_HOOKS_H
-
-#endif /* _ASM_SPARC_MM_ARCH_HOOKS_H */
diff --git a/arch/tile/include/asm/Kbuild b/arch/tile/include/asm/Kbuild
index d53654488c2c..d8a843163471 100644
--- a/arch/tile/include/asm/Kbuild
+++ b/arch/tile/include/asm/Kbuild
@@ -19,6 +19,7 @@ generic-y += irq_regs.h
 generic-y += local.h
 generic-y += local64.h
 generic-y += mcs_spinlock.h
+generic-y += mm-arch-hooks.h
 generic-y += msgbuf.h
 generic-y += mutex.h
 generic-y += param.h
diff --git a/arch/tile/include/asm/mm-arch-hooks.h b/arch/tile/include/asm/mm-arch-hooks.h
deleted file mode 100644
index d1709ea774f7..000000000000
--- a/arch/tile/include/asm/mm-arch-hooks.h
+++ /dev/null
@@ -1,15 +0,0 @@
-/*
- * Architecture specific mm hooks
- *
- * Copyright (C) 2015, IBM Corporation
- * Author: Laurent Dufour <ldufour@linux.vnet.ibm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#ifndef _ASM_TILE_MM_ARCH_HOOKS_H
-#define _ASM_TILE_MM_ARCH_HOOKS_H
-
-#endif /* _ASM_TILE_MM_ARCH_HOOKS_H */
diff --git a/arch/um/include/asm/Kbuild b/arch/um/include/asm/Kbuild
index 3d63ff6f583f..149ec55f9c46 100644
--- a/arch/um/include/asm/Kbuild
+++ b/arch/um/include/asm/Kbuild
@@ -16,6 +16,7 @@ generic-y += irq_regs.h
 generic-y += irq_work.h
 generic-y += kdebug.h
 generic-y += mcs_spinlock.h
+generic-y += mm-arch-hooks.h
 generic-y += mutex.h
 generic-y += param.h
 generic-y += pci.h
diff --git a/arch/um/include/asm/mm-arch-hooks.h b/arch/um/include/asm/mm-arch-hooks.h
deleted file mode 100644
index a7c8b0dfdd4e..000000000000
--- a/arch/um/include/asm/mm-arch-hooks.h
+++ /dev/null
@@ -1,15 +0,0 @@
-/*
- * Architecture specific mm hooks
- *
- * Copyright (C) 2015, IBM Corporation
- * Author: Laurent Dufour <ldufour@linux.vnet.ibm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#ifndef _ASM_UM_MM_ARCH_HOOKS_H
-#define _ASM_UM_MM_ARCH_HOOKS_H
-
-#endif /* _ASM_UM_MM_ARCH_HOOKS_H */
diff --git a/arch/unicore32/include/asm/Kbuild b/arch/unicore32/include/asm/Kbuild
index d12b377b5a8b..1fc7a286dc6f 100644
--- a/arch/unicore32/include/asm/Kbuild
+++ b/arch/unicore32/include/asm/Kbuild
@@ -26,6 +26,7 @@ generic-y += kdebug.h
 generic-y += kmap_types.h
 generic-y += local.h
 generic-y += mcs_spinlock.h
+generic-y += mm-arch-hooks.h
 generic-y += mman.h
 generic-y += module.h
 generic-y += msgbuf.h
diff --git a/arch/unicore32/include/asm/mm-arch-hooks.h b/arch/unicore32/include/asm/mm-arch-hooks.h
deleted file mode 100644
index 4d79a850c509..000000000000
--- a/arch/unicore32/include/asm/mm-arch-hooks.h
+++ /dev/null
@@ -1,15 +0,0 @@
-/*
- * Architecture specific mm hooks
- *
- * Copyright (C) 2015, IBM Corporation
- * Author: Laurent Dufour <ldufour@linux.vnet.ibm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#ifndef _ASM_UNICORE32_MM_ARCH_HOOKS_H
-#define _ASM_UNICORE32_MM_ARCH_HOOKS_H
-
-#endif /* _ASM_UNICORE32_MM_ARCH_HOOKS_H */
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 3dbb7e7909ca..b3a1a5d77d92 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -41,6 +41,7 @@ config X86
 	select ARCH_USE_CMPXCHG_LOCKREF		if X86_64
 	select ARCH_USE_QUEUED_RWLOCKS
 	select ARCH_USE_QUEUED_SPINLOCKS
+	select ARCH_WANTS_DYNAMIC_TASK_STRUCT
 	select ARCH_WANT_FRAME_POINTERS
 	select ARCH_WANT_IPC_PARSE_VERSION	if X86_32
 	select ARCH_WANT_OPTIONAL_GPIOLIB
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index a15893d17c55..d8c0d3266173 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -297,6 +297,18 @@ config OPTIMIZE_INLINING
 
 	  If unsure, say N.
 
+config DEBUG_ENTRY
+	bool "Debug low-level entry code"
+	depends on DEBUG_KERNEL
+	---help---
+	  This option enables sanity checks in x86's low-level entry code.
+	  Some of these sanity checks may slow down kernel entries and
+	  exits or otherwise impact performance.
+
+	  This is currently used to help test NMI code.
+
+	  If unsure, say N.
+
 config DEBUG_NMI_SELFTEST
 	bool "NMI Selftest"
 	depends on DEBUG_KERNEL && X86_LOCAL_APIC
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 3bb2c4302df1..8cb3e438f21e 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -1237,11 +1237,12 @@ ENTRY(nmi)
 	 *  If the variable is not set and the stack is not the NMI
 	 *  stack then:
 	 *    o Set the special variable on the stack
-	 *    o Copy the interrupt frame into a "saved" location on the stack
-	 *    o Copy the interrupt frame into a "copy" location on the stack
+	 *    o Copy the interrupt frame into an "outermost" location on the
+	 *      stack
+	 *    o Copy the interrupt frame into an "iret" location on the stack
 	 *    o Continue processing the NMI
 	 *  If the variable is set or the previous stack is the NMI stack:
-	 *    o Modify the "copy" location to jump to the repeate_nmi
+	 *    o Modify the "iret" location to jump to the repeat_nmi
 	 *    o return back to the first NMI
 	 *
 	 * Now on exit of the first NMI, we first clear the stack variable
@@ -1250,31 +1251,151 @@ ENTRY(nmi)
 	 * a nested NMI that updated the copy interrupt stack frame, a
 	 * jump will be made to the repeat_nmi code that will handle the second
 	 * NMI.
+	 *
+	 * However, espfix prevents us from directly returning to userspace
+	 * with a single IRET instruction.  Similarly, IRET to user mode
+	 * can fault.  We therefore handle NMIs from user space like
+	 * other IST entries.
 	 */
 
 	/* Use %rdx as our temp variable throughout */
 	pushq	%rdx
 
+	testb	$3, CS-RIP+8(%rsp)
+	jz	.Lnmi_from_kernel
+
+	/*
+	 * NMI from user mode.  We need to run on the thread stack, but we
+	 * can't go through the normal entry paths: NMIs are masked, and
+	 * we don't want to enable interrupts, because then we'll end
+	 * up in an awkward situation in which IRQs are on but NMIs
+	 * are off.
+	 */
+
+	SWAPGS
+	cld
+	movq	%rsp, %rdx
+	movq	PER_CPU_VAR(cpu_current_top_of_stack), %rsp
+	pushq	5*8(%rdx)	/* pt_regs->ss */
+	pushq	4*8(%rdx)	/* pt_regs->rsp */
+	pushq	3*8(%rdx)	/* pt_regs->flags */
+	pushq	2*8(%rdx)	/* pt_regs->cs */
+	pushq	1*8(%rdx)	/* pt_regs->rip */
+	pushq   $-1		/* pt_regs->orig_ax */
+	pushq   %rdi		/* pt_regs->di */
+	pushq   %rsi		/* pt_regs->si */
+	pushq   (%rdx)		/* pt_regs->dx */
+	pushq   %rcx		/* pt_regs->cx */
+	pushq   %rax		/* pt_regs->ax */
+	pushq   %r8		/* pt_regs->r8 */
+	pushq   %r9		/* pt_regs->r9 */
+	pushq   %r10		/* pt_regs->r10 */
+	pushq   %r11		/* pt_regs->r11 */
+	pushq	%rbx		/* pt_regs->rbx */
+	pushq	%rbp		/* pt_regs->rbp */
+	pushq	%r12		/* pt_regs->r12 */
+	pushq	%r13		/* pt_regs->r13 */
+	pushq	%r14		/* pt_regs->r14 */
+	pushq	%r15		/* pt_regs->r15 */
+
+	/*
+	 * At this point we no longer need to worry about stack damage
+	 * due to nesting -- we're on the normal thread stack and we're
+	 * done with the NMI stack.
+	 */
+
+	movq	%rsp, %rdi
+	movq	$-1, %rsi
+	call	do_nmi
+
+	/*
+	 * Return back to user mode.  We must *not* do the normal exit
+	 * work, because we don't want to enable interrupts.  Fortunately,
+	 * do_nmi doesn't modify pt_regs.
+	 */
+	SWAPGS
+	jmp	restore_c_regs_and_iret
+
+.Lnmi_from_kernel:
+	/*
+	 * Here's what our stack frame will look like:
+	 * +---------------------------------------------------------+
+	 * | original SS                                             |
+	 * | original Return RSP                                     |
+	 * | original RFLAGS                                         |
+	 * | original CS                                             |
+	 * | original RIP                                            |
+	 * +---------------------------------------------------------+
+	 * | temp storage for rdx                                    |
+	 * +---------------------------------------------------------+
+	 * | "NMI executing" variable                                |
+	 * +---------------------------------------------------------+
+	 * | iret SS          } Copied from "outermost" frame        |
+	 * | iret Return RSP  } on each loop iteration; overwritten  |
+	 * | iret RFLAGS      } by a nested NMI to force another     |
+	 * | iret CS          } iteration if needed.                 |
+	 * | iret RIP         }                                      |
+	 * +---------------------------------------------------------+
+	 * | outermost SS          } initialized in first_nmi;       |
+	 * | outermost Return RSP  } will not be changed before      |
+	 * | outermost RFLAGS      } NMI processing is done.         |
+	 * | outermost CS          } Copied to "iret" frame on each  |
+	 * | outermost RIP         } iteration.                      |
+	 * +---------------------------------------------------------+
+	 * | pt_regs                                                 |
+	 * +---------------------------------------------------------+
+	 *
+	 * The "original" frame is used by hardware.  Before re-enabling
+	 * NMIs, we need to be done with it, and we need to leave enough
+	 * space for the asm code here.
+	 *
+	 * We return by executing IRET while RSP points to the "iret" frame.
+	 * That will either return for real or it will loop back into NMI
+	 * processing.
+	 *
+	 * The "outermost" frame is copied to the "iret" frame on each
+	 * iteration of the loop, so each iteration starts with the "iret"
+	 * frame pointing to the final return target.
+	 */
+
 	/*
-	 * If %cs was not the kernel segment, then the NMI triggered in user
-	 * space, which means it is definitely not nested.
+	 * Determine whether we're a nested NMI.
+	 *
+	 * If we interrupted kernel code between repeat_nmi and
+	 * end_repeat_nmi, then we are a nested NMI.  We must not
+	 * modify the "iret" frame because it's being written by
+	 * the outer NMI.  That's okay; the outer NMI handler is
+	 * about to about to call do_nmi anyway, so we can just
+	 * resume the outer NMI.
 	 */
-	cmpl	$__KERNEL_CS, 16(%rsp)
-	jne	first_nmi
+
+	movq	$repeat_nmi, %rdx
+	cmpq	8(%rsp), %rdx
+	ja	1f
+	movq	$end_repeat_nmi, %rdx
+	cmpq	8(%rsp), %rdx
+	ja	nested_nmi_out
+1:
 
 	/*
-	 * Check the special variable on the stack to see if NMIs are
-	 * executing.
+	 * Now check "NMI executing".  If it's set, then we're nested.
+	 * This will not detect if we interrupted an outer NMI just
+	 * before IRET.
 	 */
 	cmpl	$1, -8(%rsp)
 	je	nested_nmi
 
 	/*
-	 * Now test if the previous stack was an NMI stack.
-	 * We need the double check. We check the NMI stack to satisfy the
-	 * race when the first NMI clears the variable before returning.
-	 * We check the variable because the first NMI could be in a
-	 * breakpoint routine using a breakpoint stack.
+	 * Now test if the previous stack was an NMI stack.  This covers
+	 * the case where we interrupt an outer NMI after it clears
+	 * "NMI executing" but before IRET.  We need to be careful, though:
+	 * there is one case in which RSP could point to the NMI stack
+	 * despite there being no NMI active: naughty userspace controls
+	 * RSP at the very beginning of the SYSCALL targets.  We can
+	 * pull a fast one on naughty userspace, though: we program
+	 * SYSCALL to mask DF, so userspace cannot cause DF to be set
+	 * if it controls the kernel's RSP.  We set DF before we clear
+	 * "NMI executing".
 	 */
 	lea	6*8(%rsp), %rdx
 	/* Compare the NMI stack (rdx) with the stack we came from (4*8(%rsp)) */
@@ -1286,25 +1407,20 @@ ENTRY(nmi)
 	cmpq	%rdx, 4*8(%rsp)
 	/* If it is below the NMI stack, it is a normal NMI */
 	jb	first_nmi
-	/* Ah, it is within the NMI stack, treat it as nested */
+
+	/* Ah, it is within the NMI stack. */
+
+	testb	$(X86_EFLAGS_DF >> 8), (3*8 + 1)(%rsp)
+	jz	first_nmi	/* RSP was user controlled. */
+
+	/* This is a nested NMI. */
 
 nested_nmi:
 	/*
-	 * Do nothing if we interrupted the fixup in repeat_nmi.
-	 * It's about to repeat the NMI handler, so we are fine
-	 * with ignoring this one.
+	 * Modify the "iret" frame to point to repeat_nmi, forcing another
+	 * iteration of NMI handling.
 	 */
-	movq	$repeat_nmi, %rdx
-	cmpq	8(%rsp), %rdx
-	ja	1f
-	movq	$end_repeat_nmi, %rdx
-	cmpq	8(%rsp), %rdx
-	ja	nested_nmi_out
-
-1:
-	/* Set up the interrupted NMIs stack to jump to repeat_nmi */
-	leaq	-1*8(%rsp), %rdx
-	movq	%rdx, %rsp
+	subq	$8, %rsp
 	leaq	-10*8(%rsp), %rdx
 	pushq	$__KERNEL_DS
 	pushq	%rdx
@@ -1318,61 +1434,42 @@ nested_nmi:
 nested_nmi_out:
 	popq	%rdx
 
-	/* No need to check faults here */
+	/* We are returning to kernel mode, so this cannot result in a fault. */
 	INTERRUPT_RETURN
 
 first_nmi:
-	/*
-	 * Because nested NMIs will use the pushed location that we
-	 * stored in rdx, we must keep that space available.
-	 * Here's what our stack frame will look like:
-	 * +-------------------------+
-	 * | original SS             |
-	 * | original Return RSP     |
-	 * | original RFLAGS         |
-	 * | original CS             |
-	 * | original RIP            |
-	 * +-------------------------+
-	 * | temp storage for rdx    |
-	 * +-------------------------+
-	 * | NMI executing variable  |
-	 * +-------------------------+
-	 * | copied SS               |
-	 * | copied Return RSP       |
-	 * | copied RFLAGS           |
-	 * | copied CS               |
-	 * | copied RIP              |
-	 * +-------------------------+
-	 * | Saved SS                |
-	 * | Saved Return RSP        |
-	 * | Saved RFLAGS            |
-	 * | Saved CS                |
-	 * | Saved RIP               |
-	 * +-------------------------+
-	 * | pt_regs                 |
-	 * +-------------------------+
-	 *
-	 * The saved stack frame is used to fix up the copied stack frame
-	 * that a nested NMI may change to make the interrupted NMI iret jump
-	 * to the repeat_nmi. The original stack frame and the temp storage
-	 * is also used by nested NMIs and can not be trusted on exit.
-	 */
-	/* Do not pop rdx, nested NMIs will corrupt that part of the stack */
+	/* Restore rdx. */
 	movq	(%rsp), %rdx
 
-	/* Set the NMI executing variable on the stack. */
-	pushq	$1
+	/* Make room for "NMI executing". */
+	pushq	$0
 
-	/* Leave room for the "copied" frame */
+	/* Leave room for the "iret" frame */
 	subq	$(5*8), %rsp
 
-	/* Copy the stack frame to the Saved frame */
+	/* Copy the "original" frame to the "outermost" frame */
 	.rept 5
 	pushq	11*8(%rsp)
 	.endr
 
 	/* Everything up to here is safe from nested NMIs */
 
+#ifdef CONFIG_DEBUG_ENTRY
+	/*
+	 * For ease of testing, unmask NMIs right away.  Disabled by
+	 * default because IRET is very expensive.
+	 */
+	pushq	$0		/* SS */
+	pushq	%rsp		/* RSP (minus 8 because of the previous push) */
+	addq	$8, (%rsp)	/* Fix up RSP */
+	pushfq			/* RFLAGS */
+	pushq	$__KERNEL_CS	/* CS */
+	pushq	$1f		/* RIP */
+	INTERRUPT_RETURN	/* continues at repeat_nmi below */
+1:
+#endif
+
+repeat_nmi:
 	/*
 	 * If there was a nested NMI, the first NMI's iret will return
 	 * here. But NMIs are still enabled and we can take another
@@ -1381,16 +1478,20 @@ first_nmi:
 	 * it will just return, as we are about to repeat an NMI anyway.
 	 * This makes it safe to copy to the stack frame that a nested
 	 * NMI will update.
+	 *
+	 * RSP is pointing to "outermost RIP".  gsbase is unknown, but, if
+	 * we're repeating an NMI, gsbase has the same value that it had on
+	 * the first iteration.  paranoid_entry will load the kernel
+	 * gsbase if needed before we call do_nmi.  "NMI executing"
+	 * is zero.
 	 */
-repeat_nmi:
+	movq	$1, 10*8(%rsp)		/* Set "NMI executing". */
+
 	/*
-	 * Update the stack variable to say we are still in NMI (the update
-	 * is benign for the non-repeat case, where 1 was pushed just above
-	 * to this very stack slot).
+	 * Copy the "outermost" frame to the "iret" frame.  NMIs that nest
+	 * here must not modify the "iret" frame while we're writing to
+	 * it or it will end up containing garbage.
 	 */
-	movq	$1, 10*8(%rsp)
-
-	/* Make another copy, this one may be modified by nested NMIs */
 	addq	$(10*8), %rsp
 	.rept 5
 	pushq	-6*8(%rsp)
@@ -1399,9 +1500,9 @@ repeat_nmi:
 end_repeat_nmi:
 
 	/*
-	 * Everything below this point can be preempted by a nested
-	 * NMI if the first NMI took an exception and reset our iret stack
-	 * so that we repeat another NMI.
+	 * Everything below this point can be preempted by a nested NMI.
+	 * If this happens, then the inner NMI will change the "iret"
+	 * frame to point back to repeat_nmi.
 	 */
 	pushq	$-1				/* ORIG_RAX: no syscall to restart */
 	ALLOC_PT_GPREGS_ON_STACK
@@ -1415,28 +1516,11 @@ end_repeat_nmi:
 	 */
 	call	paranoid_entry
 
-	/*
-	 * Save off the CR2 register. If we take a page fault in the NMI then
-	 * it could corrupt the CR2 value. If the NMI preempts a page fault
-	 * handler before it was able to read the CR2 register, and then the
-	 * NMI itself takes a page fault, the page fault that was preempted
-	 * will read the information from the NMI page fault and not the
-	 * origin fault. Save it off and restore it if it changes.
-	 * Use the r12 callee-saved register.
-	 */
-	movq	%cr2, %r12
-
 	/* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */
 	movq	%rsp, %rdi
 	movq	$-1, %rsi
 	call	do_nmi
 
-	/* Did the NMI take a page fault? Restore cr2 if it did */
-	movq	%cr2, %rcx
-	cmpq	%rcx, %r12
-	je	1f
-	movq	%r12, %cr2
-1:
 	testl	%ebx, %ebx			/* swapgs needed? */
 	jnz	nmi_restore
 nmi_swapgs:
@@ -1444,11 +1528,26 @@ nmi_swapgs:
 nmi_restore:
 	RESTORE_EXTRA_REGS
 	RESTORE_C_REGS
-	/* Pop the extra iret frame at once */
+
+	/* Point RSP at the "iret" frame. */
 	REMOVE_PT_GPREGS_FROM_STACK 6*8
 
-	/* Clear the NMI executing stack variable */
-	movq	$0, 5*8(%rsp)
+	/*
+	 * Clear "NMI executing".  Set DF first so that we can easily
+	 * distinguish the remaining code between here and IRET from
+	 * the SYSCALL entry and exit paths.  On a native kernel, we
+	 * could just inspect RIP, but, on paravirt kernels,
+	 * INTERRUPT_RETURN can translate into a jump into a
+	 * hypercall page.
+	 */
+	std
+	movq	$0, 5*8(%rsp)		/* clear "NMI executing" */
+
+	/*
+	 * INTERRUPT_RETURN reads the "iret" frame and exits the NMI
+	 * stack in a single instruction.  We are returning to kernel
+	 * mode, so this cannot result in a fault.
+	 */
 	INTERRUPT_RETURN
 END(nmi)
 
diff --git a/arch/x86/include/asm/Kbuild b/arch/x86/include/asm/Kbuild
index 4dd1f2d770af..aeac434c9feb 100644
--- a/arch/x86/include/asm/Kbuild
+++ b/arch/x86/include/asm/Kbuild
@@ -9,3 +9,4 @@ generic-y += cputime.h
 generic-y += dma-contiguous.h
 generic-y += early_ioremap.h
 generic-y += mcs_spinlock.h
+generic-y += mm-arch-hooks.h
diff --git a/arch/x86/include/asm/fpu/types.h b/arch/x86/include/asm/fpu/types.h
index 0637826292de..c49c5173158e 100644
--- a/arch/x86/include/asm/fpu/types.h
+++ b/arch/x86/include/asm/fpu/types.h
@@ -189,6 +189,7 @@ union fpregs_state {
 	struct fxregs_state		fxsave;
 	struct swregs_state		soft;
 	struct xregs_state		xsave;
+	u8 __padding[PAGE_SIZE];
 };
 
 /*
@@ -198,40 +199,6 @@ union fpregs_state {
  */
 struct fpu {
 	/*
-	 * @state:
-	 *
-	 * In-memory copy of all FPU registers that we save/restore
-	 * over context switches. If the task is using the FPU then
-	 * the registers in the FPU are more recent than this state
-	 * copy. If the task context-switches away then they get
-	 * saved here and represent the FPU state.
-	 *
-	 * After context switches there may be a (short) time period
-	 * during which the in-FPU hardware registers are unchanged
-	 * and still perfectly match this state, if the tasks
-	 * scheduled afterwards are not using the FPU.
-	 *
-	 * This is the 'lazy restore' window of optimization, which
-	 * we track though 'fpu_fpregs_owner_ctx' and 'fpu->last_cpu'.
-	 *
-	 * We detect whether a subsequent task uses the FPU via setting
-	 * CR0::TS to 1, which causes any FPU use to raise a #NM fault.
-	 *
-	 * During this window, if the task gets scheduled again, we
-	 * might be able to skip having to do a restore from this
-	 * memory buffer to the hardware registers - at the cost of
-	 * incurring the overhead of #NM fault traps.
-	 *
-	 * Note that on modern CPUs that support the XSAVEOPT (or other
-	 * optimized XSAVE instructions), we don't use #NM traps anymore,
-	 * as the hardware can track whether FPU registers need saving
-	 * or not. On such CPUs we activate the non-lazy ('eagerfpu')
-	 * logic, which unconditionally saves/restores all FPU state
-	 * across context switches. (if FPU state exists.)
-	 */
-	union fpregs_state		state;
-
-	/*
 	 * @last_cpu:
 	 *
 	 * Records the last CPU on which this context was loaded into
@@ -288,6 +255,43 @@ struct fpu {
 	 * deal with bursty apps that only use the FPU for a short time:
 	 */
 	unsigned char			counter;
+	/*
+	 * @state:
+	 *
+	 * In-memory copy of all FPU registers that we save/restore
+	 * over context switches. If the task is using the FPU then
+	 * the registers in the FPU are more recent than this state
+	 * copy. If the task context-switches away then they get
+	 * saved here and represent the FPU state.
+	 *
+	 * After context switches there may be a (short) time period
+	 * during which the in-FPU hardware registers are unchanged
+	 * and still perfectly match this state, if the tasks
+	 * scheduled afterwards are not using the FPU.
+	 *
+	 * This is the 'lazy restore' window of optimization, which
+	 * we track though 'fpu_fpregs_owner_ctx' and 'fpu->last_cpu'.
+	 *
+	 * We detect whether a subsequent task uses the FPU via setting
+	 * CR0::TS to 1, which causes any FPU use to raise a #NM fault.
+	 *
+	 * During this window, if the task gets scheduled again, we
+	 * might be able to skip having to do a restore from this
+	 * memory buffer to the hardware registers - at the cost of
+	 * incurring the overhead of #NM fault traps.
+	 *
+	 * Note that on modern CPUs that support the XSAVEOPT (or other
+	 * optimized XSAVE instructions), we don't use #NM traps anymore,
+	 * as the hardware can track whether FPU registers need saving
+	 * or not. On such CPUs we activate the non-lazy ('eagerfpu')
+	 * logic, which unconditionally saves/restores all FPU state
+	 * across context switches. (if FPU state exists.)
+	 */
+	union fpregs_state		state;
+	/*
+	 * WARNING: 'state' is dynamically-sized.  Do not put
+	 * anything after it here.
+	 */
 };
 
 #endif /* _ASM_X86_FPU_H */
diff --git a/arch/x86/include/asm/intel_pmc_ipc.h b/arch/x86/include/asm/intel_pmc_ipc.h
index 200ec2e7821d..cd0310e186f4 100644
--- a/arch/x86/include/asm/intel_pmc_ipc.h
+++ b/arch/x86/include/asm/intel_pmc_ipc.h
@@ -25,36 +25,9 @@
 
 #if IS_ENABLED(CONFIG_INTEL_PMC_IPC)
 
-/*
- * intel_pmc_ipc_simple_command
- * @cmd: command
- * @sub: sub type
- */
 int intel_pmc_ipc_simple_command(int cmd, int sub);
-
-/*
- * intel_pmc_ipc_raw_cmd
- * @cmd: command
- * @sub: sub type
- * @in: input data
- * @inlen: input length in bytes
- * @out: output data
- * @outlen: output length in dwords
- * @sptr: data writing to SPTR register
- * @dptr: data writing to DPTR register
- */
 int intel_pmc_ipc_raw_cmd(u32 cmd, u32 sub, u8 *in, u32 inlen,
 		u32 *out, u32 outlen, u32 dptr, u32 sptr);
-
-/*
- * intel_pmc_ipc_command
- * @cmd: command
- * @sub: sub type
- * @in: input data
- * @inlen: input length in bytes
- * @out: output data
- * @outlen: output length in dwords
- */
 int intel_pmc_ipc_command(u32 cmd, u32 sub, u8 *in, u32 inlen,
 		u32 *out, u32 outlen);
 
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 2a7f5d782c33..c12e845f59e6 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -252,6 +252,11 @@ struct kvm_pio_request {
 	int size;
 };
 
+struct rsvd_bits_validate {
+	u64 rsvd_bits_mask[2][4];
+	u64 bad_mt_xwr;
+};
+
 /*
  * x86 supports 3 paging modes (4-level 64-bit, 3-level 64-bit, and 2-level
  * 32-bit).  The kvm_mmu structure abstracts the details of the current mmu
@@ -289,8 +294,15 @@ struct kvm_mmu {
 
 	u64 *pae_root;
 	u64 *lm_root;
-	u64 rsvd_bits_mask[2][4];
-	u64 bad_mt_xwr;
+
+	/*
+	 * check zero bits on shadow page table entries, these
+	 * bits include not only hardware reserved bits but also
+	 * the bits spte never used.
+	 */
+	struct rsvd_bits_validate shadow_zero_check;
+
+	struct rsvd_bits_validate guest_rsvd_check;
 
 	/*
 	 * Bitmap: bit set = last pte in walk
@@ -358,6 +370,11 @@ struct kvm_mtrr {
 	struct list_head head;
 };
 
+/* Hyper-V per vcpu emulation context */
+struct kvm_vcpu_hv {
+	u64 hv_vapic;
+};
+
 struct kvm_vcpu_arch {
 	/*
 	 * rip and regs accesses must go through
@@ -514,8 +531,7 @@ struct kvm_vcpu_arch {
 	/* used for guest single stepping over the given code position */
 	unsigned long singlestep_rip;
 
-	/* fields used by HYPER-V emulation */
-	u64 hv_vapic;
+	struct kvm_vcpu_hv hyperv;
 
 	cpumask_var_t wbinvd_dirty_mask;
 
@@ -586,6 +602,17 @@ struct kvm_apic_map {
 	struct kvm_lapic *logical_map[16][16];
 };
 
+/* Hyper-V emulation context */
+struct kvm_hv {
+	u64 hv_guest_os_id;
+	u64 hv_hypercall;
+	u64 hv_tsc_page;
+
+	/* Hyper-v based guest crash (NT kernel bugcheck) parameters */
+	u64 hv_crash_param[HV_X64_MSR_CRASH_PARAMS];
+	u64 hv_crash_ctl;
+};
+
 struct kvm_arch {
 	unsigned int n_used_mmu_pages;
 	unsigned int n_requested_mmu_pages;
@@ -604,6 +631,8 @@ struct kvm_arch {
 	bool iommu_noncoherent;
 #define __KVM_HAVE_ARCH_NONCOHERENT_DMA
 	atomic_t noncoherent_dma_count;
+#define __KVM_HAVE_ARCH_ASSIGNED_DEVICE
+	atomic_t assigned_device_count;
 	struct kvm_pic *vpic;
 	struct kvm_ioapic *vioapic;
 	struct kvm_pit *vpit;
@@ -643,16 +672,14 @@ struct kvm_arch {
 	/* reads protected by irq_srcu, writes by irq_lock */
 	struct hlist_head mask_notifier_list;
 
-	/* fields used by HYPER-V emulation */
-	u64 hv_guest_os_id;
-	u64 hv_hypercall;
-	u64 hv_tsc_page;
+	struct kvm_hv hyperv;
 
 	#ifdef CONFIG_KVM_MMU_AUDIT
 	int audit_point;
 	#endif
 
 	bool boot_vcpu_runs_old_kvmclock;
+	u32 bsp_vcpu_id;
 
 	u64 disabled_quirks;
 };
@@ -1201,5 +1228,7 @@ int __x86_set_memory_region(struct kvm *kvm,
 			    const struct kvm_userspace_memory_region *mem);
 int x86_set_memory_region(struct kvm *kvm,
 			  const struct kvm_userspace_memory_region *mem);
+bool kvm_vcpu_is_reset_bsp(struct kvm_vcpu *vcpu);
+bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu);
 
 #endif /* _ASM_X86_KVM_HOST_H */
diff --git a/arch/x86/include/asm/mm-arch-hooks.h b/arch/x86/include/asm/mm-arch-hooks.h
deleted file mode 100644
index 4e881a342236..000000000000
--- a/arch/x86/include/asm/mm-arch-hooks.h
+++ /dev/null
@@ -1,15 +0,0 @@
-/*
- * Architecture specific mm hooks
- *
- * Copyright (C) 2015, IBM Corporation
- * Author: Laurent Dufour <ldufour@linux.vnet.ibm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#ifndef _ASM_X86_MM_ARCH_HOOKS_H
-#define _ASM_X86_MM_ARCH_HOOKS_H
-
-#endif /* _ASM_X86_MM_ARCH_HOOKS_H */
diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index 5e8daee7c5c9..804a3a6030ca 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -23,7 +23,7 @@ extern struct static_key rdpmc_always_available;
 
 static inline void load_mm_cr4(struct mm_struct *mm)
 {
-	if (static_key_true(&rdpmc_always_available) ||
+	if (static_key_false(&rdpmc_always_available) ||
 	    atomic_read(&mm->context.perf_rdpmc_allowed))
 		cr4_set_bits(X86_CR4_PCE);
 	else
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 43e6519df0d5..944f1785ed0d 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -390,9 +390,6 @@ struct thread_struct {
 #endif
 	unsigned long		gs;
 
-	/* Floating point and extended processor state */
-	struct fpu		fpu;
-
 	/* Save middle states of ptrace breakpoints */
 	struct perf_event	*ptrace_bps[HBP_NUM];
 	/* Debug status used for traps, single steps, etc... */
@@ -418,6 +415,13 @@ struct thread_struct {
 	unsigned long		iopl;
 	/* Max allowed port in the bitmap, in bytes: */
 	unsigned		io_bitmap_max;
+
+	/* Floating point and extended processor state */
+	struct fpu		fpu;
+	/*
+	 * WARNING: 'fpu' is dynamically-sized.  It *MUST* be at
+	 * the end.
+	 */
 };
 
 /*
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index da772edd19ab..448b7ca61aee 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -47,6 +47,7 @@
 #define CPU_BASED_MOV_DR_EXITING                0x00800000
 #define CPU_BASED_UNCOND_IO_EXITING             0x01000000
 #define CPU_BASED_USE_IO_BITMAPS                0x02000000
+#define CPU_BASED_MONITOR_TRAP_FLAG             0x08000000
 #define CPU_BASED_USE_MSR_BITMAPS               0x10000000
 #define CPU_BASED_MONITOR_EXITING               0x20000000
 #define CPU_BASED_PAUSE_EXITING                 0x40000000
@@ -367,29 +368,29 @@ enum vmcs_field {
 #define TYPE_PHYSICAL_APIC_EVENT        (10 << 12)
 #define TYPE_PHYSICAL_APIC_INST         (15 << 12)
 
-/* segment AR */
-#define SEGMENT_AR_L_MASK (1 << 13)
-
-#define AR_TYPE_ACCESSES_MASK 1
-#define AR_TYPE_READABLE_MASK (1 << 1)
-#define AR_TYPE_WRITEABLE_MASK (1 << 2)
-#define AR_TYPE_CODE_MASK (1 << 3)
-#define AR_TYPE_MASK 0x0f
-#define AR_TYPE_BUSY_64_TSS 11
-#define AR_TYPE_BUSY_32_TSS 11
-#define AR_TYPE_BUSY_16_TSS 3
-#define AR_TYPE_LDT 2
-
-#define AR_UNUSABLE_MASK (1 << 16)
-#define AR_S_MASK (1 << 4)
-#define AR_P_MASK (1 << 7)
-#define AR_L_MASK (1 << 13)
-#define AR_DB_MASK (1 << 14)
-#define AR_G_MASK (1 << 15)
-#define AR_DPL_SHIFT 5
-#define AR_DPL(ar) (((ar) >> AR_DPL_SHIFT) & 3)
-
-#define AR_RESERVD_MASK 0xfffe0f00
+/* segment AR in VMCS -- these are different from what LAR reports */
+#define VMX_SEGMENT_AR_L_MASK (1 << 13)
+
+#define VMX_AR_TYPE_ACCESSES_MASK 1
+#define VMX_AR_TYPE_READABLE_MASK (1 << 1)
+#define VMX_AR_TYPE_WRITEABLE_MASK (1 << 2)
+#define VMX_AR_TYPE_CODE_MASK (1 << 3)
+#define VMX_AR_TYPE_MASK 0x0f
+#define VMX_AR_TYPE_BUSY_64_TSS 11
+#define VMX_AR_TYPE_BUSY_32_TSS 11
+#define VMX_AR_TYPE_BUSY_16_TSS 3
+#define VMX_AR_TYPE_LDT 2
+
+#define VMX_AR_UNUSABLE_MASK (1 << 16)
+#define VMX_AR_S_MASK (1 << 4)
+#define VMX_AR_P_MASK (1 << 7)
+#define VMX_AR_L_MASK (1 << 13)
+#define VMX_AR_DB_MASK (1 << 14)
+#define VMX_AR_G_MASK (1 << 15)
+#define VMX_AR_DPL_SHIFT 5
+#define VMX_AR_DPL(ar) (((ar) >> VMX_AR_DPL_SHIFT) & 3)
+
+#define VMX_AR_RESERVD_MASK 0xfffe0f00
 
 #define TSS_PRIVATE_MEMSLOT			(KVM_USER_MEM_SLOTS + 0)
 #define APIC_ACCESS_PAGE_PRIVATE_MEMSLOT	(KVM_USER_MEM_SLOTS + 1)
diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h
index 8fba544e9cc4..f36d56bd7632 100644
--- a/arch/x86/include/uapi/asm/hyperv.h
+++ b/arch/x86/include/uapi/asm/hyperv.h
@@ -108,6 +108,8 @@
 #define HV_X64_HYPERCALL_PARAMS_XMM_AVAILABLE		(1 << 4)
 /* Support for a virtual guest idle state is available */
 #define HV_X64_GUEST_IDLE_STATE_AVAILABLE		(1 << 5)
+/* Guest crash data handler available */
+#define HV_X64_GUEST_CRASH_MSR_AVAILABLE		(1 << 10)
 
 /*
  * Implementation recommendations. Indicates which behaviors the hypervisor
diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h
index a4ae82eb82aa..cd54147cb365 100644
--- a/arch/x86/include/uapi/asm/kvm.h
+++ b/arch/x86/include/uapi/asm/kvm.h
@@ -354,7 +354,7 @@ struct kvm_xcrs {
 struct kvm_sync_regs {
 };
 
-#define KVM_QUIRK_LINT0_REENABLED	(1 << 0)
-#define KVM_QUIRK_CD_NW_CLEARED		(1 << 1)
+#define KVM_X86_QUIRK_LINT0_REENABLED	(1 << 0)
+#define KVM_X86_QUIRK_CD_NW_CLEARED	(1 << 1)
 
 #endif /* _ASM_X86_KVM_H */
diff --git a/arch/x86/include/uapi/asm/vmx.h b/arch/x86/include/uapi/asm/vmx.h
index 1fe92181ee9e..37fee272618f 100644
--- a/arch/x86/include/uapi/asm/vmx.h
+++ b/arch/x86/include/uapi/asm/vmx.h
@@ -58,6 +58,7 @@
 #define EXIT_REASON_INVALID_STATE       33
 #define EXIT_REASON_MSR_LOAD_FAIL       34
 #define EXIT_REASON_MWAIT_INSTRUCTION   36
+#define EXIT_REASON_MONITOR_TRAP_FLAG   37
 #define EXIT_REASON_MONITOR_INSTRUCTION 39
 #define EXIT_REASON_PAUSE_INSTRUCTION   40
 #define EXIT_REASON_MCE_DURING_VMENTRY  41
@@ -106,6 +107,7 @@
 	{ EXIT_REASON_MSR_READ,              "MSR_READ" }, \
 	{ EXIT_REASON_MSR_WRITE,             "MSR_WRITE" }, \
 	{ EXIT_REASON_MWAIT_INSTRUCTION,     "MWAIT_INSTRUCTION" }, \
+	{ EXIT_REASON_MONITOR_TRAP_FLAG,     "MONITOR_TRAP_FLAG" }, \
 	{ EXIT_REASON_MONITOR_INSTRUCTION,   "MONITOR_INSTRUCTION" }, \
 	{ EXIT_REASON_PAUSE_INSTRUCTION,     "PAUSE_INSTRUCTION" }, \
 	{ EXIT_REASON_MCE_DURING_VMENTRY,    "MCE_DURING_VMENTRY" }, \
diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c
index 32826791e675..0b39173dd971 100644
--- a/arch/x86/kernel/fpu/init.c
+++ b/arch/x86/kernel/fpu/init.c
@@ -4,6 +4,8 @@
 #include <asm/fpu/internal.h>
 #include <asm/tlbflush.h>
 
+#include <linux/sched.h>
+
 /*
  * Initialize the TS bit in CR0 according to the style of context-switches
  * we are using:
@@ -136,6 +138,43 @@ static void __init fpu__init_system_generic(void)
 unsigned int xstate_size;
 EXPORT_SYMBOL_GPL(xstate_size);
 
+/* Enforce that 'MEMBER' is the last field of 'TYPE': */
+#define CHECK_MEMBER_AT_END_OF(TYPE, MEMBER) \
+	BUILD_BUG_ON(sizeof(TYPE) != offsetofend(TYPE, MEMBER))
+
+/*
+ * We append the 'struct fpu' to the task_struct:
+ */
+static void __init fpu__init_task_struct_size(void)
+{
+	int task_size = sizeof(struct task_struct);
+
+	/*
+	 * Subtract off the static size of the register state.
+	 * It potentially has a bunch of padding.
+	 */
+	task_size -= sizeof(((struct task_struct *)0)->thread.fpu.state);
+
+	/*
+	 * Add back the dynamically-calculated register state
+	 * size.
+	 */
+	task_size += xstate_size;
+
+	/*
+	 * We dynamically size 'struct fpu', so we require that
+	 * it be at the end of 'thread_struct' and that
+	 * 'thread_struct' be at the end of 'task_struct'.  If
+	 * you hit a compile error here, check the structure to
+	 * see if something got added to the end.
+	 */
+	CHECK_MEMBER_AT_END_OF(struct fpu, state);
+	CHECK_MEMBER_AT_END_OF(struct thread_struct, fpu);
+	CHECK_MEMBER_AT_END_OF(struct task_struct, thread);
+
+	arch_task_struct_size = task_size;
+}
+
 /*
  * Set up the xstate_size based on the legacy FPU context size.
  *
@@ -287,6 +326,7 @@ void __init fpu__init_system(struct cpuinfo_x86 *c)
 	fpu__init_system_generic();
 	fpu__init_system_xstate_size_legacy();
 	fpu__init_system_xstate();
+	fpu__init_task_struct_size();
 
 	fpu__init_system_ctx_switch();
 }
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index c3e985d1751c..d05bd2e2ee91 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -408,15 +408,15 @@ static void default_do_nmi(struct pt_regs *regs)
 NOKPROBE_SYMBOL(default_do_nmi);
 
 /*
- * NMIs can hit breakpoints which will cause it to lose its
- * NMI context with the CPU when the breakpoint does an iret.
- */
-#ifdef CONFIG_X86_32
-/*
- * For i386, NMIs use the same stack as the kernel, and we can
- * add a workaround to the iret problem in C (preventing nested
- * NMIs if an NMI takes a trap). Simply have 3 states the NMI
- * can be in:
+ * NMIs can page fault or hit breakpoints which will cause it to lose
+ * its NMI context with the CPU when the breakpoint or page fault does an IRET.
+ *
+ * As a result, NMIs can nest if NMIs get unmasked due an IRET during
+ * NMI processing.  On x86_64, the asm glue protects us from nested NMIs
+ * if the outer NMI came from kernel mode, but we can still nest if the
+ * outer NMI came from user mode.
+ *
+ * To handle these nested NMIs, we have three states:
  *
  *  1) not running
  *  2) executing
@@ -430,15 +430,14 @@ NOKPROBE_SYMBOL(default_do_nmi);
  * (Note, the latch is binary, thus multiple NMIs triggering,
  *  when one is running, are ignored. Only one NMI is restarted.)
  *
- * If an NMI hits a breakpoint that executes an iret, another
- * NMI can preempt it. We do not want to allow this new NMI
- * to run, but we want to execute it when the first one finishes.
- * We set the state to "latched", and the exit of the first NMI will
- * perform a dec_return, if the result is zero (NOT_RUNNING), then
- * it will simply exit the NMI handler. If not, the dec_return
- * would have set the state to NMI_EXECUTING (what we want it to
- * be when we are running). In this case, we simply jump back
- * to rerun the NMI handler again, and restart the 'latched' NMI.
+ * If an NMI executes an iret, another NMI can preempt it. We do not
+ * want to allow this new NMI to run, but we want to execute it when the
+ * first one finishes.  We set the state to "latched", and the exit of
+ * the first NMI will perform a dec_return, if the result is zero
+ * (NOT_RUNNING), then it will simply exit the NMI handler. If not, the
+ * dec_return would have set the state to NMI_EXECUTING (what we want it
+ * to be when we are running). In this case, we simply jump back to
+ * rerun the NMI handler again, and restart the 'latched' NMI.
  *
  * No trap (breakpoint or page fault) should be hit before nmi_restart,
  * thus there is no race between the first check of state for NOT_RUNNING
@@ -461,49 +460,36 @@ enum nmi_states {
 static DEFINE_PER_CPU(enum nmi_states, nmi_state);
 static DEFINE_PER_CPU(unsigned long, nmi_cr2);
 
-#define nmi_nesting_preprocess(regs)					\
-	do {								\
-		if (this_cpu_read(nmi_state) != NMI_NOT_RUNNING) {	\
-			this_cpu_write(nmi_state, NMI_LATCHED);		\
-			return;						\
-		}							\
-		this_cpu_write(nmi_state, NMI_EXECUTING);		\
-		this_cpu_write(nmi_cr2, read_cr2());			\
-	} while (0);							\
-	nmi_restart:
-
-#define nmi_nesting_postprocess()					\
-	do {								\
-		if (unlikely(this_cpu_read(nmi_cr2) != read_cr2()))	\
-			write_cr2(this_cpu_read(nmi_cr2));		\
-		if (this_cpu_dec_return(nmi_state))			\
-			goto nmi_restart;				\
-	} while (0)
-#else /* x86_64 */
+#ifdef CONFIG_X86_64
 /*
- * In x86_64 things are a bit more difficult. This has the same problem
- * where an NMI hitting a breakpoint that calls iret will remove the
- * NMI context, allowing a nested NMI to enter. What makes this more
- * difficult is that both NMIs and breakpoints have their own stack.
- * When a new NMI or breakpoint is executed, the stack is set to a fixed
- * point. If an NMI is nested, it will have its stack set at that same
- * fixed address that the first NMI had, and will start corrupting the
- * stack. This is handled in entry_64.S, but the same problem exists with
- * the breakpoint stack.
+ * In x86_64, we need to handle breakpoint -> NMI -> breakpoint.  Without
+ * some care, the inner breakpoint will clobber the outer breakpoint's
+ * stack.
  *
- * If a breakpoint is being processed, and the debug stack is being used,
- * if an NMI comes in and also hits a breakpoint, the stack pointer
- * will be set to the same fixed address as the breakpoint that was
- * interrupted, causing that stack to be corrupted. To handle this case,
- * check if the stack that was interrupted is the debug stack, and if
- * so, change the IDT so that new breakpoints will use the current stack
- * and not switch to the fixed address. On return of the NMI, switch back
- * to the original IDT.
+ * If a breakpoint is being processed, and the debug stack is being
+ * used, if an NMI comes in and also hits a breakpoint, the stack
+ * pointer will be set to the same fixed address as the breakpoint that
+ * was interrupted, causing that stack to be corrupted. To handle this
+ * case, check if the stack that was interrupted is the debug stack, and
+ * if so, change the IDT so that new breakpoints will use the current
+ * stack and not switch to the fixed address. On return of the NMI,
+ * switch back to the original IDT.
  */
 static DEFINE_PER_CPU(int, update_debug_stack);
+#endif
 
-static inline void nmi_nesting_preprocess(struct pt_regs *regs)
+dotraplinkage notrace void
+do_nmi(struct pt_regs *regs, long error_code)
 {
+	if (this_cpu_read(nmi_state) != NMI_NOT_RUNNING) {
+		this_cpu_write(nmi_state, NMI_LATCHED);
+		return;
+	}
+	this_cpu_write(nmi_state, NMI_EXECUTING);
+	this_cpu_write(nmi_cr2, read_cr2());
+nmi_restart:
+
+#ifdef CONFIG_X86_64
 	/*
 	 * If we interrupted a breakpoint, it is possible that
 	 * the nmi handler will have breakpoints too. We need to
@@ -514,22 +500,8 @@ static inline void nmi_nesting_preprocess(struct pt_regs *regs)
 		debug_stack_set_zero();
 		this_cpu_write(update_debug_stack, 1);
 	}
-}
-
-static inline void nmi_nesting_postprocess(void)
-{
-	if (unlikely(this_cpu_read(update_debug_stack))) {
-		debug_stack_reset();
-		this_cpu_write(update_debug_stack, 0);
-	}
-}
 #endif
 
-dotraplinkage notrace void
-do_nmi(struct pt_regs *regs, long error_code)
-{
-	nmi_nesting_preprocess(regs);
-
 	nmi_enter();
 
 	inc_irq_stat(__nmi_count);
@@ -539,8 +511,17 @@ do_nmi(struct pt_regs *regs, long error_code)
 
 	nmi_exit();
 
-	/* On i386, may loop back to preprocess */
-	nmi_nesting_postprocess();
+#ifdef CONFIG_X86_64
+	if (unlikely(this_cpu_read(update_debug_stack))) {
+		debug_stack_reset();
+		this_cpu_write(update_debug_stack, 0);
+	}
+#endif
+
+	if (unlikely(this_cpu_read(nmi_cr2) != read_cr2()))
+		write_cr2(this_cpu_read(nmi_cr2));
+	if (this_cpu_dec_return(nmi_state))
+		goto nmi_restart;
 }
 NOKPROBE_SYMBOL(do_nmi);
 
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 9cad694ed7c4..397688beed4b 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -81,7 +81,7 @@ EXPORT_SYMBOL_GPL(idle_notifier_unregister);
  */
 int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
 {
-	*dst = *src;
+	memcpy(dst, src, arch_task_struct_size);
 
 	return fpu__copy(&dst->thread.fpu, &src->thread.fpu);
 }
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index d3010aa79daf..b1f3ed9c7a9e 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -992,8 +992,17 @@ int native_cpu_up(unsigned int cpu, struct task_struct *tidle)
 
 	common_cpu_up(cpu, tidle);
 
+	/*
+	 * We have to walk the irq descriptors to setup the vector
+	 * space for the cpu which comes online.  Prevent irq
+	 * alloc/free across the bringup.
+	 */
+	irq_lock_sparse();
+
 	err = do_boot_cpu(apicid, cpu, tidle);
+
 	if (err) {
+		irq_unlock_sparse();
 		pr_err("do_boot_cpu failed(%d) to wakeup CPU#%u\n", err, cpu);
 		return -EIO;
 	}
@@ -1011,6 +1020,8 @@ int native_cpu_up(unsigned int cpu, struct task_struct *tidle)
 		touch_nmi_watchdog();
 	}
 
+	irq_unlock_sparse();
+
 	return 0;
 }
 
diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index 67d215cb8953..a1ff508bb423 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -12,7 +12,9 @@ kvm-y			+= $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o \
 kvm-$(CONFIG_KVM_ASYNC_PF)	+= $(KVM)/async_pf.o
 
 kvm-y			+= x86.o mmu.o emulate.o i8259.o irq.o lapic.o \
-			   i8254.o ioapic.o irq_comm.o cpuid.o pmu.o mtrr.o
+			   i8254.o ioapic.o irq_comm.o cpuid.o pmu.o mtrr.o \
+			   hyperv.o
+
 kvm-$(CONFIG_KVM_DEVICE_ASSIGNMENT)	+= assigned-dev.o iommu.o
 kvm-intel-y		+= vmx.o pmu_intel.o
 kvm-amd-y		+= svm.o pmu_amd.o
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 64dd46793099..2fbea2544f24 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -98,6 +98,8 @@ int kvm_update_cpuid(struct kvm_vcpu *vcpu)
 		best->ebx = xstate_required_size(vcpu->arch.xcr0, true);
 
 	vcpu->arch.eager_fpu = use_eager_fpu() || guest_cpuid_has_mpx(vcpu);
+	if (vcpu->arch.eager_fpu)
+		kvm_x86_ops->fpu_activate(vcpu);
 
 	/*
 	 * The existing code assumes virtual address is 48-bit in the canonical
diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
new file mode 100644
index 000000000000..a8160d2ae362
--- /dev/null
+++ b/arch/x86/kvm/hyperv.c
@@ -0,0 +1,377 @@
+/*
+ * KVM Microsoft Hyper-V emulation
+ *
+ * derived from arch/x86/kvm/x86.c
+ *
+ * Copyright (C) 2006 Qumranet, Inc.
+ * Copyright (C) 2008 Qumranet, Inc.
+ * Copyright IBM Corporation, 2008
+ * Copyright 2010 Red Hat, Inc. and/or its affiliates.
+ * Copyright (C) 2015 Andrey Smetanin <asmetanin@virtuozzo.com>
+ *
+ * Authors:
+ *   Avi Kivity   <avi@qumranet.com>
+ *   Yaniv Kamay  <yaniv@qumranet.com>
+ *   Amit Shah    <amit.shah@qumranet.com>
+ *   Ben-Ami Yassour <benami@il.ibm.com>
+ *   Andrey Smetanin <asmetanin@virtuozzo.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ *
+ */
+
+#include "x86.h"
+#include "lapic.h"
+#include "hyperv.h"
+
+#include <linux/kvm_host.h>
+#include <trace/events/kvm.h>
+
+#include "trace.h"
+
+static bool kvm_hv_msr_partition_wide(u32 msr)
+{
+	bool r = false;
+
+	switch (msr) {
+	case HV_X64_MSR_GUEST_OS_ID:
+	case HV_X64_MSR_HYPERCALL:
+	case HV_X64_MSR_REFERENCE_TSC:
+	case HV_X64_MSR_TIME_REF_COUNT:
+	case HV_X64_MSR_CRASH_CTL:
+	case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4:
+		r = true;
+		break;
+	}
+
+	return r;
+}
+
+static int kvm_hv_msr_get_crash_data(struct kvm_vcpu *vcpu,
+				     u32 index, u64 *pdata)
+{
+	struct kvm_hv *hv = &vcpu->kvm->arch.hyperv;
+
+	if (WARN_ON_ONCE(index >= ARRAY_SIZE(hv->hv_crash_param)))
+		return -EINVAL;
+
+	*pdata = hv->hv_crash_param[index];
+	return 0;
+}
+
+static int kvm_hv_msr_get_crash_ctl(struct kvm_vcpu *vcpu, u64 *pdata)
+{
+	struct kvm_hv *hv = &vcpu->kvm->arch.hyperv;
+
+	*pdata = hv->hv_crash_ctl;
+	return 0;
+}
+
+static int kvm_hv_msr_set_crash_ctl(struct kvm_vcpu *vcpu, u64 data, bool host)
+{
+	struct kvm_hv *hv = &vcpu->kvm->arch.hyperv;
+
+	if (host)
+		hv->hv_crash_ctl = data & HV_X64_MSR_CRASH_CTL_NOTIFY;
+
+	if (!host && (data & HV_X64_MSR_CRASH_CTL_NOTIFY)) {
+
+		vcpu_debug(vcpu, "hv crash (0x%llx 0x%llx 0x%llx 0x%llx 0x%llx)\n",
+			  hv->hv_crash_param[0],
+			  hv->hv_crash_param[1],
+			  hv->hv_crash_param[2],
+			  hv->hv_crash_param[3],
+			  hv->hv_crash_param[4]);
+
+		/* Send notification about crash to user space */
+		kvm_make_request(KVM_REQ_HV_CRASH, vcpu);
+	}
+
+	return 0;
+}
+
+static int kvm_hv_msr_set_crash_data(struct kvm_vcpu *vcpu,
+				     u32 index, u64 data)
+{
+	struct kvm_hv *hv = &vcpu->kvm->arch.hyperv;
+
+	if (WARN_ON_ONCE(index >= ARRAY_SIZE(hv->hv_crash_param)))
+		return -EINVAL;
+
+	hv->hv_crash_param[index] = data;
+	return 0;
+}
+
+static int kvm_hv_set_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data,
+			     bool host)
+{
+	struct kvm *kvm = vcpu->kvm;
+	struct kvm_hv *hv = &kvm->arch.hyperv;
+
+	switch (msr) {
+	case HV_X64_MSR_GUEST_OS_ID:
+		hv->hv_guest_os_id = data;
+		/* setting guest os id to zero disables hypercall page */
+		if (!hv->hv_guest_os_id)
+			hv->hv_hypercall &= ~HV_X64_MSR_HYPERCALL_ENABLE;
+		break;
+	case HV_X64_MSR_HYPERCALL: {
+		u64 gfn;
+		unsigned long addr;
+		u8 instructions[4];
+
+		/* if guest os id is not set hypercall should remain disabled */
+		if (!hv->hv_guest_os_id)
+			break;
+		if (!(data & HV_X64_MSR_HYPERCALL_ENABLE)) {
+			hv->hv_hypercall = data;
+			break;
+		}
+		gfn = data >> HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_SHIFT;
+		addr = gfn_to_hva(kvm, gfn);
+		if (kvm_is_error_hva(addr))
+			return 1;
+		kvm_x86_ops->patch_hypercall(vcpu, instructions);
+		((unsigned char *)instructions)[3] = 0xc3; /* ret */
+		if (__copy_to_user((void __user *)addr, instructions, 4))
+			return 1;
+		hv->hv_hypercall = data;
+		mark_page_dirty(kvm, gfn);
+		break;
+	}
+	case HV_X64_MSR_REFERENCE_TSC: {
+		u64 gfn;
+		HV_REFERENCE_TSC_PAGE tsc_ref;
+
+		memset(&tsc_ref, 0, sizeof(tsc_ref));
+		hv->hv_tsc_page = data;
+		if (!(data & HV_X64_MSR_TSC_REFERENCE_ENABLE))
+			break;
+		gfn = data >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT;
+		if (kvm_write_guest(
+				kvm,
+				gfn << HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT,
+				&tsc_ref, sizeof(tsc_ref)))
+			return 1;
+		mark_page_dirty(kvm, gfn);
+		break;
+	}
+	case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4:
+		return kvm_hv_msr_set_crash_data(vcpu,
+						 msr - HV_X64_MSR_CRASH_P0,
+						 data);
+	case HV_X64_MSR_CRASH_CTL:
+		return kvm_hv_msr_set_crash_ctl(vcpu, data, host);
+	default:
+		vcpu_unimpl(vcpu, "Hyper-V uhandled wrmsr: 0x%x data 0x%llx\n",
+			    msr, data);
+		return 1;
+	}
+	return 0;
+}
+
+static int kvm_hv_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data)
+{
+	struct kvm_vcpu_hv *hv = &vcpu->arch.hyperv;
+
+	switch (msr) {
+	case HV_X64_MSR_APIC_ASSIST_PAGE: {
+		u64 gfn;
+		unsigned long addr;
+
+		if (!(data & HV_X64_MSR_APIC_ASSIST_PAGE_ENABLE)) {
+			hv->hv_vapic = data;
+			if (kvm_lapic_enable_pv_eoi(vcpu, 0))
+				return 1;
+			break;
+		}
+		gfn = data >> HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT;
+		addr = kvm_vcpu_gfn_to_hva(vcpu, gfn);
+		if (kvm_is_error_hva(addr))
+			return 1;
+		if (__clear_user((void __user *)addr, PAGE_SIZE))
+			return 1;
+		hv->hv_vapic = data;
+		kvm_vcpu_mark_page_dirty(vcpu, gfn);
+		if (kvm_lapic_enable_pv_eoi(vcpu,
+					    gfn_to_gpa(gfn) | KVM_MSR_ENABLED))
+			return 1;
+		break;
+	}
+	case HV_X64_MSR_EOI:
+		return kvm_hv_vapic_msr_write(vcpu, APIC_EOI, data);
+	case HV_X64_MSR_ICR:
+		return kvm_hv_vapic_msr_write(vcpu, APIC_ICR, data);
+	case HV_X64_MSR_TPR:
+		return kvm_hv_vapic_msr_write(vcpu, APIC_TASKPRI, data);
+	default:
+		vcpu_unimpl(vcpu, "Hyper-V uhandled wrmsr: 0x%x data 0x%llx\n",
+			    msr, data);
+		return 1;
+	}
+
+	return 0;
+}
+
+static int kvm_hv_get_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
+{
+	u64 data = 0;
+	struct kvm *kvm = vcpu->kvm;
+	struct kvm_hv *hv = &kvm->arch.hyperv;
+
+	switch (msr) {
+	case HV_X64_MSR_GUEST_OS_ID:
+		data = hv->hv_guest_os_id;
+		break;
+	case HV_X64_MSR_HYPERCALL:
+		data = hv->hv_hypercall;
+		break;
+	case HV_X64_MSR_TIME_REF_COUNT: {
+		data =
+		     div_u64(get_kernel_ns() + kvm->arch.kvmclock_offset, 100);
+		break;
+	}
+	case HV_X64_MSR_REFERENCE_TSC:
+		data = hv->hv_tsc_page;
+		break;
+	case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4:
+		return kvm_hv_msr_get_crash_data(vcpu,
+						 msr - HV_X64_MSR_CRASH_P0,
+						 pdata);
+	case HV_X64_MSR_CRASH_CTL:
+		return kvm_hv_msr_get_crash_ctl(vcpu, pdata);
+	default:
+		vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr);
+		return 1;
+	}
+
+	*pdata = data;
+	return 0;
+}
+
+static int kvm_hv_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
+{
+	u64 data = 0;
+	struct kvm_vcpu_hv *hv = &vcpu->arch.hyperv;
+
+	switch (msr) {
+	case HV_X64_MSR_VP_INDEX: {
+		int r;
+		struct kvm_vcpu *v;
+
+		kvm_for_each_vcpu(r, v, vcpu->kvm) {
+			if (v == vcpu) {
+				data = r;
+				break;
+			}
+		}
+		break;
+	}
+	case HV_X64_MSR_EOI:
+		return kvm_hv_vapic_msr_read(vcpu, APIC_EOI, pdata);
+	case HV_X64_MSR_ICR:
+		return kvm_hv_vapic_msr_read(vcpu, APIC_ICR, pdata);
+	case HV_X64_MSR_TPR:
+		return kvm_hv_vapic_msr_read(vcpu, APIC_TASKPRI, pdata);
+	case HV_X64_MSR_APIC_ASSIST_PAGE:
+		data = hv->hv_vapic;
+		break;
+	default:
+		vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr);
+		return 1;
+	}
+	*pdata = data;
+	return 0;
+}
+
+int kvm_hv_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host)
+{
+	if (kvm_hv_msr_partition_wide(msr)) {
+		int r;
+
+		mutex_lock(&vcpu->kvm->lock);
+		r = kvm_hv_set_msr_pw(vcpu, msr, data, host);
+		mutex_unlock(&vcpu->kvm->lock);
+		return r;
+	} else
+		return kvm_hv_set_msr(vcpu, msr, data);
+}
+
+int kvm_hv_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
+{
+	if (kvm_hv_msr_partition_wide(msr)) {
+		int r;
+
+		mutex_lock(&vcpu->kvm->lock);
+		r = kvm_hv_get_msr_pw(vcpu, msr, pdata);
+		mutex_unlock(&vcpu->kvm->lock);
+		return r;
+	} else
+		return kvm_hv_get_msr(vcpu, msr, pdata);
+}
+
+bool kvm_hv_hypercall_enabled(struct kvm *kvm)
+{
+	return kvm->arch.hyperv.hv_hypercall & HV_X64_MSR_HYPERCALL_ENABLE;
+}
+
+int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
+{
+	u64 param, ingpa, outgpa, ret;
+	uint16_t code, rep_idx, rep_cnt, res = HV_STATUS_SUCCESS, rep_done = 0;
+	bool fast, longmode;
+
+	/*
+	 * hypercall generates UD from non zero cpl and real mode
+	 * per HYPER-V spec
+	 */
+	if (kvm_x86_ops->get_cpl(vcpu) != 0 || !is_protmode(vcpu)) {
+		kvm_queue_exception(vcpu, UD_VECTOR);
+		return 0;
+	}
+
+	longmode = is_64_bit_mode(vcpu);
+
+	if (!longmode) {
+		param = ((u64)kvm_register_read(vcpu, VCPU_REGS_RDX) << 32) |
+			(kvm_register_read(vcpu, VCPU_REGS_RAX) & 0xffffffff);
+		ingpa = ((u64)kvm_register_read(vcpu, VCPU_REGS_RBX) << 32) |
+			(kvm_register_read(vcpu, VCPU_REGS_RCX) & 0xffffffff);
+		outgpa = ((u64)kvm_register_read(vcpu, VCPU_REGS_RDI) << 32) |
+			(kvm_register_read(vcpu, VCPU_REGS_RSI) & 0xffffffff);
+	}
+#ifdef CONFIG_X86_64
+	else {
+		param = kvm_register_read(vcpu, VCPU_REGS_RCX);
+		ingpa = kvm_register_read(vcpu, VCPU_REGS_RDX);
+		outgpa = kvm_register_read(vcpu, VCPU_REGS_R8);
+	}
+#endif
+
+	code = param & 0xffff;
+	fast = (param >> 16) & 0x1;
+	rep_cnt = (param >> 32) & 0xfff;
+	rep_idx = (param >> 48) & 0xfff;
+
+	trace_kvm_hv_hypercall(code, fast, rep_cnt, rep_idx, ingpa, outgpa);
+
+	switch (code) {
+	case HV_X64_HV_NOTIFY_LONG_SPIN_WAIT:
+		kvm_vcpu_on_spin(vcpu);
+		break;
+	default:
+		res = HV_STATUS_INVALID_HYPERCALL_CODE;
+		break;
+	}
+
+	ret = res | (((u64)rep_done & 0xfff) << 32);
+	if (longmode) {
+		kvm_register_write(vcpu, VCPU_REGS_RAX, ret);
+	} else {
+		kvm_register_write(vcpu, VCPU_REGS_RDX, ret >> 32);
+		kvm_register_write(vcpu, VCPU_REGS_RAX, ret & 0xffffffff);
+	}
+
+	return 1;
+}
diff --git a/arch/x86/kvm/hyperv.h b/arch/x86/kvm/hyperv.h
new file mode 100644
index 000000000000..c7bce559f67b
--- /dev/null
+++ b/arch/x86/kvm/hyperv.h
@@ -0,0 +1,32 @@
+/*
+ * KVM Microsoft Hyper-V emulation
+ *
+ * derived from arch/x86/kvm/x86.c
+ *
+ * Copyright (C) 2006 Qumranet, Inc.
+ * Copyright (C) 2008 Qumranet, Inc.
+ * Copyright IBM Corporation, 2008
+ * Copyright 2010 Red Hat, Inc. and/or its affiliates.
+ * Copyright (C) 2015 Andrey Smetanin <asmetanin@virtuozzo.com>
+ *
+ * Authors:
+ *   Avi Kivity   <avi@qumranet.com>
+ *   Yaniv Kamay  <yaniv@qumranet.com>
+ *   Amit Shah    <amit.shah@qumranet.com>
+ *   Ben-Ami Yassour <benami@il.ibm.com>
+ *   Andrey Smetanin <asmetanin@virtuozzo.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ *
+ */
+
+#ifndef __ARCH_X86_KVM_HYPERV_H__
+#define __ARCH_X86_KVM_HYPERV_H__
+
+int kvm_hv_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host);
+int kvm_hv_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata);
+bool kvm_hv_hypercall_enabled(struct kvm *kvm);
+int kvm_hv_hypercall(struct kvm_vcpu *vcpu);
+
+#endif
diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c
index fef922ff2635..7cc2360f1848 100644
--- a/arch/x86/kvm/i8259.c
+++ b/arch/x86/kvm/i8259.c
@@ -651,15 +651,10 @@ fail_unlock:
 	return NULL;
 }
 
-void kvm_destroy_pic(struct kvm *kvm)
+void kvm_destroy_pic(struct kvm_pic *vpic)
 {
-	struct kvm_pic *vpic = kvm->arch.vpic;
-
-	if (vpic) {
-		kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &vpic->dev_master);
-		kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &vpic->dev_slave);
-		kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &vpic->dev_eclr);
-		kvm->arch.vpic = NULL;
-		kfree(vpic);
-	}
+	kvm_io_bus_unregister_dev(vpic->kvm, KVM_PIO_BUS, &vpic->dev_master);
+	kvm_io_bus_unregister_dev(vpic->kvm, KVM_PIO_BUS, &vpic->dev_slave);
+	kvm_io_bus_unregister_dev(vpic->kvm, KVM_PIO_BUS, &vpic->dev_eclr);
+	kfree(vpic);
 }
diff --git a/arch/x86/kvm/iommu.c b/arch/x86/kvm/iommu.c
index 7dbced309ddb..5c520ebf6343 100644
--- a/arch/x86/kvm/iommu.c
+++ b/arch/x86/kvm/iommu.c
@@ -200,6 +200,7 @@ int kvm_assign_device(struct kvm *kvm, struct pci_dev *pdev)
 			goto out_unmap;
 	}
 
+	kvm_arch_start_assignment(kvm);
 	pci_set_dev_assigned(pdev);
 
 	dev_info(&pdev->dev, "kvm assign device\n");
@@ -224,6 +225,7 @@ int kvm_deassign_device(struct kvm *kvm, struct pci_dev *pdev)
 	iommu_detach_device(domain, &pdev->dev);
 
 	pci_clear_dev_assigned(pdev);
+	kvm_arch_end_assignment(kvm);
 
 	dev_info(&pdev->dev, "kvm deassign device\n");
 
diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h
index ad68c73008c5..3d782a2c336a 100644
--- a/arch/x86/kvm/irq.h
+++ b/arch/x86/kvm/irq.h
@@ -74,7 +74,7 @@ struct kvm_pic {
 };
 
 struct kvm_pic *kvm_create_pic(struct kvm *kvm);
-void kvm_destroy_pic(struct kvm *kvm);
+void kvm_destroy_pic(struct kvm_pic *vpic);
 int kvm_pic_read_irq(struct kvm *kvm);
 void kvm_pic_update_irq(struct kvm_pic *s);
 
@@ -85,11 +85,11 @@ static inline struct kvm_pic *pic_irqchip(struct kvm *kvm)
 
 static inline int irqchip_in_kernel(struct kvm *kvm)
 {
-	int ret;
+	struct kvm_pic *vpic = pic_irqchip(kvm);
 
-	ret = (pic_irqchip(kvm) != NULL);
+	/* Read vpic before kvm->irq_routing.  */
 	smp_rmb();
-	return ret;
+	return vpic != NULL;
 }
 
 void kvm_pic_reset(struct kvm_kpic_state *s);
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 954e98a8c2e3..9a3e342e3cda 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1595,7 +1595,7 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event)
 	for (i = 0; i < APIC_LVT_NUM; i++)
 		apic_set_reg(apic, APIC_LVTT + 0x10 * i, APIC_LVT_MASKED);
 	apic_update_lvtt(apic);
-	if (!(vcpu->kvm->arch.disabled_quirks & KVM_QUIRK_LINT0_REENABLED))
+	if (kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_LINT0_REENABLED))
 		apic_set_reg(apic, APIC_LVT0,
 			     SET_APIC_DELIVERY_MODE(0, APIC_MODE_EXTINT));
 	apic_manage_nmi_watchdog(apic, kvm_apic_get_reg(apic, APIC_LVT0));
@@ -1900,8 +1900,9 @@ void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu)
 	if (!test_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention))
 		return;
 
-	kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.apic->vapic_cache, &data,
-				sizeof(u32));
+	if (kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.apic->vapic_cache, &data,
+				  sizeof(u32)))
+		return;
 
 	apic_set_tpr(vcpu->arch.apic, data & 0xff);
 }
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index 71952748222a..764037991d26 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -91,7 +91,7 @@ int kvm_hv_vapic_msr_read(struct kvm_vcpu *vcpu, u32 msr, u64 *data);
 
 static inline bool kvm_hv_vapic_assist_page_enabled(struct kvm_vcpu *vcpu)
 {
-	return vcpu->arch.hv_vapic & HV_X64_MSR_APIC_ASSIST_PAGE_ENABLE;
+	return vcpu->arch.hyperv.hv_vapic & HV_X64_MSR_APIC_ASSIST_PAGE_ENABLE;
 }
 
 int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data);
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index f807496b62c2..fb16a8ea3dee 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -357,12 +357,6 @@ static u64 __get_spte_lockless(u64 *sptep)
 {
 	return ACCESS_ONCE(*sptep);
 }
-
-static bool __check_direct_spte_mmio_pf(u64 spte)
-{
-	/* It is valid if the spte is zapped. */
-	return spte == 0ull;
-}
 #else
 union split_spte {
 	struct {
@@ -478,23 +472,6 @@ retry:
 
 	return spte.spte;
 }
-
-static bool __check_direct_spte_mmio_pf(u64 spte)
-{
-	union split_spte sspte = (union split_spte)spte;
-	u32 high_mmio_mask = shadow_mmio_mask >> 32;
-
-	/* It is valid if the spte is zapped. */
-	if (spte == 0ull)
-		return true;
-
-	/* It is valid if the spte is being zapped. */
-	if (sspte.spte_low == 0ull &&
-	    (sspte.spte_high & high_mmio_mask) == high_mmio_mask)
-		return true;
-
-	return false;
-}
 #endif
 
 static bool spte_is_locklessly_modifiable(u64 spte)
@@ -2479,6 +2456,14 @@ static int mmu_need_write_protect(struct kvm_vcpu *vcpu, gfn_t gfn,
 	return 0;
 }
 
+static bool kvm_is_mmio_pfn(pfn_t pfn)
+{
+	if (pfn_valid(pfn))
+		return !is_zero_pfn(pfn) && PageReserved(pfn_to_page(pfn));
+
+	return true;
+}
+
 static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
 		    unsigned pte_access, int level,
 		    gfn_t gfn, pfn_t pfn, bool speculative,
@@ -2506,7 +2491,7 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
 		spte |= PT_PAGE_SIZE_MASK;
 	if (tdp_enabled)
 		spte |= kvm_x86_ops->get_mt_mask(vcpu, gfn,
-			kvm_is_reserved_pfn(pfn));
+			kvm_is_mmio_pfn(pfn));
 
 	if (host_writable)
 		spte |= SPTE_HOST_WRITEABLE;
@@ -3283,54 +3268,89 @@ static gpa_t nonpaging_gva_to_gpa_nested(struct kvm_vcpu *vcpu, gva_t vaddr,
 	return vcpu->arch.nested_mmu.translate_gpa(vcpu, vaddr, access, exception);
 }
 
-static bool quickly_check_mmio_pf(struct kvm_vcpu *vcpu, u64 addr, bool direct)
+static bool
+__is_rsvd_bits_set(struct rsvd_bits_validate *rsvd_check, u64 pte, int level)
 {
-	if (direct)
-		return vcpu_match_mmio_gpa(vcpu, addr);
+	int bit7 = (pte >> 7) & 1, low6 = pte & 0x3f;
 
-	return vcpu_match_mmio_gva(vcpu, addr);
+	return (pte & rsvd_check->rsvd_bits_mask[bit7][level-1]) |
+		((rsvd_check->bad_mt_xwr & (1ull << low6)) != 0);
 }
 
+static bool is_rsvd_bits_set(struct kvm_mmu *mmu, u64 gpte, int level)
+{
+	return __is_rsvd_bits_set(&mmu->guest_rsvd_check, gpte, level);
+}
 
-/*
- * On direct hosts, the last spte is only allows two states
- * for mmio page fault:
- *   - It is the mmio spte
- *   - It is zapped or it is being zapped.
- *
- * This function completely checks the spte when the last spte
- * is not the mmio spte.
- */
-static bool check_direct_spte_mmio_pf(u64 spte)
+static bool is_shadow_zero_bits_set(struct kvm_mmu *mmu, u64 spte, int level)
 {
-	return __check_direct_spte_mmio_pf(spte);
+	return __is_rsvd_bits_set(&mmu->shadow_zero_check, spte, level);
 }
 
-static u64 walk_shadow_page_get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr)
+static bool quickly_check_mmio_pf(struct kvm_vcpu *vcpu, u64 addr, bool direct)
+{
+	if (direct)
+		return vcpu_match_mmio_gpa(vcpu, addr);
+
+	return vcpu_match_mmio_gva(vcpu, addr);
+}
+
+/* return true if reserved bit is detected on spte. */
+static bool
+walk_shadow_page_get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep)
 {
 	struct kvm_shadow_walk_iterator iterator;
-	u64 spte = 0ull;
+	u64 sptes[PT64_ROOT_LEVEL], spte = 0ull;
+	int root, leaf;
+	bool reserved = false;
 
 	if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
-		return spte;
+		goto exit;
 
 	walk_shadow_page_lockless_begin(vcpu);
-	for_each_shadow_entry_lockless(vcpu, addr, iterator, spte)
+
+	for (shadow_walk_init(&iterator, vcpu, addr), root = iterator.level;
+	     shadow_walk_okay(&iterator);
+	     __shadow_walk_next(&iterator, spte)) {
+		leaf = iterator.level;
+		spte = mmu_spte_get_lockless(iterator.sptep);
+
+		sptes[leaf - 1] = spte;
+
 		if (!is_shadow_present_pte(spte))
 			break;
+
+		reserved |= is_shadow_zero_bits_set(&vcpu->arch.mmu, spte,
+						    leaf);
+	}
+
 	walk_shadow_page_lockless_end(vcpu);
 
-	return spte;
+	if (reserved) {
+		pr_err("%s: detect reserved bits on spte, addr 0x%llx, dump hierarchy:\n",
+		       __func__, addr);
+		while (root >= leaf) {
+			pr_err("------ spte 0x%llx level %d.\n",
+			       sptes[root - 1], root);
+			root--;
+		}
+	}
+exit:
+	*sptep = spte;
+	return reserved;
 }
 
 int handle_mmio_page_fault_common(struct kvm_vcpu *vcpu, u64 addr, bool direct)
 {
 	u64 spte;
+	bool reserved;
 
 	if (quickly_check_mmio_pf(vcpu, addr, direct))
 		return RET_MMIO_PF_EMULATE;
 
-	spte = walk_shadow_page_get_mmio_spte(vcpu, addr);
+	reserved = walk_shadow_page_get_mmio_spte(vcpu, addr, &spte);
+	if (unlikely(reserved))
+		return RET_MMIO_PF_BUG;
 
 	if (is_mmio_spte(spte)) {
 		gfn_t gfn = get_mmio_spte_gfn(spte);
@@ -3348,13 +3368,6 @@ int handle_mmio_page_fault_common(struct kvm_vcpu *vcpu, u64 addr, bool direct)
 	}
 
 	/*
-	 * It's ok if the gva is remapped by other cpus on shadow guest,
-	 * it's a BUG if the gfn is not a mmio page.
-	 */
-	if (direct && !check_direct_spte_mmio_pf(spte))
-		return RET_MMIO_PF_BUG;
-
-	/*
 	 * If the page table is zapped by other cpus, let CPU fault again on
 	 * the address.
 	 */
@@ -3596,19 +3609,21 @@ static inline bool is_last_gpte(struct kvm_mmu *mmu, unsigned level, unsigned gp
 #include "paging_tmpl.h"
 #undef PTTYPE
 
-static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu,
-				  struct kvm_mmu *context)
+static void
+__reset_rsvds_bits_mask(struct kvm_vcpu *vcpu,
+			struct rsvd_bits_validate *rsvd_check,
+			int maxphyaddr, int level, bool nx, bool gbpages,
+			bool pse)
 {
-	int maxphyaddr = cpuid_maxphyaddr(vcpu);
 	u64 exb_bit_rsvd = 0;
 	u64 gbpages_bit_rsvd = 0;
 	u64 nonleaf_bit8_rsvd = 0;
 
-	context->bad_mt_xwr = 0;
+	rsvd_check->bad_mt_xwr = 0;
 
-	if (!context->nx)
+	if (!nx)
 		exb_bit_rsvd = rsvd_bits(63, 63);
-	if (!guest_cpuid_has_gbpages(vcpu))
+	if (!gbpages)
 		gbpages_bit_rsvd = rsvd_bits(7, 7);
 
 	/*
@@ -3618,80 +3633,95 @@ static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu,
 	if (guest_cpuid_is_amd(vcpu))
 		nonleaf_bit8_rsvd = rsvd_bits(8, 8);
 
-	switch (context->root_level) {
+	switch (level) {
 	case PT32_ROOT_LEVEL:
 		/* no rsvd bits for 2 level 4K page table entries */
-		context->rsvd_bits_mask[0][1] = 0;
-		context->rsvd_bits_mask[0][0] = 0;
-		context->rsvd_bits_mask[1][0] = context->rsvd_bits_mask[0][0];
+		rsvd_check->rsvd_bits_mask[0][1] = 0;
+		rsvd_check->rsvd_bits_mask[0][0] = 0;
+		rsvd_check->rsvd_bits_mask[1][0] =
+			rsvd_check->rsvd_bits_mask[0][0];
 
-		if (!is_pse(vcpu)) {
-			context->rsvd_bits_mask[1][1] = 0;
+		if (!pse) {
+			rsvd_check->rsvd_bits_mask[1][1] = 0;
 			break;
 		}
 
 		if (is_cpuid_PSE36())
 			/* 36bits PSE 4MB page */
-			context->rsvd_bits_mask[1][1] = rsvd_bits(17, 21);
+			rsvd_check->rsvd_bits_mask[1][1] = rsvd_bits(17, 21);
 		else
 			/* 32 bits PSE 4MB page */
-			context->rsvd_bits_mask[1][1] = rsvd_bits(13, 21);
+			rsvd_check->rsvd_bits_mask[1][1] = rsvd_bits(13, 21);
 		break;
 	case PT32E_ROOT_LEVEL:
-		context->rsvd_bits_mask[0][2] =
+		rsvd_check->rsvd_bits_mask[0][2] =
 			rsvd_bits(maxphyaddr, 63) |
 			rsvd_bits(5, 8) | rsvd_bits(1, 2);	/* PDPTE */
-		context->rsvd_bits_mask[0][1] = exb_bit_rsvd |
+		rsvd_check->rsvd_bits_mask[0][1] = exb_bit_rsvd |
 			rsvd_bits(maxphyaddr, 62);	/* PDE */
-		context->rsvd_bits_mask[0][0] = exb_bit_rsvd |
+		rsvd_check->rsvd_bits_mask[0][0] = exb_bit_rsvd |
 			rsvd_bits(maxphyaddr, 62); 	/* PTE */
-		context->rsvd_bits_mask[1][1] = exb_bit_rsvd |
+		rsvd_check->rsvd_bits_mask[1][1] = exb_bit_rsvd |
 			rsvd_bits(maxphyaddr, 62) |
 			rsvd_bits(13, 20);		/* large page */
-		context->rsvd_bits_mask[1][0] = context->rsvd_bits_mask[0][0];
+		rsvd_check->rsvd_bits_mask[1][0] =
+			rsvd_check->rsvd_bits_mask[0][0];
 		break;
 	case PT64_ROOT_LEVEL:
-		context->rsvd_bits_mask[0][3] = exb_bit_rsvd |
-			nonleaf_bit8_rsvd | rsvd_bits(7, 7) | rsvd_bits(maxphyaddr, 51);
-		context->rsvd_bits_mask[0][2] = exb_bit_rsvd |
-			nonleaf_bit8_rsvd | gbpages_bit_rsvd | rsvd_bits(maxphyaddr, 51);
-		context->rsvd_bits_mask[0][1] = exb_bit_rsvd |
+		rsvd_check->rsvd_bits_mask[0][3] = exb_bit_rsvd |
+			nonleaf_bit8_rsvd | rsvd_bits(7, 7) |
 			rsvd_bits(maxphyaddr, 51);
-		context->rsvd_bits_mask[0][0] = exb_bit_rsvd |
+		rsvd_check->rsvd_bits_mask[0][2] = exb_bit_rsvd |
+			nonleaf_bit8_rsvd | gbpages_bit_rsvd |
 			rsvd_bits(maxphyaddr, 51);
-		context->rsvd_bits_mask[1][3] = context->rsvd_bits_mask[0][3];
-		context->rsvd_bits_mask[1][2] = exb_bit_rsvd |
+		rsvd_check->rsvd_bits_mask[0][1] = exb_bit_rsvd |
+			rsvd_bits(maxphyaddr, 51);
+		rsvd_check->rsvd_bits_mask[0][0] = exb_bit_rsvd |
+			rsvd_bits(maxphyaddr, 51);
+		rsvd_check->rsvd_bits_mask[1][3] =
+			rsvd_check->rsvd_bits_mask[0][3];
+		rsvd_check->rsvd_bits_mask[1][2] = exb_bit_rsvd |
 			gbpages_bit_rsvd | rsvd_bits(maxphyaddr, 51) |
 			rsvd_bits(13, 29);
-		context->rsvd_bits_mask[1][1] = exb_bit_rsvd |
+		rsvd_check->rsvd_bits_mask[1][1] = exb_bit_rsvd |
 			rsvd_bits(maxphyaddr, 51) |
 			rsvd_bits(13, 20);		/* large page */
-		context->rsvd_bits_mask[1][0] = context->rsvd_bits_mask[0][0];
+		rsvd_check->rsvd_bits_mask[1][0] =
+			rsvd_check->rsvd_bits_mask[0][0];
 		break;
 	}
 }
 
-static void reset_rsvds_bits_mask_ept(struct kvm_vcpu *vcpu,
-		struct kvm_mmu *context, bool execonly)
+static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu,
+				  struct kvm_mmu *context)
+{
+	__reset_rsvds_bits_mask(vcpu, &context->guest_rsvd_check,
+				cpuid_maxphyaddr(vcpu), context->root_level,
+				context->nx, guest_cpuid_has_gbpages(vcpu),
+				is_pse(vcpu));
+}
+
+static void
+__reset_rsvds_bits_mask_ept(struct rsvd_bits_validate *rsvd_check,
+			    int maxphyaddr, bool execonly)
 {
-	int maxphyaddr = cpuid_maxphyaddr(vcpu);
 	int pte;
 
-	context->rsvd_bits_mask[0][3] =
+	rsvd_check->rsvd_bits_mask[0][3] =
 		rsvd_bits(maxphyaddr, 51) | rsvd_bits(3, 7);
-	context->rsvd_bits_mask[0][2] =
+	rsvd_check->rsvd_bits_mask[0][2] =
 		rsvd_bits(maxphyaddr, 51) | rsvd_bits(3, 6);
-	context->rsvd_bits_mask[0][1] =
+	rsvd_check->rsvd_bits_mask[0][1] =
 		rsvd_bits(maxphyaddr, 51) | rsvd_bits(3, 6);
-	context->rsvd_bits_mask[0][0] = rsvd_bits(maxphyaddr, 51);
+	rsvd_check->rsvd_bits_mask[0][0] = rsvd_bits(maxphyaddr, 51);
 
 	/* large page */
-	context->rsvd_bits_mask[1][3] = context->rsvd_bits_mask[0][3];
-	context->rsvd_bits_mask[1][2] =
+	rsvd_check->rsvd_bits_mask[1][3] = rsvd_check->rsvd_bits_mask[0][3];
+	rsvd_check->rsvd_bits_mask[1][2] =
 		rsvd_bits(maxphyaddr, 51) | rsvd_bits(12, 29);
-	context->rsvd_bits_mask[1][1] =
+	rsvd_check->rsvd_bits_mask[1][1] =
 		rsvd_bits(maxphyaddr, 51) | rsvd_bits(12, 20);
-	context->rsvd_bits_mask[1][0] = context->rsvd_bits_mask[0][0];
+	rsvd_check->rsvd_bits_mask[1][0] = rsvd_check->rsvd_bits_mask[0][0];
 
 	for (pte = 0; pte < 64; pte++) {
 		int rwx_bits = pte & 7;
@@ -3699,10 +3729,64 @@ static void reset_rsvds_bits_mask_ept(struct kvm_vcpu *vcpu,
 		if (mt == 0x2 || mt == 0x3 || mt == 0x7 ||
 				rwx_bits == 0x2 || rwx_bits == 0x6 ||
 				(rwx_bits == 0x4 && !execonly))
-			context->bad_mt_xwr |= (1ull << pte);
+			rsvd_check->bad_mt_xwr |= (1ull << pte);
 	}
 }
 
+static void reset_rsvds_bits_mask_ept(struct kvm_vcpu *vcpu,
+		struct kvm_mmu *context, bool execonly)
+{
+	__reset_rsvds_bits_mask_ept(&context->guest_rsvd_check,
+				    cpuid_maxphyaddr(vcpu), execonly);
+}
+
+/*
+ * the page table on host is the shadow page table for the page
+ * table in guest or amd nested guest, its mmu features completely
+ * follow the features in guest.
+ */
+void
+reset_shadow_zero_bits_mask(struct kvm_vcpu *vcpu, struct kvm_mmu *context)
+{
+	__reset_rsvds_bits_mask(vcpu, &context->shadow_zero_check,
+				boot_cpu_data.x86_phys_bits,
+				context->shadow_root_level, context->nx,
+				guest_cpuid_has_gbpages(vcpu), is_pse(vcpu));
+}
+EXPORT_SYMBOL_GPL(reset_shadow_zero_bits_mask);
+
+/*
+ * the direct page table on host, use as much mmu features as
+ * possible, however, kvm currently does not do execution-protection.
+ */
+static void
+reset_tdp_shadow_zero_bits_mask(struct kvm_vcpu *vcpu,
+				struct kvm_mmu *context)
+{
+	if (guest_cpuid_is_amd(vcpu))
+		__reset_rsvds_bits_mask(vcpu, &context->shadow_zero_check,
+					boot_cpu_data.x86_phys_bits,
+					context->shadow_root_level, false,
+					cpu_has_gbpages, true);
+	else
+		__reset_rsvds_bits_mask_ept(&context->shadow_zero_check,
+					    boot_cpu_data.x86_phys_bits,
+					    false);
+
+}
+
+/*
+ * as the comments in reset_shadow_zero_bits_mask() except it
+ * is the shadow page table for intel nested guest.
+ */
+static void
+reset_ept_shadow_zero_bits_mask(struct kvm_vcpu *vcpu,
+				struct kvm_mmu *context, bool execonly)
+{
+	__reset_rsvds_bits_mask_ept(&context->shadow_zero_check,
+				    boot_cpu_data.x86_phys_bits, execonly);
+}
+
 static void update_permission_bitmask(struct kvm_vcpu *vcpu,
 				      struct kvm_mmu *mmu, bool ept)
 {
@@ -3881,6 +3965,7 @@ static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
 
 	update_permission_bitmask(vcpu, context, false);
 	update_last_pte_bitmap(vcpu, context);
+	reset_tdp_shadow_zero_bits_mask(vcpu, context);
 }
 
 void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu)
@@ -3908,6 +3993,7 @@ void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu)
 	context->base_role.smap_andnot_wp
 		= smap && !is_write_protection(vcpu);
 	context->base_role.smm = is_smm(vcpu);
+	reset_shadow_zero_bits_mask(vcpu, context);
 }
 EXPORT_SYMBOL_GPL(kvm_init_shadow_mmu);
 
@@ -3931,6 +4017,7 @@ void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly)
 
 	update_permission_bitmask(vcpu, context, true);
 	reset_rsvds_bits_mask_ept(vcpu, context, execonly);
+	reset_ept_shadow_zero_bits_mask(vcpu, context, execonly);
 }
 EXPORT_SYMBOL_GPL(kvm_init_shadow_ept_mmu);
 
@@ -4852,28 +4939,6 @@ unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm)
 	return nr_mmu_pages;
 }
 
-int kvm_mmu_get_spte_hierarchy(struct kvm_vcpu *vcpu, u64 addr, u64 sptes[4])
-{
-	struct kvm_shadow_walk_iterator iterator;
-	u64 spte;
-	int nr_sptes = 0;
-
-	if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
-		return nr_sptes;
-
-	walk_shadow_page_lockless_begin(vcpu);
-	for_each_shadow_entry_lockless(vcpu, addr, iterator, spte) {
-		sptes[iterator.level-1] = spte;
-		nr_sptes++;
-		if (!is_shadow_present_pte(spte))
-			break;
-	}
-	walk_shadow_page_lockless_end(vcpu);
-
-	return nr_sptes;
-}
-EXPORT_SYMBOL_GPL(kvm_mmu_get_spte_hierarchy);
-
 void kvm_mmu_destroy(struct kvm_vcpu *vcpu)
 {
 	kvm_mmu_unload(vcpu);
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index 398d21c0f6dd..e4202e41d535 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -50,9 +50,11 @@ static inline u64 rsvd_bits(int s, int e)
 	return ((1ULL << (e - s + 1)) - 1) << s;
 }
 
-int kvm_mmu_get_spte_hierarchy(struct kvm_vcpu *vcpu, u64 addr, u64 sptes[4]);
 void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask);
 
+void
+reset_shadow_zero_bits_mask(struct kvm_vcpu *vcpu, struct kvm_mmu *context);
+
 /*
  * Return values of handle_mmio_page_fault_common:
  * RET_MMIO_PF_EMULATE: it is a real mmio page fault, emulate the instruction
diff --git a/arch/x86/kvm/mtrr.c b/arch/x86/kvm/mtrr.c
index de1d2d8062e2..9e8bf13572e6 100644
--- a/arch/x86/kvm/mtrr.c
+++ b/arch/x86/kvm/mtrr.c
@@ -120,6 +120,16 @@ static u8 mtrr_default_type(struct kvm_mtrr *mtrr_state)
 	return mtrr_state->deftype & IA32_MTRR_DEF_TYPE_TYPE_MASK;
 }
 
+static u8 mtrr_disabled_type(void)
+{
+	/*
+	 * Intel SDM 11.11.2.2: all MTRRs are disabled when
+	 * IA32_MTRR_DEF_TYPE.E bit is cleared, and the UC
+	 * memory type is applied to all of physical memory.
+	 */
+	return MTRR_TYPE_UNCACHABLE;
+}
+
 /*
 * Three terms are used in the following code:
 * - segment, it indicates the address segments covered by fixed MTRRs.
@@ -434,6 +444,8 @@ struct mtrr_iter {
 
 	/* output fields. */
 	int mem_type;
+	/* mtrr is completely disabled? */
+	bool mtrr_disabled;
 	/* [start, end) is not fully covered in MTRRs? */
 	bool partial_map;
 
@@ -549,7 +561,7 @@ static void mtrr_lookup_var_next(struct mtrr_iter *iter)
 static void mtrr_lookup_start(struct mtrr_iter *iter)
 {
 	if (!mtrr_is_enabled(iter->mtrr_state)) {
-		iter->partial_map = true;
+		iter->mtrr_disabled = true;
 		return;
 	}
 
@@ -563,6 +575,7 @@ static void mtrr_lookup_init(struct mtrr_iter *iter,
 	iter->mtrr_state = mtrr_state;
 	iter->start = start;
 	iter->end = end;
+	iter->mtrr_disabled = false;
 	iter->partial_map = false;
 	iter->fixed = false;
 	iter->range = NULL;
@@ -656,15 +669,19 @@ u8 kvm_mtrr_get_guest_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn)
 		return MTRR_TYPE_WRBACK;
 	}
 
-	/* It is not covered by MTRRs. */
-	if (iter.partial_map) {
-		/*
-		 * We just check one page, partially covered by MTRRs is
-		 * impossible.
-		 */
-		WARN_ON(type != -1);
-		type = mtrr_default_type(mtrr_state);
-	}
+	if (iter.mtrr_disabled)
+		return mtrr_disabled_type();
+
+	/* not contained in any MTRRs. */
+	if (type == -1)
+		return mtrr_default_type(mtrr_state);
+
+	/*
+	 * We just check one page, partially covered by MTRRs is
+	 * impossible.
+	 */
+	WARN_ON(iter.partial_map);
+
 	return type;
 }
 EXPORT_SYMBOL_GPL(kvm_mtrr_get_guest_memory_type);
@@ -689,6 +706,9 @@ bool kvm_mtrr_check_gfn_range_consistency(struct kvm_vcpu *vcpu, gfn_t gfn,
 			return false;
 	}
 
+	if (iter.mtrr_disabled)
+		return true;
+
 	if (!iter.partial_map)
 		return true;
 
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 0f67d7e24800..736e6ab8784d 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -128,14 +128,6 @@ static inline void FNAME(protect_clean_gpte)(unsigned *access, unsigned gpte)
 	*access &= mask;
 }
 
-static bool FNAME(is_rsvd_bits_set)(struct kvm_mmu *mmu, u64 gpte, int level)
-{
-	int bit7 = (gpte >> 7) & 1, low6 = gpte & 0x3f;
-
-	return (gpte & mmu->rsvd_bits_mask[bit7][level-1]) |
-		((mmu->bad_mt_xwr & (1ull << low6)) != 0);
-}
-
 static inline int FNAME(is_present_gpte)(unsigned long pte)
 {
 #if PTTYPE != PTTYPE_EPT
@@ -172,7 +164,7 @@ static bool FNAME(prefetch_invalid_gpte)(struct kvm_vcpu *vcpu,
 				  struct kvm_mmu_page *sp, u64 *spte,
 				  u64 gpte)
 {
-	if (FNAME(is_rsvd_bits_set)(&vcpu->arch.mmu, gpte, PT_PAGE_TABLE_LEVEL))
+	if (is_rsvd_bits_set(&vcpu->arch.mmu, gpte, PT_PAGE_TABLE_LEVEL))
 		goto no_present;
 
 	if (!FNAME(is_present_gpte)(gpte))
@@ -353,8 +345,7 @@ retry_walk:
 		if (unlikely(!FNAME(is_present_gpte)(pte)))
 			goto error;
 
-		if (unlikely(FNAME(is_rsvd_bits_set)(mmu, pte,
-					             walker->level))) {
+		if (unlikely(is_rsvd_bits_set(mmu, pte, walker->level))) {
 			errcode |= PFERR_RSVD_MASK | PFERR_PRESENT_MASK;
 			goto error;
 		}
diff --git a/arch/x86/kvm/pmu_amd.c b/arch/x86/kvm/pmu_amd.c
index 886aa25a7131..39b91127ef07 100644
--- a/arch/x86/kvm/pmu_amd.c
+++ b/arch/x86/kvm/pmu_amd.c
@@ -133,8 +133,6 @@ static int amd_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 	/* MSR_K7_PERFCTRn */
 	pmc = get_gp_pmc(pmu, msr, MSR_K7_PERFCTR0);
 	if (pmc) {
-		if (!msr_info->host_initiated)
-			data = (s64)data;
 		pmc->counter += data - pmc_read_counter(pmc);
 		return 0;
 	}
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 602b974a60a6..74d825716f4f 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -865,6 +865,64 @@ static void svm_disable_lbrv(struct vcpu_svm *svm)
 	set_msr_interception(msrpm, MSR_IA32_LASTINTTOIP, 0, 0);
 }
 
+#define MTRR_TYPE_UC_MINUS	7
+#define MTRR2PROTVAL_INVALID 0xff
+
+static u8 mtrr2protval[8];
+
+static u8 fallback_mtrr_type(int mtrr)
+{
+	/*
+	 * WT and WP aren't always available in the host PAT.  Treat
+	 * them as UC and UC- respectively.  Everything else should be
+	 * there.
+	 */
+	switch (mtrr)
+	{
+	case MTRR_TYPE_WRTHROUGH:
+		return MTRR_TYPE_UNCACHABLE;
+	case MTRR_TYPE_WRPROT:
+		return MTRR_TYPE_UC_MINUS;
+	default:
+		BUG();
+	}
+}
+
+static void build_mtrr2protval(void)
+{
+	int i;
+	u64 pat;
+
+	for (i = 0; i < 8; i++)
+		mtrr2protval[i] = MTRR2PROTVAL_INVALID;
+
+	/* Ignore the invalid MTRR types.  */
+	mtrr2protval[2] = 0;
+	mtrr2protval[3] = 0;
+
+	/*
+	 * Use host PAT value to figure out the mapping from guest MTRR
+	 * values to nested page table PAT/PCD/PWT values.  We do not
+	 * want to change the host PAT value every time we enter the
+	 * guest.
+	 */
+	rdmsrl(MSR_IA32_CR_PAT, pat);
+	for (i = 0; i < 8; i++) {
+		u8 mtrr = pat >> (8 * i);
+
+		if (mtrr2protval[mtrr] == MTRR2PROTVAL_INVALID)
+			mtrr2protval[mtrr] = __cm_idx2pte(i);
+	}
+
+	for (i = 0; i < 8; i++) {
+		if (mtrr2protval[i] == MTRR2PROTVAL_INVALID) {
+			u8 fallback = fallback_mtrr_type(i);
+			mtrr2protval[i] = mtrr2protval[fallback];
+			BUG_ON(mtrr2protval[i] == MTRR2PROTVAL_INVALID);
+		}
+	}
+}
+
 static __init int svm_hardware_setup(void)
 {
 	int cpu;
@@ -931,6 +989,7 @@ static __init int svm_hardware_setup(void)
 	} else
 		kvm_disable_tdp();
 
+	build_mtrr2protval();
 	return 0;
 
 err:
@@ -1085,6 +1144,43 @@ static u64 svm_compute_tsc_offset(struct kvm_vcpu *vcpu, u64 target_tsc)
 	return target_tsc - tsc;
 }
 
+static void svm_set_guest_pat(struct vcpu_svm *svm, u64 *g_pat)
+{
+	struct kvm_vcpu *vcpu = &svm->vcpu;
+
+	/* Unlike Intel, AMD takes the guest's CR0.CD into account.
+	 *
+	 * AMD does not have IPAT.  To emulate it for the case of guests
+	 * with no assigned devices, just set everything to WB.  If guests
+	 * have assigned devices, however, we cannot force WB for RAM
+	 * pages only, so use the guest PAT directly.
+	 */
+	if (!kvm_arch_has_assigned_device(vcpu->kvm))
+		*g_pat = 0x0606060606060606;
+	else
+		*g_pat = vcpu->arch.pat;
+}
+
+static u64 svm_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
+{
+	u8 mtrr;
+
+	/*
+	 * 1. MMIO: trust guest MTRR, so same as item 3.
+	 * 2. No passthrough: always map as WB, and force guest PAT to WB as well
+	 * 3. Passthrough: can't guarantee the result, try to trust guest.
+	 */
+	if (!is_mmio && !kvm_arch_has_assigned_device(vcpu->kvm))
+		return 0;
+
+	if (!kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_CD_NW_CLEARED) &&
+	    kvm_read_cr0(vcpu) & X86_CR0_CD)
+		return _PAGE_NOCACHE;
+
+	mtrr = kvm_mtrr_get_guest_memory_type(vcpu, gfn);
+	return mtrr2protval[mtrr];
+}
+
 static void init_vmcb(struct vcpu_svm *svm, bool init_event)
 {
 	struct vmcb_control_area *control = &svm->vmcb->control;
@@ -1180,6 +1276,7 @@ static void init_vmcb(struct vcpu_svm *svm, bool init_event)
 		clr_cr_intercept(svm, INTERCEPT_CR3_READ);
 		clr_cr_intercept(svm, INTERCEPT_CR3_WRITE);
 		save->g_pat = svm->vcpu.arch.pat;
+		svm_set_guest_pat(svm, &save->g_pat);
 		save->cr3 = 0;
 		save->cr4 = 0;
 	}
@@ -1574,13 +1671,10 @@ static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
 
 	if (!vcpu->fpu_active)
 		cr0 |= X86_CR0_TS;
-	/*
-	 * re-enable caching here because the QEMU bios
-	 * does not do it - this results in some delay at
-	 * reboot
-	 */
-	if (!(vcpu->kvm->arch.disabled_quirks & KVM_QUIRK_CD_NW_CLEARED))
-		cr0 &= ~(X86_CR0_CD | X86_CR0_NW);
+
+	/* These are emulated via page tables.  */
+	cr0 &= ~(X86_CR0_CD | X86_CR0_NW);
+
 	svm->vmcb->save.cr0 = cr0;
 	mark_dirty(svm->vmcb, VMCB_CR);
 	update_cr0_intercept(svm);
@@ -2013,6 +2107,7 @@ static void nested_svm_init_mmu_context(struct kvm_vcpu *vcpu)
 	vcpu->arch.mmu.get_pdptr         = nested_svm_get_tdp_pdptr;
 	vcpu->arch.mmu.inject_page_fault = nested_svm_inject_npf_exit;
 	vcpu->arch.mmu.shadow_root_level = get_npt_level();
+	reset_shadow_zero_bits_mask(vcpu, &vcpu->arch.mmu);
 	vcpu->arch.walk_mmu              = &vcpu->arch.nested_mmu;
 }
 
@@ -3254,6 +3349,16 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
 	case MSR_VM_IGNNE:
 		vcpu_unimpl(vcpu, "unimplemented wrmsr: 0x%x data 0x%llx\n", ecx, data);
 		break;
+	case MSR_IA32_CR_PAT:
+		if (npt_enabled) {
+			if (!kvm_mtrr_valid(vcpu, MSR_IA32_CR_PAT, data))
+				return 1;
+			vcpu->arch.pat = data;
+			svm_set_guest_pat(svm, &svm->vmcb->save.g_pat);
+			mark_dirty(svm->vmcb, VMCB_NPT);
+			break;
+		}
+		/* fall through */
 	default:
 		return kvm_set_msr_common(vcpu, msr);
 	}
@@ -4088,11 +4193,6 @@ static bool svm_has_high_real_mode_segbase(void)
 	return true;
 }
 
-static u64 svm_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
-{
-	return 0;
-}
-
 static void svm_cpuid_update(struct kvm_vcpu *vcpu)
 {
 }
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index e856dd566f4c..da1590ea43fc 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2443,10 +2443,10 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
 		CPU_BASED_CR8_LOAD_EXITING | CPU_BASED_CR8_STORE_EXITING |
 #endif
 		CPU_BASED_MOV_DR_EXITING | CPU_BASED_UNCOND_IO_EXITING |
-		CPU_BASED_USE_IO_BITMAPS | CPU_BASED_MONITOR_EXITING |
-		CPU_BASED_RDPMC_EXITING | CPU_BASED_RDTSC_EXITING |
-		CPU_BASED_PAUSE_EXITING | CPU_BASED_TPR_SHADOW |
-		CPU_BASED_ACTIVATE_SECONDARY_CONTROLS;
+		CPU_BASED_USE_IO_BITMAPS | CPU_BASED_MONITOR_TRAP_FLAG |
+		CPU_BASED_MONITOR_EXITING | CPU_BASED_RDPMC_EXITING |
+		CPU_BASED_RDTSC_EXITING | CPU_BASED_PAUSE_EXITING |
+		CPU_BASED_TPR_SHADOW | CPU_BASED_ACTIVATE_SECONDARY_CONTROLS;
 	/*
 	 * We can allow some features even when not supported by the
 	 * hardware. For example, L1 can specify an MSR bitmap - and we
@@ -3423,12 +3423,12 @@ static void enter_lmode(struct kvm_vcpu *vcpu)
 	vmx_segment_cache_clear(to_vmx(vcpu));
 
 	guest_tr_ar = vmcs_read32(GUEST_TR_AR_BYTES);
-	if ((guest_tr_ar & AR_TYPE_MASK) != AR_TYPE_BUSY_64_TSS) {
+	if ((guest_tr_ar & VMX_AR_TYPE_MASK) != VMX_AR_TYPE_BUSY_64_TSS) {
 		pr_debug_ratelimited("%s: tss fixup for long mode. \n",
 				     __func__);
 		vmcs_write32(GUEST_TR_AR_BYTES,
-			     (guest_tr_ar & ~AR_TYPE_MASK)
-			     | AR_TYPE_BUSY_64_TSS);
+			     (guest_tr_ar & ~VMX_AR_TYPE_MASK)
+			     | VMX_AR_TYPE_BUSY_64_TSS);
 	}
 	vmx_set_efer(vcpu, vcpu->arch.efer | EFER_LMA);
 }
@@ -3719,7 +3719,7 @@ static int vmx_get_cpl(struct kvm_vcpu *vcpu)
 		return 0;
 	else {
 		int ar = vmx_read_guest_seg_ar(vmx, VCPU_SREG_SS);
-		return AR_DPL(ar);
+		return VMX_AR_DPL(ar);
 	}
 }
 
@@ -3847,11 +3847,11 @@ static bool code_segment_valid(struct kvm_vcpu *vcpu)
 
 	if (cs.unusable)
 		return false;
-	if (~cs.type & (AR_TYPE_CODE_MASK|AR_TYPE_ACCESSES_MASK))
+	if (~cs.type & (VMX_AR_TYPE_CODE_MASK|VMX_AR_TYPE_ACCESSES_MASK))
 		return false;
 	if (!cs.s)
 		return false;
-	if (cs.type & AR_TYPE_WRITEABLE_MASK) {
+	if (cs.type & VMX_AR_TYPE_WRITEABLE_MASK) {
 		if (cs.dpl > cs_rpl)
 			return false;
 	} else {
@@ -3901,7 +3901,7 @@ static bool data_segment_valid(struct kvm_vcpu *vcpu, int seg)
 		return false;
 	if (!var.present)
 		return false;
-	if (~var.type & (AR_TYPE_CODE_MASK|AR_TYPE_WRITEABLE_MASK)) {
+	if (~var.type & (VMX_AR_TYPE_CODE_MASK|VMX_AR_TYPE_WRITEABLE_MASK)) {
 		if (var.dpl < rpl) /* DPL < RPL */
 			return false;
 	}
@@ -5759,73 +5759,9 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu)
 	return kvm_mmu_page_fault(vcpu, gpa, error_code, NULL, 0);
 }
 
-static u64 ept_rsvd_mask(u64 spte, int level)
-{
-	int i;
-	u64 mask = 0;
-
-	for (i = 51; i > boot_cpu_data.x86_phys_bits; i--)
-		mask |= (1ULL << i);
-
-	if (level == 4)
-		/* bits 7:3 reserved */
-		mask |= 0xf8;
-	else if (spte & (1ULL << 7))
-		/*
-		 * 1GB/2MB page, bits 29:12 or 20:12 reserved respectively,
-		 * level == 1 if the hypervisor is using the ignored bit 7.
-		 */
-		mask |= (PAGE_SIZE << ((level - 1) * 9)) - PAGE_SIZE;
-	else if (level > 1)
-		/* bits 6:3 reserved */
-		mask |= 0x78;
-
-	return mask;
-}
-
-static void ept_misconfig_inspect_spte(struct kvm_vcpu *vcpu, u64 spte,
-				       int level)
-{
-	printk(KERN_ERR "%s: spte 0x%llx level %d\n", __func__, spte, level);
-
-	/* 010b (write-only) */
-	WARN_ON((spte & 0x7) == 0x2);
-
-	/* 110b (write/execute) */
-	WARN_ON((spte & 0x7) == 0x6);
-
-	/* 100b (execute-only) and value not supported by logical processor */
-	if (!cpu_has_vmx_ept_execute_only())
-		WARN_ON((spte & 0x7) == 0x4);
-
-	/* not 000b */
-	if ((spte & 0x7)) {
-		u64 rsvd_bits = spte & ept_rsvd_mask(spte, level);
-
-		if (rsvd_bits != 0) {
-			printk(KERN_ERR "%s: rsvd_bits = 0x%llx\n",
-					 __func__, rsvd_bits);
-			WARN_ON(1);
-		}
-
-		/* bits 5:3 are _not_ reserved for large page or leaf page */
-		if ((rsvd_bits & 0x38) == 0) {
-			u64 ept_mem_type = (spte & 0x38) >> 3;
-
-			if (ept_mem_type == 2 || ept_mem_type == 3 ||
-			    ept_mem_type == 7) {
-				printk(KERN_ERR "%s: ept_mem_type=0x%llx\n",
-						__func__, ept_mem_type);
-				WARN_ON(1);
-			}
-		}
-	}
-}
-
 static int handle_ept_misconfig(struct kvm_vcpu *vcpu)
 {
-	u64 sptes[4];
-	int nr_sptes, i, ret;
+	int ret;
 	gpa_t gpa;
 
 	gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS);
@@ -5846,13 +5782,7 @@ static int handle_ept_misconfig(struct kvm_vcpu *vcpu)
 		return 1;
 
 	/* It is the real ept misconfig */
-	printk(KERN_ERR "EPT: Misconfiguration.\n");
-	printk(KERN_ERR "EPT: GPA: 0x%llx\n", gpa);
-
-	nr_sptes = kvm_mmu_get_spte_hierarchy(vcpu, gpa, sptes);
-
-	for (i = PT64_ROOT_LEVEL; i > PT64_ROOT_LEVEL - nr_sptes; --i)
-		ept_misconfig_inspect_spte(vcpu, sptes[i-1], i);
+	WARN_ON(1);
 
 	vcpu->run->exit_reason = KVM_EXIT_UNKNOWN;
 	vcpu->run->hw.hardware_exit_reason = EXIT_REASON_EPT_MISCONFIG;
@@ -6246,6 +6176,11 @@ static int handle_mwait(struct kvm_vcpu *vcpu)
 	return handle_nop(vcpu);
 }
 
+static int handle_monitor_trap(struct kvm_vcpu *vcpu)
+{
+	return 1;
+}
+
 static int handle_monitor(struct kvm_vcpu *vcpu)
 {
 	printk_once(KERN_WARNING "kvm: MONITOR instruction emulated as NOP!\n");
@@ -6408,8 +6343,12 @@ static enum hrtimer_restart vmx_preemption_timer_fn(struct hrtimer *timer)
  */
 static int get_vmx_mem_address(struct kvm_vcpu *vcpu,
 				 unsigned long exit_qualification,
-				 u32 vmx_instruction_info, gva_t *ret)
+				 u32 vmx_instruction_info, bool wr, gva_t *ret)
 {
+	gva_t off;
+	bool exn;
+	struct kvm_segment s;
+
 	/*
 	 * According to Vol. 3B, "Information for VM Exits Due to Instruction
 	 * Execution", on an exit, vmx_instruction_info holds most of the
@@ -6434,22 +6373,63 @@ static int get_vmx_mem_address(struct kvm_vcpu *vcpu,
 
 	/* Addr = segment_base + offset */
 	/* offset = base + [index * scale] + displacement */
-	*ret = vmx_get_segment_base(vcpu, seg_reg);
+	off = exit_qualification; /* holds the displacement */
 	if (base_is_valid)
-		*ret += kvm_register_read(vcpu, base_reg);
+		off += kvm_register_read(vcpu, base_reg);
 	if (index_is_valid)
-		*ret += kvm_register_read(vcpu, index_reg)<<scaling;
-	*ret += exit_qualification; /* holds the displacement */
+		off += kvm_register_read(vcpu, index_reg)<<scaling;
+	vmx_get_segment(vcpu, &s, seg_reg);
+	*ret = s.base + off;
 
 	if (addr_size == 1) /* 32 bit */
 		*ret &= 0xffffffff;
 
-	/*
-	 * TODO: throw #GP (and return 1) in various cases that the VM*
-	 * instructions require it - e.g., offset beyond segment limit,
-	 * unusable or unreadable/unwritable segment, non-canonical 64-bit
-	 * address, and so on. Currently these are not checked.
-	 */
+	/* Checks for #GP/#SS exceptions. */
+	exn = false;
+	if (is_protmode(vcpu)) {
+		/* Protected mode: apply checks for segment validity in the
+		 * following order:
+		 * - segment type check (#GP(0) may be thrown)
+		 * - usability check (#GP(0)/#SS(0))
+		 * - limit check (#GP(0)/#SS(0))
+		 */
+		if (wr)
+			/* #GP(0) if the destination operand is located in a
+			 * read-only data segment or any code segment.
+			 */
+			exn = ((s.type & 0xa) == 0 || (s.type & 8));
+		else
+			/* #GP(0) if the source operand is located in an
+			 * execute-only code segment
+			 */
+			exn = ((s.type & 0xa) == 8);
+	}
+	if (exn) {
+		kvm_queue_exception_e(vcpu, GP_VECTOR, 0);
+		return 1;
+	}
+	if (is_long_mode(vcpu)) {
+		/* Long mode: #GP(0)/#SS(0) if the memory address is in a
+		 * non-canonical form. This is an only check for long mode.
+		 */
+		exn = is_noncanonical_address(*ret);
+	} else if (is_protmode(vcpu)) {
+		/* Protected mode: #GP(0)/#SS(0) if the segment is unusable.
+		 */
+		exn = (s.unusable != 0);
+		/* Protected mode: #GP(0)/#SS(0) if the memory
+		 * operand is outside the segment limit.
+		 */
+		exn = exn || (off + sizeof(u64) > s.limit);
+	}
+	if (exn) {
+		kvm_queue_exception_e(vcpu,
+				      seg_reg == VCPU_SREG_SS ?
+						SS_VECTOR : GP_VECTOR,
+				      0);
+		return 1;
+	}
+
 	return 0;
 }
 
@@ -6471,7 +6451,7 @@ static int nested_vmx_check_vmptr(struct kvm_vcpu *vcpu, int exit_reason,
 	int maxphyaddr = cpuid_maxphyaddr(vcpu);
 
 	if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION),
-			vmcs_read32(VMX_INSTRUCTION_INFO), &gva))
+			vmcs_read32(VMX_INSTRUCTION_INFO), false, &gva))
 		return 1;
 
 	if (kvm_read_guest_virt(&vcpu->arch.emulate_ctxt, gva, &vmptr,
@@ -6999,7 +6979,7 @@ static int handle_vmread(struct kvm_vcpu *vcpu)
 			field_value);
 	} else {
 		if (get_vmx_mem_address(vcpu, exit_qualification,
-				vmx_instruction_info, &gva))
+				vmx_instruction_info, true, &gva))
 			return 1;
 		/* _system ok, as nested_vmx_check_permission verified cpl=0 */
 		kvm_write_guest_virt_system(&vcpu->arch.emulate_ctxt, gva,
@@ -7036,7 +7016,7 @@ static int handle_vmwrite(struct kvm_vcpu *vcpu)
 			(((vmx_instruction_info) >> 3) & 0xf));
 	else {
 		if (get_vmx_mem_address(vcpu, exit_qualification,
-				vmx_instruction_info, &gva))
+				vmx_instruction_info, false, &gva))
 			return 1;
 		if (kvm_read_guest_virt(&vcpu->arch.emulate_ctxt, gva,
 			   &field_value, (is_64_bit_mode(vcpu) ? 8 : 4), &e)) {
@@ -7128,7 +7108,7 @@ static int handle_vmptrst(struct kvm_vcpu *vcpu)
 		return 1;
 
 	if (get_vmx_mem_address(vcpu, exit_qualification,
-			vmx_instruction_info, &vmcs_gva))
+			vmx_instruction_info, true, &vmcs_gva))
 		return 1;
 	/* ok to use *_system, as nested_vmx_check_permission verified cpl=0 */
 	if (kvm_write_guest_virt_system(&vcpu->arch.emulate_ctxt, vmcs_gva,
@@ -7184,7 +7164,7 @@ static int handle_invept(struct kvm_vcpu *vcpu)
 	 * operand is read even if it isn't needed (e.g., for type==global)
 	 */
 	if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION),
-			vmx_instruction_info, &gva))
+			vmx_instruction_info, false, &gva))
 		return 1;
 	if (kvm_read_guest_virt(&vcpu->arch.emulate_ctxt, gva, &operand,
 				sizeof(operand), &e)) {
@@ -7282,6 +7262,7 @@ static int (*const kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
 	[EXIT_REASON_EPT_MISCONFIG]           = handle_ept_misconfig,
 	[EXIT_REASON_PAUSE_INSTRUCTION]       = handle_pause,
 	[EXIT_REASON_MWAIT_INSTRUCTION]	      = handle_mwait,
+	[EXIT_REASON_MONITOR_TRAP_FLAG]       = handle_monitor_trap,
 	[EXIT_REASON_MONITOR_INSTRUCTION]     = handle_monitor,
 	[EXIT_REASON_INVEPT]                  = handle_invept,
 	[EXIT_REASON_INVVPID]                 = handle_invvpid,
@@ -7542,6 +7523,8 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
 		return true;
 	case EXIT_REASON_MWAIT_INSTRUCTION:
 		return nested_cpu_has(vmcs12, CPU_BASED_MWAIT_EXITING);
+	case EXIT_REASON_MONITOR_TRAP_FLAG:
+		return nested_cpu_has(vmcs12, CPU_BASED_MONITOR_TRAP_FLAG);
 	case EXIT_REASON_MONITOR_INSTRUCTION:
 		return nested_cpu_has(vmcs12, CPU_BASED_MONITOR_EXITING);
 	case EXIT_REASON_PAUSE_INSTRUCTION:
@@ -8632,22 +8615,17 @@ static u64 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
 	u64 ipat = 0;
 
 	/* For VT-d and EPT combination
-	 * 1. MMIO: always map as UC
+	 * 1. MMIO: guest may want to apply WC, trust it.
 	 * 2. EPT with VT-d:
 	 *   a. VT-d without snooping control feature: can't guarantee the
-	 *	result, try to trust guest.
+	 *	result, try to trust guest.  So the same as item 1.
 	 *   b. VT-d with snooping control feature: snooping control feature of
 	 *	VT-d engine can guarantee the cache correctness. Just set it
 	 *	to WB to keep consistent with host. So the same as item 3.
 	 * 3. EPT without VT-d: always map as WB and set IPAT=1 to keep
 	 *    consistent with host MTRR
 	 */
-	if (is_mmio) {
-		cache = MTRR_TYPE_UNCACHABLE;
-		goto exit;
-	}
-
-	if (!kvm_arch_has_noncoherent_dma(vcpu->kvm)) {
+	if (!is_mmio && !kvm_arch_has_noncoherent_dma(vcpu->kvm)) {
 		ipat = VMX_EPT_IPAT_BIT;
 		cache = MTRR_TYPE_WRBACK;
 		goto exit;
@@ -8655,7 +8633,10 @@ static u64 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
 
 	if (kvm_read_cr0(vcpu) & X86_CR0_CD) {
 		ipat = VMX_EPT_IPAT_BIT;
-		cache = MTRR_TYPE_UNCACHABLE;
+		if (kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_CD_NW_CLEARED))
+			cache = MTRR_TYPE_WRBACK;
+		else
+			cache = MTRR_TYPE_UNCACHABLE;
 		goto exit;
 	}
 
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index bbaf44e8f0d3..c7b6aed998e9 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -29,6 +29,7 @@
 #include "cpuid.h"
 #include "assigned-dev.h"
 #include "pmu.h"
+#include "hyperv.h"
 
 #include <linux/clocksource.h>
 #include <linux/interrupt.h>
@@ -221,11 +222,9 @@ static void shared_msr_update(unsigned slot, u32 msr)
 void kvm_define_shared_msr(unsigned slot, u32 msr)
 {
 	BUG_ON(slot >= KVM_NR_SHARED_MSRS);
+	shared_msrs_global.msrs[slot] = msr;
 	if (slot >= shared_msrs_global.nr)
 		shared_msrs_global.nr = slot + 1;
-	shared_msrs_global.msrs[slot] = msr;
-	/* we need ensured the shared_msr_global have been updated */
-	smp_wmb();
 }
 EXPORT_SYMBOL_GPL(kvm_define_shared_msr);
 
@@ -526,7 +525,8 @@ int load_pdptrs(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, unsigned long cr3)
 	}
 	for (i = 0; i < ARRAY_SIZE(pdpte); ++i) {
 		if (is_present_gpte(pdpte[i]) &&
-		    (pdpte[i] & vcpu->arch.mmu.rsvd_bits_mask[0][2])) {
+		    (pdpte[i] &
+		     vcpu->arch.mmu.guest_rsvd_check.rsvd_bits_mask[0][2])) {
 			ret = 0;
 			goto out;
 		}
@@ -949,6 +949,8 @@ static u32 emulated_msrs[] = {
 	MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW,
 	HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL,
 	HV_X64_MSR_TIME_REF_COUNT, HV_X64_MSR_REFERENCE_TSC,
+	HV_X64_MSR_CRASH_P0, HV_X64_MSR_CRASH_P1, HV_X64_MSR_CRASH_P2,
+	HV_X64_MSR_CRASH_P3, HV_X64_MSR_CRASH_P4, HV_X64_MSR_CRASH_CTL,
 	HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME,
 	MSR_KVM_PV_EOI_EN,
 
@@ -1217,11 +1219,6 @@ static void kvm_get_time_scale(uint32_t scaled_khz, uint32_t base_khz,
 		 __func__, base_khz, scaled_khz, shift, *pmultiplier);
 }
 
-static inline u64 get_kernel_ns(void)
-{
-	return ktime_get_boot_ns();
-}
-
 #ifdef CONFIG_X86_64
 static atomic_t kvm_guest_has_master_clock = ATOMIC_INIT(0);
 #endif
@@ -1869,123 +1866,6 @@ out:
 	return r;
 }
 
-static bool kvm_hv_hypercall_enabled(struct kvm *kvm)
-{
-	return kvm->arch.hv_hypercall & HV_X64_MSR_HYPERCALL_ENABLE;
-}
-
-static bool kvm_hv_msr_partition_wide(u32 msr)
-{
-	bool r = false;
-	switch (msr) {
-	case HV_X64_MSR_GUEST_OS_ID:
-	case HV_X64_MSR_HYPERCALL:
-	case HV_X64_MSR_REFERENCE_TSC:
-	case HV_X64_MSR_TIME_REF_COUNT:
-		r = true;
-		break;
-	}
-
-	return r;
-}
-
-static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data)
-{
-	struct kvm *kvm = vcpu->kvm;
-
-	switch (msr) {
-	case HV_X64_MSR_GUEST_OS_ID:
-		kvm->arch.hv_guest_os_id = data;
-		/* setting guest os id to zero disables hypercall page */
-		if (!kvm->arch.hv_guest_os_id)
-			kvm->arch.hv_hypercall &= ~HV_X64_MSR_HYPERCALL_ENABLE;
-		break;
-	case HV_X64_MSR_HYPERCALL: {
-		u64 gfn;
-		unsigned long addr;
-		u8 instructions[4];
-
-		/* if guest os id is not set hypercall should remain disabled */
-		if (!kvm->arch.hv_guest_os_id)
-			break;
-		if (!(data & HV_X64_MSR_HYPERCALL_ENABLE)) {
-			kvm->arch.hv_hypercall = data;
-			break;
-		}
-		gfn = data >> HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_SHIFT;
-		addr = gfn_to_hva(kvm, gfn);
-		if (kvm_is_error_hva(addr))
-			return 1;
-		kvm_x86_ops->patch_hypercall(vcpu, instructions);
-		((unsigned char *)instructions)[3] = 0xc3; /* ret */
-		if (__copy_to_user((void __user *)addr, instructions, 4))
-			return 1;
-		kvm->arch.hv_hypercall = data;
-		mark_page_dirty(kvm, gfn);
-		break;
-	}
-	case HV_X64_MSR_REFERENCE_TSC: {
-		u64 gfn;
-		HV_REFERENCE_TSC_PAGE tsc_ref;
-		memset(&tsc_ref, 0, sizeof(tsc_ref));
-		kvm->arch.hv_tsc_page = data;
-		if (!(data & HV_X64_MSR_TSC_REFERENCE_ENABLE))
-			break;
-		gfn = data >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT;
-		if (kvm_write_guest(kvm, gfn << HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT,
-			&tsc_ref, sizeof(tsc_ref)))
-			return 1;
-		mark_page_dirty(kvm, gfn);
-		break;
-	}
-	default:
-		vcpu_unimpl(vcpu, "HYPER-V unimplemented wrmsr: 0x%x "
-			    "data 0x%llx\n", msr, data);
-		return 1;
-	}
-	return 0;
-}
-
-static int set_msr_hyperv(struct kvm_vcpu *vcpu, u32 msr, u64 data)
-{
-	switch (msr) {
-	case HV_X64_MSR_APIC_ASSIST_PAGE: {
-		u64 gfn;
-		unsigned long addr;
-
-		if (!(data & HV_X64_MSR_APIC_ASSIST_PAGE_ENABLE)) {
-			vcpu->arch.hv_vapic = data;
-			if (kvm_lapic_enable_pv_eoi(vcpu, 0))
-				return 1;
-			break;
-		}
-		gfn = data >> HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT;
-		addr = kvm_vcpu_gfn_to_hva(vcpu, gfn);
-		if (kvm_is_error_hva(addr))
-			return 1;
-		if (__clear_user((void __user *)addr, PAGE_SIZE))
-			return 1;
-		vcpu->arch.hv_vapic = data;
-		kvm_vcpu_mark_page_dirty(vcpu, gfn);
-		if (kvm_lapic_enable_pv_eoi(vcpu, gfn_to_gpa(gfn) | KVM_MSR_ENABLED))
-			return 1;
-		break;
-	}
-	case HV_X64_MSR_EOI:
-		return kvm_hv_vapic_msr_write(vcpu, APIC_EOI, data);
-	case HV_X64_MSR_ICR:
-		return kvm_hv_vapic_msr_write(vcpu, APIC_ICR, data);
-	case HV_X64_MSR_TPR:
-		return kvm_hv_vapic_msr_write(vcpu, APIC_TASKPRI, data);
-	default:
-		vcpu_unimpl(vcpu, "HYPER-V unimplemented wrmsr: 0x%x "
-			    "data 0x%llx\n", msr, data);
-		return 1;
-	}
-
-	return 0;
-}
-
 static int kvm_pv_enable_async_pf(struct kvm_vcpu *vcpu, u64 data)
 {
 	gpa_t gpa = data & ~0x3f;
@@ -2224,15 +2104,10 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		 */
 		break;
 	case HV_X64_MSR_GUEST_OS_ID ... HV_X64_MSR_SINT15:
-		if (kvm_hv_msr_partition_wide(msr)) {
-			int r;
-			mutex_lock(&vcpu->kvm->lock);
-			r = set_msr_hyperv_pw(vcpu, msr, data);
-			mutex_unlock(&vcpu->kvm->lock);
-			return r;
-		} else
-			return set_msr_hyperv(vcpu, msr, data);
-		break;
+	case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4:
+	case HV_X64_MSR_CRASH_CTL:
+		return kvm_hv_set_msr_common(vcpu, msr, data,
+					     msr_info->host_initiated);
 	case MSR_IA32_BBL_CR_CTL3:
 		/* Drop writes to this legacy MSR -- see rdmsr
 		 * counterpart for further detail.
@@ -2315,68 +2190,6 @@ static int get_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
 	return 0;
 }
 
-static int get_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
-{
-	u64 data = 0;
-	struct kvm *kvm = vcpu->kvm;
-
-	switch (msr) {
-	case HV_X64_MSR_GUEST_OS_ID:
-		data = kvm->arch.hv_guest_os_id;
-		break;
-	case HV_X64_MSR_HYPERCALL:
-		data = kvm->arch.hv_hypercall;
-		break;
-	case HV_X64_MSR_TIME_REF_COUNT: {
-		data =
-		     div_u64(get_kernel_ns() + kvm->arch.kvmclock_offset, 100);
-		break;
-	}
-	case HV_X64_MSR_REFERENCE_TSC:
-		data = kvm->arch.hv_tsc_page;
-		break;
-	default:
-		vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr);
-		return 1;
-	}
-
-	*pdata = data;
-	return 0;
-}
-
-static int get_msr_hyperv(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
-{
-	u64 data = 0;
-
-	switch (msr) {
-	case HV_X64_MSR_VP_INDEX: {
-		int r;
-		struct kvm_vcpu *v;
-		kvm_for_each_vcpu(r, v, vcpu->kvm) {
-			if (v == vcpu) {
-				data = r;
-				break;
-			}
-		}
-		break;
-	}
-	case HV_X64_MSR_EOI:
-		return kvm_hv_vapic_msr_read(vcpu, APIC_EOI, pdata);
-	case HV_X64_MSR_ICR:
-		return kvm_hv_vapic_msr_read(vcpu, APIC_ICR, pdata);
-	case HV_X64_MSR_TPR:
-		return kvm_hv_vapic_msr_read(vcpu, APIC_TASKPRI, pdata);
-	case HV_X64_MSR_APIC_ASSIST_PAGE:
-		data = vcpu->arch.hv_vapic;
-		break;
-	default:
-		vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr);
-		return 1;
-	}
-	*pdata = data;
-	return 0;
-}
-
 int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 {
 	switch (msr_info->index) {
@@ -2493,14 +2306,10 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		msr_info->data = 0x20000000;
 		break;
 	case HV_X64_MSR_GUEST_OS_ID ... HV_X64_MSR_SINT15:
-		if (kvm_hv_msr_partition_wide(msr_info->index)) {
-			int r;
-			mutex_lock(&vcpu->kvm->lock);
-			r = get_msr_hyperv_pw(vcpu, msr_info->index, &msr_info->data);
-			mutex_unlock(&vcpu->kvm->lock);
-			return r;
-		} else
-			return get_msr_hyperv(vcpu, msr_info->index, &msr_info->data);
+	case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4:
+	case HV_X64_MSR_CRASH_CTL:
+		return kvm_hv_get_msr_common(vcpu,
+					     msr_info->index, &msr_info->data);
 		break;
 	case MSR_IA32_BBL_CR_CTL3:
 		/* This legacy MSR exists but isn't fully documented in current
@@ -2651,6 +2460,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 	case KVM_CAP_TSC_DEADLINE_TIMER:
 	case KVM_CAP_ENABLE_CAP_VM:
 	case KVM_CAP_DISABLE_QUIRKS:
+	case KVM_CAP_SET_BOOT_CPU_ID:
 #ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
 	case KVM_CAP_ASSIGN_DEV_IRQ:
 	case KVM_CAP_PCI_2_3:
@@ -3157,8 +2967,7 @@ static void load_xsave(struct kvm_vcpu *vcpu, u8 *src)
 			cpuid_count(XSTATE_CPUID, index,
 				    &size, &offset, &ecx, &edx);
 			memcpy(dest, src + offset, size);
-		} else
-			WARN_ON_ONCE(1);
+		}
 
 		valid -= feature;
 	}
@@ -3818,30 +3627,25 @@ long kvm_arch_vm_ioctl(struct file *filp,
 			r = kvm_ioapic_init(kvm);
 			if (r) {
 				mutex_lock(&kvm->slots_lock);
-				kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS,
-							  &vpic->dev_master);
-				kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS,
-							  &vpic->dev_slave);
-				kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS,
-							  &vpic->dev_eclr);
+				kvm_destroy_pic(vpic);
 				mutex_unlock(&kvm->slots_lock);
-				kfree(vpic);
 				goto create_irqchip_unlock;
 			}
 		} else
 			goto create_irqchip_unlock;
-		smp_wmb();
-		kvm->arch.vpic = vpic;
-		smp_wmb();
 		r = kvm_setup_default_irq_routing(kvm);
 		if (r) {
 			mutex_lock(&kvm->slots_lock);
 			mutex_lock(&kvm->irq_lock);
 			kvm_ioapic_destroy(kvm);
-			kvm_destroy_pic(kvm);
+			kvm_destroy_pic(vpic);
 			mutex_unlock(&kvm->irq_lock);
 			mutex_unlock(&kvm->slots_lock);
+			goto create_irqchip_unlock;
 		}
+		/* Write kvm->irq_routing before kvm->arch.vpic.  */
+		smp_wmb();
+		kvm->arch.vpic = vpic;
 	create_irqchip_unlock:
 		mutex_unlock(&kvm->lock);
 		break;
@@ -3968,6 +3772,15 @@ long kvm_arch_vm_ioctl(struct file *filp,
 		r = kvm_vm_ioctl_reinject(kvm, &control);
 		break;
 	}
+	case KVM_SET_BOOT_CPU_ID:
+		r = 0;
+		mutex_lock(&kvm->lock);
+		if (atomic_read(&kvm->online_vcpus) != 0)
+			r = -EBUSY;
+		else
+			kvm->arch.bsp_vcpu_id = arg;
+		mutex_unlock(&kvm->lock);
+		break;
 	case KVM_XEN_HVM_CONFIG: {
 		r = -EFAULT;
 		if (copy_from_user(&kvm->arch.xen_hvm_config, argp,
@@ -5883,66 +5696,6 @@ int kvm_emulate_halt(struct kvm_vcpu *vcpu)
 }
 EXPORT_SYMBOL_GPL(kvm_emulate_halt);
 
-int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
-{
-	u64 param, ingpa, outgpa, ret;
-	uint16_t code, rep_idx, rep_cnt, res = HV_STATUS_SUCCESS, rep_done = 0;
-	bool fast, longmode;
-
-	/*
-	 * hypercall generates UD from non zero cpl and real mode
-	 * per HYPER-V spec
-	 */
-	if (kvm_x86_ops->get_cpl(vcpu) != 0 || !is_protmode(vcpu)) {
-		kvm_queue_exception(vcpu, UD_VECTOR);
-		return 0;
-	}
-
-	longmode = is_64_bit_mode(vcpu);
-
-	if (!longmode) {
-		param = ((u64)kvm_register_read(vcpu, VCPU_REGS_RDX) << 32) |
-			(kvm_register_read(vcpu, VCPU_REGS_RAX) & 0xffffffff);
-		ingpa = ((u64)kvm_register_read(vcpu, VCPU_REGS_RBX) << 32) |
-			(kvm_register_read(vcpu, VCPU_REGS_RCX) & 0xffffffff);
-		outgpa = ((u64)kvm_register_read(vcpu, VCPU_REGS_RDI) << 32) |
-			(kvm_register_read(vcpu, VCPU_REGS_RSI) & 0xffffffff);
-	}
-#ifdef CONFIG_X86_64
-	else {
-		param = kvm_register_read(vcpu, VCPU_REGS_RCX);
-		ingpa = kvm_register_read(vcpu, VCPU_REGS_RDX);
-		outgpa = kvm_register_read(vcpu, VCPU_REGS_R8);
-	}
-#endif
-
-	code = param & 0xffff;
-	fast = (param >> 16) & 0x1;
-	rep_cnt = (param >> 32) & 0xfff;
-	rep_idx = (param >> 48) & 0xfff;
-
-	trace_kvm_hv_hypercall(code, fast, rep_cnt, rep_idx, ingpa, outgpa);
-
-	switch (code) {
-	case HV_X64_HV_NOTIFY_LONG_SPIN_WAIT:
-		kvm_vcpu_on_spin(vcpu);
-		break;
-	default:
-		res = HV_STATUS_INVALID_HYPERCALL_CODE;
-		break;
-	}
-
-	ret = res | (((u64)rep_done & 0xfff) << 32);
-	if (longmode) {
-		kvm_register_write(vcpu, VCPU_REGS_RAX, ret);
-	} else {
-		kvm_register_write(vcpu, VCPU_REGS_RDX, ret >> 32);
-		kvm_register_write(vcpu, VCPU_REGS_RAX, ret & 0xffffffff);
-	}
-
-	return 1;
-}
-
 /*
  * kvm_pv_kick_cpu_op:  Kick a vcpu.
  *
@@ -6514,6 +6267,12 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 			vcpu_scan_ioapic(vcpu);
 		if (kvm_check_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu))
 			kvm_vcpu_reload_apic_access_page(vcpu);
+		if (kvm_check_request(KVM_REQ_HV_CRASH, vcpu)) {
+			vcpu->run->exit_reason = KVM_EXIT_SYSTEM_EVENT;
+			vcpu->run->system_event.type = KVM_SYSTEM_EVENT_CRASH;
+			r = 0;
+			goto out;
+		}
 	}
 
 	if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) {
@@ -7315,11 +7074,6 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
 
 	vcpu = kvm_x86_ops->vcpu_create(kvm, id);
 
-	/*
-	 * Activate fpu unconditionally in case the guest needs eager FPU.  It will be
-	 * deactivated soon if it doesn't.
-	 */
-	kvm_x86_ops->fpu_activate(vcpu);
 	return vcpu;
 }
 
@@ -7541,6 +7295,17 @@ void kvm_arch_check_processor_compat(void *rtn)
 	kvm_x86_ops->check_processor_compatibility(rtn);
 }
 
+bool kvm_vcpu_is_reset_bsp(struct kvm_vcpu *vcpu)
+{
+	return vcpu->kvm->arch.bsp_vcpu_id == vcpu->vcpu_id;
+}
+EXPORT_SYMBOL_GPL(kvm_vcpu_is_reset_bsp);
+
+bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu)
+{
+	return (vcpu->arch.apic_base & MSR_IA32_APICBASE_BSP) != 0;
+}
+
 bool kvm_vcpu_compatible(struct kvm_vcpu *vcpu)
 {
 	return irqchip_in_kernel(vcpu->kvm) == (vcpu->arch.apic != NULL);
@@ -8218,6 +7983,24 @@ bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
 			kvm_x86_ops->interrupt_allowed(vcpu);
 }
 
+void kvm_arch_start_assignment(struct kvm *kvm)
+{
+	atomic_inc(&kvm->arch.assigned_device_count);
+}
+EXPORT_SYMBOL_GPL(kvm_arch_start_assignment);
+
+void kvm_arch_end_assignment(struct kvm *kvm)
+{
+	atomic_dec(&kvm->arch.assigned_device_count);
+}
+EXPORT_SYMBOL_GPL(kvm_arch_end_assignment);
+
+bool kvm_arch_has_assigned_device(struct kvm *kvm)
+{
+	return atomic_read(&kvm->arch.assigned_device_count);
+}
+EXPORT_SYMBOL_GPL(kvm_arch_has_assigned_device);
+
 void kvm_arch_register_noncoherent_dma(struct kvm *kvm)
 {
 	atomic_inc(&kvm->arch.noncoherent_dma_count);
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index edc8cdcd786b..2f822cd886c2 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -147,6 +147,16 @@ static inline void kvm_register_writel(struct kvm_vcpu *vcpu,
 	return kvm_register_write(vcpu, reg, val);
 }
 
+static inline u64 get_kernel_ns(void)
+{
+	return ktime_get_boot_ns();
+}
+
+static inline bool kvm_check_has_quirk(struct kvm *kvm, u64 quirk)
+{
+	return !(kvm->arch.disabled_quirks & quirk);
+}
+
 void kvm_before_handle_nmi(struct kvm_vcpu *vcpu);
 void kvm_after_handle_nmi(struct kvm_vcpu *vcpu);
 void kvm_set_pending_timer(struct kvm_vcpu *vcpu);
diff --git a/arch/xtensa/include/asm/Kbuild b/arch/xtensa/include/asm/Kbuild
index 14d15bf1a95b..5b478accd5fc 100644
--- a/arch/xtensa/include/asm/Kbuild
+++ b/arch/xtensa/include/asm/Kbuild
@@ -19,6 +19,7 @@ generic-y += linkage.h
 generic-y += local.h
 generic-y += local64.h
 generic-y += mcs_spinlock.h
+generic-y += mm-arch-hooks.h
 generic-y += percpu.h
 generic-y += preempt.h
 generic-y += resource.h
diff --git a/arch/xtensa/include/asm/mm-arch-hooks.h b/arch/xtensa/include/asm/mm-arch-hooks.h
deleted file mode 100644
index d2e5cfd3dd02..000000000000
--- a/arch/xtensa/include/asm/mm-arch-hooks.h
+++ /dev/null
@@ -1,15 +0,0 @@
-/*
- * Architecture specific mm hooks
- *
- * Copyright (C) 2015, IBM Corporation
- * Author: Laurent Dufour <ldufour@linux.vnet.ibm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#ifndef _ASM_XTENSA_MM_ARCH_HOOKS_H
-#define _ASM_XTENSA_MM_ARCH_HOOKS_H
-
-#endif /* _ASM_XTENSA_MM_ARCH_HOOKS_H */
author	Paolo Bonzini	2015-08-22 23:57:59 +0200
committer	Paolo Bonzini	2015-08-22 23:57:59 +0200
commit	e3dbc572fe11a5231568e106fa3dcedd1d1bec0f (patch)
tree	5e15ee35c77e75d51cb550cb3cb0f6ecafa7b508 /arch
parent	Merge tag 'kvm-arm-for-4.3' of git://git.kernel.org/pub/scm/linux/kernel/git/... (diff)
parent	KVM: PPC: Book3S: correct width in XER handling (diff)
download	kernel-qcow2-linux-e3dbc572fe11a5231568e106fa3dcedd1d1bec0f.tar.gz kernel-qcow2-linux-e3dbc572fe11a5231568e106fa3dcedd1d1bec0f.tar.xz kernel-qcow2-linux-e3dbc572fe11a5231568e106fa3dcedd1d1bec0f.zip