summaryrefslogtreecommitdiffstats
path: root/arch/powerpc/kernel/exceptions-64s.S
diff options
context:
space:
mode:
authorMahesh Salgaonkar2018-09-11 16:27:00 +0200
committerMichael Ellerman2018-09-19 13:59:22 +0200
commita43c1590426c44a5c6bbaf51b70a36a5c6d86914 (patch)
treedf756695fbf54f7aca9bf2d9f54160a13ffebe29 /arch/powerpc/kernel/exceptions-64s.S
parentpowerpc/pseries: Define MCE error event section. (diff)
downloadkernel-qcow2-linux-a43c1590426c44a5c6bbaf51b70a36a5c6d86914.tar.gz
kernel-qcow2-linux-a43c1590426c44a5c6bbaf51b70a36a5c6d86914.tar.xz
kernel-qcow2-linux-a43c1590426c44a5c6bbaf51b70a36a5c6d86914.zip
powerpc/pseries: Flush SLB contents on SLB MCE errors.
On pseries, as of today system crashes if we get a machine check exceptions due to SLB errors. These are soft errors and can be fixed by flushing the SLBs so the kernel can continue to function instead of system crash. We do this in real mode before turning on MMU. Otherwise we would run into nested machine checks. This patch now fetches the rtas error log in real mode and flushes the SLBs on SLB/ERAT errors. Signed-off-by: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com> Signed-off-by: Michal Suchanek <msuchanek@suse.com> Reviewed-by: Nicholas Piggin <npiggin@gmail.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Diffstat (limited to 'arch/powerpc/kernel/exceptions-64s.S')
-rw-r--r--arch/powerpc/kernel/exceptions-64s.S129
1 files changed, 129 insertions, 0 deletions
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index ea04dfb8c092..b36e11d73702 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -331,6 +331,9 @@ TRAMP_REAL_BEGIN(machine_check_pSeries)
machine_check_fwnmi:
SET_SCRATCH0(r13) /* save r13 */
EXCEPTION_PROLOG_0(PACA_EXMC)
+BEGIN_FTR_SECTION
+ b machine_check_pSeries_early
+END_FTR_SECTION_IFCLR(CPU_FTR_HVMODE)
machine_check_pSeries_0:
EXCEPTION_PROLOG_1(PACA_EXMC, KVMTEST_PR, 0x200)
/*
@@ -342,6 +345,103 @@ machine_check_pSeries_0:
TRAMP_KVM_SKIP(PACA_EXMC, 0x200)
+TRAMP_REAL_BEGIN(machine_check_pSeries_early)
+BEGIN_FTR_SECTION
+ EXCEPTION_PROLOG_1(PACA_EXMC, NOTEST, 0x200)
+ mr r10,r1 /* Save r1 */
+ lhz r11,PACA_IN_MCE(r13)
+ cmpwi r11,0 /* Are we in nested machine check */
+ bne 0f /* Yes, we are. */
+ /* First machine check entry */
+ ld r1,PACAMCEMERGSP(r13) /* Use MC emergency stack */
+0: subi r1,r1,INT_FRAME_SIZE /* alloc stack frame */
+ addi r11,r11,1 /* increment paca->in_mce */
+ sth r11,PACA_IN_MCE(r13)
+ /* Limit nested MCE to level 4 to avoid stack overflow */
+ cmpwi r11,MAX_MCE_DEPTH
+ bgt 1f /* Check if we hit limit of 4 */
+ mfspr r11,SPRN_SRR0 /* Save SRR0 */
+ mfspr r12,SPRN_SRR1 /* Save SRR1 */
+ EXCEPTION_PROLOG_COMMON_1()
+ EXCEPTION_PROLOG_COMMON_2(PACA_EXMC)
+ EXCEPTION_PROLOG_COMMON_3(0x200)
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ BRANCH_LINK_TO_FAR(machine_check_early) /* Function call ABI */
+ ld r12,_MSR(r1)
+ andi. r11,r12,MSR_PR /* See if coming from user. */
+ bne 2f /* continue in V mode if we are. */
+
+ /*
+ * At this point we are not sure about what context we come from.
+ * We may be in the middle of switching stack. r1 may not be valid.
+ * Hence stay on emergency stack, call machine_check_exception and
+ * return from the interrupt.
+ * But before that, check if this is an un-recoverable exception.
+ * If yes, then stay on emergency stack and panic.
+ */
+ andi. r11,r12,MSR_RI
+ beq 1f
+
+ /*
+ * Check if we have successfully handled/recovered from error, if not
+ * then stay on emergency stack and panic.
+ */
+ cmpdi r3,0 /* see if we handled MCE successfully */
+ beq 1f /* if !handled then panic */
+
+ /* Stay on emergency stack and return from interrupt. */
+ LOAD_HANDLER(r10,mce_return)
+ mtspr SPRN_SRR0,r10
+ ld r10,PACAKMSR(r13)
+ mtspr SPRN_SRR1,r10
+ RFI_TO_KERNEL
+ b .
+
+1: LOAD_HANDLER(r10,unrecover_mce)
+ mtspr SPRN_SRR0,r10
+ ld r10,PACAKMSR(r13)
+ /*
+ * We are going down. But there are chances that we might get hit by
+ * another MCE during panic path and we may run into unstable state
+ * with no way out. Hence, turn ME bit off while going down, so that
+ * when another MCE is hit during panic path, hypervisor will
+ * power cycle the lpar, instead of getting into MCE loop.
+ */
+ li r3,MSR_ME
+ andc r10,r10,r3 /* Turn off MSR_ME */
+ mtspr SPRN_SRR1,r10
+ RFI_TO_KERNEL
+ b .
+
+ /* Move original SRR0 and SRR1 into the respective regs */
+2: ld r9,_MSR(r1)
+ mtspr SPRN_SRR1,r9
+ ld r3,_NIP(r1)
+ mtspr SPRN_SRR0,r3
+ ld r9,_CTR(r1)
+ mtctr r9
+ ld r9,_XER(r1)
+ mtxer r9
+ ld r9,_LINK(r1)
+ mtlr r9
+ REST_GPR(0, r1)
+ REST_8GPRS(2, r1)
+ REST_GPR(10, r1)
+ ld r11,_CCR(r1)
+ mtcr r11
+ /* Decrement paca->in_mce. */
+ lhz r12,PACA_IN_MCE(r13)
+ subi r12,r12,1
+ sth r12,PACA_IN_MCE(r13)
+ REST_GPR(11, r1)
+ REST_2GPRS(12, r1)
+ /* restore original r1. */
+ ld r1,GPR1(r1)
+ SET_SCRATCH0(r13) /* save r13 */
+ EXCEPTION_PROLOG_0(PACA_EXMC)
+ b machine_check_pSeries_0
+END_FTR_SECTION_IFCLR(CPU_FTR_HVMODE)
+
EXC_COMMON_BEGIN(machine_check_common)
/*
* Machine check is different because we use a different
@@ -535,6 +635,35 @@ EXC_COMMON_BEGIN(unrecover_mce)
bl unrecoverable_exception
b 1b
+EXC_COMMON_BEGIN(mce_return)
+ /* Invoke machine_check_exception to print MCE event and return. */
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ bl machine_check_exception
+ ld r9,_MSR(r1)
+ mtspr SPRN_SRR1,r9
+ ld r3,_NIP(r1)
+ mtspr SPRN_SRR0,r3
+ ld r9,_CTR(r1)
+ mtctr r9
+ ld r9,_XER(r1)
+ mtxer r9
+ ld r9,_LINK(r1)
+ mtlr r9
+ REST_GPR(0, r1)
+ REST_8GPRS(2, r1)
+ REST_GPR(10, r1)
+ ld r11,_CCR(r1)
+ mtcr r11
+ /* Decrement paca->in_mce. */
+ lhz r12,PACA_IN_MCE(r13)
+ subi r12,r12,1
+ sth r12,PACA_IN_MCE(r13)
+ REST_GPR(11, r1)
+ REST_2GPRS(12, r1)
+ /* restore original r1. */
+ ld r1,GPR1(r1)
+ RFI_TO_KERNEL
+ b .
EXC_REAL(data_access, 0x300, 0x80)
EXC_VIRT(data_access, 0x4300, 0x80, 0x300)