From d9d73fcc878469d209d7a7030726f20dd10841a7 Mon Sep 17 00:00:00 2001
From: Yazen Ghannam
Date: Sat, 30 Apr 2016 14:33:55 +0200
Subject: x86/mce: Detect and use SMCA-specific msr_ops

Replace all calls to MSR_IA32_MCx_{CTL,ADDR,MISC,STATUS} with the
appropriate msr_ops.

Use SMCA-specific msr_ops when on an SMCA-enabled processor.

Carved out from a patch by Aravind Gopalakrishnan.

Signed-off-by: Yazen Ghannam
Signed-off-by: Borislav Petkov
Cc: Andy Lutomirski
Cc: Aravind Gopalakrishnan
Cc: Borislav Petkov
Cc: Brian Gerst
Cc: Denys Vlasenko
Cc: H. Peter Anvin
Cc: Linus Torvalds
Cc: Peter Zijlstra
Cc: Thomas Gleixner
Cc: Tony Luck
Cc: linux-edac
Link: http://lkml.kernel.org/r/1462019637-16474-6-git-send-email-bp@alien8.de
Signed-off-by: Ingo Molnar
---
 arch/x86/kernel/cpu/mcheck/mce_amd.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index 9d656fd436ef..c594680c36f2 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -333,7 +333,7 @@ static u32 get_block_address(u32 current_addr, u32 low, u32 high,
 	/* Fall back to method we used for older processors: */
 	switch (block) {
 	case 0:
-		addr = MSR_IA32_MCx_MISC(bank);
+		addr = msr_ops.misc(bank);
 		break;
 	case 1:
 		offset = ((low & MASK_BLKPTR_LO) >> 21);
@@ -435,7 +435,7 @@ static void __log_error(unsigned int bank, bool threshold_err, u64 misc)
 	struct mce m;
 	u64 status;
 
-	rdmsrl(MSR_IA32_MCx_STATUS(bank), status);
+	rdmsrl(msr_ops.status(bank), status);
 	if (!(status & MCI_STATUS_VAL))
 		return;
 
@@ -448,10 +448,10 @@ static void __log_error(unsigned int bank, bool threshold_err, u64 misc)
 	m.misc = misc;
 
 	if (m.status & MCI_STATUS_ADDRV)
-		rdmsrl(MSR_IA32_MCx_ADDR(bank), m.addr);
+		rdmsrl(msr_ops.addr(bank), m.addr);
 
 	mce_log(&m);
-	wrmsrl(MSR_IA32_MCx_STATUS(bank), 0);
+	wrmsrl(msr_ops.status(bank), 0);
 }
 
 static inline void __smp_deferred_error_interrupt(void)
@@ -483,7 +483,7 @@ static void amd_deferred_error_interrupt(void)
 	unsigned int bank;
 
 	for (bank = 0; bank < mca_cfg.banks; ++bank) {
-		rdmsrl(MSR_IA32_MCx_STATUS(bank), status);
+		rdmsrl(msr_ops.status(bank), status);
 
 		if (!(status & MCI_STATUS_VAL) ||
 		    !(status & MCI_STATUS_DEFERRED))
--
cgit v1.2.3-55-g7522
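A note on the msr_ops that the hunks above switch to: it is a structure
of function pointers returning per-bank MSR addresses, filled in once at
boot depending on whether the CPU reports SMCA. The standalone sketch
below shows the dispatch pattern for illustration only; it is a
userspace mock, not the kernel's verbatim definition. The macro values
follow the documented MSR layout (legacy banks are spaced 4 MSRs apart,
SMCA banks 16 apart):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Legacy MCA: 4 MSRs per bank, MC0_STATUS at 0x401. */
#define MSR_IA32_MC0_STATUS		0x401
#define MSR_IA32_MCx_STATUS(x)		(MSR_IA32_MC0_STATUS + 4*(x))

/* SMCA: a 16-MSR window per bank, MC0_STATUS at 0xc0002001. */
#define MSR_AMD64_SMCA_MC0_STATUS	0xc0002001
#define MSR_AMD64_SMCA_MCx_STATUS(x)	(MSR_AMD64_SMCA_MC0_STATUS + 0x10*(x))

static uint32_t legacy_status(int bank) { return MSR_IA32_MCx_STATUS(bank); }
static uint32_t smca_status(int bank)   { return MSR_AMD64_SMCA_MCx_STATUS(bank); }

/* One accessor per register; ctl/addr/misc would be added the same way. */
struct mca_msr_regs {
	uint32_t (*status)(int bank);
};

int main(void)
{
	bool smca = true;	/* the kernel derives this from CPUID */
	struct mca_msr_regs msr_ops = { .status = legacy_status };

	/* One-time switch at init; every caller then uses msr_ops.status(). */
	if (smca)
		msr_ops.status = smca_status;

	printf("bank 2 status MSR: 0x%x\n", (unsigned int)msr_ops.status(2));
	return 0;
}

Callers such as __log_error() stay vendor-agnostic this way; only the
init path needs to know which register file the hardware implements.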
From 34102009580a047c02b21f089f7fc7f65e605887 Mon Sep 17 00:00:00 2001
From: Yazen Ghannam
Date: Wed, 11 May 2016 14:58:23 +0200
Subject: x86/mce/AMD: Log Deferred Errors using SMCA MCA_DE{STAT,ADDR} registers

Scalable MCA provides new registers for all banks for logging deferred
errors: MCA_DESTAT and MCA_DEADDR. Deferred errors are always logged to
these registers. Update the AMD deferred error handler to use these
registers, if available.

Signed-off-by: Yazen Ghannam
[ Sanity-check __log_error() args, massage a bit. ]
Signed-off-by: Borislav Petkov
Cc: Andy Lutomirski
Cc: Aravind Gopalakrishnan
Cc: Borislav Petkov
Cc: Brian Gerst
Cc: Denys Vlasenko
Cc: H. Peter Anvin
Cc: Linus Torvalds
Cc: Peter Zijlstra
Cc: Thomas Gleixner
Cc: Tony Luck
Cc: linux-edac
Link: http://lkml.kernel.org/r/1462971509-3856-2-git-send-email-bp@alien8.de
Signed-off-by: Ingo Molnar
---
 arch/x86/include/asm/mce.h           |  4 ++++
 arch/x86/kernel/cpu/mcheck/mce_amd.c | 32 ++++++++++++++++++++++++--------
 2 files changed, 28 insertions(+), 8 deletions(-)

diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index 53ab69704771..8bf766ef0e18 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -110,6 +110,8 @@
 #define MSR_AMD64_SMCA_MC0_MISC0	0xc0002003
 #define MSR_AMD64_SMCA_MC0_CONFIG	0xc0002004
 #define MSR_AMD64_SMCA_MC0_IPID		0xc0002005
+#define MSR_AMD64_SMCA_MC0_DESTAT	0xc0002008
+#define MSR_AMD64_SMCA_MC0_DEADDR	0xc0002009
 #define MSR_AMD64_SMCA_MC0_MISC1	0xc000200a
 #define MSR_AMD64_SMCA_MCx_CTL(x)	(MSR_AMD64_SMCA_MC0_CTL + 0x10*(x))
 #define MSR_AMD64_SMCA_MCx_STATUS(x)	(MSR_AMD64_SMCA_MC0_STATUS + 0x10*(x))
@@ -117,6 +119,8 @@
 #define MSR_AMD64_SMCA_MCx_MISC(x)	(MSR_AMD64_SMCA_MC0_MISC0 + 0x10*(x))
 #define MSR_AMD64_SMCA_MCx_CONFIG(x)	(MSR_AMD64_SMCA_MC0_CONFIG + 0x10*(x))
 #define MSR_AMD64_SMCA_MCx_IPID(x)	(MSR_AMD64_SMCA_MC0_IPID + 0x10*(x))
+#define MSR_AMD64_SMCA_MCx_DESTAT(x)	(MSR_AMD64_SMCA_MC0_DESTAT + 0x10*(x))
+#define MSR_AMD64_SMCA_MCx_DEADDR(x)	(MSR_AMD64_SMCA_MC0_DEADDR + 0x10*(x))
 #define MSR_AMD64_SMCA_MCx_MISCy(x, y)	((MSR_AMD64_SMCA_MC0_MISC1 + y) + (0x10*(x)))
 
 /*
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index c594680c36f2..d1b1e62f7cb9 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -430,12 +430,23 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c)
 		deferred_error_interrupt_enable(c);
 }
 
-static void __log_error(unsigned int bank, bool threshold_err, u64 misc)
+static void
+__log_error(unsigned int bank, bool deferred_err, bool threshold_err, u64 misc)
 {
+	u32 msr_status = msr_ops.status(bank);
+	u32 msr_addr = msr_ops.addr(bank);
 	struct mce m;
 	u64 status;
 
-	rdmsrl(msr_ops.status(bank), status);
+	WARN_ON_ONCE(deferred_err && threshold_err);
+
+	if (deferred_err && mce_flags.smca) {
+		msr_status = MSR_AMD64_SMCA_MCx_DESTAT(bank);
+		msr_addr = MSR_AMD64_SMCA_MCx_DEADDR(bank);
+	}
+
+	rdmsrl(msr_status, status);
+
 	if (!(status & MCI_STATUS_VAL))
 		return;
 
@@ -448,10 +459,11 @@ static void __log_error(unsigned int bank, bool threshold_err, u64 misc)
 	m.misc = misc;
 
 	if (m.status & MCI_STATUS_ADDRV)
-		rdmsrl(msr_ops.addr(bank), m.addr);
+		rdmsrl(msr_addr, m.addr);
 
 	mce_log(&m);
-	wrmsrl(msr_ops.status(bank), 0);
+
+	wrmsrl(msr_status, 0);
 }
 
 static inline void __smp_deferred_error_interrupt(void)
@@ -479,17 +491,21 @@ asmlinkage __visible void smp_trace_deferred_error_interrupt(void)
 /* APIC interrupt handler for deferred errors */
 static void amd_deferred_error_interrupt(void)
 {
-	u64 status;
 	unsigned int bank;
+	u32 msr_status;
+	u64 status;
 
 	for (bank = 0; bank < mca_cfg.banks; ++bank) {
-		rdmsrl(msr_ops.status(bank), status);
+		msr_status = (mce_flags.smca) ? MSR_AMD64_SMCA_MCx_DESTAT(bank)
+					      : msr_ops.status(bank);
+
+		rdmsrl(msr_status, status);
 
 		if (!(status & MCI_STATUS_VAL) ||
 		    !(status & MCI_STATUS_DEFERRED))
			continue;
 
-		__log_error(bank, false, 0);
+		__log_error(bank, true, false, 0);
 		break;
 	}
 }
@@ -544,7 +560,7 @@ static void amd_threshold_interrupt(void)
 	return;
 
 log:
-	__log_error(bank, true, ((u64)high << 32) | low);
+	__log_error(bank, false, true, ((u64)high << 32) | low);
 }
--
cgit v1.2.3-55-g7522
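The 0x10 stride in the new DESTAT/DEADDR macros encodes SMCA's register
layout: every bank owns a 16-MSR window, so MCA_DESTAT of bank 5, for
example, lives at 0xc0002008 + 0x10 * 5 = 0xc0002058, with MCA_DEADDR
one MSR above it. A small standalone check of that arithmetic, reusing
the constants from the mce.h hunk above:

#include <assert.h>

#define MSR_AMD64_SMCA_MC0_DESTAT	0xc0002008
#define MSR_AMD64_SMCA_MC0_DEADDR	0xc0002009
#define MSR_AMD64_SMCA_MCx_DESTAT(x)	(MSR_AMD64_SMCA_MC0_DESTAT + 0x10*(x))
#define MSR_AMD64_SMCA_MCx_DEADDR(x)	(MSR_AMD64_SMCA_MC0_DEADDR + 0x10*(x))

int main(void)
{
	/* Bank 5: base address plus five 16-MSR windows. */
	assert(MSR_AMD64_SMCA_MCx_DESTAT(5) == 0xc0002058);

	/* DEADDR is the next MSR in the same bank's window. */
	assert(MSR_AMD64_SMCA_MCx_DEADDR(5) == MSR_AMD64_SMCA_MCx_DESTAT(5) + 1);
	return 0;
}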
From 32544f060326bffa60dade49ce4595652df4d3ab Mon Sep 17 00:00:00 2001
From: Yazen Ghannam
Date: Wed, 11 May 2016 14:58:24 +0200
Subject: x86/mce/AMD: Disable LogDeferredInMcaStat for SMCA systems

Disable Deferred Error logging in MCA_{STATUS,ADDR} additionally for
SMCA systems, as this information will be retrieved from
MCA_DE{STAT,ADDR} on those systems.

Signed-off-by: Yazen Ghannam
[ Simplify, drop SMCA_MCAX_EN_OFF define too. ]
Signed-off-by: Borislav Petkov
Cc: Andy Lutomirski
Cc: Aravind Gopalakrishnan
Cc: Borislav Petkov
Cc: Brian Gerst
Cc: Denys Vlasenko
Cc: H. Peter Anvin
Cc: Linus Torvalds
Cc: Peter Zijlstra
Cc: Thomas Gleixner
Cc: Tony Luck
Cc: linux-edac
Link: http://lkml.kernel.org/r/1462971509-3856-3-git-send-email-bp@alien8.de
Signed-off-by: Ingo Molnar
---
 arch/x86/kernel/cpu/mcheck/mce_amd.c | 38 +++++++++++++++++++++++++++---------
 1 file changed, 29 insertions(+), 9 deletions(-)

diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index d1b1e62f7cb9..527ddae3017b 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -54,14 +54,6 @@
 /* Threshold LVT offset is at MSR0xC0000410[15:12] */
 #define SMCA_THR_LVT_OFF	0xF000
 
-/*
- * OS is required to set the MCAX bit to acknowledge that it is now using the
- * new MSR ranges and new registers under each bank. It also means that the OS
- * will configure deferred errors in the new MCx_CONFIG register. If the bit is
- * not set, uncorrectable errors will cause a system panic.
- */
-#define SMCA_MCAX_EN_OFF	0x1
-
 static const char * const th_names[] = {
 	"load_store",
 	"insn_fetch",
@@ -374,7 +366,35 @@ prepare_threshold_block(unsigned int bank, unsigned int block, u32 addr,
 		u32 smca_addr = MSR_AMD64_SMCA_MCx_CONFIG(bank);
 
 		if (!rdmsr_safe(smca_addr, &smca_low, &smca_high)) {
-			smca_high |= SMCA_MCAX_EN_OFF;
+			/*
+			 * OS is required to set the MCAX bit to acknowledge
+			 * that it is now using the new MSR ranges and new
+			 * registers under each bank. It also means that the OS
+			 * will configure deferred errors in the new MCx_CONFIG
+			 * register. If the bit is not set, uncorrectable errors
+			 * will cause a system panic.
+			 *
+			 * MCA_CONFIG[MCAX] is bit 32 (0 in the high portion of
+			 * the MSR.)
+			 */
+			smca_high |= BIT(0);
+
+			/*
+			 * SMCA logs Deferred Error information in
+			 * MCA_DE{STAT,ADDR} registers with the option of
+			 * additionally logging to MCA_{STATUS,ADDR} if
+			 * MCA_CONFIG[LogDeferredInMcaStat] is set.
+			 *
+			 * This bit is usually set by BIOS to retain the old
+			 * behavior for OSes that don't use the new registers.
+			 * Linux supports the new registers so let's disable
+			 * that additional logging here.
+			 *
+			 * MCA_CONFIG[LogDeferredInMcaStat] is bit 34 (bit 2 in
+			 * the high portion of the MSR).
+			 */
+			smca_high &= ~BIT(2);
+
 			wrmsr(smca_addr, smca_low, smca_high);
 		}
--
cgit v1.2.3-55-g7522
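The bit numbering in the new comments can be cross-checked: wrmsr()
takes the 64-bit MCA_CONFIG as two 32-bit halves, so bit 32 of the full
register (MCAX) is bit 0 of the high half, and bit 34
(LogDeferredInMcaStat) is bit 2. A standalone sanity check of that
mapping, with the kernel's BIT()/BIT_ULL() helpers spelled out for
userspace:

#include <assert.h>
#include <stdint.h>

#define BIT(n)		(1U << (n))
#define BIT_ULL(n)	(1ULL << (n))

int main(void)
{
	uint32_t lo = 0, hi = 0;

	hi |= BIT(0);	/* set MCA_CONFIG[MCAX]                   */
	hi &= ~BIT(2);	/* clear MCA_CONFIG[LogDeferredInMcaStat] */

	/* Reassemble the 64-bit register and verify the intended bits. */
	uint64_t config = ((uint64_t)hi << 32) | lo;
	assert(config & BIT_ULL(32));
	assert(!(config & BIT_ULL(34)));
	return 0;
}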
From e128b4f4833cc1e0ce46dfb978763b260f166188 Mon Sep 17 00:00:00 2001
From: Borislav Petkov
Date: Wed, 11 May 2016 14:58:25 +0200
Subject: x86/mce/AMD: Save an indentation level in prepare_threshold_block()

Do the !SMCA work first and then save us an indentation level for the
SMCA code.

No functionality change.

Signed-off-by: Borislav Petkov
Cc: Andy Lutomirski
Cc: Aravind Gopalakrishnan
Cc: Borislav Petkov
Cc: Brian Gerst
Cc: Denys Vlasenko
Cc: H. Peter Anvin
Cc: Linus Torvalds
Cc: Peter Zijlstra
Cc: Thomas Gleixner
Cc: Tony Luck
Cc: Yazen Ghannam
Cc: linux-edac
Link: http://lkml.kernel.org/r/1462971509-3856-4-git-send-email-bp@alien8.de
Signed-off-by: Ingo Molnar
---
 arch/x86/kernel/cpu/mcheck/mce_amd.c | 78 ++++++++++++++++++------------------
 1 file changed, 38 insertions(+), 40 deletions(-)

diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index 527ddae3017b..10b0661651e0 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -343,6 +343,7 @@ prepare_threshold_block(unsigned int bank, unsigned int block, u32 addr,
 		     int offset, u32 misc_high)
 {
 	unsigned int cpu = smp_processor_id();
+	u32 smca_low, smca_high, smca_addr;
 	struct threshold_block b;
 	int new;
 
@@ -361,52 +362,49 @@ prepare_threshold_block(unsigned int bank, unsigned int block, u32 addr,
 
 	b.interrupt_enable = 1;
 
-	if (mce_flags.smca) {
-		u32 smca_low, smca_high;
-		u32 smca_addr = MSR_AMD64_SMCA_MCx_CONFIG(bank);
-
-		if (!rdmsr_safe(smca_addr, &smca_low, &smca_high)) {
-			/*
-			 * OS is required to set the MCAX bit to acknowledge
-			 * that it is now using the new MSR ranges and new
-			 * registers under each bank. It also means that the OS
-			 * will configure deferred errors in the new MCx_CONFIG
-			 * register. If the bit is not set, uncorrectable errors
-			 * will cause a system panic.
-			 *
-			 * MCA_CONFIG[MCAX] is bit 32 (0 in the high portion of
-			 * the MSR.)
-			 */
-			smca_high |= BIT(0);
+	if (!mce_flags.smca) {
+		new = (misc_high & MASK_LVTOFF_HI) >> 20;
+		goto set_offset;
+	}
 
-			/*
-			 * SMCA logs Deferred Error information in
-			 * MCA_DE{STAT,ADDR} registers with the option of
-			 * additionally logging to MCA_{STATUS,ADDR} if
-			 * MCA_CONFIG[LogDeferredInMcaStat] is set.
-			 *
-			 * This bit is usually set by BIOS to retain the old
-			 * behavior for OSes that don't use the new registers.
-			 * Linux supports the new registers so let's disable
-			 * that additional logging here.
-			 *
-			 * MCA_CONFIG[LogDeferredInMcaStat] is bit 34 (bit 2 in
-			 * the high portion of the MSR).
-			 */
-			smca_high &= ~BIT(2);
+	smca_addr = MSR_AMD64_SMCA_MCx_CONFIG(bank);
 
-			wrmsr(smca_addr, smca_low, smca_high);
-		}
+	if (!rdmsr_safe(smca_addr, &smca_low, &smca_high)) {
+		/*
+		 * OS is required to set the MCAX bit to acknowledge that it is
+		 * now using the new MSR ranges and new registers under each
+		 * bank. It also means that the OS will configure deferred
+		 * errors in the new MCx_CONFIG register. If the bit is not set,
+		 * uncorrectable errors will cause a system panic.
+		 *
+		 * MCA_CONFIG[MCAX] is bit 32 (0 in the high portion of the MSR.)
+		 */
+		smca_high |= BIT(0);
 
-		/* Gather LVT offset for thresholding: */
-		if (rdmsr_safe(MSR_CU_DEF_ERR, &smca_low, &smca_high))
-			goto out;
+		/*
+		 * SMCA logs Deferred Error information in MCA_DE{STAT,ADDR}
+		 * registers with the option of additionally logging to
+		 * MCA_{STATUS,ADDR} if MCA_CONFIG[LogDeferredInMcaStat] is set.
+		 *
+		 * This bit is usually set by BIOS to retain the old behavior
+		 * for OSes that don't use the new registers. Linux supports the
+		 * new registers so let's disable that additional logging here.
+		 *
+		 * MCA_CONFIG[LogDeferredInMcaStat] is bit 34 (bit 2 in the high
+		 * portion of the MSR).
+		 */
+		smca_high &= ~BIT(2);
 
-		new = (smca_low & SMCA_THR_LVT_OFF) >> 12;
-	} else {
-		new = (misc_high & MASK_LVTOFF_HI) >> 20;
+		wrmsr(smca_addr, smca_low, smca_high);
 	}
 
+	/* Gather LVT offset for thresholding: */
+	if (rdmsr_safe(MSR_CU_DEF_ERR, &smca_low, &smca_high))
+		goto out;
+
+	new = (smca_low & SMCA_THR_LVT_OFF) >> 12;
+
+set_offset:
 	offset = setup_APIC_mce_threshold(offset, new);
 
 	if ((offset == new) && (mce_threshold_vector != amd_threshold_interrupt))
--
cgit v1.2.3-55-g7522
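A closing note on the LVT offset extraction this refactor keeps: the
line new = (smca_low & SMCA_THR_LVT_OFF) >> 12 masks with 0xF000 and so
reads bits [15:12] of the low half of MSR_CU_DEF_ERR (MSR 0xC0000410),
per the comment at the top of the file. A standalone illustration with
a made-up register value:

#include <assert.h>
#include <stdint.h>

/* Threshold LVT offset is at MSR 0xC0000410[15:12]. */
#define SMCA_THR_LVT_OFF	0xF000

int main(void)
{
	/* Hypothetical low half of MSR_CU_DEF_ERR: LVT offset field = 3. */
	uint32_t smca_low = 0x0000300f;

	int new = (smca_low & SMCA_THR_LVT_OFF) >> 12;
	assert(new == 3);
	return 0;
}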