diff options
Diffstat (limited to 'arch/x86/kernel/cpu/mcheck')
-rw-r--r-- | arch/x86/kernel/cpu/mcheck/dev-mcelog.c | 2 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mcheck/mce-severity.c | 26 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mcheck/mce.c | 28 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mcheck/mce_amd.c | 29 |
4 files changed, 67 insertions, 18 deletions
diff --git a/arch/x86/kernel/cpu/mcheck/dev-mcelog.c b/arch/x86/kernel/cpu/mcheck/dev-mcelog.c index 7f85b76f43bc..213e8c2ca702 100644 --- a/arch/x86/kernel/cpu/mcheck/dev-mcelog.c +++ b/arch/x86/kernel/cpu/mcheck/dev-mcelog.c @@ -243,7 +243,7 @@ out: return err ? err : buf - ubuf; } -static unsigned int mce_chrdev_poll(struct file *file, poll_table *wait) +static __poll_t mce_chrdev_poll(struct file *file, poll_table *wait) { poll_wait(file, &mce_chrdev_wait, wait); if (READ_ONCE(mcelog.next)) diff --git a/arch/x86/kernel/cpu/mcheck/mce-severity.c b/arch/x86/kernel/cpu/mcheck/mce-severity.c index 4ca632a06e0b..5bbd06f38ff6 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-severity.c +++ b/arch/x86/kernel/cpu/mcheck/mce-severity.c @@ -59,6 +59,7 @@ static struct severity { #define MCGMASK(x, y) .mcgmask = x, .mcgres = y #define MASK(x, y) .mask = x, .result = y #define MCI_UC_S (MCI_STATUS_UC|MCI_STATUS_S) +#define MCI_UC_AR (MCI_STATUS_UC|MCI_STATUS_AR) #define MCI_UC_SAR (MCI_STATUS_UC|MCI_STATUS_S|MCI_STATUS_AR) #define MCI_ADDR (MCI_STATUS_ADDRV|MCI_STATUS_MISCV) @@ -101,6 +102,22 @@ static struct severity { NOSER, BITCLR(MCI_STATUS_UC) ), + /* + * known AO MCACODs reported via MCE or CMC: + * + * SRAO could be signaled either via a machine check exception or + * CMCI with the corresponding bit S 1 or 0. So we don't need to + * check bit S for SRAO. + */ + MCESEV( + AO, "Action optional: memory scrubbing error", + SER, MASK(MCI_STATUS_OVER|MCI_UC_AR|MCACOD_SCRUBMSK, MCI_STATUS_UC|MCACOD_SCRUB) + ), + MCESEV( + AO, "Action optional: last level cache writeback error", + SER, MASK(MCI_STATUS_OVER|MCI_UC_AR|MCACOD, MCI_STATUS_UC|MCACOD_L3WB) + ), + /* ignore OVER for UCNA */ MCESEV( UCNA, "Uncorrected no action required", @@ -149,15 +166,6 @@ static struct severity { SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_UC_SAR) ), - /* known AO MCACODs: */ - MCESEV( - AO, "Action optional: memory scrubbing error", - SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCACOD_SCRUBMSK, MCI_UC_S|MCACOD_SCRUB) - ), - MCESEV( - AO, "Action optional: last level cache writeback error", - SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCACOD, MCI_UC_S|MCACOD_L3WB) - ), MCESEV( SOME, "Action optional: unknown MCACOD", SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_UC_S) diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index b1d616d08eee..ba1f9555fbc5 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -503,10 +503,8 @@ static int mce_usable_address(struct mce *m) bool mce_is_memory_error(struct mce *m) { if (m->cpuvendor == X86_VENDOR_AMD) { - /* ErrCodeExt[20:16] */ - u8 xec = (m->status >> 16) & 0x1f; + return amd_mce_is_memory_error(m); - return (xec == 0x0 || xec == 0x8); } else if (m->cpuvendor == X86_VENDOR_INTEL) { /* * Intel SDM Volume 3B - 15.9.2 Compound Error Codes @@ -530,6 +528,17 @@ bool mce_is_memory_error(struct mce *m) } EXPORT_SYMBOL_GPL(mce_is_memory_error); +static bool mce_is_correctable(struct mce *m) +{ + if (m->cpuvendor == X86_VENDOR_AMD && m->status & MCI_STATUS_DEFERRED) + return false; + + if (m->status & MCI_STATUS_UC) + return false; + + return true; +} + static bool cec_add_mce(struct mce *m) { if (!m) @@ -537,7 +546,7 @@ static bool cec_add_mce(struct mce *m) /* We eat only correctable DRAM errors with usable addresses. */ if (mce_is_memory_error(m) && - !(m->status & MCI_STATUS_UC) && + mce_is_correctable(m) && mce_usable_address(m)) if (!cec_add_elem(m->addr >> PAGE_SHIFT)) return true; @@ -582,7 +591,7 @@ static int srao_decode_notifier(struct notifier_block *nb, unsigned long val, if (mce_usable_address(mce) && (mce->severity == MCE_AO_SEVERITY)) { pfn = mce->addr >> PAGE_SHIFT; - memory_failure(pfn, MCE_VECTOR, 0); + memory_failure(pfn, 0); } return NOTIFY_OK; @@ -1046,7 +1055,7 @@ static int do_memory_failure(struct mce *m) pr_err("Uncorrected hardware memory error in user-access at %llx", m->addr); if (!(m->mcgstatus & MCG_STATUS_RIPV)) flags |= MF_MUST_KILL; - ret = memory_failure(m->addr >> PAGE_SHIFT, MCE_VECTOR, flags); + ret = memory_failure(m->addr >> PAGE_SHIFT, flags); if (ret) pr_err("Memory error not recovered"); return ret; @@ -1325,7 +1334,7 @@ out_ist: EXPORT_SYMBOL_GPL(do_machine_check); #ifndef CONFIG_MEMORY_FAILURE -int memory_failure(unsigned long pfn, int vector, int flags) +int memory_failure(unsigned long pfn, int flags) { /* mce_severity() should not hand us an ACTION_REQUIRED error */ BUG_ON(flags & MF_ACTION_REQUIRED); @@ -1785,6 +1794,11 @@ static void unexpected_machine_check(struct pt_regs *regs, long error_code) void (*machine_check_vector)(struct pt_regs *, long error_code) = unexpected_machine_check; +dotraplinkage void do_mce(struct pt_regs *regs, long error_code) +{ + machine_check_vector(regs, error_code); +} + /* * Called for each booted CPU to set up machine checks. * Must be called with preempt off: diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index 486f640b02ef..0f32ad242324 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c @@ -110,6 +110,20 @@ const char *smca_get_long_name(enum smca_bank_types t) } EXPORT_SYMBOL_GPL(smca_get_long_name); +static enum smca_bank_types smca_get_bank_type(struct mce *m) +{ + struct smca_bank *b; + + if (m->bank >= N_SMCA_BANK_TYPES) + return N_SMCA_BANK_TYPES; + + b = &smca_banks[m->bank]; + if (!b->hwid) + return N_SMCA_BANK_TYPES; + + return b->hwid->bank_type; +} + static struct smca_hwid smca_hwid_mcatypes[] = { /* { bank_type, hwid_mcatype, xec_bitmap } */ @@ -407,7 +421,9 @@ static void deferred_error_interrupt_enable(struct cpuinfo_x86 *c) (deferred_error_int_vector != amd_deferred_error_interrupt)) deferred_error_int_vector = amd_deferred_error_interrupt; - low = (low & ~MASK_DEF_INT_TYPE) | DEF_INT_TYPE_APIC; + if (!mce_flags.smca) + low = (low & ~MASK_DEF_INT_TYPE) | DEF_INT_TYPE_APIC; + wrmsr(MSR_CU_DEF_ERR, low, high); } @@ -738,6 +754,17 @@ out_err: } EXPORT_SYMBOL_GPL(umc_normaddr_to_sysaddr); +bool amd_mce_is_memory_error(struct mce *m) +{ + /* ErrCodeExt[20:16] */ + u8 xec = (m->status >> 16) & 0x1f; + + if (mce_flags.smca) + return smca_get_bank_type(m) == SMCA_UMC && xec == 0x0; + + return m->bank == 4 && xec == 0x8; +} + static void __log_error(unsigned int bank, u64 status, u64 addr, u64 misc) { struct mce m; |