diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2020-06-13 20:21:00 +0300 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2020-06-13 20:21:00 +0300 |
commit | a9429089d3e822d45be01a9635f0685174508fd3 (patch) | |
tree | d5483a635cdc4d74967c1ddb383f1e283bf9849c /arch/x86/kernel/cpu/mce/core.c | |
parent | 076f14be7fc942e112c94c841baec44124275cd0 (diff) | |
parent | 7ccddc4613db446dc3cbb69a3763ba60ec651d13 (diff) | |
download | linux-a9429089d3e822d45be01a9635f0685174508fd3.tar.xz |
Merge tag 'ras-core-2020-06-12' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 RAS updates from Thomas Gleixner:
"RAS updates from Borislav Petkov:
- Unmap a whole guest page if an MCE is encountered in it to avoid
follow-on MCEs leading to the guest crashing, by Tony Luck.
This change collided with the entry changes and the merge
resolution would have been rather unpleasant. To avoid that the
entry branch was merged in before applying this. The resulting code
did not change over the rebase.
- AMD MCE error thresholding machinery cleanup and hotplug
sanitization, by Thomas Gleixner.
- Change the MCE notifiers to denote whether they have handled the
error and not break the chain early by returning NOTIFY_STOP, thus
giving the opportunity for the later handlers in the chain to see
it. By Tony Luck.
- Add AMD family 0x17, models 0x60-6f support, by Alexander Monakov.
- Last but not least, the usual round of fixes and improvements"
* tag 'ras-core-2020-06-12' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (23 commits)
x86/mce/dev-mcelog: Fix -Wstringop-truncation warning about strncpy()
x86/{mce,mm}: Unmap the entire page if the whole page is affected and poisoned
EDAC/amd64: Add AMD family 17h model 60h PCI IDs
hwmon: (k10temp) Add AMD family 17h model 60h PCI match
x86/amd_nb: Add AMD family 17h model 60h PCI IDs
x86/mcelog: Add compat_ioctl for 32-bit mcelog support
x86/mce: Drop bogus comment about mce.kflags
x86/mce: Fixup exception only for the correct MCEs
EDAC: Drop the EDAC report status checks
x86/mce: Add mce=print_all option
x86/mce: Change default MCE logger to check mce->kflags
x86/mce: Fix all mce notifiers to update the mce->kflags bitmask
x86/mce: Add a struct mce.kflags field
x86/mce: Convert the CEC to use the MCE notifier
x86/mce: Rename "first" function as "early"
x86/mce/amd, edac: Remove report_gart_errors
x86/mce/amd: Make threshold bank setting hotplug robust
x86/mce/amd: Cleanup threshold device remove path
x86/mce/amd: Straighten CPU hotplug path
x86/mce/amd: Sanitize thresholding device creation hotplug path
...
Diffstat (limited to 'arch/x86/kernel/cpu/mce/core.c')
-rw-r--r-- | arch/x86/kernel/cpu/mce/core.c | 102 |
1 files changed, 55 insertions, 47 deletions
diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c index b9cb381b4019..ce9120c4f740 100644 --- a/arch/x86/kernel/cpu/mce/core.c +++ b/arch/x86/kernel/cpu/mce/core.c @@ -160,29 +160,17 @@ void mce_log(struct mce *m) } EXPORT_SYMBOL_GPL(mce_log); -/* - * We run the default notifier if we have only the UC, the first and the - * default notifier registered. I.e., the mandatory NUM_DEFAULT_NOTIFIERS - * notifiers registered on the chain. - */ -#define NUM_DEFAULT_NOTIFIERS 3 -static atomic_t num_notifiers; - void mce_register_decode_chain(struct notifier_block *nb) { if (WARN_ON(nb->priority > MCE_PRIO_MCELOG && nb->priority < MCE_PRIO_EDAC)) return; - atomic_inc(&num_notifiers); - blocking_notifier_chain_register(&x86_mce_decoder_chain, nb); } EXPORT_SYMBOL_GPL(mce_register_decode_chain); void mce_unregister_decode_chain(struct notifier_block *nb) { - atomic_dec(&num_notifiers); - blocking_notifier_chain_unregister(&x86_mce_decoder_chain, nb); } EXPORT_SYMBOL_GPL(mce_unregister_decode_chain); @@ -265,6 +253,7 @@ static void __print_mce(struct mce *m) } pr_cont("\n"); + /* * Note this output is parsed by external tools and old fields * should not be changed. @@ -531,6 +520,14 @@ bool mce_is_memory_error(struct mce *m) } EXPORT_SYMBOL_GPL(mce_is_memory_error); +static bool whole_page(struct mce *m) +{ + if (!mca_cfg.ser || !(m->status & MCI_STATUS_MISCV)) + return true; + + return MCI_MISC_ADDR_LSB(m->misc) >= PAGE_SHIFT; +} + bool mce_is_correctable(struct mce *m) { if (m->cpuvendor == X86_VENDOR_AMD && m->status & MCI_STATUS_DEFERRED) @@ -546,22 +543,7 @@ bool mce_is_correctable(struct mce *m) } EXPORT_SYMBOL_GPL(mce_is_correctable); -static bool cec_add_mce(struct mce *m) -{ - if (!m) - return false; - - /* We eat only correctable DRAM errors with usable addresses. */ - if (mce_is_memory_error(m) && - mce_is_correctable(m) && - mce_usable_address(m)) - if (!cec_add_elem(m->addr >> PAGE_SHIFT)) - return true; - - return false; -} - -static int mce_first_notifier(struct notifier_block *nb, unsigned long val, +static int mce_early_notifier(struct notifier_block *nb, unsigned long val, void *data) { struct mce *m = (struct mce *)data; @@ -569,9 +551,6 @@ static int mce_first_notifier(struct notifier_block *nb, unsigned long val, if (!m) return NOTIFY_DONE; - if (cec_add_mce(m)) - return NOTIFY_STOP; - /* Emit the trace record: */ trace_mce_record(m); @@ -582,9 +561,9 @@ static int mce_first_notifier(struct notifier_block *nb, unsigned long val, return NOTIFY_DONE; } -static struct notifier_block first_nb = { - .notifier_call = mce_first_notifier, - .priority = MCE_PRIO_FIRST, +static struct notifier_block early_nb = { + .notifier_call = mce_early_notifier, + .priority = MCE_PRIO_EARLY, }; static int uc_decode_notifier(struct notifier_block *nb, unsigned long val, @@ -601,8 +580,10 @@ static int uc_decode_notifier(struct notifier_block *nb, unsigned long val, return NOTIFY_DONE; pfn = mce->addr >> PAGE_SHIFT; - if (!memory_failure(pfn, 0)) - set_mce_nospec(pfn); + if (!memory_failure(pfn, 0)) { + set_mce_nospec(pfn, whole_page(mce)); + mce->kflags |= MCE_HANDLED_UC; + } return NOTIFY_OK; } @@ -620,10 +601,8 @@ static int mce_default_notifier(struct notifier_block *nb, unsigned long val, if (!m) return NOTIFY_DONE; - if (atomic_read(&num_notifiers) > NUM_DEFAULT_NOTIFIERS) - return NOTIFY_DONE; - - __print_mce(m); + if (mca_cfg.print_all || !m->kflags) + __print_mce(m); return NOTIFY_DONE; } @@ -1202,11 +1181,12 @@ static void kill_me_maybe(struct callback_head *cb) int flags = MF_ACTION_REQUIRED; pr_err("Uncorrected hardware memory error in user-access at %llx", p->mce_addr); - if (!(p->mce_status & MCG_STATUS_RIPV)) + + if (!p->mce_ripv) flags |= MF_MUST_KILL; if (!memory_failure(p->mce_addr >> PAGE_SHIFT, flags)) { - set_mce_nospec(p->mce_addr >> PAGE_SHIFT); + set_mce_nospec(p->mce_addr >> PAGE_SHIFT, p->mce_whole_page); return; } @@ -1360,14 +1340,26 @@ void noinstr do_machine_check(struct pt_regs *regs) BUG_ON(!on_thread_stack() || !user_mode(regs)); current->mce_addr = m.addr; - current->mce_status = m.mcgstatus; + current->mce_ripv = !!(m.mcgstatus & MCG_STATUS_RIPV); + current->mce_whole_page = whole_page(&m); current->mce_kill_me.func = kill_me_maybe; if (kill_it) current->mce_kill_me.func = kill_me_now; task_work_add(current, ¤t->mce_kill_me, true); } else { - if (!fixup_exception(regs, X86_TRAP_MC, 0, 0)) - mce_panic("Failed kernel mode recovery", &m, msg); + /* + * Handle an MCE which has happened in kernel space but from + * which the kernel can recover: ex_has_fault_handler() has + * already verified that the rIP at which the error happened is + * a rIP from which the kernel can recover (by jumping to + * recovery code specified in _ASM_EXTABLE_FAULT()) and the + * corresponding exception handler which would do that is the + * proper one. + */ + if (m.kflags & MCE_IN_KERNEL_RECOV) { + if (!fixup_exception(regs, X86_TRAP_MC, 0, 0)) + mce_panic("Failed kernel mode recovery", &m, msg); + } } } EXPORT_SYMBOL_GPL(do_machine_check); @@ -1758,6 +1750,7 @@ static void __mcheck_cpu_init_early(struct cpuinfo_x86 *c) mce_flags.overflow_recov = !!cpu_has(c, X86_FEATURE_OVERFLOW_RECOV); mce_flags.succor = !!cpu_has(c, X86_FEATURE_SUCCOR); mce_flags.smca = !!cpu_has(c, X86_FEATURE_SMCA); + mce_flags.amd_threshold = 1; if (mce_flags.smca) { msr_ops.ctl = smca_ctl_reg; @@ -2055,6 +2048,7 @@ void mce_disable_bank(int bank) * mce=no_cmci Disables CMCI * mce=no_lmce Disables LMCE * mce=dont_log_ce Clears corrected events silently, no log created for CEs. + * mce=print_all Print all machine check logs to console * mce=ignore_ce Disables polling and CMCI, corrected events are not cleared. * mce=TOLERANCELEVEL[,monarchtimeout] (number, see above) * monarchtimeout is how long to wait for other CPUs on machine @@ -2083,6 +2077,8 @@ static int __init mcheck_enable(char *str) cfg->lmce_disabled = 1; else if (!strcmp(str, "dont_log_ce")) cfg->dont_log_ce = true; + else if (!strcmp(str, "print_all")) + cfg->print_all = true; else if (!strcmp(str, "ignore_ce")) cfg->ignore_ce = true; else if (!strcmp(str, "bootlog") || !strcmp(str, "nobootlog")) @@ -2105,7 +2101,7 @@ __setup("mce", mcheck_enable); int __init mcheck_init(void) { mcheck_intel_therm_init(); - mce_register_decode_chain(&first_nb); + mce_register_decode_chain(&early_nb); mce_register_decode_chain(&mce_uc_nb); mce_register_decode_chain(&mce_default_nb); mcheck_vendor_init_severity(); @@ -2349,6 +2345,7 @@ static ssize_t store_int_with_restart(struct device *s, static DEVICE_INT_ATTR(tolerant, 0644, mca_cfg.tolerant); static DEVICE_INT_ATTR(monarch_timeout, 0644, mca_cfg.monarch_timeout); static DEVICE_BOOL_ATTR(dont_log_ce, 0644, mca_cfg.dont_log_ce); +static DEVICE_BOOL_ATTR(print_all, 0644, mca_cfg.print_all); static struct dev_ext_attribute dev_attr_check_interval = { __ATTR(check_interval, 0644, device_show_int, store_int_with_restart), @@ -2373,6 +2370,7 @@ static struct device_attribute *mce_device_attrs[] = { #endif &dev_attr_monarch_timeout.attr, &dev_attr_dont_log_ce.attr, + &dev_attr_print_all.attr, &dev_attr_ignore_ce.attr, &dev_attr_cmci_disabled.attr, NULL @@ -2545,6 +2543,13 @@ static __init void mce_init_banks(void) } } +/* + * When running on XEN, this initcall is ordered against the XEN mcelog + * initcall: + * + * device_initcall(xen_late_init_mcelog); + * device_initcall_sync(mcheck_init_device); + */ static __init int mcheck_init_device(void) { int err; @@ -2576,6 +2581,10 @@ static __init int mcheck_init_device(void) if (err) goto err_out_mem; + /* + * Invokes mce_cpu_online() on all CPUs which are online when + * the state is installed. + */ err = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "x86/mce:online", mce_cpu_online, mce_cpu_pre_down); if (err < 0) @@ -2665,7 +2674,6 @@ static int __init mcheck_late_init(void) static_branch_inc(&mcsafe_key); mcheck_debugfs_init(); - cec_init(); /* * Flush out everything that has been logged during early boot, now that |