summaryrefslogtreecommitdiff
path: root/arch/x86/kernel/cpu/mce/core.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2020-06-13 20:21:00 +0300
committerLinus Torvalds <torvalds@linux-foundation.org>2020-06-13 20:21:00 +0300
commita9429089d3e822d45be01a9635f0685174508fd3 (patch)
treed5483a635cdc4d74967c1ddb383f1e283bf9849c /arch/x86/kernel/cpu/mce/core.c
parent076f14be7fc942e112c94c841baec44124275cd0 (diff)
parent7ccddc4613db446dc3cbb69a3763ba60ec651d13 (diff)
downloadlinux-a9429089d3e822d45be01a9635f0685174508fd3.tar.xz
Merge tag 'ras-core-2020-06-12' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 RAS updates from Thomas Gleixner: "RAS updates from Borislav Petkov: - Unmap a whole guest page if an MCE is encountered in it to avoid follow-on MCEs leading to the guest crashing, by Tony Luck. This change collided with the entry changes and the merge resolution would have been rather unpleasant. To avoid that the entry branch was merged in before applying this. The resulting code did not change over the rebase. - AMD MCE error thresholding machinery cleanup and hotplug sanitization, by Thomas Gleixner. - Change the MCE notifiers to denote whether they have handled the error and not break the chain early by returning NOTIFY_STOP, thus giving the opportunity for the later handlers in the chain to see it. By Tony Luck. - Add AMD family 0x17, models 0x60-6f support, by Alexander Monakov. - Last but not least, the usual round of fixes and improvements" * tag 'ras-core-2020-06-12' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (23 commits) x86/mce/dev-mcelog: Fix -Wstringop-truncation warning about strncpy() x86/{mce,mm}: Unmap the entire page if the whole page is affected and poisoned EDAC/amd64: Add AMD family 17h model 60h PCI IDs hwmon: (k10temp) Add AMD family 17h model 60h PCI match x86/amd_nb: Add AMD family 17h model 60h PCI IDs x86/mcelog: Add compat_ioctl for 32-bit mcelog support x86/mce: Drop bogus comment about mce.kflags x86/mce: Fixup exception only for the correct MCEs EDAC: Drop the EDAC report status checks x86/mce: Add mce=print_all option x86/mce: Change default MCE logger to check mce->kflags x86/mce: Fix all mce notifiers to update the mce->kflags bitmask x86/mce: Add a struct mce.kflags field x86/mce: Convert the CEC to use the MCE notifier x86/mce: Rename "first" function as "early" x86/mce/amd, edac: Remove report_gart_errors x86/mce/amd: Make threshold bank setting hotplug robust x86/mce/amd: Cleanup threshold device remove path x86/mce/amd: Straighten CPU hotplug path x86/mce/amd: Sanitize thresholding device creation hotplug path ...
Diffstat (limited to 'arch/x86/kernel/cpu/mce/core.c')
-rw-r--r--arch/x86/kernel/cpu/mce/core.c102
1 files changed, 55 insertions, 47 deletions
diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index b9cb381b4019..ce9120c4f740 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -160,29 +160,17 @@ void mce_log(struct mce *m)
}
EXPORT_SYMBOL_GPL(mce_log);
-/*
- * We run the default notifier if we have only the UC, the first and the
- * default notifier registered. I.e., the mandatory NUM_DEFAULT_NOTIFIERS
- * notifiers registered on the chain.
- */
-#define NUM_DEFAULT_NOTIFIERS 3
-static atomic_t num_notifiers;
-
void mce_register_decode_chain(struct notifier_block *nb)
{
if (WARN_ON(nb->priority > MCE_PRIO_MCELOG && nb->priority < MCE_PRIO_EDAC))
return;
- atomic_inc(&num_notifiers);
-
blocking_notifier_chain_register(&x86_mce_decoder_chain, nb);
}
EXPORT_SYMBOL_GPL(mce_register_decode_chain);
void mce_unregister_decode_chain(struct notifier_block *nb)
{
- atomic_dec(&num_notifiers);
-
blocking_notifier_chain_unregister(&x86_mce_decoder_chain, nb);
}
EXPORT_SYMBOL_GPL(mce_unregister_decode_chain);
@@ -265,6 +253,7 @@ static void __print_mce(struct mce *m)
}
pr_cont("\n");
+
/*
* Note this output is parsed by external tools and old fields
* should not be changed.
@@ -531,6 +520,14 @@ bool mce_is_memory_error(struct mce *m)
}
EXPORT_SYMBOL_GPL(mce_is_memory_error);
+static bool whole_page(struct mce *m)
+{
+ if (!mca_cfg.ser || !(m->status & MCI_STATUS_MISCV))
+ return true;
+
+ return MCI_MISC_ADDR_LSB(m->misc) >= PAGE_SHIFT;
+}
+
bool mce_is_correctable(struct mce *m)
{
if (m->cpuvendor == X86_VENDOR_AMD && m->status & MCI_STATUS_DEFERRED)
@@ -546,22 +543,7 @@ bool mce_is_correctable(struct mce *m)
}
EXPORT_SYMBOL_GPL(mce_is_correctable);
-static bool cec_add_mce(struct mce *m)
-{
- if (!m)
- return false;
-
- /* We eat only correctable DRAM errors with usable addresses. */
- if (mce_is_memory_error(m) &&
- mce_is_correctable(m) &&
- mce_usable_address(m))
- if (!cec_add_elem(m->addr >> PAGE_SHIFT))
- return true;
-
- return false;
-}
-
-static int mce_first_notifier(struct notifier_block *nb, unsigned long val,
+static int mce_early_notifier(struct notifier_block *nb, unsigned long val,
void *data)
{
struct mce *m = (struct mce *)data;
@@ -569,9 +551,6 @@ static int mce_first_notifier(struct notifier_block *nb, unsigned long val,
if (!m)
return NOTIFY_DONE;
- if (cec_add_mce(m))
- return NOTIFY_STOP;
-
/* Emit the trace record: */
trace_mce_record(m);
@@ -582,9 +561,9 @@ static int mce_first_notifier(struct notifier_block *nb, unsigned long val,
return NOTIFY_DONE;
}
-static struct notifier_block first_nb = {
- .notifier_call = mce_first_notifier,
- .priority = MCE_PRIO_FIRST,
+static struct notifier_block early_nb = {
+ .notifier_call = mce_early_notifier,
+ .priority = MCE_PRIO_EARLY,
};
static int uc_decode_notifier(struct notifier_block *nb, unsigned long val,
@@ -601,8 +580,10 @@ static int uc_decode_notifier(struct notifier_block *nb, unsigned long val,
return NOTIFY_DONE;
pfn = mce->addr >> PAGE_SHIFT;
- if (!memory_failure(pfn, 0))
- set_mce_nospec(pfn);
+ if (!memory_failure(pfn, 0)) {
+ set_mce_nospec(pfn, whole_page(mce));
+ mce->kflags |= MCE_HANDLED_UC;
+ }
return NOTIFY_OK;
}
@@ -620,10 +601,8 @@ static int mce_default_notifier(struct notifier_block *nb, unsigned long val,
if (!m)
return NOTIFY_DONE;
- if (atomic_read(&num_notifiers) > NUM_DEFAULT_NOTIFIERS)
- return NOTIFY_DONE;
-
- __print_mce(m);
+ if (mca_cfg.print_all || !m->kflags)
+ __print_mce(m);
return NOTIFY_DONE;
}
@@ -1202,11 +1181,12 @@ static void kill_me_maybe(struct callback_head *cb)
int flags = MF_ACTION_REQUIRED;
pr_err("Uncorrected hardware memory error in user-access at %llx", p->mce_addr);
- if (!(p->mce_status & MCG_STATUS_RIPV))
+
+ if (!p->mce_ripv)
flags |= MF_MUST_KILL;
if (!memory_failure(p->mce_addr >> PAGE_SHIFT, flags)) {
- set_mce_nospec(p->mce_addr >> PAGE_SHIFT);
+ set_mce_nospec(p->mce_addr >> PAGE_SHIFT, p->mce_whole_page);
return;
}
@@ -1360,14 +1340,26 @@ void noinstr do_machine_check(struct pt_regs *regs)
BUG_ON(!on_thread_stack() || !user_mode(regs));
current->mce_addr = m.addr;
- current->mce_status = m.mcgstatus;
+ current->mce_ripv = !!(m.mcgstatus & MCG_STATUS_RIPV);
+ current->mce_whole_page = whole_page(&m);
current->mce_kill_me.func = kill_me_maybe;
if (kill_it)
current->mce_kill_me.func = kill_me_now;
task_work_add(current, &current->mce_kill_me, true);
} else {
- if (!fixup_exception(regs, X86_TRAP_MC, 0, 0))
- mce_panic("Failed kernel mode recovery", &m, msg);
+ /*
+ * Handle an MCE which has happened in kernel space but from
+ * which the kernel can recover: ex_has_fault_handler() has
+ * already verified that the rIP at which the error happened is
+ * a rIP from which the kernel can recover (by jumping to
+ * recovery code specified in _ASM_EXTABLE_FAULT()) and the
+ * corresponding exception handler which would do that is the
+ * proper one.
+ */
+ if (m.kflags & MCE_IN_KERNEL_RECOV) {
+ if (!fixup_exception(regs, X86_TRAP_MC, 0, 0))
+ mce_panic("Failed kernel mode recovery", &m, msg);
+ }
}
}
EXPORT_SYMBOL_GPL(do_machine_check);
@@ -1758,6 +1750,7 @@ static void __mcheck_cpu_init_early(struct cpuinfo_x86 *c)
mce_flags.overflow_recov = !!cpu_has(c, X86_FEATURE_OVERFLOW_RECOV);
mce_flags.succor = !!cpu_has(c, X86_FEATURE_SUCCOR);
mce_flags.smca = !!cpu_has(c, X86_FEATURE_SMCA);
+ mce_flags.amd_threshold = 1;
if (mce_flags.smca) {
msr_ops.ctl = smca_ctl_reg;
@@ -2055,6 +2048,7 @@ void mce_disable_bank(int bank)
* mce=no_cmci Disables CMCI
* mce=no_lmce Disables LMCE
* mce=dont_log_ce Clears corrected events silently, no log created for CEs.
+ * mce=print_all Print all machine check logs to console
* mce=ignore_ce Disables polling and CMCI, corrected events are not cleared.
* mce=TOLERANCELEVEL[,monarchtimeout] (number, see above)
* monarchtimeout is how long to wait for other CPUs on machine
@@ -2083,6 +2077,8 @@ static int __init mcheck_enable(char *str)
cfg->lmce_disabled = 1;
else if (!strcmp(str, "dont_log_ce"))
cfg->dont_log_ce = true;
+ else if (!strcmp(str, "print_all"))
+ cfg->print_all = true;
else if (!strcmp(str, "ignore_ce"))
cfg->ignore_ce = true;
else if (!strcmp(str, "bootlog") || !strcmp(str, "nobootlog"))
@@ -2105,7 +2101,7 @@ __setup("mce", mcheck_enable);
int __init mcheck_init(void)
{
mcheck_intel_therm_init();
- mce_register_decode_chain(&first_nb);
+ mce_register_decode_chain(&early_nb);
mce_register_decode_chain(&mce_uc_nb);
mce_register_decode_chain(&mce_default_nb);
mcheck_vendor_init_severity();
@@ -2349,6 +2345,7 @@ static ssize_t store_int_with_restart(struct device *s,
static DEVICE_INT_ATTR(tolerant, 0644, mca_cfg.tolerant);
static DEVICE_INT_ATTR(monarch_timeout, 0644, mca_cfg.monarch_timeout);
static DEVICE_BOOL_ATTR(dont_log_ce, 0644, mca_cfg.dont_log_ce);
+static DEVICE_BOOL_ATTR(print_all, 0644, mca_cfg.print_all);
static struct dev_ext_attribute dev_attr_check_interval = {
__ATTR(check_interval, 0644, device_show_int, store_int_with_restart),
@@ -2373,6 +2370,7 @@ static struct device_attribute *mce_device_attrs[] = {
#endif
&dev_attr_monarch_timeout.attr,
&dev_attr_dont_log_ce.attr,
+ &dev_attr_print_all.attr,
&dev_attr_ignore_ce.attr,
&dev_attr_cmci_disabled.attr,
NULL
@@ -2545,6 +2543,13 @@ static __init void mce_init_banks(void)
}
}
+/*
+ * When running on XEN, this initcall is ordered against the XEN mcelog
+ * initcall:
+ *
+ * device_initcall(xen_late_init_mcelog);
+ * device_initcall_sync(mcheck_init_device);
+ */
static __init int mcheck_init_device(void)
{
int err;
@@ -2576,6 +2581,10 @@ static __init int mcheck_init_device(void)
if (err)
goto err_out_mem;
+ /*
+ * Invokes mce_cpu_online() on all CPUs which are online when
+ * the state is installed.
+ */
err = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "x86/mce:online",
mce_cpu_online, mce_cpu_pre_down);
if (err < 0)
@@ -2665,7 +2674,6 @@ static int __init mcheck_late_init(void)
static_branch_inc(&mcsafe_key);
mcheck_debugfs_init();
- cec_init();
/*
* Flush out everything that has been logged during early boot, now that