From 160c7ba34605d9b59ee406a1b4a61b0f942b1ae9 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney"
Date: Wed, 8 Jul 2020 16:25:43 -0700
Subject: lib: Add backtrace_idle parameter to force backtrace of idle CPUs

Currently, nmi_cpu_backtrace() declines to produce backtraces for idle
CPUs.  This is a good choice in the common case, in which problems are
caused only by non-idle CPUs.  However, there are occasionally situations
in which idle CPUs are helping to cause problems.  This commit therefore
adds an nmi_backtrace.backtrace_idle kernel boot parameter that causes
nmi_cpu_backtrace() to dump the stacks of idle CPUs as well.

Signed-off-by: Paul E. McKenney
Cc: Jonathan Corbet
Cc: Thomas Gleixner
Cc: Andrew Morton
Cc: Greg Kroah-Hartman
---
 lib/nmi_backtrace.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'lib')

diff --git a/lib/nmi_backtrace.c b/lib/nmi_backtrace.c
index 15ca78e1c7d4..8abe1870dba4 100644
--- a/lib/nmi_backtrace.c
+++ b/lib/nmi_backtrace.c
@@ -85,12 +85,16 @@ void nmi_trigger_cpumask_backtrace(const cpumask_t *mask,
 	put_cpu();
 }
 
+// Dump stacks even for idle CPUs.
+static bool backtrace_idle;
+module_param(backtrace_idle, bool, 0644);
+
 bool nmi_cpu_backtrace(struct pt_regs *regs)
 {
 	int cpu = smp_processor_id();
 
 	if (cpumask_test_cpu(cpu, to_cpumask(backtrace_mask))) {
-		if (regs && cpu_in_idle(instruction_pointer(regs))) {
+		if (!READ_ONCE(backtrace_idle) && regs && cpu_in_idle(instruction_pointer(regs))) {
 			pr_warn("NMI backtrace for cpu %d skipped: idling at %pS\n",
 				cpu, (void *)instruction_pointer(regs));
 		} else {
--
cgit v1.2.3
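A brief usage aside (not part of the patch): because backtrace_idle is
declared with module_param(..., 0644), it should be settable both on the
kernel boot line and at runtime through sysfs.  The boot-parameter form
is the one named in the commit log; the sysfs path is inferred from the
usual module_param() conventions rather than stated in the patch itself:

	nmi_backtrace.backtrace_idle=1    (on the kernel boot line)
	echo 1 > /sys/module/nmi_backtrace/parameters/backtrace_idle    (at runtime)
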
From e9d338a0b1799c988b678e8ccb66a442272e6aa3 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney"
Date: Wed, 24 Jun 2020 15:59:59 -0700
Subject: scftorture: Add smp_call_function() torture test

This commit adds an smp_call_function() torture test that repeatedly
invokes this function and complains if things go badly awry.

Signed-off-by: Paul E. McKenney
---
 Documentation/admin-guide/kernel-parameters.txt |  92 +++++++
 kernel/Makefile                                 |   2 +
 kernel/scftorture.c                             | 350 ++++++++++++++++++++++++
 lib/Kconfig.debug                               |  10 +
 4 files changed, 454 insertions(+)
 create mode 100644 kernel/scftorture.c

(limited to 'lib')

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index bdc1f33fd3d1..91a56382ae56 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -4637,6 +4637,98 @@
 			Format: integer between 0 and 10
 			Default is 0.
 
+	scftorture.holdoff= [KNL]
+			Number of seconds to hold off before starting
+			test.  Defaults to zero for module insertion and
+			to 10 seconds for built-in smp_call_function()
+			tests.
+
+	scftorture.longwait= [KNL]
+			Request ridiculously long waits randomly selected
+			up to the chosen limit in seconds.  Zero (the
+			default) disables this feature.  Please note
+			that requesting even small non-zero numbers of
+			seconds can result in RCU CPU stall warnings,
+			softlockup complaints, and so on.
+
+	scftorture.nthreads= [KNL]
+			Number of kthreads to spawn to invoke the
+			smp_call_function() family of functions.
+			The default of -1 specifies a number of kthreads
+			equal to the number of CPUs.
+
+	scftorture.onoff_holdoff= [KNL]
+			Number of seconds to wait after the start of the
+			test before initiating CPU-hotplug operations.
+
+	scftorture.onoff_interval= [KNL]
+			Number of seconds to wait between successive
+			CPU-hotplug operations.  Specifying zero (which
+			is the default) disables CPU-hotplug operations.
+
+	scftorture.shutdown_secs= [KNL]
+			The number of seconds following the start of the
+			test after which to shut down the system.  The
+			default of zero avoids shutting down the system.
+			Non-zero values are useful for automated tests.
+
+	scftorture.stat_interval= [KNL]
+			The number of seconds between outputting the
+			current test statistics to the console.  A value
+			of zero disables statistics output.
+
+	scftorture.stutter_cpus= [KNL]
+			The number of jiffies to wait between each change
+			to the set of CPUs under test.
+
+	scftorture.use_cpus_read_lock= [KNL]
+			Use cpus_read_lock() instead of the default
+			preempt_disable() to disable CPU hotplug
+			while invoking one of the smp_call_function*()
+			functions.
+
+	scftorture.verbose= [KNL]
+			Enable additional printk() statements.
+
+	scftorture.weight_single= [KNL]
+			The probability weighting to use for the
+			smp_call_function_single() function with a zero
+			"wait" parameter.  A value of -1 selects the
+			default if all other weights are -1.  However,
+			if at least one weight has some other value, a
+			value of -1 will instead select a weight of zero.
+
+	scftorture.weight_single_wait= [KNL]
+			The probability weighting to use for the
+			smp_call_function_single() function with a
+			non-zero "wait" parameter.  See weight_single.
+
+	scftorture.weight_many= [KNL]
+			The probability weighting to use for the
+			smp_call_function_many() function with a zero
+			"wait" parameter.  See weight_single.
+			Note well that setting a high probability for
+			this weighting can place serious IPI load
+			on the system.
+
+	scftorture.weight_many_wait= [KNL]
+			The probability weighting to use for the
+			smp_call_function_many() function with a
+			non-zero "wait" parameter.  See weight_single
+			and weight_many.
+
+	scftorture.weight_all= [KNL]
+			The probability weighting to use for the
+			smp_call_function() function with a zero
+			"wait" parameter.  See weight_single and
+			weight_many.
+
+	scftorture.weight_all_wait= [KNL]
+			The probability weighting to use for the
+			smp_call_function() function with a
+			non-zero "wait" parameter.  See weight_single
+			and weight_many.
+
 	skew_tick=	[KNL] Offset the periodic timer tick per cpu to mitigate
 			xtime_lock contention on larger systems, and/or RCU lock
 			contention on all systems with CONFIG_MAXSMP set.
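To make the parameters above concrete, here is one illustrative boot
line for a built-in test; the values are invented for illustration only:

	scftorture.nthreads=4 scftorture.stat_interval=15 scftorture.shutdown_secs=600 scftorture.weight_single=3 scftorture.weight_single_wait=1

With these settings, four invoker kthreads run for ten minutes with
statistics every 15 seconds, no-wait single-CPU calls are selected three
times as often as waited ones, and the remaining weights default to zero
per the weight_single rules above.
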
diff --git a/kernel/Makefile b/kernel/Makefile
index 9a20016d4900..c45f551deaaa 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -133,6 +133,8 @@ KASAN_SANITIZE_stackleak.o := n
 KCSAN_SANITIZE_stackleak.o := n
 KCOV_INSTRUMENT_stackleak.o := n
 
+obj-$(CONFIG_SCF_TORTURE_TEST) += scftorture.o
+
 $(obj)/configs.o: $(obj)/config_data.gz
 
 targets += config_data.gz
diff --git a/kernel/scftorture.c b/kernel/scftorture.c
new file mode 100644
index 000000000000..44f1e49ba6e9
--- /dev/null
+++ b/kernel/scftorture.c
@@ -0,0 +1,350 @@
+// SPDX-License-Identifier: GPL-2.0+
+//
+// Torture test for smp_call_function() and friends.
+//
+// Copyright (C) Facebook, 2020.
+//
+// Author: Paul E. McKenney
+
+#define pr_fmt(fmt) fmt
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#define SCFTORT_STRING "scftorture"
+#define SCFTORT_FLAG SCFTORT_STRING ": "
+
+#define SCFTORTOUT(s, x...) \
+	pr_alert(SCFTORT_FLAG s, ## x)
+
+#define VERBOSE_SCFTORTOUT(s, x...) \
+	do { if (verbose) pr_alert(SCFTORT_FLAG s, ## x); } while (0)
+
+#define VERBOSE_SCFTORTOUT_ERRSTRING(s, x...) \
+	do { if (verbose) pr_alert(SCFTORT_FLAG "!!! " s, ## x); } while (0)
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Paul E. McKenney");
+
+// Wait until there are multiple CPUs before starting test.
+torture_param(int, holdoff, IS_BUILTIN(CONFIG_SCF_TORTURE_TEST) ? 10 : 0,
+	      "Holdoff time before test start (s)");
+torture_param(int, longwait, 0, "Include ridiculously long waits? (seconds)");
+torture_param(int, nthreads, -1, "# threads, defaults to -1 for all CPUs.");
+torture_param(int, onoff_holdoff, 0, "Time after boot before CPU hotplugs (s)");
+torture_param(int, onoff_interval, 0, "Time between CPU hotplugs (s), 0=disable");
+torture_param(int, shutdown_secs, 0, "Shutdown time (s), <= zero to disable.");
+torture_param(int, stat_interval, 60, "Number of seconds between stats printk()s.");
+torture_param(int, stutter_cpus, 5, "Number of jiffies to change CPUs under test, 0=disable");
+torture_param(bool, use_cpus_read_lock, 0, "Use cpus_read_lock() to exclude CPU hotplug.");
+torture_param(int, verbose, 0, "Enable verbose debugging printk()s");
+torture_param(int, weight_single, -1, "Testing weight for single-CPU no-wait operations.");
+torture_param(int, weight_single_wait, -1, "Testing weight for single-CPU operations.");
+torture_param(int, weight_many, -1, "Testing weight for multi-CPU no-wait operations.");
+torture_param(int, weight_many_wait, -1, "Testing weight for multi-CPU operations.");
+torture_param(int, weight_all, -1, "Testing weight for all-CPU no-wait operations.");
+torture_param(int, weight_all_wait, -1, "Testing weight for all-CPU operations.");
+
+static char *torture_type = "";
+
+#ifdef MODULE
+# define SCFTORT_SHUTDOWN 0
+#else
+# define SCFTORT_SHUTDOWN 1
+#endif
+
+torture_param(bool, shutdown, SCFTORT_SHUTDOWN, "Shutdown at end of torture test.");
+
+struct scf_statistics {
+	struct task_struct *task;
+	int cpu;
+	long long n_single;
+	long long n_single_wait;
+	long long n_multi;
+	long long n_multi_wait;
+	long long n_all;
+	long long n_all_wait;
+};
+
+static struct scf_statistics *scf_stats_p;
+static struct task_struct *scf_torture_stats_task;
+static DEFINE_PER_CPU(long long, scf_invoked_count);
+
+// Use to wait for all threads to start.
+static atomic_t n_started;
+static atomic_t n_errs;
+static bool scfdone;
+
+DEFINE_TORTURE_RANDOM_PERCPU(scf_torture_rand);
+
+// Print torture statistics.  Caller must ensure serialization.
+static void scf_torture_stats_print(void)
+{
+	int cpu;
+	long long invoked_count = 0;
+	bool isdone = READ_ONCE(scfdone);
+
+	for_each_possible_cpu(cpu)
+		invoked_count += data_race(per_cpu(scf_invoked_count, cpu));
+	pr_alert("%s scf_invoked_count %s: %lld ",
+		 SCFTORT_FLAG, isdone ? "VER" : "ver", invoked_count);
+	torture_onoff_stats();
+	pr_cont("\n");
+}
+
+// Periodically prints torture statistics, if periodic statistics printing
+// was specified via the stat_interval module parameter.
+static int
+scf_torture_stats(void *arg)
+{
+	VERBOSE_TOROUT_STRING("scf_torture_stats task started");
+	do {
+		schedule_timeout_interruptible(stat_interval * HZ);
+		scf_torture_stats_print();
+		torture_shutdown_absorb("scf_torture_stats");
+	} while (!torture_must_stop());
+	torture_kthread_stopping("scf_torture_stats");
+	return 0;
+}
+
+// Update statistics and occasionally burn up mass quantities of CPU time,
+// if told to do so via scftorture.longwait.  Otherwise, occasionally burn
+// a little bit.
+static void scf_handler(void *unused)
+{
+	int i;
+	int j;
+	unsigned long r = torture_random(this_cpu_ptr(&scf_torture_rand));
+
+	this_cpu_inc(scf_invoked_count);
+	if (longwait <= 0) {
+		if (!(r & 0xffc0))
+			udelay(r & 0x3f);
+		return;
+	}
+	if (r & 0xfff)
+		return;
+	r = (r >> 12);
+	if (longwait <= 0) {
+		udelay((r & 0xff) + 1);
+		return;
+	}
+	r = r % longwait + 1;
+	for (i = 0; i < r; i++) {
+		for (j = 0; j < 1000; j++) {
+			udelay(1000);
+			cpu_relax();
+		}
+	}
+}
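A note on the bit-mask arithmetic in scf_handler() above, since it is
easy to misread.  torture_random() returns a pseudo-random unsigned long.
With longwait disabled, (r & 0xffc0) == 0 requires ten random bits (bits
6 through 15) to all be zero, so a delay happens only about once per 1024
invocations, and then lasts at most 63 microseconds (r & 0x3f).  With
longwait enabled, r & 0xfff passes roughly one invocation in 4096 through
to the long-delay code, which then spins for r % longwait + 1 seconds,
each second consisting of 1000 one-millisecond udelay() calls.  This is
why even small longwait values can provoke the RCU CPU stall warnings
mentioned in the kernel-parameters entry.
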
+
+// Randomly do an smp_call_function*() invocation.
+static void scftorture_invoke_one(struct scf_statistics *scfp, struct torture_random_state *trsp)
+{
+	if (use_cpus_read_lock)
+		cpus_read_lock();
+	else
+		preempt_disable();
+	scfp->n_all++;
+	smp_call_function(scf_handler, NULL, 0);
+	if (use_cpus_read_lock)
+		cpus_read_unlock();
+	else
+		preempt_enable();
+	if (!(torture_random(trsp) & 0xfff))
+		schedule_timeout_uninterruptible(1);
+}
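The function above always takes the all-CPUs smp_call_function() path;
the weight_* parameters are validated at init time but not yet consulted.
For readers wondering how weighted selection might eventually look, here
is a minimal sketch.  It is not part of this patch: the name
scftorture_invoke_one_weighted() and the variable scf_weight_total are
hypothetical, and error handling is omitted.

	// Sketch only.  Assumes scf_weight_total was computed during init as
	// weight_single + weight_single_wait + weight_all + weight_all_wait
	// and verified to be nonzero.
	static unsigned long scf_weight_total;

	static void scftorture_invoke_one_weighted(struct scf_statistics *scfp,
						   struct torture_random_state *trsp)
	{
		int cpu;
		unsigned long r = torture_random(trsp) % scf_weight_total;

		preempt_disable();  // As in the real function, excludes CPU hotplug.
		if (r < weight_single) {
			// No-wait single-CPU call.  An offline target makes
			// smp_call_function_single() return -ENXIO, ignored here.
			cpu = torture_random(trsp) % nr_cpu_ids;
			scfp->n_single++;
			smp_call_function_single(cpu, scf_handler, NULL, 0);
		} else if (r < weight_single + weight_single_wait) {
			// Single-CPU call that waits for the handler to finish.
			cpu = torture_random(trsp) % nr_cpu_ids;
			scfp->n_single_wait++;
			smp_call_function_single(cpu, scf_handler, NULL, 1);
		} else if (r < weight_single + weight_single_wait + weight_all) {
			// No-wait call to all other CPUs.
			scfp->n_all++;
			smp_call_function(scf_handler, NULL, 0);
		} else {
			// Waited call to all other CPUs.
			scfp->n_all_wait++;
			smp_call_function(scf_handler, NULL, 1);
		}
		preempt_enable();
	}
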
+
+// SCF test kthread.  Repeatedly does calls to members of the
+// smp_call_function() family of functions.
+static int scftorture_invoker(void *arg)
+{
+	DEFINE_TORTURE_RANDOM(rand);
+	struct scf_statistics *scfp = (struct scf_statistics *)arg;
+
+	VERBOSE_SCFTORTOUT("scftorture_invoker %d: task started", scfp->cpu);
+	set_cpus_allowed_ptr(current, cpumask_of(scfp->cpu % nr_cpu_ids));
+	set_user_nice(current, MAX_NICE);
+	if (holdoff)
+		schedule_timeout_interruptible(holdoff * HZ);
+
+	VERBOSE_SCFTORTOUT("scftorture_invoker %d: Waiting for all SCF torturers from cpu %d", scfp->cpu, smp_processor_id());
+
+	// Make sure that the CPU is affinitized appropriately during testing.
+	WARN_ON_ONCE(smp_processor_id() != scfp->cpu);
+
+	if (!atomic_dec_return(&n_started))
+		while (atomic_read_acquire(&n_started)) {
+			if (torture_must_stop()) {
+				VERBOSE_SCFTORTOUT("scftorture_invoker %d ended before starting", scfp->cpu);
+				goto end;
+			}
+			schedule_timeout_uninterruptible(1);
+		}
+
+	VERBOSE_SCFTORTOUT("scftorture_invoker %d started", scfp->cpu);
+
+	do {
+		scftorture_invoke_one(scfp, &rand);
+	} while (!torture_must_stop());
+
+	VERBOSE_SCFTORTOUT("scftorture_invoker %d ended", scfp->cpu);
+end:
+	torture_kthread_stopping("scftorture_invoker");
+	return 0;
+}
+
+static void
+scftorture_print_module_parms(const char *tag)
+{
+	pr_alert(SCFTORT_FLAG
+		 "--- %s: verbose=%d holdoff=%d longwait=%d nthreads=%d onoff_holdoff=%d onoff_interval=%d shutdown_secs=%d stat_interval=%d stutter_cpus=%d use_cpus_read_lock=%d, weight_single=%d, weight_single_wait=%d, weight_many=%d, weight_many_wait=%d, weight_all=%d, weight_all_wait=%d\n", tag,
+		 verbose, holdoff, longwait, nthreads, onoff_holdoff, onoff_interval, shutdown_secs, stat_interval, stutter_cpus, use_cpus_read_lock, weight_single, weight_single_wait, weight_many, weight_many_wait, weight_all, weight_all_wait);
+}
+
+static void scf_cleanup_handler(void *unused)
+{
+}
+
+static void scf_torture_cleanup(void)
+{
+	int i;
+
+	if (torture_cleanup_begin())
+		return;
+
+	WRITE_ONCE(scfdone, true);
+	if (nthreads)
+		for (i = 0; i < nthreads; i++)
+			torture_stop_kthread("scftorture_invoker", scf_stats_p[i].task);
+	else
+		goto end;
+	kfree(scf_stats_p);
+	scf_stats_p = NULL;
+	smp_call_function(scf_cleanup_handler, NULL, 0);
+	torture_stop_kthread(scf_torture_stats, scf_torture_stats_task);
+	scf_torture_stats_print();  // -After- the stats thread is stopped!
+
+	if (atomic_read(&n_errs))
+		scftorture_print_module_parms("End of test: FAILURE");
+	else if (torture_onoff_failures())
+		scftorture_print_module_parms("End of test: LOCK_HOTPLUG");
+	else
+		scftorture_print_module_parms("End of test: SUCCESS");
+
+end:
+	torture_cleanup_end();
+}
+
+static int __init scf_torture_init(void)
+{
+	long i;
+	int firsterr = 0;
+
+	if (!torture_init_begin(SCFTORT_STRING, verbose))
+		return -EBUSY;
+
+	scftorture_print_module_parms("Start of test");
+
+	if (weight_single == -1 && weight_single_wait == -1 &&
+	    weight_many == -1 && weight_many_wait == -1 &&
+	    weight_all == -1 && weight_all_wait == -1) {
+		weight_single = 1;
+		weight_single_wait = 1;
+		weight_many = 1;
+		weight_many_wait = 1;
+		weight_all = 1;
+		weight_all_wait = 1;
+	} else {
+		if (weight_single == -1)
+			weight_single = 0;
+		if (weight_single_wait == -1)
+			weight_single_wait = 0;
+		if (weight_many == -1)
+			weight_many = 0;
+		if (weight_many_wait == -1)
+			weight_many_wait = 0;
+		if (weight_all == -1)
+			weight_all = 0;
+		if (weight_all_wait == -1)
+			weight_all_wait = 0;
+	}
+	if (weight_single == 0 && weight_single_wait == 0 &&
+	    weight_many == 0 && weight_many_wait == 0 &&
+	    weight_all == 0 && weight_all_wait == 0) {
+		firsterr = -EINVAL;
+		goto unwind;
+	}
+
+	if (onoff_interval > 0) {
+		firsterr = torture_onoff_init(onoff_holdoff * HZ, onoff_interval, NULL);
+		if (firsterr)
+			goto unwind;
+	}
+	if (shutdown_secs > 0) {
+		firsterr = torture_shutdown_init(shutdown_secs, scf_torture_cleanup);
+		if (firsterr)
+			goto unwind;
+	}
+
+	// Worker tasks invoking smp_call_function().
+	if (nthreads < 0)
+		nthreads = num_online_cpus();
+	scf_stats_p = kcalloc(nthreads, sizeof(scf_stats_p[0]), GFP_KERNEL);
+	if (!scf_stats_p) {
+		VERBOSE_SCFTORTOUT_ERRSTRING("out of memory");
+		firsterr = -ENOMEM;
+		goto unwind;
+	}
+
+	VERBOSE_SCFTORTOUT("Starting %d smp_call_function() threads\n", nthreads);
+
+	atomic_set(&n_started, nthreads);
+	for (i = 0; i < nthreads; i++) {
+		scf_stats_p[i].cpu = i;
+		firsterr = torture_create_kthread(scftorture_invoker, (void *)&scf_stats_p[i],
+						  scf_stats_p[i].task);
+		if (firsterr)
+			goto unwind;
+	}
+	if (stat_interval > 0) {
+		firsterr = torture_create_kthread(scf_torture_stats, NULL, scf_torture_stats_task);
+		if (firsterr)
+			goto unwind;
+	}
+
+	torture_init_end();
+	return 0;
+
+unwind:
+	torture_init_end();
+	scf_torture_cleanup();
+	return firsterr;
+}
+
+module_init(scf_torture_init);
+module_exit(scf_torture_cleanup);
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index e068c3c7189a..0c3a6c752ede 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -1367,6 +1367,16 @@ config WW_MUTEX_SELFTEST
 	  Say M if you want these self tests to build as a module.
 	  Say N if you are unsure.
 
+config SCF_TORTURE_TEST
+	tristate "torture tests for smp_call_function*()"
+	depends on DEBUG_KERNEL
+	select TORTURE_TEST
+	help
+	  This option provides a kernel module that runs torture tests
+	  on the smp_call_function() family of primitives.  The kernel
+	  module may be built after the fact on the running kernel to
+	  be tested, if desired.
+
 endmenu # lock debugging
 
 config TRACE_IRQFLAGS
--
cgit v1.2.3
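Because SCF_TORTURE_TEST is tristate (see the Kconfig hunk above), the
test can also be run without rebooting.  A hypothetical session on a
kernel built with CONFIG_SCF_TORTURE_TEST=m might look like:

	modprobe scftorture nthreads=2 stat_interval=15 verbose=1
	# ... watch the console for "scftorture:" lines ...
	rmmod scftorture

Removing the module invokes scf_torture_cleanup(), which stops the
invoker kthreads, prints a final set of statistics, and reports SUCCESS,
FAILURE, or LOCK_HOTPLUG as shown above.
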
From 35feb60474bf4f7fa7840e14fc7fd344996b919d Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney"
Date: Tue, 30 Jun 2020 13:22:54 -0700
Subject: kernel/smp: Provide CSD lock timeout diagnostics

This commit causes csd_lock_wait() to emit diagnostics when a CPU fails
to respond quickly enough to one of the smp_call_function() family of
function calls.  These diagnostics are enabled by a new
CSD_LOCK_WAIT_DEBUG Kconfig option that depends on DEBUG_KERNEL.

This commit was inspired by an earlier patch by Josef Bacik.

[ paulmck: Fix for syzbot+0f719294463916a3fc0e@syzkaller.appspotmail.com ]
[ paulmck: Fix KASAN use-after-free issue reported by Qian Cai. ]
[ paulmck: Fix botched nr_cpu_ids comparison per Dan Carpenter. ]
[ paulmck: Apply Peter Zijlstra feedback. ]
Link: https://lore.kernel.org/lkml/00000000000042f21905a991ecea@google.com
Link: https://lore.kernel.org/lkml/0000000000002ef21705a9933cf3@google.com
Cc: Peter Zijlstra
Cc: Ingo Molnar
Cc: Thomas Gleixner
Cc: Sebastian Andrzej Siewior
Signed-off-by: Paul E. McKenney
---
 kernel/smp.c      | 132 +++++++++++++++++++++++++++++++++++++++++++++++++++-
 lib/Kconfig.debug |  11 +++++
 2 files changed, 141 insertions(+), 2 deletions(-)

(limited to 'lib')

diff --git a/kernel/smp.c b/kernel/smp.c
index 865a876f83ce..c5d31885bd30 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -20,6 +20,9 @@
 #include
 #include
 #include
+#include
+#include
+#include
 
 #include "smpboot.h"
 #include "sched/smp.h"
@@ -96,6 +99,103 @@ void __init call_function_init(void)
 	smpcfd_prepare_cpu(smp_processor_id());
 }
 
+#ifdef CONFIG_CSD_LOCK_WAIT_DEBUG
+
+static DEFINE_PER_CPU(call_single_data_t *, cur_csd);
+static DEFINE_PER_CPU(smp_call_func_t, cur_csd_func);
+static DEFINE_PER_CPU(void *, cur_csd_info);
+
+#define CSD_LOCK_TIMEOUT (5ULL * NSEC_PER_SEC)
+atomic_t csd_bug_count = ATOMIC_INIT(0);
+
+/* Record current CSD work for current CPU, NULL to erase. */
+static void csd_lock_record(call_single_data_t *csd)
+{
+	if (!csd) {
+		smp_mb(); /* NULL cur_csd after unlock. */
+		__this_cpu_write(cur_csd, NULL);
+		return;
+	}
+	__this_cpu_write(cur_csd_func, csd->func);
+	__this_cpu_write(cur_csd_info, csd->info);
+	smp_wmb(); /* func and info before csd. */
+	__this_cpu_write(cur_csd, csd);
+	smp_mb(); /* Update cur_csd before function call. */
+		  /* Or before unlock, as the case may be. */
+}
+
+static __always_inline int csd_lock_wait_getcpu(call_single_data_t *csd)
+{
+	unsigned int csd_type;
+
+	csd_type = CSD_TYPE(csd);
+	if (csd_type == CSD_TYPE_ASYNC || csd_type == CSD_TYPE_SYNC)
+		return csd->dst; /* Other CSD_TYPE_ values might not have ->dst. */
+	return -1;
+}
+
+/*
+ * Complain if too much time spent waiting.  Note that only
+ * the CSD_TYPE_SYNC/ASYNC types provide the destination CPU,
+ * so waiting on other types gets much less information.
+ */
+static __always_inline bool csd_lock_wait_toolong(call_single_data_t *csd, u64 ts0, u64 *ts1, int *bug_id)
+{
+	int cpu = -1;
+	int cpux;
+	bool firsttime;
+	u64 ts2, ts_delta;
+	call_single_data_t *cpu_cur_csd;
+	unsigned int flags = READ_ONCE(csd->flags);
+
+	if (!(flags & CSD_FLAG_LOCK)) {
+		if (!unlikely(*bug_id))
+			return true;
+		cpu = csd_lock_wait_getcpu(csd);
+		pr_alert("csd: CSD lock (#%d) got unstuck on CPU#%02d, CPU#%02d released the lock.\n",
+			 *bug_id, raw_smp_processor_id(), cpu);
+		return true;
+	}
+
+	ts2 = sched_clock();
+	ts_delta = ts2 - *ts1;
+	if (likely(ts_delta <= CSD_LOCK_TIMEOUT))
+		return false;
+
+	firsttime = !*bug_id;
+	if (firsttime)
+		*bug_id = atomic_inc_return(&csd_bug_count);
+	cpu = csd_lock_wait_getcpu(csd);
+	if (WARN_ONCE(cpu < 0 || cpu >= nr_cpu_ids, "%s: cpu = %d\n", __func__, cpu))
+		cpux = 0;
+	else
+		cpux = cpu;
+	cpu_cur_csd = smp_load_acquire(&per_cpu(cur_csd, cpux)); /* Before func and info. */
+	pr_alert("csd: %s non-responsive CSD lock (#%d) on CPU#%d, waiting %llu ns for CPU#%02d %pS(%ps).\n",
+		 firsttime ? "Detected" : "Continued", *bug_id, raw_smp_processor_id(), ts2 - ts0,
+		 cpu, csd->func, csd->info);
+	if (cpu_cur_csd && csd != cpu_cur_csd) {
+		pr_alert("\tcsd: CSD lock (#%d) handling prior %pS(%ps) request.\n",
+			 *bug_id, READ_ONCE(per_cpu(cur_csd_func, cpux)),
+			 READ_ONCE(per_cpu(cur_csd_info, cpux)));
+	} else {
+		pr_alert("\tcsd: CSD lock (#%d) %s.\n",
+			 *bug_id, !cpu_cur_csd ? "unresponsive" : "handling this request");
+	}
+	if (cpu >= 0) {
+		if (!trigger_single_cpu_backtrace(cpu))
+			dump_cpu_task(cpu);
+		if (!cpu_cur_csd) {
+			pr_alert("csd: Re-sending CSD lock (#%d) IPI from CPU#%02d to CPU#%02d\n", *bug_id, raw_smp_processor_id(), cpu);
+			arch_send_call_function_single_ipi(cpu);
+		}
+	}
+	dump_stack();
+	*ts1 = ts2;
+
+	return false;
+}
+
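For readers who have not seen these diagnostics fire, here is a
hypothetical rendering of the pr_alert() calls above.  The CPU numbers,
handler name, and timestamp are invented for illustration; only the shape
follows the format strings in csd_lock_wait_toolong():

	csd: Detected non-responsive CSD lock (#1) on CPU#2, waiting 5000000256 ns for CPU#03 some_ipi_handler+0x0/0x40(0x0).
		csd: CSD lock (#1) unresponsive.
	csd: Re-sending CSD lock (#1) IPI from CPU#02 to CPU#03

This would be followed by a backtrace of CPU 3 (or a dump_cpu_task()
summary) and then the waiter's own dump_stack().  The "#1" is the bug ID
taken from csd_bug_count, and the nanosecond figure is measured from ts0,
the start of the wait.
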
"Detected" : "Continued", *bug_id, raw_smp_processor_id(), ts2 - ts0, + cpu, csd->func, csd->info); + if (cpu_cur_csd && csd != cpu_cur_csd) { + pr_alert("\tcsd: CSD lock (#%d) handling prior %pS(%ps) request.\n", + *bug_id, READ_ONCE(per_cpu(cur_csd_func, cpux)), + READ_ONCE(per_cpu(cur_csd_info, cpux))); + } else { + pr_alert("\tcsd: CSD lock (#%d) %s.\n", + *bug_id, !cpu_cur_csd ? "unresponsive" : "handling this request"); + } + if (cpu >= 0) { + if (!trigger_single_cpu_backtrace(cpu)) + dump_cpu_task(cpu); + if (!cpu_cur_csd) { + pr_alert("csd: Re-sending CSD lock (#%d) IPI from CPU#%02d to CPU#%02d\n", *bug_id, raw_smp_processor_id(), cpu); + arch_send_call_function_single_ipi(cpu); + } + } + dump_stack(); + *ts1 = ts2; + + return false; +} + /* * csd_lock/csd_unlock used to serialize access to per-cpu csd resources * @@ -103,10 +203,30 @@ void __init call_function_init(void) * previous function call. For multi-cpu calls its even more interesting * as we'll have to ensure no other cpu is observing our csd. */ +static __always_inline void csd_lock_wait(call_single_data_t *csd) +{ + int bug_id = 0; + u64 ts0, ts1; + + ts1 = ts0 = sched_clock(); + for (;;) { + if (csd_lock_wait_toolong(csd, ts0, &ts1, &bug_id)) + break; + cpu_relax(); + } + smp_acquire__after_ctrl_dep(); +} + +#else +static void csd_lock_record(call_single_data_t *csd) +{ +} + static __always_inline void csd_lock_wait(call_single_data_t *csd) { smp_cond_load_acquire(&csd->flags, !(VAL & CSD_FLAG_LOCK)); } +#endif static __always_inline void csd_lock(call_single_data_t *csd) { @@ -166,9 +286,11 @@ static int generic_exec_single(int cpu, call_single_data_t *csd) * We can unlock early even for the synchronous on-stack case, * since we're doing this from the same CPU.. */ + csd_lock_record(csd); csd_unlock(csd); local_irq_save(flags); func(info); + csd_lock_record(NULL); local_irq_restore(flags); return 0; } @@ -268,8 +390,10 @@ static void flush_smp_call_function_queue(bool warn_cpu_offline) entry = &csd_next->llist; } + csd_lock_record(csd); func(info); csd_unlock(csd); + csd_lock_record(NULL); } else { prev = &csd->llist; } @@ -296,8 +420,10 @@ static void flush_smp_call_function_queue(bool warn_cpu_offline) smp_call_func_t func = csd->func; void *info = csd->info; + csd_lock_record(csd); csd_unlock(csd); func(info); + csd_lock_record(NULL); } else if (type == CSD_TYPE_IRQ_WORK) { irq_work_single(csd); } @@ -375,7 +501,8 @@ int smp_call_function_single(int cpu, smp_call_func_t func, void *info, csd->func = func; csd->info = info; -#ifdef CONFIG_64BIT +#ifdef CONFIG_CSD_LOCK_WAIT_DEBUG + csd->src = smp_processor_id(); csd->dst = cpu; #endif @@ -543,7 +670,8 @@ static void smp_call_function_many_cond(const struct cpumask *mask, csd->flags |= CSD_TYPE_SYNC; csd->func = func; csd->info = info; -#ifdef CONFIG_64BIT +#ifdef CONFIG_CSD_LOCK_WAIT_DEBUG + csd->src = smp_processor_id(); csd->dst = cpu; #endif if (llist_add(&csd->llist, &per_cpu(call_single_queue, cpu))) diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index e068c3c7189a..86a35fdfe021 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1367,6 +1367,17 @@ config WW_MUTEX_SELFTEST Say M if you want these self tests to build as a module. Say N if you are unsure. +config CSD_LOCK_WAIT_DEBUG + bool "Debugging for csd_lock_wait(), called from smp_call_function*()" + depends on DEBUG_KERNEL + depends on 64BIT + default n + help + This option enables debug prints when CPUs are slow to respond + to the smp_call_function*() IPI wrappers. 
These debug prints + include the IPI handler function currently executing (if any) + and relevant stack traces. + endmenu # lock debugging config TRACE_IRQFLAGS -- cgit v1.2.3