From a0143f5ac0594d73ef91c2336d8172217ff9cd72 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Thu, 6 May 2021 16:27:25 -0400 Subject: clocksource/drivers/samsung_pwm: Minor whitespace cleanup Cleanup the code to be slightly more readable and follow coding convention - only whitespace. This fixes checkpatch warnings: WARNING: Block comments should align the * on each line WARNING: please, no space before tabs WARNING: Missing a blank line after declarations CHECK: Alignment should match open parenthesis Signed-off-by: Krzysztof Kozlowski Signed-off-by: Daniel Lezcano Link: https://lore.kernel.org/r/20210506202729.157260-1-krzysztof.kozlowski@canonical.com --- include/clocksource/samsung_pwm.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/clocksource/samsung_pwm.h b/include/clocksource/samsung_pwm.h index c395238d0922..76341988fb4f 100644 --- a/include/clocksource/samsung_pwm.h +++ b/include/clocksource/samsung_pwm.h @@ -27,6 +27,7 @@ struct samsung_pwm_variant { }; void samsung_pwm_clocksource_init(void __iomem *base, - unsigned int *irqs, struct samsung_pwm_variant *variant); + unsigned int *irqs, + struct samsung_pwm_variant *variant); #endif /* __CLOCKSOURCE_SAMSUNG_PWM_H */ -- cgit v1.2.3 From bb08e96575dbbd49acb49999dd0d7ffedb5c1608 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Thu, 6 May 2021 16:27:26 -0400 Subject: clocksource/drivers/samsung_pwm: Constify passed structure The 'struct samsung_pwm_variant' argument passed to initialization functions is not modified, so it can be made const for safety. Signed-off-by: Krzysztof Kozlowski Signed-off-by: Daniel Lezcano Link: https://lore.kernel.org/r/20210506202729.157260-2-krzysztof.kozlowski@canonical.com --- drivers/clocksource/samsung_pwm_timer.c | 2 +- include/clocksource/samsung_pwm.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/drivers/clocksource/samsung_pwm_timer.c b/drivers/clocksource/samsung_pwm_timer.c index 69bf79c7f462..bfad61b509f9 100644 --- a/drivers/clocksource/samsung_pwm_timer.c +++ b/drivers/clocksource/samsung_pwm_timer.c @@ -401,7 +401,7 @@ static int __init _samsung_pwm_clocksource_init(void) void __init samsung_pwm_clocksource_init(void __iomem *base, unsigned int *irqs, - struct samsung_pwm_variant *variant) + const struct samsung_pwm_variant *variant) { pwm.base = base; memcpy(&pwm.variant, variant, sizeof(pwm.variant)); diff --git a/include/clocksource/samsung_pwm.h b/include/clocksource/samsung_pwm.h index 76341988fb4f..9b435caa95fe 100644 --- a/include/clocksource/samsung_pwm.h +++ b/include/clocksource/samsung_pwm.h @@ -28,6 +28,6 @@ struct samsung_pwm_variant { void samsung_pwm_clocksource_init(void __iomem *base, unsigned int *irqs, - struct samsung_pwm_variant *variant); + const struct samsung_pwm_variant *variant); #endif /* __CLOCKSOURCE_SAMSUNG_PWM_H */ -- cgit v1.2.3 From 9517c577f9f722270584cfb1a7b4e1354e408658 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Thu, 15 Apr 2021 11:55:06 +0300 Subject: clocksource/drivers/timer-ti-dm: Save and restore timer TIOCP_CFG As we are using cpu_pm to save and restore context, we must also save and restore the timer sysconfig register TIOCP_CFG. This is needed because we are not calling PM runtime functions at all with cpu_pm. Fixes: b34677b0999a ("clocksource/drivers/timer-ti-dm: Implement cpu_pm notifier for context save and restore") Cc: Aaro Koskinen Cc: Adam Ford Cc: Andreas Kemnade Cc: Lokesh Vutla Cc: Peter Ujfalusi Signed-off-by: Tony Lindgren Signed-off-by: Daniel Lezcano Link: https://lore.kernel.org/r/20210415085506.56828-1-tony@atomide.com --- drivers/clocksource/timer-ti-dm.c | 6 ++++++ include/clocksource/timer-ti-dm.h | 1 + 2 files changed, 7 insertions(+) (limited to 'include') diff --git a/drivers/clocksource/timer-ti-dm.c b/drivers/clocksource/timer-ti-dm.c index 33eeabf9c3d1..e5c631f1b5cb 100644 --- a/drivers/clocksource/timer-ti-dm.c +++ b/drivers/clocksource/timer-ti-dm.c @@ -78,6 +78,9 @@ static void omap_dm_timer_write_reg(struct omap_dm_timer *timer, u32 reg, static void omap_timer_restore_context(struct omap_dm_timer *timer) { + __omap_dm_timer_write(timer, OMAP_TIMER_OCP_CFG_OFFSET, + timer->context.ocp_cfg, 0); + omap_dm_timer_write_reg(timer, OMAP_TIMER_WAKEUP_EN_REG, timer->context.twer); omap_dm_timer_write_reg(timer, OMAP_TIMER_COUNTER_REG, @@ -95,6 +98,9 @@ static void omap_timer_restore_context(struct omap_dm_timer *timer) static void omap_timer_save_context(struct omap_dm_timer *timer) { + timer->context.ocp_cfg = + __omap_dm_timer_read(timer, OMAP_TIMER_OCP_CFG_OFFSET, 0); + timer->context.tclr = omap_dm_timer_read_reg(timer, OMAP_TIMER_CTRL_REG); timer->context.twer = diff --git a/include/clocksource/timer-ti-dm.h b/include/clocksource/timer-ti-dm.h index 4c61dade8835..f6da8a132639 100644 --- a/include/clocksource/timer-ti-dm.h +++ b/include/clocksource/timer-ti-dm.h @@ -74,6 +74,7 @@ #define OMAP_TIMER_ERRATA_I103_I767 0x80000000 struct timer_regs { + u32 ocp_cfg; u32 tidr; u32 tier; u32 twer; -- cgit v1.2.3 From 7560c02bdffb7c52d1457fa551b9e745d4b9e754 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 27 May 2021 12:01:20 -0700 Subject: clocksource: Check per-CPU clock synchronization when marked unstable Some sorts of per-CPU clock sources have a history of going out of synchronization with each other. However, this problem has purportedy been solved in the past ten years. Except that it is all too possible that the problem has instead simply been made less likely, which might mean that some of the occasional "Marking clocksource 'tsc' as unstable" messages might be due to desynchronization. How would anyone know? Therefore apply CPU-to-CPU synchronization checking to newly unstable clocksource that are marked with the new CLOCK_SOURCE_VERIFY_PERCPU flag. Lists of desynchronized CPUs are printed, with the caveat that if it is the reporting CPU that is itself desynchronized, it will appear that all the other clocks are wrong. Just like in real life. Reported-by: Chris Mason Signed-off-by: Paul E. McKenney Signed-off-by: Thomas Gleixner Acked-by: Feng Tang Link: https://lore.kernel.org/r/20210527190124.440372-2-paulmck@kernel.org --- arch/x86/kernel/tsc.c | 3 ++- include/linux/clocksource.h | 2 +- kernel/time/clocksource.c | 60 +++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 63 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 57ec01192180..6eb1b097e97e 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -1152,7 +1152,8 @@ static struct clocksource clocksource_tsc = { .mask = CLOCKSOURCE_MASK(64), .flags = CLOCK_SOURCE_IS_CONTINUOUS | CLOCK_SOURCE_VALID_FOR_HRES | - CLOCK_SOURCE_MUST_VERIFY, + CLOCK_SOURCE_MUST_VERIFY | + CLOCK_SOURCE_VERIFY_PERCPU, .vdso_clock_mode = VDSO_CLOCKMODE_TSC, .enable = tsc_cs_enable, .resume = tsc_resume, diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index d6ab416ee2d2..7f83d51c0fd7 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -137,7 +137,7 @@ struct clocksource { #define CLOCK_SOURCE_UNSTABLE 0x40 #define CLOCK_SOURCE_SUSPEND_NONSTOP 0x80 #define CLOCK_SOURCE_RESELECT 0x100 - +#define CLOCK_SOURCE_VERIFY_PERCPU 0x200 /* simplify initialization of mask field */ #define CLOCKSOURCE_MASK(bits) GENMASK_ULL((bits) - 1, 0) diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index 43243f2be98e..cb12225bf050 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -224,6 +224,60 @@ static bool cs_watchdog_read(struct clocksource *cs, u64 *csnow, u64 *wdnow) return false; } +static u64 csnow_mid; +static cpumask_t cpus_ahead; +static cpumask_t cpus_behind; + +static void clocksource_verify_one_cpu(void *csin) +{ + struct clocksource *cs = (struct clocksource *)csin; + + csnow_mid = cs->read(cs); +} + +static void clocksource_verify_percpu(struct clocksource *cs) +{ + int64_t cs_nsec, cs_nsec_max = 0, cs_nsec_min = LLONG_MAX; + u64 csnow_begin, csnow_end; + int cpu, testcpu; + s64 delta; + + cpumask_clear(&cpus_ahead); + cpumask_clear(&cpus_behind); + preempt_disable(); + testcpu = smp_processor_id(); + pr_warn("Checking clocksource %s synchronization from CPU %d.\n", cs->name, testcpu); + for_each_online_cpu(cpu) { + if (cpu == testcpu) + continue; + csnow_begin = cs->read(cs); + smp_call_function_single(cpu, clocksource_verify_one_cpu, cs, 1); + csnow_end = cs->read(cs); + delta = (s64)((csnow_mid - csnow_begin) & cs->mask); + if (delta < 0) + cpumask_set_cpu(cpu, &cpus_behind); + delta = (csnow_end - csnow_mid) & cs->mask; + if (delta < 0) + cpumask_set_cpu(cpu, &cpus_ahead); + delta = clocksource_delta(csnow_end, csnow_begin, cs->mask); + cs_nsec = clocksource_cyc2ns(delta, cs->mult, cs->shift); + if (cs_nsec > cs_nsec_max) + cs_nsec_max = cs_nsec; + if (cs_nsec < cs_nsec_min) + cs_nsec_min = cs_nsec; + } + preempt_enable(); + if (!cpumask_empty(&cpus_ahead)) + pr_warn(" CPUs %*pbl ahead of CPU %d for clocksource %s.\n", + cpumask_pr_args(&cpus_ahead), testcpu, cs->name); + if (!cpumask_empty(&cpus_behind)) + pr_warn(" CPUs %*pbl behind CPU %d for clocksource %s.\n", + cpumask_pr_args(&cpus_behind), testcpu, cs->name); + if (!cpumask_empty(&cpus_ahead) || !cpumask_empty(&cpus_behind)) + pr_warn(" CPU %d check durations %lldns - %lldns for clocksource %s.\n", + testcpu, cs_nsec_min, cs_nsec_max, cs->name); +} + static void clocksource_watchdog(struct timer_list *unused) { u64 csnow, wdnow, cslast, wdlast, delta; @@ -448,6 +502,12 @@ static int __clocksource_watchdog_kthread(void) unsigned long flags; int select = 0; + /* Do any required per-CPU skew verification. */ + if (curr_clocksource && + curr_clocksource->flags & CLOCK_SOURCE_UNSTABLE && + curr_clocksource->flags & CLOCK_SOURCE_VERIFY_PERCPU) + clocksource_verify_percpu(curr_clocksource); + spin_lock_irqsave(&watchdog_lock, flags); list_for_each_entry_safe(cs, tmp, &watchdog_list, wd_list) { if (cs->flags & CLOCK_SOURCE_UNSTABLE) { -- cgit v1.2.3 From 2e27e793e280ff12cb5c202a1214c08b0d3a0f26 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 27 May 2021 12:01:22 -0700 Subject: clocksource: Reduce clocksource-skew threshold Currently, WATCHDOG_THRESHOLD is set to detect a 62.5-millisecond skew in a 500-millisecond WATCHDOG_INTERVAL. This requires that clocks be skewed by more than 12.5% in order to be marked unstable. Except that a clock that is skewed by that much is probably destroying unsuspecting software right and left. And given that there are now checks for false-positive skews due to delays between reading the two clocks, it should be possible to greatly decrease WATCHDOG_THRESHOLD, at least for fine-grained clocks such as TSC. Therefore, add a new uncertainty_margin field to the clocksource structure that contains the maximum uncertainty in nanoseconds for the corresponding clock. This field may be initialized manually, as it is for clocksource_tsc_early and clocksource_jiffies, which is copied to refined_jiffies. If the field is not initialized manually, it will be computed at clock-registry time as the period of the clock in question based on the scale and freq parameters to __clocksource_update_freq_scale() function. If either of those two parameters are zero, the tens-of-milliseconds WATCHDOG_THRESHOLD is used as a cowardly alternative to dividing by zero. No matter how the uncertainty_margin field is calculated, it is bounded below by twice WATCHDOG_MAX_SKEW, that is, by 100 microseconds. Note that manually initialized uncertainty_margin fields are not adjusted, but there is a WARN_ON_ONCE() that triggers if any such field is less than twice WATCHDOG_MAX_SKEW. This WARN_ON_ONCE() is intended to discourage production use of the one-nanosecond uncertainty_margin values that are used to test the clock-skew code itself. The actual clock-skew check uses the sum of the uncertainty_margin fields of the two clocksource structures being compared. Integer overflow is avoided because the largest computed value of the uncertainty_margin fields is one billion (10^9), and double that value fits into an unsigned int. However, if someone manually specifies (say) UINT_MAX, they will get what they deserve. Note that the refined_jiffies uncertainty_margin field is initialized to TICK_NSEC, which means that skew checks involving this clocksource will be sufficently forgiving. In a similar vein, the clocksource_tsc_early uncertainty_margin field is initialized to 32*NSEC_PER_MSEC, which replicates the current behavior and allows custom setting if needed in order to address the rare skews detected for this clocksource in current mainline. Suggested-by: Thomas Gleixner Signed-off-by: Paul E. McKenney Signed-off-by: Thomas Gleixner Acked-by: Feng Tang Link: https://lore.kernel.org/r/20210527190124.440372-4-paulmck@kernel.org --- arch/x86/kernel/tsc.c | 1 + include/linux/clocksource.h | 3 +++ kernel/time/clocksource.c | 48 +++++++++++++++++++++++++++++++++++---------- kernel/time/jiffies.c | 15 +++++++------- 4 files changed, 50 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 6eb1b097e97e..2e076a459a0c 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -1128,6 +1128,7 @@ static int tsc_cs_enable(struct clocksource *cs) static struct clocksource clocksource_tsc_early = { .name = "tsc-early", .rating = 299, + .uncertainty_margin = 32 * NSEC_PER_MSEC, .read = read_tsc, .mask = CLOCKSOURCE_MASK(64), .flags = CLOCK_SOURCE_IS_CONTINUOUS | diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index 7f83d51c0fd7..895203727cb5 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -43,6 +43,8 @@ struct module; * @shift: Cycle to nanosecond divisor (power of two) * @max_idle_ns: Maximum idle time permitted by the clocksource (nsecs) * @maxadj: Maximum adjustment value to mult (~11%) + * @uncertainty_margin: Maximum uncertainty in nanoseconds per half second. + * Zero says to use default WATCHDOG_THRESHOLD. * @archdata: Optional arch-specific data * @max_cycles: Maximum safe cycle value which won't overflow on * multiplication @@ -98,6 +100,7 @@ struct clocksource { u32 shift; u64 max_idle_ns; u32 maxadj; + u32 uncertainty_margin; #ifdef CONFIG_ARCH_CLOCKSOURCE_DATA struct arch_clocksource_data archdata; #endif diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index e4beab21a1fa..9b27888a6e75 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -95,6 +95,20 @@ static char override_name[CS_NAME_LEN]; static int finished_booting; static u64 suspend_start; +/* + * Threshold: 0.0312s, when doubled: 0.0625s. + * Also a default for cs->uncertainty_margin when registering clocks. + */ +#define WATCHDOG_THRESHOLD (NSEC_PER_SEC >> 5) + +/* + * Maximum permissible delay between two readouts of the watchdog + * clocksource surrounding a read of the clocksource being validated. + * This delay could be due to SMIs, NMIs, or to VCPU preemptions. Used as + * a lower bound for cs->uncertainty_margin values when registering clocks. + */ +#define WATCHDOG_MAX_SKEW (50 * NSEC_PER_USEC) + #ifdef CONFIG_CLOCKSOURCE_WATCHDOG static void clocksource_watchdog_work(struct work_struct *work); static void clocksource_select(void); @@ -121,17 +135,9 @@ static int clocksource_watchdog_kthread(void *data); static void __clocksource_change_rating(struct clocksource *cs, int rating); /* - * Interval: 0.5sec Threshold: 0.0625s + * Interval: 0.5sec. */ #define WATCHDOG_INTERVAL (HZ >> 1) -#define WATCHDOG_THRESHOLD (NSEC_PER_SEC >> 4) - -/* - * Maximum permissible delay between two readouts of the watchdog - * clocksource surrounding a read of the clocksource being validated. - * This delay could be due to SMIs, NMIs, or to VCPU preemptions. - */ -#define WATCHDOG_MAX_SKEW (100 * NSEC_PER_USEC) static void clocksource_watchdog_work(struct work_struct *work) { @@ -348,6 +354,7 @@ static void clocksource_watchdog(struct timer_list *unused) int next_cpu, reset_pending; int64_t wd_nsec, cs_nsec; struct clocksource *cs; + u32 md; spin_lock(&watchdog_lock); if (!watchdog_running) @@ -394,7 +401,8 @@ static void clocksource_watchdog(struct timer_list *unused) continue; /* Check the deviation from the watchdog clocksource. */ - if (abs(cs_nsec - wd_nsec) > WATCHDOG_THRESHOLD) { + md = cs->uncertainty_margin + watchdog->uncertainty_margin; + if (abs(cs_nsec - wd_nsec) > md) { pr_warn("timekeeping watchdog on CPU%d: Marking clocksource '%s' as unstable because the skew is too large:\n", smp_processor_id(), cs->name); pr_warn(" '%s' wd_now: %llx wd_last: %llx mask: %llx\n", @@ -1047,6 +1055,26 @@ void __clocksource_update_freq_scale(struct clocksource *cs, u32 scale, u32 freq clocks_calc_mult_shift(&cs->mult, &cs->shift, freq, NSEC_PER_SEC / scale, sec * scale); } + + /* + * If the uncertainty margin is not specified, calculate it. + * If both scale and freq are non-zero, calculate the clock + * period, but bound below at 2*WATCHDOG_MAX_SKEW. However, + * if either of scale or freq is zero, be very conservative and + * take the tens-of-milliseconds WATCHDOG_THRESHOLD value for the + * uncertainty margin. Allow stupidly small uncertainty margins + * to be specified by the caller for testing purposes, but warn + * to discourage production use of this capability. + */ + if (scale && freq && !cs->uncertainty_margin) { + cs->uncertainty_margin = NSEC_PER_SEC / (scale * freq); + if (cs->uncertainty_margin < 2 * WATCHDOG_MAX_SKEW) + cs->uncertainty_margin = 2 * WATCHDOG_MAX_SKEW; + } else if (!cs->uncertainty_margin) { + cs->uncertainty_margin = WATCHDOG_THRESHOLD; + } + WARN_ON_ONCE(cs->uncertainty_margin < 2 * WATCHDOG_MAX_SKEW); + /* * Ensure clocksources that have large 'mult' values don't overflow * when adjusted. diff --git a/kernel/time/jiffies.c b/kernel/time/jiffies.c index a492e4da69ba..01935aafdb46 100644 --- a/kernel/time/jiffies.c +++ b/kernel/time/jiffies.c @@ -49,13 +49,14 @@ static u64 jiffies_read(struct clocksource *cs) * for "tick-less" systems. */ static struct clocksource clocksource_jiffies = { - .name = "jiffies", - .rating = 1, /* lowest valid rating*/ - .read = jiffies_read, - .mask = CLOCKSOURCE_MASK(32), - .mult = TICK_NSEC << JIFFIES_SHIFT, /* details above */ - .shift = JIFFIES_SHIFT, - .max_cycles = 10, + .name = "jiffies", + .rating = 1, /* lowest valid rating*/ + .uncertainty_margin = 32 * NSEC_PER_MSEC, + .read = jiffies_read, + .mask = CLOCKSOURCE_MASK(32), + .mult = TICK_NSEC << JIFFIES_SHIFT, /* details above */ + .shift = JIFFIES_SHIFT, + .max_cycles = 10, }; __cacheline_aligned_in_smp DEFINE_RAW_SPINLOCK(jiffies_lock); -- cgit v1.2.3 From 1253b9b87e42ab6a3d5c2cb27af2bdd67d7e50ff Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 27 May 2021 12:01:23 -0700 Subject: clocksource: Provide kernel module to test clocksource watchdog When the clocksource watchdog marks a clock as unstable, this might be due to that clock being unstable or it might be due to delays that happen to occur between the reads of the two clocks. It would be good to have a way of testing the clocksource watchdog's ability to distinguish between these two causes of clock skew and instability. Therefore, provide a new clocksource-wdtest module selected by a new TEST_CLOCKSOURCE_WATCHDOG Kconfig option. This module has a single module parameter named "holdoff" that provides the number of seconds of delay before testing should start, which defaults to zero when built as a module and to 10 seconds when built directly into the kernel. Very large systems that boot slowly may need to increase the value of this module parameter. This module uses hand-crafted clocksource structures to do its testing, thus avoiding messing up timing for the rest of the kernel and for user applications. This module first verifies that the ->uncertainty_margin field of the clocksource structures are set sanely. It then tests the delay-detection capability of the clocksource watchdog, increasing the number of consecutive delays injected, first provoking console messages complaining about the delays and finally forcing a clock-skew event. Unexpected test results cause at least one WARN_ON_ONCE() console splat. If there are no splats, the test has passed. Finally, it fuzzes the value returned from a clocksource to test the clocksource watchdog's ability to detect time skew. This module checks the state of its clocksource after each test, and uses WARN_ON_ONCE() to emit a console splat if there are any failures. This should enable all types of test frameworks to detect any such failures. This facility is intended for diagnostic use only, and should be avoided on production systems. Reported-by: Chris Mason Suggested-by: Thomas Gleixner Signed-off-by: Paul E. McKenney Signed-off-by: Thomas Gleixner Tested-by: Feng Tang Link: https://lore.kernel.org/r/20210527190124.440372-5-paulmck@kernel.org --- Documentation/admin-guide/kernel-parameters.txt | 6 + include/linux/clocksource.h | 3 + kernel/time/Makefile | 1 + kernel/time/clocksource-wdtest.c | 202 ++++++++++++++++++++++++ kernel/time/clocksource.c | 6 +- lib/Kconfig.debug | 12 ++ 6 files changed, 228 insertions(+), 2 deletions(-) create mode 100644 kernel/time/clocksource-wdtest.c (limited to 'include') diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 9ec9ea1a51f2..591048ed1365 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -597,6 +597,12 @@ The actual CPUs are chosen randomly, with no replacement if the same CPU is chosen twice. + clocksource-wdtest.holdoff= [KNL] + Set the time in seconds that the clocksource + watchdog test waits before commencing its tests. + Defaults to zero when built as a module and to + 10 seconds when built into the kernel. + clearcpuid=BITNUM[,BITNUM...] [X86] Disable CPUID feature X for the kernel. See arch/x86/include/asm/cpufeatures.h for the valid bit diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index 895203727cb5..1d42d4b17327 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -291,4 +291,7 @@ static inline void timer_probe(void) {} #define TIMER_ACPI_DECLARE(name, table_id, fn) \ ACPI_DECLARE_PROBE_ENTRY(timer, name, table_id, 0, NULL, 0, fn) +extern ulong max_cswd_read_retries; +void clocksource_verify_percpu(struct clocksource *cs); + #endif /* _LINUX_CLOCKSOURCE_H */ diff --git a/kernel/time/Makefile b/kernel/time/Makefile index 1fb1c1ef6a19..1ed85b25b096 100644 --- a/kernel/time/Makefile +++ b/kernel/time/Makefile @@ -21,3 +21,4 @@ obj-$(CONFIG_HAVE_GENERIC_VDSO) += vsyscall.o obj-$(CONFIG_DEBUG_FS) += timekeeping_debug.o obj-$(CONFIG_TEST_UDELAY) += test_udelay.o obj-$(CONFIG_TIME_NS) += namespace.o +obj-$(CONFIG_TEST_CLOCKSOURCE_WATCHDOG) += clocksource-wdtest.o diff --git a/kernel/time/clocksource-wdtest.c b/kernel/time/clocksource-wdtest.c new file mode 100644 index 000000000000..01df12395c0e --- /dev/null +++ b/kernel/time/clocksource-wdtest.c @@ -0,0 +1,202 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Unit test for the clocksource watchdog. + * + * Copyright (C) 2021 Facebook, Inc. + * + * Author: Paul E. McKenney + */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include /* for spin_unlock_irq() using preempt_count() m68k */ +#include +#include +#include +#include +#include + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Paul E. McKenney "); + +static int holdoff = IS_BUILTIN(CONFIG_TEST_CLOCKSOURCE_WATCHDOG) ? 10 : 0; +module_param(holdoff, int, 0444); +MODULE_PARM_DESC(holdoff, "Time to wait to start test (s)."); + +/* Watchdog kthread's task_struct pointer for debug purposes. */ +static struct task_struct *wdtest_task; + +static u64 wdtest_jiffies_read(struct clocksource *cs) +{ + return (u64)jiffies; +} + +/* Assume HZ > 100. */ +#define JIFFIES_SHIFT 8 + +static struct clocksource clocksource_wdtest_jiffies = { + .name = "wdtest-jiffies", + .rating = 1, /* lowest valid rating*/ + .uncertainty_margin = TICK_NSEC, + .read = wdtest_jiffies_read, + .mask = CLOCKSOURCE_MASK(32), + .flags = CLOCK_SOURCE_MUST_VERIFY, + .mult = TICK_NSEC << JIFFIES_SHIFT, /* details above */ + .shift = JIFFIES_SHIFT, + .max_cycles = 10, +}; + +static int wdtest_ktime_read_ndelays; +static bool wdtest_ktime_read_fuzz; + +static u64 wdtest_ktime_read(struct clocksource *cs) +{ + int wkrn = READ_ONCE(wdtest_ktime_read_ndelays); + static int sign = 1; + u64 ret; + + if (wkrn) { + udelay(cs->uncertainty_margin / 250); + WRITE_ONCE(wdtest_ktime_read_ndelays, wkrn - 1); + } + ret = ktime_get_real_fast_ns(); + if (READ_ONCE(wdtest_ktime_read_fuzz)) { + sign = -sign; + ret = ret + sign * 100 * NSEC_PER_MSEC; + } + return ret; +} + +static void wdtest_ktime_cs_mark_unstable(struct clocksource *cs) +{ + pr_info("--- Marking %s unstable due to clocksource watchdog.\n", cs->name); +} + +#define KTIME_FLAGS (CLOCK_SOURCE_IS_CONTINUOUS | \ + CLOCK_SOURCE_VALID_FOR_HRES | \ + CLOCK_SOURCE_MUST_VERIFY | \ + CLOCK_SOURCE_VERIFY_PERCPU) + +static struct clocksource clocksource_wdtest_ktime = { + .name = "wdtest-ktime", + .rating = 300, + .read = wdtest_ktime_read, + .mask = CLOCKSOURCE_MASK(64), + .flags = KTIME_FLAGS, + .mark_unstable = wdtest_ktime_cs_mark_unstable, + .list = LIST_HEAD_INIT(clocksource_wdtest_ktime.list), +}; + +/* Reset the clocksource if needed. */ +static void wdtest_ktime_clocksource_reset(void) +{ + if (clocksource_wdtest_ktime.flags & CLOCK_SOURCE_UNSTABLE) { + clocksource_unregister(&clocksource_wdtest_ktime); + clocksource_wdtest_ktime.flags = KTIME_FLAGS; + schedule_timeout_uninterruptible(HZ / 10); + clocksource_register_khz(&clocksource_wdtest_ktime, 1000 * 1000); + } +} + +/* Run the specified series of watchdog tests. */ +static int wdtest_func(void *arg) +{ + unsigned long j1, j2; + char *s; + int i; + + schedule_timeout_uninterruptible(holdoff * HZ); + + /* + * Verify that jiffies-like clocksources get the manually + * specified uncertainty margin. + */ + pr_info("--- Verify jiffies-like uncertainty margin.\n"); + __clocksource_register(&clocksource_wdtest_jiffies); + WARN_ON_ONCE(clocksource_wdtest_jiffies.uncertainty_margin != TICK_NSEC); + + j1 = clocksource_wdtest_jiffies.read(&clocksource_wdtest_jiffies); + schedule_timeout_uninterruptible(HZ); + j2 = clocksource_wdtest_jiffies.read(&clocksource_wdtest_jiffies); + WARN_ON_ONCE(j1 == j2); + + clocksource_unregister(&clocksource_wdtest_jiffies); + + /* + * Verify that tsc-like clocksources are assigned a reasonable + * uncertainty margin. + */ + pr_info("--- Verify tsc-like uncertainty margin.\n"); + clocksource_register_khz(&clocksource_wdtest_ktime, 1000 * 1000); + WARN_ON_ONCE(clocksource_wdtest_ktime.uncertainty_margin < NSEC_PER_USEC); + + j1 = clocksource_wdtest_ktime.read(&clocksource_wdtest_ktime); + udelay(1); + j2 = clocksource_wdtest_ktime.read(&clocksource_wdtest_ktime); + pr_info("--- tsc-like times: %lu - %lu = %lu.\n", j2, j1, j2 - j1); + WARN_ON_ONCE(time_before(j2, j1 + NSEC_PER_USEC)); + + /* Verify tsc-like stability with various numbers of errors injected. */ + for (i = 0; i <= max_cswd_read_retries + 1; i++) { + if (i <= 1 && i < max_cswd_read_retries) + s = ""; + else if (i <= max_cswd_read_retries) + s = ", expect message"; + else + s = ", expect clock skew"; + pr_info("--- Watchdog with %dx error injection, %lu retries%s.\n", i, max_cswd_read_retries, s); + WRITE_ONCE(wdtest_ktime_read_ndelays, i); + schedule_timeout_uninterruptible(2 * HZ); + WARN_ON_ONCE(READ_ONCE(wdtest_ktime_read_ndelays)); + WARN_ON_ONCE((i <= max_cswd_read_retries) != + !(clocksource_wdtest_ktime.flags & CLOCK_SOURCE_UNSTABLE)); + wdtest_ktime_clocksource_reset(); + } + + /* Verify tsc-like stability with clock-value-fuzz error injection. */ + pr_info("--- Watchdog clock-value-fuzz error injection, expect clock skew and per-CPU mismatches.\n"); + WRITE_ONCE(wdtest_ktime_read_fuzz, true); + schedule_timeout_uninterruptible(2 * HZ); + WARN_ON_ONCE(!(clocksource_wdtest_ktime.flags & CLOCK_SOURCE_UNSTABLE)); + clocksource_verify_percpu(&clocksource_wdtest_ktime); + WRITE_ONCE(wdtest_ktime_read_fuzz, false); + + clocksource_unregister(&clocksource_wdtest_ktime); + + pr_info("--- Done with test.\n"); + return 0; +} + +static void wdtest_print_module_parms(void) +{ + pr_alert("--- holdoff=%d\n", holdoff); +} + +/* Cleanup function. */ +static void clocksource_wdtest_cleanup(void) +{ +} + +static int __init clocksource_wdtest_init(void) +{ + int ret = 0; + + wdtest_print_module_parms(); + + /* Create watchdog-test task. */ + wdtest_task = kthread_run(wdtest_func, NULL, "wdtest"); + if (IS_ERR(wdtest_task)) { + ret = PTR_ERR(wdtest_task); + pr_warn("%s: Failed to create wdtest kthread.\n", __func__); + wdtest_task = NULL; + return ret; + } + + return 0; +} + +module_init(clocksource_wdtest_init); +module_exit(clocksource_wdtest_cleanup); diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index 9b27888a6e75..74d6a234fd14 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -199,8 +199,9 @@ void clocksource_mark_unstable(struct clocksource *cs) spin_unlock_irqrestore(&watchdog_lock, flags); } -static ulong max_cswd_read_retries = 3; +ulong max_cswd_read_retries = 3; module_param(max_cswd_read_retries, ulong, 0644); +EXPORT_SYMBOL_GPL(max_cswd_read_retries); static int verify_n_cpus = 8; module_param(verify_n_cpus, int, 0644); @@ -294,7 +295,7 @@ static void clocksource_verify_one_cpu(void *csin) csnow_mid = cs->read(cs); } -static void clocksource_verify_percpu(struct clocksource *cs) +void clocksource_verify_percpu(struct clocksource *cs) { int64_t cs_nsec, cs_nsec_max = 0, cs_nsec_min = LLONG_MAX; u64 csnow_begin, csnow_end; @@ -347,6 +348,7 @@ static void clocksource_verify_percpu(struct clocksource *cs) pr_warn(" CPU %d check durations %lldns - %lldns for clocksource %s.\n", testcpu, cs_nsec_min, cs_nsec_max, cs->name); } +EXPORT_SYMBOL_GPL(clocksource_verify_percpu); static void clocksource_watchdog(struct timer_list *unused) { diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 678c13967580..0a5a70c742e6 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -2571,6 +2571,18 @@ config TEST_FPU If unsure, say N. +config TEST_CLOCKSOURCE_WATCHDOG + tristate "Test clocksource watchdog in kernel space" + depends on CLOCKSOURCE_WATCHDOG + help + Enable this option to create a kernel module that will trigger + a test of the clocksource watchdog. This module may be loaded + via modprobe or insmod in which case it will run upon being + loaded, or it may be built in, in which case it will run + shortly after boot. + + If unsure, say N. + endif # RUNTIME_TESTING_MENU config ARCH_USE_MEMTEST -- cgit v1.2.3