From 24ee7607b286b44a5112ced38652df14cd80d5e2 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 19 May 2023 12:21:02 +0200 Subject: arm64/arch_timer: Provide noinstr sched_clock_read() functions With the intent to provide local_clock_noinstr(), a variant of local_clock() that's safe to be called from noinstr code (with the assumption that any such code will already be non-preemptible), prepare for things by providing a noinstr sched_clock_read() function. Specifically, preempt_enable_*() calls out to schedule(), which upsets noinstr validation efforts. Signed-off-by: Peter Zijlstra (Intel) Tested-by: Michael Kelley # Hyper-V Link: https://lore.kernel.org/r/20230519102715.435618812@infradead.org --- drivers/clocksource/arm_arch_timer.c | 54 ++++++++++++++++++++++++++---------- 1 file changed, 40 insertions(+), 14 deletions(-) (limited to 'drivers/clocksource') diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c index e09d4427f604..b23d23b033cc 100644 --- a/drivers/clocksource/arm_arch_timer.c +++ b/drivers/clocksource/arm_arch_timer.c @@ -191,22 +191,40 @@ u32 arch_timer_reg_read(int access, enum arch_timer_reg reg, return val; } -static notrace u64 arch_counter_get_cntpct_stable(void) +static noinstr u64 raw_counter_get_cntpct_stable(void) { return __arch_counter_get_cntpct_stable(); } -static notrace u64 arch_counter_get_cntpct(void) +static notrace u64 arch_counter_get_cntpct_stable(void) +{ + u64 val; + preempt_disable_notrace(); + val = __arch_counter_get_cntpct_stable(); + preempt_enable_notrace(); + return val; +} + +static noinstr u64 arch_counter_get_cntpct(void) { return __arch_counter_get_cntpct(); } -static notrace u64 arch_counter_get_cntvct_stable(void) +static noinstr u64 raw_counter_get_cntvct_stable(void) { return __arch_counter_get_cntvct_stable(); } -static notrace u64 arch_counter_get_cntvct(void) +static notrace u64 arch_counter_get_cntvct_stable(void) +{ + u64 val; + preempt_disable_notrace(); + val = 
__arch_counter_get_cntvct_stable(); + preempt_enable_notrace(); + return val; +} + +static noinstr u64 arch_counter_get_cntvct(void) { return __arch_counter_get_cntvct(); } @@ -753,14 +771,14 @@ static int arch_timer_set_next_event_phys(unsigned long evt, return 0; } -static u64 arch_counter_get_cnt_mem(struct arch_timer *t, int offset_lo) +static noinstr u64 arch_counter_get_cnt_mem(struct arch_timer *t, int offset_lo) { u32 cnt_lo, cnt_hi, tmp_hi; do { - cnt_hi = readl_relaxed(t->base + offset_lo + 4); - cnt_lo = readl_relaxed(t->base + offset_lo); - tmp_hi = readl_relaxed(t->base + offset_lo + 4); + cnt_hi = __raw_readl(t->base + offset_lo + 4); + cnt_lo = __raw_readl(t->base + offset_lo); + tmp_hi = __raw_readl(t->base + offset_lo + 4); } while (cnt_hi != tmp_hi); return ((u64) cnt_hi << 32) | cnt_lo; @@ -1060,7 +1078,7 @@ bool arch_timer_evtstrm_available(void) return cpumask_test_cpu(raw_smp_processor_id(), &evtstrm_available); } -static u64 arch_counter_get_cntvct_mem(void) +static noinstr u64 arch_counter_get_cntvct_mem(void) { return arch_counter_get_cnt_mem(arch_timer_mem, CNTVCT_LO); } @@ -1074,6 +1092,7 @@ struct arch_timer_kvm_info *arch_timer_get_kvm_info(void) static void __init arch_counter_register(unsigned type) { + u64 (*scr)(void); u64 start_count; int width; @@ -1083,21 +1102,28 @@ static void __init arch_counter_register(unsigned type) if ((IS_ENABLED(CONFIG_ARM64) && !is_hyp_mode_available()) || arch_timer_uses_ppi == ARCH_TIMER_VIRT_PPI) { - if (arch_timer_counter_has_wa()) + if (arch_timer_counter_has_wa()) { rd = arch_counter_get_cntvct_stable; - else + scr = raw_counter_get_cntvct_stable; + } else { rd = arch_counter_get_cntvct; + scr = arch_counter_get_cntvct; + } } else { - if (arch_timer_counter_has_wa()) + if (arch_timer_counter_has_wa()) { rd = arch_counter_get_cntpct_stable; - else + scr = raw_counter_get_cntpct_stable; + } else { rd = arch_counter_get_cntpct; + scr = arch_counter_get_cntpct; + } } arch_timer_read_counter = rd; 
clocksource_counter.vdso_clock_mode = vdso_default; } else { arch_timer_read_counter = arch_counter_get_cntvct_mem; + scr = arch_counter_get_cntvct_mem; } width = arch_counter_get_width(); @@ -1113,7 +1139,7 @@ static void __init arch_counter_register(unsigned type) timecounter_init(&arch_timer_kvm_info.timecounter, &cyclecounter, start_count); - sched_clock_register(arch_timer_read_counter, width, arch_timer_rate); + sched_clock_register(scr, width, arch_timer_rate); } static void arch_timer_stop(struct clock_event_device *clk) -- cgit v1.2.3 From 9397fa2ea3e7634f61da1ab76b9eb88ba04dfdfc Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 19 May 2023 12:21:07 +0200 Subject: clocksource: hyper-v: Adjust hv_read_tsc_page_tsc() to avoid special casing U64_MAX Currently hv_read_tsc_page_tsc() (ab)uses the (valid) time value of U64_MAX as an error return. This breaks the clean wrap-around of the clock. Modify the function signature to return a boolean state and provide another u64 pointer to store the actual time on success. This obviates the need to steal one time value and restores the full counter width. 
Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Michael Kelley Tested-by: Michael Kelley # Hyper-V Link: https://lore.kernel.org/r/20230519102715.775630881@infradead.org --- arch/x86/include/asm/vdso/gettimeofday.h | 10 ++++++---- arch/x86/kvm/x86.c | 7 +++---- drivers/clocksource/hyperv_timer.c | 16 +++++++++++----- include/clocksource/hyperv_timer.h | 24 +++++++++--------------- 4 files changed, 29 insertions(+), 28 deletions(-) (limited to 'drivers/clocksource') diff --git a/arch/x86/include/asm/vdso/gettimeofday.h b/arch/x86/include/asm/vdso/gettimeofday.h index 0badf0a9f03d..c81858d903dc 100644 --- a/arch/x86/include/asm/vdso/gettimeofday.h +++ b/arch/x86/include/asm/vdso/gettimeofday.h @@ -238,10 +238,12 @@ static u64 vread_pvclock(void) #ifdef CONFIG_HYPERV_TIMER static u64 vread_hvclock(void) { - u64 ret = hv_read_tsc_page(&hvclock_page); - if (likely(ret != U64_MAX)) - ret &= S64_MAX; - return ret; + u64 tsc, time; + + if (hv_read_tsc_page_tsc(&hvclock_page, &tsc, &time)) + return time & S64_MAX; + + return U64_MAX; } #endif diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index ceb7c5e9cf9e..99d97ba6104f 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2799,14 +2799,13 @@ static u64 read_tsc(void) static inline u64 vgettsc(struct pvclock_clock *clock, u64 *tsc_timestamp, int *mode) { - long v; u64 tsc_pg_val; + long v; switch (clock->vclock_mode) { case VDSO_CLOCKMODE_HVCLOCK: - tsc_pg_val = hv_read_tsc_page_tsc(hv_get_tsc_page(), - tsc_timestamp); - if (tsc_pg_val != U64_MAX) { + if (hv_read_tsc_page_tsc(hv_get_tsc_page(), + tsc_timestamp, &tsc_pg_val)) { /* TSC page valid */ *mode = VDSO_CLOCKMODE_HVCLOCK; v = (tsc_pg_val - clock->cycle_last) & diff --git a/drivers/clocksource/hyperv_timer.c b/drivers/clocksource/hyperv_timer.c index bcd9042a0c9f..c643bfe2f3d4 100644 --- a/drivers/clocksource/hyperv_timer.c +++ b/drivers/clocksource/hyperv_timer.c @@ -393,14 +393,20 @@ struct ms_hyperv_tsc_page *hv_get_tsc_page(void) } 
EXPORT_SYMBOL_GPL(hv_get_tsc_page); -static u64 notrace read_hv_clock_tsc(void) +static notrace u64 read_hv_clock_tsc(void) { - u64 current_tick = hv_read_tsc_page(hv_get_tsc_page()); + u64 cur_tsc, time; - if (current_tick == U64_MAX) - current_tick = hv_get_register(HV_REGISTER_TIME_REF_COUNT); + /* + * The Hyper-V Top-Level Functional Spec (TLFS), section Timers, + * subsection Reference Counter, guarantees that the TSC and MSR + * times are in sync and monotonic. Therefore we can fall back + * to the MSR in case the TSC page indicates unavailability. + */ + if (!hv_read_tsc_page_tsc(tsc_page, &cur_tsc, &time)) + time = hv_get_register(HV_REGISTER_TIME_REF_COUNT); - return current_tick; + return time; } static u64 notrace read_hv_clock_tsc_cs(struct clocksource *arg) diff --git a/include/clocksource/hyperv_timer.h b/include/clocksource/hyperv_timer.h index 536f897375d0..6cdc873ac907 100644 --- a/include/clocksource/hyperv_timer.h +++ b/include/clocksource/hyperv_timer.h @@ -38,8 +38,9 @@ extern void hv_remap_tsc_clocksource(void); extern unsigned long hv_get_tsc_pfn(void); extern struct ms_hyperv_tsc_page *hv_get_tsc_page(void); -static inline notrace u64 -hv_read_tsc_page_tsc(const struct ms_hyperv_tsc_page *tsc_pg, u64 *cur_tsc) +static __always_inline bool +hv_read_tsc_page_tsc(const struct ms_hyperv_tsc_page *tsc_pg, + u64 *cur_tsc, u64 *time) { u64 scale, offset; u32 sequence; @@ -63,7 +64,7 @@ hv_read_tsc_page_tsc(const struct ms_hyperv_tsc_page *tsc_pg, u64 *cur_tsc) do { sequence = READ_ONCE(tsc_pg->tsc_sequence); if (!sequence) - return U64_MAX; + return false; /* * Make sure we read sequence before we read other values from * TSC page.
@@ -82,15 +83,8 @@ hv_read_tsc_page_tsc(const struct ms_hyperv_tsc_page *tsc_pg, u64 *cur_tsc) } while (READ_ONCE(tsc_pg->tsc_sequence) != sequence); - return mul_u64_u64_shr(*cur_tsc, scale, 64) + offset; -} - -static inline notrace u64 -hv_read_tsc_page(const struct ms_hyperv_tsc_page *tsc_pg) -{ - u64 cur_tsc; - - return hv_read_tsc_page_tsc(tsc_pg, &cur_tsc); + *time = mul_u64_u64_shr(*cur_tsc, scale, 64) + offset; + return true; } #else /* CONFIG_HYPERV_TIMER */ @@ -104,10 +98,10 @@ static inline struct ms_hyperv_tsc_page *hv_get_tsc_page(void) return NULL; } -static inline u64 hv_read_tsc_page_tsc(const struct ms_hyperv_tsc_page *tsc_pg, - u64 *cur_tsc) +static __always_inline bool +hv_read_tsc_page_tsc(const struct ms_hyperv_tsc_page *tsc_pg, u64 *cur_tsc, u64 *time) { - return U64_MAX; + return false; } static inline int hv_stimer_cleanup(unsigned int cpu) { return 0; } -- cgit v1.2.3 From e39acc37db34f6688e2c16e958fb1d662c422c81 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 19 May 2023 12:21:08 +0200 Subject: clocksource: hyper-v: Provide noinstr sched_clock() With the intent to provide local_clock_noinstr(), a variant of local_clock() that's safe to be called from noinstr code (with the assumption that any such code will already be non-preemptible), prepare for things by making the Hyper-V TSC and MSR sched_clock implementations noinstr. 
Signed-off-by: Peter Zijlstra (Intel) Co-developed-by: Michael Kelley Signed-off-by: Michael Kelley Signed-off-by: Peter Zijlstra (Intel) Tested-by: Michael Kelley # Hyper-V Link: https://lore.kernel.org/r/20230519102715.843039089@infradead.org --- arch/x86/include/asm/mshyperv.h | 5 +++++ drivers/clocksource/hyperv_timer.c | 32 ++++++++++++++++++-------------- 2 files changed, 23 insertions(+), 14 deletions(-) (limited to 'drivers/clocksource') diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index 49bb4f2bd300..88d9ef98e087 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -257,6 +257,11 @@ void hv_set_register(unsigned int reg, u64 value); u64 hv_get_non_nested_register(unsigned int reg); void hv_set_non_nested_register(unsigned int reg, u64 value); +static __always_inline u64 hv_raw_get_register(unsigned int reg) +{ + return __rdmsr(reg); +} + #else /* CONFIG_HYPERV */ static inline void hyperv_init(void) {} static inline void hyperv_setup_mmu_ops(void) {} diff --git a/drivers/clocksource/hyperv_timer.c b/drivers/clocksource/hyperv_timer.c index c643bfe2f3d4..d851970e310c 100644 --- a/drivers/clocksource/hyperv_timer.c +++ b/drivers/clocksource/hyperv_timer.c @@ -365,6 +365,20 @@ void hv_stimer_global_cleanup(void) } EXPORT_SYMBOL_GPL(hv_stimer_global_cleanup); +static __always_inline u64 read_hv_clock_msr(void) +{ + /* + * Read the partition counter to get the current tick count. This count + * is set to 0 when the partition is created and is incremented in 100 + * nanosecond units. + * + * Use hv_raw_get_register() because this function is used from + * noinstr. Notably, while HV_REGISTER_TIME_REF_COUNT is a synthetic + * register, it doesn't need the GHCB path. + */ + return hv_raw_get_register(HV_REGISTER_TIME_REF_COUNT); +} + /* * Code and definitions for the Hyper-V clocksources.
Two * clocksources are defined: one that reads the Hyper-V defined MSR, and @@ -393,7 +407,7 @@ struct ms_hyperv_tsc_page *hv_get_tsc_page(void) } EXPORT_SYMBOL_GPL(hv_get_tsc_page); -static notrace u64 read_hv_clock_tsc(void) +static __always_inline u64 read_hv_clock_tsc(void) { u64 cur_tsc, time; @@ -404,7 +418,7 @@ static notrace u64 read_hv_clock_tsc(void) * to the MSR in case the TSC page indicates unavailability. */ if (!hv_read_tsc_page_tsc(tsc_page, &cur_tsc, &time)) - time = hv_get_register(HV_REGISTER_TIME_REF_COUNT); + time = read_hv_clock_msr(); return time; } @@ -414,7 +428,7 @@ static u64 notrace read_hv_clock_tsc_cs(struct clocksource *arg) return read_hv_clock_tsc(); } -static u64 notrace read_hv_sched_clock_tsc(void) +static u64 noinstr read_hv_sched_clock_tsc(void) { return (read_hv_clock_tsc() - hv_sched_clock_offset) * (NSEC_PER_SEC / HV_CLOCK_HZ); @@ -466,22 +480,12 @@ static struct clocksource hyperv_cs_tsc = { #endif }; -static u64 notrace read_hv_clock_msr(void) -{ - /* - * Read the partition counter to get the current tick count. This count - * is set to 0 when the partition is created and is incremented in - * 100 nanosecond units. - */ - return hv_get_register(HV_REGISTER_TIME_REF_COUNT); -} - static u64 notrace read_hv_clock_msr_cs(struct clocksource *arg) { return read_hv_clock_msr(); } -static u64 notrace read_hv_sched_clock_msr(void) +static u64 noinstr read_hv_sched_clock_msr(void) { return (read_hv_clock_msr() - hv_sched_clock_offset) * (NSEC_PER_SEC / HV_CLOCK_HZ); -- cgit v1.2.3 From 5416bf1cf5602ab3a38b4c0d15ccec1ca4199633 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 6 Jun 2023 10:06:14 +0200 Subject: arm64/arch_timer: Fix MMIO byteswap The readl_relaxed() to __raw_readl() change meant to lose the instrumentation, but also (inadvertently) lost the byteswap.
Fixes: 24ee7607b286 ("arm64/arch_timer: Provide noinstr sched_clock_read() functions") Reported-by: Mark Rutland Signed-off-by: Peter Zijlstra (Intel) Acked-by: Mark Rutland Link: https://lkml.kernel.org/r/20230606080614.GB905437@hirez.programming.kicks-ass.net --- drivers/clocksource/arm_arch_timer.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/clocksource') diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c index b23d23b033cc..e733a2a1927a 100644 --- a/drivers/clocksource/arm_arch_timer.c +++ b/drivers/clocksource/arm_arch_timer.c @@ -776,9 +776,9 @@ static noinstr u64 arch_counter_get_cnt_mem(struct arch_timer *t, int offset_lo) u32 cnt_lo, cnt_hi, tmp_hi; do { - cnt_hi = __raw_readl(t->base + offset_lo + 4); - cnt_lo = __raw_readl(t->base + offset_lo); - tmp_hi = __raw_readl(t->base + offset_lo + 4); + cnt_hi = __le32_to_cpu((__le32 __force)__raw_readl(t->base + offset_lo + 4)); + cnt_lo = __le32_to_cpu((__le32 __force)__raw_readl(t->base + offset_lo)); + tmp_hi = __le32_to_cpu((__le32 __force)__raw_readl(t->base + offset_lo + 4)); } while (cnt_hi != tmp_hi); return ((u64) cnt_hi << 32) | cnt_lo; -- cgit v1.2.3