From 5b26ef660a690e424d9548fdf0565d4172d5d88f Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Mon, 25 Mar 2024 08:40:06 +0200 Subject: vdso: Consolidate nanoseconds calculation Consolidate nanoseconds calculation to simplify and reduce code duplication. Suggested-by: Thomas Gleixner Signed-off-by: Adrian Hunter Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20240325064023.2997-3-adrian.hunter@intel.com --- arch/x86/include/asm/vdso/gettimeofday.h | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/vdso/gettimeofday.h b/arch/x86/include/asm/vdso/gettimeofday.h index 8e048ca980df..5727dedd3549 100644 --- a/arch/x86/include/asm/vdso/gettimeofday.h +++ b/arch/x86/include/asm/vdso/gettimeofday.h @@ -300,7 +300,7 @@ static inline bool arch_vdso_cycles_ok(u64 cycles) #define vdso_cycles_ok arch_vdso_cycles_ok /* - * x86 specific delta calculation. + * x86 specific calculation of nanoseconds for the current cycle count * * The regular implementation assumes that clocksource reads are globally * monotonic. The TSC can be slightly off across sockets which can cause @@ -308,8 +308,8 @@ static inline bool arch_vdso_cycles_ok(u64 cycles) * jump. * * Therefore it needs to be verified that @cycles are greater than - * @last. If not then use @last, which is the base time of the current - * conversion period. + * @vd->cycles_last. If not then use @vd->cycles_last, which is the base + * time of the current conversion period. * * This variant also uses a custom mask because while the clocksource mask of * all the VDSO capable clocksources on x86 is U64_MAX, the above code uses @@ -317,25 +317,24 @@ static inline bool arch_vdso_cycles_ok(u64 cycles) * declares everything with the MSB/Sign-bit set as invalid. Therefore the * effective mask is S64_MAX. */ -static __always_inline -u64 vdso_calc_delta(u64 cycles, u64 last, u64 mask, u32 mult) +static __always_inline u64 vdso_calc_ns(const struct vdso_data *vd, u64 cycles, u64 base) { /* * Due to the MSB/Sign-bit being used as invalid marker (see * arch_vdso_cycles_valid() above), the effective mask is S64_MAX. */ - u64 delta = (cycles - last) & S64_MAX; + u64 delta = (cycles - vd->cycle_last) & S64_MAX; /* * Due to the above mentioned TSC wobbles, filter out negative motion. * Per the above masking, the effective sign bit is now bit 62. */ if (unlikely(delta & (1ULL << 62))) - return 0; + return base >> vd->shift; - return delta * mult; + return ((delta * vd->mult) + base) >> vd->shift; } -#define vdso_calc_delta vdso_calc_delta +#define vdso_calc_ns vdso_calc_ns #endif /* !__ASSEMBLY__ */ -- cgit v1.2.3 From 7e90ffb716d289b3b82fb41892bb52a11bdadfd9 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Mon, 25 Mar 2024 08:40:12 +0200 Subject: x86/vdso: Make delta calculation overflow safe Kernel timekeeping is designed to keep the change in cycles (since the last timer interrupt) below max_cycles, which prevents multiplication overflow when converting cycles to nanoseconds. However, if timer interrupts stop, the calculation will eventually overflow. Add protection against that. Select GENERIC_VDSO_OVERFLOW_PROTECT so that max_cycles is made available in the VDSO data page. Check against max_cycles, falling back to a slower higher precision calculation. Take advantage of the opportunity to move masking and negative motion check into the slow path. The result is a calculation that has similar performance as before. Newer machines showed performance benefit, whereas older Skylake-based hardware such as Intel Kaby Lake was seen <1% worse. Suggested-by: Thomas Gleixner Signed-off-by: Adrian Hunter Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20240325064023.2997-9-adrian.hunter@intel.com --- arch/x86/Kconfig | 1 + arch/x86/include/asm/vdso/gettimeofday.h | 31 ++++++++++++++++++++++--------- 2 files changed, 23 insertions(+), 9 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 4fff6ed46e90..4e251ba3bad2 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -168,6 +168,7 @@ config X86 select GENERIC_TIME_VSYSCALL select GENERIC_GETTIMEOFDAY select GENERIC_VDSO_TIME_NS + select GENERIC_VDSO_OVERFLOW_PROTECT select GUP_GET_PXX_LOW_HIGH if X86_PAE select HARDIRQS_SW_RESEND select HARDLOCKUP_CHECK_TIMESTAMP if X86_64 diff --git a/arch/x86/include/asm/vdso/gettimeofday.h b/arch/x86/include/asm/vdso/gettimeofday.h index 5727dedd3549..0ef36190abe6 100644 --- a/arch/x86/include/asm/vdso/gettimeofday.h +++ b/arch/x86/include/asm/vdso/gettimeofday.h @@ -319,18 +319,31 @@ static inline bool arch_vdso_cycles_ok(u64 cycles) */ static __always_inline u64 vdso_calc_ns(const struct vdso_data *vd, u64 cycles, u64 base) { - /* - * Due to the MSB/Sign-bit being used as invalid marker (see - * arch_vdso_cycles_valid() above), the effective mask is S64_MAX. - */ - u64 delta = (cycles - vd->cycle_last) & S64_MAX; + u64 delta = cycles - vd->cycle_last; /* - * Due to the above mentioned TSC wobbles, filter out negative motion. - * Per the above masking, the effective sign bit is now bit 62. + * Negative motion and deltas which can cause multiplication + * overflow require special treatment. This check covers both as + * negative motion is guaranteed to be greater than @vd::max_cycles + * due to unsigned comparison. + * + * Due to the MSB/Sign-bit being used as invalid marker (see + * arch_vdso_cycles_valid() above), the effective mask is S64_MAX, + * but that case is also unlikely and will also take the unlikely path + * here. */ - if (unlikely(delta & (1ULL << 62))) - return base >> vd->shift; + if (unlikely(delta > vd->max_cycles)) { + /* + * Due to the above mentioned TSC wobbles, filter out + * negative motion. Per the above masking, the effective + * sign bit is now bit 62. + */ + if (delta & (1ULL << 62)) + return base >> vd->shift; + + /* Handle multiplication overflow gracefully */ + return mul_u64_u32_add_u64_shr(delta & S64_MAX, vd->mult, base, vd->shift); + } return ((delta * vd->mult) + base) >> vd->shift; } -- cgit v1.2.3