diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2024-05-23 00:13:22 +0300 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2024-05-23 00:13:22 +0300 |
commit | f8a6e48c6c6dc30dbd423a3f4b082df625664730 (patch) | |
tree | 19b68bfffb5483634ec52f0c39a657f3cb54b929 | |
parent | 5f16eb0549ab502906fb2a10147dad4b9dc185c4 (diff) | |
parent | b9b60b3199b70fe3ce74ff493b1870ccd7554134 (diff) | |
download | linux-f8a6e48c6c6dc30dbd423a3f4b082df625664730.tar.xz |
Merge local branch 'x86-codegen'
Merge trivial x86 code generation annoyances
- Introduce helper macros for clang asm input problems
- use said macros to improve trivially stupid code generation issues in
bitops and array_index_mask_nospec
- also improve codegen with 32-bit array index comparisons
None of these really matter, but I look at code generation and profiles
fairly regularly, and these misfeatures caused the generated code to
look really odd and distract from the real issues.
* branch 'x86-codegen' of local tree:
x86: improve bitop code generation with clang
x86: improve array_index_mask_nospec() code generation
clang: work around asm input constraint problems
-rw-r--r-- | arch/x86/include/asm/barrier.h | 24 | ||||
-rw-r--r-- | arch/x86/include/asm/bitops.h | 10 | ||||
-rw-r--r-- | include/linux/compiler-clang.h | 10 | ||||
-rw-r--r-- | include/linux/compiler_types.h | 9 |
4 files changed, 34 insertions, 19 deletions
diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h index 63bdc6b85219..7b44b3c4cce1 100644 --- a/arch/x86/include/asm/barrier.h +++ b/arch/x86/include/asm/barrier.h @@ -33,20 +33,16 @@ * Returns: * 0 - (index < size) */ -static __always_inline unsigned long array_index_mask_nospec(unsigned long index, - unsigned long size) -{ - unsigned long mask; - - asm volatile ("cmp %1,%2; sbb %0,%0;" - :"=r" (mask) - :"g"(size),"r" (index) - :"cc"); - return mask; -} - -/* Override the default implementation from linux/nospec.h. */ -#define array_index_mask_nospec array_index_mask_nospec +#define array_index_mask_nospec(idx,sz) ({ \ + typeof((idx)+(sz)) __idx = (idx); \ + typeof(__idx) __sz = (sz); \ + unsigned long __mask; \ + asm volatile ("cmp %1,%2; sbb %0,%0" \ + :"=r" (__mask) \ + :ASM_INPUT_G (__sz), \ + "r" (__idx) \ + :"cc"); \ + __mask; }) /* Prevent speculative execution past this barrier. */ #define barrier_nospec() alternative("", "lfence", X86_FEATURE_LFENCE_RDTSC) diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h index 990eb686ca67..b96d45944c59 100644 --- a/arch/x86/include/asm/bitops.h +++ b/arch/x86/include/asm/bitops.h @@ -250,7 +250,7 @@ static __always_inline unsigned long variable__ffs(unsigned long word) { asm("rep; bsf %1,%0" : "=r" (word) - : "rm" (word)); + : ASM_INPUT_RM (word)); return word; } @@ -297,7 +297,7 @@ static __always_inline unsigned long __fls(unsigned long word) asm("bsr %1,%0" : "=r" (word) - : "rm" (word)); + : ASM_INPUT_RM (word)); return word; } @@ -320,7 +320,7 @@ static __always_inline int variable_ffs(int x) */ asm("bsfl %1,%0" : "=r" (r) - : "rm" (x), "0" (-1)); + : ASM_INPUT_RM (x), "0" (-1)); #elif defined(CONFIG_X86_CMOV) asm("bsfl %1,%0\n\t" "cmovzl %2,%0" @@ -377,7 +377,7 @@ static __always_inline int fls(unsigned int x) */ asm("bsrl %1,%0" : "=r" (r) - : "rm" (x), "0" (-1)); + : ASM_INPUT_RM (x), "0" (-1)); #elif defined(CONFIG_X86_CMOV) asm("bsrl %1,%0\n\t" "cmovzl %2,%0" @@ -416,7 +416,7 @@ static __always_inline int fls64(__u64 x) */ asm("bsrq %1,%q0" : "+r" (bitpos) - : "rm" (x)); + : ASM_INPUT_RM (x)); return bitpos + 1; } #else diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h index 49feac0162a5..4c1a39dcb624 100644 --- a/include/linux/compiler-clang.h +++ b/include/linux/compiler-clang.h @@ -118,3 +118,13 @@ #define __diag_ignore_all(option, comment) \ __diag_clang(13, ignore, option) + +/* + * clang has horrible behavior with "g" or "rm" constraints for asm + * inputs, turning them into something worse than "m". Avoid using + * constraints with multiple possible uses (but "ir" seems to be ok): + * + * https://github.com/llvm/llvm-project/issues/20571 + */ +#define ASM_INPUT_G "ir" +#define ASM_INPUT_RM "r" diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h index d1a9dbb8e1a7..93600de3800b 100644 --- a/include/linux/compiler_types.h +++ b/include/linux/compiler_types.h @@ -409,6 +409,15 @@ struct ftrace_likely_data { #define asm_goto_output(x...) asm volatile goto(x) #endif +/* + * Clang has trouble with constraints with multiple + * alternative behaviors (mainly "g" and "rm"). + */ +#ifndef ASM_INPUT_G + #define ASM_INPUT_G "g" + #define ASM_INPUT_RM "rm" +#endif + #ifdef CONFIG_CC_HAS_ASM_INLINE #define asm_inline asm __inline #else |