From 59eca2fa1934de42d8aa44d3bef655c92ea69703 Mon Sep 17 00:00:00 2001 From: Pu Wen Date: Tue, 2 Mar 2021 10:02:17 +0800 Subject: x86/cpu/hygon: Set __max_die_per_package on Hygon Set the maximum DIE per package variable on Hygon using the nodes_per_socket value in order to do per-DIE manipulations for drivers such as powercap. Signed-off-by: Pu Wen Signed-off-by: Borislav Petkov Signed-off-by: Ingo Molnar Link: https://lkml.kernel.org/r/20210302020217.1827-1-puwen@hygon.cn --- arch/x86/kernel/cpu/hygon.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/cpu/hygon.c b/arch/x86/kernel/cpu/hygon.c index ae59115d18f9..0bd6c74e3ba1 100644 --- a/arch/x86/kernel/cpu/hygon.c +++ b/arch/x86/kernel/cpu/hygon.c @@ -215,12 +215,12 @@ static void bsp_init_hygon(struct cpuinfo_x86 *c) u32 ecx; ecx = cpuid_ecx(0x8000001e); - nodes_per_socket = ((ecx >> 8) & 7) + 1; + __max_die_per_package = nodes_per_socket = ((ecx >> 8) & 7) + 1; } else if (boot_cpu_has(X86_FEATURE_NODEID_MSR)) { u64 value; rdmsrl(MSR_FAM10H_NODE_ID, value); - nodes_per_socket = ((value >> 3) & 7) + 1; + __max_die_per_package = nodes_per_socket = ((value >> 3) & 7) + 1; } if (!boot_cpu_has(X86_FEATURE_AMD_SSBD) && -- cgit v1.2.3 From a89dfde3dc3c2dbf56910af75e2d8b11ec5308f6 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 12 Mar 2021 12:32:54 +0100 Subject: x86: Remove dynamic NOP selection This ensures that a NOP is a NOP and not a random other instruction that is also a NOP. It allows simplification of dynamic code patching that wants to verify existing code before writing new instructions (ftrace, jump_label, static_call, etc..). Differentiating on NOPs is not a feature. This pessimises 32bit (DONTCARE) and 32bit on 64bit CPUs (CARELESS). 32bit is not a performance target. Everything x86_64 since AMD K10 (2007) and Intel IvyBridge (2012) is fine with using NOPL (as opposed to prefix NOP). And per FEATURE_NOPL being required for x86_64, all x86_64 CPUs can use NOPL. So stop caring about NOPs, simplify things and get on with life. [ The problem seems to be that some uarchs can only decode NOPL on a single front-end port while others have severe decode penalties for excessive prefixes. All modern uarchs can handle both, except Atom, which has prefix penalties. ] [ Also, much doubt you can actually measure any of this on normal workloads. ] After this, FEATURE_NOPL is unused except for required-features for x86_64. FEATURE_K8 is only used for PTI. [ bp: Kernel build measurements showed ~0.3s slowdown on Sandybridge which is hardly a slowdown. Get rid of X86_FEATURE_K7, while at it. ] Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov Acked-by: Alexei Starovoitov # bpf Acked-by: Linus Torvalds Link: https://lkml.kernel.org/r/20210312115749.065275711@infradead.org --- arch/x86/include/asm/cpufeatures.h | 2 +- arch/x86/include/asm/jump_label.h | 12 +-- arch/x86/include/asm/nops.h | 176 ++++++++++--------------------- arch/x86/include/asm/special_insns.h | 4 +- arch/x86/kernel/alternative.c | 198 ++++------------------------------- arch/x86/kernel/cpu/amd.c | 5 - arch/x86/kernel/ftrace.c | 4 +- arch/x86/kernel/jump_label.c | 32 ++---- arch/x86/kernel/kprobes/core.c | 2 +- arch/x86/kernel/setup.c | 1 - arch/x86/kernel/static_call.c | 4 +- arch/x86/net/bpf_jit_comp.c | 8 +- 12 files changed, 97 insertions(+), 351 deletions(-) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index cc96e26d69f7..8afa3183dfdd 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -84,7 +84,7 @@ /* CPU types for specific tunings: */ #define X86_FEATURE_K8 ( 3*32+ 4) /* "" Opteron, Athlon64 */ -#define X86_FEATURE_K7 ( 3*32+ 5) /* "" Athlon */ +/* FREE, was #define X86_FEATURE_K7 ( 3*32+ 5) "" Athlon */ #define X86_FEATURE_P3 ( 3*32+ 6) /* "" P3 */ #define X86_FEATURE_P4 ( 3*32+ 7) /* "" P4 */ #define X86_FEATURE_CONSTANT_TSC ( 3*32+ 8) /* TSC ticks at a constant rate */ diff --git a/arch/x86/include/asm/jump_label.h b/arch/x86/include/asm/jump_label.h index 06c3cc22a058..5ce342b91ff3 100644 --- a/arch/x86/include/asm/jump_label.h +++ b/arch/x86/include/asm/jump_label.h @@ -6,12 +6,6 @@ #define JUMP_LABEL_NOP_SIZE 5 -#ifdef CONFIG_X86_64 -# define STATIC_KEY_INIT_NOP P6_NOP5_ATOMIC -#else -# define STATIC_KEY_INIT_NOP GENERIC_NOP5_ATOMIC -#endif - #include #include @@ -23,7 +17,7 @@ static __always_inline bool arch_static_branch(struct static_key *key, bool branch) { asm_volatile_goto("1:" - ".byte " __stringify(STATIC_KEY_INIT_NOP) "\n\t" + ".byte " __stringify(BYTES_NOP5) "\n\t" ".pushsection __jump_table, \"aw\" \n\t" _ASM_ALIGN "\n\t" ".long 1b - ., %l[l_yes] - . \n\t" @@ -63,7 +57,7 @@ l_yes: .long \target - .Lstatic_jump_after_\@ .Lstatic_jump_after_\@: .else - .byte STATIC_KEY_INIT_NOP + .byte BYTES_NOP5 .endif .pushsection __jump_table, "aw" _ASM_ALIGN @@ -75,7 +69,7 @@ l_yes: .macro STATIC_JUMP_IF_FALSE target, key, def .Lstatic_jump_\@: .if \def - .byte STATIC_KEY_INIT_NOP + .byte BYTES_NOP5 .else /* Equivalent to "jmp.d32 \target" */ .byte 0xe9 diff --git a/arch/x86/include/asm/nops.h b/arch/x86/include/asm/nops.h index 12f12b5cf2ca..c1e5e818ba16 100644 --- a/arch/x86/include/asm/nops.h +++ b/arch/x86/include/asm/nops.h @@ -4,89 +4,58 @@ /* * Define nops for use with alternative() and for tracing. - * - * *_NOP5_ATOMIC must be a single instruction. */ -#define NOP_DS_PREFIX 0x3e +#ifndef CONFIG_64BIT -/* generic versions from gas - 1: nop - the following instructions are NOT nops in 64-bit mode, - for 64-bit mode use K8 or P6 nops instead - 2: movl %esi,%esi - 3: leal 0x00(%esi),%esi - 4: leal 0x00(,%esi,1),%esi - 6: leal 0x00000000(%esi),%esi - 7: leal 0x00000000(,%esi,1),%esi -*/ -#define GENERIC_NOP1 0x90 -#define GENERIC_NOP2 0x89,0xf6 -#define GENERIC_NOP3 0x8d,0x76,0x00 -#define GENERIC_NOP4 0x8d,0x74,0x26,0x00 -#define GENERIC_NOP5 GENERIC_NOP1,GENERIC_NOP4 -#define GENERIC_NOP6 0x8d,0xb6,0x00,0x00,0x00,0x00 -#define GENERIC_NOP7 0x8d,0xb4,0x26,0x00,0x00,0x00,0x00 -#define GENERIC_NOP8 GENERIC_NOP1,GENERIC_NOP7 -#define GENERIC_NOP5_ATOMIC NOP_DS_PREFIX,GENERIC_NOP4 +/* + * Generic 32bit nops from GAS: + * + * 1: nop + * 2: movl %esi,%esi + * 3: leal 0x0(%esi),%esi + * 4: leal 0x0(%esi,%eiz,1),%esi + * 5: leal %ds:0x0(%esi,%eiz,1),%esi + * 6: leal 0x0(%esi),%esi + * 7: leal 0x0(%esi,%eiz,1),%esi + * 8: leal %ds:0x0(%esi,%eiz,1),%esi + * + * Except 5 and 8, which are DS prefixed 4 and 7 resp, where GAS would emit 2 + * nop instructions. + */ +#define BYTES_NOP1 0x90 +#define BYTES_NOP2 0x89,0xf6 +#define BYTES_NOP3 0x8d,0x76,0x00 +#define BYTES_NOP4 0x8d,0x74,0x26,0x00 +#define BYTES_NOP5 0x3e,BYTES_NOP4 +#define BYTES_NOP6 0x8d,0xb6,0x00,0x00,0x00,0x00 +#define BYTES_NOP7 0x8d,0xb4,0x26,0x00,0x00,0x00,0x00 +#define BYTES_NOP8 0x3e,BYTES_NOP7 -/* Opteron 64bit nops - 1: nop - 2: osp nop - 3: osp osp nop - 4: osp osp osp nop -*/ -#define K8_NOP1 GENERIC_NOP1 -#define K8_NOP2 0x66,K8_NOP1 -#define K8_NOP3 0x66,K8_NOP2 -#define K8_NOP4 0x66,K8_NOP3 -#define K8_NOP5 K8_NOP3,K8_NOP2 -#define K8_NOP6 K8_NOP3,K8_NOP3 -#define K8_NOP7 K8_NOP4,K8_NOP3 -#define K8_NOP8 K8_NOP4,K8_NOP4 -#define K8_NOP5_ATOMIC 0x66,K8_NOP4 +#else -/* K7 nops - uses eax dependencies (arbitrary choice) - 1: nop - 2: movl %eax,%eax - 3: leal (,%eax,1),%eax - 4: leal 0x00(,%eax,1),%eax - 6: leal 0x00000000(%eax),%eax - 7: leal 0x00000000(,%eax,1),%eax -*/ -#define K7_NOP1 GENERIC_NOP1 -#define K7_NOP2 0x8b,0xc0 -#define K7_NOP3 0x8d,0x04,0x20 -#define K7_NOP4 0x8d,0x44,0x20,0x00 -#define K7_NOP5 K7_NOP4,K7_NOP1 -#define K7_NOP6 0x8d,0x80,0,0,0,0 -#define K7_NOP7 0x8D,0x04,0x05,0,0,0,0 -#define K7_NOP8 K7_NOP7,K7_NOP1 -#define K7_NOP5_ATOMIC NOP_DS_PREFIX,K7_NOP4 +/* + * Generic 64bit nops from GAS: + * + * 1: nop + * 2: osp nop + * 3: nopl (%eax) + * 4: nopl 0x00(%eax) + * 5: nopl 0x00(%eax,%eax,1) + * 6: osp nopl 0x00(%eax,%eax,1) + * 7: nopl 0x00000000(%eax) + * 8: nopl 0x00000000(%eax,%eax,1) + */ +#define BYTES_NOP1 0x90 +#define BYTES_NOP2 0x66,BYTES_NOP1 +#define BYTES_NOP3 0x0f,0x1f,0x00 +#define BYTES_NOP4 0x0f,0x1f,0x40,0x00 +#define BYTES_NOP5 0x0f,0x1f,0x44,0x00,0x00 +#define BYTES_NOP6 0x66,BYTES_NOP5 +#define BYTES_NOP7 0x0f,0x1f,0x80,0x00,0x00,0x00,0x00 +#define BYTES_NOP8 0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00 -/* P6 nops - uses eax dependencies (Intel-recommended choice) - 1: nop - 2: osp nop - 3: nopl (%eax) - 4: nopl 0x00(%eax) - 5: nopl 0x00(%eax,%eax,1) - 6: osp nopl 0x00(%eax,%eax,1) - 7: nopl 0x00000000(%eax) - 8: nopl 0x00000000(%eax,%eax,1) - Note: All the above are assumed to be a single instruction. - There is kernel code that depends on this. -*/ -#define P6_NOP1 GENERIC_NOP1 -#define P6_NOP2 0x66,0x90 -#define P6_NOP3 0x0f,0x1f,0x00 -#define P6_NOP4 0x0f,0x1f,0x40,0 -#define P6_NOP5 0x0f,0x1f,0x44,0x00,0 -#define P6_NOP6 0x66,0x0f,0x1f,0x44,0x00,0 -#define P6_NOP7 0x0f,0x1f,0x80,0,0,0,0 -#define P6_NOP8 0x0f,0x1f,0x84,0x00,0,0,0,0 -#define P6_NOP5_ATOMIC P6_NOP5 +#endif /* CONFIG_64BIT */ #ifdef __ASSEMBLY__ #define _ASM_MK_NOP(x) .byte x @@ -94,54 +63,19 @@ #define _ASM_MK_NOP(x) ".byte " __stringify(x) "\n" #endif -#if defined(CONFIG_MK7) -#define ASM_NOP1 _ASM_MK_NOP(K7_NOP1) -#define ASM_NOP2 _ASM_MK_NOP(K7_NOP2) -#define ASM_NOP3 _ASM_MK_NOP(K7_NOP3) -#define ASM_NOP4 _ASM_MK_NOP(K7_NOP4) -#define ASM_NOP5 _ASM_MK_NOP(K7_NOP5) -#define ASM_NOP6 _ASM_MK_NOP(K7_NOP6) -#define ASM_NOP7 _ASM_MK_NOP(K7_NOP7) -#define ASM_NOP8 _ASM_MK_NOP(K7_NOP8) -#define ASM_NOP5_ATOMIC _ASM_MK_NOP(K7_NOP5_ATOMIC) -#elif defined(CONFIG_X86_P6_NOP) -#define ASM_NOP1 _ASM_MK_NOP(P6_NOP1) -#define ASM_NOP2 _ASM_MK_NOP(P6_NOP2) -#define ASM_NOP3 _ASM_MK_NOP(P6_NOP3) -#define ASM_NOP4 _ASM_MK_NOP(P6_NOP4) -#define ASM_NOP5 _ASM_MK_NOP(P6_NOP5) -#define ASM_NOP6 _ASM_MK_NOP(P6_NOP6) -#define ASM_NOP7 _ASM_MK_NOP(P6_NOP7) -#define ASM_NOP8 _ASM_MK_NOP(P6_NOP8) -#define ASM_NOP5_ATOMIC _ASM_MK_NOP(P6_NOP5_ATOMIC) -#elif defined(CONFIG_X86_64) -#define ASM_NOP1 _ASM_MK_NOP(K8_NOP1) -#define ASM_NOP2 _ASM_MK_NOP(K8_NOP2) -#define ASM_NOP3 _ASM_MK_NOP(K8_NOP3) -#define ASM_NOP4 _ASM_MK_NOP(K8_NOP4) -#define ASM_NOP5 _ASM_MK_NOP(K8_NOP5) -#define ASM_NOP6 _ASM_MK_NOP(K8_NOP6) -#define ASM_NOP7 _ASM_MK_NOP(K8_NOP7) -#define ASM_NOP8 _ASM_MK_NOP(K8_NOP8) -#define ASM_NOP5_ATOMIC _ASM_MK_NOP(K8_NOP5_ATOMIC) -#else -#define ASM_NOP1 _ASM_MK_NOP(GENERIC_NOP1) -#define ASM_NOP2 _ASM_MK_NOP(GENERIC_NOP2) -#define ASM_NOP3 _ASM_MK_NOP(GENERIC_NOP3) -#define ASM_NOP4 _ASM_MK_NOP(GENERIC_NOP4) -#define ASM_NOP5 _ASM_MK_NOP(GENERIC_NOP5) -#define ASM_NOP6 _ASM_MK_NOP(GENERIC_NOP6) -#define ASM_NOP7 _ASM_MK_NOP(GENERIC_NOP7) -#define ASM_NOP8 _ASM_MK_NOP(GENERIC_NOP8) -#define ASM_NOP5_ATOMIC _ASM_MK_NOP(GENERIC_NOP5_ATOMIC) -#endif +#define ASM_NOP1 _ASM_MK_NOP(BYTES_NOP1) +#define ASM_NOP2 _ASM_MK_NOP(BYTES_NOP2) +#define ASM_NOP3 _ASM_MK_NOP(BYTES_NOP3) +#define ASM_NOP4 _ASM_MK_NOP(BYTES_NOP4) +#define ASM_NOP5 _ASM_MK_NOP(BYTES_NOP5) +#define ASM_NOP6 _ASM_MK_NOP(BYTES_NOP6) +#define ASM_NOP7 _ASM_MK_NOP(BYTES_NOP7) +#define ASM_NOP8 _ASM_MK_NOP(BYTES_NOP8) #define ASM_NOP_MAX 8 -#define NOP_ATOMIC5 (ASM_NOP_MAX+1) /* Entry for the 5-byte atomic NOP */ #ifndef __ASSEMBLY__ -extern const unsigned char * const *ideal_nops; -extern void arch_init_ideal_nops(void); +extern const unsigned char * const x86_nops[]; #endif #endif /* _ASM_X86_NOPS_H */ diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h index 1d3cbaef4bb7..2acd6cb62328 100644 --- a/arch/x86/include/asm/special_insns.h +++ b/arch/x86/include/asm/special_insns.h @@ -214,7 +214,7 @@ static inline void clflush(volatile void *__p) static inline void clflushopt(volatile void *__p) { - alternative_io(".byte " __stringify(NOP_DS_PREFIX) "; clflush %P0", + alternative_io(".byte 0x3e; clflush %P0", ".byte 0x66; clflush %P0", X86_FEATURE_CLFLUSHOPT, "+m" (*(volatile char __force *)__p)); @@ -225,7 +225,7 @@ static inline void clwb(volatile void *__p) volatile struct { char x[64]; } *p = __p; asm volatile(ALTERNATIVE_2( - ".byte " __stringify(NOP_DS_PREFIX) "; clflush (%[pax])", + ".byte 0x3e; clflush (%[pax])", ".byte 0x66; clflush (%[pax])", /* clflushopt (%%rax) */ X86_FEATURE_CLFLUSHOPT, ".byte 0x66, 0x0f, 0xae, 0x30", /* clwb (%%rax) */ diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 8d778e46725d..fcac875c3d79 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -74,186 +74,30 @@ do { \ } \ } while (0) -/* - * Each GENERIC_NOPX is of X bytes, and defined as an array of bytes - * that correspond to that nop. Getting from one nop to the next, we - * add to the array the offset that is equal to the sum of all sizes of - * nops preceding the one we are after. - * - * Note: The GENERIC_NOP5_ATOMIC is at the end, as it breaks the - * nice symmetry of sizes of the previous nops. - */ -#if defined(GENERIC_NOP1) && !defined(CONFIG_X86_64) -static const unsigned char intelnops[] = +const unsigned char x86nops[] = { - GENERIC_NOP1, - GENERIC_NOP2, - GENERIC_NOP3, - GENERIC_NOP4, - GENERIC_NOP5, - GENERIC_NOP6, - GENERIC_NOP7, - GENERIC_NOP8, - GENERIC_NOP5_ATOMIC -}; -static const unsigned char * const intel_nops[ASM_NOP_MAX+2] = -{ - NULL, - intelnops, - intelnops + 1, - intelnops + 1 + 2, - intelnops + 1 + 2 + 3, - intelnops + 1 + 2 + 3 + 4, - intelnops + 1 + 2 + 3 + 4 + 5, - intelnops + 1 + 2 + 3 + 4 + 5 + 6, - intelnops + 1 + 2 + 3 + 4 + 5 + 6 + 7, - intelnops + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8, + BYTES_NOP1, + BYTES_NOP2, + BYTES_NOP3, + BYTES_NOP4, + BYTES_NOP5, + BYTES_NOP6, + BYTES_NOP7, + BYTES_NOP8, }; -#endif -#ifdef K8_NOP1 -static const unsigned char k8nops[] = -{ - K8_NOP1, - K8_NOP2, - K8_NOP3, - K8_NOP4, - K8_NOP5, - K8_NOP6, - K8_NOP7, - K8_NOP8, - K8_NOP5_ATOMIC -}; -static const unsigned char * const k8_nops[ASM_NOP_MAX+2] = -{ - NULL, - k8nops, - k8nops + 1, - k8nops + 1 + 2, - k8nops + 1 + 2 + 3, - k8nops + 1 + 2 + 3 + 4, - k8nops + 1 + 2 + 3 + 4 + 5, - k8nops + 1 + 2 + 3 + 4 + 5 + 6, - k8nops + 1 + 2 + 3 + 4 + 5 + 6 + 7, - k8nops + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8, -}; -#endif - -#if defined(K7_NOP1) && !defined(CONFIG_X86_64) -static const unsigned char k7nops[] = -{ - K7_NOP1, - K7_NOP2, - K7_NOP3, - K7_NOP4, - K7_NOP5, - K7_NOP6, - K7_NOP7, - K7_NOP8, - K7_NOP5_ATOMIC -}; -static const unsigned char * const k7_nops[ASM_NOP_MAX+2] = -{ - NULL, - k7nops, - k7nops + 1, - k7nops + 1 + 2, - k7nops + 1 + 2 + 3, - k7nops + 1 + 2 + 3 + 4, - k7nops + 1 + 2 + 3 + 4 + 5, - k7nops + 1 + 2 + 3 + 4 + 5 + 6, - k7nops + 1 + 2 + 3 + 4 + 5 + 6 + 7, - k7nops + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8, -}; -#endif - -#ifdef P6_NOP1 -static const unsigned char p6nops[] = -{ - P6_NOP1, - P6_NOP2, - P6_NOP3, - P6_NOP4, - P6_NOP5, - P6_NOP6, - P6_NOP7, - P6_NOP8, - P6_NOP5_ATOMIC -}; -static const unsigned char * const p6_nops[ASM_NOP_MAX+2] = +const unsigned char * const x86_nops[ASM_NOP_MAX+1] = { NULL, - p6nops, - p6nops + 1, - p6nops + 1 + 2, - p6nops + 1 + 2 + 3, - p6nops + 1 + 2 + 3 + 4, - p6nops + 1 + 2 + 3 + 4 + 5, - p6nops + 1 + 2 + 3 + 4 + 5 + 6, - p6nops + 1 + 2 + 3 + 4 + 5 + 6 + 7, - p6nops + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8, + x86nops, + x86nops + 1, + x86nops + 1 + 2, + x86nops + 1 + 2 + 3, + x86nops + 1 + 2 + 3 + 4, + x86nops + 1 + 2 + 3 + 4 + 5, + x86nops + 1 + 2 + 3 + 4 + 5 + 6, + x86nops + 1 + 2 + 3 + 4 + 5 + 6 + 7, }; -#endif - -/* Initialize these to a safe default */ -#ifdef CONFIG_X86_64 -const unsigned char * const *ideal_nops = p6_nops; -#else -const unsigned char * const *ideal_nops = intel_nops; -#endif - -void __init arch_init_ideal_nops(void) -{ - switch (boot_cpu_data.x86_vendor) { - case X86_VENDOR_INTEL: - /* - * Due to a decoder implementation quirk, some - * specific Intel CPUs actually perform better with - * the "k8_nops" than with the SDM-recommended NOPs. - */ - if (boot_cpu_data.x86 == 6 && - boot_cpu_data.x86_model >= 0x0f && - boot_cpu_data.x86_model != 0x1c && - boot_cpu_data.x86_model != 0x26 && - boot_cpu_data.x86_model != 0x27 && - boot_cpu_data.x86_model < 0x30) { - ideal_nops = k8_nops; - } else if (boot_cpu_has(X86_FEATURE_NOPL)) { - ideal_nops = p6_nops; - } else { -#ifdef CONFIG_X86_64 - ideal_nops = k8_nops; -#else - ideal_nops = intel_nops; -#endif - } - break; - - case X86_VENDOR_HYGON: - ideal_nops = p6_nops; - return; - - case X86_VENDOR_AMD: - if (boot_cpu_data.x86 > 0xf) { - ideal_nops = p6_nops; - return; - } - - fallthrough; - - default: -#ifdef CONFIG_X86_64 - ideal_nops = k8_nops; -#else - if (boot_cpu_has(X86_FEATURE_K8)) - ideal_nops = k8_nops; - else if (boot_cpu_has(X86_FEATURE_K7)) - ideal_nops = k7_nops; - else - ideal_nops = intel_nops; -#endif - } -} /* Use this to add nops to a buffer, then text_poke the whole buffer. */ static void __init_or_module add_nops(void *insns, unsigned int len) @@ -262,7 +106,7 @@ static void __init_or_module add_nops(void *insns, unsigned int len) unsigned int noplen = len; if (noplen > ASM_NOP_MAX) noplen = ASM_NOP_MAX; - memcpy(insns, ideal_nops[noplen], noplen); + memcpy(insns, x86_nops[noplen], noplen); insns += noplen; len -= noplen; } @@ -1302,13 +1146,13 @@ static void text_poke_loc_init(struct text_poke_loc *tp, void *addr, default: /* assume NOP */ switch (len) { case 2: /* NOP2 -- emulate as JMP8+0 */ - BUG_ON(memcmp(emulate, ideal_nops[len], len)); + BUG_ON(memcmp(emulate, x86_nops[len], len)); tp->opcode = JMP8_INSN_OPCODE; tp->rel32 = 0; break; case 5: /* NOP5 -- emulate as JMP32+0 */ - BUG_ON(memcmp(emulate, ideal_nops[NOP_ATOMIC5], len)); + BUG_ON(memcmp(emulate, x86_nops[len], len)); tp->opcode = JMP32_INSN_OPCODE; tp->rel32 = 0; break; diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 347a956f71ca..2d11384dc9ab 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -628,11 +628,6 @@ static void early_init_amd(struct cpuinfo_x86 *c) early_init_amd_mc(c); -#ifdef CONFIG_X86_32 - if (c->x86 == 6) - set_cpu_cap(c, X86_FEATURE_K7); -#endif - if (c->x86 >= 0xf) set_cpu_cap(c, X86_FEATURE_K8); diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 7edbd5ee5ed4..1b3ce3b4a2a2 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -66,7 +66,7 @@ int ftrace_arch_code_modify_post_process(void) static const char *ftrace_nop_replace(void) { - return ideal_nops[NOP_ATOMIC5]; + return x86_nops[5]; } static const char *ftrace_call_replace(unsigned long ip, unsigned long addr) @@ -377,7 +377,7 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size) ip = trampoline + (jmp_offset - start_offset); if (WARN_ON(*(char *)ip != 0x75)) goto fail; - ret = copy_from_kernel_nofault(ip, ideal_nops[2], 2); + ret = copy_from_kernel_nofault(ip, x86_nops[2], 2); if (ret < 0) goto fail; } diff --git a/arch/x86/kernel/jump_label.c b/arch/x86/kernel/jump_label.c index 5ba8477c2cb7..6a2eb62c85e6 100644 --- a/arch/x86/kernel/jump_label.c +++ b/arch/x86/kernel/jump_label.c @@ -28,10 +28,8 @@ static void bug_at(const void *ip, int line) } static const void * -__jump_label_set_jump_code(struct jump_entry *entry, enum jump_label_type type, int init) +__jump_label_set_jump_code(struct jump_entry *entry, enum jump_label_type type) { - const unsigned char default_nop[] = { STATIC_KEY_INIT_NOP }; - const unsigned char *ideal_nop = ideal_nops[NOP_ATOMIC5]; const void *expect, *code; const void *addr, *dest; int line; @@ -41,10 +39,8 @@ __jump_label_set_jump_code(struct jump_entry *entry, enum jump_label_type type, code = text_gen_insn(JMP32_INSN_OPCODE, addr, dest); - if (init) { - expect = default_nop; line = __LINE__; - } else if (type == JUMP_LABEL_JMP) { - expect = ideal_nop; line = __LINE__; + if (type == JUMP_LABEL_JMP) { + expect = x86_nops[5]; line = __LINE__; } else { expect = code; line = __LINE__; } @@ -53,7 +49,7 @@ __jump_label_set_jump_code(struct jump_entry *entry, enum jump_label_type type, bug_at(addr, line); if (type == JUMP_LABEL_NOP) - code = ideal_nop; + code = x86_nops[5]; return code; } @@ -62,7 +58,7 @@ static inline void __jump_label_transform(struct jump_entry *entry, enum jump_label_type type, int init) { - const void *opcode = __jump_label_set_jump_code(entry, type, init); + const void *opcode = __jump_label_set_jump_code(entry, type); /* * As long as only a single processor is running and the code is still @@ -113,7 +109,7 @@ bool arch_jump_label_transform_queue(struct jump_entry *entry, } mutex_lock(&text_mutex); - opcode = __jump_label_set_jump_code(entry, type, 0); + opcode = __jump_label_set_jump_code(entry, type); text_poke_queue((void *)jump_entry_code(entry), opcode, JUMP_LABEL_NOP_SIZE, NULL); mutex_unlock(&text_mutex); @@ -136,22 +132,6 @@ static enum { __init_or_module void arch_jump_label_transform_static(struct jump_entry *entry, enum jump_label_type type) { - /* - * This function is called at boot up and when modules are - * first loaded. Check if the default nop, the one that is - * inserted at compile time, is the ideal nop. If it is, then - * we do not need to update the nop, and we can leave it as is. - * If it is not, then we need to update the nop to the ideal nop. - */ - if (jlstate == JL_STATE_START) { - const unsigned char default_nop[] = { STATIC_KEY_INIT_NOP }; - const unsigned char *ideal_nop = ideal_nops[NOP_ATOMIC5]; - - if (memcmp(ideal_nop, default_nop, 5) != 0) - jlstate = JL_STATE_UPDATE; - else - jlstate = JL_STATE_NO_UPDATE; - } if (jlstate == JL_STATE_UPDATE) jump_label_transform(entry, type, 1); } diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index df776cdca327..6356834928b9 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -229,7 +229,7 @@ __recover_probed_insn(kprobe_opcode_t *buf, unsigned long addr) return 0UL; if (faddr) - memcpy(buf, ideal_nops[NOP_ATOMIC5], 5); + memcpy(buf, x86_nops[5], 5); else buf[0] = kp->opcode; return (unsigned long)buf; diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index d883176ef2ce..3b4b9b281545 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -822,7 +822,6 @@ void __init setup_arch(char **cmdline_p) idt_setup_early_traps(); early_cpu_init(); - arch_init_ideal_nops(); jump_label_init(); static_call_init(); early_ioremap_init(); diff --git a/arch/x86/kernel/static_call.c b/arch/x86/kernel/static_call.c index 9442c4136c38..ea028e736831 100644 --- a/arch/x86/kernel/static_call.c +++ b/arch/x86/kernel/static_call.c @@ -34,7 +34,7 @@ static void __ref __static_call_transform(void *insn, enum insn_type type, void break; case NOP: - code = ideal_nops[NOP_ATOMIC5]; + code = x86_nops[5]; break; case JMP: @@ -66,7 +66,7 @@ static void __static_call_validate(void *insn, bool tail) return; } else { if (opcode == CALL_INSN_OPCODE || - !memcmp(insn, ideal_nops[NOP_ATOMIC5], 5) || + !memcmp(insn, x86_nops[5], 5) || !memcmp(insn, xor5rax, 5)) return; } diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 79e7a0ec1da5..6aa29c45c3f9 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -282,7 +282,7 @@ static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf, /* BPF trampoline can be made to work without these nops, * but let's waste 5 bytes for now and optimize later */ - memcpy(prog, ideal_nops[NOP_ATOMIC5], cnt); + memcpy(prog, x86_nops[5], cnt); prog += cnt; if (!ebpf_from_cbpf) { if (tail_call_reachable && !is_subprog) @@ -330,7 +330,7 @@ static int __bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t, void *old_addr, void *new_addr, const bool text_live) { - const u8 *nop_insn = ideal_nops[NOP_ATOMIC5]; + const u8 *nop_insn = x86_nops[5]; u8 old_insn[X86_PATCH_SIZE]; u8 new_insn[X86_PATCH_SIZE]; u8 *prog; @@ -560,7 +560,7 @@ static void emit_bpf_tail_call_direct(struct bpf_jit_poke_descriptor *poke, if (stack_depth) EMIT3_off32(0x48, 0x81, 0xC4, round_up(stack_depth, 8)); - memcpy(prog, ideal_nops[NOP_ATOMIC5], X86_PATCH_SIZE); + memcpy(prog, x86_nops[5], X86_PATCH_SIZE); prog += X86_PATCH_SIZE; /* out: */ @@ -881,7 +881,7 @@ static int emit_nops(u8 **pprog, int len) noplen = ASM_NOP_MAX; for (i = 0; i < noplen; i++) - EMIT1(ideal_nops[noplen][i]); + EMIT1(x86_nops[noplen][i]); len -= noplen; } -- cgit v1.2.3 From 301cddc21a157a3072d789a3097857202e550a24 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 12 Mar 2021 12:32:55 +0100 Subject: objtool/x86: Use asm/nops.h Since the kernel will rely on a single canonical set of NOPs, make sure objtool uses the exact same ones. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20210312115749.136357911@infradead.org --- tools/arch/x86/include/asm/nops.h | 81 +++++++++++++++++++++++++++++++++++++++ tools/objtool/arch/x86/decode.c | 13 ++++--- tools/objtool/sync-check.sh | 1 + 3 files changed, 90 insertions(+), 5 deletions(-) create mode 100644 tools/arch/x86/include/asm/nops.h diff --git a/tools/arch/x86/include/asm/nops.h b/tools/arch/x86/include/asm/nops.h new file mode 100644 index 000000000000..c1e5e818ba16 --- /dev/null +++ b/tools/arch/x86/include/asm/nops.h @@ -0,0 +1,81 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_NOPS_H +#define _ASM_X86_NOPS_H + +/* + * Define nops for use with alternative() and for tracing. + */ + +#ifndef CONFIG_64BIT + +/* + * Generic 32bit nops from GAS: + * + * 1: nop + * 2: movl %esi,%esi + * 3: leal 0x0(%esi),%esi + * 4: leal 0x0(%esi,%eiz,1),%esi + * 5: leal %ds:0x0(%esi,%eiz,1),%esi + * 6: leal 0x0(%esi),%esi + * 7: leal 0x0(%esi,%eiz,1),%esi + * 8: leal %ds:0x0(%esi,%eiz,1),%esi + * + * Except 5 and 8, which are DS prefixed 4 and 7 resp, where GAS would emit 2 + * nop instructions. + */ +#define BYTES_NOP1 0x90 +#define BYTES_NOP2 0x89,0xf6 +#define BYTES_NOP3 0x8d,0x76,0x00 +#define BYTES_NOP4 0x8d,0x74,0x26,0x00 +#define BYTES_NOP5 0x3e,BYTES_NOP4 +#define BYTES_NOP6 0x8d,0xb6,0x00,0x00,0x00,0x00 +#define BYTES_NOP7 0x8d,0xb4,0x26,0x00,0x00,0x00,0x00 +#define BYTES_NOP8 0x3e,BYTES_NOP7 + +#else + +/* + * Generic 64bit nops from GAS: + * + * 1: nop + * 2: osp nop + * 3: nopl (%eax) + * 4: nopl 0x00(%eax) + * 5: nopl 0x00(%eax,%eax,1) + * 6: osp nopl 0x00(%eax,%eax,1) + * 7: nopl 0x00000000(%eax) + * 8: nopl 0x00000000(%eax,%eax,1) + */ +#define BYTES_NOP1 0x90 +#define BYTES_NOP2 0x66,BYTES_NOP1 +#define BYTES_NOP3 0x0f,0x1f,0x00 +#define BYTES_NOP4 0x0f,0x1f,0x40,0x00 +#define BYTES_NOP5 0x0f,0x1f,0x44,0x00,0x00 +#define BYTES_NOP6 0x66,BYTES_NOP5 +#define BYTES_NOP7 0x0f,0x1f,0x80,0x00,0x00,0x00,0x00 +#define BYTES_NOP8 0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00 + +#endif /* CONFIG_64BIT */ + +#ifdef __ASSEMBLY__ +#define _ASM_MK_NOP(x) .byte x +#else +#define _ASM_MK_NOP(x) ".byte " __stringify(x) "\n" +#endif + +#define ASM_NOP1 _ASM_MK_NOP(BYTES_NOP1) +#define ASM_NOP2 _ASM_MK_NOP(BYTES_NOP2) +#define ASM_NOP3 _ASM_MK_NOP(BYTES_NOP3) +#define ASM_NOP4 _ASM_MK_NOP(BYTES_NOP4) +#define ASM_NOP5 _ASM_MK_NOP(BYTES_NOP5) +#define ASM_NOP6 _ASM_MK_NOP(BYTES_NOP6) +#define ASM_NOP7 _ASM_MK_NOP(BYTES_NOP7) +#define ASM_NOP8 _ASM_MK_NOP(BYTES_NOP8) + +#define ASM_NOP_MAX 8 + +#ifndef __ASSEMBLY__ +extern const unsigned char * const x86_nops[]; +#endif + +#endif /* _ASM_X86_NOPS_H */ diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c index 549813cff8ab..c117bfc60370 100644 --- a/tools/objtool/arch/x86/decode.c +++ b/tools/objtool/arch/x86/decode.c @@ -11,6 +11,9 @@ #include "../../../arch/x86/lib/inat.c" #include "../../../arch/x86/lib/insn.c" +#define CONFIG_64BIT 1 +#include + #include #include #include @@ -596,11 +599,11 @@ void arch_initial_func_cfi_state(struct cfi_init_state *state) const char *arch_nop_insn(int len) { static const char nops[5][5] = { - /* 1 */ { 0x90 }, - /* 2 */ { 0x66, 0x90 }, - /* 3 */ { 0x0f, 0x1f, 0x00 }, - /* 4 */ { 0x0f, 0x1f, 0x40, 0x00 }, - /* 5 */ { 0x0f, 0x1f, 0x44, 0x00, 0x00 }, + { BYTES_NOP1 }, + { BYTES_NOP2 }, + { BYTES_NOP3 }, + { BYTES_NOP4 }, + { BYTES_NOP5 }, }; if (len < 1 || len > 5) { diff --git a/tools/objtool/sync-check.sh b/tools/objtool/sync-check.sh index 606a4b5e929f..d23268683910 100755 --- a/tools/objtool/sync-check.sh +++ b/tools/objtool/sync-check.sh @@ -10,6 +10,7 @@ FILES="include/linux/objtool.h" if [ "$SRCARCH" = "x86" ]; then FILES="$FILES +arch/x86/include/asm/nops.h arch/x86/include/asm/inat_types.h arch/x86/include/asm/orc_types.h arch/x86/include/asm/emulate_prefix.h -- cgit v1.2.3 From a331f5fdd36dba1ffb0239a4dfaaf1df91ff1aab Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Fri, 19 Mar 2021 10:39:19 -0700 Subject: x86/mce: Add Xeon Sapphire Rapids to list of CPUs that support PPIN New CPU model, same MSRs to control and read the inventory number. Signed-off-by: Tony Luck Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20210319173919.291428-1-tony.luck@intel.com --- arch/x86/kernel/cpu/mce/intel.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/kernel/cpu/mce/intel.c b/arch/x86/kernel/cpu/mce/intel.c index e309476743b7..acfd5d9f93c6 100644 --- a/arch/x86/kernel/cpu/mce/intel.c +++ b/arch/x86/kernel/cpu/mce/intel.c @@ -486,6 +486,7 @@ static void intel_ppin_init(struct cpuinfo_x86 *c) case INTEL_FAM6_BROADWELL_X: case INTEL_FAM6_SKYLAKE_X: case INTEL_FAM6_ICELAKE_X: + case INTEL_FAM6_SAPPHIRERAPIDS_X: case INTEL_FAM6_XEON_PHI_KNL: case INTEL_FAM6_XEON_PHI_KNM: -- cgit v1.2.3