diff options
Diffstat (limited to 'arch/x86/lib/retpoline.S')
-rw-r--r-- | arch/x86/lib/retpoline.S | 137 |
1 files changed, 95 insertions, 42 deletions
diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S index 2cff585f22f2..cd86aeb5fdd3 100644 --- a/arch/x86/lib/retpoline.S +++ b/arch/x86/lib/retpoline.S @@ -13,7 +13,7 @@ #include <asm/frame.h> #include <asm/nops.h> - .section .text.__x86.indirect_thunk + .section .text..__x86.indirect_thunk .macro POLINE reg @@ -133,75 +133,106 @@ SYM_CODE_END(__x86_indirect_jump_thunk_array) #ifdef CONFIG_RETHUNK /* - * srso_untrain_ret_alias() and srso_safe_ret_alias() are placed at + * srso_alias_untrain_ret() and srso_alias_safe_ret() are placed at * special addresses: * - * - srso_untrain_ret_alias() is 2M aligned - * - srso_safe_ret_alias() is also in the same 2M page but bits 2, 8, 14 + * - srso_alias_untrain_ret() is 2M aligned + * - srso_alias_safe_ret() is also in the same 2M page but bits 2, 8, 14 * and 20 in its virtual address are set (while those bits in the - * srso_untrain_ret_alias() function are cleared). + * srso_alias_untrain_ret() function are cleared). * * This guarantees that those two addresses will alias in the branch * target buffer of Zen3/4 generations, leading to any potential * poisoned entries at that BTB slot to get evicted. * - * As a result, srso_safe_ret_alias() becomes a safe return. + * As a result, srso_alias_safe_ret() becomes a safe return. */ #ifdef CONFIG_CPU_SRSO - .section .text.__x86.rethunk_untrain + .section .text..__x86.rethunk_untrain -SYM_START(srso_untrain_ret_alias, SYM_L_GLOBAL, SYM_A_NONE) +SYM_START(srso_alias_untrain_ret, SYM_L_GLOBAL, SYM_A_NONE) + UNWIND_HINT_FUNC ANNOTATE_NOENDBR ASM_NOP2 lfence - jmp __x86_return_thunk -SYM_FUNC_END(srso_untrain_ret_alias) -__EXPORT_THUNK(srso_untrain_ret_alias) - - .section .text.__x86.rethunk_safe + jmp srso_alias_return_thunk +SYM_FUNC_END(srso_alias_untrain_ret) +__EXPORT_THUNK(srso_alias_untrain_ret) + + .section .text..__x86.rethunk_safe +#else +/* dummy definition for alternatives */ +SYM_START(srso_alias_untrain_ret, SYM_L_GLOBAL, SYM_A_NONE) + ANNOTATE_UNRET_SAFE + ret + int3 +SYM_FUNC_END(srso_alias_untrain_ret) #endif -/* Needs a definition for the __x86_return_thunk alternative below. */ -SYM_START(srso_safe_ret_alias, SYM_L_GLOBAL, SYM_A_NONE) -#ifdef CONFIG_CPU_SRSO - add $8, %_ASM_SP +SYM_START(srso_alias_safe_ret, SYM_L_GLOBAL, SYM_A_NONE) + lea 8(%_ASM_SP), %_ASM_SP UNWIND_HINT_FUNC -#endif ANNOTATE_UNRET_SAFE ret int3 -SYM_FUNC_END(srso_safe_ret_alias) +SYM_FUNC_END(srso_alias_safe_ret) - .section .text.__x86.return_thunk + .section .text..__x86.return_thunk + +SYM_CODE_START(srso_alias_return_thunk) + UNWIND_HINT_FUNC + ANNOTATE_NOENDBR + call srso_alias_safe_ret + ud2 +SYM_CODE_END(srso_alias_return_thunk) + +/* + * Some generic notes on the untraining sequences: + * + * They are interchangeable when it comes to flushing potentially wrong + * RET predictions from the BTB. + * + * The SRSO Zen1/2 (MOVABS) untraining sequence is longer than the + * Retbleed sequence because the return sequence done there + * (srso_safe_ret()) is longer and the return sequence must fully nest + * (end before) the untraining sequence. Therefore, the untraining + * sequence must fully overlap the return sequence. + * + * Regarding alignment - the instructions which need to be untrained, + * must all start at a cacheline boundary for Zen1/2 generations. That + * is, instruction sequences starting at srso_safe_ret() and + * the respective instruction sequences at retbleed_return_thunk() + * must start at a cacheline boundary. + */ /* * Safety details here pertain to the AMD Zen{1,2} microarchitecture: - * 1) The RET at __x86_return_thunk must be on a 64 byte boundary, for + * 1) The RET at retbleed_return_thunk must be on a 64 byte boundary, for * alignment within the BTB. - * 2) The instruction at zen_untrain_ret must contain, and not + * 2) The instruction at retbleed_untrain_ret must contain, and not * end with, the 0xc3 byte of the RET. * 3) STIBP must be enabled, or SMT disabled, to prevent the sibling thread * from re-poisioning the BTB prediction. */ .align 64 - .skip 64 - (__ret - zen_untrain_ret), 0xcc -SYM_START(zen_untrain_ret, SYM_L_GLOBAL, SYM_A_NONE) + .skip 64 - (retbleed_return_thunk - retbleed_untrain_ret), 0xcc +SYM_START(retbleed_untrain_ret, SYM_L_GLOBAL, SYM_A_NONE) ANNOTATE_NOENDBR /* - * As executed from zen_untrain_ret, this is: + * As executed from retbleed_untrain_ret, this is: * * TEST $0xcc, %bl * LFENCE - * JMP __x86_return_thunk + * JMP retbleed_return_thunk * * Executing the TEST instruction has a side effect of evicting any BTB * prediction (potentially attacker controlled) attached to the RET, as - * __x86_return_thunk + 1 isn't an instruction boundary at the moment. + * retbleed_return_thunk + 1 isn't an instruction boundary at the moment. */ .byte 0xf6 /* - * As executed from __x86_return_thunk, this is a plain RET. + * As executed from retbleed_return_thunk, this is a plain RET. * * As part of the TEST above, RET is the ModRM byte, and INT3 the imm8. * @@ -213,13 +244,13 @@ SYM_START(zen_untrain_ret, SYM_L_GLOBAL, SYM_A_NONE) * With SMT enabled and STIBP active, a sibling thread cannot poison * RET's prediction to a type of its choice, but can evict the * prediction due to competitive sharing. If the prediction is - * evicted, __x86_return_thunk will suffer Straight Line Speculation + * evicted, retbleed_return_thunk will suffer Straight Line Speculation * which will be contained safely by the INT3. */ -SYM_INNER_LABEL(__ret, SYM_L_GLOBAL) +SYM_INNER_LABEL(retbleed_return_thunk, SYM_L_GLOBAL) ret int3 -SYM_CODE_END(__ret) +SYM_CODE_END(retbleed_return_thunk) /* * Ensure the TEST decoding / BTB invalidation is complete. @@ -230,16 +261,16 @@ SYM_CODE_END(__ret) * Jump back and execute the RET in the middle of the TEST instruction. * INT3 is for SLS protection. */ - jmp __ret + jmp retbleed_return_thunk int3 -SYM_FUNC_END(zen_untrain_ret) -__EXPORT_THUNK(zen_untrain_ret) +SYM_FUNC_END(retbleed_untrain_ret) +__EXPORT_THUNK(retbleed_untrain_ret) /* - * SRSO untraining sequence for Zen1/2, similar to zen_untrain_ret() + * SRSO untraining sequence for Zen1/2, similar to retbleed_untrain_ret() * above. On kernel entry, srso_untrain_ret() is executed which is a * - * movabs $0xccccccc308c48348,%rax + * movabs $0xccccc30824648d48,%rax * * and when the return thunk executes the inner label srso_safe_ret() * later, it is a stack manipulation and a RET which is mispredicted and @@ -251,22 +282,44 @@ SYM_START(srso_untrain_ret, SYM_L_GLOBAL, SYM_A_NONE) ANNOTATE_NOENDBR .byte 0x48, 0xb8 +/* + * This forces the function return instruction to speculate into a trap + * (UD2 in srso_return_thunk() below). This RET will then mispredict + * and execution will continue at the return site read from the top of + * the stack. + */ SYM_INNER_LABEL(srso_safe_ret, SYM_L_GLOBAL) - add $8, %_ASM_SP + lea 8(%_ASM_SP), %_ASM_SP ret int3 int3 - int3 + /* end of movabs */ lfence call srso_safe_ret - int3 + ud2 SYM_CODE_END(srso_safe_ret) SYM_FUNC_END(srso_untrain_ret) __EXPORT_THUNK(srso_untrain_ret) -SYM_FUNC_START(__x86_return_thunk) - ALTERNATIVE_2 "jmp __ret", "call srso_safe_ret", X86_FEATURE_SRSO, \ - "call srso_safe_ret_alias", X86_FEATURE_SRSO_ALIAS +SYM_CODE_START(srso_return_thunk) + UNWIND_HINT_FUNC + ANNOTATE_NOENDBR + call srso_safe_ret + ud2 +SYM_CODE_END(srso_return_thunk) + +SYM_FUNC_START(entry_untrain_ret) + ALTERNATIVE_2 "jmp retbleed_untrain_ret", \ + "jmp srso_untrain_ret", X86_FEATURE_SRSO, \ + "jmp srso_alias_untrain_ret", X86_FEATURE_SRSO_ALIAS +SYM_FUNC_END(entry_untrain_ret) +__EXPORT_THUNK(entry_untrain_ret) + +SYM_CODE_START(__x86_return_thunk) + UNWIND_HINT_FUNC + ANNOTATE_NOENDBR + ANNOTATE_UNRET_SAFE + ret int3 SYM_CODE_END(__x86_return_thunk) EXPORT_SYMBOL(__x86_return_thunk) |