From 762f169f5d9b92f057ad2c27ec4e3849b743239a Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 7 Aug 2023 18:21:20 +0200 Subject: efi/x86: Move EFI runtime call setup/teardown helpers out of line Only the arch_efi_call_virt() macro that some architectures override needs to be a macro, given that it is variadic and encapsulates calls via function pointers that have different prototypes. The associated setup and teardown code are not special in this regard, and don't need to be instantiated at each call site. So turn them into ordinary C functions and move them out of line. Signed-off-by: Ard Biesheuvel --- arch/x86/include/asm/efi.h | 32 ++------------------------------ 1 file changed, 2 insertions(+), 30 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index b0994ae3bc23..c4555b269a1b 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -91,19 +91,6 @@ static inline void efi_fpu_end(void) #ifdef CONFIG_X86_32 #define EFI_X86_KERNEL_ALLOC_LIMIT (SZ_512M - 1) - -#define arch_efi_call_virt_setup() \ -({ \ - efi_fpu_begin(); \ - firmware_restrict_branch_speculation_start(); \ -}) - -#define arch_efi_call_virt_teardown() \ -({ \ - firmware_restrict_branch_speculation_end(); \ - efi_fpu_end(); \ -}) - #else /* !CONFIG_X86_32 */ #define EFI_X86_KERNEL_ALLOC_LIMIT EFI_ALLOC_LIMIT @@ -116,14 +103,6 @@ extern bool efi_disable_ibt_for_runtime; __efi_call(__VA_ARGS__); \ }) -#define arch_efi_call_virt_setup() \ -({ \ - efi_sync_low_kernel_mappings(); \ - efi_fpu_begin(); \ - firmware_restrict_branch_speculation_start(); \ - efi_enter_mm(); \ -}) - #undef arch_efi_call_virt #define arch_efi_call_virt(p, f, args...) ({ \ u64 ret, ibt = ibt_save(efi_disable_ibt_for_runtime); \ @@ -132,13 +111,6 @@ extern bool efi_disable_ibt_for_runtime; ret; \ }) -#define arch_efi_call_virt_teardown() \ -({ \ - efi_leave_mm(); \ - firmware_restrict_branch_speculation_end(); \ - efi_fpu_end(); \ -}) - #ifdef CONFIG_KASAN /* * CONFIG_KASAN may redefine memset to __memset. __memset function is present @@ -168,8 +140,8 @@ extern void efi_delete_dummy_variable(void); extern void efi_crash_gracefully_on_page_fault(unsigned long phys_addr); extern void efi_free_boot_services(void); -void efi_enter_mm(void); -void efi_leave_mm(void); +void arch_efi_call_virt_setup(void); +void arch_efi_call_virt_teardown(void); /* kexec external ABI */ struct efi_setup_data { -- cgit v1.2.3 From 25e73b7e3f72a25aa30cbb2eecb49036e0acf066 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 2 Aug 2023 12:55:46 +0200 Subject: x86/ibt: Suppress spurious ENDBR It was reported that under certain circumstances GCC emits ENDBR instructions for _THIS_IP_ usage. Specifically, when it appears at the start of a basic block -- but not elsewhere. Since _THIS_IP_ is never used for control flow, these ENDBR instructions are completely superfluous. Override the _THIS_IP_ definition for x86_64 to avoid this. Less ENDBR instructions is better. Fixes: 156ff4a544ae ("x86/ibt: Base IBT bits") Reported-by: David Kaplan Reviewed-by: Andrew Cooper Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20230802110323.016197440@infradead.org --- arch/x86/include/asm/linkage.h | 8 ++++++++ include/linux/instruction_pointer.h | 5 +++++ 2 files changed, 13 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/linkage.h b/arch/x86/include/asm/linkage.h index 97a3de7892d3..5ff49fd67732 100644 --- a/arch/x86/include/asm/linkage.h +++ b/arch/x86/include/asm/linkage.h @@ -8,6 +8,14 @@ #undef notrace #define notrace __attribute__((no_instrument_function)) +#ifdef CONFIG_64BIT +/* + * The generic version tends to create spurious ENDBR instructions under + * certain conditions. + */ +#define _THIS_IP_ ({ unsigned long __here; asm ("lea 0(%%rip), %0" : "=r" (__here)); __here; }) +#endif + #ifdef CONFIG_X86_32 #define asmlinkage CPP_ASMLINKAGE __attribute__((regparm(0))) #endif /* CONFIG_X86_32 */ diff --git a/include/linux/instruction_pointer.h b/include/linux/instruction_pointer.h index cda1f706eaeb..aa0b3ffea935 100644 --- a/include/linux/instruction_pointer.h +++ b/include/linux/instruction_pointer.h @@ -2,7 +2,12 @@ #ifndef _LINUX_INSTRUCTION_POINTER_H #define _LINUX_INSTRUCTION_POINTER_H +#include + #define _RET_IP_ (unsigned long)__builtin_return_address(0) + +#ifndef _THIS_IP_ #define _THIS_IP_ ({ __label__ __here; __here: (unsigned long)&&__here; }) +#endif #endif /* _LINUX_INSTRUCTION_POINTER_H */ -- cgit v1.2.3 From f530ee95b72e77b09c141c4b1a4b94d1199ffbd9 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Fri, 15 Sep 2023 10:02:21 +0300 Subject: x86/boot/compressed: Reserve more memory for page tables The decompressor has a hard limit on the number of page tables it can allocate. This limit is defined at compile-time and will cause boot failure if it is reached. The kernel is very strict and calculates the limit precisely for the worst-case scenario based on the current configuration. However, it is easy to forget to adjust the limit when a new use-case arises. The worst-case scenario is rarely encountered during sanity checks. In the case of enabling 5-level paging, a use-case was overlooked. The limit needs to be increased by one to accommodate the additional level. This oversight went unnoticed until Aaron attempted to run the kernel via kexec with 5-level paging and unaccepted memory enabled. Update wost-case calculations to include 5-level paging. To address this issue, let's allocate some extra space for page tables. 128K should be sufficient for any use-case. The logic can be simplified by using a single value for all kernel configurations. [ Also add a warning, should this memory run low - by Dave Hansen. ] Fixes: 34bbb0009f3b ("x86/boot/compressed: Enable 5-level paging during decompression stage") Reported-by: Aaron Lu Signed-off-by: Kirill A. Shutemov Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20230915070221.10266-1-kirill.shutemov@linux.intel.com --- arch/x86/boot/compressed/ident_map_64.c | 8 ++++++ arch/x86/include/asm/boot.h | 45 +++++++++++++++++++++++---------- 2 files changed, 39 insertions(+), 14 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/boot/compressed/ident_map_64.c b/arch/x86/boot/compressed/ident_map_64.c index bcc956c17872..08f93b0401bb 100644 --- a/arch/x86/boot/compressed/ident_map_64.c +++ b/arch/x86/boot/compressed/ident_map_64.c @@ -59,6 +59,14 @@ static void *alloc_pgt_page(void *context) return NULL; } + /* Consumed more tables than expected? */ + if (pages->pgt_buf_offset == BOOT_PGT_SIZE_WARN) { + debug_putstr("pgt_buf running low in " __FILE__ "\n"); + debug_putstr("Need to raise BOOT_PGT_SIZE?\n"); + debug_putaddr(pages->pgt_buf_offset); + debug_putaddr(pages->pgt_buf_size); + } + entry = pages->pgt_buf + pages->pgt_buf_offset; pages->pgt_buf_offset += PAGE_SIZE; diff --git a/arch/x86/include/asm/boot.h b/arch/x86/include/asm/boot.h index 4ae14339cb8c..b3a7cfb0d99e 100644 --- a/arch/x86/include/asm/boot.h +++ b/arch/x86/include/asm/boot.h @@ -40,23 +40,40 @@ #ifdef CONFIG_X86_64 # define BOOT_STACK_SIZE 0x4000 +/* + * Used by decompressor's startup_32() to allocate page tables for identity + * mapping of the 4G of RAM in 4-level paging mode: + * - 1 level4 table; + * - 1 level3 table; + * - 4 level2 table that maps everything with 2M pages; + * + * The additional level5 table needed for 5-level paging is allocated from + * trampoline_32bit memory. + */ # define BOOT_INIT_PGT_SIZE (6*4096) -# ifdef CONFIG_RANDOMIZE_BASE + /* - * Assuming all cross the 512GB boundary: - * 1 page for level4 - * (2+2)*4 pages for kernel, param, cmd_line, and randomized kernel - * 2 pages for first 2M (video RAM: CONFIG_X86_VERBOSE_BOOTUP). - * Total is 19 pages. + * Total number of page tables kernel_add_identity_map() can allocate, + * including page tables consumed by startup_32(). + * + * Worst-case scenario: + * - 5-level paging needs 1 level5 table; + * - KASLR needs to map kernel, boot_params, cmdline and randomized kernel, + * assuming all of them cross 256T boundary: + * + 4*2 level4 table; + * + 4*2 level3 table; + * + 4*2 level2 table; + * - X86_VERBOSE_BOOTUP needs to map the first 2M (video RAM): + * + 1 level4 table; + * + 1 level3 table; + * + 1 level2 table; + * Total: 28 tables + * + * Add 4 spare table in case decompressor touches anything beyond what is + * accounted above. Warn if it happens. */ -# ifdef CONFIG_X86_VERBOSE_BOOTUP -# define BOOT_PGT_SIZE (19*4096) -# else /* !CONFIG_X86_VERBOSE_BOOTUP */ -# define BOOT_PGT_SIZE (17*4096) -# endif -# else /* !CONFIG_RANDOMIZE_BASE */ -# define BOOT_PGT_SIZE BOOT_INIT_PGT_SIZE -# endif +# define BOOT_PGT_SIZE_WARN (28*4096) +# define BOOT_PGT_SIZE (32*4096) #else /* !CONFIG_X86_64 */ # define BOOT_STACK_SIZE 0x1000 -- cgit v1.2.3