From d2d7a54f69b67cd0a30e0ebb5307cb2de625baac Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 7 Aug 2023 18:27:00 +0200 Subject: x86/efistub: Branch straight to kernel entry point from C code Instead of returning to the calling code in assembler that does nothing more than perform an indirect call with the boot_params pointer in register ESI/RSI, perform the jump directly from the EFI stub C code. This will allow the asm entrypoint code to be dropped entirely in subsequent patches. Signed-off-by: Ard Biesheuvel Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20230807162720.545787-4-ardb@kernel.org --- drivers/firmware/efi/libstub/x86-stub.c | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) (limited to 'drivers') diff --git a/drivers/firmware/efi/libstub/x86-stub.c b/drivers/firmware/efi/libstub/x86-stub.c index 220be75a5cdc..40a10db2d85e 100644 --- a/drivers/firmware/efi/libstub/x86-stub.c +++ b/drivers/firmware/efi/libstub/x86-stub.c @@ -290,7 +290,7 @@ adjust_memory_range_protection(unsigned long start, unsigned long size) #define TRAMPOLINE_PLACEMENT_BASE ((128 - 8)*1024) #define TRAMPOLINE_PLACEMENT_SIZE (640*1024 - (128 - 8)*1024) -void startup_32(struct boot_params *boot_params); +extern const u8 startup_32[], startup_64[]; static void setup_memory_protection(unsigned long image_base, unsigned long image_size) @@ -803,10 +803,19 @@ static efi_status_t exit_boot(struct boot_params *boot_params, void *handle) return EFI_SUCCESS; } +static void __noreturn enter_kernel(unsigned long kernel_addr, + struct boot_params *boot_params) +{ + /* enter decompressed kernel with boot_params pointer in RSI/ESI */ + asm("jmp *%0"::"r"(kernel_addr), "S"(boot_params)); + + unreachable(); +} + /* - * On success, we return the address of startup_32, which has potentially been - * relocated by efi_relocate_kernel. - * On failure, we exit to the firmware via efi_exit instead of returning. + * On success, this routine will jump to the relocated image directly and never + * return. On failure, it will exit to the firmware via efi_exit() instead of + * returning. */ asmlinkage unsigned long efi_main(efi_handle_t handle, efi_system_table_t *sys_table_arg, @@ -950,7 +959,10 @@ asmlinkage unsigned long efi_main(efi_handle_t handle, goto fail; } - return bzimage_addr; + if (IS_ENABLED(CONFIG_X86_64)) + bzimage_addr += startup_64 - startup_32; + + enter_kernel(bzimage_addr, boot_params); fail: efi_err("efi_main() failed!\n"); -- cgit v1.2.3 From df9215f15206c2a81909ccf60f21d170801dce38 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 7 Aug 2023 18:27:01 +0200 Subject: x86/efistub: Simplify and clean up handover entry code Now that the EFI entry code in assembler is only used by the optional and deprecated EFI handover protocol, and given that the EFI stub C code no longer returns to it, most of it can simply be dropped. While at it, clarify the symbol naming, by merging efi_main() and efi_stub_entry(), making the latter the shared entry point for all different boot modes that enter via the EFI stub. The efi32_stub_entry() and efi64_stub_entry() names are referenced explicitly by the tooling that populates the setup header, so these must be retained, but can be emitted as aliases of efi_stub_entry() where appropriate. Signed-off-by: Ard Biesheuvel Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20230807162720.545787-5-ardb@kernel.org --- Documentation/arch/x86/boot.rst | 2 +- arch/x86/boot/compressed/efi_mixed.S | 22 ++++++++++++---------- arch/x86/boot/compressed/head_32.S | 11 ----------- arch/x86/boot/compressed/head_64.S | 12 ++---------- drivers/firmware/efi/libstub/x86-stub.c | 20 ++++++++++++++++---- 5 files changed, 31 insertions(+), 36 deletions(-) (limited to 'drivers') diff --git a/Documentation/arch/x86/boot.rst b/Documentation/arch/x86/boot.rst index 33520ecdb37a..cdbca15a4fc2 100644 --- a/Documentation/arch/x86/boot.rst +++ b/Documentation/arch/x86/boot.rst @@ -1417,7 +1417,7 @@ execution context provided by the EFI firmware. The function prototype for the handover entry point looks like this:: - efi_main(void *handle, efi_system_table_t *table, struct boot_params *bp) + efi_stub_entry(void *handle, efi_system_table_t *table, struct boot_params *bp) 'handle' is the EFI image handle passed to the boot loader by the EFI firmware, 'table' is the EFI system table - these are the first two diff --git a/arch/x86/boot/compressed/efi_mixed.S b/arch/x86/boot/compressed/efi_mixed.S index 4ca70bf93dc0..dcc562c8f7f3 100644 --- a/arch/x86/boot/compressed/efi_mixed.S +++ b/arch/x86/boot/compressed/efi_mixed.S @@ -26,8 +26,8 @@ * When booting in 64-bit mode on 32-bit EFI firmware, startup_64_mixed_mode() * is the first thing that runs after switching to long mode. Depending on * whether the EFI handover protocol or the compat entry point was used to - * enter the kernel, it will either branch to the 64-bit EFI handover - * entrypoint at offset 0x390 in the image, or to the 64-bit EFI PE/COFF + * enter the kernel, it will either branch to the common 64-bit EFI stub + * entrypoint efi_stub_entry() directly, or via the 64-bit EFI PE/COFF * entrypoint efi_pe_entry(). In the former case, the bootloader must provide a * struct bootparams pointer as the third argument, so the presence of such a * pointer is used to disambiguate. @@ -37,21 +37,23 @@ * | efi32_pe_entry |---->| | | +-----------+--+ * +------------------+ | | +------+----------------+ | * | startup_32 |---->| startup_64_mixed_mode | | - * +------------------+ | | +------+----------------+ V - * | efi32_stub_entry |---->| | | +------------------+ - * +------------------+ +------------+ +---->| efi64_stub_entry | - * +-------------+----+ - * +------------+ +----------+ | - * | startup_64 |<----| efi_main |<--------------+ - * +------------+ +----------+ + * +------------------+ | | +------+----------------+ | + * | efi32_stub_entry |---->| | | | + * +------------------+ +------------+ | | + * V | + * +------------+ +----------------+ | + * | startup_64 |<----| efi_stub_entry |<--------+ + * +------------+ +----------------+ */ SYM_FUNC_START(startup_64_mixed_mode) lea efi32_boot_args(%rip), %rdx mov 0(%rdx), %edi mov 4(%rdx), %esi +#ifdef CONFIG_EFI_HANDOVER_PROTOCOL mov 8(%rdx), %edx // saved bootparams pointer test %edx, %edx - jnz efi64_stub_entry + jnz efi_stub_entry +#endif /* * efi_pe_entry uses MS calling convention, which requires 32 bytes of * shadow space on the stack even if all arguments are passed in diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S index 987ae727cf9f..8876ffe30e9a 100644 --- a/arch/x86/boot/compressed/head_32.S +++ b/arch/x86/boot/compressed/head_32.S @@ -150,17 +150,6 @@ SYM_FUNC_START(startup_32) jmp *%eax SYM_FUNC_END(startup_32) -#ifdef CONFIG_EFI_STUB -SYM_FUNC_START(efi32_stub_entry) - add $0x4, %esp - movl 8(%esp), %esi /* save boot_params pointer */ - call efi_main - /* efi_main returns the possibly relocated address of startup_32 */ - jmp *%eax -SYM_FUNC_END(efi32_stub_entry) -SYM_FUNC_ALIAS(efi_stub_entry, efi32_stub_entry) -#endif - .text SYM_FUNC_START_LOCAL_NOALIGN(.Lrelocated) diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index f732426d3b48..e6880208b9a1 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -542,19 +542,11 @@ trampoline_return: jmp *%rax SYM_CODE_END(startup_64) -#ifdef CONFIG_EFI_STUB -#ifdef CONFIG_EFI_HANDOVER_PROTOCOL +#if IS_ENABLED(CONFIG_EFI_MIXED) && IS_ENABLED(CONFIG_EFI_HANDOVER_PROTOCOL) .org 0x390 -#endif SYM_FUNC_START(efi64_stub_entry) - and $~0xf, %rsp /* realign the stack */ - movq %rdx, %rbx /* save boot_params pointer */ - call efi_main - movq %rbx,%rsi - leaq rva(startup_64)(%rax), %rax - jmp *%rax + jmp efi_stub_entry SYM_FUNC_END(efi64_stub_entry) -SYM_FUNC_ALIAS(efi_stub_entry, efi64_stub_entry) #endif .text diff --git a/drivers/firmware/efi/libstub/x86-stub.c b/drivers/firmware/efi/libstub/x86-stub.c index 40a10db2d85e..3f3b3edf7a38 100644 --- a/drivers/firmware/efi/libstub/x86-stub.c +++ b/drivers/firmware/efi/libstub/x86-stub.c @@ -817,9 +817,9 @@ static void __noreturn enter_kernel(unsigned long kernel_addr, * return. On failure, it will exit to the firmware via efi_exit() instead of * returning. */ -asmlinkage unsigned long efi_main(efi_handle_t handle, - efi_system_table_t *sys_table_arg, - struct boot_params *boot_params) +void __noreturn efi_stub_entry(efi_handle_t handle, + efi_system_table_t *sys_table_arg, + struct boot_params *boot_params) { unsigned long bzimage_addr = (unsigned long)startup_32; unsigned long buffer_start, buffer_end; @@ -964,7 +964,19 @@ asmlinkage unsigned long efi_main(efi_handle_t handle, enter_kernel(bzimage_addr, boot_params); fail: - efi_err("efi_main() failed!\n"); + efi_err("efi_stub_entry() failed!\n"); efi_exit(handle, status); } + +#ifdef CONFIG_EFI_HANDOVER_PROTOCOL +#ifndef CONFIG_EFI_MIXED +extern __alias(efi_stub_entry) +void efi32_stub_entry(efi_handle_t handle, efi_system_table_t *sys_table_arg, + struct boot_params *boot_params); + +extern __alias(efi_stub_entry) +void efi64_stub_entry(efi_handle_t handle, efi_system_table_t *sys_table_arg, + struct boot_params *boot_params); +#endif +#endif -- cgit v1.2.3 From d7156b986d4cc0657fa6dc05c9fcf51c3d55a0fe Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 7 Aug 2023 18:27:03 +0200 Subject: x86/efistub: Clear BSS in EFI handover protocol entrypoint The so-called EFI handover protocol is value-add from the distros that permits a loader to simply copy a PE kernel image into memory and call an alternative entrypoint that is described by an embedded boot_params structure. Most implementations of this protocol do not bother to check the PE header for minimum alignment, section placement, etc, and therefore also don't clear the image's BSS, or even allocate enough memory for it. Allocating more memory on the fly is rather difficult, but at least clear the BSS region explicitly when entering in this manner, so that the EFI stub code does not get confused by global variables that were not zero-initialized correctly. When booting in mixed mode, this BSS clearing must occur before any global state is created, so clear it in the 32-bit asm entry point. Signed-off-by: Ard Biesheuvel Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20230807162720.545787-7-ardb@kernel.org --- arch/x86/boot/compressed/efi_mixed.S | 14 +++++++++++++- drivers/firmware/efi/libstub/x86-stub.c | 13 +++++++++++-- 2 files changed, 24 insertions(+), 3 deletions(-) (limited to 'drivers') diff --git a/arch/x86/boot/compressed/efi_mixed.S b/arch/x86/boot/compressed/efi_mixed.S index 9308b595f6f0..8a02a151806d 100644 --- a/arch/x86/boot/compressed/efi_mixed.S +++ b/arch/x86/boot/compressed/efi_mixed.S @@ -142,6 +142,18 @@ SYM_FUNC_END(__efi64_thunk) .code32 #ifdef CONFIG_EFI_HANDOVER_PROTOCOL SYM_FUNC_START(efi32_stub_entry) + call 1f +1: popl %ecx + + /* Clear BSS */ + xorl %eax, %eax + leal (_bss - 1b)(%ecx), %edi + leal (_ebss - 1b)(%ecx), %ecx + subl %edi, %ecx + shrl $2, %ecx + cld + rep stosl + add $0x4, %esp /* Discard return address */ popl %ecx popl %edx @@ -334,7 +346,7 @@ SYM_FUNC_END(efi32_pe_entry) .org efi32_stub_entry + 0x200 .code64 SYM_FUNC_START_NOALIGN(efi64_stub_entry) - jmp efi_stub_entry + jmp efi_handover_entry SYM_FUNC_END(efi64_stub_entry) #endif diff --git a/drivers/firmware/efi/libstub/x86-stub.c b/drivers/firmware/efi/libstub/x86-stub.c index 3f3b3edf7a38..9247dbc7dbbd 100644 --- a/drivers/firmware/efi/libstub/x86-stub.c +++ b/drivers/firmware/efi/libstub/x86-stub.c @@ -970,12 +970,21 @@ fail: } #ifdef CONFIG_EFI_HANDOVER_PROTOCOL +void efi_handover_entry(efi_handle_t handle, efi_system_table_t *sys_table_arg, + struct boot_params *boot_params) +{ + extern char _bss[], _ebss[]; + + memset(_bss, 0, _ebss - _bss); + efi_stub_entry(handle, sys_table_arg, boot_params); +} + #ifndef CONFIG_EFI_MIXED -extern __alias(efi_stub_entry) +extern __alias(efi_handover_entry) void efi32_stub_entry(efi_handle_t handle, efi_system_table_t *sys_table_arg, struct boot_params *boot_params); -extern __alias(efi_stub_entry) +extern __alias(efi_handover_entry) void efi64_stub_entry(efi_handle_t handle, efi_system_table_t *sys_table_arg, struct boot_params *boot_params); #endif -- cgit v1.2.3 From cb1c9e02b0c13032c3aec325643453ba48e96e17 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 7 Aug 2023 18:27:13 +0200 Subject: x86/efistub: Perform 4/5 level paging switch from the stub In preparation for updating the EFI stub boot flow to avoid the bare metal decompressor code altogether, implement the support code for switching between 4 and 5 levels of paging before jumping to the kernel proper. Reuse the newly refactored trampoline that the bare metal decompressor uses, but relies on EFI APIs to allocate 32-bit addressable memory and remap it with the appropriate permissions. Given that the bare metal decompressor will no longer call into the trampoline if the number of paging levels is already set correctly, it is no longer needed to remove NX restrictions from the memory range where this trampoline may end up. Signed-off-by: Ard Biesheuvel Signed-off-by: Borislav Petkov (AMD) Acked-by: Kirill A. Shutemov Link: https://lore.kernel.org/r/20230807162720.545787-17-ardb@kernel.org --- drivers/firmware/efi/libstub/Makefile | 1 + drivers/firmware/efi/libstub/efi-stub-helper.c | 2 + drivers/firmware/efi/libstub/efistub.h | 1 + drivers/firmware/efi/libstub/x86-5lvl.c | 95 ++++++++++++++++++++++++++ drivers/firmware/efi/libstub/x86-stub.c | 40 ++++------- drivers/firmware/efi/libstub/x86-stub.h | 17 +++++ 6 files changed, 130 insertions(+), 26 deletions(-) create mode 100644 drivers/firmware/efi/libstub/x86-5lvl.c create mode 100644 drivers/firmware/efi/libstub/x86-stub.h (limited to 'drivers') diff --git a/drivers/firmware/efi/libstub/Makefile b/drivers/firmware/efi/libstub/Makefile index 16d64a34d1e1..ae8874401a9f 100644 --- a/drivers/firmware/efi/libstub/Makefile +++ b/drivers/firmware/efi/libstub/Makefile @@ -88,6 +88,7 @@ lib-$(CONFIG_EFI_GENERIC_STUB) += efi-stub.o string.o intrinsics.o systable.o \ lib-$(CONFIG_ARM) += arm32-stub.o lib-$(CONFIG_ARM64) += arm64.o arm64-stub.o smbios.o lib-$(CONFIG_X86) += x86-stub.o +lib-$(CONFIG_X86_64) += x86-5lvl.o lib-$(CONFIG_RISCV) += riscv.o riscv-stub.o lib-$(CONFIG_LOONGARCH) += loongarch.o loongarch-stub.o diff --git a/drivers/firmware/efi/libstub/efi-stub-helper.c b/drivers/firmware/efi/libstub/efi-stub-helper.c index 732984295295..bfa30625f5d0 100644 --- a/drivers/firmware/efi/libstub/efi-stub-helper.c +++ b/drivers/firmware/efi/libstub/efi-stub-helper.c @@ -73,6 +73,8 @@ efi_status_t efi_parse_options(char const *cmdline) efi_loglevel = CONSOLE_LOGLEVEL_QUIET; } else if (!strcmp(param, "noinitrd")) { efi_noinitrd = true; + } else if (IS_ENABLED(CONFIG_X86_64) && !strcmp(param, "no5lvl")) { + efi_no5lvl = true; } else if (!strcmp(param, "efi") && val) { efi_nochunk = parse_option_str(val, "nochunk"); efi_novamap |= parse_option_str(val, "novamap"); diff --git a/drivers/firmware/efi/libstub/efistub.h b/drivers/firmware/efi/libstub/efistub.h index 6aa38a1bf126..06b7abc92ced 100644 --- a/drivers/firmware/efi/libstub/efistub.h +++ b/drivers/firmware/efi/libstub/efistub.h @@ -33,6 +33,7 @@ #define EFI_ALLOC_LIMIT ULONG_MAX #endif +extern bool efi_no5lvl; extern bool efi_nochunk; extern bool efi_nokaslr; extern int efi_loglevel; diff --git a/drivers/firmware/efi/libstub/x86-5lvl.c b/drivers/firmware/efi/libstub/x86-5lvl.c new file mode 100644 index 000000000000..479dd445acdc --- /dev/null +++ b/drivers/firmware/efi/libstub/x86-5lvl.c @@ -0,0 +1,95 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include + +#include +#include +#include + +#include "efistub.h" +#include "x86-stub.h" + +bool efi_no5lvl; + +static void (*la57_toggle)(void *cr3); + +static const struct desc_struct gdt[] = { + [GDT_ENTRY_KERNEL32_CS] = GDT_ENTRY_INIT(0xc09b, 0, 0xfffff), + [GDT_ENTRY_KERNEL_CS] = GDT_ENTRY_INIT(0xa09b, 0, 0xfffff), +}; + +/* + * Enabling (or disabling) 5 level paging is tricky, because it can only be + * done from 32-bit mode with paging disabled. This means not only that the + * code itself must be running from 32-bit addressable physical memory, but + * also that the root page table must be 32-bit addressable, as programming + * a 64-bit value into CR3 when running in 32-bit mode is not supported. + */ +efi_status_t efi_setup_5level_paging(void) +{ + u8 tmpl_size = (u8 *)&trampoline_ljmp_imm_offset - (u8 *)&trampoline_32bit_src; + efi_status_t status; + u8 *la57_code; + + if (!efi_is_64bit()) + return EFI_SUCCESS; + + /* check for 5 level paging support */ + if (native_cpuid_eax(0) < 7 || + !(native_cpuid_ecx(7) & (1 << (X86_FEATURE_LA57 & 31)))) + return EFI_SUCCESS; + + /* allocate some 32-bit addressable memory for code and a page table */ + status = efi_allocate_pages(2 * PAGE_SIZE, (unsigned long *)&la57_code, + U32_MAX); + if (status != EFI_SUCCESS) + return status; + + la57_toggle = memcpy(la57_code, trampoline_32bit_src, tmpl_size); + memset(la57_code + tmpl_size, 0x90, PAGE_SIZE - tmpl_size); + + /* + * To avoid the need to allocate a 32-bit addressable stack, the + * trampoline uses a LJMP instruction to switch back to long mode. + * LJMP takes an absolute destination address, which needs to be + * fixed up at runtime. + */ + *(u32 *)&la57_code[trampoline_ljmp_imm_offset] += (unsigned long)la57_code; + + efi_adjust_memory_range_protection((unsigned long)la57_toggle, PAGE_SIZE); + + return EFI_SUCCESS; +} + +void efi_5level_switch(void) +{ + bool want_la57 = IS_ENABLED(CONFIG_X86_5LEVEL) && !efi_no5lvl; + bool have_la57 = native_read_cr4() & X86_CR4_LA57; + bool need_toggle = want_la57 ^ have_la57; + u64 *pgt = (void *)la57_toggle + PAGE_SIZE; + u64 *cr3 = (u64 *)__native_read_cr3(); + u64 *new_cr3; + + if (!la57_toggle || !need_toggle) + return; + + if (!have_la57) { + /* + * 5 level paging will be enabled, so a root level page needs + * to be allocated from the 32-bit addressable physical region, + * with its first entry referring to the existing hierarchy. + */ + new_cr3 = memset(pgt, 0, PAGE_SIZE); + new_cr3[0] = (u64)cr3 | _PAGE_TABLE_NOENC; + } else { + /* take the new root table pointer from the current entry #0 */ + new_cr3 = (u64 *)(cr3[0] & PAGE_MASK); + + /* copy the new root table if it is not 32-bit addressable */ + if ((u64)new_cr3 > U32_MAX) + new_cr3 = memcpy(pgt, new_cr3, PAGE_SIZE); + } + + native_load_gdt(&(struct desc_ptr){ sizeof(gdt) - 1, (u64)gdt }); + + la57_toggle(new_cr3); +} diff --git a/drivers/firmware/efi/libstub/x86-stub.c b/drivers/firmware/efi/libstub/x86-stub.c index 9247dbc7dbbd..af5f50617a5b 100644 --- a/drivers/firmware/efi/libstub/x86-stub.c +++ b/drivers/firmware/efi/libstub/x86-stub.c @@ -17,6 +17,7 @@ #include #include "efistub.h" +#include "x86-stub.h" /* Maximum physical address for 64-bit kernel with 4-level paging */ #define MAXMEM_X86_64_4LEVEL (1ull << 46) @@ -223,8 +224,8 @@ static void retrieve_apple_device_properties(struct boot_params *boot_params) } } -static void -adjust_memory_range_protection(unsigned long start, unsigned long size) +void efi_adjust_memory_range_protection(unsigned long start, + unsigned long size) { efi_status_t status; efi_gcd_memory_space_desc_t desc; @@ -278,35 +279,14 @@ adjust_memory_range_protection(unsigned long start, unsigned long size) } } -/* - * Trampoline takes 2 pages and can be loaded in first megabyte of memory - * with its end placed between 128k and 640k where BIOS might start. - * (see arch/x86/boot/compressed/pgtable_64.c) - * - * We cannot find exact trampoline placement since memory map - * can be modified by UEFI, and it can alter the computed address. - */ - -#define TRAMPOLINE_PLACEMENT_BASE ((128 - 8)*1024) -#define TRAMPOLINE_PLACEMENT_SIZE (640*1024 - (128 - 8)*1024) - extern const u8 startup_32[], startup_64[]; static void setup_memory_protection(unsigned long image_base, unsigned long image_size) { - /* - * Allow execution of possible trampoline used - * for switching between 4- and 5-level page tables - * and relocated kernel image. - */ - - adjust_memory_range_protection(TRAMPOLINE_PLACEMENT_BASE, - TRAMPOLINE_PLACEMENT_SIZE); - #ifdef CONFIG_64BIT if (image_base != (unsigned long)startup_32) - adjust_memory_range_protection(image_base, image_size); + efi_adjust_memory_range_protection(image_base, image_size); #else /* * Clear protection flags on a whole range of possible @@ -316,8 +296,8 @@ setup_memory_protection(unsigned long image_base, unsigned long image_size) * need to remove possible protection on relocated image * itself disregarding further relocations. */ - adjust_memory_range_protection(LOAD_PHYSICAL_ADDR, - KERNEL_IMAGE_SIZE - LOAD_PHYSICAL_ADDR); + efi_adjust_memory_range_protection(LOAD_PHYSICAL_ADDR, + KERNEL_IMAGE_SIZE - LOAD_PHYSICAL_ADDR); #endif } @@ -839,6 +819,12 @@ void __noreturn efi_stub_entry(efi_handle_t handle, efi_dxe_table = NULL; } + status = efi_setup_5level_paging(); + if (status != EFI_SUCCESS) { + efi_err("efi_setup_5level_paging() failed!\n"); + goto fail; + } + /* * If the kernel isn't already loaded at a suitable address, * relocate it. @@ -959,6 +945,8 @@ void __noreturn efi_stub_entry(efi_handle_t handle, goto fail; } + efi_5level_switch(); + if (IS_ENABLED(CONFIG_X86_64)) bzimage_addr += startup_64 - startup_32; diff --git a/drivers/firmware/efi/libstub/x86-stub.h b/drivers/firmware/efi/libstub/x86-stub.h new file mode 100644 index 000000000000..37c5a36b9d8c --- /dev/null +++ b/drivers/firmware/efi/libstub/x86-stub.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#include + +extern void trampoline_32bit_src(void *, bool); +extern const u16 trampoline_ljmp_imm_offset; + +void efi_adjust_memory_range_protection(unsigned long start, + unsigned long size); + +#ifdef CONFIG_X86_64 +efi_status_t efi_setup_5level_paging(void); +void efi_5level_switch(void); +#else +static inline efi_status_t efi_setup_5level_paging(void) { return EFI_SUCCESS; } +static inline void efi_5level_switch(void) {} +#endif -- cgit v1.2.3 From 11078876b7a6a1b7226344fecab968945c806832 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 7 Aug 2023 18:27:14 +0200 Subject: x86/efistub: Prefer EFI memory attributes protocol over DXE services Currently, the EFI stub relies on DXE services in some cases to clear non-execute restrictions from page allocations that need to be executable. This is dodgy, because DXE services are not specified by UEFI but by PI, and they are not intended for consumption by OS loaders. However, no alternative existed at the time. Now, there is a new UEFI protocol that should be used instead, so if it exists, prefer it over the DXE services calls. Signed-off-by: Ard Biesheuvel Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20230807162720.545787-18-ardb@kernel.org --- drivers/firmware/efi/libstub/x86-stub.c | 29 +++++++++++++++++++++-------- 1 file changed, 21 insertions(+), 8 deletions(-) (limited to 'drivers') diff --git a/drivers/firmware/efi/libstub/x86-stub.c b/drivers/firmware/efi/libstub/x86-stub.c index af5f50617a5b..acb1c65bf8ac 100644 --- a/drivers/firmware/efi/libstub/x86-stub.c +++ b/drivers/firmware/efi/libstub/x86-stub.c @@ -26,6 +26,7 @@ const efi_system_table_t *efi_system_table; const efi_dxe_services_table_t *efi_dxe_table; u32 image_offset __section(".data"); static efi_loaded_image_t *image = NULL; +static efi_memory_attribute_protocol_t *memattr; typedef union sev_memory_acceptance_protocol sev_memory_acceptance_protocol_t; union sev_memory_acceptance_protocol { @@ -233,12 +234,18 @@ void efi_adjust_memory_range_protection(unsigned long start, unsigned long rounded_start, rounded_end; unsigned long unprotect_start, unprotect_size; - if (efi_dxe_table == NULL) - return; - rounded_start = rounddown(start, EFI_PAGE_SIZE); rounded_end = roundup(start + size, EFI_PAGE_SIZE); + if (memattr != NULL) { + efi_call_proto(memattr, clear_memory_attributes, rounded_start, + rounded_end - rounded_start, EFI_MEMORY_XP); + return; + } + + if (efi_dxe_table == NULL) + return; + /* * Don't modify memory region attributes, they are * already suitable, to lower the possibility to @@ -801,6 +808,7 @@ void __noreturn efi_stub_entry(efi_handle_t handle, efi_system_table_t *sys_table_arg, struct boot_params *boot_params) { + efi_guid_t guid = EFI_MEMORY_ATTRIBUTE_PROTOCOL_GUID; unsigned long bzimage_addr = (unsigned long)startup_32; unsigned long buffer_start, buffer_end; struct setup_header *hdr = &boot_params->hdr; @@ -812,13 +820,18 @@ void __noreturn efi_stub_entry(efi_handle_t handle, if (efi_system_table->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE) efi_exit(handle, EFI_INVALID_PARAMETER); - efi_dxe_table = get_efi_config_table(EFI_DXE_SERVICES_TABLE_GUID); - if (efi_dxe_table && - efi_dxe_table->hdr.signature != EFI_DXE_SERVICES_TABLE_SIGNATURE) { - efi_warn("Ignoring DXE services table: invalid signature\n"); - efi_dxe_table = NULL; + if (IS_ENABLED(CONFIG_EFI_DXE_MEM_ATTRIBUTES)) { + efi_dxe_table = get_efi_config_table(EFI_DXE_SERVICES_TABLE_GUID); + if (efi_dxe_table && + efi_dxe_table->hdr.signature != EFI_DXE_SERVICES_TABLE_SIGNATURE) { + efi_warn("Ignoring DXE services table: invalid signature\n"); + efi_dxe_table = NULL; + } } + /* grab the memory attributes protocol if it exists */ + efi_bs_call(locate_protocol, &guid, NULL, (void **)&memattr); + status = efi_setup_5level_paging(); if (status != EFI_SUCCESS) { efi_err("efi_setup_5level_paging() failed!\n"); -- cgit v1.2.3 From bc5ddceff4c14494d83449ad45c985e6cd353fce Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 7 Aug 2023 18:27:18 +0200 Subject: efi/libstub: Add limit argument to efi_random_alloc() x86 will need to limit the kernel memory allocation to the lowest 512 MiB of memory, to match the behavior of the existing bare metal KASLR physical randomization logic. So in preparation for that, add a limit parameter to efi_random_alloc() and wire it up. Signed-off-by: Ard Biesheuvel Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20230807162720.545787-22-ardb@kernel.org --- drivers/firmware/efi/libstub/arm64-stub.c | 2 +- drivers/firmware/efi/libstub/efistub.h | 2 +- drivers/firmware/efi/libstub/randomalloc.c | 10 ++++++---- drivers/firmware/efi/libstub/zboot.c | 2 +- 4 files changed, 9 insertions(+), 7 deletions(-) (limited to 'drivers') diff --git a/drivers/firmware/efi/libstub/arm64-stub.c b/drivers/firmware/efi/libstub/arm64-stub.c index 770b8ecb7398..8c40fc89f5f9 100644 --- a/drivers/firmware/efi/libstub/arm64-stub.c +++ b/drivers/firmware/efi/libstub/arm64-stub.c @@ -106,7 +106,7 @@ efi_status_t handle_kernel_image(unsigned long *image_addr, */ status = efi_random_alloc(*reserve_size, min_kimg_align, reserve_addr, phys_seed, - EFI_LOADER_CODE); + EFI_LOADER_CODE, EFI_ALLOC_LIMIT); if (status != EFI_SUCCESS) efi_warn("efi_random_alloc() failed: 0x%lx\n", status); } else { diff --git a/drivers/firmware/efi/libstub/efistub.h b/drivers/firmware/efi/libstub/efistub.h index 06b7abc92ced..9823f6fb3e01 100644 --- a/drivers/firmware/efi/libstub/efistub.h +++ b/drivers/firmware/efi/libstub/efistub.h @@ -956,7 +956,7 @@ efi_status_t efi_get_random_bytes(unsigned long size, u8 *out); efi_status_t efi_random_alloc(unsigned long size, unsigned long align, unsigned long *addr, unsigned long random_seed, - int memory_type); + int memory_type, unsigned long alloc_limit); efi_status_t efi_random_get_seed(void); diff --git a/drivers/firmware/efi/libstub/randomalloc.c b/drivers/firmware/efi/libstub/randomalloc.c index 32c7a54923b4..674a064b8f7a 100644 --- a/drivers/firmware/efi/libstub/randomalloc.c +++ b/drivers/firmware/efi/libstub/randomalloc.c @@ -16,7 +16,8 @@ */ static unsigned long get_entry_num_slots(efi_memory_desc_t *md, unsigned long size, - unsigned long align_shift) + unsigned long align_shift, + u64 alloc_limit) { unsigned long align = 1UL << align_shift; u64 first_slot, last_slot, region_end; @@ -29,7 +30,7 @@ static unsigned long get_entry_num_slots(efi_memory_desc_t *md, return 0; region_end = min(md->phys_addr + md->num_pages * EFI_PAGE_SIZE - 1, - (u64)EFI_ALLOC_LIMIT); + alloc_limit); if (region_end < size) return 0; @@ -54,7 +55,8 @@ efi_status_t efi_random_alloc(unsigned long size, unsigned long align, unsigned long *addr, unsigned long random_seed, - int memory_type) + int memory_type, + unsigned long alloc_limit) { unsigned long total_slots = 0, target_slot; unsigned long total_mirrored_slots = 0; @@ -76,7 +78,7 @@ efi_status_t efi_random_alloc(unsigned long size, efi_memory_desc_t *md = (void *)map->map + map_offset; unsigned long slots; - slots = get_entry_num_slots(md, size, ilog2(align)); + slots = get_entry_num_slots(md, size, ilog2(align), alloc_limit); MD_NUM_SLOTS(md) = slots; total_slots += slots; if (md->attribute & EFI_MEMORY_MORE_RELIABLE) diff --git a/drivers/firmware/efi/libstub/zboot.c b/drivers/firmware/efi/libstub/zboot.c index e5d7fa1f1d8f..bdb17eac0cb4 100644 --- a/drivers/firmware/efi/libstub/zboot.c +++ b/drivers/firmware/efi/libstub/zboot.c @@ -119,7 +119,7 @@ efi_zboot_entry(efi_handle_t handle, efi_system_table_t *systab) } status = efi_random_alloc(alloc_size, min_kimg_align, &image_base, - seed, EFI_LOADER_CODE); + seed, EFI_LOADER_CODE, EFI_ALLOC_LIMIT); if (status != EFI_SUCCESS) { efi_err("Failed to allocate memory\n"); goto free_cmdline; -- cgit v1.2.3 From 31c77a50992e8dd136feed7b67073bb5f1f978cc Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 7 Aug 2023 18:27:19 +0200 Subject: x86/efistub: Perform SNP feature test while running in the firmware Before refactoring the EFI stub boot flow to avoid the legacy bare metal decompressor, duplicate the SNP feature check in the EFI stub before handing over to the kernel proper. The SNP feature check can be performed while running under the EFI boot services, which means it can force the boot to fail gracefully and return an error to the bootloader if the loaded kernel does not implement support for all the features that the hypervisor enabled. Signed-off-by: Ard Biesheuvel Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20230807162720.545787-23-ardb@kernel.org --- arch/x86/boot/compressed/sev.c | 112 +++++++++++++++++++------------- arch/x86/include/asm/sev.h | 4 ++ drivers/firmware/efi/libstub/x86-stub.c | 17 +++++ 3 files changed, 87 insertions(+), 46 deletions(-) (limited to 'drivers') diff --git a/arch/x86/boot/compressed/sev.c b/arch/x86/boot/compressed/sev.c index c3e343bd4760..199155b8af3b 100644 --- a/arch/x86/boot/compressed/sev.c +++ b/arch/x86/boot/compressed/sev.c @@ -367,20 +367,25 @@ static void enforce_vmpl0(void) */ #define SNP_FEATURES_PRESENT (0) +u64 snp_get_unsupported_features(u64 status) +{ + if (!(status & MSR_AMD64_SEV_SNP_ENABLED)) + return 0; + + return status & SNP_FEATURES_IMPL_REQ & ~SNP_FEATURES_PRESENT; +} + void snp_check_features(void) { u64 unsupported; - if (!(sev_status & MSR_AMD64_SEV_SNP_ENABLED)) - return; - /* * Terminate the boot if hypervisor has enabled any feature lacking * guest side implementation. Pass on the unsupported features mask through * EXIT_INFO_2 of the GHCB protocol so that those features can be reported * as part of the guest boot failure. */ - unsupported = sev_status & SNP_FEATURES_IMPL_REQ & ~SNP_FEATURES_PRESENT; + unsupported = snp_get_unsupported_features(sev_status); if (unsupported) { if (ghcb_version < 2 || (!boot_ghcb && !early_setup_ghcb())) sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SNP_UNSUPPORTED); @@ -390,35 +395,22 @@ void snp_check_features(void) } } -void sev_enable(struct boot_params *bp) +/* + * sev_check_cpu_support - Check for SEV support in the CPU capabilities + * + * Returns < 0 if SEV is not supported, otherwise the position of the + * encryption bit in the page table descriptors. + */ +static int sev_check_cpu_support(void) { unsigned int eax, ebx, ecx, edx; - struct msr m; - bool snp; - - /* - * bp->cc_blob_address should only be set by boot/compressed kernel. - * Initialize it to 0 to ensure that uninitialized values from - * buggy bootloaders aren't propagated. - */ - if (bp) - bp->cc_blob_address = 0; - - /* - * Do an initial SEV capability check before snp_init() which - * loads the CPUID page and the same checks afterwards are done - * without the hypervisor and are trustworthy. - * - * If the HV fakes SEV support, the guest will crash'n'burn - * which is good enough. - */ /* Check for the SME/SEV support leaf */ eax = 0x80000000; ecx = 0; native_cpuid(&eax, &ebx, &ecx, &edx); if (eax < 0x8000001f) - return; + return -ENODEV; /* * Check for the SME/SEV feature: @@ -433,6 +425,35 @@ void sev_enable(struct boot_params *bp) native_cpuid(&eax, &ebx, &ecx, &edx); /* Check whether SEV is supported */ if (!(eax & BIT(1))) + return -ENODEV; + + return ebx & 0x3f; +} + +void sev_enable(struct boot_params *bp) +{ + struct msr m; + int bitpos; + bool snp; + + /* + * bp->cc_blob_address should only be set by boot/compressed kernel. + * Initialize it to 0 to ensure that uninitialized values from + * buggy bootloaders aren't propagated. + */ + if (bp) + bp->cc_blob_address = 0; + + /* + * Do an initial SEV capability check before snp_init() which + * loads the CPUID page and the same checks afterwards are done + * without the hypervisor and are trustworthy. + * + * If the HV fakes SEV support, the guest will crash'n'burn + * which is good enough. + */ + + if (sev_check_cpu_support() < 0) return; /* @@ -443,26 +464,8 @@ void sev_enable(struct boot_params *bp) /* Now repeat the checks with the SNP CPUID table. */ - /* Recheck the SME/SEV support leaf */ - eax = 0x80000000; - ecx = 0; - native_cpuid(&eax, &ebx, &ecx, &edx); - if (eax < 0x8000001f) - return; - - /* - * Recheck for the SME/SEV feature: - * CPUID Fn8000_001F[EAX] - * - Bit 0 - Secure Memory Encryption support - * - Bit 1 - Secure Encrypted Virtualization support - * CPUID Fn8000_001F[EBX] - * - Bits 5:0 - Pagetable bit position used to indicate encryption - */ - eax = 0x8000001f; - ecx = 0; - native_cpuid(&eax, &ebx, &ecx, &edx); - /* Check whether SEV is supported */ - if (!(eax & BIT(1))) { + bitpos = sev_check_cpu_support(); + if (bitpos < 0) { if (snp) error("SEV-SNP support indicated by CC blob, but not CPUID."); return; @@ -494,7 +497,24 @@ void sev_enable(struct boot_params *bp) if (snp && !(sev_status & MSR_AMD64_SEV_SNP_ENABLED)) error("SEV-SNP supported indicated by CC blob, but not SEV status MSR."); - sme_me_mask = BIT_ULL(ebx & 0x3f); + sme_me_mask = BIT_ULL(bitpos); +} + +/* + * sev_get_status - Retrieve the SEV status mask + * + * Returns 0 if the CPU is not SEV capable, otherwise the value of the + * AMD64_SEV MSR. + */ +u64 sev_get_status(void) +{ + struct msr m; + + if (sev_check_cpu_support() < 0) + return 0; + + boot_rdmsr(MSR_AMD64_SEV, &m); + return m.q; } /* Search for Confidential Computing blob in the EFI config table. */ diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h index 66c806784c52..b97d239e18ea 100644 --- a/arch/x86/include/asm/sev.h +++ b/arch/x86/include/asm/sev.h @@ -210,6 +210,8 @@ bool snp_init(struct boot_params *bp); void __init __noreturn snp_abort(void); int snp_issue_guest_request(u64 exit_code, struct snp_req_data *input, struct snp_guest_request_ioctl *rio); void snp_accept_memory(phys_addr_t start, phys_addr_t end); +u64 snp_get_unsupported_features(u64 status); +u64 sev_get_status(void); #else static inline void sev_es_ist_enter(struct pt_regs *regs) { } static inline void sev_es_ist_exit(void) { } @@ -235,6 +237,8 @@ static inline int snp_issue_guest_request(u64 exit_code, struct snp_req_data *in } static inline void snp_accept_memory(phys_addr_t start, phys_addr_t end) { } +static inline u64 snp_get_unsupported_features(u64 status) { return 0; } +static inline u64 sev_get_status(void) { return 0; } #endif #endif diff --git a/drivers/firmware/efi/libstub/x86-stub.c b/drivers/firmware/efi/libstub/x86-stub.c index acb1c65bf8ac..b4685da2b8d5 100644 --- a/drivers/firmware/efi/libstub/x86-stub.c +++ b/drivers/firmware/efi/libstub/x86-stub.c @@ -15,6 +15,7 @@ #include #include #include +#include #include "efistub.h" #include "x86-stub.h" @@ -790,6 +791,19 @@ static efi_status_t exit_boot(struct boot_params *boot_params, void *handle) return EFI_SUCCESS; } +static bool have_unsupported_snp_features(void) +{ + u64 unsupported; + + unsupported = snp_get_unsupported_features(sev_get_status()); + if (unsupported) { + efi_err("Unsupported SEV-SNP features detected: 0x%llx\n", + unsupported); + return true; + } + return false; +} + static void __noreturn enter_kernel(unsigned long kernel_addr, struct boot_params *boot_params) { @@ -820,6 +834,9 @@ void __noreturn efi_stub_entry(efi_handle_t handle, if (efi_system_table->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE) efi_exit(handle, EFI_INVALID_PARAMETER); + if (have_unsupported_snp_features()) + efi_exit(handle, EFI_UNSUPPORTED); + if (IS_ENABLED(CONFIG_EFI_DXE_MEM_ATTRIBUTES)) { efi_dxe_table = get_efi_config_table(EFI_DXE_SERVICES_TABLE_GUID); if (efi_dxe_table && -- cgit v1.2.3 From a1b87d54f4e45ff5e0d081fb1d9db3bf1a8fb39a Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 7 Aug 2023 18:27:20 +0200 Subject: x86/efistub: Avoid legacy decompressor when doing EFI boot The bare metal decompressor code was never really intended to run in a hosted environment such as the EFI boot services, and does a few things that are becoming problematic in the context of EFI boot now that the logo requirements are getting tighter: EFI executables will no longer be allowed to consist of a single executable section that is mapped with read, write and execute permissions if they are intended for use in a context where Secure Boot is enabled (and where Microsoft's set of certificates is used, i.e., every x86 PC built to run Windows). To avoid stepping on reserved memory before having inspected the E820 tables, and to ensure the correct placement when running a kernel build that is non-relocatable, the bare metal decompressor moves its own executable image to the end of the allocation that was reserved for it, in order to perform the decompression in place. This means the region in question requires both write and execute permissions, which either need to be given upfront (which EFI will no longer permit), or need to be applied on demand using the existing page fault handling framework. However, the physical placement of the kernel is usually randomized anyway, and even if it isn't, a dedicated decompression output buffer can be allocated anywhere in memory using EFI APIs when still running in the boot services, given that EFI support already implies a relocatable kernel. This means that decompression in place is never necessary, nor is moving the compressed image from one end to the other. Since EFI already maps all of memory 1:1, it is also unnecessary to create new page tables or handle page faults when decompressing the kernel. That means there is also no need to replace the special exception handlers for SEV. Generally, there is little need to do any of the things that the decompressor does beyond - initialize SEV encryption, if needed, - perform the 4/5 level paging switch, if needed, - decompress the kernel - relocate the kernel So do all of this from the EFI stub code, and avoid the bare metal decompressor altogether. Signed-off-by: Ard Biesheuvel Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20230807162720.545787-24-ardb@kernel.org --- arch/x86/boot/compressed/Makefile | 5 + arch/x86/boot/compressed/efi_mixed.S | 55 ----------- arch/x86/boot/compressed/head_32.S | 13 --- arch/x86/boot/compressed/head_64.S | 27 ------ arch/x86/include/asm/efi.h | 7 +- arch/x86/include/asm/sev.h | 2 + drivers/firmware/efi/libstub/x86-stub.c | 166 ++++++++++++++------------------ 7 files changed, 84 insertions(+), 191 deletions(-) (limited to 'drivers') diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile index 40d2ff503079..71fc531b95b4 100644 --- a/arch/x86/boot/compressed/Makefile +++ b/arch/x86/boot/compressed/Makefile @@ -74,6 +74,11 @@ LDFLAGS_vmlinux += -z noexecstack ifeq ($(CONFIG_LD_IS_BFD),y) LDFLAGS_vmlinux += $(call ld-option,--no-warn-rwx-segments) endif +ifeq ($(CONFIG_EFI_STUB),y) +# ensure that the static EFI stub library will be pulled in, even if it is +# never referenced explicitly from the startup code +LDFLAGS_vmlinux += -u efi_pe_entry +endif LDFLAGS_vmlinux += -T hostprogs := mkpiggy diff --git a/arch/x86/boot/compressed/efi_mixed.S b/arch/x86/boot/compressed/efi_mixed.S index 8a02a151806d..f4e22ef774ab 100644 --- a/arch/x86/boot/compressed/efi_mixed.S +++ b/arch/x86/boot/compressed/efi_mixed.S @@ -269,10 +269,6 @@ SYM_FUNC_START_LOCAL(efi32_entry) jmp startup_32 SYM_FUNC_END(efi32_entry) -#define ST32_boottime 60 // offsetof(efi_system_table_32_t, boottime) -#define BS32_handle_protocol 88 // offsetof(efi_boot_services_32_t, handle_protocol) -#define LI32_image_base 32 // offsetof(efi_loaded_image_32_t, image_base) - /* * efi_status_t efi32_pe_entry(efi_handle_t image_handle, * efi_system_table_32_t *sys_table) @@ -280,8 +276,6 @@ SYM_FUNC_END(efi32_entry) SYM_FUNC_START(efi32_pe_entry) pushl %ebp movl %esp, %ebp - pushl %eax // dummy push to allocate loaded_image - pushl %ebx // save callee-save registers pushl %edi @@ -290,48 +284,8 @@ SYM_FUNC_START(efi32_pe_entry) movl $0x80000003, %eax // EFI_UNSUPPORTED jnz 2f - call 1f -1: pop %ebx - - /* Get the loaded image protocol pointer from the image handle */ - leal -4(%ebp), %eax - pushl %eax // &loaded_image - leal (loaded_image_proto - 1b)(%ebx), %eax - pushl %eax // pass the GUID address - pushl 8(%ebp) // pass the image handle - - /* - * Note the alignment of the stack frame. - * sys_table - * handle <-- 16-byte aligned on entry by ABI - * return address - * frame pointer - * loaded_image <-- local variable - * saved %ebx <-- 16-byte aligned here - * saved %edi - * &loaded_image - * &loaded_image_proto - * handle <-- 16-byte aligned for call to handle_protocol - */ - - movl 12(%ebp), %eax // sys_table - movl ST32_boottime(%eax), %eax // sys_table->boottime - call *BS32_handle_protocol(%eax) // sys_table->boottime->handle_protocol - addl $12, %esp // restore argument space - testl %eax, %eax - jnz 2f - movl 8(%ebp), %ecx // image_handle movl 12(%ebp), %edx // sys_table - movl -4(%ebp), %esi // loaded_image - movl LI32_image_base(%esi), %esi // loaded_image->image_base - leal (startup_32 - 1b)(%ebx), %ebp // runtime address of startup_32 - /* - * We need to set the image_offset variable here since startup_32() will - * use it before we get to the 64-bit efi_pe_entry() in C code. - */ - subl %esi, %ebp // calculate image_offset - movl %ebp, (image_offset - 1b)(%ebx) // save image_offset xorl %esi, %esi jmp efi32_entry // pass %ecx, %edx, %esi // no other registers remain live @@ -350,15 +304,6 @@ SYM_FUNC_START_NOALIGN(efi64_stub_entry) SYM_FUNC_END(efi64_stub_entry) #endif - .section ".rodata" - /* EFI loaded image protocol GUID */ - .balign 4 -SYM_DATA_START_LOCAL(loaded_image_proto) - .long 0x5b1b31a1 - .word 0x9562, 0x11d2 - .byte 0x8e, 0x3f, 0x00, 0xa0, 0xc9, 0x69, 0x72, 0x3b -SYM_DATA_END(loaded_image_proto) - .data .balign 8 SYM_DATA_START_LOCAL(efi32_boot_gdt) diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S index 3af4a383615b..1cfe9802a42f 100644 --- a/arch/x86/boot/compressed/head_32.S +++ b/arch/x86/boot/compressed/head_32.S @@ -84,19 +84,6 @@ SYM_FUNC_START(startup_32) #ifdef CONFIG_RELOCATABLE leal startup_32@GOTOFF(%edx), %ebx - -#ifdef CONFIG_EFI_STUB -/* - * If we were loaded via the EFI LoadImage service, startup_32() will be at an - * offset to the start of the space allocated for the image. efi_pe_entry() will - * set up image_offset to tell us where the image actually starts, so that we - * can use the full available buffer. - * image_offset = startup_32 - image_base - * Otherwise image_offset will be zero and has no effect on the calculations. - */ - subl image_offset@GOTOFF(%edx), %ebx -#endif - movl BP_kernel_alignment(%esi), %eax decl %eax addl %eax, %ebx diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index 28f46051c706..bf4a10a5794f 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -146,19 +146,6 @@ SYM_FUNC_START(startup_32) #ifdef CONFIG_RELOCATABLE movl %ebp, %ebx - -#ifdef CONFIG_EFI_STUB -/* - * If we were loaded via the EFI LoadImage service, startup_32 will be at an - * offset to the start of the space allocated for the image. efi_pe_entry will - * set up image_offset to tell us where the image actually starts, so that we - * can use the full available buffer. - * image_offset = startup_32 - image_base - * Otherwise image_offset will be zero and has no effect on the calculations. - */ - subl rva(image_offset)(%ebp), %ebx -#endif - movl BP_kernel_alignment(%esi), %eax decl %eax addl %eax, %ebx @@ -335,20 +322,6 @@ SYM_CODE_START(startup_64) /* Start with the delta to where the kernel will run at. */ #ifdef CONFIG_RELOCATABLE leaq startup_32(%rip) /* - $startup_32 */, %rbp - -#ifdef CONFIG_EFI_STUB -/* - * If we were loaded via the EFI LoadImage service, startup_32 will be at an - * offset to the start of the space allocated for the image. efi_pe_entry will - * set up image_offset to tell us where the image actually starts, so that we - * can use the full available buffer. - * image_offset = startup_32 - image_base - * Otherwise image_offset will be zero and has no effect on the calculations. - */ - movl image_offset(%rip), %eax - subq %rax, %rbp -#endif - movl BP_kernel_alignment(%rsi), %eax decl %eax addq %rax, %rbp diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index 8b4be7cecdb8..b0994ae3bc23 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -90,6 +90,8 @@ static inline void efi_fpu_end(void) } #ifdef CONFIG_X86_32 +#define EFI_X86_KERNEL_ALLOC_LIMIT (SZ_512M - 1) + #define arch_efi_call_virt_setup() \ ({ \ efi_fpu_begin(); \ @@ -103,8 +105,7 @@ static inline void efi_fpu_end(void) }) #else /* !CONFIG_X86_32 */ - -#define EFI_LOADER_SIGNATURE "EL64" +#define EFI_X86_KERNEL_ALLOC_LIMIT EFI_ALLOC_LIMIT extern asmlinkage u64 __efi_call(void *fp, ...); @@ -218,6 +219,8 @@ efi_status_t efi_set_virtual_address_map(unsigned long memory_map_size, #ifdef CONFIG_EFI_MIXED +#define EFI_ALLOC_LIMIT (efi_is_64bit() ? ULONG_MAX : U32_MAX) + #define ARCH_HAS_EFISTUB_WRAPPERS static inline bool efi_is_64bit(void) diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h index b97d239e18ea..5b4a1ce3d368 100644 --- a/arch/x86/include/asm/sev.h +++ b/arch/x86/include/asm/sev.h @@ -164,6 +164,7 @@ static __always_inline void sev_es_nmi_complete(void) __sev_es_nmi_complete(); } extern int __init sev_es_efi_map_ghcbs(pgd_t *pgd); +extern void sev_enable(struct boot_params *bp); static inline int rmpadjust(unsigned long vaddr, bool rmp_psize, unsigned long attrs) { @@ -218,6 +219,7 @@ static inline void sev_es_ist_exit(void) { } static inline int sev_es_setup_ap_jump_table(struct real_mode_header *rmh) { return 0; } static inline void sev_es_nmi_complete(void) { } static inline int sev_es_efi_map_ghcbs(pgd_t *pgd) { return 0; } +static inline void sev_enable(struct boot_params *bp) { } static inline int pvalidate(unsigned long vaddr, bool rmp_psize, bool validate) { return 0; } static inline int rmpadjust(unsigned long vaddr, bool rmp_psize, unsigned long attrs) { return 0; } static inline void setup_ghcb(void) { } diff --git a/drivers/firmware/efi/libstub/x86-stub.c b/drivers/firmware/efi/libstub/x86-stub.c index b4685da2b8d5..e976288728e9 100644 --- a/drivers/firmware/efi/libstub/x86-stub.c +++ b/drivers/firmware/efi/libstub/x86-stub.c @@ -15,17 +15,14 @@ #include #include #include +#include #include #include "efistub.h" #include "x86-stub.h" -/* Maximum physical address for 64-bit kernel with 4-level paging */ -#define MAXMEM_X86_64_4LEVEL (1ull << 46) - const efi_system_table_t *efi_system_table; const efi_dxe_services_table_t *efi_dxe_table; -u32 image_offset __section(".data"); static efi_loaded_image_t *image = NULL; static efi_memory_attribute_protocol_t *memattr; @@ -287,28 +284,6 @@ void efi_adjust_memory_range_protection(unsigned long start, } } -extern const u8 startup_32[], startup_64[]; - -static void -setup_memory_protection(unsigned long image_base, unsigned long image_size) -{ -#ifdef CONFIG_64BIT - if (image_base != (unsigned long)startup_32) - efi_adjust_memory_range_protection(image_base, image_size); -#else - /* - * Clear protection flags on a whole range of possible - * addresses used for KASLR. We don't need to do that - * on x86_64, since KASLR/extraction is performed after - * dedicated identity page tables are built and we only - * need to remove possible protection on relocated image - * itself disregarding further relocations. - */ - efi_adjust_memory_range_protection(LOAD_PHYSICAL_ADDR, - KERNEL_IMAGE_SIZE - LOAD_PHYSICAL_ADDR); -#endif -} - static void setup_unaccepted_memory(void) { efi_guid_t mem_acceptance_proto = OVMF_SEV_MEMORY_ACCEPTANCE_PROTOCOL_GUID; @@ -334,9 +309,7 @@ static void setup_unaccepted_memory(void) static const efi_char16_t apple[] = L"Apple"; -static void setup_quirks(struct boot_params *boot_params, - unsigned long image_base, - unsigned long image_size) +static void setup_quirks(struct boot_params *boot_params) { efi_char16_t *fw_vendor = (efi_char16_t *)(unsigned long) efi_table_attr(efi_system_table, fw_vendor); @@ -345,9 +318,6 @@ static void setup_quirks(struct boot_params *boot_params, if (IS_ENABLED(CONFIG_APPLE_PROPERTIES)) retrieve_apple_device_properties(boot_params); } - - if (IS_ENABLED(CONFIG_EFI_DXE_MEM_ATTRIBUTES)) - setup_memory_protection(image_base, image_size); } /* @@ -500,7 +470,6 @@ efi_status_t __efiapi efi_pe_entry(efi_handle_t handle, } image_base = efi_table_attr(image, image_base); - image_offset = (void *)startup_32 - image_base; status = efi_allocate_pages(sizeof(struct boot_params), (unsigned long *)&boot_params, ULONG_MAX); @@ -804,6 +773,61 @@ static bool have_unsupported_snp_features(void) return false; } +static void efi_get_seed(void *seed, int size) +{ + efi_get_random_bytes(size, seed); + + /* + * This only updates seed[0] when running on 32-bit, but in that case, + * seed[1] is not used anyway, as there is no virtual KASLR on 32-bit. + */ + *(unsigned long *)seed ^= kaslr_get_random_long("EFI"); +} + +static void error(char *str) +{ + efi_warn("Decompression failed: %s\n", str); +} + +static efi_status_t efi_decompress_kernel(unsigned long *kernel_entry) +{ + unsigned long virt_addr = LOAD_PHYSICAL_ADDR; + unsigned long addr, alloc_size, entry; + efi_status_t status; + u32 seed[2] = {}; + + /* determine the required size of the allocation */ + alloc_size = ALIGN(max_t(unsigned long, output_len, kernel_total_size), + MIN_KERNEL_ALIGN); + + if (IS_ENABLED(CONFIG_RANDOMIZE_BASE) && !efi_nokaslr) { + u64 range = KERNEL_IMAGE_SIZE - LOAD_PHYSICAL_ADDR - kernel_total_size; + + efi_get_seed(seed, sizeof(seed)); + + virt_addr += (range * seed[1]) >> 32; + virt_addr &= ~(CONFIG_PHYSICAL_ALIGN - 1); + } + + status = efi_random_alloc(alloc_size, CONFIG_PHYSICAL_ALIGN, &addr, + seed[0], EFI_LOADER_CODE, + EFI_X86_KERNEL_ALLOC_LIMIT); + if (status != EFI_SUCCESS) + return status; + + entry = decompress_kernel((void *)addr, virt_addr, error); + if (entry == ULONG_MAX) { + efi_free(alloc_size, addr); + return EFI_LOAD_ERROR; + } + + *kernel_entry = addr + entry; + + efi_adjust_memory_range_protection(addr, kernel_total_size); + + return EFI_SUCCESS; +} + static void __noreturn enter_kernel(unsigned long kernel_addr, struct boot_params *boot_params) { @@ -823,10 +847,9 @@ void __noreturn efi_stub_entry(efi_handle_t handle, struct boot_params *boot_params) { efi_guid_t guid = EFI_MEMORY_ATTRIBUTE_PROTOCOL_GUID; - unsigned long bzimage_addr = (unsigned long)startup_32; - unsigned long buffer_start, buffer_end; struct setup_header *hdr = &boot_params->hdr; const struct linux_efi_initrd *initrd = NULL; + unsigned long kernel_entry; efi_status_t status; efi_system_table = sys_table_arg; @@ -855,60 +878,6 @@ void __noreturn efi_stub_entry(efi_handle_t handle, goto fail; } - /* - * If the kernel isn't already loaded at a suitable address, - * relocate it. - * - * It must be loaded above LOAD_PHYSICAL_ADDR. - * - * The maximum address for 64-bit is 1 << 46 for 4-level paging. This - * is defined as the macro MAXMEM, but unfortunately that is not a - * compile-time constant if 5-level paging is configured, so we instead - * define our own macro for use here. - * - * For 32-bit, the maximum address is complicated to figure out, for - * now use KERNEL_IMAGE_SIZE, which will be 512MiB, the same as what - * KASLR uses. - * - * Also relocate it if image_offset is zero, i.e. the kernel wasn't - * loaded by LoadImage, but rather by a bootloader that called the - * handover entry. The reason we must always relocate in this case is - * to handle the case of systemd-boot booting a unified kernel image, - * which is a PE executable that contains the bzImage and an initrd as - * COFF sections. The initrd section is placed after the bzImage - * without ensuring that there are at least init_size bytes available - * for the bzImage, and thus the compressed kernel's startup code may - * overwrite the initrd unless it is moved out of the way. - */ - - buffer_start = ALIGN(bzimage_addr - image_offset, - hdr->kernel_alignment); - buffer_end = buffer_start + hdr->init_size; - - if ((buffer_start < LOAD_PHYSICAL_ADDR) || - (IS_ENABLED(CONFIG_X86_32) && buffer_end > KERNEL_IMAGE_SIZE) || - (IS_ENABLED(CONFIG_X86_64) && buffer_end > MAXMEM_X86_64_4LEVEL) || - (image_offset == 0)) { - extern char _bss[]; - - status = efi_relocate_kernel(&bzimage_addr, - (unsigned long)_bss - bzimage_addr, - hdr->init_size, - hdr->pref_address, - hdr->kernel_alignment, - LOAD_PHYSICAL_ADDR); - if (status != EFI_SUCCESS) { - efi_err("efi_relocate_kernel() failed!\n"); - goto fail; - } - /* - * Now that we've copied the kernel elsewhere, we no longer - * have a set up block before startup_32(), so reset image_offset - * to zero in case it was set earlier. - */ - image_offset = 0; - } - #ifdef CONFIG_CMDLINE_BOOL status = efi_parse_options(CONFIG_CMDLINE); if (status != EFI_SUCCESS) { @@ -926,6 +895,12 @@ void __noreturn efi_stub_entry(efi_handle_t handle, } } + status = efi_decompress_kernel(&kernel_entry); + if (status != EFI_SUCCESS) { + efi_err("Failed to decompress kernel\n"); + goto fail; + } + /* * At this point, an initrd may already have been loaded by the * bootloader and passed via bootparams. We permit an initrd loaded @@ -965,7 +940,7 @@ void __noreturn efi_stub_entry(efi_handle_t handle, setup_efi_pci(boot_params); - setup_quirks(boot_params, bzimage_addr, buffer_end - buffer_start); + setup_quirks(boot_params); setup_unaccepted_memory(); @@ -975,12 +950,15 @@ void __noreturn efi_stub_entry(efi_handle_t handle, goto fail; } - efi_5level_switch(); + /* + * Call the SEV init code while still running with the firmware's + * GDT/IDT, so #VC exceptions will be handled by EFI. + */ + sev_enable(boot_params); - if (IS_ENABLED(CONFIG_X86_64)) - bzimage_addr += startup_64 - startup_32; + efi_5level_switch(); - enter_kernel(bzimage_addr, boot_params); + enter_kernel(kernel_entry, boot_params); fail: efi_err("efi_stub_entry() failed!\n"); -- cgit v1.2.3