diff options
author | Brian Gerst <brgerst@gmail.com> | 2023-03-17 01:21:03 +0300 |
---|---|---|
committer | Thomas Gleixner <tglx@linutronix.de> | 2023-03-21 15:35:53 +0300 |
commit | 3adee777ad0d328e76ca9015cb7924134a992e81 (patch) | |
tree | bee9a622486c7759ff8c4325d982143376fd67cb /arch/x86/kernel/head_64.S | |
parent | cefad862f23874174136a48d91cb4a6ac3b1c5ce (diff) | |
download | linux-3adee777ad0d328e76ca9015cb7924134a992e81.tar.xz |
x86/smpboot: Remove initial_stack on 64-bit
In order to facilitate parallel startup, start to eliminate some of the
global variables passing information to CPUs in the startup path.
However, start by introducing one more: smpboot_control. For now this
merely holds the CPU# of the CPU which is coming up. Each CPU can then
find its own per-cpu data, and everything else it needs can be found
from there, allowing the other global variables to be removed.
First to be removed is initial_stack. Each CPU can load %rsp from its
current_task->thread.sp instead. That is already set up with the correct
idle thread for APs. Set up the .sp field in INIT_THREAD on x86 so that
the BSP also finds a suitable stack pointer in the static per-cpu data
when coming up on first boot.
On resume from S3, the CPU needs a temporary stack because its idle task
is already active. Instead of setting initial_stack, the sleep code can
simply set its own current->thread.sp to point to the temporary stack.
Nobody else cares about ->thread.sp for a thread which is currently on
a CPU, because the true value is actually in the %rsp register. Which
is restored with the rest of the CPU context in do_suspend_lowlevel().
Signed-off-by: Brian Gerst <brgerst@gmail.com>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Usama Arif <usama.arif@bytedance.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Usama Arif <usama.arif@bytedance.com>
Tested-by: Guilherme G. Piccoli <gpiccoli@igalia.com>
Reviewed-by: David Woodhouse <dwmw@amazon.co.uk>
Link: https://lore.kernel.org/r/20230316222109.1940300-7-usama.arif@bytedance.com
Diffstat (limited to 'arch/x86/kernel/head_64.S')
-rw-r--r-- | arch/x86/kernel/head_64.S | 43 |
1 files changed, 28 insertions, 15 deletions
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 222efd4a09bc..cc1b145055ac 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -61,8 +61,8 @@ SYM_CODE_START_NOALIGN(startup_64) * tables and then reload them. */ - /* Set up the stack for verify_cpu(), similar to initial_stack below */ - leaq (__end_init_task - FRAME_SIZE)(%rip), %rsp + /* Set up the stack for verify_cpu() */ + leaq (__end_init_task - PTREGS_SIZE)(%rip), %rsp leaq _text(%rip), %rdi @@ -241,6 +241,24 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL) UNWIND_HINT_EMPTY ANNOTATE_NOENDBR // above +#ifdef CONFIG_SMP + movl smpboot_control(%rip), %ecx + + /* Get the per cpu offset for the given CPU# which is in ECX */ + movq __per_cpu_offset(,%rcx,8), %rdx +#else + xorl %edx, %edx /* zero-extended to clear all of RDX */ +#endif /* CONFIG_SMP */ + + /* + * Setup a boot time stack - Any secondary CPU will have lost its stack + * by now because the cr3-switch above unmaps the real-mode stack. + * + * RDX contains the per-cpu offset + */ + movq pcpu_hot + X86_current_task(%rdx), %rax + movq TASK_threadsp(%rax), %rsp + /* * We must switch to a new descriptor in kernel space for the GDT * because soon the kernel won't have access anymore to the userspace @@ -275,12 +293,6 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL) movl initial_gs+4(%rip),%edx wrmsr - /* - * Setup a boot time stack - Any secondary CPU will have lost its stack - * by now because the cr3-switch above unmaps the real-mode stack - */ - movq initial_stack(%rip), %rsp - /* Setup and Load IDT */ pushq %rsi call early_setup_idt @@ -372,7 +384,11 @@ SYM_CODE_END(secondary_startup_64) SYM_CODE_START(start_cpu0) ANNOTATE_NOENDBR UNWIND_HINT_EMPTY - movq initial_stack(%rip), %rsp + + /* Find the idle task stack */ + movq PER_CPU_VAR(pcpu_hot) + X86_current_task, %rcx + movq TASK_threadsp(%rcx), %rsp + jmp .Ljump_to_C_code SYM_CODE_END(start_cpu0) #endif @@ -420,12 +436,6 @@ SYM_DATA(initial_gs, .quad INIT_PER_CPU_VAR(fixed_percpu_data)) #ifdef CONFIG_AMD_MEM_ENCRYPT SYM_DATA(initial_vc_handler, .quad handle_vc_boot_ghcb) #endif - -/* - * The FRAME_SIZE gap is a convention which helps the in-kernel unwinder - * reliably detect the end of the stack. - */ -SYM_DATA(initial_stack, .quad init_thread_union + THREAD_SIZE - FRAME_SIZE) __FINITDATA __INIT @@ -661,6 +671,9 @@ SYM_DATA(early_gdt_descr, .word GDT_ENTRIES*8-1) SYM_DATA_LOCAL(early_gdt_descr_base, .quad INIT_PER_CPU_VAR(gdt_page)) .align 16 +SYM_DATA(smpboot_control, .long 0) + + .align 16 /* This must match the first entry in level2_kernel_pgt */ SYM_DATA(phys_base, .quad 0x0) EXPORT_SYMBOL(phys_base) |