summaryrefslogtreecommitdiff
path: root/arch/x86/entry
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/entry')
-rw-r--r--arch/x86/entry/common.c115
-rw-r--r--arch/x86/entry/entry.S2
-rw-r--r--arch/x86/entry/entry_32.S2
-rw-r--r--arch/x86/entry/entry_64.S84
-rw-r--r--arch/x86/entry/entry_64_compat.S11
-rw-r--r--arch/x86/entry/syscalls/syscall_32.tbl3
-rw-r--r--arch/x86/entry/syscalls/syscall_64.tbl3
-rw-r--r--arch/x86/entry/thunk_32.S2
-rw-r--r--arch/x86/entry/thunk_64.S2
-rw-r--r--arch/x86/entry/vdso/Makefile3
-rw-r--r--arch/x86/entry/vdso/vsgx.S1
11 files changed, 107 insertions, 121 deletions
diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index 6c2826417b33..d813160b14d8 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -19,6 +19,7 @@
#include <linux/nospec.h>
#include <linux/syscalls.h>
#include <linux/uaccess.h>
+#include <linux/init.h>
#ifdef CONFIG_XEN_PV
#include <xen/xen-ops.h>
@@ -70,7 +71,8 @@ static __always_inline bool do_syscall_x32(struct pt_regs *regs, int nr)
return false;
}
-__visible noinstr void do_syscall_64(struct pt_regs *regs, int nr)
+/* Returns true to return using SYSRET, or false to use IRET */
+__visible noinstr bool do_syscall_64(struct pt_regs *regs, int nr)
{
add_random_kstack_offset();
nr = syscall_enter_from_user_mode(regs, nr);
@@ -84,6 +86,46 @@ __visible noinstr void do_syscall_64(struct pt_regs *regs, int nr)
instrumentation_end();
syscall_exit_to_user_mode(regs);
+
+ /*
+ * Check that the register state is valid for using SYSRET to exit
+ * to userspace. Otherwise use the slower but fully capable IRET
+ * exit path.
+ */
+
+ /* XEN PV guests always use the IRET path */
+ if (cpu_feature_enabled(X86_FEATURE_XENPV))
+ return false;
+
+ /* SYSRET requires RCX == RIP and R11 == EFLAGS */
+ if (unlikely(regs->cx != regs->ip || regs->r11 != regs->flags))
+ return false;
+
+ /* CS and SS must match the values set in MSR_STAR */
+ if (unlikely(regs->cs != __USER_CS || regs->ss != __USER_DS))
+ return false;
+
+ /*
+ * On Intel CPUs, SYSRET with non-canonical RCX/RIP will #GP
+ * in kernel space. This essentially lets the user take over
+ * the kernel, since userspace controls RSP.
+ *
+ * TASK_SIZE_MAX covers all user-accessible addresses other than
+ * the deprecated vsyscall page.
+ */
+ if (unlikely(regs->ip >= TASK_SIZE_MAX))
+ return false;
+
+ /*
+ * SYSRET cannot restore RF. It can restore TF, but unlike IRET,
+ * restoring TF results in a trap from userspace immediately after
+ * SYSRET.
+ */
+ if (unlikely(regs->flags & (X86_EFLAGS_RF | X86_EFLAGS_TF)))
+ return false;
+
+ /* Use SYSRET to exit to userspace */
+ return true;
}
#endif
@@ -96,6 +138,16 @@ static __always_inline int syscall_32_enter(struct pt_regs *regs)
return (int)regs->orig_ax;
}
+#ifdef CONFIG_IA32_EMULATION
+bool __ia32_enabled __ro_after_init = !IS_ENABLED(CONFIG_IA32_EMULATION_DEFAULT_DISABLED);
+
+static int ia32_emulation_override_cmdline(char *arg)
+{
+ return kstrtobool(arg, &__ia32_enabled);
+}
+early_param("ia32_emulation", ia32_emulation_override_cmdline);
+#endif
+
/*
* Invoke a 32-bit syscall. Called with IRQs on in CONTEXT_KERNEL.
*/
@@ -182,8 +234,8 @@ static noinstr bool __do_fast_syscall_32(struct pt_regs *regs)
return true;
}
-/* Returns 0 to return using IRET or 1 to return using SYSEXIT/SYSRETL. */
-__visible noinstr long do_fast_syscall_32(struct pt_regs *regs)
+/* Returns true to return using SYSEXIT/SYSRETL, or false to use IRET */
+__visible noinstr bool do_fast_syscall_32(struct pt_regs *regs)
{
/*
* Called using the internal vDSO SYSENTER/SYSCALL32 calling
@@ -201,41 +253,36 @@ __visible noinstr long do_fast_syscall_32(struct pt_regs *regs)
/* Invoke the syscall. If it failed, keep it simple: use IRET. */
if (!__do_fast_syscall_32(regs))
- return 0;
+ return false;
-#ifdef CONFIG_X86_64
/*
- * Opportunistic SYSRETL: if possible, try to return using SYSRETL.
- * SYSRETL is available on all 64-bit CPUs, so we don't need to
- * bother with SYSEXIT.
- *
- * Unlike 64-bit opportunistic SYSRET, we can't check that CX == IP,
- * because the ECX fixup above will ensure that this is essentially
- * never the case.
- */
- return regs->cs == __USER32_CS && regs->ss == __USER_DS &&
- regs->ip == landing_pad &&
- (regs->flags & (X86_EFLAGS_RF | X86_EFLAGS_TF)) == 0;
-#else
- /*
- * Opportunistic SYSEXIT: if possible, try to return using SYSEXIT.
- *
- * Unlike 64-bit opportunistic SYSRET, we can't check that CX == IP,
- * because the ECX fixup above will ensure that this is essentially
- * never the case.
- *
- * We don't allow syscalls at all from VM86 mode, but we still
- * need to check VM, because we might be returning from sys_vm86.
+ * Check that the register state is valid for using SYSRETL/SYSEXIT
+ * to exit to userspace. Otherwise use the slower but fully capable
+ * IRET exit path.
*/
- return static_cpu_has(X86_FEATURE_SEP) &&
- regs->cs == __USER_CS && regs->ss == __USER_DS &&
- regs->ip == landing_pad &&
- (regs->flags & (X86_EFLAGS_RF | X86_EFLAGS_TF | X86_EFLAGS_VM)) == 0;
-#endif
+
+ /* XEN PV guests always use the IRET path */
+ if (cpu_feature_enabled(X86_FEATURE_XENPV))
+ return false;
+
+ /* EIP must point to the VDSO landing pad */
+ if (unlikely(regs->ip != landing_pad))
+ return false;
+
+ /* CS and SS must match the values set in MSR_STAR */
+ if (unlikely(regs->cs != __USER32_CS || regs->ss != __USER_DS))
+ return false;
+
+ /* If the TF, RF, or VM flags are set, use IRET */
+ if (unlikely(regs->flags & (X86_EFLAGS_RF | X86_EFLAGS_TF | X86_EFLAGS_VM)))
+ return false;
+
+ /* Use SYSRETL/SYSEXIT to exit to userspace */
+ return true;
}
-/* Returns 0 to return using IRET or 1 to return using SYSEXIT/SYSRETL. */
-__visible noinstr long do_SYSENTER_32(struct pt_regs *regs)
+/* Returns true to return using SYSEXIT/SYSRETL, or false to use IRET */
+__visible noinstr bool do_SYSENTER_32(struct pt_regs *regs)
{
/* SYSENTER loses RSP, but the vDSO saved it in RBP. */
regs->sp = regs->bp;
@@ -294,7 +341,7 @@ static void __xen_pv_evtchn_do_upcall(struct pt_regs *regs)
inc_irq_stat(irq_hv_callback_count);
- xen_hvm_evtchn_do_upcall();
+ xen_evtchn_do_upcall();
set_irq_regs(old_regs);
}
diff --git a/arch/x86/entry/entry.S b/arch/x86/entry/entry.S
index bfb7bcb362bc..8c8d38f0cb1d 100644
--- a/arch/x86/entry/entry.S
+++ b/arch/x86/entry/entry.S
@@ -3,8 +3,8 @@
* Common place for both 32- and 64-bit entry routines.
*/
+#include <linux/export.h>
#include <linux/linkage.h>
-#include <asm/export.h>
#include <asm/msr-index.h>
.pushsection .noinstr.text, "ax"
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index 6e6af42e044a..c73047bf9f4b 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -837,7 +837,7 @@ SYM_FUNC_START(entry_SYSENTER_32)
movl %esp, %eax
call do_SYSENTER_32
- testl %eax, %eax
+ testb %al, %al
jz .Lsyscall_32_done
STACKLEAK_ERASE
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 43606de22511..de6469dffe3a 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -18,6 +18,7 @@
* - SYM_FUNC_START/END:Define functions in the symbol table.
* - idtentry: Define exception entry points.
*/
+#include <linux/export.h>
#include <linux/linkage.h>
#include <asm/segment.h>
#include <asm/cache.h>
@@ -34,7 +35,6 @@
#include <asm/asm.h>
#include <asm/smap.h>
#include <asm/pgtable_types.h>
-#include <asm/export.h>
#include <asm/frame.h>
#include <asm/trapnr.h>
#include <asm/nospec-branch.h>
@@ -126,70 +126,8 @@ SYM_INNER_LABEL(entry_SYSCALL_64_after_hwframe, SYM_L_GLOBAL)
* In the Xen PV case we must use iret anyway.
*/
- ALTERNATIVE "", "jmp swapgs_restore_regs_and_return_to_usermode", \
- X86_FEATURE_XENPV
-
- movq RCX(%rsp), %rcx
- movq RIP(%rsp), %r11
-
- cmpq %rcx, %r11 /* SYSRET requires RCX == RIP */
- jne swapgs_restore_regs_and_return_to_usermode
-
- /*
- * On Intel CPUs, SYSRET with non-canonical RCX/RIP will #GP
- * in kernel space. This essentially lets the user take over
- * the kernel, since userspace controls RSP.
- *
- * If width of "canonical tail" ever becomes variable, this will need
- * to be updated to remain correct on both old and new CPUs.
- *
- * Change top bits to match most significant bit (47th or 56th bit
- * depending on paging mode) in the address.
- */
-#ifdef CONFIG_X86_5LEVEL
- ALTERNATIVE "shl $(64 - 48), %rcx; sar $(64 - 48), %rcx", \
- "shl $(64 - 57), %rcx; sar $(64 - 57), %rcx", X86_FEATURE_LA57
-#else
- shl $(64 - (__VIRTUAL_MASK_SHIFT+1)), %rcx
- sar $(64 - (__VIRTUAL_MASK_SHIFT+1)), %rcx
-#endif
-
- /* If this changed %rcx, it was not canonical */
- cmpq %rcx, %r11
- jne swapgs_restore_regs_and_return_to_usermode
-
- cmpq $__USER_CS, CS(%rsp) /* CS must match SYSRET */
- jne swapgs_restore_regs_and_return_to_usermode
-
- movq R11(%rsp), %r11
- cmpq %r11, EFLAGS(%rsp) /* R11 == RFLAGS */
- jne swapgs_restore_regs_and_return_to_usermode
-
- /*
- * SYSCALL clears RF when it saves RFLAGS in R11 and SYSRET cannot
- * restore RF properly. If the slowpath sets it for whatever reason, we
- * need to restore it correctly.
- *
- * SYSRET can restore TF, but unlike IRET, restoring TF results in a
- * trap from userspace immediately after SYSRET. This would cause an
- * infinite loop whenever #DB happens with register state that satisfies
- * the opportunistic SYSRET conditions. For example, single-stepping
- * this user code:
- *
- * movq $stuck_here, %rcx
- * pushfq
- * popq %r11
- * stuck_here:
- *
- * would never get past 'stuck_here'.
- */
- testq $(X86_EFLAGS_RF|X86_EFLAGS_TF), %r11
- jnz swapgs_restore_regs_and_return_to_usermode
-
- /* nothing to check for RSP */
-
- cmpq $__USER_DS, SS(%rsp) /* SS must match SYSRET */
- jne swapgs_restore_regs_and_return_to_usermode
+ ALTERNATIVE "testb %al, %al; jz swapgs_restore_regs_and_return_to_usermode", \
+ "jmp swapgs_restore_regs_and_return_to_usermode", X86_FEATURE_XENPV
/*
* We win! This label is here just for ease of understanding
@@ -1163,8 +1101,8 @@ SYM_CODE_START(asm_exc_nmi)
* anyway.
*
* To handle this case we do the following:
- * Check the a special location on the stack that contains
- * a variable that is set when NMIs are executing.
+ * Check a special location on the stack that contains a
+ * variable that is set when NMIs are executing.
* The interrupted task's stack is also checked to see if it
* is an NMI stack.
* If the variable is not set and the stack is not the NMI
@@ -1237,7 +1175,6 @@ SYM_CODE_START(asm_exc_nmi)
*/
movq %rsp, %rdi
- movq $-1, %rsi
call exc_nmi
/*
@@ -1295,8 +1232,8 @@ SYM_CODE_START(asm_exc_nmi)
* end_repeat_nmi, then we are a nested NMI. We must not
* modify the "iret" frame because it's being written by
* the outer NMI. That's okay; the outer NMI handler is
- * about to about to call exc_nmi() anyway, so we can just
- * resume the outer NMI.
+ * about to call exc_nmi() anyway, so we can just resume
+ * the outer NMI.
*/
movq $repeat_nmi, %rdx
@@ -1451,7 +1388,6 @@ end_repeat_nmi:
UNWIND_HINT_REGS
movq %rsp, %rdi
- movq $-1, %rsi
call exc_nmi
/* Always restore stashed SPEC_CTRL value (see paranoid_entry) */
@@ -1511,18 +1447,16 @@ nmi_restore:
iretq
SYM_CODE_END(asm_exc_nmi)
-#ifndef CONFIG_IA32_EMULATION
/*
* This handles SYSCALL from 32-bit code. There is no way to program
* MSRs to fully disable 32-bit SYSCALL.
*/
-SYM_CODE_START(ignore_sysret)
+SYM_CODE_START(entry_SYSCALL32_ignore)
UNWIND_HINT_END_OF_STACK
ENDBR
mov $-ENOSYS, %eax
sysretl
-SYM_CODE_END(ignore_sysret)
-#endif
+SYM_CODE_END(entry_SYSCALL32_ignore)
.pushsection .text, "ax"
__FUNC_ALIGN
diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
index 70150298f8bd..27c05d08558a 100644
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -118,9 +118,6 @@ SYM_INNER_LABEL(entry_SYSENTER_compat_after_hwframe, SYM_L_GLOBAL)
movq %rsp, %rdi
call do_SYSENTER_32
- /* XEN PV guests always use IRET path */
- ALTERNATIVE "testl %eax, %eax; jz swapgs_restore_regs_and_return_to_usermode", \
- "jmp swapgs_restore_regs_and_return_to_usermode", X86_FEATURE_XENPV
jmp sysret32_from_system_call
.Lsysenter_fix_flags:
@@ -212,13 +209,15 @@ SYM_INNER_LABEL(entry_SYSCALL_compat_after_hwframe, SYM_L_GLOBAL)
movq %rsp, %rdi
call do_fast_syscall_32
+
+sysret32_from_system_call:
/* XEN PV guests always use IRET path */
- ALTERNATIVE "testl %eax, %eax; jz swapgs_restore_regs_and_return_to_usermode", \
+ ALTERNATIVE "testb %al, %al; jz swapgs_restore_regs_and_return_to_usermode", \
"jmp swapgs_restore_regs_and_return_to_usermode", X86_FEATURE_XENPV
- /* Opportunistic SYSRET */
-sysret32_from_system_call:
/*
+ * Opportunistic SYSRET
+ *
* We are not going to return to userspace from the trampoline
* stack. So let's erase the thread stack right now.
*/
diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl
index 54748f6d7c45..c8fac5205803 100644
--- a/arch/x86/entry/syscalls/syscall_32.tbl
+++ b/arch/x86/entry/syscalls/syscall_32.tbl
@@ -458,3 +458,6 @@
451 i386 cachestat sys_cachestat
452 i386 fchmodat2 sys_fchmodat2
453 i386 map_shadow_stack sys_map_shadow_stack
+454 i386 futex_wake sys_futex_wake
+455 i386 futex_wait sys_futex_wait
+456 i386 futex_requeue sys_futex_requeue
diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl
index 2a62eaf30d69..8cb8bf68721c 100644
--- a/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/arch/x86/entry/syscalls/syscall_64.tbl
@@ -375,6 +375,9 @@
451 common cachestat sys_cachestat
452 common fchmodat2 sys_fchmodat2
453 64 map_shadow_stack sys_map_shadow_stack
+454 common futex_wake sys_futex_wake
+455 common futex_wait sys_futex_wait
+456 common futex_requeue sys_futex_requeue
#
# Due to a historical design error, certain syscalls are numbered differently
diff --git a/arch/x86/entry/thunk_32.S b/arch/x86/entry/thunk_32.S
index ff6e7003da97..0103e103a657 100644
--- a/arch/x86/entry/thunk_32.S
+++ b/arch/x86/entry/thunk_32.S
@@ -4,9 +4,9 @@
* Copyright 2008 by Steven Rostedt, Red Hat, Inc
* (inspired by Andi Kleen's thunk_64.S)
*/
+ #include <linux/export.h>
#include <linux/linkage.h>
#include <asm/asm.h>
- #include <asm/export.h>
/* put return address in eax (arg1) */
.macro THUNK name, func, put_ret_addr_in_eax=0
diff --git a/arch/x86/entry/thunk_64.S b/arch/x86/entry/thunk_64.S
index 27b5da2111ac..416b400f39db 100644
--- a/arch/x86/entry/thunk_64.S
+++ b/arch/x86/entry/thunk_64.S
@@ -4,10 +4,10 @@
* disturbance of register allocation in some inline assembly constructs.
* Copyright 2001,2002 by Andi Kleen, SuSE Labs.
*/
+#include <linux/export.h>
#include <linux/linkage.h>
#include "calling.h"
#include <asm/asm.h>
-#include <asm/export.h>
/* rdi: arg1 ... normal C conventions. rax is saved/restored. */
.macro THUNK name, func
diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile
index 6a1821bd7d5e..83c0afb7c741 100644
--- a/arch/x86/entry/vdso/Makefile
+++ b/arch/x86/entry/vdso/Makefile
@@ -42,7 +42,8 @@ vdso_img-$(VDSO64-y) += 64
vdso_img-$(VDSOX32-y) += x32
vdso_img-$(VDSO32-y) += 32
-obj-$(VDSO32-y) += vdso32-setup.o
+obj-$(VDSO32-y) += vdso32-setup.o
+OBJECT_FILES_NON_STANDARD_vdso32-setup.o := n
vobjs := $(foreach F,$(vobjs-y),$(obj)/$F)
vobjs32 := $(foreach F,$(vobjs32-y),$(obj)/$F)
diff --git a/arch/x86/entry/vdso/vsgx.S b/arch/x86/entry/vdso/vsgx.S
index d77d278ee9dd..37a3d4c02366 100644
--- a/arch/x86/entry/vdso/vsgx.S
+++ b/arch/x86/entry/vdso/vsgx.S
@@ -1,7 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0 */
#include <linux/linkage.h>
-#include <asm/export.h>
#include <asm/errno.h>
#include <asm/enclu.h>