From 96e5d28ae7a5250f3deb2434f1895c9daf48b1bd Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Thu, 7 Apr 2016 17:31:49 -0700 Subject: x86/cpu: Add Erratum 88 detection on AMD Erratum 88 affects old AMD K8s, where a SWAPGS fails to cause an input dependency on GS. Therefore, we need to MFENCE before it. But that MFENCE is expensive and unnecessary on the remaining x86 CPUs out there so patch it out on the CPUs which don't require it. Signed-off-by: Borislav Petkov Signed-off-by: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Rudolf Marek Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/aec6b2df1bfc56101d4e9e2e5d5d570bf41663c6.1460075211.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/entry/entry_64.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/entry') diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 858b555e274b..64d2033d1e49 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -783,7 +783,7 @@ ENTRY(native_load_gs_index) SWAPGS gs_change: movl %edi, %gs -2: mfence /* workaround */ +2: ALTERNATIVE "", "mfence", X86_BUG_SWAPGS_FENCE SWAPGS popfq ret -- cgit v1.2.3 From 42c748bb2544f21c3d115240527fe4478e193641 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Thu, 7 Apr 2016 17:31:50 -0700 Subject: x86/entry/64: Make gs_change a local label ... so that it doesn't appear in objdump output. Signed-off-by: Borislav Petkov Signed-off-by: Andy Lutomirski Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Rudolf Marek Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/b9c532a0e5f8d56dede2bd59767d40024d5a75e2.1460075211.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/entry/entry_64.S | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'arch/x86/entry') diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 64d2033d1e49..1693c17dbf81 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -781,7 +781,7 @@ ENTRY(native_load_gs_index) pushfq DISABLE_INTERRUPTS(CLBR_ANY & ~CLBR_RDI) SWAPGS -gs_change: +.Lgs_change: movl %edi, %gs 2: ALTERNATIVE "", "mfence", X86_BUG_SWAPGS_FENCE SWAPGS @@ -789,7 +789,7 @@ gs_change: ret END(native_load_gs_index) - _ASM_EXTABLE(gs_change, bad_gs) + _ASM_EXTABLE(.Lgs_change, bad_gs) .section .fixup, "ax" /* running with kernelgs */ bad_gs: @@ -1019,13 +1019,13 @@ ENTRY(error_entry) movl %ecx, %eax /* zero extend */ cmpq %rax, RIP+8(%rsp) je .Lbstep_iret - cmpq $gs_change, RIP+8(%rsp) + cmpq $.Lgs_change, RIP+8(%rsp) jne .Lerror_entry_done /* - * hack: gs_change can fail with user gsbase. If this happens, fix up + * hack: .Lgs_change can fail with user gsbase. If this happens, fix up * gsbase and proceed. We'll fix up the exception and land in - * gs_change's error handler with kernel gsbase. + * .Lgs_change's error handler with kernel gsbase. */ jmp .Lerror_entry_from_usermode_swapgs -- cgit v1.2.3 From 1ed95e52d902035e39a715ff3a314a893a96e5b7 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Thu, 7 Apr 2016 17:16:59 -0700 Subject: x86/vdso: Remove direct HPET access through the vDSO Allowing user code to map the HPET is problematic. HPET implementations are notoriously buggy, and there are probably many machines on which even MMIO reads from bogus HPET addresses are problematic. We have a report that the Dell Precision M2800 with: ACPI: HPET 0x00000000C8FE6238 000038 (v01 DELL CBX3 01072009 AMI. 00000005) is either so slow when accessing the HPET or actually hangs in some regard, causing soft lockups to be reported if users do unexpected things to the HPET. The vclock HPET code has also always been a questionable speedup. Accessing an HPET is exceedingly slow (on the order of several microseconds), so the added overhead in requiring a syscall to read the HPET is a small fraction of the total code of accessing it. To avoid future problems, let's just delete the code entirely. In the long run, this could actually be a speedup. Waiman Long as a patch to optimize the case where multiple CPUs contend for the HPET, but that won't help unless all the accesses are mediated by the kernel. Reported-by: Rasmus Villemoes Signed-off-by: Andy Lutomirski Reviewed-by: Thomas Gleixner Acked-by: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Waiman Long Cc: Waiman Long Link: http://lkml.kernel.org/r/d2f90bba98db9905041cff294646d290d378f67a.1460074438.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/entry/vdso/vclock_gettime.c | 15 --------------- arch/x86/entry/vdso/vdso-layout.lds.S | 5 ++--- arch/x86/entry/vdso/vma.c | 11 ----------- arch/x86/include/asm/clocksource.h | 9 ++++----- arch/x86/kernel/hpet.c | 1 - arch/x86/kvm/trace.h | 3 +-- 6 files changed, 7 insertions(+), 37 deletions(-) (limited to 'arch/x86/entry') diff --git a/arch/x86/entry/vdso/vclock_gettime.c b/arch/x86/entry/vdso/vclock_gettime.c index 03c3eb77bfce..2f02d23a05ef 100644 --- a/arch/x86/entry/vdso/vclock_gettime.c +++ b/arch/x86/entry/vdso/vclock_gettime.c @@ -13,7 +13,6 @@ #include #include -#include #include #include #include @@ -28,16 +27,6 @@ extern int __vdso_clock_gettime(clockid_t clock, struct timespec *ts); extern int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz); extern time_t __vdso_time(time_t *t); -#ifdef CONFIG_HPET_TIMER -extern u8 hpet_page - __attribute__((visibility("hidden"))); - -static notrace cycle_t vread_hpet(void) -{ - return *(const volatile u32 *)(&hpet_page + HPET_COUNTER); -} -#endif - #ifdef CONFIG_PARAVIRT_CLOCK extern u8 pvclock_page __attribute__((visibility("hidden"))); @@ -195,10 +184,6 @@ notrace static inline u64 vgetsns(int *mode) if (gtod->vclock_mode == VCLOCK_TSC) cycles = vread_tsc(); -#ifdef CONFIG_HPET_TIMER - else if (gtod->vclock_mode == VCLOCK_HPET) - cycles = vread_hpet(); -#endif #ifdef CONFIG_PARAVIRT_CLOCK else if (gtod->vclock_mode == VCLOCK_PVCLOCK) cycles = vread_pvclock(mode); diff --git a/arch/x86/entry/vdso/vdso-layout.lds.S b/arch/x86/entry/vdso/vdso-layout.lds.S index 4158acc17df0..a708aa90b507 100644 --- a/arch/x86/entry/vdso/vdso-layout.lds.S +++ b/arch/x86/entry/vdso/vdso-layout.lds.S @@ -25,7 +25,7 @@ SECTIONS * segment. */ - vvar_start = . - 3 * PAGE_SIZE; + vvar_start = . - 2 * PAGE_SIZE; vvar_page = vvar_start; /* Place all vvars at the offsets in asm/vvar.h. */ @@ -35,8 +35,7 @@ SECTIONS #undef __VVAR_KERNEL_LDS #undef EMIT_VVAR - hpet_page = vvar_start + PAGE_SIZE; - pvclock_page = vvar_start + 2 * PAGE_SIZE; + pvclock_page = vvar_start + PAGE_SIZE; . = SIZEOF_HEADERS; diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c index 10f704584922..b3cf81333a54 100644 --- a/arch/x86/entry/vdso/vma.c +++ b/arch/x86/entry/vdso/vma.c @@ -18,7 +18,6 @@ #include #include #include -#include #include #include @@ -129,16 +128,6 @@ static int vvar_fault(const struct vm_special_mapping *sm, if (sym_offset == image->sym_vvar_page) { ret = vm_insert_pfn(vma, (unsigned long)vmf->virtual_address, __pa_symbol(&__vvar_page) >> PAGE_SHIFT); - } else if (sym_offset == image->sym_hpet_page) { -#ifdef CONFIG_HPET_TIMER - if (hpet_address && vclock_was_used(VCLOCK_HPET)) { - ret = vm_insert_pfn_prot( - vma, - (unsigned long)vmf->virtual_address, - hpet_address >> PAGE_SHIFT, - pgprot_noncached(PAGE_READONLY)); - } -#endif } else if (sym_offset == image->sym_pvclock_page) { struct pvclock_vsyscall_time_info *pvti = pvclock_pvti_cpu0_va(); diff --git a/arch/x86/include/asm/clocksource.h b/arch/x86/include/asm/clocksource.h index d194266acb28..eae33c7170c8 100644 --- a/arch/x86/include/asm/clocksource.h +++ b/arch/x86/include/asm/clocksource.h @@ -3,11 +3,10 @@ #ifndef _ASM_X86_CLOCKSOURCE_H #define _ASM_X86_CLOCKSOURCE_H -#define VCLOCK_NONE 0 /* No vDSO clock available. */ -#define VCLOCK_TSC 1 /* vDSO should use vread_tsc. */ -#define VCLOCK_HPET 2 /* vDSO should use vread_hpet. */ -#define VCLOCK_PVCLOCK 3 /* vDSO should use vread_pvclock. */ -#define VCLOCK_MAX 3 +#define VCLOCK_NONE 0 /* No vDSO clock available. */ +#define VCLOCK_TSC 1 /* vDSO should use vread_tsc. */ +#define VCLOCK_PVCLOCK 2 /* vDSO should use vread_pvclock. */ +#define VCLOCK_MAX 2 struct arch_clocksource_data { int vclock_mode; diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index a1f0e4a5c47e..7282c2e3858e 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -773,7 +773,6 @@ static struct clocksource clocksource_hpet = { .mask = HPET_MASK, .flags = CLOCK_SOURCE_IS_CONTINUOUS, .resume = hpet_resume_counter, - .archdata = { .vclock_mode = VCLOCK_HPET }, }; static int hpet_clocksource_register(void) diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h index 2f1ea2f61e1f..b72743c5668d 100644 --- a/arch/x86/kvm/trace.h +++ b/arch/x86/kvm/trace.h @@ -809,8 +809,7 @@ TRACE_EVENT(kvm_write_tsc_offset, #define host_clocks \ {VCLOCK_NONE, "none"}, \ - {VCLOCK_TSC, "tsc"}, \ - {VCLOCK_HPET, "hpet"} \ + {VCLOCK_TSC, "tsc"} \ TRACE_EVENT(kvm_update_master_clock, TP_PROTO(bool use_master_clock, unsigned int host_clock, bool offset_matched), -- cgit v1.2.3 From 482dd2ef124484601adea82e5e806e81e2bc5521 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 7 Apr 2016 22:43:59 +0200 Subject: x86/syscalls: Wire up compat readv2/writev2 syscalls Reported-by: David Smith Tested-by: David Smith Signed-off-by: Christoph Hellwig Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20160407204359.GA3720@lst.de Signed-off-by: Ingo Molnar --- arch/x86/entry/syscalls/syscall_32.tbl | 4 ++-- arch/x86/entry/syscalls/syscall_64.tbl | 2 ++ 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'arch/x86/entry') diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl index b30dd8154cc2..4cddd17153fb 100644 --- a/arch/x86/entry/syscalls/syscall_32.tbl +++ b/arch/x86/entry/syscalls/syscall_32.tbl @@ -384,5 +384,5 @@ 375 i386 membarrier sys_membarrier 376 i386 mlock2 sys_mlock2 377 i386 copy_file_range sys_copy_file_range -378 i386 preadv2 sys_preadv2 -379 i386 pwritev2 sys_pwritev2 +378 i386 preadv2 sys_preadv2 compat_sys_preadv2 +379 i386 pwritev2 sys_pwritev2 compat_sys_pwritev2 diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl index cac6d17ce5db..555263e385c9 100644 --- a/arch/x86/entry/syscalls/syscall_64.tbl +++ b/arch/x86/entry/syscalls/syscall_64.tbl @@ -374,3 +374,5 @@ 543 x32 io_setup compat_sys_io_setup 544 x32 io_submit compat_sys_io_submit 545 x32 execveat compat_sys_execveat/ptregs +534 x32 preadv2 compat_sys_preadv2 +535 x32 pwritev2 compat_sys_pwritev2 -- cgit v1.2.3 From abfb9498ee1327f534df92a7ecaea81a85913bae Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Mon, 18 Apr 2016 16:43:43 +0300 Subject: x86/entry: Rename is_{ia32,x32}_task() to in_{ia32,x32}_syscall() The is_ia32_task()/is_x32_task() function names are a big misnomer: they suggests that the compat-ness of a system call is a task property, which is not true, the compatness of a system call purely depends on how it was invoked through the system call layer. A task may call 32-bit and 64-bit and x32 system calls without changing any of its kernel visible state. This specific minomer is also actively dangerous, as it might cause kernel developers to use the wrong kind of security checks within system calls. So rename it to in_{ia32,x32}_syscall(). Suggested-by: Andy Lutomirski Suggested-by: Ingo Molnar Signed-off-by: Dmitry Safonov [ Expanded the changelog. ] Acked-by: Andy Lutomirski Cc: 0x7f454c46@gmail.com Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: akpm@linux-foundation.org Cc: linux-mm@kvack.org Link: http://lkml.kernel.org/r/1460987025-30360-1-git-send-email-dsafonov@virtuozzo.com Signed-off-by: Ingo Molnar --- arch/x86/entry/common.c | 2 +- arch/x86/include/asm/compat.h | 4 ++-- arch/x86/include/asm/thread_info.h | 2 +- arch/x86/kernel/process_64.c | 2 +- arch/x86/kernel/ptrace.c | 2 +- arch/x86/kernel/signal.c | 2 +- arch/x86/kernel/uprobes.c | 2 +- 7 files changed, 8 insertions(+), 8 deletions(-) (limited to 'arch/x86/entry') diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c index e79d93d44ecd..ec138e538c44 100644 --- a/arch/x86/entry/common.c +++ b/arch/x86/entry/common.c @@ -191,7 +191,7 @@ long syscall_trace_enter_phase2(struct pt_regs *regs, u32 arch, long syscall_trace_enter(struct pt_regs *regs) { - u32 arch = is_ia32_task() ? AUDIT_ARCH_I386 : AUDIT_ARCH_X86_64; + u32 arch = in_ia32_syscall() ? AUDIT_ARCH_I386 : AUDIT_ARCH_X86_64; unsigned long phase1_result = syscall_trace_enter_phase1(regs, arch); if (phase1_result == 0) diff --git a/arch/x86/include/asm/compat.h b/arch/x86/include/asm/compat.h index ebb102e1bbc7..5a3b2c119ed0 100644 --- a/arch/x86/include/asm/compat.h +++ b/arch/x86/include/asm/compat.h @@ -307,7 +307,7 @@ static inline void __user *arch_compat_alloc_user_space(long len) return (void __user *)round_down(sp - len, 16); } -static inline bool is_x32_task(void) +static inline bool in_x32_syscall(void) { #ifdef CONFIG_X86_X32_ABI if (task_pt_regs(current)->orig_ax & __X32_SYSCALL_BIT) @@ -318,7 +318,7 @@ static inline bool is_x32_task(void) static inline bool in_compat_syscall(void) { - return is_ia32_task() || is_x32_task(); + return in_ia32_syscall() || in_x32_syscall(); } #define in_compat_syscall in_compat_syscall /* override the generic impl */ diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index ffae84df8a93..30c133ac05cd 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -255,7 +255,7 @@ static inline bool test_and_clear_restore_sigmask(void) return true; } -static inline bool is_ia32_task(void) +static inline bool in_ia32_syscall(void) { #ifdef CONFIG_X86_32 return true; diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 50337eac1ca2..24d1b7fb4399 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -210,7 +210,7 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long sp, */ if (clone_flags & CLONE_SETTLS) { #ifdef CONFIG_IA32_EMULATION - if (is_ia32_task()) + if (in_ia32_syscall()) err = do_set_thread_area(p, -1, (struct user_desc __user *)tls, 0); else diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 32e9d9cbb884..0f4d2a5df2dc 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -1266,7 +1266,7 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request, compat_ulong_t caddr, compat_ulong_t cdata) { #ifdef CONFIG_X86_X32_ABI - if (!is_ia32_task()) + if (!in_ia32_syscall()) return x32_arch_ptrace(child, request, caddr, cdata); #endif #ifdef CONFIG_IA32_EMULATION diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index 6408c09bbcd4..2ebcc60f0e14 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -762,7 +762,7 @@ handle_signal(struct ksignal *ksig, struct pt_regs *regs) static inline unsigned long get_nr_restart_syscall(const struct pt_regs *regs) { #ifdef CONFIG_X86_64 - if (is_ia32_task()) + if (in_ia32_syscall()) return __NR_ia32_restart_syscall; #endif #ifdef CONFIG_X86_X32_ABI diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c index bf4db6eaec8f..98b4dc87628b 100644 --- a/arch/x86/kernel/uprobes.c +++ b/arch/x86/kernel/uprobes.c @@ -516,7 +516,7 @@ struct uprobe_xol_ops { static inline int sizeof_long(void) { - return is_ia32_task() ? 4 : 8; + return in_ia32_syscall() ? 4 : 8; } static int default_pre_xol_op(struct arch_uprobe *auprobe, struct pt_regs *regs) -- cgit v1.2.3 From b038c842b385f1470f991078e71b7c5b084a7341 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Tue, 26 Apr 2016 12:23:27 -0700 Subject: x86/segments/64: When load_gs_index fails, clear the base On AMD CPUs, a failed load_gs_base currently may not clear the FS base. Fix it. Signed-off-by: Andy Lutomirski Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1a6c4d3a8a4e7be79ba448b42685e0321d50c14c.1461698311.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/entry/entry_64.S | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'arch/x86/entry') diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 1693c17dbf81..6344629ae1ce 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -794,6 +794,12 @@ END(native_load_gs_index) /* running with kernelgs */ bad_gs: SWAPGS /* switch back to user gs */ +.macro ZAP_GS + /* This can't be a string because the preprocessor needs to see it. */ + movl $__USER_DS, %eax + movl %eax, %gs +.endm + ALTERNATIVE "", "ZAP_GS", X86_BUG_NULL_SEG xorl %eax, %eax movl %eax, %gs jmp 2b -- cgit v1.2.3 From 778843f934e362ed4ed734520f60a44a78a074b4 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Mon, 2 May 2016 16:56:50 +0200 Subject: x86/asm/entry/32: Simplify pushes of zeroed pt_regs->REGs Use of a temporary R8 register here seems to be unnecessary. "push %r8" is a two-byte insn (it needs REX prefix to specify R8), "push $0" is two-byte too. It seems just using the latter would be no worse. Thus, code had an unnecessary "xorq %r8,%r8" insn. It probably costs nothing in execution time here since we are probably limited by store bandwidth at this point, but still. Run-tested under QEMU: 32-bit calls still work: / # ./test_syscall_vdso32 [RUN] Executing 6-argument 32-bit syscall via VDSO [OK] Arguments are preserved across syscall [NOTE] R11 has changed:0000000000200ed7 - assuming clobbered by SYSRET insn [OK] R8..R15 did not leak kernel data [RUN] Executing 6-argument 32-bit syscall via INT 80 [OK] Arguments are preserved across syscall [OK] R8..R15 did not leak kernel data [RUN] Running tests under ptrace [RUN] Executing 6-argument 32-bit syscall via VDSO [OK] Arguments are preserved across syscall [NOTE] R11 has changed:0000000000200ed7 - assuming clobbered by SYSRET insn [OK] R8..R15 did not leak kernel data [RUN] Executing 6-argument 32-bit syscall via INT 80 [OK] Arguments are preserved across syscall [OK] R8..R15 did not leak kernel data Signed-off-by: Denys Vlasenko Acked-by: Andy Lutomirski Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Frederic Weisbecker Cc: H. Peter Anvin Cc: Kees Cook Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Thomas Gleixner Cc: Will Drewry Cc: linux-kernel@vger.kernel.org Link: http://lkml.kernel.org/r/1462201010-16846-1-git-send-email-dvlasenk@redhat.com Signed-off-by: Ingo Molnar --- arch/x86/entry/entry_64_compat.S | 45 +++++++++++++++++++--------------------- 1 file changed, 21 insertions(+), 24 deletions(-) (limited to 'arch/x86/entry') diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S index 847f2f0c31e5..e1721dafbcb1 100644 --- a/arch/x86/entry/entry_64_compat.S +++ b/arch/x86/entry/entry_64_compat.S @@ -72,24 +72,23 @@ ENTRY(entry_SYSENTER_compat) pushfq /* pt_regs->flags (except IF = 0) */ orl $X86_EFLAGS_IF, (%rsp) /* Fix saved flags */ pushq $__USER32_CS /* pt_regs->cs */ - xorq %r8,%r8 - pushq %r8 /* pt_regs->ip = 0 (placeholder) */ + pushq $0 /* pt_regs->ip = 0 (placeholder) */ pushq %rax /* pt_regs->orig_ax */ pushq %rdi /* pt_regs->di */ pushq %rsi /* pt_regs->si */ pushq %rdx /* pt_regs->dx */ pushq %rcx /* pt_regs->cx */ pushq $-ENOSYS /* pt_regs->ax */ - pushq %r8 /* pt_regs->r8 = 0 */ - pushq %r8 /* pt_regs->r9 = 0 */ - pushq %r8 /* pt_regs->r10 = 0 */ - pushq %r8 /* pt_regs->r11 = 0 */ + pushq $0 /* pt_regs->r8 = 0 */ + pushq $0 /* pt_regs->r9 = 0 */ + pushq $0 /* pt_regs->r10 = 0 */ + pushq $0 /* pt_regs->r11 = 0 */ pushq %rbx /* pt_regs->rbx */ pushq %rbp /* pt_regs->rbp (will be overwritten) */ - pushq %r8 /* pt_regs->r12 = 0 */ - pushq %r8 /* pt_regs->r13 = 0 */ - pushq %r8 /* pt_regs->r14 = 0 */ - pushq %r8 /* pt_regs->r15 = 0 */ + pushq $0 /* pt_regs->r12 = 0 */ + pushq $0 /* pt_regs->r13 = 0 */ + pushq $0 /* pt_regs->r14 = 0 */ + pushq $0 /* pt_regs->r15 = 0 */ cld /* @@ -205,17 +204,16 @@ ENTRY(entry_SYSCALL_compat) pushq %rdx /* pt_regs->dx */ pushq %rbp /* pt_regs->cx (stashed in bp) */ pushq $-ENOSYS /* pt_regs->ax */ - xorq %r8,%r8 - pushq %r8 /* pt_regs->r8 = 0 */ - pushq %r8 /* pt_regs->r9 = 0 */ - pushq %r8 /* pt_regs->r10 = 0 */ - pushq %r8 /* pt_regs->r11 = 0 */ + pushq $0 /* pt_regs->r8 = 0 */ + pushq $0 /* pt_regs->r9 = 0 */ + pushq $0 /* pt_regs->r10 = 0 */ + pushq $0 /* pt_regs->r11 = 0 */ pushq %rbx /* pt_regs->rbx */ pushq %rbp /* pt_regs->rbp (will be overwritten) */ - pushq %r8 /* pt_regs->r12 = 0 */ - pushq %r8 /* pt_regs->r13 = 0 */ - pushq %r8 /* pt_regs->r14 = 0 */ - pushq %r8 /* pt_regs->r15 = 0 */ + pushq $0 /* pt_regs->r12 = 0 */ + pushq $0 /* pt_regs->r13 = 0 */ + pushq $0 /* pt_regs->r14 = 0 */ + pushq $0 /* pt_regs->r15 = 0 */ /* * User mode is traced as though IRQs are on, and SYSENTER @@ -316,11 +314,10 @@ ENTRY(entry_INT80_compat) pushq %rdx /* pt_regs->dx */ pushq %rcx /* pt_regs->cx */ pushq $-ENOSYS /* pt_regs->ax */ - xorq %r8,%r8 - pushq %r8 /* pt_regs->r8 = 0 */ - pushq %r8 /* pt_regs->r9 = 0 */ - pushq %r8 /* pt_regs->r10 = 0 */ - pushq %r8 /* pt_regs->r11 = 0 */ + pushq $0 /* pt_regs->r8 = 0 */ + pushq $0 /* pt_regs->r9 = 0 */ + pushq $0 /* pt_regs->r10 = 0 */ + pushq $0 /* pt_regs->r11 = 0 */ pushq %rbx /* pt_regs->rbx */ pushq %rbp /* pt_regs->rbp */ pushq %r12 /* pt_regs->r12 */ -- cgit v1.2.3 From 092c74e420952c7cb68141731f2b562245b51eeb Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Wed, 4 May 2016 22:44:36 -0400 Subject: x86/entry, sched/x86: Don't save/restore EFLAGS on task switch Now that NT is filtered by the SYSENTER entry code, it is safe to skip saving and restoring flags on task switch. Also remove a leftover reset of flags on 64-bit fork. Signed-off-by: Brian Gerst Acked-by: Andy Lutomirski Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Borislav Petkov Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1462416278-11974-2-git-send-email-brgerst@gmail.com Signed-off-by: Ingo Molnar --- arch/x86/entry/entry_32.S | 4 ---- arch/x86/entry/entry_64.S | 3 --- arch/x86/include/asm/switch_to.h | 4 +--- 3 files changed, 1 insertion(+), 10 deletions(-) (limited to 'arch/x86/entry') diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index 10868aa734dc..c84d99bfff94 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -209,8 +209,6 @@ ENTRY(ret_from_fork) call schedule_tail GET_THREAD_INFO(%ebp) popl %eax - pushl $0x0202 # Reset kernel eflags - popfl /* When we fork, we trace the syscall return in the child, too. */ movl %esp, %eax @@ -223,8 +221,6 @@ ENTRY(ret_from_kernel_thread) call schedule_tail GET_THREAD_INFO(%ebp) popl %eax - pushl $0x0202 # Reset kernel eflags - popfl movl PT_EBP(%esp), %eax call *PT_EBX(%esp) movl $0, PT_EAX(%esp) diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 6344629ae1ce..9ee0da1807ed 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -372,9 +372,6 @@ END(ptregs_\func) ENTRY(ret_from_fork) LOCK ; btr $TIF_FORK, TI_flags(%r8) - pushq $0x0002 - popfq /* reset kernel eflags */ - call schedule_tail /* rdi: 'prev' task parameter */ testb $3, CS(%rsp) /* from kernel_thread? */ diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h index 751bf4b7bf11..8f321a1b03a1 100644 --- a/arch/x86/include/asm/switch_to.h +++ b/arch/x86/include/asm/switch_to.h @@ -39,8 +39,7 @@ do { \ */ \ unsigned long ebx, ecx, edx, esi, edi; \ \ - asm volatile("pushfl\n\t" /* save flags */ \ - "pushl %%ebp\n\t" /* save EBP */ \ + asm volatile("pushl %%ebp\n\t" /* save EBP */ \ "movl %%esp,%[prev_sp]\n\t" /* save ESP */ \ "movl %[next_sp],%%esp\n\t" /* restore ESP */ \ "movl $1f,%[prev_ip]\n\t" /* save EIP */ \ @@ -49,7 +48,6 @@ do { \ "jmp __switch_to\n" /* regparm call */ \ "1:\t" \ "popl %%ebp\n\t" /* restore EBP */ \ - "popfl\n" /* restore flags */ \ \ /* output parameters */ \ : [prev_sp] "=m" (prev->thread.sp), \ -- cgit v1.2.3 From 1e17880371f85d3d866962e04ba3567c0654a125 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Wed, 4 May 2016 22:44:37 -0400 Subject: x86/entry/32: Remove GET_THREAD_INFO() from entry code The entry code used to cache the thread_info pointer in the EBP register, but all the code that used it has been moved to C. Remove the unused code to get the pointer. Signed-off-by: Brian Gerst Acked-by: Andy Lutomirski Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Borislav Petkov Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1462416278-11974-3-git-send-email-brgerst@gmail.com Signed-off-by: Ingo Molnar --- arch/x86/entry/entry_32.S | 3 --- 1 file changed, 3 deletions(-) (limited to 'arch/x86/entry') diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index c84d99bfff94..983e5d3a0d27 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -207,7 +207,6 @@ ENTRY(ret_from_fork) pushl %eax call schedule_tail - GET_THREAD_INFO(%ebp) popl %eax /* When we fork, we trace the syscall return in the child, too. */ @@ -219,7 +218,6 @@ END(ret_from_fork) ENTRY(ret_from_kernel_thread) pushl %eax call schedule_tail - GET_THREAD_INFO(%ebp) popl %eax movl PT_EBP(%esp), %eax call *PT_EBX(%esp) @@ -247,7 +245,6 @@ ENDPROC(ret_from_kernel_thread) ret_from_exception: preempt_stop(CLBR_ANY) ret_from_intr: - GET_THREAD_INFO(%ebp) #ifdef CONFIG_VM86 movl PT_EFLAGS(%esp), %eax # mix EFLAGS and CS movb PT_CS(%esp), %al -- cgit v1.2.3