summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--arch/x86/include/asm/uaccess_64.h4
-rw-r--r--arch/x86/lib/clear_page_64.S114
-rw-r--r--tools/objtool/check.c2
3 files changed, 73 insertions, 47 deletions
diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h
index 8cc918acbabc..a0533e672496 100644
--- a/arch/x86/include/asm/uaccess_64.h
+++ b/arch/x86/include/asm/uaccess_64.h
@@ -83,7 +83,7 @@ __copy_from_user_flushcache(void *dst, const void __user *src, unsigned size)
*/
__must_check unsigned long
-clear_user_original(void __user *addr, unsigned long len);
+rep_stos_alternative(void __user *addr, unsigned long len);
static __always_inline __must_check unsigned long __clear_user(void __user *addr, unsigned long size)
{
@@ -97,7 +97,7 @@ static __always_inline __must_check unsigned long __clear_user(void __user *addr
asm volatile(
"1:\n\t"
ALTERNATIVE("rep stosb",
- "call clear_user_original", ALT_NOT(X86_FEATURE_FSRS))
+ "call rep_stos_alternative", ALT_NOT(X86_FEATURE_FSRS))
"2:\n"
_ASM_EXTABLE_UA(1b, 2b)
: "+c" (size), "+D" (addr), ASM_CALL_CONSTRAINT
diff --git a/arch/x86/lib/clear_page_64.S b/arch/x86/lib/clear_page_64.S
index fcd01b9f8d50..f74a3e704a1c 100644
--- a/arch/x86/lib/clear_page_64.S
+++ b/arch/x86/lib/clear_page_64.S
@@ -57,59 +57,85 @@ EXPORT_SYMBOL_GPL(clear_page_erms)
* Input:
* rdi destination
* rcx count
+ * rax is zero
*
* Output:
* rcx: uncleared bytes or 0 if successful.
*/
-SYM_FUNC_START(clear_user_original)
- /*
- * Copy only the lower 32 bits of size as that is enough to handle the rest bytes,
- * i.e., no need for a 'q' suffix and thus a REX prefix.
- */
- mov %ecx,%eax
- shr $3,%rcx
- jz .Lrest_bytes
+SYM_FUNC_START(rep_stos_alternative)
+ cmpq $64,%rcx
+ jae .Lunrolled
- # do the qwords first
- .p2align 4
-.Lqwords:
- movq $0,(%rdi)
- lea 8(%rdi),%rdi
- dec %rcx
- jnz .Lqwords
+ cmp $8,%ecx
+ jae .Lword
-.Lrest_bytes:
- and $7, %eax
- jz .Lexit
+ testl %ecx,%ecx
+ je .Lexit
- # now do the rest bytes
-.Lbytes:
- movb $0,(%rdi)
+.Lclear_user_tail:
+0: movb %al,(%rdi)
inc %rdi
- dec %eax
- jnz .Lbytes
-
+ dec %rcx
+ jnz .Lclear_user_tail
.Lexit:
- /*
- * %rax still needs to be cleared in the exception case because this function is called
- * from inline asm and the compiler expects %rax to be zero when exiting the inline asm,
- * in case it might reuse it somewhere.
- */
- xor %eax,%eax
- RET
+ RET
-.Lqwords_exception:
- # convert remaining qwords back into bytes to return to caller
- shl $3, %rcx
- and $7, %eax
- add %rax,%rcx
- jmp .Lexit
+ _ASM_EXTABLE_UA( 0b, .Lexit)
-.Lbytes_exception:
- mov %eax,%ecx
- jmp .Lexit
+.Lword:
+1: movq %rax,(%rdi)
+ addq $8,%rdi
+ sub $8,%ecx
+ je .Lexit
+ cmp $8,%ecx
+ jae .Lword
+ jmp .Lclear_user_tail
- _ASM_EXTABLE_UA(.Lqwords, .Lqwords_exception)
- _ASM_EXTABLE_UA(.Lbytes, .Lbytes_exception)
-SYM_FUNC_END(clear_user_original)
-EXPORT_SYMBOL(clear_user_original)
+ .p2align 4
+.Lunrolled:
+10: movq %rax,(%rdi)
+11: movq %rax,8(%rdi)
+12: movq %rax,16(%rdi)
+13: movq %rax,24(%rdi)
+14: movq %rax,32(%rdi)
+15: movq %rax,40(%rdi)
+16: movq %rax,48(%rdi)
+17: movq %rax,56(%rdi)
+ addq $64,%rdi
+ subq $64,%rcx
+ cmpq $64,%rcx
+ jae .Lunrolled
+ cmpl $8,%ecx
+ jae .Lword
+ testl %ecx,%ecx
+ jne .Lclear_user_tail
+ RET
+
+ /*
+ * If we take an exception on any of the
+ * word stores, we know that %rcx isn't zero,
+ * so we can just go to the tail clearing to
+ * get the exact count.
+ *
+ * The unrolled case might end up clearing
+ * some bytes twice. Don't care.
+ *
+ * We could use the value in %rdi to avoid
+ * a second fault on the exact count case,
+ * but do we really care? No.
+ *
+ * Finally, we could try to align %rdi at the
+ * top of the unrolling. But unaligned stores
+ * just aren't that common or expensive.
+ */
+ _ASM_EXTABLE_UA( 1b, .Lclear_user_tail)
+ _ASM_EXTABLE_UA(10b, .Lclear_user_tail)
+ _ASM_EXTABLE_UA(11b, .Lclear_user_tail)
+ _ASM_EXTABLE_UA(12b, .Lclear_user_tail)
+ _ASM_EXTABLE_UA(13b, .Lclear_user_tail)
+ _ASM_EXTABLE_UA(14b, .Lclear_user_tail)
+ _ASM_EXTABLE_UA(15b, .Lclear_user_tail)
+ _ASM_EXTABLE_UA(16b, .Lclear_user_tail)
+ _ASM_EXTABLE_UA(17b, .Lclear_user_tail)
+SYM_FUNC_END(rep_stos_alternative)
+EXPORT_SYMBOL(rep_stos_alternative)
diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index 44817bbe48fe..ac96c9939cd1 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -1284,7 +1284,7 @@ static const char *uaccess_safe_builtin[] = {
"copy_mc_fragile_handle_tail",
"copy_mc_enhanced_fast_string",
"ftrace_likely_update", /* CONFIG_TRACE_BRANCH_PROFILING */
- "clear_user_original",
+ "rep_stos_alternative",
"copy_user_generic_unrolled",
"__copy_user_nocache",
NULL