summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2023-04-15 23:14:59 +0300
committerLinus Torvalds <torvalds@linux-foundation.org>2023-04-19 03:05:28 +0300
commitadfcf4231b8cbc2d9c1e7bfaa965b907e60639eb (patch)
tree4aceffad12112ff5fab00b48005fab8c55cc8404
parent20f3337d350c4e1b4ac66d731fd4e98565bf6cc0 (diff)
downloadlinux-adfcf4231b8cbc2d9c1e7bfaa965b907e60639eb.tar.xz
x86: don't use REP_GOOD or ERMS for user memory copies
The modern target to use is FSRM (Fast Short REP MOVS), and the other cases should only be used for bigger areas (ie mainly things like page clearing). Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--arch/x86/include/asm/uaccess_64.h15
-rw-r--r--arch/x86/lib/copy_user_64.S51
2 files changed, 12 insertions, 54 deletions
diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h
index d13d71af5cf6..c697cf10b7c8 100644
--- a/arch/x86/include/asm/uaccess_64.h
+++ b/arch/x86/include/asm/uaccess_64.h
@@ -18,9 +18,7 @@
/* Handles exceptions in both to and from, but doesn't do access_ok */
__must_check unsigned long
-copy_user_enhanced_fast_string(void *to, const void *from, unsigned len);
-__must_check unsigned long
-copy_user_generic_string(void *to, const void *from, unsigned len);
+copy_user_fast_string(void *to, const void *from, unsigned len);
__must_check unsigned long
copy_user_generic_unrolled(void *to, const void *from, unsigned len);
@@ -30,15 +28,12 @@ copy_user_generic(void *to, const void *from, unsigned len)
unsigned ret;
/*
- * If CPU has ERMS feature, use copy_user_enhanced_fast_string.
- * Otherwise, if CPU has rep_good feature, use copy_user_generic_string.
+ * If CPU has FSRM feature, use 'rep movs'.
* Otherwise, use copy_user_generic_unrolled.
*/
- alternative_call_2(copy_user_generic_unrolled,
- copy_user_generic_string,
- X86_FEATURE_REP_GOOD,
- copy_user_enhanced_fast_string,
- X86_FEATURE_ERMS,
+ alternative_call(copy_user_generic_unrolled,
+ copy_user_fast_string,
+ X86_FEATURE_FSRM,
ASM_OUTPUT2("=a" (ret), "=D" (to), "=S" (from),
"=d" (len)),
"1" (to), "2" (from), "3" (len)
diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S
index 9dec1b38a98f..d0283bc7567d 100644
--- a/arch/x86/lib/copy_user_64.S
+++ b/arch/x86/lib/copy_user_64.S
@@ -104,8 +104,8 @@ SYM_FUNC_START(copy_user_generic_unrolled)
SYM_FUNC_END(copy_user_generic_unrolled)
EXPORT_SYMBOL(copy_user_generic_unrolled)
-/* Some CPUs run faster using the string copy instructions.
- * This is also a lot simpler. Use them when possible.
+/*
+ * Some CPUs support FSRM for Fast Short REP MOVS.
*
* Only 4GB of copy is supported. This shouldn't be a problem
* because the kernel normally only writes from/to page sized chunks
@@ -122,58 +122,21 @@ EXPORT_SYMBOL(copy_user_generic_unrolled)
* Output:
* eax uncopied bytes or 0 if successful.
*/
-SYM_FUNC_START(copy_user_generic_string)
+SYM_FUNC_START(copy_user_fast_string)
ASM_STAC
- cmpl $8,%edx
- jb 2f /* less than 8 bytes, go to byte copy loop */
- ALIGN_DESTINATION
movl %edx,%ecx
- shrl $3,%ecx
- andl $7,%edx
-1: rep movsq
-2: movl %edx,%ecx
-3: rep movsb
+1: rep movsb
xorl %eax,%eax
ASM_CLAC
RET
-11: leal (%rdx,%rcx,8),%ecx
-12: movl %ecx,%edx /* ecx is zerorest also */
- jmp .Lcopy_user_handle_tail
-
- _ASM_EXTABLE_CPY(1b, 11b)
- _ASM_EXTABLE_CPY(3b, 12b)
-SYM_FUNC_END(copy_user_generic_string)
-EXPORT_SYMBOL(copy_user_generic_string)
-
-/*
- * Some CPUs are adding enhanced REP MOVSB/STOSB instructions.
- * It's recommended to use enhanced REP MOVSB/STOSB if it's enabled.
- *
- * Input:
- * rdi destination
- * rsi source
- * rdx count
- *
- * Output:
- * eax uncopied bytes or 0 if successful.
- */
-SYM_FUNC_START(copy_user_enhanced_fast_string)
- ASM_STAC
- /* CPUs without FSRM should avoid rep movsb for short copies */
- ALTERNATIVE "cmpl $64, %edx; jb copy_user_short_string", "", X86_FEATURE_FSRM
- movl %edx,%ecx
-1: rep movsb
- xorl %eax,%eax
+12: movl %ecx,%eax /* ecx is zerorest also */
ASM_CLAC
RET
-12: movl %ecx,%edx /* ecx is zerorest also */
- jmp .Lcopy_user_handle_tail
-
_ASM_EXTABLE_CPY(1b, 12b)
-SYM_FUNC_END(copy_user_enhanced_fast_string)
-EXPORT_SYMBOL(copy_user_enhanced_fast_string)
+SYM_FUNC_END(copy_user_fast_string)
+EXPORT_SYMBOL(copy_user_fast_string)
/*
* Try to copy last bytes and clear the rest if needed.