From ec6347bb43395cb92126788a1a5b25302543f815 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 5 Oct 2020 20:40:16 -0700 Subject: x86, powerpc: Rename memcpy_mcsafe() to copy_mc_to_{user, kernel}() In reaction to a proposal to introduce a memcpy_mcsafe_fast() implementation Linus points out that memcpy_mcsafe() is poorly named relative to communicating the scope of the interface. Specifically what addresses are valid to pass as source, destination, and what faults / exceptions are handled. Of particular concern is that even though x86 might be able to handle the semantics of copy_mc_to_user() with its common copy_user_generic() implementation other archs likely need / want an explicit path for this case: On Fri, May 1, 2020 at 11:28 AM Linus Torvalds wrote: > > On Thu, Apr 30, 2020 at 6:21 PM Dan Williams wrote: > > > > However now I see that copy_user_generic() works for the wrong reason. > > It works because the exception on the source address due to poison > > looks no different than a write fault on the user address to the > > caller, it's still just a short copy. So it makes copy_to_user() work > > for the wrong reason relative to the name. > > Right. > > And it won't work that way on other architectures. On x86, we have a > generic function that can take faults on either side, and we use it > for both cases (and for the "in_user" case too), but that's an > artifact of the architecture oddity. > > In fact, it's probably wrong even on x86 - because it can hide bugs - > but writing those things is painful enough that everybody prefers > having just one function. Replace a single top-level memcpy_mcsafe() with either copy_mc_to_user(), or copy_mc_to_kernel(). Introduce an x86 copy_mc_fragile() name as the rename for the low-level x86 implementation formerly named memcpy_mcsafe(). It is used as the slow / careful backend that is supplanted by a fast copy_mc_generic() in a follow-on patch. One side-effect of this reorganization is that separating copy_mc_64.S to its own file means that perf no longer needs to track dependencies for its memcpy_64.S benchmarks. [ bp: Massage a bit. ] Signed-off-by: Dan Williams Signed-off-by: Borislav Petkov Reviewed-by: Tony Luck Acked-by: Michael Ellerman Cc: Link: http://lore.kernel.org/r/CAHk-=wjSqtXAqfUJxFtWNwmguFASTgB0dz1dT3V-78Quiezqbg@mail.gmail.com Link: https://lkml.kernel.org/r/160195561680.2163339.11574962055305783722.stgit@dwillia2-desk3.amr.corp.intel.com --- arch/powerpc/Kconfig | 2 +- arch/powerpc/include/asm/string.h | 2 - arch/powerpc/include/asm/uaccess.h | 40 +++--- arch/powerpc/lib/Makefile | 2 +- arch/powerpc/lib/copy_mc_64.S | 242 ++++++++++++++++++++++++++++++++++++ arch/powerpc/lib/memcpy_mcsafe_64.S | 242 ------------------------------------ 6 files changed, 270 insertions(+), 260 deletions(-) create mode 100644 arch/powerpc/lib/copy_mc_64.S delete mode 100644 arch/powerpc/lib/memcpy_mcsafe_64.S (limited to 'arch/powerpc') diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 1f48bbfb3ce9..76473eda3426 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -136,7 +136,7 @@ config PPC select ARCH_HAS_STRICT_KERNEL_RWX if (PPC32 && !HIBERNATION) select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST select ARCH_HAS_UACCESS_FLUSHCACHE - select ARCH_HAS_UACCESS_MCSAFE if PPC64 + select ARCH_HAS_COPY_MC if PPC64 select ARCH_HAS_UBSAN_SANITIZE_ALL select ARCH_HAVE_NMI_SAFE_CMPXCHG select ARCH_KEEP_MEMBLOCK diff --git a/arch/powerpc/include/asm/string.h b/arch/powerpc/include/asm/string.h index 283552cd0e58..2aa0e31e6884 100644 --- a/arch/powerpc/include/asm/string.h +++ b/arch/powerpc/include/asm/string.h @@ -53,9 +53,7 @@ void *__memmove(void *to, const void *from, __kernel_size_t n); #ifndef CONFIG_KASAN #define __HAVE_ARCH_MEMSET32 #define __HAVE_ARCH_MEMSET64 -#define __HAVE_ARCH_MEMCPY_MCSAFE -extern int memcpy_mcsafe(void *dst, const void *src, __kernel_size_t sz); extern void *__memset16(uint16_t *, uint16_t v, __kernel_size_t); extern void *__memset32(uint32_t *, uint32_t v, __kernel_size_t); extern void *__memset64(uint64_t *, uint64_t v, __kernel_size_t); diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h index 00699903f1ef..20a35373cafc 100644 --- a/arch/powerpc/include/asm/uaccess.h +++ b/arch/powerpc/include/asm/uaccess.h @@ -435,6 +435,32 @@ do { \ extern unsigned long __copy_tofrom_user(void __user *to, const void __user *from, unsigned long size); +#ifdef CONFIG_ARCH_HAS_COPY_MC +unsigned long __must_check +copy_mc_generic(void *to, const void *from, unsigned long size); + +static inline unsigned long __must_check +copy_mc_to_kernel(void *to, const void *from, unsigned long size) +{ + return copy_mc_generic(to, from, size); +} +#define copy_mc_to_kernel copy_mc_to_kernel + +static inline unsigned long __must_check +copy_mc_to_user(void __user *to, const void *from, unsigned long n) +{ + if (likely(check_copy_size(from, n, true))) { + if (access_ok(to, n)) { + allow_write_to_user(to, n); + n = copy_mc_generic((void *)to, from, n); + prevent_write_to_user(to, n); + } + } + + return n; +} +#endif + #ifdef __powerpc64__ static inline unsigned long raw_copy_in_user(void __user *to, const void __user *from, unsigned long n) @@ -523,20 +549,6 @@ raw_copy_to_user(void __user *to, const void *from, unsigned long n) return ret; } -static __always_inline unsigned long __must_check -copy_to_user_mcsafe(void __user *to, const void *from, unsigned long n) -{ - if (likely(check_copy_size(from, n, true))) { - if (access_ok(to, n)) { - allow_write_to_user(to, n); - n = memcpy_mcsafe((void *)to, from, n); - prevent_write_to_user(to, n); - } - } - - return n; -} - unsigned long __arch_clear_user(void __user *addr, unsigned long size); static inline unsigned long clear_user(void __user *addr, unsigned long size) diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile index d66a645503eb..69a91b571845 100644 --- a/arch/powerpc/lib/Makefile +++ b/arch/powerpc/lib/Makefile @@ -39,7 +39,7 @@ obj-$(CONFIG_PPC_BOOK3S_64) += copyuser_power7.o copypage_power7.o \ memcpy_power7.o obj64-y += copypage_64.o copyuser_64.o mem_64.o hweight_64.o \ - memcpy_64.o memcpy_mcsafe_64.o + memcpy_64.o copy_mc_64.o ifndef CONFIG_PPC_QUEUED_SPINLOCKS obj64-$(CONFIG_SMP) += locks.o diff --git a/arch/powerpc/lib/copy_mc_64.S b/arch/powerpc/lib/copy_mc_64.S new file mode 100644 index 000000000000..88d46c471493 --- /dev/null +++ b/arch/powerpc/lib/copy_mc_64.S @@ -0,0 +1,242 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) IBM Corporation, 2011 + * Derived from copyuser_power7.s by Anton Blanchard + * Author - Balbir Singh + */ +#include +#include +#include + + .macro err1 +100: + EX_TABLE(100b,.Ldo_err1) + .endm + + .macro err2 +200: + EX_TABLE(200b,.Ldo_err2) + .endm + + .macro err3 +300: EX_TABLE(300b,.Ldone) + .endm + +.Ldo_err2: + ld r22,STK_REG(R22)(r1) + ld r21,STK_REG(R21)(r1) + ld r20,STK_REG(R20)(r1) + ld r19,STK_REG(R19)(r1) + ld r18,STK_REG(R18)(r1) + ld r17,STK_REG(R17)(r1) + ld r16,STK_REG(R16)(r1) + ld r15,STK_REG(R15)(r1) + ld r14,STK_REG(R14)(r1) + addi r1,r1,STACKFRAMESIZE +.Ldo_err1: + /* Do a byte by byte copy to get the exact remaining size */ + mtctr r7 +46: +err3; lbz r0,0(r4) + addi r4,r4,1 +err3; stb r0,0(r3) + addi r3,r3,1 + bdnz 46b + li r3,0 + blr + +.Ldone: + mfctr r3 + blr + + +_GLOBAL(copy_mc_generic) + mr r7,r5 + cmpldi r5,16 + blt .Lshort_copy + +.Lcopy: + /* Get the source 8B aligned */ + neg r6,r4 + mtocrf 0x01,r6 + clrldi r6,r6,(64-3) + + bf cr7*4+3,1f +err1; lbz r0,0(r4) + addi r4,r4,1 +err1; stb r0,0(r3) + addi r3,r3,1 + subi r7,r7,1 + +1: bf cr7*4+2,2f +err1; lhz r0,0(r4) + addi r4,r4,2 +err1; sth r0,0(r3) + addi r3,r3,2 + subi r7,r7,2 + +2: bf cr7*4+1,3f +err1; lwz r0,0(r4) + addi r4,r4,4 +err1; stw r0,0(r3) + addi r3,r3,4 + subi r7,r7,4 + +3: sub r5,r5,r6 + cmpldi r5,128 + + mflr r0 + stdu r1,-STACKFRAMESIZE(r1) + std r14,STK_REG(R14)(r1) + std r15,STK_REG(R15)(r1) + std r16,STK_REG(R16)(r1) + std r17,STK_REG(R17)(r1) + std r18,STK_REG(R18)(r1) + std r19,STK_REG(R19)(r1) + std r20,STK_REG(R20)(r1) + std r21,STK_REG(R21)(r1) + std r22,STK_REG(R22)(r1) + std r0,STACKFRAMESIZE+16(r1) + + blt 5f + srdi r6,r5,7 + mtctr r6 + + /* Now do cacheline (128B) sized loads and stores. */ + .align 5 +4: +err2; ld r0,0(r4) +err2; ld r6,8(r4) +err2; ld r8,16(r4) +err2; ld r9,24(r4) +err2; ld r10,32(r4) +err2; ld r11,40(r4) +err2; ld r12,48(r4) +err2; ld r14,56(r4) +err2; ld r15,64(r4) +err2; ld r16,72(r4) +err2; ld r17,80(r4) +err2; ld r18,88(r4) +err2; ld r19,96(r4) +err2; ld r20,104(r4) +err2; ld r21,112(r4) +err2; ld r22,120(r4) + addi r4,r4,128 +err2; std r0,0(r3) +err2; std r6,8(r3) +err2; std r8,16(r3) +err2; std r9,24(r3) +err2; std r10,32(r3) +err2; std r11,40(r3) +err2; std r12,48(r3) +err2; std r14,56(r3) +err2; std r15,64(r3) +err2; std r16,72(r3) +err2; std r17,80(r3) +err2; std r18,88(r3) +err2; std r19,96(r3) +err2; std r20,104(r3) +err2; std r21,112(r3) +err2; std r22,120(r3) + addi r3,r3,128 + subi r7,r7,128 + bdnz 4b + + clrldi r5,r5,(64-7) + + /* Up to 127B to go */ +5: srdi r6,r5,4 + mtocrf 0x01,r6 + +6: bf cr7*4+1,7f +err2; ld r0,0(r4) +err2; ld r6,8(r4) +err2; ld r8,16(r4) +err2; ld r9,24(r4) +err2; ld r10,32(r4) +err2; ld r11,40(r4) +err2; ld r12,48(r4) +err2; ld r14,56(r4) + addi r4,r4,64 +err2; std r0,0(r3) +err2; std r6,8(r3) +err2; std r8,16(r3) +err2; std r9,24(r3) +err2; std r10,32(r3) +err2; std r11,40(r3) +err2; std r12,48(r3) +err2; std r14,56(r3) + addi r3,r3,64 + subi r7,r7,64 + +7: ld r14,STK_REG(R14)(r1) + ld r15,STK_REG(R15)(r1) + ld r16,STK_REG(R16)(r1) + ld r17,STK_REG(R17)(r1) + ld r18,STK_REG(R18)(r1) + ld r19,STK_REG(R19)(r1) + ld r20,STK_REG(R20)(r1) + ld r21,STK_REG(R21)(r1) + ld r22,STK_REG(R22)(r1) + addi r1,r1,STACKFRAMESIZE + + /* Up to 63B to go */ + bf cr7*4+2,8f +err1; ld r0,0(r4) +err1; ld r6,8(r4) +err1; ld r8,16(r4) +err1; ld r9,24(r4) + addi r4,r4,32 +err1; std r0,0(r3) +err1; std r6,8(r3) +err1; std r8,16(r3) +err1; std r9,24(r3) + addi r3,r3,32 + subi r7,r7,32 + + /* Up to 31B to go */ +8: bf cr7*4+3,9f +err1; ld r0,0(r4) +err1; ld r6,8(r4) + addi r4,r4,16 +err1; std r0,0(r3) +err1; std r6,8(r3) + addi r3,r3,16 + subi r7,r7,16 + +9: clrldi r5,r5,(64-4) + + /* Up to 15B to go */ +.Lshort_copy: + mtocrf 0x01,r5 + bf cr7*4+0,12f +err1; lwz r0,0(r4) /* Less chance of a reject with word ops */ +err1; lwz r6,4(r4) + addi r4,r4,8 +err1; stw r0,0(r3) +err1; stw r6,4(r3) + addi r3,r3,8 + subi r7,r7,8 + +12: bf cr7*4+1,13f +err1; lwz r0,0(r4) + addi r4,r4,4 +err1; stw r0,0(r3) + addi r3,r3,4 + subi r7,r7,4 + +13: bf cr7*4+2,14f +err1; lhz r0,0(r4) + addi r4,r4,2 +err1; sth r0,0(r3) + addi r3,r3,2 + subi r7,r7,2 + +14: bf cr7*4+3,15f +err1; lbz r0,0(r4) +err1; stb r0,0(r3) + +15: li r3,0 + blr + +EXPORT_SYMBOL_GPL(copy_mc_generic); diff --git a/arch/powerpc/lib/memcpy_mcsafe_64.S b/arch/powerpc/lib/memcpy_mcsafe_64.S deleted file mode 100644 index cb882d9a6d8a..000000000000 --- a/arch/powerpc/lib/memcpy_mcsafe_64.S +++ /dev/null @@ -1,242 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Copyright (C) IBM Corporation, 2011 - * Derived from copyuser_power7.s by Anton Blanchard - * Author - Balbir Singh - */ -#include -#include -#include - - .macro err1 -100: - EX_TABLE(100b,.Ldo_err1) - .endm - - .macro err2 -200: - EX_TABLE(200b,.Ldo_err2) - .endm - - .macro err3 -300: EX_TABLE(300b,.Ldone) - .endm - -.Ldo_err2: - ld r22,STK_REG(R22)(r1) - ld r21,STK_REG(R21)(r1) - ld r20,STK_REG(R20)(r1) - ld r19,STK_REG(R19)(r1) - ld r18,STK_REG(R18)(r1) - ld r17,STK_REG(R17)(r1) - ld r16,STK_REG(R16)(r1) - ld r15,STK_REG(R15)(r1) - ld r14,STK_REG(R14)(r1) - addi r1,r1,STACKFRAMESIZE -.Ldo_err1: - /* Do a byte by byte copy to get the exact remaining size */ - mtctr r7 -46: -err3; lbz r0,0(r4) - addi r4,r4,1 -err3; stb r0,0(r3) - addi r3,r3,1 - bdnz 46b - li r3,0 - blr - -.Ldone: - mfctr r3 - blr - - -_GLOBAL(memcpy_mcsafe) - mr r7,r5 - cmpldi r5,16 - blt .Lshort_copy - -.Lcopy: - /* Get the source 8B aligned */ - neg r6,r4 - mtocrf 0x01,r6 - clrldi r6,r6,(64-3) - - bf cr7*4+3,1f -err1; lbz r0,0(r4) - addi r4,r4,1 -err1; stb r0,0(r3) - addi r3,r3,1 - subi r7,r7,1 - -1: bf cr7*4+2,2f -err1; lhz r0,0(r4) - addi r4,r4,2 -err1; sth r0,0(r3) - addi r3,r3,2 - subi r7,r7,2 - -2: bf cr7*4+1,3f -err1; lwz r0,0(r4) - addi r4,r4,4 -err1; stw r0,0(r3) - addi r3,r3,4 - subi r7,r7,4 - -3: sub r5,r5,r6 - cmpldi r5,128 - - mflr r0 - stdu r1,-STACKFRAMESIZE(r1) - std r14,STK_REG(R14)(r1) - std r15,STK_REG(R15)(r1) - std r16,STK_REG(R16)(r1) - std r17,STK_REG(R17)(r1) - std r18,STK_REG(R18)(r1) - std r19,STK_REG(R19)(r1) - std r20,STK_REG(R20)(r1) - std r21,STK_REG(R21)(r1) - std r22,STK_REG(R22)(r1) - std r0,STACKFRAMESIZE+16(r1) - - blt 5f - srdi r6,r5,7 - mtctr r6 - - /* Now do cacheline (128B) sized loads and stores. */ - .align 5 -4: -err2; ld r0,0(r4) -err2; ld r6,8(r4) -err2; ld r8,16(r4) -err2; ld r9,24(r4) -err2; ld r10,32(r4) -err2; ld r11,40(r4) -err2; ld r12,48(r4) -err2; ld r14,56(r4) -err2; ld r15,64(r4) -err2; ld r16,72(r4) -err2; ld r17,80(r4) -err2; ld r18,88(r4) -err2; ld r19,96(r4) -err2; ld r20,104(r4) -err2; ld r21,112(r4) -err2; ld r22,120(r4) - addi r4,r4,128 -err2; std r0,0(r3) -err2; std r6,8(r3) -err2; std r8,16(r3) -err2; std r9,24(r3) -err2; std r10,32(r3) -err2; std r11,40(r3) -err2; std r12,48(r3) -err2; std r14,56(r3) -err2; std r15,64(r3) -err2; std r16,72(r3) -err2; std r17,80(r3) -err2; std r18,88(r3) -err2; std r19,96(r3) -err2; std r20,104(r3) -err2; std r21,112(r3) -err2; std r22,120(r3) - addi r3,r3,128 - subi r7,r7,128 - bdnz 4b - - clrldi r5,r5,(64-7) - - /* Up to 127B to go */ -5: srdi r6,r5,4 - mtocrf 0x01,r6 - -6: bf cr7*4+1,7f -err2; ld r0,0(r4) -err2; ld r6,8(r4) -err2; ld r8,16(r4) -err2; ld r9,24(r4) -err2; ld r10,32(r4) -err2; ld r11,40(r4) -err2; ld r12,48(r4) -err2; ld r14,56(r4) - addi r4,r4,64 -err2; std r0,0(r3) -err2; std r6,8(r3) -err2; std r8,16(r3) -err2; std r9,24(r3) -err2; std r10,32(r3) -err2; std r11,40(r3) -err2; std r12,48(r3) -err2; std r14,56(r3) - addi r3,r3,64 - subi r7,r7,64 - -7: ld r14,STK_REG(R14)(r1) - ld r15,STK_REG(R15)(r1) - ld r16,STK_REG(R16)(r1) - ld r17,STK_REG(R17)(r1) - ld r18,STK_REG(R18)(r1) - ld r19,STK_REG(R19)(r1) - ld r20,STK_REG(R20)(r1) - ld r21,STK_REG(R21)(r1) - ld r22,STK_REG(R22)(r1) - addi r1,r1,STACKFRAMESIZE - - /* Up to 63B to go */ - bf cr7*4+2,8f -err1; ld r0,0(r4) -err1; ld r6,8(r4) -err1; ld r8,16(r4) -err1; ld r9,24(r4) - addi r4,r4,32 -err1; std r0,0(r3) -err1; std r6,8(r3) -err1; std r8,16(r3) -err1; std r9,24(r3) - addi r3,r3,32 - subi r7,r7,32 - - /* Up to 31B to go */ -8: bf cr7*4+3,9f -err1; ld r0,0(r4) -err1; ld r6,8(r4) - addi r4,r4,16 -err1; std r0,0(r3) -err1; std r6,8(r3) - addi r3,r3,16 - subi r7,r7,16 - -9: clrldi r5,r5,(64-4) - - /* Up to 15B to go */ -.Lshort_copy: - mtocrf 0x01,r5 - bf cr7*4+0,12f -err1; lwz r0,0(r4) /* Less chance of a reject with word ops */ -err1; lwz r6,4(r4) - addi r4,r4,8 -err1; stw r0,0(r3) -err1; stw r6,4(r3) - addi r3,r3,8 - subi r7,r7,8 - -12: bf cr7*4+1,13f -err1; lwz r0,0(r4) - addi r4,r4,4 -err1; stw r0,0(r3) - addi r3,r3,4 - subi r7,r7,4 - -13: bf cr7*4+2,14f -err1; lhz r0,0(r4) - addi r4,r4,2 -err1; sth r0,0(r3) - addi r3,r3,2 - subi r7,r7,2 - -14: bf cr7*4+3,15f -err1; lbz r0,0(r4) -err1; stb r0,0(r3) - -15: li r3,0 - blr - -EXPORT_SYMBOL_GPL(memcpy_mcsafe); -- cgit v1.2.3