From 20516d6e51dd9994afda8d556507cfbe7853384b Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Thu, 11 Apr 2024 13:46:14 -0300 Subject: x86: Stop using weak symbols for __iowrite32_copy() Start switching iomap_copy routines over to use #define and arch provided inline/macro functions instead of weak symbols. Inline functions allow more compiler optimization and this is often a driver hot path. x86 has the only weak implementation for __iowrite32_copy(), so replace it with a static inline containing the same single instruction inline assembly. The compiler will generate the "mov edx,ecx" in a more optimal way. Remove iomap_copy_64.S Link: https://lore.kernel.org/r/1-v3-1893cd8b9369+1925-mlx5_arm_wc_jgg@nvidia.com Acked-by: Arnd Bergmann Signed-off-by: Jason Gunthorpe --- arch/x86/include/asm/io.h | 17 +++++++++++++++++ arch/x86/lib/Makefile | 1 - arch/x86/lib/iomap_copy_64.S | 15 --------------- 3 files changed, 17 insertions(+), 16 deletions(-) delete mode 100644 arch/x86/lib/iomap_copy_64.S (limited to 'arch/x86') diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h index 294cd2a40818..4b99ed326b17 100644 --- a/arch/x86/include/asm/io.h +++ b/arch/x86/include/asm/io.h @@ -209,6 +209,23 @@ void memset_io(volatile void __iomem *, int, size_t); #define memcpy_toio memcpy_toio #define memset_io memset_io +#ifdef CONFIG_X86_64 +/* + * Commit 0f07496144c2 ("[PATCH] Add faster __iowrite32_copy routine for + * x86_64") says that circa 2006 rep movsl is noticeably faster than a copy + * loop. + */ +static inline void __iowrite32_copy(void __iomem *to, const void *from, + size_t count) +{ + asm volatile("rep ; movsl" + : "=&c"(count), "=&D"(to), "=&S"(from) + : "0"(count), "1"(to), "2"(from) + : "memory"); +} +#define __iowrite32_copy __iowrite32_copy +#endif + /* * ISA space is 'always mapped' on a typical x86 system, no need to * explicitly ioremap() it. 
The fact that the ISA IO space is mapped diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile index 6da73513f026..98583a9dbab3 100644 --- a/arch/x86/lib/Makefile +++ b/arch/x86/lib/Makefile @@ -53,7 +53,6 @@ ifneq ($(CONFIG_X86_CMPXCHG64),y) lib-y += atomic64_386_32.o endif else - obj-y += iomap_copy_64.o ifneq ($(CONFIG_GENERIC_CSUM),y) lib-y += csum-partial_64.o csum-copy_64.o csum-wrappers_64.o endif diff --git a/arch/x86/lib/iomap_copy_64.S b/arch/x86/lib/iomap_copy_64.S deleted file mode 100644 index 6ff2f56cb0f7..000000000000 --- a/arch/x86/lib/iomap_copy_64.S +++ /dev/null @@ -1,15 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright 2006 PathScale, Inc. All Rights Reserved. - */ - -#include <linux/linkage.h> - -/* - * override generic version in lib/iomap_copy.c - */ -SYM_FUNC_START(__iowrite32_copy) - movl %edx,%ecx - rep movsl - RET -SYM_FUNC_END(__iowrite32_copy) -- cgit v1.2.3