summaryrefslogtreecommitdiff
path: root/arch/x86/include/asm/io.h
diff options
context:
space:
mode:
authorJason Gunthorpe <jgg@nvidia.com>2024-04-11 19:46:14 +0300
committerJason Gunthorpe <jgg@nvidia.com>2024-04-22 23:11:19 +0300
commit20516d6e51dd9994afda8d556507cfbe7853384b (patch)
tree15e4bdf535cbe812c0c232481c3a5cefbb529c4b /arch/x86/include/asm/io.h
parent1a633bdc8fd9e9e4a9f9a668ae122edfc5aacc86 (diff)
downloadlinux-20516d6e51dd9994afda8d556507cfbe7853384b.tar.xz
x86: Stop using weak symbols for __iowrite32_copy()
Start switching iomap_copy routines over to use #define and arch provided inline/macro functions instead of weak symbols. Inline functions allow more compiler optimization and this is often a driver hot path. x86 has the only weak implementation for __iowrite32_copy(), so replace it with a static inline containing the same single instruction inline assembly. The compiler will generate the "mov edx,ecx" in a more optimal way. Remove iomap_copy_64.S Link: https://lore.kernel.org/r/1-v3-1893cd8b9369+1925-mlx5_arm_wc_jgg@nvidia.com Acked-by: Arnd Bergmann <arnd@arndb.de> Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
Diffstat (limited to 'arch/x86/include/asm/io.h')
-rw-r--r--arch/x86/include/asm/io.h17
1 files changed, 17 insertions, 0 deletions
diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h
index 294cd2a40818..4b99ed326b17 100644
--- a/arch/x86/include/asm/io.h
+++ b/arch/x86/include/asm/io.h
@@ -209,6 +209,23 @@ void memset_io(volatile void __iomem *, int, size_t);
#define memcpy_toio memcpy_toio
#define memset_io memset_io
+#ifdef CONFIG_X86_64
+/*
+ * Commit 0f07496144c2 ("[PATCH] Add faster __iowrite32_copy routine for
+ * x86_64") says that circa 2006 rep movsl is noticeably faster than a copy
+ * loop.
+ */
+static inline void __iowrite32_copy(void __iomem *to, const void *from,
+ size_t count)
+{
+ asm volatile("rep ; movsl"
+ : "=&c"(count), "=&D"(to), "=&S"(from)
+ : "0"(count), "1"(to), "2"(from)
+ : "memory");
+}
+#define __iowrite32_copy __iowrite32_copy
+#endif
+
/*
* ISA space is 'always mapped' on a typical x86 system, no need to
* explicitly ioremap() it. The fact that the ISA IO space is mapped