author    Heiko Carstens <hca@linux.ibm.com>  2024-02-03 13:45:23 +0300
committer Heiko Carstens <hca@linux.ibm.com>  2024-02-16 16:30:17 +0300
commit    dcd3e1de9d17dc43dfed87a9fc814b9dec508043 (patch)
tree      f4dc28be4bafb8cde3a619428250517fac82244d  /arch/s390/include/asm/fpu-insn.h
parent    cb2a1dd589a0ce97429bf2beeb560e5b030c2ccc (diff)
download  linux-dcd3e1de9d17dc43dfed87a9fc814b9dec508043.tar.xz
s390/checksum: provide csum_partial_copy_nocheck()
With csum_partial(), which reads all bytes into registers, it is easy to also implement csum_partial_copy_nocheck(), which copies the buffer while calculating its checksum.

For a 512 byte buffer this reduces the runtime by 19%. Compared to the old generic variant (memcpy() + cksm instruction), runtime is reduced by 42%.

Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
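The diff shown here is limited to the new vector store helpers in fpu-insn.h; the rest of the commit wires them into the s390 checksum code. As a rough, architecture-independent illustration of the idea described above (fold the checksum while the data is copied, so the buffer is traversed only once), a minimal portable C sketch might look as follows. This is not the s390 implementation, and csum_copy_sketch() is a made-up name used only for illustration:

/*
 * Conceptual sketch only: a single-pass "copy + ones' complement sum"
 * in portable C. The actual commit instead uses the vector helpers
 * below (fpu_vst()/fpu_vstl() and friends) to store the data that
 * csum_partial() already has in registers.
 */
#include <stdint.h>
#include <stddef.h>
#include <string.h>

static uint16_t csum_copy_sketch(void *dst, const void *src, size_t len)
{
	const uint8_t *s = src;
	uint8_t *d = dst;
	uint64_t sum = 0;
	uint16_t w;

	/* One pass over the buffer: copy 16-bit words and accumulate them. */
	while (len >= 2) {
		memcpy(&w, s, 2);	/* memcpy() avoids unaligned accesses */
		memcpy(d, &w, 2);
		sum += w;
		s += 2;
		d += 2;
		len -= 2;
	}
	if (len) {	/* trailing odd byte; byte-order details glossed over */
		*d = *s;
		sum += *s;
	}
	/* Fold carries back into 16 bits (ones' complement addition). */
	while (sum >> 16)
		sum = (sum & 0xffff) + (sum >> 16);
	return (uint16_t)sum;
}

The speedup reported in the commit message comes from reusing the bytes that csum_partial() already reads into registers for the copy as well, instead of running a separate memcpy() followed by a cksm pass over the same buffer.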
Diffstat (limited to 'arch/s390/include/asm/fpu-insn.h')
-rw-r--r--   arch/s390/include/asm/fpu-insn.h   58
1 file changed, 58 insertions(+), 0 deletions(-)
diff --git a/arch/s390/include/asm/fpu-insn.h b/arch/s390/include/asm/fpu-insn.h
index 7e9997fa45d3..35c4fbe0bdd6 100644
--- a/arch/s390/include/asm/fpu-insn.h
+++ b/arch/s390/include/asm/fpu-insn.h
@@ -241,6 +241,64 @@ static __always_inline void fpu_vlvgf(u8 v, u32 val, u16 index)
#ifdef CONFIG_CC_IS_CLANG
+static __always_inline void fpu_vst(u8 v1, const void *vxr)
+{
+ instrument_write(vxr, sizeof(__vector128));
+ asm volatile("\n"
+ " la 1,%[vxr]\n"
+ " VST %[v1],0,,1\n"
+ : [vxr] "=R" (*(__vector128 *)vxr)
+ : [v1] "I" (v1)
+ : "memory", "1");
+}
+
+#else /* CONFIG_CC_IS_CLANG */
+
+static __always_inline void fpu_vst(u8 v1, const void *vxr)
+{
+ instrument_write(vxr, sizeof(__vector128));
+ asm volatile("VST %[v1],%O[vxr],,%R[vxr]\n"
+ : [vxr] "=Q" (*(__vector128 *)vxr)
+ : [v1] "I" (v1)
+ : "memory");
+}
+
+#endif /* CONFIG_CC_IS_CLANG */
+
+#ifdef CONFIG_CC_IS_CLANG
+
+static __always_inline void fpu_vstl(u8 v1, u32 index, const void *vxr)
+{
+ unsigned int size;
+
+ size = min(index + 1, sizeof(__vector128));
+ instrument_write(vxr, size);
+ asm volatile("\n"
+ " la 1,%[vxr]\n"
+ " VSTL %[v1],%[index],0,1\n"
+ : [vxr] "=R" (*(u8 *)vxr)
+ : [index] "d" (index), [v1] "I" (v1)
+ : "memory", "1");
+}
+
+#else /* CONFIG_CC_IS_CLANG */
+
+static __always_inline void fpu_vstl(u8 v1, u32 index, const void *vxr)
+{
+ unsigned int size;
+
+ size = min(index + 1, sizeof(__vector128));
+ instrument_write(vxr, size);
+ asm volatile("VSTL %[v1],%[index],%O[vxr],%R[vxr]\n"
+ : [vxr] "=Q" (*(u8 *)vxr)
+ : [index] "d" (index), [v1] "I" (v1)
+ : "memory");
+}
+
+#endif /* CONFIG_CC_IS_CLANG */
+
+#ifdef CONFIG_CC_IS_CLANG
+
#define fpu_vstm(_v1, _v3, _vxrs) \
({ \
unsigned int size = ((_v3) - (_v1) + 1) * sizeof(__vector128); \