s390/checksum: provide vector register variant of csum_partial()

Provide a faster variant of csum_partial() which uses vector registers
instead of the cksm instruction.

Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
author: Heiko Carstens <hca@linux.ibm.com> 2024-02-03 13:45:22 +0300
committer: Heiko Carstens <hca@linux.ibm.com> 2024-02-16 16:30:17 +0300
commit: cb2a1dd589a0ce97429bf2beeb560e5b030c2ccc (patch)
tree: cb2ffaa05e7442ba1f4dfb305dcdb7bfef87d463 /arch/s390/lib
parent: 3a74f44de2c901e1536d227d29257cae1a6ed18f (diff)
download: linux-cb2a1dd589a0ce97429bf2beeb560e5b030c2ccc.tar.xz
2 files changed, 64 insertions, 0 deletions
diff --git a/arch/s390/lib/Makefile b/arch/s390/lib/Makefile
index 7c50eca85ca4..90eac15ea62a 100644
--- a/arch/s390/lib/Makefile
+++ b/arch/s390/lib/Makefile
@@ -4,6 +4,7 @@
 #
 
 lib-y += delay.o string.o uaccess.o find.o spinlock.o tishift.o
+lib-y += csum-partial.o
 obj-y += mem.o xor.o
 lib-$(CONFIG_KPROBES) += probes.o
 lib-$(CONFIG_UPROBES) += probes.o
diff --git a/arch/s390/lib/csum-partial.c b/arch/s390/lib/csum-partial.c
new file mode 100644
index 000000000000..3ea009cbc3b7
--- /dev/null
+++ b/arch/s390/lib/csum-partial.c
@@ -0,0 +1,63 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/export.h>
+#include <asm/checksum.h>
+#include <asm/fpu.h>
+
+/*
+ * Computes the checksum of a memory block at buff, length len,
+ * and adds in "sum" (32-bit).
+ *
+ * Returns a 32-bit number suitable for feeding into itself
+ * or csum_tcpudp_magic.
+ *
+ * This function must be called with even lengths, except
+ * for the last fragment, which may be odd.
+ *
+ * It's best to have buff aligned on a 64-bit boundary.
+ */
+__wsum csum_partial(const void *buff, int len, __wsum sum)
+{
+	DECLARE_KERNEL_FPU_ONSTACK8(vxstate);	/* state object passed to kernel_fpu_begin()/kernel_fpu_end() */
+
+	if (!cpu_has_vx())	/* cpu_has_vx() false: take the cksm() path instead */
+		return cksm(buff, len, sum);
+	kernel_fpu_begin(&vxstate, KERNEL_VXR_V16V23);	/* only vector registers 16-23 are used below */
+	fpu_vlvgf(16, (__force u32)sum, 1);	/* seed accumulator 16 with "sum" at index 1 */
+	fpu_vzero(17);	/* 17, 18, 19: additional accumulators, zeroed */
+	fpu_vzero(18);
+	fpu_vzero(19);
+	while (len >= 64) {	/* 64 bytes per pass: registers 20-23, one accumulator each */
+		fpu_vlm(20, 23, buff);
+		fpu_vcksm(16, 20, 16);
+		fpu_vcksm(17, 21, 17);
+		fpu_vcksm(18, 22, 18);
+		fpu_vcksm(19, 23, 19);
+		buff += 64;
+		len -= 64;
+	}
+	while (len >= 32) {	/* 32 bytes per pass: registers 20-21 into accumulators 16-17 */
+		fpu_vlm(20, 21, buff);
+		fpu_vcksm(16, 20, 16);
+		fpu_vcksm(17, 21, 17);
+		buff += 32;
+		len -= 32;
+	}
+	while (len >= 16) {	/* 16 bytes per pass: register 20 into accumulator 16 */
+		fpu_vl(20, buff);
+		fpu_vcksm(16, 20, 16);
+		buff += 16;
+		len -= 16;
+	}
+	if (len) {	/* fewer than 16 bytes left (1..15 for non-negative len) */
+		fpu_vll(20, len - 1, buff);	/* NOTE(review): len - 1 presumably is the highest byte index to load - confirm */
+		fpu_vcksm(16, 20, 16);
+	}
+	fpu_vcksm(18, 19, 18);	/* combine 19 into 18 ... */
+	fpu_vcksm(16, 17, 16);	/* ... 17 into 16 ... */
+	fpu_vcksm(16, 18, 16);	/* ... and 18 into 16, which then holds the total */
+	sum = (__force __wsum)fpu_vlgvf(16, 1);	/* read the result back from index 1 of register 16 */
+	kernel_fpu_end(&vxstate, KERNEL_VXR_V16V23);	/* pairs with kernel_fpu_begin() above */
+	return sum;
+}
+EXPORT_SYMBOL(csum_partial);
author	Heiko Carstens <hca@linux.ibm.com>	2024-02-03 13:45:22 +0300
committer	Heiko Carstens <hca@linux.ibm.com>	2024-02-16 16:30:17 +0300
commit	cb2a1dd589a0ce97429bf2beeb560e5b030c2ccc (patch)
tree	cb2ffaa05e7442ba1f4dfb305dcdb7bfef87d463 /arch/s390/lib
parent	3a74f44de2c901e1536d227d29257cae1a6ed18f (diff)
download	linux-cb2a1dd589a0ce97429bf2beeb560e5b030c2ccc.tar.xz