From cb2a1dd589a0ce97429bf2beeb560e5b030c2ccc Mon Sep 17 00:00:00 2001
From: Heiko Carstens <hca@linux.ibm.com>
Date: Sat, 3 Feb 2024 11:45:22 +0100
Subject: s390/checksum: provide vector register variant of csum_partial()

Provide a faster variant of csum_partial() which uses vector registers
instead of the cksm instruction.

Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
---
 arch/s390/lib/Makefile       |  1 +
 arch/s390/lib/csum-partial.c | 63 ++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 64 insertions(+)
 create mode 100644 arch/s390/lib/csum-partial.c

diff --git a/arch/s390/lib/Makefile b/arch/s390/lib/Makefile
index 7c50eca85ca4..90eac15ea62a 100644
--- a/arch/s390/lib/Makefile
+++ b/arch/s390/lib/Makefile
@@ -4,6 +4,7 @@
 #
 
 lib-y += delay.o string.o uaccess.o find.o spinlock.o tishift.o
+lib-y += csum-partial.o
 obj-y += mem.o xor.o
 lib-$(CONFIG_KPROBES) += probes.o
 lib-$(CONFIG_UPROBES) += probes.o
diff --git a/arch/s390/lib/csum-partial.c b/arch/s390/lib/csum-partial.c
new file mode 100644
index 000000000000..3ea009cbc3b7
--- /dev/null
+++ b/arch/s390/lib/csum-partial.c
@@ -0,0 +1,63 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/export.h>
+#include <asm/checksum.h>
+#include <asm/fpu.h>
+
+/*
+ * Computes the checksum of a memory block at buff, length len,
+ * and adds in "sum" (32-bit).
+ *
+ * Returns a 32-bit number suitable for feeding into itself
+ * or csum_tcpudp_magic.
+ *
+ * This function must be called with even lengths, except
+ * for the last fragment, which may be odd.
+ *
+ * It's best to have buff aligned on a 64-bit boundary.
+ */
+__wsum csum_partial(const void *buff, int len, __wsum sum)
+{
+	DECLARE_KERNEL_FPU_ONSTACK8(vxstate);
+
+	if (!cpu_has_vx())
+		return cksm(buff, len, sum);
+	kernel_fpu_begin(&vxstate, KERNEL_VXR_V16V23);
+	fpu_vlvgf(16, (__force u32)sum, 1);
+	fpu_vzero(17);
+	fpu_vzero(18);
+	fpu_vzero(19);
+	while (len >= 64) {
+		fpu_vlm(20, 23, buff);
+		fpu_vcksm(16, 20, 16);
+		fpu_vcksm(17, 21, 17);
+		fpu_vcksm(18, 22, 18);
+		fpu_vcksm(19, 23, 19);
+		buff += 64;
+		len -= 64;
+	}
+	while (len >= 32) {
+		fpu_vlm(20, 21, buff);
+		fpu_vcksm(16, 20, 16);
+		fpu_vcksm(17, 21, 17);
+		buff += 32;
+		len -= 32;
+	}
+	while (len >= 16) {
+		fpu_vl(20, buff);
+		fpu_vcksm(16, 20, 16);
+		buff += 16;
+		len -= 16;
+	}
+	if (len) {
+		fpu_vll(20, len - 1, buff);
+		fpu_vcksm(16, 20, 16);
+	}
+	fpu_vcksm(18, 19, 18);
+	fpu_vcksm(16, 17, 16);
+	fpu_vcksm(16, 18, 16);
+	sum = (__force __wsum)fpu_vlgvf(16, 1);
+	kernel_fpu_end(&vxstate, KERNEL_VXR_V16V23);
+	return sum;
+}
+EXPORT_SYMBOL(csum_partial);
--
cgit v1.2.3
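
A note for readers outside the kernel tree: the sketch below is not part of the patch. It is a minimal scalar
reference, in plain C, for the arithmetic the vector loop performs with VCKSM: a 32-bit one's-complement
(end-around-carry) sum of big-endian words, seeded with the incoming partial sum. The function name
csum_partial_ref, the zero padding of a trailing partial word (mirroring the zero fill done by VLL and by the
cksm instruction), and the deferred carry folding are assumptions for illustration; the result agrees with a
per-word end-around-carry sum up to the usual 0 vs. 0xffffffff one's-complement equivalence.

#include <stddef.h>
#include <stdint.h>

/*
 * Scalar reference (assumption, not part of the patch) for the
 * arithmetic implemented with VCKSM in csum_partial(): a 32-bit
 * one's-complement sum of the buffer plus an initial partial sum.
 */
static uint32_t csum_partial_ref(const void *buff, size_t len, uint32_t sum)
{
	const uint8_t *p = buff;
	uint64_t acc = sum;

	while (len) {
		size_t n = len < 4 ? len : 4;
		uint32_t word = 0;

		/* assemble a big-endian 32-bit word, zero-padded at the end */
		for (size_t i = 0; i < n; i++)
			word |= (uint32_t)p[i] << (24 - 8 * i);

		acc += word;	/* carries collect in the upper 32 bits */
		p += n;
		len -= n;
	}

	/* fold the accumulated carries back in (end-around carry) */
	while (acc >> 32)
		acc = (acc & 0xffffffff) + (acc >> 32);

	return (uint32_t)acc;
}

The patch keeps four independent accumulators (V16-V19) in the 64-byte loop and merges them with three final
VCKSMs, presumably so the four checksum operations per iteration do not serialize on a single register;
one's-complement addition is associative and commutative, so the merged result is unchanged. As the comment in
the new file says, the returned 32-bit value is suitable for feeding back into csum_partial() or into
csum_tcpudp_magic(); folding it down to the final 16-bit Internet checksum is done separately by csum_fold().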