summaryrefslogtreecommitdiff
path: root/arch/loongarch/lib/xor_simd.c
diff options
context:
space:
mode:
authorJiri Kosina <jkosina@suse.cz>2023-11-01 02:07:35 +0300
committerJiri Kosina <jkosina@suse.cz>2023-11-01 02:07:35 +0300
commit20cd569d7ee8fce24e8753f0f43af6c420557b1f (patch)
treef559cfda594846795aa51c99d96f92d8c912851a /arch/loongarch/lib/xor_simd.c
parent62cc9c3cb3ec1bf31cc116146185ed97b450836a (diff)
parenteeebfe6259ba2d5b0980eb7b0df384eb77e9e4f5 (diff)
downloadlinux-20cd569d7ee8fce24e8753f0f43af6c420557b1f.tar.xz
Merge branch 'for-6.7/config_pm' into for-linus
- #ifdef CONFIG_PM removal from HID code (Thomas Weißschuh)
Diffstat (limited to 'arch/loongarch/lib/xor_simd.c')
-rw-r--r--arch/loongarch/lib/xor_simd.c93
1 files changed, 93 insertions, 0 deletions
diff --git a/arch/loongarch/lib/xor_simd.c b/arch/loongarch/lib/xor_simd.c
new file mode 100644
index 000000000000..84cd24b728c4
--- /dev/null
+++ b/arch/loongarch/lib/xor_simd.c
@@ -0,0 +1,93 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * LoongArch SIMD XOR operations
+ *
+ * Copyright (C) 2023 WANG Xuerui <git@xen0n.name>
+ */
+
+#include "xor_simd.h"
+
+/*
+ * Process one cache line (64 bytes) per loop. This is assuming all future
+ * popular LoongArch cores are similar performance-characteristics-wise to the
+ * current models.
+ */
+#define LINE_WIDTH 64
+
+#ifdef CONFIG_CPU_HAS_LSX
+
+#define LD(reg, base, offset) \
+ "vld $vr" #reg ", %[" #base "], " #offset "\n\t"
+#define ST(reg, base, offset) \
+ "vst $vr" #reg ", %[" #base "], " #offset "\n\t"
+#define XOR(dj, k) "vxor.v $vr" #dj ", $vr" #dj ", $vr" #k "\n\t"
+
+#define LD_INOUT_LINE(base) \
+ LD(0, base, 0) \
+ LD(1, base, 16) \
+ LD(2, base, 32) \
+ LD(3, base, 48)
+
+#define LD_AND_XOR_LINE(base) \
+ LD(4, base, 0) \
+ LD(5, base, 16) \
+ LD(6, base, 32) \
+ LD(7, base, 48) \
+ XOR(0, 4) \
+ XOR(1, 5) \
+ XOR(2, 6) \
+ XOR(3, 7)
+
+#define ST_LINE(base) \
+ ST(0, base, 0) \
+ ST(1, base, 16) \
+ ST(2, base, 32) \
+ ST(3, base, 48)
+
+#define XOR_FUNC_NAME(nr) __xor_lsx_##nr
+#include "xor_template.c"
+
+#undef LD
+#undef ST
+#undef XOR
+#undef LD_INOUT_LINE
+#undef LD_AND_XOR_LINE
+#undef ST_LINE
+#undef XOR_FUNC_NAME
+
+#endif /* CONFIG_CPU_HAS_LSX */
+
+#ifdef CONFIG_CPU_HAS_LASX
+
+#define LD(reg, base, offset) \
+ "xvld $xr" #reg ", %[" #base "], " #offset "\n\t"
+#define ST(reg, base, offset) \
+ "xvst $xr" #reg ", %[" #base "], " #offset "\n\t"
+#define XOR(dj, k) "xvxor.v $xr" #dj ", $xr" #dj ", $xr" #k "\n\t"
+
+#define LD_INOUT_LINE(base) \
+ LD(0, base, 0) \
+ LD(1, base, 32)
+
+#define LD_AND_XOR_LINE(base) \
+ LD(2, base, 0) \
+ LD(3, base, 32) \
+ XOR(0, 2) \
+ XOR(1, 3)
+
+#define ST_LINE(base) \
+ ST(0, base, 0) \
+ ST(1, base, 32)
+
+#define XOR_FUNC_NAME(nr) __xor_lasx_##nr
+#include "xor_template.c"
+
+#undef LD
+#undef ST
+#undef XOR
+#undef LD_INOUT_LINE
+#undef LD_AND_XOR_LINE
+#undef ST_LINE
+#undef XOR_FUNC_NAME
+
+#endif /* CONFIG_CPU_HAS_LASX */