From 5125d033c8af733ee4d52e3e3c6ebf5784976e46 Mon Sep 17 00:00:00 2001
From: Xi Ruoyao
Date: Tue, 14 May 2024 12:24:18 +0800
Subject: LoongArch: Select ARCH_SUPPORTS_INT128 if CC_HAS_INT128

This allows compiling a full 128-bit product of two 64-bit integers as a
mul/mulh pair, instead of a nasty long sequence of 20+ instructions.

However, after selecting ARCH_SUPPORTS_INT128, when optimizing for size
the compiler generates calls to __ashlti3, __ashrti3, and __lshrti3 for
shifting __int128 values, causing a link failure:

  loongarch64-unknown-linux-gnu-ld: kernel/sched/fair.o: in function `mul_u64_u32_shr':
  /include/linux/math64.h:161:(.text+0x5e4): undefined reference to `__lshrti3'

So provide the implementation of these functions if ARCH_SUPPORTS_INT128.

Closes: https://lore.kernel.org/loongarch/CAAhV-H5EZ=7OF7CSiYyZ8_+wWuenpo=K2WT8-6mAT4CvzUC_4g@mail.gmail.com/
Signed-off-by: Xi Ruoyao
Signed-off-by: Huacai Chen
---
 arch/loongarch/lib/Makefile  |  2 ++
 arch/loongarch/lib/tishift.S | 56 ++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 58 insertions(+)
 create mode 100644 arch/loongarch/lib/tishift.S

(limited to 'arch/loongarch/lib')

diff --git a/arch/loongarch/lib/Makefile b/arch/loongarch/lib/Makefile
index a77bf160bfc4..ccea3bbd4353 100644
--- a/arch/loongarch/lib/Makefile
+++ b/arch/loongarch/lib/Makefile
@@ -6,6 +6,8 @@
 lib-y	+= delay.o memset.o memcpy.o memmove.o \
 	   clear_user.o copy_user.o csum.o dump_tlb.o unaligned.o
 
+obj-$(CONFIG_ARCH_SUPPORTS_INT128) += tishift.o
+
 obj-$(CONFIG_CPU_HAS_LSX) += xor_simd.o xor_simd_glue.o
 
 obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o
diff --git a/arch/loongarch/lib/tishift.S b/arch/loongarch/lib/tishift.S
new file mode 100644
index 000000000000..fa1d310012bc
--- /dev/null
+++ b/arch/loongarch/lib/tishift.S
@@ -0,0 +1,56 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#include	/* NOTE(review): the header names on these three #include lines are missing (angle-bracket text appears stripped) - restore from the upstream file */
+#include
+#include
+
+SYM_FUNC_START(__ashlti3)	/* 128-bit shift left, branch-free: a1:a0 (hi:lo) <<= a2; only bits 6:0 of a2 are examined */
+	srli.d	t2, a0, 1	/* t2 = lo >> 1; pre-shifting by one lets a count of 0 carry nothing */
+	nor	t3, zero, a2	/* t3 = ~count, so its low 6 bits are 63 - (count & 63) */
+	sll.d	t1, a1, a2	/* t1 = hi << (count & 63) */
+	srl.d	t2, t2, t3	/* t2 = bits of lo that cross into hi */
+	andi	t0, a2, 64	/* t0 = bit 6 of count; nonzero selects the count >= 64 result */
+	sll.d	a0, a0, a2	/* a0 = lo << (count & 63) */
+	or	t1, t2, t1	/* t1 = hi result when bit 6 is clear */
+	maskeqz	a1, a0, t0	/* a1 = t0 ? shifted lo : 0 (shifted lo becomes hi when bit 6 is set) */
+	masknez	a0, a0, t0	/* a0 = t0 ? 0 : shifted lo (lo is all zero when bit 6 is set) */
+	masknez	t0, t1, t0	/* t0 = t0 ? 0 : t1 */
+	or	a1, t0, a1	/* hi = whichever of the two candidates was not masked to zero */
+	jr	ra	/* return with the result in a1:a0 */
+SYM_FUNC_END(__ashlti3)
+EXPORT_SYMBOL(__ashlti3)
+
+SYM_FUNC_START(__ashrti3)	/* 128-bit arithmetic shift right, branch-free: a1:a0 (hi:lo) >>= a2 with sign fill; only bits 6:0 of a2 are examined */
+	nor	t3, zero, a2	/* t3 = ~count, so its low 6 bits are 63 - (count & 63) */
+	slli.d	t2, a1, 1	/* t2 = hi << 1; pre-shifting by one lets a count of 0 carry nothing */
+	srl.d	t1, a0, a2	/* t1 = lo >> (count & 63), logical */
+	sll.d	t2, t2, t3	/* t2 = bits of hi that cross into lo */
+	andi	t0, a2, 64	/* t0 = bit 6 of count; nonzero selects the count >= 64 result */
+	or	t1, t2, t1	/* t1 = lo result when bit 6 is clear */
+	sra.d	a2, a1, a2	/* a2 = hi >> (count & 63), arithmetic; the count is no longer needed after this */
+	srai.d	a1, a1, 63	/* a1 = sign fill: every bit is a copy of the top bit of hi */
+	maskeqz	a0, a2, t0	/* a0 = t0 ? shifted hi : 0 (shifted hi becomes lo when bit 6 is set) */
+	maskeqz	a1, a1, t0	/* a1 = t0 ? sign fill : 0 */
+	masknez	a2, a2, t0	/* a2 = t0 ? 0 : shifted hi */
+	masknez	t0, t1, t0	/* t0 = t0 ? 0 : t1 */
+	or	a1, a1, a2	/* hi = whichever of the two candidates was not masked to zero */
+	or	a0, t0, a0	/* lo = whichever of the two candidates was not masked to zero */
+	jr	ra	/* return with the result in a1:a0 */
+SYM_FUNC_END(__ashrti3)
+EXPORT_SYMBOL(__ashrti3)
+
+SYM_FUNC_START(__lshrti3)	/* 128-bit logical shift right, branch-free: a1:a0 (hi:lo) >>= a2 with zero fill; only bits 6:0 of a2 are examined */
+	slli.d	t2, a1, 1	/* t2 = hi << 1; pre-shifting by one lets a count of 0 carry nothing */
+	nor	t3, zero, a2	/* t3 = ~count, so its low 6 bits are 63 - (count & 63) */
+	srl.d	t1, a0, a2	/* t1 = lo >> (count & 63) */
+	sll.d	t2, t2, t3	/* t2 = bits of hi that cross into lo */
+	andi	t0, a2, 64	/* t0 = bit 6 of count; nonzero selects the count >= 64 result */
+	srl.d	a1, a1, a2	/* a1 = hi >> (count & 63) */
+	or	t1, t2, t1	/* t1 = lo result when bit 6 is clear */
+	maskeqz	a0, a1, t0	/* a0 = t0 ? shifted hi : 0 (shifted hi becomes lo when bit 6 is set) */
+	masknez	a1, a1, t0	/* a1 = t0 ? 0 : shifted hi (hi is all zero when bit 6 is set) */
+	masknez	t0, t1, t0	/* t0 = t0 ? 0 : t1 */
+	or	a0, t0, a0	/* lo = whichever of the two candidates was not masked to zero */
+	jr	ra	/* return with the result in a1:a0 */
+SYM_FUNC_END(__lshrti3)
+EXPORT_SYMBOL(__lshrti3)
-- 
cgit v1.2.3