-rw-r--r--  arch/loongarch/Kconfig                         51
-rw-r--r--  arch/loongarch/include/asm/asmmacro.h         393
-rw-r--r--  arch/loongarch/include/asm/fpu.h              185
-rw-r--r--  arch/loongarch/include/uapi/asm/ptrace.h       16
-rw-r--r--  arch/loongarch/include/uapi/asm/sigcontext.h   18
-rw-r--r--  arch/loongarch/kernel/cpu-probe.c              12
-rw-r--r--  arch/loongarch/kernel/fpu.S                   270
-rw-r--r--  arch/loongarch/kernel/process.c                10
-rw-r--r--  arch/loongarch/kernel/ptrace.c                110
-rw-r--r--  arch/loongarch/kernel/signal.c                326
-rw-r--r--  arch/loongarch/kernel/traps.c                  84
11 files changed, 1452 insertions(+), 23 deletions(-)
diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
index 1944bae2f31c..72b614429c37 100644
--- a/arch/loongarch/Kconfig
+++ b/arch/loongarch/Kconfig
@@ -164,14 +164,6 @@ config 32BIT
config 64BIT
def_bool y
-config CPU_HAS_FPU
- bool
- default y
-
-config CPU_HAS_PREFETCH
- bool
- default y
-
config GENERIC_BUG
def_bool y
depends on BUG
@@ -247,6 +239,12 @@ config AS_HAS_EXPLICIT_RELOCS
config AS_HAS_FCSR_CLASS
def_bool $(as-instr,movfcsr2gr \$t0$(comma)\$fcsr0)
+config AS_HAS_LSX_EXTENSION
+ def_bool $(as-instr,vld \$vr0$(comma)\$a0$(comma)0)
+
+config AS_HAS_LASX_EXTENSION
+ def_bool $(as-instr,xvld \$xr0$(comma)\$a0$(comma)0)
+
menu "Kernel type and options"
source "kernel/Kconfig.hz"
@@ -487,6 +485,43 @@ config ARCH_STRICT_ALIGN
to run kernel only on systems with h/w unaligned access support in
order to optimise for performance.
+config CPU_HAS_FPU
+ bool
+ default y
+
+config CPU_HAS_LSX
+ bool "Support for the Loongson SIMD Extension"
+ depends on AS_HAS_LSX_EXTENSION
+ help
+ Loongson SIMD Extension (LSX) introduces 128 bit wide vector registers
+ and a set of SIMD instructions to operate on them. When this option
+ is enabled the kernel will support allocating & switching LSX
+ vector register contexts. If you know that your kernel will only be
+ running on CPUs which do not support LSX or that your userland will
+ not be making use of it then you may wish to say N here to reduce
+ the size & complexity of your kernel.
+
+ If unsure, say Y.
+
+config CPU_HAS_LASX
+ bool "Support for the Loongson Advanced SIMD Extension"
+ depends on CPU_HAS_LSX
+ depends on AS_HAS_LASX_EXTENSION
+ help
+ Loongson Advanced SIMD Extension (LASX) introduces 256 bit wide vector
+ registers and a set of SIMD instructions to operate on them. When this
+ option is enabled the kernel will support allocating & switching LASX
+ vector register contexts. If you know that your kernel will only be
+ running on CPUs which do not support LASX or that your userland will
+ not be making use of it then you may wish to say N here to reduce
+ the size & complexity of your kernel.
+
+ If unsure, say Y.
+
+config CPU_HAS_PREFETCH
+ bool
+ default y
+
config KEXEC
bool "Kexec system call"
select KEXEC_CORE
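Once CPU_HAS_LSX/CPU_HAS_LASX are enabled and the CPU probing code (see the cpu-probe.c hunk below) advertises the extensions through HWCAP_LOONGARCH_LSX/HWCAP_LOONGARCH_LASX, userland can decide at run time whether to use vector code. A minimal detection sketch, not part of this patch, assuming a LoongArch toolchain whose <asm/hwcap.h> carries these bits:

	/* Hypothetical userland sketch: check the auxiliary vector for LSX/LASX. */
	#include <stdio.h>
	#include <sys/auxv.h>
	#include <asm/hwcap.h>		/* HWCAP_LOONGARCH_LSX, HWCAP_LOONGARCH_LASX */

	int main(void)
	{
		unsigned long hwcap = getauxval(AT_HWCAP);

		printf("LSX:  %s\n", (hwcap & HWCAP_LOONGARCH_LSX)  ? "yes" : "no");
		printf("LASX: %s\n", (hwcap & HWCAP_LOONGARCH_LASX) ? "yes" : "no");
		return 0;
	}
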
diff --git a/arch/loongarch/include/asm/asmmacro.h b/arch/loongarch/include/asm/asmmacro.h
index c51a1b43acb4..79e1d53fea89 100644
--- a/arch/loongarch/include/asm/asmmacro.h
+++ b/arch/loongarch/include/asm/asmmacro.h
@@ -270,6 +270,399 @@
fld.d $f31, \tmp, THREAD_FPR31 - THREAD_FPR0
.endm
+ .macro lsx_save_data thread tmp
+ li.w \tmp, THREAD_FPR0
+ PTR_ADD \tmp, \thread, \tmp
+ vst $vr0, \tmp, THREAD_FPR0 - THREAD_FPR0
+ vst $vr1, \tmp, THREAD_FPR1 - THREAD_FPR0
+ vst $vr2, \tmp, THREAD_FPR2 - THREAD_FPR0
+ vst $vr3, \tmp, THREAD_FPR3 - THREAD_FPR0
+ vst $vr4, \tmp, THREAD_FPR4 - THREAD_FPR0
+ vst $vr5, \tmp, THREAD_FPR5 - THREAD_FPR0
+ vst $vr6, \tmp, THREAD_FPR6 - THREAD_FPR0
+ vst $vr7, \tmp, THREAD_FPR7 - THREAD_FPR0
+ vst $vr8, \tmp, THREAD_FPR8 - THREAD_FPR0
+ vst $vr9, \tmp, THREAD_FPR9 - THREAD_FPR0
+ vst $vr10, \tmp, THREAD_FPR10 - THREAD_FPR0
+ vst $vr11, \tmp, THREAD_FPR11 - THREAD_FPR0
+ vst $vr12, \tmp, THREAD_FPR12 - THREAD_FPR0
+ vst $vr13, \tmp, THREAD_FPR13 - THREAD_FPR0
+ vst $vr14, \tmp, THREAD_FPR14 - THREAD_FPR0
+ vst $vr15, \tmp, THREAD_FPR15 - THREAD_FPR0
+ vst $vr16, \tmp, THREAD_FPR16 - THREAD_FPR0
+ vst $vr17, \tmp, THREAD_FPR17 - THREAD_FPR0
+ vst $vr18, \tmp, THREAD_FPR18 - THREAD_FPR0
+ vst $vr19, \tmp, THREAD_FPR19 - THREAD_FPR0
+ vst $vr20, \tmp, THREAD_FPR20 - THREAD_FPR0
+ vst $vr21, \tmp, THREAD_FPR21 - THREAD_FPR0
+ vst $vr22, \tmp, THREAD_FPR22 - THREAD_FPR0
+ vst $vr23, \tmp, THREAD_FPR23 - THREAD_FPR0
+ vst $vr24, \tmp, THREAD_FPR24 - THREAD_FPR0
+ vst $vr25, \tmp, THREAD_FPR25 - THREAD_FPR0
+ vst $vr26, \tmp, THREAD_FPR26 - THREAD_FPR0
+ vst $vr27, \tmp, THREAD_FPR27 - THREAD_FPR0
+ vst $vr28, \tmp, THREAD_FPR28 - THREAD_FPR0
+ vst $vr29, \tmp, THREAD_FPR29 - THREAD_FPR0
+ vst $vr30, \tmp, THREAD_FPR30 - THREAD_FPR0
+ vst $vr31, \tmp, THREAD_FPR31 - THREAD_FPR0
+ .endm
+
+ .macro lsx_restore_data thread tmp
+ li.w \tmp, THREAD_FPR0
+ PTR_ADD \tmp, \thread, \tmp
+ vld $vr0, \tmp, THREAD_FPR0 - THREAD_FPR0
+ vld $vr1, \tmp, THREAD_FPR1 - THREAD_FPR0
+ vld $vr2, \tmp, THREAD_FPR2 - THREAD_FPR0
+ vld $vr3, \tmp, THREAD_FPR3 - THREAD_FPR0
+ vld $vr4, \tmp, THREAD_FPR4 - THREAD_FPR0
+ vld $vr5, \tmp, THREAD_FPR5 - THREAD_FPR0
+ vld $vr6, \tmp, THREAD_FPR6 - THREAD_FPR0
+ vld $vr7, \tmp, THREAD_FPR7 - THREAD_FPR0
+ vld $vr8, \tmp, THREAD_FPR8 - THREAD_FPR0
+ vld $vr9, \tmp, THREAD_FPR9 - THREAD_FPR0
+ vld $vr10, \tmp, THREAD_FPR10 - THREAD_FPR0
+ vld $vr11, \tmp, THREAD_FPR11 - THREAD_FPR0
+ vld $vr12, \tmp, THREAD_FPR12 - THREAD_FPR0
+ vld $vr13, \tmp, THREAD_FPR13 - THREAD_FPR0
+ vld $vr14, \tmp, THREAD_FPR14 - THREAD_FPR0
+ vld $vr15, \tmp, THREAD_FPR15 - THREAD_FPR0
+ vld $vr16, \tmp, THREAD_FPR16 - THREAD_FPR0
+ vld $vr17, \tmp, THREAD_FPR17 - THREAD_FPR0
+ vld $vr18, \tmp, THREAD_FPR18 - THREAD_FPR0
+ vld $vr19, \tmp, THREAD_FPR19 - THREAD_FPR0
+ vld $vr20, \tmp, THREAD_FPR20 - THREAD_FPR0
+ vld $vr21, \tmp, THREAD_FPR21 - THREAD_FPR0
+ vld $vr22, \tmp, THREAD_FPR22 - THREAD_FPR0
+ vld $vr23, \tmp, THREAD_FPR23 - THREAD_FPR0
+ vld $vr24, \tmp, THREAD_FPR24 - THREAD_FPR0
+ vld $vr25, \tmp, THREAD_FPR25 - THREAD_FPR0
+ vld $vr26, \tmp, THREAD_FPR26 - THREAD_FPR0
+ vld $vr27, \tmp, THREAD_FPR27 - THREAD_FPR0
+ vld $vr28, \tmp, THREAD_FPR28 - THREAD_FPR0
+ vld $vr29, \tmp, THREAD_FPR29 - THREAD_FPR0
+ vld $vr30, \tmp, THREAD_FPR30 - THREAD_FPR0
+ vld $vr31, \tmp, THREAD_FPR31 - THREAD_FPR0
+ .endm
+
+ .macro lsx_save_all thread tmp0 tmp1
+ fpu_save_cc \thread, \tmp0, \tmp1
+ fpu_save_csr \thread, \tmp0
+ lsx_save_data \thread, \tmp0
+ .endm
+
+ .macro lsx_restore_all thread tmp0 tmp1
+ lsx_restore_data \thread, \tmp0
+ fpu_restore_cc \thread, \tmp0, \tmp1
+ fpu_restore_csr \thread, \tmp0
+ .endm
+
+ .macro lsx_save_upper vd base tmp off
+ vpickve2gr.d \tmp, \vd, 1
+ st.d \tmp, \base, (\off+8)
+ .endm
+
+ .macro lsx_save_all_upper thread base tmp
+ li.w \tmp, THREAD_FPR0
+ PTR_ADD \base, \thread, \tmp
+ lsx_save_upper $vr0, \base, \tmp, (THREAD_FPR0-THREAD_FPR0)
+ lsx_save_upper $vr1, \base, \tmp, (THREAD_FPR1-THREAD_FPR0)
+ lsx_save_upper $vr2, \base, \tmp, (THREAD_FPR2-THREAD_FPR0)
+ lsx_save_upper $vr3, \base, \tmp, (THREAD_FPR3-THREAD_FPR0)
+ lsx_save_upper $vr4, \base, \tmp, (THREAD_FPR4-THREAD_FPR0)
+ lsx_save_upper $vr5, \base, \tmp, (THREAD_FPR5-THREAD_FPR0)
+ lsx_save_upper $vr6, \base, \tmp, (THREAD_FPR6-THREAD_FPR0)
+ lsx_save_upper $vr7, \base, \tmp, (THREAD_FPR7-THREAD_FPR0)
+ lsx_save_upper $vr8, \base, \tmp, (THREAD_FPR8-THREAD_FPR0)
+ lsx_save_upper $vr9, \base, \tmp, (THREAD_FPR9-THREAD_FPR0)
+ lsx_save_upper $vr10, \base, \tmp, (THREAD_FPR10-THREAD_FPR0)
+ lsx_save_upper $vr11, \base, \tmp, (THREAD_FPR11-THREAD_FPR0)
+ lsx_save_upper $vr12, \base, \tmp, (THREAD_FPR12-THREAD_FPR0)
+ lsx_save_upper $vr13, \base, \tmp, (THREAD_FPR13-THREAD_FPR0)
+ lsx_save_upper $vr14, \base, \tmp, (THREAD_FPR14-THREAD_FPR0)
+ lsx_save_upper $vr15, \base, \tmp, (THREAD_FPR15-THREAD_FPR0)
+ lsx_save_upper $vr16, \base, \tmp, (THREAD_FPR16-THREAD_FPR0)
+ lsx_save_upper $vr17, \base, \tmp, (THREAD_FPR17-THREAD_FPR0)
+ lsx_save_upper $vr18, \base, \tmp, (THREAD_FPR18-THREAD_FPR0)
+ lsx_save_upper $vr19, \base, \tmp, (THREAD_FPR19-THREAD_FPR0)
+ lsx_save_upper $vr20, \base, \tmp, (THREAD_FPR20-THREAD_FPR0)
+ lsx_save_upper $vr21, \base, \tmp, (THREAD_FPR21-THREAD_FPR0)
+ lsx_save_upper $vr22, \base, \tmp, (THREAD_FPR22-THREAD_FPR0)
+ lsx_save_upper $vr23, \base, \tmp, (THREAD_FPR23-THREAD_FPR0)
+ lsx_save_upper $vr24, \base, \tmp, (THREAD_FPR24-THREAD_FPR0)
+ lsx_save_upper $vr25, \base, \tmp, (THREAD_FPR25-THREAD_FPR0)
+ lsx_save_upper $vr26, \base, \tmp, (THREAD_FPR26-THREAD_FPR0)
+ lsx_save_upper $vr27, \base, \tmp, (THREAD_FPR27-THREAD_FPR0)
+ lsx_save_upper $vr28, \base, \tmp, (THREAD_FPR28-THREAD_FPR0)
+ lsx_save_upper $vr29, \base, \tmp, (THREAD_FPR29-THREAD_FPR0)
+ lsx_save_upper $vr30, \base, \tmp, (THREAD_FPR30-THREAD_FPR0)
+ lsx_save_upper $vr31, \base, \tmp, (THREAD_FPR31-THREAD_FPR0)
+ .endm
+
+ .macro lsx_restore_upper vd base tmp off
+ ld.d \tmp, \base, (\off+8)
+ vinsgr2vr.d \vd, \tmp, 1
+ .endm
+
+ .macro lsx_restore_all_upper thread base tmp
+ li.w \tmp, THREAD_FPR0
+ PTR_ADD \base, \thread, \tmp
+ lsx_restore_upper $vr0, \base, \tmp, (THREAD_FPR0-THREAD_FPR0)
+ lsx_restore_upper $vr1, \base, \tmp, (THREAD_FPR1-THREAD_FPR0)
+ lsx_restore_upper $vr2, \base, \tmp, (THREAD_FPR2-THREAD_FPR0)
+ lsx_restore_upper $vr3, \base, \tmp, (THREAD_FPR3-THREAD_FPR0)
+ lsx_restore_upper $vr4, \base, \tmp, (THREAD_FPR4-THREAD_FPR0)
+ lsx_restore_upper $vr5, \base, \tmp, (THREAD_FPR5-THREAD_FPR0)
+ lsx_restore_upper $vr6, \base, \tmp, (THREAD_FPR6-THREAD_FPR0)
+ lsx_restore_upper $vr7, \base, \tmp, (THREAD_FPR7-THREAD_FPR0)
+ lsx_restore_upper $vr8, \base, \tmp, (THREAD_FPR8-THREAD_FPR0)
+ lsx_restore_upper $vr9, \base, \tmp, (THREAD_FPR9-THREAD_FPR0)
+ lsx_restore_upper $vr10, \base, \tmp, (THREAD_FPR10-THREAD_FPR0)
+ lsx_restore_upper $vr11, \base, \tmp, (THREAD_FPR11-THREAD_FPR0)
+ lsx_restore_upper $vr12, \base, \tmp, (THREAD_FPR12-THREAD_FPR0)
+ lsx_restore_upper $vr13, \base, \tmp, (THREAD_FPR13-THREAD_FPR0)
+ lsx_restore_upper $vr14, \base, \tmp, (THREAD_FPR14-THREAD_FPR0)
+ lsx_restore_upper $vr15, \base, \tmp, (THREAD_FPR15-THREAD_FPR0)
+ lsx_restore_upper $vr16, \base, \tmp, (THREAD_FPR16-THREAD_FPR0)
+ lsx_restore_upper $vr17, \base, \tmp, (THREAD_FPR17-THREAD_FPR0)
+ lsx_restore_upper $vr18, \base, \tmp, (THREAD_FPR18-THREAD_FPR0)
+ lsx_restore_upper $vr19, \base, \tmp, (THREAD_FPR19-THREAD_FPR0)
+ lsx_restore_upper $vr20, \base, \tmp, (THREAD_FPR20-THREAD_FPR0)
+ lsx_restore_upper $vr21, \base, \tmp, (THREAD_FPR21-THREAD_FPR0)
+ lsx_restore_upper $vr22, \base, \tmp, (THREAD_FPR22-THREAD_FPR0)
+ lsx_restore_upper $vr23, \base, \tmp, (THREAD_FPR23-THREAD_FPR0)
+ lsx_restore_upper $vr24, \base, \tmp, (THREAD_FPR24-THREAD_FPR0)
+ lsx_restore_upper $vr25, \base, \tmp, (THREAD_FPR25-THREAD_FPR0)
+ lsx_restore_upper $vr26, \base, \tmp, (THREAD_FPR26-THREAD_FPR0)
+ lsx_restore_upper $vr27, \base, \tmp, (THREAD_FPR27-THREAD_FPR0)
+ lsx_restore_upper $vr28, \base, \tmp, (THREAD_FPR28-THREAD_FPR0)
+ lsx_restore_upper $vr29, \base, \tmp, (THREAD_FPR29-THREAD_FPR0)
+ lsx_restore_upper $vr30, \base, \tmp, (THREAD_FPR30-THREAD_FPR0)
+ lsx_restore_upper $vr31, \base, \tmp, (THREAD_FPR31-THREAD_FPR0)
+ .endm
+
+ .macro lsx_init_upper vd tmp
+ vinsgr2vr.d \vd, \tmp, 1
+ .endm
+
+ .macro lsx_init_all_upper tmp
+ not \tmp, zero
+ lsx_init_upper $vr0 \tmp
+ lsx_init_upper $vr1 \tmp
+ lsx_init_upper $vr2 \tmp
+ lsx_init_upper $vr3 \tmp
+ lsx_init_upper $vr4 \tmp
+ lsx_init_upper $vr5 \tmp
+ lsx_init_upper $vr6 \tmp
+ lsx_init_upper $vr7 \tmp
+ lsx_init_upper $vr8 \tmp
+ lsx_init_upper $vr9 \tmp
+ lsx_init_upper $vr10 \tmp
+ lsx_init_upper $vr11 \tmp
+ lsx_init_upper $vr12 \tmp
+ lsx_init_upper $vr13 \tmp
+ lsx_init_upper $vr14 \tmp
+ lsx_init_upper $vr15 \tmp
+ lsx_init_upper $vr16 \tmp
+ lsx_init_upper $vr17 \tmp
+ lsx_init_upper $vr18 \tmp
+ lsx_init_upper $vr19 \tmp
+ lsx_init_upper $vr20 \tmp
+ lsx_init_upper $vr21 \tmp
+ lsx_init_upper $vr22 \tmp
+ lsx_init_upper $vr23 \tmp
+ lsx_init_upper $vr24 \tmp
+ lsx_init_upper $vr25 \tmp
+ lsx_init_upper $vr26 \tmp
+ lsx_init_upper $vr27 \tmp
+ lsx_init_upper $vr28 \tmp
+ lsx_init_upper $vr29 \tmp
+ lsx_init_upper $vr30 \tmp
+ lsx_init_upper $vr31 \tmp
+ .endm
+
+ .macro lasx_save_data thread tmp
+ li.w \tmp, THREAD_FPR0
+ PTR_ADD \tmp, \thread, \tmp
+ xvst $xr0, \tmp, THREAD_FPR0 - THREAD_FPR0
+ xvst $xr1, \tmp, THREAD_FPR1 - THREAD_FPR0
+ xvst $xr2, \tmp, THREAD_FPR2 - THREAD_FPR0
+ xvst $xr3, \tmp, THREAD_FPR3 - THREAD_FPR0
+ xvst $xr4, \tmp, THREAD_FPR4 - THREAD_FPR0
+ xvst $xr5, \tmp, THREAD_FPR5 - THREAD_FPR0
+ xvst $xr6, \tmp, THREAD_FPR6 - THREAD_FPR0
+ xvst $xr7, \tmp, THREAD_FPR7 - THREAD_FPR0
+ xvst $xr8, \tmp, THREAD_FPR8 - THREAD_FPR0
+ xvst $xr9, \tmp, THREAD_FPR9 - THREAD_FPR0
+ xvst $xr10, \tmp, THREAD_FPR10 - THREAD_FPR0
+ xvst $xr11, \tmp, THREAD_FPR11 - THREAD_FPR0
+ xvst $xr12, \tmp, THREAD_FPR12 - THREAD_FPR0
+ xvst $xr13, \tmp, THREAD_FPR13 - THREAD_FPR0
+ xvst $xr14, \tmp, THREAD_FPR14 - THREAD_FPR0
+ xvst $xr15, \tmp, THREAD_FPR15 - THREAD_FPR0
+ xvst $xr16, \tmp, THREAD_FPR16 - THREAD_FPR0
+ xvst $xr17, \tmp, THREAD_FPR17 - THREAD_FPR0
+ xvst $xr18, \tmp, THREAD_FPR18 - THREAD_FPR0
+ xvst $xr19, \tmp, THREAD_FPR19 - THREAD_FPR0
+ xvst $xr20, \tmp, THREAD_FPR20 - THREAD_FPR0
+ xvst $xr21, \tmp, THREAD_FPR21 - THREAD_FPR0
+ xvst $xr22, \tmp, THREAD_FPR22 - THREAD_FPR0
+ xvst $xr23, \tmp, THREAD_FPR23 - THREAD_FPR0
+ xvst $xr24, \tmp, THREAD_FPR24 - THREAD_FPR0
+ xvst $xr25, \tmp, THREAD_FPR25 - THREAD_FPR0
+ xvst $xr26, \tmp, THREAD_FPR26 - THREAD_FPR0
+ xvst $xr27, \tmp, THREAD_FPR27 - THREAD_FPR0
+ xvst $xr28, \tmp, THREAD_FPR28 - THREAD_FPR0
+ xvst $xr29, \tmp, THREAD_FPR29 - THREAD_FPR0
+ xvst $xr30, \tmp, THREAD_FPR30 - THREAD_FPR0
+ xvst $xr31, \tmp, THREAD_FPR31 - THREAD_FPR0
+ .endm
+
+ .macro lasx_restore_data thread tmp
+ li.w \tmp, THREAD_FPR0
+ PTR_ADD \tmp, \thread, \tmp
+ xvld $xr0, \tmp, THREAD_FPR0 - THREAD_FPR0
+ xvld $xr1, \tmp, THREAD_FPR1 - THREAD_FPR0
+ xvld $xr2, \tmp, THREAD_FPR2 - THREAD_FPR0
+ xvld $xr3, \tmp, THREAD_FPR3 - THREAD_FPR0
+ xvld $xr4, \tmp, THREAD_FPR4 - THREAD_FPR0
+ xvld $xr5, \tmp, THREAD_FPR5 - THREAD_FPR0
+ xvld $xr6, \tmp, THREAD_FPR6 - THREAD_FPR0
+ xvld $xr7, \tmp, THREAD_FPR7 - THREAD_FPR0
+ xvld $xr8, \tmp, THREAD_FPR8 - THREAD_FPR0
+ xvld $xr9, \tmp, THREAD_FPR9 - THREAD_FPR0
+ xvld $xr10, \tmp, THREAD_FPR10 - THREAD_FPR0
+ xvld $xr11, \tmp, THREAD_FPR11 - THREAD_FPR0
+ xvld $xr12, \tmp, THREAD_FPR12 - THREAD_FPR0
+ xvld $xr13, \tmp, THREAD_FPR13 - THREAD_FPR0
+ xvld $xr14, \tmp, THREAD_FPR14 - THREAD_FPR0
+ xvld $xr15, \tmp, THREAD_FPR15 - THREAD_FPR0
+ xvld $xr16, \tmp, THREAD_FPR16 - THREAD_FPR0
+ xvld $xr17, \tmp, THREAD_FPR17 - THREAD_FPR0
+ xvld $xr18, \tmp, THREAD_FPR18 - THREAD_FPR0
+ xvld $xr19, \tmp, THREAD_FPR19 - THREAD_FPR0
+ xvld $xr20, \tmp, THREAD_FPR20 - THREAD_FPR0
+ xvld $xr21, \tmp, THREAD_FPR21 - THREAD_FPR0
+ xvld $xr22, \tmp, THREAD_FPR22 - THREAD_FPR0
+ xvld $xr23, \tmp, THREAD_FPR23 - THREAD_FPR0
+ xvld $xr24, \tmp, THREAD_FPR24 - THREAD_FPR0
+ xvld $xr25, \tmp, THREAD_FPR25 - THREAD_FPR0
+ xvld $xr26, \tmp, THREAD_FPR26 - THREAD_FPR0
+ xvld $xr27, \tmp, THREAD_FPR27 - THREAD_FPR0
+ xvld $xr28, \tmp, THREAD_FPR28 - THREAD_FPR0
+ xvld $xr29, \tmp, THREAD_FPR29 - THREAD_FPR0
+ xvld $xr30, \tmp, THREAD_FPR30 - THREAD_FPR0
+ xvld $xr31, \tmp, THREAD_FPR31 - THREAD_FPR0
+ .endm
+
+ .macro lasx_save_all thread tmp0 tmp1
+ fpu_save_cc \thread, \tmp0, \tmp1
+ fpu_save_csr \thread, \tmp0
+ lasx_save_data \thread, \tmp0
+ .endm
+
+ .macro lasx_restore_all thread tmp0 tmp1
+ lasx_restore_data \thread, \tmp0
+ fpu_restore_cc \thread, \tmp0, \tmp1
+ fpu_restore_csr \thread, \tmp0
+ .endm
+
+ .macro lasx_save_upper xd base tmp off
+ /* Nothing */
+ .endm
+
+ .macro lasx_save_all_upper thread base tmp
+ /* Nothing */
+ .endm
+
+ .macro lasx_restore_upper xd base tmp0 tmp1 off
+ vld \tmp0, \base, (\off+16)
+ xvpermi.q \xd, \tmp1, 0x2
+ .endm
+
+ .macro lasx_restore_all_upper thread base tmp
+ li.w \tmp, THREAD_FPR0
+ PTR_ADD \base, \thread, \tmp
+ /* Save $vr31 ($xr31 lower bits) with xvpickve2gr */
+ xvpickve2gr.d $r17, $xr31, 0
+ xvpickve2gr.d $r18, $xr31, 1
+ lasx_restore_upper $xr0, \base, $vr31, $xr31, (THREAD_FPR0-THREAD_FPR0)
+ lasx_restore_upper $xr1, \base, $vr31, $xr31, (THREAD_FPR1-THREAD_FPR0)
+ lasx_restore_upper $xr2, \base, $vr31, $xr31, (THREAD_FPR2-THREAD_FPR0)
+ lasx_restore_upper $xr3, \base, $vr31, $xr31, (THREAD_FPR3-THREAD_FPR0)
+ lasx_restore_upper $xr4, \base, $vr31, $xr31, (THREAD_FPR4-THREAD_FPR0)
+ lasx_restore_upper $xr5, \base, $vr31, $xr31, (THREAD_FPR5-THREAD_FPR0)
+ lasx_restore_upper $xr6, \base, $vr31, $xr31, (THREAD_FPR6-THREAD_FPR0)
+ lasx_restore_upper $xr7, \base, $vr31, $xr31, (THREAD_FPR7-THREAD_FPR0)
+ lasx_restore_upper $xr8, \base, $vr31, $xr31, (THREAD_FPR8-THREAD_FPR0)
+ lasx_restore_upper $xr9, \base, $vr31, $xr31, (THREAD_FPR9-THREAD_FPR0)
+ lasx_restore_upper $xr10, \base, $vr31, $xr31, (THREAD_FPR10-THREAD_FPR0)
+ lasx_restore_upper $xr11, \base, $vr31, $xr31, (THREAD_FPR11-THREAD_FPR0)
+ lasx_restore_upper $xr12, \base, $vr31, $xr31, (THREAD_FPR12-THREAD_FPR0)
+ lasx_restore_upper $xr13, \base, $vr31, $xr31, (THREAD_FPR13-THREAD_FPR0)
+ lasx_restore_upper $xr14, \base, $vr31, $xr31, (THREAD_FPR14-THREAD_FPR0)
+ lasx_restore_upper $xr15, \base, $vr31, $xr31, (THREAD_FPR15-THREAD_FPR0)
+ lasx_restore_upper $xr16, \base, $vr31, $xr31, (THREAD_FPR16-THREAD_FPR0)
+ lasx_restore_upper $xr17, \base, $vr31, $xr31, (THREAD_FPR17-THREAD_FPR0)
+ lasx_restore_upper $xr18, \base, $vr31, $xr31, (THREAD_FPR18-THREAD_FPR0)
+ lasx_restore_upper $xr19, \base, $vr31, $xr31, (THREAD_FPR19-THREAD_FPR0)
+ lasx_restore_upper $xr20, \base, $vr31, $xr31, (THREAD_FPR20-THREAD_FPR0)
+ lasx_restore_upper $xr21, \base, $vr31, $xr31, (THREAD_FPR21-THREAD_FPR0)
+ lasx_restore_upper $xr22, \base, $vr31, $xr31, (THREAD_FPR22-THREAD_FPR0)
+ lasx_restore_upper $xr23, \base, $vr31, $xr31, (THREAD_FPR23-THREAD_FPR0)
+ lasx_restore_upper $xr24, \base, $vr31, $xr31, (THREAD_FPR24-THREAD_FPR0)
+ lasx_restore_upper $xr25, \base, $vr31, $xr31, (THREAD_FPR25-THREAD_FPR0)
+ lasx_restore_upper $xr26, \base, $vr31, $xr31, (THREAD_FPR26-THREAD_FPR0)
+ lasx_restore_upper $xr27, \base, $vr31, $xr31, (THREAD_FPR27-THREAD_FPR0)
+ lasx_restore_upper $xr28, \base, $vr31, $xr31, (THREAD_FPR28-THREAD_FPR0)
+ lasx_restore_upper $xr29, \base, $vr31, $xr31, (THREAD_FPR29-THREAD_FPR0)
+ lasx_restore_upper $xr30, \base, $vr31, $xr31, (THREAD_FPR30-THREAD_FPR0)
+ lasx_restore_upper $xr31, \base, $vr31, $xr31, (THREAD_FPR31-THREAD_FPR0)
+ /* Restore $vr31 ($xr31 lower bits) with xvinsgr2vr */
+ xvinsgr2vr.d $xr31, $r17, 0
+ xvinsgr2vr.d $xr31, $r18, 1
+ .endm
+
+ .macro lasx_init_upper xd tmp
+ xvinsgr2vr.d \xd, \tmp, 2
+ xvinsgr2vr.d \xd, \tmp, 3
+ .endm
+
+ .macro lasx_init_all_upper tmp
+ not \tmp, zero
+ lasx_init_upper $xr0 \tmp
+ lasx_init_upper $xr1 \tmp
+ lasx_init_upper $xr2 \tmp
+ lasx_init_upper $xr3 \tmp
+ lasx_init_upper $xr4 \tmp
+ lasx_init_upper $xr5 \tmp
+ lasx_init_upper $xr6 \tmp
+ lasx_init_upper $xr7 \tmp
+ lasx_init_upper $xr8 \tmp
+ lasx_init_upper $xr9 \tmp
+ lasx_init_upper $xr10 \tmp
+ lasx_init_upper $xr11 \tmp
+ lasx_init_upper $xr12 \tmp
+ lasx_init_upper $xr13 \tmp
+ lasx_init_upper $xr14 \tmp
+ lasx_init_upper $xr15 \tmp
+ lasx_init_upper $xr16 \tmp
+ lasx_init_upper $xr17 \tmp
+ lasx_init_upper $xr18 \tmp
+ lasx_init_upper $xr19 \tmp
+ lasx_init_upper $xr20 \tmp
+ lasx_init_upper $xr21 \tmp
+ lasx_init_upper $xr22 \tmp
+ lasx_init_upper $xr23 \tmp
+ lasx_init_upper $xr24 \tmp
+ lasx_init_upper $xr25 \tmp
+ lasx_init_upper $xr26 \tmp
+ lasx_init_upper $xr27 \tmp
+ lasx_init_upper $xr28 \tmp
+ lasx_init_upper $xr29 \tmp
+ lasx_init_upper $xr30 \tmp
+ lasx_init_upper $xr31 \tmp
+ .endm
+
.macro not dst src
nor \dst, \src, zero
.endm
diff --git a/arch/loongarch/include/asm/fpu.h b/arch/loongarch/include/asm/fpu.h
index 192f8e35d912..e4193d637f66 100644
--- a/arch/loongarch/include/asm/fpu.h
+++ b/arch/loongarch/include/asm/fpu.h
@@ -28,6 +28,26 @@ extern void _init_fpu(unsigned int);
extern void _save_fp(struct loongarch_fpu *);
extern void _restore_fp(struct loongarch_fpu *);
+extern void _save_lsx(struct loongarch_fpu *fpu);
+extern void _restore_lsx(struct loongarch_fpu *fpu);
+extern void _init_lsx_upper(void);
+extern void _restore_lsx_upper(struct loongarch_fpu *fpu);
+
+extern void _save_lasx(struct loongarch_fpu *fpu);
+extern void _restore_lasx(struct loongarch_fpu *fpu);
+extern void _init_lasx_upper(void);
+extern void _restore_lasx_upper(struct loongarch_fpu *fpu);
+
+static inline void enable_lsx(void);
+static inline void disable_lsx(void);
+static inline void save_lsx(struct task_struct *t);
+static inline void restore_lsx(struct task_struct *t);
+
+static inline void enable_lasx(void);
+static inline void disable_lasx(void);
+static inline void save_lasx(struct task_struct *t);
+static inline void restore_lasx(struct task_struct *t);
+
/*
* Mask the FCSR Cause bits according to the Enable bits, observing
* that Unimplemented is always enabled.
@@ -44,6 +64,29 @@ static inline int is_fp_enabled(void)
1 : 0;
}
+static inline int is_lsx_enabled(void)
+{
+ if (!cpu_has_lsx)
+ return 0;
+
+ return (csr_read32(LOONGARCH_CSR_EUEN) & CSR_EUEN_LSXEN) ?
+ 1 : 0;
+}
+
+static inline int is_lasx_enabled(void)
+{
+ if (!cpu_has_lasx)
+ return 0;
+
+ return (csr_read32(LOONGARCH_CSR_EUEN) & CSR_EUEN_LASXEN) ?
+ 1 : 0;
+}
+
+static inline int is_simd_enabled(void)
+{
+ return is_lsx_enabled() | is_lasx_enabled();
+}
+
#define enable_fpu() set_csr_euen(CSR_EUEN_FPEN)
#define disable_fpu() clear_csr_euen(CSR_EUEN_FPEN)
@@ -81,9 +124,22 @@ static inline void own_fpu(int restore)
static inline void lose_fpu_inatomic(int save, struct task_struct *tsk)
{
if (is_fpu_owner()) {
- if (save)
- _save_fp(&tsk->thread.fpu);
- disable_fpu();
+ if (!is_simd_enabled()) {
+ if (save)
+ _save_fp(&tsk->thread.fpu);
+ disable_fpu();
+ } else {
+ if (save) {
+ if (!is_lasx_enabled())
+ save_lsx(tsk);
+ else
+ save_lasx(tsk);
+ }
+ disable_fpu();
+ disable_lsx();
+ disable_lasx();
+ clear_tsk_thread_flag(tsk, TIF_USEDSIMD);
+ }
clear_tsk_thread_flag(tsk, TIF_USEDFPU);
}
KSTK_EUEN(tsk) &= ~(CSR_EUEN_FPEN | CSR_EUEN_LSXEN | CSR_EUEN_LASXEN);
@@ -129,4 +185,127 @@ static inline union fpureg *get_fpu_regs(struct task_struct *tsk)
return tsk->thread.fpu.fpr;
}
+static inline int is_simd_owner(void)
+{
+ return test_thread_flag(TIF_USEDSIMD);
+}
+
+#ifdef CONFIG_CPU_HAS_LSX
+
+static inline void enable_lsx(void)
+{
+ if (cpu_has_lsx)
+ csr_xchg32(CSR_EUEN_LSXEN, CSR_EUEN_LSXEN, LOONGARCH_CSR_EUEN);
+}
+
+static inline void disable_lsx(void)
+{
+ if (cpu_has_lsx)
+ csr_xchg32(0, CSR_EUEN_LSXEN, LOONGARCH_CSR_EUEN);
+}
+
+static inline void save_lsx(struct task_struct *t)
+{
+ if (cpu_has_lsx)
+ _save_lsx(&t->thread.fpu);
+}
+
+static inline void restore_lsx(struct task_struct *t)
+{
+ if (cpu_has_lsx)
+ _restore_lsx(&t->thread.fpu);
+}
+
+static inline void init_lsx_upper(void)
+{
+ /*
+ * Check cpu_has_lsx only if it's a constant. This will allow the
+ * compiler to optimise out code for CPUs without LSX without adding
+ * an extra redundant check for CPUs with LSX.
+ */
+ if (__builtin_constant_p(cpu_has_lsx) && !cpu_has_lsx)
+ return;
+
+ _init_lsx_upper();
+}
+
+static inline void restore_lsx_upper(struct task_struct *t)
+{
+ if (cpu_has_lsx)
+ _restore_lsx_upper(&t->thread.fpu);
+}
+
+#else
+static inline void enable_lsx(void) {}
+static inline void disable_lsx(void) {}
+static inline void save_lsx(struct task_struct *t) {}
+static inline void restore_lsx(struct task_struct *t) {}
+static inline void init_lsx_upper(void) {}
+static inline void restore_lsx_upper(struct task_struct *t) {}
+#endif
+
+#ifdef CONFIG_CPU_HAS_LASX
+
+static inline void enable_lasx(void)
+{
+
+ if (cpu_has_lasx)
+ csr_xchg32(CSR_EUEN_LASXEN, CSR_EUEN_LASXEN, LOONGARCH_CSR_EUEN);
+}
+
+static inline void disable_lasx(void)
+{
+ if (cpu_has_lasx)
+ csr_xchg32(0, CSR_EUEN_LASXEN, LOONGARCH_CSR_EUEN);
+}
+
+static inline void save_lasx(struct task_struct *t)
+{
+ if (cpu_has_lasx)
+ _save_lasx(&t->thread.fpu);
+}
+
+static inline void restore_lasx(struct task_struct *t)
+{
+ if (cpu_has_lasx)
+ _restore_lasx(&t->thread.fpu);
+}
+
+static inline void init_lasx_upper(void)
+{
+ if (cpu_has_lasx)
+ _init_lasx_upper();
+}
+
+static inline void restore_lasx_upper(struct task_struct *t)
+{
+ if (cpu_has_lasx)
+ _restore_lasx_upper(&t->thread.fpu);
+}
+
+#else
+static inline void enable_lasx(void) {}
+static inline void disable_lasx(void) {}
+static inline void save_lasx(struct task_struct *t) {}
+static inline void restore_lasx(struct task_struct *t) {}
+static inline void init_lasx_upper(void) {}
+static inline void restore_lasx_upper(struct task_struct *t) {}
+#endif
+
+static inline int thread_lsx_context_live(void)
+{
+ if (__builtin_constant_p(cpu_has_lsx) && !cpu_has_lsx)
+ return 0;
+
+ return test_thread_flag(TIF_LSX_CTX_LIVE);
+}
+
+static inline int thread_lasx_context_live(void)
+{
+ if (__builtin_constant_p(cpu_has_lasx) && !cpu_has_lasx)
+ return 0;
+
+ return test_thread_flag(TIF_LASX_CTX_LIVE);
+}
+
#endif /* _ASM_FPU_H */
diff --git a/arch/loongarch/include/uapi/asm/ptrace.h b/arch/loongarch/include/uapi/asm/ptrace.h
index 82d811b5c6e9..06e3be52cb04 100644
--- a/arch/loongarch/include/uapi/asm/ptrace.h
+++ b/arch/loongarch/include/uapi/asm/ptrace.h
@@ -41,9 +41,19 @@ struct user_pt_regs {
} __attribute__((aligned(8)));
struct user_fp_state {
- uint64_t fpr[32];
- uint64_t fcc;
- uint32_t fcsr;
+ uint64_t fpr[32];
+ uint64_t fcc;
+ uint32_t fcsr;
+};
+
+struct user_lsx_state {
+ /* 32 registers, 128 bits width per register. */
+ uint64_t vregs[32*2];
+};
+
+struct user_lasx_state {
+ /* 32 registers, 256 bits width per register. */
+ uint64_t vregs[32*4];
};
struct user_watch_state {
diff --git a/arch/loongarch/include/uapi/asm/sigcontext.h b/arch/loongarch/include/uapi/asm/sigcontext.h
index 52e49b8bf4be..4cd7d16f7037 100644
--- a/arch/loongarch/include/uapi/asm/sigcontext.h
+++ b/arch/loongarch/include/uapi/asm/sigcontext.h
@@ -41,4 +41,22 @@ struct fpu_context {
__u32 fcsr;
};
+/* LSX context */
+#define LSX_CTX_MAGIC 0x53580001
+#define LSX_CTX_ALIGN 16
+struct lsx_context {
+ __u64 regs[2*32];
+ __u64 fcc;
+ __u32 fcsr;
+};
+
+/* LASX context */
+#define LASX_CTX_MAGIC 0x41535801
+#define LASX_CTX_ALIGN 32
+struct lasx_context {
+ __u64 regs[4*32];
+ __u64 fcc;
+ __u32 fcsr;
+};
+
#endif /* _UAPI_ASM_SIGCONTEXT_H */
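The lsx_context/lasx_context records above do not sit at fixed offsets; the kernel appends them to the signal frame as extended-context records, each preceded by a struct sctx_info header (magic + total size) and terminated by a record with a zero magic. A hedged sketch, not part of this patch, of how a signal handler could locate the LSX record, assuming the sctx_info header and the sc_extcontext member from the existing <asm/sigcontext.h>:

	/* Hypothetical userland sketch: find the LSX record in a signal frame. */
	#include <stddef.h>
	#include <asm/sigcontext.h>

	static struct lsx_context *find_lsx_ctx(struct sigcontext *sc)
	{
		struct sctx_info *info = (struct sctx_info *)&sc->sc_extcontext;

		while (info->magic != 0) {
			if (info->magic == LSX_CTX_MAGIC)	/* payload follows the header */
				return (struct lsx_context *)(info + 1);
			/* info->size covers the header plus payload, so it advances to the next record */
			info = (struct sctx_info *)((char *)info + info->size);
		}
		return NULL;	/* no LSX record in this frame */
	}
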
diff --git a/arch/loongarch/kernel/cpu-probe.c b/arch/loongarch/kernel/cpu-probe.c
index 5adf0f736c6d..f42acc6c8df6 100644
--- a/arch/loongarch/kernel/cpu-probe.c
+++ b/arch/loongarch/kernel/cpu-probe.c
@@ -116,6 +116,18 @@ static void cpu_probe_common(struct cpuinfo_loongarch *c)
c->options |= LOONGARCH_CPU_FPU;
elf_hwcap |= HWCAP_LOONGARCH_FPU;
}
+#ifdef CONFIG_CPU_HAS_LSX
+ if (config & CPUCFG2_LSX) {
+ c->options |= LOONGARCH_CPU_LSX;
+ elf_hwcap |= HWCAP_LOONGARCH_LSX;
+ }
+#endif
+#ifdef CONFIG_CPU_HAS_LASX
+ if (config & CPUCFG2_LASX) {
+ c->options |= LOONGARCH_CPU_LASX;
+ elf_hwcap |= HWCAP_LOONGARCH_LASX;
+ }
+#endif
if (config & CPUCFG2_COMPLEX) {
c->options |= LOONGARCH_CPU_COMPLEX;
elf_hwcap |= HWCAP_LOONGARCH_COMPLEX;
diff --git a/arch/loongarch/kernel/fpu.S b/arch/loongarch/kernel/fpu.S
index ccde94140c89..f3df5f0a4509 100644
--- a/arch/loongarch/kernel/fpu.S
+++ b/arch/loongarch/kernel/fpu.S
@@ -145,6 +145,154 @@
movgr2fcsr fcsr0, \tmp0
.endm
+ .macro sc_save_lsx base
+#ifdef CONFIG_CPU_HAS_LSX
+ EX vst $vr0, \base, (0 * LSX_REG_WIDTH)
+ EX vst $vr1, \base, (1 * LSX_REG_WIDTH)
+ EX vst $vr2, \base, (2 * LSX_REG_WIDTH)
+ EX vst $vr3, \base, (3 * LSX_REG_WIDTH)
+ EX vst $vr4, \base, (4 * LSX_REG_WIDTH)
+ EX vst $vr5, \base, (5 * LSX_REG_WIDTH)
+ EX vst $vr6, \base, (6 * LSX_REG_WIDTH)
+ EX vst $vr7, \base, (7 * LSX_REG_WIDTH)
+ EX vst $vr8, \base, (8 * LSX_REG_WIDTH)
+ EX vst $vr9, \base, (9 * LSX_REG_WIDTH)
+ EX vst $vr10, \base, (10 * LSX_REG_WIDTH)
+ EX vst $vr11, \base, (11 * LSX_REG_WIDTH)
+ EX vst $vr12, \base, (12 * LSX_REG_WIDTH)
+ EX vst $vr13, \base, (13 * LSX_REG_WIDTH)
+ EX vst $vr14, \base, (14 * LSX_REG_WIDTH)
+ EX vst $vr15, \base, (15 * LSX_REG_WIDTH)
+ EX vst $vr16, \base, (16 * LSX_REG_WIDTH)
+ EX vst $vr17, \base, (17 * LSX_REG_WIDTH)
+ EX vst $vr18, \base, (18 * LSX_REG_WIDTH)
+ EX vst $vr19, \base, (19 * LSX_REG_WIDTH)
+ EX vst $vr20, \base, (20 * LSX_REG_WIDTH)
+ EX vst $vr21, \base, (21 * LSX_REG_WIDTH)
+ EX vst $vr22, \base, (22 * LSX_REG_WIDTH)
+ EX vst $vr23, \base, (23 * LSX_REG_WIDTH)
+ EX vst $vr24, \base, (24 * LSX_REG_WIDTH)
+ EX vst $vr25, \base, (25 * LSX_REG_WIDTH)
+ EX vst $vr26, \base, (26 * LSX_REG_WIDTH)
+ EX vst $vr27, \base, (27 * LSX_REG_WIDTH)
+ EX vst $vr28, \base, (28 * LSX_REG_WIDTH)
+ EX vst $vr29, \base, (29 * LSX_REG_WIDTH)
+ EX vst $vr30, \base, (30 * LSX_REG_WIDTH)
+ EX vst $vr31, \base, (31 * LSX_REG_WIDTH)
+#endif
+ .endm
+
+ .macro sc_restore_lsx base
+#ifdef CONFIG_CPU_HAS_LSX
+ EX vld $vr0, \base, (0 * LSX_REG_WIDTH)
+ EX vld $vr1, \base, (1 * LSX_REG_WIDTH)
+ EX vld $vr2, \base, (2 * LSX_REG_WIDTH)
+ EX vld $vr3, \base, (3 * LSX_REG_WIDTH)
+ EX vld $vr4, \base, (4 * LSX_REG_WIDTH)
+ EX vld $vr5, \base, (5 * LSX_REG_WIDTH)
+ EX vld $vr6, \base, (6 * LSX_REG_WIDTH)
+ EX vld $vr7, \base, (7 * LSX_REG_WIDTH)
+ EX vld $vr8, \base, (8 * LSX_REG_WIDTH)
+ EX vld $vr9, \base, (9 * LSX_REG_WIDTH)
+ EX vld $vr10, \base, (10 * LSX_REG_WIDTH)
+ EX vld $vr11, \base, (11 * LSX_REG_WIDTH)
+ EX vld $vr12, \base, (12 * LSX_REG_WIDTH)
+ EX vld $vr13, \base, (13 * LSX_REG_WIDTH)
+ EX vld $vr14, \base, (14 * LSX_REG_WIDTH)
+ EX vld $vr15, \base, (15 * LSX_REG_WIDTH)
+ EX vld $vr16, \base, (16 * LSX_REG_WIDTH)
+ EX vld $vr17, \base, (17 * LSX_REG_WIDTH)
+ EX vld $vr18, \base, (18 * LSX_REG_WIDTH)
+ EX vld $vr19, \base, (19 * LSX_REG_WIDTH)
+ EX vld $vr20, \base, (20 * LSX_REG_WIDTH)
+ EX vld $vr21, \base, (21 * LSX_REG_WIDTH)
+ EX vld $vr22, \base, (22 * LSX_REG_WIDTH)
+ EX vld $vr23, \base, (23 * LSX_REG_WIDTH)
+ EX vld $vr24, \base, (24 * LSX_REG_WIDTH)
+ EX vld $vr25, \base, (25 * LSX_REG_WIDTH)
+ EX vld $vr26, \base, (26 * LSX_REG_WIDTH)
+ EX vld $vr27, \base, (27 * LSX_REG_WIDTH)
+ EX vld $vr28, \base, (28 * LSX_REG_WIDTH)
+ EX vld $vr29, \base, (29 * LSX_REG_WIDTH)
+ EX vld $vr30, \base, (30 * LSX_REG_WIDTH)
+ EX vld $vr31, \base, (31 * LSX_REG_WIDTH)
+#endif
+ .endm
+
+ .macro sc_save_lasx base
+#ifdef CONFIG_CPU_HAS_LASX
+ EX xvst $xr0, \base, (0 * LASX_REG_WIDTH)
+ EX xvst $xr1, \base, (1 * LASX_REG_WIDTH)
+ EX xvst $xr2, \base, (2 * LASX_REG_WIDTH)
+ EX xvst $xr3, \base, (3 * LASX_REG_WIDTH)
+ EX xvst $xr4, \base, (4 * LASX_REG_WIDTH)
+ EX xvst $xr5, \base, (5 * LASX_REG_WIDTH)
+ EX xvst $xr6, \base, (6 * LASX_REG_WIDTH)
+ EX xvst $xr7, \base, (7 * LASX_REG_WIDTH)
+ EX xvst $xr8, \base, (8 * LASX_REG_WIDTH)
+ EX xvst $xr9, \base, (9 * LASX_REG_WIDTH)
+ EX xvst $xr10, \base, (10 * LASX_REG_WIDTH)
+ EX xvst $xr11, \base, (11 * LASX_REG_WIDTH)
+ EX xvst $xr12, \base, (12 * LASX_REG_WIDTH)
+ EX xvst $xr13, \base, (13 * LASX_REG_WIDTH)
+ EX xvst $xr14, \base, (14 * LASX_REG_WIDTH)
+ EX xvst $xr15, \base, (15 * LASX_REG_WIDTH)
+ EX xvst $xr16, \base, (16 * LASX_REG_WIDTH)
+ EX xvst $xr17, \base, (17 * LASX_REG_WIDTH)
+ EX xvst $xr18, \base, (18 * LASX_REG_WIDTH)
+ EX xvst $xr19, \base, (19 * LASX_REG_WIDTH)
+ EX xvst $xr20, \base, (20 * LASX_REG_WIDTH)
+ EX xvst $xr21, \base, (21 * LASX_REG_WIDTH)
+ EX xvst $xr22, \base, (22 * LASX_REG_WIDTH)
+ EX xvst $xr23, \base, (23 * LASX_REG_WIDTH)
+ EX xvst $xr24, \base, (24 * LASX_REG_WIDTH)
+ EX xvst $xr25, \base, (25 * LASX_REG_WIDTH)
+ EX xvst $xr26, \base, (26 * LASX_REG_WIDTH)
+ EX xvst $xr27, \base, (27 * LASX_REG_WIDTH)
+ EX xvst $xr28, \base, (28 * LASX_REG_WIDTH)
+ EX xvst $xr29, \base, (29 * LASX_REG_WIDTH)
+ EX xvst $xr30, \base, (30 * LASX_REG_WIDTH)
+ EX xvst $xr31, \base, (31 * LASX_REG_WIDTH)
+#endif
+ .endm
+
+ .macro sc_restore_lasx base
+#ifdef CONFIG_CPU_HAS_LASX
+ EX xvld $xr0, \base, (0 * LASX_REG_WIDTH)
+ EX xvld $xr1, \base, (1 * LASX_REG_WIDTH)
+ EX xvld $xr2, \base, (2 * LASX_REG_WIDTH)
+ EX xvld $xr3, \base, (3 * LASX_REG_WIDTH)
+ EX xvld $xr4, \base, (4 * LASX_REG_WIDTH)
+ EX xvld $xr5, \base, (5 * LASX_REG_WIDTH)
+ EX xvld $xr6, \base, (6 * LASX_REG_WIDTH)
+ EX xvld $xr7, \base, (7 * LASX_REG_WIDTH)
+ EX xvld $xr8, \base, (8 * LASX_REG_WIDTH)
+ EX xvld $xr9, \base, (9 * LASX_REG_WIDTH)
+ EX xvld $xr10, \base, (10 * LASX_REG_WIDTH)
+ EX xvld $xr11, \base, (11 * LASX_REG_WIDTH)
+ EX xvld $xr12, \base, (12 * LASX_REG_WIDTH)
+ EX xvld $xr13, \base, (13 * LASX_REG_WIDTH)
+ EX xvld $xr14, \base, (14 * LASX_REG_WIDTH)
+ EX xvld $xr15, \base, (15 * LASX_REG_WIDTH)
+ EX xvld $xr16, \base, (16 * LASX_REG_WIDTH)
+ EX xvld $xr17, \base, (17 * LASX_REG_WIDTH)
+ EX xvld $xr18, \base, (18 * LASX_REG_WIDTH)
+ EX xvld $xr19, \base, (19 * LASX_REG_WIDTH)
+ EX xvld $xr20, \base, (20 * LASX_REG_WIDTH)
+ EX xvld $xr21, \base, (21 * LASX_REG_WIDTH)
+ EX xvld $xr22, \base, (22 * LASX_REG_WIDTH)
+ EX xvld $xr23, \base, (23 * LASX_REG_WIDTH)
+ EX xvld $xr24, \base, (24 * LASX_REG_WIDTH)
+ EX xvld $xr25, \base, (25 * LASX_REG_WIDTH)
+ EX xvld $xr26, \base, (26 * LASX_REG_WIDTH)
+ EX xvld $xr27, \base, (27 * LASX_REG_WIDTH)
+ EX xvld $xr28, \base, (28 * LASX_REG_WIDTH)
+ EX xvld $xr29, \base, (29 * LASX_REG_WIDTH)
+ EX xvld $xr30, \base, (30 * LASX_REG_WIDTH)
+ EX xvld $xr31, \base, (31 * LASX_REG_WIDTH)
+#endif
+ .endm
+
/*
* Save a thread's fp context.
*/
@@ -166,6 +314,76 @@ SYM_FUNC_START(_restore_fp)
jr ra
SYM_FUNC_END(_restore_fp)
+#ifdef CONFIG_CPU_HAS_LSX
+
+/*
+ * Save a thread's LSX vector context.
+ */
+SYM_FUNC_START(_save_lsx)
+ lsx_save_all a0 t1 t2
+ jr ra
+SYM_FUNC_END(_save_lsx)
+EXPORT_SYMBOL(_save_lsx)
+
+/*
+ * Restore a thread's LSX vector context.
+ */
+SYM_FUNC_START(_restore_lsx)
+ lsx_restore_all a0 t1 t2
+ jr ra
+SYM_FUNC_END(_restore_lsx)
+
+SYM_FUNC_START(_save_lsx_upper)
+ lsx_save_all_upper a0 t0 t1
+ jr ra
+SYM_FUNC_END(_save_lsx_upper)
+
+SYM_FUNC_START(_restore_lsx_upper)
+ lsx_restore_all_upper a0 t0 t1
+ jr ra
+SYM_FUNC_END(_restore_lsx_upper)
+
+SYM_FUNC_START(_init_lsx_upper)
+ lsx_init_all_upper t1
+ jr ra
+SYM_FUNC_END(_init_lsx_upper)
+#endif
+
+#ifdef CONFIG_CPU_HAS_LASX
+
+/*
+ * Save a thread's LASX vector context.
+ */
+SYM_FUNC_START(_save_lasx)
+ lasx_save_all a0 t1 t2
+ jr ra
+SYM_FUNC_END(_save_lasx)
+EXPORT_SYMBOL(_save_lasx)
+
+/*
+ * Restore a thread's LASX vector context.
+ */
+SYM_FUNC_START(_restore_lasx)
+ lasx_restore_all a0 t1 t2
+ jr ra
+SYM_FUNC_END(_restore_lasx)
+
+SYM_FUNC_START(_save_lasx_upper)
+ lasx_save_all_upper a0 t0 t1
+ jr ra
+SYM_FUNC_END(_save_lasx_upper)
+
+SYM_FUNC_START(_restore_lasx_upper)
+ lasx_restore_all_upper a0 t0 t1
+ jr ra
+SYM_FUNC_END(_restore_lasx_upper)
+
+SYM_FUNC_START(_init_lasx_upper)
+ lasx_init_all_upper t1
+ jr ra
+SYM_FUNC_END(_init_lasx_upper)
+#endif
+
/*
* Load the FPU with signalling NANS. This bit pattern we're using has
* the property that no matter whether considered as single or as double
@@ -244,6 +462,58 @@ SYM_FUNC_START(_restore_fp_context)
jr ra
SYM_FUNC_END(_restore_fp_context)
+/*
+ * a0: fpregs
+ * a1: fcc
+ * a2: fcsr
+ */
+SYM_FUNC_START(_save_lsx_context)
+ sc_save_fcc a1, t0, t1
+ sc_save_fcsr a2, t0
+ sc_save_lsx a0
+ li.w a0, 0 # success
+ jr ra
+SYM_FUNC_END(_save_lsx_context)
+
+/*
+ * a0: fpregs
+ * a1: fcc
+ * a2: fcsr
+ */
+SYM_FUNC_START(_restore_lsx_context)
+ sc_restore_lsx a0
+ sc_restore_fcc a1, t1, t2
+ sc_restore_fcsr a2, t1
+ li.w a0, 0 # success
+ jr ra
+SYM_FUNC_END(_restore_lsx_context)
+
+/*
+ * a0: fpregs
+ * a1: fcc
+ * a2: fcsr
+ */
+SYM_FUNC_START(_save_lasx_context)
+ sc_save_fcc a1, t0, t1
+ sc_save_fcsr a2, t0
+ sc_save_lasx a0
+ li.w a0, 0 # success
+ jr ra
+SYM_FUNC_END(_save_lasx_context)
+
+/*
+ * a0: fpregs
+ * a1: fcc
+ * a2: fcsr
+ */
+SYM_FUNC_START(_restore_lasx_context)
+ sc_restore_lasx a0
+ sc_restore_fcc a1, t1, t2
+ sc_restore_fcsr a2, t1
+ li.w a0, 0 # success
+ jr ra
+SYM_FUNC_END(_restore_lasx_context)
+
SYM_FUNC_START(fault)
li.w a0, -EFAULT # failure
jr ra
diff --git a/arch/loongarch/kernel/process.c b/arch/loongarch/kernel/process.c
index 9535a0662480..2e04eb07abb6 100644
--- a/arch/loongarch/kernel/process.c
+++ b/arch/loongarch/kernel/process.c
@@ -117,8 +117,14 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
*/
preempt_disable();
- if (is_fpu_owner())
- save_fp(current);
+ if (is_fpu_owner()) {
+ if (is_lasx_enabled())
+ save_lasx(current);
+ else if (is_lsx_enabled())
+ save_lsx(current);
+ else
+ save_fp(current);
+ }
preempt_enable();
diff --git a/arch/loongarch/kernel/ptrace.c b/arch/loongarch/kernel/ptrace.c
index 5fcffb452367..a0767c3a0f0a 100644
--- a/arch/loongarch/kernel/ptrace.c
+++ b/arch/loongarch/kernel/ptrace.c
@@ -250,6 +250,90 @@ static int cfg_set(struct task_struct *target,
return 0;
}
+#ifdef CONFIG_CPU_HAS_LSX
+
+static void copy_pad_fprs(struct task_struct *target,
+ const struct user_regset *regset,
+ struct membuf *to, unsigned int live_sz)
+{
+ int i, j;
+ unsigned long long fill = ~0ull;
+ unsigned int cp_sz, pad_sz;
+
+ cp_sz = min(regset->size, live_sz);
+ pad_sz = regset->size - cp_sz;
+ WARN_ON(pad_sz % sizeof(fill));
+
+ for (i = 0; i < NUM_FPU_REGS; i++) {
+ membuf_write(to, &target->thread.fpu.fpr[i], cp_sz);
+ for (j = 0; j < (pad_sz / sizeof(fill)); j++) {
+ membuf_store(to, fill);
+ }
+ }
+}
+
+static int simd_get(struct task_struct *target,
+ const struct user_regset *regset,
+ struct membuf to)
+{
+ const unsigned int wr_size = NUM_FPU_REGS * regset->size;
+
+ if (!tsk_used_math(target)) {
+ /* The task hasn't used FP or LSX, fill with 0xff */
+ copy_pad_fprs(target, regset, &to, 0);
+ } else if (!test_tsk_thread_flag(target, TIF_LSX_CTX_LIVE)) {
+ /* Copy scalar FP context, fill the rest with 0xff */
+ copy_pad_fprs(target, regset, &to, 8);
+#ifdef CONFIG_CPU_HAS_LASX
+ } else if (!test_tsk_thread_flag(target, TIF_LASX_CTX_LIVE)) {
+ /* Copy LSX 128 Bit context, fill the rest with 0xff */
+ copy_pad_fprs(target, regset, &to, 16);
+#endif
+ } else if (sizeof(target->thread.fpu.fpr[0]) == regset->size) {
+ /* Trivially copy the vector registers */
+ membuf_write(&to, &target->thread.fpu.fpr, wr_size);
+ } else {
+ /* Copy as much context as possible, fill the rest with 0xff */
+ copy_pad_fprs(target, regset, &to, sizeof(target->thread.fpu.fpr[0]));
+ }
+
+ return 0;
+}
+
+static int simd_set(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf)
+{
+ const unsigned int wr_size = NUM_FPU_REGS * regset->size;
+ unsigned int cp_sz;
+ int i, err, start;
+
+ init_fp_ctx(target);
+
+ if (sizeof(target->thread.fpu.fpr[0]) == regset->size) {
+ /* Trivially copy the vector registers */
+ err = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ &target->thread.fpu.fpr,
+ 0, wr_size);
+ } else {
+ /* Copy as much context as possible */
+ cp_sz = min_t(unsigned int, regset->size,
+ sizeof(target->thread.fpu.fpr[0]));
+
+ i = start = err = 0;
+ for (; i < NUM_FPU_REGS; i++, start += regset->size) {
+ err |= user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ &target->thread.fpu.fpr[i],
+ start, start + cp_sz);
+ }
+ }
+
+ return err;
+}
+
+#endif /* CONFIG_CPU_HAS_LSX */
+
#ifdef CONFIG_HAVE_HW_BREAKPOINT
/*
@@ -708,6 +792,12 @@ enum loongarch_regset {
REGSET_GPR,
REGSET_FPR,
REGSET_CPUCFG,
+#ifdef CONFIG_CPU_HAS_LSX
+ REGSET_LSX,
+#endif
+#ifdef CONFIG_CPU_HAS_LASX
+ REGSET_LASX,
+#endif
#ifdef CONFIG_HAVE_HW_BREAKPOINT
REGSET_HW_BREAK,
REGSET_HW_WATCH,
@@ -739,6 +829,26 @@ static const struct user_regset loongarch64_regsets[] = {
.regset_get = cfg_get,
.set = cfg_set,
},
+#ifdef CONFIG_CPU_HAS_LSX
+ [REGSET_LSX] = {
+ .core_note_type = NT_LOONGARCH_LSX,
+ .n = NUM_FPU_REGS,
+ .size = 16,
+ .align = 16,
+ .regset_get = simd_get,
+ .set = simd_set,
+ },
+#endif
+#ifdef CONFIG_CPU_HAS_LASX
+ [REGSET_LASX] = {
+ .core_note_type = NT_LOONGARCH_LASX,
+ .n = NUM_FPU_REGS,
+ .size = 32,
+ .align = 32,
+ .regset_get = simd_get,
+ .set = simd_set,
+ },
+#endif
#ifdef CONFIG_HAVE_HW_BREAKPOINT
[REGSET_HW_BREAK] = {
.core_note_type = NT_LOONGARCH_HW_BREAK,
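The new REGSET_LSX/REGSET_LASX entries export the vector state as NT_LOONGARCH_LSX/NT_LOONGARCH_LASX regsets, so debuggers access it through PTRACE_GETREGSET/PTRACE_SETREGSET rather than a dedicated request. A hedged debugger-side sketch, not part of this patch, assuming NT_LOONGARCH_LSX from <linux/elf.h> and the user_lsx_state layout added to the uapi ptrace.h above:

	/* Hypothetical sketch: read a stopped tracee's LSX registers. */
	#include <sys/ptrace.h>
	#include <sys/types.h>
	#include <sys/uio.h>
	#include <linux/elf.h>
	#include <asm/ptrace.h>		/* struct user_lsx_state */

	static long read_lsx(pid_t pid, struct user_lsx_state *st)
	{
		struct iovec iov = {
			.iov_base = st,
			.iov_len  = sizeof(*st),	/* 32 registers x 128 bits */
		};

		return ptrace(PTRACE_GETREGSET, pid, NT_LOONGARCH_LSX, &iov);
	}
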
diff --git a/arch/loongarch/kernel/signal.c b/arch/loongarch/kernel/signal.c
index 8f5b7986374b..ceb899366c0a 100644
--- a/arch/loongarch/kernel/signal.c
+++ b/arch/loongarch/kernel/signal.c
@@ -50,6 +50,14 @@ extern asmlinkage int
_save_fp_context(void __user *fpregs, void __user *fcc, void __user *csr);
extern asmlinkage int
_restore_fp_context(void __user *fpregs, void __user *fcc, void __user *csr);
+extern asmlinkage int
+_save_lsx_context(void __user *fpregs, void __user *fcc, void __user *fcsr);
+extern asmlinkage int
+_restore_lsx_context(void __user *fpregs, void __user *fcc, void __user *fcsr);
+extern asmlinkage int
+_save_lasx_context(void __user *fpregs, void __user *fcc, void __user *fcsr);
+extern asmlinkage int
+_restore_lasx_context(void __user *fpregs, void __user *fcc, void __user *fcsr);
struct rt_sigframe {
struct siginfo rs_info;
@@ -65,6 +73,8 @@ struct extctx_layout {
unsigned long size;
unsigned int flags;
struct _ctx_layout fpu;
+ struct _ctx_layout lsx;
+ struct _ctx_layout lasx;
struct _ctx_layout end;
};
@@ -115,6 +125,96 @@ static int copy_fpu_from_sigcontext(struct fpu_context __user *ctx)
return err;
}
+static int copy_lsx_to_sigcontext(struct lsx_context __user *ctx)
+{
+ int i;
+ int err = 0;
+ uint64_t __user *regs = (uint64_t *)&ctx->regs;
+ uint64_t __user *fcc = &ctx->fcc;
+ uint32_t __user *fcsr = &ctx->fcsr;
+
+ for (i = 0; i < NUM_FPU_REGS; i++) {
+ err |= __put_user(get_fpr64(&current->thread.fpu.fpr[i], 0),
+ &regs[2*i]);
+ err |= __put_user(get_fpr64(&current->thread.fpu.fpr[i], 1),
+ &regs[2*i+1]);
+ }
+ err |= __put_user(current->thread.fpu.fcc, fcc);
+ err |= __put_user(current->thread.fpu.fcsr, fcsr);
+
+ return err;
+}
+
+static int copy_lsx_from_sigcontext(struct lsx_context __user *ctx)
+{
+ int i;
+ int err = 0;
+ u64 fpr_val;
+ uint64_t __user *regs = (uint64_t *)&ctx->regs;
+ uint64_t __user *fcc = &ctx->fcc;
+ uint32_t __user *fcsr = &ctx->fcsr;
+
+ for (i = 0; i < NUM_FPU_REGS; i++) {
+ err |= __get_user(fpr_val, &regs[2*i]);
+ set_fpr64(&current->thread.fpu.fpr[i], 0, fpr_val);
+ err |= __get_user(fpr_val, &regs[2*i+1]);
+ set_fpr64(&current->thread.fpu.fpr[i], 1, fpr_val);
+ }
+ err |= __get_user(current->thread.fpu.fcc, fcc);
+ err |= __get_user(current->thread.fpu.fcsr, fcsr);
+
+ return err;
+}
+
+static int copy_lasx_to_sigcontext(struct lasx_context __user *ctx)
+{
+ int i;
+ int err = 0;
+ uint64_t __user *regs = (uint64_t *)&ctx->regs;
+ uint64_t __user *fcc = &ctx->fcc;
+ uint32_t __user *fcsr = &ctx->fcsr;
+
+ for (i = 0; i < NUM_FPU_REGS; i++) {
+ err |= __put_user(get_fpr64(&current->thread.fpu.fpr[i], 0),
+ &regs[4*i]);
+ err |= __put_user(get_fpr64(&current->thread.fpu.fpr[i], 1),
+ &regs[4*i+1]);
+ err |= __put_user(get_fpr64(&current->thread.fpu.fpr[i], 2),
+ &regs[4*i+2]);
+ err |= __put_user(get_fpr64(&current->thread.fpu.fpr[i], 3),
+ &regs[4*i+3]);
+ }
+ err |= __put_user(current->thread.fpu.fcc, fcc);
+ err |= __put_user(current->thread.fpu.fcsr, fcsr);
+
+ return err;
+}
+
+static int copy_lasx_from_sigcontext(struct lasx_context __user *ctx)
+{
+ int i;
+ int err = 0;
+ u64 fpr_val;
+ uint64_t __user *regs = (uint64_t *)&ctx->regs;
+ uint64_t __user *fcc = &ctx->fcc;
+ uint32_t __user *fcsr = &ctx->fcsr;
+
+ for (i = 0; i < NUM_FPU_REGS; i++) {
+ err |= __get_user(fpr_val, &regs[4*i]);
+ set_fpr64(&current->thread.fpu.fpr[i], 0, fpr_val);
+ err |= __get_user(fpr_val, &regs[4*i+1]);
+ set_fpr64(&current->thread.fpu.fpr[i], 1, fpr_val);
+ err |= __get_user(fpr_val, &regs[4*i+2]);
+ set_fpr64(&current->thread.fpu.fpr[i], 2, fpr_val);
+ err |= __get_user(fpr_val, &regs[4*i+3]);
+ set_fpr64(&current->thread.fpu.fpr[i], 3, fpr_val);
+ }
+ err |= __get_user(current->thread.fpu.fcc, fcc);
+ err |= __get_user(current->thread.fpu.fcsr, fcsr);
+
+ return err;
+}
+
/*
* Wrappers for the assembly _{save,restore}_fp_context functions.
*/
@@ -136,6 +236,42 @@ static int restore_hw_fpu_context(struct fpu_context __user *ctx)
return _restore_fp_context(regs, fcc, fcsr);
}
+static int save_hw_lsx_context(struct lsx_context __user *ctx)
+{
+ uint64_t __user *regs = (uint64_t *)&ctx->regs;
+ uint64_t __user *fcc = &ctx->fcc;
+ uint32_t __user *fcsr = &ctx->fcsr;
+
+ return _save_lsx_context(regs, fcc, fcsr);
+}
+
+static int restore_hw_lsx_context(struct lsx_context __user *ctx)
+{
+ uint64_t __user *regs = (uint64_t *)&ctx->regs;
+ uint64_t __user *fcc = &ctx->fcc;
+ uint32_t __user *fcsr = &ctx->fcsr;
+
+ return _restore_lsx_context(regs, fcc, fcsr);
+}
+
+static int save_hw_lasx_context(struct lasx_context __user *ctx)
+{
+ uint64_t __user *regs = (uint64_t *)&ctx->regs;
+ uint64_t __user *fcc = &ctx->fcc;
+ uint32_t __user *fcsr = &ctx->fcsr;
+
+ return _save_lasx_context(regs, fcc, fcsr);
+}
+
+static int restore_hw_lasx_context(struct lasx_context __user *ctx)
+{
+ uint64_t __user *regs = (uint64_t *)&ctx->regs;
+ uint64_t __user *fcc = &ctx->fcc;
+ uint32_t __user *fcsr = &ctx->fcsr;
+
+ return _restore_lasx_context(regs, fcc, fcsr);
+}
+
static int fcsr_pending(unsigned int __user *fcsr)
{
int err, sig = 0;
@@ -227,6 +363,162 @@ static int protected_restore_fpu_context(struct extctx_layout *extctx)
return err ?: sig;
}
+static int protected_save_lsx_context(struct extctx_layout *extctx)
+{
+ int err = 0;
+ struct sctx_info __user *info = extctx->lsx.addr;
+ struct lsx_context __user *lsx_ctx = (struct lsx_context *)get_ctx_through_ctxinfo(info);
+ uint64_t __user *regs = (uint64_t *)&lsx_ctx->regs;
+ uint64_t __user *fcc = &lsx_ctx->fcc;
+ uint32_t __user *fcsr = &lsx_ctx->fcsr;
+
+ while (1) {
+ lock_fpu_owner();
+ if (is_lsx_enabled())
+ err = save_hw_lsx_context(lsx_ctx);
+ else {
+ if (is_fpu_owner())
+ save_fp(current);
+ err = copy_lsx_to_sigcontext(lsx_ctx);
+ }
+ unlock_fpu_owner();
+
+ err |= __put_user(LSX_CTX_MAGIC, &info->magic);
+ err |= __put_user(extctx->lsx.size, &info->size);
+
+ if (likely(!err))
+ break;
+ /* Touch the LSX context and try again */
+ err = __put_user(0, &regs[0]) |
+ __put_user(0, &regs[32*2-1]) |
+ __put_user(0, fcc) |
+ __put_user(0, fcsr);
+ if (err)
+ return err; /* really bad sigcontext */
+ }
+
+ return err;
+}
+
+static int protected_restore_lsx_context(struct extctx_layout *extctx)
+{
+ int err = 0, sig = 0, tmp __maybe_unused;
+ struct sctx_info __user *info = extctx->lsx.addr;
+ struct lsx_context __user *lsx_ctx = (struct lsx_context *)get_ctx_through_ctxinfo(info);
+ uint64_t __user *regs = (uint64_t *)&lsx_ctx->regs;
+ uint64_t __user *fcc = &lsx_ctx->fcc;
+ uint32_t __user *fcsr = &lsx_ctx->fcsr;
+
+ err = sig = fcsr_pending(fcsr);
+ if (err < 0)
+ return err;
+
+ while (1) {
+ lock_fpu_owner();
+ if (is_lsx_enabled())
+ err = restore_hw_lsx_context(lsx_ctx);
+ else {
+ err = copy_lsx_from_sigcontext(lsx_ctx);
+ if (is_fpu_owner())
+ restore_fp(current);
+ }
+ unlock_fpu_owner();
+
+ if (likely(!err))
+ break;
+ /* Touch the LSX context and try again */
+ err = __get_user(tmp, &regs[0]) |
+ __get_user(tmp, &regs[32*2-1]) |
+ __get_user(tmp, fcc) |
+ __get_user(tmp, fcsr);
+ if (err)
+ break; /* really bad sigcontext */
+ }
+
+ return err ?: sig;
+}
+
+static int protected_save_lasx_context(struct extctx_layout *extctx)
+{
+ int err = 0;
+ struct sctx_info __user *info = extctx->lasx.addr;
+ struct lasx_context __user *lasx_ctx =
+ (struct lasx_context *)get_ctx_through_ctxinfo(info);
+ uint64_t __user *regs = (uint64_t *)&lasx_ctx->regs;
+ uint64_t __user *fcc = &lasx_ctx->fcc;
+ uint32_t __user *fcsr = &lasx_ctx->fcsr;
+
+ while (1) {
+ lock_fpu_owner();
+ if (is_lasx_enabled())
+ err = save_hw_lasx_context(lasx_ctx);
+ else {
+ if (is_lsx_enabled())
+ save_lsx(current);
+ else if (is_fpu_owner())
+ save_fp(current);
+ err = copy_lasx_to_sigcontext(lasx_ctx);
+ }
+ unlock_fpu_owner();
+
+ err |= __put_user(LASX_CTX_MAGIC, &info->magic);
+ err |= __put_user(extctx->lasx.size, &info->size);
+
+ if (likely(!err))
+ break;
+ /* Touch the LASX context and try again */
+ err = __put_user(0, &regs[0]) |
+ __put_user(0, &regs[32*4-1]) |
+ __put_user(0, fcc) |
+ __put_user(0, fcsr);
+ if (err)
+ return err; /* really bad sigcontext */
+ }
+
+ return err;
+}
+
+static int protected_restore_lasx_context(struct extctx_layout *extctx)
+{
+ int err = 0, sig = 0, tmp __maybe_unused;
+ struct sctx_info __user *info = extctx->lasx.addr;
+ struct lasx_context __user *lasx_ctx =
+ (struct lasx_context *)get_ctx_through_ctxinfo(info);
+ uint64_t __user *regs = (uint64_t *)&lasx_ctx->regs;
+ uint64_t __user *fcc = &lasx_ctx->fcc;
+ uint32_t __user *fcsr = &lasx_ctx->fcsr;
+
+ err = sig = fcsr_pending(fcsr);
+ if (err < 0)
+ return err;
+
+ while (1) {
+ lock_fpu_owner();
+ if (is_lasx_enabled())
+ err = restore_hw_lasx_context(lasx_ctx);
+ else {
+ err = copy_lasx_from_sigcontext(lasx_ctx);
+ if (is_lsx_enabled())
+ restore_lsx(current);
+ else if (is_fpu_owner())
+ restore_fp(current);
+ }
+ unlock_fpu_owner();
+
+ if (likely(!err))
+ break;
+ /* Touch the LASX context and try again */
+ err = __get_user(tmp, &regs[0]) |
+ __get_user(tmp, &regs[32*4-1]) |
+ __get_user(tmp, fcc) |
+ __get_user(tmp, fcsr);
+ if (err)
+ break; /* really bad sigcontext */
+ }
+
+ return err ?: sig;
+}
+
static int setup_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
struct extctx_layout *extctx)
{
@@ -240,7 +532,11 @@ static int setup_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
for (i = 1; i < 32; i++)
err |= __put_user(regs->regs[i], &sc->sc_regs[i]);
- if (extctx->fpu.addr)
+ if (extctx->lasx.addr)
+ err |= protected_save_lasx_context(extctx);
+ else if (extctx->lsx.addr)
+ err |= protected_save_lsx_context(extctx);
+ else if (extctx->fpu.addr)
err |= protected_save_fpu_context(extctx);
/* Set the "end" magic */
@@ -274,6 +570,20 @@ static int parse_extcontext(struct sigcontext __user *sc, struct extctx_layout *
extctx->fpu.addr = info;
break;
+ case LSX_CTX_MAGIC:
+ if (size < (sizeof(struct sctx_info) +
+ sizeof(struct lsx_context)))
+ goto invalid;
+ extctx->lsx.addr = info;
+ break;
+
+ case LASX_CTX_MAGIC:
+ if (size < (sizeof(struct sctx_info) +
+ sizeof(struct lasx_context)))
+ goto invalid;
+ extctx->lasx.addr = info;
+ break;
+
default:
goto invalid;
}
@@ -319,7 +629,11 @@ static int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc
for (i = 1; i < 32; i++)
err |= __get_user(regs->regs[i], &sc->sc_regs[i]);
- if (extctx.fpu.addr)
+ if (extctx.lasx.addr)
+ err |= protected_restore_lasx_context(&extctx);
+ else if (extctx.lsx.addr)
+ err |= protected_restore_lsx_context(&extctx);
+ else if (extctx.fpu.addr)
err |= protected_restore_fpu_context(&extctx);
bad:
@@ -375,7 +689,13 @@ static unsigned long setup_extcontext(struct extctx_layout *extctx, unsigned lon
extctx->size += extctx->end.size;
if (extctx->flags & SC_USED_FP) {
- if (cpu_has_fpu)
+ if (cpu_has_lasx && thread_lasx_context_live())
+ new_sp = extframe_alloc(extctx, &extctx->lasx,
+ sizeof(struct lasx_context), LASX_CTX_ALIGN, new_sp);
+ else if (cpu_has_lsx && thread_lsx_context_live())
+ new_sp = extframe_alloc(extctx, &extctx->lsx,
+ sizeof(struct lsx_context), LSX_CTX_ALIGN, new_sp);
+ else if (cpu_has_fpu)
new_sp = extframe_alloc(extctx, &extctx->fpu,
sizeof(struct fpu_context), FPU_CTX_ALIGN, new_sp);
}
diff --git a/arch/loongarch/kernel/traps.c b/arch/loongarch/kernel/traps.c
index e73d9bbe1658..e56df45f7202 100644
--- a/arch/loongarch/kernel/traps.c
+++ b/arch/loongarch/kernel/traps.c
@@ -852,12 +852,67 @@ static void init_restore_fp(void)
BUG_ON(!is_fp_enabled());
}
+static void init_restore_lsx(void)
+{
+ enable_lsx();
+
+ if (!thread_lsx_context_live()) {
+ /* First time LSX context user */
+ init_restore_fp();
+ init_lsx_upper();
+ set_thread_flag(TIF_LSX_CTX_LIVE);
+ } else {
+ if (!is_simd_owner()) {
+ if (is_fpu_owner()) {
+ restore_lsx_upper(current);
+ } else {
+ __own_fpu();
+ restore_lsx(current);
+ }
+ }
+ }
+
+ set_thread_flag(TIF_USEDSIMD);
+
+ BUG_ON(!is_fp_enabled());
+ BUG_ON(!is_lsx_enabled());
+}
+
+static void init_restore_lasx(void)
+{
+ enable_lasx();
+
+ if (!thread_lasx_context_live()) {
+ /* First time LASX context user */
+ init_restore_lsx();
+ init_lasx_upper();
+ set_thread_flag(TIF_LASX_CTX_LIVE);
+ } else {
+ if (is_fpu_owner() || is_simd_owner()) {
+ init_restore_lsx();
+ restore_lasx_upper(current);
+ } else {
+ __own_fpu();
+ enable_lsx();
+ restore_lasx(current);
+ }
+ }
+
+ set_thread_flag(TIF_USEDSIMD);
+
+ BUG_ON(!is_fp_enabled());
+ BUG_ON(!is_lsx_enabled());
+ BUG_ON(!is_lasx_enabled());
+}
+
asmlinkage void noinstr do_fpu(struct pt_regs *regs)
{
irqentry_state_t state = irqentry_enter(regs);
local_irq_enable();
die_if_kernel("do_fpu invoked from kernel context!", regs);
+ BUG_ON(is_lsx_enabled());
+ BUG_ON(is_lasx_enabled());
preempt_disable();
init_restore_fp();
@@ -872,9 +927,20 @@ asmlinkage void noinstr do_lsx(struct pt_regs *regs)
irqentry_state_t state = irqentry_enter(regs);
local_irq_enable();
- force_sig(SIGILL);
- local_irq_disable();
+ if (!cpu_has_lsx) {
+ force_sig(SIGILL);
+ goto out;
+ }
+
+ die_if_kernel("do_lsx invoked from kernel context!", regs);
+ BUG_ON(is_lasx_enabled());
+ preempt_disable();
+ init_restore_lsx();
+ preempt_enable();
+
+out:
+ local_irq_disable();
irqentry_exit(regs, state);
}
@@ -883,9 +949,19 @@ asmlinkage void noinstr do_lasx(struct pt_regs *regs)
irqentry_state_t state = irqentry_enter(regs);
local_irq_enable();
- force_sig(SIGILL);
- local_irq_disable();
+ if (!cpu_has_lasx) {
+ force_sig(SIGILL);
+ goto out;
+ }
+
+ die_if_kernel("do_lasx invoked from kernel context!", regs);
+ preempt_disable();
+ init_restore_lasx();
+ preempt_enable();
+
+out:
+ local_irq_disable();
irqentry_exit(regs, state);
}
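
Taken together, do_lsx()/do_lasx() implement lazy, first-use allocation of the vector context: the first LSX/LASX instruction executed while the unit is disabled traps into these handlers, which enable the unit, initialize or restore the register file, and mark the context live. A hedged userland sketch of what such a first use might look like, assuming a compiler that ships LoongArch SX intrinsics (lsxintrin.h, built with -mlsx); any LSX instruction would do:

	/* Hypothetical sketch: the first vector instruction below traps to do_lsx(). */
	#include <stdio.h>
	#include <lsxintrin.h>

	int main(void)
	{
		__m128i a = __lsx_vreplgr2vr_w(1);	/* broadcast 1 into a 128-bit vector */
		__m128i b = __lsx_vadd_w(a, a);		/* first LSX use: kernel enables LSX lazily */

		printf("lane0 = %d\n", __lsx_vpickve2gr_w(b, 0));
		return 0;
	}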