summaryrefslogtreecommitdiff
path: root/arch/s390/include/asm/fpu-insn.h
diff options
context:
space:
mode:
author Heiko Carstens <hca@linux.ibm.com> 2024-02-03 13:45:07 +0300
committer Heiko Carstens <hca@linux.ibm.com> 2024-02-16 16:30:15 +0300
commit 3a5866a001e83e1aa143fc0aeba0248247483962 (patch)
tree 9439898f3b6e59c909d28315e5af297b4817bc13 /arch/s390/include/asm/fpu-insn.h
parent f4e3de75d0c4ebe9bbbfef19d7845ee70cb017bd (diff)
download linux-3a5866a001e83e1aa143fc0aeba0248247483962.tar.xz
s390/fpu: provide and use vlm and vstm inline assemblies
Instead of open-coding vlm and vstm inline assemblies at several locations, provide an fpu_* function for each instruction, and use them in the new save_vx_regs() and load_vx_regs() helper functions. Note that "O" and "R" inline assembly operand modifiers are used in order to pass the displacement and base register of the memory operands to the existing VLM and VSTM macros. The two operand modifiers are not available for clang. Therefore provide two variants of each inline assembly. The clang variant always uses and clobbers general purpose register 1, like in the previous inline assemblies, so it can be used as base register with a zero displacement. This generates slightly less efficient code, but can be removed as soon as clang has support for the used operand modifiers. Reviewed-by: Claudio Imbrenda <imbrenda@linux.ibm.com> Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
Diffstat (limited to 'arch/s390/include/asm/fpu-insn.h')
-rw-r--r-- arch/s390/include/asm/fpu-insn.h 70
1 files changed, 70 insertions, 0 deletions
diff --git a/arch/s390/include/asm/fpu-insn.h b/arch/s390/include/asm/fpu-insn.h
index df2cad95b598..538201864a47 100644
--- a/arch/s390/include/asm/fpu-insn.h
+++ b/arch/s390/include/asm/fpu-insn.h
@@ -108,5 +108,75 @@ static __always_inline void fpu_stfpc(unsigned int *fpc)
: "memory");
}
+#ifdef CONFIG_CC_IS_CLANG
+
+#define fpu_vlm(_v1, _v3, _vxrs) do { /* clang variant: issue VLM (vector load multiple) for vector registers _v1.._v3 from the memory at _vxrs */ \
+ unsigned int size = ((_v3) - (_v1) + 1) * sizeof(__vector128); /* bytes covered by registers _v1.._v3, inclusive */ \
+ struct { \
+ __vector128 _v[(_v3) - (_v1) + 1]; \
+ } *_v = (void *)(_vxrs); /* typed view so the asm memory operand spans the whole register range */ \
+ \
+ instrument_read(_v, size); /* announce the read of size bytes at _v before the asm performs it */ \
+ asm volatile("\n" \
+ " la 1,%[vxrs]\n" /* put the address of the memory operand into general purpose register 1 */ \
+ " VLM %[v1],%[v3],0,1\n" /* displacement 0, base register 1 */ \
+ : \
+ : [vxrs] "R" (*_v), \
+ [v1] "I" (_v1), [v3] "I" (_v3) /* "I": register numbers are passed as immediates, so they must be constants */ \
+ : "memory", "1"); /* "1": general purpose register 1 is overwritten by the la above */ \
+} while (0)
+
+#else /* CONFIG_CC_IS_CLANG */
+
+#define fpu_vlm(_v1, _v3, _vxrs) do { /* non-clang variant: issue VLM (vector load multiple) for vector registers _v1.._v3 from the memory at _vxrs */ \
+ unsigned int size = ((_v3) - (_v1) + 1) * sizeof(__vector128); /* bytes covered by registers _v1.._v3, inclusive */ \
+ struct { \
+ __vector128 _v[(_v3) - (_v1) + 1]; \
+ } *_v = (void *)(_vxrs); /* typed view so the asm memory operand spans the whole register range */ \
+ \
+ instrument_read(_v, size); /* announce the read of size bytes at _v before the asm performs it */ \
+ asm volatile("VLM %[v1],%[v3],%O[vxrs],%R[vxrs]\n" /* %O / %R: displacement and base register of the memory operand */ \
+ : \
+ : [vxrs] "Q" (*_v), /* "Q": memory operand without index register, short displacement */ \
+ [v1] "I" (_v1), [v3] "I" (_v3) /* "I": register numbers are passed as immediates, so they must be constants */ \
+ : "memory"); \
+} while (0)
+
+#endif /* CONFIG_CC_IS_CLANG */
+
+#ifdef CONFIG_CC_IS_CLANG
+
+#define fpu_vstm(_v1, _v3, _vxrs) do { /* clang variant: issue VSTM (vector store multiple) for vector registers _v1.._v3 to the memory at _vxrs */ \
+ unsigned int size = ((_v3) - (_v1) + 1) * sizeof(__vector128); /* bytes covered by registers _v1.._v3, inclusive */ \
+ struct { \
+ __vector128 _v[(_v3) - (_v1) + 1]; \
+ } *_v = (void *)(_vxrs); /* typed view so the asm memory operand spans the whole register range */ \
+ \
+ instrument_write(_v, size); /* announce the write of size bytes at _v before the asm performs it */ \
+ asm volatile("\n" \
+ " la 1,%[vxrs]\n" /* put the address of the memory operand into general purpose register 1 */ \
+ " VSTM %[v1],%[v3],0,1\n" /* displacement 0, base register 1 */ \
+ : [vxrs] "=R" (*_v) /* output operand: the whole struct is written by the asm */ \
+ : [v1] "I" (_v1), [v3] "I" (_v3) /* "I": register numbers are passed as immediates, so they must be constants */ \
+ : "memory", "1"); /* "1": general purpose register 1 is overwritten by the la above */ \
+} while (0)
+
+#else /* CONFIG_CC_IS_CLANG */
+
+#define fpu_vstm(_v1, _v3, _vxrs) do { /* non-clang variant: issue VSTM (vector store multiple) for vector registers _v1.._v3 to the memory at _vxrs */ \
+ unsigned int size = ((_v3) - (_v1) + 1) * sizeof(__vector128); /* bytes covered by registers _v1.._v3, inclusive */ \
+ struct { \
+ __vector128 _v[(_v3) - (_v1) + 1]; \
+ } *_v = (void *)(_vxrs); /* typed view so the asm memory operand spans the whole register range */ \
+ \
+ instrument_write(_v, size); /* announce the write of size bytes at _v before the asm performs it */ \
+ asm volatile("VSTM %[v1],%[v3],%O[vxrs],%R[vxrs]\n" /* %O / %R: displacement and base register of the memory operand */ \
+ : [vxrs] "=Q" (*_v) /* output operand; "Q": memory operand without index register, short displacement */ \
+ : [v1] "I" (_v1), [v3] "I" (_v3) /* "I": register numbers are passed as immediates, so they must be constants */ \
+ : "memory"); \
+} while (0)
+
+#endif /* CONFIG_CC_IS_CLANG */
+
#endif /* __ASSEMBLY__ */
#endif /* __ASM_S390_FPU_INSN_H */