From 8d16ce148858669f05b8e117a0634010397e17d6 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 5 Feb 2024 16:48:44 +0100 Subject: s390/fpu: make use of __uninitialized macro Code sections in s390-specific kernel code which use floating-point or vector registers all come with a 520-byte stack variable, used to save registers that are already in use, if required. With INIT_STACK_ALL_PATTERN or INIT_STACK_ALL_ZERO enabled, this variable is always initialized on function entry in addition to the register saving itself, which contradicts the intention (a performance improvement) of such code sections. Therefore provide a DECLARE_KERNEL_FPU_ONSTACK() macro, which declares struct kernel_fpu variables with the __uninitialized attribute, and convert all existing code to use it. This way only this specific type of stack variable is left uninitialized, regardless of config options. Reviewed-by: Nathan Chancellor Reviewed-by: Kees Cook Link: https://lore.kernel.org/r/20240205154844.3757121-3-hca@linux.ibm.com Signed-off-by: Heiko Carstens --- lib/raid6/s390vx.uc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/raid6/s390vx.uc b/lib/raid6/s390vx.uc index cd0b9e95f499..7b0b355e1a26 100644 --- a/lib/raid6/s390vx.uc +++ b/lib/raid6/s390vx.uc @@ -81,7 +81,7 @@ static inline void COPY_VEC(int x, int y) static void raid6_s390vx$#_gen_syndrome(int disks, size_t bytes, void **ptrs) { - struct kernel_fpu vxstate; + DECLARE_KERNEL_FPU_ONSTACK(vxstate); u8 **dptr, *p, *q; int d, z, z0; @@ -114,7 +114,7 @@ static void raid6_s390vx$#_gen_syndrome(int disks, size_t bytes, void **ptrs) static void raid6_s390vx$#_xor_syndrome(int disks, int start, int stop, size_t bytes, void **ptrs) { - struct kernel_fpu vxstate; + DECLARE_KERNEL_FPU_ONSTACK(vxstate); u8 **dptr, *p, *q; int d, z, z0; -- cgit v1.2.3 From fd2527f20915d041e838b6e4a08122dbc73c7abc Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Sat, 3 Feb 2024 11:45:02 +0100 Subject: s390/fpu: move, rename, and merge header files Move, rename, and merge the fpu and vx header files. This way the fpu header files have a consistent naming scheme (fpu*.h). Also get rid of the fpu subdirectory and move the header files to the asm directory, so that all fpu and vx header files can be found at the same location. Merge the internal.h header file into the other header files: the internal helpers are used in many locations, so those helper functions are really not internal.
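
To illustrate the first patch above: DECLARE_KERNEL_FPU_ONSTACK() only changes how the save area is declared, not how registers are saved. A minimal sketch of a caller follows; do_vector_work() is a hypothetical example, not code from this series, while the macro itself is quoted from the new fpu-types.h:

/*
 * From fpu-types.h: the 520-byte save area is explicitly left
 * uninitialized, so INIT_STACK_ALL_PATTERN/INIT_STACK_ALL_ZERO do not
 * clear it on function entry.
 */
#define DECLARE_KERNEL_FPU_ONSTACK(name) \
	struct kernel_fpu name __uninitialized

static void do_vector_work(void)		/* hypothetical caller */
{
	DECLARE_KERNEL_FPU_ONSTACK(vxstate);	/* not memset, even with stack auto-init */

	kernel_fpu_begin(&vxstate, KERNEL_VXR);	/* save user or outer kernel FPU state as needed */
	/* ... use vector registers %v0-%v31 ... */
	kernel_fpu_end(&vxstate, KERNEL_VXR);	/* restore it */
}
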
Signed-off-by: Heiko Carstens --- arch/s390/crypto/chacha-glue.c | 2 +- arch/s390/crypto/chacha-s390.S | 2 +- arch/s390/crypto/crc32-vx.c | 2 +- arch/s390/crypto/crc32be-vx.S | 2 +- arch/s390/crypto/crc32le-vx.S | 2 +- arch/s390/include/asm/asm-prototypes.h | 2 +- arch/s390/include/asm/entry-common.h | 2 +- arch/s390/include/asm/fpu-insn-asm.h | 703 +++++++++++++++++++++++++++++++++ arch/s390/include/asm/fpu-insn.h | 42 ++ arch/s390/include/asm/fpu-types.h | 43 ++ arch/s390/include/asm/fpu.h | 155 ++++++++ arch/s390/include/asm/fpu/api.h | 127 ------ arch/s390/include/asm/fpu/internal.h | 65 --- arch/s390/include/asm/fpu/types.h | 43 -- arch/s390/include/asm/kvm_host.h | 2 +- arch/s390/include/asm/processor.h | 3 +- arch/s390/include/asm/vx-insn-asm.h | 703 --------------------------------- arch/s390/include/asm/vx-insn.h | 19 - arch/s390/kernel/compat_signal.c | 2 +- arch/s390/kernel/crash_dump.c | 2 +- arch/s390/kernel/early.c | 2 +- arch/s390/kernel/entry.S | 2 +- arch/s390/kernel/fpu.c | 4 +- arch/s390/kernel/machine_kexec.c | 1 + arch/s390/kernel/nmi.c | 3 +- arch/s390/kernel/perf_regs.c | 3 +- arch/s390/kernel/process.c | 1 + arch/s390/kernel/ptrace.c | 2 +- arch/s390/kernel/smp.c | 1 + arch/s390/kernel/sysinfo.c | 2 +- arch/s390/kernel/traps.c | 2 +- arch/s390/kvm/kvm-s390.c | 2 +- arch/s390/kvm/vsie.c | 2 +- lib/raid6/s390vx.uc | 3 +- 34 files changed, 968 insertions(+), 985 deletions(-) create mode 100644 arch/s390/include/asm/fpu-insn-asm.h create mode 100644 arch/s390/include/asm/fpu-insn.h create mode 100644 arch/s390/include/asm/fpu-types.h create mode 100644 arch/s390/include/asm/fpu.h delete mode 100644 arch/s390/include/asm/fpu/api.h delete mode 100644 arch/s390/include/asm/fpu/internal.h delete mode 100644 arch/s390/include/asm/fpu/types.h delete mode 100644 arch/s390/include/asm/vx-insn-asm.h delete mode 100644 arch/s390/include/asm/vx-insn.h (limited to 'lib') diff --git a/arch/s390/crypto/chacha-glue.c b/arch/s390/crypto/chacha-glue.c index a823132fc563..97098add2079 100644 --- a/arch/s390/crypto/chacha-glue.c +++ b/arch/s390/crypto/chacha-glue.c @@ -15,7 +15,7 @@ #include #include #include -#include +#include #include "chacha-s390.h" static void chacha20_crypt_s390(u32 *state, u8 *dst, const u8 *src, diff --git a/arch/s390/crypto/chacha-s390.S b/arch/s390/crypto/chacha-s390.S index 37cb63f25b17..63f3102678c0 100644 --- a/arch/s390/crypto/chacha-s390.S +++ b/arch/s390/crypto/chacha-s390.S @@ -8,7 +8,7 @@ #include #include -#include +#include #define SP %r15 #define FRAME (16 * 8 + 4 * 8) diff --git a/arch/s390/crypto/crc32-vx.c b/arch/s390/crypto/crc32-vx.c index 6ae3e3ff5b0a..dc2997f18e30 100644 --- a/arch/s390/crypto/crc32-vx.c +++ b/arch/s390/crypto/crc32-vx.c @@ -13,7 +13,7 @@ #include #include #include -#include +#include #define CRC32_BLOCK_SIZE 1 diff --git a/arch/s390/crypto/crc32be-vx.S b/arch/s390/crypto/crc32be-vx.S index 34ee47926891..f2dc8a688afb 100644 --- a/arch/s390/crypto/crc32be-vx.S +++ b/arch/s390/crypto/crc32be-vx.S @@ -14,7 +14,7 @@ #include #include -#include +#include /* Vector register range containing CRC-32 constants */ #define CONST_R1R2 %v9 diff --git a/arch/s390/crypto/crc32le-vx.S b/arch/s390/crypto/crc32le-vx.S index 5a819ae09a0b..df19e06ff8bc 100644 --- a/arch/s390/crypto/crc32le-vx.S +++ b/arch/s390/crypto/crc32le-vx.S @@ -15,7 +15,7 @@ #include #include -#include +#include /* Vector register range containing CRC-32 constants */ #define CONST_PERM_LE2BE %v9 diff --git a/arch/s390/include/asm/asm-prototypes.h 
b/arch/s390/include/asm/asm-prototypes.h index a873e873e1ee..56096ae26f29 100644 --- a/arch/s390/include/asm/asm-prototypes.h +++ b/arch/s390/include/asm/asm-prototypes.h @@ -3,7 +3,7 @@ #include #include -#include +#include #include __int128_t __ashlti3(__int128_t a, int b); diff --git a/arch/s390/include/asm/entry-common.h b/arch/s390/include/asm/entry-common.h index fdd319a622b0..a1dbab19c0bd 100644 --- a/arch/s390/include/asm/entry-common.h +++ b/arch/s390/include/asm/entry-common.h @@ -8,7 +8,7 @@ #include #include #include -#include +#include #include #define ARCH_EXIT_TO_USER_MODE_WORK (_TIF_GUARDED_STORAGE | _TIF_PER_TRAP) diff --git a/arch/s390/include/asm/fpu-insn-asm.h b/arch/s390/include/asm/fpu-insn-asm.h new file mode 100644 index 000000000000..789d626599ee --- /dev/null +++ b/arch/s390/include/asm/fpu-insn-asm.h @@ -0,0 +1,703 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Support for Vector Instructions + * + * Assembler macros to generate .byte/.word code for particular + * vector instructions that are supported by recent binutils (>= 2.26) only. + * + * Copyright IBM Corp. 2015 + * Author(s): Hendrik Brueckner + */ + +#ifndef __ASM_S390_FPU_INSN_ASM_H +#define __ASM_S390_FPU_INSN_ASM_H + +#ifndef __ASM_S390_FPU_INSN_H +#error only can be included directly +#endif + +#ifdef __ASSEMBLY__ + +/* Macros to generate vector instruction byte code */ + +/* GR_NUM - Retrieve general-purpose register number + * + * @opd: Operand to store register number + * @r64: String designation register in the format "%rN" + */ +.macro GR_NUM opd gr + \opd = 255 + .ifc \gr,%r0 + \opd = 0 + .endif + .ifc \gr,%r1 + \opd = 1 + .endif + .ifc \gr,%r2 + \opd = 2 + .endif + .ifc \gr,%r3 + \opd = 3 + .endif + .ifc \gr,%r4 + \opd = 4 + .endif + .ifc \gr,%r5 + \opd = 5 + .endif + .ifc \gr,%r6 + \opd = 6 + .endif + .ifc \gr,%r7 + \opd = 7 + .endif + .ifc \gr,%r8 + \opd = 8 + .endif + .ifc \gr,%r9 + \opd = 9 + .endif + .ifc \gr,%r10 + \opd = 10 + .endif + .ifc \gr,%r11 + \opd = 11 + .endif + .ifc \gr,%r12 + \opd = 12 + .endif + .ifc \gr,%r13 + \opd = 13 + .endif + .ifc \gr,%r14 + \opd = 14 + .endif + .ifc \gr,%r15 + \opd = 15 + .endif + .if \opd == 255 + \opd = \gr + .endif +.endm + +/* VX_NUM - Retrieve vector register number + * + * @opd: Operand to store register number + * @vxr: String designation register in the format "%vN" + * + * The vector register number is used for as input number to the + * instruction and, as well as, to compute the RXB field of the + * instruction. 
+ */ +.macro VX_NUM opd vxr + \opd = 255 + .ifc \vxr,%v0 + \opd = 0 + .endif + .ifc \vxr,%v1 + \opd = 1 + .endif + .ifc \vxr,%v2 + \opd = 2 + .endif + .ifc \vxr,%v3 + \opd = 3 + .endif + .ifc \vxr,%v4 + \opd = 4 + .endif + .ifc \vxr,%v5 + \opd = 5 + .endif + .ifc \vxr,%v6 + \opd = 6 + .endif + .ifc \vxr,%v7 + \opd = 7 + .endif + .ifc \vxr,%v8 + \opd = 8 + .endif + .ifc \vxr,%v9 + \opd = 9 + .endif + .ifc \vxr,%v10 + \opd = 10 + .endif + .ifc \vxr,%v11 + \opd = 11 + .endif + .ifc \vxr,%v12 + \opd = 12 + .endif + .ifc \vxr,%v13 + \opd = 13 + .endif + .ifc \vxr,%v14 + \opd = 14 + .endif + .ifc \vxr,%v15 + \opd = 15 + .endif + .ifc \vxr,%v16 + \opd = 16 + .endif + .ifc \vxr,%v17 + \opd = 17 + .endif + .ifc \vxr,%v18 + \opd = 18 + .endif + .ifc \vxr,%v19 + \opd = 19 + .endif + .ifc \vxr,%v20 + \opd = 20 + .endif + .ifc \vxr,%v21 + \opd = 21 + .endif + .ifc \vxr,%v22 + \opd = 22 + .endif + .ifc \vxr,%v23 + \opd = 23 + .endif + .ifc \vxr,%v24 + \opd = 24 + .endif + .ifc \vxr,%v25 + \opd = 25 + .endif + .ifc \vxr,%v26 + \opd = 26 + .endif + .ifc \vxr,%v27 + \opd = 27 + .endif + .ifc \vxr,%v28 + \opd = 28 + .endif + .ifc \vxr,%v29 + \opd = 29 + .endif + .ifc \vxr,%v30 + \opd = 30 + .endif + .ifc \vxr,%v31 + \opd = 31 + .endif + .if \opd == 255 + \opd = \vxr + .endif +.endm + +/* RXB - Compute most significant bit used vector registers + * + * @rxb: Operand to store computed RXB value + * @v1: Vector register designated operand whose MSB is stored in + * RXB bit 0 (instruction bit 36) and whose remaining bits + * are stored in instruction bits 8-11. + * @v2: Vector register designated operand whose MSB is stored in + * RXB bit 1 (instruction bit 37) and whose remaining bits + * are stored in instruction bits 12-15. + * @v3: Vector register designated operand whose MSB is stored in + * RXB bit 2 (instruction bit 38) and whose remaining bits + * are stored in instruction bits 16-19. + * @v4: Vector register designated operand whose MSB is stored in + * RXB bit 3 (instruction bit 39) and whose remaining bits + * are stored in instruction bits 32-35. + * + * Note: In most vector instruction formats [1] V1, V2, V3, and V4 directly + * correspond to @v1, @v2, @v3, and @v4. But there are exceptions, such as but + * not limited to the vector instruction formats VRR-g, VRR-h, VRS-a, VRS-d, + * and VSI. + * + * [1] IBM z/Architecture Principles of Operation, chapter "Program + * Execution, section "Instructions", subsection "Instruction Formats". + */ +.macro RXB rxb v1 v2=0 v3=0 v4=0 + \rxb = 0 + .if \v1 & 0x10 + \rxb = \rxb | 0x08 + .endif + .if \v2 & 0x10 + \rxb = \rxb | 0x04 + .endif + .if \v3 & 0x10 + \rxb = \rxb | 0x02 + .endif + .if \v4 & 0x10 + \rxb = \rxb | 0x01 + .endif +.endm + +/* MRXB - Generate Element Size Control and RXB value + * + * @m: Element size control + * @v1: First vector register designated operand (for RXB) + * @v2: Second vector register designated operand (for RXB) + * @v3: Third vector register designated operand (for RXB) + * @v4: Fourth vector register designated operand (for RXB) + * + * Note: For @v1, @v2, @v3, and @v4 also refer to the RXB macro + * description for further details. 
+ */ +.macro MRXB m v1 v2=0 v3=0 v4=0 + rxb = 0 + RXB rxb, \v1, \v2, \v3, \v4 + .byte (\m << 4) | rxb +.endm + +/* MRXBOPC - Generate Element Size Control, RXB, and final Opcode fields + * + * @m: Element size control + * @opc: Opcode + * @v1: First vector register designated operand (for RXB) + * @v2: Second vector register designated operand (for RXB) + * @v3: Third vector register designated operand (for RXB) + * @v4: Fourth vector register designated operand (for RXB) + * + * Note: For @v1, @v2, @v3, and @v4 also refer to the RXB macro + * description for further details. + */ +.macro MRXBOPC m opc v1 v2=0 v3=0 v4=0 + MRXB \m, \v1, \v2, \v3, \v4 + .byte \opc +.endm + +/* Vector support instructions */ + +/* VECTOR GENERATE BYTE MASK */ +.macro VGBM vr imm2 + VX_NUM v1, \vr + .word (0xE700 | ((v1&15) << 4)) + .word \imm2 + MRXBOPC 0, 0x44, v1 +.endm +.macro VZERO vxr + VGBM \vxr, 0 +.endm +.macro VONE vxr + VGBM \vxr, 0xFFFF +.endm + +/* VECTOR LOAD VR ELEMENT FROM GR */ +.macro VLVG v, gr, disp, m + VX_NUM v1, \v + GR_NUM b2, "%r0" + GR_NUM r3, \gr + .word 0xE700 | ((v1&15) << 4) | r3 + .word (b2 << 12) | (\disp) + MRXBOPC \m, 0x22, v1 +.endm +.macro VLVGB v, gr, index, base + VLVG \v, \gr, \index, \base, 0 +.endm +.macro VLVGH v, gr, index + VLVG \v, \gr, \index, 1 +.endm +.macro VLVGF v, gr, index + VLVG \v, \gr, \index, 2 +.endm +.macro VLVGG v, gr, index + VLVG \v, \gr, \index, 3 +.endm + +/* VECTOR LOAD REGISTER */ +.macro VLR v1, v2 + VX_NUM v1, \v1 + VX_NUM v2, \v2 + .word 0xE700 | ((v1&15) << 4) | (v2&15) + .word 0 + MRXBOPC 0, 0x56, v1, v2 +.endm + +/* VECTOR LOAD */ +.macro VL v, disp, index="%r0", base + VX_NUM v1, \v + GR_NUM x2, \index + GR_NUM b2, \base + .word 0xE700 | ((v1&15) << 4) | x2 + .word (b2 << 12) | (\disp) + MRXBOPC 0, 0x06, v1 +.endm + +/* VECTOR LOAD ELEMENT */ +.macro VLEx vr1, disp, index="%r0", base, m3, opc + VX_NUM v1, \vr1 + GR_NUM x2, \index + GR_NUM b2, \base + .word 0xE700 | ((v1&15) << 4) | x2 + .word (b2 << 12) | (\disp) + MRXBOPC \m3, \opc, v1 +.endm +.macro VLEB vr1, disp, index="%r0", base, m3 + VLEx \vr1, \disp, \index, \base, \m3, 0x00 +.endm +.macro VLEH vr1, disp, index="%r0", base, m3 + VLEx \vr1, \disp, \index, \base, \m3, 0x01 +.endm +.macro VLEF vr1, disp, index="%r0", base, m3 + VLEx \vr1, \disp, \index, \base, \m3, 0x03 +.endm +.macro VLEG vr1, disp, index="%r0", base, m3 + VLEx \vr1, \disp, \index, \base, \m3, 0x02 +.endm + +/* VECTOR LOAD ELEMENT IMMEDIATE */ +.macro VLEIx vr1, imm2, m3, opc + VX_NUM v1, \vr1 + .word 0xE700 | ((v1&15) << 4) + .word \imm2 + MRXBOPC \m3, \opc, v1 +.endm +.macro VLEIB vr1, imm2, index + VLEIx \vr1, \imm2, \index, 0x40 +.endm +.macro VLEIH vr1, imm2, index + VLEIx \vr1, \imm2, \index, 0x41 +.endm +.macro VLEIF vr1, imm2, index + VLEIx \vr1, \imm2, \index, 0x43 +.endm +.macro VLEIG vr1, imm2, index + VLEIx \vr1, \imm2, \index, 0x42 +.endm + +/* VECTOR LOAD GR FROM VR ELEMENT */ +.macro VLGV gr, vr, disp, base="%r0", m + GR_NUM r1, \gr + GR_NUM b2, \base + VX_NUM v3, \vr + .word 0xE700 | (r1 << 4) | (v3&15) + .word (b2 << 12) | (\disp) + MRXBOPC \m, 0x21, 0, v3 +.endm +.macro VLGVB gr, vr, disp, base="%r0" + VLGV \gr, \vr, \disp, \base, 0 +.endm +.macro VLGVH gr, vr, disp, base="%r0" + VLGV \gr, \vr, \disp, \base, 1 +.endm +.macro VLGVF gr, vr, disp, base="%r0" + VLGV \gr, \vr, \disp, \base, 2 +.endm +.macro VLGVG gr, vr, disp, base="%r0" + VLGV \gr, \vr, \disp, \base, 3 +.endm + +/* VECTOR LOAD MULTIPLE */ +.macro VLM vfrom, vto, disp, base, hint=3 + VX_NUM v1, \vfrom + VX_NUM v3, \vto + GR_NUM b2, \base 
+ .word 0xE700 | ((v1&15) << 4) | (v3&15) + .word (b2 << 12) | (\disp) + MRXBOPC \hint, 0x36, v1, v3 +.endm + +/* VECTOR STORE */ +.macro VST vr1, disp, index="%r0", base + VX_NUM v1, \vr1 + GR_NUM x2, \index + GR_NUM b2, \base + .word 0xE700 | ((v1&15) << 4) | (x2&15) + .word (b2 << 12) | (\disp) + MRXBOPC 0, 0x0E, v1 +.endm + +/* VECTOR STORE MULTIPLE */ +.macro VSTM vfrom, vto, disp, base, hint=3 + VX_NUM v1, \vfrom + VX_NUM v3, \vto + GR_NUM b2, \base + .word 0xE700 | ((v1&15) << 4) | (v3&15) + .word (b2 << 12) | (\disp) + MRXBOPC \hint, 0x3E, v1, v3 +.endm + +/* VECTOR PERMUTE */ +.macro VPERM vr1, vr2, vr3, vr4 + VX_NUM v1, \vr1 + VX_NUM v2, \vr2 + VX_NUM v3, \vr3 + VX_NUM v4, \vr4 + .word 0xE700 | ((v1&15) << 4) | (v2&15) + .word ((v3&15) << 12) + MRXBOPC (v4&15), 0x8C, v1, v2, v3, v4 +.endm + +/* VECTOR UNPACK LOGICAL LOW */ +.macro VUPLL vr1, vr2, m3 + VX_NUM v1, \vr1 + VX_NUM v2, \vr2 + .word 0xE700 | ((v1&15) << 4) | (v2&15) + .word 0x0000 + MRXBOPC \m3, 0xD4, v1, v2 +.endm +.macro VUPLLB vr1, vr2 + VUPLL \vr1, \vr2, 0 +.endm +.macro VUPLLH vr1, vr2 + VUPLL \vr1, \vr2, 1 +.endm +.macro VUPLLF vr1, vr2 + VUPLL \vr1, \vr2, 2 +.endm + +/* VECTOR PERMUTE DOUBLEWORD IMMEDIATE */ +.macro VPDI vr1, vr2, vr3, m4 + VX_NUM v1, \vr1 + VX_NUM v2, \vr2 + VX_NUM v3, \vr3 + .word 0xE700 | ((v1&15) << 4) | (v2&15) + .word ((v3&15) << 12) + MRXBOPC \m4, 0x84, v1, v2, v3 +.endm + +/* VECTOR REPLICATE */ +.macro VREP vr1, vr3, imm2, m4 + VX_NUM v1, \vr1 + VX_NUM v3, \vr3 + .word 0xE700 | ((v1&15) << 4) | (v3&15) + .word \imm2 + MRXBOPC \m4, 0x4D, v1, v3 +.endm +.macro VREPB vr1, vr3, imm2 + VREP \vr1, \vr3, \imm2, 0 +.endm +.macro VREPH vr1, vr3, imm2 + VREP \vr1, \vr3, \imm2, 1 +.endm +.macro VREPF vr1, vr3, imm2 + VREP \vr1, \vr3, \imm2, 2 +.endm +.macro VREPG vr1, vr3, imm2 + VREP \vr1, \vr3, \imm2, 3 +.endm + +/* VECTOR MERGE HIGH */ +.macro VMRH vr1, vr2, vr3, m4 + VX_NUM v1, \vr1 + VX_NUM v2, \vr2 + VX_NUM v3, \vr3 + .word 0xE700 | ((v1&15) << 4) | (v2&15) + .word ((v3&15) << 12) + MRXBOPC \m4, 0x61, v1, v2, v3 +.endm +.macro VMRHB vr1, vr2, vr3 + VMRH \vr1, \vr2, \vr3, 0 +.endm +.macro VMRHH vr1, vr2, vr3 + VMRH \vr1, \vr2, \vr3, 1 +.endm +.macro VMRHF vr1, vr2, vr3 + VMRH \vr1, \vr2, \vr3, 2 +.endm +.macro VMRHG vr1, vr2, vr3 + VMRH \vr1, \vr2, \vr3, 3 +.endm + +/* VECTOR MERGE LOW */ +.macro VMRL vr1, vr2, vr3, m4 + VX_NUM v1, \vr1 + VX_NUM v2, \vr2 + VX_NUM v3, \vr3 + .word 0xE700 | ((v1&15) << 4) | (v2&15) + .word ((v3&15) << 12) + MRXBOPC \m4, 0x60, v1, v2, v3 +.endm +.macro VMRLB vr1, vr2, vr3 + VMRL \vr1, \vr2, \vr3, 0 +.endm +.macro VMRLH vr1, vr2, vr3 + VMRL \vr1, \vr2, \vr3, 1 +.endm +.macro VMRLF vr1, vr2, vr3 + VMRL \vr1, \vr2, \vr3, 2 +.endm +.macro VMRLG vr1, vr2, vr3 + VMRL \vr1, \vr2, \vr3, 3 +.endm + + +/* Vector integer instructions */ + +/* VECTOR AND */ +.macro VN vr1, vr2, vr3 + VX_NUM v1, \vr1 + VX_NUM v2, \vr2 + VX_NUM v3, \vr3 + .word 0xE700 | ((v1&15) << 4) | (v2&15) + .word ((v3&15) << 12) + MRXBOPC 0, 0x68, v1, v2, v3 +.endm + +/* VECTOR EXCLUSIVE OR */ +.macro VX vr1, vr2, vr3 + VX_NUM v1, \vr1 + VX_NUM v2, \vr2 + VX_NUM v3, \vr3 + .word 0xE700 | ((v1&15) << 4) | (v2&15) + .word ((v3&15) << 12) + MRXBOPC 0, 0x6D, v1, v2, v3 +.endm + +/* VECTOR GALOIS FIELD MULTIPLY SUM */ +.macro VGFM vr1, vr2, vr3, m4 + VX_NUM v1, \vr1 + VX_NUM v2, \vr2 + VX_NUM v3, \vr3 + .word 0xE700 | ((v1&15) << 4) | (v2&15) + .word ((v3&15) << 12) + MRXBOPC \m4, 0xB4, v1, v2, v3 +.endm +.macro VGFMB vr1, vr2, vr3 + VGFM \vr1, \vr2, \vr3, 0 +.endm +.macro VGFMH vr1, vr2, vr3 + VGFM \vr1, 
\vr2, \vr3, 1 +.endm +.macro VGFMF vr1, vr2, vr3 + VGFM \vr1, \vr2, \vr3, 2 +.endm +.macro VGFMG vr1, vr2, vr3 + VGFM \vr1, \vr2, \vr3, 3 +.endm + +/* VECTOR GALOIS FIELD MULTIPLY SUM AND ACCUMULATE */ +.macro VGFMA vr1, vr2, vr3, vr4, m5 + VX_NUM v1, \vr1 + VX_NUM v2, \vr2 + VX_NUM v3, \vr3 + VX_NUM v4, \vr4 + .word 0xE700 | ((v1&15) << 4) | (v2&15) + .word ((v3&15) << 12) | (\m5 << 8) + MRXBOPC (v4&15), 0xBC, v1, v2, v3, v4 +.endm +.macro VGFMAB vr1, vr2, vr3, vr4 + VGFMA \vr1, \vr2, \vr3, \vr4, 0 +.endm +.macro VGFMAH vr1, vr2, vr3, vr4 + VGFMA \vr1, \vr2, \vr3, \vr4, 1 +.endm +.macro VGFMAF vr1, vr2, vr3, vr4 + VGFMA \vr1, \vr2, \vr3, \vr4, 2 +.endm +.macro VGFMAG vr1, vr2, vr3, vr4 + VGFMA \vr1, \vr2, \vr3, \vr4, 3 +.endm + +/* VECTOR SHIFT RIGHT LOGICAL BY BYTE */ +.macro VSRLB vr1, vr2, vr3 + VX_NUM v1, \vr1 + VX_NUM v2, \vr2 + VX_NUM v3, \vr3 + .word 0xE700 | ((v1&15) << 4) | (v2&15) + .word ((v3&15) << 12) + MRXBOPC 0, 0x7D, v1, v2, v3 +.endm + +/* VECTOR REPLICATE IMMEDIATE */ +.macro VREPI vr1, imm2, m3 + VX_NUM v1, \vr1 + .word 0xE700 | ((v1&15) << 4) + .word \imm2 + MRXBOPC \m3, 0x45, v1 +.endm +.macro VREPIB vr1, imm2 + VREPI \vr1, \imm2, 0 +.endm +.macro VREPIH vr1, imm2 + VREPI \vr1, \imm2, 1 +.endm +.macro VREPIF vr1, imm2 + VREPI \vr1, \imm2, 2 +.endm +.macro VREPIG vr1, imm2 + VREP \vr1, \imm2, 3 +.endm + +/* VECTOR ADD */ +.macro VA vr1, vr2, vr3, m4 + VX_NUM v1, \vr1 + VX_NUM v2, \vr2 + VX_NUM v3, \vr3 + .word 0xE700 | ((v1&15) << 4) | (v2&15) + .word ((v3&15) << 12) + MRXBOPC \m4, 0xF3, v1, v2, v3 +.endm +.macro VAB vr1, vr2, vr3 + VA \vr1, \vr2, \vr3, 0 +.endm +.macro VAH vr1, vr2, vr3 + VA \vr1, \vr2, \vr3, 1 +.endm +.macro VAF vr1, vr2, vr3 + VA \vr1, \vr2, \vr3, 2 +.endm +.macro VAG vr1, vr2, vr3 + VA \vr1, \vr2, \vr3, 3 +.endm +.macro VAQ vr1, vr2, vr3 + VA \vr1, \vr2, \vr3, 4 +.endm + +/* VECTOR ELEMENT SHIFT RIGHT ARITHMETIC */ +.macro VESRAV vr1, vr2, vr3, m4 + VX_NUM v1, \vr1 + VX_NUM v2, \vr2 + VX_NUM v3, \vr3 + .word 0xE700 | ((v1&15) << 4) | (v2&15) + .word ((v3&15) << 12) + MRXBOPC \m4, 0x7A, v1, v2, v3 +.endm + +.macro VESRAVB vr1, vr2, vr3 + VESRAV \vr1, \vr2, \vr3, 0 +.endm +.macro VESRAVH vr1, vr2, vr3 + VESRAV \vr1, \vr2, \vr3, 1 +.endm +.macro VESRAVF vr1, vr2, vr3 + VESRAV \vr1, \vr2, \vr3, 2 +.endm +.macro VESRAVG vr1, vr2, vr3 + VESRAV \vr1, \vr2, \vr3, 3 +.endm + +/* VECTOR ELEMENT ROTATE LEFT LOGICAL */ +.macro VERLL vr1, vr3, disp, base="%r0", m4 + VX_NUM v1, \vr1 + VX_NUM v3, \vr3 + GR_NUM b2, \base + .word 0xE700 | ((v1&15) << 4) | (v3&15) + .word (b2 << 12) | (\disp) + MRXBOPC \m4, 0x33, v1, v3 +.endm +.macro VERLLB vr1, vr3, disp, base="%r0" + VERLL \vr1, \vr3, \disp, \base, 0 +.endm +.macro VERLLH vr1, vr3, disp, base="%r0" + VERLL \vr1, \vr3, \disp, \base, 1 +.endm +.macro VERLLF vr1, vr3, disp, base="%r0" + VERLL \vr1, \vr3, \disp, \base, 2 +.endm +.macro VERLLG vr1, vr3, disp, base="%r0" + VERLL \vr1, \vr3, \disp, \base, 3 +.endm + +/* VECTOR SHIFT LEFT DOUBLE BY BYTE */ +.macro VSLDB vr1, vr2, vr3, imm4 + VX_NUM v1, \vr1 + VX_NUM v2, \vr2 + VX_NUM v3, \vr3 + .word 0xE700 | ((v1&15) << 4) | (v2&15) + .word ((v3&15) << 12) | (\imm4) + MRXBOPC 0, 0x77, v1, v2, v3 +.endm + +#endif /* __ASSEMBLY__ */ +#endif /* __ASM_S390_FPU_INSN_ASM_H */ diff --git a/arch/s390/include/asm/fpu-insn.h b/arch/s390/include/asm/fpu-insn.h new file mode 100644 index 000000000000..32d2894b60ca --- /dev/null +++ b/arch/s390/include/asm/fpu-insn.h @@ -0,0 +1,42 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Support for Floating Point and Vector Instructions + 
* + */ + +#ifndef __ASM_S390_FPU_INSN_H +#define __ASM_S390_FPU_INSN_H + +#include + +#ifndef __ASSEMBLY__ + +#include + +asm(".include \"asm/fpu-insn-asm.h\"\n"); + +/** + * sfpc_safe - Set floating point control register safely. + * @fpc: new value for floating point control register + * + * Set floating point control register. This may lead to an exception, + * since a saved value may have been modified by user space (ptrace, + * signal return, kvm registers) to an invalid value. In such a case + * set the floating point control register to zero. + */ +static inline void sfpc_safe(u32 fpc) +{ + asm volatile("\n" + "0: sfpc %[fpc]\n" + "1: nopr %%r7\n" + ".pushsection .fixup, \"ax\"\n" + "2: lghi %[fpc],0\n" + " jg 0b\n" + ".popsection\n" + EX_TABLE(1b, 2b) + : [fpc] "+d" (fpc) + : : "memory"); +} + +#endif /* __ASSEMBLY__ */ +#endif /* __ASM_S390_FPU_INSN_H */ diff --git a/arch/s390/include/asm/fpu-types.h b/arch/s390/include/asm/fpu-types.h new file mode 100644 index 000000000000..1caaf31209fc --- /dev/null +++ b/arch/s390/include/asm/fpu-types.h @@ -0,0 +1,43 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * FPU data structures + * + * Copyright IBM Corp. 2015 + * Author(s): Hendrik Brueckner + */ + +#ifndef _ASM_S390_FPU_TYPES_H +#define _ASM_S390_FPU_TYPES_H + +#include + +struct fpu { + __u32 fpc; /* Floating-point control */ + void *regs; /* Pointer to the current save area */ + union { + /* Floating-point register save area */ + freg_t fprs[__NUM_FPRS]; + /* Vector register save area */ + __vector128 vxrs[__NUM_VXRS]; + }; +}; + +/* VX array structure for address operand constraints in inline assemblies */ +struct vx_array { + __vector128 _[__NUM_VXRS]; +}; + +/* In-kernel FPU state structure */ +struct kernel_fpu { + u32 mask; + u32 fpc; + union { + freg_t fprs[__NUM_FPRS]; + __vector128 vxrs[__NUM_VXRS]; + }; +}; + +#define DECLARE_KERNEL_FPU_ONSTACK(name) \ + struct kernel_fpu name __uninitialized + +#endif /* _ASM_S390_FPU_TYPES_H */ diff --git a/arch/s390/include/asm/fpu.h b/arch/s390/include/asm/fpu.h new file mode 100644 index 000000000000..eed430c868df --- /dev/null +++ b/arch/s390/include/asm/fpu.h @@ -0,0 +1,155 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * In-kernel FPU support functions + * + * + * Consider these guidelines before using in-kernel FPU functions: + * + * 1. Use kernel_fpu_begin() and kernel_fpu_end() to enclose all in-kernel + * use of floating-point or vector registers and instructions. + * + * 2. For kernel_fpu_begin(), specify the vector register range you want to + * use with the KERNEL_VXR_* constants. Consider these usage guidelines: + * + * a) If your function typically runs in process-context, use the lower + * half of the vector registers, for example, specify KERNEL_VXR_LOW. + * b) If your function typically runs in soft-irq or hard-irq context, + * prefer using the upper half of the vector registers, for example, + * specify KERNEL_VXR_HIGH. + * + * If you adhere to these guidelines, an interrupted process context + * does not require to save and restore vector registers because of + * disjoint register ranges. + * + * Also note that the __kernel_fpu_begin()/__kernel_fpu_end() functions + * includes logic to save and restore up to 16 vector registers at once. + * + * 3. You can nest kernel_fpu_begin()/kernel_fpu_end() by using different + * struct kernel_fpu states. Vector registers that are in use by outer + * levels are saved and restored. You can minimize the save and restore + * effort by choosing disjoint vector register ranges. 
+ * + * 5. To use vector floating-point instructions, specify the KERNEL_FPC + * flag to save and restore floating-point controls in addition to any + * vector register range. + * + * 6. To use floating-point registers and instructions only, specify the + * KERNEL_FPR flag. This flag triggers a save and restore of vector + * registers V0 to V15 and floating-point controls. + * + * Copyright IBM Corp. 2015 + * Author(s): Hendrik Brueckner + */ + +#ifndef _ASM_S390_FPU_H +#define _ASM_S390_FPU_H + +#include +#include +#include +#include +#include +#include +#include + +static inline bool cpu_has_vx(void) +{ + return likely(test_facility(129)); +} + +void save_fpu_regs(void); +void load_fpu_regs(void); +void __load_fpu_regs(void); + +#define KERNEL_FPC 1 +#define KERNEL_VXR_V0V7 2 +#define KERNEL_VXR_V8V15 4 +#define KERNEL_VXR_V16V23 8 +#define KERNEL_VXR_V24V31 16 + +#define KERNEL_VXR_LOW (KERNEL_VXR_V0V7 | KERNEL_VXR_V8V15) +#define KERNEL_VXR_MID (KERNEL_VXR_V8V15 | KERNEL_VXR_V16V23) +#define KERNEL_VXR_HIGH (KERNEL_VXR_V16V23 | KERNEL_VXR_V24V31) + +#define KERNEL_VXR (KERNEL_VXR_LOW | KERNEL_VXR_HIGH) +#define KERNEL_FPR (KERNEL_FPC | KERNEL_VXR_LOW) + +/* + * Note the functions below must be called with preemption disabled. + * Do not enable preemption before calling __kernel_fpu_end() to prevent + * an corruption of an existing kernel FPU state. + * + * Prefer using the kernel_fpu_begin()/kernel_fpu_end() pair of functions. + */ +void __kernel_fpu_begin(struct kernel_fpu *state, u32 flags); +void __kernel_fpu_end(struct kernel_fpu *state, u32 flags); + +static inline void kernel_fpu_begin(struct kernel_fpu *state, u32 flags) +{ + preempt_disable(); + state->mask = S390_lowcore.fpu_flags; + if (!test_cpu_flag(CIF_FPU)) { + /* Save user space FPU state and register contents */ + save_fpu_regs(); + } else if (state->mask & flags) { + /* Save FPU/vector register in-use by the kernel */ + __kernel_fpu_begin(state, flags); + } + S390_lowcore.fpu_flags |= flags; +} + +static inline void kernel_fpu_end(struct kernel_fpu *state, u32 flags) +{ + S390_lowcore.fpu_flags = state->mask; + if (state->mask & flags) { + /* Restore FPU/vector register in-use by the kernel */ + __kernel_fpu_end(state, flags); + } + preempt_enable(); +} + +static inline void save_vx_regs(__vector128 *vxrs) +{ + asm volatile("\n" + " la 1,%0\n" + " .word 0xe70f,0x1000,0x003e\n" /* vstm 0,15,0(1) */ + " .word 0xe70f,0x1100,0x0c3e\n" /* vstm 16,31,256(1) */ + : "=Q" (*(struct vx_array *)vxrs) : : "1"); +} + +static inline void convert_vx_to_fp(freg_t *fprs, __vector128 *vxrs) +{ + int i; + + for (i = 0; i < __NUM_FPRS; i++) + fprs[i].ui = vxrs[i].high; +} + +static inline void convert_fp_to_vx(__vector128 *vxrs, freg_t *fprs) +{ + int i; + + for (i = 0; i < __NUM_FPRS; i++) + vxrs[i].high = fprs[i].ui; +} + +static inline void fpregs_store(_s390_fp_regs *fpregs, struct fpu *fpu) +{ + fpregs->pad = 0; + fpregs->fpc = fpu->fpc; + if (cpu_has_vx()) + convert_vx_to_fp((freg_t *)&fpregs->fprs, fpu->vxrs); + else + memcpy((freg_t *)&fpregs->fprs, fpu->fprs, sizeof(fpregs->fprs)); +} + +static inline void fpregs_load(_s390_fp_regs *fpregs, struct fpu *fpu) +{ + fpu->fpc = fpregs->fpc; + if (cpu_has_vx()) + convert_fp_to_vx(fpu->vxrs, (freg_t *)&fpregs->fprs); + else + memcpy(fpu->fprs, (freg_t *)&fpregs->fprs, sizeof(fpregs->fprs)); +} + +#endif /* _ASM_S390_FPU_H */ diff --git a/arch/s390/include/asm/fpu/api.h b/arch/s390/include/asm/fpu/api.h deleted file mode 100644 index 24da55d33129..000000000000 --- 
a/arch/s390/include/asm/fpu/api.h +++ /dev/null @@ -1,127 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * In-kernel FPU support functions - * - * - * Consider these guidelines before using in-kernel FPU functions: - * - * 1. Use kernel_fpu_begin() and kernel_fpu_end() to enclose all in-kernel - * use of floating-point or vector registers and instructions. - * - * 2. For kernel_fpu_begin(), specify the vector register range you want to - * use with the KERNEL_VXR_* constants. Consider these usage guidelines: - * - * a) If your function typically runs in process-context, use the lower - * half of the vector registers, for example, specify KERNEL_VXR_LOW. - * b) If your function typically runs in soft-irq or hard-irq context, - * prefer using the upper half of the vector registers, for example, - * specify KERNEL_VXR_HIGH. - * - * If you adhere to these guidelines, an interrupted process context - * does not require to save and restore vector registers because of - * disjoint register ranges. - * - * Also note that the __kernel_fpu_begin()/__kernel_fpu_end() functions - * includes logic to save and restore up to 16 vector registers at once. - * - * 3. You can nest kernel_fpu_begin()/kernel_fpu_end() by using different - * struct kernel_fpu states. Vector registers that are in use by outer - * levels are saved and restored. You can minimize the save and restore - * effort by choosing disjoint vector register ranges. - * - * 5. To use vector floating-point instructions, specify the KERNEL_FPC - * flag to save and restore floating-point controls in addition to any - * vector register range. - * - * 6. To use floating-point registers and instructions only, specify the - * KERNEL_FPR flag. This flag triggers a save and restore of vector - * registers V0 to V15 and floating-point controls. - * - * Copyright IBM Corp. 2015 - * Author(s): Hendrik Brueckner - */ - -#ifndef _ASM_S390_FPU_API_H -#define _ASM_S390_FPU_API_H - -#include -#include -#include - -void save_fpu_regs(void); -void load_fpu_regs(void); -void __load_fpu_regs(void); - -/** - * sfpc_safe - Set floating point control register safely. - * @fpc: new value for floating point control register - * - * Set floating point control register. This may lead to an exception, - * since a saved value may have been modified by user space (ptrace, - * signal return, kvm registers) to an invalid value. In such a case - * set the floating point control register to zero. - */ -static inline void sfpc_safe(u32 fpc) -{ - asm volatile("\n" - "0: sfpc %[fpc]\n" - "1: nopr %%r7\n" - ".pushsection .fixup, \"ax\"\n" - "2: lghi %[fpc],0\n" - " jg 0b\n" - ".popsection\n" - EX_TABLE(1b, 2b) - : [fpc] "+d" (fpc) - : : "memory"); -} - -#define KERNEL_FPC 1 -#define KERNEL_VXR_V0V7 2 -#define KERNEL_VXR_V8V15 4 -#define KERNEL_VXR_V16V23 8 -#define KERNEL_VXR_V24V31 16 - -#define KERNEL_VXR_LOW (KERNEL_VXR_V0V7 | KERNEL_VXR_V8V15) -#define KERNEL_VXR_MID (KERNEL_VXR_V8V15 | KERNEL_VXR_V16V23) -#define KERNEL_VXR_HIGH (KERNEL_VXR_V16V23 | KERNEL_VXR_V24V31) - -#define KERNEL_VXR (KERNEL_VXR_LOW | KERNEL_VXR_HIGH) -#define KERNEL_FPR (KERNEL_FPC | KERNEL_VXR_LOW) - -struct kernel_fpu; - -/* - * Note the functions below must be called with preemption disabled. - * Do not enable preemption before calling __kernel_fpu_end() to prevent - * an corruption of an existing kernel FPU state. - * - * Prefer using the kernel_fpu_begin()/kernel_fpu_end() pair of functions. 
- */ -void __kernel_fpu_begin(struct kernel_fpu *state, u32 flags); -void __kernel_fpu_end(struct kernel_fpu *state, u32 flags); - -static inline void kernel_fpu_begin(struct kernel_fpu *state, u32 flags) -{ - preempt_disable(); - state->mask = S390_lowcore.fpu_flags; - if (!test_cpu_flag(CIF_FPU)) { - /* Save user space FPU state and register contents */ - save_fpu_regs(); - } else if (state->mask & flags) { - /* Save FPU/vector register in-use by the kernel */ - __kernel_fpu_begin(state, flags); - } - S390_lowcore.fpu_flags |= flags; -} - -static inline void kernel_fpu_end(struct kernel_fpu *state, u32 flags) -{ - S390_lowcore.fpu_flags = state->mask; - if (state->mask & flags) { - /* Restore FPU/vector register in-use by the kernel */ - __kernel_fpu_end(state, flags); - } - preempt_enable(); -} - -#endif /* _ASM_S390_FPU_API_H */ diff --git a/arch/s390/include/asm/fpu/internal.h b/arch/s390/include/asm/fpu/internal.h deleted file mode 100644 index d7c0a100a444..000000000000 --- a/arch/s390/include/asm/fpu/internal.h +++ /dev/null @@ -1,65 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * FPU state and register content conversion primitives - * - * Copyright IBM Corp. 2015 - * Author(s): Hendrik Brueckner - */ - -#ifndef _ASM_S390_FPU_INTERNAL_H -#define _ASM_S390_FPU_INTERNAL_H - -#include -#include -#include - -static inline bool cpu_has_vx(void) -{ - return likely(test_facility(129)); -} - -static inline void save_vx_regs(__vector128 *vxrs) -{ - asm volatile("\n" - " la 1,%0\n" - " .word 0xe70f,0x1000,0x003e\n" /* vstm 0,15,0(1) */ - " .word 0xe70f,0x1100,0x0c3e\n" /* vstm 16,31,256(1) */ - : "=Q" (*(struct vx_array *)vxrs) : : "1"); -} - -static inline void convert_vx_to_fp(freg_t *fprs, __vector128 *vxrs) -{ - int i; - - for (i = 0; i < __NUM_FPRS; i++) - fprs[i].ui = vxrs[i].high; -} - -static inline void convert_fp_to_vx(__vector128 *vxrs, freg_t *fprs) -{ - int i; - - for (i = 0; i < __NUM_FPRS; i++) - vxrs[i].high = fprs[i].ui; -} - -static inline void fpregs_store(_s390_fp_regs *fpregs, struct fpu *fpu) -{ - fpregs->pad = 0; - fpregs->fpc = fpu->fpc; - if (cpu_has_vx()) - convert_vx_to_fp((freg_t *)&fpregs->fprs, fpu->vxrs); - else - memcpy((freg_t *)&fpregs->fprs, fpu->fprs, sizeof(fpregs->fprs)); -} - -static inline void fpregs_load(_s390_fp_regs *fpregs, struct fpu *fpu) -{ - fpu->fpc = fpregs->fpc; - if (cpu_has_vx()) - convert_fp_to_vx(fpu->vxrs, (freg_t *)&fpregs->fprs); - else - memcpy(fpu->fprs, (freg_t *)&fpregs->fprs, sizeof(fpregs->fprs)); -} - -#endif /* _ASM_S390_FPU_INTERNAL_H */ diff --git a/arch/s390/include/asm/fpu/types.h b/arch/s390/include/asm/fpu/types.h deleted file mode 100644 index 1caaf31209fc..000000000000 --- a/arch/s390/include/asm/fpu/types.h +++ /dev/null @@ -1,43 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * FPU data structures - * - * Copyright IBM Corp. 
2015 - * Author(s): Hendrik Brueckner - */ - -#ifndef _ASM_S390_FPU_TYPES_H -#define _ASM_S390_FPU_TYPES_H - -#include - -struct fpu { - __u32 fpc; /* Floating-point control */ - void *regs; /* Pointer to the current save area */ - union { - /* Floating-point register save area */ - freg_t fprs[__NUM_FPRS]; - /* Vector register save area */ - __vector128 vxrs[__NUM_VXRS]; - }; -}; - -/* VX array structure for address operand constraints in inline assemblies */ -struct vx_array { - __vector128 _[__NUM_VXRS]; -}; - -/* In-kernel FPU state structure */ -struct kernel_fpu { - u32 mask; - u32 fpc; - union { - freg_t fprs[__NUM_FPRS]; - __vector128 vxrs[__NUM_VXRS]; - }; -}; - -#define DECLARE_KERNEL_FPU_ONSTACK(name) \ - struct kernel_fpu name __uninitialized - -#endif /* _ASM_S390_FPU_TYPES_H */ diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 52664105a473..e336715eb7d2 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -23,7 +23,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index c0b6e74d899a..a422a2cf9d05 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -33,13 +33,12 @@ #include #include #include +#include #include #include #include #include #include -#include -#include #include typedef long (*sys_call_ptr_t)(struct pt_regs *regs); diff --git a/arch/s390/include/asm/vx-insn-asm.h b/arch/s390/include/asm/vx-insn-asm.h deleted file mode 100644 index 0d9f64b80127..000000000000 --- a/arch/s390/include/asm/vx-insn-asm.h +++ /dev/null @@ -1,703 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Support for Vector Instructions - * - * Assembler macros to generate .byte/.word code for particular - * vector instructions that are supported by recent binutils (>= 2.26) only. - * - * Copyright IBM Corp. 2015 - * Author(s): Hendrik Brueckner - */ - -#ifndef __ASM_S390_VX_INSN_INTERNAL_H -#define __ASM_S390_VX_INSN_INTERNAL_H - -#ifndef __ASM_S390_VX_INSN_H -#error only can be included directly -#endif - -#ifdef __ASSEMBLY__ - -/* Macros to generate vector instruction byte code */ - -/* GR_NUM - Retrieve general-purpose register number - * - * @opd: Operand to store register number - * @r64: String designation register in the format "%rN" - */ -.macro GR_NUM opd gr - \opd = 255 - .ifc \gr,%r0 - \opd = 0 - .endif - .ifc \gr,%r1 - \opd = 1 - .endif - .ifc \gr,%r2 - \opd = 2 - .endif - .ifc \gr,%r3 - \opd = 3 - .endif - .ifc \gr,%r4 - \opd = 4 - .endif - .ifc \gr,%r5 - \opd = 5 - .endif - .ifc \gr,%r6 - \opd = 6 - .endif - .ifc \gr,%r7 - \opd = 7 - .endif - .ifc \gr,%r8 - \opd = 8 - .endif - .ifc \gr,%r9 - \opd = 9 - .endif - .ifc \gr,%r10 - \opd = 10 - .endif - .ifc \gr,%r11 - \opd = 11 - .endif - .ifc \gr,%r12 - \opd = 12 - .endif - .ifc \gr,%r13 - \opd = 13 - .endif - .ifc \gr,%r14 - \opd = 14 - .endif - .ifc \gr,%r15 - \opd = 15 - .endif - .if \opd == 255 - \opd = \gr - .endif -.endm - -/* VX_NUM - Retrieve vector register number - * - * @opd: Operand to store register number - * @vxr: String designation register in the format "%vN" - * - * The vector register number is used for as input number to the - * instruction and, as well as, to compute the RXB field of the - * instruction. 
- */ -.macro VX_NUM opd vxr - \opd = 255 - .ifc \vxr,%v0 - \opd = 0 - .endif - .ifc \vxr,%v1 - \opd = 1 - .endif - .ifc \vxr,%v2 - \opd = 2 - .endif - .ifc \vxr,%v3 - \opd = 3 - .endif - .ifc \vxr,%v4 - \opd = 4 - .endif - .ifc \vxr,%v5 - \opd = 5 - .endif - .ifc \vxr,%v6 - \opd = 6 - .endif - .ifc \vxr,%v7 - \opd = 7 - .endif - .ifc \vxr,%v8 - \opd = 8 - .endif - .ifc \vxr,%v9 - \opd = 9 - .endif - .ifc \vxr,%v10 - \opd = 10 - .endif - .ifc \vxr,%v11 - \opd = 11 - .endif - .ifc \vxr,%v12 - \opd = 12 - .endif - .ifc \vxr,%v13 - \opd = 13 - .endif - .ifc \vxr,%v14 - \opd = 14 - .endif - .ifc \vxr,%v15 - \opd = 15 - .endif - .ifc \vxr,%v16 - \opd = 16 - .endif - .ifc \vxr,%v17 - \opd = 17 - .endif - .ifc \vxr,%v18 - \opd = 18 - .endif - .ifc \vxr,%v19 - \opd = 19 - .endif - .ifc \vxr,%v20 - \opd = 20 - .endif - .ifc \vxr,%v21 - \opd = 21 - .endif - .ifc \vxr,%v22 - \opd = 22 - .endif - .ifc \vxr,%v23 - \opd = 23 - .endif - .ifc \vxr,%v24 - \opd = 24 - .endif - .ifc \vxr,%v25 - \opd = 25 - .endif - .ifc \vxr,%v26 - \opd = 26 - .endif - .ifc \vxr,%v27 - \opd = 27 - .endif - .ifc \vxr,%v28 - \opd = 28 - .endif - .ifc \vxr,%v29 - \opd = 29 - .endif - .ifc \vxr,%v30 - \opd = 30 - .endif - .ifc \vxr,%v31 - \opd = 31 - .endif - .if \opd == 255 - \opd = \vxr - .endif -.endm - -/* RXB - Compute most significant bit used vector registers - * - * @rxb: Operand to store computed RXB value - * @v1: Vector register designated operand whose MSB is stored in - * RXB bit 0 (instruction bit 36) and whose remaining bits - * are stored in instruction bits 8-11. - * @v2: Vector register designated operand whose MSB is stored in - * RXB bit 1 (instruction bit 37) and whose remaining bits - * are stored in instruction bits 12-15. - * @v3: Vector register designated operand whose MSB is stored in - * RXB bit 2 (instruction bit 38) and whose remaining bits - * are stored in instruction bits 16-19. - * @v4: Vector register designated operand whose MSB is stored in - * RXB bit 3 (instruction bit 39) and whose remaining bits - * are stored in instruction bits 32-35. - * - * Note: In most vector instruction formats [1] V1, V2, V3, and V4 directly - * correspond to @v1, @v2, @v3, and @v4. But there are exceptions, such as but - * not limited to the vector instruction formats VRR-g, VRR-h, VRS-a, VRS-d, - * and VSI. - * - * [1] IBM z/Architecture Principles of Operation, chapter "Program - * Execution, section "Instructions", subsection "Instruction Formats". - */ -.macro RXB rxb v1 v2=0 v3=0 v4=0 - \rxb = 0 - .if \v1 & 0x10 - \rxb = \rxb | 0x08 - .endif - .if \v2 & 0x10 - \rxb = \rxb | 0x04 - .endif - .if \v3 & 0x10 - \rxb = \rxb | 0x02 - .endif - .if \v4 & 0x10 - \rxb = \rxb | 0x01 - .endif -.endm - -/* MRXB - Generate Element Size Control and RXB value - * - * @m: Element size control - * @v1: First vector register designated operand (for RXB) - * @v2: Second vector register designated operand (for RXB) - * @v3: Third vector register designated operand (for RXB) - * @v4: Fourth vector register designated operand (for RXB) - * - * Note: For @v1, @v2, @v3, and @v4 also refer to the RXB macro - * description for further details. 
- */ -.macro MRXB m v1 v2=0 v3=0 v4=0 - rxb = 0 - RXB rxb, \v1, \v2, \v3, \v4 - .byte (\m << 4) | rxb -.endm - -/* MRXBOPC - Generate Element Size Control, RXB, and final Opcode fields - * - * @m: Element size control - * @opc: Opcode - * @v1: First vector register designated operand (for RXB) - * @v2: Second vector register designated operand (for RXB) - * @v3: Third vector register designated operand (for RXB) - * @v4: Fourth vector register designated operand (for RXB) - * - * Note: For @v1, @v2, @v3, and @v4 also refer to the RXB macro - * description for further details. - */ -.macro MRXBOPC m opc v1 v2=0 v3=0 v4=0 - MRXB \m, \v1, \v2, \v3, \v4 - .byte \opc -.endm - -/* Vector support instructions */ - -/* VECTOR GENERATE BYTE MASK */ -.macro VGBM vr imm2 - VX_NUM v1, \vr - .word (0xE700 | ((v1&15) << 4)) - .word \imm2 - MRXBOPC 0, 0x44, v1 -.endm -.macro VZERO vxr - VGBM \vxr, 0 -.endm -.macro VONE vxr - VGBM \vxr, 0xFFFF -.endm - -/* VECTOR LOAD VR ELEMENT FROM GR */ -.macro VLVG v, gr, disp, m - VX_NUM v1, \v - GR_NUM b2, "%r0" - GR_NUM r3, \gr - .word 0xE700 | ((v1&15) << 4) | r3 - .word (b2 << 12) | (\disp) - MRXBOPC \m, 0x22, v1 -.endm -.macro VLVGB v, gr, index, base - VLVG \v, \gr, \index, \base, 0 -.endm -.macro VLVGH v, gr, index - VLVG \v, \gr, \index, 1 -.endm -.macro VLVGF v, gr, index - VLVG \v, \gr, \index, 2 -.endm -.macro VLVGG v, gr, index - VLVG \v, \gr, \index, 3 -.endm - -/* VECTOR LOAD REGISTER */ -.macro VLR v1, v2 - VX_NUM v1, \v1 - VX_NUM v2, \v2 - .word 0xE700 | ((v1&15) << 4) | (v2&15) - .word 0 - MRXBOPC 0, 0x56, v1, v2 -.endm - -/* VECTOR LOAD */ -.macro VL v, disp, index="%r0", base - VX_NUM v1, \v - GR_NUM x2, \index - GR_NUM b2, \base - .word 0xE700 | ((v1&15) << 4) | x2 - .word (b2 << 12) | (\disp) - MRXBOPC 0, 0x06, v1 -.endm - -/* VECTOR LOAD ELEMENT */ -.macro VLEx vr1, disp, index="%r0", base, m3, opc - VX_NUM v1, \vr1 - GR_NUM x2, \index - GR_NUM b2, \base - .word 0xE700 | ((v1&15) << 4) | x2 - .word (b2 << 12) | (\disp) - MRXBOPC \m3, \opc, v1 -.endm -.macro VLEB vr1, disp, index="%r0", base, m3 - VLEx \vr1, \disp, \index, \base, \m3, 0x00 -.endm -.macro VLEH vr1, disp, index="%r0", base, m3 - VLEx \vr1, \disp, \index, \base, \m3, 0x01 -.endm -.macro VLEF vr1, disp, index="%r0", base, m3 - VLEx \vr1, \disp, \index, \base, \m3, 0x03 -.endm -.macro VLEG vr1, disp, index="%r0", base, m3 - VLEx \vr1, \disp, \index, \base, \m3, 0x02 -.endm - -/* VECTOR LOAD ELEMENT IMMEDIATE */ -.macro VLEIx vr1, imm2, m3, opc - VX_NUM v1, \vr1 - .word 0xE700 | ((v1&15) << 4) - .word \imm2 - MRXBOPC \m3, \opc, v1 -.endm -.macro VLEIB vr1, imm2, index - VLEIx \vr1, \imm2, \index, 0x40 -.endm -.macro VLEIH vr1, imm2, index - VLEIx \vr1, \imm2, \index, 0x41 -.endm -.macro VLEIF vr1, imm2, index - VLEIx \vr1, \imm2, \index, 0x43 -.endm -.macro VLEIG vr1, imm2, index - VLEIx \vr1, \imm2, \index, 0x42 -.endm - -/* VECTOR LOAD GR FROM VR ELEMENT */ -.macro VLGV gr, vr, disp, base="%r0", m - GR_NUM r1, \gr - GR_NUM b2, \base - VX_NUM v3, \vr - .word 0xE700 | (r1 << 4) | (v3&15) - .word (b2 << 12) | (\disp) - MRXBOPC \m, 0x21, 0, v3 -.endm -.macro VLGVB gr, vr, disp, base="%r0" - VLGV \gr, \vr, \disp, \base, 0 -.endm -.macro VLGVH gr, vr, disp, base="%r0" - VLGV \gr, \vr, \disp, \base, 1 -.endm -.macro VLGVF gr, vr, disp, base="%r0" - VLGV \gr, \vr, \disp, \base, 2 -.endm -.macro VLGVG gr, vr, disp, base="%r0" - VLGV \gr, \vr, \disp, \base, 3 -.endm - -/* VECTOR LOAD MULTIPLE */ -.macro VLM vfrom, vto, disp, base, hint=3 - VX_NUM v1, \vfrom - VX_NUM v3, \vto - GR_NUM b2, \base 
- .word 0xE700 | ((v1&15) << 4) | (v3&15) - .word (b2 << 12) | (\disp) - MRXBOPC \hint, 0x36, v1, v3 -.endm - -/* VECTOR STORE */ -.macro VST vr1, disp, index="%r0", base - VX_NUM v1, \vr1 - GR_NUM x2, \index - GR_NUM b2, \base - .word 0xE700 | ((v1&15) << 4) | (x2&15) - .word (b2 << 12) | (\disp) - MRXBOPC 0, 0x0E, v1 -.endm - -/* VECTOR STORE MULTIPLE */ -.macro VSTM vfrom, vto, disp, base, hint=3 - VX_NUM v1, \vfrom - VX_NUM v3, \vto - GR_NUM b2, \base - .word 0xE700 | ((v1&15) << 4) | (v3&15) - .word (b2 << 12) | (\disp) - MRXBOPC \hint, 0x3E, v1, v3 -.endm - -/* VECTOR PERMUTE */ -.macro VPERM vr1, vr2, vr3, vr4 - VX_NUM v1, \vr1 - VX_NUM v2, \vr2 - VX_NUM v3, \vr3 - VX_NUM v4, \vr4 - .word 0xE700 | ((v1&15) << 4) | (v2&15) - .word ((v3&15) << 12) - MRXBOPC (v4&15), 0x8C, v1, v2, v3, v4 -.endm - -/* VECTOR UNPACK LOGICAL LOW */ -.macro VUPLL vr1, vr2, m3 - VX_NUM v1, \vr1 - VX_NUM v2, \vr2 - .word 0xE700 | ((v1&15) << 4) | (v2&15) - .word 0x0000 - MRXBOPC \m3, 0xD4, v1, v2 -.endm -.macro VUPLLB vr1, vr2 - VUPLL \vr1, \vr2, 0 -.endm -.macro VUPLLH vr1, vr2 - VUPLL \vr1, \vr2, 1 -.endm -.macro VUPLLF vr1, vr2 - VUPLL \vr1, \vr2, 2 -.endm - -/* VECTOR PERMUTE DOUBLEWORD IMMEDIATE */ -.macro VPDI vr1, vr2, vr3, m4 - VX_NUM v1, \vr1 - VX_NUM v2, \vr2 - VX_NUM v3, \vr3 - .word 0xE700 | ((v1&15) << 4) | (v2&15) - .word ((v3&15) << 12) - MRXBOPC \m4, 0x84, v1, v2, v3 -.endm - -/* VECTOR REPLICATE */ -.macro VREP vr1, vr3, imm2, m4 - VX_NUM v1, \vr1 - VX_NUM v3, \vr3 - .word 0xE700 | ((v1&15) << 4) | (v3&15) - .word \imm2 - MRXBOPC \m4, 0x4D, v1, v3 -.endm -.macro VREPB vr1, vr3, imm2 - VREP \vr1, \vr3, \imm2, 0 -.endm -.macro VREPH vr1, vr3, imm2 - VREP \vr1, \vr3, \imm2, 1 -.endm -.macro VREPF vr1, vr3, imm2 - VREP \vr1, \vr3, \imm2, 2 -.endm -.macro VREPG vr1, vr3, imm2 - VREP \vr1, \vr3, \imm2, 3 -.endm - -/* VECTOR MERGE HIGH */ -.macro VMRH vr1, vr2, vr3, m4 - VX_NUM v1, \vr1 - VX_NUM v2, \vr2 - VX_NUM v3, \vr3 - .word 0xE700 | ((v1&15) << 4) | (v2&15) - .word ((v3&15) << 12) - MRXBOPC \m4, 0x61, v1, v2, v3 -.endm -.macro VMRHB vr1, vr2, vr3 - VMRH \vr1, \vr2, \vr3, 0 -.endm -.macro VMRHH vr1, vr2, vr3 - VMRH \vr1, \vr2, \vr3, 1 -.endm -.macro VMRHF vr1, vr2, vr3 - VMRH \vr1, \vr2, \vr3, 2 -.endm -.macro VMRHG vr1, vr2, vr3 - VMRH \vr1, \vr2, \vr3, 3 -.endm - -/* VECTOR MERGE LOW */ -.macro VMRL vr1, vr2, vr3, m4 - VX_NUM v1, \vr1 - VX_NUM v2, \vr2 - VX_NUM v3, \vr3 - .word 0xE700 | ((v1&15) << 4) | (v2&15) - .word ((v3&15) << 12) - MRXBOPC \m4, 0x60, v1, v2, v3 -.endm -.macro VMRLB vr1, vr2, vr3 - VMRL \vr1, \vr2, \vr3, 0 -.endm -.macro VMRLH vr1, vr2, vr3 - VMRL \vr1, \vr2, \vr3, 1 -.endm -.macro VMRLF vr1, vr2, vr3 - VMRL \vr1, \vr2, \vr3, 2 -.endm -.macro VMRLG vr1, vr2, vr3 - VMRL \vr1, \vr2, \vr3, 3 -.endm - - -/* Vector integer instructions */ - -/* VECTOR AND */ -.macro VN vr1, vr2, vr3 - VX_NUM v1, \vr1 - VX_NUM v2, \vr2 - VX_NUM v3, \vr3 - .word 0xE700 | ((v1&15) << 4) | (v2&15) - .word ((v3&15) << 12) - MRXBOPC 0, 0x68, v1, v2, v3 -.endm - -/* VECTOR EXCLUSIVE OR */ -.macro VX vr1, vr2, vr3 - VX_NUM v1, \vr1 - VX_NUM v2, \vr2 - VX_NUM v3, \vr3 - .word 0xE700 | ((v1&15) << 4) | (v2&15) - .word ((v3&15) << 12) - MRXBOPC 0, 0x6D, v1, v2, v3 -.endm - -/* VECTOR GALOIS FIELD MULTIPLY SUM */ -.macro VGFM vr1, vr2, vr3, m4 - VX_NUM v1, \vr1 - VX_NUM v2, \vr2 - VX_NUM v3, \vr3 - .word 0xE700 | ((v1&15) << 4) | (v2&15) - .word ((v3&15) << 12) - MRXBOPC \m4, 0xB4, v1, v2, v3 -.endm -.macro VGFMB vr1, vr2, vr3 - VGFM \vr1, \vr2, \vr3, 0 -.endm -.macro VGFMH vr1, vr2, vr3 - VGFM \vr1, 
\vr2, \vr3, 1 -.endm -.macro VGFMF vr1, vr2, vr3 - VGFM \vr1, \vr2, \vr3, 2 -.endm -.macro VGFMG vr1, vr2, vr3 - VGFM \vr1, \vr2, \vr3, 3 -.endm - -/* VECTOR GALOIS FIELD MULTIPLY SUM AND ACCUMULATE */ -.macro VGFMA vr1, vr2, vr3, vr4, m5 - VX_NUM v1, \vr1 - VX_NUM v2, \vr2 - VX_NUM v3, \vr3 - VX_NUM v4, \vr4 - .word 0xE700 | ((v1&15) << 4) | (v2&15) - .word ((v3&15) << 12) | (\m5 << 8) - MRXBOPC (v4&15), 0xBC, v1, v2, v3, v4 -.endm -.macro VGFMAB vr1, vr2, vr3, vr4 - VGFMA \vr1, \vr2, \vr3, \vr4, 0 -.endm -.macro VGFMAH vr1, vr2, vr3, vr4 - VGFMA \vr1, \vr2, \vr3, \vr4, 1 -.endm -.macro VGFMAF vr1, vr2, vr3, vr4 - VGFMA \vr1, \vr2, \vr3, \vr4, 2 -.endm -.macro VGFMAG vr1, vr2, vr3, vr4 - VGFMA \vr1, \vr2, \vr3, \vr4, 3 -.endm - -/* VECTOR SHIFT RIGHT LOGICAL BY BYTE */ -.macro VSRLB vr1, vr2, vr3 - VX_NUM v1, \vr1 - VX_NUM v2, \vr2 - VX_NUM v3, \vr3 - .word 0xE700 | ((v1&15) << 4) | (v2&15) - .word ((v3&15) << 12) - MRXBOPC 0, 0x7D, v1, v2, v3 -.endm - -/* VECTOR REPLICATE IMMEDIATE */ -.macro VREPI vr1, imm2, m3 - VX_NUM v1, \vr1 - .word 0xE700 | ((v1&15) << 4) - .word \imm2 - MRXBOPC \m3, 0x45, v1 -.endm -.macro VREPIB vr1, imm2 - VREPI \vr1, \imm2, 0 -.endm -.macro VREPIH vr1, imm2 - VREPI \vr1, \imm2, 1 -.endm -.macro VREPIF vr1, imm2 - VREPI \vr1, \imm2, 2 -.endm -.macro VREPIG vr1, imm2 - VREP \vr1, \imm2, 3 -.endm - -/* VECTOR ADD */ -.macro VA vr1, vr2, vr3, m4 - VX_NUM v1, \vr1 - VX_NUM v2, \vr2 - VX_NUM v3, \vr3 - .word 0xE700 | ((v1&15) << 4) | (v2&15) - .word ((v3&15) << 12) - MRXBOPC \m4, 0xF3, v1, v2, v3 -.endm -.macro VAB vr1, vr2, vr3 - VA \vr1, \vr2, \vr3, 0 -.endm -.macro VAH vr1, vr2, vr3 - VA \vr1, \vr2, \vr3, 1 -.endm -.macro VAF vr1, vr2, vr3 - VA \vr1, \vr2, \vr3, 2 -.endm -.macro VAG vr1, vr2, vr3 - VA \vr1, \vr2, \vr3, 3 -.endm -.macro VAQ vr1, vr2, vr3 - VA \vr1, \vr2, \vr3, 4 -.endm - -/* VECTOR ELEMENT SHIFT RIGHT ARITHMETIC */ -.macro VESRAV vr1, vr2, vr3, m4 - VX_NUM v1, \vr1 - VX_NUM v2, \vr2 - VX_NUM v3, \vr3 - .word 0xE700 | ((v1&15) << 4) | (v2&15) - .word ((v3&15) << 12) - MRXBOPC \m4, 0x7A, v1, v2, v3 -.endm - -.macro VESRAVB vr1, vr2, vr3 - VESRAV \vr1, \vr2, \vr3, 0 -.endm -.macro VESRAVH vr1, vr2, vr3 - VESRAV \vr1, \vr2, \vr3, 1 -.endm -.macro VESRAVF vr1, vr2, vr3 - VESRAV \vr1, \vr2, \vr3, 2 -.endm -.macro VESRAVG vr1, vr2, vr3 - VESRAV \vr1, \vr2, \vr3, 3 -.endm - -/* VECTOR ELEMENT ROTATE LEFT LOGICAL */ -.macro VERLL vr1, vr3, disp, base="%r0", m4 - VX_NUM v1, \vr1 - VX_NUM v3, \vr3 - GR_NUM b2, \base - .word 0xE700 | ((v1&15) << 4) | (v3&15) - .word (b2 << 12) | (\disp) - MRXBOPC \m4, 0x33, v1, v3 -.endm -.macro VERLLB vr1, vr3, disp, base="%r0" - VERLL \vr1, \vr3, \disp, \base, 0 -.endm -.macro VERLLH vr1, vr3, disp, base="%r0" - VERLL \vr1, \vr3, \disp, \base, 1 -.endm -.macro VERLLF vr1, vr3, disp, base="%r0" - VERLL \vr1, \vr3, \disp, \base, 2 -.endm -.macro VERLLG vr1, vr3, disp, base="%r0" - VERLL \vr1, \vr3, \disp, \base, 3 -.endm - -/* VECTOR SHIFT LEFT DOUBLE BY BYTE */ -.macro VSLDB vr1, vr2, vr3, imm4 - VX_NUM v1, \vr1 - VX_NUM v2, \vr2 - VX_NUM v3, \vr3 - .word 0xE700 | ((v1&15) << 4) | (v2&15) - .word ((v3&15) << 12) | (\imm4) - MRXBOPC 0, 0x77, v1, v2, v3 -.endm - -#endif /* __ASSEMBLY__ */ -#endif /* __ASM_S390_VX_INSN_INTERNAL_H */ diff --git a/arch/s390/include/asm/vx-insn.h b/arch/s390/include/asm/vx-insn.h deleted file mode 100644 index 8c188f1c6d27..000000000000 --- a/arch/s390/include/asm/vx-insn.h +++ /dev/null @@ -1,19 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Support for Vector Instructions - * - * This 
wrapper header file allows to use the vector instruction macros in - * both assembler files as well as in inline assemblies in C files. - */ - -#ifndef __ASM_S390_VX_INSN_H -#define __ASM_S390_VX_INSN_H - -#include - -#ifndef __ASSEMBLY__ - -asm(".include \"asm/vx-insn-asm.h\"\n"); - -#endif /* __ASSEMBLY__ */ -#endif /* __ASM_S390_VX_INSN_H */ diff --git a/arch/s390/kernel/compat_signal.c b/arch/s390/kernel/compat_signal.c index 13f41cb497be..6cd9bf925c82 100644 --- a/arch/s390/kernel/compat_signal.c +++ b/arch/s390/kernel/compat_signal.c @@ -29,7 +29,7 @@ #include #include #include -#include +#include #include "compat_linux.h" #include "compat_ptrace.h" #include "entry.h" diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c index 5c46c2659305..d09ebb6f5262 100644 --- a/arch/s390/kernel/crash_dump.c +++ b/arch/s390/kernel/crash_dump.c @@ -22,7 +22,7 @@ #include #include #include -#include +#include #define PTR_ADD(x, y) (((char *) (x)) + ((unsigned long) (y))) #define PTR_SUB(x, y) (((char *) (x)) - ((unsigned long) (y))) diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index 63d5b51cb45a..c666271433fb 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -20,9 +20,9 @@ #include #include #include -#include #include #include +#include #include #include #include diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index c6ccfc9d3704..01c3b2d2821d 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -24,7 +24,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/s390/kernel/fpu.c b/arch/s390/kernel/fpu.c index d8e18a74b93d..5d6a2339db40 100644 --- a/arch/s390/kernel/fpu.c +++ b/arch/s390/kernel/fpu.c @@ -8,9 +8,7 @@ #include #include #include -#include -#include -#include +#include void __kernel_fpu_begin(struct kernel_fpu *state, u32 flags) { diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c index 032a1124f04b..c5d0c1cf984b 100644 --- a/arch/s390/kernel/machine_kexec.c +++ b/arch/s390/kernel/machine_kexec.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c index 6dd3020d4a2f..c77382a67325 100644 --- a/arch/s390/kernel/nmi.c +++ b/arch/s390/kernel/nmi.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include @@ -30,8 +31,6 @@ #include #include #include -#include -#include struct mcck_struct { unsigned int kill_task : 1; diff --git a/arch/s390/kernel/perf_regs.c b/arch/s390/kernel/perf_regs.c index 3d93656bd948..c8e8fb728ddb 100644 --- a/arch/s390/kernel/perf_regs.c +++ b/arch/s390/kernel/perf_regs.c @@ -5,8 +5,7 @@ #include #include #include -#include -#include +#include u64 perf_reg_value(struct pt_regs *regs, int idx) { diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c index 7227a73226f1..e502192da5f7 100644 --- a/arch/s390/kernel/process.c +++ b/arch/s390/kernel/process.c @@ -39,6 +39,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c index 7c5e99be2155..2eafd6dcd592 100644 --- a/arch/s390/kernel/ptrace.c +++ b/arch/s390/kernel/ptrace.c @@ -31,7 +31,7 @@ #include #include #include -#include +#include #include "entry.h" diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index f77a54e1e4bd..0324649aae0a 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -42,6 +42,7 @@ #include #include 
#include +#include #include #include diff --git a/arch/s390/kernel/sysinfo.c b/arch/s390/kernel/sysinfo.c index b439f17516eb..061d45cf0261 100644 --- a/arch/s390/kernel/sysinfo.c +++ b/arch/s390/kernel/sysinfo.c @@ -20,7 +20,7 @@ #include #include #include -#include +#include int topology_max_mnest; diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c index 46dac4540ca8..08f8aee96d8f 100644 --- a/arch/s390/kernel/traps.c +++ b/arch/s390/kernel/traps.c @@ -28,8 +28,8 @@ #include #include #include -#include #include +#include #include "entry.h" static inline void __user *get_trap_ip(struct pt_regs *regs) diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 6eab70b19abf..8f4414539756 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -43,9 +43,9 @@ #include #include #include +#include #include #include -#include #include "kvm-s390.h" #include "gaccess.h" #include "pci.h" diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c index fef42e2a80a2..457d92c2949a 100644 --- a/arch/s390/kvm/vsie.c +++ b/arch/s390/kvm/vsie.c @@ -18,7 +18,7 @@ #include #include #include -#include +#include #include #include "kvm-s390.h" #include "gaccess.h" diff --git a/lib/raid6/s390vx.uc b/lib/raid6/s390vx.uc index 7b0b355e1a26..bc2f4fbe5a82 100644 --- a/lib/raid6/s390vx.uc +++ b/lib/raid6/s390vx.uc @@ -12,8 +12,7 @@ */ #include -#include -#include +#include #define NSIZE 16 -- cgit v1.2.3 From 066c40918bb495de8f2e45bd7eec06737a142712 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Sat, 3 Feb 2024 11:45:17 +0100 Subject: s390/fpu: decrease stack usage for some cases The kernel_fpu structure is quite large, with a size of 520 bytes. In order to reduce the stack footprint, introduce several kernel fpu structures with different, smaller sizes. This way every kernel fpu user must use the correct variant. A compile-time check verifies that the correct variant is used. There are several users which use only 16 instead of all 32 vector registers. For those users the new kernel_fpu_16 structure, with a size of only 264 bytes (8-byte header plus 16 vector registers), can be used.
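
To make the intended usage concrete, here is a minimal sketch; crc32_vx_work() is made up for illustration, while DECLARE_KERNEL_FPU_ONSTACK16(), the KERNEL_VXR_* flags, and the size check are taken from the patch below. A user that touches only %v0-%v15 declares the 16-entry variant, and the kernel_fpu_begin()/kernel_fpu_end() wrappers verify at compile time that the requested register banks fit the declared save area:

static void crc32_vx_work(void)		/* hypothetical user of %v0-%v15 only */
{
	DECLARE_KERNEL_FPU_ONSTACK16(vxstate);	/* header plus 16 vector registers */

	kernel_fpu_begin(&vxstate, KERNEL_VXR_LOW);	/* 2 banks x 8 regs == 16: size check passes */
	/* ... use %v0-%v15 ... */
	kernel_fpu_end(&vxstate, KERNEL_VXR_LOW);
}

/*
 * kernel_fpu_begin(&vxstate, KERNEL_VXR) would not build: the inlined check
 * counts 8 registers per requested bank (4 banks == 32), sees
 * ARRAY_SIZE(vxrs) == 16, and emits a call to __kernel_fpu_invalid_size(),
 * which is declared but intentionally never defined, so the link fails.
 */
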
Signed-off-by: Heiko Carstens --- arch/s390/crypto/chacha-glue.c | 2 +- arch/s390/crypto/crc32-vx.c | 2 +- arch/s390/include/asm/fpu-types.h | 30 +++++++++++++++++++----- arch/s390/include/asm/fpu.h | 48 ++++++++++++++++++++++++++++++++++----- arch/s390/kernel/fpu.c | 48 +++++++++++++++++++-------------------- arch/s390/kernel/sysinfo.c | 2 +- arch/s390/kvm/kvm-s390.c | 2 +- lib/raid6/s390vx.uc | 4 ++-- 8 files changed, 96 insertions(+), 42 deletions(-) (limited to 'lib') diff --git a/arch/s390/crypto/chacha-glue.c b/arch/s390/crypto/chacha-glue.c index 97098add2079..f8b0c52e77a4 100644 --- a/arch/s390/crypto/chacha-glue.c +++ b/arch/s390/crypto/chacha-glue.c @@ -22,7 +22,7 @@ static void chacha20_crypt_s390(u32 *state, u8 *dst, const u8 *src, unsigned int nbytes, const u32 *key, u32 *counter) { - DECLARE_KERNEL_FPU_ONSTACK(vxstate); + DECLARE_KERNEL_FPU_ONSTACK32(vxstate); kernel_fpu_begin(&vxstate, KERNEL_VXR); chacha20_vx(dst, src, nbytes, key, counter); diff --git a/arch/s390/crypto/crc32-vx.c b/arch/s390/crypto/crc32-vx.c index dc2997f18e30..d9f1fdb66691 100644 --- a/arch/s390/crypto/crc32-vx.c +++ b/arch/s390/crypto/crc32-vx.c @@ -50,7 +50,7 @@ u32 crc32c_le_vgfm_16(u32 crc, unsigned char const *buf, size_t size); unsigned char const *data, size_t datalen) \ { \ unsigned long prealign, aligned, remaining; \ - DECLARE_KERNEL_FPU_ONSTACK(vxstate); \ + DECLARE_KERNEL_FPU_ONSTACK16(vxstate); \ \ if (datalen < VX_MIN_LEN + VX_ALIGN_MASK) \ return ___crc32_sw(crc, data, datalen); \ diff --git a/arch/s390/include/asm/fpu-types.h b/arch/s390/include/asm/fpu-types.h index 8e6927c23bdc..04c32b9fc849 100644 --- a/arch/s390/include/asm/fpu-types.h +++ b/arch/s390/include/asm/fpu-types.h @@ -16,14 +16,32 @@ struct fpu { __vector128 vxrs[__NUM_VXRS] __aligned(8); }; -/* In-kernel FPU state structure */ +struct kernel_fpu_hdr { + int mask; + u32 fpc; +}; + struct kernel_fpu { - int mask; - u32 fpc; - __vector128 vxrs[__NUM_VXRS] __aligned(8); + struct kernel_fpu_hdr hdr; + __vector128 vxrs[] __aligned(8); }; -#define DECLARE_KERNEL_FPU_ONSTACK(name) \ - struct kernel_fpu name __uninitialized +#define KERNEL_FPU_STRUCT(vxr_size) \ +struct kernel_fpu_##vxr_size { \ + struct kernel_fpu_hdr hdr; \ + __vector128 vxrs[vxr_size] __aligned(8); \ +} + +KERNEL_FPU_STRUCT(16); +KERNEL_FPU_STRUCT(32); + +#define DECLARE_KERNEL_FPU_ONSTACK(vxr_size, name) \ + struct kernel_fpu_##vxr_size name __uninitialized + +#define DECLARE_KERNEL_FPU_ONSTACK16(name) \ + DECLARE_KERNEL_FPU_ONSTACK(16, name) + +#define DECLARE_KERNEL_FPU_ONSTACK32(name) \ + DECLARE_KERNEL_FPU_ONSTACK(32, name) #endif /* _ASM_S390_FPU_TYPES_H */ diff --git a/arch/s390/include/asm/fpu.h b/arch/s390/include/asm/fpu.h index e706af26c5d0..c1b3920092a1 100644 --- a/arch/s390/include/asm/fpu.h +++ b/arch/s390/include/asm/fpu.h @@ -162,28 +162,64 @@ static __always_inline void load_fp_regs_vx(__vector128 *vxrs) __load_fp_regs(fprs, sizeof(__vector128) / sizeof(freg_t)); } -static inline void kernel_fpu_begin(struct kernel_fpu *state, int flags) +static inline void _kernel_fpu_begin(struct kernel_fpu *state, int flags) { - state->mask = READ_ONCE(current->thread.kfpu_flags); + state->hdr.mask = READ_ONCE(current->thread.kfpu_flags); if (!test_thread_flag(TIF_FPU)) { /* Save user space FPU state and register contents */ save_user_fpu_regs(); - } else if (state->mask & flags) { + } else if (state->hdr.mask & flags) { /* Save FPU/vector register in-use by the kernel */ __kernel_fpu_begin(state, flags); } __atomic_or(flags, ¤t->thread.kfpu_flags); } 
-static inline void kernel_fpu_end(struct kernel_fpu *state, int flags) +static inline void _kernel_fpu_end(struct kernel_fpu *state, int flags) { - WRITE_ONCE(current->thread.kfpu_flags, state->mask); - if (state->mask & flags) { + WRITE_ONCE(current->thread.kfpu_flags, state->hdr.mask); + if (state->hdr.mask & flags) { /* Restore FPU/vector register in-use by the kernel */ __kernel_fpu_end(state, flags); } } +void __kernel_fpu_invalid_size(void); + +static __always_inline void kernel_fpu_check_size(int flags, unsigned int size) +{ + unsigned int cnt = 0; + + if (flags & KERNEL_VXR_V0V7) + cnt += 8; + if (flags & KERNEL_VXR_V8V15) + cnt += 8; + if (flags & KERNEL_VXR_V16V23) + cnt += 8; + if (flags & KERNEL_VXR_V24V31) + cnt += 8; + if (cnt != size) + __kernel_fpu_invalid_size(); +} + +#define kernel_fpu_begin(state, flags) \ +{ \ + typeof(state) s = (state); \ + int _flags = (flags); \ + \ + kernel_fpu_check_size(_flags, ARRAY_SIZE(s->vxrs)); \ + _kernel_fpu_begin((struct kernel_fpu *)s, _flags); \ +} + +#define kernel_fpu_end(state, flags) \ +{ \ + typeof(state) s = (state); \ + int _flags = (flags); \ + \ + kernel_fpu_check_size(_flags, ARRAY_SIZE(s->vxrs)); \ + _kernel_fpu_end((struct kernel_fpu *)s, _flags); \ +} + static inline void save_kernel_fpu_regs(struct thread_struct *thread) { struct fpu *state = &thread->kfpu; diff --git a/arch/s390/kernel/fpu.c b/arch/s390/kernel/fpu.c index 733e188951b7..62e9befe7890 100644 --- a/arch/s390/kernel/fpu.c +++ b/arch/s390/kernel/fpu.c @@ -19,41 +19,41 @@ void __kernel_fpu_begin(struct kernel_fpu *state, int flags) * Limit the save to the FPU/vector registers already * in use by the previous context. */ - flags &= state->mask; + flags &= state->hdr.mask; if (flags & KERNEL_FPC) - fpu_stfpc(&state->fpc); + fpu_stfpc(&state->hdr.fpc); if (!cpu_has_vx()) { if (flags & KERNEL_VXR_LOW) - save_fp_regs_vx(state->vxrs); + save_fp_regs_vx(vxrs); return; } mask = flags & KERNEL_VXR; if (mask == KERNEL_VXR) { - fpu_vstm(0, 15, &vxrs[0]); - fpu_vstm(16, 31, &vxrs[16]); + vxrs += fpu_vstm(0, 15, vxrs); + vxrs += fpu_vstm(16, 31, vxrs); return; } if (mask == KERNEL_VXR_MID) { - fpu_vstm(8, 23, &vxrs[8]); + vxrs += fpu_vstm(8, 23, vxrs); return; } mask = flags & KERNEL_VXR_LOW; if (mask) { if (mask == KERNEL_VXR_LOW) - fpu_vstm(0, 15, &vxrs[0]); + vxrs += fpu_vstm(0, 15, vxrs); else if (mask == KERNEL_VXR_V0V7) - fpu_vstm(0, 7, &vxrs[0]); + vxrs += fpu_vstm(0, 7, vxrs); else - fpu_vstm(8, 15, &vxrs[8]); + vxrs += fpu_vstm(8, 15, vxrs); } mask = flags & KERNEL_VXR_HIGH; if (mask) { if (mask == KERNEL_VXR_HIGH) - fpu_vstm(16, 31, &vxrs[16]); + vxrs += fpu_vstm(16, 31, vxrs); else if (mask == KERNEL_VXR_V16V23) - fpu_vstm(16, 23, &vxrs[16]); + vxrs += fpu_vstm(16, 23, vxrs); else - fpu_vstm(24, 31, &vxrs[24]); + vxrs += fpu_vstm(24, 31, vxrs); } } EXPORT_SYMBOL(__kernel_fpu_begin); @@ -68,41 +68,41 @@ void __kernel_fpu_end(struct kernel_fpu *state, int flags) * previous context that have been overwritten by the * current context. 
*/ - flags &= state->mask; + flags &= state->hdr.mask; if (flags & KERNEL_FPC) - fpu_lfpc(&state->fpc); + fpu_lfpc(&state->hdr.fpc); if (!cpu_has_vx()) { if (flags & KERNEL_VXR_LOW) - load_fp_regs_vx(state->vxrs); + load_fp_regs_vx(vxrs); return; } mask = flags & KERNEL_VXR; if (mask == KERNEL_VXR) { - fpu_vlm(0, 15, &vxrs[0]); - fpu_vlm(16, 31, &vxrs[16]); + vxrs += fpu_vlm(0, 15, vxrs); + vxrs += fpu_vlm(16, 31, vxrs); return; } if (mask == KERNEL_VXR_MID) { - fpu_vlm(8, 23, &vxrs[8]); + vxrs += fpu_vlm(8, 23, vxrs); return; } mask = flags & KERNEL_VXR_LOW; if (mask) { if (mask == KERNEL_VXR_LOW) - fpu_vlm(0, 15, &vxrs[0]); + vxrs += fpu_vlm(0, 15, vxrs); else if (mask == KERNEL_VXR_V0V7) - fpu_vlm(0, 7, &vxrs[0]); + vxrs += fpu_vlm(0, 7, vxrs); else - fpu_vlm(8, 15, &vxrs[8]); + vxrs += fpu_vlm(8, 15, vxrs); } mask = flags & KERNEL_VXR_HIGH; if (mask) { if (mask == KERNEL_VXR_HIGH) - fpu_vlm(16, 31, &vxrs[16]); + vxrs += fpu_vlm(16, 31, vxrs); else if (mask == KERNEL_VXR_V16V23) - fpu_vlm(16, 23, &vxrs[16]); + vxrs += fpu_vlm(16, 23, vxrs); else - fpu_vlm(24, 31, &vxrs[24]); + vxrs += fpu_vlm(24, 31, vxrs); } } EXPORT_SYMBOL(__kernel_fpu_end); diff --git a/arch/s390/kernel/sysinfo.c b/arch/s390/kernel/sysinfo.c index 061d45cf0261..4cd6428bfab2 100644 --- a/arch/s390/kernel/sysinfo.c +++ b/arch/s390/kernel/sysinfo.c @@ -426,7 +426,7 @@ subsys_initcall(create_proc_service_level); */ void s390_adjust_jiffies(void) { - DECLARE_KERNEL_FPU_ONSTACK(fpu); + DECLARE_KERNEL_FPU_ONSTACK16(fpu); struct sysinfo_1_2_2 *info; unsigned long capability; diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 8467945344b5..8c222b0dfbf2 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -5026,7 +5026,7 @@ static void store_regs(struct kvm_vcpu *vcpu) int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) { struct kvm_run *kvm_run = vcpu->run; - DECLARE_KERNEL_FPU_ONSTACK(fpu); + DECLARE_KERNEL_FPU_ONSTACK32(fpu); int rc; /* diff --git a/lib/raid6/s390vx.uc b/lib/raid6/s390vx.uc index bc2f4fbe5a82..92c05b7596bc 100644 --- a/lib/raid6/s390vx.uc +++ b/lib/raid6/s390vx.uc @@ -80,7 +80,7 @@ static inline void COPY_VEC(int x, int y) static void raid6_s390vx$#_gen_syndrome(int disks, size_t bytes, void **ptrs) { - DECLARE_KERNEL_FPU_ONSTACK(vxstate); + DECLARE_KERNEL_FPU_ONSTACK32(vxstate); u8 **dptr, *p, *q; int d, z, z0; @@ -113,7 +113,7 @@ static void raid6_s390vx$#_gen_syndrome(int disks, size_t bytes, void **ptrs) static void raid6_s390vx$#_xor_syndrome(int disks, int start, int stop, size_t bytes, void **ptrs) { - DECLARE_KERNEL_FPU_ONSTACK(vxstate); + DECLARE_KERNEL_FPU_ONSTACK32(vxstate); u8 **dptr, *p, *q; int d, z, z0; -- cgit v1.2.3 From c8dde11df19192c421f5b70c2b8ba55d32e07c66 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Sat, 3 Feb 2024 11:45:24 +0100 Subject: s390/raid6: convert to use standard fpu_*() inline assemblies Move the s390 specific raid6 inline assemblies, make them generic, and reuse them to implement the raid6 gen/xor implementation. 
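As an illustration of the converted macros: the RAID-6 multiply-by-2 step in GF(2^8) can be written entirely with them. The register numbers below are arbitrary examples; LOAD_CONST() is assumed to have been called, so that %v24 holds the per-byte shift count 7 and %v25 the 0x1d reduction constant:

	MASK(18, 17);		/* v18: 0xff per byte where the top bit in v17 is set */
	SHLBYTE(17, 17);	/* shift every byte of v17 left by one bit */
	AND(18, 18, 25);	/* keep the 0x1d reduction constant where needed */
	XOR(17, 17, 18);	/* v17 now contains the GF(2^8) product v17 * 0x02 */

This is essentially the per-disk sequence executed by the gen/xor syndrome functions, just with hard-coded instead of generated register numbers.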
Signed-off-by: Heiko Carstens --- arch/s390/include/asm/fpu-insn.h | 48 +++++++++++++++++++++++++++++++++++ lib/raid6/s390vx.uc | 55 ++++++++-------------------------------- 2 files changed, 58 insertions(+), 45 deletions(-) (limited to 'lib') diff --git a/arch/s390/include/asm/fpu-insn.h b/arch/s390/include/asm/fpu-insn.h index 35c4fbe0bdd6..028d28570170 100644 --- a/arch/s390/include/asm/fpu-insn.h +++ b/arch/s390/include/asm/fpu-insn.h @@ -108,6 +108,14 @@ static __always_inline void fpu_stfpc(unsigned int *fpc) : "memory"); } +static __always_inline void fpu_vab(u8 v1, u8 v2, u8 v3) +{ + asm volatile("VAB %[v1],%[v2],%[v3]" + : + : [v1] "I" (v1), [v2] "I" (v2), [v3] "I" (v3) + : "memory"); +} + static __always_inline void fpu_vcksm(u8 v1, u8 v2, u8 v3) { asm volatile("VCKSM %[v1],%[v2],%[v3]" @@ -116,6 +124,14 @@ static __always_inline void fpu_vcksm(u8 v1, u8 v2, u8 v3) : "memory"); } +static __always_inline void fpu_vesravb(u8 v1, u8 v2, u8 v3) +{ + asm volatile("VESRAVB %[v1],%[v2],%[v3]" + : + : [v1] "I" (v1), [v2] "I" (v2), [v3] "I" (v3) + : "memory"); +} + #ifdef CONFIG_CC_IS_CLANG static __always_inline void fpu_vl(u8 v1, const void *vxr) @@ -231,6 +247,14 @@ static __always_inline void fpu_vll(u8 v1, u32 index, const void *vxr) #endif /* CONFIG_CC_IS_CLANG */ +static __always_inline void fpu_vlr(u8 v1, u8 v2) +{ + asm volatile("VLR %[v1],%[v2]" + : + : [v1] "I" (v1), [v2] "I" (v2) + : "memory"); +} + static __always_inline void fpu_vlvgf(u8 v, u32 val, u16 index) { asm volatile("VLVGF %[v],%[val],%[index]" @@ -239,6 +263,22 @@ static __always_inline void fpu_vlvgf(u8 v, u32 val, u16 index) : "memory"); } +static __always_inline void fpu_vn(u8 v1, u8 v2, u8 v3) +{ + asm volatile("VN %[v1],%[v2],%[v3]" + : + : [v1] "I" (v1), [v2] "I" (v2), [v3] "I" (v3) + : "memory"); +} + +static __always_inline void fpu_vrepib(u8 v1, s16 i2) +{ + asm volatile("VREPIB %[v1],%[i2]" + : + : [v1] "I" (v1), [i2] "K" (i2) + : "memory"); +} + #ifdef CONFIG_CC_IS_CLANG static __always_inline void fpu_vst(u8 v1, const void *vxr) @@ -335,6 +375,14 @@ static __always_inline void fpu_vstl(u8 v1, u32 index, const void *vxr) #endif /* CONFIG_CC_IS_CLANG */ +static __always_inline void fpu_vx(u8 v1, u8 v2, u8 v3) +{ + asm volatile("VX %[v1],%[v2],%[v3]" + : + : [v1] "I" (v1), [v2] "I" (v2), [v3] "I" (v3) + : "memory"); +} + static __always_inline void fpu_vzero(u8 v) { asm volatile("VZERO %[v]" diff --git a/lib/raid6/s390vx.uc b/lib/raid6/s390vx.uc index 92c05b7596bc..863e2d320938 100644 --- a/lib/raid6/s390vx.uc +++ b/lib/raid6/s390vx.uc @@ -16,10 +16,10 @@ #define NSIZE 16 -static inline void LOAD_CONST(void) +static __always_inline void LOAD_CONST(void) { - asm volatile("VREPIB %v24,7"); - asm volatile("VREPIB %v25,0x1d"); + fpu_vrepib(24, 0x07); + fpu_vrepib(25, 0x1d); } /* @@ -27,10 +27,7 @@ static inline void LOAD_CONST(void) * vector register y left by 1 bit and stores the result in * vector register x. */ -static inline void SHLBYTE(int x, int y) -{ - asm volatile ("VAB %0,%1,%1" : : "i" (x), "i" (y)); -} +#define SHLBYTE(x, y) fpu_vab(x, y, y) /* * For each of the 16 bytes in the vector register y the MASK() @@ -38,45 +35,13 @@ static inline void SHLBYTE(int x, int y) * or 0x00 if the high bit is 0. The result is stored in vector * register x. 
*/ -static inline void MASK(int x, int y) -{ - asm volatile ("VESRAVB %0,%1,24" : : "i" (x), "i" (y)); -} - -static inline void AND(int x, int y, int z) -{ - asm volatile ("VN %0,%1,%2" : : "i" (x), "i" (y), "i" (z)); -} - -static inline void XOR(int x, int y, int z) -{ - asm volatile ("VX %0,%1,%2" : : "i" (x), "i" (y), "i" (z)); -} +#define MASK(x, y) fpu_vesravb(x, y, 24) -static inline void LOAD_DATA(int x, u8 *ptr) -{ - typedef struct { u8 _[16 * $#]; } addrtype; - register addrtype *__ptr asm("1") = (addrtype *) ptr; - - asm volatile ("VLM %2,%3,0,%1" - : : "m" (*__ptr), "a" (__ptr), "i" (x), - "i" (x + $# - 1)); -} - -static inline void STORE_DATA(int x, u8 *ptr) -{ - typedef struct { u8 _[16 * $#]; } addrtype; - register addrtype *__ptr asm("1") = (addrtype *) ptr; - - asm volatile ("VSTM %2,%3,0,1" - : "=m" (*__ptr) : "a" (__ptr), "i" (x), - "i" (x + $# - 1)); -} - -static inline void COPY_VEC(int x, int y) -{ - asm volatile ("VLR %0,%1" : : "i" (x), "i" (y)); -} +#define AND(x, y, z) fpu_vn(x, y, z) +#define XOR(x, y, z) fpu_vx(x, y, z) +#define LOAD_DATA(x, ptr) fpu_vlm(x, x + $# - 1, ptr) +#define STORE_DATA(x, ptr) fpu_vstm(x, x + $# - 1, ptr) +#define COPY_VEC(x, y) fpu_vlr(x, y) static void raid6_s390vx$#_gen_syndrome(int disks, size_t bytes, void **ptrs) { -- cgit v1.2.3