author		Heiko Carstens <hca@linux.ibm.com>	2024-02-03 13:45:18 +0300
committer	Heiko Carstens <hca@linux.ibm.com>	2024-02-16 16:30:16 +0300
commit		8c09871a950a3fe686e0e27fd4193179c5f74f37 (patch)
tree		55b39ecbbdcb9e43dc3c24b733795f606068637a /arch/s390/include/asm/fpu.h
parent		066c40918bb495de8f2e45bd7eec06737a142712 (diff)
download	linux-8c09871a950a3fe686e0e27fd4193179c5f74f37.tar.xz
s390/fpu: limit save and restore to used registers
The first invocation of kernel_fpu_begin() after switching from user to
kernel context will save all vector registers, even if only parts of the
vector registers are used within the kernel fpu context. Given that save
and restore of all vector registers is quite expensive, change the current
approach in several ways:

- Instead of saving and restoring all user registers, limit this to those
  registers which are actually used within a kernel fpu context.

- On context switch, save all remaining user fpu registers, so they can be
  restored when the task is rescheduled.

- Saving user registers within kernel_fpu_begin() is done without disabling
  and enabling interrupts, which also slightly reduces runtime. In the worst
  case (e.g. interrupt context uses the same registers) this may lead to the
  situation that registers are saved several times; however the assumption
  is that this will not happen frequently, so that the new method is faster
  in nearly all cases.

- save_user_fpu_regs() can still be called from all contexts and saves all
  (or all remaining) user registers to a task's ufpu user fpu save area.

Overall this reduces the time required to save and restore the user fpu
context for nearly all cases.

Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
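As an illustration of what the new scheme buys, here is a minimal sketch of
a kernel fpu section that only touches V0-V15: it requests only KERNEL_FPR,
so at most those user registers are saved lazily. The example_vector_user()
function and the DECLARE_KERNEL_FPU_ONSTACK16 on-stack helper are
illustrative assumptions, not part of this patch:

/*
 * Hypothetical caller: only the FPC and V0-V15 are requested, so only
 * those user registers need to be saved before the section starts, and
 * only those kernel registers are saved/restored if sections nest.
 */
static void example_vector_user(void)
{
	DECLARE_KERNEL_FPU_ONSTACK16(vxstate);	/* assumed on-stack helper for 16 VXRs */

	kernel_fpu_begin(&vxstate, KERNEL_FPR);	/* KERNEL_FPR == KERNEL_FPC | KERNEL_VXR_LOW */
	/* ... use V0-V15 ... */
	kernel_fpu_end(&vxstate, KERNEL_FPR);
}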
Diffstat (limited to 'arch/s390/include/asm/fpu.h')
-rw-r--r--	arch/s390/include/asm/fpu.h	81
1 file changed, 49 insertions(+), 32 deletions(-)
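For context when reading the hunks below: the flag bits visible in the enum
compose into the masks used throughout the header. The BIT() composition
shown here is reconstructed from the mainline file and is not part of this
diff:

#define KERNEL_FPC		BIT(KERNEL_FPC_BIT)
#define KERNEL_VXR_V0V7		BIT(KERNEL_VXR_V0V7_BIT)
#define KERNEL_VXR_V8V15	BIT(KERNEL_VXR_V8V15_BIT)
#define KERNEL_VXR_V16V23	BIT(KERNEL_VXR_V16V23_BIT)
#define KERNEL_VXR_V24V31	BIT(KERNEL_VXR_V24V31_BIT)

#define KERNEL_VXR_LOW		(KERNEL_VXR_V0V7 | KERNEL_VXR_V8V15)
#define KERNEL_VXR_HIGH		(KERNEL_VXR_V16V23 | KERNEL_VXR_V24V31)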
diff --git a/arch/s390/include/asm/fpu.h b/arch/s390/include/asm/fpu.h
index c1b3920092a1..c84cb33913e2 100644
--- a/arch/s390/include/asm/fpu.h
+++ b/arch/s390/include/asm/fpu.h
@@ -58,10 +58,6 @@ static inline bool cpu_has_vx(void)
	return likely(test_facility(129));
}

-void save_user_fpu_regs(void);
-void load_user_fpu_regs(void);
-void __load_user_fpu_regs(void);
-
enum {
	KERNEL_FPC_BIT = 0,
	KERNEL_VXR_V0V7_BIT,
@@ -83,6 +79,8 @@ enum {
#define KERNEL_VXR	(KERNEL_VXR_LOW | KERNEL_VXR_HIGH)
#define KERNEL_FPR	(KERNEL_FPC | KERNEL_VXR_LOW)

+void load_fpu_state(struct fpu *state, int flags);
+void save_fpu_state(struct fpu *state, int flags);
void __kernel_fpu_begin(struct kernel_fpu *state, int flags);
void __kernel_fpu_end(struct kernel_fpu *state, int flags);
@@ -162,26 +160,57 @@ static __always_inline void load_fp_regs_vx(__vector128 *vxrs)
	__load_fp_regs(fprs, sizeof(__vector128) / sizeof(freg_t));
}

-static inline void _kernel_fpu_begin(struct kernel_fpu *state, int flags)
+static inline void load_user_fpu_regs(void)
+{
+	struct thread_struct *thread = &current->thread;
+
+	if (!thread->ufpu_flags)
+		return;
+	load_fpu_state(&thread->ufpu, thread->ufpu_flags);
+	thread->ufpu_flags = 0;
+}
+
+static __always_inline void __save_user_fpu_regs(struct thread_struct *thread, int flags)
{
-	state->hdr.mask = READ_ONCE(current->thread.kfpu_flags);
-	if (!test_thread_flag(TIF_FPU)) {
-		/* Save user space FPU state and register contents */
-		save_user_fpu_regs();
-	} else if (state->hdr.mask & flags) {
-		/* Save FPU/vector register in-use by the kernel */
+	save_fpu_state(&thread->ufpu, flags);
+	__atomic_or(flags, &thread->ufpu_flags);
+}
+
+static inline void save_user_fpu_regs(void)
+{
+	struct thread_struct *thread = &current->thread;
+	int mask, flags;
+
+	mask = __atomic_or(KERNEL_FPC | KERNEL_VXR, &thread->kfpu_flags);
+	flags = ~READ_ONCE(thread->ufpu_flags) & (KERNEL_FPC | KERNEL_VXR);
+	if (flags)
+		__save_user_fpu_regs(thread, flags);
+	barrier();
+	WRITE_ONCE(thread->kfpu_flags, mask);
+}
+
+static __always_inline void _kernel_fpu_begin(struct kernel_fpu *state, int flags)
+{
+	struct thread_struct *thread = &current->thread;
+	int mask, uflags;
+
+	mask = __atomic_or(flags, &thread->kfpu_flags);
+	state->hdr.mask = mask;
+	uflags = READ_ONCE(thread->ufpu_flags);
+	if ((uflags & flags) != flags)
+		__save_user_fpu_regs(thread, ~uflags & flags);
+	if (mask & flags)
		__kernel_fpu_begin(state, flags);
-	}
-	__atomic_or(flags, &current->thread.kfpu_flags);
}

-static inline void _kernel_fpu_end(struct kernel_fpu *state, int flags)
+static __always_inline void _kernel_fpu_end(struct kernel_fpu *state, int flags)
{
-	WRITE_ONCE(current->thread.kfpu_flags, state->hdr.mask);
-	if (state->hdr.mask & flags) {
-		/* Restore FPU/vector register in-use by the kernel */
+	int mask = state->hdr.mask;
+
+	if (mask & flags)
		__kernel_fpu_end(state, flags);
-	}
+	barrier();
+	WRITE_ONCE(current->thread.kfpu_flags, mask);
}
void __kernel_fpu_invalid_size(void);
@@ -222,28 +251,16 @@ static __always_inline void kernel_fpu_check_size(int flags, unsigned int size)

static inline void save_kernel_fpu_regs(struct thread_struct *thread)
{
-	struct fpu *state = &thread->kfpu;
-
	if (!thread->kfpu_flags)
		return;
-	fpu_stfpc(&state->fpc);
-	if (likely(cpu_has_vx()))
-		save_vx_regs(state->vxrs);
-	else
-		save_fp_regs_vx(state->vxrs);
+	save_fpu_state(&thread->kfpu, thread->kfpu_flags);
}

static inline void restore_kernel_fpu_regs(struct thread_struct *thread)
{
-	struct fpu *state = &thread->kfpu;
-
	if (!thread->kfpu_flags)
		return;
-	fpu_lfpc(&state->fpc);
-	if (likely(cpu_has_vx()))
-		load_vx_regs(state->vxrs);
-	else
-		load_fp_regs_vx(state->vxrs);
+	load_fpu_state(&thread->kfpu, thread->kfpu_flags);
}

static inline void convert_vx_to_fp(freg_t *fprs, __vector128 *vxrs)
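The flag arithmetic in save_user_fpu_regs() and _kernel_fpu_begin() is
dense; the following standalone sketch (plain C, with flag values assumed
per the reconstruction above) traces the decision for a task whose FPC and
V0-V15 were already saved, showing that only V16-V31 get saved again:

#include <stdio.h>

/* Flag values assumed to mirror the enum/defines in this header. */
#define KERNEL_FPC	0x01
#define KERNEL_VXR_LOW	0x06	/* V0-V7 | V8-V15 */
#define KERNEL_VXR_HIGH	0x18	/* V16-V23 | V24-V31 */
#define KERNEL_VXR	(KERNEL_VXR_LOW | KERNEL_VXR_HIGH)

int main(void)
{
	int ufpu_flags = KERNEL_FPC | KERNEL_VXR_LOW;	/* already saved earlier */
	int flags = KERNEL_VXR;				/* kernel section wants all VXRs */

	/* _kernel_fpu_begin(): save only what is requested and not yet saved */
	if ((ufpu_flags & flags) != flags)
		printf("save user regs for mask 0x%x\n", ~ufpu_flags & flags);
	/* prints 0x18: only V16-V31 still need to be saved */
	return 0;
}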