summaryrefslogtreecommitdiff
path: root/arch/alpha/kernel
diff options
context:
space:
mode:
authorAl Viro <viro@zeniv.linux.org.uk>2022-09-02 04:50:12 +0300
committerMatt Turner <mattst88@gmail.com>2023-02-25 07:14:22 +0300
commit050966666047b5013fe44944cef9e9605bdf6cfe (patch)
tree6c25361790776289d998deb98c50694bf41ff37f /arch/alpha/kernel
parenta7acb188e874be1b2752461a445af935e0a47da7 (diff)
downloadlinux-050966666047b5013fe44944cef9e9605bdf6cfe.tar.xz
alpha: lazy FPU switching
On each context switch we save the FPU registers on stack of old process and restore FPU registers from the stack of new one. That allows us to avoid doing that each time we enter/leave the kernel mode; however, that can get suboptimal in some cases. For one thing, we don't need to bother saving anything for kernel threads. For another, if between entering and leaving the kernel a thread gives CPU up more than once, it will do useless work, saving the same values every time, only to discard the saved copy as soon as it returns from switch_to(). Alternative solution: * move the array we save into from switch_stack to thread_info * have a (thread-synchronous) flag set when we save them * have another flag set when they should be restored on return to userland. * do *NOT* save/restore them in do_switch_stack()/undo_switch_stack(). * restore on the exit to user mode if the restore flag had been set. Clear both flags. * on context switch, entry to fork/clone/vfork, before entry into do_signal() and on entry into straced syscall save the registers and set the 'saved' flag unless it had been already set. * on context switch set the 'restore' flag as well. * have copy_thread() set both flags for child, so the registers would be restored once the child returns to userland. * use the saved data in setup_sigcontext(); have restore_sigcontext() set both flags and copy from sigframe to save area. * teach ptrace to look for FPU registers in thread_info instead of switch_stack. * teach isolated accesses to FPU registers (rdfpcr, wrfpcr, etc.) to check the 'saved' flag (under preempt_disable()) and work with the save area if it's been set; if 'saved' flag is found upon write access, set 'restore' flag as well. Signed-off-by: Al Viro <viro@zeniv.linux.org.uk> Signed-off-by: Matt Turner <mattst88@gmail.com>
Diffstat (limited to 'arch/alpha/kernel')
-rw-r--r--arch/alpha/kernel/asm-offsets.c2
-rw-r--r--arch/alpha/kernel/entry.S144
-rw-r--r--arch/alpha/kernel/process.c5
-rw-r--r--arch/alpha/kernel/ptrace.c18
-rw-r--r--arch/alpha/kernel/signal.c20
5 files changed, 100 insertions, 89 deletions
diff --git a/arch/alpha/kernel/asm-offsets.c b/arch/alpha/kernel/asm-offsets.c
index 2e125e5c1508..b121294bee26 100644
--- a/arch/alpha/kernel/asm-offsets.c
+++ b/arch/alpha/kernel/asm-offsets.c
@@ -17,6 +17,8 @@ void foo(void)
DEFINE(TI_TASK, offsetof(struct thread_info, task));
DEFINE(TI_FLAGS, offsetof(struct thread_info, flags));
DEFINE(TI_CPU, offsetof(struct thread_info, cpu));
+ DEFINE(TI_FP, offsetof(struct thread_info, fp));
+ DEFINE(TI_STATUS, offsetof(struct thread_info, status));
BLANK();
DEFINE(TASK_BLOCKED, offsetof(struct task_struct, blocked));
diff --git a/arch/alpha/kernel/entry.S b/arch/alpha/kernel/entry.S
index cc84337afaec..eb51f93a70c8 100644
--- a/arch/alpha/kernel/entry.S
+++ b/arch/alpha/kernel/entry.S
@@ -17,7 +17,7 @@
/* Stack offsets. */
#define SP_OFF 184
-#define SWITCH_STACK_SIZE 320
+#define SWITCH_STACK_SIZE 64
.macro CFI_START_OSF_FRAME func
.align 4
@@ -159,7 +159,6 @@
.cfi_rel_offset $13, 32
.cfi_rel_offset $14, 40
.cfi_rel_offset $15, 48
- /* We don't really care about the FP registers for debugging. */
.endm
.macro UNDO_SWITCH_STACK
@@ -498,6 +497,10 @@ ret_to_user:
and $17, _TIF_WORK_MASK, $2
bne $2, work_pending
restore_all:
+ ldl $2, TI_STATUS($8)
+ and $2, TS_SAVED_FP | TS_RESTORE_FP, $3
+ bne $3, restore_fpu
+restore_other:
.cfi_remember_state
RESTORE_ALL
call_pal PAL_rti
@@ -506,7 +509,7 @@ ret_to_kernel:
.cfi_restore_state
lda $16, 7
call_pal PAL_swpipl
- br restore_all
+ br restore_other
.align 3
$syscall_error:
@@ -570,6 +573,14 @@ $work_notifysig:
.type strace, @function
strace:
/* set up signal stack, call syscall_trace */
+ // NB: if anyone adds preemption, this block will need to be protected
+ ldl $1, TI_STATUS($8)
+ and $1, TS_SAVED_FP, $3
+ or $1, TS_SAVED_FP, $2
+ bne $3, 1f
+ stl $2, TI_STATUS($8)
+ bsr $26, __save_fpu
+1:
DO_SWITCH_STACK
jsr $26, syscall_trace_enter /* returns the syscall number */
UNDO_SWITCH_STACK
@@ -649,40 +660,6 @@ do_switch_stack:
stq $14, 40($sp)
stq $15, 48($sp)
stq $26, 56($sp)
- stt $f0, 64($sp)
- stt $f1, 72($sp)
- stt $f2, 80($sp)
- stt $f3, 88($sp)
- stt $f4, 96($sp)
- stt $f5, 104($sp)
- stt $f6, 112($sp)
- stt $f7, 120($sp)
- stt $f8, 128($sp)
- stt $f9, 136($sp)
- stt $f10, 144($sp)
- stt $f11, 152($sp)
- stt $f12, 160($sp)
- stt $f13, 168($sp)
- stt $f14, 176($sp)
- stt $f15, 184($sp)
- stt $f16, 192($sp)
- stt $f17, 200($sp)
- stt $f18, 208($sp)
- stt $f19, 216($sp)
- stt $f20, 224($sp)
- stt $f21, 232($sp)
- stt $f22, 240($sp)
- stt $f23, 248($sp)
- stt $f24, 256($sp)
- stt $f25, 264($sp)
- stt $f26, 272($sp)
- stt $f27, 280($sp)
- mf_fpcr $f0 # get fpcr
- stt $f28, 288($sp)
- stt $f29, 296($sp)
- stt $f30, 304($sp)
- stt $f0, 312($sp) # save fpcr in slot of $f31
- ldt $f0, 64($sp) # dont let "do_switch_stack" change fp state.
ret $31, ($1), 1
.cfi_endproc
.size do_switch_stack, .-do_switch_stack
@@ -701,54 +678,71 @@ undo_switch_stack:
ldq $14, 40($sp)
ldq $15, 48($sp)
ldq $26, 56($sp)
- ldt $f30, 312($sp) # get saved fpcr
- ldt $f0, 64($sp)
- ldt $f1, 72($sp)
- ldt $f2, 80($sp)
- ldt $f3, 88($sp)
- mt_fpcr $f30 # install saved fpcr
- ldt $f4, 96($sp)
- ldt $f5, 104($sp)
- ldt $f6, 112($sp)
- ldt $f7, 120($sp)
- ldt $f8, 128($sp)
- ldt $f9, 136($sp)
- ldt $f10, 144($sp)
- ldt $f11, 152($sp)
- ldt $f12, 160($sp)
- ldt $f13, 168($sp)
- ldt $f14, 176($sp)
- ldt $f15, 184($sp)
- ldt $f16, 192($sp)
- ldt $f17, 200($sp)
- ldt $f18, 208($sp)
- ldt $f19, 216($sp)
- ldt $f20, 224($sp)
- ldt $f21, 232($sp)
- ldt $f22, 240($sp)
- ldt $f23, 248($sp)
- ldt $f24, 256($sp)
- ldt $f25, 264($sp)
- ldt $f26, 272($sp)
- ldt $f27, 280($sp)
- ldt $f28, 288($sp)
- ldt $f29, 296($sp)
- ldt $f30, 304($sp)
lda $sp, SWITCH_STACK_SIZE($sp)
ret $31, ($1), 1
.cfi_endproc
.size undo_switch_stack, .-undo_switch_stack
+
+#define FR(n) n * 8 + TI_FP($8)
+ .align 4
+ .globl __save_fpu
+ .type __save_fpu, @function
+__save_fpu:
+#define V(n) stt $f##n, FR(n)
+ V( 0); V( 1); V( 2); V( 3)
+ V( 4); V( 5); V( 6); V( 7)
+ V( 8); V( 9); V(10); V(11)
+ V(12); V(13); V(14); V(15)
+ V(16); V(17); V(18); V(19)
+ V(20); V(21); V(22); V(23)
+ V(24); V(25); V(26); V(27)
+ mf_fpcr $f0 # get fpcr
+ V(28); V(29); V(30)
+ stt $f0, FR(31) # save fpcr in slot of $f31
+ ldt $f0, FR(0) # don't let "__save_fpu" change fp state.
+ ret
+#undef V
+ .size __save_fpu, .-__save_fpu
+
+ .align 4
+restore_fpu:
+ and $3, TS_RESTORE_FP, $3
+ bic $2, TS_SAVED_FP | TS_RESTORE_FP, $2
+ beq $3, 1f
+#define V(n) ldt $f##n, FR(n)
+ ldt $f30, FR(31) # get saved fpcr
+ V( 0); V( 1); V( 2); V( 3)
+ mt_fpcr $f30 # install saved fpcr
+ V( 4); V( 5); V( 6); V( 7)
+ V( 8); V( 9); V(10); V(11)
+ V(12); V(13); V(14); V(15)
+ V(16); V(17); V(18); V(19)
+ V(20); V(21); V(22); V(23)
+ V(24); V(25); V(26); V(27)
+ V(28); V(29); V(30)
+1: stl $2, TI_STATUS($8)
+ br restore_other
+#undef V
+
/*
* The meat of the context switch code.
*/
-
.align 4
.globl alpha_switch_to
.type alpha_switch_to, @function
.cfi_startproc
alpha_switch_to:
DO_SWITCH_STACK
+ ldl $1, TI_STATUS($8)
+ and $1, TS_RESTORE_FP, $3
+ bne $3, 1f
+ or $1, TS_RESTORE_FP | TS_SAVED_FP, $2
+ and $1, TS_SAVED_FP, $3
+ stl $2, TI_STATUS($8)
+ bne $3, 1f
+ bsr $26, __save_fpu
+1:
call_pal PAL_swpctx
lda $8, 0x3fff
UNDO_SWITCH_STACK
@@ -799,6 +793,14 @@ ret_from_kernel_thread:
alpha_\name:
.prologue 0
bsr $1, do_switch_stack
+ // NB: if anyone adds preemption, this block will need to be protected
+ ldl $1, TI_STATUS($8)
+ and $1, TS_SAVED_FP, $3
+ or $1, TS_SAVED_FP, $2
+ bne $3, 1f
+ stl $2, TI_STATUS($8)
+ bsr $26, __save_fpu
+1:
jsr $26, sys_\name
ldq $26, 56($sp)
lda $sp, SWITCH_STACK_SIZE($sp)
diff --git a/arch/alpha/kernel/process.c b/arch/alpha/kernel/process.c
index 65fdae9e48f3..114783e02117 100644
--- a/arch/alpha/kernel/process.c
+++ b/arch/alpha/kernel/process.c
@@ -244,6 +244,7 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
childstack = ((struct switch_stack *) childregs) - 1;
childti->pcb.ksp = (unsigned long) childstack;
childti->pcb.flags = 1; /* set FEN, clear everything else */
+ childti->status |= TS_SAVED_FP | TS_RESTORE_FP;
if (unlikely(args->fn)) {
/* kernel thread */
@@ -253,6 +254,7 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
childstack->r9 = (unsigned long) args->fn;
childstack->r10 = (unsigned long) args->fn_arg;
childregs->hae = alpha_mv.hae_cache;
+ memset(childti->fp, '\0', sizeof(childti->fp));
childti->pcb.usp = 0;
return 0;
}
@@ -335,8 +337,7 @@ EXPORT_SYMBOL(dump_elf_task);
int elf_core_copy_task_fpregs(struct task_struct *t, elf_fpregset_t *fpu)
{
- struct switch_stack *sw = (struct switch_stack *)task_pt_regs(t) - 1;
- memcpy(fpu, sw->fp, 32 * 8);
+ memcpy(fpu, task_thread_info(t)->fp, 32 * 8);
return 1;
}
diff --git a/arch/alpha/kernel/ptrace.c b/arch/alpha/kernel/ptrace.c
index a1a239ea002d..fde4c68e7a0b 100644
--- a/arch/alpha/kernel/ptrace.c
+++ b/arch/alpha/kernel/ptrace.c
@@ -78,6 +78,8 @@ enum {
(PAGE_SIZE*2 - sizeof(struct pt_regs) - sizeof(struct switch_stack) \
+ offsetof(struct switch_stack, reg))
+#define FP_REG(reg) (offsetof(struct thread_info, reg))
+
static int regoff[] = {
PT_REG( r0), PT_REG( r1), PT_REG( r2), PT_REG( r3),
PT_REG( r4), PT_REG( r5), PT_REG( r6), PT_REG( r7),
@@ -87,14 +89,14 @@ static int regoff[] = {
PT_REG( r20), PT_REG( r21), PT_REG( r22), PT_REG( r23),
PT_REG( r24), PT_REG( r25), PT_REG( r26), PT_REG( r27),
PT_REG( r28), PT_REG( gp), -1, -1,
- SW_REG(fp[ 0]), SW_REG(fp[ 1]), SW_REG(fp[ 2]), SW_REG(fp[ 3]),
- SW_REG(fp[ 4]), SW_REG(fp[ 5]), SW_REG(fp[ 6]), SW_REG(fp[ 7]),
- SW_REG(fp[ 8]), SW_REG(fp[ 9]), SW_REG(fp[10]), SW_REG(fp[11]),
- SW_REG(fp[12]), SW_REG(fp[13]), SW_REG(fp[14]), SW_REG(fp[15]),
- SW_REG(fp[16]), SW_REG(fp[17]), SW_REG(fp[18]), SW_REG(fp[19]),
- SW_REG(fp[20]), SW_REG(fp[21]), SW_REG(fp[22]), SW_REG(fp[23]),
- SW_REG(fp[24]), SW_REG(fp[25]), SW_REG(fp[26]), SW_REG(fp[27]),
- SW_REG(fp[28]), SW_REG(fp[29]), SW_REG(fp[30]), SW_REG(fp[31]),
+ FP_REG(fp[ 0]), FP_REG(fp[ 1]), FP_REG(fp[ 2]), FP_REG(fp[ 3]),
+ FP_REG(fp[ 4]), FP_REG(fp[ 5]), FP_REG(fp[ 6]), FP_REG(fp[ 7]),
+ FP_REG(fp[ 8]), FP_REG(fp[ 9]), FP_REG(fp[10]), FP_REG(fp[11]),
+ FP_REG(fp[12]), FP_REG(fp[13]), FP_REG(fp[14]), FP_REG(fp[15]),
+ FP_REG(fp[16]), FP_REG(fp[17]), FP_REG(fp[18]), FP_REG(fp[19]),
+ FP_REG(fp[20]), FP_REG(fp[21]), FP_REG(fp[22]), FP_REG(fp[23]),
+ FP_REG(fp[24]), FP_REG(fp[25]), FP_REG(fp[26]), FP_REG(fp[27]),
+ FP_REG(fp[28]), FP_REG(fp[29]), FP_REG(fp[30]), FP_REG(fp[31]),
PT_REG( pc)
};
diff --git a/arch/alpha/kernel/signal.c b/arch/alpha/kernel/signal.c
index 6f47f256fe80..e62d1d461b1f 100644
--- a/arch/alpha/kernel/signal.c
+++ b/arch/alpha/kernel/signal.c
@@ -150,9 +150,10 @@ restore_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs)
{
unsigned long usp;
struct switch_stack *sw = (struct switch_stack *)regs - 1;
- long i, err = __get_user(regs->pc, &sc->sc_pc);
+ long err = __get_user(regs->pc, &sc->sc_pc);
current->restart_block.fn = do_no_restart_syscall;
+ current_thread_info()->status |= TS_SAVED_FP | TS_RESTORE_FP;
sw->r26 = (unsigned long) ret_from_sys_call;
@@ -189,9 +190,9 @@ restore_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs)
err |= __get_user(usp, sc->sc_regs+30);
wrusp(usp);
- for (i = 0; i < 31; i++)
- err |= __get_user(sw->fp[i], sc->sc_fpregs+i);
- err |= __get_user(sw->fp[31], &sc->sc_fpcr);
+ err |= __copy_from_user(current_thread_info()->fp,
+ sc->sc_fpregs, 31 * 8);
+ err |= __get_user(current_thread_info()->fp[31], &sc->sc_fpcr);
return err;
}
@@ -272,7 +273,7 @@ setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs,
unsigned long mask, unsigned long sp)
{
struct switch_stack *sw = (struct switch_stack *)regs - 1;
- long i, err = 0;
+ long err = 0;
err |= __put_user(on_sig_stack((unsigned long)sc), &sc->sc_onstack);
err |= __put_user(mask, &sc->sc_mask);
@@ -312,10 +313,10 @@ setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs,
err |= __put_user(sp, sc->sc_regs+30);
err |= __put_user(0, sc->sc_regs+31);
- for (i = 0; i < 31; i++)
- err |= __put_user(sw->fp[i], sc->sc_fpregs+i);
+ err |= __copy_to_user(sc->sc_fpregs,
+ current_thread_info()->fp, 31 * 8);
err |= __put_user(0, sc->sc_fpregs+31);
- err |= __put_user(sw->fp[31], &sc->sc_fpcr);
+ err |= __put_user(current_thread_info()->fp[31], &sc->sc_fpcr);
err |= __put_user(regs->trap_a0, &sc->sc_traparg_a0);
err |= __put_user(regs->trap_a1, &sc->sc_traparg_a1);
@@ -528,6 +529,9 @@ do_work_pending(struct pt_regs *regs, unsigned long thread_flags,
} else {
local_irq_enable();
if (thread_flags & (_TIF_SIGPENDING|_TIF_NOTIFY_SIGNAL)) {
+ preempt_disable();
+ save_fpu();
+ preempt_enable();
do_signal(regs, r0, r19);
r0 = 0;
} else {