summaryrefslogtreecommitdiff
path: root/arch/arm64/kernel/fpsimd.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/arm64/kernel/fpsimd.c')
-rw-r--r--arch/arm64/kernel/fpsimd.c51
1 files changed, 34 insertions, 17 deletions
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index 7a1aeb95d7c3..087c05aa960e 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -679,7 +679,7 @@ static void fpsimd_to_sve(struct task_struct *task)
void *sst = task->thread.sve_state;
struct user_fpsimd_state const *fst = &task->thread.uw.fpsimd_state;
- if (!system_supports_sve())
+ if (!system_supports_sve() && !system_supports_sme())
return;
vq = sve_vq_from_vl(thread_get_cur_vl(&task->thread));
@@ -705,7 +705,7 @@ static void sve_to_fpsimd(struct task_struct *task)
unsigned int i;
__uint128_t const *p;
- if (!system_supports_sve())
+ if (!system_supports_sve() && !system_supports_sme())
return;
vl = thread_get_cur_vl(&task->thread);
@@ -835,7 +835,8 @@ void sve_sync_from_fpsimd_zeropad(struct task_struct *task)
void *sst = task->thread.sve_state;
struct user_fpsimd_state const *fst = &task->thread.uw.fpsimd_state;
- if (!test_tsk_thread_flag(task, TIF_SVE))
+ if (!test_tsk_thread_flag(task, TIF_SVE) &&
+ !thread_sm_enabled(&task->thread))
return;
vq = sve_vq_from_vl(thread_get_cur_vl(&task->thread));
@@ -847,6 +848,8 @@ void sve_sync_from_fpsimd_zeropad(struct task_struct *task)
int vec_set_vector_length(struct task_struct *task, enum vec_type type,
unsigned long vl, unsigned long flags)
{
+ bool free_sme = false;
+
if (flags & ~(unsigned long)(PR_SVE_VL_INHERIT |
PR_SVE_SET_VL_ONEXEC))
return -EINVAL;
@@ -897,24 +900,39 @@ int vec_set_vector_length(struct task_struct *task, enum vec_type type,
task->thread.fp_type = FP_STATE_FPSIMD;
}
- if (system_supports_sme() && type == ARM64_VEC_SME) {
- task->thread.svcr &= ~(SVCR_SM_MASK |
- SVCR_ZA_MASK);
- clear_thread_flag(TIF_SME);
+ if (system_supports_sme()) {
+ if (type == ARM64_VEC_SME ||
+ !(task->thread.svcr & (SVCR_SM_MASK | SVCR_ZA_MASK))) {
+ /*
+ * We are changing the SME VL or weren't using
+ * SME anyway, discard the state and force a
+ * reallocation.
+ */
+ task->thread.svcr &= ~(SVCR_SM_MASK |
+ SVCR_ZA_MASK);
+ clear_tsk_thread_flag(task, TIF_SME);
+ free_sme = true;
+ }
}
if (task == current)
put_cpu_fpsimd_context();
+ task_set_vl(task, type, vl);
+
/*
- * Force reallocation of task SVE and SME state to the correct
- * size on next use:
+ * Free the changed states if they are not in use, SME will be
+ * reallocated to the correct size on next use and we just
+ * allocate SVE now in case it is needed for use in streaming
+ * mode.
*/
- sve_free(task);
- if (system_supports_sme() && type == ARM64_VEC_SME)
- sme_free(task);
+ if (system_supports_sve()) {
+ sve_free(task);
+ sve_alloc(task, true);
+ }
- task_set_vl(task, type, vl);
+ if (free_sme)
+ sme_free(task);
out:
update_tsk_thread_flag(task, vec_vl_inherit_flag(type),
@@ -1267,9 +1285,9 @@ void fpsimd_release_task(struct task_struct *dead_task)
* the interest of testability and predictability, the architecture
* guarantees that when ZA is enabled it will be zeroed.
*/
-void sme_alloc(struct task_struct *task)
+void sme_alloc(struct task_struct *task, bool flush)
{
- if (task->thread.sme_state) {
+ if (task->thread.sme_state && flush) {
memset(task->thread.sme_state, 0, sme_state_size(task));
return;
}
@@ -1497,7 +1515,7 @@ void do_sme_acc(unsigned long esr, struct pt_regs *regs)
}
sve_alloc(current, false);
- sme_alloc(current);
+ sme_alloc(current, true);
if (!current->thread.sve_state || !current->thread.sme_state) {
force_sig(SIGKILL);
return;
@@ -1649,7 +1667,6 @@ void fpsimd_flush_thread(void)
fpsimd_flush_thread_vl(ARM64_VEC_SME);
current->thread.svcr = 0;
- sme_smstop();
}
current->thread.fp_type = FP_STATE_FPSIMD;