summaryrefslogtreecommitdiff
path: root/arch/riscv/kvm
diff options
context:
space:
mode:
authorAtish Patra <atish.patra@wdc.com>2021-09-27 14:40:12 +0300
committerAnup Patel <anup@brainfault.org>2021-10-04 13:38:23 +0300
commit5de52d4a23ad3346c3cb24aae728ef6642d1a92e (patch)
tree2e5e161bb93e0cecc740a8029a4d186ddb71ad27 /arch/riscv/kvm
parent3a9f66cb25e18a3eeca36c08d9f823a35b3ddc22 (diff)
downloadlinux-5de52d4a23ad3346c3cb24aae728ef6642d1a92e.tar.xz
RISC-V: KVM: FP lazy save/restore
This patch adds floating point (F and D extension) context save/restore for guest VCPUs. The FP context is saved and restored lazily only when kernel enter/exits the in-kernel run loop and not during the KVM world switch. This way FP save/restore has minimal impact on KVM performance. Signed-off-by: Atish Patra <atish.patra@wdc.com> Signed-off-by: Anup Patel <anup.patel@wdc.com> Acked-by: Paolo Bonzini <pbonzini@redhat.com> Reviewed-by: Paolo Bonzini <pbonzini@redhat.com> Reviewed-by: Alexander Graf <graf@amazon.com> Acked-by: Palmer Dabbelt <palmerdabbelt@google.com>
Diffstat (limited to 'arch/riscv/kvm')
-rw-r--r--arch/riscv/kvm/vcpu.c91
-rw-r--r--arch/riscv/kvm/vcpu_switch.S174
2 files changed, 265 insertions, 0 deletions
diff --git a/arch/riscv/kvm/vcpu.c b/arch/riscv/kvm/vcpu.c
index 840f4586796f..cf6832365345 100644
--- a/arch/riscv/kvm/vcpu.c
+++ b/arch/riscv/kvm/vcpu.c
@@ -38,6 +38,86 @@ const struct kvm_stats_header kvm_vcpu_stats_header = {
sizeof(kvm_vcpu_stats_desc),
};
+#ifdef CONFIG_FPU
+static void kvm_riscv_vcpu_fp_reset(struct kvm_vcpu *vcpu)
+{
+ unsigned long isa = vcpu->arch.isa;
+ struct kvm_cpu_context *cntx = &vcpu->arch.guest_context;
+
+ cntx->sstatus &= ~SR_FS;
+ if (riscv_isa_extension_available(&isa, f) ||
+ riscv_isa_extension_available(&isa, d))
+ cntx->sstatus |= SR_FS_INITIAL;
+ else
+ cntx->sstatus |= SR_FS_OFF;
+}
+
+static void kvm_riscv_vcpu_fp_clean(struct kvm_cpu_context *cntx)
+{
+ cntx->sstatus &= ~SR_FS;
+ cntx->sstatus |= SR_FS_CLEAN;
+}
+
+static void kvm_riscv_vcpu_guest_fp_save(struct kvm_cpu_context *cntx,
+ unsigned long isa)
+{
+ if ((cntx->sstatus & SR_FS) == SR_FS_DIRTY) {
+ if (riscv_isa_extension_available(&isa, d))
+ __kvm_riscv_fp_d_save(cntx);
+ else if (riscv_isa_extension_available(&isa, f))
+ __kvm_riscv_fp_f_save(cntx);
+ kvm_riscv_vcpu_fp_clean(cntx);
+ }
+}
+
+static void kvm_riscv_vcpu_guest_fp_restore(struct kvm_cpu_context *cntx,
+ unsigned long isa)
+{
+ if ((cntx->sstatus & SR_FS) != SR_FS_OFF) {
+ if (riscv_isa_extension_available(&isa, d))
+ __kvm_riscv_fp_d_restore(cntx);
+ else if (riscv_isa_extension_available(&isa, f))
+ __kvm_riscv_fp_f_restore(cntx);
+ kvm_riscv_vcpu_fp_clean(cntx);
+ }
+}
+
+static void kvm_riscv_vcpu_host_fp_save(struct kvm_cpu_context *cntx)
+{
+ /* No need to check host sstatus as it can be modified outside */
+ if (riscv_isa_extension_available(NULL, d))
+ __kvm_riscv_fp_d_save(cntx);
+ else if (riscv_isa_extension_available(NULL, f))
+ __kvm_riscv_fp_f_save(cntx);
+}
+
+static void kvm_riscv_vcpu_host_fp_restore(struct kvm_cpu_context *cntx)
+{
+ if (riscv_isa_extension_available(NULL, d))
+ __kvm_riscv_fp_d_restore(cntx);
+ else if (riscv_isa_extension_available(NULL, f))
+ __kvm_riscv_fp_f_restore(cntx);
+}
+#else
+static void kvm_riscv_vcpu_fp_reset(struct kvm_vcpu *vcpu)
+{
+}
+static void kvm_riscv_vcpu_guest_fp_save(struct kvm_cpu_context *cntx,
+ unsigned long isa)
+{
+}
+static void kvm_riscv_vcpu_guest_fp_restore(struct kvm_cpu_context *cntx,
+ unsigned long isa)
+{
+}
+static void kvm_riscv_vcpu_host_fp_save(struct kvm_cpu_context *cntx)
+{
+}
+static void kvm_riscv_vcpu_host_fp_restore(struct kvm_cpu_context *cntx)
+{
+}
+#endif
+
#define KVM_RISCV_ISA_ALLOWED (riscv_isa_extension_mask(a) | \
riscv_isa_extension_mask(c) | \
riscv_isa_extension_mask(d) | \
@@ -58,6 +138,8 @@ static void kvm_riscv_reset_vcpu(struct kvm_vcpu *vcpu)
memcpy(cntx, reset_cntx, sizeof(*cntx));
+ kvm_riscv_vcpu_fp_reset(vcpu);
+
kvm_riscv_vcpu_timer_reset(vcpu);
WRITE_ONCE(vcpu->arch.irqs_pending, 0);
@@ -192,6 +274,7 @@ static int kvm_riscv_vcpu_set_reg_config(struct kvm_vcpu *vcpu,
vcpu->arch.isa = reg_val;
vcpu->arch.isa &= riscv_isa_extension_base(NULL);
vcpu->arch.isa &= KVM_RISCV_ISA_ALLOWED;
+ kvm_riscv_vcpu_fp_reset(vcpu);
} else {
return -EOPNOTSUPP;
}
@@ -593,6 +676,10 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
kvm_riscv_vcpu_timer_restore(vcpu);
+ kvm_riscv_vcpu_host_fp_save(&vcpu->arch.host_context);
+ kvm_riscv_vcpu_guest_fp_restore(&vcpu->arch.guest_context,
+ vcpu->arch.isa);
+
vcpu->cpu = cpu;
}
@@ -602,6 +689,10 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
vcpu->cpu = -1;
+ kvm_riscv_vcpu_guest_fp_save(&vcpu->arch.guest_context,
+ vcpu->arch.isa);
+ kvm_riscv_vcpu_host_fp_restore(&vcpu->arch.host_context);
+
csr_write(CSR_HGATP, 0);
csr->vsstatus = csr_read(CSR_VSSTATUS);
diff --git a/arch/riscv/kvm/vcpu_switch.S b/arch/riscv/kvm/vcpu_switch.S
index e22721e1b892..029a28a195c6 100644
--- a/arch/riscv/kvm/vcpu_switch.S
+++ b/arch/riscv/kvm/vcpu_switch.S
@@ -224,3 +224,177 @@ ENTRY(__kvm_riscv_unpriv_trap)
REG_S a1, (KVM_ARCH_TRAP_HTINST)(a0)
sret
ENDPROC(__kvm_riscv_unpriv_trap)
+
+#ifdef CONFIG_FPU
+ .align 3
+ .global __kvm_riscv_fp_f_save
+__kvm_riscv_fp_f_save:
+ csrr t2, CSR_SSTATUS
+ li t1, SR_FS
+ csrs CSR_SSTATUS, t1
+ frcsr t0
+ fsw f0, KVM_ARCH_FP_F_F0(a0)
+ fsw f1, KVM_ARCH_FP_F_F1(a0)
+ fsw f2, KVM_ARCH_FP_F_F2(a0)
+ fsw f3, KVM_ARCH_FP_F_F3(a0)
+ fsw f4, KVM_ARCH_FP_F_F4(a0)
+ fsw f5, KVM_ARCH_FP_F_F5(a0)
+ fsw f6, KVM_ARCH_FP_F_F6(a0)
+ fsw f7, KVM_ARCH_FP_F_F7(a0)
+ fsw f8, KVM_ARCH_FP_F_F8(a0)
+ fsw f9, KVM_ARCH_FP_F_F9(a0)
+ fsw f10, KVM_ARCH_FP_F_F10(a0)
+ fsw f11, KVM_ARCH_FP_F_F11(a0)
+ fsw f12, KVM_ARCH_FP_F_F12(a0)
+ fsw f13, KVM_ARCH_FP_F_F13(a0)
+ fsw f14, KVM_ARCH_FP_F_F14(a0)
+ fsw f15, KVM_ARCH_FP_F_F15(a0)
+ fsw f16, KVM_ARCH_FP_F_F16(a0)
+ fsw f17, KVM_ARCH_FP_F_F17(a0)
+ fsw f18, KVM_ARCH_FP_F_F18(a0)
+ fsw f19, KVM_ARCH_FP_F_F19(a0)
+ fsw f20, KVM_ARCH_FP_F_F20(a0)
+ fsw f21, KVM_ARCH_FP_F_F21(a0)
+ fsw f22, KVM_ARCH_FP_F_F22(a0)
+ fsw f23, KVM_ARCH_FP_F_F23(a0)
+ fsw f24, KVM_ARCH_FP_F_F24(a0)
+ fsw f25, KVM_ARCH_FP_F_F25(a0)
+ fsw f26, KVM_ARCH_FP_F_F26(a0)
+ fsw f27, KVM_ARCH_FP_F_F27(a0)
+ fsw f28, KVM_ARCH_FP_F_F28(a0)
+ fsw f29, KVM_ARCH_FP_F_F29(a0)
+ fsw f30, KVM_ARCH_FP_F_F30(a0)
+ fsw f31, KVM_ARCH_FP_F_F31(a0)
+ sw t0, KVM_ARCH_FP_F_FCSR(a0)
+ csrw CSR_SSTATUS, t2
+ ret
+
+ .align 3
+ .global __kvm_riscv_fp_d_save
+__kvm_riscv_fp_d_save:
+ csrr t2, CSR_SSTATUS
+ li t1, SR_FS
+ csrs CSR_SSTATUS, t1
+ frcsr t0
+ fsd f0, KVM_ARCH_FP_D_F0(a0)
+ fsd f1, KVM_ARCH_FP_D_F1(a0)
+ fsd f2, KVM_ARCH_FP_D_F2(a0)
+ fsd f3, KVM_ARCH_FP_D_F3(a0)
+ fsd f4, KVM_ARCH_FP_D_F4(a0)
+ fsd f5, KVM_ARCH_FP_D_F5(a0)
+ fsd f6, KVM_ARCH_FP_D_F6(a0)
+ fsd f7, KVM_ARCH_FP_D_F7(a0)
+ fsd f8, KVM_ARCH_FP_D_F8(a0)
+ fsd f9, KVM_ARCH_FP_D_F9(a0)
+ fsd f10, KVM_ARCH_FP_D_F10(a0)
+ fsd f11, KVM_ARCH_FP_D_F11(a0)
+ fsd f12, KVM_ARCH_FP_D_F12(a0)
+ fsd f13, KVM_ARCH_FP_D_F13(a0)
+ fsd f14, KVM_ARCH_FP_D_F14(a0)
+ fsd f15, KVM_ARCH_FP_D_F15(a0)
+ fsd f16, KVM_ARCH_FP_D_F16(a0)
+ fsd f17, KVM_ARCH_FP_D_F17(a0)
+ fsd f18, KVM_ARCH_FP_D_F18(a0)
+ fsd f19, KVM_ARCH_FP_D_F19(a0)
+ fsd f20, KVM_ARCH_FP_D_F20(a0)
+ fsd f21, KVM_ARCH_FP_D_F21(a0)
+ fsd f22, KVM_ARCH_FP_D_F22(a0)
+ fsd f23, KVM_ARCH_FP_D_F23(a0)
+ fsd f24, KVM_ARCH_FP_D_F24(a0)
+ fsd f25, KVM_ARCH_FP_D_F25(a0)
+ fsd f26, KVM_ARCH_FP_D_F26(a0)
+ fsd f27, KVM_ARCH_FP_D_F27(a0)
+ fsd f28, KVM_ARCH_FP_D_F28(a0)
+ fsd f29, KVM_ARCH_FP_D_F29(a0)
+ fsd f30, KVM_ARCH_FP_D_F30(a0)
+ fsd f31, KVM_ARCH_FP_D_F31(a0)
+ sw t0, KVM_ARCH_FP_D_FCSR(a0)
+ csrw CSR_SSTATUS, t2
+ ret
+
+ .align 3
+ .global __kvm_riscv_fp_f_restore
+__kvm_riscv_fp_f_restore:
+ csrr t2, CSR_SSTATUS
+ li t1, SR_FS
+ lw t0, KVM_ARCH_FP_F_FCSR(a0)
+ csrs CSR_SSTATUS, t1
+ flw f0, KVM_ARCH_FP_F_F0(a0)
+ flw f1, KVM_ARCH_FP_F_F1(a0)
+ flw f2, KVM_ARCH_FP_F_F2(a0)
+ flw f3, KVM_ARCH_FP_F_F3(a0)
+ flw f4, KVM_ARCH_FP_F_F4(a0)
+ flw f5, KVM_ARCH_FP_F_F5(a0)
+ flw f6, KVM_ARCH_FP_F_F6(a0)
+ flw f7, KVM_ARCH_FP_F_F7(a0)
+ flw f8, KVM_ARCH_FP_F_F8(a0)
+ flw f9, KVM_ARCH_FP_F_F9(a0)
+ flw f10, KVM_ARCH_FP_F_F10(a0)
+ flw f11, KVM_ARCH_FP_F_F11(a0)
+ flw f12, KVM_ARCH_FP_F_F12(a0)
+ flw f13, KVM_ARCH_FP_F_F13(a0)
+ flw f14, KVM_ARCH_FP_F_F14(a0)
+ flw f15, KVM_ARCH_FP_F_F15(a0)
+ flw f16, KVM_ARCH_FP_F_F16(a0)
+ flw f17, KVM_ARCH_FP_F_F17(a0)
+ flw f18, KVM_ARCH_FP_F_F18(a0)
+ flw f19, KVM_ARCH_FP_F_F19(a0)
+ flw f20, KVM_ARCH_FP_F_F20(a0)
+ flw f21, KVM_ARCH_FP_F_F21(a0)
+ flw f22, KVM_ARCH_FP_F_F22(a0)
+ flw f23, KVM_ARCH_FP_F_F23(a0)
+ flw f24, KVM_ARCH_FP_F_F24(a0)
+ flw f25, KVM_ARCH_FP_F_F25(a0)
+ flw f26, KVM_ARCH_FP_F_F26(a0)
+ flw f27, KVM_ARCH_FP_F_F27(a0)
+ flw f28, KVM_ARCH_FP_F_F28(a0)
+ flw f29, KVM_ARCH_FP_F_F29(a0)
+ flw f30, KVM_ARCH_FP_F_F30(a0)
+ flw f31, KVM_ARCH_FP_F_F31(a0)
+ fscsr t0
+ csrw CSR_SSTATUS, t2
+ ret
+
+ .align 3
+ .global __kvm_riscv_fp_d_restore
+__kvm_riscv_fp_d_restore:
+ csrr t2, CSR_SSTATUS
+ li t1, SR_FS
+ lw t0, KVM_ARCH_FP_D_FCSR(a0)
+ csrs CSR_SSTATUS, t1
+ fld f0, KVM_ARCH_FP_D_F0(a0)
+ fld f1, KVM_ARCH_FP_D_F1(a0)
+ fld f2, KVM_ARCH_FP_D_F2(a0)
+ fld f3, KVM_ARCH_FP_D_F3(a0)
+ fld f4, KVM_ARCH_FP_D_F4(a0)
+ fld f5, KVM_ARCH_FP_D_F5(a0)
+ fld f6, KVM_ARCH_FP_D_F6(a0)
+ fld f7, KVM_ARCH_FP_D_F7(a0)
+ fld f8, KVM_ARCH_FP_D_F8(a0)
+ fld f9, KVM_ARCH_FP_D_F9(a0)
+ fld f10, KVM_ARCH_FP_D_F10(a0)
+ fld f11, KVM_ARCH_FP_D_F11(a0)
+ fld f12, KVM_ARCH_FP_D_F12(a0)
+ fld f13, KVM_ARCH_FP_D_F13(a0)
+ fld f14, KVM_ARCH_FP_D_F14(a0)
+ fld f15, KVM_ARCH_FP_D_F15(a0)
+ fld f16, KVM_ARCH_FP_D_F16(a0)
+ fld f17, KVM_ARCH_FP_D_F17(a0)
+ fld f18, KVM_ARCH_FP_D_F18(a0)
+ fld f19, KVM_ARCH_FP_D_F19(a0)
+ fld f20, KVM_ARCH_FP_D_F20(a0)
+ fld f21, KVM_ARCH_FP_D_F21(a0)
+ fld f22, KVM_ARCH_FP_D_F22(a0)
+ fld f23, KVM_ARCH_FP_D_F23(a0)
+ fld f24, KVM_ARCH_FP_D_F24(a0)
+ fld f25, KVM_ARCH_FP_D_F25(a0)
+ fld f26, KVM_ARCH_FP_D_F26(a0)
+ fld f27, KVM_ARCH_FP_D_F27(a0)
+ fld f28, KVM_ARCH_FP_D_F28(a0)
+ fld f29, KVM_ARCH_FP_D_F29(a0)
+ fld f30, KVM_ARCH_FP_D_F30(a0)
+ fld f31, KVM_ARCH_FP_D_F31(a0)
+ fscsr t0
+ csrw CSR_SSTATUS, t2
+ ret
+#endif