From 2def950c63e3f976af87a2606dabe0c9e21c605b Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Wed, 8 Feb 2023 15:01:03 +0100 Subject: KVM: arm64: Limit length in kvm_vm_ioctl_mte_copy_tags() to INT_MAX In case of success, this function returns the amount of handled bytes. However, this does not work for large values: The function is called from kvm_arch_vm_ioctl() (which still returns a long), which in turn is called from kvm_vm_ioctl() in virt/kvm/kvm_main.c. And that function stores the return value in an "int r" variable. So the upper 32-bits of the "long" return value are lost there. KVM ioctl functions should only return "int" values, so let's limit the amount of bytes that can be requested here to INT_MAX to avoid the problem with the truncated return value. We can then also change the return type of the function to "int" to make it clearer that it is not possible to return a "long" here. Fixes: f0376edb1ddc ("KVM: arm64: Add ioctl to fetch/store tags in a guest") Signed-off-by: Thomas Huth Reviewed-by: Cornelia Huck Reviewed-by: Gavin Shan Reviewed-by: Steven Price Message-Id: <20230208140105.655814-5-thuth@redhat.com> Signed-off-by: Paolo Bonzini --- arch/arm64/kvm/guest.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'arch/arm64/kvm') diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index 07444fa22888..26a2ebc465ea 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -1019,8 +1019,8 @@ int kvm_arm_vcpu_arch_has_attr(struct kvm_vcpu *vcpu, return ret; } -long kvm_vm_ioctl_mte_copy_tags(struct kvm *kvm, - struct kvm_arm_copy_mte_tags *copy_tags) +int kvm_vm_ioctl_mte_copy_tags(struct kvm *kvm, + struct kvm_arm_copy_mte_tags *copy_tags) { gpa_t guest_ipa = copy_tags->guest_ipa; size_t length = copy_tags->length; @@ -1041,6 +1041,10 @@ long kvm_vm_ioctl_mte_copy_tags(struct kvm *kvm, if (length & ~PAGE_MASK || guest_ipa & ~PAGE_MASK) return -EINVAL; + /* Lengths above INT_MAX cannot be represented in the return value */ + if (length > INT_MAX) + return -EINVAL; + gfn = gpa_to_gfn(guest_ipa); mutex_lock(&kvm->slots_lock); -- cgit v1.2.3 From d8708b80fa0e6e21bc0c9e7276ad0bccef73b6e7 Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Wed, 8 Feb 2023 15:01:05 +0100 Subject: KVM: Change return type of kvm_arch_vm_ioctl() to "int" All kvm_arch_vm_ioctl() implementations now only deal with "int" types as return values, so we can change the return type of these functions to use "int" instead of "long". Signed-off-by: Thomas Huth Acked-by: Anup Patel Message-Id: <20230208140105.655814-7-thuth@redhat.com> Signed-off-by: Paolo Bonzini --- arch/arm64/kvm/arm.c | 3 +-- arch/mips/kvm/mips.c | 4 ++-- arch/powerpc/kvm/powerpc.c | 5 ++--- arch/riscv/kvm/vm.c | 3 +-- arch/s390/kvm/kvm-s390.c | 3 +-- arch/x86/kvm/x86.c | 3 +-- include/linux/kvm_host.h | 3 +-- 7 files changed, 9 insertions(+), 15 deletions(-) (limited to 'arch/arm64/kvm') diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 3bd732eaf087..a43e1cb3b7e9 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -1439,8 +1439,7 @@ static int kvm_vm_ioctl_set_device_addr(struct kvm *kvm, } } -long kvm_arch_vm_ioctl(struct file *filp, - unsigned int ioctl, unsigned long arg) +int kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { struct kvm *kvm = filp->private_data; void __user *argp = (void __user *)arg; diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c index 36c8991b5d39..884be4ef99dc 100644 --- a/arch/mips/kvm/mips.c +++ b/arch/mips/kvm/mips.c @@ -993,9 +993,9 @@ void kvm_arch_flush_remote_tlbs_memslot(struct kvm *kvm, kvm_flush_remote_tlbs(kvm); } -long kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) +int kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { - long r; + int r; switch (ioctl) { default: diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 4c5405fc5538..c0bac9cf2d87 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -2371,12 +2371,11 @@ static int kvmppc_get_cpu_char(struct kvm_ppc_cpu_char *cp) } #endif -long kvm_arch_vm_ioctl(struct file *filp, - unsigned int ioctl, unsigned long arg) +int kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { struct kvm *kvm __maybe_unused = filp->private_data; void __user *argp = (void __user *)arg; - long r; + int r; switch (ioctl) { case KVM_PPC_GET_PVINFO: { diff --git a/arch/riscv/kvm/vm.c b/arch/riscv/kvm/vm.c index 65a964d7e70d..c13130ab459a 100644 --- a/arch/riscv/kvm/vm.c +++ b/arch/riscv/kvm/vm.c @@ -87,8 +87,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) return r; } -long kvm_arch_vm_ioctl(struct file *filp, - unsigned int ioctl, unsigned long arg) +int kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { return -EINVAL; } diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 7039abda6d49..4c3edccb9911 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -2898,8 +2898,7 @@ static int kvm_s390_vm_mem_op(struct kvm *kvm, struct kvm_s390_mem_op *mop) } } -long kvm_arch_vm_ioctl(struct file *filp, - unsigned int ioctl, unsigned long arg) +int kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { struct kvm *kvm = filp->private_data; void __user *argp = (void __user *)arg; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 4282daeaee84..237c483b1230 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6667,8 +6667,7 @@ static int kvm_vm_ioctl_set_clock(struct kvm *kvm, void __user *argp) return 0; } -long kvm_arch_vm_ioctl(struct file *filp, - unsigned int ioctl, unsigned long arg) +int kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { struct kvm *kvm = filp->private_data; void __user *argp = (void __user *)arg; diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 8ada23756b0e..90edc16d37e5 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1397,8 +1397,7 @@ int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level, bool line_status); int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap); -long kvm_arch_vm_ioctl(struct file *filp, - unsigned int ioctl, unsigned long arg); +int kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg); long kvm_arch_vm_compat_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg); -- cgit v1.2.3 From 0acc7239c20a8401b8968c2adace8f7c9b0295ae Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Mon, 27 Mar 2023 16:47:44 +0000 Subject: KVM: arm64: Avoid vcpu->mutex v. kvm->lock inversion in CPU_ON KVM/arm64 had the lock ordering backwards on vcpu->mutex and kvm->lock from the very beginning. One such example is the way vCPU resets are handled: the kvm->lock is acquired while handling a guest CPU_ON PSCI call. Add a dedicated lock to serialize writes to kvm_vcpu_arch::{mp_state, reset_state}. Promote all accessors of mp_state to {READ,WRITE}_ONCE() as readers do not acquire the mp_state_lock. While at it, plug yet another race by taking the mp_state_lock in the KVM_SET_MP_STATE ioctl handler. As changes to MP state are now guarded with a dedicated lock, drop the kvm->lock acquisition from the PSCI CPU_ON path. Similarly, move the reader of reset_state outside of the kvm->lock and instead protect it with the mp_state_lock. Note that writes to reset_state::reset have been demoted to regular stores as both readers and writers acquire the mp_state_lock. While the kvm->lock inversion still exists in kvm_reset_vcpu(), at least now PSCI CPU_ON no longer depends on it for serializing vCPU reset. Cc: stable@vger.kernel.org Tested-by: Jeremy Linton Signed-off-by: Oliver Upton Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20230327164747.2466958-2-oliver.upton@linux.dev --- arch/arm64/include/asm/kvm_host.h | 1 + arch/arm64/kvm/arm.c | 31 ++++++++++++++++++++++--------- arch/arm64/kvm/psci.c | 28 ++++++++++++++++------------ arch/arm64/kvm/reset.c | 9 +++++---- 4 files changed, 44 insertions(+), 25 deletions(-) (limited to 'arch/arm64/kvm') diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index bcd774d74f34..917586237a4d 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -522,6 +522,7 @@ struct kvm_vcpu_arch { /* vcpu power state */ struct kvm_mp_state mp_state; + spinlock_t mp_state_lock; /* Cache some mmu pages needed inside spinlock regions */ struct kvm_mmu_memory_cache mmu_page_cache; diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 3bd732eaf087..647798da8c41 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -326,6 +326,8 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) { int err; + spin_lock_init(&vcpu->arch.mp_state_lock); + /* Force users to call KVM_ARM_VCPU_INIT */ vcpu->arch.target = -1; bitmap_zero(vcpu->arch.features, KVM_VCPU_MAX_FEATURES); @@ -443,34 +445,41 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) vcpu->cpu = -1; } -void kvm_arm_vcpu_power_off(struct kvm_vcpu *vcpu) +static void __kvm_arm_vcpu_power_off(struct kvm_vcpu *vcpu) { - vcpu->arch.mp_state.mp_state = KVM_MP_STATE_STOPPED; + WRITE_ONCE(vcpu->arch.mp_state.mp_state, KVM_MP_STATE_STOPPED); kvm_make_request(KVM_REQ_SLEEP, vcpu); kvm_vcpu_kick(vcpu); } +void kvm_arm_vcpu_power_off(struct kvm_vcpu *vcpu) +{ + spin_lock(&vcpu->arch.mp_state_lock); + __kvm_arm_vcpu_power_off(vcpu); + spin_unlock(&vcpu->arch.mp_state_lock); +} + bool kvm_arm_vcpu_stopped(struct kvm_vcpu *vcpu) { - return vcpu->arch.mp_state.mp_state == KVM_MP_STATE_STOPPED; + return READ_ONCE(vcpu->arch.mp_state.mp_state) == KVM_MP_STATE_STOPPED; } static void kvm_arm_vcpu_suspend(struct kvm_vcpu *vcpu) { - vcpu->arch.mp_state.mp_state = KVM_MP_STATE_SUSPENDED; + WRITE_ONCE(vcpu->arch.mp_state.mp_state, KVM_MP_STATE_SUSPENDED); kvm_make_request(KVM_REQ_SUSPEND, vcpu); kvm_vcpu_kick(vcpu); } static bool kvm_arm_vcpu_suspended(struct kvm_vcpu *vcpu) { - return vcpu->arch.mp_state.mp_state == KVM_MP_STATE_SUSPENDED; + return READ_ONCE(vcpu->arch.mp_state.mp_state) == KVM_MP_STATE_SUSPENDED; } int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, struct kvm_mp_state *mp_state) { - *mp_state = vcpu->arch.mp_state; + *mp_state = READ_ONCE(vcpu->arch.mp_state); return 0; } @@ -480,12 +489,14 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, { int ret = 0; + spin_lock(&vcpu->arch.mp_state_lock); + switch (mp_state->mp_state) { case KVM_MP_STATE_RUNNABLE: - vcpu->arch.mp_state = *mp_state; + WRITE_ONCE(vcpu->arch.mp_state, *mp_state); break; case KVM_MP_STATE_STOPPED: - kvm_arm_vcpu_power_off(vcpu); + __kvm_arm_vcpu_power_off(vcpu); break; case KVM_MP_STATE_SUSPENDED: kvm_arm_vcpu_suspend(vcpu); @@ -494,6 +505,8 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, ret = -EINVAL; } + spin_unlock(&vcpu->arch.mp_state_lock); + return ret; } @@ -1213,7 +1226,7 @@ static int kvm_arch_vcpu_ioctl_vcpu_init(struct kvm_vcpu *vcpu, if (test_bit(KVM_ARM_VCPU_POWER_OFF, vcpu->arch.features)) kvm_arm_vcpu_power_off(vcpu); else - vcpu->arch.mp_state.mp_state = KVM_MP_STATE_RUNNABLE; + WRITE_ONCE(vcpu->arch.mp_state.mp_state, KVM_MP_STATE_RUNNABLE); return 0; } diff --git a/arch/arm64/kvm/psci.c b/arch/arm64/kvm/psci.c index 7fbc4c1b9df0..5767e6baa61a 100644 --- a/arch/arm64/kvm/psci.c +++ b/arch/arm64/kvm/psci.c @@ -62,6 +62,7 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu) struct vcpu_reset_state *reset_state; struct kvm *kvm = source_vcpu->kvm; struct kvm_vcpu *vcpu = NULL; + int ret = PSCI_RET_SUCCESS; unsigned long cpu_id; cpu_id = smccc_get_arg1(source_vcpu); @@ -76,11 +77,15 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu) */ if (!vcpu) return PSCI_RET_INVALID_PARAMS; + + spin_lock(&vcpu->arch.mp_state_lock); if (!kvm_arm_vcpu_stopped(vcpu)) { if (kvm_psci_version(source_vcpu) != KVM_ARM_PSCI_0_1) - return PSCI_RET_ALREADY_ON; + ret = PSCI_RET_ALREADY_ON; else - return PSCI_RET_INVALID_PARAMS; + ret = PSCI_RET_INVALID_PARAMS; + + goto out_unlock; } reset_state = &vcpu->arch.reset_state; @@ -96,7 +101,7 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu) */ reset_state->r0 = smccc_get_arg3(source_vcpu); - WRITE_ONCE(reset_state->reset, true); + reset_state->reset = true; kvm_make_request(KVM_REQ_VCPU_RESET, vcpu); /* @@ -108,7 +113,9 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu) vcpu->arch.mp_state.mp_state = KVM_MP_STATE_RUNNABLE; kvm_vcpu_wake_up(vcpu); - return PSCI_RET_SUCCESS; +out_unlock: + spin_unlock(&vcpu->arch.mp_state_lock); + return ret; } static unsigned long kvm_psci_vcpu_affinity_info(struct kvm_vcpu *vcpu) @@ -168,8 +175,11 @@ static void kvm_prepare_system_event(struct kvm_vcpu *vcpu, u32 type, u64 flags) * after this call is handled and before the VCPUs have been * re-initialized. */ - kvm_for_each_vcpu(i, tmp, vcpu->kvm) - tmp->arch.mp_state.mp_state = KVM_MP_STATE_STOPPED; + kvm_for_each_vcpu(i, tmp, vcpu->kvm) { + spin_lock(&tmp->arch.mp_state_lock); + WRITE_ONCE(tmp->arch.mp_state.mp_state, KVM_MP_STATE_STOPPED); + spin_unlock(&tmp->arch.mp_state_lock); + } kvm_make_all_cpus_request(vcpu->kvm, KVM_REQ_SLEEP); memset(&vcpu->run->system_event, 0, sizeof(vcpu->run->system_event)); @@ -229,7 +239,6 @@ static unsigned long kvm_psci_check_allowed_function(struct kvm_vcpu *vcpu, u32 static int kvm_psci_0_2_call(struct kvm_vcpu *vcpu) { - struct kvm *kvm = vcpu->kvm; u32 psci_fn = smccc_get_function(vcpu); unsigned long val; int ret = 1; @@ -254,9 +263,7 @@ static int kvm_psci_0_2_call(struct kvm_vcpu *vcpu) kvm_psci_narrow_to_32bit(vcpu); fallthrough; case PSCI_0_2_FN64_CPU_ON: - mutex_lock(&kvm->lock); val = kvm_psci_vcpu_on(vcpu); - mutex_unlock(&kvm->lock); break; case PSCI_0_2_FN_AFFINITY_INFO: kvm_psci_narrow_to_32bit(vcpu); @@ -395,7 +402,6 @@ static int kvm_psci_1_x_call(struct kvm_vcpu *vcpu, u32 minor) static int kvm_psci_0_1_call(struct kvm_vcpu *vcpu) { - struct kvm *kvm = vcpu->kvm; u32 psci_fn = smccc_get_function(vcpu); unsigned long val; @@ -405,9 +411,7 @@ static int kvm_psci_0_1_call(struct kvm_vcpu *vcpu) val = PSCI_RET_SUCCESS; break; case KVM_PSCI_FN_CPU_ON: - mutex_lock(&kvm->lock); val = kvm_psci_vcpu_on(vcpu); - mutex_unlock(&kvm->lock); break; default: val = PSCI_RET_NOT_SUPPORTED; diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c index 49a3257dec46..9e023546bde0 100644 --- a/arch/arm64/kvm/reset.c +++ b/arch/arm64/kvm/reset.c @@ -264,15 +264,16 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu) mutex_lock(&vcpu->kvm->lock); ret = kvm_set_vm_width(vcpu); - if (!ret) { - reset_state = vcpu->arch.reset_state; - WRITE_ONCE(vcpu->arch.reset_state.reset, false); - } mutex_unlock(&vcpu->kvm->lock); if (ret) return ret; + spin_lock(&vcpu->arch.mp_state_lock); + reset_state = vcpu->arch.reset_state; + vcpu->arch.reset_state.reset = false; + spin_unlock(&vcpu->arch.mp_state_lock); + /* Reset PMU outside of the non-preemptible section */ kvm_pmu_vcpu_reset(vcpu); -- cgit v1.2.3 From c43120afb5c66a3465c7468f5cf9806a26484cde Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Mon, 27 Mar 2023 16:47:45 +0000 Subject: KVM: arm64: Avoid lock inversion when setting the VM register width kvm->lock must be taken outside of the vcpu->mutex. Of course, the locking documentation for KVM makes this abundantly clear. Nonetheless, the locking order in KVM/arm64 has been wrong for quite a while; we acquire the kvm->lock while holding the vcpu->mutex all over the shop. All was seemingly fine until commit 42a90008f890 ("KVM: Ensure lockdep knows about kvm->lock vs. vcpu->mutex ordering rule") caught us with our pants down, leading to lockdep barfing: ====================================================== WARNING: possible circular locking dependency detected 6.2.0-rc7+ #19 Not tainted ------------------------------------------------------ qemu-system-aar/859 is trying to acquire lock: ffff5aa69269eba0 (&host_kvm->lock){+.+.}-{3:3}, at: kvm_reset_vcpu+0x34/0x274 but task is already holding lock: ffff5aa68768c0b8 (&vcpu->mutex){+.+.}-{3:3}, at: kvm_vcpu_ioctl+0x8c/0xba0 which lock already depends on the new lock. Add a dedicated lock to serialize writes to VM-scoped configuration from the context of a vCPU. Protect the register width flags with the new lock, thus avoiding the need to grab the kvm->lock while holding vcpu->mutex in kvm_reset_vcpu(). Cc: stable@vger.kernel.org Reported-by: Jeremy Linton Link: https://lore.kernel.org/kvmarm/f6452cdd-65ff-34b8-bab0-5c06416da5f6@arm.com/ Tested-by: Jeremy Linton Signed-off-by: Oliver Upton Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20230327164747.2466958-3-oliver.upton@linux.dev --- arch/arm64/include/asm/kvm_host.h | 3 +++ arch/arm64/kvm/arm.c | 18 ++++++++++++++++++ arch/arm64/kvm/reset.c | 6 +++--- 3 files changed, 24 insertions(+), 3 deletions(-) (limited to 'arch/arm64/kvm') diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 917586237a4d..cd1ef8716719 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -199,6 +199,9 @@ struct kvm_arch { /* Mandated version of PSCI */ u32 psci_version; + /* Protects VM-scoped configuration data */ + struct mutex config_lock; + /* * If we encounter a data abort without valid instruction syndrome * information, report this to user space. User space can (and diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 647798da8c41..1620ec3d95ef 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -128,6 +128,16 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) { int ret; + mutex_init(&kvm->arch.config_lock); + +#ifdef CONFIG_LOCKDEP + /* Clue in lockdep that the config_lock must be taken inside kvm->lock */ + mutex_lock(&kvm->lock); + mutex_lock(&kvm->arch.config_lock); + mutex_unlock(&kvm->arch.config_lock); + mutex_unlock(&kvm->lock); +#endif + ret = kvm_share_hyp(kvm, kvm + 1); if (ret) return ret; @@ -328,6 +338,14 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) spin_lock_init(&vcpu->arch.mp_state_lock); +#ifdef CONFIG_LOCKDEP + /* Inform lockdep that the config_lock is acquired after vcpu->mutex */ + mutex_lock(&vcpu->mutex); + mutex_lock(&vcpu->kvm->arch.config_lock); + mutex_unlock(&vcpu->kvm->arch.config_lock); + mutex_unlock(&vcpu->mutex); +#endif + /* Force users to call KVM_ARM_VCPU_INIT */ vcpu->arch.target = -1; bitmap_zero(vcpu->arch.features, KVM_VCPU_MAX_FEATURES); diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c index 9e023546bde0..b5dee8e57e77 100644 --- a/arch/arm64/kvm/reset.c +++ b/arch/arm64/kvm/reset.c @@ -205,7 +205,7 @@ static int kvm_set_vm_width(struct kvm_vcpu *vcpu) is32bit = vcpu_has_feature(vcpu, KVM_ARM_VCPU_EL1_32BIT); - lockdep_assert_held(&kvm->lock); + lockdep_assert_held(&kvm->arch.config_lock); if (test_bit(KVM_ARCH_FLAG_REG_WIDTH_CONFIGURED, &kvm->arch.flags)) { /* @@ -262,9 +262,9 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu) bool loaded; u32 pstate; - mutex_lock(&vcpu->kvm->lock); + mutex_lock(&vcpu->kvm->arch.config_lock); ret = kvm_set_vm_width(vcpu); - mutex_unlock(&vcpu->kvm->lock); + mutex_unlock(&vcpu->kvm->arch.config_lock); if (ret) return ret; -- cgit v1.2.3 From 4bba7f7def6f278266dadf845da472cfbfed784e Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Mon, 27 Mar 2023 16:47:46 +0000 Subject: KVM: arm64: Use config_lock to protect data ordered against KVM_RUN There are various bits of VM-scoped data that can only be configured before the first call to KVM_RUN, such as the hypercall bitmaps and the PMU. As these fields are protected by the kvm->lock and accessed while holding vcpu->mutex, this is yet another example of lock inversion. Change out the kvm->lock for kvm->arch.config_lock in all of these instances. Opportunistically simplify the locking mechanics of the PMU configuration by holding the config_lock for the entirety of kvm_arm_pmu_v3_set_attr(). Note that this also addresses a couple of bugs. There is an unguarded read of the PMU version in KVM_ARM_VCPU_PMU_V3_FILTER which could race with KVM_ARM_VCPU_PMU_V3_SET_PMU. Additionally, until now writes to the per-vCPU vPMU irq were not serialized VM-wide, meaning concurrent calls to KVM_ARM_VCPU_PMU_V3_IRQ could lead to a false positive in pmu_irq_is_valid(). Cc: stable@vger.kernel.org Tested-by: Jeremy Linton Signed-off-by: Oliver Upton Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20230327164747.2466958-4-oliver.upton@linux.dev --- arch/arm64/kvm/arm.c | 4 ++-- arch/arm64/kvm/guest.c | 2 ++ arch/arm64/kvm/hypercalls.c | 4 ++-- arch/arm64/kvm/pmu-emul.c | 23 ++++++----------------- 4 files changed, 12 insertions(+), 21 deletions(-) (limited to 'arch/arm64/kvm') diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 1620ec3d95ef..fd8d355aca15 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -624,9 +624,9 @@ int kvm_arch_vcpu_run_pid_change(struct kvm_vcpu *vcpu) if (kvm_vm_is_protected(kvm)) kvm_call_hyp_nvhe(__pkvm_vcpu_init_traps, vcpu); - mutex_lock(&kvm->lock); + mutex_lock(&kvm->arch.config_lock); set_bit(KVM_ARCH_FLAG_HAS_RAN_ONCE, &kvm->arch.flags); - mutex_unlock(&kvm->lock); + mutex_unlock(&kvm->arch.config_lock); return ret; } diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index 07444fa22888..481c79cf22cd 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -957,7 +957,9 @@ int kvm_arm_vcpu_arch_set_attr(struct kvm_vcpu *vcpu, switch (attr->group) { case KVM_ARM_VCPU_PMU_V3_CTRL: + mutex_lock(&vcpu->kvm->arch.config_lock); ret = kvm_arm_pmu_v3_set_attr(vcpu, attr); + mutex_unlock(&vcpu->kvm->arch.config_lock); break; case KVM_ARM_VCPU_TIMER_CTRL: ret = kvm_arm_timer_set_attr(vcpu, attr); diff --git a/arch/arm64/kvm/hypercalls.c b/arch/arm64/kvm/hypercalls.c index 5da884e11337..fbdbf4257f76 100644 --- a/arch/arm64/kvm/hypercalls.c +++ b/arch/arm64/kvm/hypercalls.c @@ -377,7 +377,7 @@ static int kvm_arm_set_fw_reg_bmap(struct kvm_vcpu *vcpu, u64 reg_id, u64 val) if (val & ~fw_reg_features) return -EINVAL; - mutex_lock(&kvm->lock); + mutex_lock(&kvm->arch.config_lock); if (test_bit(KVM_ARCH_FLAG_HAS_RAN_ONCE, &kvm->arch.flags) && val != *fw_reg_bmap) { @@ -387,7 +387,7 @@ static int kvm_arm_set_fw_reg_bmap(struct kvm_vcpu *vcpu, u64 reg_id, u64 val) WRITE_ONCE(*fw_reg_bmap, val); out: - mutex_unlock(&kvm->lock); + mutex_unlock(&kvm->arch.config_lock); return ret; } diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c index 24908400e190..240168416838 100644 --- a/arch/arm64/kvm/pmu-emul.c +++ b/arch/arm64/kvm/pmu-emul.c @@ -874,7 +874,7 @@ static int kvm_arm_pmu_v3_set_pmu(struct kvm_vcpu *vcpu, int pmu_id) struct arm_pmu *arm_pmu; int ret = -ENXIO; - mutex_lock(&kvm->lock); + lockdep_assert_held(&kvm->arch.config_lock); mutex_lock(&arm_pmus_lock); list_for_each_entry(entry, &arm_pmus, entry) { @@ -894,7 +894,6 @@ static int kvm_arm_pmu_v3_set_pmu(struct kvm_vcpu *vcpu, int pmu_id) } mutex_unlock(&arm_pmus_lock); - mutex_unlock(&kvm->lock); return ret; } @@ -902,22 +901,20 @@ int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) { struct kvm *kvm = vcpu->kvm; + lockdep_assert_held(&kvm->arch.config_lock); + if (!kvm_vcpu_has_pmu(vcpu)) return -ENODEV; if (vcpu->arch.pmu.created) return -EBUSY; - mutex_lock(&kvm->lock); if (!kvm->arch.arm_pmu) { /* No PMU set, get the default one */ kvm->arch.arm_pmu = kvm_pmu_probe_armpmu(); - if (!kvm->arch.arm_pmu) { - mutex_unlock(&kvm->lock); + if (!kvm->arch.arm_pmu) return -ENODEV; - } } - mutex_unlock(&kvm->lock); switch (attr->attr) { case KVM_ARM_VCPU_PMU_V3_IRQ: { @@ -961,19 +958,13 @@ int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) filter.action != KVM_PMU_EVENT_DENY)) return -EINVAL; - mutex_lock(&kvm->lock); - - if (test_bit(KVM_ARCH_FLAG_HAS_RAN_ONCE, &kvm->arch.flags)) { - mutex_unlock(&kvm->lock); + if (test_bit(KVM_ARCH_FLAG_HAS_RAN_ONCE, &kvm->arch.flags)) return -EBUSY; - } if (!kvm->arch.pmu_filter) { kvm->arch.pmu_filter = bitmap_alloc(nr_events, GFP_KERNEL_ACCOUNT); - if (!kvm->arch.pmu_filter) { - mutex_unlock(&kvm->lock); + if (!kvm->arch.pmu_filter) return -ENOMEM; - } /* * The default depends on the first applied filter. @@ -992,8 +983,6 @@ int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) else bitmap_clear(kvm->arch.pmu_filter, filter.base_event, filter.nevents); - mutex_unlock(&kvm->lock); - return 0; } case KVM_ARM_VCPU_PMU_V3_SET_PMU: { -- cgit v1.2.3 From f00327731131d1b5aa6a1aa9f50bcf8d620ace4c Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Mon, 27 Mar 2023 16:47:47 +0000 Subject: KVM: arm64: Use config_lock to protect vgic state Almost all of the vgic state is VM-scoped but accessed from the context of a vCPU. These accesses were serialized on the kvm->lock which cannot be nested within a vcpu->mutex critical section. Move over the vgic state to using the config_lock. Tweak the lock ordering where necessary to ensure that the config_lock is acquired after the vcpu->mutex. Acquire the config_lock in kvm_vgic_create() to avoid a race between the converted flows and GIC creation. Where necessary, continue to acquire kvm->lock to avoid a race with vCPU creation (i.e. flows that use lock_all_vcpus()). Finally, promote the locking expectations in comments to lockdep assertions and update the locking documentation for the config_lock as well as vcpu->mutex. Cc: stable@vger.kernel.org Signed-off-by: Oliver Upton Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20230327164747.2466958-5-oliver.upton@linux.dev --- arch/arm64/kvm/vgic/vgic-debug.c | 8 +++--- arch/arm64/kvm/vgic/vgic-init.c | 36 +++++++++++++++++---------- arch/arm64/kvm/vgic/vgic-its.c | 18 +++++++++----- arch/arm64/kvm/vgic/vgic-kvm-device.c | 47 +++++++++++++++++++++-------------- arch/arm64/kvm/vgic/vgic-mmio-v3.c | 4 +-- arch/arm64/kvm/vgic/vgic-mmio.c | 12 ++++----- arch/arm64/kvm/vgic/vgic-v4.c | 11 ++++---- arch/arm64/kvm/vgic/vgic.c | 12 +++++---- 8 files changed, 88 insertions(+), 60 deletions(-) (limited to 'arch/arm64/kvm') diff --git a/arch/arm64/kvm/vgic/vgic-debug.c b/arch/arm64/kvm/vgic/vgic-debug.c index 78cde687383c..07aa0437125a 100644 --- a/arch/arm64/kvm/vgic/vgic-debug.c +++ b/arch/arm64/kvm/vgic/vgic-debug.c @@ -85,7 +85,7 @@ static void *vgic_debug_start(struct seq_file *s, loff_t *pos) struct kvm *kvm = s->private; struct vgic_state_iter *iter; - mutex_lock(&kvm->lock); + mutex_lock(&kvm->arch.config_lock); iter = kvm->arch.vgic.iter; if (iter) { iter = ERR_PTR(-EBUSY); @@ -104,7 +104,7 @@ static void *vgic_debug_start(struct seq_file *s, loff_t *pos) if (end_of_vgic(iter)) iter = NULL; out: - mutex_unlock(&kvm->lock); + mutex_unlock(&kvm->arch.config_lock); return iter; } @@ -132,12 +132,12 @@ static void vgic_debug_stop(struct seq_file *s, void *v) if (IS_ERR(v)) return; - mutex_lock(&kvm->lock); + mutex_lock(&kvm->arch.config_lock); iter = kvm->arch.vgic.iter; kfree(iter->lpi_array); kfree(iter); kvm->arch.vgic.iter = NULL; - mutex_unlock(&kvm->lock); + mutex_unlock(&kvm->arch.config_lock); } static void print_dist_state(struct seq_file *s, struct vgic_dist *dist) diff --git a/arch/arm64/kvm/vgic/vgic-init.c b/arch/arm64/kvm/vgic/vgic-init.c index cd134db41a57..9d42c7cb2b58 100644 --- a/arch/arm64/kvm/vgic/vgic-init.c +++ b/arch/arm64/kvm/vgic/vgic-init.c @@ -74,9 +74,6 @@ int kvm_vgic_create(struct kvm *kvm, u32 type) unsigned long i; int ret; - if (irqchip_in_kernel(kvm)) - return -EEXIST; - /* * This function is also called by the KVM_CREATE_IRQCHIP handler, * which had no chance yet to check the availability of the GICv2 @@ -87,10 +84,20 @@ int kvm_vgic_create(struct kvm *kvm, u32 type) !kvm_vgic_global_state.can_emulate_gicv2) return -ENODEV; + /* Must be held to avoid race with vCPU creation */ + lockdep_assert_held(&kvm->lock); + ret = -EBUSY; if (!lock_all_vcpus(kvm)) return ret; + mutex_lock(&kvm->arch.config_lock); + + if (irqchip_in_kernel(kvm)) { + ret = -EEXIST; + goto out_unlock; + } + kvm_for_each_vcpu(i, vcpu, kvm) { if (vcpu_has_run_once(vcpu)) goto out_unlock; @@ -118,6 +125,7 @@ int kvm_vgic_create(struct kvm *kvm, u32 type) INIT_LIST_HEAD(&kvm->arch.vgic.rd_regions); out_unlock: + mutex_unlock(&kvm->arch.config_lock); unlock_all_vcpus(kvm); return ret; } @@ -227,9 +235,9 @@ int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu) * KVM io device for the redistributor that belongs to this VCPU. */ if (dist->vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) { - mutex_lock(&vcpu->kvm->lock); + mutex_lock(&vcpu->kvm->arch.config_lock); ret = vgic_register_redist_iodev(vcpu); - mutex_unlock(&vcpu->kvm->lock); + mutex_unlock(&vcpu->kvm->arch.config_lock); } return ret; } @@ -250,7 +258,6 @@ static void kvm_vgic_vcpu_enable(struct kvm_vcpu *vcpu) * The function is generally called when nr_spis has been explicitly set * by the guest through the KVM DEVICE API. If not nr_spis is set to 256. * vgic_initialized() returns true when this function has succeeded. - * Must be called with kvm->lock held! */ int vgic_init(struct kvm *kvm) { @@ -259,6 +266,8 @@ int vgic_init(struct kvm *kvm) int ret = 0, i; unsigned long idx; + lockdep_assert_held(&kvm->arch.config_lock); + if (vgic_initialized(kvm)) return 0; @@ -373,12 +382,13 @@ void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu) vgic_cpu->rd_iodev.base_addr = VGIC_ADDR_UNDEF; } -/* To be called with kvm->lock held */ static void __kvm_vgic_destroy(struct kvm *kvm) { struct kvm_vcpu *vcpu; unsigned long i; + lockdep_assert_held(&kvm->arch.config_lock); + vgic_debug_destroy(kvm); kvm_for_each_vcpu(i, vcpu, kvm) @@ -389,9 +399,9 @@ static void __kvm_vgic_destroy(struct kvm *kvm) void kvm_vgic_destroy(struct kvm *kvm) { - mutex_lock(&kvm->lock); + mutex_lock(&kvm->arch.config_lock); __kvm_vgic_destroy(kvm); - mutex_unlock(&kvm->lock); + mutex_unlock(&kvm->arch.config_lock); } /** @@ -414,9 +424,9 @@ int vgic_lazy_init(struct kvm *kvm) if (kvm->arch.vgic.vgic_model != KVM_DEV_TYPE_ARM_VGIC_V2) return -EBUSY; - mutex_lock(&kvm->lock); + mutex_lock(&kvm->arch.config_lock); ret = vgic_init(kvm); - mutex_unlock(&kvm->lock); + mutex_unlock(&kvm->arch.config_lock); } return ret; @@ -441,7 +451,7 @@ int kvm_vgic_map_resources(struct kvm *kvm) if (likely(vgic_ready(kvm))) return 0; - mutex_lock(&kvm->lock); + mutex_lock(&kvm->arch.config_lock); if (vgic_ready(kvm)) goto out; @@ -459,7 +469,7 @@ int kvm_vgic_map_resources(struct kvm *kvm) dist->ready = true; out: - mutex_unlock(&kvm->lock); + mutex_unlock(&kvm->arch.config_lock); return ret; } diff --git a/arch/arm64/kvm/vgic/vgic-its.c b/arch/arm64/kvm/vgic/vgic-its.c index 2642e9ce2819..7713cd06104e 100644 --- a/arch/arm64/kvm/vgic/vgic-its.c +++ b/arch/arm64/kvm/vgic/vgic-its.c @@ -2045,6 +2045,13 @@ static int vgic_its_attr_regs_access(struct kvm_device *dev, mutex_lock(&dev->kvm->lock); + if (!lock_all_vcpus(dev->kvm)) { + mutex_unlock(&dev->kvm->lock); + return -EBUSY; + } + + mutex_lock(&dev->kvm->arch.config_lock); + if (IS_VGIC_ADDR_UNDEF(its->vgic_its_base)) { ret = -ENXIO; goto out; @@ -2058,11 +2065,6 @@ static int vgic_its_attr_regs_access(struct kvm_device *dev, goto out; } - if (!lock_all_vcpus(dev->kvm)) { - ret = -EBUSY; - goto out; - } - addr = its->vgic_its_base + offset; len = region->access_flags & VGIC_ACCESS_64bit ? 8 : 4; @@ -2076,8 +2078,9 @@ static int vgic_its_attr_regs_access(struct kvm_device *dev, } else { *reg = region->its_read(dev->kvm, its, addr, len); } - unlock_all_vcpus(dev->kvm); out: + mutex_unlock(&dev->kvm->arch.config_lock); + unlock_all_vcpus(dev->kvm); mutex_unlock(&dev->kvm->lock); return ret; } @@ -2757,6 +2760,8 @@ static int vgic_its_ctrl(struct kvm *kvm, struct vgic_its *its, u64 attr) return -EBUSY; } + mutex_lock(&kvm->arch.config_lock); + switch (attr) { case KVM_DEV_ARM_ITS_CTRL_RESET: vgic_its_reset(kvm, its); @@ -2769,6 +2774,7 @@ static int vgic_its_ctrl(struct kvm *kvm, struct vgic_its *its, u64 attr) break; } + mutex_unlock(&kvm->arch.config_lock); unlock_all_vcpus(kvm); mutex_unlock(&its->its_lock); mutex_unlock(&kvm->lock); diff --git a/arch/arm64/kvm/vgic/vgic-kvm-device.c b/arch/arm64/kvm/vgic/vgic-kvm-device.c index edeac2380591..07e727023deb 100644 --- a/arch/arm64/kvm/vgic/vgic-kvm-device.c +++ b/arch/arm64/kvm/vgic/vgic-kvm-device.c @@ -46,7 +46,7 @@ int kvm_set_legacy_vgic_v2_addr(struct kvm *kvm, struct kvm_arm_device_addr *dev struct vgic_dist *vgic = &kvm->arch.vgic; int r; - mutex_lock(&kvm->lock); + mutex_lock(&kvm->arch.config_lock); switch (FIELD_GET(KVM_ARM_DEVICE_TYPE_MASK, dev_addr->id)) { case KVM_VGIC_V2_ADDR_TYPE_DIST: r = vgic_check_type(kvm, KVM_DEV_TYPE_ARM_VGIC_V2); @@ -68,7 +68,7 @@ int kvm_set_legacy_vgic_v2_addr(struct kvm *kvm, struct kvm_arm_device_addr *dev r = -ENODEV; } - mutex_unlock(&kvm->lock); + mutex_unlock(&kvm->arch.config_lock); return r; } @@ -102,7 +102,7 @@ static int kvm_vgic_addr(struct kvm *kvm, struct kvm_device_attr *attr, bool wri if (get_user(addr, uaddr)) return -EFAULT; - mutex_lock(&kvm->lock); + mutex_lock(&kvm->arch.config_lock); switch (attr->attr) { case KVM_VGIC_V2_ADDR_TYPE_DIST: r = vgic_check_type(kvm, KVM_DEV_TYPE_ARM_VGIC_V2); @@ -191,7 +191,7 @@ static int kvm_vgic_addr(struct kvm *kvm, struct kvm_device_attr *attr, bool wri } out: - mutex_unlock(&kvm->lock); + mutex_unlock(&kvm->arch.config_lock); if (!r && !write) r = put_user(addr, uaddr); @@ -227,7 +227,7 @@ static int vgic_set_common_attr(struct kvm_device *dev, (val & 31)) return -EINVAL; - mutex_lock(&dev->kvm->lock); + mutex_lock(&dev->kvm->arch.config_lock); if (vgic_ready(dev->kvm) || dev->kvm->arch.vgic.nr_spis) ret = -EBUSY; @@ -235,16 +235,16 @@ static int vgic_set_common_attr(struct kvm_device *dev, dev->kvm->arch.vgic.nr_spis = val - VGIC_NR_PRIVATE_IRQS; - mutex_unlock(&dev->kvm->lock); + mutex_unlock(&dev->kvm->arch.config_lock); return ret; } case KVM_DEV_ARM_VGIC_GRP_CTRL: { switch (attr->attr) { case KVM_DEV_ARM_VGIC_CTRL_INIT: - mutex_lock(&dev->kvm->lock); + mutex_lock(&dev->kvm->arch.config_lock); r = vgic_init(dev->kvm); - mutex_unlock(&dev->kvm->lock); + mutex_unlock(&dev->kvm->arch.config_lock); return r; case KVM_DEV_ARM_VGIC_SAVE_PENDING_TABLES: /* @@ -260,7 +260,10 @@ static int vgic_set_common_attr(struct kvm_device *dev, mutex_unlock(&dev->kvm->lock); return -EBUSY; } + + mutex_lock(&dev->kvm->arch.config_lock); r = vgic_v3_save_pending_tables(dev->kvm); + mutex_unlock(&dev->kvm->arch.config_lock); unlock_all_vcpus(dev->kvm); mutex_unlock(&dev->kvm->lock); return r; @@ -411,15 +414,17 @@ static int vgic_v2_attr_regs_access(struct kvm_device *dev, mutex_lock(&dev->kvm->lock); + if (!lock_all_vcpus(dev->kvm)) { + mutex_unlock(&dev->kvm->lock); + return -EBUSY; + } + + mutex_lock(&dev->kvm->arch.config_lock); + ret = vgic_init(dev->kvm); if (ret) goto out; - if (!lock_all_vcpus(dev->kvm)) { - ret = -EBUSY; - goto out; - } - switch (attr->group) { case KVM_DEV_ARM_VGIC_GRP_CPU_REGS: ret = vgic_v2_cpuif_uaccess(vcpu, is_write, addr, &val); @@ -432,8 +437,9 @@ static int vgic_v2_attr_regs_access(struct kvm_device *dev, break; } - unlock_all_vcpus(dev->kvm); out: + mutex_unlock(&dev->kvm->arch.config_lock); + unlock_all_vcpus(dev->kvm); mutex_unlock(&dev->kvm->lock); if (!ret && !is_write) @@ -569,12 +575,14 @@ static int vgic_v3_attr_regs_access(struct kvm_device *dev, mutex_lock(&dev->kvm->lock); - if (unlikely(!vgic_initialized(dev->kvm))) { - ret = -EBUSY; - goto out; + if (!lock_all_vcpus(dev->kvm)) { + mutex_unlock(&dev->kvm->lock); + return -EBUSY; } - if (!lock_all_vcpus(dev->kvm)) { + mutex_lock(&dev->kvm->arch.config_lock); + + if (unlikely(!vgic_initialized(dev->kvm))) { ret = -EBUSY; goto out; } @@ -609,8 +617,9 @@ static int vgic_v3_attr_regs_access(struct kvm_device *dev, break; } - unlock_all_vcpus(dev->kvm); out: + mutex_unlock(&dev->kvm->arch.config_lock); + unlock_all_vcpus(dev->kvm); mutex_unlock(&dev->kvm->lock); if (!ret && uaccess && !is_write) { diff --git a/arch/arm64/kvm/vgic/vgic-mmio-v3.c b/arch/arm64/kvm/vgic/vgic-mmio-v3.c index 91201f743033..472b18ac92a2 100644 --- a/arch/arm64/kvm/vgic/vgic-mmio-v3.c +++ b/arch/arm64/kvm/vgic/vgic-mmio-v3.c @@ -111,7 +111,7 @@ static void vgic_mmio_write_v3_misc(struct kvm_vcpu *vcpu, case GICD_CTLR: { bool was_enabled, is_hwsgi; - mutex_lock(&vcpu->kvm->lock); + mutex_lock(&vcpu->kvm->arch.config_lock); was_enabled = dist->enabled; is_hwsgi = dist->nassgireq; @@ -139,7 +139,7 @@ static void vgic_mmio_write_v3_misc(struct kvm_vcpu *vcpu, else if (!was_enabled && dist->enabled) vgic_kick_vcpus(vcpu->kvm); - mutex_unlock(&vcpu->kvm->lock); + mutex_unlock(&vcpu->kvm->arch.config_lock); break; } case GICD_TYPER: diff --git a/arch/arm64/kvm/vgic/vgic-mmio.c b/arch/arm64/kvm/vgic/vgic-mmio.c index e67b3b2c8044..1939c94e0b24 100644 --- a/arch/arm64/kvm/vgic/vgic-mmio.c +++ b/arch/arm64/kvm/vgic/vgic-mmio.c @@ -530,13 +530,13 @@ unsigned long vgic_mmio_read_active(struct kvm_vcpu *vcpu, u32 intid = VGIC_ADDR_TO_INTID(addr, 1); u32 val; - mutex_lock(&vcpu->kvm->lock); + mutex_lock(&vcpu->kvm->arch.config_lock); vgic_access_active_prepare(vcpu, intid); val = __vgic_mmio_read_active(vcpu, addr, len); vgic_access_active_finish(vcpu, intid); - mutex_unlock(&vcpu->kvm->lock); + mutex_unlock(&vcpu->kvm->arch.config_lock); return val; } @@ -625,13 +625,13 @@ void vgic_mmio_write_cactive(struct kvm_vcpu *vcpu, { u32 intid = VGIC_ADDR_TO_INTID(addr, 1); - mutex_lock(&vcpu->kvm->lock); + mutex_lock(&vcpu->kvm->arch.config_lock); vgic_access_active_prepare(vcpu, intid); __vgic_mmio_write_cactive(vcpu, addr, len, val); vgic_access_active_finish(vcpu, intid); - mutex_unlock(&vcpu->kvm->lock); + mutex_unlock(&vcpu->kvm->arch.config_lock); } int vgic_mmio_uaccess_write_cactive(struct kvm_vcpu *vcpu, @@ -662,13 +662,13 @@ void vgic_mmio_write_sactive(struct kvm_vcpu *vcpu, { u32 intid = VGIC_ADDR_TO_INTID(addr, 1); - mutex_lock(&vcpu->kvm->lock); + mutex_lock(&vcpu->kvm->arch.config_lock); vgic_access_active_prepare(vcpu, intid); __vgic_mmio_write_sactive(vcpu, addr, len, val); vgic_access_active_finish(vcpu, intid); - mutex_unlock(&vcpu->kvm->lock); + mutex_unlock(&vcpu->kvm->arch.config_lock); } int vgic_mmio_uaccess_write_sactive(struct kvm_vcpu *vcpu, diff --git a/arch/arm64/kvm/vgic/vgic-v4.c b/arch/arm64/kvm/vgic/vgic-v4.c index a413718be92b..3bb003478060 100644 --- a/arch/arm64/kvm/vgic/vgic-v4.c +++ b/arch/arm64/kvm/vgic/vgic-v4.c @@ -232,9 +232,8 @@ int vgic_v4_request_vpe_irq(struct kvm_vcpu *vcpu, int irq) * @kvm: Pointer to the VM being initialized * * We may be called each time a vITS is created, or when the - * vgic is initialized. This relies on kvm->lock to be - * held. In both cases, the number of vcpus should now be - * fixed. + * vgic is initialized. In both cases, the number of vcpus + * should now be fixed. */ int vgic_v4_init(struct kvm *kvm) { @@ -243,6 +242,8 @@ int vgic_v4_init(struct kvm *kvm) int nr_vcpus, ret; unsigned long i; + lockdep_assert_held(&kvm->arch.config_lock); + if (!kvm_vgic_global_state.has_gicv4) return 0; /* Nothing to see here... move along. */ @@ -309,14 +310,14 @@ int vgic_v4_init(struct kvm *kvm) /** * vgic_v4_teardown - Free the GICv4 data structures * @kvm: Pointer to the VM being destroyed - * - * Relies on kvm->lock to be held. */ void vgic_v4_teardown(struct kvm *kvm) { struct its_vm *its_vm = &kvm->arch.vgic.its_vm; int i; + lockdep_assert_held(&kvm->arch.config_lock); + if (!its_vm->vpes) return; diff --git a/arch/arm64/kvm/vgic/vgic.c b/arch/arm64/kvm/vgic/vgic.c index d97e6080b421..0a005da83ae6 100644 --- a/arch/arm64/kvm/vgic/vgic.c +++ b/arch/arm64/kvm/vgic/vgic.c @@ -24,11 +24,13 @@ struct vgic_global kvm_vgic_global_state __ro_after_init = { /* * Locking order is always: * kvm->lock (mutex) - * its->cmd_lock (mutex) - * its->its_lock (mutex) - * vgic_cpu->ap_list_lock must be taken with IRQs disabled - * kvm->lpi_list_lock must be taken with IRQs disabled - * vgic_irq->irq_lock must be taken with IRQs disabled + * vcpu->mutex (mutex) + * kvm->arch.config_lock (mutex) + * its->cmd_lock (mutex) + * its->its_lock (mutex) + * vgic_cpu->ap_list_lock must be taken with IRQs disabled + * kvm->lpi_list_lock must be taken with IRQs disabled + * vgic_irq->irq_lock must be taken with IRQs disabled * * As the ap_list_lock might be taken from the timer interrupt handler, * we have to disable IRQs before taking this lock and everything lower -- cgit v1.2.3 From 0d0ae656b71155ccc0be9388beef77a1f7e7558e Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 30 Mar 2023 18:47:41 +0100 Subject: KVM: arm64: timers: Use a per-vcpu, per-timer accumulator for fractional ns Instead of accumulating the fractional ns value generated every time we compute a ns delta in a global variable, use a per-vcpu, per-timer variable. This keeps the fractional ns local to the timer instead of contributing to any odd, unrelated timer. Reviewed-by: Colton Lewis Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20230330174800.2677007-2-maz@kernel.org --- arch/arm64/kvm/arch_timer.c | 2 +- include/kvm/arm_arch_timer.h | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/arm64/kvm') diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c index e1af4301b913..9515c645f03d 100644 --- a/arch/arm64/kvm/arch_timer.c +++ b/arch/arm64/kvm/arch_timer.c @@ -212,7 +212,7 @@ static u64 kvm_counter_compute_delta(struct arch_timer_context *timer_ctx, ns = cyclecounter_cyc2ns(timecounter->cc, val - now, timecounter->mask, - &timecounter->frac); + &timer_ctx->ns_frac); return ns; } diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h index c52a6e6839da..70d47c4adc6a 100644 --- a/include/kvm/arm_arch_timer.h +++ b/include/kvm/arm_arch_timer.h @@ -44,6 +44,7 @@ struct arch_timer_context { /* Emulated Timer (may be unused) */ struct hrtimer hrtimer; + u64 ns_frac; /* Offset for this counter/timer */ struct arch_timer_offset offset; -- cgit v1.2.3 From 2b4825a8694018901e641ccc2eafd0fff58d1415 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 30 Mar 2023 18:47:44 +0100 Subject: KVM: arm64: timers: Use CNTPOFF_EL2 to offset the physical timer With ECV and CNTPOFF_EL2, it is very easy to offer an offset for the physical timer. So let's do just that. Nothing can set the offset yet, so this should have no effect whatsoever (famous last words...). Reviewed-by: Colton Lewis Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20230330174800.2677007-5-maz@kernel.org --- arch/arm64/kvm/arch_timer.c | 18 +++++++++++++++++- arch/arm64/kvm/hypercalls.c | 2 +- include/clocksource/arm_arch_timer.h | 1 + include/kvm/arm_arch_timer.h | 2 ++ 4 files changed, 21 insertions(+), 2 deletions(-) (limited to 'arch/arm64/kvm') diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c index 9515c645f03d..3118ea0a1b41 100644 --- a/arch/arm64/kvm/arch_timer.c +++ b/arch/arm64/kvm/arch_timer.c @@ -52,6 +52,11 @@ static u64 kvm_arm_timer_read(struct kvm_vcpu *vcpu, struct arch_timer_context *timer, enum kvm_arch_timer_regs treg); +static bool has_cntpoff(void) +{ + return (has_vhe() && cpus_have_final_cap(ARM64_HAS_ECV_CNTPOFF)); +} + u32 timer_get_ctl(struct arch_timer_context *ctxt) { struct kvm_vcpu *vcpu = ctxt->vcpu; @@ -84,7 +89,7 @@ u64 timer_get_cval(struct arch_timer_context *ctxt) static u64 timer_get_offset(struct arch_timer_context *ctxt) { - if (ctxt->offset.vm_offset) + if (ctxt && ctxt->offset.vm_offset) return *ctxt->offset.vm_offset; return 0; @@ -432,6 +437,12 @@ static void set_cntvoff(u64 cntvoff) kvm_call_hyp(__kvm_timer_set_cntvoff, cntvoff); } +static void set_cntpoff(u64 cntpoff) +{ + if (has_cntpoff()) + write_sysreg_s(cntpoff, SYS_CNTPOFF_EL2); +} + static void timer_save_state(struct arch_timer_context *ctx) { struct arch_timer_cpu *timer = vcpu_timer(ctx->vcpu); @@ -480,6 +491,7 @@ static void timer_save_state(struct arch_timer_context *ctx) write_sysreg_el0(0, SYS_CNTP_CTL); isb(); + set_cntpoff(0); break; case NR_KVM_TIMERS: BUG(); @@ -550,6 +562,7 @@ static void timer_restore_state(struct arch_timer_context *ctx) write_sysreg_el0(timer_get_ctl(ctx), SYS_CNTV_CTL); break; case TIMER_PTIMER: + set_cntpoff(timer_get_offset(ctx)); write_sysreg_el0(timer_get_cval(ctx), SYS_CNTP_CVAL); isb(); write_sysreg_el0(timer_get_ctl(ctx), SYS_CNTP_CTL); @@ -767,6 +780,7 @@ void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu) vtimer->vcpu = vcpu; vtimer->offset.vm_offset = &vcpu->kvm->arch.timer_data.voffset; ptimer->vcpu = vcpu; + ptimer->offset.vm_offset = &vcpu->kvm->arch.timer_data.poffset; /* Synchronize cntvoff across all vtimers of a VM. */ timer_set_offset(vtimer, kvm_phys_timer_read()); @@ -1297,6 +1311,8 @@ void kvm_timer_init_vhe(void) val = read_sysreg(cnthctl_el2); val |= (CNTHCTL_EL1PCEN << cnthctl_shift); val |= (CNTHCTL_EL1PCTEN << cnthctl_shift); + if (cpus_have_final_cap(ARM64_HAS_ECV_CNTPOFF)) + val |= CNTHCTL_ECV; write_sysreg(val, cnthctl_el2); } diff --git a/arch/arm64/kvm/hypercalls.c b/arch/arm64/kvm/hypercalls.c index 5da884e11337..39a4707e081d 100644 --- a/arch/arm64/kvm/hypercalls.c +++ b/arch/arm64/kvm/hypercalls.c @@ -47,7 +47,7 @@ static void kvm_ptp_get_time(struct kvm_vcpu *vcpu, u64 *val) cycles = systime_snapshot.cycles - vcpu->kvm->arch.timer_data.voffset; break; case KVM_PTP_PHYS_COUNTER: - cycles = systime_snapshot.cycles; + cycles = systime_snapshot.cycles - vcpu->kvm->arch.timer_data.poffset; break; default: return; diff --git a/include/clocksource/arm_arch_timer.h b/include/clocksource/arm_arch_timer.h index 057c8964aefb..cbbc9a6dc571 100644 --- a/include/clocksource/arm_arch_timer.h +++ b/include/clocksource/arm_arch_timer.h @@ -21,6 +21,7 @@ #define CNTHCTL_EVNTEN (1 << 2) #define CNTHCTL_EVNTDIR (1 << 3) #define CNTHCTL_EVNTI (0xF << 4) +#define CNTHCTL_ECV (1 << 12) enum arch_timer_reg { ARCH_TIMER_REG_CTRL, diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h index 70d47c4adc6a..2dd0fd2406fb 100644 --- a/include/kvm/arm_arch_timer.h +++ b/include/kvm/arm_arch_timer.h @@ -34,6 +34,8 @@ struct arch_timer_offset { struct arch_timer_vm_data { /* Offset applied to the virtual timer/counter */ u64 voffset; + /* Offset applied to the physical timer/counter */ + u64 poffset; }; struct arch_timer_context { -- cgit v1.2.3 From c605ee245097d02ed5933e63ac601a8571712457 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 30 Mar 2023 18:47:45 +0100 Subject: KVM: arm64: timers: Allow physical offset without CNTPOFF_EL2 CNTPOFF_EL2 is awesome, but it is mostly vapourware, and no publicly available implementation has it. So for the common mortals, let's implement the emulated version of this thing. It means trapping accesses to the physical counter and timer, and emulate some of it as necessary. As for CNTPOFF_EL2, nobody sets the offset yet. Reviewed-by: Colton Lewis Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20230330174800.2677007-6-maz@kernel.org --- arch/arm64/include/asm/sysreg.h | 2 + arch/arm64/kvm/arch_timer.c | 98 +++++++++++++++++++++++++++++--------- arch/arm64/kvm/hyp/nvhe/timer-sr.c | 18 ++++--- arch/arm64/kvm/sys_regs.c | 9 ++++ 4 files changed, 98 insertions(+), 29 deletions(-) (limited to 'arch/arm64/kvm') diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 9e3ecba3c4e6..f8da9e1b0c11 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -388,6 +388,7 @@ #define SYS_CNTFRQ_EL0 sys_reg(3, 3, 14, 0, 0) +#define SYS_CNTPCT_EL0 sys_reg(3, 3, 14, 0, 1) #define SYS_CNTPCTSS_EL0 sys_reg(3, 3, 14, 0, 5) #define SYS_CNTVCTSS_EL0 sys_reg(3, 3, 14, 0, 6) @@ -400,6 +401,7 @@ #define SYS_AARCH32_CNTP_TVAL sys_reg(0, 0, 14, 2, 0) #define SYS_AARCH32_CNTP_CTL sys_reg(0, 0, 14, 2, 1) +#define SYS_AARCH32_CNTPCT sys_reg(0, 0, 0, 14, 0) #define SYS_AARCH32_CNTP_CVAL sys_reg(0, 2, 0, 14, 0) #define __PMEV_op2(n) ((n) & 0x7) diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c index 3118ea0a1b41..bb64a71ae193 100644 --- a/arch/arm64/kvm/arch_timer.c +++ b/arch/arm64/kvm/arch_timer.c @@ -458,6 +458,8 @@ static void timer_save_state(struct arch_timer_context *ctx) goto out; switch (index) { + u64 cval; + case TIMER_VTIMER: timer_set_ctl(ctx, read_sysreg_el0(SYS_CNTV_CTL)); timer_set_cval(ctx, read_sysreg_el0(SYS_CNTV_CVAL)); @@ -485,7 +487,12 @@ static void timer_save_state(struct arch_timer_context *ctx) break; case TIMER_PTIMER: timer_set_ctl(ctx, read_sysreg_el0(SYS_CNTP_CTL)); - timer_set_cval(ctx, read_sysreg_el0(SYS_CNTP_CVAL)); + cval = read_sysreg_el0(SYS_CNTP_CVAL); + + if (!has_cntpoff()) + cval -= timer_get_offset(ctx); + + timer_set_cval(ctx, cval); /* Disable the timer */ write_sysreg_el0(0, SYS_CNTP_CTL); @@ -555,6 +562,8 @@ static void timer_restore_state(struct arch_timer_context *ctx) goto out; switch (index) { + u64 cval, offset; + case TIMER_VTIMER: set_cntvoff(timer_get_offset(ctx)); write_sysreg_el0(timer_get_cval(ctx), SYS_CNTV_CVAL); @@ -562,8 +571,12 @@ static void timer_restore_state(struct arch_timer_context *ctx) write_sysreg_el0(timer_get_ctl(ctx), SYS_CNTV_CTL); break; case TIMER_PTIMER: - set_cntpoff(timer_get_offset(ctx)); - write_sysreg_el0(timer_get_cval(ctx), SYS_CNTP_CVAL); + cval = timer_get_cval(ctx); + offset = timer_get_offset(ctx); + set_cntpoff(offset); + if (!has_cntpoff()) + cval += offset; + write_sysreg_el0(cval, SYS_CNTP_CVAL); isb(); write_sysreg_el0(timer_get_ctl(ctx), SYS_CNTP_CTL); break; @@ -634,6 +647,61 @@ static void kvm_timer_vcpu_load_nogic(struct kvm_vcpu *vcpu) enable_percpu_irq(host_vtimer_irq, host_vtimer_irq_flags); } +/* If _pred is true, set bit in _set, otherwise set it in _clr */ +#define assign_clear_set_bit(_pred, _bit, _clr, _set) \ + do { \ + if (_pred) \ + (_set) |= (_bit); \ + else \ + (_clr) |= (_bit); \ + } while (0) + +static void timer_set_traps(struct kvm_vcpu *vcpu, struct timer_map *map) +{ + bool tpt, tpc; + u64 clr, set; + + /* + * No trapping gets configured here with nVHE. See + * __timer_enable_traps(), which is where the stuff happens. + */ + if (!has_vhe()) + return; + + /* + * Our default policy is not to trap anything. As we progress + * within this function, reality kicks in and we start adding + * traps based on emulation requirements. + */ + tpt = tpc = false; + + /* + * We have two possibility to deal with a physical offset: + * + * - Either we have CNTPOFF (yay!) or the offset is 0: + * we let the guest freely access the HW + * + * - or neither of these condition apply: + * we trap accesses to the HW, but still use it + * after correcting the physical offset + */ + if (!has_cntpoff() && timer_get_offset(map->direct_ptimer)) + tpt = tpc = true; + + /* + * Now that we have collected our requirements, compute the + * trap and enable bits. + */ + set = 0; + clr = 0; + + assign_clear_set_bit(tpt, CNTHCTL_EL1PCEN << 10, set, clr); + assign_clear_set_bit(tpc, CNTHCTL_EL1PCTEN << 10, set, clr); + + /* This only happens on VHE, so use the CNTKCTL_EL1 accessor */ + sysreg_clear_set(cntkctl_el1, clr, set); +} + void kvm_timer_vcpu_load(struct kvm_vcpu *vcpu) { struct arch_timer_cpu *timer = vcpu_timer(vcpu); @@ -657,9 +725,10 @@ void kvm_timer_vcpu_load(struct kvm_vcpu *vcpu) timer_restore_state(map.direct_vtimer); if (map.direct_ptimer) timer_restore_state(map.direct_ptimer); - if (map.emul_ptimer) timer_emulate(map.emul_ptimer); + + timer_set_traps(vcpu, &map); } bool kvm_timer_should_notify_user(struct kvm_vcpu *vcpu) @@ -1292,28 +1361,11 @@ no_vgic: return 0; } -/* - * On VHE system, we only need to configure the EL2 timer trap register once, - * not for every world switch. - * The host kernel runs at EL2 with HCR_EL2.TGE == 1, - * and this makes those bits have no effect for the host kernel execution. - */ +/* If we have CNTPOFF, permanently set ECV to enable it */ void kvm_timer_init_vhe(void) { - /* When HCR_EL2.E2H ==1, EL1PCEN and EL1PCTEN are shifted by 10 */ - u32 cnthctl_shift = 10; - u64 val; - - /* - * VHE systems allow the guest direct access to the EL1 physical - * timer/counter. - */ - val = read_sysreg(cnthctl_el2); - val |= (CNTHCTL_EL1PCEN << cnthctl_shift); - val |= (CNTHCTL_EL1PCTEN << cnthctl_shift); if (cpus_have_final_cap(ARM64_HAS_ECV_CNTPOFF)) - val |= CNTHCTL_ECV; - write_sysreg(val, cnthctl_el2); + sysreg_clear_set(cntkctl_el1, 0, CNTHCTL_ECV); } static void set_timer_irqs(struct kvm *kvm, int vtimer_irq, int ptimer_irq) diff --git a/arch/arm64/kvm/hyp/nvhe/timer-sr.c b/arch/arm64/kvm/hyp/nvhe/timer-sr.c index 9072e71693ba..b185ac0dbd47 100644 --- a/arch/arm64/kvm/hyp/nvhe/timer-sr.c +++ b/arch/arm64/kvm/hyp/nvhe/timer-sr.c @@ -9,6 +9,7 @@ #include #include +#include void __kvm_timer_set_cntvoff(u64 cntvoff) { @@ -35,14 +36,19 @@ void __timer_disable_traps(struct kvm_vcpu *vcpu) */ void __timer_enable_traps(struct kvm_vcpu *vcpu) { - u64 val; + u64 clr = 0, set = 0; /* * Disallow physical timer access for the guest - * Physical counter access is allowed + * Physical counter access is allowed if no offset is enforced + * or running protected (we don't offset anything in this case). */ - val = read_sysreg(cnthctl_el2); - val &= ~CNTHCTL_EL1PCEN; - val |= CNTHCTL_EL1PCTEN; - write_sysreg(val, cnthctl_el2); + clr = CNTHCTL_EL1PCEN; + if (is_protected_kvm_enabled() || + !kern_hyp_va(vcpu->kvm)->arch.timer_data.poffset) + set |= CNTHCTL_EL1PCTEN; + else + clr |= CNTHCTL_EL1PCTEN; + + sysreg_clear_set(cnthctl_el2, clr, set); } diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 53749d3a0996..be7c2598e563 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -1139,6 +1139,12 @@ static bool access_arch_timer(struct kvm_vcpu *vcpu, tmr = TIMER_PTIMER; treg = TIMER_REG_CVAL; break; + case SYS_CNTPCT_EL0: + case SYS_CNTPCTSS_EL0: + case SYS_AARCH32_CNTPCT: + tmr = TIMER_PTIMER; + treg = TIMER_REG_CNT; + break; default: print_sys_reg_msg(p, "%s", "Unhandled trapped timer register"); kvm_inject_undefined(vcpu); @@ -2075,6 +2081,8 @@ static const struct sys_reg_desc sys_reg_descs[] = { AMU_AMEVTYPER1_EL0(14), AMU_AMEVTYPER1_EL0(15), + { SYS_DESC(SYS_CNTPCT_EL0), access_arch_timer }, + { SYS_DESC(SYS_CNTPCTSS_EL0), access_arch_timer }, { SYS_DESC(SYS_CNTP_TVAL_EL0), access_arch_timer }, { SYS_DESC(SYS_CNTP_CTL_EL0), access_arch_timer }, { SYS_DESC(SYS_CNTP_CVAL_EL0), access_arch_timer }, @@ -2525,6 +2533,7 @@ static const struct sys_reg_desc cp15_64_regs[] = { { Op1( 0), CRn( 0), CRm( 2), Op2( 0), access_vm_reg, NULL, TTBR0_EL1 }, { CP15_PMU_SYS_REG(DIRECT, 0, 0, 9, 0), .access = access_pmu_evcntr }, { Op1( 0), CRn( 0), CRm(12), Op2( 0), access_gic_sgi }, /* ICC_SGI1R */ + { SYS_DESC(SYS_AARCH32_CNTPCT), access_arch_timer }, { Op1( 1), CRn( 0), CRm( 2), Op2( 0), access_vm_reg, NULL, TTBR1_EL1 }, { Op1( 1), CRn( 0), CRm(12), Op2( 0), access_gic_sgi }, /* ICC_ASGI1R */ { Op1( 2), CRn( 0), CRm(12), Op2( 0), access_gic_sgi }, /* ICC_SGI0R */ -- cgit v1.2.3 From 96906a9150a86a86b0464939625279b8e19f6e88 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 30 Mar 2023 18:47:46 +0100 Subject: KVM: arm64: Expose {un,}lock_all_vcpus() to the rest of KVM Being able to lock/unlock all vcpus in one go is a feature that only the vgic has enjoyed so far. Let's be brave and expose it to the world. Reviewed-by: Colton Lewis Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20230330174800.2677007-7-maz@kernel.org --- arch/arm64/include/asm/kvm_host.h | 3 +++ arch/arm64/kvm/arm.c | 43 +++++++++++++++++++++++++++++++++++ arch/arm64/kvm/vgic/vgic-kvm-device.c | 38 ------------------------------- arch/arm64/kvm/vgic/vgic.h | 3 --- 4 files changed, 46 insertions(+), 41 deletions(-) (limited to 'arch/arm64/kvm') diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index bcd774d74f34..002a10cbade2 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -922,6 +922,9 @@ void kvm_reset_sys_regs(struct kvm_vcpu *vcpu); int __init kvm_sys_reg_table_init(void); +bool lock_all_vcpus(struct kvm *kvm); +void unlock_all_vcpus(struct kvm *kvm); + /* MMIO helpers */ void kvm_mmio_write_buf(void *buf, unsigned int len, unsigned long data); unsigned long kvm_mmio_read_buf(const void *buf, unsigned int len); diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 3bd732eaf087..ae5110cc3bad 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -1484,6 +1484,49 @@ long kvm_arch_vm_ioctl(struct file *filp, } } +/* unlocks vcpus from @vcpu_lock_idx and smaller */ +static void unlock_vcpus(struct kvm *kvm, int vcpu_lock_idx) +{ + struct kvm_vcpu *tmp_vcpu; + + for (; vcpu_lock_idx >= 0; vcpu_lock_idx--) { + tmp_vcpu = kvm_get_vcpu(kvm, vcpu_lock_idx); + mutex_unlock(&tmp_vcpu->mutex); + } +} + +void unlock_all_vcpus(struct kvm *kvm) +{ + lockdep_assert_held(&kvm->lock); + + unlock_vcpus(kvm, atomic_read(&kvm->online_vcpus) - 1); +} + +/* Returns true if all vcpus were locked, false otherwise */ +bool lock_all_vcpus(struct kvm *kvm) +{ + struct kvm_vcpu *tmp_vcpu; + unsigned long c; + + lockdep_assert_held(&kvm->lock); + + /* + * Any time a vcpu is in an ioctl (including running), the + * core KVM code tries to grab the vcpu->mutex. + * + * By grabbing the vcpu->mutex of all VCPUs we ensure that no + * other VCPUs can fiddle with the state while we access it. + */ + kvm_for_each_vcpu(c, tmp_vcpu, kvm) { + if (!mutex_trylock(&tmp_vcpu->mutex)) { + unlock_vcpus(kvm, c - 1); + return false; + } + } + + return true; +} + static unsigned long nvhe_percpu_size(void) { return (unsigned long)CHOOSE_NVHE_SYM(__per_cpu_end) - diff --git a/arch/arm64/kvm/vgic/vgic-kvm-device.c b/arch/arm64/kvm/vgic/vgic-kvm-device.c index edeac2380591..04dd68835b3f 100644 --- a/arch/arm64/kvm/vgic/vgic-kvm-device.c +++ b/arch/arm64/kvm/vgic/vgic-kvm-device.c @@ -342,44 +342,6 @@ int vgic_v2_parse_attr(struct kvm_device *dev, struct kvm_device_attr *attr, return 0; } -/* unlocks vcpus from @vcpu_lock_idx and smaller */ -static void unlock_vcpus(struct kvm *kvm, int vcpu_lock_idx) -{ - struct kvm_vcpu *tmp_vcpu; - - for (; vcpu_lock_idx >= 0; vcpu_lock_idx--) { - tmp_vcpu = kvm_get_vcpu(kvm, vcpu_lock_idx); - mutex_unlock(&tmp_vcpu->mutex); - } -} - -void unlock_all_vcpus(struct kvm *kvm) -{ - unlock_vcpus(kvm, atomic_read(&kvm->online_vcpus) - 1); -} - -/* Returns true if all vcpus were locked, false otherwise */ -bool lock_all_vcpus(struct kvm *kvm) -{ - struct kvm_vcpu *tmp_vcpu; - unsigned long c; - - /* - * Any time a vcpu is run, vcpu_load is called which tries to grab the - * vcpu->mutex. By grabbing the vcpu->mutex of all VCPUs we ensure - * that no other VCPUs are run and fiddle with the vgic state while we - * access it. - */ - kvm_for_each_vcpu(c, tmp_vcpu, kvm) { - if (!mutex_trylock(&tmp_vcpu->mutex)) { - unlock_vcpus(kvm, c - 1); - return false; - } - } - - return true; -} - /** * vgic_v2_attr_regs_access - allows user space to access VGIC v2 state * diff --git a/arch/arm64/kvm/vgic/vgic.h b/arch/arm64/kvm/vgic/vgic.h index 7f7f3c5ed85a..f9923beedd27 100644 --- a/arch/arm64/kvm/vgic/vgic.h +++ b/arch/arm64/kvm/vgic/vgic.h @@ -273,9 +273,6 @@ int vgic_init(struct kvm *kvm); void vgic_debug_init(struct kvm *kvm); void vgic_debug_destroy(struct kvm *kvm); -bool lock_all_vcpus(struct kvm *kvm); -void unlock_all_vcpus(struct kvm *kvm); - static inline int vgic_v3_max_apr_idx(struct kvm_vcpu *vcpu) { struct vgic_cpu *cpu_if = &vcpu->arch.vgic_cpu; -- cgit v1.2.3 From 30ec7997d175cd689fc61bfc4059f4d35b11858c Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 30 Mar 2023 18:47:47 +0100 Subject: KVM: arm64: timers: Allow userspace to set the global counter offset And this is the moment you have all been waiting for: setting the counter offset from userspace. We expose a brand new capability that reports the ability to set the offset for both the virtual and physical sides. In keeping with the architecture, the offset is expressed as a delta that is substracted from the physical counter value. Once this new API is used, there is no going back, and the counters cannot be written to to set the offsets implicitly (the writes are instead ignored). Reviewed-by: Colton Lewis Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20230330174800.2677007-8-maz@kernel.org --- arch/arm64/include/asm/kvm_host.h | 4 +++ arch/arm64/include/uapi/asm/kvm.h | 9 +++++++ arch/arm64/kvm/arch_timer.c | 54 +++++++++++++++++++++++++++++++++++---- arch/arm64/kvm/arm.c | 8 ++++++ include/uapi/linux/kvm.h | 3 +++ 5 files changed, 73 insertions(+), 5 deletions(-) (limited to 'arch/arm64/kvm') diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 002a10cbade2..116233a390e9 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -221,6 +221,8 @@ struct kvm_arch { #define KVM_ARCH_FLAG_EL1_32BIT 4 /* PSCI SYSTEM_SUSPEND enabled for the guest */ #define KVM_ARCH_FLAG_SYSTEM_SUSPEND_ENABLED 5 + /* VM counter offset */ +#define KVM_ARCH_FLAG_VM_COUNTER_OFFSET 6 unsigned long flags; @@ -1010,6 +1012,8 @@ int kvm_arm_vcpu_arch_has_attr(struct kvm_vcpu *vcpu, long kvm_vm_ioctl_mte_copy_tags(struct kvm *kvm, struct kvm_arm_copy_mte_tags *copy_tags); +int kvm_vm_ioctl_set_counter_offset(struct kvm *kvm, + struct kvm_arm_counter_offset *offset); /* Guest/host FPSIMD coordination helpers */ int kvm_arch_vcpu_run_map_fp(struct kvm_vcpu *vcpu); diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h index f8129c624b07..12fb0d8a760a 100644 --- a/arch/arm64/include/uapi/asm/kvm.h +++ b/arch/arm64/include/uapi/asm/kvm.h @@ -198,6 +198,15 @@ struct kvm_arm_copy_mte_tags { __u64 reserved[2]; }; +/* + * Counter/Timer offset structure. Describe the virtual/physical offset. + * To be used with KVM_ARM_SET_COUNTER_OFFSET. + */ +struct kvm_arm_counter_offset { + __u64 counter_offset; + __u64 reserved; +}; + #define KVM_ARM_TAGS_TO_GUEST 0 #define KVM_ARM_TAGS_FROM_GUEST 1 diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c index bb64a71ae193..771504c79711 100644 --- a/arch/arm64/kvm/arch_timer.c +++ b/arch/arm64/kvm/arch_timer.c @@ -851,9 +851,11 @@ void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu) ptimer->vcpu = vcpu; ptimer->offset.vm_offset = &vcpu->kvm->arch.timer_data.poffset; - /* Synchronize cntvoff across all vtimers of a VM. */ - timer_set_offset(vtimer, kvm_phys_timer_read()); - timer_set_offset(ptimer, 0); + /* Synchronize offsets across timers of a VM if not already provided */ + if (!test_bit(KVM_ARCH_FLAG_VM_COUNTER_OFFSET, &vcpu->kvm->arch.flags)) { + timer_set_offset(vtimer, kvm_phys_timer_read()); + timer_set_offset(ptimer, 0); + } hrtimer_init(&timer->bg_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD); timer->bg_timer.function = kvm_bg_timer_expire; @@ -897,8 +899,11 @@ int kvm_arm_timer_set_reg(struct kvm_vcpu *vcpu, u64 regid, u64 value) kvm_arm_timer_write(vcpu, timer, TIMER_REG_CTL, value); break; case KVM_REG_ARM_TIMER_CNT: - timer = vcpu_vtimer(vcpu); - timer_set_offset(timer, kvm_phys_timer_read() - value); + if (!test_bit(KVM_ARCH_FLAG_VM_COUNTER_OFFSET, + &vcpu->kvm->arch.flags)) { + timer = vcpu_vtimer(vcpu); + timer_set_offset(timer, kvm_phys_timer_read() - value); + } break; case KVM_REG_ARM_TIMER_CVAL: timer = vcpu_vtimer(vcpu); @@ -908,6 +913,13 @@ int kvm_arm_timer_set_reg(struct kvm_vcpu *vcpu, u64 regid, u64 value) timer = vcpu_ptimer(vcpu); kvm_arm_timer_write(vcpu, timer, TIMER_REG_CTL, value); break; + case KVM_REG_ARM_PTIMER_CNT: + if (!test_bit(KVM_ARCH_FLAG_VM_COUNTER_OFFSET, + &vcpu->kvm->arch.flags)) { + timer = vcpu_ptimer(vcpu); + timer_set_offset(timer, kvm_phys_timer_read() - value); + } + break; case KVM_REG_ARM_PTIMER_CVAL: timer = vcpu_ptimer(vcpu); kvm_arm_timer_write(vcpu, timer, TIMER_REG_CVAL, value); @@ -1443,3 +1455,35 @@ int kvm_arm_timer_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) return -ENXIO; } + +int kvm_vm_ioctl_set_counter_offset(struct kvm *kvm, + struct kvm_arm_counter_offset *offset) +{ + int ret = 0; + + if (offset->reserved) + return -EINVAL; + + mutex_lock(&kvm->lock); + + if (lock_all_vcpus(kvm)) { + set_bit(KVM_ARCH_FLAG_VM_COUNTER_OFFSET, &kvm->arch.flags); + + /* + * If userspace decides to set the offset using this + * API rather than merely restoring the counter + * values, the offset applies to both the virtual and + * physical views. + */ + kvm->arch.timer_data.voffset = offset->counter_offset; + kvm->arch.timer_data.poffset = offset->counter_offset; + + unlock_all_vcpus(kvm); + } else { + ret = -EBUSY; + } + + mutex_unlock(&kvm->lock); + + return ret; +} diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index ae5110cc3bad..1c8a4bbae684 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -220,6 +220,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_VCPU_ATTRIBUTES: case KVM_CAP_PTP_KVM: case KVM_CAP_ARM_SYSTEM_SUSPEND: + case KVM_CAP_COUNTER_OFFSET: r = 1; break; case KVM_CAP_SET_GUEST_DEBUG2: @@ -1479,6 +1480,13 @@ long kvm_arch_vm_ioctl(struct file *filp, return -EFAULT; return kvm_vm_ioctl_mte_copy_tags(kvm, ©_tags); } + case KVM_ARM_SET_COUNTER_OFFSET: { + struct kvm_arm_counter_offset offset; + + if (copy_from_user(&offset, argp, sizeof(offset))) + return -EFAULT; + return kvm_vm_ioctl_set_counter_offset(kvm, &offset); + } default: return -EINVAL; } diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index d77aef872a0a..6a7e1a0ecf04 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1184,6 +1184,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_S390_PROTECTED_ASYNC_DISABLE 224 #define KVM_CAP_DIRTY_LOG_RING_WITH_BITMAP 225 #define KVM_CAP_PMU_EVENT_MASKED_EVENTS 226 +#define KVM_CAP_COUNTER_OFFSET 227 #ifdef KVM_CAP_IRQ_ROUTING @@ -1543,6 +1544,8 @@ struct kvm_s390_ucas_mapping { #define KVM_SET_PMU_EVENT_FILTER _IOW(KVMIO, 0xb2, struct kvm_pmu_event_filter) #define KVM_PPC_SVM_OFF _IO(KVMIO, 0xb3) #define KVM_ARM_MTE_COPY_TAGS _IOR(KVMIO, 0xb4, struct kvm_arm_copy_mte_tags) +/* Available with KVM_CAP_COUNTER_OFFSET */ +#define KVM_ARM_SET_COUNTER_OFFSET _IOW(KVMIO, 0xb5, struct kvm_arm_counter_offset) /* ioctl for vm fd */ #define KVM_CREATE_DEVICE _IOWR(KVMIO, 0xe0, struct kvm_create_device) -- cgit v1.2.3 From 680232a94c1289aad25ffae02f2785823763b456 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 30 Mar 2023 18:47:48 +0100 Subject: KVM: arm64: timers: Allow save/restoring of the physical timer Nothing like being 10 year late to a party! Now that userspace can set counter offsets, we can save/restore the physical timer as well! Nobody really cared so far, but you're welcome anyway. Reviewed-by: Colton Lewis Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20230330174800.2677007-9-maz@kernel.org --- arch/arm64/kvm/guest.c | 29 +++++++++++++++++------------ 1 file changed, 17 insertions(+), 12 deletions(-) (limited to 'arch/arm64/kvm') diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index 07444fa22888..46e910819de6 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -590,11 +590,16 @@ static unsigned long num_core_regs(const struct kvm_vcpu *vcpu) return copy_core_reg_indices(vcpu, NULL); } -/** - * ARM64 versions of the TIMER registers, always available on arm64 - */ +static const u64 timer_reg_list[] = { + KVM_REG_ARM_TIMER_CTL, + KVM_REG_ARM_TIMER_CNT, + KVM_REG_ARM_TIMER_CVAL, + KVM_REG_ARM_PTIMER_CTL, + KVM_REG_ARM_PTIMER_CNT, + KVM_REG_ARM_PTIMER_CVAL, +}; -#define NUM_TIMER_REGS 3 +#define NUM_TIMER_REGS ARRAY_SIZE(timer_reg_list) static bool is_timer_reg(u64 index) { @@ -602,6 +607,9 @@ static bool is_timer_reg(u64 index) case KVM_REG_ARM_TIMER_CTL: case KVM_REG_ARM_TIMER_CNT: case KVM_REG_ARM_TIMER_CVAL: + case KVM_REG_ARM_PTIMER_CTL: + case KVM_REG_ARM_PTIMER_CNT: + case KVM_REG_ARM_PTIMER_CVAL: return true; } return false; @@ -609,14 +617,11 @@ static bool is_timer_reg(u64 index) static int copy_timer_indices(struct kvm_vcpu *vcpu, u64 __user *uindices) { - if (put_user(KVM_REG_ARM_TIMER_CTL, uindices)) - return -EFAULT; - uindices++; - if (put_user(KVM_REG_ARM_TIMER_CNT, uindices)) - return -EFAULT; - uindices++; - if (put_user(KVM_REG_ARM_TIMER_CVAL, uindices)) - return -EFAULT; + for (int i = 0; i < NUM_TIMER_REGS; i++) { + if (put_user(timer_reg_list[i], uindices)) + return -EFAULT; + uindices++; + } return 0; } -- cgit v1.2.3 From 5591805d2c21b70838b723b71b8ff613de51cfff Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 30 Mar 2023 18:47:49 +0100 Subject: KVM: arm64: timers: Rationalise per-vcpu timer init The way we initialise our timer contexts may be satisfactory for two timers, but will be getting pretty annoying with four. Cleanup the whole thing by removing the code duplication and getting rid of unused IRQ configuration elements. Reviewed-by: Colton Lewis Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20230330174800.2677007-10-maz@kernel.org --- arch/arm64/kvm/arch_timer.c | 73 +++++++++++++++++++++++--------------------- include/kvm/arm_arch_timer.h | 1 - 2 files changed, 39 insertions(+), 35 deletions(-) (limited to 'arch/arm64/kvm') diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c index 771504c79711..e46f04ed8f86 100644 --- a/arch/arm64/kvm/arch_timer.c +++ b/arch/arm64/kvm/arch_timer.c @@ -30,14 +30,9 @@ static u32 host_ptimer_irq_flags; static DEFINE_STATIC_KEY_FALSE(has_gic_active_state); -static const struct kvm_irq_level default_ptimer_irq = { - .irq = 30, - .level = 1, -}; - -static const struct kvm_irq_level default_vtimer_irq = { - .irq = 27, - .level = 1, +static const u8 default_ppi[] = { + [TIMER_PTIMER] = 30, + [TIMER_VTIMER] = 27, }; static bool kvm_timer_irq_can_fire(struct arch_timer_context *timer_ctx); @@ -820,12 +815,14 @@ int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu) * resets the timer to be disabled and unmasked and is compliant with * the ARMv7 architecture. */ - timer_set_ctl(vcpu_vtimer(vcpu), 0); - timer_set_ctl(vcpu_ptimer(vcpu), 0); + for (int i = 0; i < NR_KVM_TIMERS; i++) + timer_set_ctl(vcpu_get_timer(vcpu, i), 0); + if (timer->enabled) { - kvm_timer_update_irq(vcpu, false, vcpu_vtimer(vcpu)); - kvm_timer_update_irq(vcpu, false, vcpu_ptimer(vcpu)); + for (int i = 0; i < NR_KVM_TIMERS; i++) + kvm_timer_update_irq(vcpu, false, + vcpu_get_timer(vcpu, i)); if (irqchip_in_kernel(vcpu->kvm)) { kvm_vgic_reset_mapped_irq(vcpu, map.direct_vtimer->irq.irq); @@ -840,39 +837,47 @@ int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu) return 0; } +static void timer_context_init(struct kvm_vcpu *vcpu, int timerid) +{ + struct arch_timer_context *ctxt = vcpu_get_timer(vcpu, timerid); + struct kvm *kvm = vcpu->kvm; + + ctxt->vcpu = vcpu; + + if (timerid == TIMER_VTIMER) + ctxt->offset.vm_offset = &kvm->arch.timer_data.voffset; + else + ctxt->offset.vm_offset = &kvm->arch.timer_data.poffset; + + hrtimer_init(&ctxt->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD); + ctxt->hrtimer.function = kvm_hrtimer_expire; + ctxt->irq.irq = default_ppi[timerid]; + + switch (timerid) { + case TIMER_PTIMER: + ctxt->host_timer_irq = host_ptimer_irq; + break; + case TIMER_VTIMER: + ctxt->host_timer_irq = host_vtimer_irq; + break; + } +} + void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu) { struct arch_timer_cpu *timer = vcpu_timer(vcpu); - struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); - struct arch_timer_context *ptimer = vcpu_ptimer(vcpu); - vtimer->vcpu = vcpu; - vtimer->offset.vm_offset = &vcpu->kvm->arch.timer_data.voffset; - ptimer->vcpu = vcpu; - ptimer->offset.vm_offset = &vcpu->kvm->arch.timer_data.poffset; + for (int i = 0; i < NR_KVM_TIMERS; i++) + timer_context_init(vcpu, i); /* Synchronize offsets across timers of a VM if not already provided */ if (!test_bit(KVM_ARCH_FLAG_VM_COUNTER_OFFSET, &vcpu->kvm->arch.flags)) { - timer_set_offset(vtimer, kvm_phys_timer_read()); - timer_set_offset(ptimer, 0); + timer_set_offset(vcpu_vtimer(vcpu), kvm_phys_timer_read()); + timer_set_offset(vcpu_ptimer(vcpu), 0); } hrtimer_init(&timer->bg_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD); timer->bg_timer.function = kvm_bg_timer_expire; - - hrtimer_init(&vtimer->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD); - hrtimer_init(&ptimer->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD); - vtimer->hrtimer.function = kvm_hrtimer_expire; - ptimer->hrtimer.function = kvm_hrtimer_expire; - - vtimer->irq.irq = default_vtimer_irq.irq; - ptimer->irq.irq = default_ptimer_irq.irq; - - vtimer->host_timer_irq = host_vtimer_irq; - ptimer->host_timer_irq = host_ptimer_irq; - - vtimer->host_timer_irq_flags = host_vtimer_irq_flags; - ptimer->host_timer_irq_flags = host_ptimer_irq_flags; } void kvm_timer_cpu_up(void) diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h index 2dd0fd2406fb..c746ef64220b 100644 --- a/include/kvm/arm_arch_timer.h +++ b/include/kvm/arm_arch_timer.h @@ -59,7 +59,6 @@ struct arch_timer_context { /* Duplicated state from arch_timer.c for convenience */ u32 host_timer_irq; - u32 host_timer_irq_flags; }; struct timer_map { -- cgit v1.2.3 From 33c549460ef9119eb115484e81f54521122341db Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 30 Mar 2023 18:47:50 +0100 Subject: KVM: arm64: timers: Abstract per-timer IRQ access As we are about to move the location of the per-timer IRQ into the VM structure, abstract the location of the IRQ behind an accessor. This will make the repainting sligntly less painful. Reviewed-by: Colton Lewis Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20230330174800.2677007-11-maz@kernel.org --- arch/arm64/kvm/arch_timer.c | 38 +++++++++++++++++++------------------- include/kvm/arm_arch_timer.h | 2 ++ 2 files changed, 21 insertions(+), 19 deletions(-) (limited to 'arch/arm64/kvm') diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c index e46f04ed8f86..d08d8c2fc30d 100644 --- a/arch/arm64/kvm/arch_timer.c +++ b/arch/arm64/kvm/arch_timer.c @@ -392,12 +392,12 @@ static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level, int ret; timer_ctx->irq.level = new_level; - trace_kvm_timer_update_irq(vcpu->vcpu_id, timer_ctx->irq.irq, + trace_kvm_timer_update_irq(vcpu->vcpu_id, timer_irq(timer_ctx), timer_ctx->irq.level); if (!userspace_irqchip(vcpu->kvm)) { ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id, - timer_ctx->irq.irq, + timer_irq(timer_ctx), timer_ctx->irq.level, timer_ctx); WARN_ON(ret); @@ -607,7 +607,7 @@ static void kvm_timer_vcpu_load_gic(struct arch_timer_context *ctx) kvm_timer_update_irq(ctx->vcpu, kvm_timer_should_fire(ctx), ctx); if (irqchip_in_kernel(vcpu->kvm)) - phys_active = kvm_vgic_map_is_active(vcpu, ctx->irq.irq); + phys_active = kvm_vgic_map_is_active(vcpu, timer_irq(ctx)); phys_active |= ctx->irq.level; @@ -825,9 +825,9 @@ int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu) vcpu_get_timer(vcpu, i)); if (irqchip_in_kernel(vcpu->kvm)) { - kvm_vgic_reset_mapped_irq(vcpu, map.direct_vtimer->irq.irq); + kvm_vgic_reset_mapped_irq(vcpu, timer_irq(map.direct_vtimer)); if (map.direct_ptimer) - kvm_vgic_reset_mapped_irq(vcpu, map.direct_ptimer->irq.irq); + kvm_vgic_reset_mapped_irq(vcpu, timer_irq(map.direct_ptimer)); } } @@ -851,7 +851,7 @@ static void timer_context_init(struct kvm_vcpu *vcpu, int timerid) hrtimer_init(&ctxt->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD); ctxt->hrtimer.function = kvm_hrtimer_expire; - ctxt->irq.irq = default_ppi[timerid]; + timer_irq(ctxt) = default_ppi[timerid]; switch (timerid) { case TIMER_PTIMER: @@ -1295,19 +1295,19 @@ static bool timer_irqs_are_valid(struct kvm_vcpu *vcpu) int vtimer_irq, ptimer_irq, ret; unsigned long i; - vtimer_irq = vcpu_vtimer(vcpu)->irq.irq; + vtimer_irq = timer_irq(vcpu_vtimer(vcpu)); ret = kvm_vgic_set_owner(vcpu, vtimer_irq, vcpu_vtimer(vcpu)); if (ret) return false; - ptimer_irq = vcpu_ptimer(vcpu)->irq.irq; + ptimer_irq = timer_irq(vcpu_ptimer(vcpu)); ret = kvm_vgic_set_owner(vcpu, ptimer_irq, vcpu_ptimer(vcpu)); if (ret) return false; kvm_for_each_vcpu(i, vcpu, vcpu->kvm) { - if (vcpu_vtimer(vcpu)->irq.irq != vtimer_irq || - vcpu_ptimer(vcpu)->irq.irq != ptimer_irq) + if (timer_irq(vcpu_vtimer(vcpu)) != vtimer_irq || + timer_irq(vcpu_ptimer(vcpu)) != ptimer_irq) return false; } @@ -1322,9 +1322,9 @@ bool kvm_arch_timer_get_input_level(int vintid) if (WARN(!vcpu, "No vcpu context!\n")) return false; - if (vintid == vcpu_vtimer(vcpu)->irq.irq) + if (vintid == timer_irq(vcpu_vtimer(vcpu))) timer = vcpu_vtimer(vcpu); - else if (vintid == vcpu_ptimer(vcpu)->irq.irq) + else if (vintid == timer_irq(vcpu_ptimer(vcpu))) timer = vcpu_ptimer(vcpu); else BUG(); @@ -1358,7 +1358,7 @@ int kvm_timer_enable(struct kvm_vcpu *vcpu) ret = kvm_vgic_map_phys_irq(vcpu, map.direct_vtimer->host_timer_irq, - map.direct_vtimer->irq.irq, + timer_irq(map.direct_vtimer), &arch_timer_irq_ops); if (ret) return ret; @@ -1366,7 +1366,7 @@ int kvm_timer_enable(struct kvm_vcpu *vcpu) if (map.direct_ptimer) { ret = kvm_vgic_map_phys_irq(vcpu, map.direct_ptimer->host_timer_irq, - map.direct_ptimer->irq.irq, + timer_irq(map.direct_ptimer), &arch_timer_irq_ops); } @@ -1391,8 +1391,8 @@ static void set_timer_irqs(struct kvm *kvm, int vtimer_irq, int ptimer_irq) unsigned long i; kvm_for_each_vcpu(i, vcpu, kvm) { - vcpu_vtimer(vcpu)->irq.irq = vtimer_irq; - vcpu_ptimer(vcpu)->irq.irq = ptimer_irq; + timer_irq(vcpu_vtimer(vcpu)) = vtimer_irq; + timer_irq(vcpu_ptimer(vcpu)) = ptimer_irq; } } @@ -1417,10 +1417,10 @@ int kvm_arm_timer_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) switch (attr->attr) { case KVM_ARM_VCPU_TIMER_IRQ_VTIMER: - set_timer_irqs(vcpu->kvm, irq, ptimer->irq.irq); + set_timer_irqs(vcpu->kvm, irq, timer_irq(ptimer)); break; case KVM_ARM_VCPU_TIMER_IRQ_PTIMER: - set_timer_irqs(vcpu->kvm, vtimer->irq.irq, irq); + set_timer_irqs(vcpu->kvm, timer_irq(vtimer), irq); break; default: return -ENXIO; @@ -1446,7 +1446,7 @@ int kvm_arm_timer_get_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) return -ENXIO; } - irq = timer->irq.irq; + irq = timer_irq(timer); return put_user(irq, uaddr); } diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h index c746ef64220b..27cada09f588 100644 --- a/include/kvm/arm_arch_timer.h +++ b/include/kvm/arm_arch_timer.h @@ -109,6 +109,8 @@ bool kvm_arch_timer_get_input_level(int vintid); #define arch_timer_ctx_index(ctx) ((ctx) - vcpu_timer((ctx)->vcpu)->timers) +#define timer_irq(ctx) ((ctx)->irq.irq) + u64 kvm_arm_timer_read_sysreg(struct kvm_vcpu *vcpu, enum kvm_arch_timers tmr, enum kvm_arch_timer_regs treg); -- cgit v1.2.3 From 8a5eb2d210807e7dbe9ece7075533014cf4b9c27 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 30 Mar 2023 18:47:51 +0100 Subject: KVM: arm64: timers: Move the timer IRQs into arch_timer_vm_data Having the timer IRQs duplicated into each vcpu isn't great, and becomes absolutely awful with NV. So let's move these into the per-VM arch_timer_vm_data structure. This simplifies a lot of code, but requires us to introduce a mutex so that we can reason about userspace trying to change an interrupt number while another vcpu is running, something that wasn't really well handled so far. Reviewed-by: Colton Lewis Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20230330174800.2677007-12-maz@kernel.org --- arch/arm64/include/asm/kvm_host.h | 2 + arch/arm64/kvm/arch_timer.c | 108 ++++++++++++++++++++++---------------- arch/arm64/kvm/arm.c | 2 + include/kvm/arm_arch_timer.h | 18 +++++-- 4 files changed, 82 insertions(+), 48 deletions(-) (limited to 'arch/arm64/kvm') diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 116233a390e9..1280154c9ef3 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -223,6 +223,8 @@ struct kvm_arch { #define KVM_ARCH_FLAG_SYSTEM_SUSPEND_ENABLED 5 /* VM counter offset */ #define KVM_ARCH_FLAG_VM_COUNTER_OFFSET 6 + /* Timer PPIs made immutable */ +#define KVM_ARCH_FLAG_TIMER_PPIS_IMMUTABLE 7 unsigned long flags; diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c index d08d8c2fc30d..1d811735e05f 100644 --- a/arch/arm64/kvm/arch_timer.c +++ b/arch/arm64/kvm/arch_timer.c @@ -851,7 +851,6 @@ static void timer_context_init(struct kvm_vcpu *vcpu, int timerid) hrtimer_init(&ctxt->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD); ctxt->hrtimer.function = kvm_hrtimer_expire; - timer_irq(ctxt) = default_ppi[timerid]; switch (timerid) { case TIMER_PTIMER: @@ -880,6 +879,13 @@ void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu) timer->bg_timer.function = kvm_bg_timer_expire; } +void kvm_timer_init_vm(struct kvm *kvm) +{ + mutex_init(&kvm->arch.timer_data.lock); + for (int i = 0; i < NR_KVM_TIMERS; i++) + kvm->arch.timer_data.ppi[i] = default_ppi[i]; +} + void kvm_timer_cpu_up(void) { enable_percpu_irq(host_vtimer_irq, host_vtimer_irq_flags); @@ -1292,44 +1298,56 @@ void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu) static bool timer_irqs_are_valid(struct kvm_vcpu *vcpu) { - int vtimer_irq, ptimer_irq, ret; - unsigned long i; + u32 ppis = 0; + bool valid; - vtimer_irq = timer_irq(vcpu_vtimer(vcpu)); - ret = kvm_vgic_set_owner(vcpu, vtimer_irq, vcpu_vtimer(vcpu)); - if (ret) - return false; + mutex_lock(&vcpu->kvm->arch.timer_data.lock); - ptimer_irq = timer_irq(vcpu_ptimer(vcpu)); - ret = kvm_vgic_set_owner(vcpu, ptimer_irq, vcpu_ptimer(vcpu)); - if (ret) - return false; + for (int i = 0; i < NR_KVM_TIMERS; i++) { + struct arch_timer_context *ctx; + int irq; - kvm_for_each_vcpu(i, vcpu, vcpu->kvm) { - if (timer_irq(vcpu_vtimer(vcpu)) != vtimer_irq || - timer_irq(vcpu_ptimer(vcpu)) != ptimer_irq) - return false; + ctx = vcpu_get_timer(vcpu, i); + irq = timer_irq(ctx); + if (kvm_vgic_set_owner(vcpu, irq, ctx)) + break; + + /* + * We know by construction that we only have PPIs, so + * all values are less than 32. + */ + ppis |= BIT(irq); } - return true; + valid = hweight32(ppis) == NR_KVM_TIMERS; + + if (valid) + set_bit(KVM_ARCH_FLAG_TIMER_PPIS_IMMUTABLE, &vcpu->kvm->arch.flags); + + mutex_unlock(&vcpu->kvm->arch.timer_data.lock); + + return valid; } bool kvm_arch_timer_get_input_level(int vintid) { struct kvm_vcpu *vcpu = kvm_get_running_vcpu(); - struct arch_timer_context *timer; if (WARN(!vcpu, "No vcpu context!\n")) return false; - if (vintid == timer_irq(vcpu_vtimer(vcpu))) - timer = vcpu_vtimer(vcpu); - else if (vintid == timer_irq(vcpu_ptimer(vcpu))) - timer = vcpu_ptimer(vcpu); - else - BUG(); + for (int i = 0; i < NR_KVM_TIMERS; i++) { + struct arch_timer_context *ctx; + + ctx = vcpu_get_timer(vcpu, i); + if (timer_irq(ctx) == vintid) + return kvm_timer_should_fire(ctx); + } - return kvm_timer_should_fire(timer); + /* A timer IRQ has fired, but no matching timer was found? */ + WARN_RATELIMIT(1, "timer INTID%d unknown\n", vintid); + + return false; } int kvm_timer_enable(struct kvm_vcpu *vcpu) @@ -1385,23 +1403,10 @@ void kvm_timer_init_vhe(void) sysreg_clear_set(cntkctl_el1, 0, CNTHCTL_ECV); } -static void set_timer_irqs(struct kvm *kvm, int vtimer_irq, int ptimer_irq) -{ - struct kvm_vcpu *vcpu; - unsigned long i; - - kvm_for_each_vcpu(i, vcpu, kvm) { - timer_irq(vcpu_vtimer(vcpu)) = vtimer_irq; - timer_irq(vcpu_ptimer(vcpu)) = ptimer_irq; - } -} - int kvm_arm_timer_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) { int __user *uaddr = (int __user *)(long)attr->addr; - struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); - struct arch_timer_context *ptimer = vcpu_ptimer(vcpu); - int irq; + int irq, idx, ret = 0; if (!irqchip_in_kernel(vcpu->kvm)) return -EINVAL; @@ -1412,21 +1417,36 @@ int kvm_arm_timer_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) if (!(irq_is_ppi(irq))) return -EINVAL; - if (vcpu->arch.timer_cpu.enabled) - return -EBUSY; + mutex_lock(&vcpu->kvm->arch.timer_data.lock); + + if (test_bit(KVM_ARCH_FLAG_TIMER_PPIS_IMMUTABLE, + &vcpu->kvm->arch.flags)) { + ret = -EBUSY; + goto out; + } switch (attr->attr) { case KVM_ARM_VCPU_TIMER_IRQ_VTIMER: - set_timer_irqs(vcpu->kvm, irq, timer_irq(ptimer)); + idx = TIMER_VTIMER; break; case KVM_ARM_VCPU_TIMER_IRQ_PTIMER: - set_timer_irqs(vcpu->kvm, timer_irq(vtimer), irq); + idx = TIMER_PTIMER; break; default: - return -ENXIO; + ret = -ENXIO; + goto out; } - return 0; + /* + * We cannot validate the IRQ unicity before we run, so take it at + * face value. The verdict will be given on first vcpu run, for each + * vcpu. Yes this is late. Blame it on the stupid API. + */ + vcpu->kvm->arch.timer_data.ppi[idx] = irq; + +out: + mutex_unlock(&vcpu->kvm->arch.timer_data.lock); + return ret; } int kvm_arm_timer_get_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 1c8a4bbae684..4c5e9dfbf83a 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -148,6 +148,8 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) kvm_vgic_early_init(kvm); + kvm_timer_init_vm(kvm); + /* The maximum number of VCPUs is limited by the host's GIC model */ kvm->max_vcpus = kvm_arm_default_max_vcpus(); diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h index 27cada09f588..f093ea9f540d 100644 --- a/include/kvm/arm_arch_timer.h +++ b/include/kvm/arm_arch_timer.h @@ -36,14 +36,16 @@ struct arch_timer_vm_data { u64 voffset; /* Offset applied to the physical timer/counter */ u64 poffset; + + struct mutex lock; + + /* The PPI for each timer, global to the VM */ + u8 ppi[NR_KVM_TIMERS]; }; struct arch_timer_context { struct kvm_vcpu *vcpu; - /* Timer IRQ */ - struct kvm_irq_level irq; - /* Emulated Timer (may be unused) */ struct hrtimer hrtimer; u64 ns_frac; @@ -57,6 +59,11 @@ struct arch_timer_context { */ bool loaded; + /* Output level of the timer IRQ */ + struct { + bool level; + } irq; + /* Duplicated state from arch_timer.c for convenience */ u32 host_timer_irq; }; @@ -86,6 +93,8 @@ bool kvm_timer_should_notify_user(struct kvm_vcpu *vcpu); void kvm_timer_update_run(struct kvm_vcpu *vcpu); void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu); +void kvm_timer_init_vm(struct kvm *kvm); + u64 kvm_arm_timer_get_reg(struct kvm_vcpu *, u64 regid); int kvm_arm_timer_set_reg(struct kvm_vcpu *, u64 regid, u64 value); @@ -109,7 +118,8 @@ bool kvm_arch_timer_get_input_level(int vintid); #define arch_timer_ctx_index(ctx) ((ctx) - vcpu_timer((ctx)->vcpu)->timers) -#define timer_irq(ctx) ((ctx)->irq.irq) +#define timer_vm_data(ctx) (&(ctx)->vcpu->kvm->arch.timer_data) +#define timer_irq(ctx) (timer_vm_data(ctx)->ppi[arch_timer_ctx_index(ctx)]) u64 kvm_arm_timer_read_sysreg(struct kvm_vcpu *vcpu, enum kvm_arch_timers tmr, -- cgit v1.2.3 From e9adde432bf7371f1c83f67d9f8d75b95810f124 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 30 Mar 2023 18:47:53 +0100 Subject: KVM: arm64: timers: Fast-track CNTPCT_EL0 trap handling Now that it is likely that CNTPCT_EL0 accesses will trap, fast-track the emulation of the counter read which doesn't need more that a simple offsetting. One day, we'll have CNTPOFF everywhere. One day. Suggested-by: Oliver Upton Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20230330174800.2677007-14-maz@kernel.org --- arch/arm64/kvm/hyp/include/hyp/switch.h | 36 +++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) (limited to 'arch/arm64/kvm') diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h index 07d37ff88a3f..9954368f639d 100644 --- a/arch/arm64/kvm/hyp/include/hyp/switch.h +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include @@ -326,6 +327,38 @@ static bool kvm_hyp_handle_ptrauth(struct kvm_vcpu *vcpu, u64 *exit_code) return true; } +static bool kvm_hyp_handle_cntpct(struct kvm_vcpu *vcpu) +{ + struct arch_timer_context *ctxt; + u32 sysreg; + u64 val; + + /* + * We only get here for 64bit guests, 32bit guests will hit + * the long and winding road all the way to the standard + * handling. Yes, it sucks to be irrelevant. + */ + sysreg = esr_sys64_to_sysreg(kvm_vcpu_get_esr(vcpu)); + + switch (sysreg) { + case SYS_CNTPCT_EL0: + case SYS_CNTPCTSS_EL0: + ctxt = vcpu_ptimer(vcpu); + break; + default: + return false; + } + + val = arch_timer_read_cntpct_el0(); + + if (ctxt->offset.vm_offset) + val -= *kern_hyp_va(ctxt->offset.vm_offset); + + vcpu_set_reg(vcpu, kvm_vcpu_sys_get_rt(vcpu), val); + __kvm_skip_instr(vcpu); + return true; +} + static bool kvm_hyp_handle_sysreg(struct kvm_vcpu *vcpu, u64 *exit_code) { if (cpus_have_final_cap(ARM64_WORKAROUND_CAVIUM_TX2_219_TVM) && @@ -339,6 +372,9 @@ static bool kvm_hyp_handle_sysreg(struct kvm_vcpu *vcpu, u64 *exit_code) if (esr_is_ptrauth_trap(kvm_vcpu_get_esr(vcpu))) return kvm_hyp_handle_ptrauth(vcpu, exit_code); + if (kvm_hyp_handle_cntpct(vcpu)) + return true; + return false; } -- cgit v1.2.3 From 476fcd4b7bb54ac959b683f30d0cf305c3e11f3c Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 30 Mar 2023 18:47:54 +0100 Subject: KVM: arm64: timers: Abstract the number of valid timers per vcpu We so far have a pretty fixed number of timers to take care of. This is about to change as NV brings another two into the picture, and we must be careful not to try and emulate non-valid timers in a given VM. For this, abstract the number of timers for a given vcpu behind an accessor, which helpfully returns a constant for now. Reviewed-by: Colton Lewis Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20230330174800.2677007-15-maz@kernel.org --- arch/arm64/kvm/arch_timer.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) (limited to 'arch/arm64/kvm') diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c index 1d811735e05f..d3a7902269c1 100644 --- a/arch/arm64/kvm/arch_timer.c +++ b/arch/arm64/kvm/arch_timer.c @@ -52,6 +52,11 @@ static bool has_cntpoff(void) return (has_vhe() && cpus_have_final_cap(ARM64_HAS_ECV_CNTPOFF)); } +static int nr_timers(struct kvm_vcpu *vcpu) +{ + return NR_KVM_TIMERS; +} + u32 timer_get_ctl(struct arch_timer_context *ctxt) { struct kvm_vcpu *vcpu = ctxt->vcpu; @@ -255,7 +260,7 @@ static u64 kvm_timer_earliest_exp(struct kvm_vcpu *vcpu) u64 min_delta = ULLONG_MAX; int i; - for (i = 0; i < NR_KVM_TIMERS; i++) { + for (i = 0; i < nr_timers(vcpu); i++) { struct arch_timer_context *ctx = &vcpu->arch.timer_cpu.timers[i]; WARN(ctx->loaded, "timer %d loaded\n", i); @@ -815,12 +820,12 @@ int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu) * resets the timer to be disabled and unmasked and is compliant with * the ARMv7 architecture. */ - for (int i = 0; i < NR_KVM_TIMERS; i++) + for (int i = 0; i < nr_timers(vcpu); i++) timer_set_ctl(vcpu_get_timer(vcpu, i), 0); if (timer->enabled) { - for (int i = 0; i < NR_KVM_TIMERS; i++) + for (int i = 0; i < nr_timers(vcpu); i++) kvm_timer_update_irq(vcpu, false, vcpu_get_timer(vcpu, i)); @@ -1303,7 +1308,7 @@ static bool timer_irqs_are_valid(struct kvm_vcpu *vcpu) mutex_lock(&vcpu->kvm->arch.timer_data.lock); - for (int i = 0; i < NR_KVM_TIMERS; i++) { + for (int i = 0; i < nr_timers(vcpu); i++) { struct arch_timer_context *ctx; int irq; @@ -1319,7 +1324,7 @@ static bool timer_irqs_are_valid(struct kvm_vcpu *vcpu) ppis |= BIT(irq); } - valid = hweight32(ppis) == NR_KVM_TIMERS; + valid = hweight32(ppis) == nr_timers(vcpu); if (valid) set_bit(KVM_ARCH_FLAG_TIMER_PPIS_IMMUTABLE, &vcpu->kvm->arch.flags); @@ -1336,7 +1341,7 @@ bool kvm_arch_timer_get_input_level(int vintid) if (WARN(!vcpu, "No vcpu context!\n")) return false; - for (int i = 0; i < NR_KVM_TIMERS; i++) { + for (int i = 0; i < nr_timers(vcpu); i++) { struct arch_timer_context *ctx; ctx = vcpu_get_timer(vcpu, i); -- cgit v1.2.3 From 1e0eec09d43a55125ff80e40b2d6e2f369a338b9 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 30 Mar 2023 18:47:56 +0100 Subject: KVM: arm64: nv: timers: Add a per-timer, per-vcpu offset Being able to set a global offset isn't enough. With NV, we also need to a per-vcpu, per-timer offset (for example, CNTVCT_EL0 being offset by CNTVOFF_EL2). Use a similar method as the VM-wide offset to have a timer point to the shadow register that contains the offset value. Reviewed-by: Colton Lewis Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20230330174800.2677007-17-maz@kernel.org --- arch/arm64/kvm/arch_timer.c | 13 ++++++++++--- arch/arm64/kvm/hyp/include/hyp/switch.h | 2 ++ include/kvm/arm_arch_timer.h | 5 +++++ 3 files changed, 17 insertions(+), 3 deletions(-) (limited to 'arch/arm64/kvm') diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c index d3a7902269c1..b87bf182af33 100644 --- a/arch/arm64/kvm/arch_timer.c +++ b/arch/arm64/kvm/arch_timer.c @@ -89,10 +89,17 @@ u64 timer_get_cval(struct arch_timer_context *ctxt) static u64 timer_get_offset(struct arch_timer_context *ctxt) { - if (ctxt && ctxt->offset.vm_offset) - return *ctxt->offset.vm_offset; + u64 offset = 0; - return 0; + if (!ctxt) + return 0; + + if (ctxt->offset.vm_offset) + offset += *ctxt->offset.vm_offset; + if (ctxt->offset.vcpu_offset) + offset += *ctxt->offset.vcpu_offset; + + return offset; } static void timer_set_ctl(struct arch_timer_context *ctxt, u32 ctl) diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h index 9954368f639d..d07cbc313889 100644 --- a/arch/arm64/kvm/hyp/include/hyp/switch.h +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -353,6 +353,8 @@ static bool kvm_hyp_handle_cntpct(struct kvm_vcpu *vcpu) if (ctxt->offset.vm_offset) val -= *kern_hyp_va(ctxt->offset.vm_offset); + if (ctxt->offset.vcpu_offset) + val -= *kern_hyp_va(ctxt->offset.vcpu_offset); vcpu_set_reg(vcpu, kvm_vcpu_sys_get_rt(vcpu), val); __kvm_skip_instr(vcpu); diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h index f093ea9f540d..209da0c2ac9f 100644 --- a/include/kvm/arm_arch_timer.h +++ b/include/kvm/arm_arch_timer.h @@ -29,6 +29,11 @@ struct arch_timer_offset { * structure. If NULL, assume a zero offset. */ u64 *vm_offset; + /* + * If set, pointer to one of the offsets in the vcpu's sysreg + * array. If NULL, assume a zero offset. + */ + u64 *vcpu_offset; }; struct arch_timer_vm_data { -- cgit v1.2.3 From 81dc9504a7006b484cfcf074796094ee526b0c45 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 30 Mar 2023 18:47:57 +0100 Subject: KVM: arm64: nv: timers: Support hyp timer emulation Emulating EL2 also means emulating the EL2 timers. To do so, we expand our timer framework to deal with at most 4 timers. At any given time, two timers are using the HW timers, and the two others are purely emulated. The role of deciding which is which at any given time is left to a mapping function which is called every time we need to make such a decision. Reviewed-by: Colton Lewis Co-developed-by: Christoffer Dall Signed-off-by: Christoffer Dall Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20230330174800.2677007-18-maz@kernel.org --- arch/arm64/include/asm/kvm_host.h | 4 + arch/arm64/include/uapi/asm/kvm.h | 2 + arch/arm64/kvm/arch_timer.c | 180 ++++++++++++++++++++++++++++++-- arch/arm64/kvm/hyp/include/hyp/switch.h | 15 +++ arch/arm64/kvm/trace_arm.h | 6 +- arch/arm64/kvm/vgic/vgic.c | 15 +++ include/kvm/arm_arch_timer.h | 9 +- include/kvm/arm_vgic.h | 1 + 8 files changed, 220 insertions(+), 12 deletions(-) (limited to 'arch/arm64/kvm') diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 1280154c9ef3..633a7c0750bb 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -369,6 +369,10 @@ enum vcpu_sysreg { TPIDR_EL2, /* EL2 Software Thread ID Register */ CNTHCTL_EL2, /* Counter-timer Hypervisor Control register */ SP_EL2, /* EL2 Stack Pointer */ + CNTHP_CTL_EL2, + CNTHP_CVAL_EL2, + CNTHV_CTL_EL2, + CNTHV_CVAL_EL2, NR_SYS_REGS /* Nothing after this line! */ }; diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h index 12fb0d8a760a..0921f366c49f 100644 --- a/arch/arm64/include/uapi/asm/kvm.h +++ b/arch/arm64/include/uapi/asm/kvm.h @@ -420,6 +420,8 @@ enum { #define KVM_ARM_VCPU_TIMER_CTRL 1 #define KVM_ARM_VCPU_TIMER_IRQ_VTIMER 0 #define KVM_ARM_VCPU_TIMER_IRQ_PTIMER 1 +#define KVM_ARM_VCPU_TIMER_IRQ_HVTIMER 2 +#define KVM_ARM_VCPU_TIMER_IRQ_HPTIMER 3 #define KVM_ARM_VCPU_PVTIME_CTRL 2 #define KVM_ARM_VCPU_PVTIME_IPA 0 diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c index b87bf182af33..c5c8cc3c25ae 100644 --- a/arch/arm64/kvm/arch_timer.c +++ b/arch/arm64/kvm/arch_timer.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include @@ -33,6 +34,8 @@ static DEFINE_STATIC_KEY_FALSE(has_gic_active_state); static const u8 default_ppi[] = { [TIMER_PTIMER] = 30, [TIMER_VTIMER] = 27, + [TIMER_HPTIMER] = 26, + [TIMER_HVTIMER] = 28, }; static bool kvm_timer_irq_can_fire(struct arch_timer_context *timer_ctx); @@ -46,6 +49,11 @@ static void kvm_arm_timer_write(struct kvm_vcpu *vcpu, static u64 kvm_arm_timer_read(struct kvm_vcpu *vcpu, struct arch_timer_context *timer, enum kvm_arch_timer_regs treg); +static bool kvm_arch_timer_get_input_level(int vintid); + +static struct irq_ops arch_timer_irq_ops = { + .get_input_level = kvm_arch_timer_get_input_level, +}; static bool has_cntpoff(void) { @@ -54,6 +62,9 @@ static bool has_cntpoff(void) static int nr_timers(struct kvm_vcpu *vcpu) { + if (!vcpu_has_nv(vcpu)) + return NR_KVM_EL0_TIMERS; + return NR_KVM_TIMERS; } @@ -66,6 +77,10 @@ u32 timer_get_ctl(struct arch_timer_context *ctxt) return __vcpu_sys_reg(vcpu, CNTV_CTL_EL0); case TIMER_PTIMER: return __vcpu_sys_reg(vcpu, CNTP_CTL_EL0); + case TIMER_HVTIMER: + return __vcpu_sys_reg(vcpu, CNTHV_CTL_EL2); + case TIMER_HPTIMER: + return __vcpu_sys_reg(vcpu, CNTHP_CTL_EL2); default: WARN_ON(1); return 0; @@ -81,6 +96,10 @@ u64 timer_get_cval(struct arch_timer_context *ctxt) return __vcpu_sys_reg(vcpu, CNTV_CVAL_EL0); case TIMER_PTIMER: return __vcpu_sys_reg(vcpu, CNTP_CVAL_EL0); + case TIMER_HVTIMER: + return __vcpu_sys_reg(vcpu, CNTHV_CVAL_EL2); + case TIMER_HPTIMER: + return __vcpu_sys_reg(vcpu, CNTHP_CVAL_EL2); default: WARN_ON(1); return 0; @@ -113,6 +132,12 @@ static void timer_set_ctl(struct arch_timer_context *ctxt, u32 ctl) case TIMER_PTIMER: __vcpu_sys_reg(vcpu, CNTP_CTL_EL0) = ctl; break; + case TIMER_HVTIMER: + __vcpu_sys_reg(vcpu, CNTHV_CTL_EL2) = ctl; + break; + case TIMER_HPTIMER: + __vcpu_sys_reg(vcpu, CNTHP_CTL_EL2) = ctl; + break; default: WARN_ON(1); } @@ -129,6 +154,12 @@ static void timer_set_cval(struct arch_timer_context *ctxt, u64 cval) case TIMER_PTIMER: __vcpu_sys_reg(vcpu, CNTP_CVAL_EL0) = cval; break; + case TIMER_HVTIMER: + __vcpu_sys_reg(vcpu, CNTHV_CVAL_EL2) = cval; + break; + case TIMER_HPTIMER: + __vcpu_sys_reg(vcpu, CNTHP_CVAL_EL2) = cval; + break; default: WARN_ON(1); } @@ -151,13 +182,27 @@ u64 kvm_phys_timer_read(void) static void get_timer_map(struct kvm_vcpu *vcpu, struct timer_map *map) { - if (has_vhe()) { + if (vcpu_has_nv(vcpu)) { + if (is_hyp_ctxt(vcpu)) { + map->direct_vtimer = vcpu_hvtimer(vcpu); + map->direct_ptimer = vcpu_hptimer(vcpu); + map->emul_vtimer = vcpu_vtimer(vcpu); + map->emul_ptimer = vcpu_ptimer(vcpu); + } else { + map->direct_vtimer = vcpu_vtimer(vcpu); + map->direct_ptimer = vcpu_ptimer(vcpu); + map->emul_vtimer = vcpu_hvtimer(vcpu); + map->emul_ptimer = vcpu_hptimer(vcpu); + } + } else if (has_vhe()) { map->direct_vtimer = vcpu_vtimer(vcpu); map->direct_ptimer = vcpu_ptimer(vcpu); + map->emul_vtimer = NULL; map->emul_ptimer = NULL; } else { map->direct_vtimer = vcpu_vtimer(vcpu); map->direct_ptimer = NULL; + map->emul_vtimer = NULL; map->emul_ptimer = vcpu_ptimer(vcpu); } @@ -252,8 +297,11 @@ static bool vcpu_has_wfit_active(struct kvm_vcpu *vcpu) static u64 wfit_delay_ns(struct kvm_vcpu *vcpu) { - struct arch_timer_context *ctx = vcpu_vtimer(vcpu); u64 val = vcpu_get_reg(vcpu, kvm_vcpu_sys_get_rt(vcpu)); + struct arch_timer_context *ctx; + + ctx = (vcpu_has_nv(vcpu) && is_hyp_ctxt(vcpu)) ? vcpu_hvtimer(vcpu) + : vcpu_vtimer(vcpu); return kvm_counter_compute_delta(ctx, val); } @@ -350,9 +398,11 @@ static bool kvm_timer_should_fire(struct arch_timer_context *timer_ctx) switch (index) { case TIMER_VTIMER: + case TIMER_HVTIMER: cnt_ctl = read_sysreg_el0(SYS_CNTV_CTL); break; case TIMER_PTIMER: + case TIMER_HPTIMER: cnt_ctl = read_sysreg_el0(SYS_CNTP_CTL); break; case NR_KVM_TIMERS: @@ -468,6 +518,7 @@ static void timer_save_state(struct arch_timer_context *ctx) u64 cval; case TIMER_VTIMER: + case TIMER_HVTIMER: timer_set_ctl(ctx, read_sysreg_el0(SYS_CNTV_CTL)); timer_set_cval(ctx, read_sysreg_el0(SYS_CNTV_CVAL)); @@ -493,6 +544,7 @@ static void timer_save_state(struct arch_timer_context *ctx) set_cntvoff(0); break; case TIMER_PTIMER: + case TIMER_HPTIMER: timer_set_ctl(ctx, read_sysreg_el0(SYS_CNTP_CTL)); cval = read_sysreg_el0(SYS_CNTP_CVAL); @@ -536,6 +588,7 @@ static void kvm_timer_blocking(struct kvm_vcpu *vcpu) */ if (!kvm_timer_irq_can_fire(map.direct_vtimer) && !kvm_timer_irq_can_fire(map.direct_ptimer) && + !kvm_timer_irq_can_fire(map.emul_vtimer) && !kvm_timer_irq_can_fire(map.emul_ptimer) && !vcpu_has_wfit_active(vcpu)) return; @@ -572,12 +625,14 @@ static void timer_restore_state(struct arch_timer_context *ctx) u64 cval, offset; case TIMER_VTIMER: + case TIMER_HVTIMER: set_cntvoff(timer_get_offset(ctx)); write_sysreg_el0(timer_get_cval(ctx), SYS_CNTV_CVAL); isb(); write_sysreg_el0(timer_get_ctl(ctx), SYS_CNTV_CTL); break; case TIMER_PTIMER: + case TIMER_HPTIMER: cval = timer_get_cval(ctx); offset = timer_get_offset(ctx); set_cntpoff(offset); @@ -663,6 +718,57 @@ static void kvm_timer_vcpu_load_nogic(struct kvm_vcpu *vcpu) (_clr) |= (_bit); \ } while (0) +static void kvm_timer_vcpu_load_nested_switch(struct kvm_vcpu *vcpu, + struct timer_map *map) +{ + int hw, ret; + + if (!irqchip_in_kernel(vcpu->kvm)) + return; + + /* + * We only ever unmap the vtimer irq on a VHE system that runs nested + * virtualization, in which case we have both a valid emul_vtimer, + * emul_ptimer, direct_vtimer, and direct_ptimer. + * + * Since this is called from kvm_timer_vcpu_load(), a change between + * vEL2 and vEL1/0 will have just happened, and the timer_map will + * represent this, and therefore we switch the emul/direct mappings + * below. + */ + hw = kvm_vgic_get_map(vcpu, timer_irq(map->direct_vtimer)); + if (hw < 0) { + kvm_vgic_unmap_phys_irq(vcpu, timer_irq(map->emul_vtimer)); + kvm_vgic_unmap_phys_irq(vcpu, timer_irq(map->emul_ptimer)); + + ret = kvm_vgic_map_phys_irq(vcpu, + map->direct_vtimer->host_timer_irq, + timer_irq(map->direct_vtimer), + &arch_timer_irq_ops); + WARN_ON_ONCE(ret); + ret = kvm_vgic_map_phys_irq(vcpu, + map->direct_ptimer->host_timer_irq, + timer_irq(map->direct_ptimer), + &arch_timer_irq_ops); + WARN_ON_ONCE(ret); + + /* + * The virtual offset behaviour is "interresting", as it + * always applies when HCR_EL2.E2H==0, but only when + * accessed from EL1 when HCR_EL2.E2H==1. So make sure we + * track E2H when putting the HV timer in "direct" mode. + */ + if (map->direct_vtimer == vcpu_hvtimer(vcpu)) { + struct arch_timer_offset *offs = &map->direct_vtimer->offset; + + if (vcpu_el2_e2h_is_set(vcpu)) + offs->vcpu_offset = NULL; + else + offs->vcpu_offset = &__vcpu_sys_reg(vcpu, CNTVOFF_EL2); + } + } +} + static void timer_set_traps(struct kvm_vcpu *vcpu, struct timer_map *map) { bool tpt, tpc; @@ -695,6 +801,22 @@ static void timer_set_traps(struct kvm_vcpu *vcpu, struct timer_map *map) if (!has_cntpoff() && timer_get_offset(map->direct_ptimer)) tpt = tpc = true; + /* + * Apply the enable bits that the guest hypervisor has requested for + * its own guest. We can only add traps that wouldn't have been set + * above. + */ + if (vcpu_has_nv(vcpu) && !is_hyp_ctxt(vcpu)) { + u64 val = __vcpu_sys_reg(vcpu, CNTHCTL_EL2); + + /* Use the VHE format for mental sanity */ + if (!vcpu_el2_e2h_is_set(vcpu)) + val = (val & (CNTHCTL_EL1PCEN | CNTHCTL_EL1PCTEN)) << 10; + + tpt |= !(val & (CNTHCTL_EL1PCEN << 10)); + tpc |= !(val & (CNTHCTL_EL1PCTEN << 10)); + } + /* * Now that we have collected our requirements, compute the * trap and enable bits. @@ -720,6 +842,9 @@ void kvm_timer_vcpu_load(struct kvm_vcpu *vcpu) get_timer_map(vcpu, &map); if (static_branch_likely(&has_gic_active_state)) { + if (vcpu_has_nv(vcpu)) + kvm_timer_vcpu_load_nested_switch(vcpu, &map); + kvm_timer_vcpu_load_gic(map.direct_vtimer); if (map.direct_ptimer) kvm_timer_vcpu_load_gic(map.direct_ptimer); @@ -732,6 +857,8 @@ void kvm_timer_vcpu_load(struct kvm_vcpu *vcpu) timer_restore_state(map.direct_vtimer); if (map.direct_ptimer) timer_restore_state(map.direct_ptimer); + if (map.emul_vtimer) + timer_emulate(map.emul_vtimer); if (map.emul_ptimer) timer_emulate(map.emul_ptimer); @@ -778,6 +905,8 @@ void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu) * In any case, we re-schedule the hrtimer for the physical timer when * coming back to the VCPU thread in kvm_timer_vcpu_load(). */ + if (map.emul_vtimer) + soft_timer_cancel(&map.emul_vtimer->hrtimer); if (map.emul_ptimer) soft_timer_cancel(&map.emul_ptimer->hrtimer); @@ -830,6 +959,17 @@ int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu) for (int i = 0; i < nr_timers(vcpu); i++) timer_set_ctl(vcpu_get_timer(vcpu, i), 0); + /* + * A vcpu running at EL2 is in charge of the offset applied to + * the virtual timer, so use the physical VM offset, and point + * the vcpu offset to CNTVOFF_EL2. + */ + if (vcpu_has_nv(vcpu)) { + struct arch_timer_offset *offs = &vcpu_vtimer(vcpu)->offset; + + offs->vcpu_offset = &__vcpu_sys_reg(vcpu, CNTVOFF_EL2); + offs->vm_offset = &vcpu->kvm->arch.timer_data.poffset; + } if (timer->enabled) { for (int i = 0; i < nr_timers(vcpu); i++) @@ -843,6 +983,8 @@ int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu) } } + if (map.emul_vtimer) + soft_timer_cancel(&map.emul_vtimer->hrtimer); if (map.emul_ptimer) soft_timer_cancel(&map.emul_ptimer->hrtimer); @@ -866,9 +1008,11 @@ static void timer_context_init(struct kvm_vcpu *vcpu, int timerid) switch (timerid) { case TIMER_PTIMER: + case TIMER_HPTIMER: ctxt->host_timer_irq = host_ptimer_irq; break; case TIMER_VTIMER: + case TIMER_HVTIMER: ctxt->host_timer_irq = host_vtimer_irq; break; } @@ -1020,6 +1164,10 @@ static u64 kvm_arm_timer_read(struct kvm_vcpu *vcpu, val = kvm_phys_timer_read() - timer_get_offset(timer); break; + case TIMER_REG_VOFF: + val = *timer->offset.vcpu_offset; + break; + default: BUG(); } @@ -1038,7 +1186,7 @@ u64 kvm_arm_timer_read_sysreg(struct kvm_vcpu *vcpu, get_timer_map(vcpu, &map); timer = vcpu_get_timer(vcpu, tmr); - if (timer == map.emul_ptimer) + if (timer == map.emul_vtimer || timer == map.emul_ptimer) return kvm_arm_timer_read(vcpu, timer, treg); preempt_disable(); @@ -1070,6 +1218,10 @@ static void kvm_arm_timer_write(struct kvm_vcpu *vcpu, timer_set_cval(timer, val); break; + case TIMER_REG_VOFF: + *timer->offset.vcpu_offset = val; + break; + default: BUG(); } @@ -1085,7 +1237,7 @@ void kvm_arm_timer_write_sysreg(struct kvm_vcpu *vcpu, get_timer_map(vcpu, &map); timer = vcpu_get_timer(vcpu, tmr); - if (timer == map.emul_ptimer) { + if (timer == map.emul_vtimer || timer == map.emul_ptimer) { soft_timer_cancel(&timer->hrtimer); kvm_arm_timer_write(vcpu, timer, treg, val); timer_emulate(timer); @@ -1165,10 +1317,6 @@ static const struct irq_domain_ops timer_domain_ops = { .free = timer_irq_domain_free, }; -static struct irq_ops arch_timer_irq_ops = { - .get_input_level = kvm_arch_timer_get_input_level, -}; - static void kvm_irq_fixup_flags(unsigned int virq, u32 *flags) { *flags = irq_get_trigger_type(virq); @@ -1341,7 +1489,7 @@ static bool timer_irqs_are_valid(struct kvm_vcpu *vcpu) return valid; } -bool kvm_arch_timer_get_input_level(int vintid) +static bool kvm_arch_timer_get_input_level(int vintid) { struct kvm_vcpu *vcpu = kvm_get_running_vcpu(); @@ -1444,6 +1592,12 @@ int kvm_arm_timer_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) case KVM_ARM_VCPU_TIMER_IRQ_PTIMER: idx = TIMER_PTIMER; break; + case KVM_ARM_VCPU_TIMER_IRQ_HVTIMER: + idx = TIMER_HVTIMER; + break; + case KVM_ARM_VCPU_TIMER_IRQ_HPTIMER: + idx = TIMER_HPTIMER; + break; default: ret = -ENXIO; goto out; @@ -1474,6 +1628,12 @@ int kvm_arm_timer_get_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) case KVM_ARM_VCPU_TIMER_IRQ_PTIMER: timer = vcpu_ptimer(vcpu); break; + case KVM_ARM_VCPU_TIMER_IRQ_HVTIMER: + timer = vcpu_hvtimer(vcpu); + break; + case KVM_ARM_VCPU_TIMER_IRQ_HPTIMER: + timer = vcpu_hptimer(vcpu); + break; default: return -ENXIO; } @@ -1487,6 +1647,8 @@ int kvm_arm_timer_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) switch (attr->attr) { case KVM_ARM_VCPU_TIMER_IRQ_VTIMER: case KVM_ARM_VCPU_TIMER_IRQ_PTIMER: + case KVM_ARM_VCPU_TIMER_IRQ_HVTIMER: + case KVM_ARM_VCPU_TIMER_IRQ_HPTIMER: return 0; } diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h index d07cbc313889..c41166f1a1dd 100644 --- a/arch/arm64/kvm/hyp/include/hyp/switch.h +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -343,6 +343,21 @@ static bool kvm_hyp_handle_cntpct(struct kvm_vcpu *vcpu) switch (sysreg) { case SYS_CNTPCT_EL0: case SYS_CNTPCTSS_EL0: + if (vcpu_has_nv(vcpu)) { + if (is_hyp_ctxt(vcpu)) { + ctxt = vcpu_hptimer(vcpu); + break; + } + + /* Check for guest hypervisor trapping */ + val = __vcpu_sys_reg(vcpu, CNTHCTL_EL2); + if (!vcpu_el2_e2h_is_set(vcpu)) + val = (val & CNTHCTL_EL1PCTEN) << 10; + + if (!(val & (CNTHCTL_EL1PCTEN << 10))) + return false; + } + ctxt = vcpu_ptimer(vcpu); break; default: diff --git a/arch/arm64/kvm/trace_arm.h b/arch/arm64/kvm/trace_arm.h index f3e46a976125..6ce5c025218d 100644 --- a/arch/arm64/kvm/trace_arm.h +++ b/arch/arm64/kvm/trace_arm.h @@ -206,6 +206,7 @@ TRACE_EVENT(kvm_get_timer_map, __field( unsigned long, vcpu_id ) __field( int, direct_vtimer ) __field( int, direct_ptimer ) + __field( int, emul_vtimer ) __field( int, emul_ptimer ) ), @@ -214,14 +215,17 @@ TRACE_EVENT(kvm_get_timer_map, __entry->direct_vtimer = arch_timer_ctx_index(map->direct_vtimer); __entry->direct_ptimer = (map->direct_ptimer) ? arch_timer_ctx_index(map->direct_ptimer) : -1; + __entry->emul_vtimer = + (map->emul_vtimer) ? arch_timer_ctx_index(map->emul_vtimer) : -1; __entry->emul_ptimer = (map->emul_ptimer) ? arch_timer_ctx_index(map->emul_ptimer) : -1; ), - TP_printk("VCPU: %ld, dv: %d, dp: %d, ep: %d", + TP_printk("VCPU: %ld, dv: %d, dp: %d, ev: %d, ep: %d", __entry->vcpu_id, __entry->direct_vtimer, __entry->direct_ptimer, + __entry->emul_vtimer, __entry->emul_ptimer) ); diff --git a/arch/arm64/kvm/vgic/vgic.c b/arch/arm64/kvm/vgic/vgic.c index d97e6080b421..ae491ef97188 100644 --- a/arch/arm64/kvm/vgic/vgic.c +++ b/arch/arm64/kvm/vgic/vgic.c @@ -573,6 +573,21 @@ int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, unsigned int vintid) return 0; } +int kvm_vgic_get_map(struct kvm_vcpu *vcpu, unsigned int vintid) +{ + struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, vintid); + unsigned long flags; + int ret = -1; + + raw_spin_lock_irqsave(&irq->irq_lock, flags); + if (irq->hw) + ret = irq->hwintid; + raw_spin_unlock_irqrestore(&irq->irq_lock, flags); + + vgic_put_irq(vcpu->kvm, irq); + return ret; +} + /** * kvm_vgic_set_owner - Set the owner of an interrupt for a VM * diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h index 209da0c2ac9f..52008f5cff06 100644 --- a/include/kvm/arm_arch_timer.h +++ b/include/kvm/arm_arch_timer.h @@ -13,6 +13,9 @@ enum kvm_arch_timers { TIMER_PTIMER, TIMER_VTIMER, + NR_KVM_EL0_TIMERS, + TIMER_HVTIMER = NR_KVM_EL0_TIMERS, + TIMER_HPTIMER, NR_KVM_TIMERS }; @@ -21,6 +24,7 @@ enum kvm_arch_timer_regs { TIMER_REG_CVAL, TIMER_REG_TVAL, TIMER_REG_CTL, + TIMER_REG_VOFF, }; struct arch_timer_offset { @@ -76,6 +80,7 @@ struct arch_timer_context { struct timer_map { struct arch_timer_context *direct_vtimer; struct arch_timer_context *direct_ptimer; + struct arch_timer_context *emul_vtimer; struct arch_timer_context *emul_ptimer; }; @@ -114,12 +119,12 @@ void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu); void kvm_timer_init_vhe(void); -bool kvm_arch_timer_get_input_level(int vintid); - #define vcpu_timer(v) (&(v)->arch.timer_cpu) #define vcpu_get_timer(v,t) (&vcpu_timer(v)->timers[(t)]) #define vcpu_vtimer(v) (&(v)->arch.timer_cpu.timers[TIMER_VTIMER]) #define vcpu_ptimer(v) (&(v)->arch.timer_cpu.timers[TIMER_PTIMER]) +#define vcpu_hvtimer(v) (&(v)->arch.timer_cpu.timers[TIMER_HVTIMER]) +#define vcpu_hptimer(v) (&(v)->arch.timer_cpu.timers[TIMER_HPTIMER]) #define arch_timer_ctx_index(ctx) ((ctx) - vcpu_timer((ctx)->vcpu)->timers) diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index d3ad51fde9db..402b545959af 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -380,6 +380,7 @@ int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int intid, int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, unsigned int host_irq, u32 vintid, struct irq_ops *ops); int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, unsigned int vintid); +int kvm_vgic_get_map(struct kvm_vcpu *vcpu, unsigned int vintid); bool kvm_vgic_map_is_active(struct kvm_vcpu *vcpu, unsigned int vintid); int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu); -- cgit v1.2.3 From de40bb8abb764f6866d82c4e2a43acdb22892cf4 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Tue, 4 Apr 2023 15:40:39 +0000 Subject: KVM: arm64: Add a helper to check if a VM has ran once The test_bit(...) pattern is quite a lot of keystrokes. Replace existing callsites with a helper. No functional change intended. Signed-off-by: Oliver Upton Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20230404154050.2270077-3-oliver.upton@linux.dev --- arch/arm64/include/asm/kvm_host.h | 3 +++ arch/arm64/kvm/hypercalls.c | 3 +-- arch/arm64/kvm/pmu-emul.c | 4 ++-- 3 files changed, 6 insertions(+), 4 deletions(-) (limited to 'arch/arm64/kvm') diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index bcd774d74f34..d091d1c9890b 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -1061,6 +1061,9 @@ bool kvm_arm_vcpu_is_finalized(struct kvm_vcpu *vcpu); (system_supports_32bit_el0() && \ !static_branch_unlikely(&arm64_mismatched_32bit_el0)) +#define kvm_vm_has_ran_once(kvm) \ + (test_bit(KVM_ARCH_FLAG_HAS_RAN_ONCE, &(kvm)->arch.flags)) + int kvm_trng_call(struct kvm_vcpu *vcpu); #ifdef CONFIG_KVM extern phys_addr_t hyp_mem_base; diff --git a/arch/arm64/kvm/hypercalls.c b/arch/arm64/kvm/hypercalls.c index 5da884e11337..a09a526a7d7c 100644 --- a/arch/arm64/kvm/hypercalls.c +++ b/arch/arm64/kvm/hypercalls.c @@ -379,8 +379,7 @@ static int kvm_arm_set_fw_reg_bmap(struct kvm_vcpu *vcpu, u64 reg_id, u64 val) mutex_lock(&kvm->lock); - if (test_bit(KVM_ARCH_FLAG_HAS_RAN_ONCE, &kvm->arch.flags) && - val != *fw_reg_bmap) { + if (kvm_vm_has_ran_once(kvm) && val != *fw_reg_bmap) { ret = -EBUSY; goto out; } diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c index 24908400e190..a0fc569fdbca 100644 --- a/arch/arm64/kvm/pmu-emul.c +++ b/arch/arm64/kvm/pmu-emul.c @@ -880,7 +880,7 @@ static int kvm_arm_pmu_v3_set_pmu(struct kvm_vcpu *vcpu, int pmu_id) list_for_each_entry(entry, &arm_pmus, entry) { arm_pmu = entry->arm_pmu; if (arm_pmu->pmu.type == pmu_id) { - if (test_bit(KVM_ARCH_FLAG_HAS_RAN_ONCE, &kvm->arch.flags) || + if (kvm_vm_has_ran_once(kvm) || (kvm->arch.pmu_filter && kvm->arch.arm_pmu != arm_pmu)) { ret = -EBUSY; break; @@ -963,7 +963,7 @@ int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) mutex_lock(&kvm->lock); - if (test_bit(KVM_ARCH_FLAG_HAS_RAN_ONCE, &kvm->arch.flags)) { + if (kvm_vm_has_ran_once(kvm)) { mutex_unlock(&kvm->lock); return -EBUSY; } -- cgit v1.2.3 From e0fc6b21616dd917899ee4a2d4126b4a963c0871 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Tue, 4 Apr 2023 15:40:40 +0000 Subject: KVM: arm64: Add vm fd device attribute accessors A subsequent change will allow userspace to convey a filter for hypercalls through a vm device attribute. Add the requisite boilerplate for vm attribute accessors. Reviewed-by: Suzuki K Poulose Signed-off-by: Oliver Upton Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20230404154050.2270077-4-oliver.upton@linux.dev --- arch/arm64/kvm/arm.c | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) (limited to 'arch/arm64/kvm') diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 3bd732eaf087..b6e26c0e65e5 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -1439,11 +1439,28 @@ static int kvm_vm_ioctl_set_device_addr(struct kvm *kvm, } } +static int kvm_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr) +{ + switch (attr->group) { + default: + return -ENXIO; + } +} + +static int kvm_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr) +{ + switch (attr->group) { + default: + return -ENXIO; + } +} + long kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { struct kvm *kvm = filp->private_data; void __user *argp = (void __user *)arg; + struct kvm_device_attr attr; switch (ioctl) { case KVM_CREATE_IRQCHIP: { @@ -1479,6 +1496,18 @@ long kvm_arch_vm_ioctl(struct file *filp, return -EFAULT; return kvm_vm_ioctl_mte_copy_tags(kvm, ©_tags); } + case KVM_HAS_DEVICE_ATTR: { + if (copy_from_user(&attr, argp, sizeof(attr))) + return -EFAULT; + + return kvm_vm_has_attr(kvm, &attr); + } + case KVM_SET_DEVICE_ATTR: { + if (copy_from_user(&attr, argp, sizeof(attr))) + return -EFAULT; + + return kvm_vm_set_attr(kvm, &attr); + } default: return -EINVAL; } -- cgit v1.2.3 From aac94968126beb9846c12a940f1302ece7849b4f Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Tue, 4 Apr 2023 15:40:41 +0000 Subject: KVM: arm64: Rename SMC/HVC call handler to reflect reality KVM handles SMCCC calls from virtual EL2 that use the SMC instruction since commit bd36b1a9eb5a ("KVM: arm64: nv: Handle SMCs taken from virtual EL2"). Thus, the function name of the handler no longer reflects reality. Normalize the name on SMCCC, since that's the only hypercall interface KVM supports in the first place. No fuctional change intended. Reviewed-by: Suzuki K Poulose Signed-off-by: Oliver Upton Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20230404154050.2270077-5-oliver.upton@linux.dev --- arch/arm64/kvm/handle_exit.c | 4 ++-- arch/arm64/kvm/hypercalls.c | 2 +- include/kvm/arm_hypercalls.h | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) (limited to 'arch/arm64/kvm') diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c index a798c0b4d717..5e4f9737cbd5 100644 --- a/arch/arm64/kvm/handle_exit.c +++ b/arch/arm64/kvm/handle_exit.c @@ -52,7 +52,7 @@ static int handle_hvc(struct kvm_vcpu *vcpu) return 1; } - ret = kvm_hvc_call_handler(vcpu); + ret = kvm_smccc_call_handler(vcpu); if (ret < 0) { vcpu_set_reg(vcpu, 0, ~0UL); return 1; @@ -89,7 +89,7 @@ static int handle_smc(struct kvm_vcpu *vcpu) * at Non-secure EL1 is trapped to EL2 if HCR_EL2.TSC==1, rather than * being treated as UNDEFINED. */ - ret = kvm_hvc_call_handler(vcpu); + ret = kvm_smccc_call_handler(vcpu); if (ret < 0) vcpu_set_reg(vcpu, 0, ~0UL); diff --git a/arch/arm64/kvm/hypercalls.c b/arch/arm64/kvm/hypercalls.c index a09a526a7d7c..5ead6c6afff0 100644 --- a/arch/arm64/kvm/hypercalls.c +++ b/arch/arm64/kvm/hypercalls.c @@ -121,7 +121,7 @@ static bool kvm_hvc_call_allowed(struct kvm_vcpu *vcpu, u32 func_id) } } -int kvm_hvc_call_handler(struct kvm_vcpu *vcpu) +int kvm_smccc_call_handler(struct kvm_vcpu *vcpu) { struct kvm_smccc_features *smccc_feat = &vcpu->kvm->arch.smccc_feat; u32 func_id = smccc_get_function(vcpu); diff --git a/include/kvm/arm_hypercalls.h b/include/kvm/arm_hypercalls.h index 1188f116cf4e..8f4e33bc43e8 100644 --- a/include/kvm/arm_hypercalls.h +++ b/include/kvm/arm_hypercalls.h @@ -6,7 +6,7 @@ #include -int kvm_hvc_call_handler(struct kvm_vcpu *vcpu); +int kvm_smccc_call_handler(struct kvm_vcpu *vcpu); static inline u32 smccc_get_function(struct kvm_vcpu *vcpu) { -- cgit v1.2.3 From c2d2e9b3d8ce9db825a5630d9d52d542f5138ae0 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Tue, 4 Apr 2023 15:40:42 +0000 Subject: KVM: arm64: Start handling SMCs from EL1 Whelp, the architecture gods have spoken and confirmed that the function ID space is common between SMCs and HVCs. Not only that, the expectation is that hypervisors handle calls to both SMC and HVC conduits. KVM recently picked up support for SMCCCs in commit bd36b1a9eb5a ("KVM: arm64: nv: Handle SMCs taken from virtual EL2") but scoped it only to a nested hypervisor. Let's just open the floodgates and let EL1 access our SMCCC implementation with the SMC instruction as well. Reviewed-by: Suzuki K Poulose Signed-off-by: Oliver Upton Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20230404154050.2270077-6-oliver.upton@linux.dev --- arch/arm64/kvm/handle_exit.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'arch/arm64/kvm') diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c index 5e4f9737cbd5..68f95dcd41a1 100644 --- a/arch/arm64/kvm/handle_exit.c +++ b/arch/arm64/kvm/handle_exit.c @@ -72,13 +72,15 @@ static int handle_smc(struct kvm_vcpu *vcpu) * * We need to advance the PC after the trap, as it would * otherwise return to the same address... - * - * Only handle SMCs from the virtual EL2 with an immediate of zero and - * skip it otherwise. */ - if (!vcpu_is_el2(vcpu) || kvm_vcpu_hvc_get_imm(vcpu)) { + kvm_incr_pc(vcpu); + + /* + * SMCs with a nonzero immediate are reserved according to DEN0028E 2.9 + * "SMC and HVC immediate value". + */ + if (kvm_vcpu_hvc_get_imm(vcpu)) { vcpu_set_reg(vcpu, 0, ~0UL); - kvm_incr_pc(vcpu); return 1; } @@ -93,8 +95,6 @@ static int handle_smc(struct kvm_vcpu *vcpu) if (ret < 0) vcpu_set_reg(vcpu, 0, ~0UL); - kvm_incr_pc(vcpu); - return ret; } -- cgit v1.2.3 From a8308b3fc9494953c453480fb277e24f82f7d2b9 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Tue, 4 Apr 2023 15:40:43 +0000 Subject: KVM: arm64: Refactor hvc filtering to support different actions KVM presently allows userspace to filter guest hypercalls with bitmaps expressed via pseudo-firmware registers. These bitmaps have a narrow scope and, of course, can only allow/deny a particular call. A subsequent change to KVM will introduce a generalized UAPI for filtering hypercalls, allowing functions to be forwarded to userspace. Refactor the existing hypercall filtering logic to make room for more than two actions. While at it, generalize the function names around SMCCC as it is the basis for the upcoming UAPI. No functional change intended. Reviewed-by: Suzuki K Poulose Signed-off-by: Oliver Upton Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20230404154050.2270077-7-oliver.upton@linux.dev --- arch/arm64/include/uapi/asm/kvm.h | 9 +++++++++ arch/arm64/kvm/hypercalls.c | 26 ++++++++++++++++++++++---- 2 files changed, 31 insertions(+), 4 deletions(-) (limited to 'arch/arm64/kvm') diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h index f8129c624b07..f9672ef1159a 100644 --- a/arch/arm64/include/uapi/asm/kvm.h +++ b/arch/arm64/include/uapi/asm/kvm.h @@ -469,6 +469,15 @@ enum { /* run->fail_entry.hardware_entry_failure_reason codes. */ #define KVM_EXIT_FAIL_ENTRY_CPU_UNSUPPORTED (1ULL << 0) +enum kvm_smccc_filter_action { + KVM_SMCCC_FILTER_HANDLE = 0, + KVM_SMCCC_FILTER_DENY, + +#ifdef __KERNEL__ + NR_SMCCC_FILTER_ACTIONS +#endif +}; + #endif #endif /* __ARM_KVM_H__ */ diff --git a/arch/arm64/kvm/hypercalls.c b/arch/arm64/kvm/hypercalls.c index 5ead6c6afff0..0be974e2f1fc 100644 --- a/arch/arm64/kvm/hypercalls.c +++ b/arch/arm64/kvm/hypercalls.c @@ -65,7 +65,7 @@ static void kvm_ptp_get_time(struct kvm_vcpu *vcpu, u64 *val) val[3] = lower_32_bits(cycles); } -static bool kvm_hvc_call_default_allowed(u32 func_id) +static bool kvm_smccc_default_allowed(u32 func_id) { switch (func_id) { /* @@ -93,7 +93,7 @@ static bool kvm_hvc_call_default_allowed(u32 func_id) } } -static bool kvm_hvc_call_allowed(struct kvm_vcpu *vcpu, u32 func_id) +static bool kvm_smccc_test_fw_bmap(struct kvm_vcpu *vcpu, u32 func_id) { struct kvm_smccc_features *smccc_feat = &vcpu->kvm->arch.smccc_feat; @@ -117,20 +117,38 @@ static bool kvm_hvc_call_allowed(struct kvm_vcpu *vcpu, u32 func_id) return test_bit(KVM_REG_ARM_VENDOR_HYP_BIT_PTP, &smccc_feat->vendor_hyp_bmap); default: - return kvm_hvc_call_default_allowed(func_id); + return false; } } +static u8 kvm_smccc_get_action(struct kvm_vcpu *vcpu, u32 func_id) +{ + if (kvm_smccc_test_fw_bmap(vcpu, func_id) || + kvm_smccc_default_allowed(func_id)) + return KVM_SMCCC_FILTER_HANDLE; + + return KVM_SMCCC_FILTER_DENY; +} + int kvm_smccc_call_handler(struct kvm_vcpu *vcpu) { struct kvm_smccc_features *smccc_feat = &vcpu->kvm->arch.smccc_feat; u32 func_id = smccc_get_function(vcpu); u64 val[4] = {SMCCC_RET_NOT_SUPPORTED}; u32 feature; + u8 action; gpa_t gpa; - if (!kvm_hvc_call_allowed(vcpu, func_id)) + action = kvm_smccc_get_action(vcpu, func_id); + switch (action) { + case KVM_SMCCC_FILTER_HANDLE: + break; + case KVM_SMCCC_FILTER_DENY: + goto out; + default: + WARN_RATELIMIT(1, "Unhandled SMCCC filter action: %d\n", action); goto out; + } switch (func_id) { case ARM_SMCCC_VERSION_FUNC_ID: -- cgit v1.2.3 From fb88707dd39bd1d5ec4a058776de9ee99bcc7b72 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Tue, 4 Apr 2023 15:40:44 +0000 Subject: KVM: arm64: Use a maple tree to represent the SMCCC filter Maple tree is an efficient B-tree implementation that is intended for storing non-overlapping intervals. Such a data structure is a good fit for the SMCCC filter as it is desirable to sparsely allocate the 32 bit function ID space. To that end, add a maple tree to kvm_arch and correctly init/teardown along with the VM. Wire in a test against the hypercall filter for HVCs which does nothing until the controls are exposed to userspace. Signed-off-by: Oliver Upton Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20230404154050.2270077-8-oliver.upton@linux.dev --- arch/arm64/include/asm/kvm_host.h | 5 +++- arch/arm64/kvm/arm.c | 2 ++ arch/arm64/kvm/hypercalls.c | 57 +++++++++++++++++++++++++++++++++++++++ include/kvm/arm_hypercalls.h | 1 + 4 files changed, 64 insertions(+), 1 deletion(-) (limited to 'arch/arm64/kvm') diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index d091d1c9890b..2682b3fd0881 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -221,7 +222,8 @@ struct kvm_arch { #define KVM_ARCH_FLAG_EL1_32BIT 4 /* PSCI SYSTEM_SUSPEND enabled for the guest */ #define KVM_ARCH_FLAG_SYSTEM_SUSPEND_ENABLED 5 - + /* SMCCC filter initialized for the VM */ +#define KVM_ARCH_FLAG_SMCCC_FILTER_CONFIGURED 6 unsigned long flags; /* @@ -242,6 +244,7 @@ struct kvm_arch { /* Hypercall features firmware registers' descriptor */ struct kvm_smccc_features smccc_feat; + struct maple_tree smccc_filter; /* * For an untrusted host VM, 'pkvm.handle' is used to lookup diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index b6e26c0e65e5..1202ac03bee0 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -192,6 +192,8 @@ void kvm_arch_destroy_vm(struct kvm *kvm) kvm_destroy_vcpus(kvm); kvm_unshare_hyp(kvm, kvm + 1); + + kvm_arm_teardown_hypercalls(kvm); } int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) diff --git a/arch/arm64/kvm/hypercalls.c b/arch/arm64/kvm/hypercalls.c index 0be974e2f1fc..ba7cd84c6668 100644 --- a/arch/arm64/kvm/hypercalls.c +++ b/arch/arm64/kvm/hypercalls.c @@ -121,8 +121,58 @@ static bool kvm_smccc_test_fw_bmap(struct kvm_vcpu *vcpu, u32 func_id) } } +#define SMCCC_ARCH_RANGE_BEGIN ARM_SMCCC_VERSION_FUNC_ID +#define SMCCC_ARCH_RANGE_END \ + ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \ + ARM_SMCCC_SMC_32, \ + 0, ARM_SMCCC_FUNC_MASK) + +static void init_smccc_filter(struct kvm *kvm) +{ + int r; + + mt_init(&kvm->arch.smccc_filter); + + /* + * Prevent userspace from handling any SMCCC calls in the architecture + * range, avoiding the risk of misrepresenting Spectre mitigation status + * to the guest. + */ + r = mtree_insert_range(&kvm->arch.smccc_filter, + SMCCC_ARCH_RANGE_BEGIN, SMCCC_ARCH_RANGE_END, + xa_mk_value(KVM_SMCCC_FILTER_HANDLE), + GFP_KERNEL_ACCOUNT); + WARN_ON_ONCE(r); +} + +static u8 kvm_smccc_filter_get_action(struct kvm *kvm, u32 func_id) +{ + unsigned long idx = func_id; + void *val; + + if (!test_bit(KVM_ARCH_FLAG_SMCCC_FILTER_CONFIGURED, &kvm->arch.flags)) + return KVM_SMCCC_FILTER_HANDLE; + + /* + * But where's the error handling, you say? + * + * mt_find() returns NULL if no entry was found, which just so happens + * to match KVM_SMCCC_FILTER_HANDLE. + */ + val = mt_find(&kvm->arch.smccc_filter, &idx, idx); + return xa_to_value(val); +} + static u8 kvm_smccc_get_action(struct kvm_vcpu *vcpu, u32 func_id) { + /* + * Intervening actions in the SMCCC filter take precedence over the + * pseudo-firmware register bitmaps. + */ + u8 action = kvm_smccc_filter_get_action(vcpu->kvm, func_id); + if (action != KVM_SMCCC_FILTER_HANDLE) + return action; + if (kvm_smccc_test_fw_bmap(vcpu, func_id) || kvm_smccc_default_allowed(func_id)) return KVM_SMCCC_FILTER_HANDLE; @@ -263,6 +313,13 @@ void kvm_arm_init_hypercalls(struct kvm *kvm) smccc_feat->std_bmap = KVM_ARM_SMCCC_STD_FEATURES; smccc_feat->std_hyp_bmap = KVM_ARM_SMCCC_STD_HYP_FEATURES; smccc_feat->vendor_hyp_bmap = KVM_ARM_SMCCC_VENDOR_HYP_FEATURES; + + init_smccc_filter(kvm); +} + +void kvm_arm_teardown_hypercalls(struct kvm *kvm) +{ + mtree_destroy(&kvm->arch.smccc_filter); } int kvm_arm_get_fw_num_regs(struct kvm_vcpu *vcpu) diff --git a/include/kvm/arm_hypercalls.h b/include/kvm/arm_hypercalls.h index 8f4e33bc43e8..fe6c31575b05 100644 --- a/include/kvm/arm_hypercalls.h +++ b/include/kvm/arm_hypercalls.h @@ -43,6 +43,7 @@ static inline void smccc_set_retval(struct kvm_vcpu *vcpu, struct kvm_one_reg; void kvm_arm_init_hypercalls(struct kvm *kvm); +void kvm_arm_teardown_hypercalls(struct kvm *kvm); int kvm_arm_get_fw_num_regs(struct kvm_vcpu *vcpu); int kvm_arm_copy_fw_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices); int kvm_arm_get_fw_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg); -- cgit v1.2.3 From d824dff1919bbd523d4d5c860437d043c0ad121d Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Tue, 4 Apr 2023 15:40:45 +0000 Subject: KVM: arm64: Add support for KVM_EXIT_HYPERCALL In anticipation of user hypercall filters, add the necessary plumbing to get SMCCC calls out to userspace. Even though the exit structure has space for KVM to pass register arguments, let's just avoid it altogether and let userspace poke at the registers via KVM_GET_ONE_REG. This deliberately stretches the definition of a 'hypercall' to cover SMCs from EL1 in addition to the HVCs we know and love. KVM doesn't support EL1 calls into secure services, but now we can paint that as a userspace problem and be done with it. Finally, we need a flag to let userspace know what conduit instruction was used (i.e. SMC vs. HVC). Signed-off-by: Oliver Upton Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20230404154050.2270077-9-oliver.upton@linux.dev --- Documentation/virt/kvm/api.rst | 18 ++++++++++++++++-- arch/arm64/include/uapi/asm/kvm.h | 4 ++++ arch/arm64/kvm/handle_exit.c | 4 +++- arch/arm64/kvm/hypercalls.c | 16 ++++++++++++++++ 4 files changed, 39 insertions(+), 3 deletions(-) (limited to 'arch/arm64/kvm') diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index 9b01e3d0e757..9497792c4ee5 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -6221,11 +6221,25 @@ to the byte array. __u64 flags; } hypercall; -Unused. This was once used for 'hypercall to userspace'. To implement -such functionality, use KVM_EXIT_IO (x86) or KVM_EXIT_MMIO (all except s390). + +It is strongly recommended that userspace use ``KVM_EXIT_IO`` (x86) or +``KVM_EXIT_MMIO`` (all except s390) to implement functionality that +requires a guest to interact with host userpace. .. note:: KVM_EXIT_IO is significantly faster than KVM_EXIT_MMIO. +For arm64: +---------- + +``nr`` contains the function ID of the guest's SMCCC call. Userspace is +expected to use the ``KVM_GET_ONE_REG`` ioctl to retrieve the call +parameters from the vCPU's GPRs. + +Definition of ``flags``: + - ``KVM_HYPERCALL_EXIT_SMC``: Indicates that the guest used the SMC + conduit to initiate the SMCCC call. If this bit is 0 then the guest + used the HVC conduit for the SMCCC call. + :: /* KVM_EXIT_TPR_ACCESS */ diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h index f9672ef1159a..f86446c5a7e3 100644 --- a/arch/arm64/include/uapi/asm/kvm.h +++ b/arch/arm64/include/uapi/asm/kvm.h @@ -472,12 +472,16 @@ enum { enum kvm_smccc_filter_action { KVM_SMCCC_FILTER_HANDLE = 0, KVM_SMCCC_FILTER_DENY, + KVM_SMCCC_FILTER_FWD_TO_USER, #ifdef __KERNEL__ NR_SMCCC_FILTER_ACTIONS #endif }; +/* arm64-specific KVM_EXIT_HYPERCALL flags */ +#define KVM_HYPERCALL_EXIT_SMC (1U << 0) + #endif #endif /* __ARM_KVM_H__ */ diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c index 68f95dcd41a1..3f43e20c48b6 100644 --- a/arch/arm64/kvm/handle_exit.c +++ b/arch/arm64/kvm/handle_exit.c @@ -71,7 +71,9 @@ static int handle_smc(struct kvm_vcpu *vcpu) * Trap exception, not a Secure Monitor Call exception [...]" * * We need to advance the PC after the trap, as it would - * otherwise return to the same address... + * otherwise return to the same address. Furthermore, pre-incrementing + * the PC before potentially exiting to userspace maintains the same + * abstraction for both SMCs and HVCs. */ kvm_incr_pc(vcpu); diff --git a/arch/arm64/kvm/hypercalls.c b/arch/arm64/kvm/hypercalls.c index ba7cd84c6668..2db53709bec1 100644 --- a/arch/arm64/kvm/hypercalls.c +++ b/arch/arm64/kvm/hypercalls.c @@ -180,6 +180,19 @@ static u8 kvm_smccc_get_action(struct kvm_vcpu *vcpu, u32 func_id) return KVM_SMCCC_FILTER_DENY; } +static void kvm_prepare_hypercall_exit(struct kvm_vcpu *vcpu, u32 func_id) +{ + u8 ec = ESR_ELx_EC(kvm_vcpu_get_esr(vcpu)); + struct kvm_run *run = vcpu->run; + + run->exit_reason = KVM_EXIT_HYPERCALL; + run->hypercall.nr = func_id; + run->hypercall.flags = 0; + + if (ec == ESR_ELx_EC_SMC32 || ec == ESR_ELx_EC_SMC64) + run->hypercall.flags |= KVM_HYPERCALL_EXIT_SMC; +} + int kvm_smccc_call_handler(struct kvm_vcpu *vcpu) { struct kvm_smccc_features *smccc_feat = &vcpu->kvm->arch.smccc_feat; @@ -195,6 +208,9 @@ int kvm_smccc_call_handler(struct kvm_vcpu *vcpu) break; case KVM_SMCCC_FILTER_DENY: goto out; + case KVM_SMCCC_FILTER_FWD_TO_USER: + kvm_prepare_hypercall_exit(vcpu, func_id); + return 0; default: WARN_RATELIMIT(1, "Unhandled SMCCC filter action: %d\n", action); goto out; -- cgit v1.2.3 From 821d935c87bc95253f82deec3cbb457ccf3de003 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Tue, 4 Apr 2023 15:40:46 +0000 Subject: KVM: arm64: Introduce support for userspace SMCCC filtering As the SMCCC (and related specifications) march towards an 'everything and the kitchen sink' interface for interacting with a system it becomes less likely that KVM will support every related feature. We could do better by letting userspace have a crack at it instead. Allow userspace to define an 'SMCCC filter' that applies to both HVCs and SMCs initiated by the guest. Supporting both conduits with this interface is important for a couple of reasons. Guest SMC usage is table stakes for a nested guest, as HVCs are always taken to the virtual EL2. Additionally, guests may want to interact with a service on the secure side which can now be proxied by userspace. Signed-off-by: Oliver Upton Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20230404154050.2270077-10-oliver.upton@linux.dev --- Documentation/virt/kvm/api.rst | 4 ++ Documentation/virt/kvm/devices/vm.rst | 79 +++++++++++++++++++++++++++++++++++ arch/arm64/include/uapi/asm/kvm.h | 11 +++++ arch/arm64/kvm/arm.c | 4 ++ arch/arm64/kvm/hypercalls.c | 60 ++++++++++++++++++++++++++ include/kvm/arm_hypercalls.h | 3 ++ 6 files changed, 161 insertions(+) (limited to 'arch/arm64/kvm') diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index 9497792c4ee5..c8ab2f730945 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -6231,6 +6231,10 @@ requires a guest to interact with host userpace. For arm64: ---------- +SMCCC exits can be enabled depending on the configuration of the SMCCC +filter. See the Documentation/virt/kvm/devices/vm.rst +``KVM_ARM_SMCCC_FILTER`` for more details. + ``nr`` contains the function ID of the guest's SMCCC call. Userspace is expected to use the ``KVM_GET_ONE_REG`` ioctl to retrieve the call parameters from the vCPU's GPRs. diff --git a/Documentation/virt/kvm/devices/vm.rst b/Documentation/virt/kvm/devices/vm.rst index 147efec626e5..9d726e60ec47 100644 --- a/Documentation/virt/kvm/devices/vm.rst +++ b/Documentation/virt/kvm/devices/vm.rst @@ -321,3 +321,82 @@ Allows userspace to query the status of migration mode. if it is enabled :Returns: -EFAULT if the given address is not accessible from kernel space; 0 in case of success. + +6. GROUP: KVM_ARM_VM_SMCCC_CTRL +=============================== + +:Architectures: arm64 + +6.1. ATTRIBUTE: KVM_ARM_VM_SMCCC_FILTER (w/o) +--------------------------------------------- + +:Parameters: Pointer to a ``struct kvm_smccc_filter`` + +:Returns: + + ====== =========================================== + EEXIST Range intersects with a previously inserted + or reserved range + EBUSY A vCPU in the VM has already run + EINVAL Invalid filter configuration + ENOMEM Failed to allocate memory for the in-kernel + representation of the SMCCC filter + ====== =========================================== + +Requests the installation of an SMCCC call filter described as follows:: + + enum kvm_smccc_filter_action { + KVM_SMCCC_FILTER_HANDLE = 0, + KVM_SMCCC_FILTER_DENY, + KVM_SMCCC_FILTER_FWD_TO_USER, + }; + + struct kvm_smccc_filter { + __u32 base; + __u32 nr_functions; + __u8 action; + __u8 pad[15]; + }; + +The filter is defined as a set of non-overlapping ranges. Each +range defines an action to be applied to SMCCC calls within the range. +Userspace can insert multiple ranges into the filter by using +successive calls to this attribute. + +The default configuration of KVM is such that all implemented SMCCC +calls are allowed. Thus, the SMCCC filter can be defined sparsely +by userspace, only describing ranges that modify the default behavior. + +The range expressed by ``struct kvm_smccc_filter`` is +[``base``, ``base + nr_functions``). The range is not allowed to wrap, +i.e. userspace cannot rely on ``base + nr_functions`` overflowing. + +The SMCCC filter applies to both SMC and HVC calls initiated by the +guest. The SMCCC filter gates the in-kernel emulation of SMCCC calls +and as such takes effect before other interfaces that interact with +SMCCC calls (e.g. hypercall bitmap registers). + +Actions: + + - ``KVM_SMCCC_FILTER_HANDLE``: Allows the guest SMCCC call to be + handled in-kernel. It is strongly recommended that userspace *not* + explicitly describe the allowed SMCCC call ranges. + + - ``KVM_SMCCC_FILTER_DENY``: Rejects the guest SMCCC call in-kernel + and returns to the guest. + + - ``KVM_SMCCC_FILTER_FWD_TO_USER``: The guest SMCCC call is forwarded + to userspace with an exit reason of ``KVM_EXIT_HYPERCALL``. + +The ``pad`` field is reserved for future use and must be zero. KVM may +return ``-EINVAL`` if the field is nonzero. + +KVM reserves the 'Arm Architecture Calls' range of function IDs and +will reject attempts to define a filter for any portion of these ranges: + + =========== =============== + Start End (inclusive) + =========== =============== + 0x8000_0000 0x8000_FFFF + 0xC000_0000 0xC000_FFFF + =========== =============== diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h index f86446c5a7e3..3dcfa4bfdf83 100644 --- a/arch/arm64/include/uapi/asm/kvm.h +++ b/arch/arm64/include/uapi/asm/kvm.h @@ -372,6 +372,10 @@ enum { #endif }; +/* Device Control API on vm fd */ +#define KVM_ARM_VM_SMCCC_CTRL 0 +#define KVM_ARM_VM_SMCCC_FILTER 0 + /* Device Control API: ARM VGIC */ #define KVM_DEV_ARM_VGIC_GRP_ADDR 0 #define KVM_DEV_ARM_VGIC_GRP_DIST_REGS 1 @@ -479,6 +483,13 @@ enum kvm_smccc_filter_action { #endif }; +struct kvm_smccc_filter { + __u32 base; + __u32 nr_functions; + __u8 action; + __u8 pad[15]; +}; + /* arm64-specific KVM_EXIT_HYPERCALL flags */ #define KVM_HYPERCALL_EXIT_SMC (1U << 0) diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 1202ac03bee0..efee032c9560 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -1444,6 +1444,8 @@ static int kvm_vm_ioctl_set_device_addr(struct kvm *kvm, static int kvm_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr) { switch (attr->group) { + case KVM_ARM_VM_SMCCC_CTRL: + return kvm_vm_smccc_has_attr(kvm, attr); default: return -ENXIO; } @@ -1452,6 +1454,8 @@ static int kvm_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr) static int kvm_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr) { switch (attr->group) { + case KVM_ARM_VM_SMCCC_CTRL: + return kvm_vm_smccc_set_attr(kvm, attr); default: return -ENXIO; } diff --git a/arch/arm64/kvm/hypercalls.c b/arch/arm64/kvm/hypercalls.c index 2db53709bec1..9a35d6d18193 100644 --- a/arch/arm64/kvm/hypercalls.c +++ b/arch/arm64/kvm/hypercalls.c @@ -145,6 +145,44 @@ static void init_smccc_filter(struct kvm *kvm) WARN_ON_ONCE(r); } +static int kvm_smccc_set_filter(struct kvm *kvm, struct kvm_smccc_filter __user *uaddr) +{ + const void *zero_page = page_to_virt(ZERO_PAGE(0)); + struct kvm_smccc_filter filter; + u32 start, end; + int r; + + if (copy_from_user(&filter, uaddr, sizeof(filter))) + return -EFAULT; + + if (memcmp(filter.pad, zero_page, sizeof(filter.pad))) + return -EINVAL; + + start = filter.base; + end = start + filter.nr_functions - 1; + + if (end < start || filter.action >= NR_SMCCC_FILTER_ACTIONS) + return -EINVAL; + + mutex_lock(&kvm->lock); + + if (kvm_vm_has_ran_once(kvm)) { + r = -EBUSY; + goto out_unlock; + } + + r = mtree_insert_range(&kvm->arch.smccc_filter, start, end, + xa_mk_value(filter.action), GFP_KERNEL_ACCOUNT); + if (r) + goto out_unlock; + + set_bit(KVM_ARCH_FLAG_SMCCC_FILTER_CONFIGURED, &kvm->arch.flags); + +out_unlock: + mutex_unlock(&kvm->lock); + return r; +} + static u8 kvm_smccc_filter_get_action(struct kvm *kvm, u32 func_id) { unsigned long idx = func_id; @@ -569,3 +607,25 @@ int kvm_arm_set_fw_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) return -EINVAL; } + +int kvm_vm_smccc_has_attr(struct kvm *kvm, struct kvm_device_attr *attr) +{ + switch (attr->attr) { + case KVM_ARM_VM_SMCCC_FILTER: + return 0; + default: + return -ENXIO; + } +} + +int kvm_vm_smccc_set_attr(struct kvm *kvm, struct kvm_device_attr *attr) +{ + void __user *uaddr = (void __user *)attr->addr; + + switch (attr->attr) { + case KVM_ARM_VM_SMCCC_FILTER: + return kvm_smccc_set_filter(kvm, uaddr); + default: + return -ENXIO; + } +} diff --git a/include/kvm/arm_hypercalls.h b/include/kvm/arm_hypercalls.h index fe6c31575b05..2df152207ccd 100644 --- a/include/kvm/arm_hypercalls.h +++ b/include/kvm/arm_hypercalls.h @@ -49,4 +49,7 @@ int kvm_arm_copy_fw_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices); int kvm_arm_get_fw_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg); int kvm_arm_set_fw_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg); +int kvm_vm_smccc_has_attr(struct kvm *kvm, struct kvm_device_attr *attr); +int kvm_vm_smccc_set_attr(struct kvm *kvm, struct kvm_device_attr *attr); + #endif -- cgit v1.2.3 From 7e484d2785e2a2e526a6b2679d3e4c1402ffe0ec Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Tue, 4 Apr 2023 15:40:47 +0000 Subject: KVM: arm64: Return NOT_SUPPORTED to guest for unknown PSCI version A subsequent change to KVM will allow negative returns from SMCCC handlers to exit to userspace. Make way for this change by explicitly returning SMCCC_RET_NOT_SUPPORTED to the guest if the VM is configured to use an unknown PSCI version. Add a WARN since this is undoubtedly a KVM bug. Signed-off-by: Oliver Upton Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20230404154050.2270077-11-oliver.upton@linux.dev --- arch/arm64/kvm/psci.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'arch/arm64/kvm') diff --git a/arch/arm64/kvm/psci.c b/arch/arm64/kvm/psci.c index 7fbc4c1b9df0..aff54b106c30 100644 --- a/arch/arm64/kvm/psci.c +++ b/arch/arm64/kvm/psci.c @@ -435,6 +435,7 @@ static int kvm_psci_0_1_call(struct kvm_vcpu *vcpu) int kvm_psci_call(struct kvm_vcpu *vcpu) { u32 psci_fn = smccc_get_function(vcpu); + int version = kvm_psci_version(vcpu); unsigned long val; val = kvm_psci_check_allowed_function(vcpu, psci_fn); @@ -443,7 +444,7 @@ int kvm_psci_call(struct kvm_vcpu *vcpu) return 1; } - switch (kvm_psci_version(vcpu)) { + switch (version) { case KVM_ARM_PSCI_1_1: return kvm_psci_1_x_call(vcpu, 1); case KVM_ARM_PSCI_1_0: @@ -453,6 +454,8 @@ int kvm_psci_call(struct kvm_vcpu *vcpu) case KVM_ARM_PSCI_0_1: return kvm_psci_0_1_call(vcpu); default: - return -EINVAL; + WARN_ONCE(1, "Unknown PSCI version %d", version); + smccc_set_retval(vcpu, SMCCC_RET_NOT_SUPPORTED, 0, 0, 0); + return 1; } } -- cgit v1.2.3 From 37c8e494794786aa8e4acba1f0f5b45f37b11699 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Tue, 4 Apr 2023 15:40:48 +0000 Subject: KVM: arm64: Let errors from SMCCC emulation to reach userspace Typically a negative return from an exit handler is used to request a return to userspace with the specified error. KVM's handling of SMCCC emulation (i.e. both HVCs and SMCs) deviates from the trend and resumes the guest instead. Stop handling negative returns this way and instead let the error percolate to userspace. Suggested-by: Suzuki K Poulose Signed-off-by: Oliver Upton Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20230404154050.2270077-12-oliver.upton@linux.dev --- arch/arm64/kvm/handle_exit.c | 18 ++---------------- 1 file changed, 2 insertions(+), 16 deletions(-) (limited to 'arch/arm64/kvm') diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c index 3f43e20c48b6..6dcd6604b6bc 100644 --- a/arch/arm64/kvm/handle_exit.c +++ b/arch/arm64/kvm/handle_exit.c @@ -36,8 +36,6 @@ static void kvm_handle_guest_serror(struct kvm_vcpu *vcpu, u64 esr) static int handle_hvc(struct kvm_vcpu *vcpu) { - int ret; - trace_kvm_hvc_arm64(*vcpu_pc(vcpu), vcpu_get_reg(vcpu, 0), kvm_vcpu_hvc_get_imm(vcpu)); vcpu->stat.hvc_exit_stat++; @@ -52,19 +50,11 @@ static int handle_hvc(struct kvm_vcpu *vcpu) return 1; } - ret = kvm_smccc_call_handler(vcpu); - if (ret < 0) { - vcpu_set_reg(vcpu, 0, ~0UL); - return 1; - } - - return ret; + return kvm_smccc_call_handler(vcpu); } static int handle_smc(struct kvm_vcpu *vcpu) { - int ret; - /* * "If an SMC instruction executed at Non-secure EL1 is * trapped to EL2 because HCR_EL2.TSC is 1, the exception is a @@ -93,11 +83,7 @@ static int handle_smc(struct kvm_vcpu *vcpu) * at Non-secure EL1 is trapped to EL2 if HCR_EL2.TSC==1, rather than * being treated as UNDEFINED. */ - ret = kvm_smccc_call_handler(vcpu); - if (ret < 0) - vcpu_set_reg(vcpu, 0, ~0UL); - - return ret; + return kvm_smccc_call_handler(vcpu); } /* -- cgit v1.2.3 From 0e5c9a9d6548e9b178d4696c696ae4a21c39ae58 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 5 Apr 2023 12:48:58 +0100 Subject: KVM: arm64: Expose SMC/HVC width to userspace When returning to userspace to handle a SMCCC call, we consistently set PC to point to the instruction immediately after the HVC/SMC. However, should userspace need to know the exact address of the trapping instruction, it needs to know about the *size* of that instruction. For AArch64, this is pretty easy. For AArch32, this is a bit more funky, as Thumb has 16bit encodings for both HVC and SMC. Expose this to userspace with a new flag that directly derives from ESR_EL2.IL. Also update the documentation to reflect the PC state at the point of exit. Finally, this fixes a small buglet where the hypercall.{args,ret} fields would not be cleared on exit, and could contain some random junk. Reviewed-by: Oliver Upton Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/86pm8iv8tj.wl-maz@kernel.org --- Documentation/virt/kvm/api.rst | 8 ++++++++ arch/arm64/include/uapi/asm/kvm.h | 3 ++- arch/arm64/kvm/hypercalls.c | 16 +++++++++++----- 3 files changed, 21 insertions(+), 6 deletions(-) (limited to 'arch/arm64/kvm') diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index c8ab2f730945..103f945959ed 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -6244,6 +6244,14 @@ Definition of ``flags``: conduit to initiate the SMCCC call. If this bit is 0 then the guest used the HVC conduit for the SMCCC call. + - ``KVM_HYPERCALL_EXIT_16BIT``: Indicates that the guest used a 16bit + instruction to initiate the SMCCC call. If this bit is 0 then the + guest used a 32bit instruction. An AArch64 guest always has this + bit set to 0. + +At the point of exit, PC points to the instruction immediately following +the trapping instruction. + :: /* KVM_EXIT_TPR_ACCESS */ diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h index 3dcfa4bfdf83..b1c1edf85480 100644 --- a/arch/arm64/include/uapi/asm/kvm.h +++ b/arch/arm64/include/uapi/asm/kvm.h @@ -491,7 +491,8 @@ struct kvm_smccc_filter { }; /* arm64-specific KVM_EXIT_HYPERCALL flags */ -#define KVM_HYPERCALL_EXIT_SMC (1U << 0) +#define KVM_HYPERCALL_EXIT_SMC (1U << 0) +#define KVM_HYPERCALL_EXIT_16BIT (1U << 1) #endif diff --git a/arch/arm64/kvm/hypercalls.c b/arch/arm64/kvm/hypercalls.c index 9a35d6d18193..3b6523f25afc 100644 --- a/arch/arm64/kvm/hypercalls.c +++ b/arch/arm64/kvm/hypercalls.c @@ -222,13 +222,19 @@ static void kvm_prepare_hypercall_exit(struct kvm_vcpu *vcpu, u32 func_id) { u8 ec = ESR_ELx_EC(kvm_vcpu_get_esr(vcpu)); struct kvm_run *run = vcpu->run; - - run->exit_reason = KVM_EXIT_HYPERCALL; - run->hypercall.nr = func_id; - run->hypercall.flags = 0; + u64 flags = 0; if (ec == ESR_ELx_EC_SMC32 || ec == ESR_ELx_EC_SMC64) - run->hypercall.flags |= KVM_HYPERCALL_EXIT_SMC; + flags |= KVM_HYPERCALL_EXIT_SMC; + + if (!kvm_vcpu_trap_il_is32bit(vcpu)) + flags |= KVM_HYPERCALL_EXIT_16BIT; + + run->exit_reason = KVM_EXIT_HYPERCALL; + run->hypercall = (typeof(run->hypercall)) { + .nr = func_id, + .flags = flags, + }; } int kvm_smccc_call_handler(struct kvm_vcpu *vcpu) -- cgit v1.2.3 From 5a23ad6510c82049f5ab3795841c30e8f3ca324d Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Sat, 8 Apr 2023 12:17:31 +0000 Subject: KVM: arm64: Prevent userspace from handling SMC64 arch range Though presently unused, there is an SMC64 view of the Arm architecture calls defined by the SMCCC. The documentation of the SMCCC filter states that the SMC64 range is reserved, but nothing actually prevents userspace from applying a filter to the range. Insert a range with the HANDLE action for the SMC64 arch range, thereby preventing userspace from imposing filtering/forwarding on it. Fixes: fb88707dd39b ("KVM: arm64: Use a maple tree to represent the SMCCC filter") Signed-off-by: Oliver Upton Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20230408121732.3411329-2-oliver.upton@linux.dev --- arch/arm64/kvm/hypercalls.c | 25 +++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) (limited to 'arch/arm64/kvm') diff --git a/arch/arm64/kvm/hypercalls.c b/arch/arm64/kvm/hypercalls.c index 3b6523f25afc..47254a361295 100644 --- a/arch/arm64/kvm/hypercalls.c +++ b/arch/arm64/kvm/hypercalls.c @@ -121,11 +121,17 @@ static bool kvm_smccc_test_fw_bmap(struct kvm_vcpu *vcpu, u32 func_id) } } -#define SMCCC_ARCH_RANGE_BEGIN ARM_SMCCC_VERSION_FUNC_ID -#define SMCCC_ARCH_RANGE_END \ - ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \ - ARM_SMCCC_SMC_32, \ - 0, ARM_SMCCC_FUNC_MASK) +#define SMC32_ARCH_RANGE_BEGIN ARM_SMCCC_VERSION_FUNC_ID +#define SMC32_ARCH_RANGE_END ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \ + ARM_SMCCC_SMC_32, \ + 0, ARM_SMCCC_FUNC_MASK) + +#define SMC64_ARCH_RANGE_BEGIN ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \ + ARM_SMCCC_SMC_64, \ + 0, 0) +#define SMC64_ARCH_RANGE_END ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \ + ARM_SMCCC_SMC_64, \ + 0, ARM_SMCCC_FUNC_MASK) static void init_smccc_filter(struct kvm *kvm) { @@ -139,10 +145,17 @@ static void init_smccc_filter(struct kvm *kvm) * to the guest. */ r = mtree_insert_range(&kvm->arch.smccc_filter, - SMCCC_ARCH_RANGE_BEGIN, SMCCC_ARCH_RANGE_END, + SMC32_ARCH_RANGE_BEGIN, SMC32_ARCH_RANGE_END, xa_mk_value(KVM_SMCCC_FILTER_HANDLE), GFP_KERNEL_ACCOUNT); WARN_ON_ONCE(r); + + r = mtree_insert_range(&kvm->arch.smccc_filter, + SMC64_ARCH_RANGE_BEGIN, SMC64_ARCH_RANGE_END, + xa_mk_value(KVM_SMCCC_FILTER_HANDLE), + GFP_KERNEL_ACCOUNT); + WARN_ON_ONCE(r); + } static int kvm_smccc_set_filter(struct kvm *kvm, struct kvm_smccc_filter __user *uaddr) -- cgit v1.2.3 From 49e5d16b6fc003407a33a9961b4bcbb970bd1c76 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Wed, 12 Apr 2023 06:27:33 +0000 Subject: KVM: arm64: vgic: Don't acquire its_lock before config_lock commit f00327731131 ("KVM: arm64: Use config_lock to protect vgic state") was meant to rectify a longstanding lock ordering issue in KVM where the kvm->lock is taken while holding vcpu->mutex. As it so happens, the aforementioned commit introduced yet another locking issue by acquiring the its_lock before acquiring the config lock. This is obviously wrong, especially considering that the lock ordering is well documented in vgic.c. Reshuffle the locks once more to take the config_lock before the its_lock. While at it, sprinkle in the lockdep hinting that has become popular as of late to keep lockdep apprised of our ordering. Cc: stable@vger.kernel.org Fixes: f00327731131 ("KVM: arm64: Use config_lock to protect vgic state") Signed-off-by: Oliver Upton Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20230412062733.988229-1-oliver.upton@linux.dev --- arch/arm64/kvm/vgic/vgic-its.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) (limited to 'arch/arm64/kvm') diff --git a/arch/arm64/kvm/vgic/vgic-its.c b/arch/arm64/kvm/vgic/vgic-its.c index 7713cd06104e..750e51e3779a 100644 --- a/arch/arm64/kvm/vgic/vgic-its.c +++ b/arch/arm64/kvm/vgic/vgic-its.c @@ -1958,6 +1958,16 @@ static int vgic_its_create(struct kvm_device *dev, u32 type) mutex_init(&its->its_lock); mutex_init(&its->cmd_lock); + /* Yep, even more trickery for lock ordering... */ +#ifdef CONFIG_LOCKDEP + mutex_lock(&dev->kvm->arch.config_lock); + mutex_lock(&its->cmd_lock); + mutex_lock(&its->its_lock); + mutex_unlock(&its->its_lock); + mutex_unlock(&its->cmd_lock); + mutex_unlock(&dev->kvm->arch.config_lock); +#endif + its->vgic_its_base = VGIC_ADDR_UNDEF; INIT_LIST_HEAD(&its->device_list); @@ -2752,15 +2762,14 @@ static int vgic_its_ctrl(struct kvm *kvm, struct vgic_its *its, u64 attr) return 0; mutex_lock(&kvm->lock); - mutex_lock(&its->its_lock); if (!lock_all_vcpus(kvm)) { - mutex_unlock(&its->its_lock); mutex_unlock(&kvm->lock); return -EBUSY; } mutex_lock(&kvm->arch.config_lock); + mutex_lock(&its->its_lock); switch (attr) { case KVM_DEV_ARM_ITS_CTRL_RESET: @@ -2774,9 +2783,9 @@ static int vgic_its_ctrl(struct kvm *kvm, struct vgic_its *its, u64 attr) break; } + mutex_unlock(&its->its_lock); mutex_unlock(&kvm->arch.config_lock); unlock_all_vcpus(kvm); - mutex_unlock(&its->its_lock); mutex_unlock(&kvm->lock); return ret; } -- cgit v1.2.3 From 55b5bac15939dec3cbcbee1f6271bc3a4afd4534 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sat, 8 Apr 2023 17:04:23 +0100 Subject: KVM: arm64: nvhe: Synchronise with page table walker on vcpu run When taking an exception between the EL1&0 translation regime and the EL2 translation regime, the page table walker is allowed to complete the walks started from EL0 or EL1 while running at EL2. It means that altering the system registers that define the EL1&0 translation regime is fraught with danger *unless* we wait for the completion of such walk with a DSB (R_LFHQG and subsequent statements in the ARM ARM). We already did the right thing for other external agents (SPE, TRBE), but not the PTW. Rework the existing SPE/TRBE synchronisation to include the PTW, and add the missing DSB on guest exit. Signed-off-by: Marc Zyngier Reviewed-by: Oliver Upton --- arch/arm64/kvm/hyp/nvhe/debug-sr.c | 2 -- arch/arm64/kvm/hyp/nvhe/switch.c | 18 ++++++++++++++++++ 2 files changed, 18 insertions(+), 2 deletions(-) (limited to 'arch/arm64/kvm') diff --git a/arch/arm64/kvm/hyp/nvhe/debug-sr.c b/arch/arm64/kvm/hyp/nvhe/debug-sr.c index 2673bde62fad..d756b939f296 100644 --- a/arch/arm64/kvm/hyp/nvhe/debug-sr.c +++ b/arch/arm64/kvm/hyp/nvhe/debug-sr.c @@ -37,7 +37,6 @@ static void __debug_save_spe(u64 *pmscr_el1) /* Now drain all buffered data to memory */ psb_csync(); - dsb(nsh); } static void __debug_restore_spe(u64 pmscr_el1) @@ -69,7 +68,6 @@ static void __debug_save_trace(u64 *trfcr_el1) isb(); /* Drain the trace buffer to memory */ tsb_csync(); - dsb(nsh); } static void __debug_restore_trace(u64 trfcr_el1) diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c index c2cb46ca4fb6..71fa16a0dc77 100644 --- a/arch/arm64/kvm/hyp/nvhe/switch.c +++ b/arch/arm64/kvm/hyp/nvhe/switch.c @@ -272,6 +272,17 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu) */ __debug_save_host_buffers_nvhe(vcpu); + /* + * We're about to restore some new MMU state. Make sure + * ongoing page-table walks that have started before we + * trapped to EL2 have completed. This also synchronises the + * above disabling of SPE and TRBE. + * + * See DDI0487I.a D8.1.5 "Out-of-context translation regimes", + * rule R_LFHQG and subsequent information statements. + */ + dsb(nsh); + __kvm_adjust_pc(vcpu); /* @@ -306,6 +317,13 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu) __timer_disable_traps(vcpu); __hyp_vgic_save_state(vcpu); + /* + * Same thing as before the guest run: we're about to switch + * the MMU context, so let's make sure we don't have any + * ongoing EL1&0 translations. + */ + dsb(nsh); + __deactivate_traps(vcpu); __load_host_stage2(); -- cgit v1.2.3 From a6610435ac17de1ac727c90ad62c723d86c7ea36 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 13 Apr 2023 14:23:42 +0100 Subject: KVM: arm64: Handle 32bit CNTPCTSS traps When CNTPOFF isn't implemented and that we have a non-zero counter offset, CNTPCT and CNTPCTSS are trapped. We properly handle the former, but not the latter, as it is not present in the sysreg table (despite being actually handled in the code). Bummer. Just populate the cp15_64 table with the missing register. Reported-by: Reiji Watanabe Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/sysreg.h | 1 + arch/arm64/kvm/sys_regs.c | 1 + 2 files changed, 2 insertions(+) (limited to 'arch/arm64/kvm') diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index f8da9e1b0c11..a43f21559c3e 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -403,6 +403,7 @@ #define SYS_AARCH32_CNTP_CTL sys_reg(0, 0, 14, 2, 1) #define SYS_AARCH32_CNTPCT sys_reg(0, 0, 0, 14, 0) #define SYS_AARCH32_CNTP_CVAL sys_reg(0, 2, 0, 14, 0) +#define SYS_AARCH32_CNTPCTSS sys_reg(0, 8, 0, 14, 0) #define __PMEV_op2(n) ((n) & 0x7) #define __CNTR_CRm(n) (0x8 | (((n) >> 3) & 0x3)) diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index be7c2598e563..feca77083a5c 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -2538,6 +2538,7 @@ static const struct sys_reg_desc cp15_64_regs[] = { { Op1( 1), CRn( 0), CRm(12), Op2( 0), access_gic_sgi }, /* ICC_ASGI1R */ { Op1( 2), CRn( 0), CRm(12), Op2( 0), access_gic_sgi }, /* ICC_SGI0R */ { SYS_DESC(SYS_AARCH32_CNTP_CVAL), access_arch_timer }, + { SYS_DESC(SYS_AARCH32_CNTPCTSS), access_arch_timer }, }; static bool check_sysreg_table(const struct sys_reg_desc *table, unsigned int n, -- cgit v1.2.3 From 7e1b2329c205d0a08ebaf3f619318a8a18f36644 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sat, 8 Apr 2023 17:04:24 +0100 Subject: KVM: arm64: nvhe: Synchronise with page table walker on TLBI A TLBI from EL2 impacting EL1 involves messing with the EL1&0 translation regime, and the page table walker may still be performing speculative walks. Piggyback on the existing DSBs to always have a DSB ISH that will synchronise all load/store operations that the PTW may still have. Reviewed-by: Oliver Upton Signed-off-by: Marc Zyngier --- arch/arm64/kvm/hyp/nvhe/tlb.c | 38 +++++++++++++++++++++++++++++--------- 1 file changed, 29 insertions(+), 9 deletions(-) (limited to 'arch/arm64/kvm') diff --git a/arch/arm64/kvm/hyp/nvhe/tlb.c b/arch/arm64/kvm/hyp/nvhe/tlb.c index d296d617f589..978179133f4b 100644 --- a/arch/arm64/kvm/hyp/nvhe/tlb.c +++ b/arch/arm64/kvm/hyp/nvhe/tlb.c @@ -15,8 +15,31 @@ struct tlb_inv_context { }; static void __tlb_switch_to_guest(struct kvm_s2_mmu *mmu, - struct tlb_inv_context *cxt) + struct tlb_inv_context *cxt, + bool nsh) { + /* + * We have two requirements: + * + * - ensure that the page table updates are visible to all + * CPUs, for which a dsb(DOMAIN-st) is what we need, DOMAIN + * being either ish or nsh, depending on the invalidation + * type. + * + * - complete any speculative page table walk started before + * we trapped to EL2 so that we can mess with the MM + * registers out of context, for which dsb(nsh) is enough + * + * The composition of these two barriers is a dsb(DOMAIN), and + * the 'nsh' parameter tracks the distinction between + * Inner-Shareable and Non-Shareable, as specified by the + * callers. + */ + if (nsh) + dsb(nsh); + else + dsb(ish); + if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) { u64 val; @@ -60,10 +83,8 @@ void __kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu, { struct tlb_inv_context cxt; - dsb(ishst); - /* Switch to requested VMID */ - __tlb_switch_to_guest(mmu, &cxt); + __tlb_switch_to_guest(mmu, &cxt, false); /* * We could do so much better if we had the VA as well. @@ -113,10 +134,8 @@ void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu) { struct tlb_inv_context cxt; - dsb(ishst); - /* Switch to requested VMID */ - __tlb_switch_to_guest(mmu, &cxt); + __tlb_switch_to_guest(mmu, &cxt, false); __tlbi(vmalls12e1is); dsb(ish); @@ -130,7 +149,7 @@ void __kvm_flush_cpu_context(struct kvm_s2_mmu *mmu) struct tlb_inv_context cxt; /* Switch to requested VMID */ - __tlb_switch_to_guest(mmu, &cxt); + __tlb_switch_to_guest(mmu, &cxt, false); __tlbi(vmalle1); asm volatile("ic iallu"); @@ -142,7 +161,8 @@ void __kvm_flush_cpu_context(struct kvm_s2_mmu *mmu) void __kvm_flush_vm_context(void) { - dsb(ishst); + /* Same remark as in __tlb_switch_to_guest() */ + dsb(ish); __tlbi(alle1is); /* -- cgit v1.2.3 From 8442d65373c6316876208c1ad27729e9682fa3cf Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sat, 8 Apr 2023 17:04:25 +0100 Subject: KVM: arm64: pkvm: Document the side effects of kvm_flush_dcache_to_poc() We rely on the presence of a DSB at the end of kvm_flush_dcache_to_poc() that, on top of ensuring completion of the cache clean, also covers the speculative page table walk started from EL1. Document this dependency. Signed-off-by: Marc Zyngier Reviewed-by: Oliver Upton --- arch/arm64/kvm/hyp/nvhe/mem_protect.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'arch/arm64/kvm') diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c index 552653fa18be..2e9ec4a2a4a3 100644 --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c @@ -297,6 +297,13 @@ int __pkvm_prot_finalize(void) params->vttbr = kvm_get_vttbr(mmu); params->vtcr = host_mmu.arch.vtcr; params->hcr_el2 |= HCR_VM; + + /* + * The CMO below not only cleans the updated params to the + * PoC, but also provides the DSB that ensures ongoing + * page-table walks that have started before we trapped to EL2 + * have completed. + */ kvm_flush_dcache_to_poc(params, sizeof(*params)); write_sysreg(params->hcr_el2, hcr_el2); -- cgit v1.2.3 From 1ff2755d6800d60bee96fb303cbbf30f9bb483a2 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sat, 8 Apr 2023 17:04:26 +0100 Subject: KVM: arm64: vhe: Synchronise with page table walker on MMU update Contrary to nVHE, VHE is a lot easier when it comes to dealing with speculative page table walks started at EL1. As we only change EL1&0 translation regime when context-switching, we already benefit from the effect of the DSB that sits in the context switch code. We only need to take care of it in the NV case, where we can flip between between two EL1 contexts (one of them being the virtual EL2) without a context switch. Signed-off-by: Marc Zyngier Reviewed-by: Oliver Upton --- arch/arm64/kvm/hyp/vhe/sysreg-sr.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'arch/arm64/kvm') diff --git a/arch/arm64/kvm/hyp/vhe/sysreg-sr.c b/arch/arm64/kvm/hyp/vhe/sysreg-sr.c index 7b44f6b3b547..b35a178e7e0d 100644 --- a/arch/arm64/kvm/hyp/vhe/sysreg-sr.c +++ b/arch/arm64/kvm/hyp/vhe/sysreg-sr.c @@ -13,6 +13,7 @@ #include #include #include +#include /* * VHE: Host and guest must save mdscr_el1 and sp_el0 (and the PC and @@ -69,6 +70,17 @@ void kvm_vcpu_load_sysregs_vhe(struct kvm_vcpu *vcpu) host_ctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt; __sysreg_save_user_state(host_ctxt); + /* + * When running a normal EL1 guest, we only load a new vcpu + * after a context switch, which imvolves a DSB, so all + * speculative EL1&0 walks will have already completed. + * If running NV, the vcpu may transition between vEL1 and + * vEL2 without a context switch, so make sure we complete + * those walks before loading a new context. + */ + if (vcpu_has_nv(vcpu)) + dsb(nsh); + /* * Load guest EL1 and user state * -- cgit v1.2.3 From bcf3e7da3ad3bfea38ac6ba9f56b99b2877af51f Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sat, 8 Apr 2023 17:04:27 +0100 Subject: KVM: arm64: vhe: Drop extra isb() on guest exit __kvm_vcpu_run_vhe() end on VHE with an isb(). However, this function is only reachable via kvm_call_hyp_ret(), which already contains an isb() in order to mimick the behaviour of nVHE and provide a context synchronisation event. We thus have two isb()s back to back, which is one too many. Drop the first one and solely rely on the one in the helper. Signed-off-by: Marc Zyngier Reviewed-by: Oliver Upton --- arch/arm64/kvm/hyp/vhe/switch.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'arch/arm64/kvm') diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c index cd3f3117bf16..3d868e84c7a0 100644 --- a/arch/arm64/kvm/hyp/vhe/switch.c +++ b/arch/arm64/kvm/hyp/vhe/switch.c @@ -227,11 +227,10 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu) /* * When we exit from the guest we change a number of CPU configuration - * parameters, such as traps. Make sure these changes take effect - * before running the host or additional guests. + * parameters, such as traps. We rely on the isb() in kvm_call_hyp*() + * to make sure these changes take effect before running the host or + * additional guests. */ - isb(); - return ret; } -- cgit v1.2.3 From 4ff910be01c0ca28c2ea8b354dd47a3a17524489 Mon Sep 17 00:00:00 2001 From: Reiji Watanabe Date: Tue, 18 Apr 2023 19:18:51 -0700 Subject: KVM: arm64: Acquire mp_state_lock in kvm_arch_vcpu_ioctl_vcpu_init() kvm_arch_vcpu_ioctl_vcpu_init() doesn't acquire mp_state_lock when setting the mp_state to KVM_MP_STATE_RUNNABLE. Fix the code to acquire the lock. Signed-off-by: Reiji Watanabe [maz: minor refactor] Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20230419021852.2981107-2-reijiw@google.com --- arch/arm64/kvm/arm.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'arch/arm64/kvm') diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index fd8d355aca15..ad3655a7d122 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -1241,11 +1241,15 @@ static int kvm_arch_vcpu_ioctl_vcpu_init(struct kvm_vcpu *vcpu, /* * Handle the "start in power-off" case. */ + spin_lock(&vcpu->arch.mp_state_lock); + if (test_bit(KVM_ARM_VCPU_POWER_OFF, vcpu->arch.features)) - kvm_arm_vcpu_power_off(vcpu); + __kvm_arm_vcpu_power_off(vcpu); else WRITE_ONCE(vcpu->arch.mp_state.mp_state, KVM_MP_STATE_RUNNABLE); + spin_unlock(&vcpu->arch.mp_state_lock); + return 0; } -- cgit v1.2.3 From a189884bdc9238aeba941c50f02e25eb584fafed Mon Sep 17 00:00:00 2001 From: Reiji Watanabe Date: Tue, 18 Apr 2023 19:18:52 -0700 Subject: KVM: arm64: Have kvm_psci_vcpu_on() use WRITE_ONCE() to update mp_state All accessors of kvm_vcpu_arch::mp_state should be {READ,WRITE}_ONCE(), since readers of the mp_state don't acquire the mp_state_lock. Nonetheless, kvm_psci_vcpu_on() updates the mp_state without using WRITE_ONCE(). So, fix the code to update the mp_state using WRITE_ONCE. Signed-off-by: Reiji Watanabe Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20230419021852.2981107-3-reijiw@google.com --- arch/arm64/kvm/psci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/arm64/kvm') diff --git a/arch/arm64/kvm/psci.c b/arch/arm64/kvm/psci.c index 5767e6baa61a..d046e82e3723 100644 --- a/arch/arm64/kvm/psci.c +++ b/arch/arm64/kvm/psci.c @@ -110,7 +110,7 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu) */ smp_wmb(); - vcpu->arch.mp_state.mp_state = KVM_MP_STATE_RUNNABLE; + WRITE_ONCE(vcpu->arch.mp_state.mp_state, KVM_MP_STATE_RUNNABLE); kvm_vcpu_wake_up(vcpu); out_unlock: -- cgit v1.2.3