summaryrefslogtreecommitdiff
path: root/arch/arm64/kvm/hypercalls.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2023-05-01 22:06:20 +0300
committerLinus Torvalds <torvalds@linux-foundation.org>2023-05-01 22:06:20 +0300
commitc8c655c34e33544aec9d64b660872ab33c29b5f1 (patch)
tree4aad88f698f04cef9e5d9d573a6df6283085dadd /arch/arm64/kvm/hypercalls.c
parentd75439d64a1e2b35e0f08906205b00279753cbed (diff)
parentb3c98052d46948a8d65d2778c7f306ff38366aac (diff)
downloadlinux-c8c655c34e33544aec9d64b660872ab33c29b5f1.tar.xz
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull kvm updates from Paolo Bonzini: "s390: - More phys_to_virt conversions - Improvement of AP management for VSIE (nested virtualization) ARM64: - Numerous fixes for the pathological lock inversion issue that plagued KVM/arm64 since... forever. - New framework allowing SMCCC-compliant hypercalls to be forwarded to userspace, hopefully paving the way for some more features being moved to VMMs rather than be implemented in the kernel. - Large rework of the timer code to allow a VM-wide offset to be applied to both virtual and physical counters as well as a per-timer, per-vcpu offset that complements the global one. This last part allows the NV timer code to be implemented on top. - A small set of fixes to make sure that we don't change anything affecting the EL1&0 translation regime just after having having taken an exception to EL2 until we have executed a DSB. This ensures that speculative walks started in EL1&0 have completed. - The usual selftest fixes and improvements. x86: - Optimize CR0.WP toggling by avoiding an MMU reload when TDP is enabled, and by giving the guest control of CR0.WP when EPT is enabled on VMX (VMX-only because SVM doesn't support per-bit controls) - Add CR0/CR4 helpers to query single bits, and clean up related code where KVM was interpreting kvm_read_cr4_bits()'s "unsigned long" return as a bool - Move AMD_PSFD to cpufeatures.h and purge KVM's definition - Avoid unnecessary writes+flushes when the guest is only adding new PTEs - Overhaul .sync_page() and .invlpg() to utilize .sync_page()'s optimizations when emulating invalidations - Clean up the range-based flushing APIs - Revamp the TDP MMU's reaping of Accessed/Dirty bits to clear a single A/D bit using a LOCK AND instead of XCHG, and skip all of the "handle changed SPTE" overhead associated with writing the entire entry - Track the number of "tail" entries in a pte_list_desc to avoid having to walk (potentially) all descriptors during insertion and deletion, which gets quite expensive if the guest is spamming fork() - Disallow virtualizing legacy LBRs if architectural LBRs are available, the two are mutually exclusive in hardware - Disallow writes to immutable feature MSRs (notably PERF_CAPABILITIES) after KVM_RUN, similar to CPUID features - Overhaul the vmx_pmu_caps selftest to better validate PERF_CAPABILITIES - Apply PMU filters to emulated events and add test coverage to the pmu_event_filter selftest - AMD SVM: - Add support for virtual NMIs - Fixes for edge cases related to virtual interrupts - Intel AMX: - Don't advertise XTILE_CFG in KVM_GET_SUPPORTED_CPUID if XTILE_DATA is not being reported due to userspace not opting in via prctl() - Fix a bug in emulation of ENCLS in compatibility mode - Allow emulation of NOP and PAUSE for L2 - AMX selftests improvements - Misc cleanups MIPS: - Constify MIPS's internal callbacks (a leftover from the hardware enabling rework that landed in 6.3) Generic: - Drop unnecessary casts from "void *" throughout kvm_main.c - Tweak the layout of "struct kvm_mmu_memory_cache" to shrink the struct size by 8 bytes on 64-bit kernels by utilizing a padding hole Documentation: - Fix goof introduced by the conversion to rST" * tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (211 commits) KVM: s390: pci: fix virtual-physical confusion on module unload/load KVM: s390: vsie: clarifications on setting the APCB KVM: s390: interrupt: fix virtual-physical confusion for next alert GISA KVM: arm64: Have kvm_psci_vcpu_on() use WRITE_ONCE() to update mp_state KVM: arm64: Acquire mp_state_lock in kvm_arch_vcpu_ioctl_vcpu_init() KVM: selftests: Test the PMU event "Instructions retired" KVM: selftests: Copy full counter values from guest in PMU event filter test KVM: selftests: Use error codes to signal errors in PMU event filter test KVM: selftests: Print detailed info in PMU event filter asserts KVM: selftests: Add helpers for PMC asserts in PMU event filter test KVM: selftests: Add a common helper for the PMU event filter guest code KVM: selftests: Fix spelling mistake "perrmited" -> "permitted" KVM: arm64: vhe: Drop extra isb() on guest exit KVM: arm64: vhe: Synchronise with page table walker on MMU update KVM: arm64: pkvm: Document the side effects of kvm_flush_dcache_to_poc() KVM: arm64: nvhe: Synchronise with page table walker on TLBI KVM: arm64: Handle 32bit CNTPCTSS traps KVM: arm64: nvhe: Synchronise with page table walker on vcpu run KVM: arm64: vgic: Don't acquire its_lock before config_lock KVM: selftests: Add test to verify KVM's supported XCR0 ...
Diffstat (limited to 'arch/arm64/kvm/hypercalls.c')
-rw-r--r--arch/arm64/kvm/hypercalls.c189
1 files changed, 179 insertions, 10 deletions
diff --git a/arch/arm64/kvm/hypercalls.c b/arch/arm64/kvm/hypercalls.c
index c4b4678bc4a4..7fb4df0456de 100644
--- a/arch/arm64/kvm/hypercalls.c
+++ b/arch/arm64/kvm/hypercalls.c
@@ -47,7 +47,7 @@ static void kvm_ptp_get_time(struct kvm_vcpu *vcpu, u64 *val)
cycles = systime_snapshot.cycles - vcpu->kvm->arch.timer_data.voffset;
break;
case KVM_PTP_PHYS_COUNTER:
- cycles = systime_snapshot.cycles;
+ cycles = systime_snapshot.cycles - vcpu->kvm->arch.timer_data.poffset;
break;
default:
return;
@@ -65,7 +65,7 @@ static void kvm_ptp_get_time(struct kvm_vcpu *vcpu, u64 *val)
val[3] = lower_32_bits(cycles);
}
-static bool kvm_hvc_call_default_allowed(u32 func_id)
+static bool kvm_smccc_default_allowed(u32 func_id)
{
switch (func_id) {
/*
@@ -93,7 +93,7 @@ static bool kvm_hvc_call_default_allowed(u32 func_id)
}
}
-static bool kvm_hvc_call_allowed(struct kvm_vcpu *vcpu, u32 func_id)
+static bool kvm_smccc_test_fw_bmap(struct kvm_vcpu *vcpu, u32 func_id)
{
struct kvm_smccc_features *smccc_feat = &vcpu->kvm->arch.smccc_feat;
@@ -117,20 +117,161 @@ static bool kvm_hvc_call_allowed(struct kvm_vcpu *vcpu, u32 func_id)
return test_bit(KVM_REG_ARM_VENDOR_HYP_BIT_PTP,
&smccc_feat->vendor_hyp_bmap);
default:
- return kvm_hvc_call_default_allowed(func_id);
+ return false;
+ }
+}
+
+#define SMC32_ARCH_RANGE_BEGIN ARM_SMCCC_VERSION_FUNC_ID
+#define SMC32_ARCH_RANGE_END ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \
+ ARM_SMCCC_SMC_32, \
+ 0, ARM_SMCCC_FUNC_MASK)
+
+#define SMC64_ARCH_RANGE_BEGIN ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \
+ ARM_SMCCC_SMC_64, \
+ 0, 0)
+#define SMC64_ARCH_RANGE_END ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \
+ ARM_SMCCC_SMC_64, \
+ 0, ARM_SMCCC_FUNC_MASK)
+
+static void init_smccc_filter(struct kvm *kvm)
+{
+ int r;
+
+ mt_init(&kvm->arch.smccc_filter);
+
+ /*
+ * Prevent userspace from handling any SMCCC calls in the architecture
+ * range, avoiding the risk of misrepresenting Spectre mitigation status
+ * to the guest.
+ */
+ r = mtree_insert_range(&kvm->arch.smccc_filter,
+ SMC32_ARCH_RANGE_BEGIN, SMC32_ARCH_RANGE_END,
+ xa_mk_value(KVM_SMCCC_FILTER_HANDLE),
+ GFP_KERNEL_ACCOUNT);
+ WARN_ON_ONCE(r);
+
+ r = mtree_insert_range(&kvm->arch.smccc_filter,
+ SMC64_ARCH_RANGE_BEGIN, SMC64_ARCH_RANGE_END,
+ xa_mk_value(KVM_SMCCC_FILTER_HANDLE),
+ GFP_KERNEL_ACCOUNT);
+ WARN_ON_ONCE(r);
+
+}
+
+static int kvm_smccc_set_filter(struct kvm *kvm, struct kvm_smccc_filter __user *uaddr)
+{
+ const void *zero_page = page_to_virt(ZERO_PAGE(0));
+ struct kvm_smccc_filter filter;
+ u32 start, end;
+ int r;
+
+ if (copy_from_user(&filter, uaddr, sizeof(filter)))
+ return -EFAULT;
+
+ if (memcmp(filter.pad, zero_page, sizeof(filter.pad)))
+ return -EINVAL;
+
+ start = filter.base;
+ end = start + filter.nr_functions - 1;
+
+ if (end < start || filter.action >= NR_SMCCC_FILTER_ACTIONS)
+ return -EINVAL;
+
+ mutex_lock(&kvm->arch.config_lock);
+
+ if (kvm_vm_has_ran_once(kvm)) {
+ r = -EBUSY;
+ goto out_unlock;
}
+
+ r = mtree_insert_range(&kvm->arch.smccc_filter, start, end,
+ xa_mk_value(filter.action), GFP_KERNEL_ACCOUNT);
+ if (r)
+ goto out_unlock;
+
+ set_bit(KVM_ARCH_FLAG_SMCCC_FILTER_CONFIGURED, &kvm->arch.flags);
+
+out_unlock:
+ mutex_unlock(&kvm->arch.config_lock);
+ return r;
+}
+
+static u8 kvm_smccc_filter_get_action(struct kvm *kvm, u32 func_id)
+{
+ unsigned long idx = func_id;
+ void *val;
+
+ if (!test_bit(KVM_ARCH_FLAG_SMCCC_FILTER_CONFIGURED, &kvm->arch.flags))
+ return KVM_SMCCC_FILTER_HANDLE;
+
+ /*
+ * But where's the error handling, you say?
+ *
+ * mt_find() returns NULL if no entry was found, which just so happens
+ * to match KVM_SMCCC_FILTER_HANDLE.
+ */
+ val = mt_find(&kvm->arch.smccc_filter, &idx, idx);
+ return xa_to_value(val);
}
-int kvm_hvc_call_handler(struct kvm_vcpu *vcpu)
+static u8 kvm_smccc_get_action(struct kvm_vcpu *vcpu, u32 func_id)
+{
+ /*
+ * Intervening actions in the SMCCC filter take precedence over the
+ * pseudo-firmware register bitmaps.
+ */
+ u8 action = kvm_smccc_filter_get_action(vcpu->kvm, func_id);
+ if (action != KVM_SMCCC_FILTER_HANDLE)
+ return action;
+
+ if (kvm_smccc_test_fw_bmap(vcpu, func_id) ||
+ kvm_smccc_default_allowed(func_id))
+ return KVM_SMCCC_FILTER_HANDLE;
+
+ return KVM_SMCCC_FILTER_DENY;
+}
+
+static void kvm_prepare_hypercall_exit(struct kvm_vcpu *vcpu, u32 func_id)
+{
+ u8 ec = ESR_ELx_EC(kvm_vcpu_get_esr(vcpu));
+ struct kvm_run *run = vcpu->run;
+ u64 flags = 0;
+
+ if (ec == ESR_ELx_EC_SMC32 || ec == ESR_ELx_EC_SMC64)
+ flags |= KVM_HYPERCALL_EXIT_SMC;
+
+ if (!kvm_vcpu_trap_il_is32bit(vcpu))
+ flags |= KVM_HYPERCALL_EXIT_16BIT;
+
+ run->exit_reason = KVM_EXIT_HYPERCALL;
+ run->hypercall = (typeof(run->hypercall)) {
+ .nr = func_id,
+ .flags = flags,
+ };
+}
+
+int kvm_smccc_call_handler(struct kvm_vcpu *vcpu)
{
struct kvm_smccc_features *smccc_feat = &vcpu->kvm->arch.smccc_feat;
u32 func_id = smccc_get_function(vcpu);
u64 val[4] = {SMCCC_RET_NOT_SUPPORTED};
u32 feature;
+ u8 action;
gpa_t gpa;
- if (!kvm_hvc_call_allowed(vcpu, func_id))
+ action = kvm_smccc_get_action(vcpu, func_id);
+ switch (action) {
+ case KVM_SMCCC_FILTER_HANDLE:
+ break;
+ case KVM_SMCCC_FILTER_DENY:
+ goto out;
+ case KVM_SMCCC_FILTER_FWD_TO_USER:
+ kvm_prepare_hypercall_exit(vcpu, func_id);
+ return 0;
+ default:
+ WARN_RATELIMIT(1, "Unhandled SMCCC filter action: %d\n", action);
goto out;
+ }
switch (func_id) {
case ARM_SMCCC_VERSION_FUNC_ID:
@@ -245,6 +386,13 @@ void kvm_arm_init_hypercalls(struct kvm *kvm)
smccc_feat->std_bmap = KVM_ARM_SMCCC_STD_FEATURES;
smccc_feat->std_hyp_bmap = KVM_ARM_SMCCC_STD_HYP_FEATURES;
smccc_feat->vendor_hyp_bmap = KVM_ARM_SMCCC_VENDOR_HYP_FEATURES;
+
+ init_smccc_filter(kvm);
+}
+
+void kvm_arm_teardown_hypercalls(struct kvm *kvm)
+{
+ mtree_destroy(&kvm->arch.smccc_filter);
}
int kvm_arm_get_fw_num_regs(struct kvm_vcpu *vcpu)
@@ -377,17 +525,16 @@ static int kvm_arm_set_fw_reg_bmap(struct kvm_vcpu *vcpu, u64 reg_id, u64 val)
if (val & ~fw_reg_features)
return -EINVAL;
- mutex_lock(&kvm->lock);
+ mutex_lock(&kvm->arch.config_lock);
- if (test_bit(KVM_ARCH_FLAG_HAS_RAN_ONCE, &kvm->arch.flags) &&
- val != *fw_reg_bmap) {
+ if (kvm_vm_has_ran_once(kvm) && val != *fw_reg_bmap) {
ret = -EBUSY;
goto out;
}
WRITE_ONCE(*fw_reg_bmap, val);
out:
- mutex_unlock(&kvm->lock);
+ mutex_unlock(&kvm->arch.config_lock);
return ret;
}
@@ -481,3 +628,25 @@ int kvm_arm_set_fw_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
return -EINVAL;
}
+
+int kvm_vm_smccc_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
+{
+ switch (attr->attr) {
+ case KVM_ARM_VM_SMCCC_FILTER:
+ return 0;
+ default:
+ return -ENXIO;
+ }
+}
+
+int kvm_vm_smccc_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
+{
+ void __user *uaddr = (void __user *)attr->addr;
+
+ switch (attr->attr) {
+ case KVM_ARM_VM_SMCCC_FILTER:
+ return kvm_smccc_set_filter(kvm, uaddr);
+ default:
+ return -ENXIO;
+ }
+}