summaryrefslogtreecommitdiff
path: root/arch/x86/kvm/mmu/mmu.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2023-07-04 01:32:22 +0300
committerLinus Torvalds <torvalds@linux-foundation.org>2023-07-04 01:32:22 +0300
commite8069f5a8e3bdb5fdeeff895780529388592ee7a (patch)
treece35ab85db9b66a7e488707fccdb33ce54f696dd /arch/x86/kvm/mmu/mmu.c
parenteded37770c9f80ecd5ba842359c4f1058d9812c3 (diff)
parent255006adb3da71bb75c334453786df781b415f54 (diff)
downloadlinux-e8069f5a8e3bdb5fdeeff895780529388592ee7a.tar.xz
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull kvm updates from Paolo Bonzini: "ARM64: - Eager page splitting optimization for dirty logging, optionally allowing for a VM to avoid the cost of hugepage splitting in the stage-2 fault path. - Arm FF-A proxy for pKVM, allowing a pKVM host to safely interact with services that live in the Secure world. pKVM intervenes on FF-A calls to guarantee the host doesn't misuse memory donated to the hyp or a pKVM guest. - Support for running the split hypervisor with VHE enabled, known as 'hVHE' mode. This is extremely useful for testing the split hypervisor on VHE-only systems, and paves the way for new use cases that depend on having two TTBRs available at EL2. - Generalized framework for configurable ID registers from userspace. KVM/arm64 currently prevents arbitrary CPU feature set configuration from userspace, but the intent is to relax this limitation and allow userspace to select a feature set consistent with the CPU. - Enable the use of Branch Target Identification (FEAT_BTI) in the hypervisor. - Use a separate set of pointer authentication keys for the hypervisor when running in protected mode, as the host is untrusted at runtime. - Ensure timer IRQs are consistently released in the init failure paths. - Avoid trapping CTR_EL0 on systems with Enhanced Virtualization Traps (FEAT_EVT), as it is a register commonly read from userspace. - Erratum workaround for the upcoming AmpereOne part, which has broken hardware A/D state management. RISC-V: - Redirect AMO load/store misaligned traps to KVM guest - Trap-n-emulate AIA in-kernel irqchip for KVM guest - Svnapot support for KVM Guest s390: - New uvdevice secret API - CMM selftest and fixes - fix racy access to target CPU for diag 9c x86: - Fix missing/incorrect #GP checks on ENCLS - Use standard mmu_notifier hooks for handling APIC access page - Drop now unnecessary TR/TSS load after VM-Exit on AMD - Print more descriptive information about the status of SEV and SEV-ES during module load - Add a test for splitting and reconstituting hugepages during and after dirty logging - Add support for CPU pinning in demand paging test - Add support for AMD PerfMonV2, with a variety of cleanups and minor fixes included along the way - Add a "nx_huge_pages=never" option to effectively avoid creating NX hugepage recovery threads (because nx_huge_pages=off can be toggled at runtime) - Move handling of PAT out of MTRR code and dedup SVM+VMX code - Fix output of PIC poll command emulation when there's an interrupt - Add a maintainer's handbook to document KVM x86 processes, preferred coding style, testing expectations, etc. - Misc cleanups, fixes and comments Generic: - Miscellaneous bugfixes and cleanups Selftests: - Generate dependency files so that partial rebuilds work as expected" * tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (153 commits) Documentation/process: Add a maintainer handbook for KVM x86 Documentation/process: Add a label for the tip tree handbook's coding style KVM: arm64: Fix misuse of KVM_ARM_VCPU_POWER_OFF bit index RISC-V: KVM: Remove unneeded semicolon RISC-V: KVM: Allow Svnapot extension for Guest/VM riscv: kvm: define vcpu_sbi_ext_pmu in header RISC-V: KVM: Expose IMSIC registers as attributes of AIA irqchip RISC-V: KVM: Add in-kernel virtualization of AIA IMSIC RISC-V: KVM: Expose APLIC registers as attributes of AIA irqchip RISC-V: KVM: Add in-kernel emulation of AIA APLIC RISC-V: KVM: Implement device interface for AIA irqchip RISC-V: KVM: Skeletal in-kernel AIA irqchip support RISC-V: KVM: Set kvm_riscv_aia_nr_hgei to zero RISC-V: KVM: Add APLIC related defines RISC-V: KVM: Add IMSIC related defines RISC-V: KVM: Implement guest external interrupt line management KVM: x86: Remove PRIx* definitions as they are solely for user space s390/uv: Update query for secret-UVCs s390/uv: replace scnprintf with sysfs_emit s390/uvdevice: Add 'Lock Secret Store' UVC ...
Diffstat (limited to 'arch/x86/kvm/mmu/mmu.c')
-rw-r--r--arch/x86/kvm/mmu/mmu.c53
1 files changed, 48 insertions, 5 deletions
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 6eaa3d6994ae..ec169f5c7dce 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -58,6 +58,8 @@
extern bool itlb_multihit_kvm_mitigation;
+static bool nx_hugepage_mitigation_hard_disabled;
+
int __read_mostly nx_huge_pages = -1;
static uint __read_mostly nx_huge_pages_recovery_period_ms;
#ifdef CONFIG_PREEMPT_RT
@@ -67,12 +69,13 @@ static uint __read_mostly nx_huge_pages_recovery_ratio = 0;
static uint __read_mostly nx_huge_pages_recovery_ratio = 60;
#endif
+static int get_nx_huge_pages(char *buffer, const struct kernel_param *kp);
static int set_nx_huge_pages(const char *val, const struct kernel_param *kp);
static int set_nx_huge_pages_recovery_param(const char *val, const struct kernel_param *kp);
static const struct kernel_param_ops nx_huge_pages_ops = {
.set = set_nx_huge_pages,
- .get = param_get_bool,
+ .get = get_nx_huge_pages,
};
static const struct kernel_param_ops nx_huge_pages_recovery_param_ops = {
@@ -1600,6 +1603,10 @@ bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range)
if (tdp_mmu_enabled)
flush = kvm_tdp_mmu_unmap_gfn_range(kvm, range, flush);
+ if (kvm_x86_ops.set_apic_access_page_addr &&
+ range->slot->id == APIC_ACCESS_PAGE_PRIVATE_MEMSLOT)
+ kvm_make_all_cpus_request(kvm, KVM_REQ_APIC_PAGE_RELOAD);
+
return flush;
}
@@ -5797,6 +5804,14 @@ static void __kvm_mmu_invalidate_addr(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu
vcpu_clear_mmio_info(vcpu, addr);
+ /*
+ * Walking and synchronizing SPTEs both assume they are operating in
+ * the context of the current MMU, and would need to be reworked if
+ * this is ever used to sync the guest_mmu, e.g. to emulate INVEPT.
+ */
+ if (WARN_ON_ONCE(mmu != vcpu->arch.mmu))
+ return;
+
if (!VALID_PAGE(root_hpa))
return;
@@ -6844,6 +6859,14 @@ static void mmu_destroy_caches(void)
kmem_cache_destroy(mmu_page_header_cache);
}
+static int get_nx_huge_pages(char *buffer, const struct kernel_param *kp)
+{
+ if (nx_hugepage_mitigation_hard_disabled)
+ return sprintf(buffer, "never\n");
+
+ return param_get_bool(buffer, kp);
+}
+
static bool get_nx_auto_mode(void)
{
/* Return true when CPU has the bug, and mitigations are ON */
@@ -6860,15 +6883,29 @@ static int set_nx_huge_pages(const char *val, const struct kernel_param *kp)
bool old_val = nx_huge_pages;
bool new_val;
+ if (nx_hugepage_mitigation_hard_disabled)
+ return -EPERM;
+
/* In "auto" mode deploy workaround only if CPU has the bug. */
- if (sysfs_streq(val, "off"))
+ if (sysfs_streq(val, "off")) {
new_val = 0;
- else if (sysfs_streq(val, "force"))
+ } else if (sysfs_streq(val, "force")) {
new_val = 1;
- else if (sysfs_streq(val, "auto"))
+ } else if (sysfs_streq(val, "auto")) {
new_val = get_nx_auto_mode();
- else if (kstrtobool(val, &new_val) < 0)
+ } else if (sysfs_streq(val, "never")) {
+ new_val = 0;
+
+ mutex_lock(&kvm_lock);
+ if (!list_empty(&vm_list)) {
+ mutex_unlock(&kvm_lock);
+ return -EBUSY;
+ }
+ nx_hugepage_mitigation_hard_disabled = true;
+ mutex_unlock(&kvm_lock);
+ } else if (kstrtobool(val, &new_val) < 0) {
return -EINVAL;
+ }
__set_nx_huge_pages(new_val);
@@ -7006,6 +7043,9 @@ static int set_nx_huge_pages_recovery_param(const char *val, const struct kernel
uint old_period, new_period;
int err;
+ if (nx_hugepage_mitigation_hard_disabled)
+ return -EPERM;
+
was_recovery_enabled = calc_nx_huge_pages_recovery_period(&old_period);
err = param_set_uint(val, kp);
@@ -7164,6 +7204,9 @@ int kvm_mmu_post_init_vm(struct kvm *kvm)
{
int err;
+ if (nx_hugepage_mitigation_hard_disabled)
+ return 0;
+
err = kvm_vm_create_worker_thread(kvm, kvm_nx_huge_page_recovery_worker, 0,
"kvm-nx-lpage-recovery",
&kvm->arch.nx_huge_page_recovery_thread);