summaryrefslogtreecommitdiff
path: root/arch/x86/kvm
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kvm')
-rw-r--r--arch/x86/kvm/.gitignore2
-rw-r--r--arch/x86/kvm/Makefile12
-rw-r--r--arch/x86/kvm/cpuid.c13
-rw-r--r--arch/x86/kvm/debugfs.c7
-rw-r--r--arch/x86/kvm/emulate.c108
-rw-r--r--arch/x86/kvm/kvm-asm-offsets.c29
-rw-r--r--arch/x86/kvm/mmu/mmu.c4
-rw-r--r--arch/x86/kvm/pmu.c2
-rw-r--r--arch/x86/kvm/svm/pmu.c7
-rw-r--r--arch/x86/kvm/svm/sev.c6
-rw-r--r--arch/x86/kvm/svm/svm.c115
-rw-r--r--arch/x86/kvm/svm/svm.h11
-rw-r--r--arch/x86/kvm/svm/svm_ops.h5
-rw-r--r--arch/x86/kvm/svm/vmenter.S260
-rw-r--r--arch/x86/kvm/vmx/capabilities.h19
-rw-r--r--arch/x86/kvm/vmx/pmu_intel.c4
-rw-r--r--arch/x86/kvm/vmx/vmenter.S2
-rw-r--r--arch/x86/kvm/vmx/vmx.c23
-rw-r--r--arch/x86/kvm/x86.c144
-rw-r--r--arch/x86/kvm/xen.c64
20 files changed, 564 insertions, 273 deletions
diff --git a/arch/x86/kvm/.gitignore b/arch/x86/kvm/.gitignore
new file mode 100644
index 000000000000..615d6ff35c00
--- /dev/null
+++ b/arch/x86/kvm/.gitignore
@@ -0,0 +1,2 @@
+/kvm-asm-offsets.s
+/kvm-asm-offsets.h
diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index 30f244b64523..f453a0f96e24 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -34,3 +34,15 @@ endif
obj-$(CONFIG_KVM) += kvm.o
obj-$(CONFIG_KVM_INTEL) += kvm-intel.o
obj-$(CONFIG_KVM_AMD) += kvm-amd.o
+
+AFLAGS_svm/vmenter.o := -iquote $(obj)
+$(obj)/svm/vmenter.o: $(obj)/kvm-asm-offsets.h
+
+AFLAGS_vmx/vmenter.o := -iquote $(obj)
+$(obj)/vmx/vmenter.o: $(obj)/kvm-asm-offsets.h
+
+$(obj)/kvm-asm-offsets.h: $(obj)/kvm-asm-offsets.s FORCE
+ $(call filechk,offsets,__KVM_ASM_OFFSETS_H__)
+
+targets += kvm-asm-offsets.s
+clean-files += kvm-asm-offsets.h
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 7065462378e2..62bc7a01cecc 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -1133,11 +1133,13 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
entry->eax = max(entry->eax, 0x80000021);
break;
case 0x80000001:
+ entry->ebx &= ~GENMASK(27, 16);
cpuid_entry_override(entry, CPUID_8000_0001_EDX);
cpuid_entry_override(entry, CPUID_8000_0001_ECX);
break;
case 0x80000006:
- /* L2 cache and TLB: pass through host info. */
+ /* Drop reserved bits, pass host L2 cache and TLB info. */
+ entry->edx &= ~GENMASK(17, 16);
break;
case 0x80000007: /* Advanced power management */
/* invariant TSC is CPUID.80000007H:EDX[8] */
@@ -1167,6 +1169,7 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
g_phys_as = phys_as;
entry->eax = g_phys_as | (virt_as << 8);
+ entry->ecx &= ~(GENMASK(31, 16) | GENMASK(11, 8));
entry->edx = 0;
cpuid_entry_override(entry, CPUID_8000_0008_EBX);
break;
@@ -1186,6 +1189,9 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
entry->ecx = entry->edx = 0;
break;
case 0x8000001a:
+ entry->eax &= GENMASK(2, 0);
+ entry->ebx = entry->ecx = entry->edx = 0;
+ break;
case 0x8000001e:
break;
case 0x8000001F:
@@ -1193,7 +1199,8 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
entry->eax = entry->ebx = entry->ecx = entry->edx = 0;
} else {
cpuid_entry_override(entry, CPUID_8000_001F_EAX);
-
+ /* Clear NumVMPL since KVM does not support VMPL. */
+ entry->ebx &= ~GENMASK(31, 12);
/*
* Enumerate '0' for "PA bits reduction", the adjusted
* MAXPHYADDR is enumerated directly (see 0x80000008).
@@ -1331,7 +1338,7 @@ int kvm_dev_ioctl_get_cpuid(struct kvm_cpuid2 *cpuid,
if (sanity_check_entries(entries, cpuid->nent, type))
return -EINVAL;
- array.entries = kvcalloc(sizeof(struct kvm_cpuid_entry2), cpuid->nent, GFP_KERNEL);
+ array.entries = kvcalloc(cpuid->nent, sizeof(struct kvm_cpuid_entry2), GFP_KERNEL);
if (!array.entries)
return -ENOMEM;
diff --git a/arch/x86/kvm/debugfs.c b/arch/x86/kvm/debugfs.c
index cfed36aba2f7..c1390357126a 100644
--- a/arch/x86/kvm/debugfs.c
+++ b/arch/x86/kvm/debugfs.c
@@ -158,11 +158,16 @@ out:
static int kvm_mmu_rmaps_stat_open(struct inode *inode, struct file *file)
{
struct kvm *kvm = inode->i_private;
+ int r;
if (!kvm_get_kvm_safe(kvm))
return -ENOENT;
- return single_open(file, kvm_mmu_rmaps_stat_show, kvm);
+ r = single_open(file, kvm_mmu_rmaps_stat_show, kvm);
+ if (r < 0)
+ kvm_put_kvm(kvm);
+
+ return r;
}
static int kvm_mmu_rmaps_stat_release(struct inode *inode, struct file *file)
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 3b27622d4642..4a43261d25a2 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -791,8 +791,7 @@ static int linearize(struct x86_emulate_ctxt *ctxt,
ctxt->mode, linear);
}
-static inline int assign_eip(struct x86_emulate_ctxt *ctxt, ulong dst,
- enum x86emul_mode mode)
+static inline int assign_eip(struct x86_emulate_ctxt *ctxt, ulong dst)
{
ulong linear;
int rc;
@@ -802,41 +801,71 @@ static inline int assign_eip(struct x86_emulate_ctxt *ctxt, ulong dst,
if (ctxt->op_bytes != sizeof(unsigned long))
addr.ea = dst & ((1UL << (ctxt->op_bytes << 3)) - 1);
- rc = __linearize(ctxt, addr, &max_size, 1, false, true, mode, &linear);
+ rc = __linearize(ctxt, addr, &max_size, 1, false, true, ctxt->mode, &linear);
if (rc == X86EMUL_CONTINUE)
ctxt->_eip = addr.ea;
return rc;
}
+static inline int emulator_recalc_and_set_mode(struct x86_emulate_ctxt *ctxt)
+{
+ u64 efer;
+ struct desc_struct cs;
+ u16 selector;
+ u32 base3;
+
+ ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
+
+ if (!(ctxt->ops->get_cr(ctxt, 0) & X86_CR0_PE)) {
+ /* Real mode. cpu must not have long mode active */
+ if (efer & EFER_LMA)
+ return X86EMUL_UNHANDLEABLE;
+ ctxt->mode = X86EMUL_MODE_REAL;
+ return X86EMUL_CONTINUE;
+ }
+
+ if (ctxt->eflags & X86_EFLAGS_VM) {
+ /* Protected/VM86 mode. cpu must not have long mode active */
+ if (efer & EFER_LMA)
+ return X86EMUL_UNHANDLEABLE;
+ ctxt->mode = X86EMUL_MODE_VM86;
+ return X86EMUL_CONTINUE;
+ }
+
+ if (!ctxt->ops->get_segment(ctxt, &selector, &cs, &base3, VCPU_SREG_CS))
+ return X86EMUL_UNHANDLEABLE;
+
+ if (efer & EFER_LMA) {
+ if (cs.l) {
+ /* Proper long mode */
+ ctxt->mode = X86EMUL_MODE_PROT64;
+ } else if (cs.d) {
+ /* 32 bit compatibility mode*/
+ ctxt->mode = X86EMUL_MODE_PROT32;
+ } else {
+ ctxt->mode = X86EMUL_MODE_PROT16;
+ }
+ } else {
+ /* Legacy 32 bit / 16 bit mode */
+ ctxt->mode = cs.d ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16;
+ }
+
+ return X86EMUL_CONTINUE;
+}
+
static inline int assign_eip_near(struct x86_emulate_ctxt *ctxt, ulong dst)
{
- return assign_eip(ctxt, dst, ctxt->mode);
+ return assign_eip(ctxt, dst);
}
-static int assign_eip_far(struct x86_emulate_ctxt *ctxt, ulong dst,
- const struct desc_struct *cs_desc)
+static int assign_eip_far(struct x86_emulate_ctxt *ctxt, ulong dst)
{
- enum x86emul_mode mode = ctxt->mode;
- int rc;
+ int rc = emulator_recalc_and_set_mode(ctxt);
-#ifdef CONFIG_X86_64
- if (ctxt->mode >= X86EMUL_MODE_PROT16) {
- if (cs_desc->l) {
- u64 efer = 0;
+ if (rc != X86EMUL_CONTINUE)
+ return rc;
- ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
- if (efer & EFER_LMA)
- mode = X86EMUL_MODE_PROT64;
- } else
- mode = X86EMUL_MODE_PROT32; /* temporary value */
- }
-#endif
- if (mode == X86EMUL_MODE_PROT16 || mode == X86EMUL_MODE_PROT32)
- mode = cs_desc->d ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16;
- rc = assign_eip(ctxt, dst, mode);
- if (rc == X86EMUL_CONTINUE)
- ctxt->mode = mode;
- return rc;
+ return assign_eip(ctxt, dst);
}
static inline int jmp_rel(struct x86_emulate_ctxt *ctxt, int rel)
@@ -2172,7 +2201,7 @@ static int em_jmp_far(struct x86_emulate_ctxt *ctxt)
if (rc != X86EMUL_CONTINUE)
return rc;
- rc = assign_eip_far(ctxt, ctxt->src.val, &new_desc);
+ rc = assign_eip_far(ctxt, ctxt->src.val);
/* Error handling is not implemented. */
if (rc != X86EMUL_CONTINUE)
return X86EMUL_UNHANDLEABLE;
@@ -2250,7 +2279,7 @@ static int em_ret_far(struct x86_emulate_ctxt *ctxt)
&new_desc);
if (rc != X86EMUL_CONTINUE)
return rc;
- rc = assign_eip_far(ctxt, eip, &new_desc);
+ rc = assign_eip_far(ctxt, eip);
/* Error handling is not implemented. */
if (rc != X86EMUL_CONTINUE)
return X86EMUL_UNHANDLEABLE;
@@ -2432,7 +2461,7 @@ static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt,
ctxt->eflags = GET_SMSTATE(u32, smstate, 0x7ff4) | X86_EFLAGS_FIXED;
ctxt->_eip = GET_SMSTATE(u32, smstate, 0x7ff0);
- for (i = 0; i < NR_EMULATOR_GPRS; i++)
+ for (i = 0; i < 8; i++)
*reg_write(ctxt, i) = GET_SMSTATE(u32, smstate, 0x7fd0 + i * 4);
val = GET_SMSTATE(u32, smstate, 0x7fcc);
@@ -2489,7 +2518,7 @@ static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt,
u16 selector;
int i, r;
- for (i = 0; i < NR_EMULATOR_GPRS; i++)
+ for (i = 0; i < 16; i++)
*reg_write(ctxt, i) = GET_SMSTATE(u64, smstate, 0x7ff8 - i * 8);
ctxt->_eip = GET_SMSTATE(u64, smstate, 0x7f78);
@@ -2633,7 +2662,7 @@ static int em_rsm(struct x86_emulate_ctxt *ctxt)
* those side effects need to be explicitly handled for both success
* and shutdown.
*/
- return X86EMUL_CONTINUE;
+ return emulator_recalc_and_set_mode(ctxt);
emulate_shutdown:
ctxt->ops->triple_fault(ctxt);
@@ -2876,6 +2905,7 @@ static int em_sysexit(struct x86_emulate_ctxt *ctxt)
ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS);
ctxt->_eip = rdx;
+ ctxt->mode = usermode;
*reg_write(ctxt, VCPU_REGS_RSP) = rcx;
return X86EMUL_CONTINUE;
@@ -3469,7 +3499,7 @@ static int em_call_far(struct x86_emulate_ctxt *ctxt)
if (rc != X86EMUL_CONTINUE)
return rc;
- rc = assign_eip_far(ctxt, ctxt->src.val, &new_desc);
+ rc = assign_eip_far(ctxt, ctxt->src.val);
if (rc != X86EMUL_CONTINUE)
goto fail;
@@ -3611,11 +3641,25 @@ static int em_movbe(struct x86_emulate_ctxt *ctxt)
static int em_cr_write(struct x86_emulate_ctxt *ctxt)
{
- if (ctxt->ops->set_cr(ctxt, ctxt->modrm_reg, ctxt->src.val))
+ int cr_num = ctxt->modrm_reg;
+ int r;
+
+ if (ctxt->ops->set_cr(ctxt, cr_num, ctxt->src.val))
return emulate_gp(ctxt, 0);
/* Disable writeback. */
ctxt->dst.type = OP_NONE;
+
+ if (cr_num == 0) {
+ /*
+ * CR0 write might have updated CR0.PE and/or CR0.PG
+ * which can affect the cpu's execution mode.
+ */
+ r = emulator_recalc_and_set_mode(ctxt);
+ if (r != X86EMUL_CONTINUE)
+ return r;
+ }
+
return X86EMUL_CONTINUE;
}
diff --git a/arch/x86/kvm/kvm-asm-offsets.c b/arch/x86/kvm/kvm-asm-offsets.c
new file mode 100644
index 000000000000..24a710d37323
--- /dev/null
+++ b/arch/x86/kvm/kvm-asm-offsets.c
@@ -0,0 +1,29 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Generate definitions needed by assembly language modules.
+ * This code generates raw asm output which is post-processed to extract
+ * and format the required data.
+ */
+#define COMPILE_OFFSETS
+
+#include <linux/kbuild.h>
+#include "vmx/vmx.h"
+#include "svm/svm.h"
+
+static void __used common(void)
+{
+ if (IS_ENABLED(CONFIG_KVM_AMD)) {
+ BLANK();
+ OFFSET(SVM_vcpu_arch_regs, vcpu_svm, vcpu.arch.regs);
+ OFFSET(SVM_current_vmcb, vcpu_svm, current_vmcb);
+ OFFSET(SVM_spec_ctrl, vcpu_svm, spec_ctrl);
+ OFFSET(SVM_vmcb01, vcpu_svm, vmcb01);
+ OFFSET(KVM_VMCB_pa, kvm_vmcb_info, pa);
+ OFFSET(SD_save_area_pa, svm_cpu_data, save_area_pa);
+ }
+
+ if (IS_ENABLED(CONFIG_KVM_INTEL)) {
+ BLANK();
+ OFFSET(VMX_spec_ctrl, vcpu_vmx, spec_ctrl);
+ }
+}
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 6f81539061d6..1ccb769f62af 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -6056,7 +6056,7 @@ void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end)
write_lock(&kvm->mmu_lock);
- kvm_mmu_invalidate_begin(kvm, gfn_start, gfn_end);
+ kvm_mmu_invalidate_begin(kvm, 0, -1ul);
flush = kvm_rmap_zap_gfn_range(kvm, gfn_start, gfn_end);
@@ -6070,7 +6070,7 @@ void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end)
kvm_flush_remote_tlbs_with_address(kvm, gfn_start,
gfn_end - gfn_start);
- kvm_mmu_invalidate_end(kvm, gfn_start, gfn_end);
+ kvm_mmu_invalidate_end(kvm, 0, -1ul);
write_unlock(&kvm->mmu_lock);
}
diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
index d9b9a0f0db17..de1fd7369736 100644
--- a/arch/x86/kvm/pmu.c
+++ b/arch/x86/kvm/pmu.c
@@ -56,7 +56,7 @@ static const struct x86_cpu_id vmx_icl_pebs_cpu[] = {
* code. Each pmc, stored in kvm_pmc.idx field, is unique across
* all perf counters (both gp and fixed). The mapping relationship
* between pmc and perf counters is as the following:
- * * Intel: [0 .. INTEL_PMC_MAX_GENERIC-1] <=> gp counters
+ * * Intel: [0 .. KVM_INTEL_PMC_MAX_GENERIC-1] <=> gp counters
* [INTEL_PMC_IDX_FIXED .. INTEL_PMC_IDX_FIXED + 2] <=> fixed
* * AMD: [0 .. AMD64_NUM_COUNTERS-1] and, for families 15H
* and later, [0 .. AMD64_NUM_COUNTERS_CORE-1] <=> gp counters
diff --git a/arch/x86/kvm/svm/pmu.c b/arch/x86/kvm/svm/pmu.c
index b68956299fa8..9d65cd095691 100644
--- a/arch/x86/kvm/svm/pmu.c
+++ b/arch/x86/kvm/svm/pmu.c
@@ -192,9 +192,10 @@ static void amd_pmu_init(struct kvm_vcpu *vcpu)
struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
int i;
- BUILD_BUG_ON(AMD64_NUM_COUNTERS_CORE > INTEL_PMC_MAX_GENERIC);
+ BUILD_BUG_ON(KVM_AMD_PMC_MAX_GENERIC > AMD64_NUM_COUNTERS_CORE);
+ BUILD_BUG_ON(KVM_AMD_PMC_MAX_GENERIC > INTEL_PMC_MAX_GENERIC);
- for (i = 0; i < AMD64_NUM_COUNTERS_CORE ; i++) {
+ for (i = 0; i < KVM_AMD_PMC_MAX_GENERIC ; i++) {
pmu->gp_counters[i].type = KVM_PMC_GP;
pmu->gp_counters[i].vcpu = vcpu;
pmu->gp_counters[i].idx = i;
@@ -207,7 +208,7 @@ static void amd_pmu_reset(struct kvm_vcpu *vcpu)
struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
int i;
- for (i = 0; i < AMD64_NUM_COUNTERS_CORE; i++) {
+ for (i = 0; i < KVM_AMD_PMC_MAX_GENERIC; i++) {
struct kvm_pmc *pmc = &pmu->gp_counters[i];
pmc_stop_counter(pmc);
diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index 28064060413a..efaaef2b7ae1 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -196,7 +196,7 @@ static void sev_asid_free(struct kvm_sev_info *sev)
__set_bit(sev->asid, sev_reclaim_asid_bitmap);
for_each_possible_cpu(cpu) {
- sd = per_cpu(svm_data, cpu);
+ sd = per_cpu_ptr(&svm_data, cpu);
sd->sev_vmcbs[sev->asid] = NULL;
}
@@ -605,7 +605,7 @@ static int sev_es_sync_vmsa(struct vcpu_svm *svm)
save->dr6 = svm->vcpu.arch.dr6;
pr_debug("Virtual Machine Save Area (VMSA):\n");
- print_hex_dump(KERN_CONT, "", DUMP_PREFIX_NONE, 16, 1, save, sizeof(*save), false);
+ print_hex_dump_debug("", DUMP_PREFIX_NONE, 16, 1, save, sizeof(*save), false);
return 0;
}
@@ -2600,7 +2600,7 @@ void sev_es_unmap_ghcb(struct vcpu_svm *svm)
void pre_sev_run(struct vcpu_svm *svm, int cpu)
{
- struct svm_cpu_data *sd = per_cpu(svm_data, cpu);
+ struct svm_cpu_data *sd = per_cpu_ptr(&svm_data, cpu);
int asid = sev_get_asid(svm->vcpu.kvm);
/* Assign the asid allocated with this SEV guest */
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 58f0077d9357..4b6d2b050e57 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -245,7 +245,7 @@ struct kvm_ldttss_desc {
u32 zero1;
} __attribute__((packed));
-DEFINE_PER_CPU(struct svm_cpu_data *, svm_data);
+DEFINE_PER_CPU(struct svm_cpu_data, svm_data);
/*
* Only MSR_TSC_AUX is switched via the user return hook. EFER is switched via
@@ -581,12 +581,7 @@ static int svm_hardware_enable(void)
pr_err("%s: err EOPNOTSUPP on %d\n", __func__, me);
return -EINVAL;
}
- sd = per_cpu(svm_data, me);
- if (!sd) {
- pr_err("%s: svm_data is NULL on %d\n", __func__, me);
- return -EINVAL;
- }
-
+ sd = per_cpu_ptr(&svm_data, me);
sd->asid_generation = 1;
sd->max_asid = cpuid_ebx(SVM_CPUID_FUNC) - 1;
sd->next_asid = sd->max_asid + 1;
@@ -597,7 +592,7 @@ static int svm_hardware_enable(void)
wrmsrl(MSR_EFER, efer | EFER_SVME);
- wrmsrl(MSR_VM_HSAVE_PA, __sme_page_pa(sd->save_area));
+ wrmsrl(MSR_VM_HSAVE_PA, sd->save_area_pa);
if (static_cpu_has(X86_FEATURE_TSCRATEMSR)) {
/*
@@ -646,42 +641,37 @@ static int svm_hardware_enable(void)
static void svm_cpu_uninit(int cpu)
{
- struct svm_cpu_data *sd = per_cpu(svm_data, cpu);
+ struct svm_cpu_data *sd = per_cpu_ptr(&svm_data, cpu);
- if (!sd)
+ if (!sd->save_area)
return;
- per_cpu(svm_data, cpu) = NULL;
kfree(sd->sev_vmcbs);
__free_page(sd->save_area);
- kfree(sd);
+ sd->save_area_pa = 0;
+ sd->save_area = NULL;
}
static int svm_cpu_init(int cpu)
{
- struct svm_cpu_data *sd;
+ struct svm_cpu_data *sd = per_cpu_ptr(&svm_data, cpu);
int ret = -ENOMEM;
- sd = kzalloc(sizeof(struct svm_cpu_data), GFP_KERNEL);
- if (!sd)
- return ret;
- sd->cpu = cpu;
+ memset(sd, 0, sizeof(struct svm_cpu_data));
sd->save_area = alloc_page(GFP_KERNEL | __GFP_ZERO);
if (!sd->save_area)
- goto free_cpu_data;
+ return ret;
ret = sev_cpu_init(sd);
if (ret)
goto free_save_area;
- per_cpu(svm_data, cpu) = sd;
-
+ sd->save_area_pa = __sme_page_pa(sd->save_area);
return 0;
free_save_area:
__free_page(sd->save_area);
-free_cpu_data:
- kfree(sd);
+ sd->save_area = NULL;
return ret;
}
@@ -730,6 +720,15 @@ static bool msr_write_intercepted(struct kvm_vcpu *vcpu, u32 msr)
u32 offset;
u32 *msrpm;
+ /*
+ * For non-nested case:
+ * If the L01 MSR bitmap does not intercept the MSR, then we need to
+ * save it.
+ *
+ * For nested case:
+ * If the L02 MSR bitmap does not intercept the MSR, then we need to
+ * save it.
+ */
msrpm = is_guest_mode(vcpu) ? to_svm(vcpu)->nested.msrpm:
to_svm(vcpu)->msrpm;
@@ -1425,7 +1424,7 @@ static void svm_clear_current_vmcb(struct vmcb *vmcb)
int i;
for_each_online_cpu(i)
- cmpxchg(&per_cpu(svm_data, i)->current_vmcb, vmcb, NULL);
+ cmpxchg(per_cpu_ptr(&svm_data.current_vmcb, i), vmcb, NULL);
}
static void svm_vcpu_free(struct kvm_vcpu *vcpu)
@@ -1450,7 +1449,7 @@ static void svm_vcpu_free(struct kvm_vcpu *vcpu)
static void svm_prepare_switch_to_guest(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
- struct svm_cpu_data *sd = per_cpu(svm_data, vcpu->cpu);
+ struct svm_cpu_data *sd = per_cpu_ptr(&svm_data, vcpu->cpu);
if (sev_es_guest(vcpu->kvm))
sev_es_unmap_ghcb(svm);
@@ -1462,7 +1461,7 @@ static void svm_prepare_switch_to_guest(struct kvm_vcpu *vcpu)
* Save additional host state that will be restored on VMEXIT (sev-es)
* or subsequent vmload of host save area.
*/
- vmsave(__sme_page_pa(sd->save_area));
+ vmsave(sd->save_area_pa);
if (sev_es_guest(vcpu->kvm)) {
struct sev_es_save_area *hostsa;
hostsa = (struct sev_es_save_area *)(page_address(sd->save_area) + 0x400);
@@ -1487,7 +1486,7 @@ static void svm_prepare_host_switch(struct kvm_vcpu *vcpu)
static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
- struct svm_cpu_data *sd = per_cpu(svm_data, cpu);
+ struct svm_cpu_data *sd = per_cpu_ptr(&svm_data, cpu);
if (sd->current_vmcb != svm->vmcb) {
sd->current_vmcb = svm->vmcb;
@@ -2710,9 +2709,9 @@ static int svm_get_msr_feature(struct kvm_msr_entry *msr)
msr->data = 0;
switch (msr->index) {
- case MSR_F10H_DECFG:
- if (boot_cpu_has(X86_FEATURE_LFENCE_RDTSC))
- msr->data |= MSR_F10H_DECFG_LFENCE_SERIALIZE;
+ case MSR_AMD64_DE_CFG:
+ if (cpu_feature_enabled(X86_FEATURE_LFENCE_RDTSC))
+ msr->data |= MSR_AMD64_DE_CFG_LFENCE_SERIALIZE;
break;
case MSR_IA32_PERF_CAPABILITIES:
return 0;
@@ -2813,7 +2812,7 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
msr_info->data = 0x1E;
}
break;
- case MSR_F10H_DECFG:
+ case MSR_AMD64_DE_CFG:
msr_info->data = svm->msr_decfg;
break;
default:
@@ -3042,7 +3041,7 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
case MSR_VM_IGNNE:
vcpu_unimpl(vcpu, "unimplemented wrmsr: 0x%x data 0x%llx\n", ecx, data);
break;
- case MSR_F10H_DECFG: {
+ case MSR_AMD64_DE_CFG: {
struct kvm_msr_entry msr_entry;
msr_entry.index = msr->index;
@@ -3443,7 +3442,7 @@ static int svm_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath)
static void reload_tss(struct kvm_vcpu *vcpu)
{
- struct svm_cpu_data *sd = per_cpu(svm_data, vcpu->cpu);
+ struct svm_cpu_data *sd = per_cpu_ptr(&svm_data, vcpu->cpu);
sd->tss_desc->type = 9; /* available 32/64-bit TSS */
load_TR_desc();
@@ -3451,7 +3450,7 @@ static void reload_tss(struct kvm_vcpu *vcpu)
static void pre_svm_run(struct kvm_vcpu *vcpu)
{
- struct svm_cpu_data *sd = per_cpu(svm_data, vcpu->cpu);
+ struct svm_cpu_data *sd = per_cpu_ptr(&svm_data, vcpu->cpu);
struct vcpu_svm *svm = to_svm(vcpu);
/*
@@ -3911,30 +3910,16 @@ static fastpath_t svm_exit_handlers_fastpath(struct kvm_vcpu *vcpu)
return EXIT_FASTPATH_NONE;
}
-static noinstr void svm_vcpu_enter_exit(struct kvm_vcpu *vcpu)
+static noinstr void svm_vcpu_enter_exit(struct kvm_vcpu *vcpu, bool spec_ctrl_intercepted)
{
struct vcpu_svm *svm = to_svm(vcpu);
- unsigned long vmcb_pa = svm->current_vmcb->pa;
guest_state_enter_irqoff();
- if (sev_es_guest(vcpu->kvm)) {
- __svm_sev_es_vcpu_run(vmcb_pa);
- } else {
- struct svm_cpu_data *sd = per_cpu(svm_data, vcpu->cpu);
-
- /*
- * Use a single vmcb (vmcb01 because it's always valid) for
- * context switching guest state via VMLOAD/VMSAVE, that way
- * the state doesn't need to be copied between vmcb01 and
- * vmcb02 when switching vmcbs for nested virtualization.
- */
- vmload(svm->vmcb01.pa);
- __svm_vcpu_run(vmcb_pa, (unsigned long *)&vcpu->arch.regs);
- vmsave(svm->vmcb01.pa);
-
- vmload(__sme_page_pa(sd->save_area));
- }
+ if (sev_es_guest(vcpu->kvm))
+ __svm_sev_es_vcpu_run(svm, spec_ctrl_intercepted);
+ else
+ __svm_vcpu_run(svm, spec_ctrl_intercepted);
guest_state_exit_irqoff();
}
@@ -3942,6 +3927,7 @@ static noinstr void svm_vcpu_enter_exit(struct kvm_vcpu *vcpu)
static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
+ bool spec_ctrl_intercepted = msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL);
trace_kvm_entry(vcpu);
@@ -3998,34 +3984,15 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu)
* being speculatively taken.
*/
if (!static_cpu_has(X86_FEATURE_V_SPEC_CTRL))
- x86_spec_ctrl_set_guest(svm->spec_ctrl, svm->virt_spec_ctrl);
+ x86_spec_ctrl_set_guest(svm->virt_spec_ctrl);
- svm_vcpu_enter_exit(vcpu);
-
- /*
- * We do not use IBRS in the kernel. If this vCPU has used the
- * SPEC_CTRL MSR it may have left it on; save the value and
- * turn it off. This is much more efficient than blindly adding
- * it to the atomic save/restore list. Especially as the former
- * (Saving guest MSRs on vmexit) doesn't even exist in KVM.
- *
- * For non-nested case:
- * If the L01 MSR bitmap does not intercept the MSR, then we need to
- * save it.
- *
- * For nested case:
- * If the L02 MSR bitmap does not intercept the MSR, then we need to
- * save it.
- */
- if (!static_cpu_has(X86_FEATURE_V_SPEC_CTRL) &&
- unlikely(!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL)))
- svm->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL);
+ svm_vcpu_enter_exit(vcpu, spec_ctrl_intercepted);
if (!sev_es_guest(vcpu->kvm))
reload_tss(vcpu);
if (!static_cpu_has(X86_FEATURE_V_SPEC_CTRL))
- x86_spec_ctrl_restore_host(svm->spec_ctrl, svm->virt_spec_ctrl);
+ x86_spec_ctrl_restore_host(svm->virt_spec_ctrl);
if (!sev_es_guest(vcpu->kvm)) {
vcpu->arch.cr2 = svm->vmcb->save.cr2;
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index 6a7686bf6900..199a2ecef1ce 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -209,7 +209,6 @@ struct vcpu_svm {
struct vmcb *vmcb;
struct kvm_vmcb_info vmcb01;
struct kvm_vmcb_info *current_vmcb;
- struct svm_cpu_data *svm_data;
u32 asid;
u32 sysenter_esp_hi;
u32 sysenter_eip_hi;
@@ -281,8 +280,6 @@ struct vcpu_svm {
};
struct svm_cpu_data {
- int cpu;
-
u64 asid_generation;
u32 max_asid;
u32 next_asid;
@@ -290,13 +287,15 @@ struct svm_cpu_data {
struct kvm_ldttss_desc *tss_desc;
struct page *save_area;
+ unsigned long save_area_pa;
+
struct vmcb *current_vmcb;
/* index = sev_asid, value = vmcb pointer */
struct vmcb **sev_vmcbs;
};
-DECLARE_PER_CPU(struct svm_cpu_data *, svm_data);
+DECLARE_PER_CPU(struct svm_cpu_data, svm_data);
void recalc_intercepts(struct vcpu_svm *svm);
@@ -683,7 +682,7 @@ void sev_es_unmap_ghcb(struct vcpu_svm *svm);
/* vmenter.S */
-void __svm_sev_es_vcpu_run(unsigned long vmcb_pa);
-void __svm_vcpu_run(unsigned long vmcb_pa, unsigned long *regs);
+void __svm_sev_es_vcpu_run(struct vcpu_svm *svm, bool spec_ctrl_intercepted);
+void __svm_vcpu_run(struct vcpu_svm *svm, bool spec_ctrl_intercepted);
#endif
diff --git a/arch/x86/kvm/svm/svm_ops.h b/arch/x86/kvm/svm/svm_ops.h
index 9430d6437c9f..36c8af87a707 100644
--- a/arch/x86/kvm/svm/svm_ops.h
+++ b/arch/x86/kvm/svm/svm_ops.h
@@ -61,9 +61,4 @@ static __always_inline void vmsave(unsigned long pa)
svm_asm1(vmsave, "a" (pa), "memory");
}
-static __always_inline void vmload(unsigned long pa)
-{
- svm_asm1(vmload, "a" (pa), "memory");
-}
-
#endif /* __KVM_X86_SVM_OPS_H */
diff --git a/arch/x86/kvm/svm/vmenter.S b/arch/x86/kvm/svm/vmenter.S
index 723f8534986c..34367dc203f2 100644
--- a/arch/x86/kvm/svm/vmenter.S
+++ b/arch/x86/kvm/svm/vmenter.S
@@ -4,35 +4,97 @@
#include <asm/bitsperlong.h>
#include <asm/kvm_vcpu_regs.h>
#include <asm/nospec-branch.h>
+#include "kvm-asm-offsets.h"
#define WORD_SIZE (BITS_PER_LONG / 8)
/* Intentionally omit RAX as it's context switched by hardware */
-#define VCPU_RCX __VCPU_REGS_RCX * WORD_SIZE
-#define VCPU_RDX __VCPU_REGS_RDX * WORD_SIZE
-#define VCPU_RBX __VCPU_REGS_RBX * WORD_SIZE
+#define VCPU_RCX (SVM_vcpu_arch_regs + __VCPU_REGS_RCX * WORD_SIZE)
+#define VCPU_RDX (SVM_vcpu_arch_regs + __VCPU_REGS_RDX * WORD_SIZE)
+#define VCPU_RBX (SVM_vcpu_arch_regs + __VCPU_REGS_RBX * WORD_SIZE)
/* Intentionally omit RSP as it's context switched by hardware */
-#define VCPU_RBP __VCPU_REGS_RBP * WORD_SIZE
-#define VCPU_RSI __VCPU_REGS_RSI * WORD_SIZE
-#define VCPU_RDI __VCPU_REGS_RDI * WORD_SIZE
+#define VCPU_RBP (SVM_vcpu_arch_regs + __VCPU_REGS_RBP * WORD_SIZE)
+#define VCPU_RSI (SVM_vcpu_arch_regs + __VCPU_REGS_RSI * WORD_SIZE)
+#define VCPU_RDI (SVM_vcpu_arch_regs + __VCPU_REGS_RDI * WORD_SIZE)
#ifdef CONFIG_X86_64
-#define VCPU_R8 __VCPU_REGS_R8 * WORD_SIZE
-#define VCPU_R9 __VCPU_REGS_R9 * WORD_SIZE
-#define VCPU_R10 __VCPU_REGS_R10 * WORD_SIZE
-#define VCPU_R11 __VCPU_REGS_R11 * WORD_SIZE
-#define VCPU_R12 __VCPU_REGS_R12 * WORD_SIZE
-#define VCPU_R13 __VCPU_REGS_R13 * WORD_SIZE
-#define VCPU_R14 __VCPU_REGS_R14 * WORD_SIZE
-#define VCPU_R15 __VCPU_REGS_R15 * WORD_SIZE
+#define VCPU_R8 (SVM_vcpu_arch_regs + __VCPU_REGS_R8 * WORD_SIZE)
+#define VCPU_R9 (SVM_vcpu_arch_regs + __VCPU_REGS_R9 * WORD_SIZE)
+#define VCPU_R10 (SVM_vcpu_arch_regs + __VCPU_REGS_R10 * WORD_SIZE)
+#define VCPU_R11 (SVM_vcpu_arch_regs + __VCPU_REGS_R11 * WORD_SIZE)
+#define VCPU_R12 (SVM_vcpu_arch_regs + __VCPU_REGS_R12 * WORD_SIZE)
+#define VCPU_R13 (SVM_vcpu_arch_regs + __VCPU_REGS_R13 * WORD_SIZE)
+#define VCPU_R14 (SVM_vcpu_arch_regs + __VCPU_REGS_R14 * WORD_SIZE)
+#define VCPU_R15 (SVM_vcpu_arch_regs + __VCPU_REGS_R15 * WORD_SIZE)
#endif
+#define SVM_vmcb01_pa (SVM_vmcb01 + KVM_VMCB_pa)
+
.section .noinstr.text, "ax"
+.macro RESTORE_GUEST_SPEC_CTRL
+ /* No need to do anything if SPEC_CTRL is unset or V_SPEC_CTRL is set */
+ ALTERNATIVE_2 "", \
+ "jmp 800f", X86_FEATURE_MSR_SPEC_CTRL, \
+ "", X86_FEATURE_V_SPEC_CTRL
+801:
+.endm
+.macro RESTORE_GUEST_SPEC_CTRL_BODY
+800:
+ /*
+ * SPEC_CTRL handling: if the guest's SPEC_CTRL value differs from the
+ * host's, write the MSR. This is kept out-of-line so that the common
+ * case does not have to jump.
+ *
+ * IMPORTANT: To avoid RSB underflow attacks and any other nastiness,
+ * there must not be any returns or indirect branches between this code
+ * and vmentry.
+ */
+ movl SVM_spec_ctrl(%_ASM_DI), %eax
+ cmp PER_CPU_VAR(x86_spec_ctrl_current), %eax
+ je 801b
+ mov $MSR_IA32_SPEC_CTRL, %ecx
+ xor %edx, %edx
+ wrmsr
+ jmp 801b
+.endm
+
+.macro RESTORE_HOST_SPEC_CTRL
+ /* No need to do anything if SPEC_CTRL is unset or V_SPEC_CTRL is set */
+ ALTERNATIVE_2 "", \
+ "jmp 900f", X86_FEATURE_MSR_SPEC_CTRL, \
+ "", X86_FEATURE_V_SPEC_CTRL
+901:
+.endm
+.macro RESTORE_HOST_SPEC_CTRL_BODY
+900:
+ /* Same for after vmexit. */
+ mov $MSR_IA32_SPEC_CTRL, %ecx
+
+ /*
+ * Load the value that the guest had written into MSR_IA32_SPEC_CTRL,
+ * if it was not intercepted during guest execution.
+ */
+ cmpb $0, (%_ASM_SP)
+ jnz 998f
+ rdmsr
+ movl %eax, SVM_spec_ctrl(%_ASM_DI)
+998:
+
+ /* Now restore the host value of the MSR if different from the guest's. */
+ movl PER_CPU_VAR(x86_spec_ctrl_current), %eax
+ cmp SVM_spec_ctrl(%_ASM_DI), %eax
+ je 901b
+ xor %edx, %edx
+ wrmsr
+ jmp 901b
+.endm
+
+
/**
* __svm_vcpu_run - Run a vCPU via a transition to SVM guest mode
- * @vmcb_pa: unsigned long
- * @regs: unsigned long * (to guest registers)
+ * @svm: struct vcpu_svm *
+ * @spec_ctrl_intercepted: bool
*/
SYM_FUNC_START(__svm_vcpu_run)
push %_ASM_BP
@@ -47,49 +109,71 @@ SYM_FUNC_START(__svm_vcpu_run)
#endif
push %_ASM_BX
- /* Save @regs. */
+ /*
+ * Save variables needed after vmexit on the stack, in inverse
+ * order compared to when they are needed.
+ */
+
+ /* Accessed directly from the stack in RESTORE_HOST_SPEC_CTRL. */
push %_ASM_ARG2
- /* Save @vmcb. */
+ /* Needed to restore access to percpu variables. */
+ __ASM_SIZE(push) PER_CPU_VAR(svm_data + SD_save_area_pa)
+
+ /* Finally save @svm. */
push %_ASM_ARG1
- /* Move @regs to RAX. */
- mov %_ASM_ARG2, %_ASM_AX
+.ifnc _ASM_ARG1, _ASM_DI
+ /*
+ * Stash @svm in RDI early. On 32-bit, arguments are in RAX, RCX
+ * and RDX which are clobbered by RESTORE_GUEST_SPEC_CTRL.
+ */
+ mov %_ASM_ARG1, %_ASM_DI
+.endif
+
+ /* Clobbers RAX, RCX, RDX. */
+ RESTORE_GUEST_SPEC_CTRL
+
+ /*
+ * Use a single vmcb (vmcb01 because it's always valid) for
+ * context switching guest state via VMLOAD/VMSAVE, that way
+ * the state doesn't need to be copied between vmcb01 and
+ * vmcb02 when switching vmcbs for nested virtualization.
+ */
+ mov SVM_vmcb01_pa(%_ASM_DI), %_ASM_AX
+1: vmload %_ASM_AX
+2:
+
+ /* Get svm->current_vmcb->pa into RAX. */
+ mov SVM_current_vmcb(%_ASM_DI), %_ASM_AX
+ mov KVM_VMCB_pa(%_ASM_AX), %_ASM_AX
/* Load guest registers. */
- mov VCPU_RCX(%_ASM_AX), %_ASM_CX
- mov VCPU_RDX(%_ASM_AX), %_ASM_DX
- mov VCPU_RBX(%_ASM_AX), %_ASM_BX
- mov VCPU_RBP(%_ASM_AX), %_ASM_BP
- mov VCPU_RSI(%_ASM_AX), %_ASM_SI
- mov VCPU_RDI(%_ASM_AX), %_ASM_DI
+ mov VCPU_RCX(%_ASM_DI), %_ASM_CX
+ mov VCPU_RDX(%_ASM_DI), %_ASM_DX
+ mov VCPU_RBX(%_ASM_DI), %_ASM_BX
+ mov VCPU_RBP(%_ASM_DI), %_ASM_BP
+ mov VCPU_RSI(%_ASM_DI), %_ASM_SI
#ifdef CONFIG_X86_64
- mov VCPU_R8 (%_ASM_AX), %r8
- mov VCPU_R9 (%_ASM_AX), %r9
- mov VCPU_R10(%_ASM_AX), %r10
- mov VCPU_R11(%_ASM_AX), %r11
- mov VCPU_R12(%_ASM_AX), %r12
- mov VCPU_R13(%_ASM_AX), %r13
- mov VCPU_R14(%_ASM_AX), %r14
- mov VCPU_R15(%_ASM_AX), %r15
+ mov VCPU_R8 (%_ASM_DI), %r8
+ mov VCPU_R9 (%_ASM_DI), %r9
+ mov VCPU_R10(%_ASM_DI), %r10
+ mov VCPU_R11(%_ASM_DI), %r11
+ mov VCPU_R12(%_ASM_DI), %r12
+ mov VCPU_R13(%_ASM_DI), %r13
+ mov VCPU_R14(%_ASM_DI), %r14
+ mov VCPU_R15(%_ASM_DI), %r15
#endif
-
- /* "POP" @vmcb to RAX. */
- pop %_ASM_AX
+ mov VCPU_RDI(%_ASM_DI), %_ASM_DI
/* Enter guest mode */
sti
-1: vmrun %_ASM_AX
-
-2: cli
-
-#ifdef CONFIG_RETPOLINE
- /* IMPORTANT: Stuff the RSB immediately after VM-Exit, before RET! */
- FILL_RETURN_BUFFER %_ASM_AX, RSB_CLEAR_LOOPS, X86_FEATURE_RETPOLINE
-#endif
+3: vmrun %_ASM_AX
+4:
+ cli
- /* "POP" @regs to RAX. */
+ /* Pop @svm to RAX while it's the only available register. */
pop %_ASM_AX
/* Save all guest registers. */
@@ -110,6 +194,26 @@ SYM_FUNC_START(__svm_vcpu_run)
mov %r15, VCPU_R15(%_ASM_AX)
#endif
+ /* @svm can stay in RDI from now on. */
+ mov %_ASM_AX, %_ASM_DI
+
+ mov SVM_vmcb01_pa(%_ASM_DI), %_ASM_AX
+5: vmsave %_ASM_AX
+6:
+
+ /* Restores GSBASE among other things, allowing access to percpu data. */
+ pop %_ASM_AX
+7: vmload %_ASM_AX
+8:
+
+#ifdef CONFIG_RETPOLINE
+ /* IMPORTANT: Stuff the RSB immediately after VM-Exit, before RET! */
+ FILL_RETURN_BUFFER %_ASM_AX, RSB_CLEAR_LOOPS, X86_FEATURE_RETPOLINE
+#endif
+
+ /* Clobbers RAX, RCX, RDX. */
+ RESTORE_HOST_SPEC_CTRL
+
/*
* Mitigate RETBleed for AMD/Hygon Zen uarch. RET should be
* untrained as soon as we exit the VM and are back to the
@@ -145,6 +249,9 @@ SYM_FUNC_START(__svm_vcpu_run)
xor %r15d, %r15d
#endif
+ /* "Pop" @spec_ctrl_intercepted. */
+ pop %_ASM_BX
+
pop %_ASM_BX
#ifdef CONFIG_X86_64
@@ -159,17 +266,33 @@ SYM_FUNC_START(__svm_vcpu_run)
pop %_ASM_BP
RET
-3: cmpb $0, kvm_rebooting
+ RESTORE_GUEST_SPEC_CTRL_BODY
+ RESTORE_HOST_SPEC_CTRL_BODY
+
+10: cmpb $0, kvm_rebooting
jne 2b
ud2
+30: cmpb $0, kvm_rebooting
+ jne 4b
+ ud2
+50: cmpb $0, kvm_rebooting
+ jne 6b
+ ud2
+70: cmpb $0, kvm_rebooting
+ jne 8b
+ ud2
- _ASM_EXTABLE(1b, 3b)
+ _ASM_EXTABLE(1b, 10b)
+ _ASM_EXTABLE(3b, 30b)
+ _ASM_EXTABLE(5b, 50b)
+ _ASM_EXTABLE(7b, 70b)
SYM_FUNC_END(__svm_vcpu_run)
/**
* __svm_sev_es_vcpu_run - Run a SEV-ES vCPU via a transition to SVM guest mode
- * @vmcb_pa: unsigned long
+ * @svm: struct vcpu_svm *
+ * @spec_ctrl_intercepted: bool
*/
SYM_FUNC_START(__svm_sev_es_vcpu_run)
push %_ASM_BP
@@ -184,8 +307,31 @@ SYM_FUNC_START(__svm_sev_es_vcpu_run)
#endif
push %_ASM_BX
- /* Move @vmcb to RAX. */
- mov %_ASM_ARG1, %_ASM_AX
+ /*
+ * Save variables needed after vmexit on the stack, in inverse
+ * order compared to when they are needed.
+ */
+
+ /* Accessed directly from the stack in RESTORE_HOST_SPEC_CTRL. */
+ push %_ASM_ARG2
+
+ /* Save @svm. */
+ push %_ASM_ARG1
+
+.ifnc _ASM_ARG1, _ASM_DI
+ /*
+ * Stash @svm in RDI early. On 32-bit, arguments are in RAX, RCX
+ * and RDX which are clobbered by RESTORE_GUEST_SPEC_CTRL.
+ */
+ mov %_ASM_ARG1, %_ASM_DI
+.endif
+
+ /* Clobbers RAX, RCX, RDX. */
+ RESTORE_GUEST_SPEC_CTRL
+
+ /* Get svm->current_vmcb->pa into RAX. */
+ mov SVM_current_vmcb(%_ASM_DI), %_ASM_AX
+ mov KVM_VMCB_pa(%_ASM_AX), %_ASM_AX
/* Enter guest mode */
sti
@@ -194,11 +340,17 @@ SYM_FUNC_START(__svm_sev_es_vcpu_run)
2: cli
+ /* Pop @svm to RDI, guest registers have been saved already. */
+ pop %_ASM_DI
+
#ifdef CONFIG_RETPOLINE
/* IMPORTANT: Stuff the RSB immediately after VM-Exit, before RET! */
FILL_RETURN_BUFFER %_ASM_AX, RSB_CLEAR_LOOPS, X86_FEATURE_RETPOLINE
#endif
+ /* Clobbers RAX, RCX, RDX. */
+ RESTORE_HOST_SPEC_CTRL
+
/*
* Mitigate RETBleed for AMD/Hygon Zen uarch. RET should be
* untrained as soon as we exit the VM and are back to the
@@ -208,6 +360,9 @@ SYM_FUNC_START(__svm_sev_es_vcpu_run)
*/
UNTRAIN_RET
+ /* "Pop" @spec_ctrl_intercepted. */
+ pop %_ASM_BX
+
pop %_ASM_BX
#ifdef CONFIG_X86_64
@@ -222,6 +377,9 @@ SYM_FUNC_START(__svm_sev_es_vcpu_run)
pop %_ASM_BP
RET
+ RESTORE_GUEST_SPEC_CTRL_BODY
+ RESTORE_HOST_SPEC_CTRL_BODY
+
3: cmpb $0, kvm_rebooting
jne 2b
ud2
diff --git a/arch/x86/kvm/vmx/capabilities.h b/arch/x86/kvm/vmx/capabilities.h
index 87c4e46daf37..07254314f3dd 100644
--- a/arch/x86/kvm/vmx/capabilities.h
+++ b/arch/x86/kvm/vmx/capabilities.h
@@ -24,8 +24,6 @@ extern int __read_mostly pt_mode;
#define PMU_CAP_FW_WRITES (1ULL << 13)
#define PMU_CAP_LBR_FMT 0x3f
-#define DEBUGCTLMSR_LBR_MASK (DEBUGCTLMSR_LBR | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI)
-
struct nested_vmx_msrs {
/*
* We only store the "true" versions of the VMX capability MSRs. We
@@ -400,6 +398,7 @@ static inline bool vmx_pebs_supported(void)
static inline u64 vmx_get_perf_capabilities(void)
{
u64 perf_cap = PMU_CAP_FW_WRITES;
+ struct x86_pmu_lbr lbr;
u64 host_perf_cap = 0;
if (!enable_pmu)
@@ -408,7 +407,8 @@ static inline u64 vmx_get_perf_capabilities(void)
if (boot_cpu_has(X86_FEATURE_PDCM))
rdmsrl(MSR_IA32_PERF_CAPABILITIES, host_perf_cap);
- perf_cap |= host_perf_cap & PMU_CAP_LBR_FMT;
+ if (x86_perf_get_lbr(&lbr) >= 0 && lbr.nr)
+ perf_cap |= host_perf_cap & PMU_CAP_LBR_FMT;
if (vmx_pebs_supported()) {
perf_cap |= host_perf_cap & PERF_CAP_PEBS_MASK;
@@ -419,19 +419,6 @@ static inline u64 vmx_get_perf_capabilities(void)
return perf_cap;
}
-static inline u64 vmx_supported_debugctl(void)
-{
- u64 debugctl = 0;
-
- if (boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT))
- debugctl |= DEBUGCTLMSR_BUS_LOCK_DETECT;
-
- if (vmx_get_perf_capabilities() & PMU_CAP_LBR_FMT)
- debugctl |= DEBUGCTLMSR_LBR_MASK;
-
- return debugctl;
-}
-
static inline bool cpu_has_notify_vmexit(void)
{
return vmcs_config.cpu_based_2nd_exec_ctrl &
diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
index 25b70a85bef5..10b33da9bd05 100644
--- a/arch/x86/kvm/vmx/pmu_intel.c
+++ b/arch/x86/kvm/vmx/pmu_intel.c
@@ -617,7 +617,7 @@ static void intel_pmu_init(struct kvm_vcpu *vcpu)
struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
struct lbr_desc *lbr_desc = vcpu_to_lbr_desc(vcpu);
- for (i = 0; i < INTEL_PMC_MAX_GENERIC; i++) {
+ for (i = 0; i < KVM_INTEL_PMC_MAX_GENERIC; i++) {
pmu->gp_counters[i].type = KVM_PMC_GP;
pmu->gp_counters[i].vcpu = vcpu;
pmu->gp_counters[i].idx = i;
@@ -643,7 +643,7 @@ static void intel_pmu_reset(struct kvm_vcpu *vcpu)
struct kvm_pmc *pmc = NULL;
int i;
- for (i = 0; i < INTEL_PMC_MAX_GENERIC; i++) {
+ for (i = 0; i < KVM_INTEL_PMC_MAX_GENERIC; i++) {
pmc = &pmu->gp_counters[i];
pmc_stop_counter(pmc);
diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S
index 8477d8bdd69c..0b5db4de4d09 100644
--- a/arch/x86/kvm/vmx/vmenter.S
+++ b/arch/x86/kvm/vmx/vmenter.S
@@ -1,12 +1,12 @@
/* SPDX-License-Identifier: GPL-2.0 */
#include <linux/linkage.h>
#include <asm/asm.h>
-#include <asm/asm-offsets.h>
#include <asm/bitsperlong.h>
#include <asm/kvm_vcpu_regs.h>
#include <asm/nospec-branch.h>
#include <asm/percpu.h>
#include <asm/segment.h>
+#include "kvm-asm-offsets.h"
#include "run_flags.h"
#define WORD_SIZE (BITS_PER_LONG / 8)
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 9dba04b6b019..63247c57c72c 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -2021,15 +2021,17 @@ static u64 nested_vmx_truncate_sysenter_addr(struct kvm_vcpu *vcpu,
return (unsigned long)data;
}
-static u64 vcpu_supported_debugctl(struct kvm_vcpu *vcpu)
+static u64 vmx_get_supported_debugctl(struct kvm_vcpu *vcpu, bool host_initiated)
{
- u64 debugctl = vmx_supported_debugctl();
+ u64 debugctl = 0;
- if (!intel_pmu_lbr_is_enabled(vcpu))
- debugctl &= ~DEBUGCTLMSR_LBR_MASK;
+ if (boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT) &&
+ (host_initiated || guest_cpuid_has(vcpu, X86_FEATURE_BUS_LOCK_DETECT)))
+ debugctl |= DEBUGCTLMSR_BUS_LOCK_DETECT;
- if (!guest_cpuid_has(vcpu, X86_FEATURE_BUS_LOCK_DETECT))
- debugctl &= ~DEBUGCTLMSR_BUS_LOCK_DETECT;
+ if ((vmx_get_perf_capabilities() & PMU_CAP_LBR_FMT) &&
+ (host_initiated || intel_pmu_lbr_is_enabled(vcpu)))
+ debugctl |= DEBUGCTLMSR_LBR | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI;
return debugctl;
}
@@ -2103,7 +2105,9 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
vmcs_writel(GUEST_SYSENTER_ESP, data);
break;
case MSR_IA32_DEBUGCTLMSR: {
- u64 invalid = data & ~vcpu_supported_debugctl(vcpu);
+ u64 invalid;
+
+ invalid = data & ~vmx_get_supported_debugctl(vcpu, msr_info->host_initiated);
if (invalid & (DEBUGCTLMSR_BTF|DEBUGCTLMSR_LBR)) {
if (report_ignored_msrs)
vcpu_unimpl(vcpu, "%s: BTF|LBR in IA32_DEBUGCTLMSR 0x%llx, nop\n",
@@ -8263,6 +8267,11 @@ static __init int hardware_setup(void)
if (!cpu_has_virtual_nmis())
enable_vnmi = 0;
+#ifdef CONFIG_X86_SGX_KVM
+ if (!cpu_has_vmx_encls_vmexit())
+ enable_sgx = false;
+#endif
+
/*
* set_apic_access_page_addr() is used to reload apic access
* page upon invalidation. No need to do anything if not
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 4bd5f8a751de..490ec23c8450 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1438,32 +1438,27 @@ static const u32 msrs_to_save_all[] = {
MSR_ARCH_PERFMON_FIXED_CTR0 + 2,
MSR_CORE_PERF_FIXED_CTR_CTRL, MSR_CORE_PERF_GLOBAL_STATUS,
MSR_CORE_PERF_GLOBAL_CTRL, MSR_CORE_PERF_GLOBAL_OVF_CTRL,
+ MSR_IA32_PEBS_ENABLE, MSR_IA32_DS_AREA, MSR_PEBS_DATA_CFG,
+
+ /* This part of MSRs should match KVM_INTEL_PMC_MAX_GENERIC. */
MSR_ARCH_PERFMON_PERFCTR0, MSR_ARCH_PERFMON_PERFCTR1,
MSR_ARCH_PERFMON_PERFCTR0 + 2, MSR_ARCH_PERFMON_PERFCTR0 + 3,
MSR_ARCH_PERFMON_PERFCTR0 + 4, MSR_ARCH_PERFMON_PERFCTR0 + 5,
MSR_ARCH_PERFMON_PERFCTR0 + 6, MSR_ARCH_PERFMON_PERFCTR0 + 7,
- MSR_ARCH_PERFMON_PERFCTR0 + 8, MSR_ARCH_PERFMON_PERFCTR0 + 9,
- MSR_ARCH_PERFMON_PERFCTR0 + 10, MSR_ARCH_PERFMON_PERFCTR0 + 11,
- MSR_ARCH_PERFMON_PERFCTR0 + 12, MSR_ARCH_PERFMON_PERFCTR0 + 13,
- MSR_ARCH_PERFMON_PERFCTR0 + 14, MSR_ARCH_PERFMON_PERFCTR0 + 15,
- MSR_ARCH_PERFMON_PERFCTR0 + 16, MSR_ARCH_PERFMON_PERFCTR0 + 17,
MSR_ARCH_PERFMON_EVENTSEL0, MSR_ARCH_PERFMON_EVENTSEL1,
MSR_ARCH_PERFMON_EVENTSEL0 + 2, MSR_ARCH_PERFMON_EVENTSEL0 + 3,
MSR_ARCH_PERFMON_EVENTSEL0 + 4, MSR_ARCH_PERFMON_EVENTSEL0 + 5,
MSR_ARCH_PERFMON_EVENTSEL0 + 6, MSR_ARCH_PERFMON_EVENTSEL0 + 7,
- MSR_ARCH_PERFMON_EVENTSEL0 + 8, MSR_ARCH_PERFMON_EVENTSEL0 + 9,
- MSR_ARCH_PERFMON_EVENTSEL0 + 10, MSR_ARCH_PERFMON_EVENTSEL0 + 11,
- MSR_ARCH_PERFMON_EVENTSEL0 + 12, MSR_ARCH_PERFMON_EVENTSEL0 + 13,
- MSR_ARCH_PERFMON_EVENTSEL0 + 14, MSR_ARCH_PERFMON_EVENTSEL0 + 15,
- MSR_ARCH_PERFMON_EVENTSEL0 + 16, MSR_ARCH_PERFMON_EVENTSEL0 + 17,
- MSR_IA32_PEBS_ENABLE, MSR_IA32_DS_AREA, MSR_PEBS_DATA_CFG,
MSR_K7_EVNTSEL0, MSR_K7_EVNTSEL1, MSR_K7_EVNTSEL2, MSR_K7_EVNTSEL3,
MSR_K7_PERFCTR0, MSR_K7_PERFCTR1, MSR_K7_PERFCTR2, MSR_K7_PERFCTR3,
+
+ /* This part of MSRs should match KVM_AMD_PMC_MAX_GENERIC. */
MSR_F15H_PERF_CTL0, MSR_F15H_PERF_CTL1, MSR_F15H_PERF_CTL2,
MSR_F15H_PERF_CTL3, MSR_F15H_PERF_CTL4, MSR_F15H_PERF_CTL5,
MSR_F15H_PERF_CTR0, MSR_F15H_PERF_CTR1, MSR_F15H_PERF_CTR2,
MSR_F15H_PERF_CTR3, MSR_F15H_PERF_CTR4, MSR_F15H_PERF_CTR5,
+
MSR_IA32_XFD, MSR_IA32_XFD_ERR,
};
@@ -1562,7 +1557,7 @@ static const u32 msr_based_features_all[] = {
MSR_IA32_VMX_EPT_VPID_CAP,
MSR_IA32_VMX_VMFUNC,
- MSR_F10H_DECFG,
+ MSR_AMD64_DE_CFG,
MSR_IA32_UCODE_REV,
MSR_IA32_ARCH_CAPABILITIES,
MSR_IA32_PERF_CAPABILITIES,
@@ -2315,11 +2310,11 @@ static void kvm_write_system_time(struct kvm_vcpu *vcpu, gpa_t system_time,
/* we verify if the enable bit is set... */
if (system_time & 1) {
- kvm_gfn_to_pfn_cache_init(vcpu->kvm, &vcpu->arch.pv_time, vcpu,
- KVM_HOST_USES_PFN, system_time & ~1ULL,
- sizeof(struct pvclock_vcpu_time_info));
+ kvm_gpc_activate(vcpu->kvm, &vcpu->arch.pv_time, vcpu,
+ KVM_HOST_USES_PFN, system_time & ~1ULL,
+ sizeof(struct pvclock_vcpu_time_info));
} else {
- kvm_gfn_to_pfn_cache_destroy(vcpu->kvm, &vcpu->arch.pv_time);
+ kvm_gpc_deactivate(vcpu->kvm, &vcpu->arch.pv_time);
}
return;
@@ -3388,7 +3383,7 @@ static int kvm_pv_enable_async_pf_int(struct kvm_vcpu *vcpu, u64 data)
static void kvmclock_reset(struct kvm_vcpu *vcpu)
{
- kvm_gfn_to_pfn_cache_destroy(vcpu->kvm, &vcpu->arch.pv_time);
+ kvm_gpc_deactivate(vcpu->kvm, &vcpu->arch.pv_time);
vcpu->arch.time = 0;
}
@@ -6442,26 +6437,22 @@ static int kvm_add_msr_filter(struct kvm_x86_msr_filter *msr_filter,
return 0;
}
-static int kvm_vm_ioctl_set_msr_filter(struct kvm *kvm, void __user *argp)
+static int kvm_vm_ioctl_set_msr_filter(struct kvm *kvm,
+ struct kvm_msr_filter *filter)
{
- struct kvm_msr_filter __user *user_msr_filter = argp;
struct kvm_x86_msr_filter *new_filter, *old_filter;
- struct kvm_msr_filter filter;
bool default_allow;
bool empty = true;
int r = 0;
u32 i;
- if (copy_from_user(&filter, user_msr_filter, sizeof(filter)))
- return -EFAULT;
-
- if (filter.flags & ~KVM_MSR_FILTER_DEFAULT_DENY)
+ if (filter->flags & ~KVM_MSR_FILTER_DEFAULT_DENY)
return -EINVAL;
- for (i = 0; i < ARRAY_SIZE(filter.ranges); i++)
- empty &= !filter.ranges[i].nmsrs;
+ for (i = 0; i < ARRAY_SIZE(filter->ranges); i++)
+ empty &= !filter->ranges[i].nmsrs;
- default_allow = !(filter.flags & KVM_MSR_FILTER_DEFAULT_DENY);
+ default_allow = !(filter->flags & KVM_MSR_FILTER_DEFAULT_DENY);
if (empty && !default_allow)
return -EINVAL;
@@ -6469,8 +6460,8 @@ static int kvm_vm_ioctl_set_msr_filter(struct kvm *kvm, void __user *argp)
if (!new_filter)
return -ENOMEM;
- for (i = 0; i < ARRAY_SIZE(filter.ranges); i++) {
- r = kvm_add_msr_filter(new_filter, &filter.ranges[i]);
+ for (i = 0; i < ARRAY_SIZE(filter->ranges); i++) {
+ r = kvm_add_msr_filter(new_filter, &filter->ranges[i]);
if (r) {
kvm_free_msr_filter(new_filter);
return r;
@@ -6493,6 +6484,62 @@ static int kvm_vm_ioctl_set_msr_filter(struct kvm *kvm, void __user *argp)
return 0;
}
+#ifdef CONFIG_KVM_COMPAT
+/* for KVM_X86_SET_MSR_FILTER */
+struct kvm_msr_filter_range_compat {
+ __u32 flags;
+ __u32 nmsrs;
+ __u32 base;
+ __u32 bitmap;
+};
+
+struct kvm_msr_filter_compat {
+ __u32 flags;
+ struct kvm_msr_filter_range_compat ranges[KVM_MSR_FILTER_MAX_RANGES];
+};
+
+#define KVM_X86_SET_MSR_FILTER_COMPAT _IOW(KVMIO, 0xc6, struct kvm_msr_filter_compat)
+
+long kvm_arch_vm_compat_ioctl(struct file *filp, unsigned int ioctl,
+ unsigned long arg)
+{
+ void __user *argp = (void __user *)arg;
+ struct kvm *kvm = filp->private_data;
+ long r = -ENOTTY;
+
+ switch (ioctl) {
+ case KVM_X86_SET_MSR_FILTER_COMPAT: {
+ struct kvm_msr_filter __user *user_msr_filter = argp;
+ struct kvm_msr_filter_compat filter_compat;
+ struct kvm_msr_filter filter;
+ int i;
+
+ if (copy_from_user(&filter_compat, user_msr_filter,
+ sizeof(filter_compat)))
+ return -EFAULT;
+
+ filter.flags = filter_compat.flags;
+ for (i = 0; i < ARRAY_SIZE(filter.ranges); i++) {
+ struct kvm_msr_filter_range_compat *cr;
+
+ cr = &filter_compat.ranges[i];
+ filter.ranges[i] = (struct kvm_msr_filter_range) {
+ .flags = cr->flags,
+ .nmsrs = cr->nmsrs,
+ .base = cr->base,
+ .bitmap = (__u8 *)(ulong)cr->bitmap,
+ };
+ }
+
+ r = kvm_vm_ioctl_set_msr_filter(kvm, &filter);
+ break;
+ }
+ }
+
+ return r;
+}
+#endif
+
#ifdef CONFIG_HAVE_KVM_PM_NOTIFIER
static int kvm_arch_suspend_notifier(struct kvm *kvm)
{
@@ -6915,9 +6962,16 @@ set_pit2_out:
case KVM_SET_PMU_EVENT_FILTER:
r = kvm_vm_ioctl_set_pmu_event_filter(kvm, argp);
break;
- case KVM_X86_SET_MSR_FILTER:
- r = kvm_vm_ioctl_set_msr_filter(kvm, argp);
+ case KVM_X86_SET_MSR_FILTER: {
+ struct kvm_msr_filter __user *user_msr_filter = argp;
+ struct kvm_msr_filter filter;
+
+ if (copy_from_user(&filter, user_msr_filter, sizeof(filter)))
+ return -EFAULT;
+
+ r = kvm_vm_ioctl_set_msr_filter(kvm, &filter);
break;
+ }
default:
r = -ENOTTY;
}
@@ -6982,14 +7036,14 @@ static void kvm_init_msr_list(void)
intel_pt_validate_hw_cap(PT_CAP_num_address_ranges) * 2)
continue;
break;
- case MSR_ARCH_PERFMON_PERFCTR0 ... MSR_ARCH_PERFMON_PERFCTR0 + 17:
+ case MSR_ARCH_PERFMON_PERFCTR0 ... MSR_ARCH_PERFMON_PERFCTR_MAX:
if (msrs_to_save_all[i] - MSR_ARCH_PERFMON_PERFCTR0 >=
- min(INTEL_PMC_MAX_GENERIC, kvm_pmu_cap.num_counters_gp))
+ min(KVM_INTEL_PMC_MAX_GENERIC, kvm_pmu_cap.num_counters_gp))
continue;
break;
- case MSR_ARCH_PERFMON_EVENTSEL0 ... MSR_ARCH_PERFMON_EVENTSEL0 + 17:
+ case MSR_ARCH_PERFMON_EVENTSEL0 ... MSR_ARCH_PERFMON_EVENTSEL_MAX:
if (msrs_to_save_all[i] - MSR_ARCH_PERFMON_EVENTSEL0 >=
- min(INTEL_PMC_MAX_GENERIC, kvm_pmu_cap.num_counters_gp))
+ min(KVM_INTEL_PMC_MAX_GENERIC, kvm_pmu_cap.num_counters_gp))
continue;
break;
case MSR_IA32_XFD:
@@ -9985,7 +10039,20 @@ static int kvm_check_and_inject_events(struct kvm_vcpu *vcpu,
kvm_x86_ops.nested_ops->has_events(vcpu))
*req_immediate_exit = true;
- WARN_ON(kvm_is_exception_pending(vcpu));
+ /*
+ * KVM must never queue a new exception while injecting an event; KVM
+ * is done emulating and should only propagate the to-be-injected event
+ * to the VMCS/VMCB. Queueing a new exception can put the vCPU into an
+ * infinite loop as KVM will bail from VM-Enter to inject the pending
+ * exception and start the cycle all over.
+ *
+ * Exempt triple faults as they have special handling and won't put the
+ * vCPU into an infinite loop. Triple fault can be queued when running
+ * VMX without unrestricted guest, as that requires KVM to emulate Real
+ * Mode events (see kvm_inject_realmode_interrupt()).
+ */
+ WARN_ON_ONCE(vcpu->arch.exception.pending ||
+ vcpu->arch.exception_vmexit.pending);
return 0;
out:
@@ -10332,7 +10399,10 @@ void __kvm_set_or_clear_apicv_inhibit(struct kvm *kvm,
kvm->arch.apicv_inhibit_reasons = new;
if (new) {
unsigned long gfn = gpa_to_gfn(APIC_DEFAULT_PHYS_BASE);
+ int idx = srcu_read_lock(&kvm->srcu);
+
kvm_zap_gfn_range(kvm, gfn, gfn+1);
+ srcu_read_unlock(&kvm->srcu, idx);
}
} else {
kvm->arch.apicv_inhibit_reasons = new;
@@ -11757,6 +11827,8 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
vcpu->arch.regs_avail = ~0;
vcpu->arch.regs_dirty = ~0;
+ kvm_gpc_init(&vcpu->arch.pv_time);
+
if (!irqchip_in_kernel(vcpu->kvm) || kvm_vcpu_is_reset_bsp(vcpu))
vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
else
diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c
index 93c628d3e3a9..2dae413bd62a 100644
--- a/arch/x86/kvm/xen.c
+++ b/arch/x86/kvm/xen.c
@@ -42,13 +42,13 @@ static int kvm_xen_shared_info_init(struct kvm *kvm, gfn_t gfn)
int idx = srcu_read_lock(&kvm->srcu);
if (gfn == GPA_INVALID) {
- kvm_gfn_to_pfn_cache_destroy(kvm, gpc);
+ kvm_gpc_deactivate(kvm, gpc);
goto out;
}
do {
- ret = kvm_gfn_to_pfn_cache_init(kvm, gpc, NULL, KVM_HOST_USES_PFN,
- gpa, PAGE_SIZE);
+ ret = kvm_gpc_activate(kvm, gpc, NULL, KVM_HOST_USES_PFN, gpa,
+ PAGE_SIZE);
if (ret)
goto out;
@@ -554,15 +554,15 @@ int kvm_xen_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data)
offsetof(struct compat_vcpu_info, time));
if (data->u.gpa == GPA_INVALID) {
- kvm_gfn_to_pfn_cache_destroy(vcpu->kvm, &vcpu->arch.xen.vcpu_info_cache);
+ kvm_gpc_deactivate(vcpu->kvm, &vcpu->arch.xen.vcpu_info_cache);
r = 0;
break;
}
- r = kvm_gfn_to_pfn_cache_init(vcpu->kvm,
- &vcpu->arch.xen.vcpu_info_cache,
- NULL, KVM_HOST_USES_PFN, data->u.gpa,
- sizeof(struct vcpu_info));
+ r = kvm_gpc_activate(vcpu->kvm,
+ &vcpu->arch.xen.vcpu_info_cache, NULL,
+ KVM_HOST_USES_PFN, data->u.gpa,
+ sizeof(struct vcpu_info));
if (!r)
kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
@@ -570,16 +570,16 @@ int kvm_xen_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data)
case KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO:
if (data->u.gpa == GPA_INVALID) {
- kvm_gfn_to_pfn_cache_destroy(vcpu->kvm,
- &vcpu->arch.xen.vcpu_time_info_cache);
+ kvm_gpc_deactivate(vcpu->kvm,
+ &vcpu->arch.xen.vcpu_time_info_cache);
r = 0;
break;
}
- r = kvm_gfn_to_pfn_cache_init(vcpu->kvm,
- &vcpu->arch.xen.vcpu_time_info_cache,
- NULL, KVM_HOST_USES_PFN, data->u.gpa,
- sizeof(struct pvclock_vcpu_time_info));
+ r = kvm_gpc_activate(vcpu->kvm,
+ &vcpu->arch.xen.vcpu_time_info_cache,
+ NULL, KVM_HOST_USES_PFN, data->u.gpa,
+ sizeof(struct pvclock_vcpu_time_info));
if (!r)
kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
break;
@@ -590,16 +590,15 @@ int kvm_xen_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data)
break;
}
if (data->u.gpa == GPA_INVALID) {
- kvm_gfn_to_pfn_cache_destroy(vcpu->kvm,
- &vcpu->arch.xen.runstate_cache);
+ kvm_gpc_deactivate(vcpu->kvm,
+ &vcpu->arch.xen.runstate_cache);
r = 0;
break;
}
- r = kvm_gfn_to_pfn_cache_init(vcpu->kvm,
- &vcpu->arch.xen.runstate_cache,
- NULL, KVM_HOST_USES_PFN, data->u.gpa,
- sizeof(struct vcpu_runstate_info));
+ r = kvm_gpc_activate(vcpu->kvm, &vcpu->arch.xen.runstate_cache,
+ NULL, KVM_HOST_USES_PFN, data->u.gpa,
+ sizeof(struct vcpu_runstate_info));
break;
case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT:
@@ -1667,18 +1666,18 @@ static int kvm_xen_eventfd_assign(struct kvm *kvm,
case EVTCHNSTAT_ipi:
/* IPI must map back to the same port# */
if (data->u.evtchn.deliver.port.port != data->u.evtchn.send_port)
- goto out; /* -EINVAL */
+ goto out_noeventfd; /* -EINVAL */
break;
case EVTCHNSTAT_interdomain:
if (data->u.evtchn.deliver.port.port) {
if (data->u.evtchn.deliver.port.port >= max_evtchn_port(kvm))
- goto out; /* -EINVAL */
+ goto out_noeventfd; /* -EINVAL */
} else {
eventfd = eventfd_ctx_fdget(data->u.evtchn.deliver.eventfd.fd);
if (IS_ERR(eventfd)) {
ret = PTR_ERR(eventfd);
- goto out;
+ goto out_noeventfd;
}
}
break;
@@ -1718,6 +1717,7 @@ static int kvm_xen_eventfd_assign(struct kvm *kvm,
out:
if (eventfd)
eventfd_ctx_put(eventfd);
+out_noeventfd:
kfree(evtchnfd);
return ret;
}
@@ -1816,7 +1816,12 @@ void kvm_xen_init_vcpu(struct kvm_vcpu *vcpu)
{
vcpu->arch.xen.vcpu_id = vcpu->vcpu_idx;
vcpu->arch.xen.poll_evtchn = 0;
+
timer_setup(&vcpu->arch.xen.poll_timer, cancel_evtchn_poll, 0);
+
+ kvm_gpc_init(&vcpu->arch.xen.runstate_cache);
+ kvm_gpc_init(&vcpu->arch.xen.vcpu_info_cache);
+ kvm_gpc_init(&vcpu->arch.xen.vcpu_time_info_cache);
}
void kvm_xen_destroy_vcpu(struct kvm_vcpu *vcpu)
@@ -1824,18 +1829,17 @@ void kvm_xen_destroy_vcpu(struct kvm_vcpu *vcpu)
if (kvm_xen_timer_enabled(vcpu))
kvm_xen_stop_timer(vcpu);
- kvm_gfn_to_pfn_cache_destroy(vcpu->kvm,
- &vcpu->arch.xen.runstate_cache);
- kvm_gfn_to_pfn_cache_destroy(vcpu->kvm,
- &vcpu->arch.xen.vcpu_info_cache);
- kvm_gfn_to_pfn_cache_destroy(vcpu->kvm,
- &vcpu->arch.xen.vcpu_time_info_cache);
+ kvm_gpc_deactivate(vcpu->kvm, &vcpu->arch.xen.runstate_cache);
+ kvm_gpc_deactivate(vcpu->kvm, &vcpu->arch.xen.vcpu_info_cache);
+ kvm_gpc_deactivate(vcpu->kvm, &vcpu->arch.xen.vcpu_time_info_cache);
+
del_timer_sync(&vcpu->arch.xen.poll_timer);
}
void kvm_xen_init_vm(struct kvm *kvm)
{
idr_init(&kvm->arch.xen.evtchn_ports);
+ kvm_gpc_init(&kvm->arch.xen.shinfo_cache);
}
void kvm_xen_destroy_vm(struct kvm *kvm)
@@ -1843,7 +1847,7 @@ void kvm_xen_destroy_vm(struct kvm *kvm)
struct evtchnfd *evtchnfd;
int i;
- kvm_gfn_to_pfn_cache_destroy(kvm, &kvm->arch.xen.shinfo_cache);
+ kvm_gpc_deactivate(kvm, &kvm->arch.xen.shinfo_cache);
idr_for_each_entry(&kvm->arch.xen.evtchn_ports, evtchnfd, i) {
if (!evtchnfd->deliver.port.port)