summaryrefslogtreecommitdiff
path: root/arch/x86
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/coco/tdx/tdx.c25
-rw-r--r--arch/x86/include/asm/intel-family.h11
-rw-r--r--arch/x86/include/asm/syscall_wrapper.h2
-rw-r--r--arch/x86/kvm/cpuid.c13
-rw-r--r--arch/x86/kvm/debugfs.c7
-rw-r--r--arch/x86/kvm/emulate.c108
-rw-r--r--arch/x86/kvm/vmx/capabilities.h19
-rw-r--r--arch/x86/kvm/vmx/vmx.c23
-rw-r--r--arch/x86/kvm/x86.c30
-rw-r--r--arch/x86/kvm/xen.c64
-rw-r--r--arch/x86/xen/pmu.c2
-rw-r--r--arch/x86/xen/setup.c23
12 files changed, 205 insertions, 122 deletions
diff --git a/arch/x86/coco/tdx/tdx.c b/arch/x86/coco/tdx/tdx.c
index 928dcf7a20d9..b8998cf0508a 100644
--- a/arch/x86/coco/tdx/tdx.c
+++ b/arch/x86/coco/tdx/tdx.c
@@ -34,6 +34,8 @@
#define VE_GET_PORT_NUM(e) ((e) >> 16)
#define VE_IS_IO_STRING(e) ((e) & BIT(4))
+#define ATTR_SEPT_VE_DISABLE BIT(28)
+
/*
* Wrapper for standard use of __tdx_hypercall with no output aside from
* return code.
@@ -98,10 +100,11 @@ static inline void tdx_module_call(u64 fn, u64 rcx, u64 rdx, u64 r8, u64 r9,
panic("TDCALL %lld failed (Buggy TDX module!)\n", fn);
}
-static u64 get_cc_mask(void)
+static void tdx_parse_tdinfo(u64 *cc_mask)
{
struct tdx_module_output out;
unsigned int gpa_width;
+ u64 td_attr;
/*
* TDINFO TDX module call is used to get the TD execution environment
@@ -109,19 +112,27 @@ static u64 get_cc_mask(void)
* information, etc. More details about the ABI can be found in TDX
* Guest-Host-Communication Interface (GHCI), section 2.4.2 TDCALL
* [TDG.VP.INFO].
+ */
+ tdx_module_call(TDX_GET_INFO, 0, 0, 0, 0, &out);
+
+ /*
+ * The highest bit of a guest physical address is the "sharing" bit.
+ * Set it for shared pages and clear it for private pages.
*
* The GPA width that comes out of this call is critical. TDX guests
* can not meaningfully run without it.
*/
- tdx_module_call(TDX_GET_INFO, 0, 0, 0, 0, &out);
-
gpa_width = out.rcx & GENMASK(5, 0);
+ *cc_mask = BIT_ULL(gpa_width - 1);
/*
- * The highest bit of a guest physical address is the "sharing" bit.
- * Set it for shared pages and clear it for private pages.
+ * The kernel can not handle #VE's when accessing normal kernel
+ * memory. Ensure that no #VE will be delivered for accesses to
+ * TD-private memory. Only VMM-shared memory (MMIO) will #VE.
*/
- return BIT_ULL(gpa_width - 1);
+ td_attr = out.rdx;
+ if (!(td_attr & ATTR_SEPT_VE_DISABLE))
+ panic("TD misconfiguration: SEPT_VE_DISABLE attibute must be set.\n");
}
/*
@@ -758,7 +769,7 @@ void __init tdx_early_init(void)
setup_force_cpu_cap(X86_FEATURE_TDX_GUEST);
cc_set_vendor(CC_VENDOR_INTEL);
- cc_mask = get_cc_mask();
+ tdx_parse_tdinfo(&cc_mask);
cc_set_mask(cc_mask);
/*
diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h
index 5d75fe229342..347707d459c6 100644
--- a/arch/x86/include/asm/intel-family.h
+++ b/arch/x86/include/asm/intel-family.h
@@ -107,6 +107,11 @@
#define INTEL_FAM6_SAPPHIRERAPIDS_X 0x8F /* Golden Cove */
+#define INTEL_FAM6_EMERALDRAPIDS_X 0xCF
+
+#define INTEL_FAM6_GRANITERAPIDS_X 0xAD
+#define INTEL_FAM6_GRANITERAPIDS_D 0xAE
+
#define INTEL_FAM6_ALDERLAKE 0x97 /* Golden Cove / Gracemont */
#define INTEL_FAM6_ALDERLAKE_L 0x9A /* Golden Cove / Gracemont */
#define INTEL_FAM6_ALDERLAKE_N 0xBE
@@ -118,7 +123,7 @@
#define INTEL_FAM6_METEORLAKE 0xAC
#define INTEL_FAM6_METEORLAKE_L 0xAA
-/* "Small Core" Processors (Atom) */
+/* "Small Core" Processors (Atom/E-Core) */
#define INTEL_FAM6_ATOM_BONNELL 0x1C /* Diamondville, Pineview */
#define INTEL_FAM6_ATOM_BONNELL_MID 0x26 /* Silverthorne, Lincroft */
@@ -145,6 +150,10 @@
#define INTEL_FAM6_ATOM_TREMONT 0x96 /* Elkhart Lake */
#define INTEL_FAM6_ATOM_TREMONT_L 0x9C /* Jasper Lake */
+#define INTEL_FAM6_SIERRAFOREST_X 0xAF
+
+#define INTEL_FAM6_GRANDRIDGE 0xB6
+
/* Xeon Phi */
#define INTEL_FAM6_XEON_PHI_KNL 0x57 /* Knights Landing */
diff --git a/arch/x86/include/asm/syscall_wrapper.h b/arch/x86/include/asm/syscall_wrapper.h
index 59358d1bf880..fd2669b1cb2d 100644
--- a/arch/x86/include/asm/syscall_wrapper.h
+++ b/arch/x86/include/asm/syscall_wrapper.h
@@ -6,7 +6,7 @@
#ifndef _ASM_X86_SYSCALL_WRAPPER_H
#define _ASM_X86_SYSCALL_WRAPPER_H
-struct pt_regs;
+#include <asm/ptrace.h>
extern long __x64_sys_ni_syscall(const struct pt_regs *regs);
extern long __ia32_sys_ni_syscall(const struct pt_regs *regs);
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 7065462378e2..62bc7a01cecc 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -1133,11 +1133,13 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
entry->eax = max(entry->eax, 0x80000021);
break;
case 0x80000001:
+ entry->ebx &= ~GENMASK(27, 16);
cpuid_entry_override(entry, CPUID_8000_0001_EDX);
cpuid_entry_override(entry, CPUID_8000_0001_ECX);
break;
case 0x80000006:
- /* L2 cache and TLB: pass through host info. */
+ /* Drop reserved bits, pass host L2 cache and TLB info. */
+ entry->edx &= ~GENMASK(17, 16);
break;
case 0x80000007: /* Advanced power management */
/* invariant TSC is CPUID.80000007H:EDX[8] */
@@ -1167,6 +1169,7 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
g_phys_as = phys_as;
entry->eax = g_phys_as | (virt_as << 8);
+ entry->ecx &= ~(GENMASK(31, 16) | GENMASK(11, 8));
entry->edx = 0;
cpuid_entry_override(entry, CPUID_8000_0008_EBX);
break;
@@ -1186,6 +1189,9 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
entry->ecx = entry->edx = 0;
break;
case 0x8000001a:
+ entry->eax &= GENMASK(2, 0);
+ entry->ebx = entry->ecx = entry->edx = 0;
+ break;
case 0x8000001e:
break;
case 0x8000001F:
@@ -1193,7 +1199,8 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
entry->eax = entry->ebx = entry->ecx = entry->edx = 0;
} else {
cpuid_entry_override(entry, CPUID_8000_001F_EAX);
-
+ /* Clear NumVMPL since KVM does not support VMPL. */
+ entry->ebx &= ~GENMASK(31, 12);
/*
* Enumerate '0' for "PA bits reduction", the adjusted
* MAXPHYADDR is enumerated directly (see 0x80000008).
@@ -1331,7 +1338,7 @@ int kvm_dev_ioctl_get_cpuid(struct kvm_cpuid2 *cpuid,
if (sanity_check_entries(entries, cpuid->nent, type))
return -EINVAL;
- array.entries = kvcalloc(sizeof(struct kvm_cpuid_entry2), cpuid->nent, GFP_KERNEL);
+ array.entries = kvcalloc(cpuid->nent, sizeof(struct kvm_cpuid_entry2), GFP_KERNEL);
if (!array.entries)
return -ENOMEM;
diff --git a/arch/x86/kvm/debugfs.c b/arch/x86/kvm/debugfs.c
index cfed36aba2f7..c1390357126a 100644
--- a/arch/x86/kvm/debugfs.c
+++ b/arch/x86/kvm/debugfs.c
@@ -158,11 +158,16 @@ out:
static int kvm_mmu_rmaps_stat_open(struct inode *inode, struct file *file)
{
struct kvm *kvm = inode->i_private;
+ int r;
if (!kvm_get_kvm_safe(kvm))
return -ENOENT;
- return single_open(file, kvm_mmu_rmaps_stat_show, kvm);
+ r = single_open(file, kvm_mmu_rmaps_stat_show, kvm);
+ if (r < 0)
+ kvm_put_kvm(kvm);
+
+ return r;
}
static int kvm_mmu_rmaps_stat_release(struct inode *inode, struct file *file)
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 3b27622d4642..4a43261d25a2 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -791,8 +791,7 @@ static int linearize(struct x86_emulate_ctxt *ctxt,
ctxt->mode, linear);
}
-static inline int assign_eip(struct x86_emulate_ctxt *ctxt, ulong dst,
- enum x86emul_mode mode)
+static inline int assign_eip(struct x86_emulate_ctxt *ctxt, ulong dst)
{
ulong linear;
int rc;
@@ -802,41 +801,71 @@ static inline int assign_eip(struct x86_emulate_ctxt *ctxt, ulong dst,
if (ctxt->op_bytes != sizeof(unsigned long))
addr.ea = dst & ((1UL << (ctxt->op_bytes << 3)) - 1);
- rc = __linearize(ctxt, addr, &max_size, 1, false, true, mode, &linear);
+ rc = __linearize(ctxt, addr, &max_size, 1, false, true, ctxt->mode, &linear);
if (rc == X86EMUL_CONTINUE)
ctxt->_eip = addr.ea;
return rc;
}
+static inline int emulator_recalc_and_set_mode(struct x86_emulate_ctxt *ctxt)
+{
+ u64 efer;
+ struct desc_struct cs;
+ u16 selector;
+ u32 base3;
+
+ ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
+
+ if (!(ctxt->ops->get_cr(ctxt, 0) & X86_CR0_PE)) {
+ /* Real mode. cpu must not have long mode active */
+ if (efer & EFER_LMA)
+ return X86EMUL_UNHANDLEABLE;
+ ctxt->mode = X86EMUL_MODE_REAL;
+ return X86EMUL_CONTINUE;
+ }
+
+ if (ctxt->eflags & X86_EFLAGS_VM) {
+ /* Protected/VM86 mode. cpu must not have long mode active */
+ if (efer & EFER_LMA)
+ return X86EMUL_UNHANDLEABLE;
+ ctxt->mode = X86EMUL_MODE_VM86;
+ return X86EMUL_CONTINUE;
+ }
+
+ if (!ctxt->ops->get_segment(ctxt, &selector, &cs, &base3, VCPU_SREG_CS))
+ return X86EMUL_UNHANDLEABLE;
+
+ if (efer & EFER_LMA) {
+ if (cs.l) {
+ /* Proper long mode */
+ ctxt->mode = X86EMUL_MODE_PROT64;
+ } else if (cs.d) {
+ /* 32 bit compatibility mode*/
+ ctxt->mode = X86EMUL_MODE_PROT32;
+ } else {
+ ctxt->mode = X86EMUL_MODE_PROT16;
+ }
+ } else {
+ /* Legacy 32 bit / 16 bit mode */
+ ctxt->mode = cs.d ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16;
+ }
+
+ return X86EMUL_CONTINUE;
+}
+
static inline int assign_eip_near(struct x86_emulate_ctxt *ctxt, ulong dst)
{
- return assign_eip(ctxt, dst, ctxt->mode);
+ return assign_eip(ctxt, dst);
}
-static int assign_eip_far(struct x86_emulate_ctxt *ctxt, ulong dst,
- const struct desc_struct *cs_desc)
+static int assign_eip_far(struct x86_emulate_ctxt *ctxt, ulong dst)
{
- enum x86emul_mode mode = ctxt->mode;
- int rc;
+ int rc = emulator_recalc_and_set_mode(ctxt);
-#ifdef CONFIG_X86_64
- if (ctxt->mode >= X86EMUL_MODE_PROT16) {
- if (cs_desc->l) {
- u64 efer = 0;
+ if (rc != X86EMUL_CONTINUE)
+ return rc;
- ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
- if (efer & EFER_LMA)
- mode = X86EMUL_MODE_PROT64;
- } else
- mode = X86EMUL_MODE_PROT32; /* temporary value */
- }
-#endif
- if (mode == X86EMUL_MODE_PROT16 || mode == X86EMUL_MODE_PROT32)
- mode = cs_desc->d ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16;
- rc = assign_eip(ctxt, dst, mode);
- if (rc == X86EMUL_CONTINUE)
- ctxt->mode = mode;
- return rc;
+ return assign_eip(ctxt, dst);
}
static inline int jmp_rel(struct x86_emulate_ctxt *ctxt, int rel)
@@ -2172,7 +2201,7 @@ static int em_jmp_far(struct x86_emulate_ctxt *ctxt)
if (rc != X86EMUL_CONTINUE)
return rc;
- rc = assign_eip_far(ctxt, ctxt->src.val, &new_desc);
+ rc = assign_eip_far(ctxt, ctxt->src.val);
/* Error handling is not implemented. */
if (rc != X86EMUL_CONTINUE)
return X86EMUL_UNHANDLEABLE;
@@ -2250,7 +2279,7 @@ static int em_ret_far(struct x86_emulate_ctxt *ctxt)
&new_desc);
if (rc != X86EMUL_CONTINUE)
return rc;
- rc = assign_eip_far(ctxt, eip, &new_desc);
+ rc = assign_eip_far(ctxt, eip);
/* Error handling is not implemented. */
if (rc != X86EMUL_CONTINUE)
return X86EMUL_UNHANDLEABLE;
@@ -2432,7 +2461,7 @@ static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt,
ctxt->eflags = GET_SMSTATE(u32, smstate, 0x7ff4) | X86_EFLAGS_FIXED;
ctxt->_eip = GET_SMSTATE(u32, smstate, 0x7ff0);
- for (i = 0; i < NR_EMULATOR_GPRS; i++)
+ for (i = 0; i < 8; i++)
*reg_write(ctxt, i) = GET_SMSTATE(u32, smstate, 0x7fd0 + i * 4);
val = GET_SMSTATE(u32, smstate, 0x7fcc);
@@ -2489,7 +2518,7 @@ static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt,
u16 selector;
int i, r;
- for (i = 0; i < NR_EMULATOR_GPRS; i++)
+ for (i = 0; i < 16; i++)
*reg_write(ctxt, i) = GET_SMSTATE(u64, smstate, 0x7ff8 - i * 8);
ctxt->_eip = GET_SMSTATE(u64, smstate, 0x7f78);
@@ -2633,7 +2662,7 @@ static int em_rsm(struct x86_emulate_ctxt *ctxt)
* those side effects need to be explicitly handled for both success
* and shutdown.
*/
- return X86EMUL_CONTINUE;
+ return emulator_recalc_and_set_mode(ctxt);
emulate_shutdown:
ctxt->ops->triple_fault(ctxt);
@@ -2876,6 +2905,7 @@ static int em_sysexit(struct x86_emulate_ctxt *ctxt)
ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS);
ctxt->_eip = rdx;
+ ctxt->mode = usermode;
*reg_write(ctxt, VCPU_REGS_RSP) = rcx;
return X86EMUL_CONTINUE;
@@ -3469,7 +3499,7 @@ static int em_call_far(struct x86_emulate_ctxt *ctxt)
if (rc != X86EMUL_CONTINUE)
return rc;
- rc = assign_eip_far(ctxt, ctxt->src.val, &new_desc);
+ rc = assign_eip_far(ctxt, ctxt->src.val);
if (rc != X86EMUL_CONTINUE)
goto fail;
@@ -3611,11 +3641,25 @@ static int em_movbe(struct x86_emulate_ctxt *ctxt)
static int em_cr_write(struct x86_emulate_ctxt *ctxt)
{
- if (ctxt->ops->set_cr(ctxt, ctxt->modrm_reg, ctxt->src.val))
+ int cr_num = ctxt->modrm_reg;
+ int r;
+
+ if (ctxt->ops->set_cr(ctxt, cr_num, ctxt->src.val))
return emulate_gp(ctxt, 0);
/* Disable writeback. */
ctxt->dst.type = OP_NONE;
+
+ if (cr_num == 0) {
+ /*
+ * CR0 write might have updated CR0.PE and/or CR0.PG
+ * which can affect the cpu's execution mode.
+ */
+ r = emulator_recalc_and_set_mode(ctxt);
+ if (r != X86EMUL_CONTINUE)
+ return r;
+ }
+
return X86EMUL_CONTINUE;
}
diff --git a/arch/x86/kvm/vmx/capabilities.h b/arch/x86/kvm/vmx/capabilities.h
index 87c4e46daf37..07254314f3dd 100644
--- a/arch/x86/kvm/vmx/capabilities.h
+++ b/arch/x86/kvm/vmx/capabilities.h
@@ -24,8 +24,6 @@ extern int __read_mostly pt_mode;
#define PMU_CAP_FW_WRITES (1ULL << 13)
#define PMU_CAP_LBR_FMT 0x3f
-#define DEBUGCTLMSR_LBR_MASK (DEBUGCTLMSR_LBR | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI)
-
struct nested_vmx_msrs {
/*
* We only store the "true" versions of the VMX capability MSRs. We
@@ -400,6 +398,7 @@ static inline bool vmx_pebs_supported(void)
static inline u64 vmx_get_perf_capabilities(void)
{
u64 perf_cap = PMU_CAP_FW_WRITES;
+ struct x86_pmu_lbr lbr;
u64 host_perf_cap = 0;
if (!enable_pmu)
@@ -408,7 +407,8 @@ static inline u64 vmx_get_perf_capabilities(void)
if (boot_cpu_has(X86_FEATURE_PDCM))
rdmsrl(MSR_IA32_PERF_CAPABILITIES, host_perf_cap);
- perf_cap |= host_perf_cap & PMU_CAP_LBR_FMT;
+ if (x86_perf_get_lbr(&lbr) >= 0 && lbr.nr)
+ perf_cap |= host_perf_cap & PMU_CAP_LBR_FMT;
if (vmx_pebs_supported()) {
perf_cap |= host_perf_cap & PERF_CAP_PEBS_MASK;
@@ -419,19 +419,6 @@ static inline u64 vmx_get_perf_capabilities(void)
return perf_cap;
}
-static inline u64 vmx_supported_debugctl(void)
-{
- u64 debugctl = 0;
-
- if (boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT))
- debugctl |= DEBUGCTLMSR_BUS_LOCK_DETECT;
-
- if (vmx_get_perf_capabilities() & PMU_CAP_LBR_FMT)
- debugctl |= DEBUGCTLMSR_LBR_MASK;
-
- return debugctl;
-}
-
static inline bool cpu_has_notify_vmexit(void)
{
return vmcs_config.cpu_based_2nd_exec_ctrl &
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 9dba04b6b019..63247c57c72c 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -2021,15 +2021,17 @@ static u64 nested_vmx_truncate_sysenter_addr(struct kvm_vcpu *vcpu,
return (unsigned long)data;
}
-static u64 vcpu_supported_debugctl(struct kvm_vcpu *vcpu)
+static u64 vmx_get_supported_debugctl(struct kvm_vcpu *vcpu, bool host_initiated)
{
- u64 debugctl = vmx_supported_debugctl();
+ u64 debugctl = 0;
- if (!intel_pmu_lbr_is_enabled(vcpu))
- debugctl &= ~DEBUGCTLMSR_LBR_MASK;
+ if (boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT) &&
+ (host_initiated || guest_cpuid_has(vcpu, X86_FEATURE_BUS_LOCK_DETECT)))
+ debugctl |= DEBUGCTLMSR_BUS_LOCK_DETECT;
- if (!guest_cpuid_has(vcpu, X86_FEATURE_BUS_LOCK_DETECT))
- debugctl &= ~DEBUGCTLMSR_BUS_LOCK_DETECT;
+ if ((vmx_get_perf_capabilities() & PMU_CAP_LBR_FMT) &&
+ (host_initiated || intel_pmu_lbr_is_enabled(vcpu)))
+ debugctl |= DEBUGCTLMSR_LBR | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI;
return debugctl;
}
@@ -2103,7 +2105,9 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
vmcs_writel(GUEST_SYSENTER_ESP, data);
break;
case MSR_IA32_DEBUGCTLMSR: {
- u64 invalid = data & ~vcpu_supported_debugctl(vcpu);
+ u64 invalid;
+
+ invalid = data & ~vmx_get_supported_debugctl(vcpu, msr_info->host_initiated);
if (invalid & (DEBUGCTLMSR_BTF|DEBUGCTLMSR_LBR)) {
if (report_ignored_msrs)
vcpu_unimpl(vcpu, "%s: BTF|LBR in IA32_DEBUGCTLMSR 0x%llx, nop\n",
@@ -8263,6 +8267,11 @@ static __init int hardware_setup(void)
if (!cpu_has_virtual_nmis())
enable_vnmi = 0;
+#ifdef CONFIG_X86_SGX_KVM
+ if (!cpu_has_vmx_encls_vmexit())
+ enable_sgx = false;
+#endif
+
/*
* set_apic_access_page_addr() is used to reload apic access
* page upon invalidation. No need to do anything if not
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 9cf1ba865562..5f5eb577d583 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2315,11 +2315,11 @@ static void kvm_write_system_time(struct kvm_vcpu *vcpu, gpa_t system_time,
/* we verify if the enable bit is set... */
if (system_time & 1) {
- kvm_gfn_to_pfn_cache_init(vcpu->kvm, &vcpu->arch.pv_time, vcpu,
- KVM_HOST_USES_PFN, system_time & ~1ULL,
- sizeof(struct pvclock_vcpu_time_info));
+ kvm_gpc_activate(vcpu->kvm, &vcpu->arch.pv_time, vcpu,
+ KVM_HOST_USES_PFN, system_time & ~1ULL,
+ sizeof(struct pvclock_vcpu_time_info));
} else {
- kvm_gfn_to_pfn_cache_destroy(vcpu->kvm, &vcpu->arch.pv_time);
+ kvm_gpc_deactivate(vcpu->kvm, &vcpu->arch.pv_time);
}
return;
@@ -3388,7 +3388,7 @@ static int kvm_pv_enable_async_pf_int(struct kvm_vcpu *vcpu, u64 data)
static void kvmclock_reset(struct kvm_vcpu *vcpu)
{
- kvm_gfn_to_pfn_cache_destroy(vcpu->kvm, &vcpu->arch.pv_time);
+ kvm_gpc_deactivate(vcpu->kvm, &vcpu->arch.pv_time);
vcpu->arch.time = 0;
}
@@ -10044,7 +10044,20 @@ static int kvm_check_and_inject_events(struct kvm_vcpu *vcpu,
kvm_x86_ops.nested_ops->has_events(vcpu))
*req_immediate_exit = true;
- WARN_ON(kvm_is_exception_pending(vcpu));
+ /*
+ * KVM must never queue a new exception while injecting an event; KVM
+ * is done emulating and should only propagate the to-be-injected event
+ * to the VMCS/VMCB. Queueing a new exception can put the vCPU into an
+ * infinite loop as KVM will bail from VM-Enter to inject the pending
+ * exception and start the cycle all over.
+ *
+ * Exempt triple faults as they have special handling and won't put the
+ * vCPU into an infinite loop. Triple fault can be queued when running
+ * VMX without unrestricted guest, as that requires KVM to emulate Real
+ * Mode events (see kvm_inject_realmode_interrupt()).
+ */
+ WARN_ON_ONCE(vcpu->arch.exception.pending ||
+ vcpu->arch.exception_vmexit.pending);
return 0;
out:
@@ -10391,7 +10404,10 @@ void __kvm_set_or_clear_apicv_inhibit(struct kvm *kvm,
kvm->arch.apicv_inhibit_reasons = new;
if (new) {
unsigned long gfn = gpa_to_gfn(APIC_DEFAULT_PHYS_BASE);
+ int idx = srcu_read_lock(&kvm->srcu);
+
kvm_zap_gfn_range(kvm, gfn, gfn+1);
+ srcu_read_unlock(&kvm->srcu, idx);
}
} else {
kvm->arch.apicv_inhibit_reasons = new;
@@ -11816,6 +11832,8 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
vcpu->arch.regs_avail = ~0;
vcpu->arch.regs_dirty = ~0;
+ kvm_gpc_init(&vcpu->arch.pv_time);
+
if (!irqchip_in_kernel(vcpu->kvm) || kvm_vcpu_is_reset_bsp(vcpu))
vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
else
diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c
index 93c628d3e3a9..2dae413bd62a 100644
--- a/arch/x86/kvm/xen.c
+++ b/arch/x86/kvm/xen.c
@@ -42,13 +42,13 @@ static int kvm_xen_shared_info_init(struct kvm *kvm, gfn_t gfn)
int idx = srcu_read_lock(&kvm->srcu);
if (gfn == GPA_INVALID) {
- kvm_gfn_to_pfn_cache_destroy(kvm, gpc);
+ kvm_gpc_deactivate(kvm, gpc);
goto out;
}
do {
- ret = kvm_gfn_to_pfn_cache_init(kvm, gpc, NULL, KVM_HOST_USES_PFN,
- gpa, PAGE_SIZE);
+ ret = kvm_gpc_activate(kvm, gpc, NULL, KVM_HOST_USES_PFN, gpa,
+ PAGE_SIZE);
if (ret)
goto out;
@@ -554,15 +554,15 @@ int kvm_xen_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data)
offsetof(struct compat_vcpu_info, time));
if (data->u.gpa == GPA_INVALID) {
- kvm_gfn_to_pfn_cache_destroy(vcpu->kvm, &vcpu->arch.xen.vcpu_info_cache);
+ kvm_gpc_deactivate(vcpu->kvm, &vcpu->arch.xen.vcpu_info_cache);
r = 0;
break;
}
- r = kvm_gfn_to_pfn_cache_init(vcpu->kvm,
- &vcpu->arch.xen.vcpu_info_cache,
- NULL, KVM_HOST_USES_PFN, data->u.gpa,
- sizeof(struct vcpu_info));
+ r = kvm_gpc_activate(vcpu->kvm,
+ &vcpu->arch.xen.vcpu_info_cache, NULL,
+ KVM_HOST_USES_PFN, data->u.gpa,
+ sizeof(struct vcpu_info));
if (!r)
kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
@@ -570,16 +570,16 @@ int kvm_xen_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data)
case KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO:
if (data->u.gpa == GPA_INVALID) {
- kvm_gfn_to_pfn_cache_destroy(vcpu->kvm,
- &vcpu->arch.xen.vcpu_time_info_cache);
+ kvm_gpc_deactivate(vcpu->kvm,
+ &vcpu->arch.xen.vcpu_time_info_cache);
r = 0;
break;
}
- r = kvm_gfn_to_pfn_cache_init(vcpu->kvm,
- &vcpu->arch.xen.vcpu_time_info_cache,
- NULL, KVM_HOST_USES_PFN, data->u.gpa,
- sizeof(struct pvclock_vcpu_time_info));
+ r = kvm_gpc_activate(vcpu->kvm,
+ &vcpu->arch.xen.vcpu_time_info_cache,
+ NULL, KVM_HOST_USES_PFN, data->u.gpa,
+ sizeof(struct pvclock_vcpu_time_info));
if (!r)
kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
break;
@@ -590,16 +590,15 @@ int kvm_xen_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data)
break;
}
if (data->u.gpa == GPA_INVALID) {
- kvm_gfn_to_pfn_cache_destroy(vcpu->kvm,
- &vcpu->arch.xen.runstate_cache);
+ kvm_gpc_deactivate(vcpu->kvm,
+ &vcpu->arch.xen.runstate_cache);
r = 0;
break;
}
- r = kvm_gfn_to_pfn_cache_init(vcpu->kvm,
- &vcpu->arch.xen.runstate_cache,
- NULL, KVM_HOST_USES_PFN, data->u.gpa,
- sizeof(struct vcpu_runstate_info));
+ r = kvm_gpc_activate(vcpu->kvm, &vcpu->arch.xen.runstate_cache,
+ NULL, KVM_HOST_USES_PFN, data->u.gpa,
+ sizeof(struct vcpu_runstate_info));
break;
case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT:
@@ -1667,18 +1666,18 @@ static int kvm_xen_eventfd_assign(struct kvm *kvm,
case EVTCHNSTAT_ipi:
/* IPI must map back to the same port# */
if (data->u.evtchn.deliver.port.port != data->u.evtchn.send_port)
- goto out; /* -EINVAL */
+ goto out_noeventfd; /* -EINVAL */
break;
case EVTCHNSTAT_interdomain:
if (data->u.evtchn.deliver.port.port) {
if (data->u.evtchn.deliver.port.port >= max_evtchn_port(kvm))
- goto out; /* -EINVAL */
+ goto out_noeventfd; /* -EINVAL */
} else {
eventfd = eventfd_ctx_fdget(data->u.evtchn.deliver.eventfd.fd);
if (IS_ERR(eventfd)) {
ret = PTR_ERR(eventfd);
- goto out;
+ goto out_noeventfd;
}
}
break;
@@ -1718,6 +1717,7 @@ static int kvm_xen_eventfd_assign(struct kvm *kvm,
out:
if (eventfd)
eventfd_ctx_put(eventfd);
+out_noeventfd:
kfree(evtchnfd);
return ret;
}
@@ -1816,7 +1816,12 @@ void kvm_xen_init_vcpu(struct kvm_vcpu *vcpu)
{
vcpu->arch.xen.vcpu_id = vcpu->vcpu_idx;
vcpu->arch.xen.poll_evtchn = 0;
+
timer_setup(&vcpu->arch.xen.poll_timer, cancel_evtchn_poll, 0);
+
+ kvm_gpc_init(&vcpu->arch.xen.runstate_cache);
+ kvm_gpc_init(&vcpu->arch.xen.vcpu_info_cache);
+ kvm_gpc_init(&vcpu->arch.xen.vcpu_time_info_cache);
}
void kvm_xen_destroy_vcpu(struct kvm_vcpu *vcpu)
@@ -1824,18 +1829,17 @@ void kvm_xen_destroy_vcpu(struct kvm_vcpu *vcpu)
if (kvm_xen_timer_enabled(vcpu))
kvm_xen_stop_timer(vcpu);
- kvm_gfn_to_pfn_cache_destroy(vcpu->kvm,
- &vcpu->arch.xen.runstate_cache);
- kvm_gfn_to_pfn_cache_destroy(vcpu->kvm,
- &vcpu->arch.xen.vcpu_info_cache);
- kvm_gfn_to_pfn_cache_destroy(vcpu->kvm,
- &vcpu->arch.xen.vcpu_time_info_cache);
+ kvm_gpc_deactivate(vcpu->kvm, &vcpu->arch.xen.runstate_cache);
+ kvm_gpc_deactivate(vcpu->kvm, &vcpu->arch.xen.vcpu_info_cache);
+ kvm_gpc_deactivate(vcpu->kvm, &vcpu->arch.xen.vcpu_time_info_cache);
+
del_timer_sync(&vcpu->arch.xen.poll_timer);
}
void kvm_xen_init_vm(struct kvm *kvm)
{
idr_init(&kvm->arch.xen.evtchn_ports);
+ kvm_gpc_init(&kvm->arch.xen.shinfo_cache);
}
void kvm_xen_destroy_vm(struct kvm *kvm)
@@ -1843,7 +1847,7 @@ void kvm_xen_destroy_vm(struct kvm *kvm)
struct evtchnfd *evtchnfd;
int i;
- kvm_gfn_to_pfn_cache_destroy(kvm, &kvm->arch.xen.shinfo_cache);
+ kvm_gpc_deactivate(kvm, &kvm->arch.xen.shinfo_cache);
idr_for_each_entry(&kvm->arch.xen.evtchn_ports, evtchnfd, i) {
if (!evtchnfd->deliver.port.port)
diff --git a/arch/x86/xen/pmu.c b/arch/x86/xen/pmu.c
index 68aff1382872..246d67dab510 100644
--- a/arch/x86/xen/pmu.c
+++ b/arch/x86/xen/pmu.c
@@ -302,7 +302,7 @@ static bool xen_amd_pmu_emulate(unsigned int msr, u64 *val, bool is_read)
static bool pmu_msr_chk_emulated(unsigned int msr, uint64_t *val, bool is_read,
bool *emul)
{
- int type, index;
+ int type, index = 0;
if (is_amd_pmu_msr(msr))
*emul = xen_amd_pmu_emulate(msr, val, is_read);
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index cfa99e8f054b..4f4309500559 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -910,17 +910,9 @@ static int register_callback(unsigned type, const void *func)
void xen_enable_sysenter(void)
{
- int ret;
- unsigned sysenter_feature;
-
- sysenter_feature = X86_FEATURE_SYSENTER32;
-
- if (!boot_cpu_has(sysenter_feature))
- return;
-
- ret = register_callback(CALLBACKTYPE_sysenter, xen_entry_SYSENTER_compat);
- if(ret != 0)
- setup_clear_cpu_cap(sysenter_feature);
+ if (cpu_feature_enabled(X86_FEATURE_SYSENTER32) &&
+ register_callback(CALLBACKTYPE_sysenter, xen_entry_SYSENTER_compat))
+ setup_clear_cpu_cap(X86_FEATURE_SYSENTER32);
}
void xen_enable_syscall(void)
@@ -934,12 +926,9 @@ void xen_enable_syscall(void)
mechanism for syscalls. */
}
- if (boot_cpu_has(X86_FEATURE_SYSCALL32)) {
- ret = register_callback(CALLBACKTYPE_syscall32,
- xen_entry_SYSCALL_compat);
- if (ret != 0)
- setup_clear_cpu_cap(X86_FEATURE_SYSCALL32);
- }
+ if (cpu_feature_enabled(X86_FEATURE_SYSCALL32) &&
+ register_callback(CALLBACKTYPE_syscall32, xen_entry_SYSCALL_compat))
+ setup_clear_cpu_cap(X86_FEATURE_SYSCALL32);
}
static void __init xen_pvmmu_arch_setup(void)