summaryrefslogtreecommitdiff
path: root/arch/x86/kvm
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kvm')
-rw-r--r--arch/x86/kvm/emulate.c56
-rw-r--r--arch/x86/kvm/mmu.c27
-rw-r--r--arch/x86/kvm/svm.c32
-rw-r--r--arch/x86/kvm/vmx.c43
-rw-r--r--arch/x86/kvm/x86.c99
5 files changed, 154 insertions, 103 deletions
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index e7d04d0c8008..b514b2b2845a 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -1046,7 +1046,6 @@ static void fetch_register_operand(struct operand *op)
static void read_sse_reg(struct x86_emulate_ctxt *ctxt, sse128_t *data, int reg)
{
- ctxt->ops->get_fpu(ctxt);
switch (reg) {
case 0: asm("movdqa %%xmm0, %0" : "=m"(*data)); break;
case 1: asm("movdqa %%xmm1, %0" : "=m"(*data)); break;
@@ -1068,13 +1067,11 @@ static void read_sse_reg(struct x86_emulate_ctxt *ctxt, sse128_t *data, int reg)
#endif
default: BUG();
}
- ctxt->ops->put_fpu(ctxt);
}
static void write_sse_reg(struct x86_emulate_ctxt *ctxt, sse128_t *data,
int reg)
{
- ctxt->ops->get_fpu(ctxt);
switch (reg) {
case 0: asm("movdqa %0, %%xmm0" : : "m"(*data)); break;
case 1: asm("movdqa %0, %%xmm1" : : "m"(*data)); break;
@@ -1096,12 +1093,10 @@ static void write_sse_reg(struct x86_emulate_ctxt *ctxt, sse128_t *data,
#endif
default: BUG();
}
- ctxt->ops->put_fpu(ctxt);
}
static void read_mmx_reg(struct x86_emulate_ctxt *ctxt, u64 *data, int reg)
{
- ctxt->ops->get_fpu(ctxt);
switch (reg) {
case 0: asm("movq %%mm0, %0" : "=m"(*data)); break;
case 1: asm("movq %%mm1, %0" : "=m"(*data)); break;
@@ -1113,12 +1108,10 @@ static void read_mmx_reg(struct x86_emulate_ctxt *ctxt, u64 *data, int reg)
case 7: asm("movq %%mm7, %0" : "=m"(*data)); break;
default: BUG();
}
- ctxt->ops->put_fpu(ctxt);
}
static void write_mmx_reg(struct x86_emulate_ctxt *ctxt, u64 *data, int reg)
{
- ctxt->ops->get_fpu(ctxt);
switch (reg) {
case 0: asm("movq %0, %%mm0" : : "m"(*data)); break;
case 1: asm("movq %0, %%mm1" : : "m"(*data)); break;
@@ -1130,7 +1123,6 @@ static void write_mmx_reg(struct x86_emulate_ctxt *ctxt, u64 *data, int reg)
case 7: asm("movq %0, %%mm7" : : "m"(*data)); break;
default: BUG();
}
- ctxt->ops->put_fpu(ctxt);
}
static int em_fninit(struct x86_emulate_ctxt *ctxt)
@@ -1138,9 +1130,7 @@ static int em_fninit(struct x86_emulate_ctxt *ctxt)
if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM))
return emulate_nm(ctxt);
- ctxt->ops->get_fpu(ctxt);
asm volatile("fninit");
- ctxt->ops->put_fpu(ctxt);
return X86EMUL_CONTINUE;
}
@@ -1151,9 +1141,7 @@ static int em_fnstcw(struct x86_emulate_ctxt *ctxt)
if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM))
return emulate_nm(ctxt);
- ctxt->ops->get_fpu(ctxt);
asm volatile("fnstcw %0": "+m"(fcw));
- ctxt->ops->put_fpu(ctxt);
ctxt->dst.val = fcw;
@@ -1167,9 +1155,7 @@ static int em_fnstsw(struct x86_emulate_ctxt *ctxt)
if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM))
return emulate_nm(ctxt);
- ctxt->ops->get_fpu(ctxt);
asm volatile("fnstsw %0": "+m"(fsw));
- ctxt->ops->put_fpu(ctxt);
ctxt->dst.val = fsw;
@@ -2404,9 +2390,21 @@ static int rsm_load_seg_64(struct x86_emulate_ctxt *ctxt, u64 smbase, int n)
}
static int rsm_enter_protected_mode(struct x86_emulate_ctxt *ctxt,
- u64 cr0, u64 cr4)
+ u64 cr0, u64 cr3, u64 cr4)
{
int bad;
+ u64 pcid;
+
+ /* In order to later set CR4.PCIDE, CR3[11:0] must be zero. */
+ pcid = 0;
+ if (cr4 & X86_CR4_PCIDE) {
+ pcid = cr3 & 0xfff;
+ cr3 &= ~0xfff;
+ }
+
+ bad = ctxt->ops->set_cr(ctxt, 3, cr3);
+ if (bad)
+ return X86EMUL_UNHANDLEABLE;
/*
* First enable PAE, long mode needs it before CR0.PG = 1 is set.
@@ -2425,6 +2423,12 @@ static int rsm_enter_protected_mode(struct x86_emulate_ctxt *ctxt,
bad = ctxt->ops->set_cr(ctxt, 4, cr4);
if (bad)
return X86EMUL_UNHANDLEABLE;
+ if (pcid) {
+ bad = ctxt->ops->set_cr(ctxt, 3, cr3 | pcid);
+ if (bad)
+ return X86EMUL_UNHANDLEABLE;
+ }
+
}
return X86EMUL_CONTINUE;
@@ -2435,11 +2439,11 @@ static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt, u64 smbase)
struct desc_struct desc;
struct desc_ptr dt;
u16 selector;
- u32 val, cr0, cr4;
+ u32 val, cr0, cr3, cr4;
int i;
cr0 = GET_SMSTATE(u32, smbase, 0x7ffc);
- ctxt->ops->set_cr(ctxt, 3, GET_SMSTATE(u32, smbase, 0x7ff8));
+ cr3 = GET_SMSTATE(u32, smbase, 0x7ff8);
ctxt->eflags = GET_SMSTATE(u32, smbase, 0x7ff4) | X86_EFLAGS_FIXED;
ctxt->_eip = GET_SMSTATE(u32, smbase, 0x7ff0);
@@ -2481,14 +2485,14 @@ static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt, u64 smbase)
ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smbase, 0x7ef8));
- return rsm_enter_protected_mode(ctxt, cr0, cr4);
+ return rsm_enter_protected_mode(ctxt, cr0, cr3, cr4);
}
static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt, u64 smbase)
{
struct desc_struct desc;
struct desc_ptr dt;
- u64 val, cr0, cr4;
+ u64 val, cr0, cr3, cr4;
u32 base3;
u16 selector;
int i, r;
@@ -2505,7 +2509,7 @@ static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt, u64 smbase)
ctxt->ops->set_dr(ctxt, 7, (val & DR7_VOLATILE) | DR7_FIXED_1);
cr0 = GET_SMSTATE(u64, smbase, 0x7f58);
- ctxt->ops->set_cr(ctxt, 3, GET_SMSTATE(u64, smbase, 0x7f50));
+ cr3 = GET_SMSTATE(u64, smbase, 0x7f50);
cr4 = GET_SMSTATE(u64, smbase, 0x7f48);
ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smbase, 0x7f00));
val = GET_SMSTATE(u64, smbase, 0x7ed0);
@@ -2533,7 +2537,7 @@ static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt, u64 smbase)
dt.address = GET_SMSTATE(u64, smbase, 0x7e68);
ctxt->ops->set_gdt(ctxt, &dt);
- r = rsm_enter_protected_mode(ctxt, cr0, cr4);
+ r = rsm_enter_protected_mode(ctxt, cr0, cr3, cr4);
if (r != X86EMUL_CONTINUE)
return r;
@@ -4001,12 +4005,8 @@ static int em_fxsave(struct x86_emulate_ctxt *ctxt)
if (rc != X86EMUL_CONTINUE)
return rc;
- ctxt->ops->get_fpu(ctxt);
-
rc = asm_safe("fxsave %[fx]", , [fx] "+m"(fx_state));
- ctxt->ops->put_fpu(ctxt);
-
if (rc != X86EMUL_CONTINUE)
return rc;
@@ -4049,8 +4049,6 @@ static int em_fxrstor(struct x86_emulate_ctxt *ctxt)
if (rc != X86EMUL_CONTINUE)
return rc;
- ctxt->ops->get_fpu(ctxt);
-
if (size < __fxstate_size(16)) {
rc = fxregs_fixup(&fx_state, size);
if (rc != X86EMUL_CONTINUE)
@@ -4066,8 +4064,6 @@ static int em_fxrstor(struct x86_emulate_ctxt *ctxt)
rc = asm_safe("fxrstor %[fx]", : [fx] "m"(fx_state));
out:
- ctxt->ops->put_fpu(ctxt);
-
return rc;
}
@@ -5317,9 +5313,7 @@ static int flush_pending_x87_faults(struct x86_emulate_ctxt *ctxt)
{
int rc;
- ctxt->ops->get_fpu(ctxt);
rc = asm_safe("fwait");
- ctxt->ops->put_fpu(ctxt);
if (unlikely(rc != X86EMUL_CONTINUE))
return emulate_exception(ctxt, MF_VECTOR, 0, false);
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index e5e66e5c6640..2b8eb4da4d08 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -3395,7 +3395,7 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu)
spin_lock(&vcpu->kvm->mmu_lock);
if(make_mmu_pages_available(vcpu) < 0) {
spin_unlock(&vcpu->kvm->mmu_lock);
- return 1;
+ return -ENOSPC;
}
sp = kvm_mmu_get_page(vcpu, 0, 0,
vcpu->arch.mmu.shadow_root_level, 1, ACC_ALL);
@@ -3410,7 +3410,7 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu)
spin_lock(&vcpu->kvm->mmu_lock);
if (make_mmu_pages_available(vcpu) < 0) {
spin_unlock(&vcpu->kvm->mmu_lock);
- return 1;
+ return -ENOSPC;
}
sp = kvm_mmu_get_page(vcpu, i << (30 - PAGE_SHIFT),
i << 30, PT32_ROOT_LEVEL, 1, ACC_ALL);
@@ -3450,7 +3450,7 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
spin_lock(&vcpu->kvm->mmu_lock);
if (make_mmu_pages_available(vcpu) < 0) {
spin_unlock(&vcpu->kvm->mmu_lock);
- return 1;
+ return -ENOSPC;
}
sp = kvm_mmu_get_page(vcpu, root_gfn, 0,
vcpu->arch.mmu.shadow_root_level, 0, ACC_ALL);
@@ -3487,7 +3487,7 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
spin_lock(&vcpu->kvm->mmu_lock);
if (make_mmu_pages_available(vcpu) < 0) {
spin_unlock(&vcpu->kvm->mmu_lock);
- return 1;
+ return -ENOSPC;
}
sp = kvm_mmu_get_page(vcpu, root_gfn, i << 30, PT32_ROOT_LEVEL,
0, ACC_ALL);
@@ -3781,7 +3781,8 @@ static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn)
bool kvm_can_do_async_pf(struct kvm_vcpu *vcpu)
{
if (unlikely(!lapic_in_kernel(vcpu) ||
- kvm_event_needs_reinjection(vcpu)))
+ kvm_event_needs_reinjection(vcpu) ||
+ vcpu->arch.exception.pending))
return false;
if (!vcpu->arch.apf.delivery_as_pf_vmexit && is_guest_mode(vcpu))
@@ -5465,30 +5466,34 @@ static void mmu_destroy_caches(void)
int kvm_mmu_module_init(void)
{
+ int ret = -ENOMEM;
+
kvm_mmu_clear_all_pte_masks();
pte_list_desc_cache = kmem_cache_create("pte_list_desc",
sizeof(struct pte_list_desc),
0, SLAB_ACCOUNT, NULL);
if (!pte_list_desc_cache)
- goto nomem;
+ goto out;
mmu_page_header_cache = kmem_cache_create("kvm_mmu_page_header",
sizeof(struct kvm_mmu_page),
0, SLAB_ACCOUNT, NULL);
if (!mmu_page_header_cache)
- goto nomem;
+ goto out;
if (percpu_counter_init(&kvm_total_used_mmu_pages, 0, GFP_KERNEL))
- goto nomem;
+ goto out;
- register_shrinker(&mmu_shrinker);
+ ret = register_shrinker(&mmu_shrinker);
+ if (ret)
+ goto out;
return 0;
-nomem:
+out:
mmu_destroy_caches();
- return -ENOMEM;
+ return ret;
}
/*
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index eb714f1cdf7e..f40d0da1f1d3 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -45,6 +45,7 @@
#include <asm/debugreg.h>
#include <asm/kvm_para.h>
#include <asm/irq_remapping.h>
+#include <asm/nospec-branch.h>
#include <asm/virtext.h>
#include "trace.h"
@@ -361,7 +362,6 @@ static void recalc_intercepts(struct vcpu_svm *svm)
{
struct vmcb_control_area *c, *h;
struct nested_state *g;
- u32 h_intercept_exceptions;
mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
@@ -372,14 +372,9 @@ static void recalc_intercepts(struct vcpu_svm *svm)
h = &svm->nested.hsave->control;
g = &svm->nested;
- /* No need to intercept #UD if L1 doesn't intercept it */
- h_intercept_exceptions =
- h->intercept_exceptions & ~(1U << UD_VECTOR);
-
c->intercept_cr = h->intercept_cr | g->intercept_cr;
c->intercept_dr = h->intercept_dr | g->intercept_dr;
- c->intercept_exceptions =
- h_intercept_exceptions | g->intercept_exceptions;
+ c->intercept_exceptions = h->intercept_exceptions | g->intercept_exceptions;
c->intercept = h->intercept | g->intercept;
}
@@ -2202,7 +2197,6 @@ static int ud_interception(struct vcpu_svm *svm)
{
int er;
- WARN_ON_ONCE(is_guest_mode(&svm->vcpu));
er = emulate_instruction(&svm->vcpu, EMULTYPE_TRAP_UD);
if (er == EMULATE_USER_EXIT)
return 0;
@@ -4986,6 +4980,25 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
"mov %%r14, %c[r14](%[svm]) \n\t"
"mov %%r15, %c[r15](%[svm]) \n\t"
#endif
+ /*
+ * Clear host registers marked as clobbered to prevent
+ * speculative use.
+ */
+ "xor %%" _ASM_BX ", %%" _ASM_BX " \n\t"
+ "xor %%" _ASM_CX ", %%" _ASM_CX " \n\t"
+ "xor %%" _ASM_DX ", %%" _ASM_DX " \n\t"
+ "xor %%" _ASM_SI ", %%" _ASM_SI " \n\t"
+ "xor %%" _ASM_DI ", %%" _ASM_DI " \n\t"
+#ifdef CONFIG_X86_64
+ "xor %%r8, %%r8 \n\t"
+ "xor %%r9, %%r9 \n\t"
+ "xor %%r10, %%r10 \n\t"
+ "xor %%r11, %%r11 \n\t"
+ "xor %%r12, %%r12 \n\t"
+ "xor %%r13, %%r13 \n\t"
+ "xor %%r14, %%r14 \n\t"
+ "xor %%r15, %%r15 \n\t"
+#endif
"pop %%" _ASM_BP
:
: [svm]"a"(svm),
@@ -5015,6 +5028,9 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
#endif
);
+ /* Eliminate branch target predictions from guest mode */
+ vmexit_fill_RSB();
+
#ifdef CONFIG_X86_64
wrmsrl(MSR_GS_BASE, svm->host.gs_base);
#else
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 4704aaf6d19e..c829d89e2e63 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -50,6 +50,7 @@
#include <asm/apic.h>
#include <asm/irq_remapping.h>
#include <asm/mmu_context.h>
+#include <asm/nospec-branch.h>
#include "trace.h"
#include "pmu.h"
@@ -899,8 +900,16 @@ static inline short vmcs_field_to_offset(unsigned long field)
{
BUILD_BUG_ON(ARRAY_SIZE(vmcs_field_to_offset_table) > SHRT_MAX);
- if (field >= ARRAY_SIZE(vmcs_field_to_offset_table) ||
- vmcs_field_to_offset_table[field] == 0)
+ if (field >= ARRAY_SIZE(vmcs_field_to_offset_table))
+ return -ENOENT;
+
+ /*
+ * FIXME: Mitigation for CVE-2017-5753. To be replaced with a
+ * generic mechanism.
+ */
+ asm("lfence");
+
+ if (vmcs_field_to_offset_table[field] == 0)
return -ENOENT;
return vmcs_field_to_offset_table[field];
@@ -1887,7 +1896,7 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu)
{
u32 eb;
- eb = (1u << PF_VECTOR) | (1u << MC_VECTOR) |
+ eb = (1u << PF_VECTOR) | (1u << UD_VECTOR) | (1u << MC_VECTOR) |
(1u << DB_VECTOR) | (1u << AC_VECTOR);
if ((vcpu->guest_debug &
(KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP)) ==
@@ -1905,8 +1914,6 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu)
*/
if (is_guest_mode(vcpu))
eb |= get_vmcs12(vcpu)->exception_bitmap;
- else
- eb |= 1u << UD_VECTOR;
vmcs_write32(EXCEPTION_BITMAP, eb);
}
@@ -2302,7 +2309,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
* processors. See 22.2.4.
*/
vmcs_writel(HOST_TR_BASE,
- (unsigned long)this_cpu_ptr(&cpu_tss));
+ (unsigned long)&get_cpu_entry_area(cpu)->tss.x86_tss);
vmcs_writel(HOST_GDTR_BASE, (unsigned long)gdt); /* 22.2.4 */
/*
@@ -5917,7 +5924,6 @@ static int handle_exception(struct kvm_vcpu *vcpu)
return 1; /* already handled by vmx_vcpu_run() */
if (is_invalid_opcode(intr_info)) {
- WARN_ON_ONCE(is_guest_mode(vcpu));
er = emulate_instruction(vcpu, EMULTYPE_TRAP_UD);
if (er == EMULATE_USER_EXIT)
return 0;
@@ -6751,16 +6757,10 @@ static __init int hardware_setup(void)
goto out;
}
- vmx_io_bitmap_b = (unsigned long *)__get_free_page(GFP_KERNEL);
memset(vmx_vmread_bitmap, 0xff, PAGE_SIZE);
memset(vmx_vmwrite_bitmap, 0xff, PAGE_SIZE);
- /*
- * Allow direct access to the PC debug port (it is often used for I/O
- * delays, but the vmexits simply slow things down).
- */
memset(vmx_io_bitmap_a, 0xff, PAGE_SIZE);
- clear_bit(0x80, vmx_io_bitmap_a);
memset(vmx_io_bitmap_b, 0xff, PAGE_SIZE);
@@ -9421,6 +9421,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
/* Save guest registers, load host registers, keep flags */
"mov %0, %c[wordsize](%%" _ASM_SP ") \n\t"
"pop %0 \n\t"
+ "setbe %c[fail](%0)\n\t"
"mov %%" _ASM_AX ", %c[rax](%0) \n\t"
"mov %%" _ASM_BX ", %c[rbx](%0) \n\t"
__ASM_SIZE(pop) " %c[rcx](%0) \n\t"
@@ -9437,12 +9438,23 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
"mov %%r13, %c[r13](%0) \n\t"
"mov %%r14, %c[r14](%0) \n\t"
"mov %%r15, %c[r15](%0) \n\t"
+ "xor %%r8d, %%r8d \n\t"
+ "xor %%r9d, %%r9d \n\t"
+ "xor %%r10d, %%r10d \n\t"
+ "xor %%r11d, %%r11d \n\t"
+ "xor %%r12d, %%r12d \n\t"
+ "xor %%r13d, %%r13d \n\t"
+ "xor %%r14d, %%r14d \n\t"
+ "xor %%r15d, %%r15d \n\t"
#endif
"mov %%cr2, %%" _ASM_AX " \n\t"
"mov %%" _ASM_AX ", %c[cr2](%0) \n\t"
+ "xor %%eax, %%eax \n\t"
+ "xor %%ebx, %%ebx \n\t"
+ "xor %%esi, %%esi \n\t"
+ "xor %%edi, %%edi \n\t"
"pop %%" _ASM_BP "; pop %%" _ASM_DX " \n\t"
- "setbe %c[fail](%0) \n\t"
".pushsection .rodata \n\t"
".global vmx_return \n\t"
"vmx_return: " _ASM_PTR " 2b \n\t"
@@ -9479,6 +9491,9 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
#endif
);
+ /* Eliminate branch target predictions from guest mode */
+ vmexit_fill_RSB();
+
/* MSR_IA32_DEBUGCTLMSR is zeroed on vmexit. Restore it if needed */
if (debugctlmsr)
update_debugctlmsr(debugctlmsr);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index eee8e7faf1af..c53298dfbf50 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2937,7 +2937,6 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
srcu_read_unlock(&vcpu->kvm->srcu, idx);
pagefault_enable();
kvm_x86_ops->vcpu_put(vcpu);
- kvm_put_guest_fpu(vcpu);
vcpu->arch.last_host_tsc = rdtsc();
}
@@ -4385,7 +4384,7 @@ static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *v)
addr, n, v))
&& kvm_io_bus_read(vcpu, KVM_MMIO_BUS, addr, n, v))
break;
- trace_kvm_mmio(KVM_TRACE_MMIO_READ, n, addr, *(u64 *)v);
+ trace_kvm_mmio(KVM_TRACE_MMIO_READ, n, addr, v);
handled += n;
addr += n;
len -= n;
@@ -4644,7 +4643,7 @@ static int read_prepare(struct kvm_vcpu *vcpu, void *val, int bytes)
{
if (vcpu->mmio_read_completed) {
trace_kvm_mmio(KVM_TRACE_MMIO_READ, bytes,
- vcpu->mmio_fragments[0].gpa, *(u64 *)val);
+ vcpu->mmio_fragments[0].gpa, val);
vcpu->mmio_read_completed = 0;
return 1;
}
@@ -4666,14 +4665,14 @@ static int write_emulate(struct kvm_vcpu *vcpu, gpa_t gpa,
static int write_mmio(struct kvm_vcpu *vcpu, gpa_t gpa, int bytes, void *val)
{
- trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, bytes, gpa, *(u64 *)val);
+ trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, bytes, gpa, val);
return vcpu_mmio_write(vcpu, gpa, bytes, val);
}
static int read_exit_mmio(struct kvm_vcpu *vcpu, gpa_t gpa,
void *val, int bytes)
{
- trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, bytes, gpa, 0);
+ trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, bytes, gpa, NULL);
return X86EMUL_IO_NEEDED;
}
@@ -5252,17 +5251,6 @@ static void emulator_halt(struct x86_emulate_ctxt *ctxt)
emul_to_vcpu(ctxt)->arch.halt_request = 1;
}
-static void emulator_get_fpu(struct x86_emulate_ctxt *ctxt)
-{
- preempt_disable();
- kvm_load_guest_fpu(emul_to_vcpu(ctxt));
-}
-
-static void emulator_put_fpu(struct x86_emulate_ctxt *ctxt)
-{
- preempt_enable();
-}
-
static int emulator_intercept(struct x86_emulate_ctxt *ctxt,
struct x86_instruction_info *info,
enum x86_intercept_stage stage)
@@ -5340,8 +5328,6 @@ static const struct x86_emulate_ops emulate_ops = {
.halt = emulator_halt,
.wbinvd = emulator_wbinvd,
.fix_hypercall = emulator_fix_hypercall,
- .get_fpu = emulator_get_fpu,
- .put_fpu = emulator_put_fpu,
.intercept = emulator_intercept,
.get_cpuid = emulator_get_cpuid,
.set_nmi_mask = emulator_set_nmi_mask,
@@ -6778,6 +6764,20 @@ static void kvm_vcpu_flush_tlb(struct kvm_vcpu *vcpu)
kvm_x86_ops->tlb_flush(vcpu);
}
+void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
+ unsigned long start, unsigned long end)
+{
+ unsigned long apic_address;
+
+ /*
+ * The physical address of apic access page is stored in the VMCS.
+ * Update it when it becomes invalid.
+ */
+ apic_address = gfn_to_hva(kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT);
+ if (start <= apic_address && apic_address < end)
+ kvm_make_all_cpus_request(kvm, KVM_REQ_APIC_PAGE_RELOAD);
+}
+
void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu)
{
struct page *page = NULL;
@@ -6952,7 +6952,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
preempt_disable();
kvm_x86_ops->prepare_guest_switch(vcpu);
- kvm_load_guest_fpu(vcpu);
/*
* Disable IRQs before setting IN_GUEST_MODE. Posted interrupt
@@ -7265,13 +7264,12 @@ static int complete_emulated_mmio(struct kvm_vcpu *vcpu)
int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
- struct fpu *fpu = &current->thread.fpu;
int r;
- fpu__initialize(fpu);
-
kvm_sigset_activate(vcpu);
+ kvm_load_guest_fpu(vcpu);
+
if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) {
if (kvm_run->immediate_exit) {
r = -EINTR;
@@ -7312,6 +7310,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
r = vcpu_run(vcpu);
out:
+ kvm_put_guest_fpu(vcpu);
post_kvm_run_save(vcpu);
kvm_sigset_deactivate(vcpu);
@@ -7381,7 +7380,7 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
#endif
kvm_rip_write(vcpu, regs->rip);
- kvm_set_rflags(vcpu, regs->rflags);
+ kvm_set_rflags(vcpu, regs->rflags | X86_EFLAGS_FIXED);
vcpu->arch.exception.pending = false;
@@ -7495,6 +7494,29 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index,
}
EXPORT_SYMBOL_GPL(kvm_task_switch);
+int kvm_valid_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
+{
+ if ((sregs->efer & EFER_LME) && (sregs->cr0 & X86_CR0_PG)) {
+ /*
+ * When EFER.LME and CR0.PG are set, the processor is in
+ * 64-bit mode (though maybe in a 32-bit code segment).
+ * CR4.PAE and EFER.LMA must be set.
+ */
+ if (!(sregs->cr4 & X86_CR4_PAE)
+ || !(sregs->efer & EFER_LMA))
+ return -EINVAL;
+ } else {
+ /*
+ * Not in 64-bit mode: EFER.LMA is clear and the code
+ * segment cannot be 64-bit.
+ */
+ if (sregs->efer & EFER_LMA || sregs->cs.l)
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
struct kvm_sregs *sregs)
{
@@ -7507,6 +7529,9 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
(sregs->cr4 & X86_CR4_OSXSAVE))
return -EINVAL;
+ if (kvm_valid_sregs(vcpu, sregs))
+ return -EINVAL;
+
apic_base_msr.data = sregs->apic_base;
apic_base_msr.host_initiated = true;
if (kvm_set_apic_base(vcpu, &apic_base_msr))
@@ -7704,32 +7729,25 @@ static void fx_init(struct kvm_vcpu *vcpu)
vcpu->arch.cr0 |= X86_CR0_ET;
}
+/* Swap (qemu) user FPU context for the guest FPU context. */
void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
{
- if (vcpu->guest_fpu_loaded)
- return;
-
- /*
- * Restore all possible states in the guest,
- * and assume host would use all available bits.
- * Guest xcr0 would be loaded later.
- */
- vcpu->guest_fpu_loaded = 1;
- __kernel_fpu_begin();
+ preempt_disable();
+ copy_fpregs_to_fpstate(&vcpu->arch.user_fpu);
/* PKRU is separately restored in kvm_x86_ops->run. */
__copy_kernel_to_fpregs(&vcpu->arch.guest_fpu.state,
~XFEATURE_MASK_PKRU);
+ preempt_enable();
trace_kvm_fpu(1);
}
+/* When vcpu_run ends, restore user space FPU context. */
void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
{
- if (!vcpu->guest_fpu_loaded)
- return;
-
- vcpu->guest_fpu_loaded = 0;
+ preempt_disable();
copy_fpregs_to_fpstate(&vcpu->arch.guest_fpu);
- __kernel_fpu_end();
+ copy_kernel_to_fpregs(&vcpu->arch.user_fpu.state);
+ preempt_enable();
++vcpu->stat.fpu_reload;
trace_kvm_fpu(0);
}
@@ -7846,7 +7864,8 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
* To avoid have the INIT path from kvm_apic_has_events() that be
* called with loaded FPU and does not let userspace fix the state.
*/
- kvm_put_guest_fpu(vcpu);
+ if (init_event)
+ kvm_put_guest_fpu(vcpu);
mpx_state_buffer = get_xsave_addr(&vcpu->arch.guest_fpu.state.xsave,
XFEATURE_MASK_BNDREGS);
if (mpx_state_buffer)
@@ -7855,6 +7874,8 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
XFEATURE_MASK_BNDCSR);
if (mpx_state_buffer)
memset(mpx_state_buffer, 0, sizeof(struct mpx_bndcsr));
+ if (init_event)
+ kvm_load_guest_fpu(vcpu);
}
if (!init_event) {