summaryrefslogtreecommitdiff
path: root/arch/x86/kvm/svm
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kvm/svm')
-rw-r--r--arch/x86/kvm/svm/avic.c51
-rw-r--r--arch/x86/kvm/svm/nested.c75
-rw-r--r--arch/x86/kvm/svm/sev.c62
-rw-r--r--arch/x86/kvm/svm/svm.c201
-rw-r--r--arch/x86/kvm/svm/svm.h15
-rw-r--r--arch/x86/kvm/svm/svm_onhyperv.h2
-rw-r--r--arch/x86/kvm/svm/svm_ops.h2
7 files changed, 223 insertions, 185 deletions
diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c
index 1d01da64c333..8052d92069e0 100644
--- a/arch/x86/kvm/svm/avic.c
+++ b/arch/x86/kvm/svm/avic.c
@@ -197,6 +197,8 @@ void avic_init_vmcb(struct vcpu_svm *svm)
vmcb->control.avic_logical_id = lpa & AVIC_HPA_MASK;
vmcb->control.avic_physical_id = ppa & AVIC_HPA_MASK;
vmcb->control.avic_physical_id |= AVIC_MAX_PHYSICAL_ID_COUNT;
+ vmcb->control.avic_vapic_bar = APIC_DEFAULT_PHYS_BASE & VMCB_AVIC_APIC_BAR_MASK;
+
if (kvm_apicv_activated(svm->vcpu.kvm))
vmcb->control.int_ctl |= AVIC_ENABLE_MASK;
else
@@ -225,31 +227,26 @@ static u64 *avic_get_physical_id_entry(struct kvm_vcpu *vcpu,
* field of the VMCB. Therefore, we set up the
* APIC_ACCESS_PAGE_PRIVATE_MEMSLOT (4KB) here.
*/
-static int avic_update_access_page(struct kvm *kvm, bool activate)
+static int avic_alloc_access_page(struct kvm *kvm)
{
void __user *ret;
int r = 0;
mutex_lock(&kvm->slots_lock);
- /*
- * During kvm_destroy_vm(), kvm_pit_set_reinject() could trigger
- * APICv mode change, which update APIC_ACCESS_PAGE_PRIVATE_MEMSLOT
- * memory region. So, we need to ensure that kvm->mm == current->mm.
- */
- if ((kvm->arch.apic_access_memslot_enabled == activate) ||
- (kvm->mm != current->mm))
+
+ if (kvm->arch.apic_access_memslot_enabled)
goto out;
ret = __x86_set_memory_region(kvm,
APIC_ACCESS_PAGE_PRIVATE_MEMSLOT,
APIC_DEFAULT_PHYS_BASE,
- activate ? PAGE_SIZE : 0);
+ PAGE_SIZE);
if (IS_ERR(ret)) {
r = PTR_ERR(ret);
goto out;
}
- kvm->arch.apic_access_memslot_enabled = activate;
+ kvm->arch.apic_access_memslot_enabled = true;
out:
mutex_unlock(&kvm->slots_lock);
return r;
@@ -270,7 +267,7 @@ static int avic_init_backing_page(struct kvm_vcpu *vcpu)
if (kvm_apicv_activated(vcpu->kvm)) {
int ret;
- ret = avic_update_access_page(vcpu->kvm, true);
+ ret = avic_alloc_access_page(vcpu->kvm);
if (ret)
return ret;
}
@@ -587,17 +584,6 @@ void avic_post_state_restore(struct kvm_vcpu *vcpu)
avic_handle_ldr_update(vcpu);
}
-void svm_toggle_avic_for_irq_window(struct kvm_vcpu *vcpu, bool activate)
-{
- if (!enable_apicv || !lapic_in_kernel(vcpu))
- return;
-
- srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
- kvm_request_apicv_update(vcpu->kvm, activate,
- APICV_INHIBIT_REASON_IRQWIN);
- vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
-}
-
void svm_set_virtual_apic_mode(struct kvm_vcpu *vcpu)
{
return;
@@ -646,7 +632,7 @@ out:
void svm_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
- struct vmcb *vmcb = svm->vmcb;
+ struct vmcb *vmcb = svm->vmcb01.ptr;
bool activated = kvm_vcpu_apicv_active(vcpu);
if (!enable_apicv)
@@ -667,6 +653,11 @@ void svm_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu)
}
vmcb_mark_dirty(vmcb, VMCB_AVIC);
+ if (activated)
+ avic_vcpu_load(vcpu, vcpu->cpu);
+ else
+ avic_vcpu_put(vcpu);
+
svm_set_pi_irte_mode(vcpu, activated);
}
@@ -918,10 +909,6 @@ bool svm_check_apicv_inhibit_reasons(ulong bit)
return supported & BIT(bit);
}
-void svm_pre_update_apicv_exec_ctrl(struct kvm *kvm, bool activate)
-{
- avic_update_access_page(kvm, activate);
-}
static inline int
avic_update_iommu_vcpu_affinity(struct kvm_vcpu *vcpu, int cpu, bool r)
@@ -960,9 +947,6 @@ void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
int h_physical_id = kvm_cpu_get_apicid(cpu);
struct vcpu_svm *svm = to_svm(vcpu);
- if (!kvm_vcpu_apicv_active(vcpu))
- return;
-
/*
* Since the host physical APIC id is 8 bits,
* we can support host APIC ID upto 255.
@@ -990,9 +974,6 @@ void avic_vcpu_put(struct kvm_vcpu *vcpu)
u64 entry;
struct vcpu_svm *svm = to_svm(vcpu);
- if (!kvm_vcpu_apicv_active(vcpu))
- return;
-
entry = READ_ONCE(*(svm->avic_physical_id_cache));
if (entry & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK)
avic_update_iommu_vcpu_affinity(vcpu, -1, 0);
@@ -1009,6 +990,10 @@ static void avic_set_running(struct kvm_vcpu *vcpu, bool is_run)
struct vcpu_svm *svm = to_svm(vcpu);
svm->avic_is_running = is_run;
+
+ if (!kvm_vcpu_apicv_active(vcpu))
+ return;
+
if (is_run)
avic_vcpu_load(vcpu, vcpu->cpu);
else
diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index 21d03e3a5dfd..2545d0c61985 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -154,6 +154,13 @@ void recalc_intercepts(struct vcpu_svm *svm)
for (i = 0; i < MAX_INTERCEPT; i++)
c->intercepts[i] |= g->intercepts[i];
+
+ /* If SMI is not intercepted, ignore guest SMI intercept as well */
+ if (!intercept_smi)
+ vmcb_clr_intercept(c, INTERCEPT_SMI);
+
+ vmcb_set_intercept(c, INTERCEPT_VMLOAD);
+ vmcb_set_intercept(c, INTERCEPT_VMSAVE);
}
static void copy_vmcb_control_area(struct vmcb_control_area *dst,
@@ -304,8 +311,8 @@ static bool nested_vmcb_valid_sregs(struct kvm_vcpu *vcpu,
return true;
}
-static void nested_load_control_from_vmcb12(struct vcpu_svm *svm,
- struct vmcb_control_area *control)
+void nested_load_control_from_vmcb12(struct vcpu_svm *svm,
+ struct vmcb_control_area *control)
{
copy_vmcb_control_area(&svm->nested.ctl, control);
@@ -499,7 +506,11 @@ static void nested_vmcb02_prepare_save(struct vcpu_svm *svm, struct vmcb *vmcb12
static void nested_vmcb02_prepare_control(struct vcpu_svm *svm)
{
- const u32 mask = V_INTR_MASKING_MASK | V_GIF_ENABLE_MASK | V_GIF_MASK;
+ const u32 int_ctl_vmcb01_bits =
+ V_INTR_MASKING_MASK | V_GIF_MASK | V_GIF_ENABLE_MASK;
+
+ const u32 int_ctl_vmcb12_bits = V_TPR_MASK | V_IRQ_INJECTION_BITS_MASK;
+
struct kvm_vcpu *vcpu = &svm->vcpu;
/*
@@ -511,7 +522,7 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm)
* Also covers avic_vapic_bar, avic_backing_page, avic_logical_id,
* avic_physical_id.
*/
- WARN_ON(svm->vmcb01.ptr->control.int_ctl & AVIC_ENABLE_MASK);
+ WARN_ON(kvm_apicv_activated(svm->vcpu.kvm));
/* Copied from vmcb01. msrpm_base can be overwritten later. */
svm->vmcb->control.nested_ctl = svm->vmcb01.ptr->control.nested_ctl;
@@ -531,8 +542,8 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm)
vcpu->arch.l1_tsc_offset + svm->nested.ctl.tsc_offset;
svm->vmcb->control.int_ctl =
- (svm->nested.ctl.int_ctl & ~mask) |
- (svm->vmcb01.ptr->control.int_ctl & mask);
+ (svm->nested.ctl.int_ctl & int_ctl_vmcb12_bits) |
+ (svm->vmcb01.ptr->control.int_ctl & int_ctl_vmcb01_bits);
svm->vmcb->control.virt_ext = svm->nested.ctl.virt_ext;
svm->vmcb->control.int_vector = svm->nested.ctl.int_vector;
@@ -618,6 +629,11 @@ int nested_svm_vmrun(struct kvm_vcpu *vcpu)
struct kvm_host_map map;
u64 vmcb12_gpa;
+ if (!svm->nested.hsave_msr) {
+ kvm_inject_gp(vcpu, 0);
+ return 1;
+ }
+
if (is_smm(vcpu)) {
kvm_queue_exception(vcpu, UD_VECTOR);
return 1;
@@ -650,11 +666,6 @@ int nested_svm_vmrun(struct kvm_vcpu *vcpu)
goto out;
}
-
- /* Clear internal status */
- kvm_clear_exception_queue(vcpu);
- kvm_clear_interrupt_queue(vcpu);
-
/*
* Since vmcb01 is not in use, we can use it to store some of the L1
* state.
@@ -692,7 +703,28 @@ out:
return ret;
}
-void nested_svm_vmloadsave(struct vmcb *from_vmcb, struct vmcb *to_vmcb)
+/* Copy state save area fields which are handled by VMRUN */
+void svm_copy_vmrun_state(struct vmcb_save_area *to_save,
+ struct vmcb_save_area *from_save)
+{
+ to_save->es = from_save->es;
+ to_save->cs = from_save->cs;
+ to_save->ss = from_save->ss;
+ to_save->ds = from_save->ds;
+ to_save->gdtr = from_save->gdtr;
+ to_save->idtr = from_save->idtr;
+ to_save->rflags = from_save->rflags | X86_EFLAGS_FIXED;
+ to_save->efer = from_save->efer;
+ to_save->cr0 = from_save->cr0;
+ to_save->cr3 = from_save->cr3;
+ to_save->cr4 = from_save->cr4;
+ to_save->rax = from_save->rax;
+ to_save->rsp = from_save->rsp;
+ to_save->rip = from_save->rip;
+ to_save->cpl = 0;
+}
+
+void svm_copy_vmloadsave_state(struct vmcb *to_vmcb, struct vmcb *from_vmcb)
{
to_vmcb->save.fs = from_vmcb->save.fs;
to_vmcb->save.gs = from_vmcb->save.gs;
@@ -1355,28 +1387,11 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu,
svm->nested.vmcb12_gpa = kvm_state->hdr.svm.vmcb_pa;
- svm->vmcb01.ptr->save.es = save->es;
- svm->vmcb01.ptr->save.cs = save->cs;
- svm->vmcb01.ptr->save.ss = save->ss;
- svm->vmcb01.ptr->save.ds = save->ds;
- svm->vmcb01.ptr->save.gdtr = save->gdtr;
- svm->vmcb01.ptr->save.idtr = save->idtr;
- svm->vmcb01.ptr->save.rflags = save->rflags | X86_EFLAGS_FIXED;
- svm->vmcb01.ptr->save.efer = save->efer;
- svm->vmcb01.ptr->save.cr0 = save->cr0;
- svm->vmcb01.ptr->save.cr3 = save->cr3;
- svm->vmcb01.ptr->save.cr4 = save->cr4;
- svm->vmcb01.ptr->save.rax = save->rax;
- svm->vmcb01.ptr->save.rsp = save->rsp;
- svm->vmcb01.ptr->save.rip = save->rip;
- svm->vmcb01.ptr->save.cpl = 0;
-
+ svm_copy_vmrun_state(&svm->vmcb01.ptr->save, save);
nested_load_control_from_vmcb12(svm, ctl);
svm_switch_vmcb(svm, &svm->nested.vmcb02);
-
nested_vmcb02_prepare_control(svm);
-
kvm_make_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu);
ret = 0;
out_free:
diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index 62926f1a5f7b..75e0b21ad07c 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -28,8 +28,6 @@
#include "cpuid.h"
#include "trace.h"
-#define __ex(x) __kvm_handle_fault_on_reboot(x)
-
#ifndef CONFIG_KVM_AMD_SEV
/*
* When this config is not defined, SEV feature is not supported and APIs in
@@ -64,6 +62,7 @@ static DEFINE_MUTEX(sev_bitmap_lock);
unsigned int max_sev_asid;
static unsigned int min_sev_asid;
static unsigned long sev_me_mask;
+static unsigned int nr_asids;
static unsigned long *sev_asid_bitmap;
static unsigned long *sev_reclaim_asid_bitmap;
@@ -78,11 +77,11 @@ struct enc_region {
/* Called with the sev_bitmap_lock held, or on shutdown */
static int sev_flush_asids(int min_asid, int max_asid)
{
- int ret, pos, error = 0;
+ int ret, asid, error = 0;
/* Check if there are any ASIDs to reclaim before performing a flush */
- pos = find_next_bit(sev_reclaim_asid_bitmap, max_asid, min_asid);
- if (pos >= max_asid)
+ asid = find_next_bit(sev_reclaim_asid_bitmap, nr_asids, min_asid);
+ if (asid > max_asid)
return -EBUSY;
/*
@@ -115,15 +114,15 @@ static bool __sev_recycle_asids(int min_asid, int max_asid)
/* The flush process will flush all reclaimable SEV and SEV-ES ASIDs */
bitmap_xor(sev_asid_bitmap, sev_asid_bitmap, sev_reclaim_asid_bitmap,
- max_sev_asid);
- bitmap_zero(sev_reclaim_asid_bitmap, max_sev_asid);
+ nr_asids);
+ bitmap_zero(sev_reclaim_asid_bitmap, nr_asids);
return true;
}
static int sev_asid_new(struct kvm_sev_info *sev)
{
- int pos, min_asid, max_asid, ret;
+ int asid, min_asid, max_asid, ret;
bool retry = true;
enum misc_res_type type;
@@ -143,11 +142,11 @@ static int sev_asid_new(struct kvm_sev_info *sev)
* SEV-enabled guests must use asid from min_sev_asid to max_sev_asid.
* SEV-ES-enabled guest can use from 1 to min_sev_asid - 1.
*/
- min_asid = sev->es_active ? 0 : min_sev_asid - 1;
+ min_asid = sev->es_active ? 1 : min_sev_asid;
max_asid = sev->es_active ? min_sev_asid - 1 : max_sev_asid;
again:
- pos = find_next_zero_bit(sev_asid_bitmap, max_sev_asid, min_asid);
- if (pos >= max_asid) {
+ asid = find_next_zero_bit(sev_asid_bitmap, max_asid + 1, min_asid);
+ if (asid > max_asid) {
if (retry && __sev_recycle_asids(min_asid, max_asid)) {
retry = false;
goto again;
@@ -157,11 +156,11 @@ again:
goto e_uncharge;
}
- __set_bit(pos, sev_asid_bitmap);
+ __set_bit(asid, sev_asid_bitmap);
mutex_unlock(&sev_bitmap_lock);
- return pos + 1;
+ return asid;
e_uncharge:
misc_cg_uncharge(type, sev->misc_cg, 1);
put_misc_cg(sev->misc_cg);
@@ -179,17 +178,16 @@ static int sev_get_asid(struct kvm *kvm)
static void sev_asid_free(struct kvm_sev_info *sev)
{
struct svm_cpu_data *sd;
- int cpu, pos;
+ int cpu;
enum misc_res_type type;
mutex_lock(&sev_bitmap_lock);
- pos = sev->asid - 1;
- __set_bit(pos, sev_reclaim_asid_bitmap);
+ __set_bit(sev->asid, sev_reclaim_asid_bitmap);
for_each_possible_cpu(cpu) {
sd = per_cpu(svm_data, cpu);
- sd->sev_vmcbs[pos] = NULL;
+ sd->sev_vmcbs[sev->asid] = NULL;
}
mutex_unlock(&sev_bitmap_lock);
@@ -584,6 +582,7 @@ static int sev_es_sync_vmsa(struct vcpu_svm *svm)
save->xcr0 = svm->vcpu.arch.xcr0;
save->pkru = svm->vcpu.arch.pkru;
save->xss = svm->vcpu.arch.ia32_xss;
+ save->dr6 = svm->vcpu.arch.dr6;
/*
* SEV-ES will use a VMSA that is pointed to by the VMCB, not
@@ -1272,8 +1271,8 @@ static int sev_send_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp)
/* Pin guest memory */
guest_page = sev_pin_memory(kvm, params.guest_uaddr & PAGE_MASK,
PAGE_SIZE, &n, 0);
- if (!guest_page)
- return -EFAULT;
+ if (IS_ERR(guest_page))
+ return PTR_ERR(guest_page);
/* allocate memory for header and transport buffer */
ret = -ENOMEM;
@@ -1310,8 +1309,9 @@ static int sev_send_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp)
}
/* Copy packet header to userspace. */
- ret = copy_to_user((void __user *)(uintptr_t)params.hdr_uaddr, hdr,
- params.hdr_len);
+ if (copy_to_user((void __user *)(uintptr_t)params.hdr_uaddr, hdr,
+ params.hdr_len))
+ ret = -EFAULT;
e_free_trans_data:
kfree(trans_data);
@@ -1463,11 +1463,12 @@ static int sev_receive_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp)
data.trans_len = params.trans_len;
/* Pin guest memory */
- ret = -EFAULT;
guest_page = sev_pin_memory(kvm, params.guest_uaddr & PAGE_MASK,
PAGE_SIZE, &n, 0);
- if (!guest_page)
+ if (IS_ERR(guest_page)) {
+ ret = PTR_ERR(guest_page);
goto e_free_trans;
+ }
/* The RECEIVE_UPDATE_DATA command requires C-bit to be always set. */
data.guest_address = (page_to_pfn(guest_page[0]) << PAGE_SHIFT) + offset;
@@ -1855,12 +1856,17 @@ void __init sev_hardware_setup(void)
min_sev_asid = edx;
sev_me_mask = 1UL << (ebx & 0x3f);
- /* Initialize SEV ASID bitmaps */
- sev_asid_bitmap = bitmap_zalloc(max_sev_asid, GFP_KERNEL);
+ /*
+ * Initialize SEV ASID bitmaps. Allocate space for ASID 0 in the bitmap,
+ * even though it's never used, so that the bitmap is indexed by the
+ * actual ASID.
+ */
+ nr_asids = max_sev_asid + 1;
+ sev_asid_bitmap = bitmap_zalloc(nr_asids, GFP_KERNEL);
if (!sev_asid_bitmap)
goto out;
- sev_reclaim_asid_bitmap = bitmap_zalloc(max_sev_asid, GFP_KERNEL);
+ sev_reclaim_asid_bitmap = bitmap_zalloc(nr_asids, GFP_KERNEL);
if (!sev_reclaim_asid_bitmap) {
bitmap_free(sev_asid_bitmap);
sev_asid_bitmap = NULL;
@@ -1905,7 +1911,7 @@ void sev_hardware_teardown(void)
return;
/* No need to take sev_bitmap_lock, all VMs have been destroyed. */
- sev_flush_asids(0, max_sev_asid);
+ sev_flush_asids(1, max_sev_asid);
bitmap_free(sev_asid_bitmap);
bitmap_free(sev_reclaim_asid_bitmap);
@@ -1919,7 +1925,7 @@ int sev_cpu_init(struct svm_cpu_data *sd)
if (!sev_enabled)
return 0;
- sd->sev_vmcbs = kcalloc(max_sev_asid + 1, sizeof(void *), GFP_KERNEL);
+ sd->sev_vmcbs = kcalloc(nr_asids, sizeof(void *), GFP_KERNEL);
if (!sd->sev_vmcbs)
return -ENOMEM;
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 8834822c00cd..05e8d4d27969 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -46,8 +46,6 @@
#include "kvm_onhyperv.h"
#include "svm_onhyperv.h"
-#define __ex(x) __kvm_handle_fault_on_reboot(x)
-
MODULE_AUTHOR("Qumranet");
MODULE_LICENSE("GPL");
@@ -198,6 +196,11 @@ module_param(avic, bool, 0444);
bool __read_mostly dump_invalid_vmcb;
module_param(dump_invalid_vmcb, bool, 0644);
+
+bool intercept_smi = true;
+module_param(intercept_smi, bool, 0444);
+
+
static bool svm_gp_erratum_intercept = true;
static u8 rsm_ins_bytes[] = "\x0f\xaa";
@@ -256,7 +259,7 @@ u32 svm_msrpm_offset(u32 msr)
static int get_max_npt_level(void)
{
#ifdef CONFIG_X86_64
- return PT64_ROOT_4LEVEL;
+ return pgtable_l5_enabled() ? PT64_ROOT_5LEVEL : PT64_ROOT_4LEVEL;
#else
return PT32E_ROOT_LEVEL;
#endif
@@ -457,11 +460,6 @@ static int has_svm(void)
return 0;
}
- if (pgtable_l5_enabled()) {
- pr_info("KVM doesn't yet support 5-level paging on AMD SVM\n");
- return 0;
- }
-
return 1;
}
@@ -1010,7 +1008,9 @@ static __init int svm_hardware_setup(void)
if (!boot_cpu_has(X86_FEATURE_NPT))
npt_enabled = false;
- kvm_configure_mmu(npt_enabled, get_max_npt_level(), PG_LEVEL_1G);
+ /* Force VM NPT level equal to the host's max NPT level */
+ kvm_configure_mmu(npt_enabled, get_max_npt_level(),
+ get_max_npt_level(), PG_LEVEL_1G);
pr_info("kvm: Nested Paging %sabled\n", npt_enabled ? "en" : "dis");
/* Note, SEV setup consumes npt_enabled. */
@@ -1156,8 +1156,6 @@ static void init_vmcb(struct kvm_vcpu *vcpu)
struct vmcb_control_area *control = &svm->vmcb->control;
struct vmcb_save_area *save = &svm->vmcb->save;
- vcpu->arch.hflags = 0;
-
svm_set_intercept(svm, INTERCEPT_CR0_READ);
svm_set_intercept(svm, INTERCEPT_CR3_READ);
svm_set_intercept(svm, INTERCEPT_CR4_READ);
@@ -1185,7 +1183,10 @@ static void init_vmcb(struct kvm_vcpu *vcpu)
svm_set_intercept(svm, INTERCEPT_INTR);
svm_set_intercept(svm, INTERCEPT_NMI);
- svm_set_intercept(svm, INTERCEPT_SMI);
+
+ if (intercept_smi)
+ svm_set_intercept(svm, INTERCEPT_SMI);
+
svm_set_intercept(svm, INTERCEPT_SELECTIVE_CR0);
svm_set_intercept(svm, INTERCEPT_RDPMC);
svm_set_intercept(svm, INTERCEPT_CPUID);
@@ -1233,29 +1234,14 @@ static void init_vmcb(struct kvm_vcpu *vcpu)
SVM_SELECTOR_S_MASK | SVM_SELECTOR_CODE_MASK;
save->cs.limit = 0xffff;
+ save->gdtr.base = 0;
save->gdtr.limit = 0xffff;
+ save->idtr.base = 0;
save->idtr.limit = 0xffff;
init_sys_seg(&save->ldtr, SEG_TYPE_LDT);
init_sys_seg(&save->tr, SEG_TYPE_BUSY_TSS16);
- svm_set_cr4(vcpu, 0);
- svm_set_efer(vcpu, 0);
- save->dr6 = 0xffff0ff0;
- kvm_set_rflags(vcpu, X86_EFLAGS_FIXED);
- save->rip = 0x0000fff0;
- vcpu->arch.regs[VCPU_REGS_RIP] = save->rip;
-
- /*
- * svm_set_cr0() sets PG and WP and clears NW and CD on save->cr0.
- * It also updates the guest-visible cr0 value.
- */
- svm_set_cr0(vcpu, X86_CR0_NW | X86_CR0_CD | X86_CR0_ET);
- kvm_mmu_reset_context(vcpu);
-
- save->cr4 = X86_CR4_PAE;
- /* rdx = ?? */
-
if (npt_enabled) {
/* Setup VMCB for Nested Paging */
control->nested_ctl |= SVM_NESTED_CTL_NP_ENABLE;
@@ -1265,14 +1251,12 @@ static void init_vmcb(struct kvm_vcpu *vcpu)
svm_clr_intercept(svm, INTERCEPT_CR3_WRITE);
save->g_pat = vcpu->arch.pat;
save->cr3 = 0;
- save->cr4 = 0;
}
svm->current_vmcb->asid_generation = 0;
svm->asid = 0;
svm->nested.vmcb12_gpa = INVALID_GPA;
svm->nested.last_vmcb12_gpa = INVALID_GPA;
- vcpu->arch.hflags = 0;
if (!kvm_pause_in_guest(vcpu->kvm)) {
control->pause_filter_count = pause_filter_count;
@@ -1322,25 +1306,11 @@ static void init_vmcb(struct kvm_vcpu *vcpu)
static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
{
struct vcpu_svm *svm = to_svm(vcpu);
- u32 dummy;
- u32 eax = 1;
svm->spec_ctrl = 0;
svm->virt_spec_ctrl = 0;
- if (!init_event) {
- vcpu->arch.apic_base = APIC_DEFAULT_PHYS_BASE |
- MSR_IA32_APICBASE_ENABLE;
- if (kvm_vcpu_is_reset_bsp(vcpu))
- vcpu->arch.apic_base |= MSR_IA32_APICBASE_BSP;
- }
init_vmcb(vcpu);
-
- kvm_cpuid(vcpu, &eax, &dummy, &dummy, &dummy, false);
- kvm_rdx_write(vcpu, eax);
-
- if (kvm_vcpu_apicv_active(vcpu) && !init_event)
- avic_update_vapic_bar(svm, APIC_DEFAULT_PHYS_BASE);
}
void svm_switch_vmcb(struct vcpu_svm *svm, struct kvm_vmcb_info *target_vmcb)
@@ -1398,8 +1368,6 @@ static int svm_create_vcpu(struct kvm_vcpu *vcpu)
goto error_free_vmsa_page;
}
- svm_vcpu_init_msrpm(vcpu, svm->msrpm);
-
svm->vmcb01.ptr = page_address(vmcb01_page);
svm->vmcb01.pa = __sme_set(page_to_pfn(vmcb01_page) << PAGE_SHIFT);
@@ -1411,6 +1379,8 @@ static int svm_create_vcpu(struct kvm_vcpu *vcpu)
svm_switch_vmcb(svm, &svm->vmcb01);
init_vmcb(vcpu);
+ svm_vcpu_init_msrpm(vcpu, svm->msrpm);
+
svm_init_osvw(vcpu);
vcpu->arch.microcode_version = 0x01000065;
@@ -1505,12 +1475,15 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
sd->current_vmcb = svm->vmcb;
indirect_branch_prediction_barrier();
}
- avic_vcpu_load(vcpu, cpu);
+ if (kvm_vcpu_apicv_active(vcpu))
+ avic_vcpu_load(vcpu, cpu);
}
static void svm_vcpu_put(struct kvm_vcpu *vcpu)
{
- avic_vcpu_put(vcpu);
+ if (kvm_vcpu_apicv_active(vcpu))
+ avic_vcpu_put(vcpu);
+
svm_prepare_host_switch(vcpu);
++vcpu->stat.host_state_reload;
@@ -1552,7 +1525,7 @@ static void svm_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg)
load_pdptrs(vcpu, vcpu->arch.walk_mmu, kvm_read_cr3(vcpu));
break;
default:
- WARN_ON_ONCE(1);
+ KVM_BUG_ON(1, vcpu->kvm);
}
}
@@ -1560,8 +1533,11 @@ static void svm_set_vintr(struct vcpu_svm *svm)
{
struct vmcb_control_area *control;
- /* The following fields are ignored when AVIC is enabled */
- WARN_ON(kvm_vcpu_apicv_active(&svm->vcpu));
+ /*
+ * The following fields are ignored when AVIC is enabled
+ */
+ WARN_ON(kvm_apicv_activated(svm->vcpu.kvm));
+
svm_set_intercept(svm, INTERCEPT_VINTR);
/*
@@ -1578,17 +1554,18 @@ static void svm_set_vintr(struct vcpu_svm *svm)
static void svm_clear_vintr(struct vcpu_svm *svm)
{
- const u32 mask = V_TPR_MASK | V_GIF_ENABLE_MASK | V_GIF_MASK | V_INTR_MASKING_MASK;
svm_clr_intercept(svm, INTERCEPT_VINTR);
/* Drop int_ctl fields related to VINTR injection. */
- svm->vmcb->control.int_ctl &= mask;
+ svm->vmcb->control.int_ctl &= ~V_IRQ_INJECTION_BITS_MASK;
if (is_guest_mode(&svm->vcpu)) {
- svm->vmcb01.ptr->control.int_ctl &= mask;
+ svm->vmcb01.ptr->control.int_ctl &= ~V_IRQ_INJECTION_BITS_MASK;
WARN_ON((svm->vmcb->control.int_ctl & V_TPR_MASK) !=
(svm->nested.ctl.int_ctl & V_TPR_MASK));
- svm->vmcb->control.int_ctl |= svm->nested.ctl.int_ctl & ~mask;
+
+ svm->vmcb->control.int_ctl |= svm->nested.ctl.int_ctl &
+ V_IRQ_INJECTION_BITS_MASK;
}
vmcb_mark_dirty(svm->vmcb, VMCB_INTR);
@@ -1923,7 +1900,7 @@ static int npf_interception(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
- u64 fault_address = __sme_clr(svm->vmcb->control.exit_info_2);
+ u64 fault_address = svm->vmcb->control.exit_info_2;
u64 error_code = svm->vmcb->control.exit_info_1;
trace_kvm_page_fault(fault_address, error_code);
@@ -2066,11 +2043,15 @@ static int shutdown_interception(struct kvm_vcpu *vcpu)
return -EINVAL;
/*
- * VMCB is undefined after a SHUTDOWN intercept
- * so reinitialize it.
+ * VMCB is undefined after a SHUTDOWN intercept. INIT the vCPU to put
+ * the VMCB in a known good state. Unfortuately, KVM doesn't have
+ * KVM_MP_STATE_SHUTDOWN and can't add it without potentially breaking
+ * userspace. At a platform view, INIT is acceptable behavior as
+ * there exist bare metal platforms that automatically INIT the CPU
+ * in response to shutdown.
*/
clear_page(svm->vmcb);
- init_vmcb(vcpu);
+ kvm_vcpu_reset(vcpu, true);
kvm_run->exit_reason = KVM_EXIT_SHUTDOWN;
return 0;
@@ -2106,6 +2087,11 @@ static int nmi_interception(struct kvm_vcpu *vcpu)
return 1;
}
+static int smi_interception(struct kvm_vcpu *vcpu)
+{
+ return 1;
+}
+
static int intr_interception(struct kvm_vcpu *vcpu)
{
++vcpu->stat.irq_exits;
@@ -2134,11 +2120,12 @@ static int vmload_vmsave_interception(struct kvm_vcpu *vcpu, bool vmload)
ret = kvm_skip_emulated_instruction(vcpu);
if (vmload) {
- nested_svm_vmloadsave(vmcb12, svm->vmcb);
+ svm_copy_vmloadsave_state(svm->vmcb, vmcb12);
svm->sysenter_eip_hi = 0;
svm->sysenter_esp_hi = 0;
- } else
- nested_svm_vmloadsave(svm->vmcb, vmcb12);
+ } else {
+ svm_copy_vmloadsave_state(vmcb12, svm->vmcb);
+ }
kvm_vcpu_unmap(vcpu, &map, true);
@@ -2941,7 +2928,16 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
svm_disable_lbrv(vcpu);
break;
case MSR_VM_HSAVE_PA:
- svm->nested.hsave_msr = data;
+ /*
+ * Old kernels did not validate the value written to
+ * MSR_VM_HSAVE_PA. Allow KVM_SET_MSR to set an invalid
+ * value to allow live migrating buggy or malicious guests
+ * originating from those kernels.
+ */
+ if (!msr->host_initiated && !page_address_valid(vcpu, data))
+ return 1;
+
+ svm->nested.hsave_msr = data & PAGE_MASK;
break;
case MSR_VM_CR:
return svm_set_vm_cr(vcpu, data);
@@ -2966,10 +2962,6 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
svm->msr_decfg = data;
break;
}
- case MSR_IA32_APICBASE:
- if (kvm_vcpu_apicv_active(vcpu))
- avic_update_vapic_bar(to_svm(vcpu), data);
- fallthrough;
default:
return kvm_set_msr_common(vcpu, msr);
}
@@ -2994,7 +2986,7 @@ static int interrupt_window_interception(struct kvm_vcpu *vcpu)
* In this case AVIC was temporarily disabled for
* requesting the IRQ window and we have to re-enable it.
*/
- svm_toggle_avic_for_irq_window(vcpu, true);
+ kvm_request_apicv_update(vcpu->kvm, true, APICV_INHIBIT_REASON_IRQWIN);
++vcpu->stat.irq_window_exits;
return 1;
@@ -3080,8 +3072,7 @@ static int (*const svm_exit_handlers[])(struct kvm_vcpu *vcpu) = {
[SVM_EXIT_EXCP_BASE + GP_VECTOR] = gp_interception,
[SVM_EXIT_INTR] = intr_interception,
[SVM_EXIT_NMI] = nmi_interception,
- [SVM_EXIT_SMI] = kvm_emulate_as_nop,
- [SVM_EXIT_INIT] = kvm_emulate_as_nop,
+ [SVM_EXIT_SMI] = smi_interception,
[SVM_EXIT_VINTR] = interrupt_window_interception,
[SVM_EXIT_RDPMC] = kvm_emulate_rdpmc,
[SVM_EXIT_CPUID] = kvm_emulate_cpuid,
@@ -3243,12 +3234,14 @@ static void dump_vmcb(struct kvm_vcpu *vcpu)
"excp_to:", save->last_excp_to);
}
-static int svm_handle_invalid_exit(struct kvm_vcpu *vcpu, u64 exit_code)
+static bool svm_check_exit_valid(struct kvm_vcpu *vcpu, u64 exit_code)
{
- if (exit_code < ARRAY_SIZE(svm_exit_handlers) &&
- svm_exit_handlers[exit_code])
- return 0;
+ return (exit_code < ARRAY_SIZE(svm_exit_handlers) &&
+ svm_exit_handlers[exit_code]);
+}
+static int svm_handle_invalid_exit(struct kvm_vcpu *vcpu, u64 exit_code)
+{
vcpu_unimpl(vcpu, "svm: unexpected exit reason 0x%llx\n", exit_code);
dump_vmcb(vcpu);
vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
@@ -3256,14 +3249,13 @@ static int svm_handle_invalid_exit(struct kvm_vcpu *vcpu, u64 exit_code)
vcpu->run->internal.ndata = 2;
vcpu->run->internal.data[0] = exit_code;
vcpu->run->internal.data[1] = vcpu->arch.last_vmentry_cpu;
-
- return -EINVAL;
+ return 0;
}
int svm_invoke_exit_handler(struct kvm_vcpu *vcpu, u64 exit_code)
{
- if (svm_handle_invalid_exit(vcpu, exit_code))
- return 0;
+ if (!svm_check_exit_valid(vcpu, exit_code))
+ return svm_handle_invalid_exit(vcpu, exit_code);
#ifdef CONFIG_RETPOLINE
if (exit_code == SVM_EXIT_MSR)
@@ -3547,7 +3539,7 @@ static void svm_enable_irq_window(struct kvm_vcpu *vcpu)
* via AVIC. In such case, we need to temporarily disable AVIC,
* and fallback to injecting IRQ via V_IRQ.
*/
- svm_toggle_avic_for_irq_window(vcpu, false);
+ kvm_request_apicv_update(vcpu->kvm, false, APICV_INHIBIT_REASON_IRQWIN);
svm_set_vintr(svm);
}
}
@@ -3782,6 +3774,8 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu)
pre_svm_run(vcpu);
+ WARN_ON_ONCE(kvm_apicv_activated(vcpu->kvm) != kvm_vcpu_apicv_active(vcpu));
+
sync_lapic_to_cr8(vcpu);
if (unlikely(svm->asid != svm->vmcb->control.asid)) {
@@ -4288,6 +4282,7 @@ static int svm_smi_allowed(struct kvm_vcpu *vcpu, bool for_injection)
static int svm_enter_smm(struct kvm_vcpu *vcpu, char *smstate)
{
struct vcpu_svm *svm = to_svm(vcpu);
+ struct kvm_host_map map_save;
int ret;
if (is_guest_mode(vcpu)) {
@@ -4303,6 +4298,29 @@ static int svm_enter_smm(struct kvm_vcpu *vcpu, char *smstate)
ret = nested_svm_vmexit(svm);
if (ret)
return ret;
+
+ /*
+ * KVM uses VMCB01 to store L1 host state while L2 runs but
+ * VMCB01 is going to be used during SMM and thus the state will
+ * be lost. Temporary save non-VMLOAD/VMSAVE state to the host save
+ * area pointed to by MSR_VM_HSAVE_PA. APM guarantees that the
+ * format of the area is identical to guest save area offsetted
+ * by 0x400 (matches the offset of 'struct vmcb_save_area'
+ * within 'struct vmcb'). Note: HSAVE area may also be used by
+ * L1 hypervisor to save additional host context (e.g. KVM does
+ * that, see svm_prepare_guest_switch()) which must be
+ * preserved.
+ */
+ if (kvm_vcpu_map(vcpu, gpa_to_gfn(svm->nested.hsave_msr),
+ &map_save) == -EINVAL)
+ return 1;
+
+ BUILD_BUG_ON(offsetof(struct vmcb, save) != 0x400);
+
+ svm_copy_vmrun_state(map_save.hva + 0x400,
+ &svm->vmcb01.ptr->save);
+
+ kvm_vcpu_unmap(vcpu, &map_save, true);
}
return 0;
}
@@ -4310,13 +4328,14 @@ static int svm_enter_smm(struct kvm_vcpu *vcpu, char *smstate)
static int svm_leave_smm(struct kvm_vcpu *vcpu, const char *smstate)
{
struct vcpu_svm *svm = to_svm(vcpu);
- struct kvm_host_map map;
+ struct kvm_host_map map, map_save;
int ret = 0;
if (guest_cpuid_has(vcpu, X86_FEATURE_LM)) {
u64 saved_efer = GET_SMSTATE(u64, smstate, 0x7ed0);
u64 guest = GET_SMSTATE(u64, smstate, 0x7ed8);
u64 vmcb12_gpa = GET_SMSTATE(u64, smstate, 0x7ee0);
+ struct vmcb *vmcb12;
if (guest) {
if (!guest_cpuid_has(vcpu, X86_FEATURE_SVM))
@@ -4332,8 +4351,25 @@ static int svm_leave_smm(struct kvm_vcpu *vcpu, const char *smstate)
if (svm_allocate_nested(svm))
return 1;
- ret = enter_svm_guest_mode(vcpu, vmcb12_gpa, map.hva);
+ vmcb12 = map.hva;
+
+ nested_load_control_from_vmcb12(svm, &vmcb12->control);
+
+ ret = enter_svm_guest_mode(vcpu, vmcb12_gpa, vmcb12);
kvm_vcpu_unmap(vcpu, &map, true);
+
+ /*
+ * Restore L1 host state from L1 HSAVE area as VMCB01 was
+ * used during SMM (see svm_enter_smm())
+ */
+ if (kvm_vcpu_map(vcpu, gpa_to_gfn(svm->nested.hsave_msr),
+ &map_save) == -EINVAL)
+ return 1;
+
+ svm_copy_vmrun_state(&svm->vmcb01.ptr->save,
+ map_save.hva + 0x400);
+
+ kvm_vcpu_unmap(vcpu, &map_save, true);
}
}
@@ -4542,7 +4578,6 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
.set_virtual_apic_mode = svm_set_virtual_apic_mode,
.refresh_apicv_exec_ctrl = svm_refresh_apicv_exec_ctrl,
.check_apicv_inhibit_reasons = svm_check_apicv_inhibit_reasons,
- .pre_update_apicv_exec_ctrl = svm_pre_update_apicv_exec_ctrl,
.load_eoi_exitmap = svm_load_eoi_exitmap,
.hwapic_irr_update = svm_hwapic_irr_update,
.hwapic_isr_update = svm_hwapic_isr_update,
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index f89b623bb591..524d943f3efc 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -31,6 +31,7 @@
#define MSRPM_OFFSETS 16
extern u32 msrpm_offsets[MSRPM_OFFSETS] __read_mostly;
extern bool npt_enabled;
+extern bool intercept_smi;
/*
* Clean bits in VMCB.
@@ -463,7 +464,9 @@ void svm_leave_nested(struct vcpu_svm *svm);
void svm_free_nested(struct vcpu_svm *svm);
int svm_allocate_nested(struct vcpu_svm *svm);
int nested_svm_vmrun(struct kvm_vcpu *vcpu);
-void nested_svm_vmloadsave(struct vmcb *from_vmcb, struct vmcb *to_vmcb);
+void svm_copy_vmrun_state(struct vmcb_save_area *to_save,
+ struct vmcb_save_area *from_save);
+void svm_copy_vmloadsave_state(struct vmcb *to_vmcb, struct vmcb *from_vmcb);
int nested_svm_vmexit(struct vcpu_svm *svm);
static inline int nested_svm_simple_vmexit(struct vcpu_svm *svm, u32 exit_code)
@@ -479,6 +482,8 @@ int nested_svm_check_permissions(struct kvm_vcpu *vcpu);
int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
bool has_error_code, u32 error_code);
int nested_svm_exit_special(struct vcpu_svm *svm);
+void nested_load_control_from_vmcb12(struct vcpu_svm *svm,
+ struct vmcb_control_area *control);
void nested_sync_control_from_vmcb02(struct vcpu_svm *svm);
void nested_vmcb02_compute_g_pat(struct vcpu_svm *svm);
void svm_switch_vmcb(struct vcpu_svm *svm, struct kvm_vmcb_info *target_vmcb);
@@ -498,12 +503,6 @@ extern struct kvm_x86_nested_ops svm_nested_ops;
#define VMCB_AVIC_APIC_BAR_MASK 0xFFFFFFFFFF000ULL
-static inline void avic_update_vapic_bar(struct vcpu_svm *svm, u64 data)
-{
- svm->vmcb->control.avic_vapic_bar = data & VMCB_AVIC_APIC_BAR_MASK;
- vmcb_mark_dirty(svm->vmcb, VMCB_AVIC);
-}
-
static inline bool avic_vcpu_is_running(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
@@ -519,7 +518,6 @@ int avic_ga_log_notifier(u32 ga_tag);
void avic_vm_destroy(struct kvm *kvm);
int avic_vm_init(struct kvm *kvm);
void avic_init_vmcb(struct vcpu_svm *svm);
-void svm_toggle_avic_for_irq_window(struct kvm_vcpu *vcpu, bool activate);
int avic_incomplete_ipi_interception(struct kvm_vcpu *vcpu);
int avic_unaccelerated_access_interception(struct kvm_vcpu *vcpu);
int avic_init_vcpu(struct vcpu_svm *svm);
@@ -529,7 +527,6 @@ void avic_post_state_restore(struct kvm_vcpu *vcpu);
void svm_set_virtual_apic_mode(struct kvm_vcpu *vcpu);
void svm_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu);
bool svm_check_apicv_inhibit_reasons(ulong bit);
-void svm_pre_update_apicv_exec_ctrl(struct kvm *kvm, bool activate);
void svm_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap);
void svm_hwapic_irr_update(struct kvm_vcpu *vcpu, int max_irr);
void svm_hwapic_isr_update(struct kvm_vcpu *vcpu, int max_isr);
diff --git a/arch/x86/kvm/svm/svm_onhyperv.h b/arch/x86/kvm/svm/svm_onhyperv.h
index 9b9a55abc29f..c53b8bf8d013 100644
--- a/arch/x86/kvm/svm/svm_onhyperv.h
+++ b/arch/x86/kvm/svm/svm_onhyperv.h
@@ -89,7 +89,7 @@ static inline void svm_hv_vmcb_dirty_nested_enlightenments(
* as we mark it dirty unconditionally towards end of vcpu
* init phase.
*/
- if (vmcb && vmcb_is_clean(vmcb, VMCB_HV_NESTED_ENLIGHTENMENTS) &&
+ if (vmcb_is_clean(vmcb, VMCB_HV_NESTED_ENLIGHTENMENTS) &&
hve->hv_enlightenments_control.msr_bitmap)
vmcb_mark_dirty(vmcb, VMCB_HV_NESTED_ENLIGHTENMENTS);
}
diff --git a/arch/x86/kvm/svm/svm_ops.h b/arch/x86/kvm/svm/svm_ops.h
index 8170f2a5a16f..22e2b019de37 100644
--- a/arch/x86/kvm/svm/svm_ops.h
+++ b/arch/x86/kvm/svm/svm_ops.h
@@ -4,7 +4,7 @@
#include <linux/compiler_types.h>
-#include <asm/kvm_host.h>
+#include "x86.h"
#define svm_asm(insn, clobber...) \
do { \