summaryrefslogtreecommitdiff
path: root/arch/x86/include/asm/kvm_host.h
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/include/asm/kvm_host.h')
-rw-r--r--arch/x86/include/asm/kvm_host.h145
1 files changed, 102 insertions, 43 deletions
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index f05ebaa26f0f..f35f1ff4427b 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -25,6 +25,7 @@
#include <linux/clocksource.h>
#include <linux/irqbypass.h>
#include <linux/hyperv.h>
+#include <linux/kfifo.h>
#include <asm/apic.h>
#include <asm/pvclock-abi.h>
@@ -81,7 +82,9 @@
#define KVM_REQ_NMI KVM_ARCH_REQ(9)
#define KVM_REQ_PMU KVM_ARCH_REQ(10)
#define KVM_REQ_PMI KVM_ARCH_REQ(11)
+#ifdef CONFIG_KVM_SMM
#define KVM_REQ_SMI KVM_ARCH_REQ(12)
+#endif
#define KVM_REQ_MASTERCLOCK_UPDATE KVM_ARCH_REQ(13)
#define KVM_REQ_MCLOCK_INPROGRESS \
KVM_ARCH_REQ_FLAGS(14, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
@@ -108,6 +111,8 @@
KVM_ARCH_REQ_FLAGS(30, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
#define KVM_REQ_MMU_FREE_OBSOLETE_ROOTS \
KVM_ARCH_REQ_FLAGS(31, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
+#define KVM_REQ_HV_TLB_FLUSH \
+ KVM_ARCH_REQ_FLAGS(32, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
#define CR0_RESERVED_BITS \
(~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \
@@ -204,6 +209,7 @@ typedef enum exit_fastpath_completion fastpath_t;
struct x86_emulate_ctxt;
struct x86_exception;
+union kvm_smram;
enum x86_intercept;
enum x86_intercept_stage;
@@ -253,16 +259,16 @@ enum x86_intercept_stage;
#define PFERR_GUEST_PAGE_BIT 33
#define PFERR_IMPLICIT_ACCESS_BIT 48
-#define PFERR_PRESENT_MASK (1U << PFERR_PRESENT_BIT)
-#define PFERR_WRITE_MASK (1U << PFERR_WRITE_BIT)
-#define PFERR_USER_MASK (1U << PFERR_USER_BIT)
-#define PFERR_RSVD_MASK (1U << PFERR_RSVD_BIT)
-#define PFERR_FETCH_MASK (1U << PFERR_FETCH_BIT)
-#define PFERR_PK_MASK (1U << PFERR_PK_BIT)
-#define PFERR_SGX_MASK (1U << PFERR_SGX_BIT)
-#define PFERR_GUEST_FINAL_MASK (1ULL << PFERR_GUEST_FINAL_BIT)
-#define PFERR_GUEST_PAGE_MASK (1ULL << PFERR_GUEST_PAGE_BIT)
-#define PFERR_IMPLICIT_ACCESS (1ULL << PFERR_IMPLICIT_ACCESS_BIT)
+#define PFERR_PRESENT_MASK BIT(PFERR_PRESENT_BIT)
+#define PFERR_WRITE_MASK BIT(PFERR_WRITE_BIT)
+#define PFERR_USER_MASK BIT(PFERR_USER_BIT)
+#define PFERR_RSVD_MASK BIT(PFERR_RSVD_BIT)
+#define PFERR_FETCH_MASK BIT(PFERR_FETCH_BIT)
+#define PFERR_PK_MASK BIT(PFERR_PK_BIT)
+#define PFERR_SGX_MASK BIT(PFERR_SGX_BIT)
+#define PFERR_GUEST_FINAL_MASK BIT_ULL(PFERR_GUEST_FINAL_BIT)
+#define PFERR_GUEST_PAGE_MASK BIT_ULL(PFERR_GUEST_PAGE_BIT)
+#define PFERR_IMPLICIT_ACCESS BIT_ULL(PFERR_IMPLICIT_ACCESS_BIT)
#define PFERR_NESTED_GUEST_PAGE (PFERR_GUEST_PAGE_MASK | \
PFERR_WRITE_MASK | \
@@ -488,17 +494,19 @@ enum pmc_type {
struct kvm_pmc {
enum pmc_type type;
u8 idx;
+ bool is_paused;
+ bool intr;
u64 counter;
+ u64 prev_counter;
u64 eventsel;
struct perf_event *perf_event;
struct kvm_vcpu *vcpu;
/*
+ * only for creating or reusing perf_event,
* eventsel value for general purpose counters,
* ctrl value for fixed counters.
*/
u64 current_config;
- bool is_paused;
- bool intr;
};
/* More counters may conflict with other existing Architectural MSRs */
@@ -524,7 +532,16 @@ struct kvm_pmu {
struct kvm_pmc gp_counters[KVM_INTEL_PMC_MAX_GENERIC];
struct kvm_pmc fixed_counters[KVM_PMC_MAX_FIXED];
struct irq_work irq_work;
- DECLARE_BITMAP(reprogram_pmi, X86_PMC_IDX_MAX);
+
+ /*
+ * Overlay the bitmap with a 64-bit atomic so that all bits can be
+ * set in a single access, e.g. to reprogram all counters when the PMU
+ * filter changes.
+ */
+ union {
+ DECLARE_BITMAP(reprogram_pmi, X86_PMC_IDX_MAX);
+ atomic64_t __reprogram_pmi;
+ };
DECLARE_BITMAP(all_valid_pmc_idx, X86_PMC_IDX_MAX);
DECLARE_BITMAP(pmc_in_use, X86_PMC_IDX_MAX);
@@ -602,6 +619,29 @@ struct kvm_vcpu_hv_synic {
bool dont_zero_synic_pages;
};
+/* The maximum number of entries on the TLB flush fifo. */
+#define KVM_HV_TLB_FLUSH_FIFO_SIZE (16)
+/*
+ * Note: the following 'magic' entry is made up by KVM to avoid putting
+ * anything besides GVA on the TLB flush fifo. It is theoretically possible
+ * to observe a request to flush 4095 PFNs starting from 0xfffffffffffff000
+ * which will look identical. KVM's action to 'flush everything' instead of
+ * flushing these particular addresses is, however, fully legitimate as
+ * flushing more than requested is always OK.
+ */
+#define KVM_HV_TLB_FLUSHALL_ENTRY ((u64)-1)
+
+enum hv_tlb_flush_fifos {
+ HV_L1_TLB_FLUSH_FIFO,
+ HV_L2_TLB_FLUSH_FIFO,
+ HV_NR_TLB_FLUSH_FIFOS,
+};
+
+struct kvm_vcpu_hv_tlb_flush_fifo {
+ spinlock_t write_lock;
+ DECLARE_KFIFO(entries, u64, KVM_HV_TLB_FLUSH_FIFO_SIZE);
+};
+
/* Hyper-V per vcpu emulation context */
struct kvm_vcpu_hv {
struct kvm_vcpu *vcpu;
@@ -623,6 +663,19 @@ struct kvm_vcpu_hv {
u32 nested_eax; /* HYPERV_CPUID_NESTED_FEATURES.EAX */
u32 nested_ebx; /* HYPERV_CPUID_NESTED_FEATURES.EBX */
} cpuid_cache;
+
+ struct kvm_vcpu_hv_tlb_flush_fifo tlb_flush_fifo[HV_NR_TLB_FLUSH_FIFOS];
+
+ /* Preallocated buffer for handling hypercalls passing sparse vCPU set */
+ u64 sparse_banks[HV_MAX_SPARSE_VCPU_BANKS];
+
+ struct hv_vp_assist_page vp_assist_page;
+
+ struct {
+ u64 pa_page_gpa;
+ u64 vm_id;
+ u32 vp_id;
+ } nested;
};
/* Xen HVM per vcpu emulation context */
@@ -633,6 +686,7 @@ struct kvm_vcpu_xen {
struct gfn_to_pfn_cache vcpu_info_cache;
struct gfn_to_pfn_cache vcpu_time_info_cache;
struct gfn_to_pfn_cache runstate_cache;
+ struct gfn_to_pfn_cache runstate2_cache;
u64 last_steal;
u64 runstate_entry_time;
u64 runstate_times[4];
@@ -1059,6 +1113,7 @@ struct msr_bitmap_range {
struct kvm_xen {
u32 xen_version;
bool long_mode;
+ bool runstate_update_flag;
u8 upcall_vector;
struct gfn_to_pfn_cache shinfo_cache;
struct idr evtchn_ports;
@@ -1156,7 +1211,18 @@ struct kvm_arch {
struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES];
struct list_head active_mmu_pages;
struct list_head zapped_obsolete_pages;
- struct list_head lpage_disallowed_mmu_pages;
+ /*
+ * A list of kvm_mmu_page structs that, if zapped, could possibly be
+ * replaced by an NX huge page. A shadow page is on this list if its
+ * existence disallows an NX huge page (nx_huge_page_disallowed is set)
+ * and there are no other conditions that prevent a huge page, e.g.
+ * the backing host page is huge, dirtly logging is not enabled for its
+ * memslot, etc... Note, zapping shadow pages on this list doesn't
+ * guarantee an NX huge page will be created in its stead, e.g. if the
+ * guest attempts to execute from the region then KVM obviously can't
+ * create an NX huge page (without hanging the guest).
+ */
+ struct list_head possible_nx_huge_pages;
struct kvm_page_track_notifier_node mmu_sp_tracker;
struct kvm_page_track_notifier_head track_notifier_head;
/*
@@ -1272,7 +1338,7 @@ struct kvm_arch {
bool sgx_provisioning_allowed;
struct kvm_pmu_event_filter __rcu *pmu_event_filter;
- struct task_struct *nx_lpage_recovery_thread;
+ struct task_struct *nx_huge_page_recovery_thread;
#ifdef CONFIG_X86_64
/*
@@ -1284,6 +1350,9 @@ struct kvm_arch {
*/
bool tdp_mmu_enabled;
+ /* The number of TDP MMU pages across all roots. */
+ atomic64_t tdp_mmu_pages;
+
/*
* List of kvm_mmu_page structs being used as roots.
* All kvm_mmu_page structs in the list should have
@@ -1305,20 +1374,12 @@ struct kvm_arch {
struct list_head tdp_mmu_roots;
/*
- * List of kvm_mmu_page structs not being used as roots.
- * All kvm_mmu_page structs in the list should have
- * tdp_mmu_page set and a tdp_mmu_root_count of 0.
- */
- struct list_head tdp_mmu_pages;
-
- /*
* Protects accesses to the following fields when the MMU lock
* is held in read mode:
* - tdp_mmu_roots (above)
- * - tdp_mmu_pages (above)
* - the link field of kvm_mmu_page structs used by the TDP MMU
- * - lpage_disallowed_mmu_pages
- * - the lpage_disallowed_link field of kvm_mmu_page structs used
+ * - possible_nx_huge_pages;
+ * - the possible_nx_huge_page_link field of kvm_mmu_page structs used
* by the TDP MMU
* It is acceptable, but not necessary, to acquire this lock when
* the thread holds the MMU lock in write mode.
@@ -1612,10 +1673,12 @@ struct kvm_x86_ops {
void (*setup_mce)(struct kvm_vcpu *vcpu);
+#ifdef CONFIG_KVM_SMM
int (*smi_allowed)(struct kvm_vcpu *vcpu, bool for_injection);
- int (*enter_smm)(struct kvm_vcpu *vcpu, char *smstate);
- int (*leave_smm)(struct kvm_vcpu *vcpu, const char *smstate);
+ int (*enter_smm)(struct kvm_vcpu *vcpu, union kvm_smram *smram);
+ int (*leave_smm)(struct kvm_vcpu *vcpu, const union kvm_smram *smram);
void (*enable_smi_window)(struct kvm_vcpu *vcpu);
+#endif
int (*mem_enc_ioctl)(struct kvm *kvm, void __user *argp);
int (*mem_enc_register_region)(struct kvm *kvm, struct kvm_enc_region *argp);
@@ -1630,7 +1693,7 @@ struct kvm_x86_ops {
void *insn, int insn_len);
bool (*apic_init_signal_blocked)(struct kvm_vcpu *vcpu);
- int (*enable_direct_tlbflush)(struct kvm_vcpu *vcpu);
+ int (*enable_l2_tlb_flush)(struct kvm_vcpu *vcpu);
void (*migrate_timers)(struct kvm_vcpu *vcpu);
void (*msr_filter_changed)(struct kvm_vcpu *vcpu);
@@ -1663,6 +1726,7 @@ struct kvm_x86_nested_ops {
int (*enable_evmcs)(struct kvm_vcpu *vcpu,
uint16_t *vmcs_version);
uint16_t (*get_evmcs_version)(struct kvm_vcpu *vcpu);
+ void (*hv_inject_synthetic_vmexit_post_tlb_flush)(struct kvm_vcpu *vcpu);
};
struct kvm_x86_init_ops {
@@ -1844,6 +1908,7 @@ int kvm_emulate_ap_reset_hold(struct kvm_vcpu *vcpu);
int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu);
void kvm_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg);
+void kvm_set_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg);
int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, int seg);
void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector);
@@ -1909,8 +1974,6 @@ void kvm_mmu_free_roots(struct kvm *kvm, struct kvm_mmu *mmu,
void kvm_mmu_free_guest_mode_roots(struct kvm *kvm, struct kvm_mmu *mmu);
gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva,
struct x86_exception *exception);
-gpa_t kvm_mmu_gva_to_gpa_fetch(struct kvm_vcpu *vcpu, gva_t gva,
- struct x86_exception *exception);
gpa_t kvm_mmu_gva_to_gpa_write(struct kvm_vcpu *vcpu, gva_t gva,
struct x86_exception *exception);
gpa_t kvm_mmu_gva_to_gpa_system(struct kvm_vcpu *vcpu, gva_t gva,
@@ -1994,14 +2057,18 @@ enum {
#define HF_NMI_MASK (1 << 3)
#define HF_IRET_MASK (1 << 4)
#define HF_GUEST_MASK (1 << 5) /* VCPU is in guest-mode */
+
+#ifdef CONFIG_KVM_SMM
#define HF_SMM_MASK (1 << 6)
#define HF_SMM_INSIDE_NMI_MASK (1 << 7)
-#define __KVM_VCPU_MULTIPLE_ADDRESS_SPACE
-#define KVM_ADDRESS_SPACE_NUM 2
-
-#define kvm_arch_vcpu_memslots_id(vcpu) ((vcpu)->arch.hflags & HF_SMM_MASK ? 1 : 0)
-#define kvm_memslots_for_spte_role(kvm, role) __kvm_memslots(kvm, (role).smm)
+# define __KVM_VCPU_MULTIPLE_ADDRESS_SPACE
+# define KVM_ADDRESS_SPACE_NUM 2
+# define kvm_arch_vcpu_memslots_id(vcpu) ((vcpu)->arch.hflags & HF_SMM_MASK ? 1 : 0)
+# define kvm_memslots_for_spte_role(kvm, role) __kvm_memslots(kvm, (role).smm)
+#else
+# define kvm_memslots_for_spte_role(kvm, role) __kvm_memslots(kvm, 0)
+#endif
#define KVM_ARCH_WANT_MMU_NOTIFIER
@@ -2089,14 +2156,6 @@ static inline int kvm_cpu_get_apicid(int mps_cpu)
#endif
}
-#define put_smstate(type, buf, offset, val) \
- *(type *)((buf) + (offset) - 0x7e00) = val
-
-#define GET_SMSTATE(type, buf, offset) \
- (*(type *)((buf) + (offset) - 0x7e00))
-
-int kvm_cpu_dirty_log_size(void);
-
int memslot_rmap_alloc(struct kvm_memory_slot *slot, unsigned long npages);
#define KVM_CLOCK_VALID_FLAGS \