summaryrefslogtreecommitdiff
path: root/include/linux/kvm_host.h
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2021-06-29 01:40:51 +0300
committerLinus Torvalds <torvalds@linux-foundation.org>2021-06-29 01:40:51 +0300
commit36824f198c621cebeb22966b5e244378fa341295 (patch)
treeee1e358a4ed0cd022ae12b4b7ba1fa3d0e5746d5 /include/linux/kvm_host.h
parent9840cfcb97fc8b6aa7b36cec3cc3fd763f14052e (diff)
parentb8917b4ae44d1b945f6fba3d8ee6777edb44633b (diff)
downloadlinux-36824f198c621cebeb22966b5e244378fa341295.tar.xz
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull kvm updates from Paolo Bonzini: "This covers all architectures (except MIPS) so I don't expect any other feature pull requests this merge window. ARM: - Add MTE support in guests, complete with tag save/restore interface - Reduce the impact of CMOs by moving them in the page-table code - Allow device block mappings at stage-2 - Reduce the footprint of the vmemmap in protected mode - Support the vGIC on dumb systems such as the Apple M1 - Add selftest infrastructure to support multiple configuration and apply that to PMU/non-PMU setups - Add selftests for the debug architecture - The usual crop of PMU fixes PPC: - Support for the H_RPT_INVALIDATE hypercall - Conversion of Book3S entry/exit to C - Bug fixes S390: - new HW facilities for guests - make inline assembly more robust with KASAN and co x86: - Allow userspace to handle emulation errors (unknown instructions) - Lazy allocation of the rmap (host physical -> guest physical address) - Support for virtualizing TSC scaling on VMX machines - Optimizations to avoid shattering huge pages at the beginning of live migration - Support for initializing the PDPTRs without loading them from memory - Many TLB flushing cleanups - Refuse to load if two-stage paging is available but NX is not (this has been a requirement in practice for over a year) - A large series that separates the MMU mode (WP/SMAP/SMEP etc.) from CR0/CR4/EFER, using the MMU mode everywhere once it is computed from the CPU registers - Use PM notifier to notify the guest about host suspend or hibernate - Support for passing arguments to Hyper-V hypercalls using XMM registers - Support for Hyper-V TLB flush hypercalls and enlightened MSR bitmap on AMD processors - Hide Hyper-V hypercalls that are not included in the guest CPUID - Fixes for live migration of virtual machines that use the Hyper-V "enlightened VMCS" optimization of nested virtualization - Bugfixes (not many) Generic: - Support for retrieving statistics without debugfs - Cleanups for the KVM selftests API" * tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (314 commits) KVM: x86: rename apic_access_page_done to apic_access_memslot_enabled kvm: x86: disable the narrow guest module parameter on unload selftests: kvm: Allows userspace to handle emulation errors. kvm: x86: Allow userspace to handle emulation errors KVM: x86/mmu: Let guest use GBPAGES if supported in hardware and TDP is on KVM: x86/mmu: Get CR4.SMEP from MMU, not vCPU, in shadow page fault KVM: x86/mmu: Get CR0.WP from MMU, not vCPU, in shadow page fault KVM: x86/mmu: Drop redundant rsvd bits reset for nested NPT KVM: x86/mmu: Optimize and clean up so called "last nonleaf level" logic KVM: x86: Enhance comments for MMU roles and nested transition trickiness KVM: x86/mmu: WARN on any reserved SPTE value when making a valid SPTE KVM: x86/mmu: Add helpers to do full reserved SPTE checks w/ generic MMU KVM: x86/mmu: Use MMU's role to determine PTTYPE KVM: x86/mmu: Collapse 32-bit PAE and 64-bit statements for helpers KVM: x86/mmu: Add a helper to calculate root from role_regs KVM: x86/mmu: Add helper to update paging metadata KVM: x86/mmu: Don't update nested guest's paging bitmasks if CR0.PG=0 KVM: x86/mmu: Consolidate reset_rsvds_bits_mask() calls KVM: x86/mmu: Use MMU role_regs to get LA57, and drop vCPU LA57 helper KVM: x86/mmu: Get nested MMU's root level from the MMU's role ...
Diffstat (limited to 'include/linux/kvm_host.h')
-rw-r--r--include/linux/kvm_host.h128
1 files changed, 113 insertions, 15 deletions
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 8583ed3ff344..ae7735b490b4 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -28,6 +28,7 @@
#include <linux/rcuwait.h>
#include <linux/refcount.h>
#include <linux/nospec.h>
+#include <linux/notifier.h>
#include <asm/signal.h>
#include <linux/kvm.h>
@@ -304,7 +305,6 @@ struct kvm_vcpu {
struct pid __rcu *pid;
int sigset_active;
sigset_t sigset;
- struct kvm_vcpu_stat stat;
unsigned int halt_poll_ns;
bool valid_wakeup;
@@ -341,6 +341,8 @@ struct kvm_vcpu {
bool preempted;
bool ready;
struct kvm_vcpu_arch arch;
+ struct kvm_vcpu_stat stat;
+ char stats_id[KVM_STATS_NAME_SIZE];
struct kvm_dirty_ring dirty_ring;
};
@@ -523,6 +525,15 @@ struct kvm {
#endif /* KVM_HAVE_MMU_RWLOCK */
struct mutex slots_lock;
+
+ /*
+ * Protects the arch-specific fields of struct kvm_memory_slots in
+ * use by the VM. To be used under the slots_lock (above) or in a
+ * kvm->srcu critical section where acquiring the slots_lock would
+ * lead to deadlock with the synchronize_srcu in
+ * install_new_memslots.
+ */
+ struct mutex slots_arch_lock;
struct mm_struct *mm; /* userspace tied to this vm */
struct kvm_memslots __rcu *memslots[KVM_ADDRESS_SPACE_NUM];
struct kvm_vcpu *vcpus[KVM_MAX_VCPUS];
@@ -585,6 +596,11 @@ struct kvm {
pid_t userspace_pid;
unsigned int max_halt_poll_ns;
u32 dirty_ring_size;
+
+#ifdef CONFIG_HAVE_KVM_PM_NOTIFIER
+ struct notifier_block pm_notifier;
+#endif
+ char stats_id[KVM_STATS_NAME_SIZE];
};
#define kvm_err(fmt, ...) \
@@ -998,6 +1014,10 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu);
void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu);
void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu);
+#ifdef CONFIG_HAVE_KVM_PM_NOTIFIER
+int kvm_arch_pm_notifier(struct kvm *kvm, unsigned long state);
+#endif
+
#ifdef __KVM_HAVE_ARCH_VCPU_DEBUGFS
void kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu, struct dentry *debugfs_dentry);
#endif
@@ -1244,26 +1264,104 @@ enum kvm_stat_kind {
struct kvm_stat_data {
struct kvm *kvm;
- struct kvm_stats_debugfs_item *dbgfs_item;
-};
-
-struct kvm_stats_debugfs_item {
- const char *name;
- int offset;
+ const struct _kvm_stats_desc *desc;
enum kvm_stat_kind kind;
- int mode;
};
-#define KVM_DBGFS_GET_MODE(dbgfs_item) \
- ((dbgfs_item)->mode ? (dbgfs_item)->mode : 0644)
+struct _kvm_stats_desc {
+ struct kvm_stats_desc desc;
+ char name[KVM_STATS_NAME_SIZE];
+};
-#define VM_STAT(n, x, ...) \
- { n, offsetof(struct kvm, stat.x), KVM_STAT_VM, ## __VA_ARGS__ }
-#define VCPU_STAT(n, x, ...) \
- { n, offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU, ## __VA_ARGS__ }
+#define STATS_DESC_COMMON(type, unit, base, exp) \
+ .flags = type | unit | base | \
+ BUILD_BUG_ON_ZERO(type & ~KVM_STATS_TYPE_MASK) | \
+ BUILD_BUG_ON_ZERO(unit & ~KVM_STATS_UNIT_MASK) | \
+ BUILD_BUG_ON_ZERO(base & ~KVM_STATS_BASE_MASK), \
+ .exponent = exp, \
+ .size = 1
+
+#define VM_GENERIC_STATS_DESC(stat, type, unit, base, exp) \
+ { \
+ { \
+ STATS_DESC_COMMON(type, unit, base, exp), \
+ .offset = offsetof(struct kvm_vm_stat, generic.stat) \
+ }, \
+ .name = #stat, \
+ }
+#define VCPU_GENERIC_STATS_DESC(stat, type, unit, base, exp) \
+ { \
+ { \
+ STATS_DESC_COMMON(type, unit, base, exp), \
+ .offset = offsetof(struct kvm_vcpu_stat, generic.stat) \
+ }, \
+ .name = #stat, \
+ }
+#define VM_STATS_DESC(stat, type, unit, base, exp) \
+ { \
+ { \
+ STATS_DESC_COMMON(type, unit, base, exp), \
+ .offset = offsetof(struct kvm_vm_stat, stat) \
+ }, \
+ .name = #stat, \
+ }
+#define VCPU_STATS_DESC(stat, type, unit, base, exp) \
+ { \
+ { \
+ STATS_DESC_COMMON(type, unit, base, exp), \
+ .offset = offsetof(struct kvm_vcpu_stat, stat) \
+ }, \
+ .name = #stat, \
+ }
+/* SCOPE: VM, VM_GENERIC, VCPU, VCPU_GENERIC */
+#define STATS_DESC(SCOPE, stat, type, unit, base, exp) \
+ SCOPE##_STATS_DESC(stat, type, unit, base, exp)
+
+#define STATS_DESC_CUMULATIVE(SCOPE, name, unit, base, exponent) \
+ STATS_DESC(SCOPE, name, KVM_STATS_TYPE_CUMULATIVE, unit, base, exponent)
+#define STATS_DESC_INSTANT(SCOPE, name, unit, base, exponent) \
+ STATS_DESC(SCOPE, name, KVM_STATS_TYPE_INSTANT, unit, base, exponent)
+#define STATS_DESC_PEAK(SCOPE, name, unit, base, exponent) \
+ STATS_DESC(SCOPE, name, KVM_STATS_TYPE_PEAK, unit, base, exponent)
+
+/* Cumulative counter, read/write */
+#define STATS_DESC_COUNTER(SCOPE, name) \
+ STATS_DESC_CUMULATIVE(SCOPE, name, KVM_STATS_UNIT_NONE, \
+ KVM_STATS_BASE_POW10, 0)
+/* Instantaneous counter, read only */
+#define STATS_DESC_ICOUNTER(SCOPE, name) \
+ STATS_DESC_INSTANT(SCOPE, name, KVM_STATS_UNIT_NONE, \
+ KVM_STATS_BASE_POW10, 0)
+/* Peak counter, read/write */
+#define STATS_DESC_PCOUNTER(SCOPE, name) \
+ STATS_DESC_PEAK(SCOPE, name, KVM_STATS_UNIT_NONE, \
+ KVM_STATS_BASE_POW10, 0)
+
+/* Cumulative time in nanosecond */
+#define STATS_DESC_TIME_NSEC(SCOPE, name) \
+ STATS_DESC_CUMULATIVE(SCOPE, name, KVM_STATS_UNIT_SECONDS, \
+ KVM_STATS_BASE_POW10, -9)
+
+#define KVM_GENERIC_VM_STATS() \
+ STATS_DESC_COUNTER(VM_GENERIC, remote_tlb_flush)
+
+#define KVM_GENERIC_VCPU_STATS() \
+ STATS_DESC_COUNTER(VCPU_GENERIC, halt_successful_poll), \
+ STATS_DESC_COUNTER(VCPU_GENERIC, halt_attempted_poll), \
+ STATS_DESC_COUNTER(VCPU_GENERIC, halt_poll_invalid), \
+ STATS_DESC_COUNTER(VCPU_GENERIC, halt_wakeup), \
+ STATS_DESC_TIME_NSEC(VCPU_GENERIC, halt_poll_success_ns), \
+ STATS_DESC_TIME_NSEC(VCPU_GENERIC, halt_poll_fail_ns)
-extern struct kvm_stats_debugfs_item debugfs_entries[];
extern struct dentry *kvm_debugfs_dir;
+ssize_t kvm_stats_read(char *id, const struct kvm_stats_header *header,
+ const struct _kvm_stats_desc *desc,
+ void *stats, size_t size_stats,
+ char __user *user_buffer, size_t size, loff_t *offset);
+extern const struct kvm_stats_header kvm_vm_stats_header;
+extern const struct _kvm_stats_desc kvm_vm_stats_desc[];
+extern const struct kvm_stats_header kvm_vcpu_stats_header;
+extern const struct _kvm_stats_desc kvm_vcpu_stats_desc[];
#if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER)
static inline int mmu_notifier_retry(struct kvm *kvm, unsigned long mmu_seq)