summaryrefslogtreecommitdiff
path: root/arch/x86/include
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/include')
-rw-r--r--arch/x86/include/asm/audit.h7
-rw-r--r--arch/x86/include/asm/boot.h45
-rw-r--r--arch/x86/include/asm/cpufeatures.h1
-rw-r--r--arch/x86/include/asm/efi.h32
-rw-r--r--arch/x86/include/asm/hyperv-tlfs.h10
-rw-r--r--arch/x86/include/asm/kexec.h2
-rw-r--r--arch/x86/include/asm/kvm_host.h49
-rw-r--r--arch/x86/include/asm/kvm_page_track.h67
-rw-r--r--arch/x86/include/asm/linkage.h15
-rw-r--r--arch/x86/include/asm/mman.h15
-rw-r--r--arch/x86/include/asm/mmu_context.h3
-rw-r--r--arch/x86/include/asm/mshyperv.h73
-rw-r--r--arch/x86/include/asm/paravirt_types.h15
-rw-r--r--arch/x86/include/asm/pgtable.h8
-rw-r--r--arch/x86/include/asm/processor.h2
-rw-r--r--arch/x86/include/asm/reboot.h7
-rw-r--r--arch/x86/include/asm/svm.h5
-rw-r--r--arch/x86/include/asm/virtext.h154
-rw-r--r--arch/x86/include/asm/vmx.h2
-rw-r--r--arch/x86/include/asm/xen/hypervisor.h37
-rw-r--r--arch/x86/include/uapi/asm/mman.h8
21 files changed, 260 insertions, 297 deletions
diff --git a/arch/x86/include/asm/audit.h b/arch/x86/include/asm/audit.h
index 36aec57ea7a3..fa918f01333e 100644
--- a/arch/x86/include/asm/audit.h
+++ b/arch/x86/include/asm/audit.h
@@ -4,4 +4,11 @@
int ia32_classify_syscall(unsigned int syscall);
+extern unsigned ia32_dir_class[];
+extern unsigned ia32_write_class[];
+extern unsigned ia32_read_class[];
+extern unsigned ia32_chattr_class[];
+extern unsigned ia32_signal_class[];
+
+
#endif /* _ASM_X86_AUDIT_H */
diff --git a/arch/x86/include/asm/boot.h b/arch/x86/include/asm/boot.h
index 4ae14339cb8c..b3a7cfb0d99e 100644
--- a/arch/x86/include/asm/boot.h
+++ b/arch/x86/include/asm/boot.h
@@ -40,23 +40,40 @@
#ifdef CONFIG_X86_64
# define BOOT_STACK_SIZE 0x4000
+/*
+ * Used by decompressor's startup_32() to allocate page tables for identity
+ * mapping of the 4G of RAM in 4-level paging mode:
+ * - 1 level4 table;
+ * - 1 level3 table;
+ * - 4 level2 table that maps everything with 2M pages;
+ *
+ * The additional level5 table needed for 5-level paging is allocated from
+ * trampoline_32bit memory.
+ */
# define BOOT_INIT_PGT_SIZE (6*4096)
-# ifdef CONFIG_RANDOMIZE_BASE
+
/*
- * Assuming all cross the 512GB boundary:
- * 1 page for level4
- * (2+2)*4 pages for kernel, param, cmd_line, and randomized kernel
- * 2 pages for first 2M (video RAM: CONFIG_X86_VERBOSE_BOOTUP).
- * Total is 19 pages.
+ * Total number of page tables kernel_add_identity_map() can allocate,
+ * including page tables consumed by startup_32().
+ *
+ * Worst-case scenario:
+ * - 5-level paging needs 1 level5 table;
+ * - KASLR needs to map kernel, boot_params, cmdline and randomized kernel,
+ * assuming all of them cross 256T boundary:
+ * + 4*2 level4 table;
+ * + 4*2 level3 table;
+ * + 4*2 level2 table;
+ * - X86_VERBOSE_BOOTUP needs to map the first 2M (video RAM):
+ * + 1 level4 table;
+ * + 1 level3 table;
+ * + 1 level2 table;
+ * Total: 28 tables
+ *
+ * Add 4 spare table in case decompressor touches anything beyond what is
+ * accounted above. Warn if it happens.
*/
-# ifdef CONFIG_X86_VERBOSE_BOOTUP
-# define BOOT_PGT_SIZE (19*4096)
-# else /* !CONFIG_X86_VERBOSE_BOOTUP */
-# define BOOT_PGT_SIZE (17*4096)
-# endif
-# else /* !CONFIG_RANDOMIZE_BASE */
-# define BOOT_PGT_SIZE BOOT_INIT_PGT_SIZE
-# endif
+# define BOOT_PGT_SIZE_WARN (28*4096)
+# define BOOT_PGT_SIZE (32*4096)
#else /* !CONFIG_X86_64 */
# define BOOT_STACK_SIZE 0x1000
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 2061ed1c398f..58cb9495e40f 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -439,6 +439,7 @@
#define X86_FEATURE_SEV_ES (19*32+ 3) /* AMD Secure Encrypted Virtualization - Encrypted State */
#define X86_FEATURE_V_TSC_AUX (19*32+ 9) /* "" Virtual TSC_AUX */
#define X86_FEATURE_SME_COHERENT (19*32+10) /* "" AMD hardware-enforced cache coherency */
+#define X86_FEATURE_DEBUG_SWAP (19*32+14) /* AMD SEV-ES full debug state swap support */
/* AMD-defined Extended Feature 2 EAX, CPUID level 0x80000021 (EAX), word 20 */
#define X86_FEATURE_NO_NESTED_DATA_BP (20*32+ 0) /* "" No Nested Data Breakpoints */
diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h
index b0994ae3bc23..c4555b269a1b 100644
--- a/arch/x86/include/asm/efi.h
+++ b/arch/x86/include/asm/efi.h
@@ -91,19 +91,6 @@ static inline void efi_fpu_end(void)
#ifdef CONFIG_X86_32
#define EFI_X86_KERNEL_ALLOC_LIMIT (SZ_512M - 1)
-
-#define arch_efi_call_virt_setup() \
-({ \
- efi_fpu_begin(); \
- firmware_restrict_branch_speculation_start(); \
-})
-
-#define arch_efi_call_virt_teardown() \
-({ \
- firmware_restrict_branch_speculation_end(); \
- efi_fpu_end(); \
-})
-
#else /* !CONFIG_X86_32 */
#define EFI_X86_KERNEL_ALLOC_LIMIT EFI_ALLOC_LIMIT
@@ -116,14 +103,6 @@ extern bool efi_disable_ibt_for_runtime;
__efi_call(__VA_ARGS__); \
})
-#define arch_efi_call_virt_setup() \
-({ \
- efi_sync_low_kernel_mappings(); \
- efi_fpu_begin(); \
- firmware_restrict_branch_speculation_start(); \
- efi_enter_mm(); \
-})
-
#undef arch_efi_call_virt
#define arch_efi_call_virt(p, f, args...) ({ \
u64 ret, ibt = ibt_save(efi_disable_ibt_for_runtime); \
@@ -132,13 +111,6 @@ extern bool efi_disable_ibt_for_runtime;
ret; \
})
-#define arch_efi_call_virt_teardown() \
-({ \
- efi_leave_mm(); \
- firmware_restrict_branch_speculation_end(); \
- efi_fpu_end(); \
-})
-
#ifdef CONFIG_KASAN
/*
* CONFIG_KASAN may redefine memset to __memset. __memset function is present
@@ -168,8 +140,8 @@ extern void efi_delete_dummy_variable(void);
extern void efi_crash_gracefully_on_page_fault(unsigned long phys_addr);
extern void efi_free_boot_services(void);
-void efi_enter_mm(void);
-void efi_leave_mm(void);
+void arch_efi_call_virt_setup(void);
+void arch_efi_call_virt_teardown(void);
/* kexec external ABI */
struct efi_setup_data {
diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h
index cea95dcd27c2..2ff26f53cd62 100644
--- a/arch/x86/include/asm/hyperv-tlfs.h
+++ b/arch/x86/include/asm/hyperv-tlfs.h
@@ -169,7 +169,8 @@
enum hv_isolation_type {
HV_ISOLATION_TYPE_NONE = 0,
HV_ISOLATION_TYPE_VBS = 1,
- HV_ISOLATION_TYPE_SNP = 2
+ HV_ISOLATION_TYPE_SNP = 2,
+ HV_ISOLATION_TYPE_TDX = 3
};
/* Hyper-V specific model specific registers (MSRs) */
@@ -301,6 +302,13 @@ enum hv_isolation_type {
#define HV_X64_MSR_TIME_REF_COUNT HV_REGISTER_TIME_REF_COUNT
#define HV_X64_MSR_REFERENCE_TSC HV_REGISTER_REFERENCE_TSC
+/*
+ * Registers are only accessible via HVCALL_GET_VP_REGISTERS hvcall and
+ * there is not associated MSR address.
+ */
+#define HV_X64_REGISTER_VSM_VP_STATUS 0x000D0003
+#define HV_X64_VTL_MASK GENMASK(3, 0)
+
/* Hyper-V memory host visibility */
enum hv_mem_host_visibility {
VMBUS_PAGE_NOT_VISIBLE = 0,
diff --git a/arch/x86/include/asm/kexec.h b/arch/x86/include/asm/kexec.h
index 3be6a98751f0..c9f6a6c5de3c 100644
--- a/arch/x86/include/asm/kexec.h
+++ b/arch/x86/include/asm/kexec.h
@@ -205,8 +205,6 @@ int arch_kimage_file_post_load_cleanup(struct kimage *image);
#endif
#endif
-typedef void crash_vmclear_fn(void);
-extern crash_vmclear_fn __rcu *crash_vmclear_loaded_vmcss;
extern void kdump_nmi_shootdown_cpus(void);
#ifdef CONFIG_CRASH_HOTPLUG
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 3bc146dfd38d..17715cb8731d 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -288,13 +288,13 @@ struct kvm_kernel_irq_routing_entry;
* kvm_mmu_page_role tracks the properties of a shadow page (where shadow page
* also includes TDP pages) to determine whether or not a page can be used in
* the given MMU context. This is a subset of the overall kvm_cpu_role to
- * minimize the size of kvm_memory_slot.arch.gfn_track, i.e. allows allocating
- * 2 bytes per gfn instead of 4 bytes per gfn.
+ * minimize the size of kvm_memory_slot.arch.gfn_write_track, i.e. allows
+ * allocating 2 bytes per gfn instead of 4 bytes per gfn.
*
* Upper-level shadow pages having gptes are tracked for write-protection via
- * gfn_track. As above, gfn_track is a 16 bit counter, so KVM must not create
- * more than 2^16-1 upper-level shadow pages at a single gfn, otherwise
- * gfn_track will overflow and explosions will ensure.
+ * gfn_write_track. As above, gfn_write_track is a 16 bit counter, so KVM must
+ * not create more than 2^16-1 upper-level shadow pages at a single gfn,
+ * otherwise gfn_write_track will overflow and explosions will ensue.
*
* A unique shadow page (SP) for a gfn is created if and only if an existing SP
* cannot be reused. The ability to reuse a SP is tracked by its role, which
@@ -746,7 +746,6 @@ struct kvm_vcpu_arch {
u64 smi_count;
bool at_instruction_boundary;
bool tpr_access_reporting;
- bool xsaves_enabled;
bool xfd_no_write_intercept;
u64 ia32_xss;
u64 microcode_version;
@@ -831,6 +830,25 @@ struct kvm_vcpu_arch {
struct kvm_cpuid_entry2 *cpuid_entries;
struct kvm_hypervisor_cpuid kvm_cpuid;
+ /*
+ * FIXME: Drop this macro and use KVM_NR_GOVERNED_FEATURES directly
+ * when "struct kvm_vcpu_arch" is no longer defined in an
+ * arch/x86/include/asm header. The max is mostly arbitrary, i.e.
+ * can be increased as necessary.
+ */
+#define KVM_MAX_NR_GOVERNED_FEATURES BITS_PER_LONG
+
+ /*
+ * Track whether or not the guest is allowed to use features that are
+ * governed by KVM, where "governed" means KVM needs to manage state
+ * and/or explicitly enable the feature in hardware. Typically, but
+ * not always, governed features can be used by the guest if and only
+ * if both KVM and userspace want to expose the feature to the guest.
+ */
+ struct {
+ DECLARE_BITMAP(enabled, KVM_MAX_NR_GOVERNED_FEATURES);
+ } governed_features;
+
u64 reserved_gpa_bits;
int maxphyaddr;
@@ -1005,7 +1023,7 @@ struct kvm_lpage_info {
struct kvm_arch_memory_slot {
struct kvm_rmap_head *rmap[KVM_NR_PAGE_SIZES];
struct kvm_lpage_info *lpage_info[KVM_NR_PAGE_SIZES - 1];
- unsigned short *gfn_track[KVM_PAGE_TRACK_MAX];
+ unsigned short *gfn_write_track;
};
/*
@@ -1247,8 +1265,9 @@ struct kvm_arch {
* create an NX huge page (without hanging the guest).
*/
struct list_head possible_nx_huge_pages;
- struct kvm_page_track_notifier_node mmu_sp_tracker;
+#ifdef CONFIG_KVM_EXTERNAL_WRITE_TRACKING
struct kvm_page_track_notifier_head track_notifier_head;
+#endif
/*
* Protects marking pages unsync during page faults, as TDP MMU page
* faults only take mmu_lock for read. For simplicity, the unsync
@@ -1400,7 +1419,6 @@ struct kvm_arch {
* the thread holds the MMU lock in write mode.
*/
spinlock_t tdp_mmu_pages_lock;
- struct workqueue_struct *tdp_mmu_zap_wq;
#endif /* CONFIG_X86_64 */
/*
@@ -1655,8 +1673,8 @@ struct kvm_x86_ops {
u64 (*get_l2_tsc_offset)(struct kvm_vcpu *vcpu);
u64 (*get_l2_tsc_multiplier)(struct kvm_vcpu *vcpu);
- void (*write_tsc_offset)(struct kvm_vcpu *vcpu, u64 offset);
- void (*write_tsc_multiplier)(struct kvm_vcpu *vcpu, u64 multiplier);
+ void (*write_tsc_offset)(struct kvm_vcpu *vcpu);
+ void (*write_tsc_multiplier)(struct kvm_vcpu *vcpu);
/*
* Retrieve somewhat arbitrary exit information. Intended to
@@ -1795,8 +1813,8 @@ static inline struct kvm *kvm_arch_alloc_vm(void)
#define __KVM_HAVE_ARCH_VM_FREE
void kvm_arch_free_vm(struct kvm *kvm);
-#define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLB
-static inline int kvm_arch_flush_remote_tlb(struct kvm *kvm)
+#define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS
+static inline int kvm_arch_flush_remote_tlbs(struct kvm *kvm)
{
if (kvm_x86_ops.flush_remote_tlbs &&
!static_call(kvm_x86_flush_remote_tlbs)(kvm))
@@ -1805,6 +1823,8 @@ static inline int kvm_arch_flush_remote_tlb(struct kvm *kvm)
return -ENOTSUPP;
}
+#define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS_RANGE
+
#define kvm_arch_pmi_in_guest(vcpu) \
((vcpu) && (vcpu)->arch.handling_intr_from_guest)
@@ -1814,7 +1834,7 @@ void kvm_mmu_vendor_module_exit(void);
void kvm_mmu_destroy(struct kvm_vcpu *vcpu);
int kvm_mmu_create(struct kvm_vcpu *vcpu);
-int kvm_mmu_init_vm(struct kvm *kvm);
+void kvm_mmu_init_vm(struct kvm *kvm);
void kvm_mmu_uninit_vm(struct kvm *kvm);
void kvm_mmu_after_set_cpuid(struct kvm_vcpu *vcpu);
@@ -1833,7 +1853,6 @@ void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm,
const struct kvm_memory_slot *memslot);
void kvm_mmu_slot_leaf_clear_dirty(struct kvm *kvm,
const struct kvm_memory_slot *memslot);
-void kvm_mmu_zap_all(struct kvm *kvm);
void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm, u64 gen);
void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned long kvm_nr_mmu_pages);
diff --git a/arch/x86/include/asm/kvm_page_track.h b/arch/x86/include/asm/kvm_page_track.h
index eb186bc57f6a..3d040741044b 100644
--- a/arch/x86/include/asm/kvm_page_track.h
+++ b/arch/x86/include/asm/kvm_page_track.h
@@ -2,11 +2,9 @@
#ifndef _ASM_X86_KVM_PAGE_TRACK_H
#define _ASM_X86_KVM_PAGE_TRACK_H
-enum kvm_page_track_mode {
- KVM_PAGE_TRACK_WRITE,
- KVM_PAGE_TRACK_MAX,
-};
+#include <linux/kvm_types.h>
+#ifdef CONFIG_KVM_EXTERNAL_WRITE_TRACKING
/*
* The notifier represented by @kvm_page_track_notifier_node is linked into
* the head which will be notified when guest is triggering the track event.
@@ -26,54 +24,39 @@ struct kvm_page_track_notifier_node {
* It is called when guest is writing the write-tracked page
* and write emulation is finished at that time.
*
- * @vcpu: the vcpu where the write access happened.
* @gpa: the physical address written by guest.
* @new: the data was written to the address.
* @bytes: the written length.
* @node: this node
*/
- void (*track_write)(struct kvm_vcpu *vcpu, gpa_t gpa, const u8 *new,
- int bytes, struct kvm_page_track_notifier_node *node);
+ void (*track_write)(gpa_t gpa, const u8 *new, int bytes,
+ struct kvm_page_track_notifier_node *node);
+
/*
- * It is called when memory slot is being moved or removed
- * users can drop write-protection for the pages in that memory slot
+ * Invoked when a memory region is removed from the guest. Or in KVM
+ * terms, when a memslot is deleted.
*
- * @kvm: the kvm where memory slot being moved or removed
- * @slot: the memory slot being moved or removed
- * @node: this node
+ * @gfn: base gfn of the region being removed
+ * @nr_pages: number of pages in the to-be-removed region
+ * @node: this node
*/
- void (*track_flush_slot)(struct kvm *kvm, struct kvm_memory_slot *slot,
- struct kvm_page_track_notifier_node *node);
+ void (*track_remove_region)(gfn_t gfn, unsigned long nr_pages,
+ struct kvm_page_track_notifier_node *node);
};
-int kvm_page_track_init(struct kvm *kvm);
-void kvm_page_track_cleanup(struct kvm *kvm);
+int kvm_page_track_register_notifier(struct kvm *kvm,
+ struct kvm_page_track_notifier_node *n);
+void kvm_page_track_unregister_notifier(struct kvm *kvm,
+ struct kvm_page_track_notifier_node *n);
-bool kvm_page_track_write_tracking_enabled(struct kvm *kvm);
-int kvm_page_track_write_tracking_alloc(struct kvm_memory_slot *slot);
-
-void kvm_page_track_free_memslot(struct kvm_memory_slot *slot);
-int kvm_page_track_create_memslot(struct kvm *kvm,
- struct kvm_memory_slot *slot,
- unsigned long npages);
-
-void kvm_slot_page_track_add_page(struct kvm *kvm,
- struct kvm_memory_slot *slot, gfn_t gfn,
- enum kvm_page_track_mode mode);
-void kvm_slot_page_track_remove_page(struct kvm *kvm,
- struct kvm_memory_slot *slot, gfn_t gfn,
- enum kvm_page_track_mode mode);
-bool kvm_slot_page_track_is_active(struct kvm *kvm,
- const struct kvm_memory_slot *slot,
- gfn_t gfn, enum kvm_page_track_mode mode);
+int kvm_write_track_add_gfn(struct kvm *kvm, gfn_t gfn);
+int kvm_write_track_remove_gfn(struct kvm *kvm, gfn_t gfn);
+#else
+/*
+ * Allow defining a node in a structure even if page tracking is disabled, e.g.
+ * to play nice with testing headers via direct inclusion from the command line.
+ */
+struct kvm_page_track_notifier_node {};
+#endif /* CONFIG_KVM_EXTERNAL_WRITE_TRACKING */
-void
-kvm_page_track_register_notifier(struct kvm *kvm,
- struct kvm_page_track_notifier_node *n);
-void
-kvm_page_track_unregister_notifier(struct kvm *kvm,
- struct kvm_page_track_notifier_node *n);
-void kvm_page_track_write(struct kvm_vcpu *vcpu, gpa_t gpa, const u8 *new,
- int bytes);
-void kvm_page_track_flush_slot(struct kvm *kvm, struct kvm_memory_slot *slot);
#endif
diff --git a/arch/x86/include/asm/linkage.h b/arch/x86/include/asm/linkage.h
index 97a3de7892d3..571fe4d2d232 100644
--- a/arch/x86/include/asm/linkage.h
+++ b/arch/x86/include/asm/linkage.h
@@ -8,6 +8,14 @@
#undef notrace
#define notrace __attribute__((no_instrument_function))
+#ifdef CONFIG_64BIT
+/*
+ * The generic version tends to create spurious ENDBR instructions under
+ * certain conditions.
+ */
+#define _THIS_IP_ ({ unsigned long __here; asm ("lea 0(%%rip), %0" : "=r" (__here)); __here; })
+#endif
+
#ifdef CONFIG_X86_32
#define asmlinkage CPP_ASMLINKAGE __attribute__((regparm(0)))
#endif /* CONFIG_X86_32 */
@@ -97,6 +105,13 @@
CFI_POST_PADDING \
SYM_FUNC_END(__cfi_##name)
+/* UML needs to be able to override memcpy() and friends for KASAN. */
+#ifdef CONFIG_UML
+# define SYM_FUNC_ALIAS_MEMFUNC SYM_FUNC_ALIAS_WEAK
+#else
+# define SYM_FUNC_ALIAS_MEMFUNC SYM_FUNC_ALIAS
+#endif
+
/* SYM_TYPED_FUNC_START -- use for indirectly called globals, w/ CFI type */
#define SYM_TYPED_FUNC_START(name) \
SYM_TYPED_START(name, SYM_L_GLOBAL, SYM_F_ALIGN) \
diff --git a/arch/x86/include/asm/mman.h b/arch/x86/include/asm/mman.h
new file mode 100644
index 000000000000..12b820259b9f
--- /dev/null
+++ b/arch/x86/include/asm/mman.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_MMAN_H__
+#define __ASM_MMAN_H__
+
+#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
+#define arch_calc_vm_prot_bits(prot, key) ( \
+ ((key) & 0x1 ? VM_PKEY_BIT0 : 0) | \
+ ((key) & 0x2 ? VM_PKEY_BIT1 : 0) | \
+ ((key) & 0x4 ? VM_PKEY_BIT2 : 0) | \
+ ((key) & 0x8 ? VM_PKEY_BIT3 : 0))
+#endif
+
+#include <uapi/asm/mman.h>
+
+#endif /* __ASM_MMAN_H__ */
diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index 416901d406f8..8dac45a2c7fc 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -186,8 +186,7 @@ do { \
#else
#define deactivate_mm(tsk, mm) \
do { \
- if (!tsk->vfork_done) \
- shstk_free(tsk); \
+ shstk_free(tsk); \
load_gs_index(0); \
loadsegment(fs, 0); \
} while (0)
diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
index fa83d88e4c99..896445edc6a8 100644
--- a/arch/x86/include/asm/mshyperv.h
+++ b/arch/x86/include/asm/mshyperv.h
@@ -26,6 +26,7 @@
union hv_ghcb;
DECLARE_STATIC_KEY_FALSE(isolation_type_snp);
+DECLARE_STATIC_KEY_FALSE(isolation_type_tdx);
typedef int (*hyperv_fill_flush_list_func)(
struct hv_guest_mapping_flush_list *flush,
@@ -40,6 +41,7 @@ static inline unsigned char hv_get_nmi_reason(void)
#if IS_ENABLED(CONFIG_HYPERV)
extern int hyperv_init_cpuhp;
+extern bool hyperv_paravisor_present;
extern void *hv_hypercall_pg;
@@ -47,10 +49,25 @@ extern u64 hv_current_partition_id;
extern union hv_ghcb * __percpu *hv_ghcb_pg;
+bool hv_isolation_type_snp(void);
+bool hv_isolation_type_tdx(void);
+u64 hv_tdx_hypercall(u64 control, u64 param1, u64 param2);
+
+/*
+ * DEFAULT INIT GPAT and SEGMENT LIMIT value in struct VMSA
+ * to start AP in enlightened SEV guest.
+ */
+#define HV_AP_INIT_GPAT_DEFAULT 0x0007040600070406ULL
+#define HV_AP_SEGMENT_LIMIT 0xffffffff
+
int hv_call_deposit_pages(int node, u64 partition_id, u32 num_pages);
int hv_call_add_logical_proc(int node, u32 lp_index, u32 acpi_id);
int hv_call_create_vp(int node, u64 partition_id, u32 vp_index, u32 flags);
+/*
+ * If the hypercall involves no input or output parameters, the hypervisor
+ * ignores the corresponding GPA pointer.
+ */
static inline u64 hv_do_hypercall(u64 control, void *input, void *output)
{
u64 input_address = input ? virt_to_phys(input) : 0;
@@ -58,6 +75,19 @@ static inline u64 hv_do_hypercall(u64 control, void *input, void *output)
u64 hv_status;
#ifdef CONFIG_X86_64
+ if (hv_isolation_type_tdx() && !hyperv_paravisor_present)
+ return hv_tdx_hypercall(control, input_address, output_address);
+
+ if (hv_isolation_type_snp() && !hyperv_paravisor_present) {
+ __asm__ __volatile__("mov %4, %%r8\n"
+ "vmmcall"
+ : "=a" (hv_status), ASM_CALL_CONSTRAINT,
+ "+c" (control), "+d" (input_address)
+ : "r" (output_address)
+ : "cc", "memory", "r8", "r9", "r10", "r11");
+ return hv_status;
+ }
+
if (!hv_hypercall_pg)
return U64_MAX;
@@ -101,7 +131,16 @@ static inline u64 _hv_do_fast_hypercall8(u64 control, u64 input1)
u64 hv_status;
#ifdef CONFIG_X86_64
- {
+ if (hv_isolation_type_tdx() && !hyperv_paravisor_present)
+ return hv_tdx_hypercall(control, input1, 0);
+
+ if (hv_isolation_type_snp() && !hyperv_paravisor_present) {
+ __asm__ __volatile__(
+ "vmmcall"
+ : "=a" (hv_status), ASM_CALL_CONSTRAINT,
+ "+c" (control), "+d" (input1)
+ :: "cc", "r8", "r9", "r10", "r11");
+ } else {
__asm__ __volatile__(CALL_NOSPEC
: "=a" (hv_status), ASM_CALL_CONSTRAINT,
"+c" (control), "+d" (input1)
@@ -146,7 +185,17 @@ static inline u64 _hv_do_fast_hypercall16(u64 control, u64 input1, u64 input2)
u64 hv_status;
#ifdef CONFIG_X86_64
- {
+ if (hv_isolation_type_tdx() && !hyperv_paravisor_present)
+ return hv_tdx_hypercall(control, input1, input2);
+
+ if (hv_isolation_type_snp() && !hyperv_paravisor_present) {
+ __asm__ __volatile__("mov %4, %%r8\n"
+ "vmmcall"
+ : "=a" (hv_status), ASM_CALL_CONSTRAINT,
+ "+c" (control), "+d" (input1)
+ : "r" (input2)
+ : "cc", "r8", "r9", "r10", "r11");
+ } else {
__asm__ __volatile__("mov %4, %%r8\n"
CALL_NOSPEC
: "=a" (hv_status), ASM_CALL_CONSTRAINT,
@@ -225,20 +274,24 @@ int hv_map_ioapic_interrupt(int ioapic_id, bool level, int vcpu, int vector,
int hv_unmap_ioapic_interrupt(int ioapic_id, struct hv_interrupt_entry *entry);
#ifdef CONFIG_AMD_MEM_ENCRYPT
-void hv_ghcb_msr_write(u64 msr, u64 value);
-void hv_ghcb_msr_read(u64 msr, u64 *value);
bool hv_ghcb_negotiate_protocol(void);
void __noreturn hv_ghcb_terminate(unsigned int set, unsigned int reason);
-void hv_vtom_init(void);
+int hv_snp_boot_ap(int cpu, unsigned long start_ip);
#else
-static inline void hv_ghcb_msr_write(u64 msr, u64 value) {}
-static inline void hv_ghcb_msr_read(u64 msr, u64 *value) {}
static inline bool hv_ghcb_negotiate_protocol(void) { return false; }
static inline void hv_ghcb_terminate(unsigned int set, unsigned int reason) {}
-static inline void hv_vtom_init(void) {}
+static inline int hv_snp_boot_ap(int cpu, unsigned long start_ip) { return 0; }
#endif
-extern bool hv_isolation_type_snp(void);
+#if defined(CONFIG_AMD_MEM_ENCRYPT) || defined(CONFIG_INTEL_TDX_GUEST)
+void hv_vtom_init(void);
+void hv_ivm_msr_write(u64 msr, u64 value);
+void hv_ivm_msr_read(u64 msr, u64 *value);
+#else
+static inline void hv_vtom_init(void) {}
+static inline void hv_ivm_msr_write(u64 msr, u64 value) {}
+static inline void hv_ivm_msr_read(u64 msr, u64 *value) {}
+#endif
static inline bool hv_is_synic_reg(unsigned int reg)
{
@@ -287,8 +340,10 @@ static inline u64 hv_get_non_nested_register(unsigned int reg) { return 0; }
#ifdef CONFIG_HYPERV_VTL_MODE
void __init hv_vtl_init_platform(void);
+int __init hv_vtl_early_init(void);
#else
static inline void __init hv_vtl_init_platform(void) {}
+static inline int __init hv_vtl_early_init(void) { return 0; }
#endif
#include <asm-generic/mshyperv.h>
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index 4acbcddddc29..772d03487520 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -9,13 +9,6 @@ struct paravirt_patch_site {
u8 type; /* type of this instruction */
u8 len; /* length of original instruction */
};
-
-/* Lazy mode for batching updates / context switch */
-enum paravirt_lazy_mode {
- PARAVIRT_LAZY_NONE,
- PARAVIRT_LAZY_MMU,
- PARAVIRT_LAZY_CPU,
-};
#endif
#ifdef CONFIG_PARAVIRT
@@ -549,14 +542,6 @@ int paravirt_disable_iospace(void);
__PVOP_VCALL(op, PVOP_CALL_ARG1(arg1), PVOP_CALL_ARG2(arg2), \
PVOP_CALL_ARG3(arg3), PVOP_CALL_ARG4(arg4))
-enum paravirt_lazy_mode paravirt_get_lazy_mode(void);
-void paravirt_start_context_switch(struct task_struct *prev);
-void paravirt_end_context_switch(struct task_struct *next);
-
-void paravirt_enter_lazy_mmu(void);
-void paravirt_leave_lazy_mmu(void);
-void paravirt_flush_lazy_mmu(void);
-
void _paravirt_nop(void);
void paravirt_BUG(void);
unsigned long paravirt_ret0(void);
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index d6ad98ca1288..e02b179ec659 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -955,6 +955,14 @@ static inline int pte_same(pte_t a, pte_t b)
return a.pte == b.pte;
}
+static inline pte_t pte_next_pfn(pte_t pte)
+{
+ if (__pte_needs_invert(pte_val(pte)))
+ return __pte(pte_val(pte) - (1UL << PFN_PTE_SHIFT));
+ return __pte(pte_val(pte) + (1UL << PFN_PTE_SHIFT));
+}
+#define pte_next_pfn pte_next_pfn
+
static inline int pte_present(pte_t a)
{
return pte_flags(a) & (_PAGE_PRESENT | _PAGE_PROTNONE);
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 0086920cda06..a3669a7774ed 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -683,13 +683,11 @@ extern u16 get_llc_id(unsigned int cpu);
#ifdef CONFIG_CPU_SUP_AMD
extern u32 amd_get_nodes_per_socket(void);
extern u32 amd_get_highest_perf(void);
-extern bool cpu_has_ibpb_brtype_microcode(void);
extern void amd_clear_divider(void);
extern void amd_check_microcode(void);
#else
static inline u32 amd_get_nodes_per_socket(void) { return 0; }
static inline u32 amd_get_highest_perf(void) { return 0; }
-static inline bool cpu_has_ibpb_brtype_microcode(void) { return false; }
static inline void amd_clear_divider(void) { }
static inline void amd_check_microcode(void) { }
#endif
diff --git a/arch/x86/include/asm/reboot.h b/arch/x86/include/asm/reboot.h
index 9177b4354c3f..6536873f8fc0 100644
--- a/arch/x86/include/asm/reboot.h
+++ b/arch/x86/include/asm/reboot.h
@@ -25,7 +25,14 @@ void __noreturn machine_real_restart(unsigned int type);
#define MRR_BIOS 0
#define MRR_APM 1
+#if IS_ENABLED(CONFIG_KVM_INTEL) || IS_ENABLED(CONFIG_KVM_AMD)
+typedef void (cpu_emergency_virt_cb)(void);
+void cpu_emergency_register_virt_callback(cpu_emergency_virt_cb *callback);
+void cpu_emergency_unregister_virt_callback(cpu_emergency_virt_cb *callback);
void cpu_emergency_disable_virtualization(void);
+#else
+static inline void cpu_emergency_disable_virtualization(void) {}
+#endif /* CONFIG_KVM_INTEL || CONFIG_KVM_AMD */
typedef void (*nmi_shootdown_cb)(int, struct pt_regs*);
void nmi_shootdown_cpus(nmi_shootdown_cb callback);
diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h
index e7c7379d6ac7..19bf955b67e0 100644
--- a/arch/x86/include/asm/svm.h
+++ b/arch/x86/include/asm/svm.h
@@ -288,6 +288,7 @@ static_assert((X2AVIC_MAX_PHYSICAL_ID & AVIC_PHYSICAL_MAX_INDEX_MASK) == X2AVIC_
#define AVIC_HPA_MASK ~((0xFFFULL << 52) | 0xFFF)
+#define SVM_SEV_FEAT_DEBUG_SWAP BIT(5)
struct vmcb_seg {
u16 selector;
@@ -345,7 +346,7 @@ struct vmcb_save_area {
u64 last_excp_from;
u64 last_excp_to;
u8 reserved_0x298[72];
- u32 spec_ctrl; /* Guest version of SPEC_CTRL at 0x2E0 */
+ u64 spec_ctrl; /* Guest version of SPEC_CTRL at 0x2E0 */
} __packed;
/* Save area definition for SEV-ES and SEV-SNP guests */
@@ -512,7 +513,7 @@ struct ghcb {
} __packed;
-#define EXPECTED_VMCB_SAVE_AREA_SIZE 740
+#define EXPECTED_VMCB_SAVE_AREA_SIZE 744
#define EXPECTED_GHCB_SAVE_AREA_SIZE 1032
#define EXPECTED_SEV_ES_SAVE_AREA_SIZE 1648
#define EXPECTED_VMCB_CONTROL_AREA_SIZE 1024
diff --git a/arch/x86/include/asm/virtext.h b/arch/x86/include/asm/virtext.h
deleted file mode 100644
index 3b12e6b99412..000000000000
--- a/arch/x86/include/asm/virtext.h
+++ /dev/null
@@ -1,154 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/* CPU virtualization extensions handling
- *
- * This should carry the code for handling CPU virtualization extensions
- * that needs to live in the kernel core.
- *
- * Author: Eduardo Habkost <ehabkost@redhat.com>
- *
- * Copyright (C) 2008, Red Hat Inc.
- *
- * Contains code from KVM, Copyright (C) 2006 Qumranet, Inc.
- */
-#ifndef _ASM_X86_VIRTEX_H
-#define _ASM_X86_VIRTEX_H
-
-#include <asm/processor.h>
-
-#include <asm/vmx.h>
-#include <asm/svm.h>
-#include <asm/tlbflush.h>
-
-/*
- * VMX functions:
- */
-
-static inline int cpu_has_vmx(void)
-{
- unsigned long ecx = cpuid_ecx(1);
- return test_bit(5, &ecx); /* CPUID.1:ECX.VMX[bit 5] -> VT */
-}
-
-
-/**
- * cpu_vmxoff() - Disable VMX on the current CPU
- *
- * Disable VMX and clear CR4.VMXE (even if VMXOFF faults)
- *
- * Note, VMXOFF causes a #UD if the CPU is !post-VMXON, but it's impossible to
- * atomically track post-VMXON state, e.g. this may be called in NMI context.
- * Eat all faults as all other faults on VMXOFF faults are mode related, i.e.
- * faults are guaranteed to be due to the !post-VMXON check unless the CPU is
- * magically in RM, VM86, compat mode, or at CPL>0.
- */
-static inline int cpu_vmxoff(void)
-{
- asm_volatile_goto("1: vmxoff\n\t"
- _ASM_EXTABLE(1b, %l[fault])
- ::: "cc", "memory" : fault);
-
- cr4_clear_bits(X86_CR4_VMXE);
- return 0;
-
-fault:
- cr4_clear_bits(X86_CR4_VMXE);
- return -EIO;
-}
-
-static inline int cpu_vmx_enabled(void)
-{
- return __read_cr4() & X86_CR4_VMXE;
-}
-
-/** Disable VMX if it is enabled on the current CPU
- *
- * You shouldn't call this if cpu_has_vmx() returns 0.
- */
-static inline void __cpu_emergency_vmxoff(void)
-{
- if (cpu_vmx_enabled())
- cpu_vmxoff();
-}
-
-/** Disable VMX if it is supported and enabled on the current CPU
- */
-static inline void cpu_emergency_vmxoff(void)
-{
- if (cpu_has_vmx())
- __cpu_emergency_vmxoff();
-}
-
-
-
-
-/*
- * SVM functions:
- */
-
-/** Check if the CPU has SVM support
- *
- * You can use the 'msg' arg to get a message describing the problem,
- * if the function returns zero. Simply pass NULL if you are not interested
- * on the messages; gcc should take care of not generating code for
- * the messages on this case.
- */
-static inline int cpu_has_svm(const char **msg)
-{
- if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD &&
- boot_cpu_data.x86_vendor != X86_VENDOR_HYGON) {
- if (msg)
- *msg = "not amd or hygon";
- return 0;
- }
-
- if (boot_cpu_data.extended_cpuid_level < SVM_CPUID_FUNC) {
- if (msg)
- *msg = "can't execute cpuid_8000000a";
- return 0;
- }
-
- if (!boot_cpu_has(X86_FEATURE_SVM)) {
- if (msg)
- *msg = "svm not available";
- return 0;
- }
- return 1;
-}
-
-
-/** Disable SVM on the current CPU
- *
- * You should call this only if cpu_has_svm() returned true.
- */
-static inline void cpu_svm_disable(void)
-{
- uint64_t efer;
-
- wrmsrl(MSR_VM_HSAVE_PA, 0);
- rdmsrl(MSR_EFER, efer);
- if (efer & EFER_SVME) {
- /*
- * Force GIF=1 prior to disabling SVM to ensure INIT and NMI
- * aren't blocked, e.g. if a fatal error occurred between CLGI
- * and STGI. Note, STGI may #UD if SVM is disabled from NMI
- * context between reading EFER and executing STGI. In that
- * case, GIF must already be set, otherwise the NMI would have
- * been blocked, so just eat the fault.
- */
- asm_volatile_goto("1: stgi\n\t"
- _ASM_EXTABLE(1b, %l[fault])
- ::: "memory" : fault);
-fault:
- wrmsrl(MSR_EFER, efer & ~EFER_SVME);
- }
-}
-
-/** Makes sure SVM is disabled, if it is supported on the CPU
- */
-static inline void cpu_emergency_svm_disable(void)
-{
- if (cpu_has_svm(NULL))
- cpu_svm_disable();
-}
-
-#endif /* _ASM_X86_VIRTEX_H */
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index 0d02c4aafa6f..0e73616b82f3 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -71,7 +71,7 @@
#define SECONDARY_EXEC_RDSEED_EXITING VMCS_CONTROL_BIT(RDSEED_EXITING)
#define SECONDARY_EXEC_ENABLE_PML VMCS_CONTROL_BIT(PAGE_MOD_LOGGING)
#define SECONDARY_EXEC_PT_CONCEAL_VMX VMCS_CONTROL_BIT(PT_CONCEAL_VMX)
-#define SECONDARY_EXEC_XSAVES VMCS_CONTROL_BIT(XSAVES)
+#define SECONDARY_EXEC_ENABLE_XSAVES VMCS_CONTROL_BIT(XSAVES)
#define SECONDARY_EXEC_MODE_BASED_EPT_EXEC VMCS_CONTROL_BIT(MODE_BASED_EPT_EXEC)
#define SECONDARY_EXEC_PT_USE_GPA VMCS_CONTROL_BIT(PT_USE_GPA)
#define SECONDARY_EXEC_TSC_SCALING VMCS_CONTROL_BIT(TSC_SCALING)
diff --git a/arch/x86/include/asm/xen/hypervisor.h b/arch/x86/include/asm/xen/hypervisor.h
index 5fc35f889cd1..7048dfacc04b 100644
--- a/arch/x86/include/asm/xen/hypervisor.h
+++ b/arch/x86/include/asm/xen/hypervisor.h
@@ -36,6 +36,7 @@
extern struct shared_info *HYPERVISOR_shared_info;
extern struct start_info *xen_start_info;
+#include <asm/bug.h>
#include <asm/processor.h>
#define XEN_SIGNATURE "XenVMMXenVMM"
@@ -63,4 +64,40 @@ void __init xen_pvh_init(struct boot_params *boot_params);
void __init mem_map_via_hcall(struct boot_params *boot_params_p);
#endif
+/* Lazy mode for batching updates / context switch */
+enum xen_lazy_mode {
+ XEN_LAZY_NONE,
+ XEN_LAZY_MMU,
+ XEN_LAZY_CPU,
+};
+
+DECLARE_PER_CPU(enum xen_lazy_mode, xen_lazy_mode);
+DECLARE_PER_CPU(unsigned int, xen_lazy_nesting);
+
+static inline void enter_lazy(enum xen_lazy_mode mode)
+{
+ enum xen_lazy_mode old_mode = this_cpu_read(xen_lazy_mode);
+
+ if (mode == old_mode) {
+ this_cpu_inc(xen_lazy_nesting);
+ return;
+ }
+
+ BUG_ON(old_mode != XEN_LAZY_NONE);
+
+ this_cpu_write(xen_lazy_mode, mode);
+}
+
+static inline void leave_lazy(enum xen_lazy_mode mode)
+{
+ BUG_ON(this_cpu_read(xen_lazy_mode) != mode);
+
+ if (this_cpu_read(xen_lazy_nesting) == 0)
+ this_cpu_write(xen_lazy_mode, XEN_LAZY_NONE);
+ else
+ this_cpu_dec(xen_lazy_nesting);
+}
+
+enum xen_lazy_mode xen_get_lazy_mode(void);
+
#endif /* _ASM_X86_XEN_HYPERVISOR_H */
diff --git a/arch/x86/include/uapi/asm/mman.h b/arch/x86/include/uapi/asm/mman.h
index 8148bdddbd2c..46cdc941f958 100644
--- a/arch/x86/include/uapi/asm/mman.h
+++ b/arch/x86/include/uapi/asm/mman.h
@@ -5,14 +5,6 @@
#define MAP_32BIT 0x40 /* only give out 32bit addresses */
#define MAP_ABOVE4G 0x80 /* only map above 4GB */
-#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
-#define arch_calc_vm_prot_bits(prot, key) ( \
- ((key) & 0x1 ? VM_PKEY_BIT0 : 0) | \
- ((key) & 0x2 ? VM_PKEY_BIT1 : 0) | \
- ((key) & 0x4 ? VM_PKEY_BIT2 : 0) | \
- ((key) & 0x8 ? VM_PKEY_BIT3 : 0))
-#endif
-
/* Flags for map_shadow_stack(2) */
#define SHADOW_STACK_SET_TOKEN (1ULL << 0) /* Set up a restore token in the shadow stack */