Diffstat (limited to 'arch/x86')
253 files changed, 4651 insertions, 8046 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index d4bbebe1d72f..887bf86c447a 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -68,6 +68,7 @@ config X86
 	select ARCH_HAS_FORTIFY_SOURCE
 	select ARCH_HAS_GCOV_PROFILE_ALL
 	select ARCH_HAS_KCOV if X86_64
+	select ARCH_HAS_MEM_ENCRYPT
 	select ARCH_HAS_MEMBARRIER_SYNC_CORE
 	select ARCH_HAS_PMEM_API if X86_64
 	select ARCH_HAS_PTE_DEVMAP if X86_64
@@ -147,6 +148,7 @@ config X86
 	select HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD if X86_64
 	select HAVE_ARCH_VMAP_STACK if X86_64
 	select HAVE_ARCH_WITHIN_STACK_FRAMES
+	select HAVE_ASM_MODVERSIONS
 	select HAVE_CMPXCHG_DOUBLE
 	select HAVE_CMPXCHG_LOCAL
 	select HAVE_CONTEXT_TRACKING if X86_64
@@ -794,6 +796,7 @@ config KVM_GUEST
 	bool "KVM Guest support (including kvmclock)"
 	depends on PARAVIRT
 	select PARAVIRT_CLOCK
+	select ARCH_CPUIDLE_HALTPOLL
 	default y
 	---help---
 	  This option enables various optimizations for running under the KVM
@@ -802,6 +805,12 @@ config KVM_GUEST
 	  underlying device model, the host provides the guest with
 	  timing infrastructure such as time of day, and system time
 
+config ARCH_CPUIDLE_HALTPOLL
+	def_bool n
+	prompt "Disable host haltpoll when loading haltpoll driver"
+	help
+	  If virtualized under KVM, disable host haltpoll.
+
 config PVH
 	bool "Support for running PVH guests"
 	---help---
@@ -1519,9 +1528,6 @@ config X86_CPA_STATISTICS
 	  helps to determine the effectiveness of preserving large and huge
 	  page mappings when mapping protections are changed.
 
-config ARCH_HAS_MEM_ENCRYPT
-	def_bool y
-
 config AMD_MEM_ENCRYPT
 	bool "AMD Secure Memory Encryption (SME) support"
 	depends on X86_64 && CPU_SUP_AMD
@@ -1935,6 +1941,51 @@ config X86_INTEL_MEMORY_PROTECTION_KEYS
 	  If unsure, say y.
 
+choice
+	prompt "TSX enable mode"
+	depends on CPU_SUP_INTEL
+	default X86_INTEL_TSX_MODE_OFF
+	help
+	  Intel's TSX (Transactional Synchronization Extensions) feature
+	  allows optimizing locking protocols through lock elision, which
+	  can lead to a noticeable performance boost.
+
+	  On the other hand, it has been shown that TSX can be exploited
+	  to form side channel attacks (e.g. TAA), and chances are there
+	  will be more such attacks discovered in the future.
+
+	  Therefore TSX is not enabled by default (aka tsx=off). An admin
+	  may override this default with the tsx=on command line parameter.
+	  Even with TSX enabled, the kernel will attempt to enable the best
+	  possible TAA mitigation setting depending on the microcode available
+	  for the particular machine.
+
+	  This option allows setting the default tsx mode between tsx=on, =off
+	  and =auto. See Documentation/admin-guide/kernel-parameters.txt for
+	  more details.
+
+	  Say off if not sure, auto if TSX is in use but should only be enabled
+	  on safe platforms, or on if TSX is in use and its security impact is
+	  not a concern.
+
+config X86_INTEL_TSX_MODE_OFF
+	bool "off"
+	help
+	  TSX is disabled if possible - equals the tsx=off command line
+	  parameter.
+
+config X86_INTEL_TSX_MODE_ON
+	bool "on"
+	help
+	  TSX is always enabled on TSX capable HW - equals the tsx=on command
+	  line parameter.
+
+config X86_INTEL_TSX_MODE_AUTO
+	bool "auto"
+	help
+	  TSX is enabled on TSX capable HW that is believed to be safe against
+	  side channel attacks - equals the tsx=auto command line parameter.
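[The three choice entries map one-to-one onto the tsx= boot parameter documented in Documentation/admin-guide/kernel-parameters.txt, so whichever build-time default is selected here can still be overridden at boot. A hypothetical boot entry forcing auto mode (the image path and other flags are illustrative only, not part of this patch) would pass:

	linux /boot/vmlinuz root=/dev/sda1 ro tsx=auto
]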
+endchoice + config EFI bool "EFI runtime service support" depends on ACPI @@ -2026,20 +2077,30 @@ config KEXEC_FILE config ARCH_HAS_KEXEC_PURGATORY def_bool KEXEC_FILE -config KEXEC_VERIFY_SIG +config KEXEC_SIG bool "Verify kernel signature during kexec_file_load() syscall" depends on KEXEC_FILE ---help--- - This option makes kernel signature verification mandatory for - the kexec_file_load() syscall. - In addition to that option, you need to enable signature + This option makes the kexec_file_load() syscall check for a valid + signature of the kernel image. The image can still be loaded without + a valid signature unless you also enable KEXEC_SIG_FORCE, though if + there's a signature that we can check, then it must be valid. + + In addition to this option, you need to enable signature verification for the corresponding kernel image type being loaded in order for this to work. +config KEXEC_SIG_FORCE + bool "Require a valid signature in kexec_file_load() syscall" + depends on KEXEC_SIG + ---help--- + This option makes kernel signature verification mandatory for + the kexec_file_load() syscall. + config KEXEC_BZIMAGE_VERIFY_SIG bool "Enable bzImage signature verification support" - depends on KEXEC_VERIFY_SIG + depends on KEXEC_SIG depends on SIGNED_PE_FILE_VERIFICATION select SYSTEM_TRUSTED_KEYRING ---help--- diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index 71c92db47c41..bf9cd83de777 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -171,7 +171,7 @@ config HAVE_MMIOTRACE_SUPPORT config X86_DECODER_SELFTEST bool "x86 instruction decoder selftest" - depends on DEBUG_KERNEL && KPROBES + depends on DEBUG_KERNEL && INSTRUCTION_DECODER depends on !COMPILE_TEST ---help--- Perform x86 instruction decoder selftests at build time. diff --git a/arch/x86/boot/boot.h b/arch/x86/boot/boot.h index 19eca14b49a0..ca866f1cca2e 100644 --- a/arch/x86/boot/boot.h +++ b/arch/x86/boot/boot.h @@ -28,8 +28,6 @@ #include "cpuflags.h" /* Useful macros */ -#define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)])) - #define ARRAY_SIZE(x) (sizeof(x) / sizeof(*(x))) extern struct setup_header hdr; diff --git a/arch/x86/boot/compressed/acpi.c b/arch/x86/boot/compressed/acpi.c index 15255f388a85..25019d42ae93 100644 --- a/arch/x86/boot/compressed/acpi.c +++ b/arch/x86/boot/compressed/acpi.c @@ -21,30 +21,6 @@ struct mem_vector immovable_mem[MAX_NUMNODES*2]; /* - * Max length of 64-bit hex address string is 19, prefix "0x" + 16 hex - * digits, and '\0' for termination. - */ -#define MAX_ADDR_LEN 19 - -static acpi_physical_address get_acpi_rsdp(void) -{ - acpi_physical_address addr = 0; - -#ifdef CONFIG_KEXEC - char val[MAX_ADDR_LEN] = { }; - int ret; - - ret = cmdline_find_option("acpi_rsdp", val, MAX_ADDR_LEN); - if (ret < 0) - return 0; - - if (kstrtoull(val, 16, &addr)) - return 0; -#endif - return addr; -} - -/* * Search EFI system tables for RSDP. If both ACPI_20_TABLE_GUID and * ACPI_TABLE_GUID are found, take the former, which has more features. */ @@ -278,10 +254,7 @@ acpi_physical_address get_rsdp_addr(void) { acpi_physical_address pa; - pa = get_acpi_rsdp(); - - if (!pa) - pa = boot_params->acpi_rsdp_addr; + pa = boot_params->acpi_rsdp_addr; /* * Try to get EFI data from setup_data. This can happen when we're a @@ -301,6 +274,30 @@ acpi_physical_address get_rsdp_addr(void) } #if defined(CONFIG_RANDOMIZE_BASE) && defined(CONFIG_MEMORY_HOTREMOVE) +/* + * Max length of 64-bit hex address string is 19, prefix "0x" + 16 hex + * digits, and '\0' for termination. 
+ */ +#define MAX_ADDR_LEN 19 + +static acpi_physical_address get_cmdline_acpi_rsdp(void) +{ + acpi_physical_address addr = 0; + +#ifdef CONFIG_KEXEC + char val[MAX_ADDR_LEN] = { }; + int ret; + + ret = cmdline_find_option("acpi_rsdp", val, MAX_ADDR_LEN); + if (ret < 0) + return 0; + + if (kstrtoull(val, 16, &addr)) + return 0; +#endif + return addr; +} + /* Compute SRAT address from RSDP. */ static unsigned long get_acpi_srat_table(void) { @@ -311,7 +308,17 @@ static unsigned long get_acpi_srat_table(void) char arg[10]; u8 *entry; - rsdp = (struct acpi_table_rsdp *)(long)boot_params->acpi_rsdp_addr; + /* + * Check whether we were given an RSDP on the command line. We don't + * stash this in boot params because the kernel itself may have + * different ideas about whether to trust a command-line parameter. + */ + rsdp = (struct acpi_table_rsdp *)get_cmdline_acpi_rsdp(); + + if (!rsdp) + rsdp = (struct acpi_table_rsdp *)(long) + boot_params->acpi_rsdp_addr; + if (!rsdp) return 0; diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c index d6662fdef300..82bc60c8acb2 100644 --- a/arch/x86/boot/compressed/eboot.c +++ b/arch/x86/boot/compressed/eboot.c @@ -13,6 +13,7 @@ #include <asm/e820/types.h> #include <asm/setup.h> #include <asm/desc.h> +#include <asm/boot.h> #include "../string.h" #include "eboot.h" @@ -813,7 +814,8 @@ efi_main(struct efi_config *c, struct boot_params *boot_params) status = efi_relocate_kernel(sys_table, &bzimage_addr, hdr->init_size, hdr->init_size, hdr->pref_address, - hdr->kernel_alignment); + hdr->kernel_alignment, + LOAD_PHYSICAL_ADDR); if (status != EFI_SUCCESS) { efi_printk(sys_table, "efi_relocate_kernel() failed!\n"); goto fail; diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S index 37380c0d5999..5e30eaaf8576 100644 --- a/arch/x86/boot/compressed/head_32.S +++ b/arch/x86/boot/compressed/head_32.S @@ -140,7 +140,7 @@ ENTRY(startup_32) /* * Jump to the relocated address. */ - leal relocated(%ebx), %eax + leal .Lrelocated(%ebx), %eax jmp *%eax ENDPROC(startup_32) @@ -209,7 +209,7 @@ ENDPROC(efi32_stub_entry) #endif .text -relocated: +.Lrelocated: /* * Clear BSS (stack is currently empty) diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index 6233ae35d0d9..d98cd483377e 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -87,7 +87,7 @@ ENTRY(startup_32) call verify_cpu testl %eax, %eax - jnz no_longmode + jnz .Lno_longmode /* * Compute the delta between where we were compiled to run at @@ -322,7 +322,7 @@ ENTRY(startup_64) 1: popq %rdi subq $1b, %rdi - call adjust_got + call .Ladjust_got /* * At this point we are in long mode with 4-level paging enabled, @@ -421,7 +421,7 @@ trampoline_return: /* The new adjustment is the relocation address */ movq %rbx, %rdi - call adjust_got + call .Ladjust_got /* * Copy the compressed kernel to the end of our buffer @@ -440,7 +440,7 @@ trampoline_return: /* * Jump to the relocated address. */ - leaq relocated(%rbx), %rax + leaq .Lrelocated(%rbx), %rax jmp *%rax #ifdef CONFIG_EFI_STUB @@ -511,7 +511,7 @@ ENDPROC(efi64_stub_entry) #endif .text -relocated: +.Lrelocated: /* * Clear BSS (stack is currently empty) @@ -548,7 +548,7 @@ relocated: * first time we touch GOT). * RDI is the new adjustment to apply. 
*/ -adjust_got: +.Ladjust_got: /* Walk through the GOT adding the address to the entries */ leaq _got(%rip), %rdx leaq _egot(%rip), %rcx @@ -622,7 +622,7 @@ ENTRY(trampoline_32bit_src) movl %eax, %cr4 /* Calculate address of paging_enabled() once we are executing in the trampoline */ - leal paging_enabled - trampoline_32bit_src + TRAMPOLINE_32BIT_CODE_OFFSET(%ecx), %eax + leal .Lpaging_enabled - trampoline_32bit_src + TRAMPOLINE_32BIT_CODE_OFFSET(%ecx), %eax /* Prepare the stack for far return to Long Mode */ pushl $__KERNEL_CS @@ -635,7 +635,7 @@ ENTRY(trampoline_32bit_src) lret .code64 -paging_enabled: +.Lpaging_enabled: /* Return from the trampoline */ jmp *%rdi @@ -647,7 +647,7 @@ paging_enabled: .org trampoline_32bit_src + TRAMPOLINE_32BIT_CODE_SIZE .code32 -no_longmode: +.Lno_longmode: /* This isn't an x86-64 CPU, so hang intentionally, we cannot continue */ 1: hlt diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c index 53ac0cb2396d..9652d5c2afda 100644 --- a/arch/x86/boot/compressed/misc.c +++ b/arch/x86/boot/compressed/misc.c @@ -345,6 +345,7 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap, { const unsigned long kernel_total_size = VO__end - VO__text; unsigned long virt_addr = LOAD_PHYSICAL_ADDR; + unsigned long needed_size; /* Retain x86 boot parameters pointer passed from startup_32/64. */ boot_params = rmode; @@ -379,26 +380,38 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap, free_mem_ptr = heap; /* Heap */ free_mem_end_ptr = heap + BOOT_HEAP_SIZE; + /* + * The memory hole needed for the kernel is the larger of either + * the entire decompressed kernel plus relocation table, or the + * entire decompressed kernel plus .bss and .brk sections. + * + * On X86_64, the memory is mapped with PMD pages. Round the + * size up so that the full extent of PMD pages mapped is + * included in the check against the valid memory table + * entries. This ensures the full mapped area is usable RAM + * and doesn't include any reserved areas. + */ + needed_size = max(output_len, kernel_total_size); +#ifdef CONFIG_X86_64 + needed_size = ALIGN(needed_size, MIN_KERNEL_ALIGN); +#endif + /* Report initial kernel position details. */ debug_putaddr(input_data); debug_putaddr(input_len); debug_putaddr(output); debug_putaddr(output_len); debug_putaddr(kernel_total_size); + debug_putaddr(needed_size); #ifdef CONFIG_X86_64 /* Report address of 32-bit trampoline */ debug_putaddr(trampoline_32bit); #endif - /* - * The memory hole needed for the kernel is the larger of either - * the entire decompressed kernel plus relocation table, or the - * entire decompressed kernel plus .bss and .brk sections. - */ choose_random_location((unsigned long)input_data, input_len, (unsigned long *)&output, - max(output_len, kernel_total_size), + needed_size, &virt_addr); /* Validate memory location choices. 
*/ diff --git a/arch/x86/boot/main.c b/arch/x86/boot/main.c index 996df3d586f0..e3add857c2c9 100644 --- a/arch/x86/boot/main.c +++ b/arch/x86/boot/main.c @@ -10,6 +10,7 @@ /* * Main module for the real-mode kernel code */ +#include <linux/build_bug.h> #include "boot.h" #include "string.h" diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile index 45734e1cf967..759b1a927826 100644 --- a/arch/x86/crypto/Makefile +++ b/arch/x86/crypto/Makefile @@ -14,11 +14,9 @@ sha256_ni_supported :=$(call as-instr,sha256msg1 %xmm0$(comma)%xmm1,yes,no) obj-$(CONFIG_CRYPTO_GLUE_HELPER_X86) += glue_helper.o -obj-$(CONFIG_CRYPTO_AES_586) += aes-i586.o obj-$(CONFIG_CRYPTO_TWOFISH_586) += twofish-i586.o obj-$(CONFIG_CRYPTO_SERPENT_SSE2_586) += serpent-sse2-i586.o -obj-$(CONFIG_CRYPTO_AES_X86_64) += aes-x86_64.o obj-$(CONFIG_CRYPTO_DES3_EDE_X86_64) += des3_ede-x86_64.o obj-$(CONFIG_CRYPTO_CAMELLIA_X86_64) += camellia-x86_64.o obj-$(CONFIG_CRYPTO_BLOWFISH_X86_64) += blowfish-x86_64.o @@ -38,14 +36,6 @@ obj-$(CONFIG_CRYPTO_CRCT10DIF_PCLMUL) += crct10dif-pclmul.o obj-$(CONFIG_CRYPTO_POLY1305_X86_64) += poly1305-x86_64.o obj-$(CONFIG_CRYPTO_AEGIS128_AESNI_SSE2) += aegis128-aesni.o -obj-$(CONFIG_CRYPTO_AEGIS128L_AESNI_SSE2) += aegis128l-aesni.o -obj-$(CONFIG_CRYPTO_AEGIS256_AESNI_SSE2) += aegis256-aesni.o - -obj-$(CONFIG_CRYPTO_MORUS640_GLUE) += morus640_glue.o -obj-$(CONFIG_CRYPTO_MORUS1280_GLUE) += morus1280_glue.o - -obj-$(CONFIG_CRYPTO_MORUS640_SSE2) += morus640-sse2.o -obj-$(CONFIG_CRYPTO_MORUS1280_SSE2) += morus1280-sse2.o obj-$(CONFIG_CRYPTO_NHPOLY1305_SSE2) += nhpoly1305-sse2.o obj-$(CONFIG_CRYPTO_NHPOLY1305_AVX2) += nhpoly1305-avx2.o @@ -64,15 +54,11 @@ endif ifeq ($(avx2_supported),yes) obj-$(CONFIG_CRYPTO_CAMELLIA_AESNI_AVX2_X86_64) += camellia-aesni-avx2.o obj-$(CONFIG_CRYPTO_SERPENT_AVX2_X86_64) += serpent-avx2.o - - obj-$(CONFIG_CRYPTO_MORUS1280_AVX2) += morus1280-avx2.o endif -aes-i586-y := aes-i586-asm_32.o aes_glue.o twofish-i586-y := twofish-i586-asm_32.o twofish_glue.o serpent-sse2-i586-y := serpent-sse2-i586-asm_32.o serpent_sse2_glue.o -aes-x86_64-y := aes-x86_64-asm_64.o aes_glue.o des3_ede-x86_64-y := des3_ede-asm_64.o des3_ede_glue.o camellia-x86_64-y := camellia-x86_64-asm_64.o camellia_glue.o blowfish-x86_64-y := blowfish-x86_64-asm_64.o blowfish_glue.o @@ -82,11 +68,6 @@ chacha-x86_64-y := chacha-ssse3-x86_64.o chacha_glue.o serpent-sse2-x86_64-y := serpent-sse2-x86_64-asm_64.o serpent_sse2_glue.o aegis128-aesni-y := aegis128-aesni-asm.o aegis128-aesni-glue.o -aegis128l-aesni-y := aegis128l-aesni-asm.o aegis128l-aesni-glue.o -aegis256-aesni-y := aegis256-aesni-asm.o aegis256-aesni-glue.o - -morus640-sse2-y := morus640-sse2-asm.o morus640-sse2-glue.o -morus1280-sse2-y := morus1280-sse2-asm.o morus1280-sse2-glue.o nhpoly1305-sse2-y := nh-sse2-x86_64.o nhpoly1305-sse2-glue.o @@ -106,8 +87,6 @@ ifeq ($(avx2_supported),yes) chacha-x86_64-y += chacha-avx2-x86_64.o serpent-avx2-y := serpent-avx2-asm_64.o serpent_avx2_glue.o - morus1280-avx2-y := morus1280-avx2-asm.o morus1280-avx2-glue.o - nhpoly1305-avx2-y := nh-avx2-x86_64.o nhpoly1305-avx2-glue.o endif diff --git a/arch/x86/crypto/aegis128l-aesni-asm.S b/arch/x86/crypto/aegis128l-aesni-asm.S deleted file mode 100644 index 1461ef00c0e8..000000000000 --- a/arch/x86/crypto/aegis128l-aesni-asm.S +++ /dev/null @@ -1,823 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * AES-NI + SSE2 implementation of AEGIS-128L - * - * Copyright (c) 2017-2018 Ondrej Mosnacek <omosnacek@gmail.com> - * Copyright (C) 2017-2018 Red Hat, Inc. 
All rights reserved. - */ - -#include <linux/linkage.h> -#include <asm/frame.h> - -#define STATE0 %xmm0 -#define STATE1 %xmm1 -#define STATE2 %xmm2 -#define STATE3 %xmm3 -#define STATE4 %xmm4 -#define STATE5 %xmm5 -#define STATE6 %xmm6 -#define STATE7 %xmm7 -#define MSG0 %xmm8 -#define MSG1 %xmm9 -#define T0 %xmm10 -#define T1 %xmm11 -#define T2 %xmm12 -#define T3 %xmm13 - -#define STATEP %rdi -#define LEN %rsi -#define SRC %rdx -#define DST %rcx - -.section .rodata.cst16.aegis128l_const, "aM", @progbits, 32 -.align 16 -.Laegis128l_const_0: - .byte 0x00, 0x01, 0x01, 0x02, 0x03, 0x05, 0x08, 0x0d - .byte 0x15, 0x22, 0x37, 0x59, 0x90, 0xe9, 0x79, 0x62 -.Laegis128l_const_1: - .byte 0xdb, 0x3d, 0x18, 0x55, 0x6d, 0xc2, 0x2f, 0xf1 - .byte 0x20, 0x11, 0x31, 0x42, 0x73, 0xb5, 0x28, 0xdd - -.section .rodata.cst16.aegis128l_counter, "aM", @progbits, 16 -.align 16 -.Laegis128l_counter0: - .byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07 - .byte 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f -.Laegis128l_counter1: - .byte 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17 - .byte 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f - -.text - -/* - * __load_partial: internal ABI - * input: - * LEN - bytes - * SRC - src - * output: - * MSG0 - first message block - * MSG1 - second message block - * changed: - * T0 - * %r8 - * %r9 - */ -__load_partial: - xor %r9d, %r9d - pxor MSG0, MSG0 - pxor MSG1, MSG1 - - mov LEN, %r8 - and $0x1, %r8 - jz .Lld_partial_1 - - mov LEN, %r8 - and $0x1E, %r8 - add SRC, %r8 - mov (%r8), %r9b - -.Lld_partial_1: - mov LEN, %r8 - and $0x2, %r8 - jz .Lld_partial_2 - - mov LEN, %r8 - and $0x1C, %r8 - add SRC, %r8 - shl $0x10, %r9 - mov (%r8), %r9w - -.Lld_partial_2: - mov LEN, %r8 - and $0x4, %r8 - jz .Lld_partial_4 - - mov LEN, %r8 - and $0x18, %r8 - add SRC, %r8 - shl $32, %r9 - mov (%r8), %r8d - xor %r8, %r9 - -.Lld_partial_4: - movq %r9, MSG0 - - mov LEN, %r8 - and $0x8, %r8 - jz .Lld_partial_8 - - mov LEN, %r8 - and $0x10, %r8 - add SRC, %r8 - pslldq $8, MSG0 - movq (%r8), T0 - pxor T0, MSG0 - -.Lld_partial_8: - mov LEN, %r8 - and $0x10, %r8 - jz .Lld_partial_16 - - movdqa MSG0, MSG1 - movdqu (SRC), MSG0 - -.Lld_partial_16: - ret -ENDPROC(__load_partial) - -/* - * __store_partial: internal ABI - * input: - * LEN - bytes - * DST - dst - * output: - * T0 - first message block - * T1 - second message block - * changed: - * %r8 - * %r9 - * %r10 - */ -__store_partial: - mov LEN, %r8 - mov DST, %r9 - - cmp $16, %r8 - jl .Lst_partial_16 - - movdqu T0, (%r9) - movdqa T1, T0 - - sub $16, %r8 - add $16, %r9 - -.Lst_partial_16: - movq T0, %r10 - - cmp $8, %r8 - jl .Lst_partial_8 - - mov %r10, (%r9) - psrldq $8, T0 - movq T0, %r10 - - sub $8, %r8 - add $8, %r9 - -.Lst_partial_8: - cmp $4, %r8 - jl .Lst_partial_4 - - mov %r10d, (%r9) - shr $32, %r10 - - sub $4, %r8 - add $4, %r9 - -.Lst_partial_4: - cmp $2, %r8 - jl .Lst_partial_2 - - mov %r10w, (%r9) - shr $0x10, %r10 - - sub $2, %r8 - add $2, %r9 - -.Lst_partial_2: - cmp $1, %r8 - jl .Lst_partial_1 - - mov %r10b, (%r9) - -.Lst_partial_1: - ret -ENDPROC(__store_partial) - -.macro update - movdqa STATE7, T0 - aesenc STATE0, STATE7 - aesenc STATE1, STATE0 - aesenc STATE2, STATE1 - aesenc STATE3, STATE2 - aesenc STATE4, STATE3 - aesenc STATE5, STATE4 - aesenc STATE6, STATE5 - aesenc T0, STATE6 -.endm - -.macro update0 - update - pxor MSG0, STATE7 - pxor MSG1, STATE3 -.endm - -.macro update1 - update - pxor MSG0, STATE6 - pxor MSG1, STATE2 -.endm - -.macro update2 - update - pxor MSG0, STATE5 - pxor MSG1, STATE1 -.endm - -.macro update3 - update 
- pxor MSG0, STATE4 - pxor MSG1, STATE0 -.endm - -.macro update4 - update - pxor MSG0, STATE3 - pxor MSG1, STATE7 -.endm - -.macro update5 - update - pxor MSG0, STATE2 - pxor MSG1, STATE6 -.endm - -.macro update6 - update - pxor MSG0, STATE1 - pxor MSG1, STATE5 -.endm - -.macro update7 - update - pxor MSG0, STATE0 - pxor MSG1, STATE4 -.endm - -.macro state_load - movdqu 0x00(STATEP), STATE0 - movdqu 0x10(STATEP), STATE1 - movdqu 0x20(STATEP), STATE2 - movdqu 0x30(STATEP), STATE3 - movdqu 0x40(STATEP), STATE4 - movdqu 0x50(STATEP), STATE5 - movdqu 0x60(STATEP), STATE6 - movdqu 0x70(STATEP), STATE7 -.endm - -.macro state_store s0 s1 s2 s3 s4 s5 s6 s7 - movdqu \s7, 0x00(STATEP) - movdqu \s0, 0x10(STATEP) - movdqu \s1, 0x20(STATEP) - movdqu \s2, 0x30(STATEP) - movdqu \s3, 0x40(STATEP) - movdqu \s4, 0x50(STATEP) - movdqu \s5, 0x60(STATEP) - movdqu \s6, 0x70(STATEP) -.endm - -.macro state_store0 - state_store STATE0 STATE1 STATE2 STATE3 STATE4 STATE5 STATE6 STATE7 -.endm - -.macro state_store1 - state_store STATE7 STATE0 STATE1 STATE2 STATE3 STATE4 STATE5 STATE6 -.endm - -.macro state_store2 - state_store STATE6 STATE7 STATE0 STATE1 STATE2 STATE3 STATE4 STATE5 -.endm - -.macro state_store3 - state_store STATE5 STATE6 STATE7 STATE0 STATE1 STATE2 STATE3 STATE4 -.endm - -.macro state_store4 - state_store STATE4 STATE5 STATE6 STATE7 STATE0 STATE1 STATE2 STATE3 -.endm - -.macro state_store5 - state_store STATE3 STATE4 STATE5 STATE6 STATE7 STATE0 STATE1 STATE2 -.endm - -.macro state_store6 - state_store STATE2 STATE3 STATE4 STATE5 STATE6 STATE7 STATE0 STATE1 -.endm - -.macro state_store7 - state_store STATE1 STATE2 STATE3 STATE4 STATE5 STATE6 STATE7 STATE0 -.endm - -/* - * void crypto_aegis128l_aesni_init(void *state, const void *key, const void *iv); - */ -ENTRY(crypto_aegis128l_aesni_init) - FRAME_BEGIN - - /* load key: */ - movdqa (%rsi), MSG1 - movdqa MSG1, STATE0 - movdqa MSG1, STATE4 - movdqa MSG1, STATE5 - movdqa MSG1, STATE6 - movdqa MSG1, STATE7 - - /* load IV: */ - movdqu (%rdx), MSG0 - pxor MSG0, STATE0 - pxor MSG0, STATE4 - - /* load the constants: */ - movdqa .Laegis128l_const_0, STATE2 - movdqa .Laegis128l_const_1, STATE1 - movdqa STATE1, STATE3 - pxor STATE2, STATE5 - pxor STATE1, STATE6 - pxor STATE2, STATE7 - - /* update 10 times with IV and KEY: */ - update0 - update1 - update2 - update3 - update4 - update5 - update6 - update7 - update0 - update1 - - state_store1 - - FRAME_END - ret -ENDPROC(crypto_aegis128l_aesni_init) - -.macro ad_block a i - movdq\a (\i * 0x20 + 0x00)(SRC), MSG0 - movdq\a (\i * 0x20 + 0x10)(SRC), MSG1 - update\i - sub $0x20, LEN - cmp $0x20, LEN - jl .Lad_out_\i -.endm - -/* - * void crypto_aegis128l_aesni_ad(void *state, unsigned int length, - * const void *data); - */ -ENTRY(crypto_aegis128l_aesni_ad) - FRAME_BEGIN - - cmp $0x20, LEN - jb .Lad_out - - state_load - - mov SRC, %r8 - and $0xf, %r8 - jnz .Lad_u_loop - -.align 8 -.Lad_a_loop: - ad_block a 0 - ad_block a 1 - ad_block a 2 - ad_block a 3 - ad_block a 4 - ad_block a 5 - ad_block a 6 - ad_block a 7 - - add $0x100, SRC - jmp .Lad_a_loop - -.align 8 -.Lad_u_loop: - ad_block u 0 - ad_block u 1 - ad_block u 2 - ad_block u 3 - ad_block u 4 - ad_block u 5 - ad_block u 6 - ad_block u 7 - - add $0x100, SRC - jmp .Lad_u_loop - -.Lad_out_0: - state_store0 - FRAME_END - ret - -.Lad_out_1: - state_store1 - FRAME_END - ret - -.Lad_out_2: - state_store2 - FRAME_END - ret - -.Lad_out_3: - state_store3 - FRAME_END - ret - -.Lad_out_4: - state_store4 - FRAME_END - ret - -.Lad_out_5: - state_store5 - FRAME_END - ret - 
-.Lad_out_6: - state_store6 - FRAME_END - ret - -.Lad_out_7: - state_store7 - FRAME_END - ret - -.Lad_out: - FRAME_END - ret -ENDPROC(crypto_aegis128l_aesni_ad) - -.macro crypt m0 m1 s0 s1 s2 s3 s4 s5 s6 s7 - pxor \s1, \m0 - pxor \s6, \m0 - movdqa \s2, T3 - pand \s3, T3 - pxor T3, \m0 - - pxor \s2, \m1 - pxor \s5, \m1 - movdqa \s6, T3 - pand \s7, T3 - pxor T3, \m1 -.endm - -.macro crypt0 m0 m1 - crypt \m0 \m1 STATE0 STATE1 STATE2 STATE3 STATE4 STATE5 STATE6 STATE7 -.endm - -.macro crypt1 m0 m1 - crypt \m0 \m1 STATE7 STATE0 STATE1 STATE2 STATE3 STATE4 STATE5 STATE6 -.endm - -.macro crypt2 m0 m1 - crypt \m0 \m1 STATE6 STATE7 STATE0 STATE1 STATE2 STATE3 STATE4 STATE5 -.endm - -.macro crypt3 m0 m1 - crypt \m0 \m1 STATE5 STATE6 STATE7 STATE0 STATE1 STATE2 STATE3 STATE4 -.endm - -.macro crypt4 m0 m1 - crypt \m0 \m1 STATE4 STATE5 STATE6 STATE7 STATE0 STATE1 STATE2 STATE3 -.endm - -.macro crypt5 m0 m1 - crypt \m0 \m1 STATE3 STATE4 STATE5 STATE6 STATE7 STATE0 STATE1 STATE2 -.endm - -.macro crypt6 m0 m1 - crypt \m0 \m1 STATE2 STATE3 STATE4 STATE5 STATE6 STATE7 STATE0 STATE1 -.endm - -.macro crypt7 m0 m1 - crypt \m0 \m1 STATE1 STATE2 STATE3 STATE4 STATE5 STATE6 STATE7 STATE0 -.endm - -.macro encrypt_block a i - movdq\a (\i * 0x20 + 0x00)(SRC), MSG0 - movdq\a (\i * 0x20 + 0x10)(SRC), MSG1 - movdqa MSG0, T0 - movdqa MSG1, T1 - crypt\i T0, T1 - movdq\a T0, (\i * 0x20 + 0x00)(DST) - movdq\a T1, (\i * 0x20 + 0x10)(DST) - - update\i - - sub $0x20, LEN - cmp $0x20, LEN - jl .Lenc_out_\i -.endm - -.macro decrypt_block a i - movdq\a (\i * 0x20 + 0x00)(SRC), MSG0 - movdq\a (\i * 0x20 + 0x10)(SRC), MSG1 - crypt\i MSG0, MSG1 - movdq\a MSG0, (\i * 0x20 + 0x00)(DST) - movdq\a MSG1, (\i * 0x20 + 0x10)(DST) - - update\i - - sub $0x20, LEN - cmp $0x20, LEN - jl .Ldec_out_\i -.endm - -/* - * void crypto_aegis128l_aesni_enc(void *state, unsigned int length, - * const void *src, void *dst); - */ -ENTRY(crypto_aegis128l_aesni_enc) - FRAME_BEGIN - - cmp $0x20, LEN - jb .Lenc_out - - state_load - - mov SRC, %r8 - or DST, %r8 - and $0xf, %r8 - jnz .Lenc_u_loop - -.align 8 -.Lenc_a_loop: - encrypt_block a 0 - encrypt_block a 1 - encrypt_block a 2 - encrypt_block a 3 - encrypt_block a 4 - encrypt_block a 5 - encrypt_block a 6 - encrypt_block a 7 - - add $0x100, SRC - add $0x100, DST - jmp .Lenc_a_loop - -.align 8 -.Lenc_u_loop: - encrypt_block u 0 - encrypt_block u 1 - encrypt_block u 2 - encrypt_block u 3 - encrypt_block u 4 - encrypt_block u 5 - encrypt_block u 6 - encrypt_block u 7 - - add $0x100, SRC - add $0x100, DST - jmp .Lenc_u_loop - -.Lenc_out_0: - state_store0 - FRAME_END - ret - -.Lenc_out_1: - state_store1 - FRAME_END - ret - -.Lenc_out_2: - state_store2 - FRAME_END - ret - -.Lenc_out_3: - state_store3 - FRAME_END - ret - -.Lenc_out_4: - state_store4 - FRAME_END - ret - -.Lenc_out_5: - state_store5 - FRAME_END - ret - -.Lenc_out_6: - state_store6 - FRAME_END - ret - -.Lenc_out_7: - state_store7 - FRAME_END - ret - -.Lenc_out: - FRAME_END - ret -ENDPROC(crypto_aegis128l_aesni_enc) - -/* - * void crypto_aegis128l_aesni_enc_tail(void *state, unsigned int length, - * const void *src, void *dst); - */ -ENTRY(crypto_aegis128l_aesni_enc_tail) - FRAME_BEGIN - - state_load - - /* encrypt message: */ - call __load_partial - - movdqa MSG0, T0 - movdqa MSG1, T1 - crypt0 T0, T1 - - call __store_partial - - update0 - - state_store0 - - FRAME_END - ret -ENDPROC(crypto_aegis128l_aesni_enc_tail) - -/* - * void crypto_aegis128l_aesni_dec(void *state, unsigned int length, - * const void *src, void *dst); - */ 
-ENTRY(crypto_aegis128l_aesni_dec) - FRAME_BEGIN - - cmp $0x20, LEN - jb .Ldec_out - - state_load - - mov SRC, %r8 - or DST, %r8 - and $0xF, %r8 - jnz .Ldec_u_loop - -.align 8 -.Ldec_a_loop: - decrypt_block a 0 - decrypt_block a 1 - decrypt_block a 2 - decrypt_block a 3 - decrypt_block a 4 - decrypt_block a 5 - decrypt_block a 6 - decrypt_block a 7 - - add $0x100, SRC - add $0x100, DST - jmp .Ldec_a_loop - -.align 8 -.Ldec_u_loop: - decrypt_block u 0 - decrypt_block u 1 - decrypt_block u 2 - decrypt_block u 3 - decrypt_block u 4 - decrypt_block u 5 - decrypt_block u 6 - decrypt_block u 7 - - add $0x100, SRC - add $0x100, DST - jmp .Ldec_u_loop - -.Ldec_out_0: - state_store0 - FRAME_END - ret - -.Ldec_out_1: - state_store1 - FRAME_END - ret - -.Ldec_out_2: - state_store2 - FRAME_END - ret - -.Ldec_out_3: - state_store3 - FRAME_END - ret - -.Ldec_out_4: - state_store4 - FRAME_END - ret - -.Ldec_out_5: - state_store5 - FRAME_END - ret - -.Ldec_out_6: - state_store6 - FRAME_END - ret - -.Ldec_out_7: - state_store7 - FRAME_END - ret - -.Ldec_out: - FRAME_END - ret -ENDPROC(crypto_aegis128l_aesni_dec) - -/* - * void crypto_aegis128l_aesni_dec_tail(void *state, unsigned int length, - * const void *src, void *dst); - */ -ENTRY(crypto_aegis128l_aesni_dec_tail) - FRAME_BEGIN - - state_load - - /* decrypt message: */ - call __load_partial - - crypt0 MSG0, MSG1 - - movdqa MSG0, T0 - movdqa MSG1, T1 - call __store_partial - - /* mask with byte count: */ - movq LEN, T0 - punpcklbw T0, T0 - punpcklbw T0, T0 - punpcklbw T0, T0 - punpcklbw T0, T0 - movdqa T0, T1 - movdqa .Laegis128l_counter0, T2 - movdqa .Laegis128l_counter1, T3 - pcmpgtb T2, T0 - pcmpgtb T3, T1 - pand T0, MSG0 - pand T1, MSG1 - - update0 - - state_store0 - - FRAME_END - ret -ENDPROC(crypto_aegis128l_aesni_dec_tail) - -/* - * void crypto_aegis128l_aesni_final(void *state, void *tag_xor, - * u64 assoclen, u64 cryptlen); - */ -ENTRY(crypto_aegis128l_aesni_final) - FRAME_BEGIN - - state_load - - /* prepare length block: */ - movq %rdx, MSG0 - movq %rcx, T0 - pslldq $8, T0 - pxor T0, MSG0 - psllq $3, MSG0 /* multiply by 8 (to get bit count) */ - - pxor STATE2, MSG0 - movdqa MSG0, MSG1 - - /* update state: */ - update0 - update1 - update2 - update3 - update4 - update5 - update6 - - /* xor tag: */ - movdqu (%rsi), T0 - - pxor STATE1, T0 - pxor STATE2, T0 - pxor STATE3, T0 - pxor STATE4, T0 - pxor STATE5, T0 - pxor STATE6, T0 - pxor STATE7, T0 - - movdqu T0, (%rsi) - - FRAME_END - ret -ENDPROC(crypto_aegis128l_aesni_final) diff --git a/arch/x86/crypto/aegis128l-aesni-glue.c b/arch/x86/crypto/aegis128l-aesni-glue.c deleted file mode 100644 index 19eb28b316f0..000000000000 --- a/arch/x86/crypto/aegis128l-aesni-glue.c +++ /dev/null @@ -1,293 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * The AEGIS-128L Authenticated-Encryption Algorithm - * Glue for AES-NI + SSE2 implementation - * - * Copyright (c) 2017-2018 Ondrej Mosnacek <omosnacek@gmail.com> - * Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved. 
- */ - -#include <crypto/internal/aead.h> -#include <crypto/internal/simd.h> -#include <crypto/internal/skcipher.h> -#include <crypto/scatterwalk.h> -#include <linux/module.h> -#include <asm/fpu/api.h> -#include <asm/cpu_device_id.h> - -#define AEGIS128L_BLOCK_ALIGN 16 -#define AEGIS128L_BLOCK_SIZE 32 -#define AEGIS128L_NONCE_SIZE 16 -#define AEGIS128L_STATE_BLOCKS 8 -#define AEGIS128L_KEY_SIZE 16 -#define AEGIS128L_MIN_AUTH_SIZE 8 -#define AEGIS128L_MAX_AUTH_SIZE 16 - -asmlinkage void crypto_aegis128l_aesni_init(void *state, void *key, void *iv); - -asmlinkage void crypto_aegis128l_aesni_ad( - void *state, unsigned int length, const void *data); - -asmlinkage void crypto_aegis128l_aesni_enc( - void *state, unsigned int length, const void *src, void *dst); - -asmlinkage void crypto_aegis128l_aesni_dec( - void *state, unsigned int length, const void *src, void *dst); - -asmlinkage void crypto_aegis128l_aesni_enc_tail( - void *state, unsigned int length, const void *src, void *dst); - -asmlinkage void crypto_aegis128l_aesni_dec_tail( - void *state, unsigned int length, const void *src, void *dst); - -asmlinkage void crypto_aegis128l_aesni_final( - void *state, void *tag_xor, unsigned int cryptlen, - unsigned int assoclen); - -struct aegis_block { - u8 bytes[AEGIS128L_BLOCK_SIZE] __aligned(AEGIS128L_BLOCK_ALIGN); -}; - -struct aegis_state { - struct aegis_block blocks[AEGIS128L_STATE_BLOCKS]; -}; - -struct aegis_ctx { - struct aegis_block key; -}; - -struct aegis_crypt_ops { - int (*skcipher_walk_init)(struct skcipher_walk *walk, - struct aead_request *req, bool atomic); - - void (*crypt_blocks)(void *state, unsigned int length, const void *src, - void *dst); - void (*crypt_tail)(void *state, unsigned int length, const void *src, - void *dst); -}; - -static void crypto_aegis128l_aesni_process_ad( - struct aegis_state *state, struct scatterlist *sg_src, - unsigned int assoclen) -{ - struct scatter_walk walk; - struct aegis_block buf; - unsigned int pos = 0; - - scatterwalk_start(&walk, sg_src); - while (assoclen != 0) { - unsigned int size = scatterwalk_clamp(&walk, assoclen); - unsigned int left = size; - void *mapped = scatterwalk_map(&walk); - const u8 *src = (const u8 *)mapped; - - if (pos + size >= AEGIS128L_BLOCK_SIZE) { - if (pos > 0) { - unsigned int fill = AEGIS128L_BLOCK_SIZE - pos; - memcpy(buf.bytes + pos, src, fill); - crypto_aegis128l_aesni_ad(state, - AEGIS128L_BLOCK_SIZE, - buf.bytes); - pos = 0; - left -= fill; - src += fill; - } - - crypto_aegis128l_aesni_ad(state, left, src); - - src += left & ~(AEGIS128L_BLOCK_SIZE - 1); - left &= AEGIS128L_BLOCK_SIZE - 1; - } - - memcpy(buf.bytes + pos, src, left); - pos += left; - assoclen -= size; - - scatterwalk_unmap(mapped); - scatterwalk_advance(&walk, size); - scatterwalk_done(&walk, 0, assoclen); - } - - if (pos > 0) { - memset(buf.bytes + pos, 0, AEGIS128L_BLOCK_SIZE - pos); - crypto_aegis128l_aesni_ad(state, AEGIS128L_BLOCK_SIZE, buf.bytes); - } -} - -static void crypto_aegis128l_aesni_process_crypt( - struct aegis_state *state, struct skcipher_walk *walk, - const struct aegis_crypt_ops *ops) -{ - while (walk->nbytes >= AEGIS128L_BLOCK_SIZE) { - ops->crypt_blocks(state, round_down(walk->nbytes, - AEGIS128L_BLOCK_SIZE), - walk->src.virt.addr, walk->dst.virt.addr); - skcipher_walk_done(walk, walk->nbytes % AEGIS128L_BLOCK_SIZE); - } - - if (walk->nbytes) { - ops->crypt_tail(state, walk->nbytes, walk->src.virt.addr, - walk->dst.virt.addr); - skcipher_walk_done(walk, 0); - } -} - -static struct aegis_ctx 
*crypto_aegis128l_aesni_ctx(struct crypto_aead *aead) -{ - u8 *ctx = crypto_aead_ctx(aead); - ctx = PTR_ALIGN(ctx, __alignof__(struct aegis_ctx)); - return (void *)ctx; -} - -static int crypto_aegis128l_aesni_setkey(struct crypto_aead *aead, - const u8 *key, unsigned int keylen) -{ - struct aegis_ctx *ctx = crypto_aegis128l_aesni_ctx(aead); - - if (keylen != AEGIS128L_KEY_SIZE) { - crypto_aead_set_flags(aead, CRYPTO_TFM_RES_BAD_KEY_LEN); - return -EINVAL; - } - - memcpy(ctx->key.bytes, key, AEGIS128L_KEY_SIZE); - - return 0; -} - -static int crypto_aegis128l_aesni_setauthsize(struct crypto_aead *tfm, - unsigned int authsize) -{ - if (authsize > AEGIS128L_MAX_AUTH_SIZE) - return -EINVAL; - if (authsize < AEGIS128L_MIN_AUTH_SIZE) - return -EINVAL; - return 0; -} - -static void crypto_aegis128l_aesni_crypt(struct aead_request *req, - struct aegis_block *tag_xor, - unsigned int cryptlen, - const struct aegis_crypt_ops *ops) -{ - struct crypto_aead *tfm = crypto_aead_reqtfm(req); - struct aegis_ctx *ctx = crypto_aegis128l_aesni_ctx(tfm); - struct skcipher_walk walk; - struct aegis_state state; - - ops->skcipher_walk_init(&walk, req, true); - - kernel_fpu_begin(); - - crypto_aegis128l_aesni_init(&state, ctx->key.bytes, req->iv); - crypto_aegis128l_aesni_process_ad(&state, req->src, req->assoclen); - crypto_aegis128l_aesni_process_crypt(&state, &walk, ops); - crypto_aegis128l_aesni_final(&state, tag_xor, req->assoclen, cryptlen); - - kernel_fpu_end(); -} - -static int crypto_aegis128l_aesni_encrypt(struct aead_request *req) -{ - static const struct aegis_crypt_ops OPS = { - .skcipher_walk_init = skcipher_walk_aead_encrypt, - .crypt_blocks = crypto_aegis128l_aesni_enc, - .crypt_tail = crypto_aegis128l_aesni_enc_tail, - }; - - struct crypto_aead *tfm = crypto_aead_reqtfm(req); - struct aegis_block tag = {}; - unsigned int authsize = crypto_aead_authsize(tfm); - unsigned int cryptlen = req->cryptlen; - - crypto_aegis128l_aesni_crypt(req, &tag, cryptlen, &OPS); - - scatterwalk_map_and_copy(tag.bytes, req->dst, - req->assoclen + cryptlen, authsize, 1); - return 0; -} - -static int crypto_aegis128l_aesni_decrypt(struct aead_request *req) -{ - static const struct aegis_block zeros = {}; - - static const struct aegis_crypt_ops OPS = { - .skcipher_walk_init = skcipher_walk_aead_decrypt, - .crypt_blocks = crypto_aegis128l_aesni_dec, - .crypt_tail = crypto_aegis128l_aesni_dec_tail, - }; - - struct crypto_aead *tfm = crypto_aead_reqtfm(req); - struct aegis_block tag; - unsigned int authsize = crypto_aead_authsize(tfm); - unsigned int cryptlen = req->cryptlen - authsize; - - scatterwalk_map_and_copy(tag.bytes, req->src, - req->assoclen + cryptlen, authsize, 0); - - crypto_aegis128l_aesni_crypt(req, &tag, cryptlen, &OPS); - - return crypto_memneq(tag.bytes, zeros.bytes, authsize) ? 
-EBADMSG : 0; -} - -static int crypto_aegis128l_aesni_init_tfm(struct crypto_aead *aead) -{ - return 0; -} - -static void crypto_aegis128l_aesni_exit_tfm(struct crypto_aead *aead) -{ -} - -static struct aead_alg crypto_aegis128l_aesni_alg = { - .setkey = crypto_aegis128l_aesni_setkey, - .setauthsize = crypto_aegis128l_aesni_setauthsize, - .encrypt = crypto_aegis128l_aesni_encrypt, - .decrypt = crypto_aegis128l_aesni_decrypt, - .init = crypto_aegis128l_aesni_init_tfm, - .exit = crypto_aegis128l_aesni_exit_tfm, - - .ivsize = AEGIS128L_NONCE_SIZE, - .maxauthsize = AEGIS128L_MAX_AUTH_SIZE, - .chunksize = AEGIS128L_BLOCK_SIZE, - - .base = { - .cra_flags = CRYPTO_ALG_INTERNAL, - .cra_blocksize = 1, - .cra_ctxsize = sizeof(struct aegis_ctx) + - __alignof__(struct aegis_ctx), - .cra_alignmask = 0, - .cra_priority = 400, - - .cra_name = "__aegis128l", - .cra_driver_name = "__aegis128l-aesni", - - .cra_module = THIS_MODULE, - } -}; - -static struct simd_aead_alg *simd_alg; - -static int __init crypto_aegis128l_aesni_module_init(void) -{ - if (!boot_cpu_has(X86_FEATURE_XMM2) || - !boot_cpu_has(X86_FEATURE_AES) || - !cpu_has_xfeatures(XFEATURE_MASK_SSE, NULL)) - return -ENODEV; - - return simd_register_aeads_compat(&crypto_aegis128l_aesni_alg, 1, - &simd_alg); -} - -static void __exit crypto_aegis128l_aesni_module_exit(void) -{ - simd_unregister_aeads(&crypto_aegis128l_aesni_alg, 1, &simd_alg); -} - -module_init(crypto_aegis128l_aesni_module_init); -module_exit(crypto_aegis128l_aesni_module_exit); - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Ondrej Mosnacek <omosnacek@gmail.com>"); -MODULE_DESCRIPTION("AEGIS-128L AEAD algorithm -- AESNI+SSE2 implementation"); -MODULE_ALIAS_CRYPTO("aegis128l"); -MODULE_ALIAS_CRYPTO("aegis128l-aesni"); diff --git a/arch/x86/crypto/aegis256-aesni-asm.S b/arch/x86/crypto/aegis256-aesni-asm.S deleted file mode 100644 index 37d9b13dfd85..000000000000 --- a/arch/x86/crypto/aegis256-aesni-asm.S +++ /dev/null @@ -1,700 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * AES-NI + SSE2 implementation of AEGIS-128L - * - * Copyright (c) 2017-2018 Ondrej Mosnacek <omosnacek@gmail.com> - * Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved. 
- */ - -#include <linux/linkage.h> -#include <asm/frame.h> - -#define STATE0 %xmm0 -#define STATE1 %xmm1 -#define STATE2 %xmm2 -#define STATE3 %xmm3 -#define STATE4 %xmm4 -#define STATE5 %xmm5 -#define MSG %xmm6 -#define T0 %xmm7 -#define T1 %xmm8 -#define T2 %xmm9 -#define T3 %xmm10 - -#define STATEP %rdi -#define LEN %rsi -#define SRC %rdx -#define DST %rcx - -.section .rodata.cst16.aegis256_const, "aM", @progbits, 32 -.align 16 -.Laegis256_const_0: - .byte 0x00, 0x01, 0x01, 0x02, 0x03, 0x05, 0x08, 0x0d - .byte 0x15, 0x22, 0x37, 0x59, 0x90, 0xe9, 0x79, 0x62 -.Laegis256_const_1: - .byte 0xdb, 0x3d, 0x18, 0x55, 0x6d, 0xc2, 0x2f, 0xf1 - .byte 0x20, 0x11, 0x31, 0x42, 0x73, 0xb5, 0x28, 0xdd - -.section .rodata.cst16.aegis256_counter, "aM", @progbits, 16 -.align 16 -.Laegis256_counter: - .byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07 - .byte 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f - -.text - -/* - * __load_partial: internal ABI - * input: - * LEN - bytes - * SRC - src - * output: - * MSG - message block - * changed: - * T0 - * %r8 - * %r9 - */ -__load_partial: - xor %r9d, %r9d - pxor MSG, MSG - - mov LEN, %r8 - and $0x1, %r8 - jz .Lld_partial_1 - - mov LEN, %r8 - and $0x1E, %r8 - add SRC, %r8 - mov (%r8), %r9b - -.Lld_partial_1: - mov LEN, %r8 - and $0x2, %r8 - jz .Lld_partial_2 - - mov LEN, %r8 - and $0x1C, %r8 - add SRC, %r8 - shl $0x10, %r9 - mov (%r8), %r9w - -.Lld_partial_2: - mov LEN, %r8 - and $0x4, %r8 - jz .Lld_partial_4 - - mov LEN, %r8 - and $0x18, %r8 - add SRC, %r8 - shl $32, %r9 - mov (%r8), %r8d - xor %r8, %r9 - -.Lld_partial_4: - movq %r9, MSG - - mov LEN, %r8 - and $0x8, %r8 - jz .Lld_partial_8 - - mov LEN, %r8 - and $0x10, %r8 - add SRC, %r8 - pslldq $8, MSG - movq (%r8), T0 - pxor T0, MSG - -.Lld_partial_8: - ret -ENDPROC(__load_partial) - -/* - * __store_partial: internal ABI - * input: - * LEN - bytes - * DST - dst - * output: - * T0 - message block - * changed: - * %r8 - * %r9 - * %r10 - */ -__store_partial: - mov LEN, %r8 - mov DST, %r9 - - movq T0, %r10 - - cmp $8, %r8 - jl .Lst_partial_8 - - mov %r10, (%r9) - psrldq $8, T0 - movq T0, %r10 - - sub $8, %r8 - add $8, %r9 - -.Lst_partial_8: - cmp $4, %r8 - jl .Lst_partial_4 - - mov %r10d, (%r9) - shr $32, %r10 - - sub $4, %r8 - add $4, %r9 - -.Lst_partial_4: - cmp $2, %r8 - jl .Lst_partial_2 - - mov %r10w, (%r9) - shr $0x10, %r10 - - sub $2, %r8 - add $2, %r9 - -.Lst_partial_2: - cmp $1, %r8 - jl .Lst_partial_1 - - mov %r10b, (%r9) - -.Lst_partial_1: - ret -ENDPROC(__store_partial) - -.macro update - movdqa STATE5, T0 - aesenc STATE0, STATE5 - aesenc STATE1, STATE0 - aesenc STATE2, STATE1 - aesenc STATE3, STATE2 - aesenc STATE4, STATE3 - aesenc T0, STATE4 -.endm - -.macro update0 m - update - pxor \m, STATE5 -.endm - -.macro update1 m - update - pxor \m, STATE4 -.endm - -.macro update2 m - update - pxor \m, STATE3 -.endm - -.macro update3 m - update - pxor \m, STATE2 -.endm - -.macro update4 m - update - pxor \m, STATE1 -.endm - -.macro update5 m - update - pxor \m, STATE0 -.endm - -.macro state_load - movdqu 0x00(STATEP), STATE0 - movdqu 0x10(STATEP), STATE1 - movdqu 0x20(STATEP), STATE2 - movdqu 0x30(STATEP), STATE3 - movdqu 0x40(STATEP), STATE4 - movdqu 0x50(STATEP), STATE5 -.endm - -.macro state_store s0 s1 s2 s3 s4 s5 - movdqu \s5, 0x00(STATEP) - movdqu \s0, 0x10(STATEP) - movdqu \s1, 0x20(STATEP) - movdqu \s2, 0x30(STATEP) - movdqu \s3, 0x40(STATEP) - movdqu \s4, 0x50(STATEP) -.endm - -.macro state_store0 - state_store STATE0 STATE1 STATE2 STATE3 STATE4 STATE5 -.endm - -.macro state_store1 - state_store 
STATE5 STATE0 STATE1 STATE2 STATE3 STATE4 -.endm - -.macro state_store2 - state_store STATE4 STATE5 STATE0 STATE1 STATE2 STATE3 -.endm - -.macro state_store3 - state_store STATE3 STATE4 STATE5 STATE0 STATE1 STATE2 -.endm - -.macro state_store4 - state_store STATE2 STATE3 STATE4 STATE5 STATE0 STATE1 -.endm - -.macro state_store5 - state_store STATE1 STATE2 STATE3 STATE4 STATE5 STATE0 -.endm - -/* - * void crypto_aegis256_aesni_init(void *state, const void *key, const void *iv); - */ -ENTRY(crypto_aegis256_aesni_init) - FRAME_BEGIN - - /* load key: */ - movdqa 0x00(%rsi), MSG - movdqa 0x10(%rsi), T1 - movdqa MSG, STATE4 - movdqa T1, STATE5 - - /* load IV: */ - movdqu 0x00(%rdx), T2 - movdqu 0x10(%rdx), T3 - pxor MSG, T2 - pxor T1, T3 - movdqa T2, STATE0 - movdqa T3, STATE1 - - /* load the constants: */ - movdqa .Laegis256_const_0, STATE3 - movdqa .Laegis256_const_1, STATE2 - pxor STATE3, STATE4 - pxor STATE2, STATE5 - - /* update 10 times with IV and KEY: */ - update0 MSG - update1 T1 - update2 T2 - update3 T3 - update4 MSG - update5 T1 - update0 T2 - update1 T3 - update2 MSG - update3 T1 - update4 T2 - update5 T3 - update0 MSG - update1 T1 - update2 T2 - update3 T3 - - state_store3 - - FRAME_END - ret -ENDPROC(crypto_aegis256_aesni_init) - -.macro ad_block a i - movdq\a (\i * 0x10)(SRC), MSG - update\i MSG - sub $0x10, LEN - cmp $0x10, LEN - jl .Lad_out_\i -.endm - -/* - * void crypto_aegis256_aesni_ad(void *state, unsigned int length, - * const void *data); - */ -ENTRY(crypto_aegis256_aesni_ad) - FRAME_BEGIN - - cmp $0x10, LEN - jb .Lad_out - - state_load - - mov SRC, %r8 - and $0xf, %r8 - jnz .Lad_u_loop - -.align 8 -.Lad_a_loop: - ad_block a 0 - ad_block a 1 - ad_block a 2 - ad_block a 3 - ad_block a 4 - ad_block a 5 - - add $0x60, SRC - jmp .Lad_a_loop - -.align 8 -.Lad_u_loop: - ad_block u 0 - ad_block u 1 - ad_block u 2 - ad_block u 3 - ad_block u 4 - ad_block u 5 - - add $0x60, SRC - jmp .Lad_u_loop - -.Lad_out_0: - state_store0 - FRAME_END - ret - -.Lad_out_1: - state_store1 - FRAME_END - ret - -.Lad_out_2: - state_store2 - FRAME_END - ret - -.Lad_out_3: - state_store3 - FRAME_END - ret - -.Lad_out_4: - state_store4 - FRAME_END - ret - -.Lad_out_5: - state_store5 - FRAME_END - ret - -.Lad_out: - FRAME_END - ret -ENDPROC(crypto_aegis256_aesni_ad) - -.macro crypt m s0 s1 s2 s3 s4 s5 - pxor \s1, \m - pxor \s4, \m - pxor \s5, \m - movdqa \s2, T3 - pand \s3, T3 - pxor T3, \m -.endm - -.macro crypt0 m - crypt \m STATE0 STATE1 STATE2 STATE3 STATE4 STATE5 -.endm - -.macro crypt1 m - crypt \m STATE5 STATE0 STATE1 STATE2 STATE3 STATE4 -.endm - -.macro crypt2 m - crypt \m STATE4 STATE5 STATE0 STATE1 STATE2 STATE3 -.endm - -.macro crypt3 m - crypt \m STATE3 STATE4 STATE5 STATE0 STATE1 STATE2 -.endm - -.macro crypt4 m - crypt \m STATE2 STATE3 STATE4 STATE5 STATE0 STATE1 -.endm - -.macro crypt5 m - crypt \m STATE1 STATE2 STATE3 STATE4 STATE5 STATE0 -.endm - -.macro encrypt_block a i - movdq\a (\i * 0x10)(SRC), MSG - movdqa MSG, T0 - crypt\i T0 - movdq\a T0, (\i * 0x10)(DST) - - update\i MSG - - sub $0x10, LEN - cmp $0x10, LEN - jl .Lenc_out_\i -.endm - -.macro decrypt_block a i - movdq\a (\i * 0x10)(SRC), MSG - crypt\i MSG - movdq\a MSG, (\i * 0x10)(DST) - - update\i MSG - - sub $0x10, LEN - cmp $0x10, LEN - jl .Ldec_out_\i -.endm - -/* - * void crypto_aegis256_aesni_enc(void *state, unsigned int length, - * const void *src, void *dst); - */ -ENTRY(crypto_aegis256_aesni_enc) - FRAME_BEGIN - - cmp $0x10, LEN - jb .Lenc_out - - state_load - - mov SRC, %r8 - or DST, %r8 - and $0xf, %r8 - jnz 
.Lenc_u_loop - -.align 8 -.Lenc_a_loop: - encrypt_block a 0 - encrypt_block a 1 - encrypt_block a 2 - encrypt_block a 3 - encrypt_block a 4 - encrypt_block a 5 - - add $0x60, SRC - add $0x60, DST - jmp .Lenc_a_loop - -.align 8 -.Lenc_u_loop: - encrypt_block u 0 - encrypt_block u 1 - encrypt_block u 2 - encrypt_block u 3 - encrypt_block u 4 - encrypt_block u 5 - - add $0x60, SRC - add $0x60, DST - jmp .Lenc_u_loop - -.Lenc_out_0: - state_store0 - FRAME_END - ret - -.Lenc_out_1: - state_store1 - FRAME_END - ret - -.Lenc_out_2: - state_store2 - FRAME_END - ret - -.Lenc_out_3: - state_store3 - FRAME_END - ret - -.Lenc_out_4: - state_store4 - FRAME_END - ret - -.Lenc_out_5: - state_store5 - FRAME_END - ret - -.Lenc_out: - FRAME_END - ret -ENDPROC(crypto_aegis256_aesni_enc) - -/* - * void crypto_aegis256_aesni_enc_tail(void *state, unsigned int length, - * const void *src, void *dst); - */ -ENTRY(crypto_aegis256_aesni_enc_tail) - FRAME_BEGIN - - state_load - - /* encrypt message: */ - call __load_partial - - movdqa MSG, T0 - crypt0 T0 - - call __store_partial - - update0 MSG - - state_store0 - - FRAME_END - ret -ENDPROC(crypto_aegis256_aesni_enc_tail) - -/* - * void crypto_aegis256_aesni_dec(void *state, unsigned int length, - * const void *src, void *dst); - */ -ENTRY(crypto_aegis256_aesni_dec) - FRAME_BEGIN - - cmp $0x10, LEN - jb .Ldec_out - - state_load - - mov SRC, %r8 - or DST, %r8 - and $0xF, %r8 - jnz .Ldec_u_loop - -.align 8 -.Ldec_a_loop: - decrypt_block a 0 - decrypt_block a 1 - decrypt_block a 2 - decrypt_block a 3 - decrypt_block a 4 - decrypt_block a 5 - - add $0x60, SRC - add $0x60, DST - jmp .Ldec_a_loop - -.align 8 -.Ldec_u_loop: - decrypt_block u 0 - decrypt_block u 1 - decrypt_block u 2 - decrypt_block u 3 - decrypt_block u 4 - decrypt_block u 5 - - add $0x60, SRC - add $0x60, DST - jmp .Ldec_u_loop - -.Ldec_out_0: - state_store0 - FRAME_END - ret - -.Ldec_out_1: - state_store1 - FRAME_END - ret - -.Ldec_out_2: - state_store2 - FRAME_END - ret - -.Ldec_out_3: - state_store3 - FRAME_END - ret - -.Ldec_out_4: - state_store4 - FRAME_END - ret - -.Ldec_out_5: - state_store5 - FRAME_END - ret - -.Ldec_out: - FRAME_END - ret -ENDPROC(crypto_aegis256_aesni_dec) - -/* - * void crypto_aegis256_aesni_dec_tail(void *state, unsigned int length, - * const void *src, void *dst); - */ -ENTRY(crypto_aegis256_aesni_dec_tail) - FRAME_BEGIN - - state_load - - /* decrypt message: */ - call __load_partial - - crypt0 MSG - - movdqa MSG, T0 - call __store_partial - - /* mask with byte count: */ - movq LEN, T0 - punpcklbw T0, T0 - punpcklbw T0, T0 - punpcklbw T0, T0 - punpcklbw T0, T0 - movdqa .Laegis256_counter, T1 - pcmpgtb T1, T0 - pand T0, MSG - - update0 MSG - - state_store0 - - FRAME_END - ret -ENDPROC(crypto_aegis256_aesni_dec_tail) - -/* - * void crypto_aegis256_aesni_final(void *state, void *tag_xor, - * u64 assoclen, u64 cryptlen); - */ -ENTRY(crypto_aegis256_aesni_final) - FRAME_BEGIN - - state_load - - /* prepare length block: */ - movq %rdx, MSG - movq %rcx, T0 - pslldq $8, T0 - pxor T0, MSG - psllq $3, MSG /* multiply by 8 (to get bit count) */ - - pxor STATE3, MSG - - /* update state: */ - update0 MSG - update1 MSG - update2 MSG - update3 MSG - update4 MSG - update5 MSG - update0 MSG - - /* xor tag: */ - movdqu (%rsi), MSG - - pxor STATE0, MSG - pxor STATE1, MSG - pxor STATE2, MSG - pxor STATE3, MSG - pxor STATE4, MSG - pxor STATE5, MSG - - movdqu MSG, (%rsi) - - FRAME_END - ret -ENDPROC(crypto_aegis256_aesni_final) diff --git a/arch/x86/crypto/aegis256-aesni-glue.c 
b/arch/x86/crypto/aegis256-aesni-glue.c deleted file mode 100644 index f84da27171d3..000000000000 --- a/arch/x86/crypto/aegis256-aesni-glue.c +++ /dev/null @@ -1,293 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * The AEGIS-256 Authenticated-Encryption Algorithm - * Glue for AES-NI + SSE2 implementation - * - * Copyright (c) 2017-2018 Ondrej Mosnacek <omosnacek@gmail.com> - * Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved. - */ - -#include <crypto/internal/aead.h> -#include <crypto/internal/simd.h> -#include <crypto/internal/skcipher.h> -#include <crypto/scatterwalk.h> -#include <linux/module.h> -#include <asm/fpu/api.h> -#include <asm/cpu_device_id.h> - -#define AEGIS256_BLOCK_ALIGN 16 -#define AEGIS256_BLOCK_SIZE 16 -#define AEGIS256_NONCE_SIZE 32 -#define AEGIS256_STATE_BLOCKS 6 -#define AEGIS256_KEY_SIZE 32 -#define AEGIS256_MIN_AUTH_SIZE 8 -#define AEGIS256_MAX_AUTH_SIZE 16 - -asmlinkage void crypto_aegis256_aesni_init(void *state, void *key, void *iv); - -asmlinkage void crypto_aegis256_aesni_ad( - void *state, unsigned int length, const void *data); - -asmlinkage void crypto_aegis256_aesni_enc( - void *state, unsigned int length, const void *src, void *dst); - -asmlinkage void crypto_aegis256_aesni_dec( - void *state, unsigned int length, const void *src, void *dst); - -asmlinkage void crypto_aegis256_aesni_enc_tail( - void *state, unsigned int length, const void *src, void *dst); - -asmlinkage void crypto_aegis256_aesni_dec_tail( - void *state, unsigned int length, const void *src, void *dst); - -asmlinkage void crypto_aegis256_aesni_final( - void *state, void *tag_xor, unsigned int cryptlen, - unsigned int assoclen); - -struct aegis_block { - u8 bytes[AEGIS256_BLOCK_SIZE] __aligned(AEGIS256_BLOCK_ALIGN); -}; - -struct aegis_state { - struct aegis_block blocks[AEGIS256_STATE_BLOCKS]; -}; - -struct aegis_ctx { - struct aegis_block key[AEGIS256_KEY_SIZE / AEGIS256_BLOCK_SIZE]; -}; - -struct aegis_crypt_ops { - int (*skcipher_walk_init)(struct skcipher_walk *walk, - struct aead_request *req, bool atomic); - - void (*crypt_blocks)(void *state, unsigned int length, const void *src, - void *dst); - void (*crypt_tail)(void *state, unsigned int length, const void *src, - void *dst); -}; - -static void crypto_aegis256_aesni_process_ad( - struct aegis_state *state, struct scatterlist *sg_src, - unsigned int assoclen) -{ - struct scatter_walk walk; - struct aegis_block buf; - unsigned int pos = 0; - - scatterwalk_start(&walk, sg_src); - while (assoclen != 0) { - unsigned int size = scatterwalk_clamp(&walk, assoclen); - unsigned int left = size; - void *mapped = scatterwalk_map(&walk); - const u8 *src = (const u8 *)mapped; - - if (pos + size >= AEGIS256_BLOCK_SIZE) { - if (pos > 0) { - unsigned int fill = AEGIS256_BLOCK_SIZE - pos; - memcpy(buf.bytes + pos, src, fill); - crypto_aegis256_aesni_ad(state, - AEGIS256_BLOCK_SIZE, - buf.bytes); - pos = 0; - left -= fill; - src += fill; - } - - crypto_aegis256_aesni_ad(state, left, src); - - src += left & ~(AEGIS256_BLOCK_SIZE - 1); - left &= AEGIS256_BLOCK_SIZE - 1; - } - - memcpy(buf.bytes + pos, src, left); - pos += left; - assoclen -= size; - - scatterwalk_unmap(mapped); - scatterwalk_advance(&walk, size); - scatterwalk_done(&walk, 0, assoclen); - } - - if (pos > 0) { - memset(buf.bytes + pos, 0, AEGIS256_BLOCK_SIZE - pos); - crypto_aegis256_aesni_ad(state, AEGIS256_BLOCK_SIZE, buf.bytes); - } -} - -static void crypto_aegis256_aesni_process_crypt( - struct aegis_state *state, struct skcipher_walk *walk, - const struct 
aegis_crypt_ops *ops) -{ - while (walk->nbytes >= AEGIS256_BLOCK_SIZE) { - ops->crypt_blocks(state, - round_down(walk->nbytes, AEGIS256_BLOCK_SIZE), - walk->src.virt.addr, walk->dst.virt.addr); - skcipher_walk_done(walk, walk->nbytes % AEGIS256_BLOCK_SIZE); - } - - if (walk->nbytes) { - ops->crypt_tail(state, walk->nbytes, walk->src.virt.addr, - walk->dst.virt.addr); - skcipher_walk_done(walk, 0); - } -} - -static struct aegis_ctx *crypto_aegis256_aesni_ctx(struct crypto_aead *aead) -{ - u8 *ctx = crypto_aead_ctx(aead); - ctx = PTR_ALIGN(ctx, __alignof__(struct aegis_ctx)); - return (void *)ctx; -} - -static int crypto_aegis256_aesni_setkey(struct crypto_aead *aead, const u8 *key, - unsigned int keylen) -{ - struct aegis_ctx *ctx = crypto_aegis256_aesni_ctx(aead); - - if (keylen != AEGIS256_KEY_SIZE) { - crypto_aead_set_flags(aead, CRYPTO_TFM_RES_BAD_KEY_LEN); - return -EINVAL; - } - - memcpy(ctx->key, key, AEGIS256_KEY_SIZE); - - return 0; -} - -static int crypto_aegis256_aesni_setauthsize(struct crypto_aead *tfm, - unsigned int authsize) -{ - if (authsize > AEGIS256_MAX_AUTH_SIZE) - return -EINVAL; - if (authsize < AEGIS256_MIN_AUTH_SIZE) - return -EINVAL; - return 0; -} - -static void crypto_aegis256_aesni_crypt(struct aead_request *req, - struct aegis_block *tag_xor, - unsigned int cryptlen, - const struct aegis_crypt_ops *ops) -{ - struct crypto_aead *tfm = crypto_aead_reqtfm(req); - struct aegis_ctx *ctx = crypto_aegis256_aesni_ctx(tfm); - struct skcipher_walk walk; - struct aegis_state state; - - ops->skcipher_walk_init(&walk, req, true); - - kernel_fpu_begin(); - - crypto_aegis256_aesni_init(&state, ctx->key, req->iv); - crypto_aegis256_aesni_process_ad(&state, req->src, req->assoclen); - crypto_aegis256_aesni_process_crypt(&state, &walk, ops); - crypto_aegis256_aesni_final(&state, tag_xor, req->assoclen, cryptlen); - - kernel_fpu_end(); -} - -static int crypto_aegis256_aesni_encrypt(struct aead_request *req) -{ - static const struct aegis_crypt_ops OPS = { - .skcipher_walk_init = skcipher_walk_aead_encrypt, - .crypt_blocks = crypto_aegis256_aesni_enc, - .crypt_tail = crypto_aegis256_aesni_enc_tail, - }; - - struct crypto_aead *tfm = crypto_aead_reqtfm(req); - struct aegis_block tag = {}; - unsigned int authsize = crypto_aead_authsize(tfm); - unsigned int cryptlen = req->cryptlen; - - crypto_aegis256_aesni_crypt(req, &tag, cryptlen, &OPS); - - scatterwalk_map_and_copy(tag.bytes, req->dst, - req->assoclen + cryptlen, authsize, 1); - return 0; -} - -static int crypto_aegis256_aesni_decrypt(struct aead_request *req) -{ - static const struct aegis_block zeros = {}; - - static const struct aegis_crypt_ops OPS = { - .skcipher_walk_init = skcipher_walk_aead_decrypt, - .crypt_blocks = crypto_aegis256_aesni_dec, - .crypt_tail = crypto_aegis256_aesni_dec_tail, - }; - - struct crypto_aead *tfm = crypto_aead_reqtfm(req); - struct aegis_block tag; - unsigned int authsize = crypto_aead_authsize(tfm); - unsigned int cryptlen = req->cryptlen - authsize; - - scatterwalk_map_and_copy(tag.bytes, req->src, - req->assoclen + cryptlen, authsize, 0); - - crypto_aegis256_aesni_crypt(req, &tag, cryptlen, &OPS); - - return crypto_memneq(tag.bytes, zeros.bytes, authsize) ? 
-EBADMSG : 0; -} - -static int crypto_aegis256_aesni_init_tfm(struct crypto_aead *aead) -{ - return 0; -} - -static void crypto_aegis256_aesni_exit_tfm(struct crypto_aead *aead) -{ -} - -static struct aead_alg crypto_aegis256_aesni_alg = { - .setkey = crypto_aegis256_aesni_setkey, - .setauthsize = crypto_aegis256_aesni_setauthsize, - .encrypt = crypto_aegis256_aesni_encrypt, - .decrypt = crypto_aegis256_aesni_decrypt, - .init = crypto_aegis256_aesni_init_tfm, - .exit = crypto_aegis256_aesni_exit_tfm, - - .ivsize = AEGIS256_NONCE_SIZE, - .maxauthsize = AEGIS256_MAX_AUTH_SIZE, - .chunksize = AEGIS256_BLOCK_SIZE, - - .base = { - .cra_flags = CRYPTO_ALG_INTERNAL, - .cra_blocksize = 1, - .cra_ctxsize = sizeof(struct aegis_ctx) + - __alignof__(struct aegis_ctx), - .cra_alignmask = 0, - .cra_priority = 400, - - .cra_name = "__aegis256", - .cra_driver_name = "__aegis256-aesni", - - .cra_module = THIS_MODULE, - } -}; - -static struct simd_aead_alg *simd_alg; - -static int __init crypto_aegis256_aesni_module_init(void) -{ - if (!boot_cpu_has(X86_FEATURE_XMM2) || - !boot_cpu_has(X86_FEATURE_AES) || - !cpu_has_xfeatures(XFEATURE_MASK_SSE, NULL)) - return -ENODEV; - - return simd_register_aeads_compat(&crypto_aegis256_aesni_alg, 1, - &simd_alg); -} - -static void __exit crypto_aegis256_aesni_module_exit(void) -{ - simd_unregister_aeads(&crypto_aegis256_aesni_alg, 1, &simd_alg); -} - -module_init(crypto_aegis256_aesni_module_init); -module_exit(crypto_aegis256_aesni_module_exit); - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Ondrej Mosnacek <omosnacek@gmail.com>"); -MODULE_DESCRIPTION("AEGIS-256 AEAD algorithm -- AESNI+SSE2 implementation"); -MODULE_ALIAS_CRYPTO("aegis256"); -MODULE_ALIAS_CRYPTO("aegis256-aesni"); diff --git a/arch/x86/crypto/aes-i586-asm_32.S b/arch/x86/crypto/aes-i586-asm_32.S deleted file mode 100644 index 2849dbc59e11..000000000000 --- a/arch/x86/crypto/aes-i586-asm_32.S +++ /dev/null @@ -1,362 +0,0 @@ -// ------------------------------------------------------------------------- -// Copyright (c) 2001, Dr Brian Gladman < >, Worcester, UK. -// All rights reserved. -// -// LICENSE TERMS -// -// The free distribution and use of this software in both source and binary -// form is allowed (with or without changes) provided that: -// -// 1. distributions of this source code include the above copyright -// notice, this list of conditions and the following disclaimer// -// -// 2. distributions in binary form include the above copyright -// notice, this list of conditions and the following disclaimer -// in the documentation and/or other associated materials// -// -// 3. the copyright holder's name is not used to endorse products -// built using this software without specific written permission. -// -// -// ALTERNATIVELY, provided that this notice is retained in full, this product -// may be distributed under the terms of the GNU General Public License (GPL), -// in which case the provisions of the GPL apply INSTEAD OF those given above. -// -// Copyright (c) 2004 Linus Torvalds <torvalds@osdl.org> -// Copyright (c) 2004 Red Hat, Inc., James Morris <jmorris@redhat.com> - -// DISCLAIMER -// -// This software is provided 'as is' with no explicit or implied warranties -// in respect of its properties including, but not limited to, correctness -// and fitness for purpose. 
-// ------------------------------------------------------------------------- -// Issue Date: 29/07/2002 - -.file "aes-i586-asm.S" -.text - -#include <linux/linkage.h> -#include <asm/asm-offsets.h> - -#define tlen 1024 // length of each of 4 'xor' arrays (256 32-bit words) - -/* offsets to parameters with one register pushed onto stack */ -#define ctx 8 -#define out_blk 12 -#define in_blk 16 - -/* offsets in crypto_aes_ctx structure */ -#define klen (480) -#define ekey (0) -#define dkey (240) - -// register mapping for encrypt and decrypt subroutines - -#define r0 eax -#define r1 ebx -#define r2 ecx -#define r3 edx -#define r4 esi -#define r5 edi - -#define eaxl al -#define eaxh ah -#define ebxl bl -#define ebxh bh -#define ecxl cl -#define ecxh ch -#define edxl dl -#define edxh dh - -#define _h(reg) reg##h -#define h(reg) _h(reg) - -#define _l(reg) reg##l -#define l(reg) _l(reg) - -// This macro takes a 32-bit word representing a column and uses -// each of its four bytes to index into four tables of 256 32-bit -// words to obtain values that are then xored into the appropriate -// output registers r0, r1, r4 or r5. - -// Parameters: -// table table base address -// %1 out_state[0] -// %2 out_state[1] -// %3 out_state[2] -// %4 out_state[3] -// idx input register for the round (destroyed) -// tmp scratch register for the round -// sched key schedule - -#define do_col(table, a1,a2,a3,a4, idx, tmp) \ - movzx %l(idx),%tmp; \ - xor table(,%tmp,4),%a1; \ - movzx %h(idx),%tmp; \ - shr $16,%idx; \ - xor table+tlen(,%tmp,4),%a2; \ - movzx %l(idx),%tmp; \ - movzx %h(idx),%idx; \ - xor table+2*tlen(,%tmp,4),%a3; \ - xor table+3*tlen(,%idx,4),%a4; - -// initialise output registers from the key schedule -// NB1: original value of a3 is in idx on exit -// NB2: original values of a1,a2,a4 aren't used -#define do_fcol(table, a1,a2,a3,a4, idx, tmp, sched) \ - mov 0 sched,%a1; \ - movzx %l(idx),%tmp; \ - mov 12 sched,%a2; \ - xor table(,%tmp,4),%a1; \ - mov 4 sched,%a4; \ - movzx %h(idx),%tmp; \ - shr $16,%idx; \ - xor table+tlen(,%tmp,4),%a2; \ - movzx %l(idx),%tmp; \ - movzx %h(idx),%idx; \ - xor table+3*tlen(,%idx,4),%a4; \ - mov %a3,%idx; \ - mov 8 sched,%a3; \ - xor table+2*tlen(,%tmp,4),%a3; - -// initialise output registers from the key schedule -// NB1: original value of a3 is in idx on exit -// NB2: original values of a1,a2,a4 aren't used -#define do_icol(table, a1,a2,a3,a4, idx, tmp, sched) \ - mov 0 sched,%a1; \ - movzx %l(idx),%tmp; \ - mov 4 sched,%a2; \ - xor table(,%tmp,4),%a1; \ - mov 12 sched,%a4; \ - movzx %h(idx),%tmp; \ - shr $16,%idx; \ - xor table+tlen(,%tmp,4),%a2; \ - movzx %l(idx),%tmp; \ - movzx %h(idx),%idx; \ - xor table+3*tlen(,%idx,4),%a4; \ - mov %a3,%idx; \ - mov 8 sched,%a3; \ - xor table+2*tlen(,%tmp,4),%a3; - - -// original Gladman had conditional saves to MMX regs. -#define save(a1, a2) \ - mov %a2,4*a1(%esp) - -#define restore(a1, a2) \ - mov 4*a2(%esp),%a1 - -// These macros perform a forward encryption cycle. They are entered with -// the first previous round column values in r0,r1,r4,r5 and -// exit with the final values in the same registers, using stack -// for temporary storage. 
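
For reference, each do_col step described above is the classic 32-bit table-lookup formulation of an AES round: the four bytes of one input column each select an entry from one of four 256-entry tables (laid out back to back, tlen bytes apart), and each result is XORed into a different output column. A C sketch of the same computation — do_col_sketch and ft are illustrative names for the table layout the macros assume, not the kernel's code:

    #include <stdint.h>

    /*
     * One do_col step: 'idx' is the input column, out[0..3] are the four
     * output columns being accumulated (a1..a4 in the macro).  Each table
     * holds 256 32-bit words, i.e. tlen == 1024 bytes.
     */
    static void do_col_sketch(const uint32_t ft[4][256], uint32_t idx,
    			      uint32_t out[4])
    {
    	out[0] ^= ft[0][(idx >>  0) & 0xff];	/* xor table(,%tmp,4)        */
    	out[1] ^= ft[1][(idx >>  8) & 0xff];	/* xor table+tlen(,%tmp,4)   */
    	out[2] ^= ft[2][(idx >> 16) & 0xff];	/* xor table+2*tlen(,%tmp,4) */
    	out[3] ^= ft[3][(idx >> 24) & 0xff];	/* xor table+3*tlen(,%idx,4) */
    }

A full round applies this to all four input columns, rotating which registers receive the results; the fwd_rnd1/fwd_rnd2 macros below spell out that rotation, with do_fcol additionally loading the round key words into the output registers first.
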
- -// round column values -// on entry: r0,r1,r4,r5 -// on exit: r2,r1,r4,r5 -#define fwd_rnd1(arg, table) \ - save (0,r1); \ - save (1,r5); \ - \ - /* compute new column values */ \ - do_fcol(table, r2,r5,r4,r1, r0,r3, arg); /* idx=r0 */ \ - do_col (table, r4,r1,r2,r5, r0,r3); /* idx=r4 */ \ - restore(r0,0); \ - do_col (table, r1,r2,r5,r4, r0,r3); /* idx=r1 */ \ - restore(r0,1); \ - do_col (table, r5,r4,r1,r2, r0,r3); /* idx=r5 */ - -// round column values -// on entry: r2,r1,r4,r5 -// on exit: r0,r1,r4,r5 -#define fwd_rnd2(arg, table) \ - save (0,r1); \ - save (1,r5); \ - \ - /* compute new column values */ \ - do_fcol(table, r0,r5,r4,r1, r2,r3, arg); /* idx=r2 */ \ - do_col (table, r4,r1,r0,r5, r2,r3); /* idx=r4 */ \ - restore(r2,0); \ - do_col (table, r1,r0,r5,r4, r2,r3); /* idx=r1 */ \ - restore(r2,1); \ - do_col (table, r5,r4,r1,r0, r2,r3); /* idx=r5 */ - -// These macros performs an inverse encryption cycle. They are entered with -// the first previous round column values in r0,r1,r4,r5 and -// exit with the final values in the same registers, using stack -// for temporary storage - -// round column values -// on entry: r0,r1,r4,r5 -// on exit: r2,r1,r4,r5 -#define inv_rnd1(arg, table) \ - save (0,r1); \ - save (1,r5); \ - \ - /* compute new column values */ \ - do_icol(table, r2,r1,r4,r5, r0,r3, arg); /* idx=r0 */ \ - do_col (table, r4,r5,r2,r1, r0,r3); /* idx=r4 */ \ - restore(r0,0); \ - do_col (table, r1,r4,r5,r2, r0,r3); /* idx=r1 */ \ - restore(r0,1); \ - do_col (table, r5,r2,r1,r4, r0,r3); /* idx=r5 */ - -// round column values -// on entry: r2,r1,r4,r5 -// on exit: r0,r1,r4,r5 -#define inv_rnd2(arg, table) \ - save (0,r1); \ - save (1,r5); \ - \ - /* compute new column values */ \ - do_icol(table, r0,r1,r4,r5, r2,r3, arg); /* idx=r2 */ \ - do_col (table, r4,r5,r0,r1, r2,r3); /* idx=r4 */ \ - restore(r2,0); \ - do_col (table, r1,r4,r5,r0, r2,r3); /* idx=r1 */ \ - restore(r2,1); \ - do_col (table, r5,r0,r1,r4, r2,r3); /* idx=r5 */ - -// AES (Rijndael) Encryption Subroutine -/* void aes_enc_blk(struct crypto_aes_ctx *ctx, u8 *out_blk, const u8 *in_blk) */ - -.extern crypto_ft_tab -.extern crypto_fl_tab - -ENTRY(aes_enc_blk) - push %ebp - mov ctx(%esp),%ebp - -// CAUTION: the order and the values used in these assigns -// rely on the register mappings - -1: push %ebx - mov in_blk+4(%esp),%r2 - push %esi - mov klen(%ebp),%r3 // key size - push %edi -#if ekey != 0 - lea ekey(%ebp),%ebp // key pointer -#endif - -// input four columns and xor in first round key - - mov (%r2),%r0 - mov 4(%r2),%r1 - mov 8(%r2),%r4 - mov 12(%r2),%r5 - xor (%ebp),%r0 - xor 4(%ebp),%r1 - xor 8(%ebp),%r4 - xor 12(%ebp),%r5 - - sub $8,%esp // space for register saves on stack - add $16,%ebp // increment to next round key - cmp $24,%r3 - jb 4f // 10 rounds for 128-bit key - lea 32(%ebp),%ebp - je 3f // 12 rounds for 192-bit key - lea 32(%ebp),%ebp - -2: fwd_rnd1( -64(%ebp), crypto_ft_tab) // 14 rounds for 256-bit key - fwd_rnd2( -48(%ebp), crypto_ft_tab) -3: fwd_rnd1( -32(%ebp), crypto_ft_tab) // 12 rounds for 192-bit key - fwd_rnd2( -16(%ebp), crypto_ft_tab) -4: fwd_rnd1( (%ebp), crypto_ft_tab) // 10 rounds for 128-bit key - fwd_rnd2( +16(%ebp), crypto_ft_tab) - fwd_rnd1( +32(%ebp), crypto_ft_tab) - fwd_rnd2( +48(%ebp), crypto_ft_tab) - fwd_rnd1( +64(%ebp), crypto_ft_tab) - fwd_rnd2( +80(%ebp), crypto_ft_tab) - fwd_rnd1( +96(%ebp), crypto_ft_tab) - fwd_rnd2(+112(%ebp), crypto_ft_tab) - fwd_rnd1(+128(%ebp), crypto_ft_tab) - fwd_rnd2(+144(%ebp), crypto_fl_tab) // last round uses a different table - -// move 
final values to the output array. CAUTION: the -// order of these assigns rely on the register mappings - - add $8,%esp - mov out_blk+12(%esp),%ebp - mov %r5,12(%ebp) - pop %edi - mov %r4,8(%ebp) - pop %esi - mov %r1,4(%ebp) - pop %ebx - mov %r0,(%ebp) - pop %ebp - ret -ENDPROC(aes_enc_blk) - -// AES (Rijndael) Decryption Subroutine -/* void aes_dec_blk(struct crypto_aes_ctx *ctx, u8 *out_blk, const u8 *in_blk) */ - -.extern crypto_it_tab -.extern crypto_il_tab - -ENTRY(aes_dec_blk) - push %ebp - mov ctx(%esp),%ebp - -// CAUTION: the order and the values used in these assigns -// rely on the register mappings - -1: push %ebx - mov in_blk+4(%esp),%r2 - push %esi - mov klen(%ebp),%r3 // key size - push %edi -#if dkey != 0 - lea dkey(%ebp),%ebp // key pointer -#endif - -// input four columns and xor in first round key - - mov (%r2),%r0 - mov 4(%r2),%r1 - mov 8(%r2),%r4 - mov 12(%r2),%r5 - xor (%ebp),%r0 - xor 4(%ebp),%r1 - xor 8(%ebp),%r4 - xor 12(%ebp),%r5 - - sub $8,%esp // space for register saves on stack - add $16,%ebp // increment to next round key - cmp $24,%r3 - jb 4f // 10 rounds for 128-bit key - lea 32(%ebp),%ebp - je 3f // 12 rounds for 192-bit key - lea 32(%ebp),%ebp - -2: inv_rnd1( -64(%ebp), crypto_it_tab) // 14 rounds for 256-bit key - inv_rnd2( -48(%ebp), crypto_it_tab) -3: inv_rnd1( -32(%ebp), crypto_it_tab) // 12 rounds for 192-bit key - inv_rnd2( -16(%ebp), crypto_it_tab) -4: inv_rnd1( (%ebp), crypto_it_tab) // 10 rounds for 128-bit key - inv_rnd2( +16(%ebp), crypto_it_tab) - inv_rnd1( +32(%ebp), crypto_it_tab) - inv_rnd2( +48(%ebp), crypto_it_tab) - inv_rnd1( +64(%ebp), crypto_it_tab) - inv_rnd2( +80(%ebp), crypto_it_tab) - inv_rnd1( +96(%ebp), crypto_it_tab) - inv_rnd2(+112(%ebp), crypto_it_tab) - inv_rnd1(+128(%ebp), crypto_it_tab) - inv_rnd2(+144(%ebp), crypto_il_tab) // last round uses a different table - -// move final values to the output array. CAUTION: the -// order of these assigns rely on the register mappings - - add $8,%esp - mov out_blk+12(%esp),%ebp - mov %r5,12(%ebp) - pop %edi - mov %r4,8(%ebp) - pop %esi - mov %r1,4(%ebp) - pop %ebx - mov %r0,(%ebp) - pop %ebp - ret -ENDPROC(aes_dec_blk) diff --git a/arch/x86/crypto/aes-x86_64-asm_64.S b/arch/x86/crypto/aes-x86_64-asm_64.S deleted file mode 100644 index 8739cf7795de..000000000000 --- a/arch/x86/crypto/aes-x86_64-asm_64.S +++ /dev/null @@ -1,185 +0,0 @@ -/* AES (Rijndael) implementation (FIPS PUB 197) for x86_64 - * - * Copyright (C) 2005 Andreas Steinmetz, <ast@domdv.de> - * - * License: - * This code can be distributed under the terms of the GNU General Public - * License (GPL) Version 2 provided that the above header down to and - * including this sentence is retained in full. 
- */ - -.extern crypto_ft_tab -.extern crypto_it_tab -.extern crypto_fl_tab -.extern crypto_il_tab - -.text - -#include <linux/linkage.h> -#include <asm/asm-offsets.h> - -#define R1 %rax -#define R1E %eax -#define R1X %ax -#define R1H %ah -#define R1L %al -#define R2 %rbx -#define R2E %ebx -#define R2X %bx -#define R2H %bh -#define R2L %bl -#define R3 %rcx -#define R3E %ecx -#define R3X %cx -#define R3H %ch -#define R3L %cl -#define R4 %rdx -#define R4E %edx -#define R4X %dx -#define R4H %dh -#define R4L %dl -#define R5 %rsi -#define R5E %esi -#define R6 %rdi -#define R6E %edi -#define R7 %r9 /* don't use %rbp; it breaks stack traces */ -#define R7E %r9d -#define R8 %r8 -#define R10 %r10 -#define R11 %r11 - -#define prologue(FUNC,KEY,B128,B192,r1,r2,r5,r6,r7,r8,r9,r10,r11) \ - ENTRY(FUNC); \ - movq r1,r2; \ - leaq KEY+48(r8),r9; \ - movq r10,r11; \ - movl (r7),r5 ## E; \ - movl 4(r7),r1 ## E; \ - movl 8(r7),r6 ## E; \ - movl 12(r7),r7 ## E; \ - movl 480(r8),r10 ## E; \ - xorl -48(r9),r5 ## E; \ - xorl -44(r9),r1 ## E; \ - xorl -40(r9),r6 ## E; \ - xorl -36(r9),r7 ## E; \ - cmpl $24,r10 ## E; \ - jb B128; \ - leaq 32(r9),r9; \ - je B192; \ - leaq 32(r9),r9; - -#define epilogue(FUNC,r1,r2,r5,r6,r7,r8,r9) \ - movq r1,r2; \ - movl r5 ## E,(r9); \ - movl r6 ## E,4(r9); \ - movl r7 ## E,8(r9); \ - movl r8 ## E,12(r9); \ - ret; \ - ENDPROC(FUNC); - -#define round(TAB,OFFSET,r1,r2,r3,r4,r5,r6,r7,r8,ra,rb,rc,rd) \ - movzbl r2 ## H,r5 ## E; \ - movzbl r2 ## L,r6 ## E; \ - movl TAB+1024(,r5,4),r5 ## E;\ - movw r4 ## X,r2 ## X; \ - movl TAB(,r6,4),r6 ## E; \ - roll $16,r2 ## E; \ - shrl $16,r4 ## E; \ - movzbl r4 ## L,r7 ## E; \ - movzbl r4 ## H,r4 ## E; \ - xorl OFFSET(r8),ra ## E; \ - xorl OFFSET+4(r8),rb ## E; \ - xorl TAB+3072(,r4,4),r5 ## E;\ - xorl TAB+2048(,r7,4),r6 ## E;\ - movzbl r1 ## L,r7 ## E; \ - movzbl r1 ## H,r4 ## E; \ - movl TAB+1024(,r4,4),r4 ## E;\ - movw r3 ## X,r1 ## X; \ - roll $16,r1 ## E; \ - shrl $16,r3 ## E; \ - xorl TAB(,r7,4),r5 ## E; \ - movzbl r3 ## L,r7 ## E; \ - movzbl r3 ## H,r3 ## E; \ - xorl TAB+3072(,r3,4),r4 ## E;\ - xorl TAB+2048(,r7,4),r5 ## E;\ - movzbl r1 ## L,r7 ## E; \ - movzbl r1 ## H,r3 ## E; \ - shrl $16,r1 ## E; \ - xorl TAB+3072(,r3,4),r6 ## E;\ - movl TAB+2048(,r7,4),r3 ## E;\ - movzbl r1 ## L,r7 ## E; \ - movzbl r1 ## H,r1 ## E; \ - xorl TAB+1024(,r1,4),r6 ## E;\ - xorl TAB(,r7,4),r3 ## E; \ - movzbl r2 ## H,r1 ## E; \ - movzbl r2 ## L,r7 ## E; \ - shrl $16,r2 ## E; \ - xorl TAB+3072(,r1,4),r3 ## E;\ - xorl TAB+2048(,r7,4),r4 ## E;\ - movzbl r2 ## H,r1 ## E; \ - movzbl r2 ## L,r2 ## E; \ - xorl OFFSET+8(r8),rc ## E; \ - xorl OFFSET+12(r8),rd ## E; \ - xorl TAB+1024(,r1,4),r3 ## E;\ - xorl TAB(,r2,4),r4 ## E; - -#define move_regs(r1,r2,r3,r4) \ - movl r3 ## E,r1 ## E; \ - movl r4 ## E,r2 ## E; - -#define entry(FUNC,KEY,B128,B192) \ - prologue(FUNC,KEY,B128,B192,R2,R8,R1,R3,R4,R6,R10,R5,R11) - -#define return(FUNC) epilogue(FUNC,R8,R2,R5,R6,R3,R4,R11) - -#define encrypt_round(TAB,OFFSET) \ - round(TAB,OFFSET,R1,R2,R3,R4,R5,R6,R7,R10,R5,R6,R3,R4) \ - move_regs(R1,R2,R5,R6) - -#define encrypt_final(TAB,OFFSET) \ - round(TAB,OFFSET,R1,R2,R3,R4,R5,R6,R7,R10,R5,R6,R3,R4) - -#define decrypt_round(TAB,OFFSET) \ - round(TAB,OFFSET,R2,R1,R4,R3,R6,R5,R7,R10,R5,R6,R3,R4) \ - move_regs(R1,R2,R5,R6) - -#define decrypt_final(TAB,OFFSET) \ - round(TAB,OFFSET,R2,R1,R4,R3,R6,R5,R7,R10,R5,R6,R3,R4) - -/* void aes_enc_blk(stuct crypto_tfm *tfm, u8 *out, const u8 *in) */ - - entry(aes_enc_blk,0,.Le128,.Le192) - encrypt_round(crypto_ft_tab,-96) - 
encrypt_round(crypto_ft_tab,-80) -.Le192: encrypt_round(crypto_ft_tab,-64) - encrypt_round(crypto_ft_tab,-48) -.Le128: encrypt_round(crypto_ft_tab,-32) - encrypt_round(crypto_ft_tab,-16) - encrypt_round(crypto_ft_tab, 0) - encrypt_round(crypto_ft_tab, 16) - encrypt_round(crypto_ft_tab, 32) - encrypt_round(crypto_ft_tab, 48) - encrypt_round(crypto_ft_tab, 64) - encrypt_round(crypto_ft_tab, 80) - encrypt_round(crypto_ft_tab, 96) - encrypt_final(crypto_fl_tab,112) - return(aes_enc_blk) - -/* void aes_dec_blk(struct crypto_tfm *tfm, u8 *out, const u8 *in) */ - - entry(aes_dec_blk,240,.Ld128,.Ld192) - decrypt_round(crypto_it_tab,-96) - decrypt_round(crypto_it_tab,-80) -.Ld192: decrypt_round(crypto_it_tab,-64) - decrypt_round(crypto_it_tab,-48) -.Ld128: decrypt_round(crypto_it_tab,-32) - decrypt_round(crypto_it_tab,-16) - decrypt_round(crypto_it_tab, 0) - decrypt_round(crypto_it_tab, 16) - decrypt_round(crypto_it_tab, 32) - decrypt_round(crypto_it_tab, 48) - decrypt_round(crypto_it_tab, 64) - decrypt_round(crypto_it_tab, 80) - decrypt_round(crypto_it_tab, 96) - decrypt_final(crypto_il_tab,112) - return(aes_dec_blk) diff --git a/arch/x86/crypto/aes_glue.c b/arch/x86/crypto/aes_glue.c index 9e9d819e8bc3..7b7dc05fa1a4 100644 --- a/arch/x86/crypto/aes_glue.c +++ b/arch/x86/crypto/aes_glue.c @@ -1,71 +1 @@ // SPDX-License-Identifier: GPL-2.0-only -/* - * Glue Code for the asm optimized version of the AES Cipher Algorithm - * - */ - -#include <linux/module.h> -#include <crypto/aes.h> -#include <asm/crypto/aes.h> - -asmlinkage void aes_enc_blk(struct crypto_aes_ctx *ctx, u8 *out, const u8 *in); -asmlinkage void aes_dec_blk(struct crypto_aes_ctx *ctx, u8 *out, const u8 *in); - -void crypto_aes_encrypt_x86(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src) -{ - aes_enc_blk(ctx, dst, src); -} -EXPORT_SYMBOL_GPL(crypto_aes_encrypt_x86); - -void crypto_aes_decrypt_x86(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src) -{ - aes_dec_blk(ctx, dst, src); -} -EXPORT_SYMBOL_GPL(crypto_aes_decrypt_x86); - -static void aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) -{ - aes_enc_blk(crypto_tfm_ctx(tfm), dst, src); -} - -static void aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) -{ - aes_dec_blk(crypto_tfm_ctx(tfm), dst, src); -} - -static struct crypto_alg aes_alg = { - .cra_name = "aes", - .cra_driver_name = "aes-asm", - .cra_priority = 200, - .cra_flags = CRYPTO_ALG_TYPE_CIPHER, - .cra_blocksize = AES_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct crypto_aes_ctx), - .cra_module = THIS_MODULE, - .cra_u = { - .cipher = { - .cia_min_keysize = AES_MIN_KEY_SIZE, - .cia_max_keysize = AES_MAX_KEY_SIZE, - .cia_setkey = crypto_aes_set_key, - .cia_encrypt = aes_encrypt, - .cia_decrypt = aes_decrypt - } - } -}; - -static int __init aes_init(void) -{ - return crypto_register_alg(&aes_alg); -} - -static void __exit aes_fini(void) -{ - crypto_unregister_alg(&aes_alg); -} - -module_init(aes_init); -module_exit(aes_fini); - -MODULE_DESCRIPTION("Rijndael (AES) Cipher Algorithm, asm optimized"); -MODULE_LICENSE("GPL"); -MODULE_ALIAS_CRYPTO("aes"); -MODULE_ALIAS_CRYPTO("aes-asm"); diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c index 73c0ccb009a0..3e707e81afdb 100644 --- a/arch/x86/crypto/aesni-intel_glue.c +++ b/arch/x86/crypto/aesni-intel_glue.c @@ -26,7 +26,6 @@ #include <crypto/gcm.h> #include <crypto/xts.h> #include <asm/cpu_device_id.h> -#include <asm/crypto/aes.h> #include <asm/simd.h> #include <crypto/scatterwalk.h> #include <crypto/internal/aead.h> @@ -329,7 
+328,7 @@ static int aes_set_key_common(struct crypto_tfm *tfm, void *raw_ctx, } if (!crypto_simd_usable()) - err = crypto_aes_expand_key(ctx, in_key, key_len); + err = aes_expandkey(ctx, in_key, key_len); else { kernel_fpu_begin(); err = aesni_set_key(ctx, in_key, key_len); @@ -345,26 +344,26 @@ static int aes_set_key(struct crypto_tfm *tfm, const u8 *in_key, return aes_set_key_common(tfm, crypto_tfm_ctx(tfm), in_key, key_len); } -static void aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) +static void aesni_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) { struct crypto_aes_ctx *ctx = aes_ctx(crypto_tfm_ctx(tfm)); - if (!crypto_simd_usable()) - crypto_aes_encrypt_x86(ctx, dst, src); - else { + if (!crypto_simd_usable()) { + aes_encrypt(ctx, dst, src); + } else { kernel_fpu_begin(); aesni_enc(ctx, dst, src); kernel_fpu_end(); } } -static void aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) +static void aesni_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) { struct crypto_aes_ctx *ctx = aes_ctx(crypto_tfm_ctx(tfm)); - if (!crypto_simd_usable()) - crypto_aes_decrypt_x86(ctx, dst, src); - else { + if (!crypto_simd_usable()) { + aes_decrypt(ctx, dst, src); + } else { kernel_fpu_begin(); aesni_dec(ctx, dst, src); kernel_fpu_end(); @@ -610,7 +609,8 @@ static int xts_encrypt(struct skcipher_request *req) return glue_xts_req_128bit(&aesni_enc_xts, req, XTS_TWEAK_CAST(aesni_xts_tweak), aes_ctx(ctx->raw_tweak_ctx), - aes_ctx(ctx->raw_crypt_ctx)); + aes_ctx(ctx->raw_crypt_ctx), + false); } static int xts_decrypt(struct skcipher_request *req) @@ -621,32 +621,28 @@ static int xts_decrypt(struct skcipher_request *req) return glue_xts_req_128bit(&aesni_dec_xts, req, XTS_TWEAK_CAST(aesni_xts_tweak), aes_ctx(ctx->raw_tweak_ctx), - aes_ctx(ctx->raw_crypt_ctx)); + aes_ctx(ctx->raw_crypt_ctx), + true); } static int rfc4106_set_hash_subkey(u8 *hash_subkey, const u8 *key, unsigned int key_len) { - struct crypto_cipher *tfm; + struct crypto_aes_ctx ctx; int ret; - tfm = crypto_alloc_cipher("aes", 0, 0); - if (IS_ERR(tfm)) - return PTR_ERR(tfm); - - ret = crypto_cipher_setkey(tfm, key, key_len); + ret = aes_expandkey(&ctx, key, key_len); if (ret) - goto out_free_cipher; + return ret; /* Clear the data in the hash sub key container to zero.*/ /* We want to cipher all zeros to create the hash sub key. 
*/ memset(hash_subkey, 0, RFC4106_HASH_SUBKEY_SIZE); - crypto_cipher_encrypt_one(tfm, hash_subkey, hash_subkey); + aes_encrypt(&ctx, hash_subkey, hash_subkey); -out_free_cipher: - crypto_free_cipher(tfm); - return ret; + memzero_explicit(&ctx, sizeof(ctx)); + return 0; } static int common_rfc4106_set_key(struct crypto_aead *aead, const u8 *key, @@ -919,8 +915,8 @@ static struct crypto_alg aesni_cipher_alg = { .cia_min_keysize = AES_MIN_KEY_SIZE, .cia_max_keysize = AES_MAX_KEY_SIZE, .cia_setkey = aes_set_key, - .cia_encrypt = aes_encrypt, - .cia_decrypt = aes_decrypt + .cia_encrypt = aesni_encrypt, + .cia_decrypt = aesni_decrypt } } }; diff --git a/arch/x86/crypto/camellia_aesni_avx2_glue.c b/arch/x86/crypto/camellia_aesni_avx2_glue.c index abf298c272dc..a4f00128ea55 100644 --- a/arch/x86/crypto/camellia_aesni_avx2_glue.c +++ b/arch/x86/crypto/camellia_aesni_avx2_glue.c @@ -182,7 +182,7 @@ static int xts_encrypt(struct skcipher_request *req) return glue_xts_req_128bit(&camellia_enc_xts, req, XTS_TWEAK_CAST(camellia_enc_blk), - &ctx->tweak_ctx, &ctx->crypt_ctx); + &ctx->tweak_ctx, &ctx->crypt_ctx, false); } static int xts_decrypt(struct skcipher_request *req) @@ -192,7 +192,7 @@ static int xts_decrypt(struct skcipher_request *req) return glue_xts_req_128bit(&camellia_dec_xts, req, XTS_TWEAK_CAST(camellia_enc_blk), - &ctx->tweak_ctx, &ctx->crypt_ctx); + &ctx->tweak_ctx, &ctx->crypt_ctx, true); } static struct skcipher_alg camellia_algs[] = { diff --git a/arch/x86/crypto/camellia_aesni_avx_glue.c b/arch/x86/crypto/camellia_aesni_avx_glue.c index 0c22d84750a3..f28d282779b8 100644 --- a/arch/x86/crypto/camellia_aesni_avx_glue.c +++ b/arch/x86/crypto/camellia_aesni_avx_glue.c @@ -208,7 +208,7 @@ static int xts_encrypt(struct skcipher_request *req) return glue_xts_req_128bit(&camellia_enc_xts, req, XTS_TWEAK_CAST(camellia_enc_blk), - &ctx->tweak_ctx, &ctx->crypt_ctx); + &ctx->tweak_ctx, &ctx->crypt_ctx, false); } static int xts_decrypt(struct skcipher_request *req) @@ -218,7 +218,7 @@ static int xts_decrypt(struct skcipher_request *req) return glue_xts_req_128bit(&camellia_dec_xts, req, XTS_TWEAK_CAST(camellia_enc_blk), - &ctx->tweak_ctx, &ctx->crypt_ctx); + &ctx->tweak_ctx, &ctx->crypt_ctx, true); } static struct skcipher_alg camellia_algs[] = { diff --git a/arch/x86/crypto/cast6_avx_glue.c b/arch/x86/crypto/cast6_avx_glue.c index 645f8f16815c..a8a38fffb4a9 100644 --- a/arch/x86/crypto/cast6_avx_glue.c +++ b/arch/x86/crypto/cast6_avx_glue.c @@ -201,7 +201,7 @@ static int xts_encrypt(struct skcipher_request *req) return glue_xts_req_128bit(&cast6_enc_xts, req, XTS_TWEAK_CAST(__cast6_encrypt), - &ctx->tweak_ctx, &ctx->crypt_ctx); + &ctx->tweak_ctx, &ctx->crypt_ctx, false); } static int xts_decrypt(struct skcipher_request *req) @@ -211,7 +211,7 @@ static int xts_decrypt(struct skcipher_request *req) return glue_xts_req_128bit(&cast6_dec_xts, req, XTS_TWEAK_CAST(__cast6_encrypt), - &ctx->tweak_ctx, &ctx->crypt_ctx); + &ctx->tweak_ctx, &ctx->crypt_ctx, true); } static struct skcipher_alg cast6_algs[] = { diff --git a/arch/x86/crypto/des3_ede_glue.c b/arch/x86/crypto/des3_ede_glue.c index 968386c21ef4..89830e531350 100644 --- a/arch/x86/crypto/des3_ede_glue.c +++ b/arch/x86/crypto/des3_ede_glue.c @@ -19,8 +19,8 @@ #include <linux/types.h> struct des3_ede_x86_ctx { - u32 enc_expkey[DES3_EDE_EXPKEY_WORDS]; - u32 dec_expkey[DES3_EDE_EXPKEY_WORDS]; + struct des3_ede_ctx enc; + struct des3_ede_ctx dec; }; /* regular block cipher functions */ @@ -34,7 +34,7 @@ asmlinkage void 
des3_ede_x86_64_crypt_blk_3way(const u32 *expkey, u8 *dst, static inline void des3_ede_enc_blk(struct des3_ede_x86_ctx *ctx, u8 *dst, const u8 *src) { - u32 *enc_ctx = ctx->enc_expkey; + u32 *enc_ctx = ctx->enc.expkey; des3_ede_x86_64_crypt_blk(enc_ctx, dst, src); } @@ -42,7 +42,7 @@ static inline void des3_ede_enc_blk(struct des3_ede_x86_ctx *ctx, u8 *dst, static inline void des3_ede_dec_blk(struct des3_ede_x86_ctx *ctx, u8 *dst, const u8 *src) { - u32 *dec_ctx = ctx->dec_expkey; + u32 *dec_ctx = ctx->dec.expkey; des3_ede_x86_64_crypt_blk(dec_ctx, dst, src); } @@ -50,7 +50,7 @@ static inline void des3_ede_dec_blk(struct des3_ede_x86_ctx *ctx, u8 *dst, static inline void des3_ede_enc_blk_3way(struct des3_ede_x86_ctx *ctx, u8 *dst, const u8 *src) { - u32 *enc_ctx = ctx->enc_expkey; + u32 *enc_ctx = ctx->enc.expkey; des3_ede_x86_64_crypt_blk_3way(enc_ctx, dst, src); } @@ -58,7 +58,7 @@ static inline void des3_ede_enc_blk_3way(struct des3_ede_x86_ctx *ctx, u8 *dst, static inline void des3_ede_dec_blk_3way(struct des3_ede_x86_ctx *ctx, u8 *dst, const u8 *src) { - u32 *dec_ctx = ctx->dec_expkey; + u32 *dec_ctx = ctx->dec.expkey; des3_ede_x86_64_crypt_blk_3way(dec_ctx, dst, src); } @@ -122,7 +122,7 @@ static int ecb_encrypt(struct skcipher_request *req) struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); struct des3_ede_x86_ctx *ctx = crypto_skcipher_ctx(tfm); - return ecb_crypt(req, ctx->enc_expkey); + return ecb_crypt(req, ctx->enc.expkey); } static int ecb_decrypt(struct skcipher_request *req) @@ -130,7 +130,7 @@ static int ecb_decrypt(struct skcipher_request *req) struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); struct des3_ede_x86_ctx *ctx = crypto_skcipher_ctx(tfm); - return ecb_crypt(req, ctx->dec_expkey); + return ecb_crypt(req, ctx->dec.expkey); } static unsigned int __cbc_encrypt(struct des3_ede_x86_ctx *ctx, @@ -348,20 +348,28 @@ static int des3_ede_x86_setkey(struct crypto_tfm *tfm, const u8 *key, u32 i, j, tmp; int err; - /* Generate encryption context using generic implementation. */ - err = __des3_ede_setkey(ctx->enc_expkey, &tfm->crt_flags, key, keylen); - if (err < 0) + err = des3_ede_expand_key(&ctx->enc, key, keylen); + if (err == -ENOKEY) { + if (crypto_tfm_get_flags(tfm) & CRYPTO_TFM_REQ_FORBID_WEAK_KEYS) + err = -EINVAL; + else + err = 0; + } + + if (err) { + memset(ctx, 0, sizeof(*ctx)); return err; + } /* Fix encryption context for this implementation and form decryption * context. 
*/ j = DES3_EDE_EXPKEY_WORDS - 2; for (i = 0; i < DES3_EDE_EXPKEY_WORDS; i += 2, j -= 2) { - tmp = ror32(ctx->enc_expkey[i + 1], 4); - ctx->enc_expkey[i + 1] = tmp; + tmp = ror32(ctx->enc.expkey[i + 1], 4); + ctx->enc.expkey[i + 1] = tmp; - ctx->dec_expkey[j + 0] = ctx->enc_expkey[i + 0]; - ctx->dec_expkey[j + 1] = tmp; + ctx->dec.expkey[j + 0] = ctx->enc.expkey[i + 0]; + ctx->dec.expkey[j + 1] = tmp; } return 0; diff --git a/arch/x86/crypto/ghash-clmulni-intel_glue.c b/arch/x86/crypto/ghash-clmulni-intel_glue.c index ac76fe88ac4f..04d72a5a8ce9 100644 --- a/arch/x86/crypto/ghash-clmulni-intel_glue.c +++ b/arch/x86/crypto/ghash-clmulni-intel_glue.c @@ -357,6 +357,5 @@ module_init(ghash_pclmulqdqni_mod_init); module_exit(ghash_pclmulqdqni_mod_exit); MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("GHASH Message Digest Algorithm, " - "accelerated by PCLMULQDQ-NI"); +MODULE_DESCRIPTION("GHASH hash function, accelerated by PCLMULQDQ-NI"); MODULE_ALIAS_CRYPTO("ghash"); diff --git a/arch/x86/crypto/glue_helper.c b/arch/x86/crypto/glue_helper.c index 901551445387..d15b99397480 100644 --- a/arch/x86/crypto/glue_helper.c +++ b/arch/x86/crypto/glue_helper.c @@ -14,6 +14,7 @@ #include <crypto/b128ops.h> #include <crypto/gf128mul.h> #include <crypto/internal/skcipher.h> +#include <crypto/scatterwalk.h> #include <crypto/xts.h> #include <asm/crypto/glue_helper.h> @@ -259,17 +260,36 @@ done: int glue_xts_req_128bit(const struct common_glue_ctx *gctx, struct skcipher_request *req, common_glue_func_t tweak_fn, void *tweak_ctx, - void *crypt_ctx) + void *crypt_ctx, bool decrypt) { + const bool cts = (req->cryptlen % XTS_BLOCK_SIZE); const unsigned int bsize = 128 / 8; + struct skcipher_request subreq; struct skcipher_walk walk; bool fpu_enabled = false; - unsigned int nbytes; + unsigned int nbytes, tail; int err; + if (req->cryptlen < XTS_BLOCK_SIZE) + return -EINVAL; + + if (unlikely(cts)) { + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + + tail = req->cryptlen % XTS_BLOCK_SIZE + XTS_BLOCK_SIZE; + + skcipher_request_set_tfm(&subreq, tfm); + skcipher_request_set_callback(&subreq, + crypto_skcipher_get_flags(tfm), + NULL, NULL); + skcipher_request_set_crypt(&subreq, req->src, req->dst, + req->cryptlen - tail, req->iv); + req = &subreq; + } + err = skcipher_walk_virt(&walk, req, false); nbytes = walk.nbytes; - if (!nbytes) + if (err) return err; /* set minimum length to bsize, for tweak_fn */ @@ -287,6 +307,47 @@ int glue_xts_req_128bit(const struct common_glue_ctx *gctx, nbytes = walk.nbytes; } + if (unlikely(cts)) { + u8 *next_tweak, *final_tweak = req->iv; + struct scatterlist *src, *dst; + struct scatterlist s[2], d[2]; + le128 b[2]; + + dst = src = scatterwalk_ffwd(s, req->src, req->cryptlen); + if (req->dst != req->src) + dst = scatterwalk_ffwd(d, req->dst, req->cryptlen); + + if (decrypt) { + next_tweak = memcpy(b, req->iv, XTS_BLOCK_SIZE); + gf128mul_x_ble(b, b); + } else { + next_tweak = req->iv; + } + + skcipher_request_set_crypt(&subreq, src, dst, XTS_BLOCK_SIZE, + next_tweak); + + err = skcipher_walk_virt(&walk, req, false) ?: + skcipher_walk_done(&walk, + __glue_xts_req_128bit(gctx, crypt_ctx, &walk)); + if (err) + goto out; + + scatterwalk_map_and_copy(b, dst, 0, XTS_BLOCK_SIZE, 0); + memcpy(b + 1, b, tail - XTS_BLOCK_SIZE); + scatterwalk_map_and_copy(b, src, XTS_BLOCK_SIZE, + tail - XTS_BLOCK_SIZE, 0); + scatterwalk_map_and_copy(b, dst, 0, tail, 1); + + skcipher_request_set_crypt(&subreq, dst, dst, XTS_BLOCK_SIZE, + final_tweak); + + err = skcipher_walk_virt(&walk, req, false) ?: + 
skcipher_walk_done(&walk, + __glue_xts_req_128bit(gctx, crypt_ctx, &walk)); + } + +out: glue_fpu_end(fpu_enabled); return err; diff --git a/arch/x86/crypto/morus1280-avx2-asm.S b/arch/x86/crypto/morus1280-avx2-asm.S deleted file mode 100644 index 5413fee33481..000000000000 --- a/arch/x86/crypto/morus1280-avx2-asm.S +++ /dev/null @@ -1,619 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * AVX2 implementation of MORUS-1280 - * - * Copyright (c) 2017-2018 Ondrej Mosnacek <omosnacek@gmail.com> - * Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved. - */ - -#include <linux/linkage.h> -#include <asm/frame.h> - -#define SHUFFLE_MASK(i0, i1, i2, i3) \ - (i0 | (i1 << 2) | (i2 << 4) | (i3 << 6)) - -#define MASK1 SHUFFLE_MASK(3, 0, 1, 2) -#define MASK2 SHUFFLE_MASK(2, 3, 0, 1) -#define MASK3 SHUFFLE_MASK(1, 2, 3, 0) - -#define STATE0 %ymm0 -#define STATE0_LOW %xmm0 -#define STATE1 %ymm1 -#define STATE2 %ymm2 -#define STATE3 %ymm3 -#define STATE4 %ymm4 -#define KEY %ymm5 -#define MSG %ymm5 -#define MSG_LOW %xmm5 -#define T0 %ymm6 -#define T0_LOW %xmm6 -#define T1 %ymm7 - -.section .rodata.cst32.morus1280_const, "aM", @progbits, 32 -.align 32 -.Lmorus1280_const: - .byte 0x00, 0x01, 0x01, 0x02, 0x03, 0x05, 0x08, 0x0d - .byte 0x15, 0x22, 0x37, 0x59, 0x90, 0xe9, 0x79, 0x62 - .byte 0xdb, 0x3d, 0x18, 0x55, 0x6d, 0xc2, 0x2f, 0xf1 - .byte 0x20, 0x11, 0x31, 0x42, 0x73, 0xb5, 0x28, 0xdd - -.section .rodata.cst32.morus1280_counter, "aM", @progbits, 32 -.align 32 -.Lmorus1280_counter: - .byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07 - .byte 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f - .byte 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17 - .byte 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f - -.text - -.macro morus1280_round s0, s1, s2, s3, s4, b, w - vpand \s1, \s2, T0 - vpxor T0, \s0, \s0 - vpxor \s3, \s0, \s0 - vpsllq $\b, \s0, T0 - vpsrlq $(64 - \b), \s0, \s0 - vpxor T0, \s0, \s0 - vpermq $\w, \s3, \s3 -.endm - -/* - * __morus1280_update: internal ABI - * input: - * STATE[0-4] - input state - * MSG - message block - * output: - * STATE[0-4] - output state - * changed: - * T0 - */ -__morus1280_update: - morus1280_round STATE0, STATE1, STATE2, STATE3, STATE4, 13, MASK1 - vpxor MSG, STATE1, STATE1 - morus1280_round STATE1, STATE2, STATE3, STATE4, STATE0, 46, MASK2 - vpxor MSG, STATE2, STATE2 - morus1280_round STATE2, STATE3, STATE4, STATE0, STATE1, 38, MASK3 - vpxor MSG, STATE3, STATE3 - morus1280_round STATE3, STATE4, STATE0, STATE1, STATE2, 7, MASK2 - vpxor MSG, STATE4, STATE4 - morus1280_round STATE4, STATE0, STATE1, STATE2, STATE3, 4, MASK1 - ret -ENDPROC(__morus1280_update) - -/* - * __morus1280_update_zero: internal ABI - * input: - * STATE[0-4] - input state - * output: - * STATE[0-4] - output state - * changed: - * T0 - */ -__morus1280_update_zero: - morus1280_round STATE0, STATE1, STATE2, STATE3, STATE4, 13, MASK1 - morus1280_round STATE1, STATE2, STATE3, STATE4, STATE0, 46, MASK2 - morus1280_round STATE2, STATE3, STATE4, STATE0, STATE1, 38, MASK3 - morus1280_round STATE3, STATE4, STATE0, STATE1, STATE2, 7, MASK2 - morus1280_round STATE4, STATE0, STATE1, STATE2, STATE3, 4, MASK1 - ret -ENDPROC(__morus1280_update_zero) - -/* - * __load_partial: internal ABI - * input: - * %rsi - src - * %rcx - bytes - * output: - * MSG - message block - * changed: - * %r8 - * %r9 - */ -__load_partial: - xor %r9d, %r9d - vpxor MSG, MSG, MSG - - mov %rcx, %r8 - and $0x1, %r8 - jz .Lld_partial_1 - - mov %rcx, %r8 - and $0x1E, %r8 - add %rsi, %r8 - mov (%r8), %r9b - -.Lld_partial_1: - mov %rcx, 
%r8 - and $0x2, %r8 - jz .Lld_partial_2 - - mov %rcx, %r8 - and $0x1C, %r8 - add %rsi, %r8 - shl $16, %r9 - mov (%r8), %r9w - -.Lld_partial_2: - mov %rcx, %r8 - and $0x4, %r8 - jz .Lld_partial_4 - - mov %rcx, %r8 - and $0x18, %r8 - add %rsi, %r8 - shl $32, %r9 - mov (%r8), %r8d - xor %r8, %r9 - -.Lld_partial_4: - movq %r9, MSG_LOW - - mov %rcx, %r8 - and $0x8, %r8 - jz .Lld_partial_8 - - mov %rcx, %r8 - and $0x10, %r8 - add %rsi, %r8 - pshufd $MASK2, MSG_LOW, MSG_LOW - pinsrq $0, (%r8), MSG_LOW - -.Lld_partial_8: - mov %rcx, %r8 - and $0x10, %r8 - jz .Lld_partial_16 - - vpermq $MASK2, MSG, MSG - movdqu (%rsi), MSG_LOW - -.Lld_partial_16: - ret -ENDPROC(__load_partial) - -/* - * __store_partial: internal ABI - * input: - * %rdx - dst - * %rcx - bytes - * output: - * T0 - message block - * changed: - * %r8 - * %r9 - * %r10 - */ -__store_partial: - mov %rcx, %r8 - mov %rdx, %r9 - - cmp $16, %r8 - jl .Lst_partial_16 - - movdqu T0_LOW, (%r9) - vpermq $MASK2, T0, T0 - - sub $16, %r8 - add $16, %r9 - -.Lst_partial_16: - movq T0_LOW, %r10 - - cmp $8, %r8 - jl .Lst_partial_8 - - mov %r10, (%r9) - pextrq $1, T0_LOW, %r10 - - sub $8, %r8 - add $8, %r9 - -.Lst_partial_8: - cmp $4, %r8 - jl .Lst_partial_4 - - mov %r10d, (%r9) - shr $32, %r10 - - sub $4, %r8 - add $4, %r9 - -.Lst_partial_4: - cmp $2, %r8 - jl .Lst_partial_2 - - mov %r10w, (%r9) - shr $16, %r10 - - sub $2, %r8 - add $2, %r9 - -.Lst_partial_2: - cmp $1, %r8 - jl .Lst_partial_1 - - mov %r10b, (%r9) - -.Lst_partial_1: - ret -ENDPROC(__store_partial) - -/* - * void crypto_morus1280_avx2_init(void *state, const void *key, - * const void *iv); - */ -ENTRY(crypto_morus1280_avx2_init) - FRAME_BEGIN - - /* load IV: */ - vpxor STATE0, STATE0, STATE0 - movdqu (%rdx), STATE0_LOW - /* load key: */ - vmovdqu (%rsi), KEY - vmovdqa KEY, STATE1 - /* load all ones: */ - vpcmpeqd STATE2, STATE2, STATE2 - /* load all zeros: */ - vpxor STATE3, STATE3, STATE3 - /* load the constant: */ - vmovdqa .Lmorus1280_const, STATE4 - - /* update 16 times with zero: */ - call __morus1280_update_zero - call __morus1280_update_zero - call __morus1280_update_zero - call __morus1280_update_zero - call __morus1280_update_zero - call __morus1280_update_zero - call __morus1280_update_zero - call __morus1280_update_zero - call __morus1280_update_zero - call __morus1280_update_zero - call __morus1280_update_zero - call __morus1280_update_zero - call __morus1280_update_zero - call __morus1280_update_zero - call __morus1280_update_zero - call __morus1280_update_zero - - /* xor-in the key again after updates: */ - vpxor KEY, STATE1, STATE1 - - /* store the state: */ - vmovdqu STATE0, (0 * 32)(%rdi) - vmovdqu STATE1, (1 * 32)(%rdi) - vmovdqu STATE2, (2 * 32)(%rdi) - vmovdqu STATE3, (3 * 32)(%rdi) - vmovdqu STATE4, (4 * 32)(%rdi) - - FRAME_END - ret -ENDPROC(crypto_morus1280_avx2_init) - -/* - * void crypto_morus1280_avx2_ad(void *state, const void *data, - * unsigned int length); - */ -ENTRY(crypto_morus1280_avx2_ad) - FRAME_BEGIN - - cmp $32, %rdx - jb .Lad_out - - /* load the state: */ - vmovdqu (0 * 32)(%rdi), STATE0 - vmovdqu (1 * 32)(%rdi), STATE1 - vmovdqu (2 * 32)(%rdi), STATE2 - vmovdqu (3 * 32)(%rdi), STATE3 - vmovdqu (4 * 32)(%rdi), STATE4 - - mov %rsi, %r8 - and $0x1F, %r8 - jnz .Lad_u_loop - -.align 4 -.Lad_a_loop: - vmovdqa (%rsi), MSG - call __morus1280_update - sub $32, %rdx - add $32, %rsi - cmp $32, %rdx - jge .Lad_a_loop - - jmp .Lad_cont -.align 4 -.Lad_u_loop: - vmovdqu (%rsi), MSG - call __morus1280_update - sub $32, %rdx - add $32, %rsi - cmp $32, %rdx - jge 
.Lad_u_loop - -.Lad_cont: - /* store the state: */ - vmovdqu STATE0, (0 * 32)(%rdi) - vmovdqu STATE1, (1 * 32)(%rdi) - vmovdqu STATE2, (2 * 32)(%rdi) - vmovdqu STATE3, (3 * 32)(%rdi) - vmovdqu STATE4, (4 * 32)(%rdi) - -.Lad_out: - FRAME_END - ret -ENDPROC(crypto_morus1280_avx2_ad) - -/* - * void crypto_morus1280_avx2_enc(void *state, const void *src, void *dst, - * unsigned int length); - */ -ENTRY(crypto_morus1280_avx2_enc) - FRAME_BEGIN - - cmp $32, %rcx - jb .Lenc_out - - /* load the state: */ - vmovdqu (0 * 32)(%rdi), STATE0 - vmovdqu (1 * 32)(%rdi), STATE1 - vmovdqu (2 * 32)(%rdi), STATE2 - vmovdqu (3 * 32)(%rdi), STATE3 - vmovdqu (4 * 32)(%rdi), STATE4 - - mov %rsi, %r8 - or %rdx, %r8 - and $0x1F, %r8 - jnz .Lenc_u_loop - -.align 4 -.Lenc_a_loop: - vmovdqa (%rsi), MSG - vmovdqa MSG, T0 - vpxor STATE0, T0, T0 - vpermq $MASK3, STATE1, T1 - vpxor T1, T0, T0 - vpand STATE2, STATE3, T1 - vpxor T1, T0, T0 - vmovdqa T0, (%rdx) - - call __morus1280_update - sub $32, %rcx - add $32, %rsi - add $32, %rdx - cmp $32, %rcx - jge .Lenc_a_loop - - jmp .Lenc_cont -.align 4 -.Lenc_u_loop: - vmovdqu (%rsi), MSG - vmovdqa MSG, T0 - vpxor STATE0, T0, T0 - vpermq $MASK3, STATE1, T1 - vpxor T1, T0, T0 - vpand STATE2, STATE3, T1 - vpxor T1, T0, T0 - vmovdqu T0, (%rdx) - - call __morus1280_update - sub $32, %rcx - add $32, %rsi - add $32, %rdx - cmp $32, %rcx - jge .Lenc_u_loop - -.Lenc_cont: - /* store the state: */ - vmovdqu STATE0, (0 * 32)(%rdi) - vmovdqu STATE1, (1 * 32)(%rdi) - vmovdqu STATE2, (2 * 32)(%rdi) - vmovdqu STATE3, (3 * 32)(%rdi) - vmovdqu STATE4, (4 * 32)(%rdi) - -.Lenc_out: - FRAME_END - ret -ENDPROC(crypto_morus1280_avx2_enc) - -/* - * void crypto_morus1280_avx2_enc_tail(void *state, const void *src, void *dst, - * unsigned int length); - */ -ENTRY(crypto_morus1280_avx2_enc_tail) - FRAME_BEGIN - - /* load the state: */ - vmovdqu (0 * 32)(%rdi), STATE0 - vmovdqu (1 * 32)(%rdi), STATE1 - vmovdqu (2 * 32)(%rdi), STATE2 - vmovdqu (3 * 32)(%rdi), STATE3 - vmovdqu (4 * 32)(%rdi), STATE4 - - /* encrypt message: */ - call __load_partial - - vmovdqa MSG, T0 - vpxor STATE0, T0, T0 - vpermq $MASK3, STATE1, T1 - vpxor T1, T0, T0 - vpand STATE2, STATE3, T1 - vpxor T1, T0, T0 - - call __store_partial - - call __morus1280_update - - /* store the state: */ - vmovdqu STATE0, (0 * 32)(%rdi) - vmovdqu STATE1, (1 * 32)(%rdi) - vmovdqu STATE2, (2 * 32)(%rdi) - vmovdqu STATE3, (3 * 32)(%rdi) - vmovdqu STATE4, (4 * 32)(%rdi) - - FRAME_END - ret -ENDPROC(crypto_morus1280_avx2_enc_tail) - -/* - * void crypto_morus1280_avx2_dec(void *state, const void *src, void *dst, - * unsigned int length); - */ -ENTRY(crypto_morus1280_avx2_dec) - FRAME_BEGIN - - cmp $32, %rcx - jb .Ldec_out - - /* load the state: */ - vmovdqu (0 * 32)(%rdi), STATE0 - vmovdqu (1 * 32)(%rdi), STATE1 - vmovdqu (2 * 32)(%rdi), STATE2 - vmovdqu (3 * 32)(%rdi), STATE3 - vmovdqu (4 * 32)(%rdi), STATE4 - - mov %rsi, %r8 - or %rdx, %r8 - and $0x1F, %r8 - jnz .Ldec_u_loop - -.align 4 -.Ldec_a_loop: - vmovdqa (%rsi), MSG - vpxor STATE0, MSG, MSG - vpermq $MASK3, STATE1, T0 - vpxor T0, MSG, MSG - vpand STATE2, STATE3, T0 - vpxor T0, MSG, MSG - vmovdqa MSG, (%rdx) - - call __morus1280_update - sub $32, %rcx - add $32, %rsi - add $32, %rdx - cmp $32, %rcx - jge .Ldec_a_loop - - jmp .Ldec_cont -.align 4 -.Ldec_u_loop: - vmovdqu (%rsi), MSG - vpxor STATE0, MSG, MSG - vpermq $MASK3, STATE1, T0 - vpxor T0, MSG, MSG - vpand STATE2, STATE3, T0 - vpxor T0, MSG, MSG - vmovdqu MSG, (%rdx) - - call __morus1280_update - sub $32, %rcx - add $32, %rsi - add $32, %rdx - 
cmp $32, %rcx - jge .Ldec_u_loop - -.Ldec_cont: - /* store the state: */ - vmovdqu STATE0, (0 * 32)(%rdi) - vmovdqu STATE1, (1 * 32)(%rdi) - vmovdqu STATE2, (2 * 32)(%rdi) - vmovdqu STATE3, (3 * 32)(%rdi) - vmovdqu STATE4, (4 * 32)(%rdi) - -.Ldec_out: - FRAME_END - ret -ENDPROC(crypto_morus1280_avx2_dec) - -/* - * void crypto_morus1280_avx2_dec_tail(void *state, const void *src, void *dst, - * unsigned int length); - */ -ENTRY(crypto_morus1280_avx2_dec_tail) - FRAME_BEGIN - - /* load the state: */ - vmovdqu (0 * 32)(%rdi), STATE0 - vmovdqu (1 * 32)(%rdi), STATE1 - vmovdqu (2 * 32)(%rdi), STATE2 - vmovdqu (3 * 32)(%rdi), STATE3 - vmovdqu (4 * 32)(%rdi), STATE4 - - /* decrypt message: */ - call __load_partial - - vpxor STATE0, MSG, MSG - vpermq $MASK3, STATE1, T0 - vpxor T0, MSG, MSG - vpand STATE2, STATE3, T0 - vpxor T0, MSG, MSG - vmovdqa MSG, T0 - - call __store_partial - - /* mask with byte count: */ - movq %rcx, T0_LOW - vpbroadcastb T0_LOW, T0 - vmovdqa .Lmorus1280_counter, T1 - vpcmpgtb T1, T0, T0 - vpand T0, MSG, MSG - - call __morus1280_update - - /* store the state: */ - vmovdqu STATE0, (0 * 32)(%rdi) - vmovdqu STATE1, (1 * 32)(%rdi) - vmovdqu STATE2, (2 * 32)(%rdi) - vmovdqu STATE3, (3 * 32)(%rdi) - vmovdqu STATE4, (4 * 32)(%rdi) - - FRAME_END - ret -ENDPROC(crypto_morus1280_avx2_dec_tail) - -/* - * void crypto_morus1280_avx2_final(void *state, void *tag_xor, - * u64 assoclen, u64 cryptlen); - */ -ENTRY(crypto_morus1280_avx2_final) - FRAME_BEGIN - - /* load the state: */ - vmovdqu (0 * 32)(%rdi), STATE0 - vmovdqu (1 * 32)(%rdi), STATE1 - vmovdqu (2 * 32)(%rdi), STATE2 - vmovdqu (3 * 32)(%rdi), STATE3 - vmovdqu (4 * 32)(%rdi), STATE4 - - /* xor state[0] into state[4]: */ - vpxor STATE0, STATE4, STATE4 - - /* prepare length block: */ - vpxor MSG, MSG, MSG - vpinsrq $0, %rdx, MSG_LOW, MSG_LOW - vpinsrq $1, %rcx, MSG_LOW, MSG_LOW - vpsllq $3, MSG, MSG /* multiply by 8 (to get bit count) */ - - /* update state: */ - call __morus1280_update - call __morus1280_update - call __morus1280_update - call __morus1280_update - call __morus1280_update - call __morus1280_update - call __morus1280_update - call __morus1280_update - call __morus1280_update - call __morus1280_update - - /* xor tag: */ - vmovdqu (%rsi), MSG - - vpxor STATE0, MSG, MSG - vpermq $MASK3, STATE1, T0 - vpxor T0, MSG, MSG - vpand STATE2, STATE3, T0 - vpxor T0, MSG, MSG - vmovdqu MSG, (%rsi) - - FRAME_END - ret -ENDPROC(crypto_morus1280_avx2_final) diff --git a/arch/x86/crypto/morus1280-avx2-glue.c b/arch/x86/crypto/morus1280-avx2-glue.c deleted file mode 100644 index 2d000d66ba4c..000000000000 --- a/arch/x86/crypto/morus1280-avx2-glue.c +++ /dev/null @@ -1,62 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * The MORUS-1280 Authenticated-Encryption Algorithm - * Glue for AVX2 implementation - * - * Copyright (c) 2016-2018 Ondrej Mosnacek <omosnacek@gmail.com> - * Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved. 
- */ - -#include <crypto/internal/aead.h> -#include <crypto/internal/simd.h> -#include <crypto/morus1280_glue.h> -#include <linux/module.h> -#include <asm/fpu/api.h> -#include <asm/cpu_device_id.h> - -asmlinkage void crypto_morus1280_avx2_init(void *state, const void *key, - const void *iv); -asmlinkage void crypto_morus1280_avx2_ad(void *state, const void *data, - unsigned int length); - -asmlinkage void crypto_morus1280_avx2_enc(void *state, const void *src, - void *dst, unsigned int length); -asmlinkage void crypto_morus1280_avx2_dec(void *state, const void *src, - void *dst, unsigned int length); - -asmlinkage void crypto_morus1280_avx2_enc_tail(void *state, const void *src, - void *dst, unsigned int length); -asmlinkage void crypto_morus1280_avx2_dec_tail(void *state, const void *src, - void *dst, unsigned int length); - -asmlinkage void crypto_morus1280_avx2_final(void *state, void *tag_xor, - u64 assoclen, u64 cryptlen); - -MORUS1280_DECLARE_ALG(avx2, "morus1280-avx2", 400); - -static struct simd_aead_alg *simd_alg; - -static int __init crypto_morus1280_avx2_module_init(void) -{ - if (!boot_cpu_has(X86_FEATURE_AVX2) || - !boot_cpu_has(X86_FEATURE_OSXSAVE) || - !cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL)) - return -ENODEV; - - return simd_register_aeads_compat(&crypto_morus1280_avx2_alg, 1, - &simd_alg); -} - -static void __exit crypto_morus1280_avx2_module_exit(void) -{ - simd_unregister_aeads(&crypto_morus1280_avx2_alg, 1, &simd_alg); -} - -module_init(crypto_morus1280_avx2_module_init); -module_exit(crypto_morus1280_avx2_module_exit); - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Ondrej Mosnacek <omosnacek@gmail.com>"); -MODULE_DESCRIPTION("MORUS-1280 AEAD algorithm -- AVX2 implementation"); -MODULE_ALIAS_CRYPTO("morus1280"); -MODULE_ALIAS_CRYPTO("morus1280-avx2"); diff --git a/arch/x86/crypto/morus1280-sse2-asm.S b/arch/x86/crypto/morus1280-sse2-asm.S deleted file mode 100644 index 0eece772866b..000000000000 --- a/arch/x86/crypto/morus1280-sse2-asm.S +++ /dev/null @@ -1,893 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * SSE2 implementation of MORUS-1280 - * - * Copyright (c) 2017-2018 Ondrej Mosnacek <omosnacek@gmail.com> - * Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved. 
- */ - -#include <linux/linkage.h> -#include <asm/frame.h> - -#define SHUFFLE_MASK(i0, i1, i2, i3) \ - (i0 | (i1 << 2) | (i2 << 4) | (i3 << 6)) - -#define MASK2 SHUFFLE_MASK(2, 3, 0, 1) - -#define STATE0_LO %xmm0 -#define STATE0_HI %xmm1 -#define STATE1_LO %xmm2 -#define STATE1_HI %xmm3 -#define STATE2_LO %xmm4 -#define STATE2_HI %xmm5 -#define STATE3_LO %xmm6 -#define STATE3_HI %xmm7 -#define STATE4_LO %xmm8 -#define STATE4_HI %xmm9 -#define KEY_LO %xmm10 -#define KEY_HI %xmm11 -#define MSG_LO %xmm10 -#define MSG_HI %xmm11 -#define T0_LO %xmm12 -#define T0_HI %xmm13 -#define T1_LO %xmm14 -#define T1_HI %xmm15 - -.section .rodata.cst16.morus640_const, "aM", @progbits, 16 -.align 16 -.Lmorus640_const_0: - .byte 0x00, 0x01, 0x01, 0x02, 0x03, 0x05, 0x08, 0x0d - .byte 0x15, 0x22, 0x37, 0x59, 0x90, 0xe9, 0x79, 0x62 -.Lmorus640_const_1: - .byte 0xdb, 0x3d, 0x18, 0x55, 0x6d, 0xc2, 0x2f, 0xf1 - .byte 0x20, 0x11, 0x31, 0x42, 0x73, 0xb5, 0x28, 0xdd - -.section .rodata.cst16.morus640_counter, "aM", @progbits, 16 -.align 16 -.Lmorus640_counter_0: - .byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07 - .byte 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f -.Lmorus640_counter_1: - .byte 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17 - .byte 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f - -.text - -.macro rol1 hi, lo - /* - * HI_1 | HI_0 || LO_1 | LO_0 - * ==> - * HI_0 | HI_1 || LO_1 | LO_0 - * ==> - * HI_0 | LO_1 || LO_0 | HI_1 - */ - pshufd $MASK2, \hi, \hi - movdqa \hi, T0_LO - punpcklqdq \lo, T0_LO - punpckhqdq \hi, \lo - movdqa \lo, \hi - movdqa T0_LO, \lo -.endm - -.macro rol2 hi, lo - movdqa \lo, T0_LO - movdqa \hi, \lo - movdqa T0_LO, \hi -.endm - -.macro rol3 hi, lo - /* - * HI_1 | HI_0 || LO_1 | LO_0 - * ==> - * HI_0 | HI_1 || LO_1 | LO_0 - * ==> - * LO_0 | HI_1 || HI_0 | LO_1 - */ - pshufd $MASK2, \hi, \hi - movdqa \lo, T0_LO - punpckhqdq \hi, T0_LO - punpcklqdq \lo, \hi - movdqa T0_LO, \lo -.endm - -.macro morus1280_round s0_l, s0_h, s1_l, s1_h, s2_l, s2_h, s3_l, s3_h, s4_l, s4_h, b, w - movdqa \s1_l, T0_LO - pand \s2_l, T0_LO - pxor T0_LO, \s0_l - - movdqa \s1_h, T0_LO - pand \s2_h, T0_LO - pxor T0_LO, \s0_h - - pxor \s3_l, \s0_l - pxor \s3_h, \s0_h - - movdqa \s0_l, T0_LO - psllq $\b, T0_LO - psrlq $(64 - \b), \s0_l - pxor T0_LO, \s0_l - - movdqa \s0_h, T0_LO - psllq $\b, T0_LO - psrlq $(64 - \b), \s0_h - pxor T0_LO, \s0_h - - \w \s3_h, \s3_l -.endm - -/* - * __morus1280_update: internal ABI - * input: - * STATE[0-4] - input state - * MSG - message block - * output: - * STATE[0-4] - output state - * changed: - * T0 - */ -__morus1280_update: - morus1280_round \ - STATE0_LO, STATE0_HI, \ - STATE1_LO, STATE1_HI, \ - STATE2_LO, STATE2_HI, \ - STATE3_LO, STATE3_HI, \ - STATE4_LO, STATE4_HI, \ - 13, rol1 - pxor MSG_LO, STATE1_LO - pxor MSG_HI, STATE1_HI - morus1280_round \ - STATE1_LO, STATE1_HI, \ - STATE2_LO, STATE2_HI, \ - STATE3_LO, STATE3_HI, \ - STATE4_LO, STATE4_HI, \ - STATE0_LO, STATE0_HI, \ - 46, rol2 - pxor MSG_LO, STATE2_LO - pxor MSG_HI, STATE2_HI - morus1280_round \ - STATE2_LO, STATE2_HI, \ - STATE3_LO, STATE3_HI, \ - STATE4_LO, STATE4_HI, \ - STATE0_LO, STATE0_HI, \ - STATE1_LO, STATE1_HI, \ - 38, rol3 - pxor MSG_LO, STATE3_LO - pxor MSG_HI, STATE3_HI - morus1280_round \ - STATE3_LO, STATE3_HI, \ - STATE4_LO, STATE4_HI, \ - STATE0_LO, STATE0_HI, \ - STATE1_LO, STATE1_HI, \ - STATE2_LO, STATE2_HI, \ - 7, rol2 - pxor MSG_LO, STATE4_LO - pxor MSG_HI, STATE4_HI - morus1280_round \ - STATE4_LO, STATE4_HI, \ - STATE0_LO, STATE0_HI, \ - STATE1_LO, STATE1_HI, \ - STATE2_LO, STATE2_HI, \ 
- STATE3_LO, STATE3_HI, \ - 4, rol1 - ret -ENDPROC(__morus1280_update) - -/* - * __morus1280_update_zero: internal ABI - * input: - * STATE[0-4] - input state - * output: - * STATE[0-4] - output state - * changed: - * T0 - */ -__morus1280_update_zero: - morus1280_round \ - STATE0_LO, STATE0_HI, \ - STATE1_LO, STATE1_HI, \ - STATE2_LO, STATE2_HI, \ - STATE3_LO, STATE3_HI, \ - STATE4_LO, STATE4_HI, \ - 13, rol1 - morus1280_round \ - STATE1_LO, STATE1_HI, \ - STATE2_LO, STATE2_HI, \ - STATE3_LO, STATE3_HI, \ - STATE4_LO, STATE4_HI, \ - STATE0_LO, STATE0_HI, \ - 46, rol2 - morus1280_round \ - STATE2_LO, STATE2_HI, \ - STATE3_LO, STATE3_HI, \ - STATE4_LO, STATE4_HI, \ - STATE0_LO, STATE0_HI, \ - STATE1_LO, STATE1_HI, \ - 38, rol3 - morus1280_round \ - STATE3_LO, STATE3_HI, \ - STATE4_LO, STATE4_HI, \ - STATE0_LO, STATE0_HI, \ - STATE1_LO, STATE1_HI, \ - STATE2_LO, STATE2_HI, \ - 7, rol2 - morus1280_round \ - STATE4_LO, STATE4_HI, \ - STATE0_LO, STATE0_HI, \ - STATE1_LO, STATE1_HI, \ - STATE2_LO, STATE2_HI, \ - STATE3_LO, STATE3_HI, \ - 4, rol1 - ret -ENDPROC(__morus1280_update_zero) - -/* - * __load_partial: internal ABI - * input: - * %rsi - src - * %rcx - bytes - * output: - * MSG - message block - * changed: - * %r8 - * %r9 - */ -__load_partial: - xor %r9d, %r9d - pxor MSG_LO, MSG_LO - pxor MSG_HI, MSG_HI - - mov %rcx, %r8 - and $0x1, %r8 - jz .Lld_partial_1 - - mov %rcx, %r8 - and $0x1E, %r8 - add %rsi, %r8 - mov (%r8), %r9b - -.Lld_partial_1: - mov %rcx, %r8 - and $0x2, %r8 - jz .Lld_partial_2 - - mov %rcx, %r8 - and $0x1C, %r8 - add %rsi, %r8 - shl $16, %r9 - mov (%r8), %r9w - -.Lld_partial_2: - mov %rcx, %r8 - and $0x4, %r8 - jz .Lld_partial_4 - - mov %rcx, %r8 - and $0x18, %r8 - add %rsi, %r8 - shl $32, %r9 - mov (%r8), %r8d - xor %r8, %r9 - -.Lld_partial_4: - movq %r9, MSG_LO - - mov %rcx, %r8 - and $0x8, %r8 - jz .Lld_partial_8 - - mov %rcx, %r8 - and $0x10, %r8 - add %rsi, %r8 - pslldq $8, MSG_LO - movq (%r8), T0_LO - pxor T0_LO, MSG_LO - -.Lld_partial_8: - mov %rcx, %r8 - and $0x10, %r8 - jz .Lld_partial_16 - - movdqa MSG_LO, MSG_HI - movdqu (%rsi), MSG_LO - -.Lld_partial_16: - ret -ENDPROC(__load_partial) - -/* - * __store_partial: internal ABI - * input: - * %rdx - dst - * %rcx - bytes - * output: - * T0 - message block - * changed: - * %r8 - * %r9 - * %r10 - */ -__store_partial: - mov %rcx, %r8 - mov %rdx, %r9 - - cmp $16, %r8 - jl .Lst_partial_16 - - movdqu T0_LO, (%r9) - movdqa T0_HI, T0_LO - - sub $16, %r8 - add $16, %r9 - -.Lst_partial_16: - movq T0_LO, %r10 - - cmp $8, %r8 - jl .Lst_partial_8 - - mov %r10, (%r9) - psrldq $8, T0_LO - movq T0_LO, %r10 - - sub $8, %r8 - add $8, %r9 - -.Lst_partial_8: - cmp $4, %r8 - jl .Lst_partial_4 - - mov %r10d, (%r9) - shr $32, %r10 - - sub $4, %r8 - add $4, %r9 - -.Lst_partial_4: - cmp $2, %r8 - jl .Lst_partial_2 - - mov %r10w, (%r9) - shr $16, %r10 - - sub $2, %r8 - add $2, %r9 - -.Lst_partial_2: - cmp $1, %r8 - jl .Lst_partial_1 - - mov %r10b, (%r9) - -.Lst_partial_1: - ret -ENDPROC(__store_partial) - -/* - * void crypto_morus1280_sse2_init(void *state, const void *key, - * const void *iv); - */ -ENTRY(crypto_morus1280_sse2_init) - FRAME_BEGIN - - /* load IV: */ - pxor STATE0_HI, STATE0_HI - movdqu (%rdx), STATE0_LO - /* load key: */ - movdqu 0(%rsi), KEY_LO - movdqu 16(%rsi), KEY_HI - movdqa KEY_LO, STATE1_LO - movdqa KEY_HI, STATE1_HI - /* load all ones: */ - pcmpeqd STATE2_LO, STATE2_LO - pcmpeqd STATE2_HI, STATE2_HI - /* load all zeros: */ - pxor STATE3_LO, STATE3_LO - pxor STATE3_HI, STATE3_HI - /* load the constant: */ - movdqa 
.Lmorus640_const_0, STATE4_LO - movdqa .Lmorus640_const_1, STATE4_HI - - /* update 16 times with zero: */ - call __morus1280_update_zero - call __morus1280_update_zero - call __morus1280_update_zero - call __morus1280_update_zero - call __morus1280_update_zero - call __morus1280_update_zero - call __morus1280_update_zero - call __morus1280_update_zero - call __morus1280_update_zero - call __morus1280_update_zero - call __morus1280_update_zero - call __morus1280_update_zero - call __morus1280_update_zero - call __morus1280_update_zero - call __morus1280_update_zero - call __morus1280_update_zero - - /* xor-in the key again after updates: */ - pxor KEY_LO, STATE1_LO - pxor KEY_HI, STATE1_HI - - /* store the state: */ - movdqu STATE0_LO, (0 * 16)(%rdi) - movdqu STATE0_HI, (1 * 16)(%rdi) - movdqu STATE1_LO, (2 * 16)(%rdi) - movdqu STATE1_HI, (3 * 16)(%rdi) - movdqu STATE2_LO, (4 * 16)(%rdi) - movdqu STATE2_HI, (5 * 16)(%rdi) - movdqu STATE3_LO, (6 * 16)(%rdi) - movdqu STATE3_HI, (7 * 16)(%rdi) - movdqu STATE4_LO, (8 * 16)(%rdi) - movdqu STATE4_HI, (9 * 16)(%rdi) - - FRAME_END - ret -ENDPROC(crypto_morus1280_sse2_init) - -/* - * void crypto_morus1280_sse2_ad(void *state, const void *data, - * unsigned int length); - */ -ENTRY(crypto_morus1280_sse2_ad) - FRAME_BEGIN - - cmp $32, %rdx - jb .Lad_out - - /* load the state: */ - movdqu (0 * 16)(%rdi), STATE0_LO - movdqu (1 * 16)(%rdi), STATE0_HI - movdqu (2 * 16)(%rdi), STATE1_LO - movdqu (3 * 16)(%rdi), STATE1_HI - movdqu (4 * 16)(%rdi), STATE2_LO - movdqu (5 * 16)(%rdi), STATE2_HI - movdqu (6 * 16)(%rdi), STATE3_LO - movdqu (7 * 16)(%rdi), STATE3_HI - movdqu (8 * 16)(%rdi), STATE4_LO - movdqu (9 * 16)(%rdi), STATE4_HI - - mov %rsi, %r8 - and $0xF, %r8 - jnz .Lad_u_loop - -.align 4 -.Lad_a_loop: - movdqa 0(%rsi), MSG_LO - movdqa 16(%rsi), MSG_HI - call __morus1280_update - sub $32, %rdx - add $32, %rsi - cmp $32, %rdx - jge .Lad_a_loop - - jmp .Lad_cont -.align 4 -.Lad_u_loop: - movdqu 0(%rsi), MSG_LO - movdqu 16(%rsi), MSG_HI - call __morus1280_update - sub $32, %rdx - add $32, %rsi - cmp $32, %rdx - jge .Lad_u_loop - -.Lad_cont: - /* store the state: */ - movdqu STATE0_LO, (0 * 16)(%rdi) - movdqu STATE0_HI, (1 * 16)(%rdi) - movdqu STATE1_LO, (2 * 16)(%rdi) - movdqu STATE1_HI, (3 * 16)(%rdi) - movdqu STATE2_LO, (4 * 16)(%rdi) - movdqu STATE2_HI, (5 * 16)(%rdi) - movdqu STATE3_LO, (6 * 16)(%rdi) - movdqu STATE3_HI, (7 * 16)(%rdi) - movdqu STATE4_LO, (8 * 16)(%rdi) - movdqu STATE4_HI, (9 * 16)(%rdi) - -.Lad_out: - FRAME_END - ret -ENDPROC(crypto_morus1280_sse2_ad) - -/* - * void crypto_morus1280_sse2_enc(void *state, const void *src, void *dst, - * unsigned int length); - */ -ENTRY(crypto_morus1280_sse2_enc) - FRAME_BEGIN - - cmp $32, %rcx - jb .Lenc_out - - /* load the state: */ - movdqu (0 * 16)(%rdi), STATE0_LO - movdqu (1 * 16)(%rdi), STATE0_HI - movdqu (2 * 16)(%rdi), STATE1_LO - movdqu (3 * 16)(%rdi), STATE1_HI - movdqu (4 * 16)(%rdi), STATE2_LO - movdqu (5 * 16)(%rdi), STATE2_HI - movdqu (6 * 16)(%rdi), STATE3_LO - movdqu (7 * 16)(%rdi), STATE3_HI - movdqu (8 * 16)(%rdi), STATE4_LO - movdqu (9 * 16)(%rdi), STATE4_HI - - mov %rsi, %r8 - or %rdx, %r8 - and $0xF, %r8 - jnz .Lenc_u_loop - -.align 4 -.Lenc_a_loop: - movdqa 0(%rsi), MSG_LO - movdqa 16(%rsi), MSG_HI - movdqa STATE1_LO, T1_LO - movdqa STATE1_HI, T1_HI - rol3 T1_HI, T1_LO - movdqa MSG_LO, T0_LO - movdqa MSG_HI, T0_HI - pxor T1_LO, T0_LO - pxor T1_HI, T0_HI - pxor STATE0_LO, T0_LO - pxor STATE0_HI, T0_HI - movdqa STATE2_LO, T1_LO - movdqa STATE2_HI, T1_HI - pand STATE3_LO, T1_LO - pand 
STATE3_HI, T1_HI - pxor T1_LO, T0_LO - pxor T1_HI, T0_HI - movdqa T0_LO, 0(%rdx) - movdqa T0_HI, 16(%rdx) - - call __morus1280_update - sub $32, %rcx - add $32, %rsi - add $32, %rdx - cmp $32, %rcx - jge .Lenc_a_loop - - jmp .Lenc_cont -.align 4 -.Lenc_u_loop: - movdqu 0(%rsi), MSG_LO - movdqu 16(%rsi), MSG_HI - movdqa STATE1_LO, T1_LO - movdqa STATE1_HI, T1_HI - rol3 T1_HI, T1_LO - movdqa MSG_LO, T0_LO - movdqa MSG_HI, T0_HI - pxor T1_LO, T0_LO - pxor T1_HI, T0_HI - pxor STATE0_LO, T0_LO - pxor STATE0_HI, T0_HI - movdqa STATE2_LO, T1_LO - movdqa STATE2_HI, T1_HI - pand STATE3_LO, T1_LO - pand STATE3_HI, T1_HI - pxor T1_LO, T0_LO - pxor T1_HI, T0_HI - movdqu T0_LO, 0(%rdx) - movdqu T0_HI, 16(%rdx) - - call __morus1280_update - sub $32, %rcx - add $32, %rsi - add $32, %rdx - cmp $32, %rcx - jge .Lenc_u_loop - -.Lenc_cont: - /* store the state: */ - movdqu STATE0_LO, (0 * 16)(%rdi) - movdqu STATE0_HI, (1 * 16)(%rdi) - movdqu STATE1_LO, (2 * 16)(%rdi) - movdqu STATE1_HI, (3 * 16)(%rdi) - movdqu STATE2_LO, (4 * 16)(%rdi) - movdqu STATE2_HI, (5 * 16)(%rdi) - movdqu STATE3_LO, (6 * 16)(%rdi) - movdqu STATE3_HI, (7 * 16)(%rdi) - movdqu STATE4_LO, (8 * 16)(%rdi) - movdqu STATE4_HI, (9 * 16)(%rdi) - -.Lenc_out: - FRAME_END - ret -ENDPROC(crypto_morus1280_sse2_enc) - -/* - * void crypto_morus1280_sse2_enc_tail(void *state, const void *src, void *dst, - * unsigned int length); - */ -ENTRY(crypto_morus1280_sse2_enc_tail) - FRAME_BEGIN - - /* load the state: */ - movdqu (0 * 16)(%rdi), STATE0_LO - movdqu (1 * 16)(%rdi), STATE0_HI - movdqu (2 * 16)(%rdi), STATE1_LO - movdqu (3 * 16)(%rdi), STATE1_HI - movdqu (4 * 16)(%rdi), STATE2_LO - movdqu (5 * 16)(%rdi), STATE2_HI - movdqu (6 * 16)(%rdi), STATE3_LO - movdqu (7 * 16)(%rdi), STATE3_HI - movdqu (8 * 16)(%rdi), STATE4_LO - movdqu (9 * 16)(%rdi), STATE4_HI - - /* encrypt message: */ - call __load_partial - - movdqa STATE1_LO, T1_LO - movdqa STATE1_HI, T1_HI - rol3 T1_HI, T1_LO - movdqa MSG_LO, T0_LO - movdqa MSG_HI, T0_HI - pxor T1_LO, T0_LO - pxor T1_HI, T0_HI - pxor STATE0_LO, T0_LO - pxor STATE0_HI, T0_HI - movdqa STATE2_LO, T1_LO - movdqa STATE2_HI, T1_HI - pand STATE3_LO, T1_LO - pand STATE3_HI, T1_HI - pxor T1_LO, T0_LO - pxor T1_HI, T0_HI - - call __store_partial - - call __morus1280_update - - /* store the state: */ - movdqu STATE0_LO, (0 * 16)(%rdi) - movdqu STATE0_HI, (1 * 16)(%rdi) - movdqu STATE1_LO, (2 * 16)(%rdi) - movdqu STATE1_HI, (3 * 16)(%rdi) - movdqu STATE2_LO, (4 * 16)(%rdi) - movdqu STATE2_HI, (5 * 16)(%rdi) - movdqu STATE3_LO, (6 * 16)(%rdi) - movdqu STATE3_HI, (7 * 16)(%rdi) - movdqu STATE4_LO, (8 * 16)(%rdi) - movdqu STATE4_HI, (9 * 16)(%rdi) - - FRAME_END - ret -ENDPROC(crypto_morus1280_sse2_enc_tail) - -/* - * void crypto_morus1280_sse2_dec(void *state, const void *src, void *dst, - * unsigned int length); - */ -ENTRY(crypto_morus1280_sse2_dec) - FRAME_BEGIN - - cmp $32, %rcx - jb .Ldec_out - - /* load the state: */ - movdqu (0 * 16)(%rdi), STATE0_LO - movdqu (1 * 16)(%rdi), STATE0_HI - movdqu (2 * 16)(%rdi), STATE1_LO - movdqu (3 * 16)(%rdi), STATE1_HI - movdqu (4 * 16)(%rdi), STATE2_LO - movdqu (5 * 16)(%rdi), STATE2_HI - movdqu (6 * 16)(%rdi), STATE3_LO - movdqu (7 * 16)(%rdi), STATE3_HI - movdqu (8 * 16)(%rdi), STATE4_LO - movdqu (9 * 16)(%rdi), STATE4_HI - - mov %rsi, %r8 - or %rdx, %r8 - and $0xF, %r8 - jnz .Ldec_u_loop - -.align 4 -.Ldec_a_loop: - movdqa 0(%rsi), MSG_LO - movdqa 16(%rsi), MSG_HI - pxor STATE0_LO, MSG_LO - pxor STATE0_HI, MSG_HI - movdqa STATE1_LO, T1_LO - movdqa STATE1_HI, T1_HI - rol3 T1_HI, T1_LO - 
pxor T1_LO, MSG_LO - pxor T1_HI, MSG_HI - movdqa STATE2_LO, T1_LO - movdqa STATE2_HI, T1_HI - pand STATE3_LO, T1_LO - pand STATE3_HI, T1_HI - pxor T1_LO, MSG_LO - pxor T1_HI, MSG_HI - movdqa MSG_LO, 0(%rdx) - movdqa MSG_HI, 16(%rdx) - - call __morus1280_update - sub $32, %rcx - add $32, %rsi - add $32, %rdx - cmp $32, %rcx - jge .Ldec_a_loop - - jmp .Ldec_cont -.align 4 -.Ldec_u_loop: - movdqu 0(%rsi), MSG_LO - movdqu 16(%rsi), MSG_HI - pxor STATE0_LO, MSG_LO - pxor STATE0_HI, MSG_HI - movdqa STATE1_LO, T1_LO - movdqa STATE1_HI, T1_HI - rol3 T1_HI, T1_LO - pxor T1_LO, MSG_LO - pxor T1_HI, MSG_HI - movdqa STATE2_LO, T1_LO - movdqa STATE2_HI, T1_HI - pand STATE3_LO, T1_LO - pand STATE3_HI, T1_HI - pxor T1_LO, MSG_LO - pxor T1_HI, MSG_HI - movdqu MSG_LO, 0(%rdx) - movdqu MSG_HI, 16(%rdx) - - call __morus1280_update - sub $32, %rcx - add $32, %rsi - add $32, %rdx - cmp $32, %rcx - jge .Ldec_u_loop - -.Ldec_cont: - /* store the state: */ - movdqu STATE0_LO, (0 * 16)(%rdi) - movdqu STATE0_HI, (1 * 16)(%rdi) - movdqu STATE1_LO, (2 * 16)(%rdi) - movdqu STATE1_HI, (3 * 16)(%rdi) - movdqu STATE2_LO, (4 * 16)(%rdi) - movdqu STATE2_HI, (5 * 16)(%rdi) - movdqu STATE3_LO, (6 * 16)(%rdi) - movdqu STATE3_HI, (7 * 16)(%rdi) - movdqu STATE4_LO, (8 * 16)(%rdi) - movdqu STATE4_HI, (9 * 16)(%rdi) - -.Ldec_out: - FRAME_END - ret -ENDPROC(crypto_morus1280_sse2_dec) - -/* - * void crypto_morus1280_sse2_dec_tail(void *state, const void *src, void *dst, - * unsigned int length); - */ -ENTRY(crypto_morus1280_sse2_dec_tail) - FRAME_BEGIN - - /* load the state: */ - movdqu (0 * 16)(%rdi), STATE0_LO - movdqu (1 * 16)(%rdi), STATE0_HI - movdqu (2 * 16)(%rdi), STATE1_LO - movdqu (3 * 16)(%rdi), STATE1_HI - movdqu (4 * 16)(%rdi), STATE2_LO - movdqu (5 * 16)(%rdi), STATE2_HI - movdqu (6 * 16)(%rdi), STATE3_LO - movdqu (7 * 16)(%rdi), STATE3_HI - movdqu (8 * 16)(%rdi), STATE4_LO - movdqu (9 * 16)(%rdi), STATE4_HI - - /* decrypt message: */ - call __load_partial - - pxor STATE0_LO, MSG_LO - pxor STATE0_HI, MSG_HI - movdqa STATE1_LO, T1_LO - movdqa STATE1_HI, T1_HI - rol3 T1_HI, T1_LO - pxor T1_LO, MSG_LO - pxor T1_HI, MSG_HI - movdqa STATE2_LO, T1_LO - movdqa STATE2_HI, T1_HI - pand STATE3_LO, T1_LO - pand STATE3_HI, T1_HI - pxor T1_LO, MSG_LO - pxor T1_HI, MSG_HI - movdqa MSG_LO, T0_LO - movdqa MSG_HI, T0_HI - - call __store_partial - - /* mask with byte count: */ - movq %rcx, T0_LO - punpcklbw T0_LO, T0_LO - punpcklbw T0_LO, T0_LO - punpcklbw T0_LO, T0_LO - punpcklbw T0_LO, T0_LO - movdqa T0_LO, T0_HI - movdqa .Lmorus640_counter_0, T1_LO - movdqa .Lmorus640_counter_1, T1_HI - pcmpgtb T1_LO, T0_LO - pcmpgtb T1_HI, T0_HI - pand T0_LO, MSG_LO - pand T0_HI, MSG_HI - - call __morus1280_update - - /* store the state: */ - movdqu STATE0_LO, (0 * 16)(%rdi) - movdqu STATE0_HI, (1 * 16)(%rdi) - movdqu STATE1_LO, (2 * 16)(%rdi) - movdqu STATE1_HI, (3 * 16)(%rdi) - movdqu STATE2_LO, (4 * 16)(%rdi) - movdqu STATE2_HI, (5 * 16)(%rdi) - movdqu STATE3_LO, (6 * 16)(%rdi) - movdqu STATE3_HI, (7 * 16)(%rdi) - movdqu STATE4_LO, (8 * 16)(%rdi) - movdqu STATE4_HI, (9 * 16)(%rdi) - - FRAME_END - ret -ENDPROC(crypto_morus1280_sse2_dec_tail) - -/* - * void crypto_morus1280_sse2_final(void *state, void *tag_xor, - * u64 assoclen, u64 cryptlen); - */ -ENTRY(crypto_morus1280_sse2_final) - FRAME_BEGIN - - /* load the state: */ - movdqu (0 * 16)(%rdi), STATE0_LO - movdqu (1 * 16)(%rdi), STATE0_HI - movdqu (2 * 16)(%rdi), STATE1_LO - movdqu (3 * 16)(%rdi), STATE1_HI - movdqu (4 * 16)(%rdi), STATE2_LO - movdqu (5 * 16)(%rdi), STATE2_HI - movdqu (6 * 
16)(%rdi), STATE3_LO - movdqu (7 * 16)(%rdi), STATE3_HI - movdqu (8 * 16)(%rdi), STATE4_LO - movdqu (9 * 16)(%rdi), STATE4_HI - - /* xor state[0] into state[4]: */ - pxor STATE0_LO, STATE4_LO - pxor STATE0_HI, STATE4_HI - - /* prepare length block: */ - movq %rdx, MSG_LO - movq %rcx, T0_LO - pslldq $8, T0_LO - pxor T0_LO, MSG_LO - psllq $3, MSG_LO /* multiply by 8 (to get bit count) */ - pxor MSG_HI, MSG_HI - - /* update state: */ - call __morus1280_update - call __morus1280_update - call __morus1280_update - call __morus1280_update - call __morus1280_update - call __morus1280_update - call __morus1280_update - call __morus1280_update - call __morus1280_update - call __morus1280_update - - /* xor tag: */ - movdqu 0(%rsi), MSG_LO - movdqu 16(%rsi), MSG_HI - - pxor STATE0_LO, MSG_LO - pxor STATE0_HI, MSG_HI - movdqa STATE1_LO, T0_LO - movdqa STATE1_HI, T0_HI - rol3 T0_HI, T0_LO - pxor T0_LO, MSG_LO - pxor T0_HI, MSG_HI - movdqa STATE2_LO, T0_LO - movdqa STATE2_HI, T0_HI - pand STATE3_LO, T0_LO - pand STATE3_HI, T0_HI - pxor T0_LO, MSG_LO - pxor T0_HI, MSG_HI - - movdqu MSG_LO, 0(%rsi) - movdqu MSG_HI, 16(%rsi) - - FRAME_END - ret -ENDPROC(crypto_morus1280_sse2_final) diff --git a/arch/x86/crypto/morus1280-sse2-glue.c b/arch/x86/crypto/morus1280-sse2-glue.c deleted file mode 100644 index aada9d774293..000000000000 --- a/arch/x86/crypto/morus1280-sse2-glue.c +++ /dev/null @@ -1,61 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * The MORUS-1280 Authenticated-Encryption Algorithm - * Glue for SSE2 implementation - * - * Copyright (c) 2016-2018 Ondrej Mosnacek <omosnacek@gmail.com> - * Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved. - */ - -#include <crypto/internal/aead.h> -#include <crypto/internal/simd.h> -#include <crypto/morus1280_glue.h> -#include <linux/module.h> -#include <asm/fpu/api.h> -#include <asm/cpu_device_id.h> - -asmlinkage void crypto_morus1280_sse2_init(void *state, const void *key, - const void *iv); -asmlinkage void crypto_morus1280_sse2_ad(void *state, const void *data, - unsigned int length); - -asmlinkage void crypto_morus1280_sse2_enc(void *state, const void *src, - void *dst, unsigned int length); -asmlinkage void crypto_morus1280_sse2_dec(void *state, const void *src, - void *dst, unsigned int length); - -asmlinkage void crypto_morus1280_sse2_enc_tail(void *state, const void *src, - void *dst, unsigned int length); -asmlinkage void crypto_morus1280_sse2_dec_tail(void *state, const void *src, - void *dst, unsigned int length); - -asmlinkage void crypto_morus1280_sse2_final(void *state, void *tag_xor, - u64 assoclen, u64 cryptlen); - -MORUS1280_DECLARE_ALG(sse2, "morus1280-sse2", 350); - -static struct simd_aead_alg *simd_alg; - -static int __init crypto_morus1280_sse2_module_init(void) -{ - if (!boot_cpu_has(X86_FEATURE_XMM2) || - !cpu_has_xfeatures(XFEATURE_MASK_SSE, NULL)) - return -ENODEV; - - return simd_register_aeads_compat(&crypto_morus1280_sse2_alg, 1, - &simd_alg); -} - -static void __exit crypto_morus1280_sse2_module_exit(void) -{ - simd_unregister_aeads(&crypto_morus1280_sse2_alg, 1, &simd_alg); -} - -module_init(crypto_morus1280_sse2_module_init); -module_exit(crypto_morus1280_sse2_module_exit); - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Ondrej Mosnacek <omosnacek@gmail.com>"); -MODULE_DESCRIPTION("MORUS-1280 AEAD algorithm -- SSE2 implementation"); -MODULE_ALIAS_CRYPTO("morus1280"); -MODULE_ALIAS_CRYPTO("morus1280-sse2"); diff --git a/arch/x86/crypto/morus1280_glue.c b/arch/x86/crypto/morus1280_glue.c deleted file mode 100644 index 
ffbde8b22838..000000000000 --- a/arch/x86/crypto/morus1280_glue.c +++ /dev/null @@ -1,205 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * The MORUS-1280 Authenticated-Encryption Algorithm - * Common x86 SIMD glue skeleton - * - * Copyright (c) 2016-2018 Ondrej Mosnacek <omosnacek@gmail.com> - * Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved. - */ - -#include <crypto/internal/aead.h> -#include <crypto/internal/skcipher.h> -#include <crypto/morus1280_glue.h> -#include <crypto/scatterwalk.h> -#include <linux/err.h> -#include <linux/init.h> -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/scatterlist.h> -#include <asm/fpu/api.h> - -struct morus1280_state { - struct morus1280_block s[MORUS_STATE_BLOCKS]; -}; - -struct morus1280_ops { - int (*skcipher_walk_init)(struct skcipher_walk *walk, - struct aead_request *req, bool atomic); - - void (*crypt_blocks)(void *state, const void *src, void *dst, - unsigned int length); - void (*crypt_tail)(void *state, const void *src, void *dst, - unsigned int length); -}; - -static void crypto_morus1280_glue_process_ad( - struct morus1280_state *state, - const struct morus1280_glue_ops *ops, - struct scatterlist *sg_src, unsigned int assoclen) -{ - struct scatter_walk walk; - struct morus1280_block buf; - unsigned int pos = 0; - - scatterwalk_start(&walk, sg_src); - while (assoclen != 0) { - unsigned int size = scatterwalk_clamp(&walk, assoclen); - unsigned int left = size; - void *mapped = scatterwalk_map(&walk); - const u8 *src = (const u8 *)mapped; - - if (pos + size >= MORUS1280_BLOCK_SIZE) { - if (pos > 0) { - unsigned int fill = MORUS1280_BLOCK_SIZE - pos; - memcpy(buf.bytes + pos, src, fill); - ops->ad(state, buf.bytes, MORUS1280_BLOCK_SIZE); - pos = 0; - left -= fill; - src += fill; - } - - ops->ad(state, src, left); - src += left & ~(MORUS1280_BLOCK_SIZE - 1); - left &= MORUS1280_BLOCK_SIZE - 1; - } - - memcpy(buf.bytes + pos, src, left); - - pos += left; - assoclen -= size; - scatterwalk_unmap(mapped); - scatterwalk_advance(&walk, size); - scatterwalk_done(&walk, 0, assoclen); - } - - if (pos > 0) { - memset(buf.bytes + pos, 0, MORUS1280_BLOCK_SIZE - pos); - ops->ad(state, buf.bytes, MORUS1280_BLOCK_SIZE); - } -} - -static void crypto_morus1280_glue_process_crypt(struct morus1280_state *state, - struct morus1280_ops ops, - struct skcipher_walk *walk) -{ - while (walk->nbytes >= MORUS1280_BLOCK_SIZE) { - ops.crypt_blocks(state, walk->src.virt.addr, - walk->dst.virt.addr, - round_down(walk->nbytes, - MORUS1280_BLOCK_SIZE)); - skcipher_walk_done(walk, walk->nbytes % MORUS1280_BLOCK_SIZE); - } - - if (walk->nbytes) { - ops.crypt_tail(state, walk->src.virt.addr, walk->dst.virt.addr, - walk->nbytes); - skcipher_walk_done(walk, 0); - } -} - -int crypto_morus1280_glue_setkey(struct crypto_aead *aead, const u8 *key, - unsigned int keylen) -{ - struct morus1280_ctx *ctx = crypto_aead_ctx(aead); - - if (keylen == MORUS1280_BLOCK_SIZE) { - memcpy(ctx->key.bytes, key, MORUS1280_BLOCK_SIZE); - } else if (keylen == MORUS1280_BLOCK_SIZE / 2) { - memcpy(ctx->key.bytes, key, keylen); - memcpy(ctx->key.bytes + keylen, key, keylen); - } else { - crypto_aead_set_flags(aead, CRYPTO_TFM_RES_BAD_KEY_LEN); - return -EINVAL; - } - - return 0; -} -EXPORT_SYMBOL_GPL(crypto_morus1280_glue_setkey); - -int crypto_morus1280_glue_setauthsize(struct crypto_aead *tfm, - unsigned int authsize) -{ - return (authsize <= MORUS_MAX_AUTH_SIZE) ? 
0 : -EINVAL; -} -EXPORT_SYMBOL_GPL(crypto_morus1280_glue_setauthsize); - -static void crypto_morus1280_glue_crypt(struct aead_request *req, - struct morus1280_ops ops, - unsigned int cryptlen, - struct morus1280_block *tag_xor) -{ - struct crypto_aead *tfm = crypto_aead_reqtfm(req); - struct morus1280_ctx *ctx = crypto_aead_ctx(tfm); - struct morus1280_state state; - struct skcipher_walk walk; - - ops.skcipher_walk_init(&walk, req, true); - - kernel_fpu_begin(); - - ctx->ops->init(&state, &ctx->key, req->iv); - crypto_morus1280_glue_process_ad(&state, ctx->ops, req->src, req->assoclen); - crypto_morus1280_glue_process_crypt(&state, ops, &walk); - ctx->ops->final(&state, tag_xor, req->assoclen, cryptlen); - - kernel_fpu_end(); -} - -int crypto_morus1280_glue_encrypt(struct aead_request *req) -{ - struct crypto_aead *tfm = crypto_aead_reqtfm(req); - struct morus1280_ctx *ctx = crypto_aead_ctx(tfm); - struct morus1280_ops OPS = { - .skcipher_walk_init = skcipher_walk_aead_encrypt, - .crypt_blocks = ctx->ops->enc, - .crypt_tail = ctx->ops->enc_tail, - }; - - struct morus1280_block tag = {}; - unsigned int authsize = crypto_aead_authsize(tfm); - unsigned int cryptlen = req->cryptlen; - - crypto_morus1280_glue_crypt(req, OPS, cryptlen, &tag); - - scatterwalk_map_and_copy(tag.bytes, req->dst, - req->assoclen + cryptlen, authsize, 1); - return 0; -} -EXPORT_SYMBOL_GPL(crypto_morus1280_glue_encrypt); - -int crypto_morus1280_glue_decrypt(struct aead_request *req) -{ - static const u8 zeros[MORUS1280_BLOCK_SIZE] = {}; - - struct crypto_aead *tfm = crypto_aead_reqtfm(req); - struct morus1280_ctx *ctx = crypto_aead_ctx(tfm); - struct morus1280_ops OPS = { - .skcipher_walk_init = skcipher_walk_aead_decrypt, - .crypt_blocks = ctx->ops->dec, - .crypt_tail = ctx->ops->dec_tail, - }; - - struct morus1280_block tag; - unsigned int authsize = crypto_aead_authsize(tfm); - unsigned int cryptlen = req->cryptlen - authsize; - - scatterwalk_map_and_copy(tag.bytes, req->src, - req->assoclen + cryptlen, authsize, 0); - - crypto_morus1280_glue_crypt(req, OPS, cryptlen, &tag); - - return crypto_memneq(tag.bytes, zeros, authsize) ? -EBADMSG : 0; -} -EXPORT_SYMBOL_GPL(crypto_morus1280_glue_decrypt); - -void crypto_morus1280_glue_init_ops(struct crypto_aead *aead, - const struct morus1280_glue_ops *ops) -{ - struct morus1280_ctx *ctx = crypto_aead_ctx(aead); - ctx->ops = ops; -} -EXPORT_SYMBOL_GPL(crypto_morus1280_glue_init_ops); - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Ondrej Mosnacek <omosnacek@gmail.com>"); -MODULE_DESCRIPTION("MORUS-1280 AEAD mode -- glue for x86 optimizations"); diff --git a/arch/x86/crypto/morus640-sse2-asm.S b/arch/x86/crypto/morus640-sse2-asm.S deleted file mode 100644 index a60891101bbd..000000000000 --- a/arch/x86/crypto/morus640-sse2-asm.S +++ /dev/null @@ -1,612 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * SSE2 implementation of MORUS-640 - * - * Copyright (c) 2017-2018 Ondrej Mosnacek <omosnacek@gmail.com> - * Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved. 
- */ - -#include <linux/linkage.h> -#include <asm/frame.h> - -#define SHUFFLE_MASK(i0, i1, i2, i3) \ - (i0 | (i1 << 2) | (i2 << 4) | (i3 << 6)) - -#define MASK1 SHUFFLE_MASK(3, 0, 1, 2) -#define MASK2 SHUFFLE_MASK(2, 3, 0, 1) -#define MASK3 SHUFFLE_MASK(1, 2, 3, 0) - -#define STATE0 %xmm0 -#define STATE1 %xmm1 -#define STATE2 %xmm2 -#define STATE3 %xmm3 -#define STATE4 %xmm4 -#define KEY %xmm5 -#define MSG %xmm5 -#define T0 %xmm6 -#define T1 %xmm7 - -.section .rodata.cst16.morus640_const, "aM", @progbits, 32 -.align 16 -.Lmorus640_const_0: - .byte 0x00, 0x01, 0x01, 0x02, 0x03, 0x05, 0x08, 0x0d - .byte 0x15, 0x22, 0x37, 0x59, 0x90, 0xe9, 0x79, 0x62 -.Lmorus640_const_1: - .byte 0xdb, 0x3d, 0x18, 0x55, 0x6d, 0xc2, 0x2f, 0xf1 - .byte 0x20, 0x11, 0x31, 0x42, 0x73, 0xb5, 0x28, 0xdd - -.section .rodata.cst16.morus640_counter, "aM", @progbits, 16 -.align 16 -.Lmorus640_counter: - .byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07 - .byte 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f - -.text - -.macro morus640_round s0, s1, s2, s3, s4, b, w - movdqa \s1, T0 - pand \s2, T0 - pxor T0, \s0 - pxor \s3, \s0 - movdqa \s0, T0 - pslld $\b, T0 - psrld $(32 - \b), \s0 - pxor T0, \s0 - pshufd $\w, \s3, \s3 -.endm - -/* - * __morus640_update: internal ABI - * input: - * STATE[0-4] - input state - * MSG - message block - * output: - * STATE[0-4] - output state - * changed: - * T0 - */ -__morus640_update: - morus640_round STATE0, STATE1, STATE2, STATE3, STATE4, 5, MASK1 - pxor MSG, STATE1 - morus640_round STATE1, STATE2, STATE3, STATE4, STATE0, 31, MASK2 - pxor MSG, STATE2 - morus640_round STATE2, STATE3, STATE4, STATE0, STATE1, 7, MASK3 - pxor MSG, STATE3 - morus640_round STATE3, STATE4, STATE0, STATE1, STATE2, 22, MASK2 - pxor MSG, STATE4 - morus640_round STATE4, STATE0, STATE1, STATE2, STATE3, 13, MASK1 - ret -ENDPROC(__morus640_update) - - -/* - * __morus640_update_zero: internal ABI - * input: - * STATE[0-4] - input state - * output: - * STATE[0-4] - output state - * changed: - * T0 - */ -__morus640_update_zero: - morus640_round STATE0, STATE1, STATE2, STATE3, STATE4, 5, MASK1 - morus640_round STATE1, STATE2, STATE3, STATE4, STATE0, 31, MASK2 - morus640_round STATE2, STATE3, STATE4, STATE0, STATE1, 7, MASK3 - morus640_round STATE3, STATE4, STATE0, STATE1, STATE2, 22, MASK2 - morus640_round STATE4, STATE0, STATE1, STATE2, STATE3, 13, MASK1 - ret -ENDPROC(__morus640_update_zero) - -/* - * __load_partial: internal ABI - * input: - * %rsi - src - * %rcx - bytes - * output: - * MSG - message block - * changed: - * T0 - * %r8 - * %r9 - */ -__load_partial: - xor %r9d, %r9d - pxor MSG, MSG - - mov %rcx, %r8 - and $0x1, %r8 - jz .Lld_partial_1 - - mov %rcx, %r8 - and $0x1E, %r8 - add %rsi, %r8 - mov (%r8), %r9b - -.Lld_partial_1: - mov %rcx, %r8 - and $0x2, %r8 - jz .Lld_partial_2 - - mov %rcx, %r8 - and $0x1C, %r8 - add %rsi, %r8 - shl $16, %r9 - mov (%r8), %r9w - -.Lld_partial_2: - mov %rcx, %r8 - and $0x4, %r8 - jz .Lld_partial_4 - - mov %rcx, %r8 - and $0x18, %r8 - add %rsi, %r8 - shl $32, %r9 - mov (%r8), %r8d - xor %r8, %r9 - -.Lld_partial_4: - movq %r9, MSG - - mov %rcx, %r8 - and $0x8, %r8 - jz .Lld_partial_8 - - mov %rcx, %r8 - and $0x10, %r8 - add %rsi, %r8 - pslldq $8, MSG - movq (%r8), T0 - pxor T0, MSG - -.Lld_partial_8: - ret -ENDPROC(__load_partial) - -/* - * __store_partial: internal ABI - * input: - * %rdx - dst - * %rcx - bytes - * output: - * T0 - message block - * changed: - * %r8 - * %r9 - * %r10 - */ -__store_partial: - mov %rcx, %r8 - mov %rdx, %r9 - - movq T0, %r10 - - cmp $8, %r8 - jl 
.Lst_partial_8 - - mov %r10, (%r9) - psrldq $8, T0 - movq T0, %r10 - - sub $8, %r8 - add $8, %r9 - -.Lst_partial_8: - cmp $4, %r8 - jl .Lst_partial_4 - - mov %r10d, (%r9) - shr $32, %r10 - - sub $4, %r8 - add $4, %r9 - -.Lst_partial_4: - cmp $2, %r8 - jl .Lst_partial_2 - - mov %r10w, (%r9) - shr $16, %r10 - - sub $2, %r8 - add $2, %r9 - -.Lst_partial_2: - cmp $1, %r8 - jl .Lst_partial_1 - - mov %r10b, (%r9) - -.Lst_partial_1: - ret -ENDPROC(__store_partial) - -/* - * void crypto_morus640_sse2_init(void *state, const void *key, const void *iv); - */ -ENTRY(crypto_morus640_sse2_init) - FRAME_BEGIN - - /* load IV: */ - movdqu (%rdx), STATE0 - /* load key: */ - movdqu (%rsi), KEY - movdqa KEY, STATE1 - /* load all ones: */ - pcmpeqd STATE2, STATE2 - /* load the constants: */ - movdqa .Lmorus640_const_0, STATE3 - movdqa .Lmorus640_const_1, STATE4 - - /* update 16 times with zero: */ - call __morus640_update_zero - call __morus640_update_zero - call __morus640_update_zero - call __morus640_update_zero - call __morus640_update_zero - call __morus640_update_zero - call __morus640_update_zero - call __morus640_update_zero - call __morus640_update_zero - call __morus640_update_zero - call __morus640_update_zero - call __morus640_update_zero - call __morus640_update_zero - call __morus640_update_zero - call __morus640_update_zero - call __morus640_update_zero - - /* xor-in the key again after updates: */ - pxor KEY, STATE1 - - /* store the state: */ - movdqu STATE0, (0 * 16)(%rdi) - movdqu STATE1, (1 * 16)(%rdi) - movdqu STATE2, (2 * 16)(%rdi) - movdqu STATE3, (3 * 16)(%rdi) - movdqu STATE4, (4 * 16)(%rdi) - - FRAME_END - ret -ENDPROC(crypto_morus640_sse2_init) - -/* - * void crypto_morus640_sse2_ad(void *state, const void *data, - * unsigned int length); - */ -ENTRY(crypto_morus640_sse2_ad) - FRAME_BEGIN - - cmp $16, %rdx - jb .Lad_out - - /* load the state: */ - movdqu (0 * 16)(%rdi), STATE0 - movdqu (1 * 16)(%rdi), STATE1 - movdqu (2 * 16)(%rdi), STATE2 - movdqu (3 * 16)(%rdi), STATE3 - movdqu (4 * 16)(%rdi), STATE4 - - mov %rsi, %r8 - and $0xF, %r8 - jnz .Lad_u_loop - -.align 4 -.Lad_a_loop: - movdqa (%rsi), MSG - call __morus640_update - sub $16, %rdx - add $16, %rsi - cmp $16, %rdx - jge .Lad_a_loop - - jmp .Lad_cont -.align 4 -.Lad_u_loop: - movdqu (%rsi), MSG - call __morus640_update - sub $16, %rdx - add $16, %rsi - cmp $16, %rdx - jge .Lad_u_loop - -.Lad_cont: - /* store the state: */ - movdqu STATE0, (0 * 16)(%rdi) - movdqu STATE1, (1 * 16)(%rdi) - movdqu STATE2, (2 * 16)(%rdi) - movdqu STATE3, (3 * 16)(%rdi) - movdqu STATE4, (4 * 16)(%rdi) - -.Lad_out: - FRAME_END - ret -ENDPROC(crypto_morus640_sse2_ad) - -/* - * void crypto_morus640_sse2_enc(void *state, const void *src, void *dst, - * unsigned int length); - */ -ENTRY(crypto_morus640_sse2_enc) - FRAME_BEGIN - - cmp $16, %rcx - jb .Lenc_out - - /* load the state: */ - movdqu (0 * 16)(%rdi), STATE0 - movdqu (1 * 16)(%rdi), STATE1 - movdqu (2 * 16)(%rdi), STATE2 - movdqu (3 * 16)(%rdi), STATE3 - movdqu (4 * 16)(%rdi), STATE4 - - mov %rsi, %r8 - or %rdx, %r8 - and $0xF, %r8 - jnz .Lenc_u_loop - -.align 4 -.Lenc_a_loop: - movdqa (%rsi), MSG - movdqa MSG, T0 - pxor STATE0, T0 - pshufd $MASK3, STATE1, T1 - pxor T1, T0 - movdqa STATE2, T1 - pand STATE3, T1 - pxor T1, T0 - movdqa T0, (%rdx) - - call __morus640_update - sub $16, %rcx - add $16, %rsi - add $16, %rdx - cmp $16, %rcx - jge .Lenc_a_loop - - jmp .Lenc_cont -.align 4 -.Lenc_u_loop: - movdqu (%rsi), MSG - movdqa MSG, T0 - pxor STATE0, T0 - pshufd $MASK3, STATE1, T1 - pxor T1, T0 - 
movdqa STATE2, T1 - pand STATE3, T1 - pxor T1, T0 - movdqu T0, (%rdx) - - call __morus640_update - sub $16, %rcx - add $16, %rsi - add $16, %rdx - cmp $16, %rcx - jge .Lenc_u_loop - -.Lenc_cont: - /* store the state: */ - movdqu STATE0, (0 * 16)(%rdi) - movdqu STATE1, (1 * 16)(%rdi) - movdqu STATE2, (2 * 16)(%rdi) - movdqu STATE3, (3 * 16)(%rdi) - movdqu STATE4, (4 * 16)(%rdi) - -.Lenc_out: - FRAME_END - ret -ENDPROC(crypto_morus640_sse2_enc) - -/* - * void crypto_morus640_sse2_enc_tail(void *state, const void *src, void *dst, - * unsigned int length); - */ -ENTRY(crypto_morus640_sse2_enc_tail) - FRAME_BEGIN - - /* load the state: */ - movdqu (0 * 16)(%rdi), STATE0 - movdqu (1 * 16)(%rdi), STATE1 - movdqu (2 * 16)(%rdi), STATE2 - movdqu (3 * 16)(%rdi), STATE3 - movdqu (4 * 16)(%rdi), STATE4 - - /* encrypt message: */ - call __load_partial - - movdqa MSG, T0 - pxor STATE0, T0 - pshufd $MASK3, STATE1, T1 - pxor T1, T0 - movdqa STATE2, T1 - pand STATE3, T1 - pxor T1, T0 - - call __store_partial - - call __morus640_update - - /* store the state: */ - movdqu STATE0, (0 * 16)(%rdi) - movdqu STATE1, (1 * 16)(%rdi) - movdqu STATE2, (2 * 16)(%rdi) - movdqu STATE3, (3 * 16)(%rdi) - movdqu STATE4, (4 * 16)(%rdi) - - FRAME_END - ret -ENDPROC(crypto_morus640_sse2_enc_tail) - -/* - * void crypto_morus640_sse2_dec(void *state, const void *src, void *dst, - * unsigned int length); - */ -ENTRY(crypto_morus640_sse2_dec) - FRAME_BEGIN - - cmp $16, %rcx - jb .Ldec_out - - /* load the state: */ - movdqu (0 * 16)(%rdi), STATE0 - movdqu (1 * 16)(%rdi), STATE1 - movdqu (2 * 16)(%rdi), STATE2 - movdqu (3 * 16)(%rdi), STATE3 - movdqu (4 * 16)(%rdi), STATE4 - - mov %rsi, %r8 - or %rdx, %r8 - and $0xF, %r8 - jnz .Ldec_u_loop - -.align 4 -.Ldec_a_loop: - movdqa (%rsi), MSG - pxor STATE0, MSG - pshufd $MASK3, STATE1, T0 - pxor T0, MSG - movdqa STATE2, T0 - pand STATE3, T0 - pxor T0, MSG - movdqa MSG, (%rdx) - - call __morus640_update - sub $16, %rcx - add $16, %rsi - add $16, %rdx - cmp $16, %rcx - jge .Ldec_a_loop - - jmp .Ldec_cont -.align 4 -.Ldec_u_loop: - movdqu (%rsi), MSG - pxor STATE0, MSG - pshufd $MASK3, STATE1, T0 - pxor T0, MSG - movdqa STATE2, T0 - pand STATE3, T0 - pxor T0, MSG - movdqu MSG, (%rdx) - - call __morus640_update - sub $16, %rcx - add $16, %rsi - add $16, %rdx - cmp $16, %rcx - jge .Ldec_u_loop - -.Ldec_cont: - /* store the state: */ - movdqu STATE0, (0 * 16)(%rdi) - movdqu STATE1, (1 * 16)(%rdi) - movdqu STATE2, (2 * 16)(%rdi) - movdqu STATE3, (3 * 16)(%rdi) - movdqu STATE4, (4 * 16)(%rdi) - -.Ldec_out: - FRAME_END - ret -ENDPROC(crypto_morus640_sse2_dec) - -/* - * void crypto_morus640_sse2_dec_tail(void *state, const void *src, void *dst, - * unsigned int length); - */ -ENTRY(crypto_morus640_sse2_dec_tail) - FRAME_BEGIN - - /* load the state: */ - movdqu (0 * 16)(%rdi), STATE0 - movdqu (1 * 16)(%rdi), STATE1 - movdqu (2 * 16)(%rdi), STATE2 - movdqu (3 * 16)(%rdi), STATE3 - movdqu (4 * 16)(%rdi), STATE4 - - /* decrypt message: */ - call __load_partial - - pxor STATE0, MSG - pshufd $MASK3, STATE1, T0 - pxor T0, MSG - movdqa STATE2, T0 - pand STATE3, T0 - pxor T0, MSG - movdqa MSG, T0 - - call __store_partial - - /* mask with byte count: */ - movq %rcx, T0 - punpcklbw T0, T0 - punpcklbw T0, T0 - punpcklbw T0, T0 - punpcklbw T0, T0 - movdqa .Lmorus640_counter, T1 - pcmpgtb T1, T0 - pand T0, MSG - - call __morus640_update - - /* store the state: */ - movdqu STATE0, (0 * 16)(%rdi) - movdqu STATE1, (1 * 16)(%rdi) - movdqu STATE2, (2 * 16)(%rdi) - movdqu STATE3, (3 * 16)(%rdi) - movdqu STATE4, (4 * 
16)(%rdi) - - FRAME_END - ret -ENDPROC(crypto_morus640_sse2_dec_tail) - -/* - * void crypto_morus640_sse2_final(void *state, void *tag_xor, - * u64 assoclen, u64 cryptlen); - */ -ENTRY(crypto_morus640_sse2_final) - FRAME_BEGIN - - /* load the state: */ - movdqu (0 * 16)(%rdi), STATE0 - movdqu (1 * 16)(%rdi), STATE1 - movdqu (2 * 16)(%rdi), STATE2 - movdqu (3 * 16)(%rdi), STATE3 - movdqu (4 * 16)(%rdi), STATE4 - - /* xor state[0] into state[4]: */ - pxor STATE0, STATE4 - - /* prepare length block: */ - movq %rdx, MSG - movq %rcx, T0 - pslldq $8, T0 - pxor T0, MSG - psllq $3, MSG /* multiply by 8 (to get bit count) */ - - /* update state: */ - call __morus640_update - call __morus640_update - call __morus640_update - call __morus640_update - call __morus640_update - call __morus640_update - call __morus640_update - call __morus640_update - call __morus640_update - call __morus640_update - - /* xor tag: */ - movdqu (%rsi), MSG - - pxor STATE0, MSG - pshufd $MASK3, STATE1, T0 - pxor T0, MSG - movdqa STATE2, T0 - pand STATE3, T0 - pxor T0, MSG - - movdqu MSG, (%rsi) - - FRAME_END - ret -ENDPROC(crypto_morus640_sse2_final) diff --git a/arch/x86/crypto/morus640-sse2-glue.c b/arch/x86/crypto/morus640-sse2-glue.c deleted file mode 100644 index 8ef68134aef4..000000000000 --- a/arch/x86/crypto/morus640-sse2-glue.c +++ /dev/null @@ -1,61 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * The MORUS-640 Authenticated-Encryption Algorithm - * Glue for SSE2 implementation - * - * Copyright (c) 2016-2018 Ondrej Mosnacek <omosnacek@gmail.com> - * Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved. - */ - -#include <crypto/internal/aead.h> -#include <crypto/internal/simd.h> -#include <crypto/morus640_glue.h> -#include <linux/module.h> -#include <asm/fpu/api.h> -#include <asm/cpu_device_id.h> - -asmlinkage void crypto_morus640_sse2_init(void *state, const void *key, - const void *iv); -asmlinkage void crypto_morus640_sse2_ad(void *state, const void *data, - unsigned int length); - -asmlinkage void crypto_morus640_sse2_enc(void *state, const void *src, - void *dst, unsigned int length); -asmlinkage void crypto_morus640_sse2_dec(void *state, const void *src, - void *dst, unsigned int length); - -asmlinkage void crypto_morus640_sse2_enc_tail(void *state, const void *src, - void *dst, unsigned int length); -asmlinkage void crypto_morus640_sse2_dec_tail(void *state, const void *src, - void *dst, unsigned int length); - -asmlinkage void crypto_morus640_sse2_final(void *state, void *tag_xor, - u64 assoclen, u64 cryptlen); - -MORUS640_DECLARE_ALG(sse2, "morus640-sse2", 400); - -static struct simd_aead_alg *simd_alg; - -static int __init crypto_morus640_sse2_module_init(void) -{ - if (!boot_cpu_has(X86_FEATURE_XMM2) || - !cpu_has_xfeatures(XFEATURE_MASK_SSE, NULL)) - return -ENODEV; - - return simd_register_aeads_compat(&crypto_morus640_sse2_alg, 1, - &simd_alg); -} - -static void __exit crypto_morus640_sse2_module_exit(void) -{ - simd_unregister_aeads(&crypto_morus640_sse2_alg, 1, &simd_alg); -} - -module_init(crypto_morus640_sse2_module_init); -module_exit(crypto_morus640_sse2_module_exit); - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Ondrej Mosnacek <omosnacek@gmail.com>"); -MODULE_DESCRIPTION("MORUS-640 AEAD algorithm -- SSE2 implementation"); -MODULE_ALIAS_CRYPTO("morus640"); -MODULE_ALIAS_CRYPTO("morus640-sse2"); diff --git a/arch/x86/crypto/morus640_glue.c b/arch/x86/crypto/morus640_glue.c deleted file mode 100644 index d8b5fd6cef29..000000000000 --- a/arch/x86/crypto/morus640_glue.c +++ 
/dev/null @@ -1,200 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * The MORUS-640 Authenticated-Encryption Algorithm - * Common x86 SIMD glue skeleton - * - * Copyright (c) 2016-2018 Ondrej Mosnacek <omosnacek@gmail.com> - * Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved. - */ - -#include <crypto/internal/aead.h> -#include <crypto/internal/skcipher.h> -#include <crypto/morus640_glue.h> -#include <crypto/scatterwalk.h> -#include <linux/err.h> -#include <linux/init.h> -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/scatterlist.h> -#include <asm/fpu/api.h> - -struct morus640_state { - struct morus640_block s[MORUS_STATE_BLOCKS]; -}; - -struct morus640_ops { - int (*skcipher_walk_init)(struct skcipher_walk *walk, - struct aead_request *req, bool atomic); - - void (*crypt_blocks)(void *state, const void *src, void *dst, - unsigned int length); - void (*crypt_tail)(void *state, const void *src, void *dst, - unsigned int length); -}; - -static void crypto_morus640_glue_process_ad( - struct morus640_state *state, - const struct morus640_glue_ops *ops, - struct scatterlist *sg_src, unsigned int assoclen) -{ - struct scatter_walk walk; - struct morus640_block buf; - unsigned int pos = 0; - - scatterwalk_start(&walk, sg_src); - while (assoclen != 0) { - unsigned int size = scatterwalk_clamp(&walk, assoclen); - unsigned int left = size; - void *mapped = scatterwalk_map(&walk); - const u8 *src = (const u8 *)mapped; - - if (pos + size >= MORUS640_BLOCK_SIZE) { - if (pos > 0) { - unsigned int fill = MORUS640_BLOCK_SIZE - pos; - memcpy(buf.bytes + pos, src, fill); - ops->ad(state, buf.bytes, MORUS640_BLOCK_SIZE); - pos = 0; - left -= fill; - src += fill; - } - - ops->ad(state, src, left); - src += left & ~(MORUS640_BLOCK_SIZE - 1); - left &= MORUS640_BLOCK_SIZE - 1; - } - - memcpy(buf.bytes + pos, src, left); - - pos += left; - assoclen -= size; - scatterwalk_unmap(mapped); - scatterwalk_advance(&walk, size); - scatterwalk_done(&walk, 0, assoclen); - } - - if (pos > 0) { - memset(buf.bytes + pos, 0, MORUS640_BLOCK_SIZE - pos); - ops->ad(state, buf.bytes, MORUS640_BLOCK_SIZE); - } -} - -static void crypto_morus640_glue_process_crypt(struct morus640_state *state, - struct morus640_ops ops, - struct skcipher_walk *walk) -{ - while (walk->nbytes >= MORUS640_BLOCK_SIZE) { - ops.crypt_blocks(state, walk->src.virt.addr, - walk->dst.virt.addr, - round_down(walk->nbytes, MORUS640_BLOCK_SIZE)); - skcipher_walk_done(walk, walk->nbytes % MORUS640_BLOCK_SIZE); - } - - if (walk->nbytes) { - ops.crypt_tail(state, walk->src.virt.addr, walk->dst.virt.addr, - walk->nbytes); - skcipher_walk_done(walk, 0); - } -} - -int crypto_morus640_glue_setkey(struct crypto_aead *aead, const u8 *key, - unsigned int keylen) -{ - struct morus640_ctx *ctx = crypto_aead_ctx(aead); - - if (keylen != MORUS640_BLOCK_SIZE) { - crypto_aead_set_flags(aead, CRYPTO_TFM_RES_BAD_KEY_LEN); - return -EINVAL; - } - - memcpy(ctx->key.bytes, key, MORUS640_BLOCK_SIZE); - return 0; -} -EXPORT_SYMBOL_GPL(crypto_morus640_glue_setkey); - -int crypto_morus640_glue_setauthsize(struct crypto_aead *tfm, - unsigned int authsize) -{ - return (authsize <= MORUS_MAX_AUTH_SIZE) ? 
0 : -EINVAL; -} -EXPORT_SYMBOL_GPL(crypto_morus640_glue_setauthsize); - -static void crypto_morus640_glue_crypt(struct aead_request *req, - struct morus640_ops ops, - unsigned int cryptlen, - struct morus640_block *tag_xor) -{ - struct crypto_aead *tfm = crypto_aead_reqtfm(req); - struct morus640_ctx *ctx = crypto_aead_ctx(tfm); - struct morus640_state state; - struct skcipher_walk walk; - - ops.skcipher_walk_init(&walk, req, true); - - kernel_fpu_begin(); - - ctx->ops->init(&state, &ctx->key, req->iv); - crypto_morus640_glue_process_ad(&state, ctx->ops, req->src, req->assoclen); - crypto_morus640_glue_process_crypt(&state, ops, &walk); - ctx->ops->final(&state, tag_xor, req->assoclen, cryptlen); - - kernel_fpu_end(); -} - -int crypto_morus640_glue_encrypt(struct aead_request *req) -{ - struct crypto_aead *tfm = crypto_aead_reqtfm(req); - struct morus640_ctx *ctx = crypto_aead_ctx(tfm); - struct morus640_ops OPS = { - .skcipher_walk_init = skcipher_walk_aead_encrypt, - .crypt_blocks = ctx->ops->enc, - .crypt_tail = ctx->ops->enc_tail, - }; - - struct morus640_block tag = {}; - unsigned int authsize = crypto_aead_authsize(tfm); - unsigned int cryptlen = req->cryptlen; - - crypto_morus640_glue_crypt(req, OPS, cryptlen, &tag); - - scatterwalk_map_and_copy(tag.bytes, req->dst, - req->assoclen + cryptlen, authsize, 1); - return 0; -} -EXPORT_SYMBOL_GPL(crypto_morus640_glue_encrypt); - -int crypto_morus640_glue_decrypt(struct aead_request *req) -{ - static const u8 zeros[MORUS640_BLOCK_SIZE] = {}; - - struct crypto_aead *tfm = crypto_aead_reqtfm(req); - struct morus640_ctx *ctx = crypto_aead_ctx(tfm); - struct morus640_ops OPS = { - .skcipher_walk_init = skcipher_walk_aead_decrypt, - .crypt_blocks = ctx->ops->dec, - .crypt_tail = ctx->ops->dec_tail, - }; - - struct morus640_block tag; - unsigned int authsize = crypto_aead_authsize(tfm); - unsigned int cryptlen = req->cryptlen - authsize; - - scatterwalk_map_and_copy(tag.bytes, req->src, - req->assoclen + cryptlen, authsize, 0); - - crypto_morus640_glue_crypt(req, OPS, cryptlen, &tag); - - return crypto_memneq(tag.bytes, zeros, authsize) ? 
-EBADMSG : 0; -} -EXPORT_SYMBOL_GPL(crypto_morus640_glue_decrypt); - -void crypto_morus640_glue_init_ops(struct crypto_aead *aead, - const struct morus640_glue_ops *ops) -{ - struct morus640_ctx *ctx = crypto_aead_ctx(aead); - ctx->ops = ops; -} -EXPORT_SYMBOL_GPL(crypto_morus640_glue_init_ops); - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Ondrej Mosnacek <omosnacek@gmail.com>"); -MODULE_DESCRIPTION("MORUS-640 AEAD mode -- glue for x86 optimizations"); diff --git a/arch/x86/crypto/serpent_avx2_glue.c b/arch/x86/crypto/serpent_avx2_glue.c index b871728e0b2f..13fd8d3d2da0 100644 --- a/arch/x86/crypto/serpent_avx2_glue.c +++ b/arch/x86/crypto/serpent_avx2_glue.c @@ -167,7 +167,7 @@ static int xts_encrypt(struct skcipher_request *req) return glue_xts_req_128bit(&serpent_enc_xts, req, XTS_TWEAK_CAST(__serpent_encrypt), - &ctx->tweak_ctx, &ctx->crypt_ctx); + &ctx->tweak_ctx, &ctx->crypt_ctx, false); } static int xts_decrypt(struct skcipher_request *req) @@ -177,7 +177,7 @@ static int xts_decrypt(struct skcipher_request *req) return glue_xts_req_128bit(&serpent_dec_xts, req, XTS_TWEAK_CAST(__serpent_encrypt), - &ctx->tweak_ctx, &ctx->crypt_ctx); + &ctx->tweak_ctx, &ctx->crypt_ctx, true); } static struct skcipher_alg serpent_algs[] = { diff --git a/arch/x86/crypto/serpent_avx_glue.c b/arch/x86/crypto/serpent_avx_glue.c index 4a9a9f2ee1d8..7d3dca38a5a2 100644 --- a/arch/x86/crypto/serpent_avx_glue.c +++ b/arch/x86/crypto/serpent_avx_glue.c @@ -207,7 +207,7 @@ static int xts_encrypt(struct skcipher_request *req) return glue_xts_req_128bit(&serpent_enc_xts, req, XTS_TWEAK_CAST(__serpent_encrypt), - &ctx->tweak_ctx, &ctx->crypt_ctx); + &ctx->tweak_ctx, &ctx->crypt_ctx, false); } static int xts_decrypt(struct skcipher_request *req) @@ -217,7 +217,7 @@ static int xts_decrypt(struct skcipher_request *req) return glue_xts_req_128bit(&serpent_dec_xts, req, XTS_TWEAK_CAST(__serpent_encrypt), - &ctx->tweak_ctx, &ctx->crypt_ctx); + &ctx->tweak_ctx, &ctx->crypt_ctx, true); } static struct skcipher_alg serpent_algs[] = { diff --git a/arch/x86/crypto/sha256_ssse3_glue.c b/arch/x86/crypto/sha256_ssse3_glue.c index 73867da3cbee..f9aff31fe59e 100644 --- a/arch/x86/crypto/sha256_ssse3_glue.c +++ b/arch/x86/crypto/sha256_ssse3_glue.c @@ -45,8 +45,8 @@ asmlinkage void sha256_transform_ssse3(u32 *digest, const char *data, u64 rounds); typedef void (sha256_transform_fn)(u32 *digest, const char *data, u64 rounds); -static int sha256_update(struct shash_desc *desc, const u8 *data, - unsigned int len, sha256_transform_fn *sha256_xform) +static int _sha256_update(struct shash_desc *desc, const u8 *data, + unsigned int len, sha256_transform_fn *sha256_xform) { struct sha256_state *sctx = shash_desc_ctx(desc); @@ -84,7 +84,7 @@ static int sha256_finup(struct shash_desc *desc, const u8 *data, static int sha256_ssse3_update(struct shash_desc *desc, const u8 *data, unsigned int len) { - return sha256_update(desc, data, len, sha256_transform_ssse3); + return _sha256_update(desc, data, len, sha256_transform_ssse3); } static int sha256_ssse3_finup(struct shash_desc *desc, const u8 *data, @@ -151,7 +151,7 @@ asmlinkage void sha256_transform_avx(u32 *digest, const char *data, static int sha256_avx_update(struct shash_desc *desc, const u8 *data, unsigned int len) { - return sha256_update(desc, data, len, sha256_transform_avx); + return _sha256_update(desc, data, len, sha256_transform_avx); } static int sha256_avx_finup(struct shash_desc *desc, const u8 *data, @@ -233,7 +233,7 @@ asmlinkage void sha256_transform_rorx(u32 *digest, const 
char *data, static int sha256_avx2_update(struct shash_desc *desc, const u8 *data, unsigned int len) { - return sha256_update(desc, data, len, sha256_transform_rorx); + return _sha256_update(desc, data, len, sha256_transform_rorx); } static int sha256_avx2_finup(struct shash_desc *desc, const u8 *data, @@ -313,7 +313,7 @@ asmlinkage void sha256_ni_transform(u32 *digest, const char *data, static int sha256_ni_update(struct shash_desc *desc, const u8 *data, unsigned int len) { - return sha256_update(desc, data, len, sha256_ni_transform); + return _sha256_update(desc, data, len, sha256_ni_transform); } static int sha256_ni_finup(struct shash_desc *desc, const u8 *data, diff --git a/arch/x86/crypto/twofish_avx_glue.c b/arch/x86/crypto/twofish_avx_glue.c index 0dbf8e8b09d7..d561c821788b 100644 --- a/arch/x86/crypto/twofish_avx_glue.c +++ b/arch/x86/crypto/twofish_avx_glue.c @@ -210,7 +210,7 @@ static int xts_encrypt(struct skcipher_request *req) return glue_xts_req_128bit(&twofish_enc_xts, req, XTS_TWEAK_CAST(twofish_enc_blk), - &ctx->tweak_ctx, &ctx->crypt_ctx); + &ctx->tweak_ctx, &ctx->crypt_ctx, false); } static int xts_decrypt(struct skcipher_request *req) @@ -220,7 +220,7 @@ static int xts_decrypt(struct skcipher_request *req) return glue_xts_req_128bit(&twofish_dec_xts, req, XTS_TWEAK_CAST(twofish_enc_blk), - &ctx->tweak_ctx, &ctx->crypt_ctx); + &ctx->tweak_ctx, &ctx->crypt_ctx, true); } static struct skcipher_alg twofish_algs[] = { diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c index 536b574b6161..3f8e22615812 100644 --- a/arch/x86/entry/common.c +++ b/arch/x86/entry/common.c @@ -285,15 +285,16 @@ __visible void do_syscall_64(unsigned long nr, struct pt_regs *regs) if (READ_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY) nr = syscall_trace_enter(regs); - /* - * NB: Native and x32 syscalls are dispatched from the same - * table. The only functional difference is the x32 bit in - * regs->orig_ax, which changes the behavior of some syscalls. - */ - nr &= __SYSCALL_MASK; if (likely(nr < NR_syscalls)) { nr = array_index_nospec(nr, NR_syscalls); regs->ax = sys_call_table[nr](regs); +#ifdef CONFIG_X86_X32_ABI + } else if (likely((nr & __X32_SYSCALL_BIT) && + (nr & ~__X32_SYSCALL_BIT) < X32_NR_syscalls)) { + nr = array_index_nospec(nr & ~__X32_SYSCALL_BIT, + X32_NR_syscalls); + regs->ax = x32_sys_call_table[nr](regs); +#endif } syscall_return_slowpath(regs); diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index 4f86928246e7..f83ca5aa8b77 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -63,7 +63,7 @@ * enough to patch inline, increasing performance. 
*/ -#ifdef CONFIG_PREEMPT +#ifdef CONFIG_PREEMPTION # define preempt_stop(clobbers) DISABLE_INTERRUPTS(clobbers); TRACE_IRQS_OFF #else # define preempt_stop(clobbers) @@ -1084,7 +1084,7 @@ restore_all: INTERRUPT_RETURN restore_all_kernel: -#ifdef CONFIG_PREEMPT +#ifdef CONFIG_PREEMPTION DISABLE_INTERRUPTS(CLBR_ANY) cmpl $0, PER_CPU_VAR(__preempt_count) jnz .Lno_preempt @@ -1364,7 +1364,7 @@ ENTRY(xen_hypervisor_callback) ENTRY(xen_do_upcall) 1: mov %esp, %eax call xen_evtchn_do_upcall -#ifndef CONFIG_PREEMPT +#ifndef CONFIG_PREEMPTION call xen_maybe_preempt_hcall #endif jmp ret_from_intr diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index be9ca198c581..b7c3ea4cb19d 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -664,7 +664,7 @@ GLOBAL(swapgs_restore_regs_and_return_to_usermode) /* Returning to kernel space */ retint_kernel: -#ifdef CONFIG_PREEMPT +#ifdef CONFIG_PREEMPTION /* Interrupts are off */ /* Check if we need preemption */ btl $9, EFLAGS(%rsp) /* were interrupts off? */ @@ -1058,10 +1058,10 @@ ENTRY(native_load_gs_index) ENDPROC(native_load_gs_index) EXPORT_SYMBOL(native_load_gs_index) - _ASM_EXTABLE(.Lgs_change, bad_gs) + _ASM_EXTABLE(.Lgs_change, .Lbad_gs) .section .fixup, "ax" /* running with kernelgs */ -bad_gs: +.Lbad_gs: SWAPGS /* switch back to user gs */ .macro ZAP_GS /* This can't be a string because the preprocessor needs to see it. */ @@ -1115,7 +1115,7 @@ ENTRY(xen_do_hypervisor_callback) /* do_hypervisor_callback(struct *pt_regs) */ call xen_evtchn_do_upcall LEAVE_IRQ_STACK -#ifndef CONFIG_PREEMPT +#ifndef CONFIG_PREEMPTION call xen_maybe_preempt_hcall #endif jmp error_exit diff --git a/arch/x86/entry/syscall_64.c b/arch/x86/entry/syscall_64.c index d5252bc1e380..b1bf31713374 100644 --- a/arch/x86/entry/syscall_64.c +++ b/arch/x86/entry/syscall_64.c @@ -10,10 +10,13 @@ /* this is a lie, but it does not hurt as sys_ni_syscall just returns -EINVAL */ extern asmlinkage long sys_ni_syscall(const struct pt_regs *); #define __SYSCALL_64(nr, sym, qual) extern asmlinkage long sym(const struct pt_regs *); +#define __SYSCALL_X32(nr, sym, qual) __SYSCALL_64(nr, sym, qual) #include <asm/syscalls_64.h> #undef __SYSCALL_64 +#undef __SYSCALL_X32 #define __SYSCALL_64(nr, sym, qual) [nr] = sym, +#define __SYSCALL_X32(nr, sym, qual) asmlinkage const sys_call_ptr_t sys_call_table[__NR_syscall_max+1] = { /* @@ -23,3 +26,25 @@ asmlinkage const sys_call_ptr_t sys_call_table[__NR_syscall_max+1] = { [0 ... __NR_syscall_max] = &sys_ni_syscall, #include <asm/syscalls_64.h> }; + +#undef __SYSCALL_64 +#undef __SYSCALL_X32 + +#ifdef CONFIG_X86_X32_ABI + +#define __SYSCALL_64(nr, sym, qual) +#define __SYSCALL_X32(nr, sym, qual) [nr] = sym, + +asmlinkage const sys_call_ptr_t x32_sys_call_table[__NR_syscall_x32_max+1] = { + /* + * Smells like a compiler bug -- it doesn't work + * when the & below is removed. + */ + [0 ... 
__NR_syscall_x32_max] = &sys_ni_syscall, +#include <asm/syscalls_64.h> +}; + +#undef __SYSCALL_64 +#undef __SYSCALL_X32 + +#endif diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl index c00019abd076..3fe02546aed3 100644 --- a/arch/x86/entry/syscalls/syscall_32.tbl +++ b/arch/x86/entry/syscalls/syscall_32.tbl @@ -186,11 +186,11 @@ 172 i386 prctl sys_prctl __ia32_sys_prctl 173 i386 rt_sigreturn sys_rt_sigreturn sys32_rt_sigreturn 174 i386 rt_sigaction sys_rt_sigaction __ia32_compat_sys_rt_sigaction -175 i386 rt_sigprocmask sys_rt_sigprocmask __ia32_sys_rt_sigprocmask +175 i386 rt_sigprocmask sys_rt_sigprocmask __ia32_compat_sys_rt_sigprocmask 176 i386 rt_sigpending sys_rt_sigpending __ia32_compat_sys_rt_sigpending 177 i386 rt_sigtimedwait sys_rt_sigtimedwait_time32 __ia32_compat_sys_rt_sigtimedwait_time32 178 i386 rt_sigqueueinfo sys_rt_sigqueueinfo __ia32_compat_sys_rt_sigqueueinfo -179 i386 rt_sigsuspend sys_rt_sigsuspend __ia32_sys_rt_sigsuspend +179 i386 rt_sigsuspend sys_rt_sigsuspend __ia32_compat_sys_rt_sigsuspend 180 i386 pread64 sys_pread64 __ia32_compat_sys_x86_pread 181 i386 pwrite64 sys_pwrite64 __ia32_compat_sys_x86_pwrite 182 i386 chown sys_chown16 __ia32_sys_chown16 diff --git a/arch/x86/entry/syscalls/syscalltbl.sh b/arch/x86/entry/syscalls/syscalltbl.sh index 94fcd1951aca..1af2be39e7d9 100644 --- a/arch/x86/entry/syscalls/syscalltbl.sh +++ b/arch/x86/entry/syscalls/syscalltbl.sh @@ -1,13 +1,13 @@ -#!/bin/sh +#!/bin/bash # SPDX-License-Identifier: GPL-2.0 in="$1" out="$2" syscall_macro() { - abi="$1" - nr="$2" - entry="$3" + local abi="$1" + local nr="$2" + local entry="$3" # Entry can be either just a function name or "function/qualifier" real_entry="${entry%%/*}" @@ -21,14 +21,14 @@ syscall_macro() { } emit() { - abi="$1" - nr="$2" - entry="$3" - compat="$4" - umlentry="" + local abi="$1" + local nr="$2" + local entry="$3" + local compat="$4" + local umlentry="" - if [ "$abi" = "64" -a -n "$compat" ]; then - echo "a compat entry for a 64-bit syscall makes no sense" >&2 + if [ "$abi" != "I386" -a -n "$compat" ]; then + echo "a compat entry ($abi: $compat) for a 64-bit syscall makes no sense" >&2 exit 1 fi @@ -62,14 +62,17 @@ grep '^[0-9]' "$in" | sort -n | ( while read nr abi name entry compat; do abi=`echo "$abi" | tr '[a-z]' '[A-Z]'` if [ "$abi" = "COMMON" -o "$abi" = "64" ]; then - # COMMON is the same as 64, except that we don't expect X32 - # programs to use it. Our expectation has nothing to do with - # any generated code, so treat them the same. emit 64 "$nr" "$entry" "$compat" + if [ "$abi" = "COMMON" ]; then + # COMMON means that this syscall exists in the same form for + # 64-bit and X32. + echo "#ifdef CONFIG_X86_X32_ABI" + emit X32 "$nr" "$entry" "$compat" + echo "#endif" + fi elif [ "$abi" = "X32" ]; then - # X32 is equivalent to 64 on an X32-compatible kernel. 
echo "#ifdef CONFIG_X86_X32_ABI" - emit 64 "$nr" "$entry" "$compat" + emit X32 "$nr" "$entry" "$compat" echo "#endif" elif [ "$abi" = "I386" ]; then emit "$abi" "$nr" "$entry" "$compat" diff --git a/arch/x86/entry/thunk_32.S b/arch/x86/entry/thunk_32.S index cb3464525b37..2713490611a3 100644 --- a/arch/x86/entry/thunk_32.S +++ b/arch/x86/entry/thunk_32.S @@ -34,7 +34,7 @@ THUNK trace_hardirqs_off_thunk,trace_hardirqs_off_caller,1 #endif -#ifdef CONFIG_PREEMPT +#ifdef CONFIG_PREEMPTION THUNK ___preempt_schedule, preempt_schedule THUNK ___preempt_schedule_notrace, preempt_schedule_notrace EXPORT_SYMBOL(___preempt_schedule) diff --git a/arch/x86/entry/thunk_64.S b/arch/x86/entry/thunk_64.S index cc20465b2867..ea5c4167086c 100644 --- a/arch/x86/entry/thunk_64.S +++ b/arch/x86/entry/thunk_64.S @@ -46,7 +46,7 @@ THUNK lockdep_sys_exit_thunk,lockdep_sys_exit #endif -#ifdef CONFIG_PREEMPT +#ifdef CONFIG_PREEMPTION THUNK ___preempt_schedule, preempt_schedule THUNK ___preempt_schedule_notrace, preempt_schedule_notrace EXPORT_SYMBOL(___preempt_schedule) @@ -55,7 +55,7 @@ #if defined(CONFIG_TRACE_IRQFLAGS) \ || defined(CONFIG_DEBUG_LOCK_ALLOC) \ - || defined(CONFIG_PREEMPT) + || defined(CONFIG_PREEMPTION) .L_restore: popq %r11 popq %r10 diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile index 8df549138193..0f2154106d01 100644 --- a/arch/x86/entry/vdso/Makefile +++ b/arch/x86/entry/vdso/Makefile @@ -89,6 +89,7 @@ $(vobjs): KBUILD_CFLAGS := $(filter-out $(GCC_PLUGINS_CFLAGS) $(RETPOLINE_CFLAGS # CFLAGS_REMOVE_vdso-note.o = -pg CFLAGS_REMOVE_vclock_gettime.o = -pg +CFLAGS_REMOVE_vdso32/vclock_gettime.o = -pg CFLAGS_REMOVE_vgetcpu.o = -pg CFLAGS_REMOVE_vvar.o = -pg @@ -128,7 +129,7 @@ $(obj)/%.so: $(obj)/%.so.dbg FORCE $(obj)/vdsox32.so.dbg: $(obj)/vdsox32.lds $(vobjx32s) FORCE $(call if_changed,vdso_and_check) -CPPFLAGS_vdso32.lds = $(CPPFLAGS_vdso.lds) +CPPFLAGS_vdso32/vdso32.lds = $(CPPFLAGS_vdso.lds) VDSO_LDFLAGS_vdso32.lds = -m elf_i386 -soname linux-gate.so.1 targets += vdso32/vdso32.lds diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c index 349a61d8bf34..f5937742b290 100644 --- a/arch/x86/entry/vdso/vma.c +++ b/arch/x86/entry/vdso/vma.c @@ -122,7 +122,7 @@ static vm_fault_t vvar_fault(const struct vm_special_mapping *sm, if (tsc_pg && vclock_was_used(VCLOCK_HVCLOCK)) return vmf_insert_pfn(vma, vmf->address, - vmalloc_to_pfn(tsc_pg)); + virt_to_phys(tsc_pg) >> PAGE_SHIFT); } return VM_FAULT_SIGBUS; diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c index e7d35f60d53f..64c3e70b0556 100644 --- a/arch/x86/events/amd/core.c +++ b/arch/x86/events/amd/core.c @@ -5,12 +5,14 @@ #include <linux/init.h> #include <linux/slab.h> #include <linux/delay.h> +#include <linux/jiffies.h> #include <asm/apicdef.h> #include <asm/nmi.h> #include "../perf_event.h" -static DEFINE_PER_CPU(unsigned int, perf_nmi_counter); +static DEFINE_PER_CPU(unsigned long, perf_nmi_tstamp); +static unsigned long perf_nmi_window; static __initconst const u64 amd_hw_cache_event_ids [PERF_COUNT_HW_CACHE_MAX] @@ -641,11 +643,12 @@ static void amd_pmu_disable_event(struct perf_event *event) * handler when multiple PMCs are active or PMC overflow while handling some * other source of an NMI. * - * Attempt to mitigate this by using the number of active PMCs to determine - * whether to return NMI_HANDLED if the perf NMI handler did not handle/reset - * any PMCs. The per-CPU perf_nmi_counter variable is set to a minimum of the - * number of active PMCs or 2. 
The value of 2 is used in case an NMI does not - * arrive at the LAPIC in time to be collapsed into an already pending NMI. + * Attempt to mitigate this by creating an NMI window during which un-handled + * NMIs will be claimed. This prevents the window from extending past the + * time at which latent NMIs could still plausibly be received. The + * per-CPU perf_nmi_tstamp will be set to the window end time whenever perf has + * handled a counter. When an un-handled NMI is received, it will be claimed + * only if it arrives within that window. */ static int amd_pmu_handle_irq(struct pt_regs *regs) { @@ -663,21 +666,19 @@ static int amd_pmu_handle_irq(struct pt_regs *regs) handled = x86_pmu_handle_irq(regs); /* - * If a counter was handled, record the number of possible remaining - * NMIs that can occur. + * If a counter was handled, record a timestamp such that un-handled + * NMIs will be claimed if arriving within that window. */ if (handled) { - this_cpu_write(perf_nmi_counter, - min_t(unsigned int, 2, active)); + this_cpu_write(perf_nmi_tstamp, + jiffies + perf_nmi_window); return handled; } - if (!this_cpu_read(perf_nmi_counter)) + if (time_after(jiffies, this_cpu_read(perf_nmi_tstamp))) return NMI_DONE; - this_cpu_dec(perf_nmi_counter); - return NMI_HANDLED; } @@ -909,6 +910,9 @@ static int __init amd_core_pmu_init(void) if (!boot_cpu_has(X86_FEATURE_PERFCTR_CORE)) return 0; + /* Avoid calculating the value each time in the NMI handler */ + perf_nmi_window = msecs_to_jiffies(100); + switch (boot_cpu_data.x86) { case 0x15: pr_cont("Fam15h "); diff --git a/arch/x86/events/amd/ibs.c b/arch/x86/events/amd/ibs.c index 5b35b7ea5d72..26c36357c4c9 100644 --- a/arch/x86/events/amd/ibs.c +++ b/arch/x86/events/amd/ibs.c @@ -377,7 +377,8 @@ static inline void perf_ibs_disable_event(struct perf_ibs *perf_ibs, struct hw_perf_event *hwc, u64 config) { config &= ~perf_ibs->cnt_mask; - wrmsrl(hwc->config_base, config); + if (boot_cpu_data.x86 == 0x10) + wrmsrl(hwc->config_base, config); config &= ~perf_ibs->enable_mask; wrmsrl(hwc->config_base, config); } @@ -553,7 +554,8 @@ static struct perf_ibs perf_ibs_op = { }, .msr = MSR_AMD64_IBSOPCTL, .config_mask = IBS_OP_CONFIG_MASK, - .cnt_mask = IBS_OP_MAX_CNT, + .cnt_mask = IBS_OP_MAX_CNT | IBS_OP_CUR_CNT | + IBS_OP_CUR_CNT_RAND, .enable_mask = IBS_OP_ENABLE, .valid_mask = IBS_OP_VAL, .max_period = IBS_OP_MAX_CNT << 4, @@ -614,7 +616,7 @@ fail: if (event->attr.sample_type & PERF_SAMPLE_RAW) offset_max = perf_ibs->offset_max; else if (check_rip) - offset_max = 2; + offset_max = 3; else offset_max = 1; do { diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index 4886fc66fd88..7b21455d7504 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -1005,6 +1005,27 @@ static int collect_events(struct cpu_hw_events *cpuc, struct perf_event *leader, /* current number of events already accepted */ n = cpuc->n_events; + if (!cpuc->n_events) + cpuc->pebs_output = 0; + + if (!cpuc->is_fake && leader->attr.precise_ip) { + /* + * For PEBS->PT, if !aux_event, the group leader (PT) went + * away, the group was broken down and this singleton event + * can't schedule any more. 
+ */ + if (is_pebs_pt(leader) && !leader->aux_event) + return -EINVAL; + + /* + * pebs_output: 0: no PEBS so far, 1: PT, 2: DS + */ + if (cpuc->pebs_output && + cpuc->pebs_output != is_pebs_pt(leader) + 1) + return -EINVAL; + + cpuc->pebs_output = is_pebs_pt(leader) + 1; + } if (is_x86_event(leader)) { if (n >= max_count) @@ -2241,6 +2262,17 @@ static int x86_pmu_check_period(struct perf_event *event, u64 value) return 0; } +static int x86_pmu_aux_output_match(struct perf_event *event) +{ + if (!(pmu.capabilities & PERF_PMU_CAP_AUX_OUTPUT)) + return 0; + + if (x86_pmu.aux_output_match) + return x86_pmu.aux_output_match(event); + + return 0; +} + static struct pmu pmu = { .pmu_enable = x86_pmu_enable, .pmu_disable = x86_pmu_disable, @@ -2266,6 +2298,8 @@ static struct pmu pmu = { .sched_task = x86_pmu_sched_task, .task_ctx_size = sizeof(struct x86_perf_task_context), .check_period = x86_pmu_check_period, + + .aux_output_match = x86_pmu_aux_output_match, }; void arch_perf_update_userpage(struct perf_event *event, diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index e4c2cb65ea50..fcef678c3423 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -18,6 +18,7 @@ #include <asm/cpufeature.h> #include <asm/hardirq.h> #include <asm/intel-family.h> +#include <asm/intel_pt.h> #include <asm/apic.h> #include <asm/cpu_device_id.h> @@ -3298,6 +3299,13 @@ static int intel_pmu_hw_config(struct perf_event *event) } } + if (event->attr.aux_output) { + if (!event->attr.precise_ip) + return -EINVAL; + + event->hw.flags |= PERF_X86_EVENT_PEBS_VIA_PT; + } + if (event->attr.type != PERF_TYPE_RAW) return 0; @@ -3816,6 +3824,14 @@ static int intel_pmu_check_period(struct perf_event *event, u64 value) return intel_pmu_has_bts_period(event, value) ? 
-EINVAL : 0; } +static int intel_pmu_aux_output_match(struct perf_event *event) +{ + if (!x86_pmu.intel_cap.pebs_output_pt_available) + return 0; + + return is_intel_pt_event(event); +} + PMU_FORMAT_ATTR(offcore_rsp, "config1:0-63"); PMU_FORMAT_ATTR(ldlat, "config1:0-15"); @@ -3940,6 +3956,8 @@ static __initconst const struct x86_pmu intel_pmu = { .sched_task = intel_pmu_sched_task, .check_period = intel_pmu_check_period, + + .aux_output_match = intel_pmu_aux_output_match, }; static __init void intel_clovertown_quirk(void) @@ -3969,31 +3987,31 @@ static __init void intel_clovertown_quirk(void) } static const struct x86_cpu_desc isolation_ucodes[] = { - INTEL_CPU_DESC(INTEL_FAM6_HASWELL_CORE, 3, 0x0000001f), - INTEL_CPU_DESC(INTEL_FAM6_HASWELL_ULT, 1, 0x0000001e), - INTEL_CPU_DESC(INTEL_FAM6_HASWELL_GT3E, 1, 0x00000015), + INTEL_CPU_DESC(INTEL_FAM6_HASWELL, 3, 0x0000001f), + INTEL_CPU_DESC(INTEL_FAM6_HASWELL_L, 1, 0x0000001e), + INTEL_CPU_DESC(INTEL_FAM6_HASWELL_G, 1, 0x00000015), INTEL_CPU_DESC(INTEL_FAM6_HASWELL_X, 2, 0x00000037), INTEL_CPU_DESC(INTEL_FAM6_HASWELL_X, 4, 0x0000000a), - INTEL_CPU_DESC(INTEL_FAM6_BROADWELL_CORE, 4, 0x00000023), - INTEL_CPU_DESC(INTEL_FAM6_BROADWELL_GT3E, 1, 0x00000014), - INTEL_CPU_DESC(INTEL_FAM6_BROADWELL_XEON_D, 2, 0x00000010), - INTEL_CPU_DESC(INTEL_FAM6_BROADWELL_XEON_D, 3, 0x07000009), - INTEL_CPU_DESC(INTEL_FAM6_BROADWELL_XEON_D, 4, 0x0f000009), - INTEL_CPU_DESC(INTEL_FAM6_BROADWELL_XEON_D, 5, 0x0e000002), + INTEL_CPU_DESC(INTEL_FAM6_BROADWELL, 4, 0x00000023), + INTEL_CPU_DESC(INTEL_FAM6_BROADWELL_G, 1, 0x00000014), + INTEL_CPU_DESC(INTEL_FAM6_BROADWELL_D, 2, 0x00000010), + INTEL_CPU_DESC(INTEL_FAM6_BROADWELL_D, 3, 0x07000009), + INTEL_CPU_DESC(INTEL_FAM6_BROADWELL_D, 4, 0x0f000009), + INTEL_CPU_DESC(INTEL_FAM6_BROADWELL_D, 5, 0x0e000002), INTEL_CPU_DESC(INTEL_FAM6_BROADWELL_X, 2, 0x0b000014), INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_X, 3, 0x00000021), INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_X, 4, 0x00000000), - INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_MOBILE, 3, 0x0000007c), - INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_DESKTOP, 3, 0x0000007c), - INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE_DESKTOP, 9, 0x0000004e), - INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE_MOBILE, 9, 0x0000004e), - INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE_MOBILE, 10, 0x0000004e), - INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE_MOBILE, 11, 0x0000004e), - INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE_MOBILE, 12, 0x0000004e), - INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE_DESKTOP, 10, 0x0000004e), - INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE_DESKTOP, 11, 0x0000004e), - INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE_DESKTOP, 12, 0x0000004e), - INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE_DESKTOP, 13, 0x0000004e), + INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_L, 3, 0x0000007c), + INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE, 3, 0x0000007c), + INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE, 9, 0x0000004e), + INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE_L, 9, 0x0000004e), + INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE_L, 10, 0x0000004e), + INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE_L, 11, 0x0000004e), + INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE_L, 12, 0x0000004e), + INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE, 10, 0x0000004e), + INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE, 11, 0x0000004e), + INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE, 12, 0x0000004e), + INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE, 13, 0x0000004e), {} }; @@ -4151,7 +4169,7 @@ static const struct x86_cpu_desc counter_freezing_ucodes[] = { INTEL_CPU_DESC(INTEL_FAM6_ATOM_GOLDMONT, 2, 0x0000000e), INTEL_CPU_DESC(INTEL_FAM6_ATOM_GOLDMONT, 9, 0x0000002e), INTEL_CPU_DESC(INTEL_FAM6_ATOM_GOLDMONT, 10, 0x00000008), - 
INTEL_CPU_DESC(INTEL_FAM6_ATOM_GOLDMONT_X, 1, 0x00000028), + INTEL_CPU_DESC(INTEL_FAM6_ATOM_GOLDMONT_D, 1, 0x00000028), INTEL_CPU_DESC(INTEL_FAM6_ATOM_GOLDMONT_PLUS, 1, 0x00000028), INTEL_CPU_DESC(INTEL_FAM6_ATOM_GOLDMONT_PLUS, 8, 0x00000006), {} @@ -4649,7 +4667,7 @@ __init int intel_pmu_init(void) break; case INTEL_FAM6_ATOM_SILVERMONT: - case INTEL_FAM6_ATOM_SILVERMONT_X: + case INTEL_FAM6_ATOM_SILVERMONT_D: case INTEL_FAM6_ATOM_SILVERMONT_MID: case INTEL_FAM6_ATOM_AIRMONT: case INTEL_FAM6_ATOM_AIRMONT_MID: @@ -4671,7 +4689,7 @@ __init int intel_pmu_init(void) break; case INTEL_FAM6_ATOM_GOLDMONT: - case INTEL_FAM6_ATOM_GOLDMONT_X: + case INTEL_FAM6_ATOM_GOLDMONT_D: x86_add_quirk(intel_counter_freezing_quirk); memcpy(hw_cache_event_ids, glm_hw_cache_event_ids, sizeof(hw_cache_event_ids)); @@ -4727,7 +4745,7 @@ __init int intel_pmu_init(void) name = "goldmont_plus"; break; - case INTEL_FAM6_ATOM_TREMONT_X: + case INTEL_FAM6_ATOM_TREMONT_D: x86_pmu.late_ack = true; memcpy(hw_cache_event_ids, glp_hw_cache_event_ids, sizeof(hw_cache_event_ids)); @@ -4863,10 +4881,10 @@ __init int intel_pmu_init(void) break; - case INTEL_FAM6_HASWELL_CORE: + case INTEL_FAM6_HASWELL: case INTEL_FAM6_HASWELL_X: - case INTEL_FAM6_HASWELL_ULT: - case INTEL_FAM6_HASWELL_GT3E: + case INTEL_FAM6_HASWELL_L: + case INTEL_FAM6_HASWELL_G: x86_add_quirk(intel_ht_bug); x86_add_quirk(intel_pebs_isolation_quirk); x86_pmu.late_ack = true; @@ -4896,9 +4914,9 @@ __init int intel_pmu_init(void) name = "haswell"; break; - case INTEL_FAM6_BROADWELL_CORE: - case INTEL_FAM6_BROADWELL_XEON_D: - case INTEL_FAM6_BROADWELL_GT3E: + case INTEL_FAM6_BROADWELL: + case INTEL_FAM6_BROADWELL_D: + case INTEL_FAM6_BROADWELL_G: case INTEL_FAM6_BROADWELL_X: x86_add_quirk(intel_pebs_isolation_quirk); x86_pmu.late_ack = true; @@ -4961,10 +4979,12 @@ __init int intel_pmu_init(void) case INTEL_FAM6_SKYLAKE_X: pmem = true; /* fall through */ - case INTEL_FAM6_SKYLAKE_MOBILE: - case INTEL_FAM6_SKYLAKE_DESKTOP: - case INTEL_FAM6_KABYLAKE_MOBILE: - case INTEL_FAM6_KABYLAKE_DESKTOP: + case INTEL_FAM6_SKYLAKE_L: + case INTEL_FAM6_SKYLAKE: + case INTEL_FAM6_KABYLAKE_L: + case INTEL_FAM6_KABYLAKE: + case INTEL_FAM6_COMETLAKE_L: + case INTEL_FAM6_COMETLAKE: x86_add_quirk(intel_pebs_isolation_quirk); x86_pmu.late_ack = true; memcpy(hw_cache_event_ids, skl_hw_cache_event_ids, sizeof(hw_cache_event_ids)); @@ -5008,11 +5028,13 @@ __init int intel_pmu_init(void) break; case INTEL_FAM6_ICELAKE_X: - case INTEL_FAM6_ICELAKE_XEON_D: + case INTEL_FAM6_ICELAKE_D: pmem = true; /* fall through */ - case INTEL_FAM6_ICELAKE_MOBILE: - case INTEL_FAM6_ICELAKE_DESKTOP: + case INTEL_FAM6_ICELAKE_L: + case INTEL_FAM6_ICELAKE: + case INTEL_FAM6_TIGERLAKE_L: + case INTEL_FAM6_TIGERLAKE: x86_pmu.late_ack = true; memcpy(hw_cache_event_ids, skl_hw_cache_event_ids, sizeof(hw_cache_event_ids)); memcpy(hw_cache_extra_regs, skl_hw_cache_extra_regs, sizeof(hw_cache_extra_regs)); diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c index 688592b34564..e1daf4151e11 100644 --- a/arch/x86/events/intel/cstate.c +++ b/arch/x86/events/intel/cstate.c @@ -45,46 +45,49 @@ * MSR_CORE_C3_RESIDENCY: CORE C3 Residency Counter * perf code: 0x01 * Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL,GLM, - CNL + * CNL,KBL,CML * Scope: Core * MSR_CORE_C6_RESIDENCY: CORE C6 Residency Counter * perf code: 0x02 * Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW, - * SKL,KNL,GLM,CNL + * SKL,KNL,GLM,CNL,KBL,CML,ICL,TGL * Scope: Core * MSR_CORE_C7_RESIDENCY: CORE C7 Residency Counter * perf 
code: 0x03 - * Available model: SNB,IVB,HSW,BDW,SKL,CNL + * Available model: SNB,IVB,HSW,BDW,SKL,CNL,KBL,CML, + * ICL,TGL * Scope: Core * MSR_PKG_C2_RESIDENCY: Package C2 Residency Counter. * perf code: 0x00 - * Available model: SNB,IVB,HSW,BDW,SKL,KNL,GLM,CNL + * Available model: SNB,IVB,HSW,BDW,SKL,KNL,GLM,CNL, + * KBL,CML,ICL,TGL * Scope: Package (physical package) * MSR_PKG_C3_RESIDENCY: Package C3 Residency Counter. * perf code: 0x01 * Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL,KNL, - * GLM,CNL + * GLM,CNL,KBL,CML,ICL,TGL * Scope: Package (physical package) * MSR_PKG_C6_RESIDENCY: Package C6 Residency Counter. * perf code: 0x02 * Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW - * SKL,KNL,GLM,CNL + * SKL,KNL,GLM,CNL,KBL,CML,ICL,TGL * Scope: Package (physical package) * MSR_PKG_C7_RESIDENCY: Package C7 Residency Counter. * perf code: 0x03 - * Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL,CNL + * Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL,CNL, + * KBL,CML,ICL,TGL * Scope: Package (physical package) * MSR_PKG_C8_RESIDENCY: Package C8 Residency Counter. * perf code: 0x04 - * Available model: HSW ULT,KBL,CNL + * Available model: HSW ULT,KBL,CNL,CML,ICL,TGL * Scope: Package (physical package) * MSR_PKG_C9_RESIDENCY: Package C9 Residency Counter. * perf code: 0x05 - * Available model: HSW ULT,KBL,CNL + * Available model: HSW ULT,KBL,CNL,CML,ICL,TGL * Scope: Package (physical package) * MSR_PKG_C10_RESIDENCY: Package C10 Residency Counter. * perf code: 0x06 - * Available model: HSW ULT,KBL,GLM,CNL + * Available model: HSW ULT,KBL,GLM,CNL,CML,ICL,TGL * Scope: Package (physical package) * */ @@ -446,7 +449,7 @@ static int cstate_cpu_init(unsigned int cpu) return 0; } -const struct attribute_group *core_attr_update[] = { +static const struct attribute_group *core_attr_update[] = { &group_cstate_core_c1, &group_cstate_core_c3, &group_cstate_core_c6, @@ -454,7 +457,7 @@ const struct attribute_group *core_attr_update[] = { NULL, }; -const struct attribute_group *pkg_attr_update[] = { +static const struct attribute_group *pkg_attr_update[] = { &group_cstate_pkg_c2, &group_cstate_pkg_c3, &group_cstate_pkg_c6, @@ -544,6 +547,19 @@ static const struct cstate_model cnl_cstates __initconst = { BIT(PERF_CSTATE_PKG_C10_RES), }; +static const struct cstate_model icl_cstates __initconst = { + .core_events = BIT(PERF_CSTATE_CORE_C6_RES) | + BIT(PERF_CSTATE_CORE_C7_RES), + + .pkg_events = BIT(PERF_CSTATE_PKG_C2_RES) | + BIT(PERF_CSTATE_PKG_C3_RES) | + BIT(PERF_CSTATE_PKG_C6_RES) | + BIT(PERF_CSTATE_PKG_C7_RES) | + BIT(PERF_CSTATE_PKG_C8_RES) | + BIT(PERF_CSTATE_PKG_C9_RES) | + BIT(PERF_CSTATE_PKG_C10_RES), +}; + static const struct cstate_model slm_cstates __initconst = { .core_events = BIT(PERF_CSTATE_CORE_C1_RES) | BIT(PERF_CSTATE_CORE_C6_RES), @@ -593,40 +609,44 @@ static const struct x86_cpu_id intel_cstates_match[] __initconst = { X86_CSTATES_MODEL(INTEL_FAM6_IVYBRIDGE, snb_cstates), X86_CSTATES_MODEL(INTEL_FAM6_IVYBRIDGE_X, snb_cstates), - X86_CSTATES_MODEL(INTEL_FAM6_HASWELL_CORE, snb_cstates), - X86_CSTATES_MODEL(INTEL_FAM6_HASWELL_X, snb_cstates), - X86_CSTATES_MODEL(INTEL_FAM6_HASWELL_GT3E, snb_cstates), + X86_CSTATES_MODEL(INTEL_FAM6_HASWELL, snb_cstates), + X86_CSTATES_MODEL(INTEL_FAM6_HASWELL_X, snb_cstates), + X86_CSTATES_MODEL(INTEL_FAM6_HASWELL_G, snb_cstates), - X86_CSTATES_MODEL(INTEL_FAM6_HASWELL_ULT, hswult_cstates), + X86_CSTATES_MODEL(INTEL_FAM6_HASWELL_L, hswult_cstates), - X86_CSTATES_MODEL(INTEL_FAM6_ATOM_SILVERMONT, slm_cstates), - X86_CSTATES_MODEL(INTEL_FAM6_ATOM_SILVERMONT_X, 
slm_cstates), - X86_CSTATES_MODEL(INTEL_FAM6_ATOM_AIRMONT, slm_cstates), + X86_CSTATES_MODEL(INTEL_FAM6_ATOM_SILVERMONT, slm_cstates), + X86_CSTATES_MODEL(INTEL_FAM6_ATOM_SILVERMONT_D, slm_cstates), + X86_CSTATES_MODEL(INTEL_FAM6_ATOM_AIRMONT, slm_cstates), - X86_CSTATES_MODEL(INTEL_FAM6_BROADWELL_CORE, snb_cstates), - X86_CSTATES_MODEL(INTEL_FAM6_BROADWELL_XEON_D, snb_cstates), - X86_CSTATES_MODEL(INTEL_FAM6_BROADWELL_GT3E, snb_cstates), - X86_CSTATES_MODEL(INTEL_FAM6_BROADWELL_X, snb_cstates), + X86_CSTATES_MODEL(INTEL_FAM6_BROADWELL, snb_cstates), + X86_CSTATES_MODEL(INTEL_FAM6_BROADWELL_D, snb_cstates), + X86_CSTATES_MODEL(INTEL_FAM6_BROADWELL_G, snb_cstates), + X86_CSTATES_MODEL(INTEL_FAM6_BROADWELL_X, snb_cstates), - X86_CSTATES_MODEL(INTEL_FAM6_SKYLAKE_MOBILE, snb_cstates), - X86_CSTATES_MODEL(INTEL_FAM6_SKYLAKE_DESKTOP, snb_cstates), + X86_CSTATES_MODEL(INTEL_FAM6_SKYLAKE_L, snb_cstates), + X86_CSTATES_MODEL(INTEL_FAM6_SKYLAKE, snb_cstates), X86_CSTATES_MODEL(INTEL_FAM6_SKYLAKE_X, snb_cstates), - X86_CSTATES_MODEL(INTEL_FAM6_KABYLAKE_MOBILE, hswult_cstates), - X86_CSTATES_MODEL(INTEL_FAM6_KABYLAKE_DESKTOP, hswult_cstates), + X86_CSTATES_MODEL(INTEL_FAM6_KABYLAKE_L, hswult_cstates), + X86_CSTATES_MODEL(INTEL_FAM6_KABYLAKE, hswult_cstates), + X86_CSTATES_MODEL(INTEL_FAM6_COMETLAKE_L, hswult_cstates), + X86_CSTATES_MODEL(INTEL_FAM6_COMETLAKE, hswult_cstates), - X86_CSTATES_MODEL(INTEL_FAM6_CANNONLAKE_MOBILE, cnl_cstates), + X86_CSTATES_MODEL(INTEL_FAM6_CANNONLAKE_L, cnl_cstates), X86_CSTATES_MODEL(INTEL_FAM6_XEON_PHI_KNL, knl_cstates), X86_CSTATES_MODEL(INTEL_FAM6_XEON_PHI_KNM, knl_cstates), - X86_CSTATES_MODEL(INTEL_FAM6_ATOM_GOLDMONT, glm_cstates), - X86_CSTATES_MODEL(INTEL_FAM6_ATOM_GOLDMONT_X, glm_cstates), + X86_CSTATES_MODEL(INTEL_FAM6_ATOM_GOLDMONT, glm_cstates), + X86_CSTATES_MODEL(INTEL_FAM6_ATOM_GOLDMONT_D, glm_cstates), X86_CSTATES_MODEL(INTEL_FAM6_ATOM_GOLDMONT_PLUS, glm_cstates), - X86_CSTATES_MODEL(INTEL_FAM6_ICELAKE_MOBILE, snb_cstates), - X86_CSTATES_MODEL(INTEL_FAM6_ICELAKE_DESKTOP, snb_cstates), + X86_CSTATES_MODEL(INTEL_FAM6_ICELAKE_L, icl_cstates), + X86_CSTATES_MODEL(INTEL_FAM6_ICELAKE, icl_cstates), + X86_CSTATES_MODEL(INTEL_FAM6_TIGERLAKE_L, icl_cstates), + X86_CSTATES_MODEL(INTEL_FAM6_TIGERLAKE, icl_cstates), { }, }; MODULE_DEVICE_TABLE(x86cpu, intel_cstates_match); diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index f1269e804e9b..ce83950036c5 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -902,6 +902,9 @@ struct event_constraint *intel_pebs_constraints(struct perf_event *event) */ static inline bool pebs_needs_sched_cb(struct cpu_hw_events *cpuc) { + if (cpuc->n_pebs == cpuc->n_pebs_via_pt) + return false; + return cpuc->n_pebs && (cpuc->n_pebs == cpuc->n_large_pebs); } @@ -919,6 +922,9 @@ static inline void pebs_update_threshold(struct cpu_hw_events *cpuc) u64 threshold; int reserved; + if (cpuc->n_pebs_via_pt) + return; + if (x86_pmu.flags & PMU_FL_PEBS_ALL) reserved = x86_pmu.max_pebs_events + x86_pmu.num_counters_fixed; else @@ -1059,10 +1065,40 @@ void intel_pmu_pebs_add(struct perf_event *event) cpuc->n_pebs++; if (hwc->flags & PERF_X86_EVENT_LARGE_PEBS) cpuc->n_large_pebs++; + if (hwc->flags & PERF_X86_EVENT_PEBS_VIA_PT) + cpuc->n_pebs_via_pt++; pebs_update_state(needed_cb, cpuc, event, true); } +static void intel_pmu_pebs_via_pt_disable(struct perf_event *event) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + + if (!is_pebs_pt(event)) + return; + + if (!(cpuc->pebs_enabled & 
~PEBS_VIA_PT_MASK)) + cpuc->pebs_enabled &= ~PEBS_VIA_PT_MASK; +} + +static void intel_pmu_pebs_via_pt_enable(struct perf_event *event) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + struct hw_perf_event *hwc = &event->hw; + struct debug_store *ds = cpuc->ds; + + if (!is_pebs_pt(event)) + return; + + if (!(event->hw.flags & PERF_X86_EVENT_LARGE_PEBS)) + cpuc->pebs_enabled |= PEBS_PMI_AFTER_EACH_RECORD; + + cpuc->pebs_enabled |= PEBS_OUTPUT_PT; + + wrmsrl(MSR_RELOAD_PMC0 + hwc->idx, ds->pebs_event_reset[hwc->idx]); +} + void intel_pmu_pebs_enable(struct perf_event *event) { struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); @@ -1100,6 +1136,8 @@ void intel_pmu_pebs_enable(struct perf_event *event) } else { ds->pebs_event_reset[hwc->idx] = 0; } + + intel_pmu_pebs_via_pt_enable(event); } void intel_pmu_pebs_del(struct perf_event *event) @@ -1111,6 +1149,8 @@ void intel_pmu_pebs_del(struct perf_event *event) cpuc->n_pebs--; if (hwc->flags & PERF_X86_EVENT_LARGE_PEBS) cpuc->n_large_pebs--; + if (hwc->flags & PERF_X86_EVENT_PEBS_VIA_PT) + cpuc->n_pebs_via_pt--; pebs_update_state(needed_cb, cpuc, event, false); } @@ -1120,7 +1160,8 @@ void intel_pmu_pebs_disable(struct perf_event *event) struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); struct hw_perf_event *hwc = &event->hw; - if (cpuc->n_pebs == cpuc->n_large_pebs) + if (cpuc->n_pebs == cpuc->n_large_pebs && + cpuc->n_pebs != cpuc->n_pebs_via_pt) intel_pmu_drain_pebs_buffer(); cpuc->pebs_enabled &= ~(1ULL << hwc->idx); @@ -1131,6 +1172,8 @@ void intel_pmu_pebs_disable(struct perf_event *event) else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST) cpuc->pebs_enabled &= ~(1ULL << 63); + intel_pmu_pebs_via_pt_disable(event); + if (cpuc->enabled) wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled); @@ -2031,6 +2074,12 @@ void __init intel_ds_init(void) PERF_SAMPLE_REGS_INTR); } pr_cont("PEBS fmt4%c%s, ", pebs_type, pebs_qual); + + if (x86_pmu.intel_cap.pebs_output_pt_available) { + pr_cont("PEBS-via-PT, "); + x86_get_pmu()->capabilities |= PERF_PMU_CAP_AUX_OUTPUT; + } + break; default: diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c index 6f814a27416b..ea54634eabf3 100644 --- a/arch/x86/events/intel/lbr.c +++ b/arch/x86/events/intel/lbr.c @@ -273,7 +273,7 @@ static inline bool lbr_from_signext_quirk_needed(void) return !tsx_support && (lbr_desc[lbr_format] & LBR_TSX); } -DEFINE_STATIC_KEY_FALSE(lbr_from_quirk_key); +static DEFINE_STATIC_KEY_FALSE(lbr_from_quirk_key); /* If quirk is enabled, ensure sign extension is 63 bits: */ inline u64 lbr_from_signext_quirk_wr(u64 val) diff --git a/arch/x86/events/intel/pt.c b/arch/x86/events/intel/pt.c index d3dc2274ddd4..05e43d0f430b 100644 --- a/arch/x86/events/intel/pt.c +++ b/arch/x86/events/intel/pt.c @@ -204,9 +204,9 @@ static int __init pt_pmu_hw_init(void) /* model-specific quirks */ switch (boot_cpu_data.x86_model) { - case INTEL_FAM6_BROADWELL_CORE: - case INTEL_FAM6_BROADWELL_XEON_D: - case INTEL_FAM6_BROADWELL_GT3E: + case INTEL_FAM6_BROADWELL: + case INTEL_FAM6_BROADWELL_D: + case INTEL_FAM6_BROADWELL_G: case INTEL_FAM6_BROADWELL_X: /* not setting BRANCH_EN will #GP, erratum BDM106 */ pt_pmu.branch_en_always_on = true; @@ -545,33 +545,62 @@ static void pt_config_buffer(void *buf, unsigned int topa_idx, wrmsrl(MSR_IA32_RTIT_OUTPUT_MASK, reg); } -/* - * Keep ToPA table-related metadata on the same page as the actual table, - * taking up a few words from the top - */ - -#define TENTS_PER_PAGE (((PAGE_SIZE - 40) / sizeof(struct topa_entry)) - 1) - 
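The pt.c rework that follows splits the old page-sized struct topa into a small metadata struct plus a page-sized struct topa_page overlay, so the hardware-visible ToPA entries and their bookkeeping still share one page while the rest of the code passes around only the lightweight metadata pointer. A minimal sketch of that overlay pattern, with invented names (entry_page/meta are illustrative, not the kernel's):

/*
 * Illustrative only: the entry array fills the page up to a trailing
 * metadata struct; container_of() recovers the page from the metadata,
 * and PAGE_MASK recovers it from any entry, because each table is one
 * naturally aligned page (alloc_pages_node(..., 0) in the patch).
 */
#include <linux/types.h>	/* u64 */
#include <linux/kernel.h>	/* container_of() */
#include <linux/list.h>
#include <asm/page.h>		/* PAGE_SIZE, PAGE_MASK */

struct meta {
	struct list_head	list;
	int			last;
};

struct entry_page {
	u64		table[(PAGE_SIZE - sizeof(struct meta)) / sizeof(u64)];
	struct meta	meta;		/* tail of the same page */
};

static inline struct entry_page *meta_to_page(struct meta *m)
{
	return container_of(m, struct entry_page, meta);
}

static inline struct entry_page *entry_to_page(u64 *te)
{
	return (struct entry_page *)((unsigned long)te & PAGE_MASK);
}

This mirrors topa_to_page()/topa_entry_to_page() in the hunk below; the same alignment argument is what lets topa_pfn() go from metadata to physical frame without the old @phys field.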
/** - * struct topa - page-sized ToPA table with metadata at the top - * @table: actual ToPA table entries, as understood by PT hardware + * struct topa - ToPA metadata * @list: linkage to struct pt_buffer's list of tables - * @phys: physical address of this page * @offset: offset of the first entry in this table in the buffer * @size: total size of all entries in this table * @last: index of the last initialized entry in this table + * @z_count: how many times the first entry repeats */ struct topa { - struct topa_entry table[TENTS_PER_PAGE]; struct list_head list; - u64 phys; u64 offset; size_t size; int last; + unsigned int z_count; }; +/* + * Keep ToPA table-related metadata on the same page as the actual table, + * taking up a few words from the top + */ + +#define TENTS_PER_PAGE \ + ((PAGE_SIZE - sizeof(struct topa)) / sizeof(struct topa_entry)) + +/** + * struct topa_page - page-sized ToPA table with metadata at the top + * @table: actual ToPA table entries, as understood by PT hardware + * @topa: metadata + */ +struct topa_page { + struct topa_entry table[TENTS_PER_PAGE]; + struct topa topa; +}; + +static inline struct topa_page *topa_to_page(struct topa *topa) +{ + return container_of(topa, struct topa_page, topa); +} + +static inline struct topa_page *topa_entry_to_page(struct topa_entry *te) +{ + return (struct topa_page *)((unsigned long)te & PAGE_MASK); +} + +static inline phys_addr_t topa_pfn(struct topa *topa) +{ + return PFN_DOWN(virt_to_phys(topa_to_page(topa))); +} + /* make -1 stand for the last table entry */ -#define TOPA_ENTRY(t, i) ((i) == -1 ? &(t)->table[(t)->last] : &(t)->table[(i)]) +#define TOPA_ENTRY(t, i) \ + ((i) == -1 \ + ? &topa_to_page(t)->table[(t)->last] \ + : &topa_to_page(t)->table[(i)]) +#define TOPA_ENTRY_SIZE(t, i) (sizes(TOPA_ENTRY((t), (i))->size)) +#define TOPA_ENTRY_PAGES(t, i) (1 << TOPA_ENTRY((t), (i))->size) /** * topa_alloc() - allocate page-sized ToPA table @@ -583,27 +612,26 @@ struct topa { static struct topa *topa_alloc(int cpu, gfp_t gfp) { int node = cpu_to_node(cpu); - struct topa *topa; + struct topa_page *tp; struct page *p; p = alloc_pages_node(node, gfp | __GFP_ZERO, 0); if (!p) return NULL; - topa = page_address(p); - topa->last = 0; - topa->phys = page_to_phys(p); + tp = page_address(p); + tp->topa.last = 0; /* * In case of singe-entry ToPA, always put the self-referencing END * link as the 2nd entry in the table */ if (!intel_pt_validate_hw_cap(PT_CAP_topa_multiple_entries)) { - TOPA_ENTRY(topa, 1)->base = topa->phys >> TOPA_SHIFT; - TOPA_ENTRY(topa, 1)->end = 1; + TOPA_ENTRY(&tp->topa, 1)->base = page_to_phys(p) >> TOPA_SHIFT; + TOPA_ENTRY(&tp->topa, 1)->end = 1; } - return topa; + return &tp->topa; } /** @@ -643,7 +671,7 @@ static void topa_insert_table(struct pt_buffer *buf, struct topa *topa) BUG_ON(last->last != TENTS_PER_PAGE - 1); - TOPA_ENTRY(last, -1)->base = topa->phys >> TOPA_SHIFT; + TOPA_ENTRY(last, -1)->base = topa_pfn(topa); TOPA_ENTRY(last, -1)->end = 1; } @@ -670,7 +698,7 @@ static bool topa_table_full(struct topa *topa) * * Return: 0 on success or error code. 
*/ -static int topa_insert_pages(struct pt_buffer *buf, gfp_t gfp) +static int topa_insert_pages(struct pt_buffer *buf, int cpu, gfp_t gfp) { struct topa *topa = buf->last; int order = 0; @@ -681,13 +709,18 @@ static int topa_insert_pages(struct pt_buffer *buf, gfp_t gfp) order = page_private(p); if (topa_table_full(topa)) { - topa = topa_alloc(buf->cpu, gfp); + topa = topa_alloc(cpu, gfp); if (!topa) return -ENOMEM; topa_insert_table(buf, topa); } + if (topa->z_count == topa->last - 1) { + if (order == TOPA_ENTRY(topa, topa->last - 1)->size) + topa->z_count++; + } + TOPA_ENTRY(topa, -1)->base = page_to_phys(p) >> TOPA_SHIFT; TOPA_ENTRY(topa, -1)->size = order; if (!buf->snapshot && @@ -713,23 +746,26 @@ static void pt_topa_dump(struct pt_buffer *buf) struct topa *topa; list_for_each_entry(topa, &buf->tables, list) { + struct topa_page *tp = topa_to_page(topa); int i; - pr_debug("# table @%p (%016Lx), off %llx size %zx\n", topa->table, - topa->phys, topa->offset, topa->size); + pr_debug("# table @%p, off %llx size %zx\n", tp->table, + topa->offset, topa->size); for (i = 0; i < TENTS_PER_PAGE; i++) { pr_debug("# entry @%p (%lx sz %u %c%c%c) raw=%16llx\n", - &topa->table[i], - (unsigned long)topa->table[i].base << TOPA_SHIFT, - sizes(topa->table[i].size), - topa->table[i].end ? 'E' : ' ', - topa->table[i].intr ? 'I' : ' ', - topa->table[i].stop ? 'S' : ' ', - *(u64 *)&topa->table[i]); + &tp->table[i], + (unsigned long)tp->table[i].base << TOPA_SHIFT, + sizes(tp->table[i].size), + tp->table[i].end ? 'E' : ' ', + tp->table[i].intr ? 'I' : ' ', + tp->table[i].stop ? 'S' : ' ', + *(u64 *)&tp->table[i]); if ((intel_pt_validate_hw_cap(PT_CAP_topa_multiple_entries) && - topa->table[i].stop) || - topa->table[i].end) + tp->table[i].stop) || + tp->table[i].end) break; + if (!i && topa->z_count) + i += topa->z_count; } } } @@ -771,7 +807,7 @@ static void pt_update_head(struct pt *pt) /* offset of the current output region within this table */ for (topa_idx = 0; topa_idx < buf->cur_idx; topa_idx++) - base += sizes(buf->cur->table[topa_idx].size); + base += TOPA_ENTRY_SIZE(buf->cur, topa_idx); if (buf->snapshot) { local_set(&buf->data_size, base); @@ -791,7 +827,7 @@ static void pt_update_head(struct pt *pt) */ static void *pt_buffer_region(struct pt_buffer *buf) { - return phys_to_virt(buf->cur->table[buf->cur_idx].base << TOPA_SHIFT); + return phys_to_virt(TOPA_ENTRY(buf->cur, buf->cur_idx)->base << TOPA_SHIFT); } /** @@ -800,7 +836,7 @@ static void *pt_buffer_region(struct pt_buffer *buf) */ static size_t pt_buffer_region_size(struct pt_buffer *buf) { - return sizes(buf->cur->table[buf->cur_idx].size); + return TOPA_ENTRY_SIZE(buf->cur, buf->cur_idx); } /** @@ -830,7 +866,7 @@ static void pt_handle_status(struct pt *pt) * know. 
*/ if (!intel_pt_validate_hw_cap(PT_CAP_topa_multiple_entries) || - buf->output_off == sizes(TOPA_ENTRY(buf->cur, buf->cur_idx)->size)) { + buf->output_off == pt_buffer_region_size(buf)) { perf_aux_output_flag(&pt->handle, PERF_AUX_FLAG_TRUNCATED); advance++; @@ -868,9 +904,11 @@ static void pt_handle_status(struct pt *pt) static void pt_read_offset(struct pt_buffer *buf) { u64 offset, base_topa; + struct topa_page *tp; rdmsrl(MSR_IA32_RTIT_OUTPUT_BASE, base_topa); - buf->cur = phys_to_virt(base_topa); + tp = phys_to_virt(base_topa); + buf->cur = &tp->topa; rdmsrl(MSR_IA32_RTIT_OUTPUT_MASK, offset); /* offset within current output region */ @@ -879,29 +917,97 @@ static void pt_read_offset(struct pt_buffer *buf) buf->cur_idx = (offset & 0xffffff80) >> 7; } -/** - * pt_topa_next_entry() - obtain index of the first page in the next ToPA entry - * @buf: PT buffer. - * @pg: Page offset in the buffer. - * - * When advancing to the next output region (ToPA entry), given a page offset - * into the buffer, we need to find the offset of the first page in the next - * region. - */ -static unsigned int pt_topa_next_entry(struct pt_buffer *buf, unsigned int pg) +static struct topa_entry * +pt_topa_entry_for_page(struct pt_buffer *buf, unsigned int pg) { - struct topa_entry *te = buf->topa_index[pg]; + struct topa_page *tp; + struct topa *topa; + unsigned int idx, cur_pg = 0, z_pg = 0, start_idx = 0; - /* one region */ - if (buf->first == buf->last && buf->first->last == 1) - return pg; + /* + * Indicates a bug in the caller. + */ + if (WARN_ON_ONCE(pg >= buf->nr_pages)) + return NULL; + + /* + * First, find the ToPA table where @pg fits. With high + * order allocations, there shouldn't be many of these. + */ + list_for_each_entry(topa, &buf->tables, list) { + if (topa->offset + topa->size > pg << PAGE_SHIFT) + goto found; + } + + /* + * Hitting this means we have a problem in the ToPA + * allocation code. + */ + WARN_ON_ONCE(1); - do { - pg++; - pg &= buf->nr_pages - 1; - } while (buf->topa_index[pg] == te); + return NULL; - return pg; +found: + /* + * Indicates a problem in the ToPA allocation code. + */ + if (WARN_ON_ONCE(topa->last == -1)) + return NULL; + + tp = topa_to_page(topa); + cur_pg = PFN_DOWN(topa->offset); + if (topa->z_count) { + z_pg = TOPA_ENTRY_PAGES(topa, 0) * (topa->z_count + 1); + start_idx = topa->z_count + 1; + } + + /* + * Multiple entries at the beginning of the table have the same size, + * ideally all of them; if @pg falls there, the search is done. + */ + if (pg >= cur_pg && pg < cur_pg + z_pg) { + idx = (pg - cur_pg) / TOPA_ENTRY_PAGES(topa, 0); + return &tp->table[idx]; + } + + /* + * Otherwise, slow path: iterate through the remaining entries. + */ + for (idx = start_idx, cur_pg += z_pg; idx < topa->last; idx++) { + if (cur_pg + TOPA_ENTRY_PAGES(topa, idx) > pg) + return &tp->table[idx]; + + cur_pg += TOPA_ENTRY_PAGES(topa, idx); + } + + /* + * Means we couldn't find a ToPA entry in the table that does match. 
+ */ + WARN_ON_ONCE(1); + + return NULL; +} + +static struct topa_entry * +pt_topa_prev_entry(struct pt_buffer *buf, struct topa_entry *te) +{ + unsigned long table = (unsigned long)te & ~(PAGE_SIZE - 1); + struct topa_page *tp; + struct topa *topa; + + tp = (struct topa_page *)table; + if (tp->table != te) + return --te; + + topa = &tp->topa; + if (topa == buf->first) + topa = buf->last; + else + topa = list_prev_entry(topa, list); + + tp = topa_to_page(topa); + + return &tp->table[topa->last - 1]; } /** @@ -925,8 +1031,7 @@ static int pt_buffer_reset_markers(struct pt_buffer *buf, unsigned long idx, npages, wakeup; /* can't stop in the middle of an output region */ - if (buf->output_off + handle->size + 1 < - sizes(TOPA_ENTRY(buf->cur, buf->cur_idx)->size)) { + if (buf->output_off + handle->size + 1 < pt_buffer_region_size(buf)) { perf_aux_output_flag(handle, PERF_AUX_FLAG_TRUNCATED); return -EINVAL; } @@ -937,9 +1042,13 @@ static int pt_buffer_reset_markers(struct pt_buffer *buf, return 0; /* clear STOP and INT from current entry */ - buf->topa_index[buf->stop_pos]->stop = 0; - buf->topa_index[buf->stop_pos]->intr = 0; - buf->topa_index[buf->intr_pos]->intr = 0; + if (buf->stop_te) { + buf->stop_te->stop = 0; + buf->stop_te->intr = 0; + } + + if (buf->intr_te) + buf->intr_te->intr = 0; /* how many pages till the STOP marker */ npages = handle->size >> PAGE_SHIFT; @@ -950,7 +1059,12 @@ static int pt_buffer_reset_markers(struct pt_buffer *buf, idx = (head >> PAGE_SHIFT) + npages; idx &= buf->nr_pages - 1; - buf->stop_pos = idx; + + if (idx != buf->stop_pos) { + buf->stop_pos = idx; + buf->stop_te = pt_topa_entry_for_page(buf, idx); + buf->stop_te = pt_topa_prev_entry(buf, buf->stop_te); + } wakeup = handle->wakeup >> PAGE_SHIFT; @@ -960,51 +1074,20 @@ static int pt_buffer_reset_markers(struct pt_buffer *buf, idx = wakeup; idx &= buf->nr_pages - 1; - buf->intr_pos = idx; + if (idx != buf->intr_pos) { + buf->intr_pos = idx; + buf->intr_te = pt_topa_entry_for_page(buf, idx); + buf->intr_te = pt_topa_prev_entry(buf, buf->intr_te); + } - buf->topa_index[buf->stop_pos]->stop = 1; - buf->topa_index[buf->stop_pos]->intr = 1; - buf->topa_index[buf->intr_pos]->intr = 1; + buf->stop_te->stop = 1; + buf->stop_te->intr = 1; + buf->intr_te->intr = 1; return 0; } /** - * pt_buffer_setup_topa_index() - build topa_index[] table of regions - * @buf: PT buffer. - * - * topa_index[] references output regions indexed by offset into the - * buffer for purposes of quick reverse lookup. - */ -static void pt_buffer_setup_topa_index(struct pt_buffer *buf) -{ - struct topa *cur = buf->first, *prev = buf->last; - struct topa_entry *te_cur = TOPA_ENTRY(cur, 0), - *te_prev = TOPA_ENTRY(prev, prev->last - 1); - int pg = 0, idx = 0; - - while (pg < buf->nr_pages) { - int tidx; - - /* pages within one topa entry */ - for (tidx = 0; tidx < 1 << te_cur->size; tidx++, pg++) - buf->topa_index[pg] = te_prev; - - te_prev = te_cur; - - if (idx == cur->last - 1) { - /* advance to next topa table */ - idx = 0; - cur = list_entry(cur->list.next, struct topa, list); - } else { - idx++; - } - te_cur = TOPA_ENTRY(cur, idx); - } - -} - -/** * pt_buffer_reset_offsets() - adjust buffer's write pointers from aux_head * @buf: PT buffer. * @head: Write pointer (aux_head) from AUX buffer. 
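With the per-page topa_index[] array gone from struct pt_buffer, the reverse page-to-entry mapping above is recomputed on demand: pt_topa_entry_for_page() first finds the right table by cumulative offset, then uses z_count to jump over the leading run of equally sized entries in O(1) before walking the remainder. A rough standalone model of that two-phase search (names and the flat pages[] array are invented for illustration):

/*
 * Entry i covers pages[i] pages; z_count counts how many entries
 * *after* entry 0 repeat its size, so entries 0..z_count form a
 * uniform run. Returns the entry index covering page @pg, or -1
 * if @pg lies beyond the table.
 */
static int entry_for_page(const unsigned int *pages, int nents,
			  unsigned int z_count, unsigned int pg)
{
	unsigned int cur, z_pages = pages[0] * (z_count + 1);
	int idx;

	if (pg < z_pages)			/* fast path: uniform run */
		return pg / pages[0];

	for (idx = z_count + 1, cur = z_pages; idx < nents; idx++) {
		if (cur + pages[idx] > pg)	/* @pg lands in entry idx */
			return idx;
		cur += pages[idx];
	}

	return -1;
}

The cost of the walk is paid only when markers move: pt_buffer_reset_markers() above caches the results in the new stop_te/intr_te pointers and recomputes them only when stop_pos/intr_pos actually change.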
@@ -1021,18 +1104,20 @@ static void pt_buffer_setup_topa_index(struct pt_buffer *buf) */ static void pt_buffer_reset_offsets(struct pt_buffer *buf, unsigned long head) { + struct topa_page *cur_tp; + struct topa_entry *te; int pg; if (buf->snapshot) head &= (buf->nr_pages << PAGE_SHIFT) - 1; pg = (head >> PAGE_SHIFT) & (buf->nr_pages - 1); - pg = pt_topa_next_entry(buf, pg); + te = pt_topa_entry_for_page(buf, pg); - buf->cur = (struct topa *)((unsigned long)buf->topa_index[pg] & PAGE_MASK); - buf->cur_idx = ((unsigned long)buf->topa_index[pg] - - (unsigned long)buf->cur) / sizeof(struct topa_entry); - buf->output_off = head & (sizes(buf->cur->table[buf->cur_idx].size) - 1); + cur_tp = topa_entry_to_page(te); + buf->cur = &cur_tp->topa; + buf->cur_idx = te - TOPA_ENTRY(buf->cur, 0); + buf->output_off = head & (pt_buffer_region_size(buf) - 1); local64_set(&buf->head, head); local_set(&buf->data_size, 0); @@ -1061,31 +1146,29 @@ static void pt_buffer_fini_topa(struct pt_buffer *buf) * @size: Total size of all regions within this ToPA. * @gfp: Allocation flags. */ -static int pt_buffer_init_topa(struct pt_buffer *buf, unsigned long nr_pages, - gfp_t gfp) +static int pt_buffer_init_topa(struct pt_buffer *buf, int cpu, + unsigned long nr_pages, gfp_t gfp) { struct topa *topa; int err; - topa = topa_alloc(buf->cpu, gfp); + topa = topa_alloc(cpu, gfp); if (!topa) return -ENOMEM; topa_insert_table(buf, topa); while (buf->nr_pages < nr_pages) { - err = topa_insert_pages(buf, gfp); + err = topa_insert_pages(buf, cpu, gfp); if (err) { pt_buffer_fini_topa(buf); return -ENOMEM; } } - pt_buffer_setup_topa_index(buf); - /* link last table to the first one, unless we're double buffering */ if (intel_pt_validate_hw_cap(PT_CAP_topa_multiple_entries)) { - TOPA_ENTRY(buf->last, -1)->base = buf->first->phys >> TOPA_SHIFT; + TOPA_ENTRY(buf->last, -1)->base = topa_pfn(buf->first); TOPA_ENTRY(buf->last, -1)->end = 1; } @@ -1119,18 +1202,18 @@ pt_buffer_setup_aux(struct perf_event *event, void **pages, cpu = raw_smp_processor_id(); node = cpu_to_node(cpu); - buf = kzalloc_node(offsetof(struct pt_buffer, topa_index[nr_pages]), - GFP_KERNEL, node); + buf = kzalloc_node(sizeof(struct pt_buffer), GFP_KERNEL, node); if (!buf) return NULL; - buf->cpu = cpu; buf->snapshot = snapshot; buf->data_pages = pages; + buf->stop_pos = -1; + buf->intr_pos = -1; INIT_LIST_HEAD(&buf->tables); - ret = pt_buffer_init_topa(buf, nr_pages, GFP_KERNEL); + ret = pt_buffer_init_topa(buf, cpu, nr_pages, GFP_KERNEL); if (ret) { kfree(buf); return NULL; @@ -1296,7 +1379,7 @@ void intel_pt_interrupt(void) return; } - pt_config_buffer(buf->cur->table, buf->cur_idx, + pt_config_buffer(topa_to_page(buf->cur)->table, buf->cur_idx, buf->output_off); pt_config(event); } @@ -1361,7 +1444,7 @@ static void pt_event_start(struct perf_event *event, int mode) WRITE_ONCE(pt->handle_nmi, 1); hwc->state = 0; - pt_config_buffer(buf->cur->table, buf->cur_idx, + pt_config_buffer(topa_to_page(buf->cur)->table, buf->cur_idx, buf->output_off); pt_config(event); @@ -1481,6 +1564,11 @@ void cpu_emergency_stop_pt(void) pt_event_stop(pt->handle.event, PERF_EF_UPDATE); } +int is_intel_pt_event(struct perf_event *event) +{ + return event->pmu == &pt_pmu.pmu; +} + static __init int pt_init(void) { int ret, cpu, prior_warn = 0; diff --git a/arch/x86/events/intel/pt.h b/arch/x86/events/intel/pt.h index 63fe4063fbd6..1d2bb7572374 100644 --- a/arch/x86/events/intel/pt.h +++ b/arch/x86/events/intel/pt.h @@ -53,7 +53,6 @@ struct pt_pmu { /** * struct pt_buffer - buffer 
configuration; one buffer per task_struct or * cpu, depending on perf event configuration - * @cpu: cpu for per-cpu allocation * @tables: list of ToPA tables in this buffer * @first: shorthand for first topa table * @last: shorthand for last topa table @@ -65,13 +64,14 @@ struct pt_pmu { * @lost: if data was lost/truncated * @head: logical write offset inside the buffer * @snapshot: if this is for a snapshot/overwrite counter - * @stop_pos: STOP topa entry in the buffer - * @intr_pos: INT topa entry in the buffer + * @stop_pos: STOP topa entry index + * @intr_pos: INT topa entry index + * @stop_te: STOP topa entry pointer + * @intr_te: INT topa entry pointer * @data_pages: array of pages from perf * @topa_index: table of topa entries indexed by page offset */ struct pt_buffer { - int cpu; struct list_head tables; struct topa *first, *last, *cur; unsigned int cur_idx; @@ -80,9 +80,9 @@ struct pt_buffer { local_t data_size; local64_t head; bool snapshot; - unsigned long stop_pos, intr_pos; + long stop_pos, intr_pos; + struct topa_entry *stop_te, *intr_te; void **data_pages; - struct topa_entry *topa_index[0]; }; #define PT_FILTERS_NUM 4 diff --git a/arch/x86/events/intel/rapl.c b/arch/x86/events/intel/rapl.c index 64ab51ffdf06..5053a403e4ae 100644 --- a/arch/x86/events/intel/rapl.c +++ b/arch/x86/events/intel/rapl.c @@ -634,7 +634,7 @@ static void cleanup_rapl_pmus(void) kfree(rapl_pmus); } -const struct attribute_group *rapl_attr_update[] = { +static const struct attribute_group *rapl_attr_update[] = { &rapl_events_cores_group, &rapl_events_pkg_group, &rapl_events_ram_group, @@ -720,27 +720,27 @@ static const struct x86_cpu_id rapl_model_match[] __initconst = { X86_RAPL_MODEL_MATCH(INTEL_FAM6_SANDYBRIDGE_X, model_snbep), X86_RAPL_MODEL_MATCH(INTEL_FAM6_IVYBRIDGE, model_snb), X86_RAPL_MODEL_MATCH(INTEL_FAM6_IVYBRIDGE_X, model_snbep), - X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL_CORE, model_hsw), + X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL, model_hsw), X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL_X, model_hsx), - X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL_ULT, model_hsw), - X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL_GT3E, model_hsw), - X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_CORE, model_hsw), - X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_GT3E, model_hsw), + X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL_L, model_hsw), + X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL_G, model_hsw), + X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL, model_hsw), + X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_G, model_hsw), X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_X, model_hsx), - X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_XEON_D, model_hsx), + X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_D, model_hsx), X86_RAPL_MODEL_MATCH(INTEL_FAM6_XEON_PHI_KNL, model_knl), X86_RAPL_MODEL_MATCH(INTEL_FAM6_XEON_PHI_KNM, model_knl), - X86_RAPL_MODEL_MATCH(INTEL_FAM6_SKYLAKE_MOBILE, model_skl), - X86_RAPL_MODEL_MATCH(INTEL_FAM6_SKYLAKE_DESKTOP, model_skl), + X86_RAPL_MODEL_MATCH(INTEL_FAM6_SKYLAKE_L, model_skl), + X86_RAPL_MODEL_MATCH(INTEL_FAM6_SKYLAKE, model_skl), X86_RAPL_MODEL_MATCH(INTEL_FAM6_SKYLAKE_X, model_hsx), - X86_RAPL_MODEL_MATCH(INTEL_FAM6_KABYLAKE_MOBILE, model_skl), - X86_RAPL_MODEL_MATCH(INTEL_FAM6_KABYLAKE_DESKTOP, model_skl), - X86_RAPL_MODEL_MATCH(INTEL_FAM6_CANNONLAKE_MOBILE, model_skl), + X86_RAPL_MODEL_MATCH(INTEL_FAM6_KABYLAKE_L, model_skl), + X86_RAPL_MODEL_MATCH(INTEL_FAM6_KABYLAKE, model_skl), + X86_RAPL_MODEL_MATCH(INTEL_FAM6_CANNONLAKE_L, model_skl), X86_RAPL_MODEL_MATCH(INTEL_FAM6_ATOM_GOLDMONT, model_hsw), - 
X86_RAPL_MODEL_MATCH(INTEL_FAM6_ATOM_GOLDMONT_X, model_hsw), + X86_RAPL_MODEL_MATCH(INTEL_FAM6_ATOM_GOLDMONT_D, model_hsw), X86_RAPL_MODEL_MATCH(INTEL_FAM6_ATOM_GOLDMONT_PLUS, model_hsw), - X86_RAPL_MODEL_MATCH(INTEL_FAM6_ICELAKE_MOBILE, model_skl), - X86_RAPL_MODEL_MATCH(INTEL_FAM6_ICELAKE_DESKTOP, model_skl), + X86_RAPL_MODEL_MATCH(INTEL_FAM6_ICELAKE_L, model_skl), + X86_RAPL_MODEL_MATCH(INTEL_FAM6_ICELAKE, model_skl), {}, }; diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c index 3694a5d0703d..86467f85c383 100644 --- a/arch/x86/events/intel/uncore.c +++ b/arch/x86/events/intel/uncore.c @@ -502,10 +502,8 @@ void uncore_pmu_event_start(struct perf_event *event, int flags) local64_set(&event->hw.prev_count, uncore_read_counter(box, event)); uncore_enable_event(box, event); - if (box->n_active == 1) { - uncore_enable_box(box); + if (box->n_active == 1) uncore_pmu_start_hrtimer(box); - } } void uncore_pmu_event_stop(struct perf_event *event, int flags) @@ -529,10 +527,8 @@ void uncore_pmu_event_stop(struct perf_event *event, int flags) WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED); hwc->state |= PERF_HES_STOPPED; - if (box->n_active == 0) { - uncore_disable_box(box); + if (box->n_active == 0) uncore_pmu_cancel_hrtimer(box); - } } if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) { @@ -778,6 +774,40 @@ static int uncore_pmu_event_init(struct perf_event *event) return ret; } +static void uncore_pmu_enable(struct pmu *pmu) +{ + struct intel_uncore_pmu *uncore_pmu; + struct intel_uncore_box *box; + + uncore_pmu = container_of(pmu, struct intel_uncore_pmu, pmu); + if (!uncore_pmu) + return; + + box = uncore_pmu_to_box(uncore_pmu, smp_processor_id()); + if (!box) + return; + + if (uncore_pmu->type->ops->enable_box) + uncore_pmu->type->ops->enable_box(box); +} + +static void uncore_pmu_disable(struct pmu *pmu) +{ + struct intel_uncore_pmu *uncore_pmu; + struct intel_uncore_box *box; + + uncore_pmu = container_of(pmu, struct intel_uncore_pmu, pmu); + if (!uncore_pmu) + return; + + box = uncore_pmu_to_box(uncore_pmu, smp_processor_id()); + if (!box) + return; + + if (uncore_pmu->type->ops->disable_box) + uncore_pmu->type->ops->disable_box(box); +} + static ssize_t uncore_get_attr_cpumask(struct device *dev, struct device_attribute *attr, char *buf) { @@ -803,6 +833,8 @@ static int uncore_pmu_register(struct intel_uncore_pmu *pmu) pmu->pmu = (struct pmu) { .attr_groups = pmu->type->attr_groups, .task_ctx_nr = perf_invalid_context, + .pmu_enable = uncore_pmu_enable, + .pmu_disable = uncore_pmu_disable, .event_init = uncore_pmu_event_init, .add = uncore_pmu_event_add, .del = uncore_pmu_event_del, @@ -1451,29 +1483,29 @@ static const struct x86_cpu_id intel_uncore_match[] __initconst = { X86_UNCORE_MODEL_MATCH(INTEL_FAM6_WESTMERE_EP, nhm_uncore_init), X86_UNCORE_MODEL_MATCH(INTEL_FAM6_SANDYBRIDGE, snb_uncore_init), X86_UNCORE_MODEL_MATCH(INTEL_FAM6_IVYBRIDGE, ivb_uncore_init), - X86_UNCORE_MODEL_MATCH(INTEL_FAM6_HASWELL_CORE, hsw_uncore_init), - X86_UNCORE_MODEL_MATCH(INTEL_FAM6_HASWELL_ULT, hsw_uncore_init), - X86_UNCORE_MODEL_MATCH(INTEL_FAM6_HASWELL_GT3E, hsw_uncore_init), - X86_UNCORE_MODEL_MATCH(INTEL_FAM6_BROADWELL_CORE, bdw_uncore_init), - X86_UNCORE_MODEL_MATCH(INTEL_FAM6_BROADWELL_GT3E, bdw_uncore_init), + X86_UNCORE_MODEL_MATCH(INTEL_FAM6_HASWELL, hsw_uncore_init), + X86_UNCORE_MODEL_MATCH(INTEL_FAM6_HASWELL_L, hsw_uncore_init), + X86_UNCORE_MODEL_MATCH(INTEL_FAM6_HASWELL_G, hsw_uncore_init), + X86_UNCORE_MODEL_MATCH(INTEL_FAM6_BROADWELL, 
bdw_uncore_init), + X86_UNCORE_MODEL_MATCH(INTEL_FAM6_BROADWELL_G, bdw_uncore_init), X86_UNCORE_MODEL_MATCH(INTEL_FAM6_SANDYBRIDGE_X, snbep_uncore_init), X86_UNCORE_MODEL_MATCH(INTEL_FAM6_NEHALEM_EX, nhmex_uncore_init), X86_UNCORE_MODEL_MATCH(INTEL_FAM6_WESTMERE_EX, nhmex_uncore_init), X86_UNCORE_MODEL_MATCH(INTEL_FAM6_IVYBRIDGE_X, ivbep_uncore_init), X86_UNCORE_MODEL_MATCH(INTEL_FAM6_HASWELL_X, hswep_uncore_init), X86_UNCORE_MODEL_MATCH(INTEL_FAM6_BROADWELL_X, bdx_uncore_init), - X86_UNCORE_MODEL_MATCH(INTEL_FAM6_BROADWELL_XEON_D, bdx_uncore_init), + X86_UNCORE_MODEL_MATCH(INTEL_FAM6_BROADWELL_D, bdx_uncore_init), X86_UNCORE_MODEL_MATCH(INTEL_FAM6_XEON_PHI_KNL, knl_uncore_init), X86_UNCORE_MODEL_MATCH(INTEL_FAM6_XEON_PHI_KNM, knl_uncore_init), - X86_UNCORE_MODEL_MATCH(INTEL_FAM6_SKYLAKE_DESKTOP,skl_uncore_init), - X86_UNCORE_MODEL_MATCH(INTEL_FAM6_SKYLAKE_MOBILE, skl_uncore_init), + X86_UNCORE_MODEL_MATCH(INTEL_FAM6_SKYLAKE, skl_uncore_init), + X86_UNCORE_MODEL_MATCH(INTEL_FAM6_SKYLAKE_L, skl_uncore_init), X86_UNCORE_MODEL_MATCH(INTEL_FAM6_SKYLAKE_X, skx_uncore_init), - X86_UNCORE_MODEL_MATCH(INTEL_FAM6_KABYLAKE_MOBILE, skl_uncore_init), - X86_UNCORE_MODEL_MATCH(INTEL_FAM6_KABYLAKE_DESKTOP, skl_uncore_init), - X86_UNCORE_MODEL_MATCH(INTEL_FAM6_ICELAKE_MOBILE, icl_uncore_init), - X86_UNCORE_MODEL_MATCH(INTEL_FAM6_ICELAKE_NNPI, icl_uncore_init), - X86_UNCORE_MODEL_MATCH(INTEL_FAM6_ICELAKE_DESKTOP, icl_uncore_init), - X86_UNCORE_MODEL_MATCH(INTEL_FAM6_ATOM_TREMONT_X, snr_uncore_init), + X86_UNCORE_MODEL_MATCH(INTEL_FAM6_KABYLAKE_L, skl_uncore_init), + X86_UNCORE_MODEL_MATCH(INTEL_FAM6_KABYLAKE, skl_uncore_init), + X86_UNCORE_MODEL_MATCH(INTEL_FAM6_ICELAKE_L, icl_uncore_init), + X86_UNCORE_MODEL_MATCH(INTEL_FAM6_ICELAKE_NNPI, icl_uncore_init), + X86_UNCORE_MODEL_MATCH(INTEL_FAM6_ICELAKE, icl_uncore_init), + X86_UNCORE_MODEL_MATCH(INTEL_FAM6_ATOM_TREMONT_D, snr_uncore_init), {}, }; diff --git a/arch/x86/events/intel/uncore.h b/arch/x86/events/intel/uncore.h index f36f7bebbc1b..bbfdaa720b45 100644 --- a/arch/x86/events/intel/uncore.h +++ b/arch/x86/events/intel/uncore.h @@ -441,18 +441,6 @@ static inline int uncore_freerunning_hw_config(struct intel_uncore_box *box, return -EINVAL; } -static inline void uncore_disable_box(struct intel_uncore_box *box) -{ - if (box->pmu->type->ops->disable_box) - box->pmu->type->ops->disable_box(box); -} - -static inline void uncore_enable_box(struct intel_uncore_box *box) -{ - if (box->pmu->type->ops->enable_box) - box->pmu->type->ops->enable_box(box); -} - static inline void uncore_disable_event(struct intel_uncore_box *box, struct perf_event *event) { diff --git a/arch/x86/events/msr.c b/arch/x86/events/msr.c index 9431447541e9..6f86650b3f77 100644 --- a/arch/x86/events/msr.c +++ b/arch/x86/events/msr.c @@ -59,22 +59,22 @@ static bool test_intel(int idx, void *data) case INTEL_FAM6_IVYBRIDGE: case INTEL_FAM6_IVYBRIDGE_X: - case INTEL_FAM6_HASWELL_CORE: + case INTEL_FAM6_HASWELL: case INTEL_FAM6_HASWELL_X: - case INTEL_FAM6_HASWELL_ULT: - case INTEL_FAM6_HASWELL_GT3E: + case INTEL_FAM6_HASWELL_L: + case INTEL_FAM6_HASWELL_G: - case INTEL_FAM6_BROADWELL_CORE: - case INTEL_FAM6_BROADWELL_XEON_D: - case INTEL_FAM6_BROADWELL_GT3E: + case INTEL_FAM6_BROADWELL: + case INTEL_FAM6_BROADWELL_D: + case INTEL_FAM6_BROADWELL_G: case INTEL_FAM6_BROADWELL_X: case INTEL_FAM6_ATOM_SILVERMONT: - case INTEL_FAM6_ATOM_SILVERMONT_X: + case INTEL_FAM6_ATOM_SILVERMONT_D: case INTEL_FAM6_ATOM_AIRMONT: case INTEL_FAM6_ATOM_GOLDMONT: - case INTEL_FAM6_ATOM_GOLDMONT_X: + case 
INTEL_FAM6_ATOM_GOLDMONT_D: case INTEL_FAM6_ATOM_GOLDMONT_PLUS: @@ -84,12 +84,19 @@ static bool test_intel(int idx, void *data) return true; break; - case INTEL_FAM6_SKYLAKE_MOBILE: - case INTEL_FAM6_SKYLAKE_DESKTOP: + case INTEL_FAM6_SKYLAKE_L: + case INTEL_FAM6_SKYLAKE: case INTEL_FAM6_SKYLAKE_X: - case INTEL_FAM6_KABYLAKE_MOBILE: - case INTEL_FAM6_KABYLAKE_DESKTOP: - case INTEL_FAM6_ICELAKE_MOBILE: + case INTEL_FAM6_KABYLAKE_L: + case INTEL_FAM6_KABYLAKE: + case INTEL_FAM6_COMETLAKE_L: + case INTEL_FAM6_COMETLAKE: + case INTEL_FAM6_ICELAKE_L: + case INTEL_FAM6_ICELAKE: + case INTEL_FAM6_ICELAKE_X: + case INTEL_FAM6_ICELAKE_D: + case INTEL_FAM6_TIGERLAKE_L: + case INTEL_FAM6_TIGERLAKE: if (idx == PERF_MSR_SMI || idx == PERF_MSR_PPERF) return true; break; @@ -167,7 +174,7 @@ static const struct attribute_group *attr_groups[] = { NULL, }; -const struct attribute_group *attr_update[] = { +static const struct attribute_group *attr_update[] = { &group_aperf, &group_mperf, &group_pperf, diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index 8751008fc170..ecacfbf4ebc1 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -76,6 +76,7 @@ static inline bool constraint_match(struct event_constraint *c, u64 ecode) #define PERF_X86_EVENT_EXCL_ACCT 0x0100 /* accounted EXCL event */ #define PERF_X86_EVENT_AUTO_RELOAD 0x0200 /* use PEBS auto-reload */ #define PERF_X86_EVENT_LARGE_PEBS 0x0400 /* use large PEBS */ +#define PERF_X86_EVENT_PEBS_VIA_PT 0x0800 /* use PT buffer for PEBS */ struct amd_nb { int nb_id; /* NorthBridge id */ @@ -85,6 +86,11 @@ struct amd_nb { }; #define PEBS_COUNTER_MASK ((1ULL << MAX_PEBS_EVENTS) - 1) +#define PEBS_PMI_AFTER_EACH_RECORD BIT_ULL(60) +#define PEBS_OUTPUT_OFFSET 61 +#define PEBS_OUTPUT_MASK (3ull << PEBS_OUTPUT_OFFSET) +#define PEBS_OUTPUT_PT (1ull << PEBS_OUTPUT_OFFSET) +#define PEBS_VIA_PT_MASK (PEBS_OUTPUT_PT | PEBS_PMI_AFTER_EACH_RECORD) /* * Flags PEBS can handle without an PMI. @@ -211,6 +217,8 @@ struct cpu_hw_events { u64 pebs_enabled; int n_pebs; int n_large_pebs; + int n_pebs_via_pt; + int pebs_output; /* Current super set of events hardware configuration */ u64 pebs_data_cfg; @@ -510,6 +518,8 @@ union perf_capabilities { */ u64 full_width_write:1; u64 pebs_baseline:1; + u64 pebs_metrics_available:1; + u64 pebs_output_pt_available:1; }; u64 capabilities; }; @@ -692,6 +702,8 @@ struct x86_pmu { * Check period value for PERF_EVENT_IOC_PERIOD ioctl. */ int (*check_period) (struct perf_event *event, u64 period); + + int (*aux_output_match) (struct perf_event *event); }; struct x86_perf_task_context { @@ -901,6 +913,11 @@ static inline int amd_pmu_init(void) #endif /* CONFIG_CPU_SUP_AMD */ +static inline int is_pebs_pt(struct perf_event *event) +{ + return !!(event->hw.flags & PERF_X86_EVENT_PEBS_VIA_PT); +} + #ifdef CONFIG_CPU_SUP_INTEL static inline bool intel_pmu_has_bts_period(struct perf_event *event, u64 period) diff --git a/arch/x86/hyperv/hv_apic.c b/arch/x86/hyperv/hv_apic.c index 5c056b8aebef..e01078e93dd3 100644 --- a/arch/x86/hyperv/hv_apic.c +++ b/arch/x86/hyperv/hv_apic.c @@ -260,11 +260,21 @@ void __init hv_apic_init(void) } if (ms_hyperv.hints & HV_X64_APIC_ACCESS_RECOMMENDED) { - pr_info("Hyper-V: Using MSR based APIC access\n"); + pr_info("Hyper-V: Using enlightened APIC (%s mode)", + x2apic_enabled() ? "x2apic" : "xapic"); + /* + * With x2apic, architectural x2apic MSRs are equivalent to the + * respective synthetic MSRs, so there's no need to override + * the apic accessors. 
The only exception is + * hv_apic_eoi_write, because it benefits from lazy EOI when + * available, but it works for both xapic and x2apic modes. + */ apic_set_eoi_write(hv_apic_eoi_write); - apic->read = hv_apic_read; - apic->write = hv_apic_write; - apic->icr_write = hv_apic_icr_write; - apic->icr_read = hv_apic_icr_read; + if (!x2apic_enabled()) { + apic->read = hv_apic_read; + apic->write = hv_apic_write; + apic->icr_write = hv_apic_icr_write; + apic->icr_read = hv_apic_icr_read; + } } } diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index 0d258688c8cf..2db3972c0e0f 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -37,6 +37,20 @@ EXPORT_SYMBOL_GPL(hyperv_pcpu_input_arg); u32 hv_max_vp_index; EXPORT_SYMBOL_GPL(hv_max_vp_index); +void *hv_alloc_hyperv_page(void) +{ + BUILD_BUG_ON(PAGE_SIZE != HV_HYP_PAGE_SIZE); + + return (void *)__get_free_page(GFP_KERNEL); +} +EXPORT_SYMBOL_GPL(hv_alloc_hyperv_page); + +void hv_free_hyperv_page(unsigned long addr) +{ + free_page(addr); +} +EXPORT_SYMBOL_GPL(hv_free_hyperv_page); + static int hv_cpu_init(unsigned int cpu) { u64 msr_vp_index; @@ -301,8 +315,6 @@ void __init hyperv_init(void) x86_init.pci.arch_init = hv_pci_init; - /* Register Hyper-V specific clocksource */ - hv_init_clocksource(); return; remove_cpuhp_state: diff --git a/arch/x86/hyperv/mmu.c b/arch/x86/hyperv/mmu.c index e65d7fe6489f..5208ba49c89a 100644 --- a/arch/x86/hyperv/mmu.c +++ b/arch/x86/hyperv/mmu.c @@ -37,12 +37,14 @@ static inline int fill_gva_list(u64 gva_list[], int offset, * Lower 12 bits encode the number of additional * pages to flush (in addition to the 'cur' page). */ - if (diff >= HV_TLB_FLUSH_UNIT) + if (diff >= HV_TLB_FLUSH_UNIT) { gva_list[gva_n] |= ~PAGE_MASK; - else if (diff) + cur += HV_TLB_FLUSH_UNIT; + } else if (diff) { gva_list[gva_n] |= (diff - 1) >> PAGE_SHIFT; + cur = end; + } - cur += HV_TLB_FLUSH_UNIT; gva_n++; } while (cur < end); diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h index aac686e1e005..bc9693c9107e 100644 --- a/arch/x86/include/asm/acpi.h +++ b/arch/x86/include/asm/acpi.h @@ -117,6 +117,12 @@ static inline bool acpi_has_cpu_in_madt(void) return !!acpi_lapic; } +#define ACPI_HAVE_ARCH_SET_ROOT_POINTER +static inline void acpi_arch_set_root_pointer(u64 addr) +{ + x86_init.acpi.set_root_pointer(addr); +} + #define ACPI_HAVE_ARCH_GET_ROOT_POINTER static inline u64 acpi_arch_get_root_pointer(void) { @@ -125,6 +131,7 @@ static inline u64 acpi_arch_get_root_pointer(void) void acpi_generic_reduced_hw_init(void); +void x86_default_set_root_pointer(u64 addr); u64 x86_default_get_root_pointer(void); #else /* !CONFIG_ACPI */ @@ -138,6 +145,8 @@ static inline void disable_acpi(void) { } static inline void acpi_generic_reduced_hw_init(void) { } +static inline void x86_default_set_root_pointer(u64 addr) { } + static inline u64 x86_default_get_root_pointer(void) { return 0; diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h index 094fbc9c0b1c..13adca37c99a 100644 --- a/arch/x86/include/asm/alternative.h +++ b/arch/x86/include/asm/alternative.h @@ -201,10 +201,10 @@ static inline int alternatives_text_reserved(void *start, void *end) * without volatile and memory clobber. 
*/ #define alternative(oldinstr, newinstr, feature) \ - asm volatile (ALTERNATIVE(oldinstr, newinstr, feature) : : : "memory") + asm_inline volatile (ALTERNATIVE(oldinstr, newinstr, feature) : : : "memory") #define alternative_2(oldinstr, newinstr1, feature1, newinstr2, feature2) \ - asm volatile(ALTERNATIVE_2(oldinstr, newinstr1, feature1, newinstr2, feature2) ::: "memory") + asm_inline volatile(ALTERNATIVE_2(oldinstr, newinstr1, feature1, newinstr2, feature2) ::: "memory") /* * Alternative inline assembly with input. @@ -218,7 +218,7 @@ static inline int alternatives_text_reserved(void *start, void *end) * Leaving an unused argument 0 to keep API compatibility. */ #define alternative_input(oldinstr, newinstr, feature, input...) \ - asm volatile (ALTERNATIVE(oldinstr, newinstr, feature) \ + asm_inline volatile (ALTERNATIVE(oldinstr, newinstr, feature) \ : : "i" (0), ## input) /* @@ -231,18 +231,18 @@ static inline int alternatives_text_reserved(void *start, void *end) */ #define alternative_input_2(oldinstr, newinstr1, feature1, newinstr2, \ feature2, input...) \ - asm volatile(ALTERNATIVE_2(oldinstr, newinstr1, feature1, \ + asm_inline volatile(ALTERNATIVE_2(oldinstr, newinstr1, feature1, \ newinstr2, feature2) \ : : "i" (0), ## input) /* Like alternative_input, but with a single output argument */ #define alternative_io(oldinstr, newinstr, feature, output, input...) \ - asm volatile (ALTERNATIVE(oldinstr, newinstr, feature) \ + asm_inline volatile (ALTERNATIVE(oldinstr, newinstr, feature) \ : output : "i" (0), ## input) /* Like alternative_io, but for replacing a direct call with another one. */ #define alternative_call(oldfunc, newfunc, feature, output, input...) \ - asm volatile (ALTERNATIVE("call %P[old]", "call %P[new]", feature) \ + asm_inline volatile (ALTERNATIVE("call %P[old]", "call %P[new]", feature) \ : output : [old] "i" (oldfunc), [new] "i" (newfunc), ## input) /* @@ -253,7 +253,7 @@ static inline int alternatives_text_reserved(void *start, void *end) */ #define alternative_call_2(oldfunc, newfunc1, feature1, newfunc2, feature2, \ output, input...) 
\ - asm volatile (ALTERNATIVE_2("call %P[old]", "call %P[new1]", feature1,\ + asm_inline volatile (ALTERNATIVE_2("call %P[old]", "call %P[new1]", feature1,\ "call %P[new2]", feature2) \ : output, ASM_CALL_CONSTRAINT \ : [old] "i" (oldfunc), [new1] "i" (newfunc1), \ diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index e647aa095867..2ebc17d9c72c 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -136,6 +136,7 @@ extern int lapic_get_maxlvt(void); extern void clear_local_APIC(void); extern void disconnect_bsp_APIC(int virt_wire_setup); extern void disable_local_APIC(void); +extern void apic_soft_disable(void); extern void lapic_shutdown(void); extern void sync_Arb_IDs(void); extern void init_bsp_APIC(void); @@ -176,6 +177,8 @@ extern void lapic_online(void); extern void lapic_offline(void); extern bool apic_needs_pit(void); +extern void apic_send_IPI_allbutself(unsigned int vector); + #else /* !CONFIG_X86_LOCAL_APIC */ static inline void lapic_shutdown(void) { } #define local_apic_timer_c2_ok 1 @@ -465,12 +468,6 @@ static inline unsigned default_get_apic_id(unsigned long x) #define TRAMPOLINE_PHYS_LOW 0x467 #define TRAMPOLINE_PHYS_HIGH 0x469 -#ifdef CONFIG_X86_64 -extern void apic_send_IPI_self(int vector); - -DECLARE_PER_CPU(int, x2apic_extra_bits); -#endif - extern void generic_bigsmp_probe(void); #ifdef CONFIG_X86_LOCAL_APIC @@ -506,8 +503,10 @@ extern int default_check_phys_apicid_present(int phys_apicid); #ifdef CONFIG_SMP bool apic_id_is_primary_thread(unsigned int id); +void apic_smt_update(void); #else static inline bool apic_id_is_primary_thread(unsigned int id) { return false; } +static inline void apic_smt_update(void) { } #endif extern void irq_enter(void); diff --git a/arch/x86/include/asm/apic_flat_64.h b/arch/x86/include/asm/apic_flat_64.h deleted file mode 100644 index d3a2b3876ce6..000000000000 --- a/arch/x86/include/asm/apic_flat_64.h +++ /dev/null @@ -1,8 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_X86_APIC_FLAT_64_H -#define _ASM_X86_APIC_FLAT_64_H - -extern void flat_init_apic_ldr(void); - -#endif - diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h index 84f848c2541a..7f828fe49797 100644 --- a/arch/x86/include/asm/barrier.h +++ b/arch/x86/include/asm/barrier.h @@ -49,8 +49,7 @@ static inline unsigned long array_index_mask_nospec(unsigned long index, #define array_index_mask_nospec array_index_mask_nospec /* Prevent speculative execution past this barrier. */ -#define barrier_nospec() alternative_2("", "mfence", X86_FEATURE_MFENCE_RDTSC, \ - "lfence", X86_FEATURE_LFENCE_RDTSC) +#define barrier_nospec() alternative("", "lfence", X86_FEATURE_LFENCE_RDTSC) #define dma_rmb() barrier() #define dma_wmb() barrier() diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h index ba15d53c1ca7..7d1f6a49bfae 100644 --- a/arch/x86/include/asm/bitops.h +++ b/arch/x86/include/asm/bitops.h @@ -45,14 +45,13 @@ * We do the locked ops that don't return the old value as * a mask operation on a byte. 
*/ -#define IS_IMMEDIATE(nr) (__builtin_constant_p(nr)) #define CONST_MASK_ADDR(nr, addr) WBYTE_ADDR((void *)(addr) + ((nr)>>3)) #define CONST_MASK(nr) (1 << ((nr) & 7)) static __always_inline void arch_set_bit(long nr, volatile unsigned long *addr) { - if (IS_IMMEDIATE(nr)) { + if (__builtin_constant_p(nr)) { asm volatile(LOCK_PREFIX "orb %1,%0" : CONST_MASK_ADDR(nr, addr) : "iq" ((u8)CONST_MASK(nr)) @@ -72,7 +71,7 @@ arch___set_bit(long nr, volatile unsigned long *addr) static __always_inline void arch_clear_bit(long nr, volatile unsigned long *addr) { - if (IS_IMMEDIATE(nr)) { + if (__builtin_constant_p(nr)) { asm volatile(LOCK_PREFIX "andb %1,%0" : CONST_MASK_ADDR(nr, addr) : "iq" ((u8)~CONST_MASK(nr))); @@ -123,7 +122,7 @@ arch___change_bit(long nr, volatile unsigned long *addr) static __always_inline void arch_change_bit(long nr, volatile unsigned long *addr) { - if (IS_IMMEDIATE(nr)) { + if (__builtin_constant_p(nr)) { asm volatile(LOCK_PREFIX "xorb %1,%0" : CONST_MASK_ADDR(nr, addr) : "iq" ((u8)CONST_MASK(nr))); diff --git a/arch/x86/include/asm/bootparam_utils.h b/arch/x86/include/asm/bootparam_utils.h index 9e5f3c722c33..981fe923a59f 100644 --- a/arch/x86/include/asm/bootparam_utils.h +++ b/arch/x86/include/asm/bootparam_utils.h @@ -70,6 +70,7 @@ static void sanitize_boot_params(struct boot_params *boot_params) BOOT_PARAM_PRESERVE(eddbuf_entries), BOOT_PARAM_PRESERVE(edd_mbr_sig_buf_entries), BOOT_PARAM_PRESERVE(edd_mbr_sig_buffer), + BOOT_PARAM_PRESERVE(secure_boot), BOOT_PARAM_PRESERVE(hdr), BOOT_PARAM_PRESERVE(e820_table), BOOT_PARAM_PRESERVE(eddbuf), diff --git a/arch/x86/include/asm/bug.h b/arch/x86/include/asm/bug.h index 6804d6642767..facba9bc30ca 100644 --- a/arch/x86/include/asm/bug.h +++ b/arch/x86/include/asm/bug.h @@ -32,7 +32,7 @@ #define _BUG_FLAGS(ins, flags) \ do { \ - asm volatile("1:\t" ins "\n" \ + asm_inline volatile("1:\t" ins "\n" \ ".pushsection __bug_table,\"aw\"\n" \ "2:\t" __BUG_REL(1b) "\t# bug_entry::bug_addr\n" \ "\t" __BUG_REL(%c0) "\t# bug_entry::file\n" \ @@ -49,7 +49,7 @@ do { \ #define _BUG_FLAGS(ins, flags) \ do { \ - asm volatile("1:\t" ins "\n" \ + asm_inline volatile("1:\t" ins "\n" \ ".pushsection __bug_table,\"aw\"\n" \ "2:\t" __BUG_REL(1b) "\t# bug_entry::bug_addr\n" \ "\t.word %c0" "\t# bug_entry::flags\n" \ diff --git a/arch/x86/include/asm/bugs.h b/arch/x86/include/asm/bugs.h index 542509b53e0f..794eb2129bc6 100644 --- a/arch/x86/include/asm/bugs.h +++ b/arch/x86/include/asm/bugs.h @@ -18,4 +18,6 @@ int ppro_with_ram_bug(void); static inline int ppro_with_ram_bug(void) { return 0; } #endif +extern void cpu_bugs_smt_update(void); + #endif /* _ASM_X86_BUGS_H */ diff --git a/arch/x86/include/asm/cpu_entry_area.h b/arch/x86/include/asm/cpu_entry_area.h index cff3f3f3bfe0..8348f7d69fd5 100644 --- a/arch/x86/include/asm/cpu_entry_area.h +++ b/arch/x86/include/asm/cpu_entry_area.h @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _ASM_X86_CPU_ENTRY_AREA_H #define _ASM_X86_CPU_ENTRY_AREA_H diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 58acda503817..59bf91c57aa8 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -61,6 +61,13 @@ extern const char * const x86_bug_flags[NBUGINTS*32]; #define CHECK_BIT_IN_MASK_WORD(maskname, word, bit) \ (((bit)>>5)==(word) && (1UL<<((bit)&31) & maskname##word )) +/* + * {REQUIRED,DISABLED}_MASK_CHECK below may seem duplicated with the + * following BUILD_BUG_ON_ZERO() check but when 
NCAPINTS gets changed, all + * header macros which use NCAPINTS need to be changed. The duplicated macro + * use causes the compiler to issue errors for all headers so that all usage + * sites can be corrected. + */ #define REQUIRED_MASK_BIT_SET(feature_bit) \ ( CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 0, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 1, feature_bit) || \ diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index e880f2408e29..c4fbe379cc0b 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -96,7 +96,6 @@ #define X86_FEATURE_SYSCALL32 ( 3*32+14) /* "" syscall in IA32 userspace */ #define X86_FEATURE_SYSENTER32 ( 3*32+15) /* "" sysenter in IA32 userspace */ #define X86_FEATURE_REP_GOOD ( 3*32+16) /* REP microcode works well */ -#define X86_FEATURE_MFENCE_RDTSC ( 3*32+17) /* "" MFENCE synchronizes RDTSC */ #define X86_FEATURE_LFENCE_RDTSC ( 3*32+18) /* "" LFENCE synchronizes RDTSC */ #define X86_FEATURE_ACC_POWER ( 3*32+19) /* AMD Accumulated Power Mechanism */ #define X86_FEATURE_NOPL ( 3*32+20) /* The NOPL (0F 1F) instructions */ @@ -232,6 +231,8 @@ #define X86_FEATURE_VMMCALL ( 8*32+15) /* Prefer VMMCALL to VMCALL */ #define X86_FEATURE_XENPV ( 8*32+16) /* "" Xen paravirtual guest */ #define X86_FEATURE_EPT_AD ( 8*32+17) /* Intel Extended Page Table access-dirty bit */ +#define X86_FEATURE_VMCALL ( 8*32+18) /* "" Hypervisor supports the VMCALL instruction */ +#define X86_FEATURE_VMW_VMMCALL ( 8*32+19) /* "" VMware prefers VMMCALL hypercall instruction */ /* Intel-defined CPU features, CPUID level 0x00000007:0 (EBX), word 9 */ #define X86_FEATURE_FSGSBASE ( 9*32+ 0) /* RDFSBASE, WRFSBASE, RDGSBASE, WRGSBASE instructions*/ @@ -355,6 +356,7 @@ /* Intel-defined CPU features, CPUID level 0x00000007:0 (EDX), word 18 */ #define X86_FEATURE_AVX512_4VNNIW (18*32+ 2) /* AVX-512 Neural Network Instructions */ #define X86_FEATURE_AVX512_4FMAPS (18*32+ 3) /* AVX-512 Multiply Accumulation Single precision */ +#define X86_FEATURE_AVX512_VP2INTERSECT (18*32+ 8) /* AVX-512 Intersect for D/Q */ #define X86_FEATURE_MD_CLEAR (18*32+10) /* VERW clears CPU buffers */ #define X86_FEATURE_TSX_FORCE_ABORT (18*32+13) /* "" TSX_FORCE_ABORT */ #define X86_FEATURE_PCONFIG (18*32+18) /* Intel PCONFIG */ @@ -397,5 +399,7 @@ #define X86_BUG_MDS X86_BUG(19) /* CPU is affected by Microarchitectural data sampling */ #define X86_BUG_MSBDS_ONLY X86_BUG(20) /* CPU is only affected by the MSDBS variant of BUG_MDS */ #define X86_BUG_SWAPGS X86_BUG(21) /* CPU is affected by speculation through SWAPGS */ +#define X86_BUG_TAA X86_BUG(22) /* CPU is affected by TSX Async Abort(TAA) */ +#define X86_BUG_ITLB_MULTIHIT X86_BUG(23) /* CPU may incur MCE during certain page attribute changes */ #endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/arch/x86/include/asm/cpuidle_haltpoll.h b/arch/x86/include/asm/cpuidle_haltpoll.h new file mode 100644 index 000000000000..c8b39c6716ff --- /dev/null +++ b/arch/x86/include/asm/cpuidle_haltpoll.h @@ -0,0 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ARCH_HALTPOLL_H +#define _ARCH_HALTPOLL_H + +void arch_haltpoll_enable(unsigned int cpu); +void arch_haltpoll_disable(unsigned int cpu); + +#endif diff --git a/arch/x86/include/asm/crypto/aes.h b/arch/x86/include/asm/crypto/aes.h deleted file mode 100644 index c508521dd190..000000000000 --- a/arch/x86/include/asm/crypto/aes.h +++ /dev/null @@ -1,12 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef ASM_X86_AES_H -#define ASM_X86_AES_H - 
-#include <linux/crypto.h> -#include <crypto/aes.h> - -void crypto_aes_encrypt_x86(struct crypto_aes_ctx *ctx, u8 *dst, - const u8 *src); -void crypto_aes_decrypt_x86(struct crypto_aes_ctx *ctx, u8 *dst, - const u8 *src); -#endif diff --git a/arch/x86/include/asm/crypto/glue_helper.h b/arch/x86/include/asm/crypto/glue_helper.h index d1818634ae7e..8d4a8e1226ee 100644 --- a/arch/x86/include/asm/crypto/glue_helper.h +++ b/arch/x86/include/asm/crypto/glue_helper.h @@ -114,7 +114,7 @@ extern int glue_ctr_req_128bit(const struct common_glue_ctx *gctx, extern int glue_xts_req_128bit(const struct common_glue_ctx *gctx, struct skcipher_request *req, common_glue_func_t tweak_fn, void *tweak_ctx, - void *crypt_ctx); + void *crypt_ctx, bool decrypt); extern void glue_xts_crypt_128bit_one(void *ctx, u128 *dst, const u128 *src, le128 *iv, common_glue_func_t fn); diff --git a/arch/x86/include/asm/div64.h b/arch/x86/include/asm/div64.h index 20a46150e0a8..9b8cb50768c2 100644 --- a/arch/x86/include/asm/div64.h +++ b/arch/x86/include/asm/div64.h @@ -73,6 +73,19 @@ static inline u64 mul_u32_u32(u32 a, u32 b) #else # include <asm-generic/div64.h> + +static inline u64 mul_u64_u32_div(u64 a, u32 mul, u32 div) +{ + u64 q; + + asm ("mulq %2; divq %3" : "=a" (q) + : "a" (a), "rm" ((u64)mul), "rm" ((u64)div) + : "rdx"); + + return q; +} +#define mul_u64_u32_div mul_u64_u32_div + #endif /* CONFIG_X86_32 */ #endif /* _ASM_X86_DIV64_H */ diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index 606a4b6a9812..43a82e59c59d 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -242,6 +242,7 @@ static inline bool efi_is_64bit(void) __efi_early()->runtime_services), __VA_ARGS__) extern bool efi_reboot_required(void); +extern bool efi_is_table_address(unsigned long phys_addr); #else static inline void parse_efi_setup(u64 phys_addr, u32 data_len) {} @@ -249,6 +250,10 @@ static inline bool efi_reboot_required(void) { return false; } +static inline bool efi_is_table_address(unsigned long phys_addr) +{ + return false; +} #endif /* CONFIG_EFI */ #endif /* _ASM_X86_EFI_H */ diff --git a/arch/x86/include/asm/error-injection.h b/arch/x86/include/asm/error-injection.h deleted file mode 100644 index 47b7a1296245..000000000000 --- a/arch/x86/include/asm/error-injection.h +++ /dev/null @@ -1,13 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_ERROR_INJECTION_H -#define _ASM_ERROR_INJECTION_H - -#include <linux/compiler.h> -#include <linux/linkage.h> -#include <asm/ptrace.h> -#include <asm-generic/error-injection.h> - -asmlinkage void just_return_func(void); -void override_function_with_return(struct pt_regs *regs); - -#endif /* _ASM_ERROR_INJECTION_H */ diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h index cbd97e22d2f3..4154bc5f6a4e 100644 --- a/arch/x86/include/asm/hw_irq.h +++ b/arch/x86/include/asm/hw_irq.h @@ -153,8 +153,8 @@ extern char irq_entries_start[]; extern char spurious_entries_start[]; #define VECTOR_UNUSED NULL -#define VECTOR_SHUTDOWN ((void *)~0UL) -#define VECTOR_RETRIGGERED ((void *)~1UL) +#define VECTOR_SHUTDOWN ((void *)-1L) +#define VECTOR_RETRIGGERED ((void *)-2L) typedef struct irq_desc* vector_irq_t[NR_VECTORS]; DECLARE_PER_CPU(vector_irq_t, vector_irq); diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h index af78cd72b8f3..7741e211f7f5 100644 --- a/arch/x86/include/asm/hyperv-tlfs.h +++ b/arch/x86/include/asm/hyperv-tlfs.h @@ -13,6 +13,16 @@ #include <asm/page.h> /* + * While not explicitly listed 
in the TLFS, Hyper-V always runs with a page size + * of 4096. These definitions are used when communicating with Hyper-V using + * guest physical pages and guest physical page addresses, since the guest page + * size may not be 4096 on all architectures. + */ +#define HV_HYP_PAGE_SHIFT 12 +#define HV_HYP_PAGE_SIZE BIT(HV_HYP_PAGE_SHIFT) +#define HV_HYP_PAGE_MASK (~(HV_HYP_PAGE_SIZE - 1)) + +/* * The below CPUID leaves are present if VersionAndFeatures.HypervisorPresent * is set by CPUID(HvCpuIdFunctionVersionAndFeatures). */ @@ -170,7 +180,15 @@ /* Recommend using enlightened VMCS */ #define HV_X64_ENLIGHTENED_VMCS_RECOMMENDED BIT(14) +/* + * Virtual processor will never share a physical core with another virtual + * processor, except for virtual processors that are reported as sibling SMT + * threads. + */ +#define HV_X64_NO_NONARCH_CORESHARING BIT(18) + /* Nested features. These are HYPERV_CPUID_NESTED_FEATURES.EAX bits. */ +#define HV_X64_NESTED_DIRECT_FLUSH BIT(17) #define HV_X64_NESTED_GUEST_MAPPING_FLUSH BIT(18) #define HV_X64_NESTED_MSR_BITMAP BIT(19) @@ -514,14 +532,24 @@ struct hv_timer_message_payload { __u64 delivery_time; /* When the message was delivered */ } __packed; +struct hv_nested_enlightenments_control { + struct { + __u32 directhypercall:1; + __u32 reserved:31; + } features; + struct { + __u32 reserved; + } hypercallControls; +} __packed; + /* Define virtual processor assist page structure. */ struct hv_vp_assist_page { __u32 apic_assist; - __u32 reserved; - __u64 vtl_control[2]; - __u64 nested_enlightenments_control[2]; - __u32 enlighten_vmentry; - __u32 padding; + __u32 reserved1; + __u64 vtl_control[3]; + struct hv_nested_enlightenments_control nested_control; + __u8 enlighten_vmentry; + __u8 reserved2[7]; __u64 current_nested_vmcs; } __packed; @@ -847,7 +875,7 @@ union hv_gpa_page_range { * count is equal with how many entries of union hv_gpa_page_range can * be populated into the input parameter page. */ -#define HV_MAX_FLUSH_REP_COUNT ((PAGE_SIZE - 2 * sizeof(u64)) / \ +#define HV_MAX_FLUSH_REP_COUNT ((HV_HYP_PAGE_SIZE - 2 * sizeof(u64)) / \ sizeof(union hv_gpa_page_range)) struct hv_guest_mapping_flush_list { @@ -872,4 +900,7 @@ struct hv_tlb_flush_ex { u64 gva_list[]; } __packed; +struct hv_partition_assist_pg { + u32 tlb_lock_count; +}; #endif diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h index fe7c205233f1..c606c0b70738 100644 --- a/arch/x86/include/asm/intel-family.h +++ b/arch/x86/include/asm/intel-family.h @@ -5,9 +5,6 @@ /* * "Big Core" Processors (Branded as Core, Xeon, etc...) * - * The "_X" parts are generally the EP and EX Xeons, or the - * "Extreme" ones, like Broadwell-E, or Atom microserver. - * * While adding a new CPUID for a new microarchitecture, add a new * group to keep logically sorted out in chronological order. Within * that group keep the CPUID for the variants sorted by model number. @@ -21,9 +18,19 @@ * MICROARCH Is the code name for the micro-architecture for this core. * N.B. Not the platform name. * OPTDIFF If needed, a short string to differentiate by market segment. - * Exact strings here will vary over time. _DESKTOP, _MOBILE, and - * _X (short for Xeon server) should be used when they are - * appropriate. 
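/*
 * Worked example for the HV_MAX_FLUSH_REP_COUNT change above, assuming
 * union hv_gpa_page_range stays a single u64: the flush-list header
 * (address_space + flags) occupies 2 * 8 bytes of the fixed 4096-byte
 * Hyper-V page, leaving (4096 - 16) / 8 = 510 rep entries per hypercall,
 * now independent of the guest's own PAGE_SIZE. Standalone check:
 */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
        uint64_t hv_page = 1ull << 12;            /* HV_HYP_PAGE_SIZE      */
        uint64_t header  = 2 * sizeof(uint64_t);  /* address_space + flags */
        uint64_t entry   = sizeof(uint64_t);      /* one hv_gpa_page_range */

        printf("%llu\n", (unsigned long long)((hv_page - header) / entry)); /* 510 */
        return 0;
}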
+ * + * Common OPTDIFFs: + * + * - regular client parts + * _L - regular mobile parts + * _G - parts with extra graphics on + * _X - regular server parts + * _D - micro server parts + * + * Historical OPTDIFFs: + * + * _EP - 2 socket server parts + * _EX - 4+ socket server parts * * The #define line may optionally include a comment including platform names. */ @@ -49,30 +56,36 @@ #define INTEL_FAM6_IVYBRIDGE 0x3A #define INTEL_FAM6_IVYBRIDGE_X 0x3E -#define INTEL_FAM6_HASWELL_CORE 0x3C +#define INTEL_FAM6_HASWELL 0x3C #define INTEL_FAM6_HASWELL_X 0x3F -#define INTEL_FAM6_HASWELL_ULT 0x45 -#define INTEL_FAM6_HASWELL_GT3E 0x46 +#define INTEL_FAM6_HASWELL_L 0x45 +#define INTEL_FAM6_HASWELL_G 0x46 -#define INTEL_FAM6_BROADWELL_CORE 0x3D -#define INTEL_FAM6_BROADWELL_GT3E 0x47 +#define INTEL_FAM6_BROADWELL 0x3D +#define INTEL_FAM6_BROADWELL_G 0x47 #define INTEL_FAM6_BROADWELL_X 0x4F -#define INTEL_FAM6_BROADWELL_XEON_D 0x56 +#define INTEL_FAM6_BROADWELL_D 0x56 -#define INTEL_FAM6_SKYLAKE_MOBILE 0x4E -#define INTEL_FAM6_SKYLAKE_DESKTOP 0x5E +#define INTEL_FAM6_SKYLAKE_L 0x4E +#define INTEL_FAM6_SKYLAKE 0x5E #define INTEL_FAM6_SKYLAKE_X 0x55 -#define INTEL_FAM6_KABYLAKE_MOBILE 0x8E -#define INTEL_FAM6_KABYLAKE_DESKTOP 0x9E +#define INTEL_FAM6_KABYLAKE_L 0x8E +#define INTEL_FAM6_KABYLAKE 0x9E -#define INTEL_FAM6_CANNONLAKE_MOBILE 0x66 +#define INTEL_FAM6_CANNONLAKE_L 0x66 #define INTEL_FAM6_ICELAKE_X 0x6A -#define INTEL_FAM6_ICELAKE_XEON_D 0x6C -#define INTEL_FAM6_ICELAKE_DESKTOP 0x7D -#define INTEL_FAM6_ICELAKE_MOBILE 0x7E +#define INTEL_FAM6_ICELAKE_D 0x6C +#define INTEL_FAM6_ICELAKE 0x7D +#define INTEL_FAM6_ICELAKE_L 0x7E #define INTEL_FAM6_ICELAKE_NNPI 0x9D +#define INTEL_FAM6_TIGERLAKE_L 0x8C +#define INTEL_FAM6_TIGERLAKE 0x8D + +#define INTEL_FAM6_COMETLAKE 0xA5 +#define INTEL_FAM6_COMETLAKE_L 0xA6 + /* "Small Core" Processors (Atom) */ #define INTEL_FAM6_ATOM_BONNELL 0x1C /* Diamondville, Pineview */ @@ -83,17 +96,21 @@ #define INTEL_FAM6_ATOM_SALTWELL_TABLET 0x35 /* Cloverview */ #define INTEL_FAM6_ATOM_SILVERMONT 0x37 /* Bay Trail, Valleyview */ -#define INTEL_FAM6_ATOM_SILVERMONT_X 0x4D /* Avaton, Rangely */ +#define INTEL_FAM6_ATOM_SILVERMONT_D 0x4D /* Avaton, Rangely */ #define INTEL_FAM6_ATOM_SILVERMONT_MID 0x4A /* Merriefield */ #define INTEL_FAM6_ATOM_AIRMONT 0x4C /* Cherry Trail, Braswell */ #define INTEL_FAM6_ATOM_AIRMONT_MID 0x5A /* Moorefield */ +#define INTEL_FAM6_ATOM_AIRMONT_NP 0x75 /* Lightning Mountain */ #define INTEL_FAM6_ATOM_GOLDMONT 0x5C /* Apollo Lake */ -#define INTEL_FAM6_ATOM_GOLDMONT_X 0x5F /* Denverton */ +#define INTEL_FAM6_ATOM_GOLDMONT_D 0x5F /* Denverton */ + +/* Note: the micro-architecture is "Goldmont Plus" */ #define INTEL_FAM6_ATOM_GOLDMONT_PLUS 0x7A /* Gemini Lake */ -#define INTEL_FAM6_ATOM_TREMONT_X 0x86 /* Jacobsville */ +#define INTEL_FAM6_ATOM_TREMONT_D 0x86 /* Jacobsville */ +#define INTEL_FAM6_ATOM_TREMONT 0x96 /* Elkhart Lake */ /* Xeon Phi */ diff --git a/arch/x86/include/asm/intel_pt.h b/arch/x86/include/asm/intel_pt.h index 634f99b1dc22..423b788f495e 100644 --- a/arch/x86/include/asm/intel_pt.h +++ b/arch/x86/include/asm/intel_pt.h @@ -28,10 +28,12 @@ enum pt_capabilities { void cpu_emergency_stop_pt(void); extern u32 intel_pt_validate_hw_cap(enum pt_capabilities cap); extern u32 intel_pt_validate_cap(u32 *caps, enum pt_capabilities cap); +extern int is_intel_pt_event(struct perf_event *event); #else static inline void cpu_emergency_stop_pt(void) {} static inline u32 intel_pt_validate_hw_cap(enum pt_capabilities cap) { return 0; } static 
inline u32 intel_pt_validate_cap(u32 *caps, enum pt_capabilities capability) { return 0; } +static inline int is_intel_pt_event(struct perf_event *event) { return 0; } #endif #endif /* _ASM_X86_INTEL_PT_H */ diff --git a/arch/x86/include/asm/iommu.h b/arch/x86/include/asm/iommu.h index baedab8ac538..b91623d521d9 100644 --- a/arch/x86/include/asm/iommu.h +++ b/arch/x86/include/asm/iommu.h @@ -4,7 +4,6 @@ extern int force_iommu, no_iommu; extern int iommu_detected; -extern int iommu_pass_through; /* 10 seconds */ #define DMAR_OPERATION_TIMEOUT ((cycles_t) tsc_khz*10*1000) diff --git a/arch/x86/include/asm/ipi.h b/arch/x86/include/asm/ipi.h deleted file mode 100644 index f73076be546a..000000000000 --- a/arch/x86/include/asm/ipi.h +++ /dev/null @@ -1,109 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -#ifndef _ASM_X86_IPI_H -#define _ASM_X86_IPI_H - -#ifdef CONFIG_X86_LOCAL_APIC - -/* - * Copyright 2004 James Cleverdon, IBM. - * - * Generic APIC InterProcessor Interrupt code. - * - * Moved to include file by James Cleverdon from - * arch/x86-64/kernel/smp.c - * - * Copyrights from kernel/smp.c: - * - * (c) 1995 Alan Cox, Building #3 <alan@redhat.com> - * (c) 1998-99, 2000 Ingo Molnar <mingo@redhat.com> - * (c) 2002,2003 Andi Kleen, SuSE Labs. - */ - -#include <asm/hw_irq.h> -#include <asm/apic.h> -#include <asm/smp.h> - -/* - * the following functions deal with sending IPIs between CPUs. - * - * We use 'broadcast', CPU->CPU IPIs and self-IPIs too. - */ - -static inline unsigned int __prepare_ICR(unsigned int shortcut, int vector, - unsigned int dest) -{ - unsigned int icr = shortcut | dest; - - switch (vector) { - default: - icr |= APIC_DM_FIXED | vector; - break; - case NMI_VECTOR: - icr |= APIC_DM_NMI; - break; - } - return icr; -} - -static inline int __prepare_ICR2(unsigned int mask) -{ - return SET_APIC_DEST_FIELD(mask); -} - -static inline void __xapic_wait_icr_idle(void) -{ - while (native_apic_mem_read(APIC_ICR) & APIC_ICR_BUSY) - cpu_relax(); -} - -void __default_send_IPI_shortcut(unsigned int shortcut, int vector, unsigned int dest); - -/* - * This is used to send an IPI with no shorthand notation (the destination is - * specified in bits 56 to 63 of the ICR). 
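/*
 * The __prepare_ICR() helper in the header being deleted above folds the
 * delivery mode into the ICR command word (the logic lives on elsewhere
 * in the apic code). A standalone sketch of that encoding; the APIC_DM_*
 * values are inlined from asm/apicdef.h as an assumption:
 */
#include <stdint.h>

#define DEMO_APIC_DM_FIXED 0x00000u   /* APIC_DM_FIXED */
#define DEMO_APIC_DM_NMI   0x00400u   /* APIC_DM_NMI   */
#define DEMO_NMI_VECTOR    0x02

static uint32_t demo_prepare_icr(uint32_t shortcut, int vector, uint32_t dest)
{
        uint32_t icr = shortcut | dest;

        if (vector == DEMO_NMI_VECTOR)
                icr |= DEMO_APIC_DM_NMI;                  /* NMI carries no vector */
        else
                icr |= DEMO_APIC_DM_FIXED | (uint32_t)vector;
        return icr;
}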
- */ -void __default_send_IPI_dest_field(unsigned int mask, int vector, unsigned int dest); - -extern void default_send_IPI_single(int cpu, int vector); -extern void default_send_IPI_single_phys(int cpu, int vector); -extern void default_send_IPI_mask_sequence_phys(const struct cpumask *mask, - int vector); -extern void default_send_IPI_mask_allbutself_phys(const struct cpumask *mask, - int vector); - -/* Avoid include hell */ -#define NMI_VECTOR 0x02 - -extern int no_broadcast; - -static inline void __default_local_send_IPI_allbutself(int vector) -{ - if (no_broadcast || vector == NMI_VECTOR) - apic->send_IPI_mask_allbutself(cpu_online_mask, vector); - else - __default_send_IPI_shortcut(APIC_DEST_ALLBUT, vector, apic->dest_logical); -} - -static inline void __default_local_send_IPI_all(int vector) -{ - if (no_broadcast || vector == NMI_VECTOR) - apic->send_IPI_mask(cpu_online_mask, vector); - else - __default_send_IPI_shortcut(APIC_DEST_ALLINC, vector, apic->dest_logical); -} - -#ifdef CONFIG_X86_32 -extern void default_send_IPI_mask_sequence_logical(const struct cpumask *mask, - int vector); -extern void default_send_IPI_mask_allbutself_logical(const struct cpumask *mask, - int vector); -extern void default_send_IPI_mask_logical(const struct cpumask *mask, - int vector); -extern void default_send_IPI_allbutself(int vector); -extern void default_send_IPI_all(int vector); -extern void default_send_IPI_self(int vector); -#endif - -#endif - -#endif /* _ASM_X86_IPI_H */ diff --git a/arch/x86/include/asm/irq.h b/arch/x86/include/asm/irq.h index 8f95686ec27e..a176f6165d85 100644 --- a/arch/x86/include/asm/irq.h +++ b/arch/x86/include/asm/irq.h @@ -34,7 +34,7 @@ extern __visible void smp_kvm_posted_intr_nested_ipi(struct pt_regs *regs); extern void (*x86_platform_ipi_callback)(void); extern void native_init_IRQ(void); -extern bool handle_irq(struct irq_desc *desc, struct pt_regs *regs); +extern void handle_irq(struct irq_desc *desc, struct pt_regs *regs); extern __visible unsigned int do_IRQ(struct pt_regs *regs); diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index feab24cac610..77cf6c11f66b 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h @@ -229,7 +229,7 @@ struct x86_emulate_ops { int (*pre_leave_smm)(struct x86_emulate_ctxt *ctxt, const char *smstate); void (*post_leave_smm)(struct x86_emulate_ctxt *ctxt); - + int (*set_xcr)(struct x86_emulate_ctxt *ctxt, u32 index, u64 xcr); }; typedef u32 __attribute__((vector_size(16))) sse128_t; @@ -429,6 +429,7 @@ enum x86_intercept { x86_intercept_ins, x86_intercept_out, x86_intercept_outs, + x86_intercept_xsetbv, nr_x86_intercepts }; diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 74e88e5edd9c..4fc61483919a 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -219,13 +219,6 @@ enum { PFERR_WRITE_MASK | \ PFERR_PRESENT_MASK) -/* - * The mask used to denote special SPTEs, which can be either MMIO SPTEs or - * Access Tracking SPTEs. We use bit 62 instead of bit 63 to avoid conflicting - * with the SVE bit in EPT PTEs. 
- */ -#define SPTE_SPECIAL_MASK (1ULL << 62) - /* apic attention bits */ #define KVM_APIC_CHECK_VAPIC 0 /* @@ -319,8 +312,12 @@ struct kvm_rmap_head { struct kvm_mmu_page { struct list_head link; struct hlist_node hash_link; + struct list_head lpage_disallowed_link; + bool unsync; + u8 mmu_valid_gen; bool mmio_cached; + bool lpage_disallowed; /* Can't be replaced by an equiv large page */ /* * The following two entries are used to key the shadow page in the @@ -717,7 +714,7 @@ struct kvm_vcpu_arch { /* Cache MMIO info */ u64 mmio_gva; - unsigned access; + unsigned mmio_access; gfn_t mmio_gfn; u64 mmio_gen; @@ -843,6 +840,8 @@ struct kvm_hv { /* How many vCPUs have VP index != vCPU index */ atomic_t num_mismatched_vp_indexes; + + struct hv_partition_assist_pg *hv_pa_pg; }; enum kvm_irqchip_mode { @@ -856,11 +855,14 @@ struct kvm_arch { unsigned long n_requested_mmu_pages; unsigned long n_max_mmu_pages; unsigned int indirect_shadow_pages; + u8 mmu_valid_gen; struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES]; /* * Hash table of struct kvm_mmu_page. */ struct list_head active_mmu_pages; + struct list_head zapped_obsolete_pages; + struct list_head lpage_disallowed_mmu_pages; struct kvm_page_track_notifier_node mmu_sp_tracker; struct kvm_page_track_notifier_head track_notifier_head; @@ -935,6 +937,7 @@ struct kvm_arch { bool exception_payload_enabled; struct kvm_pmu_event_filter *pmu_event_filter; + struct task_struct *nx_lpage_recovery_thread; }; struct kvm_vm_stat { @@ -948,6 +951,7 @@ struct kvm_vm_stat { ulong mmu_unsync; ulong remote_tlb_flush; ulong lpages; + ulong nx_lpage_splits; ulong max_mmu_page_hash_collisions; }; @@ -1070,7 +1074,7 @@ struct kvm_x86_ops { void (*run)(struct kvm_vcpu *vcpu); int (*handle_exit)(struct kvm_vcpu *vcpu); - void (*skip_emulated_instruction)(struct kvm_vcpu *vcpu); + int (*skip_emulated_instruction)(struct kvm_vcpu *vcpu); void (*set_interrupt_shadow)(struct kvm_vcpu *vcpu, int mask); u32 (*get_interrupt_shadow)(struct kvm_vcpu *vcpu); void (*patch_hypercall)(struct kvm_vcpu *vcpu, @@ -1191,7 +1195,7 @@ struct kvm_x86_ops { int (*set_nested_state)(struct kvm_vcpu *vcpu, struct kvm_nested_state __user *user_kvm_nested_state, struct kvm_nested_state *kvm_state); - void (*get_vmcs12_pages)(struct kvm_vcpu *vcpu); + bool (*get_vmcs12_pages)(struct kvm_vcpu *vcpu); int (*smi_allowed)(struct kvm_vcpu *vcpu); int (*pre_enter_smm)(struct kvm_vcpu *vcpu, char *smstate); @@ -1209,6 +1213,9 @@ struct kvm_x86_ops { uint16_t (*nested_get_evmcs_version)(struct kvm_vcpu *vcpu); bool (*need_emulation_on_page_fault)(struct kvm_vcpu *vcpu); + + bool (*apic_init_signal_blocked)(struct kvm_vcpu *vcpu); + int (*enable_direct_tlbflush)(struct kvm_vcpu *vcpu); }; struct kvm_arch_async_pf { @@ -1308,26 +1315,52 @@ extern u64 kvm_default_tsc_scaling_ratio; extern u64 kvm_mce_cap_supported; -enum emulation_result { - EMULATE_DONE, /* no further processing */ - EMULATE_USER_EXIT, /* kvm_run ready for userspace exit */ - EMULATE_FAIL, /* can't emulate this instruction */ -}; - +/* + * EMULTYPE_NO_DECODE - Set when re-emulating an instruction (after completing + * userspace I/O) to indicate that the emulation context + * should be reused as is, i.e. skip initialization of + * emulation context, instruction fetch and decode. + * + * EMULTYPE_TRAP_UD - Set when emulating an intercepted #UD from hardware. + * Indicates that only select instructions (tagged with + * EmulateOnUD) should be emulated (to minimize the emulator + * attack surface). See also EMULTYPE_TRAP_UD_FORCED.
+ * + * EMULTYPE_SKIP - Set when emulating solely to skip an instruction, i.e. to + * decode the instruction length. For use *only* by + * kvm_x86_ops->skip_emulated_instruction() implementations. + * + * EMULTYPE_ALLOW_RETRY - Set when the emulator should resume the guest to + * retry native execution under certain conditions. + * + * EMULTYPE_TRAP_UD_FORCED - Set when emulating an intercepted #UD that was + * triggered by KVM's magic "force emulation" prefix, + * which is opt in via module param (off by default). + * Bypasses EmulateOnUD restriction despite emulating + * due to an intercepted #UD (see EMULTYPE_TRAP_UD). + * Used to test the full emulator from userspace. + * + * EMULTYPE_VMWARE_GP - Set when emulating an intercepted #GP for VMware + * backdoor emulation, which is opt in via module param. + * VMware backdoor emulation handles select instructions + * and reinjects the #GP for all other cases. + */ #define EMULTYPE_NO_DECODE (1 << 0) #define EMULTYPE_TRAP_UD (1 << 1) #define EMULTYPE_SKIP (1 << 2) #define EMULTYPE_ALLOW_RETRY (1 << 3) -#define EMULTYPE_NO_UD_ON_FAIL (1 << 4) -#define EMULTYPE_VMWARE (1 << 5) +#define EMULTYPE_TRAP_UD_FORCED (1 << 4) +#define EMULTYPE_VMWARE_GP (1 << 5) int kvm_emulate_instruction(struct kvm_vcpu *vcpu, int emulation_type); int kvm_emulate_instruction_from_buffer(struct kvm_vcpu *vcpu, void *insn, int insn_len); void kvm_enable_efer_bits(u64); bool kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer); -int kvm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr); -int kvm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr); +int kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data); +int kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data); +int kvm_emulate_rdmsr(struct kvm_vcpu *vcpu); +int kvm_emulate_wrmsr(struct kvm_vcpu *vcpu); struct x86_emulate_ctxt; @@ -1500,7 +1533,7 @@ enum { #define kvm_arch_vcpu_memslots_id(vcpu) ((vcpu)->arch.hflags & HF_SMM_MASK ? 1 : 0) #define kvm_memslots_for_spte_role(kvm, role) __kvm_memslots(kvm, (role).smm) -asmlinkage void __noreturn kvm_spurious_fault(void); +asmlinkage void kvm_spurious_fault(void); /* * Hardware virtualization extension instructions may fault if a @@ -1508,24 +1541,14 @@ asmlinkage void kvm_spurious_fault(void); * Usually after catching the fault we just panic; during reboot * instead the instruction is ignored.
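/*
 * The EMULTYPE_* values above are independent flag bits, so callers build
 * an emulation_type mask and the emulator tests it with bitwise AND. A
 * hypothetical caller sketch — the demo_* name is invented, while
 * kvm_emulate_instruction() is the real entry point declared above:
 */
static int demo_handle_ud(struct kvm_vcpu *vcpu, bool forced_prefix_seen)
{
        int emul_type = EMULTYPE_TRAP_UD;

        /* The magic prefix opts this #UD out of the EmulateOnUD filter. */
        if (forced_prefix_seen)
                emul_type = EMULTYPE_TRAP_UD_FORCED;

        return kvm_emulate_instruction(vcpu, emul_type);
}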
*/ -#define ____kvm_handle_fault_on_reboot(insn, cleanup_insn) \ +#define __kvm_handle_fault_on_reboot(insn) \ "666: \n\t" \ insn "\n\t" \ "jmp 668f \n\t" \ "667: \n\t" \ "call kvm_spurious_fault \n\t" \ "668: \n\t" \ - ".pushsection .fixup, \"ax\" \n\t" \ - "700: \n\t" \ - cleanup_insn "\n\t" \ - "cmpb $0, kvm_rebooting\n\t" \ - "je 667b \n\t" \ - "jmp 668b \n\t" \ - ".popsection \n\t" \ - _ASM_EXTABLE(666b, 700b) - -#define __kvm_handle_fault_on_reboot(insn) \ - ____kvm_handle_fault_on_reboot(insn, "") + _ASM_EXTABLE(666b, 667b) #define KVM_ARCH_WANT_MMU_NOTIFIER int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end); @@ -1581,6 +1604,13 @@ bool kvm_intr_is_single_vcpu(struct kvm *kvm, struct kvm_lapic_irq *irq, void kvm_set_msi_irq(struct kvm *kvm, struct kvm_kernel_irq_routing_entry *e, struct kvm_lapic_irq *irq); +static inline bool kvm_irq_is_postable(struct kvm_lapic_irq *irq) +{ + /* We can only post Fixed and LowPrio IRQs */ + return (irq->delivery_mode == dest_Fixed || + irq->delivery_mode == dest_LowestPrio); +} + static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) { if (kvm_x86_ops->vcpu_blocking) diff --git a/arch/x86/include/asm/mem_encrypt.h b/arch/x86/include/asm/mem_encrypt.h index 0c196c47d621..848ce43b9040 100644 --- a/arch/x86/include/asm/mem_encrypt.h +++ b/arch/x86/include/asm/mem_encrypt.h @@ -92,6 +92,16 @@ early_set_memory_encrypted(unsigned long vaddr, unsigned long size) { return 0; extern char __start_bss_decrypted[], __end_bss_decrypted[], __start_bss_decrypted_unused[]; +static inline bool mem_encrypt_active(void) +{ + return sme_me_mask; +} + +static inline u64 sme_get_me_mask(void) +{ + return sme_me_mask; +} + #endif /* __ASSEMBLY__ */ #endif /* __X86_MEM_ENCRYPT_H__ */ diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index 2ef31cc8c529..f4138aeb4280 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -218,7 +218,8 @@ static inline struct hv_vp_assist_page *hv_get_vp_assist_page(unsigned int cpu) void __init hyperv_init(void); void hyperv_setup_mmu_ops(void); - +void *hv_alloc_hyperv_page(void); +void hv_free_hyperv_page(unsigned long addr); void hyperv_reenlightenment_intr(struct pt_regs *regs); void set_hv_tscchange_cb(void (*cb)(void)); void clear_hv_tscchange_cb(void); @@ -241,6 +242,8 @@ static inline void hv_apic_init(void) {} #else /* CONFIG_HYPERV */ static inline void hyperv_init(void) {} static inline void hyperv_setup_mmu_ops(void) {} +static inline void *hv_alloc_hyperv_page(void) { return NULL; } +static inline void hv_free_hyperv_page(unsigned long addr) {} static inline void set_hv_tscchange_cb(void (*cb)(void)) {} static inline void clear_hv_tscchange_cb(void) {} static inline void hyperv_stop_tsc_emulation(void) {}; diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 271d837d69a8..6a3124664289 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -93,6 +93,18 @@ * Microarchitectural Data * Sampling (MDS) vulnerabilities. */ +#define ARCH_CAP_PSCHANGE_MC_NO BIT(6) /* + * The processor is not susceptible to a + * machine check error due to modifying the + * code page size along with either the + * physical address or cache type + * without TLB invalidation. + */ +#define ARCH_CAP_TSX_CTRL_MSR BIT(7) /* MSR for TSX control is available. */ +#define ARCH_CAP_TAA_NO BIT(8) /* + * Not susceptible to + * TSX Async Abort (TAA) vulnerabilities. 
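/*
 * Typical shape of a caller of the new mem_encrypt_active() and
 * sme_get_me_mask() helpers above — a hedged sketch, not a specific call
 * site: work is gated on the mask being non-zero, and the mask is OR'd
 * into page-table protections that must stay encrypted:
 */
static unsigned long demo_encrypt_prot(unsigned long prot)
{
        if (!mem_encrypt_active())
                return prot;                    /* SME/SEV not active */

        return prot | sme_get_me_mask();        /* keep the C-bit set */
}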
+ */ #define MSR_IA32_FLUSH_CMD 0x0000010b #define L1D_FLUSH BIT(0) /* @@ -103,6 +115,10 @@ #define MSR_IA32_BBL_CR_CTL 0x00000119 #define MSR_IA32_BBL_CR_CTL3 0x0000011e +#define MSR_IA32_TSX_CTRL 0x00000122 +#define TSX_CTRL_RTM_DISABLE BIT(0) /* Disable RTM feature */ +#define TSX_CTRL_CPUID_CLEAR BIT(1) /* Disable TSX enumeration */ + #define MSR_IA32_SYSENTER_CS 0x00000174 #define MSR_IA32_SYSENTER_ESP 0x00000175 #define MSR_IA32_SYSENTER_EIP 0x00000176 @@ -375,14 +391,22 @@ /* Alternative perfctr range with full access. */ #define MSR_IA32_PMC0 0x000004c1 -/* AMD64 MSRs. Not complete. See the architecture manual for a more - complete list. */ +/* Auto-reload via MSR instead of DS area */ +#define MSR_RELOAD_PMC0 0x000014c1 +#define MSR_RELOAD_FIXED_CTR0 0x00001309 +/* + * AMD64 MSRs. Not complete. See the architecture manual for a more + * complete list. + */ #define MSR_AMD64_PATCH_LEVEL 0x0000008b #define MSR_AMD64_TSC_RATIO 0xc0000104 #define MSR_AMD64_NB_CFG 0xc001001f #define MSR_AMD64_CPUID_FN_1 0xc0011004 #define MSR_AMD64_PATCH_LOADER 0xc0010020 +#define MSR_AMD_PERF_CTL 0xc0010062 +#define MSR_AMD_PERF_STATUS 0xc0010063 +#define MSR_AMD_PSTATE_DEF_BASE 0xc0010064 #define MSR_AMD64_OSVW_ID_LENGTH 0xc0010140 #define MSR_AMD64_OSVW_STATUS 0xc0010141 #define MSR_AMD64_LS_CFG 0xc0011020 @@ -561,9 +585,6 @@ #define MSR_IA32_PERF_STATUS 0x00000198 #define MSR_IA32_PERF_CTL 0x00000199 #define INTEL_PERF_CTL_MASK 0xffff -#define MSR_AMD_PSTATE_DEF_BASE 0xc0010064 -#define MSR_AMD_PERF_STATUS 0xc0010063 -#define MSR_AMD_PERF_CTL 0xc0010062 #define MSR_IA32_MPERF 0x000000e7 #define MSR_IA32_APERF 0x000000e8 diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h index 5cc3930cb465..86f20d520a07 100644 --- a/arch/x86/include/asm/msr.h +++ b/arch/x86/include/asm/msr.h @@ -233,8 +233,7 @@ static __always_inline unsigned long long rdtsc_ordered(void) * Thus, use the preferred barrier on the respective CPU, aiming for * RDTSCP as the default. 
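/*
 * How the new MSR_IA32_TSX_CTRL bits above fit together — a sketch
 * modelled on the tsx=off flow, not a verbatim copy: availability is
 * advertised by ARCH_CAP_TSX_CTRL_MSR in MSR_IA32_ARCH_CAPABILITIES,
 * then both control bits are set to abort RTM transactions and hide
 * HLE/RTM from CPUID:
 */
static void demo_tsx_disable(void)
{
        u64 arch_cap, tsx;

        rdmsrl(MSR_IA32_ARCH_CAPABILITIES, arch_cap);
        if (!(arch_cap & ARCH_CAP_TSX_CTRL_MSR))
                return;                         /* control MSR not present */

        rdmsrl(MSR_IA32_TSX_CTRL, tsx);
        tsx |= TSX_CTRL_RTM_DISABLE;            /* force RTM to abort     */
        tsx |= TSX_CTRL_CPUID_CLEAR;            /* clear CPUID.RTM/HLE    */
        wrmsrl(MSR_IA32_TSX_CTRL, tsx);
}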
*/ - asm volatile(ALTERNATIVE_3("rdtsc", - "mfence; rdtsc", X86_FEATURE_MFENCE_RDTSC, + asm volatile(ALTERNATIVE_2("rdtsc", "lfence; rdtsc", X86_FEATURE_LFENCE_RDTSC, "rdtscp", X86_FEATURE_RDTSCP) : EAX_EDX_RET(val, low, high) diff --git a/arch/x86/include/asm/mwait.h b/arch/x86/include/asm/mwait.h index e28f8b723b5c..9d5252c9685c 100644 --- a/arch/x86/include/asm/mwait.h +++ b/arch/x86/include/asm/mwait.h @@ -21,7 +21,7 @@ #define MWAIT_ECX_INTERRUPT_BREAK 0x1 #define MWAITX_ECX_TIMER_ENABLE BIT(1) #define MWAITX_MAX_LOOPS ((u32)-1) -#define MWAITX_DISABLE_CSTATES 0xf +#define MWAITX_DISABLE_CSTATES 0xf0 static inline void __monitor(const void *eax, unsigned long ecx, unsigned long edx) diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 80bc209c0708..5c24a7b35166 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -314,7 +314,7 @@ DECLARE_STATIC_KEY_FALSE(mds_idle_clear); #include <asm/segment.h> /** - * mds_clear_cpu_buffers - Mitigation for MDS vulnerability + * mds_clear_cpu_buffers - Mitigation for MDS and TAA vulnerability * * This uses the otherwise unused and obsolete VERW instruction in * combination with microcode which triggers a CPU buffer flush when the @@ -337,7 +337,7 @@ static inline void mds_clear_cpu_buffers(void) } /** - * mds_user_clear_cpu_buffers - Mitigation for MDS vulnerability + * mds_user_clear_cpu_buffers - Mitigation for MDS and TAA vulnerability * * Clear CPU buffers if the corresponding static key is enabled */ diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index dce26f1d13e1..69089d46f128 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -139,18 +139,6 @@ static inline void __write_cr4(unsigned long x) PVOP_VCALL1(cpu.write_cr4, x); } -#ifdef CONFIG_X86_64 -static inline unsigned long read_cr8(void) -{ - return PVOP_CALL0(unsigned long, cpu.read_cr8); -} - -static inline void write_cr8(unsigned long x) -{ - PVOP_VCALL1(cpu.write_cr8, x); -} -#endif - static inline void arch_safe_halt(void) { PVOP_VCALL0(irq.safe_halt); diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h index 639b2df445ee..70b654f3ffe5 100644 --- a/arch/x86/include/asm/paravirt_types.h +++ b/arch/x86/include/asm/paravirt_types.h @@ -119,11 +119,6 @@ struct pv_cpu_ops { void (*write_cr4)(unsigned long); -#ifdef CONFIG_X86_64 - unsigned long (*read_cr8)(void); - void (*write_cr8)(unsigned long); -#endif - /* Segment descriptor handling */ void (*load_tr_desc)(void); void (*load_gdt)(const struct desc_ptr *); diff --git a/arch/x86/include/asm/pgtable_32.h b/arch/x86/include/asm/pgtable_32.h index c78da8eda8f2..0dca7f7aeff2 100644 --- a/arch/x86/include/asm/pgtable_32.h +++ b/arch/x86/include/asm/pgtable_32.h @@ -29,8 +29,6 @@ extern pgd_t swapper_pg_dir[1024]; extern pgd_t initial_page_table[1024]; extern pmd_t initial_pg_pmd[]; -static inline void pgtable_cache_init(void) { } -static inline void check_pgt_cache(void) { } void paging_init(void); void sync_initial_page_table(void); diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h index 4990d26dfc73..0b6c4042942a 100644 --- a/arch/x86/include/asm/pgtable_64.h +++ b/arch/x86/include/asm/pgtable_64.h @@ -241,9 +241,6 @@ extern void cleanup_highmap(void); #define HAVE_ARCH_UNMAPPED_AREA #define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN -#define pgtable_cache_init() do { } while (0) -#define check_pgt_cache() do { } while (0) - #define 
PAGE_AGP PAGE_KERNEL_NOCACHE #define HAVE_PAGE_AGP 1 diff --git a/arch/x86/include/asm/preempt.h b/arch/x86/include/asm/preempt.h index 99a7fa9ab0a3..3d4cb83a8828 100644 --- a/arch/x86/include/asm/preempt.h +++ b/arch/x86/include/asm/preempt.h @@ -102,7 +102,7 @@ static __always_inline bool should_resched(int preempt_offset) return unlikely(raw_cpu_read_4(__preempt_count) == preempt_offset); } -#ifdef CONFIG_PREEMPT +#ifdef CONFIG_PREEMPTION extern asmlinkage void ___preempt_schedule(void); # define __preempt_schedule() \ asm volatile ("call ___preempt_schedule" : ASM_CALL_CONSTRAINT) diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 6e0a3b43d027..54f5d54280f6 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -988,4 +988,11 @@ enum mds_mitigations { MDS_MITIGATION_VMWERV, }; +enum taa_mitigations { + TAA_MITIGATION_OFF, + TAA_MITIGATION_UCODE_NEEDED, + TAA_MITIGATION_VERW, + TAA_MITIGATION_TSX_DISABLED, +}; + #endif /* _ASM_X86_PROCESSOR_H */ diff --git a/arch/x86/include/asm/pti.h b/arch/x86/include/asm/pti.h index 5df09a0b80b8..07375b476c4f 100644 --- a/arch/x86/include/asm/pti.h +++ b/arch/x86/include/asm/pti.h @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _ASM_X86_PTI_H #define _ASM_X86_PTI_H #ifndef __ASSEMBLY__ diff --git a/arch/x86/include/asm/qspinlock.h b/arch/x86/include/asm/qspinlock.h index bd5ac6cc37db..444d6fd9a6d8 100644 --- a/arch/x86/include/asm/qspinlock.h +++ b/arch/x86/include/asm/qspinlock.h @@ -63,10 +63,25 @@ static inline bool vcpu_is_preempted(long cpu) #endif #ifdef CONFIG_PARAVIRT +/* + * virt_spin_lock_key - enables (by default) the virt_spin_lock() hijack. + * + * Native (and PV wanting native due to vCPU pinning) should disable this key. + * It is done in this backwards fashion to only have a single direction change, + * which removes ordering between native_pv_spin_init() and HV setup. + */ DECLARE_STATIC_KEY_TRUE(virt_spin_lock_key); void native_pv_lock_init(void) __init; +/* + * Shortcut for the queued_spin_lock_slowpath() function that allows + * virt to hijack it. + * + * Returns: + * true - lock has been negotiated, all done; + * false - queued_spin_lock_slowpath() will do its thing. 
+ */ #define virt_spin_lock virt_spin_lock static inline bool virt_spin_lock(struct qspinlock *lock) { diff --git a/arch/x86/include/asm/realmode.h b/arch/x86/include/asm/realmode.h index c53682303c9c..09ecc32f6524 100644 --- a/arch/x86/include/asm/realmode.h +++ b/arch/x86/include/asm/realmode.h @@ -20,7 +20,6 @@ struct real_mode_header { u32 ro_end; /* SMP trampoline */ u32 trampoline_start; - u32 trampoline_status; u32 trampoline_header; #ifdef CONFIG_X86_64 u32 trampoline_pgd; diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index e1356a3b8223..e15f364efbcc 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -143,6 +143,7 @@ void play_dead_common(void); void wbinvd_on_cpu(int cpu); int wbinvd_on_all_cpus(void); +void native_smp_send_reschedule(int cpu); void native_send_call_func_ipi(const struct cpumask *mask); void native_send_call_func_single_ipi(int cpu); void x86_idle_thread_init(unsigned int cpu, struct task_struct *idle); diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h index 219be88a59d2..6d37b8fcfc77 100644 --- a/arch/x86/include/asm/special_insns.h +++ b/arch/x86/include/asm/special_insns.h @@ -73,20 +73,6 @@ static inline unsigned long native_read_cr4(void) void native_write_cr4(unsigned long val); -#ifdef CONFIG_X86_64 -static inline unsigned long native_read_cr8(void) -{ - unsigned long cr8; - asm volatile("movq %%cr8,%0" : "=r" (cr8)); - return cr8; -} - -static inline void native_write_cr8(unsigned long val) -{ - asm volatile("movq %0,%%cr8" :: "r" (val) : "memory"); -} -#endif - #ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS static inline u32 rdpkru(void) { @@ -200,16 +186,6 @@ static inline void wbinvd(void) #ifdef CONFIG_X86_64 -static inline unsigned long read_cr8(void) -{ - return native_read_cr8(); -} - -static inline void write_cr8(unsigned long x) -{ - native_write_cr8(x); -} - static inline void load_gs_index(unsigned selector) { native_load_gs_index(selector); diff --git a/arch/x86/include/asm/suspend_64.h b/arch/x86/include/asm/suspend_64.h index a7af9f53c0cb..35bb35d28733 100644 --- a/arch/x86/include/asm/suspend_64.h +++ b/arch/x86/include/asm/suspend_64.h @@ -34,7 +34,7 @@ struct saved_context { */ unsigned long kernelmode_gs_base, usermode_gs_base, fs_base; - unsigned long cr0, cr2, cr3, cr4, cr8; + unsigned long cr0, cr2, cr3, cr4; u64 misc_enable; bool misc_enable_saved; struct saved_msrs saved_msrs; diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h index dec9c1e84c78..6ece8561ba66 100644 --- a/arch/x86/include/asm/svm.h +++ b/arch/x86/include/asm/svm.h @@ -52,6 +52,7 @@ enum { INTERCEPT_MWAIT, INTERCEPT_MWAIT_COND, INTERCEPT_XSETBV, + INTERCEPT_RDPRU, }; diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h index 2dc4a021beea..8db3fdb6102e 100644 --- a/arch/x86/include/asm/syscall.h +++ b/arch/x86/include/asm/syscall.h @@ -36,6 +36,10 @@ extern const sys_call_ptr_t sys_call_table[]; extern const sys_call_ptr_t ia32_sys_call_table[]; #endif +#ifdef CONFIG_X86_X32_ABI +extern const sys_call_ptr_t x32_sys_call_table[]; +#endif + /* * Only the low 32 bits of orig_ax are meaningful, so we return int. 
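/*
 * The virt_spin_lock() body itself is elided by the diff context above;
 * its known shape in this era is a plain test-and-set fallback taken only
 * while the static key is enabled — sketched here for reference, not part
 * of the patch:
 */
static inline bool demo_virt_spin_lock(struct qspinlock *lock)
{
        if (!static_branch_likely(&virt_spin_lock_key))
                return false;           /* native: use the queued slowpath */

        /*
         * Unfair test-and-set loop: avoids queueing vCPUs that the
         * hypervisor may preempt while they hold a queue position.
         */
        do {
                while (atomic_read(&lock->val))
                        cpu_relax();
        } while (atomic_cmpxchg(&lock->val, 0, _Q_LOCKED_VAL) != 0);

        return true;
}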
* This importantly ignores the high bits on 64-bit, so comparisons diff --git a/arch/x86/include/asm/text-patching.h b/arch/x86/include/asm/text-patching.h index 70c09967a999..5e8319bb207a 100644 --- a/arch/x86/include/asm/text-patching.h +++ b/arch/x86/include/asm/text-patching.h @@ -45,8 +45,8 @@ extern void text_poke_early(void *addr, const void *opcode, size_t len); * no thread can be preempted in the instructions being modified (no iret to an * invalid instruction possible) or if the instructions are changed from a * consistent state to another consistent state atomically. - * On the local CPU you need to be protected again NMI or MCE handlers seeing an - * inconsistent instruction while you patch. + * On the local CPU you need to be protected against NMI or MCE handlers seeing + * an inconsistent instruction while you patch. */ extern void *text_poke(void *addr, const void *opcode, size_t len); extern void *text_poke_kgdb(void *addr, const void *opcode, size_t len); diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index 9c4435307ff8..61d93f062a36 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -444,8 +444,10 @@ __pu_label: \ ({ \ int __gu_err; \ __inttype(*(ptr)) __gu_val; \ + __typeof__(ptr) __gu_ptr = (ptr); \ + __typeof__(size) __gu_size = (size); \ __uaccess_begin_nospec(); \ - __get_user_size(__gu_val, (ptr), (size), __gu_err, -EFAULT); \ + __get_user_size(__gu_val, __gu_ptr, __gu_size, __gu_err, -EFAULT); \ __uaccess_end(); \ (x) = (__force __typeof__(*(ptr)))__gu_val; \ __builtin_expect(__gu_err, 0); \ @@ -732,5 +734,28 @@ do { \ if (unlikely(__gu_err)) goto err_label; \ } while (0) +/* + * We want the unsafe accessors to always be inlined and use + * the error labels - thus the macro games. 
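/*
 * Why the __get_user() change above snapshots its arguments into
 * __gu_ptr/__gu_size: a macro that expands a parameter more than once
 * re-runs any side effects in it. A tiny illustration of the hazard the
 * temporaries avoid (demo macros, not kernel code):
 */
#define DEMO_BAD_DOUBLE(x)  ((x) + (x))                      /* expands x twice */
#define DEMO_GOOD_DOUBLE(x) ({ __typeof__(x) _v = (x); _v + _v; })

/*
 * With int i = 0: DEMO_BAD_DOUBLE(i++) increments i twice (and is
 * undefined behaviour); DEMO_GOOD_DOUBLE(i++) evaluates i++ exactly once.
 */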
+ */ +#define unsafe_copy_loop(dst, src, len, type, label) \ + while (len >= sizeof(type)) { \ + unsafe_put_user(*(type *)src,(type __user *)dst,label); \ + dst += sizeof(type); \ + src += sizeof(type); \ + len -= sizeof(type); \ + } + +#define unsafe_copy_to_user(_dst,_src,_len,label) \ +do { \ + char __user *__ucu_dst = (_dst); \ + const char *__ucu_src = (_src); \ + size_t __ucu_len = (_len); \ + unsafe_copy_loop(__ucu_dst, __ucu_src, __ucu_len, u64, label); \ + unsafe_copy_loop(__ucu_dst, __ucu_src, __ucu_len, u32, label); \ + unsafe_copy_loop(__ucu_dst, __ucu_src, __ucu_len, u16, label); \ + unsafe_copy_loop(__ucu_dst, __ucu_src, __ucu_len, u8, label); \ +} while (0) + #endif /* _ASM_X86_UACCESS_H */ diff --git a/arch/x86/include/asm/unistd.h b/arch/x86/include/asm/unistd.h index 097589753fec..a7dd080749ce 100644 --- a/arch/x86/include/asm/unistd.h +++ b/arch/x86/include/asm/unistd.h @@ -5,12 +5,6 @@ #include <uapi/asm/unistd.h> -# ifdef CONFIG_X86_X32_ABI -# define __SYSCALL_MASK (~(__X32_SYSCALL_BIT)) -# else -# define __SYSCALL_MASK (~0) -# endif - # ifdef CONFIG_X86_32 # include <asm/unistd_32.h> diff --git a/arch/x86/include/asm/uv/uv.h b/arch/x86/include/asm/uv/uv.h index e60c45fd3679..6bc6d89d8e2a 100644 --- a/arch/x86/include/asm/uv/uv.h +++ b/arch/x86/include/asm/uv/uv.h @@ -12,10 +12,12 @@ struct mm_struct; #ifdef CONFIG_X86_UV #include <linux/efi.h> +extern unsigned long uv_systab_phys; + extern enum uv_system_type get_uv_system_type(void); static inline bool is_early_uv_system(void) { - return !((efi.uv_systab == EFI_INVALID_TABLE_ADDR) || !efi.uv_systab); + return uv_systab_phys && uv_systab_phys != EFI_INVALID_TABLE_ADDR; } extern int is_uv_system(void); extern int is_uv_hubless(void); diff --git a/arch/x86/include/asm/vdso/gettimeofday.h b/arch/x86/include/asm/vdso/gettimeofday.h index ba71a63cdac4..e9ee139cf29e 100644 --- a/arch/x86/include/asm/vdso/gettimeofday.h +++ b/arch/x86/include/asm/vdso/gettimeofday.h @@ -51,7 +51,7 @@ extern struct pvclock_vsyscall_time_info pvclock_page __attribute__((visibility("hidden"))); #endif -#ifdef CONFIG_HYPERV_TSCPAGE +#ifdef CONFIG_HYPERV_TIMER extern struct ms_hyperv_tsc_page hvclock_page __attribute__((visibility("hidden"))); #endif @@ -228,7 +228,7 @@ static u64 vread_pvclock(void) } #endif -#ifdef CONFIG_HYPERV_TSCPAGE +#ifdef CONFIG_HYPERV_TIMER static u64 vread_hvclock(void) { return hv_read_tsc_page(&hvclock_page); @@ -251,7 +251,7 @@ static inline u64 __arch_get_hw_counter(s32 clock_mode) return vread_pvclock(); } #endif -#ifdef CONFIG_HYPERV_TSCPAGE +#ifdef CONFIG_HYPERV_TIMER if (clock_mode == VCLOCK_HVCLOCK) { barrier(); return vread_hvclock(); diff --git a/arch/x86/include/asm/vmware.h b/arch/x86/include/asm/vmware.h new file mode 100644 index 000000000000..ac9fc51e2b18 --- /dev/null +++ b/arch/x86/include/asm/vmware.h @@ -0,0 +1,57 @@ +/* SPDX-License-Identifier: GPL-2.0 or MIT */ +#ifndef _ASM_X86_VMWARE_H +#define _ASM_X86_VMWARE_H + +#include <asm/cpufeatures.h> +#include <asm/alternative.h> +#include <linux/stringify.h> + +/* + * The hypercall definitions differ in the low word of the %edx argument + * in the following way: the old port base interface uses the port + * number to distinguish between high- and low bandwidth versions. + * + * The new vmcall interface instead uses a set of flags to select + * bandwidth mode and transfer direction. The flags should be loaded + * into %dx by any user and are automatically replaced by the port + * number if the VMWARE_HYPERVISOR_PORT method is used. 
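/*
 * Usage sketch for the unsafe_copy_to_user() added above: it is only
 * legal inside a user_access_begin()/user_access_end() window, with a
 * local label for the fault path (invented demo_* caller):
 */
static int demo_copy_to_user(char __user *dst, const char *src, size_t len)
{
        if (!user_access_begin(dst, len))
                return -EFAULT;

        /* Copies in descending u64/u32/u16/u8 chunks, faulting to the label. */
        unsafe_copy_to_user(dst, src, len, Efault);
        user_access_end();
        return 0;
Efault:
        user_access_end();
        return -EFAULT;
}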
+ * + * In short, new driver code should strictly use the new definition of + * %dx content. + */ + +/* Old port-based version */ +#define VMWARE_HYPERVISOR_PORT 0x5658 +#define VMWARE_HYPERVISOR_PORT_HB 0x5659 + +/* Current vmcall / vmmcall version */ +#define VMWARE_HYPERVISOR_HB BIT(0) +#define VMWARE_HYPERVISOR_OUT BIT(1) + +/* The low bandwidth call. The low word of edx is presumed clear. */ +#define VMWARE_HYPERCALL \ + ALTERNATIVE_2("movw $" __stringify(VMWARE_HYPERVISOR_PORT) ", %%dx; " \ + "inl (%%dx), %%eax", \ + "vmcall", X86_FEATURE_VMCALL, \ + "vmmcall", X86_FEATURE_VMW_VMMCALL) + +/* + * The high bandwidth out call. The low word of edx is presumed to have the + * HB and OUT bits set. + */ +#define VMWARE_HYPERCALL_HB_OUT \ + ALTERNATIVE_2("movw $" __stringify(VMWARE_HYPERVISOR_PORT_HB) ", %%dx; " \ + "rep outsb", \ + "vmcall", X86_FEATURE_VMCALL, \ + "vmmcall", X86_FEATURE_VMW_VMMCALL) + +/* + * The high bandwidth in call. The low word of edx is presumed to have the + * HB bit set. + */ +#define VMWARE_HYPERCALL_HB_IN \ + ALTERNATIVE_2("movw $" __stringify(VMWARE_HYPERVISOR_PORT_HB) ", %%dx; " \ + "rep insb", \ + "vmcall", X86_FEATURE_VMCALL, \ + "vmmcall", X86_FEATURE_VMW_VMMCALL) +#endif diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index a39136b0d509..1835767aa335 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -69,6 +69,7 @@ #define SECONDARY_EXEC_PT_USE_GPA 0x01000000 #define SECONDARY_EXEC_MODE_BASED_EPT_EXEC 0x00400000 #define SECONDARY_EXEC_TSC_SCALING 0x02000000 +#define SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE 0x04000000 #define PIN_BASED_EXT_INTR_MASK 0x00000001 #define PIN_BASED_NMI_EXITING 0x00000008 @@ -110,6 +111,7 @@ #define VMX_MISC_SAVE_EFER_LMA 0x00000020 #define VMX_MISC_ACTIVITY_HLT 0x00000040 #define VMX_MISC_ZERO_LEN_INS 0x40000000 +#define VMX_MISC_MSR_LIST_MULTIPLIER 512 /* VMFUNC functions */ #define VMX_VMFUNC_EPTP_SWITCHING 0x00000001 @@ -562,6 +564,20 @@ enum vm_instruction_error_number { VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID = 28, }; +/* + * VM-instruction errors that can be encountered on VM-Enter, used to trace + * nested VM-Enter failures reported by hardware. Errors unique to VM-Enter + * from a SMI Transfer Monitor are not included as things have gone seriously + * sideways if we get one of those... 
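/*
 * Invocation sketch for the VMWARE_HYPERCALL macro above, following the
 * register protocol of the existing port interface (%eax = magic,
 * %ecx = command, %edx = 0 so the flags word starts clear); the magic
 * value is the long-standing "VMXh" constant, inlined here as an
 * assumption rather than taken from this patch:
 */
#define DEMO_VMWARE_MAGIC 0x564D5868u           /* "VMXh" */

static inline u32 demo_vmware_hypercall(u32 cmd, u32 arg)
{
        u32 eax;

        asm volatile(VMWARE_HYPERCALL
                     : "=a" (eax)
                     : "a" (DEMO_VMWARE_MAGIC), "b" (arg),
                       "c" (cmd), "d" (0)
                     : "memory");
        return eax;
}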
+ */ +#define VMX_VMENTER_INSTRUCTION_ERRORS \ + { VMXERR_VMLAUNCH_NONCLEAR_VMCS, "VMLAUNCH_NONCLEAR_VMCS" }, \ + { VMXERR_VMRESUME_NONLAUNCHED_VMCS, "VMRESUME_NONLAUNCHED_VMCS" }, \ + { VMXERR_VMRESUME_AFTER_VMXOFF, "VMRESUME_AFTER_VMXOFF" }, \ + { VMXERR_ENTRY_INVALID_CONTROL_FIELD, "VMENTRY_INVALID_CONTROL_FIELD" }, \ + { VMXERR_ENTRY_INVALID_HOST_STATE_FIELD, "VMENTRY_INVALID_HOST_STATE_FIELD" }, \ + { VMXERR_ENTRY_EVENTS_BLOCKED_BY_MOV_SS, "VMENTRY_EVENTS_BLOCKED_BY_MOV_SS" } + enum vmx_l1d_flush_state { VMENTER_L1D_FLUSH_AUTO, VMENTER_L1D_FLUSH_NEVER, diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index ac0934189017..19435858df5f 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -134,10 +134,12 @@ struct x86_hyper_init { /** * struct x86_init_acpi - x86 ACPI init functions + * @set_root_pointer: set RSDP address * @get_root_pointer: get RSDP address * @reduced_hw_early_init: hardware reduced platform early init */ struct x86_init_acpi { + void (*set_root_pointer)(u64 addr); u64 (*get_root_pointer)(void); void (*reduced_hw_early_init)(void); }; diff --git a/arch/x86/include/asm/xen/page-coherent.h b/arch/x86/include/asm/xen/page-coherent.h index 116777e7f387..63cd41b2e17a 100644 --- a/arch/x86/include/asm/xen/page-coherent.h +++ b/arch/x86/include/asm/xen/page-coherent.h @@ -21,18 +21,4 @@ static inline void xen_free_coherent_pages(struct device *hwdev, size_t size, free_pages((unsigned long) cpu_addr, get_order(size)); } -static inline void xen_dma_map_page(struct device *hwdev, struct page *page, - dma_addr_t dev_addr, unsigned long offset, size_t size, - enum dma_data_direction dir, unsigned long attrs) { } - -static inline void xen_dma_unmap_page(struct device *hwdev, dma_addr_t handle, - size_t size, enum dma_data_direction dir, - unsigned long attrs) { } - -static inline void xen_dma_sync_single_for_cpu(struct device *hwdev, - dma_addr_t handle, size_t size, enum dma_data_direction dir) { } - -static inline void xen_dma_sync_single_for_device(struct device *hwdev, - dma_addr_t handle, size_t size, enum dma_data_direction dir) { } - #endif /* _ASM_X86_XEN_PAGE_COHERENT_H */ diff --git a/arch/x86/include/uapi/asm/errno.h b/arch/x86/include/uapi/asm/errno.h deleted file mode 100644 index 4c82b503d92f..000000000000 --- a/arch/x86/include/uapi/asm/errno.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/errno.h> diff --git a/arch/x86/include/uapi/asm/fcntl.h b/arch/x86/include/uapi/asm/fcntl.h deleted file mode 100644 index 46ab12db5739..000000000000 --- a/arch/x86/include/uapi/asm/fcntl.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/fcntl.h> diff --git a/arch/x86/include/uapi/asm/ioctl.h b/arch/x86/include/uapi/asm/ioctl.h deleted file mode 100644 index b279fe06dfe5..000000000000 --- a/arch/x86/include/uapi/asm/ioctl.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/ioctl.h> diff --git a/arch/x86/include/uapi/asm/ioctls.h b/arch/x86/include/uapi/asm/ioctls.h deleted file mode 100644 index ec34c760665e..000000000000 --- a/arch/x86/include/uapi/asm/ioctls.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/ioctls.h> diff --git a/arch/x86/include/uapi/asm/ipcbuf.h b/arch/x86/include/uapi/asm/ipcbuf.h deleted file mode 100644 index 84c7e51cb6d0..000000000000 --- a/arch/x86/include/uapi/asm/ipcbuf.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/ipcbuf.h> diff --git a/arch/x86/include/uapi/asm/param.h b/arch/x86/include/uapi/asm/param.h deleted file mode 100644 index 965d45427975..000000000000 ---
a/arch/x86/include/uapi/asm/param.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/param.h> diff --git a/arch/x86/include/uapi/asm/resource.h b/arch/x86/include/uapi/asm/resource.h deleted file mode 100644 index 04bc4db8921b..000000000000 --- a/arch/x86/include/uapi/asm/resource.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/resource.h> diff --git a/arch/x86/include/uapi/asm/svm.h b/arch/x86/include/uapi/asm/svm.h index a9731f8a480f..2e8a30f06c74 100644 --- a/arch/x86/include/uapi/asm/svm.h +++ b/arch/x86/include/uapi/asm/svm.h @@ -75,6 +75,7 @@ #define SVM_EXIT_MWAIT 0x08b #define SVM_EXIT_MWAIT_COND 0x08c #define SVM_EXIT_XSETBV 0x08d +#define SVM_EXIT_RDPRU 0x08e #define SVM_EXIT_NPF 0x400 #define SVM_EXIT_AVIC_INCOMPLETE_IPI 0x401 #define SVM_EXIT_AVIC_UNACCELERATED_ACCESS 0x402 diff --git a/arch/x86/include/uapi/asm/termbits.h b/arch/x86/include/uapi/asm/termbits.h deleted file mode 100644 index 3935b106de79..000000000000 --- a/arch/x86/include/uapi/asm/termbits.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/termbits.h> diff --git a/arch/x86/include/uapi/asm/termios.h b/arch/x86/include/uapi/asm/termios.h deleted file mode 100644 index 280d78a9d966..000000000000 --- a/arch/x86/include/uapi/asm/termios.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/termios.h> diff --git a/arch/x86/include/uapi/asm/types.h b/arch/x86/include/uapi/asm/types.h deleted file mode 100644 index 9d5c11a24279..000000000000 --- a/arch/x86/include/uapi/asm/types.h +++ /dev/null @@ -1,7 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -#ifndef _ASM_X86_TYPES_H -#define _ASM_X86_TYPES_H - -#include <asm-generic/types.h> - -#endif /* _ASM_X86_TYPES_H */ diff --git a/arch/x86/include/uapi/asm/unistd.h b/arch/x86/include/uapi/asm/unistd.h index 30d7d04d72d6..196fdd02b8b1 100644 --- a/arch/x86/include/uapi/asm/unistd.h +++ b/arch/x86/include/uapi/asm/unistd.h @@ -3,7 +3,7 @@ #define _UAPI_ASM_X86_UNISTD_H /* x32 syscall flag bit */ -#define __X32_SYSCALL_BIT 0x40000000 +#define __X32_SYSCALL_BIT 0x40000000UL #ifndef __KERNEL__ # ifdef __i386__ diff --git a/arch/x86/include/uapi/asm/vmx.h b/arch/x86/include/uapi/asm/vmx.h index f0b0c90dd398..3eb8411ab60e 100644 --- a/arch/x86/include/uapi/asm/vmx.h +++ b/arch/x86/include/uapi/asm/vmx.h @@ -31,6 +31,7 @@ #define EXIT_REASON_EXCEPTION_NMI 0 #define EXIT_REASON_EXTERNAL_INTERRUPT 1 #define EXIT_REASON_TRIPLE_FAULT 2 +#define EXIT_REASON_INIT_SIGNAL 3 #define EXIT_REASON_PENDING_INTERRUPT 7 #define EXIT_REASON_NMI_WINDOW 8 @@ -85,11 +86,14 @@ #define EXIT_REASON_PML_FULL 62 #define EXIT_REASON_XSAVES 63 #define EXIT_REASON_XRSTORS 64 +#define EXIT_REASON_UMWAIT 67 +#define EXIT_REASON_TPAUSE 68 #define VMX_EXIT_REASONS \ { EXIT_REASON_EXCEPTION_NMI, "EXCEPTION_NMI" }, \ { EXIT_REASON_EXTERNAL_INTERRUPT, "EXTERNAL_INTERRUPT" }, \ { EXIT_REASON_TRIPLE_FAULT, "TRIPLE_FAULT" }, \ + { EXIT_REASON_INIT_SIGNAL, "INIT_SIGNAL" }, \ { EXIT_REASON_PENDING_INTERRUPT, "PENDING_INTERRUPT" }, \ { EXIT_REASON_NMI_WINDOW, "NMI_WINDOW" }, \ { EXIT_REASON_TASK_SWITCH, "TASK_SWITCH" }, \ @@ -142,7 +146,9 @@ { EXIT_REASON_RDSEED, "RDSEED" }, \ { EXIT_REASON_PML_FULL, "PML_FULL" }, \ { EXIT_REASON_XSAVES, "XSAVES" }, \ - { EXIT_REASON_XRSTORS, "XRSTORS" } + { EXIT_REASON_XRSTORS, "XRSTORS" }, \ + { EXIT_REASON_UMWAIT, "UMWAIT" }, \ + { EXIT_REASON_TPAUSE, "TPAUSE" } #define VMX_ABORT_SAVE_GUEST_MSR_FAIL 1 #define VMX_ABORT_LOAD_HOST_PDPTE_FAIL 2 diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 17b33ef604f3..04205ce127a1 
100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -1760,6 +1760,11 @@ void __init arch_reserve_mem_area(acpi_physical_address addr, size_t size) e820__update_table_print(); } +void x86_default_set_root_pointer(u64 addr) +{ + boot_params.acpi_rsdp_addr = addr; +} + u64 x86_default_get_root_pointer(void) { return boot_params.acpi_rsdp_addr; diff --git a/arch/x86/kernel/acpi/wakeup_64.S b/arch/x86/kernel/acpi/wakeup_64.S index b0715c3ac18d..7f9ade13bbcf 100644 --- a/arch/x86/kernel/acpi/wakeup_64.S +++ b/arch/x86/kernel/acpi/wakeup_64.S @@ -18,8 +18,13 @@ ENTRY(wakeup_long64) movq saved_magic, %rax movq $0x123456789abcdef0, %rdx cmpq %rdx, %rax - jne bogus_64_magic + je 2f + /* stop here on a saved_magic mismatch */ + movq $0xbad6d61676963, %rcx +1: + jmp 1b +2: movw $__KERNEL_DS, %ax movw %ax, %ss movw %ax, %ds @@ -37,9 +42,6 @@ ENTRY(wakeup_long64) jmp *%rax ENDPROC(wakeup_long64) -bogus_64_magic: - jmp bogus_64_magic - ENTRY(do_suspend_lowlevel) FRAME_BEGIN subq $8, %rsp diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index ccd32013c47a..9d3a971ea364 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -713,7 +713,7 @@ void __init alternative_instructions(void) * Don't stop machine check exceptions while patching. * MCEs only happen when something got corrupted and in this * case we must do something about the corruption. - * Ignoring it is worse than a unlikely patching race. + * Ignoring it is worse than an unlikely patching race. * Also machine checks tend to be broadcast and if one CPU * goes into machine check the others follow quickly, so we don't * expect a machine check to cause undue problems during to code @@ -753,8 +753,8 @@ void __init alternative_instructions(void) * When you use this code to patch more than one byte of an instruction * you need to make sure that other CPUs cannot execute this code in parallel. * Also no thread must be currently preempted in the middle of these - * instructions. And on the local CPU you need to be protected again NMI or MCE - * handlers seeing an inconsistent instruction while you patch. + * instructions. And on the local CPU you need to be protected against NMI or + * MCE handlers seeing an inconsistent instruction while you patch. */ void __init_or_module text_poke_early(void *addr, const void *opcode, size_t len) diff --git a/arch/x86/kernel/amd_gart_64.c b/arch/x86/kernel/amd_gart_64.c index a585ea6f686a..a6ac3712db8b 100644 --- a/arch/x86/kernel/amd_gart_64.c +++ b/arch/x86/kernel/amd_gart_64.c @@ -677,7 +677,10 @@ static const struct dma_map_ops gart_dma_ops = { .unmap_page = gart_unmap_page, .alloc = gart_alloc_coherent, .free = gart_free_coherent, + .mmap = dma_common_mmap, + .get_sgtable = dma_common_get_sgtable, .dma_supported = dma_direct_supported, + .get_required_mask = dma_direct_get_required_mask, }; static void gart_iommu_shutdown(void) diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c index d63e63b7d1d9..251c795b4eb3 100644 --- a/arch/x86/kernel/amd_nb.c +++ b/arch/x86/kernel/amd_nb.c @@ -21,6 +21,7 @@ #define PCI_DEVICE_ID_AMD_17H_DF_F4 0x1464 #define PCI_DEVICE_ID_AMD_17H_M10H_DF_F4 0x15ec #define PCI_DEVICE_ID_AMD_17H_M30H_DF_F4 0x1494 +#define PCI_DEVICE_ID_AMD_17H_M70H_DF_F4 0x1444 /* Protect the PCI config register pairs used for SMN and DF indirect access. 
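/*
 * An aside on the constant parked in %rcx by the wakeup_64.S change
 * above: 0xbad6d61676963 is a debugger breadcrumb, 0xbad followed by the
 * ASCII bytes of "magic", so a hang on a saved_magic mismatch is easy to
 * recognize in a register dump. A quick standalone decode:
 */
#include <stdio.h>

int main(void)
{
        unsigned long long v = 0xbad6d61676963ull;

        printf("0x%llx\n", v >> 40);                    /* prints 0xbad   */
        for (int s = 32; s >= 0; s -= 8)
                putchar((int)((v >> s) & 0xff));        /* prints "magic" */
        putchar('\n');
        return 0;
}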
*/ static DEFINE_MUTEX(smn_mutex); @@ -50,6 +51,7 @@ const struct pci_device_id amd_nb_misc_ids[] = { { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M10H_DF_F3) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M30H_DF_F3) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CNB17H_F3) }, + { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M70H_DF_F3) }, {} }; EXPORT_SYMBOL_GPL(amd_nb_misc_ids); @@ -63,6 +65,7 @@ static const struct pci_device_id amd_nb_link_ids[] = { { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_DF_F4) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M10H_DF_F4) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M30H_DF_F4) }, + { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M70H_DF_F4) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CNB17H_F4) }, {} }; diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index dba2828b779a..2b0faf86da1b 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -65,10 +65,10 @@ unsigned int num_processors; unsigned disabled_cpus; /* Processor that is doing the boot up */ -unsigned int boot_cpu_physical_apicid = -1U; +unsigned int boot_cpu_physical_apicid __ro_after_init = -1U; EXPORT_SYMBOL_GPL(boot_cpu_physical_apicid); -u8 boot_cpu_apic_version; +u8 boot_cpu_apic_version __ro_after_init; /* * The highest APIC ID seen during enumeration. @@ -85,13 +85,13 @@ physid_mask_t phys_cpu_present_map; * disable_cpu_apicid=<int>, mostly used for the kdump 2nd kernel to * avoid undefined behaviour caused by sending INIT from AP to BSP. */ -static unsigned int disabled_cpu_apicid __read_mostly = BAD_APICID; +static unsigned int disabled_cpu_apicid __ro_after_init = BAD_APICID; /* * This variable controls which CPUs receive external NMIs. By default, * external NMIs are delivered only to the BSP. */ -static int apic_extnmi = APIC_EXTNMI_BSP; +static int apic_extnmi __ro_after_init = APIC_EXTNMI_BSP; /* * Map cpu index to physical APIC ID @@ -114,7 +114,7 @@ EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_acpiid); DEFINE_EARLY_PER_CPU_READ_MOSTLY(int, x86_cpu_to_logical_apicid, BAD_APICID); /* Local APIC was disabled by the BIOS and enabled by the kernel */ -static int enabled_via_apicbase; +static int enabled_via_apicbase __ro_after_init; /* * Handle interrupt mode configuration register (IMCR). 
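/*
 * Pattern note for the __ro_after_init conversions in the apic.c hunks
 * above and below: the attribute places a variable in a section that is
 * write-protected once boot finishes, so it fits state written only from
 * setup code. Minimal sketch with invented demo_* names:
 */
static int demo_mode __ro_after_init;   /* written during boot only */

static int __init demo_setup(char *arg)
{
        demo_mode = 1;  /* last legal write happens before mark_rodata_ro() */
        return 0;
}
early_param("demo_mode", demo_setup);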
@@ -172,23 +172,23 @@ static __init int setup_apicpmtimer(char *s) __setup("apicpmtimer", setup_apicpmtimer); #endif -unsigned long mp_lapic_addr; -int disable_apic; +unsigned long mp_lapic_addr __ro_after_init; +int disable_apic __ro_after_init; /* Disable local APIC timer from the kernel commandline or via dmi quirk */ static int disable_apic_timer __initdata; /* Local APIC timer works in C2 */ -int local_apic_timer_c2_ok; +int local_apic_timer_c2_ok __ro_after_init; EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok); /* * Debug level, exported for io_apic.c */ -int apic_verbosity; +int apic_verbosity __ro_after_init; -int pic_mode; +int pic_mode __ro_after_init; /* Have we found an MP table */ -int smp_found_config; +int smp_found_config __ro_after_init; static struct resource lapic_resource = { .name = "Local APIC", @@ -199,7 +199,7 @@ unsigned int lapic_timer_period = 0; static void apic_pm_activate(void); -static unsigned long apic_phys; +static unsigned long apic_phys __ro_after_init; /* * Get the LAPIC version @@ -590,21 +590,21 @@ static u32 skx_deadline_rev(void) static const struct x86_cpu_id deadline_match[] = { DEADLINE_MODEL_MATCH_FUNC( INTEL_FAM6_HASWELL_X, hsx_deadline_rev), DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_BROADWELL_X, 0x0b000020), - DEADLINE_MODEL_MATCH_FUNC( INTEL_FAM6_BROADWELL_XEON_D, bdx_deadline_rev), + DEADLINE_MODEL_MATCH_FUNC( INTEL_FAM6_BROADWELL_D, bdx_deadline_rev), DEADLINE_MODEL_MATCH_FUNC( INTEL_FAM6_SKYLAKE_X, skx_deadline_rev), - DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_HASWELL_CORE, 0x22), - DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_HASWELL_ULT, 0x20), - DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_HASWELL_GT3E, 0x17), + DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_HASWELL, 0x22), + DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_HASWELL_L, 0x20), + DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_HASWELL_G, 0x17), - DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_BROADWELL_CORE, 0x25), - DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_BROADWELL_GT3E, 0x17), + DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_BROADWELL, 0x25), + DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_BROADWELL_G, 0x17), - DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_SKYLAKE_MOBILE, 0xb2), - DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_SKYLAKE_DESKTOP, 0xb2), + DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_SKYLAKE_L, 0xb2), + DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_SKYLAKE, 0xb2), - DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_KABYLAKE_MOBILE, 0x52), - DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_KABYLAKE_DESKTOP, 0x52), + DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_KABYLAKE_L, 0x52), + DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_KABYLAKE, 0x52), {}, }; @@ -834,6 +834,10 @@ bool __init apic_needs_pit(void) if (!boot_cpu_has(X86_FEATURE_APIC)) return true; + /* Virt guests may lack ARAT, but still have DEADLINE */ + if (!boot_cpu_has(X86_FEATURE_ARAT)) + return true; + /* Deadline timer is based on TSC so no further PIT action required */ if (boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER)) return false; @@ -1179,10 +1183,6 @@ void clear_local_APIC(void) apic_write(APIC_LVT0, v | APIC_LVT_MASKED); v = apic_read(APIC_LVT1); apic_write(APIC_LVT1, v | APIC_LVT_MASKED); - if (!x2apic_enabled()) { - v = apic_read(APIC_LDR) & ~APIC_LDR_MASK; - apic_write(APIC_LDR, v); - } if (maxlvt >= 4) { v = apic_read(APIC_LVTPC); apic_write(APIC_LVTPC, v | APIC_LVT_MASKED); @@ -1224,25 +1224,38 @@ void clear_local_APIC(void) } /** - * disable_local_APIC - clear and disable the local APIC + * apic_soft_disable - Clears and software disables the local APIC on hotplug + * + * Contrary to disable_local_APIC() this does not touch the enable 
bit in + * MSR_IA32_APICBASE. Clearing that bit on systems based on the 3 wire APIC + * bus would require a hardware reset as the APIC would lose track of bus + * arbitration. On systems with FSB delivery APICBASE could be disabled, + * but it has to be guaranteed that no interrupt is sent to the APIC while + * in that state and it's not clear from the SDM whether it still responds + * to INIT/SIPI messages. Stay on the safe side and use software disable. */ -void disable_local_APIC(void) +void apic_soft_disable(void) { - unsigned int value; - - /* APIC hasn't been mapped yet */ - if (!x2apic_mode && !apic_phys) - return; + u32 value; clear_local_APIC(); - /* - * Disable APIC (implies clearing of registers - * for 82489DX!). - */ + /* Soft disable APIC (implies clearing of registers for 82489DX!). */ value = apic_read(APIC_SPIV); value &= ~APIC_SPIV_APIC_ENABLED; apic_write(APIC_SPIV, value); +} + +/** + * disable_local_APIC - clear and disable the local APIC + */ +void disable_local_APIC(void) +{ + /* APIC hasn't been mapped yet */ + if (!x2apic_mode && !apic_phys) + return; + + apic_soft_disable(); #ifdef CONFIG_X86_32 /* @@ -1307,7 +1320,7 @@ void __init sync_Arb_IDs(void) APIC_INT_LEVELTRIG | APIC_DM_INIT); } -enum apic_intr_mode_id apic_intr_mode; +enum apic_intr_mode_id apic_intr_mode __ro_after_init; static int __init apic_intr_mode_select(void) { @@ -1495,54 +1508,72 @@ static void lapic_setup_esr(void) oldvalue, value); } -static void apic_pending_intr_clear(void) +#define APIC_IR_REGS APIC_ISR_NR +#define APIC_IR_BITS (APIC_IR_REGS * 32) +#define APIC_IR_MAPSIZE (APIC_IR_BITS / BITS_PER_LONG) + +union apic_ir { + unsigned long map[APIC_IR_MAPSIZE]; + u32 regs[APIC_IR_REGS]; +}; + +static bool apic_check_and_ack(union apic_ir *irr, union apic_ir *isr) { - long long max_loops = cpu_khz ? cpu_khz : 1000000; - unsigned long long tsc = 0, ntsc; - unsigned int queued; - unsigned long value; - int i, j, acked = 0; + int i, bit; + + /* Read the IRRs */ + for (i = 0; i < APIC_IR_REGS; i++) + irr->regs[i] = apic_read(APIC_IRR + i * 0x10); + + /* Read the ISRs */ + for (i = 0; i < APIC_IR_REGS; i++) + isr->regs[i] = apic_read(APIC_ISR + i * 0x10); - if (boot_cpu_has(X86_FEATURE_TSC)) - tsc = rdtsc(); /* - * After a crash, we no longer service the interrupts and a pending - * interrupt from previous kernel might still have ISR bit set. - * - * Most probably by now CPU has serviced that pending interrupt and - * it might not have done the ack_APIC_irq() because it thought, - * interrupt came from i8259 as ExtInt. LAPIC did not get EOI so it - * does not clear the ISR bit and cpu thinks it has already serivced - * the interrupt. Hence a vector might get locked. It was noticed - * for timer irq (vector 0x31). Issue an extra EOI to clear ISR. + * If the ISR map is not empty. ACK the APIC and run another round + * to verify whether a pending IRR has been unblocked and turned + * into a ISR. 
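[The new union apic_ir is the core trick of this rework: the eight 32-bit IRR/ISR registers are read into an array that overlays an unsigned long bitmap, so generic helpers like bitmap_empty() and for_each_set_bit() can scan all 256 vectors at once. A standalone demo of the overlay, little-endian x86 assumed, register values invented:

#include <stdio.h>
#include <string.h>

#define IR_REGS 8
#define IR_BITS (IR_REGS * 32)
#define LBITS   (8 * sizeof(unsigned long))

union ir {
	unsigned long map[IR_BITS / (8 * sizeof(unsigned long))];
	unsigned int  regs[IR_REGS];
};

int main(void)
{
	union ir isr;

	memset(&isr, 0, sizeof(isr));
	isr.regs[1] = 0x80000001u;	/* pretend vectors 32 and 63 are in service */
	isr.regs[5] = 0x00000010u;	/* and vector 164 */

	/* Poor man's for_each_set_bit() over the overlaid bitmap */
	for (unsigned int bit = 0; bit < IR_BITS; bit++) {
		if (isr.map[bit / LBITS] & (1UL << (bit % LBITS)))
			printf("would ack vector %u\n", bit);
	}
	return 0;
}
]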
*/ - do { - queued = 0; - for (i = APIC_ISR_NR - 1; i >= 0; i--) - queued |= apic_read(APIC_IRR + i*0x10); - - for (i = APIC_ISR_NR - 1; i >= 0; i--) { - value = apic_read(APIC_ISR + i*0x10); - for_each_set_bit(j, &value, 32) { - ack_APIC_irq(); - acked++; - } - } - if (acked > 256) { - pr_err("LAPIC pending interrupts after %d EOI\n", acked); - break; - } - if (queued) { - if (boot_cpu_has(X86_FEATURE_TSC) && cpu_khz) { - ntsc = rdtsc(); - max_loops = (long long)cpu_khz << 10; - max_loops -= ntsc - tsc; - } else { - max_loops--; - } - } - } while (queued && max_loops > 0); - WARN_ON(max_loops <= 0); + if (!bitmap_empty(isr->map, APIC_IR_BITS)) { + /* + * There can be multiple ISR bits set when a high priority + * interrupt preempted a lower priority one. Issue an ACK + * per set bit. + */ + for_each_set_bit(bit, isr->map, APIC_IR_BITS) + ack_APIC_irq(); + return true; + } + + return !bitmap_empty(irr->map, APIC_IR_BITS); +} + +/* + * After a crash, we no longer service the interrupts and a pending + * interrupt from previous kernel might still have ISR bit set. + * + * Most probably by now the CPU has serviced that pending interrupt and it + * might not have done the ack_APIC_irq() because it thought, interrupt + * came from i8259 as ExtInt. LAPIC did not get EOI so it does not clear + * the ISR bit and cpu thinks it has already serivced the interrupt. Hence + * a vector might get locked. It was noticed for timer irq (vector + * 0x31). Issue an extra EOI to clear ISR. + * + * If there are pending IRR bits they turn into ISR bits after a higher + * priority ISR bit has been acked. + */ +static void apic_pending_intr_clear(void) +{ + union apic_ir irr, isr; + unsigned int i; + + /* 512 loops are way oversized and give the APIC a chance to obey. */ + for (i = 0; i < 512; i++) { + if (!apic_check_and_ack(&irr, &isr)) + return; + } + /* Dump the IRR/ISR content if that failed */ + pr_warn("APIC: Stale IRR: %256pb ISR: %256pb\n", irr.map, isr.map); } /** @@ -1555,16 +1586,20 @@ static void setup_local_APIC(void) { int cpu = smp_processor_id(); unsigned int value; -#ifdef CONFIG_X86_32 - int logical_apicid, ldr_apicid; -#endif - if (disable_apic) { disable_ioapic_support(); return; } + /* + * If this comes from kexec/kcrash the APIC might be enabled in + * SPIV. Soft disable it before doing further initialization. + */ + value = apic_read(APIC_SPIV); + value &= ~APIC_SPIV_APIC_ENABLED; + apic_write(APIC_SPIV, value); + #ifdef CONFIG_X86_32 /* Pound the ESR really hard over the head with a big hammer - mbligh */ if (lapic_is_integrated() && apic->disable_esr) { @@ -1574,8 +1609,6 @@ static void setup_local_APIC(void) apic_write(APIC_ESR, 0); } #endif - perf_events_lapic_init(); - /* * Double-check whether this APIC is really registered. * This is meaningless in clustered apic mode, so we skip it. @@ -1590,26 +1623,35 @@ static void setup_local_APIC(void) apic->init_apic_ldr(); #ifdef CONFIG_X86_32 - /* - * APIC LDR is initialized. If logical_apicid mapping was - * initialized during get_smp_config(), make sure it matches the - * actual value. - */ - logical_apicid = early_per_cpu(x86_cpu_to_logical_apicid, cpu); - ldr_apicid = GET_APIC_LOGICAL_ID(apic_read(APIC_LDR)); - WARN_ON(logical_apicid != BAD_APICID && logical_apicid != ldr_apicid); - /* always use the value from LDR */ - early_per_cpu(x86_cpu_to_logical_apicid, cpu) = ldr_apicid; + if (apic->dest_logical) { + int logical_apicid, ldr_apicid; + + /* + * APIC LDR is initialized. 
If logical_apicid mapping was + * initialized during get_smp_config(), make sure it matches + * the actual value. + */ + logical_apicid = early_per_cpu(x86_cpu_to_logical_apicid, cpu); + ldr_apicid = GET_APIC_LOGICAL_ID(apic_read(APIC_LDR)); + if (logical_apicid != BAD_APICID) + WARN_ON(logical_apicid != ldr_apicid); + /* Always use the value from LDR. */ + early_per_cpu(x86_cpu_to_logical_apicid, cpu) = ldr_apicid; + } #endif /* - * Set Task Priority to 'accept all'. We never change this - * later on. + * Set Task Priority to 'accept all except vectors 0-31'. An APIC + * vector in the 16-31 range could be delivered if TPR == 0, but we + * would think it's an exception and terrible things will happen. We + * never change this later on. */ value = apic_read(APIC_TASKPRI); value &= ~APIC_TPRI_MASK; + value |= 0x10; apic_write(APIC_TASKPRI, value); + /* Clear eventually stale ISR/IRR bits */ apic_pending_intr_clear(); /* @@ -1656,6 +1698,8 @@ static void setup_local_APIC(void) value |= SPURIOUS_APIC_VECTOR; apic_write(APIC_SPIV, value); + perf_events_lapic_init(); + /* * Set up LVT0, LVT1: * diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c index bbdca603f94a..7862b152a052 100644 --- a/arch/x86/kernel/apic/apic_flat_64.c +++ b/arch/x86/kernel/apic/apic_flat_64.c @@ -8,21 +8,14 @@ * Martin Bligh, Andi Kleen, James Bottomley, John Stultz, and * James Cleverdon. */ -#include <linux/acpi.h> -#include <linux/errno.h> -#include <linux/threads.h> #include <linux/cpumask.h> -#include <linux/string.h> -#include <linux/kernel.h> -#include <linux/ctype.h> -#include <linux/hardirq.h> #include <linux/export.h> +#include <linux/acpi.h> -#include <asm/smp.h> -#include <asm/ipi.h> -#include <asm/apic.h> -#include <asm/apic_flat_64.h> #include <asm/jailhouse_para.h> +#include <asm/apic.h> + +#include "local.h" static struct apic apic_physflat; static struct apic apic_flat; @@ -83,35 +76,6 @@ flat_send_IPI_mask_allbutself(const struct cpumask *cpumask, int vector) _flat_send_IPI_mask(mask, vector); } -static void flat_send_IPI_allbutself(int vector) -{ - int cpu = smp_processor_id(); - - if (IS_ENABLED(CONFIG_HOTPLUG_CPU) || vector == NMI_VECTOR) { - if (!cpumask_equal(cpu_online_mask, cpumask_of(cpu))) { - unsigned long mask = cpumask_bits(cpu_online_mask)[0]; - - if (cpu < BITS_PER_LONG) - __clear_bit(cpu, &mask); - - _flat_send_IPI_mask(mask, vector); - } - } else if (num_online_cpus() > 1) { - __default_send_IPI_shortcut(APIC_DEST_ALLBUT, - vector, apic->dest_logical); - } -} - -static void flat_send_IPI_all(int vector) -{ - if (vector == NMI_VECTOR) { - flat_send_IPI_mask(cpu_online_mask, vector); - } else { - __default_send_IPI_shortcut(APIC_DEST_ALLINC, - vector, apic->dest_logical); - } -} - static unsigned int flat_get_apic_id(unsigned long x) { return (x >> 24) & 0xFF; @@ -173,9 +137,9 @@ static struct apic apic_flat __ro_after_init = { .send_IPI = default_send_IPI_single, .send_IPI_mask = flat_send_IPI_mask, .send_IPI_mask_allbutself = flat_send_IPI_mask_allbutself, - .send_IPI_allbutself = flat_send_IPI_allbutself, - .send_IPI_all = flat_send_IPI_all, - .send_IPI_self = apic_send_IPI_self, + .send_IPI_allbutself = default_send_IPI_allbutself, + .send_IPI_all = default_send_IPI_all, + .send_IPI_self = default_send_IPI_self, .inquire_remote_apic = default_inquire_remote_apic, @@ -225,16 +189,6 @@ static void physflat_init_apic_ldr(void) */ } -static void physflat_send_IPI_allbutself(int vector) -{ - default_send_IPI_mask_allbutself_phys(cpu_online_mask, vector); 
-} - -static void physflat_send_IPI_all(int vector) -{ - default_send_IPI_mask_sequence_phys(cpu_online_mask, vector); -} - static int physflat_probe(void) { if (apic == &apic_physflat || num_possible_cpus() > 8 || @@ -276,9 +230,9 @@ static struct apic apic_physflat __ro_after_init = { .send_IPI = default_send_IPI_single_phys, .send_IPI_mask = default_send_IPI_mask_sequence_phys, .send_IPI_mask_allbutself = default_send_IPI_mask_allbutself_phys, - .send_IPI_allbutself = physflat_send_IPI_allbutself, - .send_IPI_all = physflat_send_IPI_all, - .send_IPI_self = apic_send_IPI_self, + .send_IPI_allbutself = default_send_IPI_allbutself, + .send_IPI_all = default_send_IPI_all, + .send_IPI_self = default_send_IPI_self, .inquire_remote_apic = default_inquire_remote_apic, diff --git a/arch/x86/kernel/apic/apic_noop.c b/arch/x86/kernel/apic/apic_noop.c index 5078b5ce63a7..98c9bb75d185 100644 --- a/arch/x86/kernel/apic/apic_noop.c +++ b/arch/x86/kernel/apic/apic_noop.c @@ -9,25 +9,9 @@ * to not uglify the caller's code and allow to call (some) apic routines * like self-ipi, etc... */ - -#include <linux/threads.h> #include <linux/cpumask.h> -#include <linux/string.h> -#include <linux/kernel.h> -#include <linux/ctype.h> -#include <linux/errno.h> -#include <asm/fixmap.h> -#include <asm/mpspec.h> -#include <asm/apicdef.h> -#include <asm/apic.h> -#include <asm/setup.h> -#include <linux/smp.h> -#include <asm/ipi.h> - -#include <linux/interrupt.h> -#include <asm/acpi.h> -#include <asm/e820/api.h> +#include <asm/apic.h> static void noop_init_apic_ldr(void) { } static void noop_send_IPI(int cpu, int vector) { } diff --git a/arch/x86/kernel/apic/apic_numachip.c b/arch/x86/kernel/apic/apic_numachip.c index a5464b8b6c46..cdf45b4700f2 100644 --- a/arch/x86/kernel/apic/apic_numachip.c +++ b/arch/x86/kernel/apic/apic_numachip.c @@ -10,15 +10,15 @@ * Send feedback to <support@numascale.com> * */ - +#include <linux/types.h> #include <linux/init.h> #include <asm/numachip/numachip.h> #include <asm/numachip/numachip_csr.h> -#include <asm/ipi.h> -#include <asm/apic_flat_64.h> + #include <asm/pgtable.h> -#include <asm/pci_x86.h> + +#include "local.h" u8 numachip_system __read_mostly; static const struct apic apic_numachip1; diff --git a/arch/x86/kernel/apic/bigsmp_32.c b/arch/x86/kernel/apic/bigsmp_32.c index caedd8d60d36..38b5b51d42f6 100644 --- a/arch/x86/kernel/apic/bigsmp_32.c +++ b/arch/x86/kernel/apic/bigsmp_32.c @@ -4,18 +4,13 @@ * * Drives the local APIC in "clustered mode". 
*/ -#include <linux/threads.h> #include <linux/cpumask.h> -#include <linux/kernel.h> -#include <linux/init.h> #include <linux/dmi.h> #include <linux/smp.h> -#include <asm/apicdef.h> -#include <asm/fixmap.h> -#include <asm/mpspec.h> #include <asm/apic.h> -#include <asm/ipi.h> + +#include "local.h" static unsigned bigsmp_get_apic_id(unsigned long x) { diff --git a/arch/x86/kernel/apic/ipi.c b/arch/x86/kernel/apic/ipi.c index 82f9244fe61f..6ca0f91372fd 100644 --- a/arch/x86/kernel/apic/ipi.c +++ b/arch/x86/kernel/apic/ipi.c @@ -1,24 +1,113 @@ // SPDX-License-Identifier: GPL-2.0 + #include <linux/cpumask.h> -#include <linux/interrupt.h> - -#include <linux/mm.h> -#include <linux/delay.h> -#include <linux/spinlock.h> -#include <linux/kernel_stat.h> -#include <linux/mc146818rtc.h> -#include <linux/cache.h> -#include <linux/cpu.h> - -#include <asm/smp.h> -#include <asm/mtrr.h> -#include <asm/tlbflush.h> -#include <asm/mmu_context.h> -#include <asm/apic.h> -#include <asm/proto.h> -#include <asm/ipi.h> - -void __default_send_IPI_shortcut(unsigned int shortcut, int vector, unsigned int dest) +#include <linux/smp.h> + +#include "local.h" + +DEFINE_STATIC_KEY_FALSE(apic_use_ipi_shorthand); + +#ifdef CONFIG_SMP +static int apic_ipi_shorthand_off __ro_after_init; + +static __init int apic_ipi_shorthand(char *str) +{ + get_option(&str, &apic_ipi_shorthand_off); + return 1; +} +__setup("no_ipi_broadcast=", apic_ipi_shorthand); + +static int __init print_ipi_mode(void) +{ + pr_info("IPI shorthand broadcast: %s\n", + apic_ipi_shorthand_off ? "disabled" : "enabled"); + return 0; +} +late_initcall(print_ipi_mode); + +void apic_smt_update(void) +{ + /* + * Do not switch to broadcast mode if: + * - Disabled on the command line + * - Only a single CPU is online + * - Not all present CPUs have been at least booted once + * + * The latter is important as the local APIC might be in some + * random state and a broadcast might cause havoc. That's + * especially true for NMI broadcasting. + */ + if (apic_ipi_shorthand_off || num_online_cpus() == 1 || + !cpumask_equal(cpu_present_mask, &cpus_booted_once_mask)) { + static_branch_disable(&apic_use_ipi_shorthand); + } else { + static_branch_enable(&apic_use_ipi_shorthand); + } +} + +void apic_send_IPI_allbutself(unsigned int vector) +{ + if (num_online_cpus() < 2) + return; + + if (static_branch_likely(&apic_use_ipi_shorthand)) + apic->send_IPI_allbutself(vector); + else + apic->send_IPI_mask_allbutself(cpu_online_mask, vector); +} + +/* + * Send a 'reschedule' IPI to another CPU. It goes straight through and + * wastes no time serializing anything. Worst case is that we lose a + * reschedule ... 
+ */ +void native_smp_send_reschedule(int cpu) +{ + if (unlikely(cpu_is_offline(cpu))) { + WARN(1, "sched: Unexpected reschedule of offline CPU#%d!\n", cpu); + return; + } + apic->send_IPI(cpu, RESCHEDULE_VECTOR); +} + +void native_send_call_func_single_ipi(int cpu) +{ + apic->send_IPI(cpu, CALL_FUNCTION_SINGLE_VECTOR); +} + +void native_send_call_func_ipi(const struct cpumask *mask) +{ + if (static_branch_likely(&apic_use_ipi_shorthand)) { + unsigned int cpu = smp_processor_id(); + + if (!cpumask_or_equal(mask, cpumask_of(cpu), cpu_online_mask)) + goto sendmask; + + if (cpumask_test_cpu(cpu, mask)) + apic->send_IPI_all(CALL_FUNCTION_VECTOR); + else if (num_online_cpus() > 1) + apic->send_IPI_allbutself(CALL_FUNCTION_VECTOR); + return; + } + +sendmask: + apic->send_IPI_mask(mask, CALL_FUNCTION_VECTOR); +} + +#endif /* CONFIG_SMP */ + +static inline int __prepare_ICR2(unsigned int mask) +{ + return SET_APIC_DEST_FIELD(mask); +} + +static inline void __xapic_wait_icr_idle(void) +{ + while (native_apic_mem_read(APIC_ICR) & APIC_ICR_BUSY) + cpu_relax(); +} + +void __default_send_IPI_shortcut(unsigned int shortcut, int vector) { /* * Subtle. In the case of the 'never do double writes' workaround @@ -32,12 +121,16 @@ void __default_send_IPI_shortcut(unsigned int shortcut, int vector, unsigned int /* * Wait for idle. */ - __xapic_wait_icr_idle(); + if (unlikely(vector == NMI_VECTOR)) + safe_apic_wait_icr_idle(); + else + __xapic_wait_icr_idle(); /* - * No need to touch the target chip field + * No need to touch the target chip field. Also the destination + * mode is ignored when a shorthand is used. */ - cfg = __prepare_ICR(shortcut, vector, dest); + cfg = __prepare_ICR(shortcut, vector, 0); /* * Send the IPI. The write to APIC_ICR fires this off. @@ -133,6 +226,21 @@ void default_send_IPI_single(int cpu, int vector) apic->send_IPI_mask(cpumask_of(cpu), vector); } +void default_send_IPI_allbutself(int vector) +{ + __default_send_IPI_shortcut(APIC_DEST_ALLBUT, vector); +} + +void default_send_IPI_all(int vector) +{ + __default_send_IPI_shortcut(APIC_DEST_ALLINC, vector); +} + +void default_send_IPI_self(int vector) +{ + __default_send_IPI_shortcut(APIC_DEST_SELF, vector); +} + #ifdef CONFIG_X86_32 void default_send_IPI_mask_sequence_logical(const struct cpumask *mask, @@ -192,28 +300,6 @@ void default_send_IPI_mask_logical(const struct cpumask *cpumask, int vector) local_irq_restore(flags); } -void default_send_IPI_allbutself(int vector) -{ - /* - * if there are no other CPUs in the system then we get an APIC send - * error if we try to broadcast, thus avoid sending IPIs in this case. - */ - if (!(num_online_cpus() > 1)) - return; - - __default_local_send_IPI_allbutself(vector); -} - -void default_send_IPI_all(int vector) -{ - __default_local_send_IPI_all(vector); -} - -void default_send_IPI_self(int vector) -{ - __default_send_IPI_shortcut(APIC_DEST_SELF, vector, apic->dest_logical); -} - /* must come after the send_IPI functions above for inlining */ static int convert_apicid_to_cpu(int apic_id) { diff --git a/arch/x86/kernel/apic/local.h b/arch/x86/kernel/apic/local.h new file mode 100644 index 000000000000..04797f05ce94 --- /dev/null +++ b/arch/x86/kernel/apic/local.h @@ -0,0 +1,68 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Historical copyright notices: + * + * Copyright 2004 James Cleverdon, IBM. + * (c) 1995 Alan Cox, Building #3 <alan@redhat.com> + * (c) 1998-99, 2000 Ingo Molnar <mingo@redhat.com> + * (c) 2002,2003 Andi Kleen, SuSE Labs. 
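[native_send_call_func_ipi() above picks the cheapest ICR operation: a shorthand covers either all CPUs or all-but-self, and is only valid when the target mask plus the sender equals the online mask. A userspace restatement of that selection with plain bitmasks; names are illustrative, not kernel API:

#include <stdio.h>

enum ipi_kind { IPI_MASK, IPI_ALL, IPI_ALLBUTSELF };

static enum ipi_kind pick_ipi(unsigned long target, unsigned long online,
			      int self, int shorthand_ok)
{
	unsigned long self_bit = 1UL << self;

	if (!shorthand_ok)
		return IPI_MASK;
	/* Shorthands hit every online CPU (modulo the sender); if the
	 * request targets anything less, fall back to the mask path. */
	if ((target | self_bit) != online)
		return IPI_MASK;
	return (target & self_bit) ? IPI_ALL : IPI_ALLBUTSELF;
}

int main(void)
{
	/* CPUs 0-3 online, caller on CPU 0 */
	printf("%d\n", pick_ipi(0xeUL, 0xfUL, 0, 1)); /* 2: all-but-self */
	printf("%d\n", pick_ipi(0xfUL, 0xfUL, 0, 1)); /* 1: all */
	printf("%d\n", pick_ipi(0x6UL, 0xfUL, 0, 1)); /* 0: mask */
	return 0;
}
]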
+ */ + +#include <linux/jump_label.h> + +#include <asm/apic.h> + +/* APIC flat 64 */ +void flat_init_apic_ldr(void); + +/* X2APIC */ +int x2apic_apic_id_valid(u32 apicid); +int x2apic_apic_id_registered(void); +void __x2apic_send_IPI_dest(unsigned int apicid, int vector, unsigned int dest); +unsigned int x2apic_get_apic_id(unsigned long id); +u32 x2apic_set_apic_id(unsigned int id); +int x2apic_phys_pkg_id(int initial_apicid, int index_msb); +void x2apic_send_IPI_self(int vector); +void __x2apic_send_IPI_shorthand(int vector, u32 which); + +/* IPI */ + +DECLARE_STATIC_KEY_FALSE(apic_use_ipi_shorthand); + +static inline unsigned int __prepare_ICR(unsigned int shortcut, int vector, + unsigned int dest) +{ + unsigned int icr = shortcut | dest; + + switch (vector) { + default: + icr |= APIC_DM_FIXED | vector; + break; + case NMI_VECTOR: + icr |= APIC_DM_NMI; + break; + } + return icr; +} + +void __default_send_IPI_shortcut(unsigned int shortcut, int vector); + +/* + * This is used to send an IPI with no shorthand notation (the destination is + * specified in bits 56 to 63 of the ICR). + */ +void __default_send_IPI_dest_field(unsigned int mask, int vector, unsigned int dest); + +void default_send_IPI_single(int cpu, int vector); +void default_send_IPI_single_phys(int cpu, int vector); +void default_send_IPI_mask_sequence_phys(const struct cpumask *mask, int vector); +void default_send_IPI_mask_allbutself_phys(const struct cpumask *mask, int vector); +void default_send_IPI_allbutself(int vector); +void default_send_IPI_all(int vector); +void default_send_IPI_self(int vector); + +#ifdef CONFIG_X86_32 +void default_send_IPI_mask_sequence_logical(const struct cpumask *mask, int vector); +void default_send_IPI_mask_allbutself_logical(const struct cpumask *mask, int vector); +void default_send_IPI_mask_logical(const struct cpumask *mask, int vector); +#endif diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c index ee2d91e382f1..67b33d67002f 100644 --- a/arch/x86/kernel/apic/probe_32.c +++ b/arch/x86/kernel/apic/probe_32.c @@ -6,51 +6,14 @@ * * Generic x86 APIC driver probe layer. */ -#include <linux/threads.h> -#include <linux/cpumask.h> #include <linux/export.h> -#include <linux/string.h> -#include <linux/kernel.h> -#include <linux/ctype.h> -#include <linux/init.h> #include <linux/errno.h> -#include <asm/fixmap.h> -#include <asm/mpspec.h> -#include <asm/apicdef.h> -#include <asm/apic.h> -#include <asm/setup.h> - #include <linux/smp.h> -#include <asm/ipi.h> -#include <linux/interrupt.h> +#include <asm/apic.h> #include <asm/acpi.h> -#include <asm/e820/api.h> -#ifdef CONFIG_HOTPLUG_CPU -#define DEFAULT_SEND_IPI (1) -#else -#define DEFAULT_SEND_IPI (0) -#endif - -int no_broadcast = DEFAULT_SEND_IPI; - -static __init int no_ipi_broadcast(char *str) -{ - get_option(&str, &no_broadcast); - pr_info("Using %s mode\n", - no_broadcast ? "No IPI Broadcast" : "IPI Broadcast"); - return 1; -} -__setup("no_ipi_broadcast=", no_ipi_broadcast); - -static int __init print_ipi_mode(void) -{ - pr_info("Using IPI %s mode\n", - no_broadcast ? "No-Shortcut" : "Shortcut"); - return 0; -} -late_initcall(print_ipi_mode); +#include "local.h" static int default_x86_32_early_logical_apicid(int cpu) { diff --git a/arch/x86/kernel/apic/probe_64.c b/arch/x86/kernel/apic/probe_64.c index e6560a02eb46..29f0e0984557 100644 --- a/arch/x86/kernel/apic/probe_64.c +++ b/arch/x86/kernel/apic/probe_64.c @@ -8,19 +8,9 @@ * Martin Bligh, Andi Kleen, James Bottomley, John Stultz, and * James Cleverdon. 
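[__prepare_ICR() in the new local.h assembles the low ICR word from a shorthand, a delivery mode and a vector; NMI swaps in its own delivery mode and the vector field is ignored. A standalone illustration; the constants mirror apicdef.h but are reproduced here only for the demo:

#include <stdio.h>

#define APIC_DEST_SELF   0x40000u
#define APIC_DEST_ALLINC 0x80000u
#define APIC_DEST_ALLBUT 0xC0000u
#define APIC_DM_FIXED    0x00000u
#define APIC_DM_NMI      0x00400u
#define NMI_VECTOR       0x02

static unsigned int prepare_icr(unsigned int shortcut, int vector)
{
	unsigned int icr = shortcut;

	if (vector == NMI_VECTOR)
		icr |= APIC_DM_NMI;		/* NMI ignores the vector field */
	else
		icr |= APIC_DM_FIXED | vector;
	return icr;
}

int main(void)
{
	printf("all-but-self, vector 0xfd: %#x\n",
	       prepare_icr(APIC_DEST_ALLBUT, 0xfd));
	printf("all-but-self, NMI:         %#x\n",
	       prepare_icr(APIC_DEST_ALLBUT, NMI_VECTOR));
	return 0;
}
]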
*/ -#include <linux/threads.h> -#include <linux/cpumask.h> -#include <linux/string.h> -#include <linux/init.h> -#include <linux/kernel.h> -#include <linux/ctype.h> -#include <linux/hardirq.h> -#include <linux/dmar.h> - -#include <asm/smp.h> #include <asm/apic.h> -#include <asm/ipi.h> -#include <asm/setup.h> + +#include "local.h" /* * Check the APIC IDs in bios_cpu_apicid and choose the APIC mode. @@ -46,13 +36,6 @@ void __init default_setup_apic_routing(void) x86_platform.apic_post_init(); } -/* Same for both flat and physical. */ - -void apic_send_IPI_self(int vector) -{ - __default_send_IPI_shortcut(APIC_DEST_SELF, vector, APIC_DEST_PHYSICAL); -} - int __init default_acpi_madt_oem_check(char *oem_id, char *oem_table_id) { struct apic **drv; diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index fdacb864c3dd..2c5676b0a6e7 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -398,6 +398,17 @@ static int activate_reserved(struct irq_data *irqd) if (!irqd_can_reserve(irqd)) apicd->can_reserve = false; } + + /* + * Check to ensure that the effective affinity mask is a subset + * the user supplied affinity mask, and warn the user if it is not + */ + if (!cpumask_subset(irq_data_get_effective_affinity_mask(irqd), + irq_data_get_affinity_mask(irqd))) { + pr_warn("irq %u: Affinity broken due to vector space exhaustion.\n", + irqd->irq); + } + return ret; } diff --git a/arch/x86/kernel/apic/x2apic.h b/arch/x86/kernel/apic/x2apic.h deleted file mode 100644 index a49b3604027f..000000000000 --- a/arch/x86/kernel/apic/x2apic.h +++ /dev/null @@ -1,9 +0,0 @@ -/* Common bits for X2APIC cluster/physical modes. */ - -int x2apic_apic_id_valid(u32 apicid); -int x2apic_apic_id_registered(void); -void __x2apic_send_IPI_dest(unsigned int apicid, int vector, unsigned int dest); -unsigned int x2apic_get_apic_id(unsigned long id); -u32 x2apic_set_apic_id(unsigned int id); -int x2apic_phys_pkg_id(int initial_apicid, int index_msb); -void x2apic_send_IPI_self(int vector); diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c index 609e499387a1..b0889c48a2ac 100644 --- a/arch/x86/kernel/apic/x2apic_cluster.c +++ b/arch/x86/kernel/apic/x2apic_cluster.c @@ -1,15 +1,13 @@ // SPDX-License-Identifier: GPL-2.0 -#include <linux/threads.h> + +#include <linux/cpuhotplug.h> #include <linux/cpumask.h> -#include <linux/string.h> -#include <linux/kernel.h> -#include <linux/ctype.h> -#include <linux/dmar.h> -#include <linux/irq.h> -#include <linux/cpu.h> +#include <linux/slab.h> +#include <linux/mm.h> + +#include <asm/apic.h> -#include <asm/smp.h> -#include "x2apic.h" +#include "local.h" struct cluster_mask { unsigned int clusterid; @@ -84,12 +82,12 @@ x2apic_send_IPI_mask_allbutself(const struct cpumask *mask, int vector) static void x2apic_send_IPI_allbutself(int vector) { - __x2apic_send_IPI_mask(cpu_online_mask, vector, APIC_DEST_ALLBUT); + __x2apic_send_IPI_shorthand(vector, APIC_DEST_ALLBUT); } static void x2apic_send_IPI_all(int vector) { - __x2apic_send_IPI_mask(cpu_online_mask, vector, APIC_DEST_ALLINC); + __x2apic_send_IPI_shorthand(vector, APIC_DEST_ALLINC); } static u32 x2apic_calc_apicid(unsigned int cpu) @@ -158,7 +156,8 @@ static int x2apic_dead_cpu(unsigned int dead_cpu) { struct cluster_mask *cmsk = per_cpu(cluster_masks, dead_cpu); - cpumask_clear_cpu(dead_cpu, &cmsk->mask); + if (cmsk) + cpumask_clear_cpu(dead_cpu, &cmsk->mask); free_cpumask_var(per_cpu(ipi_mask, dead_cpu)); return 0; } diff --git 
a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c index b5cf9e7b3830..bc9693841353 100644 --- a/arch/x86/kernel/apic/x2apic_phys.c +++ b/arch/x86/kernel/apic/x2apic_phys.c @@ -1,14 +1,9 @@ // SPDX-License-Identifier: GPL-2.0 -#include <linux/threads.h> + #include <linux/cpumask.h> -#include <linux/string.h> -#include <linux/kernel.h> -#include <linux/ctype.h> -#include <linux/dmar.h> +#include <linux/acpi.h> -#include <asm/smp.h> -#include <asm/ipi.h> -#include "x2apic.h" +#include "local.h" int x2apic_phys; @@ -80,12 +75,12 @@ static void static void x2apic_send_IPI_allbutself(int vector) { - __x2apic_send_IPI_mask(cpu_online_mask, vector, APIC_DEST_ALLBUT); + __x2apic_send_IPI_shorthand(vector, APIC_DEST_ALLBUT); } static void x2apic_send_IPI_all(int vector) { - __x2apic_send_IPI_mask(cpu_online_mask, vector, APIC_DEST_ALLINC); + __x2apic_send_IPI_shorthand(vector, APIC_DEST_ALLINC); } static void init_x2apic_ldr(void) @@ -117,6 +112,14 @@ void __x2apic_send_IPI_dest(unsigned int apicid, int vector, unsigned int dest) native_x2apic_icr_write(cfg, apicid); } +void __x2apic_send_IPI_shorthand(int vector, u32 which) +{ + unsigned long cfg = __prepare_ICR(which, vector, 0); + + x2apic_wrmsr_fence(); + native_x2apic_icr_write(cfg, 0); +} + unsigned int x2apic_get_apic_id(unsigned long id) { return id; diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index 1e225528f0d7..e6230af19864 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -7,42 +7,22 @@ * * Copyright (C) 2007-2014 Silicon Graphics, Inc. All rights reserved. */ +#include <linux/crash_dump.h> +#include <linux/cpuhotplug.h> #include <linux/cpumask.h> -#include <linux/hardirq.h> #include <linux/proc_fs.h> -#include <linux/threads.h> -#include <linux/kernel.h> +#include <linux/memory.h> #include <linux/export.h> -#include <linux/string.h> -#include <linux/ctype.h> -#include <linux/sched.h> -#include <linux/timer.h> -#include <linux/slab.h> -#include <linux/cpu.h> -#include <linux/init.h> -#include <linux/io.h> #include <linux/pci.h> -#include <linux/kdebug.h> -#include <linux/delay.h> -#include <linux/crash_dump.h> -#include <linux/reboot.h> -#include <linux/memory.h> -#include <linux/numa.h> +#include <asm/e820/api.h> #include <asm/uv/uv_mmrs.h> #include <asm/uv/uv_hub.h> -#include <asm/current.h> -#include <asm/pgtable.h> #include <asm/uv/bios.h> #include <asm/uv/uv.h> #include <asm/apic.h> -#include <asm/e820/api.h> -#include <asm/ipi.h> -#include <asm/smp.h> -#include <asm/x86_init.h> -#include <asm/nmi.h> -DEFINE_PER_CPU(int, x2apic_extra_bits); +static DEFINE_PER_CPU(int, x2apic_extra_bits); static enum uv_system_type uv_system_type; static bool uv_hubless_system; diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c index d3d075226c0a..24d2fde30d00 100644 --- a/arch/x86/kernel/asm-offsets_64.c +++ b/arch/x86/kernel/asm-offsets_64.c @@ -6,13 +6,28 @@ #include <asm/ia32.h> #define __SYSCALL_64(nr, sym, qual) [nr] = 1, +#define __SYSCALL_X32(nr, sym, qual) static char syscalls_64[] = { #include <asm/syscalls_64.h> }; +#undef __SYSCALL_64 +#undef __SYSCALL_X32 + +#ifdef CONFIG_X86_X32_ABI +#define __SYSCALL_64(nr, sym, qual) +#define __SYSCALL_X32(nr, sym, qual) [nr] = 1, +static char syscalls_x32[] = { +#include <asm/syscalls_64.h> +}; +#undef __SYSCALL_64 +#undef __SYSCALL_X32 +#endif + #define __SYSCALL_I386(nr, sym, qual) [nr] = 1, static char syscalls_ia32[] = { #include <asm/syscalls_32.h> }; +#undef 
__SYSCALL_I386 #if defined(CONFIG_KVM_GUEST) && defined(CONFIG_PARAVIRT_SPINLOCKS) #include <asm/kvm_para.h> @@ -62,7 +77,6 @@ int main(void) ENTRY(cr2); ENTRY(cr3); ENTRY(cr4); - ENTRY(cr8); ENTRY(gdt_desc); BLANK(); #undef ENTRY @@ -80,6 +94,11 @@ int main(void) DEFINE(__NR_syscall_max, sizeof(syscalls_64) - 1); DEFINE(NR_syscalls, sizeof(syscalls_64)); +#ifdef CONFIG_X86_X32_ABI + DEFINE(__NR_syscall_x32_max, sizeof(syscalls_x32) - 1); + DEFINE(X32_NR_syscalls, sizeof(syscalls_x32)); +#endif + DEFINE(__NR_syscall_compat_max, sizeof(syscalls_ia32) - 1); DEFINE(IA32_NR_syscalls, sizeof(syscalls_ia32)); diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index d7a1e5a9331c..890f60083eca 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -30,7 +30,7 @@ obj-$(CONFIG_PROC_FS) += proc.o obj-$(CONFIG_X86_FEATURE_NAMES) += capflags.o powerflags.o ifdef CONFIG_CPU_SUP_INTEL -obj-y += intel.o intel_pconfig.o +obj-y += intel.o intel_pconfig.o tsx.o obj-$(CONFIG_PM) += intel_epb.o endif obj-$(CONFIG_CPU_SUP_AMD) += amd.o diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 68c363c341bf..90f75e515876 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -8,6 +8,7 @@ #include <linux/sched.h> #include <linux/sched/clock.h> #include <linux/random.h> +#include <linux/topology.h> #include <asm/processor.h> #include <asm/apic.h> #include <asm/cacheinfo.h> @@ -889,6 +890,10 @@ static void init_amd_zn(struct cpuinfo_x86 *c) { set_cpu_cap(c, X86_FEATURE_ZEN); +#ifdef CONFIG_NUMA + node_reclaim_distance = 32; +#endif + /* * Fix erratum 1076: CPB feature bit not being set in CPUID. * Always set it, except when running under a hypervisor. @@ -945,12 +950,8 @@ static void init_amd(struct cpuinfo_x86 *c) init_amd_cacheinfo(c); if (cpu_has(c, X86_FEATURE_XMM2)) { - unsigned long long val; - int ret; - /* - * A serializing LFENCE has less overhead than MFENCE, so - * use it for execution serialization. On families which + * Use LFENCE for execution serialization. On families which * don't have that MSR, LFENCE is already serializing. * msr_set_bit() uses the safe accessors, too, even if the MSR * is not present. @@ -958,19 +959,8 @@ static void init_amd(struct cpuinfo_x86 *c) msr_set_bit(MSR_F10H_DECFG, MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT); - /* - * Verify that the MSR write was successful (could be running - * under a hypervisor) and only then assume that LFENCE is - * serializing. - */ - ret = rdmsrl_safe(MSR_F10H_DECFG, &val); - if (!ret && (val & MSR_F10H_DECFG_LFENCE_SERIALIZE)) { - /* A serializing LFENCE stops RDTSC speculation */ - set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC); - } else { - /* MFENCE stops RDTSC speculation */ - set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC); - } + /* A serializing LFENCE stops RDTSC speculation */ + set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC); } /* diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index c6fa3ef10b4e..4c7b0fa15a19 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -39,6 +39,7 @@ static void __init spectre_v2_select_mitigation(void); static void __init ssb_select_mitigation(void); static void __init l1tf_select_mitigation(void); static void __init mds_select_mitigation(void); +static void __init taa_select_mitigation(void); /* The base value of the SPEC_CTRL MSR that always has to be preserved. 
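[The asm-offsets_64.c hunk above sizes the new X32 table with the same sizeof trick already used for the 64-bit and ia32 tables: each __SYSCALL_* macro expands to the designated initializer [nr] = 1, so sizeof(array) is the highest syscall number plus one, with gaps staying zero. A compilable demo:

#include <stdio.h>

#define SYSCALL(nr, sym) [nr] = 1,

static char syscalls[] = {
	SYSCALL(0, read)
	SYSCALL(1, write)
	SYSCALL(42, connect)	/* gaps are fine; they stay zero */
};

int main(void)
{
	printf("__NR_syscall_max = %zu\n", sizeof(syscalls) - 1); /* 42 */
	printf("NR_syscalls      = %zu\n", sizeof(syscalls));     /* 43 */
	return 0;
}
]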
*/ u64 x86_spec_ctrl_base; @@ -105,6 +106,7 @@ void __init check_bugs(void) ssb_select_mitigation(); l1tf_select_mitigation(); mds_select_mitigation(); + taa_select_mitigation(); arch_smt_update(); @@ -269,6 +271,100 @@ static int __init mds_cmdline(char *str) early_param("mds", mds_cmdline); #undef pr_fmt +#define pr_fmt(fmt) "TAA: " fmt + +/* Default mitigation for TAA-affected CPUs */ +static enum taa_mitigations taa_mitigation __ro_after_init = TAA_MITIGATION_VERW; +static bool taa_nosmt __ro_after_init; + +static const char * const taa_strings[] = { + [TAA_MITIGATION_OFF] = "Vulnerable", + [TAA_MITIGATION_UCODE_NEEDED] = "Vulnerable: Clear CPU buffers attempted, no microcode", + [TAA_MITIGATION_VERW] = "Mitigation: Clear CPU buffers", + [TAA_MITIGATION_TSX_DISABLED] = "Mitigation: TSX disabled", +}; + +static void __init taa_select_mitigation(void) +{ + u64 ia32_cap; + + if (!boot_cpu_has_bug(X86_BUG_TAA)) { + taa_mitigation = TAA_MITIGATION_OFF; + return; + } + + /* TSX previously disabled by tsx=off */ + if (!boot_cpu_has(X86_FEATURE_RTM)) { + taa_mitigation = TAA_MITIGATION_TSX_DISABLED; + goto out; + } + + if (cpu_mitigations_off()) { + taa_mitigation = TAA_MITIGATION_OFF; + return; + } + + /* TAA mitigation is turned off on the cmdline (tsx_async_abort=off) */ + if (taa_mitigation == TAA_MITIGATION_OFF) + goto out; + + if (boot_cpu_has(X86_FEATURE_MD_CLEAR)) + taa_mitigation = TAA_MITIGATION_VERW; + else + taa_mitigation = TAA_MITIGATION_UCODE_NEEDED; + + /* + * VERW doesn't clear the CPU buffers when MD_CLEAR=1 and MDS_NO=1. + * A microcode update fixes this behavior to clear CPU buffers. It also + * adds support for MSR_IA32_TSX_CTRL which is enumerated by the + * ARCH_CAP_TSX_CTRL_MSR bit. + * + * On MDS_NO=1 CPUs if ARCH_CAP_TSX_CTRL_MSR is not set, microcode + * update is required. + */ + ia32_cap = x86_read_arch_cap_msr(); + if ( (ia32_cap & ARCH_CAP_MDS_NO) && + !(ia32_cap & ARCH_CAP_TSX_CTRL_MSR)) + taa_mitigation = TAA_MITIGATION_UCODE_NEEDED; + + /* + * TSX is enabled, select alternate mitigation for TAA which is + * the same as MDS. Enable MDS static branch to clear CPU buffers. + * + * For guests that can't determine whether the correct microcode is + * present on host, enable the mitigation for UCODE_NEEDED as well. + */ + static_branch_enable(&mds_user_clear); + + if (taa_nosmt || cpu_mitigations_auto_nosmt()) + cpu_smt_disable(false); + +out: + pr_info("%s\n", taa_strings[taa_mitigation]); +} + +static int __init tsx_async_abort_parse_cmdline(char *str) +{ + if (!boot_cpu_has_bug(X86_BUG_TAA)) + return 0; + + if (!str) + return -EINVAL; + + if (!strcmp(str, "off")) { + taa_mitigation = TAA_MITIGATION_OFF; + } else if (!strcmp(str, "full")) { + taa_mitigation = TAA_MITIGATION_VERW; + } else if (!strcmp(str, "full,nosmt")) { + taa_mitigation = TAA_MITIGATION_VERW; + taa_nosmt = true; + } + + return 0; +} +early_param("tsx_async_abort", tsx_async_abort_parse_cmdline); + +#undef pr_fmt #define pr_fmt(fmt) "Spectre V1 : " fmt enum spectre_v1_mitigation { @@ -786,13 +882,10 @@ static void update_mds_branch_idle(void) } #define MDS_MSG_SMT "MDS CPU bug present and SMT on, data leak possible. See https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/mds.html for more details.\n" +#define TAA_MSG_SMT "TAA CPU bug present and SMT on, data leak possible. See https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/tsx_async_abort.html for more details.\n" -void arch_smt_update(void) +void cpu_bugs_smt_update(void) { - /* Enhanced IBRS implies STIBP. No update required. 
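[taa_select_mitigation() above is a short decision tree: no RTM means tsx=off already handled it, MD_CLEAR selects VERW, and on MDS_NO parts the TSX_CTRL MSR must additionally be present or the microcode is deemed too old for VERW to help. The same tree as a pure function, cmdline/mitigations=off handling omitted and flags standing in for the cpufeature/MSR queries:

#include <stdio.h>

enum taa { TAA_OFF, TAA_UCODE_NEEDED, TAA_VERW, TAA_TSX_DISABLED };

static enum taa pick_taa(int bug_taa, int has_rtm, int has_md_clear,
			 int mds_no, int has_tsx_ctrl)
{
	if (!bug_taa)
		return TAA_OFF;
	if (!has_rtm)			/* tsx=off already disabled TSX */
		return TAA_TSX_DISABLED;
	if (!has_md_clear)
		return TAA_UCODE_NEEDED;
	/* On MDS_NO parts, VERW only clears CPU buffers once the
	 * TSX_CTRL-capable microcode update is in place. */
	if (mds_no && !has_tsx_ctrl)
		return TAA_UCODE_NEEDED;
	return TAA_VERW;
}

int main(void)
{
	printf("%d\n", pick_taa(1, 1, 1, 1, 0)); /* 1: ucode needed */
	printf("%d\n", pick_taa(1, 1, 1, 1, 1)); /* 2: VERW */
	printf("%d\n", pick_taa(1, 0, 0, 0, 0)); /* 3: TSX disabled */
	return 0;
}
]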
*/ - if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED) - return; - mutex_lock(&spec_ctrl_mutex); switch (spectre_v2_user) { @@ -819,6 +912,17 @@ void arch_smt_update(void) break; } + switch (taa_mitigation) { + case TAA_MITIGATION_VERW: + case TAA_MITIGATION_UCODE_NEEDED: + if (sched_smt_active()) + pr_warn_once(TAA_MSG_SMT); + break; + case TAA_MITIGATION_TSX_DISABLED: + case TAA_MITIGATION_OFF: + break; + } + mutex_unlock(&spec_ctrl_mutex); } @@ -1149,6 +1253,9 @@ void x86_spec_ctrl_setup_ap(void) x86_amd_ssb_disable(); } +bool itlb_multihit_kvm_mitigation; +EXPORT_SYMBOL_GPL(itlb_multihit_kvm_mitigation); + #undef pr_fmt #define pr_fmt(fmt) "L1TF: " fmt @@ -1184,15 +1291,15 @@ static void override_cache_bits(struct cpuinfo_x86 *c) case INTEL_FAM6_WESTMERE: case INTEL_FAM6_SANDYBRIDGE: case INTEL_FAM6_IVYBRIDGE: - case INTEL_FAM6_HASWELL_CORE: - case INTEL_FAM6_HASWELL_ULT: - case INTEL_FAM6_HASWELL_GT3E: - case INTEL_FAM6_BROADWELL_CORE: - case INTEL_FAM6_BROADWELL_GT3E: - case INTEL_FAM6_SKYLAKE_MOBILE: - case INTEL_FAM6_SKYLAKE_DESKTOP: - case INTEL_FAM6_KABYLAKE_MOBILE: - case INTEL_FAM6_KABYLAKE_DESKTOP: + case INTEL_FAM6_HASWELL: + case INTEL_FAM6_HASWELL_L: + case INTEL_FAM6_HASWELL_G: + case INTEL_FAM6_BROADWELL: + case INTEL_FAM6_BROADWELL_G: + case INTEL_FAM6_SKYLAKE_L: + case INTEL_FAM6_SKYLAKE: + case INTEL_FAM6_KABYLAKE_L: + case INTEL_FAM6_KABYLAKE: if (c->x86_cache_bits < 44) c->x86_cache_bits = 44; break; @@ -1304,11 +1411,24 @@ static ssize_t l1tf_show_state(char *buf) l1tf_vmx_states[l1tf_vmx_mitigation], sched_smt_active() ? "vulnerable" : "disabled"); } + +static ssize_t itlb_multihit_show_state(char *buf) +{ + if (itlb_multihit_kvm_mitigation) + return sprintf(buf, "KVM: Mitigation: Split huge pages\n"); + else + return sprintf(buf, "KVM: Vulnerable\n"); +} #else static ssize_t l1tf_show_state(char *buf) { return sprintf(buf, "%s\n", L1TF_DEFAULT_MSG); } + +static ssize_t itlb_multihit_show_state(char *buf) +{ + return sprintf(buf, "Processor vulnerable\n"); +} #endif static ssize_t mds_show_state(char *buf) @@ -1328,6 +1448,21 @@ static ssize_t mds_show_state(char *buf) sched_smt_active() ? "vulnerable" : "disabled"); } +static ssize_t tsx_async_abort_show_state(char *buf) +{ + if ((taa_mitigation == TAA_MITIGATION_TSX_DISABLED) || + (taa_mitigation == TAA_MITIGATION_OFF)) + return sprintf(buf, "%s\n", taa_strings[taa_mitigation]); + + if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) { + return sprintf(buf, "%s; SMT Host state unknown\n", + taa_strings[taa_mitigation]); + } + + return sprintf(buf, "%s; SMT %s\n", taa_strings[taa_mitigation], + sched_smt_active() ? 
"vulnerable" : "disabled"); +} + static char *stibp_state(void) { if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED) @@ -1398,6 +1533,12 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr case X86_BUG_MDS: return mds_show_state(buf); + case X86_BUG_TAA: + return tsx_async_abort_show_state(buf); + + case X86_BUG_ITLB_MULTIHIT: + return itlb_multihit_show_state(buf); + default: break; } @@ -1434,4 +1575,14 @@ ssize_t cpu_show_mds(struct device *dev, struct device_attribute *attr, char *bu { return cpu_show_common(dev, attr, buf, X86_BUG_MDS); } + +ssize_t cpu_show_tsx_async_abort(struct device *dev, struct device_attribute *attr, char *buf) +{ + return cpu_show_common(dev, attr, buf, X86_BUG_TAA); +} + +ssize_t cpu_show_itlb_multihit(struct device *dev, struct device_attribute *attr, char *buf) +{ + return cpu_show_common(dev, attr, buf, X86_BUG_ITLB_MULTIHIT); +} #endif diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index f125bf7ecb6f..fffe21945374 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1016,13 +1016,14 @@ static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c) #endif } -#define NO_SPECULATION BIT(0) -#define NO_MELTDOWN BIT(1) -#define NO_SSB BIT(2) -#define NO_L1TF BIT(3) -#define NO_MDS BIT(4) -#define MSBDS_ONLY BIT(5) -#define NO_SWAPGS BIT(6) +#define NO_SPECULATION BIT(0) +#define NO_MELTDOWN BIT(1) +#define NO_SSB BIT(2) +#define NO_L1TF BIT(3) +#define NO_MDS BIT(4) +#define MSBDS_ONLY BIT(5) +#define NO_SWAPGS BIT(6) +#define NO_ITLB_MULTIHIT BIT(7) #define VULNWL(_vendor, _family, _model, _whitelist) \ { X86_VENDOR_##_vendor, _family, _model, X86_FEATURE_ANY, _whitelist } @@ -1043,26 +1044,27 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = { VULNWL(NSC, 5, X86_MODEL_ANY, NO_SPECULATION), /* Intel Family 6 */ - VULNWL_INTEL(ATOM_SALTWELL, NO_SPECULATION), - VULNWL_INTEL(ATOM_SALTWELL_TABLET, NO_SPECULATION), - VULNWL_INTEL(ATOM_SALTWELL_MID, NO_SPECULATION), - VULNWL_INTEL(ATOM_BONNELL, NO_SPECULATION), - VULNWL_INTEL(ATOM_BONNELL_MID, NO_SPECULATION), - - VULNWL_INTEL(ATOM_SILVERMONT, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS), - VULNWL_INTEL(ATOM_SILVERMONT_X, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS), - VULNWL_INTEL(ATOM_SILVERMONT_MID, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS), - VULNWL_INTEL(ATOM_AIRMONT, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS), - VULNWL_INTEL(XEON_PHI_KNL, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS), - VULNWL_INTEL(XEON_PHI_KNM, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS), + VULNWL_INTEL(ATOM_SALTWELL, NO_SPECULATION | NO_ITLB_MULTIHIT), + VULNWL_INTEL(ATOM_SALTWELL_TABLET, NO_SPECULATION | NO_ITLB_MULTIHIT), + VULNWL_INTEL(ATOM_SALTWELL_MID, NO_SPECULATION | NO_ITLB_MULTIHIT), + VULNWL_INTEL(ATOM_BONNELL, NO_SPECULATION | NO_ITLB_MULTIHIT), + VULNWL_INTEL(ATOM_BONNELL_MID, NO_SPECULATION | NO_ITLB_MULTIHIT), + + VULNWL_INTEL(ATOM_SILVERMONT, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT), + VULNWL_INTEL(ATOM_SILVERMONT_D, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT), + VULNWL_INTEL(ATOM_SILVERMONT_MID, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT), + VULNWL_INTEL(ATOM_AIRMONT, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT), + VULNWL_INTEL(XEON_PHI_KNL, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT), + VULNWL_INTEL(XEON_PHI_KNM, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT), VULNWL_INTEL(CORE_YONAH, NO_SSB), - 
VULNWL_INTEL(ATOM_AIRMONT_MID, NO_L1TF | MSBDS_ONLY | NO_SWAPGS), + VULNWL_INTEL(ATOM_AIRMONT_MID, NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT), + VULNWL_INTEL(ATOM_AIRMONT_NP, NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT), - VULNWL_INTEL(ATOM_GOLDMONT, NO_MDS | NO_L1TF | NO_SWAPGS), - VULNWL_INTEL(ATOM_GOLDMONT_X, NO_MDS | NO_L1TF | NO_SWAPGS), - VULNWL_INTEL(ATOM_GOLDMONT_PLUS, NO_MDS | NO_L1TF | NO_SWAPGS), + VULNWL_INTEL(ATOM_GOLDMONT, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT), + VULNWL_INTEL(ATOM_GOLDMONT_D, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT), + VULNWL_INTEL(ATOM_GOLDMONT_PLUS, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT), /* * Technically, swapgs isn't serializing on AMD (despite it previously @@ -1072,15 +1074,17 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = { * good enough for our purposes. */ + VULNWL_INTEL(ATOM_TREMONT_D, NO_ITLB_MULTIHIT), + /* AMD Family 0xf - 0x12 */ - VULNWL_AMD(0x0f, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS), - VULNWL_AMD(0x10, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS), - VULNWL_AMD(0x11, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS), - VULNWL_AMD(0x12, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS), + VULNWL_AMD(0x0f, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT), + VULNWL_AMD(0x10, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT), + VULNWL_AMD(0x11, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT), + VULNWL_AMD(0x12, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT), /* FAMILY_ANY must be last, otherwise 0x0f - 0x12 matches won't work */ - VULNWL_AMD(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS), - VULNWL_HYGON(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS), + VULNWL_AMD(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT), + VULNWL_HYGON(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT), {} }; @@ -1091,19 +1095,30 @@ static bool __init cpu_matches(unsigned long which) return m && !!(m->driver_data & which); } -static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) +u64 x86_read_arch_cap_msr(void) { u64 ia32_cap = 0; + if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES)) + rdmsrl(MSR_IA32_ARCH_CAPABILITIES, ia32_cap); + + return ia32_cap; +} + +static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) +{ + u64 ia32_cap = x86_read_arch_cap_msr(); + + /* Set ITLB_MULTIHIT bug if cpu is not in the whitelist and not mitigated */ + if (!cpu_matches(NO_ITLB_MULTIHIT) && !(ia32_cap & ARCH_CAP_PSCHANGE_MC_NO)) + setup_force_cpu_bug(X86_BUG_ITLB_MULTIHIT); + if (cpu_matches(NO_SPECULATION)) return; setup_force_cpu_bug(X86_BUG_SPECTRE_V1); setup_force_cpu_bug(X86_BUG_SPECTRE_V2); - if (cpu_has(c, X86_FEATURE_ARCH_CAPABILITIES)) - rdmsrl(MSR_IA32_ARCH_CAPABILITIES, ia32_cap); - if (!cpu_matches(NO_SSB) && !(ia32_cap & ARCH_CAP_SSB_NO) && !cpu_has(c, X86_FEATURE_AMD_SSB_NO)) setup_force_cpu_bug(X86_BUG_SPEC_STORE_BYPASS); @@ -1120,6 +1135,21 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) if (!cpu_matches(NO_SWAPGS)) setup_force_cpu_bug(X86_BUG_SWAPGS); + /* + * When the CPU is not mitigated for TAA (TAA_NO=0) set TAA bug when: + * - TSX is supported or + * - TSX_CTRL is present + * + * TSX_CTRL check is needed for cases when TSX could be disabled before + * the kernel boot e.g. kexec. 
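[The VULNWL() table above is consumed via cpu_matches(): find the (vendor, family, model) entry for the running CPU and test a flag in that entry's driver_data. A simplified stand-in for struct x86_cpu_id matching; the vendor codes and the GOLDMONT model number here are illustrative only:

#include <stdio.h>

#define NO_MDS  (1u << 4)
#define NO_ITLB (1u << 7)

struct vuln_entry { int vendor, family, model; unsigned int flags; };

static const struct vuln_entry whitelist[] = {
	{ 0 /* "Intel" */, 6,    0x5c /* GOLDMONT */, NO_MDS | NO_ITLB },
	{ 2 /* "AMD"   */, 0x17, -1   /* any model */, NO_MDS },
	{ 0, 0, 0, 0 }			/* terminator */
};

static int cpu_matches(int vendor, int family, int model, unsigned int which)
{
	for (const struct vuln_entry *e = whitelist; e->flags; e++) {
		if (e->vendor == vendor && e->family == family &&
		    (e->model == -1 || e->model == model))
			return !!(e->flags & which);
	}
	return 0;			/* not whitelisted: assume affected */
}

int main(void)
{
	printf("%d\n", cpu_matches(0, 6, 0x5c, NO_ITLB)); /* 1 */
	printf("%d\n", cpu_matches(0, 6, 0x55, NO_ITLB)); /* 0: not listed */
	return 0;
}
]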
+ * TSX_CTRL check alone is not sufficient for cases when the microcode + * update is not present or running as guest that don't get TSX_CTRL. + */ + if (!(ia32_cap & ARCH_CAP_TAA_NO) && + (cpu_has(c, X86_FEATURE_RTM) || + (ia32_cap & ARCH_CAP_TSX_CTRL_MSR))) + setup_force_cpu_bug(X86_BUG_TAA); + if (cpu_matches(NO_MELTDOWN)) return; @@ -1553,6 +1583,8 @@ void __init identify_boot_cpu(void) #endif cpu_detect_tlb(&boot_cpu_data); setup_cr_pinning(); + + tsx_init(); } void identify_secondary_cpu(struct cpuinfo_x86 *c) @@ -1957,3 +1989,14 @@ void microcode_check(void) pr_warn("x86/CPU: CPU features have changed after loading microcode, but might not take effect.\n"); pr_warn("x86/CPU: Please consider either early loading through initrd/built-in or a potential BIOS update.\n"); } + +/* + * Invoked from core CPU hotplug code after hotplug operations + */ +void arch_smt_update(void) +{ + /* Handle the speculative execution misfeatures */ + cpu_bugs_smt_update(); + /* Check whether IPI broadcasting can be enabled */ + apic_smt_update(); +} diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h index c0e2407abdd6..38ab6e115eac 100644 --- a/arch/x86/kernel/cpu/cpu.h +++ b/arch/x86/kernel/cpu/cpu.h @@ -44,6 +44,22 @@ struct _tlb_table { extern const struct cpu_dev *const __x86_cpu_dev_start[], *const __x86_cpu_dev_end[]; +#ifdef CONFIG_CPU_SUP_INTEL +enum tsx_ctrl_states { + TSX_CTRL_ENABLE, + TSX_CTRL_DISABLE, + TSX_CTRL_NOT_SUPPORTED, +}; + +extern __ro_after_init enum tsx_ctrl_states tsx_ctrl_state; + +extern void __init tsx_init(void); +extern void tsx_enable(void); +extern void tsx_disable(void); +#else +static inline void tsx_init(void) { } +#endif /* CONFIG_CPU_SUP_INTEL */ + extern void get_cpu_cap(struct cpuinfo_x86 *c); extern void get_cpu_address_sizes(struct cpuinfo_x86 *c); extern void cpu_detect_cache_sizes(struct cpuinfo_x86 *c); @@ -62,4 +78,6 @@ unsigned int aperfmperf_get_khz(int cpu); extern void x86_spec_ctrl_setup_ap(void); +extern u64 x86_read_arch_cap_msr(void); + #endif /* ARCH_X86_CPU_H */ diff --git a/arch/x86/kernel/cpu/cpuid-deps.c b/arch/x86/kernel/cpu/cpuid-deps.c index b5353244749b..3cbe24ca80ab 100644 --- a/arch/x86/kernel/cpu/cpuid-deps.c +++ b/arch/x86/kernel/cpu/cpuid-deps.c @@ -20,54 +20,55 @@ struct cpuid_dep { * but it's difficult to tell that to the init reference checker. 
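[The X86_BUG_TAA condition added to cpu_set_bug_bits() reduces to a small predicate over two MSR_IA32_ARCH_CAPABILITIES bits and the RTM CPUID flag; the TSX_CTRL term is what catches the kexec case where TSX was hidden before this kernel booted. Restated standalone, bit positions mirroring msr-index.h:

#include <stdio.h>

#define ARCH_CAP_TSX_CTRL_MSR (1ULL << 7)
#define ARCH_CAP_TAA_NO       (1ULL << 8)

static int cpu_has_bug_taa(unsigned long long ia32_cap, int has_rtm)
{
	if (ia32_cap & ARCH_CAP_TAA_NO)
		return 0;
	/* RTM visible, or TSX merely hidden by TSX_CTRL (kexec case) */
	return has_rtm || !!(ia32_cap & ARCH_CAP_TSX_CTRL_MSR);
}

int main(void)
{
	printf("%d\n", cpu_has_bug_taa(0ULL, 1));	/* 1: RTM on, no TAA_NO */
	printf("%d\n", cpu_has_bug_taa(ARCH_CAP_TAA_NO, 1)); /* 0: fixed part */
	printf("%d\n", cpu_has_bug_taa(ARCH_CAP_TSX_CTRL_MSR, 0)); /* 1 */
	return 0;
}
]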
*/ static const struct cpuid_dep cpuid_deps[] = { - { X86_FEATURE_FXSR, X86_FEATURE_FPU }, - { X86_FEATURE_XSAVEOPT, X86_FEATURE_XSAVE }, - { X86_FEATURE_XSAVEC, X86_FEATURE_XSAVE }, - { X86_FEATURE_XSAVES, X86_FEATURE_XSAVE }, - { X86_FEATURE_AVX, X86_FEATURE_XSAVE }, - { X86_FEATURE_PKU, X86_FEATURE_XSAVE }, - { X86_FEATURE_MPX, X86_FEATURE_XSAVE }, - { X86_FEATURE_XGETBV1, X86_FEATURE_XSAVE }, - { X86_FEATURE_CMOV, X86_FEATURE_FXSR }, - { X86_FEATURE_MMX, X86_FEATURE_FXSR }, - { X86_FEATURE_MMXEXT, X86_FEATURE_MMX }, - { X86_FEATURE_FXSR_OPT, X86_FEATURE_FXSR }, - { X86_FEATURE_XSAVE, X86_FEATURE_FXSR }, - { X86_FEATURE_XMM, X86_FEATURE_FXSR }, - { X86_FEATURE_XMM2, X86_FEATURE_XMM }, - { X86_FEATURE_XMM3, X86_FEATURE_XMM2 }, - { X86_FEATURE_XMM4_1, X86_FEATURE_XMM2 }, - { X86_FEATURE_XMM4_2, X86_FEATURE_XMM2 }, - { X86_FEATURE_XMM3, X86_FEATURE_XMM2 }, - { X86_FEATURE_PCLMULQDQ, X86_FEATURE_XMM2 }, - { X86_FEATURE_SSSE3, X86_FEATURE_XMM2, }, - { X86_FEATURE_F16C, X86_FEATURE_XMM2, }, - { X86_FEATURE_AES, X86_FEATURE_XMM2 }, - { X86_FEATURE_SHA_NI, X86_FEATURE_XMM2 }, - { X86_FEATURE_FMA, X86_FEATURE_AVX }, - { X86_FEATURE_AVX2, X86_FEATURE_AVX, }, - { X86_FEATURE_AVX512F, X86_FEATURE_AVX, }, - { X86_FEATURE_AVX512IFMA, X86_FEATURE_AVX512F }, - { X86_FEATURE_AVX512PF, X86_FEATURE_AVX512F }, - { X86_FEATURE_AVX512ER, X86_FEATURE_AVX512F }, - { X86_FEATURE_AVX512CD, X86_FEATURE_AVX512F }, - { X86_FEATURE_AVX512DQ, X86_FEATURE_AVX512F }, - { X86_FEATURE_AVX512BW, X86_FEATURE_AVX512F }, - { X86_FEATURE_AVX512VL, X86_FEATURE_AVX512F }, - { X86_FEATURE_AVX512VBMI, X86_FEATURE_AVX512F }, - { X86_FEATURE_AVX512_VBMI2, X86_FEATURE_AVX512VL }, - { X86_FEATURE_GFNI, X86_FEATURE_AVX512VL }, - { X86_FEATURE_VAES, X86_FEATURE_AVX512VL }, - { X86_FEATURE_VPCLMULQDQ, X86_FEATURE_AVX512VL }, - { X86_FEATURE_AVX512_VNNI, X86_FEATURE_AVX512VL }, - { X86_FEATURE_AVX512_BITALG, X86_FEATURE_AVX512VL }, - { X86_FEATURE_AVX512_4VNNIW, X86_FEATURE_AVX512F }, - { X86_FEATURE_AVX512_4FMAPS, X86_FEATURE_AVX512F }, - { X86_FEATURE_AVX512_VPOPCNTDQ, X86_FEATURE_AVX512F }, - { X86_FEATURE_CQM_OCCUP_LLC, X86_FEATURE_CQM_LLC }, - { X86_FEATURE_CQM_MBM_TOTAL, X86_FEATURE_CQM_LLC }, - { X86_FEATURE_CQM_MBM_LOCAL, X86_FEATURE_CQM_LLC }, - { X86_FEATURE_AVX512_BF16, X86_FEATURE_AVX512VL }, + { X86_FEATURE_FXSR, X86_FEATURE_FPU }, + { X86_FEATURE_XSAVEOPT, X86_FEATURE_XSAVE }, + { X86_FEATURE_XSAVEC, X86_FEATURE_XSAVE }, + { X86_FEATURE_XSAVES, X86_FEATURE_XSAVE }, + { X86_FEATURE_AVX, X86_FEATURE_XSAVE }, + { X86_FEATURE_PKU, X86_FEATURE_XSAVE }, + { X86_FEATURE_MPX, X86_FEATURE_XSAVE }, + { X86_FEATURE_XGETBV1, X86_FEATURE_XSAVE }, + { X86_FEATURE_CMOV, X86_FEATURE_FXSR }, + { X86_FEATURE_MMX, X86_FEATURE_FXSR }, + { X86_FEATURE_MMXEXT, X86_FEATURE_MMX }, + { X86_FEATURE_FXSR_OPT, X86_FEATURE_FXSR }, + { X86_FEATURE_XSAVE, X86_FEATURE_FXSR }, + { X86_FEATURE_XMM, X86_FEATURE_FXSR }, + { X86_FEATURE_XMM2, X86_FEATURE_XMM }, + { X86_FEATURE_XMM3, X86_FEATURE_XMM2 }, + { X86_FEATURE_XMM4_1, X86_FEATURE_XMM2 }, + { X86_FEATURE_XMM4_2, X86_FEATURE_XMM2 }, + { X86_FEATURE_XMM3, X86_FEATURE_XMM2 }, + { X86_FEATURE_PCLMULQDQ, X86_FEATURE_XMM2 }, + { X86_FEATURE_SSSE3, X86_FEATURE_XMM2, }, + { X86_FEATURE_F16C, X86_FEATURE_XMM2, }, + { X86_FEATURE_AES, X86_FEATURE_XMM2 }, + { X86_FEATURE_SHA_NI, X86_FEATURE_XMM2 }, + { X86_FEATURE_FMA, X86_FEATURE_AVX }, + { X86_FEATURE_AVX2, X86_FEATURE_AVX, }, + { X86_FEATURE_AVX512F, X86_FEATURE_AVX, }, + { X86_FEATURE_AVX512IFMA, X86_FEATURE_AVX512F }, + { X86_FEATURE_AVX512PF, 
X86_FEATURE_AVX512F }, + { X86_FEATURE_AVX512ER, X86_FEATURE_AVX512F }, + { X86_FEATURE_AVX512CD, X86_FEATURE_AVX512F }, + { X86_FEATURE_AVX512DQ, X86_FEATURE_AVX512F }, + { X86_FEATURE_AVX512BW, X86_FEATURE_AVX512F }, + { X86_FEATURE_AVX512VL, X86_FEATURE_AVX512F }, + { X86_FEATURE_AVX512VBMI, X86_FEATURE_AVX512F }, + { X86_FEATURE_AVX512_VBMI2, X86_FEATURE_AVX512VL }, + { X86_FEATURE_GFNI, X86_FEATURE_AVX512VL }, + { X86_FEATURE_VAES, X86_FEATURE_AVX512VL }, + { X86_FEATURE_VPCLMULQDQ, X86_FEATURE_AVX512VL }, + { X86_FEATURE_AVX512_VNNI, X86_FEATURE_AVX512VL }, + { X86_FEATURE_AVX512_BITALG, X86_FEATURE_AVX512VL }, + { X86_FEATURE_AVX512_4VNNIW, X86_FEATURE_AVX512F }, + { X86_FEATURE_AVX512_4FMAPS, X86_FEATURE_AVX512F }, + { X86_FEATURE_AVX512_VPOPCNTDQ, X86_FEATURE_AVX512F }, + { X86_FEATURE_AVX512_VP2INTERSECT, X86_FEATURE_AVX512VL }, + { X86_FEATURE_CQM_OCCUP_LLC, X86_FEATURE_CQM_LLC }, + { X86_FEATURE_CQM_MBM_TOTAL, X86_FEATURE_CQM_LLC }, + { X86_FEATURE_CQM_MBM_LOCAL, X86_FEATURE_CQM_LLC }, + { X86_FEATURE_AVX512_BF16, X86_FEATURE_AVX512VL }, {} }; diff --git a/arch/x86/kernel/cpu/hygon.c b/arch/x86/kernel/cpu/hygon.c index 415621ddb8a2..4e28c1fc8749 100644 --- a/arch/x86/kernel/cpu/hygon.c +++ b/arch/x86/kernel/cpu/hygon.c @@ -330,12 +330,8 @@ static void init_hygon(struct cpuinfo_x86 *c) init_hygon_cacheinfo(c); if (cpu_has(c, X86_FEATURE_XMM2)) { - unsigned long long val; - int ret; - /* - * A serializing LFENCE has less overhead than MFENCE, so - * use it for execution serialization. On families which + * Use LFENCE for execution serialization. On families which * don't have that MSR, LFENCE is already serializing. * msr_set_bit() uses the safe accessors, too, even if the MSR * is not present. @@ -343,19 +339,8 @@ static void init_hygon(struct cpuinfo_x86 *c) msr_set_bit(MSR_F10H_DECFG, MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT); - /* - * Verify that the MSR write was successful (could be running - * under a hypervisor) and only then assume that LFENCE is - * serializing. 
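[The realigned cpuid_deps[] table above is live data, not documentation: when one feature is cleared, everything that transitively depends on it must go too, which the kernel achieves by re-scanning the table until nothing changes (loosely what do_clear_cpu_cap() does). A toy fixed-point version with invented feature IDs:

#include <stdio.h>

struct dep { int feature, depends; };

static const struct dep deps[] = {
	{ 2 /* XSAVE */, 1 /* FXSR  */ },
	{ 3 /* AVX   */, 2 /* XSAVE */ },
	{ 4 /* AVX2  */, 3 /* AVX   */ },
	{ 0, 0 }
};

static void clear_feature(unsigned int *caps, int feature)
{
	int changed = 1;

	*caps &= ~(1u << feature);

	/* Re-scan until no feature with a missing dependency survives */
	while (changed) {
		changed = 0;
		for (const struct dep *d = deps; d->feature; d++) {
			if ((*caps & (1u << d->feature)) &&
			    !(*caps & (1u << d->depends))) {
				*caps &= ~(1u << d->feature);
				changed = 1;
			}
		}
	}
}

int main(void)
{
	unsigned int caps = 0x1e;	/* features 1-4 present */

	clear_feature(&caps, 2);	/* drop "XSAVE" */
	printf("caps = %#x\n", caps);	/* 0x2: only "FXSR" is left */
	return 0;
}
]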
- */ - ret = rdmsrl_safe(MSR_F10H_DECFG, &val); - if (!ret && (val & MSR_F10H_DECFG_LFENCE_SERIALIZE)) { - /* A serializing LFENCE stops RDTSC speculation */ - set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC); - } else { - /* MFENCE stops RDTSC speculation */ - set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC); - } + /* A serializing LFENCE stops RDTSC speculation */ + set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC); } /* diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 24e619d1bf79..4a900804a023 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -142,21 +142,21 @@ struct sku_microcode { u32 microcode; }; static const struct sku_microcode spectre_bad_microcodes[] = { - { INTEL_FAM6_KABYLAKE_DESKTOP, 0x0B, 0x80 }, - { INTEL_FAM6_KABYLAKE_DESKTOP, 0x0A, 0x80 }, - { INTEL_FAM6_KABYLAKE_DESKTOP, 0x09, 0x80 }, - { INTEL_FAM6_KABYLAKE_MOBILE, 0x0A, 0x80 }, - { INTEL_FAM6_KABYLAKE_MOBILE, 0x09, 0x80 }, + { INTEL_FAM6_KABYLAKE, 0x0B, 0x80 }, + { INTEL_FAM6_KABYLAKE, 0x0A, 0x80 }, + { INTEL_FAM6_KABYLAKE, 0x09, 0x80 }, + { INTEL_FAM6_KABYLAKE_L, 0x0A, 0x80 }, + { INTEL_FAM6_KABYLAKE_L, 0x09, 0x80 }, { INTEL_FAM6_SKYLAKE_X, 0x03, 0x0100013e }, { INTEL_FAM6_SKYLAKE_X, 0x04, 0x0200003c }, - { INTEL_FAM6_BROADWELL_CORE, 0x04, 0x28 }, - { INTEL_FAM6_BROADWELL_GT3E, 0x01, 0x1b }, - { INTEL_FAM6_BROADWELL_XEON_D, 0x02, 0x14 }, - { INTEL_FAM6_BROADWELL_XEON_D, 0x03, 0x07000011 }, + { INTEL_FAM6_BROADWELL, 0x04, 0x28 }, + { INTEL_FAM6_BROADWELL_G, 0x01, 0x1b }, + { INTEL_FAM6_BROADWELL_D, 0x02, 0x14 }, + { INTEL_FAM6_BROADWELL_D, 0x03, 0x07000011 }, { INTEL_FAM6_BROADWELL_X, 0x01, 0x0b000025 }, - { INTEL_FAM6_HASWELL_ULT, 0x01, 0x21 }, - { INTEL_FAM6_HASWELL_GT3E, 0x01, 0x18 }, - { INTEL_FAM6_HASWELL_CORE, 0x03, 0x23 }, + { INTEL_FAM6_HASWELL_L, 0x01, 0x21 }, + { INTEL_FAM6_HASWELL_G, 0x01, 0x18 }, + { INTEL_FAM6_HASWELL, 0x03, 0x23 }, { INTEL_FAM6_HASWELL_X, 0x02, 0x3b }, { INTEL_FAM6_HASWELL_X, 0x04, 0x10 }, { INTEL_FAM6_IVYBRIDGE_X, 0x04, 0x42a }, @@ -265,9 +265,10 @@ static void early_init_intel(struct cpuinfo_x86 *c) /* Penwell and Cloverview have the TSC which doesn't sleep on S3 */ if (c->x86 == 6) { switch (c->x86_model) { - case 0x27: /* Penwell */ - case 0x35: /* Cloverview */ - case 0x4a: /* Merrifield */ + case INTEL_FAM6_ATOM_SALTWELL_MID: + case INTEL_FAM6_ATOM_SALTWELL_TABLET: + case INTEL_FAM6_ATOM_SILVERMONT_MID: + case INTEL_FAM6_ATOM_AIRMONT_NP: set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC_S3); break; default: @@ -761,6 +762,11 @@ static void init_intel(struct cpuinfo_x86 *c) detect_tme(c); init_intel_misc_features(c); + + if (tsx_ctrl_state == TSX_CTRL_ENABLE) + tsx_enable(); + if (tsx_ctrl_state == TSX_CTRL_DISABLE) + tsx_disable(); } #ifdef CONFIG_X86_32 diff --git a/arch/x86/kernel/cpu/mce/intel.c b/arch/x86/kernel/cpu/mce/intel.c index e43eb6732630..88cd9598fa57 100644 --- a/arch/x86/kernel/cpu/mce/intel.c +++ b/arch/x86/kernel/cpu/mce/intel.c @@ -479,7 +479,7 @@ static void intel_ppin_init(struct cpuinfo_x86 *c) switch (c->x86_model) { case INTEL_FAM6_IVYBRIDGE_X: case INTEL_FAM6_HASWELL_X: - case INTEL_FAM6_BROADWELL_XEON_D: + case INTEL_FAM6_BROADWELL_D: case INTEL_FAM6_BROADWELL_X: case INTEL_FAM6_SKYLAKE_X: case INTEL_FAM6_XEON_PHI_KNL: diff --git a/arch/x86/kernel/cpu/mce/severity.c b/arch/x86/kernel/cpu/mce/severity.c index 210f1f5db5f7..87bcdc6dc2f0 100644 --- a/arch/x86/kernel/cpu/mce/severity.c +++ b/arch/x86/kernel/cpu/mce/severity.c @@ -107,11 +107,11 @@ static struct severity { */ MCESEV( AO, "Action optional: memory scrubbing error", - SER, 
MASK(MCI_STATUS_OVER|MCI_UC_AR|MCACOD_SCRUBMSK, MCI_STATUS_UC|MCACOD_SCRUB) + SER, MASK(MCI_UC_AR|MCACOD_SCRUBMSK, MCI_STATUS_UC|MCACOD_SCRUB) ), MCESEV( AO, "Action optional: last level cache writeback error", - SER, MASK(MCI_STATUS_OVER|MCI_UC_AR|MCACOD, MCI_STATUS_UC|MCACOD_L3WB) + SER, MASK(MCI_UC_AR|MCACOD, MCI_STATUS_UC|MCACOD_L3WB) ), /* ignore OVER for UCNA */ diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index 062f77279ce3..c656d92cd708 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -29,6 +29,7 @@ #include <asm/timer.h> #include <asm/reboot.h> #include <asm/nmi.h> +#include <clocksource/hyperv_timer.h> struct ms_hyperv_info ms_hyperv; EXPORT_SYMBOL_GPL(ms_hyperv); @@ -215,6 +216,10 @@ static void __init ms_hyperv_init_platform(void) int hv_host_info_ecx; int hv_host_info_edx; +#ifdef CONFIG_PARAVIRT + pv_info.name = "Hyper-V"; +#endif + /* * Extract the features and hints */ @@ -338,6 +343,15 @@ static void __init ms_hyperv_init_platform(void) x2apic_phys = 1; # endif + /* Register Hyper-V specific clocksource */ + hv_init_clocksource(); +#endif +} + +void hv_setup_sched_clock(void *sched_clock) +{ +#ifdef CONFIG_PARAVIRT + pv_ops.time.sched_clock = sched_clock; #endif } diff --git a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c index efbd54cc4e69..055c8613b531 100644 --- a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c +++ b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c @@ -522,6 +522,10 @@ int rdtgroup_mondata_show(struct seq_file *m, void *arg) int ret = 0; rdtgrp = rdtgroup_kn_lock_live(of->kn); + if (!rdtgrp) { + ret = -ENOENT; + goto out; + } md.priv = of->kn->priv; resid = md.u.rid; diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c index a46dee8e78db..2e3b06d6bbc6 100644 --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c @@ -461,10 +461,8 @@ static ssize_t rdtgroup_cpus_write(struct kernfs_open_file *of, } rdtgrp = rdtgroup_kn_lock_live(of->kn); - rdt_last_cmd_clear(); if (!rdtgrp) { ret = -ENOENT; - rdt_last_cmd_puts("Directory was removed\n"); goto unlock; } @@ -2648,10 +2646,8 @@ static int mkdir_rdt_prepare(struct kernfs_node *parent_kn, int ret; prdtgrp = rdtgroup_kn_lock_live(prgrp_kn); - rdt_last_cmd_clear(); if (!prdtgrp) { ret = -ENODEV; - rdt_last_cmd_puts("Directory was removed\n"); goto out_unlock; } diff --git a/arch/x86/kernel/cpu/tsx.c b/arch/x86/kernel/cpu/tsx.c new file mode 100644 index 000000000000..3e20d322bc98 --- /dev/null +++ b/arch/x86/kernel/cpu/tsx.c @@ -0,0 +1,140 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Intel Transactional Synchronization Extensions (TSX) control. + * + * Copyright (C) 2019 Intel Corporation + * + * Author: + * Pawan Gupta <pawan.kumar.gupta@linux.intel.com> + */ + +#include <linux/cpufeature.h> + +#include <asm/cmdline.h> + +#include "cpu.h" + +enum tsx_ctrl_states tsx_ctrl_state __ro_after_init = TSX_CTRL_NOT_SUPPORTED; + +void tsx_disable(void) +{ + u64 tsx; + + rdmsrl(MSR_IA32_TSX_CTRL, tsx); + + /* Force all transactions to immediately abort */ + tsx |= TSX_CTRL_RTM_DISABLE; + + /* + * Ensure TSX support is not enumerated in CPUID. + * This is visible to userspace and will ensure they + * do not waste resources trying TSX transactions that + * will always abort. 
+ */ + tsx |= TSX_CTRL_CPUID_CLEAR; + + wrmsrl(MSR_IA32_TSX_CTRL, tsx); +} + +void tsx_enable(void) +{ + u64 tsx; + + rdmsrl(MSR_IA32_TSX_CTRL, tsx); + + /* Enable the RTM feature in the cpu */ + tsx &= ~TSX_CTRL_RTM_DISABLE; + + /* + * Ensure TSX support is enumerated in CPUID. + * This is visible to userspace and will ensure they + * can enumerate and use the TSX feature. + */ + tsx &= ~TSX_CTRL_CPUID_CLEAR; + + wrmsrl(MSR_IA32_TSX_CTRL, tsx); +} + +static bool __init tsx_ctrl_is_supported(void) +{ + u64 ia32_cap = x86_read_arch_cap_msr(); + + /* + * TSX is controlled via MSR_IA32_TSX_CTRL. However, support for this + * MSR is enumerated by ARCH_CAP_TSX_MSR bit in MSR_IA32_ARCH_CAPABILITIES. + * + * TSX control (aka MSR_IA32_TSX_CTRL) is only available after a + * microcode update on CPUs that have their MSR_IA32_ARCH_CAPABILITIES + * bit MDS_NO=1. CPUs with MDS_NO=0 are not planned to get + * MSR_IA32_TSX_CTRL support even after a microcode update. Thus, + * tsx= cmdline requests will do nothing on CPUs without + * MSR_IA32_TSX_CTRL support. + */ + return !!(ia32_cap & ARCH_CAP_TSX_CTRL_MSR); +} + +static enum tsx_ctrl_states x86_get_tsx_auto_mode(void) +{ + if (boot_cpu_has_bug(X86_BUG_TAA)) + return TSX_CTRL_DISABLE; + + return TSX_CTRL_ENABLE; +} + +void __init tsx_init(void) +{ + char arg[5] = {}; + int ret; + + if (!tsx_ctrl_is_supported()) + return; + + ret = cmdline_find_option(boot_command_line, "tsx", arg, sizeof(arg)); + if (ret >= 0) { + if (!strcmp(arg, "on")) { + tsx_ctrl_state = TSX_CTRL_ENABLE; + } else if (!strcmp(arg, "off")) { + tsx_ctrl_state = TSX_CTRL_DISABLE; + } else if (!strcmp(arg, "auto")) { + tsx_ctrl_state = x86_get_tsx_auto_mode(); + } else { + tsx_ctrl_state = TSX_CTRL_DISABLE; + pr_err("tsx: invalid option, defaulting to off\n"); + } + } else { + /* tsx= not provided */ + if (IS_ENABLED(CONFIG_X86_INTEL_TSX_MODE_AUTO)) + tsx_ctrl_state = x86_get_tsx_auto_mode(); + else if (IS_ENABLED(CONFIG_X86_INTEL_TSX_MODE_OFF)) + tsx_ctrl_state = TSX_CTRL_DISABLE; + else + tsx_ctrl_state = TSX_CTRL_ENABLE; + } + + if (tsx_ctrl_state == TSX_CTRL_DISABLE) { + tsx_disable(); + + /* + * tsx_disable() will change the state of the + * RTM CPUID bit. Clear it here since it is now + * expected to be not set. + */ + setup_clear_cpu_cap(X86_FEATURE_RTM); + } else if (tsx_ctrl_state == TSX_CTRL_ENABLE) { + + /* + * HW defaults TSX to be enabled at bootup. + * We may still need the TSX enable support + * during init for special cases like + * kexec after TSX is disabled. + */ + tsx_enable(); + + /* + * tsx_enable() will change the state of the + * RTM CPUID bit. Force it here since it is now + * expected to be set. + */ + setup_force_cpu_cap(X86_FEATURE_RTM); + } +} diff --git a/arch/x86/kernel/cpu/umwait.c b/arch/x86/kernel/cpu/umwait.c index 32b4dc9030aa..c222f283b456 100644 --- a/arch/x86/kernel/cpu/umwait.c +++ b/arch/x86/kernel/cpu/umwait.c @@ -17,6 +17,12 @@ */ static u32 umwait_control_cached = UMWAIT_CTRL_VAL(100000, UMWAIT_C02_ENABLE); +u32 get_umwait_control_msr(void) +{ + return umwait_control_cached; +} +EXPORT_SYMBOL_GPL(get_umwait_control_msr); + /* * Cache the original IA32_UMWAIT_CONTROL MSR value which is configured by * hardware or BIOS before kernel boot. 
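The effect of TSX_CTRL_CPUID_CLEAR above is directly visible to applications: once the bit is written, CPUID.(EAX=7,ECX=0):EBX stops advertising HLE (bit 4) and RTM (bit 11). A minimal user-space probe (an illustrative sketch, not part of the patch) that tests the same enumeration an application would check before attempting lock elision:

#include <stdio.h>
#include <cpuid.h>

int main(void)
{
	unsigned int eax, ebx, ecx, edx;

	/* CPUID.(EAX=7,ECX=0):EBX enumerates HLE (bit 4) and RTM (bit 11) */
	if (!__get_cpuid_count(7, 0, &eax, &ebx, &ecx, &edx))
		return 1;

	printf("HLE: %s\n", (ebx & (1u << 4))  ? "yes" : "no");
	printf("RTM: %s\n", (ebx & (1u << 11)) ? "yes" : "no");
	return 0;
}

With tsx=off on a TSX_CTRL-capable part, both bits read as clear even though the silicon still implements TSX.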
diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c index 3c648476d4fb..46d732696c1c 100644 --- a/arch/x86/kernel/cpu/vmware.c +++ b/arch/x86/kernel/cpu/vmware.c @@ -30,34 +30,69 @@ #include <asm/hypervisor.h> #include <asm/timer.h> #include <asm/apic.h> +#include <asm/vmware.h> #undef pr_fmt #define pr_fmt(fmt) "vmware: " fmt -#define CPUID_VMWARE_INFO_LEAF 0x40000000 +#define CPUID_VMWARE_INFO_LEAF 0x40000000 +#define CPUID_VMWARE_FEATURES_LEAF 0x40000010 +#define CPUID_VMWARE_FEATURES_ECX_VMMCALL BIT(0) +#define CPUID_VMWARE_FEATURES_ECX_VMCALL BIT(1) + #define VMWARE_HYPERVISOR_MAGIC 0x564D5868 -#define VMWARE_HYPERVISOR_PORT 0x5658 -#define VMWARE_PORT_CMD_GETVERSION 10 -#define VMWARE_PORT_CMD_GETHZ 45 -#define VMWARE_PORT_CMD_GETVCPU_INFO 68 -#define VMWARE_PORT_CMD_LEGACY_X2APIC 3 -#define VMWARE_PORT_CMD_VCPU_RESERVED 31 +#define VMWARE_CMD_GETVERSION 10 +#define VMWARE_CMD_GETHZ 45 +#define VMWARE_CMD_GETVCPU_INFO 68 +#define VMWARE_CMD_LEGACY_X2APIC 3 +#define VMWARE_CMD_VCPU_RESERVED 31 #define VMWARE_PORT(cmd, eax, ebx, ecx, edx) \ - __asm__("inl (%%dx)" : \ - "=a"(eax), "=c"(ecx), "=d"(edx), "=b"(ebx) : \ - "0"(VMWARE_HYPERVISOR_MAGIC), \ - "1"(VMWARE_PORT_CMD_##cmd), \ - "2"(VMWARE_HYPERVISOR_PORT), "3"(UINT_MAX) : \ - "memory"); + __asm__("inl (%%dx), %%eax" : \ + "=a"(eax), "=c"(ecx), "=d"(edx), "=b"(ebx) : \ + "a"(VMWARE_HYPERVISOR_MAGIC), \ + "c"(VMWARE_CMD_##cmd), \ + "d"(VMWARE_HYPERVISOR_PORT), "b"(UINT_MAX) : \ + "memory") + +#define VMWARE_VMCALL(cmd, eax, ebx, ecx, edx) \ + __asm__("vmcall" : \ + "=a"(eax), "=c"(ecx), "=d"(edx), "=b"(ebx) : \ + "a"(VMWARE_HYPERVISOR_MAGIC), \ + "c"(VMWARE_CMD_##cmd), \ + "d"(0), "b"(UINT_MAX) : \ + "memory") + +#define VMWARE_VMMCALL(cmd, eax, ebx, ecx, edx) \ + __asm__("vmmcall" : \ + "=a"(eax), "=c"(ecx), "=d"(edx), "=b"(ebx) : \ + "a"(VMWARE_HYPERVISOR_MAGIC), \ + "c"(VMWARE_CMD_##cmd), \ + "d"(0), "b"(UINT_MAX) : \ + "memory") + +#define VMWARE_CMD(cmd, eax, ebx, ecx, edx) do { \ + switch (vmware_hypercall_mode) { \ + case CPUID_VMWARE_FEATURES_ECX_VMCALL: \ + VMWARE_VMCALL(cmd, eax, ebx, ecx, edx); \ + break; \ + case CPUID_VMWARE_FEATURES_ECX_VMMCALL: \ + VMWARE_VMMCALL(cmd, eax, ebx, ecx, edx); \ + break; \ + default: \ + VMWARE_PORT(cmd, eax, ebx, ecx, edx); \ + break; \ + } \ + } while (0) static unsigned long vmware_tsc_khz __ro_after_init; +static u8 vmware_hypercall_mode __ro_after_init; static inline int __vmware_platform(void) { uint32_t eax, ebx, ecx, edx; - VMWARE_PORT(GETVERSION, eax, ebx, ecx, edx); + VMWARE_CMD(GETVERSION, eax, ebx, ecx, edx); return eax != (uint32_t)-1 && ebx == VMWARE_HYPERVISOR_MAGIC; } @@ -129,6 +164,10 @@ static void __init vmware_set_capabilities(void) { setup_force_cpu_cap(X86_FEATURE_CONSTANT_TSC); setup_force_cpu_cap(X86_FEATURE_TSC_RELIABLE); + if (vmware_hypercall_mode == CPUID_VMWARE_FEATURES_ECX_VMCALL) + setup_force_cpu_cap(X86_FEATURE_VMCALL); + else if (vmware_hypercall_mode == CPUID_VMWARE_FEATURES_ECX_VMMCALL) + setup_force_cpu_cap(X86_FEATURE_VMW_VMMCALL); } static void __init vmware_platform_setup(void) @@ -136,7 +175,7 @@ static void __init vmware_platform_setup(void) uint32_t eax, ebx, ecx, edx; uint64_t lpj, tsc_khz; - VMWARE_PORT(GETHZ, eax, ebx, ecx, edx); + VMWARE_CMD(GETHZ, eax, ebx, ecx, edx); if (ebx != UINT_MAX) { lpj = tsc_khz = eax | (((uint64_t)ebx) << 32); @@ -174,10 +213,21 @@ static void __init vmware_platform_setup(void) vmware_set_capabilities(); } +static u8 vmware_select_hypercall(void) +{ + int eax, ebx, ecx, edx; + + 
cpuid(CPUID_VMWARE_FEATURES_LEAF, &eax, &ebx, &ecx, &edx); + return (ecx & (CPUID_VMWARE_FEATURES_ECX_VMMCALL | + CPUID_VMWARE_FEATURES_ECX_VMCALL)); +} + /* * While checking the dmi string information, just checking the product * serial key should be enough, as this will always have a VMware * specific string when running under VMware hypervisor. + * If !boot_cpu_has(X86_FEATURE_HYPERVISOR), vmware_hypercall_mode + * intentionally defaults to 0. */ static uint32_t __init vmware_platform(void) { @@ -187,8 +237,16 @@ static uint32_t __init vmware_platform(void) cpuid(CPUID_VMWARE_INFO_LEAF, &eax, &hyper_vendor_id[0], &hyper_vendor_id[1], &hyper_vendor_id[2]); - if (!memcmp(hyper_vendor_id, "VMwareVMware", 12)) + if (!memcmp(hyper_vendor_id, "VMwareVMware", 12)) { + if (eax >= CPUID_VMWARE_FEATURES_LEAF) + vmware_hypercall_mode = + vmware_select_hypercall(); + + pr_info("hypercall mode: 0x%02x\n", + (unsigned int) vmware_hypercall_mode); + return CPUID_VMWARE_INFO_LEAF; + } } else if (dmi_available && dmi_name_in_serial("VMware") && __vmware_platform()) return 1; @@ -200,9 +258,9 @@ static uint32_t __init vmware_platform(void) static bool __init vmware_legacy_x2apic_available(void) { uint32_t eax, ebx, ecx, edx; - VMWARE_PORT(GETVCPU_INFO, eax, ebx, ecx, edx); - return (eax & (1 << VMWARE_PORT_CMD_VCPU_RESERVED)) == 0 && - (eax & (1 << VMWARE_PORT_CMD_LEGACY_X2APIC)) != 0; + VMWARE_CMD(GETVCPU_INFO, eax, ebx, ecx, edx); + return (eax & (1 << VMWARE_CMD_VCPU_RESERVED)) == 0 && + (eax & (1 << VMWARE_CMD_LEGACY_X2APIC)) != 0; } const __initconst struct hypervisor_x86 x86_hyper_vmware = { diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c index 2bf70a2fed90..eb651fbde92a 100644 --- a/arch/x86/kernel/crash.c +++ b/arch/x86/kernel/crash.c @@ -225,8 +225,6 @@ static int elf_header_exclude_ranges(struct crash_mem *cmem) if (crashk_low_res.end) { ret = crash_exclude_mem_range(cmem, crashk_low_res.start, crashk_low_res.end); - if (ret) - return ret; } return ret; diff --git a/arch/x86/kernel/crash_dump_64.c b/arch/x86/kernel/crash_dump_64.c index 22369dd5de3b..045e82e8945b 100644 --- a/arch/x86/kernel/crash_dump_64.c +++ b/arch/x86/kernel/crash_dump_64.c @@ -70,3 +70,8 @@ ssize_t copy_oldmem_page_encrypted(unsigned long pfn, char *buf, size_t csize, { return __copy_oldmem_page(pfn, buf, csize, offset, userbuf, true); } + +ssize_t elfcorehdr_read(char *buf, size_t count, u64 *ppos) +{ + return read_from_oldmem(buf, count, ppos, 0, sev_active()); +} diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index 2b5886401e5f..e07424e19274 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -367,13 +367,18 @@ NOKPROBE_SYMBOL(oops_end); int __die(const char *str, struct pt_regs *regs, long err) { + const char *pr = ""; + /* Save the regs of the first oops for the executive summary later. */ if (!die_counter) exec_summary_regs = *regs; + if (IS_ENABLED(CONFIG_PREEMPTION)) + pr = IS_ENABLED(CONFIG_PREEMPT_RT) ? " PREEMPT_RT" : " PREEMPT"; + printk(KERN_DEFAULT "%s: %04lx [#%d]%s%s%s%s%s\n", str, err & 0xffff, ++die_counter, - IS_ENABLED(CONFIG_PREEMPT) ? " PREEMPT" : "", + pr, IS_ENABLED(CONFIG_SMP) ? " SMP" : "", debug_pagealloc_enabled() ? " DEBUG_PAGEALLOC" : "", IS_ENABLED(CONFIG_KASAN) ? 
" KASAN" : "", diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c index 753b8cfe8b8a..87b97897a881 100644 --- a/arch/x86/kernel/dumpstack_64.c +++ b/arch/x86/kernel/dumpstack_64.c @@ -94,6 +94,13 @@ static bool in_exception_stack(unsigned long *stack, struct stack_info *info) BUILD_BUG_ON(N_EXCEPTION_STACKS != 6); begin = (unsigned long)__this_cpu_read(cea_exception_stacks); + /* + * Handle the case where stack trace is collected _before_ + * cea_exception_stacks had been initialized. + */ + if (!begin) + return false; + end = begin + sizeof(struct cea_exception_stacks); /* Bail if @stack is outside the exception stack area. */ if (stk < begin || stk >= end) diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c index 6c4f01540833..4cba91ec8049 100644 --- a/arch/x86/kernel/early-quirks.c +++ b/arch/x86/kernel/early-quirks.c @@ -549,6 +549,7 @@ static const struct pci_device_id intel_early_ids[] __initconst = { INTEL_CNL_IDS(&gen9_early_ops), INTEL_ICL_11_IDS(&gen11_early_ops), INTEL_EHL_IDS(&gen11_early_ops), + INTEL_TGL_12_IDS(&gen11_early_ops), }; struct resource intel_graphics_stolen_res __ro_after_init = DEFINE_RES_MEM(0, 0); @@ -709,6 +710,8 @@ static struct chipset early_qrk[] __initdata = { */ { PCI_VENDOR_ID_INTEL, 0x0f00, PCI_CLASS_BRIDGE_HOST, PCI_ANY_ID, 0, force_disable_hpet}, + { PCI_VENDOR_ID_INTEL, 0x3ec4, + PCI_CLASS_BRIDGE_HOST, PCI_ANY_ID, 0, force_disable_hpet}, { PCI_VENDOR_ID_BROADCOM, 0x4331, PCI_CLASS_NETWORK_OTHER, PCI_ANY_ID, 0, apple_airport_reset}, {} diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 29ffa495bd1c..206a4b6144c2 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -222,13 +222,31 @@ unsigned long __head __startup_64(unsigned long physaddr, * we might write invalid pmds, when the kernel is relocated * cleanup_highmap() fixes this up along with the mappings * beyond _end. + * + * Only the region occupied by the kernel image has so far + * been checked against the table of usable memory regions + * provided by the firmware, so invalidate pages outside that + * region. A page table entry that maps to a reserved area of + * memory would allow processor speculation into that area, + * and on some hardware (particularly the UV platform) even + * speculative access to some reserved areas is caught as an + * error, causing the BIOS to halt the system. 
*/ pmd = fixup_pointer(level2_kernel_pgt, physaddr); - for (i = 0; i < PTRS_PER_PMD; i++) { + + /* invalidate pages before the kernel image */ + for (i = 0; i < pmd_index((unsigned long)_text); i++) + pmd[i] &= ~_PAGE_PRESENT; + + /* fixup pages that are part of the kernel image */ + for (; i <= pmd_index((unsigned long)_end); i++) if (pmd[i] & _PAGE_PRESENT) pmd[i] += load_delta; - } + + /* invalidate pages after the kernel image */ + for (; i < PTRS_PER_PMD; i++) + pmd[i] &= ~_PAGE_PRESENT; /* * Fixup phys_base - remove the memory encryption mask to obtain diff --git a/arch/x86/kernel/ima_arch.c b/arch/x86/kernel/ima_arch.c index 4c407833faca..4d4f5d9faac3 100644 --- a/arch/x86/kernel/ima_arch.c +++ b/arch/x86/kernel/ima_arch.c @@ -74,9 +74,9 @@ bool arch_ima_get_secureboot(void) /* secureboot arch rules */ static const char * const sb_arch_rules[] = { -#if !IS_ENABLED(CONFIG_KEXEC_VERIFY_SIG) +#if !IS_ENABLED(CONFIG_KEXEC_SIG) "appraise func=KEXEC_KERNEL_CHECK appraise_type=imasig", -#endif /* CONFIG_KEXEC_VERIFY_SIG */ +#endif /* CONFIG_KEXEC_SIG */ "measure func=KEXEC_KERNEL_CHECK", #if !IS_ENABLED(CONFIG_MODULE_SIG) "appraise func=MODULE_CHECK appraise_type=imasig", diff --git a/arch/x86/kernel/ioport.c b/arch/x86/kernel/ioport.c index 0fe1c8782208..61a89d3c0382 100644 --- a/arch/x86/kernel/ioport.c +++ b/arch/x86/kernel/ioport.c @@ -11,6 +11,7 @@ #include <linux/errno.h> #include <linux/types.h> #include <linux/ioport.h> +#include <linux/security.h> #include <linux/smp.h> #include <linux/stddef.h> #include <linux/slab.h> @@ -31,7 +32,8 @@ long ksys_ioperm(unsigned long from, unsigned long num, int turn_on) if ((from + num <= from) || (from + num > IO_BITMAP_BITS)) return -EINVAL; - if (turn_on && !capable(CAP_SYS_RAWIO)) + if (turn_on && (!capable(CAP_SYS_RAWIO) || + security_locked_down(LOCKDOWN_IOPORT))) return -EPERM; /* @@ -126,7 +128,8 @@ SYSCALL_DEFINE1(iopl, unsigned int, level) return -EINVAL; /* Trying to gain more privileges? 
*/ if (level > old) { - if (!capable(CAP_SYS_RAWIO)) + if (!capable(CAP_SYS_RAWIO) || + security_locked_down(LOCKDOWN_IOPORT)) return -EPERM; } regs->flags = (regs->flags & ~X86_EFLAGS_IOPL) | diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 4215653f8a8e..21efee32e2b1 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -243,11 +243,15 @@ __visible unsigned int __irq_entry do_IRQ(struct pt_regs *regs) RCU_LOCKDEP_WARN(!rcu_is_watching(), "IRQ failed to wake up RCU"); desc = __this_cpu_read(vector_irq[vector]); - - if (!handle_irq(desc, regs)) { + if (likely(!IS_ERR_OR_NULL(desc))) { + if (IS_ENABLED(CONFIG_X86_32)) + handle_irq(desc, regs); + else + generic_handle_irq_desc(desc); + } else { ack_APIC_irq(); - if (desc != VECTOR_RETRIGGERED && desc != VECTOR_SHUTDOWN) { + if (desc == VECTOR_UNUSED) { pr_emerg_ratelimited("%s: %d.%d No irq handler for vector\n", __func__, smp_processor_id(), vector); diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index fc34816c6f04..a759ca97cd01 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c @@ -148,18 +148,13 @@ void do_softirq_own_stack(void) call_on_stack(__do_softirq, isp); } -bool handle_irq(struct irq_desc *desc, struct pt_regs *regs) +void handle_irq(struct irq_desc *desc, struct pt_regs *regs) { int overflow = check_stack_overflow(); - if (IS_ERR_OR_NULL(desc)) - return false; - if (user_mode(regs) || !execute_on_irq_stack(overflow, desc)) { if (unlikely(overflow)) print_stack_overflow(); generic_handle_irq_desc(desc); } - - return true; } diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index 6bf6517a05bb..12df3a4abfdd 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c @@ -26,15 +26,6 @@ DEFINE_PER_CPU_PAGE_ALIGNED(struct irq_stack, irq_stack_backing_store) __visible; DECLARE_INIT_PER_CPU(irq_stack_backing_store); -bool handle_irq(struct irq_desc *desc, struct pt_regs *regs) -{ - if (IS_ERR_OR_NULL(desc)) - return false; - - generic_handle_irq_desc(desc); - return true; -} - #ifdef CONFIG_VMAP_STACK /* * VMAP the backing store with guard pages diff --git a/arch/x86/kernel/kexec-bzimage64.c b/arch/x86/kernel/kexec-bzimage64.c index 5ebcd02cbca7..d2f4e706a428 100644 --- a/arch/x86/kernel/kexec-bzimage64.c +++ b/arch/x86/kernel/kexec-bzimage64.c @@ -180,6 +180,7 @@ setup_efi_state(struct boot_params *params, unsigned long params_load_addr, if (efi_enabled(EFI_OLD_MEMMAP)) return 0; + params->secure_boot = boot_params.secure_boot; ei->efi_loader_signature = current_ei->efi_loader_signature; ei->efi_systab = current_ei->efi_systab; ei->efi_systab_hi = current_ei->efi_systab_hi; diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index 23297ea64f5f..c44fe7d8d9a4 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c @@ -416,7 +416,7 @@ static void kgdb_disable_hw_debug(struct pt_regs *regs) */ void kgdb_roundup_cpus(void) { - apic->send_IPI_allbutself(APIC_DM_NMI); + apic_send_IPI_allbutself(NMI_VECTOR); } #endif diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index 0e0b08008b5a..43fc13c831af 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -580,7 +580,7 @@ static void setup_singlestep(struct kprobe *p, struct pt_regs *regs, if (setup_detour_execution(p, regs, reenter)) return; -#if !defined(CONFIG_PREEMPT) +#if !defined(CONFIG_PREEMPTION) if (p->ainsn.boostable && !p->post_handler) { /* Boost up -- we can execute copied instructions directly */ if (!reenter) diff --git 
a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c index 9d4aedece363..b348dd506d58 100644 --- a/arch/x86/kernel/kprobes/opt.c +++ b/arch/x86/kernel/kprobes/opt.c @@ -403,7 +403,7 @@ int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, (u8 *)op->kp.addr + op->optinsn.size); len += RELATIVEJUMP_SIZE; - /* We have to use text_poke for instuction buffer because it is RO */ + /* We have to use text_poke() for instruction buffer because it is RO */ text_poke(slot, buf, len); ret = 0; out: diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 4ab377c9fffe..e820568ed4d5 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -311,7 +311,7 @@ static void kvm_guest_cpu_init(void) if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF) && kvmapf) { u64 pa = slow_virt_to_phys(this_cpu_ptr(&apf_reason)); -#ifdef CONFIG_PREEMPT +#ifdef CONFIG_PREEMPTION pa |= KVM_ASYNC_PF_SEND_ALWAYS; #endif pa |= KVM_ASYNC_PF_ENABLED; @@ -502,16 +502,6 @@ static void kvm_send_ipi_mask_allbutself(const struct cpumask *mask, int vector) __send_ipi_mask(local_mask, vector); } -static void kvm_send_ipi_allbutself(int vector) -{ - kvm_send_ipi_mask_allbutself(cpu_online_mask, vector); -} - -static void kvm_send_ipi_all(int vector) -{ - __send_ipi_mask(cpu_online_mask, vector); -} - /* * Set the IPI entry points */ @@ -519,8 +509,6 @@ static void kvm_setup_pv_ipi(void) { apic->send_IPI_mask = kvm_send_ipi_mask; apic->send_IPI_mask_allbutself = kvm_send_ipi_mask_allbutself; - apic->send_IPI_allbutself = kvm_send_ipi_allbutself; - apic->send_IPI_all = kvm_send_ipi_all; pr_info("KVM setup pv IPIs\n"); } @@ -705,6 +693,7 @@ unsigned int kvm_arch_para_hints(void) { return cpuid_edx(kvm_cpuid_base() | KVM_CPUID_FEATURES); } +EXPORT_SYMBOL_GPL(kvm_arch_para_hints); static uint32_t __init kvm_detect(void) { @@ -867,3 +856,39 @@ void __init kvm_spinlock_init(void) } #endif /* CONFIG_PARAVIRT_SPINLOCKS */ + +#ifdef CONFIG_ARCH_CPUIDLE_HALTPOLL + +static void kvm_disable_host_haltpoll(void *i) +{ + wrmsrl(MSR_KVM_POLL_CONTROL, 0); +} + +static void kvm_enable_host_haltpoll(void *i) +{ + wrmsrl(MSR_KVM_POLL_CONTROL, 1); +} + +void arch_haltpoll_enable(unsigned int cpu) +{ + if (!kvm_para_has_feature(KVM_FEATURE_POLL_CONTROL)) { + pr_err_once("kvm: host does not support poll control\n"); + pr_err_once("kvm: host upgrade recommended\n"); + return; + } + + /* Enable guest halt poll disables host halt poll */ + smp_call_function_single(cpu, kvm_disable_host_haltpoll, NULL, 1); +} +EXPORT_SYMBOL_GPL(arch_haltpoll_enable); + +void arch_haltpoll_disable(unsigned int cpu) +{ + if (!kvm_para_has_feature(KVM_FEATURE_POLL_CONTROL)) + return; + + /* Enable guest halt poll disables host halt poll */ + smp_call_function_single(cpu, kvm_enable_host_haltpoll, NULL, 1); +} +EXPORT_SYMBOL_GPL(arch_haltpoll_disable); +#endif diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c index 3db2252b958d..1547be359d7f 100644 --- a/arch/x86/kernel/msr.c +++ b/arch/x86/kernel/msr.c @@ -34,6 +34,7 @@ #include <linux/notifier.h> #include <linux/uaccess.h> #include <linux/gfp.h> +#include <linux/security.h> #include <asm/cpufeature.h> #include <asm/msr.h> @@ -79,6 +80,10 @@ static ssize_t msr_write(struct file *file, const char __user *buf, int err = 0; ssize_t bytes = 0; + err = security_locked_down(LOCKDOWN_MSR); + if (err) + return err; + if (count % 8) return -EINVAL; /* Invalid chunk size */ @@ -130,6 +135,9 @@ static long msr_ioctl(struct file *file, unsigned int ioc, unsigned long arg) err = -EFAULT; break; } + err = 
security_locked_down(LOCKDOWN_MSR); + if (err) + break; err = wrmsr_safe_regs_on_cpu(cpu, regs); if (err) break; diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index 4df7705022b9..e676a9916c49 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c @@ -512,6 +512,9 @@ NOKPROBE_SYMBOL(is_debug_stack); dotraplinkage notrace void do_nmi(struct pt_regs *regs, long error_code) { + if (IS_ENABLED(CONFIG_SMP) && cpu_is_offline(smp_processor_id())) + return; + if (this_cpu_read(nmi_state) != NMI_NOT_RUNNING) { this_cpu_write(nmi_state, NMI_LATCHED); return; diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 0aa6256eedd8..59d3d2763a9e 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -311,10 +311,6 @@ struct paravirt_patch_template pv_ops = { .cpu.read_cr0 = native_read_cr0, .cpu.write_cr0 = native_write_cr0, .cpu.write_cr4 = native_write_cr4, -#ifdef CONFIG_X86_64 - .cpu.read_cr8 = native_read_cr8, - .cpu.write_cr8 = native_write_cr8, -#endif .cpu.wbinvd = native_wbinvd, .cpu.read_msr = native_read_msr, .cpu.write_msr = native_write_msr, diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c index 9d4343aa481b..23fdec030c37 100644 --- a/arch/x86/kernel/pci-calgary_64.c +++ b/arch/x86/kernel/pci-calgary_64.c @@ -468,6 +468,8 @@ static const struct dma_map_ops calgary_dma_ops = { .map_page = calgary_map_page, .unmap_page = calgary_unmap_page, .dma_supported = dma_direct_supported, + .mmap = dma_common_mmap, + .get_sgtable = dma_common_get_sgtable, }; static inline void __iomem * busno_to_bbar(unsigned char num) diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index f62b498b18fb..fa4352dce491 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include <linux/dma-direct.h> #include <linux/dma-debug.h> +#include <linux/iommu.h> #include <linux/dmar.h> #include <linux/export.h> #include <linux/memblock.h> @@ -34,21 +35,6 @@ int no_iommu __read_mostly; /* Set this to 1 if there is a HW IOMMU in the system */ int iommu_detected __read_mostly = 0; -/* - * This variable becomes 1 if iommu=pt is passed on the kernel command line. - * If this variable is 1, IOMMU implementations do no DMA translation for - * devices and allow every device to access to whole physical memory. This is - * useful if a user wants to use an IOMMU only for KVM device assignment to - * guests and not for driver dma translation. - * It is also possible to disable by default in kernel config, and enable with - * iommu=nopt at boot time. 
- */ -#ifdef CONFIG_IOMMU_DEFAULT_PASSTHROUGH -int iommu_pass_through __read_mostly = 1; -#else -int iommu_pass_through __read_mostly; -#endif - extern struct iommu_table_entry __iommu_table[], __iommu_table_end[]; void __init pci_iommu_alloc(void) @@ -120,9 +106,9 @@ static __init int iommu_setup(char *p) swiotlb = 1; #endif if (!strncmp(p, "pt", 2)) - iommu_pass_through = 1; + iommu_set_default_passthrough(true); if (!strncmp(p, "nopt", 4)) - iommu_pass_through = 0; + iommu_set_default_translated(true); gart_parse_options(p); diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c index 5f5302028a9a..c2cfa5e7c152 100644 --- a/arch/x86/kernel/pci-swiotlb.c +++ b/arch/x86/kernel/pci-swiotlb.c @@ -1,5 +1,4 @@ // SPDX-License-Identifier: GPL-2.0 -/* Glue code to lib/swiotlb.c */ #include <linux/pci.h> #include <linux/cache.h> diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 75fea0d48c0e..5e94c4354d4e 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -580,7 +580,7 @@ void __cpuidle default_idle(void) safe_halt(); trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id()); } -#ifdef CONFIG_APM_MODULE +#if defined(CONFIG_APM_MODULE) || defined(CONFIG_HALTPOLL_CPUIDLE_MODULE) EXPORT_SYMBOL(default_idle); #endif diff --git a/arch/x86/kernel/process.h b/arch/x86/kernel/process.h index 320ab978fb1f..1d0797b2338a 100644 --- a/arch/x86/kernel/process.h +++ b/arch/x86/kernel/process.h @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 +/* SPDX-License-Identifier: GPL-2.0 */ // // Code shared between 32 and 64 bit diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c index 8451f38ad399..1daf8f2aa21f 100644 --- a/arch/x86/kernel/quirks.c +++ b/arch/x86/kernel/quirks.c @@ -90,8 +90,6 @@ static void ich_force_hpet_resume(void) BUG(); else printk(KERN_DEBUG "Force enabled HPET at resume\n"); - - return; } static void ich_force_enable_hpet(struct pci_dev *dev) @@ -448,7 +446,6 @@ static void nvidia_force_enable_hpet(struct pci_dev *dev) dev_printk(KERN_DEBUG, &dev->dev, "Force enabled HPET at 0x%lx\n", force_hpet_address); cached_dev = dev; - return; } /* ISA Bridges */ @@ -513,7 +510,6 @@ static void e6xx_force_enable_hpet(struct pci_dev *dev) force_hpet_resume_type = NONE_FORCE_HPET_RESUME; dev_printk(KERN_DEBUG, &dev->dev, "Force enabled HPET at " "0x%lx\n", force_hpet_address); - return; } DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E6XX_CU, e6xx_force_enable_hpet); diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 09d6bded3c1e..0cc7c0b106bb 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -828,11 +828,6 @@ static int crash_nmi_callback(unsigned int val, struct pt_regs *regs) return NMI_HANDLED; } -static void smp_send_nmi_allbutself(void) -{ - apic->send_IPI_allbutself(NMI_VECTOR); -} - /* * Halt all other CPUs, calling the specified function on each of them * @@ -861,7 +856,7 @@ void nmi_shootdown_cpus(nmi_shootdown_cb callback) */ wmb(); - smp_send_nmi_allbutself(); + apic_send_IPI_allbutself(NMI_VECTOR); /* Kick CPUs looping in NMI context. 
*/ WRITE_ONCE(crash_ipi_issued, 1); diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index bbe35bf879f5..77ea96b794bd 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -486,7 +486,7 @@ static int __init reserve_crashkernel_low(void) ret = parse_crashkernel_low(boot_command_line, total_low_mem, &low_size, &base); if (ret) { /* - * two parts from lib/swiotlb.c: + * two parts from kernel/dma/swiotlb.c: * -swiotlb size: user-specified with swiotlb= or default. * * -swiotlb overflow buffer: now hardcoded to 32k. We round it diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c index 96421f97e75c..b8d4e9c3c070 100644 --- a/arch/x86/kernel/smp.c +++ b/arch/x86/kernel/smp.c @@ -115,46 +115,6 @@ static atomic_t stopping_cpu = ATOMIC_INIT(-1); static bool smp_no_nmi_ipi = false; -/* - * this function sends a 'reschedule' IPI to another CPU. - * it goes straight through and wastes no time serializing - * anything. Worst case is that we lose a reschedule ... - */ -static void native_smp_send_reschedule(int cpu) -{ - if (unlikely(cpu_is_offline(cpu))) { - WARN(1, "sched: Unexpected reschedule of offline CPU#%d!\n", cpu); - return; - } - apic->send_IPI(cpu, RESCHEDULE_VECTOR); -} - -void native_send_call_func_single_ipi(int cpu) -{ - apic->send_IPI(cpu, CALL_FUNCTION_SINGLE_VECTOR); -} - -void native_send_call_func_ipi(const struct cpumask *mask) -{ - cpumask_var_t allbutself; - - if (!alloc_cpumask_var(&allbutself, GFP_ATOMIC)) { - apic->send_IPI_mask(mask, CALL_FUNCTION_VECTOR); - return; - } - - cpumask_copy(allbutself, cpu_online_mask); - __cpumask_clear_cpu(smp_processor_id(), allbutself); - - if (cpumask_equal(mask, allbutself) && - cpumask_equal(cpu_online_mask, cpu_callout_mask)) - apic->send_IPI_allbutself(CALL_FUNCTION_VECTOR); - else - apic->send_IPI_mask(mask, CALL_FUNCTION_VECTOR); - - free_cpumask_var(allbutself); -} - static int smp_stop_nmi_callback(unsigned int val, struct pt_regs *regs) { /* We are registered on stopping cpu too, avoid spurious NMI */ @@ -179,6 +139,12 @@ asmlinkage __visible void smp_reboot_interrupt(void) irq_exit(); } +static int register_stop_handler(void) +{ + return register_nmi_handler(NMI_LOCAL, smp_stop_nmi_callback, + NMI_FLAG_FIRST, "smp_stop"); +} + static void native_stop_other_cpus(int wait) { unsigned long flags; @@ -209,42 +175,44 @@ static void native_stop_other_cpus(int wait) /* sync above data before sending IRQ */ wmb(); - apic->send_IPI_allbutself(REBOOT_VECTOR); + apic_send_IPI_allbutself(REBOOT_VECTOR); /* - * Don't wait longer than a second if the caller - * didn't ask us to wait. + * Don't wait longer than a second for IPI completion. The + * wait request is not checked here because that would + * prevent an NMI shutdown attempt in case that not all + * CPUs reach shutdown state. */ timeout = USEC_PER_SEC; - while (num_online_cpus() > 1 && (wait || timeout--)) + while (num_online_cpus() > 1 && timeout--) udelay(1); } - - /* if the REBOOT_VECTOR didn't work, try with the NMI */ - if ((num_online_cpus() > 1) && (!smp_no_nmi_ipi)) { - if (register_nmi_handler(NMI_LOCAL, smp_stop_nmi_callback, - NMI_FLAG_FIRST, "smp_stop")) - /* Note: we ignore failures here */ - /* Hope the REBOOT_IRQ is good enough */ - goto finish; - /* sync above data before sending IRQ */ - wmb(); - - pr_emerg("Shutting down cpus with NMI\n"); + /* if the REBOOT_VECTOR didn't work, try with the NMI */ + if (num_online_cpus() > 1) { + /* + * If NMI IPI is enabled, try to register the stop handler + * and send the IPI. 
In any case try to wait for the other + * CPUs to stop. */ + if (!smp_no_nmi_ipi && !register_stop_handler()) { + /* Sync above data before sending IRQ */ + wmb(); - apic->send_IPI_allbutself(NMI_VECTOR); + pr_emerg("Shutting down cpus with NMI\n"); + apic_send_IPI_allbutself(NMI_VECTOR); + } /* - * Don't wait longer than a 10 ms if the caller - * didn't ask us to wait. + * Don't wait longer than 10 ms if the caller didn't + * request it. If wait is true, the machine hangs here if + * one or more CPUs do not reach shutdown state. */ timeout = USEC_PER_MSEC * 10; while (num_online_cpus() > 1 && (wait || timeout--)) udelay(1); } -finish: local_irq_save(flags); disable_local_APIC(); mcheck_cpu_clear(this_cpu_ptr(&cpu_info)); diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index fdbd47ceb84d..69881b2d446c 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1023,8 +1023,6 @@ int common_cpu_up(unsigned int cpu, struct task_struct *idle) static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle, int *cpu0_nmi_registered) { - volatile u32 *trampoline_status = - (volatile u32 *) __va(real_mode_header->trampoline_status); /* start_ip had better be page-aligned! */ unsigned long start_ip = real_mode_header->trampoline_start; @@ -1116,9 +1114,6 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle, } } - /* mark "stuck" area as not stuck */ - *trampoline_status = 0; - if (x86_platform.legacy.warm_reset) { /* * Cleanup possible dangling ends... @@ -1596,7 +1591,12 @@ int native_cpu_disable(void) if (ret) return ret; - clear_local_APIC(); + /* + * Disable the local APIC. Otherwise IPI broadcasts will reach + * it. It still responds normally to INIT, NMI, SMI, and SIPI + * messages. + */ + apic_soft_disable(); cpu_disable_common(); return 0; diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 57d87f79558f..7e322e2daaf5 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -638,7 +638,7 @@ unsigned long native_calibrate_tsc(void) * clock.
*/ if (crystal_khz == 0 && - boot_cpu_data.x86_model == INTEL_FAM6_ATOM_GOLDMONT_X) + boot_cpu_data.x86_model == INTEL_FAM6_ATOM_GOLDMONT_D) crystal_khz = 25000; /* @@ -1505,6 +1505,9 @@ void __init tsc_init(void) return; } + if (tsc_clocksource_reliable || no_tsc_watchdog) + clocksource_tsc_early.flags &= ~CLOCK_SOURCE_MUST_VERIFY; + clocksource_register_khz(&clocksource_tsc_early, tsc_khz); detect_art(); } diff --git a/arch/x86/kernel/tsc_msr.c b/arch/x86/kernel/tsc_msr.c index 067858fe4db8..e0cbe4f2af49 100644 --- a/arch/x86/kernel/tsc_msr.c +++ b/arch/x86/kernel/tsc_msr.c @@ -58,6 +58,10 @@ static const struct freq_desc freq_desc_ann = { 1, { 83300, 100000, 133300, 100000, 0, 0, 0, 0 } }; +static const struct freq_desc freq_desc_lgm = { + 1, { 78000, 78000, 78000, 78000, 78000, 78000, 78000, 78000 } +}; + static const struct x86_cpu_id tsc_msr_cpu_ids[] = { INTEL_CPU_FAM6(ATOM_SALTWELL_MID, freq_desc_pnw), INTEL_CPU_FAM6(ATOM_SALTWELL_TABLET, freq_desc_clv), @@ -65,6 +69,7 @@ static const struct x86_cpu_id tsc_msr_cpu_ids[] = { INTEL_CPU_FAM6(ATOM_SILVERMONT_MID, freq_desc_tng), INTEL_CPU_FAM6(ATOM_AIRMONT, freq_desc_cht), INTEL_CPU_FAM6(ATOM_AIRMONT_MID, freq_desc_ann), + INTEL_CPU_FAM6(ATOM_AIRMONT_NP, freq_desc_lgm), {} }; diff --git a/arch/x86/kernel/umip.c b/arch/x86/kernel/umip.c index 5b345add550f..548fefed71ee 100644 --- a/arch/x86/kernel/umip.c +++ b/arch/x86/kernel/umip.c @@ -19,7 +19,7 @@ /** DOC: Emulation for User-Mode Instruction Prevention (UMIP) * * The feature User-Mode Instruction Prevention present in recent Intel - * processor prevents a group of instructions (sgdt, sidt, sldt, smsw, and str) + * processor prevents a group of instructions (SGDT, SIDT, SLDT, SMSW and STR) * from being executed with CPL > 0. Otherwise, a general protection fault is * issued. * @@ -36,8 +36,8 @@ * DOSEMU2) rely on this subset of instructions to function. * * The instructions protected by UMIP can be split in two groups. Those which - * return a kernel memory address (sgdt and sidt) and those which return a - * value (sldt, str and smsw). + * return a kernel memory address (SGDT and SIDT) and those which return a + * value (SLDT, STR and SMSW). * * For the instructions that return a kernel memory address, applications * such as WineHQ rely on the result being located in the kernel memory space, @@ -45,15 +45,13 @@ * value that, lies close to the top of the kernel memory. The limit for the GDT * and the IDT are set to zero. * - * Given that sldt and str are not commonly used in programs that run on WineHQ + * Given that SLDT and STR are not commonly used in programs that run on WineHQ * or DOSEMU2, they are not emulated. * * The instruction smsw is emulated to return the value that the register CR0 * has at boot time as set in the head_32. * - * Also, emulation is provided only for 32-bit processes; 64-bit processes - * that attempt to use the instructions that UMIP protects will receive the - * SIGSEGV signal issued as a consequence of the general protection fault. + * Emulation is provided for both 32-bit and 64-bit processes. * * Care is taken to appropriately emulate the results when segmentation is * used. That is, rather than relying on USER_DS and USER_CS, the function @@ -63,17 +61,18 @@ * application uses a local descriptor table. 
*/ -#define UMIP_DUMMY_GDT_BASE 0xfffe0000 -#define UMIP_DUMMY_IDT_BASE 0xffff0000 +#define UMIP_DUMMY_GDT_BASE 0xfffffffffffe0000ULL +#define UMIP_DUMMY_IDT_BASE 0xffffffffffff0000ULL /* * The SGDT and SIDT instructions store the contents of the global descriptor * table and interrupt table registers, respectively. The destination is a * memory operand of X+2 bytes. X bytes are used to store the base address of - * the table and 2 bytes are used to store the limit. In 32-bit processes, the - * only processes for which emulation is provided, X has a value of 4. + * the table and 2 bytes are used to store the limit. In 32-bit processes X + * has a value of 4, in 64-bit processes X has a value of 8. */ -#define UMIP_GDT_IDT_BASE_SIZE 4 +#define UMIP_GDT_IDT_BASE_SIZE_64BIT 8 +#define UMIP_GDT_IDT_BASE_SIZE_32BIT 4 #define UMIP_GDT_IDT_LIMIT_SIZE 2 #define UMIP_INST_SGDT 0 /* 0F 01 /0 */ @@ -189,6 +188,7 @@ static int identify_insn(struct insn *insn) * @umip_inst: A constant indicating the instruction to emulate * @data: Buffer into which the dummy result is stored * @data_size: Size of the emulated result + * @x86_64: true if process is 64-bit, false otherwise * * Emulate an instruction protected by UMIP and provide a dummy result. The * result of the emulation is saved in @data. The size of the results depends @@ -202,11 +202,8 @@ static int identify_insn(struct insn *insn) * 0 on success, -EINVAL on error while emulating. */ static int emulate_umip_insn(struct insn *insn, int umip_inst, - unsigned char *data, int *data_size) + unsigned char *data, int *data_size, bool x86_64) { - unsigned long dummy_base_addr, dummy_value; - unsigned short dummy_limit = 0; - if (!data || !data_size || !insn) return -EINVAL; /* @@ -219,6 +216,9 @@ static int emulate_umip_insn(struct insn *insn, int umip_inst, * is always returned irrespective of the operand size. */ if (umip_inst == UMIP_INST_SGDT || umip_inst == UMIP_INST_SIDT) { + u64 dummy_base_addr; + u16 dummy_limit = 0; + /* SGDT and SIDT do not use registers operands. */ if (X86_MODRM_MOD(insn->modrm.value) == 3) return -EINVAL; @@ -228,13 +228,24 @@ static int emulate_umip_insn(struct insn *insn, int umip_inst, else dummy_base_addr = UMIP_DUMMY_IDT_BASE; - *data_size = UMIP_GDT_IDT_LIMIT_SIZE + UMIP_GDT_IDT_BASE_SIZE; + /* + * 64-bit processes use the entire dummy base address. + * 32-bit processes use the lower 32 bits of the base address. + * dummy_base_addr is always 64 bits, but we memcpy the correct + * number of bytes from it to the destination. + */ + if (x86_64) + *data_size = UMIP_GDT_IDT_BASE_SIZE_64BIT; + else + *data_size = UMIP_GDT_IDT_BASE_SIZE_32BIT; + + memcpy(data + 2, &dummy_base_addr, *data_size); - memcpy(data + 2, &dummy_base_addr, UMIP_GDT_IDT_BASE_SIZE); + *data_size += UMIP_GDT_IDT_LIMIT_SIZE; memcpy(data, &dummy_limit, UMIP_GDT_IDT_LIMIT_SIZE); } else if (umip_inst == UMIP_INST_SMSW) { - dummy_value = CR0_STATE; + unsigned long dummy_value = CR0_STATE; /* * Even though the CR0 register has 4 bytes, the number @@ -290,11 +301,10 @@ static void force_sig_info_umip_fault(void __user *addr, struct pt_regs *regs) * fixup_umip_exception() - Fixup a general protection fault caused by UMIP * @regs: Registers as saved when entering the #GP handler * - * The instructions sgdt, sidt, str, smsw, sldt cause a general protection - * fault if executed with CPL > 0 (i.e., from user space). 
If the offending - * user-space process is not in long mode, this function fixes the exception - * up and provides dummy results for sgdt, sidt and smsw; str and sldt are not - * fixed up. Also long mode user-space processes are not fixed up. + * The instructions SGDT, SIDT, STR, SMSW and SLDT cause a general protection + * fault if executed with CPL > 0 (i.e., from user space). This function fixes + * the exception up and provides dummy results for SGDT, SIDT and SMSW; STR + * and SLDT are not fixed up. * * If operands are memory addresses, results are copied to user-space memory as * indicated by the instruction pointed by eIP using the registers indicated in @@ -373,13 +383,14 @@ bool fixup_umip_exception(struct pt_regs *regs) umip_pr_warning(regs, "%s instruction cannot be used by applications.\n", umip_insns[umip_inst]); - /* Do not emulate SLDT, STR or user long mode processes. */ - if (umip_inst == UMIP_INST_STR || umip_inst == UMIP_INST_SLDT || user_64bit_mode(regs)) + /* Do not emulate (spoof) SLDT or STR. */ + if (umip_inst == UMIP_INST_STR || umip_inst == UMIP_INST_SLDT) return false; umip_pr_warning(regs, "For now, expensive software emulation returns the result.\n"); - if (emulate_umip_insn(&insn, umip_inst, dummy_data, &dummy_data_size)) + if (emulate_umip_insn(&insn, umip_inst, dummy_data, &dummy_data_size, + user_64bit_mode(regs))) return false; /* diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index 1bef687faf22..18a799c8fa28 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c @@ -95,6 +95,7 @@ struct x86_init_ops x86_init __initdata = { }, .acpi = { + .set_root_pointer = x86_default_set_root_pointer, .get_root_pointer = x86_default_get_root_pointer, .reduced_hw_early_init = acpi_generic_reduced_hw_init, }, diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 22c2720cd948..f68c0c753c38 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -304,7 +304,13 @@ static void do_host_cpuid(struct kvm_cpuid_entry2 *entry, u32 function, case 7: case 0xb: case 0xd: + case 0xf: + case 0x10: + case 0x12: case 0x14: + case 0x17: + case 0x18: + case 0x1f: case 0x8000001d: entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX; break; @@ -357,10 +363,10 @@ static inline void do_cpuid_7_mask(struct kvm_cpuid_entry2 *entry, int index) /* cpuid 7.0.ecx*/ const u32 kvm_cpuid_7_0_ecx_x86_features = - F(AVX512VBMI) | F(LA57) | F(PKU) | 0 /*OSPKE*/ | + F(AVX512VBMI) | F(LA57) | F(PKU) | 0 /*OSPKE*/ | F(RDPID) | F(AVX512_VPOPCNTDQ) | F(UMIP) | F(AVX512_VBMI2) | F(GFNI) | F(VAES) | F(VPCLMULQDQ) | F(AVX512_VNNI) | F(AVX512_BITALG) | - F(CLDEMOTE) | F(MOVDIRI) | F(MOVDIR64B); + F(CLDEMOTE) | F(MOVDIRI) | F(MOVDIR64B) | 0 /*WAITPKG*/; /* cpuid 7.0.edx*/ const u32 kvm_cpuid_7_0_edx_x86_features = @@ -392,6 +398,12 @@ static inline void do_cpuid_7_mask(struct kvm_cpuid_entry2 *entry, int index) entry->edx &= kvm_cpuid_7_0_edx_x86_features; cpuid_mask(&entry->edx, CPUID_7_EDX); + if (boot_cpu_has(X86_FEATURE_IBPB) && boot_cpu_has(X86_FEATURE_IBRS)) + entry->edx |= F(SPEC_CTRL); + if (boot_cpu_has(X86_FEATURE_STIBP)) + entry->edx |= F(INTEL_STIBP); + if (boot_cpu_has(X86_FEATURE_SSBD)) + entry->edx |= F(SPEC_CTRL_SSBD); /* * We emulate ARCH_CAPABILITIES in software even * if the host doesn't support it. 
@@ -473,6 +485,7 @@ static inline int __do_cpuid_func(struct kvm_cpuid_entry2 *entry, u32 function, /* cpuid 0x80000008.ebx */ const u32 kvm_cpuid_8000_0008_ebx_x86_features = + F(CLZERO) | F(XSAVEERPTR) | F(WBNOINVD) | F(AMD_IBPB) | F(AMD_IBRS) | F(AMD_SSBD) | F(VIRT_SSBD) | F(AMD_SSB_NO) | F(AMD_STIBP) | F(AMD_STIBP_ALWAYS_ON); @@ -606,16 +619,20 @@ static inline int __do_cpuid_func(struct kvm_cpuid_entry2 *entry, u32 function, */ case 0x1f: case 0xb: { - int i, level_type; + int i; - /* read more entries until level_type is zero */ - for (i = 1; ; ++i) { + /* + * We filled in entry[0] for CPUID(EAX=<function>, + * ECX=00H) above. If its level type (ECX[15:8]) is + * zero, then the leaf is unimplemented, and we're + * done. Otherwise, continue to populate entries + * until the level type (ECX[15:8]) of the previously + * added entry is zero. + */ + for (i = 1; entry[i - 1].ecx & 0xff00; ++i) { if (*nent >= maxnent) goto out; - level_type = entry[i - 1].ecx & 0xff00; - if (!level_type) - break; do_host_cpuid(&entry[i], function, i); ++*nent; } @@ -729,18 +746,23 @@ static inline int __do_cpuid_func(struct kvm_cpuid_entry2 *entry, u32 function, g_phys_as = phys_as; entry->eax = g_phys_as | (virt_as << 8); entry->edx = 0; + entry->ebx &= kvm_cpuid_8000_0008_ebx_x86_features; + cpuid_mask(&entry->ebx, CPUID_8000_0008_EBX); /* - * IBRS, IBPB and VIRT_SSBD aren't necessarily present in - * hardware cpuid + * AMD has separate bits for each SPEC_CTRL bit. + * arch/x86/kernel/cpu/bugs.c is kind enough to + * record that in cpufeatures so use them. */ - if (boot_cpu_has(X86_FEATURE_AMD_IBPB)) + if (boot_cpu_has(X86_FEATURE_IBPB)) entry->ebx |= F(AMD_IBPB); - if (boot_cpu_has(X86_FEATURE_AMD_IBRS)) + if (boot_cpu_has(X86_FEATURE_IBRS)) entry->ebx |= F(AMD_IBRS); - if (boot_cpu_has(X86_FEATURE_VIRT_SSBD)) - entry->ebx |= F(VIRT_SSBD); - entry->ebx &= kvm_cpuid_8000_0008_ebx_x86_features; - cpuid_mask(&entry->ebx, CPUID_8000_0008_EBX); + if (boot_cpu_has(X86_FEATURE_STIBP)) + entry->ebx |= F(AMD_STIBP); + if (boot_cpu_has(X86_FEATURE_SSBD)) + entry->ebx |= F(AMD_SSBD); + if (!boot_cpu_has_bug(X86_BUG_SPEC_STORE_BYPASS)) + entry->ebx |= F(AMD_SSB_NO); /* * The preference is to use SPEC CTRL MSR instead of the * VIRT_SPEC MSR. @@ -952,53 +974,66 @@ struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu, EXPORT_SYMBOL_GPL(kvm_find_cpuid_entry); /* - * If no match is found, check whether we exceed the vCPU's limit - * and return the content of the highest valid _standard_ leaf instead. - * This is to satisfy the CPUID specification. + * If the basic or extended CPUID leaf requested is higher than the + * maximum supported basic or extended leaf, respectively, then it is + * out of range. 
*/ -static struct kvm_cpuid_entry2* check_cpuid_limit(struct kvm_vcpu *vcpu, - u32 function, u32 index) +static bool cpuid_function_in_range(struct kvm_vcpu *vcpu, u32 function) { - struct kvm_cpuid_entry2 *maxlevel; - - maxlevel = kvm_find_cpuid_entry(vcpu, function & 0x80000000, 0); - if (!maxlevel || maxlevel->eax >= function) - return NULL; - if (function & 0x80000000) { - maxlevel = kvm_find_cpuid_entry(vcpu, 0, 0); - if (!maxlevel) - return NULL; - } - return kvm_find_cpuid_entry(vcpu, maxlevel->eax, index); + struct kvm_cpuid_entry2 *max; + + max = kvm_find_cpuid_entry(vcpu, function & 0x80000000, 0); + return max && function <= max->eax; } bool kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx, u32 *ecx, u32 *edx, bool check_limit) { u32 function = *eax, index = *ecx; - struct kvm_cpuid_entry2 *best; - bool entry_found = true; - - best = kvm_find_cpuid_entry(vcpu, function, index); + struct kvm_cpuid_entry2 *entry; + struct kvm_cpuid_entry2 *max; + bool found; - if (!best) { - entry_found = false; - if (!check_limit) - goto out; - - best = check_cpuid_limit(vcpu, function, index); + entry = kvm_find_cpuid_entry(vcpu, function, index); + found = entry; + /* + * Intel CPUID semantics treats any query for an out-of-range + * leaf as if the highest basic leaf (i.e. CPUID.0H:EAX) were + * requested. AMD CPUID semantics returns all zeroes for any + * undefined leaf, whether or not the leaf is in range. + */ + if (!entry && check_limit && !guest_cpuid_is_amd(vcpu) && + !cpuid_function_in_range(vcpu, function)) { + max = kvm_find_cpuid_entry(vcpu, 0, 0); + if (max) { + function = max->eax; + entry = kvm_find_cpuid_entry(vcpu, function, index); + } } - -out: - if (best) { - *eax = best->eax; - *ebx = best->ebx; - *ecx = best->ecx; - *edx = best->edx; - } else + if (entry) { + *eax = entry->eax; + *ebx = entry->ebx; + *ecx = entry->ecx; + *edx = entry->edx; + } else { *eax = *ebx = *ecx = *edx = 0; - trace_kvm_cpuid(function, *eax, *ebx, *ecx, *edx, entry_found); - return entry_found; + /* + * When leaf 0BH or 1FH is defined, CL is pass-through + * and EDX is always the x2APIC ID, even for undefined + * subleaves. Index 1 will exist iff the leaf is + * implemented, so we pass through CL iff leaf 1 + * exists. EDX can be copied from any existing index. 
+ */ + if (function == 0xb || function == 0x1f) { + entry = kvm_find_cpuid_entry(vcpu, function, 1); + if (entry) { + *ecx = index & 0xff; + *edx = entry->edx; + } + } + } + trace_kvm_cpuid(function, *eax, *ebx, *ecx, *edx, found); + return found; } EXPORT_SYMBOL_GPL(kvm_cpuid); diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 718f7d9afedc..698efb8c3897 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -4156,6 +4156,20 @@ out: return rc; } +static int em_xsetbv(struct x86_emulate_ctxt *ctxt) +{ + u32 eax, ecx, edx; + + eax = reg_read(ctxt, VCPU_REGS_RAX); + edx = reg_read(ctxt, VCPU_REGS_RDX); + ecx = reg_read(ctxt, VCPU_REGS_RCX); + + if (ctxt->ops->set_xcr(ctxt, ecx, ((u64)edx << 32) | eax)) + return emulate_gp(ctxt, 0); + + return X86EMUL_CONTINUE; +} + static bool valid_cr(int nr) { switch (nr) { @@ -4409,6 +4423,12 @@ static const struct opcode group7_rm1[] = { N, N, N, N, N, N, }; +static const struct opcode group7_rm2[] = { + N, + II(ImplicitOps | Priv, em_xsetbv, xsetbv), + N, N, N, N, N, N, +}; + static const struct opcode group7_rm3[] = { DIP(SrcNone | Prot | Priv, vmrun, check_svme_pa), II(SrcNone | Prot | EmulateOnUD, em_hypercall, vmmcall), @@ -4498,7 +4518,8 @@ static const struct group_dual group7 = { { }, { EXT(0, group7_rm0), EXT(0, group7_rm1), - N, EXT(0, group7_rm3), + EXT(0, group7_rm2), + EXT(0, group7_rm3), II(SrcNone | DstMem | Mov, em_smsw, smsw), N, II(SrcMem16 | Mov | Priv, em_lmsw, lmsw), EXT(0, group7_rm7), @@ -5144,7 +5165,7 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len) else { rc = __do_insn_fetch_bytes(ctxt, 1); if (rc != X86EMUL_CONTINUE) - return rc; + goto done; } switch (mode) { @@ -5395,6 +5416,8 @@ done_prefixes: ctxt->memopp->addr.mem.ea + ctxt->_eip); done: + if (rc == X86EMUL_PROPAGATE_FAULT) + ctxt->have_exception = true; return (rc != X86EMUL_CONTINUE) ? EMULATION_FAILED : EMULATION_OK; } diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index fff790a3f4ee..23ff65504d7e 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -23,6 +23,7 @@ #include "ioapic.h" #include "hyperv.h" +#include <linux/cpu.h> #include <linux/kvm_host.h> #include <linux/highmem.h> #include <linux/sched/cputime.h> @@ -645,7 +646,9 @@ static int stimer_notify_direct(struct kvm_vcpu_hv_stimer *stimer) .vector = stimer->config.apic_vector }; - return !kvm_apic_set_irq(vcpu, &irq, NULL); + if (lapic_in_kernel(vcpu)) + return !kvm_apic_set_irq(vcpu, &irq, NULL); + return 0; } static void stimer_expiration(struct kvm_vcpu_hv_stimer *stimer) @@ -1852,7 +1855,13 @@ int kvm_vcpu_ioctl_get_hv_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid2 *cpuid, ent->edx |= HV_FEATURE_FREQUENCY_MSRS_AVAILABLE; ent->edx |= HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE; - ent->edx |= HV_STIMER_DIRECT_MODE_AVAILABLE; + + /* + * Direct Synthetic timers only make sense with in-kernel + * LAPIC + */ + if (lapic_in_kernel(vcpu)) + ent->edx |= HV_STIMER_DIRECT_MODE_AVAILABLE; break; @@ -1864,7 +1873,8 @@ int kvm_vcpu_ioctl_get_hv_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid2 *cpuid, ent->eax |= HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED; if (evmcs_ver) ent->eax |= HV_X64_ENLIGHTENED_VMCS_RECOMMENDED; - + if (!cpu_smt_possible()) + ent->eax |= HV_X64_NO_NONARCH_CORESHARING; /* * Default number of spinlock retry attempts, matches * HyperV 2016. 
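In the emulate.c hunk above, em_xsetbv() emulates the privileged XSETBV instruction: ECX selects the XCR, and EDX:EAX carries the 64-bit value. The unprivileged read counterpart XGETBV uses the same register convention, so the layout is easy to inspect from user space on any XSAVE-enabled system (on CPUs without OSXSAVE the instruction raises #UD). An illustrative sketch, not part of the patch:

#include <stdio.h>
#include <stdint.h>
#include <cpuid.h>

int main(void)
{
	unsigned int a, b, c, d;
	uint32_t eax, edx;

	/* CPUID.1:ECX.OSXSAVE[bit 27] gates user-space XGETBV */
	if (!__get_cpuid(1, &a, &b, &c, &d) || !(c & (1u << 27)))
		return 1;

	/* xgetbv: ECX selects the XCR, the value returns in EDX:EAX */
	asm volatile("xgetbv" : "=a"(eax), "=d"(edx) : "c"(0));
	printf("XCR0 = 0x%016llx\n", ((unsigned long long)edx << 32) | eax);
	return 0;
}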
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index e904ff06a83d..b29d00b661ff 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -65,8 +65,11 @@ #define APIC_BROADCAST 0xFF #define X2APIC_BROADCAST 0xFFFFFFFFul -#define LAPIC_TIMER_ADVANCE_ADJUST_DONE 100 -#define LAPIC_TIMER_ADVANCE_ADJUST_INIT 1000 +static bool lapic_timer_advance_dynamic __read_mostly; +#define LAPIC_TIMER_ADVANCE_ADJUST_MIN 100 /* clock cycles */ +#define LAPIC_TIMER_ADVANCE_ADJUST_MAX 10000 /* clock cycles */ +#define LAPIC_TIMER_ADVANCE_NS_INIT 1000 +#define LAPIC_TIMER_ADVANCE_NS_MAX 5000 /* step-by-step approximation to mitigate fluctuation */ #define LAPIC_TIMER_ADVANCE_ADJUST_STEP 8 @@ -108,11 +111,6 @@ static inline int apic_enabled(struct kvm_lapic *apic) (LVT_MASK | APIC_MODE_MASK | APIC_INPUT_POLARITY | \ APIC_LVT_REMOTE_IRR | APIC_LVT_LEVEL_TRIGGER) -static inline u8 kvm_xapic_id(struct kvm_lapic *apic) -{ - return kvm_lapic_get_reg(apic, APIC_ID) >> 24; -} - static inline u32 kvm_x2apic_id(struct kvm_lapic *apic) { return apic->vcpu->vcpu_id; @@ -1198,10 +1196,8 @@ void kvm_apic_set_eoi_accelerated(struct kvm_vcpu *vcpu, int vector) } EXPORT_SYMBOL_GPL(kvm_apic_set_eoi_accelerated); -static void apic_send_ipi(struct kvm_lapic *apic) +static void apic_send_ipi(struct kvm_lapic *apic, u32 icr_low, u32 icr_high) { - u32 icr_low = kvm_lapic_get_reg(apic, APIC_ICR); - u32 icr_high = kvm_lapic_get_reg(apic, APIC_ICR2); struct kvm_lapic_irq irq; irq.vector = icr_low & APIC_VECTOR_MASK; @@ -1487,26 +1483,25 @@ static inline void adjust_lapic_timer_advance(struct kvm_vcpu *vcpu, u32 timer_advance_ns = apic->lapic_timer.timer_advance_ns; u64 ns; + /* Do not adjust for tiny fluctuations or large random spikes. */ + if (abs(advance_expire_delta) > LAPIC_TIMER_ADVANCE_ADJUST_MAX || + abs(advance_expire_delta) < LAPIC_TIMER_ADVANCE_ADJUST_MIN) + return; + /* too early */ if (advance_expire_delta < 0) { ns = -advance_expire_delta * 1000000ULL; do_div(ns, vcpu->arch.virtual_tsc_khz); - timer_advance_ns -= min((u32)ns, - timer_advance_ns / LAPIC_TIMER_ADVANCE_ADJUST_STEP); + timer_advance_ns -= ns/LAPIC_TIMER_ADVANCE_ADJUST_STEP; } else { /* too late */ ns = advance_expire_delta * 1000000ULL; do_div(ns, vcpu->arch.virtual_tsc_khz); - timer_advance_ns += min((u32)ns, - timer_advance_ns / LAPIC_TIMER_ADVANCE_ADJUST_STEP); + timer_advance_ns += ns/LAPIC_TIMER_ADVANCE_ADJUST_STEP; } - if (abs(advance_expire_delta) < LAPIC_TIMER_ADVANCE_ADJUST_DONE) - apic->lapic_timer.timer_advance_adjust_done = true; - if (unlikely(timer_advance_ns > 5000)) { - timer_advance_ns = LAPIC_TIMER_ADVANCE_ADJUST_INIT; - apic->lapic_timer.timer_advance_adjust_done = false; - } + if (unlikely(timer_advance_ns > LAPIC_TIMER_ADVANCE_NS_MAX)) + timer_advance_ns = LAPIC_TIMER_ADVANCE_NS_INIT; apic->lapic_timer.timer_advance_ns = timer_advance_ns; } @@ -1526,7 +1521,7 @@ static void __kvm_wait_lapic_expire(struct kvm_vcpu *vcpu) if (guest_tsc < tsc_deadline) __wait_lapic_expire(vcpu, tsc_deadline - guest_tsc); - if (unlikely(!apic->lapic_timer.timer_advance_adjust_done)) + if (lapic_timer_advance_dynamic) adjust_lapic_timer_advance(vcpu, apic->lapic_timer.advance_expire_delta); } @@ -1598,7 +1593,7 @@ static void start_sw_tscdeadline(struct kvm_lapic *apic) likely(ns > apic->lapic_timer.timer_advance_ns)) { expire = ktime_add_ns(now, ns); expire = ktime_sub_ns(expire, ktimer->timer_advance_ns); - hrtimer_start(&ktimer->timer, expire, HRTIMER_MODE_ABS); + hrtimer_start(&ktimer->timer, expire, HRTIMER_MODE_ABS_HARD); } else 
apic_timer_expired(apic); @@ -1914,8 +1909,9 @@ int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val) } case APIC_ICR: /* No delay here, so we always clear the pending bit */ - kvm_lapic_set_reg(apic, APIC_ICR, val & ~(1 << 12)); - apic_send_ipi(apic); + val &= ~(1 << 12); + apic_send_ipi(apic, val, kvm_lapic_get_reg(apic, APIC_ICR2)); + kvm_lapic_set_reg(apic, APIC_ICR, val); break; case APIC_ICR2: @@ -2299,17 +2295,16 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu, int timer_advance_ns) apic->vcpu = vcpu; hrtimer_init(&apic->lapic_timer.timer, CLOCK_MONOTONIC, - HRTIMER_MODE_ABS); + HRTIMER_MODE_ABS_HARD); apic->lapic_timer.timer.function = apic_timer_fn; if (timer_advance_ns == -1) { - apic->lapic_timer.timer_advance_ns = LAPIC_TIMER_ADVANCE_ADJUST_INIT; - apic->lapic_timer.timer_advance_adjust_done = false; + apic->lapic_timer.timer_advance_ns = LAPIC_TIMER_ADVANCE_NS_INIT; + lapic_timer_advance_dynamic = true; } else { apic->lapic_timer.timer_advance_ns = timer_advance_ns; - apic->lapic_timer.timer_advance_adjust_done = true; + lapic_timer_advance_dynamic = false; } - /* * APIC is created enabled. This will prevent kvm_lapic_set_base from * thinking that APIC state has changed. @@ -2484,7 +2479,7 @@ void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu) timer = &vcpu->arch.apic->lapic_timer.timer; if (hrtimer_cancel(timer)) - hrtimer_start_expires(timer, HRTIMER_MODE_ABS); + hrtimer_start_expires(timer, HRTIMER_MODE_ABS_HARD); } /* @@ -2707,11 +2702,14 @@ void kvm_apic_accept_events(struct kvm_vcpu *vcpu) return; /* - * INITs are latched while in SMM. Because an SMM CPU cannot - * be in KVM_MP_STATE_INIT_RECEIVED state, just eat SIPIs - * and delay processing of INIT until the next RSM. + * INITs are latched while CPU is in specific states + * (SMM, VMX non-root mode, SVM with GIF=0). + * Because a CPU cannot be in these states immediately + * after it has processed an INIT signal (and thus in + * KVM_MP_STATE_INIT_RECEIVED state), just eat SIPIs + * and leave the INIT pending. */ - if (is_smm(vcpu)) { + if (is_smm(vcpu) || kvm_x86_ops->apic_init_signal_blocked(vcpu)) { WARN_ON_ONCE(vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED); if (test_bit(KVM_APIC_SIPI, &apic->pending_events)) clear_bit(KVM_APIC_SIPI, &apic->pending_events); diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h index 50053d2b8b7b..1f5014852e20 100644 --- a/arch/x86/kvm/lapic.h +++ b/arch/x86/kvm/lapic.h @@ -35,7 +35,6 @@ struct kvm_timer { s64 advance_expire_delta; atomic_t pending; /* accumulated triggered timers */ bool hv_timer_in_use; - bool timer_advance_adjust_done; }; struct kvm_lapic { @@ -243,4 +242,9 @@ static inline enum lapic_mode kvm_apic_mode(u64 apic_base) return apic_base & (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE); } +static inline u8 kvm_xapic_id(struct kvm_lapic *apic) +{ + return kvm_lapic_get_reg(apic, APIC_ID) >> 24; +} + #endif diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 218b277bfda3..2ce9da58611e 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -37,6 +37,7 @@ #include <linux/uaccess.h> #include <linux/hash.h> #include <linux/kern_levels.h> +#include <linux/kthread.h> #include <asm/page.h> #include <asm/pat.h> @@ -47,6 +48,35 @@ #include <asm/kvm_page_track.h> #include "trace.h" +extern bool itlb_multihit_kvm_mitigation; + +static int __read_mostly nx_huge_pages = -1; +#ifdef CONFIG_PREEMPT_RT +/* Recovery can cause latency spikes, disable it for PREEMPT_RT. 
*/ +static uint __read_mostly nx_huge_pages_recovery_ratio = 0; +#else +static uint __read_mostly nx_huge_pages_recovery_ratio = 60; +#endif + +static int set_nx_huge_pages(const char *val, const struct kernel_param *kp); +static int set_nx_huge_pages_recovery_ratio(const char *val, const struct kernel_param *kp); + +static struct kernel_param_ops nx_huge_pages_ops = { + .set = set_nx_huge_pages, + .get = param_get_bool, +}; + +static struct kernel_param_ops nx_huge_pages_recovery_ratio_ops = { + .set = set_nx_huge_pages_recovery_ratio, + .get = param_get_uint, +}; + +module_param_cb(nx_huge_pages, &nx_huge_pages_ops, &nx_huge_pages, 0644); +__MODULE_PARM_TYPE(nx_huge_pages, "bool"); +module_param_cb(nx_huge_pages_recovery_ratio, &nx_huge_pages_recovery_ratio_ops, + &nx_huge_pages_recovery_ratio, 0644); +__MODULE_PARM_TYPE(nx_huge_pages_recovery_ratio, "uint"); + /* * When setting this variable to true it enables Two-Dimensional-Paging * where the hardware walks 2 page tables: @@ -83,7 +113,17 @@ module_param(dbg, bool, 0644); #define PTE_PREFETCH_NUM 8 #define PT_FIRST_AVAIL_BITS_SHIFT 10 -#define PT64_SECOND_AVAIL_BITS_SHIFT 52 +#define PT64_SECOND_AVAIL_BITS_SHIFT 54 + +/* + * The mask used to denote special SPTEs, which can be either MMIO SPTEs or + * Access Tracking SPTEs. + */ +#define SPTE_SPECIAL_MASK (3ULL << 52) +#define SPTE_AD_ENABLED_MASK (0ULL << 52) +#define SPTE_AD_DISABLED_MASK (1ULL << 52) +#define SPTE_AD_WRPROT_ONLY_MASK (2ULL << 52) +#define SPTE_MMIO_MASK (3ULL << 52) #define PT64_LEVEL_BITS 9 @@ -214,16 +254,16 @@ static u64 __read_mostly shadow_accessed_mask; static u64 __read_mostly shadow_dirty_mask; static u64 __read_mostly shadow_mmio_mask; static u64 __read_mostly shadow_mmio_value; +static u64 __read_mostly shadow_mmio_access_mask; static u64 __read_mostly shadow_present_mask; static u64 __read_mostly shadow_me_mask; /* - * SPTEs used by MMUs without A/D bits are marked with shadow_acc_track_value. - * Non-present SPTEs with shadow_acc_track_value set are in place for access - * tracking. + * SPTEs used by MMUs without A/D bits are marked with SPTE_AD_DISABLED_MASK; + * shadow_acc_track_mask is the set of bits to be cleared in non-accessed + * pages. */ static u64 __read_mostly shadow_acc_track_mask; -static const u64 shadow_acc_track_value = SPTE_SPECIAL_MASK; /* * The mask/shift to use for saving the original R/X bits when marking the PTE @@ -299,34 +339,63 @@ static void kvm_flush_remote_tlbs_with_address(struct kvm *kvm, kvm_flush_remote_tlbs_with_range(kvm, &range); } -void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask, u64 mmio_value) +void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask, u64 mmio_value, u64 access_mask) { + BUG_ON((u64)(unsigned)access_mask != access_mask); BUG_ON((mmio_mask & mmio_value) != mmio_value); - shadow_mmio_value = mmio_value | SPTE_SPECIAL_MASK; + shadow_mmio_value = mmio_value | SPTE_MMIO_MASK; shadow_mmio_mask = mmio_mask | SPTE_SPECIAL_MASK; + shadow_mmio_access_mask = access_mask; } EXPORT_SYMBOL_GPL(kvm_mmu_set_mmio_spte_mask); +static bool is_mmio_spte(u64 spte) +{ + return (spte & shadow_mmio_mask) == shadow_mmio_value; +} + static inline bool sp_ad_disabled(struct kvm_mmu_page *sp) { return sp->role.ad_disabled; } +static inline bool kvm_vcpu_ad_need_write_protect(struct kvm_vcpu *vcpu) +{ + /* + * When using the EPT page-modification log, the GPAs in the log + * would come from L2 rather than L1. Therefore, we need to rely + * on write protection to record dirty pages. 
This also bypasses + * PML, since writes now result in a vmexit. + */ + return vcpu->arch.mmu == &vcpu->arch.guest_mmu; +} + static inline bool spte_ad_enabled(u64 spte) { - MMU_WARN_ON((spte & shadow_mmio_mask) == shadow_mmio_value); - return !(spte & shadow_acc_track_value); + MMU_WARN_ON(is_mmio_spte(spte)); + return (spte & SPTE_SPECIAL_MASK) != SPTE_AD_DISABLED_MASK; +} + +static inline bool spte_ad_need_write_protect(u64 spte) +{ + MMU_WARN_ON(is_mmio_spte(spte)); + return (spte & SPTE_SPECIAL_MASK) != SPTE_AD_ENABLED_MASK; +} + +static bool is_nx_huge_page_enabled(void) +{ + return READ_ONCE(nx_huge_pages); } static inline u64 spte_shadow_accessed_mask(u64 spte) { - MMU_WARN_ON((spte & shadow_mmio_mask) == shadow_mmio_value); + MMU_WARN_ON(is_mmio_spte(spte)); return spte_ad_enabled(spte) ? shadow_accessed_mask : 0; } static inline u64 spte_shadow_dirty_mask(u64 spte) { - MMU_WARN_ON((spte & shadow_mmio_mask) == shadow_mmio_value); + MMU_WARN_ON(is_mmio_spte(spte)); return spte_ad_enabled(spte) ? shadow_dirty_mask : 0; } @@ -389,23 +458,16 @@ static void mark_mmio_spte(struct kvm_vcpu *vcpu, u64 *sptep, u64 gfn, u64 mask = generation_mmio_spte_mask(gen); u64 gpa = gfn << PAGE_SHIFT; - access &= ACC_WRITE_MASK | ACC_USER_MASK; + access &= shadow_mmio_access_mask; mask |= shadow_mmio_value | access; mask |= gpa | shadow_nonpresent_or_rsvd_mask; mask |= (gpa & shadow_nonpresent_or_rsvd_mask) << shadow_nonpresent_or_rsvd_mask_len; - page_header(__pa(sptep))->mmio_cached = true; - trace_mark_mmio_spte(sptep, gfn, access, gen); mmu_spte_set(sptep, mask); } -static bool is_mmio_spte(u64 spte) -{ - return (spte & shadow_mmio_mask) == shadow_mmio_value; -} - static gfn_t get_mmio_spte_gfn(u64 spte) { u64 gpa = spte & shadow_nonpresent_or_rsvd_lower_gfn_mask; @@ -418,8 +480,7 @@ static gfn_t get_mmio_spte_gfn(u64 spte) static unsigned get_mmio_spte_access(u64 spte) { - u64 mask = generation_mmio_spte_mask(MMIO_SPTE_GEN_MASK) | shadow_mmio_mask; - return (spte & ~mask) & ~PAGE_MASK; + return spte & shadow_mmio_access_mask; } static bool set_mmio_spte(struct kvm_vcpu *vcpu, u64 *sptep, gfn_t gfn, @@ -461,7 +522,7 @@ void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask, { BUG_ON(!dirty_mask != !accessed_mask); BUG_ON(!accessed_mask && !acc_track_mask); - BUG_ON(acc_track_mask & shadow_acc_track_value); + BUG_ON(acc_track_mask & SPTE_SPECIAL_MASK); shadow_user_mask = user_mask; shadow_accessed_mask = accessed_mask; @@ -1164,6 +1225,17 @@ static void account_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp) kvm_mmu_gfn_disallow_lpage(slot, gfn); } +static void account_huge_nx_page(struct kvm *kvm, struct kvm_mmu_page *sp) +{ + if (sp->lpage_disallowed) + return; + + ++kvm->stat.nx_lpage_splits; + list_add_tail(&sp->lpage_disallowed_link, + &kvm->arch.lpage_disallowed_mmu_pages); + sp->lpage_disallowed = true; +} + static void unaccount_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp) { struct kvm_memslots *slots; @@ -1181,6 +1253,13 @@ static void unaccount_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp) kvm_mmu_gfn_allow_lpage(slot, gfn); } +static void unaccount_huge_nx_page(struct kvm *kvm, struct kvm_mmu_page *sp) +{ + --kvm->stat.nx_lpage_splits; + sp->lpage_disallowed = false; + list_del(&sp->lpage_disallowed_link); +} + static bool __mmu_gfn_lpage_is_disallowed(gfn_t gfn, int level, struct kvm_memory_slot *slot) { @@ -1589,16 +1668,16 @@ static bool spte_clear_dirty(u64 *sptep) rmap_printk("rmap_clear_dirty: spte %p %llx\n", sptep, *sptep); + 
MMU_WARN_ON(!spte_ad_enabled(spte)); spte &= ~shadow_dirty_mask; - return mmu_spte_update(sptep, spte); } -static bool wrprot_ad_disabled_spte(u64 *sptep) +static bool spte_wrprot_for_clear_dirty(u64 *sptep) { bool was_writable = test_and_clear_bit(PT_WRITABLE_SHIFT, (unsigned long *)sptep); - if (was_writable) + if (was_writable && !spte_ad_enabled(*sptep)) kvm_set_pfn_dirty(spte_to_pfn(*sptep)); return was_writable; @@ -1617,10 +1696,10 @@ static bool __rmap_clear_dirty(struct kvm *kvm, struct kvm_rmap_head *rmap_head) bool flush = false; for_each_rmap_spte(rmap_head, &iter, sptep) - if (spte_ad_enabled(*sptep)) - flush |= spte_clear_dirty(sptep); + if (spte_ad_need_write_protect(*sptep)) + flush |= spte_wrprot_for_clear_dirty(sptep); else - flush |= wrprot_ad_disabled_spte(sptep); + flush |= spte_clear_dirty(sptep); return flush; } @@ -1631,6 +1710,11 @@ static bool spte_set_dirty(u64 *sptep) rmap_printk("rmap_set_dirty: spte %p %llx\n", sptep, *sptep); + /* + * Similar to the !kvm_x86_ops->slot_disable_log_dirty case, + * do not bother adding back write access to pages marked + * SPTE_AD_WRPROT_ONLY_MASK. + */ spte |= shadow_dirty_mask; return mmu_spte_update(sptep, spte); @@ -2095,6 +2179,13 @@ static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu, int direct if (!direct) sp->gfns = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache); set_page_private(virt_to_page(sp->spt), (unsigned long)sp); + + /* + * active_mmu_pages must be a FIFO list, as kvm_zap_obsolete_pages() + * depends on valid pages being added to the head of the list. See + * comments in kvm_zap_obsolete_pages(). + */ + sp->mmu_valid_gen = vcpu->kvm->arch.mmu_valid_gen; list_add(&sp->link, &vcpu->kvm->arch.active_mmu_pages); kvm_mod_used_mmu_pages(vcpu->kvm, +1); return sp; @@ -2244,7 +2335,7 @@ static void kvm_mmu_commit_zap_page(struct kvm *kvm, #define for_each_valid_sp(_kvm, _sp, _gfn) \ hlist_for_each_entry(_sp, \ &(_kvm)->arch.mmu_page_hash[kvm_page_table_hashfn(_gfn)], hash_link) \ - if ((_sp)->role.invalid) { \ + if (is_obsolete_sp((_kvm), (_sp))) { \ } else #define for_each_gfn_indirect_valid_sp(_kvm, _sp, _gfn) \ @@ -2301,6 +2392,12 @@ static void kvm_mmu_audit(struct kvm_vcpu *vcpu, int point) { } static void mmu_audit_disable(void) { } #endif +static bool is_obsolete_sp(struct kvm *kvm, struct kvm_mmu_page *sp) +{ + return sp->role.invalid || + unlikely(sp->mmu_valid_gen != kvm->arch.mmu_valid_gen); +} + static bool kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, struct list_head *invalid_list) { @@ -2609,7 +2706,7 @@ static void link_shadow_page(struct kvm_vcpu *vcpu, u64 *sptep, shadow_user_mask | shadow_x_mask | shadow_me_mask; if (sp_ad_disabled(sp)) - spte |= shadow_acc_track_value; + spte |= SPTE_AD_DISABLED_MASK; else spte |= shadow_accessed_mask; @@ -2739,10 +2836,18 @@ static bool __kvm_mmu_prepare_zap_page(struct kvm *kvm, } else { list_move(&sp->link, &kvm->arch.active_mmu_pages); - if (!sp->role.invalid) + /* + * Obsolete pages cannot be used on any vCPUs, see the comment + * in kvm_mmu_zap_all_fast(). Note, is_obsolete_sp() also + * treats invalid shadow pages as being obsolete. 
+ */ + if (!is_obsolete_sp(kvm, sp)) kvm_reload_remote_mmus(kvm); } + if (sp->lpage_disallowed) + unaccount_huge_nx_page(kvm, sp); + sp->role.invalid = 1; return list_unstable; } @@ -2950,7 +3055,9 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep, sp = page_header(__pa(sptep)); if (sp_ad_disabled(sp)) - spte |= shadow_acc_track_value; + spte |= SPTE_AD_DISABLED_MASK; + else if (kvm_vcpu_ad_need_write_protect(vcpu)) + spte |= SPTE_AD_WRPROT_ONLY_MASK; /* * For the EPT case, shadow_present_mask is 0 if hardware @@ -2962,6 +3069,11 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep, if (!speculative) spte |= spte_shadow_accessed_mask(spte); + if (level > PT_PAGE_TABLE_LEVEL && (pte_access & ACC_EXEC_MASK) && + is_nx_huge_page_enabled()) { + pte_access &= ~ACC_EXEC_MASK; + } + if (pte_access & ACC_EXEC_MASK) spte |= shadow_x_mask; else @@ -3182,9 +3294,32 @@ static void direct_pte_prefetch(struct kvm_vcpu *vcpu, u64 *sptep) __direct_pte_prefetch(vcpu, sp, sptep); } +static void disallowed_hugepage_adjust(struct kvm_shadow_walk_iterator it, + gfn_t gfn, kvm_pfn_t *pfnp, int *levelp) +{ + int level = *levelp; + u64 spte = *it.sptep; + + if (it.level == level && level > PT_PAGE_TABLE_LEVEL && + is_nx_huge_page_enabled() && + is_shadow_present_pte(spte) && + !is_large_pte(spte)) { + /* + * A small SPTE exists for this pfn, but FNAME(fetch) + * and __direct_map would like to create a large PTE + * instead: just force them to go down another level, + * restoring into pfn the next 9 bits of the + * address. + */ + u64 page_mask = KVM_PAGES_PER_HPAGE(level) - KVM_PAGES_PER_HPAGE(level - 1); + *pfnp |= gfn & page_mask; + (*levelp)--; + } +} + static int __direct_map(struct kvm_vcpu *vcpu, gpa_t gpa, int write, int map_writable, int level, kvm_pfn_t pfn, - bool prefault) + bool prefault, bool lpage_disallowed) { struct kvm_shadow_walk_iterator it; struct kvm_mmu_page *sp; @@ -3197,6 +3332,12 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t gpa, int write, trace_kvm_mmu_spte_requested(gpa, level, pfn); for_each_shadow_entry(vcpu, gpa, it) { + /* + * We cannot overwrite existing page tables with an NX + * large page, as the leaf could be executable. + */ + disallowed_hugepage_adjust(it, gfn, &pfn, &level); + base_gfn = gfn & ~(KVM_PAGES_PER_HPAGE(it.level) - 1); if (it.level == level) break; @@ -3207,6 +3348,8 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t gpa, int write, it.level - 1, true, ACC_ALL); link_shadow_page(vcpu, it.sptep, sp); + if (lpage_disallowed) + account_huge_nx_page(vcpu->kvm, sp); } } @@ -3255,7 +3398,7 @@ static void transparent_hugepage_adjust(struct kvm_vcpu *vcpu, * here.
*/ if (!is_error_noslot_pfn(pfn) && !kvm_is_reserved_pfn(pfn) && - level == PT_PAGE_TABLE_LEVEL && + !kvm_is_zone_device_pfn(pfn) && level == PT_PAGE_TABLE_LEVEL && PageTransCompoundMap(pfn_to_page(pfn)) && !mmu_gfn_lpage_is_disallowed(vcpu, gfn, PT_DIRECTORY_LEVEL)) { unsigned long mask; @@ -3290,7 +3433,8 @@ static bool handle_abnormal_pfn(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn, } if (unlikely(is_noslot_pfn(pfn))) - vcpu_cache_mmio_info(vcpu, gva, gfn, access); + vcpu_cache_mmio_info(vcpu, gva, gfn, + access & shadow_mmio_access_mask); return false; } @@ -3498,11 +3642,14 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code, { int r; int level; - bool force_pt_level = false; + bool force_pt_level; kvm_pfn_t pfn; unsigned long mmu_seq; bool map_writable, write = error_code & PFERR_WRITE_MASK; + bool lpage_disallowed = (error_code & PFERR_FETCH_MASK) && + is_nx_huge_page_enabled(); + force_pt_level = lpage_disallowed; level = mapping_level(vcpu, gfn, &force_pt_level); if (likely(!force_pt_level)) { /* @@ -3536,7 +3683,8 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code, goto out_unlock; if (likely(!force_pt_level)) transparent_hugepage_adjust(vcpu, gfn, &pfn, &level); - r = __direct_map(vcpu, v, write, map_writable, level, pfn, prefault); + r = __direct_map(vcpu, v, write, map_writable, level, pfn, + prefault, false); out_unlock: spin_unlock(&vcpu->kvm->mmu_lock); kvm_release_pfn_clean(pfn); @@ -4122,6 +4270,8 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code, unsigned long mmu_seq; int write = error_code & PFERR_WRITE_MASK; bool map_writable; + bool lpage_disallowed = (error_code & PFERR_FETCH_MASK) && + is_nx_huge_page_enabled(); MMU_WARN_ON(!VALID_PAGE(vcpu->arch.mmu->root_hpa)); @@ -4132,8 +4282,9 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code, if (r) return r; - force_pt_level = !check_hugepage_cache_consistency(vcpu, gfn, - PT_DIRECTORY_LEVEL); + force_pt_level = + lpage_disallowed || + !check_hugepage_cache_consistency(vcpu, gfn, PT_DIRECTORY_LEVEL); level = mapping_level(vcpu, gfn, &force_pt_level); if (likely(!force_pt_level)) { if (level > PT_DIRECTORY_LEVEL && @@ -4162,7 +4313,8 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code, goto out_unlock; if (likely(!force_pt_level)) transparent_hugepage_adjust(vcpu, gfn, &pfn, &level); - r = __direct_map(vcpu, gpa, write, map_writable, level, pfn, prefault); + r = __direct_map(vcpu, gpa, write, map_writable, level, pfn, + prefault, lpage_disallowed); out_unlock: spin_unlock(&vcpu->kvm->mmu_lock); kvm_release_pfn_clean(pfn); @@ -4233,6 +4385,13 @@ static bool fast_cr3_switch(struct kvm_vcpu *vcpu, gpa_t new_cr3, return false; if (cached_root_available(vcpu, new_cr3, new_role)) { + /* + * It is possible that the cached previous root page is + * obsolete because of a change in the MMU generation + * number. However, changing the generation number is + * accompanied by KVM_REQ_MMU_RELOAD, which will free + * the root set here and allocate a new one. + */ kvm_make_request(KVM_REQ_LOAD_CR3, vcpu); if (!skip_tlb_flush) { kvm_make_request(KVM_REQ_MMU_SYNC, vcpu); @@ -5361,7 +5520,6 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u64 error_code, void *insn, int insn_len) { int r, emulation_type = 0; - enum emulation_result er; bool direct = vcpu->arch.mmu->direct_map; /* With shadow page tables, fault_address contains a GVA or nGPA. 
*/ @@ -5428,19 +5586,8 @@ emulate: return 1; } - er = x86_emulate_instruction(vcpu, cr2, emulation_type, insn, insn_len); - - switch (er) { - case EMULATE_DONE: - return 1; - case EMULATE_USER_EXIT: - ++vcpu->stat.mmio_exits; - /* fall through */ - case EMULATE_FAIL: - return 0; - default: - BUG(); - } + return x86_emulate_instruction(vcpu, cr2, emulation_type, insn, + insn_len); } EXPORT_SYMBOL_GPL(kvm_mmu_page_fault); @@ -5592,13 +5739,13 @@ slot_handle_leaf(struct kvm *kvm, struct kvm_memory_slot *memslot, PT_PAGE_TABLE_LEVEL, lock_flush_tlb); } -static void free_mmu_pages(struct kvm_vcpu *vcpu) +static void free_mmu_pages(struct kvm_mmu *mmu) { - free_page((unsigned long)vcpu->arch.mmu->pae_root); - free_page((unsigned long)vcpu->arch.mmu->lm_root); + free_page((unsigned long)mmu->pae_root); + free_page((unsigned long)mmu->lm_root); } -static int alloc_mmu_pages(struct kvm_vcpu *vcpu) +static int alloc_mmu_pages(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu) { struct page *page; int i; @@ -5619,9 +5766,9 @@ static int alloc_mmu_pages(struct kvm_vcpu *vcpu) if (!page) return -ENOMEM; - vcpu->arch.mmu->pae_root = page_address(page); + mmu->pae_root = page_address(page); for (i = 0; i < 4; ++i) - vcpu->arch.mmu->pae_root[i] = INVALID_PAGE; + mmu->pae_root[i] = INVALID_PAGE; return 0; } @@ -5629,6 +5776,7 @@ static int alloc_mmu_pages(struct kvm_vcpu *vcpu) int kvm_mmu_create(struct kvm_vcpu *vcpu) { uint i; + int ret; vcpu->arch.mmu = &vcpu->arch.root_mmu; vcpu->arch.walk_mmu = &vcpu->arch.root_mmu; @@ -5646,14 +5794,122 @@ int kvm_mmu_create(struct kvm_vcpu *vcpu) vcpu->arch.guest_mmu.prev_roots[i] = KVM_MMU_ROOT_INFO_INVALID; vcpu->arch.nested_mmu.translate_gpa = translate_nested_gpa; - return alloc_mmu_pages(vcpu); + + ret = alloc_mmu_pages(vcpu, &vcpu->arch.guest_mmu); + if (ret) + return ret; + + ret = alloc_mmu_pages(vcpu, &vcpu->arch.root_mmu); + if (ret) + goto fail_allocate_root; + + return ret; + fail_allocate_root: + free_mmu_pages(&vcpu->arch.guest_mmu); + return ret; +} + +#define BATCH_ZAP_PAGES 10 +static void kvm_zap_obsolete_pages(struct kvm *kvm) +{ + struct kvm_mmu_page *sp, *node; + int nr_zapped, batch = 0; + +restart: + list_for_each_entry_safe_reverse(sp, node, + &kvm->arch.active_mmu_pages, link) { + /* + * No obsolete valid page exists before a newly created page + * since active_mmu_pages is a FIFO list. + */ + if (!is_obsolete_sp(kvm, sp)) + break; + + /* + * Skip invalid pages with a non-zero root count; zapping pages + * with a non-zero root count will never succeed, i.e. the page + * will get thrown back on active_mmu_pages and we'll get stuck + * in an infinite loop. + */ + if (sp->role.invalid && sp->root_count) + continue; + + /* + * No need to flush the TLB since we're only zapping shadow + * pages with an obsolete generation number and all vCPUs have + * loaded a new root, i.e. the shadow pages being zapped cannot + * be in active use by the guest. + */ + if (batch >= BATCH_ZAP_PAGES && + cond_resched_lock(&kvm->mmu_lock)) { + batch = 0; + goto restart; + } + + if (__kvm_mmu_prepare_zap_page(kvm, sp, + &kvm->arch.zapped_obsolete_pages, &nr_zapped)) { + batch += nr_zapped; + goto restart; + } + } + + /* + * Trigger a remote TLB flush before freeing the page tables to ensure + * KVM is not in the middle of a lockless shadow page table walk, which + * may reference the pages. + */ + kvm_mmu_commit_zap_page(kvm, &kvm->arch.zapped_obsolete_pages); +} + +/* + * Fast-invalidate all shadow pages, using a lock-break technique + * to zap obsolete pages.
+ * + * This is required when a memslot is being deleted or the VM is being + * destroyed; in these cases, we must ensure that the KVM MMU does not + * use any resource of the slot being deleted (or, on VM destruction, + * of any slot) after the function returns. + */ +static void kvm_mmu_zap_all_fast(struct kvm *kvm) +{ + lockdep_assert_held(&kvm->slots_lock); + + spin_lock(&kvm->mmu_lock); + trace_kvm_mmu_zap_all_fast(kvm); + + /* + * Toggle mmu_valid_gen between '0' and '1'. Because slots_lock is + * held for the entire duration of zapping obsolete pages, it's + * impossible for there to be multiple invalid generations associated + * with *valid* shadow pages at any given time, i.e. there is exactly + * one valid generation and (at most) one invalid generation. + */ + kvm->arch.mmu_valid_gen = kvm->arch.mmu_valid_gen ? 0 : 1; + + /* + * Notify all vcpus to reload their shadow page tables and flush their + * TLBs. All vcpus will then switch to new shadow page tables carrying + * the new mmu_valid_gen. + * + * Note: this must be done under the protection of mmu_lock; otherwise, + * a vcpu could purge a shadow page but miss the TLB flush. + */ + kvm_reload_remote_mmus(kvm); + + kvm_zap_obsolete_pages(kvm); + spin_unlock(&kvm->mmu_lock); +} + +static bool kvm_has_zapped_obsolete_pages(struct kvm *kvm) +{ + return unlikely(!list_empty_careful(&kvm->arch.zapped_obsolete_pages)); } static void kvm_mmu_invalidate_zap_pages_in_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, struct kvm_page_track_notifier_node *node) { - kvm_mmu_zap_all(kvm); + kvm_mmu_zap_all_fast(kvm); } void kvm_mmu_init_vm(struct kvm *kvm) @@ -5758,9 +6014,9 @@ restart: * the guest, and the guest page table is using 4K page size * mapping if the indirect sp has level = 1. */ - if (sp->role.direct && - !kvm_is_reserved_pfn(pfn) && - PageTransCompoundMap(pfn_to_page(pfn))) { + if (sp->role.direct && !kvm_is_reserved_pfn(pfn) && + !kvm_is_zone_device_pfn(pfn) && + PageTransCompoundMap(pfn_to_page(pfn))) { pte_list_remove(rmap_head, sptep); if (kvm_available_flush_tlb_with_range()) @@ -5846,7 +6102,7 @@ void kvm_mmu_slot_set_dirty(struct kvm *kvm, } EXPORT_SYMBOL_GPL(kvm_mmu_slot_set_dirty); -static void __kvm_mmu_zap_all(struct kvm *kvm, bool mmio_only) +void kvm_mmu_zap_all(struct kvm *kvm) { struct kvm_mmu_page *sp, *node; LIST_HEAD(invalid_list); @@ -5855,14 +6111,10 @@ static void __kvm_mmu_zap_all(struct kvm *kvm, bool mmio_only) spin_lock(&kvm->mmu_lock); restart: list_for_each_entry_safe(sp, node, &kvm->arch.active_mmu_pages, link) { - if (mmio_only && !sp->mmio_cached) - continue; if (sp->role.invalid && sp->root_count) continue; - if (__kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list, &ign)) { - WARN_ON_ONCE(mmio_only); + if (__kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list, &ign)) goto restart; - } if (cond_resched_lock(&kvm->mmu_lock)) goto restart; } @@ -5871,11 +6123,6 @@ restart: spin_unlock(&kvm->mmu_lock); } -void kvm_mmu_zap_all(struct kvm *kvm) -{ - return __kvm_mmu_zap_all(kvm, false); -} - void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm, u64 gen) { WARN_ON(gen & KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS); @@ -5897,7 +6144,7 @@ void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm, u64 gen) */ if (unlikely(gen == 0)) { kvm_debug_ratelimited("kvm: zapping shadow pages for mmio generation wraparound\n"); - __kvm_mmu_zap_all(kvm, true); + kvm_mmu_zap_all_fast(kvm); } } @@ -5928,16 +6175,24 @@ mmu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc) * want to shrink a VM that only started to populate its MMU * anyway.
*/ - if (!kvm->arch.n_used_mmu_pages) + if (!kvm->arch.n_used_mmu_pages && + !kvm_has_zapped_obsolete_pages(kvm)) continue; idx = srcu_read_lock(&kvm->srcu); spin_lock(&kvm->mmu_lock); + if (kvm_has_zapped_obsolete_pages(kvm)) { + kvm_mmu_commit_zap_page(kvm, + &kvm->arch.zapped_obsolete_pages); + goto unlock; + } + if (prepare_zap_oldest_mmu_page(kvm, &invalid_list)) freed++; kvm_mmu_commit_zap_page(kvm, &invalid_list); +unlock: spin_unlock(&kvm->mmu_lock); srcu_read_unlock(&kvm->srcu, idx); @@ -5997,13 +6252,62 @@ static void kvm_set_mmio_spte_mask(void) if (IS_ENABLED(CONFIG_X86_64) && shadow_phys_bits == 52) mask &= ~1ull; - kvm_mmu_set_mmio_spte_mask(mask, mask); + kvm_mmu_set_mmio_spte_mask(mask, mask, ACC_WRITE_MASK | ACC_USER_MASK); +} + +static bool get_nx_auto_mode(void) +{ + /* Return true when CPU has the bug, and mitigations are ON */ + return boot_cpu_has_bug(X86_BUG_ITLB_MULTIHIT) && !cpu_mitigations_off(); +} + +static void __set_nx_huge_pages(bool val) +{ + nx_huge_pages = itlb_multihit_kvm_mitigation = val; +} + +static int set_nx_huge_pages(const char *val, const struct kernel_param *kp) +{ + bool old_val = nx_huge_pages; + bool new_val; + + /* In "auto" mode deploy workaround only if CPU has the bug. */ + if (sysfs_streq(val, "off")) + new_val = 0; + else if (sysfs_streq(val, "force")) + new_val = 1; + else if (sysfs_streq(val, "auto")) + new_val = get_nx_auto_mode(); + else if (strtobool(val, &new_val) < 0) + return -EINVAL; + + __set_nx_huge_pages(new_val); + + if (new_val != old_val) { + struct kvm *kvm; + + mutex_lock(&kvm_lock); + + list_for_each_entry(kvm, &vm_list, vm_list) { + mutex_lock(&kvm->slots_lock); + kvm_mmu_zap_all_fast(kvm); + mutex_unlock(&kvm->slots_lock); + + wake_up_process(kvm->arch.nx_lpage_recovery_thread); + } + mutex_unlock(&kvm_lock); + } + + return 0; } int kvm_mmu_module_init(void) { int ret = -ENOMEM; + if (nx_huge_pages == -1) + __set_nx_huge_pages(get_nx_auto_mode()); + /* * MMU roles use union aliasing which is, generally speaking, an * undefined behavior. However, we supposedly know how compilers behave @@ -6071,7 +6375,8 @@ unsigned long kvm_mmu_calculate_default_mmu_pages(struct kvm *kvm) void kvm_mmu_destroy(struct kvm_vcpu *vcpu) { kvm_mmu_unload(vcpu); - free_mmu_pages(vcpu); + free_mmu_pages(&vcpu->arch.root_mmu); + free_mmu_pages(&vcpu->arch.guest_mmu); mmu_free_memory_caches(vcpu); } @@ -6082,3 +6387,116 @@ void kvm_mmu_module_exit(void) unregister_shrinker(&mmu_shrinker); mmu_audit_disable(); } + +static int set_nx_huge_pages_recovery_ratio(const char *val, const struct kernel_param *kp) +{ + unsigned int old_val; + int err; + + old_val = nx_huge_pages_recovery_ratio; + err = param_set_uint(val, kp); + if (err) + return err; + + if (READ_ONCE(nx_huge_pages) && + !old_val && nx_huge_pages_recovery_ratio) { + struct kvm *kvm; + + mutex_lock(&kvm_lock); + + list_for_each_entry(kvm, &vm_list, vm_list) + wake_up_process(kvm->arch.nx_lpage_recovery_thread); + + mutex_unlock(&kvm_lock); + } + + return err; +} + +static void kvm_recover_nx_lpages(struct kvm *kvm) +{ + int rcu_idx; + struct kvm_mmu_page *sp; + unsigned int ratio; + LIST_HEAD(invalid_list); + ulong to_zap; + + rcu_idx = srcu_read_lock(&kvm->srcu); + spin_lock(&kvm->mmu_lock); + + ratio = READ_ONCE(nx_huge_pages_recovery_ratio); + to_zap = ratio ? 
DIV_ROUND_UP(kvm->stat.nx_lpage_splits, ratio) : 0; + while (to_zap && !list_empty(&kvm->arch.lpage_disallowed_mmu_pages)) { + /* + * We use a separate list instead of just using active_mmu_pages + * because the number of lpage_disallowed pages is expected to + * be relatively small compared to the total. + */ + sp = list_first_entry(&kvm->arch.lpage_disallowed_mmu_pages, + struct kvm_mmu_page, + lpage_disallowed_link); + WARN_ON_ONCE(!sp->lpage_disallowed); + kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list); + WARN_ON_ONCE(sp->lpage_disallowed); + + if (!--to_zap || need_resched() || spin_needbreak(&kvm->mmu_lock)) { + kvm_mmu_commit_zap_page(kvm, &invalid_list); + if (to_zap) + cond_resched_lock(&kvm->mmu_lock); + } + } + + spin_unlock(&kvm->mmu_lock); + srcu_read_unlock(&kvm->srcu, rcu_idx); +} + +static long get_nx_lpage_recovery_timeout(u64 start_time) +{ + return READ_ONCE(nx_huge_pages) && READ_ONCE(nx_huge_pages_recovery_ratio) + ? start_time + 60 * HZ - get_jiffies_64() + : MAX_SCHEDULE_TIMEOUT; +} + +static int kvm_nx_lpage_recovery_worker(struct kvm *kvm, uintptr_t data) +{ + u64 start_time; + long remaining_time; + + while (true) { + start_time = get_jiffies_64(); + remaining_time = get_nx_lpage_recovery_timeout(start_time); + + set_current_state(TASK_INTERRUPTIBLE); + while (!kthread_should_stop() && remaining_time > 0) { + schedule_timeout(remaining_time); + remaining_time = get_nx_lpage_recovery_timeout(start_time); + set_current_state(TASK_INTERRUPTIBLE); + } + + set_current_state(TASK_RUNNING); + + if (kthread_should_stop()) + return 0; + + kvm_recover_nx_lpages(kvm); + } +} + +int kvm_mmu_post_init_vm(struct kvm *kvm) +{ + int err; + + err = kvm_vm_create_worker_thread(kvm, kvm_nx_lpage_recovery_worker, 0, + "kvm-nx-lpage-recovery", + &kvm->arch.nx_lpage_recovery_thread); + if (!err) + kthread_unpark(kvm->arch.nx_lpage_recovery_thread); + + return err; +} + +void kvm_mmu_pre_destroy_vm(struct kvm *kvm) +{ + if (kvm->arch.nx_lpage_recovery_thread) + kthread_stop(kvm->arch.nx_lpage_recovery_thread); +} diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h index 54c2a377795b..d55674f44a18 100644 --- a/arch/x86/kvm/mmu.h +++ b/arch/x86/kvm/mmu.h @@ -51,7 +51,7 @@ static inline u64 rsvd_bits(int s, int e) return ((1ULL << (e - s + 1)) - 1) << s; } -void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask, u64 mmio_value); +void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask, u64 mmio_value, u64 access_mask); void reset_shadow_zero_bits_mask(struct kvm_vcpu *vcpu, struct kvm_mmu *context); @@ -210,4 +210,8 @@ void kvm_mmu_gfn_allow_lpage(struct kvm_memory_slot *slot, gfn_t gfn); bool kvm_mmu_slot_gfn_write_protect(struct kvm *kvm, struct kvm_memory_slot *slot, u64 gfn); int kvm_arch_write_log_dirty(struct kvm_vcpu *vcpu); + +int kvm_mmu_post_init_vm(struct kvm *kvm); +void kvm_mmu_pre_destroy_vm(struct kvm *kvm); + #endif diff --git a/arch/x86/kvm/mmutrace.h b/arch/x86/kvm/mmutrace.h index d8001b4bca05..7ca8831c7d1a 100644 --- a/arch/x86/kvm/mmutrace.h +++ b/arch/x86/kvm/mmutrace.h @@ -8,16 +8,18 @@ #undef TRACE_SYSTEM #define TRACE_SYSTEM kvmmmu -#define KVM_MMU_PAGE_FIELDS \ - __field(__u64, gfn) \ - __field(__u32, role) \ - __field(__u32, root_count) \ +#define KVM_MMU_PAGE_FIELDS \ + __field(__u8, mmu_valid_gen) \ + __field(__u64, gfn) \ + __field(__u32, role) \ + __field(__u32, root_count) \ __field(bool, unsync) -#define KVM_MMU_PAGE_ASSIGN(sp) \ - __entry->gfn = sp->gfn; \ - __entry->role = sp->role.word; \ - __entry->root_count = sp->root_count; \ +#define KVM_MMU_PAGE_ASSIGN(sp) \ + 
__entry->mmu_valid_gen = sp->mmu_valid_gen; \ + __entry->gfn = sp->gfn; \ + __entry->role = sp->role.word; \ + __entry->root_count = sp->root_count; \ __entry->unsync = sp->unsync; #define KVM_MMU_PAGE_PRINTK() ({ \ @@ -29,8 +31,9 @@ \ role.word = __entry->role; \ \ - trace_seq_printf(p, "sp gfn %llx l%u %u-byte q%u%s %s%s" \ + trace_seq_printf(p, "sp gen %u gfn %llx l%u %u-byte q%u%s %s%s" \ " %snxe %sad root %u %s%c", \ + __entry->mmu_valid_gen, \ __entry->gfn, role.level, \ role.gpte_is_8_bytes ? 8 : 4, \ role.quadrant, \ @@ -280,6 +283,27 @@ TRACE_EVENT( ); TRACE_EVENT( + kvm_mmu_zap_all_fast, + TP_PROTO(struct kvm *kvm), + TP_ARGS(kvm), + + TP_STRUCT__entry( + __field(__u8, mmu_valid_gen) + __field(unsigned int, mmu_used_pages) + ), + + TP_fast_assign( + __entry->mmu_valid_gen = kvm->arch.mmu_valid_gen; + __entry->mmu_used_pages = kvm->arch.n_used_mmu_pages; + ), + + TP_printk("kvm-mmu-valid-gen %u used_pages %x", + __entry->mmu_valid_gen, __entry->mmu_used_pages + ) +); + + +TRACE_EVENT( check_mmio_spte, TP_PROTO(u64 spte, unsigned int kvm_gen, unsigned int spte_gen), TP_ARGS(spte, kvm_gen, spte_gen), diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 7d5cdb3af594..97b21e7fd013 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -614,13 +614,14 @@ static void FNAME(pte_prefetch)(struct kvm_vcpu *vcpu, struct guest_walker *gw, static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, struct guest_walker *gw, int write_fault, int hlevel, - kvm_pfn_t pfn, bool map_writable, bool prefault) + kvm_pfn_t pfn, bool map_writable, bool prefault, + bool lpage_disallowed) { struct kvm_mmu_page *sp = NULL; struct kvm_shadow_walk_iterator it; unsigned direct_access, access = gw->pt_access; int top_level, ret; - gfn_t base_gfn; + gfn_t gfn, base_gfn; direct_access = gw->pte_access; @@ -665,13 +666,25 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, link_shadow_page(vcpu, it.sptep, sp); } - base_gfn = gw->gfn; + /* + * FNAME(page_fault) might have clobbered the bottom bits of + * gw->gfn, restore them from the virtual address. + */ + gfn = gw->gfn | ((addr & PT_LVL_OFFSET_MASK(gw->level)) >> PAGE_SHIFT); + base_gfn = gfn; trace_kvm_mmu_spte_requested(addr, gw->level, pfn); for (; shadow_walk_okay(&it); shadow_walk_next(&it)) { clear_sp_write_flooding_count(it.sptep); - base_gfn = gw->gfn & ~(KVM_PAGES_PER_HPAGE(it.level) - 1); + + /* + * We cannot overwrite existing page tables with an NX + * large page, as the leaf could be executable. 
+ */ + disallowed_hugepage_adjust(it, gfn, &pfn, &hlevel); + + base_gfn = gfn & ~(KVM_PAGES_PER_HPAGE(it.level) - 1); if (it.level == hlevel) break; @@ -683,6 +696,8 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, sp = kvm_mmu_get_page(vcpu, base_gfn, addr, it.level - 1, true, direct_access); link_shadow_page(vcpu, it.sptep, sp); + if (lpage_disallowed) + account_huge_nx_page(vcpu->kvm, sp); } } @@ -759,9 +774,11 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code, int r; kvm_pfn_t pfn; int level = PT_PAGE_TABLE_LEVEL; - bool force_pt_level = false; unsigned long mmu_seq; bool map_writable, is_self_change_mapping; + bool lpage_disallowed = (error_code & PFERR_FETCH_MASK) && + is_nx_huge_page_enabled(); + bool force_pt_level = lpage_disallowed; pgprintk("%s: addr %lx err %x\n", __func__, addr, error_code); @@ -851,7 +868,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code, if (!force_pt_level) transparent_hugepage_adjust(vcpu, walker.gfn, &pfn, &level); r = FNAME(fetch)(vcpu, addr, &walker, write_fault, - level, pfn, map_writable, prefault); + level, pfn, map_writable, prefault, lpage_disallowed); kvm_mmu_audit(vcpu, AUDIT_POST_PAGE_FAULT); out_unlock: diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index e0368076a1ef..c5673bda4b66 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -68,10 +68,8 @@ MODULE_DEVICE_TABLE(x86cpu, svm_cpu_id); #define SEG_TYPE_LDT 2 #define SEG_TYPE_BUSY_TSS16 3 -#define SVM_FEATURE_NPT (1 << 0) #define SVM_FEATURE_LBRV (1 << 1) #define SVM_FEATURE_SVML (1 << 2) -#define SVM_FEATURE_NRIP (1 << 3) #define SVM_FEATURE_TSC_RATE (1 << 4) #define SVM_FEATURE_VMCB_CLEAN (1 << 5) #define SVM_FEATURE_FLUSH_ASID (1 << 6) @@ -736,8 +734,14 @@ static int get_npt_level(struct kvm_vcpu *vcpu) static void svm_set_efer(struct kvm_vcpu *vcpu, u64 efer) { vcpu->arch.efer = efer; - if (!npt_enabled && !(efer & EFER_LMA)) - efer &= ~EFER_LME; + + if (!npt_enabled) { + /* Shadow paging assumes NX to be available. */ + efer |= EFER_NX; + + if (!(efer & EFER_LMA)) + efer &= ~EFER_LME; + } to_svm(vcpu)->vmcb->save.efer = efer | EFER_SVME; mark_dirty(to_svm(vcpu)->vmcb, VMCB_CR); @@ -770,7 +774,7 @@ static void svm_set_interrupt_shadow(struct kvm_vcpu *vcpu, int mask) } -static void skip_emulated_instruction(struct kvm_vcpu *vcpu) +static int skip_emulated_instruction(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); @@ -780,17 +784,17 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu) } if (!svm->next_rip) { - if (kvm_emulate_instruction(vcpu, EMULTYPE_SKIP) != - EMULATE_DONE) - printk(KERN_DEBUG "%s: NOP\n", __func__); - return; + if (!kvm_emulate_instruction(vcpu, EMULTYPE_SKIP)) + return 0; + } else { + if (svm->next_rip - kvm_rip_read(vcpu) > MAX_INST_SIZE) + pr_err("%s: ip 0x%lx next 0x%llx\n", + __func__, kvm_rip_read(vcpu), svm->next_rip); + kvm_rip_write(vcpu, svm->next_rip); } - if (svm->next_rip - kvm_rip_read(vcpu) > MAX_INST_SIZE) - printk(KERN_ERR "%s: ip 0x%lx next 0x%llx\n", - __func__, kvm_rip_read(vcpu), svm->next_rip); - - kvm_rip_write(vcpu, svm->next_rip); svm_set_interrupt_shadow(vcpu, 0); + + return 1; } static void svm_queue_exception(struct kvm_vcpu *vcpu) @@ -821,7 +825,7 @@ static void svm_queue_exception(struct kvm_vcpu *vcpu) * raises a fault that is not intercepted. Still better than * failing in all cases. 
*/ - skip_emulated_instruction(&svm->vcpu); + (void)skip_emulated_instruction(&svm->vcpu); rip = kvm_rip_read(&svm->vcpu); svm->int3_rip = rip + svm->vmcb->save.cs.base; svm->int3_injected = rip - old_rip; @@ -1269,11 +1273,11 @@ static void grow_ple_window(struct kvm_vcpu *vcpu) pause_filter_count_grow, pause_filter_count_max); - if (control->pause_filter_count != old) + if (control->pause_filter_count != old) { mark_dirty(svm->vmcb, VMCB_INTERCEPTS); - - trace_kvm_ple_window_grow(vcpu->vcpu_id, - control->pause_filter_count, old); + trace_kvm_ple_window_update(vcpu->vcpu_id, + control->pause_filter_count, old); + } } static void shrink_ple_window(struct kvm_vcpu *vcpu) @@ -1287,11 +1291,11 @@ static void shrink_ple_window(struct kvm_vcpu *vcpu) pause_filter_count, pause_filter_count_shrink, pause_filter_count); - if (control->pause_filter_count != old) + if (control->pause_filter_count != old) { mark_dirty(svm->vmcb, VMCB_INTERCEPTS); - - trace_kvm_ple_window_shrink(vcpu->vcpu_id, - control->pause_filter_count, old); + trace_kvm_ple_window_update(vcpu->vcpu_id, + control->pause_filter_count, old); + } } static __init int svm_hardware_setup(void) @@ -1542,6 +1546,7 @@ static void init_vmcb(struct vcpu_svm *svm) set_intercept(svm, INTERCEPT_SKINIT); set_intercept(svm, INTERCEPT_WBINVD); set_intercept(svm, INTERCEPT_XSETBV); + set_intercept(svm, INTERCEPT_RDPRU); set_intercept(svm, INTERCEPT_RSM); if (!kvm_mwait_in_guest(svm->vcpu.kvm)) { @@ -2136,6 +2141,9 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id) struct page *nested_msrpm_pages; int err; + BUILD_BUG_ON_MSG(offsetof(struct vcpu_svm, vcpu) != 0, + "struct kvm_vcpu must be at offset 0 for arch usercopy region"); + svm = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL_ACCOUNT); if (!svm) { err = -ENOMEM; @@ -2768,17 +2776,18 @@ static int gp_interception(struct vcpu_svm *svm) { struct kvm_vcpu *vcpu = &svm->vcpu; u32 error_code = svm->vmcb->control.exit_info_1; - int er; WARN_ON_ONCE(!enable_vmware_backdoor); - er = kvm_emulate_instruction(vcpu, - EMULTYPE_VMWARE | EMULTYPE_NO_UD_ON_FAIL); - if (er == EMULATE_USER_EXIT) - return 0; - else if (er != EMULATE_DONE) + /* + * VMware backdoor emulation on #GP interception only handles IN{S}, + * OUT{S}, and RDPMC, none of which generate a non-zero error code. 
+ */ + if (error_code) { kvm_queue_exception_e(vcpu, GP_VECTOR, error_code); - return 1; + return 1; + } + return kvm_emulate_instruction(vcpu, EMULTYPE_VMWARE_GP); } static bool is_erratum_383(void) @@ -2876,7 +2885,7 @@ static int io_interception(struct vcpu_svm *svm) string = (io_info & SVM_IOIO_STR_MASK) != 0; in = (io_info & SVM_IOIO_TYPE_MASK) != 0; if (string) - return kvm_emulate_instruction(vcpu, 0) == EMULATE_DONE; + return kvm_emulate_instruction(vcpu, 0); port = io_info >> 16; size = (io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT; @@ -2903,13 +2912,11 @@ static int nop_on_interception(struct vcpu_svm *svm) static int halt_interception(struct vcpu_svm *svm) { - svm->next_rip = kvm_rip_read(&svm->vcpu) + 1; return kvm_emulate_halt(&svm->vcpu); } static int vmmcall_interception(struct vcpu_svm *svm) { - svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; return kvm_emulate_hypercall(&svm->vcpu); } @@ -3588,9 +3595,9 @@ static void enter_svm_guest_mode(struct vcpu_svm *svm, u64 vmcb_gpa, mark_all_dirty(svm->vmcb); } -static bool nested_svm_vmrun(struct vcpu_svm *svm) +static int nested_svm_vmrun(struct vcpu_svm *svm) { - int rc; + int ret; struct vmcb *nested_vmcb; struct vmcb *hsave = svm->nested.hsave; struct vmcb *vmcb = svm->vmcb; @@ -3599,13 +3606,16 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm) vmcb_gpa = svm->vmcb->save.rax; - rc = kvm_vcpu_map(&svm->vcpu, gpa_to_gfn(vmcb_gpa), &map); - if (rc) { - if (rc == -EINVAL) - kvm_inject_gp(&svm->vcpu, 0); - return false; + ret = kvm_vcpu_map(&svm->vcpu, gpa_to_gfn(vmcb_gpa), &map); + if (ret == -EINVAL) { + kvm_inject_gp(&svm->vcpu, 0); + return 1; + } else if (ret) { + return kvm_skip_emulated_instruction(&svm->vcpu); } + ret = kvm_skip_emulated_instruction(&svm->vcpu); + nested_vmcb = map.hva; if (!nested_vmcb_checks(nested_vmcb)) { @@ -3616,7 +3626,7 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm) kvm_vcpu_unmap(&svm->vcpu, &map, true); - return false; + return ret; } trace_kvm_nested_vmrun(svm->vmcb->save.rip, vmcb_gpa, @@ -3660,7 +3670,16 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm) enter_svm_guest_mode(svm, vmcb_gpa, nested_vmcb, &map); - return true; + if (!nested_svm_vmrun_msrpm(svm)) { + svm->vmcb->control.exit_code = SVM_EXIT_ERR; + svm->vmcb->control.exit_code_hi = 0; + svm->vmcb->control.exit_info_1 = 0; + svm->vmcb->control.exit_info_2 = 0; + + nested_svm_vmexit(svm); + } + + return ret; } static void nested_svm_vmloadsave(struct vmcb *from_vmcb, struct vmcb *to_vmcb) @@ -3697,7 +3716,6 @@ static int vmload_interception(struct vcpu_svm *svm) nested_vmcb = map.hva; - svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; ret = kvm_skip_emulated_instruction(&svm->vcpu); nested_svm_vmloadsave(nested_vmcb, svm->vmcb); @@ -3724,7 +3742,6 @@ static int vmsave_interception(struct vcpu_svm *svm) nested_vmcb = map.hva; - svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; ret = kvm_skip_emulated_instruction(&svm->vcpu); nested_svm_vmloadsave(svm->vmcb, nested_vmcb); @@ -3738,27 +3755,7 @@ static int vmrun_interception(struct vcpu_svm *svm) if (nested_svm_check_permissions(svm)) return 1; - /* Save rip after vmrun instruction */ - kvm_rip_write(&svm->vcpu, kvm_rip_read(&svm->vcpu) + 3); - - if (!nested_svm_vmrun(svm)) - return 1; - - if (!nested_svm_vmrun_msrpm(svm)) - goto failed; - - return 1; - -failed: - - svm->vmcb->control.exit_code = SVM_EXIT_ERR; - svm->vmcb->control.exit_code_hi = 0; - svm->vmcb->control.exit_info_1 = 0; - svm->vmcb->control.exit_info_2 = 0; - - nested_svm_vmexit(svm); - - return 1; + return 
nested_svm_vmrun(svm); } static int stgi_interception(struct vcpu_svm *svm) @@ -3775,7 +3772,6 @@ static int stgi_interception(struct vcpu_svm *svm) if (vgif_enabled(svm)) clr_intercept(svm, INTERCEPT_STGI); - svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; ret = kvm_skip_emulated_instruction(&svm->vcpu); kvm_make_request(KVM_REQ_EVENT, &svm->vcpu); @@ -3791,7 +3787,6 @@ static int clgi_interception(struct vcpu_svm *svm) if (nested_svm_check_permissions(svm)) return 1; - svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; ret = kvm_skip_emulated_instruction(&svm->vcpu); disable_gif(svm); @@ -3816,7 +3811,6 @@ static int invlpga_interception(struct vcpu_svm *svm) /* Let's treat INVLPGA the same as INVLPG (can be optimized!) */ kvm_mmu_invlpg(vcpu, kvm_rax_read(&svm->vcpu)); - svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; return kvm_skip_emulated_instruction(&svm->vcpu); } @@ -3839,13 +3833,18 @@ static int xsetbv_interception(struct vcpu_svm *svm) u32 index = kvm_rcx_read(&svm->vcpu); if (kvm_set_xcr(&svm->vcpu, index, new_bv) == 0) { - svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; return kvm_skip_emulated_instruction(&svm->vcpu); } return 1; } +static int rdpru_interception(struct vcpu_svm *svm) +{ + kvm_queue_exception(&svm->vcpu, UD_VECTOR); + return 1; +} + static int task_switch_interception(struct vcpu_svm *svm) { u16 tss_selector; @@ -3898,25 +3897,20 @@ static int task_switch_interception(struct vcpu_svm *svm) if (reason != TASK_SWITCH_GATE || int_type == SVM_EXITINTINFO_TYPE_SOFT || (int_type == SVM_EXITINTINFO_TYPE_EXEPT && - (int_vec == OF_VECTOR || int_vec == BP_VECTOR))) - skip_emulated_instruction(&svm->vcpu); + (int_vec == OF_VECTOR || int_vec == BP_VECTOR))) { + if (!skip_emulated_instruction(&svm->vcpu)) + return 0; + } if (int_type != SVM_EXITINTINFO_TYPE_SOFT) int_vec = -1; - if (kvm_task_switch(&svm->vcpu, tss_selector, int_vec, reason, - has_error_code, error_code) == EMULATE_FAIL) { - svm->vcpu.run->exit_reason = KVM_EXIT_INTERNAL_ERROR; - svm->vcpu.run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; - svm->vcpu.run->internal.ndata = 0; - return 0; - } - return 1; + return kvm_task_switch(&svm->vcpu, tss_selector, int_vec, reason, + has_error_code, error_code); } static int cpuid_interception(struct vcpu_svm *svm) { - svm->next_rip = kvm_rip_read(&svm->vcpu) + 2; return kvm_emulate_cpuid(&svm->vcpu); } @@ -3933,7 +3927,7 @@ static int iret_interception(struct vcpu_svm *svm) static int invlpg_interception(struct vcpu_svm *svm) { if (!static_cpu_has(X86_FEATURE_DECODEASSISTS)) - return kvm_emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE; + return kvm_emulate_instruction(&svm->vcpu, 0); kvm_mmu_invlpg(&svm->vcpu, svm->vmcb->control.exit_info_1); return kvm_skip_emulated_instruction(&svm->vcpu); @@ -3941,13 +3935,12 @@ static int invlpg_interception(struct vcpu_svm *svm) static int emulate_on_interception(struct vcpu_svm *svm) { - return kvm_emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE; + return kvm_emulate_instruction(&svm->vcpu, 0); } static int rsm_interception(struct vcpu_svm *svm) { - return kvm_emulate_instruction_from_buffer(&svm->vcpu, - rsm_ins_bytes, 2) == EMULATE_DONE; + return kvm_emulate_instruction_from_buffer(&svm->vcpu, rsm_ins_bytes, 2); } static int rdpmc_interception(struct vcpu_svm *svm) @@ -4232,23 +4225,7 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) static int rdmsr_interception(struct vcpu_svm *svm) { - u32 ecx = kvm_rcx_read(&svm->vcpu); - struct msr_data msr_info; - - msr_info.index = ecx; - 
msr_info.host_initiated = false; - if (svm_get_msr(&svm->vcpu, &msr_info)) { - trace_kvm_msr_read_ex(ecx); - kvm_inject_gp(&svm->vcpu, 0); - return 1; - } else { - trace_kvm_msr_read(ecx, msr_info.data); - - kvm_rax_write(&svm->vcpu, msr_info.data & 0xffffffff); - kvm_rdx_write(&svm->vcpu, msr_info.data >> 32); - svm->next_rip = kvm_rip_read(&svm->vcpu) + 2; - return kvm_skip_emulated_instruction(&svm->vcpu); - } + return kvm_emulate_rdmsr(&svm->vcpu); } static int svm_set_vm_cr(struct kvm_vcpu *vcpu, u64 data) @@ -4438,23 +4415,7 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) static int wrmsr_interception(struct vcpu_svm *svm) { - struct msr_data msr; - u32 ecx = kvm_rcx_read(&svm->vcpu); - u64 data = kvm_read_edx_eax(&svm->vcpu); - - msr.data = data; - msr.index = ecx; - msr.host_initiated = false; - - svm->next_rip = kvm_rip_read(&svm->vcpu) + 2; - if (kvm_set_msr(&svm->vcpu, &msr)) { - trace_kvm_msr_write_ex(ecx, data); - kvm_inject_gp(&svm->vcpu, 0); - return 1; - } else { - trace_kvm_msr_write(ecx, data); - return kvm_skip_emulated_instruction(&svm->vcpu); - } + return kvm_emulate_wrmsr(&svm->vcpu); } static int msr_interception(struct vcpu_svm *svm) @@ -4636,6 +4597,7 @@ static int avic_handle_ldr_update(struct kvm_vcpu *vcpu) int ret = 0; struct vcpu_svm *svm = to_svm(vcpu); u32 ldr = kvm_lapic_get_reg(vcpu->arch.apic, APIC_LDR); + u32 id = kvm_xapic_id(vcpu->arch.apic); if (ldr == svm->ldr_reg) return 0; @@ -4643,7 +4605,7 @@ static int avic_handle_ldr_update(struct kvm_vcpu *vcpu) avic_invalidate_logical_id_entry(vcpu); if (ldr) - ret = avic_ldr_write(vcpu, vcpu->vcpu_id, ldr); + ret = avic_ldr_write(vcpu, id, ldr); if (!ret) svm->ldr_reg = ldr; @@ -4655,8 +4617,7 @@ static int avic_handle_apic_id_update(struct kvm_vcpu *vcpu) { u64 *old, *new; struct vcpu_svm *svm = to_svm(vcpu); - u32 apic_id_reg = kvm_lapic_get_reg(vcpu->arch.apic, APIC_ID); - u32 id = (apic_id_reg >> 24) & 0xff; + u32 id = kvm_xapic_id(vcpu->arch.apic); if (vcpu->vcpu_id == id) return 0; @@ -4768,7 +4729,7 @@ static int avic_unaccelerated_access_interception(struct vcpu_svm *svm) ret = avic_unaccel_trap_write(svm); } else { /* Handling Fault */ - ret = (kvm_emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE); + ret = kvm_emulate_instruction(&svm->vcpu, 0); } return ret; @@ -4835,6 +4796,7 @@ static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = { [SVM_EXIT_MONITOR] = monitor_interception, [SVM_EXIT_MWAIT] = mwait_interception, [SVM_EXIT_XSETBV] = xsetbv_interception, + [SVM_EXIT_RDPRU] = rdpru_interception, [SVM_EXIT_NPF] = npf_interception, [SVM_EXIT_RSM] = rsm_interception, [SVM_EXIT_AVIC_INCOMPLETE_IPI] = avic_incomplete_ipi_interception, @@ -5025,9 +4987,14 @@ static int handle_exit(struct kvm_vcpu *vcpu) if (exit_code >= ARRAY_SIZE(svm_exit_handlers) || !svm_exit_handlers[exit_code]) { - WARN_ONCE(1, "svm: unexpected exit reason 0x%x\n", exit_code); - kvm_queue_exception(vcpu, UD_VECTOR); - return 1; + vcpu_unimpl(vcpu, "svm: unexpected exit reason 0x%x\n", exit_code); + dump_vmcb(vcpu); + vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + vcpu->run->internal.suberror = + KVM_INTERNAL_ERROR_UNEXPECTED_EXIT_REASON; + vcpu->run->internal.ndata = 1; + vcpu->run->internal.data[0] = exit_code; + return 0; } return svm_exit_handlers[exit_code](svm); @@ -5274,7 +5241,8 @@ get_pi_vcpu_info(struct kvm *kvm, struct kvm_kernel_irq_routing_entry *e, kvm_set_msi_irq(kvm, e, &irq); - if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu)) { + if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu) || 
+ !kvm_irq_is_postable(&irq)) { pr_debug("SVM: %s: use legacy intr remap mode for irq %u\n", __func__, irq.vector); return -1; } @@ -5328,6 +5296,7 @@ static int svm_update_pi_irte(struct kvm *kvm, unsigned int host_irq, * 1. When the interrupt cannot be targeted to a specific vcpu. * 2. Unsetting posted interrupt. * 3. APIC virtualization is disabled for the vcpu. + * 4. IRQ has an incompatible delivery mode (SMI, INIT, etc.) */ if (!get_pi_vcpu_info(kvm, e, &vcpu_info, &svm) && set && kvm_vcpu_apicv_active(&svm->vcpu)) { @@ -5933,6 +5902,8 @@ static void svm_cpuid_update(struct kvm_vcpu *vcpu) guest_cpuid_clear(vcpu, X86_FEATURE_X2APIC); } +#define F(x) bit(X86_FEATURE_##x) + static void svm_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry) { switch (func) { @@ -5944,6 +5915,11 @@ static void svm_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry) if (nested) entry->ecx |= (1 << 2); /* Set SVM bit */ break; + case 0x80000008: + if (boot_cpu_has(X86_FEATURE_LS_CFG_SSBD) || + boot_cpu_has(X86_FEATURE_AMD_SSBD)) + entry->ebx |= F(VIRT_SSBD); + break; case 0x8000000A: entry->eax = 1; /* SVM revision 1 */ entry->ebx = 8; /* Let's support 8 ASIDs in case we add proper @@ -5954,11 +5930,11 @@ static void svm_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry) /* Support next_rip if host supports it */ if (boot_cpu_has(X86_FEATURE_NRIPS)) - entry->edx |= SVM_FEATURE_NRIP; + entry->edx |= F(NRIPS); /* Support NPT for the guest if enabled */ if (npt_enabled) - entry->edx |= SVM_FEATURE_NPT; + entry->edx |= F(NPT); break; case 0x8000001F: @@ -6067,6 +6043,7 @@ static const struct __x86_intercept { [x86_intercept_ins] = POST_EX(SVM_EXIT_IOIO), [x86_intercept_out] = POST_EX(SVM_EXIT_IOIO), [x86_intercept_outs] = POST_EX(SVM_EXIT_IOIO), + [x86_intercept_xsetbv] = PRE_EX(SVM_EXIT_XSETBV), }; #undef PRE_EX @@ -7128,13 +7105,6 @@ failed: return ret; } -static int nested_enable_evmcs(struct kvm_vcpu *vcpu, - uint16_t *vmcs_version) -{ - /* Intel-only feature */ - return -ENODEV; -} - static bool svm_need_emulation_on_page_fault(struct kvm_vcpu *vcpu) { unsigned long cr4 = kvm_read_cr4(vcpu); @@ -7193,6 +7163,21 @@ static bool svm_need_emulation_on_page_fault(struct kvm_vcpu *vcpu) return false; } +static bool svm_apic_init_signal_blocked(struct kvm_vcpu *vcpu) +{ + struct vcpu_svm *svm = to_svm(vcpu); + + /* + * TODO: The last condition latches INIT signals on the vCPU when the + * vCPU is in guest-mode and vmcb12 defines an intercept on INIT. + * To properly emulate the INIT intercept, SVM should implement + * kvm_x86_ops->check_nested_events() and call nested_svm_vmexit() + * there if an INIT signal is pending.
+ */ + return !gif_set(svm) || + (svm->vmcb->control.intercept & (1ULL << INTERCEPT_INIT)); +} + static struct kvm_x86_ops svm_x86_ops __ro_after_init = { .cpu_has_kvm_support = has_svm, .disabled_by_bios = is_disabled, @@ -7325,10 +7310,12 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = { .mem_enc_reg_region = svm_register_enc_region, .mem_enc_unreg_region = svm_unregister_enc_region, - .nested_enable_evmcs = nested_enable_evmcs, + .nested_enable_evmcs = NULL, .nested_get_evmcs_version = NULL, .need_emulation_on_page_fault = svm_need_emulation_on_page_fault, + + .apic_init_signal_blocked = svm_apic_init_signal_blocked, }; static int __init svm_init(void) diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h index b5c831e79094..7c741a0c5f80 100644 --- a/arch/x86/kvm/trace.h +++ b/arch/x86/kvm/trace.h @@ -232,17 +232,20 @@ TRACE_EVENT(kvm_exit, __field( u32, isa ) __field( u64, info1 ) __field( u64, info2 ) + __field( unsigned int, vcpu_id ) ), TP_fast_assign( __entry->exit_reason = exit_reason; __entry->guest_rip = kvm_rip_read(vcpu); __entry->isa = isa; + __entry->vcpu_id = vcpu->vcpu_id; kvm_x86_ops->get_exit_info(vcpu, &__entry->info1, &__entry->info2); ), - TP_printk("reason %s rip 0x%lx info %llx %llx", + TP_printk("vcpu %u reason %s rip 0x%lx info %llx %llx", + __entry->vcpu_id, (__entry->isa == KVM_ISA_VMX) ? __print_symbolic(__entry->exit_reason, VMX_EXIT_REASONS) : __print_symbolic(__entry->exit_reason, SVM_EXIT_REASONS), @@ -887,36 +890,27 @@ TRACE_EVENT(kvm_pml_full, TP_printk("vcpu %d: PML full", __entry->vcpu_id) ); -TRACE_EVENT(kvm_ple_window, - TP_PROTO(bool grow, unsigned int vcpu_id, int new, int old), - TP_ARGS(grow, vcpu_id, new, old), +TRACE_EVENT(kvm_ple_window_update, + TP_PROTO(unsigned int vcpu_id, unsigned int new, unsigned int old), + TP_ARGS(vcpu_id, new, old), TP_STRUCT__entry( - __field( bool, grow ) __field( unsigned int, vcpu_id ) - __field( int, new ) - __field( int, old ) + __field( unsigned int, new ) + __field( unsigned int, old ) ), TP_fast_assign( - __entry->grow = grow; __entry->vcpu_id = vcpu_id; __entry->new = new; __entry->old = old; ), - TP_printk("vcpu %u: ple_window %d (%s %d)", - __entry->vcpu_id, - __entry->new, - __entry->grow ? "grow" : "shrink", - __entry->old) + TP_printk("vcpu %u old %u new %u (%s)", + __entry->vcpu_id, __entry->old, __entry->new, + __entry->old < __entry->new ?
"grew" : "shrunk") ); -#define trace_kvm_ple_window_grow(vcpu_id, new, old) \ - trace_kvm_ple_window(true, vcpu_id, new, old) -#define trace_kvm_ple_window_shrink(vcpu_id, new, old) \ - trace_kvm_ple_window(false, vcpu_id, new, old) - TRACE_EVENT(kvm_pvclock_update, TP_PROTO(unsigned int vcpu_id, struct pvclock_vcpu_time_info *pvclock), TP_ARGS(vcpu_id, pvclock), @@ -1320,7 +1314,7 @@ TRACE_EVENT(kvm_avic_incomplete_ipi, __entry->index = index; ), - TP_printk("vcpu=%u, icrh:icrl=%#010x:%08x, id=%u, index=%u\n", + TP_printk("vcpu=%u, icrh:icrl=%#010x:%08x, id=%u, index=%u", __entry->vcpu, __entry->icrh, __entry->icrl, __entry->id, __entry->index) ); @@ -1345,7 +1339,7 @@ TRACE_EVENT(kvm_avic_unaccelerated_access, __entry->vec = vec; ), - TP_printk("vcpu=%u, offset=%#x(%s), %s, %s, vec=%#x\n", + TP_printk("vcpu=%u, offset=%#x(%s), %s, %s, vec=%#x", __entry->vcpu, __entry->offset, __print_symbolic(__entry->offset, kvm_trace_symbol_apic), @@ -1462,6 +1456,46 @@ TRACE_EVENT(kvm_hv_send_ipi_ex, __entry->vector, __entry->format, __entry->valid_bank_mask) ); + +TRACE_EVENT(kvm_pv_tlb_flush, + TP_PROTO(unsigned int vcpu_id, bool need_flush_tlb), + TP_ARGS(vcpu_id, need_flush_tlb), + + TP_STRUCT__entry( + __field( unsigned int, vcpu_id ) + __field( bool, need_flush_tlb ) + ), + + TP_fast_assign( + __entry->vcpu_id = vcpu_id; + __entry->need_flush_tlb = need_flush_tlb; + ), + + TP_printk("vcpu %u need_flush_tlb %s", __entry->vcpu_id, + __entry->need_flush_tlb ? "true" : "false") +); + +/* + * Tracepoint for failed nested VMX VM-Enter. + */ +TRACE_EVENT(kvm_nested_vmenter_failed, + TP_PROTO(const char *msg, u32 err), + TP_ARGS(msg, err), + + TP_STRUCT__entry( + __field(const char *, msg) + __field(u32, err) + ), + + TP_fast_assign( + __entry->msg = msg; + __entry->err = err; + ), + + TP_printk("%s%s", __entry->msg, !__entry->err ?
"" : + __print_symbolic(__entry->err, VMX_VMENTER_INSTRUCTION_ERRORS)) +); + #endif /* _TRACE_KVM_H */ #undef TRACE_INCLUDE_PATH diff --git a/arch/x86/kvm/vmx/capabilities.h b/arch/x86/kvm/vmx/capabilities.h index d6664ee3d127..7aa69716d516 100644 --- a/arch/x86/kvm/vmx/capabilities.h +++ b/arch/x86/kvm/vmx/capabilities.h @@ -247,6 +247,12 @@ static inline bool vmx_xsaves_supported(void) SECONDARY_EXEC_XSAVES; } +static inline bool vmx_waitpkg_supported(void) +{ + return vmcs_config.cpu_based_2nd_exec_ctrl & + SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE; +} + static inline bool cpu_has_vmx_tsc_scaling(void) { return vmcs_config.cpu_based_2nd_exec_ctrl & diff --git a/arch/x86/kvm/vmx/evmcs.h b/arch/x86/kvm/vmx/evmcs.h index 39a24eec8884..07ebf6882a45 100644 --- a/arch/x86/kvm/vmx/evmcs.h +++ b/arch/x86/kvm/vmx/evmcs.h @@ -178,6 +178,8 @@ static inline void evmcs_load(u64 phys_addr) struct hv_vp_assist_page *vp_ap = hv_get_vp_assist_page(smp_processor_id()); + if (current_evmcs->hv_enlightenments_control.nested_flush_hypercall) + vp_ap->nested_control.features.directhypercall = 1; vp_ap->current_nested_vmcs = phys_addr; vp_ap->enlighten_vmentry = 1; } diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index ced9fba32598..0e7c9301fe86 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -19,6 +19,14 @@ module_param_named(enable_shadow_vmcs, enable_shadow_vmcs, bool, S_IRUGO); static bool __read_mostly nested_early_check = 0; module_param(nested_early_check, bool, S_IRUGO); +#define CC(consistency_check) \ +({ \ + bool failed = (consistency_check); \ + if (failed) \ + trace_kvm_nested_vmenter_failed(#consistency_check, 0); \ + failed; \ +}) + /* * Hyper-V requires all of these, so mark them as supported even though * they are just treated the same as all-context. 
@@ -190,6 +198,16 @@ static void nested_vmx_abort(struct kvm_vcpu *vcpu, u32 indicator) pr_debug_ratelimited("kvm: nested vmx abort, indicator %d\n", indicator); } +static inline bool vmx_control_verify(u32 control, u32 low, u32 high) +{ + return fixed_bits_valid(control, low, high); +} + +static inline u64 vmx_control_msr(u32 low, u32 high) +{ + return low | ((u64)high << 32); +} + static void vmx_disable_shadow_vmcs(struct vcpu_vmx *vmx) { secondary_exec_controls_clearbit(vmx, SECONDARY_EXEC_SHADOW_VMCS); @@ -430,8 +448,8 @@ static int nested_vmx_check_io_bitmap_controls(struct kvm_vcpu *vcpu, if (!nested_cpu_has(vmcs12, CPU_BASED_USE_IO_BITMAPS)) return 0; - if (!page_address_valid(vcpu, vmcs12->io_bitmap_a) || - !page_address_valid(vcpu, vmcs12->io_bitmap_b)) + if (CC(!page_address_valid(vcpu, vmcs12->io_bitmap_a)) || + CC(!page_address_valid(vcpu, vmcs12->io_bitmap_b))) return -EINVAL; return 0; @@ -443,7 +461,7 @@ static int nested_vmx_check_msr_bitmap_controls(struct kvm_vcpu *vcpu, if (!nested_cpu_has(vmcs12, CPU_BASED_USE_MSR_BITMAPS)) return 0; - if (!page_address_valid(vcpu, vmcs12->msr_bitmap)) + if (CC(!page_address_valid(vcpu, vmcs12->msr_bitmap))) return -EINVAL; return 0; @@ -455,7 +473,7 @@ static int nested_vmx_check_tpr_shadow_controls(struct kvm_vcpu *vcpu, if (!nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW)) return 0; - if (!page_address_valid(vcpu, vmcs12->virtual_apic_page_addr)) + if (CC(!page_address_valid(vcpu, vmcs12->virtual_apic_page_addr))) return -EINVAL; return 0; @@ -688,7 +706,7 @@ static int nested_vmx_check_apic_access_controls(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) { if (nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES) && - !page_address_valid(vcpu, vmcs12->apic_access_addr)) + CC(!page_address_valid(vcpu, vmcs12->apic_access_addr))) return -EINVAL; else return 0; @@ -707,16 +725,15 @@ static int nested_vmx_check_apicv_controls(struct kvm_vcpu *vcpu, * If virtualize x2apic mode is enabled, * virtualize apic access must be disabled. */ - if (nested_cpu_has_virt_x2apic_mode(vmcs12) && - nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) + if (CC(nested_cpu_has_virt_x2apic_mode(vmcs12) && + nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES))) return -EINVAL; /* * If virtual interrupt delivery is enabled, * we must exit on external interrupts. */ - if (nested_cpu_has_vid(vmcs12) && - !nested_exit_on_intr(vcpu)) + if (CC(nested_cpu_has_vid(vmcs12) && !nested_exit_on_intr(vcpu))) return -EINVAL; /* @@ -727,15 +744,15 @@ static int nested_vmx_check_apicv_controls(struct kvm_vcpu *vcpu, * bits 5:0 of posted_intr_desc_addr should be zero. */ if (nested_cpu_has_posted_intr(vmcs12) && - (!nested_cpu_has_vid(vmcs12) || - !nested_exit_intr_ack_set(vcpu) || - (vmcs12->posted_intr_nv & 0xff00) || - (vmcs12->posted_intr_desc_addr & 0x3f) || - (vmcs12->posted_intr_desc_addr >> cpuid_maxphyaddr(vcpu)))) + (CC(!nested_cpu_has_vid(vmcs12)) || + CC(!nested_exit_intr_ack_set(vcpu)) || + CC((vmcs12->posted_intr_nv & 0xff00)) || + CC((vmcs12->posted_intr_desc_addr & 0x3f)) || + CC((vmcs12->posted_intr_desc_addr >> cpuid_maxphyaddr(vcpu))))) return -EINVAL; /* tpr shadow is needed by all apicv features. 
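To make the CC() macro introduced at the top of this nested.c diff concrete: each consistency check is wrapped so that a failing check logs its own source text via stringification before yielding the result. Here is a hedged userspace sketch of the same pattern applied to the posted-interrupt rules above, with fprintf standing in for trace_kvm_nested_vmenter_failed(); like the kernel macro, it relies on GNU statement expressions:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Stand-in for trace_kvm_nested_vmenter_failed(). */
#define trace_vmenter_failed(msg) fprintf(stderr, "nested VM-Enter failed: %s\n", msg)

/* Same shape as the kernel's CC(): evaluate the check, trace the literal
 * source text of a failing check, and yield the result. */
#define CC(consistency_check)				\
({							\
	bool failed = (consistency_check);		\
	if (failed)					\
		trace_vmenter_failed(#consistency_check);	\
	failed;						\
})

/* The posted-interrupt field rules from the hunk above. */
static int check_posted_intr_fields(uint16_t nv, uint64_t desc_addr,
				    unsigned int maxphyaddr)
{
	if (CC(nv & 0xff00) ||			/* vector must fit in bits 7:0 */
	    CC(desc_addr & 0x3f) ||		/* descriptor is 64-byte aligned */
	    CC(desc_addr >> maxphyaddr))	/* must be a legal physical address */
		return -1;
	return 0;
}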
*/ - if (!nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW)) + if (CC(!nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW))) return -EINVAL; return 0; @@ -759,10 +776,12 @@ static int nested_vmx_check_msr_switch(struct kvm_vcpu *vcpu, static int nested_vmx_check_exit_msr_switch_controls(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) { - if (nested_vmx_check_msr_switch(vcpu, vmcs12->vm_exit_msr_load_count, - vmcs12->vm_exit_msr_load_addr) || - nested_vmx_check_msr_switch(vcpu, vmcs12->vm_exit_msr_store_count, - vmcs12->vm_exit_msr_store_addr)) + if (CC(nested_vmx_check_msr_switch(vcpu, + vmcs12->vm_exit_msr_load_count, + vmcs12->vm_exit_msr_load_addr)) || + CC(nested_vmx_check_msr_switch(vcpu, + vmcs12->vm_exit_msr_store_count, + vmcs12->vm_exit_msr_store_addr))) return -EINVAL; return 0; @@ -771,8 +790,9 @@ static int nested_vmx_check_exit_msr_switch_controls(struct kvm_vcpu *vcpu, static int nested_vmx_check_entry_msr_switch_controls(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) { - if (nested_vmx_check_msr_switch(vcpu, vmcs12->vm_entry_msr_load_count, - vmcs12->vm_entry_msr_load_addr)) + if (CC(nested_vmx_check_msr_switch(vcpu, + vmcs12->vm_entry_msr_load_count, + vmcs12->vm_entry_msr_load_addr))) return -EINVAL; return 0; @@ -784,8 +804,8 @@ static int nested_vmx_check_pml_controls(struct kvm_vcpu *vcpu, if (!nested_cpu_has_pml(vmcs12)) return 0; - if (!nested_cpu_has_ept(vmcs12) || - !page_address_valid(vcpu, vmcs12->pml_address)) + if (CC(!nested_cpu_has_ept(vmcs12)) || + CC(!page_address_valid(vcpu, vmcs12->pml_address))) return -EINVAL; return 0; @@ -794,8 +814,8 @@ static int nested_vmx_check_pml_controls(struct kvm_vcpu *vcpu, static int nested_vmx_check_unrestricted_guest_controls(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) { - if (nested_cpu_has2(vmcs12, SECONDARY_EXEC_UNRESTRICTED_GUEST) && - !nested_cpu_has_ept(vmcs12)) + if (CC(nested_cpu_has2(vmcs12, SECONDARY_EXEC_UNRESTRICTED_GUEST) && + !nested_cpu_has_ept(vmcs12))) return -EINVAL; return 0; } @@ -803,8 +823,8 @@ static int nested_vmx_check_unrestricted_guest_controls(struct kvm_vcpu *vcpu, static int nested_vmx_check_mode_based_ept_exec_controls(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) { - if (nested_cpu_has2(vmcs12, SECONDARY_EXEC_MODE_BASED_EPT_EXEC) && - !nested_cpu_has_ept(vmcs12)) + if (CC(nested_cpu_has2(vmcs12, SECONDARY_EXEC_MODE_BASED_EPT_EXEC) && + !nested_cpu_has_ept(vmcs12))) return -EINVAL; return 0; } @@ -815,8 +835,8 @@ static int nested_vmx_check_shadow_vmcs_controls(struct kvm_vcpu *vcpu, if (!nested_cpu_has_shadow_vmcs(vmcs12)) return 0; - if (!page_address_valid(vcpu, vmcs12->vmread_bitmap) || - !page_address_valid(vcpu, vmcs12->vmwrite_bitmap)) + if (CC(!page_address_valid(vcpu, vmcs12->vmread_bitmap)) || + CC(!page_address_valid(vcpu, vmcs12->vmwrite_bitmap))) return -EINVAL; return 0; @@ -826,12 +846,12 @@ static int nested_vmx_msr_check_common(struct kvm_vcpu *vcpu, struct vmx_msr_entry *e) { /* x2APIC MSR accesses are not allowed */ - if (vcpu->arch.apic_base & X2APIC_ENABLE && e->index >> 8 == 0x8) + if (CC(vcpu->arch.apic_base & X2APIC_ENABLE && e->index >> 8 == 0x8)) return -EINVAL; - if (e->index == MSR_IA32_UCODE_WRITE || /* SDM Table 35-2 */ - e->index == MSR_IA32_UCODE_REV) + if (CC(e->index == MSR_IA32_UCODE_WRITE) || /* SDM Table 35-2 */ + CC(e->index == MSR_IA32_UCODE_REV)) return -EINVAL; - if (e->reserved != 0) + if (CC(e->reserved != 0)) return -EINVAL; return 0; } @@ -839,9 +859,9 @@ static int nested_vmx_msr_check_common(struct kvm_vcpu *vcpu, static int nested_vmx_load_msr_check(struct 
kvm_vcpu *vcpu, struct vmx_msr_entry *e) { - if (e->index == MSR_FS_BASE || - e->index == MSR_GS_BASE || - e->index == MSR_IA32_SMM_MONITOR_CTL || /* SMM is not supported */ + if (CC(e->index == MSR_FS_BASE) || + CC(e->index == MSR_GS_BASE) || + CC(e->index == MSR_IA32_SMM_MONITOR_CTL) || /* SMM is not supported */ nested_vmx_msr_check_common(vcpu, e)) return -EINVAL; return 0; @@ -850,24 +870,40 @@ static int nested_vmx_load_msr_check(struct kvm_vcpu *vcpu, static int nested_vmx_store_msr_check(struct kvm_vcpu *vcpu, struct vmx_msr_entry *e) { - if (e->index == MSR_IA32_SMBASE || /* SMM is not supported */ + if (CC(e->index == MSR_IA32_SMBASE) || /* SMM is not supported */ nested_vmx_msr_check_common(vcpu, e)) return -EINVAL; return 0; } +static u32 nested_vmx_max_atomic_switch_msrs(struct kvm_vcpu *vcpu) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + u64 vmx_misc = vmx_control_msr(vmx->nested.msrs.misc_low, + vmx->nested.msrs.misc_high); + + return (vmx_misc_max_msr(vmx_misc) + 1) * VMX_MISC_MSR_LIST_MULTIPLIER; +} + /* * Load guest's/host's msr at nested entry/exit. * return 0 for success, entry index for failure. + * + * One of the failure modes for MSR load/store is when a list exceeds the + * virtual hardware's capacity. To maintain compatibility with hardware inasmuch + * as possible, process all valid entries before failing rather than precheck + * for a capacity violation. */ static u32 nested_vmx_load_msr(struct kvm_vcpu *vcpu, u64 gpa, u32 count) { u32 i; struct vmx_msr_entry e; - struct msr_data msr; + u32 max_msr_list_size = nested_vmx_max_atomic_switch_msrs(vcpu); - msr.host_initiated = false; for (i = 0; i < count; i++) { + if (unlikely(i >= max_msr_list_size)) + goto fail; + if (kvm_vcpu_read_guest(vcpu, gpa + i * sizeof(e), &e, sizeof(e))) { pr_debug_ratelimited( @@ -881,9 +917,7 @@ static u32 nested_vmx_load_msr(struct kvm_vcpu *vcpu, u64 gpa, u32 count) __func__, i, e.index, e.reserved); goto fail; } - msr.index = e.index; - msr.data = e.value; - if (kvm_set_msr(vcpu, &msr)) { + if (kvm_set_msr(vcpu, e.index, e.value)) { pr_debug_ratelimited( "%s cannot write MSR (%u, 0x%x, 0x%llx)\n", __func__, i, e.index, e.value); @@ -897,11 +931,15 @@ fail: static int nested_vmx_store_msr(struct kvm_vcpu *vcpu, u64 gpa, u32 count) { + u64 data; u32 i; struct vmx_msr_entry e; + u32 max_msr_list_size = nested_vmx_max_atomic_switch_msrs(vcpu); for (i = 0; i < count; i++) { - struct msr_data msr_info; + if (unlikely(i >= max_msr_list_size)) + return -EINVAL; + if (kvm_vcpu_read_guest(vcpu, gpa + i * sizeof(e), &e, 2 * sizeof(u32))) { @@ -916,9 +954,7 @@ static int nested_vmx_store_msr(struct kvm_vcpu *vcpu, u64 gpa, u32 count) __func__, i, e.index, e.reserved); return -EINVAL; } - msr_info.host_initiated = false; - msr_info.index = e.index; - if (kvm_get_msr(vcpu, &msr_info)) { + if (kvm_get_msr(vcpu, e.index, &data)) { pr_debug_ratelimited( "%s cannot read MSR (%u, 0x%x)\n", __func__, i, e.index); @@ -927,10 +963,10 @@ static int nested_vmx_store_msr(struct kvm_vcpu *vcpu, u64 gpa, u32 count) if (kvm_vcpu_write_guest(vcpu, gpa + i * sizeof(e) + offsetof(struct vmx_msr_entry, value), - &msr_info.data, sizeof(msr_info.data))) { + &data, sizeof(data))) { pr_debug_ratelimited( "%s cannot write MSR (%u, 0x%x, 0x%llx)\n", - __func__, i, e.index, msr_info.data); + __func__, i, e.index, data); return -EINVAL; } } @@ -955,7 +991,7 @@ static int nested_vmx_load_cr3(struct kvm_vcpu *vcpu, unsigned long cr3, bool ne u32 *entry_failure_code) { if (cr3 != kvm_read_cr3(vcpu) || (!nested_ept && 
pdptrs_changed(vcpu))) { - if (!nested_cr3_valid(vcpu, cr3)) { + if (CC(!nested_cr3_valid(vcpu, cr3))) { *entry_failure_code = ENTRY_FAIL_DEFAULT; return -EINVAL; } @@ -965,7 +1001,7 @@ static int nested_vmx_load_cr3(struct kvm_vcpu *vcpu, unsigned long cr3, bool ne * must not be dereferenced. */ if (is_pae_paging(vcpu) && !nested_ept) { - if (!load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3)) { + if (CC(!load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3))) { *entry_failure_code = ENTRY_FAIL_PDPTE; return -EINVAL; } @@ -1009,17 +1045,6 @@ static u16 nested_get_vpid02(struct kvm_vcpu *vcpu) return vmx->nested.vpid02 ? vmx->nested.vpid02 : vmx->vpid; } - -static inline bool vmx_control_verify(u32 control, u32 low, u32 high) -{ - return fixed_bits_valid(control, low, high); -} - -static inline u64 vmx_control_msr(u32 low, u32 high) -{ - return low | ((u64)high << 32); -} - static bool is_bitwise_subset(u64 superset, u64 subset, u64 mask) { superset &= mask; @@ -2085,6 +2110,7 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12) SECONDARY_EXEC_ENABLE_INVPCID | SECONDARY_EXEC_RDTSCP | SECONDARY_EXEC_XSAVES | + SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE | SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | SECONDARY_EXEC_APIC_REGISTER_VIRT | SECONDARY_EXEC_ENABLE_VMFUNC); @@ -2411,12 +2437,12 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, static int nested_vmx_check_nmi_controls(struct vmcs12 *vmcs12) { - if (!nested_cpu_has_nmi_exiting(vmcs12) && - nested_cpu_has_virtual_nmis(vmcs12)) + if (CC(!nested_cpu_has_nmi_exiting(vmcs12) && + nested_cpu_has_virtual_nmis(vmcs12))) return -EINVAL; - if (!nested_cpu_has_virtual_nmis(vmcs12) && - nested_cpu_has(vmcs12, CPU_BASED_VIRTUAL_NMI_PENDING)) + if (CC(!nested_cpu_has_virtual_nmis(vmcs12) && + nested_cpu_has(vmcs12, CPU_BASED_VIRTUAL_NMI_PENDING))) return -EINVAL; return 0; @@ -2430,11 +2456,11 @@ static bool valid_ept_address(struct kvm_vcpu *vcpu, u64 address) /* Check for memory type validity */ switch (address & VMX_EPTP_MT_MASK) { case VMX_EPTP_MT_UC: - if (!(vmx->nested.msrs.ept_caps & VMX_EPTP_UC_BIT)) + if (CC(!(vmx->nested.msrs.ept_caps & VMX_EPTP_UC_BIT))) return false; break; case VMX_EPTP_MT_WB: - if (!(vmx->nested.msrs.ept_caps & VMX_EPTP_WB_BIT)) + if (CC(!(vmx->nested.msrs.ept_caps & VMX_EPTP_WB_BIT))) return false; break; default: @@ -2442,16 +2468,16 @@ static bool valid_ept_address(struct kvm_vcpu *vcpu, u64 address) } /* only 4 levels page-walk length are valid */ - if ((address & VMX_EPTP_PWL_MASK) != VMX_EPTP_PWL_4) + if (CC((address & VMX_EPTP_PWL_MASK) != VMX_EPTP_PWL_4)) return false; /* Reserved bits should not be set */ - if (address >> maxphyaddr || ((address >> 7) & 0x1f)) + if (CC(address >> maxphyaddr || ((address >> 7) & 0x1f))) return false; /* AD, if set, should be supported */ if (address & VMX_EPTP_AD_ENABLE_BIT) { - if (!(vmx->nested.msrs.ept_caps & VMX_EPT_AD_BIT)) + if (CC(!(vmx->nested.msrs.ept_caps & VMX_EPT_AD_BIT))) return false; } @@ -2466,21 +2492,21 @@ static int nested_check_vm_execution_controls(struct kvm_vcpu *vcpu, { struct vcpu_vmx *vmx = to_vmx(vcpu); - if (!vmx_control_verify(vmcs12->pin_based_vm_exec_control, - vmx->nested.msrs.pinbased_ctls_low, - vmx->nested.msrs.pinbased_ctls_high) || - !vmx_control_verify(vmcs12->cpu_based_vm_exec_control, - vmx->nested.msrs.procbased_ctls_low, - vmx->nested.msrs.procbased_ctls_high)) + if (CC(!vmx_control_verify(vmcs12->pin_based_vm_exec_control, + vmx->nested.msrs.pinbased_ctls_low, + vmx->nested.msrs.pinbased_ctls_high)) || + 
CC(!vmx_control_verify(vmcs12->cpu_based_vm_exec_control, + vmx->nested.msrs.procbased_ctls_low, + vmx->nested.msrs.procbased_ctls_high))) return -EINVAL; if (nested_cpu_has(vmcs12, CPU_BASED_ACTIVATE_SECONDARY_CONTROLS) && - !vmx_control_verify(vmcs12->secondary_vm_exec_control, - vmx->nested.msrs.secondary_ctls_low, - vmx->nested.msrs.secondary_ctls_high)) + CC(!vmx_control_verify(vmcs12->secondary_vm_exec_control, + vmx->nested.msrs.secondary_ctls_low, + vmx->nested.msrs.secondary_ctls_high))) return -EINVAL; - if (vmcs12->cr3_target_count > nested_cpu_vmx_misc_cr3_count(vcpu) || + if (CC(vmcs12->cr3_target_count > nested_cpu_vmx_misc_cr3_count(vcpu)) || nested_vmx_check_io_bitmap_controls(vcpu, vmcs12) || nested_vmx_check_msr_bitmap_controls(vcpu, vmcs12) || nested_vmx_check_tpr_shadow_controls(vcpu, vmcs12) || @@ -2491,7 +2517,7 @@ static int nested_check_vm_execution_controls(struct kvm_vcpu *vcpu, nested_vmx_check_unrestricted_guest_controls(vcpu, vmcs12) || nested_vmx_check_mode_based_ept_exec_controls(vcpu, vmcs12) || nested_vmx_check_shadow_vmcs_controls(vcpu, vmcs12) || - (nested_cpu_has_vpid(vmcs12) && !vmcs12->virtual_processor_id)) + CC(nested_cpu_has_vpid(vmcs12) && !vmcs12->virtual_processor_id)) return -EINVAL; if (!nested_cpu_has_preemption_timer(vmcs12) && @@ -2499,17 +2525,17 @@ static int nested_check_vm_execution_controls(struct kvm_vcpu *vcpu, return -EINVAL; if (nested_cpu_has_ept(vmcs12) && - !valid_ept_address(vcpu, vmcs12->ept_pointer)) + CC(!valid_ept_address(vcpu, vmcs12->ept_pointer))) return -EINVAL; if (nested_cpu_has_vmfunc(vmcs12)) { - if (vmcs12->vm_function_control & - ~vmx->nested.msrs.vmfunc_controls) + if (CC(vmcs12->vm_function_control & + ~vmx->nested.msrs.vmfunc_controls)) return -EINVAL; if (nested_cpu_has_eptp_switching(vmcs12)) { - if (!nested_cpu_has_ept(vmcs12) || - !page_address_valid(vcpu, vmcs12->eptp_list_address)) + if (CC(!nested_cpu_has_ept(vmcs12)) || + CC(!page_address_valid(vcpu, vmcs12->eptp_list_address))) return -EINVAL; } } @@ -2525,10 +2551,10 @@ static int nested_check_vm_exit_controls(struct kvm_vcpu *vcpu, { struct vcpu_vmx *vmx = to_vmx(vcpu); - if (!vmx_control_verify(vmcs12->vm_exit_controls, - vmx->nested.msrs.exit_ctls_low, - vmx->nested.msrs.exit_ctls_high) || - nested_vmx_check_exit_msr_switch_controls(vcpu, vmcs12)) + if (CC(!vmx_control_verify(vmcs12->vm_exit_controls, + vmx->nested.msrs.exit_ctls_low, + vmx->nested.msrs.exit_ctls_high)) || + CC(nested_vmx_check_exit_msr_switch_controls(vcpu, vmcs12))) return -EINVAL; return 0; @@ -2542,9 +2568,9 @@ static int nested_check_vm_entry_controls(struct kvm_vcpu *vcpu, { struct vcpu_vmx *vmx = to_vmx(vcpu); - if (!vmx_control_verify(vmcs12->vm_entry_controls, - vmx->nested.msrs.entry_ctls_low, - vmx->nested.msrs.entry_ctls_high)) + if (CC(!vmx_control_verify(vmcs12->vm_entry_controls, + vmx->nested.msrs.entry_ctls_low, + vmx->nested.msrs.entry_ctls_high))) return -EINVAL; /* @@ -2564,31 +2590,31 @@ static int nested_check_vm_entry_controls(struct kvm_vcpu *vcpu, bool prot_mode = !urg || vmcs12->guest_cr0 & X86_CR0_PE; /* VM-entry interruption-info field: interruption type */ - if (intr_type == INTR_TYPE_RESERVED || - (intr_type == INTR_TYPE_OTHER_EVENT && - !nested_cpu_supports_monitor_trap_flag(vcpu))) + if (CC(intr_type == INTR_TYPE_RESERVED) || + CC(intr_type == INTR_TYPE_OTHER_EVENT && + !nested_cpu_supports_monitor_trap_flag(vcpu))) return -EINVAL; /* VM-entry interruption-info field: vector */ - if ((intr_type == INTR_TYPE_NMI_INTR && vector != NMI_VECTOR) || - 
(intr_type == INTR_TYPE_HARD_EXCEPTION && vector > 31) || - (intr_type == INTR_TYPE_OTHER_EVENT && vector != 0)) + if (CC(intr_type == INTR_TYPE_NMI_INTR && vector != NMI_VECTOR) || + CC(intr_type == INTR_TYPE_HARD_EXCEPTION && vector > 31) || + CC(intr_type == INTR_TYPE_OTHER_EVENT && vector != 0)) return -EINVAL; /* VM-entry interruption-info field: deliver error code */ should_have_error_code = intr_type == INTR_TYPE_HARD_EXCEPTION && prot_mode && x86_exception_has_error_code(vector); - if (has_error_code != should_have_error_code) + if (CC(has_error_code != should_have_error_code)) return -EINVAL; /* VM-entry exception error code */ - if (has_error_code && - vmcs12->vm_entry_exception_error_code & GENMASK(31, 15)) + if (CC(has_error_code && + vmcs12->vm_entry_exception_error_code & GENMASK(31, 16))) return -EINVAL; /* VM-entry interruption-info field: reserved bits */ - if (intr_info & INTR_INFO_RESVD_BITS_MASK) + if (CC(intr_info & INTR_INFO_RESVD_BITS_MASK)) return -EINVAL; /* VM-entry instruction length */ @@ -2596,9 +2622,9 @@ static int nested_check_vm_entry_controls(struct kvm_vcpu *vcpu, case INTR_TYPE_SOFT_EXCEPTION: case INTR_TYPE_SOFT_INTR: case INTR_TYPE_PRIV_SW_EXCEPTION: - if ((vmcs12->vm_entry_instruction_len > 15) || - (vmcs12->vm_entry_instruction_len == 0 && - !nested_cpu_has_zero_length_injection(vcpu))) + if (CC(vmcs12->vm_entry_instruction_len > 15) || + CC(vmcs12->vm_entry_instruction_len == 0 && + !nested_cpu_has_zero_length_injection(vcpu))) return -EINVAL; } } @@ -2625,40 +2651,56 @@ static int nested_vmx_check_host_state(struct kvm_vcpu *vcpu, { bool ia32e; - if (!nested_host_cr0_valid(vcpu, vmcs12->host_cr0) || - !nested_host_cr4_valid(vcpu, vmcs12->host_cr4) || - !nested_cr3_valid(vcpu, vmcs12->host_cr3)) + if (CC(!nested_host_cr0_valid(vcpu, vmcs12->host_cr0)) || + CC(!nested_host_cr4_valid(vcpu, vmcs12->host_cr4)) || + CC(!nested_cr3_valid(vcpu, vmcs12->host_cr3))) return -EINVAL; - if (is_noncanonical_address(vmcs12->host_ia32_sysenter_esp, vcpu) || - is_noncanonical_address(vmcs12->host_ia32_sysenter_eip, vcpu)) + if (CC(is_noncanonical_address(vmcs12->host_ia32_sysenter_esp, vcpu)) || + CC(is_noncanonical_address(vmcs12->host_ia32_sysenter_eip, vcpu))) return -EINVAL; if ((vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PAT) && - !kvm_pat_valid(vmcs12->host_ia32_pat)) + CC(!kvm_pat_valid(vmcs12->host_ia32_pat))) return -EINVAL; - ia32e = (vmcs12->vm_exit_controls & - VM_EXIT_HOST_ADDR_SPACE_SIZE) != 0; - - if (vmcs12->host_cs_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK) || - vmcs12->host_ss_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK) || - vmcs12->host_ds_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK) || - vmcs12->host_es_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK) || - vmcs12->host_fs_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK) || - vmcs12->host_gs_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK) || - vmcs12->host_tr_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK) || - vmcs12->host_cs_selector == 0 || - vmcs12->host_tr_selector == 0 || - (vmcs12->host_ss_selector == 0 && !ia32e)) +#ifdef CONFIG_X86_64 + ia32e = !!(vcpu->arch.efer & EFER_LMA); +#else + ia32e = false; +#endif + + if (ia32e) { + if (CC(!(vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE)) || + CC(!(vmcs12->host_cr4 & X86_CR4_PAE))) + return -EINVAL; + } else { + if (CC(vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE) || + CC(vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE) || + CC(vmcs12->host_cr4 & X86_CR4_PCIDE) || + CC((vmcs12->host_rip) >> 32)) + return
-EINVAL; + } + + if (CC(vmcs12->host_cs_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK)) || + CC(vmcs12->host_ss_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK)) || + CC(vmcs12->host_ds_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK)) || + CC(vmcs12->host_es_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK)) || + CC(vmcs12->host_fs_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK)) || + CC(vmcs12->host_gs_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK)) || + CC(vmcs12->host_tr_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK)) || + CC(vmcs12->host_cs_selector == 0) || + CC(vmcs12->host_tr_selector == 0) || + CC(vmcs12->host_ss_selector == 0 && !ia32e)) return -EINVAL; #ifdef CONFIG_X86_64 - if (is_noncanonical_address(vmcs12->host_fs_base, vcpu) || - is_noncanonical_address(vmcs12->host_gs_base, vcpu) || - is_noncanonical_address(vmcs12->host_gdtr_base, vcpu) || - is_noncanonical_address(vmcs12->host_idtr_base, vcpu) || - is_noncanonical_address(vmcs12->host_tr_base, vcpu)) + if (CC(is_noncanonical_address(vmcs12->host_fs_base, vcpu)) || + CC(is_noncanonical_address(vmcs12->host_gs_base, vcpu)) || + CC(is_noncanonical_address(vmcs12->host_gdtr_base, vcpu)) || + CC(is_noncanonical_address(vmcs12->host_idtr_base, vcpu)) || + CC(is_noncanonical_address(vmcs12->host_tr_base, vcpu)) || + CC(is_noncanonical_address(vmcs12->host_rip, vcpu))) return -EINVAL; #endif @@ -2669,9 +2711,9 @@ static int nested_vmx_check_host_state(struct kvm_vcpu *vcpu, * the host address-space size VM-exit control. */ if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_EFER) { - if (!kvm_valid_efer(vcpu, vmcs12->host_ia32_efer) || - ia32e != !!(vmcs12->host_ia32_efer & EFER_LMA) || - ia32e != !!(vmcs12->host_ia32_efer & EFER_LME)) + if (CC(!kvm_valid_efer(vcpu, vmcs12->host_ia32_efer)) || + CC(ia32e != !!(vmcs12->host_ia32_efer & EFER_LMA)) || + CC(ia32e != !!(vmcs12->host_ia32_efer & EFER_LME))) return -EINVAL; } @@ -2688,16 +2730,16 @@ static int nested_vmx_check_vmcs_link_ptr(struct kvm_vcpu *vcpu, if (vmcs12->vmcs_link_pointer == -1ull) return 0; - if (!page_address_valid(vcpu, vmcs12->vmcs_link_pointer)) + if (CC(!page_address_valid(vcpu, vmcs12->vmcs_link_pointer))) return -EINVAL; - if (kvm_vcpu_map(vcpu, gpa_to_gfn(vmcs12->vmcs_link_pointer), &map)) + if (CC(kvm_vcpu_map(vcpu, gpa_to_gfn(vmcs12->vmcs_link_pointer), &map))) return -EINVAL; shadow = map.hva; - if (shadow->hdr.revision_id != VMCS12_REVISION || - shadow->hdr.shadow_vmcs != nested_cpu_has_shadow_vmcs(vmcs12)) + if (CC(shadow->hdr.revision_id != VMCS12_REVISION) || + CC(shadow->hdr.shadow_vmcs != nested_cpu_has_shadow_vmcs(vmcs12))) r = -EINVAL; kvm_vcpu_unmap(vcpu, &map, false); @@ -2709,8 +2751,8 @@ static int nested_vmx_check_vmcs_link_ptr(struct kvm_vcpu *vcpu, */ static int nested_check_guest_non_reg_state(struct vmcs12 *vmcs12) { - if (vmcs12->guest_activity_state != GUEST_ACTIVITY_ACTIVE && - vmcs12->guest_activity_state != GUEST_ACTIVITY_HLT) + if (CC(vmcs12->guest_activity_state != GUEST_ACTIVITY_ACTIVE && + vmcs12->guest_activity_state != GUEST_ACTIVITY_HLT)) return -EINVAL; return 0; @@ -2724,12 +2766,12 @@ static int nested_vmx_check_guest_state(struct kvm_vcpu *vcpu, *exit_qual = ENTRY_FAIL_DEFAULT; - if (!nested_guest_cr0_valid(vcpu, vmcs12->guest_cr0) || - !nested_guest_cr4_valid(vcpu, vmcs12->guest_cr4)) + if (CC(!nested_guest_cr0_valid(vcpu, vmcs12->guest_cr0)) || + CC(!nested_guest_cr4_valid(vcpu, vmcs12->guest_cr4))) return -EINVAL; if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_PAT) && - !kvm_pat_valid(vmcs12->guest_ia32_pat)) + 
CC(!kvm_pat_valid(vmcs12->guest_ia32_pat))) return -EINVAL; if (nested_vmx_check_vmcs_link_ptr(vcpu, vmcs12)) { @@ -2749,16 +2791,16 @@ static int nested_vmx_check_guest_state(struct kvm_vcpu *vcpu, if (to_vmx(vcpu)->nested.nested_run_pending && (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_EFER)) { ia32e = (vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE) != 0; - if (!kvm_valid_efer(vcpu, vmcs12->guest_ia32_efer) || - ia32e != !!(vmcs12->guest_ia32_efer & EFER_LMA) || - ((vmcs12->guest_cr0 & X86_CR0_PG) && - ia32e != !!(vmcs12->guest_ia32_efer & EFER_LME))) + if (CC(!kvm_valid_efer(vcpu, vmcs12->guest_ia32_efer)) || + CC(ia32e != !!(vmcs12->guest_ia32_efer & EFER_LMA)) || + CC(((vmcs12->guest_cr0 & X86_CR0_PG) && + ia32e != !!(vmcs12->guest_ia32_efer & EFER_LME)))) return -EINVAL; } if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS) && - (is_noncanonical_address(vmcs12->guest_bndcfgs & PAGE_MASK, vcpu) || - (vmcs12->guest_bndcfgs & MSR_IA32_BNDCFGS_RSVD))) + (CC(is_noncanonical_address(vmcs12->guest_bndcfgs & PAGE_MASK, vcpu)) || + CC((vmcs12->guest_bndcfgs & MSR_IA32_BNDCFGS_RSVD)))) return -EINVAL; if (nested_check_guest_non_reg_state(vmcs12)) @@ -2841,9 +2883,13 @@ static int nested_vmx_check_vmentry_hw(struct kvm_vcpu *vcpu) vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.guest.nr); if (vm_fail) { + u32 error = vmcs_read32(VM_INSTRUCTION_ERROR); + preempt_enable(); - WARN_ON_ONCE(vmcs_read32(VM_INSTRUCTION_ERROR) != - VMXERR_ENTRY_INVALID_CONTROL_FIELD); + + trace_kvm_nested_vmenter_failed( + "early hardware check VM-instruction error: ", error); + WARN_ON_ONCE(error != VMXERR_ENTRY_INVALID_CONTROL_FIELD); return 1; } @@ -2871,7 +2917,7 @@ static int nested_vmx_check_vmentry_hw(struct kvm_vcpu *vcpu) static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12); -static void nested_get_vmcs12_pages(struct kvm_vcpu *vcpu) +static bool nested_get_vmcs12_pages(struct kvm_vcpu *vcpu) { struct vmcs12 *vmcs12 = get_vmcs12(vcpu); struct vcpu_vmx *vmx = to_vmx(vcpu); @@ -2891,19 +2937,18 @@ static void nested_get_vmcs12_pages(struct kvm_vcpu *vcpu) vmx->nested.apic_access_page = NULL; } page = kvm_vcpu_gpa_to_page(vcpu, vmcs12->apic_access_addr); - /* - * If translation failed, no matter: This feature asks - * to exit when accessing the given address, and if it - * can never be accessed, this feature won't do - * anything anyway. - */ if (!is_error_page(page)) { vmx->nested.apic_access_page = page; hpa = page_to_phys(vmx->nested.apic_access_page); vmcs_write64(APIC_ACCESS_ADDR, hpa); } else { - secondary_exec_controls_clearbit(vmx, - SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES); + pr_debug_ratelimited("%s: no backing 'struct page' for APIC-access address in vmcs12\n", + __func__); + vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + vcpu->run->internal.suberror = + KVM_INTERNAL_ERROR_EMULATION; + vcpu->run->internal.ndata = 0; + return false; } } @@ -2948,6 +2993,7 @@ static void nested_get_vmcs12_pages(struct kvm_vcpu *vcpu) exec_controls_setbit(vmx, CPU_BASED_USE_MSR_BITMAPS); else exec_controls_clearbit(vmx, CPU_BASED_USE_MSR_BITMAPS); + return true; } /* @@ -2986,13 +3032,15 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu, /* * If from_vmentry is false, this is being called from state restore (either RSM * or KVM_SET_NESTED_STATE). Otherwise it's called from vmlaunch/vmresume. -+ * -+ * Returns: -+ * 0 - success, i.e. 
proceed with actual VMEnter -+ * 1 - consistency check VMExit -+ * -1 - consistency check VMFail + * + * Returns: + * NVMX_VMENTRY_SUCCESS: Entered VMX non-root mode + * NVMX_VMENTRY_VMFAIL: Consistency check VMFail + * NVMX_VMENTRY_VMEXIT: Consistency check VMExit + * NVMX_VMENTRY_KVM_INTERNAL_ERROR: KVM internal error */ -int nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu, bool from_vmentry) +enum nvmx_vmentry_status nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu, + bool from_vmentry) { struct vcpu_vmx *vmx = to_vmx(vcpu); struct vmcs12 *vmcs12 = get_vmcs12(vcpu); @@ -3035,11 +3083,12 @@ int nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu, bool from_vmentry) prepare_vmcs02_early(vmx, vmcs12); if (from_vmentry) { - nested_get_vmcs12_pages(vcpu); + if (unlikely(!nested_get_vmcs12_pages(vcpu))) + return NVMX_VMENTRY_KVM_INTERNAL_ERROR; if (nested_vmx_check_vmentry_hw(vcpu)) { vmx_switch_vmcs(vcpu, &vmx->vmcs01); - return -1; + return NVMX_VMENTRY_VMFAIL; } if (nested_vmx_check_guest_state(vcpu, vmcs12, &exit_qual)) @@ -3103,7 +3152,7 @@ int nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu, bool from_vmentry) * returned as far as L1 is concerned. It will only return (and set * the success flag) when L2 exits (see nested_vmx_vmexit()). */ - return 0; + return NVMX_VMENTRY_SUCCESS; /* * A failed consistency check that leads to a VMExit during L1's @@ -3119,14 +3168,14 @@ vmentry_fail_vmexit: vmx_switch_vmcs(vcpu, &vmx->vmcs01); if (!from_vmentry) - return 1; + return NVMX_VMENTRY_VMEXIT; load_vmcs12_host_state(vcpu, vmcs12); vmcs12->vm_exit_reason = exit_reason | VMX_EXIT_REASONS_FAILED_VMENTRY; vmcs12->exit_qualification = exit_qual; if (enable_shadow_vmcs || vmx->nested.hv_evmcs) vmx->nested.need_vmcs12_to_shadow_sync = true; - return 1; + return NVMX_VMENTRY_VMEXIT; } /* @@ -3136,9 +3185,9 @@ vmentry_fail_vmexit: static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) { struct vmcs12 *vmcs12; + enum nvmx_vmentry_status status; struct vcpu_vmx *vmx = to_vmx(vcpu); u32 interrupt_shadow = vmx_get_interrupt_shadow(vcpu); - int ret; if (!nested_vmx_check_permission(vcpu)) return 1; @@ -3198,13 +3247,9 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) * the nested entry. */ vmx->nested.nested_run_pending = 1; - ret = nested_vmx_enter_non_root_mode(vcpu, true); - vmx->nested.nested_run_pending = !ret; - if (ret > 0) - return 1; - else if (ret) - return nested_vmx_failValid(vcpu, - VMXERR_ENTRY_INVALID_CONTROL_FIELD); + status = nested_vmx_enter_non_root_mode(vcpu, true); + if (unlikely(status != NVMX_VMENTRY_SUCCESS)) + goto vmentry_failed; /* Hide L1D cache contents from the nested guest.
*/ vmx->vcpu.arch.l1tf_flush_l1d = true; @@ -3235,6 +3280,15 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) return kvm_vcpu_halt(vcpu); } return 1; + +vmentry_failed: + vmx->nested.nested_run_pending = 0; + if (status == NVMX_VMENTRY_KVM_INTERNAL_ERROR) + return 0; + if (status == NVMX_VMENTRY_VMEXIT) + return 1; + WARN_ON_ONCE(status != NVMX_VMENTRY_VMFAIL); + return nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD); } /* @@ -3401,6 +3455,15 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu, bool external_intr) unsigned long exit_qual; bool block_nested_events = vmx->nested.nested_run_pending || kvm_event_needs_reinjection(vcpu); + struct kvm_lapic *apic = vcpu->arch.apic; + + if (lapic_in_kernel(vcpu) && + test_bit(KVM_APIC_INIT, &apic->pending_events)) { + if (block_nested_events) + return -EBUSY; + nested_vmx_vmexit(vcpu, EXIT_REASON_INIT_SIGNAL, 0, 0); + return 0; + } if (vcpu->arch.exception.pending && nested_vmx_check_exception(vcpu, &exit_qual)) { @@ -3889,7 +3952,6 @@ static void nested_vmx_restore_host_state(struct kvm_vcpu *vcpu) struct vmcs12 *vmcs12 = get_vmcs12(vcpu); struct vcpu_vmx *vmx = to_vmx(vcpu); struct vmx_msr_entry g, h; - struct msr_data msr; gpa_t gpa; u32 i, j; @@ -3949,7 +4011,6 @@ static void nested_vmx_restore_host_state(struct kvm_vcpu *vcpu) * from the guest value. The intent is to stuff host state as * silently as possible, not to fully process the exit load list. */ - msr.host_initiated = false; for (i = 0; i < vmcs12->vm_entry_msr_load_count; i++) { gpa = vmcs12->vm_entry_msr_load_addr + (i * sizeof(g)); if (kvm_vcpu_read_guest(vcpu, gpa, &g, sizeof(g))) { @@ -3979,9 +4040,7 @@ static void nested_vmx_restore_host_state(struct kvm_vcpu *vcpu) goto vmabort; } - msr.index = h.index; - msr.data = h.value; - if (kvm_set_msr(vcpu, &msr)) { + if (kvm_set_msr(vcpu, h.index, h.value)) { pr_debug_ratelimited( "%s WRMSR failed (%u, 0x%x, 0x%llx)\n", __func__, j, h.index, h.value); @@ -4466,7 +4525,12 @@ static int handle_vmoff(struct kvm_vcpu *vcpu) { if (!nested_vmx_check_permission(vcpu)) return 1; + free_nested(vcpu); + + /* Process a latched INIT during time CPU was in VMX operation */ + kvm_make_request(KVM_REQ_EVENT, vcpu); + return nested_vmx_succeed(vcpu); } @@ -4540,6 +4604,7 @@ static int handle_vmread(struct kvm_vcpu *vcpu) int len; gva_t gva = 0; struct vmcs12 *vmcs12; + struct x86_exception e; short offset; if (!nested_vmx_check_permission(vcpu)) @@ -4588,7 +4653,8 @@ static int handle_vmread(struct kvm_vcpu *vcpu) vmx_instruction_info, true, len, &gva)) return 1; /* _system ok, nested_vmx_check_permission has verified cpl=0 */ - kvm_write_guest_virt_system(vcpu, gva, &field_value, len, NULL); + if (kvm_write_guest_virt_system(vcpu, gva, &field_value, len, &e)) + kvm_inject_page_fault(vcpu, &e); } return nested_vmx_succeed(vcpu); @@ -5259,8 +5325,9 @@ bool nested_vmx_exit_reflected(struct kvm_vcpu *vcpu, u32 exit_reason) return false; if (unlikely(vmx->fail)) { - pr_info_ratelimited("%s failed vm entry %x\n", __func__, - vmcs_read32(VM_INSTRUCTION_ERROR)); + trace_kvm_nested_vmenter_failed( + "hardware VM-instruction error: ", + vmcs_read32(VM_INSTRUCTION_ERROR)); return true; } @@ -5420,6 +5487,10 @@ bool nested_vmx_exit_reflected(struct kvm_vcpu *vcpu, u32 exit_reason) case EXIT_REASON_ENCLS: /* SGX is never exposed to L1 */ return false; + case EXIT_REASON_UMWAIT: + case EXIT_REASON_TPAUSE: + return nested_cpu_has2(vmcs12, + SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE); default: return true; } diff --git 
a/arch/x86/kvm/vmx/nested.h b/arch/x86/kvm/vmx/nested.h index 187d39bf0bf1..6280f33e5fa6 100644 --- a/arch/x86/kvm/vmx/nested.h +++ b/arch/x86/kvm/vmx/nested.h @@ -6,6 +6,16 @@ #include "vmcs12.h" #include "vmx.h" +/* + * Status returned by nested_vmx_enter_non_root_mode(): + */ +enum nvmx_vmentry_status { + NVMX_VMENTRY_SUCCESS, /* Entered VMX non-root mode */ + NVMX_VMENTRY_VMFAIL, /* Consistency check VMFail */ + NVMX_VMENTRY_VMEXIT, /* Consistency check VMExit */ + NVMX_VMENTRY_KVM_INTERNAL_ERROR,/* KVM internal error */ +}; + void vmx_leave_nested(struct kvm_vcpu *vcpu); void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, u32 ept_caps, bool apicv); @@ -13,7 +23,8 @@ void nested_vmx_hardware_unsetup(void); __init int nested_vmx_hardware_setup(int (*exit_handlers[])(struct kvm_vcpu *)); void nested_vmx_vcpu_setup(void); void nested_vmx_free_vcpu(struct kvm_vcpu *vcpu); -int nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu, bool from_vmentry); +enum nvmx_vmentry_status nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu, + bool from_vmentry); bool nested_vmx_exit_reflected(struct kvm_vcpu *vcpu, u32 exit_reason); void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason, u32 exit_intr_info, unsigned long exit_qualification); diff --git a/arch/x86/kvm/vmx/ops.h b/arch/x86/kvm/vmx/ops.h index 2200fb698dd0..45eaedee2ac0 100644 --- a/arch/x86/kvm/vmx/ops.h +++ b/arch/x86/kvm/vmx/ops.h @@ -11,8 +11,13 @@ #include "vmcs.h" #define __ex(x) __kvm_handle_fault_on_reboot(x) -#define __ex_clear(x, reg) \ - ____kvm_handle_fault_on_reboot(x, "xor " reg ", " reg) + +asmlinkage void vmread_error(unsigned long field, bool fault); +void vmwrite_error(unsigned long field, unsigned long value); +void vmclear_error(struct vmcs *vmcs, u64 phys_addr); +void vmptrld_error(struct vmcs *vmcs, u64 phys_addr); +void invvpid_error(unsigned long ext, u16 vpid, gva_t gva); +void invept_error(unsigned long ext, u64 eptp, gpa_t gpa); static __always_inline void vmcs_check16(unsigned long field) { @@ -62,8 +67,22 @@ static __always_inline unsigned long __vmcs_readl(unsigned long field) { unsigned long value; - asm volatile (__ex_clear("vmread %1, %0", "%k0") - : "=r"(value) : "r"(field)); + asm volatile("1: vmread %2, %1\n\t" + ".byte 0x3e\n\t" /* branch taken hint */ + "ja 3f\n\t" + "mov %2, %%" _ASM_ARG1 "\n\t" + "xor %%" _ASM_ARG2 ", %%" _ASM_ARG2 "\n\t" + "2: call vmread_error\n\t" + "xor %k1, %k1\n\t" + "3:\n\t" + + ".pushsection .fixup, \"ax\"\n\t" + "4: mov %2, %%" _ASM_ARG1 "\n\t" + "mov $1, %%" _ASM_ARG2 "\n\t" + "jmp 2b\n\t" + ".popsection\n\t" + _ASM_EXTABLE(1b, 4b) + : ASM_CALL_CONSTRAINT, "=r"(value) : "r"(field) : "cc"); return value; } @@ -103,21 +122,39 @@ static __always_inline unsigned long vmcs_readl(unsigned long field) return __vmcs_readl(field); } -static noinline void vmwrite_error(unsigned long field, unsigned long value) -{ - printk(KERN_ERR "vmwrite error: reg %lx value %lx (err %d)\n", - field, value, vmcs_read32(VM_INSTRUCTION_ERROR)); - dump_stack(); -} +#define vmx_asm1(insn, op1, error_args...) \ +do { \ + asm_volatile_goto("1: " __stringify(insn) " %0\n\t" \ + ".byte 0x2e\n\t" /* branch not taken hint */ \ + "jna %l[error]\n\t" \ + _ASM_EXTABLE(1b, %l[fault]) \ + : : op1 : "cc" : error, fault); \ + return; \ +error: \ + insn##_error(error_args); \ + return; \ +fault: \ + kvm_spurious_fault(); \ +} while (0) + +#define vmx_asm2(insn, op1, op2, error_args...) 
\ +do { \ + asm_volatile_goto("1: " __stringify(insn) " %1, %0\n\t" \ + ".byte 0x2e\n\t" /* branch not taken hint */ \ + "jna %l[error]\n\t" \ + _ASM_EXTABLE(1b, %l[fault]) \ + : : op1, op2 : "cc" : error, fault); \ + return; \ +error: \ + insn##_error(error_args); \ + return; \ +fault: \ + kvm_spurious_fault(); \ +} while (0) static __always_inline void __vmcs_writel(unsigned long field, unsigned long value) { - bool error; - - asm volatile (__ex("vmwrite %2, %1") CC_SET(na) - : CC_OUT(na) (error) : "r"(field), "rm"(value)); - if (unlikely(error)) - vmwrite_error(field, value); + vmx_asm2(vmwrite, "r"(field), "rm"(value), field, value); } static __always_inline void vmcs_write16(unsigned long field, u16 value) @@ -182,28 +219,18 @@ static __always_inline void vmcs_set_bits(unsigned long field, u32 mask) static inline void vmcs_clear(struct vmcs *vmcs) { u64 phys_addr = __pa(vmcs); - bool error; - asm volatile (__ex("vmclear %1") CC_SET(na) - : CC_OUT(na) (error) : "m"(phys_addr)); - if (unlikely(error)) - printk(KERN_ERR "kvm: vmclear fail: %p/%llx\n", - vmcs, phys_addr); + vmx_asm1(vmclear, "m"(phys_addr), vmcs, phys_addr); } static inline void vmcs_load(struct vmcs *vmcs) { u64 phys_addr = __pa(vmcs); - bool error; if (static_branch_unlikely(&enable_evmcs)) return evmcs_load(phys_addr); - asm volatile (__ex("vmptrld %1") CC_SET(na) - : CC_OUT(na) (error) : "m"(phys_addr)); - if (unlikely(error)) - printk(KERN_ERR "kvm: vmptrld %p/%llx failed\n", - vmcs, phys_addr); + vmx_asm1(vmptrld, "m"(phys_addr), vmcs, phys_addr); } static inline void __invvpid(unsigned long ext, u16 vpid, gva_t gva) @@ -213,11 +240,8 @@ static inline void __invvpid(unsigned long ext, u16 vpid, gva_t gva) u64 rsvd : 48; u64 gva; } operand = { vpid, 0, gva }; - bool error; - asm volatile (__ex("invvpid %2, %1") CC_SET(na) - : CC_OUT(na) (error) : "r"(ext), "m"(operand)); - BUG_ON(error); + vmx_asm2(invvpid, "r"(ext), "m"(operand), ext, vpid, gva); } static inline void __invept(unsigned long ext, u64 eptp, gpa_t gpa) @@ -225,11 +249,8 @@ static inline void __invept(unsigned long ext, u64 eptp, gpa_t gpa) struct { u64 eptp, gpa; } operand = {eptp, gpa}; - bool error; - asm volatile (__ex("invept %2, %1") CC_SET(na) - : CC_OUT(na) (error) : "r"(ext), "m"(operand)); - BUG_ON(error); + vmx_asm2(invept, "r"(ext), "m"(operand), ext, eptp, gpa); } static inline bool vpid_sync_vcpu_addr(int vpid, gva_t addr) diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c index 4dea0e0e7e39..3e9c059099e9 100644 --- a/arch/x86/kvm/vmx/pmu_intel.c +++ b/arch/x86/kvm/vmx/pmu_intel.c @@ -262,6 +262,7 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) static void intel_pmu_refresh(struct kvm_vcpu *vcpu) { struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); + struct x86_pmu_capability x86_pmu; struct kvm_cpuid_entry2 *entry; union cpuid10_eax eax; union cpuid10_edx edx; @@ -283,8 +284,10 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu) if (!pmu->version) return; + perf_get_x86_pmu_capability(&x86_pmu); + pmu->nr_arch_gp_counters = min_t(int, eax.split.num_counters, - INTEL_PMC_MAX_GENERIC); + x86_pmu.num_counters_gp); pmu->counter_bitmask[KVM_PMC_GP] = ((u64)1 << eax.split.bit_width) - 1; pmu->available_event_types = ~entry->ebx & ((1ull << eax.split.mask_length) - 1); @@ -294,7 +297,7 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu) } else { pmu->nr_arch_fixed_counters = min_t(int, edx.split.num_counters_fixed, - INTEL_PMC_MAX_FIXED); + x86_pmu.num_counters_fixed); 
pmu->counter_bitmask[KVM_PMC_FIXED] = ((u64)1 << edx.split.bit_width_fixed) - 1; } diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S index 4010d519eb8c..751a384c2eb0 100644 --- a/arch/x86/kvm/vmx/vmenter.S +++ b/arch/x86/kvm/vmx/vmenter.S @@ -94,7 +94,7 @@ ENDPROC(vmx_vmexit) /** * __vmx_vcpu_run - Run a vCPU via a transition to VMX guest mode - * @vmx: struct vcpu_vmx * + * @vmx: struct vcpu_vmx * (forwarded to vmx_update_host_rsp) * @regs: unsigned long * (to guest registers) * @launched: %true if the VMCS has been launched * @@ -151,7 +151,7 @@ ENTRY(__vmx_vcpu_run) mov VCPU_R14(%_ASM_AX), %r14 mov VCPU_R15(%_ASM_AX), %r15 #endif - /* Load guest RAX. This kills the vmx_vcpu pointer! */ + /* Load guest RAX. This kills the @regs pointer! */ mov VCPU_RAX(%_ASM_AX), %_ASM_AX /* Enter guest mode */ diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index c030c96fc81a..04a8212704c1 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -209,6 +209,11 @@ static int vmx_setup_l1d_flush(enum vmx_l1d_flush_state l1tf) struct page *page; unsigned int i; + if (!boot_cpu_has_bug(X86_BUG_L1TF)) { + l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_NOT_REQUIRED; + return 0; + } + if (!enable_ept) { l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_EPT_DISABLED; return 0; @@ -343,6 +348,48 @@ static __always_inline void vmx_disable_intercept_for_msr(unsigned long *msr_bit void vmx_vmexit(void); +#define vmx_insn_failed(fmt...) \ +do { \ + WARN_ONCE(1, fmt); \ + pr_warn_ratelimited(fmt); \ +} while (0) + +asmlinkage void vmread_error(unsigned long field, bool fault) +{ + if (fault) + kvm_spurious_fault(); + else + vmx_insn_failed("kvm: vmread failed: field=%lx\n", field); +} + +noinline void vmwrite_error(unsigned long field, unsigned long value) +{ + vmx_insn_failed("kvm: vmwrite failed: field=%lx val=%lx err=%d\n", + field, value, vmcs_read32(VM_INSTRUCTION_ERROR)); +} + +noinline void vmclear_error(struct vmcs *vmcs, u64 phys_addr) +{ + vmx_insn_failed("kvm: vmclear failed: %p/%llx\n", vmcs, phys_addr); +} + +noinline void vmptrld_error(struct vmcs *vmcs, u64 phys_addr) +{ + vmx_insn_failed("kvm: vmptrld failed: %p/%llx\n", vmcs, phys_addr); +} + +noinline void invvpid_error(unsigned long ext, u16 vpid, gva_t gva) +{ + vmx_insn_failed("kvm: invvpid failed: ext=0x%lx vpid=%u gva=0x%lx\n", + ext, vpid, gva); +} + +noinline void invept_error(unsigned long ext, u64 eptp, gpa_t gpa) +{ + vmx_insn_failed("kvm: invept failed: ext=0x%lx eptp=%llx gpa=0x%llx\n", + ext, eptp, gpa); +} + static DEFINE_PER_CPU(struct vmcs *, vmxarea); DEFINE_PER_CPU(struct vmcs *, current_vmcs); /* @@ -486,6 +533,31 @@ static int hv_remote_flush_tlb(struct kvm *kvm) return hv_remote_flush_tlb_with_range(kvm, NULL); } +static int hv_enable_direct_tlbflush(struct kvm_vcpu *vcpu) +{ + struct hv_enlightened_vmcs *evmcs; + struct hv_partition_assist_pg **p_hv_pa_pg = + &vcpu->kvm->arch.hyperv.hv_pa_pg; + /* + * Synthetic VM-Exit is not enabled in the current code, so all + * eVMCSes in a single VM share the same assist page. + */
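The code that follows is the usual allocate-once idiom for the per-VM partition assist page. A rough userspace analogue, with calloc standing in for kzalloc and 4096 for PAGE_SIZE (illustrative only):

#include <stdlib.h>

/* The shared page is allocated on first use and reused by every eVMCS in
 * the VM afterwards. */
static void *get_partition_assist_page(void **slot)
{
	if (!*slot)
		*slot = calloc(1, 4096);	/* 4096 stands in for PAGE_SIZE */
	return *slot;				/* NULL maps to -ENOMEM in the caller */
}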
+ */ + if (!*p_hv_pa_pg) + *p_hv_pa_pg = kzalloc(PAGE_SIZE, GFP_KERNEL); + + if (!*p_hv_pa_pg) + return -ENOMEM; + + evmcs = (struct hv_enlightened_vmcs *)to_vmx(vcpu)->loaded_vmcs->vmcs; + + evmcs->partition_assist_page = + __pa(*p_hv_pa_pg); + evmcs->hv_vm_id = (unsigned long)vcpu->kvm; + evmcs->hv_enlightenments_control.nested_flush_hypercall = 1; + + return 0; +} + #endif /* IS_ENABLED(CONFIG_HYPERV) */ /* @@ -897,17 +969,9 @@ static bool update_transition_efer(struct vcpu_vmx *vmx, int efer_offset) u64 guest_efer = vmx->vcpu.arch.efer; u64 ignore_bits = 0; - if (!enable_ept) { - /* - * NX is needed to handle CR0.WP=1, CR4.SMEP=1. Testing - * host CPUID is more efficient than testing guest CPUID - * or CR4. Host SMEP is anyway a requirement for guest SMEP. - */ - if (boot_cpu_has(X86_FEATURE_SMEP)) - guest_efer |= EFER_NX; - else if (!(guest_efer & EFER_NX)) - ignore_bits |= EFER_NX; - } + /* Shadow paging assumes NX to be available. */ + if (!enable_ept) + guest_efer |= EFER_NX; /* * LMA and LME handled by hardware; SCE meaningless outside long mode. @@ -1204,6 +1268,18 @@ static void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu) if (!pi_test_sn(pi_desc) && vcpu->cpu == cpu) return; + /* + * If the 'nv' field is POSTED_INTR_WAKEUP_VECTOR, do not change + * PI.NDST: pi_post_block is the one expected to change PID.NDST and the + * wakeup handler expects the vCPU to be on the blocked_vcpu_list that + * matches PI.NDST. Otherwise, a vcpu may not be able to be woken up + * correctly. + */ + if (pi_desc->nv == POSTED_INTR_WAKEUP_VECTOR || vcpu->cpu == cpu) { + pi_clear_sn(pi_desc); + goto after_clear_sn; + } + /* The full case. */ do { old.control = new.control = pi_desc->control; @@ -1219,6 +1295,8 @@ static void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu) } while (cmpxchg64(&pi_desc->control, old.control, new.control) != old.control); +after_clear_sn: + /* * Clear SN before reading the bitmap. The VT-d firmware * writes the bitmap and reads SN atomically (5.2.3 in the @@ -1227,7 +1305,7 @@ static void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu) */ smp_mb__after_atomic(); - if (!bitmap_empty((unsigned long *)pi_desc->pir, NR_VECTORS)) + if (!pi_is_pir_empty(pi_desc)) pi_set_on(pi_desc); } @@ -1472,17 +1550,32 @@ static int vmx_rtit_ctl_check(struct kvm_vcpu *vcpu, u64 data) return 0; } - -static void skip_emulated_instruction(struct kvm_vcpu *vcpu) +static int skip_emulated_instruction(struct kvm_vcpu *vcpu) { unsigned long rip; - rip = kvm_rip_read(vcpu); - rip += vmcs_read32(VM_EXIT_INSTRUCTION_LEN); - kvm_rip_write(vcpu, rip); + /* + * Using VMCS.VM_EXIT_INSTRUCTION_LEN on EPT misconfig depends on + * undefined behavior: Intel's SDM doesn't mandate the VMCS field be + * set when EPT misconfig occurs. In practice, real hardware updates + * VM_EXIT_INSTRUCTION_LEN on EPT misconfig, but other hypervisors + * (namely Hyper-V) don't set it due to it being undefined behavior, + * i.e. we end up advancing IP with some random value. 
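A compact sketch of the policy spelled out in this comment: trust the VMCS-reported length except when running under another hypervisor on an EPT-misconfig exit. The emulate_skip callback is a hypothetical stand-in for kvm_emulate_instruction(vcpu, EMULTYPE_SKIP):

#include <stdbool.h>
#include <stdint.h>

static bool skip_instruction(uint64_t *rip, uint32_t exit_instr_len,
			     bool under_hypervisor, bool ept_misconfig,
			     bool (*emulate_skip)(void))
{
	if (!under_hypervisor || !ept_misconfig) {
		*rip += exit_instr_len;	/* VM_EXIT_INSTRUCTION_LEN is reliable */
		return true;
	}
	return emulate_skip();		/* decode to find the real length */
}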
+ */ + if (!static_cpu_has(X86_FEATURE_HYPERVISOR) || + to_vmx(vcpu)->exit_reason != EXIT_REASON_EPT_MISCONFIG) { + rip = kvm_rip_read(vcpu); + rip += vmcs_read32(VM_EXIT_INSTRUCTION_LEN); + kvm_rip_write(vcpu, rip); + } else { + if (!kvm_emulate_instruction(vcpu, EMULTYPE_SKIP)) + return 0; + } /* skipping an emulated instruction also counts */ vmx_set_interrupt_shadow(vcpu, 0); + + return 1; } static void vmx_clear_hlt(struct kvm_vcpu *vcpu) @@ -1517,8 +1610,7 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu) int inc_eip = 0; if (kvm_exception_is_soft(nr)) inc_eip = vcpu->arch.event_exit_inst_len; - if (kvm_inject_realmode_interrupt(vcpu, nr, inc_eip) != EMULATE_DONE) - kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu); + kvm_inject_realmode_interrupt(vcpu, nr, inc_eip); return; } @@ -1690,6 +1782,12 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) #endif case MSR_EFER: return kvm_get_msr_common(vcpu, msr_info); + case MSR_IA32_UMWAIT_CONTROL: + if (!msr_info->host_initiated && !vmx_has_waitpkg(vmx)) + return 1; + + msr_info->data = vmx->msr_ia32_umwait_control; + break; case MSR_IA32_SPEC_CTRL: if (!msr_info->host_initiated && !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL)) @@ -1863,6 +1961,16 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) return 1; vmcs_write64(GUEST_BNDCFGS, data); break; + case MSR_IA32_UMWAIT_CONTROL: + if (!msr_info->host_initiated && !vmx_has_waitpkg(vmx)) + return 1; + + /* The reserved bit 1 and non-32 bit [63:32] should be zero */ + if (data & (BIT_ULL(1) | GENMASK_ULL(63, 32))) + return 1; + + vmx->msr_ia32_umwait_control = data; + break; case MSR_IA32_SPEC_CTRL: if (!msr_info->host_initiated && !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL)) @@ -2280,6 +2388,7 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf, SECONDARY_EXEC_RDRAND_EXITING | SECONDARY_EXEC_ENABLE_PML | SECONDARY_EXEC_TSC_SCALING | + SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE | SECONDARY_EXEC_PT_USE_GPA | SECONDARY_EXEC_PT_CONCEAL_VMX | SECONDARY_EXEC_ENABLE_VMFUNC | @@ -4016,6 +4125,23 @@ static void vmx_compute_secondary_exec_control(struct vcpu_vmx *vmx) } } + if (vmx_waitpkg_supported()) { + bool waitpkg_enabled = + guest_cpuid_has(vcpu, X86_FEATURE_WAITPKG); + + if (!waitpkg_enabled) + exec_control &= ~SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE; + + if (nested) { + if (waitpkg_enabled) + vmx->nested.msrs.secondary_ctls_high |= + SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE; + else + vmx->nested.msrs.secondary_ctls_high &= + ~SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE; + } + } + vmx->secondary_exec_control = exec_control; } @@ -4026,7 +4152,7 @@ static void ept_set_mmio_spte_mask(void) * of an EPT paging-structure entry is 110b (write/execute). 
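For reference, the trick configured here: write+execute without read (110b) is an architecturally illegal EPT permission combination, so stuffing it into a shadow PTE guarantees that any access through it exits with EPT misconfig, which uniquely brands MMIO mappings. A self-contained sketch of the resulting test, with illustrative macro names:

#include <stdbool.h>
#include <stdint.h>

#define EPT_READABLE	(1ULL << 0)
#define EPT_WRITABLE	(1ULL << 1)
#define EPT_EXECUTABLE	(1ULL << 2)
#define EPT_RWX_MASK	(EPT_READABLE | EPT_WRITABLE | EPT_EXECUTABLE)

/* W=1, X=1, R=0: illegal by design, so it can only mean an MMIO SPTE. */
#define EPT_MISCONFIG_WX_VALUE	(EPT_WRITABLE | EPT_EXECUTABLE)

static bool is_mmio_spte(uint64_t spte)
{
	return (spte & EPT_RWX_MASK) == EPT_MISCONFIG_WX_VALUE;
}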
*/ kvm_mmu_set_mmio_spte_mask(VMX_EPT_RWX_MASK, - VMX_EPT_MISCONFIG_WX_VALUE); + VMX_EPT_MISCONFIG_WX_VALUE, 0); } #define VMX_XSS_EXIT_BITMAP 0 @@ -4150,8 +4276,11 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) vmx->rmode.vm86_active = 0; vmx->spec_ctrl = 0; + vmx->msr_ia32_umwait_control = 0; + vcpu->arch.microcode_version = 0x100000000ULL; vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val(); + vmx->hv_deadline_tsc = -1; kvm_set_cr8(vcpu, 0); if (!init_event) { @@ -4266,8 +4395,7 @@ static void vmx_inject_irq(struct kvm_vcpu *vcpu) int inc_eip = 0; if (vcpu->arch.interrupt.soft) inc_eip = vcpu->arch.event_exit_inst_len; - if (kvm_inject_realmode_interrupt(vcpu, irq, inc_eip) != EMULATE_DONE) - kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu); + kvm_inject_realmode_interrupt(vcpu, irq, inc_eip); return; } intr = irq | INTR_INFO_VALID_MASK; @@ -4303,8 +4431,7 @@ static void vmx_inject_nmi(struct kvm_vcpu *vcpu) vmx->loaded_vmcs->nmi_known_unmasked = false; if (vmx->rmode.vm86_active) { - if (kvm_inject_realmode_interrupt(vcpu, NMI_VECTOR, 0) != EMULATE_DONE) - kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu); + kvm_inject_realmode_interrupt(vcpu, NMI_VECTOR, 0); return; } @@ -4431,7 +4558,7 @@ static int handle_rmode_exception(struct kvm_vcpu *vcpu, * Cause the #SS fault with 0 error code in VM86 mode. */ if (((vec == GP_VECTOR) || (vec == SS_VECTOR)) && err_code == 0) { - if (kvm_emulate_instruction(vcpu, 0) == EMULATE_DONE) { + if (kvm_emulate_instruction(vcpu, 0)) { if (vcpu->arch.halt_request) { vcpu->arch.halt_request = 0; return kvm_vcpu_halt(vcpu); @@ -4482,7 +4609,6 @@ static int handle_exception_nmi(struct kvm_vcpu *vcpu) u32 intr_info, ex_no, error_code; unsigned long cr2, rip, dr6; u32 vect_info; - enum emulation_result er; vect_info = vmx->idt_vectoring_info; intr_info = vmx->exit_intr_info; @@ -4499,13 +4625,17 @@ static int handle_exception_nmi(struct kvm_vcpu *vcpu) if (!vmx->rmode.vm86_active && is_gp_fault(intr_info)) { WARN_ON_ONCE(!enable_vmware_backdoor); - er = kvm_emulate_instruction(vcpu, - EMULTYPE_VMWARE | EMULTYPE_NO_UD_ON_FAIL); - if (er == EMULATE_USER_EXIT) - return 0; - else if (er != EMULATE_DONE) + + /* + * VMware backdoor emulation on #GP interception only handles + * IN{S}, OUT{S}, and RDPMC, none of which generate a non-zero + * error code on #GP. 
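A sketch of the gate described in this comment: the VMware backdoor instructions always produce a zero #GP error code, so a non-zero code is a real fault to reflect back to the guest. queue_gp_exception() and emulate_vmware_backdoor() are hypothetical stand-ins for the KVM plumbing:

#include <stdint.h>

extern void queue_gp_exception(uint32_t error_code);
extern int emulate_vmware_backdoor(void);	/* 1 = handled, 0 = exit to userspace */

static int handle_gp_interception(uint32_t error_code)
{
	if (error_code) {
		queue_gp_exception(error_code);	/* a genuine #GP, not the backdoor */
		return 1;
	}
	return emulate_vmware_backdoor();
}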
+ */ + if (error_code) { kvm_queue_exception_e(vcpu, GP_VECTOR, error_code); - return 1; + return 1; + } + return kvm_emulate_instruction(vcpu, EMULTYPE_VMWARE_GP); } /* @@ -4547,7 +4677,7 @@ static int handle_exception_nmi(struct kvm_vcpu *vcpu) vcpu->arch.dr6 &= ~DR_TRAP_BITS; vcpu->arch.dr6 |= dr6 | DR6_RTM; if (is_icebp(intr_info)) - skip_emulated_instruction(vcpu); + WARN_ON(!skip_emulated_instruction(vcpu)); kvm_queue_exception(vcpu, DB_VECTOR); return 1; @@ -4602,7 +4732,7 @@ static int handle_io(struct kvm_vcpu *vcpu) ++vcpu->stat.io_exits; if (string) - return kvm_emulate_instruction(vcpu, 0) == EMULATE_DONE; + return kvm_emulate_instruction(vcpu, 0); port = exit_qualification >> 16; size = (exit_qualification & 7) + 1; @@ -4676,7 +4806,7 @@ static int handle_set_cr4(struct kvm_vcpu *vcpu, unsigned long val) static int handle_desc(struct kvm_vcpu *vcpu) { WARN_ON(!(vcpu->arch.cr4 & X86_CR4_UMIP)); - return kvm_emulate_instruction(vcpu, 0) == EMULATE_DONE; + return kvm_emulate_instruction(vcpu, 0); } static int handle_cr(struct kvm_vcpu *vcpu) @@ -4856,41 +4986,12 @@ static int handle_cpuid(struct kvm_vcpu *vcpu) static int handle_rdmsr(struct kvm_vcpu *vcpu) { - u32 ecx = kvm_rcx_read(vcpu); - struct msr_data msr_info; - - msr_info.index = ecx; - msr_info.host_initiated = false; - if (vmx_get_msr(vcpu, &msr_info)) { - trace_kvm_msr_read_ex(ecx); - kvm_inject_gp(vcpu, 0); - return 1; - } - - trace_kvm_msr_read(ecx, msr_info.data); - - kvm_rax_write(vcpu, msr_info.data & -1u); - kvm_rdx_write(vcpu, (msr_info.data >> 32) & -1u); - return kvm_skip_emulated_instruction(vcpu); + return kvm_emulate_rdmsr(vcpu); } static int handle_wrmsr(struct kvm_vcpu *vcpu) { - struct msr_data msr; - u32 ecx = kvm_rcx_read(vcpu); - u64 data = kvm_read_edx_eax(vcpu); - - msr.data = data; - msr.index = ecx; - msr.host_initiated = false; - if (kvm_set_msr(vcpu, &msr) != 0) { - trace_kvm_msr_write_ex(ecx, data); - kvm_inject_gp(vcpu, 0); - return 1; - } - - trace_kvm_msr_write(ecx, data); - return kvm_skip_emulated_instruction(vcpu); + return kvm_emulate_wrmsr(vcpu); } static int handle_tpr_below_threshold(struct kvm_vcpu *vcpu) @@ -4921,7 +5022,7 @@ static int handle_vmcall(struct kvm_vcpu *vcpu) static int handle_invd(struct kvm_vcpu *vcpu) { - return kvm_emulate_instruction(vcpu, 0) == EMULATE_DONE; + return kvm_emulate_instruction(vcpu, 0); } static int handle_invlpg(struct kvm_vcpu *vcpu) @@ -4955,20 +5056,6 @@ static int handle_xsetbv(struct kvm_vcpu *vcpu) return 1; } -static int handle_xsaves(struct kvm_vcpu *vcpu) -{ - kvm_skip_emulated_instruction(vcpu); - WARN(1, "this should never happen\n"); - return 1; -} - -static int handle_xrstors(struct kvm_vcpu *vcpu) -{ - kvm_skip_emulated_instruction(vcpu); - WARN(1, "this should never happen\n"); - return 1; -} - static int handle_apic_access(struct kvm_vcpu *vcpu) { if (likely(fasteoi)) { @@ -4988,7 +5075,7 @@ static int handle_apic_access(struct kvm_vcpu *vcpu) return kvm_skip_emulated_instruction(vcpu); } } - return kvm_emulate_instruction(vcpu, 0) == EMULATE_DONE; + return kvm_emulate_instruction(vcpu, 0); } static int handle_apic_eoi_induced(struct kvm_vcpu *vcpu) @@ -5057,23 +5144,15 @@ static int handle_task_switch(struct kvm_vcpu *vcpu) if (!idt_v || (type != INTR_TYPE_HARD_EXCEPTION && type != INTR_TYPE_EXT_INTR && type != INTR_TYPE_NMI_INTR)) - skip_emulated_instruction(vcpu); - - if (kvm_task_switch(vcpu, tss_selector, - type == INTR_TYPE_SOFT_INTR ? 
idt_index : -1, reason, - has_error_code, error_code) == EMULATE_FAIL) { - vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; - vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; - vcpu->run->internal.ndata = 0; - return 0; - } + WARN_ON(!skip_emulated_instruction(vcpu)); /* * TODO: What about debug traps on tss switch? * Are we supposed to inject them and update dr6? */ - - return 1; + return kvm_task_switch(vcpu, tss_selector, + type == INTR_TYPE_SOFT_INTR ? idt_index : -1, + reason, has_error_code, error_code); } static int handle_ept_violation(struct kvm_vcpu *vcpu) @@ -5132,21 +5211,7 @@ static int handle_ept_misconfig(struct kvm_vcpu *vcpu) if (!is_guest_mode(vcpu) && !kvm_io_bus_write(vcpu, KVM_FAST_MMIO_BUS, gpa, 0, NULL)) { trace_kvm_fast_mmio(gpa); - /* - * Doing kvm_skip_emulated_instruction() depends on undefined - * behavior: Intel's manual doesn't mandate - * VM_EXIT_INSTRUCTION_LEN to be set in VMCS when EPT MISCONFIG - * occurs and while on real hardware it was observed to be set, - * other hypervisors (namely Hyper-V) don't set it, we end up - * advancing IP with some random value. Disable fast mmio when - * running nested and keep it for real hardware in hope that - * VM_EXIT_INSTRUCTION_LEN will always be set correctly. - */ - if (!static_cpu_has(X86_FEATURE_HYPERVISOR)) - return kvm_skip_emulated_instruction(vcpu); - else - return kvm_emulate_instruction(vcpu, EMULTYPE_SKIP) == - EMULATE_DONE; + return kvm_skip_emulated_instruction(vcpu); } return kvm_mmu_page_fault(vcpu, gpa, PFERR_RSVD_MASK, NULL, 0); @@ -5165,8 +5230,6 @@ static int handle_nmi_window(struct kvm_vcpu *vcpu) static int handle_invalid_guest_state(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); - enum emulation_result err = EMULATE_DONE; - int ret = 1; bool intr_window_requested; unsigned count = 130; @@ -5187,71 +5250,67 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu) if (kvm_test_request(KVM_REQ_EVENT, vcpu)) return 1; - err = kvm_emulate_instruction(vcpu, 0); - - if (err == EMULATE_USER_EXIT) { - ++vcpu->stat.mmio_exits; - ret = 0; - goto out; - } - - if (err != EMULATE_DONE) - goto emulation_error; + if (!kvm_emulate_instruction(vcpu, 0)) + return 0; if (vmx->emulation_required && !vmx->rmode.vm86_active && - vcpu->arch.exception.pending) - goto emulation_error; + vcpu->arch.exception.pending) { + vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + vcpu->run->internal.suberror = + KVM_INTERNAL_ERROR_EMULATION; + vcpu->run->internal.ndata = 0; + return 0; + } if (vcpu->arch.halt_request) { vcpu->arch.halt_request = 0; - ret = kvm_vcpu_halt(vcpu); - goto out; + return kvm_vcpu_halt(vcpu); } + /* + * Note, return 1 and not 0, vcpu_run() is responsible for + * morphing the pending signal into the proper return code. 
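The comment above about returning 1 rather than 0 is the heart of the convention these hunks converge on: exit handlers (and now kvm_emulate_instruction()) return 1 to resume the guest and 0 to exit to userspace, instead of translating an EMULATE_* enum. A rough illustration of that convention; the handler name and the widget_handled_in_kernel() helper are invented for this sketch, not part of the patch:

static int handle_widget_exit(struct kvm_vcpu *vcpu)
{
	if (widget_handled_in_kernel(vcpu))
		return 1;	/* resume the guest */

	/* Otherwise populate kvm_run and bounce to userspace. */
	vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
	vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
	vcpu->run->internal.ndata = 0;
	return 0;	/* exit to userspace */
}

With kvm_emulate_instruction() returning the same 0/1 values, callers can simply propagate its result, which is what most of the conversions in this file do.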
+ */ if (signal_pending(current)) - goto out; + return 1; + if (need_resched()) schedule(); } -out: - return ret; - -emulation_error: - vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; - vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; - vcpu->run->internal.ndata = 0; - return 0; + return 1; } static void grow_ple_window(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); - int old = vmx->ple_window; + unsigned int old = vmx->ple_window; vmx->ple_window = __grow_ple_window(old, ple_window, ple_window_grow, ple_window_max); - if (vmx->ple_window != old) + if (vmx->ple_window != old) { vmx->ple_window_dirty = true; - - trace_kvm_ple_window_grow(vcpu->vcpu_id, vmx->ple_window, old); + trace_kvm_ple_window_update(vcpu->vcpu_id, + vmx->ple_window, old); + } } static void shrink_ple_window(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); - int old = vmx->ple_window; + unsigned int old = vmx->ple_window; vmx->ple_window = __shrink_ple_window(old, ple_window, ple_window_shrink, ple_window); - if (vmx->ple_window != old) + if (vmx->ple_window != old) { vmx->ple_window_dirty = true; - - trace_kvm_ple_window_shrink(vcpu->vcpu_id, vmx->ple_window, old); + trace_kvm_ple_window_update(vcpu->vcpu_id, + vmx->ple_window, old); + } } /* @@ -5541,8 +5600,6 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = { [EXIT_REASON_INVVPID] = handle_vmx_instruction, [EXIT_REASON_RDRAND] = handle_invalid_op, [EXIT_REASON_RDSEED] = handle_invalid_op, - [EXIT_REASON_XSAVES] = handle_xsaves, - [EXIT_REASON_XRSTORS] = handle_xrstors, [EXIT_REASON_PML_FULL] = handle_pml_full, [EXIT_REASON_INVPCID] = handle_invpcid, [EXIT_REASON_VMFUNC] = handle_vmx_instruction, @@ -5887,8 +5944,13 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu) else { vcpu_unimpl(vcpu, "vmx: unexpected exit reason 0x%x\n", exit_reason); - kvm_queue_exception(vcpu, UD_VECTOR); - return 1; + dump_vmcs(); + vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + vcpu->run->internal.suberror = + KVM_INTERNAL_ERROR_UNEXPECTED_EXIT_REASON; + vcpu->run->internal.ndata = 1; + vcpu->run->internal.data[0] = exit_reason; + return 0; } } @@ -6089,7 +6151,7 @@ static int vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu) if (pi_test_on(&vmx->pi_desc)) { pi_clear_on(&vmx->pi_desc); /* - * IOMMU can write to PIR.ON, so the barrier matters even on UP. + * IOMMU can write to PID.ON, so the barrier matters even on UP. * But on x86 this is just a compiler barrier anyway. 
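A note on the grow_ple_window()/shrink_ple_window() changes above: ple_window becomes unsigned int and the consolidated kvm_ple_window_update tracepoint only fires when the value actually changes. The clamping helper they call is not shown in this diff; presumably it looks something like the reconstruction below, doing the arithmetic in 64 bits so the explicit ceiling, not integer overflow, bounds the result:

static unsigned int __grow_ple_window(unsigned int val, unsigned int base,
				      unsigned int modifier, unsigned int max)
{
	u64 ret = val;

	if (modifier < 1)
		return base;

	if (modifier < base)
		ret *= modifier;	/* multiplicative growth */
	else
		ret += modifier;	/* additive growth */

	return min(ret, (u64)max);	/* clamp in 64-bit math */
}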
*/ smp_mb__after_atomic(); @@ -6119,7 +6181,10 @@ static int vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu) static bool vmx_dy_apicv_has_pending_interrupt(struct kvm_vcpu *vcpu) { - return pi_test_on(vcpu_to_pi_desc(vcpu)); + struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu); + + return pi_test_on(pi_desc) || + (pi_test_sn(pi_desc) && !pi_is_pir_empty(pi_desc)); } static void vmx_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap) @@ -6373,6 +6438,23 @@ static void atomic_switch_perf_msrs(struct vcpu_vmx *vmx) msrs[i].host, false); } +static void atomic_switch_umwait_control_msr(struct vcpu_vmx *vmx) +{ + u32 host_umwait_control; + + if (!vmx_has_waitpkg(vmx)) + return; + + host_umwait_control = get_umwait_control_msr(); + + if (vmx->msr_ia32_umwait_control != host_umwait_control) + add_atomic_switch_msr(vmx, MSR_IA32_UMWAIT_CONTROL, + vmx->msr_ia32_umwait_control, + host_umwait_control, false); + else + clear_atomic_switch_msr(vmx, MSR_IA32_UMWAIT_CONTROL); +} + static void vmx_update_hv_timer(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); @@ -6467,6 +6549,7 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu) pt_guest_enter(vmx); atomic_switch_perf_msrs(vmx); + atomic_switch_umwait_control_msr(vmx); if (enable_preemption_timer) vmx_update_hv_timer(vcpu); @@ -6522,6 +6605,9 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu) current_evmcs->hv_clean_fields |= HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL; + if (static_branch_unlikely(&enable_evmcs)) + current_evmcs->hv_vp_id = vcpu->arch.hyperv.vp_index; + /* MSR_IA32_DEBUGCTLMSR is zeroed on vmexit. Restore it if needed */ if (vmx->host_debugctlmsr) update_debugctlmsr(vmx->host_debugctlmsr); @@ -6589,6 +6675,7 @@ static struct kvm *vmx_vm_alloc(void) static void vmx_vm_free(struct kvm *kvm) { + kfree(kvm->arch.hyperv.hv_pa_pg); vfree(to_kvm_vmx(kvm)); } @@ -6615,6 +6702,9 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) unsigned long *msr_bitmap; int cpu; + BUILD_BUG_ON_MSG(offsetof(struct vcpu_vmx, vcpu) != 0, + "struct kvm_vcpu must be at offset 0 for arch usercopy region"); + vmx = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL_ACCOUNT); if (!vmx) return ERR_PTR(-ENOMEM); @@ -7369,10 +7459,14 @@ static int vmx_update_pi_irte(struct kvm *kvm, unsigned int host_irq, * irqbalance to make the interrupts single-CPU. * * We will support full lowest-priority interrupt later. + * + * In addition, we can only inject generic interrupts using + * the PI mechanism, refuse to route others through it. */ kvm_set_msi_irq(kvm, e, &irq); - if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu)) { + if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu) || + !kvm_irq_is_postable(&irq)) { /* * Make sure the IRTE is in remapped mode if * we don't handle it in posted mode. 
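The kvm_irq_is_postable() check added to vmx_update_pi_irte() above keeps anything but ordinary interrupts out of the posted-interrupt fast path; its body is not part of this hunk, but it presumably reduces to a delivery-mode test along these lines (a sketch under that assumption):

static inline bool kvm_irq_is_postable(struct kvm_lapic_irq *irq)
{
	/*
	 * Only fixed and lowest-priority interrupts can be posted;
	 * NMI, SMI, INIT and friends must stay in remapped mode.
	 */
	return irq->delivery_mode == APIC_DM_FIXED ||
	       irq->delivery_mode == APIC_DM_LOWEST;
}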
@@ -7474,6 +7568,11 @@ static bool vmx_need_emulation_on_page_fault(struct kvm_vcpu *vcpu) return false; } +static bool vmx_apic_init_signal_blocked(struct kvm_vcpu *vcpu) +{ + return to_vmx(vcpu)->nested.vmxon; +} + static __init int hardware_setup(void) { unsigned long host_bndcfgs; @@ -7799,6 +7898,7 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = { .nested_enable_evmcs = NULL, .nested_get_evmcs_version = NULL, .need_emulation_on_page_fault = vmx_need_emulation_on_page_fault, + .apic_init_signal_blocked = vmx_apic_init_signal_blocked, }; static void vmx_cleanup_l1d_flush(void) @@ -7835,6 +7935,7 @@ static void vmx_exit(void) if (!vp_ap) continue; + vp_ap->nested_control.features.directhypercall = 0; vp_ap->current_nested_vmcs = 0; vp_ap->enlighten_vmentry = 0; } @@ -7874,6 +7975,11 @@ static int __init vmx_init(void) pr_info("KVM: vmx: using Hyper-V Enlightened VMCS\n"); static_branch_enable(&enable_evmcs); } + + if (ms_hyperv.nested_features & HV_X64_NESTED_DIRECT_FLUSH) + vmx_x86_ops.enable_direct_tlbflush + = hv_enable_direct_tlbflush; + } else { enlightened_vmcs = false; } @@ -7891,12 +7997,10 @@ static int __init vmx_init(void) * contain 'auto' which will be turned into the default 'cond' * mitigation mode. */ - if (boot_cpu_has(X86_BUG_L1TF)) { - r = vmx_setup_l1d_flush(vmentry_l1d_flush_param); - if (r) { - vmx_exit(); - return r; - } + r = vmx_setup_l1d_flush(vmentry_l1d_flush_param); + if (r) { + vmx_exit(); + return r; } #ifdef CONFIG_KEXEC_CORE diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h index 82d0bc3a4d52..5a0f34b1e226 100644 --- a/arch/x86/kvm/vmx/vmx.h +++ b/arch/x86/kvm/vmx/vmx.h @@ -14,6 +14,8 @@ extern const u32 vmx_msr_index[]; extern u64 host_efer; +extern u32 get_umwait_control_msr(void); + #define MSR_TYPE_R 1 #define MSR_TYPE_W 2 #define MSR_TYPE_RW 3 @@ -211,6 +213,7 @@ struct vcpu_vmx { #endif u64 spec_ctrl; + u32 msr_ia32_umwait_control; u32 secondary_exec_control; @@ -253,7 +256,7 @@ struct vcpu_vmx { struct nested_vmx nested; /* Dynamic PLE window. 
*/ - int ple_window; + unsigned int ple_window; bool ple_window_dirty; bool req_immediate_exit; @@ -352,6 +355,11 @@ static inline int pi_test_and_set_pir(int vector, struct pi_desc *pi_desc) return test_and_set_bit(vector, (unsigned long *)pi_desc->pir); } +static inline bool pi_is_pir_empty(struct pi_desc *pi_desc) +{ + return bitmap_empty((unsigned long *)pi_desc->pir, NR_VECTORS); +} + static inline void pi_set_sn(struct pi_desc *pi_desc) { set_bit(POSTED_INTR_SN, @@ -370,6 +378,12 @@ static inline void pi_clear_on(struct pi_desc *pi_desc) (unsigned long *)&pi_desc->control); } +static inline void pi_clear_sn(struct pi_desc *pi_desc) +{ + clear_bit(POSTED_INTR_SN, + (unsigned long *)&pi_desc->control); +} + static inline int pi_test_on(struct pi_desc *pi_desc) { return test_bit(POSTED_INTR_ON, @@ -497,6 +511,12 @@ static inline void decache_tsc_multiplier(struct vcpu_vmx *vmx) vmcs_write64(TSC_MULTIPLIER, vmx->current_tsc_ratio); } +static inline bool vmx_has_waitpkg(struct vcpu_vmx *vmx) +{ + return vmx->secondary_exec_control & + SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE; +} + void dump_vmcs(void); #endif /* __KVM_X86_VMX_H */ diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 290c3c3efb87..5d530521f11d 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -92,8 +92,8 @@ u64 __read_mostly efer_reserved_bits = ~((u64)(EFER_SCE | EFER_LME | EFER_LMA)); static u64 __read_mostly efer_reserved_bits = ~((u64)EFER_SCE); #endif -#define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM -#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU +#define VM_STAT(x, ...) offsetof(struct kvm, stat.x), KVM_STAT_VM, ## __VA_ARGS__ +#define VCPU_STAT(x, ...) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU, ## __VA_ARGS__ #define KVM_X2APIC_API_VALID_FLAGS (KVM_X2APIC_API_USE_32BIT_IDS | \ KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK) @@ -212,7 +212,8 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { { "mmu_cache_miss", VM_STAT(mmu_cache_miss) }, { "mmu_unsync", VM_STAT(mmu_unsync) }, { "remote_tlb_flush", VM_STAT(remote_tlb_flush) }, - { "largepages", VM_STAT(lpages) }, + { "largepages", VM_STAT(lpages, .mode = 0444) }, + { "nx_largepages_splitted", VM_STAT(nx_lpage_splits, .mode = 0444) }, { "max_mmu_page_hash_collisions", VM_STAT(max_mmu_page_hash_collisions) }, { NULL } @@ -360,7 +361,7 @@ EXPORT_SYMBOL_GPL(kvm_set_apic_base); asmlinkage __visible void kvm_spurious_fault(void) { /* Fault while not rebooting. We want the trace. */ - BUG(); + BUG_ON(!kvm_rebooting); } EXPORT_SYMBOL_GPL(kvm_spurious_fault); @@ -674,8 +675,14 @@ static int kvm_read_nested_guest_page(struct kvm_vcpu *vcpu, gfn_t gfn, data, offset, len, access); } +static inline u64 pdptr_rsvd_bits(struct kvm_vcpu *vcpu) +{ + return rsvd_bits(cpuid_maxphyaddr(vcpu), 63) | rsvd_bits(5, 8) | + rsvd_bits(1, 2); +} + /* - * Load the pae pdptrs. Return true is they are all valid. + * Load the pae pdptrs. Return 1 if they are all valid, 0 otherwise. 
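The new pdptr_rsvd_bits() helper above hard-codes the architectural PAE PDPTE reserved bits instead of borrowing the MMU's cached mask, which need not match the PDPTE format. A worked example of the mask, assuming cpuid_maxphyaddr() returns 36 (the function below is illustrative, not from the patch):

static bool pdpte_valid_example(u64 pdpte)
{
	/* bits 63:36 | bits 8:5 | bits 2:1 == 0xfffffff0000001e6 */
	const u64 rsvd = rsvd_bits(36, 63) | rsvd_bits(5, 8) |
			 rsvd_bits(1, 2);

	/*
	 * Mirrors the load_pdptrs() check below: a present PDPTE
	 * with any reserved bit set is rejected.
	 */
	return !(pdpte & PT_PRESENT_MASK) || !(pdpte & rsvd);
}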
*/ int load_pdptrs(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, unsigned long cr3) { @@ -694,8 +701,7 @@ int load_pdptrs(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, unsigned long cr3) } for (i = 0; i < ARRAY_SIZE(pdpte); ++i) { if ((pdpte[i] & PT_PRESENT_MASK) && - (pdpte[i] & - vcpu->arch.mmu->guest_rsvd_check.rsvd_bits_mask[0][2])) { + (pdpte[i] & pdptr_rsvd_bits(vcpu))) { ret = 0; goto out; } @@ -879,34 +885,42 @@ int kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr) } EXPORT_SYMBOL_GPL(kvm_set_xcr); -int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) +static int kvm_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) { - unsigned long old_cr4 = kvm_read_cr4(vcpu); - unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE | - X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_PKE; - if (cr4 & CR4_RESERVED_BITS) - return 1; + return -EINVAL; if (!guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) && (cr4 & X86_CR4_OSXSAVE)) - return 1; + return -EINVAL; if (!guest_cpuid_has(vcpu, X86_FEATURE_SMEP) && (cr4 & X86_CR4_SMEP)) - return 1; + return -EINVAL; if (!guest_cpuid_has(vcpu, X86_FEATURE_SMAP) && (cr4 & X86_CR4_SMAP)) - return 1; + return -EINVAL; if (!guest_cpuid_has(vcpu, X86_FEATURE_FSGSBASE) && (cr4 & X86_CR4_FSGSBASE)) - return 1; + return -EINVAL; if (!guest_cpuid_has(vcpu, X86_FEATURE_PKU) && (cr4 & X86_CR4_PKE)) - return 1; + return -EINVAL; if (!guest_cpuid_has(vcpu, X86_FEATURE_LA57) && (cr4 & X86_CR4_LA57)) - return 1; + return -EINVAL; if (!guest_cpuid_has(vcpu, X86_FEATURE_UMIP) && (cr4 & X86_CR4_UMIP)) + return -EINVAL; + + return 0; +} + +int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) +{ + unsigned long old_cr4 = kvm_read_cr4(vcpu); + unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE | + X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_PKE; + + if (kvm_valid_cr4(vcpu, cr4)) return 1; if (is_long_mode(vcpu)) { @@ -1119,13 +1133,15 @@ EXPORT_SYMBOL_GPL(kvm_rdpmc); * List of msr numbers which we expose to userspace through KVM_GET_MSRS * and KVM_SET_MSRS, and KVM_GET_MSR_INDEX_LIST. * - * This list is modified at module load time to reflect the + * The three MSR lists(msrs_to_save, emulated_msrs, msr_based_features) + * extract the supported MSRs from the related const lists. + * msrs_to_save is selected from the msrs_to_save_all to reflect the * capabilities of the host cpu. This capabilities test skips MSRs that are - * kvm-specific. Those are put in emulated_msrs; filtering of emulated_msrs + * kvm-specific. Those are put in emulated_msrs_all; filtering of emulated_msrs * may depend on host virtualization features rather than host cpu features. 
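The MSR list rework that follows replaces in-place compaction of msrs_to_save and friends with immutable *_all master tables plus same-sized mutable arrays filled at init time. Stripped of the KVM specifics, the pattern looks like this (all names, and the feature_supported() predicate, are hypothetical):

static const u32 features_all[] = { 0x10, 0x1a0, 0x277, 0x6e0 };
static u32 features[ARRAY_SIZE(features_all)];
static unsigned int num_features;

static void init_feature_list(void)
{
	unsigned int i;

	num_features = 0;	/* safe to rerun, unlike compaction */
	for (i = 0; i < ARRAY_SIZE(features_all); i++) {
		if (!feature_supported(features_all[i]))
			continue;
		features[num_features++] = features_all[i];
	}
}

Keeping the master table const lets it live in rodata, and resetting the counter up front makes the init function idempotent, which in-place compaction of a single array was not.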
*/ -static u32 msrs_to_save[] = { +static const u32 msrs_to_save_all[] = { MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP, MSR_STAR, #ifdef CONFIG_X86_64 @@ -1140,11 +1156,36 @@ static u32 msrs_to_save[] = { MSR_IA32_RTIT_ADDR1_A, MSR_IA32_RTIT_ADDR1_B, MSR_IA32_RTIT_ADDR2_A, MSR_IA32_RTIT_ADDR2_B, MSR_IA32_RTIT_ADDR3_A, MSR_IA32_RTIT_ADDR3_B, + MSR_IA32_UMWAIT_CONTROL, + + MSR_ARCH_PERFMON_FIXED_CTR0, MSR_ARCH_PERFMON_FIXED_CTR1, + MSR_ARCH_PERFMON_FIXED_CTR0 + 2, MSR_ARCH_PERFMON_FIXED_CTR0 + 3, + MSR_CORE_PERF_FIXED_CTR_CTRL, MSR_CORE_PERF_GLOBAL_STATUS, + MSR_CORE_PERF_GLOBAL_CTRL, MSR_CORE_PERF_GLOBAL_OVF_CTRL, + MSR_ARCH_PERFMON_PERFCTR0, MSR_ARCH_PERFMON_PERFCTR1, + MSR_ARCH_PERFMON_PERFCTR0 + 2, MSR_ARCH_PERFMON_PERFCTR0 + 3, + MSR_ARCH_PERFMON_PERFCTR0 + 4, MSR_ARCH_PERFMON_PERFCTR0 + 5, + MSR_ARCH_PERFMON_PERFCTR0 + 6, MSR_ARCH_PERFMON_PERFCTR0 + 7, + MSR_ARCH_PERFMON_PERFCTR0 + 8, MSR_ARCH_PERFMON_PERFCTR0 + 9, + MSR_ARCH_PERFMON_PERFCTR0 + 10, MSR_ARCH_PERFMON_PERFCTR0 + 11, + MSR_ARCH_PERFMON_PERFCTR0 + 12, MSR_ARCH_PERFMON_PERFCTR0 + 13, + MSR_ARCH_PERFMON_PERFCTR0 + 14, MSR_ARCH_PERFMON_PERFCTR0 + 15, + MSR_ARCH_PERFMON_PERFCTR0 + 16, MSR_ARCH_PERFMON_PERFCTR0 + 17, + MSR_ARCH_PERFMON_EVENTSEL0, MSR_ARCH_PERFMON_EVENTSEL1, + MSR_ARCH_PERFMON_EVENTSEL0 + 2, MSR_ARCH_PERFMON_EVENTSEL0 + 3, + MSR_ARCH_PERFMON_EVENTSEL0 + 4, MSR_ARCH_PERFMON_EVENTSEL0 + 5, + MSR_ARCH_PERFMON_EVENTSEL0 + 6, MSR_ARCH_PERFMON_EVENTSEL0 + 7, + MSR_ARCH_PERFMON_EVENTSEL0 + 8, MSR_ARCH_PERFMON_EVENTSEL0 + 9, + MSR_ARCH_PERFMON_EVENTSEL0 + 10, MSR_ARCH_PERFMON_EVENTSEL0 + 11, + MSR_ARCH_PERFMON_EVENTSEL0 + 12, MSR_ARCH_PERFMON_EVENTSEL0 + 13, + MSR_ARCH_PERFMON_EVENTSEL0 + 14, MSR_ARCH_PERFMON_EVENTSEL0 + 15, + MSR_ARCH_PERFMON_EVENTSEL0 + 16, MSR_ARCH_PERFMON_EVENTSEL0 + 17, }; +static u32 msrs_to_save[ARRAY_SIZE(msrs_to_save_all)]; static unsigned num_msrs_to_save; -static u32 emulated_msrs[] = { +static const u32 emulated_msrs_all[] = { MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK, MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW, HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, @@ -1183,7 +1224,7 @@ static u32 emulated_msrs[] = { * by arch/x86/kvm/vmx/nested.c based on CPUID or other MSRs. * We always support the "true" VMX control MSRs, even if the host * processor does not, so I am putting these registers here rather - * than in msrs_to_save. + * than in msrs_to_save_all. */ MSR_IA32_VMX_BASIC, MSR_IA32_VMX_TRUE_PINBASED_CTLS, @@ -1202,13 +1243,14 @@ static u32 emulated_msrs[] = { MSR_KVM_POLL_CONTROL, }; +static u32 emulated_msrs[ARRAY_SIZE(emulated_msrs_all)]; static unsigned num_emulated_msrs; /* * List of msr numbers which are used to expose MSR-based features that * can be used by a hypervisor to validate requested CPU features. */ -static u32 msr_based_features[] = { +static const u32 msr_based_features_all[] = { MSR_IA32_VMX_BASIC, MSR_IA32_VMX_TRUE_PINBASED_CTLS, MSR_IA32_VMX_PINBASED_CTLS, @@ -1233,6 +1275,7 @@ static u32 msr_based_features[] = { MSR_IA32_ARCH_CAPABILITIES, }; +static u32 msr_based_features[ARRAY_SIZE(msr_based_features_all)]; static unsigned int num_msr_based_features; static u64 kvm_get_arch_capabilities(void) @@ -1243,6 +1286,14 @@ static u64 kvm_get_arch_capabilities(void) rdmsrl(MSR_IA32_ARCH_CAPABILITIES, data); /* + * If nx_huge_pages is enabled, KVM's shadow paging will ensure that + * the nested hypervisor runs with NX huge pages. 
If it is not, + * L1 is anyway vulnerable to ITLB_MULTIHIT exploits from other + * L1 guests, so it need not worry about its own (L2) guests. + */ + data |= ARCH_CAP_PSCHANGE_MC_NO; + + /* * If we're doing cache flushes (either "always" or "cond") * we will do one whenever the guest does a vmlaunch/vmresume. * If an outer hypervisor is doing the cache flush for us @@ -1254,6 +1305,32 @@ static u64 kvm_get_arch_capabilities(void) if (l1tf_vmx_mitigation != VMENTER_L1D_FLUSH_NEVER) data |= ARCH_CAP_SKIP_VMENTRY_L1DFLUSH; + if (!boot_cpu_has_bug(X86_BUG_CPU_MELTDOWN)) + data |= ARCH_CAP_RDCL_NO; + if (!boot_cpu_has_bug(X86_BUG_SPEC_STORE_BYPASS)) + data |= ARCH_CAP_SSB_NO; + if (!boot_cpu_has_bug(X86_BUG_MDS)) + data |= ARCH_CAP_MDS_NO; + + /* + * On TAA affected systems, export MDS_NO=0 when: + * - TSX is enabled on the host, i.e. X86_FEATURE_RTM=1. + * - Updated microcode is present. This is detected by + * the presence of ARCH_CAP_TSX_CTRL_MSR and ensures + * that VERW clears CPU buffers. + * + * When MDS_NO=0 is exported, guests deploy clear CPU buffer + * mitigation and don't complain: + * + * "Vulnerable: Clear CPU buffers attempted, no microcode" + * + * If TSX is disabled on the system, guests are also mitigated against + * TAA and clear CPU buffer mitigation is not required for guests. + */ + if (boot_cpu_has_bug(X86_BUG_TAA) && boot_cpu_has(X86_FEATURE_RTM) && + (data & ARCH_CAP_TSX_CTRL_MSR)) + data &= ~ARCH_CAP_MDS_NO; + return data; } @@ -1351,19 +1428,23 @@ void kvm_enable_efer_bits(u64 mask) EXPORT_SYMBOL_GPL(kvm_enable_efer_bits); /* - * Writes msr value into into the appropriate "register". + * Write @data into the MSR specified by @index. Select MSR specific fault + * checks are bypassed if @host_initiated is %true. * Returns 0 on success, non-0 otherwise. * Assumes vcpu_load() was already called. */ -int kvm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) +static int __kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data, + bool host_initiated) { - switch (msr->index) { + struct msr_data msr; + + switch (index) { case MSR_FS_BASE: case MSR_GS_BASE: case MSR_KERNEL_GS_BASE: case MSR_CSTAR: case MSR_LSTAR: - if (is_noncanonical_address(msr->data, vcpu)) + if (is_noncanonical_address(data, vcpu)) return 1; break; case MSR_IA32_SYSENTER_EIP: @@ -1380,38 +1461,95 @@ int kvm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) * value, and that something deterministic happens if the guest * invokes 64-bit SYSENTER. */ - msr->data = get_canonical(msr->data, vcpu_virt_addr_bits(vcpu)); + data = get_canonical(data, vcpu_virt_addr_bits(vcpu)); } - return kvm_x86_ops->set_msr(vcpu, msr); + + msr.data = data; + msr.index = index; + msr.host_initiated = host_initiated; + + return kvm_x86_ops->set_msr(vcpu, &msr); } -EXPORT_SYMBOL_GPL(kvm_set_msr); /* - * Adapt set_msr() to msr_io()'s calling convention + * Read the MSR specified by @index into @data. Select MSR specific fault + * checks are bypassed if @host_initiated is %true. + * Returns 0 on success, non-0 otherwise. + * Assumes vcpu_load() was already called. 
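The split gives every caller an explicit choice of fault semantics: kvm_get_msr()/kvm_set_msr() become the guest-initiated wrappers, while the msr_io() adapters below pass host_initiated=true. A hedged sketch of the contrast, written as if inside x86.c (where __kvm_set_msr() and do_set_msr() are static); the wrapper function is invented for illustration:

static int msr_write_paths_example(struct kvm_vcpu *vcpu, u64 data)
{
	/*
	 * Guest-initiated: guest-visible checks apply, e.g. a
	 * non-canonical MSR_LSTAR value is refused (would #GP).
	 */
	if (kvm_set_msr(vcpu, MSR_LSTAR, data))
		return 1;

	/*
	 * Host-initiated (KVM_SET_MSRS ioctl path): select checks are
	 * bypassed so userspace can restore saved state verbatim.
	 */
	return do_set_msr(vcpu, MSR_LSTAR, &data);
}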
*/ -static int do_get_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data) +static int __kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data, + bool host_initiated) { struct msr_data msr; - int r; + int ret; msr.index = index; - msr.host_initiated = true; - r = kvm_get_msr(vcpu, &msr); - if (r) - return r; + msr.host_initiated = host_initiated; - *data = msr.data; - return 0; + ret = kvm_x86_ops->get_msr(vcpu, &msr); + if (!ret) + *data = msr.data; + return ret; } -static int do_set_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data) +int kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data) { - struct msr_data msr; + return __kvm_get_msr(vcpu, index, data, false); +} +EXPORT_SYMBOL_GPL(kvm_get_msr); - msr.data = *data; - msr.index = index; - msr.host_initiated = true; - return kvm_set_msr(vcpu, &msr); +int kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data) +{ + return __kvm_set_msr(vcpu, index, data, false); +} +EXPORT_SYMBOL_GPL(kvm_set_msr); + +int kvm_emulate_rdmsr(struct kvm_vcpu *vcpu) +{ + u32 ecx = kvm_rcx_read(vcpu); + u64 data; + + if (kvm_get_msr(vcpu, ecx, &data)) { + trace_kvm_msr_read_ex(ecx); + kvm_inject_gp(vcpu, 0); + return 1; + } + + trace_kvm_msr_read(ecx, data); + + kvm_rax_write(vcpu, data & -1u); + kvm_rdx_write(vcpu, (data >> 32) & -1u); + return kvm_skip_emulated_instruction(vcpu); +} +EXPORT_SYMBOL_GPL(kvm_emulate_rdmsr); + +int kvm_emulate_wrmsr(struct kvm_vcpu *vcpu) +{ + u32 ecx = kvm_rcx_read(vcpu); + u64 data = kvm_read_edx_eax(vcpu); + + if (kvm_set_msr(vcpu, ecx, data)) { + trace_kvm_msr_write_ex(ecx, data); + kvm_inject_gp(vcpu, 0); + return 1; + } + + trace_kvm_msr_write(ecx, data); + return kvm_skip_emulated_instruction(vcpu); +} +EXPORT_SYMBOL_GPL(kvm_emulate_wrmsr); + +/* + * Adapt set_msr() to msr_io()'s calling convention + */ +static int do_get_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data) +{ + return __kvm_get_msr(vcpu, index, data, true); +} + +static int do_set_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data) +{ + return __kvm_set_msr(vcpu, index, *data, true); } #ifdef CONFIG_X86_64 @@ -2431,6 +2569,7 @@ static int kvm_pv_enable_async_pf(struct kvm_vcpu *vcpu, u64 data) static void kvmclock_reset(struct kvm_vcpu *vcpu) { vcpu->arch.pv_time_enabled = false; + vcpu->arch.time = 0; } static void kvm_vcpu_flush_tlb(struct kvm_vcpu *vcpu, bool invalidate_gpa) @@ -2452,6 +2591,8 @@ static void record_steal_time(struct kvm_vcpu *vcpu) * Doing a TLB flush here, on the guest's behalf, can avoid * expensive IPIs. */ + trace_kvm_pv_tlb_flush(vcpu->vcpu_id, + vcpu->arch.st.steal.preempted & KVM_VCPU_FLUSH_TLB); if (xchg(&vcpu->arch.st.steal.preempted, 0) & KVM_VCPU_FLUSH_TLB) kvm_vcpu_flush_tlb(vcpu, false); @@ -2594,8 +2735,6 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case MSR_KVM_SYSTEM_TIME: { struct kvm_arch *ka = &vcpu->kvm->arch; - kvmclock_reset(vcpu); - if (vcpu->vcpu_id == 0 && !msr_info->host_initiated) { bool tmp = (msr == MSR_KVM_SYSTEM_TIME); @@ -2609,14 +2748,13 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) kvm_make_request(KVM_REQ_GLOBAL_CLOCK_UPDATE, vcpu); /* we verify if the enable bit is set... 
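The kvm_emulate_rdmsr() helper added above splits the 64-bit MSR value across RAX and RDX exactly as the hardware instruction does, with -1u (0xffffffff) serving as the 32-bit mask. A small worked example of the split, using the register accessors from the surrounding code:

static void rdmsr_split_example(struct kvm_vcpu *vcpu)
{
	u64 data = 0x1122334455667788ULL;

	kvm_rax_write(vcpu, data & -1u);	 /* RAX = 0x55667788 */
	kvm_rdx_write(vcpu, (data >> 32) & -1u); /* RDX = 0x11223344 */
}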
*/ + vcpu->arch.pv_time_enabled = false; if (!(data & 1)) break; - if (kvm_gfn_to_hva_cache_init(vcpu->kvm, + if (!kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.pv_time, data & ~1ULL, sizeof(struct pvclock_vcpu_time_info))) - vcpu->arch.pv_time_enabled = false; - else vcpu->arch.pv_time_enabled = true; break; @@ -2748,18 +2886,6 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) } EXPORT_SYMBOL_GPL(kvm_set_msr_common); - -/* - * Reads an msr value (of 'msr_index') into 'pdata'. - * Returns 0 on success, non-0 otherwise. - * Assumes vcpu_load() was already called. - */ -int kvm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) -{ - return kvm_x86_ops->get_msr(vcpu, msr); -} -EXPORT_SYMBOL_GPL(kvm_get_msr); - static int get_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata, bool host) { u64 data; @@ -3106,7 +3232,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_HYPERV_EVENTFD: case KVM_CAP_HYPERV_TLBFLUSH: case KVM_CAP_HYPERV_SEND_IPI: - case KVM_CAP_HYPERV_ENLIGHTENED_VMCS: case KVM_CAP_HYPERV_CPUID: case KVM_CAP_PCI_SEGMENT: case KVM_CAP_DEBUGREGS: @@ -3183,6 +3308,12 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) r = kvm_x86_ops->get_nested_state ? kvm_x86_ops->get_nested_state(NULL, NULL, 0) : 0; break; + case KVM_CAP_HYPERV_DIRECT_TLBFLUSH: + r = kvm_x86_ops->enable_direct_tlbflush != NULL; + break; + case KVM_CAP_HYPERV_ENLIGHTENED_VMCS: + r = kvm_x86_ops->nested_enable_evmcs != NULL; + break; default: break; } @@ -3506,8 +3637,7 @@ static int kvm_vcpu_ioctl_x86_setup_mce(struct kvm_vcpu *vcpu, for (bank = 0; bank < bank_num; bank++) vcpu->arch.mce_banks[bank*4] = ~(u64)0; - if (kvm_x86_ops->setup_mce) - kvm_x86_ops->setup_mce(vcpu); + kvm_x86_ops->setup_mce(vcpu); out: return r; } @@ -3957,6 +4087,11 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu, r = -EFAULT; } return r; + case KVM_CAP_HYPERV_DIRECT_TLBFLUSH: + if (!kvm_x86_ops->enable_direct_tlbflush) + return -ENOTTY; + + return kvm_x86_ops->enable_direct_tlbflush(vcpu); default: return -EINVAL; @@ -4986,18 +5121,28 @@ out: static void kvm_init_msr_list(void) { + struct x86_pmu_capability x86_pmu; u32 dummy[2]; - unsigned i, j; + unsigned i; - for (i = j = 0; i < ARRAY_SIZE(msrs_to_save); i++) { - if (rdmsr_safe(msrs_to_save[i], &dummy[0], &dummy[1]) < 0) + BUILD_BUG_ON_MSG(INTEL_PMC_MAX_FIXED != 4, + "Please update the fixed PMCs in msrs_to_save_all[]"); + + perf_get_x86_pmu_capability(&x86_pmu); + + num_msrs_to_save = 0; + num_emulated_msrs = 0; + num_msr_based_features = 0; + + for (i = 0; i < ARRAY_SIZE(msrs_to_save_all); i++) { + if (rdmsr_safe(msrs_to_save_all[i], &dummy[0], &dummy[1]) < 0) continue; /* * Even MSRs that are valid in the host may not be exposed * to the guests in some cases. */ - switch (msrs_to_save[i]) { + switch (msrs_to_save_all[i]) { case MSR_IA32_BNDCFGS: if (!kvm_mpx_supported()) continue; @@ -5025,43 +5170,43 @@ static void kvm_init_msr_list(void) break; case MSR_IA32_RTIT_ADDR0_A ... MSR_IA32_RTIT_ADDR3_B: { if (!kvm_x86_ops->pt_supported() || - msrs_to_save[i] - MSR_IA32_RTIT_ADDR0_A >= + msrs_to_save_all[i] - MSR_IA32_RTIT_ADDR0_A >= intel_pt_validate_hw_cap(PT_CAP_num_address_ranges) * 2) continue; break; + case MSR_ARCH_PERFMON_PERFCTR0 ... MSR_ARCH_PERFMON_PERFCTR0 + 17: + if (msrs_to_save_all[i] - MSR_ARCH_PERFMON_PERFCTR0 >= + min(INTEL_PMC_MAX_GENERIC, x86_pmu.num_counters_gp)) + continue; + break; + case MSR_ARCH_PERFMON_EVENTSEL0 ... 
MSR_ARCH_PERFMON_EVENTSEL0 + 17: + if (msrs_to_save_all[i] - MSR_ARCH_PERFMON_EVENTSEL0 >= + min(INTEL_PMC_MAX_GENERIC, x86_pmu.num_counters_gp)) + continue; } default: break; } - if (j < i) - msrs_to_save[j] = msrs_to_save[i]; - j++; + msrs_to_save[num_msrs_to_save++] = msrs_to_save_all[i]; } - num_msrs_to_save = j; - for (i = j = 0; i < ARRAY_SIZE(emulated_msrs); i++) { - if (!kvm_x86_ops->has_emulated_msr(emulated_msrs[i])) + for (i = 0; i < ARRAY_SIZE(emulated_msrs_all); i++) { + if (!kvm_x86_ops->has_emulated_msr(emulated_msrs_all[i])) continue; - if (j < i) - emulated_msrs[j] = emulated_msrs[i]; - j++; + emulated_msrs[num_emulated_msrs++] = emulated_msrs_all[i]; } - num_emulated_msrs = j; - for (i = j = 0; i < ARRAY_SIZE(msr_based_features); i++) { + for (i = 0; i < ARRAY_SIZE(msr_based_features_all); i++) { struct kvm_msr_entry msr; - msr.index = msr_based_features[i]; + msr.index = msr_based_features_all[i]; if (kvm_get_msr_feature(&msr)) continue; - if (j < i) - msr_based_features[j] = msr_based_features[i]; - j++; + msr_based_features[num_msr_based_features++] = msr_based_features_all[i]; } - num_msr_based_features = j; } static int vcpu_mmio_write(struct kvm_vcpu *vcpu, gpa_t addr, int len, @@ -5312,6 +5457,13 @@ int kvm_write_guest_virt_system(struct kvm_vcpu *vcpu, gva_t addr, void *val, /* kvm_write_guest_virt_system can pull in tons of pages. */ vcpu->arch.l1tf_flush_l1d = true; + /* + * FIXME: this should call handle_emulation_failure if X86EMUL_IO_NEEDED + * is returned, but our callers are not ready for that and they blindly + * call kvm_inject_page_fault. Ensure that they at least do not leak + * uninitialized kernel stack memory into cr2 and error code. + */ + memset(exception, 0, sizeof(*exception)); return kvm_write_guest_virt_helper(addr, val, bytes, vcpu, PFERR_WRITE_MASK, exception); } @@ -5320,7 +5472,6 @@ EXPORT_SYMBOL_GPL(kvm_write_guest_virt_system); int handle_ud(struct kvm_vcpu *vcpu) { int emul_type = EMULTYPE_TRAP_UD; - enum emulation_result er; char sig[5]; /* ud2; .ascii "kvm" */ struct x86_exception e; @@ -5329,15 +5480,10 @@ int handle_ud(struct kvm_vcpu *vcpu) sig, sizeof(sig), &e) == 0 && memcmp(sig, "\xf\xbkvm", sizeof(sig)) == 0) { kvm_rip_write(vcpu, kvm_rip_read(vcpu) + sizeof(sig)); - emul_type = 0; + emul_type = EMULTYPE_TRAP_UD_FORCED; } - er = kvm_emulate_instruction(vcpu, emul_type); - if (er == EMULATE_USER_EXIT) - return 0; - if (er != EMULATE_DONE) - kvm_queue_exception(vcpu, UD_VECTOR); - return 1; + return kvm_emulate_instruction(vcpu, emul_type); } EXPORT_SYMBOL_GPL(handle_ud); @@ -5370,7 +5516,7 @@ static int vcpu_mmio_gva_to_gpa(struct kvm_vcpu *vcpu, unsigned long gva, */ if (vcpu_match_mmio_gva(vcpu, gva) && !permission_fault(vcpu, vcpu->arch.walk_mmu, - vcpu->arch.access, 0, access)) { + vcpu->arch.mmio_access, 0, access)) { *gpa = vcpu->arch.mmio_gfn << PAGE_SHIFT | (gva & (PAGE_SIZE - 1)); trace_vcpu_match_mmio(gva, *gpa, write, false); @@ -5964,28 +6110,13 @@ static void emulator_set_segment(struct x86_emulate_ctxt *ctxt, u16 selector, static int emulator_get_msr(struct x86_emulate_ctxt *ctxt, u32 msr_index, u64 *pdata) { - struct msr_data msr; - int r; - - msr.index = msr_index; - msr.host_initiated = false; - r = kvm_get_msr(emul_to_vcpu(ctxt), &msr); - if (r) - return r; - - *pdata = msr.data; - return 0; + return kvm_get_msr(emul_to_vcpu(ctxt), msr_index, pdata); } static int emulator_set_msr(struct x86_emulate_ctxt *ctxt, u32 msr_index, u64 data) { - struct msr_data msr; - - msr.data = data; - msr.index = msr_index; - 
msr.host_initiated = false; - return kvm_set_msr(emul_to_vcpu(ctxt), &msr); + return kvm_set_msr(emul_to_vcpu(ctxt), msr_index, data); } static u64 emulator_get_smbase(struct x86_emulate_ctxt *ctxt) @@ -6068,6 +6199,11 @@ static void emulator_post_leave_smm(struct x86_emulate_ctxt *ctxt) kvm_smm_changed(emul_to_vcpu(ctxt)); } +static int emulator_set_xcr(struct x86_emulate_ctxt *ctxt, u32 index, u64 xcr) +{ + return __kvm_set_xcr(emul_to_vcpu(ctxt), index, xcr); +} + static const struct x86_emulate_ops emulate_ops = { .read_gpr = emulator_read_gpr, .write_gpr = emulator_write_gpr, @@ -6109,6 +6245,7 @@ static const struct x86_emulate_ops emulate_ops = { .set_hflags = emulator_set_hflags, .pre_leave_smm = emulator_pre_leave_smm, .post_leave_smm = emulator_post_leave_smm, + .set_xcr = emulator_set_xcr, }; static void toggle_interruptibility(struct kvm_vcpu *vcpu, u32 mask) @@ -6168,7 +6305,7 @@ static void init_emulate_ctxt(struct kvm_vcpu *vcpu) vcpu->arch.emulate_regs_need_sync_from_vcpu = false; } -int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip) +void kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip) { struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt; int ret; @@ -6180,37 +6317,43 @@ int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip) ctxt->_eip = ctxt->eip + inc_eip; ret = emulate_int_real(ctxt, irq); - if (ret != X86EMUL_CONTINUE) - return EMULATE_FAIL; - - ctxt->eip = ctxt->_eip; - kvm_rip_write(vcpu, ctxt->eip); - kvm_set_rflags(vcpu, ctxt->eflags); - - return EMULATE_DONE; + if (ret != X86EMUL_CONTINUE) { + kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu); + } else { + ctxt->eip = ctxt->_eip; + kvm_rip_write(vcpu, ctxt->eip); + kvm_set_rflags(vcpu, ctxt->eflags); + } } EXPORT_SYMBOL_GPL(kvm_inject_realmode_interrupt); static int handle_emulation_failure(struct kvm_vcpu *vcpu, int emulation_type) { - int r = EMULATE_DONE; - ++vcpu->stat.insn_emulation_fail; trace_kvm_emulate_insn_failed(vcpu); - if (emulation_type & EMULTYPE_NO_UD_ON_FAIL) - return EMULATE_FAIL; + if (emulation_type & EMULTYPE_VMWARE_GP) { + kvm_queue_exception_e(vcpu, GP_VECTOR, 0); + return 1; + } - if (!is_guest_mode(vcpu) && kvm_x86_ops->get_cpl(vcpu) == 0) { + if (emulation_type & EMULTYPE_SKIP) { vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; vcpu->run->internal.ndata = 0; - r = EMULATE_USER_EXIT; + return 0; } kvm_queue_exception(vcpu, UD_VECTOR); - return r; + if (!is_guest_mode(vcpu) && kvm_x86_ops->get_cpl(vcpu) == 0) { + vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; + vcpu->run->internal.ndata = 0; + return 0; + } + + return 1; } static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t cr2, @@ -6365,7 +6508,7 @@ static int kvm_vcpu_check_hw_bp(unsigned long addr, u32 type, u32 dr7, return dr6; } -static void kvm_vcpu_do_singlestep(struct kvm_vcpu *vcpu, int *r) +static int kvm_vcpu_do_singlestep(struct kvm_vcpu *vcpu) { struct kvm_run *kvm_run = vcpu->run; @@ -6374,18 +6517,20 @@ static void kvm_vcpu_do_singlestep(struct kvm_vcpu *vcpu, int *r) kvm_run->debug.arch.pc = vcpu->arch.singlestep_rip; kvm_run->debug.arch.exception = DB_VECTOR; kvm_run->exit_reason = KVM_EXIT_DEBUG; - *r = EMULATE_USER_EXIT; - } else { - kvm_queue_exception_p(vcpu, DB_VECTOR, DR6_BS); + return 0; } + kvm_queue_exception_p(vcpu, DB_VECTOR, DR6_BS); + return 1; } int kvm_skip_emulated_instruction(struct kvm_vcpu 
*vcpu) { unsigned long rflags = kvm_x86_ops->get_rflags(vcpu); - int r = EMULATE_DONE; + int r; - kvm_x86_ops->skip_emulated_instruction(vcpu); + r = kvm_x86_ops->skip_emulated_instruction(vcpu); + if (unlikely(!r)) + return 0; /* * rflags is the old, "raw" value of the flags. The new value has @@ -6396,8 +6541,8 @@ int kvm_skip_emulated_instruction(struct kvm_vcpu *vcpu) * that sets the TF flag". */ if (unlikely(rflags & X86_EFLAGS_TF)) - kvm_vcpu_do_singlestep(vcpu, &r); - return r == EMULATE_DONE; + r = kvm_vcpu_do_singlestep(vcpu); + return r; } EXPORT_SYMBOL_GPL(kvm_skip_emulated_instruction); @@ -6416,7 +6561,7 @@ static bool kvm_vcpu_check_breakpoint(struct kvm_vcpu *vcpu, int *r) kvm_run->debug.arch.pc = eip; kvm_run->debug.arch.exception = DB_VECTOR; kvm_run->exit_reason = KVM_EXIT_DEBUG; - *r = EMULATE_USER_EXIT; + *r = 0; return true; } } @@ -6432,7 +6577,7 @@ static bool kvm_vcpu_check_breakpoint(struct kvm_vcpu *vcpu, int *r) vcpu->arch.dr6 &= ~DR_TRAP_BITS; vcpu->arch.dr6 |= dr6 | DR6_RTM; kvm_queue_exception(vcpu, DB_VECTOR); - *r = EMULATE_DONE; + *r = 1; return true; } } @@ -6516,32 +6661,48 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, trace_kvm_emulate_insn_start(vcpu); ++vcpu->stat.insn_emulation; if (r != EMULATION_OK) { - if (emulation_type & EMULTYPE_TRAP_UD) - return EMULATE_FAIL; + if ((emulation_type & EMULTYPE_TRAP_UD) || + (emulation_type & EMULTYPE_TRAP_UD_FORCED)) { + kvm_queue_exception(vcpu, UD_VECTOR); + return 1; + } if (reexecute_instruction(vcpu, cr2, write_fault_to_spt, emulation_type)) - return EMULATE_DONE; - if (ctxt->have_exception && inject_emulated_exception(vcpu)) - return EMULATE_DONE; - if (emulation_type & EMULTYPE_SKIP) - return EMULATE_FAIL; + return 1; + if (ctxt->have_exception) { + /* + * #UD should result in just EMULATION_FAILED, and trap-like + * exception should not be encountered during decode. + */ + WARN_ON_ONCE(ctxt->exception.vector == UD_VECTOR || + exception_type(ctxt->exception.vector) == EXCPT_TRAP); + inject_emulated_exception(vcpu); + return 1; + } return handle_emulation_failure(vcpu, emulation_type); } } - if ((emulation_type & EMULTYPE_VMWARE) && - !is_vmware_backdoor_opcode(ctxt)) - return EMULATE_FAIL; + if ((emulation_type & EMULTYPE_VMWARE_GP) && + !is_vmware_backdoor_opcode(ctxt)) { + kvm_queue_exception_e(vcpu, GP_VECTOR, 0); + return 1; + } + /* + * Note, EMULTYPE_SKIP is intended for use *only* by vendor callbacks + * for kvm_skip_emulated_instruction(). The caller is responsible for + * updating interruptibility state and injecting single-step #DBs. 
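The warning in the comment above is the crux of the EMULTYPE_SKIP contract. The contrast with the full-service helper can be sketched roughly as follows; skip_after_intercept() is an invented wrapper for illustration only:

static int skip_after_intercept(struct kvm_vcpu *vcpu, bool vendor_path)
{
	if (!vendor_path)
		/*
		 * Full-service: advances RIP via the vendor callback and
		 * injects the single-step #DB if RFLAGS.TF is set. The
		 * safe default for exit handlers.
		 */
		return kvm_skip_emulated_instruction(vcpu);

	/*
	 * Decode-and-skip only: RIP moves (and RF is cleared), but
	 * interruptibility state and single-step #DB bookkeeping are
	 * left entirely to the vendor caller.
	 */
	return kvm_emulate_instruction(vcpu, EMULTYPE_SKIP);
}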
+ */ if (emulation_type & EMULTYPE_SKIP) { kvm_rip_write(vcpu, ctxt->_eip); if (ctxt->eflags & X86_EFLAGS_RF) kvm_set_rflags(vcpu, ctxt->eflags & ~X86_EFLAGS_RF); - return EMULATE_DONE; + return 1; } if (retry_instruction(ctxt, cr2, emulation_type)) - return EMULATE_DONE; + return 1; /* this is needed for vmware backdoor interface to work since it changes registers values during IO operation */ @@ -6557,18 +6718,18 @@ restart: r = x86_emulate_insn(ctxt); if (r == EMULATION_INTERCEPTED) - return EMULATE_DONE; + return 1; if (r == EMULATION_FAILED) { if (reexecute_instruction(vcpu, cr2, write_fault_to_spt, emulation_type)) - return EMULATE_DONE; + return 1; return handle_emulation_failure(vcpu, emulation_type); } if (ctxt->have_exception) { - r = EMULATE_DONE; + r = 1; if (inject_emulated_exception(vcpu)) return r; } else if (vcpu->arch.pio.count) { @@ -6579,16 +6740,18 @@ restart: writeback = false; vcpu->arch.complete_userspace_io = complete_emulated_pio; } - r = EMULATE_USER_EXIT; + r = 0; } else if (vcpu->mmio_needed) { + ++vcpu->stat.mmio_exits; + if (!vcpu->mmio_is_write) writeback = false; - r = EMULATE_USER_EXIT; + r = 0; vcpu->arch.complete_userspace_io = complete_emulated_mmio; } else if (r == EMULATION_RESTART) goto restart; else - r = EMULATE_DONE; + r = 1; if (writeback) { unsigned long rflags = kvm_x86_ops->get_rflags(vcpu); @@ -6597,8 +6760,8 @@ restart: if (!ctxt->have_exception || exception_type(ctxt->exception.vector) == EXCPT_TRAP) { kvm_rip_write(vcpu, ctxt->eip); - if (r == EMULATE_DONE && ctxt->tf) - kvm_vcpu_do_singlestep(vcpu, &r); + if (r && ctxt->tf) + r = kvm_vcpu_do_singlestep(vcpu); __kvm_set_rflags(vcpu, ctxt->eflags); } @@ -7803,8 +7966,12 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) bool req_immediate_exit = false; if (kvm_request_pending(vcpu)) { - if (kvm_check_request(KVM_REQ_GET_VMCS12_PAGES, vcpu)) - kvm_x86_ops->get_vmcs12_pages(vcpu); + if (kvm_check_request(KVM_REQ_GET_VMCS12_PAGES, vcpu)) { + if (unlikely(!kvm_x86_ops->get_vmcs12_pages(vcpu))) { + r = 0; + goto out; + } + } if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) kvm_mmu_unload(vcpu); if (kvm_check_request(KVM_REQ_MIGRATE_TIMER, vcpu)) @@ -8192,12 +8359,11 @@ static int vcpu_run(struct kvm_vcpu *vcpu) static inline int complete_emulated_io(struct kvm_vcpu *vcpu) { int r; + vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); r = kvm_emulate_instruction(vcpu, EMULTYPE_NO_DECODE); srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); - if (r != EMULATE_DONE) - return 0; - return 1; + return r; } static int complete_emulated_pio(struct kvm_vcpu *vcpu) @@ -8565,23 +8731,22 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index, ret = emulator_task_switch(ctxt, tss_selector, idt_index, reason, has_error_code, error_code); - - if (ret) - return EMULATE_FAIL; + if (ret) { + vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; + vcpu->run->internal.ndata = 0; + return 0; + } kvm_rip_write(vcpu, ctxt->eip); kvm_set_rflags(vcpu, ctxt->eflags); kvm_make_request(KVM_REQ_EVENT, vcpu); - return EMULATE_DONE; + return 1; } EXPORT_SYMBOL_GPL(kvm_task_switch); static int kvm_valid_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) { - if (!guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) && - (sregs->cr4 & X86_CR4_OSXSAVE)) - return -EINVAL; - if ((sregs->efer & EFER_LME) && (sregs->cr0 & X86_CR0_PG)) { /* * When EFER.LME and CR0.PG are set, the processor is in @@ -8600,7 +8765,7 @@ static int kvm_valid_sregs(struct kvm_vcpu 
*vcpu, struct kvm_sregs *sregs) return -EINVAL; } - return 0; + return kvm_valid_cr4(vcpu, sregs->cr4); } static int __set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) @@ -9290,6 +9455,8 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) INIT_HLIST_HEAD(&kvm->arch.mask_notifier_list); INIT_LIST_HEAD(&kvm->arch.active_mmu_pages); + INIT_LIST_HEAD(&kvm->arch.zapped_obsolete_pages); + INIT_LIST_HEAD(&kvm->arch.lpage_disallowed_mmu_pages); INIT_LIST_HEAD(&kvm->arch.assigned_dev_head); atomic_set(&kvm->arch.noncoherent_dma_count, 0); @@ -9315,10 +9482,12 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) kvm_page_track_init(kvm); kvm_mmu_init_vm(kvm); - if (kvm_x86_ops->vm_init) - return kvm_x86_ops->vm_init(kvm); + return kvm_x86_ops->vm_init(kvm); +} - return 0; +int kvm_arch_post_init_vm(struct kvm *kvm) +{ + return kvm_mmu_post_init_vm(kvm); } static void kvm_unload_vcpu_mmu(struct kvm_vcpu *vcpu) @@ -9422,6 +9591,11 @@ int x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size) } EXPORT_SYMBOL_GPL(x86_set_memory_region); +void kvm_arch_pre_destroy_vm(struct kvm *kvm) +{ + kvm_mmu_pre_destroy_vm(kvm); +} + void kvm_arch_destroy_vm(struct kvm *kvm) { if (current->mm == kvm->mm) { @@ -9622,8 +9796,13 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, * Scan sptes if dirty logging has been stopped, dropping those * which can be collapsed into a single large-page spte. Later * page faults will create the large-page sptes. + * + * There is no need to do this in any of the following cases: + * CREATE: No dirty mappings will already exist. + * MOVE/DELETE: The old mappings will already have been cleaned up by + * kvm_arch_flush_shadow_memslot() */ - if ((change != KVM_MR_DELETE) && + if (change == KVM_MR_FLAGS_ONLY && (old->flags & KVM_MEM_LOG_DIRTY_PAGES) && !(new->flags & KVM_MEM_LOG_DIRTY_PAGES)) kvm_mmu_zap_collapsible_sptes(kvm, new); @@ -10010,7 +10189,7 @@ EXPORT_SYMBOL_GPL(kvm_arch_has_noncoherent_dma); bool kvm_arch_has_irq_bypass(void) { - return kvm_x86_ops->update_pi_irte != NULL; + return true; } int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *cons, @@ -10050,9 +10229,6 @@ void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons, int kvm_arch_update_irqfd_routing(struct kvm *kvm, unsigned int host_irq, uint32_t guest_irq, bool set) { - if (!kvm_x86_ops->update_pi_irte) - return -EINVAL; - return kvm_x86_ops->update_pi_irte(kvm, host_irq, guest_irq, set); } @@ -10079,11 +10255,12 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmrun); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmexit); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmexit_inject); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intr_vmexit); +EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmenter_failed); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_invlpga); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_skinit); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intercepts); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_write_tsc_offset); -EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_ple_window); +EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_ple_window_update); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_pml_full); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_pi_irte_update); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_avic_unaccelerated_access); diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index 6594020c0691..dbf7442a822b 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h @@ -196,7 +196,7 @@ static inline void vcpu_cache_mmio_info(struct kvm_vcpu *vcpu, * actually a nGPA. */ vcpu->arch.mmio_gva = mmu_is_nested(vcpu) ? 
0 : gva & PAGE_MASK; - vcpu->arch.access = access; + vcpu->arch.mmio_access = access; vcpu->arch.mmio_gfn = gfn; vcpu->arch.mmio_gen = gen; } @@ -261,7 +261,7 @@ static inline bool kvm_check_has_quirk(struct kvm *kvm, u64 quirk) } void kvm_set_pending_timer(struct kvm_vcpu *vcpu); -int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip); +void kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip); void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr); u64 get_kvmclock_ns(struct kvm *kvm); diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S index 4fe1601dbc5d..86976b55ae74 100644 --- a/arch/x86/lib/copy_user_64.S +++ b/arch/x86/lib/copy_user_64.S @@ -33,7 +33,7 @@ 102: .section .fixup,"ax" 103: addl %ecx,%edx /* ecx is zerorest also */ - jmp copy_user_handle_tail + jmp .Lcopy_user_handle_tail .previous _ASM_EXTABLE_UA(100b, 103b) @@ -113,7 +113,7 @@ ENTRY(copy_user_generic_unrolled) 40: leal (%rdx,%rcx,8),%edx jmp 60f 50: movl %ecx,%edx -60: jmp copy_user_handle_tail /* ecx is zerorest also */ +60: jmp .Lcopy_user_handle_tail /* ecx is zerorest also */ .previous _ASM_EXTABLE_UA(1b, 30b) @@ -177,7 +177,7 @@ ENTRY(copy_user_generic_string) .section .fixup,"ax" 11: leal (%rdx,%rcx,8),%ecx 12: movl %ecx,%edx /* ecx is zerorest also */ - jmp copy_user_handle_tail + jmp .Lcopy_user_handle_tail .previous _ASM_EXTABLE_UA(1b, 11b) @@ -210,7 +210,7 @@ ENTRY(copy_user_enhanced_fast_string) .section .fixup,"ax" 12: movl %ecx,%edx /* ecx is zerorest also */ - jmp copy_user_handle_tail + jmp .Lcopy_user_handle_tail .previous _ASM_EXTABLE_UA(1b, 12b) @@ -231,7 +231,7 @@ EXPORT_SYMBOL(copy_user_enhanced_fast_string) * eax uncopied bytes or 0 if successful. */ ALIGN; -copy_user_handle_tail: +.Lcopy_user_handle_tail: movl %edx,%ecx 1: rep movsb 2: mov %ecx,%eax @@ -239,7 +239,7 @@ copy_user_handle_tail: ret _ASM_EXTABLE_UA(1b, 2b) -END(copy_user_handle_tail) +END(.Lcopy_user_handle_tail) /* * copy_user_nocache - Uncached memory copy with exception handling @@ -364,7 +364,7 @@ ENTRY(__copy_user_nocache) movl %ecx,%edx .L_fixup_handle_tail: sfence - jmp copy_user_handle_tail + jmp .Lcopy_user_handle_tail .previous _ASM_EXTABLE_UA(1b, .L_fixup_4x8b_copy) diff --git a/arch/x86/lib/delay.c b/arch/x86/lib/delay.c index b7375dc6898f..c126571e5e2e 100644 --- a/arch/x86/lib/delay.c +++ b/arch/x86/lib/delay.c @@ -113,8 +113,8 @@ static void delay_mwaitx(unsigned long __loops) __monitorx(raw_cpu_ptr(&cpu_tss_rw), 0, 0); /* - * AMD, like Intel, supports the EAX hint and EAX=0xf - * means, do not enter any deep C-state and we use it + * AMD, like Intel's MWAIT version, supports the EAX hint and + * EAX=0xf0 means, do not enter any deep C-state and we use it * here in delay() to minimize wakeup latency. 
*/ __mwaitx(MWAITX_DISABLE_CSTATES, delay, MWAITX_ECX_TIMER_ENABLE); diff --git a/arch/x86/lib/getuser.S b/arch/x86/lib/getuser.S index 304f958c27b2..9578eb88fc87 100644 --- a/arch/x86/lib/getuser.S +++ b/arch/x86/lib/getuser.S @@ -115,7 +115,7 @@ ENDPROC(__get_user_8) EXPORT_SYMBOL(__get_user_8) -bad_get_user_clac: +.Lbad_get_user_clac: ASM_CLAC bad_get_user: xor %edx,%edx @@ -123,7 +123,7 @@ bad_get_user: ret #ifdef CONFIG_X86_32 -bad_get_user_8_clac: +.Lbad_get_user_8_clac: ASM_CLAC bad_get_user_8: xor %edx,%edx @@ -132,12 +132,12 @@ bad_get_user_8: ret #endif - _ASM_EXTABLE_UA(1b, bad_get_user_clac) - _ASM_EXTABLE_UA(2b, bad_get_user_clac) - _ASM_EXTABLE_UA(3b, bad_get_user_clac) + _ASM_EXTABLE_UA(1b, .Lbad_get_user_clac) + _ASM_EXTABLE_UA(2b, .Lbad_get_user_clac) + _ASM_EXTABLE_UA(3b, .Lbad_get_user_clac) #ifdef CONFIG_X86_64 - _ASM_EXTABLE_UA(4b, bad_get_user_clac) + _ASM_EXTABLE_UA(4b, .Lbad_get_user_clac) #else - _ASM_EXTABLE_UA(4b, bad_get_user_8_clac) - _ASM_EXTABLE_UA(5b, bad_get_user_8_clac) + _ASM_EXTABLE_UA(4b, .Lbad_get_user_8_clac) + _ASM_EXTABLE_UA(5b, .Lbad_get_user_8_clac) #endif diff --git a/arch/x86/lib/putuser.S b/arch/x86/lib/putuser.S index 14bf78341d3c..126dd6a9ec9b 100644 --- a/arch/x86/lib/putuser.S +++ b/arch/x86/lib/putuser.S @@ -37,7 +37,7 @@ ENTRY(__put_user_1) ENTER cmp TASK_addr_limit(%_ASM_BX),%_ASM_CX - jae bad_put_user + jae .Lbad_put_user ASM_STAC 1: movb %al,(%_ASM_CX) xor %eax,%eax @@ -51,7 +51,7 @@ ENTRY(__put_user_2) mov TASK_addr_limit(%_ASM_BX),%_ASM_BX sub $1,%_ASM_BX cmp %_ASM_BX,%_ASM_CX - jae bad_put_user + jae .Lbad_put_user ASM_STAC 2: movw %ax,(%_ASM_CX) xor %eax,%eax @@ -65,7 +65,7 @@ ENTRY(__put_user_4) mov TASK_addr_limit(%_ASM_BX),%_ASM_BX sub $3,%_ASM_BX cmp %_ASM_BX,%_ASM_CX - jae bad_put_user + jae .Lbad_put_user ASM_STAC 3: movl %eax,(%_ASM_CX) xor %eax,%eax @@ -79,7 +79,7 @@ ENTRY(__put_user_8) mov TASK_addr_limit(%_ASM_BX),%_ASM_BX sub $7,%_ASM_BX cmp %_ASM_BX,%_ASM_CX - jae bad_put_user + jae .Lbad_put_user ASM_STAC 4: mov %_ASM_AX,(%_ASM_CX) #ifdef CONFIG_X86_32 @@ -91,16 +91,16 @@ ENTRY(__put_user_8) ENDPROC(__put_user_8) EXPORT_SYMBOL(__put_user_8) -bad_put_user_clac: +.Lbad_put_user_clac: ASM_CLAC -bad_put_user: +.Lbad_put_user: movl $-EFAULT,%eax RET - _ASM_EXTABLE_UA(1b, bad_put_user_clac) - _ASM_EXTABLE_UA(2b, bad_put_user_clac) - _ASM_EXTABLE_UA(3b, bad_put_user_clac) - _ASM_EXTABLE_UA(4b, bad_put_user_clac) + _ASM_EXTABLE_UA(1b, .Lbad_put_user_clac) + _ASM_EXTABLE_UA(2b, .Lbad_put_user_clac) + _ASM_EXTABLE_UA(3b, .Lbad_put_user_clac) + _ASM_EXTABLE_UA(4b, .Lbad_put_user_clac) #ifdef CONFIG_X86_32 - _ASM_EXTABLE_UA(5b, bad_put_user_clac) + _ASM_EXTABLE_UA(5b, .Lbad_put_user_clac) #endif diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index 63e99f15d7cf..a39dcdb5ae34 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -19,6 +19,7 @@ #include <asm/set_memory.h> #include <asm/e820/api.h> +#include <asm/efi.h> #include <asm/fixmap.h> #include <asm/pgtable.h> #include <asm/tlbflush.h> diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c index fece30ca8b0c..9268c12458c8 100644 --- a/arch/x86/mm/mem_encrypt.c +++ b/arch/x86/mm/mem_encrypt.c @@ -344,13 +344,11 @@ bool sme_active(void) { return sme_me_mask && !sev_enabled; } -EXPORT_SYMBOL(sme_active); bool sev_active(void) { return sme_me_mask && sev_enabled; } -EXPORT_SYMBOL(sev_active); /* Override for DMA direct allocation check - ARCH_HAS_FORCE_DMA_UNENCRYPTED */ bool force_dma_unencrypted(struct device *dev) diff --git 
a/arch/x86/mm/pat_rbtree.c b/arch/x86/mm/pat_rbtree.c index fa16036fa592..65ebe4b88f7c 100644 --- a/arch/x86/mm/pat_rbtree.c +++ b/arch/x86/mm/pat_rbtree.c @@ -54,23 +54,10 @@ static u64 get_subtree_max_end(struct rb_node *node) return ret; } -static u64 compute_subtree_max_end(struct memtype *data) -{ - u64 max_end = data->end, child_max_end; - - child_max_end = get_subtree_max_end(data->rb.rb_right); - if (child_max_end > max_end) - max_end = child_max_end; - - child_max_end = get_subtree_max_end(data->rb.rb_left); - if (child_max_end > max_end) - max_end = child_max_end; - - return max_end; -} +#define NODE_END(node) ((node)->end) -RB_DECLARE_CALLBACKS(static, memtype_rb_augment_cb, struct memtype, rb, - u64, subtree_max_end, compute_subtree_max_end) +RB_DECLARE_CALLBACKS_MAX(static, memtype_rb_augment_cb, + struct memtype, rb, u64, subtree_max_end, NODE_END) /* Find the first (lowest start addr) overlapping range from rb tree */ static struct memtype *memtype_rb_lowest_match(struct rb_root *root, diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index 44816ff6411f..3e4b9035bb9a 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c @@ -45,7 +45,7 @@ early_param("userpte", setup_userpte); void ___pte_free_tlb(struct mmu_gather *tlb, struct page *pte) { - pgtable_page_dtor(pte); + pgtable_pte_page_dtor(pte); paravirt_release_pte(page_to_pfn(pte)); paravirt_tlb_remove_table(tlb, pte); } @@ -357,7 +357,7 @@ static void pgd_prepopulate_user_pmd(struct mm_struct *mm, static struct kmem_cache *pgd_cache; -void __init pgd_cache_init(void) +void __init pgtable_cache_init(void) { /* * When PAE kernel is running as a Xen domain, it does not use @@ -402,10 +402,6 @@ static inline void _pgd_free(pgd_t *pgd) } #else -void __init pgd_cache_init(void) -{ -} - static inline pgd_t *_pgd_alloc(void) { return (pgd_t *)__get_free_pages(GFP_PGTABLE_USER, diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c index b196524759ec..7f2140414440 100644 --- a/arch/x86/mm/pti.c +++ b/arch/x86/mm/pti.c @@ -330,13 +330,15 @@ pti_clone_pgtable(unsigned long start, unsigned long end, pud = pud_offset(p4d, addr); if (pud_none(*pud)) { - addr += PUD_SIZE; + WARN_ON_ONCE(addr & ~PUD_MASK); + addr = round_up(addr + 1, PUD_SIZE); continue; } pmd = pmd_offset(pud, addr); if (pmd_none(*pmd)) { - addr += PMD_SIZE; + WARN_ON_ONCE(addr & ~PMD_MASK); + addr = round_up(addr + 1, PMD_SIZE); continue; } @@ -666,6 +668,8 @@ void __init pti_init(void) */ void pti_finalize(void) { + if (!boot_cpu_has(X86_FEATURE_PTI)) + return; /* * We need to clone everything (again) that maps parts of the * kernel image. diff --git a/arch/x86/mm/testmmiotrace.c b/arch/x86/mm/testmmiotrace.c index 0881e1ff1e58..a8bd952e136d 100644 --- a/arch/x86/mm/testmmiotrace.c +++ b/arch/x86/mm/testmmiotrace.c @@ -8,6 +8,7 @@ #include <linux/module.h> #include <linux/io.h> #include <linux/mmiotrace.h> +#include <linux/security.h> static unsigned long mmio_address; module_param_hw(mmio_address, ulong, iomem, 0); @@ -115,6 +116,10 @@ static void do_test_bulk_ioremapping(void) static int __init init(void) { unsigned long size = (read_far) ? 
(8 << 20) : (16 << 10); + int ret = security_locked_down(LOCKDOWN_MMIOTRACE); + + if (ret) + return ret; if (mmio_address == 0) { pr_err("you have to use the module argument mmio_address.\n"); diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c index 7389db538c30..6fa42e9c4e6f 100644 --- a/arch/x86/pci/mmconfig-shared.c +++ b/arch/x86/pci/mmconfig-shared.c @@ -29,6 +29,7 @@ static bool pci_mmcfg_running_state; static bool pci_mmcfg_arch_init_failed; static DEFINE_MUTEX(pci_mmcfg_lock); +#define pci_mmcfg_lock_held() lock_is_held(&(pci_mmcfg_lock).dep_map) LIST_HEAD(pci_mmcfg_list); @@ -54,7 +55,7 @@ static void list_add_sorted(struct pci_mmcfg_region *new) struct pci_mmcfg_region *cfg; /* keep list sorted by segment and starting bus number */ - list_for_each_entry_rcu(cfg, &pci_mmcfg_list, list) { + list_for_each_entry_rcu(cfg, &pci_mmcfg_list, list, pci_mmcfg_lock_held()) { if (cfg->segment > new->segment || (cfg->segment == new->segment && cfg->start_bus >= new->start_bus)) { @@ -118,7 +119,7 @@ struct pci_mmcfg_region *pci_mmconfig_lookup(int segment, int bus) { struct pci_mmcfg_region *cfg; - list_for_each_entry_rcu(cfg, &pci_mmcfg_list, list) + list_for_each_entry_rcu(cfg, &pci_mmcfg_list, list, pci_mmcfg_lock_held()) if (cfg->segment == segment && cfg->start_bus <= bus && bus <= cfg->end_bus) return cfg; diff --git a/arch/x86/pci/sta2x11-fixup.c b/arch/x86/pci/sta2x11-fixup.c index 97bbc12dd6b2..6269a175385d 100644 --- a/arch/x86/pci/sta2x11-fixup.c +++ b/arch/x86/pci/sta2x11-fixup.c @@ -1,8 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * arch/x86/pci/sta2x11-fixup.c - * glue code for lib/swiotlb.c and DMA translation between STA2x11 - * AMBA memory mapping and the X86 memory mapping + * DMA translation between STA2x11 AMBA memory mapping and the x86 memory mapping * * ST Microelectronics ConneXt (STA2X11/STA2X10) * diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index a7189a3b4d70..425e025341db 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -59,11 +59,34 @@ static efi_system_table_t efi_systab __initdata; static efi_config_table_type_t arch_tables[] __initdata = { #ifdef CONFIG_X86_UV - {UV_SYSTEM_TABLE_GUID, "UVsystab", &efi.uv_systab}, + {UV_SYSTEM_TABLE_GUID, "UVsystab", &uv_systab_phys}, #endif {NULL_GUID, NULL, NULL}, }; +static const unsigned long * const efi_tables[] = { + &efi.mps, + &efi.acpi, + &efi.acpi20, + &efi.smbios, + &efi.smbios3, + &efi.boot_info, + &efi.hcdp, + &efi.uga, +#ifdef CONFIG_X86_UV + &uv_systab_phys, +#endif + &efi.fw_vendor, + &efi.runtime, + &efi.config_table, + &efi.esrt, + &efi.properties_table, + &efi.mem_attr_table, +#ifdef CONFIG_EFI_RCI2_TABLE + &rci2_table_phys, +#endif +}; + u64 efi_setup; /* efi setup_data physical address */ static int add_efi_memmap __initdata; @@ -894,9 +917,6 @@ static void __init kexec_enter_virtual_mode(void) if (efi_enabled(EFI_OLD_MEMMAP) && (__supported_pte_mask & _PAGE_NX)) runtime_code_page_mkexec(); - - /* clean DUMMY object */ - efi_delete_dummy_variable(); #endif } @@ -1049,3 +1069,17 @@ static int __init arch_parse_efi_cmdline(char *str) return 0; } early_param("efi", arch_parse_efi_cmdline); + +bool efi_is_table_address(unsigned long phys_addr) +{ + unsigned int i; + + if (phys_addr == EFI_INVALID_TABLE_ADDR) + return false; + + for (i = 0; i < ARRAY_SIZE(efi_tables); i++) + if (*(efi_tables[i]) == phys_addr) + return true; + + return false; +} diff --git a/arch/x86/platform/intel/iosf_mbi.c 
b/arch/x86/platform/intel/iosf_mbi.c index 2e796b54cbde..9e2444500428 100644 --- a/arch/x86/platform/intel/iosf_mbi.c +++ b/arch/x86/platform/intel/iosf_mbi.c @@ -17,6 +17,7 @@ #include <linux/debugfs.h> #include <linux/capability.h> #include <linux/pm_qos.h> +#include <linux/wait.h> #include <asm/iosf_mbi.h> @@ -201,23 +202,45 @@ EXPORT_SYMBOL(iosf_mbi_available); #define PUNIT_SEMAPHORE_BIT BIT(0) #define PUNIT_SEMAPHORE_ACQUIRE BIT(1) -static DEFINE_MUTEX(iosf_mbi_punit_mutex); -static DEFINE_MUTEX(iosf_mbi_block_punit_i2c_access_count_mutex); +static DEFINE_MUTEX(iosf_mbi_pmic_access_mutex); static BLOCKING_NOTIFIER_HEAD(iosf_mbi_pmic_bus_access_notifier); -static u32 iosf_mbi_block_punit_i2c_access_count; +static DECLARE_WAIT_QUEUE_HEAD(iosf_mbi_pmic_access_waitq); +static u32 iosf_mbi_pmic_punit_access_count; +static u32 iosf_mbi_pmic_i2c_access_count; static u32 iosf_mbi_sem_address; static unsigned long iosf_mbi_sem_acquired; static struct pm_qos_request iosf_mbi_pm_qos; void iosf_mbi_punit_acquire(void) { - mutex_lock(&iosf_mbi_punit_mutex); + /* Wait for any I2C PMIC accesses from in-kernel drivers to finish. */ + mutex_lock(&iosf_mbi_pmic_access_mutex); + while (iosf_mbi_pmic_i2c_access_count != 0) { + mutex_unlock(&iosf_mbi_pmic_access_mutex); + wait_event(iosf_mbi_pmic_access_waitq, + iosf_mbi_pmic_i2c_access_count == 0); + mutex_lock(&iosf_mbi_pmic_access_mutex); + } + /* + * We do not need to do anything to allow the PUNIT to safely access + * the PMIC, other than blocking in-kernel accesses to the PMIC. + */ + iosf_mbi_pmic_punit_access_count++; + mutex_unlock(&iosf_mbi_pmic_access_mutex); } EXPORT_SYMBOL(iosf_mbi_punit_acquire); void iosf_mbi_punit_release(void) { - mutex_unlock(&iosf_mbi_punit_mutex); + bool do_wakeup; + + mutex_lock(&iosf_mbi_pmic_access_mutex); + iosf_mbi_pmic_punit_access_count--; + do_wakeup = iosf_mbi_pmic_punit_access_count == 0; + mutex_unlock(&iosf_mbi_pmic_access_mutex); + + if (do_wakeup) + wake_up(&iosf_mbi_pmic_access_waitq); } EXPORT_SYMBOL(iosf_mbi_punit_release); @@ -256,34 +279,32 @@ static void iosf_mbi_reset_semaphore(void) * already blocked P-Unit accesses because it wants them blocked over multiple * i2c-transfers, for e.g. read-modify-write of an I2C client register. * - * The P-Unit accesses already being blocked is tracked through the - * iosf_mbi_block_punit_i2c_access_count variable which is protected by the - * iosf_mbi_block_punit_i2c_access_count_mutex this mutex is hold for the - * entire duration of the function. - * - * If access is not blocked yet, this function takes the following steps: + * To allow safe PMIC i2c bus accesses, this function takes the following steps: * * 1) Some code sends request to the P-Unit which make it access the PMIC * I2C bus. Testing has shown that the P-Unit does not check its internal * PMIC bus semaphore for these requests. Callers of these requests call * iosf_mbi_punit_acquire()/_release() around their P-Unit accesses, these - * functions lock/unlock the iosf_mbi_punit_mutex. - * As the first step we lock the iosf_mbi_punit_mutex, to wait for any in - * flight requests to finish and to block any new requests. + * functions increase/decrease iosf_mbi_pmic_punit_access_count, so first + * we wait for iosf_mbi_pmic_punit_access_count to become 0. + + * 2) Check iosf_mbi_pmic_i2c_access_count; if access has already + * been blocked by another caller, we only need to increment + * iosf_mbi_pmic_i2c_access_count and we can skip the other steps.
* - * 2) Some code makes such P-Unit requests from atomic contexts where it + * 3) Some code makes such P-Unit requests from atomic contexts where it * cannot call iosf_mbi_punit_acquire() as that may sleep. * As the second step we call a notifier chain which allows any code * needing P-Unit resources from atomic context to acquire them before * we take control over the PMIC I2C bus. * - * 3) When CPU cores enter C6 or C7 the P-Unit needs to talk to the PMIC + * 4) When CPU cores enter C6 or C7 the P-Unit needs to talk to the PMIC * if this happens while the kernel itself is accessing the PMIC I2C bus * the SoC hangs. * As the third step we call pm_qos_update_request() to disallow the CPU * to enter C6 or C7. * - * 4) The P-Unit has a PMIC bus semaphore which we can request to stop + * 5) The P-Unit has a PMIC bus semaphore which we can request to stop * autonomous P-Unit tasks from accessing the PMIC I2C bus while we hold it. * As the fourth and final step we request this semaphore and wait for our * request to be acknowledged. @@ -297,12 +318,18 @@ int iosf_mbi_block_punit_i2c_access(void) if (WARN_ON(!mbi_pdev || !iosf_mbi_sem_address)) return -ENXIO; - mutex_lock(&iosf_mbi_block_punit_i2c_access_count_mutex); + mutex_lock(&iosf_mbi_pmic_access_mutex); - if (iosf_mbi_block_punit_i2c_access_count > 0) + while (iosf_mbi_pmic_punit_access_count != 0) { + mutex_unlock(&iosf_mbi_pmic_access_mutex); + wait_event(iosf_mbi_pmic_access_waitq, + iosf_mbi_pmic_punit_access_count == 0); + mutex_lock(&iosf_mbi_pmic_access_mutex); + } + + if (iosf_mbi_pmic_i2c_access_count > 0) goto success; - mutex_lock(&iosf_mbi_punit_mutex); blocking_notifier_call_chain(&iosf_mbi_pmic_bus_access_notifier, MBI_PMIC_BUS_ACCESS_BEGIN, NULL); @@ -330,10 +357,6 @@ int iosf_mbi_block_punit_i2c_access(void) iosf_mbi_sem_acquired = jiffies; dev_dbg(&mbi_pdev->dev, "P-Unit semaphore acquired after %ums\n", jiffies_to_msecs(jiffies - start)); - /* - * Success, keep iosf_mbi_punit_mutex locked till - * iosf_mbi_unblock_punit_i2c_access() gets called. 
- */ goto success; } @@ -344,15 +367,13 @@ int iosf_mbi_block_punit_i2c_access(void) dev_err(&mbi_pdev->dev, "Error P-Unit semaphore timed out, resetting\n"); error: iosf_mbi_reset_semaphore(); - mutex_unlock(&iosf_mbi_punit_mutex); - if (!iosf_mbi_get_sem(&sem)) dev_err(&mbi_pdev->dev, "P-Unit semaphore: %d\n", sem); success: if (!WARN_ON(ret)) - iosf_mbi_block_punit_i2c_access_count++; + iosf_mbi_pmic_i2c_access_count++; - mutex_unlock(&iosf_mbi_block_punit_i2c_access_count_mutex); + mutex_unlock(&iosf_mbi_pmic_access_mutex); return ret; } @@ -360,17 +381,20 @@ EXPORT_SYMBOL(iosf_mbi_block_punit_i2c_access); void iosf_mbi_unblock_punit_i2c_access(void) { - mutex_lock(&iosf_mbi_block_punit_i2c_access_count_mutex); + bool do_wakeup = false; - iosf_mbi_block_punit_i2c_access_count--; - if (iosf_mbi_block_punit_i2c_access_count == 0) { + mutex_lock(&iosf_mbi_pmic_access_mutex); + iosf_mbi_pmic_i2c_access_count--; + if (iosf_mbi_pmic_i2c_access_count == 0) { iosf_mbi_reset_semaphore(); - mutex_unlock(&iosf_mbi_punit_mutex); dev_dbg(&mbi_pdev->dev, "punit semaphore held for %ums\n", jiffies_to_msecs(jiffies - iosf_mbi_sem_acquired)); + do_wakeup = true; } + mutex_unlock(&iosf_mbi_pmic_access_mutex); - mutex_unlock(&iosf_mbi_block_punit_i2c_access_count_mutex); + if (do_wakeup) + wake_up(&iosf_mbi_pmic_access_waitq); } EXPORT_SYMBOL(iosf_mbi_unblock_punit_i2c_access); @@ -379,10 +403,10 @@ int iosf_mbi_register_pmic_bus_access_notifier(struct notifier_block *nb) int ret; /* Wait for the bus to go inactive before registering */ - mutex_lock(&iosf_mbi_punit_mutex); + iosf_mbi_punit_acquire(); ret = blocking_notifier_chain_register( &iosf_mbi_pmic_bus_access_notifier, nb); - mutex_unlock(&iosf_mbi_punit_mutex); + iosf_mbi_punit_release(); return ret; } @@ -403,9 +427,9 @@ int iosf_mbi_unregister_pmic_bus_access_notifier(struct notifier_block *nb) int ret; /* Wait for the bus to go inactive before unregistering */ - mutex_lock(&iosf_mbi_punit_mutex); + iosf_mbi_punit_acquire(); ret = iosf_mbi_unregister_pmic_bus_access_notifier_unlocked(nb); - mutex_unlock(&iosf_mbi_punit_mutex); + iosf_mbi_punit_release(); return ret; } @@ -413,7 +437,7 @@ EXPORT_SYMBOL(iosf_mbi_unregister_pmic_bus_access_notifier); void iosf_mbi_assert_punit_acquired(void) { - WARN_ON(!mutex_is_locked(&iosf_mbi_punit_mutex)); + WARN_ON(iosf_mbi_pmic_punit_access_count == 0); } EXPORT_SYMBOL(iosf_mbi_assert_punit_acquired); diff --git a/arch/x86/platform/olpc/olpc-xo1-sci.c b/arch/x86/platform/olpc/olpc-xo1-sci.c index 25ce1b3b0732..99a28ce2244c 100644 --- a/arch/x86/platform/olpc/olpc-xo1-sci.c +++ b/arch/x86/platform/olpc/olpc-xo1-sci.c @@ -157,6 +157,12 @@ static ssize_t lid_wake_mode_set(struct device *dev, static DEVICE_ATTR(lid_wake_mode, S_IWUSR | S_IRUGO, lid_wake_mode_show, lid_wake_mode_set); +static struct attribute *lid_attrs[] = { + &dev_attr_lid_wake_mode.attr, + NULL, +}; +ATTRIBUTE_GROUPS(lid); + /* * Process all items in the EC's SCI queue. 
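
The iosf_mbi hunks above replace the long-held iosf_mbi_punit_mutex with one short-held mutex, two access counters, and a wait queue: P-Unit-mediated PMIC users and in-kernel I2C PMIC users each bump their own counter and sleep until the opposing counter drains to zero, so neither side holds a mutex across a whole PMIC transaction. Below is a minimal user-space sketch of that two-class exclusion pattern, using pthreads; all names are illustrative and nothing in it is taken from the kernel sources.

/* Two-class exclusion: each class may run concurrently with itself,
 * never with the other class. Build with: cc -pthread sketch.c */
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t access_mutex = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  access_cond  = PTHREAD_COND_INITIALIZER;
static unsigned int punit_count, i2c_count;

static void punit_acquire(void)
{
	pthread_mutex_lock(&access_mutex);
	while (i2c_count != 0)	/* wait for the other class to drain */
		pthread_cond_wait(&access_cond, &access_mutex);
	punit_count++;		/* join our own class */
	pthread_mutex_unlock(&access_mutex);
}

static void punit_release(void)
{
	pthread_mutex_lock(&access_mutex);
	if (--punit_count == 0)	/* last one out wakes the other class */
		pthread_cond_broadcast(&access_cond);
	pthread_mutex_unlock(&access_mutex);
}

/* The I2C side is the mirror image, waiting on punit_count instead. */
static void i2c_acquire(void)
{
	pthread_mutex_lock(&access_mutex);
	while (punit_count != 0)
		pthread_cond_wait(&access_cond, &access_mutex);
	i2c_count++;
	pthread_mutex_unlock(&access_mutex);
}

static void i2c_release(void)
{
	pthread_mutex_lock(&access_mutex);
	if (--i2c_count == 0)
		pthread_cond_broadcast(&access_cond);
	pthread_mutex_unlock(&access_mutex);
}

int main(void)
{
	punit_acquire();	/* would block here if an i2c user were active */
	punit_release();
	i2c_acquire();
	i2c_release();
	puts("two-class exclusion ok");
	return 0;
}

The kernel has no condition variables, so the diff open-codes the same drop-the-lock-and-sleep step with wait_event() on iosf_mbi_pmic_access_waitq plus a wake_up() from the last releaser of each class.
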
* @@ -510,17 +516,8 @@ static int setup_lid_switch(struct platform_device *pdev) goto err_register; } - r = device_create_file(&lid_switch_idev->dev, &dev_attr_lid_wake_mode); - if (r) { - dev_err(&pdev->dev, "failed to create wake mode attr: %d\n", r); - goto err_create_attr; - } - return 0; -err_create_attr: - input_unregister_device(lid_switch_idev); - lid_switch_idev = NULL; err_register: input_free_device(lid_switch_idev); return r; @@ -528,7 +525,6 @@ err_register: static void free_lid_switch(void) { - device_remove_file(&lid_switch_idev->dev, &dev_attr_lid_wake_mode); input_unregister_device(lid_switch_idev); } @@ -624,6 +620,7 @@ static int xo1_sci_remove(struct platform_device *pdev) static struct platform_driver xo1_sci_driver = { .driver = { .name = "olpc-xo1-sci-acpi", + .dev_groups = lid_groups, }, .probe = xo1_sci_probe, .remove = xo1_sci_remove, diff --git a/arch/x86/platform/uv/bios_uv.c b/arch/x86/platform/uv/bios_uv.c index 7c69652ffeea..c2ee31953372 100644 --- a/arch/x86/platform/uv/bios_uv.c +++ b/arch/x86/platform/uv/bios_uv.c @@ -14,6 +14,8 @@ #include <asm/uv/bios.h> #include <asm/uv/uv_hub.h> +unsigned long uv_systab_phys __ro_after_init = EFI_INVALID_TABLE_ADDR; + struct uv_systab *uv_systab; static s64 __uv_bios_call(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, @@ -185,13 +187,13 @@ EXPORT_SYMBOL_GPL(uv_bios_set_legacy_vga_target); void uv_bios_init(void) { uv_systab = NULL; - if ((efi.uv_systab == EFI_INVALID_TABLE_ADDR) || - !efi.uv_systab || efi_runtime_disabled()) { + if ((uv_systab_phys == EFI_INVALID_TABLE_ADDR) || + !uv_systab_phys || efi_runtime_disabled()) { pr_crit("UV: UVsystab: missing\n"); return; } - uv_systab = ioremap(efi.uv_systab, sizeof(struct uv_systab)); + uv_systab = ioremap(uv_systab_phys, sizeof(struct uv_systab)); if (!uv_systab || strncmp(uv_systab->signature, UV_SYSTAB_SIG, 4)) { pr_err("UV: UVsystab: bad signature!\n"); iounmap(uv_systab); @@ -203,7 +205,7 @@ void uv_bios_init(void) int size = uv_systab->size; iounmap(uv_systab); - uv_systab = ioremap(efi.uv_systab, size); + uv_systab = ioremap(uv_systab_phys, size); if (!uv_systab) { pr_err("UV: UVsystab: ioremap(%d) failed!\n", size); return; diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c index 20c389a91b80..5f0a96bf27a1 100644 --- a/arch/x86/platform/uv/tlb_uv.c +++ b/arch/x86/platform/uv/tlb_uv.c @@ -1804,9 +1804,9 @@ static void pq_init(int node, int pnode) plsize = (DEST_Q_SIZE + 1) * sizeof(struct bau_pq_entry); vp = kmalloc_node(plsize, GFP_KERNEL, node); - pqp = (struct bau_pq_entry *)vp; - BUG_ON(!pqp); + BUG_ON(!vp); + pqp = (struct bau_pq_entry *)vp; cp = (char *)pqp + 31; pqp = (struct bau_pq_entry *)(((unsigned long)cp >> 5) << 5); diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c index c9ef6a7a4a1a..915bb1639763 100644 --- a/arch/x86/power/cpu.c +++ b/arch/x86/power/cpu.c @@ -123,9 +123,6 @@ static void __save_processor_state(struct saved_context *ctxt) ctxt->cr2 = read_cr2(); ctxt->cr3 = __read_cr3(); ctxt->cr4 = __read_cr4(); -#ifdef CONFIG_X86_64 - ctxt->cr8 = read_cr8(); -#endif ctxt->misc_enable_saved = !rdmsrl_safe(MSR_IA32_MISC_ENABLE, &ctxt->misc_enable); msr_save_context(ctxt); @@ -208,7 +205,6 @@ static void notrace __restore_processor_state(struct saved_context *ctxt) #else /* CONFIG X86_64 */ wrmsrl(MSR_EFER, ctxt->efer); - write_cr8(ctxt->cr8); __write_cr4(ctxt->cr4); #endif write_cr3(ctxt->cr3); diff --git a/arch/x86/purgatory/Makefile b/arch/x86/purgatory/Makefile index 8901a1f89cf5..fb4ee5444379 100644 --- 
a/arch/x86/purgatory/Makefile +++ b/arch/x86/purgatory/Makefile @@ -9,46 +9,52 @@ PURGATORY_OBJS = $(addprefix $(obj)/,$(purgatory-y)) $(obj)/string.o: $(srctree)/arch/x86/boot/compressed/string.c FORCE $(call if_changed_rule,cc_o_c) -$(obj)/sha256.o: $(srctree)/lib/sha256.c FORCE +$(obj)/sha256.o: $(srctree)/lib/crypto/sha256.c FORCE $(call if_changed_rule,cc_o_c) +CFLAGS_sha256.o := -D__DISABLE_EXPORTS + LDFLAGS_purgatory.ro := -e purgatory_start -r --no-undefined -nostdlib -z nodefaultlib targets += purgatory.ro KASAN_SANITIZE := n KCOV_INSTRUMENT := n +# These are adjustments to the compiler flags used for objects that +# make up the standalone purgatory.ro + +PURGATORY_CFLAGS_REMOVE := -mcmodel=kernel +PURGATORY_CFLAGS := -mcmodel=large -ffreestanding -fno-zero-initialized-in-bss +PURGATORY_CFLAGS += $(DISABLE_STACKLEAK_PLUGIN) + # Default KBUILD_CFLAGS can have -pg option set when FTRACE is enabled. That # in turn leaves some undefined symbols like __fentry__ in purgatory and not # sure how to relocate those. ifdef CONFIG_FUNCTION_TRACER -CFLAGS_REMOVE_sha256.o += $(CC_FLAGS_FTRACE) -CFLAGS_REMOVE_purgatory.o += $(CC_FLAGS_FTRACE) -CFLAGS_REMOVE_string.o += $(CC_FLAGS_FTRACE) -CFLAGS_REMOVE_kexec-purgatory.o += $(CC_FLAGS_FTRACE) +PURGATORY_CFLAGS_REMOVE += $(CC_FLAGS_FTRACE) endif ifdef CONFIG_STACKPROTECTOR -CFLAGS_REMOVE_sha256.o += -fstack-protector -CFLAGS_REMOVE_purgatory.o += -fstack-protector -CFLAGS_REMOVE_string.o += -fstack-protector -CFLAGS_REMOVE_kexec-purgatory.o += -fstack-protector +PURGATORY_CFLAGS_REMOVE += -fstack-protector endif ifdef CONFIG_STACKPROTECTOR_STRONG -CFLAGS_REMOVE_sha256.o += -fstack-protector-strong -CFLAGS_REMOVE_purgatory.o += -fstack-protector-strong -CFLAGS_REMOVE_string.o += -fstack-protector-strong -CFLAGS_REMOVE_kexec-purgatory.o += -fstack-protector-strong +PURGATORY_CFLAGS_REMOVE += -fstack-protector-strong endif ifdef CONFIG_RETPOLINE -CFLAGS_REMOVE_sha256.o += $(RETPOLINE_CFLAGS) -CFLAGS_REMOVE_purgatory.o += $(RETPOLINE_CFLAGS) -CFLAGS_REMOVE_string.o += $(RETPOLINE_CFLAGS) -CFLAGS_REMOVE_kexec-purgatory.o += $(RETPOLINE_CFLAGS) +PURGATORY_CFLAGS_REMOVE += $(RETPOLINE_CFLAGS) endif +CFLAGS_REMOVE_purgatory.o += $(PURGATORY_CFLAGS_REMOVE) +CFLAGS_purgatory.o += $(PURGATORY_CFLAGS) + +CFLAGS_REMOVE_sha256.o += $(PURGATORY_CFLAGS_REMOVE) +CFLAGS_sha256.o += $(PURGATORY_CFLAGS) + +CFLAGS_REMOVE_string.o += $(PURGATORY_CFLAGS_REMOVE) +CFLAGS_string.o += $(PURGATORY_CFLAGS) + $(obj)/purgatory.ro: $(PURGATORY_OBJS) FORCE $(call if_changed,ld) diff --git a/arch/x86/purgatory/purgatory.c b/arch/x86/purgatory/purgatory.c index b607bda786f6..3b95410ff0f8 100644 --- a/arch/x86/purgatory/purgatory.c +++ b/arch/x86/purgatory/purgatory.c @@ -9,7 +9,7 @@ */ #include <linux/bug.h> -#include <linux/sha256.h> +#include <crypto/sha.h> #include <asm/purgatory.h> #include "../boot/string.h" diff --git a/arch/x86/realmode/rm/header.S b/arch/x86/realmode/rm/header.S index 30b0d30d861a..6363761cc74c 100644 --- a/arch/x86/realmode/rm/header.S +++ b/arch/x86/realmode/rm/header.S @@ -19,7 +19,6 @@ GLOBAL(real_mode_header) .long pa_ro_end /* SMP trampoline */ .long pa_trampoline_start - .long pa_trampoline_status .long pa_trampoline_header #ifdef CONFIG_X86_64 .long pa_trampoline_pgd; diff --git a/arch/x86/realmode/rm/trampoline_32.S b/arch/x86/realmode/rm/trampoline_32.S index 2dd866c9e21e..1868b158480d 100644 --- a/arch/x86/realmode/rm/trampoline_32.S +++ b/arch/x86/realmode/rm/trampoline_32.S @@ -41,9 +41,6 @@ ENTRY(trampoline_start) movl tr_start, %eax # where 
we need to go - movl $0xA5A5A5A5, trampoline_status - # write marker for master knows we're running - /* * GDT tables in non default location kernel can be beyond 16MB and * lgdt will not be able to load the address as in real mode default diff --git a/arch/x86/realmode/rm/trampoline_64.S b/arch/x86/realmode/rm/trampoline_64.S index 24bb7598774e..aee2b45d83b8 100644 --- a/arch/x86/realmode/rm/trampoline_64.S +++ b/arch/x86/realmode/rm/trampoline_64.S @@ -49,9 +49,6 @@ ENTRY(trampoline_start) mov %ax, %es mov %ax, %ss - movl $0xA5A5A5A5, trampoline_status - # write marker for master knows we're running - # Setup stack movl $rm_stack_end, %esp diff --git a/arch/x86/realmode/rm/trampoline_common.S b/arch/x86/realmode/rm/trampoline_common.S index 7c706772ab59..8d8208dcca24 100644 --- a/arch/x86/realmode/rm/trampoline_common.S +++ b/arch/x86/realmode/rm/trampoline_common.S @@ -2,7 +2,3 @@ .section ".rodata","a" .balign 16 tr_idt: .fill 1, 6, 0 - - .bss - .balign 4 -GLOBAL(trampoline_status) .space 4 diff --git a/arch/x86/um/asm/barrier.h b/arch/x86/um/asm/barrier.h index f31e5d903161..165be7f9a964 100644 --- a/arch/x86/um/asm/barrier.h +++ b/arch/x86/um/asm/barrier.h @@ -2,14 +2,7 @@ #ifndef _ASM_UM_BARRIER_H_ #define _ASM_UM_BARRIER_H_ -#include <asm/asm.h> -#include <asm/segment.h> -#include <asm/cpufeatures.h> -#include <asm/cmpxchg.h> -#include <asm/nops.h> - -#include <linux/kernel.h> -#include <linux/irqflags.h> +#include <asm/alternative.h> /* * Force strict CPU ordering. @@ -30,9 +23,6 @@ #endif /* CONFIG_X86_32 */ -#define dma_rmb() barrier() -#define dma_wmb() barrier() - #include <asm-generic/barrier.h> #endif diff --git a/arch/x86/um/vdso/um_vdso.c b/arch/x86/um/vdso/um_vdso.c index ac9c02b9d92c..891868756a51 100644 --- a/arch/x86/um/vdso/um_vdso.c +++ b/arch/x86/um/vdso/um_vdso.c @@ -47,7 +47,7 @@ time_t __vdso_time(time_t *t) return secs; } -int time(time_t *t) __attribute__((weak, alias("__vdso_time"))); +time_t time(time_t *t) __attribute__((weak, alias("__vdso_time"))); long __vdso_getcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *unused) diff --git a/arch/x86/xen/efi.c b/arch/x86/xen/efi.c index 0d3365cb64de..a04551ee5568 100644 --- a/arch/x86/xen/efi.c +++ b/arch/x86/xen/efi.c @@ -57,19 +57,7 @@ static efi_system_table_t __init *xen_efi_probe(void) return NULL; /* Here we know that Xen runs on EFI platform. 
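
The um_vdso hunk above is a pure type fix: with GCC's alias attribute, callers bind to the prototype on the alias declaration itself, so declaring the vDSO's time() alias as returning int would truncate the 64-bit time_t that __vdso_time() actually returns on x86-64. A hedged, standalone illustration of the idiom follows; it is not the kernel's vdso build, and the body of __vdso_time() is faked for the example.

#include <stdio.h>
#include <time.h>

time_t __vdso_time(time_t *t)
{
	time_t secs = 1700000000;	/* stand-in for the real vdso read */
	if (t)
		*t = secs;
	return secs;
}

/* Correct: the alias declaration carries the same prototype as its
 * target. Declaring this as "int time(time_t *)" would make callers
 * truncate the 64-bit return value. */
time_t time(time_t *t) __attribute__((weak, alias("__vdso_time")));

int main(void)
{
	printf("%lld\n", (long long)time(NULL));
	return 0;
}
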
*/ - - efi.get_time = xen_efi_get_time; - efi.set_time = xen_efi_set_time; - efi.get_wakeup_time = xen_efi_get_wakeup_time; - efi.set_wakeup_time = xen_efi_set_wakeup_time; - efi.get_variable = xen_efi_get_variable; - efi.get_next_variable = xen_efi_get_next_variable; - efi.set_variable = xen_efi_set_variable; - efi.query_variable_info = xen_efi_query_variable_info; - efi.update_capsule = xen_efi_update_capsule; - efi.query_capsule_caps = xen_efi_query_capsule_caps; - efi.get_next_high_mono_count = xen_efi_get_next_high_mono_count; - efi.reset_system = xen_efi_reset_system; + xen_efi_runtime_setup(); efi_systab_xen.tables = info->cfg.addr; efi_systab_xen.nr_tables = info->cfg.nent; diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 750f46ad018a..205b1176084f 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -269,19 +269,41 @@ void xen_reboot(int reason) BUG(); } +static int reboot_reason = SHUTDOWN_reboot; +static bool xen_legacy_crash; void xen_emergency_restart(void) { - xen_reboot(SHUTDOWN_reboot); + xen_reboot(reboot_reason); } static int xen_panic_event(struct notifier_block *this, unsigned long event, void *ptr) { - if (!kexec_crash_loaded()) - xen_reboot(SHUTDOWN_crash); + if (!kexec_crash_loaded()) { + if (xen_legacy_crash) + xen_reboot(SHUTDOWN_crash); + + reboot_reason = SHUTDOWN_crash; + + /* + * If panic_timeout==0 then we are supposed to wait forever. + * However, to preserve original dom0 behavior we have to drop + * into hypervisor. (domU behavior is controlled by its + * config file) + */ + if (panic_timeout == 0) + panic_timeout = -1; + } return NOTIFY_DONE; } +static int __init parse_xen_legacy_crash(char *arg) +{ + xen_legacy_crash = true; + return 0; +} +early_param("xen_legacy_crash", parse_xen_legacy_crash); + static struct notifier_block xen_panic_block = { .notifier_call = xen_panic_event, .priority = INT_MIN diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index 7ceb32821093..5bfea374a160 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -117,6 +117,14 @@ static void __init xen_banner(void) printk(KERN_INFO "Xen version: %d.%d%s%s\n", version >> 16, version & 0xffff, extra.extraversion, xen_feature(XENFEAT_mmu_pt_update_preserve_ad) ? " (preserve-AD)" : ""); + +#ifdef CONFIG_X86_32 + pr_warn("WARNING! WARNING! WARNING! WARNING! WARNING! WARNING! WARNING!\n" + "Support for running as 32-bit PV-guest under Xen will soon be removed\n" + "from the Linux kernel!\n" + "Please use either a 64-bit kernel or switch to HVM or PVH mode!\n" + "WARNING! WARNING! WARNING! WARNING! WARNING! WARNING! 
WARNING!\n"); +#endif } static void __init xen_pv_init_platform(void) @@ -877,16 +885,6 @@ static void xen_write_cr4(unsigned long cr4) native_write_cr4(cr4); } -#ifdef CONFIG_X86_64 -static inline unsigned long xen_read_cr8(void) -{ - return 0; -} -static inline void xen_write_cr8(unsigned long val) -{ - BUG_ON(val); -} -#endif static u64 xen_read_msr_safe(unsigned int msr, int *err) { @@ -1023,11 +1021,6 @@ static const struct pv_cpu_ops xen_cpu_ops __initconst = { .write_cr4 = xen_write_cr4, -#ifdef CONFIG_X86_64 - .read_cr8 = xen_read_cr8, - .write_cr8 = xen_write_cr8, -#endif - .wbinvd = native_wbinvd, .read_msr = xen_read_msr, diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c index 26e8b326966d..c8dbee62ec2a 100644 --- a/arch/x86/xen/mmu_pv.c +++ b/arch/x86/xen/mmu_pv.c @@ -2625,7 +2625,6 @@ int xen_create_contiguous_region(phys_addr_t pstart, unsigned int order, *dma_handle = virt_to_machine(vstart).maddr; return success ? 0 : -ENOMEM; } -EXPORT_SYMBOL_GPL(xen_create_contiguous_region); void xen_destroy_contiguous_region(phys_addr_t pstart, unsigned int order) { @@ -2660,7 +2659,6 @@ void xen_destroy_contiguous_region(phys_addr_t pstart, unsigned int order) spin_unlock_irqrestore(&xen_reservation_lock, flags); } -EXPORT_SYMBOL_GPL(xen_destroy_contiguous_region); static noinline void xen_flush_tlb_all(void) { |
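
The enlighten.c change above stops calling xen_reboot(SHUTDOWN_crash) directly from the panic notifier unless xen_legacy_crash is given; instead it records the crash reason for xen_emergency_restart() and forces a panic_timeout so dom0 still drops back into the hypervisor after the panic output. The handler itself is the stock panic-notifier pattern; a minimal out-of-tree sketch of that pattern, with hypothetical demo_* names, is shown below (on kernels of this vintage panic_notifier_list is declared in <linux/kernel.h>).

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/notifier.h>

static int demo_panic_event(struct notifier_block *nb,
			    unsigned long event, void *ptr)
{
	/* Runs on the panic path: must not sleep or take blocking locks.
	 * ptr is the panic message string. */
	pr_emerg("demo: panic notifier fired: %s\n", (char *)ptr);
	return NOTIFY_DONE;	/* let any remaining notifiers run */
}

static struct notifier_block demo_panic_block = {
	.notifier_call = demo_panic_event,
	.priority = INT_MIN,	/* run last, as the Xen handler does */
};

static int __init demo_init(void)
{
	atomic_notifier_chain_register(&panic_notifier_list,
				       &demo_panic_block);
	return 0;
}

static void __exit demo_exit(void)
{
	atomic_notifier_chain_unregister(&panic_notifier_list,
					 &demo_panic_block);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");
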