459 files changed, 10259 insertions, 5380 deletions
diff --git a/arch/alpha/include/asm/Kbuild b/arch/alpha/include/asm/Kbuild
index dd31e97edae8..396caece6d6d 100644
--- a/arch/alpha/include/asm/Kbuild
+++ b/arch/alpha/include/asm/Kbuild
@@ -3,6 +3,5 @@
 generated-y += syscall_table.h
 generic-y += agp.h
 generic-y += asm-offsets.h
-generic-y += export.h
 generic-y += kvm_para.h
 generic-y += mcs_spinlock.h
diff --git a/arch/alpha/kernel/setup.c b/arch/alpha/kernel/setup.c
index 3d7473531ab1..c80258ec332f 100644
--- a/arch/alpha/kernel/setup.c
+++ b/arch/alpha/kernel/setup.c
@@ -421,7 +421,7 @@ register_cpus(void)
 arch_initcall(register_cpus);
 
 #ifdef CONFIG_MAGIC_SYSRQ
-static void sysrq_reboot_handler(int unused)
+static void sysrq_reboot_handler(u8 unused)
 {
 	machine_halt();
 }
diff --git a/arch/alpha/kernel/srmcons.c b/arch/alpha/kernel/srmcons.c
index 6dc952b0df4a..d6139dbae4ac 100644
--- a/arch/alpha/kernel/srmcons.c
+++ b/arch/alpha/kernel/srmcons.c
@@ -129,9 +129,8 @@ srmcons_do_write(struct tty_port *port, const char *buf, int count)
 	return count;
 }
 
-static int
-srmcons_write(struct tty_struct *tty,
-	      const unsigned char *buf, int count)
+static ssize_t
+srmcons_write(struct tty_struct *tty, const u8 *buf, size_t count)
 {
 	unsigned long flags;
 
diff --git a/arch/alpha/lib/callback_srm.S b/arch/alpha/lib/callback_srm.S
index b13c4a231f1b..36b63f295170 100644
--- a/arch/alpha/lib/callback_srm.S
+++ b/arch/alpha/lib/callback_srm.S
@@ -3,8 +3,8 @@
  *	arch/alpha/lib/callback_srm.S
  */
 
+#include <linux/export.h>
 #include <asm/console.h>
-#include <asm/export.h>
 
 .text
 #define HWRPB_CRB_OFFSET 0xc0
diff --git a/arch/alpha/lib/clear_page.S b/arch/alpha/lib/clear_page.S
index ce02de7b0493..af70ee309a33 100644
--- a/arch/alpha/lib/clear_page.S
+++ b/arch/alpha/lib/clear_page.S
@@ -4,7 +4,7 @@
  *
  * Zero an entire page.
  */
-#include <asm/export.h>
+#include <linux/export.h>
 	.text
 	.align 4
 	.global clear_page
diff --git a/arch/alpha/lib/clear_user.S b/arch/alpha/lib/clear_user.S
index db6c6ca45896..848eb60a0010 100644
--- a/arch/alpha/lib/clear_user.S
+++ b/arch/alpha/lib/clear_user.S
@@ -10,7 +10,7 @@
  * a successful copy).  There is also some rather minor exception setup
  * stuff.
  */
-#include <asm/export.h>
+#include <linux/export.h>
 
 /* Allow an exception for an insn; exit if we get one.  */
 #define EX(x,y...)			\
diff --git a/arch/alpha/lib/copy_page.S b/arch/alpha/lib/copy_page.S
index 5439a30c77d0..1c444fdad9a5 100644
--- a/arch/alpha/lib/copy_page.S
+++ b/arch/alpha/lib/copy_page.S
@@ -4,7 +4,7 @@
  *
  * Copy an entire page.
  */
-#include <asm/export.h>
+#include <linux/export.h>
 	.text
 	.align 4
 	.global copy_page
diff --git a/arch/alpha/lib/copy_user.S b/arch/alpha/lib/copy_user.S
index 32ab0344b185..ef18faafcad6 100644
--- a/arch/alpha/lib/copy_user.S
+++ b/arch/alpha/lib/copy_user.S
@@ -12,7 +12,7 @@
  * exception setup stuff..
  */
 
-#include <asm/export.h>
+#include <linux/export.h>
 
 /* Allow an exception for an insn; exit if we get one.  */
 #define EXI(x,y...)			\
diff --git a/arch/alpha/lib/csum_ipv6_magic.S b/arch/alpha/lib/csum_ipv6_magic.S
index c7b213ab01ab..273c426c3859 100644
--- a/arch/alpha/lib/csum_ipv6_magic.S
+++ b/arch/alpha/lib/csum_ipv6_magic.S
@@ -13,7 +13,7 @@
  * added by Ivan Kokshaysky <ink@jurassic.park.msu.ru>
  */
 
-#include <asm/export.h>
+#include <linux/export.h>
 	.globl csum_ipv6_magic
 	.align 4
 	.ent csum_ipv6_magic
diff --git a/arch/alpha/lib/divide.S b/arch/alpha/lib/divide.S
index 2b60eb45e50b..db01840d76ec 100644
--- a/arch/alpha/lib/divide.S
+++ b/arch/alpha/lib/divide.S
@@ -46,7 +46,7 @@
  *	$28 - compare status
  */
 
-#include <asm/export.h>
+#include <linux/export.h>
 #define halt .long 0
 
 /*
diff --git a/arch/alpha/lib/ev6-clear_page.S b/arch/alpha/lib/ev6-clear_page.S
index 325864c81586..a534d9ff7161 100644
--- a/arch/alpha/lib/ev6-clear_page.S
+++ b/arch/alpha/lib/ev6-clear_page.S
@@ -4,7 +4,7 @@
  *
  * Zero an entire page.
  */
-#include <asm/export.h>
+#include <linux/export.h>
         .text
         .align 4
         .global clear_page
diff --git a/arch/alpha/lib/ev6-clear_user.S b/arch/alpha/lib/ev6-clear_user.S
index 7e644f83cdf2..af776cc45f91 100644
--- a/arch/alpha/lib/ev6-clear_user.S
+++ b/arch/alpha/lib/ev6-clear_user.S
@@ -29,7 +29,7 @@
  *	want to leave a hole (and we also want to avoid repeating lots of work)
  */
 
-#include <asm/export.h>
+#include <linux/export.h>
 /* Allow an exception for an insn; exit if we get one.  */
 #define EX(x,y...)			\
 	99: x,##y;			\
diff --git a/arch/alpha/lib/ev6-copy_page.S b/arch/alpha/lib/ev6-copy_page.S
index fd7212c8dcf1..36be5113b7b7 100644
--- a/arch/alpha/lib/ev6-copy_page.S
+++ b/arch/alpha/lib/ev6-copy_page.S
@@ -57,7 +57,7 @@
    destination pages are in the dcache, but it is my guess that this is
    less important than the dcache miss case.  */
 
-#include <asm/export.h>
+#include <linux/export.h>
 	.text
 	.align 4
 	.global copy_page
diff --git a/arch/alpha/lib/ev6-copy_user.S b/arch/alpha/lib/ev6-copy_user.S
index f3e433754397..b9b19710c364 100644
--- a/arch/alpha/lib/ev6-copy_user.S
+++ b/arch/alpha/lib/ev6-copy_user.S
@@ -23,7 +23,7 @@
  *	L	- lower subcluster; L0 - subcluster L0; L1 - subcluster L1
  */
 
-#include <asm/export.h>
+#include <linux/export.h>
 /* Allow an exception for an insn; exit if we get one.  */
 #define EXI(x,y...)			\
 	99: x,##y;			\
diff --git a/arch/alpha/lib/ev6-csum_ipv6_magic.S b/arch/alpha/lib/ev6-csum_ipv6_magic.S
index 9a73f90700a1..2ee548be98e3 100644
--- a/arch/alpha/lib/ev6-csum_ipv6_magic.S
+++ b/arch/alpha/lib/ev6-csum_ipv6_magic.S
@@ -53,7 +53,7 @@
  * may cause additional delay in rare cases (load-load replay traps).
  */
 
-#include <asm/export.h>
+#include <linux/export.h>
 	.globl csum_ipv6_magic
 	.align 4
 	.ent csum_ipv6_magic
diff --git a/arch/alpha/lib/ev6-divide.S b/arch/alpha/lib/ev6-divide.S
index 137ff1a07356..b73a6d26362e 100644
--- a/arch/alpha/lib/ev6-divide.S
+++ b/arch/alpha/lib/ev6-divide.S
@@ -56,7 +56,7 @@
  * Try not to change the actual algorithm if possible for consistency.
  */
 
-#include <asm/export.h>
+#include <linux/export.h>
 #define halt .long 0
 
 /*
diff --git a/arch/alpha/lib/ev6-memchr.S b/arch/alpha/lib/ev6-memchr.S
index 56bf9e14eeee..f75ba43e61e3 100644
--- a/arch/alpha/lib/ev6-memchr.S
+++ b/arch/alpha/lib/ev6-memchr.S
@@ -28,7 +28,7 @@
  *	L	- lower subcluster; L0 - subcluster L0; L1 - subcluster L1
  * Try not to change the actual algorithm if possible for consistency.
  */
-#include <asm/export.h>
+#include <linux/export.h>
         .set noreorder
         .set noat
 
diff --git a/arch/alpha/lib/ev6-memcpy.S b/arch/alpha/lib/ev6-memcpy.S
index ffbd056b6eb2..3ef43c26c8af 100644
--- a/arch/alpha/lib/ev6-memcpy.S
+++ b/arch/alpha/lib/ev6-memcpy.S
@@ -20,7 +20,7 @@
  * Temp usage notes:
  *	$1,$2,		- scratch
  */
-#include <asm/export.h>
+#include <linux/export.h>
 	.set noreorder
 	.set noat
 
diff --git a/arch/alpha/lib/ev6-memset.S b/arch/alpha/lib/ev6-memset.S
index 1cfcfbbea6f0..89d7809da4cc 100644
--- a/arch/alpha/lib/ev6-memset.S
+++ b/arch/alpha/lib/ev6-memset.S
@@ -27,7 +27,7 @@
  * as fixes will need to be made in multiple places.  The performance gain
  * is worth it.
  */
-#include <asm/export.h>
+#include <linux/export.h>
 	.set noat
 	.set noreorder
 .text
diff --git a/arch/alpha/lib/ev67-strcat.S b/arch/alpha/lib/ev67-strcat.S
index ec3096a9e8d4..f8c7305b11d6 100644
--- a/arch/alpha/lib/ev67-strcat.S
+++ b/arch/alpha/lib/ev67-strcat.S
@@ -20,7 +20,7 @@
  * string once.
  */
 
-#include <asm/export.h>
+#include <linux/export.h>
 	.text
 
 	.align 4
diff --git a/arch/alpha/lib/ev67-strchr.S b/arch/alpha/lib/ev67-strchr.S
index fbf89e0b6dc3..97a7cb475309 100644
--- a/arch/alpha/lib/ev67-strchr.S
+++ b/arch/alpha/lib/ev67-strchr.S
@@ -16,7 +16,7 @@
  *	L	- lower subcluster; L0 - subcluster L0; L1 - subcluster L1
  * Try not to change the actual algorithm if possible for consistency.
  */
-#include <asm/export.h>
+#include <linux/export.h>
 #include <asm/regdef.h>
 
 	.set noreorder
diff --git a/arch/alpha/lib/ev67-strlen.S b/arch/alpha/lib/ev67-strlen.S
index b73106ffbbc7..3d9078807ab4 100644
--- a/arch/alpha/lib/ev67-strlen.S
+++ b/arch/alpha/lib/ev67-strlen.S
@@ -18,7 +18,7 @@
  *	U	- upper subcluster; U0 - subcluster U0; U1 - subcluster U1
  *	L	- lower subcluster; L0 - subcluster L0; L1 - subcluster L1
  */
-#include <asm/export.h>
+#include <linux/export.h>
 	.set noreorder
 	.set noat
 
diff --git a/arch/alpha/lib/ev67-strncat.S b/arch/alpha/lib/ev67-strncat.S
index ceb0ca528789..8f313233e3a7 100644
--- a/arch/alpha/lib/ev67-strncat.S
+++ b/arch/alpha/lib/ev67-strncat.S
@@ -21,7 +21,7 @@
  * Try not to change the actual algorithm if possible for consistency.
  */
 
-#include <asm/export.h>
+#include <linux/export.h>
 	.text
 
 	.align 4
diff --git a/arch/alpha/lib/ev67-strrchr.S b/arch/alpha/lib/ev67-strrchr.S
index 7f80e398530f..ae7355f9ec56 100644
--- a/arch/alpha/lib/ev67-strrchr.S
+++ b/arch/alpha/lib/ev67-strrchr.S
@@ -19,7 +19,7 @@
  *	L	- lower subcluster; L0 - subcluster L0; L1 - subcluster L1
  */
 
-#include <asm/export.h>
+#include <linux/export.h>
 #include <asm/regdef.h>
 
 	.set noreorder
diff --git a/arch/alpha/lib/memchr.S b/arch/alpha/lib/memchr.S
index c13d3eca2e05..45366e32feee 100644
--- a/arch/alpha/lib/memchr.S
+++ b/arch/alpha/lib/memchr.S
@@ -31,7 +31,7 @@ For correctness consider that:
       - only minimum number of quadwords may be accessed
       - the third argument is an unsigned long
 */
-#include <asm/export.h>
+#include <linux/export.h>
         .set noreorder
         .set noat
 
diff --git a/arch/alpha/lib/memmove.S b/arch/alpha/lib/memmove.S
index 42d1922d0edf..3a27689e3390 100644
--- a/arch/alpha/lib/memmove.S
+++ b/arch/alpha/lib/memmove.S
@@ -7,7 +7,7 @@
  * This is hand-massaged output from the original memcpy.c.  We defer to
  * memcpy whenever possible; the backwards copy loops are not unrolled.
  */
-#include <asm/export.h>        
+#include <linux/export.h>
 	.set noat
 	.set noreorder
 	.text
diff --git a/arch/alpha/lib/memset.S b/arch/alpha/lib/memset.S
index 00393e30df25..9075d6918346 100644
--- a/arch/alpha/lib/memset.S
+++ b/arch/alpha/lib/memset.S
@@ -14,7 +14,7 @@
  * The scheduling comments are according to the EV5 documentation (and done by 
  * hand, so they might well be incorrect, please do tell me about it..)
  */
-#include <asm/export.h>
+#include <linux/export.h>
 	.set noat
 	.set noreorder
 .text
diff --git a/arch/alpha/lib/strcat.S b/arch/alpha/lib/strcat.S
index 055877dccd27..62b90ebbcf44 100644
--- a/arch/alpha/lib/strcat.S
+++ b/arch/alpha/lib/strcat.S
@@ -5,7 +5,7 @@
  *
  * Append a null-terminated string from SRC to DST.
  */
-#include <asm/export.h>
+#include <linux/export.h>
 
 	.text
 
diff --git a/arch/alpha/lib/strchr.S b/arch/alpha/lib/strchr.S
index 17871dd00280..68c54ff50dfe 100644
--- a/arch/alpha/lib/strchr.S
+++ b/arch/alpha/lib/strchr.S
@@ -6,7 +6,7 @@
  * Return the address of a given character within a null-terminated
  * string, or null if it is not found.
  */
-#include <asm/export.h>
+#include <linux/export.h>
 #include <asm/regdef.h>
 
 	.set noreorder
diff --git a/arch/alpha/lib/strcpy.S b/arch/alpha/lib/strcpy.S
index cb74ad23a90d..d8773ba77525 100644
--- a/arch/alpha/lib/strcpy.S
+++ b/arch/alpha/lib/strcpy.S
@@ -6,7 +6,7 @@
  * Copy a null-terminated string from SRC to DST.  Return a pointer
  * to the null-terminator in the source.
  */
-#include <asm/export.h>
+#include <linux/export.h>
 	.text
 
 	.align 3
diff --git a/arch/alpha/lib/strlen.S b/arch/alpha/lib/strlen.S
index dd882fe4d7e3..4fc6a6ff24cd 100644
--- a/arch/alpha/lib/strlen.S
+++ b/arch/alpha/lib/strlen.S
@@ -12,7 +12,7 @@
  *	  do this instead of the 9 instructions that
  *	  binary search needs).
  */
-#include <asm/export.h>
+#include <linux/export.h>
 	.set noreorder
 	.set noat
 
diff --git a/arch/alpha/lib/strncat.S b/arch/alpha/lib/strncat.S
index 522fee3e26ac..a913a7c84a39 100644
--- a/arch/alpha/lib/strncat.S
+++ b/arch/alpha/lib/strncat.S
@@ -10,7 +10,7 @@
  * past count, whereas libc may write to count+1.  This follows the generic
  * implementation in lib/string.c and is, IMHO, more sensible.
  */
-#include <asm/export.h>
+#include <linux/export.h>
 	.text
 
 	.align 3
diff --git a/arch/alpha/lib/strncpy.S b/arch/alpha/lib/strncpy.S
index cc57fad8b7ca..cb90cf022df3 100644
--- a/arch/alpha/lib/strncpy.S
+++ b/arch/alpha/lib/strncpy.S
@@ -11,7 +11,7 @@
  * version has cropped that bit o' nastiness as well as assuming that
  * __stxncpy is in range of a branch.
  */
-#include <asm/export.h>
+#include <linux/export.h>
 	.set noat
 	.set noreorder
 
diff --git a/arch/alpha/lib/strrchr.S b/arch/alpha/lib/strrchr.S
index 7650ba99b7e2..dd8e073b6cf2 100644
--- a/arch/alpha/lib/strrchr.S
+++ b/arch/alpha/lib/strrchr.S
@@ -6,7 +6,7 @@
  * Return the address of the last occurrence of a given character
  * within a null-terminated string, or null if it is not found.
  */
-#include <asm/export.h>
+#include <linux/export.h>
 #include <asm/regdef.h>
 
 	.set noreorder
diff --git a/arch/alpha/lib/udiv-qrnnd.S b/arch/alpha/lib/udiv-qrnnd.S
index b887aa5428e5..96f05918bffe 100644
--- a/arch/alpha/lib/udiv-qrnnd.S
+++ b/arch/alpha/lib/udiv-qrnnd.S
@@ -25,7 +25,7 @@
  # along with GCC; see the file COPYING.  If not, write to the 
  # Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
  # MA 02111-1307, USA.
-#include <asm/export.h>
+#include <linux/export.h>
 
         .set noreorder
         .set noat
diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig
index 6f4995ad9873..3162db540ee9 100644
--- a/arch/arc/Kconfig
+++ b/arch/arc/Kconfig
@@ -27,6 +27,8 @@ config ARC
 	select GENERIC_SCHED_CLOCK
 	select GENERIC_SMP_IDLE_THREAD
 	select GENERIC_IOREMAP
+	select GENERIC_STRNCPY_FROM_USER if MMU
+	select GENERIC_STRNLEN_USER if MMU
 	select HAVE_ARCH_KGDB
 	select HAVE_ARCH_TRACEHOOK
 	select HAVE_ARCH_TRANSPARENT_HUGEPAGE if ARC_MMU_V4
@@ -491,11 +493,11 @@ config ARC_KVADDR_SIZE
 	  kernel-user gutter)
 
 config ARC_CURR_IN_REG
-	bool "Dedicate Register r25 for current_task pointer"
+	bool "cache current task pointer in gp"
 	default y
 	help
-	  This reserved Register R25 to point to Current Task in
-	  kernel mode. This saves memory access for each such access
+	  This reserves gp register to point to Current Task in
+	  kernel mode eliding memory access for each access
 
 
 config ARC_EMUL_UNALIGNED
diff --git a/arch/arc/Makefile b/arch/arc/Makefile
index 329400a1c355..2390dd042e36 100644
--- a/arch/arc/Makefile
+++ b/arch/arc/Makefile
@@ -28,14 +28,14 @@ cflags-y				+= $(tune-mcpu-def-y)
 endif
 endif
 
-
 ifdef CONFIG_ARC_CURR_IN_REG
 # For a global register definition, make sure it gets passed to every file
 # We had a customer reported bug where some code built in kernel was NOT using
-# any kernel headers, and missing the r25 global register
+# any kernel headers, and missing the global register
 # Can't do unconditionally because of recursive include issues
 # due to <linux/thread_info.h>
 LINUXINCLUDE	+=  -include $(srctree)/arch/arc/include/asm/current.h
+cflags-y	+= -ffixed-gp
 endif
 
 cflags-y				+= -fsection-anchors
@@ -67,7 +67,7 @@ cflags-$(CONFIG_ARC_DW2_UNWIND)		+= -fasynchronous-unwind-tables $(cfi)
 # small data is default for elf32 tool-chain. If not usable, disable it
 # This also allows repurposing GP as scratch reg to gcc reg allocator
 disable_small_data := y
-cflags-$(disable_small_data)		+= -mno-sdata -fcall-used-gp
+cflags-$(disable_small_data)		+= -mno-sdata
 
 cflags-$(CONFIG_CPU_BIG_ENDIAN)		+= -mbig-endian
 ldflags-$(CONFIG_CPU_BIG_ENDIAN)	+= -EB
diff --git a/arch/arc/include/asm/arcregs.h b/arch/arc/include/asm/arcregs.h
index 2162023195c5..4b13f60fe7ca 100644
--- a/arch/arc/include/asm/arcregs.h
+++ b/arch/arc/include/asm/arcregs.h
@@ -23,7 +23,7 @@
 #define ARC_REG_ICCM_BUILD	0x78	/* ICCM size (common) */
 #define ARC_REG_XY_MEM_BCR	0x79
 #define ARC_REG_MAC_BCR		0x7a
-#define ARC_REG_MUL_BCR		0x7b
+#define ARC_REG_MPY_BCR		0x7b
 #define ARC_REG_SWAP_BCR	0x7c
 #define ARC_REG_NORM_BCR	0x7d
 #define ARC_REG_MIXMAX_BCR	0x7e
@@ -177,7 +177,7 @@ struct bcr_isa_arcv2 {
 #endif
 };
 
-struct bcr_uarch_build_arcv2 {
+struct bcr_uarch_build {
 #ifdef CONFIG_CPU_BIG_ENDIAN
 	unsigned int pad:8, prod:8, maj:8, min:8;
 #else
@@ -185,6 +185,59 @@ struct bcr_uarch_build_arcv2 {
 #endif
 };
 
+struct bcr_mmu_3 {
+#ifdef CONFIG_CPU_BIG_ENDIAN
+	unsigned int ver:8, ways:4, sets:4, res:3, sasid:1, pg_sz:4,
+		     u_itlb:4, u_dtlb:4;
+#else
+	unsigned int u_dtlb:4, u_itlb:4, pg_sz:4, sasid:1, res:3, sets:4,
+		     ways:4, ver:8;
+#endif
+};
+
+struct bcr_mmu_4 {
+#ifdef CONFIG_CPU_BIG_ENDIAN
+	unsigned int ver:8, sasid:1, sz1:4, sz0:4, res:2, pae:1,
+		     n_ways:2, n_entry:2, n_super:2, u_itlb:3, u_dtlb:3;
+#else
+	/*           DTLB      ITLB      JES        JE         JA      */
+	unsigned int u_dtlb:3, u_itlb:3, n_super:2, n_entry:2, n_ways:2,
+		     pae:1, res:2, sz0:4, sz1:4, sasid:1, ver:8;
+#endif
+};
+
+struct bcr_cache {
+#ifdef CONFIG_CPU_BIG_ENDIAN
+	unsigned int pad:12, line_len:4, sz:4, config:4, ver:8;
+#else
+	unsigned int ver:8, config:4, sz:4, line_len:4, pad:12;
+#endif
+};
+
+struct bcr_slc_cfg {
+#ifdef CONFIG_CPU_BIG_ENDIAN
+	unsigned int pad:24, way:2, lsz:2, sz:4;
+#else
+	unsigned int sz:4, lsz:2, way:2, pad:24;
+#endif
+};
+
+struct bcr_clust_cfg {
+#ifdef CONFIG_CPU_BIG_ENDIAN
+	unsigned int pad:7, c:1, num_entries:8, num_cores:8, ver:8;
+#else
+	unsigned int ver:8, num_cores:8, num_entries:8, c:1, pad:7;
+#endif
+};
+
+struct bcr_volatile {
+#ifdef CONFIG_CPU_BIG_ENDIAN
+	unsigned int start:4, limit:4, pad:22, order:1, disable:1;
+#else
+	unsigned int disable:1, order:1, pad:22, limit:4, start:4;
+#endif
+};
+
 struct bcr_mpy {
 #ifdef CONFIG_CPU_BIG_ENDIAN
 	unsigned int pad:8, x1616:8, dsp:4, cycles:2, type:2, ver:8;
@@ -302,48 +355,6 @@ struct bcr_generic {
 #endif
 };
 
-/*
- *******************************************************************
- * Generic structures to hold build configuration used at runtime
- */
-
-struct cpuinfo_arc_mmu {
-	unsigned int ver:4, pg_sz_k:8, s_pg_sz_m:8, pad:10, sasid:1, pae:1;
-	unsigned int sets:12, ways:4, u_dtlb:8, u_itlb:8;
-};
-
-struct cpuinfo_arc_cache {
-	unsigned int sz_k:14, line_len:8, assoc:4, alias:1, vipt:1, pad:4;
-};
-
-struct cpuinfo_arc_bpu {
-	unsigned int ver, full, num_cache, num_pred, ret_stk;
-};
-
-struct cpuinfo_arc_ccm {
-	unsigned int base_addr, sz;
-};
-
-struct cpuinfo_arc {
-	struct cpuinfo_arc_cache icache, dcache, slc;
-	struct cpuinfo_arc_mmu mmu;
-	struct cpuinfo_arc_bpu bpu;
-	struct bcr_identity core;
-	struct bcr_isa_arcv2 isa;
-	const char *release, *name;
-	unsigned int vec_base;
-	struct cpuinfo_arc_ccm iccm, dccm;
-	struct {
-		unsigned int swap:1, norm:1, minmax:1, barrel:1, crc:1, swape:1, pad1:2,
-			     fpu_sp:1, fpu_dp:1, dual:1, dual_enb:1, pad2:4,
-			     ap_num:4, ap_full:1, smart:1, rtt:1, pad3:1,
-			     timer0:1, timer1:1, rtc:1, gfrc:1, pad4:4;
-	} extn;
-	struct bcr_mpy extn_mpy;
-};
-
-extern struct cpuinfo_arc cpuinfo_arc700[];
-
 static inline int is_isa_arcv2(void)
 {
 	return IS_ENABLED(CONFIG_ISA_ARCV2);
diff --git a/arch/arc/include/asm/atomic-llsc.h b/arch/arc/include/asm/atomic-llsc.h
index 1b0ffaeee16d..5258cb81a16b 100644
--- a/arch/arc/include/asm/atomic-llsc.h
+++ b/arch/arc/include/asm/atomic-llsc.h
@@ -18,7 +18,7 @@ static inline void arch_atomic_##op(int i, atomic_t *v)			\
 	: [val]	"=&r"	(val) /* Early clobber to prevent reg reuse */	\
 	: [ctr]	"r"	(&v->counter), /* Not "m": llock only supports reg direct addr mode */	\
 	  [i]	"ir"	(i)						\
-	: "cc");							\
+	: "cc", "memory");						\
 }									\
 
 #define ATOMIC_OP_RETURN(op, asm_op)				\
@@ -34,7 +34,7 @@ static inline int arch_atomic_##op##_return_relaxed(int i, atomic_t *v)	\
 	: [val]	"=&r"	(val)						\
 	: [ctr]	"r"	(&v->counter),					\
 	  [i]	"ir"	(i)						\
-	: "cc");							\
+	: "cc", "memory");						\
 									\
 	return val;							\
 }
@@ -56,7 +56,7 @@ static inline int arch_atomic_fetch_##op##_relaxed(int i, atomic_t *v)	\
 	  [orig] "=&r" (orig)						\
 	: [ctr]	"r"	(&v->counter),					\
 	  [i]	"ir"	(i)						\
-	: "cc");							\
+	: "cc", "memory");						\
 									\
 	return orig;							\
 }
diff --git a/arch/arc/include/asm/atomic64-arcv2.h b/arch/arc/include/asm/atomic64-arcv2.h
index 6b6db981967a..9b5791b85471 100644
--- a/arch/arc/include/asm/atomic64-arcv2.h
+++ b/arch/arc/include/asm/atomic64-arcv2.h
@@ -60,7 +60,7 @@ static inline void arch_atomic64_##op(s64 a, atomic64_t *v)		\
 	"	bnz     1b		\n"				\
 	: "=&r"(val)							\
 	: "r"(&v->counter), "ir"(a)					\
-	: "cc");							\
+	: "cc", "memory");						\
 }									\
 
 #define ATOMIC64_OP_RETURN(op, op1, op2)		        	\
@@ -77,7 +77,7 @@ static inline s64 arch_atomic64_##op##_return_relaxed(s64 a, atomic64_t *v)	\
 	"	bnz     1b		\n"				\
 	: [val] "=&r"(val)						\
 	: "r"(&v->counter), "ir"(a)					\
-	: "cc");	/* memory clobber comes from smp_mb() */	\
+	: "cc", "memory");						\
 									\
 	return val;							\
 }
@@ -99,7 +99,7 @@ static inline s64 arch_atomic64_fetch_##op##_relaxed(s64 a, atomic64_t *v)	\
 	"	bnz     1b		\n"				\
 	: "=&r"(orig), "=&r"(val)					\
 	: "r"(&v->counter), "ir"(a)					\
-	: "cc");	/* memory clobber comes from smp_mb() */	\
+	: "cc", "memory");						\
 									\
 	return orig;							\
 }
diff --git a/arch/arc/include/asm/current.h b/arch/arc/include/asm/current.h
index 9b9bdd3e6538..06be89f6f2f0 100644
--- a/arch/arc/include/asm/current.h
+++ b/arch/arc/include/asm/current.h
@@ -13,7 +13,7 @@
 
 #ifdef CONFIG_ARC_CURR_IN_REG
 
-register struct task_struct *curr_arc asm("r25");
+register struct task_struct *curr_arc asm("gp");
 #define current (curr_arc)
 
 #else
diff --git a/arch/arc/include/asm/dwarf.h b/arch/arc/include/asm/dwarf.h
index 5f4de05bd4ee..a0d5ebe1bc3f 100644
--- a/arch/arc/include/asm/dwarf.h
+++ b/arch/arc/include/asm/dwarf.h
@@ -10,23 +10,31 @@
 
 #ifdef ARC_DW2_UNWIND_AS_CFI
 
-#define CFI_STARTPROC	.cfi_startproc
-#define CFI_ENDPROC	.cfi_endproc
-#define CFI_DEF_CFA	.cfi_def_cfa
-#define CFI_REGISTER	.cfi_register
-#define CFI_REL_OFFSET	.cfi_rel_offset
-#define CFI_UNDEFINED	.cfi_undefined
+#define CFI_STARTPROC		.cfi_startproc
+#define CFI_ENDPROC		.cfi_endproc
+#define CFI_DEF_CFA		.cfi_def_cfa
+#define CFI_DEF_CFA_OFFSET	.cfi_def_cfa_offset
+#define CFI_DEF_CFA_REGISTER	.cfi_def_cfa_register
+#define CFI_OFFSET		.cfi_offset
+#define CFI_REL_OFFSET		.cfi_rel_offset
+#define CFI_REGISTER		.cfi_register
+#define CFI_RESTORE		.cfi_restore
+#define CFI_UNDEFINED		.cfi_undefined
 
 #else
 
 #define CFI_IGNORE	#
 
-#define CFI_STARTPROC	CFI_IGNORE
-#define CFI_ENDPROC	CFI_IGNORE
-#define CFI_DEF_CFA	CFI_IGNORE
-#define CFI_REGISTER	CFI_IGNORE
-#define CFI_REL_OFFSET	CFI_IGNORE
-#define CFI_UNDEFINED	CFI_IGNORE
+#define CFI_STARTPROC		CFI_IGNORE
+#define CFI_ENDPROC		CFI_IGNORE
+#define CFI_DEF_CFA		CFI_IGNORE
+#define CFI_DEF_CFA_OFFSET	CFI_IGNORE
+#define CFI_DEF_CFA_REGISTER	CFI_IGNORE
+#define CFI_OFFSET		CFI_IGNORE
+#define CFI_REL_OFFSET		CFI_IGNORE
+#define CFI_REGISTER		CFI_IGNORE
+#define CFI_RESTORE		CFI_IGNORE
+#define CFI_UNDEFINED		CFI_IGNORE
 
 #endif	/* !ARC_DW2_UNWIND_AS_CFI */
 
diff --git a/arch/arc/include/asm/entry-arcv2.h b/arch/arc/include/asm/entry-arcv2.h
index 0ff4c0610561..4d13320e0c1b 100644
--- a/arch/arc/include/asm/entry-arcv2.h
+++ b/arch/arc/include/asm/entry-arcv2.h
@@ -18,7 +18,6 @@
  *              |      orig_r0      |
  *              |      event/ECR    |
  *              |      bta          |
- *              |      user_r25     |
  *              |      gp           |
  *              |      fp           |
  *              |      sp           |
@@ -49,14 +48,18 @@
 /*------------------------------------------------------------------------*/
 .macro INTERRUPT_PROLOGUE
 
-	; (A) Before jumping to Interrupt Vector, hardware micro-ops did following:
+	; Before jumping to Interrupt Vector, hardware micro-ops did following:
 	;   1. SP auto-switched to kernel mode stack
 	;   2. STATUS32.Z flag set if in U mode at time of interrupt (U:1,K:0)
 	;   3. Auto save: (mandatory) Push PC and STAT32 on stack
 	;                 hardware does even if CONFIG_ARC_IRQ_NO_AUTOSAVE
-	;   4. Auto save: (optional) r0-r11, blink, LPE,LPS,LPC, JLI,LDI,EI
+	;  4a. Auto save: (optional) r0-r11, blink, LPE,LPS,LPC, JLI,LDI,EI
 	;
-	; (B) Manually saved some regs: r12,r25,r30, sp,fp,gp, ACCL pair
+	; Now
+	;  4b. If Auto-save (optional) not enabled in hw, manually save them
+	;   5. Manually save: r12,r30, sp,fp,gp, ACCL pair
+	;
+	; At the end, SP points to pt_regs
 
 #ifdef CONFIG_ARC_IRQ_NO_AUTOSAVE
 	; carve pt_regs on stack (case #3), PC/STAT32 already on stack
@@ -72,15 +75,16 @@
 .endm
 
 /*------------------------------------------------------------------------*/
-.macro EXCEPTION_PROLOGUE
+.macro EXCEPTION_PROLOGUE_KEEP_AE
 
-	; (A) Before jumping to Exception Vector, hardware micro-ops did following:
+	; Before jumping to Exception Vector, hardware micro-ops did following:
 	;   1. SP auto-switched to kernel mode stack
 	;   2. STATUS32.Z flag set if in U mode at time of exception (U:1,K:0)
 	;
-	; (B) Manually save the complete reg file below
+	; Now manually save rest of reg file
+	; At the end, SP points to pt_regs
 
-	sub	sp, sp, SZ_PT_REGS	; carve pt_regs
+	sub	sp, sp, SZ_PT_REGS	; carve space for pt_regs
 
 	; _HARD saves r10 clobbered by _SOFT as scratch hence comes first
 
@@ -100,6 +104,16 @@
 	; OUTPUT: r10 has ECR expected by EV_Trap
 .endm
 
+.macro EXCEPTION_PROLOGUE
+
+	EXCEPTION_PROLOGUE_KEEP_AE	; return ECR in r10
+
+	lr  r0, [efa]
+	mov r1, sp
+
+	FAKE_RET_FROM_EXCPN		; clobbers r9
+.endm
+
 /*------------------------------------------------------------------------
  * This macro saves the registers manually which would normally be autosaved
  * by hardware on taken interrupts. It is used by
@@ -135,10 +149,10 @@
  */
 .macro __SAVE_REGFILE_SOFT
 
-	ST2	gp, fp, PT_r26		; gp (r26), fp (r27)
-
-	st	r12, [sp, PT_sp + 4]
-	st	r30, [sp, PT_sp + 8]
+	st	fp,  [sp, PT_fp]	; r27
+	st	r30, [sp, PT_r30]
+	st	r12, [sp, PT_r12]
+	st	r26, [sp, PT_r26]	; gp
 
 	; Saving pt_regs->sp correctly requires some extra work due to the way
 	; Auto stack switch works
@@ -153,30 +167,30 @@
 
 	; ISA requires ADD.nz to have same dest and src reg operands
 	mov.nz	r10, sp
-	add.nz	r10, r10, SZ_PT_REGS	; K mode SP
+	add2.nz	r10, r10, SZ_PT_REGS/4	; K mode SP
 
 	st	r10, [sp, PT_sp]	; SP (pt_regs->sp)
 
-#ifdef CONFIG_ARC_CURR_IN_REG
-	st	r25, [sp, PT_user_r25]
-	GET_CURR_TASK_ON_CPU	r25
-#endif
-
 #ifdef CONFIG_ARC_HAS_ACCL_REGS
 	ST2	r58, r59, PT_r58
 #endif
 
 	/* clobbers r10, r11 registers pair */
 	DSP_SAVE_REGFILE_IRQ
+
+#ifdef CONFIG_ARC_CURR_IN_REG
+	GET_CURR_TASK_ON_CPU	gp
+#endif
+
 .endm
 
 /*------------------------------------------------------------------------*/
 .macro __RESTORE_REGFILE_SOFT
 
-	LD2	gp, fp, PT_r26		; gp (r26), fp (r27)
-
-	ld	r12, [sp, PT_r12]
+	ld	fp,  [sp, PT_fp]
 	ld	r30, [sp, PT_r30]
+	ld	r12, [sp, PT_r12]
+	ld	r26, [sp, PT_r26]
 
 	; Restore SP (into AUX_USER_SP) only if returning to U mode
 	;  - for K mode, it will be implicitly restored as stack is unwound
@@ -188,10 +202,6 @@
 	sr	r10, [AUX_USER_SP]
 1:
 
-#ifdef CONFIG_ARC_CURR_IN_REG
-	ld	r25, [sp, PT_user_r25]
-#endif
-
 	/* clobbers r10, r11 registers pair */
 	DSP_RESTORE_REGFILE_IRQ
 
@@ -249,7 +259,7 @@
 
 	btst	r0, STATUS_U_BIT	; Z flag set if K, used in restoring SP
 
-	ld	r10, [sp, PT_event + 4]
+	ld	r10, [sp, PT_bta]
 	sr	r10, [erbta]
 
 	LD2	r10, r11, PT_ret
@@ -264,8 +274,8 @@
 
 .macro FAKE_RET_FROM_EXCPN
 	lr      r9, [status32]
-	bic     r9, r9, STATUS_AE_MASK
-	or      r9, r9, STATUS_IE_MASK
+	bclr    r9, r9, STATUS_AE_BIT
+	bset    r9, r9, STATUS_IE_BIT
 	kflag   r9
 .endm
 
diff --git a/arch/arc/include/asm/entry-compact.h b/arch/arc/include/asm/entry-compact.h
index 67ff06e15cea..a0e760eb35a8 100644
--- a/arch/arc/include/asm/entry-compact.h
+++ b/arch/arc/include/asm/entry-compact.h
@@ -140,7 +140,7 @@
  *
  * After this it is safe to call the "C" handlers
  *-------------------------------------------------------------*/
-.macro EXCEPTION_PROLOGUE
+.macro EXCEPTION_PROLOGUE_KEEP_AE
 
 	/* Need at least 1 reg to code the early exception prologue */
 	PROLOG_FREEUP_REG r9, @ex_saved_reg1
@@ -151,14 +151,6 @@
 	/* ARC700 doesn't provide auto-stack switching */
 	SWITCH_TO_KERNEL_STK
 
-#ifdef CONFIG_ARC_CURR_IN_REG
-	/* Treat r25 as scratch reg (save on stack) and load with "current" */
-	PUSH    r25
-	GET_CURR_TASK_ON_CPU   r25
-#else
-	sub     sp, sp, 4
-#endif
-
 	st.a	r0, [sp, -8]    /* orig_r0 needed for syscall (skip ECR slot) */
 	sub	sp, sp, 4	/* skip pt_regs->sp, already saved above */
 
@@ -178,7 +170,23 @@
 	PUSHAX	erbta
 
 	lr	r10, [ecr]
-	st      r10, [sp, PT_event]    /* EV_Trap expects r10 to have ECR */
+	st      r10, [sp, PT_event]
+
+#ifdef CONFIG_ARC_CURR_IN_REG
+	/* gp already saved on stack: now load with "current" */
+	GET_CURR_TASK_ON_CPU   gp
+#endif
+	; OUTPUT: r10 has ECR expected by EV_Trap
+.endm
+
+.macro EXCEPTION_PROLOGUE
+
+	EXCEPTION_PROLOGUE_KEEP_AE	; return ECR in r10
+
+	lr  r0, [efa]
+	mov r1, sp
+
+	FAKE_RET_FROM_EXCPN		; clobbers r9
 .endm
 
 /*--------------------------------------------------------------
@@ -208,11 +216,8 @@
 	POP	gp
 	RESTORE_R12_TO_R0
 
-#ifdef CONFIG_ARC_CURR_IN_REG
-	ld	r25, [sp, 12]
-#endif
 	ld  sp, [sp] /* restore original sp */
-	/* orig_r0, ECR, user_r25 skipped automatically */
+	/* orig_r0, ECR skipped automatically */
 .endm
 
 /* Dummy ECR values for Interrupts */
@@ -229,13 +234,6 @@
 
 	SWITCH_TO_KERNEL_STK
 
-#ifdef CONFIG_ARC_CURR_IN_REG
-	/* Treat r25 as scratch reg (save on stack) and load with "current" */
-	PUSH    r25
-	GET_CURR_TASK_ON_CPU   r25
-#else
-	sub     sp, sp, 4
-#endif
 
 	PUSH	0x003\LVL\()abcd    /* Dummy ECR */
 	sub	sp, sp, 8	    /* skip orig_r0 (not needed)
@@ -255,6 +253,10 @@
 	PUSHAX	lp_start
 	PUSHAX	bta_l\LVL\()
 
+#ifdef CONFIG_ARC_CURR_IN_REG
+	/* gp already saved on stack: now load with "current" */
+	GET_CURR_TASK_ON_CPU   gp
+#endif
 .endm
 
 /*--------------------------------------------------------------
@@ -282,11 +284,7 @@
 	POP	gp
 	RESTORE_R12_TO_R0
 
-#ifdef CONFIG_ARC_CURR_IN_REG
-	ld	r25, [sp, 12]
-#endif
-	ld  sp, [sp] /* restore original sp */
-	/* orig_r0, ECR, user_r25 skipped automatically */
+	ld  sp, [sp] /* restore original sp; orig_r0, ECR skipped implicitly */
 .endm
 
 /* Get thread_info of "current" tsk */
diff --git a/arch/arc/include/asm/entry.h b/arch/arc/include/asm/entry.h
index fcdd59d77f42..49c2e090cb5c 100644
--- a/arch/arc/include/asm/entry.h
+++ b/arch/arc/include/asm/entry.h
@@ -13,6 +13,8 @@
 #include <asm/processor.h>	/* For VMALLOC_START */
 #include <asm/mmu.h>
 
+#ifdef __ASSEMBLY__
+
 #ifdef CONFIG_ISA_ARCOMPACT
 #include <asm/entry-compact.h>	/* ISA specific bits */
 #else
@@ -89,7 +91,7 @@
  * Helpers to save/restore callee-saved regs:
  * used by several macros below
  *-------------------------------------------------------------*/
-.macro SAVE_R13_TO_R24
+.macro SAVE_R13_TO_R25
 	PUSH	r13
 	PUSH	r14
 	PUSH	r15
@@ -102,9 +104,11 @@
 	PUSH	r22
 	PUSH	r23
 	PUSH	r24
+	PUSH	r25
 .endm
 
-.macro RESTORE_R24_TO_R13
+.macro RESTORE_R25_TO_R13
+	POP	r25
 	POP	r24
 	POP	r23
 	POP	r22
@@ -119,81 +123,31 @@
 	POP	r13
 .endm
 
-/*--------------------------------------------------------------
- * Collect User Mode callee regs as struct callee_regs - needed by
- * fork/do_signal/unaligned-access-emulation.
- * (By default only scratch regs are saved on entry to kernel)
- *
- * Special handling for r25 if used for caching Task Pointer.
- * It would have been saved in task->thread.user_r25 already, but to keep
- * the interface same it is copied into regular r25 placeholder in
- * struct callee_regs.
- *-------------------------------------------------------------*/
+/*
+ * save user mode callee regs as struct callee_regs
+ *  - needed by fork/do_signal/unaligned-access-emulation.
+ */
 .macro SAVE_CALLEE_SAVED_USER
+	SAVE_R13_TO_R25
+.endm
 
-	mov	r12, sp		; save SP as ref to pt_regs
-	SAVE_R13_TO_R24
-
-#ifdef CONFIG_ARC_CURR_IN_REG
-	; Retrieve orig r25 and save it with rest of callee_regs
-	ld	r12, [r12, PT_user_r25]
-	PUSH	r12
-#else
-	PUSH	r25
-#endif
-
+/*
+ * restore user mode callee regs as struct callee_regs
+ *  - could have been changed by ptrace tracer or unaligned-access fixup
+ */
+.macro RESTORE_CALLEE_SAVED_USER
+	RESTORE_R25_TO_R13
 .endm
 
-/*--------------------------------------------------------------
- * Save kernel Mode callee regs at the time of Contect Switch.
- *
- * Special handling for r25 if used for caching Task Pointer.
- * Kernel simply skips saving it since it will be loaded with
- * incoming task pointer anyways
- *-------------------------------------------------------------*/
+/*
+ * save/restore kernel mode callee regs at the time of context switch
+ */
 .macro SAVE_CALLEE_SAVED_KERNEL
-
-	SAVE_R13_TO_R24
-
-#ifdef CONFIG_ARC_CURR_IN_REG
-	sub     sp, sp, 4
-#else
-	PUSH	r25
-#endif
+	SAVE_R13_TO_R25
 .endm
 
-/*--------------------------------------------------------------
- * Opposite of SAVE_CALLEE_SAVED_KERNEL
- *-------------------------------------------------------------*/
 .macro RESTORE_CALLEE_SAVED_KERNEL
-
-#ifdef CONFIG_ARC_CURR_IN_REG
-	add     sp, sp, 4  /* skip usual r25 placeholder */
-#else
-	POP	r25
-#endif
-	RESTORE_R24_TO_R13
-.endm
-
-/*--------------------------------------------------------------
- * Opposite of SAVE_CALLEE_SAVED_USER
- *
- * ptrace tracer or unaligned-access fixup might have changed a user mode
- * callee reg which is saved back to usual r25 storage location
- *-------------------------------------------------------------*/
-.macro RESTORE_CALLEE_SAVED_USER
-
-#ifdef CONFIG_ARC_CURR_IN_REG
-	POP	r12
-#else
-	POP	r25
-#endif
-	RESTORE_R24_TO_R13
-
-	; SP is back to start of pt_regs
-#ifdef CONFIG_ARC_CURR_IN_REG
-	st	r12, [sp, PT_user_r25]
-#endif
+	RESTORE_R25_TO_R13
 .endm
 
 /*--------------------------------------------------------------
@@ -229,10 +183,10 @@
 
 #ifdef CONFIG_SMP
 
-/*-------------------------------------------------
+/*
  * Retrieve the current running task on this CPU
- * 1. Determine curr CPU id.
- * 2. Use it to index into _current_task[ ]
+ *  - loads it from backing _current_task[] (and can't use the
+ *    caching reg for current task
  */
 .macro  GET_CURR_TASK_ON_CPU   reg
 	GET_CPU_ID  \reg
@@ -254,7 +208,7 @@
 	add2 \tmp, @_current_task, \tmp
 	st   \tsk, [\tmp]
 #ifdef CONFIG_ARC_CURR_IN_REG
-	mov r25, \tsk
+	mov gp, \tsk
 #endif
 
 .endm
@@ -269,21 +223,20 @@
 .macro  SET_CURR_TASK_ON_CPU    tsk, tmp
 	st  \tsk, [@_current_task]
 #ifdef CONFIG_ARC_CURR_IN_REG
-	mov r25, \tsk
+	mov gp, \tsk
 #endif
 .endm
 
 #endif /* SMP / UNI */
 
-/* ------------------------------------------------------------------
+/*
  * Get the ptr to some field of Current Task at @off in task struct
- *  -Uses r25 for Current task ptr if that is enabled
+ *  - Uses current task cached in reg if enabled
  */
-
 #ifdef CONFIG_ARC_CURR_IN_REG
 
 .macro GET_CURR_TASK_FIELD_PTR  off,  reg
-	add \reg, r25, \off
+	add \reg, gp, \off
 .endm
 
 #else
@@ -295,4 +248,23 @@
 
 #endif	/* CONFIG_ARC_CURR_IN_REG */
 
+#else	/* !__ASSEMBLY__ */
+
+extern void do_signal(struct pt_regs *);
+extern void do_notify_resume(struct pt_regs *);
+extern int do_privilege_fault(unsigned long, struct pt_regs *);
+extern int do_extension_fault(unsigned long, struct pt_regs *);
+extern int insterror_is_error(unsigned long, struct pt_regs *);
+extern int do_memory_error(unsigned long, struct pt_regs *);
+extern int trap_is_brkpt(unsigned long, struct pt_regs *);
+extern int do_misaligned_error(unsigned long, struct pt_regs *);
+extern int do_trap5_error(unsigned long, struct pt_regs *);
+extern int do_misaligned_access(unsigned long, struct pt_regs *, struct callee_regs *);
+extern void do_machine_check_fault(unsigned long, struct pt_regs *);
+extern void do_non_swi_trap(unsigned long, struct pt_regs *);
+extern void do_insterror_or_kprobe(unsigned long, struct pt_regs *);
+extern void do_page_fault(unsigned long, struct pt_regs *);
+
+#endif
+
 #endif  /* __ASM_ARC_ENTRY_H */
diff --git a/arch/arc/include/asm/irq.h b/arch/arc/include/asm/irq.h
index 0309cb405cfb..c574712ad865 100644
--- a/arch/arc/include/asm/irq.h
+++ b/arch/arc/include/asm/irq.h
@@ -25,5 +25,6 @@
 #include <asm-generic/irq.h>
 
 extern void arc_init_IRQ(void);
+extern void arch_do_IRQ(unsigned int, struct pt_regs *);
 
 #endif
diff --git a/arch/arc/include/asm/mmu.h b/arch/arc/include/asm/mmu.h
index ca427c30f70e..9febf5bc3de6 100644
--- a/arch/arc/include/asm/mmu.h
+++ b/arch/arc/include/asm/mmu.h
@@ -14,6 +14,8 @@ typedef struct {
 	unsigned long asid[NR_CPUS];	/* 8 bit MMU PID + Generation cycle */
 } mm_context_t;
 
+extern void do_tlb_overlap_fault(unsigned long, unsigned long, struct pt_regs *);
+
 #endif
 
 #include <asm/mmu-arcv2.h>
diff --git a/arch/arc/include/asm/processor.h b/arch/arc/include/asm/processor.h
index fb844fce1ab6..d606658e2fe7 100644
--- a/arch/arc/include/asm/processor.h
+++ b/arch/arc/include/asm/processor.h
@@ -22,7 +22,6 @@
  * struct thread_info
  */
 struct thread_struct {
-	unsigned long ksp;	/* kernel mode stack pointer */
 	unsigned long callee_reg;	/* pointer to callee regs */
 	unsigned long fault_address;	/* dbls as brkpt holder as well */
 #ifdef CONFIG_ARC_DSP_SAVE_RESTORE_REGS
@@ -33,9 +32,7 @@ struct thread_struct {
 #endif
 };
 
-#define INIT_THREAD  {                          \
-	.ksp = sizeof(init_stack) + (unsigned long) init_stack, \
-}
+#define INIT_THREAD  { }
 
 /* Forward declaration, a strange C thing */
 struct task_struct;
@@ -56,7 +53,7 @@ struct task_struct;
  * Where about of Task's sp, fp, blink when it was last seen in kernel mode.
  * Look in process.c for details of kernel stack layout
  */
-#define TSK_K_ESP(tsk)		(tsk->thread.ksp)
+#define TSK_K_ESP(tsk)		(task_thread_info(tsk)->ksp)
 
 #define TSK_K_REG(tsk, off)	(*((unsigned long *)(TSK_K_ESP(tsk) + \
 					sizeof(struct callee_regs) + off)))
diff --git a/arch/arc/include/asm/ptrace.h b/arch/arc/include/asm/ptrace.h
index 5869a74c0db2..4a2b30fb5a98 100644
--- a/arch/arc/include/asm/ptrace.h
+++ b/arch/arc/include/asm/ptrace.h
@@ -12,6 +12,17 @@
 
 #ifndef __ASSEMBLY__
 
+typedef union {
+	struct {
+#ifdef CONFIG_CPU_BIG_ENDIAN
+		unsigned long state:8, vec:8, cause:8, param:8;
+#else
+		unsigned long param:8, cause:8, vec:8, state:8;
+#endif
+	};
+	unsigned long full;
+} ecr_reg;
+
 /* THE pt_regs: Defines how regs are saved during entry into kernel */
 
 #ifdef CONFIG_ISA_ARCOMPACT
@@ -40,23 +51,10 @@ struct pt_regs {
 	 * 	Last word used by Linux for extra state mgmt (syscall-restart)
 	 * For interrupts, use artificial ECR values to note current prio-level
 	 */
-	union {
-		struct {
-#ifdef CONFIG_CPU_BIG_ENDIAN
-			unsigned long state:8, ecr_vec:8,
-				      ecr_cause:8, ecr_param:8;
-#else
-			unsigned long ecr_param:8, ecr_cause:8,
-				      ecr_vec:8, state:8;
-#endif
-		};
-		unsigned long event;
-	};
-
-	unsigned long user_r25;
+	ecr_reg ecr;
 };
 
-#define MAX_REG_OFFSET offsetof(struct pt_regs, user_r25)
+#define MAX_REG_OFFSET offsetof(struct pt_regs, ecr)
 
 #else
 
@@ -64,28 +62,14 @@ struct pt_regs {
 
 	unsigned long orig_r0;
 
-	union {
-		struct {
-#ifdef CONFIG_CPU_BIG_ENDIAN
-			unsigned long state:8, ecr_vec:8,
-				      ecr_cause:8, ecr_param:8;
-#else
-			unsigned long ecr_param:8, ecr_cause:8,
-				      ecr_vec:8, state:8;
-#endif
-		};
-		unsigned long event;
-	};
-
-	unsigned long bta;	/* bta_l1, bta_l2, erbta */
+	ecr_reg ecr;		/* Exception Cause Reg */
 
-	unsigned long user_r25;
+	unsigned long bta;	/* erbta */
 
-	unsigned long r26;	/* gp */
 	unsigned long fp;
-	unsigned long sp;	/* user/kernel sp depending on where we came from  */
-
-	unsigned long r12, r30;
+	unsigned long r30;
+	unsigned long r12;
+	unsigned long r26;	/* gp */
 
 #ifdef CONFIG_ARC_HAS_ACCL_REGS
 	unsigned long r58, r59;	/* ACCL/ACCH used by FPU / DSP MPY */
@@ -94,6 +78,8 @@ struct pt_regs {
 	unsigned long DSP_CTRL;
 #endif
 
+	unsigned long sp;	/* user/kernel sp depending on entry  */
+
 	/*------- Below list auto saved by h/w -----------*/
 	unsigned long r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11;
 
@@ -134,13 +120,13 @@ struct callee_regs {
 /* return 1 if PC in delay slot */
 #define delay_mode(regs) ((regs->status32 & STATUS_DE_MASK) == STATUS_DE_MASK)
 
-#define in_syscall(regs)    ((regs->ecr_vec == ECR_V_TRAP) && !regs->ecr_param)
-#define in_brkpt_trap(regs) ((regs->ecr_vec == ECR_V_TRAP) && regs->ecr_param)
+#define in_syscall(regs)    ((regs->ecr.vec == ECR_V_TRAP) && !regs->ecr.param)
+#define in_brkpt_trap(regs) ((regs->ecr.vec == ECR_V_TRAP) && regs->ecr.param)
 
 #define STATE_SCALL_RESTARTED	0x01
 
-#define syscall_wont_restart(reg) (reg->state |= STATE_SCALL_RESTARTED)
-#define syscall_restartable(reg) !(reg->state &  STATE_SCALL_RESTARTED)
+#define syscall_wont_restart(regs) (regs->ecr.state |= STATE_SCALL_RESTARTED)
+#define syscall_restartable(regs) !(regs->ecr.state &  STATE_SCALL_RESTARTED)
 
 #define current_pt_regs()					\
 ({								\
@@ -181,6 +167,9 @@ static inline unsigned long regs_get_register(struct pt_regs *regs,
 	return *(unsigned long *)((unsigned long)regs + offset);
 }
 
+extern int syscall_trace_entry(struct pt_regs *);
+extern void syscall_trace_exit(struct pt_regs *);
+
 #endif /* !__ASSEMBLY__ */
 
 #endif /* __ASM_PTRACE_H */
diff --git a/arch/arc/include/asm/setup.h b/arch/arc/include/asm/setup.h
index 028a8cf76206..1c6db599e1fc 100644
--- a/arch/arc/include/asm/setup.h
+++ b/arch/arc/include/asm/setup.h
@@ -35,11 +35,11 @@ long __init arc_get_mem_sz(void);
 #define IS_AVAIL3(v, v2, s)	IS_AVAIL1(v, s), IS_AVAIL1(v, IS_DISABLED_RUN(v2))
 
 extern void arc_mmu_init(void);
-extern char *arc_mmu_mumbojumbo(int cpu_id, char *buf, int len);
-extern void read_decode_mmu_bcr(void);
+extern int arc_mmu_mumbojumbo(int cpu_id, char *buf, int len);
 
 extern void arc_cache_init(void);
-extern char *arc_cache_mumbojumbo(int cpu_id, char *buf, int len);
-extern void read_decode_cache_bcr(void);
+extern int arc_cache_mumbojumbo(int cpu_id, char *buf, int len);
+
+extern void __init handle_uboot_args(void);
 
 #endif /* __ASMARC_SETUP_H */
diff --git a/arch/arc/include/asm/smp.h b/arch/arc/include/asm/smp.h
index d856491606ac..e0913f52c2cd 100644
--- a/arch/arc/include/asm/smp.h
+++ b/arch/arc/include/asm/smp.h
@@ -29,6 +29,8 @@ extern void arch_send_call_function_ipi_mask(const struct cpumask *mask);
 extern void __init smp_init_cpus(void);
 extern void first_lines_of_secondary(void);
 extern const char *arc_platform_smp_cpuinfo(void);
+extern void arc_platform_smp_wait_to_boot(int);
+extern void start_kernel_secondary(void);
 
 /*
  * API expected BY platform smp code (FROM arch smp code)
diff --git a/arch/arc/include/asm/thread_info.h b/arch/arc/include/asm/thread_info.h
index 6ba7fe417095..4c530cf131f3 100644
--- a/arch/arc/include/asm/thread_info.h
+++ b/arch/arc/include/asm/thread_info.h
@@ -37,16 +37,16 @@
  */
 struct thread_info {
 	unsigned long flags;		/* low level flags */
+	unsigned long ksp;		/* kernel mode stack top in __switch_to */
 	int preempt_count;		/* 0 => preemptable, <0 => BUG */
-	struct task_struct *task;	/* main task structure */
-	__u32 cpu;			/* current CPU */
+	int cpu;			/* current CPU */
 	unsigned long thr_ptr;		/* TLS ptr */
+	struct task_struct *task;	/* main task structure */
 };
 
 /*
- * macros/functions for gaining access to the thread information structure
- *
- * preempt_count needs to be 1 initially, until the scheduler is functional.
+ * initilaize thread_info for any @tsk
+ *  - this is not related to init_task per se
  */
 #define INIT_THREAD_INFO(tsk)			\
 {						\
diff --git a/arch/arc/include/asm/uaccess.h b/arch/arc/include/asm/uaccess.h
index 99712471c96a..1e8809ea000a 100644
--- a/arch/arc/include/asm/uaccess.h
+++ b/arch/arc/include/asm/uaccess.h
@@ -146,8 +146,9 @@ raw_copy_from_user(void *to, const void __user *from, unsigned long n)
 	if (n == 0)
 		return 0;
 
-	/* unaligned */
-	if (((unsigned long)to & 0x3) || ((unsigned long)from & 0x3)) {
+	/* fallback for unaligned access when hardware doesn't support */
+	if (!IS_ENABLED(CONFIG_ARC_USE_UNALIGNED_MEM_ACCESS) &&
+	     (((unsigned long)to & 0x3) || ((unsigned long)from & 0x3))) {
 
 		unsigned char tmp;
 
@@ -373,8 +374,9 @@ raw_copy_to_user(void __user *to, const void *from, unsigned long n)
 	if (n == 0)
 		return 0;
 
-	/* unaligned */
-	if (((unsigned long)to & 0x3) || ((unsigned long)from & 0x3)) {
+	/* fallback for unaligned access when hardware doesn't support */
+	if (!IS_ENABLED(CONFIG_ARC_USE_UNALIGNED_MEM_ACCESS) &&
+	     (((unsigned long)to & 0x3) || ((unsigned long)from & 0x3))) {
 
 		unsigned char tmp;
 
@@ -584,7 +586,7 @@ raw_copy_to_user(void __user *to, const void *from, unsigned long n)
 	return res;
 }
 
-static inline unsigned long __arc_clear_user(void __user *to, unsigned long n)
+static inline unsigned long __clear_user(void __user *to, unsigned long n)
 {
 	long res = n;
 	unsigned char *d_char = to;
@@ -626,17 +628,10 @@ static inline unsigned long __arc_clear_user(void __user *to, unsigned long n)
 	return res;
 }
 
-#ifndef CONFIG_CC_OPTIMIZE_FOR_SIZE
-
 #define INLINE_COPY_TO_USER
 #define INLINE_COPY_FROM_USER
 
-#define __clear_user(d, n)		__arc_clear_user(d, n)
-#else
-extern unsigned long arc_clear_user_noinline(void __user *to,
-		unsigned long n);
-#define __clear_user(d, n)		arc_clear_user_noinline(d, n)
-#endif
+#define __clear_user			__clear_user
 
 #include <asm-generic/uaccess.h>
 
diff --git a/arch/arc/kernel/Makefile b/arch/arc/kernel/Makefile
index 0723d888ac44..95fbf9364c67 100644
--- a/arch/arc/kernel/Makefile
+++ b/arch/arc/kernel/Makefile
@@ -5,6 +5,8 @@
 
 obj-y	:= head.o arcksyms.o setup.o irq.o reset.o ptrace.o process.o devtree.o
 obj-y	+= signal.o traps.o sys.o troubleshoot.o stacktrace.o disasm.o
+obj-y	+= ctx_sw_asm.o
+
 obj-$(CONFIG_ISA_ARCOMPACT)		+= entry-compact.o intc-compact.o
 obj-$(CONFIG_ISA_ARCV2)			+= entry-arcv2.o intc-arcv2.o
 
@@ -24,11 +26,4 @@ ifdef CONFIG_ISA_ARCOMPACT
 CFLAGS_fpu.o   += -mdpfp
 endif
 
-ifdef CONFIG_ARC_DW2_UNWIND
-CFLAGS_ctx_sw.o += -fno-omit-frame-pointer
-obj-y += ctx_sw.o
-else
-obj-y += ctx_sw_asm.o
-endif
-
 extra-y := vmlinux.lds
diff --git a/arch/arc/kernel/asm-offsets.c b/arch/arc/kernel/asm-offsets.c
index 0e884036ab74..f77deb799175 100644
--- a/arch/arc/kernel/asm-offsets.c
+++ b/arch/arc/kernel/asm-offsets.c
@@ -20,13 +20,13 @@ int main(void)
 
 	BLANK();
 
-	DEFINE(THREAD_KSP, offsetof(struct thread_struct, ksp));
 	DEFINE(THREAD_CALLEE_REG, offsetof(struct thread_struct, callee_reg));
 	DEFINE(THREAD_FAULT_ADDR,
 	       offsetof(struct thread_struct, fault_address));
 
 	BLANK();
 
+	DEFINE(THREAD_INFO_KSP, offsetof(struct thread_info, ksp));
 	DEFINE(THREAD_INFO_FLAGS, offsetof(struct thread_info, flags));
 	DEFINE(THREAD_INFO_PREEMPT_COUNT,
 	       offsetof(struct thread_info, preempt_count));
@@ -46,7 +46,8 @@ int main(void)
 	BLANK();
 
 	DEFINE(PT_status32, offsetof(struct pt_regs, status32));
-	DEFINE(PT_event, offsetof(struct pt_regs, event));
+	DEFINE(PT_event, offsetof(struct pt_regs, ecr));
+	DEFINE(PT_bta, offsetof(struct pt_regs, bta));
 	DEFINE(PT_sp, offsetof(struct pt_regs, sp));
 	DEFINE(PT_r0, offsetof(struct pt_regs, r0));
 	DEFINE(PT_r1, offsetof(struct pt_regs, r1));
@@ -61,13 +62,9 @@ int main(void)
 	DEFINE(PT_r26, offsetof(struct pt_regs, r26));
 	DEFINE(PT_ret, offsetof(struct pt_regs, ret));
 	DEFINE(PT_blink, offsetof(struct pt_regs, blink));
+	OFFSET(PT_fp, pt_regs, fp);
 	DEFINE(PT_lpe, offsetof(struct pt_regs, lp_end));
 	DEFINE(PT_lpc, offsetof(struct pt_regs, lp_count));
-	DEFINE(PT_user_r25, offsetof(struct pt_regs, user_r25));
-
-	DEFINE(SZ_CALLEE_REGS, sizeof(struct callee_regs));
-	DEFINE(SZ_PT_REGS, sizeof(struct pt_regs));
-
 #ifdef CONFIG_ISA_ARCV2
 	OFFSET(PT_r12, pt_regs, r12);
 	OFFSET(PT_r30, pt_regs, r30);
@@ -80,5 +77,8 @@ int main(void)
 	OFFSET(PT_DSP_CTRL, pt_regs, DSP_CTRL);
 #endif
 
+	DEFINE(SZ_CALLEE_REGS, sizeof(struct callee_regs));
+	DEFINE(SZ_PT_REGS, sizeof(struct pt_regs));
+
 	return 0;
 }
diff --git a/arch/arc/kernel/ctx_sw.c b/arch/arc/kernel/ctx_sw.c
deleted file mode 100644
index 1a76f2d6f694..000000000000
--- a/arch/arc/kernel/ctx_sw.c
+++ /dev/null
@@ -1,112 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
- *
- * Vineetg: Aug 2009
- *  -"C" version of lowest level context switch asm macro called by schedular
- *   gcc doesn't generate the dward CFI info for hand written asm, hence can't
- *   backtrace out of it (e.g. tasks sleeping in kernel).
- *   So we cheat a bit by writing almost similar code in inline-asm.
- *  -This is a hacky way of doing things, but there is no other simple way.
- *   I don't want/intend to extend unwinding code to understand raw asm
- */
-
-#include <asm/asm-offsets.h>
-#include <linux/sched.h>
-#include <linux/sched/debug.h>
-
-#define KSP_WORD_OFF 	((TASK_THREAD + THREAD_KSP) / 4)
-
-struct task_struct *__sched
-__switch_to(struct task_struct *prev_task, struct task_struct *next_task)
-{
-	unsigned int tmp;
-	unsigned int prev = (unsigned int)prev_task;
-	unsigned int next = (unsigned int)next_task;
-
-	__asm__ __volatile__(
-		/* FP/BLINK save generated by gcc (standard function prologue */
-		"st.a    r13, [sp, -4]   \n\t"
-		"st.a    r14, [sp, -4]   \n\t"
-		"st.a    r15, [sp, -4]   \n\t"
-		"st.a    r16, [sp, -4]   \n\t"
-		"st.a    r17, [sp, -4]   \n\t"
-		"st.a    r18, [sp, -4]   \n\t"
-		"st.a    r19, [sp, -4]   \n\t"
-		"st.a    r20, [sp, -4]   \n\t"
-		"st.a    r21, [sp, -4]   \n\t"
-		"st.a    r22, [sp, -4]   \n\t"
-		"st.a    r23, [sp, -4]   \n\t"
-		"st.a    r24, [sp, -4]   \n\t"
-#ifndef CONFIG_ARC_CURR_IN_REG
-		"st.a    r25, [sp, -4]   \n\t"
-#else
-		"sub     sp, sp, 4      \n\t"	/* usual r25 placeholder */
-#endif
-
-		/* set ksp of outgoing task in tsk->thread.ksp */
-#if KSP_WORD_OFF <= 255
-		"st.as   sp, [%3, %1]    \n\t"
-#else
-		/*
-		 * Workaround for NR_CPUS=4k
-		 * %1 is bigger than 255 (S9 offset for st.as)
-		 */
-		"add2    r24, %3, %1     \n\t"
-		"st      sp, [r24]       \n\t"
-#endif
-
-		/*
-		 * setup _current_task with incoming tsk.
-		 * optionally, set r25 to that as well
-		 * For SMP extra work to get to &_current_task[cpu]
-		 * (open coded SET_CURR_TASK_ON_CPU)
-		 */
-#ifndef CONFIG_SMP
-		"st  %2, [@_current_task]	\n\t"
-#else
-		"lr   r24, [identity]		\n\t"
-		"lsr  r24, r24, 8		\n\t"
-		"bmsk r24, r24, 7		\n\t"
-		"add2 r24, @_current_task, r24	\n\t"
-		"st   %2,  [r24]		\n\t"
-#endif
-#ifdef CONFIG_ARC_CURR_IN_REG
-		"mov r25, %2   \n\t"
-#endif
-
-		/* get ksp of incoming task from tsk->thread.ksp */
-		"ld.as  sp, [%2, %1]   \n\t"
-
-		/* start loading it's CALLEE reg file */
-
-#ifndef CONFIG_ARC_CURR_IN_REG
-		"ld.ab   r25, [sp, 4]   \n\t"
-#else
-		"add    sp, sp, 4       \n\t"
-#endif
-		"ld.ab   r24, [sp, 4]   \n\t"
-		"ld.ab   r23, [sp, 4]   \n\t"
-		"ld.ab   r22, [sp, 4]   \n\t"
-		"ld.ab   r21, [sp, 4]   \n\t"
-		"ld.ab   r20, [sp, 4]   \n\t"
-		"ld.ab   r19, [sp, 4]   \n\t"
-		"ld.ab   r18, [sp, 4]   \n\t"
-		"ld.ab   r17, [sp, 4]   \n\t"
-		"ld.ab   r16, [sp, 4]   \n\t"
-		"ld.ab   r15, [sp, 4]   \n\t"
-		"ld.ab   r14, [sp, 4]   \n\t"
-		"ld.ab   r13, [sp, 4]   \n\t"
-
-		/* last (ret value) = prev : although for ARC it mov r0, r0 */
-		"mov     %0, %3        \n\t"
-
-		/* FP/BLINK restore generated by gcc (standard func epilogue */
-
-		: "=r"(tmp)
-		: "n"(KSP_WORD_OFF), "r"(next), "r"(prev)
-		: "blink"
-	);
-
-	return (struct task_struct *)tmp;
-}
diff --git a/arch/arc/kernel/ctx_sw_asm.S b/arch/arc/kernel/ctx_sw_asm.S
index 02c461484761..48e1f21976ed 100644
--- a/arch/arc/kernel/ctx_sw_asm.S
+++ b/arch/arc/kernel/ctx_sw_asm.S
@@ -11,50 +11,54 @@
 #include <asm/entry.h>       /* For the SAVE_* macros */
 #include <asm/asm-offsets.h>
 
-#define KSP_WORD_OFF 	((TASK_THREAD + THREAD_KSP) / 4)
-
-;################### Low Level Context Switch ##########################
+; IN
+;  - r0: prev task (also current)
+;  - r1: next task
+; OUT
+;  - r0: prev task (so r0 not touched)
 
 	.section .sched.text,"ax",@progbits
-	.align 4
-	.global __switch_to
-	.type   __switch_to, @function
-__switch_to:
-	CFI_STARTPROC
-
-	/* Save regs on kernel mode stack of task */
-	st.a    blink, [sp, -4]
-	st.a    fp, [sp, -4]
-	SAVE_CALLEE_SAVED_KERNEL
+ENTRY_CFI(__switch_to)
 
-	/* Save the now KSP in task->thread.ksp */
-#if KSP_WORD_OFF  <= 255
-	st.as  sp, [r0, KSP_WORD_OFF]
-#else
-	/* Workaround for NR_CPUS=4k as ST.as can only take s9 offset */
-	add2	r24, r0, KSP_WORD_OFF
-	st	sp, [r24]
-#endif
-	/*
-	* Return last task in r0 (return reg)
-	* On ARC, Return reg = First Arg reg = r0.
-	* Since we already have last task in r0,
-	* don't need to do anything special to return it
-	*/
+	/* save kernel stack frame regs of @prev task */
+	push	blink
+	CFI_DEF_CFA_OFFSET 4
+	CFI_OFFSET r31, -4
+
+	push	fp
+	CFI_DEF_CFA_OFFSET 8
+	CFI_OFFSET r27, -8
+
+	mov	fp, sp
+	CFI_DEF_CFA_REGISTER r27
+
+	/* kernel mode callee regs of @prev */
+	SAVE_CALLEE_SAVED_KERNEL
 
 	/*
-	 * switch to new task, contained in r1
-	 * Temp reg r3 is required to get the ptr to store val
+	 * save final SP to @prev->thread_info.ksp
+	 * @prev is "current" so thread_info derived from SP
 	 */
-	SET_CURR_TASK_ON_CPU  r1, r3
+	GET_CURR_THR_INFO_FROM_SP  r10
+	st	sp,  [r10, THREAD_INFO_KSP]
+
+	/* update @next in _current_task[] and GP register caching it */
+	SET_CURR_TASK_ON_CPU  r1, r10
 
-	/* reload SP with kernel mode stack pointer in task->thread.ksp */
-	ld.as  sp, [r1, (TASK_THREAD + THREAD_KSP)/4]
+	/* load SP from @next->thread_info.ksp */
+	ld	r10, [r1, TASK_THREAD_INFO]
+	ld	sp,  [r10, THREAD_INFO_KSP]
 
-	/* restore the registers */
+	/* restore callee regs, stack frame regs of @next */
 	RESTORE_CALLEE_SAVED_KERNEL
-	ld.ab   fp, [sp, 4]
-	ld.ab   blink, [sp, 4]
-	j       [blink]
 
+	pop	fp
+	CFI_RESTORE r27
+	CFI_DEF_CFA r28, 4
+
+	pop	blink
+	CFI_RESTORE r31
+	CFI_DEF_CFA_OFFSET 0
+
+	j      [blink]
 END_CFI(__switch_to)
diff --git a/arch/arc/kernel/devtree.c b/arch/arc/kernel/devtree.c
index 721d465f1580..4c9e61457b2f 100644
--- a/arch/arc/kernel/devtree.c
+++ b/arch/arc/kernel/devtree.c
@@ -12,6 +12,7 @@
 #include <linux/of.h>
 #include <linux/of_fdt.h>
 #include <asm/mach_desc.h>
+#include <asm/serial.h>
 
 #ifdef CONFIG_SERIAL_EARLYCON
 
diff --git a/arch/arc/kernel/entry-arcv2.S b/arch/arc/kernel/entry-arcv2.S
index a7e6a2174187..2e49c81c8086 100644
--- a/arch/arc/kernel/entry-arcv2.S
+++ b/arch/arc/kernel/entry-arcv2.S
@@ -125,11 +125,6 @@ ENTRY(mem_service)
 
 	EXCEPTION_PROLOGUE
 
-	lr  r0, [efa]
-	mov r1, sp
-
-	FAKE_RET_FROM_EXCPN
-
 	bl  do_memory_error
 	b   ret_from_exception
 END(mem_service)
@@ -138,11 +133,6 @@ ENTRY(EV_Misaligned)
 
 	EXCEPTION_PROLOGUE
 
-	lr  r0, [efa]	; Faulting Data address
-	mov r1, sp
-
-	FAKE_RET_FROM_EXCPN
-
 	SAVE_CALLEE_SAVED_USER
 	mov r2, sp              ; callee_regs
 
@@ -163,11 +153,6 @@ ENTRY(EV_TLBProtV)
 
 	EXCEPTION_PROLOGUE
 
-	lr  r0, [efa]	; Faulting Data address
-	mov r1, sp	; pt_regs
-
-	FAKE_RET_FROM_EXCPN
-
 	mov blink, ret_from_exception
 	b   do_page_fault
 
diff --git a/arch/arc/kernel/entry-compact.S b/arch/arc/kernel/entry-compact.S
index 5cb0cd7e4eab..774c03cc1d1a 100644
--- a/arch/arc/kernel/entry-compact.S
+++ b/arch/arc/kernel/entry-compact.S
@@ -254,18 +254,7 @@ END(handle_interrupt_level1)
 
 ENTRY(EV_TLBProtV)
 
-	EXCEPTION_PROLOGUE
-
-	mov r2, r10	; ECR set into r10 already
-	lr  r0, [efa]	; Faulting Data address (not part of pt_regs saved above)
-
-	; Exception auto-disables further Intr/exceptions.
-	; Re-enable them by pretending to return from exception
-	; (so rest of handler executes in pure K mode)
-
-	FAKE_RET_FROM_EXCPN
-
-	mov   r1, sp	; Handle to pt_regs
+	EXCEPTION_PROLOGUE	; ECR returned in r10
 
 	;------ (5) Type of Protection Violation? ----------
 	;
@@ -273,8 +262,7 @@ ENTRY(EV_TLBProtV)
 	;   -Access Violation	: 00_23_(00|01|02|03)_00
 	;			         x  r  w  r+w
 	;   -Unaligned Access	: 00_23_04_00
-	;
-	bbit1 r2, ECR_C_BIT_PROTV_MISALIG_DATA, 4f
+	bbit1 r10, ECR_C_BIT_PROTV_MISALIG_DATA, 4f
 
 	;========= (6a) Access Violation Processing ========
 	bl  do_page_fault
@@ -303,9 +291,6 @@ END(EV_TLBProtV)
 ENTRY(call_do_page_fault)
 
 	EXCEPTION_PROLOGUE
-	lr  r0, [efa]	; Faulting Data address
-	mov   r1, sp
-	FAKE_RET_FROM_EXCPN
 
 	mov blink, ret_from_exception
 	b  do_page_fault
diff --git a/arch/arc/kernel/entry.S b/arch/arc/kernel/entry.S
index 54e91df678dd..089f6680518f 100644
--- a/arch/arc/kernel/entry.S
+++ b/arch/arc/kernel/entry.S
@@ -80,11 +80,6 @@ ENTRY(instr_service)
 
 	EXCEPTION_PROLOGUE
 
-	lr  r0, [efa]
-	mov r1, sp
-
-	FAKE_RET_FROM_EXCPN
-
 	bl  do_insterror_or_kprobe
 	b   ret_from_exception
 END(instr_service)
@@ -95,16 +90,15 @@ END(instr_service)
 
 ENTRY(EV_MachineCheck)
 
-	EXCEPTION_PROLOGUE
+	EXCEPTION_PROLOGUE_KEEP_AE	; ECR returned in r10
 
-	lr  r2, [ecr]
 	lr  r0, [efa]
 	mov r1, sp
 
 	; MC excpetions disable MMU
 	ARC_MMU_REENABLE r3
 
-	lsr  	r3, r2, 8
+	lsr  	r3, r10, 8
 	bmsk 	r3, r3, 7
 	brne    r3, ECR_C_MCHK_DUP_TLB, 1f
 
@@ -129,11 +123,6 @@ ENTRY(EV_PrivilegeV)
 
 	EXCEPTION_PROLOGUE
 
-	lr  r0, [efa]
-	mov r1, sp
-
-	FAKE_RET_FROM_EXCPN
-
 	bl  do_privilege_fault
 	b   ret_from_exception
 END(EV_PrivilegeV)
@@ -145,11 +134,6 @@ ENTRY(EV_Extension)
 
 	EXCEPTION_PROLOGUE
 
-	lr  r0, [efa]
-	mov r1, sp
-
-	FAKE_RET_FROM_EXCPN
-
 	bl  do_extension_fault
 	b   ret_from_exception
 END(EV_Extension)
@@ -160,20 +144,19 @@ END(EV_Extension)
 ; syscall Tracing
 ; ---------------------------------------------
 tracesys:
-	; save EFA in case tracer wants the PC of traced task
-	; using ERET won't work since next-PC has already committed
+	; safekeep EFA (r12) if syscall tracer wanted PC
+	; for traps, ERET is pre-commit so points to next-PC
 	GET_CURR_TASK_FIELD_PTR   TASK_THREAD, r11
 	st  r12, [r11, THREAD_FAULT_ADDR]	; thread.fault_address
 
-	; PRE Sys Call Ptrace hook
-	mov r0, sp			; pt_regs needed
-	bl  @syscall_trace_entry
+	; PRE syscall trace hook
+	mov r0, sp				; pt_regs
+	bl  @syscall_trace_enter
 
 	; Tracing code now returns the syscall num (orig or modif)
 	mov r8, r0
 
 	; Do the Sys Call as we normally would.
-	; Validate the Sys Call number
 	cmp     r8,  NR_syscalls - 1
 	mov.hi  r0, -ENOSYS
 	bhi     tracesys_exit
@@ -190,37 +173,36 @@ tracesys:
 	ld  r6, [sp, PT_r6]
 	ld  r7, [sp, PT_r7]
 	ld.as   r9, [sys_call_table, r8]
-	jl      [r9]        ; Entry into Sys Call Handler
+	jl      [r9]
 
 tracesys_exit:
-	st  r0, [sp, PT_r0]     ; sys call return value in pt_regs
+	st  r0, [sp, PT_r0]
 
-	;POST Sys Call Ptrace Hook
+	; POST syscall trace hook
 	mov r0, sp		; pt_regs needed
 	bl  @syscall_trace_exit
-	b   ret_from_exception ; NOT ret_from_system_call at is saves r0 which
-	; we'd done before calling post hook above
+
+	; don't call ret_from_system_call as it saves r0, already done above
+	b   ret_from_exception
 
 ; ---------------------------------------------
 ; Breakpoint TRAP
 ; ---------------------------------------------
 trap_with_param:
 	mov r0, r12	; EFA in case ptracer/gdb wants stop_pc
-	mov r1, sp
+	mov r1, sp	; pt_regs
 
-	; Save callee regs in case gdb wants to have a look
-	; SP will grow up by size of CALLEE Reg-File
-	; NOTE: clobbers r12
+	; save callee regs in case tracer/gdb wants to peek
 	SAVE_CALLEE_SAVED_USER
 
-	; save location of saved Callee Regs @ thread_struct->pc
+	; safekeep ref to callee regs
 	GET_CURR_TASK_FIELD_PTR   TASK_THREAD, r10
 	st  sp, [r10, THREAD_CALLEE_REG]
 
-	; Call the trap handler
+	; call the non syscall trap handler
 	bl  do_non_swi_trap
 
-	; unwind stack to discard Callee saved Regs
+	; unwind stack to discard callee regs
 	DISCARD_CALLEE_SAVED_USER
 
 	b   ret_from_exception
@@ -232,37 +214,33 @@ trap_with_param:
 
 ENTRY(EV_Trap)
 
-	EXCEPTION_PROLOGUE
+	EXCEPTION_PROLOGUE_KEEP_AE
 
 	lr  r12, [efa]
 
 	FAKE_RET_FROM_EXCPN
 
-	;============ TRAP 1   :breakpoints
-	; Check ECR for trap with arg (PROLOGUE ensures r10 has ECR)
+	;============ TRAP N : breakpoints, kprobes etc
 	bmsk.f 0, r10, 7
 	bnz    trap_with_param
 
-	;============ TRAP  (no param): syscall top level
+	;============ TRAP 0 (no param): syscall
 
-	; If syscall tracing ongoing, invoke pre-post-hooks
+	; syscall tracing ongoing, invoke pre-post-hooks around syscall
 	GET_CURR_THR_INFO_FLAGS   r10
 	and.f 0, r10, _TIF_SYSCALL_WORK
 	bnz   tracesys  ; this never comes back
 
 	;============ Normal syscall case
 
-	; syscall num shd not exceed the total system calls avail
 	cmp     r8,  NR_syscalls - 1
 	mov.hi  r0, -ENOSYS
 	bhi     .Lret_from_system_call
 
-	; Offset into the syscall_table and call handler
 	ld.as   r9,[sys_call_table, r8]
-	jl      [r9]        ; Entry into Sys Call Handler
+	jl      [r9]
 
 .Lret_from_system_call:
-
 	st  r0, [sp, PT_r0]     ; sys call return value in pt_regs
 
 	; fall through to ret_from_exception
@@ -318,7 +296,7 @@ resume_user_mode_begin:
 	;      tracer might call PEEKUSR(CALLEE reg)
 	;
 	; NOTE: SP will grow up by size of CALLEE Reg-File
-	SAVE_CALLEE_SAVED_USER		; clobbers r12
+	SAVE_CALLEE_SAVED_USER
 
 	; save location of saved Callee Regs @ thread_struct->callee
 	GET_CURR_TASK_FIELD_PTR   TASK_THREAD, r10
diff --git a/arch/arc/kernel/intc-arcv2.c b/arch/arc/kernel/intc-arcv2.c
index 5cda19d0aa91..678898757e47 100644
--- a/arch/arc/kernel/intc-arcv2.c
+++ b/arch/arc/kernel/intc-arcv2.c
@@ -108,7 +108,7 @@ static void arcv2_irq_unmask(struct irq_data *data)
 	write_aux_reg(AUX_IRQ_ENABLE, 1);
 }
 
-void arcv2_irq_enable(struct irq_data *data)
+static void arcv2_irq_enable(struct irq_data *data)
 {
 	/* set default priority */
 	write_aux_reg(AUX_IRQ_SELECT, data->hwirq);
diff --git a/arch/arc/kernel/kgdb.c b/arch/arc/kernel/kgdb.c
index 345a0000554c..4f2b5951454f 100644
--- a/arch/arc/kernel/kgdb.c
+++ b/arch/arc/kernel/kgdb.c
@@ -175,7 +175,7 @@ void kgdb_trap(struct pt_regs *regs)
 	 * with trap_s 4 (compiled) breakpoints, continuation needs to
 	 * start after the breakpoint.
 	 */
-	if (regs->ecr_param == 3)
+	if (regs->ecr.param == 3)
 		instruction_pointer(regs) -= BREAK_INSTR_SIZE;
 
 	kgdb_handle_exception(1, SIGTRAP, 0, regs);
diff --git a/arch/arc/kernel/mcip.c b/arch/arc/kernel/mcip.c
index f9fdb557c263..55373ca0d28b 100644
--- a/arch/arc/kernel/mcip.c
+++ b/arch/arc/kernel/mcip.c
@@ -165,8 +165,6 @@ static void mcip_probe_n_setup(void)
 		IS_AVAIL1(mp.idu, "IDU "),
 		IS_AVAIL1(mp.dbg, "DEBUG "),
 		IS_AVAIL1(mp.gfrc, "GFRC"));
-
-	cpuinfo_arc700[0].extn.gfrc = mp.gfrc;
 }
 
 struct plat_smp_ops plat_smp_ops = {
diff --git a/arch/arc/kernel/process.c b/arch/arc/kernel/process.c
index 980b71da2f61..186ceab661eb 100644
--- a/arch/arc/kernel/process.c
+++ b/arch/arc/kernel/process.c
@@ -141,7 +141,7 @@ asmlinkage void ret_from_fork(void);
  * |    unused      |
  * |                |
  * ------------------
- * |     r25        |   <==== top of Stack (thread.ksp)
+ * |     r25        |   <==== top of Stack (thread_info.ksp)
  * ~                ~
  * |    --to--      |   (CALLEE Regs of kernel mode)
  * |     r13        |
@@ -162,7 +162,6 @@ asmlinkage void ret_from_fork(void);
  * |      SP        |
  * |    orig_r0     |
  * |    event/ECR   |
- * |    user_r25    |
  * ------------------  <===== END of PAGE
  */
 int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
@@ -182,14 +181,14 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
 	c_callee = ((struct callee_regs *)childksp) - 1;
 
 	/*
-	 * __switch_to() uses thread.ksp to start unwinding stack
+	 * __switch_to() uses thread_info.ksp to start unwinding stack
 	 * For kernel threads we don't need to create callee regs, the
 	 * stack layout nevertheless needs to remain the same.
 	 * Also, since __switch_to anyways unwinds callee regs, we use
 	 * this to populate kernel thread entry-pt/args into callee regs,
 	 * so that ret_from_kernel_thread() becomes simpler.
 	 */
-	p->thread.ksp = (unsigned long)c_callee;	/* THREAD_KSP */
+	task_thread_info(p)->ksp = (unsigned long)c_callee;	/* THREAD_INFO_KSP */
 
 	/* __switch_to expects FP(0), BLINK(return addr) at top */
 	childksp[0] = 0;			/* fp */
@@ -243,16 +242,6 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
 	 */
 	c_callee->r25 = task_thread_info(p)->thr_ptr;
 
-#ifdef CONFIG_ARC_CURR_IN_REG
-	/*
-	 * setup usermode thread pointer #2:
-	 * however for this special use of r25 in kernel, __switch_to() sets
-	 * r25 for kernel needs and only in the final return path is usermode
-	 * r25 setup, from pt_regs->user_r25. So set that up as well
-	 */
-	c_regs->user_r25 = c_callee->r25;
-#endif
-
 	return 0;
 }
 
diff --git a/arch/arc/kernel/ptrace.c b/arch/arc/kernel/ptrace.c
index 2abdcd9b09e8..e0c233c178b1 100644
--- a/arch/arc/kernel/ptrace.c
+++ b/arch/arc/kernel/ptrace.c
@@ -46,8 +46,7 @@ static const struct pt_regs_offset regoffset_table[] = {
 	REG_OFFSET_NAME(r0),
 	REG_OFFSET_NAME(sp),
 	REG_OFFSET_NAME(orig_r0),
-	REG_OFFSET_NAME(event),
-	REG_OFFSET_NAME(user_r25),
+	REG_OFFSET_NAME(ecr),
 	REG_OFFSET_END,
 };
 
@@ -55,9 +54,8 @@ static const struct pt_regs_offset regoffset_table[] = {
 
 static const struct pt_regs_offset regoffset_table[] = {
 	REG_OFFSET_NAME(orig_r0),
-	REG_OFFSET_NAME(event),
+	REG_OFFSET_NAME(ecr),
 	REG_OFFSET_NAME(bta),
-	REG_OFFSET_NAME(user_r25),
 	REG_OFFSET_NAME(r26),
 	REG_OFFSET_NAME(fp),
 	REG_OFFSET_NAME(sp),
@@ -341,7 +339,7 @@ long arch_ptrace(struct task_struct *child, long request,
 	return ret;
 }
 
-asmlinkage int syscall_trace_entry(struct pt_regs *regs)
+asmlinkage int syscall_trace_enter(struct pt_regs *regs)
 {
 	if (test_thread_flag(TIF_SYSCALL_TRACE))
 		if (ptrace_report_syscall_entry(regs))
diff --git a/arch/arc/kernel/setup.c b/arch/arc/kernel/setup.c
index 41f07b3e594e..4dcf8589b708 100644
--- a/arch/arc/kernel/setup.c
+++ b/arch/arc/kernel/setup.c
@@ -29,6 +29,7 @@
 #include <asm/mach_desc.h>
 #include <asm/smp.h>
 #include <asm/dsp-impl.h>
+#include <soc/arc/mcip.h>
 
 #define FIX_PTR(x)  __asm__ __volatile__(";" : "+r"(x))
 
@@ -43,19 +44,22 @@ const struct machine_desc *machine_desc;
 
 struct task_struct *_current_task[NR_CPUS];	/* For stack switching */
 
-struct cpuinfo_arc cpuinfo_arc700[NR_CPUS];
+struct cpuinfo_arc {
+	int arcver;
+	unsigned int t0:1, t1:1;
+	struct {
+		unsigned long base;
+		unsigned int sz;
+	} iccm, dccm;
+};
+
+#ifdef CONFIG_ISA_ARCV2
 
-static const struct id_to_str arc_legacy_rel[] = {
+static const struct id_to_str arc_hs_rel[] = {
 	/* ID.ARCVER,	Release */
-#ifdef CONFIG_ISA_ARCOMPACT
-	{ 0x34, 	"R4.10"},
-	{ 0x35, 	"R4.11"},
-#else
 	{ 0x51, 	"R2.0" },
 	{ 0x52, 	"R2.1" },
 	{ 0x53,		"R3.0" },
-#endif
-	{ 0x00,		NULL   }
 };
 
 static const struct id_to_str arc_hs_ver54_rel[] = {
@@ -66,323 +70,296 @@ static const struct id_to_str arc_hs_ver54_rel[] = {
 	{  3,		"R4.00a"},
 	{  0xFF,	NULL   }
 };
+#endif
 
-static void read_decode_ccm_bcr(struct cpuinfo_arc *cpu)
+static int
+arcompact_mumbojumbo(int c, struct cpuinfo_arc *info, char *buf, int len)
 {
-	if (is_isa_arcompact()) {
-		struct bcr_iccm_arcompact iccm;
-		struct bcr_dccm_arcompact dccm;
+	int n = 0;
+#ifdef CONFIG_ISA_ARCOMPACT
+	char *cpu_nm, *isa_nm = "ARCompact";
+	struct bcr_fp_arcompact fpu_sp, fpu_dp;
+	int atomic = 0, be, present;
+	int bpu_full, bpu_cache, bpu_pred;
+	struct bcr_bpu_arcompact bpu;
+	struct bcr_iccm_arcompact iccm;
+	struct bcr_dccm_arcompact dccm;
+	struct bcr_generic isa;
 
-		READ_BCR(ARC_REG_ICCM_BUILD, iccm);
-		if (iccm.ver) {
-			cpu->iccm.sz = 4096 << iccm.sz;	/* 8K to 512K */
-			cpu->iccm.base_addr = iccm.base << 16;
-		}
+	READ_BCR(ARC_REG_ISA_CFG_BCR, isa);
 
-		READ_BCR(ARC_REG_DCCM_BUILD, dccm);
-		if (dccm.ver) {
-			unsigned long base;
-			cpu->dccm.sz = 2048 << dccm.sz;	/* 2K to 256K */
+	if (!isa.ver)	/* ISA BCR absent, use Kconfig info */
+		atomic = IS_ENABLED(CONFIG_ARC_HAS_LLSC);
+	else {
+		/* ARC700_BUILD only has 2 bits of isa info */
+		atomic = isa.info & 1;
+	}
 
-			base = read_aux_reg(ARC_REG_DCCM_BASE_BUILD);
-			cpu->dccm.base_addr = base & ~0xF;
-		}
-	} else {
-		struct bcr_iccm_arcv2 iccm;
-		struct bcr_dccm_arcv2 dccm;
-		unsigned long region;
-
-		READ_BCR(ARC_REG_ICCM_BUILD, iccm);
-		if (iccm.ver) {
-			cpu->iccm.sz = 256 << iccm.sz00;	/* 512B to 16M */
-			if (iccm.sz00 == 0xF && iccm.sz01 > 0)
-				cpu->iccm.sz <<= iccm.sz01;
-
-			region = read_aux_reg(ARC_REG_AUX_ICCM);
-			cpu->iccm.base_addr = region & 0xF0000000;
-		}
+	be = IS_ENABLED(CONFIG_CPU_BIG_ENDIAN);
 
-		READ_BCR(ARC_REG_DCCM_BUILD, dccm);
-		if (dccm.ver) {
-			cpu->dccm.sz = 256 << dccm.sz0;
-			if (dccm.sz0 == 0xF && dccm.sz1 > 0)
-				cpu->dccm.sz <<= dccm.sz1;
+	if (info->arcver < 0x34)
+		cpu_nm = "ARC750";
+	else
+		cpu_nm = "ARC770";
 
-			region = read_aux_reg(ARC_REG_AUX_DCCM);
-			cpu->dccm.base_addr = region & 0xF0000000;
-		}
-	}
-}
+	n += scnprintf(buf + n, len - n, "processor [%d]\t: %s (%s ISA) %s%s%s\n",
+		       c, cpu_nm, isa_nm,
+		       IS_AVAIL2(atomic, "atomic ", CONFIG_ARC_HAS_LLSC),
+		       IS_AVAIL1(be, "[Big-Endian]"));
 
-static void decode_arc_core(struct cpuinfo_arc *cpu)
-{
-	struct bcr_uarch_build_arcv2 uarch;
-	const struct id_to_str *tbl;
-
-	if (cpu->core.family < 0x54) { /* includes arc700 */
+	READ_BCR(ARC_REG_FP_BCR, fpu_sp);
+	READ_BCR(ARC_REG_DPFP_BCR, fpu_dp);
 
-		for (tbl = &arc_legacy_rel[0]; tbl->id != 0; tbl++) {
-			if (cpu->core.family == tbl->id) {
-				cpu->release = tbl->str;
-				break;
-			}
-		}
+	if (fpu_sp.ver | fpu_dp.ver)
+		n += scnprintf(buf + n, len - n, "FPU\t\t: %s%s\n",
+			       IS_AVAIL1(fpu_sp.ver, "SP "),
+			       IS_AVAIL1(fpu_dp.ver, "DP "));
 
-		if (is_isa_arcompact())
-			cpu->name = "ARC700";
-		else if (tbl->str)
-			cpu->name = "HS38";
-		else
-			cpu->name = cpu->release = "Unknown";
+	READ_BCR(ARC_REG_BPU_BCR, bpu);
+	bpu_full = bpu.fam ? 1 : 0;
+	bpu_cache = 256 << (bpu.ent - 1);
+	bpu_pred = 256 << (bpu.ent - 1);
 
-		return;
+	n += scnprintf(buf + n, len - n,
+			"BPU\t\t: %s%s match, cache:%d, Predict Table:%d\n",
+			IS_AVAIL1(bpu_full, "full"),
+			IS_AVAIL1(!bpu_full, "partial"),
+			bpu_cache, bpu_pred);
+
+	READ_BCR(ARC_REG_ICCM_BUILD, iccm);
+	if (iccm.ver) {
+		info->iccm.sz = 4096 << iccm.sz;	/* 8K to 512K */
+		info->iccm.base = iccm.base << 16;
 	}
 
-	/*
-	 * Initial HS cores bumped AUX IDENTITY.ARCVER for each release until
-	 * ARCVER 0x54 which introduced AUX MICRO_ARCH_BUILD and subsequent
-	 * releases only update it.
-	 */
-	READ_BCR(ARC_REG_MICRO_ARCH_BCR, uarch);
-
-	if (uarch.prod == 4) {
-		cpu->name = "HS48";
-		cpu->extn.dual = 1;
+	READ_BCR(ARC_REG_DCCM_BUILD, dccm);
+	if (dccm.ver) {
+		unsigned long base;
+		info->dccm.sz = 2048 << dccm.sz;	/* 2K to 256K */
 
-	} else {
-		cpu->name = "HS38";
+		base = read_aux_reg(ARC_REG_DCCM_BASE_BUILD);
+		info->dccm.base = base & ~0xF;
 	}
 
-	for (tbl = &arc_hs_ver54_rel[0]; tbl->id != 0xFF; tbl++) {
-		if (uarch.maj == tbl->id) {
-			cpu->release = tbl->str;
-			break;
-		}
-	}
+	/* ARCompact ISA specific sanity checks */
+	present = fpu_dp.ver;	/* SP has no arch visible regs */
+	CHK_OPT_STRICT(CONFIG_ARC_FPU_SAVE_RESTORE, present);
+#endif
+	return n;
+
 }
 
-static void read_arc_build_cfg_regs(void)
+static int arcv2_mumbojumbo(int c, struct cpuinfo_arc *info, char *buf, int len)
 {
-	struct bcr_timer timer;
-	struct bcr_generic bcr;
-	struct cpuinfo_arc *cpu = &cpuinfo_arc700[smp_processor_id()];
+	int n = 0;
+#ifdef CONFIG_ISA_ARCV2
+	const char *release, *cpu_nm, *isa_nm = "ARCv2";
+	int dual_issue = 0, dual_enb = 0, mpy_opt, present;
+	int bpu_full, bpu_cache, bpu_pred, bpu_ret_stk;
+	char mpy_nm[16], lpb_nm[32];
 	struct bcr_isa_arcv2 isa;
-	struct bcr_actionpoint ap;
-
-	FIX_PTR(cpu);
+	struct bcr_mpy mpy;
+	struct bcr_fp_arcv2 fpu;
+	struct bcr_bpu_arcv2 bpu;
+	struct bcr_lpb lpb;
+	struct bcr_iccm_arcv2 iccm;
+	struct bcr_dccm_arcv2 dccm;
+	struct bcr_erp erp;
 
-	READ_BCR(AUX_IDENTITY, cpu->core);
-	decode_arc_core(cpu);
-
-	READ_BCR(ARC_REG_TIMERS_BCR, timer);
-	cpu->extn.timer0 = timer.t0;
-	cpu->extn.timer1 = timer.t1;
-	cpu->extn.rtc = timer.rtc;
-
-	cpu->vec_base = read_aux_reg(AUX_INTR_VEC_BASE);
+	/*
+	 * Initial HS cores bumped AUX IDENTITY.ARCVER for each release until
+	 * ARCVER 0x54 which introduced AUX MICRO_ARCH_BUILD and subsequent
+	 * releases only update it.
+	 */
 
-	READ_BCR(ARC_REG_MUL_BCR, cpu->extn_mpy);
+	cpu_nm = "HS38";
 
-	/* Read CCM BCRs for boot reporting even if not enabled in Kconfig */
-	read_decode_ccm_bcr(cpu);
+	if (info->arcver > 0x50 && info->arcver <= 0x53) {
+		release = arc_hs_rel[info->arcver - 0x51].str;
+	} else {
+		const struct id_to_str *tbl;
+		struct bcr_uarch_build uarch;
 
-	read_decode_mmu_bcr();
-	read_decode_cache_bcr();
+		READ_BCR(ARC_REG_MICRO_ARCH_BCR, uarch);
 
-	if (is_isa_arcompact()) {
-		struct bcr_fp_arcompact sp, dp;
-		struct bcr_bpu_arcompact bpu;
-
-		READ_BCR(ARC_REG_FP_BCR, sp);
-		READ_BCR(ARC_REG_DPFP_BCR, dp);
-		cpu->extn.fpu_sp = sp.ver ? 1 : 0;
-		cpu->extn.fpu_dp = dp.ver ? 1 : 0;
-
-		READ_BCR(ARC_REG_BPU_BCR, bpu);
-		cpu->bpu.ver = bpu.ver;
-		cpu->bpu.full = bpu.fam ? 1 : 0;
-		if (bpu.ent) {
-			cpu->bpu.num_cache = 256 << (bpu.ent - 1);
-			cpu->bpu.num_pred = 256 << (bpu.ent - 1);
+		for (tbl = &arc_hs_ver54_rel[0]; tbl->id != 0xFF; tbl++) {
+			if (uarch.maj == tbl->id) {
+				release = tbl->str;
+				break;
+			}
 		}
-	} else {
-		struct bcr_fp_arcv2 spdp;
-		struct bcr_bpu_arcv2 bpu;
-
-		READ_BCR(ARC_REG_FP_V2_BCR, spdp);
-		cpu->extn.fpu_sp = spdp.sp ? 1 : 0;
-		cpu->extn.fpu_dp = spdp.dp ? 1 : 0;
-
-		READ_BCR(ARC_REG_BPU_BCR, bpu);
-		cpu->bpu.ver = bpu.ver;
-		cpu->bpu.full = bpu.ft;
-		cpu->bpu.num_cache = 256 << bpu.bce;
-		cpu->bpu.num_pred = 2048 << bpu.pte;
-		cpu->bpu.ret_stk = 4 << bpu.rse;
-
-		/* if dual issue hardware, is it enabled ? */
-		if (cpu->extn.dual) {
+		if (uarch.prod == 4) {
 			unsigned int exec_ctrl;
 
+			cpu_nm = "HS48";
+			dual_issue = 1;
+			/* if dual issue hardware, is it enabled ? */
 			READ_BCR(AUX_EXEC_CTRL, exec_ctrl);
-			cpu->extn.dual_enb = !(exec_ctrl & 1);
+			dual_enb = !(exec_ctrl & 1);
 		}
 	}
 
-	READ_BCR(ARC_REG_AP_BCR, ap);
-	if (ap.ver) {
-		cpu->extn.ap_num = 2 << ap.num;
-		cpu->extn.ap_full = !ap.min;
-	}
-
-	READ_BCR(ARC_REG_SMART_BCR, bcr);
-	cpu->extn.smart = bcr.ver ? 1 : 0;
-
-	READ_BCR(ARC_REG_RTT_BCR, bcr);
-	cpu->extn.rtt = bcr.ver ? 1 : 0;
-
 	READ_BCR(ARC_REG_ISA_CFG_BCR, isa);
 
-	/* some hacks for lack of feature BCR info in old ARC700 cores */
-	if (is_isa_arcompact()) {
-		if (!isa.ver)	/* ISA BCR absent, use Kconfig info */
-			cpu->isa.atomic = IS_ENABLED(CONFIG_ARC_HAS_LLSC);
-		else {
-			/* ARC700_BUILD only has 2 bits of isa info */
-			struct bcr_generic bcr = *(struct bcr_generic *)&isa;
-			cpu->isa.atomic = bcr.info & 1;
-		}
-
-		cpu->isa.be = IS_ENABLED(CONFIG_CPU_BIG_ENDIAN);
+	n += scnprintf(buf + n, len - n, "processor [%d]\t: %s %s (%s ISA) %s%s%s\n",
+		       c, cpu_nm, release, isa_nm,
+		       IS_AVAIL1(isa.be, "[Big-Endian]"),
+		       IS_AVAIL3(dual_issue, dual_enb, " Dual-Issue "));
+
+	READ_BCR(ARC_REG_MPY_BCR, mpy);
+	mpy_opt = 2;	/* stock MPY/MPYH */
+	if (mpy.dsp)	/* OPT 7-9 */
+		mpy_opt = mpy.dsp + 6;
+
+	scnprintf(mpy_nm, 16, "mpy[opt %d] ", mpy_opt);
+
+	READ_BCR(ARC_REG_FP_V2_BCR, fpu);
+
+	n += scnprintf(buf + n, len - n, "ISA Extn\t: %s%s%s%s%s%s%s%s%s%s%s\n",
+		       IS_AVAIL2(isa.atomic, "atomic ", CONFIG_ARC_HAS_LLSC),
+		       IS_AVAIL2(isa.ldd, "ll64 ", CONFIG_ARC_HAS_LL64),
+		       IS_AVAIL2(isa.unalign, "unalign ", CONFIG_ARC_USE_UNALIGNED_MEM_ACCESS),
+		       IS_AVAIL1(mpy.ver, mpy_nm),
+		       IS_AVAIL1(isa.div_rem, "div_rem "),
+		       IS_AVAIL1((fpu.sp | fpu.dp), "  FPU:"),
+		       IS_AVAIL1(fpu.sp, " sp"),
+		       IS_AVAIL1(fpu.dp, " dp"));
+
+	READ_BCR(ARC_REG_BPU_BCR, bpu);
+	bpu_full = bpu.ft;
+	bpu_cache = 256 << bpu.bce;
+	bpu_pred = 2048 << bpu.pte;
+	bpu_ret_stk = 4 << bpu.rse;
+
+	READ_BCR(ARC_REG_LPB_BUILD, lpb);
+	if (lpb.ver) {
+		unsigned int ctl;
+		ctl = read_aux_reg(ARC_REG_LPB_CTRL);
+
+		scnprintf(lpb_nm, sizeof(lpb_nm), " Loop Buffer:%d %s",
+			  lpb.entries, IS_DISABLED_RUN(!ctl));
+	}
 
-		 /* there's no direct way to distinguish 750 vs. 770 */
-		if (unlikely(cpu->core.family < 0x34 || cpu->mmu.ver < 3))
-			cpu->name = "ARC750";
-	} else {
-		cpu->isa = isa;
+	n += scnprintf(buf + n, len - n,
+			"BPU\t\t: %s%s match, cache:%d, Predict Table:%d Return stk: %d%s\n",
+			IS_AVAIL1(bpu_full, "full"),
+			IS_AVAIL1(!bpu_full, "partial"),
+			bpu_cache, bpu_pred, bpu_ret_stk,
+			lpb_nm);
+
+	READ_BCR(ARC_REG_ICCM_BUILD, iccm);
+	if (iccm.ver) {
+		unsigned long base;
+		info->iccm.sz = 256 << iccm.sz00;	/* 512B to 16M */
+		if (iccm.sz00 == 0xF && iccm.sz01 > 0)
+			info->iccm.sz <<= iccm.sz01;
+		base = read_aux_reg(ARC_REG_AUX_ICCM);
+		info->iccm.base = base & 0xF0000000;
 	}
-}
 
-static char *arc_cpu_mumbojumbo(int cpu_id, char *buf, int len)
-{
-	struct cpuinfo_arc *cpu = &cpuinfo_arc700[cpu_id];
-	struct bcr_identity *core = &cpu->core;
-	char mpy_opt[16];
-	int n = 0;
+	READ_BCR(ARC_REG_DCCM_BUILD, dccm);
+	if (dccm.ver) {
+		unsigned long base;
+		info->dccm.sz = 256 << dccm.sz0;
+		if (dccm.sz0 == 0xF && dccm.sz1 > 0)
+			info->dccm.sz <<= dccm.sz1;
+		base = read_aux_reg(ARC_REG_AUX_DCCM);
+		info->dccm.base = base & 0xF0000000;
+	}
 
-	FIX_PTR(cpu);
+	/* Error Protection: ECC/Parity */
+	READ_BCR(ARC_REG_ERP_BUILD, erp);
+	if (erp.ver) {
+		struct ctl_erp ctl;
+		READ_BCR(ARC_REG_ERP_CTRL, ctl);
+		/* inverted bits: 0 means enabled */
+		n += scnprintf(buf + n, len - n, "Extn [ECC]\t: %s%s%s%s%s%s\n",
+				IS_AVAIL3(erp.ic,  !ctl.dpi, "IC "),
+				IS_AVAIL3(erp.dc,  !ctl.dpd, "DC "),
+				IS_AVAIL3(erp.mmu, !ctl.mpd, "MMU "));
+	}
 
-	n += scnprintf(buf + n, len - n,
-		       "\nIDENTITY\t: ARCVER [%#02x] ARCNUM [%#02x] CHIPID [%#4x]\n",
-		       core->family, core->cpu_id, core->chip_id);
+	/* ARCv2 ISA specific sanity checks */
+	present = fpu.sp | fpu.dp | mpy.dsp;	/* DSP and/or FPU */
+	CHK_OPT_STRICT(CONFIG_ARC_HAS_ACCL_REGS, present);
 
-	n += scnprintf(buf + n, len - n, "processor [%d]\t: %s %s (%s ISA) %s%s%s\n",
-		       cpu_id, cpu->name, cpu->release,
-		       is_isa_arcompact() ? "ARCompact" : "ARCv2",
-		       IS_AVAIL1(cpu->isa.be, "[Big-Endian]"),
-		       IS_AVAIL3(cpu->extn.dual, cpu->extn.dual_enb, " Dual-Issue "));
+	dsp_config_check();
+#endif
+	return n;
+}
 
-	n += scnprintf(buf + n, len - n, "Timers\t\t: %s%s%s%s%s%s\nISA Extn\t: ",
-		       IS_AVAIL1(cpu->extn.timer0, "Timer0 "),
-		       IS_AVAIL1(cpu->extn.timer1, "Timer1 "),
-		       IS_AVAIL2(cpu->extn.rtc, "RTC [UP 64-bit] ", CONFIG_ARC_TIMERS_64BIT),
-		       IS_AVAIL2(cpu->extn.gfrc, "GFRC [SMP 64-bit] ", CONFIG_ARC_TIMERS_64BIT));
+static char *arc_cpu_mumbojumbo(int c, struct cpuinfo_arc *info, char *buf, int len)
+{
+	struct bcr_identity ident;
+	struct bcr_timer timer;
+	struct bcr_generic bcr;
+	struct mcip_bcr mp;
+	struct bcr_actionpoint ap;
+	unsigned long vec_base;
+	int ap_num, ap_full, smart, rtt, n;
 
-	if (cpu->extn_mpy.ver) {
-		if (is_isa_arcompact()) {
-			scnprintf(mpy_opt, 16, "mpy");
-		} else {
+	memset(info, 0, sizeof(struct cpuinfo_arc));
 
-			int opt = 2;	/* stock MPY/MPYH */
+	READ_BCR(AUX_IDENTITY, ident);
+	info->arcver = ident.family;
 
-			if (cpu->extn_mpy.dsp)	/* OPT 7-9 */
-				opt = cpu->extn_mpy.dsp + 6;
+	n = scnprintf(buf, len,
+		       "\nIDENTITY\t: ARCVER [%#02x] ARCNUM [%#02x] CHIPID [%#4x]\n",
+		       ident.family, ident.cpu_id, ident.chip_id);
 
-			scnprintf(mpy_opt, 16, "mpy[opt %d] ", opt);
-		}
+	if (is_isa_arcompact()) {
+		n += arcompact_mumbojumbo(c, info, buf + n, len - n);
+	} else if (is_isa_arcv2()){
+		n += arcv2_mumbojumbo(c, info, buf + n, len - n);
 	}
 
-	n += scnprintf(buf + n, len - n, "%s%s%s%s%s%s%s%s\n",
-		       IS_AVAIL2(cpu->isa.atomic, "atomic ", CONFIG_ARC_HAS_LLSC),
-		       IS_AVAIL2(cpu->isa.ldd, "ll64 ", CONFIG_ARC_HAS_LL64),
-		       IS_AVAIL2(cpu->isa.unalign, "unalign ", CONFIG_ARC_USE_UNALIGNED_MEM_ACCESS),
-		       IS_AVAIL1(cpu->extn_mpy.ver, mpy_opt),
-		       IS_AVAIL1(cpu->isa.div_rem, "div_rem "));
+	n += arc_mmu_mumbojumbo(c, buf + n, len - n);
+	n += arc_cache_mumbojumbo(c, buf + n, len - n);
 
-	if (cpu->bpu.ver) {
-		n += scnprintf(buf + n, len - n,
-			      "BPU\t\t: %s%s match, cache:%d, Predict Table:%d Return stk: %d",
-			      IS_AVAIL1(cpu->bpu.full, "full"),
-			      IS_AVAIL1(!cpu->bpu.full, "partial"),
-			      cpu->bpu.num_cache, cpu->bpu.num_pred, cpu->bpu.ret_stk);
-
-		if (is_isa_arcv2()) {
-			struct bcr_lpb lpb;
-
-			READ_BCR(ARC_REG_LPB_BUILD, lpb);
-			if (lpb.ver) {
-				unsigned int ctl;
-				ctl = read_aux_reg(ARC_REG_LPB_CTRL);
-
-				n += scnprintf(buf + n, len - n, " Loop Buffer:%d %s",
-					       lpb.entries,
-					       IS_DISABLED_RUN(!ctl));
-			}
-		}
-		n += scnprintf(buf + n, len - n, "\n");
-	}
+	READ_BCR(ARC_REG_TIMERS_BCR, timer);
+	info->t0 = timer.t0;
+	info->t1 = timer.t1;
 
-	return buf;
-}
+	READ_BCR(ARC_REG_MCIP_BCR, mp);
+	vec_base = read_aux_reg(AUX_INTR_VEC_BASE);
 
-static char *arc_extn_mumbojumbo(int cpu_id, char *buf, int len)
-{
-	int n = 0;
-	struct cpuinfo_arc *cpu = &cpuinfo_arc700[cpu_id];
+	n += scnprintf(buf + n, len - n,
+		       "Timers\t\t: %s%s%s%s%s%s\nVector Table\t: %#lx\n",
+		       IS_AVAIL1(timer.t0, "Timer0 "),
+		       IS_AVAIL1(timer.t1, "Timer1 "),
+		       IS_AVAIL2(timer.rtc, "RTC [UP 64-bit] ", CONFIG_ARC_TIMERS_64BIT),
+		       IS_AVAIL2(mp.gfrc, "GFRC [SMP 64-bit] ", CONFIG_ARC_TIMERS_64BIT),
+		       vec_base);
 
-	FIX_PTR(cpu);
+	READ_BCR(ARC_REG_AP_BCR, ap);
+	if (ap.ver) {
+		ap_num = 2 << ap.num;
+		ap_full = !ap.min;
+	}
 
-	n += scnprintf(buf + n, len - n, "Vector Table\t: %#x\n", cpu->vec_base);
+	READ_BCR(ARC_REG_SMART_BCR, bcr);
+	smart = bcr.ver ? 1 : 0;
 
-	if (cpu->extn.fpu_sp || cpu->extn.fpu_dp)
-		n += scnprintf(buf + n, len - n, "FPU\t\t: %s%s\n",
-			       IS_AVAIL1(cpu->extn.fpu_sp, "SP "),
-			       IS_AVAIL1(cpu->extn.fpu_dp, "DP "));
+	READ_BCR(ARC_REG_RTT_BCR, bcr);
+	rtt = bcr.ver ? 1 : 0;
 
-	if (cpu->extn.ap_num | cpu->extn.smart | cpu->extn.rtt) {
+	if (ap.ver | smart | rtt) {
 		n += scnprintf(buf + n, len - n, "DEBUG\t\t: %s%s",
-			       IS_AVAIL1(cpu->extn.smart, "smaRT "),
-			       IS_AVAIL1(cpu->extn.rtt, "RTT "));
-		if (cpu->extn.ap_num) {
+			       IS_AVAIL1(smart, "smaRT "),
+			       IS_AVAIL1(rtt, "RTT "));
+		if (ap.ver) {
 			n += scnprintf(buf + n, len - n, "ActionPoint %d/%s",
-				       cpu->extn.ap_num,
-				       cpu->extn.ap_full ? "full":"min");
+				       ap_num,
+				       ap_full ? "full":"min");
 		}
 		n += scnprintf(buf + n, len - n, "\n");
 	}
 
-	if (cpu->dccm.sz || cpu->iccm.sz)
-		n += scnprintf(buf + n, len - n, "Extn [CCM]\t: DCCM @ %x, %d KB / ICCM: @ %x, %d KB\n",
-			       cpu->dccm.base_addr, TO_KB(cpu->dccm.sz),
-			       cpu->iccm.base_addr, TO_KB(cpu->iccm.sz));
-
-	if (is_isa_arcv2()) {
-
-		/* Error Protection: ECC/Parity */
-		struct bcr_erp erp;
-		READ_BCR(ARC_REG_ERP_BUILD, erp);
-
-		if (erp.ver) {
-			struct  ctl_erp ctl;
-			READ_BCR(ARC_REG_ERP_CTRL, ctl);
-
-			/* inverted bits: 0 means enabled */
-			n += scnprintf(buf + n, len - n, "Extn [ECC]\t: %s%s%s%s%s%s\n",
-				IS_AVAIL3(erp.ic,  !ctl.dpi, "IC "),
-				IS_AVAIL3(erp.dc,  !ctl.dpd, "DC "),
-				IS_AVAIL3(erp.mmu, !ctl.mpd, "MMU "));
-		}
-	}
+	if (info->dccm.sz || info->iccm.sz)
+		n += scnprintf(buf + n, len - n,
+			       "Extn [CCM]\t: DCCM @ %lx, %d KB / ICCM: @ %lx, %d KB\n",
+			       info->dccm.base, TO_KB(info->dccm.sz),
+			       info->iccm.base, TO_KB(info->iccm.sz));
 
 	return buf;
 }
@@ -401,15 +378,15 @@ void chk_opt_weak(char *opt_name, bool hw_exists, bool opt_ena)
 		panic("Disable %s, hardware NOT present\n", opt_name);
 }
 
-static void arc_chk_core_config(void)
+/*
+ * ISA agnostic sanity checks
+ */
+static void arc_chk_core_config(struct cpuinfo_arc *info)
 {
-	struct cpuinfo_arc *cpu = &cpuinfo_arc700[smp_processor_id()];
-	int present = 0;
-
-	if (!cpu->extn.timer0)
+	if (!info->t0)
 		panic("Timer0 is not present!\n");
 
-	if (!cpu->extn.timer1)
+	if (!info->t1)
 		panic("Timer1 is not present!\n");
 
 #ifdef CONFIG_ARC_HAS_DCCM
@@ -417,35 +394,17 @@ static void arc_chk_core_config(void)
 	 * DCCM can be arbit placed in hardware.
 	 * Make sure it's placement/sz matches what Linux is built with
 	 */
-	if ((unsigned int)__arc_dccm_base != cpu->dccm.base_addr)
+	if ((unsigned int)__arc_dccm_base != info->dccm.base)
 		panic("Linux built with incorrect DCCM Base address\n");
 
-	if (CONFIG_ARC_DCCM_SZ * SZ_1K != cpu->dccm.sz)
+	if (CONFIG_ARC_DCCM_SZ * SZ_1K != info->dccm.sz)
 		panic("Linux built with incorrect DCCM Size\n");
 #endif
 
 #ifdef CONFIG_ARC_HAS_ICCM
-	if (CONFIG_ARC_ICCM_SZ * SZ_1K != cpu->iccm.sz)
+	if (CONFIG_ARC_ICCM_SZ * SZ_1K != info->iccm.sz)
 		panic("Linux built with incorrect ICCM Size\n");
 #endif
-
-	/*
-	 * FP hardware/software config sanity
-	 * -If hardware present, kernel needs to save/restore FPU state
-	 * -If not, it will crash trying to save/restore the non-existant regs
-	 */
-
-	if (is_isa_arcompact()) {
-		/* only DPDP checked since SP has no arch visible regs */
-		present = cpu->extn.fpu_dp;
-		CHK_OPT_STRICT(CONFIG_ARC_FPU_SAVE_RESTORE, present);
-	} else {
-		/* Accumulator Low:High pair (r58:59) present if DSP MPY or FPU */
-		present = cpu->extn_mpy.dsp | cpu->extn.fpu_sp | cpu->extn.fpu_dp;
-		CHK_OPT_STRICT(CONFIG_ARC_HAS_ACCL_REGS, present);
-
-		dsp_config_check();
-	}
 }
 
 /*
@@ -456,21 +415,19 @@ static void arc_chk_core_config(void)
 
 void setup_processor(void)
 {
+	struct cpuinfo_arc info;
+	int c = smp_processor_id();
 	char str[512];
-	int cpu_id = smp_processor_id();
 
-	read_arc_build_cfg_regs();
-	arc_init_IRQ();
+	pr_info("%s", arc_cpu_mumbojumbo(c, &info, str, sizeof(str)));
+	pr_info("%s", arc_platform_smp_cpuinfo());
 
-	pr_info("%s", arc_cpu_mumbojumbo(cpu_id, str, sizeof(str)));
+	arc_chk_core_config(&info);
 
+	arc_init_IRQ();
 	arc_mmu_init();
 	arc_cache_init();
 
-	pr_info("%s", arc_extn_mumbojumbo(cpu_id, str, sizeof(str)));
-	pr_info("%s", arc_platform_smp_cpuinfo());
-
-	arc_chk_core_config();
 }
 
 static inline bool uboot_arg_invalid(unsigned long addr)
@@ -617,6 +574,7 @@ static int show_cpuinfo(struct seq_file *m, void *v)
 	char *str;
 	int cpu_id = ptr_to_cpu(v);
 	struct device *cpu_dev = get_cpu_device(cpu_id);
+	struct cpuinfo_arc info;
 	struct clk *cpu_clk;
 	unsigned long freq = 0;
 
@@ -629,7 +587,7 @@ static int show_cpuinfo(struct seq_file *m, void *v)
 	if (!str)
 		goto done;
 
-	seq_printf(m, arc_cpu_mumbojumbo(cpu_id, str, PAGE_SIZE));
+	seq_printf(m, arc_cpu_mumbojumbo(cpu_id, &info, str, PAGE_SIZE));
 
 	cpu_clk = clk_get(cpu_dev, NULL);
 	if (IS_ERR(cpu_clk)) {
@@ -646,9 +604,6 @@ static int show_cpuinfo(struct seq_file *m, void *v)
 		   loops_per_jiffy / (500000 / HZ),
 		   (loops_per_jiffy / (5000 / HZ)) % 100);
 
-	seq_printf(m, arc_mmu_mumbojumbo(cpu_id, str, PAGE_SIZE));
-	seq_printf(m, arc_cache_mumbojumbo(cpu_id, str, PAGE_SIZE));
-	seq_printf(m, arc_extn_mumbojumbo(cpu_id, str, PAGE_SIZE));
 	seq_printf(m, arc_platform_smp_cpuinfo());
 
 	free_page((unsigned long)str);
diff --git a/arch/arc/kernel/signal.c b/arch/arc/kernel/signal.c
index 3c1590c27fae..0b3bb529d246 100644
--- a/arch/arc/kernel/signal.c
+++ b/arch/arc/kernel/signal.c
@@ -53,6 +53,7 @@
 #include <linux/sched/task_stack.h>
 
 #include <asm/ucontext.h>
+#include <asm/entry.h>
 
 struct rt_sigframe {
 	struct siginfo info;
diff --git a/arch/arc/kernel/smp.c b/arch/arc/kernel/smp.c
index 409cfa4675b4..8d9b188caa27 100644
--- a/arch/arc/kernel/smp.c
+++ b/arch/arc/kernel/smp.c
@@ -23,9 +23,10 @@
 #include <linux/export.h>
 #include <linux/of_fdt.h>
 
-#include <asm/processor.h>
-#include <asm/setup.h>
 #include <asm/mach_desc.h>
+#include <asm/setup.h>
+#include <asm/smp.h>
+#include <asm/processor.h>
 
 #ifndef CONFIG_ARC_HAS_LLSC
 arch_spinlock_t smp_atomic_ops_lock = __ARCH_SPIN_LOCK_UNLOCKED;
@@ -351,7 +352,7 @@ static inline int __do_IPI(unsigned long msg)
  * arch-common ISR to handle for inter-processor interrupts
  * Has hooks for platform specific IPI
  */
-irqreturn_t do_IPI(int irq, void *dev_id)
+static irqreturn_t do_IPI(int irq, void *dev_id)
 {
 	unsigned long pending;
 	unsigned long __maybe_unused copy;
diff --git a/arch/arc/kernel/stacktrace.c b/arch/arc/kernel/stacktrace.c
index 5372dc04e784..ea99c066ef25 100644
--- a/arch/arc/kernel/stacktrace.c
+++ b/arch/arc/kernel/stacktrace.c
@@ -29,6 +29,7 @@
 
 #include <asm/arcregs.h>
 #include <asm/unwind.h>
+#include <asm/stacktrace.h>
 #include <asm/switch_to.h>
 
 /*-------------------------------------------------------------------------
diff --git a/arch/arc/kernel/traps.c b/arch/arc/kernel/traps.c
index 6b83e3f2b41c..9b9570b79362 100644
--- a/arch/arc/kernel/traps.c
+++ b/arch/arc/kernel/traps.c
@@ -16,6 +16,7 @@
 #include <linux/ptrace.h>
 #include <linux/kprobes.h>
 #include <linux/kgdb.h>
+#include <asm/entry.h>
 #include <asm/setup.h>
 #include <asm/unaligned.h>
 #include <asm/kprobes.h>
@@ -109,9 +110,7 @@ void do_machine_check_fault(unsigned long address, struct pt_regs *regs)
  */
 void do_non_swi_trap(unsigned long address, struct pt_regs *regs)
 {
-	unsigned int param = regs->ecr_param;
-
-	switch (param) {
+	switch (regs->ecr.param) {
 	case 1:
 		trap_is_brkpt(address, regs);
 		break;
diff --git a/arch/arc/kernel/troubleshoot.c b/arch/arc/kernel/troubleshoot.c
index 7654c2e42dc0..d5b3ed2c58f5 100644
--- a/arch/arc/kernel/troubleshoot.c
+++ b/arch/arc/kernel/troubleshoot.c
@@ -115,8 +115,8 @@ static void show_ecr_verbose(struct pt_regs *regs)
 	/* For Data fault, this is data address not instruction addr */
 	address = current->thread.fault_address;
 
-	vec = regs->ecr_vec;
-	cause_code = regs->ecr_cause;
+	vec = regs->ecr.vec;
+	cause_code = regs->ecr.cause;
 
 	/* For DTLB Miss or ProtV, display the memory involved too */
 	if (vec == ECR_V_DTLB_MISS) {
@@ -154,7 +154,7 @@ static void show_ecr_verbose(struct pt_regs *regs)
 		pr_cont("Misaligned r/w from 0x%08lx\n", address);
 #endif
 	} else if (vec == ECR_V_TRAP) {
-		if (regs->ecr_param == 5)
+		if (regs->ecr.param == 5)
 			pr_cont("gcc generated __builtin_trap\n");
 	} else {
 		pr_cont("Check Programmer's Manual\n");
@@ -184,9 +184,10 @@ void show_regs(struct pt_regs *regs)
 	if (user_mode(regs))
 		show_faulting_vma(regs->ret); /* faulting code, not data */
 
-	pr_info("ECR: 0x%08lx EFA: 0x%08lx ERET: 0x%08lx\nSTAT: 0x%08lx",
-		regs->event, current->thread.fault_address, regs->ret,
-		regs->status32);
+	pr_info("ECR: 0x%08lx EFA: 0x%08lx ERET: 0x%08lx\n",
+		regs->ecr.full, current->thread.fault_address, regs->ret);
+
+	pr_info("STAT32: 0x%08lx", regs->status32);
 
 #define STS_BIT(r, bit)	r->status32 & STATUS_##bit##_MASK ? #bit" " : ""
 
diff --git a/arch/arc/lib/memset-archs.S b/arch/arc/lib/memset-archs.S
index d2e09fece5bc..d0a5cec4cdca 100644
--- a/arch/arc/lib/memset-archs.S
+++ b/arch/arc/lib/memset-archs.S
@@ -36,12 +36,13 @@
 #endif
 
 ENTRY_CFI(memset)
-	PREFETCHW_INSTR	r0, 0	; Prefetch the first write location
 	mov.f	0, r2
 ;;; if size is zero
 	jz.d	[blink]
 	mov	r3, r0		; don't clobber ret val
 
+	PREFETCHW_INSTR	r0, 0	; Prefetch the first write location
+
 ;;; if length < 8
 	brls.d.nt	r2, 8, .Lsmallchunk
 	mov.f	lp_count,r2
diff --git a/arch/arc/mm/cache.c b/arch/arc/mm/cache.c
index 3c16ee942a5c..f7e05c146637 100644
--- a/arch/arc/mm/cache.c
+++ b/arch/arc/mm/cache.c
@@ -28,6 +28,10 @@ int slc_enable = 1, ioc_enable = 1;
 unsigned long perip_base = ARC_UNCACHED_ADDR_SPACE; /* legacy value for boot */
 unsigned long perip_end = 0xFFFFFFFF; /* legacy value */
 
+static struct cpuinfo_arc_cache {
+	unsigned int sz_k, line_len, colors;
+} ic_info, dc_info, slc_info;
+
 void (*_cache_line_loop_ic_fn)(phys_addr_t paddr, unsigned long vaddr,
 			       unsigned long sz, const int op, const int full_page);
 
@@ -35,78 +39,24 @@ void (*__dma_cache_wback_inv)(phys_addr_t start, unsigned long sz);
 void (*__dma_cache_inv)(phys_addr_t start, unsigned long sz);
 void (*__dma_cache_wback)(phys_addr_t start, unsigned long sz);
 
-char *arc_cache_mumbojumbo(int c, char *buf, int len)
-{
-	int n = 0;
-	struct cpuinfo_arc_cache *p;
-
-#define PR_CACHE(p, cfg, str)						\
-	if (!(p)->line_len)						\
-		n += scnprintf(buf + n, len - n, str"\t\t: N/A\n");	\
-	else								\
-		n += scnprintf(buf + n, len - n,			\
-			str"\t\t: %uK, %dway/set, %uB Line, %s%s%s\n",	\
-			(p)->sz_k, (p)->assoc, (p)->line_len,		\
-			(p)->vipt ? "VIPT" : "PIPT",			\
-			(p)->alias ? " aliasing" : "",			\
-			IS_USED_CFG(cfg));
-
-	PR_CACHE(&cpuinfo_arc700[c].icache, CONFIG_ARC_HAS_ICACHE, "I-Cache");
-	PR_CACHE(&cpuinfo_arc700[c].dcache, CONFIG_ARC_HAS_DCACHE, "D-Cache");
-
-	p = &cpuinfo_arc700[c].slc;
-	if (p->line_len)
-		n += scnprintf(buf + n, len - n,
-			       "SLC\t\t: %uK, %uB Line%s\n",
-			       p->sz_k, p->line_len, IS_USED_RUN(slc_enable));
-
-	n += scnprintf(buf + n, len - n, "Peripherals\t: %#lx%s%s\n",
-		       perip_base,
-		       IS_AVAIL3(ioc_exists, ioc_enable, ", IO-Coherency (per-device) "));
-
-	return buf;
-}
-
-/*
- * Read the Cache Build Confuration Registers, Decode them and save into
- * the cpuinfo structure for later use.
- * No Validation done here, simply read/convert the BCRs
- */
-static void read_decode_cache_bcr_arcv2(int cpu)
+static int read_decode_cache_bcr_arcv2(int c, char *buf, int len)
 {
-	struct cpuinfo_arc_cache *p_slc = &cpuinfo_arc700[cpu].slc;
+	struct cpuinfo_arc_cache *p_slc = &slc_info;
+	struct bcr_identity ident;
 	struct bcr_generic sbcr;
-
-	struct bcr_slc_cfg {
-#ifdef CONFIG_CPU_BIG_ENDIAN
-		unsigned int pad:24, way:2, lsz:2, sz:4;
-#else
-		unsigned int sz:4, lsz:2, way:2, pad:24;
-#endif
-	} slc_cfg;
-
-	struct bcr_clust_cfg {
-#ifdef CONFIG_CPU_BIG_ENDIAN
-		unsigned int pad:7, c:1, num_entries:8, num_cores:8, ver:8;
-#else
-		unsigned int ver:8, num_cores:8, num_entries:8, c:1, pad:7;
-#endif
-	} cbcr;
-
-	struct bcr_volatile {
-#ifdef CONFIG_CPU_BIG_ENDIAN
-		unsigned int start:4, limit:4, pad:22, order:1, disable:1;
-#else
-		unsigned int disable:1, order:1, pad:22, limit:4, start:4;
-#endif
-	} vol;
-
+	struct bcr_clust_cfg cbcr;
+	struct bcr_volatile vol;
+	int n = 0;
 
 	READ_BCR(ARC_REG_SLC_BCR, sbcr);
 	if (sbcr.ver) {
+		struct bcr_slc_cfg  slc_cfg;
 		READ_BCR(ARC_REG_SLC_CFG, slc_cfg);
 		p_slc->sz_k = 128 << slc_cfg.sz;
 		l2_line_sz = p_slc->line_len = (slc_cfg.lsz == 0) ? 128 : 64;
+		n += scnprintf(buf + n, len - n,
+			       "SLC\t\t: %uK, %uB Line%s\n",
+			       p_slc->sz_k, p_slc->line_len, IS_USED_RUN(slc_enable));
 	}
 
 	READ_BCR(ARC_REG_CLUSTER_BCR, cbcr);
@@ -129,70 +79,83 @@ static void read_decode_cache_bcr_arcv2(int cpu)
 		ioc_enable = 0;
 	}
 
+	READ_BCR(AUX_IDENTITY, ident);
+
 	/* HS 2.0 didn't have AUX_VOL */
-	if (cpuinfo_arc700[cpu].core.family > 0x51) {
+	if (ident.family > 0x51) {
 		READ_BCR(AUX_VOL, vol);
 		perip_base = vol.start << 28;
 		/* HS 3.0 has limit and strict-ordering fields */
-		if (cpuinfo_arc700[cpu].core.family > 0x52)
+		if (ident.family > 0x52)
 			perip_end = (vol.limit << 28) - 1;
 	}
+
+	n += scnprintf(buf + n, len - n, "Peripherals\t: %#lx%s%s\n",
+		       perip_base,
+		       IS_AVAIL3(ioc_exists, ioc_enable, ", IO-Coherency (per-device) "));
+
+	return n;
 }
 
-void read_decode_cache_bcr(void)
+int arc_cache_mumbojumbo(int c, char *buf, int len)
 {
-	struct cpuinfo_arc_cache *p_ic, *p_dc;
-	unsigned int cpu = smp_processor_id();
-	struct bcr_cache {
-#ifdef CONFIG_CPU_BIG_ENDIAN
-		unsigned int pad:12, line_len:4, sz:4, config:4, ver:8;
-#else
-		unsigned int ver:8, config:4, sz:4, line_len:4, pad:12;
-#endif
-	} ibcr, dbcr;
+	struct cpuinfo_arc_cache *p_ic = &ic_info, *p_dc = &dc_info;
+	struct bcr_cache ibcr, dbcr;
+	int vipt, assoc;
+	int n = 0;
 
-	p_ic = &cpuinfo_arc700[cpu].icache;
 	READ_BCR(ARC_REG_IC_BCR, ibcr);
-
 	if (!ibcr.ver)
 		goto dc_chk;
 
-	if (ibcr.ver <= 3) {
+	if (is_isa_arcompact() && (ibcr.ver <= 3)) {
 		BUG_ON(ibcr.config != 3);
-		p_ic->assoc = 2;		/* Fixed to 2w set assoc */
-	} else if (ibcr.ver >= 4) {
-		p_ic->assoc = 1 << ibcr.config;	/* 1,2,4,8 */
+		assoc = 2;		/* Fixed to 2w set assoc */
+	} else if (is_isa_arcv2() && (ibcr.ver >= 4)) {
+		assoc = 1 << ibcr.config;	/* 1,2,4,8 */
 	}
 
 	p_ic->line_len = 8 << ibcr.line_len;
 	p_ic->sz_k = 1 << (ibcr.sz - 1);
-	p_ic->vipt = 1;
-	p_ic->alias = p_ic->sz_k/p_ic->assoc/TO_KB(PAGE_SIZE) > 1;
+	p_ic->colors = p_ic->sz_k/assoc/TO_KB(PAGE_SIZE);
+
+	n += scnprintf(buf + n, len - n,
+			"I-Cache\t\t: %uK, %dway/set, %uB Line, VIPT%s%s\n",
+			p_ic->sz_k, assoc, p_ic->line_len,
+			p_ic->colors > 1 ? " aliasing" : "",
+			IS_USED_CFG(CONFIG_ARC_HAS_ICACHE));
 
 dc_chk:
-	p_dc = &cpuinfo_arc700[cpu].dcache;
 	READ_BCR(ARC_REG_DC_BCR, dbcr);
-
 	if (!dbcr.ver)
 		goto slc_chk;
 
-	if (dbcr.ver <= 3) {
+	if (is_isa_arcompact() && (dbcr.ver <= 3)) {
 		BUG_ON(dbcr.config != 2);
-		p_dc->assoc = 4;		/* Fixed to 4w set assoc */
-		p_dc->vipt = 1;
-		p_dc->alias = p_dc->sz_k/p_dc->assoc/TO_KB(PAGE_SIZE) > 1;
-	} else if (dbcr.ver >= 4) {
-		p_dc->assoc = 1 << dbcr.config;	/* 1,2,4,8 */
-		p_dc->vipt = 0;
-		p_dc->alias = 0;		/* PIPT so can't VIPT alias */
+		vipt = 1;
+		assoc = 4;		/* Fixed to 4w set assoc */
+		p_dc->colors = p_dc->sz_k/assoc/TO_KB(PAGE_SIZE);
+	} else if (is_isa_arcv2() && (dbcr.ver >= 4)) {
+		vipt = 0;
+		assoc = 1 << dbcr.config;	/* 1,2,4,8 */
+		p_dc->colors = 1;		/* PIPT so can't VIPT alias */
 	}
 
 	p_dc->line_len = 16 << dbcr.line_len;
 	p_dc->sz_k = 1 << (dbcr.sz - 1);
 
+	n += scnprintf(buf + n, len - n,
+			"D-Cache\t\t: %uK, %dway/set, %uB Line, %s%s%s\n",
+			p_dc->sz_k, assoc, p_dc->line_len,
+			vipt ? "VIPT" : "PIPT",
+			p_dc->colors > 1 ? " aliasing" : "",
+			IS_USED_CFG(CONFIG_ARC_HAS_DCACHE));
+
 slc_chk:
 	if (is_isa_arcv2())
-                read_decode_cache_bcr_arcv2(cpu);
+		n += read_decode_cache_bcr_arcv2(c, buf + n, len - n);
+
+	return n;
 }
 
 /*
@@ -581,7 +544,7 @@ static void __ic_line_inv_vaddr(phys_addr_t paddr, unsigned long vaddr,
 
 #endif /* CONFIG_ARC_HAS_ICACHE */
 
-noinline void slc_op_rgn(phys_addr_t paddr, unsigned long sz, const int op)
+static noinline void slc_op_rgn(phys_addr_t paddr, unsigned long sz, const int op)
 {
 #ifdef CONFIG_ISA_ARCV2
 	/*
@@ -644,7 +607,7 @@ noinline void slc_op_rgn(phys_addr_t paddr, unsigned long sz, const int op)
 #endif
 }
 
-noinline void slc_op_line(phys_addr_t paddr, unsigned long sz, const int op)
+static __maybe_unused noinline void slc_op_line(phys_addr_t paddr, unsigned long sz, const int op)
 {
 #ifdef CONFIG_ISA_ARCV2
 	/*
@@ -1082,7 +1045,7 @@ SYSCALL_DEFINE3(cacheflush, uint32_t, start, uint32_t, sz, uint32_t, flags)
  * 3. All Caches need to be disabled when setting up IOC to elide any in-flight
  *    Coherency transactions
  */
-noinline void __init arc_ioc_setup(void)
+static noinline void __init arc_ioc_setup(void)
 {
 	unsigned int ioc_base, mem_sz;
 
@@ -1144,12 +1107,10 @@ noinline void __init arc_ioc_setup(void)
  *    one core suffices for all
  *  - IOC setup / dma callbacks only need to be done once
  */
-void __init arc_cache_init_master(void)
+static noinline void __init arc_cache_init_master(void)
 {
-	unsigned int __maybe_unused cpu = smp_processor_id();
-
 	if (IS_ENABLED(CONFIG_ARC_HAS_ICACHE)) {
-		struct cpuinfo_arc_cache *ic = &cpuinfo_arc700[cpu].icache;
+		struct cpuinfo_arc_cache *ic = &ic_info;
 
 		if (!ic->line_len)
 			panic("cache support enabled but non-existent cache\n");
@@ -1162,14 +1123,14 @@ void __init arc_cache_init_master(void)
 		 * In MMU v4 (HS38x) the aliasing icache config uses IVIL/PTAG
 		 * pair to provide vaddr/paddr respectively, just as in MMU v3
 		 */
-		if (is_isa_arcv2() && ic->alias)
+		if (is_isa_arcv2() && ic->colors > 1)
 			_cache_line_loop_ic_fn = __cache_line_loop_v3;
 		else
 			_cache_line_loop_ic_fn = __cache_line_loop;
 	}
 
 	if (IS_ENABLED(CONFIG_ARC_HAS_DCACHE)) {
-		struct cpuinfo_arc_cache *dc = &cpuinfo_arc700[cpu].dcache;
+		struct cpuinfo_arc_cache *dc = &dc_info;
 
 		if (!dc->line_len)
 			panic("cache support enabled but non-existent cache\n");
@@ -1181,14 +1142,13 @@ void __init arc_cache_init_master(void)
 		/* check for D-Cache aliasing on ARCompact: ARCv2 has PIPT */
 		if (is_isa_arcompact()) {
 			int handled = IS_ENABLED(CONFIG_ARC_CACHE_VIPT_ALIASING);
-			int num_colors = dc->sz_k/dc->assoc/TO_KB(PAGE_SIZE);
 
-			if (dc->alias) {
+			if (dc->colors > 1) {
 				if (!handled)
 					panic("Enable CONFIG_ARC_CACHE_VIPT_ALIASING\n");
-				if (CACHE_COLORS_NUM != num_colors)
+				if (CACHE_COLORS_NUM != dc->colors)
 					panic("CACHE_COLORS_NUM not optimized for config\n");
-			} else if (!dc->alias && handled) {
+			} else if (handled && dc->colors == 1) {
 				panic("Disable CONFIG_ARC_CACHE_VIPT_ALIASING\n");
 			}
 		}
@@ -1231,9 +1191,6 @@ void __init arc_cache_init_master(void)
 void __ref arc_cache_init(void)
 {
 	unsigned int __maybe_unused cpu = smp_processor_id();
-	char str[256];
-
-	pr_info("%s", arc_cache_mumbojumbo(0, str, sizeof(str)));
 
 	if (!cpu)
 		arc_cache_init_master();
diff --git a/arch/arc/mm/extable.c b/arch/arc/mm/extable.c
index 4e14c4244ea2..88fa3a4d4906 100644
--- a/arch/arc/mm/extable.c
+++ b/arch/arc/mm/extable.c
@@ -22,14 +22,3 @@ int fixup_exception(struct pt_regs *regs)
 
 	return 0;
 }
-
-#ifdef CONFIG_CC_OPTIMIZE_FOR_SIZE
-
-unsigned long arc_clear_user_noinline(void __user *to,
-		unsigned long n)
-{
-	return __arc_clear_user(to, n);
-}
-EXPORT_SYMBOL(arc_clear_user_noinline);
-
-#endif
diff --git a/arch/arc/mm/fault.c b/arch/arc/mm/fault.c
index f59e722d147f..95119a5e7761 100644
--- a/arch/arc/mm/fault.c
+++ b/arch/arc/mm/fault.c
@@ -13,6 +13,7 @@
 #include <linux/kdebug.h>
 #include <linux/perf_event.h>
 #include <linux/mm_types.h>
+#include <asm/entry.h>
 #include <asm/mmu.h>
 
 /*
@@ -99,10 +100,10 @@ void do_page_fault(unsigned long address, struct pt_regs *regs)
 	if (faulthandler_disabled() || !mm)
 		goto no_context;
 
-	if (regs->ecr_cause & ECR_C_PROTV_STORE)	/* ST/EX */
+	if (regs->ecr.cause & ECR_C_PROTV_STORE)	/* ST/EX */
 		write = 1;
-	else if ((regs->ecr_vec == ECR_V_PROTV) &&
-	         (regs->ecr_cause == ECR_C_PROTV_INST_FETCH))
+	else if ((regs->ecr.vec == ECR_V_PROTV) &&
+	         (regs->ecr.cause == ECR_C_PROTV_INST_FETCH))
 		exec = 1;
 
 	flags = FAULT_FLAG_DEFAULT;
diff --git a/arch/arc/mm/init.c b/arch/arc/mm/init.c
index 9f64d729c9f8..6a71b23f1383 100644
--- a/arch/arc/mm/init.c
+++ b/arch/arc/mm/init.c
@@ -15,6 +15,7 @@
 #include <linux/highmem.h>
 #include <asm/page.h>
 #include <asm/sections.h>
+#include <asm/setup.h>
 #include <asm/arcregs.h>
 
 pgd_t swapper_pg_dir[PTRS_PER_PGD] __aligned(PAGE_SIZE);
diff --git a/arch/arc/mm/tlb.c b/arch/arc/mm/tlb.c
index 6f40f37e6550..e536b2dcd4b0 100644
--- a/arch/arc/mm/tlb.c
+++ b/arch/arc/mm/tlb.c
@@ -18,7 +18,9 @@
 /* A copy of the ASID from the PID reg is kept in asid_cache */
 DEFINE_PER_CPU(unsigned int, asid_cache) = MM_CTXT_FIRST_CYCLE;
 
-static int __read_mostly pae_exists;
+static struct cpuinfo_arc_mmu {
+	unsigned int ver, pg_sz_k, s_pg_sz_m, pae, sets, ways;
+} mmuinfo;
 
 /*
  * Utility Routine to erase a J-TLB entry
@@ -131,7 +133,7 @@ static void tlb_entry_insert(unsigned int pd0, phys_addr_t pd1)
 
 noinline void local_flush_tlb_all(void)
 {
-	struct cpuinfo_arc_mmu *mmu = &cpuinfo_arc700[smp_processor_id()].mmu;
+	struct cpuinfo_arc_mmu *mmu = &mmuinfo;
 	unsigned long flags;
 	unsigned int entry;
 	int num_tlb = mmu->sets * mmu->ways;
@@ -389,7 +391,7 @@ void flush_tlb_kernel_range(unsigned long start, unsigned long end)
 /*
  * Routine to create a TLB entry
  */
-void create_tlb(struct vm_area_struct *vma, unsigned long vaddr, pte_t *ptep)
+static void create_tlb(struct vm_area_struct *vma, unsigned long vaddr, pte_t *ptep)
 {
 	unsigned long flags;
 	unsigned int asid_or_sasid, rwx;
@@ -564,89 +566,64 @@ void local_flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start,
  * the cpuinfo structure for later use.
  * No Validation is done here, simply read/convert the BCRs
  */
-void read_decode_mmu_bcr(void)
+int arc_mmu_mumbojumbo(int c, char *buf, int len)
 {
-	struct cpuinfo_arc_mmu *mmu = &cpuinfo_arc700[smp_processor_id()].mmu;
-	unsigned int tmp;
-	struct bcr_mmu_3 {
-#ifdef CONFIG_CPU_BIG_ENDIAN
-	unsigned int ver:8, ways:4, sets:4, res:3, sasid:1, pg_sz:4,
-		     u_itlb:4, u_dtlb:4;
-#else
-	unsigned int u_dtlb:4, u_itlb:4, pg_sz:4, sasid:1, res:3, sets:4,
-		     ways:4, ver:8;
-#endif
-	} *mmu3;
-
-	struct bcr_mmu_4 {
-#ifdef CONFIG_CPU_BIG_ENDIAN
-	unsigned int ver:8, sasid:1, sz1:4, sz0:4, res:2, pae:1,
-		     n_ways:2, n_entry:2, n_super:2, u_itlb:3, u_dtlb:3;
-#else
-	/*           DTLB      ITLB      JES        JE         JA      */
-	unsigned int u_dtlb:3, u_itlb:3, n_super:2, n_entry:2, n_ways:2,
-		     pae:1, res:2, sz0:4, sz1:4, sasid:1, ver:8;
-#endif
-	} *mmu4;
+	struct cpuinfo_arc_mmu *mmu = &mmuinfo;
+	unsigned int bcr, u_dtlb, u_itlb, sasid;
+	struct bcr_mmu_3 *mmu3;
+	struct bcr_mmu_4 *mmu4;
+	char super_pg[64] = "";
+	int n = 0;
 
-	tmp = read_aux_reg(ARC_REG_MMU_BCR);
-	mmu->ver = (tmp >> 24);
+	bcr = read_aux_reg(ARC_REG_MMU_BCR);
+	mmu->ver = (bcr >> 24);
 
 	if (is_isa_arcompact() && mmu->ver == 3) {
-		mmu3 = (struct bcr_mmu_3 *)&tmp;
+		mmu3 = (struct bcr_mmu_3 *)&bcr;
 		mmu->pg_sz_k = 1 << (mmu3->pg_sz - 1);
 		mmu->sets = 1 << mmu3->sets;
 		mmu->ways = 1 << mmu3->ways;
-		mmu->u_dtlb = mmu3->u_dtlb;
-		mmu->u_itlb = mmu3->u_itlb;
-		mmu->sasid = mmu3->sasid;
+		u_dtlb = mmu3->u_dtlb;
+		u_itlb = mmu3->u_itlb;
+		sasid = mmu3->sasid;
 	} else {
-		mmu4 = (struct bcr_mmu_4 *)&tmp;
+		mmu4 = (struct bcr_mmu_4 *)&bcr;
 		mmu->pg_sz_k = 1 << (mmu4->sz0 - 1);
 		mmu->s_pg_sz_m = 1 << (mmu4->sz1 - 11);
 		mmu->sets = 64 << mmu4->n_entry;
 		mmu->ways = mmu4->n_ways * 2;
-		mmu->u_dtlb = mmu4->u_dtlb * 4;
-		mmu->u_itlb = mmu4->u_itlb * 4;
-		mmu->sasid = mmu4->sasid;
-		pae_exists = mmu->pae = mmu4->pae;
+		u_dtlb = mmu4->u_dtlb * 4;
+		u_itlb = mmu4->u_itlb * 4;
+		sasid = mmu4->sasid;
+		mmu->pae = mmu4->pae;
 	}
-}
 
-char *arc_mmu_mumbojumbo(int cpu_id, char *buf, int len)
-{
-	int n = 0;
-	struct cpuinfo_arc_mmu *p_mmu = &cpuinfo_arc700[cpu_id].mmu;
-	char super_pg[64] = "";
-
-	if (p_mmu->s_pg_sz_m)
-		scnprintf(super_pg, 64, "%dM Super Page %s",
-			  p_mmu->s_pg_sz_m,
-			  IS_USED_CFG(CONFIG_TRANSPARENT_HUGEPAGE));
+	if (mmu->s_pg_sz_m)
+		scnprintf(super_pg, 64, "/%dM%s",
+			  mmu->s_pg_sz_m,
+			  IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) ? " (THP enabled)":"");
 
 	n += scnprintf(buf + n, len - n,
-		      "MMU [v%x]\t: %dk PAGE, %s, swalk %d lvl, JTLB %d (%dx%d), uDTLB %d, uITLB %d%s%s\n",
-		       p_mmu->ver, p_mmu->pg_sz_k, super_pg,  CONFIG_PGTABLE_LEVELS,
-		       p_mmu->sets * p_mmu->ways, p_mmu->sets, p_mmu->ways,
-		       p_mmu->u_dtlb, p_mmu->u_itlb,
-		       IS_AVAIL2(p_mmu->pae, ", PAE40 ", CONFIG_ARC_HAS_PAE40));
-
-	return buf;
+		      "MMU [v%x]\t: %dk%s, swalk %d lvl, JTLB %dx%d, uDTLB %d, uITLB %d%s%s%s\n",
+		       mmu->ver, mmu->pg_sz_k, super_pg, CONFIG_PGTABLE_LEVELS,
+		       mmu->sets, mmu->ways,
+		       u_dtlb, u_itlb,
+		       IS_AVAIL1(sasid, ", SASID"),
+		       IS_AVAIL2(mmu->pae, ", PAE40 ", CONFIG_ARC_HAS_PAE40));
+
+	return n;
 }
 
 int pae40_exist_but_not_enab(void)
 {
-	return pae_exists && !is_pae40_enabled();
+	return mmuinfo.pae && !is_pae40_enabled();
 }
 
 void arc_mmu_init(void)
 {
-	struct cpuinfo_arc_mmu *mmu = &cpuinfo_arc700[smp_processor_id()].mmu;
-	char str[256];
+	struct cpuinfo_arc_mmu *mmu = &mmuinfo;
 	int compat = 0;
 
-	pr_info("%s", arc_mmu_mumbojumbo(0, str, sizeof(str)));
-
 	/*
 	 * Can't be done in processor.h due to header include dependencies
 	 */
@@ -723,7 +700,7 @@ volatile int dup_pd_silent; /* Be silent abt it or complain (default) */
 void do_tlb_overlap_fault(unsigned long cause, unsigned long address,
 			  struct pt_regs *regs)
 {
-	struct cpuinfo_arc_mmu *mmu = &cpuinfo_arc700[smp_processor_id()].mmu;
+	struct cpuinfo_arc_mmu *mmu = &mmuinfo;
 	unsigned long flags;
 	int set, n_ways = mmu->ways;
 
diff --git a/arch/arc/plat-axs10x/axs10x.c b/arch/arc/plat-axs10x/axs10x.c
index b821df7b0089..1feb990a56bc 100644
--- a/arch/arc/plat-axs10x/axs10x.c
+++ b/arch/arc/plat-axs10x/axs10x.c
@@ -6,7 +6,6 @@
  */
 
 #include <linux/of_fdt.h>
-#include <linux/of_platform.h>
 #include <linux/libfdt.h>
 
 #include <asm/asm-offsets.h>
diff --git a/arch/arm/boot/dts/nuvoton/nuvoton-common-npcm7xx.dtsi b/arch/arm/boot/dts/nuvoton/nuvoton-common-npcm7xx.dtsi
index c7b5ef15b716..868454ae6bde 100644
--- a/arch/arm/boot/dts/nuvoton/nuvoton-common-npcm7xx.dtsi
+++ b/arch/arm/boot/dts/nuvoton/nuvoton-common-npcm7xx.dtsi
@@ -220,6 +220,15 @@
 				};
 			};
 
+			peci: peci-controller@f0100000 {
+				compatible = "nuvoton,npcm750-peci";
+				reg = <0xf0100000 0x200>;
+				interrupts = <GIC_SPI 6 IRQ_TYPE_LEVEL_HIGH>;
+				clocks = <&clk NPCM7XX_CLK_APB3>;
+				cmd-timeout-ms = <1000>;
+				status = "disabled";
+			};
+
 			spi0: spi@200000 {
 				compatible = "nuvoton,npcm750-pspi";
 				reg = <0x200000 0x1000>;
diff --git a/arch/arm/configs/dram_0x00000000.config b/arch/arm/configs/dram_0x00000000.config
index db96dcb420ce..8803a0f58343 100644
--- a/arch/arm/configs/dram_0x00000000.config
+++ b/arch/arm/configs/dram_0x00000000.config
@@ -1 +1,2 @@
+# Help: DRAM base at 0x00000000
 CONFIG_DRAM_BASE=0x00000000
diff --git a/arch/arm/configs/dram_0xc0000000.config b/arch/arm/configs/dram_0xc0000000.config
index 343d5333d973..aab8f864686b 100644
--- a/arch/arm/configs/dram_0xc0000000.config
+++ b/arch/arm/configs/dram_0xc0000000.config
@@ -1 +1,2 @@
+# Help: DRAM base at 0xc0000000
 CONFIG_DRAM_BASE=0xc0000000
diff --git a/arch/arm/configs/dram_0xd0000000.config b/arch/arm/configs/dram_0xd0000000.config
index 61ba7045f8a1..4aabce4ea3d4 100644
--- a/arch/arm/configs/dram_0xd0000000.config
+++ b/arch/arm/configs/dram_0xd0000000.config
@@ -1 +1,2 @@
+# Help: DRAM base at 0xd0000000
 CONFIG_DRAM_BASE=0xd0000000
diff --git a/arch/arm/configs/lpae.config b/arch/arm/configs/lpae.config
index a6d6f7ab3c01..1ab94da8345d 100644
--- a/arch/arm/configs/lpae.config
+++ b/arch/arm/configs/lpae.config
@@ -1,2 +1,3 @@
+# Help: Enable Large Physical Address Extension mode
 CONFIG_ARM_LPAE=y
 CONFIG_VMSPLIT_2G=y
diff --git a/arch/arm/include/asm/arm_pmuv3.h b/arch/arm/include/asm/arm_pmuv3.h
index f3cd04ff022d..72529f5e2bed 100644
--- a/arch/arm/include/asm/arm_pmuv3.h
+++ b/arch/arm/include/asm/arm_pmuv3.h
@@ -227,6 +227,8 @@ static inline bool kvm_set_pmuserenr(u64 val)
 	return false;
 }
 
+static inline void kvm_vcpu_pmu_resync_el0(void) {}
+
 /* PMU Version in DFR Register */
 #define ARMV8_PMU_DFR_VER_NI        0
 #define ARMV8_PMU_DFR_VER_V3P4      0x5
diff --git a/arch/arm/include/asm/ide.h b/arch/arm/include/asm/ide.h
deleted file mode 100644
index a81e0b0d6747..000000000000
--- a/arch/arm/include/asm/ide.h
+++ /dev/null
@@ -1,24 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- *  arch/arm/include/asm/ide.h
- *
- *  Copyright (C) 1994-1996  Linus Torvalds & authors
- */
-
-/*
- *  This file contains the ARM architecture specific IDE code.
- */
-
-#ifndef __ASMARM_IDE_H
-#define __ASMARM_IDE_H
-
-#ifdef __KERNEL__
-
-#define __ide_mm_insw(port,addr,len)	readsw(port,addr,len)
-#define __ide_mm_insl(port,addr,len)	readsl(port,addr,len)
-#define __ide_mm_outsw(port,addr,len)	writesw(port,addr,len)
-#define __ide_mm_outsl(port,addr,len)	writesl(port,addr,len)
-
-#endif /* __KERNEL__ */
-
-#endif /* __ASMARM_IDE_H */
diff --git a/arch/arm64/boot/dts/amlogic/meson-a1.dtsi b/arch/arm64/boot/dts/amlogic/meson-a1.dtsi
index c8f344596285..96225c421194 100644
--- a/arch/arm64/boot/dts/amlogic/meson-a1.dtsi
+++ b/arch/arm64/boot/dts/amlogic/meson-a1.dtsi
@@ -108,7 +108,7 @@
 			};
 
 			uart_AO: serial@1c00 {
-				compatible = "amlogic,meson-gx-uart",
+				compatible = "amlogic,meson-a1-uart",
 					     "amlogic,meson-ao-uart";
 				reg = <0x0 0x1c00 0x0 0x18>;
 				interrupts = <GIC_SPI 25 IRQ_TYPE_EDGE_RISING>;
@@ -118,7 +118,7 @@
 			};
 
 			uart_AO_B: serial@2000 {
-				compatible = "amlogic,meson-gx-uart",
+				compatible = "amlogic,meson-a1-uart",
 					     "amlogic,meson-ao-uart";
 				reg = <0x0 0x2000 0x0 0x18>;
 				interrupts = <GIC_SPI 26 IRQ_TYPE_EDGE_RISING>;
diff --git a/arch/arm64/boot/dts/nuvoton/nuvoton-common-npcm8xx.dtsi b/arch/arm64/boot/dts/nuvoton/nuvoton-common-npcm8xx.dtsi
index aa7aac8c3774..ecd171b2feba 100644
--- a/arch/arm64/boot/dts/nuvoton/nuvoton-common-npcm8xx.dtsi
+++ b/arch/arm64/boot/dts/nuvoton/nuvoton-common-npcm8xx.dtsi
@@ -68,6 +68,15 @@
 			ranges = <0x0 0x0 0xf0000000 0x00300000>,
 				<0xfff00000 0x0 0xfff00000 0x00016000>;
 
+			peci: peci-controller@100000 {
+				compatible = "nuvoton,npcm845-peci";
+				reg = <0x100000 0x1000>;
+				interrupts = <GIC_SPI 6 IRQ_TYPE_LEVEL_HIGH>;
+				clocks = <&clk NPCM8XX_CLK_APB3>;
+				cmd-timeout-ms = <1000>;
+				status = "disabled";
+			};
+
 			timer0: timer@8000 {
 				compatible = "nuvoton,npcm845-timer";
 				interrupts = <GIC_SPI 32 IRQ_TYPE_LEVEL_HIGH>;
diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig
index 2e9636da2147..5315789f4868 100644
--- a/arch/arm64/configs/defconfig
+++ b/arch/arm64/configs/defconfig
@@ -1159,7 +1159,6 @@ CONFIG_XEN_GNTDEV=y
 CONFIG_XEN_GRANT_DEV_ALLOC=y
 CONFIG_STAGING=y
 CONFIG_STAGING_MEDIA=y
-CONFIG_VIDEO_IMX_MEDIA=m
 CONFIG_VIDEO_MAX96712=m
 CONFIG_CHROME_PLATFORMS=y
 CONFIG_CROS_EC=y
diff --git a/arch/arm64/configs/virt.config b/arch/arm64/configs/virt.config
index 6865d54e68f8..c47c36f8f67b 100644
--- a/arch/arm64/configs/virt.config
+++ b/arch/arm64/configs/virt.config
@@ -1,3 +1,4 @@
+# Help: Virtualization guest
 #
 # Base options for platforms
 #
diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
index 58e5eb27da68..5882b2415596 100644
--- a/arch/arm64/include/asm/kvm_arm.h
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -18,10 +18,19 @@
 #define HCR_DCT		(UL(1) << 57)
 #define HCR_ATA_SHIFT	56
 #define HCR_ATA		(UL(1) << HCR_ATA_SHIFT)
+#define HCR_TTLBOS	(UL(1) << 55)
+#define HCR_TTLBIS	(UL(1) << 54)
+#define HCR_ENSCXT	(UL(1) << 53)
+#define HCR_TOCU	(UL(1) << 52)
 #define HCR_AMVOFFEN	(UL(1) << 51)
+#define HCR_TICAB	(UL(1) << 50)
 #define HCR_TID4	(UL(1) << 49)
 #define HCR_FIEN	(UL(1) << 47)
 #define HCR_FWB		(UL(1) << 46)
+#define HCR_NV2		(UL(1) << 45)
+#define HCR_AT		(UL(1) << 44)
+#define HCR_NV1		(UL(1) << 43)
+#define HCR_NV		(UL(1) << 42)
 #define HCR_API		(UL(1) << 41)
 #define HCR_APK		(UL(1) << 40)
 #define HCR_TEA		(UL(1) << 37)
@@ -89,7 +98,6 @@
 			 HCR_BSU_IS | HCR_FB | HCR_TACR | \
 			 HCR_AMO | HCR_SWIO | HCR_TIDCP | HCR_RW | HCR_TLOR | \
 			 HCR_FMO | HCR_IMO | HCR_PTW | HCR_TID3)
-#define HCR_VIRT_EXCP_MASK (HCR_VSE | HCR_VI | HCR_VF)
 #define HCR_HOST_NVHE_FLAGS (HCR_RW | HCR_API | HCR_APK | HCR_ATA)
 #define HCR_HOST_NVHE_PROTECTED_FLAGS (HCR_HOST_NVHE_FLAGS | HCR_TSC)
 #define HCR_HOST_VHE_FLAGS (HCR_RW | HCR_TGE | HCR_E2H)
@@ -324,6 +332,47 @@
 				 BIT(18) |		\
 				 GENMASK(16, 15))
 
+/*
+ * FGT register definitions
+ *
+ * RES0 and polarity masks as of DDI0487J.a, to be updated as needed.
+ * We're not using the generated masks as they are usually ahead of
+ * the published ARM ARM, which we use as a reference.
+ *
+ * Once we get to a point where the two describe the same thing, we'll
+ * merge the definitions. One day.
+ */
+#define __HFGRTR_EL2_RES0	(GENMASK(63, 56) | GENMASK(53, 51))
+#define __HFGRTR_EL2_MASK	GENMASK(49, 0)
+#define __HFGRTR_EL2_nMASK	(GENMASK(55, 54) | BIT(50))
+
+#define __HFGWTR_EL2_RES0	(GENMASK(63, 56) | GENMASK(53, 51) |	\
+				 BIT(46) | BIT(42) | BIT(40) | BIT(28) | \
+				 GENMASK(26, 25) | BIT(21) | BIT(18) |	\
+				 GENMASK(15, 14) | GENMASK(10, 9) | BIT(2))
+#define __HFGWTR_EL2_MASK	GENMASK(49, 0)
+#define __HFGWTR_EL2_nMASK	(GENMASK(55, 54) | BIT(50))
+
+#define __HFGITR_EL2_RES0	GENMASK(63, 57)
+#define __HFGITR_EL2_MASK	GENMASK(54, 0)
+#define __HFGITR_EL2_nMASK	GENMASK(56, 55)
+
+#define __HDFGRTR_EL2_RES0	(BIT(49) | BIT(42) | GENMASK(39, 38) |	\
+				 GENMASK(21, 20) | BIT(8))
+#define __HDFGRTR_EL2_MASK	~__HDFGRTR_EL2_nMASK
+#define __HDFGRTR_EL2_nMASK	GENMASK(62, 59)
+
+#define __HDFGWTR_EL2_RES0	(BIT(63) | GENMASK(59, 58) | BIT(51) | BIT(47) | \
+				 BIT(43) | GENMASK(40, 38) | BIT(34) | BIT(30) | \
+				 BIT(22) | BIT(9) | BIT(6))
+#define __HDFGWTR_EL2_MASK	~__HDFGWTR_EL2_nMASK
+#define __HDFGWTR_EL2_nMASK	GENMASK(62, 60)
+
+/* Similar definitions for HCRX_EL2 */
+#define __HCRX_EL2_RES0		(GENMASK(63, 16) | GENMASK(13, 12))
+#define __HCRX_EL2_MASK		(0)
+#define __HCRX_EL2_nMASK	(GENMASK(15, 14) | GENMASK(4, 0))
+
 /* Hyp Prefetch Fault Address Register (HPFAR/HDFAR) */
 #define HPFAR_MASK	(~UL(0xf))
 /*
diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
index 24e28bb2d95b..24b5e6b23417 100644
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -70,6 +70,7 @@ enum __kvm_host_smccc_func {
 	__KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_vmid_ipa,
 	__KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_vmid_ipa_nsh,
 	__KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_vmid,
+	__KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_vmid_range,
 	__KVM_HOST_SMCCC_FUNC___kvm_flush_cpu_context,
 	__KVM_HOST_SMCCC_FUNC___kvm_timer_set_cntvoff,
 	__KVM_HOST_SMCCC_FUNC___vgic_v3_read_vmcr,
@@ -229,6 +230,8 @@ extern void __kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu, phys_addr_t ipa,
 extern void __kvm_tlb_flush_vmid_ipa_nsh(struct kvm_s2_mmu *mmu,
 					 phys_addr_t ipa,
 					 int level);
+extern void __kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu,
+					phys_addr_t start, unsigned long pages);
 extern void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu);
 
 extern void __kvm_timer_set_cntvoff(u64 cntvoff);
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index d3dd05bbfe23..af06ccb7ee34 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -49,6 +49,7 @@
 #define KVM_REQ_RELOAD_GICv4	KVM_ARCH_REQ(4)
 #define KVM_REQ_RELOAD_PMU	KVM_ARCH_REQ(5)
 #define KVM_REQ_SUSPEND		KVM_ARCH_REQ(6)
+#define KVM_REQ_RESYNC_PMU_EL0	KVM_ARCH_REQ(7)
 
 #define KVM_DIRTY_LOG_MANUAL_CAPS   (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE | \
 				     KVM_DIRTY_LOG_INITIALLY_SET)
@@ -380,6 +381,7 @@ enum vcpu_sysreg {
 	CPTR_EL2,	/* Architectural Feature Trap Register (EL2) */
 	HSTR_EL2,	/* Hypervisor System Trap Register */
 	HACR_EL2,	/* Hypervisor Auxiliary Control Register */
+	HCRX_EL2,	/* Extended Hypervisor Configuration Register */
 	TTBR0_EL2,	/* Translation Table Base Register 0 (EL2) */
 	TTBR1_EL2,	/* Translation Table Base Register 1 (EL2) */
 	TCR_EL2,	/* Translation Control Register (EL2) */
@@ -400,6 +402,11 @@ enum vcpu_sysreg {
 	TPIDR_EL2,	/* EL2 Software Thread ID Register */
 	CNTHCTL_EL2,	/* Counter-timer Hypervisor Control register */
 	SP_EL2,		/* EL2 Stack Pointer */
+	HFGRTR_EL2,
+	HFGWTR_EL2,
+	HFGITR_EL2,
+	HDFGRTR_EL2,
+	HDFGWTR_EL2,
 	CNTHP_CTL_EL2,
 	CNTHP_CVAL_EL2,
 	CNTHV_CTL_EL2,
@@ -567,8 +574,7 @@ struct kvm_vcpu_arch {
 	/* Cache some mmu pages needed inside spinlock regions */
 	struct kvm_mmu_memory_cache mmu_page_cache;
 
-	/* Target CPU and feature flags */
-	int target;
+	/* feature flags */
 	DECLARE_BITMAP(features, KVM_VCPU_MAX_FEATURES);
 
 	/* Virtual SError ESR to restore when HCR_EL2.VSE is set */
@@ -669,6 +675,8 @@ struct kvm_vcpu_arch {
 #define VCPU_SVE_FINALIZED	__vcpu_single_flag(cflags, BIT(1))
 /* PTRAUTH exposed to guest */
 #define GUEST_HAS_PTRAUTH	__vcpu_single_flag(cflags, BIT(2))
+/* KVM_ARM_VCPU_INIT completed */
+#define VCPU_INITIALIZED	__vcpu_single_flag(cflags, BIT(3))
 
 /* Exception pending */
 #define PENDING_EXCEPTION	__vcpu_single_flag(iflags, BIT(0))
@@ -899,7 +907,6 @@ struct kvm_vcpu_stat {
 	u64 exits;
 };
 
-void kvm_vcpu_preferred_target(struct kvm_vcpu_init *init);
 unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu);
 int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices);
 int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg);
@@ -967,8 +974,6 @@ void kvm_arm_resume_guest(struct kvm *kvm);
 #define kvm_call_hyp_nvhe(f, ...) f(__VA_ARGS__)
 #endif /* __KVM_NVHE_HYPERVISOR__ */
 
-void force_vm_exit(const cpumask_t *mask);
-
 int handle_exit(struct kvm_vcpu *vcpu, int exception_index);
 void handle_exit_early(struct kvm_vcpu *vcpu, int exception_index);
 
@@ -983,6 +988,7 @@ int kvm_handle_cp10_id(struct kvm_vcpu *vcpu);
 void kvm_reset_sys_regs(struct kvm_vcpu *vcpu);
 
 int __init kvm_sys_reg_table_init(void);
+int __init populate_nv_trap_config(void);
 
 bool lock_all_vcpus(struct kvm *kvm);
 void unlock_all_vcpus(struct kvm *kvm);
@@ -1049,8 +1055,6 @@ static inline bool kvm_system_needs_idmapped_vectors(void)
 	return cpus_have_const_cap(ARM64_SPECTRE_V3A);
 }
 
-void kvm_arm_vcpu_ptrauth_trap(struct kvm_vcpu *vcpu);
-
 static inline void kvm_arch_sync_events(struct kvm *kvm) {}
 static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
 
@@ -1113,13 +1117,15 @@ int __init kvm_set_ipa_limit(void);
 #define __KVM_HAVE_ARCH_VM_ALLOC
 struct kvm *kvm_arch_alloc_vm(void);
 
+#define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS
+
+#define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS_RANGE
+
 static inline bool kvm_vm_is_protected(struct kvm *kvm)
 {
 	return false;
 }
 
-void kvm_init_protected_traps(struct kvm_vcpu *vcpu);
-
 int kvm_arm_vcpu_finalize(struct kvm_vcpu *vcpu, int feature);
 bool kvm_arm_vcpu_is_finalized(struct kvm_vcpu *vcpu);
 
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index 0e1e1ab17b4d..96a80e8f6226 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -168,6 +168,7 @@ int create_hyp_io_mappings(phys_addr_t phys_addr, size_t size,
 			   void __iomem **haddr);
 int create_hyp_exec_mappings(phys_addr_t phys_addr, size_t size,
 			     void **haddr);
+int create_hyp_stack(phys_addr_t phys_addr, unsigned long *haddr);
 void __init free_hyp_pgds(void);
 
 void stage2_unmap_vm(struct kvm *kvm);
diff --git a/arch/arm64/include/asm/kvm_nested.h b/arch/arm64/include/asm/kvm_nested.h
index 8fb67f032fd1..fa23cc9c2adc 100644
--- a/arch/arm64/include/asm/kvm_nested.h
+++ b/arch/arm64/include/asm/kvm_nested.h
@@ -11,6 +11,8 @@ static inline bool vcpu_has_nv(const struct kvm_vcpu *vcpu)
 		test_bit(KVM_ARM_VCPU_HAS_EL2, vcpu->arch.features));
 }
 
+extern bool __check_nv_sr_forward(struct kvm_vcpu *vcpu);
+
 struct sys_reg_params;
 struct sys_reg_desc;
 
diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h
index 929d355eae0a..d3e354bb8351 100644
--- a/arch/arm64/include/asm/kvm_pgtable.h
+++ b/arch/arm64/include/asm/kvm_pgtable.h
@@ -746,4 +746,14 @@ enum kvm_pgtable_prot kvm_pgtable_stage2_pte_prot(kvm_pte_t pte);
  *	   kvm_pgtable_prot format.
  */
 enum kvm_pgtable_prot kvm_pgtable_hyp_pte_prot(kvm_pte_t pte);
+
+/**
+ * kvm_tlb_flush_vmid_range() - Invalidate/flush a range of TLB entries
+ *
+ * @mmu:	Stage-2 KVM MMU struct
+ * @addr:	The base Intermediate physical address from which to invalidate
+ * @size:	Size of the range from the base to invalidate
+ */
+void kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu,
+				phys_addr_t addr, size_t size);
 #endif	/* __ARM64_KVM_PGTABLE_H__ */
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index 16464bf9a8aa..38296579a4fd 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -124,6 +124,37 @@
 #define SYS_DC_CIGSW			sys_insn(1, 0, 7, 14, 4)
 #define SYS_DC_CIGDSW			sys_insn(1, 0, 7, 14, 6)
 
+#define SYS_IC_IALLUIS			sys_insn(1, 0, 7, 1, 0)
+#define SYS_IC_IALLU			sys_insn(1, 0, 7, 5, 0)
+#define SYS_IC_IVAU			sys_insn(1, 3, 7, 5, 1)
+
+#define SYS_DC_IVAC			sys_insn(1, 0, 7, 6, 1)
+#define SYS_DC_IGVAC			sys_insn(1, 0, 7, 6, 3)
+#define SYS_DC_IGDVAC			sys_insn(1, 0, 7, 6, 5)
+
+#define SYS_DC_CVAC			sys_insn(1, 3, 7, 10, 1)
+#define SYS_DC_CGVAC			sys_insn(1, 3, 7, 10, 3)
+#define SYS_DC_CGDVAC			sys_insn(1, 3, 7, 10, 5)
+
+#define SYS_DC_CVAU			sys_insn(1, 3, 7, 11, 1)
+
+#define SYS_DC_CVAP			sys_insn(1, 3, 7, 12, 1)
+#define SYS_DC_CGVAP			sys_insn(1, 3, 7, 12, 3)
+#define SYS_DC_CGDVAP			sys_insn(1, 3, 7, 12, 5)
+
+#define SYS_DC_CVADP			sys_insn(1, 3, 7, 13, 1)
+#define SYS_DC_CGVADP			sys_insn(1, 3, 7, 13, 3)
+#define SYS_DC_CGDVADP			sys_insn(1, 3, 7, 13, 5)
+
+#define SYS_DC_CIVAC			sys_insn(1, 3, 7, 14, 1)
+#define SYS_DC_CIGVAC			sys_insn(1, 3, 7, 14, 3)
+#define SYS_DC_CIGDVAC			sys_insn(1, 3, 7, 14, 5)
+
+/* Data cache zero operations */
+#define SYS_DC_ZVA			sys_insn(1, 3, 7, 4, 1)
+#define SYS_DC_GVA			sys_insn(1, 3, 7, 4, 3)
+#define SYS_DC_GZVA			sys_insn(1, 3, 7, 4, 4)
+
 /*
  * Automatically generated definitions for system registers, the
  * manual encodings below are in the process of being converted to
@@ -163,6 +194,82 @@
 #define SYS_DBGDTRTX_EL0		sys_reg(2, 3, 0, 5, 0)
 #define SYS_DBGVCR32_EL2		sys_reg(2, 4, 0, 7, 0)
 
+#define SYS_BRBINF_EL1(n)		sys_reg(2, 1, 8, (n & 15), (((n & 16) >> 2) | 0))
+#define SYS_BRBINFINJ_EL1		sys_reg(2, 1, 9, 1, 0)
+#define SYS_BRBSRC_EL1(n)		sys_reg(2, 1, 8, (n & 15), (((n & 16) >> 2) | 1))
+#define SYS_BRBSRCINJ_EL1		sys_reg(2, 1, 9, 1, 1)
+#define SYS_BRBTGT_EL1(n)		sys_reg(2, 1, 8, (n & 15), (((n & 16) >> 2) | 2))
+#define SYS_BRBTGTINJ_EL1		sys_reg(2, 1, 9, 1, 2)
+#define SYS_BRBTS_EL1			sys_reg(2, 1, 9, 0, 2)
+
+#define SYS_BRBCR_EL1			sys_reg(2, 1, 9, 0, 0)
+#define SYS_BRBFCR_EL1			sys_reg(2, 1, 9, 0, 1)
+#define SYS_BRBIDR0_EL1			sys_reg(2, 1, 9, 2, 0)
+
+#define SYS_TRCITECR_EL1		sys_reg(3, 0, 1, 2, 3)
+#define SYS_TRCACATR(m)			sys_reg(2, 1, 2, ((m & 7) << 1), (2 | (m >> 3)))
+#define SYS_TRCACVR(m)			sys_reg(2, 1, 2, ((m & 7) << 1), (0 | (m >> 3)))
+#define SYS_TRCAUTHSTATUS		sys_reg(2, 1, 7, 14, 6)
+#define SYS_TRCAUXCTLR			sys_reg(2, 1, 0, 6, 0)
+#define SYS_TRCBBCTLR			sys_reg(2, 1, 0, 15, 0)
+#define SYS_TRCCCCTLR			sys_reg(2, 1, 0, 14, 0)
+#define SYS_TRCCIDCCTLR0		sys_reg(2, 1, 3, 0, 2)
+#define SYS_TRCCIDCCTLR1		sys_reg(2, 1, 3, 1, 2)
+#define SYS_TRCCIDCVR(m)		sys_reg(2, 1, 3, ((m & 7) << 1), 0)
+#define SYS_TRCCLAIMCLR			sys_reg(2, 1, 7, 9, 6)
+#define SYS_TRCCLAIMSET			sys_reg(2, 1, 7, 8, 6)
+#define SYS_TRCCNTCTLR(m)		sys_reg(2, 1, 0, (4 | (m & 3)), 5)
+#define SYS_TRCCNTRLDVR(m)		sys_reg(2, 1, 0, (0 | (m & 3)), 5)
+#define SYS_TRCCNTVR(m)			sys_reg(2, 1, 0, (8 | (m & 3)), 5)
+#define SYS_TRCCONFIGR			sys_reg(2, 1, 0, 4, 0)
+#define SYS_TRCDEVARCH			sys_reg(2, 1, 7, 15, 6)
+#define SYS_TRCDEVID			sys_reg(2, 1, 7, 2, 7)
+#define SYS_TRCEVENTCTL0R		sys_reg(2, 1, 0, 8, 0)
+#define SYS_TRCEVENTCTL1R		sys_reg(2, 1, 0, 9, 0)
+#define SYS_TRCEXTINSELR(m)		sys_reg(2, 1, 0, (8 | (m & 3)), 4)
+#define SYS_TRCIDR0			sys_reg(2, 1, 0, 8, 7)
+#define SYS_TRCIDR10			sys_reg(2, 1, 0, 2, 6)
+#define SYS_TRCIDR11			sys_reg(2, 1, 0, 3, 6)
+#define SYS_TRCIDR12			sys_reg(2, 1, 0, 4, 6)
+#define SYS_TRCIDR13			sys_reg(2, 1, 0, 5, 6)
+#define SYS_TRCIDR1			sys_reg(2, 1, 0, 9, 7)
+#define SYS_TRCIDR2			sys_reg(2, 1, 0, 10, 7)
+#define SYS_TRCIDR3			sys_reg(2, 1, 0, 11, 7)
+#define SYS_TRCIDR4			sys_reg(2, 1, 0, 12, 7)
+#define SYS_TRCIDR5			sys_reg(2, 1, 0, 13, 7)
+#define SYS_TRCIDR6			sys_reg(2, 1, 0, 14, 7)
+#define SYS_TRCIDR7			sys_reg(2, 1, 0, 15, 7)
+#define SYS_TRCIDR8			sys_reg(2, 1, 0, 0, 6)
+#define SYS_TRCIDR9			sys_reg(2, 1, 0, 1, 6)
+#define SYS_TRCIMSPEC(m)		sys_reg(2, 1, 0, (m & 7), 7)
+#define SYS_TRCITEEDCR			sys_reg(2, 1, 0, 2, 1)
+#define SYS_TRCOSLSR			sys_reg(2, 1, 1, 1, 4)
+#define SYS_TRCPRGCTLR			sys_reg(2, 1, 0, 1, 0)
+#define SYS_TRCQCTLR			sys_reg(2, 1, 0, 1, 1)
+#define SYS_TRCRSCTLR(m)		sys_reg(2, 1, 1, (m & 15), (0 | (m >> 4)))
+#define SYS_TRCRSR			sys_reg(2, 1, 0, 10, 0)
+#define SYS_TRCSEQEVR(m)		sys_reg(2, 1, 0, (m & 3), 4)
+#define SYS_TRCSEQRSTEVR		sys_reg(2, 1, 0, 6, 4)
+#define SYS_TRCSEQSTR			sys_reg(2, 1, 0, 7, 4)
+#define SYS_TRCSSCCR(m)			sys_reg(2, 1, 1, (m & 7), 2)
+#define SYS_TRCSSCSR(m)			sys_reg(2, 1, 1, (8 | (m & 7)), 2)
+#define SYS_TRCSSPCICR(m)		sys_reg(2, 1, 1, (m & 7), 3)
+#define SYS_TRCSTALLCTLR		sys_reg(2, 1, 0, 11, 0)
+#define SYS_TRCSTATR			sys_reg(2, 1, 0, 3, 0)
+#define SYS_TRCSYNCPR			sys_reg(2, 1, 0, 13, 0)
+#define SYS_TRCTRACEIDR			sys_reg(2, 1, 0, 0, 1)
+#define SYS_TRCTSCTLR			sys_reg(2, 1, 0, 12, 0)
+#define SYS_TRCVICTLR			sys_reg(2, 1, 0, 0, 2)
+#define SYS_TRCVIIECTLR			sys_reg(2, 1, 0, 1, 2)
+#define SYS_TRCVIPCSSCTLR		sys_reg(2, 1, 0, 3, 2)
+#define SYS_TRCVISSCTLR			sys_reg(2, 1, 0, 2, 2)
+#define SYS_TRCVMIDCCTLR0		sys_reg(2, 1, 3, 2, 2)
+#define SYS_TRCVMIDCCTLR1		sys_reg(2, 1, 3, 3, 2)
+#define SYS_TRCVMIDCVR(m)		sys_reg(2, 1, 3, ((m & 7) << 1), 1)
+
+/* ETM */
+#define SYS_TRCOSLAR			sys_reg(2, 1, 1, 0, 4)
+
 #define SYS_MIDR_EL1			sys_reg(3, 0, 0, 0, 0)
 #define SYS_MPIDR_EL1			sys_reg(3, 0, 0, 0, 5)
 #define SYS_REVIDR_EL1			sys_reg(3, 0, 0, 0, 6)
@@ -203,8 +310,13 @@
 #define SYS_ERXCTLR_EL1			sys_reg(3, 0, 5, 4, 1)
 #define SYS_ERXSTATUS_EL1		sys_reg(3, 0, 5, 4, 2)
 #define SYS_ERXADDR_EL1			sys_reg(3, 0, 5, 4, 3)
+#define SYS_ERXPFGF_EL1			sys_reg(3, 0, 5, 4, 4)
+#define SYS_ERXPFGCTL_EL1		sys_reg(3, 0, 5, 4, 5)
+#define SYS_ERXPFGCDN_EL1		sys_reg(3, 0, 5, 4, 6)
 #define SYS_ERXMISC0_EL1		sys_reg(3, 0, 5, 5, 0)
 #define SYS_ERXMISC1_EL1		sys_reg(3, 0, 5, 5, 1)
+#define SYS_ERXMISC2_EL1		sys_reg(3, 0, 5, 5, 2)
+#define SYS_ERXMISC3_EL1		sys_reg(3, 0, 5, 5, 3)
 #define SYS_TFSR_EL1			sys_reg(3, 0, 5, 6, 0)
 #define SYS_TFSRE0_EL1			sys_reg(3, 0, 5, 6, 1)
 
@@ -275,6 +387,8 @@
 #define SYS_ICC_IGRPEN0_EL1		sys_reg(3, 0, 12, 12, 6)
 #define SYS_ICC_IGRPEN1_EL1		sys_reg(3, 0, 12, 12, 7)
 
+#define SYS_ACCDATA_EL1			sys_reg(3, 0, 13, 0, 5)
+
 #define SYS_CNTKCTL_EL1			sys_reg(3, 0, 14, 1, 0)
 
 #define SYS_AIDR_EL1			sys_reg(3, 1, 0, 0, 7)
@@ -383,8 +497,6 @@
 #define SYS_VTCR_EL2			sys_reg(3, 4, 2, 1, 2)
 
 #define SYS_TRFCR_EL2			sys_reg(3, 4, 1, 2, 1)
-#define SYS_HDFGRTR_EL2			sys_reg(3, 4, 3, 1, 4)
-#define SYS_HDFGWTR_EL2			sys_reg(3, 4, 3, 1, 5)
 #define SYS_HAFGRTR_EL2			sys_reg(3, 4, 3, 1, 6)
 #define SYS_SPSR_EL2			sys_reg(3, 4, 4, 0, 0)
 #define SYS_ELR_EL2			sys_reg(3, 4, 4, 0, 1)
@@ -478,6 +590,158 @@
 
 #define SYS_SP_EL2			sys_reg(3, 6,  4, 1, 0)
 
+/* AT instructions */
+#define AT_Op0 1
+#define AT_CRn 7
+
+#define OP_AT_S1E1R	sys_insn(AT_Op0, 0, AT_CRn, 8, 0)
+#define OP_AT_S1E1W	sys_insn(AT_Op0, 0, AT_CRn, 8, 1)
+#define OP_AT_S1E0R	sys_insn(AT_Op0, 0, AT_CRn, 8, 2)
+#define OP_AT_S1E0W	sys_insn(AT_Op0, 0, AT_CRn, 8, 3)
+#define OP_AT_S1E1RP	sys_insn(AT_Op0, 0, AT_CRn, 9, 0)
+#define OP_AT_S1E1WP	sys_insn(AT_Op0, 0, AT_CRn, 9, 1)
+#define OP_AT_S1E2R	sys_insn(AT_Op0, 4, AT_CRn, 8, 0)
+#define OP_AT_S1E2W	sys_insn(AT_Op0, 4, AT_CRn, 8, 1)
+#define OP_AT_S12E1R	sys_insn(AT_Op0, 4, AT_CRn, 8, 4)
+#define OP_AT_S12E1W	sys_insn(AT_Op0, 4, AT_CRn, 8, 5)
+#define OP_AT_S12E0R	sys_insn(AT_Op0, 4, AT_CRn, 8, 6)
+#define OP_AT_S12E0W	sys_insn(AT_Op0, 4, AT_CRn, 8, 7)
+
+/* TLBI instructions */
+#define OP_TLBI_VMALLE1OS		sys_insn(1, 0, 8, 1, 0)
+#define OP_TLBI_VAE1OS			sys_insn(1, 0, 8, 1, 1)
+#define OP_TLBI_ASIDE1OS		sys_insn(1, 0, 8, 1, 2)
+#define OP_TLBI_VAAE1OS			sys_insn(1, 0, 8, 1, 3)
+#define OP_TLBI_VALE1OS			sys_insn(1, 0, 8, 1, 5)
+#define OP_TLBI_VAALE1OS		sys_insn(1, 0, 8, 1, 7)
+#define OP_TLBI_RVAE1IS			sys_insn(1, 0, 8, 2, 1)
+#define OP_TLBI_RVAAE1IS		sys_insn(1, 0, 8, 2, 3)
+#define OP_TLBI_RVALE1IS		sys_insn(1, 0, 8, 2, 5)
+#define OP_TLBI_RVAALE1IS		sys_insn(1, 0, 8, 2, 7)
+#define OP_TLBI_VMALLE1IS		sys_insn(1, 0, 8, 3, 0)
+#define OP_TLBI_VAE1IS			sys_insn(1, 0, 8, 3, 1)
+#define OP_TLBI_ASIDE1IS		sys_insn(1, 0, 8, 3, 2)
+#define OP_TLBI_VAAE1IS			sys_insn(1, 0, 8, 3, 3)
+#define OP_TLBI_VALE1IS			sys_insn(1, 0, 8, 3, 5)
+#define OP_TLBI_VAALE1IS		sys_insn(1, 0, 8, 3, 7)
+#define OP_TLBI_RVAE1OS			sys_insn(1, 0, 8, 5, 1)
+#define OP_TLBI_RVAAE1OS		sys_insn(1, 0, 8, 5, 3)
+#define OP_TLBI_RVALE1OS		sys_insn(1, 0, 8, 5, 5)
+#define OP_TLBI_RVAALE1OS		sys_insn(1, 0, 8, 5, 7)
+#define OP_TLBI_RVAE1			sys_insn(1, 0, 8, 6, 1)
+#define OP_TLBI_RVAAE1			sys_insn(1, 0, 8, 6, 3)
+#define OP_TLBI_RVALE1			sys_insn(1, 0, 8, 6, 5)
+#define OP_TLBI_RVAALE1			sys_insn(1, 0, 8, 6, 7)
+#define OP_TLBI_VMALLE1			sys_insn(1, 0, 8, 7, 0)
+#define OP_TLBI_VAE1			sys_insn(1, 0, 8, 7, 1)
+#define OP_TLBI_ASIDE1			sys_insn(1, 0, 8, 7, 2)
+#define OP_TLBI_VAAE1			sys_insn(1, 0, 8, 7, 3)
+#define OP_TLBI_VALE1			sys_insn(1, 0, 8, 7, 5)
+#define OP_TLBI_VAALE1			sys_insn(1, 0, 8, 7, 7)
+#define OP_TLBI_VMALLE1OSNXS		sys_insn(1, 0, 9, 1, 0)
+#define OP_TLBI_VAE1OSNXS		sys_insn(1, 0, 9, 1, 1)
+#define OP_TLBI_ASIDE1OSNXS		sys_insn(1, 0, 9, 1, 2)
+#define OP_TLBI_VAAE1OSNXS		sys_insn(1, 0, 9, 1, 3)
+#define OP_TLBI_VALE1OSNXS		sys_insn(1, 0, 9, 1, 5)
+#define OP_TLBI_VAALE1OSNXS		sys_insn(1, 0, 9, 1, 7)
+#define OP_TLBI_RVAE1ISNXS		sys_insn(1, 0, 9, 2, 1)
+#define OP_TLBI_RVAAE1ISNXS		sys_insn(1, 0, 9, 2, 3)
+#define OP_TLBI_RVALE1ISNXS		sys_insn(1, 0, 9, 2, 5)
+#define OP_TLBI_RVAALE1ISNXS		sys_insn(1, 0, 9, 2, 7)
+#define OP_TLBI_VMALLE1ISNXS		sys_insn(1, 0, 9, 3, 0)
+#define OP_TLBI_VAE1ISNXS		sys_insn(1, 0, 9, 3, 1)
+#define OP_TLBI_ASIDE1ISNXS		sys_insn(1, 0, 9, 3, 2)
+#define OP_TLBI_VAAE1ISNXS		sys_insn(1, 0, 9, 3, 3)
+#define OP_TLBI_VALE1ISNXS		sys_insn(1, 0, 9, 3, 5)
+#define OP_TLBI_VAALE1ISNXS		sys_insn(1, 0, 9, 3, 7)
+#define OP_TLBI_RVAE1OSNXS		sys_insn(1, 0, 9, 5, 1)
+#define OP_TLBI_RVAAE1OSNXS		sys_insn(1, 0, 9, 5, 3)
+#define OP_TLBI_RVALE1OSNXS		sys_insn(1, 0, 9, 5, 5)
+#define OP_TLBI_RVAALE1OSNXS		sys_insn(1, 0, 9, 5, 7)
+#define OP_TLBI_RVAE1NXS		sys_insn(1, 0, 9, 6, 1)
+#define OP_TLBI_RVAAE1NXS		sys_insn(1, 0, 9, 6, 3)
+#define OP_TLBI_RVALE1NXS		sys_insn(1, 0, 9, 6, 5)
+#define OP_TLBI_RVAALE1NXS		sys_insn(1, 0, 9, 6, 7)
+#define OP_TLBI_VMALLE1NXS		sys_insn(1, 0, 9, 7, 0)
+#define OP_TLBI_VAE1NXS			sys_insn(1, 0, 9, 7, 1)
+#define OP_TLBI_ASIDE1NXS		sys_insn(1, 0, 9, 7, 2)
+#define OP_TLBI_VAAE1NXS		sys_insn(1, 0, 9, 7, 3)
+#define OP_TLBI_VALE1NXS		sys_insn(1, 0, 9, 7, 5)
+#define OP_TLBI_VAALE1NXS		sys_insn(1, 0, 9, 7, 7)
+#define OP_TLBI_IPAS2E1IS		sys_insn(1, 4, 8, 0, 1)
+#define OP_TLBI_RIPAS2E1IS		sys_insn(1, 4, 8, 0, 2)
+#define OP_TLBI_IPAS2LE1IS		sys_insn(1, 4, 8, 0, 5)
+#define OP_TLBI_RIPAS2LE1IS		sys_insn(1, 4, 8, 0, 6)
+#define OP_TLBI_ALLE2OS			sys_insn(1, 4, 8, 1, 0)
+#define OP_TLBI_VAE2OS			sys_insn(1, 4, 8, 1, 1)
+#define OP_TLBI_ALLE1OS			sys_insn(1, 4, 8, 1, 4)
+#define OP_TLBI_VALE2OS			sys_insn(1, 4, 8, 1, 5)
+#define OP_TLBI_VMALLS12E1OS		sys_insn(1, 4, 8, 1, 6)
+#define OP_TLBI_RVAE2IS			sys_insn(1, 4, 8, 2, 1)
+#define OP_TLBI_RVALE2IS		sys_insn(1, 4, 8, 2, 5)
+#define OP_TLBI_ALLE2IS			sys_insn(1, 4, 8, 3, 0)
+#define OP_TLBI_VAE2IS			sys_insn(1, 4, 8, 3, 1)
+#define OP_TLBI_ALLE1IS			sys_insn(1, 4, 8, 3, 4)
+#define OP_TLBI_VALE2IS			sys_insn(1, 4, 8, 3, 5)
+#define OP_TLBI_VMALLS12E1IS		sys_insn(1, 4, 8, 3, 6)
+#define OP_TLBI_IPAS2E1OS		sys_insn(1, 4, 8, 4, 0)
+#define OP_TLBI_IPAS2E1			sys_insn(1, 4, 8, 4, 1)
+#define OP_TLBI_RIPAS2E1		sys_insn(1, 4, 8, 4, 2)
+#define OP_TLBI_RIPAS2E1OS		sys_insn(1, 4, 8, 4, 3)
+#define OP_TLBI_IPAS2LE1OS		sys_insn(1, 4, 8, 4, 4)
+#define OP_TLBI_IPAS2LE1		sys_insn(1, 4, 8, 4, 5)
+#define OP_TLBI_RIPAS2LE1		sys_insn(1, 4, 8, 4, 6)
+#define OP_TLBI_RIPAS2LE1OS		sys_insn(1, 4, 8, 4, 7)
+#define OP_TLBI_RVAE2OS			sys_insn(1, 4, 8, 5, 1)
+#define OP_TLBI_RVALE2OS		sys_insn(1, 4, 8, 5, 5)
+#define OP_TLBI_RVAE2			sys_insn(1, 4, 8, 6, 1)
+#define OP_TLBI_RVALE2			sys_insn(1, 4, 8, 6, 5)
+#define OP_TLBI_ALLE2			sys_insn(1, 4, 8, 7, 0)
+#define OP_TLBI_VAE2			sys_insn(1, 4, 8, 7, 1)
+#define OP_TLBI_ALLE1			sys_insn(1, 4, 8, 7, 4)
+#define OP_TLBI_VALE2			sys_insn(1, 4, 8, 7, 5)
+#define OP_TLBI_VMALLS12E1		sys_insn(1, 4, 8, 7, 6)
+#define OP_TLBI_IPAS2E1ISNXS		sys_insn(1, 4, 9, 0, 1)
+#define OP_TLBI_RIPAS2E1ISNXS		sys_insn(1, 4, 9, 0, 2)
+#define OP_TLBI_IPAS2LE1ISNXS		sys_insn(1, 4, 9, 0, 5)
+#define OP_TLBI_RIPAS2LE1ISNXS		sys_insn(1, 4, 9, 0, 6)
+#define OP_TLBI_ALLE2OSNXS		sys_insn(1, 4, 9, 1, 0)
+#define OP_TLBI_VAE2OSNXS		sys_insn(1, 4, 9, 1, 1)
+#define OP_TLBI_ALLE1OSNXS		sys_insn(1, 4, 9, 1, 4)
+#define OP_TLBI_VALE2OSNXS		sys_insn(1, 4, 9, 1, 5)
+#define OP_TLBI_VMALLS12E1OSNXS		sys_insn(1, 4, 9, 1, 6)
+#define OP_TLBI_RVAE2ISNXS		sys_insn(1, 4, 9, 2, 1)
+#define OP_TLBI_RVALE2ISNXS		sys_insn(1, 4, 9, 2, 5)
+#define OP_TLBI_ALLE2ISNXS		sys_insn(1, 4, 9, 3, 0)
+#define OP_TLBI_VAE2ISNXS		sys_insn(1, 4, 9, 3, 1)
+#define OP_TLBI_ALLE1ISNXS		sys_insn(1, 4, 9, 3, 4)
+#define OP_TLBI_VALE2ISNXS		sys_insn(1, 4, 9, 3, 5)
+#define OP_TLBI_VMALLS12E1ISNXS		sys_insn(1, 4, 9, 3, 6)
+#define OP_TLBI_IPAS2E1OSNXS		sys_insn(1, 4, 9, 4, 0)
+#define OP_TLBI_IPAS2E1NXS		sys_insn(1, 4, 9, 4, 1)
+#define OP_TLBI_RIPAS2E1NXS		sys_insn(1, 4, 9, 4, 2)
+#define OP_TLBI_RIPAS2E1OSNXS		sys_insn(1, 4, 9, 4, 3)
+#define OP_TLBI_IPAS2LE1OSNXS		sys_insn(1, 4, 9, 4, 4)
+#define OP_TLBI_IPAS2LE1NXS		sys_insn(1, 4, 9, 4, 5)
+#define OP_TLBI_RIPAS2LE1NXS		sys_insn(1, 4, 9, 4, 6)
+#define OP_TLBI_RIPAS2LE1OSNXS		sys_insn(1, 4, 9, 4, 7)
+#define OP_TLBI_RVAE2OSNXS		sys_insn(1, 4, 9, 5, 1)
+#define OP_TLBI_RVALE2OSNXS		sys_insn(1, 4, 9, 5, 5)
+#define OP_TLBI_RVAE2NXS		sys_insn(1, 4, 9, 6, 1)
+#define OP_TLBI_RVALE2NXS		sys_insn(1, 4, 9, 6, 5)
+#define OP_TLBI_ALLE2NXS		sys_insn(1, 4, 9, 7, 0)
+#define OP_TLBI_VAE2NXS			sys_insn(1, 4, 9, 7, 1)
+#define OP_TLBI_ALLE1NXS		sys_insn(1, 4, 9, 7, 4)
+#define OP_TLBI_VALE2NXS		sys_insn(1, 4, 9, 7, 5)
+#define OP_TLBI_VMALLS12E1NXS		sys_insn(1, 4, 9, 7, 6)
+
+/* Misc instructions */
+#define OP_BRB_IALL			sys_insn(1, 1, 7, 2, 4)
+#define OP_BRB_INJ			sys_insn(1, 1, 7, 2, 5)
+#define OP_CFP_RCTX			sys_insn(1, 3, 7, 3, 4)
+#define OP_DVP_RCTX			sys_insn(1, 3, 7, 3, 5)
+#define OP_CPP_RCTX			sys_insn(1, 3, 7, 3, 7)
+
 /* Common SCTLR_ELx flags. */
 #define SCTLR_ELx_ENTP2	(BIT(60))
 #define SCTLR_ELx_DSSBS	(BIT(44))
diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h
index 55b50e1d4a84..b149cf9f91bc 100644
--- a/arch/arm64/include/asm/tlbflush.h
+++ b/arch/arm64/include/asm/tlbflush.h
@@ -335,14 +335,77 @@ static inline void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch)
  */
 #define MAX_TLBI_OPS	PTRS_PER_PTE
 
+/*
+ * __flush_tlb_range_op - Perform TLBI operation upon a range
+ *
+ * @op:	TLBI instruction that operates on a range (has 'r' prefix)
+ * @start:	The start address of the range
+ * @pages:	Range as the number of pages from 'start'
+ * @stride:	Flush granularity
+ * @asid:	The ASID of the task (0 for IPA instructions)
+ * @tlb_level:	Translation Table level hint, if known
+ * @tlbi_user:	If 'true', call an additional __tlbi_user()
+ *              (typically for user ASIDs). 'flase' for IPA instructions
+ *
+ * When the CPU does not support TLB range operations, flush the TLB
+ * entries one by one at the granularity of 'stride'. If the TLB
+ * range ops are supported, then:
+ *
+ * 1. If 'pages' is odd, flush the first page through non-range
+ *    operations;
+ *
+ * 2. For remaining pages: the minimum range granularity is decided
+ *    by 'scale', so multiple range TLBI operations may be required.
+ *    Start from scale = 0, flush the corresponding number of pages
+ *    ((num+1)*2^(5*scale+1) starting from 'addr'), then increase it
+ *    until no pages left.
+ *
+ * Note that certain ranges can be represented by either num = 31 and
+ * scale or num = 0 and scale + 1. The loop below favours the latter
+ * since num is limited to 30 by the __TLBI_RANGE_NUM() macro.
+ */
+#define __flush_tlb_range_op(op, start, pages, stride,			\
+				asid, tlb_level, tlbi_user)		\
+do {									\
+	int num = 0;							\
+	int scale = 0;							\
+	unsigned long addr;						\
+									\
+	while (pages > 0) {						\
+		if (!system_supports_tlb_range() ||			\
+		    pages % 2 == 1) {					\
+			addr = __TLBI_VADDR(start, asid);		\
+			__tlbi_level(op, addr, tlb_level);		\
+			if (tlbi_user)					\
+				__tlbi_user_level(op, addr, tlb_level);	\
+			start += stride;				\
+			pages -= stride >> PAGE_SHIFT;			\
+			continue;					\
+		}							\
+									\
+		num = __TLBI_RANGE_NUM(pages, scale);			\
+		if (num >= 0) {						\
+			addr = __TLBI_VADDR_RANGE(start, asid, scale,	\
+						  num, tlb_level);	\
+			__tlbi(r##op, addr);				\
+			if (tlbi_user)					\
+				__tlbi_user(r##op, addr);		\
+			start += __TLBI_RANGE_PAGES(num, scale) << PAGE_SHIFT; \
+			pages -= __TLBI_RANGE_PAGES(num, scale);	\
+		}							\
+		scale++;						\
+	}								\
+} while (0)
+
+#define __flush_s2_tlb_range_op(op, start, pages, stride, tlb_level) \
+	__flush_tlb_range_op(op, start, pages, stride, 0, tlb_level, false)
+
 static inline void __flush_tlb_range(struct vm_area_struct *vma,
 				     unsigned long start, unsigned long end,
 				     unsigned long stride, bool last_level,
 				     int tlb_level)
 {
-	int num = 0;
-	int scale = 0;
-	unsigned long asid, addr, pages;
+	unsigned long asid, pages;
 
 	start = round_down(start, stride);
 	end = round_up(end, stride);
@@ -364,56 +427,11 @@ static inline void __flush_tlb_range(struct vm_area_struct *vma,
 	dsb(ishst);
 	asid = ASID(vma->vm_mm);
 
-	/*
-	 * When the CPU does not support TLB range operations, flush the TLB
-	 * entries one by one at the granularity of 'stride'. If the TLB
-	 * range ops are supported, then:
-	 *
-	 * 1. If 'pages' is odd, flush the first page through non-range
-	 *    operations;
-	 *
-	 * 2. For remaining pages: the minimum range granularity is decided
-	 *    by 'scale', so multiple range TLBI operations may be required.
-	 *    Start from scale = 0, flush the corresponding number of pages
-	 *    ((num+1)*2^(5*scale+1) starting from 'addr'), then increase it
-	 *    until no pages left.
-	 *
-	 * Note that certain ranges can be represented by either num = 31 and
-	 * scale or num = 0 and scale + 1. The loop below favours the latter
-	 * since num is limited to 30 by the __TLBI_RANGE_NUM() macro.
-	 */
-	while (pages > 0) {
-		if (!system_supports_tlb_range() ||
-		    pages % 2 == 1) {
-			addr = __TLBI_VADDR(start, asid);
-			if (last_level) {
-				__tlbi_level(vale1is, addr, tlb_level);
-				__tlbi_user_level(vale1is, addr, tlb_level);
-			} else {
-				__tlbi_level(vae1is, addr, tlb_level);
-				__tlbi_user_level(vae1is, addr, tlb_level);
-			}
-			start += stride;
-			pages -= stride >> PAGE_SHIFT;
-			continue;
-		}
-
-		num = __TLBI_RANGE_NUM(pages, scale);
-		if (num >= 0) {
-			addr = __TLBI_VADDR_RANGE(start, asid, scale,
-						  num, tlb_level);
-			if (last_level) {
-				__tlbi(rvale1is, addr);
-				__tlbi_user(rvale1is, addr);
-			} else {
-				__tlbi(rvae1is, addr);
-				__tlbi_user(rvae1is, addr);
-			}
-			start += __TLBI_RANGE_PAGES(num, scale) << PAGE_SHIFT;
-			pages -= __TLBI_RANGE_PAGES(num, scale);
-		}
-		scale++;
-	}
+	if (last_level)
+		__flush_tlb_range_op(vale1is, start, pages, stride, asid, tlb_level, true);
+	else
+		__flush_tlb_range_op(vae1is, start, pages, stride, asid, tlb_level, true);
+
 	dsb(ish);
 	mmu_notifier_arch_invalidate_secondary_tlbs(vma->vm_mm, start, end);
 }
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index a5f533f63b60..b018ae12ff5f 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -2627,6 +2627,13 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
 		.matches = has_cpuid_feature,
 		ARM64_CPUID_FIELDS(ID_AA64ISAR1_EL1, LRCPC, IMP)
 	},
+	{
+		.desc = "Fine Grained Traps",
+		.type = ARM64_CPUCAP_SYSTEM_FEATURE,
+		.capability = ARM64_HAS_FGT,
+		.matches = has_cpuid_feature,
+		ARM64_CPUID_FIELDS(ID_AA64MMFR0_EL1, FGT, IMP)
+	},
 #ifdef CONFIG_ARM64_SME
 	{
 		.desc = "Scalable Matrix Extension",
diff --git a/arch/arm64/kernel/idreg-override.c b/arch/arm64/kernel/idreg-override.c
index aee12c75b738..3addc09f8746 100644
--- a/arch/arm64/kernel/idreg-override.c
+++ b/arch/arm64/kernel/idreg-override.c
@@ -262,9 +262,9 @@ static __init void __parse_cmdline(const char *cmdline, bool parse_aliases)
 		if (!len)
 			return;
 
-		len = strscpy(buf, cmdline, ARRAY_SIZE(buf));
-		if (len == -E2BIG)
-			len = ARRAY_SIZE(buf) - 1;
+		len = min(len, ARRAY_SIZE(buf) - 1);
+		memcpy(buf, cmdline, len);
+		buf[len] = '\0';
 
 		if (strcmp(buf, "--") == 0)
 			return;
diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig
index f531da6b362e..83c1e09be42e 100644
--- a/arch/arm64/kvm/Kconfig
+++ b/arch/arm64/kvm/Kconfig
@@ -25,7 +25,6 @@ menuconfig KVM
 	select MMU_NOTIFIER
 	select PREEMPT_NOTIFIERS
 	select HAVE_KVM_CPU_RELAX_INTERCEPT
-	select HAVE_KVM_ARCH_TLB_FLUSH_ALL
 	select KVM_MMIO
 	select KVM_GENERIC_DIRTYLOG_READ_PROTECT
 	select KVM_XFER_TO_GUEST_WORK
@@ -43,6 +42,7 @@ menuconfig KVM
 	select SCHED_INFO
 	select GUEST_PERF_EVENTS if PERF_EVENTS
 	select INTERVAL_TREE
+	select XARRAY_MULTI
 	help
 	  Support hosting virtualized guest machines.
 
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index d1cb298a58a0..4866b3f7b4ea 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -36,6 +36,7 @@
 #include <asm/kvm_arm.h>
 #include <asm/kvm_asm.h>
 #include <asm/kvm_mmu.h>
+#include <asm/kvm_nested.h>
 #include <asm/kvm_pkvm.h>
 #include <asm/kvm_emulate.h>
 #include <asm/sections.h>
@@ -365,7 +366,7 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
 #endif
 
 	/* Force users to call KVM_ARM_VCPU_INIT */
-	vcpu->arch.target = -1;
+	vcpu_clear_flag(vcpu, VCPU_INITIALIZED);
 	bitmap_zero(vcpu->arch.features, KVM_VCPU_MAX_FEATURES);
 
 	vcpu->arch.mmu_page_cache.gfp_zero = __GFP_ZERO;
@@ -462,7 +463,7 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 		vcpu_ptrauth_disable(vcpu);
 	kvm_arch_vcpu_load_debug_state_flags(vcpu);
 
-	if (!cpumask_test_cpu(smp_processor_id(), vcpu->kvm->arch.supported_cpus))
+	if (!cpumask_test_cpu(cpu, vcpu->kvm->arch.supported_cpus))
 		vcpu_set_on_unsupported_cpu(vcpu);
 }
 
@@ -574,7 +575,7 @@ unsigned long kvm_arch_vcpu_get_ip(struct kvm_vcpu *vcpu)
 
 static int kvm_vcpu_initialized(struct kvm_vcpu *vcpu)
 {
-	return vcpu->arch.target >= 0;
+	return vcpu_get_flag(vcpu, VCPU_INITIALIZED);
 }
 
 /*
@@ -803,6 +804,9 @@ static int check_vcpu_requests(struct kvm_vcpu *vcpu)
 			kvm_pmu_handle_pmcr(vcpu,
 					    __vcpu_sys_reg(vcpu, PMCR_EL0));
 
+		if (kvm_check_request(KVM_REQ_RESYNC_PMU_EL0, vcpu))
+			kvm_vcpu_pmu_restore_guest(vcpu);
+
 		if (kvm_check_request(KVM_REQ_SUSPEND, vcpu))
 			return kvm_vcpu_suspend(vcpu);
 
@@ -818,6 +822,9 @@ static bool vcpu_mode_is_bad_32bit(struct kvm_vcpu *vcpu)
 	if (likely(!vcpu_mode_is_32bit(vcpu)))
 		return false;
 
+	if (vcpu_has_nv(vcpu))
+		return true;
+
 	return !kvm_supports_32bit_el0();
 }
 
@@ -1058,7 +1065,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
 			 * invalid. The VMM can try and fix it by issuing  a
 			 * KVM_ARM_VCPU_INIT if it really wants to.
 			 */
-			vcpu->arch.target = -1;
+			vcpu_clear_flag(vcpu, VCPU_INITIALIZED);
 			ret = ARM_EXCEPTION_IL;
 		}
 
@@ -1219,8 +1226,7 @@ static bool kvm_vcpu_init_changed(struct kvm_vcpu *vcpu,
 {
 	unsigned long features = init->features[0];
 
-	return !bitmap_equal(vcpu->arch.features, &features, KVM_VCPU_MAX_FEATURES) ||
-			vcpu->arch.target != init->target;
+	return !bitmap_equal(vcpu->arch.features, &features, KVM_VCPU_MAX_FEATURES);
 }
 
 static int __kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
@@ -1236,20 +1242,18 @@ static int __kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
 	    !bitmap_equal(kvm->arch.vcpu_features, &features, KVM_VCPU_MAX_FEATURES))
 		goto out_unlock;
 
-	vcpu->arch.target = init->target;
 	bitmap_copy(vcpu->arch.features, &features, KVM_VCPU_MAX_FEATURES);
 
 	/* Now we know what it is, we can reset it. */
 	ret = kvm_reset_vcpu(vcpu);
 	if (ret) {
-		vcpu->arch.target = -1;
 		bitmap_zero(vcpu->arch.features, KVM_VCPU_MAX_FEATURES);
 		goto out_unlock;
 	}
 
 	bitmap_copy(kvm->arch.vcpu_features, &features, KVM_VCPU_MAX_FEATURES);
 	set_bit(KVM_ARCH_FLAG_VCPU_FEATURES_CONFIGURED, &kvm->arch.flags);
-
+	vcpu_set_flag(vcpu, VCPU_INITIALIZED);
 out_unlock:
 	mutex_unlock(&kvm->arch.config_lock);
 	return ret;
@@ -1260,14 +1264,15 @@ static int kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
 {
 	int ret;
 
-	if (init->target != kvm_target_cpu())
+	if (init->target != KVM_ARM_TARGET_GENERIC_V8 &&
+	    init->target != kvm_target_cpu())
 		return -EINVAL;
 
 	ret = kvm_vcpu_init_check_features(vcpu, init);
 	if (ret)
 		return ret;
 
-	if (vcpu->arch.target == -1)
+	if (!kvm_vcpu_initialized(vcpu))
 		return __kvm_vcpu_set_target(vcpu, init);
 
 	if (kvm_vcpu_init_changed(vcpu, init))
@@ -1532,12 +1537,6 @@ void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
 
 }
 
-void kvm_arch_flush_remote_tlbs_memslot(struct kvm *kvm,
-					const struct kvm_memory_slot *memslot)
-{
-	kvm_flush_remote_tlbs(kvm);
-}
-
 static int kvm_vm_ioctl_set_device_addr(struct kvm *kvm,
 					struct kvm_arm_device_addr *dev_addr)
 {
@@ -1595,9 +1594,9 @@ int kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg)
 		return kvm_vm_ioctl_set_device_addr(kvm, &dev_addr);
 	}
 	case KVM_ARM_PREFERRED_TARGET: {
-		struct kvm_vcpu_init init;
-
-		kvm_vcpu_preferred_target(&init);
+		struct kvm_vcpu_init init = {
+			.target = KVM_ARM_TARGET_GENERIC_V8,
+		};
 
 		if (copy_to_user(argp, &init, sizeof(init)))
 			return -EFAULT;
@@ -2276,30 +2275,8 @@ static int __init init_hyp_mode(void)
 	for_each_possible_cpu(cpu) {
 		struct kvm_nvhe_init_params *params = per_cpu_ptr_nvhe_sym(kvm_init_params, cpu);
 		char *stack_page = (char *)per_cpu(kvm_arm_hyp_stack_page, cpu);
-		unsigned long hyp_addr;
 
-		/*
-		 * Allocate a contiguous HYP private VA range for the stack
-		 * and guard page. The allocation is also aligned based on
-		 * the order of its size.
-		 */
-		err = hyp_alloc_private_va_range(PAGE_SIZE * 2, &hyp_addr);
-		if (err) {
-			kvm_err("Cannot allocate hyp stack guard page\n");
-			goto out_err;
-		}
-
-		/*
-		 * Since the stack grows downwards, map the stack to the page
-		 * at the higher address and leave the lower guard page
-		 * unbacked.
-		 *
-		 * Any valid stack address now has the PAGE_SHIFT bit as 1
-		 * and addresses corresponding to the guard page have the
-		 * PAGE_SHIFT bit as 0 - this is used for overflow detection.
-		 */
-		err = __create_hyp_mappings(hyp_addr + PAGE_SIZE, PAGE_SIZE,
-					    __pa(stack_page), PAGE_HYP);
+		err = create_hyp_stack(__pa(stack_page), &params->stack_hyp_va);
 		if (err) {
 			kvm_err("Cannot map hyp stack\n");
 			goto out_err;
@@ -2312,8 +2289,6 @@ static int __init init_hyp_mode(void)
 		 * has been mapped in the flexible private VA space.
 		 */
 		params->stack_pa = __pa(stack_page);
-
-		params->stack_hyp_va = hyp_addr + (2 * PAGE_SIZE);
 	}
 
 	for_each_possible_cpu(cpu) {
diff --git a/arch/arm64/kvm/emulate-nested.c b/arch/arm64/kvm/emulate-nested.c
index b96662029fb1..9ced1bf0c2b7 100644
--- a/arch/arm64/kvm/emulate-nested.c
+++ b/arch/arm64/kvm/emulate-nested.c
@@ -14,6 +14,1858 @@
 
 #include "trace.h"
 
+enum trap_behaviour {
+	BEHAVE_HANDLE_LOCALLY	= 0,
+	BEHAVE_FORWARD_READ	= BIT(0),
+	BEHAVE_FORWARD_WRITE	= BIT(1),
+	BEHAVE_FORWARD_ANY	= BEHAVE_FORWARD_READ | BEHAVE_FORWARD_WRITE,
+};
+
+struct trap_bits {
+	const enum vcpu_sysreg		index;
+	const enum trap_behaviour	behaviour;
+	const u64			value;
+	const u64			mask;
+};
+
+/* Coarse Grained Trap definitions */
+enum cgt_group_id {
+	/* Indicates no coarse trap control */
+	__RESERVED__,
+
+	/*
+	 * The first batch of IDs denote coarse trapping that are used
+	 * on their own instead of being part of a combination of
+	 * trap controls.
+	 */
+	CGT_HCR_TID1,
+	CGT_HCR_TID2,
+	CGT_HCR_TID3,
+	CGT_HCR_IMO,
+	CGT_HCR_FMO,
+	CGT_HCR_TIDCP,
+	CGT_HCR_TACR,
+	CGT_HCR_TSW,
+	CGT_HCR_TPC,
+	CGT_HCR_TPU,
+	CGT_HCR_TTLB,
+	CGT_HCR_TVM,
+	CGT_HCR_TDZ,
+	CGT_HCR_TRVM,
+	CGT_HCR_TLOR,
+	CGT_HCR_TERR,
+	CGT_HCR_APK,
+	CGT_HCR_NV,
+	CGT_HCR_NV_nNV2,
+	CGT_HCR_NV1_nNV2,
+	CGT_HCR_AT,
+	CGT_HCR_nFIEN,
+	CGT_HCR_TID4,
+	CGT_HCR_TICAB,
+	CGT_HCR_TOCU,
+	CGT_HCR_ENSCXT,
+	CGT_HCR_TTLBIS,
+	CGT_HCR_TTLBOS,
+
+	CGT_MDCR_TPMCR,
+	CGT_MDCR_TPM,
+	CGT_MDCR_TDE,
+	CGT_MDCR_TDA,
+	CGT_MDCR_TDOSA,
+	CGT_MDCR_TDRA,
+	CGT_MDCR_E2PB,
+	CGT_MDCR_TPMS,
+	CGT_MDCR_TTRF,
+	CGT_MDCR_E2TB,
+	CGT_MDCR_TDCC,
+
+	/*
+	 * Anything after this point is a combination of coarse trap
+	 * controls, which must all be evaluated to decide what to do.
+	 */
+	__MULTIPLE_CONTROL_BITS__,
+	CGT_HCR_IMO_FMO = __MULTIPLE_CONTROL_BITS__,
+	CGT_HCR_TID2_TID4,
+	CGT_HCR_TTLB_TTLBIS,
+	CGT_HCR_TTLB_TTLBOS,
+	CGT_HCR_TVM_TRVM,
+	CGT_HCR_TPU_TICAB,
+	CGT_HCR_TPU_TOCU,
+	CGT_HCR_NV1_nNV2_ENSCXT,
+	CGT_MDCR_TPM_TPMCR,
+	CGT_MDCR_TDE_TDA,
+	CGT_MDCR_TDE_TDOSA,
+	CGT_MDCR_TDE_TDRA,
+	CGT_MDCR_TDCC_TDE_TDA,
+
+	/*
+	 * Anything after this point requires a callback evaluating a
+	 * complex trap condition. Ugly stuff.
+	 */
+	__COMPLEX_CONDITIONS__,
+	CGT_CNTHCTL_EL1PCTEN = __COMPLEX_CONDITIONS__,
+	CGT_CNTHCTL_EL1PTEN,
+
+	/* Must be last */
+	__NR_CGT_GROUP_IDS__
+};
+
+static const struct trap_bits coarse_trap_bits[] = {
+	[CGT_HCR_TID1] = {
+		.index		= HCR_EL2,
+		.value 		= HCR_TID1,
+		.mask		= HCR_TID1,
+		.behaviour	= BEHAVE_FORWARD_READ,
+	},
+	[CGT_HCR_TID2] = {
+		.index		= HCR_EL2,
+		.value 		= HCR_TID2,
+		.mask		= HCR_TID2,
+		.behaviour	= BEHAVE_FORWARD_ANY,
+	},
+	[CGT_HCR_TID3] = {
+		.index		= HCR_EL2,
+		.value 		= HCR_TID3,
+		.mask		= HCR_TID3,
+		.behaviour	= BEHAVE_FORWARD_READ,
+	},
+	[CGT_HCR_IMO] = {
+		.index		= HCR_EL2,
+		.value 		= HCR_IMO,
+		.mask		= HCR_IMO,
+		.behaviour	= BEHAVE_FORWARD_WRITE,
+	},
+	[CGT_HCR_FMO] = {
+		.index		= HCR_EL2,
+		.value 		= HCR_FMO,
+		.mask		= HCR_FMO,
+		.behaviour	= BEHAVE_FORWARD_WRITE,
+	},
+	[CGT_HCR_TIDCP] = {
+		.index		= HCR_EL2,
+		.value		= HCR_TIDCP,
+		.mask		= HCR_TIDCP,
+		.behaviour	= BEHAVE_FORWARD_ANY,
+	},
+	[CGT_HCR_TACR] = {
+		.index		= HCR_EL2,
+		.value		= HCR_TACR,
+		.mask		= HCR_TACR,
+		.behaviour	= BEHAVE_FORWARD_ANY,
+	},
+	[CGT_HCR_TSW] = {
+		.index		= HCR_EL2,
+		.value		= HCR_TSW,
+		.mask		= HCR_TSW,
+		.behaviour	= BEHAVE_FORWARD_ANY,
+	},
+	[CGT_HCR_TPC] = { /* Also called TCPC when FEAT_DPB is implemented */
+		.index		= HCR_EL2,
+		.value		= HCR_TPC,
+		.mask		= HCR_TPC,
+		.behaviour	= BEHAVE_FORWARD_ANY,
+	},
+	[CGT_HCR_TPU] = {
+		.index		= HCR_EL2,
+		.value		= HCR_TPU,
+		.mask		= HCR_TPU,
+		.behaviour	= BEHAVE_FORWARD_ANY,
+	},
+	[CGT_HCR_TTLB] = {
+		.index		= HCR_EL2,
+		.value		= HCR_TTLB,
+		.mask		= HCR_TTLB,
+		.behaviour	= BEHAVE_FORWARD_ANY,
+	},
+	[CGT_HCR_TVM] = {
+		.index		= HCR_EL2,
+		.value		= HCR_TVM,
+		.mask		= HCR_TVM,
+		.behaviour	= BEHAVE_FORWARD_WRITE,
+	},
+	[CGT_HCR_TDZ] = {
+		.index		= HCR_EL2,
+		.value		= HCR_TDZ,
+		.mask		= HCR_TDZ,
+		.behaviour	= BEHAVE_FORWARD_ANY,
+	},
+	[CGT_HCR_TRVM] = {
+		.index		= HCR_EL2,
+		.value		= HCR_TRVM,
+		.mask		= HCR_TRVM,
+		.behaviour	= BEHAVE_FORWARD_READ,
+	},
+	[CGT_HCR_TLOR] = {
+		.index		= HCR_EL2,
+		.value		= HCR_TLOR,
+		.mask		= HCR_TLOR,
+		.behaviour	= BEHAVE_FORWARD_ANY,
+	},
+	[CGT_HCR_TERR] = {
+		.index		= HCR_EL2,
+		.value		= HCR_TERR,
+		.mask		= HCR_TERR,
+		.behaviour	= BEHAVE_FORWARD_ANY,
+	},
+	[CGT_HCR_APK] = {
+		.index		= HCR_EL2,
+		.value		= 0,
+		.mask		= HCR_APK,
+		.behaviour	= BEHAVE_FORWARD_ANY,
+	},
+	[CGT_HCR_NV] = {
+		.index		= HCR_EL2,
+		.value		= HCR_NV,
+		.mask		= HCR_NV,
+		.behaviour	= BEHAVE_FORWARD_ANY,
+	},
+	[CGT_HCR_NV_nNV2] = {
+		.index		= HCR_EL2,
+		.value		= HCR_NV,
+		.mask		= HCR_NV | HCR_NV2,
+		.behaviour	= BEHAVE_FORWARD_ANY,
+	},
+	[CGT_HCR_NV1_nNV2] = {
+		.index		= HCR_EL2,
+		.value		= HCR_NV | HCR_NV1,
+		.mask		= HCR_NV | HCR_NV1 | HCR_NV2,
+		.behaviour	= BEHAVE_FORWARD_ANY,
+	},
+	[CGT_HCR_AT] = {
+		.index		= HCR_EL2,
+		.value		= HCR_AT,
+		.mask		= HCR_AT,
+		.behaviour	= BEHAVE_FORWARD_ANY,
+	},
+	[CGT_HCR_nFIEN] = {
+		.index		= HCR_EL2,
+		.value		= 0,
+		.mask		= HCR_FIEN,
+		.behaviour	= BEHAVE_FORWARD_ANY,
+	},
+	[CGT_HCR_TID4] = {
+		.index		= HCR_EL2,
+		.value 		= HCR_TID4,
+		.mask		= HCR_TID4,
+		.behaviour	= BEHAVE_FORWARD_ANY,
+	},
+	[CGT_HCR_TICAB] = {
+		.index		= HCR_EL2,
+		.value 		= HCR_TICAB,
+		.mask		= HCR_TICAB,
+		.behaviour	= BEHAVE_FORWARD_ANY,
+	},
+	[CGT_HCR_TOCU] = {
+		.index		= HCR_EL2,
+		.value 		= HCR_TOCU,
+		.mask		= HCR_TOCU,
+		.behaviour	= BEHAVE_FORWARD_ANY,
+	},
+	[CGT_HCR_ENSCXT] = {
+		.index		= HCR_EL2,
+		.value 		= 0,
+		.mask		= HCR_ENSCXT,
+		.behaviour	= BEHAVE_FORWARD_ANY,
+	},
+	[CGT_HCR_TTLBIS] = {
+		.index		= HCR_EL2,
+		.value		= HCR_TTLBIS,
+		.mask		= HCR_TTLBIS,
+		.behaviour	= BEHAVE_FORWARD_ANY,
+	},
+	[CGT_HCR_TTLBOS] = {
+		.index		= HCR_EL2,
+		.value		= HCR_TTLBOS,
+		.mask		= HCR_TTLBOS,
+		.behaviour	= BEHAVE_FORWARD_ANY,
+	},
+	[CGT_MDCR_TPMCR] = {
+		.index		= MDCR_EL2,
+		.value		= MDCR_EL2_TPMCR,
+		.mask		= MDCR_EL2_TPMCR,
+		.behaviour	= BEHAVE_FORWARD_ANY,
+	},
+	[CGT_MDCR_TPM] = {
+		.index		= MDCR_EL2,
+		.value		= MDCR_EL2_TPM,
+		.mask		= MDCR_EL2_TPM,
+		.behaviour	= BEHAVE_FORWARD_ANY,
+	},
+	[CGT_MDCR_TDE] = {
+		.index		= MDCR_EL2,
+		.value		= MDCR_EL2_TDE,
+		.mask		= MDCR_EL2_TDE,
+		.behaviour	= BEHAVE_FORWARD_ANY,
+	},
+	[CGT_MDCR_TDA] = {
+		.index		= MDCR_EL2,
+		.value		= MDCR_EL2_TDA,
+		.mask		= MDCR_EL2_TDA,
+		.behaviour	= BEHAVE_FORWARD_ANY,
+	},
+	[CGT_MDCR_TDOSA] = {
+		.index		= MDCR_EL2,
+		.value		= MDCR_EL2_TDOSA,
+		.mask		= MDCR_EL2_TDOSA,
+		.behaviour	= BEHAVE_FORWARD_ANY,
+	},
+	[CGT_MDCR_TDRA] = {
+		.index		= MDCR_EL2,
+		.value		= MDCR_EL2_TDRA,
+		.mask		= MDCR_EL2_TDRA,
+		.behaviour	= BEHAVE_FORWARD_ANY,
+	},
+	[CGT_MDCR_E2PB] = {
+		.index		= MDCR_EL2,
+		.value		= 0,
+		.mask		= BIT(MDCR_EL2_E2PB_SHIFT),
+		.behaviour	= BEHAVE_FORWARD_ANY,
+	},
+	[CGT_MDCR_TPMS] = {
+		.index		= MDCR_EL2,
+		.value		= MDCR_EL2_TPMS,
+		.mask		= MDCR_EL2_TPMS,
+		.behaviour	= BEHAVE_FORWARD_ANY,
+	},
+	[CGT_MDCR_TTRF] = {
+		.index		= MDCR_EL2,
+		.value		= MDCR_EL2_TTRF,
+		.mask		= MDCR_EL2_TTRF,
+		.behaviour	= BEHAVE_FORWARD_ANY,
+	},
+	[CGT_MDCR_E2TB] = {
+		.index		= MDCR_EL2,
+		.value		= 0,
+		.mask		= BIT(MDCR_EL2_E2TB_SHIFT),
+		.behaviour	= BEHAVE_FORWARD_ANY,
+	},
+	[CGT_MDCR_TDCC] = {
+		.index		= MDCR_EL2,
+		.value		= MDCR_EL2_TDCC,
+		.mask		= MDCR_EL2_TDCC,
+		.behaviour	= BEHAVE_FORWARD_ANY,
+	},
+};
+
+#define MCB(id, ...)						\
+	[id - __MULTIPLE_CONTROL_BITS__]	=		\
+		(const enum cgt_group_id[]){			\
+		__VA_ARGS__, __RESERVED__			\
+		}
+
+static const enum cgt_group_id *coarse_control_combo[] = {
+	MCB(CGT_HCR_IMO_FMO,		CGT_HCR_IMO, CGT_HCR_FMO),
+	MCB(CGT_HCR_TID2_TID4,		CGT_HCR_TID2, CGT_HCR_TID4),
+	MCB(CGT_HCR_TTLB_TTLBIS,	CGT_HCR_TTLB, CGT_HCR_TTLBIS),
+	MCB(CGT_HCR_TTLB_TTLBOS,	CGT_HCR_TTLB, CGT_HCR_TTLBOS),
+	MCB(CGT_HCR_TVM_TRVM,		CGT_HCR_TVM, CGT_HCR_TRVM),
+	MCB(CGT_HCR_TPU_TICAB,		CGT_HCR_TPU, CGT_HCR_TICAB),
+	MCB(CGT_HCR_TPU_TOCU,		CGT_HCR_TPU, CGT_HCR_TOCU),
+	MCB(CGT_HCR_NV1_nNV2_ENSCXT,	CGT_HCR_NV1_nNV2, CGT_HCR_ENSCXT),
+	MCB(CGT_MDCR_TPM_TPMCR,		CGT_MDCR_TPM, CGT_MDCR_TPMCR),
+	MCB(CGT_MDCR_TDE_TDA,		CGT_MDCR_TDE, CGT_MDCR_TDA),
+	MCB(CGT_MDCR_TDE_TDOSA,		CGT_MDCR_TDE, CGT_MDCR_TDOSA),
+	MCB(CGT_MDCR_TDE_TDRA,		CGT_MDCR_TDE, CGT_MDCR_TDRA),
+	MCB(CGT_MDCR_TDCC_TDE_TDA,	CGT_MDCR_TDCC, CGT_MDCR_TDE, CGT_MDCR_TDA),
+};
+
+typedef enum trap_behaviour (*complex_condition_check)(struct kvm_vcpu *);
+
+/*
+ * Warning, maximum confusion ahead.
+ *
+ * When E2H=0, CNTHCTL_EL2[1:0] are defined as EL1PCEN:EL1PCTEN
+ * When E2H=1, CNTHCTL_EL2[11:10] are defined as EL1PTEN:EL1PCTEN
+ *
+ * Note the single letter difference? Yet, the bits have the same
+ * function despite a different layout and a different name.
+ *
+ * We don't try to reconcile this mess. We just use the E2H=0 bits
+ * to generate something that is in the E2H=1 format, and live with
+ * it. You're welcome.
+ */
+static u64 get_sanitized_cnthctl(struct kvm_vcpu *vcpu)
+{
+	u64 val = __vcpu_sys_reg(vcpu, CNTHCTL_EL2);
+
+	if (!vcpu_el2_e2h_is_set(vcpu))
+		val = (val & (CNTHCTL_EL1PCEN | CNTHCTL_EL1PCTEN)) << 10;
+
+	return val & ((CNTHCTL_EL1PCEN | CNTHCTL_EL1PCTEN) << 10);
+}
+
+static enum trap_behaviour check_cnthctl_el1pcten(struct kvm_vcpu *vcpu)
+{
+	if (get_sanitized_cnthctl(vcpu) & (CNTHCTL_EL1PCTEN << 10))
+		return BEHAVE_HANDLE_LOCALLY;
+
+	return BEHAVE_FORWARD_ANY;
+}
+
+static enum trap_behaviour check_cnthctl_el1pten(struct kvm_vcpu *vcpu)
+{
+	if (get_sanitized_cnthctl(vcpu) & (CNTHCTL_EL1PCEN << 10))
+		return BEHAVE_HANDLE_LOCALLY;
+
+	return BEHAVE_FORWARD_ANY;
+}
+
+#define CCC(id, fn)				\
+	[id - __COMPLEX_CONDITIONS__] = fn
+
+static const complex_condition_check ccc[] = {
+	CCC(CGT_CNTHCTL_EL1PCTEN, check_cnthctl_el1pcten),
+	CCC(CGT_CNTHCTL_EL1PTEN, check_cnthctl_el1pten),
+};
+
+/*
+ * Bit assignment for the trap controls. We use a 64bit word with the
+ * following layout for each trapped sysreg:
+ *
+ * [9:0]	enum cgt_group_id (10 bits)
+ * [13:10]	enum fgt_group_id (4 bits)
+ * [19:14]	bit number in the FGT register (6 bits)
+ * [20]		trap polarity (1 bit)
+ * [25:21]	FG filter (5 bits)
+ * [62:26]	Unused (37 bits)
+ * [63]		RES0 - Must be zero, as lost on insertion in the xarray
+ */
+#define TC_CGT_BITS	10
+#define TC_FGT_BITS	4
+#define TC_FGF_BITS	5
+
+union trap_config {
+	u64	val;
+	struct {
+		unsigned long	cgt:TC_CGT_BITS; /* Coarse Grained Trap id */
+		unsigned long	fgt:TC_FGT_BITS; /* Fine Grained Trap id */
+		unsigned long	bit:6;		 /* Bit number */
+		unsigned long	pol:1;		 /* Polarity */
+		unsigned long	fgf:TC_FGF_BITS; /* Fine Grained Filter */
+		unsigned long	unused:37;	 /* Unused, should be zero */
+		unsigned long	mbz:1;		 /* Must Be Zero */
+	};
+};
+
+struct encoding_to_trap_config {
+	const u32			encoding;
+	const u32			end;
+	const union trap_config		tc;
+	const unsigned int		line;
+};
+
+#define SR_RANGE_TRAP(sr_start, sr_end, trap_id)			\
+	{								\
+		.encoding	= sr_start,				\
+		.end		= sr_end,				\
+		.tc		= {					\
+			.cgt		= trap_id,			\
+		},							\
+		.line = __LINE__,					\
+	}
+
+#define SR_TRAP(sr, trap_id)		SR_RANGE_TRAP(sr, sr, trap_id)
+
+/*
+ * Map encoding to trap bits for exception reported with EC=0x18.
+ * These must only be evaluated when running a nested hypervisor, but
+ * that the current context is not a hypervisor context. When the
+ * trapped access matches one of the trap controls, the exception is
+ * re-injected in the nested hypervisor.
+ */
+static const struct encoding_to_trap_config encoding_to_cgt[] __initconst = {
+	SR_TRAP(SYS_REVIDR_EL1,		CGT_HCR_TID1),
+	SR_TRAP(SYS_AIDR_EL1,		CGT_HCR_TID1),
+	SR_TRAP(SYS_SMIDR_EL1,		CGT_HCR_TID1),
+	SR_TRAP(SYS_CTR_EL0,		CGT_HCR_TID2),
+	SR_TRAP(SYS_CCSIDR_EL1,		CGT_HCR_TID2_TID4),
+	SR_TRAP(SYS_CCSIDR2_EL1,	CGT_HCR_TID2_TID4),
+	SR_TRAP(SYS_CLIDR_EL1,		CGT_HCR_TID2_TID4),
+	SR_TRAP(SYS_CSSELR_EL1,		CGT_HCR_TID2_TID4),
+	SR_RANGE_TRAP(SYS_ID_PFR0_EL1,
+		      sys_reg(3, 0, 0, 7, 7), CGT_HCR_TID3),
+	SR_TRAP(SYS_ICC_SGI0R_EL1,	CGT_HCR_IMO_FMO),
+	SR_TRAP(SYS_ICC_ASGI1R_EL1,	CGT_HCR_IMO_FMO),
+	SR_TRAP(SYS_ICC_SGI1R_EL1,	CGT_HCR_IMO_FMO),
+	SR_RANGE_TRAP(sys_reg(3, 0, 11, 0, 0),
+		      sys_reg(3, 0, 11, 15, 7), CGT_HCR_TIDCP),
+	SR_RANGE_TRAP(sys_reg(3, 1, 11, 0, 0),
+		      sys_reg(3, 1, 11, 15, 7), CGT_HCR_TIDCP),
+	SR_RANGE_TRAP(sys_reg(3, 2, 11, 0, 0),
+		      sys_reg(3, 2, 11, 15, 7), CGT_HCR_TIDCP),
+	SR_RANGE_TRAP(sys_reg(3, 3, 11, 0, 0),
+		      sys_reg(3, 3, 11, 15, 7), CGT_HCR_TIDCP),
+	SR_RANGE_TRAP(sys_reg(3, 4, 11, 0, 0),
+		      sys_reg(3, 4, 11, 15, 7), CGT_HCR_TIDCP),
+	SR_RANGE_TRAP(sys_reg(3, 5, 11, 0, 0),
+		      sys_reg(3, 5, 11, 15, 7), CGT_HCR_TIDCP),
+	SR_RANGE_TRAP(sys_reg(3, 6, 11, 0, 0),
+		      sys_reg(3, 6, 11, 15, 7), CGT_HCR_TIDCP),
+	SR_RANGE_TRAP(sys_reg(3, 7, 11, 0, 0),
+		      sys_reg(3, 7, 11, 15, 7), CGT_HCR_TIDCP),
+	SR_RANGE_TRAP(sys_reg(3, 0, 15, 0, 0),
+		      sys_reg(3, 0, 15, 15, 7), CGT_HCR_TIDCP),
+	SR_RANGE_TRAP(sys_reg(3, 1, 15, 0, 0),
+		      sys_reg(3, 1, 15, 15, 7), CGT_HCR_TIDCP),
+	SR_RANGE_TRAP(sys_reg(3, 2, 15, 0, 0),
+		      sys_reg(3, 2, 15, 15, 7), CGT_HCR_TIDCP),
+	SR_RANGE_TRAP(sys_reg(3, 3, 15, 0, 0),
+		      sys_reg(3, 3, 15, 15, 7), CGT_HCR_TIDCP),
+	SR_RANGE_TRAP(sys_reg(3, 4, 15, 0, 0),
+		      sys_reg(3, 4, 15, 15, 7), CGT_HCR_TIDCP),
+	SR_RANGE_TRAP(sys_reg(3, 5, 15, 0, 0),
+		      sys_reg(3, 5, 15, 15, 7), CGT_HCR_TIDCP),
+	SR_RANGE_TRAP(sys_reg(3, 6, 15, 0, 0),
+		      sys_reg(3, 6, 15, 15, 7), CGT_HCR_TIDCP),
+	SR_RANGE_TRAP(sys_reg(3, 7, 15, 0, 0),
+		      sys_reg(3, 7, 15, 15, 7), CGT_HCR_TIDCP),
+	SR_TRAP(SYS_ACTLR_EL1,		CGT_HCR_TACR),
+	SR_TRAP(SYS_DC_ISW,		CGT_HCR_TSW),
+	SR_TRAP(SYS_DC_CSW,		CGT_HCR_TSW),
+	SR_TRAP(SYS_DC_CISW,		CGT_HCR_TSW),
+	SR_TRAP(SYS_DC_IGSW,		CGT_HCR_TSW),
+	SR_TRAP(SYS_DC_IGDSW,		CGT_HCR_TSW),
+	SR_TRAP(SYS_DC_CGSW,		CGT_HCR_TSW),
+	SR_TRAP(SYS_DC_CGDSW,		CGT_HCR_TSW),
+	SR_TRAP(SYS_DC_CIGSW,		CGT_HCR_TSW),
+	SR_TRAP(SYS_DC_CIGDSW,		CGT_HCR_TSW),
+	SR_TRAP(SYS_DC_CIVAC,		CGT_HCR_TPC),
+	SR_TRAP(SYS_DC_CVAC,		CGT_HCR_TPC),
+	SR_TRAP(SYS_DC_CVAP,		CGT_HCR_TPC),
+	SR_TRAP(SYS_DC_CVADP,		CGT_HCR_TPC),
+	SR_TRAP(SYS_DC_IVAC,		CGT_HCR_TPC),
+	SR_TRAP(SYS_DC_CIGVAC,		CGT_HCR_TPC),
+	SR_TRAP(SYS_DC_CIGDVAC,		CGT_HCR_TPC),
+	SR_TRAP(SYS_DC_IGVAC,		CGT_HCR_TPC),
+	SR_TRAP(SYS_DC_IGDVAC,		CGT_HCR_TPC),
+	SR_TRAP(SYS_DC_CGVAC,		CGT_HCR_TPC),
+	SR_TRAP(SYS_DC_CGDVAC,		CGT_HCR_TPC),
+	SR_TRAP(SYS_DC_CGVAP,		CGT_HCR_TPC),
+	SR_TRAP(SYS_DC_CGDVAP,		CGT_HCR_TPC),
+	SR_TRAP(SYS_DC_CGVADP,		CGT_HCR_TPC),
+	SR_TRAP(SYS_DC_CGDVADP,		CGT_HCR_TPC),
+	SR_TRAP(SYS_IC_IVAU,		CGT_HCR_TPU_TOCU),
+	SR_TRAP(SYS_IC_IALLU,		CGT_HCR_TPU_TOCU),
+	SR_TRAP(SYS_IC_IALLUIS,		CGT_HCR_TPU_TICAB),
+	SR_TRAP(SYS_DC_CVAU,		CGT_HCR_TPU_TOCU),
+	SR_TRAP(OP_TLBI_RVAE1,		CGT_HCR_TTLB),
+	SR_TRAP(OP_TLBI_RVAAE1,		CGT_HCR_TTLB),
+	SR_TRAP(OP_TLBI_RVALE1,		CGT_HCR_TTLB),
+	SR_TRAP(OP_TLBI_RVAALE1,	CGT_HCR_TTLB),
+	SR_TRAP(OP_TLBI_VMALLE1,	CGT_HCR_TTLB),
+	SR_TRAP(OP_TLBI_VAE1,		CGT_HCR_TTLB),
+	SR_TRAP(OP_TLBI_ASIDE1,		CGT_HCR_TTLB),
+	SR_TRAP(OP_TLBI_VAAE1,		CGT_HCR_TTLB),
+	SR_TRAP(OP_TLBI_VALE1,		CGT_HCR_TTLB),
+	SR_TRAP(OP_TLBI_VAALE1,		CGT_HCR_TTLB),
+	SR_TRAP(OP_TLBI_RVAE1NXS,	CGT_HCR_TTLB),
+	SR_TRAP(OP_TLBI_RVAAE1NXS,	CGT_HCR_TTLB),
+	SR_TRAP(OP_TLBI_RVALE1NXS,	CGT_HCR_TTLB),
+	SR_TRAP(OP_TLBI_RVAALE1NXS,	CGT_HCR_TTLB),
+	SR_TRAP(OP_TLBI_VMALLE1NXS,	CGT_HCR_TTLB),
+	SR_TRAP(OP_TLBI_VAE1NXS,	CGT_HCR_TTLB),
+	SR_TRAP(OP_TLBI_ASIDE1NXS,	CGT_HCR_TTLB),
+	SR_TRAP(OP_TLBI_VAAE1NXS,	CGT_HCR_TTLB),
+	SR_TRAP(OP_TLBI_VALE1NXS,	CGT_HCR_TTLB),
+	SR_TRAP(OP_TLBI_VAALE1NXS,	CGT_HCR_TTLB),
+	SR_TRAP(OP_TLBI_RVAE1IS,	CGT_HCR_TTLB_TTLBIS),
+	SR_TRAP(OP_TLBI_RVAAE1IS,	CGT_HCR_TTLB_TTLBIS),
+	SR_TRAP(OP_TLBI_RVALE1IS,	CGT_HCR_TTLB_TTLBIS),
+	SR_TRAP(OP_TLBI_RVAALE1IS,	CGT_HCR_TTLB_TTLBIS),
+	SR_TRAP(OP_TLBI_VMALLE1IS,	CGT_HCR_TTLB_TTLBIS),
+	SR_TRAP(OP_TLBI_VAE1IS,		CGT_HCR_TTLB_TTLBIS),
+	SR_TRAP(OP_TLBI_ASIDE1IS,	CGT_HCR_TTLB_TTLBIS),
+	SR_TRAP(OP_TLBI_VAAE1IS,	CGT_HCR_TTLB_TTLBIS),
+	SR_TRAP(OP_TLBI_VALE1IS,	CGT_HCR_TTLB_TTLBIS),
+	SR_TRAP(OP_TLBI_VAALE1IS,	CGT_HCR_TTLB_TTLBIS),
+	SR_TRAP(OP_TLBI_RVAE1ISNXS,	CGT_HCR_TTLB_TTLBIS),
+	SR_TRAP(OP_TLBI_RVAAE1ISNXS,	CGT_HCR_TTLB_TTLBIS),
+	SR_TRAP(OP_TLBI_RVALE1ISNXS,	CGT_HCR_TTLB_TTLBIS),
+	SR_TRAP(OP_TLBI_RVAALE1ISNXS,	CGT_HCR_TTLB_TTLBIS),
+	SR_TRAP(OP_TLBI_VMALLE1ISNXS,	CGT_HCR_TTLB_TTLBIS),
+	SR_TRAP(OP_TLBI_VAE1ISNXS,	CGT_HCR_TTLB_TTLBIS),
+	SR_TRAP(OP_TLBI_ASIDE1ISNXS,	CGT_HCR_TTLB_TTLBIS),
+	SR_TRAP(OP_TLBI_VAAE1ISNXS,	CGT_HCR_TTLB_TTLBIS),
+	SR_TRAP(OP_TLBI_VALE1ISNXS,	CGT_HCR_TTLB_TTLBIS),
+	SR_TRAP(OP_TLBI_VAALE1ISNXS,	CGT_HCR_TTLB_TTLBIS),
+	SR_TRAP(OP_TLBI_VMALLE1OS,	CGT_HCR_TTLB_TTLBOS),
+	SR_TRAP(OP_TLBI_VAE1OS,		CGT_HCR_TTLB_TTLBOS),
+	SR_TRAP(OP_TLBI_ASIDE1OS,	CGT_HCR_TTLB_TTLBOS),
+	SR_TRAP(OP_TLBI_VAAE1OS,	CGT_HCR_TTLB_TTLBOS),
+	SR_TRAP(OP_TLBI_VALE1OS,	CGT_HCR_TTLB_TTLBOS),
+	SR_TRAP(OP_TLBI_VAALE1OS,	CGT_HCR_TTLB_TTLBOS),
+	SR_TRAP(OP_TLBI_RVAE1OS,	CGT_HCR_TTLB_TTLBOS),
+	SR_TRAP(OP_TLBI_RVAAE1OS,	CGT_HCR_TTLB_TTLBOS),
+	SR_TRAP(OP_TLBI_RVALE1OS,	CGT_HCR_TTLB_TTLBOS),
+	SR_TRAP(OP_TLBI_RVAALE1OS,	CGT_HCR_TTLB_TTLBOS),
+	SR_TRAP(OP_TLBI_VMALLE1OSNXS,	CGT_HCR_TTLB_TTLBOS),
+	SR_TRAP(OP_TLBI_VAE1OSNXS,	CGT_HCR_TTLB_TTLBOS),
+	SR_TRAP(OP_TLBI_ASIDE1OSNXS,	CGT_HCR_TTLB_TTLBOS),
+	SR_TRAP(OP_TLBI_VAAE1OSNXS,	CGT_HCR_TTLB_TTLBOS),
+	SR_TRAP(OP_TLBI_VALE1OSNXS,	CGT_HCR_TTLB_TTLBOS),
+	SR_TRAP(OP_TLBI_VAALE1OSNXS,	CGT_HCR_TTLB_TTLBOS),
+	SR_TRAP(OP_TLBI_RVAE1OSNXS,	CGT_HCR_TTLB_TTLBOS),
+	SR_TRAP(OP_TLBI_RVAAE1OSNXS,	CGT_HCR_TTLB_TTLBOS),
+	SR_TRAP(OP_TLBI_RVALE1OSNXS,	CGT_HCR_TTLB_TTLBOS),
+	SR_TRAP(OP_TLBI_RVAALE1OSNXS,	CGT_HCR_TTLB_TTLBOS),
+	SR_TRAP(SYS_SCTLR_EL1,		CGT_HCR_TVM_TRVM),
+	SR_TRAP(SYS_TTBR0_EL1,		CGT_HCR_TVM_TRVM),
+	SR_TRAP(SYS_TTBR1_EL1,		CGT_HCR_TVM_TRVM),
+	SR_TRAP(SYS_TCR_EL1,		CGT_HCR_TVM_TRVM),
+	SR_TRAP(SYS_ESR_EL1,		CGT_HCR_TVM_TRVM),
+	SR_TRAP(SYS_FAR_EL1,		CGT_HCR_TVM_TRVM),
+	SR_TRAP(SYS_AFSR0_EL1,		CGT_HCR_TVM_TRVM),
+	SR_TRAP(SYS_AFSR1_EL1,		CGT_HCR_TVM_TRVM),
+	SR_TRAP(SYS_MAIR_EL1,		CGT_HCR_TVM_TRVM),
+	SR_TRAP(SYS_AMAIR_EL1,		CGT_HCR_TVM_TRVM),
+	SR_TRAP(SYS_CONTEXTIDR_EL1,	CGT_HCR_TVM_TRVM),
+	SR_TRAP(SYS_DC_ZVA,		CGT_HCR_TDZ),
+	SR_TRAP(SYS_DC_GVA,		CGT_HCR_TDZ),
+	SR_TRAP(SYS_DC_GZVA,		CGT_HCR_TDZ),
+	SR_TRAP(SYS_LORSA_EL1,		CGT_HCR_TLOR),
+	SR_TRAP(SYS_LOREA_EL1, 		CGT_HCR_TLOR),
+	SR_TRAP(SYS_LORN_EL1, 		CGT_HCR_TLOR),
+	SR_TRAP(SYS_LORC_EL1, 		CGT_HCR_TLOR),
+	SR_TRAP(SYS_LORID_EL1,		CGT_HCR_TLOR),
+	SR_TRAP(SYS_ERRIDR_EL1,		CGT_HCR_TERR),
+	SR_TRAP(SYS_ERRSELR_EL1,	CGT_HCR_TERR),
+	SR_TRAP(SYS_ERXADDR_EL1,	CGT_HCR_TERR),
+	SR_TRAP(SYS_ERXCTLR_EL1,	CGT_HCR_TERR),
+	SR_TRAP(SYS_ERXFR_EL1,		CGT_HCR_TERR),
+	SR_TRAP(SYS_ERXMISC0_EL1,	CGT_HCR_TERR),
+	SR_TRAP(SYS_ERXMISC1_EL1,	CGT_HCR_TERR),
+	SR_TRAP(SYS_ERXMISC2_EL1,	CGT_HCR_TERR),
+	SR_TRAP(SYS_ERXMISC3_EL1,	CGT_HCR_TERR),
+	SR_TRAP(SYS_ERXSTATUS_EL1,	CGT_HCR_TERR),
+	SR_TRAP(SYS_APIAKEYLO_EL1,	CGT_HCR_APK),
+	SR_TRAP(SYS_APIAKEYHI_EL1,	CGT_HCR_APK),
+	SR_TRAP(SYS_APIBKEYLO_EL1,	CGT_HCR_APK),
+	SR_TRAP(SYS_APIBKEYHI_EL1,	CGT_HCR_APK),
+	SR_TRAP(SYS_APDAKEYLO_EL1,	CGT_HCR_APK),
+	SR_TRAP(SYS_APDAKEYHI_EL1,	CGT_HCR_APK),
+	SR_TRAP(SYS_APDBKEYLO_EL1,	CGT_HCR_APK),
+	SR_TRAP(SYS_APDBKEYHI_EL1,	CGT_HCR_APK),
+	SR_TRAP(SYS_APGAKEYLO_EL1,	CGT_HCR_APK),
+	SR_TRAP(SYS_APGAKEYHI_EL1,	CGT_HCR_APK),
+	/* All _EL2 registers */
+	SR_RANGE_TRAP(sys_reg(3, 4, 0, 0, 0),
+		      sys_reg(3, 4, 3, 15, 7), CGT_HCR_NV),
+	/* Skip the SP_EL1 encoding... */
+	SR_TRAP(SYS_SPSR_EL2,		CGT_HCR_NV),
+	SR_TRAP(SYS_ELR_EL2,		CGT_HCR_NV),
+	SR_RANGE_TRAP(sys_reg(3, 4, 4, 1, 1),
+		      sys_reg(3, 4, 10, 15, 7), CGT_HCR_NV),
+	SR_RANGE_TRAP(sys_reg(3, 4, 12, 0, 0),
+		      sys_reg(3, 4, 14, 15, 7), CGT_HCR_NV),
+	/* All _EL02, _EL12 registers */
+	SR_RANGE_TRAP(sys_reg(3, 5, 0, 0, 0),
+		      sys_reg(3, 5, 10, 15, 7), CGT_HCR_NV),
+	SR_RANGE_TRAP(sys_reg(3, 5, 12, 0, 0),
+		      sys_reg(3, 5, 14, 15, 7), CGT_HCR_NV),
+	SR_TRAP(OP_AT_S1E2R,		CGT_HCR_NV),
+	SR_TRAP(OP_AT_S1E2W,		CGT_HCR_NV),
+	SR_TRAP(OP_AT_S12E1R,		CGT_HCR_NV),
+	SR_TRAP(OP_AT_S12E1W,		CGT_HCR_NV),
+	SR_TRAP(OP_AT_S12E0R,		CGT_HCR_NV),
+	SR_TRAP(OP_AT_S12E0W,		CGT_HCR_NV),
+	SR_TRAP(OP_TLBI_IPAS2E1,	CGT_HCR_NV),
+	SR_TRAP(OP_TLBI_RIPAS2E1,	CGT_HCR_NV),
+	SR_TRAP(OP_TLBI_IPAS2LE1,	CGT_HCR_NV),
+	SR_TRAP(OP_TLBI_RIPAS2LE1,	CGT_HCR_NV),
+	SR_TRAP(OP_TLBI_RVAE2,		CGT_HCR_NV),
+	SR_TRAP(OP_TLBI_RVALE2,		CGT_HCR_NV),
+	SR_TRAP(OP_TLBI_ALLE2,		CGT_HCR_NV),
+	SR_TRAP(OP_TLBI_VAE2,		CGT_HCR_NV),
+	SR_TRAP(OP_TLBI_ALLE1,		CGT_HCR_NV),
+	SR_TRAP(OP_TLBI_VALE2,		CGT_HCR_NV),
+	SR_TRAP(OP_TLBI_VMALLS12E1,	CGT_HCR_NV),
+	SR_TRAP(OP_TLBI_IPAS2E1NXS,	CGT_HCR_NV),
+	SR_TRAP(OP_TLBI_RIPAS2E1NXS,	CGT_HCR_NV),
+	SR_TRAP(OP_TLBI_IPAS2LE1NXS,	CGT_HCR_NV),
+	SR_TRAP(OP_TLBI_RIPAS2LE1NXS,	CGT_HCR_NV),
+	SR_TRAP(OP_TLBI_RVAE2NXS,	CGT_HCR_NV),
+	SR_TRAP(OP_TLBI_RVALE2NXS,	CGT_HCR_NV),
+	SR_TRAP(OP_TLBI_ALLE2NXS,	CGT_HCR_NV),
+	SR_TRAP(OP_TLBI_VAE2NXS,	CGT_HCR_NV),
+	SR_TRAP(OP_TLBI_ALLE1NXS,	CGT_HCR_NV),
+	SR_TRAP(OP_TLBI_VALE2NXS,	CGT_HCR_NV),
+	SR_TRAP(OP_TLBI_VMALLS12E1NXS,	CGT_HCR_NV),
+	SR_TRAP(OP_TLBI_IPAS2E1IS,	CGT_HCR_NV),
+	SR_TRAP(OP_TLBI_RIPAS2E1IS,	CGT_HCR_NV),
+	SR_TRAP(OP_TLBI_IPAS2LE1IS,	CGT_HCR_NV),
+	SR_TRAP(OP_TLBI_RIPAS2LE1IS,	CGT_HCR_NV),
+	SR_TRAP(OP_TLBI_RVAE2IS,	CGT_HCR_NV),
+	SR_TRAP(OP_TLBI_RVALE2IS,	CGT_HCR_NV),
+	SR_TRAP(OP_TLBI_ALLE2IS,	CGT_HCR_NV),
+	SR_TRAP(OP_TLBI_VAE2IS,		CGT_HCR_NV),
+	SR_TRAP(OP_TLBI_ALLE1IS,	CGT_HCR_NV),
+	SR_TRAP(OP_TLBI_VALE2IS,	CGT_HCR_NV),
+	SR_TRAP(OP_TLBI_VMALLS12E1IS,	CGT_HCR_NV),
+	SR_TRAP(OP_TLBI_IPAS2E1ISNXS,	CGT_HCR_NV),
+	SR_TRAP(OP_TLBI_RIPAS2E1ISNXS,	CGT_HCR_NV),
+	SR_TRAP(OP_TLBI_IPAS2LE1ISNXS,	CGT_HCR_NV),
+	SR_TRAP(OP_TLBI_RIPAS2LE1ISNXS,	CGT_HCR_NV),
+	SR_TRAP(OP_TLBI_RVAE2ISNXS,	CGT_HCR_NV),
+	SR_TRAP(OP_TLBI_RVALE2ISNXS,	CGT_HCR_NV),
+	SR_TRAP(OP_TLBI_ALLE2ISNXS,	CGT_HCR_NV),
+	SR_TRAP(OP_TLBI_VAE2ISNXS,	CGT_HCR_NV),
+	SR_TRAP(OP_TLBI_ALLE1ISNXS,	CGT_HCR_NV),
+	SR_TRAP(OP_TLBI_VALE2ISNXS,	CGT_HCR_NV),
+	SR_TRAP(OP_TLBI_VMALLS12E1ISNXS,CGT_HCR_NV),
+	SR_TRAP(OP_TLBI_ALLE2OS,	CGT_HCR_NV),
+	SR_TRAP(OP_TLBI_VAE2OS,		CGT_HCR_NV),
+	SR_TRAP(OP_TLBI_ALLE1OS,	CGT_HCR_NV),
+	SR_TRAP(OP_TLBI_VALE2OS,	CGT_HCR_NV),
+	SR_TRAP(OP_TLBI_VMALLS12E1OS,	CGT_HCR_NV),
+	SR_TRAP(OP_TLBI_IPAS2E1OS,	CGT_HCR_NV),
+	SR_TRAP(OP_TLBI_RIPAS2E1OS,	CGT_HCR_NV),
+	SR_TRAP(OP_TLBI_IPAS2LE1OS,	CGT_HCR_NV),
+	SR_TRAP(OP_TLBI_RIPAS2LE1OS,	CGT_HCR_NV),
+	SR_TRAP(OP_TLBI_RVAE2OS,	CGT_HCR_NV),
+	SR_TRAP(OP_TLBI_RVALE2OS,	CGT_HCR_NV),
+	SR_TRAP(OP_TLBI_ALLE2OSNXS,	CGT_HCR_NV),
+	SR_TRAP(OP_TLBI_VAE2OSNXS,	CGT_HCR_NV),
+	SR_TRAP(OP_TLBI_ALLE1OSNXS,	CGT_HCR_NV),
+	SR_TRAP(OP_TLBI_VALE2OSNXS,	CGT_HCR_NV),
+	SR_TRAP(OP_TLBI_VMALLS12E1OSNXS,CGT_HCR_NV),
+	SR_TRAP(OP_TLBI_IPAS2E1OSNXS,	CGT_HCR_NV),
+	SR_TRAP(OP_TLBI_RIPAS2E1OSNXS,	CGT_HCR_NV),
+	SR_TRAP(OP_TLBI_IPAS2LE1OSNXS,	CGT_HCR_NV),
+	SR_TRAP(OP_TLBI_RIPAS2LE1OSNXS,	CGT_HCR_NV),
+	SR_TRAP(OP_TLBI_RVAE2OSNXS,	CGT_HCR_NV),
+	SR_TRAP(OP_TLBI_RVALE2OSNXS,	CGT_HCR_NV),
+	SR_TRAP(OP_CPP_RCTX, 		CGT_HCR_NV),
+	SR_TRAP(OP_DVP_RCTX, 		CGT_HCR_NV),
+	SR_TRAP(OP_CFP_RCTX, 		CGT_HCR_NV),
+	SR_TRAP(SYS_SP_EL1,		CGT_HCR_NV_nNV2),
+	SR_TRAP(SYS_VBAR_EL1,		CGT_HCR_NV1_nNV2),
+	SR_TRAP(SYS_ELR_EL1,		CGT_HCR_NV1_nNV2),
+	SR_TRAP(SYS_SPSR_EL1,		CGT_HCR_NV1_nNV2),
+	SR_TRAP(SYS_SCXTNUM_EL1,	CGT_HCR_NV1_nNV2_ENSCXT),
+	SR_TRAP(SYS_SCXTNUM_EL0,	CGT_HCR_ENSCXT),
+	SR_TRAP(OP_AT_S1E1R, 		CGT_HCR_AT),
+	SR_TRAP(OP_AT_S1E1W, 		CGT_HCR_AT),
+	SR_TRAP(OP_AT_S1E0R, 		CGT_HCR_AT),
+	SR_TRAP(OP_AT_S1E0W, 		CGT_HCR_AT),
+	SR_TRAP(OP_AT_S1E1RP, 		CGT_HCR_AT),
+	SR_TRAP(OP_AT_S1E1WP, 		CGT_HCR_AT),
+	SR_TRAP(SYS_ERXPFGF_EL1,	CGT_HCR_nFIEN),
+	SR_TRAP(SYS_ERXPFGCTL_EL1,	CGT_HCR_nFIEN),
+	SR_TRAP(SYS_ERXPFGCDN_EL1,	CGT_HCR_nFIEN),
+	SR_TRAP(SYS_PMCR_EL0,		CGT_MDCR_TPM_TPMCR),
+	SR_TRAP(SYS_PMCNTENSET_EL0,	CGT_MDCR_TPM),
+	SR_TRAP(SYS_PMCNTENCLR_EL0,	CGT_MDCR_TPM),
+	SR_TRAP(SYS_PMOVSSET_EL0,	CGT_MDCR_TPM),
+	SR_TRAP(SYS_PMOVSCLR_EL0,	CGT_MDCR_TPM),
+	SR_TRAP(SYS_PMCEID0_EL0,	CGT_MDCR_TPM),
+	SR_TRAP(SYS_PMCEID1_EL0,	CGT_MDCR_TPM),
+	SR_TRAP(SYS_PMXEVTYPER_EL0,	CGT_MDCR_TPM),
+	SR_TRAP(SYS_PMSWINC_EL0,	CGT_MDCR_TPM),
+	SR_TRAP(SYS_PMSELR_EL0,		CGT_MDCR_TPM),
+	SR_TRAP(SYS_PMXEVCNTR_EL0,	CGT_MDCR_TPM),
+	SR_TRAP(SYS_PMCCNTR_EL0,	CGT_MDCR_TPM),
+	SR_TRAP(SYS_PMUSERENR_EL0,	CGT_MDCR_TPM),
+	SR_TRAP(SYS_PMINTENSET_EL1,	CGT_MDCR_TPM),
+	SR_TRAP(SYS_PMINTENCLR_EL1,	CGT_MDCR_TPM),
+	SR_TRAP(SYS_PMMIR_EL1,		CGT_MDCR_TPM),
+	SR_TRAP(SYS_PMEVCNTRn_EL0(0),	CGT_MDCR_TPM),
+	SR_TRAP(SYS_PMEVCNTRn_EL0(1),	CGT_MDCR_TPM),
+	SR_TRAP(SYS_PMEVCNTRn_EL0(2),	CGT_MDCR_TPM),
+	SR_TRAP(SYS_PMEVCNTRn_EL0(3),	CGT_MDCR_TPM),
+	SR_TRAP(SYS_PMEVCNTRn_EL0(4),	CGT_MDCR_TPM),
+	SR_TRAP(SYS_PMEVCNTRn_EL0(5),	CGT_MDCR_TPM),
+	SR_TRAP(SYS_PMEVCNTRn_EL0(6),	CGT_MDCR_TPM),
+	SR_TRAP(SYS_PMEVCNTRn_EL0(7),	CGT_MDCR_TPM),
+	SR_TRAP(SYS_PMEVCNTRn_EL0(8),	CGT_MDCR_TPM),
+	SR_TRAP(SYS_PMEVCNTRn_EL0(9),	CGT_MDCR_TPM),
+	SR_TRAP(SYS_PMEVCNTRn_EL0(10),	CGT_MDCR_TPM),
+	SR_TRAP(SYS_PMEVCNTRn_EL0(11),	CGT_MDCR_TPM),
+	SR_TRAP(SYS_PMEVCNTRn_EL0(12),	CGT_MDCR_TPM),
+	SR_TRAP(SYS_PMEVCNTRn_EL0(13),	CGT_MDCR_TPM),
+	SR_TRAP(SYS_PMEVCNTRn_EL0(14),	CGT_MDCR_TPM),
+	SR_TRAP(SYS_PMEVCNTRn_EL0(15),	CGT_MDCR_TPM),
+	SR_TRAP(SYS_PMEVCNTRn_EL0(16),	CGT_MDCR_TPM),
+	SR_TRAP(SYS_PMEVCNTRn_EL0(17),	CGT_MDCR_TPM),
+	SR_TRAP(SYS_PMEVCNTRn_EL0(18),	CGT_MDCR_TPM),
+	SR_TRAP(SYS_PMEVCNTRn_EL0(19),	CGT_MDCR_TPM),
+	SR_TRAP(SYS_PMEVCNTRn_EL0(20),	CGT_MDCR_TPM),
+	SR_TRAP(SYS_PMEVCNTRn_EL0(21),	CGT_MDCR_TPM),
+	SR_TRAP(SYS_PMEVCNTRn_EL0(22),	CGT_MDCR_TPM),
+	SR_TRAP(SYS_PMEVCNTRn_EL0(23),	CGT_MDCR_TPM),
+	SR_TRAP(SYS_PMEVCNTRn_EL0(24),	CGT_MDCR_TPM),
+	SR_TRAP(SYS_PMEVCNTRn_EL0(25),	CGT_MDCR_TPM),
+	SR_TRAP(SYS_PMEVCNTRn_EL0(26),	CGT_MDCR_TPM),
+	SR_TRAP(SYS_PMEVCNTRn_EL0(27),	CGT_MDCR_TPM),
+	SR_TRAP(SYS_PMEVCNTRn_EL0(28),	CGT_MDCR_TPM),
+	SR_TRAP(SYS_PMEVCNTRn_EL0(29),	CGT_MDCR_TPM),
+	SR_TRAP(SYS_PMEVCNTRn_EL0(30),	CGT_MDCR_TPM),
+	SR_TRAP(SYS_PMEVTYPERn_EL0(0),	CGT_MDCR_TPM),
+	SR_TRAP(SYS_PMEVTYPERn_EL0(1),	CGT_MDCR_TPM),
+	SR_TRAP(SYS_PMEVTYPERn_EL0(2),	CGT_MDCR_TPM),
+	SR_TRAP(SYS_PMEVTYPERn_EL0(3),	CGT_MDCR_TPM),
+	SR_TRAP(SYS_PMEVTYPERn_EL0(4),	CGT_MDCR_TPM),
+	SR_TRAP(SYS_PMEVTYPERn_EL0(5),	CGT_MDCR_TPM),
+	SR_TRAP(SYS_PMEVTYPERn_EL0(6),	CGT_MDCR_TPM),
+	SR_TRAP(SYS_PMEVTYPERn_EL0(7),	CGT_MDCR_TPM),
+	SR_TRAP(SYS_PMEVTYPERn_EL0(8),	CGT_MDCR_TPM),
+	SR_TRAP(SYS_PMEVTYPERn_EL0(9),	CGT_MDCR_TPM),
+	SR_TRAP(SYS_PMEVTYPERn_EL0(10),	CGT_MDCR_TPM),
+	SR_TRAP(SYS_PMEVTYPERn_EL0(11),	CGT_MDCR_TPM),
+	SR_TRAP(SYS_PMEVTYPERn_EL0(12),	CGT_MDCR_TPM),
+	SR_TRAP(SYS_PMEVTYPERn_EL0(13),	CGT_MDCR_TPM),
+	SR_TRAP(SYS_PMEVTYPERn_EL0(14),	CGT_MDCR_TPM),
+	SR_TRAP(SYS_PMEVTYPERn_EL0(15),	CGT_MDCR_TPM),
+	SR_TRAP(SYS_PMEVTYPERn_EL0(16),	CGT_MDCR_TPM),
+	SR_TRAP(SYS_PMEVTYPERn_EL0(17),	CGT_MDCR_TPM),
+	SR_TRAP(SYS_PMEVTYPERn_EL0(18),	CGT_MDCR_TPM),
+	SR_TRAP(SYS_PMEVTYPERn_EL0(19),	CGT_MDCR_TPM),
+	SR_TRAP(SYS_PMEVTYPERn_EL0(20),	CGT_MDCR_TPM),
+	SR_TRAP(SYS_PMEVTYPERn_EL0(21),	CGT_MDCR_TPM),
+	SR_TRAP(SYS_PMEVTYPERn_EL0(22),	CGT_MDCR_TPM),
+	SR_TRAP(SYS_PMEVTYPERn_EL0(23),	CGT_MDCR_TPM),
+	SR_TRAP(SYS_PMEVTYPERn_EL0(24),	CGT_MDCR_TPM),
+	SR_TRAP(SYS_PMEVTYPERn_EL0(25),	CGT_MDCR_TPM),
+	SR_TRAP(SYS_PMEVTYPERn_EL0(26),	CGT_MDCR_TPM),
+	SR_TRAP(SYS_PMEVTYPERn_EL0(27),	CGT_MDCR_TPM),
+	SR_TRAP(SYS_PMEVTYPERn_EL0(28),	CGT_MDCR_TPM),
+	SR_TRAP(SYS_PMEVTYPERn_EL0(29),	CGT_MDCR_TPM),
+	SR_TRAP(SYS_PMEVTYPERn_EL0(30),	CGT_MDCR_TPM),
+	SR_TRAP(SYS_PMCCFILTR_EL0,	CGT_MDCR_TPM),
+	SR_TRAP(SYS_MDCCSR_EL0,		CGT_MDCR_TDCC_TDE_TDA),
+	SR_TRAP(SYS_MDCCINT_EL1,	CGT_MDCR_TDCC_TDE_TDA),
+	SR_TRAP(SYS_OSDTRRX_EL1,	CGT_MDCR_TDCC_TDE_TDA),
+	SR_TRAP(SYS_OSDTRTX_EL1,	CGT_MDCR_TDCC_TDE_TDA),
+	SR_TRAP(SYS_DBGDTR_EL0,		CGT_MDCR_TDCC_TDE_TDA),
+	/*
+	 * Also covers DBGDTRRX_EL0, which has the same encoding as
+	 * SYS_DBGDTRTX_EL0...
+	 */
+	SR_TRAP(SYS_DBGDTRTX_EL0,	CGT_MDCR_TDCC_TDE_TDA),
+	SR_TRAP(SYS_MDSCR_EL1,		CGT_MDCR_TDE_TDA),
+	SR_TRAP(SYS_OSECCR_EL1,		CGT_MDCR_TDE_TDA),
+	SR_TRAP(SYS_DBGBVRn_EL1(0),	CGT_MDCR_TDE_TDA),
+	SR_TRAP(SYS_DBGBVRn_EL1(1),	CGT_MDCR_TDE_TDA),
+	SR_TRAP(SYS_DBGBVRn_EL1(2),	CGT_MDCR_TDE_TDA),
+	SR_TRAP(SYS_DBGBVRn_EL1(3),	CGT_MDCR_TDE_TDA),
+	SR_TRAP(SYS_DBGBVRn_EL1(4),	CGT_MDCR_TDE_TDA),
+	SR_TRAP(SYS_DBGBVRn_EL1(5),	CGT_MDCR_TDE_TDA),
+	SR_TRAP(SYS_DBGBVRn_EL1(6),	CGT_MDCR_TDE_TDA),
+	SR_TRAP(SYS_DBGBVRn_EL1(7),	CGT_MDCR_TDE_TDA),
+	SR_TRAP(SYS_DBGBVRn_EL1(8),	CGT_MDCR_TDE_TDA),
+	SR_TRAP(SYS_DBGBVRn_EL1(9),	CGT_MDCR_TDE_TDA),
+	SR_TRAP(SYS_DBGBVRn_EL1(10),	CGT_MDCR_TDE_TDA),
+	SR_TRAP(SYS_DBGBVRn_EL1(11),	CGT_MDCR_TDE_TDA),
+	SR_TRAP(SYS_DBGBVRn_EL1(12),	CGT_MDCR_TDE_TDA),
+	SR_TRAP(SYS_DBGBVRn_EL1(13),	CGT_MDCR_TDE_TDA),
+	SR_TRAP(SYS_DBGBVRn_EL1(14),	CGT_MDCR_TDE_TDA),
+	SR_TRAP(SYS_DBGBVRn_EL1(15),	CGT_MDCR_TDE_TDA),
+	SR_TRAP(SYS_DBGBCRn_EL1(0),	CGT_MDCR_TDE_TDA),
+	SR_TRAP(SYS_DBGBCRn_EL1(1),	CGT_MDCR_TDE_TDA),
+	SR_TRAP(SYS_DBGBCRn_EL1(2),	CGT_MDCR_TDE_TDA),
+	SR_TRAP(SYS_DBGBCRn_EL1(3),	CGT_MDCR_TDE_TDA),
+	SR_TRAP(SYS_DBGBCRn_EL1(4),	CGT_MDCR_TDE_TDA),
+	SR_TRAP(SYS_DBGBCRn_EL1(5),	CGT_MDCR_TDE_TDA),
+	SR_TRAP(SYS_DBGBCRn_EL1(6),	CGT_MDCR_TDE_TDA),
+	SR_TRAP(SYS_DBGBCRn_EL1(7),	CGT_MDCR_TDE_TDA),
+	SR_TRAP(SYS_DBGBCRn_EL1(8),	CGT_MDCR_TDE_TDA),
+	SR_TRAP(SYS_DBGBCRn_EL1(9),	CGT_MDCR_TDE_TDA),
+	SR_TRAP(SYS_DBGBCRn_EL1(10),	CGT_MDCR_TDE_TDA),
+	SR_TRAP(SYS_DBGBCRn_EL1(11),	CGT_MDCR_TDE_TDA),
+	SR_TRAP(SYS_DBGBCRn_EL1(12),	CGT_MDCR_TDE_TDA),
+	SR_TRAP(SYS_DBGBCRn_EL1(13),	CGT_MDCR_TDE_TDA),
+	SR_TRAP(SYS_DBGBCRn_EL1(14),	CGT_MDCR_TDE_TDA),
+	SR_TRAP(SYS_DBGBCRn_EL1(15),	CGT_MDCR_TDE_TDA),
+	SR_TRAP(SYS_DBGWVRn_EL1(0),	CGT_MDCR_TDE_TDA),
+	SR_TRAP(SYS_DBGWVRn_EL1(1),	CGT_MDCR_TDE_TDA),
+	SR_TRAP(SYS_DBGWVRn_EL1(2),	CGT_MDCR_TDE_TDA),
+	SR_TRAP(SYS_DBGWVRn_EL1(3),	CGT_MDCR_TDE_TDA),
+	SR_TRAP(SYS_DBGWVRn_EL1(4),	CGT_MDCR_TDE_TDA),
+	SR_TRAP(SYS_DBGWVRn_EL1(5),	CGT_MDCR_TDE_TDA),
+	SR_TRAP(SYS_DBGWVRn_EL1(6),	CGT_MDCR_TDE_TDA),
+	SR_TRAP(SYS_DBGWVRn_EL1(7),	CGT_MDCR_TDE_TDA),
+	SR_TRAP(SYS_DBGWVRn_EL1(8),	CGT_MDCR_TDE_TDA),
+	SR_TRAP(SYS_DBGWVRn_EL1(9),	CGT_MDCR_TDE_TDA),
+	SR_TRAP(SYS_DBGWVRn_EL1(10),	CGT_MDCR_TDE_TDA),
+	SR_TRAP(SYS_DBGWVRn_EL1(11),	CGT_MDCR_TDE_TDA),
+	SR_TRAP(SYS_DBGWVRn_EL1(12),	CGT_MDCR_TDE_TDA),
+	SR_TRAP(SYS_DBGWVRn_EL1(13),	CGT_MDCR_TDE_TDA),
+	SR_TRAP(SYS_DBGWVRn_EL1(14),	CGT_MDCR_TDE_TDA),
+	SR_TRAP(SYS_DBGWVRn_EL1(15),	CGT_MDCR_TDE_TDA),
+	SR_TRAP(SYS_DBGWCRn_EL1(0),	CGT_MDCR_TDE_TDA),
+	SR_TRAP(SYS_DBGWCRn_EL1(1),	CGT_MDCR_TDE_TDA),
+	SR_TRAP(SYS_DBGWCRn_EL1(2),	CGT_MDCR_TDE_TDA),
+	SR_TRAP(SYS_DBGWCRn_EL1(3),	CGT_MDCR_TDE_TDA),
+	SR_TRAP(SYS_DBGWCRn_EL1(4),	CGT_MDCR_TDE_TDA),
+	SR_TRAP(SYS_DBGWCRn_EL1(5),	CGT_MDCR_TDE_TDA),
+	SR_TRAP(SYS_DBGWCRn_EL1(6),	CGT_MDCR_TDE_TDA),
+	SR_TRAP(SYS_DBGWCRn_EL1(7),	CGT_MDCR_TDE_TDA),
+	SR_TRAP(SYS_DBGWCRn_EL1(8),	CGT_MDCR_TDE_TDA),
+	SR_TRAP(SYS_DBGWCRn_EL1(9),	CGT_MDCR_TDE_TDA),
+	SR_TRAP(SYS_DBGWCRn_EL1(10),	CGT_MDCR_TDE_TDA),
+	SR_TRAP(SYS_DBGWCRn_EL1(11),	CGT_MDCR_TDE_TDA),
+	SR_TRAP(SYS_DBGWCRn_EL1(12),	CGT_MDCR_TDE_TDA),
+	SR_TRAP(SYS_DBGWCRn_EL1(13),	CGT_MDCR_TDE_TDA),
+	SR_TRAP(SYS_DBGWCRn_EL1(14),	CGT_MDCR_TDE_TDA),
+	SR_TRAP(SYS_DBGCLAIMSET_EL1,	CGT_MDCR_TDE_TDA),
+	SR_TRAP(SYS_DBGCLAIMCLR_EL1,	CGT_MDCR_TDE_TDA),
+	SR_TRAP(SYS_DBGAUTHSTATUS_EL1,	CGT_MDCR_TDE_TDA),
+	SR_TRAP(SYS_OSLAR_EL1,		CGT_MDCR_TDE_TDOSA),
+	SR_TRAP(SYS_OSLSR_EL1,		CGT_MDCR_TDE_TDOSA),
+	SR_TRAP(SYS_OSDLR_EL1,		CGT_MDCR_TDE_TDOSA),
+	SR_TRAP(SYS_DBGPRCR_EL1,	CGT_MDCR_TDE_TDOSA),
+	SR_TRAP(SYS_MDRAR_EL1,		CGT_MDCR_TDE_TDRA),
+	SR_TRAP(SYS_PMBLIMITR_EL1,	CGT_MDCR_E2PB),
+	SR_TRAP(SYS_PMBPTR_EL1,		CGT_MDCR_E2PB),
+	SR_TRAP(SYS_PMBSR_EL1,		CGT_MDCR_E2PB),
+	SR_TRAP(SYS_PMSCR_EL1,		CGT_MDCR_TPMS),
+	SR_TRAP(SYS_PMSEVFR_EL1,	CGT_MDCR_TPMS),
+	SR_TRAP(SYS_PMSFCR_EL1,		CGT_MDCR_TPMS),
+	SR_TRAP(SYS_PMSICR_EL1,		CGT_MDCR_TPMS),
+	SR_TRAP(SYS_PMSIDR_EL1,		CGT_MDCR_TPMS),
+	SR_TRAP(SYS_PMSIRR_EL1,		CGT_MDCR_TPMS),
+	SR_TRAP(SYS_PMSLATFR_EL1,	CGT_MDCR_TPMS),
+	SR_TRAP(SYS_PMSNEVFR_EL1,	CGT_MDCR_TPMS),
+	SR_TRAP(SYS_TRFCR_EL1,		CGT_MDCR_TTRF),
+	SR_TRAP(SYS_TRBBASER_EL1,	CGT_MDCR_E2TB),
+	SR_TRAP(SYS_TRBLIMITR_EL1,	CGT_MDCR_E2TB),
+	SR_TRAP(SYS_TRBMAR_EL1, 	CGT_MDCR_E2TB),
+	SR_TRAP(SYS_TRBPTR_EL1, 	CGT_MDCR_E2TB),
+	SR_TRAP(SYS_TRBSR_EL1, 		CGT_MDCR_E2TB),
+	SR_TRAP(SYS_TRBTRG_EL1,		CGT_MDCR_E2TB),
+	SR_TRAP(SYS_CNTP_TVAL_EL0,	CGT_CNTHCTL_EL1PTEN),
+	SR_TRAP(SYS_CNTP_CVAL_EL0,	CGT_CNTHCTL_EL1PTEN),
+	SR_TRAP(SYS_CNTP_CTL_EL0,	CGT_CNTHCTL_EL1PTEN),
+	SR_TRAP(SYS_CNTPCT_EL0,		CGT_CNTHCTL_EL1PCTEN),
+	SR_TRAP(SYS_CNTPCTSS_EL0,	CGT_CNTHCTL_EL1PCTEN),
+};
+
+static DEFINE_XARRAY(sr_forward_xa);
+
+enum fgt_group_id {
+	__NO_FGT_GROUP__,
+	HFGxTR_GROUP,
+	HDFGRTR_GROUP,
+	HDFGWTR_GROUP,
+	HFGITR_GROUP,
+
+	/* Must be last */
+	__NR_FGT_GROUP_IDS__
+};
+
+enum fg_filter_id {
+	__NO_FGF__,
+	HCRX_FGTnXS,
+
+	/* Must be last */
+	__NR_FG_FILTER_IDS__
+};
+
+#define SR_FGF(sr, g, b, p, f)					\
+	{							\
+		.encoding	= sr,				\
+		.end		= sr,				\
+		.tc		= {				\
+			.fgt = g ## _GROUP,			\
+			.bit = g ## _EL2_ ## b ## _SHIFT,	\
+			.pol = p,				\
+			.fgf = f,				\
+		},						\
+		.line = __LINE__,				\
+	}
+
+#define SR_FGT(sr, g, b, p)	SR_FGF(sr, g, b, p, __NO_FGF__)
+
+static const struct encoding_to_trap_config encoding_to_fgt[] __initconst = {
+	/* HFGRTR_EL2, HFGWTR_EL2 */
+	SR_FGT(SYS_TPIDR2_EL0,		HFGxTR, nTPIDR2_EL0, 0),
+	SR_FGT(SYS_SMPRI_EL1,		HFGxTR, nSMPRI_EL1, 0),
+	SR_FGT(SYS_ACCDATA_EL1,		HFGxTR, nACCDATA_EL1, 0),
+	SR_FGT(SYS_ERXADDR_EL1,		HFGxTR, ERXADDR_EL1, 1),
+	SR_FGT(SYS_ERXPFGCDN_EL1,	HFGxTR, ERXPFGCDN_EL1, 1),
+	SR_FGT(SYS_ERXPFGCTL_EL1,	HFGxTR, ERXPFGCTL_EL1, 1),
+	SR_FGT(SYS_ERXPFGF_EL1,		HFGxTR, ERXPFGF_EL1, 1),
+	SR_FGT(SYS_ERXMISC0_EL1,	HFGxTR, ERXMISCn_EL1, 1),
+	SR_FGT(SYS_ERXMISC1_EL1,	HFGxTR, ERXMISCn_EL1, 1),
+	SR_FGT(SYS_ERXMISC2_EL1,	HFGxTR, ERXMISCn_EL1, 1),
+	SR_FGT(SYS_ERXMISC3_EL1,	HFGxTR, ERXMISCn_EL1, 1),
+	SR_FGT(SYS_ERXSTATUS_EL1,	HFGxTR, ERXSTATUS_EL1, 1),
+	SR_FGT(SYS_ERXCTLR_EL1,		HFGxTR, ERXCTLR_EL1, 1),
+	SR_FGT(SYS_ERXFR_EL1,		HFGxTR, ERXFR_EL1, 1),
+	SR_FGT(SYS_ERRSELR_EL1,		HFGxTR, ERRSELR_EL1, 1),
+	SR_FGT(SYS_ERRIDR_EL1,		HFGxTR, ERRIDR_EL1, 1),
+	SR_FGT(SYS_ICC_IGRPEN0_EL1,	HFGxTR, ICC_IGRPENn_EL1, 1),
+	SR_FGT(SYS_ICC_IGRPEN1_EL1,	HFGxTR, ICC_IGRPENn_EL1, 1),
+	SR_FGT(SYS_VBAR_EL1,		HFGxTR, VBAR_EL1, 1),
+	SR_FGT(SYS_TTBR1_EL1,		HFGxTR, TTBR1_EL1, 1),
+	SR_FGT(SYS_TTBR0_EL1,		HFGxTR, TTBR0_EL1, 1),
+	SR_FGT(SYS_TPIDR_EL0,		HFGxTR, TPIDR_EL0, 1),
+	SR_FGT(SYS_TPIDRRO_EL0,		HFGxTR, TPIDRRO_EL0, 1),
+	SR_FGT(SYS_TPIDR_EL1,		HFGxTR, TPIDR_EL1, 1),
+	SR_FGT(SYS_TCR_EL1,		HFGxTR, TCR_EL1, 1),
+	SR_FGT(SYS_SCXTNUM_EL0,		HFGxTR, SCXTNUM_EL0, 1),
+	SR_FGT(SYS_SCXTNUM_EL1, 	HFGxTR, SCXTNUM_EL1, 1),
+	SR_FGT(SYS_SCTLR_EL1, 		HFGxTR, SCTLR_EL1, 1),
+	SR_FGT(SYS_REVIDR_EL1, 		HFGxTR, REVIDR_EL1, 1),
+	SR_FGT(SYS_PAR_EL1, 		HFGxTR, PAR_EL1, 1),
+	SR_FGT(SYS_MPIDR_EL1, 		HFGxTR, MPIDR_EL1, 1),
+	SR_FGT(SYS_MIDR_EL1, 		HFGxTR, MIDR_EL1, 1),
+	SR_FGT(SYS_MAIR_EL1, 		HFGxTR, MAIR_EL1, 1),
+	SR_FGT(SYS_LORSA_EL1, 		HFGxTR, LORSA_EL1, 1),
+	SR_FGT(SYS_LORN_EL1, 		HFGxTR, LORN_EL1, 1),
+	SR_FGT(SYS_LORID_EL1, 		HFGxTR, LORID_EL1, 1),
+	SR_FGT(SYS_LOREA_EL1, 		HFGxTR, LOREA_EL1, 1),
+	SR_FGT(SYS_LORC_EL1, 		HFGxTR, LORC_EL1, 1),
+	SR_FGT(SYS_ISR_EL1, 		HFGxTR, ISR_EL1, 1),
+	SR_FGT(SYS_FAR_EL1, 		HFGxTR, FAR_EL1, 1),
+	SR_FGT(SYS_ESR_EL1, 		HFGxTR, ESR_EL1, 1),
+	SR_FGT(SYS_DCZID_EL0, 		HFGxTR, DCZID_EL0, 1),
+	SR_FGT(SYS_CTR_EL0, 		HFGxTR, CTR_EL0, 1),
+	SR_FGT(SYS_CSSELR_EL1, 		HFGxTR, CSSELR_EL1, 1),
+	SR_FGT(SYS_CPACR_EL1, 		HFGxTR, CPACR_EL1, 1),
+	SR_FGT(SYS_CONTEXTIDR_EL1, 	HFGxTR, CONTEXTIDR_EL1, 1),
+	SR_FGT(SYS_CLIDR_EL1, 		HFGxTR, CLIDR_EL1, 1),
+	SR_FGT(SYS_CCSIDR_EL1, 		HFGxTR, CCSIDR_EL1, 1),
+	SR_FGT(SYS_APIBKEYLO_EL1, 	HFGxTR, APIBKey, 1),
+	SR_FGT(SYS_APIBKEYHI_EL1, 	HFGxTR, APIBKey, 1),
+	SR_FGT(SYS_APIAKEYLO_EL1, 	HFGxTR, APIAKey, 1),
+	SR_FGT(SYS_APIAKEYHI_EL1, 	HFGxTR, APIAKey, 1),
+	SR_FGT(SYS_APGAKEYLO_EL1, 	HFGxTR, APGAKey, 1),
+	SR_FGT(SYS_APGAKEYHI_EL1, 	HFGxTR, APGAKey, 1),
+	SR_FGT(SYS_APDBKEYLO_EL1, 	HFGxTR, APDBKey, 1),
+	SR_FGT(SYS_APDBKEYHI_EL1, 	HFGxTR, APDBKey, 1),
+	SR_FGT(SYS_APDAKEYLO_EL1, 	HFGxTR, APDAKey, 1),
+	SR_FGT(SYS_APDAKEYHI_EL1, 	HFGxTR, APDAKey, 1),
+	SR_FGT(SYS_AMAIR_EL1, 		HFGxTR, AMAIR_EL1, 1),
+	SR_FGT(SYS_AIDR_EL1, 		HFGxTR, AIDR_EL1, 1),
+	SR_FGT(SYS_AFSR1_EL1, 		HFGxTR, AFSR1_EL1, 1),
+	SR_FGT(SYS_AFSR0_EL1, 		HFGxTR, AFSR0_EL1, 1),
+	/* HFGITR_EL2 */
+	SR_FGT(OP_BRB_IALL, 		HFGITR, nBRBIALL, 0),
+	SR_FGT(OP_BRB_INJ, 		HFGITR, nBRBINJ, 0),
+	SR_FGT(SYS_DC_CVAC, 		HFGITR, DCCVAC, 1),
+	SR_FGT(SYS_DC_CGVAC, 		HFGITR, DCCVAC, 1),
+	SR_FGT(SYS_DC_CGDVAC, 		HFGITR, DCCVAC, 1),
+	SR_FGT(OP_CPP_RCTX, 		HFGITR, CPPRCTX, 1),
+	SR_FGT(OP_DVP_RCTX, 		HFGITR, DVPRCTX, 1),
+	SR_FGT(OP_CFP_RCTX, 		HFGITR, CFPRCTX, 1),
+	SR_FGT(OP_TLBI_VAALE1, 		HFGITR, TLBIVAALE1, 1),
+	SR_FGT(OP_TLBI_VALE1, 		HFGITR, TLBIVALE1, 1),
+	SR_FGT(OP_TLBI_VAAE1, 		HFGITR, TLBIVAAE1, 1),
+	SR_FGT(OP_TLBI_ASIDE1, 		HFGITR, TLBIASIDE1, 1),
+	SR_FGT(OP_TLBI_VAE1, 		HFGITR, TLBIVAE1, 1),
+	SR_FGT(OP_TLBI_VMALLE1, 	HFGITR, TLBIVMALLE1, 1),
+	SR_FGT(OP_TLBI_RVAALE1, 	HFGITR, TLBIRVAALE1, 1),
+	SR_FGT(OP_TLBI_RVALE1, 		HFGITR, TLBIRVALE1, 1),
+	SR_FGT(OP_TLBI_RVAAE1, 		HFGITR, TLBIRVAAE1, 1),
+	SR_FGT(OP_TLBI_RVAE1, 		HFGITR, TLBIRVAE1, 1),
+	SR_FGT(OP_TLBI_RVAALE1IS, 	HFGITR, TLBIRVAALE1IS, 1),
+	SR_FGT(OP_TLBI_RVALE1IS, 	HFGITR, TLBIRVALE1IS, 1),
+	SR_FGT(OP_TLBI_RVAAE1IS, 	HFGITR, TLBIRVAAE1IS, 1),
+	SR_FGT(OP_TLBI_RVAE1IS, 	HFGITR, TLBIRVAE1IS, 1),
+	SR_FGT(OP_TLBI_VAALE1IS, 	HFGITR, TLBIVAALE1IS, 1),
+	SR_FGT(OP_TLBI_VALE1IS, 	HFGITR, TLBIVALE1IS, 1),
+	SR_FGT(OP_TLBI_VAAE1IS, 	HFGITR, TLBIVAAE1IS, 1),
+	SR_FGT(OP_TLBI_ASIDE1IS, 	HFGITR, TLBIASIDE1IS, 1),
+	SR_FGT(OP_TLBI_VAE1IS, 		HFGITR, TLBIVAE1IS, 1),
+	SR_FGT(OP_TLBI_VMALLE1IS, 	HFGITR, TLBIVMALLE1IS, 1),
+	SR_FGT(OP_TLBI_RVAALE1OS, 	HFGITR, TLBIRVAALE1OS, 1),
+	SR_FGT(OP_TLBI_RVALE1OS, 	HFGITR, TLBIRVALE1OS, 1),
+	SR_FGT(OP_TLBI_RVAAE1OS, 	HFGITR, TLBIRVAAE1OS, 1),
+	SR_FGT(OP_TLBI_RVAE1OS, 	HFGITR, TLBIRVAE1OS, 1),
+	SR_FGT(OP_TLBI_VAALE1OS, 	HFGITR, TLBIVAALE1OS, 1),
+	SR_FGT(OP_TLBI_VALE1OS, 	HFGITR, TLBIVALE1OS, 1),
+	SR_FGT(OP_TLBI_VAAE1OS, 	HFGITR, TLBIVAAE1OS, 1),
+	SR_FGT(OP_TLBI_ASIDE1OS, 	HFGITR, TLBIASIDE1OS, 1),
+	SR_FGT(OP_TLBI_VAE1OS, 		HFGITR, TLBIVAE1OS, 1),
+	SR_FGT(OP_TLBI_VMALLE1OS, 	HFGITR, TLBIVMALLE1OS, 1),
+	/* nXS variants must be checked against HCRX_EL2.FGTnXS */
+	SR_FGF(OP_TLBI_VAALE1NXS, 	HFGITR, TLBIVAALE1, 1, HCRX_FGTnXS),
+	SR_FGF(OP_TLBI_VALE1NXS, 	HFGITR, TLBIVALE1, 1, HCRX_FGTnXS),
+	SR_FGF(OP_TLBI_VAAE1NXS, 	HFGITR, TLBIVAAE1, 1, HCRX_FGTnXS),
+	SR_FGF(OP_TLBI_ASIDE1NXS, 	HFGITR, TLBIASIDE1, 1, HCRX_FGTnXS),
+	SR_FGF(OP_TLBI_VAE1NXS, 	HFGITR, TLBIVAE1, 1, HCRX_FGTnXS),
+	SR_FGF(OP_TLBI_VMALLE1NXS, 	HFGITR, TLBIVMALLE1, 1, HCRX_FGTnXS),
+	SR_FGF(OP_TLBI_RVAALE1NXS, 	HFGITR, TLBIRVAALE1, 1, HCRX_FGTnXS),
+	SR_FGF(OP_TLBI_RVALE1NXS, 	HFGITR, TLBIRVALE1, 1, HCRX_FGTnXS),
+	SR_FGF(OP_TLBI_RVAAE1NXS, 	HFGITR, TLBIRVAAE1, 1, HCRX_FGTnXS),
+	SR_FGF(OP_TLBI_RVAE1NXS, 	HFGITR, TLBIRVAE1, 1, HCRX_FGTnXS),
+	SR_FGF(OP_TLBI_RVAALE1ISNXS, 	HFGITR, TLBIRVAALE1IS, 1, HCRX_FGTnXS),
+	SR_FGF(OP_TLBI_RVALE1ISNXS, 	HFGITR, TLBIRVALE1IS, 1, HCRX_FGTnXS),
+	SR_FGF(OP_TLBI_RVAAE1ISNXS, 	HFGITR, TLBIRVAAE1IS, 1, HCRX_FGTnXS),
+	SR_FGF(OP_TLBI_RVAE1ISNXS, 	HFGITR, TLBIRVAE1IS, 1, HCRX_FGTnXS),
+	SR_FGF(OP_TLBI_VAALE1ISNXS, 	HFGITR, TLBIVAALE1IS, 1, HCRX_FGTnXS),
+	SR_FGF(OP_TLBI_VALE1ISNXS, 	HFGITR, TLBIVALE1IS, 1, HCRX_FGTnXS),
+	SR_FGF(OP_TLBI_VAAE1ISNXS, 	HFGITR, TLBIVAAE1IS, 1, HCRX_FGTnXS),
+	SR_FGF(OP_TLBI_ASIDE1ISNXS, 	HFGITR, TLBIASIDE1IS, 1, HCRX_FGTnXS),
+	SR_FGF(OP_TLBI_VAE1ISNXS, 	HFGITR, TLBIVAE1IS, 1, HCRX_FGTnXS),
+	SR_FGF(OP_TLBI_VMALLE1ISNXS, 	HFGITR, TLBIVMALLE1IS, 1, HCRX_FGTnXS),
+	SR_FGF(OP_TLBI_RVAALE1OSNXS, 	HFGITR, TLBIRVAALE1OS, 1, HCRX_FGTnXS),
+	SR_FGF(OP_TLBI_RVALE1OSNXS, 	HFGITR, TLBIRVALE1OS, 1, HCRX_FGTnXS),
+	SR_FGF(OP_TLBI_RVAAE1OSNXS, 	HFGITR, TLBIRVAAE1OS, 1, HCRX_FGTnXS),
+	SR_FGF(OP_TLBI_RVAE1OSNXS, 	HFGITR, TLBIRVAE1OS, 1, HCRX_FGTnXS),
+	SR_FGF(OP_TLBI_VAALE1OSNXS, 	HFGITR, TLBIVAALE1OS, 1, HCRX_FGTnXS),
+	SR_FGF(OP_TLBI_VALE1OSNXS, 	HFGITR, TLBIVALE1OS, 1, HCRX_FGTnXS),
+	SR_FGF(OP_TLBI_VAAE1OSNXS, 	HFGITR, TLBIVAAE1OS, 1, HCRX_FGTnXS),
+	SR_FGF(OP_TLBI_ASIDE1OSNXS, 	HFGITR, TLBIASIDE1OS, 1, HCRX_FGTnXS),
+	SR_FGF(OP_TLBI_VAE1OSNXS, 	HFGITR, TLBIVAE1OS, 1, HCRX_FGTnXS),
+	SR_FGF(OP_TLBI_VMALLE1OSNXS, 	HFGITR, TLBIVMALLE1OS, 1, HCRX_FGTnXS),
+	SR_FGT(OP_AT_S1E1WP, 		HFGITR, ATS1E1WP, 1),
+	SR_FGT(OP_AT_S1E1RP, 		HFGITR, ATS1E1RP, 1),
+	SR_FGT(OP_AT_S1E0W, 		HFGITR, ATS1E0W, 1),
+	SR_FGT(OP_AT_S1E0R, 		HFGITR, ATS1E0R, 1),
+	SR_FGT(OP_AT_S1E1W, 		HFGITR, ATS1E1W, 1),
+	SR_FGT(OP_AT_S1E1R, 		HFGITR, ATS1E1R, 1),
+	SR_FGT(SYS_DC_ZVA, 		HFGITR, DCZVA, 1),
+	SR_FGT(SYS_DC_GVA, 		HFGITR, DCZVA, 1),
+	SR_FGT(SYS_DC_GZVA, 		HFGITR, DCZVA, 1),
+	SR_FGT(SYS_DC_CIVAC, 		HFGITR, DCCIVAC, 1),
+	SR_FGT(SYS_DC_CIGVAC, 		HFGITR, DCCIVAC, 1),
+	SR_FGT(SYS_DC_CIGDVAC, 		HFGITR, DCCIVAC, 1),
+	SR_FGT(SYS_DC_CVADP, 		HFGITR, DCCVADP, 1),
+	SR_FGT(SYS_DC_CGVADP, 		HFGITR, DCCVADP, 1),
+	SR_FGT(SYS_DC_CGDVADP, 		HFGITR, DCCVADP, 1),
+	SR_FGT(SYS_DC_CVAP, 		HFGITR, DCCVAP, 1),
+	SR_FGT(SYS_DC_CGVAP, 		HFGITR, DCCVAP, 1),
+	SR_FGT(SYS_DC_CGDVAP, 		HFGITR, DCCVAP, 1),
+	SR_FGT(SYS_DC_CVAU, 		HFGITR, DCCVAU, 1),
+	SR_FGT(SYS_DC_CISW, 		HFGITR, DCCISW, 1),
+	SR_FGT(SYS_DC_CIGSW, 		HFGITR, DCCISW, 1),
+	SR_FGT(SYS_DC_CIGDSW, 		HFGITR, DCCISW, 1),
+	SR_FGT(SYS_DC_CSW, 		HFGITR, DCCSW, 1),
+	SR_FGT(SYS_DC_CGSW, 		HFGITR, DCCSW, 1),
+	SR_FGT(SYS_DC_CGDSW, 		HFGITR, DCCSW, 1),
+	SR_FGT(SYS_DC_ISW, 		HFGITR, DCISW, 1),
+	SR_FGT(SYS_DC_IGSW, 		HFGITR, DCISW, 1),
+	SR_FGT(SYS_DC_IGDSW, 		HFGITR, DCISW, 1),
+	SR_FGT(SYS_DC_IVAC, 		HFGITR, DCIVAC, 1),
+	SR_FGT(SYS_DC_IGVAC, 		HFGITR, DCIVAC, 1),
+	SR_FGT(SYS_DC_IGDVAC, 		HFGITR, DCIVAC, 1),
+	SR_FGT(SYS_IC_IVAU, 		HFGITR, ICIVAU, 1),
+	SR_FGT(SYS_IC_IALLU, 		HFGITR, ICIALLU, 1),
+	SR_FGT(SYS_IC_IALLUIS, 		HFGITR, ICIALLUIS, 1),
+	/* HDFGRTR_EL2 */
+	SR_FGT(SYS_PMBIDR_EL1, 		HDFGRTR, PMBIDR_EL1, 1),
+	SR_FGT(SYS_PMSNEVFR_EL1, 	HDFGRTR, nPMSNEVFR_EL1, 0),
+	SR_FGT(SYS_BRBINF_EL1(0), 	HDFGRTR, nBRBDATA, 0),
+	SR_FGT(SYS_BRBINF_EL1(1), 	HDFGRTR, nBRBDATA, 0),
+	SR_FGT(SYS_BRBINF_EL1(2), 	HDFGRTR, nBRBDATA, 0),
+	SR_FGT(SYS_BRBINF_EL1(3), 	HDFGRTR, nBRBDATA, 0),
+	SR_FGT(SYS_BRBINF_EL1(4), 	HDFGRTR, nBRBDATA, 0),
+	SR_FGT(SYS_BRBINF_EL1(5), 	HDFGRTR, nBRBDATA, 0),
+	SR_FGT(SYS_BRBINF_EL1(6), 	HDFGRTR, nBRBDATA, 0),
+	SR_FGT(SYS_BRBINF_EL1(7), 	HDFGRTR, nBRBDATA, 0),
+	SR_FGT(SYS_BRBINF_EL1(8), 	HDFGRTR, nBRBDATA, 0),
+	SR_FGT(SYS_BRBINF_EL1(9), 	HDFGRTR, nBRBDATA, 0),
+	SR_FGT(SYS_BRBINF_EL1(10), 	HDFGRTR, nBRBDATA, 0),
+	SR_FGT(SYS_BRBINF_EL1(11), 	HDFGRTR, nBRBDATA, 0),
+	SR_FGT(SYS_BRBINF_EL1(12), 	HDFGRTR, nBRBDATA, 0),
+	SR_FGT(SYS_BRBINF_EL1(13), 	HDFGRTR, nBRBDATA, 0),
+	SR_FGT(SYS_BRBINF_EL1(14), 	HDFGRTR, nBRBDATA, 0),
+	SR_FGT(SYS_BRBINF_EL1(15), 	HDFGRTR, nBRBDATA, 0),
+	SR_FGT(SYS_BRBINF_EL1(16), 	HDFGRTR, nBRBDATA, 0),
+	SR_FGT(SYS_BRBINF_EL1(17), 	HDFGRTR, nBRBDATA, 0),
+	SR_FGT(SYS_BRBINF_EL1(18), 	HDFGRTR, nBRBDATA, 0),
+	SR_FGT(SYS_BRBINF_EL1(19), 	HDFGRTR, nBRBDATA, 0),
+	SR_FGT(SYS_BRBINF_EL1(20), 	HDFGRTR, nBRBDATA, 0),
+	SR_FGT(SYS_BRBINF_EL1(21), 	HDFGRTR, nBRBDATA, 0),
+	SR_FGT(SYS_BRBINF_EL1(22), 	HDFGRTR, nBRBDATA, 0),
+	SR_FGT(SYS_BRBINF_EL1(23), 	HDFGRTR, nBRBDATA, 0),
+	SR_FGT(SYS_BRBINF_EL1(24), 	HDFGRTR, nBRBDATA, 0),
+	SR_FGT(SYS_BRBINF_EL1(25), 	HDFGRTR, nBRBDATA, 0),
+	SR_FGT(SYS_BRBINF_EL1(26), 	HDFGRTR, nBRBDATA, 0),
+	SR_FGT(SYS_BRBINF_EL1(27), 	HDFGRTR, nBRBDATA, 0),
+	SR_FGT(SYS_BRBINF_EL1(28), 	HDFGRTR, nBRBDATA, 0),
+	SR_FGT(SYS_BRBINF_EL1(29), 	HDFGRTR, nBRBDATA, 0),
+	SR_FGT(SYS_BRBINF_EL1(30), 	HDFGRTR, nBRBDATA, 0),
+	SR_FGT(SYS_BRBINF_EL1(31), 	HDFGRTR, nBRBDATA, 0),
+	SR_FGT(SYS_BRBINFINJ_EL1, 	HDFGRTR, nBRBDATA, 0),
+	SR_FGT(SYS_BRBSRC_EL1(0), 	HDFGRTR, nBRBDATA, 0),
+	SR_FGT(SYS_BRBSRC_EL1(1), 	HDFGRTR, nBRBDATA, 0),
+	SR_FGT(SYS_BRBSRC_EL1(2), 	HDFGRTR, nBRBDATA, 0),
+	SR_FGT(SYS_BRBSRC_EL1(3), 	HDFGRTR, nBRBDATA, 0),
+	SR_FGT(SYS_BRBSRC_EL1(4), 	HDFGRTR, nBRBDATA, 0),
+	SR_FGT(SYS_BRBSRC_EL1(5), 	HDFGRTR, nBRBDATA, 0),
+	SR_FGT(SYS_BRBSRC_EL1(6), 	HDFGRTR, nBRBDATA, 0),
+	SR_FGT(SYS_BRBSRC_EL1(7), 	HDFGRTR, nBRBDATA, 0),
+	SR_FGT(SYS_BRBSRC_EL1(8), 	HDFGRTR, nBRBDATA, 0),
+	SR_FGT(SYS_BRBSRC_EL1(9), 	HDFGRTR, nBRBDATA, 0),
+	SR_FGT(SYS_BRBSRC_EL1(10), 	HDFGRTR, nBRBDATA, 0),
+	SR_FGT(SYS_BRBSRC_EL1(11), 	HDFGRTR, nBRBDATA, 0),
+	SR_FGT(SYS_BRBSRC_EL1(12), 	HDFGRTR, nBRBDATA, 0),
+	SR_FGT(SYS_BRBSRC_EL1(13), 	HDFGRTR, nBRBDATA, 0),
+	SR_FGT(SYS_BRBSRC_EL1(14), 	HDFGRTR, nBRBDATA, 0),
+	SR_FGT(SYS_BRBSRC_EL1(15), 	HDFGRTR, nBRBDATA, 0),
+	SR_FGT(SYS_BRBSRC_EL1(16), 	HDFGRTR, nBRBDATA, 0),
+	SR_FGT(SYS_BRBSRC_EL1(17), 	HDFGRTR, nBRBDATA, 0),
+	SR_FGT(SYS_BRBSRC_EL1(18), 	HDFGRTR, nBRBDATA, 0),
+	SR_FGT(SYS_BRBSRC_EL1(19), 	HDFGRTR, nBRBDATA, 0),
+	SR_FGT(SYS_BRBSRC_EL1(20), 	HDFGRTR, nBRBDATA, 0),
+	SR_FGT(SYS_BRBSRC_EL1(21), 	HDFGRTR, nBRBDATA, 0),
+	SR_FGT(SYS_BRBSRC_EL1(22), 	HDFGRTR, nBRBDATA, 0),
+	SR_FGT(SYS_BRBSRC_EL1(23), 	HDFGRTR, nBRBDATA, 0),
+	SR_FGT(SYS_BRBSRC_EL1(24), 	HDFGRTR, nBRBDATA, 0),
+	SR_FGT(SYS_BRBSRC_EL1(25), 	HDFGRTR, nBRBDATA, 0),
+	SR_FGT(SYS_BRBSRC_EL1(26), 	HDFGRTR, nBRBDATA, 0),
+	SR_FGT(SYS_BRBSRC_EL1(27), 	HDFGRTR, nBRBDATA, 0),
+	SR_FGT(SYS_BRBSRC_EL1(28), 	HDFGRTR, nBRBDATA, 0),
+	SR_FGT(SYS_BRBSRC_EL1(29), 	HDFGRTR, nBRBDATA, 0),
+	SR_FGT(SYS_BRBSRC_EL1(30), 	HDFGRTR, nBRBDATA, 0),
+	SR_FGT(SYS_BRBSRC_EL1(31), 	HDFGRTR, nBRBDATA, 0),
+	SR_FGT(SYS_BRBSRCINJ_EL1, 	HDFGRTR, nBRBDATA, 0),
+	SR_FGT(SYS_BRBTGT_EL1(0), 	HDFGRTR, nBRBDATA, 0),
+	SR_FGT(SYS_BRBTGT_EL1(1), 	HDFGRTR, nBRBDATA, 0),
+	SR_FGT(SYS_BRBTGT_EL1(2), 	HDFGRTR, nBRBDATA, 0),
+	SR_FGT(SYS_BRBTGT_EL1(3), 	HDFGRTR, nBRBDATA, 0),
+	SR_FGT(SYS_BRBTGT_EL1(4), 	HDFGRTR, nBRBDATA, 0),
+	SR_FGT(SYS_BRBTGT_EL1(5), 	HDFGRTR, nBRBDATA, 0),
+	SR_FGT(SYS_BRBTGT_EL1(6), 	HDFGRTR, nBRBDATA, 0),
+	SR_FGT(SYS_BRBTGT_EL1(7), 	HDFGRTR, nBRBDATA, 0),
+	SR_FGT(SYS_BRBTGT_EL1(8), 	HDFGRTR, nBRBDATA, 0),
+	SR_FGT(SYS_BRBTGT_EL1(9), 	HDFGRTR, nBRBDATA, 0),
+	SR_FGT(SYS_BRBTGT_EL1(10), 	HDFGRTR, nBRBDATA, 0),
+	SR_FGT(SYS_BRBTGT_EL1(11), 	HDFGRTR, nBRBDATA, 0),
+	SR_FGT(SYS_BRBTGT_EL1(12), 	HDFGRTR, nBRBDATA, 0),
+	SR_FGT(SYS_BRBTGT_EL1(13), 	HDFGRTR, nBRBDATA, 0),
+	SR_FGT(SYS_BRBTGT_EL1(14), 	HDFGRTR, nBRBDATA, 0),
+	SR_FGT(SYS_BRBTGT_EL1(15), 	HDFGRTR, nBRBDATA, 0),
+	SR_FGT(SYS_BRBTGT_EL1(16), 	HDFGRTR, nBRBDATA, 0),
+	SR_FGT(SYS_BRBTGT_EL1(17), 	HDFGRTR, nBRBDATA, 0),
+	SR_FGT(SYS_BRBTGT_EL1(18), 	HDFGRTR, nBRBDATA, 0),
+	SR_FGT(SYS_BRBTGT_EL1(19), 	HDFGRTR, nBRBDATA, 0),
+	SR_FGT(SYS_BRBTGT_EL1(20), 	HDFGRTR, nBRBDATA, 0),
+	SR_FGT(SYS_BRBTGT_EL1(21), 	HDFGRTR, nBRBDATA, 0),
+	SR_FGT(SYS_BRBTGT_EL1(22), 	HDFGRTR, nBRBDATA, 0),
+	SR_FGT(SYS_BRBTGT_EL1(23), 	HDFGRTR, nBRBDATA, 0),
+	SR_FGT(SYS_BRBTGT_EL1(24), 	HDFGRTR, nBRBDATA, 0),
+	SR_FGT(SYS_BRBTGT_EL1(25), 	HDFGRTR, nBRBDATA, 0),
+	SR_FGT(SYS_BRBTGT_EL1(26), 	HDFGRTR, nBRBDATA, 0),
+	SR_FGT(SYS_BRBTGT_EL1(27), 	HDFGRTR, nBRBDATA, 0),
+	SR_FGT(SYS_BRBTGT_EL1(28), 	HDFGRTR, nBRBDATA, 0),
+	SR_FGT(SYS_BRBTGT_EL1(29), 	HDFGRTR, nBRBDATA, 0),
+	SR_FGT(SYS_BRBTGT_EL1(30), 	HDFGRTR, nBRBDATA, 0),
+	SR_FGT(SYS_BRBTGT_EL1(31), 	HDFGRTR, nBRBDATA, 0),
+	SR_FGT(SYS_BRBTGTINJ_EL1, 	HDFGRTR, nBRBDATA, 0),
+	SR_FGT(SYS_BRBTS_EL1, 		HDFGRTR, nBRBDATA, 0),
+	SR_FGT(SYS_BRBCR_EL1, 		HDFGRTR, nBRBCTL, 0),
+	SR_FGT(SYS_BRBFCR_EL1, 		HDFGRTR, nBRBCTL, 0),
+	SR_FGT(SYS_BRBIDR0_EL1, 	HDFGRTR, nBRBIDR, 0),
+	SR_FGT(SYS_PMCEID0_EL0, 	HDFGRTR, PMCEIDn_EL0, 1),
+	SR_FGT(SYS_PMCEID1_EL0, 	HDFGRTR, PMCEIDn_EL0, 1),
+	SR_FGT(SYS_PMUSERENR_EL0, 	HDFGRTR, PMUSERENR_EL0, 1),
+	SR_FGT(SYS_TRBTRG_EL1, 		HDFGRTR, TRBTRG_EL1, 1),
+	SR_FGT(SYS_TRBSR_EL1, 		HDFGRTR, TRBSR_EL1, 1),
+	SR_FGT(SYS_TRBPTR_EL1, 		HDFGRTR, TRBPTR_EL1, 1),
+	SR_FGT(SYS_TRBMAR_EL1, 		HDFGRTR, TRBMAR_EL1, 1),
+	SR_FGT(SYS_TRBLIMITR_EL1, 	HDFGRTR, TRBLIMITR_EL1, 1),
+	SR_FGT(SYS_TRBIDR_EL1, 		HDFGRTR, TRBIDR_EL1, 1),
+	SR_FGT(SYS_TRBBASER_EL1, 	HDFGRTR, TRBBASER_EL1, 1),
+	SR_FGT(SYS_TRCVICTLR, 		HDFGRTR, TRCVICTLR, 1),
+	SR_FGT(SYS_TRCSTATR, 		HDFGRTR, TRCSTATR, 1),
+	SR_FGT(SYS_TRCSSCSR(0), 	HDFGRTR, TRCSSCSRn, 1),
+	SR_FGT(SYS_TRCSSCSR(1), 	HDFGRTR, TRCSSCSRn, 1),
+	SR_FGT(SYS_TRCSSCSR(2), 	HDFGRTR, TRCSSCSRn, 1),
+	SR_FGT(SYS_TRCSSCSR(3), 	HDFGRTR, TRCSSCSRn, 1),
+	SR_FGT(SYS_TRCSSCSR(4), 	HDFGRTR, TRCSSCSRn, 1),
+	SR_FGT(SYS_TRCSSCSR(5), 	HDFGRTR, TRCSSCSRn, 1),
+	SR_FGT(SYS_TRCSSCSR(6), 	HDFGRTR, TRCSSCSRn, 1),
+	SR_FGT(SYS_TRCSSCSR(7), 	HDFGRTR, TRCSSCSRn, 1),
+	SR_FGT(SYS_TRCSEQSTR, 		HDFGRTR, TRCSEQSTR, 1),
+	SR_FGT(SYS_TRCPRGCTLR, 		HDFGRTR, TRCPRGCTLR, 1),
+	SR_FGT(SYS_TRCOSLSR, 		HDFGRTR, TRCOSLSR, 1),
+	SR_FGT(SYS_TRCIMSPEC(0), 	HDFGRTR, TRCIMSPECn, 1),
+	SR_FGT(SYS_TRCIMSPEC(1), 	HDFGRTR, TRCIMSPECn, 1),
+	SR_FGT(SYS_TRCIMSPEC(2), 	HDFGRTR, TRCIMSPECn, 1),
+	SR_FGT(SYS_TRCIMSPEC(3), 	HDFGRTR, TRCIMSPECn, 1),
+	SR_FGT(SYS_TRCIMSPEC(4), 	HDFGRTR, TRCIMSPECn, 1),
+	SR_FGT(SYS_TRCIMSPEC(5), 	HDFGRTR, TRCIMSPECn, 1),
+	SR_FGT(SYS_TRCIMSPEC(6), 	HDFGRTR, TRCIMSPECn, 1),
+	SR_FGT(SYS_TRCIMSPEC(7), 	HDFGRTR, TRCIMSPECn, 1),
+	SR_FGT(SYS_TRCDEVARCH, 		HDFGRTR, TRCID, 1),
+	SR_FGT(SYS_TRCDEVID, 		HDFGRTR, TRCID, 1),
+	SR_FGT(SYS_TRCIDR0, 		HDFGRTR, TRCID, 1),
+	SR_FGT(SYS_TRCIDR1, 		HDFGRTR, TRCID, 1),
+	SR_FGT(SYS_TRCIDR2, 		HDFGRTR, TRCID, 1),
+	SR_FGT(SYS_TRCIDR3, 		HDFGRTR, TRCID, 1),
+	SR_FGT(SYS_TRCIDR4, 		HDFGRTR, TRCID, 1),
+	SR_FGT(SYS_TRCIDR5, 		HDFGRTR, TRCID, 1),
+	SR_FGT(SYS_TRCIDR6, 		HDFGRTR, TRCID, 1),
+	SR_FGT(SYS_TRCIDR7, 		HDFGRTR, TRCID, 1),
+	SR_FGT(SYS_TRCIDR8, 		HDFGRTR, TRCID, 1),
+	SR_FGT(SYS_TRCIDR9, 		HDFGRTR, TRCID, 1),
+	SR_FGT(SYS_TRCIDR10, 		HDFGRTR, TRCID, 1),
+	SR_FGT(SYS_TRCIDR11, 		HDFGRTR, TRCID, 1),
+	SR_FGT(SYS_TRCIDR12, 		HDFGRTR, TRCID, 1),
+	SR_FGT(SYS_TRCIDR13, 		HDFGRTR, TRCID, 1),
+	SR_FGT(SYS_TRCCNTVR(0), 	HDFGRTR, TRCCNTVRn, 1),
+	SR_FGT(SYS_TRCCNTVR(1), 	HDFGRTR, TRCCNTVRn, 1),
+	SR_FGT(SYS_TRCCNTVR(2), 	HDFGRTR, TRCCNTVRn, 1),
+	SR_FGT(SYS_TRCCNTVR(3), 	HDFGRTR, TRCCNTVRn, 1),
+	SR_FGT(SYS_TRCCLAIMCLR, 	HDFGRTR, TRCCLAIM, 1),
+	SR_FGT(SYS_TRCCLAIMSET, 	HDFGRTR, TRCCLAIM, 1),
+	SR_FGT(SYS_TRCAUXCTLR, 		HDFGRTR, TRCAUXCTLR, 1),
+	SR_FGT(SYS_TRCAUTHSTATUS, 	HDFGRTR, TRCAUTHSTATUS, 1),
+	SR_FGT(SYS_TRCACATR(0), 	HDFGRTR, TRC, 1),
+	SR_FGT(SYS_TRCACATR(1), 	HDFGRTR, TRC, 1),
+	SR_FGT(SYS_TRCACATR(2), 	HDFGRTR, TRC, 1),
+	SR_FGT(SYS_TRCACATR(3), 	HDFGRTR, TRC, 1),
+	SR_FGT(SYS_TRCACATR(4), 	HDFGRTR, TRC, 1),
+	SR_FGT(SYS_TRCACATR(5), 	HDFGRTR, TRC, 1),
+	SR_FGT(SYS_TRCACATR(6), 	HDFGRTR, TRC, 1),
+	SR_FGT(SYS_TRCACATR(7), 	HDFGRTR, TRC, 1),
+	SR_FGT(SYS_TRCACATR(8), 	HDFGRTR, TRC, 1),
+	SR_FGT(SYS_TRCACATR(9), 	HDFGRTR, TRC, 1),
+	SR_FGT(SYS_TRCACATR(10), 	HDFGRTR, TRC, 1),
+	SR_FGT(SYS_TRCACATR(11), 	HDFGRTR, TRC, 1),
+	SR_FGT(SYS_TRCACATR(12), 	HDFGRTR, TRC, 1),
+	SR_FGT(SYS_TRCACATR(13), 	HDFGRTR, TRC, 1),
+	SR_FGT(SYS_TRCACATR(14), 	HDFGRTR, TRC, 1),
+	SR_FGT(SYS_TRCACATR(15), 	HDFGRTR, TRC, 1),
+	SR_FGT(SYS_TRCACVR(0), 		HDFGRTR, TRC, 1),
+	SR_FGT(SYS_TRCACVR(1), 		HDFGRTR, TRC, 1),
+	SR_FGT(SYS_TRCACVR(2), 		HDFGRTR, TRC, 1),
+	SR_FGT(SYS_TRCACVR(3), 		HDFGRTR, TRC, 1),
+	SR_FGT(SYS_TRCACVR(4), 		HDFGRTR, TRC, 1),
+	SR_FGT(SYS_TRCACVR(5), 		HDFGRTR, TRC, 1),
+	SR_FGT(SYS_TRCACVR(6), 		HDFGRTR, TRC, 1),
+	SR_FGT(SYS_TRCACVR(7), 		HDFGRTR, TRC, 1),
+	SR_FGT(SYS_TRCACVR(8), 		HDFGRTR, TRC, 1),
+	SR_FGT(SYS_TRCACVR(9), 		HDFGRTR, TRC, 1),
+	SR_FGT(SYS_TRCACVR(10), 	HDFGRTR, TRC, 1),
+	SR_FGT(SYS_TRCACVR(11), 	HDFGRTR, TRC, 1),
+	SR_FGT(SYS_TRCACVR(12), 	HDFGRTR, TRC, 1),
+	SR_FGT(SYS_TRCACVR(13), 	HDFGRTR, TRC, 1),
+	SR_FGT(SYS_TRCACVR(14), 	HDFGRTR, TRC, 1),
+	SR_FGT(SYS_TRCACVR(15), 	HDFGRTR, TRC, 1),
+	SR_FGT(SYS_TRCBBCTLR, 		HDFGRTR, TRC, 1),
+	SR_FGT(SYS_TRCCCCTLR, 		HDFGRTR, TRC, 1),
+	SR_FGT(SYS_TRCCIDCCTLR0, 	HDFGRTR, TRC, 1),
+	SR_FGT(SYS_TRCCIDCCTLR1, 	HDFGRTR, TRC, 1),
+	SR_FGT(SYS_TRCCIDCVR(0), 	HDFGRTR, TRC, 1),
+	SR_FGT(SYS_TRCCIDCVR(1), 	HDFGRTR, TRC, 1),
+	SR_FGT(SYS_TRCCIDCVR(2), 	HDFGRTR, TRC, 1),
+	SR_FGT(SYS_TRCCIDCVR(3), 	HDFGRTR, TRC, 1),
+	SR_FGT(SYS_TRCCIDCVR(4), 	HDFGRTR, TRC, 1),
+	SR_FGT(SYS_TRCCIDCVR(5), 	HDFGRTR, TRC, 1),
+	SR_FGT(SYS_TRCCIDCVR(6), 	HDFGRTR, TRC, 1),
+	SR_FGT(SYS_TRCCIDCVR(7), 	HDFGRTR, TRC, 1),
+	SR_FGT(SYS_TRCCNTCTLR(0), 	HDFGRTR, TRC, 1),
+	SR_FGT(SYS_TRCCNTCTLR(1), 	HDFGRTR, TRC, 1),
+	SR_FGT(SYS_TRCCNTCTLR(2), 	HDFGRTR, TRC, 1),
+	SR_FGT(SYS_TRCCNTCTLR(3), 	HDFGRTR, TRC, 1),
+	SR_FGT(SYS_TRCCNTRLDVR(0), 	HDFGRTR, TRC, 1),
+	SR_FGT(SYS_TRCCNTRLDVR(1), 	HDFGRTR, TRC, 1),
+	SR_FGT(SYS_TRCCNTRLDVR(2), 	HDFGRTR, TRC, 1),
+	SR_FGT(SYS_TRCCNTRLDVR(3), 	HDFGRTR, TRC, 1),
+	SR_FGT(SYS_TRCCONFIGR, 		HDFGRTR, TRC, 1),
+	SR_FGT(SYS_TRCEVENTCTL0R, 	HDFGRTR, TRC, 1),
+	SR_FGT(SYS_TRCEVENTCTL1R, 	HDFGRTR, TRC, 1),
+	SR_FGT(SYS_TRCEXTINSELR(0), 	HDFGRTR, TRC, 1),
+	SR_FGT(SYS_TRCEXTINSELR(1), 	HDFGRTR, TRC, 1),
+	SR_FGT(SYS_TRCEXTINSELR(2), 	HDFGRTR, TRC, 1),
+	SR_FGT(SYS_TRCEXTINSELR(3), 	HDFGRTR, TRC, 1),
+	SR_FGT(SYS_TRCQCTLR, 		HDFGRTR, TRC, 1),
+	SR_FGT(SYS_TRCRSCTLR(2), 	HDFGRTR, TRC, 1),
+	SR_FGT(SYS_TRCRSCTLR(3), 	HDFGRTR, TRC, 1),
+	SR_FGT(SYS_TRCRSCTLR(4), 	HDFGRTR, TRC, 1),
+	SR_FGT(SYS_TRCRSCTLR(5), 	HDFGRTR, TRC, 1),
+	SR_FGT(SYS_TRCRSCTLR(6), 	HDFGRTR, TRC, 1),
+	SR_FGT(SYS_TRCRSCTLR(7), 	HDFGRTR, TRC, 1),
+	SR_FGT(SYS_TRCRSCTLR(8), 	HDFGRTR, TRC, 1),
+	SR_FGT(SYS_TRCRSCTLR(9), 	HDFGRTR, TRC, 1),
+	SR_FGT(SYS_TRCRSCTLR(10), 	HDFGRTR, TRC, 1),
+	SR_FGT(SYS_TRCRSCTLR(11), 	HDFGRTR, TRC, 1),
+	SR_FGT(SYS_TRCRSCTLR(12), 	HDFGRTR, TRC, 1),
+	SR_FGT(SYS_TRCRSCTLR(13), 	HDFGRTR, TRC, 1),
+	SR_FGT(SYS_TRCRSCTLR(14), 	HDFGRTR, TRC, 1),
+	SR_FGT(SYS_TRCRSCTLR(15), 	HDFGRTR, TRC, 1),
+	SR_FGT(SYS_TRCRSCTLR(16), 	HDFGRTR, TRC, 1),
+	SR_FGT(SYS_TRCRSCTLR(17), 	HDFGRTR, TRC, 1),
+	SR_FGT(SYS_TRCRSCTLR(18), 	HDFGRTR, TRC, 1),
+	SR_FGT(SYS_TRCRSCTLR(19), 	HDFGRTR, TRC, 1),
+	SR_FGT(SYS_TRCRSCTLR(20), 	HDFGRTR, TRC, 1),
+	SR_FGT(SYS_TRCRSCTLR(21), 	HDFGRTR, TRC, 1),
+	SR_FGT(SYS_TRCRSCTLR(22), 	HDFGRTR, TRC, 1),
+	SR_FGT(SYS_TRCRSCTLR(23), 	HDFGRTR, TRC, 1),
+	SR_FGT(SYS_TRCRSCTLR(24), 	HDFGRTR, TRC, 1),
+	SR_FGT(SYS_TRCRSCTLR(25), 	HDFGRTR, TRC, 1),
+	SR_FGT(SYS_TRCRSCTLR(26), 	HDFGRTR, TRC, 1),
+	SR_FGT(SYS_TRCRSCTLR(27), 	HDFGRTR, TRC, 1),
+	SR_FGT(SYS_TRCRSCTLR(28), 	HDFGRTR, TRC, 1),
+	SR_FGT(SYS_TRCRSCTLR(29), 	HDFGRTR, TRC, 1),
+	SR_FGT(SYS_TRCRSCTLR(30), 	HDFGRTR, TRC, 1),
+	SR_FGT(SYS_TRCRSCTLR(31), 	HDFGRTR, TRC, 1),
+	SR_FGT(SYS_TRCRSR, 		HDFGRTR, TRC, 1),
+	SR_FGT(SYS_TRCSEQEVR(0), 	HDFGRTR, TRC, 1),
+	SR_FGT(SYS_TRCSEQEVR(1), 	HDFGRTR, TRC, 1),
+	SR_FGT(SYS_TRCSEQEVR(2), 	HDFGRTR, TRC, 1),
+	SR_FGT(SYS_TRCSEQRSTEVR, 	HDFGRTR, TRC, 1),
+	SR_FGT(SYS_TRCSSCCR(0), 	HDFGRTR, TRC, 1),
+	SR_FGT(SYS_TRCSSCCR(1), 	HDFGRTR, TRC, 1),
+	SR_FGT(SYS_TRCSSCCR(2), 	HDFGRTR, TRC, 1),
+	SR_FGT(SYS_TRCSSCCR(3), 	HDFGRTR, TRC, 1),
+	SR_FGT(SYS_TRCSSCCR(4), 	HDFGRTR, TRC, 1),
+	SR_FGT(SYS_TRCSSCCR(5), 	HDFGRTR, TRC, 1),
+	SR_FGT(SYS_TRCSSCCR(6), 	HDFGRTR, TRC, 1),
+	SR_FGT(SYS_TRCSSCCR(7), 	HDFGRTR, TRC, 1),
+	SR_FGT(SYS_TRCSSPCICR(0), 	HDFGRTR, TRC, 1),
+	SR_FGT(SYS_TRCSSPCICR(1), 	HDFGRTR, TRC, 1),
+	SR_FGT(SYS_TRCSSPCICR(2), 	HDFGRTR, TRC, 1),
+	SR_FGT(SYS_TRCSSPCICR(3), 	HDFGRTR, TRC, 1),
+	SR_FGT(SYS_TRCSSPCICR(4), 	HDFGRTR, TRC, 1),
+	SR_FGT(SYS_TRCSSPCICR(5), 	HDFGRTR, TRC, 1),
+	SR_FGT(SYS_TRCSSPCICR(6), 	HDFGRTR, TRC, 1),
+	SR_FGT(SYS_TRCSSPCICR(7), 	HDFGRTR, TRC, 1),
+	SR_FGT(SYS_TRCSTALLCTLR, 	HDFGRTR, TRC, 1),
+	SR_FGT(SYS_TRCSYNCPR, 		HDFGRTR, TRC, 1),
+	SR_FGT(SYS_TRCTRACEIDR, 	HDFGRTR, TRC, 1),
+	SR_FGT(SYS_TRCTSCTLR, 		HDFGRTR, TRC, 1),
+	SR_FGT(SYS_TRCVIIECTLR, 	HDFGRTR, TRC, 1),
+	SR_FGT(SYS_TRCVIPCSSCTLR, 	HDFGRTR, TRC, 1),
+	SR_FGT(SYS_TRCVISSCTLR, 	HDFGRTR, TRC, 1),
+	SR_FGT(SYS_TRCVMIDCCTLR0, 	HDFGRTR, TRC, 1),
+	SR_FGT(SYS_TRCVMIDCCTLR1, 	HDFGRTR, TRC, 1),
+	SR_FGT(SYS_TRCVMIDCVR(0), 	HDFGRTR, TRC, 1),
+	SR_FGT(SYS_TRCVMIDCVR(1), 	HDFGRTR, TRC, 1),
+	SR_FGT(SYS_TRCVMIDCVR(2), 	HDFGRTR, TRC, 1),
+	SR_FGT(SYS_TRCVMIDCVR(3), 	HDFGRTR, TRC, 1),
+	SR_FGT(SYS_TRCVMIDCVR(4), 	HDFGRTR, TRC, 1),
+	SR_FGT(SYS_TRCVMIDCVR(5), 	HDFGRTR, TRC, 1),
+	SR_FGT(SYS_TRCVMIDCVR(6), 	HDFGRTR, TRC, 1),
+	SR_FGT(SYS_TRCVMIDCVR(7), 	HDFGRTR, TRC, 1),
+	SR_FGT(SYS_PMSLATFR_EL1, 	HDFGRTR, PMSLATFR_EL1, 1),
+	SR_FGT(SYS_PMSIRR_EL1, 		HDFGRTR, PMSIRR_EL1, 1),
+	SR_FGT(SYS_PMSIDR_EL1, 		HDFGRTR, PMSIDR_EL1, 1),
+	SR_FGT(SYS_PMSICR_EL1, 		HDFGRTR, PMSICR_EL1, 1),
+	SR_FGT(SYS_PMSFCR_EL1, 		HDFGRTR, PMSFCR_EL1, 1),
+	SR_FGT(SYS_PMSEVFR_EL1, 	HDFGRTR, PMSEVFR_EL1, 1),
+	SR_FGT(SYS_PMSCR_EL1, 		HDFGRTR, PMSCR_EL1, 1),
+	SR_FGT(SYS_PMBSR_EL1, 		HDFGRTR, PMBSR_EL1, 1),
+	SR_FGT(SYS_PMBPTR_EL1, 		HDFGRTR, PMBPTR_EL1, 1),
+	SR_FGT(SYS_PMBLIMITR_EL1, 	HDFGRTR, PMBLIMITR_EL1, 1),
+	SR_FGT(SYS_PMMIR_EL1, 		HDFGRTR, PMMIR_EL1, 1),
+	SR_FGT(SYS_PMSELR_EL0, 		HDFGRTR, PMSELR_EL0, 1),
+	SR_FGT(SYS_PMOVSCLR_EL0, 	HDFGRTR, PMOVS, 1),
+	SR_FGT(SYS_PMOVSSET_EL0, 	HDFGRTR, PMOVS, 1),
+	SR_FGT(SYS_PMINTENCLR_EL1, 	HDFGRTR, PMINTEN, 1),
+	SR_FGT(SYS_PMINTENSET_EL1, 	HDFGRTR, PMINTEN, 1),
+	SR_FGT(SYS_PMCNTENCLR_EL0, 	HDFGRTR, PMCNTEN, 1),
+	SR_FGT(SYS_PMCNTENSET_EL0, 	HDFGRTR, PMCNTEN, 1),
+	SR_FGT(SYS_PMCCNTR_EL0, 	HDFGRTR, PMCCNTR_EL0, 1),
+	SR_FGT(SYS_PMCCFILTR_EL0, 	HDFGRTR, PMCCFILTR_EL0, 1),
+	SR_FGT(SYS_PMEVTYPERn_EL0(0), 	HDFGRTR, PMEVTYPERn_EL0, 1),
+	SR_FGT(SYS_PMEVTYPERn_EL0(1), 	HDFGRTR, PMEVTYPERn_EL0, 1),
+	SR_FGT(SYS_PMEVTYPERn_EL0(2), 	HDFGRTR, PMEVTYPERn_EL0, 1),
+	SR_FGT(SYS_PMEVTYPERn_EL0(3), 	HDFGRTR, PMEVTYPERn_EL0, 1),
+	SR_FGT(SYS_PMEVTYPERn_EL0(4), 	HDFGRTR, PMEVTYPERn_EL0, 1),
+	SR_FGT(SYS_PMEVTYPERn_EL0(5), 	HDFGRTR, PMEVTYPERn_EL0, 1),
+	SR_FGT(SYS_PMEVTYPERn_EL0(6), 	HDFGRTR, PMEVTYPERn_EL0, 1),
+	SR_FGT(SYS_PMEVTYPERn_EL0(7), 	HDFGRTR, PMEVTYPERn_EL0, 1),
+	SR_FGT(SYS_PMEVTYPERn_EL0(8), 	HDFGRTR, PMEVTYPERn_EL0, 1),
+	SR_FGT(SYS_PMEVTYPERn_EL0(9), 	HDFGRTR, PMEVTYPERn_EL0, 1),
+	SR_FGT(SYS_PMEVTYPERn_EL0(10), 	HDFGRTR, PMEVTYPERn_EL0, 1),
+	SR_FGT(SYS_PMEVTYPERn_EL0(11), 	HDFGRTR, PMEVTYPERn_EL0, 1),
+	SR_FGT(SYS_PMEVTYPERn_EL0(12), 	HDFGRTR, PMEVTYPERn_EL0, 1),
+	SR_FGT(SYS_PMEVTYPERn_EL0(13), 	HDFGRTR, PMEVTYPERn_EL0, 1),
+	SR_FGT(SYS_PMEVTYPERn_EL0(14), 	HDFGRTR, PMEVTYPERn_EL0, 1),
+	SR_FGT(SYS_PMEVTYPERn_EL0(15), 	HDFGRTR, PMEVTYPERn_EL0, 1),
+	SR_FGT(SYS_PMEVTYPERn_EL0(16), 	HDFGRTR, PMEVTYPERn_EL0, 1),
+	SR_FGT(SYS_PMEVTYPERn_EL0(17), 	HDFGRTR, PMEVTYPERn_EL0, 1),
+	SR_FGT(SYS_PMEVTYPERn_EL0(18), 	HDFGRTR, PMEVTYPERn_EL0, 1),
+	SR_FGT(SYS_PMEVTYPERn_EL0(19), 	HDFGRTR, PMEVTYPERn_EL0, 1),
+	SR_FGT(SYS_PMEVTYPERn_EL0(20), 	HDFGRTR, PMEVTYPERn_EL0, 1),
+	SR_FGT(SYS_PMEVTYPERn_EL0(21), 	HDFGRTR, PMEVTYPERn_EL0, 1),
+	SR_FGT(SYS_PMEVTYPERn_EL0(22), 	HDFGRTR, PMEVTYPERn_EL0, 1),
+	SR_FGT(SYS_PMEVTYPERn_EL0(23), 	HDFGRTR, PMEVTYPERn_EL0, 1),
+	SR_FGT(SYS_PMEVTYPERn_EL0(24), 	HDFGRTR, PMEVTYPERn_EL0, 1),
+	SR_FGT(SYS_PMEVTYPERn_EL0(25), 	HDFGRTR, PMEVTYPERn_EL0, 1),
+	SR_FGT(SYS_PMEVTYPERn_EL0(26), 	HDFGRTR, PMEVTYPERn_EL0, 1),
+	SR_FGT(SYS_PMEVTYPERn_EL0(27), 	HDFGRTR, PMEVTYPERn_EL0, 1),
+	SR_FGT(SYS_PMEVTYPERn_EL0(28), 	HDFGRTR, PMEVTYPERn_EL0, 1),
+	SR_FGT(SYS_PMEVTYPERn_EL0(29), 	HDFGRTR, PMEVTYPERn_EL0, 1),
+	SR_FGT(SYS_PMEVTYPERn_EL0(30), 	HDFGRTR, PMEVTYPERn_EL0, 1),
+	SR_FGT(SYS_PMEVCNTRn_EL0(0), 	HDFGRTR, PMEVCNTRn_EL0, 1),
+	SR_FGT(SYS_PMEVCNTRn_EL0(1), 	HDFGRTR, PMEVCNTRn_EL0, 1),
+	SR_FGT(SYS_PMEVCNTRn_EL0(2), 	HDFGRTR, PMEVCNTRn_EL0, 1),
+	SR_FGT(SYS_PMEVCNTRn_EL0(3), 	HDFGRTR, PMEVCNTRn_EL0, 1),
+	SR_FGT(SYS_PMEVCNTRn_EL0(4), 	HDFGRTR, PMEVCNTRn_EL0, 1),
+	SR_FGT(SYS_PMEVCNTRn_EL0(5), 	HDFGRTR, PMEVCNTRn_EL0, 1),
+	SR_FGT(SYS_PMEVCNTRn_EL0(6), 	HDFGRTR, PMEVCNTRn_EL0, 1),
+	SR_FGT(SYS_PMEVCNTRn_EL0(7), 	HDFGRTR, PMEVCNTRn_EL0, 1),
+	SR_FGT(SYS_PMEVCNTRn_EL0(8), 	HDFGRTR, PMEVCNTRn_EL0, 1),
+	SR_FGT(SYS_PMEVCNTRn_EL0(9), 	HDFGRTR, PMEVCNTRn_EL0, 1),
+	SR_FGT(SYS_PMEVCNTRn_EL0(10), 	HDFGRTR, PMEVCNTRn_EL0, 1),
+	SR_FGT(SYS_PMEVCNTRn_EL0(11), 	HDFGRTR, PMEVCNTRn_EL0, 1),
+	SR_FGT(SYS_PMEVCNTRn_EL0(12), 	HDFGRTR, PMEVCNTRn_EL0, 1),
+	SR_FGT(SYS_PMEVCNTRn_EL0(13), 	HDFGRTR, PMEVCNTRn_EL0, 1),
+	SR_FGT(SYS_PMEVCNTRn_EL0(14), 	HDFGRTR, PMEVCNTRn_EL0, 1),
+	SR_FGT(SYS_PMEVCNTRn_EL0(15), 	HDFGRTR, PMEVCNTRn_EL0, 1),
+	SR_FGT(SYS_PMEVCNTRn_EL0(16), 	HDFGRTR, PMEVCNTRn_EL0, 1),
+	SR_FGT(SYS_PMEVCNTRn_EL0(17), 	HDFGRTR, PMEVCNTRn_EL0, 1),
+	SR_FGT(SYS_PMEVCNTRn_EL0(18), 	HDFGRTR, PMEVCNTRn_EL0, 1),
+	SR_FGT(SYS_PMEVCNTRn_EL0(19), 	HDFGRTR, PMEVCNTRn_EL0, 1),
+	SR_FGT(SYS_PMEVCNTRn_EL0(20), 	HDFGRTR, PMEVCNTRn_EL0, 1),
+	SR_FGT(SYS_PMEVCNTRn_EL0(21), 	HDFGRTR, PMEVCNTRn_EL0, 1),
+	SR_FGT(SYS_PMEVCNTRn_EL0(22), 	HDFGRTR, PMEVCNTRn_EL0, 1),
+	SR_FGT(SYS_PMEVCNTRn_EL0(23), 	HDFGRTR, PMEVCNTRn_EL0, 1),
+	SR_FGT(SYS_PMEVCNTRn_EL0(24), 	HDFGRTR, PMEVCNTRn_EL0, 1),
+	SR_FGT(SYS_PMEVCNTRn_EL0(25), 	HDFGRTR, PMEVCNTRn_EL0, 1),
+	SR_FGT(SYS_PMEVCNTRn_EL0(26), 	HDFGRTR, PMEVCNTRn_EL0, 1),
+	SR_FGT(SYS_PMEVCNTRn_EL0(27), 	HDFGRTR, PMEVCNTRn_EL0, 1),
+	SR_FGT(SYS_PMEVCNTRn_EL0(28), 	HDFGRTR, PMEVCNTRn_EL0, 1),
+	SR_FGT(SYS_PMEVCNTRn_EL0(29), 	HDFGRTR, PMEVCNTRn_EL0, 1),
+	SR_FGT(SYS_PMEVCNTRn_EL0(30), 	HDFGRTR, PMEVCNTRn_EL0, 1),
+	SR_FGT(SYS_OSDLR_EL1, 		HDFGRTR, OSDLR_EL1, 1),
+	SR_FGT(SYS_OSECCR_EL1, 		HDFGRTR, OSECCR_EL1, 1),
+	SR_FGT(SYS_OSLSR_EL1, 		HDFGRTR, OSLSR_EL1, 1),
+	SR_FGT(SYS_DBGPRCR_EL1, 	HDFGRTR, DBGPRCR_EL1, 1),
+	SR_FGT(SYS_DBGAUTHSTATUS_EL1, 	HDFGRTR, DBGAUTHSTATUS_EL1, 1),
+	SR_FGT(SYS_DBGCLAIMSET_EL1, 	HDFGRTR, DBGCLAIM, 1),
+	SR_FGT(SYS_DBGCLAIMCLR_EL1, 	HDFGRTR, DBGCLAIM, 1),
+	SR_FGT(SYS_MDSCR_EL1, 		HDFGRTR, MDSCR_EL1, 1),
+	/*
+	 * The trap bits capture *64* debug registers per bit, but the
+	 * ARM ARM only describes the encoding for the first 16, and
+	 * we don't really support more than that anyway.
+	 */
+	SR_FGT(SYS_DBGWVRn_EL1(0), 	HDFGRTR, DBGWVRn_EL1, 1),
+	SR_FGT(SYS_DBGWVRn_EL1(1), 	HDFGRTR, DBGWVRn_EL1, 1),
+	SR_FGT(SYS_DBGWVRn_EL1(2), 	HDFGRTR, DBGWVRn_EL1, 1),
+	SR_FGT(SYS_DBGWVRn_EL1(3), 	HDFGRTR, DBGWVRn_EL1, 1),
+	SR_FGT(SYS_DBGWVRn_EL1(4), 	HDFGRTR, DBGWVRn_EL1, 1),
+	SR_FGT(SYS_DBGWVRn_EL1(5), 	HDFGRTR, DBGWVRn_EL1, 1),
+	SR_FGT(SYS_DBGWVRn_EL1(6), 	HDFGRTR, DBGWVRn_EL1, 1),
+	SR_FGT(SYS_DBGWVRn_EL1(7), 	HDFGRTR, DBGWVRn_EL1, 1),
+	SR_FGT(SYS_DBGWVRn_EL1(8), 	HDFGRTR, DBGWVRn_EL1, 1),
+	SR_FGT(SYS_DBGWVRn_EL1(9), 	HDFGRTR, DBGWVRn_EL1, 1),
+	SR_FGT(SYS_DBGWVRn_EL1(10), 	HDFGRTR, DBGWVRn_EL1, 1),
+	SR_FGT(SYS_DBGWVRn_EL1(11), 	HDFGRTR, DBGWVRn_EL1, 1),
+	SR_FGT(SYS_DBGWVRn_EL1(12), 	HDFGRTR, DBGWVRn_EL1, 1),
+	SR_FGT(SYS_DBGWVRn_EL1(13), 	HDFGRTR, DBGWVRn_EL1, 1),
+	SR_FGT(SYS_DBGWVRn_EL1(14), 	HDFGRTR, DBGWVRn_EL1, 1),
+	SR_FGT(SYS_DBGWVRn_EL1(15), 	HDFGRTR, DBGWVRn_EL1, 1),
+	SR_FGT(SYS_DBGWCRn_EL1(0), 	HDFGRTR, DBGWCRn_EL1, 1),
+	SR_FGT(SYS_DBGWCRn_EL1(1), 	HDFGRTR, DBGWCRn_EL1, 1),
+	SR_FGT(SYS_DBGWCRn_EL1(2), 	HDFGRTR, DBGWCRn_EL1, 1),
+	SR_FGT(SYS_DBGWCRn_EL1(3), 	HDFGRTR, DBGWCRn_EL1, 1),
+	SR_FGT(SYS_DBGWCRn_EL1(4), 	HDFGRTR, DBGWCRn_EL1, 1),
+	SR_FGT(SYS_DBGWCRn_EL1(5), 	HDFGRTR, DBGWCRn_EL1, 1),
+	SR_FGT(SYS_DBGWCRn_EL1(6), 	HDFGRTR, DBGWCRn_EL1, 1),
+	SR_FGT(SYS_DBGWCRn_EL1(7), 	HDFGRTR, DBGWCRn_EL1, 1),
+	SR_FGT(SYS_DBGWCRn_EL1(8), 	HDFGRTR, DBGWCRn_EL1, 1),
+	SR_FGT(SYS_DBGWCRn_EL1(9), 	HDFGRTR, DBGWCRn_EL1, 1),
+	SR_FGT(SYS_DBGWCRn_EL1(10), 	HDFGRTR, DBGWCRn_EL1, 1),
+	SR_FGT(SYS_DBGWCRn_EL1(11), 	HDFGRTR, DBGWCRn_EL1, 1),
+	SR_FGT(SYS_DBGWCRn_EL1(12), 	HDFGRTR, DBGWCRn_EL1, 1),
+	SR_FGT(SYS_DBGWCRn_EL1(13), 	HDFGRTR, DBGWCRn_EL1, 1),
+	SR_FGT(SYS_DBGWCRn_EL1(14), 	HDFGRTR, DBGWCRn_EL1, 1),
+	SR_FGT(SYS_DBGWCRn_EL1(15), 	HDFGRTR, DBGWCRn_EL1, 1),
+	SR_FGT(SYS_DBGBVRn_EL1(0), 	HDFGRTR, DBGBVRn_EL1, 1),
+	SR_FGT(SYS_DBGBVRn_EL1(1), 	HDFGRTR, DBGBVRn_EL1, 1),
+	SR_FGT(SYS_DBGBVRn_EL1(2), 	HDFGRTR, DBGBVRn_EL1, 1),
+	SR_FGT(SYS_DBGBVRn_EL1(3), 	HDFGRTR, DBGBVRn_EL1, 1),
+	SR_FGT(SYS_DBGBVRn_EL1(4), 	HDFGRTR, DBGBVRn_EL1, 1),
+	SR_FGT(SYS_DBGBVRn_EL1(5), 	HDFGRTR, DBGBVRn_EL1, 1),
+	SR_FGT(SYS_DBGBVRn_EL1(6), 	HDFGRTR, DBGBVRn_EL1, 1),
+	SR_FGT(SYS_DBGBVRn_EL1(7), 	HDFGRTR, DBGBVRn_EL1, 1),
+	SR_FGT(SYS_DBGBVRn_EL1(8), 	HDFGRTR, DBGBVRn_EL1, 1),
+	SR_FGT(SYS_DBGBVRn_EL1(9), 	HDFGRTR, DBGBVRn_EL1, 1),
+	SR_FGT(SYS_DBGBVRn_EL1(10), 	HDFGRTR, DBGBVRn_EL1, 1),
+	SR_FGT(SYS_DBGBVRn_EL1(11), 	HDFGRTR, DBGBVRn_EL1, 1),
+	SR_FGT(SYS_DBGBVRn_EL1(12), 	HDFGRTR, DBGBVRn_EL1, 1),
+	SR_FGT(SYS_DBGBVRn_EL1(13), 	HDFGRTR, DBGBVRn_EL1, 1),
+	SR_FGT(SYS_DBGBVRn_EL1(14), 	HDFGRTR, DBGBVRn_EL1, 1),
+	SR_FGT(SYS_DBGBVRn_EL1(15), 	HDFGRTR, DBGBVRn_EL1, 1),
+	SR_FGT(SYS_DBGBCRn_EL1(0), 	HDFGRTR, DBGBCRn_EL1, 1),
+	SR_FGT(SYS_DBGBCRn_EL1(1), 	HDFGRTR, DBGBCRn_EL1, 1),
+	SR_FGT(SYS_DBGBCRn_EL1(2), 	HDFGRTR, DBGBCRn_EL1, 1),
+	SR_FGT(SYS_DBGBCRn_EL1(3), 	HDFGRTR, DBGBCRn_EL1, 1),
+	SR_FGT(SYS_DBGBCRn_EL1(4), 	HDFGRTR, DBGBCRn_EL1, 1),
+	SR_FGT(SYS_DBGBCRn_EL1(5), 	HDFGRTR, DBGBCRn_EL1, 1),
+	SR_FGT(SYS_DBGBCRn_EL1(6), 	HDFGRTR, DBGBCRn_EL1, 1),
+	SR_FGT(SYS_DBGBCRn_EL1(7), 	HDFGRTR, DBGBCRn_EL1, 1),
+	SR_FGT(SYS_DBGBCRn_EL1(8), 	HDFGRTR, DBGBCRn_EL1, 1),
+	SR_FGT(SYS_DBGBCRn_EL1(9), 	HDFGRTR, DBGBCRn_EL1, 1),
+	SR_FGT(SYS_DBGBCRn_EL1(10), 	HDFGRTR, DBGBCRn_EL1, 1),
+	SR_FGT(SYS_DBGBCRn_EL1(11), 	HDFGRTR, DBGBCRn_EL1, 1),
+	SR_FGT(SYS_DBGBCRn_EL1(12), 	HDFGRTR, DBGBCRn_EL1, 1),
+	SR_FGT(SYS_DBGBCRn_EL1(13), 	HDFGRTR, DBGBCRn_EL1, 1),
+	SR_FGT(SYS_DBGBCRn_EL1(14), 	HDFGRTR, DBGBCRn_EL1, 1),
+	SR_FGT(SYS_DBGBCRn_EL1(15), 	HDFGRTR, DBGBCRn_EL1, 1),
+	/*
+	 * HDFGWTR_EL2
+	 *
+	 * Although HDFGRTR_EL2 and HDFGWTR_EL2 registers largely
+	 * overlap in their bit assignment, there are a number of bits
+	 * that are RES0 on one side, and an actual trap bit on the
+	 * other.  The policy chosen here is to describe all the
+	 * read-side mappings, and only the write-side mappings that
+	 * differ from the read side, and the trap handler will pick
+	 * the correct shadow register based on the access type.
+	 */
+	SR_FGT(SYS_TRFCR_EL1,		HDFGWTR, TRFCR_EL1, 1),
+	SR_FGT(SYS_TRCOSLAR,		HDFGWTR, TRCOSLAR, 1),
+	SR_FGT(SYS_PMCR_EL0,		HDFGWTR, PMCR_EL0, 1),
+	SR_FGT(SYS_PMSWINC_EL0,		HDFGWTR, PMSWINC_EL0, 1),
+	SR_FGT(SYS_OSLAR_EL1,		HDFGWTR, OSLAR_EL1, 1),
+};
+
+static union trap_config get_trap_config(u32 sysreg)
+{
+	return (union trap_config) {
+		.val = xa_to_value(xa_load(&sr_forward_xa, sysreg)),
+	};
+}
+
+static __init void print_nv_trap_error(const struct encoding_to_trap_config *tc,
+				       const char *type, int err)
+{
+	kvm_err("%s line %d encoding range "
+		"(%d, %d, %d, %d, %d) - (%d, %d, %d, %d, %d) (err=%d)\n",
+		type, tc->line,
+		sys_reg_Op0(tc->encoding), sys_reg_Op1(tc->encoding),
+		sys_reg_CRn(tc->encoding), sys_reg_CRm(tc->encoding),
+		sys_reg_Op2(tc->encoding),
+		sys_reg_Op0(tc->end), sys_reg_Op1(tc->end),
+		sys_reg_CRn(tc->end), sys_reg_CRm(tc->end),
+		sys_reg_Op2(tc->end),
+		err);
+}
+
+int __init populate_nv_trap_config(void)
+{
+	int ret = 0;
+
+	BUILD_BUG_ON(sizeof(union trap_config) != sizeof(void *));
+	BUILD_BUG_ON(__NR_CGT_GROUP_IDS__ > BIT(TC_CGT_BITS));
+	BUILD_BUG_ON(__NR_FGT_GROUP_IDS__ > BIT(TC_FGT_BITS));
+	BUILD_BUG_ON(__NR_FG_FILTER_IDS__ > BIT(TC_FGF_BITS));
+
+	for (int i = 0; i < ARRAY_SIZE(encoding_to_cgt); i++) {
+		const struct encoding_to_trap_config *cgt = &encoding_to_cgt[i];
+		void *prev;
+
+		if (cgt->tc.val & BIT(63)) {
+			kvm_err("CGT[%d] has MBZ bit set\n", i);
+			ret = -EINVAL;
+		}
+
+		if (cgt->encoding != cgt->end) {
+			prev = xa_store_range(&sr_forward_xa,
+					      cgt->encoding, cgt->end,
+					      xa_mk_value(cgt->tc.val),
+					      GFP_KERNEL);
+		} else {
+			prev = xa_store(&sr_forward_xa, cgt->encoding,
+					xa_mk_value(cgt->tc.val), GFP_KERNEL);
+			if (prev && !xa_is_err(prev)) {
+				ret = -EINVAL;
+				print_nv_trap_error(cgt, "Duplicate CGT", ret);
+			}
+		}
+
+		if (xa_is_err(prev)) {
+			ret = xa_err(prev);
+			print_nv_trap_error(cgt, "Failed CGT insertion", ret);
+		}
+	}
+
+	kvm_info("nv: %ld coarse grained trap handlers\n",
+		 ARRAY_SIZE(encoding_to_cgt));
+
+	if (!cpus_have_final_cap(ARM64_HAS_FGT))
+		goto check_mcb;
+
+	for (int i = 0; i < ARRAY_SIZE(encoding_to_fgt); i++) {
+		const struct encoding_to_trap_config *fgt = &encoding_to_fgt[i];
+		union trap_config tc;
+
+		if (fgt->tc.fgt >= __NR_FGT_GROUP_IDS__) {
+			ret = -EINVAL;
+			print_nv_trap_error(fgt, "Invalid FGT", ret);
+		}
+
+		tc = get_trap_config(fgt->encoding);
+
+		if (tc.fgt) {
+			ret = -EINVAL;
+			print_nv_trap_error(fgt, "Duplicate FGT", ret);
+		}
+
+		tc.val |= fgt->tc.val;
+		xa_store(&sr_forward_xa, fgt->encoding,
+			 xa_mk_value(tc.val), GFP_KERNEL);
+	}
+
+	kvm_info("nv: %ld fine grained trap handlers\n",
+		 ARRAY_SIZE(encoding_to_fgt));
+
+check_mcb:
+	for (int id = __MULTIPLE_CONTROL_BITS__; id < __COMPLEX_CONDITIONS__; id++) {
+		const enum cgt_group_id *cgids;
+
+		cgids = coarse_control_combo[id - __MULTIPLE_CONTROL_BITS__];
+
+		for (int i = 0; cgids[i] != __RESERVED__; i++) {
+			if (cgids[i] >= __MULTIPLE_CONTROL_BITS__) {
+				kvm_err("Recursive MCB %d/%d\n", id, cgids[i]);
+				ret = -EINVAL;
+			}
+		}
+	}
+
+	if (ret)
+		xa_destroy(&sr_forward_xa);
+
+	return ret;
+}
+
+static enum trap_behaviour get_behaviour(struct kvm_vcpu *vcpu,
+					 const struct trap_bits *tb)
+{
+	enum trap_behaviour b = BEHAVE_HANDLE_LOCALLY;
+	u64 val;
+
+	val = __vcpu_sys_reg(vcpu, tb->index);
+	if ((val & tb->mask) == tb->value)
+		b |= tb->behaviour;
+
+	return b;
+}
+
+static enum trap_behaviour __compute_trap_behaviour(struct kvm_vcpu *vcpu,
+						    const enum cgt_group_id id,
+						    enum trap_behaviour b)
+{
+	switch (id) {
+		const enum cgt_group_id *cgids;
+
+	case __RESERVED__ ... __MULTIPLE_CONTROL_BITS__ - 1:
+		if (likely(id != __RESERVED__))
+			b |= get_behaviour(vcpu, &coarse_trap_bits[id]);
+		break;
+	case __MULTIPLE_CONTROL_BITS__ ... __COMPLEX_CONDITIONS__ - 1:
+		/* Yes, this is recursive. Don't do anything stupid. */
+		cgids = coarse_control_combo[id - __MULTIPLE_CONTROL_BITS__];
+		for (int i = 0; cgids[i] != __RESERVED__; i++)
+			b |= __compute_trap_behaviour(vcpu, cgids[i], b);
+		break;
+	default:
+		if (ARRAY_SIZE(ccc))
+			b |= ccc[id -  __COMPLEX_CONDITIONS__](vcpu);
+		break;
+	}
+
+	return b;
+}
+
+static enum trap_behaviour compute_trap_behaviour(struct kvm_vcpu *vcpu,
+						  const union trap_config tc)
+{
+	enum trap_behaviour b = BEHAVE_HANDLE_LOCALLY;
+
+	return __compute_trap_behaviour(vcpu, tc.cgt, b);
+}
+
+static bool check_fgt_bit(u64 val, const union trap_config tc)
+{
+	return ((val >> tc.bit) & 1) == tc.pol;
+}
+
+#define sanitised_sys_reg(vcpu, reg)			\
+	({						\
+		u64 __val;				\
+		__val = __vcpu_sys_reg(vcpu, reg);	\
+		__val &= ~__ ## reg ## _RES0;		\
+		(__val);				\
+	})
+
+bool __check_nv_sr_forward(struct kvm_vcpu *vcpu)
+{
+	union trap_config tc;
+	enum trap_behaviour b;
+	bool is_read;
+	u32 sysreg;
+	u64 esr, val;
+
+	if (!vcpu_has_nv(vcpu) || is_hyp_ctxt(vcpu))
+		return false;
+
+	esr = kvm_vcpu_get_esr(vcpu);
+	sysreg = esr_sys64_to_sysreg(esr);
+	is_read = (esr & ESR_ELx_SYS64_ISS_DIR_MASK) == ESR_ELx_SYS64_ISS_DIR_READ;
+
+	tc = get_trap_config(sysreg);
+
+	/*
+	 * A value of 0 for the whole entry means that we know nothing
+	 * for this sysreg, and that it cannot be re-injected into the
+	 * nested hypervisor. In this situation, let's cut it short.
+	 *
+	 * Note that ultimately, we could also make use of the xarray
+	 * to store the index of the sysreg in the local descriptor
+	 * array, avoiding another search... Hint, hint...
+	 */
+	if (!tc.val)
+		return false;
+
+	switch ((enum fgt_group_id)tc.fgt) {
+	case __NO_FGT_GROUP__:
+		break;
+
+	case HFGxTR_GROUP:
+		if (is_read)
+			val = sanitised_sys_reg(vcpu, HFGRTR_EL2);
+		else
+			val = sanitised_sys_reg(vcpu, HFGWTR_EL2);
+		break;
+
+	case HDFGRTR_GROUP:
+	case HDFGWTR_GROUP:
+		if (is_read)
+			val = sanitised_sys_reg(vcpu, HDFGRTR_EL2);
+		else
+			val = sanitised_sys_reg(vcpu, HDFGWTR_EL2);
+		break;
+
+	case HFGITR_GROUP:
+		val = sanitised_sys_reg(vcpu, HFGITR_EL2);
+		switch (tc.fgf) {
+			u64 tmp;
+
+		case __NO_FGF__:
+			break;
+
+		case HCRX_FGTnXS:
+			tmp = sanitised_sys_reg(vcpu, HCRX_EL2);
+			if (tmp & HCRX_EL2_FGTnXS)
+				tc.fgt = __NO_FGT_GROUP__;
+		}
+		break;
+
+	case __NR_FGT_GROUP_IDS__:
+		/* Something is really wrong, bail out */
+		WARN_ONCE(1, "__NR_FGT_GROUP_IDS__");
+		return false;
+	}
+
+	if (tc.fgt != __NO_FGT_GROUP__ && check_fgt_bit(val, tc))
+		goto inject;
+
+	b = compute_trap_behaviour(vcpu, tc);
+
+	if (((b & BEHAVE_FORWARD_READ) && is_read) ||
+	    ((b & BEHAVE_FORWARD_WRITE) && !is_read))
+		goto inject;
+
+	return false;
+
+inject:
+	trace_kvm_forward_sysreg_trap(vcpu, sysreg, is_read);
+
+	kvm_inject_nested_sync(vcpu, kvm_vcpu_get_esr(vcpu));
+	return true;
+}
+
 static u64 kvm_check_illegal_exception_return(struct kvm_vcpu *vcpu, u64 spsr)
 {
 	u64 mode = spsr & PSR_MODE_MASK;
diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c
index 20280a5233f6..95f6945c4432 100644
--- a/arch/arm64/kvm/guest.c
+++ b/arch/arm64/kvm/guest.c
@@ -884,21 +884,6 @@ u32 __attribute_const__ kvm_target_cpu(void)
 	return KVM_ARM_TARGET_GENERIC_V8;
 }
 
-void kvm_vcpu_preferred_target(struct kvm_vcpu_init *init)
-{
-	u32 target = kvm_target_cpu();
-
-	memset(init, 0, sizeof(*init));
-
-	/*
-	 * For now, we don't return any features.
-	 * In future, we might use features to return target
-	 * specific features available for the preferred
-	 * target type.
-	 */
-	init->target = (__u32)target;
-}
-
 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
 {
 	return -EINVAL;
diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c
index 6dcd6604b6bc..617ae6dea5d5 100644
--- a/arch/arm64/kvm/handle_exit.c
+++ b/arch/arm64/kvm/handle_exit.c
@@ -222,7 +222,33 @@ static int kvm_handle_eret(struct kvm_vcpu *vcpu)
 	if (kvm_vcpu_get_esr(vcpu) & ESR_ELx_ERET_ISS_ERET)
 		return kvm_handle_ptrauth(vcpu);
 
-	kvm_emulate_nested_eret(vcpu);
+	/*
+	 * If we got here, two possibilities:
+	 *
+	 * - the guest is in EL2, and we need to fully emulate ERET
+	 *
+	 * - the guest is in EL1, and we need to reinject the
+         *   exception into the L1 hypervisor.
+	 *
+	 * If KVM ever traps ERET for its own use, we'll have to
+	 * revisit this.
+	 */
+	if (is_hyp_ctxt(vcpu))
+		kvm_emulate_nested_eret(vcpu);
+	else
+		kvm_inject_nested_sync(vcpu, kvm_vcpu_get_esr(vcpu));
+
+	return 1;
+}
+
+static int handle_svc(struct kvm_vcpu *vcpu)
+{
+	/*
+	 * So far, SVC traps only for NV via HFGITR_EL2. A SVC from a
+	 * 32bit guest would be caught by vpcu_mode_is_bad_32bit(), so
+	 * we should only have to deal with a 64 bit exception.
+	 */
+	kvm_inject_nested_sync(vcpu, kvm_vcpu_get_esr(vcpu));
 	return 1;
 }
 
@@ -239,6 +265,7 @@ static exit_handle_fn arm_exit_handlers[] = {
 	[ESR_ELx_EC_SMC32]	= handle_smc,
 	[ESR_ELx_EC_HVC64]	= handle_hvc,
 	[ESR_ELx_EC_SMC64]	= handle_smc,
+	[ESR_ELx_EC_SVC64]	= handle_svc,
 	[ESR_ELx_EC_SYS64]	= kvm_handle_sys_reg,
 	[ESR_ELx_EC_SVE]	= handle_sve,
 	[ESR_ELx_EC_ERET]	= kvm_handle_eret,
diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h
index 34f222af6165..9cfe6bd1dbe4 100644
--- a/arch/arm64/kvm/hyp/include/hyp/switch.h
+++ b/arch/arm64/kvm/hyp/include/hyp/switch.h
@@ -70,20 +70,26 @@ static inline void __activate_traps_fpsimd32(struct kvm_vcpu *vcpu)
 	}
 }
 
-static inline bool __hfgxtr_traps_required(void)
-{
-	if (cpus_have_final_cap(ARM64_SME))
-		return true;
-
-	if (cpus_have_final_cap(ARM64_WORKAROUND_AMPERE_AC03_CPU_38))
-		return true;
+#define compute_clr_set(vcpu, reg, clr, set)				\
+	do {								\
+		u64 hfg;						\
+		hfg = __vcpu_sys_reg(vcpu, reg) & ~__ ## reg ## _RES0;	\
+		set |= hfg & __ ## reg ## _MASK; 			\
+		clr |= ~hfg & __ ## reg ## _nMASK; 			\
+	} while(0)
 
-	return false;
-}
 
-static inline void __activate_traps_hfgxtr(void)
+static inline void __activate_traps_hfgxtr(struct kvm_vcpu *vcpu)
 {
+	struct kvm_cpu_context *hctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt;
 	u64 r_clr = 0, w_clr = 0, r_set = 0, w_set = 0, tmp;
+	u64 r_val, w_val;
+
+	if (!cpus_have_final_cap(ARM64_HAS_FGT))
+		return;
+
+	ctxt_sys_reg(hctxt, HFGRTR_EL2) = read_sysreg_s(SYS_HFGRTR_EL2);
+	ctxt_sys_reg(hctxt, HFGWTR_EL2) = read_sysreg_s(SYS_HFGWTR_EL2);
 
 	if (cpus_have_final_cap(ARM64_SME)) {
 		tmp = HFGxTR_EL2_nSMPRI_EL1_MASK | HFGxTR_EL2_nTPIDR2_EL0_MASK;
@@ -98,26 +104,72 @@ static inline void __activate_traps_hfgxtr(void)
 	if (cpus_have_final_cap(ARM64_WORKAROUND_AMPERE_AC03_CPU_38))
 		w_set |= HFGxTR_EL2_TCR_EL1_MASK;
 
-	sysreg_clear_set_s(SYS_HFGRTR_EL2, r_clr, r_set);
-	sysreg_clear_set_s(SYS_HFGWTR_EL2, w_clr, w_set);
+	if (vcpu_has_nv(vcpu) && !is_hyp_ctxt(vcpu)) {
+		compute_clr_set(vcpu, HFGRTR_EL2, r_clr, r_set);
+		compute_clr_set(vcpu, HFGWTR_EL2, w_clr, w_set);
+	}
+
+	/* The default is not to trap anything but ACCDATA_EL1 */
+	r_val = __HFGRTR_EL2_nMASK & ~HFGxTR_EL2_nACCDATA_EL1;
+	r_val |= r_set;
+	r_val &= ~r_clr;
+
+	w_val = __HFGWTR_EL2_nMASK & ~HFGxTR_EL2_nACCDATA_EL1;
+	w_val |= w_set;
+	w_val &= ~w_clr;
+
+	write_sysreg_s(r_val, SYS_HFGRTR_EL2);
+	write_sysreg_s(w_val, SYS_HFGWTR_EL2);
+
+	if (!vcpu_has_nv(vcpu) || is_hyp_ctxt(vcpu))
+		return;
+
+	ctxt_sys_reg(hctxt, HFGITR_EL2) = read_sysreg_s(SYS_HFGITR_EL2);
+
+	r_set = r_clr = 0;
+	compute_clr_set(vcpu, HFGITR_EL2, r_clr, r_set);
+	r_val = __HFGITR_EL2_nMASK;
+	r_val |= r_set;
+	r_val &= ~r_clr;
+
+	write_sysreg_s(r_val, SYS_HFGITR_EL2);
+
+	ctxt_sys_reg(hctxt, HDFGRTR_EL2) = read_sysreg_s(SYS_HDFGRTR_EL2);
+	ctxt_sys_reg(hctxt, HDFGWTR_EL2) = read_sysreg_s(SYS_HDFGWTR_EL2);
+
+	r_clr = r_set = w_clr = w_set = 0;
+
+	compute_clr_set(vcpu, HDFGRTR_EL2, r_clr, r_set);
+	compute_clr_set(vcpu, HDFGWTR_EL2, w_clr, w_set);
+
+	r_val = __HDFGRTR_EL2_nMASK;
+	r_val |= r_set;
+	r_val &= ~r_clr;
+
+	w_val = __HDFGWTR_EL2_nMASK;
+	w_val |= w_set;
+	w_val &= ~w_clr;
+
+	write_sysreg_s(r_val, SYS_HDFGRTR_EL2);
+	write_sysreg_s(w_val, SYS_HDFGWTR_EL2);
 }
 
-static inline void __deactivate_traps_hfgxtr(void)
+static inline void __deactivate_traps_hfgxtr(struct kvm_vcpu *vcpu)
 {
-	u64 r_clr = 0, w_clr = 0, r_set = 0, w_set = 0, tmp;
+	struct kvm_cpu_context *hctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt;
 
-	if (cpus_have_final_cap(ARM64_SME)) {
-		tmp = HFGxTR_EL2_nSMPRI_EL1_MASK | HFGxTR_EL2_nTPIDR2_EL0_MASK;
+	if (!cpus_have_final_cap(ARM64_HAS_FGT))
+		return;
 
-		r_set |= tmp;
-		w_set |= tmp;
-	}
+	write_sysreg_s(ctxt_sys_reg(hctxt, HFGRTR_EL2), SYS_HFGRTR_EL2);
+	write_sysreg_s(ctxt_sys_reg(hctxt, HFGWTR_EL2), SYS_HFGWTR_EL2);
 
-	if (cpus_have_final_cap(ARM64_WORKAROUND_AMPERE_AC03_CPU_38))
-		w_clr |= HFGxTR_EL2_TCR_EL1_MASK;
+	if (!vcpu_has_nv(vcpu) || is_hyp_ctxt(vcpu))
+		return;
 
-	sysreg_clear_set_s(SYS_HFGRTR_EL2, r_clr, r_set);
-	sysreg_clear_set_s(SYS_HFGWTR_EL2, w_clr, w_set);
+	write_sysreg_s(ctxt_sys_reg(hctxt, HFGITR_EL2), SYS_HFGITR_EL2);
+	write_sysreg_s(ctxt_sys_reg(hctxt, HDFGRTR_EL2), SYS_HDFGRTR_EL2);
+	write_sysreg_s(ctxt_sys_reg(hctxt, HDFGWTR_EL2), SYS_HDFGWTR_EL2);
 }
 
 static inline void __activate_traps_common(struct kvm_vcpu *vcpu)
@@ -145,8 +197,21 @@ static inline void __activate_traps_common(struct kvm_vcpu *vcpu)
 	vcpu->arch.mdcr_el2_host = read_sysreg(mdcr_el2);
 	write_sysreg(vcpu->arch.mdcr_el2, mdcr_el2);
 
-	if (__hfgxtr_traps_required())
-		__activate_traps_hfgxtr();
+	if (cpus_have_final_cap(ARM64_HAS_HCX)) {
+		u64 hcrx = HCRX_GUEST_FLAGS;
+		if (vcpu_has_nv(vcpu) && !is_hyp_ctxt(vcpu)) {
+			u64 clr = 0, set = 0;
+
+			compute_clr_set(vcpu, HCRX_EL2, clr, set);
+
+			hcrx |= set;
+			hcrx &= ~clr;
+		}
+
+		write_sysreg_s(hcrx, SYS_HCRX_EL2);
+	}
+
+	__activate_traps_hfgxtr(vcpu);
 }
 
 static inline void __deactivate_traps_common(struct kvm_vcpu *vcpu)
@@ -162,8 +227,10 @@ static inline void __deactivate_traps_common(struct kvm_vcpu *vcpu)
 		vcpu_clear_flag(vcpu, PMUSERENR_ON_CPU);
 	}
 
-	if (__hfgxtr_traps_required())
-		__deactivate_traps_hfgxtr();
+	if (cpus_have_final_cap(ARM64_HAS_HCX))
+		write_sysreg_s(HCRX_HOST_FLAGS, SYS_HCRX_EL2);
+
+	__deactivate_traps_hfgxtr(vcpu);
 }
 
 static inline void ___activate_traps(struct kvm_vcpu *vcpu)
@@ -177,9 +244,6 @@ static inline void ___activate_traps(struct kvm_vcpu *vcpu)
 
 	if (cpus_have_final_cap(ARM64_HAS_RAS_EXTN) && (hcr & HCR_VSE))
 		write_sysreg_s(vcpu->arch.vsesr_el2, SYS_VSESR_EL2);
-
-	if (cpus_have_final_cap(ARM64_HAS_HCX))
-		write_sysreg_s(HCRX_GUEST_FLAGS, SYS_HCRX_EL2);
 }
 
 static inline void ___deactivate_traps(struct kvm_vcpu *vcpu)
@@ -194,9 +258,6 @@ static inline void ___deactivate_traps(struct kvm_vcpu *vcpu)
 		vcpu->arch.hcr_el2 &= ~HCR_VSE;
 		vcpu->arch.hcr_el2 |= read_sysreg(hcr_el2) & HCR_VSE;
 	}
-
-	if (cpus_have_final_cap(ARM64_HAS_HCX))
-		write_sysreg_s(HCRX_HOST_FLAGS, SYS_HCRX_EL2);
 }
 
 static inline bool __populate_fault_info(struct kvm_vcpu *vcpu)
diff --git a/arch/arm64/kvm/hyp/include/nvhe/mm.h b/arch/arm64/kvm/hyp/include/nvhe/mm.h
index d5ec972b5c1e..230e4f2527de 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/mm.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/mm.h
@@ -26,6 +26,7 @@ int pkvm_create_mappings_locked(void *from, void *to, enum kvm_pgtable_prot prot
 int __pkvm_create_private_mapping(phys_addr_t phys, size_t size,
 				  enum kvm_pgtable_prot prot,
 				  unsigned long *haddr);
+int pkvm_create_stack(phys_addr_t phys, unsigned long *haddr);
 int pkvm_alloc_private_va_range(size_t size, unsigned long *haddr);
 
 #endif /* __KVM_HYP_MM_H */
diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
index a169c619db60..857d9bc04fd4 100644
--- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c
+++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
@@ -135,6 +135,16 @@ static void handle___kvm_tlb_flush_vmid_ipa_nsh(struct kvm_cpu_context *host_ctx
 	__kvm_tlb_flush_vmid_ipa_nsh(kern_hyp_va(mmu), ipa, level);
 }
 
+static void
+handle___kvm_tlb_flush_vmid_range(struct kvm_cpu_context *host_ctxt)
+{
+	DECLARE_REG(struct kvm_s2_mmu *, mmu, host_ctxt, 1);
+	DECLARE_REG(phys_addr_t, start, host_ctxt, 2);
+	DECLARE_REG(unsigned long, pages, host_ctxt, 3);
+
+	__kvm_tlb_flush_vmid_range(kern_hyp_va(mmu), start, pages);
+}
+
 static void handle___kvm_tlb_flush_vmid(struct kvm_cpu_context *host_ctxt)
 {
 	DECLARE_REG(struct kvm_s2_mmu *, mmu, host_ctxt, 1);
@@ -327,6 +337,7 @@ static const hcall_t host_hcall[] = {
 	HANDLE_FUNC(__kvm_tlb_flush_vmid_ipa),
 	HANDLE_FUNC(__kvm_tlb_flush_vmid_ipa_nsh),
 	HANDLE_FUNC(__kvm_tlb_flush_vmid),
+	HANDLE_FUNC(__kvm_tlb_flush_vmid_range),
 	HANDLE_FUNC(__kvm_flush_cpu_context),
 	HANDLE_FUNC(__kvm_timer_set_cntvoff),
 	HANDLE_FUNC(__vgic_v3_read_vmcr),
diff --git a/arch/arm64/kvm/hyp/nvhe/mm.c b/arch/arm64/kvm/hyp/nvhe/mm.c
index 318298eb3d6b..65a7a186d7b2 100644
--- a/arch/arm64/kvm/hyp/nvhe/mm.c
+++ b/arch/arm64/kvm/hyp/nvhe/mm.c
@@ -44,6 +44,27 @@ static int __pkvm_create_mappings(unsigned long start, unsigned long size,
 	return err;
 }
 
+static int __pkvm_alloc_private_va_range(unsigned long start, size_t size)
+{
+	unsigned long cur;
+
+	hyp_assert_lock_held(&pkvm_pgd_lock);
+
+	if (!start || start < __io_map_base)
+		return -EINVAL;
+
+	/* The allocated size is always a multiple of PAGE_SIZE */
+	cur = start + PAGE_ALIGN(size);
+
+	/* Are we overflowing on the vmemmap ? */
+	if (cur > __hyp_vmemmap)
+		return -ENOMEM;
+
+	__io_map_base = cur;
+
+	return 0;
+}
+
 /**
  * pkvm_alloc_private_va_range - Allocates a private VA range.
  * @size:	The size of the VA range to reserve.
@@ -56,27 +77,16 @@ static int __pkvm_create_mappings(unsigned long start, unsigned long size,
  */
 int pkvm_alloc_private_va_range(size_t size, unsigned long *haddr)
 {
-	unsigned long base, addr;
-	int ret = 0;
+	unsigned long addr;
+	int ret;
 
 	hyp_spin_lock(&pkvm_pgd_lock);
-
-	/* Align the allocation based on the order of its size */
-	addr = ALIGN(__io_map_base, PAGE_SIZE << get_order(size));
-
-	/* The allocated size is always a multiple of PAGE_SIZE */
-	base = addr + PAGE_ALIGN(size);
-
-	/* Are we overflowing on the vmemmap ? */
-	if (!addr || base > __hyp_vmemmap)
-		ret = -ENOMEM;
-	else {
-		__io_map_base = base;
-		*haddr = addr;
-	}
-
+	addr = __io_map_base;
+	ret = __pkvm_alloc_private_va_range(addr, size);
 	hyp_spin_unlock(&pkvm_pgd_lock);
 
+	*haddr = addr;
+
 	return ret;
 }
 
@@ -340,6 +350,45 @@ int hyp_create_idmap(u32 hyp_va_bits)
 	return __pkvm_create_mappings(start, end - start, start, PAGE_HYP_EXEC);
 }
 
+int pkvm_create_stack(phys_addr_t phys, unsigned long *haddr)
+{
+	unsigned long addr, prev_base;
+	size_t size;
+	int ret;
+
+	hyp_spin_lock(&pkvm_pgd_lock);
+
+	prev_base = __io_map_base;
+	/*
+	 * Efficient stack verification using the PAGE_SHIFT bit implies
+	 * an alignment of our allocation on the order of the size.
+	 */
+	size = PAGE_SIZE * 2;
+	addr = ALIGN(__io_map_base, size);
+
+	ret = __pkvm_alloc_private_va_range(addr, size);
+	if (!ret) {
+		/*
+		 * Since the stack grows downwards, map the stack to the page
+		 * at the higher address and leave the lower guard page
+		 * unbacked.
+		 *
+		 * Any valid stack address now has the PAGE_SHIFT bit as 1
+		 * and addresses corresponding to the guard page have the
+		 * PAGE_SHIFT bit as 0 - this is used for overflow detection.
+		 */
+		ret = kvm_pgtable_hyp_map(&pkvm_pgtable, addr + PAGE_SIZE,
+					  PAGE_SIZE, phys, PAGE_HYP);
+		if (ret)
+			__io_map_base = prev_base;
+	}
+	hyp_spin_unlock(&pkvm_pgd_lock);
+
+	*haddr = addr + size;
+
+	return ret;
+}
+
 static void *admit_host_page(void *arg)
 {
 	struct kvm_hyp_memcache *host_mc = arg;
diff --git a/arch/arm64/kvm/hyp/nvhe/setup.c b/arch/arm64/kvm/hyp/nvhe/setup.c
index bb98630dfeaf..0d5e0a89ddce 100644
--- a/arch/arm64/kvm/hyp/nvhe/setup.c
+++ b/arch/arm64/kvm/hyp/nvhe/setup.c
@@ -113,7 +113,6 @@ static int recreate_hyp_mappings(phys_addr_t phys, unsigned long size,
 
 	for (i = 0; i < hyp_nr_cpus; i++) {
 		struct kvm_nvhe_init_params *params = per_cpu_ptr(&kvm_init_params, i);
-		unsigned long hyp_addr;
 
 		start = (void *)kern_hyp_va(per_cpu_base[i]);
 		end = start + PAGE_ALIGN(hyp_percpu_size);
@@ -121,33 +120,9 @@ static int recreate_hyp_mappings(phys_addr_t phys, unsigned long size,
 		if (ret)
 			return ret;
 
-		/*
-		 * Allocate a contiguous HYP private VA range for the stack
-		 * and guard page. The allocation is also aligned based on
-		 * the order of its size.
-		 */
-		ret = pkvm_alloc_private_va_range(PAGE_SIZE * 2, &hyp_addr);
+		ret = pkvm_create_stack(params->stack_pa, &params->stack_hyp_va);
 		if (ret)
 			return ret;
-
-		/*
-		 * Since the stack grows downwards, map the stack to the page
-		 * at the higher address and leave the lower guard page
-		 * unbacked.
-		 *
-		 * Any valid stack address now has the PAGE_SHIFT bit as 1
-		 * and addresses corresponding to the guard page have the
-		 * PAGE_SHIFT bit as 0 - this is used for overflow detection.
-		 */
-		hyp_spin_lock(&pkvm_pgd_lock);
-		ret = kvm_pgtable_hyp_map(&pkvm_pgtable, hyp_addr + PAGE_SIZE,
-					PAGE_SIZE, params->stack_pa, PAGE_HYP);
-		hyp_spin_unlock(&pkvm_pgd_lock);
-		if (ret)
-			return ret;
-
-		/* Update stack_hyp_va to end of the stack's private VA range */
-		params->stack_hyp_va = hyp_addr + (2 * PAGE_SIZE);
 	}
 
 	/*
diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c
index e89a23153e85..c353a06ee7e6 100644
--- a/arch/arm64/kvm/hyp/nvhe/switch.c
+++ b/arch/arm64/kvm/hyp/nvhe/switch.c
@@ -236,7 +236,7 @@ static void early_exit_filter(struct kvm_vcpu *vcpu, u64 *exit_code)
 		 * KVM_ARM_VCPU_INIT, however, this is likely not possible for
 		 * protected VMs.
 		 */
-		vcpu->arch.target = -1;
+		vcpu_clear_flag(vcpu, VCPU_INITIALIZED);
 		*exit_code &= BIT(ARM_EXIT_WITH_SERROR_BIT);
 		*exit_code |= ARM_EXCEPTION_IL;
 	}
diff --git a/arch/arm64/kvm/hyp/nvhe/tlb.c b/arch/arm64/kvm/hyp/nvhe/tlb.c
index b9991bbd8e3f..1b265713d6be 100644
--- a/arch/arm64/kvm/hyp/nvhe/tlb.c
+++ b/arch/arm64/kvm/hyp/nvhe/tlb.c
@@ -182,6 +182,36 @@ void __kvm_tlb_flush_vmid_ipa_nsh(struct kvm_s2_mmu *mmu,
 	__tlb_switch_to_host(&cxt);
 }
 
+void __kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu,
+				phys_addr_t start, unsigned long pages)
+{
+	struct tlb_inv_context cxt;
+	unsigned long stride;
+
+	/*
+	 * Since the range of addresses may not be mapped at
+	 * the same level, assume the worst case as PAGE_SIZE
+	 */
+	stride = PAGE_SIZE;
+	start = round_down(start, stride);
+
+	/* Switch to requested VMID */
+	__tlb_switch_to_guest(mmu, &cxt, false);
+
+	__flush_s2_tlb_range_op(ipas2e1is, start, pages, stride, 0);
+
+	dsb(ish);
+	__tlbi(vmalle1is);
+	dsb(ish);
+	isb();
+
+	/* See the comment in __kvm_tlb_flush_vmid_ipa() */
+	if (icache_is_vpipt())
+		icache_inval_all_pou();
+
+	__tlb_switch_to_host(&cxt);
+}
+
 void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu)
 {
 	struct tlb_inv_context cxt;
diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
index f7a93ef29250..f155b8c9e98c 100644
--- a/arch/arm64/kvm/hyp/pgtable.c
+++ b/arch/arm64/kvm/hyp/pgtable.c
@@ -670,6 +670,26 @@ static bool stage2_has_fwb(struct kvm_pgtable *pgt)
 	return !(pgt->flags & KVM_PGTABLE_S2_NOFWB);
 }
 
+void kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu,
+				phys_addr_t addr, size_t size)
+{
+	unsigned long pages, inval_pages;
+
+	if (!system_supports_tlb_range()) {
+		kvm_call_hyp(__kvm_tlb_flush_vmid, mmu);
+		return;
+	}
+
+	pages = size >> PAGE_SHIFT;
+	while (pages > 0) {
+		inval_pages = min(pages, MAX_TLBI_RANGE_PAGES);
+		kvm_call_hyp(__kvm_tlb_flush_vmid_range, mmu, addr, inval_pages);
+
+		addr += inval_pages << PAGE_SHIFT;
+		pages -= inval_pages;
+	}
+}
+
 #define KVM_S2_MEMATTR(pgt, attr) PAGE_S2_MEMATTR(attr, stage2_has_fwb(pgt))
 
 static int stage2_set_prot_attr(struct kvm_pgtable *pgt, enum kvm_pgtable_prot prot,
@@ -786,7 +806,8 @@ static bool stage2_try_break_pte(const struct kvm_pgtable_visit_ctx *ctx,
 		 * evicted pte value (if any).
 		 */
 		if (kvm_pte_table(ctx->old, ctx->level))
-			kvm_call_hyp(__kvm_tlb_flush_vmid, mmu);
+			kvm_tlb_flush_vmid_range(mmu, ctx->addr,
+						kvm_granule_size(ctx->level));
 		else if (kvm_pte_valid(ctx->old))
 			kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu,
 				     ctx->addr, ctx->level);
@@ -810,16 +831,36 @@ static void stage2_make_pte(const struct kvm_pgtable_visit_ctx *ctx, kvm_pte_t n
 	smp_store_release(ctx->ptep, new);
 }
 
-static void stage2_put_pte(const struct kvm_pgtable_visit_ctx *ctx, struct kvm_s2_mmu *mmu,
-			   struct kvm_pgtable_mm_ops *mm_ops)
+static bool stage2_unmap_defer_tlb_flush(struct kvm_pgtable *pgt)
+{
+	/*
+	 * If FEAT_TLBIRANGE is implemented, defer the individual
+	 * TLB invalidations until the entire walk is finished, and
+	 * then use the range-based TLBI instructions to do the
+	 * invalidations. Condition deferred TLB invalidation on the
+	 * system supporting FWB as the optimization is entirely
+	 * pointless when the unmap walker needs to perform CMOs.
+	 */
+	return system_supports_tlb_range() && stage2_has_fwb(pgt);
+}
+
+static void stage2_unmap_put_pte(const struct kvm_pgtable_visit_ctx *ctx,
+				struct kvm_s2_mmu *mmu,
+				struct kvm_pgtable_mm_ops *mm_ops)
 {
+	struct kvm_pgtable *pgt = ctx->arg;
+
 	/*
-	 * Clear the existing PTE, and perform break-before-make with
-	 * TLB maintenance if it was valid.
+	 * Clear the existing PTE, and perform break-before-make if it was
+	 * valid. Depending on the system support, defer the TLB maintenance
+	 * for the same until the entire unmap walk is completed.
 	 */
 	if (kvm_pte_valid(ctx->old)) {
 		kvm_clear_pte(ctx->ptep);
-		kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu, ctx->addr, ctx->level);
+
+		if (!stage2_unmap_defer_tlb_flush(pgt))
+			kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu,
+					ctx->addr, ctx->level);
 	}
 
 	mm_ops->put_page(ctx->ptep);
@@ -1077,7 +1118,7 @@ static int stage2_unmap_walker(const struct kvm_pgtable_visit_ctx *ctx,
 	 * block entry and rely on the remaining portions being faulted
 	 * back lazily.
 	 */
-	stage2_put_pte(ctx, mmu, mm_ops);
+	stage2_unmap_put_pte(ctx, mmu, mm_ops);
 
 	if (need_flush && mm_ops->dcache_clean_inval_poc)
 		mm_ops->dcache_clean_inval_poc(kvm_pte_follow(ctx->old, mm_ops),
@@ -1091,13 +1132,19 @@ static int stage2_unmap_walker(const struct kvm_pgtable_visit_ctx *ctx,
 
 int kvm_pgtable_stage2_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size)
 {
+	int ret;
 	struct kvm_pgtable_walker walker = {
 		.cb	= stage2_unmap_walker,
 		.arg	= pgt,
 		.flags	= KVM_PGTABLE_WALK_LEAF | KVM_PGTABLE_WALK_TABLE_POST,
 	};
 
-	return kvm_pgtable_walk(pgt, addr, size, &walker);
+	ret = kvm_pgtable_walk(pgt, addr, size, &walker);
+	if (stage2_unmap_defer_tlb_flush(pgt))
+		/* Perform the deferred TLB invalidations */
+		kvm_tlb_flush_vmid_range(pgt->mmu, addr, size);
+
+	return ret;
 }
 
 struct stage2_attr_data {
diff --git a/arch/arm64/kvm/hyp/vhe/tlb.c b/arch/arm64/kvm/hyp/vhe/tlb.c
index e69da550cdc5..46bd43f61d76 100644
--- a/arch/arm64/kvm/hyp/vhe/tlb.c
+++ b/arch/arm64/kvm/hyp/vhe/tlb.c
@@ -143,6 +143,34 @@ void __kvm_tlb_flush_vmid_ipa_nsh(struct kvm_s2_mmu *mmu,
 	__tlb_switch_to_host(&cxt);
 }
 
+void __kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu,
+				phys_addr_t start, unsigned long pages)
+{
+	struct tlb_inv_context cxt;
+	unsigned long stride;
+
+	/*
+	 * Since the range of addresses may not be mapped at
+	 * the same level, assume the worst case as PAGE_SIZE
+	 */
+	stride = PAGE_SIZE;
+	start = round_down(start, stride);
+
+	dsb(ishst);
+
+	/* Switch to requested VMID */
+	__tlb_switch_to_guest(mmu, &cxt);
+
+	__flush_s2_tlb_range_op(ipas2e1is, start, pages, stride, 0);
+
+	dsb(ish);
+	__tlbi(vmalle1is);
+	dsb(ish);
+	isb();
+
+	__tlb_switch_to_host(&cxt);
+}
+
 void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu)
 {
 	struct tlb_inv_context cxt;
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index d3b4feed460c..587a104f66c3 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -161,15 +161,23 @@ static bool memslot_is_logging(struct kvm_memory_slot *memslot)
 }
 
 /**
- * kvm_flush_remote_tlbs() - flush all VM TLB entries for v7/8
+ * kvm_arch_flush_remote_tlbs() - flush all VM TLB entries for v7/8
  * @kvm:	pointer to kvm structure.
  *
  * Interface to HYP function to flush all VM TLB entries
  */
-void kvm_flush_remote_tlbs(struct kvm *kvm)
+int kvm_arch_flush_remote_tlbs(struct kvm *kvm)
 {
-	++kvm->stat.generic.remote_tlb_flush_requests;
 	kvm_call_hyp(__kvm_tlb_flush_vmid, &kvm->arch.mmu);
+	return 0;
+}
+
+int kvm_arch_flush_remote_tlbs_range(struct kvm *kvm,
+				      gfn_t gfn, u64 nr_pages)
+{
+	kvm_tlb_flush_vmid_range(&kvm->arch.mmu,
+				gfn << PAGE_SHIFT, nr_pages << PAGE_SHIFT);
+	return 0;
 }
 
 static bool kvm_is_device_pfn(unsigned long pfn)
@@ -592,6 +600,25 @@ int create_hyp_mappings(void *from, void *to, enum kvm_pgtable_prot prot)
 	return 0;
 }
 
+static int __hyp_alloc_private_va_range(unsigned long base)
+{
+	lockdep_assert_held(&kvm_hyp_pgd_mutex);
+
+	if (!PAGE_ALIGNED(base))
+		return -EINVAL;
+
+	/*
+	 * Verify that BIT(VA_BITS - 1) hasn't been flipped by
+	 * allocating the new area, as it would indicate we've
+	 * overflowed the idmap/IO address range.
+	 */
+	if ((base ^ io_map_base) & BIT(VA_BITS - 1))
+		return -ENOMEM;
+
+	io_map_base = base;
+
+	return 0;
+}
 
 /**
  * hyp_alloc_private_va_range - Allocates a private VA range.
@@ -612,26 +639,16 @@ int hyp_alloc_private_va_range(size_t size, unsigned long *haddr)
 
 	/*
 	 * This assumes that we have enough space below the idmap
-	 * page to allocate our VAs. If not, the check below will
-	 * kick. A potential alternative would be to detect that
-	 * overflow and switch to an allocation above the idmap.
+	 * page to allocate our VAs. If not, the check in
+	 * __hyp_alloc_private_va_range() will kick. A potential
+	 * alternative would be to detect that overflow and switch
+	 * to an allocation above the idmap.
 	 *
 	 * The allocated size is always a multiple of PAGE_SIZE.
 	 */
-	base = io_map_base - PAGE_ALIGN(size);
-
-	/* Align the allocation based on the order of its size */
-	base = ALIGN_DOWN(base, PAGE_SIZE << get_order(size));
-
-	/*
-	 * Verify that BIT(VA_BITS - 1) hasn't been flipped by
-	 * allocating the new area, as it would indicate we've
-	 * overflowed the idmap/IO address range.
-	 */
-	if ((base ^ io_map_base) & BIT(VA_BITS - 1))
-		ret = -ENOMEM;
-	else
-		*haddr = io_map_base = base;
+	size = PAGE_ALIGN(size);
+	base = io_map_base - size;
+	ret = __hyp_alloc_private_va_range(base);
 
 	mutex_unlock(&kvm_hyp_pgd_mutex);
 
@@ -668,6 +685,48 @@ static int __create_hyp_private_mapping(phys_addr_t phys_addr, size_t size,
 	return ret;
 }
 
+int create_hyp_stack(phys_addr_t phys_addr, unsigned long *haddr)
+{
+	unsigned long base;
+	size_t size;
+	int ret;
+
+	mutex_lock(&kvm_hyp_pgd_mutex);
+	/*
+	 * Efficient stack verification using the PAGE_SHIFT bit implies
+	 * an alignment of our allocation on the order of the size.
+	 */
+	size = PAGE_SIZE * 2;
+	base = ALIGN_DOWN(io_map_base - size, size);
+
+	ret = __hyp_alloc_private_va_range(base);
+
+	mutex_unlock(&kvm_hyp_pgd_mutex);
+
+	if (ret) {
+		kvm_err("Cannot allocate hyp stack guard page\n");
+		return ret;
+	}
+
+	/*
+	 * Since the stack grows downwards, map the stack to the page
+	 * at the higher address and leave the lower guard page
+	 * unbacked.
+	 *
+	 * Any valid stack address now has the PAGE_SHIFT bit as 1
+	 * and addresses corresponding to the guard page have the
+	 * PAGE_SHIFT bit as 0 - this is used for overflow detection.
+	 */
+	ret = __create_hyp_mappings(base + PAGE_SIZE, PAGE_SIZE, phys_addr,
+				    PAGE_HYP);
+	if (ret)
+		kvm_err("Cannot map hyp stack\n");
+
+	*haddr = base + size;
+
+	return ret;
+}
+
 /**
  * create_hyp_io_mappings - Map IO into both kernel and HYP
  * @phys_addr:	The physical start address which gets mapped
@@ -1075,7 +1134,7 @@ static void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot)
 	write_lock(&kvm->mmu_lock);
 	stage2_wp_range(&kvm->arch.mmu, start, end);
 	write_unlock(&kvm->mmu_lock);
-	kvm_flush_remote_tlbs(kvm);
+	kvm_flush_remote_tlbs_memslot(kvm, memslot);
 }
 
 /**
@@ -1541,7 +1600,6 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 
 out_unlock:
 	read_unlock(&kvm->mmu_lock);
-	kvm_set_pfn_accessed(pfn);
 	kvm_release_pfn_clean(pfn);
 	return ret != -EAGAIN ? ret : 0;
 }
@@ -1721,7 +1779,7 @@ bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range)
 
 bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
 {
-	kvm_pfn_t pfn = pte_pfn(range->pte);
+	kvm_pfn_t pfn = pte_pfn(range->arg.pte);
 
 	if (!kvm->arch.mmu.pgt)
 		return false;
diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c
index 315354d27978..042695a210ce 100644
--- a/arch/arm64/kvm/nested.c
+++ b/arch/arm64/kvm/nested.c
@@ -71,8 +71,9 @@ void access_nested_id_reg(struct kvm_vcpu *v, struct sys_reg_params *p,
 		break;
 
 	case SYS_ID_AA64MMFR0_EL1:
-		/* Hide ECV, FGT, ExS, Secure Memory */
-		val &= ~(GENMASK_ULL(63, 43)		|
+		/* Hide ECV, ExS, Secure Memory */
+		val &= ~(NV_FTR(MMFR0, ECV)		|
+			 NV_FTR(MMFR0, EXS)		|
 			 NV_FTR(MMFR0, TGRAN4_2)	|
 			 NV_FTR(MMFR0, TGRAN16_2)	|
 			 NV_FTR(MMFR0, TGRAN64_2)	|
@@ -116,7 +117,8 @@ void access_nested_id_reg(struct kvm_vcpu *v, struct sys_reg_params *p,
 		break;
 
 	case SYS_ID_AA64MMFR1_EL1:
-		val &= (NV_FTR(MMFR1, PAN)	|
+		val &= (NV_FTR(MMFR1, HCX)	|
+			NV_FTR(MMFR1, PAN)	|
 			NV_FTR(MMFR1, LO)	|
 			NV_FTR(MMFR1, HPDS)	|
 			NV_FTR(MMFR1, VH)	|
@@ -124,8 +126,7 @@ void access_nested_id_reg(struct kvm_vcpu *v, struct sys_reg_params *p,
 		break;
 
 	case SYS_ID_AA64MMFR2_EL1:
-		val &= ~(NV_FTR(MMFR2, EVT)	|
-			 NV_FTR(MMFR2, BBM)	|
+		val &= ~(NV_FTR(MMFR2, BBM)	|
 			 NV_FTR(MMFR2, TTL)	|
 			 GENMASK_ULL(47, 44)	|
 			 NV_FTR(MMFR2, ST)	|
diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c
index 560650972478..6b066e04dc5d 100644
--- a/arch/arm64/kvm/pmu-emul.c
+++ b/arch/arm64/kvm/pmu-emul.c
@@ -14,6 +14,7 @@
 #include <asm/kvm_emulate.h>
 #include <kvm/arm_pmu.h>
 #include <kvm/arm_vgic.h>
+#include <asm/arm_pmuv3.h>
 
 #define PERF_ATTR_CFG1_COUNTER_64BIT	BIT(0)
 
@@ -35,12 +36,8 @@ static struct kvm_pmc *kvm_vcpu_idx_to_pmc(struct kvm_vcpu *vcpu, int cnt_idx)
 	return &vcpu->arch.pmu.pmc[cnt_idx];
 }
 
-static u32 kvm_pmu_event_mask(struct kvm *kvm)
+static u32 __kvm_pmu_event_mask(unsigned int pmuver)
 {
-	unsigned int pmuver;
-
-	pmuver = kvm->arch.arm_pmu->pmuver;
-
 	switch (pmuver) {
 	case ID_AA64DFR0_EL1_PMUVer_IMP:
 		return GENMASK(9, 0);
@@ -55,6 +52,14 @@ static u32 kvm_pmu_event_mask(struct kvm *kvm)
 	}
 }
 
+static u32 kvm_pmu_event_mask(struct kvm *kvm)
+{
+	u64 dfr0 = IDREG(kvm, SYS_ID_AA64DFR0_EL1);
+	u8 pmuver = SYS_FIELD_GET(ID_AA64DFR0_EL1, PMUVer, dfr0);
+
+	return __kvm_pmu_event_mask(pmuver);
+}
+
 /**
  * kvm_pmc_is_64bit - determine if counter is 64bit
  * @pmc: counter context
@@ -672,8 +677,11 @@ void kvm_host_pmu_init(struct arm_pmu *pmu)
 {
 	struct arm_pmu_entry *entry;
 
-	if (pmu->pmuver == ID_AA64DFR0_EL1_PMUVer_NI ||
-	    pmu->pmuver == ID_AA64DFR0_EL1_PMUVer_IMP_DEF)
+	/*
+	 * Check the sanitised PMU version for the system, as KVM does not
+	 * support implementations where PMUv3 exists on a subset of CPUs.
+	 */
+	if (!pmuv3_implemented(kvm_arm_pmu_get_pmuver_limit()))
 		return;
 
 	mutex_lock(&arm_pmus_lock);
@@ -750,11 +758,12 @@ u64 kvm_pmu_get_pmceid(struct kvm_vcpu *vcpu, bool pmceid1)
 	} else {
 		val = read_sysreg(pmceid1_el0);
 		/*
-		 * Don't advertise STALL_SLOT, as PMMIR_EL0 is handled
+		 * Don't advertise STALL_SLOT*, as PMMIR_EL0 is handled
 		 * as RAZ
 		 */
-		if (vcpu->kvm->arch.arm_pmu->pmuver >= ID_AA64DFR0_EL1_PMUVer_V3P4)
-			val &= ~BIT_ULL(ARMV8_PMUV3_PERFCTR_STALL_SLOT - 32);
+		val &= ~(BIT_ULL(ARMV8_PMUV3_PERFCTR_STALL_SLOT - 32) |
+			 BIT_ULL(ARMV8_PMUV3_PERFCTR_STALL_SLOT_FRONTEND - 32) |
+			 BIT_ULL(ARMV8_PMUV3_PERFCTR_STALL_SLOT_BACKEND - 32));
 		base = 32;
 	}
 
@@ -950,11 +959,17 @@ int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
 		return 0;
 	}
 	case KVM_ARM_VCPU_PMU_V3_FILTER: {
+		u8 pmuver = kvm_arm_pmu_get_pmuver_limit();
 		struct kvm_pmu_event_filter __user *uaddr;
 		struct kvm_pmu_event_filter filter;
 		int nr_events;
 
-		nr_events = kvm_pmu_event_mask(kvm) + 1;
+		/*
+		 * Allow userspace to specify an event filter for the entire
+		 * event range supported by PMUVer of the hardware, rather
+		 * than the guest's PMUVer for KVM backward compatibility.
+		 */
+		nr_events = __kvm_pmu_event_mask(pmuver) + 1;
 
 		uaddr = (struct kvm_pmu_event_filter __user *)(long)attr->addr;
 
diff --git a/arch/arm64/kvm/pmu.c b/arch/arm64/kvm/pmu.c
index 121f1a14c829..0eea225fd09a 100644
--- a/arch/arm64/kvm/pmu.c
+++ b/arch/arm64/kvm/pmu.c
@@ -236,3 +236,21 @@ bool kvm_set_pmuserenr(u64 val)
 	ctxt_sys_reg(hctxt, PMUSERENR_EL0) = val;
 	return true;
 }
+
+/*
+ * If we interrupted the guest to update the host PMU context, make
+ * sure we re-apply the guest EL0 state.
+ */
+void kvm_vcpu_pmu_resync_el0(void)
+{
+	struct kvm_vcpu *vcpu;
+
+	if (!has_vhe() || !in_interrupt())
+		return;
+
+	vcpu = kvm_get_running_vcpu();
+	if (!vcpu)
+		return;
+
+	kvm_make_request(KVM_REQ_RESYNC_PMU_EL0, vcpu);
+}
diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c
index bc8556b6f459..7a65a35ee4ac 100644
--- a/arch/arm64/kvm/reset.c
+++ b/arch/arm64/kvm/reset.c
@@ -248,21 +248,16 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
 		}
 	}
 
-	switch (vcpu->arch.target) {
-	default:
-		if (vcpu_el1_is_32bit(vcpu)) {
-			pstate = VCPU_RESET_PSTATE_SVC;
-		} else if (vcpu_has_nv(vcpu)) {
-			pstate = VCPU_RESET_PSTATE_EL2;
-		} else {
-			pstate = VCPU_RESET_PSTATE_EL1;
-		}
-
-		if (kvm_vcpu_has_pmu(vcpu) && !kvm_arm_support_pmu_v3()) {
-			ret = -EINVAL;
-			goto out;
-		}
-		break;
+	if (vcpu_el1_is_32bit(vcpu))
+		pstate = VCPU_RESET_PSTATE_SVC;
+	else if (vcpu_has_nv(vcpu))
+		pstate = VCPU_RESET_PSTATE_EL2;
+	else
+		pstate = VCPU_RESET_PSTATE_EL1;
+
+	if (kvm_vcpu_has_pmu(vcpu) && !kvm_arm_support_pmu_v3()) {
+		ret = -EINVAL;
+		goto out;
 	}
 
 	/* Reset core registers */
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 2ca2973abe66..e92ec810d449 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -2151,6 +2151,8 @@ static const struct sys_reg_desc sys_reg_descs[] = {
 	{ SYS_DESC(SYS_CONTEXTIDR_EL1), access_vm_reg, reset_val, CONTEXTIDR_EL1, 0 },
 	{ SYS_DESC(SYS_TPIDR_EL1), NULL, reset_unknown, TPIDR_EL1 },
 
+	{ SYS_DESC(SYS_ACCDATA_EL1), undef_access },
+
 	{ SYS_DESC(SYS_SCXTNUM_EL1), undef_access },
 
 	{ SYS_DESC(SYS_CNTKCTL_EL1), NULL, reset_val, CNTKCTL_EL1, 0},
@@ -2365,8 +2367,13 @@ static const struct sys_reg_desc sys_reg_descs[] = {
 	EL2_REG(MDCR_EL2, access_rw, reset_val, 0),
 	EL2_REG(CPTR_EL2, access_rw, reset_val, CPTR_NVHE_EL2_RES1),
 	EL2_REG(HSTR_EL2, access_rw, reset_val, 0),
+	EL2_REG(HFGRTR_EL2, access_rw, reset_val, 0),
+	EL2_REG(HFGWTR_EL2, access_rw, reset_val, 0),
+	EL2_REG(HFGITR_EL2, access_rw, reset_val, 0),
 	EL2_REG(HACR_EL2, access_rw, reset_val, 0),
 
+	EL2_REG(HCRX_EL2, access_rw, reset_val, 0),
+
 	EL2_REG(TTBR0_EL2, access_rw, reset_val, 0),
 	EL2_REG(TTBR1_EL2, access_rw, reset_val, 0),
 	EL2_REG(TCR_EL2, access_rw, reset_val, TCR_EL2_RES1),
@@ -2374,6 +2381,8 @@ static const struct sys_reg_desc sys_reg_descs[] = {
 	EL2_REG(VTCR_EL2, access_rw, reset_val, 0),
 
 	{ SYS_DESC(SYS_DACR32_EL2), NULL, reset_unknown, DACR32_EL2 },
+	EL2_REG(HDFGRTR_EL2, access_rw, reset_val, 0),
+	EL2_REG(HDFGWTR_EL2, access_rw, reset_val, 0),
 	EL2_REG(SPSR_EL2, access_rw, reset_val, 0),
 	EL2_REG(ELR_EL2, access_rw, reset_val, 0),
 	{ SYS_DESC(SYS_SP_EL1), access_sp_el1},
@@ -3170,6 +3179,9 @@ int kvm_handle_sys_reg(struct kvm_vcpu *vcpu)
 
 	trace_kvm_handle_sys_reg(esr);
 
+	if (__check_nv_sr_forward(vcpu))
+		return 1;
+
 	params = esr_sys64_to_params(esr);
 	params.regval = vcpu_get_reg(vcpu, Rt);
 
@@ -3587,5 +3599,8 @@ int __init kvm_sys_reg_table_init(void)
 	if (!first_idreg)
 		return -EINVAL;
 
+	if (kvm_get_mode() == KVM_MODE_NV)
+		return populate_nv_trap_config();
+
 	return 0;
 }
diff --git a/arch/arm64/kvm/trace_arm.h b/arch/arm64/kvm/trace_arm.h
index 6ce5c025218d..8ad53104934d 100644
--- a/arch/arm64/kvm/trace_arm.h
+++ b/arch/arm64/kvm/trace_arm.h
@@ -364,6 +364,32 @@ TRACE_EVENT(kvm_inject_nested_exception,
 		  __entry->hcr_el2)
 );
 
+TRACE_EVENT(kvm_forward_sysreg_trap,
+	    TP_PROTO(struct kvm_vcpu *vcpu, u32 sysreg, bool is_read),
+	    TP_ARGS(vcpu, sysreg, is_read),
+
+	    TP_STRUCT__entry(
+		__field(u64,	pc)
+		__field(u32,	sysreg)
+		__field(bool,	is_read)
+	    ),
+
+	    TP_fast_assign(
+		__entry->pc = *vcpu_pc(vcpu);
+		__entry->sysreg = sysreg;
+		__entry->is_read = is_read;
+	    ),
+
+	    TP_printk("%llx %c (%d,%d,%d,%d,%d)",
+		      __entry->pc,
+		      __entry->is_read ? 'R' : 'W',
+		      sys_reg_Op0(__entry->sysreg),
+		      sys_reg_Op1(__entry->sysreg),
+		      sys_reg_CRn(__entry->sysreg),
+		      sys_reg_CRm(__entry->sysreg),
+		      sys_reg_Op2(__entry->sysreg))
+);
+
 #endif /* _TRACE_ARM_ARM64_KVM_H */
 
 #undef TRACE_INCLUDE_PATH
diff --git a/arch/arm64/kvm/vgic/vgic.h b/arch/arm64/kvm/vgic/vgic.h
index f9923beedd27..0ab09b0d4440 100644
--- a/arch/arm64/kvm/vgic/vgic.h
+++ b/arch/arm64/kvm/vgic/vgic.h
@@ -199,7 +199,6 @@ void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu);
 void vgic_v2_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr);
 void vgic_v2_clear_lr(struct kvm_vcpu *vcpu, int lr);
 void vgic_v2_set_underflow(struct kvm_vcpu *vcpu);
-void vgic_v2_set_npie(struct kvm_vcpu *vcpu);
 int vgic_v2_has_attr_regs(struct kvm_device *dev, struct kvm_device_attr *attr);
 int vgic_v2_dist_uaccess(struct kvm_vcpu *vcpu, bool is_write,
 			 int offset, u32 *val);
@@ -233,7 +232,6 @@ void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu);
 void vgic_v3_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr);
 void vgic_v3_clear_lr(struct kvm_vcpu *vcpu, int lr);
 void vgic_v3_set_underflow(struct kvm_vcpu *vcpu);
-void vgic_v3_set_npie(struct kvm_vcpu *vcpu);
 void vgic_v3_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr);
 void vgic_v3_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr);
 void vgic_v3_enable(struct kvm_vcpu *vcpu);
diff --git a/arch/arm64/lib/csum.c b/arch/arm64/lib/csum.c
index 78b87a64ca0a..2432683e48a6 100644
--- a/arch/arm64/lib/csum.c
+++ b/arch/arm64/lib/csum.c
@@ -24,7 +24,7 @@ unsigned int __no_sanitize_address do_csum(const unsigned char *buff, int len)
 	const u64 *ptr;
 	u64 data, sum64 = 0;
 
-	if (unlikely(len == 0))
+	if (unlikely(len <= 0))
 		return 0;
 
 	offset = (unsigned long)buff & 7;
diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps
index c80ed4f3cbce..c3f06fdef609 100644
--- a/arch/arm64/tools/cpucaps
+++ b/arch/arm64/tools/cpucaps
@@ -26,6 +26,7 @@ HAS_ECV
 HAS_ECV_CNTPOFF
 HAS_EPAN
 HAS_EVT
+HAS_FGT
 HAS_GENERIC_AUTH
 HAS_GENERIC_AUTH_ARCH_QARMA3
 HAS_GENERIC_AUTH_ARCH_QARMA5
diff --git a/arch/arm64/tools/sysreg b/arch/arm64/tools/sysreg
index 65866bf819c3..2517ef7c21cf 100644
--- a/arch/arm64/tools/sysreg
+++ b/arch/arm64/tools/sysreg
@@ -2156,6 +2156,135 @@ Field	1	ICIALLU
 Field	0	ICIALLUIS
 EndSysreg
 
+Sysreg HDFGRTR_EL2	3	4	3	1	4
+Field	63	PMBIDR_EL1
+Field	62	nPMSNEVFR_EL1
+Field	61	nBRBDATA
+Field	60	nBRBCTL
+Field	59	nBRBIDR
+Field	58	PMCEIDn_EL0
+Field	57	PMUSERENR_EL0
+Field	56	TRBTRG_EL1
+Field	55	TRBSR_EL1
+Field	54	TRBPTR_EL1
+Field	53	TRBMAR_EL1
+Field	52	TRBLIMITR_EL1
+Field	51	TRBIDR_EL1
+Field	50	TRBBASER_EL1
+Res0	49
+Field	48	TRCVICTLR
+Field	47	TRCSTATR
+Field	46	TRCSSCSRn
+Field	45	TRCSEQSTR
+Field	44	TRCPRGCTLR
+Field	43	TRCOSLSR
+Res0	42
+Field	41	TRCIMSPECn
+Field	40	TRCID
+Res0	39:38
+Field	37	TRCCNTVRn
+Field	36	TRCCLAIM
+Field	35	TRCAUXCTLR
+Field	34	TRCAUTHSTATUS
+Field	33	TRC
+Field	32	PMSLATFR_EL1
+Field	31	PMSIRR_EL1
+Field	30	PMSIDR_EL1
+Field	29	PMSICR_EL1
+Field	28	PMSFCR_EL1
+Field	27	PMSEVFR_EL1
+Field	26	PMSCR_EL1
+Field	25	PMBSR_EL1
+Field	24	PMBPTR_EL1
+Field	23	PMBLIMITR_EL1
+Field	22	PMMIR_EL1
+Res0	21:20
+Field	19	PMSELR_EL0
+Field	18	PMOVS
+Field	17	PMINTEN
+Field	16	PMCNTEN
+Field	15	PMCCNTR_EL0
+Field	14	PMCCFILTR_EL0
+Field	13	PMEVTYPERn_EL0
+Field	12	PMEVCNTRn_EL0
+Field	11	OSDLR_EL1
+Field	10	OSECCR_EL1
+Field	9	OSLSR_EL1
+Res0	8
+Field	7	DBGPRCR_EL1
+Field	6	DBGAUTHSTATUS_EL1
+Field	5	DBGCLAIM
+Field	4	MDSCR_EL1
+Field	3	DBGWVRn_EL1
+Field	2	DBGWCRn_EL1
+Field	1	DBGBVRn_EL1
+Field	0	DBGBCRn_EL1
+EndSysreg
+
+Sysreg HDFGWTR_EL2	3	4	3	1	5
+Res0	63
+Field	62	nPMSNEVFR_EL1
+Field	61	nBRBDATA
+Field	60	nBRBCTL
+Res0	59:58
+Field	57	PMUSERENR_EL0
+Field	56	TRBTRG_EL1
+Field	55	TRBSR_EL1
+Field	54	TRBPTR_EL1
+Field	53	TRBMAR_EL1
+Field	52	TRBLIMITR_EL1
+Res0	51
+Field	50	TRBBASER_EL1
+Field	49	TRFCR_EL1
+Field	48	TRCVICTLR
+Res0	47
+Field	46	TRCSSCSRn
+Field	45	TRCSEQSTR
+Field	44	TRCPRGCTLR
+Res0	43
+Field	42	TRCOSLAR
+Field	41	TRCIMSPECn
+Res0	40:38
+Field	37	TRCCNTVRn
+Field	36	TRCCLAIM
+Field	35	TRCAUXCTLR
+Res0	34
+Field	33	TRC
+Field	32	PMSLATFR_EL1
+Field	31	PMSIRR_EL1
+Res0	30
+Field	29	PMSICR_EL1
+Field	28	PMSFCR_EL1
+Field	27	PMSEVFR_EL1
+Field	26	PMSCR_EL1
+Field	25	PMBSR_EL1
+Field	24	PMBPTR_EL1
+Field	23	PMBLIMITR_EL1
+Res0	22
+Field	21	PMCR_EL0
+Field	20	PMSWINC_EL0
+Field	19	PMSELR_EL0
+Field	18	PMOVS
+Field	17	PMINTEN
+Field	16	PMCNTEN
+Field	15	PMCCNTR_EL0
+Field	14	PMCCFILTR_EL0
+Field	13	PMEVTYPERn_EL0
+Field	12	PMEVCNTRn_EL0
+Field	11	OSDLR_EL1
+Field	10	OSECCR_EL1
+Res0	9
+Field	8	OSLAR_EL1
+Field	7	DBGPRCR_EL1
+Res0	6
+Field	5	DBGCLAIM
+Field	4	MDSCR_EL1
+Field	3	DBGWVRn_EL1
+Field	2	DBGWCRn_EL1
+Field	1	DBGBVRn_EL1
+Field	0	DBGBCRn_EL1
+EndSysreg
+
 Sysreg	ZCR_EL2	3	4	1	2	0
 Fields	ZCR_ELx
 EndSysreg
diff --git a/arch/ia64/include/asm/Kbuild b/arch/ia64/include/asm/Kbuild
index 33733245f42b..aefae2efde9f 100644
--- a/arch/ia64/include/asm/Kbuild
+++ b/arch/ia64/include/asm/Kbuild
@@ -1,7 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0
 generated-y += syscall_table.h
 generic-y += agp.h
-generic-y += export.h
 generic-y += kvm_para.h
 generic-y += mcs_spinlock.h
 generic-y += vtime.h
diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S
index 5eba3fb2e311..ac06d44b9b27 100644
--- a/arch/ia64/kernel/entry.S
+++ b/arch/ia64/kernel/entry.S
@@ -37,7 +37,7 @@
  *	pNonSys:	!pSys
  */
 
-
+#include <linux/export.h>
 #include <linux/pgtable.h>
 #include <asm/asmmacro.h>
 #include <asm/cache.h>
@@ -49,7 +49,6 @@
 #include <asm/thread_info.h>
 #include <asm/unistd.h>
 #include <asm/ftrace.h>
-#include <asm/export.h>
 
 #include "minstate.h"
 
diff --git a/arch/ia64/kernel/esi_stub.S b/arch/ia64/kernel/esi_stub.S
index 821e68d10598..9928c5b2957c 100644
--- a/arch/ia64/kernel/esi_stub.S
+++ b/arch/ia64/kernel/esi_stub.S
@@ -34,9 +34,9 @@
 #define PSR_BITS_TO_SET							\
 	(IA64_PSR_BN)
 
+#include <linux/export.h>
 #include <asm/processor.h>
 #include <asm/asmmacro.h>
-#include <asm/export.h>
 
 /*
  * Inputs:
diff --git a/arch/ia64/kernel/head.S b/arch/ia64/kernel/head.S
index c096500590e9..85c8a57da402 100644
--- a/arch/ia64/kernel/head.S
+++ b/arch/ia64/kernel/head.S
@@ -20,7 +20,7 @@
  *   Support for CPU Hotplug
  */
 
-
+#include <linux/export.h>
 #include <linux/pgtable.h>
 #include <asm/asmmacro.h>
 #include <asm/fpu.h>
@@ -33,7 +33,6 @@
 #include <asm/mca_asm.h>
 #include <linux/init.h>
 #include <linux/linkage.h>
-#include <asm/export.h>
 
 #ifdef CONFIG_HOTPLUG_CPU
 #define SAL_PSR_BITS_TO_SET				\
diff --git a/arch/ia64/kernel/ivt.S b/arch/ia64/kernel/ivt.S
index 7a418e324d30..da90c49df628 100644
--- a/arch/ia64/kernel/ivt.S
+++ b/arch/ia64/kernel/ivt.S
@@ -47,7 +47,7 @@
  * Table is based upon EAS2.6 (Oct 1999)
  */
 
-
+#include <linux/export.h>
 #include <linux/pgtable.h>
 #include <asm/asmmacro.h>
 #include <asm/break.h>
@@ -58,7 +58,6 @@
 #include <asm/thread_info.h>
 #include <asm/unistd.h>
 #include <asm/errno.h>
-#include <asm/export.h>
 
 #if 0
 # define PSR_DEFAULT_BITS	psr.ac
diff --git a/arch/ia64/kernel/pal.S b/arch/ia64/kernel/pal.S
index 06d01a070aae..fb6db6966f70 100644
--- a/arch/ia64/kernel/pal.S
+++ b/arch/ia64/kernel/pal.S
@@ -13,9 +13,9 @@
  * 05/24/2000 eranian Added support for physical mode static calls
  */
 
+#include <linux/export.h>
 #include <asm/asmmacro.h>
 #include <asm/processor.h>
-#include <asm/export.h>
 
 	.data
 pal_entry_point:
diff --git a/arch/ia64/lib/clear_page.S b/arch/ia64/lib/clear_page.S
index 65b75085c8f4..ba0dd2538fa5 100644
--- a/arch/ia64/lib/clear_page.S
+++ b/arch/ia64/lib/clear_page.S
@@ -10,9 +10,9 @@
  * 3/08/02 davidm	Some more tweaking
  */
 
+#include <linux/export.h>
 #include <asm/asmmacro.h>
 #include <asm/page.h>
-#include <asm/export.h>
 
 #ifdef CONFIG_ITANIUM
 # define L3_LINE_SIZE	64	// Itanium L3 line size
diff --git a/arch/ia64/lib/clear_user.S b/arch/ia64/lib/clear_user.S
index a28f39d349eb..1d9e45ccf8e5 100644
--- a/arch/ia64/lib/clear_user.S
+++ b/arch/ia64/lib/clear_user.S
@@ -12,8 +12,8 @@
  *	Stephane Eranian <eranian@hpl.hp.com>
  */
 
+#include <linux/export.h>
 #include <asm/asmmacro.h>
-#include <asm/export.h>
 
 //
 // arguments
diff --git a/arch/ia64/lib/copy_page.S b/arch/ia64/lib/copy_page.S
index 176f857c522e..c0a0e6b2af00 100644
--- a/arch/ia64/lib/copy_page.S
+++ b/arch/ia64/lib/copy_page.S
@@ -15,9 +15,9 @@
  *
  * 4/06/01 davidm	Tuned to make it perform well both for cached and uncached copies.
  */
+#include <linux/export.h>
 #include <asm/asmmacro.h>
 #include <asm/page.h>
-#include <asm/export.h>
 
 #define PIPE_DEPTH	3
 #define EPI		p[PIPE_DEPTH-1]
diff --git a/arch/ia64/lib/copy_page_mck.S b/arch/ia64/lib/copy_page_mck.S
index d6fd56e4f1c1..5e8bb4b4b535 100644
--- a/arch/ia64/lib/copy_page_mck.S
+++ b/arch/ia64/lib/copy_page_mck.S
@@ -60,9 +60,9 @@
  *	to fetch the second-half of the L2 cache line into L1, and the tX words are copied in
  *	an order that avoids bank conflicts.
  */
+#include <linux/export.h>
 #include <asm/asmmacro.h>
 #include <asm/page.h>
-#include <asm/export.h>
 
 #define PREFETCH_DIST	8		// McKinley sustains 16 outstanding L2 misses (8 ld, 8 st)
 
diff --git a/arch/ia64/lib/copy_user.S b/arch/ia64/lib/copy_user.S
index f681556c6b86..8daab72cfe77 100644
--- a/arch/ia64/lib/copy_user.S
+++ b/arch/ia64/lib/copy_user.S
@@ -30,8 +30,8 @@
  *	- fix extraneous stop bit introduced by the EX() macro.
  */
 
+#include <linux/export.h>
 #include <asm/asmmacro.h>
-#include <asm/export.h>
 
 //
 // Tuneable parameters
diff --git a/arch/ia64/lib/flush.S b/arch/ia64/lib/flush.S
index 8573d59c9ed1..f8e795fe45cb 100644
--- a/arch/ia64/lib/flush.S
+++ b/arch/ia64/lib/flush.S
@@ -8,9 +8,8 @@
  * 05/28/05 Zoltan Menyhart	Dynamic stride size
  */
 
+#include <linux/export.h>
 #include <asm/asmmacro.h>
-#include <asm/export.h>
-
 
 	/*
 	 * flush_icache_range(start,end)
diff --git a/arch/ia64/lib/idiv32.S b/arch/ia64/lib/idiv32.S
index def92b708e6e..83586fbc51ff 100644
--- a/arch/ia64/lib/idiv32.S
+++ b/arch/ia64/lib/idiv32.S
@@ -15,8 +15,8 @@
  * (http://www.goodreads.com/book/show/2019887.Ia_64_and_Elementary_Functions)
  */
 
+#include <linux/export.h>
 #include <asm/asmmacro.h>
-#include <asm/export.h>
 
 #ifdef MODULO
 # define OP	mod
diff --git a/arch/ia64/lib/idiv64.S b/arch/ia64/lib/idiv64.S
index a8ba3bd3d4d8..5c9113691f72 100644
--- a/arch/ia64/lib/idiv64.S
+++ b/arch/ia64/lib/idiv64.S
@@ -15,8 +15,8 @@
  * (http://www.goodreads.com/book/show/2019887.Ia_64_and_Elementary_Functions)
  */
 
+#include <linux/export.h>
 #include <asm/asmmacro.h>
-#include <asm/export.h>
 
 #ifdef MODULO
 # define OP	mod
diff --git a/arch/ia64/lib/ip_fast_csum.S b/arch/ia64/lib/ip_fast_csum.S
index dc9e6e6fe876..fcc0b812ce2e 100644
--- a/arch/ia64/lib/ip_fast_csum.S
+++ b/arch/ia64/lib/ip_fast_csum.S
@@ -13,8 +13,8 @@
  * Copyright (C) 2002, 2006 Ken Chen <kenneth.w.chen@intel.com>
  */
 
+#include <linux/export.h>
 #include <asm/asmmacro.h>
-#include <asm/export.h>
 
 /*
  * Since we know that most likely this function is called with buf aligned
diff --git a/arch/ia64/lib/memcpy.S b/arch/ia64/lib/memcpy.S
index 91a625fddbf0..35c9069a8345 100644
--- a/arch/ia64/lib/memcpy.S
+++ b/arch/ia64/lib/memcpy.S
@@ -14,8 +14,8 @@
  *	Stephane Eranian <eranian@hpl.hp.com>
  *	David Mosberger-Tang <davidm@hpl.hp.com>
  */
+#include <linux/export.h>
 #include <asm/asmmacro.h>
-#include <asm/export.h>
 
 GLOBAL_ENTRY(memcpy)
 
diff --git a/arch/ia64/lib/memcpy_mck.S b/arch/ia64/lib/memcpy_mck.S
index cc4e6ac914b6..c0d4362217ae 100644
--- a/arch/ia64/lib/memcpy_mck.S
+++ b/arch/ia64/lib/memcpy_mck.S
@@ -14,9 +14,9 @@
  * Copyright (C) 2002 Intel Corp.
  * Copyright (C) 2002 Ken Chen <kenneth.w.chen@intel.com>
  */
+#include <linux/export.h>
 #include <asm/asmmacro.h>
 #include <asm/page.h>
-#include <asm/export.h>
 
 #define EK(y...) EX(y)
 
diff --git a/arch/ia64/lib/memset.S b/arch/ia64/lib/memset.S
index 07a8b92c6496..552c5c7e4d06 100644
--- a/arch/ia64/lib/memset.S
+++ b/arch/ia64/lib/memset.S
@@ -18,8 +18,8 @@
    Since a stf.spill f0 can store 16B in one go, we use this instruction
    to get peak speed when value = 0.  */
 
+#include <linux/export.h>
 #include <asm/asmmacro.h>
-#include <asm/export.h>
 #undef ret
 
 #define dest		in0
diff --git a/arch/ia64/lib/strlen.S b/arch/ia64/lib/strlen.S
index d66de5966974..1f4a46c15127 100644
--- a/arch/ia64/lib/strlen.S
+++ b/arch/ia64/lib/strlen.S
@@ -17,8 +17,8 @@
  * 09/24/99 S.Eranian add speculation recovery code
  */
 
+#include <linux/export.h>
 #include <asm/asmmacro.h>
-#include <asm/export.h>
 
 //
 //
diff --git a/arch/ia64/lib/strncpy_from_user.S b/arch/ia64/lib/strncpy_from_user.S
index 49eb81b69cd2..a287169bd953 100644
--- a/arch/ia64/lib/strncpy_from_user.S
+++ b/arch/ia64/lib/strncpy_from_user.S
@@ -17,8 +17,8 @@
  *			 by Andreas Schwab <schwab@suse.de>).
  */
 
+#include <linux/export.h>
 #include <asm/asmmacro.h>
-#include <asm/export.h>
 
 GLOBAL_ENTRY(__strncpy_from_user)
 	alloc r2=ar.pfs,3,0,0,0
diff --git a/arch/ia64/lib/strnlen_user.S b/arch/ia64/lib/strnlen_user.S
index 4b684d4da106..a7eb56e840a9 100644
--- a/arch/ia64/lib/strnlen_user.S
+++ b/arch/ia64/lib/strnlen_user.S
@@ -13,8 +13,8 @@
  * Copyright (C) 1999, 2001 David Mosberger-Tang <davidm@hpl.hp.com>
  */
 
+#include <linux/export.h>
 #include <asm/asmmacro.h>
-#include <asm/export.h>
 
 GLOBAL_ENTRY(__strnlen_user)
 	.prologue
diff --git a/arch/ia64/lib/xor.S b/arch/ia64/lib/xor.S
index 5413dafe6b2e..6e2a69662c06 100644
--- a/arch/ia64/lib/xor.S
+++ b/arch/ia64/lib/xor.S
@@ -5,8 +5,8 @@
  * Optimized RAID-5 checksumming functions for IA-64.
  */
 
+#include <linux/export.h>
 #include <asm/asmmacro.h>
-#include <asm/export.h>
 
 GLOBAL_ENTRY(xor_ia64_2)
 	.prologue
diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
index ecf282dee513..e14396a2ddcb 100644
--- a/arch/loongarch/Kconfig
+++ b/arch/loongarch/Kconfig
@@ -8,11 +8,13 @@ config LOONGARCH
 	select ACPI_PPTT if ACPI
 	select ACPI_SYSTEM_POWER_STATES_SUPPORT	if ACPI
 	select ARCH_BINFMT_ELF_STATE
+	select ARCH_DISABLE_KASAN_INLINE
 	select ARCH_ENABLE_MEMORY_HOTPLUG
 	select ARCH_ENABLE_MEMORY_HOTREMOVE
 	select ARCH_HAS_ACPI_TABLE_UPGRADE	if ACPI
 	select ARCH_HAS_CPU_FINALIZE_INIT
 	select ARCH_HAS_FORTIFY_SOURCE
+	select ARCH_HAS_KCOV
 	select ARCH_HAS_NMI_SAFE_THIS_CPU_OPS
 	select ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
 	select ARCH_HAS_PTE_SPECIAL
@@ -91,6 +93,9 @@ config LOONGARCH
 	select HAVE_ARCH_AUDITSYSCALL
 	select HAVE_ARCH_JUMP_LABEL
 	select HAVE_ARCH_JUMP_LABEL_RELATIVE
+	select HAVE_ARCH_KASAN
+	select HAVE_ARCH_KFENCE
+	select HAVE_ARCH_KGDB if PERF_EVENTS
 	select HAVE_ARCH_MMAP_RND_BITS if MMU
 	select HAVE_ARCH_SECCOMP_FILTER
 	select HAVE_ARCH_TRACEHOOK
@@ -115,6 +120,7 @@ config LOONGARCH
 	select HAVE_FUNCTION_GRAPH_RETVAL if HAVE_FUNCTION_GRAPH_TRACER
 	select HAVE_FUNCTION_GRAPH_TRACER
 	select HAVE_FUNCTION_TRACER
+	select HAVE_GCC_PLUGINS
 	select HAVE_GENERIC_VDSO
 	select HAVE_HW_BREAKPOINT if PERF_EVENTS
 	select HAVE_IOREMAP_PROT
@@ -254,6 +260,9 @@ config AS_HAS_LSX_EXTENSION
 config AS_HAS_LASX_EXTENSION
 	def_bool $(as-instr,xvld \$xr0$(comma)\$a0$(comma)0)
 
+config AS_HAS_LBT_EXTENSION
+	def_bool $(as-instr,movscr2gr \$a0$(comma)\$scr0)
+
 menu "Kernel type and options"
 
 source "kernel/Kconfig.hz"
@@ -534,6 +543,18 @@ config CPU_HAS_LASX
 
 	  If unsure, say Y.
 
+config CPU_HAS_LBT
+	bool "Support for the Loongson Binary Translation Extension"
+	depends on AS_HAS_LBT_EXTENSION
+	help
+	  Loongson Binary Translation (LBT) introduces 4 scratch registers (SCR0
+	  to SCR3), x86/ARM eflags (eflags) and x87 fpu stack pointer (ftop).
+	  Enabling this option allows the kernel to allocate and switch registers
+	  specific to LBT.
+
+	  If you want to use this feature, such as the Loongson Architecture
+	  Translator (LAT), say Y.
+
 config CPU_HAS_PREFETCH
 	bool
 	default y
@@ -638,6 +659,11 @@ config ARCH_MMAP_RND_BITS_MAX
 config ARCH_SUPPORTS_UPROBES
 	def_bool y
 
+config KASAN_SHADOW_OFFSET
+	hex
+	default 0x0
+	depends on KASAN
+
 menu "Power management options"
 
 config ARCH_SUSPEND_POSSIBLE
diff --git a/arch/loongarch/Makefile b/arch/loongarch/Makefile
index ef87bab46754..fb0fada43197 100644
--- a/arch/loongarch/Makefile
+++ b/arch/loongarch/Makefile
@@ -84,7 +84,10 @@ LDFLAGS_vmlinux			+= -static -pie --no-dynamic-linker -z notext
 endif
 
 cflags-y += $(call cc-option, -mno-check-zero-division)
+
+ifndef CONFIG_KASAN
 cflags-y += -fno-builtin-memcpy -fno-builtin-memmove -fno-builtin-memset
+endif
 
 load-y		= 0x9000000000200000
 bootvars-y	= VMLINUX_LOAD_ADDRESS=$(load-y)
diff --git a/arch/loongarch/configs/loongson3_defconfig b/arch/loongarch/configs/loongson3_defconfig
index d64849b4cba1..a3b52aaa83b3 100644
--- a/arch/loongarch/configs/loongson3_defconfig
+++ b/arch/loongarch/configs/loongson3_defconfig
@@ -30,7 +30,6 @@ CONFIG_NAMESPACES=y
 CONFIG_USER_NS=y
 CONFIG_CHECKPOINT_RESTORE=y
 CONFIG_SCHED_AUTOGROUP=y
-CONFIG_SYSFS_DEPRECATED=y
 CONFIG_RELAY=y
 CONFIG_BLK_DEV_INITRD=y
 CONFIG_EXPERT=y
@@ -47,8 +46,12 @@ CONFIG_SMP=y
 CONFIG_HOTPLUG_CPU=y
 CONFIG_NR_CPUS=64
 CONFIG_NUMA=y
+CONFIG_CPU_HAS_FPU=y
+CONFIG_CPU_HAS_LSX=y
+CONFIG_CPU_HAS_LASX=y
 CONFIG_KEXEC=y
 CONFIG_CRASH_DUMP=y
+CONFIG_RANDOMIZE_BASE=y
 CONFIG_SUSPEND=y
 CONFIG_HIBERNATION=y
 CONFIG_ACPI=y
@@ -63,6 +66,7 @@ CONFIG_EFI_ZBOOT=y
 CONFIG_EFI_GENERIC_STUB_INITRD_CMDLINE_LOADER=y
 CONFIG_EFI_CAPSULE_LOADER=m
 CONFIG_EFI_TEST=m
+CONFIG_JUMP_LABEL=y
 CONFIG_MODULES=y
 CONFIG_MODULE_FORCE_LOAD=y
 CONFIG_MODULE_UNLOAD=y
@@ -108,7 +112,12 @@ CONFIG_IP_PNP_BOOTP=y
 CONFIG_IP_PNP_RARP=y
 CONFIG_NET_IPIP=m
 CONFIG_NET_IPGRE_DEMUX=m
+CONFIG_NET_IPGRE=m
+CONFIG_NET_IPGRE_BROADCAST=y
 CONFIG_IP_MROUTE=y
+CONFIG_IP_MROUTE_MULTIPLE_TABLES=y
+CONFIG_IP_PIMSM_V1=y
+CONFIG_IP_PIMSM_V2=y
 CONFIG_INET_ESP=m
 CONFIG_INET_UDP_DIAG=y
 CONFIG_TCP_CONG_ADVANCED=y
@@ -137,7 +146,6 @@ CONFIG_NFT_MASQ=m
 CONFIG_NFT_REDIR=m
 CONFIG_NFT_NAT=m
 CONFIG_NFT_TUNNEL=m
-CONFIG_NFT_OBJREF=m
 CONFIG_NFT_QUEUE=m
 CONFIG_NFT_QUOTA=m
 CONFIG_NFT_REJECT=m
@@ -208,7 +216,11 @@ CONFIG_IP_VS=m
 CONFIG_IP_VS_IPV6=y
 CONFIG_IP_VS_PROTO_TCP=y
 CONFIG_IP_VS_PROTO_UDP=y
+CONFIG_IP_VS_PROTO_ESP=y
+CONFIG_IP_VS_PROTO_AH=y
+CONFIG_IP_VS_PROTO_SCTP=y
 CONFIG_IP_VS_RR=m
+CONFIG_IP_VS_WRR=m
 CONFIG_IP_VS_NFCT=y
 CONFIG_NF_TABLES_IPV4=y
 CONFIG_NFT_DUP_IPV4=m
@@ -227,7 +239,6 @@ CONFIG_IP_NF_TARGET_MASQUERADE=m
 CONFIG_IP_NF_TARGET_NETMAP=m
 CONFIG_IP_NF_TARGET_REDIRECT=m
 CONFIG_IP_NF_MANGLE=m
-CONFIG_IP_NF_TARGET_CLUSTERIP=m
 CONFIG_IP_NF_TARGET_ECN=m
 CONFIG_IP_NF_TARGET_TTL=m
 CONFIG_IP_NF_RAW=m
@@ -363,6 +374,8 @@ CONFIG_MTD_CFI_AMDSTD=m
 CONFIG_MTD_CFI_STAA=m
 CONFIG_MTD_RAM=m
 CONFIG_MTD_ROM=m
+CONFIG_MTD_UBI=m
+CONFIG_MTD_UBI_BLOCK=y
 CONFIG_PARPORT=y
 CONFIG_PARPORT_PC=y
 CONFIG_PARPORT_SERIAL=y
@@ -370,6 +383,7 @@ CONFIG_PARPORT_PC_FIFO=y
 CONFIG_ZRAM=m
 CONFIG_ZRAM_DEF_COMP_ZSTD=y
 CONFIG_BLK_DEV_LOOP=y
+CONFIG_BLK_DEV_DRBD=m
 CONFIG_BLK_DEV_NBD=m
 CONFIG_BLK_DEV_RAM=y
 CONFIG_BLK_DEV_RAM_SIZE=8192
@@ -516,6 +530,8 @@ CONFIG_STMMAC_ETH=y
 # CONFIG_NET_VENDOR_TEHUTI is not set
 # CONFIG_NET_VENDOR_TI is not set
 # CONFIG_NET_VENDOR_VIA is not set
+CONFIG_NGBE=y
+CONFIG_TXGBE=y
 # CONFIG_NET_VENDOR_WIZNET is not set
 # CONFIG_NET_VENDOR_XILINX is not set
 CONFIG_PPP=m
@@ -602,9 +618,15 @@ CONFIG_HW_RANDOM_VIRTIO=m
 CONFIG_I2C_CHARDEV=y
 CONFIG_I2C_PIIX4=y
 CONFIG_I2C_GPIO=y
+CONFIG_I2C_LS2X=y
 CONFIG_SPI=y
+CONFIG_SPI_LOONGSON_PCI=m
+CONFIG_SPI_LOONGSON_PLATFORM=m
+CONFIG_PINCTRL=y
+CONFIG_PINCTRL_LOONGSON2=y
 CONFIG_GPIO_SYSFS=y
 CONFIG_GPIO_LOONGSON=y
+CONFIG_GPIO_LOONGSON_64BIT=y
 CONFIG_POWER_RESET=y
 CONFIG_POWER_RESET_RESTART=y
 CONFIG_POWER_RESET_SYSCON=y
@@ -614,6 +636,7 @@ CONFIG_SENSORS_LM75=m
 CONFIG_SENSORS_LM93=m
 CONFIG_SENSORS_W83795=m
 CONFIG_SENSORS_W83627HF=m
+CONFIG_LOONGSON2_THERMAL=m
 CONFIG_RC_CORE=m
 CONFIG_LIRC=y
 CONFIG_RC_DECODERS=y
@@ -643,6 +666,7 @@ CONFIG_DRM_AMDGPU_USERPTR=y
 CONFIG_DRM_AST=y
 CONFIG_DRM_QXL=m
 CONFIG_DRM_VIRTIO_GPU=m
+CONFIG_DRM_LOONGSON=y
 CONFIG_FB=y
 CONFIG_FB_EFI=y
 CONFIG_FB_RADEON=y
@@ -712,6 +736,7 @@ CONFIG_UCSI_ACPI=m
 CONFIG_INFINIBAND=m
 CONFIG_RTC_CLASS=y
 CONFIG_RTC_DRV_EFI=y
+CONFIG_RTC_DRV_LOONGSON=y
 CONFIG_DMADEVICES=y
 CONFIG_UIO=m
 CONFIG_UIO_PDRV_GENIRQ=m
@@ -745,7 +770,9 @@ CONFIG_COMEDI_NI_LABPC_PCI=m
 CONFIG_COMEDI_NI_PCIDIO=m
 CONFIG_COMEDI_NI_PCIMIO=m
 CONFIG_STAGING=y
-CONFIG_R8188EU=m
+CONFIG_COMMON_CLK_LOONGSON2=y
+CONFIG_LOONGSON2_GUTS=y
+CONFIG_LOONGSON2_PM=y
 CONFIG_PM_DEVFREQ=y
 CONFIG_DEVFREQ_GOV_SIMPLE_ONDEMAND=y
 CONFIG_DEVFREQ_GOV_PERFORMANCE=y
@@ -759,10 +786,17 @@ CONFIG_EXT2_FS_SECURITY=y
 CONFIG_EXT3_FS=y
 CONFIG_EXT3_FS_POSIX_ACL=y
 CONFIG_EXT3_FS_SECURITY=y
+CONFIG_JFS_FS=m
+CONFIG_JFS_POSIX_ACL=y
+CONFIG_JFS_SECURITY=y
 CONFIG_XFS_FS=y
 CONFIG_XFS_QUOTA=y
 CONFIG_XFS_POSIX_ACL=y
+CONFIG_GFS2_FS=m
+CONFIG_GFS2_FS_LOCKING_DLM=y
+CONFIG_OCFS2_FS=m
 CONFIG_BTRFS_FS=y
+CONFIG_BTRFS_FS_POSIX_ACL=y
 CONFIG_FANOTIFY=y
 CONFIG_FANOTIFY_ACCESS_PERMISSIONS=y
 CONFIG_QUOTA=y
@@ -771,11 +805,14 @@ CONFIG_QFMT_V1=m
 CONFIG_QFMT_V2=m
 CONFIG_AUTOFS_FS=y
 CONFIG_FUSE_FS=m
+CONFIG_CUSE=m
+CONFIG_VIRTIO_FS=m
 CONFIG_OVERLAY_FS=y
 CONFIG_OVERLAY_FS_INDEX=y
 CONFIG_OVERLAY_FS_XINO_AUTO=y
 CONFIG_OVERLAY_FS_METACOPY=y
 CONFIG_FSCACHE=y
+CONFIG_CACHEFILES=m
 CONFIG_ISO9660_FS=y
 CONFIG_JOLIET=y
 CONFIG_ZISOFS=y
@@ -784,19 +821,42 @@ CONFIG_MSDOS_FS=m
 CONFIG_VFAT_FS=m
 CONFIG_FAT_DEFAULT_CODEPAGE=936
 CONFIG_FAT_DEFAULT_IOCHARSET="gb2312"
+CONFIG_EXFAT_FS=m
+CONFIG_NTFS3_FS=m
+CONFIG_NTFS3_64BIT_CLUSTER=y
+CONFIG_NTFS3_LZX_XPRESS=y
 CONFIG_PROC_KCORE=y
 CONFIG_TMPFS=y
 CONFIG_TMPFS_POSIX_ACL=y
 CONFIG_HUGETLBFS=y
 CONFIG_CONFIGFS_FS=y
+CONFIG_ORANGEFS_FS=m
+CONFIG_ECRYPT_FS=m
+CONFIG_ECRYPT_FS_MESSAGING=y
 CONFIG_HFS_FS=m
 CONFIG_HFSPLUS_FS=m
+CONFIG_UBIFS_FS=m
+CONFIG_UBIFS_FS_ADVANCED_COMPR=y
 CONFIG_CRAMFS=m
 CONFIG_SQUASHFS=y
 CONFIG_SQUASHFS_XATTR=y
 CONFIG_SQUASHFS_LZ4=y
 CONFIG_SQUASHFS_LZO=y
 CONFIG_SQUASHFS_XZ=y
+CONFIG_MINIX_FS=m
+CONFIG_ROMFS_FS=m
+CONFIG_PSTORE=m
+CONFIG_PSTORE_LZO_COMPRESS=m
+CONFIG_PSTORE_LZ4_COMPRESS=m
+CONFIG_PSTORE_LZ4HC_COMPRESS=m
+CONFIG_PSTORE_842_COMPRESS=y
+CONFIG_PSTORE_ZSTD_COMPRESS=y
+CONFIG_PSTORE_ZSTD_COMPRESS_DEFAULT=y
+CONFIG_SYSV_FS=m
+CONFIG_UFS_FS=m
+CONFIG_EROFS_FS=m
+CONFIG_EROFS_FS_ZIP_LZMA=y
+CONFIG_EROFS_FS_PCPU_KTHREAD=y
 CONFIG_NFS_FS=y
 CONFIG_NFS_V3_ACL=y
 CONFIG_NFS_V4=y
@@ -807,6 +867,10 @@ CONFIG_NFSD=y
 CONFIG_NFSD_V3_ACL=y
 CONFIG_NFSD_V4=y
 CONFIG_NFSD_BLOCKLAYOUT=y
+CONFIG_CEPH_FS=m
+CONFIG_CEPH_FSCACHE=y
+CONFIG_CEPH_FS_POSIX_ACL=y
+CONFIG_CEPH_FS_SECURITY_LABEL=y
 CONFIG_CIFS=m
 # CONFIG_CIFS_DEBUG is not set
 CONFIG_9P_FS=y
@@ -814,6 +878,7 @@ CONFIG_NLS_CODEPAGE_437=y
 CONFIG_NLS_CODEPAGE_936=y
 CONFIG_NLS_ASCII=y
 CONFIG_NLS_UTF8=y
+CONFIG_DLM=m
 CONFIG_KEY_DH_OPERATIONS=y
 CONFIG_SECURITY=y
 CONFIG_SECURITY_SELINUX=y
@@ -847,6 +912,7 @@ CONFIG_CRYPTO_USER_API_HASH=m
 CONFIG_CRYPTO_USER_API_SKCIPHER=m
 CONFIG_CRYPTO_USER_API_RNG=m
 CONFIG_CRYPTO_USER_API_AEAD=m
+CONFIG_CRYPTO_CRC32_LOONGARCH=m
 CONFIG_CRYPTO_DEV_VIRTIO=m
 CONFIG_PRINTK_TIME=y
 CONFIG_STRIP_ASM_SYMS=y
diff --git a/arch/loongarch/include/asm/asm-prototypes.h b/arch/loongarch/include/asm/asm-prototypes.h
index ed06d3997420..cf8e1a4e7c19 100644
--- a/arch/loongarch/include/asm/asm-prototypes.h
+++ b/arch/loongarch/include/asm/asm-prototypes.h
@@ -1,6 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 #include <linux/uaccess.h>
 #include <asm/fpu.h>
+#include <asm/lbt.h>
 #include <asm/mmu_context.h>
 #include <asm/page.h>
 #include <asm/ftrace.h>
diff --git a/arch/loongarch/include/asm/asmmacro.h b/arch/loongarch/include/asm/asmmacro.h
index 79e1d53fea89..c9544f358c33 100644
--- a/arch/loongarch/include/asm/asmmacro.h
+++ b/arch/loongarch/include/asm/asmmacro.h
@@ -10,113 +10,6 @@
 #include <asm/fpregdef.h>
 #include <asm/loongarch.h>
 
-	.macro	parse_v var val
-	\var	= \val
-	.endm
-
-	.macro	parse_r var r
-	\var	= -1
-	.ifc	\r, $r0
-	\var	= 0
-	.endif
-	.ifc	\r, $r1
-	\var	= 1
-	.endif
-	.ifc	\r, $r2
-	\var	= 2
-	.endif
-	.ifc	\r, $r3
-	\var	= 3
-	.endif
-	.ifc	\r, $r4
-	\var	= 4
-	.endif
-	.ifc	\r, $r5
-	\var	= 5
-	.endif
-	.ifc	\r, $r6
-	\var	= 6
-	.endif
-	.ifc	\r, $r7
-	\var	= 7
-	.endif
-	.ifc	\r, $r8
-	\var	= 8
-	.endif
-	.ifc	\r, $r9
-	\var	= 9
-	.endif
-	.ifc	\r, $r10
-	\var	= 10
-	.endif
-	.ifc	\r, $r11
-	\var	= 11
-	.endif
-	.ifc	\r, $r12
-	\var	= 12
-	.endif
-	.ifc	\r, $r13
-	\var	= 13
-	.endif
-	.ifc	\r, $r14
-	\var	= 14
-	.endif
-	.ifc	\r, $r15
-	\var	= 15
-	.endif
-	.ifc	\r, $r16
-	\var	= 16
-	.endif
-	.ifc	\r, $r17
-	\var	= 17
-	.endif
-	.ifc	\r, $r18
-	\var	= 18
-	.endif
-	.ifc	\r, $r19
-	\var	= 19
-	.endif
-	.ifc	\r, $r20
-	\var	= 20
-	.endif
-	.ifc	\r, $r21
-	\var	= 21
-	.endif
-	.ifc	\r, $r22
-	\var	= 22
-	.endif
-	.ifc	\r, $r23
-	\var	= 23
-	.endif
-	.ifc	\r, $r24
-	\var	= 24
-	.endif
-	.ifc	\r, $r25
-	\var	= 25
-	.endif
-	.ifc	\r, $r26
-	\var	= 26
-	.endif
-	.ifc	\r, $r27
-	\var	= 27
-	.endif
-	.ifc	\r, $r28
-	\var	= 28
-	.endif
-	.ifc	\r, $r29
-	\var	= 29
-	.endif
-	.ifc	\r, $r30
-	\var	= 30
-	.endif
-	.ifc	\r, $r31
-	\var	= 31
-	.endif
-	.iflt	\var
-	.error	"Unable to parse register name \r"
-	.endif
-	.endm
-
 	.macro	cpu_save_nonscratch thread
 	stptr.d	s0, \thread, THREAD_REG23
 	stptr.d	s1, \thread, THREAD_REG24
@@ -148,12 +41,51 @@
 
 	.macro fpu_save_csr thread tmp
 	movfcsr2gr	\tmp, fcsr0
-	stptr.w	\tmp, \thread, THREAD_FCSR
+	stptr.w		\tmp, \thread, THREAD_FCSR
+#ifdef CONFIG_CPU_HAS_LBT
+	/* TM bit is always 0 if LBT not supported */
+	andi		\tmp, \tmp, FPU_CSR_TM
+	beqz		\tmp, 1f
+	/* Save FTOP */
+	x86mftop	\tmp
+	stptr.w		\tmp, \thread, THREAD_FTOP
+	/* Turn off TM to ensure the order of FPR in memory independent of TM */
+	x86clrtm
+1:
+#endif
 	.endm
 
-	.macro fpu_restore_csr thread tmp
-	ldptr.w	\tmp, \thread, THREAD_FCSR
-	movgr2fcsr	fcsr0, \tmp
+	.macro fpu_restore_csr thread tmp0 tmp1
+	ldptr.w		\tmp0, \thread, THREAD_FCSR
+	movgr2fcsr	fcsr0, \tmp0
+#ifdef CONFIG_CPU_HAS_LBT
+	/* TM bit is always 0 if LBT not supported */
+	andi		\tmp0, \tmp0, FPU_CSR_TM
+	beqz		\tmp0, 2f
+	/* Restore FTOP */
+	ldptr.w		\tmp0, \thread, THREAD_FTOP
+	andi		\tmp0, \tmp0, 0x7
+	la.pcrel	\tmp1, 1f
+	alsl.d		\tmp1, \tmp0, \tmp1, 3
+	jr		\tmp1
+1:
+	x86mttop	0
+	b	2f
+	x86mttop	1
+	b	2f
+	x86mttop	2
+	b	2f
+	x86mttop	3
+	b	2f
+	x86mttop	4
+	b	2f
+	x86mttop	5
+	b	2f
+	x86mttop	6
+	b	2f
+	x86mttop	7
+2:
+#endif
 	.endm
 
 	.macro fpu_save_cc thread tmp0 tmp1
@@ -353,7 +285,7 @@
 	.macro	lsx_restore_all	thread tmp0 tmp1
 	lsx_restore_data	\thread, \tmp0
 	fpu_restore_cc		\thread, \tmp0, \tmp1
-	fpu_restore_csr		\thread, \tmp0
+	fpu_restore_csr		\thread, \tmp0, \tmp1
 	.endm
 
 	.macro	lsx_save_upper vd base tmp off
@@ -563,7 +495,7 @@
 	.macro	lasx_restore_all thread tmp0 tmp1
 	lasx_restore_data	\thread, \tmp0
 	fpu_restore_cc		\thread, \tmp0, \tmp1
-	fpu_restore_csr		\thread, \tmp0
+	fpu_restore_csr		\thread, \tmp0, \tmp1
 	.endm
 
 	.macro	lasx_save_upper xd base tmp off
diff --git a/arch/loongarch/include/asm/kasan.h b/arch/loongarch/include/asm/kasan.h
new file mode 100644
index 000000000000..deeff8158f45
--- /dev/null
+++ b/arch/loongarch/include/asm/kasan.h
@@ -0,0 +1,126 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_KASAN_H
+#define __ASM_KASAN_H
+
+#ifndef __ASSEMBLY__
+
+#include <linux/linkage.h>
+#include <linux/mmzone.h>
+#include <asm/addrspace.h>
+#include <asm/io.h>
+#include <asm/pgtable.h>
+
+#define __HAVE_ARCH_SHADOW_MAP
+
+#define KASAN_SHADOW_SCALE_SHIFT 3
+#define KASAN_SHADOW_OFFSET	_AC(CONFIG_KASAN_SHADOW_OFFSET, UL)
+
+#define XRANGE_SHIFT (48)
+
+/* Valid address length */
+#define XRANGE_SHADOW_SHIFT	(PGDIR_SHIFT + PAGE_SHIFT - 3)
+/* Used for taking out the valid address */
+#define XRANGE_SHADOW_MASK	GENMASK_ULL(XRANGE_SHADOW_SHIFT - 1, 0)
+/* One segment whole address space size */
+#define XRANGE_SIZE		(XRANGE_SHADOW_MASK + 1)
+
+/* 64-bit segment value. */
+#define XKPRANGE_UC_SEG		(0x8000)
+#define XKPRANGE_CC_SEG		(0x9000)
+#define XKVRANGE_VC_SEG		(0xffff)
+
+/* Cached */
+#define XKPRANGE_CC_START		CACHE_BASE
+#define XKPRANGE_CC_SIZE		XRANGE_SIZE
+#define XKPRANGE_CC_KASAN_OFFSET	(0)
+#define XKPRANGE_CC_SHADOW_SIZE		(XKPRANGE_CC_SIZE >> KASAN_SHADOW_SCALE_SHIFT)
+#define XKPRANGE_CC_SHADOW_END		(XKPRANGE_CC_KASAN_OFFSET + XKPRANGE_CC_SHADOW_SIZE)
+
+/* UnCached */
+#define XKPRANGE_UC_START		UNCACHE_BASE
+#define XKPRANGE_UC_SIZE		XRANGE_SIZE
+#define XKPRANGE_UC_KASAN_OFFSET	XKPRANGE_CC_SHADOW_END
+#define XKPRANGE_UC_SHADOW_SIZE		(XKPRANGE_UC_SIZE >> KASAN_SHADOW_SCALE_SHIFT)
+#define XKPRANGE_UC_SHADOW_END		(XKPRANGE_UC_KASAN_OFFSET + XKPRANGE_UC_SHADOW_SIZE)
+
+/* VMALLOC (Cached or UnCached)  */
+#define XKVRANGE_VC_START		MODULES_VADDR
+#define XKVRANGE_VC_SIZE		round_up(KFENCE_AREA_END - MODULES_VADDR + 1, PGDIR_SIZE)
+#define XKVRANGE_VC_KASAN_OFFSET	XKPRANGE_UC_SHADOW_END
+#define XKVRANGE_VC_SHADOW_SIZE		(XKVRANGE_VC_SIZE >> KASAN_SHADOW_SCALE_SHIFT)
+#define XKVRANGE_VC_SHADOW_END		(XKVRANGE_VC_KASAN_OFFSET + XKVRANGE_VC_SHADOW_SIZE)
+
+/* KAsan shadow memory start right after vmalloc. */
+#define KASAN_SHADOW_START		round_up(KFENCE_AREA_END, PGDIR_SIZE)
+#define KASAN_SHADOW_SIZE		(XKVRANGE_VC_SHADOW_END - XKPRANGE_CC_KASAN_OFFSET)
+#define KASAN_SHADOW_END		round_up(KASAN_SHADOW_START + KASAN_SHADOW_SIZE, PGDIR_SIZE)
+
+#define XKPRANGE_CC_SHADOW_OFFSET	(KASAN_SHADOW_START + XKPRANGE_CC_KASAN_OFFSET)
+#define XKPRANGE_UC_SHADOW_OFFSET	(KASAN_SHADOW_START + XKPRANGE_UC_KASAN_OFFSET)
+#define XKVRANGE_VC_SHADOW_OFFSET	(KASAN_SHADOW_START + XKVRANGE_VC_KASAN_OFFSET)
+
+extern bool kasan_early_stage;
+extern unsigned char kasan_early_shadow_page[PAGE_SIZE];
+
+#define kasan_arch_is_ready kasan_arch_is_ready
+static __always_inline bool kasan_arch_is_ready(void)
+{
+	return !kasan_early_stage;
+}
+
+static inline void *kasan_mem_to_shadow(const void *addr)
+{
+	if (!kasan_arch_is_ready()) {
+		return (void *)(kasan_early_shadow_page);
+	} else {
+		unsigned long maddr = (unsigned long)addr;
+		unsigned long xrange = (maddr >> XRANGE_SHIFT) & 0xffff;
+		unsigned long offset = 0;
+
+		maddr &= XRANGE_SHADOW_MASK;
+		switch (xrange) {
+		case XKPRANGE_CC_SEG:
+			offset = XKPRANGE_CC_SHADOW_OFFSET;
+			break;
+		case XKPRANGE_UC_SEG:
+			offset = XKPRANGE_UC_SHADOW_OFFSET;
+			break;
+		case XKVRANGE_VC_SEG:
+			offset = XKVRANGE_VC_SHADOW_OFFSET;
+			break;
+		default:
+			WARN_ON(1);
+			return NULL;
+		}
+
+		return (void *)((maddr >> KASAN_SHADOW_SCALE_SHIFT) + offset);
+	}
+}
+
+static inline const void *kasan_shadow_to_mem(const void *shadow_addr)
+{
+	unsigned long addr = (unsigned long)shadow_addr;
+
+	if (unlikely(addr > KASAN_SHADOW_END) ||
+		unlikely(addr < KASAN_SHADOW_START)) {
+		WARN_ON(1);
+		return NULL;
+	}
+
+	if (addr >= XKVRANGE_VC_SHADOW_OFFSET)
+		return (void *)(((addr - XKVRANGE_VC_SHADOW_OFFSET) << KASAN_SHADOW_SCALE_SHIFT) + XKVRANGE_VC_START);
+	else if (addr >= XKPRANGE_UC_SHADOW_OFFSET)
+		return (void *)(((addr - XKPRANGE_UC_SHADOW_OFFSET) << KASAN_SHADOW_SCALE_SHIFT) + XKPRANGE_UC_START);
+	else if (addr >= XKPRANGE_CC_SHADOW_OFFSET)
+		return (void *)(((addr - XKPRANGE_CC_SHADOW_OFFSET) << KASAN_SHADOW_SCALE_SHIFT) + XKPRANGE_CC_START);
+	else {
+		WARN_ON(1);
+		return NULL;
+	}
+}
+
+void kasan_init(void);
+asmlinkage void kasan_early_init(void);
+
+#endif
+#endif
diff --git a/arch/loongarch/include/asm/kfence.h b/arch/loongarch/include/asm/kfence.h
new file mode 100644
index 000000000000..6c82aea1c993
--- /dev/null
+++ b/arch/loongarch/include/asm/kfence.h
@@ -0,0 +1,61 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * KFENCE support for LoongArch.
+ *
+ * Author: Enze Li <lienze@kylinos.cn>
+ * Copyright (C) 2022-2023 KylinSoft Corporation.
+ */
+
+#ifndef _ASM_LOONGARCH_KFENCE_H
+#define _ASM_LOONGARCH_KFENCE_H
+
+#include <linux/kfence.h>
+#include <asm/pgtable.h>
+#include <asm/tlb.h>
+
+static inline bool arch_kfence_init_pool(void)
+{
+	int err;
+	char *kfence_pool = __kfence_pool;
+	struct vm_struct *area;
+
+	area = __get_vm_area_caller(KFENCE_POOL_SIZE, VM_IOREMAP,
+				    KFENCE_AREA_START, KFENCE_AREA_END,
+				    __builtin_return_address(0));
+	if (!area)
+		return false;
+
+	__kfence_pool = (char *)area->addr;
+	err = ioremap_page_range((unsigned long)__kfence_pool,
+				 (unsigned long)__kfence_pool + KFENCE_POOL_SIZE,
+				 virt_to_phys((void *)kfence_pool), PAGE_KERNEL);
+	if (err) {
+		free_vm_area(area);
+		__kfence_pool = kfence_pool;
+		return false;
+	}
+
+	return true;
+}
+
+/* Protect the given page and flush TLB. */
+static inline bool kfence_protect_page(unsigned long addr, bool protect)
+{
+	pte_t *pte = virt_to_kpte(addr);
+
+	if (WARN_ON(!pte) || pte_none(*pte))
+		return false;
+
+	if (protect)
+		set_pte(pte, __pte(pte_val(*pte) & ~(_PAGE_VALID | _PAGE_PRESENT)));
+	else
+		set_pte(pte, __pte(pte_val(*pte) | (_PAGE_VALID | _PAGE_PRESENT)));
+
+	preempt_disable();
+	local_flush_tlb_one(addr);
+	preempt_enable();
+
+	return true;
+}
+
+#endif /* _ASM_LOONGARCH_KFENCE_H */
diff --git a/arch/loongarch/include/asm/kgdb.h b/arch/loongarch/include/asm/kgdb.h
new file mode 100644
index 000000000000..2041ae58b161
--- /dev/null
+++ b/arch/loongarch/include/asm/kgdb.h
@@ -0,0 +1,97 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2023 Loongson Technology Corporation Limited
+ */
+
+#ifndef _ASM_LOONGARCH_KGDB_H
+#define _ASM_LOONGARCH_KGDB_H
+
+#define GDB_SIZEOF_REG		sizeof(u64)
+
+/* gdb remote procotol expects the following register layout. */
+
+/*
+ * General purpose registers:
+ *     r0-r31: 64 bit
+ *     orig_a0: 64 bit
+ *     pc : 64 bit
+ *     csr_badvaddr: 64 bit
+ */
+#define DBG_PT_REGS_BASE	0
+#define DBG_PT_REGS_NUM		35
+#define DBG_PT_REGS_END		(DBG_PT_REGS_BASE + DBG_PT_REGS_NUM - 1)
+
+/*
+ * Floating point registers:
+ *     f0-f31: 64 bit
+ */
+#define DBG_FPR_BASE		(DBG_PT_REGS_END + 1)
+#define DBG_FPR_NUM		32
+#define DBG_FPR_END		(DBG_FPR_BASE + DBG_FPR_NUM - 1)
+
+/*
+ * Condition Flag registers:
+ *     fcc0-fcc8: 8 bit
+ */
+#define DBG_FCC_BASE		(DBG_FPR_END + 1)
+#define DBG_FCC_NUM		8
+#define DBG_FCC_END		(DBG_FCC_BASE + DBG_FCC_NUM - 1)
+
+/*
+ * Floating-point Control and Status registers:
+ *     fcsr: 32 bit
+ */
+#define DBG_FCSR_NUM		1
+#define DBG_FCSR		(DBG_FCC_END + 1)
+
+#define DBG_MAX_REG_NUM		(DBG_FCSR + 1)
+
+/*
+ * Size of I/O buffer for gdb packet.
+ * considering to hold all register contents, size is set
+ */
+#define BUFMAX			2048
+
+/*
+ * Number of bytes required for gdb_regs buffer.
+ * PT_REGS and FPR: 8 bytes; FCSR: 4 bytes; FCC: 1 bytes.
+ * GDB fails to connect for size beyond this with error
+ * "'g' packet reply is too long"
+ */
+#define NUMREGBYTES		((DBG_PT_REGS_NUM + DBG_FPR_NUM) * GDB_SIZEOF_REG + DBG_FCC_NUM * 1 + DBG_FCSR_NUM * 4)
+
+#define BREAK_INSTR_SIZE	4
+#define CACHE_FLUSH_IS_SAFE	0
+
+/* Register numbers of various important registers. */
+enum dbg_loongarch_regnum {
+	DBG_LOONGARCH_ZERO = 0,
+	DBG_LOONGARCH_RA,
+	DBG_LOONGARCH_TP,
+	DBG_LOONGARCH_SP,
+	DBG_LOONGARCH_A0,
+	DBG_LOONGARCH_FP = 22,
+	DBG_LOONGARCH_S0,
+	DBG_LOONGARCH_S1,
+	DBG_LOONGARCH_S2,
+	DBG_LOONGARCH_S3,
+	DBG_LOONGARCH_S4,
+	DBG_LOONGARCH_S5,
+	DBG_LOONGARCH_S6,
+	DBG_LOONGARCH_S7,
+	DBG_LOONGARCH_S8,
+	DBG_LOONGARCH_ORIG_A0,
+	DBG_LOONGARCH_PC,
+	DBG_LOONGARCH_BADV
+};
+
+void kgdb_breakinst(void);
+void arch_kgdb_breakpoint(void);
+
+#ifdef CONFIG_KGDB
+bool kgdb_breakpoint_handler(struct pt_regs *regs);
+#else /* !CONFIG_KGDB */
+static inline bool kgdb_breakpoint_handler(struct pt_regs *regs) { return false; }
+#endif /* CONFIG_KGDB */
+
+#endif /* __ASM_KGDB_H_ */
diff --git a/arch/loongarch/include/asm/lbt.h b/arch/loongarch/include/asm/lbt.h
new file mode 100644
index 000000000000..e671978bf552
--- /dev/null
+++ b/arch/loongarch/include/asm/lbt.h
@@ -0,0 +1,109 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Author: Qi Hu <huqi@loongson.cn>
+ *         Huacai Chen <chenhuacai@loongson.cn>
+ * Copyright (C) 2020-2023 Loongson Technology Corporation Limited
+ */
+#ifndef _ASM_LBT_H
+#define _ASM_LBT_H
+
+#include <asm/cpu.h>
+#include <asm/current.h>
+#include <asm/loongarch.h>
+#include <asm/processor.h>
+
+extern void _init_lbt(void);
+extern void _save_lbt(struct loongarch_lbt *);
+extern void _restore_lbt(struct loongarch_lbt *);
+
+static inline int is_lbt_enabled(void)
+{
+	if (!cpu_has_lbt)
+		return 0;
+
+	return (csr_read32(LOONGARCH_CSR_EUEN) & CSR_EUEN_LBTEN) ?
+		1 : 0;
+}
+
+static inline int is_lbt_owner(void)
+{
+	return test_thread_flag(TIF_USEDLBT);
+}
+
+#ifdef CONFIG_CPU_HAS_LBT
+
+static inline void enable_lbt(void)
+{
+	if (cpu_has_lbt)
+		csr_xchg32(CSR_EUEN_LBTEN, CSR_EUEN_LBTEN, LOONGARCH_CSR_EUEN);
+}
+
+static inline void disable_lbt(void)
+{
+	if (cpu_has_lbt)
+		csr_xchg32(0, CSR_EUEN_LBTEN, LOONGARCH_CSR_EUEN);
+}
+
+static inline void __own_lbt(void)
+{
+	enable_lbt();
+	set_thread_flag(TIF_USEDLBT);
+	KSTK_EUEN(current) |= CSR_EUEN_LBTEN;
+}
+
+static inline void own_lbt_inatomic(int restore)
+{
+	if (cpu_has_lbt && !is_lbt_owner()) {
+		__own_lbt();
+		if (restore)
+			_restore_lbt(&current->thread.lbt);
+	}
+}
+
+static inline void own_lbt(int restore)
+{
+	preempt_disable();
+	own_lbt_inatomic(restore);
+	preempt_enable();
+}
+
+static inline void lose_lbt_inatomic(int save, struct task_struct *tsk)
+{
+	if (cpu_has_lbt && is_lbt_owner()) {
+		if (save)
+			_save_lbt(&tsk->thread.lbt);
+
+		disable_lbt();
+		clear_tsk_thread_flag(tsk, TIF_USEDLBT);
+	}
+	KSTK_EUEN(tsk) &= ~(CSR_EUEN_LBTEN);
+}
+
+static inline void lose_lbt(int save)
+{
+	preempt_disable();
+	lose_lbt_inatomic(save, current);
+	preempt_enable();
+}
+
+static inline void init_lbt(void)
+{
+	__own_lbt();
+	_init_lbt();
+}
+#else
+static inline void own_lbt_inatomic(int restore) {}
+static inline void lose_lbt_inatomic(int save, struct task_struct *tsk) {}
+static inline void init_lbt(void) {}
+static inline void lose_lbt(int save) {}
+#endif
+
+static inline int thread_lbt_context_live(void)
+{
+	if (!cpu_has_lbt)
+		return 0;
+
+	return test_thread_flag(TIF_LBT_CTX_LIVE);
+}
+
+#endif /* _ASM_LBT_H */
diff --git a/arch/loongarch/include/asm/loongarch.h b/arch/loongarch/include/asm/loongarch.h
index 10748a20a2ab..33531d432b49 100644
--- a/arch/loongarch/include/asm/loongarch.h
+++ b/arch/loongarch/include/asm/loongarch.h
@@ -12,49 +12,6 @@
 #ifndef __ASSEMBLY__
 #include <larchintrin.h>
 
-/*
- * parse_r var, r - Helper assembler macro for parsing register names.
- *
- * This converts the register name in $n form provided in \r to the
- * corresponding register number, which is assigned to the variable \var. It is
- * needed to allow explicit encoding of instructions in inline assembly where
- * registers are chosen by the compiler in $n form, allowing us to avoid using
- * fixed register numbers.
- *
- * It also allows newer instructions (not implemented by the assembler) to be
- * transparently implemented using assembler macros, instead of needing separate
- * cases depending on toolchain support.
- *
- * Simple usage example:
- * __asm__ __volatile__("parse_r addr, %0\n\t"
- *			"#invtlb op, 0, %0\n\t"
- *			".word ((0x6498000) | (addr << 10) | (0 << 5) | op)"
- *			: "=r" (status);
- */
-
-/* Match an individual register number and assign to \var */
-#define _IFC_REG(n)				\
-	".ifc	\\r, $r" #n "\n\t"		\
-	"\\var	= " #n "\n\t"			\
-	".endif\n\t"
-
-__asm__(".macro	parse_r var r\n\t"
-	"\\var	= -1\n\t"
-	_IFC_REG(0)  _IFC_REG(1)  _IFC_REG(2)  _IFC_REG(3)
-	_IFC_REG(4)  _IFC_REG(5)  _IFC_REG(6)  _IFC_REG(7)
-	_IFC_REG(8)  _IFC_REG(9)  _IFC_REG(10) _IFC_REG(11)
-	_IFC_REG(12) _IFC_REG(13) _IFC_REG(14) _IFC_REG(15)
-	_IFC_REG(16) _IFC_REG(17) _IFC_REG(18) _IFC_REG(19)
-	_IFC_REG(20) _IFC_REG(21) _IFC_REG(22) _IFC_REG(23)
-	_IFC_REG(24) _IFC_REG(25) _IFC_REG(26) _IFC_REG(27)
-	_IFC_REG(28) _IFC_REG(29) _IFC_REG(30) _IFC_REG(31)
-	".iflt	\\var\n\t"
-	".error	\"Unable to parse register name \\r\"\n\t"
-	".endif\n\t"
-	".endm");
-
-#undef _IFC_REG
-
 /* CPUCFG */
 #define read_cpucfg(reg) __cpucfg(reg)
 
@@ -1453,6 +1410,10 @@ __BUILD_CSR_OP(tlbidx)
 #define FPU_CSR_RU	0x200	/* towards +Infinity */
 #define FPU_CSR_RD	0x300	/* towards -Infinity */
 
+/* Bit 6 of FPU Status Register specify the LBT TOP simulation mode */
+#define FPU_CSR_TM_SHIFT	0x6
+#define FPU_CSR_TM		(_ULCAST_(1) << FPU_CSR_TM_SHIFT)
+
 #define read_fcsr(source)	\
 ({	\
 	unsigned int __res;	\
diff --git a/arch/loongarch/include/asm/mmzone.h b/arch/loongarch/include/asm/mmzone.h
index fe67d0b4b33d..2b9a90727e19 100644
--- a/arch/loongarch/include/asm/mmzone.h
+++ b/arch/loongarch/include/asm/mmzone.h
@@ -13,6 +13,4 @@ extern struct pglist_data *node_data[];
 
 #define NODE_DATA(nid)	(node_data[(nid)])
 
-extern void setup_zero_pages(void);
-
 #endif /* _ASM_MMZONE_H_ */
diff --git a/arch/loongarch/include/asm/page.h b/arch/loongarch/include/asm/page.h
index 26e8dccb6619..63f137ce82a4 100644
--- a/arch/loongarch/include/asm/page.h
+++ b/arch/loongarch/include/asm/page.h
@@ -84,7 +84,12 @@ typedef struct { unsigned long pgprot; } pgprot_t;
 #define sym_to_pfn(x)		__phys_to_pfn(__pa_symbol(x))
 
 #define virt_to_pfn(kaddr)	PFN_DOWN(PHYSADDR(kaddr))
-#define virt_to_page(kaddr)	pfn_to_page(virt_to_pfn(kaddr))
+
+#define virt_to_page(kaddr)								\
+({											\
+	(likely((unsigned long)kaddr < vm_map_base)) ?					\
+	dmw_virt_to_page((unsigned long)kaddr) : tlb_virt_to_page((unsigned long)kaddr);\
+})
 
 extern int __virt_addr_valid(volatile void *kaddr);
 #define virt_addr_valid(kaddr)	__virt_addr_valid((volatile void *)(kaddr))
diff --git a/arch/loongarch/include/asm/pgalloc.h b/arch/loongarch/include/asm/pgalloc.h
index 23f5b1107246..79470f0b4f1d 100644
--- a/arch/loongarch/include/asm/pgalloc.h
+++ b/arch/loongarch/include/asm/pgalloc.h
@@ -94,4 +94,5 @@ static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long address)
 
 #endif /* __PAGETABLE_PUD_FOLDED */
 
+extern pte_t * __init populate_kernel_pte(unsigned long addr);
 #endif /* _ASM_PGALLOC_H */
diff --git a/arch/loongarch/include/asm/pgtable.h b/arch/loongarch/include/asm/pgtable.h
index 06963a172319..29d9b12298bc 100644
--- a/arch/loongarch/include/asm/pgtable.h
+++ b/arch/loongarch/include/asm/pgtable.h
@@ -70,12 +70,9 @@ struct vm_area_struct;
  * for zero-mapped memory areas etc..
  */
 
-extern unsigned long empty_zero_page;
-extern unsigned long zero_page_mask;
+extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)];
 
-#define ZERO_PAGE(vaddr) \
-	(virt_to_page((void *)(empty_zero_page + (((unsigned long)(vaddr)) & zero_page_mask))))
-#define __HAVE_COLOR_ZERO_PAGE
+#define ZERO_PAGE(vaddr)	virt_to_page(empty_zero_page)
 
 /*
  * TLB refill handlers may also map the vmalloc area into xkvrange.
@@ -85,14 +82,30 @@ extern unsigned long zero_page_mask;
 #define MODULES_VADDR	(vm_map_base + PCI_IOSIZE + (2 * PAGE_SIZE))
 #define MODULES_END	(MODULES_VADDR + SZ_256M)
 
+#ifdef CONFIG_KFENCE
+#define KFENCE_AREA_SIZE	(((CONFIG_KFENCE_NUM_OBJECTS + 1) * 2 + 2) * PAGE_SIZE)
+#else
+#define KFENCE_AREA_SIZE	0
+#endif
+
 #define VMALLOC_START	MODULES_END
+
+#ifndef CONFIG_KASAN
 #define VMALLOC_END	\
 	(vm_map_base +	\
-	 min(PTRS_PER_PGD * PTRS_PER_PUD * PTRS_PER_PMD * PTRS_PER_PTE * PAGE_SIZE, (1UL << cpu_vabits)) - PMD_SIZE - VMEMMAP_SIZE)
+	 min(PTRS_PER_PGD * PTRS_PER_PUD * PTRS_PER_PMD * PTRS_PER_PTE * PAGE_SIZE, (1UL << cpu_vabits)) - PMD_SIZE - VMEMMAP_SIZE - KFENCE_AREA_SIZE)
+#else
+#define VMALLOC_END	\
+	(vm_map_base +	\
+	 min(PTRS_PER_PGD * PTRS_PER_PUD * PTRS_PER_PMD * PTRS_PER_PTE * PAGE_SIZE, (1UL << cpu_vabits) / 2) - PMD_SIZE - VMEMMAP_SIZE - KFENCE_AREA_SIZE)
+#endif
 
 #define vmemmap		((struct page *)((VMALLOC_END + PMD_SIZE) & PMD_MASK))
 #define VMEMMAP_END	((unsigned long)vmemmap + VMEMMAP_SIZE - 1)
 
+#define KFENCE_AREA_START	(VMEMMAP_END + 1)
+#define KFENCE_AREA_END		(KFENCE_AREA_START + KFENCE_AREA_SIZE - 1)
+
 #define pte_ERROR(e) \
 	pr_err("%s:%d: bad pte %016lx.\n", __FILE__, __LINE__, pte_val(e))
 #ifndef __PAGETABLE_PMD_FOLDED
@@ -350,6 +363,9 @@ static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *pt
 extern pgd_t swapper_pg_dir[];
 extern pgd_t invalid_pg_dir[];
 
+struct page *dmw_virt_to_page(unsigned long kaddr);
+struct page *tlb_virt_to_page(unsigned long kaddr);
+
 /*
  * The following only work if pte_present() is true.
  * Undefined behaviour if not..
@@ -596,6 +612,9 @@ static inline long pmd_protnone(pmd_t pmd)
 }
 #endif /* CONFIG_NUMA_BALANCING */
 
+#define pmd_leaf(pmd)		((pmd_val(pmd) & _PAGE_HUGE) != 0)
+#define pud_leaf(pud)		((pud_val(pud) & _PAGE_HUGE) != 0)
+
 /*
  * We provide our own get_unmapped area to cope with the virtual aliasing
  * constraints placed on us by the cache architecture.
diff --git a/arch/loongarch/include/asm/processor.h b/arch/loongarch/include/asm/processor.h
index 636e1c66398c..c3bc44b5f5b3 100644
--- a/arch/loongarch/include/asm/processor.h
+++ b/arch/loongarch/include/asm/processor.h
@@ -80,11 +80,22 @@ BUILD_FPR_ACCESS(32)
 BUILD_FPR_ACCESS(64)
 
 struct loongarch_fpu {
-	unsigned int	fcsr;
 	uint64_t	fcc;	/* 8x8 */
+	uint32_t	fcsr;
+	uint32_t	ftop;
 	union fpureg	fpr[NUM_FPU_REGS];
 };
 
+struct loongarch_lbt {
+	/* Scratch registers */
+	unsigned long scr0;
+	unsigned long scr1;
+	unsigned long scr2;
+	unsigned long scr3;
+	/* Eflags register */
+	unsigned long eflags;
+};
+
 #define INIT_CPUMASK { \
 	{0,} \
 }
@@ -113,15 +124,6 @@ struct thread_struct {
 	unsigned long csr_ecfg;
 	unsigned long csr_badvaddr;	/* Last user fault */
 
-	/* Scratch registers */
-	unsigned long scr0;
-	unsigned long scr1;
-	unsigned long scr2;
-	unsigned long scr3;
-
-	/* Eflags register */
-	unsigned long eflags;
-
 	/* Other stuff associated with the thread. */
 	unsigned long trap_nr;
 	unsigned long error_code;
@@ -133,6 +135,7 @@ struct thread_struct {
 	 * context because they are conditionally copied at fork().
 	 */
 	struct loongarch_fpu fpu FPU_ALIGN;
+	struct loongarch_lbt lbt; /* Also conditionally copied */
 
 	/* Hardware breakpoints pinned to this task. */
 	struct perf_event *hbp_break[LOONGARCH_MAX_BRP];
@@ -174,8 +177,9 @@ struct thread_struct {
 	 * FPU & vector registers				\
 	 */							\
 	.fpu			= {				\
-		.fcsr		= 0,				\
 		.fcc		= 0,				\
+		.fcsr		= 0,				\
+		.ftop		= 0,				\
 		.fpr		= {{{0,},},},			\
 	},							\
 	.hbp_break		= {0},				\
diff --git a/arch/loongarch/include/asm/setup.h b/arch/loongarch/include/asm/setup.h
index be05c0e706a2..a0bc159ce8bd 100644
--- a/arch/loongarch/include/asm/setup.h
+++ b/arch/loongarch/include/asm/setup.h
@@ -7,6 +7,7 @@
 #define _LOONGARCH_SETUP_H
 
 #include <linux/types.h>
+#include <asm/sections.h>
 #include <uapi/asm/setup.h>
 
 #define VECSIZE 0x200
@@ -33,8 +34,13 @@ extern long __la_abs_end;
 extern long __rela_dyn_begin;
 extern long __rela_dyn_end;
 
-extern void * __init relocate_kernel(void);
+extern unsigned long __init relocate_kernel(void);
 
 #endif
 
+static inline unsigned long kaslr_offset(void)
+{
+	return (unsigned long)&_text - VMLINUX_LOAD_ADDRESS;
+}
+
 #endif /* __SETUP_H */
diff --git a/arch/loongarch/include/asm/stackframe.h b/arch/loongarch/include/asm/stackframe.h
index 7df80e6ae9d2..4fb1e6408b98 100644
--- a/arch/loongarch/include/asm/stackframe.h
+++ b/arch/loongarch/include/asm/stackframe.h
@@ -158,6 +158,10 @@
 	cfi_st  u0, PT_R21, \docfi
 	csrrd	u0, PERCPU_BASE_KS
 9:
+#ifdef CONFIG_KGDB
+	li.w	t0, CSR_CRMD_WE
+	csrxchg	t0, t0, LOONGARCH_CSR_CRMD
+#endif
 	.endm
 
 	.macro	SAVE_ALL docfi=0
diff --git a/arch/loongarch/include/asm/string.h b/arch/loongarch/include/asm/string.h
index 7b29cc9c70aa..5bb5a90d2681 100644
--- a/arch/loongarch/include/asm/string.h
+++ b/arch/loongarch/include/asm/string.h
@@ -7,11 +7,31 @@
 
 #define __HAVE_ARCH_MEMSET
 extern void *memset(void *__s, int __c, size_t __count);
+extern void *__memset(void *__s, int __c, size_t __count);
 
 #define __HAVE_ARCH_MEMCPY
 extern void *memcpy(void *__to, __const__ void *__from, size_t __n);
+extern void *__memcpy(void *__to, __const__ void *__from, size_t __n);
 
 #define __HAVE_ARCH_MEMMOVE
 extern void *memmove(void *__dest, __const__ void *__src, size_t __n);
+extern void *__memmove(void *__dest, __const__ void *__src, size_t __n);
+
+#if defined(CONFIG_KASAN) && !defined(__SANITIZE_ADDRESS__)
+
+/*
+ * For files that are not instrumented (e.g. mm/slub.c) we
+ * should use not instrumented version of mem* functions.
+ */
+
+#define memset(s, c, n) __memset(s, c, n)
+#define memcpy(dst, src, len) __memcpy(dst, src, len)
+#define memmove(dst, src, len) __memmove(dst, src, len)
+
+#ifndef __NO_FORTIFY
+#define __NO_FORTIFY /* FORTIFY_SOURCE uses __builtin_memcpy, etc. */
+#endif
+
+#endif
 
 #endif /* _ASM_STRING_H */
diff --git a/arch/loongarch/include/asm/switch_to.h b/arch/loongarch/include/asm/switch_to.h
index 24e3094bebab..5b225aff3ba2 100644
--- a/arch/loongarch/include/asm/switch_to.h
+++ b/arch/loongarch/include/asm/switch_to.h
@@ -7,6 +7,7 @@
 
 #include <asm/cpu-features.h>
 #include <asm/fpu.h>
+#include <asm/lbt.h>
 
 struct task_struct;
 
@@ -34,6 +35,7 @@ extern asmlinkage struct task_struct *__switch_to(struct task_struct *prev,
 #define switch_to(prev, next, last)						\
 do {										\
 	lose_fpu_inatomic(1, prev);						\
+	lose_lbt_inatomic(1, prev);						\
 	hw_breakpoint_thread_switch(next);					\
 	(last) = __switch_to(prev, next, task_thread_info(next),		\
 		 __builtin_return_address(0), __builtin_frame_address(0));	\
diff --git a/arch/loongarch/include/asm/thread_info.h b/arch/loongarch/include/asm/thread_info.h
index 1a3354ca056e..8cb653d49a54 100644
--- a/arch/loongarch/include/asm/thread_info.h
+++ b/arch/loongarch/include/asm/thread_info.h
@@ -84,6 +84,8 @@ register unsigned long current_stack_pointer __asm__("$sp");
 #define TIF_SINGLESTEP		16	/* Single Step */
 #define TIF_LSX_CTX_LIVE	17	/* LSX context must be preserved */
 #define TIF_LASX_CTX_LIVE	18	/* LASX context must be preserved */
+#define TIF_USEDLBT		19	/* LBT was used by this task this quantum (SMP) */
+#define TIF_LBT_CTX_LIVE	20	/* LBT context must be preserved */
 
 #define _TIF_SIGPENDING		(1<<TIF_SIGPENDING)
 #define _TIF_NEED_RESCHED	(1<<TIF_NEED_RESCHED)
@@ -101,6 +103,8 @@ register unsigned long current_stack_pointer __asm__("$sp");
 #define _TIF_SINGLESTEP		(1<<TIF_SINGLESTEP)
 #define _TIF_LSX_CTX_LIVE	(1<<TIF_LSX_CTX_LIVE)
 #define _TIF_LASX_CTX_LIVE	(1<<TIF_LASX_CTX_LIVE)
+#define _TIF_USEDLBT		(1<<TIF_USEDLBT)
+#define _TIF_LBT_CTX_LIVE	(1<<TIF_LBT_CTX_LIVE)
 
 #endif /* __KERNEL__ */
 #endif /* _ASM_THREAD_INFO_H */
diff --git a/arch/loongarch/include/asm/xor.h b/arch/loongarch/include/asm/xor.h
new file mode 100644
index 000000000000..12467fffee46
--- /dev/null
+++ b/arch/loongarch/include/asm/xor.h
@@ -0,0 +1,68 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2023 WANG Xuerui <git@xen0n.name>
+ */
+#ifndef _ASM_LOONGARCH_XOR_H
+#define _ASM_LOONGARCH_XOR_H
+
+#include <asm/cpu-features.h>
+#include <asm/xor_simd.h>
+
+#ifdef CONFIG_CPU_HAS_LSX
+static struct xor_block_template xor_block_lsx = {
+	.name = "lsx",
+	.do_2 = xor_lsx_2,
+	.do_3 = xor_lsx_3,
+	.do_4 = xor_lsx_4,
+	.do_5 = xor_lsx_5,
+};
+
+#define XOR_SPEED_LSX()					\
+	do {						\
+		if (cpu_has_lsx)			\
+			xor_speed(&xor_block_lsx);	\
+	} while (0)
+#else /* CONFIG_CPU_HAS_LSX */
+#define XOR_SPEED_LSX()
+#endif /* CONFIG_CPU_HAS_LSX */
+
+#ifdef CONFIG_CPU_HAS_LASX
+static struct xor_block_template xor_block_lasx = {
+	.name = "lasx",
+	.do_2 = xor_lasx_2,
+	.do_3 = xor_lasx_3,
+	.do_4 = xor_lasx_4,
+	.do_5 = xor_lasx_5,
+};
+
+#define XOR_SPEED_LASX()					\
+	do {							\
+		if (cpu_has_lasx)				\
+			xor_speed(&xor_block_lasx);		\
+	} while (0)
+#else /* CONFIG_CPU_HAS_LASX */
+#define XOR_SPEED_LASX()
+#endif /* CONFIG_CPU_HAS_LASX */
+
+/*
+ * For grins, also test the generic routines.
+ *
+ * More importantly: it cannot be ruled out at this point of time, that some
+ * future (maybe reduced) models could run the vector algorithms slower than
+ * the scalar ones, maybe for errata or micro-op reasons. It may be
+ * appropriate to revisit this after one or two more uarch generations.
+ */
+#include <asm-generic/xor.h>
+
+#undef XOR_TRY_TEMPLATES
+#define XOR_TRY_TEMPLATES				\
+do {							\
+	xor_speed(&xor_block_8regs);			\
+	xor_speed(&xor_block_8regs_p);			\
+	xor_speed(&xor_block_32regs);			\
+	xor_speed(&xor_block_32regs_p);			\
+	XOR_SPEED_LSX();				\
+	XOR_SPEED_LASX();				\
+} while (0)
+
+#endif /* _ASM_LOONGARCH_XOR_H */
diff --git a/arch/loongarch/include/asm/xor_simd.h b/arch/loongarch/include/asm/xor_simd.h
new file mode 100644
index 000000000000..471b96332f38
--- /dev/null
+++ b/arch/loongarch/include/asm/xor_simd.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2023 WANG Xuerui <git@xen0n.name>
+ */
+#ifndef _ASM_LOONGARCH_XOR_SIMD_H
+#define _ASM_LOONGARCH_XOR_SIMD_H
+
+#ifdef CONFIG_CPU_HAS_LSX
+void xor_lsx_2(unsigned long bytes, unsigned long * __restrict p1,
+	       const unsigned long * __restrict p2);
+void xor_lsx_3(unsigned long bytes, unsigned long * __restrict p1,
+	       const unsigned long * __restrict p2, const unsigned long * __restrict p3);
+void xor_lsx_4(unsigned long bytes, unsigned long * __restrict p1,
+	       const unsigned long * __restrict p2, const unsigned long * __restrict p3,
+	       const unsigned long * __restrict p4);
+void xor_lsx_5(unsigned long bytes, unsigned long * __restrict p1,
+	       const unsigned long * __restrict p2, const unsigned long * __restrict p3,
+	       const unsigned long * __restrict p4, const unsigned long * __restrict p5);
+#endif /* CONFIG_CPU_HAS_LSX */
+
+#ifdef CONFIG_CPU_HAS_LASX
+void xor_lasx_2(unsigned long bytes, unsigned long * __restrict p1,
+	        const unsigned long * __restrict p2);
+void xor_lasx_3(unsigned long bytes, unsigned long * __restrict p1,
+	        const unsigned long * __restrict p2, const unsigned long * __restrict p3);
+void xor_lasx_4(unsigned long bytes, unsigned long * __restrict p1,
+	        const unsigned long * __restrict p2, const unsigned long * __restrict p3,
+	        const unsigned long * __restrict p4);
+void xor_lasx_5(unsigned long bytes, unsigned long * __restrict p1,
+	        const unsigned long * __restrict p2, const unsigned long * __restrict p3,
+	        const unsigned long * __restrict p4, const unsigned long * __restrict p5);
+#endif /* CONFIG_CPU_HAS_LASX */
+
+#endif /* _ASM_LOONGARCH_XOR_SIMD_H */
diff --git a/arch/loongarch/include/uapi/asm/ptrace.h b/arch/loongarch/include/uapi/asm/ptrace.h
index 06e3be52cb04..ac915f841650 100644
--- a/arch/loongarch/include/uapi/asm/ptrace.h
+++ b/arch/loongarch/include/uapi/asm/ptrace.h
@@ -56,6 +56,12 @@ struct user_lasx_state {
 	uint64_t vregs[32*4];
 };
 
+struct user_lbt_state {
+	uint64_t scr[4];
+	uint32_t eflags;
+	uint32_t ftop;
+};
+
 struct user_watch_state {
 	uint64_t dbg_info;
 	struct {
diff --git a/arch/loongarch/include/uapi/asm/sigcontext.h b/arch/loongarch/include/uapi/asm/sigcontext.h
index 4cd7d16f7037..6c22f616b8f1 100644
--- a/arch/loongarch/include/uapi/asm/sigcontext.h
+++ b/arch/loongarch/include/uapi/asm/sigcontext.h
@@ -59,4 +59,14 @@ struct lasx_context {
 	__u32	fcsr;
 };
 
+/* LBT context */
+#define LBT_CTX_MAGIC		0x42540001
+#define LBT_CTX_ALIGN		8
+struct lbt_context {
+	__u64	regs[4];
+	__u32	eflags;
+	__u32	ftop;
+};
+
+
 #endif /* _UAPI_ASM_SIGCONTEXT_H */
diff --git a/arch/loongarch/kernel/Makefile b/arch/loongarch/kernel/Makefile
index 8e279f04f9e7..c56ea0b75448 100644
--- a/arch/loongarch/kernel/Makefile
+++ b/arch/loongarch/kernel/Makefile
@@ -15,6 +15,8 @@ obj-$(CONFIG_EFI) 		+= efi.o
 
 obj-$(CONFIG_CPU_HAS_FPU)	+= fpu.o kfpu.o
 
+obj-$(CONFIG_CPU_HAS_LBT)	+= lbt.o
+
 obj-$(CONFIG_ARCH_STRICT_ALIGN)	+= unaligned.o
 
 ifdef CONFIG_FUNCTION_TRACER
@@ -32,6 +34,12 @@ ifdef CONFIG_FUNCTION_TRACER
   CFLAGS_REMOVE_rethook_trampoline.o = $(CC_FLAGS_FTRACE)
 endif
 
+KASAN_SANITIZE_efi.o := n
+KASAN_SANITIZE_cpu-probe.o := n
+KASAN_SANITIZE_traps.o := n
+KASAN_SANITIZE_smp.o := n
+KASAN_SANITIZE_vdso.o := n
+
 obj-$(CONFIG_MODULES)		+= module.o module-sections.o
 obj-$(CONFIG_STACKTRACE)	+= stacktrace.o
 
@@ -54,6 +62,7 @@ obj-$(CONFIG_UNWINDER_PROLOGUE) += unwind_prologue.o
 obj-$(CONFIG_PERF_EVENTS)	+= perf_event.o perf_regs.o
 obj-$(CONFIG_HAVE_HW_BREAKPOINT)	+= hw_breakpoint.o
 
+obj-$(CONFIG_KGDB)		+= kgdb.o
 obj-$(CONFIG_KPROBES)		+= kprobes.o
 obj-$(CONFIG_RETHOOK)		+= rethook.o rethook_trampoline.o
 obj-$(CONFIG_UPROBES)		+= uprobes.o
diff --git a/arch/loongarch/kernel/asm-offsets.c b/arch/loongarch/kernel/asm-offsets.c
index 505e4bf59603..8da0726777ed 100644
--- a/arch/loongarch/kernel/asm-offsets.c
+++ b/arch/loongarch/kernel/asm-offsets.c
@@ -118,13 +118,6 @@ void output_thread_defines(void)
 	OFFSET(THREAD_CSRECFG, task_struct,
 	       thread.csr_ecfg);
 
-	OFFSET(THREAD_SCR0, task_struct, thread.scr0);
-	OFFSET(THREAD_SCR1, task_struct, thread.scr1);
-	OFFSET(THREAD_SCR2, task_struct, thread.scr2);
-	OFFSET(THREAD_SCR3, task_struct, thread.scr3);
-
-	OFFSET(THREAD_EFLAGS, task_struct, thread.eflags);
-
 	OFFSET(THREAD_FPU, task_struct, thread.fpu);
 
 	OFFSET(THREAD_BVADDR, task_struct, \
@@ -172,6 +165,17 @@ void output_thread_fpu_defines(void)
 
 	OFFSET(THREAD_FCSR, loongarch_fpu, fcsr);
 	OFFSET(THREAD_FCC,  loongarch_fpu, fcc);
+	OFFSET(THREAD_FTOP, loongarch_fpu, ftop);
+	BLANK();
+}
+
+void output_thread_lbt_defines(void)
+{
+	OFFSET(THREAD_SCR0,  loongarch_lbt, scr0);
+	OFFSET(THREAD_SCR1,  loongarch_lbt, scr1);
+	OFFSET(THREAD_SCR2,  loongarch_lbt, scr2);
+	OFFSET(THREAD_SCR3,  loongarch_lbt, scr3);
+	OFFSET(THREAD_EFLAGS, loongarch_lbt, eflags);
 	BLANK();
 }
 
diff --git a/arch/loongarch/kernel/cpu-probe.c b/arch/loongarch/kernel/cpu-probe.c
index e925579c7a71..55320813ee08 100644
--- a/arch/loongarch/kernel/cpu-probe.c
+++ b/arch/loongarch/kernel/cpu-probe.c
@@ -144,6 +144,20 @@ static void cpu_probe_common(struct cpuinfo_loongarch *c)
 		c->options |= LOONGARCH_CPU_LVZ;
 		elf_hwcap |= HWCAP_LOONGARCH_LVZ;
 	}
+#ifdef CONFIG_CPU_HAS_LBT
+	if (config & CPUCFG2_X86BT) {
+		c->options |= LOONGARCH_CPU_LBT_X86;
+		elf_hwcap |= HWCAP_LOONGARCH_LBT_X86;
+	}
+	if (config & CPUCFG2_ARMBT) {
+		c->options |= LOONGARCH_CPU_LBT_ARM;
+		elf_hwcap |= HWCAP_LOONGARCH_LBT_ARM;
+	}
+	if (config & CPUCFG2_MIPSBT) {
+		c->options |= LOONGARCH_CPU_LBT_MIPS;
+		elf_hwcap |= HWCAP_LOONGARCH_LBT_MIPS;
+	}
+#endif
 
 	config = read_cpucfg(LOONGARCH_CPUCFG6);
 	if (config & CPUCFG6_PMP)
diff --git a/arch/loongarch/kernel/entry.S b/arch/loongarch/kernel/entry.S
index d737e3cf42d3..65518bb8f472 100644
--- a/arch/loongarch/kernel/entry.S
+++ b/arch/loongarch/kernel/entry.S
@@ -58,6 +58,11 @@ SYM_FUNC_START(handle_syscall)
 
 	SAVE_STATIC
 
+#ifdef CONFIG_KGDB
+	li.w		t1, CSR_CRMD_WE
+	csrxchg		t1, t1, LOONGARCH_CSR_CRMD
+#endif
+
 	move		u0, t0
 	li.d		tp, ~_THREAD_MASK
 	and		tp, tp, sp
diff --git a/arch/loongarch/kernel/fpu.S b/arch/loongarch/kernel/fpu.S
index 501094a09f5d..d53ab10f4644 100644
--- a/arch/loongarch/kernel/fpu.S
+++ b/arch/loongarch/kernel/fpu.S
@@ -22,7 +22,7 @@
 
 	.macro	EX insn, reg, src, offs
 .ex\@:	\insn	\reg, \src, \offs
-	_asm_extable .ex\@, fault
+	_asm_extable .ex\@, .L_fpu_fault
 	.endm
 
 	.macro sc_save_fp base
@@ -138,6 +138,13 @@
 	.macro sc_save_fcsr base, tmp0
 	movfcsr2gr	\tmp0, fcsr0
 	EX	st.w	\tmp0, \base, 0
+#if defined(CONFIG_CPU_HAS_LBT)
+	/* TM bit is always 0 if LBT not supported */
+	andi		\tmp0, \tmp0, FPU_CSR_TM
+	beqz		\tmp0, 1f
+	x86clrtm
+1:
+#endif
 	.endm
 
 	.macro sc_restore_fcsr base, tmp0
@@ -309,7 +316,7 @@ EXPORT_SYMBOL(_save_fp)
  */
 SYM_FUNC_START(_restore_fp)
 	fpu_restore_double	a0 t1		# clobbers t1
-	fpu_restore_csr		a0 t1
+	fpu_restore_csr		a0 t1 t2
 	fpu_restore_cc		a0 t1 t2	# clobbers t1, t2
 	jr			ra
 SYM_FUNC_END(_restore_fp)
@@ -514,7 +521,6 @@ SYM_FUNC_START(_restore_lasx_context)
 	jr	ra
 SYM_FUNC_END(_restore_lasx_context)
 
-SYM_FUNC_START(fault)
+.L_fpu_fault:
 	li.w	a0, -EFAULT				# failure
 	jr	ra
-SYM_FUNC_END(fault)
diff --git a/arch/loongarch/kernel/head.S b/arch/loongarch/kernel/head.S
index 5e828a8bc0a0..53b883db0786 100644
--- a/arch/loongarch/kernel/head.S
+++ b/arch/loongarch/kernel/head.S
@@ -95,12 +95,17 @@ SYM_CODE_START(kernel_entry)			# kernel entry point
 	PTR_LI		sp, (_THREAD_SIZE - PT_SIZE)
 	PTR_ADD		sp, sp, tp
 	set_saved_sp	sp, t0, t1
-#endif
 
-	/* relocate_kernel() returns the new kernel entry point */
-	jr		a0
-	ASM_BUG()
+	/* Jump to the new kernel: new_pc = current_pc + random_offset */
+	pcaddi		t0, 0
+	add.d		t0, t0, a0
+	jirl		zero, t0, 0xc
+#endif /* CONFIG_RANDOMIZE_BASE */
+
+#endif /* CONFIG_RELOCATABLE */
 
+#ifdef CONFIG_KASAN
+	bl		kasan_early_init
 #endif
 
 	bl		start_kernel
diff --git a/arch/loongarch/kernel/kfpu.c b/arch/loongarch/kernel/kfpu.c
index 5c46ae8c6cac..ec5b28e570c9 100644
--- a/arch/loongarch/kernel/kfpu.c
+++ b/arch/loongarch/kernel/kfpu.c
@@ -8,19 +8,40 @@
 #include <asm/fpu.h>
 #include <asm/smp.h>
 
+static unsigned int euen_mask = CSR_EUEN_FPEN;
+
+/*
+ * The critical section between kernel_fpu_begin() and kernel_fpu_end()
+ * is non-reentrant. It is the caller's responsibility to avoid reentrance.
+ * See drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c as an example.
+ */
 static DEFINE_PER_CPU(bool, in_kernel_fpu);
+static DEFINE_PER_CPU(unsigned int, euen_current);
 
 void kernel_fpu_begin(void)
 {
+	unsigned int *euen_curr;
+
 	preempt_disable();
 
 	WARN_ON(this_cpu_read(in_kernel_fpu));
 
 	this_cpu_write(in_kernel_fpu, true);
+	euen_curr = this_cpu_ptr(&euen_current);
 
-	if (!is_fpu_owner())
-		enable_fpu();
+	*euen_curr = csr_xchg32(euen_mask, euen_mask, LOONGARCH_CSR_EUEN);
+
+#ifdef CONFIG_CPU_HAS_LASX
+	if (*euen_curr & CSR_EUEN_LASXEN)
+		_save_lasx(&current->thread.fpu);
+	else
+#endif
+#ifdef CONFIG_CPU_HAS_LSX
+	if (*euen_curr & CSR_EUEN_LSXEN)
+		_save_lsx(&current->thread.fpu);
 	else
+#endif
+	if (*euen_curr & CSR_EUEN_FPEN)
 		_save_fp(&current->thread.fpu);
 
 	write_fcsr(LOONGARCH_FCSR0, 0);
@@ -29,15 +50,41 @@ EXPORT_SYMBOL_GPL(kernel_fpu_begin);
 
 void kernel_fpu_end(void)
 {
+	unsigned int *euen_curr;
+
 	WARN_ON(!this_cpu_read(in_kernel_fpu));
 
-	if (!is_fpu_owner())
-		disable_fpu();
+	euen_curr = this_cpu_ptr(&euen_current);
+
+#ifdef CONFIG_CPU_HAS_LASX
+	if (*euen_curr & CSR_EUEN_LASXEN)
+		_restore_lasx(&current->thread.fpu);
 	else
+#endif
+#ifdef CONFIG_CPU_HAS_LSX
+	if (*euen_curr & CSR_EUEN_LSXEN)
+		_restore_lsx(&current->thread.fpu);
+	else
+#endif
+	if (*euen_curr & CSR_EUEN_FPEN)
 		_restore_fp(&current->thread.fpu);
 
+	*euen_curr = csr_xchg32(*euen_curr, euen_mask, LOONGARCH_CSR_EUEN);
+
 	this_cpu_write(in_kernel_fpu, false);
 
 	preempt_enable();
 }
 EXPORT_SYMBOL_GPL(kernel_fpu_end);
+
+static int __init init_euen_mask(void)
+{
+	if (cpu_has_lsx)
+		euen_mask |= CSR_EUEN_LSXEN;
+
+	if (cpu_has_lasx)
+		euen_mask |= CSR_EUEN_LASXEN;
+
+	return 0;
+}
+arch_initcall(init_euen_mask);
diff --git a/arch/loongarch/kernel/kgdb.c b/arch/loongarch/kernel/kgdb.c
new file mode 100644
index 000000000000..445c452d72a7
--- /dev/null
+++ b/arch/loongarch/kernel/kgdb.c
@@ -0,0 +1,727 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * LoongArch KGDB support
+ *
+ * Copyright (C) 2023 Loongson Technology Corporation Limited
+ */
+
+#include <linux/hw_breakpoint.h>
+#include <linux/kdebug.h>
+#include <linux/kgdb.h>
+#include <linux/processor.h>
+#include <linux/ptrace.h>
+#include <linux/sched.h>
+#include <linux/smp.h>
+
+#include <asm/cacheflush.h>
+#include <asm/fpu.h>
+#include <asm/hw_breakpoint.h>
+#include <asm/inst.h>
+#include <asm/irq_regs.h>
+#include <asm/ptrace.h>
+#include <asm/sigcontext.h>
+
+int kgdb_watch_activated;
+static unsigned int stepped_opcode;
+static unsigned long stepped_address;
+
+struct dbg_reg_def_t dbg_reg_def[DBG_MAX_REG_NUM] = {
+	{ "r0", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[0]) },
+	{ "r1", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[1]) },
+	{ "r2", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[2]) },
+	{ "r3", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[3]) },
+	{ "r4", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[4]) },
+	{ "r5", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[5]) },
+	{ "r6", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[6]) },
+	{ "r7", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[7]) },
+	{ "r8", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[8]) },
+	{ "r9", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[9]) },
+	{ "r10", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[10]) },
+	{ "r11", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[11]) },
+	{ "r12", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[12]) },
+	{ "r13", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[13]) },
+	{ "r14", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[14]) },
+	{ "r15", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[15]) },
+	{ "r16", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[16]) },
+	{ "r17", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[17]) },
+	{ "r18", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[18]) },
+	{ "r19", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[19]) },
+	{ "r20", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[20]) },
+	{ "r21", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[21]) },
+	{ "r22", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[22]) },
+	{ "r23", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[23]) },
+	{ "r24", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[24]) },
+	{ "r25", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[25]) },
+	{ "r26", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[26]) },
+	{ "r27", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[27]) },
+	{ "r28", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[28]) },
+	{ "r29", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[29]) },
+	{ "r30", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[30]) },
+	{ "r31", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[31]) },
+	{ "orig_a0", GDB_SIZEOF_REG, offsetof(struct pt_regs, orig_a0) },
+	{ "pc", GDB_SIZEOF_REG, offsetof(struct pt_regs, csr_era) },
+	{ "badv", GDB_SIZEOF_REG, offsetof(struct pt_regs, csr_badvaddr) },
+	{ "f0", GDB_SIZEOF_REG, 0 },
+	{ "f1", GDB_SIZEOF_REG, 1 },
+	{ "f2", GDB_SIZEOF_REG, 2 },
+	{ "f3", GDB_SIZEOF_REG, 3 },
+	{ "f4", GDB_SIZEOF_REG, 4 },
+	{ "f5", GDB_SIZEOF_REG, 5 },
+	{ "f6", GDB_SIZEOF_REG, 6 },
+	{ "f7", GDB_SIZEOF_REG, 7 },
+	{ "f8", GDB_SIZEOF_REG, 8 },
+	{ "f9", GDB_SIZEOF_REG, 9 },
+	{ "f10", GDB_SIZEOF_REG, 10 },
+	{ "f11", GDB_SIZEOF_REG, 11 },
+	{ "f12", GDB_SIZEOF_REG, 12 },
+	{ "f13", GDB_SIZEOF_REG, 13 },
+	{ "f14", GDB_SIZEOF_REG, 14 },
+	{ "f15", GDB_SIZEOF_REG, 15 },
+	{ "f16", GDB_SIZEOF_REG, 16 },
+	{ "f17", GDB_SIZEOF_REG, 17 },
+	{ "f18", GDB_SIZEOF_REG, 18 },
+	{ "f19", GDB_SIZEOF_REG, 19 },
+	{ "f20", GDB_SIZEOF_REG, 20 },
+	{ "f21", GDB_SIZEOF_REG, 21 },
+	{ "f22", GDB_SIZEOF_REG, 22 },
+	{ "f23", GDB_SIZEOF_REG, 23 },
+	{ "f24", GDB_SIZEOF_REG, 24 },
+	{ "f25", GDB_SIZEOF_REG, 25 },
+	{ "f26", GDB_SIZEOF_REG, 26 },
+	{ "f27", GDB_SIZEOF_REG, 27 },
+	{ "f28", GDB_SIZEOF_REG, 28 },
+	{ "f29", GDB_SIZEOF_REG, 29 },
+	{ "f30", GDB_SIZEOF_REG, 30 },
+	{ "f31", GDB_SIZEOF_REG, 31 },
+	{ "fcc0", 1, 0 },
+	{ "fcc1", 1, 1 },
+	{ "fcc2", 1, 2 },
+	{ "fcc3", 1, 3 },
+	{ "fcc4", 1, 4 },
+	{ "fcc5", 1, 5 },
+	{ "fcc6", 1, 6 },
+	{ "fcc7", 1, 7 },
+	{ "fcsr", 4, 0 },
+};
+
+char *dbg_get_reg(int regno, void *mem, struct pt_regs *regs)
+{
+	int reg_offset, reg_size;
+
+	if (regno < 0 || regno >= DBG_MAX_REG_NUM)
+		return NULL;
+
+	reg_offset = dbg_reg_def[regno].offset;
+	reg_size = dbg_reg_def[regno].size;
+
+	if (reg_offset == -1)
+		goto out;
+
+	/* Handle general-purpose/orig_a0/pc/badv registers */
+	if (regno <= DBG_PT_REGS_END) {
+		memcpy(mem, (void *)regs + reg_offset, reg_size);
+		goto out;
+	}
+
+	if (!(regs->csr_euen & CSR_EUEN_FPEN))
+		goto out;
+
+	save_fp(current);
+
+	/* Handle FP registers */
+	switch (regno) {
+	case DBG_FCSR:				/* Process the fcsr */
+		memcpy(mem, (void *)&current->thread.fpu.fcsr, reg_size);
+		break;
+	case DBG_FCC_BASE ... DBG_FCC_END:	/* Process the fcc */
+		memcpy(mem, (void *)&current->thread.fpu.fcc + reg_offset, reg_size);
+		break;
+	case DBG_FPR_BASE ... DBG_FPR_END:	/* Process the fpr */
+		memcpy(mem, (void *)&current->thread.fpu.fpr[reg_offset], reg_size);
+		break;
+	default:
+		break;
+	}
+
+out:
+	return dbg_reg_def[regno].name;
+}
+
+int dbg_set_reg(int regno, void *mem, struct pt_regs *regs)
+{
+	int reg_offset, reg_size;
+
+	if (regno < 0 || regno >= DBG_MAX_REG_NUM)
+		return -EINVAL;
+
+	reg_offset = dbg_reg_def[regno].offset;
+	reg_size = dbg_reg_def[regno].size;
+
+	if (reg_offset == -1)
+		return 0;
+
+	/* Handle general-purpose/orig_a0/pc/badv registers */
+	if (regno <= DBG_PT_REGS_END) {
+		memcpy((void *)regs + reg_offset, mem, reg_size);
+		return 0;
+	}
+
+	if (!(regs->csr_euen & CSR_EUEN_FPEN))
+		return 0;
+
+	/* Handle FP registers */
+	switch (regno) {
+	case DBG_FCSR:				/* Process the fcsr */
+		memcpy((void *)&current->thread.fpu.fcsr, mem, reg_size);
+		break;
+	case DBG_FCC_BASE ... DBG_FCC_END:	/* Process the fcc */
+		memcpy((void *)&current->thread.fpu.fcc + reg_offset, mem, reg_size);
+		break;
+	case DBG_FPR_BASE ... DBG_FPR_END:	/* Process the fpr */
+		memcpy((void *)&current->thread.fpu.fpr[reg_offset], mem, reg_size);
+		break;
+	default:
+		break;
+	}
+
+	restore_fp(current);
+
+	return 0;
+}
+
+/*
+ * Similar to regs_to_gdb_regs() except that process is sleeping and so
+ * we may not be able to get all the info.
+ */
+void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p)
+{
+	/* Initialize to zero */
+	memset((char *)gdb_regs, 0, NUMREGBYTES);
+
+	gdb_regs[DBG_LOONGARCH_RA] = p->thread.reg01;
+	gdb_regs[DBG_LOONGARCH_TP] = (long)p;
+	gdb_regs[DBG_LOONGARCH_SP] = p->thread.reg03;
+
+	/* S0 - S8 */
+	gdb_regs[DBG_LOONGARCH_S0] = p->thread.reg23;
+	gdb_regs[DBG_LOONGARCH_S1] = p->thread.reg24;
+	gdb_regs[DBG_LOONGARCH_S2] = p->thread.reg25;
+	gdb_regs[DBG_LOONGARCH_S3] = p->thread.reg26;
+	gdb_regs[DBG_LOONGARCH_S4] = p->thread.reg27;
+	gdb_regs[DBG_LOONGARCH_S5] = p->thread.reg28;
+	gdb_regs[DBG_LOONGARCH_S6] = p->thread.reg29;
+	gdb_regs[DBG_LOONGARCH_S7] = p->thread.reg30;
+	gdb_regs[DBG_LOONGARCH_S8] = p->thread.reg31;
+
+	/*
+	 * PC use return address (RA), i.e. the moment after return from __switch_to()
+	 */
+	gdb_regs[DBG_LOONGARCH_PC] = p->thread.reg01;
+}
+
+void kgdb_arch_set_pc(struct pt_regs *regs, unsigned long pc)
+{
+	regs->csr_era = pc;
+}
+
+void arch_kgdb_breakpoint(void)
+{
+	__asm__ __volatile__ (			\
+		".globl kgdb_breakinst\n\t"	\
+		"nop\n"				\
+		"kgdb_breakinst:\tbreak 2\n\t"); /* BRK_KDB = 2 */
+}
+
+/*
+ * Calls linux_debug_hook before the kernel dies. If KGDB is enabled,
+ * then try to fall into the debugger
+ */
+static int kgdb_loongarch_notify(struct notifier_block *self, unsigned long cmd, void *ptr)
+{
+	struct die_args *args = (struct die_args *)ptr;
+	struct pt_regs *regs = args->regs;
+
+	/* Userspace events, ignore. */
+	if (user_mode(regs))
+		return NOTIFY_DONE;
+
+	if (!kgdb_io_module_registered)
+		return NOTIFY_DONE;
+
+	if (atomic_read(&kgdb_active) != -1)
+		kgdb_nmicallback(smp_processor_id(), regs);
+
+	if (kgdb_handle_exception(args->trapnr, args->signr, cmd, regs))
+		return NOTIFY_DONE;
+
+	if (atomic_read(&kgdb_setting_breakpoint))
+		if (regs->csr_era == (unsigned long)&kgdb_breakinst)
+			regs->csr_era += LOONGARCH_INSN_SIZE;
+
+	return NOTIFY_STOP;
+}
+
+bool kgdb_breakpoint_handler(struct pt_regs *regs)
+{
+	struct die_args args = {
+		.regs	= regs,
+		.str	= "Break",
+		.err	= BRK_KDB,
+		.trapnr = read_csr_excode(),
+		.signr	= SIGTRAP,
+
+	};
+
+	return (kgdb_loongarch_notify(NULL, DIE_TRAP, &args) == NOTIFY_STOP) ? true : false;
+}
+
+static struct notifier_block kgdb_notifier = {
+	.notifier_call = kgdb_loongarch_notify,
+};
+
+static inline void kgdb_arch_update_addr(struct pt_regs *regs,
+					 char *remcom_in_buffer)
+{
+	unsigned long addr;
+	char *ptr;
+
+	ptr = &remcom_in_buffer[1];
+	if (kgdb_hex2long(&ptr, &addr))
+		regs->csr_era = addr;
+}
+
+/* Calculate the new address for after a step */
+static int get_step_address(struct pt_regs *regs, unsigned long *next_addr)
+{
+	char cj_val;
+	unsigned int si, si_l, si_h, rd, rj, cj;
+	unsigned long pc = instruction_pointer(regs);
+	union loongarch_instruction *ip = (union loongarch_instruction *)pc;
+
+	if (pc & 3) {
+		pr_warn("%s: invalid pc 0x%lx\n", __func__, pc);
+		return -EINVAL;
+	}
+
+	*next_addr = pc + LOONGARCH_INSN_SIZE;
+
+	si_h = ip->reg0i26_format.immediate_h;
+	si_l = ip->reg0i26_format.immediate_l;
+	switch (ip->reg0i26_format.opcode) {
+	case b_op:
+		*next_addr = pc + sign_extend64((si_h << 16 | si_l) << 2, 27);
+		return 0;
+	case bl_op:
+		*next_addr = pc + sign_extend64((si_h << 16 | si_l) << 2, 27);
+		regs->regs[1] = pc + LOONGARCH_INSN_SIZE;
+		return 0;
+	}
+
+	rj = ip->reg1i21_format.rj;
+	cj = (rj & 0x07) + DBG_FCC_BASE;
+	si_l = ip->reg1i21_format.immediate_l;
+	si_h = ip->reg1i21_format.immediate_h;
+	dbg_get_reg(cj, &cj_val, regs);
+	switch (ip->reg1i21_format.opcode) {
+	case beqz_op:
+		if (regs->regs[rj] == 0)
+			*next_addr = pc + sign_extend64((si_h << 16 | si_l) << 2, 22);
+		return 0;
+	case bnez_op:
+		if (regs->regs[rj] != 0)
+			*next_addr = pc + sign_extend64((si_h << 16 | si_l) << 2, 22);
+		return 0;
+	case bceqz_op: /* bceqz_op = bcnez_op */
+		if (((rj & 0x18) == 0x00) && !cj_val) /* bceqz */
+			*next_addr = pc + sign_extend64((si_h << 16 | si_l) << 2, 22);
+		if (((rj & 0x18) == 0x08) && cj_val) /* bcnez */
+			*next_addr = pc + sign_extend64((si_h << 16 | si_l) << 2, 22);
+		return 0;
+	}
+
+	rj = ip->reg2i16_format.rj;
+	rd = ip->reg2i16_format.rd;
+	si = ip->reg2i16_format.immediate;
+	switch (ip->reg2i16_format.opcode) {
+	case beq_op:
+		if (regs->regs[rj] == regs->regs[rd])
+			*next_addr = pc + sign_extend64(si << 2, 17);
+		return 0;
+	case bne_op:
+		if (regs->regs[rj] != regs->regs[rd])
+			*next_addr = pc + sign_extend64(si << 2, 17);
+		return 0;
+	case blt_op:
+		if ((long)regs->regs[rj] < (long)regs->regs[rd])
+			*next_addr = pc + sign_extend64(si << 2, 17);
+		return 0;
+	case bge_op:
+		if ((long)regs->regs[rj] >= (long)regs->regs[rd])
+			*next_addr = pc + sign_extend64(si << 2, 17);
+		return 0;
+	case bltu_op:
+		if (regs->regs[rj] < regs->regs[rd])
+			*next_addr = pc + sign_extend64(si << 2, 17);
+		return 0;
+	case bgeu_op:
+		if (regs->regs[rj] >= regs->regs[rd])
+			*next_addr = pc + sign_extend64(si << 2, 17);
+		return 0;
+	case jirl_op:
+		regs->regs[rd] = pc + LOONGARCH_INSN_SIZE;
+		*next_addr = regs->regs[rj] + sign_extend64(si << 2, 17);
+		return 0;
+	}
+
+	return 0;
+}
+
+static int do_single_step(struct pt_regs *regs)
+{
+	int error = 0;
+	unsigned long addr = 0; /* Determine where the target instruction will send us to */
+
+	error = get_step_address(regs, &addr);
+	if (error)
+		return error;
+
+	/* Store the opcode in the stepped address */
+	error = get_kernel_nofault(stepped_opcode, (void *)addr);
+	if (error)
+		return error;
+
+	stepped_address = addr;
+
+	/* Replace the opcode with the break instruction */
+	error = copy_to_kernel_nofault((void *)stepped_address,
+				       arch_kgdb_ops.gdb_bpt_instr, BREAK_INSTR_SIZE);
+	flush_icache_range(addr, addr + BREAK_INSTR_SIZE);
+
+	if (error) {
+		stepped_opcode = 0;
+		stepped_address = 0;
+	} else {
+		kgdb_single_step = 1;
+		atomic_set(&kgdb_cpu_doing_single_step, raw_smp_processor_id());
+	}
+
+	return error;
+}
+
+/* Undo a single step */
+static void undo_single_step(struct pt_regs *regs)
+{
+	if (stepped_opcode) {
+		copy_to_kernel_nofault((void *)stepped_address,
+				       (void *)&stepped_opcode, BREAK_INSTR_SIZE);
+		flush_icache_range(stepped_address, stepped_address + BREAK_INSTR_SIZE);
+	}
+
+	stepped_opcode = 0;
+	stepped_address = 0;
+	kgdb_single_step = 0;
+	atomic_set(&kgdb_cpu_doing_single_step, -1);
+}
+
+int kgdb_arch_handle_exception(int vector, int signo, int err_code,
+			       char *remcom_in_buffer, char *remcom_out_buffer,
+			       struct pt_regs *regs)
+{
+	int ret = 0;
+
+	undo_single_step(regs);
+	regs->csr_prmd |= CSR_PRMD_PWE;
+
+	switch (remcom_in_buffer[0]) {
+	case 'D':
+	case 'k':
+		regs->csr_prmd &= ~CSR_PRMD_PWE;
+		fallthrough;
+	case 'c':
+		kgdb_arch_update_addr(regs, remcom_in_buffer);
+		break;
+	case 's':
+		kgdb_arch_update_addr(regs, remcom_in_buffer);
+		ret = do_single_step(regs);
+		break;
+	default:
+		ret = -1;
+	}
+
+	return ret;
+}
+
+static struct hw_breakpoint {
+	unsigned int		enabled;
+	unsigned long		addr;
+	int			len;
+	int			type;
+	struct perf_event	* __percpu *pev;
+} breakinfo[LOONGARCH_MAX_BRP];
+
+static int hw_break_reserve_slot(int breakno)
+{
+	int cpu, cnt = 0;
+	struct perf_event **pevent;
+
+	for_each_online_cpu(cpu) {
+		cnt++;
+		pevent = per_cpu_ptr(breakinfo[breakno].pev, cpu);
+		if (dbg_reserve_bp_slot(*pevent))
+			goto fail;
+	}
+
+	return 0;
+
+fail:
+	for_each_online_cpu(cpu) {
+		cnt--;
+		if (!cnt)
+			break;
+		pevent = per_cpu_ptr(breakinfo[breakno].pev, cpu);
+		dbg_release_bp_slot(*pevent);
+	}
+
+	return -1;
+}
+
+static int hw_break_release_slot(int breakno)
+{
+	int cpu;
+	struct perf_event **pevent;
+
+	if (dbg_is_early)
+		return 0;
+
+	for_each_online_cpu(cpu) {
+		pevent = per_cpu_ptr(breakinfo[breakno].pev, cpu);
+		if (dbg_release_bp_slot(*pevent))
+			/*
+			 * The debugger is responsible for handing the retry on
+			 * remove failure.
+			 */
+			return -1;
+	}
+
+	return 0;
+}
+
+static int kgdb_set_hw_break(unsigned long addr, int len, enum kgdb_bptype bptype)
+{
+	int i;
+
+	for (i = 0; i < LOONGARCH_MAX_BRP; i++)
+		if (!breakinfo[i].enabled)
+			break;
+
+	if (i == LOONGARCH_MAX_BRP)
+		return -1;
+
+	switch (bptype) {
+	case BP_HARDWARE_BREAKPOINT:
+		breakinfo[i].type = HW_BREAKPOINT_X;
+		break;
+	case BP_READ_WATCHPOINT:
+		breakinfo[i].type = HW_BREAKPOINT_R;
+		break;
+	case BP_WRITE_WATCHPOINT:
+		breakinfo[i].type = HW_BREAKPOINT_W;
+		break;
+	case BP_ACCESS_WATCHPOINT:
+		breakinfo[i].type = HW_BREAKPOINT_RW;
+		break;
+	default:
+		return -1;
+	}
+
+	switch (len) {
+	case 1:
+		breakinfo[i].len = HW_BREAKPOINT_LEN_1;
+		break;
+	case 2:
+		breakinfo[i].len = HW_BREAKPOINT_LEN_2;
+		break;
+	case 4:
+		breakinfo[i].len = HW_BREAKPOINT_LEN_4;
+		break;
+	case 8:
+		breakinfo[i].len = HW_BREAKPOINT_LEN_8;
+		break;
+	default:
+		return -1;
+	}
+
+	breakinfo[i].addr = addr;
+	if (hw_break_reserve_slot(i)) {
+		breakinfo[i].addr = 0;
+		return -1;
+	}
+	breakinfo[i].enabled = 1;
+
+	return 0;
+}
+
+static int kgdb_remove_hw_break(unsigned long addr, int len, enum kgdb_bptype bptype)
+{
+	int i;
+
+	for (i = 0; i < LOONGARCH_MAX_BRP; i++)
+		if (breakinfo[i].addr == addr && breakinfo[i].enabled)
+			break;
+
+	if (i == LOONGARCH_MAX_BRP)
+		return -1;
+
+	if (hw_break_release_slot(i)) {
+		pr_err("Cannot remove hw breakpoint at %lx\n", addr);
+		return -1;
+	}
+	breakinfo[i].enabled = 0;
+
+	return 0;
+}
+
+static void kgdb_disable_hw_break(struct pt_regs *regs)
+{
+	int i;
+	int cpu = raw_smp_processor_id();
+	struct perf_event *bp;
+
+	for (i = 0; i < LOONGARCH_MAX_BRP; i++) {
+		if (!breakinfo[i].enabled)
+			continue;
+
+		bp = *per_cpu_ptr(breakinfo[i].pev, cpu);
+		if (bp->attr.disabled == 1)
+			continue;
+
+		arch_uninstall_hw_breakpoint(bp);
+		bp->attr.disabled = 1;
+	}
+
+	/* Disable hardware debugging while we are in kgdb */
+	csr_xchg32(0, CSR_CRMD_WE, LOONGARCH_CSR_CRMD);
+}
+
+static void kgdb_remove_all_hw_break(void)
+{
+	int i;
+	int cpu = raw_smp_processor_id();
+	struct perf_event *bp;
+
+	for (i = 0; i < LOONGARCH_MAX_BRP; i++) {
+		if (!breakinfo[i].enabled)
+			continue;
+
+		bp = *per_cpu_ptr(breakinfo[i].pev, cpu);
+		if (!bp->attr.disabled) {
+			arch_uninstall_hw_breakpoint(bp);
+			bp->attr.disabled = 1;
+			continue;
+		}
+
+		if (hw_break_release_slot(i))
+			pr_err("KGDB: hw bpt remove failed %lx\n", breakinfo[i].addr);
+		breakinfo[i].enabled = 0;
+	}
+
+	csr_xchg32(0, CSR_CRMD_WE, LOONGARCH_CSR_CRMD);
+	kgdb_watch_activated = 0;
+}
+
+static void kgdb_correct_hw_break(void)
+{
+	int i, activated = 0;
+
+	for (i = 0; i < LOONGARCH_MAX_BRP; i++) {
+		struct perf_event *bp;
+		int val;
+		int cpu = raw_smp_processor_id();
+
+		if (!breakinfo[i].enabled)
+			continue;
+
+		bp = *per_cpu_ptr(breakinfo[i].pev, cpu);
+		if (bp->attr.disabled != 1)
+			continue;
+
+		bp->attr.bp_addr = breakinfo[i].addr;
+		bp->attr.bp_len = breakinfo[i].len;
+		bp->attr.bp_type = breakinfo[i].type;
+
+		val = hw_breakpoint_arch_parse(bp, &bp->attr, counter_arch_bp(bp));
+		if (val)
+			return;
+
+		val = arch_install_hw_breakpoint(bp);
+		if (!val)
+			bp->attr.disabled = 0;
+		activated = 1;
+	}
+
+	csr_xchg32(activated ? CSR_CRMD_WE : 0, CSR_CRMD_WE, LOONGARCH_CSR_CRMD);
+	kgdb_watch_activated = activated;
+}
+
+const struct kgdb_arch arch_kgdb_ops = {
+	.gdb_bpt_instr		= {0x02, 0x00, break_op >> 1, 0x00}, /* BRK_KDB = 2 */
+	.flags			= KGDB_HW_BREAKPOINT,
+	.set_hw_breakpoint	= kgdb_set_hw_break,
+	.remove_hw_breakpoint	= kgdb_remove_hw_break,
+	.disable_hw_break	= kgdb_disable_hw_break,
+	.remove_all_hw_break	= kgdb_remove_all_hw_break,
+	.correct_hw_break	= kgdb_correct_hw_break,
+};
+
+int kgdb_arch_init(void)
+{
+	return register_die_notifier(&kgdb_notifier);
+}
+
+void kgdb_arch_late(void)
+{
+	int i, cpu;
+	struct perf_event_attr attr;
+	struct perf_event **pevent;
+
+	hw_breakpoint_init(&attr);
+
+	attr.bp_addr = (unsigned long)kgdb_arch_init;
+	attr.bp_len = HW_BREAKPOINT_LEN_4;
+	attr.bp_type = HW_BREAKPOINT_W;
+	attr.disabled = 1;
+
+	for (i = 0; i < LOONGARCH_MAX_BRP; i++) {
+		if (breakinfo[i].pev)
+			continue;
+
+		breakinfo[i].pev = register_wide_hw_breakpoint(&attr, NULL, NULL);
+		if (IS_ERR((void * __force)breakinfo[i].pev)) {
+			pr_err("kgdb: Could not allocate hw breakpoints.\n");
+			breakinfo[i].pev = NULL;
+			return;
+		}
+
+		for_each_online_cpu(cpu) {
+			pevent = per_cpu_ptr(breakinfo[i].pev, cpu);
+			if (pevent[0]->destroy) {
+				pevent[0]->destroy = NULL;
+				release_bp_slot(*pevent);
+			}
+		}
+	}
+}
+
+void kgdb_arch_exit(void)
+{
+	int i;
+
+	for (i = 0; i < LOONGARCH_MAX_BRP; i++) {
+		if (breakinfo[i].pev) {
+			unregister_wide_hw_breakpoint(breakinfo[i].pev);
+			breakinfo[i].pev = NULL;
+		}
+	}
+
+	unregister_die_notifier(&kgdb_notifier);
+}
diff --git a/arch/loongarch/kernel/lbt.S b/arch/loongarch/kernel/lbt.S
new file mode 100644
index 000000000000..9c75120a26d8
--- /dev/null
+++ b/arch/loongarch/kernel/lbt.S
@@ -0,0 +1,155 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Author: Qi Hu <huqi@loongson.cn>
+ *         Huacai Chen <chenhuacai@loongson.cn>
+ *
+ * Copyright (C) 2020-2023 Loongson Technology Corporation Limited
+ */
+#include <asm/asm.h>
+#include <asm/asmmacro.h>
+#include <asm/asm-extable.h>
+#include <asm/asm-offsets.h>
+#include <asm/errno.h>
+#include <asm/regdef.h>
+
+#define SCR_REG_WIDTH 8
+
+	.macro	EX insn, reg, src, offs
+.ex\@:	\insn	\reg, \src, \offs
+	_asm_extable .ex\@, .L_lbt_fault
+	.endm
+
+/*
+ * Save a thread's lbt context.
+ */
+SYM_FUNC_START(_save_lbt)
+	movscr2gr	t1, $scr0		# save scr
+	stptr.d		t1, a0, THREAD_SCR0
+	movscr2gr	t1, $scr1
+	stptr.d		t1, a0, THREAD_SCR1
+	movscr2gr	t1, $scr2
+	stptr.d		t1, a0, THREAD_SCR2
+	movscr2gr	t1, $scr3
+	stptr.d		t1, a0, THREAD_SCR3
+
+	x86mfflag	t1, 0x3f		# save eflags
+	stptr.d		t1, a0, THREAD_EFLAGS
+	jr		ra
+SYM_FUNC_END(_save_lbt)
+EXPORT_SYMBOL(_save_lbt)
+
+/*
+ * Restore a thread's lbt context.
+ */
+SYM_FUNC_START(_restore_lbt)
+	ldptr.d		t1, a0, THREAD_SCR0	# restore scr
+	movgr2scr	$scr0, t1
+	ldptr.d		t1, a0, THREAD_SCR1
+	movgr2scr	$scr1, t1
+	ldptr.d		t1, a0, THREAD_SCR2
+	movgr2scr	$scr2, t1
+	ldptr.d		t1, a0, THREAD_SCR3
+	movgr2scr	$scr3, t1
+
+	ldptr.d		t1, a0, THREAD_EFLAGS	# restore eflags
+	x86mtflag	t1, 0x3f
+	jr		ra
+SYM_FUNC_END(_restore_lbt)
+EXPORT_SYMBOL(_restore_lbt)
+
+/*
+ * Load scr/eflag with zero.
+ */
+SYM_FUNC_START(_init_lbt)
+	movgr2scr	$scr0, zero
+	movgr2scr	$scr1, zero
+	movgr2scr	$scr2, zero
+	movgr2scr	$scr3, zero
+
+	x86mtflag	zero, 0x3f
+	jr		ra
+SYM_FUNC_END(_init_lbt)
+
+/*
+ * a0: scr
+ * a1: eflag
+ */
+SYM_FUNC_START(_save_lbt_context)
+	movscr2gr	t1, $scr0		# save scr
+	EX	st.d	t1, a0, (0 * SCR_REG_WIDTH)
+	movscr2gr	t1, $scr1
+	EX	st.d	t1, a0, (1 * SCR_REG_WIDTH)
+	movscr2gr	t1, $scr2
+	EX	st.d	t1, a0, (2 * SCR_REG_WIDTH)
+	movscr2gr	t1, $scr3
+	EX	st.d	t1, a0, (3 * SCR_REG_WIDTH)
+
+	x86mfflag	t1, 0x3f		# save eflags
+	EX 	st.w	t1, a1, 0
+	li.w		a0, 0			# success
+	jr		ra
+SYM_FUNC_END(_save_lbt_context)
+
+/*
+ * a0: scr
+ * a1: eflag
+ */
+SYM_FUNC_START(_restore_lbt_context)
+	EX	ld.d	t1, a0, (0 * SCR_REG_WIDTH)	# restore scr
+	movgr2scr	$scr0, t1
+	EX	ld.d	t1, a0, (1 * SCR_REG_WIDTH)
+	movgr2scr	$scr1, t1
+	EX	ld.d	t1, a0, (2 * SCR_REG_WIDTH)
+	movgr2scr	$scr2, t1
+	EX	ld.d	t1, a0, (3 * SCR_REG_WIDTH)
+	movgr2scr	$scr3, t1
+
+	EX 	ld.w	t1, a1, 0			# restore eflags
+	x86mtflag	t1, 0x3f
+	li.w		a0, 0			# success
+	jr		ra
+SYM_FUNC_END(_restore_lbt_context)
+
+/*
+ * a0: ftop
+ */
+SYM_FUNC_START(_save_ftop_context)
+	x86mftop	t1
+	st.w		t1, a0, 0
+	li.w		a0, 0			# success
+	jr		ra
+SYM_FUNC_END(_save_ftop_context)
+
+/*
+ * a0: ftop
+ */
+SYM_FUNC_START(_restore_ftop_context)
+	ld.w		t1, a0, 0
+	andi		t1, t1, 0x7
+	la.pcrel	a0, 1f
+	alsl.d		a0, t1, a0, 3
+	jr		a0
+1:
+	x86mttop	0
+	b	2f
+	x86mttop	1
+	b	2f
+	x86mttop	2
+	b	2f
+	x86mttop	3
+	b	2f
+	x86mttop	4
+	b	2f
+	x86mttop	5
+	b	2f
+	x86mttop	6
+	b	2f
+	x86mttop	7
+2:
+	li.w		a0, 0			# success
+	jr		ra
+SYM_FUNC_END(_restore_ftop_context)
+
+.L_lbt_fault:
+	li.w		a0, -EFAULT		# failure
+	jr		ra
diff --git a/arch/loongarch/kernel/numa.c b/arch/loongarch/kernel/numa.c
index 708665895b47..c7d33c489e04 100644
--- a/arch/loongarch/kernel/numa.c
+++ b/arch/loongarch/kernel/numa.c
@@ -67,39 +67,7 @@ static int __init pcpu_cpu_distance(unsigned int from, unsigned int to)
 
 void __init pcpu_populate_pte(unsigned long addr)
 {
-	pgd_t *pgd = pgd_offset_k(addr);
-	p4d_t *p4d = p4d_offset(pgd, addr);
-	pud_t *pud;
-	pmd_t *pmd;
-
-	if (p4d_none(*p4d)) {
-		pud_t *new;
-
-		new = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
-		pgd_populate(&init_mm, pgd, new);
-#ifndef __PAGETABLE_PUD_FOLDED
-		pud_init(new);
-#endif
-	}
-
-	pud = pud_offset(p4d, addr);
-	if (pud_none(*pud)) {
-		pmd_t *new;
-
-		new = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
-		pud_populate(&init_mm, pud, new);
-#ifndef __PAGETABLE_PMD_FOLDED
-		pmd_init(new);
-#endif
-	}
-
-	pmd = pmd_offset(pud, addr);
-	if (!pmd_present(*pmd)) {
-		pte_t *new;
-
-		new = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
-		pmd_populate_kernel(&init_mm, pmd, new);
-	}
+	populate_kernel_pte(addr);
 }
 
 void __init setup_per_cpu_areas(void)
@@ -470,7 +438,6 @@ void __init mem_init(void)
 {
 	high_memory = (void *) __va(get_num_physpages() << PAGE_SHIFT);
 	memblock_free_all();
-	setup_zero_pages();	/* This comes from node 0 */
 }
 
 int pcibus_to_node(struct pci_bus *bus)
diff --git a/arch/loongarch/kernel/process.c b/arch/loongarch/kernel/process.c
index ba457e43f5be..3cb082e0c992 100644
--- a/arch/loongarch/kernel/process.c
+++ b/arch/loongarch/kernel/process.c
@@ -38,6 +38,7 @@
 #include <asm/cpu.h>
 #include <asm/elf.h>
 #include <asm/fpu.h>
+#include <asm/lbt.h>
 #include <asm/io.h>
 #include <asm/irq.h>
 #include <asm/irq_regs.h>
@@ -82,9 +83,11 @@ void start_thread(struct pt_regs *regs, unsigned long pc, unsigned long sp)
 	euen = regs->csr_euen & ~(CSR_EUEN_FPEN);
 	regs->csr_euen = euen;
 	lose_fpu(0);
+	lose_lbt(0);
 
 	clear_thread_flag(TIF_LSX_CTX_LIVE);
 	clear_thread_flag(TIF_LASX_CTX_LIVE);
+	clear_thread_flag(TIF_LBT_CTX_LIVE);
 	clear_used_math();
 	regs->csr_era = pc;
 	regs->regs[3] = sp;
@@ -121,10 +124,14 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
 
 	preempt_enable();
 
-	if (used_math())
-		memcpy(dst, src, sizeof(struct task_struct));
-	else
+	if (!used_math())
 		memcpy(dst, src, offsetof(struct task_struct, thread.fpu.fpr));
+	else
+		memcpy(dst, src, offsetof(struct task_struct, thread.lbt.scr0));
+
+#ifdef CONFIG_CPU_HAS_LBT
+	memcpy(&dst->thread.lbt, &src->thread.lbt, sizeof(struct loongarch_lbt));
+#endif
 
 	return 0;
 }
@@ -189,8 +196,10 @@ out:
 	ptrace_hw_copy_thread(p);
 	clear_tsk_thread_flag(p, TIF_USEDFPU);
 	clear_tsk_thread_flag(p, TIF_USEDSIMD);
+	clear_tsk_thread_flag(p, TIF_USEDLBT);
 	clear_tsk_thread_flag(p, TIF_LSX_CTX_LIVE);
 	clear_tsk_thread_flag(p, TIF_LASX_CTX_LIVE);
+	clear_tsk_thread_flag(p, TIF_LBT_CTX_LIVE);
 
 	return 0;
 }
diff --git a/arch/loongarch/kernel/ptrace.c b/arch/loongarch/kernel/ptrace.c
index f72adbf530c6..c114c5ef1332 100644
--- a/arch/loongarch/kernel/ptrace.c
+++ b/arch/loongarch/kernel/ptrace.c
@@ -38,6 +38,7 @@
 #include <asm/cpu.h>
 #include <asm/cpu-info.h>
 #include <asm/fpu.h>
+#include <asm/lbt.h>
 #include <asm/loongarch.h>
 #include <asm/page.h>
 #include <asm/pgtable.h>
@@ -338,6 +339,46 @@ static int simd_set(struct task_struct *target,
 
 #endif /* CONFIG_CPU_HAS_LSX */
 
+#ifdef CONFIG_CPU_HAS_LBT
+static int lbt_get(struct task_struct *target,
+		   const struct user_regset *regset,
+		   struct membuf to)
+{
+	int r;
+
+	r = membuf_write(&to, &target->thread.lbt.scr0, sizeof(target->thread.lbt.scr0));
+	r = membuf_write(&to, &target->thread.lbt.scr1, sizeof(target->thread.lbt.scr1));
+	r = membuf_write(&to, &target->thread.lbt.scr2, sizeof(target->thread.lbt.scr2));
+	r = membuf_write(&to, &target->thread.lbt.scr3, sizeof(target->thread.lbt.scr3));
+	r = membuf_write(&to, &target->thread.lbt.eflags, sizeof(u32));
+	r = membuf_write(&to, &target->thread.fpu.ftop, sizeof(u32));
+
+	return r;
+}
+
+static int lbt_set(struct task_struct *target,
+		   const struct user_regset *regset,
+		   unsigned int pos, unsigned int count,
+		   const void *kbuf, const void __user *ubuf)
+{
+	int err = 0;
+	const int eflags_start = 4 * sizeof(target->thread.lbt.scr0);
+	const int ftop_start = eflags_start + sizeof(u32);
+
+	err |= user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+				  &target->thread.lbt.scr0,
+				  0, 4 * sizeof(target->thread.lbt.scr0));
+	err |= user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+				  &target->thread.lbt.eflags,
+				  eflags_start, ftop_start);
+	err |= user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+				  &target->thread.fpu.ftop,
+				  ftop_start, ftop_start + sizeof(u32));
+
+	return err;
+}
+#endif /* CONFIG_CPU_HAS_LBT */
+
 #ifdef CONFIG_HAVE_HW_BREAKPOINT
 
 /*
@@ -802,6 +843,9 @@ enum loongarch_regset {
 #ifdef CONFIG_CPU_HAS_LASX
 	REGSET_LASX,
 #endif
+#ifdef CONFIG_CPU_HAS_LBT
+	REGSET_LBT,
+#endif
 #ifdef CONFIG_HAVE_HW_BREAKPOINT
 	REGSET_HW_BREAK,
 	REGSET_HW_WATCH,
@@ -853,6 +897,16 @@ static const struct user_regset loongarch64_regsets[] = {
 		.set		= simd_set,
 	},
 #endif
+#ifdef CONFIG_CPU_HAS_LBT
+	[REGSET_LBT] = {
+		.core_note_type	= NT_LOONGARCH_LBT,
+		.n		= 5,
+		.size		= sizeof(u64),
+		.align		= sizeof(u64),
+		.regset_get	= lbt_get,
+		.set		= lbt_set,
+	},
+#endif
 #ifdef CONFIG_HAVE_HW_BREAKPOINT
 	[REGSET_HW_BREAK] = {
 		.core_note_type = NT_LOONGARCH_HW_BREAK,
diff --git a/arch/loongarch/kernel/relocate.c b/arch/loongarch/kernel/relocate.c
index 01f94d1e3edf..6c3eff9af9fb 100644
--- a/arch/loongarch/kernel/relocate.c
+++ b/arch/loongarch/kernel/relocate.c
@@ -157,12 +157,11 @@ static inline void __init update_reloc_offset(unsigned long *addr, long random_o
 	*new_addr = (unsigned long)reloc_offset;
 }
 
-void * __init relocate_kernel(void)
+unsigned long __init relocate_kernel(void)
 {
 	unsigned long kernel_length;
 	unsigned long random_offset = 0;
 	void *location_new = _text; /* Default to original kernel start */
-	void *kernel_entry = start_kernel; /* Default to original kernel entry point */
 	char *cmdline = early_ioremap(fw_arg1, COMMAND_LINE_SIZE); /* Boot command line is passed in fw_arg1 */
 
 	strscpy(boot_command_line, cmdline, COMMAND_LINE_SIZE);
@@ -190,9 +189,6 @@ void * __init relocate_kernel(void)
 
 		reloc_offset += random_offset;
 
-		/* Return the new kernel's entry point */
-		kernel_entry = RELOCATED_KASLR(start_kernel);
-
 		/* The current thread is now within the relocated kernel */
 		__current_thread_info = RELOCATED_KASLR(__current_thread_info);
 
@@ -204,7 +200,7 @@ void * __init relocate_kernel(void)
 
 	relocate_absolute(random_offset);
 
-	return kernel_entry;
+	return random_offset;
 }
 
 /*
diff --git a/arch/loongarch/kernel/setup.c b/arch/loongarch/kernel/setup.c
index 9d830ab4e302..7783f0a3d742 100644
--- a/arch/loongarch/kernel/setup.c
+++ b/arch/loongarch/kernel/setup.c
@@ -626,4 +626,8 @@ void __init setup_arch(char **cmdline_p)
 #endif
 
 	paging_init();
+
+#ifdef CONFIG_KASAN
+	kasan_init();
+#endif
 }
diff --git a/arch/loongarch/kernel/signal.c b/arch/loongarch/kernel/signal.c
index ceb899366c0a..504fdfe85203 100644
--- a/arch/loongarch/kernel/signal.c
+++ b/arch/loongarch/kernel/signal.c
@@ -32,6 +32,7 @@
 #include <asm/cacheflush.h>
 #include <asm/cpu-features.h>
 #include <asm/fpu.h>
+#include <asm/lbt.h>
 #include <asm/ucontext.h>
 #include <asm/vdso.h>
 
@@ -44,6 +45,9 @@
 /* Make sure we will not lose FPU ownership */
 #define lock_fpu_owner()	({ preempt_disable(); pagefault_disable(); })
 #define unlock_fpu_owner()	({ pagefault_enable(); preempt_enable(); })
+/* Make sure we will not lose LBT ownership */
+#define lock_lbt_owner()	({ preempt_disable(); pagefault_disable(); })
+#define unlock_lbt_owner()	({ pagefault_enable(); preempt_enable(); })
 
 /* Assembly functions to move context to/from the FPU */
 extern asmlinkage int
@@ -59,6 +63,13 @@ _save_lasx_context(void __user *fpregs, void __user *fcc, void __user *fcsr);
 extern asmlinkage int
 _restore_lasx_context(void __user *fpregs, void __user *fcc, void __user *fcsr);
 
+#ifdef CONFIG_CPU_HAS_LBT
+extern asmlinkage int _save_lbt_context(void __user *regs, void __user *eflags);
+extern asmlinkage int _restore_lbt_context(void __user *regs, void __user *eflags);
+extern asmlinkage int _save_ftop_context(void __user *ftop);
+extern asmlinkage int _restore_ftop_context(void __user *ftop);
+#endif
+
 struct rt_sigframe {
 	struct siginfo rs_info;
 	struct ucontext rs_uctx;
@@ -75,6 +86,7 @@ struct extctx_layout {
 	struct _ctx_layout fpu;
 	struct _ctx_layout lsx;
 	struct _ctx_layout lasx;
+	struct _ctx_layout lbt;
 	struct _ctx_layout end;
 };
 
@@ -215,6 +227,52 @@ static int copy_lasx_from_sigcontext(struct lasx_context __user *ctx)
 	return err;
 }
 
+#ifdef CONFIG_CPU_HAS_LBT
+static int copy_lbt_to_sigcontext(struct lbt_context __user *ctx)
+{
+	int err = 0;
+	uint64_t __user *regs	= (uint64_t *)&ctx->regs;
+	uint32_t __user *eflags	= (uint32_t *)&ctx->eflags;
+
+	err |= __put_user(current->thread.lbt.scr0, &regs[0]);
+	err |= __put_user(current->thread.lbt.scr1, &regs[1]);
+	err |= __put_user(current->thread.lbt.scr2, &regs[2]);
+	err |= __put_user(current->thread.lbt.scr3, &regs[3]);
+	err |= __put_user(current->thread.lbt.eflags, eflags);
+
+	return err;
+}
+
+static int copy_lbt_from_sigcontext(struct lbt_context __user *ctx)
+{
+	int err = 0;
+	uint64_t __user *regs	= (uint64_t *)&ctx->regs;
+	uint32_t __user *eflags	= (uint32_t *)&ctx->eflags;
+
+	err |= __get_user(current->thread.lbt.scr0, &regs[0]);
+	err |= __get_user(current->thread.lbt.scr1, &regs[1]);
+	err |= __get_user(current->thread.lbt.scr2, &regs[2]);
+	err |= __get_user(current->thread.lbt.scr3, &regs[3]);
+	err |= __get_user(current->thread.lbt.eflags, eflags);
+
+	return err;
+}
+
+static int copy_ftop_to_sigcontext(struct lbt_context __user *ctx)
+{
+	uint32_t  __user *ftop	= &ctx->ftop;
+
+	return __put_user(current->thread.fpu.ftop, ftop);
+}
+
+static int copy_ftop_from_sigcontext(struct lbt_context __user *ctx)
+{
+	uint32_t  __user *ftop	= &ctx->ftop;
+
+	return __get_user(current->thread.fpu.ftop, ftop);
+}
+#endif
+
 /*
  * Wrappers for the assembly _{save,restore}_fp_context functions.
  */
@@ -272,6 +330,41 @@ static int restore_hw_lasx_context(struct lasx_context __user *ctx)
 	return _restore_lasx_context(regs, fcc, fcsr);
 }
 
+/*
+ * Wrappers for the assembly _{save,restore}_lbt_context functions.
+ */
+#ifdef CONFIG_CPU_HAS_LBT
+static int save_hw_lbt_context(struct lbt_context __user *ctx)
+{
+	uint64_t __user *regs	= (uint64_t *)&ctx->regs;
+	uint32_t __user *eflags	= (uint32_t *)&ctx->eflags;
+
+	return _save_lbt_context(regs, eflags);
+}
+
+static int restore_hw_lbt_context(struct lbt_context __user *ctx)
+{
+	uint64_t __user *regs	= (uint64_t *)&ctx->regs;
+	uint32_t __user *eflags	= (uint32_t *)&ctx->eflags;
+
+	return _restore_lbt_context(regs, eflags);
+}
+
+static int save_hw_ftop_context(struct lbt_context __user *ctx)
+{
+	uint32_t __user *ftop	= &ctx->ftop;
+
+	return _save_ftop_context(ftop);
+}
+
+static int restore_hw_ftop_context(struct lbt_context __user *ctx)
+{
+	uint32_t __user *ftop	= &ctx->ftop;
+
+	return _restore_ftop_context(ftop);
+}
+#endif
+
 static int fcsr_pending(unsigned int __user *fcsr)
 {
 	int err, sig = 0;
@@ -519,6 +612,77 @@ static int protected_restore_lasx_context(struct extctx_layout *extctx)
 	return err ?: sig;
 }
 
+#ifdef CONFIG_CPU_HAS_LBT
+static int protected_save_lbt_context(struct extctx_layout *extctx)
+{
+	int err = 0;
+	struct sctx_info __user *info = extctx->lbt.addr;
+	struct lbt_context __user *lbt_ctx =
+		(struct lbt_context *)get_ctx_through_ctxinfo(info);
+	uint64_t __user *regs	= (uint64_t *)&lbt_ctx->regs;
+	uint32_t __user *eflags	= (uint32_t *)&lbt_ctx->eflags;
+
+	while (1) {
+		lock_lbt_owner();
+		if (is_lbt_owner())
+			err |= save_hw_lbt_context(lbt_ctx);
+		else
+			err |= copy_lbt_to_sigcontext(lbt_ctx);
+		if (is_fpu_owner())
+			err |= save_hw_ftop_context(lbt_ctx);
+		else
+			err |= copy_ftop_to_sigcontext(lbt_ctx);
+		unlock_lbt_owner();
+
+		err |= __put_user(LBT_CTX_MAGIC, &info->magic);
+		err |= __put_user(extctx->lbt.size, &info->size);
+
+		if (likely(!err))
+			break;
+		/* Touch the LBT context and try again */
+		err = __put_user(0, &regs[0]) | __put_user(0, eflags);
+
+		if (err)
+			return err;
+	}
+
+	return err;
+}
+
+static int protected_restore_lbt_context(struct extctx_layout *extctx)
+{
+	int err = 0, tmp __maybe_unused;
+	struct sctx_info __user *info = extctx->lbt.addr;
+	struct lbt_context __user *lbt_ctx =
+		(struct lbt_context *)get_ctx_through_ctxinfo(info);
+	uint64_t __user *regs	= (uint64_t *)&lbt_ctx->regs;
+	uint32_t __user *eflags	= (uint32_t *)&lbt_ctx->eflags;
+
+	while (1) {
+		lock_lbt_owner();
+		if (is_lbt_owner())
+			err |= restore_hw_lbt_context(lbt_ctx);
+		else
+			err |= copy_lbt_from_sigcontext(lbt_ctx);
+		if (is_fpu_owner())
+			err |= restore_hw_ftop_context(lbt_ctx);
+		else
+			err |= copy_ftop_from_sigcontext(lbt_ctx);
+		unlock_lbt_owner();
+
+		if (likely(!err))
+			break;
+		/* Touch the LBT context and try again */
+		err = __get_user(tmp, &regs[0]) | __get_user(tmp, eflags);
+
+		if (err)
+			return err;
+	}
+
+	return err;
+}
+#endif
+
 static int setup_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
 			    struct extctx_layout *extctx)
 {
@@ -539,6 +703,11 @@ static int setup_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
 	else if (extctx->fpu.addr)
 		err |= protected_save_fpu_context(extctx);
 
+#ifdef CONFIG_CPU_HAS_LBT
+	if (extctx->lbt.addr)
+		err |= protected_save_lbt_context(extctx);
+#endif
+
 	/* Set the "end" magic */
 	info = (struct sctx_info *)extctx->end.addr;
 	err |= __put_user(0, &info->magic);
@@ -584,6 +753,13 @@ static int parse_extcontext(struct sigcontext __user *sc, struct extctx_layout *
 			extctx->lasx.addr = info;
 			break;
 
+		case LBT_CTX_MAGIC:
+			if (size < (sizeof(struct sctx_info) +
+				    sizeof(struct lbt_context)))
+				goto invalid;
+			extctx->lbt.addr = info;
+			break;
+
 		default:
 			goto invalid;
 		}
@@ -636,6 +812,11 @@ static int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc
 	else if (extctx.fpu.addr)
 		err |= protected_restore_fpu_context(&extctx);
 
+#ifdef CONFIG_CPU_HAS_LBT
+	if (extctx.lbt.addr)
+		err |= protected_restore_lbt_context(&extctx);
+#endif
+
 bad:
 	return err;
 }
@@ -700,6 +881,13 @@ static unsigned long setup_extcontext(struct extctx_layout *extctx, unsigned lon
 			  sizeof(struct fpu_context), FPU_CTX_ALIGN, new_sp);
 	}
 
+#ifdef CONFIG_CPU_HAS_LBT
+	if (cpu_has_lbt && thread_lbt_context_live()) {
+		new_sp = extframe_alloc(extctx, &extctx->lbt,
+			  sizeof(struct lbt_context), LBT_CTX_ALIGN, new_sp);
+	}
+#endif
+
 	return new_sp;
 }
 
diff --git a/arch/loongarch/kernel/stacktrace.c b/arch/loongarch/kernel/stacktrace.c
index 2463d2fea21f..92270f14db94 100644
--- a/arch/loongarch/kernel/stacktrace.c
+++ b/arch/loongarch/kernel/stacktrace.c
@@ -18,17 +18,19 @@ void arch_stack_walk(stack_trace_consume_fn consume_entry, void *cookie,
 	struct pt_regs dummyregs;
 	struct unwind_state state;
 
-	regs = &dummyregs;
+	if (!regs) {
+		regs = &dummyregs;
 
-	if (task == current) {
-		regs->regs[3] = (unsigned long)__builtin_frame_address(0);
-		regs->csr_era = (unsigned long)__builtin_return_address(0);
-	} else {
-		regs->regs[3] = thread_saved_fp(task);
-		regs->csr_era = thread_saved_ra(task);
+		if (task == current) {
+			regs->regs[3] = (unsigned long)__builtin_frame_address(0);
+			regs->csr_era = (unsigned long)__builtin_return_address(0);
+		} else {
+			regs->regs[3] = thread_saved_fp(task);
+			regs->csr_era = thread_saved_ra(task);
+		}
+		regs->regs[1] = 0;
 	}
 
-	regs->regs[1] = 0;
 	for (unwind_start(&state, task, regs);
 	     !unwind_done(&state) && !unwind_error(&state); unwind_next_frame(&state)) {
 		addr = unwind_get_return_address(&state);
diff --git a/arch/loongarch/kernel/sysrq.c b/arch/loongarch/kernel/sysrq.c
index 366baef72d29..e663c10fa39c 100644
--- a/arch/loongarch/kernel/sysrq.c
+++ b/arch/loongarch/kernel/sysrq.c
@@ -43,7 +43,7 @@ static void sysrq_tlbdump_othercpus(struct work_struct *dummy)
 static DECLARE_WORK(sysrq_tlbdump, sysrq_tlbdump_othercpus);
 #endif
 
-static void sysrq_handle_tlbdump(int key)
+static void sysrq_handle_tlbdump(u8 key)
 {
 	sysrq_tlbdump_single(NULL);
 #ifdef CONFIG_SMP
diff --git a/arch/loongarch/kernel/traps.c b/arch/loongarch/kernel/traps.c
index 89699db45cec..65214774ef7c 100644
--- a/arch/loongarch/kernel/traps.c
+++ b/arch/loongarch/kernel/traps.c
@@ -36,7 +36,9 @@
 #include <asm/break.h>
 #include <asm/cpu.h>
 #include <asm/fpu.h>
+#include <asm/lbt.h>
 #include <asm/inst.h>
+#include <asm/kgdb.h>
 #include <asm/loongarch.h>
 #include <asm/mmu_context.h>
 #include <asm/pgtable.h>
@@ -702,6 +704,11 @@ asmlinkage void noinstr do_bp(struct pt_regs *regs)
 	 * pertain to them.
 	 */
 	switch (bcode) {
+	case BRK_KDB:
+		if (kgdb_breakpoint_handler(regs))
+			goto out;
+		else
+			break;
 	case BRK_KPROBE_BP:
 		if (kprobe_breakpoint_handler(regs))
 			goto out;
@@ -768,6 +775,9 @@ asmlinkage void noinstr do_watch(struct pt_regs *regs)
 #ifndef CONFIG_HAVE_HW_BREAKPOINT
 	pr_warn("Hardware watch point handler not implemented!\n");
 #else
+	if (kgdb_breakpoint_handler(regs))
+		goto out;
+
 	if (test_tsk_thread_flag(current, TIF_SINGLESTEP)) {
 		int llbit = (csr_read32(LOONGARCH_CSR_LLBCTL) & 0x1);
 		unsigned long pc = instruction_pointer(regs);
@@ -966,13 +976,47 @@ out:
 	irqentry_exit(regs, state);
 }
 
+static void init_restore_lbt(void)
+{
+	if (!thread_lbt_context_live()) {
+		/* First time LBT context user */
+		init_lbt();
+		set_thread_flag(TIF_LBT_CTX_LIVE);
+	} else {
+		if (!is_lbt_owner())
+			own_lbt_inatomic(1);
+	}
+
+	BUG_ON(!is_lbt_enabled());
+}
+
 asmlinkage void noinstr do_lbt(struct pt_regs *regs)
 {
 	irqentry_state_t state = irqentry_enter(regs);
 
-	local_irq_enable();
-	force_sig(SIGILL);
-	local_irq_disable();
+	/*
+	 * BTD (Binary Translation Disable exception) can be triggered
+	 * during FP save/restore if TM (Top Mode) is on, which may
+	 * cause irq_enable during 'switch_to'. To avoid this situation
+	 * (including the user using 'MOVGR2GCSR' to turn on TM, which
+	 * will not trigger the BTE), we need to check PRMD first.
+	 */
+	if (regs->csr_prmd & CSR_PRMD_PIE)
+		local_irq_enable();
+
+	if (!cpu_has_lbt) {
+		force_sig(SIGILL);
+		goto out;
+	}
+	BUG_ON(is_lbt_enabled());
+
+	preempt_disable();
+	init_restore_lbt();
+	preempt_enable();
+
+out:
+	if (regs->csr_prmd & CSR_PRMD_PIE)
+		local_irq_disable();
 
 	irqentry_exit(regs, state);
 }
diff --git a/arch/loongarch/lib/Makefile b/arch/loongarch/lib/Makefile
index d60d4e096cfa..a77bf160bfc4 100644
--- a/arch/loongarch/lib/Makefile
+++ b/arch/loongarch/lib/Makefile
@@ -6,4 +6,6 @@
 lib-y	+= delay.o memset.o memcpy.o memmove.o \
 	   clear_user.o copy_user.o csum.o dump_tlb.o unaligned.o
 
+obj-$(CONFIG_CPU_HAS_LSX) += xor_simd.o xor_simd_glue.o
+
 obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o
diff --git a/arch/loongarch/lib/clear_user.S b/arch/loongarch/lib/clear_user.S
index 0790eadce166..be741544e62b 100644
--- a/arch/loongarch/lib/clear_user.S
+++ b/arch/loongarch/lib/clear_user.S
@@ -11,19 +11,6 @@
 #include <asm/cpu.h>
 #include <asm/regdef.h>
 
-.irp to, 0, 1, 2, 3, 4, 5, 6, 7
-.L_fixup_handle_\to\():
-	sub.d	a0, a2, a0
-	addi.d	a0, a0, (\to) * (-8)
-	jr	ra
-.endr
-
-.irp to, 0, 2, 4
-.L_fixup_handle_s\to\():
-	addi.d	a0, a1, -\to
-	jr	ra
-.endr
-
 SYM_FUNC_START(__clear_user)
 	/*
 	 * Some CPUs support hardware unaligned access
@@ -51,7 +38,7 @@ SYM_FUNC_START(__clear_user_generic)
 2:	move	a0, a1
 	jr	ra
 
-	_asm_extable 1b, .L_fixup_handle_s0
+	_asm_extable 1b, 2b
 SYM_FUNC_END(__clear_user_generic)
 
 /*
@@ -173,33 +160,47 @@ SYM_FUNC_START(__clear_user_fast)
 	jr	ra
 
 	/* fixup and ex_table */
-	_asm_extable 0b, .L_fixup_handle_0
-	_asm_extable 1b, .L_fixup_handle_0
-	_asm_extable 2b, .L_fixup_handle_1
-	_asm_extable 3b, .L_fixup_handle_2
-	_asm_extable 4b, .L_fixup_handle_3
-	_asm_extable 5b, .L_fixup_handle_4
-	_asm_extable 6b, .L_fixup_handle_5
-	_asm_extable 7b, .L_fixup_handle_6
-	_asm_extable 8b, .L_fixup_handle_7
-	_asm_extable 9b, .L_fixup_handle_0
-	_asm_extable 10b, .L_fixup_handle_1
-	_asm_extable 11b, .L_fixup_handle_2
-	_asm_extable 12b, .L_fixup_handle_3
-	_asm_extable 13b, .L_fixup_handle_0
-	_asm_extable 14b, .L_fixup_handle_1
-	_asm_extable 15b, .L_fixup_handle_0
-	_asm_extable 16b, .L_fixup_handle_0
-	_asm_extable 17b, .L_fixup_handle_s0
-	_asm_extable 18b, .L_fixup_handle_s0
-	_asm_extable 19b, .L_fixup_handle_s0
-	_asm_extable 20b, .L_fixup_handle_s2
-	_asm_extable 21b, .L_fixup_handle_s0
-	_asm_extable 22b, .L_fixup_handle_s0
-	_asm_extable 23b, .L_fixup_handle_s4
-	_asm_extable 24b, .L_fixup_handle_s0
-	_asm_extable 25b, .L_fixup_handle_s4
-	_asm_extable 26b, .L_fixup_handle_s0
-	_asm_extable 27b, .L_fixup_handle_s4
-	_asm_extable 28b, .L_fixup_handle_s0
+.Llarge_fixup:
+	sub.d	a1, a2, a0
+
+.Lsmall_fixup:
+29:	st.b	zero, a0, 0
+	addi.d	a0, a0, 1
+	addi.d	a1, a1, -1
+	bgt	a1, zero, 29b
+
+.Lexit:
+	move	a0, a1
+	jr	ra
+
+	_asm_extable 0b, .Lsmall_fixup
+	_asm_extable 1b, .Llarge_fixup
+	_asm_extable 2b, .Llarge_fixup
+	_asm_extable 3b, .Llarge_fixup
+	_asm_extable 4b, .Llarge_fixup
+	_asm_extable 5b, .Llarge_fixup
+	_asm_extable 6b, .Llarge_fixup
+	_asm_extable 7b, .Llarge_fixup
+	_asm_extable 8b, .Llarge_fixup
+	_asm_extable 9b, .Llarge_fixup
+	_asm_extable 10b, .Llarge_fixup
+	_asm_extable 11b, .Llarge_fixup
+	_asm_extable 12b, .Llarge_fixup
+	_asm_extable 13b, .Llarge_fixup
+	_asm_extable 14b, .Llarge_fixup
+	_asm_extable 15b, .Llarge_fixup
+	_asm_extable 16b, .Llarge_fixup
+	_asm_extable 17b, .Lexit
+	_asm_extable 18b, .Lsmall_fixup
+	_asm_extable 19b, .Lsmall_fixup
+	_asm_extable 20b, .Lsmall_fixup
+	_asm_extable 21b, .Lsmall_fixup
+	_asm_extable 22b, .Lsmall_fixup
+	_asm_extable 23b, .Lsmall_fixup
+	_asm_extable 24b, .Lsmall_fixup
+	_asm_extable 25b, .Lsmall_fixup
+	_asm_extable 26b, .Lsmall_fixup
+	_asm_extable 27b, .Lsmall_fixup
+	_asm_extable 28b, .Lsmall_fixup
+	_asm_extable 29b, .Lexit
 SYM_FUNC_END(__clear_user_fast)
diff --git a/arch/loongarch/lib/copy_user.S b/arch/loongarch/lib/copy_user.S
index bfe3d2793d00..feec3d362803 100644
--- a/arch/loongarch/lib/copy_user.S
+++ b/arch/loongarch/lib/copy_user.S
@@ -11,19 +11,6 @@
 #include <asm/cpu.h>
 #include <asm/regdef.h>
 
-.irp to, 0, 1, 2, 3, 4, 5, 6, 7
-.L_fixup_handle_\to\():
-	sub.d	a0, a2, a0
-	addi.d	a0, a0, (\to) * (-8)
-	jr	ra
-.endr
-
-.irp to, 0, 2, 4
-.L_fixup_handle_s\to\():
-	addi.d	a0, a2, -\to
-	jr	ra
-.endr
-
 SYM_FUNC_START(__copy_user)
 	/*
 	 * Some CPUs support hardware unaligned access
@@ -54,8 +41,8 @@ SYM_FUNC_START(__copy_user_generic)
 3:	move	a0, a2
 	jr	ra
 
-	_asm_extable 1b, .L_fixup_handle_s0
-	_asm_extable 2b, .L_fixup_handle_s0
+	_asm_extable 1b, 3b
+	_asm_extable 2b, 3b
 SYM_FUNC_END(__copy_user_generic)
 
 /*
@@ -69,10 +56,10 @@ SYM_FUNC_START(__copy_user_fast)
 	sltui	t0, a2, 9
 	bnez	t0, .Lsmall
 
-	add.d	a3, a1, a2
-	add.d	a2, a0, a2
 0:	ld.d	t0, a1, 0
 1:	st.d	t0, a0, 0
+	add.d	a3, a1, a2
+	add.d	a2, a0, a2
 
 	/* align up destination address */
 	andi	t1, a0, 7
@@ -94,7 +81,6 @@ SYM_FUNC_START(__copy_user_fast)
 7:	ld.d	t5, a1, 40
 8:	ld.d	t6, a1, 48
 9:	ld.d	t7, a1, 56
-	addi.d	a1, a1, 64
 10:	st.d	t0, a0, 0
 11:	st.d	t1, a0, 8
 12:	st.d	t2, a0, 16
@@ -103,6 +89,7 @@ SYM_FUNC_START(__copy_user_fast)
 15:	st.d	t5, a0, 40
 16:	st.d	t6, a0, 48
 17:	st.d	t7, a0, 56
+	addi.d	a1, a1, 64
 	addi.d	a0, a0, 64
 	bltu	a1, a4, .Lloop64
 
@@ -114,11 +101,11 @@ SYM_FUNC_START(__copy_user_fast)
 19:	ld.d	t1, a1, 8
 20:	ld.d	t2, a1, 16
 21:	ld.d	t3, a1, 24
-	addi.d	a1, a1, 32
 22:	st.d	t0, a0, 0
 23:	st.d	t1, a0, 8
 24:	st.d	t2, a0, 16
 25:	st.d	t3, a0, 24
+	addi.d	a1, a1, 32
 	addi.d	a0, a0, 32
 
 .Llt32:
@@ -126,9 +113,9 @@ SYM_FUNC_START(__copy_user_fast)
 	bgeu	a1, a4, .Llt16
 26:	ld.d	t0, a1, 0
 27:	ld.d	t1, a1, 8
-	addi.d	a1, a1, 16
 28:	st.d	t0, a0, 0
 29:	st.d	t1, a0, 8
+	addi.d	a1, a1, 16
 	addi.d	a0, a0, 16
 
 .Llt16:
@@ -136,6 +123,7 @@ SYM_FUNC_START(__copy_user_fast)
 	bgeu	a1, a4, .Llt8
 30:	ld.d	t0, a1, 0
 31:	st.d	t0, a0, 0
+	addi.d	a1, a1, 8
 	addi.d	a0, a0, 8
 
 .Llt8:
@@ -214,62 +202,79 @@ SYM_FUNC_START(__copy_user_fast)
 	jr	ra
 
 	/* fixup and ex_table */
-	_asm_extable 0b, .L_fixup_handle_0
-	_asm_extable 1b, .L_fixup_handle_0
-	_asm_extable 2b, .L_fixup_handle_0
-	_asm_extable 3b, .L_fixup_handle_0
-	_asm_extable 4b, .L_fixup_handle_0
-	_asm_extable 5b, .L_fixup_handle_0
-	_asm_extable 6b, .L_fixup_handle_0
-	_asm_extable 7b, .L_fixup_handle_0
-	_asm_extable 8b, .L_fixup_handle_0
-	_asm_extable 9b, .L_fixup_handle_0
-	_asm_extable 10b, .L_fixup_handle_0
-	_asm_extable 11b, .L_fixup_handle_1
-	_asm_extable 12b, .L_fixup_handle_2
-	_asm_extable 13b, .L_fixup_handle_3
-	_asm_extable 14b, .L_fixup_handle_4
-	_asm_extable 15b, .L_fixup_handle_5
-	_asm_extable 16b, .L_fixup_handle_6
-	_asm_extable 17b, .L_fixup_handle_7
-	_asm_extable 18b, .L_fixup_handle_0
-	_asm_extable 19b, .L_fixup_handle_0
-	_asm_extable 20b, .L_fixup_handle_0
-	_asm_extable 21b, .L_fixup_handle_0
-	_asm_extable 22b, .L_fixup_handle_0
-	_asm_extable 23b, .L_fixup_handle_1
-	_asm_extable 24b, .L_fixup_handle_2
-	_asm_extable 25b, .L_fixup_handle_3
-	_asm_extable 26b, .L_fixup_handle_0
-	_asm_extable 27b, .L_fixup_handle_0
-	_asm_extable 28b, .L_fixup_handle_0
-	_asm_extable 29b, .L_fixup_handle_1
-	_asm_extable 30b, .L_fixup_handle_0
-	_asm_extable 31b, .L_fixup_handle_0
-	_asm_extable 32b, .L_fixup_handle_0
-	_asm_extable 33b, .L_fixup_handle_0
-	_asm_extable 34b, .L_fixup_handle_s0
-	_asm_extable 35b, .L_fixup_handle_s0
-	_asm_extable 36b, .L_fixup_handle_s0
-	_asm_extable 37b, .L_fixup_handle_s0
-	_asm_extable 38b, .L_fixup_handle_s0
-	_asm_extable 39b, .L_fixup_handle_s0
-	_asm_extable 40b, .L_fixup_handle_s0
-	_asm_extable 41b, .L_fixup_handle_s2
-	_asm_extable 42b, .L_fixup_handle_s0
-	_asm_extable 43b, .L_fixup_handle_s0
-	_asm_extable 44b, .L_fixup_handle_s0
-	_asm_extable 45b, .L_fixup_handle_s0
-	_asm_extable 46b, .L_fixup_handle_s0
-	_asm_extable 47b, .L_fixup_handle_s4
-	_asm_extable 48b, .L_fixup_handle_s0
-	_asm_extable 49b, .L_fixup_handle_s0
-	_asm_extable 50b, .L_fixup_handle_s0
-	_asm_extable 51b, .L_fixup_handle_s4
-	_asm_extable 52b, .L_fixup_handle_s0
-	_asm_extable 53b, .L_fixup_handle_s0
-	_asm_extable 54b, .L_fixup_handle_s0
-	_asm_extable 55b, .L_fixup_handle_s4
-	_asm_extable 56b, .L_fixup_handle_s0
-	_asm_extable 57b, .L_fixup_handle_s0
+.Llarge_fixup:
+	sub.d	a2, a2, a0
+
+.Lsmall_fixup:
+58:	ld.b	t0, a1, 0
+59:	st.b	t0, a0, 0
+	addi.d	a0, a0, 1
+	addi.d	a1, a1, 1
+	addi.d	a2, a2, -1
+	bgt	a2, zero, 58b
+
+.Lexit:
+	move	a0, a2
+	jr	ra
+
+	_asm_extable 0b, .Lsmall_fixup
+	_asm_extable 1b, .Lsmall_fixup
+	_asm_extable 2b, .Llarge_fixup
+	_asm_extable 3b, .Llarge_fixup
+	_asm_extable 4b, .Llarge_fixup
+	_asm_extable 5b, .Llarge_fixup
+	_asm_extable 6b, .Llarge_fixup
+	_asm_extable 7b, .Llarge_fixup
+	_asm_extable 8b, .Llarge_fixup
+	_asm_extable 9b, .Llarge_fixup
+	_asm_extable 10b, .Llarge_fixup
+	_asm_extable 11b, .Llarge_fixup
+	_asm_extable 12b, .Llarge_fixup
+	_asm_extable 13b, .Llarge_fixup
+	_asm_extable 14b, .Llarge_fixup
+	_asm_extable 15b, .Llarge_fixup
+	_asm_extable 16b, .Llarge_fixup
+	_asm_extable 17b, .Llarge_fixup
+	_asm_extable 18b, .Llarge_fixup
+	_asm_extable 19b, .Llarge_fixup
+	_asm_extable 20b, .Llarge_fixup
+	_asm_extable 21b, .Llarge_fixup
+	_asm_extable 22b, .Llarge_fixup
+	_asm_extable 23b, .Llarge_fixup
+	_asm_extable 24b, .Llarge_fixup
+	_asm_extable 25b, .Llarge_fixup
+	_asm_extable 26b, .Llarge_fixup
+	_asm_extable 27b, .Llarge_fixup
+	_asm_extable 28b, .Llarge_fixup
+	_asm_extable 29b, .Llarge_fixup
+	_asm_extable 30b, .Llarge_fixup
+	_asm_extable 31b, .Llarge_fixup
+	_asm_extable 32b, .Llarge_fixup
+	_asm_extable 33b, .Llarge_fixup
+	_asm_extable 34b, .Lexit
+	_asm_extable 35b, .Lexit
+	_asm_extable 36b, .Lsmall_fixup
+	_asm_extable 37b, .Lsmall_fixup
+	_asm_extable 38b, .Lsmall_fixup
+	_asm_extable 39b, .Lsmall_fixup
+	_asm_extable 40b, .Lsmall_fixup
+	_asm_extable 41b, .Lsmall_fixup
+	_asm_extable 42b, .Lsmall_fixup
+	_asm_extable 43b, .Lsmall_fixup
+	_asm_extable 44b, .Lsmall_fixup
+	_asm_extable 45b, .Lsmall_fixup
+	_asm_extable 46b, .Lsmall_fixup
+	_asm_extable 47b, .Lsmall_fixup
+	_asm_extable 48b, .Lsmall_fixup
+	_asm_extable 49b, .Lsmall_fixup
+	_asm_extable 50b, .Lsmall_fixup
+	_asm_extable 51b, .Lsmall_fixup
+	_asm_extable 52b, .Lsmall_fixup
+	_asm_extable 53b, .Lsmall_fixup
+	_asm_extable 54b, .Lsmall_fixup
+	_asm_extable 55b, .Lsmall_fixup
+	_asm_extable 56b, .Lsmall_fixup
+	_asm_extable 57b, .Lsmall_fixup
+	_asm_extable 58b, .Lexit
+	_asm_extable 59b, .Lexit
 SYM_FUNC_END(__copy_user_fast)
diff --git a/arch/loongarch/lib/memcpy.S b/arch/loongarch/lib/memcpy.S
index cc30b3b6252f..fa1148878d2b 100644
--- a/arch/loongarch/lib/memcpy.S
+++ b/arch/loongarch/lib/memcpy.S
@@ -10,6 +10,8 @@
 #include <asm/cpu.h>
 #include <asm/regdef.h>
 
+.section .noinstr.text, "ax"
+
 SYM_FUNC_START(memcpy)
 	/*
 	 * Some CPUs support hardware unaligned access
@@ -17,9 +19,13 @@ SYM_FUNC_START(memcpy)
 	ALTERNATIVE	"b __memcpy_generic", \
 			"b __memcpy_fast", CPU_FEATURE_UAL
 SYM_FUNC_END(memcpy)
-_ASM_NOKPROBE(memcpy)
+SYM_FUNC_ALIAS(__memcpy, memcpy)
 
 EXPORT_SYMBOL(memcpy)
+EXPORT_SYMBOL(__memcpy)
+
+_ASM_NOKPROBE(memcpy)
+_ASM_NOKPROBE(__memcpy)
 
 /*
  * void *__memcpy_generic(void *dst, const void *src, size_t n)
diff --git a/arch/loongarch/lib/memmove.S b/arch/loongarch/lib/memmove.S
index 7dc76d1484b6..82dae062fec8 100644
--- a/arch/loongarch/lib/memmove.S
+++ b/arch/loongarch/lib/memmove.S
@@ -10,23 +10,29 @@
 #include <asm/cpu.h>
 #include <asm/regdef.h>
 
+.section .noinstr.text, "ax"
+
 SYM_FUNC_START(memmove)
-	blt	a0, a1, memcpy	/* dst < src, memcpy */
-	blt	a1, a0, rmemcpy	/* src < dst, rmemcpy */
-	jr	ra		/* dst == src, return */
+	blt	a0, a1, __memcpy	/* dst < src, memcpy */
+	blt	a1, a0, __rmemcpy	/* src < dst, rmemcpy */
+	jr	ra			/* dst == src, return */
 SYM_FUNC_END(memmove)
-_ASM_NOKPROBE(memmove)
+SYM_FUNC_ALIAS(__memmove, memmove)
 
 EXPORT_SYMBOL(memmove)
+EXPORT_SYMBOL(__memmove)
+
+_ASM_NOKPROBE(memmove)
+_ASM_NOKPROBE(__memmove)
 
-SYM_FUNC_START(rmemcpy)
+SYM_FUNC_START(__rmemcpy)
 	/*
 	 * Some CPUs support hardware unaligned access
 	 */
 	ALTERNATIVE	"b __rmemcpy_generic", \
 			"b __rmemcpy_fast", CPU_FEATURE_UAL
-SYM_FUNC_END(rmemcpy)
-_ASM_NOKPROBE(rmemcpy)
+SYM_FUNC_END(__rmemcpy)
+_ASM_NOKPROBE(__rmemcpy)
 
 /*
  * void *__rmemcpy_generic(void *dst, const void *src, size_t n)
diff --git a/arch/loongarch/lib/memset.S b/arch/loongarch/lib/memset.S
index 3f20f7996e8e..06d3ca54cbfe 100644
--- a/arch/loongarch/lib/memset.S
+++ b/arch/loongarch/lib/memset.S
@@ -16,6 +16,8 @@
 	bstrins.d \r0, \r0, 63, 32
 .endm
 
+.section .noinstr.text, "ax"
+
 SYM_FUNC_START(memset)
 	/*
 	 * Some CPUs support hardware unaligned access
@@ -23,9 +25,13 @@ SYM_FUNC_START(memset)
 	ALTERNATIVE	"b __memset_generic", \
 			"b __memset_fast", CPU_FEATURE_UAL
 SYM_FUNC_END(memset)
-_ASM_NOKPROBE(memset)
+SYM_FUNC_ALIAS(__memset, memset)
 
 EXPORT_SYMBOL(memset)
+EXPORT_SYMBOL(__memset)
+
+_ASM_NOKPROBE(memset)
+_ASM_NOKPROBE(__memset)
 
 /*
  * void *__memset_generic(void *s, int c, size_t n)
diff --git a/arch/loongarch/lib/xor_simd.c b/arch/loongarch/lib/xor_simd.c
new file mode 100644
index 000000000000..84cd24b728c4
--- /dev/null
+++ b/arch/loongarch/lib/xor_simd.c
@@ -0,0 +1,93 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * LoongArch SIMD XOR operations
+ *
+ * Copyright (C) 2023 WANG Xuerui <git@xen0n.name>
+ */
+
+#include "xor_simd.h"
+
+/*
+ * Process one cache line (64 bytes) per loop. This is assuming all future
+ * popular LoongArch cores are similar performance-characteristics-wise to the
+ * current models.
+ */
+#define LINE_WIDTH 64
+
+#ifdef CONFIG_CPU_HAS_LSX
+
+#define LD(reg, base, offset)	\
+	"vld $vr" #reg ", %[" #base "], " #offset "\n\t"
+#define ST(reg, base, offset)	\
+	"vst $vr" #reg ", %[" #base "], " #offset "\n\t"
+#define XOR(dj, k)	"vxor.v $vr" #dj ", $vr" #dj ", $vr" #k "\n\t"
+
+#define LD_INOUT_LINE(base)	\
+	LD(0, base, 0)		\
+	LD(1, base, 16)		\
+	LD(2, base, 32)		\
+	LD(3, base, 48)
+
+#define LD_AND_XOR_LINE(base)	\
+	LD(4, base, 0)		\
+	LD(5, base, 16)		\
+	LD(6, base, 32)		\
+	LD(7, base, 48)		\
+	XOR(0, 4)		\
+	XOR(1, 5)		\
+	XOR(2, 6)		\
+	XOR(3, 7)
+
+#define ST_LINE(base)		\
+	ST(0, base, 0)		\
+	ST(1, base, 16)		\
+	ST(2, base, 32)		\
+	ST(3, base, 48)
+
+#define XOR_FUNC_NAME(nr) __xor_lsx_##nr
+#include "xor_template.c"
+
+#undef LD
+#undef ST
+#undef XOR
+#undef LD_INOUT_LINE
+#undef LD_AND_XOR_LINE
+#undef ST_LINE
+#undef XOR_FUNC_NAME
+
+#endif /* CONFIG_CPU_HAS_LSX */
+
+#ifdef CONFIG_CPU_HAS_LASX
+
+#define LD(reg, base, offset)	\
+	"xvld $xr" #reg ", %[" #base "], " #offset "\n\t"
+#define ST(reg, base, offset)	\
+	"xvst $xr" #reg ", %[" #base "], " #offset "\n\t"
+#define XOR(dj, k)	"xvxor.v $xr" #dj ", $xr" #dj ", $xr" #k "\n\t"
+
+#define LD_INOUT_LINE(base)	\
+	LD(0, base, 0)		\
+	LD(1, base, 32)
+
+#define LD_AND_XOR_LINE(base)	\
+	LD(2, base, 0)		\
+	LD(3, base, 32)		\
+	XOR(0, 2)		\
+	XOR(1, 3)
+
+#define ST_LINE(base)		\
+	ST(0, base, 0)		\
+	ST(1, base, 32)
+
+#define XOR_FUNC_NAME(nr) __xor_lasx_##nr
+#include "xor_template.c"
+
+#undef LD
+#undef ST
+#undef XOR
+#undef LD_INOUT_LINE
+#undef LD_AND_XOR_LINE
+#undef ST_LINE
+#undef XOR_FUNC_NAME
+
+#endif /* CONFIG_CPU_HAS_LASX */
diff --git a/arch/loongarch/lib/xor_simd.h b/arch/loongarch/lib/xor_simd.h
new file mode 100644
index 000000000000..f50f32514d80
--- /dev/null
+++ b/arch/loongarch/lib/xor_simd.h
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Simple interface to link xor_simd.c and xor_simd_glue.c
+ *
+ * Separating these files ensures that no SIMD instructions are run outside of
+ * the kfpu critical section.
+ */
+
+#ifndef __LOONGARCH_LIB_XOR_SIMD_H
+#define __LOONGARCH_LIB_XOR_SIMD_H
+
+#ifdef CONFIG_CPU_HAS_LSX
+void __xor_lsx_2(unsigned long bytes, unsigned long * __restrict p1,
+		 const unsigned long * __restrict p2);
+void __xor_lsx_3(unsigned long bytes, unsigned long * __restrict p1,
+		 const unsigned long * __restrict p2, const unsigned long * __restrict p3);
+void __xor_lsx_4(unsigned long bytes, unsigned long * __restrict p1,
+		 const unsigned long * __restrict p2, const unsigned long * __restrict p3,
+		 const unsigned long * __restrict p4);
+void __xor_lsx_5(unsigned long bytes, unsigned long * __restrict p1,
+		 const unsigned long * __restrict p2, const unsigned long * __restrict p3,
+		 const unsigned long * __restrict p4, const unsigned long * __restrict p5);
+#endif /* CONFIG_CPU_HAS_LSX */
+
+#ifdef CONFIG_CPU_HAS_LASX
+void __xor_lasx_2(unsigned long bytes, unsigned long * __restrict p1,
+		  const unsigned long * __restrict p2);
+void __xor_lasx_3(unsigned long bytes, unsigned long * __restrict p1,
+		  const unsigned long * __restrict p2, const unsigned long * __restrict p3);
+void __xor_lasx_4(unsigned long bytes, unsigned long * __restrict p1,
+		  const unsigned long * __restrict p2, const unsigned long * __restrict p3,
+		  const unsigned long * __restrict p4);
+void __xor_lasx_5(unsigned long bytes, unsigned long * __restrict p1,
+		  const unsigned long * __restrict p2, const unsigned long * __restrict p3,
+		  const unsigned long * __restrict p4, const unsigned long * __restrict p5);
+#endif /* CONFIG_CPU_HAS_LASX */
+
+#endif /* __LOONGARCH_LIB_XOR_SIMD_H */
diff --git a/arch/loongarch/lib/xor_simd_glue.c b/arch/loongarch/lib/xor_simd_glue.c
new file mode 100644
index 000000000000..393f689dbcf6
--- /dev/null
+++ b/arch/loongarch/lib/xor_simd_glue.c
@@ -0,0 +1,72 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * LoongArch SIMD XOR operations
+ *
+ * Copyright (C) 2023 WANG Xuerui <git@xen0n.name>
+ */
+
+#include <linux/export.h>
+#include <linux/sched.h>
+#include <asm/fpu.h>
+#include <asm/xor_simd.h>
+#include "xor_simd.h"
+
+#define MAKE_XOR_GLUE_2(flavor)							\
+void xor_##flavor##_2(unsigned long bytes, unsigned long * __restrict p1,	\
+		      const unsigned long * __restrict p2)			\
+{										\
+	kernel_fpu_begin();							\
+	__xor_##flavor##_2(bytes, p1, p2);					\
+	kernel_fpu_end();							\
+}										\
+EXPORT_SYMBOL_GPL(xor_##flavor##_2)
+
+#define MAKE_XOR_GLUE_3(flavor)							\
+void xor_##flavor##_3(unsigned long bytes, unsigned long * __restrict p1,	\
+		      const unsigned long * __restrict p2,			\
+		      const unsigned long * __restrict p3)			\
+{										\
+	kernel_fpu_begin();							\
+	__xor_##flavor##_3(bytes, p1, p2, p3);					\
+	kernel_fpu_end();							\
+}										\
+EXPORT_SYMBOL_GPL(xor_##flavor##_3)
+
+#define MAKE_XOR_GLUE_4(flavor)							\
+void xor_##flavor##_4(unsigned long bytes, unsigned long * __restrict p1,	\
+		      const unsigned long * __restrict p2,			\
+		      const unsigned long * __restrict p3,			\
+		      const unsigned long * __restrict p4)			\
+{										\
+	kernel_fpu_begin();							\
+	__xor_##flavor##_4(bytes, p1, p2, p3, p4);				\
+	kernel_fpu_end();							\
+}										\
+EXPORT_SYMBOL_GPL(xor_##flavor##_4)
+
+#define MAKE_XOR_GLUE_5(flavor)							\
+void xor_##flavor##_5(unsigned long bytes, unsigned long * __restrict p1,	\
+		      const unsigned long * __restrict p2,			\
+		      const unsigned long * __restrict p3,			\
+		      const unsigned long * __restrict p4,			\
+		      const unsigned long * __restrict p5)			\
+{										\
+	kernel_fpu_begin();							\
+	__xor_##flavor##_5(bytes, p1, p2, p3, p4, p5);				\
+	kernel_fpu_end();							\
+}										\
+EXPORT_SYMBOL_GPL(xor_##flavor##_5)
+
+#define MAKE_XOR_GLUES(flavor)		\
+	MAKE_XOR_GLUE_2(flavor);	\
+	MAKE_XOR_GLUE_3(flavor);	\
+	MAKE_XOR_GLUE_4(flavor);	\
+	MAKE_XOR_GLUE_5(flavor)
+
+#ifdef CONFIG_CPU_HAS_LSX
+MAKE_XOR_GLUES(lsx);
+#endif
+
+#ifdef CONFIG_CPU_HAS_LASX
+MAKE_XOR_GLUES(lasx);
+#endif
diff --git a/arch/loongarch/lib/xor_template.c b/arch/loongarch/lib/xor_template.c
new file mode 100644
index 000000000000..0358ced7fe33
--- /dev/null
+++ b/arch/loongarch/lib/xor_template.c
@@ -0,0 +1,110 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2023 WANG Xuerui <git@xen0n.name>
+ *
+ * Template for XOR operations, instantiated in xor_simd.c.
+ *
+ * Expected preprocessor definitions:
+ *
+ * - LINE_WIDTH
+ * - XOR_FUNC_NAME(nr)
+ * - LD_INOUT_LINE(buf)
+ * - LD_AND_XOR_LINE(buf)
+ * - ST_LINE(buf)
+ */
+
+void XOR_FUNC_NAME(2)(unsigned long bytes,
+		      unsigned long * __restrict v1,
+		      const unsigned long * __restrict v2)
+{
+	unsigned long lines = bytes / LINE_WIDTH;
+
+	do {
+		__asm__ __volatile__ (
+			LD_INOUT_LINE(v1)
+			LD_AND_XOR_LINE(v2)
+			ST_LINE(v1)
+		: : [v1] "r"(v1), [v2] "r"(v2) : "memory"
+		);
+
+		v1 += LINE_WIDTH / sizeof(unsigned long);
+		v2 += LINE_WIDTH / sizeof(unsigned long);
+	} while (--lines > 0);
+}
+
+void XOR_FUNC_NAME(3)(unsigned long bytes,
+		      unsigned long * __restrict v1,
+		      const unsigned long * __restrict v2,
+		      const unsigned long * __restrict v3)
+{
+	unsigned long lines = bytes / LINE_WIDTH;
+
+	do {
+		__asm__ __volatile__ (
+			LD_INOUT_LINE(v1)
+			LD_AND_XOR_LINE(v2)
+			LD_AND_XOR_LINE(v3)
+			ST_LINE(v1)
+		: : [v1] "r"(v1), [v2] "r"(v2), [v3] "r"(v3) : "memory"
+		);
+
+		v1 += LINE_WIDTH / sizeof(unsigned long);
+		v2 += LINE_WIDTH / sizeof(unsigned long);
+		v3 += LINE_WIDTH / sizeof(unsigned long);
+	} while (--lines > 0);
+}
+
+void XOR_FUNC_NAME(4)(unsigned long bytes,
+		      unsigned long * __restrict v1,
+		      const unsigned long * __restrict v2,
+		      const unsigned long * __restrict v3,
+		      const unsigned long * __restrict v4)
+{
+	unsigned long lines = bytes / LINE_WIDTH;
+
+	do {
+		__asm__ __volatile__ (
+			LD_INOUT_LINE(v1)
+			LD_AND_XOR_LINE(v2)
+			LD_AND_XOR_LINE(v3)
+			LD_AND_XOR_LINE(v4)
+			ST_LINE(v1)
+		: : [v1] "r"(v1), [v2] "r"(v2), [v3] "r"(v3), [v4] "r"(v4)
+		: "memory"
+		);
+
+		v1 += LINE_WIDTH / sizeof(unsigned long);
+		v2 += LINE_WIDTH / sizeof(unsigned long);
+		v3 += LINE_WIDTH / sizeof(unsigned long);
+		v4 += LINE_WIDTH / sizeof(unsigned long);
+	} while (--lines > 0);
+}
+
+void XOR_FUNC_NAME(5)(unsigned long bytes,
+		      unsigned long * __restrict v1,
+		      const unsigned long * __restrict v2,
+		      const unsigned long * __restrict v3,
+		      const unsigned long * __restrict v4,
+		      const unsigned long * __restrict v5)
+{
+	unsigned long lines = bytes / LINE_WIDTH;
+
+	do {
+		__asm__ __volatile__ (
+			LD_INOUT_LINE(v1)
+			LD_AND_XOR_LINE(v2)
+			LD_AND_XOR_LINE(v3)
+			LD_AND_XOR_LINE(v4)
+			LD_AND_XOR_LINE(v5)
+			ST_LINE(v1)
+		: : [v1] "r"(v1), [v2] "r"(v2), [v3] "r"(v3), [v4] "r"(v4),
+		    [v5] "r"(v5) : "memory"
+		);
+
+		v1 += LINE_WIDTH / sizeof(unsigned long);
+		v2 += LINE_WIDTH / sizeof(unsigned long);
+		v3 += LINE_WIDTH / sizeof(unsigned long);
+		v4 += LINE_WIDTH / sizeof(unsigned long);
+		v5 += LINE_WIDTH / sizeof(unsigned long);
+	} while (--lines > 0);
+}
diff --git a/arch/loongarch/mm/Makefile b/arch/loongarch/mm/Makefile
index 8ffc6383f836..e4d1e581dbae 100644
--- a/arch/loongarch/mm/Makefile
+++ b/arch/loongarch/mm/Makefile
@@ -7,3 +7,6 @@ obj-y				+= init.o cache.o tlb.o tlbex.o extable.o \
 				   fault.o ioremap.o maccess.o mmap.o pgtable.o page.o
 
 obj-$(CONFIG_HUGETLB_PAGE)	+= hugetlbpage.o
+obj-$(CONFIG_KASAN)		+= kasan_init.o
+
+KASAN_SANITIZE_kasan_init.o     := n
diff --git a/arch/loongarch/mm/cache.c b/arch/loongarch/mm/cache.c
index 72685a48eaf0..6be04d36ca07 100644
--- a/arch/loongarch/mm/cache.c
+++ b/arch/loongarch/mm/cache.c
@@ -156,7 +156,6 @@ void cpu_cache_init(void)
 
 	current_cpu_data.cache_leaves_present = leaf;
 	current_cpu_data.options |= LOONGARCH_CPU_PREFETCH;
-	shm_align_mask = PAGE_SIZE - 1;
 }
 
 static const pgprot_t protection_map[16] = {
diff --git a/arch/loongarch/mm/fault.c b/arch/loongarch/mm/fault.c
index da5b6d518cdb..e6376e3dce86 100644
--- a/arch/loongarch/mm/fault.c
+++ b/arch/loongarch/mm/fault.c
@@ -23,6 +23,7 @@
 #include <linux/kprobes.h>
 #include <linux/perf_event.h>
 #include <linux/uaccess.h>
+#include <linux/kfence.h>
 
 #include <asm/branch.h>
 #include <asm/mmu_context.h>
@@ -30,7 +31,8 @@
 
 int show_unhandled_signals = 1;
 
-static void __kprobes no_context(struct pt_regs *regs, unsigned long address)
+static void __kprobes no_context(struct pt_regs *regs,
+			unsigned long write, unsigned long address)
 {
 	const int field = sizeof(unsigned long) * 2;
 
@@ -38,6 +40,9 @@ static void __kprobes no_context(struct pt_regs *regs, unsigned long address)
 	if (fixup_exception(regs))
 		return;
 
+	if (kfence_handle_page_fault(address, write, regs))
+		return;
+
 	/*
 	 * Oops. The kernel tried to access some bad page. We'll have to
 	 * terminate things with extreme prejudice.
@@ -51,14 +56,15 @@ static void __kprobes no_context(struct pt_regs *regs, unsigned long address)
 	die("Oops", regs);
 }
 
-static void __kprobes do_out_of_memory(struct pt_regs *regs, unsigned long address)
+static void __kprobes do_out_of_memory(struct pt_regs *regs,
+			unsigned long write, unsigned long address)
 {
 	/*
 	 * We ran out of memory, call the OOM killer, and return the userspace
 	 * (which will retry the fault, or kill us if we got oom-killed).
 	 */
 	if (!user_mode(regs)) {
-		no_context(regs, address);
+		no_context(regs, write, address);
 		return;
 	}
 	pagefault_out_of_memory();
@@ -69,7 +75,7 @@ static void __kprobes do_sigbus(struct pt_regs *regs,
 {
 	/* Kernel mode? Handle exceptions or die */
 	if (!user_mode(regs)) {
-		no_context(regs, address);
+		no_context(regs, write, address);
 		return;
 	}
 
@@ -90,7 +96,7 @@ static void __kprobes do_sigsegv(struct pt_regs *regs,
 
 	/* Kernel mode? Handle exceptions or die */
 	if (!user_mode(regs)) {
-		no_context(regs, address);
+		no_context(regs, write, address);
 		return;
 	}
 
@@ -149,7 +155,7 @@ static void __kprobes __do_page_fault(struct pt_regs *regs,
 	 */
 	if (address & __UA_LIMIT) {
 		if (!user_mode(regs))
-			no_context(regs, address);
+			no_context(regs, write, address);
 		else
 			do_sigsegv(regs, write, address, si_code);
 		return;
@@ -211,7 +217,7 @@ good_area:
 
 	if (fault_signal_pending(fault, regs)) {
 		if (!user_mode(regs))
-			no_context(regs, address);
+			no_context(regs, write, address);
 		return;
 	}
 
@@ -232,7 +238,7 @@ good_area:
 	if (unlikely(fault & VM_FAULT_ERROR)) {
 		mmap_read_unlock(mm);
 		if (fault & VM_FAULT_OOM) {
-			do_out_of_memory(regs, address);
+			do_out_of_memory(regs, write, address);
 			return;
 		} else if (fault & VM_FAULT_SIGSEGV) {
 			do_sigsegv(regs, write, address, si_code);
diff --git a/arch/loongarch/mm/init.c b/arch/loongarch/mm/init.c
index 3b7d8129570b..f3fe8c06ba4d 100644
--- a/arch/loongarch/mm/init.c
+++ b/arch/loongarch/mm/init.c
@@ -35,33 +35,8 @@
 #include <asm/pgalloc.h>
 #include <asm/tlb.h>
 
-/*
- * We have up to 8 empty zeroed pages so we can map one of the right colour
- * when needed.	 Since page is never written to after the initialization we
- * don't have to care about aliases on other CPUs.
- */
-unsigned long empty_zero_page, zero_page_mask;
+unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)] __page_aligned_bss;
 EXPORT_SYMBOL(empty_zero_page);
-EXPORT_SYMBOL(zero_page_mask);
-
-void setup_zero_pages(void)
-{
-	unsigned int order, i;
-	struct page *page;
-
-	order = 0;
-
-	empty_zero_page = __get_free_pages(GFP_KERNEL | __GFP_ZERO, order);
-	if (!empty_zero_page)
-		panic("Oh boy, that early out of memory?");
-
-	page = virt_to_page((void *)empty_zero_page);
-	split_page(page, order);
-	for (i = 0; i < (1 << order); i++, page++)
-		mark_page_reserved(page);
-
-	zero_page_mask = ((PAGE_SIZE << order) - 1) & PAGE_MASK;
-}
 
 void copy_user_highpage(struct page *to, struct page *from,
 	unsigned long vaddr, struct vm_area_struct *vma)
@@ -106,7 +81,6 @@ void __init mem_init(void)
 	high_memory = (void *) __va(max_low_pfn << PAGE_SHIFT);
 
 	memblock_free_all();
-	setup_zero_pages();	/* Setup zeroed pages.  */
 }
 #endif /* !CONFIG_NUMA */
 
@@ -191,43 +165,42 @@ void vmemmap_free(unsigned long start, unsigned long end, struct vmem_altmap *al
 #endif
 #endif
 
-static pte_t *fixmap_pte(unsigned long addr)
+pte_t * __init populate_kernel_pte(unsigned long addr)
 {
-	pgd_t *pgd;
-	p4d_t *p4d;
+	pgd_t *pgd = pgd_offset_k(addr);
+	p4d_t *p4d = p4d_offset(pgd, addr);
 	pud_t *pud;
 	pmd_t *pmd;
 
-	pgd = pgd_offset_k(addr);
-	p4d = p4d_offset(pgd, addr);
-
-	if (pgd_none(*pgd)) {
-		pud_t *new __maybe_unused;
-
-		new = memblock_alloc_low(PAGE_SIZE, PAGE_SIZE);
-		pgd_populate(&init_mm, pgd, new);
+	if (p4d_none(*p4d)) {
+		pud = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
+		if (!pud)
+			panic("%s: Failed to allocate memory\n", __func__);
+		p4d_populate(&init_mm, p4d, pud);
 #ifndef __PAGETABLE_PUD_FOLDED
-		pud_init(new);
+		pud_init(pud);
 #endif
 	}
 
 	pud = pud_offset(p4d, addr);
 	if (pud_none(*pud)) {
-		pmd_t *new __maybe_unused;
-
-		new = memblock_alloc_low(PAGE_SIZE, PAGE_SIZE);
-		pud_populate(&init_mm, pud, new);
+		pmd = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
+		if (!pmd)
+			panic("%s: Failed to allocate memory\n", __func__);
+		pud_populate(&init_mm, pud, pmd);
 #ifndef __PAGETABLE_PMD_FOLDED
-		pmd_init(new);
+		pmd_init(pmd);
 #endif
 	}
 
 	pmd = pmd_offset(pud, addr);
-	if (pmd_none(*pmd)) {
-		pte_t *new __maybe_unused;
+	if (!pmd_present(*pmd)) {
+		pte_t *pte;
 
-		new = memblock_alloc_low(PAGE_SIZE, PAGE_SIZE);
-		pmd_populate_kernel(&init_mm, pmd, new);
+		pte = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
+		if (!pte)
+			panic("%s: Failed to allocate memory\n", __func__);
+		pmd_populate_kernel(&init_mm, pmd, pte);
 	}
 
 	return pte_offset_kernel(pmd, addr);
@@ -241,7 +214,7 @@ void __init __set_fixmap(enum fixed_addresses idx,
 
 	BUG_ON(idx <= FIX_HOLE || idx >= __end_of_fixed_addresses);
 
-	ptep = fixmap_pte(addr);
+	ptep = populate_kernel_pte(addr);
 	if (!pte_none(*ptep)) {
 		pte_ERROR(*ptep);
 		return;
diff --git a/arch/loongarch/mm/kasan_init.c b/arch/loongarch/mm/kasan_init.c
new file mode 100644
index 000000000000..da68bc1a4643
--- /dev/null
+++ b/arch/loongarch/mm/kasan_init.c
@@ -0,0 +1,243 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2023 Loongson Technology Corporation Limited
+ */
+#define pr_fmt(fmt) "kasan: " fmt
+#include <linux/kasan.h>
+#include <linux/memblock.h>
+#include <linux/sched/task.h>
+
+#include <asm/tlbflush.h>
+#include <asm/pgalloc.h>
+#include <asm-generic/sections.h>
+
+static pgd_t kasan_pg_dir[PTRS_PER_PGD] __initdata __aligned(PAGE_SIZE);
+
+#ifdef __PAGETABLE_PUD_FOLDED
+#define __p4d_none(early, p4d) (0)
+#else
+#define __p4d_none(early, p4d) (early ? (p4d_val(p4d) == 0) : \
+(__pa(p4d_val(p4d)) == (unsigned long)__pa(kasan_early_shadow_pud)))
+#endif
+
+#ifdef __PAGETABLE_PMD_FOLDED
+#define __pud_none(early, pud) (0)
+#else
+#define __pud_none(early, pud) (early ? (pud_val(pud) == 0) : \
+(__pa(pud_val(pud)) == (unsigned long)__pa(kasan_early_shadow_pmd)))
+#endif
+
+#define __pmd_none(early, pmd) (early ? (pmd_val(pmd) == 0) : \
+(__pa(pmd_val(pmd)) == (unsigned long)__pa(kasan_early_shadow_pte)))
+
+#define __pte_none(early, pte) (early ? pte_none(pte) : \
+((pte_val(pte) & _PFN_MASK) == (unsigned long)__pa(kasan_early_shadow_page)))
+
+bool kasan_early_stage = true;
+
+/*
+ * Alloc memory for shadow memory page table.
+ */
+static phys_addr_t __init kasan_alloc_zeroed_page(int node)
+{
+	void *p = memblock_alloc_try_nid(PAGE_SIZE, PAGE_SIZE,
+					__pa(MAX_DMA_ADDRESS), MEMBLOCK_ALLOC_ACCESSIBLE, node);
+	if (!p)
+		panic("%s: Failed to allocate %lu bytes align=0x%lx nid=%d from=%llx\n",
+			__func__, PAGE_SIZE, PAGE_SIZE, node, __pa(MAX_DMA_ADDRESS));
+
+	return __pa(p);
+}
+
+static pte_t *__init kasan_pte_offset(pmd_t *pmdp, unsigned long addr, int node, bool early)
+{
+	if (__pmd_none(early, READ_ONCE(*pmdp))) {
+		phys_addr_t pte_phys = early ?
+				__pa_symbol(kasan_early_shadow_pte) : kasan_alloc_zeroed_page(node);
+		if (!early)
+			memcpy(__va(pte_phys), kasan_early_shadow_pte, sizeof(kasan_early_shadow_pte));
+		pmd_populate_kernel(NULL, pmdp, (pte_t *)__va(pte_phys));
+	}
+
+	return pte_offset_kernel(pmdp, addr);
+}
+
+static pmd_t *__init kasan_pmd_offset(pud_t *pudp, unsigned long addr, int node, bool early)
+{
+	if (__pud_none(early, READ_ONCE(*pudp))) {
+		phys_addr_t pmd_phys = early ?
+				__pa_symbol(kasan_early_shadow_pmd) : kasan_alloc_zeroed_page(node);
+		if (!early)
+			memcpy(__va(pmd_phys), kasan_early_shadow_pmd, sizeof(kasan_early_shadow_pmd));
+		pud_populate(&init_mm, pudp, (pmd_t *)__va(pmd_phys));
+	}
+
+	return pmd_offset(pudp, addr);
+}
+
+static pud_t *__init kasan_pud_offset(p4d_t *p4dp, unsigned long addr, int node, bool early)
+{
+	if (__p4d_none(early, READ_ONCE(*p4dp))) {
+		phys_addr_t pud_phys = early ?
+			__pa_symbol(kasan_early_shadow_pud) : kasan_alloc_zeroed_page(node);
+		if (!early)
+			memcpy(__va(pud_phys), kasan_early_shadow_pud, sizeof(kasan_early_shadow_pud));
+		p4d_populate(&init_mm, p4dp, (pud_t *)__va(pud_phys));
+	}
+
+	return pud_offset(p4dp, addr);
+}
+
+static void __init kasan_pte_populate(pmd_t *pmdp, unsigned long addr,
+				      unsigned long end, int node, bool early)
+{
+	unsigned long next;
+	pte_t *ptep = kasan_pte_offset(pmdp, addr, node, early);
+
+	do {
+		phys_addr_t page_phys = early ?
+					__pa_symbol(kasan_early_shadow_page)
+					      : kasan_alloc_zeroed_page(node);
+		next = addr + PAGE_SIZE;
+		set_pte(ptep, pfn_pte(__phys_to_pfn(page_phys), PAGE_KERNEL));
+	} while (ptep++, addr = next, addr != end && __pte_none(early, READ_ONCE(*ptep)));
+}
+
+static void __init kasan_pmd_populate(pud_t *pudp, unsigned long addr,
+				      unsigned long end, int node, bool early)
+{
+	unsigned long next;
+	pmd_t *pmdp = kasan_pmd_offset(pudp, addr, node, early);
+
+	do {
+		next = pmd_addr_end(addr, end);
+		kasan_pte_populate(pmdp, addr, next, node, early);
+	} while (pmdp++, addr = next, addr != end && __pmd_none(early, READ_ONCE(*pmdp)));
+}
+
+static void __init kasan_pud_populate(p4d_t *p4dp, unsigned long addr,
+					    unsigned long end, int node, bool early)
+{
+	unsigned long next;
+	pud_t *pudp = kasan_pud_offset(p4dp, addr, node, early);
+
+	do {
+		next = pud_addr_end(addr, end);
+		kasan_pmd_populate(pudp, addr, next, node, early);
+	} while (pudp++, addr = next, addr != end);
+}
+
+static void __init kasan_p4d_populate(pgd_t *pgdp, unsigned long addr,
+					    unsigned long end, int node, bool early)
+{
+	unsigned long next;
+	p4d_t *p4dp = p4d_offset(pgdp, addr);
+
+	do {
+		next = p4d_addr_end(addr, end);
+		kasan_pud_populate(p4dp, addr, next, node, early);
+	} while (p4dp++, addr = next, addr != end);
+}
+
+static void __init kasan_pgd_populate(unsigned long addr, unsigned long end,
+				      int node, bool early)
+{
+	unsigned long next;
+	pgd_t *pgdp;
+
+	pgdp = pgd_offset_k(addr);
+
+	do {
+		next = pgd_addr_end(addr, end);
+		kasan_p4d_populate(pgdp, addr, next, node, early);
+	} while (pgdp++, addr = next, addr != end);
+
+}
+
+/* Set up full kasan mappings, ensuring that the mapped pages are zeroed */
+static void __init kasan_map_populate(unsigned long start, unsigned long end,
+				      int node)
+{
+	kasan_pgd_populate(start & PAGE_MASK, PAGE_ALIGN(end), node, false);
+}
+
+asmlinkage void __init kasan_early_init(void)
+{
+	BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_START, PGDIR_SIZE));
+	BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_END, PGDIR_SIZE));
+}
+
+static inline void kasan_set_pgd(pgd_t *pgdp, pgd_t pgdval)
+{
+	WRITE_ONCE(*pgdp, pgdval);
+}
+
+static void __init clear_pgds(unsigned long start, unsigned long end)
+{
+	/*
+	 * Remove references to kasan page tables from
+	 * swapper_pg_dir. pgd_clear() can't be used
+	 * here because it's nop on 2,3-level pagetable setups
+	 */
+	for (; start < end; start += PGDIR_SIZE)
+		kasan_set_pgd((pgd_t *)pgd_offset_k(start), __pgd(0));
+}
+
+void __init kasan_init(void)
+{
+	u64 i;
+	phys_addr_t pa_start, pa_end;
+
+	/*
+	 * PGD was populated as invalid_pmd_table or invalid_pud_table
+	 * in pagetable_init() which depends on how many levels of page
+	 * table you are using, but we had to clean the gpd of kasan
+	 * shadow memory, as the pgd value is none-zero.
+	 * The assertion pgd_none is going to be false and the formal populate
+	 * afterwards is not going to create any new pgd at all.
+	 */
+	memcpy(kasan_pg_dir, swapper_pg_dir, sizeof(kasan_pg_dir));
+	csr_write64(__pa_symbol(kasan_pg_dir), LOONGARCH_CSR_PGDH);
+	local_flush_tlb_all();
+
+	clear_pgds(KASAN_SHADOW_START, KASAN_SHADOW_END);
+
+	/* Maps everything to a single page of zeroes */
+	kasan_pgd_populate(KASAN_SHADOW_START, KASAN_SHADOW_END, NUMA_NO_NODE, true);
+
+	kasan_populate_early_shadow(kasan_mem_to_shadow((void *)VMALLOC_START),
+					kasan_mem_to_shadow((void *)KFENCE_AREA_END));
+
+	kasan_early_stage = false;
+
+	/* Populate the linear mapping */
+	for_each_mem_range(i, &pa_start, &pa_end) {
+		void *start = (void *)phys_to_virt(pa_start);
+		void *end   = (void *)phys_to_virt(pa_end);
+
+		if (start >= end)
+			break;
+
+		kasan_map_populate((unsigned long)kasan_mem_to_shadow(start),
+			(unsigned long)kasan_mem_to_shadow(end), NUMA_NO_NODE);
+	}
+
+	/* Populate modules mapping */
+	kasan_map_populate((unsigned long)kasan_mem_to_shadow((void *)MODULES_VADDR),
+		(unsigned long)kasan_mem_to_shadow((void *)MODULES_END), NUMA_NO_NODE);
+	/*
+	 * KAsan may reuse the contents of kasan_early_shadow_pte directly, so we
+	 * should make sure that it maps the zero page read-only.
+	 */
+	for (i = 0; i < PTRS_PER_PTE; i++)
+		set_pte(&kasan_early_shadow_pte[i],
+			pfn_pte(__phys_to_pfn(__pa_symbol(kasan_early_shadow_page)), PAGE_KERNEL_RO));
+
+	memset(kasan_early_shadow_page, 0, PAGE_SIZE);
+	csr_write64(__pa_symbol(swapper_pg_dir), LOONGARCH_CSR_PGDH);
+	local_flush_tlb_all();
+
+	/* At this point kasan is fully initialized. Enable error messages */
+	init_task.kasan_depth = 0;
+	pr_info("KernelAddressSanitizer initialized.\n");
+}
diff --git a/arch/loongarch/mm/mmap.c b/arch/loongarch/mm/mmap.c
index fbe1a4856fc4..a9630a81b38a 100644
--- a/arch/loongarch/mm/mmap.c
+++ b/arch/loongarch/mm/mmap.c
@@ -8,12 +8,11 @@
 #include <linux/mm.h>
 #include <linux/mman.h>
 
-unsigned long shm_align_mask = PAGE_SIZE - 1;	/* Sane caches */
-EXPORT_SYMBOL(shm_align_mask);
+#define SHM_ALIGN_MASK	(SHMLBA - 1)
 
-#define COLOUR_ALIGN(addr, pgoff)				\
-	((((addr) + shm_align_mask) & ~shm_align_mask) +	\
-	 (((pgoff) << PAGE_SHIFT) & shm_align_mask))
+#define COLOUR_ALIGN(addr, pgoff)			\
+	((((addr) + SHM_ALIGN_MASK) & ~SHM_ALIGN_MASK)	\
+	 + (((pgoff) << PAGE_SHIFT) & SHM_ALIGN_MASK))
 
 enum mmap_allocation_direction {UP, DOWN};
 
@@ -40,7 +39,7 @@ static unsigned long arch_get_unmapped_area_common(struct file *filp,
 		 * cache aliasing constraints.
 		 */
 		if ((flags & MAP_SHARED) &&
-		    ((addr - (pgoff << PAGE_SHIFT)) & shm_align_mask))
+		    ((addr - (pgoff << PAGE_SHIFT)) & SHM_ALIGN_MASK))
 			return -EINVAL;
 		return addr;
 	}
@@ -63,7 +62,7 @@ static unsigned long arch_get_unmapped_area_common(struct file *filp,
 	}
 
 	info.length = len;
-	info.align_mask = do_color_align ? (PAGE_MASK & shm_align_mask) : 0;
+	info.align_mask = do_color_align ? (PAGE_MASK & SHM_ALIGN_MASK) : 0;
 	info.align_offset = pgoff << PAGE_SHIFT;
 
 	if (dir == DOWN) {
diff --git a/arch/loongarch/mm/pgtable.c b/arch/loongarch/mm/pgtable.c
index b14343e211b6..71d0539e2d0b 100644
--- a/arch/loongarch/mm/pgtable.c
+++ b/arch/loongarch/mm/pgtable.c
@@ -9,6 +9,18 @@
 #include <asm/pgtable.h>
 #include <asm/tlbflush.h>
 
+struct page *dmw_virt_to_page(unsigned long kaddr)
+{
+	return pfn_to_page(virt_to_pfn(kaddr));
+}
+EXPORT_SYMBOL_GPL(dmw_virt_to_page);
+
+struct page *tlb_virt_to_page(unsigned long kaddr)
+{
+	return pfn_to_page(pte_pfn(*virt_to_kpte(kaddr)));
+}
+EXPORT_SYMBOL_GPL(tlb_virt_to_page);
+
 pgd_t *pgd_alloc(struct mm_struct *mm)
 {
 	pgd_t *init, *ret = NULL;
diff --git a/arch/loongarch/vdso/Makefile b/arch/loongarch/vdso/Makefile
index a50308b6fc25..5c97d1463328 100644
--- a/arch/loongarch/vdso/Makefile
+++ b/arch/loongarch/vdso/Makefile
@@ -1,6 +1,9 @@
 # SPDX-License-Identifier: GPL-2.0
 # Objects to go into the VDSO.
 
+KASAN_SANITIZE := n
+KCOV_INSTRUMENT := n
+
 # Include the generic Makefile to check the built vdso.
 include $(srctree)/lib/vdso/Makefile
 
diff --git a/arch/m68k/coldfire/dma_timer.c b/arch/m68k/coldfire/dma_timer.c
index cbb289439606..91e6728f51ed 100644
--- a/arch/m68k/coldfire/dma_timer.c
+++ b/arch/m68k/coldfire/dma_timer.c
@@ -48,7 +48,7 @@ static struct clocksource clocksource_cf_dt = {
 	.flags		= CLOCK_SOURCE_IS_CONTINUOUS,
 };
 
-static int __init  init_cf_dt_clocksource(void)
+static int __init init_cf_dt_clocksource(void)
 {
 	/*
 	 * We setup DMA timer 0 in free run mode. This incrementing counter is
diff --git a/arch/m68k/emu/nfcon.c b/arch/m68k/emu/nfcon.c
index 6fdc13610565..3a74d493eb3e 100644
--- a/arch/m68k/emu/nfcon.c
+++ b/arch/m68k/emu/nfcon.c
@@ -70,16 +70,16 @@ static void nfcon_tty_close(struct tty_struct *tty, struct file *filp)
 {
 }
 
-static int nfcon_tty_write(struct tty_struct *tty, const unsigned char *buf,
-			   int count)
+static ssize_t nfcon_tty_write(struct tty_struct *tty, const u8 *buf,
+			       size_t count)
 {
 	nfputs(buf, count);
 	return count;
 }
 
-static int nfcon_tty_put_char(struct tty_struct *tty, unsigned char ch)
+static int nfcon_tty_put_char(struct tty_struct *tty, u8 ch)
 {
-	char temp[2] = { ch, 0 };
+	u8 temp[2] = { ch, 0 };
 
 	nf_call(stderr_id, virt_to_phys(temp));
 	return 1;
diff --git a/arch/m68k/include/asm/ide.h b/arch/m68k/include/asm/ide.h
deleted file mode 100644
index 05cc7dc00e0c..000000000000
--- a/arch/m68k/include/asm/ide.h
+++ /dev/null
@@ -1,67 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- *  Copyright (C) 1994-1996  Linus Torvalds & authors
- */
-
-/* Copyright(c) 1996 Kars de Jong */
-/* Based on the ide driver from 1.2.13pl8 */
-
-/*
- * Credits (alphabetical):
- *
- *  - Bjoern Brauel
- *  - Kars de Jong
- *  - Torsten Ebeling
- *  - Dwight Engen
- *  - Thorsten Floeck
- *  - Roman Hodek
- *  - Guenther Kelleter
- *  - Chris Lawrence
- *  - Michael Rausch
- *  - Christian Sauer
- *  - Michael Schmitz
- *  - Jes Soerensen
- *  - Michael Thurm
- *  - Geert Uytterhoeven
- */
-
-#ifndef _M68K_IDE_H
-#define _M68K_IDE_H
-
-#ifdef __KERNEL__
-#include <asm/setup.h>
-#include <asm/io.h>
-#include <asm/irq.h>
-
-#ifdef CONFIG_MMU
-
-/*
- * Get rid of defs from io.h - ide has its private and conflicting versions
- * Since so far no single m68k platform uses ISA/PCI I/O space for IDE, we
- * always use the `raw' MMIO versions
- */
-#undef readb
-#undef readw
-#undef writeb
-#undef writew
-
-#define readb				in_8
-#define readw				in_be16
-#define __ide_mm_insw(port, addr, n)	raw_insw((u16 *)port, addr, n)
-#define __ide_mm_insl(port, addr, n)	raw_insl((u32 *)port, addr, n)
-#define writeb(val, port)		out_8(port, val)
-#define writew(val, port)		out_be16(port, val)
-#define __ide_mm_outsw(port, addr, n)	raw_outsw((u16 *)port, addr, n)
-#define __ide_mm_outsl(port, addr, n)	raw_outsl((u32 *)port, addr, n)
-
-#else
-
-#define __ide_mm_insw(port, addr, n)	io_insw((unsigned int)port, addr, n)
-#define __ide_mm_insl(port, addr, n)	io_insl((unsigned int)port, addr, n)
-#define __ide_mm_outsw(port, addr, n)	io_outsw((unsigned int)port, addr, n)
-#define __ide_mm_outsl(port, addr, n)	io_outsl((unsigned int)port, addr, n)
-
-#endif /* CONFIG_MMU */
-
-#endif /* __KERNEL__ */
-#endif /* _M68K_IDE_H */
diff --git a/arch/m68k/kernel/pcibios.c b/arch/m68k/kernel/pcibios.c
index b0e110d3d2e6..9504eb19d73a 100644
--- a/arch/m68k/kernel/pcibios.c
+++ b/arch/m68k/kernel/pcibios.c
@@ -92,9 +92,3 @@ void pcibios_fixup_bus(struct pci_bus *bus)
 		pci_write_config_byte(dev, PCI_LATENCY_TIMER, 32);
 	}
 }
-
-char *pcibios_setup(char *str)
-{
-	return str;
-}
-
diff --git a/arch/microblaze/include/asm/page.h b/arch/microblaze/include/asm/page.h
index 337f23eabc71..86a4ce07c192 100644
--- a/arch/microblaze/include/asm/page.h
+++ b/arch/microblaze/include/asm/page.h
@@ -99,9 +99,6 @@ extern int page_is_ram(unsigned long pfn);
 # define phys_to_pfn(phys)	(PFN_DOWN(phys))
 # define pfn_to_phys(pfn)	(PFN_PHYS(pfn))
 
-# define virt_to_pfn(vaddr)	(phys_to_pfn((__pa(vaddr))))
-# define pfn_to_virt(pfn)	__va(pfn_to_phys((pfn)))
-
 #  define virt_to_page(kaddr)	(pfn_to_page(__pa(kaddr) >> PAGE_SHIFT))
 #  define page_to_virt(page)   __va(page_to_pfn(page) << PAGE_SHIFT)
 #  define page_to_phys(page)     (page_to_pfn(page) << PAGE_SHIFT)
@@ -109,11 +106,6 @@ extern int page_is_ram(unsigned long pfn);
 #  define ARCH_PFN_OFFSET	(memory_start >> PAGE_SHIFT)
 # endif /* __ASSEMBLY__ */
 
-#define	virt_addr_valid(vaddr)	(pfn_valid(virt_to_pfn(vaddr)))
-
-# define __pa(x)	__virt_to_phys((unsigned long)(x))
-# define __va(x)	((void *)__phys_to_virt((unsigned long)(x)))
-
 /* Convert between virtual and physical address for MMU. */
 /* Handle MicroBlaze processor with virtual memory. */
 #define __virt_to_phys(addr) \
@@ -125,6 +117,25 @@ extern int page_is_ram(unsigned long pfn);
 #define tovirt(rd, rs) \
 	addik rd, rs, (CONFIG_KERNEL_START - CONFIG_KERNEL_BASE_ADDR)
 
+#ifndef __ASSEMBLY__
+
+# define __pa(x)	__virt_to_phys((unsigned long)(x))
+# define __va(x)	((void *)__phys_to_virt((unsigned long)(x)))
+
+static inline unsigned long virt_to_pfn(const void *vaddr)
+{
+	return phys_to_pfn(__pa(vaddr));
+}
+
+static inline const void *pfn_to_virt(unsigned long pfn)
+{
+	return __va(pfn_to_phys((pfn)));
+}
+
+#define	virt_addr_valid(vaddr)	(pfn_valid(virt_to_pfn(vaddr)))
+
+#endif /* __ASSEMBLY__ */
+
 #define TOPHYS(addr)  __virt_to_phys(addr)
 
 #endif /* __KERNEL__ */
diff --git a/arch/microblaze/include/asm/setup.h b/arch/microblaze/include/asm/setup.h
index 3657f5e78a3d..bf2600f75959 100644
--- a/arch/microblaze/include/asm/setup.h
+++ b/arch/microblaze/include/asm/setup.h
@@ -25,7 +25,5 @@ void machine_shutdown(void);
 void machine_halt(void);
 void machine_power_off(void);
 
-extern void *zalloc_maybe_bootmem(size_t size, gfp_t mask);
-
 # endif /* __ASSEMBLY__ */
 #endif /* _ASM_MICROBLAZE_SETUP_H */
diff --git a/arch/microblaze/kernel/reset.c b/arch/microblaze/kernel/reset.c
index 5f4722908164..2f66c7963084 100644
--- a/arch/microblaze/kernel/reset.c
+++ b/arch/microblaze/kernel/reset.c
@@ -9,7 +9,6 @@
 
 #include <linux/init.h>
 #include <linux/delay.h>
-#include <linux/of_platform.h>
 #include <linux/reboot.h>
 
 void machine_shutdown(void)
diff --git a/arch/microblaze/mm/init.c b/arch/microblaze/mm/init.c
index 353fabdfcbc5..3827dc76edd8 100644
--- a/arch/microblaze/mm/init.c
+++ b/arch/microblaze/mm/init.c
@@ -270,22 +270,6 @@ asmlinkage void __init mmu_init(void)
 	memblock_dump_all();
 }
 
-void * __ref zalloc_maybe_bootmem(size_t size, gfp_t mask)
-{
-	void *p;
-
-	if (mem_init_done) {
-		p = kzalloc(size, mask);
-	} else {
-		p = memblock_alloc(size, SMP_CACHE_BYTES);
-		if (!p)
-			panic("%s: Failed to allocate %zu bytes\n",
-			      __func__, size);
-	}
-
-	return p;
-}
-
 static const pgprot_t protection_map[16] = {
 	[VM_NONE]					= PAGE_NONE,
 	[VM_READ]					= PAGE_READONLY_X,
diff --git a/arch/mips/Makefile b/arch/mips/Makefile
index a47593d72f6f..f49807e1f19b 100644
--- a/arch/mips/Makefile
+++ b/arch/mips/Makefile
@@ -181,12 +181,16 @@ endif
 cflags-$(CONFIG_CAVIUM_CN63XXP1) += -Wa,-mfix-cn63xxp1
 cflags-$(CONFIG_CPU_BMIPS)	+= -march=mips32 -Wa,-mips32 -Wa,--trap
 
-cflags-$(CONFIG_CPU_LOONGSON2E) += $(call cc-option,-march=loongson2e) -Wa,--trap
-cflags-$(CONFIG_CPU_LOONGSON2F) += $(call cc-option,-march=loongson2f) -Wa,--trap
-cflags-$(CONFIG_CPU_LOONGSON64) += $(call cc-option,-march=loongson3a,-march=mips64r2) -Wa,--trap
+cflags-$(CONFIG_CPU_LOONGSON2E) += -march=loongson2e -Wa,--trap
+cflags-$(CONFIG_CPU_LOONGSON2F) += -march=loongson2f -Wa,--trap
 # Some -march= flags enable MMI instructions, and GCC complains about that
 # support being enabled alongside -msoft-float. Thus explicitly disable MMI.
 cflags-$(CONFIG_CPU_LOONGSON2EF) += $(call cc-option,-mno-loongson-mmi)
+ifdef CONFIG_CPU_LOONGSON64
+cflags-$(CONFIG_CPU_LOONGSON64)	+= -Wa,--trap
+cflags-$(CONFIG_CC_IS_GCC) += -march=loongson3a
+cflags-$(CONFIG_CC_IS_CLANG) += -march=mips64r2
+endif
 cflags-$(CONFIG_CPU_LOONGSON64) += $(call cc-option,-mno-loongson-mmi)
 
 cflags-$(CONFIG_CPU_R4000_WORKAROUNDS)	+= $(call cc-option,-mfix-r4000,)
@@ -299,8 +303,8 @@ ifdef CONFIG_64BIT
     endif
   endif
 
-  ifeq ($(KBUILD_SYM32)$(call cc-option-yn,-msym32), yy)
-    cflags-y += -msym32 -DKBUILD_64BIT_SYM32
+  ifeq ($(KBUILD_SYM32), y)
+    cflags-$(KBUILD_SYM32) += -msym32 -DKBUILD_64BIT_SYM32
   else
     ifeq ($(CONFIG_CPU_DADDI_WORKAROUNDS), y)
       $(error CONFIG_CPU_DADDI_WORKAROUNDS unsupported without -msym32)
@@ -341,7 +345,7 @@ KBUILD_CFLAGS += -fno-asynchronous-unwind-tables
 
 KBUILD_LDFLAGS		+= -m $(ld-emul)
 
-ifdef CONFIG_MIPS
+ifdef need-compiler
 CHECKFLAGS += $(shell $(CC) $(KBUILD_CPPFLAGS) $(KBUILD_CFLAGS) -dM -E -x c /dev/null | \
 	grep -E -vw '__GNUC_(MINOR_|PATCHLEVEL_)?_' | \
 	sed -e "s/^\#define /-D'/" -e "s/ /'='/" -e "s/$$/'/" -e 's/\$$/&&/g')
diff --git a/arch/mips/bmips/setup.c b/arch/mips/bmips/setup.c
index 053805cb741c..ec180ab92eaa 100644
--- a/arch/mips/bmips/setup.c
+++ b/arch/mips/bmips/setup.c
@@ -16,7 +16,6 @@
 #include <linux/of.h>
 #include <linux/of_clk.h>
 #include <linux/of_fdt.h>
-#include <linux/of_platform.h>
 #include <linux/libfdt.h>
 #include <linux/smp.h>
 #include <asm/addrspace.h>
diff --git a/arch/mips/cavium-octeon/Makefile b/arch/mips/cavium-octeon/Makefile
index 7c02e542959a..2a5926578841 100644
--- a/arch/mips/cavium-octeon/Makefile
+++ b/arch/mips/cavium-octeon/Makefile
@@ -18,4 +18,3 @@ obj-y += crypto/
 obj-$(CONFIG_MTD)		      += flash_setup.o
 obj-$(CONFIG_SMP)		      += smp.o
 obj-$(CONFIG_OCTEON_ILM)	      += oct_ilm.o
-obj-$(CONFIG_USB)		      += octeon-usb.o
diff --git a/arch/mips/cavium-octeon/flash_setup.c b/arch/mips/cavium-octeon/flash_setup.c
index c8a8c6d359b9..3395acde4d60 100644
--- a/arch/mips/cavium-octeon/flash_setup.c
+++ b/arch/mips/cavium-octeon/flash_setup.c
@@ -12,7 +12,8 @@
 #include <linux/semaphore.h>
 #include <linux/mtd/mtd.h>
 #include <linux/mtd/map.h>
-#include <linux/of_platform.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
 #include <linux/mtd/partitions.h>
 
 #include <asm/octeon/octeon.h>
diff --git a/arch/mips/cavium-octeon/octeon-memcpy.S b/arch/mips/cavium-octeon/octeon-memcpy.S
index 25860fba6218..fef0c6de3fa1 100644
--- a/arch/mips/cavium-octeon/octeon-memcpy.S
+++ b/arch/mips/cavium-octeon/octeon-memcpy.S
@@ -13,9 +13,9 @@
  * Mnemonic names for arguments to memcpy/__copy_user
  */
 
+#include <linux/export.h>
 #include <asm/asm.h>
 #include <asm/asm-offsets.h>
-#include <asm/export.h>
 #include <asm/regdef.h>
 
 #define dst a0
diff --git a/arch/mips/cavium-octeon/octeon-platform.c b/arch/mips/cavium-octeon/octeon-platform.c
index ce05c0dd3acd..f76783c24338 100644
--- a/arch/mips/cavium-octeon/octeon-platform.c
+++ b/arch/mips/cavium-octeon/octeon-platform.c
@@ -8,8 +8,10 @@
  */
 
 #include <linux/etherdevice.h>
+#include <linux/of.h>
 #include <linux/of_platform.h>
 #include <linux/of_fdt.h>
+#include <linux/platform_device.h>
 #include <linux/libfdt.h>
 
 #include <asm/octeon/octeon.h>
@@ -450,7 +452,6 @@ static const struct of_device_id octeon_ids[] __initconst = {
 	{ .compatible = "cavium,octeon-3860-bootbus", },
 	{ .compatible = "cavium,mdio-mux", },
 	{ .compatible = "gpio-leds", },
-	{ .compatible = "cavium,octeon-7130-usb-uctl", },
 	{},
 };
 
diff --git a/arch/mips/cavium-octeon/octeon-usb.c b/arch/mips/cavium-octeon/octeon-usb.c
deleted file mode 100644
index 2add435ad038..000000000000
--- a/arch/mips/cavium-octeon/octeon-usb.c
+++ /dev/null
@@ -1,551 +0,0 @@
-/*
- * XHCI HCD glue for Cavium Octeon III SOCs.
- *
- * Copyright (C) 2010-2017 Cavium Networks
- *
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- */
-
-#include <linux/bitfield.h>
-#include <linux/bits.h>
-#include <linux/device.h>
-#include <linux/delay.h>
-#include <linux/io.h>
-#include <linux/module.h>
-#include <linux/mutex.h>
-#include <linux/of_platform.h>
-
-/*
- * USB Control Register
- */
-#define USBDRD_UCTL_CTL				0x00
-/* BIST fast-clear mode select. A BIST run with this bit set
- * clears all entries in USBH RAMs to 0x0.
- */
-# define USBDRD_UCTL_CTL_CLEAR_BIST		BIT(63)
-/* 1 = Start BIST and cleared by hardware */
-# define USBDRD_UCTL_CTL_START_BIST		BIT(62)
-/* Reference clock select for SuperSpeed and HighSpeed PLLs:
- *	0x0 = Both PLLs use DLMC_REF_CLK0 for reference clock
- *	0x1 = Both PLLs use DLMC_REF_CLK1 for reference clock
- *	0x2 = SuperSpeed PLL uses DLMC_REF_CLK0 for reference clock &
- *	      HighSpeed PLL uses PLL_REF_CLK for reference clck
- *	0x3 = SuperSpeed PLL uses DLMC_REF_CLK1 for reference clock &
- *	      HighSpeed PLL uses PLL_REF_CLK for reference clck
- */
-# define USBDRD_UCTL_CTL_REF_CLK_SEL		GENMASK(61, 60)
-/* 1 = Spread-spectrum clock enable, 0 = SS clock disable */
-# define USBDRD_UCTL_CTL_SSC_EN			BIT(59)
-/* Spread-spectrum clock modulation range:
- *	0x0 = -4980 ppm downspread
- *	0x1 = -4492 ppm downspread
- *	0x2 = -4003 ppm downspread
- *	0x3 - 0x7 = Reserved
- */
-# define USBDRD_UCTL_CTL_SSC_RANGE		GENMASK(58, 56)
-/* Enable non-standard oscillator frequencies:
- *	[55:53] = modules -1
- *	[52:47] = 2's complement push amount, 0 = Feature disabled
- */
-# define USBDRD_UCTL_CTL_SSC_REF_CLK_SEL	GENMASK(55, 47)
-/* Reference clock multiplier for non-standard frequencies:
- *	0x19 = 100MHz on DLMC_REF_CLK* if REF_CLK_SEL = 0x0 or 0x1
- *	0x28 = 125MHz on DLMC_REF_CLK* if REF_CLK_SEL = 0x0 or 0x1
- *	0x32 =  50MHz on DLMC_REF_CLK* if REF_CLK_SEL = 0x0 or 0x1
- *	Other Values = Reserved
- */
-# define USBDRD_UCTL_CTL_MPLL_MULTIPLIER	GENMASK(46, 40)
-/* Enable reference clock to prescaler for SuperSpeed functionality.
- * Should always be set to "1"
- */
-# define USBDRD_UCTL_CTL_REF_SSP_EN		BIT(39)
-/* Divide the reference clock by 2 before entering the
- * REF_CLK_FSEL divider:
- *	If REF_CLK_SEL = 0x0 or 0x1, then only 0x0 is legal
- *	If REF_CLK_SEL = 0x2 or 0x3, then:
- *		0x1 = DLMC_REF_CLK* is 125MHz
- *		0x0 = DLMC_REF_CLK* is another supported frequency
- */
-# define USBDRD_UCTL_CTL_REF_CLK_DIV2		BIT(38)
-/* Select reference clock freqnuency for both PLL blocks:
- *	0x27 = REF_CLK_SEL is 0x0 or 0x1
- *	0x07 = REF_CLK_SEL is 0x2 or 0x3
- */
-# define USBDRD_UCTL_CTL_REF_CLK_FSEL		GENMASK(37, 32)
-/* Controller clock enable. */
-# define USBDRD_UCTL_CTL_H_CLK_EN		BIT(30)
-/* Select bypass input to controller clock divider:
- *	0x0 = Use divided coprocessor clock from H_CLKDIV
- *	0x1 = Use clock from GPIO pins
- */
-# define USBDRD_UCTL_CTL_H_CLK_BYP_SEL		BIT(29)
-/* Reset controller clock divider. */
-# define USBDRD_UCTL_CTL_H_CLKDIV_RST		BIT(28)
-/* Clock divider select:
- *	0x0 = divide by 1
- *	0x1 = divide by 2
- *	0x2 = divide by 4
- *	0x3 = divide by 6
- *	0x4 = divide by 8
- *	0x5 = divide by 16
- *	0x6 = divide by 24
- *	0x7 = divide by 32
- */
-# define USBDRD_UCTL_CTL_H_CLKDIV_SEL		GENMASK(26, 24)
-/* USB3 port permanently attached: 0x0 = No, 0x1 = Yes */
-# define USBDRD_UCTL_CTL_USB3_PORT_PERM_ATTACH	BIT(21)
-/* USB2 port permanently attached: 0x0 = No, 0x1 = Yes */
-# define USBDRD_UCTL_CTL_USB2_PORT_PERM_ATTACH	BIT(20)
-/* Disable SuperSpeed PHY: 0x0 = No, 0x1 = Yes */
-# define USBDRD_UCTL_CTL_USB3_PORT_DISABLE	BIT(18)
-/* Disable HighSpeed PHY: 0x0 = No, 0x1 = Yes */
-# define USBDRD_UCTL_CTL_USB2_PORT_DISABLE	BIT(16)
-/* Enable PHY SuperSpeed block power: 0x0 = No, 0x1 = Yes */
-# define USBDRD_UCTL_CTL_SS_POWER_EN		BIT(14)
-/* Enable PHY HighSpeed block power: 0x0 = No, 0x1 = Yes */
-# define USBDRD_UCTL_CTL_HS_POWER_EN		BIT(12)
-/* Enable USB UCTL interface clock: 0xx = No, 0x1 = Yes */
-# define USBDRD_UCTL_CTL_CSCLK_EN		BIT(4)
-/* Controller mode: 0x0 = Host, 0x1 = Device */
-# define USBDRD_UCTL_CTL_DRD_MODE		BIT(3)
-/* PHY reset */
-# define USBDRD_UCTL_CTL_UPHY_RST		BIT(2)
-/* Software reset UAHC */
-# define USBDRD_UCTL_CTL_UAHC_RST		BIT(1)
-/* Software resets UCTL */
-# define USBDRD_UCTL_CTL_UCTL_RST		BIT(0)
-
-#define USBDRD_UCTL_BIST_STATUS			0x08
-#define USBDRD_UCTL_SPARE0			0x10
-#define USBDRD_UCTL_INTSTAT			0x30
-#define USBDRD_UCTL_PORT_CFG_HS(port)		(0x40 + (0x20 * port))
-#define USBDRD_UCTL_PORT_CFG_SS(port)		(0x48 + (0x20 * port))
-#define USBDRD_UCTL_PORT_CR_DBG_CFG(port)	(0x50 + (0x20 * port))
-#define USBDRD_UCTL_PORT_CR_DBG_STATUS(port)	(0x58 + (0x20 * port))
-
-/*
- * UCTL Configuration Register
- */
-#define USBDRD_UCTL_HOST_CFG			0xe0
-/* Indicates minimum value of all received BELT values */
-# define USBDRD_UCTL_HOST_CFG_HOST_CURRENT_BELT	GENMASK(59, 48)
-/* HS jitter adjustment */
-# define USBDRD_UCTL_HOST_CFG_FLA		GENMASK(37, 32)
-/* Bus-master enable: 0x0 = Disabled (stall DMAs), 0x1 = enabled */
-# define USBDRD_UCTL_HOST_CFG_BME		BIT(28)
-/* Overcurrent protection enable: 0x0 = unavailable, 0x1 = available */
-# define USBDRD_UCTL_HOST_OCI_EN		BIT(27)
-/* Overcurrent sene selection:
- *	0x0 = Overcurrent indication from off-chip is active-low
- *	0x1 = Overcurrent indication from off-chip is active-high
- */
-# define USBDRD_UCTL_HOST_OCI_ACTIVE_HIGH_EN	BIT(26)
-/* Port power control enable: 0x0 = unavailable, 0x1 = available */
-# define USBDRD_UCTL_HOST_PPC_EN		BIT(25)
-/* Port power control sense selection:
- *	0x0 = Port power to off-chip is active-low
- *	0x1 = Port power to off-chip is active-high
- */
-# define USBDRD_UCTL_HOST_PPC_ACTIVE_HIGH_EN	BIT(24)
-
-/*
- * UCTL Shim Features Register
- */
-#define USBDRD_UCTL_SHIM_CFG			0xe8
-/* Out-of-bound UAHC register access: 0 = read, 1 = write */
-# define USBDRD_UCTL_SHIM_CFG_XS_NCB_OOB_WRN	BIT(63)
-/* SRCID error log for out-of-bound UAHC register access:
- *	[59:58] = chipID
- *	[57] = Request source: 0 = core, 1 = NCB-device
- *	[56:51] = Core/NCB-device number, [56] always 0 for NCB devices
- *	[50:48] = SubID
- */
-# define USBDRD_UCTL_SHIM_CFG_XS_NCB_OOB_OSRC	GENMASK(59, 48)
-/* Error log for bad UAHC DMA access: 0 = Read log, 1 = Write log */
-# define USBDRD_UCTL_SHIM_CFG_XM_BAD_DMA_WRN	BIT(47)
-/* Encoded error type for bad UAHC DMA */
-# define USBDRD_UCTL_SHIM_CFG_XM_BAD_DMA_TYPE	GENMASK(43, 40)
-/* Select the IOI read command used by DMA accesses */
-# define USBDRD_UCTL_SHIM_CFG_DMA_READ_CMD	BIT(12)
-/* Select endian format for DMA accesses to the L2C:
- *	0x0 = Little endian
- *	0x1 = Big endian
- *	0x2 = Reserved
- *	0x3 = Reserved
- */
-# define USBDRD_UCTL_SHIM_CFG_DMA_ENDIAN_MODE	GENMASK(9, 8)
-/* Select endian format for IOI CSR access to UAHC:
- *	0x0 = Little endian
- *	0x1 = Big endian
- *	0x2 = Reserved
- *	0x3 = Reserved
- */
-# define USBDRD_UCTL_SHIM_CFG_CSR_ENDIAN_MODE	GENMASK(1, 0)
-
-#define USBDRD_UCTL_ECC				0xf0
-#define USBDRD_UCTL_SPARE1			0xf8
-
-static DEFINE_MUTEX(dwc3_octeon_clocks_mutex);
-
-#ifdef CONFIG_CAVIUM_OCTEON_SOC
-#include <asm/octeon/octeon.h>
-static inline uint64_t dwc3_octeon_readq(void __iomem *addr)
-{
-	return cvmx_readq_csr(addr);
-}
-
-static inline void dwc3_octeon_writeq(void __iomem *base, uint64_t val)
-{
-	cvmx_writeq_csr(base, val);
-}
-
-static void dwc3_octeon_config_gpio(int index, int gpio)
-{
-	union cvmx_gpio_bit_cfgx gpio_bit;
-
-	if ((OCTEON_IS_MODEL(OCTEON_CN73XX) ||
-	    OCTEON_IS_MODEL(OCTEON_CNF75XX))
-	    && gpio <= 31) {
-		gpio_bit.u64 = cvmx_read_csr(CVMX_GPIO_BIT_CFGX(gpio));
-		gpio_bit.s.tx_oe = 1;
-		gpio_bit.s.output_sel = (index == 0 ? 0x14 : 0x15);
-		cvmx_write_csr(CVMX_GPIO_BIT_CFGX(gpio), gpio_bit.u64);
-	} else if (gpio <= 15) {
-		gpio_bit.u64 = cvmx_read_csr(CVMX_GPIO_BIT_CFGX(gpio));
-		gpio_bit.s.tx_oe = 1;
-		gpio_bit.s.output_sel = (index == 0 ? 0x14 : 0x19);
-		cvmx_write_csr(CVMX_GPIO_BIT_CFGX(gpio), gpio_bit.u64);
-	} else {
-		gpio_bit.u64 = cvmx_read_csr(CVMX_GPIO_XBIT_CFGX(gpio));
-		gpio_bit.s.tx_oe = 1;
-		gpio_bit.s.output_sel = (index == 0 ? 0x14 : 0x19);
-		cvmx_write_csr(CVMX_GPIO_XBIT_CFGX(gpio), gpio_bit.u64);
-	}
-}
-#else
-static inline uint64_t dwc3_octeon_readq(void __iomem *addr)
-{
-	return 0;
-}
-
-static inline void dwc3_octeon_writeq(void __iomem *base, uint64_t val) { }
-
-static inline void dwc3_octeon_config_gpio(int index, int gpio) { }
-#endif
-
-static int dwc3_octeon_get_divider(void)
-{
-	static const uint8_t clk_div[] = { 1, 2, 4, 6, 8, 16, 24, 32 };
-	int div = 0;
-
-	while (div < ARRAY_SIZE(clk_div)) {
-		uint64_t rate = octeon_get_io_clock_rate() / clk_div[div];
-		if (rate <= 300000000 && rate >= 150000000)
-			break;
-		div++;
-	}
-
-	return div;
-}
-
-static int dwc3_octeon_config_power(struct device *dev, void __iomem *base)
-{
-	uint32_t gpio_pwr[3];
-	int gpio, len, power_active_low;
-	struct device_node *node = dev->of_node;
-	u64 val;
-	void __iomem *uctl_host_cfg_reg = base + USBDRD_UCTL_HOST_CFG;
-
-	if (of_find_property(node, "power", &len) != NULL) {
-		if (len == 12) {
-			of_property_read_u32_array(node, "power", gpio_pwr, 3);
-			power_active_low = gpio_pwr[2] & 0x01;
-			gpio = gpio_pwr[1];
-		} else if (len == 8) {
-			of_property_read_u32_array(node, "power", gpio_pwr, 2);
-			power_active_low = 0;
-			gpio = gpio_pwr[1];
-		} else {
-			dev_err(dev, "invalid power configuration\n");
-			return -EINVAL;
-		}
-		dwc3_octeon_config_gpio(((u64)base >> 24) & 1, gpio);
-
-		/* Enable XHCI power control and set if active high or low. */
-		val = dwc3_octeon_readq(uctl_host_cfg_reg);
-		val |= USBDRD_UCTL_HOST_PPC_EN;
-		if (power_active_low)
-			val &= ~USBDRD_UCTL_HOST_PPC_ACTIVE_HIGH_EN;
-		else
-			val |= USBDRD_UCTL_HOST_PPC_ACTIVE_HIGH_EN;
-		dwc3_octeon_writeq(uctl_host_cfg_reg, val);
-	} else {
-		/* Disable XHCI power control and set if active high. */
-		val = dwc3_octeon_readq(uctl_host_cfg_reg);
-		val &= ~USBDRD_UCTL_HOST_PPC_EN;
-		val &= ~USBDRD_UCTL_HOST_PPC_ACTIVE_HIGH_EN;
-		dwc3_octeon_writeq(uctl_host_cfg_reg, val);
-		dev_info(dev, "power control disabled\n");
-	}
-	return 0;
-}
-
-static int dwc3_octeon_clocks_start(struct device *dev, void __iomem *base)
-{
-	int i, div, mpll_mul, ref_clk_fsel, ref_clk_sel = 2;
-	u32 clock_rate;
-	u64 val;
-	void __iomem *uctl_ctl_reg = base + USBDRD_UCTL_CTL;
-
-	if (dev->of_node) {
-		const char *ss_clock_type;
-		const char *hs_clock_type;
-
-		i = of_property_read_u32(dev->of_node,
-					 "refclk-frequency", &clock_rate);
-		if (i) {
-			dev_err(dev, "No UCTL \"refclk-frequency\"\n");
-			return -EINVAL;
-		}
-		i = of_property_read_string(dev->of_node,
-					    "refclk-type-ss", &ss_clock_type);
-		if (i) {
-			dev_err(dev, "No UCTL \"refclk-type-ss\"\n");
-			return -EINVAL;
-		}
-		i = of_property_read_string(dev->of_node,
-					    "refclk-type-hs", &hs_clock_type);
-		if (i) {
-			dev_err(dev, "No UCTL \"refclk-type-hs\"\n");
-			return -EINVAL;
-		}
-		if (strcmp("dlmc_ref_clk0", ss_clock_type) == 0) {
-			if (strcmp(hs_clock_type, "dlmc_ref_clk0") == 0)
-				ref_clk_sel = 0;
-			else if (strcmp(hs_clock_type, "pll_ref_clk") == 0)
-				ref_clk_sel = 2;
-			else
-				dev_warn(dev, "Invalid HS clock type %s, using pll_ref_clk instead\n",
-					 hs_clock_type);
-		} else if (strcmp(ss_clock_type, "dlmc_ref_clk1") == 0) {
-			if (strcmp(hs_clock_type, "dlmc_ref_clk1") == 0)
-				ref_clk_sel = 1;
-			else if (strcmp(hs_clock_type, "pll_ref_clk") == 0)
-				ref_clk_sel = 3;
-			else {
-				dev_warn(dev, "Invalid HS clock type %s, using pll_ref_clk instead\n",
-					 hs_clock_type);
-				ref_clk_sel = 3;
-			}
-		} else
-			dev_warn(dev, "Invalid SS clock type %s, using dlmc_ref_clk0 instead\n",
-				 ss_clock_type);
-
-		if ((ref_clk_sel == 0 || ref_clk_sel == 1) &&
-		    (clock_rate != 100000000))
-			dev_warn(dev, "Invalid UCTL clock rate of %u, using 100000000 instead\n",
-				 clock_rate);
-
-	} else {
-		dev_err(dev, "No USB UCTL device node\n");
-		return -EINVAL;
-	}
-
-	/*
-	 * Step 1: Wait for all voltages to be stable...that surely
-	 *         happened before starting the kernel. SKIP
-	 */
-
-	/* Step 2: Select GPIO for overcurrent indication, if desired. SKIP */
-
-	/* Step 3: Assert all resets. */
-	val = dwc3_octeon_readq(uctl_ctl_reg);
-	val |= USBDRD_UCTL_CTL_UPHY_RST |
-	       USBDRD_UCTL_CTL_UAHC_RST |
-	       USBDRD_UCTL_CTL_UCTL_RST;
-	dwc3_octeon_writeq(uctl_ctl_reg, val);
-
-	/* Step 4a: Reset the clock dividers. */
-	val = dwc3_octeon_readq(uctl_ctl_reg);
-	val |= USBDRD_UCTL_CTL_H_CLKDIV_RST;
-	dwc3_octeon_writeq(uctl_ctl_reg, val);
-
-	/* Step 4b: Select controller clock frequency. */
-	div = dwc3_octeon_get_divider();
-	val = dwc3_octeon_readq(uctl_ctl_reg);
-	val &= ~USBDRD_UCTL_CTL_H_CLKDIV_SEL;
-	val |= FIELD_PREP(USBDRD_UCTL_CTL_H_CLKDIV_SEL, div);
-	val |= USBDRD_UCTL_CTL_H_CLK_EN;
-	dwc3_octeon_writeq(uctl_ctl_reg, val);
-	val = dwc3_octeon_readq(uctl_ctl_reg);
-	if ((div != FIELD_GET(USBDRD_UCTL_CTL_H_CLKDIV_SEL, val)) ||
-	    (!(FIELD_GET(USBDRD_UCTL_CTL_H_CLK_EN, val)))) {
-		dev_err(dev, "dwc3 controller clock init failure.\n");
-			return -EINVAL;
-	}
-
-	/* Step 4c: Deassert the controller clock divider reset. */
-	val &= ~USBDRD_UCTL_CTL_H_CLKDIV_RST;
-	dwc3_octeon_writeq(uctl_ctl_reg, val);
-
-	/* Step 5a: Reference clock configuration. */
-	val = dwc3_octeon_readq(uctl_ctl_reg);
-	val &= ~USBDRD_UCTL_CTL_REF_CLK_DIV2;
-	val &= ~USBDRD_UCTL_CTL_REF_CLK_SEL;
-	val |= FIELD_PREP(USBDRD_UCTL_CTL_REF_CLK_SEL, ref_clk_sel);
-
-	ref_clk_fsel = 0x07;
-	switch (clock_rate) {
-	default:
-		dev_warn(dev, "Invalid ref_clk %u, using 100000000 instead\n",
-			 clock_rate);
-		fallthrough;
-	case 100000000:
-		mpll_mul = 0x19;
-		if (ref_clk_sel < 2)
-			ref_clk_fsel = 0x27;
-		break;
-	case 50000000:
-		mpll_mul = 0x32;
-		break;
-	case 125000000:
-		mpll_mul = 0x28;
-		break;
-	}
-	val &= ~USBDRD_UCTL_CTL_REF_CLK_FSEL;
-	val |= FIELD_PREP(USBDRD_UCTL_CTL_REF_CLK_FSEL, ref_clk_fsel);
-
-	val &= ~USBDRD_UCTL_CTL_MPLL_MULTIPLIER;
-	val |= FIELD_PREP(USBDRD_UCTL_CTL_MPLL_MULTIPLIER, mpll_mul);
-
-	/* Step 5b: Configure and enable spread-spectrum for SuperSpeed. */
-	val |= USBDRD_UCTL_CTL_SSC_EN;
-
-	/* Step 5c: Enable SuperSpeed. */
-	val |= USBDRD_UCTL_CTL_REF_SSP_EN;
-
-	/* Step 5d: Configure PHYs. SKIP */
-
-	/* Step 6a & 6b: Power up PHYs. */
-	val |= USBDRD_UCTL_CTL_HS_POWER_EN;
-	val |= USBDRD_UCTL_CTL_SS_POWER_EN;
-	dwc3_octeon_writeq(uctl_ctl_reg, val);
-
-	/* Step 7: Wait 10 controller-clock cycles to take effect. */
-	udelay(10);
-
-	/* Step 8a: Deassert UCTL reset signal. */
-	val = dwc3_octeon_readq(uctl_ctl_reg);
-	val &= ~USBDRD_UCTL_CTL_UCTL_RST;
-	dwc3_octeon_writeq(uctl_ctl_reg, val);
-
-	/* Step 8b: Wait 10 controller-clock cycles. */
-	udelay(10);
-
-	/* Steo 8c: Setup power-power control. */
-	if (dwc3_octeon_config_power(dev, base))
-		return -EINVAL;
-
-	/* Step 8d: Deassert UAHC reset signal. */
-	val = dwc3_octeon_readq(uctl_ctl_reg);
-	val &= ~USBDRD_UCTL_CTL_UAHC_RST;
-	dwc3_octeon_writeq(uctl_ctl_reg, val);
-
-	/* Step 8e: Wait 10 controller-clock cycles. */
-	udelay(10);
-
-	/* Step 9: Enable conditional coprocessor clock of UCTL. */
-	val = dwc3_octeon_readq(uctl_ctl_reg);
-	val |= USBDRD_UCTL_CTL_CSCLK_EN;
-	dwc3_octeon_writeq(uctl_ctl_reg, val);
-
-	/*Step 10: Set for host mode only. */
-	val = dwc3_octeon_readq(uctl_ctl_reg);
-	val &= ~USBDRD_UCTL_CTL_DRD_MODE;
-	dwc3_octeon_writeq(uctl_ctl_reg, val);
-
-	return 0;
-}
-
-static void __init dwc3_octeon_set_endian_mode(void __iomem *base)
-{
-	u64 val;
-	void __iomem *uctl_shim_cfg_reg = base + USBDRD_UCTL_SHIM_CFG;
-
-	val = dwc3_octeon_readq(uctl_shim_cfg_reg);
-	val &= ~USBDRD_UCTL_SHIM_CFG_DMA_ENDIAN_MODE;
-	val &= ~USBDRD_UCTL_SHIM_CFG_CSR_ENDIAN_MODE;
-#ifdef __BIG_ENDIAN
-	val |= FIELD_PREP(USBDRD_UCTL_SHIM_CFG_DMA_ENDIAN_MODE, 1);
-	val |= FIELD_PREP(USBDRD_UCTL_SHIM_CFG_CSR_ENDIAN_MODE, 1);
-#endif
-	dwc3_octeon_writeq(uctl_shim_cfg_reg, val);
-}
-
-static void __init dwc3_octeon_phy_reset(void __iomem *base)
-{
-	u64 val;
-	void __iomem *uctl_ctl_reg = base + USBDRD_UCTL_CTL;
-
-	val = dwc3_octeon_readq(uctl_ctl_reg);
-	val &= ~USBDRD_UCTL_CTL_UPHY_RST;
-	dwc3_octeon_writeq(uctl_ctl_reg, val);
-}
-
-static int __init dwc3_octeon_device_init(void)
-{
-	const char compat_node_name[] = "cavium,octeon-7130-usb-uctl";
-	struct platform_device *pdev;
-	struct device_node *node;
-	struct resource *res;
-	void __iomem *base;
-
-	/*
-	 * There should only be three universal controllers, "uctl"
-	 * in the device tree. Two USB and a SATA, which we ignore.
-	 */
-	node = NULL;
-	do {
-		node = of_find_node_by_name(node, "uctl");
-		if (!node)
-			return -ENODEV;
-
-		if (of_device_is_compatible(node, compat_node_name)) {
-			pdev = of_find_device_by_node(node);
-			if (!pdev)
-				return -ENODEV;
-
-			/*
-			 * The code below maps in the registers necessary for
-			 * setting up the clocks and reseting PHYs. We must
-			 * release the resources so the dwc3 subsystem doesn't
-			 * know the difference.
-			 */
-			base = devm_platform_get_and_ioremap_resource(pdev, 0, &res);
-			if (IS_ERR(base)) {
-				put_device(&pdev->dev);
-				return PTR_ERR(base);
-			}
-
-			mutex_lock(&dwc3_octeon_clocks_mutex);
-			if (dwc3_octeon_clocks_start(&pdev->dev, base) == 0)
-				dev_info(&pdev->dev, "clocks initialized.\n");
-			dwc3_octeon_set_endian_mode(base);
-			dwc3_octeon_phy_reset(base);
-			mutex_unlock(&dwc3_octeon_clocks_mutex);
-			devm_iounmap(&pdev->dev, base);
-			devm_release_mem_region(&pdev->dev, res->start,
-						resource_size(res));
-			put_device(&pdev->dev);
-		}
-	} while (node != NULL);
-
-	return 0;
-}
-device_initcall(dwc3_octeon_device_init);
-
-MODULE_AUTHOR("David Daney <david.daney@cavium.com>");
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("USB driver for OCTEON III SoC");
diff --git a/arch/mips/configs/ip22_defconfig b/arch/mips/configs/ip22_defconfig
index 44821f497261..dc49b09d492b 100644
--- a/arch/mips/configs/ip22_defconfig
+++ b/arch/mips/configs/ip22_defconfig
@@ -127,7 +127,6 @@ CONFIG_IP_NF_MATCH_TTL=m
 CONFIG_IP_NF_FILTER=m
 CONFIG_IP_NF_TARGET_REJECT=m
 CONFIG_IP_NF_MANGLE=m
-CONFIG_IP_NF_TARGET_CLUSTERIP=m
 CONFIG_IP_NF_TARGET_ECN=m
 CONFIG_IP_NF_TARGET_TTL=m
 CONFIG_IP_NF_RAW=m
diff --git a/arch/mips/configs/loongson3_defconfig b/arch/mips/configs/loongson3_defconfig
index 2b4133176930..07839a4b397e 100644
--- a/arch/mips/configs/loongson3_defconfig
+++ b/arch/mips/configs/loongson3_defconfig
@@ -283,6 +283,7 @@ CONFIG_DRM_AMDGPU_USERPTR=y
 CONFIG_DRM_AMD_ACP=y
 CONFIG_DRM_AMD_DC=y
 CONFIG_DRM_AMD_DC_SI=y
+CONFIG_DRM_AST=m
 CONFIG_DRM_RADEON=m
 CONFIG_DRM_QXL=y
 CONFIG_DRM_VIRTIO_GPU=y
diff --git a/arch/mips/configs/malta_defconfig b/arch/mips/configs/malta_defconfig
index 743209047792..ae1a7793e810 100644
--- a/arch/mips/configs/malta_defconfig
+++ b/arch/mips/configs/malta_defconfig
@@ -127,7 +127,6 @@ CONFIG_IP_NF_MATCH_TTL=m
 CONFIG_IP_NF_FILTER=m
 CONFIG_IP_NF_TARGET_REJECT=m
 CONFIG_IP_NF_MANGLE=m
-CONFIG_IP_NF_TARGET_CLUSTERIP=m
 CONFIG_IP_NF_TARGET_ECN=m
 CONFIG_IP_NF_TARGET_TTL=m
 CONFIG_IP_NF_RAW=m
diff --git a/arch/mips/configs/malta_kvm_defconfig b/arch/mips/configs/malta_kvm_defconfig
index dd2b9c181f32..c07e30f63d8b 100644
--- a/arch/mips/configs/malta_kvm_defconfig
+++ b/arch/mips/configs/malta_kvm_defconfig
@@ -131,7 +131,6 @@ CONFIG_IP_NF_MATCH_TTL=m
 CONFIG_IP_NF_FILTER=m
 CONFIG_IP_NF_TARGET_REJECT=m
 CONFIG_IP_NF_MANGLE=m
-CONFIG_IP_NF_TARGET_CLUSTERIP=m
 CONFIG_IP_NF_TARGET_ECN=m
 CONFIG_IP_NF_TARGET_TTL=m
 CONFIG_IP_NF_RAW=m
diff --git a/arch/mips/configs/maltaup_xpa_defconfig b/arch/mips/configs/maltaup_xpa_defconfig
index 97c2d7f530b3..0a5701020d3f 100644
--- a/arch/mips/configs/maltaup_xpa_defconfig
+++ b/arch/mips/configs/maltaup_xpa_defconfig
@@ -128,7 +128,6 @@ CONFIG_IP_NF_MATCH_TTL=m
 CONFIG_IP_NF_FILTER=m
 CONFIG_IP_NF_TARGET_REJECT=m
 CONFIG_IP_NF_MANGLE=m
-CONFIG_IP_NF_TARGET_CLUSTERIP=m
 CONFIG_IP_NF_TARGET_ECN=m
 CONFIG_IP_NF_TARGET_TTL=m
 CONFIG_IP_NF_RAW=m
diff --git a/arch/mips/configs/rm200_defconfig b/arch/mips/configs/rm200_defconfig
index e0e312dd968a..5c5e2186210c 100644
--- a/arch/mips/configs/rm200_defconfig
+++ b/arch/mips/configs/rm200_defconfig
@@ -90,7 +90,6 @@ CONFIG_IP_NF_MATCH_TTL=m
 CONFIG_IP_NF_FILTER=m
 CONFIG_IP_NF_TARGET_REJECT=m
 CONFIG_IP_NF_MANGLE=m
-CONFIG_IP_NF_TARGET_CLUSTERIP=m
 CONFIG_IP_NF_TARGET_ECN=m
 CONFIG_IP_NF_TARGET_TTL=m
 CONFIG_IP_NF_RAW=m
diff --git a/arch/mips/include/asm/Kbuild b/arch/mips/include/asm/Kbuild
index dee172716581..7ba67a0d6c97 100644
--- a/arch/mips/include/asm/Kbuild
+++ b/arch/mips/include/asm/Kbuild
@@ -7,7 +7,6 @@ generated-y += unistd_nr_n32.h
 generated-y += unistd_nr_n64.h
 generated-y += unistd_nr_o32.h
 
-generic-y += export.h
 generic-y += kvm_para.h
 generic-y += mcs_spinlock.h
 generic-y += parport.h
diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h
index 04cedf9f8811..54a85f1d4f2c 100644
--- a/arch/mips/include/asm/kvm_host.h
+++ b/arch/mips/include/asm/kvm_host.h
@@ -896,7 +896,6 @@ static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
 static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {}
 static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {}
 
-#define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLB
-int kvm_arch_flush_remote_tlb(struct kvm *kvm);
+#define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS
 
 #endif /* __MIPS_KVM_HOST_H__ */
diff --git a/arch/mips/include/asm/mach-loongson32/loongson1.h b/arch/mips/include/asm/mach-loongson32/loongson1.h
index 7971272345d3..84f45461c832 100644
--- a/arch/mips/include/asm/mach-loongson32/loongson1.h
+++ b/arch/mips/include/asm/mach-loongson32/loongson1.h
@@ -45,8 +45,6 @@
 #define LS1X_NAND_BASE			0x1fe78000
 #define LS1X_CLK_BASE			0x1fe78030
 
-#include <regs-clk.h>
 #include <regs-mux.h>
-#include <regs-rtc.h>
 
 #endif /* __ASM_MACH_LOONGSON32_LOONGSON1_H */
diff --git a/arch/mips/include/asm/mach-loongson32/regs-clk.h b/arch/mips/include/asm/mach-loongson32/regs-clk.h
deleted file mode 100644
index 98136fa8bee1..000000000000
--- a/arch/mips/include/asm/mach-loongson32/regs-clk.h
+++ /dev/null
@@ -1,81 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * Copyright (c) 2011 Zhang, Keguang <keguang.zhang@gmail.com>
- *
- * Loongson 1 Clock Register Definitions.
- */
-
-#ifndef __ASM_MACH_LOONGSON32_REGS_CLK_H
-#define __ASM_MACH_LOONGSON32_REGS_CLK_H
-
-#define LS1X_CLK_REG(x) \
-		((void __iomem *)KSEG1ADDR(LS1X_CLK_BASE + (x)))
-
-#define LS1X_CLK_PLL_FREQ		LS1X_CLK_REG(0x0)
-#define LS1X_CLK_PLL_DIV		LS1X_CLK_REG(0x4)
-
-#if defined(CONFIG_LOONGSON1_LS1B)
-/* Clock PLL Divisor Register Bits */
-#define DIV_DC_EN			BIT(31)
-#define DIV_DC_RST			BIT(30)
-#define DIV_CPU_EN			BIT(25)
-#define DIV_CPU_RST			BIT(24)
-#define DIV_DDR_EN			BIT(19)
-#define DIV_DDR_RST			BIT(18)
-#define RST_DC_EN			BIT(5)
-#define RST_DC				BIT(4)
-#define RST_DDR_EN			BIT(3)
-#define RST_DDR				BIT(2)
-#define RST_CPU_EN			BIT(1)
-#define RST_CPU				BIT(0)
-
-#define DIV_DC_SHIFT			26
-#define DIV_CPU_SHIFT			20
-#define DIV_DDR_SHIFT			14
-
-#define DIV_DC_WIDTH			4
-#define DIV_CPU_WIDTH			4
-#define DIV_DDR_WIDTH			4
-
-#define BYPASS_DC_SHIFT			12
-#define BYPASS_DDR_SHIFT		10
-#define BYPASS_CPU_SHIFT		8
-
-#define BYPASS_DC_WIDTH			1
-#define BYPASS_DDR_WIDTH		1
-#define BYPASS_CPU_WIDTH		1
-
-#elif defined(CONFIG_LOONGSON1_LS1C)
-/* PLL/SDRAM Frequency configuration register Bits */
-#define PLL_VALID			BIT(31)
-#define FRAC_N				GENMASK(23, 16)
-#define RST_TIME			GENMASK(3, 2)
-#define SDRAM_DIV			GENMASK(1, 0)
-
-/* CPU/CAMERA/DC Frequency configuration register Bits */
-#define DIV_DC_EN			BIT(31)
-#define DIV_DC				GENMASK(30, 24)
-#define DIV_CAM_EN			BIT(23)
-#define DIV_CAM				GENMASK(22, 16)
-#define DIV_CPU_EN			BIT(15)
-#define DIV_CPU				GENMASK(14, 8)
-#define DIV_DC_SEL_EN			BIT(5)
-#define DIV_DC_SEL			BIT(4)
-#define DIV_CAM_SEL_EN			BIT(3)
-#define DIV_CAM_SEL			BIT(2)
-#define DIV_CPU_SEL_EN			BIT(1)
-#define DIV_CPU_SEL			BIT(0)
-
-#define DIV_DC_SHIFT			24
-#define DIV_CAM_SHIFT			16
-#define DIV_CPU_SHIFT			8
-#define DIV_DDR_SHIFT			0
-
-#define DIV_DC_WIDTH			7
-#define DIV_CAM_WIDTH			7
-#define DIV_CPU_WIDTH			7
-#define DIV_DDR_WIDTH			2
-
-#endif
-
-#endif /* __ASM_MACH_LOONGSON32_REGS_CLK_H */
diff --git a/arch/mips/include/asm/mach-loongson32/regs-rtc.h b/arch/mips/include/asm/mach-loongson32/regs-rtc.h
deleted file mode 100644
index a3d096be1607..000000000000
--- a/arch/mips/include/asm/mach-loongson32/regs-rtc.h
+++ /dev/null
@@ -1,19 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * Copyright (c) 2016 Yang Ling <gnaygnil@gmail.com>
- *
- * Loongson 1 RTC timer Register Definitions.
- */
-
-#ifndef __ASM_MACH_LOONGSON32_REGS_RTC_H
-#define __ASM_MACH_LOONGSON32_REGS_RTC_H
-
-#define LS1X_RTC_REG(x) \
-		((void __iomem *)KSEG1ADDR(LS1X_RTC_BASE + (x)))
-
-#define LS1X_RTC_CTRL	LS1X_RTC_REG(0x40)
-
-#define RTC_EXTCLK_OK	(BIT(5) | BIT(8))
-#define RTC_EXTCLK_EN	BIT(8)
-
-#endif /* __ASM_MACH_LOONGSON32_REGS_RTC_H */
diff --git a/arch/mips/kernel/mcount.S b/arch/mips/kernel/mcount.S
index cff52b283e03..fcec579f64e9 100644
--- a/arch/mips/kernel/mcount.S
+++ b/arch/mips/kernel/mcount.S
@@ -10,7 +10,7 @@
  * Author: Wu Zhangjin <wuzhangjin@gmail.com>
  */
 
-#include <asm/export.h>
+#include <linux/export.h>
 #include <asm/regdef.h>
 #include <asm/stackframe.h>
 #include <asm/ftrace.h>
diff --git a/arch/mips/kernel/octeon_switch.S b/arch/mips/kernel/octeon_switch.S
index 9b7c8ab6f08c..447a3ea14aa1 100644
--- a/arch/mips/kernel/octeon_switch.S
+++ b/arch/mips/kernel/octeon_switch.S
@@ -11,7 +11,6 @@
  *    written by Carsten Langgaard, carstenl@mips.com
  */
 #include <asm/asm.h>
-#include <asm/export.h>
 #include <asm/asm-offsets.h>
 #include <asm/mipsregs.h>
 #include <asm/regdef.h>
diff --git a/arch/mips/kernel/r2300_fpu.S b/arch/mips/kernel/r2300_fpu.S
index 6c745aa9e825..c000b22e3fcd 100644
--- a/arch/mips/kernel/r2300_fpu.S
+++ b/arch/mips/kernel/r2300_fpu.S
@@ -11,10 +11,10 @@
  * Further modifications to make this work:
  * Copyright (c) 1998 Harald Koerfgen
  */
+#include <linux/export.h>
 #include <asm/asm.h>
 #include <asm/asmmacro.h>
 #include <asm/errno.h>
-#include <asm/export.h>
 #include <asm/fpregdef.h>
 #include <asm/mipsregs.h>
 #include <asm/asm-offsets.h>
diff --git a/arch/mips/kernel/r2300_switch.S b/arch/mips/kernel/r2300_switch.S
index 71b1aafae1bb..48e63943e6f7 100644
--- a/arch/mips/kernel/r2300_switch.S
+++ b/arch/mips/kernel/r2300_switch.S
@@ -13,7 +13,6 @@
  */
 #include <asm/asm.h>
 #include <asm/cachectl.h>
-#include <asm/export.h>
 #include <asm/fpregdef.h>
 #include <asm/mipsregs.h>
 #include <asm/asm-offsets.h>
diff --git a/arch/mips/kernel/r4k_fpu.S b/arch/mips/kernel/r4k_fpu.S
index 4e8c98517d9d..4bb97ee89904 100644
--- a/arch/mips/kernel/r4k_fpu.S
+++ b/arch/mips/kernel/r4k_fpu.S
@@ -12,10 +12,10 @@
  * Copyright (C) 2000 MIPS Technologies, Inc.
  * Copyright (C) 1999, 2001 Silicon Graphics, Inc.
  */
+#include <linux/export.h>
 #include <asm/asm.h>
 #include <asm/asmmacro.h>
 #include <asm/errno.h>
-#include <asm/export.h>
 #include <asm/fpregdef.h>
 #include <asm/mipsregs.h>
 #include <asm/asm-offsets.h>
diff --git a/arch/mips/kernel/sysrq.c b/arch/mips/kernel/sysrq.c
index 9c1a2019113b..2e98049fe783 100644
--- a/arch/mips/kernel/sysrq.c
+++ b/arch/mips/kernel/sysrq.c
@@ -44,7 +44,7 @@ static void sysrq_tlbdump_othercpus(struct work_struct *dummy)
 static DECLARE_WORK(sysrq_tlbdump, sysrq_tlbdump_othercpus);
 #endif
 
-static void sysrq_handle_tlbdump(int key)
+static void sysrq_handle_tlbdump(u8 key)
 {
 	sysrq_tlbdump_single(NULL);
 #ifdef CONFIG_SMP
diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c
index aa5583a7b05b..231ac052b506 100644
--- a/arch/mips/kvm/mips.c
+++ b/arch/mips/kvm/mips.c
@@ -199,7 +199,7 @@ void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
 	/* Flush slot from GPA */
 	kvm_mips_flush_gpa_pt(kvm, slot->base_gfn,
 			      slot->base_gfn + slot->npages - 1);
-	kvm_arch_flush_remote_tlbs_memslot(kvm, slot);
+	kvm_flush_remote_tlbs_memslot(kvm, slot);
 	spin_unlock(&kvm->mmu_lock);
 }
 
@@ -235,7 +235,7 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
 		needs_flush = kvm_mips_mkclean_gpa_pt(kvm, new->base_gfn,
 					new->base_gfn + new->npages - 1);
 		if (needs_flush)
-			kvm_arch_flush_remote_tlbs_memslot(kvm, new);
+			kvm_flush_remote_tlbs_memslot(kvm, new);
 		spin_unlock(&kvm->mmu_lock);
 	}
 }
@@ -981,18 +981,12 @@ void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
 
 }
 
-int kvm_arch_flush_remote_tlb(struct kvm *kvm)
+int kvm_arch_flush_remote_tlbs(struct kvm *kvm)
 {
 	kvm_mips_callbacks->prepare_flush_shadow(kvm);
 	return 1;
 }
 
-void kvm_arch_flush_remote_tlbs_memslot(struct kvm *kvm,
-					const struct kvm_memory_slot *memslot)
-{
-	kvm_flush_remote_tlbs(kvm);
-}
-
 int kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg)
 {
 	int r;
diff --git a/arch/mips/kvm/mmu.c b/arch/mips/kvm/mmu.c
index e8c08988ed37..7b2ac1319d70 100644
--- a/arch/mips/kvm/mmu.c
+++ b/arch/mips/kvm/mmu.c
@@ -447,7 +447,7 @@ bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range)
 bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
 {
 	gpa_t gpa = range->start << PAGE_SHIFT;
-	pte_t hva_pte = range->pte;
+	pte_t hva_pte = range->arg.pte;
 	pte_t *gpa_pte = kvm_mips_pte_for_gpa(kvm, NULL, gpa);
 	pte_t old_pte;
 
diff --git a/arch/mips/lantiq/irq.c b/arch/mips/lantiq/irq.c
index 20622bf0a9b3..8f208007b8e8 100644
--- a/arch/mips/lantiq/irq.c
+++ b/arch/mips/lantiq/irq.c
@@ -10,7 +10,7 @@
 #include <linux/sched.h>
 #include <linux/irqchip.h>
 #include <linux/irqdomain.h>
-#include <linux/of_platform.h>
+#include <linux/of.h>
 #include <linux/of_address.h>
 #include <linux/of_irq.h>
 
diff --git a/arch/mips/lantiq/xway/dcdc.c b/arch/mips/lantiq/xway/dcdc.c
index 96199966a350..4a808f8c5beb 100644
--- a/arch/mips/lantiq/xway/dcdc.c
+++ b/arch/mips/lantiq/xway/dcdc.c
@@ -6,7 +6,8 @@
  */
 
 #include <linux/ioport.h>
-#include <linux/of_platform.h>
+#include <linux/mod_devicetable.h>
+#include <linux/platform_device.h>
 
 #include <lantiq_soc.h>
 
diff --git a/arch/mips/lantiq/xway/gptu.c b/arch/mips/lantiq/xway/gptu.c
index a492b1eb1925..8d52001301de 100644
--- a/arch/mips/lantiq/xway/gptu.c
+++ b/arch/mips/lantiq/xway/gptu.c
@@ -8,8 +8,9 @@
 #include <linux/interrupt.h>
 #include <linux/ioport.h>
 #include <linux/init.h>
-#include <linux/of_platform.h>
+#include <linux/mod_devicetable.h>
 #include <linux/of_irq.h>
+#include <linux/platform_device.h>
 
 #include <lantiq_soc.h>
 #include "../clk.h"
diff --git a/arch/mips/lantiq/xway/sysctrl.c b/arch/mips/lantiq/xway/sysctrl.c
index d444a1b98a72..3ed078225222 100644
--- a/arch/mips/lantiq/xway/sysctrl.c
+++ b/arch/mips/lantiq/xway/sysctrl.c
@@ -10,7 +10,6 @@
 #include <linux/clkdev.h>
 #include <linux/spinlock.h>
 #include <linux/of.h>
-#include <linux/of_platform.h>
 #include <linux/of_address.h>
 
 #include <lantiq_soc.h>
diff --git a/arch/mips/lantiq/xway/vmmc.c b/arch/mips/lantiq/xway/vmmc.c
index 2796e87dfcae..37c133052ef7 100644
--- a/arch/mips/lantiq/xway/vmmc.c
+++ b/arch/mips/lantiq/xway/vmmc.c
@@ -7,7 +7,8 @@
 #include <linux/err.h>
 #include <linux/export.h>
 #include <linux/gpio/consumer.h>
-#include <linux/of_platform.h>
+#include <linux/mod_devicetable.h>
+#include <linux/platform_device.h>
 #include <linux/dma-mapping.h>
 
 #include <lantiq_soc.h>
diff --git a/arch/mips/lib/csum_partial.S b/arch/mips/lib/csum_partial.S
index 7767137c3e49..3d2ff4118d79 100644
--- a/arch/mips/lib/csum_partial.S
+++ b/arch/mips/lib/csum_partial.S
@@ -11,9 +11,9 @@
  * Copyright (C) 2014 Imagination Technologies Ltd.
  */
 #include <linux/errno.h>
+#include <linux/export.h>
 #include <asm/asm.h>
 #include <asm/asm-offsets.h>
-#include <asm/export.h>
 #include <asm/regdef.h>
 
 #ifdef CONFIG_64BIT
diff --git a/arch/mips/lib/memcpy.S b/arch/mips/lib/memcpy.S
index 18a43f2e29c8..a4b4e805ff13 100644
--- a/arch/mips/lib/memcpy.S
+++ b/arch/mips/lib/memcpy.S
@@ -32,9 +32,9 @@
 #undef CONFIG_CPU_HAS_PREFETCH
 #endif
 
+#include <linux/export.h>
 #include <asm/asm.h>
 #include <asm/asm-offsets.h>
-#include <asm/export.h>
 #include <asm/regdef.h>
 
 #define dst a0
diff --git a/arch/mips/lib/memset.S b/arch/mips/lib/memset.S
index 0b342bae9a98..79405c32cc85 100644
--- a/arch/mips/lib/memset.S
+++ b/arch/mips/lib/memset.S
@@ -8,9 +8,9 @@
  * Copyright (C) 2007 by Maciej W. Rozycki
  * Copyright (C) 2011, 2012 MIPS Technologies, Inc.
  */
+#include <linux/export.h>
 #include <asm/asm.h>
 #include <asm/asm-offsets.h>
-#include <asm/export.h>
 #include <asm/regdef.h>
 
 #if LONGSIZE == 4
diff --git a/arch/mips/lib/strncpy_user.S b/arch/mips/lib/strncpy_user.S
index 13aaa9927ad1..94f4203563c1 100644
--- a/arch/mips/lib/strncpy_user.S
+++ b/arch/mips/lib/strncpy_user.S
@@ -7,9 +7,9 @@
  * Copyright (C) 2011 MIPS Technologies, Inc.
  */
 #include <linux/errno.h>
+#include <linux/export.h>
 #include <asm/asm.h>
 #include <asm/asm-offsets.h>
-#include <asm/export.h>
 #include <asm/regdef.h>
 
 #define EX(insn,reg,addr,handler)			\
diff --git a/arch/mips/lib/strnlen_user.S b/arch/mips/lib/strnlen_user.S
index 6de31b616f9c..c192a6f6cd84 100644
--- a/arch/mips/lib/strnlen_user.S
+++ b/arch/mips/lib/strnlen_user.S
@@ -6,9 +6,9 @@
  * Copyright (c) 1996, 1998, 1999, 2004 by Ralf Baechle
  * Copyright (c) 1999 Silicon Graphics, Inc.
  */
+#include <linux/export.h>
 #include <asm/asm.h>
 #include <asm/asm-offsets.h>
-#include <asm/export.h>
 #include <asm/regdef.h>
 
 #define EX(insn,reg,addr,handler)			\
diff --git a/arch/mips/loongson32/common/platform.c b/arch/mips/loongson32/common/platform.c
index 64d7979394e6..8075590a9f83 100644
--- a/arch/mips/loongson32/common/platform.c
+++ b/arch/mips/loongson32/common/platform.c
@@ -265,14 +265,6 @@ struct platform_device ls1x_ehci_pdev = {
 };
 
 /* Real Time Clock */
-void __init ls1x_rtc_set_extclk(struct platform_device *pdev)
-{
-	u32 val = __raw_readl(LS1X_RTC_CTRL);
-
-	if (!(val & RTC_EXTCLK_OK))
-		__raw_writel(val | RTC_EXTCLK_EN, LS1X_RTC_CTRL);
-}
-
 struct platform_device ls1x_rtc_pdev = {
 	.name		= "ls1x-rtc",
 	.id		= -1,
diff --git a/arch/mips/loongson64/smp.c b/arch/mips/loongson64/smp.c
index cdecd7af11a6..e015a26a40f7 100644
--- a/arch/mips/loongson64/smp.c
+++ b/arch/mips/loongson64/smp.c
@@ -187,181 +187,181 @@ static void csr_ipi_probe(void)
 
 static void ipi_set0_regs_init(void)
 {
-	ipi_set0_regs[0] = (void *)
+	ipi_set0_regs[0] = (void __iomem *)
 		(SMP_CORE_GROUP0_BASE + SMP_CORE0_OFFSET + SET0);
-	ipi_set0_regs[1] = (void *)
+	ipi_set0_regs[1] = (void __iomem *)
 		(SMP_CORE_GROUP0_BASE + SMP_CORE1_OFFSET + SET0);
-	ipi_set0_regs[2] = (void *)
+	ipi_set0_regs[2] = (void __iomem *)
 		(SMP_CORE_GROUP0_BASE + SMP_CORE2_OFFSET + SET0);
-	ipi_set0_regs[3] = (void *)
+	ipi_set0_regs[3] = (void __iomem *)
 		(SMP_CORE_GROUP0_BASE + SMP_CORE3_OFFSET + SET0);
-	ipi_set0_regs[4] = (void *)
+	ipi_set0_regs[4] = (void __iomem *)
 		(SMP_CORE_GROUP1_BASE + SMP_CORE0_OFFSET + SET0);
-	ipi_set0_regs[5] = (void *)
+	ipi_set0_regs[5] = (void __iomem *)
 		(SMP_CORE_GROUP1_BASE + SMP_CORE1_OFFSET + SET0);
-	ipi_set0_regs[6] = (void *)
+	ipi_set0_regs[6] = (void __iomem *)
 		(SMP_CORE_GROUP1_BASE + SMP_CORE2_OFFSET + SET0);
-	ipi_set0_regs[7] = (void *)
+	ipi_set0_regs[7] = (void __iomem *)
 		(SMP_CORE_GROUP1_BASE + SMP_CORE3_OFFSET + SET0);
-	ipi_set0_regs[8] = (void *)
+	ipi_set0_regs[8] = (void __iomem *)
 		(SMP_CORE_GROUP2_BASE + SMP_CORE0_OFFSET + SET0);
-	ipi_set0_regs[9] = (void *)
+	ipi_set0_regs[9] = (void __iomem *)
 		(SMP_CORE_GROUP2_BASE + SMP_CORE1_OFFSET + SET0);
-	ipi_set0_regs[10] = (void *)
+	ipi_set0_regs[10] = (void __iomem *)
 		(SMP_CORE_GROUP2_BASE + SMP_CORE2_OFFSET + SET0);
-	ipi_set0_regs[11] = (void *)
+	ipi_set0_regs[11] = (void __iomem *)
 		(SMP_CORE_GROUP2_BASE + SMP_CORE3_OFFSET + SET0);
-	ipi_set0_regs[12] = (void *)
+	ipi_set0_regs[12] = (void __iomem *)
 		(SMP_CORE_GROUP3_BASE + SMP_CORE0_OFFSET + SET0);
-	ipi_set0_regs[13] = (void *)
+	ipi_set0_regs[13] = (void __iomem *)
 		(SMP_CORE_GROUP3_BASE + SMP_CORE1_OFFSET + SET0);
-	ipi_set0_regs[14] = (void *)
+	ipi_set0_regs[14] = (void __iomem *)
 		(SMP_CORE_GROUP3_BASE + SMP_CORE2_OFFSET + SET0);
-	ipi_set0_regs[15] = (void *)
+	ipi_set0_regs[15] = (void __iomem *)
 		(SMP_CORE_GROUP3_BASE + SMP_CORE3_OFFSET + SET0);
 }
 
 static void ipi_clear0_regs_init(void)
 {
-	ipi_clear0_regs[0] = (void *)
+	ipi_clear0_regs[0] = (void __iomem *)
 		(SMP_CORE_GROUP0_BASE + SMP_CORE0_OFFSET + CLEAR0);
-	ipi_clear0_regs[1] = (void *)
+	ipi_clear0_regs[1] = (void __iomem *)
 		(SMP_CORE_GROUP0_BASE + SMP_CORE1_OFFSET + CLEAR0);
-	ipi_clear0_regs[2] = (void *)
+	ipi_clear0_regs[2] = (void __iomem *)
 		(SMP_CORE_GROUP0_BASE + SMP_CORE2_OFFSET + CLEAR0);
-	ipi_clear0_regs[3] = (void *)
+	ipi_clear0_regs[3] = (void __iomem *)
 		(SMP_CORE_GROUP0_BASE + SMP_CORE3_OFFSET + CLEAR0);
-	ipi_clear0_regs[4] = (void *)
+	ipi_clear0_regs[4] = (void __iomem *)
 		(SMP_CORE_GROUP1_BASE + SMP_CORE0_OFFSET + CLEAR0);
-	ipi_clear0_regs[5] = (void *)
+	ipi_clear0_regs[5] = (void __iomem *)
 		(SMP_CORE_GROUP1_BASE + SMP_CORE1_OFFSET + CLEAR0);
-	ipi_clear0_regs[6] = (void *)
+	ipi_clear0_regs[6] = (void __iomem *)
 		(SMP_CORE_GROUP1_BASE + SMP_CORE2_OFFSET + CLEAR0);
-	ipi_clear0_regs[7] = (void *)
+	ipi_clear0_regs[7] = (void __iomem *)
 		(SMP_CORE_GROUP1_BASE + SMP_CORE3_OFFSET + CLEAR0);
-	ipi_clear0_regs[8] = (void *)
+	ipi_clear0_regs[8] = (void __iomem *)
 		(SMP_CORE_GROUP2_BASE + SMP_CORE0_OFFSET + CLEAR0);
-	ipi_clear0_regs[9] = (void *)
+	ipi_clear0_regs[9] = (void __iomem *)
 		(SMP_CORE_GROUP2_BASE + SMP_CORE1_OFFSET + CLEAR0);
-	ipi_clear0_regs[10] = (void *)
+	ipi_clear0_regs[10] = (void __iomem *)
 		(SMP_CORE_GROUP2_BASE + SMP_CORE2_OFFSET + CLEAR0);
-	ipi_clear0_regs[11] = (void *)
+	ipi_clear0_regs[11] = (void __iomem *)
 		(SMP_CORE_GROUP2_BASE + SMP_CORE3_OFFSET + CLEAR0);
-	ipi_clear0_regs[12] = (void *)
+	ipi_clear0_regs[12] = (void __iomem *)
 		(SMP_CORE_GROUP3_BASE + SMP_CORE0_OFFSET + CLEAR0);
-	ipi_clear0_regs[13] = (void *)
+	ipi_clear0_regs[13] = (void __iomem *)
 		(SMP_CORE_GROUP3_BASE + SMP_CORE1_OFFSET + CLEAR0);
-	ipi_clear0_regs[14] = (void *)
+	ipi_clear0_regs[14] = (void __iomem *)
 		(SMP_CORE_GROUP3_BASE + SMP_CORE2_OFFSET + CLEAR0);
-	ipi_clear0_regs[15] = (void *)
+	ipi_clear0_regs[15] = (void __iomem *)
 		(SMP_CORE_GROUP3_BASE + SMP_CORE3_OFFSET + CLEAR0);
 }
 
 static void ipi_status0_regs_init(void)
 {
-	ipi_status0_regs[0] = (void *)
+	ipi_status0_regs[0] = (void __iomem *)
 		(SMP_CORE_GROUP0_BASE + SMP_CORE0_OFFSET + STATUS0);
-	ipi_status0_regs[1] = (void *)
+	ipi_status0_regs[1] = (void __iomem *)
 		(SMP_CORE_GROUP0_BASE + SMP_CORE1_OFFSET + STATUS0);
-	ipi_status0_regs[2] = (void *)
+	ipi_status0_regs[2] = (void __iomem *)
 		(SMP_CORE_GROUP0_BASE + SMP_CORE2_OFFSET + STATUS0);
-	ipi_status0_regs[3] = (void *)
+	ipi_status0_regs[3] = (void __iomem *)
 		(SMP_CORE_GROUP0_BASE + SMP_CORE3_OFFSET + STATUS0);
-	ipi_status0_regs[4] = (void *)
+	ipi_status0_regs[4] = (void __iomem *)
 		(SMP_CORE_GROUP1_BASE + SMP_CORE0_OFFSET + STATUS0);
-	ipi_status0_regs[5] = (void *)
+	ipi_status0_regs[5] = (void __iomem *)
 		(SMP_CORE_GROUP1_BASE + SMP_CORE1_OFFSET + STATUS0);
-	ipi_status0_regs[6] = (void *)
+	ipi_status0_regs[6] = (void __iomem *)
 		(SMP_CORE_GROUP1_BASE + SMP_CORE2_OFFSET + STATUS0);
-	ipi_status0_regs[7] = (void *)
+	ipi_status0_regs[7] = (void __iomem *)
 		(SMP_CORE_GROUP1_BASE + SMP_CORE3_OFFSET + STATUS0);
-	ipi_status0_regs[8] = (void *)
+	ipi_status0_regs[8] = (void __iomem *)
 		(SMP_CORE_GROUP2_BASE + SMP_CORE0_OFFSET + STATUS0);
-	ipi_status0_regs[9] = (void *)
+	ipi_status0_regs[9] = (void __iomem *)
 		(SMP_CORE_GROUP2_BASE + SMP_CORE1_OFFSET + STATUS0);
-	ipi_status0_regs[10] = (void *)
+	ipi_status0_regs[10] = (void __iomem *)
 		(SMP_CORE_GROUP2_BASE + SMP_CORE2_OFFSET + STATUS0);
-	ipi_status0_regs[11] = (void *)
+	ipi_status0_regs[11] = (void __iomem *)
 		(SMP_CORE_GROUP2_BASE + SMP_CORE3_OFFSET + STATUS0);
-	ipi_status0_regs[12] = (void *)
+	ipi_status0_regs[12] = (void __iomem *)
 		(SMP_CORE_GROUP3_BASE + SMP_CORE0_OFFSET + STATUS0);
-	ipi_status0_regs[13] = (void *)
+	ipi_status0_regs[13] = (void __iomem *)
 		(SMP_CORE_GROUP3_BASE + SMP_CORE1_OFFSET + STATUS0);
-	ipi_status0_regs[14] = (void *)
+	ipi_status0_regs[14] = (void __iomem *)
 		(SMP_CORE_GROUP3_BASE + SMP_CORE2_OFFSET + STATUS0);
-	ipi_status0_regs[15] = (void *)
+	ipi_status0_regs[15] = (void __iomem *)
 		(SMP_CORE_GROUP3_BASE + SMP_CORE3_OFFSET + STATUS0);
 }
 
 static void ipi_en0_regs_init(void)
 {
-	ipi_en0_regs[0] = (void *)
+	ipi_en0_regs[0] = (void __iomem *)
 		(SMP_CORE_GROUP0_BASE + SMP_CORE0_OFFSET + EN0);
-	ipi_en0_regs[1] = (void *)
+	ipi_en0_regs[1] = (void __iomem *)
 		(SMP_CORE_GROUP0_BASE + SMP_CORE1_OFFSET + EN0);
-	ipi_en0_regs[2] = (void *)
+	ipi_en0_regs[2] = (void __iomem *)
 		(SMP_CORE_GROUP0_BASE + SMP_CORE2_OFFSET + EN0);
-	ipi_en0_regs[3] = (void *)
+	ipi_en0_regs[3] = (void __iomem *)
 		(SMP_CORE_GROUP0_BASE + SMP_CORE3_OFFSET + EN0);
-	ipi_en0_regs[4] = (void *)
+	ipi_en0_regs[4] = (void __iomem *)
 		(SMP_CORE_GROUP1_BASE + SMP_CORE0_OFFSET + EN0);
-	ipi_en0_regs[5] = (void *)
+	ipi_en0_regs[5] = (void __iomem *)
 		(SMP_CORE_GROUP1_BASE + SMP_CORE1_OFFSET + EN0);
-	ipi_en0_regs[6] = (void *)
+	ipi_en0_regs[6] = (void __iomem *)
 		(SMP_CORE_GROUP1_BASE + SMP_CORE2_OFFSET + EN0);
-	ipi_en0_regs[7] = (void *)
+	ipi_en0_regs[7] = (void __iomem *)
 		(SMP_CORE_GROUP1_BASE + SMP_CORE3_OFFSET + EN0);
-	ipi_en0_regs[8] = (void *)
+	ipi_en0_regs[8] = (void __iomem *)
 		(SMP_CORE_GROUP2_BASE + SMP_CORE0_OFFSET + EN0);
-	ipi_en0_regs[9] = (void *)
+	ipi_en0_regs[9] = (void __iomem *)
 		(SMP_CORE_GROUP2_BASE + SMP_CORE1_OFFSET + EN0);
-	ipi_en0_regs[10] = (void *)
+	ipi_en0_regs[10] = (void __iomem *)
 		(SMP_CORE_GROUP2_BASE + SMP_CORE2_OFFSET + EN0);
-	ipi_en0_regs[11] = (void *)
+	ipi_en0_regs[11] = (void __iomem *)
 		(SMP_CORE_GROUP2_BASE + SMP_CORE3_OFFSET + EN0);
-	ipi_en0_regs[12] = (void *)
+	ipi_en0_regs[12] = (void __iomem *)
 		(SMP_CORE_GROUP3_BASE + SMP_CORE0_OFFSET + EN0);
-	ipi_en0_regs[13] = (void *)
+	ipi_en0_regs[13] = (void __iomem *)
 		(SMP_CORE_GROUP3_BASE + SMP_CORE1_OFFSET + EN0);
-	ipi_en0_regs[14] = (void *)
+	ipi_en0_regs[14] = (void __iomem *)
 		(SMP_CORE_GROUP3_BASE + SMP_CORE2_OFFSET + EN0);
-	ipi_en0_regs[15] = (void *)
+	ipi_en0_regs[15] = (void __iomem *)
 		(SMP_CORE_GROUP3_BASE + SMP_CORE3_OFFSET + EN0);
 }
 
 static void ipi_mailbox_buf_init(void)
 {
-	ipi_mailbox_buf[0] = (void *)
+	ipi_mailbox_buf[0] = (void __iomem *)
 		(SMP_CORE_GROUP0_BASE + SMP_CORE0_OFFSET + BUF);
-	ipi_mailbox_buf[1] = (void *)
+	ipi_mailbox_buf[1] = (void __iomem *)
 		(SMP_CORE_GROUP0_BASE + SMP_CORE1_OFFSET + BUF);
-	ipi_mailbox_buf[2] = (void *)
+	ipi_mailbox_buf[2] = (void __iomem *)
 		(SMP_CORE_GROUP0_BASE + SMP_CORE2_OFFSET + BUF);
-	ipi_mailbox_buf[3] = (void *)
+	ipi_mailbox_buf[3] = (void __iomem *)
 		(SMP_CORE_GROUP0_BASE + SMP_CORE3_OFFSET + BUF);
-	ipi_mailbox_buf[4] = (void *)
+	ipi_mailbox_buf[4] = (void __iomem *)
 		(SMP_CORE_GROUP1_BASE + SMP_CORE0_OFFSET + BUF);
-	ipi_mailbox_buf[5] = (void *)
+	ipi_mailbox_buf[5] = (void __iomem *)
 		(SMP_CORE_GROUP1_BASE + SMP_CORE1_OFFSET + BUF);
-	ipi_mailbox_buf[6] = (void *)
+	ipi_mailbox_buf[6] = (void __iomem *)
 		(SMP_CORE_GROUP1_BASE + SMP_CORE2_OFFSET + BUF);
-	ipi_mailbox_buf[7] = (void *)
+	ipi_mailbox_buf[7] = (void __iomem *)
 		(SMP_CORE_GROUP1_BASE + SMP_CORE3_OFFSET + BUF);
-	ipi_mailbox_buf[8] = (void *)
+	ipi_mailbox_buf[8] = (void __iomem *)
 		(SMP_CORE_GROUP2_BASE + SMP_CORE0_OFFSET + BUF);
-	ipi_mailbox_buf[9] = (void *)
+	ipi_mailbox_buf[9] = (void __iomem *)
 		(SMP_CORE_GROUP2_BASE + SMP_CORE1_OFFSET + BUF);
-	ipi_mailbox_buf[10] = (void *)
+	ipi_mailbox_buf[10] = (void __iomem *)
 		(SMP_CORE_GROUP2_BASE + SMP_CORE2_OFFSET + BUF);
-	ipi_mailbox_buf[11] = (void *)
+	ipi_mailbox_buf[11] = (void __iomem *)
 		(SMP_CORE_GROUP2_BASE + SMP_CORE3_OFFSET + BUF);
-	ipi_mailbox_buf[12] = (void *)
+	ipi_mailbox_buf[12] = (void __iomem *)
 		(SMP_CORE_GROUP3_BASE + SMP_CORE0_OFFSET + BUF);
-	ipi_mailbox_buf[13] = (void *)
+	ipi_mailbox_buf[13] = (void __iomem *)
 		(SMP_CORE_GROUP3_BASE + SMP_CORE1_OFFSET + BUF);
-	ipi_mailbox_buf[14] = (void *)
+	ipi_mailbox_buf[14] = (void __iomem *)
 		(SMP_CORE_GROUP3_BASE + SMP_CORE2_OFFSET + BUF);
-	ipi_mailbox_buf[15] = (void *)
+	ipi_mailbox_buf[15] = (void __iomem *)
 		(SMP_CORE_GROUP3_BASE + SMP_CORE3_OFFSET + BUF);
 }
 
diff --git a/arch/mips/mm/page-funcs.S b/arch/mips/mm/page-funcs.S
index 43181ac0a1af..42d0516ca18a 100644
--- a/arch/mips/mm/page-funcs.S
+++ b/arch/mips/mm/page-funcs.S
@@ -8,8 +8,8 @@
  * Copyright (C) 2012  MIPS Technologies, Inc.
  * Copyright (C) 2012  Ralf Baechle <ralf@linux-mips.org>
  */
+#include <linux/export.h>
 #include <asm/asm.h>
-#include <asm/export.h>
 #include <asm/regdef.h>
 
 #ifdef CONFIG_SIBYTE_DMA_PAGEOPS
diff --git a/arch/mips/mm/tlb-funcs.S b/arch/mips/mm/tlb-funcs.S
index 00fef578c8cd..2705d7dcb33e 100644
--- a/arch/mips/mm/tlb-funcs.S
+++ b/arch/mips/mm/tlb-funcs.S
@@ -11,8 +11,8 @@
  * Copyright (C) 2012  MIPS Technologies, Inc.
  * Copyright (C) 2012  Ralf Baechle <ralf@linux-mips.org>
  */
+#include <linux/export.h>
 #include <asm/asm.h>
-#include <asm/export.h>
 #include <asm/regdef.h>
 
 #define FASTPATH_SIZE	128
diff --git a/arch/mips/pci/pci-lantiq.c b/arch/mips/pci/pci-lantiq.c
index 79e29bf42a24..80f7293166bb 100644
--- a/arch/mips/pci/pci-lantiq.c
+++ b/arch/mips/pci/pci-lantiq.c
@@ -13,9 +13,9 @@
 #include <linux/mm.h>
 #include <linux/vmalloc.h>
 #include <linux/clk.h>
-#include <linux/of_platform.h>
-#include <linux/of_irq.h>
+#include <linux/of.h>
 #include <linux/of_pci.h>
+#include <linux/platform_device.h>
 
 #include <asm/addrspace.h>
 
diff --git a/arch/mips/pci/pci-rt2880.c b/arch/mips/pci/pci-rt2880.c
index e9dd01431f21..1cada09fa5db 100644
--- a/arch/mips/pci/pci-rt2880.c
+++ b/arch/mips/pci/pci-rt2880.c
@@ -13,9 +13,8 @@
 #include <linux/pci.h>
 #include <linux/io.h>
 #include <linux/init.h>
-#include <linux/of_platform.h>
-#include <linux/of_irq.h>
-#include <linux/of_pci.h>
+#include <linux/mod_devicetable.h>
+#include <linux/platform_device.h>
 
 #include <asm/mach-ralink/rt288x.h>
 
diff --git a/arch/mips/pic32/pic32mzda/config.c b/arch/mips/pic32/pic32mzda/config.c
index f69532007717..73be5689e0df 100644
--- a/arch/mips/pic32/pic32mzda/config.c
+++ b/arch/mips/pic32/pic32mzda/config.c
@@ -5,7 +5,7 @@
  */
 #include <linux/init.h>
 #include <linux/io.h>
-#include <linux/of_platform.h>
+#include <linux/spinlock.h>
 
 #include <asm/mach-pic32/pic32.h>
 
diff --git a/arch/mips/ralink/ill_acc.c b/arch/mips/ralink/ill_acc.c
index f395ae218470..25341b2319d0 100644
--- a/arch/mips/ralink/ill_acc.c
+++ b/arch/mips/ralink/ill_acc.c
@@ -5,8 +5,10 @@
  */
 
 #include <linux/interrupt.h>
+#include <linux/of.h>
 #include <linux/of_platform.h>
 #include <linux/of_irq.h>
+#include <linux/platform_device.h>
 
 #include <asm/mach-ralink/ralink_regs.h>
 
diff --git a/arch/mips/ralink/irq.c b/arch/mips/ralink/irq.c
index fa353bc13947..46aef0a1b22a 100644
--- a/arch/mips/ralink/irq.c
+++ b/arch/mips/ralink/irq.c
@@ -7,7 +7,7 @@
 
 #include <linux/io.h>
 #include <linux/bitops.h>
-#include <linux/of_platform.h>
+#include <linux/of.h>
 #include <linux/of_address.h>
 #include <linux/of_irq.h>
 #include <linux/irqdomain.h>
diff --git a/arch/mips/ralink/of.c b/arch/mips/ralink/of.c
index 45d60c094496..7f90068c68f2 100644
--- a/arch/mips/ralink/of.c
+++ b/arch/mips/ralink/of.c
@@ -14,7 +14,7 @@
 #include <linux/of_fdt.h>
 #include <linux/kernel.h>
 #include <linux/memblock.h>
-#include <linux/of_platform.h>
+#include <linux/of.h>
 #include <linux/of_address.h>
 
 #include <asm/reboot.h>
diff --git a/arch/mips/ralink/prom.c b/arch/mips/ralink/prom.c
index aaac1e6ec7d9..c3b96861844c 100644
--- a/arch/mips/ralink/prom.c
+++ b/arch/mips/ralink/prom.c
@@ -7,8 +7,6 @@
  */
 
 #include <linux/string.h>
-#include <linux/of_fdt.h>
-#include <linux/of_platform.h>
 
 #include <asm/bootinfo.h>
 #include <asm/addrspace.h>
diff --git a/arch/mips/txx9/generic/pci.c b/arch/mips/txx9/generic/pci.c
index e98845543b77..5ae30b78d38d 100644
--- a/arch/mips/txx9/generic/pci.c
+++ b/arch/mips/txx9/generic/pci.c
@@ -51,6 +51,7 @@ int __init txx9_pci66_check(struct pci_controller *hose, int top_bus,
 	unsigned short vid;
 	int cap66 = -1;
 	u16 stat;
+	int ret;
 
 	/* It seems SLC90E66 needs some time after PCI reset... */
 	mdelay(80);
@@ -60,9 +61,9 @@ int __init txx9_pci66_check(struct pci_controller *hose, int top_bus,
 	for (pci_devfn = 0; pci_devfn < 0xff; pci_devfn++) {
 		if (PCI_FUNC(pci_devfn))
 			continue;
-		if (early_read_config_word(hose, top_bus, current_bus,
-					   pci_devfn, PCI_VENDOR_ID, &vid) !=
-		    PCIBIOS_SUCCESSFUL)
+		ret = early_read_config_word(hose, top_bus, current_bus,
+					     pci_devfn, PCI_VENDOR_ID, &vid);
+		if (ret != PCIBIOS_SUCCESSFUL)
 			continue;
 		if (vid == 0xffff)
 			continue;
@@ -343,26 +344,28 @@ static void tc35815_fixup(struct pci_dev *dev)
 
 static void final_fixup(struct pci_dev *dev)
 {
+	unsigned long timeout;
 	unsigned char bist;
+	int ret;
 
 	/* Do build-in self test */
-	if (pci_read_config_byte(dev, PCI_BIST, &bist) == PCIBIOS_SUCCESSFUL &&
-	    (bist & PCI_BIST_CAPABLE)) {
-		unsigned long timeout;
-		pci_set_power_state(dev, PCI_D0);
-		pr_info("PCI: %s BIST...", pci_name(dev));
-		pci_write_config_byte(dev, PCI_BIST, PCI_BIST_START);
-		timeout = jiffies + HZ * 2;	/* timeout after 2 sec */
-		do {
-			pci_read_config_byte(dev, PCI_BIST, &bist);
-			if (time_after(jiffies, timeout))
-				break;
-		} while (bist & PCI_BIST_START);
-		if (bist & (PCI_BIST_CODE_MASK | PCI_BIST_START))
-			pr_cont("failed. (0x%x)\n", bist);
-		else
-			pr_cont("OK.\n");
-	}
+	ret = pci_read_config_byte(dev, PCI_BIST, &bist);
+	if ((ret != PCIBIOS_SUCCESSFUL) || !(bist & PCI_BIST_CAPABLE))
+		return;
+
+	pci_set_power_state(dev, PCI_D0);
+	pr_info("PCI: %s BIST...", pci_name(dev));
+	pci_write_config_byte(dev, PCI_BIST, PCI_BIST_START);
+	timeout = jiffies + HZ * 2;	/* timeout after 2 sec */
+	do {
+		pci_read_config_byte(dev, PCI_BIST, &bist);
+		if (time_after(jiffies, timeout))
+			break;
+	} while (bist & PCI_BIST_START);
+	if (bist & (PCI_BIST_CODE_MASK | PCI_BIST_START))
+		pr_cont("failed. (0x%x)\n", bist);
+	else
+		pr_cont("OK.\n");
 }
 
 #ifdef CONFIG_TOSHIBA_FPCIB0
diff --git a/arch/mips/vdso/vdso.lds.S b/arch/mips/vdso/vdso.lds.S
index d90b65724d78..836465e3bcb8 100644
--- a/arch/mips/vdso/vdso.lds.S
+++ b/arch/mips/vdso/vdso.lds.S
@@ -94,7 +94,9 @@ VERSION
 #ifndef CONFIG_MIPS_DISABLE_VDSO
 	global:
 		__vdso_clock_gettime;
+#ifdef CONFIG_MIPS_CLOCK_VSYSCALL
 		__vdso_gettimeofday;
+#endif
 		__vdso_clock_getres;
 #if _MIPS_SIM != _MIPS_SIM_ABI64
 		__vdso_clock_gettime64;
diff --git a/arch/openrisc/include/asm/bug.h b/arch/openrisc/include/asm/bug.h
new file mode 100644
index 000000000000..6d04776eaf10
--- /dev/null
+++ b/arch/openrisc/include/asm/bug.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef __ASM_OPENRISC_BUG_H
+#define __ASM_OPENRISC_BUG_H
+
+#include <asm-generic/bug.h>
+
+struct pt_regs;
+
+void __noreturn die(const char *str, struct pt_regs *regs, long err);
+
+#endif /* __ASM_OPENRISC_BUG_H */
diff --git a/arch/openrisc/include/asm/page.h b/arch/openrisc/include/asm/page.h
index 52b0d7e76446..44fc1fd56717 100644
--- a/arch/openrisc/include/asm/page.h
+++ b/arch/openrisc/include/asm/page.h
@@ -72,8 +72,15 @@ typedef struct page *pgtable_t;
 #define __va(x) ((void *)((unsigned long)(x) + PAGE_OFFSET))
 #define __pa(x) ((unsigned long) (x) - PAGE_OFFSET)
 
-#define virt_to_pfn(kaddr)      (__pa(kaddr) >> PAGE_SHIFT)
-#define pfn_to_virt(pfn)        __va((pfn) << PAGE_SHIFT)
+static inline unsigned long virt_to_pfn(const void *kaddr)
+{
+	return __pa(kaddr) >> PAGE_SHIFT;
+}
+
+static inline void * pfn_to_virt(unsigned long pfn)
+{
+	return (void *)((unsigned long)__va(pfn) << PAGE_SHIFT);
+}
 
 #define virt_to_page(addr) \
 	(mem_map + (((unsigned long)(addr)-PAGE_OFFSET) >> PAGE_SHIFT))
diff --git a/arch/openrisc/include/asm/processor.h b/arch/openrisc/include/asm/processor.h
index ed9efb430afa..3b736e74e6ed 100644
--- a/arch/openrisc/include/asm/processor.h
+++ b/arch/openrisc/include/asm/processor.h
@@ -73,6 +73,7 @@ struct thread_struct {
 
 void start_thread(struct pt_regs *regs, unsigned long nip, unsigned long sp);
 unsigned long __get_wchan(struct task_struct *p);
+void show_registers(struct pt_regs *regs);
 
 #define cpu_relax()     barrier()
 
diff --git a/arch/openrisc/kernel/process.c b/arch/openrisc/kernel/process.c
index dfa558f98ed8..86e02929f3ac 100644
--- a/arch/openrisc/kernel/process.c
+++ b/arch/openrisc/kernel/process.c
@@ -14,6 +14,7 @@
  */
 
 #define __KERNEL_SYSCALLS__
+#include <linux/cpu.h>
 #include <linux/errno.h>
 #include <linux/sched.h>
 #include <linux/sched/debug.h>
@@ -38,6 +39,7 @@
 #include <asm/io.h>
 #include <asm/processor.h>
 #include <asm/spr_defs.h>
+#include <asm/switch_to.h>
 
 #include <linux/smp.h>
 
@@ -119,8 +121,6 @@ void flush_thread(void)
 
 void show_regs(struct pt_regs *regs)
 {
-	extern void show_registers(struct pt_regs *regs);
-
 	show_regs_print_info(KERN_DEFAULT);
 	/* __PHX__ cleanup this mess */
 	show_registers(regs);
diff --git a/arch/openrisc/kernel/ptrace.c b/arch/openrisc/kernel/ptrace.c
index 0b7d2ca6ba3b..1eeac3b62e9d 100644
--- a/arch/openrisc/kernel/ptrace.c
+++ b/arch/openrisc/kernel/ptrace.c
@@ -27,6 +27,10 @@
 #include <asm/thread_info.h>
 #include <asm/page.h>
 
+asmlinkage long do_syscall_trace_enter(struct pt_regs *regs);
+
+asmlinkage void do_syscall_trace_leave(struct pt_regs *regs);
+
 /*
  * Copy the thread state to a regset that can be interpreted by userspace.
  *
diff --git a/arch/openrisc/kernel/signal.c b/arch/openrisc/kernel/signal.c
index 2e7257a433ff..e2f21a5d8ad9 100644
--- a/arch/openrisc/kernel/signal.c
+++ b/arch/openrisc/kernel/signal.c
@@ -34,6 +34,11 @@ struct rt_sigframe {
 	unsigned char retcode[16];	/* trampoline code */
 };
 
+asmlinkage long _sys_rt_sigreturn(struct pt_regs *regs);
+
+asmlinkage int do_work_pending(struct pt_regs *regs, unsigned int thread_flags,
+			       int syscall);
+
 static int restore_sigcontext(struct pt_regs *regs,
 			      struct sigcontext __user *sc)
 {
@@ -224,7 +229,7 @@ handle_signal(struct ksignal *ksig, struct pt_regs *regs)
  * mode below.
  */
 
-int do_signal(struct pt_regs *regs, int syscall)
+static int do_signal(struct pt_regs *regs, int syscall)
 {
 	struct ksignal ksig;
 	unsigned long continue_addr = 0;
diff --git a/arch/openrisc/kernel/smp.c b/arch/openrisc/kernel/smp.c
index 0a7a059e2dff..1c5a2d71d675 100644
--- a/arch/openrisc/kernel/smp.c
+++ b/arch/openrisc/kernel/smp.c
@@ -23,6 +23,8 @@
 #include <asm/cacheflush.h>
 #include <asm/time.h>
 
+asmlinkage __init void secondary_start_kernel(void);
+
 static void (*smp_cross_call)(const struct cpumask *, unsigned int);
 
 unsigned long secondary_release = -1;
diff --git a/arch/openrisc/kernel/time.c b/arch/openrisc/kernel/time.c
index 8e26c1af5441..764c7bfb5df3 100644
--- a/arch/openrisc/kernel/time.c
+++ b/arch/openrisc/kernel/time.c
@@ -25,6 +25,8 @@
 #include <asm/cpuinfo.h>
 #include <asm/time.h>
 
+irqreturn_t __irq_entry timer_interrupt(struct pt_regs *regs);
+
 /* Test the timer ticks to count, used in sync routine */
 inline void openrisc_timer_set(unsigned long count)
 {
diff --git a/arch/openrisc/kernel/traps.c b/arch/openrisc/kernel/traps.c
index 0aa6b07efda1..9370888c9a7e 100644
--- a/arch/openrisc/kernel/traps.c
+++ b/arch/openrisc/kernel/traps.c
@@ -30,14 +30,23 @@
 #include <linux/kallsyms.h>
 #include <linux/uaccess.h>
 
+#include <asm/bug.h>
 #include <asm/io.h>
+#include <asm/processor.h>
 #include <asm/unwinder.h>
 #include <asm/sections.h>
 
-static int kstack_depth_to_print = 0x180;
 int lwa_flag;
 static unsigned long __user *lwa_addr;
 
+asmlinkage void unhandled_exception(struct pt_regs *regs, int ea, int vector);
+asmlinkage void do_trap(struct pt_regs *regs, unsigned long address);
+asmlinkage void do_fpe_trap(struct pt_regs *regs, unsigned long address);
+asmlinkage void do_unaligned_access(struct pt_regs *regs, unsigned long address);
+asmlinkage void do_bus_fault(struct pt_regs *regs, unsigned long address);
+asmlinkage void do_illegal_instruction(struct pt_regs *regs,
+				       unsigned long address);
+
 static void print_trace(void *data, unsigned long addr, int reliable)
 {
 	const char *loglvl = data;
@@ -143,80 +152,6 @@ bad:
 	printk("\n");
 }
 
-void nommu_dump_state(struct pt_regs *regs,
-		      unsigned long ea, unsigned long vector)
-{
-	int i;
-	unsigned long addr, stack = regs->sp;
-
-	printk("\n\r[nommu_dump_state] :: ea %lx, vector %lx\n\r", ea, vector);
-
-	printk("CPU #: %d\n"
-	       "   PC: %08lx    SR: %08lx    SP: %08lx\n",
-	       0, regs->pc, regs->sr, regs->sp);
-	printk("GPR00: %08lx GPR01: %08lx GPR02: %08lx GPR03: %08lx\n",
-	       0L, regs->gpr[1], regs->gpr[2], regs->gpr[3]);
-	printk("GPR04: %08lx GPR05: %08lx GPR06: %08lx GPR07: %08lx\n",
-	       regs->gpr[4], regs->gpr[5], regs->gpr[6], regs->gpr[7]);
-	printk("GPR08: %08lx GPR09: %08lx GPR10: %08lx GPR11: %08lx\n",
-	       regs->gpr[8], regs->gpr[9], regs->gpr[10], regs->gpr[11]);
-	printk("GPR12: %08lx GPR13: %08lx GPR14: %08lx GPR15: %08lx\n",
-	       regs->gpr[12], regs->gpr[13], regs->gpr[14], regs->gpr[15]);
-	printk("GPR16: %08lx GPR17: %08lx GPR18: %08lx GPR19: %08lx\n",
-	       regs->gpr[16], regs->gpr[17], regs->gpr[18], regs->gpr[19]);
-	printk("GPR20: %08lx GPR21: %08lx GPR22: %08lx GPR23: %08lx\n",
-	       regs->gpr[20], regs->gpr[21], regs->gpr[22], regs->gpr[23]);
-	printk("GPR24: %08lx GPR25: %08lx GPR26: %08lx GPR27: %08lx\n",
-	       regs->gpr[24], regs->gpr[25], regs->gpr[26], regs->gpr[27]);
-	printk("GPR28: %08lx GPR29: %08lx GPR30: %08lx GPR31: %08lx\n",
-	       regs->gpr[28], regs->gpr[29], regs->gpr[30], regs->gpr[31]);
-	printk("  RES: %08lx oGPR11: %08lx\n",
-	       regs->gpr[11], regs->orig_gpr11);
-
-	printk("Process %s (pid: %d, stackpage=%08lx)\n",
-	       ((struct task_struct *)(__pa(current)))->comm,
-	       ((struct task_struct *)(__pa(current)))->pid,
-	       (unsigned long)current);
-
-	printk("\nStack: ");
-	printk("Stack dump [0x%08lx]:\n", (unsigned long)stack);
-	for (i = 0; i < kstack_depth_to_print; i++) {
-		if (((long)stack & (THREAD_SIZE - 1)) == 0)
-			break;
-		stack++;
-
-		printk("%lx :: sp + %02d: 0x%08lx\n", stack, i * 4,
-		       *((unsigned long *)(__pa(stack))));
-	}
-	printk("\n");
-
-	printk("Call Trace:   ");
-	i = 1;
-	while (((long)stack & (THREAD_SIZE - 1)) != 0) {
-		addr = *((unsigned long *)__pa(stack));
-		stack++;
-
-		if (kernel_text_address(addr)) {
-			if (i && ((i % 6) == 0))
-				printk("\n ");
-			printk(" [<%08lx>]", addr);
-			i++;
-		}
-	}
-	printk("\n");
-
-	printk("\nCode: ");
-
-	for (i = -24; i < 24; i++) {
-		unsigned long word;
-
-		word = ((unsigned long *)(__pa(regs->pc)))[i];
-
-		print_data(regs->pc, word, i);
-	}
-	printk("\n");
-}
-
 /* This is normally the 'Oops' routine */
 void __noreturn die(const char *str, struct pt_regs *regs, long err)
 {
diff --git a/arch/openrisc/mm/fault.c b/arch/openrisc/mm/fault.c
index a9dcd4381d1a..29e232d78d82 100644
--- a/arch/openrisc/mm/fault.c
+++ b/arch/openrisc/mm/fault.c
@@ -18,6 +18,7 @@
 #include <linux/perf_event.h>
 
 #include <linux/uaccess.h>
+#include <asm/bug.h>
 #include <asm/mmu_context.h>
 #include <asm/siginfo.h>
 #include <asm/signal.h>
@@ -30,7 +31,8 @@
  */
 volatile pgd_t *current_pgd[NR_CPUS];
 
-extern void __noreturn die(char *, struct pt_regs *, long);
+asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long address,
+			      unsigned long vector, int write_acc);
 
 /*
  * This routine handles page faults.  It determines the address,
diff --git a/arch/openrisc/mm/init.c b/arch/openrisc/mm/init.c
index d531ab82be12..1dcd78c8f0e9 100644
--- a/arch/openrisc/mm/init.c
+++ b/arch/openrisc/mm/init.c
@@ -123,8 +123,6 @@ static void __init map_ram(void)
 
 void __init paging_init(void)
 {
-	extern void tlb_init(void);
-
 	int i;
 
 	printk(KERN_INFO "Setting up paging and PTEs.\n");
diff --git a/arch/openrisc/mm/ioremap.c b/arch/openrisc/mm/ioremap.c
index 91c8259d4b7e..f59ea4c10b0f 100644
--- a/arch/openrisc/mm/ioremap.c
+++ b/arch/openrisc/mm/ioremap.c
@@ -22,7 +22,7 @@
 
 extern int mem_init_done;
 
-/**
+/*
  * OK, this one's a bit tricky... ioremap can get called before memory is
  * initialized (early serial console does this) and will want to alloc a page
  * for its mapping.  No userspace pages will ever get allocated before memory
diff --git a/arch/openrisc/mm/tlb.c b/arch/openrisc/mm/tlb.c
index e2f2a3c3bb22..3115f2e4f864 100644
--- a/arch/openrisc/mm/tlb.c
+++ b/arch/openrisc/mm/tlb.c
@@ -182,12 +182,3 @@ void destroy_context(struct mm_struct *mm)
 	flush_tlb_mm(mm);
 
 }
-
-/* called once during VM initialization, from init.c */
-
-void __init tlb_init(void)
-{
-	/* Do nothing... */
-	/* invalidate the entire TLB */
-	/* flush_tlb_all(); */
-}
diff --git a/arch/parisc/include/asm/ide.h b/arch/parisc/include/asm/ide.h
deleted file mode 100644
index 7aa75b93a1b6..000000000000
--- a/arch/parisc/include/asm/ide.h
+++ /dev/null
@@ -1,54 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- *  linux/include/asm-parisc/ide.h
- *
- *  Copyright (C) 1994-1996  Linus Torvalds & authors
- */
-
-/*
- *  This file contains the PARISC architecture specific IDE code.
- */
-
-#ifndef __ASM_PARISC_IDE_H
-#define __ASM_PARISC_IDE_H
-
-/* Generic I/O and MEMIO string operations.  */
-
-#define __ide_insw	insw
-#define __ide_insl	insl
-#define __ide_outsw	outsw
-#define __ide_outsl	outsl
-
-static __inline__ void __ide_mm_insw(void __iomem *port, void *addr, u32 count)
-{
-	while (count--) {
-		*(u16 *)addr = __raw_readw(port);
-		addr += 2;
-	}
-}
-
-static __inline__ void __ide_mm_insl(void __iomem *port, void *addr, u32 count)
-{
-	while (count--) {
-		*(u32 *)addr = __raw_readl(port);
-		addr += 4;
-	}
-}
-
-static __inline__ void __ide_mm_outsw(void __iomem *port, void *addr, u32 count)
-{
-	while (count--) {
-		__raw_writew(*(u16 *)addr, port);
-		addr += 2;
-	}
-}
-
-static __inline__ void __ide_mm_outsl(void __iomem *port, void *addr, u32 count)
-{
-	while (count--) {
-		__raw_writel(*(u32 *)addr, port);
-		addr += 4;
-	}
-}
-
-#endif /* __ASM_PARISC_IDE_H */
diff --git a/arch/powerpc/configs/disable-werror.config b/arch/powerpc/configs/disable-werror.config
index 6ea12a12432c..7776b91da37f 100644
--- a/arch/powerpc/configs/disable-werror.config
+++ b/arch/powerpc/configs/disable-werror.config
@@ -1 +1,2 @@
+# Help: Disable -Werror
 CONFIG_PPC_DISABLE_WERROR=y
diff --git a/arch/powerpc/configs/security.config b/arch/powerpc/configs/security.config
index 1c91a35c6a73..0d54e29e2cdf 100644
--- a/arch/powerpc/configs/security.config
+++ b/arch/powerpc/configs/security.config
@@ -1,3 +1,5 @@
+# Help: Common security options for PowerPC builds
+
 # This is the equivalent of booting with lockdown=integrity
 CONFIG_SECURITY=y
 CONFIG_SECURITYFS=y
@@ -12,4 +14,4 @@ CONFIG_INIT_ON_ALLOC_DEFAULT_ON=y
 
 # UBSAN bounds checking is very cheap and good for hardening
 CONFIG_UBSAN=y
-# CONFIG_UBSAN_MISC is not set
-\ No newline at end of file
+# CONFIG_UBSAN_MISC is not set
diff --git a/arch/powerpc/crypto/Kconfig b/arch/powerpc/crypto/Kconfig
index 803da4a8a9a2..6fc2248ca561 100644
--- a/arch/powerpc/crypto/Kconfig
+++ b/arch/powerpc/crypto/Kconfig
@@ -113,7 +113,7 @@ config CRYPTO_AES_GCM_P10
 
 config CRYPTO_CHACHA20_P10
 	tristate "Ciphers: ChaCha20, XChacha20, XChacha12 (P10 or later)"
-	depends on PPC64 && CPU_LITTLE_ENDIAN
+	depends on PPC64 && CPU_LITTLE_ENDIAN && VSX
 	select CRYPTO_SKCIPHER
 	select CRYPTO_LIB_CHACHA_GENERIC
 	select CRYPTO_ARCH_HAVE_LIB_CHACHA
@@ -127,7 +127,7 @@ config CRYPTO_CHACHA20_P10
 
 config CRYPTO_POLY1305_P10
 	tristate "Hash functions: Poly1305 (P10 or later)"
-	depends on PPC64 && CPU_LITTLE_ENDIAN
+	depends on PPC64 && CPU_LITTLE_ENDIAN && VSX
 	select CRYPTO_HASH
 	select CRYPTO_LIB_POLY1305_GENERIC
 	help
diff --git a/arch/powerpc/include/asm/fs_pd.h b/arch/powerpc/include/asm/fs_pd.h
deleted file mode 100644
index d530f68b4eef..000000000000
--- a/arch/powerpc/include/asm/fs_pd.h
+++ /dev/null
@@ -1,27 +0,0 @@
-/*
- * Platform information definitions.
- *
- * 2006 (c) MontaVista Software, Inc.
- * Vitaly Bordug <vbordug@ru.mvista.com>
- *
- * This file is licensed under the terms of the GNU General Public License
- * version 2. This program is licensed "as is" without any warranty of any
- * kind, whether express or implied.
- */
-
-#ifndef FS_PD_H
-#define FS_PD_H
-#include <sysdev/fsl_soc.h>
-#include <asm/time.h>
-
-static inline int uart_baudrate(void)
-{
-        return get_baudrate();
-}
-
-static inline int uart_clock(void)
-{
-        return ppc_proc_freq;
-}
-
-#endif
diff --git a/arch/powerpc/include/asm/ide.h b/arch/powerpc/include/asm/ide.h
deleted file mode 100644
index ce87a4441ca3..000000000000
--- a/arch/powerpc/include/asm/ide.h
+++ /dev/null
@@ -1,18 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- *  Copyright (C) 1994-1996 Linus Torvalds & authors
- *
- *  This file contains the powerpc architecture specific IDE code.
- */
-#ifndef _ASM_POWERPC_IDE_H
-#define _ASM_POWERPC_IDE_H
-
-#include <linux/compiler.h>
-#include <asm/io.h>
-
-#define __ide_mm_insw(p, a, c)	readsw((void __iomem *)(p), (a), (c))
-#define __ide_mm_insl(p, a, c)	readsl((void __iomem *)(p), (a), (c))
-#define __ide_mm_outsw(p, a, c)	writesw((void __iomem *)(p), (a), (c))
-#define __ide_mm_outsl(p, a, c)	writesl((void __iomem *)(p), (a), (c))
-
-#endif /* _ASM_POWERPC_IDE_H */
diff --git a/arch/powerpc/platforms/8xx/mpc885ads_setup.c b/arch/powerpc/platforms/8xx/mpc885ads_setup.c
index 76c7cd78c17e..2d899be746eb 100644
--- a/arch/powerpc/platforms/8xx/mpc885ads_setup.c
+++ b/arch/powerpc/platforms/8xx/mpc885ads_setup.c
@@ -21,7 +21,6 @@
 #include <linux/device.h>
 #include <linux/delay.h>
 
-#include <linux/fs_uart_pd.h>
 #include <linux/fsl_devices.h>
 #include <linux/mii.h>
 #include <linux/of_address.h>
diff --git a/arch/powerpc/platforms/8xx/tqm8xx_setup.c b/arch/powerpc/platforms/8xx/tqm8xx_setup.c
index 1670dfd30809..d97a7910c594 100644
--- a/arch/powerpc/platforms/8xx/tqm8xx_setup.c
+++ b/arch/powerpc/platforms/8xx/tqm8xx_setup.c
@@ -24,7 +24,6 @@
 #include <linux/device.h>
 #include <linux/delay.h>
 
-#include <linux/fs_uart_pd.h>
 #include <linux/fsl_devices.h>
 #include <linux/mii.h>
 #include <linux/of_fdt.h>
diff --git a/arch/powerpc/sysdev/fsl_soc.c b/arch/powerpc/sysdev/fsl_soc.c
index 528506f6e2b8..3949ceb79e64 100644
--- a/arch/powerpc/sysdev/fsl_soc.c
+++ b/arch/powerpc/sysdev/fsl_soc.c
@@ -22,7 +22,6 @@
 #include <linux/phy.h>
 #include <linux/spi/spi.h>
 #include <linux/fsl_devices.h>
-#include <linux/fs_uart_pd.h>
 #include <linux/reboot.h>
 
 #include <linux/atomic.h>
@@ -35,7 +34,6 @@
 #include <asm/cpm2.h>
 #include <asm/fsl_hcalls.h>	/* For the Freescale hypervisor */
 
-extern void init_smc_ioports(struct fs_uart_platform_info*);
 static phys_addr_t immrbase = -1;
 
 phys_addr_t get_immrbase(void)
diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
index 5888fcd8e408..b3b94cd37713 100644
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@ -3988,7 +3988,7 @@ static void xmon_init(int enable)
 }
 
 #ifdef CONFIG_MAGIC_SYSRQ
-static void sysrq_handle_xmon(int key)
+static void sysrq_handle_xmon(u8 key)
 {
 	if (xmon_is_locked_down()) {
 		clear_all_bpt();
diff --git a/arch/riscv/configs/32-bit.config b/arch/riscv/configs/32-bit.config
index f6af0f708df4..16ee163847b4 100644
--- a/arch/riscv/configs/32-bit.config
+++ b/arch/riscv/configs/32-bit.config
@@ -1,3 +1,4 @@
+# Help: Build a 32-bit image
 CONFIG_ARCH_RV32I=y
 CONFIG_32BIT=y
 # CONFIG_PORTABLE is not set
diff --git a/arch/riscv/configs/64-bit.config b/arch/riscv/configs/64-bit.config
index 313edc554d84..d872a2d533f2 100644
--- a/arch/riscv/configs/64-bit.config
+++ b/arch/riscv/configs/64-bit.config
@@ -1,2 +1,3 @@
+# Help: Build a 64-bit image
 CONFIG_ARCH_RV64I=y
 CONFIG_64BIT=y
diff --git a/arch/riscv/include/asm/csr.h b/arch/riscv/include/asm/csr.h
index 7bac43a3176e..777cb8299551 100644
--- a/arch/riscv/include/asm/csr.h
+++ b/arch/riscv/include/asm/csr.h
@@ -54,6 +54,7 @@
 #ifndef CONFIG_64BIT
 #define SATP_PPN	_AC(0x003FFFFF, UL)
 #define SATP_MODE_32	_AC(0x80000000, UL)
+#define SATP_MODE_SHIFT	31
 #define SATP_ASID_BITS	9
 #define SATP_ASID_SHIFT	22
 #define SATP_ASID_MASK	_AC(0x1FF, UL)
@@ -62,6 +63,7 @@
 #define SATP_MODE_39	_AC(0x8000000000000000, UL)
 #define SATP_MODE_48	_AC(0x9000000000000000, UL)
 #define SATP_MODE_57	_AC(0xa000000000000000, UL)
+#define SATP_MODE_SHIFT	60
 #define SATP_ASID_BITS	16
 #define SATP_ASID_SHIFT	44
 #define SATP_ASID_MASK	_AC(0xFFFF, UL)
diff --git a/arch/riscv/include/asm/kvm_host.h b/arch/riscv/include/asm/kvm_host.h
index 2d8ee53b66c7..1ebf20dfbaa6 100644
--- a/arch/riscv/include/asm/kvm_host.h
+++ b/arch/riscv/include/asm/kvm_host.h
@@ -337,6 +337,15 @@ int kvm_riscv_vcpu_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
 
 void __kvm_riscv_switch_to(struct kvm_vcpu_arch *vcpu_arch);
 
+void kvm_riscv_vcpu_setup_isa(struct kvm_vcpu *vcpu);
+unsigned long kvm_riscv_vcpu_num_regs(struct kvm_vcpu *vcpu);
+int kvm_riscv_vcpu_copy_reg_indices(struct kvm_vcpu *vcpu,
+				    u64 __user *uindices);
+int kvm_riscv_vcpu_get_reg(struct kvm_vcpu *vcpu,
+			   const struct kvm_one_reg *reg);
+int kvm_riscv_vcpu_set_reg(struct kvm_vcpu *vcpu,
+			   const struct kvm_one_reg *reg);
+
 int kvm_riscv_vcpu_set_interrupt(struct kvm_vcpu *vcpu, unsigned int irq);
 int kvm_riscv_vcpu_unset_interrupt(struct kvm_vcpu *vcpu, unsigned int irq);
 void kvm_riscv_vcpu_flush_interrupts(struct kvm_vcpu *vcpu);
diff --git a/arch/riscv/include/asm/kvm_vcpu_vector.h b/arch/riscv/include/asm/kvm_vcpu_vector.h
index ff994fdd6d0d..27f5bccdd8b0 100644
--- a/arch/riscv/include/asm/kvm_vcpu_vector.h
+++ b/arch/riscv/include/asm/kvm_vcpu_vector.h
@@ -74,9 +74,7 @@ static inline void kvm_riscv_vcpu_free_vector_context(struct kvm_vcpu *vcpu)
 #endif
 
 int kvm_riscv_vcpu_get_reg_vector(struct kvm_vcpu *vcpu,
-				  const struct kvm_one_reg *reg,
-				  unsigned long rtype);
+				  const struct kvm_one_reg *reg);
 int kvm_riscv_vcpu_set_reg_vector(struct kvm_vcpu *vcpu,
-				  const struct kvm_one_reg *reg,
-				  unsigned long rtype);
+				  const struct kvm_one_reg *reg);
 #endif
diff --git a/arch/riscv/include/uapi/asm/kvm.h b/arch/riscv/include/uapi/asm/kvm.h
index 930fdc4101cd..992c5e407104 100644
--- a/arch/riscv/include/uapi/asm/kvm.h
+++ b/arch/riscv/include/uapi/asm/kvm.h
@@ -55,6 +55,7 @@ struct kvm_riscv_config {
 	unsigned long marchid;
 	unsigned long mimpid;
 	unsigned long zicboz_block_size;
+	unsigned long satp_mode;
 };
 
 /* CORE registers for KVM_GET_ONE_REG and KVM_SET_ONE_REG */
@@ -124,6 +125,12 @@ enum KVM_RISCV_ISA_EXT_ID {
 	KVM_RISCV_ISA_EXT_SSAIA,
 	KVM_RISCV_ISA_EXT_V,
 	KVM_RISCV_ISA_EXT_SVNAPOT,
+	KVM_RISCV_ISA_EXT_ZBA,
+	KVM_RISCV_ISA_EXT_ZBS,
+	KVM_RISCV_ISA_EXT_ZICNTR,
+	KVM_RISCV_ISA_EXT_ZICSR,
+	KVM_RISCV_ISA_EXT_ZIFENCEI,
+	KVM_RISCV_ISA_EXT_ZIHPM,
 	KVM_RISCV_ISA_EXT_MAX,
 };
 
@@ -193,6 +200,15 @@ enum KVM_RISCV_SBI_EXT_ID {
 
 /* ISA Extension registers are mapped as type 7 */
 #define KVM_REG_RISCV_ISA_EXT		(0x07 << KVM_REG_RISCV_TYPE_SHIFT)
+#define KVM_REG_RISCV_ISA_SINGLE	(0x0 << KVM_REG_RISCV_SUBTYPE_SHIFT)
+#define KVM_REG_RISCV_ISA_MULTI_EN	(0x1 << KVM_REG_RISCV_SUBTYPE_SHIFT)
+#define KVM_REG_RISCV_ISA_MULTI_DIS	(0x2 << KVM_REG_RISCV_SUBTYPE_SHIFT)
+#define KVM_REG_RISCV_ISA_MULTI_REG(__ext_id)	\
+		((__ext_id) / __BITS_PER_LONG)
+#define KVM_REG_RISCV_ISA_MULTI_MASK(__ext_id)	\
+		(1UL << ((__ext_id) % __BITS_PER_LONG))
+#define KVM_REG_RISCV_ISA_MULTI_REG_LAST	\
+		KVM_REG_RISCV_ISA_MULTI_REG(KVM_RISCV_ISA_EXT_MAX - 1)
 
 /* SBI extension registers are mapped as type 8 */
 #define KVM_REG_RISCV_SBI_EXT		(0x08 << KVM_REG_RISCV_TYPE_SHIFT)
diff --git a/arch/riscv/kvm/Makefile b/arch/riscv/kvm/Makefile
index fee0671e2dc1..4c2067fc59fc 100644
--- a/arch/riscv/kvm/Makefile
+++ b/arch/riscv/kvm/Makefile
@@ -19,6 +19,7 @@ kvm-y += vcpu_exit.o
 kvm-y += vcpu_fp.o
 kvm-y += vcpu_vector.o
 kvm-y += vcpu_insn.o
+kvm-y += vcpu_onereg.o
 kvm-y += vcpu_switch.o
 kvm-y += vcpu_sbi.o
 kvm-$(CONFIG_RISCV_SBI_V01) += vcpu_sbi_v01.o
diff --git a/arch/riscv/kvm/aia.c b/arch/riscv/kvm/aia.c
index 585a3b42c52c..74bb27440527 100644
--- a/arch/riscv/kvm/aia.c
+++ b/arch/riscv/kvm/aia.c
@@ -176,7 +176,7 @@ int kvm_riscv_vcpu_aia_get_csr(struct kvm_vcpu *vcpu,
 	struct kvm_vcpu_aia_csr *csr = &vcpu->arch.aia_context.guest_csr;
 
 	if (reg_num >= sizeof(struct kvm_riscv_aia_csr) / sizeof(unsigned long))
-		return -EINVAL;
+		return -ENOENT;
 
 	*out_val = 0;
 	if (kvm_riscv_aia_available())
@@ -192,7 +192,7 @@ int kvm_riscv_vcpu_aia_set_csr(struct kvm_vcpu *vcpu,
 	struct kvm_vcpu_aia_csr *csr = &vcpu->arch.aia_context.guest_csr;
 
 	if (reg_num >= sizeof(struct kvm_riscv_aia_csr) / sizeof(unsigned long))
-		return -EINVAL;
+		return -ENOENT;
 
 	if (kvm_riscv_aia_available()) {
 		((unsigned long *)csr)[reg_num] = val;
diff --git a/arch/riscv/kvm/mmu.c b/arch/riscv/kvm/mmu.c
index f2eb47925806..068c74593871 100644
--- a/arch/riscv/kvm/mmu.c
+++ b/arch/riscv/kvm/mmu.c
@@ -406,12 +406,6 @@ void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
 {
 }
 
-void kvm_arch_flush_remote_tlbs_memslot(struct kvm *kvm,
-					const struct kvm_memory_slot *memslot)
-{
-	kvm_flush_remote_tlbs(kvm);
-}
-
 void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free)
 {
 }
@@ -559,7 +553,7 @@ bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range)
 bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
 {
 	int ret;
-	kvm_pfn_t pfn = pte_pfn(range->pte);
+	kvm_pfn_t pfn = pte_pfn(range->arg.pte);
 
 	if (!kvm->arch.pgd)
 		return false;
diff --git a/arch/riscv/kvm/vcpu.c b/arch/riscv/kvm/vcpu.c
index d12ef99901fc..82229db1ce73 100644
--- a/arch/riscv/kvm/vcpu.c
+++ b/arch/riscv/kvm/vcpu.c
@@ -13,16 +13,12 @@
 #include <linux/kdebug.h>
 #include <linux/module.h>
 #include <linux/percpu.h>
-#include <linux/uaccess.h>
 #include <linux/vmalloc.h>
 #include <linux/sched/signal.h>
 #include <linux/fs.h>
 #include <linux/kvm_host.h>
 #include <asm/csr.h>
 #include <asm/cacheflush.h>
-#include <asm/hwcap.h>
-#include <asm/sbi.h>
-#include <asm/vector.h>
 #include <asm/kvm_vcpu_vector.h>
 
 const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
@@ -46,79 +42,6 @@ const struct kvm_stats_header kvm_vcpu_stats_header = {
 		       sizeof(kvm_vcpu_stats_desc),
 };
 
-#define KVM_RISCV_BASE_ISA_MASK		GENMASK(25, 0)
-
-#define KVM_ISA_EXT_ARR(ext)		[KVM_RISCV_ISA_EXT_##ext] = RISCV_ISA_EXT_##ext
-
-/* Mapping between KVM ISA Extension ID & Host ISA extension ID */
-static const unsigned long kvm_isa_ext_arr[] = {
-	[KVM_RISCV_ISA_EXT_A] = RISCV_ISA_EXT_a,
-	[KVM_RISCV_ISA_EXT_C] = RISCV_ISA_EXT_c,
-	[KVM_RISCV_ISA_EXT_D] = RISCV_ISA_EXT_d,
-	[KVM_RISCV_ISA_EXT_F] = RISCV_ISA_EXT_f,
-	[KVM_RISCV_ISA_EXT_H] = RISCV_ISA_EXT_h,
-	[KVM_RISCV_ISA_EXT_I] = RISCV_ISA_EXT_i,
-	[KVM_RISCV_ISA_EXT_M] = RISCV_ISA_EXT_m,
-	[KVM_RISCV_ISA_EXT_V] = RISCV_ISA_EXT_v,
-
-	KVM_ISA_EXT_ARR(SSAIA),
-	KVM_ISA_EXT_ARR(SSTC),
-	KVM_ISA_EXT_ARR(SVINVAL),
-	KVM_ISA_EXT_ARR(SVNAPOT),
-	KVM_ISA_EXT_ARR(SVPBMT),
-	KVM_ISA_EXT_ARR(ZBB),
-	KVM_ISA_EXT_ARR(ZIHINTPAUSE),
-	KVM_ISA_EXT_ARR(ZICBOM),
-	KVM_ISA_EXT_ARR(ZICBOZ),
-};
-
-static unsigned long kvm_riscv_vcpu_base2isa_ext(unsigned long base_ext)
-{
-	unsigned long i;
-
-	for (i = 0; i < KVM_RISCV_ISA_EXT_MAX; i++) {
-		if (kvm_isa_ext_arr[i] == base_ext)
-			return i;
-	}
-
-	return KVM_RISCV_ISA_EXT_MAX;
-}
-
-static bool kvm_riscv_vcpu_isa_enable_allowed(unsigned long ext)
-{
-	switch (ext) {
-	case KVM_RISCV_ISA_EXT_H:
-		return false;
-	case KVM_RISCV_ISA_EXT_V:
-		return riscv_v_vstate_ctrl_user_allowed();
-	default:
-		break;
-	}
-
-	return true;
-}
-
-static bool kvm_riscv_vcpu_isa_disable_allowed(unsigned long ext)
-{
-	switch (ext) {
-	case KVM_RISCV_ISA_EXT_A:
-	case KVM_RISCV_ISA_EXT_C:
-	case KVM_RISCV_ISA_EXT_I:
-	case KVM_RISCV_ISA_EXT_M:
-	case KVM_RISCV_ISA_EXT_SSAIA:
-	case KVM_RISCV_ISA_EXT_SSTC:
-	case KVM_RISCV_ISA_EXT_SVINVAL:
-	case KVM_RISCV_ISA_EXT_SVNAPOT:
-	case KVM_RISCV_ISA_EXT_ZIHINTPAUSE:
-	case KVM_RISCV_ISA_EXT_ZBB:
-		return false;
-	default:
-		break;
-	}
-
-	return true;
-}
-
 static void kvm_riscv_reset_vcpu(struct kvm_vcpu *vcpu)
 {
 	struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr;
@@ -176,7 +99,6 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
 	int rc;
 	struct kvm_cpu_context *cntx;
 	struct kvm_vcpu_csr *reset_csr = &vcpu->arch.guest_reset_csr;
-	unsigned long host_isa, i;
 
 	/* Mark this VCPU never ran */
 	vcpu->arch.ran_atleast_once = false;
@@ -184,12 +106,7 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
 	bitmap_zero(vcpu->arch.isa, RISCV_ISA_EXT_MAX);
 
 	/* Setup ISA features available to VCPU */
-	for (i = 0; i < ARRAY_SIZE(kvm_isa_ext_arr); i++) {
-		host_isa = kvm_isa_ext_arr[i];
-		if (__riscv_isa_extension_available(NULL, host_isa) &&
-		    kvm_riscv_vcpu_isa_enable_allowed(i))
-			set_bit(host_isa, vcpu->arch.isa);
-	}
+	kvm_riscv_vcpu_setup_isa(vcpu);
 
 	/* Setup vendor, arch, and implementation details */
 	vcpu->arch.mvendorid = sbi_get_mvendorid();
@@ -294,450 +211,6 @@ vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
 	return VM_FAULT_SIGBUS;
 }
 
-static int kvm_riscv_vcpu_get_reg_config(struct kvm_vcpu *vcpu,
-					 const struct kvm_one_reg *reg)
-{
-	unsigned long __user *uaddr =
-			(unsigned long __user *)(unsigned long)reg->addr;
-	unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK |
-					    KVM_REG_SIZE_MASK |
-					    KVM_REG_RISCV_CONFIG);
-	unsigned long reg_val;
-
-	if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long))
-		return -EINVAL;
-
-	switch (reg_num) {
-	case KVM_REG_RISCV_CONFIG_REG(isa):
-		reg_val = vcpu->arch.isa[0] & KVM_RISCV_BASE_ISA_MASK;
-		break;
-	case KVM_REG_RISCV_CONFIG_REG(zicbom_block_size):
-		if (!riscv_isa_extension_available(vcpu->arch.isa, ZICBOM))
-			return -EINVAL;
-		reg_val = riscv_cbom_block_size;
-		break;
-	case KVM_REG_RISCV_CONFIG_REG(zicboz_block_size):
-		if (!riscv_isa_extension_available(vcpu->arch.isa, ZICBOZ))
-			return -EINVAL;
-		reg_val = riscv_cboz_block_size;
-		break;
-	case KVM_REG_RISCV_CONFIG_REG(mvendorid):
-		reg_val = vcpu->arch.mvendorid;
-		break;
-	case KVM_REG_RISCV_CONFIG_REG(marchid):
-		reg_val = vcpu->arch.marchid;
-		break;
-	case KVM_REG_RISCV_CONFIG_REG(mimpid):
-		reg_val = vcpu->arch.mimpid;
-		break;
-	default:
-		return -EINVAL;
-	}
-
-	if (copy_to_user(uaddr, &reg_val, KVM_REG_SIZE(reg->id)))
-		return -EFAULT;
-
-	return 0;
-}
-
-static int kvm_riscv_vcpu_set_reg_config(struct kvm_vcpu *vcpu,
-					 const struct kvm_one_reg *reg)
-{
-	unsigned long __user *uaddr =
-			(unsigned long __user *)(unsigned long)reg->addr;
-	unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK |
-					    KVM_REG_SIZE_MASK |
-					    KVM_REG_RISCV_CONFIG);
-	unsigned long i, isa_ext, reg_val;
-
-	if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long))
-		return -EINVAL;
-
-	if (copy_from_user(&reg_val, uaddr, KVM_REG_SIZE(reg->id)))
-		return -EFAULT;
-
-	switch (reg_num) {
-	case KVM_REG_RISCV_CONFIG_REG(isa):
-		/*
-		 * This ONE REG interface is only defined for
-		 * single letter extensions.
-		 */
-		if (fls(reg_val) >= RISCV_ISA_EXT_BASE)
-			return -EINVAL;
-
-		if (!vcpu->arch.ran_atleast_once) {
-			/* Ignore the enable/disable request for certain extensions */
-			for (i = 0; i < RISCV_ISA_EXT_BASE; i++) {
-				isa_ext = kvm_riscv_vcpu_base2isa_ext(i);
-				if (isa_ext >= KVM_RISCV_ISA_EXT_MAX) {
-					reg_val &= ~BIT(i);
-					continue;
-				}
-				if (!kvm_riscv_vcpu_isa_enable_allowed(isa_ext))
-					if (reg_val & BIT(i))
-						reg_val &= ~BIT(i);
-				if (!kvm_riscv_vcpu_isa_disable_allowed(isa_ext))
-					if (!(reg_val & BIT(i)))
-						reg_val |= BIT(i);
-			}
-			reg_val &= riscv_isa_extension_base(NULL);
-			/* Do not modify anything beyond single letter extensions */
-			reg_val = (vcpu->arch.isa[0] & ~KVM_RISCV_BASE_ISA_MASK) |
-				  (reg_val & KVM_RISCV_BASE_ISA_MASK);
-			vcpu->arch.isa[0] = reg_val;
-			kvm_riscv_vcpu_fp_reset(vcpu);
-		} else {
-			return -EOPNOTSUPP;
-		}
-		break;
-	case KVM_REG_RISCV_CONFIG_REG(zicbom_block_size):
-		return -EOPNOTSUPP;
-	case KVM_REG_RISCV_CONFIG_REG(zicboz_block_size):
-		return -EOPNOTSUPP;
-	case KVM_REG_RISCV_CONFIG_REG(mvendorid):
-		if (!vcpu->arch.ran_atleast_once)
-			vcpu->arch.mvendorid = reg_val;
-		else
-			return -EBUSY;
-		break;
-	case KVM_REG_RISCV_CONFIG_REG(marchid):
-		if (!vcpu->arch.ran_atleast_once)
-			vcpu->arch.marchid = reg_val;
-		else
-			return -EBUSY;
-		break;
-	case KVM_REG_RISCV_CONFIG_REG(mimpid):
-		if (!vcpu->arch.ran_atleast_once)
-			vcpu->arch.mimpid = reg_val;
-		else
-			return -EBUSY;
-		break;
-	default:
-		return -EINVAL;
-	}
-
-	return 0;
-}
-
-static int kvm_riscv_vcpu_get_reg_core(struct kvm_vcpu *vcpu,
-				       const struct kvm_one_reg *reg)
-{
-	struct kvm_cpu_context *cntx = &vcpu->arch.guest_context;
-	unsigned long __user *uaddr =
-			(unsigned long __user *)(unsigned long)reg->addr;
-	unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK |
-					    KVM_REG_SIZE_MASK |
-					    KVM_REG_RISCV_CORE);
-	unsigned long reg_val;
-
-	if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long))
-		return -EINVAL;
-	if (reg_num >= sizeof(struct kvm_riscv_core) / sizeof(unsigned long))
-		return -EINVAL;
-
-	if (reg_num == KVM_REG_RISCV_CORE_REG(regs.pc))
-		reg_val = cntx->sepc;
-	else if (KVM_REG_RISCV_CORE_REG(regs.pc) < reg_num &&
-		 reg_num <= KVM_REG_RISCV_CORE_REG(regs.t6))
-		reg_val = ((unsigned long *)cntx)[reg_num];
-	else if (reg_num == KVM_REG_RISCV_CORE_REG(mode))
-		reg_val = (cntx->sstatus & SR_SPP) ?
-				KVM_RISCV_MODE_S : KVM_RISCV_MODE_U;
-	else
-		return -EINVAL;
-
-	if (copy_to_user(uaddr, &reg_val, KVM_REG_SIZE(reg->id)))
-		return -EFAULT;
-
-	return 0;
-}
-
-static int kvm_riscv_vcpu_set_reg_core(struct kvm_vcpu *vcpu,
-				       const struct kvm_one_reg *reg)
-{
-	struct kvm_cpu_context *cntx = &vcpu->arch.guest_context;
-	unsigned long __user *uaddr =
-			(unsigned long __user *)(unsigned long)reg->addr;
-	unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK |
-					    KVM_REG_SIZE_MASK |
-					    KVM_REG_RISCV_CORE);
-	unsigned long reg_val;
-
-	if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long))
-		return -EINVAL;
-	if (reg_num >= sizeof(struct kvm_riscv_core) / sizeof(unsigned long))
-		return -EINVAL;
-
-	if (copy_from_user(&reg_val, uaddr, KVM_REG_SIZE(reg->id)))
-		return -EFAULT;
-
-	if (reg_num == KVM_REG_RISCV_CORE_REG(regs.pc))
-		cntx->sepc = reg_val;
-	else if (KVM_REG_RISCV_CORE_REG(regs.pc) < reg_num &&
-		 reg_num <= KVM_REG_RISCV_CORE_REG(regs.t6))
-		((unsigned long *)cntx)[reg_num] = reg_val;
-	else if (reg_num == KVM_REG_RISCV_CORE_REG(mode)) {
-		if (reg_val == KVM_RISCV_MODE_S)
-			cntx->sstatus |= SR_SPP;
-		else
-			cntx->sstatus &= ~SR_SPP;
-	} else
-		return -EINVAL;
-
-	return 0;
-}
-
-static int kvm_riscv_vcpu_general_get_csr(struct kvm_vcpu *vcpu,
-					  unsigned long reg_num,
-					  unsigned long *out_val)
-{
-	struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr;
-
-	if (reg_num >= sizeof(struct kvm_riscv_csr) / sizeof(unsigned long))
-		return -EINVAL;
-
-	if (reg_num == KVM_REG_RISCV_CSR_REG(sip)) {
-		kvm_riscv_vcpu_flush_interrupts(vcpu);
-		*out_val = (csr->hvip >> VSIP_TO_HVIP_SHIFT) & VSIP_VALID_MASK;
-		*out_val |= csr->hvip & ~IRQ_LOCAL_MASK;
-	} else
-		*out_val = ((unsigned long *)csr)[reg_num];
-
-	return 0;
-}
-
-static inline int kvm_riscv_vcpu_general_set_csr(struct kvm_vcpu *vcpu,
-						 unsigned long reg_num,
-						 unsigned long reg_val)
-{
-	struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr;
-
-	if (reg_num >= sizeof(struct kvm_riscv_csr) / sizeof(unsigned long))
-		return -EINVAL;
-
-	if (reg_num == KVM_REG_RISCV_CSR_REG(sip)) {
-		reg_val &= VSIP_VALID_MASK;
-		reg_val <<= VSIP_TO_HVIP_SHIFT;
-	}
-
-	((unsigned long *)csr)[reg_num] = reg_val;
-
-	if (reg_num == KVM_REG_RISCV_CSR_REG(sip))
-		WRITE_ONCE(vcpu->arch.irqs_pending_mask[0], 0);
-
-	return 0;
-}
-
-static int kvm_riscv_vcpu_get_reg_csr(struct kvm_vcpu *vcpu,
-				      const struct kvm_one_reg *reg)
-{
-	int rc;
-	unsigned long __user *uaddr =
-			(unsigned long __user *)(unsigned long)reg->addr;
-	unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK |
-					    KVM_REG_SIZE_MASK |
-					    KVM_REG_RISCV_CSR);
-	unsigned long reg_val, reg_subtype;
-
-	if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long))
-		return -EINVAL;
-
-	reg_subtype = reg_num & KVM_REG_RISCV_SUBTYPE_MASK;
-	reg_num &= ~KVM_REG_RISCV_SUBTYPE_MASK;
-	switch (reg_subtype) {
-	case KVM_REG_RISCV_CSR_GENERAL:
-		rc = kvm_riscv_vcpu_general_get_csr(vcpu, reg_num, &reg_val);
-		break;
-	case KVM_REG_RISCV_CSR_AIA:
-		rc = kvm_riscv_vcpu_aia_get_csr(vcpu, reg_num, &reg_val);
-		break;
-	default:
-		rc = -EINVAL;
-		break;
-	}
-	if (rc)
-		return rc;
-
-	if (copy_to_user(uaddr, &reg_val, KVM_REG_SIZE(reg->id)))
-		return -EFAULT;
-
-	return 0;
-}
-
-static int kvm_riscv_vcpu_set_reg_csr(struct kvm_vcpu *vcpu,
-				      const struct kvm_one_reg *reg)
-{
-	int rc;
-	unsigned long __user *uaddr =
-			(unsigned long __user *)(unsigned long)reg->addr;
-	unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK |
-					    KVM_REG_SIZE_MASK |
-					    KVM_REG_RISCV_CSR);
-	unsigned long reg_val, reg_subtype;
-
-	if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long))
-		return -EINVAL;
-
-	if (copy_from_user(&reg_val, uaddr, KVM_REG_SIZE(reg->id)))
-		return -EFAULT;
-
-	reg_subtype = reg_num & KVM_REG_RISCV_SUBTYPE_MASK;
-	reg_num &= ~KVM_REG_RISCV_SUBTYPE_MASK;
-	switch (reg_subtype) {
-	case KVM_REG_RISCV_CSR_GENERAL:
-		rc = kvm_riscv_vcpu_general_set_csr(vcpu, reg_num, reg_val);
-		break;
-	case KVM_REG_RISCV_CSR_AIA:
-		rc = kvm_riscv_vcpu_aia_set_csr(vcpu, reg_num, reg_val);
-		break;
-	default:
-		rc = -EINVAL;
-		break;
-	}
-	if (rc)
-		return rc;
-
-	return 0;
-}
-
-static int kvm_riscv_vcpu_get_reg_isa_ext(struct kvm_vcpu *vcpu,
-					  const struct kvm_one_reg *reg)
-{
-	unsigned long __user *uaddr =
-			(unsigned long __user *)(unsigned long)reg->addr;
-	unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK |
-					    KVM_REG_SIZE_MASK |
-					    KVM_REG_RISCV_ISA_EXT);
-	unsigned long reg_val = 0;
-	unsigned long host_isa_ext;
-
-	if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long))
-		return -EINVAL;
-
-	if (reg_num >= KVM_RISCV_ISA_EXT_MAX ||
-	    reg_num >= ARRAY_SIZE(kvm_isa_ext_arr))
-		return -EINVAL;
-
-	host_isa_ext = kvm_isa_ext_arr[reg_num];
-	if (__riscv_isa_extension_available(vcpu->arch.isa, host_isa_ext))
-		reg_val = 1; /* Mark the given extension as available */
-
-	if (copy_to_user(uaddr, &reg_val, KVM_REG_SIZE(reg->id)))
-		return -EFAULT;
-
-	return 0;
-}
-
-static int kvm_riscv_vcpu_set_reg_isa_ext(struct kvm_vcpu *vcpu,
-					  const struct kvm_one_reg *reg)
-{
-	unsigned long __user *uaddr =
-			(unsigned long __user *)(unsigned long)reg->addr;
-	unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK |
-					    KVM_REG_SIZE_MASK |
-					    KVM_REG_RISCV_ISA_EXT);
-	unsigned long reg_val;
-	unsigned long host_isa_ext;
-
-	if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long))
-		return -EINVAL;
-
-	if (reg_num >= KVM_RISCV_ISA_EXT_MAX ||
-	    reg_num >= ARRAY_SIZE(kvm_isa_ext_arr))
-		return -EINVAL;
-
-	if (copy_from_user(&reg_val, uaddr, KVM_REG_SIZE(reg->id)))
-		return -EFAULT;
-
-	host_isa_ext = kvm_isa_ext_arr[reg_num];
-	if (!__riscv_isa_extension_available(NULL, host_isa_ext))
-		return	-EOPNOTSUPP;
-
-	if (!vcpu->arch.ran_atleast_once) {
-		/*
-		 * All multi-letter extension and a few single letter
-		 * extension can be disabled
-		 */
-		if (reg_val == 1 &&
-		    kvm_riscv_vcpu_isa_enable_allowed(reg_num))
-			set_bit(host_isa_ext, vcpu->arch.isa);
-		else if (!reg_val &&
-			 kvm_riscv_vcpu_isa_disable_allowed(reg_num))
-			clear_bit(host_isa_ext, vcpu->arch.isa);
-		else
-			return -EINVAL;
-		kvm_riscv_vcpu_fp_reset(vcpu);
-	} else {
-		return -EOPNOTSUPP;
-	}
-
-	return 0;
-}
-
-static int kvm_riscv_vcpu_set_reg(struct kvm_vcpu *vcpu,
-				  const struct kvm_one_reg *reg)
-{
-	switch (reg->id & KVM_REG_RISCV_TYPE_MASK) {
-	case KVM_REG_RISCV_CONFIG:
-		return kvm_riscv_vcpu_set_reg_config(vcpu, reg);
-	case KVM_REG_RISCV_CORE:
-		return kvm_riscv_vcpu_set_reg_core(vcpu, reg);
-	case KVM_REG_RISCV_CSR:
-		return kvm_riscv_vcpu_set_reg_csr(vcpu, reg);
-	case KVM_REG_RISCV_TIMER:
-		return kvm_riscv_vcpu_set_reg_timer(vcpu, reg);
-	case KVM_REG_RISCV_FP_F:
-		return kvm_riscv_vcpu_set_reg_fp(vcpu, reg,
-						 KVM_REG_RISCV_FP_F);
-	case KVM_REG_RISCV_FP_D:
-		return kvm_riscv_vcpu_set_reg_fp(vcpu, reg,
-						 KVM_REG_RISCV_FP_D);
-	case KVM_REG_RISCV_ISA_EXT:
-		return kvm_riscv_vcpu_set_reg_isa_ext(vcpu, reg);
-	case KVM_REG_RISCV_SBI_EXT:
-		return kvm_riscv_vcpu_set_reg_sbi_ext(vcpu, reg);
-	case KVM_REG_RISCV_VECTOR:
-		return kvm_riscv_vcpu_set_reg_vector(vcpu, reg,
-						 KVM_REG_RISCV_VECTOR);
-	default:
-		break;
-	}
-
-	return -EINVAL;
-}
-
-static int kvm_riscv_vcpu_get_reg(struct kvm_vcpu *vcpu,
-				  const struct kvm_one_reg *reg)
-{
-	switch (reg->id & KVM_REG_RISCV_TYPE_MASK) {
-	case KVM_REG_RISCV_CONFIG:
-		return kvm_riscv_vcpu_get_reg_config(vcpu, reg);
-	case KVM_REG_RISCV_CORE:
-		return kvm_riscv_vcpu_get_reg_core(vcpu, reg);
-	case KVM_REG_RISCV_CSR:
-		return kvm_riscv_vcpu_get_reg_csr(vcpu, reg);
-	case KVM_REG_RISCV_TIMER:
-		return kvm_riscv_vcpu_get_reg_timer(vcpu, reg);
-	case KVM_REG_RISCV_FP_F:
-		return kvm_riscv_vcpu_get_reg_fp(vcpu, reg,
-						 KVM_REG_RISCV_FP_F);
-	case KVM_REG_RISCV_FP_D:
-		return kvm_riscv_vcpu_get_reg_fp(vcpu, reg,
-						 KVM_REG_RISCV_FP_D);
-	case KVM_REG_RISCV_ISA_EXT:
-		return kvm_riscv_vcpu_get_reg_isa_ext(vcpu, reg);
-	case KVM_REG_RISCV_SBI_EXT:
-		return kvm_riscv_vcpu_get_reg_sbi_ext(vcpu, reg);
-	case KVM_REG_RISCV_VECTOR:
-		return kvm_riscv_vcpu_get_reg_vector(vcpu, reg,
-						 KVM_REG_RISCV_VECTOR);
-	default:
-		break;
-	}
-
-	return -EINVAL;
-}
-
 long kvm_arch_vcpu_async_ioctl(struct file *filp,
 			       unsigned int ioctl, unsigned long arg)
 {
@@ -781,6 +254,24 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
 			r = kvm_riscv_vcpu_get_reg(vcpu, &reg);
 		break;
 	}
+	case KVM_GET_REG_LIST: {
+		struct kvm_reg_list __user *user_list = argp;
+		struct kvm_reg_list reg_list;
+		unsigned int n;
+
+		r = -EFAULT;
+		if (copy_from_user(&reg_list, user_list, sizeof(reg_list)))
+			break;
+		n = reg_list.n;
+		reg_list.n = kvm_riscv_vcpu_num_regs(vcpu);
+		if (copy_to_user(user_list, &reg_list, sizeof(reg_list)))
+			break;
+		r = -E2BIG;
+		if (n < reg_list.n)
+			break;
+		r = kvm_riscv_vcpu_copy_reg_indices(vcpu, user_list->reg);
+		break;
+	}
 	default:
 		break;
 	}
diff --git a/arch/riscv/kvm/vcpu_fp.c b/arch/riscv/kvm/vcpu_fp.c
index 9d8cbc42057a..08ba48a395aa 100644
--- a/arch/riscv/kvm/vcpu_fp.c
+++ b/arch/riscv/kvm/vcpu_fp.c
@@ -96,7 +96,7 @@ int kvm_riscv_vcpu_get_reg_fp(struct kvm_vcpu *vcpu,
 			  reg_num <= KVM_REG_RISCV_FP_F_REG(f[31]))
 			reg_val = &cntx->fp.f.f[reg_num];
 		else
-			return -EINVAL;
+			return -ENOENT;
 	} else if ((rtype == KVM_REG_RISCV_FP_D) &&
 		   riscv_isa_extension_available(vcpu->arch.isa, d)) {
 		if (reg_num == KVM_REG_RISCV_FP_D_REG(fcsr)) {
@@ -109,9 +109,9 @@ int kvm_riscv_vcpu_get_reg_fp(struct kvm_vcpu *vcpu,
 				return -EINVAL;
 			reg_val = &cntx->fp.d.f[reg_num];
 		} else
-			return -EINVAL;
+			return -ENOENT;
 	} else
-		return -EINVAL;
+		return -ENOENT;
 
 	if (copy_to_user(uaddr, reg_val, KVM_REG_SIZE(reg->id)))
 		return -EFAULT;
@@ -141,7 +141,7 @@ int kvm_riscv_vcpu_set_reg_fp(struct kvm_vcpu *vcpu,
 			  reg_num <= KVM_REG_RISCV_FP_F_REG(f[31]))
 			reg_val = &cntx->fp.f.f[reg_num];
 		else
-			return -EINVAL;
+			return -ENOENT;
 	} else if ((rtype == KVM_REG_RISCV_FP_D) &&
 		   riscv_isa_extension_available(vcpu->arch.isa, d)) {
 		if (reg_num == KVM_REG_RISCV_FP_D_REG(fcsr)) {
@@ -154,9 +154,9 @@ int kvm_riscv_vcpu_set_reg_fp(struct kvm_vcpu *vcpu,
 				return -EINVAL;
 			reg_val = &cntx->fp.d.f[reg_num];
 		} else
-			return -EINVAL;
+			return -ENOENT;
 	} else
-		return -EINVAL;
+		return -ENOENT;
 
 	if (copy_from_user(reg_val, uaddr, KVM_REG_SIZE(reg->id)))
 		return -EFAULT;
diff --git a/arch/riscv/kvm/vcpu_onereg.c b/arch/riscv/kvm/vcpu_onereg.c
new file mode 100644
index 000000000000..1b7e9fa265cb
--- /dev/null
+++ b/arch/riscv/kvm/vcpu_onereg.c
@@ -0,0 +1,1051 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2019 Western Digital Corporation or its affiliates.
+ * Copyright (C) 2023 Ventana Micro Systems Inc.
+ *
+ * Authors:
+ *	Anup Patel <apatel@ventanamicro.com>
+ */
+
+#include <linux/bitops.h>
+#include <linux/errno.h>
+#include <linux/err.h>
+#include <linux/uaccess.h>
+#include <linux/kvm_host.h>
+#include <asm/cacheflush.h>
+#include <asm/hwcap.h>
+#include <asm/kvm_vcpu_vector.h>
+#include <asm/vector.h>
+
+#define KVM_RISCV_BASE_ISA_MASK		GENMASK(25, 0)
+
+#define KVM_ISA_EXT_ARR(ext)		\
+[KVM_RISCV_ISA_EXT_##ext] = RISCV_ISA_EXT_##ext
+
+/* Mapping between KVM ISA Extension ID & Host ISA extension ID */
+static const unsigned long kvm_isa_ext_arr[] = {
+	/* Single letter extensions (alphabetically sorted) */
+	[KVM_RISCV_ISA_EXT_A] = RISCV_ISA_EXT_a,
+	[KVM_RISCV_ISA_EXT_C] = RISCV_ISA_EXT_c,
+	[KVM_RISCV_ISA_EXT_D] = RISCV_ISA_EXT_d,
+	[KVM_RISCV_ISA_EXT_F] = RISCV_ISA_EXT_f,
+	[KVM_RISCV_ISA_EXT_H] = RISCV_ISA_EXT_h,
+	[KVM_RISCV_ISA_EXT_I] = RISCV_ISA_EXT_i,
+	[KVM_RISCV_ISA_EXT_M] = RISCV_ISA_EXT_m,
+	[KVM_RISCV_ISA_EXT_V] = RISCV_ISA_EXT_v,
+	/* Multi letter extensions (alphabetically sorted) */
+	KVM_ISA_EXT_ARR(SSAIA),
+	KVM_ISA_EXT_ARR(SSTC),
+	KVM_ISA_EXT_ARR(SVINVAL),
+	KVM_ISA_EXT_ARR(SVNAPOT),
+	KVM_ISA_EXT_ARR(SVPBMT),
+	KVM_ISA_EXT_ARR(ZBA),
+	KVM_ISA_EXT_ARR(ZBB),
+	KVM_ISA_EXT_ARR(ZBS),
+	KVM_ISA_EXT_ARR(ZICBOM),
+	KVM_ISA_EXT_ARR(ZICBOZ),
+	KVM_ISA_EXT_ARR(ZICNTR),
+	KVM_ISA_EXT_ARR(ZICSR),
+	KVM_ISA_EXT_ARR(ZIFENCEI),
+	KVM_ISA_EXT_ARR(ZIHINTPAUSE),
+	KVM_ISA_EXT_ARR(ZIHPM),
+};
+
+static unsigned long kvm_riscv_vcpu_base2isa_ext(unsigned long base_ext)
+{
+	unsigned long i;
+
+	for (i = 0; i < KVM_RISCV_ISA_EXT_MAX; i++) {
+		if (kvm_isa_ext_arr[i] == base_ext)
+			return i;
+	}
+
+	return KVM_RISCV_ISA_EXT_MAX;
+}
+
+static bool kvm_riscv_vcpu_isa_enable_allowed(unsigned long ext)
+{
+	switch (ext) {
+	case KVM_RISCV_ISA_EXT_H:
+		return false;
+	case KVM_RISCV_ISA_EXT_V:
+		return riscv_v_vstate_ctrl_user_allowed();
+	default:
+		break;
+	}
+
+	return true;
+}
+
+static bool kvm_riscv_vcpu_isa_disable_allowed(unsigned long ext)
+{
+	switch (ext) {
+	case KVM_RISCV_ISA_EXT_A:
+	case KVM_RISCV_ISA_EXT_C:
+	case KVM_RISCV_ISA_EXT_I:
+	case KVM_RISCV_ISA_EXT_M:
+	case KVM_RISCV_ISA_EXT_SSAIA:
+	case KVM_RISCV_ISA_EXT_SSTC:
+	case KVM_RISCV_ISA_EXT_SVINVAL:
+	case KVM_RISCV_ISA_EXT_SVNAPOT:
+	case KVM_RISCV_ISA_EXT_ZBA:
+	case KVM_RISCV_ISA_EXT_ZBB:
+	case KVM_RISCV_ISA_EXT_ZBS:
+	case KVM_RISCV_ISA_EXT_ZICNTR:
+	case KVM_RISCV_ISA_EXT_ZICSR:
+	case KVM_RISCV_ISA_EXT_ZIFENCEI:
+	case KVM_RISCV_ISA_EXT_ZIHINTPAUSE:
+	case KVM_RISCV_ISA_EXT_ZIHPM:
+		return false;
+	default:
+		break;
+	}
+
+	return true;
+}
+
+void kvm_riscv_vcpu_setup_isa(struct kvm_vcpu *vcpu)
+{
+	unsigned long host_isa, i;
+
+	for (i = 0; i < ARRAY_SIZE(kvm_isa_ext_arr); i++) {
+		host_isa = kvm_isa_ext_arr[i];
+		if (__riscv_isa_extension_available(NULL, host_isa) &&
+		    kvm_riscv_vcpu_isa_enable_allowed(i))
+			set_bit(host_isa, vcpu->arch.isa);
+	}
+}
+
+static int kvm_riscv_vcpu_get_reg_config(struct kvm_vcpu *vcpu,
+					 const struct kvm_one_reg *reg)
+{
+	unsigned long __user *uaddr =
+			(unsigned long __user *)(unsigned long)reg->addr;
+	unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK |
+					    KVM_REG_SIZE_MASK |
+					    KVM_REG_RISCV_CONFIG);
+	unsigned long reg_val;
+
+	if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long))
+		return -EINVAL;
+
+	switch (reg_num) {
+	case KVM_REG_RISCV_CONFIG_REG(isa):
+		reg_val = vcpu->arch.isa[0] & KVM_RISCV_BASE_ISA_MASK;
+		break;
+	case KVM_REG_RISCV_CONFIG_REG(zicbom_block_size):
+		if (!riscv_isa_extension_available(vcpu->arch.isa, ZICBOM))
+			return -ENOENT;
+		reg_val = riscv_cbom_block_size;
+		break;
+	case KVM_REG_RISCV_CONFIG_REG(zicboz_block_size):
+		if (!riscv_isa_extension_available(vcpu->arch.isa, ZICBOZ))
+			return -ENOENT;
+		reg_val = riscv_cboz_block_size;
+		break;
+	case KVM_REG_RISCV_CONFIG_REG(mvendorid):
+		reg_val = vcpu->arch.mvendorid;
+		break;
+	case KVM_REG_RISCV_CONFIG_REG(marchid):
+		reg_val = vcpu->arch.marchid;
+		break;
+	case KVM_REG_RISCV_CONFIG_REG(mimpid):
+		reg_val = vcpu->arch.mimpid;
+		break;
+	case KVM_REG_RISCV_CONFIG_REG(satp_mode):
+		reg_val = satp_mode >> SATP_MODE_SHIFT;
+		break;
+	default:
+		return -ENOENT;
+	}
+
+	if (copy_to_user(uaddr, &reg_val, KVM_REG_SIZE(reg->id)))
+		return -EFAULT;
+
+	return 0;
+}
+
+static int kvm_riscv_vcpu_set_reg_config(struct kvm_vcpu *vcpu,
+					 const struct kvm_one_reg *reg)
+{
+	unsigned long __user *uaddr =
+			(unsigned long __user *)(unsigned long)reg->addr;
+	unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK |
+					    KVM_REG_SIZE_MASK |
+					    KVM_REG_RISCV_CONFIG);
+	unsigned long i, isa_ext, reg_val;
+
+	if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long))
+		return -EINVAL;
+
+	if (copy_from_user(&reg_val, uaddr, KVM_REG_SIZE(reg->id)))
+		return -EFAULT;
+
+	switch (reg_num) {
+	case KVM_REG_RISCV_CONFIG_REG(isa):
+		/*
+		 * This ONE REG interface is only defined for
+		 * single letter extensions.
+		 */
+		if (fls(reg_val) >= RISCV_ISA_EXT_BASE)
+			return -EINVAL;
+
+		/*
+		 * Return early (i.e. do nothing) if reg_val is the same
+		 * value retrievable via kvm_riscv_vcpu_get_reg_config().
+		 */
+		if (reg_val == (vcpu->arch.isa[0] & KVM_RISCV_BASE_ISA_MASK))
+			break;
+
+		if (!vcpu->arch.ran_atleast_once) {
+			/* Ignore the enable/disable request for certain extensions */
+			for (i = 0; i < RISCV_ISA_EXT_BASE; i++) {
+				isa_ext = kvm_riscv_vcpu_base2isa_ext(i);
+				if (isa_ext >= KVM_RISCV_ISA_EXT_MAX) {
+					reg_val &= ~BIT(i);
+					continue;
+				}
+				if (!kvm_riscv_vcpu_isa_enable_allowed(isa_ext))
+					if (reg_val & BIT(i))
+						reg_val &= ~BIT(i);
+				if (!kvm_riscv_vcpu_isa_disable_allowed(isa_ext))
+					if (!(reg_val & BIT(i)))
+						reg_val |= BIT(i);
+			}
+			reg_val &= riscv_isa_extension_base(NULL);
+			/* Do not modify anything beyond single letter extensions */
+			reg_val = (vcpu->arch.isa[0] & ~KVM_RISCV_BASE_ISA_MASK) |
+				  (reg_val & KVM_RISCV_BASE_ISA_MASK);
+			vcpu->arch.isa[0] = reg_val;
+			kvm_riscv_vcpu_fp_reset(vcpu);
+		} else {
+			return -EBUSY;
+		}
+		break;
+	case KVM_REG_RISCV_CONFIG_REG(zicbom_block_size):
+		if (!riscv_isa_extension_available(vcpu->arch.isa, ZICBOM))
+			return -ENOENT;
+		if (reg_val != riscv_cbom_block_size)
+			return -EINVAL;
+		break;
+	case KVM_REG_RISCV_CONFIG_REG(zicboz_block_size):
+		if (!riscv_isa_extension_available(vcpu->arch.isa, ZICBOZ))
+			return -ENOENT;
+		if (reg_val != riscv_cboz_block_size)
+			return -EINVAL;
+		break;
+	case KVM_REG_RISCV_CONFIG_REG(mvendorid):
+		if (reg_val == vcpu->arch.mvendorid)
+			break;
+		if (!vcpu->arch.ran_atleast_once)
+			vcpu->arch.mvendorid = reg_val;
+		else
+			return -EBUSY;
+		break;
+	case KVM_REG_RISCV_CONFIG_REG(marchid):
+		if (reg_val == vcpu->arch.marchid)
+			break;
+		if (!vcpu->arch.ran_atleast_once)
+			vcpu->arch.marchid = reg_val;
+		else
+			return -EBUSY;
+		break;
+	case KVM_REG_RISCV_CONFIG_REG(mimpid):
+		if (reg_val == vcpu->arch.mimpid)
+			break;
+		if (!vcpu->arch.ran_atleast_once)
+			vcpu->arch.mimpid = reg_val;
+		else
+			return -EBUSY;
+		break;
+	case KVM_REG_RISCV_CONFIG_REG(satp_mode):
+		if (reg_val != (satp_mode >> SATP_MODE_SHIFT))
+			return -EINVAL;
+		break;
+	default:
+		return -ENOENT;
+	}
+
+	return 0;
+}
+
+static int kvm_riscv_vcpu_get_reg_core(struct kvm_vcpu *vcpu,
+				       const struct kvm_one_reg *reg)
+{
+	struct kvm_cpu_context *cntx = &vcpu->arch.guest_context;
+	unsigned long __user *uaddr =
+			(unsigned long __user *)(unsigned long)reg->addr;
+	unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK |
+					    KVM_REG_SIZE_MASK |
+					    KVM_REG_RISCV_CORE);
+	unsigned long reg_val;
+
+	if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long))
+		return -EINVAL;
+	if (reg_num >= sizeof(struct kvm_riscv_core) / sizeof(unsigned long))
+		return -ENOENT;
+
+	if (reg_num == KVM_REG_RISCV_CORE_REG(regs.pc))
+		reg_val = cntx->sepc;
+	else if (KVM_REG_RISCV_CORE_REG(regs.pc) < reg_num &&
+		 reg_num <= KVM_REG_RISCV_CORE_REG(regs.t6))
+		reg_val = ((unsigned long *)cntx)[reg_num];
+	else if (reg_num == KVM_REG_RISCV_CORE_REG(mode))
+		reg_val = (cntx->sstatus & SR_SPP) ?
+				KVM_RISCV_MODE_S : KVM_RISCV_MODE_U;
+	else
+		return -ENOENT;
+
+	if (copy_to_user(uaddr, &reg_val, KVM_REG_SIZE(reg->id)))
+		return -EFAULT;
+
+	return 0;
+}
+
+static int kvm_riscv_vcpu_set_reg_core(struct kvm_vcpu *vcpu,
+				       const struct kvm_one_reg *reg)
+{
+	struct kvm_cpu_context *cntx = &vcpu->arch.guest_context;
+	unsigned long __user *uaddr =
+			(unsigned long __user *)(unsigned long)reg->addr;
+	unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK |
+					    KVM_REG_SIZE_MASK |
+					    KVM_REG_RISCV_CORE);
+	unsigned long reg_val;
+
+	if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long))
+		return -EINVAL;
+	if (reg_num >= sizeof(struct kvm_riscv_core) / sizeof(unsigned long))
+		return -ENOENT;
+
+	if (copy_from_user(&reg_val, uaddr, KVM_REG_SIZE(reg->id)))
+		return -EFAULT;
+
+	if (reg_num == KVM_REG_RISCV_CORE_REG(regs.pc))
+		cntx->sepc = reg_val;
+	else if (KVM_REG_RISCV_CORE_REG(regs.pc) < reg_num &&
+		 reg_num <= KVM_REG_RISCV_CORE_REG(regs.t6))
+		((unsigned long *)cntx)[reg_num] = reg_val;
+	else if (reg_num == KVM_REG_RISCV_CORE_REG(mode)) {
+		if (reg_val == KVM_RISCV_MODE_S)
+			cntx->sstatus |= SR_SPP;
+		else
+			cntx->sstatus &= ~SR_SPP;
+	} else
+		return -ENOENT;
+
+	return 0;
+}
+
+static int kvm_riscv_vcpu_general_get_csr(struct kvm_vcpu *vcpu,
+					  unsigned long reg_num,
+					  unsigned long *out_val)
+{
+	struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr;
+
+	if (reg_num >= sizeof(struct kvm_riscv_csr) / sizeof(unsigned long))
+		return -ENOENT;
+
+	if (reg_num == KVM_REG_RISCV_CSR_REG(sip)) {
+		kvm_riscv_vcpu_flush_interrupts(vcpu);
+		*out_val = (csr->hvip >> VSIP_TO_HVIP_SHIFT) & VSIP_VALID_MASK;
+		*out_val |= csr->hvip & ~IRQ_LOCAL_MASK;
+	} else
+		*out_val = ((unsigned long *)csr)[reg_num];
+
+	return 0;
+}
+
+static int kvm_riscv_vcpu_general_set_csr(struct kvm_vcpu *vcpu,
+					  unsigned long reg_num,
+					  unsigned long reg_val)
+{
+	struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr;
+
+	if (reg_num >= sizeof(struct kvm_riscv_csr) / sizeof(unsigned long))
+		return -ENOENT;
+
+	if (reg_num == KVM_REG_RISCV_CSR_REG(sip)) {
+		reg_val &= VSIP_VALID_MASK;
+		reg_val <<= VSIP_TO_HVIP_SHIFT;
+	}
+
+	((unsigned long *)csr)[reg_num] = reg_val;
+
+	if (reg_num == KVM_REG_RISCV_CSR_REG(sip))
+		WRITE_ONCE(vcpu->arch.irqs_pending_mask[0], 0);
+
+	return 0;
+}
+
+static int kvm_riscv_vcpu_get_reg_csr(struct kvm_vcpu *vcpu,
+				      const struct kvm_one_reg *reg)
+{
+	int rc;
+	unsigned long __user *uaddr =
+			(unsigned long __user *)(unsigned long)reg->addr;
+	unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK |
+					    KVM_REG_SIZE_MASK |
+					    KVM_REG_RISCV_CSR);
+	unsigned long reg_val, reg_subtype;
+
+	if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long))
+		return -EINVAL;
+
+	reg_subtype = reg_num & KVM_REG_RISCV_SUBTYPE_MASK;
+	reg_num &= ~KVM_REG_RISCV_SUBTYPE_MASK;
+	switch (reg_subtype) {
+	case KVM_REG_RISCV_CSR_GENERAL:
+		rc = kvm_riscv_vcpu_general_get_csr(vcpu, reg_num, &reg_val);
+		break;
+	case KVM_REG_RISCV_CSR_AIA:
+		rc = kvm_riscv_vcpu_aia_get_csr(vcpu, reg_num, &reg_val);
+		break;
+	default:
+		rc = -ENOENT;
+		break;
+	}
+	if (rc)
+		return rc;
+
+	if (copy_to_user(uaddr, &reg_val, KVM_REG_SIZE(reg->id)))
+		return -EFAULT;
+
+	return 0;
+}
+
+static int kvm_riscv_vcpu_set_reg_csr(struct kvm_vcpu *vcpu,
+				      const struct kvm_one_reg *reg)
+{
+	int rc;
+	unsigned long __user *uaddr =
+			(unsigned long __user *)(unsigned long)reg->addr;
+	unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK |
+					    KVM_REG_SIZE_MASK |
+					    KVM_REG_RISCV_CSR);
+	unsigned long reg_val, reg_subtype;
+
+	if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long))
+		return -EINVAL;
+
+	if (copy_from_user(&reg_val, uaddr, KVM_REG_SIZE(reg->id)))
+		return -EFAULT;
+
+	reg_subtype = reg_num & KVM_REG_RISCV_SUBTYPE_MASK;
+	reg_num &= ~KVM_REG_RISCV_SUBTYPE_MASK;
+	switch (reg_subtype) {
+	case KVM_REG_RISCV_CSR_GENERAL:
+		rc = kvm_riscv_vcpu_general_set_csr(vcpu, reg_num, reg_val);
+		break;
+	case KVM_REG_RISCV_CSR_AIA:
+		rc = kvm_riscv_vcpu_aia_set_csr(vcpu, reg_num, reg_val);
+		break;
+	default:
+		rc = -ENOENT;
+		break;
+	}
+	if (rc)
+		return rc;
+
+	return 0;
+}
+
+static int riscv_vcpu_get_isa_ext_single(struct kvm_vcpu *vcpu,
+					 unsigned long reg_num,
+					 unsigned long *reg_val)
+{
+	unsigned long host_isa_ext;
+
+	if (reg_num >= KVM_RISCV_ISA_EXT_MAX ||
+	    reg_num >= ARRAY_SIZE(kvm_isa_ext_arr))
+		return -ENOENT;
+
+	*reg_val = 0;
+	host_isa_ext = kvm_isa_ext_arr[reg_num];
+	if (__riscv_isa_extension_available(vcpu->arch.isa, host_isa_ext))
+		*reg_val = 1; /* Mark the given extension as available */
+
+	return 0;
+}
+
+static int riscv_vcpu_set_isa_ext_single(struct kvm_vcpu *vcpu,
+					 unsigned long reg_num,
+					 unsigned long reg_val)
+{
+	unsigned long host_isa_ext;
+
+	if (reg_num >= KVM_RISCV_ISA_EXT_MAX ||
+	    reg_num >= ARRAY_SIZE(kvm_isa_ext_arr))
+		return -ENOENT;
+
+	host_isa_ext = kvm_isa_ext_arr[reg_num];
+	if (!__riscv_isa_extension_available(NULL, host_isa_ext))
+		return -ENOENT;
+
+	if (reg_val == test_bit(host_isa_ext, vcpu->arch.isa))
+		return 0;
+
+	if (!vcpu->arch.ran_atleast_once) {
+		/*
+		 * All multi-letter extension and a few single letter
+		 * extension can be disabled
+		 */
+		if (reg_val == 1 &&
+		    kvm_riscv_vcpu_isa_enable_allowed(reg_num))
+			set_bit(host_isa_ext, vcpu->arch.isa);
+		else if (!reg_val &&
+			 kvm_riscv_vcpu_isa_disable_allowed(reg_num))
+			clear_bit(host_isa_ext, vcpu->arch.isa);
+		else
+			return -EINVAL;
+		kvm_riscv_vcpu_fp_reset(vcpu);
+	} else {
+		return -EBUSY;
+	}
+
+	return 0;
+}
+
+static int riscv_vcpu_get_isa_ext_multi(struct kvm_vcpu *vcpu,
+					unsigned long reg_num,
+					unsigned long *reg_val)
+{
+	unsigned long i, ext_id, ext_val;
+
+	if (reg_num > KVM_REG_RISCV_ISA_MULTI_REG_LAST)
+		return -ENOENT;
+
+	for (i = 0; i < BITS_PER_LONG; i++) {
+		ext_id = i + reg_num * BITS_PER_LONG;
+		if (ext_id >= KVM_RISCV_ISA_EXT_MAX)
+			break;
+
+		ext_val = 0;
+		riscv_vcpu_get_isa_ext_single(vcpu, ext_id, &ext_val);
+		if (ext_val)
+			*reg_val |= KVM_REG_RISCV_ISA_MULTI_MASK(ext_id);
+	}
+
+	return 0;
+}
+
+static int riscv_vcpu_set_isa_ext_multi(struct kvm_vcpu *vcpu,
+					unsigned long reg_num,
+					unsigned long reg_val, bool enable)
+{
+	unsigned long i, ext_id;
+
+	if (reg_num > KVM_REG_RISCV_ISA_MULTI_REG_LAST)
+		return -ENOENT;
+
+	for_each_set_bit(i, &reg_val, BITS_PER_LONG) {
+		ext_id = i + reg_num * BITS_PER_LONG;
+		if (ext_id >= KVM_RISCV_ISA_EXT_MAX)
+			break;
+
+		riscv_vcpu_set_isa_ext_single(vcpu, ext_id, enable);
+	}
+
+	return 0;
+}
+
+static int kvm_riscv_vcpu_get_reg_isa_ext(struct kvm_vcpu *vcpu,
+					  const struct kvm_one_reg *reg)
+{
+	int rc;
+	unsigned long __user *uaddr =
+			(unsigned long __user *)(unsigned long)reg->addr;
+	unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK |
+					    KVM_REG_SIZE_MASK |
+					    KVM_REG_RISCV_ISA_EXT);
+	unsigned long reg_val, reg_subtype;
+
+	if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long))
+		return -EINVAL;
+
+	reg_subtype = reg_num & KVM_REG_RISCV_SUBTYPE_MASK;
+	reg_num &= ~KVM_REG_RISCV_SUBTYPE_MASK;
+
+	reg_val = 0;
+	switch (reg_subtype) {
+	case KVM_REG_RISCV_ISA_SINGLE:
+		rc = riscv_vcpu_get_isa_ext_single(vcpu, reg_num, &reg_val);
+		break;
+	case KVM_REG_RISCV_ISA_MULTI_EN:
+	case KVM_REG_RISCV_ISA_MULTI_DIS:
+		rc = riscv_vcpu_get_isa_ext_multi(vcpu, reg_num, &reg_val);
+		if (!rc && reg_subtype == KVM_REG_RISCV_ISA_MULTI_DIS)
+			reg_val = ~reg_val;
+		break;
+	default:
+		rc = -ENOENT;
+	}
+	if (rc)
+		return rc;
+
+	if (copy_to_user(uaddr, &reg_val, KVM_REG_SIZE(reg->id)))
+		return -EFAULT;
+
+	return 0;
+}
+
+static int kvm_riscv_vcpu_set_reg_isa_ext(struct kvm_vcpu *vcpu,
+					  const struct kvm_one_reg *reg)
+{
+	unsigned long __user *uaddr =
+			(unsigned long __user *)(unsigned long)reg->addr;
+	unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK |
+					    KVM_REG_SIZE_MASK |
+					    KVM_REG_RISCV_ISA_EXT);
+	unsigned long reg_val, reg_subtype;
+
+	if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long))
+		return -EINVAL;
+
+	reg_subtype = reg_num & KVM_REG_RISCV_SUBTYPE_MASK;
+	reg_num &= ~KVM_REG_RISCV_SUBTYPE_MASK;
+
+	if (copy_from_user(&reg_val, uaddr, KVM_REG_SIZE(reg->id)))
+		return -EFAULT;
+
+	switch (reg_subtype) {
+	case KVM_REG_RISCV_ISA_SINGLE:
+		return riscv_vcpu_set_isa_ext_single(vcpu, reg_num, reg_val);
+	case KVM_REG_RISCV_SBI_MULTI_EN:
+		return riscv_vcpu_set_isa_ext_multi(vcpu, reg_num, reg_val, true);
+	case KVM_REG_RISCV_SBI_MULTI_DIS:
+		return riscv_vcpu_set_isa_ext_multi(vcpu, reg_num, reg_val, false);
+	default:
+		return -ENOENT;
+	}
+
+	return 0;
+}
+
+static int copy_config_reg_indices(const struct kvm_vcpu *vcpu,
+				u64 __user *uindices)
+{
+	int n = 0;
+
+	for (int i = 0; i < sizeof(struct kvm_riscv_config)/sizeof(unsigned long);
+		 i++) {
+		u64 size;
+		u64 reg;
+
+		/*
+		 * Avoid reporting config reg if the corresponding extension
+		 * was not available.
+		 */
+		if (i == KVM_REG_RISCV_CONFIG_REG(zicbom_block_size) &&
+			!riscv_isa_extension_available(vcpu->arch.isa, ZICBOM))
+			continue;
+		else if (i == KVM_REG_RISCV_CONFIG_REG(zicboz_block_size) &&
+			!riscv_isa_extension_available(vcpu->arch.isa, ZICBOZ))
+			continue;
+
+		size = IS_ENABLED(CONFIG_32BIT) ? KVM_REG_SIZE_U32 : KVM_REG_SIZE_U64;
+		reg = KVM_REG_RISCV | size | KVM_REG_RISCV_CONFIG | i;
+
+		if (uindices) {
+			if (put_user(reg, uindices))
+				return -EFAULT;
+			uindices++;
+		}
+
+		n++;
+	}
+
+	return n;
+}
+
+static unsigned long num_config_regs(const struct kvm_vcpu *vcpu)
+{
+	return copy_config_reg_indices(vcpu, NULL);
+}
+
+static inline unsigned long num_core_regs(void)
+{
+	return sizeof(struct kvm_riscv_core) / sizeof(unsigned long);
+}
+
+static int copy_core_reg_indices(u64 __user *uindices)
+{
+	int n = num_core_regs();
+
+	for (int i = 0; i < n; i++) {
+		u64 size = IS_ENABLED(CONFIG_32BIT) ?
+			   KVM_REG_SIZE_U32 : KVM_REG_SIZE_U64;
+		u64 reg = KVM_REG_RISCV | size | KVM_REG_RISCV_CORE | i;
+
+		if (uindices) {
+			if (put_user(reg, uindices))
+				return -EFAULT;
+			uindices++;
+		}
+	}
+
+	return n;
+}
+
+static inline unsigned long num_csr_regs(const struct kvm_vcpu *vcpu)
+{
+	unsigned long n = sizeof(struct kvm_riscv_csr) / sizeof(unsigned long);
+
+	if (riscv_isa_extension_available(vcpu->arch.isa, SSAIA))
+		n += sizeof(struct kvm_riscv_aia_csr) / sizeof(unsigned long);
+
+	return n;
+}
+
+static int copy_csr_reg_indices(const struct kvm_vcpu *vcpu,
+				u64 __user *uindices)
+{
+	int n1 = sizeof(struct kvm_riscv_csr) / sizeof(unsigned long);
+	int n2 = 0;
+
+	/* copy general csr regs */
+	for (int i = 0; i < n1; i++) {
+		u64 size = IS_ENABLED(CONFIG_32BIT) ?
+			   KVM_REG_SIZE_U32 : KVM_REG_SIZE_U64;
+		u64 reg = KVM_REG_RISCV | size | KVM_REG_RISCV_CSR |
+				  KVM_REG_RISCV_CSR_GENERAL | i;
+
+		if (uindices) {
+			if (put_user(reg, uindices))
+				return -EFAULT;
+			uindices++;
+		}
+	}
+
+	/* copy AIA csr regs */
+	if (riscv_isa_extension_available(vcpu->arch.isa, SSAIA)) {
+		n2 = sizeof(struct kvm_riscv_aia_csr) / sizeof(unsigned long);
+
+		for (int i = 0; i < n2; i++) {
+			u64 size = IS_ENABLED(CONFIG_32BIT) ?
+				   KVM_REG_SIZE_U32 : KVM_REG_SIZE_U64;
+			u64 reg = KVM_REG_RISCV | size | KVM_REG_RISCV_CSR |
+					  KVM_REG_RISCV_CSR_AIA | i;
+
+			if (uindices) {
+				if (put_user(reg, uindices))
+					return -EFAULT;
+				uindices++;
+			}
+		}
+	}
+
+	return n1 + n2;
+}
+
+static inline unsigned long num_timer_regs(void)
+{
+	return sizeof(struct kvm_riscv_timer) / sizeof(u64);
+}
+
+static int copy_timer_reg_indices(u64 __user *uindices)
+{
+	int n = num_timer_regs();
+
+	for (int i = 0; i < n; i++) {
+		u64 reg = KVM_REG_RISCV | KVM_REG_SIZE_U64 |
+			  KVM_REG_RISCV_TIMER | i;
+
+		if (uindices) {
+			if (put_user(reg, uindices))
+				return -EFAULT;
+			uindices++;
+		}
+	}
+
+	return n;
+}
+
+static inline unsigned long num_fp_f_regs(const struct kvm_vcpu *vcpu)
+{
+	const struct kvm_cpu_context *cntx = &vcpu->arch.guest_context;
+
+	if (riscv_isa_extension_available(vcpu->arch.isa, f))
+		return sizeof(cntx->fp.f) / sizeof(u32);
+	else
+		return 0;
+}
+
+static int copy_fp_f_reg_indices(const struct kvm_vcpu *vcpu,
+				u64 __user *uindices)
+{
+	int n = num_fp_f_regs(vcpu);
+
+	for (int i = 0; i < n; i++) {
+		u64 reg = KVM_REG_RISCV | KVM_REG_SIZE_U32 |
+			  KVM_REG_RISCV_FP_F | i;
+
+		if (uindices) {
+			if (put_user(reg, uindices))
+				return -EFAULT;
+			uindices++;
+		}
+	}
+
+	return n;
+}
+
+static inline unsigned long num_fp_d_regs(const struct kvm_vcpu *vcpu)
+{
+	const struct kvm_cpu_context *cntx = &vcpu->arch.guest_context;
+
+	if (riscv_isa_extension_available(vcpu->arch.isa, d))
+		return sizeof(cntx->fp.d.f) / sizeof(u64) + 1;
+	else
+		return 0;
+}
+
+static int copy_fp_d_reg_indices(const struct kvm_vcpu *vcpu,
+				u64 __user *uindices)
+{
+	int i;
+	int n = num_fp_d_regs(vcpu);
+	u64 reg;
+
+	/* copy fp.d.f indices */
+	for (i = 0; i < n-1; i++) {
+		reg = KVM_REG_RISCV | KVM_REG_SIZE_U64 |
+		      KVM_REG_RISCV_FP_D | i;
+
+		if (uindices) {
+			if (put_user(reg, uindices))
+				return -EFAULT;
+			uindices++;
+		}
+	}
+
+	/* copy fp.d.fcsr indices */
+	reg = KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_D | i;
+	if (uindices) {
+		if (put_user(reg, uindices))
+			return -EFAULT;
+		uindices++;
+	}
+
+	return n;
+}
+
+static int copy_isa_ext_reg_indices(const struct kvm_vcpu *vcpu,
+				u64 __user *uindices)
+{
+	unsigned int n = 0;
+	unsigned long isa_ext;
+
+	for (int i = 0; i < KVM_RISCV_ISA_EXT_MAX; i++) {
+		u64 size = IS_ENABLED(CONFIG_32BIT) ?
+			   KVM_REG_SIZE_U32 : KVM_REG_SIZE_U64;
+		u64 reg = KVM_REG_RISCV | size | KVM_REG_RISCV_ISA_EXT | i;
+
+		isa_ext = kvm_isa_ext_arr[i];
+		if (!__riscv_isa_extension_available(vcpu->arch.isa, isa_ext))
+			continue;
+
+		if (uindices) {
+			if (put_user(reg, uindices))
+				return -EFAULT;
+			uindices++;
+		}
+
+		n++;
+	}
+
+	return n;
+}
+
+static inline unsigned long num_isa_ext_regs(const struct kvm_vcpu *vcpu)
+{
+	return copy_isa_ext_reg_indices(vcpu, NULL);;
+}
+
+static inline unsigned long num_sbi_ext_regs(void)
+{
+	/*
+	 * number of KVM_REG_RISCV_SBI_SINGLE +
+	 * 2 x (number of KVM_REG_RISCV_SBI_MULTI)
+	 */
+	return KVM_RISCV_SBI_EXT_MAX + 2*(KVM_REG_RISCV_SBI_MULTI_REG_LAST+1);
+}
+
+static int copy_sbi_ext_reg_indices(u64 __user *uindices)
+{
+	int n;
+
+	/* copy KVM_REG_RISCV_SBI_SINGLE */
+	n = KVM_RISCV_SBI_EXT_MAX;
+	for (int i = 0; i < n; i++) {
+		u64 size = IS_ENABLED(CONFIG_32BIT) ?
+			   KVM_REG_SIZE_U32 : KVM_REG_SIZE_U64;
+		u64 reg = KVM_REG_RISCV | size | KVM_REG_RISCV_SBI_EXT |
+			  KVM_REG_RISCV_SBI_SINGLE | i;
+
+		if (uindices) {
+			if (put_user(reg, uindices))
+				return -EFAULT;
+			uindices++;
+		}
+	}
+
+	/* copy KVM_REG_RISCV_SBI_MULTI */
+	n = KVM_REG_RISCV_SBI_MULTI_REG_LAST + 1;
+	for (int i = 0; i < n; i++) {
+		u64 size = IS_ENABLED(CONFIG_32BIT) ?
+			   KVM_REG_SIZE_U32 : KVM_REG_SIZE_U64;
+		u64 reg = KVM_REG_RISCV | size | KVM_REG_RISCV_SBI_EXT |
+			  KVM_REG_RISCV_SBI_MULTI_EN | i;
+
+		if (uindices) {
+			if (put_user(reg, uindices))
+				return -EFAULT;
+			uindices++;
+		}
+
+		reg = KVM_REG_RISCV | size | KVM_REG_RISCV_SBI_EXT |
+			  KVM_REG_RISCV_SBI_MULTI_DIS | i;
+
+		if (uindices) {
+			if (put_user(reg, uindices))
+				return -EFAULT;
+			uindices++;
+		}
+	}
+
+	return num_sbi_ext_regs();
+}
+
+/*
+ * kvm_riscv_vcpu_num_regs - how many registers do we present via KVM_GET/SET_ONE_REG
+ *
+ * This is for all registers.
+ */
+unsigned long kvm_riscv_vcpu_num_regs(struct kvm_vcpu *vcpu)
+{
+	unsigned long res = 0;
+
+	res += num_config_regs(vcpu);
+	res += num_core_regs();
+	res += num_csr_regs(vcpu);
+	res += num_timer_regs();
+	res += num_fp_f_regs(vcpu);
+	res += num_fp_d_regs(vcpu);
+	res += num_isa_ext_regs(vcpu);
+	res += num_sbi_ext_regs();
+
+	return res;
+}
+
+/*
+ * kvm_riscv_vcpu_copy_reg_indices - get indices of all registers.
+ */
+int kvm_riscv_vcpu_copy_reg_indices(struct kvm_vcpu *vcpu,
+				    u64 __user *uindices)
+{
+	int ret;
+
+	ret = copy_config_reg_indices(vcpu, uindices);
+	if (ret < 0)
+		return ret;
+	uindices += ret;
+
+	ret = copy_core_reg_indices(uindices);
+	if (ret < 0)
+		return ret;
+	uindices += ret;
+
+	ret = copy_csr_reg_indices(vcpu, uindices);
+	if (ret < 0)
+		return ret;
+	uindices += ret;
+
+	ret = copy_timer_reg_indices(uindices);
+	if (ret < 0)
+		return ret;
+	uindices += ret;
+
+	ret = copy_fp_f_reg_indices(vcpu, uindices);
+	if (ret < 0)
+		return ret;
+	uindices += ret;
+
+	ret = copy_fp_d_reg_indices(vcpu, uindices);
+	if (ret < 0)
+		return ret;
+	uindices += ret;
+
+	ret = copy_isa_ext_reg_indices(vcpu, uindices);
+	if (ret < 0)
+		return ret;
+	uindices += ret;
+
+	ret = copy_sbi_ext_reg_indices(uindices);
+	if (ret < 0)
+		return ret;
+
+	return 0;
+}
+
+int kvm_riscv_vcpu_set_reg(struct kvm_vcpu *vcpu,
+			   const struct kvm_one_reg *reg)
+{
+	switch (reg->id & KVM_REG_RISCV_TYPE_MASK) {
+	case KVM_REG_RISCV_CONFIG:
+		return kvm_riscv_vcpu_set_reg_config(vcpu, reg);
+	case KVM_REG_RISCV_CORE:
+		return kvm_riscv_vcpu_set_reg_core(vcpu, reg);
+	case KVM_REG_RISCV_CSR:
+		return kvm_riscv_vcpu_set_reg_csr(vcpu, reg);
+	case KVM_REG_RISCV_TIMER:
+		return kvm_riscv_vcpu_set_reg_timer(vcpu, reg);
+	case KVM_REG_RISCV_FP_F:
+		return kvm_riscv_vcpu_set_reg_fp(vcpu, reg,
+						 KVM_REG_RISCV_FP_F);
+	case KVM_REG_RISCV_FP_D:
+		return kvm_riscv_vcpu_set_reg_fp(vcpu, reg,
+						 KVM_REG_RISCV_FP_D);
+	case KVM_REG_RISCV_ISA_EXT:
+		return kvm_riscv_vcpu_set_reg_isa_ext(vcpu, reg);
+	case KVM_REG_RISCV_SBI_EXT:
+		return kvm_riscv_vcpu_set_reg_sbi_ext(vcpu, reg);
+	case KVM_REG_RISCV_VECTOR:
+		return kvm_riscv_vcpu_set_reg_vector(vcpu, reg);
+	default:
+		break;
+	}
+
+	return -ENOENT;
+}
+
+int kvm_riscv_vcpu_get_reg(struct kvm_vcpu *vcpu,
+			   const struct kvm_one_reg *reg)
+{
+	switch (reg->id & KVM_REG_RISCV_TYPE_MASK) {
+	case KVM_REG_RISCV_CONFIG:
+		return kvm_riscv_vcpu_get_reg_config(vcpu, reg);
+	case KVM_REG_RISCV_CORE:
+		return kvm_riscv_vcpu_get_reg_core(vcpu, reg);
+	case KVM_REG_RISCV_CSR:
+		return kvm_riscv_vcpu_get_reg_csr(vcpu, reg);
+	case KVM_REG_RISCV_TIMER:
+		return kvm_riscv_vcpu_get_reg_timer(vcpu, reg);
+	case KVM_REG_RISCV_FP_F:
+		return kvm_riscv_vcpu_get_reg_fp(vcpu, reg,
+						 KVM_REG_RISCV_FP_F);
+	case KVM_REG_RISCV_FP_D:
+		return kvm_riscv_vcpu_get_reg_fp(vcpu, reg,
+						 KVM_REG_RISCV_FP_D);
+	case KVM_REG_RISCV_ISA_EXT:
+		return kvm_riscv_vcpu_get_reg_isa_ext(vcpu, reg);
+	case KVM_REG_RISCV_SBI_EXT:
+		return kvm_riscv_vcpu_get_reg_sbi_ext(vcpu, reg);
+	case KVM_REG_RISCV_VECTOR:
+		return kvm_riscv_vcpu_get_reg_vector(vcpu, reg);
+	default:
+		break;
+	}
+
+	return -ENOENT;
+}
diff --git a/arch/riscv/kvm/vcpu_sbi.c b/arch/riscv/kvm/vcpu_sbi.c
index 7b46e04fb667..9cd97091c723 100644
--- a/arch/riscv/kvm/vcpu_sbi.c
+++ b/arch/riscv/kvm/vcpu_sbi.c
@@ -140,8 +140,10 @@ static int riscv_vcpu_set_sbi_ext_single(struct kvm_vcpu *vcpu,
 	const struct kvm_riscv_sbi_extension_entry *sext = NULL;
 	struct kvm_vcpu_sbi_context *scontext = &vcpu->arch.sbi_context;
 
-	if (reg_num >= KVM_RISCV_SBI_EXT_MAX ||
-	    (reg_val != 1 && reg_val != 0))
+	if (reg_num >= KVM_RISCV_SBI_EXT_MAX)
+		return -ENOENT;
+
+	if (reg_val != 1 && reg_val != 0)
 		return -EINVAL;
 
 	for (i = 0; i < ARRAY_SIZE(sbi_ext); i++) {
@@ -175,7 +177,7 @@ static int riscv_vcpu_get_sbi_ext_single(struct kvm_vcpu *vcpu,
 	struct kvm_vcpu_sbi_context *scontext = &vcpu->arch.sbi_context;
 
 	if (reg_num >= KVM_RISCV_SBI_EXT_MAX)
-		return -EINVAL;
+		return -ENOENT;
 
 	for (i = 0; i < ARRAY_SIZE(sbi_ext); i++) {
 		if (sbi_ext[i].ext_idx == reg_num) {
@@ -206,7 +208,7 @@ static int riscv_vcpu_set_sbi_ext_multi(struct kvm_vcpu *vcpu,
 	unsigned long i, ext_id;
 
 	if (reg_num > KVM_REG_RISCV_SBI_MULTI_REG_LAST)
-		return -EINVAL;
+		return -ENOENT;
 
 	for_each_set_bit(i, &reg_val, BITS_PER_LONG) {
 		ext_id = i + reg_num * BITS_PER_LONG;
@@ -226,7 +228,7 @@ static int riscv_vcpu_get_sbi_ext_multi(struct kvm_vcpu *vcpu,
 	unsigned long i, ext_id, ext_val;
 
 	if (reg_num > KVM_REG_RISCV_SBI_MULTI_REG_LAST)
-		return -EINVAL;
+		return -ENOENT;
 
 	for (i = 0; i < BITS_PER_LONG; i++) {
 		ext_id = i + reg_num * BITS_PER_LONG;
@@ -272,7 +274,7 @@ int kvm_riscv_vcpu_set_reg_sbi_ext(struct kvm_vcpu *vcpu,
 	case KVM_REG_RISCV_SBI_MULTI_DIS:
 		return riscv_vcpu_set_sbi_ext_multi(vcpu, reg_num, reg_val, false);
 	default:
-		return -EINVAL;
+		return -ENOENT;
 	}
 
 	return 0;
@@ -307,7 +309,7 @@ int kvm_riscv_vcpu_get_reg_sbi_ext(struct kvm_vcpu *vcpu,
 			reg_val = ~reg_val;
 		break;
 	default:
-		rc = -EINVAL;
+		rc = -ENOENT;
 	}
 	if (rc)
 		return rc;
diff --git a/arch/riscv/kvm/vcpu_timer.c b/arch/riscv/kvm/vcpu_timer.c
index 3ac2ff6a65da..75486b25ac45 100644
--- a/arch/riscv/kvm/vcpu_timer.c
+++ b/arch/riscv/kvm/vcpu_timer.c
@@ -170,7 +170,7 @@ int kvm_riscv_vcpu_get_reg_timer(struct kvm_vcpu *vcpu,
 	if (KVM_REG_SIZE(reg->id) != sizeof(u64))
 		return -EINVAL;
 	if (reg_num >= sizeof(struct kvm_riscv_timer) / sizeof(u64))
-		return -EINVAL;
+		return -ENOENT;
 
 	switch (reg_num) {
 	case KVM_REG_RISCV_TIMER_REG(frequency):
@@ -187,7 +187,7 @@ int kvm_riscv_vcpu_get_reg_timer(struct kvm_vcpu *vcpu,
 					  KVM_RISCV_TIMER_STATE_OFF;
 		break;
 	default:
-		return -EINVAL;
+		return -ENOENT;
 	}
 
 	if (copy_to_user(uaddr, &reg_val, KVM_REG_SIZE(reg->id)))
@@ -211,14 +211,15 @@ int kvm_riscv_vcpu_set_reg_timer(struct kvm_vcpu *vcpu,
 	if (KVM_REG_SIZE(reg->id) != sizeof(u64))
 		return -EINVAL;
 	if (reg_num >= sizeof(struct kvm_riscv_timer) / sizeof(u64))
-		return -EINVAL;
+		return -ENOENT;
 
 	if (copy_from_user(&reg_val, uaddr, KVM_REG_SIZE(reg->id)))
 		return -EFAULT;
 
 	switch (reg_num) {
 	case KVM_REG_RISCV_TIMER_REG(frequency):
-		ret = -EOPNOTSUPP;
+		if (reg_val != riscv_timebase)
+			return -EINVAL;
 		break;
 	case KVM_REG_RISCV_TIMER_REG(time):
 		gt->time_delta = reg_val - get_cycles64();
@@ -233,7 +234,7 @@ int kvm_riscv_vcpu_set_reg_timer(struct kvm_vcpu *vcpu,
 			ret = kvm_riscv_vcpu_timer_cancel(t);
 		break;
 	default:
-		ret = -EINVAL;
+		ret = -ENOENT;
 		break;
 	}
 
diff --git a/arch/riscv/kvm/vcpu_vector.c b/arch/riscv/kvm/vcpu_vector.c
index edd2eecbddc2..b430cbb69521 100644
--- a/arch/riscv/kvm/vcpu_vector.c
+++ b/arch/riscv/kvm/vcpu_vector.c
@@ -91,95 +91,93 @@ void kvm_riscv_vcpu_free_vector_context(struct kvm_vcpu *vcpu)
 }
 #endif
 
-static void *kvm_riscv_vcpu_vreg_addr(struct kvm_vcpu *vcpu,
-				      unsigned long reg_num,
-				      size_t reg_size)
+static int kvm_riscv_vcpu_vreg_addr(struct kvm_vcpu *vcpu,
+				    unsigned long reg_num,
+				    size_t reg_size,
+				    void **reg_addr)
 {
 	struct kvm_cpu_context *cntx = &vcpu->arch.guest_context;
-	void *reg_val;
 	size_t vlenb = riscv_v_vsize / 32;
 
 	if (reg_num < KVM_REG_RISCV_VECTOR_REG(0)) {
 		if (reg_size != sizeof(unsigned long))
-			return NULL;
+			return -EINVAL;
 		switch (reg_num) {
 		case KVM_REG_RISCV_VECTOR_CSR_REG(vstart):
-			reg_val = &cntx->vector.vstart;
+			*reg_addr = &cntx->vector.vstart;
 			break;
 		case KVM_REG_RISCV_VECTOR_CSR_REG(vl):
-			reg_val = &cntx->vector.vl;
+			*reg_addr = &cntx->vector.vl;
 			break;
 		case KVM_REG_RISCV_VECTOR_CSR_REG(vtype):
-			reg_val = &cntx->vector.vtype;
+			*reg_addr = &cntx->vector.vtype;
 			break;
 		case KVM_REG_RISCV_VECTOR_CSR_REG(vcsr):
-			reg_val = &cntx->vector.vcsr;
+			*reg_addr = &cntx->vector.vcsr;
 			break;
 		case KVM_REG_RISCV_VECTOR_CSR_REG(datap):
 		default:
-			return NULL;
+			return -ENOENT;
 		}
 	} else if (reg_num <= KVM_REG_RISCV_VECTOR_REG(31)) {
 		if (reg_size != vlenb)
-			return NULL;
-		reg_val = cntx->vector.datap
-			  + (reg_num - KVM_REG_RISCV_VECTOR_REG(0)) * vlenb;
+			return -EINVAL;
+		*reg_addr = cntx->vector.datap +
+			    (reg_num - KVM_REG_RISCV_VECTOR_REG(0)) * vlenb;
 	} else {
-		return NULL;
+		return -ENOENT;
 	}
 
-	return reg_val;
+	return 0;
 }
 
 int kvm_riscv_vcpu_get_reg_vector(struct kvm_vcpu *vcpu,
-				  const struct kvm_one_reg *reg,
-				  unsigned long rtype)
+				  const struct kvm_one_reg *reg)
 {
 	unsigned long *isa = vcpu->arch.isa;
 	unsigned long __user *uaddr =
 			(unsigned long __user *)(unsigned long)reg->addr;
 	unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK |
 					    KVM_REG_SIZE_MASK |
-					    rtype);
-	void *reg_val = NULL;
+					    KVM_REG_RISCV_VECTOR);
 	size_t reg_size = KVM_REG_SIZE(reg->id);
+	void *reg_addr;
+	int rc;
 
-	if (rtype == KVM_REG_RISCV_VECTOR &&
-	    riscv_isa_extension_available(isa, v)) {
-		reg_val = kvm_riscv_vcpu_vreg_addr(vcpu, reg_num, reg_size);
-	}
+	if (!riscv_isa_extension_available(isa, v))
+		return -ENOENT;
 
-	if (!reg_val)
-		return -EINVAL;
+	rc = kvm_riscv_vcpu_vreg_addr(vcpu, reg_num, reg_size, &reg_addr);
+	if (rc)
+		return rc;
 
-	if (copy_to_user(uaddr, reg_val, reg_size))
+	if (copy_to_user(uaddr, reg_addr, reg_size))
 		return -EFAULT;
 
 	return 0;
 }
 
 int kvm_riscv_vcpu_set_reg_vector(struct kvm_vcpu *vcpu,
-				  const struct kvm_one_reg *reg,
-				  unsigned long rtype)
+				  const struct kvm_one_reg *reg)
 {
 	unsigned long *isa = vcpu->arch.isa;
 	unsigned long __user *uaddr =
 			(unsigned long __user *)(unsigned long)reg->addr;
 	unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK |
 					    KVM_REG_SIZE_MASK |
-					    rtype);
-	void *reg_val = NULL;
+					    KVM_REG_RISCV_VECTOR);
 	size_t reg_size = KVM_REG_SIZE(reg->id);
+	void *reg_addr;
+	int rc;
 
-	if (rtype == KVM_REG_RISCV_VECTOR &&
-	    riscv_isa_extension_available(isa, v)) {
-		reg_val = kvm_riscv_vcpu_vreg_addr(vcpu, reg_num, reg_size);
-	}
+	if (!riscv_isa_extension_available(isa, v))
+		return -ENOENT;
 
-	if (!reg_val)
-		return -EINVAL;
+	rc = kvm_riscv_vcpu_vreg_addr(vcpu, reg_num, reg_size, &reg_addr);
+	if (rc)
+		return rc;
 
-	if (copy_from_user(reg_val, uaddr, reg_size))
+	if (copy_from_user(reg_addr, uaddr, reg_size))
 		return -EFAULT;
 
 	return 0;
diff --git a/arch/s390/boot/ipl_parm.c b/arch/s390/boot/ipl_parm.c
index 8753cb0339e5..7b7521762633 100644
--- a/arch/s390/boot/ipl_parm.c
+++ b/arch/s390/boot/ipl_parm.c
@@ -19,7 +19,6 @@ struct parmarea parmarea __section(".parmarea") = {
 };
 
 char __bootdata(early_command_line)[COMMAND_LINE_SIZE];
-int __bootdata(noexec_disabled);
 
 unsigned int __bootdata_preserved(zlib_dfltcc_support) = ZLIB_DFLTCC_FULL;
 struct ipl_parameter_block __bootdata_preserved(ipl_block);
@@ -290,12 +289,6 @@ void parse_boot_command_line(void)
 				zlib_dfltcc_support = ZLIB_DFLTCC_FULL_DEBUG;
 		}
 
-		if (!strcmp(param, "noexec")) {
-			rc = kstrtobool(val, &enabled);
-			if (!rc && !enabled)
-				noexec_disabled = 1;
-		}
-
 		if (!strcmp(param, "facilities") && val)
 			modify_fac_list(val);
 
diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c
index b9681cb22753..d3e48bd9c394 100644
--- a/arch/s390/boot/startup.c
+++ b/arch/s390/boot/startup.c
@@ -53,10 +53,8 @@ static void detect_facilities(void)
 	}
 	if (test_facility(78))
 		machine.has_edat2 = 1;
-	if (!noexec_disabled && test_facility(130)) {
+	if (test_facility(130))
 		machine.has_nx = 1;
-		__ctl_set_bit(0, 20);
-	}
 }
 
 static void setup_lpp(void)
diff --git a/arch/s390/boot/vmem.c b/arch/s390/boot/vmem.c
index c67f59db7a51..01257ce3b89c 100644
--- a/arch/s390/boot/vmem.c
+++ b/arch/s390/boot/vmem.c
@@ -287,7 +287,9 @@ static void pgtable_pte_populate(pmd_t *pmd, unsigned long addr, unsigned long e
 			if (kasan_pte_populate_zero_shadow(pte, mode))
 				continue;
 			entry = __pte(_pa(addr, PAGE_SIZE, mode));
-			entry = set_pte_bit(entry, PAGE_KERNEL_EXEC);
+			entry = set_pte_bit(entry, PAGE_KERNEL);
+			if (!machine.has_nx)
+				entry = clear_pte_bit(entry, __pgprot(_PAGE_NOEXEC));
 			set_pte(pte, entry);
 			pages++;
 		}
@@ -311,7 +313,9 @@ static void pgtable_pmd_populate(pud_t *pud, unsigned long addr, unsigned long e
 				continue;
 			if (can_large_pmd(pmd, addr, next)) {
 				entry = __pmd(_pa(addr, _SEGMENT_SIZE, mode));
-				entry = set_pmd_bit(entry, SEGMENT_KERNEL_EXEC);
+				entry = set_pmd_bit(entry, SEGMENT_KERNEL);
+				if (!machine.has_nx)
+					entry = clear_pmd_bit(entry, __pgprot(_SEGMENT_ENTRY_NOEXEC));
 				set_pmd(pmd, entry);
 				pages++;
 				continue;
@@ -342,7 +346,9 @@ static void pgtable_pud_populate(p4d_t *p4d, unsigned long addr, unsigned long e
 				continue;
 			if (can_large_pud(pud, addr, next)) {
 				entry = __pud(_pa(addr, _REGION3_SIZE, mode));
-				entry = set_pud_bit(entry, REGION3_KERNEL_EXEC);
+				entry = set_pud_bit(entry, REGION3_KERNEL);
+				if (!machine.has_nx)
+					entry = clear_pud_bit(entry, __pgprot(_REGION_ENTRY_NOEXEC));
 				set_pud(pud, entry);
 				pages++;
 				continue;
diff --git a/arch/s390/configs/btf.config b/arch/s390/configs/btf.config
index 39227b4511af..eb7f84f5925c 100644
--- a/arch/s390/configs/btf.config
+++ b/arch/s390/configs/btf.config
@@ -1 +1,2 @@
+# Help: Enable BTF debug info
 CONFIG_DEBUG_INFO_BTF=y
diff --git a/arch/s390/configs/kasan.config b/arch/s390/configs/kasan.config
index 700a8b25c3ff..84c2b551e992 100644
--- a/arch/s390/configs/kasan.config
+++ b/arch/s390/configs/kasan.config
@@ -1,3 +1,4 @@
+# Help: Enable KASan for debugging
 CONFIG_KASAN=y
 CONFIG_KASAN_INLINE=y
 CONFIG_KASAN_VMALLOC=y
diff --git a/arch/s390/include/asm/airq.h b/arch/s390/include/asm/airq.h
index e82e5626e139..c4c28c2609a5 100644
--- a/arch/s390/include/asm/airq.h
+++ b/arch/s390/include/asm/airq.h
@@ -18,7 +18,6 @@ struct airq_struct {
 	struct hlist_node list;		/* Handler queueing. */
 	void (*handler)(struct airq_struct *airq, struct tpi_info *tpi_info);
 	u8 *lsi_ptr;			/* Local-Summary-Indicator pointer */
-	u8 lsi_mask;			/* Local-Summary-Indicator mask */
 	u8 isc;				/* Interrupt-subclass */
 	u8 flags;
 };
diff --git a/arch/s390/include/asm/dma.h b/arch/s390/include/asm/dma.h
index c260adb25997..7fe3e31956d7 100644
--- a/arch/s390/include/asm/dma.h
+++ b/arch/s390/include/asm/dma.h
@@ -9,6 +9,6 @@
  * to DMA. It _is_ used for the s390 memory zone split at 2GB caused
  * by the 31 bit heritage.
  */
-#define MAX_DMA_ADDRESS         0x80000000
+#define MAX_DMA_ADDRESS		__va(0x80000000)
 
 #endif /* _ASM_S390_DMA_H */
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index 91bfecb91321..427f9528a7b6 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -817,6 +817,8 @@ struct kvm_s390_cpu_model {
 	__u64 *fac_list;
 	u64 cpuid;
 	unsigned short ibc;
+	/* subset of available UV-features for pv-guests enabled by user space */
+	struct kvm_s390_vm_cpu_uv_feat uv_feat_guest;
 };
 
 typedef int (*crypto_hook)(struct kvm_vcpu *vcpu);
diff --git a/arch/s390/include/asm/sections.h b/arch/s390/include/asm/sections.h
index 3fecaa4e8b74..0486e6ef62bf 100644
--- a/arch/s390/include/asm/sections.h
+++ b/arch/s390/include/asm/sections.h
@@ -23,7 +23,7 @@
  */
 #define __bootdata_preserved(var) __section(".boot.preserved.data." #var) var
 
-extern unsigned long __samode31, __eamode31;
-extern unsigned long __stext_amode31, __etext_amode31;
+extern char *__samode31, *__eamode31;
+extern char *__stext_amode31, *__etext_amode31;
 
 #endif
diff --git a/arch/s390/include/asm/set_memory.h b/arch/s390/include/asm/set_memory.h
index 7a3eefd7a242..06fbabe2f66c 100644
--- a/arch/s390/include/asm/set_memory.h
+++ b/arch/s390/include/asm/set_memory.h
@@ -24,43 +24,41 @@ enum {
 #define SET_MEMORY_INV	BIT(_SET_MEMORY_INV_BIT)
 #define SET_MEMORY_DEF	BIT(_SET_MEMORY_DEF_BIT)
 
-int __set_memory(unsigned long addr, int numpages, unsigned long flags);
-
-static inline int set_memory_ro(unsigned long addr, int numpages)
-{
-	return __set_memory(addr, numpages, SET_MEMORY_RO);
-}
-
-static inline int set_memory_rw(unsigned long addr, int numpages)
-{
-	return __set_memory(addr, numpages, SET_MEMORY_RW);
-}
-
-static inline int set_memory_nx(unsigned long addr, int numpages)
-{
-	return __set_memory(addr, numpages, SET_MEMORY_NX);
-}
-
-static inline int set_memory_x(unsigned long addr, int numpages)
-{
-	return __set_memory(addr, numpages, SET_MEMORY_X);
-}
+int __set_memory(unsigned long addr, unsigned long numpages, unsigned long flags);
 
 #define set_memory_rox set_memory_rox
-static inline int set_memory_rox(unsigned long addr, int numpages)
-{
-	return __set_memory(addr, numpages, SET_MEMORY_RO | SET_MEMORY_X);
-}
 
-static inline int set_memory_rwnx(unsigned long addr, int numpages)
-{
-	return __set_memory(addr, numpages, SET_MEMORY_RW | SET_MEMORY_NX);
+/*
+ * Generate two variants of each set_memory() function:
+ *
+ * set_memory_yy(unsigned long addr, int numpages);
+ * __set_memory_yy(void *start, void *end);
+ *
+ * The second variant exists for both convenience to avoid the usual
+ * (unsigned long) casts, but unlike the first variant it can also be used
+ * for areas larger than 8TB, which may happen at memory initialization.
+ */
+#define __SET_MEMORY_FUNC(fname, flags)					\
+static inline int fname(unsigned long addr, int numpages)		\
+{									\
+	return __set_memory(addr, numpages, (flags));			\
+}									\
+									\
+static inline int __##fname(void *start, void *end)			\
+{									\
+	unsigned long numpages;						\
+									\
+	numpages = (end - start) >> PAGE_SHIFT;				\
+	return __set_memory((unsigned long)start, numpages, (flags));	\
 }
 
-static inline int set_memory_4k(unsigned long addr, int numpages)
-{
-	return __set_memory(addr, numpages, SET_MEMORY_4K);
-}
+__SET_MEMORY_FUNC(set_memory_ro, SET_MEMORY_RO)
+__SET_MEMORY_FUNC(set_memory_rw, SET_MEMORY_RW)
+__SET_MEMORY_FUNC(set_memory_nx, SET_MEMORY_NX)
+__SET_MEMORY_FUNC(set_memory_x, SET_MEMORY_X)
+__SET_MEMORY_FUNC(set_memory_rox, SET_MEMORY_RO | SET_MEMORY_X)
+__SET_MEMORY_FUNC(set_memory_rwnx, SET_MEMORY_RW | SET_MEMORY_NX)
+__SET_MEMORY_FUNC(set_memory_4k, SET_MEMORY_4K)
 
 int set_direct_map_invalid_noflush(struct page *page);
 int set_direct_map_default_noflush(struct page *page);
diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h
index b30fe91166e3..25cadc2b9cff 100644
--- a/arch/s390/include/asm/setup.h
+++ b/arch/s390/include/asm/setup.h
@@ -72,7 +72,6 @@ extern unsigned int zlib_dfltcc_support;
 #define ZLIB_DFLTCC_INFLATE_ONLY	3
 #define ZLIB_DFLTCC_FULL_DEBUG		4
 
-extern int noexec_disabled;
 extern unsigned long ident_map_size;
 extern unsigned long max_mappable;
 
diff --git a/arch/s390/include/asm/uv.h b/arch/s390/include/asm/uv.h
index d2cd42bb2c26..0e7bd3873907 100644
--- a/arch/s390/include/asm/uv.h
+++ b/arch/s390/include/asm/uv.h
@@ -99,6 +99,8 @@ enum uv_cmds_inst {
 enum uv_feat_ind {
 	BIT_UV_FEAT_MISC = 0,
 	BIT_UV_FEAT_AIV = 1,
+	BIT_UV_FEAT_AP = 4,
+	BIT_UV_FEAT_AP_INTR = 5,
 };
 
 struct uv_cb_header {
@@ -159,7 +161,15 @@ struct uv_cb_cgc {
 	u64 guest_handle;
 	u64 conf_base_stor_origin;
 	u64 conf_virt_stor_origin;
-	u64 reserved30;
+	u8  reserved30[6];
+	union {
+		struct {
+			u16 : 14;
+			u16 ap_instr_intr : 1;
+			u16 ap_allow_instr : 1;
+		};
+		u16 raw;
+	} flags;
 	u64 guest_stor_origin;
 	u64 guest_stor_len;
 	u64 guest_sca;
@@ -397,6 +407,13 @@ struct uv_info {
 
 extern struct uv_info uv_info;
 
+static inline bool uv_has_feature(u8 feature_bit)
+{
+	if (feature_bit >= sizeof(uv_info.uv_feature_indications) * 8)
+		return false;
+	return test_bit_inv(feature_bit, &uv_info.uv_feature_indications);
+}
+
 #ifdef CONFIG_PROTECTED_VIRTUALIZATION_GUEST
 extern int prot_virt_guest;
 
diff --git a/arch/s390/include/uapi/asm/kvm.h b/arch/s390/include/uapi/asm/kvm.h
index a73cf01a1606..abe926d43cbe 100644
--- a/arch/s390/include/uapi/asm/kvm.h
+++ b/arch/s390/include/uapi/asm/kvm.h
@@ -159,6 +159,22 @@ struct kvm_s390_vm_cpu_subfunc {
 	__u8 reserved[1728];
 };
 
+#define KVM_S390_VM_CPU_PROCESSOR_UV_FEAT_GUEST	6
+#define KVM_S390_VM_CPU_MACHINE_UV_FEAT_GUEST	7
+
+#define KVM_S390_VM_CPU_UV_FEAT_NR_BITS	64
+struct kvm_s390_vm_cpu_uv_feat {
+	union {
+		struct {
+			__u64 : 4;
+			__u64 ap : 1;		/* bit 4 */
+			__u64 ap_intr : 1;	/* bit 5 */
+			__u64 : 58;
+		};
+		__u64 feat;
+	};
+};
+
 /* kvm attributes for crypto */
 #define KVM_S390_VM_CRYPTO_ENABLE_AES_KW	0
 #define KVM_S390_VM_CRYPTO_ENABLE_DEA_KW	1
diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c
index 2dd5976a55ac..442ce0489e1a 100644
--- a/arch/s390/kernel/early.c
+++ b/arch/s390/kernel/early.c
@@ -44,7 +44,6 @@ early_param(#param, ignore_decompressor_param_##param)
 decompressor_handled_param(mem);
 decompressor_handled_param(vmalloc);
 decompressor_handled_param(dfltcc);
-decompressor_handled_param(noexec);
 decompressor_handled_param(facilities);
 decompressor_handled_param(nokaslr);
 #if IS_ENABLED(CONFIG_KVM)
@@ -233,10 +232,8 @@ static __init void detect_machine_facilities(void)
 		S390_lowcore.machine_flags |= MACHINE_FLAG_VX;
 		__ctl_set_bit(0, 17);
 	}
-	if (test_facility(130) && !noexec_disabled) {
+	if (test_facility(130))
 		S390_lowcore.machine_flags |= MACHINE_FLAG_NX;
-		__ctl_set_bit(0, 20);
-	}
 	if (test_facility(133))
 		S390_lowcore.machine_flags |= MACHINE_FLAG_GS;
 	if (test_facility(139) && (tod_clock_base.tod >> 63)) {
diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c
index 12a2bd4fc88c..ce65fc01671f 100644
--- a/arch/s390/kernel/machine_kexec.c
+++ b/arch/s390/kernel/machine_kexec.c
@@ -216,8 +216,8 @@ void arch_crash_save_vmcoreinfo(void)
 	VMCOREINFO_SYMBOL(lowcore_ptr);
 	VMCOREINFO_SYMBOL(high_memory);
 	VMCOREINFO_LENGTH(lowcore_ptr, NR_CPUS);
-	vmcoreinfo_append_str("SAMODE31=%lx\n", __samode31);
-	vmcoreinfo_append_str("EAMODE31=%lx\n", __eamode31);
+	vmcoreinfo_append_str("SAMODE31=%lx\n", (unsigned long)__samode31);
+	vmcoreinfo_append_str("EAMODE31=%lx\n", (unsigned long)__eamode31);
 	vmcoreinfo_append_str("KERNELOFFSET=%lx\n", kaslr_offset());
 	abs_lc = get_abs_lowcore();
 	abs_lc->vmcore_info = paddr_vmcoreinfo_note();
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index c744104e4a9c..de6ad0fb2328 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -97,10 +97,10 @@ EXPORT_SYMBOL(console_irq);
  * relocated above 2 GB, because it has to use 31 bit addresses.
  * Such code and data is part of the .amode31 section.
  */
-unsigned long __amode31_ref __samode31 = (unsigned long)&_samode31;
-unsigned long __amode31_ref __eamode31 = (unsigned long)&_eamode31;
-unsigned long __amode31_ref __stext_amode31 = (unsigned long)&_stext_amode31;
-unsigned long __amode31_ref __etext_amode31 = (unsigned long)&_etext_amode31;
+char __amode31_ref *__samode31 = _samode31;
+char __amode31_ref *__eamode31 = _eamode31;
+char __amode31_ref *__stext_amode31 = _stext_amode31;
+char __amode31_ref *__etext_amode31 = _etext_amode31;
 struct exception_table_entry __amode31_ref *__start_amode31_ex_table = _start_amode31_ex_table;
 struct exception_table_entry __amode31_ref *__stop_amode31_ex_table = _stop_amode31_ex_table;
 
@@ -145,7 +145,6 @@ static u32 __amode31_ref *__ctl_duald = __ctl_duald_amode31;
 static u32 __amode31_ref *__ctl_linkage_stack = __ctl_linkage_stack_amode31;
 static u32 __amode31_ref *__ctl_duct = __ctl_duct_amode31;
 
-int __bootdata(noexec_disabled);
 unsigned long __bootdata_preserved(max_mappable);
 unsigned long __bootdata(ident_map_size);
 struct physmem_info __bootdata(physmem_info);
@@ -771,15 +770,15 @@ static void __init setup_memory(void)
 static void __init relocate_amode31_section(void)
 {
 	unsigned long amode31_size = __eamode31 - __samode31;
-	long amode31_offset = physmem_info.reserved[RR_AMODE31].start - __samode31;
-	long *ptr;
+	long amode31_offset, *ptr;
 
+	amode31_offset = physmem_info.reserved[RR_AMODE31].start - (unsigned long)__samode31;
 	pr_info("Relocating AMODE31 section of size 0x%08lx\n", amode31_size);
 
 	/* Move original AMODE31 section to the new one */
-	memmove((void *)physmem_info.reserved[RR_AMODE31].start, (void *)__samode31, amode31_size);
+	memmove((void *)physmem_info.reserved[RR_AMODE31].start, __samode31, amode31_size);
 	/* Zero out the old AMODE31 section to catch invalid accesses within it */
-	memset((void *)__samode31, 0, amode31_size);
+	memset(__samode31, 0, amode31_size);
 
 	/* Update all AMODE31 region references */
 	for (ptr = _start_amode31_refs; ptr != _end_amode31_refs; ptr++)
diff --git a/arch/s390/kernel/uv.c b/arch/s390/kernel/uv.c
index b771f1b4cdd1..fc07bc39e698 100644
--- a/arch/s390/kernel/uv.c
+++ b/arch/s390/kernel/uv.c
@@ -258,7 +258,7 @@ static bool should_export_before_import(struct uv_cb_header *uvcb, struct mm_str
 	 * shared page from a different protected VM will automatically also
 	 * transfer its ownership.
 	 */
-	if (test_bit_inv(BIT_UV_FEAT_MISC, &uv_info.uv_feature_indications))
+	if (uv_has_feature(BIT_UV_FEAT_MISC))
 		return false;
 	if (uvcb->cmd == UVC_CMD_UNPIN_PAGE_SHARED)
 		return false;
diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c
index 341abafb96e4..b16352083ff9 100644
--- a/arch/s390/kvm/intercept.c
+++ b/arch/s390/kvm/intercept.c
@@ -228,6 +228,21 @@ static int handle_itdb(struct kvm_vcpu *vcpu)
 
 #define per_event(vcpu) (vcpu->arch.sie_block->iprcc & PGM_PER)
 
+static bool should_handle_per_event(const struct kvm_vcpu *vcpu)
+{
+	if (!guestdbg_enabled(vcpu) || !per_event(vcpu))
+		return false;
+	if (guestdbg_sstep_enabled(vcpu) &&
+	    vcpu->arch.sie_block->iprcc != PGM_PER) {
+		/*
+		 * __vcpu_run() will exit after delivering the concurrently
+		 * indicated condition.
+		 */
+		return false;
+	}
+	return true;
+}
+
 static int handle_prog(struct kvm_vcpu *vcpu)
 {
 	psw_t psw;
@@ -242,7 +257,7 @@ static int handle_prog(struct kvm_vcpu *vcpu)
 	if (kvm_s390_pv_cpu_is_protected(vcpu))
 		return -EOPNOTSUPP;
 
-	if (guestdbg_enabled(vcpu) && per_event(vcpu)) {
+	if (should_handle_per_event(vcpu)) {
 		rc = kvm_s390_handle_per_event(vcpu);
 		if (rc)
 			return rc;
@@ -571,6 +586,19 @@ static int handle_pv_notification(struct kvm_vcpu *vcpu)
 	return handle_instruction(vcpu);
 }
 
+static bool should_handle_per_ifetch(const struct kvm_vcpu *vcpu, int rc)
+{
+	/* Process PER, also if the instruction is processed in user space. */
+	if (!(vcpu->arch.sie_block->icptstatus & 0x02))
+		return false;
+	if (rc != 0 && rc != -EOPNOTSUPP)
+		return false;
+	if (guestdbg_sstep_enabled(vcpu) && vcpu->arch.local_int.pending_irqs)
+		/* __vcpu_run() will exit after delivering the interrupt. */
+		return false;
+	return true;
+}
+
 int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu)
 {
 	int rc, per_rc = 0;
@@ -605,8 +633,8 @@ int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu)
 		rc = handle_partial_execution(vcpu);
 		break;
 	case ICPT_KSS:
-		rc = kvm_s390_skey_check_enable(vcpu);
-		break;
+		/* Instruction will be redriven, skip the PER check. */
+		return kvm_s390_skey_check_enable(vcpu);
 	case ICPT_MCHKREQ:
 	case ICPT_INT_ENABLE:
 		/*
@@ -633,9 +661,7 @@ int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu)
 		return -EOPNOTSUPP;
 	}
 
-	/* process PER, also if the instruction is processed in user space */
-	if (vcpu->arch.sie_block->icptstatus & 0x02 &&
-	    (!rc || rc == -EOPNOTSUPP))
+	if (should_handle_per_ifetch(vcpu, rc))
 		per_rc = kvm_s390_handle_per_ifetch_icpt(vcpu);
 	return per_rc ? per_rc : rc;
 }
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index 9bd0a873f3b1..c1b47d608a2b 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -1392,6 +1392,7 @@ int __must_check kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu)
 {
 	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
 	int rc = 0;
+	bool delivered = false;
 	unsigned long irq_type;
 	unsigned long irqs;
 
@@ -1465,6 +1466,19 @@ int __must_check kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu)
 			WARN_ONCE(1, "Unknown pending irq type %ld", irq_type);
 			clear_bit(irq_type, &li->pending_irqs);
 		}
+		delivered |= !rc;
+	}
+
+	/*
+	 * We delivered at least one interrupt and modified the PC. Force a
+	 * singlestep event now.
+	 */
+	if (delivered && guestdbg_sstep_enabled(vcpu)) {
+		struct kvm_debug_exit_arch *debug_exit = &vcpu->run->debug.arch;
+
+		debug_exit->addr = vcpu->arch.sie_block->gpsw.addr;
+		debug_exit->type = KVM_SINGLESTEP;
+		vcpu->guest_debug |= KVM_GUESTDBG_EXIT_PENDING;
 	}
 
 	set_intercept_indicators(vcpu);
@@ -3398,7 +3412,6 @@ static void gib_alert_irq_handler(struct airq_struct *airq,
 
 static struct airq_struct gib_alert_irq = {
 	.handler = gib_alert_irq_handler,
-	.lsi_ptr = &gib_alert_irq.lsi_mask,
 };
 
 void kvm_s390_gib_destroy(void)
@@ -3438,6 +3451,8 @@ int __init kvm_s390_gib_init(u8 nisc)
 		rc = -EIO;
 		goto out_free_gib;
 	}
+	/* adapter interrupts used for AP (applicable here) don't use the LSI */
+	*gib_alert_irq.lsi_ptr = 0xff;
 
 	gib->nisc = nisc;
 	gib_origin = virt_to_phys(gib);
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index d1e768bcfe1d..b3f17e014cab 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -1531,6 +1531,39 @@ static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
 	return 0;
 }
 
+#define KVM_S390_VM_CPU_UV_FEAT_GUEST_MASK	\
+(						\
+	((struct kvm_s390_vm_cpu_uv_feat){	\
+		.ap = 1,			\
+		.ap_intr = 1,			\
+	})					\
+	.feat					\
+)
+
+static int kvm_s390_set_uv_feat(struct kvm *kvm, struct kvm_device_attr *attr)
+{
+	struct kvm_s390_vm_cpu_uv_feat __user *ptr = (void __user *)attr->addr;
+	unsigned long data, filter;
+
+	filter = uv_info.uv_feature_indications & KVM_S390_VM_CPU_UV_FEAT_GUEST_MASK;
+	if (get_user(data, &ptr->feat))
+		return -EFAULT;
+	if (!bitmap_subset(&data, &filter, KVM_S390_VM_CPU_UV_FEAT_NR_BITS))
+		return -EINVAL;
+
+	mutex_lock(&kvm->lock);
+	if (kvm->created_vcpus) {
+		mutex_unlock(&kvm->lock);
+		return -EBUSY;
+	}
+	kvm->arch.model.uv_feat_guest.feat = data;
+	mutex_unlock(&kvm->lock);
+
+	VM_EVENT(kvm, 3, "SET: guest UV-feat: 0x%16.16lx", data);
+
+	return 0;
+}
+
 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
 {
 	int ret = -ENXIO;
@@ -1545,6 +1578,9 @@ static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
 	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
 		ret = kvm_s390_set_processor_subfunc(kvm, attr);
 		break;
+	case KVM_S390_VM_CPU_PROCESSOR_UV_FEAT_GUEST:
+		ret = kvm_s390_set_uv_feat(kvm, attr);
+		break;
 	}
 	return ret;
 }
@@ -1777,6 +1813,33 @@ static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
 	return 0;
 }
 
+static int kvm_s390_get_processor_uv_feat(struct kvm *kvm, struct kvm_device_attr *attr)
+{
+	struct kvm_s390_vm_cpu_uv_feat __user *dst = (void __user *)attr->addr;
+	unsigned long feat = kvm->arch.model.uv_feat_guest.feat;
+
+	if (put_user(feat, &dst->feat))
+		return -EFAULT;
+	VM_EVENT(kvm, 3, "GET: guest UV-feat: 0x%16.16lx", feat);
+
+	return 0;
+}
+
+static int kvm_s390_get_machine_uv_feat(struct kvm *kvm, struct kvm_device_attr *attr)
+{
+	struct kvm_s390_vm_cpu_uv_feat __user *dst = (void __user *)attr->addr;
+	unsigned long feat;
+
+	BUILD_BUG_ON(sizeof(*dst) != sizeof(uv_info.uv_feature_indications));
+
+	feat = uv_info.uv_feature_indications & KVM_S390_VM_CPU_UV_FEAT_GUEST_MASK;
+	if (put_user(feat, &dst->feat))
+		return -EFAULT;
+	VM_EVENT(kvm, 3, "GET: guest UV-feat: 0x%16.16lx", feat);
+
+	return 0;
+}
+
 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
 {
 	int ret = -ENXIO;
@@ -1800,6 +1863,12 @@ static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
 	case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
 		ret = kvm_s390_get_machine_subfunc(kvm, attr);
 		break;
+	case KVM_S390_VM_CPU_PROCESSOR_UV_FEAT_GUEST:
+		ret = kvm_s390_get_processor_uv_feat(kvm, attr);
+		break;
+	case KVM_S390_VM_CPU_MACHINE_UV_FEAT_GUEST:
+		ret = kvm_s390_get_machine_uv_feat(kvm, attr);
+		break;
 	}
 	return ret;
 }
@@ -1952,6 +2021,8 @@ static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
 		case KVM_S390_VM_CPU_MACHINE_FEAT:
 		case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
 		case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
+		case KVM_S390_VM_CPU_MACHINE_UV_FEAT_GUEST:
+		case KVM_S390_VM_CPU_PROCESSOR_UV_FEAT_GUEST:
 			ret = 0;
 			break;
 		default:
@@ -2406,7 +2477,7 @@ static int kvm_s390_cpus_to_pv(struct kvm *kvm, u16 *rc, u16 *rrc)
 	struct kvm_vcpu *vcpu;
 
 	/* Disable the GISA if the ultravisor does not support AIV. */
-	if (!test_bit_inv(BIT_UV_FEAT_AIV, &uv_info.uv_feature_indications))
+	if (!uv_has_feature(BIT_UV_FEAT_AIV))
 		kvm_s390_gisa_disable(kvm);
 
 	kvm_for_each_vcpu(i, vcpu, kvm) {
@@ -3296,6 +3367,8 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 	kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
 	kvm->arch.model.ibc = sclp.ibc & 0x0fff;
 
+	kvm->arch.model.uv_feat_guest.feat = 0;
+
 	kvm_s390_crypto_init(kvm);
 
 	if (IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM)) {
@@ -4611,7 +4684,7 @@ static int vcpu_pre_run(struct kvm_vcpu *vcpu)
 
 	if (!kvm_is_ucontrol(vcpu->kvm)) {
 		rc = kvm_s390_deliver_pending_interrupts(vcpu);
-		if (rc)
+		if (rc || guestdbg_exit_pending(vcpu))
 			return rc;
 	}
 
@@ -4738,7 +4811,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
 
 	do {
 		rc = vcpu_pre_run(vcpu);
-		if (rc)
+		if (rc || guestdbg_exit_pending(vcpu))
 			break;
 
 		kvm_vcpu_srcu_read_unlock(vcpu);
@@ -5383,6 +5456,7 @@ long kvm_arch_vcpu_async_ioctl(struct file *filp,
 {
 	struct kvm_vcpu *vcpu = filp->private_data;
 	void __user *argp = (void __user *)arg;
+	int rc;
 
 	switch (ioctl) {
 	case KVM_S390_IRQ: {
@@ -5390,7 +5464,8 @@ long kvm_arch_vcpu_async_ioctl(struct file *filp,
 
 		if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
 			return -EFAULT;
-		return kvm_s390_inject_vcpu(vcpu, &s390irq);
+		rc = kvm_s390_inject_vcpu(vcpu, &s390irq);
+		break;
 	}
 	case KVM_S390_INTERRUPT: {
 		struct kvm_s390_interrupt s390int;
@@ -5400,10 +5475,25 @@ long kvm_arch_vcpu_async_ioctl(struct file *filp,
 			return -EFAULT;
 		if (s390int_to_s390irq(&s390int, &s390irq))
 			return -EINVAL;
-		return kvm_s390_inject_vcpu(vcpu, &s390irq);
+		rc = kvm_s390_inject_vcpu(vcpu, &s390irq);
+		break;
 	}
+	default:
+		rc = -ENOIOCTLCMD;
+		break;
 	}
-	return -ENOIOCTLCMD;
+
+	/*
+	 * To simplify single stepping of userspace-emulated instructions,
+	 * KVM_EXIT_S390_SIEIC exit sets KVM_GUESTDBG_EXIT_PENDING (see
+	 * should_handle_per_ifetch()). However, if userspace emulation injects
+	 * an interrupt, it needs to be cleared, so that KVM_EXIT_DEBUG happens
+	 * after (and not before) the interrupt delivery.
+	 */
+	if (!rc)
+		vcpu->guest_debug &= ~KVM_GUESTDBG_EXIT_PENDING;
+
+	return rc;
 }
 
 static int kvm_s390_handle_pv_vcpu_dump(struct kvm_vcpu *vcpu,
diff --git a/arch/s390/kvm/pv.c b/arch/s390/kvm/pv.c
index 8d3f39a8a11e..75e81ba26d04 100644
--- a/arch/s390/kvm/pv.c
+++ b/arch/s390/kvm/pv.c
@@ -285,7 +285,8 @@ static int kvm_s390_pv_deinit_vm_fast(struct kvm *kvm, u16 *rc, u16 *rrc)
 	WRITE_ONCE(kvm->arch.gmap->guest_handle, 0);
 	KVM_UV_EVENT(kvm, 3, "PROTVIRT DESTROY VM FAST: rc %x rrc %x",
 		     uvcb.header.rc, uvcb.header.rrc);
-	WARN_ONCE(cc, "protvirt destroy vm fast failed handle %llx rc %x rrc %x",
+	WARN_ONCE(cc && uvcb.header.rc != 0x104,
+		  "protvirt destroy vm fast failed handle %llx rc %x rrc %x",
 		  kvm_s390_pv_get_handle(kvm), uvcb.header.rc, uvcb.header.rrc);
 	/* Intended memory leak on "impossible" error */
 	if (!cc)
@@ -575,12 +576,14 @@ int kvm_s390_pv_init_vm(struct kvm *kvm, u16 *rc, u16 *rrc)
 	uvcb.conf_base_stor_origin =
 		virt_to_phys((void *)kvm->arch.pv.stor_base);
 	uvcb.conf_virt_stor_origin = (u64)kvm->arch.pv.stor_var;
+	uvcb.flags.ap_allow_instr = kvm->arch.model.uv_feat_guest.ap;
+	uvcb.flags.ap_instr_intr = kvm->arch.model.uv_feat_guest.ap_intr;
 
 	cc = uv_call_sched(0, (u64)&uvcb);
 	*rc = uvcb.header.rc;
 	*rrc = uvcb.header.rrc;
-	KVM_UV_EVENT(kvm, 3, "PROTVIRT CREATE VM: handle %llx len %llx rc %x rrc %x",
-		     uvcb.guest_handle, uvcb.guest_stor_len, *rc, *rrc);
+	KVM_UV_EVENT(kvm, 3, "PROTVIRT CREATE VM: handle %llx len %llx rc %x rrc %x flags %04x",
+		     uvcb.guest_handle, uvcb.guest_stor_len, *rc, *rrc, uvcb.flags.raw);
 
 	/* Outputs */
 	kvm->arch.pv.handle = uvcb.guest_handle;
diff --git a/arch/s390/mm/dump_pagetables.c b/arch/s390/mm/dump_pagetables.c
index afa5db750d92..b51666967aa1 100644
--- a/arch/s390/mm/dump_pagetables.c
+++ b/arch/s390/mm/dump_pagetables.c
@@ -290,8 +290,8 @@ static int pt_dump_init(void)
 	max_addr = (S390_lowcore.kernel_asce & _REGION_ENTRY_TYPE_MASK) >> 2;
 	max_addr = 1UL << (max_addr * 11 + 31);
 	address_markers[IDENTITY_AFTER_END_NR].start_address = ident_map_size;
-	address_markers[AMODE31_START_NR].start_address = __samode31;
-	address_markers[AMODE31_END_NR].start_address = __eamode31;
+	address_markers[AMODE31_START_NR].start_address = (unsigned long)__samode31;
+	address_markers[AMODE31_END_NR].start_address = (unsigned long)__eamode31;
 	address_markers[MODULES_NR].start_address = MODULES_VADDR;
 	address_markers[MODULES_END_NR].start_address = MODULES_END;
 	address_markers[ABS_LOWCORE_NR].start_address = __abs_lowcore;
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index 099c4824dd8a..b678295931c3 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -598,7 +598,7 @@ void do_secure_storage_access(struct pt_regs *regs)
 	 * reliable without the misc UV feature so we need to check
 	 * for that as well.
 	 */
-	if (test_bit_inv(BIT_UV_FEAT_MISC, &uv_info.uv_feature_indications) &&
+	if (uv_has_feature(BIT_UV_FEAT_MISC) &&
 	    !test_bit_inv(61, &regs->int_parm_long)) {
 		/*
 		 * When this happens, userspace did something that it
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
index 8d94e29adcdb..8b94d2212d33 100644
--- a/arch/s390/mm/init.c
+++ b/arch/s390/mm/init.c
@@ -98,7 +98,7 @@ void __init paging_init(void)
 	sparse_init();
 	zone_dma_bits = 31;
 	memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
-	max_zone_pfns[ZONE_DMA] = PFN_DOWN(MAX_DMA_ADDRESS);
+	max_zone_pfns[ZONE_DMA] = virt_to_pfn(MAX_DMA_ADDRESS);
 	max_zone_pfns[ZONE_NORMAL] = max_low_pfn;
 	free_area_init(max_zone_pfns);
 }
@@ -107,7 +107,7 @@ void mark_rodata_ro(void)
 {
 	unsigned long size = __end_ro_after_init - __start_ro_after_init;
 
-	set_memory_ro((unsigned long)__start_ro_after_init, size >> PAGE_SHIFT);
+	__set_memory_ro(__start_ro_after_init, __end_ro_after_init);
 	pr_info("Write protected read-only-after-init data: %luk\n", size >> 10);
 	debug_checkwx();
 }
diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c
index e5ec76271b16..b87e96c64b61 100644
--- a/arch/s390/mm/pageattr.c
+++ b/arch/s390/mm/pageattr.c
@@ -373,7 +373,7 @@ static int change_page_attr_alias(unsigned long addr, unsigned long end,
 	return rc;
 }
 
-int __set_memory(unsigned long addr, int numpages, unsigned long flags)
+int __set_memory(unsigned long addr, unsigned long numpages, unsigned long flags)
 {
 	unsigned long end;
 	int rc;
diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c
index e44243b9c0a4..6957d2ed97bf 100644
--- a/arch/s390/mm/vmem.c
+++ b/arch/s390/mm/vmem.c
@@ -5,7 +5,6 @@
 
 #include <linux/memory_hotplug.h>
 #include <linux/memblock.h>
-#include <linux/kasan.h>
 #include <linux/pfn.h>
 #include <linux/mm.h>
 #include <linux/init.h>
@@ -291,14 +290,9 @@ out:
 
 static void try_free_pmd_table(pud_t *pud, unsigned long start)
 {
-	const unsigned long end = start + PUD_SIZE;
 	pmd_t *pmd;
 	int i;
 
-	/* Don't mess with any tables not fully in 1:1 mapping & vmemmap area */
-	if (end > VMALLOC_START)
-		return;
-
 	pmd = pmd_offset(pud, start);
 	for (i = 0; i < PTRS_PER_PMD; i++, pmd++)
 		if (!pmd_none(*pmd))
@@ -363,14 +357,9 @@ out:
 
 static void try_free_pud_table(p4d_t *p4d, unsigned long start)
 {
-	const unsigned long end = start + P4D_SIZE;
 	pud_t *pud;
 	int i;
 
-	/* Don't mess with any tables not fully in 1:1 mapping & vmemmap area */
-	if (end > VMALLOC_START)
-		return;
-
 	pud = pud_offset(p4d, start);
 	for (i = 0; i < PTRS_PER_PUD; i++, pud++) {
 		if (!pud_none(*pud))
@@ -413,14 +402,9 @@ out:
 
 static void try_free_p4d_table(pgd_t *pgd, unsigned long start)
 {
-	const unsigned long end = start + PGDIR_SIZE;
 	p4d_t *p4d;
 	int i;
 
-	/* Don't mess with any tables not fully in 1:1 mapping & vmemmap area */
-	if (end > VMALLOC_START)
-		return;
-
 	p4d = p4d_offset(pgd, start);
 	for (i = 0; i < PTRS_PER_P4D; i++, p4d++) {
 		if (!p4d_none(*p4d))
@@ -440,6 +424,9 @@ static int modify_pagetable(unsigned long start, unsigned long end, bool add,
 
 	if (WARN_ON_ONCE(!PAGE_ALIGNED(start | end)))
 		return -EINVAL;
+	/* Don't mess with any tables not fully in 1:1 mapping & vmemmap area */
+	if (WARN_ON_ONCE(end > VMALLOC_START))
+		return -EINVAL;
 	for (addr = start; addr < end; addr = next) {
 		next = pgd_addr_end(addr, end);
 		pgd = pgd_offset_k(addr);
@@ -650,122 +637,29 @@ void vmem_unmap_4k_page(unsigned long addr)
 	mutex_unlock(&vmem_mutex);
 }
 
-static int __init memblock_region_cmp(const void *a, const void *b)
-{
-	const struct memblock_region *r1 = a;
-	const struct memblock_region *r2 = b;
-
-	if (r1->base < r2->base)
-		return -1;
-	if (r1->base > r2->base)
-		return 1;
-	return 0;
-}
-
-static void __init memblock_region_swap(void *a, void *b, int size)
-{
-	swap(*(struct memblock_region *)a, *(struct memblock_region *)b);
-}
-
-#ifdef CONFIG_KASAN
-#define __sha(x)	((unsigned long)kasan_mem_to_shadow((void *)x))
-
-static inline int set_memory_kasan(unsigned long start, unsigned long end)
-{
-	start = PAGE_ALIGN_DOWN(__sha(start));
-	end = PAGE_ALIGN(__sha(end));
-	return set_memory_rwnx(start, (end - start) >> PAGE_SHIFT);
-}
-#endif
-
-/*
- * map whole physical memory to virtual memory (identity mapping)
- * we reserve enough space in the vmalloc area for vmemmap to hotplug
- * additional memory segments.
- */
 void __init vmem_map_init(void)
 {
-	struct memblock_region memory_rwx_regions[] = {
-		{
-			.base	= 0,
-			.size	= sizeof(struct lowcore),
-			.flags	= MEMBLOCK_NONE,
-#ifdef CONFIG_NUMA
-			.nid	= NUMA_NO_NODE,
-#endif
-		},
-		{
-			.base	= __pa(_stext),
-			.size	= _etext - _stext,
-			.flags	= MEMBLOCK_NONE,
-#ifdef CONFIG_NUMA
-			.nid	= NUMA_NO_NODE,
-#endif
-		},
-		{
-			.base	= __pa(_sinittext),
-			.size	= _einittext - _sinittext,
-			.flags	= MEMBLOCK_NONE,
-#ifdef CONFIG_NUMA
-			.nid	= NUMA_NO_NODE,
-#endif
-		},
-		{
-			.base	= __stext_amode31,
-			.size	= __etext_amode31 - __stext_amode31,
-			.flags	= MEMBLOCK_NONE,
-#ifdef CONFIG_NUMA
-			.nid	= NUMA_NO_NODE,
-#endif
-		},
-	};
-	struct memblock_type memory_rwx = {
-		.regions	= memory_rwx_regions,
-		.cnt		= ARRAY_SIZE(memory_rwx_regions),
-		.max		= ARRAY_SIZE(memory_rwx_regions),
-	};
-	phys_addr_t base, end;
-	u64 i;
-
+	__set_memory_rox(_stext, _etext);
+	__set_memory_ro(_etext, __end_rodata);
+	__set_memory_rox(_sinittext, _einittext);
+	__set_memory_rox(__stext_amode31, __etext_amode31);
 	/*
-	 * Set RW+NX attribute on all memory, except regions enumerated with
-	 * memory_rwx exclude type. These regions need different attributes,
-	 * which are enforced afterwards.
-	 *
-	 * __for_each_mem_range() iterate and exclude types should be sorted.
-	 * The relative location of _stext and _sinittext is hardcoded in the
-	 * linker script. However a location of __stext_amode31 and the kernel
-	 * image itself are chosen dynamically. Thus, sort the exclude type.
+	 * If the BEAR-enhancement facility is not installed the first
+	 * prefix page is used to return to the previous context with
+	 * an LPSWE instruction and therefore must be executable.
 	 */
-	sort(&memory_rwx_regions,
-	     ARRAY_SIZE(memory_rwx_regions), sizeof(memory_rwx_regions[0]),
-	     memblock_region_cmp, memblock_region_swap);
-	__for_each_mem_range(i, &memblock.memory, &memory_rwx,
-			     NUMA_NO_NODE, MEMBLOCK_NONE, &base, &end, NULL) {
-		set_memory_rwnx((unsigned long)__va(base),
-				(end - base) >> PAGE_SHIFT);
+	if (!static_key_enabled(&cpu_has_bear))
+		set_memory_x(0, 1);
+	if (debug_pagealloc_enabled()) {
+		/*
+		 * Use RELOC_HIDE() as long as __va(0) translates to NULL,
+		 * since performing pointer arithmetic on a NULL pointer
+		 * has undefined behavior and generates compiler warnings.
+		 */
+		__set_memory_4k(__va(0), RELOC_HIDE(__va(0), ident_map_size));
 	}
-
-#ifdef CONFIG_KASAN
-	for_each_mem_range(i, &base, &end)
-		set_memory_kasan(base, end);
-#endif
-	set_memory_rox((unsigned long)_stext,
-		       (unsigned long)(_etext - _stext) >> PAGE_SHIFT);
-	set_memory_ro((unsigned long)_etext,
-		      (unsigned long)(__end_rodata - _etext) >> PAGE_SHIFT);
-	set_memory_rox((unsigned long)_sinittext,
-		       (unsigned long)(_einittext - _sinittext) >> PAGE_SHIFT);
-	set_memory_rox(__stext_amode31,
-		       (__etext_amode31 - __stext_amode31) >> PAGE_SHIFT);
-
-	/* lowcore must be executable for LPSWE */
-	if (static_key_enabled(&cpu_has_bear))
-		set_memory_nx(0, 1);
-	set_memory_nx(PAGE_SIZE, 1);
-	if (debug_pagealloc_enabled())
-		set_memory_4k(0, ident_map_size >> PAGE_SHIFT);
-
+	if (MACHINE_HAS_NX)
+		ctl_set_bit(0, 20);
 	pr_info("Write protected kernel read-only data: %luk\n",
 		(unsigned long)(__end_rodata - _stext) >> 10);
 }
diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c
index 5e9371fbf3d5..de2fb12120d2 100644
--- a/arch/s390/net/bpf_jit_comp.c
+++ b/arch/s390/net/bpf_jit_comp.c
@@ -2088,6 +2088,7 @@ struct bpf_tramp_jit {
 				 */
 	int r14_off;		/* Offset of saved %r14 */
 	int run_ctx_off;	/* Offset of struct bpf_tramp_run_ctx */
+	int tccnt_off;		/* Offset of saved tailcall counter */
 	int do_fexit;		/* do_fexit: label */
 };
 
@@ -2258,12 +2259,16 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im,
 	tjit->r14_off = alloc_stack(tjit, sizeof(u64));
 	tjit->run_ctx_off = alloc_stack(tjit,
 					sizeof(struct bpf_tramp_run_ctx));
+	tjit->tccnt_off = alloc_stack(tjit, sizeof(u64));
 	/* The caller has already reserved STACK_FRAME_OVERHEAD bytes. */
 	tjit->stack_size -= STACK_FRAME_OVERHEAD;
 	tjit->orig_stack_args_off = tjit->stack_size + STACK_FRAME_OVERHEAD;
 
 	/* aghi %r15,-stack_size */
 	EMIT4_IMM(0xa70b0000, REG_15, -tjit->stack_size);
+	/* mvc tccnt_off(4,%r15),stack_size+STK_OFF_TCCNT(%r15) */
+	_EMIT6(0xd203f000 | tjit->tccnt_off,
+	       0xf000 | (tjit->stack_size + STK_OFF_TCCNT));
 	/* stmg %r2,%rN,fwd_reg_args_off(%r15) */
 	if (nr_reg_args)
 		EMIT6_DISP_LH(0xeb000000, 0x0024, REG_2,
@@ -2400,6 +2405,8 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im,
 				       (nr_stack_args * sizeof(u64) - 1) << 16 |
 				       tjit->stack_args_off,
 			       0xf000 | tjit->orig_stack_args_off);
+		/* mvc STK_OFF_TCCNT(4,%r15),tccnt_off(%r15) */
+		_EMIT6(0xd203f000 | STK_OFF_TCCNT, 0xf000 | tjit->tccnt_off);
 		/* lgr %r1,%r8 */
 		EMIT4(0xb9040000, REG_1, REG_8);
 		/* %r1() */
@@ -2456,6 +2463,9 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im,
 	if (flags & (BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_RET_FENTRY_RET))
 		EMIT6_DISP_LH(0xe3000000, 0x0004, REG_2, REG_0, REG_15,
 			      tjit->retval_off);
+	/* mvc stack_size+STK_OFF_TCCNT(4,%r15),tccnt_off(%r15) */
+	_EMIT6(0xd203f000 | (tjit->stack_size + STK_OFF_TCCNT),
+	       0xf000 | tjit->tccnt_off);
 	/* aghi %r15,stack_size */
 	EMIT4_IMM(0xa70b0000, REG_15, tjit->stack_size);
 	/* Emit an expoline for the following indirect jump. */
diff --git a/arch/sparc/include/asm/Kbuild b/arch/sparc/include/asm/Kbuild
index 595ca0be286b..43b0ae4c2c21 100644
--- a/arch/sparc/include/asm/Kbuild
+++ b/arch/sparc/include/asm/Kbuild
@@ -2,6 +2,5 @@
 generated-y += syscall_table_32.h
 generated-y += syscall_table_64.h
 generic-y += agp.h
-generic-y += export.h
 generic-y += kvm_para.h
 generic-y += mcs_spinlock.h
diff --git a/arch/sparc/include/asm/ide.h b/arch/sparc/include/asm/ide.h
deleted file mode 100644
index 904cc6cbc155..000000000000
--- a/arch/sparc/include/asm/ide.h
+++ /dev/null
@@ -1,97 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/* ide.h: SPARC PCI specific IDE glue.
- *
- * Copyright (C) 1997  David S. Miller (davem@davemloft.net)
- * Copyright (C) 1998  Eddie C. Dost   (ecd@skynet.be)
- * Adaptation from sparc64 version to sparc by Pete Zaitcev.
- */
-
-#ifndef _SPARC_IDE_H
-#define _SPARC_IDE_H
-
-#ifdef __KERNEL__
-
-#include <asm/io.h>
-#ifdef CONFIG_SPARC64
-#include <asm/spitfire.h>
-#include <asm/cacheflush.h>
-#include <asm/page.h>
-#else
-#include <linux/pgtable.h>
-#include <asm/psr.h>
-#endif
-
-#define __ide_insl(data_reg, buffer, wcount) \
-	__ide_insw(data_reg, buffer, (wcount)<<1)
-#define __ide_outsl(data_reg, buffer, wcount) \
-	__ide_outsw(data_reg, buffer, (wcount)<<1)
-
-/* On sparc, I/O ports and MMIO registers are accessed identically.  */
-#define __ide_mm_insw	__ide_insw
-#define __ide_mm_insl	__ide_insl
-#define __ide_mm_outsw	__ide_outsw
-#define __ide_mm_outsl	__ide_outsl
-
-static inline void __ide_insw(void __iomem *port, void *dst, u32 count)
-{
-#if defined(CONFIG_SPARC64) && defined(DCACHE_ALIASING_POSSIBLE)
-	unsigned long end = (unsigned long)dst + (count << 1);
-#endif
-	u16 *ps = dst;
-	u32 *pi;
-
-	if(((unsigned long)ps) & 0x2) {
-		*ps++ = __raw_readw(port);
-		count--;
-	}
-	pi = (u32 *)ps;
-	while(count >= 2) {
-		u32 w;
-
-		w  = __raw_readw(port) << 16;
-		w |= __raw_readw(port);
-		*pi++ = w;
-		count -= 2;
-	}
-	ps = (u16 *)pi;
-	if(count)
-		*ps++ = __raw_readw(port);
-
-#if defined(CONFIG_SPARC64) && defined(DCACHE_ALIASING_POSSIBLE)
-	__flush_dcache_range((unsigned long)dst, end);
-#endif
-}
-
-static inline void __ide_outsw(void __iomem *port, const void *src, u32 count)
-{
-#if defined(CONFIG_SPARC64) && defined(DCACHE_ALIASING_POSSIBLE)
-	unsigned long end = (unsigned long)src + (count << 1);
-#endif
-	const u16 *ps = src;
-	const u32 *pi;
-
-	if(((unsigned long)src) & 0x2) {
-		__raw_writew(*ps++, port);
-		count--;
-	}
-	pi = (const u32 *)ps;
-	while(count >= 2) {
-		u32 w;
-
-		w = *pi++;
-		__raw_writew((w >> 16), port);
-		__raw_writew(w, port);
-		count -= 2;
-	}
-	ps = (const u16 *)pi;
-	if(count)
-		__raw_writew(*ps, port);
-
-#if defined(CONFIG_SPARC64) && defined(DCACHE_ALIASING_POSSIBLE)
-	__flush_dcache_range((unsigned long)src, end);
-#endif
-}
-
-#endif /* __KERNEL__ */
-
-#endif /* _SPARC_IDE_H */
diff --git a/arch/sparc/include/asm/vio.h b/arch/sparc/include/asm/vio.h
index 8a0c3c11c9ce..587fb7841096 100644
--- a/arch/sparc/include/asm/vio.h
+++ b/arch/sparc/include/asm/vio.h
@@ -284,7 +284,7 @@ struct vio_dring_state {
 	struct ldc_trans_cookie	cookies[VIO_MAX_RING_COOKIES];
 };
 
-#define VIO_TAG_SIZE		((int)sizeof(struct vio_msg_tag))
+#define VIO_TAG_SIZE		(sizeof(struct vio_msg_tag))
 #define VIO_VCC_MTU_SIZE	(LDC_PACKET_SIZE - VIO_TAG_SIZE)
 
 struct vio_vcc {
diff --git a/arch/sparc/kernel/entry.S b/arch/sparc/kernel/entry.S
index a269ad2fe6df..a3fdee4cd6fa 100644
--- a/arch/sparc/kernel/entry.S
+++ b/arch/sparc/kernel/entry.S
@@ -8,6 +8,7 @@
  * Copyright (C) 1997 Anton Blanchard (anton@progsoc.uts.edu.au)
  */
 
+#include <linux/export.h>
 #include <linux/linkage.h>
 #include <linux/errno.h>
 #include <linux/pgtable.h>
@@ -30,7 +31,6 @@
 #include <asm/unistd.h>
 
 #include <asm/asmmacro.h>
-#include <asm/export.h>
 
 #define curptr      g6
 
diff --git a/arch/sparc/kernel/head_32.S b/arch/sparc/kernel/head_32.S
index 6044b82b9767..964c61b5cd03 100644
--- a/arch/sparc/kernel/head_32.S
+++ b/arch/sparc/kernel/head_32.S
@@ -11,6 +11,7 @@
  * CompactPCI platform by Eric Brower, 1999.
  */
 
+#include <linux/export.h>
 #include <linux/version.h>
 #include <linux/init.h>
 
@@ -25,7 +26,6 @@
 #include <asm/thread_info.h>	/* TI_UWINMASK */
 #include <asm/errno.h>
 #include <asm/pgtable.h>	/* PGDIR_SHIFT */
-#include <asm/export.h>
 
 	.data
 /* The following are used with the prom_vector node-ops to figure out
diff --git a/arch/sparc/kernel/head_64.S b/arch/sparc/kernel/head_64.S
index 72a5bdc833ea..cf0549134234 100644
--- a/arch/sparc/kernel/head_64.S
+++ b/arch/sparc/kernel/head_64.S
@@ -9,6 +9,7 @@
 
 #include <linux/version.h>
 #include <linux/errno.h>
+#include <linux/export.h>
 #include <linux/threads.h>
 #include <linux/init.h>
 #include <linux/linkage.h>
@@ -33,7 +34,6 @@
 #include <asm/estate.h>
 #include <asm/sfafsr.h>
 #include <asm/unistd.h>
-#include <asm/export.h>
 
 /* This section from from _start to sparc64_boot_end should fit into
  * 0x0000000000404000 to 0x0000000000408000.
diff --git a/arch/sparc/kernel/process_64.c b/arch/sparc/kernel/process_64.c
index 1ea3f37fa985..529adfecd58c 100644
--- a/arch/sparc/kernel/process_64.c
+++ b/arch/sparc/kernel/process_64.c
@@ -295,7 +295,7 @@ void arch_trigger_cpumask_backtrace(const cpumask_t *mask, int exclude_cpu)
 
 #ifdef CONFIG_MAGIC_SYSRQ
 
-static void sysrq_handle_globreg(int key)
+static void sysrq_handle_globreg(u8 key)
 {
 	trigger_all_cpu_backtrace();
 }
@@ -370,7 +370,7 @@ static void pmu_snapshot_all_cpus(void)
 	spin_unlock_irqrestore(&global_cpu_snapshot_lock, flags);
 }
 
-static void sysrq_handle_globpmu(int key)
+static void sysrq_handle_globpmu(u8 key)
 {
 	pmu_snapshot_all_cpus();
 }
diff --git a/arch/sparc/lib/U1memcpy.S b/arch/sparc/lib/U1memcpy.S
index a6f4ee391897..635398ec7540 100644
--- a/arch/sparc/lib/U1memcpy.S
+++ b/arch/sparc/lib/U1memcpy.S
@@ -6,10 +6,10 @@
  */
 
 #ifdef __KERNEL__
+#include <linux/export.h>
 #include <linux/linkage.h>
 #include <asm/visasm.h>
 #include <asm/asi.h>
-#include <asm/export.h>
 #define GLOBAL_SPARE	g7
 #else
 #define GLOBAL_SPARE	g5
diff --git a/arch/sparc/lib/VISsave.S b/arch/sparc/lib/VISsave.S
index 9c8eb2017d5b..31a0c336c185 100644
--- a/arch/sparc/lib/VISsave.S
+++ b/arch/sparc/lib/VISsave.S
@@ -7,6 +7,7 @@
  * Copyright (C) 1998 Jakub Jelinek (jj@ultra.linux.cz)
  */
 
+#include <linux/export.h>
 #include <linux/linkage.h>
 
 #include <asm/asi.h>
@@ -14,7 +15,6 @@
 #include <asm/ptrace.h>
 #include <asm/visasm.h>
 #include <asm/thread_info.h>
-#include <asm/export.h>
 
 	/* On entry: %o5=current FPRS value, %g7 is callers address */
 	/* May clobber %o5, %g1, %g2, %g3, %g7, %icc, %xcc */
diff --git a/arch/sparc/lib/ashldi3.S b/arch/sparc/lib/ashldi3.S
index 2d72de88af90..2a9e7c4fb260 100644
--- a/arch/sparc/lib/ashldi3.S
+++ b/arch/sparc/lib/ashldi3.S
@@ -6,8 +6,8 @@
  * Copyright (C) 1999 David S. Miller (davem@redhat.com)
  */
 
+#include <linux/export.h>
 #include <linux/linkage.h>
-#include <asm/export.h>
 
 	.text
 ENTRY(__ashldi3)
diff --git a/arch/sparc/lib/ashrdi3.S b/arch/sparc/lib/ashrdi3.S
index 05dfda9f5005..8fd0b311722f 100644
--- a/arch/sparc/lib/ashrdi3.S
+++ b/arch/sparc/lib/ashrdi3.S
@@ -6,8 +6,8 @@
  * Copyright (C) 1995 David S. Miller (davem@caip.rutgers.edu)
  */
 
+#include <linux/export.h>
 #include <linux/linkage.h>
-#include <asm/export.h>
 
 	.text
 ENTRY(__ashrdi3)
diff --git a/arch/sparc/lib/atomic_64.S b/arch/sparc/lib/atomic_64.S
index 8245d4a97301..4f8cab2fb9cd 100644
--- a/arch/sparc/lib/atomic_64.S
+++ b/arch/sparc/lib/atomic_64.S
@@ -4,10 +4,10 @@
  * Copyright (C) 1999, 2007 2012 David S. Miller (davem@davemloft.net)
  */
 
+#include <linux/export.h>
 #include <linux/linkage.h>
 #include <asm/asi.h>
 #include <asm/backoff.h>
-#include <asm/export.h>
 
 	.text
 
diff --git a/arch/sparc/lib/bitops.S b/arch/sparc/lib/bitops.S
index 9d647f977618..9c91cbb310e7 100644
--- a/arch/sparc/lib/bitops.S
+++ b/arch/sparc/lib/bitops.S
@@ -4,10 +4,10 @@
  * Copyright (C) 2000, 2007 David S. Miller (davem@davemloft.net)
  */
 
+#include <linux/export.h>
 #include <linux/linkage.h>
 #include <asm/asi.h>
 #include <asm/backoff.h>
-#include <asm/export.h>
 
 	.text
 
diff --git a/arch/sparc/lib/blockops.S b/arch/sparc/lib/blockops.S
index 76ddd1ff6833..5b92959a4d48 100644
--- a/arch/sparc/lib/blockops.S
+++ b/arch/sparc/lib/blockops.S
@@ -5,9 +5,9 @@
  * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu)
  */
 
+#include <linux/export.h>
 #include <linux/linkage.h>
 #include <asm/page.h>
-#include <asm/export.h>
 
 	/* Zero out 64 bytes of memory at (buf + offset).
 	 * Assumes %g1 contains zero.
diff --git a/arch/sparc/lib/bzero.S b/arch/sparc/lib/bzero.S
index 87fec4cbe10c..2bfa44a6b25e 100644
--- a/arch/sparc/lib/bzero.S
+++ b/arch/sparc/lib/bzero.S
@@ -5,8 +5,8 @@
  * Copyright (C) 2005 David S. Miller <davem@davemloft.net>
  */
 
+#include <linux/export.h>
 #include <linux/linkage.h>
-#include <asm/export.h>
 
 	.text
 
diff --git a/arch/sparc/lib/checksum_32.S b/arch/sparc/lib/checksum_32.S
index 781e39b3c009..84ad709cbecb 100644
--- a/arch/sparc/lib/checksum_32.S
+++ b/arch/sparc/lib/checksum_32.S
@@ -14,8 +14,8 @@
  *	BSD4.4 portable checksum routine
  */
 
+#include <linux/export.h>
 #include <asm/errno.h>
-#include <asm/export.h>
 
 #define CSUM_BIGCHUNK(buf, offset, sum, t0, t1, t2, t3, t4, t5)	\
 	ldd	[buf + offset + 0x00], t0;			\
diff --git a/arch/sparc/lib/checksum_64.S b/arch/sparc/lib/checksum_64.S
index 9700ef1730df..32b626f3fe4d 100644
--- a/arch/sparc/lib/checksum_64.S
+++ b/arch/sparc/lib/checksum_64.S
@@ -14,7 +14,7 @@
  *	BSD4.4 portable checksum routine
  */
 
-#include <asm/export.h>
+#include <linux/export.h>
 	.text
 
 csum_partial_fix_alignment:
diff --git a/arch/sparc/lib/clear_page.S b/arch/sparc/lib/clear_page.S
index 302d3454a994..e63458194f5a 100644
--- a/arch/sparc/lib/clear_page.S
+++ b/arch/sparc/lib/clear_page.S
@@ -5,13 +5,13 @@
  * Copyright (C) 1997 Jakub Jelinek (jakub@redhat.com)
  */
 
+#include <linux/export.h>
 #include <linux/pgtable.h>
 #include <asm/visasm.h>
 #include <asm/thread_info.h>
 #include <asm/page.h>
 #include <asm/spitfire.h>
 #include <asm/head.h>
-#include <asm/export.h>
 
 	/* What we used to do was lock a TLB entry into a specific
 	 * TLB slot, clear the page with interrupts disabled, then
diff --git a/arch/sparc/lib/copy_in_user.S b/arch/sparc/lib/copy_in_user.S
index 66e90bf528e2..e23e6a69ff92 100644
--- a/arch/sparc/lib/copy_in_user.S
+++ b/arch/sparc/lib/copy_in_user.S
@@ -4,9 +4,9 @@
  * Copyright (C) 1999, 2000, 2004 David S. Miller (davem@redhat.com)
  */
 
+#include <linux/export.h>
 #include <linux/linkage.h>
 #include <asm/asi.h>
-#include <asm/export.h>
 
 #define XCC xcc
 
diff --git a/arch/sparc/lib/copy_page.S b/arch/sparc/lib/copy_page.S
index 5ebcfd479f4f..7a041f3ebc58 100644
--- a/arch/sparc/lib/copy_page.S
+++ b/arch/sparc/lib/copy_page.S
@@ -5,13 +5,13 @@
  * Copyright (C) 1997 Jakub Jelinek (jakub@redhat.com)
  */
 
+#include <linux/export.h>
 #include <asm/visasm.h>
 #include <asm/thread_info.h>
 #include <asm/page.h>
 #include <linux/pgtable.h>
 #include <asm/spitfire.h>
 #include <asm/head.h>
-#include <asm/export.h>
 
 	/* What we used to do was lock a TLB entry into a specific
 	 * TLB slot, clear the page with interrupts disabled, then
diff --git a/arch/sparc/lib/copy_user.S b/arch/sparc/lib/copy_user.S
index 954572c78539..7bb2ef68881d 100644
--- a/arch/sparc/lib/copy_user.S
+++ b/arch/sparc/lib/copy_user.S
@@ -12,11 +12,11 @@
  * Returns 0 if successful, otherwise count of bytes not copied yet
  */
 
+#include <linux/export.h>
 #include <asm/ptrace.h>
 #include <asm/asmmacro.h>
 #include <asm/page.h>
 #include <asm/thread_info.h>
-#include <asm/export.h>
 
 /* Work around cpp -rob */
 #define ALLOC #alloc
diff --git a/arch/sparc/lib/csum_copy.S b/arch/sparc/lib/csum_copy.S
index d839956407a7..f968e83bc93b 100644
--- a/arch/sparc/lib/csum_copy.S
+++ b/arch/sparc/lib/csum_copy.S
@@ -4,7 +4,7 @@
  * Copyright (C) 2005 David S. Miller <davem@davemloft.net>
  */
 
-#include <asm/export.h>
+#include <linux/export.h>
 
 #ifdef __KERNEL__
 #define GLOBAL_SPARE	%g7
diff --git a/arch/sparc/lib/divdi3.S b/arch/sparc/lib/divdi3.S
index a7389409d9fa..4ba901acd572 100644
--- a/arch/sparc/lib/divdi3.S
+++ b/arch/sparc/lib/divdi3.S
@@ -5,7 +5,7 @@ This file is part of GNU CC.
 
  */
 
-#include <asm/export.h>
+#include <linux/export.h>
 	.text
 	.align 4
 	.globl __divdi3
diff --git a/arch/sparc/lib/ffs.S b/arch/sparc/lib/ffs.S
index 5a11d864fa05..3a9ad8ffdfe8 100644
--- a/arch/sparc/lib/ffs.S
+++ b/arch/sparc/lib/ffs.S
@@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0 */
+#include <linux/export.h>
 #include <linux/linkage.h>
-#include <asm/export.h>
 
 	.register	%g2,#scratch
 
diff --git a/arch/sparc/lib/fls.S b/arch/sparc/lib/fls.S
index 06b8d300bcae..ccf97fb7d8cd 100644
--- a/arch/sparc/lib/fls.S
+++ b/arch/sparc/lib/fls.S
@@ -5,8 +5,8 @@
  * and onward.
  */
 
+#include <linux/export.h>
 #include <linux/linkage.h>
-#include <asm/export.h>
 
 	.text
 	.register	%g2, #scratch
diff --git a/arch/sparc/lib/fls64.S b/arch/sparc/lib/fls64.S
index c83e22ae9586..87005b67d378 100644
--- a/arch/sparc/lib/fls64.S
+++ b/arch/sparc/lib/fls64.S
@@ -5,8 +5,8 @@
  * and onward.
  */
 
+#include <linux/export.h>
 #include <linux/linkage.h>
-#include <asm/export.h>
 
 	.text
 	.register	%g2, #scratch
diff --git a/arch/sparc/lib/hweight.S b/arch/sparc/lib/hweight.S
index 0ddbbb031822..eebee59b0655 100644
--- a/arch/sparc/lib/hweight.S
+++ b/arch/sparc/lib/hweight.S
@@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0 */
+#include <linux/export.h>
 #include <linux/linkage.h>
-#include <asm/export.h>
 
 	.text
 	.align	32
diff --git a/arch/sparc/lib/ipcsum.S b/arch/sparc/lib/ipcsum.S
index 531d89c9d5d9..7fa8fd4b795a 100644
--- a/arch/sparc/lib/ipcsum.S
+++ b/arch/sparc/lib/ipcsum.S
@@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0 */
+#include <linux/export.h>
 #include <linux/linkage.h>
-#include <asm/export.h>
 
 	.text
 ENTRY(ip_fast_csum) /* %o0 = iph, %o1 = ihl */
diff --git a/arch/sparc/lib/locks.S b/arch/sparc/lib/locks.S
index 9a1289a3fb28..47a39f4384a2 100644
--- a/arch/sparc/lib/locks.S
+++ b/arch/sparc/lib/locks.S
@@ -7,11 +7,11 @@
  * Copyright (C) 1998 Jakub Jelinek   (jj@ultra.linux.cz)
  */
 
+#include <linux/export.h>
 #include <asm/ptrace.h>
 #include <asm/psr.h>
 #include <asm/smp.h>
 #include <asm/spinlock.h>
-#include <asm/export.h>
 
 	.text
 	.align	4
diff --git a/arch/sparc/lib/lshrdi3.S b/arch/sparc/lib/lshrdi3.S
index 509ca6682da8..09bf581a0ba5 100644
--- a/arch/sparc/lib/lshrdi3.S
+++ b/arch/sparc/lib/lshrdi3.S
@@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0 */
+#include <linux/export.h>
 #include <linux/linkage.h>
-#include <asm/export.h>
 
 ENTRY(__lshrdi3)
 	cmp	%o2, 0
diff --git a/arch/sparc/lib/mcount.S b/arch/sparc/lib/mcount.S
index deba6fa0bc78..f7f7910eb41e 100644
--- a/arch/sparc/lib/mcount.S
+++ b/arch/sparc/lib/mcount.S
@@ -6,8 +6,8 @@
  * This can also be tweaked for kernel stack overflow detection.
  */
 
+#include <linux/export.h>
 #include <linux/linkage.h>
-#include <asm/export.h>
 
 /*
  * This is the main variant and is called by C code.  GCC's -pg option
diff --git a/arch/sparc/lib/memcmp.S b/arch/sparc/lib/memcmp.S
index a18076ef5af1..c87e8000feba 100644
--- a/arch/sparc/lib/memcmp.S
+++ b/arch/sparc/lib/memcmp.S
@@ -5,9 +5,9 @@
  * Copyright (C) 2000, 2008 David S. Miller (davem@davemloft.net)
  */
 
+#include <linux/export.h>
 #include <linux/linkage.h>
 #include <asm/asm.h>
-#include <asm/export.h>
 
 	.text
 ENTRY(memcmp)
diff --git a/arch/sparc/lib/memcpy.S b/arch/sparc/lib/memcpy.S
index ee823d8c9215..57b1ae0f5924 100644
--- a/arch/sparc/lib/memcpy.S
+++ b/arch/sparc/lib/memcpy.S
@@ -8,7 +8,8 @@
  * Copyright (C) 1996 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
  */
 
-#include <asm/export.h>
+#include <linux/export.h>
+
 #define FUNC(x) 		\
 	.globl	x;		\
 	.type	x,@function;	\
diff --git a/arch/sparc/lib/memmove.S b/arch/sparc/lib/memmove.S
index 3132b6316144..543dda7b9dac 100644
--- a/arch/sparc/lib/memmove.S
+++ b/arch/sparc/lib/memmove.S
@@ -5,8 +5,8 @@
  * Copyright (C) 1996, 1997, 1998, 1999 Jakub Jelinek (jj@ultra.linux.cz)
  */
 
+#include <linux/export.h>
 #include <linux/linkage.h>
-#include <asm/export.h>
 
 	.text
 ENTRY(memmove) /* o0=dst o1=src o2=len */
diff --git a/arch/sparc/lib/memscan_32.S b/arch/sparc/lib/memscan_32.S
index c4c2d5b3a2e9..5386a3a20019 100644
--- a/arch/sparc/lib/memscan_32.S
+++ b/arch/sparc/lib/memscan_32.S
@@ -5,7 +5,7 @@
  * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu)
  */
 
-#include <asm/export.h>
+#include <linux/export.h>
 
 /* In essence, this is just a fancy strlen. */
 
diff --git a/arch/sparc/lib/memscan_64.S b/arch/sparc/lib/memscan_64.S
index 36dd638905c3..70a4f21057f2 100644
--- a/arch/sparc/lib/memscan_64.S
+++ b/arch/sparc/lib/memscan_64.S
@@ -6,7 +6,7 @@
  * Copyright (C) 1998 David S. Miller (davem@redhat.com)
  */
 
-	#include <asm/export.h>
+#include <linux/export.h>
 
 #define HI_MAGIC	0x8080808080808080
 #define LO_MAGIC	0x0101010101010101
diff --git a/arch/sparc/lib/memset.S b/arch/sparc/lib/memset.S
index eaff68213fdf..a33419dbb464 100644
--- a/arch/sparc/lib/memset.S
+++ b/arch/sparc/lib/memset.S
@@ -9,8 +9,8 @@
  * clear_user.
  */
 
+#include <linux/export.h>
 #include <asm/ptrace.h>
-#include <asm/export.h>
 
 /* Work around cpp -rob */
 #define ALLOC #alloc
diff --git a/arch/sparc/lib/muldi3.S b/arch/sparc/lib/muldi3.S
index 53054dee66d6..7e1e8cd30a22 100644
--- a/arch/sparc/lib/muldi3.S
+++ b/arch/sparc/lib/muldi3.S
@@ -5,7 +5,7 @@ This file is part of GNU CC.
 
  */
 
-#include <asm/export.h>
+#include <linux/export.h>
 	.text
 	.align 4
 	.globl __muldi3
diff --git a/arch/sparc/lib/multi3.S b/arch/sparc/lib/multi3.S
index 2f187b299345..5bb4c122a2cf 100644
--- a/arch/sparc/lib/multi3.S
+++ b/arch/sparc/lib/multi3.S
@@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0 */
+#include <linux/export.h>
 #include <linux/linkage.h>
-#include <asm/export.h>
 
 	.text
 	.align	4
diff --git a/arch/sparc/lib/strlen.S b/arch/sparc/lib/strlen.S
index dd111bbad5df..27478b3f1647 100644
--- a/arch/sparc/lib/strlen.S
+++ b/arch/sparc/lib/strlen.S
@@ -6,9 +6,9 @@
  * Copyright (C) 1996, 1997 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
  */
 
+#include <linux/export.h>
 #include <linux/linkage.h>
 #include <asm/asm.h>
-#include <asm/export.h>
 
 #define LO_MAGIC 0x01010101
 #define HI_MAGIC 0x80808080
diff --git a/arch/sparc/lib/strncmp_32.S b/arch/sparc/lib/strncmp_32.S
index 794733f036b6..387bbf621548 100644
--- a/arch/sparc/lib/strncmp_32.S
+++ b/arch/sparc/lib/strncmp_32.S
@@ -4,8 +4,8 @@
  *            generic strncmp routine.
  */
 
+#include <linux/export.h>
 #include <linux/linkage.h>
-#include <asm/export.h>
 
 	.text
 ENTRY(strncmp)
diff --git a/arch/sparc/lib/strncmp_64.S b/arch/sparc/lib/strncmp_64.S
index 3d37d65f674c..76c1207ecf5a 100644
--- a/arch/sparc/lib/strncmp_64.S
+++ b/arch/sparc/lib/strncmp_64.S
@@ -5,9 +5,9 @@
  * Copyright (C) 1997 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
  */
 
+#include <linux/export.h>
 #include <linux/linkage.h>
 #include <asm/asi.h>
-#include <asm/export.h>
 
 	.text
 ENTRY(strncmp)
diff --git a/arch/sparc/lib/xor.S b/arch/sparc/lib/xor.S
index f6af7c7ee6fc..35461e3b2a9b 100644
--- a/arch/sparc/lib/xor.S
+++ b/arch/sparc/lib/xor.S
@@ -9,12 +9,12 @@
  * Copyright (C) 2006 David S. Miller <davem@davemloft.net>
  */
 
+#include <linux/export.h>
 #include <linux/linkage.h>
 #include <asm/visasm.h>
 #include <asm/asi.h>
 #include <asm/dcu.h>
 #include <asm/spitfire.h>
-#include <asm/export.h>
 
 /*
  *	Requirements:
diff --git a/arch/sparc/mm/tlb.c b/arch/sparc/mm/tlb.c
index 0d41c94ec3ac..b44d79d778c7 100644
--- a/arch/sparc/mm/tlb.c
+++ b/arch/sparc/mm/tlb.c
@@ -128,6 +128,7 @@ void tlb_batch_add(struct mm_struct *mm, unsigned long vaddr,
 			goto no_cache_flush;
 
 		/* A real file page? */
+		folio = page_folio(page);
 		mapping = folio_flush_mapping(folio);
 		if (!mapping)
 			goto no_cache_flush;
diff --git a/arch/um/Kbuild b/arch/um/Kbuild
index a4e40e534e6a..6cf0c1e5927b 100644
--- a/arch/um/Kbuild
+++ b/arch/um/Kbuild
@@ -1 +1,3 @@
 # SPDX-License-Identifier: GPL-2.0-only
+
+obj-y += kernel/ drivers/ os-Linux/
diff --git a/arch/um/Makefile b/arch/um/Makefile
index da4d5256af2f..82f05f250634 100644
--- a/arch/um/Makefile
+++ b/arch/um/Makefile
@@ -18,15 +18,10 @@ else
 endif
 
 ARCH_DIR := arch/um
-OS := $(shell uname -s)
 # We require bash because the vmlinux link and loader script cpp use bash
 # features.
 SHELL := /bin/bash
 
-core-y			+= $(ARCH_DIR)/kernel/		\
-			   $(ARCH_DIR)/drivers/		\
-			   $(ARCH_DIR)/os-$(OS)/
-
 MODE_INCLUDE	+= -I$(srctree)/$(ARCH_DIR)/include/shared/skas
 
 HEADER_ARCH 	:= $(SUBARCH)
@@ -78,7 +73,7 @@ USER_CFLAGS = $(patsubst $(KERNEL_DEFINES),,$(patsubst -I%,,$(KBUILD_CFLAGS))) \
 		-idirafter $(objtree)/include -D__KERNEL__ -D__UM_HOST__
 
 #This will adjust *FLAGS accordingly to the platform.
-include $(srctree)/$(ARCH_DIR)/Makefile-os-$(OS)
+include $(srctree)/$(ARCH_DIR)/Makefile-os-Linux
 
 KBUILD_CPPFLAGS += -I$(srctree)/$(HOST_DIR)/include \
 		   -I$(srctree)/$(HOST_DIR)/include/uapi \
@@ -155,4 +150,4 @@ archclean:
 	@find . \( -name '*.bb' -o -name '*.bbg' -o -name '*.da' \
 		-o -name '*.gcov' \) -type f -print | xargs rm -f
 
-export HEADER_ARCH SUBARCH USER_CFLAGS CFLAGS_NO_HARDENING OS DEV_NULL_PATH
+export HEADER_ARCH SUBARCH USER_CFLAGS CFLAGS_NO_HARDENING DEV_NULL_PATH
diff --git a/arch/um/configs/i386_defconfig b/arch/um/configs/i386_defconfig
index 630be793759e..e543cbac8792 100644
--- a/arch/um/configs/i386_defconfig
+++ b/arch/um/configs/i386_defconfig
@@ -34,6 +34,7 @@ CONFIG_TTY_CHAN=y
 CONFIG_XTERM_CHAN=y
 CONFIG_CON_CHAN="pts"
 CONFIG_SSL_CHAN="pts"
+CONFIG_SOUND=m
 CONFIG_UML_SOUND=m
 CONFIG_DEVTMPFS=y
 CONFIG_DEVTMPFS_MOUNT=y
diff --git a/arch/um/configs/x86_64_defconfig b/arch/um/configs/x86_64_defconfig
index 8540d3370272..939cb12318ca 100644
--- a/arch/um/configs/x86_64_defconfig
+++ b/arch/um/configs/x86_64_defconfig
@@ -32,6 +32,7 @@ CONFIG_TTY_CHAN=y
 CONFIG_XTERM_CHAN=y
 CONFIG_CON_CHAN="pts"
 CONFIG_SSL_CHAN="pts"
+CONFIG_SOUND=m
 CONFIG_UML_SOUND=m
 CONFIG_DEVTMPFS=y
 CONFIG_DEVTMPFS_MOUNT=y
diff --git a/arch/um/drivers/Kconfig b/arch/um/drivers/Kconfig
index 36911b1fddcf..b94b2618e7d8 100644
--- a/arch/um/drivers/Kconfig
+++ b/arch/um/drivers/Kconfig
@@ -111,24 +111,14 @@ config SSL_CHAN
 
 config UML_SOUND
 	tristate "Sound support"
+	depends on SOUND
+	select SOUND_OSS_CORE
 	help
 	  This option enables UML sound support.  If enabled, it will pull in
-	  soundcore and the UML hostaudio relay, which acts as a intermediary
+	  the UML hostaudio relay, which acts as a intermediary
 	  between the host's dsp and mixer devices and the UML sound system.
 	  It is safe to say 'Y' here.
 
-config SOUND
-	tristate
-	default UML_SOUND
-
-config SOUND_OSS_CORE
-	bool
-	default UML_SOUND
-
-config HOSTAUDIO
-	tristate
-	default UML_SOUND
-
 endmenu
 
 menu "UML Network Devices"
diff --git a/arch/um/drivers/Makefile b/arch/um/drivers/Makefile
index a461a950f051..0e6af81096fd 100644
--- a/arch/um/drivers/Makefile
+++ b/arch/um/drivers/Makefile
@@ -54,7 +54,7 @@ obj-$(CONFIG_UML_NET) += net.o
 obj-$(CONFIG_MCONSOLE) += mconsole.o
 obj-$(CONFIG_MMAPPER) += mmapper_kern.o 
 obj-$(CONFIG_BLK_DEV_UBD) += ubd.o 
-obj-$(CONFIG_HOSTAUDIO) += hostaudio.o
+obj-$(CONFIG_UML_SOUND) += hostaudio.o
 obj-$(CONFIG_NULL_CHAN) += null.o 
 obj-$(CONFIG_PORT_CHAN) += port.o
 obj-$(CONFIG_PTY_CHAN) += pty.o
diff --git a/arch/um/drivers/hostaudio_kern.c b/arch/um/drivers/hostaudio_kern.c
index 5b064d360cb7..c42b793bce65 100644
--- a/arch/um/drivers/hostaudio_kern.c
+++ b/arch/um/drivers/hostaudio_kern.c
@@ -310,7 +310,7 @@ static const struct file_operations hostmixer_fops = {
 	.release        = hostmixer_release,
 };
 
-struct {
+static struct {
 	int dev_audio;
 	int dev_mixer;
 } module_data;
diff --git a/arch/um/drivers/line.c b/arch/um/drivers/line.c
index 02b0befd6763..b98545f3edb5 100644
--- a/arch/um/drivers/line.c
+++ b/arch/um/drivers/line.c
@@ -184,7 +184,7 @@ void line_flush_chars(struct tty_struct *tty)
 	line_flush_buffer(tty);
 }
 
-int line_write(struct tty_struct *tty, const unsigned char *buf, int len)
+ssize_t line_write(struct tty_struct *tty, const u8 *buf, size_t len)
 {
 	struct line *line = tty->driver_data;
 	unsigned long flags;
diff --git a/arch/um/drivers/line.h b/arch/um/drivers/line.h
index f15be75a3bf3..e84fb9b4165e 100644
--- a/arch/um/drivers/line.h
+++ b/arch/um/drivers/line.h
@@ -64,8 +64,7 @@ extern void line_cleanup(struct tty_struct *tty);
 extern void line_hangup(struct tty_struct *tty);
 extern int line_setup(char **conf, unsigned nlines, char **def,
 		      char *init, char *name);
-extern int line_write(struct tty_struct *tty, const unsigned char *buf,
-		      int len);
+extern ssize_t line_write(struct tty_struct *tty, const u8 *buf, size_t len);
 extern unsigned int line_chars_in_buffer(struct tty_struct *tty);
 extern void line_flush_buffer(struct tty_struct *tty);
 extern void line_flush_chars(struct tty_struct *tty);
diff --git a/arch/um/drivers/port_kern.c b/arch/um/drivers/port_kern.c
index efa8b7304090..c52b3ff3c092 100644
--- a/arch/um/drivers/port_kern.c
+++ b/arch/um/drivers/port_kern.c
@@ -144,7 +144,7 @@ static void port_work_proc(struct work_struct *unused)
 	local_irq_restore(flags);
 }
 
-DECLARE_WORK(port_work, port_work_proc);
+static DECLARE_WORK(port_work, port_work_proc);
 
 static irqreturn_t port_interrupt(int irq, void *data)
 {
diff --git a/arch/um/drivers/slirp_kern.c b/arch/um/drivers/slirp_kern.c
index 2d9769237f08..0a6151ee9572 100644
--- a/arch/um/drivers/slirp_kern.c
+++ b/arch/um/drivers/slirp_kern.c
@@ -15,7 +15,7 @@ struct slirp_init {
 	struct arg_list_dummy_wrapper argw;  /* XXX should be simpler... */
 };
 
-void slirp_init(struct net_device *dev, void *data)
+static void slirp_init(struct net_device *dev, void *data)
 {
 	struct uml_net_private *private;
 	struct slirp_data *spri;
diff --git a/arch/um/drivers/virt-pci.c b/arch/um/drivers/virt-pci.c
index 7699ca5f35d4..ffe2ee8a0246 100644
--- a/arch/um/drivers/virt-pci.c
+++ b/arch/um/drivers/virt-pci.c
@@ -544,6 +544,7 @@ static void um_pci_irq_vq_cb(struct virtqueue *vq)
 	}
 }
 
+#ifdef CONFIG_OF
 /* Copied from arch/x86/kernel/devicetree.c */
 struct device_node *pcibios_get_phb_of_node(struct pci_bus *bus)
 {
@@ -562,6 +563,7 @@ struct device_node *pcibios_get_phb_of_node(struct pci_bus *bus)
 	}
 	return NULL;
 }
+#endif
 
 static int um_pci_init_vqs(struct um_pci_device *dev)
 {
diff --git a/arch/um/drivers/xterm_kern.c b/arch/um/drivers/xterm_kern.c
index 50f11b7b4774..8011e51993d5 100644
--- a/arch/um/drivers/xterm_kern.c
+++ b/arch/um/drivers/xterm_kern.c
@@ -9,6 +9,7 @@
 #include <asm/irq.h>
 #include <irq_kern.h>
 #include <os.h>
+#include "xterm.h"
 
 struct xterm_wait {
 	struct completion ready;
diff --git a/arch/um/include/shared/irq_kern.h b/arch/um/include/shared/irq_kern.h
index f2dc817abb7c..44357fa6ee29 100644
--- a/arch/um/include/shared/irq_kern.h
+++ b/arch/um/include/shared/irq_kern.h
@@ -76,4 +76,5 @@ static inline bool um_irq_timetravel_handler_used(void)
 }
 
 void um_free_irq(int irq, void *dev_id);
+void free_irqs(void);
 #endif
diff --git a/arch/um/kernel/irq.c b/arch/um/kernel/irq.c
index a8873d9bc28b..635d44606bfe 100644
--- a/arch/um/kernel/irq.c
+++ b/arch/um/kernel/irq.c
@@ -23,8 +23,6 @@
 #include <linux/time-internal.h>
 
 
-extern void free_irqs(void);
-
 /* When epoll triggers we do not know why it did so
  * we can also have different IRQs for read and write.
  * This is why we keep a small irq_reg array for each fd -
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 95315d3474a2..5bfe5caaa444 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -335,9 +335,5 @@ define archhelp
   echo  '			  bzdisk/fdimage*/hdimage/isoimage also accept:'
   echo  '			  FDARGS="..."  arguments for the booted kernel'
   echo  '			  FDINITRD=file initrd for the booted kernel'
-  echo  ''
-  echo  '  kvm_guest.config	- Enable Kconfig items for running this kernel as a KVM guest'
-  echo  '  xen.config		- Enable Kconfig items for running this kernel as a Xen guest'
-  echo  '  x86_debug.config	- Enable tip tree debugging options for testing'
 
 endef
diff --git a/arch/x86/hyperv/hv_apic.c b/arch/x86/hyperv/hv_apic.c
index 187e13b15e9a..97bfe5f0531f 100644
--- a/arch/x86/hyperv/hv_apic.c
+++ b/arch/x86/hyperv/hv_apic.c
@@ -175,8 +175,11 @@ static bool __send_ipi_mask(const struct cpumask *mask, int vector,
 	    (exclude_self && weight == 1 && cpumask_test_cpu(this_cpu, mask)))
 		return true;
 
-	if (!hv_hypercall_pg)
-		return false;
+	/* A fully enlightened TDX VM uses GHCI rather than hv_hypercall_pg. */
+	if (!hv_hypercall_pg) {
+		if (ms_hyperv.paravisor_present || !hv_isolation_type_tdx())
+			return false;
+	}
 
 	if ((vector < HV_IPI_LOW_VECTOR) || (vector > HV_IPI_HIGH_VECTOR))
 		return false;
@@ -229,9 +232,15 @@ static bool __send_ipi_one(int cpu, int vector)
 
 	trace_hyperv_send_ipi_one(cpu, vector);
 
-	if (!hv_hypercall_pg || (vp == VP_INVAL))
+	if (vp == VP_INVAL)
 		return false;
 
+	/* A fully enlightened TDX VM uses GHCI rather than hv_hypercall_pg. */
+	if (!hv_hypercall_pg) {
+		if (ms_hyperv.paravisor_present || !hv_isolation_type_tdx())
+			return false;
+	}
+
 	if ((vector < HV_IPI_LOW_VECTOR) || (vector > HV_IPI_HIGH_VECTOR))
 		return false;
 
diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
index 507d98331e7c..783ed339f341 100644
--- a/arch/x86/hyperv/hv_init.c
+++ b/arch/x86/hyperv/hv_init.c
@@ -19,6 +19,7 @@
 #include <asm/hyperv-tlfs.h>
 #include <asm/mshyperv.h>
 #include <asm/idtentry.h>
+#include <asm/set_memory.h>
 #include <linux/kexec.h>
 #include <linux/version.h>
 #include <linux/vmalloc.h>
@@ -52,7 +53,7 @@ static int hyperv_init_ghcb(void)
 	void *ghcb_va;
 	void **ghcb_base;
 
-	if (!hv_isolation_type_snp())
+	if (!ms_hyperv.paravisor_present || !hv_isolation_type_snp())
 		return 0;
 
 	if (!hv_ghcb_pg)
@@ -80,7 +81,7 @@ static int hyperv_init_ghcb(void)
 static int hv_cpu_init(unsigned int cpu)
 {
 	union hv_vp_assist_msr_contents msr = { 0 };
-	struct hv_vp_assist_page **hvp = &hv_vp_assist_page[cpu];
+	struct hv_vp_assist_page **hvp;
 	int ret;
 
 	ret = hv_common_cpu_init(cpu);
@@ -90,6 +91,7 @@ static int hv_cpu_init(unsigned int cpu)
 	if (!hv_vp_assist_page)
 		return 0;
 
+	hvp = &hv_vp_assist_page[cpu];
 	if (hv_root_partition) {
 		/*
 		 * For root partition we get the hypervisor provided VP assist
@@ -107,8 +109,21 @@ static int hv_cpu_init(unsigned int cpu)
 		 * in hv_cpu_die(), otherwise a CPU may not be stopped in the
 		 * case of CPU offlining and the VM will hang.
 		 */
-		if (!*hvp)
+		if (!*hvp) {
 			*hvp = __vmalloc(PAGE_SIZE, GFP_KERNEL | __GFP_ZERO);
+
+			/*
+			 * Hyper-V should never specify a VM that is a Confidential
+			 * VM and also running in the root partition. Root partition
+			 * is blocked to run in Confidential VM. So only decrypt assist
+			 * page in non-root partition here.
+			 */
+			if (*hvp && !ms_hyperv.paravisor_present && hv_isolation_type_snp()) {
+				WARN_ON_ONCE(set_memory_decrypted((unsigned long)(*hvp), 1));
+				memset(*hvp, 0, PAGE_SIZE);
+			}
+		}
+
 		if (*hvp)
 			msr.pfn = vmalloc_to_pfn(*hvp);
 
@@ -379,6 +394,36 @@ static void __init hv_get_partition_id(void)
 	local_irq_restore(flags);
 }
 
+static u8 __init get_vtl(void)
+{
+	u64 control = HV_HYPERCALL_REP_COMP_1 | HVCALL_GET_VP_REGISTERS;
+	struct hv_get_vp_registers_input *input;
+	struct hv_get_vp_registers_output *output;
+	unsigned long flags;
+	u64 ret;
+
+	local_irq_save(flags);
+	input = *this_cpu_ptr(hyperv_pcpu_input_arg);
+	output = (struct hv_get_vp_registers_output *)input;
+
+	memset(input, 0, struct_size(input, element, 1));
+	input->header.partitionid = HV_PARTITION_ID_SELF;
+	input->header.vpindex = HV_VP_INDEX_SELF;
+	input->header.inputvtl = 0;
+	input->element[0].name0 = HV_X64_REGISTER_VSM_VP_STATUS;
+
+	ret = hv_do_hypercall(control, input, output);
+	if (hv_result_success(ret)) {
+		ret = output->as64.low & HV_X64_VTL_MASK;
+	} else {
+		pr_err("Failed to get VTL(%lld) and set VTL to zero by default.\n", ret);
+		ret = 0;
+	}
+
+	local_irq_restore(flags);
+	return ret;
+}
+
 /*
  * This function is to be invoked early in the boot sequence after the
  * hypervisor has been detected.
@@ -399,14 +444,24 @@ void __init hyperv_init(void)
 	if (hv_common_init())
 		return;
 
-	hv_vp_assist_page = kcalloc(num_possible_cpus(),
-				    sizeof(*hv_vp_assist_page), GFP_KERNEL);
+	/*
+	 * The VP assist page is useless to a TDX guest: the only use we
+	 * would have for it is lazy EOI, which can not be used with TDX.
+	 */
+	if (hv_isolation_type_tdx())
+		hv_vp_assist_page = NULL;
+	else
+		hv_vp_assist_page = kcalloc(num_possible_cpus(),
+					    sizeof(*hv_vp_assist_page),
+					    GFP_KERNEL);
 	if (!hv_vp_assist_page) {
 		ms_hyperv.hints &= ~HV_X64_ENLIGHTENED_VMCS_RECOMMENDED;
-		goto common_free;
+
+		if (!hv_isolation_type_tdx())
+			goto common_free;
 	}
 
-	if (hv_isolation_type_snp()) {
+	if (ms_hyperv.paravisor_present && hv_isolation_type_snp()) {
 		/* Negotiate GHCB Version. */
 		if (!hv_ghcb_negotiate_protocol())
 			hv_ghcb_terminate(SEV_TERM_SET_GEN,
@@ -426,12 +481,32 @@ void __init hyperv_init(void)
 	 * Setup the hypercall page and enable hypercalls.
 	 * 1. Register the guest ID
 	 * 2. Enable the hypercall and register the hypercall page
+	 *
+	 * A TDX VM with no paravisor only uses TDX GHCI rather than hv_hypercall_pg:
+	 * when the hypercall input is a page, such a VM must pass a decrypted
+	 * page to Hyper-V, e.g. hv_post_message() uses the per-CPU page
+	 * hyperv_pcpu_input_arg, which is decrypted if no paravisor is present.
+	 *
+	 * A TDX VM with the paravisor uses hv_hypercall_pg for most hypercalls,
+	 * which are handled by the paravisor and the VM must use an encrypted
+	 * input page: in such a VM, the hyperv_pcpu_input_arg is encrypted and
+	 * used in the hypercalls, e.g. see hv_mark_gpa_visibility() and
+	 * hv_arch_irq_unmask(). Such a VM uses TDX GHCI for two hypercalls:
+	 * 1. HVCALL_SIGNAL_EVENT: see vmbus_set_event() and _hv_do_fast_hypercall8().
+	 * 2. HVCALL_POST_MESSAGE: the input page must be a decrypted page, i.e.
+	 * hv_post_message() in such a VM can't use the encrypted hyperv_pcpu_input_arg;
+	 * instead, hv_post_message() uses the post_msg_page, which is decrypted
+	 * in such a VM and is only used in such a VM.
 	 */
 	guest_id = hv_generate_guest_id(LINUX_VERSION_CODE);
 	wrmsrl(HV_X64_MSR_GUEST_OS_ID, guest_id);
 
-	/* Hyper-V requires to write guest os id via ghcb in SNP IVM. */
-	hv_ghcb_msr_write(HV_X64_MSR_GUEST_OS_ID, guest_id);
+	/* With the paravisor, the VM must also write the ID via GHCB/GHCI */
+	hv_ivm_msr_write(HV_X64_MSR_GUEST_OS_ID, guest_id);
+
+	/* A TDX VM with no paravisor only uses TDX GHCI rather than hv_hypercall_pg */
+	if (hv_isolation_type_tdx() && !ms_hyperv.paravisor_present)
+		goto skip_hypercall_pg_init;
 
 	hv_hypercall_pg = __vmalloc_node_range(PAGE_SIZE, 1, VMALLOC_START,
 			VMALLOC_END, GFP_KERNEL, PAGE_KERNEL_ROX,
@@ -472,6 +547,7 @@ void __init hyperv_init(void)
 		wrmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64);
 	}
 
+skip_hypercall_pg_init:
 	/*
 	 * Some versions of Hyper-V that provide IBT in guest VMs have a bug
 	 * in that there's no ENDBR64 instruction at the entry to the
@@ -527,11 +603,15 @@ void __init hyperv_init(void)
 	/* Query the VMs extended capability once, so that it can be cached. */
 	hv_query_ext_cap(0);
 
+	/* Find the VTL */
+	if (!ms_hyperv.paravisor_present && hv_isolation_type_snp())
+		ms_hyperv.vtl = get_vtl();
+
 	return;
 
 clean_guest_os_id:
 	wrmsrl(HV_X64_MSR_GUEST_OS_ID, 0);
-	hv_ghcb_msr_write(HV_X64_MSR_GUEST_OS_ID, 0);
+	hv_ivm_msr_write(HV_X64_MSR_GUEST_OS_ID, 0);
 	cpuhp_remove_state(cpuhp);
 free_ghcb_page:
 	free_percpu(hv_ghcb_pg);
@@ -552,7 +632,7 @@ void hyperv_cleanup(void)
 
 	/* Reset our OS id */
 	wrmsrl(HV_X64_MSR_GUEST_OS_ID, 0);
-	hv_ghcb_msr_write(HV_X64_MSR_GUEST_OS_ID, 0);
+	hv_ivm_msr_write(HV_X64_MSR_GUEST_OS_ID, 0);
 
 	/*
 	 * Reset hypercall page reference before reset the page,
@@ -615,6 +695,9 @@ bool hv_is_hyperv_initialized(void)
 	if (x86_hyper_type != X86_HYPER_MS_HYPERV)
 		return false;
 
+	/* A TDX VM with no paravisor uses TDX GHCI call rather than hv_hypercall_pg */
+	if (hv_isolation_type_tdx() && !ms_hyperv.paravisor_present)
+		return true;
 	/*
 	 * Verify that earlier initialization succeeded by checking
 	 * that the hypercall page is setup
diff --git a/arch/x86/hyperv/ivm.c b/arch/x86/hyperv/ivm.c
index 28be6df88063..8c6bf07f7d2b 100644
--- a/arch/x86/hyperv/ivm.c
+++ b/arch/x86/hyperv/ivm.c
@@ -18,6 +18,11 @@
 #include <asm/mshyperv.h>
 #include <asm/hypervisor.h>
 #include <asm/mtrr.h>
+#include <asm/io_apic.h>
+#include <asm/realmode.h>
+#include <asm/e820/api.h>
+#include <asm/desc.h>
+#include <uapi/asm/vmx.h>
 
 #ifdef CONFIG_AMD_MEM_ENCRYPT
 
@@ -56,8 +61,10 @@ union hv_ghcb {
 	} hypercall;
 } __packed __aligned(HV_HYP_PAGE_SIZE);
 
+/* Only used in an SNP VM with the paravisor */
 static u16 hv_ghcb_version __ro_after_init;
 
+/* Functions only used in an SNP VM with the paravisor go here. */
 u64 hv_ghcb_hypercall(u64 control, void *input, void *output, u32 input_size)
 {
 	union hv_ghcb *hv_ghcb;
@@ -175,7 +182,7 @@ bool hv_ghcb_negotiate_protocol(void)
 	return true;
 }
 
-void hv_ghcb_msr_write(u64 msr, u64 value)
+static void hv_ghcb_msr_write(u64 msr, u64 value)
 {
 	union hv_ghcb *hv_ghcb;
 	void **ghcb_base;
@@ -203,9 +210,8 @@ void hv_ghcb_msr_write(u64 msr, u64 value)
 
 	local_irq_restore(flags);
 }
-EXPORT_SYMBOL_GPL(hv_ghcb_msr_write);
 
-void hv_ghcb_msr_read(u64 msr, u64 *value)
+static void hv_ghcb_msr_read(u64 msr, u64 *value)
 {
 	union hv_ghcb *hv_ghcb;
 	void **ghcb_base;
@@ -235,7 +241,217 @@ void hv_ghcb_msr_read(u64 msr, u64 *value)
 			| ((u64)lower_32_bits(hv_ghcb->ghcb.save.rdx) << 32);
 	local_irq_restore(flags);
 }
-EXPORT_SYMBOL_GPL(hv_ghcb_msr_read);
+
+/* Only used in a fully enlightened SNP VM, i.e. without the paravisor */
+static u8 ap_start_input_arg[PAGE_SIZE] __bss_decrypted __aligned(PAGE_SIZE);
+static u8 ap_start_stack[PAGE_SIZE] __aligned(PAGE_SIZE);
+static DEFINE_PER_CPU(struct sev_es_save_area *, hv_sev_vmsa);
+
+/* Functions only used in an SNP VM without the paravisor go here. */
+
+#define hv_populate_vmcb_seg(seg, gdtr_base)			\
+do {								\
+	if (seg.selector) {					\
+		seg.base = 0;					\
+		seg.limit = HV_AP_SEGMENT_LIMIT;		\
+		seg.attrib = *(u16 *)(gdtr_base + seg.selector + 5);	\
+		seg.attrib = (seg.attrib & 0xFF) | ((seg.attrib >> 4) & 0xF00); \
+	}							\
+} while (0)							\
+
+static int snp_set_vmsa(void *va, bool vmsa)
+{
+	u64 attrs;
+
+	/*
+	 * Running at VMPL0 allows the kernel to change the VMSA bit for a page
+	 * using the RMPADJUST instruction. However, for the instruction to
+	 * succeed it must target the permissions of a lesser privileged
+	 * (higher numbered) VMPL level, so use VMPL1 (refer to the RMPADJUST
+	 * instruction in the AMD64 APM Volume 3).
+	 */
+	attrs = 1;
+	if (vmsa)
+		attrs |= RMPADJUST_VMSA_PAGE_BIT;
+
+	return rmpadjust((unsigned long)va, RMP_PG_SIZE_4K, attrs);
+}
+
+static void snp_cleanup_vmsa(struct sev_es_save_area *vmsa)
+{
+	int err;
+
+	err = snp_set_vmsa(vmsa, false);
+	if (err)
+		pr_err("clear VMSA page failed (%u), leaking page\n", err);
+	else
+		free_page((unsigned long)vmsa);
+}
+
+int hv_snp_boot_ap(int cpu, unsigned long start_ip)
+{
+	struct sev_es_save_area *vmsa = (struct sev_es_save_area *)
+		__get_free_page(GFP_KERNEL | __GFP_ZERO);
+	struct sev_es_save_area *cur_vmsa;
+	struct desc_ptr gdtr;
+	u64 ret, retry = 5;
+	struct hv_enable_vp_vtl *start_vp_input;
+	unsigned long flags;
+
+	if (!vmsa)
+		return -ENOMEM;
+
+	native_store_gdt(&gdtr);
+
+	vmsa->gdtr.base = gdtr.address;
+	vmsa->gdtr.limit = gdtr.size;
+
+	asm volatile("movl %%es, %%eax;" : "=a" (vmsa->es.selector));
+	hv_populate_vmcb_seg(vmsa->es, vmsa->gdtr.base);
+
+	asm volatile("movl %%cs, %%eax;" : "=a" (vmsa->cs.selector));
+	hv_populate_vmcb_seg(vmsa->cs, vmsa->gdtr.base);
+
+	asm volatile("movl %%ss, %%eax;" : "=a" (vmsa->ss.selector));
+	hv_populate_vmcb_seg(vmsa->ss, vmsa->gdtr.base);
+
+	asm volatile("movl %%ds, %%eax;" : "=a" (vmsa->ds.selector));
+	hv_populate_vmcb_seg(vmsa->ds, vmsa->gdtr.base);
+
+	vmsa->efer = native_read_msr(MSR_EFER);
+
+	asm volatile("movq %%cr4, %%rax;" : "=a" (vmsa->cr4));
+	asm volatile("movq %%cr3, %%rax;" : "=a" (vmsa->cr3));
+	asm volatile("movq %%cr0, %%rax;" : "=a" (vmsa->cr0));
+
+	vmsa->xcr0 = 1;
+	vmsa->g_pat = HV_AP_INIT_GPAT_DEFAULT;
+	vmsa->rip = (u64)secondary_startup_64_no_verify;
+	vmsa->rsp = (u64)&ap_start_stack[PAGE_SIZE];
+
+	/*
+	 * Set the SNP-specific fields for this VMSA:
+	 *   VMPL level
+	 *   SEV_FEATURES (matches the SEV STATUS MSR right shifted 2 bits)
+	 */
+	vmsa->vmpl = 0;
+	vmsa->sev_features = sev_status >> 2;
+
+	ret = snp_set_vmsa(vmsa, true);
+	if (!ret) {
+		pr_err("RMPADJUST(%llx) failed: %llx\n", (u64)vmsa, ret);
+		free_page((u64)vmsa);
+		return ret;
+	}
+
+	local_irq_save(flags);
+	start_vp_input = (struct hv_enable_vp_vtl *)ap_start_input_arg;
+	memset(start_vp_input, 0, sizeof(*start_vp_input));
+	start_vp_input->partition_id = -1;
+	start_vp_input->vp_index = cpu;
+	start_vp_input->target_vtl.target_vtl = ms_hyperv.vtl;
+	*(u64 *)&start_vp_input->vp_context = __pa(vmsa) | 1;
+
+	do {
+		ret = hv_do_hypercall(HVCALL_START_VP,
+				      start_vp_input, NULL);
+	} while (hv_result(ret) == HV_STATUS_TIME_OUT && retry--);
+
+	local_irq_restore(flags);
+
+	if (!hv_result_success(ret)) {
+		pr_err("HvCallStartVirtualProcessor failed: %llx\n", ret);
+		snp_cleanup_vmsa(vmsa);
+		vmsa = NULL;
+	}
+
+	cur_vmsa = per_cpu(hv_sev_vmsa, cpu);
+	/* Free up any previous VMSA page */
+	if (cur_vmsa)
+		snp_cleanup_vmsa(cur_vmsa);
+
+	/* Record the current VMSA page */
+	per_cpu(hv_sev_vmsa, cpu) = vmsa;
+
+	return ret;
+}
+
+#else
+static inline void hv_ghcb_msr_write(u64 msr, u64 value) {}
+static inline void hv_ghcb_msr_read(u64 msr, u64 *value) {}
+#endif /* CONFIG_AMD_MEM_ENCRYPT */
+
+#ifdef CONFIG_INTEL_TDX_GUEST
+static void hv_tdx_msr_write(u64 msr, u64 val)
+{
+	struct tdx_hypercall_args args = {
+		.r10 = TDX_HYPERCALL_STANDARD,
+		.r11 = EXIT_REASON_MSR_WRITE,
+		.r12 = msr,
+		.r13 = val,
+	};
+
+	u64 ret = __tdx_hypercall(&args);
+
+	WARN_ONCE(ret, "Failed to emulate MSR write: %lld\n", ret);
+}
+
+static void hv_tdx_msr_read(u64 msr, u64 *val)
+{
+	struct tdx_hypercall_args args = {
+		.r10 = TDX_HYPERCALL_STANDARD,
+		.r11 = EXIT_REASON_MSR_READ,
+		.r12 = msr,
+	};
+
+	u64 ret = __tdx_hypercall_ret(&args);
+
+	if (WARN_ONCE(ret, "Failed to emulate MSR read: %lld\n", ret))
+		*val = 0;
+	else
+		*val = args.r11;
+}
+
+u64 hv_tdx_hypercall(u64 control, u64 param1, u64 param2)
+{
+	struct tdx_hypercall_args args = { };
+
+	args.r10 = control;
+	args.rdx = param1;
+	args.r8  = param2;
+
+	(void)__tdx_hypercall_ret(&args);
+
+	return args.r11;
+}
+
+#else
+static inline void hv_tdx_msr_write(u64 msr, u64 value) {}
+static inline void hv_tdx_msr_read(u64 msr, u64 *value) {}
+#endif /* CONFIG_INTEL_TDX_GUEST */
+
+#if defined(CONFIG_AMD_MEM_ENCRYPT) || defined(CONFIG_INTEL_TDX_GUEST)
+void hv_ivm_msr_write(u64 msr, u64 value)
+{
+	if (!ms_hyperv.paravisor_present)
+		return;
+
+	if (hv_isolation_type_tdx())
+		hv_tdx_msr_write(msr, value);
+	else if (hv_isolation_type_snp())
+		hv_ghcb_msr_write(msr, value);
+}
+
+void hv_ivm_msr_read(u64 msr, u64 *value)
+{
+	if (!ms_hyperv.paravisor_present)
+		return;
+
+	if (hv_isolation_type_tdx())
+		hv_tdx_msr_read(msr, value);
+	else if (hv_isolation_type_snp())
+		hv_ghcb_msr_read(msr, value);
+}
 
 /*
  * hv_mark_gpa_visibility - Set pages visible to host via hvcall.
@@ -358,13 +574,34 @@ static bool hv_is_private_mmio(u64 addr)
 
 void __init hv_vtom_init(void)
 {
+	enum hv_isolation_type type = hv_get_isolation_type();
+
+	switch (type) {
+	case HV_ISOLATION_TYPE_VBS:
+		fallthrough;
 	/*
 	 * By design, a VM using vTOM doesn't see the SEV setting,
 	 * so SEV initialization is bypassed and sev_status isn't set.
 	 * Set it here to indicate a vTOM VM.
+	 *
+	 * Note: if CONFIG_AMD_MEM_ENCRYPT is not set, sev_status is
+	 * defined as 0ULL, to which we can't assigned a value.
 	 */
-	sev_status = MSR_AMD64_SNP_VTOM;
-	cc_vendor = CC_VENDOR_AMD;
+#ifdef CONFIG_AMD_MEM_ENCRYPT
+	case HV_ISOLATION_TYPE_SNP:
+		sev_status = MSR_AMD64_SNP_VTOM;
+		cc_vendor = CC_VENDOR_AMD;
+		break;
+#endif
+
+	case HV_ISOLATION_TYPE_TDX:
+		cc_vendor = CC_VENDOR_INTEL;
+		break;
+
+	default:
+		panic("hv_vtom_init: unsupported isolation type %d\n", type);
+	}
+
 	cc_set_mask(ms_hyperv.shared_gpa_boundary);
 	physical_mask &= ms_hyperv.shared_gpa_boundary - 1;
 
@@ -377,7 +614,7 @@ void __init hv_vtom_init(void)
 	mtrr_overwrite_state(NULL, 0, MTRR_TYPE_WRBACK);
 }
 
-#endif /* CONFIG_AMD_MEM_ENCRYPT */
+#endif /* defined(CONFIG_AMD_MEM_ENCRYPT) || defined(CONFIG_INTEL_TDX_GUEST) */
 
 enum hv_isolation_type hv_get_isolation_type(void)
 {
@@ -405,10 +642,20 @@ bool hv_is_isolation_supported(void)
 DEFINE_STATIC_KEY_FALSE(isolation_type_snp);
 
 /*
- * hv_isolation_type_snp - Check system runs in the AMD SEV-SNP based
+ * hv_isolation_type_snp - Check if the system runs in an AMD SEV-SNP based
  * isolation VM.
  */
 bool hv_isolation_type_snp(void)
 {
 	return static_branch_unlikely(&isolation_type_snp);
 }
+
+DEFINE_STATIC_KEY_FALSE(isolation_type_tdx);
+/*
+ * hv_isolation_type_tdx - Check if the system runs in an Intel TDX based
+ * isolated VM.
+ */
+bool hv_isolation_type_tdx(void)
+{
+	return static_branch_unlikely(&isolation_type_tdx);
+}
diff --git a/arch/x86/include/asm/audit.h b/arch/x86/include/asm/audit.h
index 36aec57ea7a3..fa918f01333e 100644
--- a/arch/x86/include/asm/audit.h
+++ b/arch/x86/include/asm/audit.h
@@ -4,4 +4,11 @@
 
 int ia32_classify_syscall(unsigned int syscall);
 
+extern unsigned ia32_dir_class[];
+extern unsigned ia32_write_class[];
+extern unsigned ia32_read_class[];
+extern unsigned ia32_chattr_class[];
+extern unsigned ia32_signal_class[];
+
+
 #endif /* _ASM_X86_AUDIT_H */
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 2061ed1c398f..58cb9495e40f 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -439,6 +439,7 @@
 #define X86_FEATURE_SEV_ES		(19*32+ 3) /* AMD Secure Encrypted Virtualization - Encrypted State */
 #define X86_FEATURE_V_TSC_AUX		(19*32+ 9) /* "" Virtual TSC_AUX */
 #define X86_FEATURE_SME_COHERENT	(19*32+10) /* "" AMD hardware-enforced cache coherency */
+#define X86_FEATURE_DEBUG_SWAP		(19*32+14) /* AMD SEV-ES full debug state swap support */
 
 /* AMD-defined Extended Feature 2 EAX, CPUID level 0x80000021 (EAX), word 20 */
 #define X86_FEATURE_NO_NESTED_DATA_BP	(20*32+ 0) /* "" No Nested Data Breakpoints */
diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h
index cea95dcd27c2..2ff26f53cd62 100644
--- a/arch/x86/include/asm/hyperv-tlfs.h
+++ b/arch/x86/include/asm/hyperv-tlfs.h
@@ -169,7 +169,8 @@
 enum hv_isolation_type {
 	HV_ISOLATION_TYPE_NONE	= 0,
 	HV_ISOLATION_TYPE_VBS	= 1,
-	HV_ISOLATION_TYPE_SNP	= 2
+	HV_ISOLATION_TYPE_SNP	= 2,
+	HV_ISOLATION_TYPE_TDX	= 3
 };
 
 /* Hyper-V specific model specific registers (MSRs) */
@@ -301,6 +302,13 @@ enum hv_isolation_type {
 #define HV_X64_MSR_TIME_REF_COUNT	HV_REGISTER_TIME_REF_COUNT
 #define HV_X64_MSR_REFERENCE_TSC	HV_REGISTER_REFERENCE_TSC
 
+/*
+ * Registers are only accessible via HVCALL_GET_VP_REGISTERS hvcall and
+ * there is not associated MSR address.
+ */
+#define	HV_X64_REGISTER_VSM_VP_STATUS	0x000D0003
+#define	HV_X64_VTL_MASK			GENMASK(3, 0)
+
 /* Hyper-V memory host visibility */
 enum hv_mem_host_visibility {
 	VMBUS_PAGE_NOT_VISIBLE		= 0,
diff --git a/arch/x86/include/asm/kexec.h b/arch/x86/include/asm/kexec.h
index 3be6a98751f0..c9f6a6c5de3c 100644
--- a/arch/x86/include/asm/kexec.h
+++ b/arch/x86/include/asm/kexec.h
@@ -205,8 +205,6 @@ int arch_kimage_file_post_load_cleanup(struct kimage *image);
 #endif
 #endif
 
-typedef void crash_vmclear_fn(void);
-extern crash_vmclear_fn __rcu *crash_vmclear_loaded_vmcss;
 extern void kdump_nmi_shootdown_cpus(void);
 
 #ifdef CONFIG_CRASH_HOTPLUG
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 3bc146dfd38d..1a4def36d5bb 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -288,13 +288,13 @@ struct kvm_kernel_irq_routing_entry;
  * kvm_mmu_page_role tracks the properties of a shadow page (where shadow page
  * also includes TDP pages) to determine whether or not a page can be used in
  * the given MMU context.  This is a subset of the overall kvm_cpu_role to
- * minimize the size of kvm_memory_slot.arch.gfn_track, i.e. allows allocating
- * 2 bytes per gfn instead of 4 bytes per gfn.
+ * minimize the size of kvm_memory_slot.arch.gfn_write_track, i.e. allows
+ * allocating 2 bytes per gfn instead of 4 bytes per gfn.
  *
  * Upper-level shadow pages having gptes are tracked for write-protection via
- * gfn_track.  As above, gfn_track is a 16 bit counter, so KVM must not create
- * more than 2^16-1 upper-level shadow pages at a single gfn, otherwise
- * gfn_track will overflow and explosions will ensure.
+ * gfn_write_track.  As above, gfn_write_track is a 16 bit counter, so KVM must
+ * not create more than 2^16-1 upper-level shadow pages at a single gfn,
+ * otherwise gfn_write_track will overflow and explosions will ensue.
  *
  * A unique shadow page (SP) for a gfn is created if and only if an existing SP
  * cannot be reused.  The ability to reuse a SP is tracked by its role, which
@@ -746,7 +746,6 @@ struct kvm_vcpu_arch {
 	u64 smi_count;
 	bool at_instruction_boundary;
 	bool tpr_access_reporting;
-	bool xsaves_enabled;
 	bool xfd_no_write_intercept;
 	u64 ia32_xss;
 	u64 microcode_version;
@@ -831,6 +830,25 @@ struct kvm_vcpu_arch {
 	struct kvm_cpuid_entry2 *cpuid_entries;
 	struct kvm_hypervisor_cpuid kvm_cpuid;
 
+	/*
+	 * FIXME: Drop this macro and use KVM_NR_GOVERNED_FEATURES directly
+	 * when "struct kvm_vcpu_arch" is no longer defined in an
+	 * arch/x86/include/asm header.  The max is mostly arbitrary, i.e.
+	 * can be increased as necessary.
+	 */
+#define KVM_MAX_NR_GOVERNED_FEATURES BITS_PER_LONG
+
+	/*
+	 * Track whether or not the guest is allowed to use features that are
+	 * governed by KVM, where "governed" means KVM needs to manage state
+	 * and/or explicitly enable the feature in hardware.  Typically, but
+	 * not always, governed features can be used by the guest if and only
+	 * if both KVM and userspace want to expose the feature to the guest.
+	 */
+	struct {
+		DECLARE_BITMAP(enabled, KVM_MAX_NR_GOVERNED_FEATURES);
+	} governed_features;
+
 	u64 reserved_gpa_bits;
 	int maxphyaddr;
 
@@ -1005,7 +1023,7 @@ struct kvm_lpage_info {
 struct kvm_arch_memory_slot {
 	struct kvm_rmap_head *rmap[KVM_NR_PAGE_SIZES];
 	struct kvm_lpage_info *lpage_info[KVM_NR_PAGE_SIZES - 1];
-	unsigned short *gfn_track[KVM_PAGE_TRACK_MAX];
+	unsigned short *gfn_write_track;
 };
 
 /*
@@ -1247,8 +1265,9 @@ struct kvm_arch {
 	 * create an NX huge page (without hanging the guest).
 	 */
 	struct list_head possible_nx_huge_pages;
-	struct kvm_page_track_notifier_node mmu_sp_tracker;
+#ifdef CONFIG_KVM_EXTERNAL_WRITE_TRACKING
 	struct kvm_page_track_notifier_head track_notifier_head;
+#endif
 	/*
 	 * Protects marking pages unsync during page faults, as TDP MMU page
 	 * faults only take mmu_lock for read.  For simplicity, the unsync
@@ -1655,8 +1674,8 @@ struct kvm_x86_ops {
 
 	u64 (*get_l2_tsc_offset)(struct kvm_vcpu *vcpu);
 	u64 (*get_l2_tsc_multiplier)(struct kvm_vcpu *vcpu);
-	void (*write_tsc_offset)(struct kvm_vcpu *vcpu, u64 offset);
-	void (*write_tsc_multiplier)(struct kvm_vcpu *vcpu, u64 multiplier);
+	void (*write_tsc_offset)(struct kvm_vcpu *vcpu);
+	void (*write_tsc_multiplier)(struct kvm_vcpu *vcpu);
 
 	/*
 	 * Retrieve somewhat arbitrary exit information.  Intended to
@@ -1795,8 +1814,8 @@ static inline struct kvm *kvm_arch_alloc_vm(void)
 #define __KVM_HAVE_ARCH_VM_FREE
 void kvm_arch_free_vm(struct kvm *kvm);
 
-#define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLB
-static inline int kvm_arch_flush_remote_tlb(struct kvm *kvm)
+#define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS
+static inline int kvm_arch_flush_remote_tlbs(struct kvm *kvm)
 {
 	if (kvm_x86_ops.flush_remote_tlbs &&
 	    !static_call(kvm_x86_flush_remote_tlbs)(kvm))
@@ -1805,6 +1824,8 @@ static inline int kvm_arch_flush_remote_tlb(struct kvm *kvm)
 		return -ENOTSUPP;
 }
 
+#define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS_RANGE
+
 #define kvm_arch_pmi_in_guest(vcpu) \
 	((vcpu) && (vcpu)->arch.handling_intr_from_guest)
 
@@ -1833,7 +1854,6 @@ void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm,
 				   const struct kvm_memory_slot *memslot);
 void kvm_mmu_slot_leaf_clear_dirty(struct kvm *kvm,
 				   const struct kvm_memory_slot *memslot);
-void kvm_mmu_zap_all(struct kvm *kvm);
 void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm, u64 gen);
 void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned long kvm_nr_mmu_pages);
 
diff --git a/arch/x86/include/asm/kvm_page_track.h b/arch/x86/include/asm/kvm_page_track.h
index eb186bc57f6a..3d040741044b 100644
--- a/arch/x86/include/asm/kvm_page_track.h
+++ b/arch/x86/include/asm/kvm_page_track.h
@@ -2,11 +2,9 @@
 #ifndef _ASM_X86_KVM_PAGE_TRACK_H
 #define _ASM_X86_KVM_PAGE_TRACK_H
 
-enum kvm_page_track_mode {
-	KVM_PAGE_TRACK_WRITE,
-	KVM_PAGE_TRACK_MAX,
-};
+#include <linux/kvm_types.h>
 
+#ifdef CONFIG_KVM_EXTERNAL_WRITE_TRACKING
 /*
  * The notifier represented by @kvm_page_track_notifier_node is linked into
  * the head which will be notified when guest is triggering the track event.
@@ -26,54 +24,39 @@ struct kvm_page_track_notifier_node {
 	 * It is called when guest is writing the write-tracked page
 	 * and write emulation is finished at that time.
 	 *
-	 * @vcpu: the vcpu where the write access happened.
 	 * @gpa: the physical address written by guest.
 	 * @new: the data was written to the address.
 	 * @bytes: the written length.
 	 * @node: this node
 	 */
-	void (*track_write)(struct kvm_vcpu *vcpu, gpa_t gpa, const u8 *new,
-			    int bytes, struct kvm_page_track_notifier_node *node);
+	void (*track_write)(gpa_t gpa, const u8 *new, int bytes,
+			    struct kvm_page_track_notifier_node *node);
+
 	/*
-	 * It is called when memory slot is being moved or removed
-	 * users can drop write-protection for the pages in that memory slot
+	 * Invoked when a memory region is removed from the guest.  Or in KVM
+	 * terms, when a memslot is deleted.
 	 *
-	 * @kvm: the kvm where memory slot being moved or removed
-	 * @slot: the memory slot being moved or removed
-	 * @node: this node
+	 * @gfn:       base gfn of the region being removed
+	 * @nr_pages:  number of pages in the to-be-removed region
+	 * @node:      this node
 	 */
-	void (*track_flush_slot)(struct kvm *kvm, struct kvm_memory_slot *slot,
-			    struct kvm_page_track_notifier_node *node);
+	void (*track_remove_region)(gfn_t gfn, unsigned long nr_pages,
+				    struct kvm_page_track_notifier_node *node);
 };
 
-int kvm_page_track_init(struct kvm *kvm);
-void kvm_page_track_cleanup(struct kvm *kvm);
+int kvm_page_track_register_notifier(struct kvm *kvm,
+				     struct kvm_page_track_notifier_node *n);
+void kvm_page_track_unregister_notifier(struct kvm *kvm,
+					struct kvm_page_track_notifier_node *n);
 
-bool kvm_page_track_write_tracking_enabled(struct kvm *kvm);
-int kvm_page_track_write_tracking_alloc(struct kvm_memory_slot *slot);
-
-void kvm_page_track_free_memslot(struct kvm_memory_slot *slot);
-int kvm_page_track_create_memslot(struct kvm *kvm,
-				  struct kvm_memory_slot *slot,
-				  unsigned long npages);
-
-void kvm_slot_page_track_add_page(struct kvm *kvm,
-				  struct kvm_memory_slot *slot, gfn_t gfn,
-				  enum kvm_page_track_mode mode);
-void kvm_slot_page_track_remove_page(struct kvm *kvm,
-				     struct kvm_memory_slot *slot, gfn_t gfn,
-				     enum kvm_page_track_mode mode);
-bool kvm_slot_page_track_is_active(struct kvm *kvm,
-				   const struct kvm_memory_slot *slot,
-				   gfn_t gfn, enum kvm_page_track_mode mode);
+int kvm_write_track_add_gfn(struct kvm *kvm, gfn_t gfn);
+int kvm_write_track_remove_gfn(struct kvm *kvm, gfn_t gfn);
+#else
+/*
+ * Allow defining a node in a structure even if page tracking is disabled, e.g.
+ * to play nice with testing headers via direct inclusion from the command line.
+ */
+struct kvm_page_track_notifier_node {};
+#endif /* CONFIG_KVM_EXTERNAL_WRITE_TRACKING */
 
-void
-kvm_page_track_register_notifier(struct kvm *kvm,
-				 struct kvm_page_track_notifier_node *n);
-void
-kvm_page_track_unregister_notifier(struct kvm *kvm,
-				   struct kvm_page_track_notifier_node *n);
-void kvm_page_track_write(struct kvm_vcpu *vcpu, gpa_t gpa, const u8 *new,
-			  int bytes);
-void kvm_page_track_flush_slot(struct kvm *kvm, struct kvm_memory_slot *slot);
 #endif
diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
index fa83d88e4c99..033b53f993c6 100644
--- a/arch/x86/include/asm/mshyperv.h
+++ b/arch/x86/include/asm/mshyperv.h
@@ -26,6 +26,7 @@
 union hv_ghcb;
 
 DECLARE_STATIC_KEY_FALSE(isolation_type_snp);
+DECLARE_STATIC_KEY_FALSE(isolation_type_tdx);
 
 typedef int (*hyperv_fill_flush_list_func)(
 		struct hv_guest_mapping_flush_list *flush,
@@ -40,6 +41,7 @@ static inline unsigned char hv_get_nmi_reason(void)
 
 #if IS_ENABLED(CONFIG_HYPERV)
 extern int hyperv_init_cpuhp;
+extern bool hyperv_paravisor_present;
 
 extern void *hv_hypercall_pg;
 
@@ -47,10 +49,25 @@ extern u64 hv_current_partition_id;
 
 extern union hv_ghcb * __percpu *hv_ghcb_pg;
 
+bool hv_isolation_type_snp(void);
+bool hv_isolation_type_tdx(void);
+u64 hv_tdx_hypercall(u64 control, u64 param1, u64 param2);
+
+/*
+ * DEFAULT INIT GPAT and SEGMENT LIMIT value in struct VMSA
+ * to start AP in enlightened SEV guest.
+ */
+#define HV_AP_INIT_GPAT_DEFAULT		0x0007040600070406ULL
+#define HV_AP_SEGMENT_LIMIT		0xffffffff
+
 int hv_call_deposit_pages(int node, u64 partition_id, u32 num_pages);
 int hv_call_add_logical_proc(int node, u32 lp_index, u32 acpi_id);
 int hv_call_create_vp(int node, u64 partition_id, u32 vp_index, u32 flags);
 
+/*
+ * If the hypercall involves no input or output parameters, the hypervisor
+ * ignores the corresponding GPA pointer.
+ */
 static inline u64 hv_do_hypercall(u64 control, void *input, void *output)
 {
 	u64 input_address = input ? virt_to_phys(input) : 0;
@@ -58,6 +75,19 @@ static inline u64 hv_do_hypercall(u64 control, void *input, void *output)
 	u64 hv_status;
 
 #ifdef CONFIG_X86_64
+	if (hv_isolation_type_tdx() && !hyperv_paravisor_present)
+		return hv_tdx_hypercall(control, input_address, output_address);
+
+	if (hv_isolation_type_snp() && !hyperv_paravisor_present) {
+		__asm__ __volatile__("mov %4, %%r8\n"
+				     "vmmcall"
+				     : "=a" (hv_status), ASM_CALL_CONSTRAINT,
+				       "+c" (control), "+d" (input_address)
+				     :  "r" (output_address)
+				     : "cc", "memory", "r8", "r9", "r10", "r11");
+		return hv_status;
+	}
+
 	if (!hv_hypercall_pg)
 		return U64_MAX;
 
@@ -101,7 +131,16 @@ static inline u64 _hv_do_fast_hypercall8(u64 control, u64 input1)
 	u64 hv_status;
 
 #ifdef CONFIG_X86_64
-	{
+	if (hv_isolation_type_tdx() && !hyperv_paravisor_present)
+		return hv_tdx_hypercall(control, input1, 0);
+
+	if (hv_isolation_type_snp() && !hyperv_paravisor_present) {
+		__asm__ __volatile__(
+				"vmmcall"
+				: "=a" (hv_status), ASM_CALL_CONSTRAINT,
+				"+c" (control), "+d" (input1)
+				:: "cc", "r8", "r9", "r10", "r11");
+	} else {
 		__asm__ __volatile__(CALL_NOSPEC
 				     : "=a" (hv_status), ASM_CALL_CONSTRAINT,
 				       "+c" (control), "+d" (input1)
@@ -146,7 +185,17 @@ static inline u64 _hv_do_fast_hypercall16(u64 control, u64 input1, u64 input2)
 	u64 hv_status;
 
 #ifdef CONFIG_X86_64
-	{
+	if (hv_isolation_type_tdx() && !hyperv_paravisor_present)
+		return hv_tdx_hypercall(control, input1, input2);
+
+	if (hv_isolation_type_snp() && !hyperv_paravisor_present) {
+		__asm__ __volatile__("mov %4, %%r8\n"
+				     "vmmcall"
+				     : "=a" (hv_status), ASM_CALL_CONSTRAINT,
+				       "+c" (control), "+d" (input1)
+				     : "r" (input2)
+				     : "cc", "r8", "r9", "r10", "r11");
+	} else {
 		__asm__ __volatile__("mov %4, %%r8\n"
 				     CALL_NOSPEC
 				     : "=a" (hv_status), ASM_CALL_CONSTRAINT,
@@ -225,20 +274,24 @@ int hv_map_ioapic_interrupt(int ioapic_id, bool level, int vcpu, int vector,
 int hv_unmap_ioapic_interrupt(int ioapic_id, struct hv_interrupt_entry *entry);
 
 #ifdef CONFIG_AMD_MEM_ENCRYPT
-void hv_ghcb_msr_write(u64 msr, u64 value);
-void hv_ghcb_msr_read(u64 msr, u64 *value);
 bool hv_ghcb_negotiate_protocol(void);
 void __noreturn hv_ghcb_terminate(unsigned int set, unsigned int reason);
-void hv_vtom_init(void);
+int hv_snp_boot_ap(int cpu, unsigned long start_ip);
 #else
-static inline void hv_ghcb_msr_write(u64 msr, u64 value) {}
-static inline void hv_ghcb_msr_read(u64 msr, u64 *value) {}
 static inline bool hv_ghcb_negotiate_protocol(void) { return false; }
 static inline void hv_ghcb_terminate(unsigned int set, unsigned int reason) {}
-static inline void hv_vtom_init(void) {}
+static inline int hv_snp_boot_ap(int cpu, unsigned long start_ip) { return 0; }
 #endif
 
-extern bool hv_isolation_type_snp(void);
+#if defined(CONFIG_AMD_MEM_ENCRYPT) || defined(CONFIG_INTEL_TDX_GUEST)
+void hv_vtom_init(void);
+void hv_ivm_msr_write(u64 msr, u64 value);
+void hv_ivm_msr_read(u64 msr, u64 *value);
+#else
+static inline void hv_vtom_init(void) {}
+static inline void hv_ivm_msr_write(u64 msr, u64 value) {}
+static inline void hv_ivm_msr_read(u64 msr, u64 *value) {}
+#endif
 
 static inline bool hv_is_synic_reg(unsigned int reg)
 {
diff --git a/arch/x86/include/asm/reboot.h b/arch/x86/include/asm/reboot.h
index 9177b4354c3f..6536873f8fc0 100644
--- a/arch/x86/include/asm/reboot.h
+++ b/arch/x86/include/asm/reboot.h
@@ -25,7 +25,14 @@ void __noreturn machine_real_restart(unsigned int type);
 #define MRR_BIOS	0
 #define MRR_APM		1
 
+#if IS_ENABLED(CONFIG_KVM_INTEL) || IS_ENABLED(CONFIG_KVM_AMD)
+typedef void (cpu_emergency_virt_cb)(void);
+void cpu_emergency_register_virt_callback(cpu_emergency_virt_cb *callback);
+void cpu_emergency_unregister_virt_callback(cpu_emergency_virt_cb *callback);
 void cpu_emergency_disable_virtualization(void);
+#else
+static inline void cpu_emergency_disable_virtualization(void) {}
+#endif /* CONFIG_KVM_INTEL || CONFIG_KVM_AMD */
 
 typedef void (*nmi_shootdown_cb)(int, struct pt_regs*);
 void nmi_shootdown_cpus(nmi_shootdown_cb callback);
diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h
index e7c7379d6ac7..19bf955b67e0 100644
--- a/arch/x86/include/asm/svm.h
+++ b/arch/x86/include/asm/svm.h
@@ -288,6 +288,7 @@ static_assert((X2AVIC_MAX_PHYSICAL_ID & AVIC_PHYSICAL_MAX_INDEX_MASK) == X2AVIC_
 
 #define AVIC_HPA_MASK	~((0xFFFULL << 52) | 0xFFF)
 
+#define SVM_SEV_FEAT_DEBUG_SWAP                        BIT(5)
 
 struct vmcb_seg {
 	u16 selector;
@@ -345,7 +346,7 @@ struct vmcb_save_area {
 	u64 last_excp_from;
 	u64 last_excp_to;
 	u8 reserved_0x298[72];
-	u32 spec_ctrl;		/* Guest version of SPEC_CTRL at 0x2E0 */
+	u64 spec_ctrl;		/* Guest version of SPEC_CTRL at 0x2E0 */
 } __packed;
 
 /* Save area definition for SEV-ES and SEV-SNP guests */
@@ -512,7 +513,7 @@ struct ghcb {
 } __packed;
 
 
-#define EXPECTED_VMCB_SAVE_AREA_SIZE		740
+#define EXPECTED_VMCB_SAVE_AREA_SIZE		744
 #define EXPECTED_GHCB_SAVE_AREA_SIZE		1032
 #define EXPECTED_SEV_ES_SAVE_AREA_SIZE		1648
 #define EXPECTED_VMCB_CONTROL_AREA_SIZE		1024
diff --git a/arch/x86/include/asm/virtext.h b/arch/x86/include/asm/virtext.h
deleted file mode 100644
index 3b12e6b99412..000000000000
--- a/arch/x86/include/asm/virtext.h
+++ /dev/null
@@ -1,154 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/* CPU virtualization extensions handling
- *
- * This should carry the code for handling CPU virtualization extensions
- * that needs to live in the kernel core.
- *
- * Author: Eduardo Habkost <ehabkost@redhat.com>
- *
- * Copyright (C) 2008, Red Hat Inc.
- *
- * Contains code from KVM, Copyright (C) 2006 Qumranet, Inc.
- */
-#ifndef _ASM_X86_VIRTEX_H
-#define _ASM_X86_VIRTEX_H
-
-#include <asm/processor.h>
-
-#include <asm/vmx.h>
-#include <asm/svm.h>
-#include <asm/tlbflush.h>
-
-/*
- * VMX functions:
- */
-
-static inline int cpu_has_vmx(void)
-{
-	unsigned long ecx = cpuid_ecx(1);
-	return test_bit(5, &ecx); /* CPUID.1:ECX.VMX[bit 5] -> VT */
-}
-
-
-/**
- * cpu_vmxoff() - Disable VMX on the current CPU
- *
- * Disable VMX and clear CR4.VMXE (even if VMXOFF faults)
- *
- * Note, VMXOFF causes a #UD if the CPU is !post-VMXON, but it's impossible to
- * atomically track post-VMXON state, e.g. this may be called in NMI context.
- * Eat all faults as all other faults on VMXOFF faults are mode related, i.e.
- * faults are guaranteed to be due to the !post-VMXON check unless the CPU is
- * magically in RM, VM86, compat mode, or at CPL>0.
- */
-static inline int cpu_vmxoff(void)
-{
-	asm_volatile_goto("1: vmxoff\n\t"
-			  _ASM_EXTABLE(1b, %l[fault])
-			  ::: "cc", "memory" : fault);
-
-	cr4_clear_bits(X86_CR4_VMXE);
-	return 0;
-
-fault:
-	cr4_clear_bits(X86_CR4_VMXE);
-	return -EIO;
-}
-
-static inline int cpu_vmx_enabled(void)
-{
-	return __read_cr4() & X86_CR4_VMXE;
-}
-
-/** Disable VMX if it is enabled on the current CPU
- *
- * You shouldn't call this if cpu_has_vmx() returns 0.
- */
-static inline void __cpu_emergency_vmxoff(void)
-{
-	if (cpu_vmx_enabled())
-		cpu_vmxoff();
-}
-
-/** Disable VMX if it is supported and enabled on the current CPU
- */
-static inline void cpu_emergency_vmxoff(void)
-{
-	if (cpu_has_vmx())
-		__cpu_emergency_vmxoff();
-}
-
-
-
-
-/*
- * SVM functions:
- */
-
-/** Check if the CPU has SVM support
- *
- * You can use the 'msg' arg to get a message describing the problem,
- * if the function returns zero. Simply pass NULL if you are not interested
- * on the messages; gcc should take care of not generating code for
- * the messages on this case.
- */
-static inline int cpu_has_svm(const char **msg)
-{
-	if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD &&
-	    boot_cpu_data.x86_vendor != X86_VENDOR_HYGON) {
-		if (msg)
-			*msg = "not amd or hygon";
-		return 0;
-	}
-
-	if (boot_cpu_data.extended_cpuid_level < SVM_CPUID_FUNC) {
-		if (msg)
-			*msg = "can't execute cpuid_8000000a";
-		return 0;
-	}
-
-	if (!boot_cpu_has(X86_FEATURE_SVM)) {
-		if (msg)
-			*msg = "svm not available";
-		return 0;
-	}
-	return 1;
-}
-
-
-/** Disable SVM on the current CPU
- *
- * You should call this only if cpu_has_svm() returned true.
- */
-static inline void cpu_svm_disable(void)
-{
-	uint64_t efer;
-
-	wrmsrl(MSR_VM_HSAVE_PA, 0);
-	rdmsrl(MSR_EFER, efer);
-	if (efer & EFER_SVME) {
-		/*
-		 * Force GIF=1 prior to disabling SVM to ensure INIT and NMI
-		 * aren't blocked, e.g. if a fatal error occurred between CLGI
-		 * and STGI.  Note, STGI may #UD if SVM is disabled from NMI
-		 * context between reading EFER and executing STGI.  In that
-		 * case, GIF must already be set, otherwise the NMI would have
-		 * been blocked, so just eat the fault.
-		 */
-		asm_volatile_goto("1: stgi\n\t"
-				  _ASM_EXTABLE(1b, %l[fault])
-				  ::: "memory" : fault);
-fault:
-		wrmsrl(MSR_EFER, efer & ~EFER_SVME);
-	}
-}
-
-/** Makes sure SVM is disabled, if it is supported on the CPU
- */
-static inline void cpu_emergency_svm_disable(void)
-{
-	if (cpu_has_svm(NULL))
-		cpu_svm_disable();
-}
-
-#endif /* _ASM_X86_VIRTEX_H */
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index 0d02c4aafa6f..0e73616b82f3 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -71,7 +71,7 @@
 #define SECONDARY_EXEC_RDSEED_EXITING		VMCS_CONTROL_BIT(RDSEED_EXITING)
 #define SECONDARY_EXEC_ENABLE_PML               VMCS_CONTROL_BIT(PAGE_MOD_LOGGING)
 #define SECONDARY_EXEC_PT_CONCEAL_VMX		VMCS_CONTROL_BIT(PT_CONCEAL_VMX)
-#define SECONDARY_EXEC_XSAVES			VMCS_CONTROL_BIT(XSAVES)
+#define SECONDARY_EXEC_ENABLE_XSAVES		VMCS_CONTROL_BIT(XSAVES)
 #define SECONDARY_EXEC_MODE_BASED_EPT_EXEC	VMCS_CONTROL_BIT(MODE_BASED_EPT_EXEC)
 #define SECONDARY_EXEC_PT_USE_GPA		VMCS_CONTROL_BIT(PT_USE_GPA)
 #define SECONDARY_EXEC_TSC_SCALING              VMCS_CONTROL_BIT(TSC_SCALING)
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c
index c6c15ce1952f..5934ee5bc087 100644
--- a/arch/x86/kernel/apm_32.c
+++ b/arch/x86/kernel/apm_32.c
@@ -239,12 +239,6 @@ extern int (*console_blank_hook)(int);
 #endif
 
 /*
- * The apm_bios device is one of the misc char devices.
- * This is its minor number.
- */
-#define	APM_MINOR_DEV	134
-
-/*
  * Various options can be changed at boot time as follows:
  * (We allow underscores for compatibility with the modules code)
  *	apm=on/off			enable/disable APM
diff --git a/arch/x86/kernel/audit_64.c b/arch/x86/kernel/audit_64.c
index 44c3601cfdc4..190c120f4285 100644
--- a/arch/x86/kernel/audit_64.c
+++ b/arch/x86/kernel/audit_64.c
@@ -63,11 +63,6 @@ int audit_classify_syscall(int abi, unsigned syscall)
 static int __init audit_classes_init(void)
 {
 #ifdef CONFIG_IA32_EMULATION
-	extern __u32 ia32_dir_class[];
-	extern __u32 ia32_write_class[];
-	extern __u32 ia32_read_class[];
-	extern __u32 ia32_chattr_class[];
-	extern __u32 ia32_signal_class[];
 	audit_register_class(AUDIT_CLASS_WRITE_32, ia32_write_class);
 	audit_register_class(AUDIT_CLASS_READ_32, ia32_read_class);
 	audit_register_class(AUDIT_CLASS_DIR_WRITE_32, ia32_dir_class);
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 6d75fab10161..382d4e6b848d 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1280,11 +1280,11 @@ static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = {
 	VULNBL_INTEL_STEPPINGS(BROADWELL_G,	X86_STEPPING_ANY,		SRBDS),
 	VULNBL_INTEL_STEPPINGS(BROADWELL_X,	X86_STEPPING_ANY,		MMIO),
 	VULNBL_INTEL_STEPPINGS(BROADWELL,	X86_STEPPING_ANY,		SRBDS),
-	VULNBL_INTEL_STEPPINGS(SKYLAKE_L,	X86_STEPPING_ANY,		SRBDS | MMIO | RETBLEED),
 	VULNBL_INTEL_STEPPINGS(SKYLAKE_X,	X86_STEPPING_ANY,		MMIO | RETBLEED | GDS),
-	VULNBL_INTEL_STEPPINGS(SKYLAKE,		X86_STEPPING_ANY,		SRBDS | MMIO | RETBLEED),
-	VULNBL_INTEL_STEPPINGS(KABYLAKE_L,	X86_STEPPING_ANY,		SRBDS | MMIO | RETBLEED | GDS),
-	VULNBL_INTEL_STEPPINGS(KABYLAKE,	X86_STEPPING_ANY,		SRBDS | MMIO | RETBLEED | GDS),
+	VULNBL_INTEL_STEPPINGS(SKYLAKE_L,	X86_STEPPING_ANY,		MMIO | RETBLEED | GDS | SRBDS),
+	VULNBL_INTEL_STEPPINGS(SKYLAKE,		X86_STEPPING_ANY,		MMIO | RETBLEED | GDS | SRBDS),
+	VULNBL_INTEL_STEPPINGS(KABYLAKE_L,	X86_STEPPING_ANY,		MMIO | RETBLEED | GDS | SRBDS),
+	VULNBL_INTEL_STEPPINGS(KABYLAKE,	X86_STEPPING_ANY,		MMIO | RETBLEED | GDS | SRBDS),
 	VULNBL_INTEL_STEPPINGS(CANNONLAKE_L,	X86_STEPPING_ANY,		RETBLEED),
 	VULNBL_INTEL_STEPPINGS(ICELAKE_L,	X86_STEPPING_ANY,		MMIO | MMIO_SBDS | RETBLEED | GDS),
 	VULNBL_INTEL_STEPPINGS(ICELAKE_D,	X86_STEPPING_ANY,		MMIO | GDS),
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index 0100468e72ca..e6bba12c759c 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -32,6 +32,7 @@
 #include <asm/nmi.h>
 #include <clocksource/hyperv_timer.h>
 #include <asm/numa.h>
+#include <asm/svm.h>
 
 /* Is Linux running as the root partition? */
 bool hv_root_partition;
@@ -39,6 +40,10 @@ bool hv_root_partition;
 bool hv_nested;
 struct ms_hyperv_info ms_hyperv;
 
+/* Used in modules via hv_do_hypercall(): see arch/x86/include/asm/mshyperv.h */
+bool hyperv_paravisor_present __ro_after_init;
+EXPORT_SYMBOL_GPL(hyperv_paravisor_present);
+
 #if IS_ENABLED(CONFIG_HYPERV)
 static inline unsigned int hv_get_nested_reg(unsigned int reg)
 {
@@ -65,8 +70,8 @@ u64 hv_get_non_nested_register(unsigned int reg)
 {
 	u64 value;
 
-	if (hv_is_synic_reg(reg) && hv_isolation_type_snp())
-		hv_ghcb_msr_read(reg, &value);
+	if (hv_is_synic_reg(reg) && ms_hyperv.paravisor_present)
+		hv_ivm_msr_read(reg, &value);
 	else
 		rdmsrl(reg, value);
 	return value;
@@ -75,8 +80,8 @@ EXPORT_SYMBOL_GPL(hv_get_non_nested_register);
 
 void hv_set_non_nested_register(unsigned int reg, u64 value)
 {
-	if (hv_is_synic_reg(reg) && hv_isolation_type_snp()) {
-		hv_ghcb_msr_write(reg, value);
+	if (hv_is_synic_reg(reg) && ms_hyperv.paravisor_present) {
+		hv_ivm_msr_write(reg, value);
 
 		/* Write proxy bit via wrmsl instruction */
 		if (hv_is_sint_reg(reg))
@@ -295,6 +300,15 @@ static void __init hv_smp_prepare_cpus(unsigned int max_cpus)
 
 	native_smp_prepare_cpus(max_cpus);
 
+	/*
+	 *  Override wakeup_secondary_cpu_64 callback for SEV-SNP
+	 *  enlightened guest.
+	 */
+	if (!ms_hyperv.paravisor_present && hv_isolation_type_snp()) {
+		apic->wakeup_secondary_cpu_64 = hv_snp_boot_ap;
+		return;
+	}
+
 #ifdef CONFIG_X86_64
 	for_each_present_cpu(i) {
 		if (i == 0)
@@ -313,6 +327,26 @@ static void __init hv_smp_prepare_cpus(unsigned int max_cpus)
 }
 #endif
 
+/*
+ * When a fully enlightened TDX VM runs on Hyper-V, the firmware sets the
+ * HW_REDUCED flag: refer to acpi_tb_create_local_fadt(). Consequently ttyS0
+ * interrupts can't work because request_irq() -> ... -> irq_to_desc() returns
+ * NULL for ttyS0. This happens because mp_config_acpi_legacy_irqs() sees a
+ * nr_legacy_irqs() of 0, so it doesn't initialize the array 'mp_irqs[]', and
+ * later setup_IO_APIC_irqs() -> find_irq_entry() fails to find the legacy irqs
+ * from the array and hence doesn't create the necessary irq description info.
+ *
+ * Clone arch/x86/kernel/acpi/boot.c: acpi_generic_reduced_hw_init() here,
+ * except don't change 'legacy_pic', which keeps its default value
+ * 'default_legacy_pic'. This way, mp_config_acpi_legacy_irqs() sees a non-zero
+ * nr_legacy_irqs() and eventually serial console interrupts works properly.
+ */
+static void __init reduced_hw_init(void)
+{
+	x86_init.timers.timer_init	= x86_init_noop;
+	x86_init.irqs.pre_vector_init	= x86_init_noop;
+}
+
 static void __init ms_hyperv_init_platform(void)
 {
 	int hv_max_functions_eax;
@@ -399,11 +433,33 @@ static void __init ms_hyperv_init_platform(void)
 			ms_hyperv.shared_gpa_boundary =
 				BIT_ULL(ms_hyperv.shared_gpa_boundary_bits);
 
+		hyperv_paravisor_present = !!ms_hyperv.paravisor_present;
+
 		pr_info("Hyper-V: Isolation Config: Group A 0x%x, Group B 0x%x\n",
 			ms_hyperv.isolation_config_a, ms_hyperv.isolation_config_b);
 
-		if (hv_get_isolation_type() == HV_ISOLATION_TYPE_SNP)
+
+		if (hv_get_isolation_type() == HV_ISOLATION_TYPE_SNP) {
 			static_branch_enable(&isolation_type_snp);
+		} else if (hv_get_isolation_type() == HV_ISOLATION_TYPE_TDX) {
+			static_branch_enable(&isolation_type_tdx);
+
+			/* A TDX VM must use x2APIC and doesn't use lazy EOI. */
+			ms_hyperv.hints &= ~HV_X64_APIC_ACCESS_RECOMMENDED;
+
+			if (!ms_hyperv.paravisor_present) {
+				/* To be supported: more work is required.  */
+				ms_hyperv.features &= ~HV_MSR_REFERENCE_TSC_AVAILABLE;
+
+				/* HV_REGISTER_CRASH_CTL is unsupported. */
+				ms_hyperv.misc_features &= ~HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE;
+
+				/* Don't trust Hyper-V's TLB-flushing hypercalls. */
+				ms_hyperv.hints &= ~HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED;
+
+				x86_init.acpi.reduced_hw_early_init = reduced_hw_init;
+			}
+		}
 	}
 
 	if (hv_max_functions_eax >= HYPERV_CPUID_NESTED_FEATURES) {
@@ -473,7 +529,7 @@ static void __init ms_hyperv_init_platform(void)
 
 #if IS_ENABLED(CONFIG_HYPERV)
 	if ((hv_get_isolation_type() == HV_ISOLATION_TYPE_VBS) ||
-	    (hv_get_isolation_type() == HV_ISOLATION_TYPE_SNP))
+	    ms_hyperv.paravisor_present)
 		hv_vtom_init();
 	/*
 	 * Setup the hook to get control post apic initialization.
@@ -497,7 +553,8 @@ static void __init ms_hyperv_init_platform(void)
 
 # ifdef CONFIG_SMP
 	smp_ops.smp_prepare_boot_cpu = hv_smp_prepare_boot_cpu;
-	if (hv_root_partition)
+	if (hv_root_partition ||
+	    (!ms_hyperv.paravisor_present && hv_isolation_type_snp()))
 		smp_ops.smp_prepare_cpus = hv_smp_prepare_cpus;
 # endif
 
@@ -560,6 +617,22 @@ static bool __init ms_hyperv_msi_ext_dest_id(void)
 	return eax & HYPERV_VS_PROPERTIES_EAX_EXTENDED_IOAPIC_RTE;
 }
 
+#ifdef CONFIG_AMD_MEM_ENCRYPT
+static void hv_sev_es_hcall_prepare(struct ghcb *ghcb, struct pt_regs *regs)
+{
+	/* RAX and CPL are already in the GHCB */
+	ghcb_set_rcx(ghcb, regs->cx);
+	ghcb_set_rdx(ghcb, regs->dx);
+	ghcb_set_r8(ghcb, regs->r8);
+}
+
+static bool hv_sev_es_hcall_finish(struct ghcb *ghcb, struct pt_regs *regs)
+{
+	/* No checking of the return state needed */
+	return true;
+}
+#endif
+
 const __initconst struct hypervisor_x86 x86_hyper_ms_hyperv = {
 	.name			= "Microsoft Hyper-V",
 	.detect			= ms_hyperv_platform,
@@ -567,4 +640,8 @@ const __initconst struct hypervisor_x86 x86_hyper_ms_hyperv = {
 	.init.x2apic_available	= ms_hyperv_x2apic_available,
 	.init.msi_ext_dest_id	= ms_hyperv_msi_ext_dest_id,
 	.init.init_platform	= ms_hyperv_init_platform,
+#ifdef CONFIG_AMD_MEM_ENCRYPT
+	.runtime.sev_es_hcall_prepare = hv_sev_es_hcall_prepare,
+	.runtime.sev_es_hcall_finish = hv_sev_es_hcall_finish,
+#endif
 };
diff --git a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c
index 458cb7419502..8f559eeae08e 100644
--- a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c
+++ b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c
@@ -45,7 +45,21 @@ static u64 prefetch_disable_bits;
  */
 static unsigned int pseudo_lock_major;
 static unsigned long pseudo_lock_minor_avail = GENMASK(MINORBITS, 0);
-static struct class *pseudo_lock_class;
+
+static char *pseudo_lock_devnode(const struct device *dev, umode_t *mode)
+{
+	const struct rdtgroup *rdtgrp;
+
+	rdtgrp = dev_get_drvdata(dev);
+	if (mode)
+		*mode = 0600;
+	return kasprintf(GFP_KERNEL, "pseudo_lock/%s", rdtgrp->kn->name);
+}
+
+static const struct class pseudo_lock_class = {
+	.name = "pseudo_lock",
+	.devnode = pseudo_lock_devnode,
+};
 
 /**
  * get_prefetch_disable_bits - prefetch disable bits of supported platforms
@@ -1353,7 +1367,7 @@ int rdtgroup_pseudo_lock_create(struct rdtgroup *rdtgrp)
 					    &pseudo_measure_fops);
 	}
 
-	dev = device_create(pseudo_lock_class, NULL,
+	dev = device_create(&pseudo_lock_class, NULL,
 			    MKDEV(pseudo_lock_major, new_minor),
 			    rdtgrp, "%s", rdtgrp->kn->name);
 
@@ -1383,7 +1397,7 @@ int rdtgroup_pseudo_lock_create(struct rdtgroup *rdtgrp)
 	goto out;
 
 out_device:
-	device_destroy(pseudo_lock_class, MKDEV(pseudo_lock_major, new_minor));
+	device_destroy(&pseudo_lock_class, MKDEV(pseudo_lock_major, new_minor));
 out_debugfs:
 	debugfs_remove_recursive(plr->debugfs_dir);
 	pseudo_lock_minor_release(new_minor);
@@ -1424,7 +1438,7 @@ void rdtgroup_pseudo_lock_remove(struct rdtgroup *rdtgrp)
 
 	pseudo_lock_cstates_relax(plr);
 	debugfs_remove_recursive(rdtgrp->plr->debugfs_dir);
-	device_destroy(pseudo_lock_class, MKDEV(pseudo_lock_major, plr->minor));
+	device_destroy(&pseudo_lock_class, MKDEV(pseudo_lock_major, plr->minor));
 	pseudo_lock_minor_release(plr->minor);
 
 free:
@@ -1560,16 +1574,6 @@ static const struct file_operations pseudo_lock_dev_fops = {
 	.mmap =		pseudo_lock_dev_mmap,
 };
 
-static char *pseudo_lock_devnode(const struct device *dev, umode_t *mode)
-{
-	const struct rdtgroup *rdtgrp;
-
-	rdtgrp = dev_get_drvdata(dev);
-	if (mode)
-		*mode = 0600;
-	return kasprintf(GFP_KERNEL, "pseudo_lock/%s", rdtgrp->kn->name);
-}
-
 int rdt_pseudo_lock_init(void)
 {
 	int ret;
@@ -1580,21 +1584,18 @@ int rdt_pseudo_lock_init(void)
 
 	pseudo_lock_major = ret;
 
-	pseudo_lock_class = class_create("pseudo_lock");
-	if (IS_ERR(pseudo_lock_class)) {
-		ret = PTR_ERR(pseudo_lock_class);
+	ret = class_register(&pseudo_lock_class);
+	if (ret) {
 		unregister_chrdev(pseudo_lock_major, "pseudo_lock");
 		return ret;
 	}
 
-	pseudo_lock_class->devnode = pseudo_lock_devnode;
 	return 0;
 }
 
 void rdt_pseudo_lock_release(void)
 {
-	class_destroy(pseudo_lock_class);
-	pseudo_lock_class = NULL;
+	class_unregister(&pseudo_lock_class);
 	unregister_chrdev(pseudo_lock_major, "pseudo_lock");
 	pseudo_lock_major = 0;
 }
diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c
index bdc0d5539b57..dae436253de4 100644
--- a/arch/x86/kernel/cpuid.c
+++ b/arch/x86/kernel/cpuid.c
@@ -40,7 +40,6 @@
 #include <asm/processor.h>
 #include <asm/msr.h>
 
-static struct class *cpuid_class;
 static enum cpuhp_state cpuhp_cpuid_state;
 
 struct cpuid_regs_done {
@@ -124,26 +123,31 @@ static const struct file_operations cpuid_fops = {
 	.open = cpuid_open,
 };
 
+static char *cpuid_devnode(const struct device *dev, umode_t *mode)
+{
+	return kasprintf(GFP_KERNEL, "cpu/%u/cpuid", MINOR(dev->devt));
+}
+
+static const struct class cpuid_class = {
+	.name		= "cpuid",
+	.devnode	= cpuid_devnode,
+};
+
 static int cpuid_device_create(unsigned int cpu)
 {
 	struct device *dev;
 
-	dev = device_create(cpuid_class, NULL, MKDEV(CPUID_MAJOR, cpu), NULL,
+	dev = device_create(&cpuid_class, NULL, MKDEV(CPUID_MAJOR, cpu), NULL,
 			    "cpu%d", cpu);
 	return PTR_ERR_OR_ZERO(dev);
 }
 
 static int cpuid_device_destroy(unsigned int cpu)
 {
-	device_destroy(cpuid_class, MKDEV(CPUID_MAJOR, cpu));
+	device_destroy(&cpuid_class, MKDEV(CPUID_MAJOR, cpu));
 	return 0;
 }
 
-static char *cpuid_devnode(const struct device *dev, umode_t *mode)
-{
-	return kasprintf(GFP_KERNEL, "cpu/%u/cpuid", MINOR(dev->devt));
-}
-
 static int __init cpuid_init(void)
 {
 	int err;
@@ -154,12 +158,9 @@ static int __init cpuid_init(void)
 		       CPUID_MAJOR);
 		return -EBUSY;
 	}
-	cpuid_class = class_create("cpuid");
-	if (IS_ERR(cpuid_class)) {
-		err = PTR_ERR(cpuid_class);
+	err = class_register(&cpuid_class);
+	if (err)
 		goto out_chrdev;
-	}
-	cpuid_class->devnode = cpuid_devnode;
 
 	err = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "x86/cpuid:online",
 				cpuid_device_create, cpuid_device_destroy);
@@ -170,7 +171,7 @@ static int __init cpuid_init(void)
 	return 0;
 
 out_class:
-	class_destroy(cpuid_class);
+	class_unregister(&cpuid_class);
 out_chrdev:
 	__unregister_chrdev(CPUID_MAJOR, 0, NR_CPUS, "cpu/cpuid");
 	return err;
@@ -180,7 +181,7 @@ module_init(cpuid_init);
 static void __exit cpuid_exit(void)
 {
 	cpuhp_remove_state(cpuhp_cpuid_state);
-	class_destroy(cpuid_class);
+	class_unregister(&cpuid_class);
 	__unregister_chrdev(CPUID_MAJOR, 0, NR_CPUS, "cpu/cpuid");
 }
 module_exit(cpuid_exit);
diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
index 587c7743fd21..c92d88680dbf 100644
--- a/arch/x86/kernel/crash.c
+++ b/arch/x86/kernel/crash.c
@@ -48,27 +48,6 @@ struct crash_memmap_data {
 	unsigned int type;
 };
 
-/*
- * This is used to VMCLEAR all VMCSs loaded on the
- * processor. And when loading kvm_intel module, the
- * callback function pointer will be assigned.
- *
- * protected by rcu.
- */
-crash_vmclear_fn __rcu *crash_vmclear_loaded_vmcss = NULL;
-EXPORT_SYMBOL_GPL(crash_vmclear_loaded_vmcss);
-
-static inline void cpu_crash_vmclear_loaded_vmcss(void)
-{
-	crash_vmclear_fn *do_vmclear_operation = NULL;
-
-	rcu_read_lock();
-	do_vmclear_operation = rcu_dereference(crash_vmclear_loaded_vmcss);
-	if (do_vmclear_operation)
-		do_vmclear_operation();
-	rcu_read_unlock();
-}
-
 #if defined(CONFIG_SMP) && defined(CONFIG_X86_LOCAL_APIC)
 
 static void kdump_nmi_callback(int cpu, struct pt_regs *regs)
@@ -76,11 +55,6 @@ static void kdump_nmi_callback(int cpu, struct pt_regs *regs)
 	crash_save_cpu(regs, cpu);
 
 	/*
-	 * VMCLEAR VMCSs loaded on all cpus if needed.
-	 */
-	cpu_crash_vmclear_loaded_vmcss();
-
-	/*
 	 * Disable Intel PT to stop its logging
 	 */
 	cpu_emergency_stop_pt();
@@ -133,11 +107,6 @@ void native_machine_crash_shutdown(struct pt_regs *regs)
 
 	crash_smp_send_stop();
 
-	/*
-	 * VMCLEAR VMCSs loaded on this cpu if needed.
-	 */
-	cpu_crash_vmclear_loaded_vmcss();
-
 	cpu_emergency_disable_virtualization();
 
 	/*
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index 41dac93b8ea4..cadf68737e6b 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -71,7 +71,7 @@ static unsigned short xsave_cpuid_features[] __initdata = {
 	[XFEATURE_ZMM_Hi256]			= X86_FEATURE_AVX512F,
 	[XFEATURE_Hi16_ZMM]			= X86_FEATURE_AVX512F,
 	[XFEATURE_PT_UNIMPLEMENTED_SO_FAR]	= X86_FEATURE_INTEL_PT,
-	[XFEATURE_PKRU]				= X86_FEATURE_PKU,
+	[XFEATURE_PKRU]				= X86_FEATURE_OSPKE,
 	[XFEATURE_PASID]			= X86_FEATURE_ENQCMD,
 	[XFEATURE_CET_USER]			= X86_FEATURE_SHSTK,
 	[XFEATURE_XTILE_CFG]			= X86_FEATURE_AMX_TILE,
diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c
index 4d8aff05a509..30a55207c000 100644
--- a/arch/x86/kernel/i8259.c
+++ b/arch/x86/kernel/i8259.c
@@ -231,9 +231,7 @@ struct irq_chip i8259A_chip = {
 };
 
 static char irq_trigger[2];
-/**
- * ELCR registers (0x4d0, 0x4d1) control edge/level of IRQ
- */
+/* ELCR registers (0x4d0, 0x4d1) control edge/level of IRQ */
 static void restore_ELCR(char *trigger)
 {
 	outb(trigger[0], PIC_ELCR1);
diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c
index 7bb17d37db01..e17c16c54a37 100644
--- a/arch/x86/kernel/msr.c
+++ b/arch/x86/kernel/msr.c
@@ -39,7 +39,6 @@
 #include <asm/cpufeature.h>
 #include <asm/msr.h>
 
-static struct class *msr_class;
 static enum cpuhp_state cpuhp_msr_state;
 
 enum allow_write_msrs {
@@ -235,26 +234,31 @@ static const struct file_operations msr_fops = {
 	.compat_ioctl = msr_ioctl,
 };
 
+static char *msr_devnode(const struct device *dev, umode_t *mode)
+{
+	return kasprintf(GFP_KERNEL, "cpu/%u/msr", MINOR(dev->devt));
+}
+
+static const struct class msr_class = {
+	.name		= "msr",
+	.devnode	= msr_devnode,
+};
+
 static int msr_device_create(unsigned int cpu)
 {
 	struct device *dev;
 
-	dev = device_create(msr_class, NULL, MKDEV(MSR_MAJOR, cpu), NULL,
+	dev = device_create(&msr_class, NULL, MKDEV(MSR_MAJOR, cpu), NULL,
 			    "msr%d", cpu);
 	return PTR_ERR_OR_ZERO(dev);
 }
 
 static int msr_device_destroy(unsigned int cpu)
 {
-	device_destroy(msr_class, MKDEV(MSR_MAJOR, cpu));
+	device_destroy(&msr_class, MKDEV(MSR_MAJOR, cpu));
 	return 0;
 }
 
-static char *msr_devnode(const struct device *dev, umode_t *mode)
-{
-	return kasprintf(GFP_KERNEL, "cpu/%u/msr", MINOR(dev->devt));
-}
-
 static int __init msr_init(void)
 {
 	int err;
@@ -263,12 +267,9 @@ static int __init msr_init(void)
 		pr_err("unable to get major %d for msr\n", MSR_MAJOR);
 		return -EBUSY;
 	}
-	msr_class = class_create("msr");
-	if (IS_ERR(msr_class)) {
-		err = PTR_ERR(msr_class);
+	err = class_register(&msr_class);
+	if (err)
 		goto out_chrdev;
-	}
-	msr_class->devnode = msr_devnode;
 
 	err  = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "x86/msr:online",
 				 msr_device_create, msr_device_destroy);
@@ -278,7 +279,7 @@ static int __init msr_init(void)
 	return 0;
 
 out_class:
-	class_destroy(msr_class);
+	class_unregister(&msr_class);
 out_chrdev:
 	__unregister_chrdev(MSR_MAJOR, 0, NR_CPUS, "cpu/msr");
 	return err;
@@ -288,7 +289,7 @@ module_init(msr_init);
 static void __exit msr_exit(void)
 {
 	cpuhp_remove_state(cpuhp_msr_state);
-	class_destroy(msr_class);
+	class_unregister(&msr_class);
 	__unregister_chrdev(MSR_MAJOR, 0, NR_CPUS, "cpu/msr");
 }
 module_exit(msr_exit)
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 3adbe97015c1..830425e6d38e 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -22,7 +22,6 @@
 #include <asm/reboot_fixups.h>
 #include <asm/reboot.h>
 #include <asm/pci_x86.h>
-#include <asm/virtext.h>
 #include <asm/cpu.h>
 #include <asm/nmi.h>
 #include <asm/smp.h>
@@ -530,9 +529,54 @@ static inline void kb_wait(void)
 
 static inline void nmi_shootdown_cpus_on_restart(void);
 
+#if IS_ENABLED(CONFIG_KVM_INTEL) || IS_ENABLED(CONFIG_KVM_AMD)
+/* RCU-protected callback to disable virtualization prior to reboot. */
+static cpu_emergency_virt_cb __rcu *cpu_emergency_virt_callback;
+
+void cpu_emergency_register_virt_callback(cpu_emergency_virt_cb *callback)
+{
+	if (WARN_ON_ONCE(rcu_access_pointer(cpu_emergency_virt_callback)))
+		return;
+
+	rcu_assign_pointer(cpu_emergency_virt_callback, callback);
+}
+EXPORT_SYMBOL_GPL(cpu_emergency_register_virt_callback);
+
+void cpu_emergency_unregister_virt_callback(cpu_emergency_virt_cb *callback)
+{
+	if (WARN_ON_ONCE(rcu_access_pointer(cpu_emergency_virt_callback) != callback))
+		return;
+
+	rcu_assign_pointer(cpu_emergency_virt_callback, NULL);
+	synchronize_rcu();
+}
+EXPORT_SYMBOL_GPL(cpu_emergency_unregister_virt_callback);
+
+/*
+ * Disable virtualization, i.e. VMX or SVM, to ensure INIT is recognized during
+ * reboot.  VMX blocks INIT if the CPU is post-VMXON, and SVM blocks INIT if
+ * GIF=0, i.e. if the crash occurred between CLGI and STGI.
+ */
+void cpu_emergency_disable_virtualization(void)
+{
+	cpu_emergency_virt_cb *callback;
+
+	/*
+	 * IRQs must be disabled as KVM enables virtualization in hardware via
+	 * function call IPIs, i.e. IRQs need to be disabled to guarantee
+	 * virtualization stays disabled.
+	 */
+	lockdep_assert_irqs_disabled();
+
+	rcu_read_lock();
+	callback = rcu_dereference(cpu_emergency_virt_callback);
+	if (callback)
+		callback();
+	rcu_read_unlock();
+}
+
 static void emergency_reboot_disable_virtualization(void)
 {
-	/* Just make sure we won't change CPUs while doing this */
 	local_irq_disable();
 
 	/*
@@ -545,7 +589,7 @@ static void emergency_reboot_disable_virtualization(void)
 	 * Do the NMI shootdown even if virtualization is off on _this_ CPU, as
 	 * other CPUs may have virtualization enabled.
 	 */
-	if (cpu_has_vmx() || cpu_has_svm(NULL)) {
+	if (rcu_access_pointer(cpu_emergency_virt_callback)) {
 		/* Safely force _this_ CPU out of VMX/SVM operation. */
 		cpu_emergency_disable_virtualization();
 
@@ -553,7 +597,9 @@ static void emergency_reboot_disable_virtualization(void)
 		nmi_shootdown_cpus_on_restart();
 	}
 }
-
+#else
+static void emergency_reboot_disable_virtualization(void) { }
+#endif /* CONFIG_KVM_INTEL || CONFIG_KVM_AMD */
 
 void __attribute__((weak)) mach_reboot_fixups(void)
 {
@@ -787,21 +833,9 @@ void machine_crash_shutdown(struct pt_regs *regs)
 }
 #endif
 
-
 /* This is the CPU performing the emergency shutdown work. */
 int crashing_cpu = -1;
 
-/*
- * Disable virtualization, i.e. VMX or SVM, to ensure INIT is recognized during
- * reboot.  VMX blocks INIT if the CPU is post-VMXON, and SVM blocks INIT if
- * GIF=0, i.e. if the crash occurred between CLGI and STGI.
- */
-void cpu_emergency_disable_virtualization(void)
-{
-	cpu_emergency_vmxoff();
-	cpu_emergency_svm_disable();
-}
-
 #if defined(CONFIG_SMP)
 
 static nmi_shootdown_cb shootdown_callback;
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index 89ca7f4c1464..ed90f148140d 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -101,7 +101,7 @@ config X86_SGX_KVM
 
 config KVM_AMD
 	tristate "KVM for AMD processors support"
-	depends on KVM
+	depends on KVM && (CPU_SUP_AMD || CPU_SUP_HYGON)
 	help
 	  Provides support for KVM on AMD processors equipped with the AMD-V
 	  (SVM) extensions.
@@ -138,6 +138,19 @@ config KVM_XEN
 
 	  If in doubt, say "N".
 
+config KVM_PROVE_MMU
+	bool "Prove KVM MMU correctness"
+	depends on DEBUG_KERNEL
+	depends on KVM
+	depends on EXPERT
+	help
+	  Enables runtime assertions in KVM's MMU that are too costly to enable
+	  in anything remotely resembling a production environment, e.g. this
+	  gates code that verifies a to-be-freed page table doesn't have any
+	  present SPTEs.
+
+	  If in doubt, say "N".
+
 config KVM_EXTERNAL_WRITE_TRACKING
 	bool
 
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index d3432687c9e6..0544e30b4946 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -11,6 +11,7 @@
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
 #include <linux/kvm_host.h>
+#include "linux/lockdep.h"
 #include <linux/export.h>
 #include <linux/vmalloc.h>
 #include <linux/uaccess.h>
@@ -84,6 +85,18 @@ static inline struct kvm_cpuid_entry2 *cpuid_entry2_find(
 	struct kvm_cpuid_entry2 *e;
 	int i;
 
+	/*
+	 * KVM has a semi-arbitrary rule that querying the guest's CPUID model
+	 * with IRQs disabled is disallowed.  The CPUID model can legitimately
+	 * have over one hundred entries, i.e. the lookup is slow, and IRQs are
+	 * typically disabled in KVM only when KVM is in a performance critical
+	 * path, e.g. the core VM-Enter/VM-Exit run loop.  Nothing will break
+	 * if this rule is violated, this assertion is purely to flag potential
+	 * performance issues.  If this fires, consider moving the lookup out
+	 * of the hotpath, e.g. by caching information during CPUID updates.
+	 */
+	lockdep_assert_irqs_enabled();
+
 	for (i = 0; i < nent; i++) {
 		e = &entries[i];
 
@@ -312,6 +325,27 @@ static void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
 {
 	struct kvm_lapic *apic = vcpu->arch.apic;
 	struct kvm_cpuid_entry2 *best;
+	bool allow_gbpages;
+
+	BUILD_BUG_ON(KVM_NR_GOVERNED_FEATURES > KVM_MAX_NR_GOVERNED_FEATURES);
+	bitmap_zero(vcpu->arch.governed_features.enabled,
+		    KVM_MAX_NR_GOVERNED_FEATURES);
+
+	/*
+	 * If TDP is enabled, let the guest use GBPAGES if they're supported in
+	 * hardware.  The hardware page walker doesn't let KVM disable GBPAGES,
+	 * i.e. won't treat them as reserved, and KVM doesn't redo the GVA->GPA
+	 * walk for performance and complexity reasons.  Not to mention KVM
+	 * _can't_ solve the problem because GVA->GPA walks aren't visible to
+	 * KVM once a TDP translation is installed.  Mimic hardware behavior so
+	 * that KVM's is at least consistent, i.e. doesn't randomly inject #PF.
+	 * If TDP is disabled, honor *only* guest CPUID as KVM has full control
+	 * and can install smaller shadow pages if the host lacks 1GiB support.
+	 */
+	allow_gbpages = tdp_enabled ? boot_cpu_has(X86_FEATURE_GBPAGES) :
+				      guest_cpuid_has(vcpu, X86_FEATURE_GBPAGES);
+	if (allow_gbpages)
+		kvm_governed_feature_set(vcpu, X86_FEATURE_GBPAGES);
 
 	best = kvm_find_cpuid_entry(vcpu, 1);
 	if (best && apic) {
@@ -647,7 +681,8 @@ void kvm_set_cpu_caps(void)
 	);
 
 	kvm_cpu_cap_init_kvm_defined(CPUID_7_1_EDX,
-		F(AVX_VNNI_INT8) | F(AVX_NE_CONVERT) | F(PREFETCHITI)
+		F(AVX_VNNI_INT8) | F(AVX_NE_CONVERT) | F(PREFETCHITI) |
+		F(AMX_COMPLEX)
 	);
 
 	kvm_cpu_cap_mask(CPUID_D_1_EAX,
@@ -1154,6 +1189,9 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
 		cpuid_entry_override(entry, CPUID_8000_0001_EDX);
 		cpuid_entry_override(entry, CPUID_8000_0001_ECX);
 		break;
+	case 0x80000005:
+		/*  Pass host L1 cache and TLB info. */
+		break;
 	case 0x80000006:
 		/* Drop reserved bits, pass host L2 cache and TLB info. */
 		entry->edx &= ~GENMASK(17, 16);
diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
index b1658c0de847..284fa4704553 100644
--- a/arch/x86/kvm/cpuid.h
+++ b/arch/x86/kvm/cpuid.h
@@ -232,4 +232,50 @@ static __always_inline bool guest_pv_has(struct kvm_vcpu *vcpu,
 	return vcpu->arch.pv_cpuid.features & (1u << kvm_feature);
 }
 
+enum kvm_governed_features {
+#define KVM_GOVERNED_FEATURE(x) KVM_GOVERNED_##x,
+#include "governed_features.h"
+	KVM_NR_GOVERNED_FEATURES
+};
+
+static __always_inline int kvm_governed_feature_index(unsigned int x86_feature)
+{
+	switch (x86_feature) {
+#define KVM_GOVERNED_FEATURE(x) case x: return KVM_GOVERNED_##x;
+#include "governed_features.h"
+	default:
+		return -1;
+	}
+}
+
+static __always_inline bool kvm_is_governed_feature(unsigned int x86_feature)
+{
+	return kvm_governed_feature_index(x86_feature) >= 0;
+}
+
+static __always_inline void kvm_governed_feature_set(struct kvm_vcpu *vcpu,
+						     unsigned int x86_feature)
+{
+	BUILD_BUG_ON(!kvm_is_governed_feature(x86_feature));
+
+	__set_bit(kvm_governed_feature_index(x86_feature),
+		  vcpu->arch.governed_features.enabled);
+}
+
+static __always_inline void kvm_governed_feature_check_and_set(struct kvm_vcpu *vcpu,
+							       unsigned int x86_feature)
+{
+	if (kvm_cpu_cap_has(x86_feature) && guest_cpuid_has(vcpu, x86_feature))
+		kvm_governed_feature_set(vcpu, x86_feature);
+}
+
+static __always_inline bool guest_can_use(struct kvm_vcpu *vcpu,
+					  unsigned int x86_feature)
+{
+	BUILD_BUG_ON(!kvm_is_governed_feature(x86_feature));
+
+	return test_bit(kvm_governed_feature_index(x86_feature),
+			vcpu->arch.governed_features.enabled);
+}
+
 #endif
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 936a397a08cd..2673cd5c46cb 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -1799,13 +1799,11 @@ static int writeback(struct x86_emulate_ctxt *ctxt, struct operand *op)
 					       op->addr.mem,
 					       &op->val,
 					       op->bytes);
-		break;
 	case OP_MEM_STR:
 		return segmented_write(ctxt,
 				       op->addr.mem,
 				       op->data,
 				       op->bytes * op->count);
-		break;
 	case OP_XMM:
 		kvm_write_sse_reg(op->addr.xmm, &op->vec_val);
 		break;
diff --git a/arch/x86/kvm/governed_features.h b/arch/x86/kvm/governed_features.h
new file mode 100644
index 000000000000..423a73395c10
--- /dev/null
+++ b/arch/x86/kvm/governed_features.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#if !defined(KVM_GOVERNED_FEATURE) || defined(KVM_GOVERNED_X86_FEATURE)
+BUILD_BUG()
+#endif
+
+#define KVM_GOVERNED_X86_FEATURE(x) KVM_GOVERNED_FEATURE(X86_FEATURE_##x)
+
+KVM_GOVERNED_X86_FEATURE(GBPAGES)
+KVM_GOVERNED_X86_FEATURE(XSAVES)
+KVM_GOVERNED_X86_FEATURE(VMX)
+KVM_GOVERNED_X86_FEATURE(NRIPS)
+KVM_GOVERNED_X86_FEATURE(TSCRATEMSR)
+KVM_GOVERNED_X86_FEATURE(V_VMSAVE_VMLOAD)
+KVM_GOVERNED_X86_FEATURE(LBRV)
+KVM_GOVERNED_X86_FEATURE(PAUSEFILTER)
+KVM_GOVERNED_X86_FEATURE(PFTHRESHOLD)
+KVM_GOVERNED_X86_FEATURE(VGIF)
+KVM_GOVERNED_X86_FEATURE(VNMI)
+
+#undef KVM_GOVERNED_X86_FEATURE
+#undef KVM_GOVERNED_FEATURE
diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index b28fd020066f..7c2dac6824e2 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -1293,7 +1293,6 @@ static bool hv_check_msr_access(struct kvm_vcpu_hv *hv_vcpu, u32 msr)
 	case HV_X64_MSR_VP_ASSIST_PAGE:
 		return hv_vcpu->cpuid_cache.features_eax &
 			HV_MSR_APIC_ACCESS_AVAILABLE;
-		break;
 	case HV_X64_MSR_TSC_FREQUENCY:
 	case HV_X64_MSR_APIC_FREQUENCY:
 		return hv_vcpu->cpuid_cache.features_eax &
diff --git a/arch/x86/kvm/kvm_emulate.h b/arch/x86/kvm/kvm_emulate.h
index ab65f3a47dfd..be7aeb9b8ea3 100644
--- a/arch/x86/kvm/kvm_emulate.h
+++ b/arch/x86/kvm/kvm_emulate.h
@@ -213,7 +213,6 @@ struct x86_emulate_ops {
 
 	bool (*get_cpuid)(struct x86_emulate_ctxt *ctxt, u32 *eax, u32 *ebx,
 			  u32 *ecx, u32 *edx, bool exact_only);
-	bool (*guest_has_long_mode)(struct x86_emulate_ctxt *ctxt);
 	bool (*guest_has_movbe)(struct x86_emulate_ctxt *ctxt);
 	bool (*guest_has_fxsr)(struct x86_emulate_ctxt *ctxt);
 	bool (*guest_has_rdpid)(struct x86_emulate_ctxt *ctxt);
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index a983a16163b1..dcd60b39e794 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -376,7 +376,8 @@ void kvm_recalculate_apic_map(struct kvm *kvm)
 	struct kvm_vcpu *vcpu;
 	unsigned long i;
 	u32 max_id = 255; /* enough space for any xAPIC ID */
-	bool xapic_id_mismatch = false;
+	bool xapic_id_mismatch;
+	int r;
 
 	/* Read kvm->arch.apic_map_dirty before kvm->arch.apic_map.  */
 	if (atomic_read_acquire(&kvm->arch.apic_map_dirty) == CLEAN)
@@ -386,9 +387,14 @@ void kvm_recalculate_apic_map(struct kvm *kvm)
 		  "Dirty APIC map without an in-kernel local APIC");
 
 	mutex_lock(&kvm->arch.apic_map_lock);
+
+retry:
 	/*
-	 * Read kvm->arch.apic_map_dirty before kvm->arch.apic_map
-	 * (if clean) or the APIC registers (if dirty).
+	 * Read kvm->arch.apic_map_dirty before kvm->arch.apic_map (if clean)
+	 * or the APIC registers (if dirty).  Note, on retry the map may have
+	 * not yet been marked dirty by whatever task changed a vCPU's x2APIC
+	 * ID, i.e. the map may still show up as in-progress.  In that case
+	 * this task still needs to retry and complete its calculation.
 	 */
 	if (atomic_cmpxchg_acquire(&kvm->arch.apic_map_dirty,
 				   DIRTY, UPDATE_IN_PROGRESS) == CLEAN) {
@@ -397,6 +403,15 @@ void kvm_recalculate_apic_map(struct kvm *kvm)
 		return;
 	}
 
+	/*
+	 * Reset the mismatch flag between attempts so that KVM does the right
+	 * thing if a vCPU changes its xAPIC ID, but do NOT reset max_id, i.e.
+	 * keep max_id strictly increasing.  Disallowing max_id from shrinking
+	 * ensures KVM won't get stuck in an infinite loop, e.g. if the vCPU
+	 * with the highest x2APIC ID is toggling its APIC on and off.
+	 */
+	xapic_id_mismatch = false;
+
 	kvm_for_each_vcpu(i, vcpu, kvm)
 		if (kvm_apic_present(vcpu))
 			max_id = max(max_id, kvm_x2apic_id(vcpu->arch.apic));
@@ -415,9 +430,15 @@ void kvm_recalculate_apic_map(struct kvm *kvm)
 		if (!kvm_apic_present(vcpu))
 			continue;
 
-		if (kvm_recalculate_phys_map(new, vcpu, &xapic_id_mismatch)) {
+		r = kvm_recalculate_phys_map(new, vcpu, &xapic_id_mismatch);
+		if (r) {
 			kvfree(new);
 			new = NULL;
+			if (r == -E2BIG) {
+				cond_resched();
+				goto retry;
+			}
+
 			goto out;
 		}
 
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index 92d5a1924fc1..253fb2093d5d 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -121,6 +121,8 @@ void kvm_mmu_unload(struct kvm_vcpu *vcpu);
 void kvm_mmu_free_obsolete_roots(struct kvm_vcpu *vcpu);
 void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu);
 void kvm_mmu_sync_prev_roots(struct kvm_vcpu *vcpu);
+void kvm_mmu_track_write(struct kvm_vcpu *vcpu, gpa_t gpa, const u8 *new,
+			 int bytes);
 
 static inline int kvm_mmu_reload(struct kvm_vcpu *vcpu)
 {
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index ec169f5c7dce..e1d011c67cc6 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -25,6 +25,7 @@
 #include "kvm_cache_regs.h"
 #include "smm.h"
 #include "kvm_emulate.h"
+#include "page_track.h"
 #include "cpuid.h"
 #include "spte.h"
 
@@ -53,7 +54,7 @@
 #include <asm/io.h>
 #include <asm/set_memory.h>
 #include <asm/vmx.h>
-#include <asm/kvm_page_track.h>
+
 #include "trace.h"
 
 extern bool itlb_multihit_kvm_mitigation;
@@ -115,11 +116,6 @@ static int max_huge_page_level __read_mostly;
 static int tdp_root_level __read_mostly;
 static int max_tdp_level __read_mostly;
 
-#ifdef MMU_DEBUG
-bool dbg = 0;
-module_param(dbg, bool, 0644);
-#endif
-
 #define PTE_PREFETCH_NUM		8
 
 #include <trace/events/kvm.h>
@@ -278,16 +274,12 @@ static inline bool kvm_available_flush_remote_tlbs_range(void)
 	return kvm_x86_ops.flush_remote_tlbs_range;
 }
 
-void kvm_flush_remote_tlbs_range(struct kvm *kvm, gfn_t start_gfn,
-				 gfn_t nr_pages)
+int kvm_arch_flush_remote_tlbs_range(struct kvm *kvm, gfn_t gfn, u64 nr_pages)
 {
-	int ret = -EOPNOTSUPP;
+	if (!kvm_x86_ops.flush_remote_tlbs_range)
+		return -EOPNOTSUPP;
 
-	if (kvm_x86_ops.flush_remote_tlbs_range)
-		ret = static_call(kvm_x86_flush_remote_tlbs_range)(kvm, start_gfn,
-								   nr_pages);
-	if (ret)
-		kvm_flush_remote_tlbs(kvm);
+	return static_call(kvm_x86_flush_remote_tlbs_range)(kvm, gfn, nr_pages);
 }
 
 static gfn_t kvm_mmu_page_get_gfn(struct kvm_mmu_page *sp, int index);
@@ -490,7 +482,7 @@ retry:
  */
 static void mmu_spte_set(u64 *sptep, u64 new_spte)
 {
-	WARN_ON(is_shadow_present_pte(*sptep));
+	WARN_ON_ONCE(is_shadow_present_pte(*sptep));
 	__set_spte(sptep, new_spte);
 }
 
@@ -502,7 +494,7 @@ static u64 mmu_spte_update_no_track(u64 *sptep, u64 new_spte)
 {
 	u64 old_spte = *sptep;
 
-	WARN_ON(!is_shadow_present_pte(new_spte));
+	WARN_ON_ONCE(!is_shadow_present_pte(new_spte));
 	check_spte_writable_invariants(new_spte);
 
 	if (!is_shadow_present_pte(old_spte)) {
@@ -515,7 +507,7 @@ static u64 mmu_spte_update_no_track(u64 *sptep, u64 new_spte)
 	else
 		old_spte = __update_clear_spte_slow(sptep, new_spte);
 
-	WARN_ON(spte_to_pfn(old_spte) != spte_to_pfn(new_spte));
+	WARN_ON_ONCE(spte_to_pfn(old_spte) != spte_to_pfn(new_spte));
 
 	return old_spte;
 }
@@ -597,7 +589,7 @@ static u64 mmu_spte_clear_track_bits(struct kvm *kvm, u64 *sptep)
 	 * by a refcounted page, the refcount is elevated.
 	 */
 	page = kvm_pfn_to_refcounted_page(pfn);
-	WARN_ON(page && !page_count(page));
+	WARN_ON_ONCE(page && !page_count(page));
 
 	if (is_accessed_spte(old_spte))
 		kvm_set_pfn_accessed(pfn);
@@ -812,7 +804,7 @@ static void update_gfn_disallow_lpage_count(const struct kvm_memory_slot *slot,
 	for (i = PG_LEVEL_2M; i <= KVM_MAX_HUGEPAGE_LEVEL; ++i) {
 		linfo = lpage_info_slot(gfn, slot, i);
 		linfo->disallow_lpage += count;
-		WARN_ON(linfo->disallow_lpage < 0);
+		WARN_ON_ONCE(linfo->disallow_lpage < 0);
 	}
 }
 
@@ -839,8 +831,7 @@ static void account_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp)
 
 	/* the non-leaf shadow pages are keeping readonly. */
 	if (sp->role.level > PG_LEVEL_4K)
-		return kvm_slot_page_track_add_page(kvm, slot, gfn,
-						    KVM_PAGE_TRACK_WRITE);
+		return __kvm_write_track_add_gfn(kvm, slot, gfn);
 
 	kvm_mmu_gfn_disallow_lpage(slot, gfn);
 
@@ -886,8 +877,7 @@ static void unaccount_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp)
 	slots = kvm_memslots_for_spte_role(kvm, sp->role);
 	slot = __gfn_to_memslot(slots, gfn);
 	if (sp->role.level > PG_LEVEL_4K)
-		return kvm_slot_page_track_remove_page(kvm, slot, gfn,
-						       KVM_PAGE_TRACK_WRITE);
+		return __kvm_write_track_remove_gfn(kvm, slot, gfn);
 
 	kvm_mmu_gfn_allow_lpage(slot, gfn);
 }
@@ -941,10 +931,8 @@ static int pte_list_add(struct kvm_mmu_memory_cache *cache, u64 *spte,
 	int count = 0;
 
 	if (!rmap_head->val) {
-		rmap_printk("%p %llx 0->1\n", spte, *spte);
 		rmap_head->val = (unsigned long)spte;
 	} else if (!(rmap_head->val & 1)) {
-		rmap_printk("%p %llx 1->many\n", spte, *spte);
 		desc = kvm_mmu_memory_cache_alloc(cache);
 		desc->sptes[0] = (u64 *)rmap_head->val;
 		desc->sptes[1] = spte;
@@ -953,7 +941,6 @@ static int pte_list_add(struct kvm_mmu_memory_cache *cache, u64 *spte,
 		rmap_head->val = (unsigned long)desc | 1;
 		++count;
 	} else {
-		rmap_printk("%p %llx many->many\n", spte, *spte);
 		desc = (struct pte_list_desc *)(rmap_head->val & ~1ul);
 		count = desc->tail_count + desc->spte_count;
 
@@ -973,7 +960,8 @@ static int pte_list_add(struct kvm_mmu_memory_cache *cache, u64 *spte,
 	return count;
 }
 
-static void pte_list_desc_remove_entry(struct kvm_rmap_head *rmap_head,
+static void pte_list_desc_remove_entry(struct kvm *kvm,
+				       struct kvm_rmap_head *rmap_head,
 				       struct pte_list_desc *desc, int i)
 {
 	struct pte_list_desc *head_desc = (struct pte_list_desc *)(rmap_head->val & ~1ul);
@@ -984,7 +972,7 @@ static void pte_list_desc_remove_entry(struct kvm_rmap_head *rmap_head,
 	 * when adding an entry and the previous head is full, and heads are
 	 * removed (this flow) when they become empty.
 	 */
-	BUG_ON(j < 0);
+	KVM_BUG_ON_DATA_CORRUPTION(j < 0, kvm);
 
 	/*
 	 * Replace the to-be-freed SPTE with the last valid entry from the head
@@ -1009,35 +997,34 @@ static void pte_list_desc_remove_entry(struct kvm_rmap_head *rmap_head,
 	mmu_free_pte_list_desc(head_desc);
 }
 
-static void pte_list_remove(u64 *spte, struct kvm_rmap_head *rmap_head)
+static void pte_list_remove(struct kvm *kvm, u64 *spte,
+			    struct kvm_rmap_head *rmap_head)
 {
 	struct pte_list_desc *desc;
 	int i;
 
-	if (!rmap_head->val) {
-		pr_err("%s: %p 0->BUG\n", __func__, spte);
-		BUG();
-	} else if (!(rmap_head->val & 1)) {
-		rmap_printk("%p 1->0\n", spte);
-		if ((u64 *)rmap_head->val != spte) {
-			pr_err("%s:  %p 1->BUG\n", __func__, spte);
-			BUG();
-		}
+	if (KVM_BUG_ON_DATA_CORRUPTION(!rmap_head->val, kvm))
+		return;
+
+	if (!(rmap_head->val & 1)) {
+		if (KVM_BUG_ON_DATA_CORRUPTION((u64 *)rmap_head->val != spte, kvm))
+			return;
+
 		rmap_head->val = 0;
 	} else {
-		rmap_printk("%p many->many\n", spte);
 		desc = (struct pte_list_desc *)(rmap_head->val & ~1ul);
 		while (desc) {
 			for (i = 0; i < desc->spte_count; ++i) {
 				if (desc->sptes[i] == spte) {
-					pte_list_desc_remove_entry(rmap_head, desc, i);
+					pte_list_desc_remove_entry(kvm, rmap_head,
+								   desc, i);
 					return;
 				}
 			}
 			desc = desc->more;
 		}
-		pr_err("%s: %p many->many\n", __func__, spte);
-		BUG();
+
+		KVM_BUG_ON_DATA_CORRUPTION(true, kvm);
 	}
 }
 
@@ -1045,7 +1032,7 @@ static void kvm_zap_one_rmap_spte(struct kvm *kvm,
 				  struct kvm_rmap_head *rmap_head, u64 *sptep)
 {
 	mmu_spte_clear_track_bits(kvm, sptep);
-	pte_list_remove(sptep, rmap_head);
+	pte_list_remove(kvm, sptep, rmap_head);
 }
 
 /* Return true if at least one SPTE was zapped, false otherwise */
@@ -1120,7 +1107,7 @@ static void rmap_remove(struct kvm *kvm, u64 *spte)
 	slot = __gfn_to_memslot(slots, gfn);
 	rmap_head = gfn_to_rmap(gfn, sp->role.level, slot);
 
-	pte_list_remove(spte, rmap_head);
+	pte_list_remove(kvm, spte, rmap_head);
 }
 
 /*
@@ -1212,7 +1199,7 @@ static void drop_large_spte(struct kvm *kvm, u64 *sptep, bool flush)
 	struct kvm_mmu_page *sp;
 
 	sp = sptep_to_sp(sptep);
-	WARN_ON(sp->role.level == PG_LEVEL_4K);
+	WARN_ON_ONCE(sp->role.level == PG_LEVEL_4K);
 
 	drop_spte(kvm, sptep);
 
@@ -1241,8 +1228,6 @@ static bool spte_write_protect(u64 *sptep, bool pt_protect)
 	    !(pt_protect && is_mmu_writable_spte(spte)))
 		return false;
 
-	rmap_printk("spte %p %llx\n", sptep, *sptep);
-
 	if (pt_protect)
 		spte &= ~shadow_mmu_writable_mask;
 	spte = spte & ~PT_WRITABLE_MASK;
@@ -1267,9 +1252,7 @@ static bool spte_clear_dirty(u64 *sptep)
 {
 	u64 spte = *sptep;
 
-	rmap_printk("spte %p %llx\n", sptep, *sptep);
-
-	MMU_WARN_ON(!spte_ad_enabled(spte));
+	KVM_MMU_WARN_ON(!spte_ad_enabled(spte));
 	spte &= ~shadow_dirty_mask;
 	return mmu_spte_update(sptep, spte);
 }
@@ -1475,14 +1458,11 @@ static bool kvm_set_pte_rmap(struct kvm *kvm, struct kvm_rmap_head *rmap_head,
 	u64 new_spte;
 	kvm_pfn_t new_pfn;
 
-	WARN_ON(pte_huge(pte));
+	WARN_ON_ONCE(pte_huge(pte));
 	new_pfn = pte_pfn(pte);
 
 restart:
 	for_each_rmap_spte(rmap_head, &iter, sptep) {
-		rmap_printk("spte %p %llx gfn %llx (%d)\n",
-			    sptep, *sptep, gfn, level);
-
 		need_flush = true;
 
 		if (pte_write(pte)) {
@@ -1588,7 +1568,7 @@ static __always_inline bool kvm_handle_gfn_range(struct kvm *kvm,
 	for_each_slot_rmap_range(range->slot, PG_LEVEL_4K, KVM_MAX_HUGEPAGE_LEVEL,
 				 range->start, range->end - 1, &iterator)
 		ret |= handler(kvm, iterator.rmap, range->slot, iterator.gfn,
-			       iterator.level, range->pte);
+			       iterator.level, range->arg.pte);
 
 	return ret;
 }
@@ -1710,21 +1690,19 @@ bool kvm_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
 	return young;
 }
 
-#ifdef MMU_DEBUG
-static int is_empty_shadow_page(u64 *spt)
+static void kvm_mmu_check_sptes_at_free(struct kvm_mmu_page *sp)
 {
-	u64 *pos;
-	u64 *end;
+#ifdef CONFIG_KVM_PROVE_MMU
+	int i;
 
-	for (pos = spt, end = pos + SPTE_ENT_PER_PAGE; pos != end; pos++)
-		if (is_shadow_present_pte(*pos)) {
-			printk(KERN_ERR "%s: %p %llx\n", __func__,
-			       pos, *pos);
-			return 0;
-		}
-	return 1;
-}
+	for (i = 0; i < SPTE_ENT_PER_PAGE; i++) {
+		if (KVM_MMU_WARN_ON(is_shadow_present_pte(sp->spt[i])))
+			pr_err_ratelimited("SPTE %llx (@ %p) for gfn %llx shadow-present at free",
+					   sp->spt[i], &sp->spt[i],
+					   kvm_mmu_page_get_gfn(sp, i));
+	}
 #endif
+}
 
 /*
  * This value is the sum of all of the kvm instances's
@@ -1752,7 +1730,8 @@ static void kvm_unaccount_mmu_page(struct kvm *kvm, struct kvm_mmu_page *sp)
 
 static void kvm_mmu_free_shadow_page(struct kvm_mmu_page *sp)
 {
-	MMU_WARN_ON(!is_empty_shadow_page(sp->spt));
+	kvm_mmu_check_sptes_at_free(sp);
+
 	hlist_del(&sp->hash_link);
 	list_del(&sp->link);
 	free_page((unsigned long)sp->spt);
@@ -1775,16 +1754,16 @@ static void mmu_page_add_parent_pte(struct kvm_mmu_memory_cache *cache,
 	pte_list_add(cache, parent_pte, &sp->parent_ptes);
 }
 
-static void mmu_page_remove_parent_pte(struct kvm_mmu_page *sp,
+static void mmu_page_remove_parent_pte(struct kvm *kvm, struct kvm_mmu_page *sp,
 				       u64 *parent_pte)
 {
-	pte_list_remove(parent_pte, &sp->parent_ptes);
+	pte_list_remove(kvm, parent_pte, &sp->parent_ptes);
 }
 
-static void drop_parent_pte(struct kvm_mmu_page *sp,
+static void drop_parent_pte(struct kvm *kvm, struct kvm_mmu_page *sp,
 			    u64 *parent_pte)
 {
-	mmu_page_remove_parent_pte(sp, parent_pte);
+	mmu_page_remove_parent_pte(kvm, sp, parent_pte);
 	mmu_spte_clear_no_track(parent_pte);
 }
 
@@ -1840,7 +1819,7 @@ static int mmu_pages_add(struct kvm_mmu_pages *pvec, struct kvm_mmu_page *sp,
 static inline void clear_unsync_child_bit(struct kvm_mmu_page *sp, int idx)
 {
 	--sp->unsync_children;
-	WARN_ON((int)sp->unsync_children < 0);
+	WARN_ON_ONCE((int)sp->unsync_children < 0);
 	__clear_bit(idx, sp->unsync_child_bitmap);
 }
 
@@ -1898,7 +1877,7 @@ static int mmu_unsync_walk(struct kvm_mmu_page *sp,
 
 static void kvm_unlink_unsync_page(struct kvm *kvm, struct kvm_mmu_page *sp)
 {
-	WARN_ON(!sp->unsync);
+	WARN_ON_ONCE(!sp->unsync);
 	trace_kvm_mmu_sync_page(sp);
 	sp->unsync = 0;
 	--kvm->stat.mmu_unsync;
@@ -2073,11 +2052,11 @@ static int mmu_pages_first(struct kvm_mmu_pages *pvec,
 	if (pvec->nr == 0)
 		return 0;
 
-	WARN_ON(pvec->page[0].idx != INVALID_INDEX);
+	WARN_ON_ONCE(pvec->page[0].idx != INVALID_INDEX);
 
 	sp = pvec->page[0].sp;
 	level = sp->role.level;
-	WARN_ON(level == PG_LEVEL_4K);
+	WARN_ON_ONCE(level == PG_LEVEL_4K);
 
 	parents->parent[level-2] = sp;
 
@@ -2099,7 +2078,7 @@ static void mmu_pages_clear_parents(struct mmu_page_path *parents)
 		if (!sp)
 			return;
 
-		WARN_ON(idx == INVALID_INDEX);
+		WARN_ON_ONCE(idx == INVALID_INDEX);
 		clear_unsync_child_bit(sp, idx);
 		level++;
 	} while (!sp->unsync_children);
@@ -2220,7 +2199,7 @@ static struct kvm_mmu_page *kvm_mmu_find_shadow_page(struct kvm *kvm,
 			if (ret < 0)
 				break;
 
-			WARN_ON(!list_empty(&invalid_list));
+			WARN_ON_ONCE(!list_empty(&invalid_list));
 			if (ret > 0)
 				kvm_flush_remote_tlbs(kvm);
 		}
@@ -2499,7 +2478,7 @@ static void validate_direct_spte(struct kvm_vcpu *vcpu, u64 *sptep,
 		if (child->role.access == direct_access)
 			return;
 
-		drop_parent_pte(child, sptep);
+		drop_parent_pte(vcpu->kvm, child, sptep);
 		kvm_flush_remote_tlbs_sptep(vcpu->kvm, sptep);
 	}
 }
@@ -2517,7 +2496,7 @@ static int mmu_page_zap_pte(struct kvm *kvm, struct kvm_mmu_page *sp,
 			drop_spte(kvm, spte);
 		} else {
 			child = spte_to_child_sp(pte);
-			drop_parent_pte(child, spte);
+			drop_parent_pte(kvm, child, spte);
 
 			/*
 			 * Recursively zap nested TDP SPs, parentless SPs are
@@ -2548,13 +2527,13 @@ static int kvm_mmu_page_unlink_children(struct kvm *kvm,
 	return zapped;
 }
 
-static void kvm_mmu_unlink_parents(struct kvm_mmu_page *sp)
+static void kvm_mmu_unlink_parents(struct kvm *kvm, struct kvm_mmu_page *sp)
 {
 	u64 *sptep;
 	struct rmap_iterator iter;
 
 	while ((sptep = rmap_get_first(&sp->parent_ptes, &iter)))
-		drop_parent_pte(sp, sptep);
+		drop_parent_pte(kvm, sp, sptep);
 }
 
 static int mmu_zap_unsync_children(struct kvm *kvm,
@@ -2593,7 +2572,7 @@ static bool __kvm_mmu_prepare_zap_page(struct kvm *kvm,
 	++kvm->stat.mmu_shadow_zapped;
 	*nr_zapped = mmu_zap_unsync_children(kvm, sp, invalid_list);
 	*nr_zapped += kvm_mmu_page_unlink_children(kvm, sp, invalid_list);
-	kvm_mmu_unlink_parents(sp);
+	kvm_mmu_unlink_parents(kvm, sp);
 
 	/* Zapping children means active_mmu_pages has become unstable. */
 	list_unstable = *nr_zapped;
@@ -2675,7 +2654,7 @@ static void kvm_mmu_commit_zap_page(struct kvm *kvm,
 	kvm_flush_remote_tlbs(kvm);
 
 	list_for_each_entry_safe(sp, nsp, invalid_list, link) {
-		WARN_ON(!sp->role.invalid || sp->root_count);
+		WARN_ON_ONCE(!sp->role.invalid || sp->root_count);
 		kvm_mmu_free_shadow_page(sp);
 	}
 }
@@ -2775,12 +2754,9 @@ int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn)
 	LIST_HEAD(invalid_list);
 	int r;
 
-	pgprintk("%s: looking for gfn %llx\n", __func__, gfn);
 	r = 0;
 	write_lock(&kvm->mmu_lock);
 	for_each_gfn_valid_sp_with_gptes(kvm, sp, gfn) {
-		pgprintk("%s: gfn %llx role %x\n", __func__, gfn,
-			 sp->role.word);
 		r = 1;
 		kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list);
 	}
@@ -2831,7 +2807,7 @@ int mmu_try_to_unsync_pages(struct kvm *kvm, const struct kvm_memory_slot *slot,
 	 * track machinery is used to write-protect upper-level shadow pages,
 	 * i.e. this guards the role.level == 4K assertion below!
 	 */
-	if (kvm_slot_page_track_is_active(kvm, slot, gfn, KVM_PAGE_TRACK_WRITE))
+	if (kvm_gfn_is_write_tracked(kvm, slot, gfn))
 		return -EPERM;
 
 	/*
@@ -2873,7 +2849,7 @@ int mmu_try_to_unsync_pages(struct kvm *kvm, const struct kvm_memory_slot *slot,
 				continue;
 		}
 
-		WARN_ON(sp->role.level != PG_LEVEL_4K);
+		WARN_ON_ONCE(sp->role.level != PG_LEVEL_4K);
 		kvm_unsync_page(kvm, sp);
 	}
 	if (locked)
@@ -2938,9 +2914,6 @@ static int mmu_set_spte(struct kvm_vcpu *vcpu, struct kvm_memory_slot *slot,
 	bool prefetch = !fault || fault->prefetch;
 	bool write_fault = fault && fault->write;
 
-	pgprintk("%s: spte %llx write_fault %d gfn %llx\n", __func__,
-		 *sptep, write_fault, gfn);
-
 	if (unlikely(is_noslot_pfn(pfn))) {
 		vcpu->stat.pf_mmio_spte_created++;
 		mark_mmio_spte(vcpu, sptep, gfn, pte_access);
@@ -2957,11 +2930,9 @@ static int mmu_set_spte(struct kvm_vcpu *vcpu, struct kvm_memory_slot *slot,
 			u64 pte = *sptep;
 
 			child = spte_to_child_sp(pte);
-			drop_parent_pte(child, sptep);
+			drop_parent_pte(vcpu->kvm, child, sptep);
 			flush = true;
 		} else if (pfn != spte_to_pfn(*sptep)) {
-			pgprintk("hfn old %llx new %llx\n",
-				 spte_to_pfn(*sptep), pfn);
 			drop_spte(vcpu->kvm, sptep);
 			flush = true;
 		} else
@@ -2986,8 +2957,6 @@ static int mmu_set_spte(struct kvm_vcpu *vcpu, struct kvm_memory_slot *slot,
 	if (flush)
 		kvm_flush_remote_tlbs_gfn(vcpu->kvm, gfn, level);
 
-	pgprintk("%s: setting spte %llx\n", __func__, *sptep);
-
 	if (!was_rmapped) {
 		WARN_ON_ONCE(ret == RET_PF_SPURIOUS);
 		rmap_add(vcpu, slot, sptep, gfn, pte_access);
@@ -3033,7 +3002,7 @@ static void __direct_pte_prefetch(struct kvm_vcpu *vcpu,
 	u64 *spte, *start = NULL;
 	int i;
 
-	WARN_ON(!sp->role.direct);
+	WARN_ON_ONCE(!sp->role.direct);
 
 	i = spte_index(sptep) & ~(PTE_PREFETCH_NUM - 1);
 	spte = sp->spt + i;
@@ -3574,12 +3543,8 @@ static void mmu_free_root_page(struct kvm *kvm, hpa_t *root_hpa,
 	if (!VALID_PAGE(*root_hpa))
 		return;
 
-	/*
-	 * The "root" may be a special root, e.g. a PAE entry, treat it as a
-	 * SPTE to ensure any non-PA bits are dropped.
-	 */
-	sp = spte_to_child_sp(*root_hpa);
-	if (WARN_ON(!sp))
+	sp = root_to_sp(*root_hpa);
+	if (WARN_ON_ONCE(!sp))
 		return;
 
 	if (is_tdp_mmu_page(sp))
@@ -3624,7 +3589,9 @@ void kvm_mmu_free_roots(struct kvm *kvm, struct kvm_mmu *mmu,
 					   &invalid_list);
 
 	if (free_active_root) {
-		if (to_shadow_page(mmu->root.hpa)) {
+		if (kvm_mmu_is_dummy_root(mmu->root.hpa)) {
+			/* Nothing to cleanup for dummy roots. */
+		} else if (root_to_sp(mmu->root.hpa)) {
 			mmu_free_root_page(kvm, &mmu->root.hpa, &invalid_list);
 		} else if (mmu->pae_root) {
 			for (i = 0; i < 4; ++i) {
@@ -3648,6 +3615,7 @@ EXPORT_SYMBOL_GPL(kvm_mmu_free_roots);
 void kvm_mmu_free_guest_mode_roots(struct kvm *kvm, struct kvm_mmu *mmu)
 {
 	unsigned long roots_to_free = 0;
+	struct kvm_mmu_page *sp;
 	hpa_t root_hpa;
 	int i;
 
@@ -3662,8 +3630,8 @@ void kvm_mmu_free_guest_mode_roots(struct kvm *kvm, struct kvm_mmu *mmu)
 		if (!VALID_PAGE(root_hpa))
 			continue;
 
-		if (!to_shadow_page(root_hpa) ||
-			to_shadow_page(root_hpa)->role.guest_mode)
+		sp = root_to_sp(root_hpa);
+		if (!sp || sp->role.guest_mode)
 			roots_to_free |= KVM_MMU_ROOT_PREVIOUS(i);
 	}
 
@@ -3671,19 +3639,6 @@ void kvm_mmu_free_guest_mode_roots(struct kvm *kvm, struct kvm_mmu *mmu)
 }
 EXPORT_SYMBOL_GPL(kvm_mmu_free_guest_mode_roots);
 
-
-static int mmu_check_root(struct kvm_vcpu *vcpu, gfn_t root_gfn)
-{
-	int ret = 0;
-
-	if (!kvm_vcpu_is_visible_gfn(vcpu, root_gfn)) {
-		kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
-		ret = 1;
-	}
-
-	return ret;
-}
-
 static hpa_t mmu_alloc_root(struct kvm_vcpu *vcpu, gfn_t gfn, int quadrant,
 			    u8 level)
 {
@@ -3821,8 +3776,10 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
 	root_pgd = kvm_mmu_get_guest_pgd(vcpu, mmu);
 	root_gfn = root_pgd >> PAGE_SHIFT;
 
-	if (mmu_check_root(vcpu, root_gfn))
-		return 1;
+	if (!kvm_vcpu_is_visible_gfn(vcpu, root_gfn)) {
+		mmu->root.hpa = kvm_mmu_get_dummy_root();
+		return 0;
+	}
 
 	/*
 	 * On SVM, reading PDPTRs might access guest memory, which might fault
@@ -3834,8 +3791,8 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
 			if (!(pdptrs[i] & PT_PRESENT_MASK))
 				continue;
 
-			if (mmu_check_root(vcpu, pdptrs[i] >> PAGE_SHIFT))
-				return 1;
+			if (!kvm_vcpu_is_visible_gfn(vcpu, pdptrs[i] >> PAGE_SHIFT))
+				pdptrs[i] = 0;
 		}
 	}
 
@@ -4002,7 +3959,7 @@ static bool is_unsync_root(hpa_t root)
 {
 	struct kvm_mmu_page *sp;
 
-	if (!VALID_PAGE(root))
+	if (!VALID_PAGE(root) || kvm_mmu_is_dummy_root(root))
 		return false;
 
 	/*
@@ -4018,7 +3975,7 @@ static bool is_unsync_root(hpa_t root)
 	 * requirement isn't satisfied.
 	 */
 	smp_rmb();
-	sp = to_shadow_page(root);
+	sp = root_to_sp(root);
 
 	/*
 	 * PAE roots (somewhat arbitrarily) aren't backed by shadow pages, the
@@ -4048,11 +4005,12 @@ void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu)
 
 	if (vcpu->arch.mmu->cpu_role.base.level >= PT64_ROOT_4LEVEL) {
 		hpa_t root = vcpu->arch.mmu->root.hpa;
-		sp = to_shadow_page(root);
 
 		if (!is_unsync_root(root))
 			return;
 
+		sp = root_to_sp(root);
+
 		write_lock(&vcpu->kvm->mmu_lock);
 		mmu_sync_children(vcpu, sp, true);
 		write_unlock(&vcpu->kvm->mmu_lock);
@@ -4194,7 +4152,7 @@ static int handle_mmio_page_fault(struct kvm_vcpu *vcpu, u64 addr, bool direct)
 		return RET_PF_EMULATE;
 
 	reserved = get_mmio_spte(vcpu, addr, &spte);
-	if (WARN_ON(reserved))
+	if (WARN_ON_ONCE(reserved))
 		return -EINVAL;
 
 	if (is_mmio_spte(spte)) {
@@ -4232,7 +4190,7 @@ static bool page_fault_handle_page_track(struct kvm_vcpu *vcpu,
 	 * guest is writing the page which is write tracked which can
 	 * not be fixed by page fault handler.
 	 */
-	if (kvm_slot_page_track_is_active(vcpu->kvm, fault->slot, fault->gfn, KVM_PAGE_TRACK_WRITE))
+	if (kvm_gfn_is_write_tracked(vcpu->kvm, fault->slot, fault->gfn))
 		return true;
 
 	return false;
@@ -4382,7 +4340,7 @@ static int kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault,
 static bool is_page_fault_stale(struct kvm_vcpu *vcpu,
 				struct kvm_page_fault *fault)
 {
-	struct kvm_mmu_page *sp = to_shadow_page(vcpu->arch.mmu->root.hpa);
+	struct kvm_mmu_page *sp = root_to_sp(vcpu->arch.mmu->root.hpa);
 
 	/* Special roots, e.g. pae_root, are not backed by shadow pages. */
 	if (sp && is_obsolete_sp(vcpu->kvm, sp))
@@ -4407,6 +4365,10 @@ static int direct_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault
 {
 	int r;
 
+	/* Dummy roots are used only for shadowing bad guest roots. */
+	if (WARN_ON_ONCE(kvm_mmu_is_dummy_root(vcpu->arch.mmu->root.hpa)))
+		return RET_PF_RETRY;
+
 	if (page_fault_handle_page_track(vcpu, fault))
 		return RET_PF_EMULATE;
 
@@ -4443,8 +4405,6 @@ out_unlock:
 static int nonpaging_page_fault(struct kvm_vcpu *vcpu,
 				struct kvm_page_fault *fault)
 {
-	pgprintk("%s: gva %lx error %x\n", __func__, fault->addr, fault->error_code);
-
 	/* This path builds a PAE pagetable, we can map 2mb pages at maximum. */
 	fault->max_level = PG_LEVEL_2M;
 	return direct_page_fault(vcpu, fault);
@@ -4562,9 +4522,19 @@ static void nonpaging_init_context(struct kvm_mmu *context)
 static inline bool is_root_usable(struct kvm_mmu_root_info *root, gpa_t pgd,
 				  union kvm_mmu_page_role role)
 {
-	return (role.direct || pgd == root->pgd) &&
-	       VALID_PAGE(root->hpa) &&
-	       role.word == to_shadow_page(root->hpa)->role.word;
+	struct kvm_mmu_page *sp;
+
+	if (!VALID_PAGE(root->hpa))
+		return false;
+
+	if (!role.direct && pgd != root->pgd)
+		return false;
+
+	sp = root_to_sp(root->hpa);
+	if (WARN_ON_ONCE(!sp))
+		return false;
+
+	return role.word == sp->role.word;
 }
 
 /*
@@ -4634,11 +4604,10 @@ static bool fast_pgd_switch(struct kvm *kvm, struct kvm_mmu *mmu,
 			    gpa_t new_pgd, union kvm_mmu_page_role new_role)
 {
 	/*
-	 * For now, limit the caching to 64-bit hosts+VMs in order to avoid
-	 * having to deal with PDPTEs. We may add support for 32-bit hosts/VMs
-	 * later if necessary.
+	 * Limit reuse to 64-bit hosts+VMs without "special" roots in order to
+	 * avoid having to deal with PDPTEs and other complexities.
 	 */
-	if (VALID_PAGE(mmu->root.hpa) && !to_shadow_page(mmu->root.hpa))
+	if (VALID_PAGE(mmu->root.hpa) && !root_to_sp(mmu->root.hpa))
 		kvm_mmu_free_roots(kvm, mmu, KVM_MMU_ROOT_CURRENT);
 
 	if (VALID_PAGE(mmu->root.hpa))
@@ -4684,9 +4653,12 @@ void kvm_mmu_new_pgd(struct kvm_vcpu *vcpu, gpa_t new_pgd)
 	 * If this is a direct root page, it doesn't have a write flooding
 	 * count. Otherwise, clear the write flooding count.
 	 */
-	if (!new_role.direct)
-		__clear_sp_write_flooding_count(
-				to_shadow_page(vcpu->arch.mmu->root.hpa));
+	if (!new_role.direct) {
+		struct kvm_mmu_page *sp = root_to_sp(vcpu->arch.mmu->root.hpa);
+
+		if (!WARN_ON_ONCE(!sp))
+			__clear_sp_write_flooding_count(sp);
+	}
 }
 EXPORT_SYMBOL_GPL(kvm_mmu_new_pgd);
 
@@ -4808,28 +4780,13 @@ static void __reset_rsvds_bits_mask(struct rsvd_bits_validate *rsvd_check,
 	}
 }
 
-static bool guest_can_use_gbpages(struct kvm_vcpu *vcpu)
-{
-	/*
-	 * If TDP is enabled, let the guest use GBPAGES if they're supported in
-	 * hardware.  The hardware page walker doesn't let KVM disable GBPAGES,
-	 * i.e. won't treat them as reserved, and KVM doesn't redo the GVA->GPA
-	 * walk for performance and complexity reasons.  Not to mention KVM
-	 * _can't_ solve the problem because GVA->GPA walks aren't visible to
-	 * KVM once a TDP translation is installed.  Mimic hardware behavior so
-	 * that KVM's is at least consistent, i.e. doesn't randomly inject #PF.
-	 */
-	return tdp_enabled ? boot_cpu_has(X86_FEATURE_GBPAGES) :
-			     guest_cpuid_has(vcpu, X86_FEATURE_GBPAGES);
-}
-
 static void reset_guest_rsvds_bits_mask(struct kvm_vcpu *vcpu,
 					struct kvm_mmu *context)
 {
 	__reset_rsvds_bits_mask(&context->guest_rsvd_check,
 				vcpu->arch.reserved_gpa_bits,
 				context->cpu_role.base.level, is_efer_nx(context),
-				guest_can_use_gbpages(vcpu),
+				guest_can_use(vcpu, X86_FEATURE_GBPAGES),
 				is_cr4_pse(context),
 				guest_cpuid_is_amd_or_hygon(vcpu));
 }
@@ -4906,7 +4863,8 @@ static void reset_shadow_zero_bits_mask(struct kvm_vcpu *vcpu,
 	__reset_rsvds_bits_mask(shadow_zero_check, reserved_hpa_bits(),
 				context->root_role.level,
 				context->root_role.efer_nx,
-				guest_can_use_gbpages(vcpu), is_pse, is_amd);
+				guest_can_use(vcpu, X86_FEATURE_GBPAGES),
+				is_pse, is_amd);
 
 	if (!shadow_me_mask)
 		return;
@@ -5467,8 +5425,8 @@ void kvm_mmu_after_set_cpuid(struct kvm_vcpu *vcpu)
 	 * physical address properties) in a single VM would require tracking
 	 * all relevant CPUID information in kvm_mmu_page_role. That is very
 	 * undesirable as it would increase the memory requirements for
-	 * gfn_track (see struct kvm_mmu_page_role comments).  For now that
-	 * problem is swept under the rug; KVM's CPUID API is horrific and
+	 * gfn_write_track (see struct kvm_mmu_page_role comments).  For now
+	 * that problem is swept under the rug; KVM's CPUID API is horrific and
 	 * it's all but impossible to solve it without introducing a new API.
 	 */
 	vcpu->arch.root_mmu.root_role.word = 0;
@@ -5531,9 +5489,9 @@ void kvm_mmu_unload(struct kvm_vcpu *vcpu)
 	struct kvm *kvm = vcpu->kvm;
 
 	kvm_mmu_free_roots(kvm, &vcpu->arch.root_mmu, KVM_MMU_ROOTS_ALL);
-	WARN_ON(VALID_PAGE(vcpu->arch.root_mmu.root.hpa));
+	WARN_ON_ONCE(VALID_PAGE(vcpu->arch.root_mmu.root.hpa));
 	kvm_mmu_free_roots(kvm, &vcpu->arch.guest_mmu, KVM_MMU_ROOTS_ALL);
-	WARN_ON(VALID_PAGE(vcpu->arch.guest_mmu.root.hpa));
+	WARN_ON_ONCE(VALID_PAGE(vcpu->arch.guest_mmu.root.hpa));
 	vcpu_clear_mmio_info(vcpu, MMIO_GVA_ANY);
 }
 
@@ -5546,16 +5504,21 @@ static bool is_obsolete_root(struct kvm *kvm, hpa_t root_hpa)
 
 	/*
 	 * When freeing obsolete roots, treat roots as obsolete if they don't
-	 * have an associated shadow page.  This does mean KVM will get false
+	 * have an associated shadow page, as it's impossible to determine if
+	 * such roots are fresh or stale.  This does mean KVM will get false
 	 * positives and free roots that don't strictly need to be freed, but
 	 * such false positives are relatively rare:
 	 *
-	 *  (a) only PAE paging and nested NPT has roots without shadow pages
+	 *  (a) only PAE paging and nested NPT have roots without shadow pages
+	 *      (or any shadow paging flavor with a dummy root, see note below)
 	 *  (b) remote reloads due to a memslot update obsoletes _all_ roots
 	 *  (c) KVM doesn't track previous roots for PAE paging, and the guest
 	 *      is unlikely to zap an in-use PGD.
+	 *
+	 * Note!  Dummy roots are unique in that they are obsoleted by memslot
+	 * _creation_!  See also FNAME(fetch).
 	 */
-	sp = to_shadow_page(root_hpa);
+	sp = root_to_sp(root_hpa);
 	return !sp || is_obsolete_sp(kvm, sp);
 }
 
@@ -5634,9 +5597,6 @@ static bool detect_write_misaligned(struct kvm_mmu_page *sp, gpa_t gpa,
 {
 	unsigned offset, pte_size, misaligned;
 
-	pgprintk("misaligned: gpa %llx bytes %d role %x\n",
-		 gpa, bytes, sp->role.word);
-
 	offset = offset_in_page(gpa);
 	pte_size = sp->role.has_4_byte_gpte ? 4 : 8;
 
@@ -5684,9 +5644,8 @@ static u64 *get_written_sptes(struct kvm_mmu_page *sp, gpa_t gpa, int *nspte)
 	return spte;
 }
 
-static void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
-			      const u8 *new, int bytes,
-			      struct kvm_page_track_notifier_node *node)
+void kvm_mmu_track_write(struct kvm_vcpu *vcpu, gpa_t gpa, const u8 *new,
+			 int bytes)
 {
 	gfn_t gfn = gpa >> PAGE_SHIFT;
 	struct kvm_mmu_page *sp;
@@ -5702,8 +5661,6 @@ static void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
 	if (!READ_ONCE(vcpu->kvm->arch.indirect_shadow_pages))
 		return;
 
-	pgprintk("%s: gpa %llx bytes %d\n", __func__, gpa, bytes);
-
 	write_lock(&vcpu->kvm->mmu_lock);
 
 	gentry = mmu_pte_write_fetch_gpte(vcpu, &gpa, &bytes);
@@ -5742,7 +5699,18 @@ int noinline kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u64 err
 	int r, emulation_type = EMULTYPE_PF;
 	bool direct = vcpu->arch.mmu->root_role.direct;
 
-	if (WARN_ON(!VALID_PAGE(vcpu->arch.mmu->root.hpa)))
+	/*
+	 * IMPLICIT_ACCESS is a KVM-defined flag used to correctly perform SMAP
+	 * checks when emulating instructions that triggers implicit access.
+	 * WARN if hardware generates a fault with an error code that collides
+	 * with the KVM-defined value.  Clear the flag and continue on, i.e.
+	 * don't terminate the VM, as KVM can't possibly be relying on a flag
+	 * that KVM doesn't know about.
+	 */
+	if (WARN_ON_ONCE(error_code & PFERR_IMPLICIT_ACCESS))
+		error_code &= ~PFERR_IMPLICIT_ACCESS;
+
+	if (WARN_ON_ONCE(!VALID_PAGE(vcpu->arch.mmu->root.hpa)))
 		return RET_PF_RETRY;
 
 	r = RET_PF_INVALID;
@@ -6099,7 +6067,7 @@ restart:
 		 * pages.  Skip the bogus page, otherwise we'll get stuck in an
 		 * infinite loop if the page gets put back on the list (again).
 		 */
-		if (WARN_ON(sp->role.invalid))
+		if (WARN_ON_ONCE(sp->role.invalid))
 			continue;
 
 		/*
@@ -6199,16 +6167,8 @@ static bool kvm_has_zapped_obsolete_pages(struct kvm *kvm)
 	return unlikely(!list_empty_careful(&kvm->arch.zapped_obsolete_pages));
 }
 
-static void kvm_mmu_invalidate_zap_pages_in_memslot(struct kvm *kvm,
-			struct kvm_memory_slot *slot,
-			struct kvm_page_track_notifier_node *node)
-{
-	kvm_mmu_zap_all_fast(kvm);
-}
-
 int kvm_mmu_init_vm(struct kvm *kvm)
 {
-	struct kvm_page_track_notifier_node *node = &kvm->arch.mmu_sp_tracker;
 	int r;
 
 	INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
@@ -6222,10 +6182,6 @@ int kvm_mmu_init_vm(struct kvm *kvm)
 			return r;
 	}
 
-	node->track_write = kvm_mmu_pte_write;
-	node->track_flush_slot = kvm_mmu_invalidate_zap_pages_in_memslot;
-	kvm_page_track_register_notifier(kvm, node);
-
 	kvm->arch.split_page_header_cache.kmem_cache = mmu_page_header_cache;
 	kvm->arch.split_page_header_cache.gfp_zero = __GFP_ZERO;
 
@@ -6246,10 +6202,6 @@ static void mmu_free_vm_memory_caches(struct kvm *kvm)
 
 void kvm_mmu_uninit_vm(struct kvm *kvm)
 {
-	struct kvm_page_track_notifier_node *node = &kvm->arch.mmu_sp_tracker;
-
-	kvm_page_track_unregister_notifier(kvm, node);
-
 	if (tdp_mmu_enabled)
 		kvm_mmu_uninit_tdp_mmu(kvm);
 
@@ -6670,7 +6622,7 @@ static void kvm_rmap_zap_collapsible_sptes(struct kvm *kvm,
 	 */
 	if (walk_slot_rmaps(kvm, slot, kvm_mmu_zap_collapsible_spte,
 			    PG_LEVEL_4K, KVM_MAX_HUGEPAGE_LEVEL - 1, true))
-		kvm_arch_flush_remote_tlbs_memslot(kvm, slot);
+		kvm_flush_remote_tlbs_memslot(kvm, slot);
 }
 
 void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm,
@@ -6689,20 +6641,6 @@ void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm,
 	}
 }
 
-void kvm_arch_flush_remote_tlbs_memslot(struct kvm *kvm,
-					const struct kvm_memory_slot *memslot)
-{
-	/*
-	 * All current use cases for flushing the TLBs for a specific memslot
-	 * related to dirty logging, and many do the TLB flush out of mmu_lock.
-	 * The interaction between the various operations on memslot must be
-	 * serialized by slots_locks to ensure the TLB flush from one operation
-	 * is observed by any other operation on the same memslot.
-	 */
-	lockdep_assert_held(&kvm->slots_lock);
-	kvm_flush_remote_tlbs_range(kvm, memslot->base_gfn, memslot->npages);
-}
-
 void kvm_mmu_slot_leaf_clear_dirty(struct kvm *kvm,
 				   const struct kvm_memory_slot *memslot)
 {
@@ -6732,7 +6670,7 @@ void kvm_mmu_slot_leaf_clear_dirty(struct kvm *kvm,
 	 */
 }
 
-void kvm_mmu_zap_all(struct kvm *kvm)
+static void kvm_mmu_zap_all(struct kvm *kvm)
 {
 	struct kvm_mmu_page *sp, *node;
 	LIST_HEAD(invalid_list);
@@ -6741,7 +6679,7 @@ void kvm_mmu_zap_all(struct kvm *kvm)
 	write_lock(&kvm->mmu_lock);
 restart:
 	list_for_each_entry_safe(sp, node, &kvm->arch.active_mmu_pages, link) {
-		if (WARN_ON(sp->role.invalid))
+		if (WARN_ON_ONCE(sp->role.invalid))
 			continue;
 		if (__kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list, &ign))
 			goto restart;
@@ -6757,9 +6695,20 @@ restart:
 	write_unlock(&kvm->mmu_lock);
 }
 
+void kvm_arch_flush_shadow_all(struct kvm *kvm)
+{
+	kvm_mmu_zap_all(kvm);
+}
+
+void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
+				   struct kvm_memory_slot *slot)
+{
+	kvm_mmu_zap_all_fast(kvm);
+}
+
 void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm, u64 gen)
 {
-	WARN_ON(gen & KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS);
+	WARN_ON_ONCE(gen & KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS);
 
 	gen &= MMIO_SPTE_GEN_MASK;
 
@@ -6862,7 +6811,7 @@ static void mmu_destroy_caches(void)
 static int get_nx_huge_pages(char *buffer, const struct kernel_param *kp)
 {
 	if (nx_hugepage_mitigation_hard_disabled)
-		return sprintf(buffer, "never\n");
+		return sysfs_emit(buffer, "never\n");
 
 	return param_get_bool(buffer, kp);
 }
diff --git a/arch/x86/kvm/mmu/mmu_internal.h b/arch/x86/kvm/mmu/mmu_internal.h
index d39af5639ce9..b102014e2c60 100644
--- a/arch/x86/kvm/mmu/mmu_internal.h
+++ b/arch/x86/kvm/mmu/mmu_internal.h
@@ -6,18 +6,10 @@
 #include <linux/kvm_host.h>
 #include <asm/kvm_host.h>
 
-#undef MMU_DEBUG
-
-#ifdef MMU_DEBUG
-extern bool dbg;
-
-#define pgprintk(x...) do { if (dbg) printk(x); } while (0)
-#define rmap_printk(fmt, args...) do { if (dbg) printk("%s: " fmt, __func__, ## args); } while (0)
-#define MMU_WARN_ON(x) WARN_ON(x)
+#ifdef CONFIG_KVM_PROVE_MMU
+#define KVM_MMU_WARN_ON(x) WARN_ON_ONCE(x)
 #else
-#define pgprintk(x...) do { } while (0)
-#define rmap_printk(x...) do { } while (0)
-#define MMU_WARN_ON(x) do { } while (0)
+#define KVM_MMU_WARN_ON(x) BUILD_BUG_ON_INVALID(x)
 #endif
 
 /* Page table builder macros common to shadow (host) PTEs and guest PTEs. */
@@ -44,6 +36,16 @@ extern bool dbg;
 #define INVALID_PAE_ROOT	0
 #define IS_VALID_PAE_ROOT(x)	(!!(x))
 
+static inline hpa_t kvm_mmu_get_dummy_root(void)
+{
+	return my_zero_pfn(0) << PAGE_SHIFT;
+}
+
+static inline bool kvm_mmu_is_dummy_root(hpa_t shadow_page)
+{
+	return is_zero_pfn(shadow_page >> PAGE_SHIFT);
+}
+
 typedef u64 __rcu *tdp_ptep_t;
 
 struct kvm_mmu_page {
@@ -170,9 +172,6 @@ bool kvm_mmu_slot_gfn_write_protect(struct kvm *kvm,
 				    struct kvm_memory_slot *slot, u64 gfn,
 				    int min_level);
 
-void kvm_flush_remote_tlbs_range(struct kvm *kvm, gfn_t start_gfn,
-				 gfn_t nr_pages);
-
 /* Flush the given page (huge or not) of guest memory. */
 static inline void kvm_flush_remote_tlbs_gfn(struct kvm *kvm, gfn_t gfn, int level)
 {
diff --git a/arch/x86/kvm/mmu/page_track.c b/arch/x86/kvm/mmu/page_track.c
index 0a2ac438d647..c87da11f3a04 100644
--- a/arch/x86/kvm/mmu/page_track.c
+++ b/arch/x86/kvm/mmu/page_track.c
@@ -12,13 +12,13 @@
  */
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
+#include <linux/lockdep.h>
 #include <linux/kvm_host.h>
 #include <linux/rculist.h>
 
-#include <asm/kvm_page_track.h>
-
 #include "mmu.h"
 #include "mmu_internal.h"
+#include "page_track.h"
 
 bool kvm_page_track_write_tracking_enabled(struct kvm *kvm)
 {
@@ -28,103 +28,64 @@ bool kvm_page_track_write_tracking_enabled(struct kvm *kvm)
 
 void kvm_page_track_free_memslot(struct kvm_memory_slot *slot)
 {
-	int i;
-
-	for (i = 0; i < KVM_PAGE_TRACK_MAX; i++) {
-		kvfree(slot->arch.gfn_track[i]);
-		slot->arch.gfn_track[i] = NULL;
-	}
+	kvfree(slot->arch.gfn_write_track);
+	slot->arch.gfn_write_track = NULL;
 }
 
-int kvm_page_track_create_memslot(struct kvm *kvm,
-				  struct kvm_memory_slot *slot,
-				  unsigned long npages)
+static int __kvm_page_track_write_tracking_alloc(struct kvm_memory_slot *slot,
+						 unsigned long npages)
 {
-	int i;
-
-	for (i = 0; i < KVM_PAGE_TRACK_MAX; i++) {
-		if (i == KVM_PAGE_TRACK_WRITE &&
-		    !kvm_page_track_write_tracking_enabled(kvm))
-			continue;
-
-		slot->arch.gfn_track[i] =
-			__vcalloc(npages, sizeof(*slot->arch.gfn_track[i]),
-				  GFP_KERNEL_ACCOUNT);
-		if (!slot->arch.gfn_track[i])
-			goto track_free;
-	}
+	const size_t size = sizeof(*slot->arch.gfn_write_track);
 
-	return 0;
+	if (!slot->arch.gfn_write_track)
+		slot->arch.gfn_write_track = __vcalloc(npages, size,
+						       GFP_KERNEL_ACCOUNT);
 
-track_free:
-	kvm_page_track_free_memslot(slot);
-	return -ENOMEM;
+	return slot->arch.gfn_write_track ? 0 : -ENOMEM;
 }
 
-static inline bool page_track_mode_is_valid(enum kvm_page_track_mode mode)
+int kvm_page_track_create_memslot(struct kvm *kvm,
+				  struct kvm_memory_slot *slot,
+				  unsigned long npages)
 {
-	if (mode < 0 || mode >= KVM_PAGE_TRACK_MAX)
-		return false;
+	if (!kvm_page_track_write_tracking_enabled(kvm))
+		return 0;
 
-	return true;
+	return __kvm_page_track_write_tracking_alloc(slot, npages);
 }
 
 int kvm_page_track_write_tracking_alloc(struct kvm_memory_slot *slot)
 {
-	unsigned short *gfn_track;
-
-	if (slot->arch.gfn_track[KVM_PAGE_TRACK_WRITE])
-		return 0;
-
-	gfn_track = __vcalloc(slot->npages, sizeof(*gfn_track),
-			      GFP_KERNEL_ACCOUNT);
-	if (gfn_track == NULL)
-		return -ENOMEM;
-
-	slot->arch.gfn_track[KVM_PAGE_TRACK_WRITE] = gfn_track;
-	return 0;
+	return __kvm_page_track_write_tracking_alloc(slot, slot->npages);
 }
 
-static void update_gfn_track(struct kvm_memory_slot *slot, gfn_t gfn,
-			     enum kvm_page_track_mode mode, short count)
+static void update_gfn_write_track(struct kvm_memory_slot *slot, gfn_t gfn,
+				   short count)
 {
 	int index, val;
 
 	index = gfn_to_index(gfn, slot->base_gfn, PG_LEVEL_4K);
 
-	val = slot->arch.gfn_track[mode][index];
+	val = slot->arch.gfn_write_track[index];
 
-	if (WARN_ON(val + count < 0 || val + count > USHRT_MAX))
+	if (WARN_ON_ONCE(val + count < 0 || val + count > USHRT_MAX))
 		return;
 
-	slot->arch.gfn_track[mode][index] += count;
+	slot->arch.gfn_write_track[index] += count;
 }
 
-/*
- * add guest page to the tracking pool so that corresponding access on that
- * page will be intercepted.
- *
- * It should be called under the protection both of mmu-lock and kvm->srcu
- * or kvm->slots_lock.
- *
- * @kvm: the guest instance we are interested in.
- * @slot: the @gfn belongs to.
- * @gfn: the guest page.
- * @mode: tracking mode, currently only write track is supported.
- */
-void kvm_slot_page_track_add_page(struct kvm *kvm,
-				  struct kvm_memory_slot *slot, gfn_t gfn,
-				  enum kvm_page_track_mode mode)
+void __kvm_write_track_add_gfn(struct kvm *kvm, struct kvm_memory_slot *slot,
+			       gfn_t gfn)
 {
+	lockdep_assert_held_write(&kvm->mmu_lock);
 
-	if (WARN_ON(!page_track_mode_is_valid(mode)))
-		return;
+	lockdep_assert_once(lockdep_is_held(&kvm->slots_lock) ||
+			    srcu_read_lock_held(&kvm->srcu));
 
-	if (WARN_ON(mode == KVM_PAGE_TRACK_WRITE &&
-		    !kvm_page_track_write_tracking_enabled(kvm)))
+	if (KVM_BUG_ON(!kvm_page_track_write_tracking_enabled(kvm), kvm))
 		return;
 
-	update_gfn_track(slot, gfn, mode, 1);
+	update_gfn_write_track(slot, gfn, 1);
 
 	/*
 	 * new track stops large page mapping for the
@@ -132,37 +93,22 @@ void kvm_slot_page_track_add_page(struct kvm *kvm,
 	 */
 	kvm_mmu_gfn_disallow_lpage(slot, gfn);
 
-	if (mode == KVM_PAGE_TRACK_WRITE)
-		if (kvm_mmu_slot_gfn_write_protect(kvm, slot, gfn, PG_LEVEL_4K))
-			kvm_flush_remote_tlbs(kvm);
+	if (kvm_mmu_slot_gfn_write_protect(kvm, slot, gfn, PG_LEVEL_4K))
+		kvm_flush_remote_tlbs(kvm);
 }
-EXPORT_SYMBOL_GPL(kvm_slot_page_track_add_page);
 
-/*
- * remove the guest page from the tracking pool which stops the interception
- * of corresponding access on that page. It is the opposed operation of
- * kvm_slot_page_track_add_page().
- *
- * It should be called under the protection both of mmu-lock and kvm->srcu
- * or kvm->slots_lock.
- *
- * @kvm: the guest instance we are interested in.
- * @slot: the @gfn belongs to.
- * @gfn: the guest page.
- * @mode: tracking mode, currently only write track is supported.
- */
-void kvm_slot_page_track_remove_page(struct kvm *kvm,
-				     struct kvm_memory_slot *slot, gfn_t gfn,
-				     enum kvm_page_track_mode mode)
+void __kvm_write_track_remove_gfn(struct kvm *kvm,
+				  struct kvm_memory_slot *slot, gfn_t gfn)
 {
-	if (WARN_ON(!page_track_mode_is_valid(mode)))
-		return;
+	lockdep_assert_held_write(&kvm->mmu_lock);
 
-	if (WARN_ON(mode == KVM_PAGE_TRACK_WRITE &&
-		    !kvm_page_track_write_tracking_enabled(kvm)))
+	lockdep_assert_once(lockdep_is_held(&kvm->slots_lock) ||
+			    srcu_read_lock_held(&kvm->srcu));
+
+	if (KVM_BUG_ON(!kvm_page_track_write_tracking_enabled(kvm), kvm))
 		return;
 
-	update_gfn_track(slot, gfn, mode, -1);
+	update_gfn_write_track(slot, gfn, -1);
 
 	/*
 	 * allow large page mapping for the tracked page
@@ -170,31 +116,26 @@ void kvm_slot_page_track_remove_page(struct kvm *kvm,
 	 */
 	kvm_mmu_gfn_allow_lpage(slot, gfn);
 }
-EXPORT_SYMBOL_GPL(kvm_slot_page_track_remove_page);
 
 /*
  * check if the corresponding access on the specified guest page is tracked.
  */
-bool kvm_slot_page_track_is_active(struct kvm *kvm,
-				   const struct kvm_memory_slot *slot,
-				   gfn_t gfn, enum kvm_page_track_mode mode)
+bool kvm_gfn_is_write_tracked(struct kvm *kvm,
+			      const struct kvm_memory_slot *slot, gfn_t gfn)
 {
 	int index;
 
-	if (WARN_ON(!page_track_mode_is_valid(mode)))
-		return false;
-
 	if (!slot)
 		return false;
 
-	if (mode == KVM_PAGE_TRACK_WRITE &&
-	    !kvm_page_track_write_tracking_enabled(kvm))
+	if (!kvm_page_track_write_tracking_enabled(kvm))
 		return false;
 
 	index = gfn_to_index(gfn, slot->base_gfn, PG_LEVEL_4K);
-	return !!READ_ONCE(slot->arch.gfn_track[mode][index]);
+	return !!READ_ONCE(slot->arch.gfn_write_track[index]);
 }
 
+#ifdef CONFIG_KVM_EXTERNAL_WRITE_TRACKING
 void kvm_page_track_cleanup(struct kvm *kvm)
 {
 	struct kvm_page_track_notifier_head *head;
@@ -216,17 +157,22 @@ int kvm_page_track_init(struct kvm *kvm)
  * register the notifier so that event interception for the tracked guest
  * pages can be received.
  */
-void
-kvm_page_track_register_notifier(struct kvm *kvm,
-				 struct kvm_page_track_notifier_node *n)
+int kvm_page_track_register_notifier(struct kvm *kvm,
+				     struct kvm_page_track_notifier_node *n)
 {
 	struct kvm_page_track_notifier_head *head;
 
+	if (!kvm || kvm->mm != current->mm)
+		return -ESRCH;
+
+	kvm_get_kvm(kvm);
+
 	head = &kvm->arch.track_notifier_head;
 
 	write_lock(&kvm->mmu_lock);
 	hlist_add_head_rcu(&n->node, &head->track_notifier_list);
 	write_unlock(&kvm->mmu_lock);
+	return 0;
 }
 EXPORT_SYMBOL_GPL(kvm_page_track_register_notifier);
 
@@ -234,9 +180,8 @@ EXPORT_SYMBOL_GPL(kvm_page_track_register_notifier);
  * stop receiving the event interception. It is the opposed operation of
  * kvm_page_track_register_notifier().
  */
-void
-kvm_page_track_unregister_notifier(struct kvm *kvm,
-				   struct kvm_page_track_notifier_node *n)
+void kvm_page_track_unregister_notifier(struct kvm *kvm,
+					struct kvm_page_track_notifier_node *n)
 {
 	struct kvm_page_track_notifier_head *head;
 
@@ -246,6 +191,8 @@ kvm_page_track_unregister_notifier(struct kvm *kvm,
 	hlist_del_rcu(&n->node);
 	write_unlock(&kvm->mmu_lock);
 	synchronize_srcu(&head->track_srcu);
+
+	kvm_put_kvm(kvm);
 }
 EXPORT_SYMBOL_GPL(kvm_page_track_unregister_notifier);
 
@@ -256,34 +203,30 @@ EXPORT_SYMBOL_GPL(kvm_page_track_unregister_notifier);
  * The node should figure out if the written page is the one that node is
  * interested in by itself.
  */
-void kvm_page_track_write(struct kvm_vcpu *vcpu, gpa_t gpa, const u8 *new,
-			  int bytes)
+void __kvm_page_track_write(struct kvm *kvm, gpa_t gpa, const u8 *new, int bytes)
 {
 	struct kvm_page_track_notifier_head *head;
 	struct kvm_page_track_notifier_node *n;
 	int idx;
 
-	head = &vcpu->kvm->arch.track_notifier_head;
+	head = &kvm->arch.track_notifier_head;
 
 	if (hlist_empty(&head->track_notifier_list))
 		return;
 
 	idx = srcu_read_lock(&head->track_srcu);
 	hlist_for_each_entry_srcu(n, &head->track_notifier_list, node,
-				srcu_read_lock_held(&head->track_srcu))
+				  srcu_read_lock_held(&head->track_srcu))
 		if (n->track_write)
-			n->track_write(vcpu, gpa, new, bytes, n);
+			n->track_write(gpa, new, bytes, n);
 	srcu_read_unlock(&head->track_srcu, idx);
 }
 
 /*
- * Notify the node that memory slot is being removed or moved so that it can
- * drop write-protection for the pages in the memory slot.
- *
- * The node should figure out it has any write-protected pages in this slot
- * by itself.
+ * Notify external page track nodes that a memory region is being removed from
+ * the VM, e.g. so that users can free any associated metadata.
  */
-void kvm_page_track_flush_slot(struct kvm *kvm, struct kvm_memory_slot *slot)
+void kvm_page_track_delete_slot(struct kvm *kvm, struct kvm_memory_slot *slot)
 {
 	struct kvm_page_track_notifier_head *head;
 	struct kvm_page_track_notifier_node *n;
@@ -296,8 +239,69 @@ void kvm_page_track_flush_slot(struct kvm *kvm, struct kvm_memory_slot *slot)
 
 	idx = srcu_read_lock(&head->track_srcu);
 	hlist_for_each_entry_srcu(n, &head->track_notifier_list, node,
-				srcu_read_lock_held(&head->track_srcu))
-		if (n->track_flush_slot)
-			n->track_flush_slot(kvm, slot, n);
+				  srcu_read_lock_held(&head->track_srcu))
+		if (n->track_remove_region)
+			n->track_remove_region(slot->base_gfn, slot->npages, n);
 	srcu_read_unlock(&head->track_srcu, idx);
 }
+
+/*
+ * add guest page to the tracking pool so that corresponding access on that
+ * page will be intercepted.
+ *
+ * @kvm: the guest instance we are interested in.
+ * @gfn: the guest page.
+ */
+int kvm_write_track_add_gfn(struct kvm *kvm, gfn_t gfn)
+{
+	struct kvm_memory_slot *slot;
+	int idx;
+
+	idx = srcu_read_lock(&kvm->srcu);
+
+	slot = gfn_to_memslot(kvm, gfn);
+	if (!slot) {
+		srcu_read_unlock(&kvm->srcu, idx);
+		return -EINVAL;
+	}
+
+	write_lock(&kvm->mmu_lock);
+	__kvm_write_track_add_gfn(kvm, slot, gfn);
+	write_unlock(&kvm->mmu_lock);
+
+	srcu_read_unlock(&kvm->srcu, idx);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(kvm_write_track_add_gfn);
+
+/*
+ * remove the guest page from the tracking pool which stops the interception
+ * of corresponding access on that page.
+ *
+ * @kvm: the guest instance we are interested in.
+ * @gfn: the guest page.
+ */
+int kvm_write_track_remove_gfn(struct kvm *kvm, gfn_t gfn)
+{
+	struct kvm_memory_slot *slot;
+	int idx;
+
+	idx = srcu_read_lock(&kvm->srcu);
+
+	slot = gfn_to_memslot(kvm, gfn);
+	if (!slot) {
+		srcu_read_unlock(&kvm->srcu, idx);
+		return -EINVAL;
+	}
+
+	write_lock(&kvm->mmu_lock);
+	__kvm_write_track_remove_gfn(kvm, slot, gfn);
+	write_unlock(&kvm->mmu_lock);
+
+	srcu_read_unlock(&kvm->srcu, idx);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(kvm_write_track_remove_gfn);
+#endif
diff --git a/arch/x86/kvm/mmu/page_track.h b/arch/x86/kvm/mmu/page_track.h
new file mode 100644
index 000000000000..d4d72ed999b1
--- /dev/null
+++ b/arch/x86/kvm/mmu/page_track.h
@@ -0,0 +1,58 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __KVM_X86_PAGE_TRACK_H
+#define __KVM_X86_PAGE_TRACK_H
+
+#include <linux/kvm_host.h>
+
+#include <asm/kvm_page_track.h>
+
+
+bool kvm_page_track_write_tracking_enabled(struct kvm *kvm);
+int kvm_page_track_write_tracking_alloc(struct kvm_memory_slot *slot);
+
+void kvm_page_track_free_memslot(struct kvm_memory_slot *slot);
+int kvm_page_track_create_memslot(struct kvm *kvm,
+				  struct kvm_memory_slot *slot,
+				  unsigned long npages);
+
+void __kvm_write_track_add_gfn(struct kvm *kvm, struct kvm_memory_slot *slot,
+			       gfn_t gfn);
+void __kvm_write_track_remove_gfn(struct kvm *kvm,
+				  struct kvm_memory_slot *slot, gfn_t gfn);
+
+bool kvm_gfn_is_write_tracked(struct kvm *kvm,
+			      const struct kvm_memory_slot *slot, gfn_t gfn);
+
+#ifdef CONFIG_KVM_EXTERNAL_WRITE_TRACKING
+int kvm_page_track_init(struct kvm *kvm);
+void kvm_page_track_cleanup(struct kvm *kvm);
+
+void __kvm_page_track_write(struct kvm *kvm, gpa_t gpa, const u8 *new, int bytes);
+void kvm_page_track_delete_slot(struct kvm *kvm, struct kvm_memory_slot *slot);
+
+static inline bool kvm_page_track_has_external_user(struct kvm *kvm)
+{
+	return !hlist_empty(&kvm->arch.track_notifier_head.track_notifier_list);
+}
+#else
+static inline int kvm_page_track_init(struct kvm *kvm) { return 0; }
+static inline void kvm_page_track_cleanup(struct kvm *kvm) { }
+
+static inline void __kvm_page_track_write(struct kvm *kvm, gpa_t gpa,
+					  const u8 *new, int bytes) { }
+static inline void kvm_page_track_delete_slot(struct kvm *kvm,
+					      struct kvm_memory_slot *slot) { }
+
+static inline bool kvm_page_track_has_external_user(struct kvm *kvm) { return false; }
+
+#endif /* CONFIG_KVM_EXTERNAL_WRITE_TRACKING */
+
+static inline void kvm_page_track_write(struct kvm_vcpu *vcpu, gpa_t gpa,
+					const u8 *new, int bytes)
+{
+	__kvm_page_track_write(vcpu->kvm, gpa, new, bytes);
+
+	kvm_mmu_track_write(vcpu, gpa, new, bytes);
+}
+
+#endif /* __KVM_X86_PAGE_TRACK_H */
diff --git a/arch/x86/kvm/mmu/paging_tmpl.h b/arch/x86/kvm/mmu/paging_tmpl.h
index 0662e0278e70..c85255073f67 100644
--- a/arch/x86/kvm/mmu/paging_tmpl.h
+++ b/arch/x86/kvm/mmu/paging_tmpl.h
@@ -338,7 +338,6 @@ retry_walk:
 	}
 #endif
 	walker->max_level = walker->level;
-	ASSERT(!(is_long_mode(vcpu) && !is_pae(vcpu)));
 
 	/*
 	 * FIXME: on Intel processors, loads of the PDPTE registers for PAE paging
@@ -348,9 +347,21 @@ retry_walk:
 	nested_access = (have_ad ? PFERR_WRITE_MASK : 0) | PFERR_USER_MASK;
 
 	pte_access = ~0;
+
+	/*
+	 * Queue a page fault for injection if this assertion fails, as callers
+	 * assume that walker.fault contains sane info on a walk failure.  I.e.
+	 * avoid making the situation worse by inducing even worse badness
+	 * between when the assertion fails and when KVM kicks the vCPU out to
+	 * userspace (because the VM is bugged).
+	 */
+	if (KVM_BUG_ON(is_long_mode(vcpu) && !is_pae(vcpu), vcpu->kvm))
+		goto error;
+
 	++walker->level;
 
 	do {
+		struct kvm_memory_slot *slot;
 		unsigned long host_addr;
 
 		pt_access = pte_access;
@@ -381,7 +392,11 @@ retry_walk:
 		if (unlikely(real_gpa == INVALID_GPA))
 			return 0;
 
-		host_addr = kvm_vcpu_gfn_to_hva_prot(vcpu, gpa_to_gfn(real_gpa),
+		slot = kvm_vcpu_gfn_to_memslot(vcpu, gpa_to_gfn(real_gpa));
+		if (!kvm_is_visible_memslot(slot))
+			goto error;
+
+		host_addr = gfn_to_hva_memslot_prot(slot, gpa_to_gfn(real_gpa),
 					    &walker->pte_writable[walker->level - 1]);
 		if (unlikely(kvm_is_error_hva(host_addr)))
 			goto error;
@@ -456,9 +471,6 @@ retry_walk:
 			goto retry_walk;
 	}
 
-	pgprintk("%s: pte %llx pte_access %x pt_access %x\n",
-		 __func__, (u64)pte, walker->pte_access,
-		 walker->pt_access[walker->level - 1]);
 	return 1;
 
 error:
@@ -529,8 +541,6 @@ FNAME(prefetch_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
 	if (FNAME(prefetch_invalid_gpte)(vcpu, sp, spte, gpte))
 		return false;
 
-	pgprintk("%s: gpte %llx spte %p\n", __func__, (u64)gpte, spte);
-
 	gfn = gpte_to_gfn(gpte);
 	pte_access = sp->role.access & FNAME(gpte_access)(gpte);
 	FNAME(protect_clean_gpte)(vcpu->arch.mmu, &pte_access, gpte);
@@ -638,8 +648,19 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault,
 	if (FNAME(gpte_changed)(vcpu, gw, top_level))
 		goto out_gpte_changed;
 
-	if (WARN_ON(!VALID_PAGE(vcpu->arch.mmu->root.hpa)))
+	if (WARN_ON_ONCE(!VALID_PAGE(vcpu->arch.mmu->root.hpa)))
+		goto out_gpte_changed;
+
+	/*
+	 * Load a new root and retry the faulting instruction in the extremely
+	 * unlikely scenario that the guest root gfn became visible between
+	 * loading a dummy root and handling the resulting page fault, e.g. if
+	 * userspace create a memslot in the interim.
+	 */
+	if (unlikely(kvm_mmu_is_dummy_root(vcpu->arch.mmu->root.hpa))) {
+		kvm_make_request(KVM_REQ_MMU_FREE_OBSOLETE_ROOTS, vcpu);
 		goto out_gpte_changed;
+	}
 
 	for_each_shadow_entry(vcpu, fault->addr, it) {
 		gfn_t table_gfn;
@@ -758,7 +779,6 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault
 	struct guest_walker walker;
 	int r;
 
-	pgprintk("%s: addr %lx err %x\n", __func__, fault->addr, fault->error_code);
 	WARN_ON_ONCE(fault->is_tdp);
 
 	/*
@@ -773,7 +793,6 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault
 	 * The page is not mapped by the guest.  Let the guest handle it.
 	 */
 	if (!r) {
-		pgprintk("%s: guest page fault\n", __func__);
 		if (!fault->prefetch)
 			kvm_inject_emulated_page_fault(vcpu, &walker.fault);
 
@@ -837,7 +856,7 @@ static gpa_t FNAME(get_level1_sp_gpa)(struct kvm_mmu_page *sp)
 {
 	int offset = 0;
 
-	WARN_ON(sp->role.level != PG_LEVEL_4K);
+	WARN_ON_ONCE(sp->role.level != PG_LEVEL_4K);
 
 	if (PTTYPE == 32)
 		offset = sp->role.quadrant << SPTE_LEVEL_BITS;
diff --git a/arch/x86/kvm/mmu/spte.c b/arch/x86/kvm/mmu/spte.c
index cf2c6426a6fc..4a599130e9c9 100644
--- a/arch/x86/kvm/mmu/spte.c
+++ b/arch/x86/kvm/mmu/spte.c
@@ -61,7 +61,7 @@ static u64 generation_mmio_spte_mask(u64 gen)
 {
 	u64 mask;
 
-	WARN_ON(gen & ~MMIO_SPTE_GEN_MASK);
+	WARN_ON_ONCE(gen & ~MMIO_SPTE_GEN_MASK);
 
 	mask = (gen << MMIO_SPTE_GEN_LOW_SHIFT) & MMIO_SPTE_GEN_LOW_MASK;
 	mask |= (gen << MMIO_SPTE_GEN_HIGH_SHIFT) & MMIO_SPTE_GEN_HIGH_MASK;
@@ -221,8 +221,6 @@ bool make_spte(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
 		 * shadow pages and unsync'ing pages is not allowed.
 		 */
 		if (mmu_try_to_unsync_pages(vcpu->kvm, slot, gfn, can_unsync, prefetch)) {
-			pgprintk("%s: found shadow page for %llx, marking ro\n",
-				 __func__, gfn);
 			wrprot = true;
 			pte_access &= ~ACC_WRITE_MASK;
 			spte &= ~(PT_WRITABLE_MASK | shadow_mmu_writable_mask);
@@ -242,7 +240,7 @@ out:
 
 	if ((spte & PT_WRITABLE_MASK) && kvm_slot_dirty_track_enabled(slot)) {
 		/* Enforced by kvm_mmu_hugepage_adjust. */
-		WARN_ON(level > PG_LEVEL_4K);
+		WARN_ON_ONCE(level > PG_LEVEL_4K);
 		mark_page_dirty_in_slot(vcpu->kvm, slot, gfn);
 	}
 
diff --git a/arch/x86/kvm/mmu/spte.h b/arch/x86/kvm/mmu/spte.h
index 1279db2eab44..a129951c9a88 100644
--- a/arch/x86/kvm/mmu/spte.h
+++ b/arch/x86/kvm/mmu/spte.h
@@ -3,6 +3,7 @@
 #ifndef KVM_X86_MMU_SPTE_H
 #define KVM_X86_MMU_SPTE_H
 
+#include "mmu.h"
 #include "mmu_internal.h"
 
 /*
@@ -236,6 +237,18 @@ static inline struct kvm_mmu_page *sptep_to_sp(u64 *sptep)
 	return to_shadow_page(__pa(sptep));
 }
 
+static inline struct kvm_mmu_page *root_to_sp(hpa_t root)
+{
+	if (kvm_mmu_is_dummy_root(root))
+		return NULL;
+
+	/*
+	 * The "root" may be a special root, e.g. a PAE entry, treat it as a
+	 * SPTE to ensure any non-PA bits are dropped.
+	 */
+	return spte_to_child_sp(root);
+}
+
 static inline bool is_mmio_spte(u64 spte)
 {
 	return (spte & shadow_mmio_mask) == shadow_mmio_value &&
@@ -265,13 +278,13 @@ static inline bool sp_ad_disabled(struct kvm_mmu_page *sp)
 
 static inline bool spte_ad_enabled(u64 spte)
 {
-	MMU_WARN_ON(!is_shadow_present_pte(spte));
+	KVM_MMU_WARN_ON(!is_shadow_present_pte(spte));
 	return (spte & SPTE_TDP_AD_MASK) != SPTE_TDP_AD_DISABLED;
 }
 
 static inline bool spte_ad_need_write_protect(u64 spte)
 {
-	MMU_WARN_ON(!is_shadow_present_pte(spte));
+	KVM_MMU_WARN_ON(!is_shadow_present_pte(spte));
 	/*
 	 * This is benign for non-TDP SPTEs as SPTE_TDP_AD_ENABLED is '0',
 	 * and non-TDP SPTEs will never set these bits.  Optimize for 64-bit
@@ -282,13 +295,13 @@ static inline bool spte_ad_need_write_protect(u64 spte)
 
 static inline u64 spte_shadow_accessed_mask(u64 spte)
 {
-	MMU_WARN_ON(!is_shadow_present_pte(spte));
+	KVM_MMU_WARN_ON(!is_shadow_present_pte(spte));
 	return spte_ad_enabled(spte) ? shadow_accessed_mask : 0;
 }
 
 static inline u64 spte_shadow_dirty_mask(u64 spte)
 {
-	MMU_WARN_ON(!is_shadow_present_pte(spte));
+	KVM_MMU_WARN_ON(!is_shadow_present_pte(spte));
 	return spte_ad_enabled(spte) ? shadow_dirty_mask : 0;
 }
 
diff --git a/arch/x86/kvm/mmu/tdp_iter.c b/arch/x86/kvm/mmu/tdp_iter.c
index d2eb0d4f8710..bd30ebfb2f2c 100644
--- a/arch/x86/kvm/mmu/tdp_iter.c
+++ b/arch/x86/kvm/mmu/tdp_iter.c
@@ -39,13 +39,14 @@ void tdp_iter_restart(struct tdp_iter *iter)
 void tdp_iter_start(struct tdp_iter *iter, struct kvm_mmu_page *root,
 		    int min_level, gfn_t next_last_level_gfn)
 {
-	int root_level = root->role.level;
-
-	WARN_ON(root_level < 1);
-	WARN_ON(root_level > PT64_ROOT_MAX_LEVEL);
+	if (WARN_ON_ONCE(!root || (root->role.level < 1) ||
+			 (root->role.level > PT64_ROOT_MAX_LEVEL))) {
+		iter->valid = false;
+		return;
+	}
 
 	iter->next_last_level_gfn = next_last_level_gfn;
-	iter->root_level = root_level;
+	iter->root_level = root->role.level;
 	iter->min_level = min_level;
 	iter->pt_path[iter->root_level - 1] = (tdp_ptep_t)root->spt;
 	iter->as_id = kvm_mmu_page_as_id(root);
diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index 512163d52194..6c63f2d1675f 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -475,9 +475,9 @@ static void handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn,
 	bool is_leaf = is_present && is_last_spte(new_spte, level);
 	bool pfn_changed = spte_to_pfn(old_spte) != spte_to_pfn(new_spte);
 
-	WARN_ON(level > PT64_ROOT_MAX_LEVEL);
-	WARN_ON(level < PG_LEVEL_4K);
-	WARN_ON(gfn & (KVM_PAGES_PER_HPAGE(level) - 1));
+	WARN_ON_ONCE(level > PT64_ROOT_MAX_LEVEL);
+	WARN_ON_ONCE(level < PG_LEVEL_4K);
+	WARN_ON_ONCE(gfn & (KVM_PAGES_PER_HPAGE(level) - 1));
 
 	/*
 	 * If this warning were to trigger it would indicate that there was a
@@ -522,9 +522,9 @@ static void handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn,
 		 * impact the guest since both the former and current SPTEs
 		 * are nonpresent.
 		 */
-		if (WARN_ON(!is_mmio_spte(old_spte) &&
-			    !is_mmio_spte(new_spte) &&
-			    !is_removed_spte(new_spte)))
+		if (WARN_ON_ONCE(!is_mmio_spte(old_spte) &&
+				 !is_mmio_spte(new_spte) &&
+				 !is_removed_spte(new_spte)))
 			pr_err("Unexpected SPTE change! Nonpresent SPTEs\n"
 			       "should not be replaced with another,\n"
 			       "different nonpresent SPTE, unless one or both\n"
@@ -661,7 +661,7 @@ static u64 tdp_mmu_set_spte(struct kvm *kvm, int as_id, tdp_ptep_t sptep,
 	 * should be used. If operating under the MMU lock in write mode, the
 	 * use of the removed SPTE should not be necessary.
 	 */
-	WARN_ON(is_removed_spte(old_spte) || is_removed_spte(new_spte));
+	WARN_ON_ONCE(is_removed_spte(old_spte) || is_removed_spte(new_spte));
 
 	old_spte = kvm_tdp_mmu_write_spte(sptep, old_spte, new_spte, level);
 
@@ -689,7 +689,7 @@ static inline void tdp_mmu_iter_set_spte(struct kvm *kvm, struct tdp_iter *iter,
 		else
 
 #define tdp_mmu_for_each_pte(_iter, _mmu, _start, _end)		\
-	for_each_tdp_pte(_iter, to_shadow_page(_mmu->root.hpa), _start, _end)
+	for_each_tdp_pte(_iter, root_to_sp(_mmu->root.hpa), _start, _end)
 
 /*
  * Yield if the MMU lock is contended or this thread needs to return control
@@ -709,7 +709,7 @@ static inline bool __must_check tdp_mmu_iter_cond_resched(struct kvm *kvm,
 							  struct tdp_iter *iter,
 							  bool flush, bool shared)
 {
-	WARN_ON(iter->yielded);
+	WARN_ON_ONCE(iter->yielded);
 
 	/* Ensure forward progress has been made before yielding. */
 	if (iter->next_last_level_gfn == iter->yielded_gfn)
@@ -728,7 +728,7 @@ static inline bool __must_check tdp_mmu_iter_cond_resched(struct kvm *kvm,
 
 		rcu_read_lock();
 
-		WARN_ON(iter->gfn > iter->next_last_level_gfn);
+		WARN_ON_ONCE(iter->gfn > iter->next_last_level_gfn);
 
 		iter->yielded = true;
 	}
@@ -1241,7 +1241,7 @@ static bool set_spte_gfn(struct kvm *kvm, struct tdp_iter *iter,
 	u64 new_spte;
 
 	/* Huge pages aren't expected to be modified without first being zapped. */
-	WARN_ON(pte_huge(range->pte) || range->start + 1 != range->end);
+	WARN_ON_ONCE(pte_huge(range->arg.pte) || range->start + 1 != range->end);
 
 	if (iter->level != PG_LEVEL_4K ||
 	    !is_shadow_present_pte(iter->old_spte))
@@ -1255,9 +1255,9 @@ static bool set_spte_gfn(struct kvm *kvm, struct tdp_iter *iter,
 	 */
 	tdp_mmu_iter_set_spte(kvm, iter, 0);
 
-	if (!pte_write(range->pte)) {
+	if (!pte_write(range->arg.pte)) {
 		new_spte = kvm_mmu_changed_pte_notifier_make_spte(iter->old_spte,
-								  pte_pfn(range->pte));
+								  pte_pfn(range->arg.pte));
 
 		tdp_mmu_iter_set_spte(kvm, iter, new_spte);
 	}
@@ -1548,8 +1548,8 @@ retry:
 		if (!is_shadow_present_pte(iter.old_spte))
 			continue;
 
-		MMU_WARN_ON(kvm_ad_enabled() &&
-			    spte_ad_need_write_protect(iter.old_spte));
+		KVM_MMU_WARN_ON(kvm_ad_enabled() &&
+				spte_ad_need_write_protect(iter.old_spte));
 
 		if (!(iter.old_spte & dbit))
 			continue;
@@ -1600,6 +1600,8 @@ static void clear_dirty_pt_masked(struct kvm *kvm, struct kvm_mmu_page *root,
 						   shadow_dirty_mask;
 	struct tdp_iter iter;
 
+	lockdep_assert_held_write(&kvm->mmu_lock);
+
 	rcu_read_lock();
 
 	tdp_root_for_each_leaf_pte(iter, root, gfn + __ffs(mask),
@@ -1607,8 +1609,8 @@ static void clear_dirty_pt_masked(struct kvm *kvm, struct kvm_mmu_page *root,
 		if (!mask)
 			break;
 
-		MMU_WARN_ON(kvm_ad_enabled() &&
-			    spte_ad_need_write_protect(iter.old_spte));
+		KVM_MMU_WARN_ON(kvm_ad_enabled() &&
+				spte_ad_need_write_protect(iter.old_spte));
 
 		if (iter.level > PG_LEVEL_4K ||
 		    !(mask & (1UL << (iter.gfn - gfn))))
@@ -1646,7 +1648,6 @@ void kvm_tdp_mmu_clear_dirty_pt_masked(struct kvm *kvm,
 {
 	struct kvm_mmu_page *root;
 
-	lockdep_assert_held_write(&kvm->mmu_lock);
 	for_each_tdp_mmu_root(kvm, root, slot->as_id)
 		clear_dirty_pt_masked(kvm, root, gfn, mask, wrprot);
 }
diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
index bf653df86112..edb89b51b383 100644
--- a/arch/x86/kvm/pmu.c
+++ b/arch/x86/kvm/pmu.c
@@ -382,9 +382,6 @@ static bool check_pmu_event_filter(struct kvm_pmc *pmc)
 	struct kvm_x86_pmu_event_filter *filter;
 	struct kvm *kvm = pmc->vcpu->kvm;
 
-	if (!static_call(kvm_x86_pmu_hw_event_available)(pmc))
-		return false;
-
 	filter = srcu_dereference(kvm->arch.pmu_event_filter, &kvm->srcu);
 	if (!filter)
 		return true;
@@ -398,6 +395,7 @@ static bool check_pmu_event_filter(struct kvm_pmc *pmc)
 static bool pmc_event_is_allowed(struct kvm_pmc *pmc)
 {
 	return pmc_is_globally_enabled(pmc) && pmc_speculative_in_use(pmc) &&
+	       static_call(kvm_x86_pmu_hw_event_available)(pmc) &&
 	       check_pmu_event_filter(pmc);
 }
 
diff --git a/arch/x86/kvm/reverse_cpuid.h b/arch/x86/kvm/reverse_cpuid.h
index 56cbdb24400a..b81650678375 100644
--- a/arch/x86/kvm/reverse_cpuid.h
+++ b/arch/x86/kvm/reverse_cpuid.h
@@ -43,6 +43,7 @@ enum kvm_only_cpuid_leafs {
 /* Intel-defined sub-features, CPUID level 0x00000007:1 (EDX) */
 #define X86_FEATURE_AVX_VNNI_INT8       KVM_X86_FEATURE(CPUID_7_1_EDX, 4)
 #define X86_FEATURE_AVX_NE_CONVERT      KVM_X86_FEATURE(CPUID_7_1_EDX, 5)
+#define X86_FEATURE_AMX_COMPLEX         KVM_X86_FEATURE(CPUID_7_1_EDX, 8)
 #define X86_FEATURE_PREFETCHITI         KVM_X86_FEATURE(CPUID_7_1_EDX, 14)
 
 /* CPUID level 0x80000007 (EDX). */
diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c
index cfc8ab773025..2092db892d7d 100644
--- a/arch/x86/kvm/svm/avic.c
+++ b/arch/x86/kvm/svm/avic.c
@@ -791,6 +791,7 @@ static int svm_ir_list_add(struct vcpu_svm *svm, struct amd_iommu_pi_data *pi)
 	int ret = 0;
 	unsigned long flags;
 	struct amd_svm_iommu_ir *ir;
+	u64 entry;
 
 	/**
 	 * In some cases, the existing irte is updated and re-set,
@@ -824,6 +825,18 @@ static int svm_ir_list_add(struct vcpu_svm *svm, struct amd_iommu_pi_data *pi)
 	ir->data = pi->ir_data;
 
 	spin_lock_irqsave(&svm->ir_list_lock, flags);
+
+	/*
+	 * Update the target pCPU for IOMMU doorbells if the vCPU is running.
+	 * If the vCPU is NOT running, i.e. is blocking or scheduled out, KVM
+	 * will update the pCPU info when the vCPU awkened and/or scheduled in.
+	 * See also avic_vcpu_load().
+	 */
+	entry = READ_ONCE(*(svm->avic_physical_id_cache));
+	if (entry & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK)
+		amd_iommu_update_ga(entry & AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK,
+				    true, pi->ir_data);
+
 	list_add(&ir->node, &svm->ir_list);
 	spin_unlock_irqrestore(&svm->ir_list_lock, flags);
 out:
@@ -986,10 +999,11 @@ static inline int
 avic_update_iommu_vcpu_affinity(struct kvm_vcpu *vcpu, int cpu, bool r)
 {
 	int ret = 0;
-	unsigned long flags;
 	struct amd_svm_iommu_ir *ir;
 	struct vcpu_svm *svm = to_svm(vcpu);
 
+	lockdep_assert_held(&svm->ir_list_lock);
+
 	if (!kvm_arch_has_assigned_device(vcpu->kvm))
 		return 0;
 
@@ -997,19 +1011,15 @@ avic_update_iommu_vcpu_affinity(struct kvm_vcpu *vcpu, int cpu, bool r)
 	 * Here, we go through the per-vcpu ir_list to update all existing
 	 * interrupt remapping table entry targeting this vcpu.
 	 */
-	spin_lock_irqsave(&svm->ir_list_lock, flags);
-
 	if (list_empty(&svm->ir_list))
-		goto out;
+		return 0;
 
 	list_for_each_entry(ir, &svm->ir_list, node) {
 		ret = amd_iommu_update_ga(cpu, r, ir->data);
 		if (ret)
-			break;
+			return ret;
 	}
-out:
-	spin_unlock_irqrestore(&svm->ir_list_lock, flags);
-	return ret;
+	return 0;
 }
 
 void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
@@ -1017,6 +1027,7 @@ void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 	u64 entry;
 	int h_physical_id = kvm_cpu_get_apicid(cpu);
 	struct vcpu_svm *svm = to_svm(vcpu);
+	unsigned long flags;
 
 	lockdep_assert_preemption_disabled();
 
@@ -1033,6 +1044,15 @@ void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 	if (kvm_vcpu_is_blocking(vcpu))
 		return;
 
+	/*
+	 * Grab the per-vCPU interrupt remapping lock even if the VM doesn't
+	 * _currently_ have assigned devices, as that can change.  Holding
+	 * ir_list_lock ensures that either svm_ir_list_add() will consume
+	 * up-to-date entry information, or that this task will wait until
+	 * svm_ir_list_add() completes to set the new target pCPU.
+	 */
+	spin_lock_irqsave(&svm->ir_list_lock, flags);
+
 	entry = READ_ONCE(*(svm->avic_physical_id_cache));
 	WARN_ON_ONCE(entry & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK);
 
@@ -1042,25 +1062,48 @@ void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 
 	WRITE_ONCE(*(svm->avic_physical_id_cache), entry);
 	avic_update_iommu_vcpu_affinity(vcpu, h_physical_id, true);
+
+	spin_unlock_irqrestore(&svm->ir_list_lock, flags);
 }
 
 void avic_vcpu_put(struct kvm_vcpu *vcpu)
 {
 	u64 entry;
 	struct vcpu_svm *svm = to_svm(vcpu);
+	unsigned long flags;
 
 	lockdep_assert_preemption_disabled();
 
+	/*
+	 * Note, reading the Physical ID entry outside of ir_list_lock is safe
+	 * as only the pCPU that has loaded (or is loading) the vCPU is allowed
+	 * to modify the entry, and preemption is disabled.  I.e. the vCPU
+	 * can't be scheduled out and thus avic_vcpu_{put,load}() can't run
+	 * recursively.
+	 */
 	entry = READ_ONCE(*(svm->avic_physical_id_cache));
 
 	/* Nothing to do if IsRunning == '0' due to vCPU blocking. */
 	if (!(entry & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK))
 		return;
 
+	/*
+	 * Take and hold the per-vCPU interrupt remapping lock while updating
+	 * the Physical ID entry even though the lock doesn't protect against
+	 * multiple writers (see above).  Holding ir_list_lock ensures that
+	 * either svm_ir_list_add() will consume up-to-date entry information,
+	 * or that this task will wait until svm_ir_list_add() completes to
+	 * mark the vCPU as not running.
+	 */
+	spin_lock_irqsave(&svm->ir_list_lock, flags);
+
 	avic_update_iommu_vcpu_affinity(vcpu, -1, 0);
 
 	entry &= ~AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK;
 	WRITE_ONCE(*(svm->avic_physical_id_cache), entry);
+
+	spin_unlock_irqrestore(&svm->ir_list_lock, flags);
+
 }
 
 void avic_refresh_virtual_apic_mode(struct kvm_vcpu *vcpu)
diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index 96936ddf1b3c..dd496c9e5f91 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -107,7 +107,7 @@ static void nested_svm_uninit_mmu_context(struct kvm_vcpu *vcpu)
 
 static bool nested_vmcb_needs_vls_intercept(struct vcpu_svm *svm)
 {
-	if (!svm->v_vmload_vmsave_enabled)
+	if (!guest_can_use(&svm->vcpu, X86_FEATURE_V_VMSAVE_VMLOAD))
 		return true;
 
 	if (!nested_npt_enabled(svm))
@@ -552,6 +552,7 @@ static void nested_vmcb02_prepare_save(struct vcpu_svm *svm, struct vmcb *vmcb12
 	bool new_vmcb12 = false;
 	struct vmcb *vmcb01 = svm->vmcb01.ptr;
 	struct vmcb *vmcb02 = svm->nested.vmcb02.ptr;
+	struct kvm_vcpu *vcpu = &svm->vcpu;
 
 	nested_vmcb02_compute_g_pat(svm);
 
@@ -577,18 +578,18 @@ static void nested_vmcb02_prepare_save(struct vcpu_svm *svm, struct vmcb *vmcb12
 		vmcb_mark_dirty(vmcb02, VMCB_DT);
 	}
 
-	kvm_set_rflags(&svm->vcpu, vmcb12->save.rflags | X86_EFLAGS_FIXED);
+	kvm_set_rflags(vcpu, vmcb12->save.rflags | X86_EFLAGS_FIXED);
 
-	svm_set_efer(&svm->vcpu, svm->nested.save.efer);
+	svm_set_efer(vcpu, svm->nested.save.efer);
 
-	svm_set_cr0(&svm->vcpu, svm->nested.save.cr0);
-	svm_set_cr4(&svm->vcpu, svm->nested.save.cr4);
+	svm_set_cr0(vcpu, svm->nested.save.cr0);
+	svm_set_cr4(vcpu, svm->nested.save.cr4);
 
 	svm->vcpu.arch.cr2 = vmcb12->save.cr2;
 
-	kvm_rax_write(&svm->vcpu, vmcb12->save.rax);
-	kvm_rsp_write(&svm->vcpu, vmcb12->save.rsp);
-	kvm_rip_write(&svm->vcpu, vmcb12->save.rip);
+	kvm_rax_write(vcpu, vmcb12->save.rax);
+	kvm_rsp_write(vcpu, vmcb12->save.rsp);
+	kvm_rip_write(vcpu, vmcb12->save.rip);
 
 	/* In case we don't even reach vcpu_run, the fields are not updated */
 	vmcb02->save.rax = vmcb12->save.rax;
@@ -602,7 +603,8 @@ static void nested_vmcb02_prepare_save(struct vcpu_svm *svm, struct vmcb *vmcb12
 		vmcb_mark_dirty(vmcb02, VMCB_DR);
 	}
 
-	if (unlikely(svm->lbrv_enabled && (svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK))) {
+	if (unlikely(guest_can_use(vcpu, X86_FEATURE_LBRV) &&
+		     (svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK))) {
 		/*
 		 * Reserved bits of DEBUGCTL are ignored.  Be consistent with
 		 * svm_set_msr's definition of reserved bits.
@@ -658,7 +660,8 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm,
 	 * exit_int_info, exit_int_info_err, next_rip, insn_len, insn_bytes.
 	 */
 
-	if (svm->vgif_enabled && (svm->nested.ctl.int_ctl & V_GIF_ENABLE_MASK))
+	if (guest_can_use(vcpu, X86_FEATURE_VGIF) &&
+	    (svm->nested.ctl.int_ctl & V_GIF_ENABLE_MASK))
 		int_ctl_vmcb12_bits |= (V_GIF_MASK | V_GIF_ENABLE_MASK);
 	else
 		int_ctl_vmcb01_bits |= (V_GIF_MASK | V_GIF_ENABLE_MASK);
@@ -695,10 +698,9 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm,
 
 	vmcb02->control.tsc_offset = vcpu->arch.tsc_offset;
 
-	if (svm->tsc_ratio_msr != kvm_caps.default_tsc_scaling_ratio) {
-		WARN_ON(!svm->tsc_scaling_enabled);
+	if (guest_can_use(vcpu, X86_FEATURE_TSCRATEMSR) &&
+	    svm->tsc_ratio_msr != kvm_caps.default_tsc_scaling_ratio)
 		nested_svm_update_tsc_ratio_msr(vcpu);
-	}
 
 	vmcb02->control.int_ctl             =
 		(svm->nested.ctl.int_ctl & int_ctl_vmcb12_bits) |
@@ -717,7 +719,7 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm,
 	 * what a nrips=0 CPU would do (L1 is responsible for advancing RIP
 	 * prior to injecting the event).
 	 */
-	if (svm->nrips_enabled)
+	if (guest_can_use(vcpu, X86_FEATURE_NRIPS))
 		vmcb02->control.next_rip    = svm->nested.ctl.next_rip;
 	else if (boot_cpu_has(X86_FEATURE_NRIPS))
 		vmcb02->control.next_rip    = vmcb12_rip;
@@ -727,7 +729,7 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm,
 		svm->soft_int_injected = true;
 		svm->soft_int_csbase = vmcb12_csbase;
 		svm->soft_int_old_rip = vmcb12_rip;
-		if (svm->nrips_enabled)
+		if (guest_can_use(vcpu, X86_FEATURE_NRIPS))
 			svm->soft_int_next_rip = svm->nested.ctl.next_rip;
 		else
 			svm->soft_int_next_rip = vmcb12_rip;
@@ -735,15 +737,21 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm,
 
 	vmcb02->control.virt_ext            = vmcb01->control.virt_ext &
 					      LBR_CTL_ENABLE_MASK;
-	if (svm->lbrv_enabled)
+	if (guest_can_use(vcpu, X86_FEATURE_LBRV))
 		vmcb02->control.virt_ext  |=
 			(svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK);
 
 	if (!nested_vmcb_needs_vls_intercept(svm))
 		vmcb02->control.virt_ext |= VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK;
 
-	pause_count12 = svm->pause_filter_enabled ? svm->nested.ctl.pause_filter_count : 0;
-	pause_thresh12 = svm->pause_threshold_enabled ? svm->nested.ctl.pause_filter_thresh : 0;
+	if (guest_can_use(vcpu, X86_FEATURE_PAUSEFILTER))
+		pause_count12 = svm->nested.ctl.pause_filter_count;
+	else
+		pause_count12 = 0;
+	if (guest_can_use(vcpu, X86_FEATURE_PFTHRESHOLD))
+		pause_thresh12 = svm->nested.ctl.pause_filter_thresh;
+	else
+		pause_thresh12 = 0;
 	if (kvm_pause_in_guest(svm->vcpu.kvm)) {
 		/* use guest values since host doesn't intercept PAUSE */
 		vmcb02->control.pause_filter_count = pause_count12;
@@ -1027,7 +1035,7 @@ int nested_svm_vmexit(struct vcpu_svm *svm)
 	if (vmcb12->control.exit_code != SVM_EXIT_ERR)
 		nested_save_pending_event_to_vmcb12(svm, vmcb12);
 
-	if (svm->nrips_enabled)
+	if (guest_can_use(vcpu, X86_FEATURE_NRIPS))
 		vmcb12->control.next_rip  = vmcb02->control.next_rip;
 
 	vmcb12->control.int_ctl           = svm->nested.ctl.int_ctl;
@@ -1066,7 +1074,8 @@ int nested_svm_vmexit(struct vcpu_svm *svm)
 	if (!nested_exit_on_intr(svm))
 		kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
 
-	if (unlikely(svm->lbrv_enabled && (svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK))) {
+	if (unlikely(guest_can_use(vcpu, X86_FEATURE_LBRV) &&
+		     (svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK))) {
 		svm_copy_lbrs(vmcb12, vmcb02);
 		svm_update_lbrv(vcpu);
 	} else if (unlikely(vmcb01->control.virt_ext & LBR_CTL_ENABLE_MASK)) {
@@ -1101,10 +1110,10 @@ int nested_svm_vmexit(struct vcpu_svm *svm)
 		vmcb_mark_dirty(vmcb01, VMCB_INTERCEPTS);
 	}
 
-	if (svm->tsc_ratio_msr != kvm_caps.default_tsc_scaling_ratio) {
-		WARN_ON(!svm->tsc_scaling_enabled);
+	if (kvm_caps.has_tsc_control &&
+	    vcpu->arch.tsc_scaling_ratio != vcpu->arch.l1_tsc_scaling_ratio) {
 		vcpu->arch.tsc_scaling_ratio = vcpu->arch.l1_tsc_scaling_ratio;
-		__svm_write_tsc_multiplier(vcpu->arch.tsc_scaling_ratio);
+		svm_write_tsc_multiplier(vcpu);
 	}
 
 	svm->nested.ctl.nested_cr3 = 0;
@@ -1537,7 +1546,7 @@ void nested_svm_update_tsc_ratio_msr(struct kvm_vcpu *vcpu)
 	vcpu->arch.tsc_scaling_ratio =
 		kvm_calc_nested_tsc_multiplier(vcpu->arch.l1_tsc_scaling_ratio,
 					       svm->tsc_ratio_msr);
-	__svm_write_tsc_multiplier(vcpu->arch.tsc_scaling_ratio);
+	svm_write_tsc_multiplier(vcpu);
 }
 
 /* Inverse operation of nested_copy_vmcb_control_to_cache(). asid is copied too. */
diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index d3aec1f2cad2..b9a0a939d59f 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -23,6 +23,7 @@
 #include <asm/pkru.h>
 #include <asm/trapnr.h>
 #include <asm/fpu/xcr.h>
+#include <asm/debugreg.h>
 
 #include "mmu.h"
 #include "x86.h"
@@ -54,9 +55,14 @@ module_param_named(sev, sev_enabled, bool, 0444);
 /* enable/disable SEV-ES support */
 static bool sev_es_enabled = true;
 module_param_named(sev_es, sev_es_enabled, bool, 0444);
+
+/* enable/disable SEV-ES DebugSwap support */
+static bool sev_es_debug_swap_enabled = true;
+module_param_named(debug_swap, sev_es_debug_swap_enabled, bool, 0444);
 #else
 #define sev_enabled false
 #define sev_es_enabled false
+#define sev_es_debug_swap_enabled false
 #endif /* CONFIG_KVM_AMD_SEV */
 
 static u8 sev_enc_bit;
@@ -606,6 +612,9 @@ static int sev_es_sync_vmsa(struct vcpu_svm *svm)
 	save->xss  = svm->vcpu.arch.ia32_xss;
 	save->dr6  = svm->vcpu.arch.dr6;
 
+	if (sev_es_debug_swap_enabled)
+		save->sev_features |= SVM_SEV_FEAT_DEBUG_SWAP;
+
 	pr_debug("Virtual Machine Save Area (VMSA):\n");
 	print_hex_dump_debug("", DUMP_PREFIX_NONE, 16, 1, save, sizeof(*save), false);
 
@@ -619,6 +628,11 @@ static int __sev_launch_update_vmsa(struct kvm *kvm, struct kvm_vcpu *vcpu,
 	struct vcpu_svm *svm = to_svm(vcpu);
 	int ret;
 
+	if (vcpu->guest_debug) {
+		pr_warn_once("KVM_SET_GUEST_DEBUG for SEV-ES guest is not supported");
+		return -EINVAL;
+	}
+
 	/* Perform some pre-encryption checks against the VMSA */
 	ret = sev_es_sync_vmsa(svm);
 	if (ret)
@@ -1725,7 +1739,7 @@ static void sev_migrate_from(struct kvm *dst_kvm, struct kvm *src_kvm)
 		 * Note, the source is not required to have the same number of
 		 * vCPUs as the destination when migrating a vanilla SEV VM.
 		 */
-		src_vcpu = kvm_get_vcpu(dst_kvm, i);
+		src_vcpu = kvm_get_vcpu(src_kvm, i);
 		src_svm = to_svm(src_vcpu);
 
 		/*
@@ -2171,7 +2185,7 @@ void __init sev_hardware_setup(void)
 	bool sev_es_supported = false;
 	bool sev_supported = false;
 
-	if (!sev_enabled || !npt_enabled)
+	if (!sev_enabled || !npt_enabled || !nrips)
 		goto out;
 
 	/*
@@ -2256,6 +2270,9 @@ out:
 
 	sev_enabled = sev_supported;
 	sev_es_enabled = sev_es_supported;
+	if (!sev_es_enabled || !cpu_feature_enabled(X86_FEATURE_DEBUG_SWAP) ||
+	    !cpu_feature_enabled(X86_FEATURE_NO_NESTED_DATA_BP))
+		sev_es_debug_swap_enabled = false;
 #endif
 }
 
@@ -2881,7 +2898,10 @@ int sev_handle_vmgexit(struct kvm_vcpu *vcpu)
 					    svm->sev_es.ghcb_sa);
 		break;
 	case SVM_VMGEXIT_NMI_COMPLETE:
-		ret = svm_invoke_exit_handler(vcpu, SVM_EXIT_IRET);
+		++vcpu->stat.nmi_window_exits;
+		svm->nmi_masked = false;
+		kvm_make_request(KVM_REQ_EVENT, vcpu);
+		ret = 1;
 		break;
 	case SVM_VMGEXIT_AP_HLT_LOOP:
 		ret = kvm_emulate_ap_reset_hold(vcpu);
@@ -2944,6 +2964,7 @@ int sev_es_string_io(struct vcpu_svm *svm, int size, unsigned int port, int in)
 
 static void sev_es_init_vmcb(struct vcpu_svm *svm)
 {
+	struct vmcb *vmcb = svm->vmcb01.ptr;
 	struct kvm_vcpu *vcpu = &svm->vcpu;
 
 	svm->vmcb->control.nested_ctl |= SVM_NESTED_CTL_SEV_ES_ENABLE;
@@ -2952,9 +2973,12 @@ static void sev_es_init_vmcb(struct vcpu_svm *svm)
 	/*
 	 * An SEV-ES guest requires a VMSA area that is a separate from the
 	 * VMCB page. Do not include the encryption mask on the VMSA physical
-	 * address since hardware will access it using the guest key.
+	 * address since hardware will access it using the guest key.  Note,
+	 * the VMSA will be NULL if this vCPU is the destination for intrahost
+	 * migration, and will be copied later.
 	 */
-	svm->vmcb->control.vmsa_pa = __pa(svm->sev_es.vmsa);
+	if (svm->sev_es.vmsa)
+		svm->vmcb->control.vmsa_pa = __pa(svm->sev_es.vmsa);
 
 	/* Can't intercept CR register access, HV can't modify CR registers */
 	svm_clr_intercept(svm, INTERCEPT_CR0_READ);
@@ -2972,8 +2996,23 @@ static void sev_es_init_vmcb(struct vcpu_svm *svm)
 	svm_set_intercept(svm, TRAP_CR4_WRITE);
 	svm_set_intercept(svm, TRAP_CR8_WRITE);
 
-	/* No support for enable_vmware_backdoor */
-	clr_exception_intercept(svm, GP_VECTOR);
+	vmcb->control.intercepts[INTERCEPT_DR] = 0;
+	if (!sev_es_debug_swap_enabled) {
+		vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_READ);
+		vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_WRITE);
+		recalc_intercepts(svm);
+	} else {
+		/*
+		 * Disable #DB intercept iff DebugSwap is enabled.  KVM doesn't
+		 * allow debugging SEV-ES guests, and enables DebugSwap iff
+		 * NO_NESTED_DATA_BP is supported, so there's no reason to
+		 * intercept #DB when DebugSwap is enabled.  For simplicity
+		 * with respect to guest debug, intercept #DB for other VMs
+		 * even if NO_NESTED_DATA_BP is supported, i.e. even if the
+		 * guest can't DoS the CPU with infinite #DB vectoring.
+		 */
+		clr_exception_intercept(svm, DB_VECTOR);
+	}
 
 	/* Can't intercept XSETBV, HV can't modify XCR0 directly */
 	svm_clr_intercept(svm, INTERCEPT_XSETBV);
@@ -3000,6 +3039,12 @@ void sev_init_vmcb(struct vcpu_svm *svm)
 	svm->vmcb->control.nested_ctl |= SVM_NESTED_CTL_SEV_ENABLE;
 	clr_exception_intercept(svm, UD_VECTOR);
 
+	/*
+	 * Don't intercept #GP for SEV guests, e.g. for the VMware backdoor, as
+	 * KVM can't decrypt guest memory to decode the faulting instruction.
+	 */
+	clr_exception_intercept(svm, GP_VECTOR);
+
 	if (sev_es_guest(svm->vcpu.kvm))
 		sev_es_init_vmcb(svm);
 }
@@ -3018,20 +3063,41 @@ void sev_es_vcpu_reset(struct vcpu_svm *svm)
 void sev_es_prepare_switch_to_guest(struct sev_es_save_area *hostsa)
 {
 	/*
-	 * As an SEV-ES guest, hardware will restore the host state on VMEXIT,
-	 * of which one step is to perform a VMLOAD.  KVM performs the
-	 * corresponding VMSAVE in svm_prepare_guest_switch for both
-	 * traditional and SEV-ES guests.
+	 * All host state for SEV-ES guests is categorized into three swap types
+	 * based on how it is handled by hardware during a world switch:
+	 *
+	 * A: VMRUN:   Host state saved in host save area
+	 *    VMEXIT:  Host state loaded from host save area
+	 *
+	 * B: VMRUN:   Host state _NOT_ saved in host save area
+	 *    VMEXIT:  Host state loaded from host save area
+	 *
+	 * C: VMRUN:   Host state _NOT_ saved in host save area
+	 *    VMEXIT:  Host state initialized to default(reset) values
+	 *
+	 * Manually save type-B state, i.e. state that is loaded by VMEXIT but
+	 * isn't saved by VMRUN, that isn't already saved by VMSAVE (performed
+	 * by common SVM code).
 	 */
-
-	/* XCR0 is restored on VMEXIT, save the current host value */
 	hostsa->xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
-
-	/* PKRU is restored on VMEXIT, save the current host value */
 	hostsa->pkru = read_pkru();
-
-	/* MSR_IA32_XSS is restored on VMEXIT, save the currnet host value */
 	hostsa->xss = host_xss;
+
+	/*
+	 * If DebugSwap is enabled, debug registers are loaded but NOT saved by
+	 * the CPU (Type-B). If DebugSwap is disabled/unsupported, the CPU both
+	 * saves and loads debug registers (Type-A).
+	 */
+	if (sev_es_debug_swap_enabled) {
+		hostsa->dr0 = native_get_debugreg(0);
+		hostsa->dr1 = native_get_debugreg(1);
+		hostsa->dr2 = native_get_debugreg(2);
+		hostsa->dr3 = native_get_debugreg(3);
+		hostsa->dr0_addr_mask = amd_get_dr_addr_mask(0);
+		hostsa->dr1_addr_mask = amd_get_dr_addr_mask(1);
+		hostsa->dr2_addr_mask = amd_get_dr_addr_mask(2);
+		hostsa->dr3_addr_mask = amd_get_dr_addr_mask(3);
+	}
 }
 
 void sev_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector)
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index d4bfdc607fe7..f283eb47f6ac 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -39,10 +39,9 @@
 #include <asm/spec-ctrl.h>
 #include <asm/cpu_device_id.h>
 #include <asm/traps.h>
+#include <asm/reboot.h>
 #include <asm/fpu/api.h>
 
-#include <asm/virtext.h>
-
 #include <trace/events/ipi.h>
 
 #include "trace.h"
@@ -203,7 +202,7 @@ static int nested = true;
 module_param(nested, int, S_IRUGO);
 
 /* enable/disable Next RIP Save */
-static int nrips = true;
+int nrips = true;
 module_param(nrips, int, 0444);
 
 /* enable/disable Virtual VMLOAD VMSAVE */
@@ -365,6 +364,8 @@ static void svm_set_interrupt_shadow(struct kvm_vcpu *vcpu, int mask)
 		svm->vmcb->control.int_state |= SVM_INTERRUPT_SHADOW_MASK;
 
 }
+static bool svm_can_emulate_instruction(struct kvm_vcpu *vcpu, int emul_type,
+					void *insn, int insn_len);
 
 static int __svm_skip_emulated_instruction(struct kvm_vcpu *vcpu,
 					   bool commit_side_effects)
@@ -385,6 +386,14 @@ static int __svm_skip_emulated_instruction(struct kvm_vcpu *vcpu,
 	}
 
 	if (!svm->next_rip) {
+		/*
+		 * FIXME: Drop this when kvm_emulate_instruction() does the
+		 * right thing and treats "can't emulate" as outright failure
+		 * for EMULTYPE_SKIP.
+		 */
+		if (!svm_can_emulate_instruction(vcpu, EMULTYPE_SKIP, NULL, 0))
+			return 0;
+
 		if (unlikely(!commit_side_effects))
 			old_rflags = svm->vmcb->save.rflags;
 
@@ -517,14 +526,21 @@ static void svm_init_osvw(struct kvm_vcpu *vcpu)
 		vcpu->arch.osvw.status |= 1;
 }
 
-static bool kvm_is_svm_supported(void)
+static bool __kvm_is_svm_supported(void)
 {
-	int cpu = raw_smp_processor_id();
-	const char *msg;
+	int cpu = smp_processor_id();
+	struct cpuinfo_x86 *c = &cpu_data(cpu);
+
 	u64 vm_cr;
 
-	if (!cpu_has_svm(&msg)) {
-		pr_err("SVM not supported by CPU %d, %s\n", cpu, msg);
+	if (c->x86_vendor != X86_VENDOR_AMD &&
+	    c->x86_vendor != X86_VENDOR_HYGON) {
+		pr_err("CPU %d isn't AMD or Hygon\n", cpu);
+		return false;
+	}
+
+	if (!cpu_has(c, X86_FEATURE_SVM)) {
+		pr_err("SVM not supported by CPU %d\n", cpu);
 		return false;
 	}
 
@@ -542,25 +558,55 @@ static bool kvm_is_svm_supported(void)
 	return true;
 }
 
+static bool kvm_is_svm_supported(void)
+{
+	bool supported;
+
+	migrate_disable();
+	supported = __kvm_is_svm_supported();
+	migrate_enable();
+
+	return supported;
+}
+
 static int svm_check_processor_compat(void)
 {
-	if (!kvm_is_svm_supported())
+	if (!__kvm_is_svm_supported())
 		return -EIO;
 
 	return 0;
 }
 
-void __svm_write_tsc_multiplier(u64 multiplier)
+static void __svm_write_tsc_multiplier(u64 multiplier)
 {
-	preempt_disable();
-
 	if (multiplier == __this_cpu_read(current_tsc_ratio))
-		goto out;
+		return;
 
 	wrmsrl(MSR_AMD64_TSC_RATIO, multiplier);
 	__this_cpu_write(current_tsc_ratio, multiplier);
-out:
-	preempt_enable();
+}
+
+static inline void kvm_cpu_svm_disable(void)
+{
+	uint64_t efer;
+
+	wrmsrl(MSR_VM_HSAVE_PA, 0);
+	rdmsrl(MSR_EFER, efer);
+	if (efer & EFER_SVME) {
+		/*
+		 * Force GIF=1 prior to disabling SVM, e.g. to ensure INIT and
+		 * NMI aren't blocked.
+		 */
+		stgi();
+		wrmsrl(MSR_EFER, efer & ~EFER_SVME);
+	}
+}
+
+static void svm_emergency_disable(void)
+{
+	kvm_rebooting = true;
+
+	kvm_cpu_svm_disable();
 }
 
 static void svm_hardware_disable(void)
@@ -569,7 +615,7 @@ static void svm_hardware_disable(void)
 	if (tsc_scaling)
 		__svm_write_tsc_multiplier(SVM_TSC_RATIO_DEFAULT);
 
-	cpu_svm_disable();
+	kvm_cpu_svm_disable();
 
 	amd_pmu_disable_virt();
 }
@@ -677,6 +723,39 @@ free_save_area:
 
 }
 
+static void set_dr_intercepts(struct vcpu_svm *svm)
+{
+	struct vmcb *vmcb = svm->vmcb01.ptr;
+
+	vmcb_set_intercept(&vmcb->control, INTERCEPT_DR0_READ);
+	vmcb_set_intercept(&vmcb->control, INTERCEPT_DR1_READ);
+	vmcb_set_intercept(&vmcb->control, INTERCEPT_DR2_READ);
+	vmcb_set_intercept(&vmcb->control, INTERCEPT_DR3_READ);
+	vmcb_set_intercept(&vmcb->control, INTERCEPT_DR4_READ);
+	vmcb_set_intercept(&vmcb->control, INTERCEPT_DR5_READ);
+	vmcb_set_intercept(&vmcb->control, INTERCEPT_DR6_READ);
+	vmcb_set_intercept(&vmcb->control, INTERCEPT_DR0_WRITE);
+	vmcb_set_intercept(&vmcb->control, INTERCEPT_DR1_WRITE);
+	vmcb_set_intercept(&vmcb->control, INTERCEPT_DR2_WRITE);
+	vmcb_set_intercept(&vmcb->control, INTERCEPT_DR3_WRITE);
+	vmcb_set_intercept(&vmcb->control, INTERCEPT_DR4_WRITE);
+	vmcb_set_intercept(&vmcb->control, INTERCEPT_DR5_WRITE);
+	vmcb_set_intercept(&vmcb->control, INTERCEPT_DR6_WRITE);
+	vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_READ);
+	vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_WRITE);
+
+	recalc_intercepts(svm);
+}
+
+static void clr_dr_intercepts(struct vcpu_svm *svm)
+{
+	struct vmcb *vmcb = svm->vmcb01.ptr;
+
+	vmcb->control.intercepts[INTERCEPT_DR] = 0;
+
+	recalc_intercepts(svm);
+}
+
 static int direct_access_msr_slot(u32 msr)
 {
 	u32 i;
@@ -947,50 +1026,24 @@ static void svm_disable_lbrv(struct kvm_vcpu *vcpu)
 		svm_copy_lbrs(svm->vmcb01.ptr, svm->vmcb);
 }
 
-static int svm_get_lbr_msr(struct vcpu_svm *svm, u32 index)
+static struct vmcb *svm_get_lbr_vmcb(struct vcpu_svm *svm)
 {
 	/*
-	 * If the LBR virtualization is disabled, the LBR msrs are always
-	 * kept in the vmcb01 to avoid copying them on nested guest entries.
-	 *
-	 * If nested, and the LBR virtualization is enabled/disabled, the msrs
-	 * are moved between the vmcb01 and vmcb02 as needed.
+	 * If LBR virtualization is disabled, the LBR MSRs are always kept in
+	 * vmcb01.  If LBR virtualization is enabled and L1 is running VMs of
+	 * its own, the MSRs are moved between vmcb01 and vmcb02 as needed.
 	 */
-	struct vmcb *vmcb =
-		(svm->vmcb->control.virt_ext & LBR_CTL_ENABLE_MASK) ?
-			svm->vmcb : svm->vmcb01.ptr;
-
-	switch (index) {
-	case MSR_IA32_DEBUGCTLMSR:
-		return vmcb->save.dbgctl;
-	case MSR_IA32_LASTBRANCHFROMIP:
-		return vmcb->save.br_from;
-	case MSR_IA32_LASTBRANCHTOIP:
-		return vmcb->save.br_to;
-	case MSR_IA32_LASTINTFROMIP:
-		return vmcb->save.last_excp_from;
-	case MSR_IA32_LASTINTTOIP:
-		return vmcb->save.last_excp_to;
-	default:
-		KVM_BUG(false, svm->vcpu.kvm,
-			"%s: Unknown MSR 0x%x", __func__, index);
-		return 0;
-	}
+	return svm->vmcb->control.virt_ext & LBR_CTL_ENABLE_MASK ? svm->vmcb :
+								   svm->vmcb01.ptr;
 }
 
 void svm_update_lbrv(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
-
-	bool enable_lbrv = svm_get_lbr_msr(svm, MSR_IA32_DEBUGCTLMSR) &
-					   DEBUGCTLMSR_LBR;
-
-	bool current_enable_lbrv = !!(svm->vmcb->control.virt_ext &
-				      LBR_CTL_ENABLE_MASK);
-
-	if (unlikely(is_guest_mode(vcpu) && svm->lbrv_enabled))
-		if (unlikely(svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK))
-			enable_lbrv = true;
+	bool current_enable_lbrv = svm->vmcb->control.virt_ext & LBR_CTL_ENABLE_MASK;
+	bool enable_lbrv = (svm_get_lbr_vmcb(svm)->save.dbgctl & DEBUGCTLMSR_LBR) ||
+			    (is_guest_mode(vcpu) && guest_can_use(vcpu, X86_FEATURE_LBRV) &&
+			    (svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK));
 
 	if (enable_lbrv == current_enable_lbrv)
 		return;
@@ -1101,21 +1154,23 @@ static u64 svm_get_l2_tsc_multiplier(struct kvm_vcpu *vcpu)
 	return svm->tsc_ratio_msr;
 }
 
-static void svm_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
+static void svm_write_tsc_offset(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
 
 	svm->vmcb01.ptr->control.tsc_offset = vcpu->arch.l1_tsc_offset;
-	svm->vmcb->control.tsc_offset = offset;
+	svm->vmcb->control.tsc_offset = vcpu->arch.tsc_offset;
 	vmcb_mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
 }
 
-static void svm_write_tsc_multiplier(struct kvm_vcpu *vcpu, u64 multiplier)
+void svm_write_tsc_multiplier(struct kvm_vcpu *vcpu)
 {
-	__svm_write_tsc_multiplier(multiplier);
+	preempt_disable();
+	if (to_svm(vcpu)->guest_state_loaded)
+		__svm_write_tsc_multiplier(vcpu->arch.tsc_scaling_ratio);
+	preempt_enable();
 }
 
-
 /* Evaluate instruction intercepts that depend on guest CPUID features. */
 static void svm_recalc_instruction_intercepts(struct kvm_vcpu *vcpu,
 					      struct vcpu_svm *svm)
@@ -1156,8 +1211,6 @@ static inline void init_vmcb_after_set_cpuid(struct kvm_vcpu *vcpu)
 
 		set_msr_interception(vcpu, svm->msrpm, MSR_IA32_SYSENTER_EIP, 0, 0);
 		set_msr_interception(vcpu, svm->msrpm, MSR_IA32_SYSENTER_ESP, 0, 0);
-
-		svm->v_vmload_vmsave_enabled = false;
 	} else {
 		/*
 		 * If hardware supports Virtual VMLOAD VMSAVE then enable it
@@ -1201,10 +1254,9 @@ static void init_vmcb(struct kvm_vcpu *vcpu)
 	 * Guest access to VMware backdoor ports could legitimately
 	 * trigger #GP because of TSS I/O permission bitmap.
 	 * We intercept those #GP and allow access to them anyway
-	 * as VMware does.  Don't intercept #GP for SEV guests as KVM can't
-	 * decrypt guest memory to decode the faulting instruction.
+	 * as VMware does.
 	 */
-	if (enable_vmware_backdoor && !sev_guest(vcpu->kvm))
+	if (enable_vmware_backdoor)
 		set_exception_intercept(svm, GP_VECTOR);
 
 	svm_set_intercept(svm, INTERCEPT_INTR);
@@ -1949,7 +2001,7 @@ static void svm_sync_dirty_debug_regs(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
 
-	if (vcpu->arch.guest_state_protected)
+	if (WARN_ON_ONCE(sev_es_guest(vcpu->kvm)))
 		return;
 
 	get_debugreg(vcpu->arch.db[0], 0);
@@ -2510,12 +2562,13 @@ static int iret_interception(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
 
+	WARN_ON_ONCE(sev_es_guest(vcpu->kvm));
+
 	++vcpu->stat.nmi_window_exits;
 	svm->awaiting_iret_completion = true;
 
 	svm_clr_iret_intercept(svm);
-	if (!sev_es_guest(vcpu->kvm))
-		svm->nmi_iret_rip = kvm_rip_read(vcpu);
+	svm->nmi_iret_rip = kvm_rip_read(vcpu);
 
 	kvm_make_request(KVM_REQ_EVENT, vcpu);
 	return 1;
@@ -2680,6 +2733,13 @@ static int dr_interception(struct kvm_vcpu *vcpu)
 	unsigned long val;
 	int err = 0;
 
+	/*
+	 * SEV-ES intercepts DR7 only to disable guest debugging and the guest issues a VMGEXIT
+	 * for DR7 write only. KVM cannot change DR7 (always swapped as type 'A') so return early.
+	 */
+	if (sev_es_guest(vcpu->kvm))
+		return 1;
+
 	if (vcpu->guest_debug == 0) {
 		/*
 		 * No more DR vmexits; force a reload of the debug registers
@@ -2764,7 +2824,8 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 
 	switch (msr_info->index) {
 	case MSR_AMD64_TSC_RATIO:
-		if (!msr_info->host_initiated && !svm->tsc_scaling_enabled)
+		if (!msr_info->host_initiated &&
+		    !guest_can_use(vcpu, X86_FEATURE_TSCRATEMSR))
 			return 1;
 		msr_info->data = svm->tsc_ratio_msr;
 		break;
@@ -2802,11 +2863,19 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		msr_info->data = svm->tsc_aux;
 		break;
 	case MSR_IA32_DEBUGCTLMSR:
+		msr_info->data = svm_get_lbr_vmcb(svm)->save.dbgctl;
+		break;
 	case MSR_IA32_LASTBRANCHFROMIP:
+		msr_info->data = svm_get_lbr_vmcb(svm)->save.br_from;
+		break;
 	case MSR_IA32_LASTBRANCHTOIP:
+		msr_info->data = svm_get_lbr_vmcb(svm)->save.br_to;
+		break;
 	case MSR_IA32_LASTINTFROMIP:
+		msr_info->data = svm_get_lbr_vmcb(svm)->save.last_excp_from;
+		break;
 	case MSR_IA32_LASTINTTOIP:
-		msr_info->data = svm_get_lbr_msr(svm, msr_info->index);
+		msr_info->data = svm_get_lbr_vmcb(svm)->save.last_excp_to;
 		break;
 	case MSR_VM_HSAVE_PA:
 		msr_info->data = svm->nested.hsave_msr;
@@ -2906,7 +2975,7 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
 	switch (ecx) {
 	case MSR_AMD64_TSC_RATIO:
 
-		if (!svm->tsc_scaling_enabled) {
+		if (!guest_can_use(vcpu, X86_FEATURE_TSCRATEMSR)) {
 
 			if (!msr->host_initiated)
 				return 1;
@@ -2928,7 +2997,8 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
 
 		svm->tsc_ratio_msr = data;
 
-		if (svm->tsc_scaling_enabled && is_guest_mode(vcpu))
+		if (guest_can_use(vcpu, X86_FEATURE_TSCRATEMSR) &&
+		    is_guest_mode(vcpu))
 			nested_svm_update_tsc_ratio_msr(vcpu);
 
 		break;
@@ -3037,13 +3107,8 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
 		if (data & DEBUGCTL_RESERVED_BITS)
 			return 1;
 
-		if (svm->vmcb->control.virt_ext & LBR_CTL_ENABLE_MASK)
-			svm->vmcb->save.dbgctl = data;
-		else
-			svm->vmcb01.ptr->save.dbgctl = data;
-
+		svm_get_lbr_vmcb(svm)->save.dbgctl = data;
 		svm_update_lbrv(vcpu);
-
 		break;
 	case MSR_VM_HSAVE_PA:
 		/*
@@ -3769,6 +3834,19 @@ static void svm_enable_nmi_window(struct kvm_vcpu *vcpu)
 	if (svm_get_nmi_mask(vcpu) && !svm->awaiting_iret_completion)
 		return; /* IRET will cause a vm exit */
 
+	/*
+	 * SEV-ES guests are responsible for signaling when a vCPU is ready to
+	 * receive a new NMI, as SEV-ES guests can't be single-stepped, i.e.
+	 * KVM can't intercept and single-step IRET to detect when NMIs are
+	 * unblocked (architecturally speaking).  See SVM_VMGEXIT_NMI_COMPLETE.
+	 *
+	 * Note, GIF is guaranteed to be '1' for SEV-ES guests as hardware
+	 * ignores SEV-ES guest writes to EFER.SVME *and* CLGI/STGI are not
+	 * supported NAEs in the GHCB protocol.
+	 */
+	if (sev_es_guest(vcpu->kvm))
+		return;
+
 	if (!gif_set(svm)) {
 		if (vgif)
 			svm_set_intercept(svm, INTERCEPT_STGI);
@@ -3918,12 +3996,11 @@ static void svm_complete_interrupts(struct kvm_vcpu *vcpu)
 	svm->soft_int_injected = false;
 
 	/*
-	 * If we've made progress since setting HF_IRET_MASK, we've
+	 * If we've made progress since setting awaiting_iret_completion, we've
 	 * executed an IRET and can allow NMI injection.
 	 */
 	if (svm->awaiting_iret_completion &&
-	    (sev_es_guest(vcpu->kvm) ||
-	     kvm_rip_read(vcpu) != svm->nmi_iret_rip)) {
+	    kvm_rip_read(vcpu) != svm->nmi_iret_rip) {
 		svm->awaiting_iret_completion = false;
 		svm->nmi_masked = false;
 		kvm_make_request(KVM_REQ_EVENT, vcpu);
@@ -4209,28 +4286,37 @@ static void svm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
 	struct vcpu_svm *svm = to_svm(vcpu);
 	struct kvm_cpuid_entry2 *best;
 
-	vcpu->arch.xsaves_enabled = guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) &&
-				    boot_cpu_has(X86_FEATURE_XSAVE) &&
-				    boot_cpu_has(X86_FEATURE_XSAVES);
-
-	/* Update nrips enabled cache */
-	svm->nrips_enabled = kvm_cpu_cap_has(X86_FEATURE_NRIPS) &&
-			     guest_cpuid_has(vcpu, X86_FEATURE_NRIPS);
-
-	svm->tsc_scaling_enabled = tsc_scaling && guest_cpuid_has(vcpu, X86_FEATURE_TSCRATEMSR);
-	svm->lbrv_enabled = lbrv && guest_cpuid_has(vcpu, X86_FEATURE_LBRV);
-
-	svm->v_vmload_vmsave_enabled = vls && guest_cpuid_has(vcpu, X86_FEATURE_V_VMSAVE_VMLOAD);
-
-	svm->pause_filter_enabled = kvm_cpu_cap_has(X86_FEATURE_PAUSEFILTER) &&
-			guest_cpuid_has(vcpu, X86_FEATURE_PAUSEFILTER);
+	/*
+	 * SVM doesn't provide a way to disable just XSAVES in the guest, KVM
+	 * can only disable all variants of by disallowing CR4.OSXSAVE from
+	 * being set.  As a result, if the host has XSAVE and XSAVES, and the
+	 * guest has XSAVE enabled, the guest can execute XSAVES without
+	 * faulting.  Treat XSAVES as enabled in this case regardless of
+	 * whether it's advertised to the guest so that KVM context switches
+	 * XSS on VM-Enter/VM-Exit.  Failure to do so would effectively give
+	 * the guest read/write access to the host's XSS.
+	 */
+	if (boot_cpu_has(X86_FEATURE_XSAVE) &&
+	    boot_cpu_has(X86_FEATURE_XSAVES) &&
+	    guest_cpuid_has(vcpu, X86_FEATURE_XSAVE))
+		kvm_governed_feature_set(vcpu, X86_FEATURE_XSAVES);
 
-	svm->pause_threshold_enabled = kvm_cpu_cap_has(X86_FEATURE_PFTHRESHOLD) &&
-			guest_cpuid_has(vcpu, X86_FEATURE_PFTHRESHOLD);
+	kvm_governed_feature_check_and_set(vcpu, X86_FEATURE_NRIPS);
+	kvm_governed_feature_check_and_set(vcpu, X86_FEATURE_TSCRATEMSR);
+	kvm_governed_feature_check_and_set(vcpu, X86_FEATURE_LBRV);
 
-	svm->vgif_enabled = vgif && guest_cpuid_has(vcpu, X86_FEATURE_VGIF);
+	/*
+	 * Intercept VMLOAD if the vCPU mode is Intel in order to emulate that
+	 * VMLOAD drops bits 63:32 of SYSENTER (ignoring the fact that exposing
+	 * SVM on Intel is bonkers and extremely unlikely to work).
+	 */
+	if (!guest_cpuid_is_intel(vcpu))
+		kvm_governed_feature_check_and_set(vcpu, X86_FEATURE_V_VMSAVE_VMLOAD);
 
-	svm->vnmi_enabled = vnmi && guest_cpuid_has(vcpu, X86_FEATURE_VNMI);
+	kvm_governed_feature_check_and_set(vcpu, X86_FEATURE_PAUSEFILTER);
+	kvm_governed_feature_check_and_set(vcpu, X86_FEATURE_PFTHRESHOLD);
+	kvm_governed_feature_check_and_set(vcpu, X86_FEATURE_VGIF);
+	kvm_governed_feature_check_and_set(vcpu, X86_FEATURE_VNMI);
 
 	svm_recalc_instruction_intercepts(vcpu, svm);
 
@@ -4651,16 +4737,25 @@ static bool svm_can_emulate_instruction(struct kvm_vcpu *vcpu, int emul_type,
 	 * and cannot be decrypted by KVM, i.e. KVM would read cyphertext and
 	 * decode garbage.
 	 *
-	 * Inject #UD if KVM reached this point without an instruction buffer.
-	 * In practice, this path should never be hit by a well-behaved guest,
-	 * e.g. KVM doesn't intercept #UD or #GP for SEV guests, but this path
-	 * is still theoretically reachable, e.g. via unaccelerated fault-like
-	 * AVIC access, and needs to be handled by KVM to avoid putting the
-	 * guest into an infinite loop.   Injecting #UD is somewhat arbitrary,
-	 * but its the least awful option given lack of insight into the guest.
+	 * If KVM is NOT trying to simply skip an instruction, inject #UD if
+	 * KVM reached this point without an instruction buffer.  In practice,
+	 * this path should never be hit by a well-behaved guest, e.g. KVM
+	 * doesn't intercept #UD or #GP for SEV guests, but this path is still
+	 * theoretically reachable, e.g. via unaccelerated fault-like AVIC
+	 * access, and needs to be handled by KVM to avoid putting the guest
+	 * into an infinite loop.   Injecting #UD is somewhat arbitrary, but
+	 * its the least awful option given lack of insight into the guest.
+	 *
+	 * If KVM is trying to skip an instruction, simply resume the guest.
+	 * If a #NPF occurs while the guest is vectoring an INT3/INTO, then KVM
+	 * will attempt to re-inject the INT3/INTO and skip the instruction.
+	 * In that scenario, retrying the INT3/INTO and hoping the guest will
+	 * make forward progress is the only option that has a chance of
+	 * success (and in practice it will work the vast majority of the time).
 	 */
 	if (unlikely(!insn)) {
-		kvm_queue_exception(vcpu, UD_VECTOR);
+		if (!(emul_type & EMULTYPE_SKIP))
+			kvm_queue_exception(vcpu, UD_VECTOR);
 		return false;
 	}
 
@@ -5112,9 +5207,11 @@ static __init int svm_hardware_setup(void)
 
 	svm_adjust_mmio_mask();
 
+	nrips = nrips && boot_cpu_has(X86_FEATURE_NRIPS);
+
 	/*
 	 * Note, SEV setup consumes npt_enabled and enable_mmio_caching (which
-	 * may be modified by svm_adjust_mmio_mask()).
+	 * may be modified by svm_adjust_mmio_mask()), as well as nrips.
 	 */
 	sev_hardware_setup();
 
@@ -5126,11 +5223,6 @@ static __init int svm_hardware_setup(void)
 			goto err;
 	}
 
-	if (nrips) {
-		if (!boot_cpu_has(X86_FEATURE_NRIPS))
-			nrips = false;
-	}
-
 	enable_apicv = avic = avic && avic_hardware_setup();
 
 	if (!enable_apicv) {
@@ -5213,6 +5305,13 @@ static struct kvm_x86_init_ops svm_init_ops __initdata = {
 	.pmu_ops = &amd_pmu_ops,
 };
 
+static void __svm_exit(void)
+{
+	kvm_x86_vendor_exit();
+
+	cpu_emergency_unregister_virt_callback(svm_emergency_disable);
+}
+
 static int __init svm_init(void)
 {
 	int r;
@@ -5226,6 +5325,8 @@ static int __init svm_init(void)
 	if (r)
 		return r;
 
+	cpu_emergency_register_virt_callback(svm_emergency_disable);
+
 	/*
 	 * Common KVM initialization _must_ come last, after this, /dev/kvm is
 	 * exposed to userspace!
@@ -5238,14 +5339,14 @@ static int __init svm_init(void)
 	return 0;
 
 err_kvm_init:
-	kvm_x86_vendor_exit();
+	__svm_exit();
 	return r;
 }
 
 static void __exit svm_exit(void)
 {
 	kvm_exit();
-	kvm_x86_vendor_exit();
+	__svm_exit();
 }
 
 module_init(svm_init)
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index 8239c8de45ac..f41253958357 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -22,6 +22,7 @@
 #include <asm/svm.h>
 #include <asm/sev-common.h>
 
+#include "cpuid.h"
 #include "kvm_cache_regs.h"
 
 #define __sme_page_pa(x) __sme_set(page_to_pfn(x) << PAGE_SHIFT)
@@ -33,6 +34,7 @@
 #define MSRPM_OFFSETS	32
 extern u32 msrpm_offsets[MSRPM_OFFSETS] __read_mostly;
 extern bool npt_enabled;
+extern int nrips;
 extern int vgif;
 extern bool intercept_smi;
 extern bool x2avic_enabled;
@@ -260,16 +262,6 @@ struct vcpu_svm {
 	unsigned long soft_int_next_rip;
 	bool soft_int_injected;
 
-	/* optional nested SVM features that are enabled for this guest  */
-	bool nrips_enabled                : 1;
-	bool tsc_scaling_enabled          : 1;
-	bool v_vmload_vmsave_enabled      : 1;
-	bool lbrv_enabled                 : 1;
-	bool pause_filter_enabled         : 1;
-	bool pause_threshold_enabled      : 1;
-	bool vgif_enabled                 : 1;
-	bool vnmi_enabled                 : 1;
-
 	u32 ldr_reg;
 	u32 dfr_reg;
 	struct page *avic_backing_page;
@@ -406,48 +398,6 @@ static inline bool vmcb12_is_intercept(struct vmcb_ctrl_area_cached *control, u3
 	return test_bit(bit, (unsigned long *)&control->intercepts);
 }
 
-static inline void set_dr_intercepts(struct vcpu_svm *svm)
-{
-	struct vmcb *vmcb = svm->vmcb01.ptr;
-
-	if (!sev_es_guest(svm->vcpu.kvm)) {
-		vmcb_set_intercept(&vmcb->control, INTERCEPT_DR0_READ);
-		vmcb_set_intercept(&vmcb->control, INTERCEPT_DR1_READ);
-		vmcb_set_intercept(&vmcb->control, INTERCEPT_DR2_READ);
-		vmcb_set_intercept(&vmcb->control, INTERCEPT_DR3_READ);
-		vmcb_set_intercept(&vmcb->control, INTERCEPT_DR4_READ);
-		vmcb_set_intercept(&vmcb->control, INTERCEPT_DR5_READ);
-		vmcb_set_intercept(&vmcb->control, INTERCEPT_DR6_READ);
-		vmcb_set_intercept(&vmcb->control, INTERCEPT_DR0_WRITE);
-		vmcb_set_intercept(&vmcb->control, INTERCEPT_DR1_WRITE);
-		vmcb_set_intercept(&vmcb->control, INTERCEPT_DR2_WRITE);
-		vmcb_set_intercept(&vmcb->control, INTERCEPT_DR3_WRITE);
-		vmcb_set_intercept(&vmcb->control, INTERCEPT_DR4_WRITE);
-		vmcb_set_intercept(&vmcb->control, INTERCEPT_DR5_WRITE);
-		vmcb_set_intercept(&vmcb->control, INTERCEPT_DR6_WRITE);
-	}
-
-	vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_READ);
-	vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_WRITE);
-
-	recalc_intercepts(svm);
-}
-
-static inline void clr_dr_intercepts(struct vcpu_svm *svm)
-{
-	struct vmcb *vmcb = svm->vmcb01.ptr;
-
-	vmcb->control.intercepts[INTERCEPT_DR] = 0;
-
-	/* DR7 access must remain intercepted for an SEV-ES guest */
-	if (sev_es_guest(svm->vcpu.kvm)) {
-		vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_READ);
-		vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_WRITE);
-	}
-
-	recalc_intercepts(svm);
-}
-
 static inline void set_exception_intercept(struct vcpu_svm *svm, u32 bit)
 {
 	struct vmcb *vmcb = svm->vmcb01.ptr;
@@ -493,7 +443,8 @@ static inline bool svm_is_intercept(struct vcpu_svm *svm, int bit)
 
 static inline bool nested_vgif_enabled(struct vcpu_svm *svm)
 {
-	return svm->vgif_enabled && (svm->nested.ctl.int_ctl & V_GIF_ENABLE_MASK);
+	return guest_can_use(&svm->vcpu, X86_FEATURE_VGIF) &&
+	       (svm->nested.ctl.int_ctl & V_GIF_ENABLE_MASK);
 }
 
 static inline struct vmcb *get_vgif_vmcb(struct vcpu_svm *svm)
@@ -544,7 +495,7 @@ static inline bool nested_npt_enabled(struct vcpu_svm *svm)
 
 static inline bool nested_vnmi_enabled(struct vcpu_svm *svm)
 {
-	return svm->vnmi_enabled &&
+	return guest_can_use(&svm->vcpu, X86_FEATURE_VNMI) &&
 	       (svm->nested.ctl.int_ctl & V_NMI_ENABLE_MASK);
 }
 
@@ -660,7 +611,7 @@ int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
 			       bool has_error_code, u32 error_code);
 int nested_svm_exit_special(struct vcpu_svm *svm);
 void nested_svm_update_tsc_ratio_msr(struct kvm_vcpu *vcpu);
-void __svm_write_tsc_multiplier(u64 multiplier);
+void svm_write_tsc_multiplier(struct kvm_vcpu *vcpu);
 void nested_copy_vmcb_control_to_cache(struct vcpu_svm *svm,
 				       struct vmcb_control_area *control);
 void nested_copy_vmcb_save_to_cache(struct vcpu_svm *svm,
diff --git a/arch/x86/kvm/vmx/capabilities.h b/arch/x86/kvm/vmx/capabilities.h
index d0abee35d7ba..41a4533f9989 100644
--- a/arch/x86/kvm/vmx/capabilities.h
+++ b/arch/x86/kvm/vmx/capabilities.h
@@ -252,7 +252,7 @@ static inline bool cpu_has_vmx_pml(void)
 static inline bool cpu_has_vmx_xsaves(void)
 {
 	return vmcs_config.cpu_based_2nd_exec_ctrl &
-		SECONDARY_EXEC_XSAVES;
+		SECONDARY_EXEC_ENABLE_XSAVES;
 }
 
 static inline bool cpu_has_vmx_waitpkg(void)
diff --git a/arch/x86/kvm/vmx/hyperv.c b/arch/x86/kvm/vmx/hyperv.c
index 79450e1ed7cf..313b8bb5b8a7 100644
--- a/arch/x86/kvm/vmx/hyperv.c
+++ b/arch/x86/kvm/vmx/hyperv.c
@@ -78,7 +78,7 @@
 	 SECONDARY_EXEC_DESC |						\
 	 SECONDARY_EXEC_ENABLE_RDTSCP |					\
 	 SECONDARY_EXEC_ENABLE_INVPCID |				\
-	 SECONDARY_EXEC_XSAVES |					\
+	 SECONDARY_EXEC_ENABLE_XSAVES |					\
 	 SECONDARY_EXEC_RDSEED_EXITING |				\
 	 SECONDARY_EXEC_RDRAND_EXITING |				\
 	 SECONDARY_EXEC_TSC_SCALING |					\
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index 516391cc0d64..c5ec0ef51ff7 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -2307,7 +2307,7 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct loaded_vmcs *vmcs0
 				  SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE |
 				  SECONDARY_EXEC_ENABLE_INVPCID |
 				  SECONDARY_EXEC_ENABLE_RDTSCP |
-				  SECONDARY_EXEC_XSAVES |
+				  SECONDARY_EXEC_ENABLE_XSAVES |
 				  SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE |
 				  SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
 				  SECONDARY_EXEC_APIC_REGISTER_VIRT |
@@ -6331,7 +6331,7 @@ static bool nested_vmx_l1_wants_exit(struct kvm_vcpu *vcpu,
 		 * If if it were, XSS would have to be checked against
 		 * the XSS exit bitmap in vmcs12.
 		 */
-		return nested_cpu_has2(vmcs12, SECONDARY_EXEC_XSAVES);
+		return nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_XSAVES);
 	case EXIT_REASON_UMWAIT:
 	case EXIT_REASON_TPAUSE:
 		return nested_cpu_has2(vmcs12,
@@ -6426,7 +6426,7 @@ static int vmx_get_nested_state(struct kvm_vcpu *vcpu,
 	vmx = to_vmx(vcpu);
 	vmcs12 = get_vmcs12(vcpu);
 
-	if (nested_vmx_allowed(vcpu) &&
+	if (guest_can_use(vcpu, X86_FEATURE_VMX) &&
 	    (vmx->nested.vmxon || vmx->nested.smm.vmxon)) {
 		kvm_state.hdr.vmx.vmxon_pa = vmx->nested.vmxon_ptr;
 		kvm_state.hdr.vmx.vmcs12_pa = vmx->nested.current_vmptr;
@@ -6567,7 +6567,7 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu,
 		if (kvm_state->flags & ~KVM_STATE_NESTED_EVMCS)
 			return -EINVAL;
 	} else {
-		if (!nested_vmx_allowed(vcpu))
+		if (!guest_can_use(vcpu, X86_FEATURE_VMX))
 			return -EINVAL;
 
 		if (!page_address_valid(vcpu, kvm_state->hdr.vmx.vmxon_pa))
@@ -6601,7 +6601,8 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu,
 		return -EINVAL;
 
 	if ((kvm_state->flags & KVM_STATE_NESTED_EVMCS) &&
-		(!nested_vmx_allowed(vcpu) || !vmx->nested.enlightened_vmcs_enabled))
+	    (!guest_can_use(vcpu, X86_FEATURE_VMX) ||
+	     !vmx->nested.enlightened_vmcs_enabled))
 			return -EINVAL;
 
 	vmx_leave_nested(vcpu);
@@ -6874,7 +6875,7 @@ static void nested_vmx_setup_secondary_ctls(u32 ept_caps,
 		SECONDARY_EXEC_ENABLE_INVPCID |
 		SECONDARY_EXEC_ENABLE_VMFUNC |
 		SECONDARY_EXEC_RDSEED_EXITING |
-		SECONDARY_EXEC_XSAVES |
+		SECONDARY_EXEC_ENABLE_XSAVES |
 		SECONDARY_EXEC_TSC_SCALING |
 		SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE;
 
diff --git a/arch/x86/kvm/vmx/nested.h b/arch/x86/kvm/vmx/nested.h
index 96952263b029..b4b9d51438c6 100644
--- a/arch/x86/kvm/vmx/nested.h
+++ b/arch/x86/kvm/vmx/nested.h
@@ -168,7 +168,7 @@ static inline int nested_cpu_has_ept(struct vmcs12 *vmcs12)
 
 static inline bool nested_cpu_has_xsaves(struct vmcs12 *vmcs12)
 {
-	return nested_cpu_has2(vmcs12, SECONDARY_EXEC_XSAVES);
+	return nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_XSAVES);
 }
 
 static inline bool nested_cpu_has_pml(struct vmcs12 *vmcs12)
diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
index 80c769c58a87..f2efa0bf7ae8 100644
--- a/arch/x86/kvm/vmx/pmu_intel.c
+++ b/arch/x86/kvm/vmx/pmu_intel.c
@@ -22,23 +22,51 @@
 
 #define MSR_PMC_FULL_WIDTH_BIT      (MSR_IA32_PMC0 - MSR_IA32_PERFCTR0)
 
+enum intel_pmu_architectural_events {
+	/*
+	 * The order of the architectural events matters as support for each
+	 * event is enumerated via CPUID using the index of the event.
+	 */
+	INTEL_ARCH_CPU_CYCLES,
+	INTEL_ARCH_INSTRUCTIONS_RETIRED,
+	INTEL_ARCH_REFERENCE_CYCLES,
+	INTEL_ARCH_LLC_REFERENCES,
+	INTEL_ARCH_LLC_MISSES,
+	INTEL_ARCH_BRANCHES_RETIRED,
+	INTEL_ARCH_BRANCHES_MISPREDICTED,
+
+	NR_REAL_INTEL_ARCH_EVENTS,
+
+	/*
+	 * Pseudo-architectural event used to implement IA32_FIXED_CTR2, a.k.a.
+	 * TSC reference cycles.  The architectural reference cycles event may
+	 * or may not actually use the TSC as the reference, e.g. might use the
+	 * core crystal clock or the bus clock (yeah, "architectural").
+	 */
+	PSEUDO_ARCH_REFERENCE_CYCLES = NR_REAL_INTEL_ARCH_EVENTS,
+	NR_INTEL_ARCH_EVENTS,
+};
+
 static struct {
 	u8 eventsel;
 	u8 unit_mask;
 } const intel_arch_events[] = {
-	[0] = { 0x3c, 0x00 },
-	[1] = { 0xc0, 0x00 },
-	[2] = { 0x3c, 0x01 },
-	[3] = { 0x2e, 0x4f },
-	[4] = { 0x2e, 0x41 },
-	[5] = { 0xc4, 0x00 },
-	[6] = { 0xc5, 0x00 },
-	/* The above index must match CPUID 0x0A.EBX bit vector */
-	[7] = { 0x00, 0x03 },
+	[INTEL_ARCH_CPU_CYCLES]			= { 0x3c, 0x00 },
+	[INTEL_ARCH_INSTRUCTIONS_RETIRED]	= { 0xc0, 0x00 },
+	[INTEL_ARCH_REFERENCE_CYCLES]		= { 0x3c, 0x01 },
+	[INTEL_ARCH_LLC_REFERENCES]		= { 0x2e, 0x4f },
+	[INTEL_ARCH_LLC_MISSES]			= { 0x2e, 0x41 },
+	[INTEL_ARCH_BRANCHES_RETIRED]		= { 0xc4, 0x00 },
+	[INTEL_ARCH_BRANCHES_MISPREDICTED]	= { 0xc5, 0x00 },
+	[PSEUDO_ARCH_REFERENCE_CYCLES]		= { 0x00, 0x03 },
 };
 
 /* mapping between fixed pmc index and intel_arch_events array */
-static int fixed_pmc_events[] = {1, 0, 7};
+static int fixed_pmc_events[] = {
+	[0] = INTEL_ARCH_INSTRUCTIONS_RETIRED,
+	[1] = INTEL_ARCH_CPU_CYCLES,
+	[2] = PSEUDO_ARCH_REFERENCE_CYCLES,
+};
 
 static void reprogram_fixed_counters(struct kvm_pmu *pmu, u64 data)
 {
@@ -80,16 +108,18 @@ static bool intel_hw_event_available(struct kvm_pmc *pmc)
 	u8 unit_mask = (pmc->eventsel & ARCH_PERFMON_EVENTSEL_UMASK) >> 8;
 	int i;
 
-	for (i = 0; i < ARRAY_SIZE(intel_arch_events); i++) {
+	BUILD_BUG_ON(ARRAY_SIZE(intel_arch_events) != NR_INTEL_ARCH_EVENTS);
+
+	/*
+	 * Disallow events reported as unavailable in guest CPUID.  Note, this
+	 * doesn't apply to pseudo-architectural events.
+	 */
+	for (i = 0; i < NR_REAL_INTEL_ARCH_EVENTS; i++) {
 		if (intel_arch_events[i].eventsel != event_select ||
 		    intel_arch_events[i].unit_mask != unit_mask)
 			continue;
 
-		/* disable event that reported as not present by cpuid */
-		if ((i < 7) && !(pmu->available_event_types & (1 << i)))
-			return false;
-
-		break;
+		return pmu->available_event_types & BIT(i);
 	}
 
 	return true;
@@ -438,16 +468,17 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 
 static void setup_fixed_pmc_eventsel(struct kvm_pmu *pmu)
 {
-	size_t size = ARRAY_SIZE(fixed_pmc_events);
-	struct kvm_pmc *pmc;
-	u32 event;
 	int i;
 
+	BUILD_BUG_ON(ARRAY_SIZE(fixed_pmc_events) != KVM_PMC_MAX_FIXED);
+
 	for (i = 0; i < pmu->nr_arch_fixed_counters; i++) {
-		pmc = &pmu->fixed_counters[i];
-		event = fixed_pmc_events[array_index_nospec(i, size)];
+		int index = array_index_nospec(i, KVM_PMC_MAX_FIXED);
+		struct kvm_pmc *pmc = &pmu->fixed_counters[index];
+		u32 event = fixed_pmc_events[index];
+
 		pmc->eventsel = (intel_arch_events[event].unit_mask << 8) |
-			intel_arch_events[event].eventsel;
+				 intel_arch_events[event].eventsel;
 	}
 }
 
@@ -508,10 +539,8 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
 	if (pmu->version == 1) {
 		pmu->nr_arch_fixed_counters = 0;
 	} else {
-		pmu->nr_arch_fixed_counters =
-			min3(ARRAY_SIZE(fixed_pmc_events),
-			     (size_t) edx.split.num_counters_fixed,
-			     (size_t)kvm_pmu_cap.num_counters_fixed);
+		pmu->nr_arch_fixed_counters = min_t(int, edx.split.num_counters_fixed,
+						    kvm_pmu_cap.num_counters_fixed);
 		edx.split.bit_width_fixed = min_t(int, edx.split.bit_width_fixed,
 						  kvm_pmu_cap.bit_width_fixed);
 		pmu->counter_bitmask[KVM_PMC_FIXED] =
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index b483a8baaacf..72e3943f3693 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -41,13 +41,12 @@
 #include <asm/idtentry.h>
 #include <asm/io.h>
 #include <asm/irq_remapping.h>
-#include <asm/kexec.h>
+#include <asm/reboot.h>
 #include <asm/perf_event.h>
 #include <asm/mmu_context.h>
 #include <asm/mshyperv.h>
 #include <asm/mwait.h>
 #include <asm/spec-ctrl.h>
-#include <asm/virtext.h>
 #include <asm/vmx.h>
 
 #include "capabilities.h"
@@ -237,9 +236,6 @@ static const struct {
 #define L1D_CACHE_ORDER 4
 static void *vmx_l1d_flush_pages;
 
-/* Control for disabling CPU Fill buffer clear */
-static bool __read_mostly vmx_fb_clear_ctrl_available;
-
 static int vmx_setup_l1d_flush(enum vmx_l1d_flush_state l1tf)
 {
 	struct page *page;
@@ -255,14 +251,9 @@ static int vmx_setup_l1d_flush(enum vmx_l1d_flush_state l1tf)
 		return 0;
 	}
 
-	if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES)) {
-		u64 msr;
-
-		rdmsrl(MSR_IA32_ARCH_CAPABILITIES, msr);
-		if (msr & ARCH_CAP_SKIP_VMENTRY_L1DFLUSH) {
-			l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_NOT_REQUIRED;
-			return 0;
-		}
+	if (host_arch_capabilities & ARCH_CAP_SKIP_VMENTRY_L1DFLUSH) {
+		l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_NOT_REQUIRED;
+		return 0;
 	}
 
 	/* If set to auto use the default l1tf mitigation method */
@@ -366,22 +357,9 @@ static int vmentry_l1d_flush_set(const char *s, const struct kernel_param *kp)
 static int vmentry_l1d_flush_get(char *s, const struct kernel_param *kp)
 {
 	if (WARN_ON_ONCE(l1tf_vmx_mitigation >= ARRAY_SIZE(vmentry_l1d_param)))
-		return sprintf(s, "???\n");
+		return sysfs_emit(s, "???\n");
 
-	return sprintf(s, "%s\n", vmentry_l1d_param[l1tf_vmx_mitigation].option);
-}
-
-static void vmx_setup_fb_clear_ctrl(void)
-{
-	u64 msr;
-
-	if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES) &&
-	    !boot_cpu_has_bug(X86_BUG_MDS) &&
-	    !boot_cpu_has_bug(X86_BUG_TAA)) {
-		rdmsrl(MSR_IA32_ARCH_CAPABILITIES, msr);
-		if (msr & ARCH_CAP_FB_CLEAR_CTRL)
-			vmx_fb_clear_ctrl_available = true;
-	}
+	return sysfs_emit(s, "%s\n", vmentry_l1d_param[l1tf_vmx_mitigation].option);
 }
 
 static __always_inline void vmx_disable_fb_clear(struct vcpu_vmx *vmx)
@@ -409,7 +387,9 @@ static __always_inline void vmx_enable_fb_clear(struct vcpu_vmx *vmx)
 
 static void vmx_update_fb_clear_dis(struct kvm_vcpu *vcpu, struct vcpu_vmx *vmx)
 {
-	vmx->disable_fb_clear = vmx_fb_clear_ctrl_available;
+	vmx->disable_fb_clear = (host_arch_capabilities & ARCH_CAP_FB_CLEAR_CTRL) &&
+				!boot_cpu_has_bug(X86_BUG_MDS) &&
+				!boot_cpu_has_bug(X86_BUG_TAA);
 
 	/*
 	 * If guest will not execute VERW, there is no need to set FB_CLEAR_DIS
@@ -754,17 +734,51 @@ static int vmx_set_guest_uret_msr(struct vcpu_vmx *vmx,
 	return ret;
 }
 
-#ifdef CONFIG_KEXEC_CORE
-static void crash_vmclear_local_loaded_vmcss(void)
+/*
+ * Disable VMX and clear CR4.VMXE (even if VMXOFF faults)
+ *
+ * Note, VMXOFF causes a #UD if the CPU is !post-VMXON, but it's impossible to
+ * atomically track post-VMXON state, e.g. this may be called in NMI context.
+ * Eat all faults as all other faults on VMXOFF faults are mode related, i.e.
+ * faults are guaranteed to be due to the !post-VMXON check unless the CPU is
+ * magically in RM, VM86, compat mode, or at CPL>0.
+ */
+static int kvm_cpu_vmxoff(void)
+{
+	asm_volatile_goto("1: vmxoff\n\t"
+			  _ASM_EXTABLE(1b, %l[fault])
+			  ::: "cc", "memory" : fault);
+
+	cr4_clear_bits(X86_CR4_VMXE);
+	return 0;
+
+fault:
+	cr4_clear_bits(X86_CR4_VMXE);
+	return -EIO;
+}
+
+static void vmx_emergency_disable(void)
 {
 	int cpu = raw_smp_processor_id();
 	struct loaded_vmcs *v;
 
+	kvm_rebooting = true;
+
+	/*
+	 * Note, CR4.VMXE can be _cleared_ in NMI context, but it can only be
+	 * set in task context.  If this races with VMX is disabled by an NMI,
+	 * VMCLEAR and VMXOFF may #UD, but KVM will eat those faults due to
+	 * kvm_rebooting set.
+	 */
+	if (!(__read_cr4() & X86_CR4_VMXE))
+		return;
+
 	list_for_each_entry(v, &per_cpu(loaded_vmcss_on_cpu, cpu),
 			    loaded_vmcss_on_cpu_link)
 		vmcs_clear(v->vmcs);
+
+	kvm_cpu_vmxoff();
 }
-#endif /* CONFIG_KEXEC_CORE */
 
 static void __loaded_vmcs_clear(void *arg)
 {
@@ -1899,25 +1913,14 @@ u64 vmx_get_l2_tsc_multiplier(struct kvm_vcpu *vcpu)
 	return kvm_caps.default_tsc_scaling_ratio;
 }
 
-static void vmx_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
+static void vmx_write_tsc_offset(struct kvm_vcpu *vcpu)
 {
-	vmcs_write64(TSC_OFFSET, offset);
+	vmcs_write64(TSC_OFFSET, vcpu->arch.tsc_offset);
 }
 
-static void vmx_write_tsc_multiplier(struct kvm_vcpu *vcpu, u64 multiplier)
+static void vmx_write_tsc_multiplier(struct kvm_vcpu *vcpu)
 {
-	vmcs_write64(TSC_MULTIPLIER, multiplier);
-}
-
-/*
- * nested_vmx_allowed() checks whether a guest should be allowed to use VMX
- * instructions and MSRs (i.e., nested VMX). Nested VMX is disabled for
- * all guests if the "nested" module option is off, and can also be disabled
- * for a single guest by disabling its VMX cpuid bit.
- */
-bool nested_vmx_allowed(struct kvm_vcpu *vcpu)
-{
-	return nested && guest_cpuid_has(vcpu, X86_FEATURE_VMX);
+	vmcs_write64(TSC_MULTIPLIER, vcpu->arch.tsc_scaling_ratio);
 }
 
 /*
@@ -2047,7 +2050,7 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 			[msr_info->index - MSR_IA32_SGXLEPUBKEYHASH0];
 		break;
 	case KVM_FIRST_EMULATED_VMX_MSR ... KVM_LAST_EMULATED_VMX_MSR:
-		if (!nested_vmx_allowed(vcpu))
+		if (!guest_can_use(vcpu, X86_FEATURE_VMX))
 			return 1;
 		if (vmx_get_vmx_msr(&vmx->nested.msrs, msr_info->index,
 				    &msr_info->data))
@@ -2355,7 +2358,7 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 	case KVM_FIRST_EMULATED_VMX_MSR ... KVM_LAST_EMULATED_VMX_MSR:
 		if (!msr_info->host_initiated)
 			return 1; /* they are read-only */
-		if (!nested_vmx_allowed(vcpu))
+		if (!guest_can_use(vcpu, X86_FEATURE_VMX))
 			return 1;
 		return vmx_set_vmx_msr(vcpu, msr_index, data);
 	case MSR_IA32_RTIT_CTL:
@@ -2729,11 +2732,11 @@ static int setup_vmcs_config(struct vmcs_config *vmcs_conf,
 	return 0;
 }
 
-static bool kvm_is_vmx_supported(void)
+static bool __kvm_is_vmx_supported(void)
 {
-	int cpu = raw_smp_processor_id();
+	int cpu = smp_processor_id();
 
-	if (!cpu_has_vmx()) {
+	if (!(cpuid_ecx(1) & feature_bit(VMX))) {
 		pr_err("VMX not supported by CPU %d\n", cpu);
 		return false;
 	}
@@ -2747,13 +2750,24 @@ static bool kvm_is_vmx_supported(void)
 	return true;
 }
 
+static bool kvm_is_vmx_supported(void)
+{
+	bool supported;
+
+	migrate_disable();
+	supported = __kvm_is_vmx_supported();
+	migrate_enable();
+
+	return supported;
+}
+
 static int vmx_check_processor_compat(void)
 {
 	int cpu = raw_smp_processor_id();
 	struct vmcs_config vmcs_conf;
 	struct vmx_capability vmx_cap;
 
-	if (!kvm_is_vmx_supported())
+	if (!__kvm_is_vmx_supported())
 		return -EIO;
 
 	if (setup_vmcs_config(&vmcs_conf, &vmx_cap) < 0) {
@@ -2833,7 +2847,7 @@ static void vmx_hardware_disable(void)
 {
 	vmclear_local_loaded_vmcss();
 
-	if (cpu_vmxoff())
+	if (kvm_cpu_vmxoff())
 		kvm_spurious_fault();
 
 	hv_reset_evmcs();
@@ -3071,13 +3085,6 @@ static void enter_rmode(struct kvm_vcpu *vcpu)
 
 	vmx->rmode.vm86_active = 1;
 
-	/*
-	 * Very old userspace does not call KVM_SET_TSS_ADDR before entering
-	 * vcpu. Warn the user that an update is overdue.
-	 */
-	if (!kvm_vmx->tss_addr)
-		pr_warn_once("KVM_SET_TSS_ADDR needs to be called before running vCPU\n");
-
 	vmx_segment_cache_clear(vmx);
 
 	vmcs_writel(GUEST_TR_BASE, kvm_vmx->tss_addr);
@@ -3350,7 +3357,7 @@ void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
 	vmx->emulation_required = vmx_emulation_required(vcpu);
 }
 
-static int vmx_get_max_tdp_level(void)
+static int vmx_get_max_ept_level(void)
 {
 	if (cpu_has_vmx_ept_5levels())
 		return 5;
@@ -4553,16 +4560,19 @@ vmx_adjust_secondary_exec_control(struct vcpu_vmx *vmx, u32 *exec_control,
  * based on a single guest CPUID bit, with a dedicated feature bit.  This also
  * verifies that the control is actually supported by KVM and hardware.
  */
-#define vmx_adjust_sec_exec_control(vmx, exec_control, name, feat_name, ctrl_name, exiting) \
-({									 \
-	bool __enabled;							 \
-									 \
-	if (cpu_has_vmx_##name()) {					 \
-		__enabled = guest_cpuid_has(&(vmx)->vcpu,		 \
-					    X86_FEATURE_##feat_name);	 \
-		vmx_adjust_secondary_exec_control(vmx, exec_control,	 \
-			SECONDARY_EXEC_##ctrl_name, __enabled, exiting); \
-	}								 \
+#define vmx_adjust_sec_exec_control(vmx, exec_control, name, feat_name, ctrl_name, exiting)	\
+({												\
+	struct kvm_vcpu *__vcpu = &(vmx)->vcpu;							\
+	bool __enabled;										\
+												\
+	if (cpu_has_vmx_##name()) {								\
+		if (kvm_is_governed_feature(X86_FEATURE_##feat_name))				\
+			__enabled = guest_can_use(__vcpu, X86_FEATURE_##feat_name);		\
+		else										\
+			__enabled = guest_cpuid_has(__vcpu, X86_FEATURE_##feat_name);		\
+		vmx_adjust_secondary_exec_control(vmx, exec_control, SECONDARY_EXEC_##ctrl_name,\
+						  __enabled, exiting);				\
+	}											\
 })
 
 /* More macro magic for ENABLE_/opt-in versus _EXITING/opt-out controls. */
@@ -4622,19 +4632,7 @@ static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx)
 	if (!enable_pml || !atomic_read(&vcpu->kvm->nr_memslots_dirty_logging))
 		exec_control &= ~SECONDARY_EXEC_ENABLE_PML;
 
-	if (cpu_has_vmx_xsaves()) {
-		/* Exposing XSAVES only when XSAVE is exposed */
-		bool xsaves_enabled =
-			boot_cpu_has(X86_FEATURE_XSAVE) &&
-			guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) &&
-			guest_cpuid_has(vcpu, X86_FEATURE_XSAVES);
-
-		vcpu->arch.xsaves_enabled = xsaves_enabled;
-
-		vmx_adjust_secondary_exec_control(vmx, &exec_control,
-						  SECONDARY_EXEC_XSAVES,
-						  xsaves_enabled, false);
-	}
+	vmx_adjust_sec_exec_feature(vmx, &exec_control, xsaves, XSAVES);
 
 	/*
 	 * RDPID is also gated by ENABLE_RDTSCP, turn on the control if either
@@ -4653,6 +4651,7 @@ static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx)
 						  SECONDARY_EXEC_ENABLE_RDTSCP,
 						  rdpid_or_rdtscp_enabled, false);
 	}
+
 	vmx_adjust_sec_exec_feature(vmx, &exec_control, invpcid, INVPCID);
 
 	vmx_adjust_sec_exec_exiting(vmx, &exec_control, rdrand, RDRAND);
@@ -6796,8 +6795,10 @@ static void vmx_set_apic_access_page_addr(struct kvm_vcpu *vcpu)
 	vmcs_write64(APIC_ACCESS_ADDR, pfn_to_hpa(pfn));
 	read_unlock(&vcpu->kvm->mmu_lock);
 
-	vmx_flush_tlb_current(vcpu);
-
+	/*
+	 * No need for a manual TLB flush at this point, KVM has already done a
+	 * flush if there were SPTEs pointing at the previous page.
+	 */
 out:
 	/*
 	 * Do not pin apic access page in memory, the MMU notifier
@@ -7243,13 +7244,20 @@ static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu,
 				   flags);
 
 	vcpu->arch.cr2 = native_read_cr2();
+	vcpu->arch.regs_avail &= ~VMX_REGS_LAZY_LOAD_SET;
+
+	vmx->idt_vectoring_info = 0;
 
 	vmx_enable_fb_clear(vmx);
 
-	if (unlikely(vmx->fail))
+	if (unlikely(vmx->fail)) {
 		vmx->exit_reason.full = 0xdead;
-	else
-		vmx->exit_reason.full = vmcs_read32(VM_EXIT_REASON);
+		goto out;
+	}
+
+	vmx->exit_reason.full = vmcs_read32(VM_EXIT_REASON);
+	if (likely(!vmx->exit_reason.failed_vmentry))
+		vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD);
 
 	if ((u16)vmx->exit_reason.basic == EXIT_REASON_EXCEPTION_NMI &&
 	    is_nmi(vmx_get_intr_info(vcpu))) {
@@ -7258,6 +7266,7 @@ static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu,
 		kvm_after_interrupt(vcpu);
 	}
 
+out:
 	guest_state_exit_irqoff();
 }
 
@@ -7379,8 +7388,6 @@ static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu)
 	loadsegment(es, __USER_DS);
 #endif
 
-	vcpu->arch.regs_avail &= ~VMX_REGS_LAZY_LOAD_SET;
-
 	pt_guest_exit(vmx);
 
 	kvm_load_host_xsave_state(vcpu);
@@ -7397,17 +7404,12 @@ static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu)
 		vmx->nested.nested_run_pending = 0;
 	}
 
-	vmx->idt_vectoring_info = 0;
-
 	if (unlikely(vmx->fail))
 		return EXIT_FASTPATH_NONE;
 
 	if (unlikely((u16)vmx->exit_reason.basic == EXIT_REASON_MCE_DURING_VMENTRY))
 		kvm_machine_check();
 
-	if (likely(!vmx->exit_reason.failed_vmentry))
-		vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD);
-
 	trace_kvm_exit(vcpu, KVM_ISA_VMX);
 
 	if (unlikely(vmx->exit_reason.failed_vmentry))
@@ -7751,8 +7753,16 @@ static void vmx_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 
-	/* xsaves_enabled is recomputed in vmx_compute_secondary_exec_control(). */
-	vcpu->arch.xsaves_enabled = false;
+	/*
+	 * XSAVES is effectively enabled if and only if XSAVE is also exposed
+	 * to the guest.  XSAVES depends on CR4.OSXSAVE, and CR4.OSXSAVE can be
+	 * set if and only if XSAVE is supported.
+	 */
+	if (boot_cpu_has(X86_FEATURE_XSAVE) &&
+	    guest_cpuid_has(vcpu, X86_FEATURE_XSAVE))
+		kvm_governed_feature_check_and_set(vcpu, X86_FEATURE_XSAVES);
+
+	kvm_governed_feature_check_and_set(vcpu, X86_FEATURE_VMX);
 
 	vmx_setup_uret_msrs(vmx);
 
@@ -7760,7 +7770,7 @@ static void vmx_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
 		vmcs_set_secondary_exec_control(vmx,
 						vmx_secondary_exec_control(vmx));
 
-	if (nested_vmx_allowed(vcpu))
+	if (guest_can_use(vcpu, X86_FEATURE_VMX))
 		vmx->msr_ia32_feature_control_valid_bits |=
 			FEAT_CTL_VMX_ENABLED_INSIDE_SMX |
 			FEAT_CTL_VMX_ENABLED_OUTSIDE_SMX;
@@ -7769,7 +7779,7 @@ static void vmx_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
 			~(FEAT_CTL_VMX_ENABLED_INSIDE_SMX |
 			  FEAT_CTL_VMX_ENABLED_OUTSIDE_SMX);
 
-	if (nested_vmx_allowed(vcpu))
+	if (guest_can_use(vcpu, X86_FEATURE_VMX))
 		nested_vmx_cr_fixed1_bits_update(vcpu);
 
 	if (boot_cpu_has(X86_FEATURE_INTEL_PT) &&
@@ -8526,7 +8536,7 @@ static __init int hardware_setup(void)
 	 */
 	vmx_setup_me_spte_mask();
 
-	kvm_configure_mmu(enable_ept, 0, vmx_get_max_tdp_level(),
+	kvm_configure_mmu(enable_ept, 0, vmx_get_max_ept_level(),
 			  ept_caps_to_lpage_level(vmx_capability.ept));
 
 	/*
@@ -8622,10 +8632,8 @@ static void __vmx_exit(void)
 {
 	allow_smaller_maxphyaddr = false;
 
-#ifdef CONFIG_KEXEC_CORE
-	RCU_INIT_POINTER(crash_vmclear_loaded_vmcss, NULL);
-	synchronize_rcu();
-#endif
+	cpu_emergency_unregister_virt_callback(vmx_emergency_disable);
+
 	vmx_cleanup_l1d_flush();
 }
 
@@ -8666,18 +8674,14 @@ static int __init vmx_init(void)
 	if (r)
 		goto err_l1d_flush;
 
-	vmx_setup_fb_clear_ctrl();
-
 	for_each_possible_cpu(cpu) {
 		INIT_LIST_HEAD(&per_cpu(loaded_vmcss_on_cpu, cpu));
 
 		pi_init_cpu(cpu);
 	}
 
-#ifdef CONFIG_KEXEC_CORE
-	rcu_assign_pointer(crash_vmclear_loaded_vmcss,
-			   crash_vmclear_local_loaded_vmcss);
-#endif
+	cpu_emergency_register_virt_callback(vmx_emergency_disable);
+
 	vmx_check_vmcs12_offsets();
 
 	/*
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
index 32384ba38499..c2130d2c8e24 100644
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -374,7 +374,6 @@ struct kvm_vmx {
 	u64 *pid_table;
 };
 
-bool nested_vmx_allowed(struct kvm_vcpu *vcpu);
 void vmx_vcpu_load_vmcs(struct kvm_vcpu *vcpu, int cpu,
 			struct loaded_vmcs *buddy);
 int allocate_vpid(void);
@@ -562,7 +561,7 @@ static inline u8 vmx_get_rvi(void)
 	 SECONDARY_EXEC_APIC_REGISTER_VIRT |				\
 	 SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |				\
 	 SECONDARY_EXEC_SHADOW_VMCS |					\
-	 SECONDARY_EXEC_XSAVES |					\
+	 SECONDARY_EXEC_ENABLE_XSAVES |					\
 	 SECONDARY_EXEC_RDSEED_EXITING |				\
 	 SECONDARY_EXEC_RDRAND_EXITING |				\
 	 SECONDARY_EXEC_ENABLE_PML |					\
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index c381770bcbf1..6c9c81e82e65 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -25,6 +25,7 @@
 #include "tss.h"
 #include "kvm_cache_regs.h"
 #include "kvm_emulate.h"
+#include "mmu/page_track.h"
 #include "x86.h"
 #include "cpuid.h"
 #include "pmu.h"
@@ -237,6 +238,9 @@ EXPORT_SYMBOL_GPL(enable_apicv);
 u64 __read_mostly host_xss;
 EXPORT_SYMBOL_GPL(host_xss);
 
+u64 __read_mostly host_arch_capabilities;
+EXPORT_SYMBOL_GPL(host_arch_capabilities);
+
 const struct _kvm_stats_desc kvm_vm_stats_desc[] = {
 	KVM_GENERIC_VM_STATS(),
 	STATS_DESC_COUNTER(VM, mmu_shadow_zapped),
@@ -1021,7 +1025,7 @@ void kvm_load_guest_xsave_state(struct kvm_vcpu *vcpu)
 		if (vcpu->arch.xcr0 != host_xcr0)
 			xsetbv(XCR_XFEATURE_ENABLED_MASK, vcpu->arch.xcr0);
 
-		if (vcpu->arch.xsaves_enabled &&
+		if (guest_can_use(vcpu, X86_FEATURE_XSAVES) &&
 		    vcpu->arch.ia32_xss != host_xss)
 			wrmsrl(MSR_IA32_XSS, vcpu->arch.ia32_xss);
 	}
@@ -1052,7 +1056,7 @@ void kvm_load_host_xsave_state(struct kvm_vcpu *vcpu)
 		if (vcpu->arch.xcr0 != host_xcr0)
 			xsetbv(XCR_XFEATURE_ENABLED_MASK, host_xcr0);
 
-		if (vcpu->arch.xsaves_enabled &&
+		if (guest_can_use(vcpu, X86_FEATURE_XSAVES) &&
 		    vcpu->arch.ia32_xss != host_xss)
 			wrmsrl(MSR_IA32_XSS, host_xss);
 	}
@@ -1620,12 +1624,7 @@ static bool kvm_is_immutable_feature_msr(u32 msr)
 
 static u64 kvm_get_arch_capabilities(void)
 {
-	u64 data = 0;
-
-	if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES)) {
-		rdmsrl(MSR_IA32_ARCH_CAPABILITIES, data);
-		data &= KVM_SUPPORTED_ARCH_CAP;
-	}
+	u64 data = host_arch_capabilities & KVM_SUPPORTED_ARCH_CAP;
 
 	/*
 	 * If nx_huge_pages is enabled, KVM's shadow paging will ensure that
@@ -2631,7 +2630,7 @@ static void kvm_vcpu_write_tsc_offset(struct kvm_vcpu *vcpu, u64 l1_offset)
 	else
 		vcpu->arch.tsc_offset = l1_offset;
 
-	static_call(kvm_x86_write_tsc_offset)(vcpu, vcpu->arch.tsc_offset);
+	static_call(kvm_x86_write_tsc_offset)(vcpu);
 }
 
 static void kvm_vcpu_write_tsc_multiplier(struct kvm_vcpu *vcpu, u64 l1_multiplier)
@@ -2647,8 +2646,7 @@ static void kvm_vcpu_write_tsc_multiplier(struct kvm_vcpu *vcpu, u64 l1_multipli
 		vcpu->arch.tsc_scaling_ratio = l1_multiplier;
 
 	if (kvm_caps.has_tsc_control)
-		static_call(kvm_x86_write_tsc_multiplier)(
-			vcpu, vcpu->arch.tsc_scaling_ratio);
+		static_call(kvm_x86_write_tsc_multiplier)(vcpu);
 }
 
 static inline bool kvm_check_tsc_unstable(void)
@@ -4665,7 +4663,6 @@ static int kvm_x86_dev_get_attr(struct kvm_device_attr *attr)
 		return 0;
 	default:
 		return -ENXIO;
-		break;
 	}
 }
 
@@ -6532,7 +6529,7 @@ static void kvm_free_msr_filter(struct kvm_x86_msr_filter *msr_filter)
 static int kvm_add_msr_filter(struct kvm_x86_msr_filter *msr_filter,
 			      struct kvm_msr_filter_range *user_range)
 {
-	unsigned long *bitmap = NULL;
+	unsigned long *bitmap;
 	size_t bitmap_size;
 
 	if (!user_range->nmsrs)
@@ -8245,11 +8242,6 @@ static bool emulator_get_cpuid(struct x86_emulate_ctxt *ctxt,
 	return kvm_cpuid(emul_to_vcpu(ctxt), eax, ebx, ecx, edx, exact_only);
 }
 
-static bool emulator_guest_has_long_mode(struct x86_emulate_ctxt *ctxt)
-{
-	return guest_cpuid_has(emul_to_vcpu(ctxt), X86_FEATURE_LM);
-}
-
 static bool emulator_guest_has_movbe(struct x86_emulate_ctxt *ctxt)
 {
 	return guest_cpuid_has(emul_to_vcpu(ctxt), X86_FEATURE_MOVBE);
@@ -8351,7 +8343,6 @@ static const struct x86_emulate_ops emulate_ops = {
 	.fix_hypercall       = emulator_fix_hypercall,
 	.intercept           = emulator_intercept,
 	.get_cpuid           = emulator_get_cpuid,
-	.guest_has_long_mode = emulator_guest_has_long_mode,
 	.guest_has_movbe     = emulator_guest_has_movbe,
 	.guest_has_fxsr      = emulator_guest_has_fxsr,
 	.guest_has_rdpid     = emulator_guest_has_rdpid,
@@ -9172,7 +9163,7 @@ static int kvmclock_cpu_down_prep(unsigned int cpu)
 static void tsc_khz_changed(void *data)
 {
 	struct cpufreq_freqs *freq = data;
-	unsigned long khz = 0;
+	unsigned long khz;
 
 	WARN_ON_ONCE(boot_cpu_has(X86_FEATURE_CONSTANT_TSC));
 
@@ -9512,6 +9503,9 @@ static int __kvm_x86_vendor_init(struct kvm_x86_init_ops *ops)
 
 	kvm_init_pmu_capability(ops->pmu_ops);
 
+	if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES))
+		rdmsrl(MSR_IA32_ARCH_CAPABILITIES, host_arch_capabilities);
+
 	r = ops->hardware_setup();
 	if (r != 0)
 		goto out_mmu_exit;
@@ -11111,12 +11105,17 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
 			r = -EINTR;
 			goto out;
 		}
+
 		/*
-		 * It should be impossible for the hypervisor timer to be in
-		 * use before KVM has ever run the vCPU.
+		 * Don't bother switching APIC timer emulation from the
+		 * hypervisor timer to the software timer, the only way for the
+		 * APIC timer to be active is if userspace stuffed vCPU state,
+		 * i.e. put the vCPU into a nonsensical state.  Only an INIT
+		 * will transition the vCPU out of UNINITIALIZED (without more
+		 * state stuffing from userspace), which will reset the local
+		 * APIC and thus cancel the timer or drop the IRQ (if the timer
+		 * already expired).
 		 */
-		WARN_ON_ONCE(kvm_lapic_hv_timer_in_use(vcpu));
-
 		kvm_vcpu_srcu_read_unlock(vcpu);
 		kvm_vcpu_block(vcpu);
 		kvm_vcpu_srcu_read_lock(vcpu);
@@ -11798,15 +11797,22 @@ static int sync_regs(struct kvm_vcpu *vcpu)
 		__set_regs(vcpu, &vcpu->run->s.regs.regs);
 		vcpu->run->kvm_dirty_regs &= ~KVM_SYNC_X86_REGS;
 	}
+
 	if (vcpu->run->kvm_dirty_regs & KVM_SYNC_X86_SREGS) {
-		if (__set_sregs(vcpu, &vcpu->run->s.regs.sregs))
+		struct kvm_sregs sregs = vcpu->run->s.regs.sregs;
+
+		if (__set_sregs(vcpu, &sregs))
 			return -EINVAL;
+
 		vcpu->run->kvm_dirty_regs &= ~KVM_SYNC_X86_SREGS;
 	}
+
 	if (vcpu->run->kvm_dirty_regs & KVM_SYNC_X86_EVENTS) {
-		if (kvm_vcpu_ioctl_x86_set_vcpu_events(
-				vcpu, &vcpu->run->s.regs.events))
+		struct kvm_vcpu_events events = vcpu->run->s.regs.events;
+
+		if (kvm_vcpu_ioctl_x86_set_vcpu_events(vcpu, &events))
 			return -EINVAL;
+
 		vcpu->run->kvm_dirty_regs &= ~KVM_SYNC_X86_EVENTS;
 	}
 
@@ -12627,6 +12633,13 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
 				   struct kvm_memory_slot *new,
 				   enum kvm_mr_change change)
 {
+	/*
+	 * KVM doesn't support moving memslots when there are external page
+	 * trackers attached to the VM, i.e. if KVMGT is in use.
+	 */
+	if (change == KVM_MR_MOVE && kvm_page_track_has_external_user(kvm))
+		return -EINVAL;
+
 	if (change == KVM_MR_CREATE || change == KVM_MR_MOVE) {
 		if ((new->base_gfn + new->npages - 1) > kvm_mmu_max_gfn())
 			return -EINVAL;
@@ -12772,7 +12785,7 @@ static void kvm_mmu_slot_apply_flags(struct kvm *kvm,
 		 * See is_writable_pte() for more details (the case involving
 		 * access-tracked SPTEs is particularly relevant).
 		 */
-		kvm_arch_flush_remote_tlbs_memslot(kvm, new);
+		kvm_flush_remote_tlbs_memslot(kvm, new);
 	}
 }
 
@@ -12781,6 +12794,9 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
 				const struct kvm_memory_slot *new,
 				enum kvm_mr_change change)
 {
+	if (change == KVM_MR_DELETE)
+		kvm_page_track_delete_slot(kvm, old);
+
 	if (!kvm->arch.n_requested_mmu_pages &&
 	    (change == KVM_MR_CREATE || change == KVM_MR_DELETE)) {
 		unsigned long nr_mmu_pages;
@@ -12797,17 +12813,6 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
 		kvm_arch_free_memslot(kvm, old);
 }
 
-void kvm_arch_flush_shadow_all(struct kvm *kvm)
-{
-	kvm_mmu_zap_all(kvm);
-}
-
-void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
-				   struct kvm_memory_slot *slot)
-{
-	kvm_page_track_flush_slot(kvm, slot);
-}
-
 static inline bool kvm_guest_apic_has_interrupt(struct kvm_vcpu *vcpu)
 {
 	return (is_guest_mode(vcpu) &&
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 82e3dafc5453..1e7be1f6ab29 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -323,6 +323,7 @@ fastpath_t handle_fastpath_set_msr_irqoff(struct kvm_vcpu *vcpu);
 
 extern u64 host_xcr0;
 extern u64 host_xss;
+extern u64 host_arch_capabilities;
 
 extern struct kvm_caps kvm_caps;
 
diff --git a/arch/x86/um/Makefile b/arch/x86/um/Makefile
index ee89f6bb9242..8bc72a51b257 100644
--- a/arch/x86/um/Makefile
+++ b/arch/x86/um/Makefile
@@ -13,15 +13,16 @@ obj-y = bugs_$(BITS).o delay.o fault.o ldt.o \
 	ptrace_$(BITS).o ptrace_user.o setjmp_$(BITS).o signal.o \
 	stub_$(BITS).o stub_segv.o \
 	sys_call_table_$(BITS).o sysrq_$(BITS).o tls_$(BITS).o \
-	mem_$(BITS).o subarch.o os-$(OS)/
+	mem_$(BITS).o subarch.o os-Linux/
 
 ifeq ($(CONFIG_X86_32),y)
 
-obj-y += checksum_32.o syscalls_32.o
+obj-y += syscalls_32.o
 obj-$(CONFIG_ELF_CORE) += elfcore.o
 
 subarch-y = ../lib/string_32.o ../lib/atomic64_32.o ../lib/atomic64_cx8_32.o
 subarch-y += ../lib/cmpxchg8b_emu.o ../lib/atomic64_386_32.o
+subarch-y += ../lib/checksum_32.o
 subarch-y += ../kernel/sys_ia32.o
 
 else
diff --git a/arch/x86/um/asm/mm_context.h b/arch/x86/um/asm/mm_context.h
index 4a73d63e4760..dc32dc023c2f 100644
--- a/arch/x86/um/asm/mm_context.h
+++ b/arch/x86/um/asm/mm_context.h
@@ -11,8 +11,6 @@
 #include <linux/mutex.h>
 #include <asm/ldt.h>
 
-extern void ldt_host_info(void);
-
 #define LDT_PAGES_MAX \
 	((LDT_ENTRIES * LDT_ENTRY_SIZE)/PAGE_SIZE)
 #define LDT_ENTRIES_PER_PAGE \
diff --git a/arch/x86/um/checksum_32.S b/arch/x86/um/checksum_32.S
deleted file mode 100644
index aed782ab7721..000000000000
--- a/arch/x86/um/checksum_32.S
+++ /dev/null
@@ -1,214 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * INET		An implementation of the TCP/IP protocol suite for the LINUX
- *		operating system.  INET is implemented using the  BSD Socket
- *		interface as the means of communication with the user level.
- *
- *		IP/TCP/UDP checksumming routines
- *
- * Authors:	Jorge Cwik, <jorge@laser.satlink.net>
- *		Arnt Gulbrandsen, <agulbra@nvg.unit.no>
- *		Tom May, <ftom@netcom.com>
- *              Pentium Pro/II routines:
- *              Alexander Kjeldaas <astor@guardian.no>
- *              Finn Arne Gangstad <finnag@guardian.no>
- *		Lots of code moved from tcp.c and ip.c; see those files
- *		for more names.
- *
- * Changes:     Ingo Molnar, converted csum_partial_copy() to 2.1 exception
- *			     handling.
- *		Andi Kleen,  add zeroing on error
- *                   converted to pure assembler
- */
-
-#include <asm/errno.h>
-#include <asm/asm.h>
-#include <asm/export.h>
-				
-/*
- * computes a partial checksum, e.g. for TCP/UDP fragments
- */
-
-/*	
-unsigned int csum_partial(const unsigned char * buff, int len, unsigned int sum)
- */
-		
-.text
-.align 4
-.globl csum_partial
-		
-#ifndef CONFIG_X86_USE_PPRO_CHECKSUM
-
-	  /*		
-	   * Experiments with Ethernet and SLIP connections show that buff
-	   * is aligned on either a 2-byte or 4-byte boundary.  We get at
-	   * least a twofold speedup on 486 and Pentium if it is 4-byte aligned.
-	   * Fortunately, it is easy to convert 2-byte alignment to 4-byte
-	   * alignment for the unrolled loop.
-	   */		
-csum_partial:
-	pushl %esi
-	pushl %ebx
-	movl 20(%esp),%eax	# Function arg: unsigned int sum
-	movl 16(%esp),%ecx	# Function arg: int len
-	movl 12(%esp),%esi	# Function arg: unsigned char *buff
-	testl $2, %esi		# Check alignment.
-	jz 2f			# Jump if alignment is ok.
-	subl $2, %ecx		# Alignment uses up two bytes.
-	jae 1f			# Jump if we had at least two bytes.
-	addl $2, %ecx		# ecx was < 2.  Deal with it.
-	jmp 4f
-1:	movw (%esi), %bx
-	addl $2, %esi
-	addw %bx, %ax
-	adcl $0, %eax
-2:
-	movl %ecx, %edx
-	shrl $5, %ecx
-	jz 2f
-	testl %esi, %esi
-1:	movl (%esi), %ebx
-	adcl %ebx, %eax
-	movl 4(%esi), %ebx
-	adcl %ebx, %eax
-	movl 8(%esi), %ebx
-	adcl %ebx, %eax
-	movl 12(%esi), %ebx
-	adcl %ebx, %eax
-	movl 16(%esi), %ebx
-	adcl %ebx, %eax
-	movl 20(%esi), %ebx
-	adcl %ebx, %eax
-	movl 24(%esi), %ebx
-	adcl %ebx, %eax
-	movl 28(%esi), %ebx
-	adcl %ebx, %eax
-	lea 32(%esi), %esi
-	dec %ecx
-	jne 1b
-	adcl $0, %eax
-2:	movl %edx, %ecx
-	andl $0x1c, %edx
-	je 4f
-	shrl $2, %edx		# This clears CF
-3:	adcl (%esi), %eax
-	lea 4(%esi), %esi
-	dec %edx
-	jne 3b
-	adcl $0, %eax
-4:	andl $3, %ecx
-	jz 7f
-	cmpl $2, %ecx
-	jb 5f
-	movw (%esi),%cx
-	leal 2(%esi),%esi
-	je 6f
-	shll $16,%ecx
-5:	movb (%esi),%cl
-6:	addl %ecx,%eax
-	adcl $0, %eax 
-7:	
-	popl %ebx
-	popl %esi
-	RET
-
-#else
-
-/* Version for PentiumII/PPro */
-
-csum_partial:
-	pushl %esi
-	pushl %ebx
-	movl 20(%esp),%eax	# Function arg: unsigned int sum
-	movl 16(%esp),%ecx	# Function arg: int len
-	movl 12(%esp),%esi	# Function arg:	const unsigned char *buf
-
-	testl $2, %esi         
-	jnz 30f                 
-10:
-	movl %ecx, %edx
-	movl %ecx, %ebx
-	andl $0x7c, %ebx
-	shrl $7, %ecx
-	addl %ebx,%esi
-	shrl $2, %ebx  
-	negl %ebx
-	lea 45f(%ebx,%ebx,2), %ebx
-	testl %esi, %esi
-	jmp *%ebx
-
-	# Handle 2-byte-aligned regions
-20:	addw (%esi), %ax
-	lea 2(%esi), %esi
-	adcl $0, %eax
-	jmp 10b
-
-30:	subl $2, %ecx          
-	ja 20b                 
-	je 32f
-	movzbl (%esi),%ebx	# csumming 1 byte, 2-aligned
-	addl %ebx, %eax
-	adcl $0, %eax
-	jmp 80f
-32:
-	addw (%esi), %ax	# csumming 2 bytes, 2-aligned
-	adcl $0, %eax
-	jmp 80f
-
-40: 
-	addl -128(%esi), %eax
-	adcl -124(%esi), %eax
-	adcl -120(%esi), %eax
-	adcl -116(%esi), %eax   
-	adcl -112(%esi), %eax   
-	adcl -108(%esi), %eax
-	adcl -104(%esi), %eax
-	adcl -100(%esi), %eax
-	adcl -96(%esi), %eax
-	adcl -92(%esi), %eax
-	adcl -88(%esi), %eax
-	adcl -84(%esi), %eax
-	adcl -80(%esi), %eax
-	adcl -76(%esi), %eax
-	adcl -72(%esi), %eax
-	adcl -68(%esi), %eax
-	adcl -64(%esi), %eax     
-	adcl -60(%esi), %eax     
-	adcl -56(%esi), %eax     
-	adcl -52(%esi), %eax   
-	adcl -48(%esi), %eax   
-	adcl -44(%esi), %eax
-	adcl -40(%esi), %eax
-	adcl -36(%esi), %eax
-	adcl -32(%esi), %eax
-	adcl -28(%esi), %eax
-	adcl -24(%esi), %eax
-	adcl -20(%esi), %eax
-	adcl -16(%esi), %eax
-	adcl -12(%esi), %eax
-	adcl -8(%esi), %eax
-	adcl -4(%esi), %eax
-45:
-	lea 128(%esi), %esi
-	adcl $0, %eax
-	dec %ecx
-	jge 40b
-	movl %edx, %ecx
-50:	andl $3, %ecx
-	jz 80f
-
-	# Handle the last 1-3 bytes without jumping
-	notl %ecx		# 1->2, 2->1, 3->0, higher bits are masked
-	movl $0xffffff,%ebx	# by the shll and shrl instructions
-	shll $3,%ecx
-	shrl %cl,%ebx
-	andl -128(%esi),%ebx	# esi is 4-aligned so should be ok
-	addl %ebx,%eax
-	adcl $0,%eax
-80: 
-	popl %ebx
-	popl %esi
-	RET
-				
-#endif
-	EXPORT_SYMBOL(csum_partial)
diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig
index a5488cc40f58..7d792077e5fd 100644
--- a/arch/xtensa/Kconfig
+++ b/arch/xtensa/Kconfig
@@ -71,6 +71,9 @@ config ARCH_HAS_ILOG2_U32
 config ARCH_HAS_ILOG2_U64
 	def_bool n
 
+config ARCH_MTD_XIP
+	def_bool y
+
 config NO_IOPORT_MAP
 	def_bool n
 
diff --git a/arch/xtensa/include/asm/core.h b/arch/xtensa/include/asm/core.h
index 0e1bb6f019d6..3f5ffae89b58 100644
--- a/arch/xtensa/include/asm/core.h
+++ b/arch/xtensa/include/asm/core.h
@@ -52,4 +52,13 @@
 #define XTENSA_STACK_ALIGNMENT	16
 #endif
 
+#ifndef XCHAL_HW_MIN_VERSION
+#if defined(XCHAL_HW_MIN_VERSION_MAJOR) && defined(XCHAL_HW_MIN_VERSION_MINOR)
+#define XCHAL_HW_MIN_VERSION (XCHAL_HW_MIN_VERSION_MAJOR * 100 + \
+			      XCHAL_HW_MIN_VERSION_MINOR)
+#else
+#define XCHAL_HW_MIN_VERSION 0
+#endif
+#endif
+
 #endif
diff --git a/arch/xtensa/include/asm/mtd-xip.h b/arch/xtensa/include/asm/mtd-xip.h
new file mode 100644
index 000000000000..514325155cf8
--- /dev/null
+++ b/arch/xtensa/include/asm/mtd-xip.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#ifndef _ASM_MTD_XIP_H
+#define _ASM_MTD_XIP_H
+
+#include <asm/processor.h>
+
+#define xip_irqpending()	(xtensa_get_sr(interrupt) & xtensa_get_sr(intenable))
+#define xip_currtime()		(xtensa_get_sr(ccount))
+#define xip_elapsed_since(x)	((xtensa_get_sr(ccount) - (x)) / 1000) /* should work up to 1GHz */
+#define xip_cpu_idle()		do { asm volatile ("waiti 0"); } while (0)
+
+#endif /* _ASM_MTD_XIP_H */
+
diff --git a/arch/xtensa/include/asm/sections.h b/arch/xtensa/include/asm/sections.h
index 3bc6b9afa993..e5da6d7092be 100644
--- a/arch/xtensa/include/asm/sections.h
+++ b/arch/xtensa/include/asm/sections.h
@@ -34,6 +34,10 @@ extern char _SecondaryResetVector_text_start[];
 extern char _SecondaryResetVector_text_end[];
 #endif
 #ifdef CONFIG_XIP_KERNEL
+#ifdef CONFIG_VECTORS_ADDR
+extern char _xip_text_start[];
+extern char _xip_text_end[];
+#endif
 extern char _xip_start[];
 extern char _xip_end[];
 #endif
diff --git a/arch/xtensa/kernel/perf_event.c b/arch/xtensa/kernel/perf_event.c
index a0d05c8598d0..183618090d05 100644
--- a/arch/xtensa/kernel/perf_event.c
+++ b/arch/xtensa/kernel/perf_event.c
@@ -13,17 +13,26 @@
 #include <linux/perf_event.h>
 #include <linux/platform_device.h>
 
+#include <asm/core.h>
 #include <asm/processor.h>
 #include <asm/stacktrace.h>
 
+#define XTENSA_HWVERSION_RG_2015_0	260000
+
+#if XCHAL_HW_MIN_VERSION >= XTENSA_HWVERSION_RG_2015_0
+#define XTENSA_PMU_ERI_BASE		0x00101000
+#else
+#define XTENSA_PMU_ERI_BASE		0x00001000
+#endif
+
 /* Global control/status for all perf counters */
-#define XTENSA_PMU_PMG			0x1000
+#define XTENSA_PMU_PMG			XTENSA_PMU_ERI_BASE
 /* Perf counter values */
-#define XTENSA_PMU_PM(i)		(0x1080 + (i) * 4)
+#define XTENSA_PMU_PM(i)		(XTENSA_PMU_ERI_BASE + 0x80 + (i) * 4)
 /* Perf counter control registers */
-#define XTENSA_PMU_PMCTRL(i)		(0x1100 + (i) * 4)
+#define XTENSA_PMU_PMCTRL(i)		(XTENSA_PMU_ERI_BASE + 0x100 + (i) * 4)
 /* Perf counter status registers */
-#define XTENSA_PMU_PMSTAT(i)		(0x1180 + (i) * 4)
+#define XTENSA_PMU_PMSTAT(i)		(XTENSA_PMU_ERI_BASE + 0x180 + (i) * 4)
 
 #define XTENSA_PMU_PMG_PMEN		0x1
 
diff --git a/arch/xtensa/kernel/setup.c b/arch/xtensa/kernel/setup.c
index aba3ff4e60d8..52d6e4870a04 100644
--- a/arch/xtensa/kernel/setup.c
+++ b/arch/xtensa/kernel/setup.c
@@ -311,6 +311,9 @@ void __init setup_arch(char **cmdline_p)
 
 	mem_reserve(__pa(_stext), __pa(_end));
 #ifdef CONFIG_XIP_KERNEL
+#ifdef CONFIG_VECTORS_ADDR
+	mem_reserve(__pa(_xip_text_start), __pa(_xip_text_end));
+#endif
 	mem_reserve(__pa(_xip_start), __pa(_xip_end));
 #endif
 
diff --git a/arch/xtensa/kernel/vmlinux.lds.S b/arch/xtensa/kernel/vmlinux.lds.S
index c14fd96f459d..f47e9bbbd291 100644
--- a/arch/xtensa/kernel/vmlinux.lds.S
+++ b/arch/xtensa/kernel/vmlinux.lds.S
@@ -118,6 +118,7 @@ SECTIONS
     SECTION_VECTOR2 (.DoubleExceptionVector.text, DOUBLEEXC_VECTOR_VADDR)
 
     *(.exception.text)
+    *(.xiptext)
 #endif
 
     IRQENTRY_TEXT
@@ -201,6 +202,9 @@ SECTIONS
 		   .DebugInterruptVector.text);
     RELOCATE_ENTRY(_exception_text,
 		   .exception.text);
+#ifdef CONFIG_XIP_KERNEL
+    RELOCATE_ENTRY(_xip_text, .xiptext);
+#endif
 #endif
 #ifdef CONFIG_XIP_KERNEL
     RELOCATE_ENTRY(_xip_data, .data);
@@ -319,7 +323,12 @@ SECTIONS
 		  LAST)
 #undef LAST
 #define LAST .exception.text
-
+  SECTION_VECTOR4 (_xip_text,
+		  .xiptext,
+		  ,
+		  LAST)
+#undef LAST
+#define LAST .xiptext
 #endif
   . = (LOADADDR(LAST) + SIZEOF(LAST) + 3) & ~ 3;
 
diff --git a/arch/xtensa/platforms/iss/console.c b/arch/xtensa/platforms/iss/console.c
index 10b79d3c74e0..7d1f8b398a46 100644
--- a/arch/xtensa/platforms/iss/console.c
+++ b/arch/xtensa/platforms/iss/console.c
@@ -52,8 +52,7 @@ static void rs_close(struct tty_struct *tty, struct file * filp)
 }
 
 
-static int rs_write(struct tty_struct * tty,
-		    const unsigned char *buf, int count)
+static ssize_t rs_write(struct tty_struct * tty, const u8 *buf, size_t count)
 {
 	/* see drivers/char/serialX.c to reference original version */
 
@@ -82,32 +81,12 @@ static void rs_poll(struct timer_list *unused)
 		mod_timer(&serial_timer, jiffies + SERIAL_TIMER_VALUE);
 }
 
-
-static int rs_put_char(struct tty_struct *tty, unsigned char ch)
-{
-	return rs_write(tty, &ch, 1);
-}
-
-static void rs_flush_chars(struct tty_struct *tty)
-{
-}
-
 static unsigned int rs_write_room(struct tty_struct *tty)
 {
 	/* Let's say iss can always accept 2K characters.. */
 	return 2 * 1024;
 }
 
-static void rs_hangup(struct tty_struct *tty)
-{
-	/* Stub, once again.. */
-}
-
-static void rs_wait_until_sent(struct tty_struct *tty, int timeout)
-{
-	/* Stub, once again.. */
-}
-
 static int rs_proc_show(struct seq_file *m, void *v)
 {
 	seq_printf(m, "serinfo:1.0 driver:0.1\n");
@@ -118,11 +97,7 @@ static const struct tty_operations serial_ops = {
 	.open = rs_open,
 	.close = rs_close,
 	.write = rs_write,
-	.put_char = rs_put_char,
-	.flush_chars = rs_flush_chars,
 	.write_room = rs_write_room,
-	.hangup = rs_hangup,
-	.wait_until_sent = rs_wait_until_sent,
 	.proc_show = rs_proc_show,
 };