diff options
Diffstat (limited to 'arch')
459 files changed, 10259 insertions, 5380 deletions
diff --git a/arch/alpha/include/asm/Kbuild b/arch/alpha/include/asm/Kbuild index dd31e97edae8..396caece6d6d 100644 --- a/arch/alpha/include/asm/Kbuild +++ b/arch/alpha/include/asm/Kbuild @@ -3,6 +3,5 @@ generated-y += syscall_table.h generic-y += agp.h generic-y += asm-offsets.h -generic-y += export.h generic-y += kvm_para.h generic-y += mcs_spinlock.h diff --git a/arch/alpha/kernel/setup.c b/arch/alpha/kernel/setup.c index 3d7473531ab1..c80258ec332f 100644 --- a/arch/alpha/kernel/setup.c +++ b/arch/alpha/kernel/setup.c @@ -421,7 +421,7 @@ register_cpus(void) arch_initcall(register_cpus); #ifdef CONFIG_MAGIC_SYSRQ -static void sysrq_reboot_handler(int unused) +static void sysrq_reboot_handler(u8 unused) { machine_halt(); } diff --git a/arch/alpha/kernel/srmcons.c b/arch/alpha/kernel/srmcons.c index 6dc952b0df4a..d6139dbae4ac 100644 --- a/arch/alpha/kernel/srmcons.c +++ b/arch/alpha/kernel/srmcons.c @@ -129,9 +129,8 @@ srmcons_do_write(struct tty_port *port, const char *buf, int count) return count; } -static int -srmcons_write(struct tty_struct *tty, - const unsigned char *buf, int count) +static ssize_t +srmcons_write(struct tty_struct *tty, const u8 *buf, size_t count) { unsigned long flags; diff --git a/arch/alpha/lib/callback_srm.S b/arch/alpha/lib/callback_srm.S index b13c4a231f1b..36b63f295170 100644 --- a/arch/alpha/lib/callback_srm.S +++ b/arch/alpha/lib/callback_srm.S @@ -3,8 +3,8 @@ * arch/alpha/lib/callback_srm.S */ +#include <linux/export.h> #include <asm/console.h> -#include <asm/export.h> .text #define HWRPB_CRB_OFFSET 0xc0 diff --git a/arch/alpha/lib/clear_page.S b/arch/alpha/lib/clear_page.S index ce02de7b0493..af70ee309a33 100644 --- a/arch/alpha/lib/clear_page.S +++ b/arch/alpha/lib/clear_page.S @@ -4,7 +4,7 @@ * * Zero an entire page. 
*/ -#include <asm/export.h> +#include <linux/export.h> .text .align 4 .global clear_page diff --git a/arch/alpha/lib/clear_user.S b/arch/alpha/lib/clear_user.S index db6c6ca45896..848eb60a0010 100644 --- a/arch/alpha/lib/clear_user.S +++ b/arch/alpha/lib/clear_user.S @@ -10,7 +10,7 @@ * a successful copy). There is also some rather minor exception setup * stuff. */ -#include <asm/export.h> +#include <linux/export.h> /* Allow an exception for an insn; exit if we get one. */ #define EX(x,y...) \ diff --git a/arch/alpha/lib/copy_page.S b/arch/alpha/lib/copy_page.S index 5439a30c77d0..1c444fdad9a5 100644 --- a/arch/alpha/lib/copy_page.S +++ b/arch/alpha/lib/copy_page.S @@ -4,7 +4,7 @@ * * Copy an entire page. */ -#include <asm/export.h> +#include <linux/export.h> .text .align 4 .global copy_page diff --git a/arch/alpha/lib/copy_user.S b/arch/alpha/lib/copy_user.S index 32ab0344b185..ef18faafcad6 100644 --- a/arch/alpha/lib/copy_user.S +++ b/arch/alpha/lib/copy_user.S @@ -12,7 +12,7 @@ * exception setup stuff.. */ -#include <asm/export.h> +#include <linux/export.h> /* Allow an exception for an insn; exit if we get one. */ #define EXI(x,y...) 
\ diff --git a/arch/alpha/lib/csum_ipv6_magic.S b/arch/alpha/lib/csum_ipv6_magic.S index c7b213ab01ab..273c426c3859 100644 --- a/arch/alpha/lib/csum_ipv6_magic.S +++ b/arch/alpha/lib/csum_ipv6_magic.S @@ -13,7 +13,7 @@ * added by Ivan Kokshaysky <ink@jurassic.park.msu.ru> */ -#include <asm/export.h> +#include <linux/export.h> .globl csum_ipv6_magic .align 4 .ent csum_ipv6_magic diff --git a/arch/alpha/lib/divide.S b/arch/alpha/lib/divide.S index 2b60eb45e50b..db01840d76ec 100644 --- a/arch/alpha/lib/divide.S +++ b/arch/alpha/lib/divide.S @@ -46,7 +46,7 @@ * $28 - compare status */ -#include <asm/export.h> +#include <linux/export.h> #define halt .long 0 /* diff --git a/arch/alpha/lib/ev6-clear_page.S b/arch/alpha/lib/ev6-clear_page.S index 325864c81586..a534d9ff7161 100644 --- a/arch/alpha/lib/ev6-clear_page.S +++ b/arch/alpha/lib/ev6-clear_page.S @@ -4,7 +4,7 @@ * * Zero an entire page. */ -#include <asm/export.h> +#include <linux/export.h> .text .align 4 .global clear_page diff --git a/arch/alpha/lib/ev6-clear_user.S b/arch/alpha/lib/ev6-clear_user.S index 7e644f83cdf2..af776cc45f91 100644 --- a/arch/alpha/lib/ev6-clear_user.S +++ b/arch/alpha/lib/ev6-clear_user.S @@ -29,7 +29,7 @@ * want to leave a hole (and we also want to avoid repeating lots of work) */ -#include <asm/export.h> +#include <linux/export.h> /* Allow an exception for an insn; exit if we get one. */ #define EX(x,y...) \ 99: x,##y; \ diff --git a/arch/alpha/lib/ev6-copy_page.S b/arch/alpha/lib/ev6-copy_page.S index fd7212c8dcf1..36be5113b7b7 100644 --- a/arch/alpha/lib/ev6-copy_page.S +++ b/arch/alpha/lib/ev6-copy_page.S @@ -57,7 +57,7 @@ destination pages are in the dcache, but it is my guess that this is less important than the dcache miss case. 
*/ -#include <asm/export.h> +#include <linux/export.h> .text .align 4 .global copy_page diff --git a/arch/alpha/lib/ev6-copy_user.S b/arch/alpha/lib/ev6-copy_user.S index f3e433754397..b9b19710c364 100644 --- a/arch/alpha/lib/ev6-copy_user.S +++ b/arch/alpha/lib/ev6-copy_user.S @@ -23,7 +23,7 @@ * L - lower subcluster; L0 - subcluster L0; L1 - subcluster L1 */ -#include <asm/export.h> +#include <linux/export.h> /* Allow an exception for an insn; exit if we get one. */ #define EXI(x,y...) \ 99: x,##y; \ diff --git a/arch/alpha/lib/ev6-csum_ipv6_magic.S b/arch/alpha/lib/ev6-csum_ipv6_magic.S index 9a73f90700a1..2ee548be98e3 100644 --- a/arch/alpha/lib/ev6-csum_ipv6_magic.S +++ b/arch/alpha/lib/ev6-csum_ipv6_magic.S @@ -53,7 +53,7 @@ * may cause additional delay in rare cases (load-load replay traps). */ -#include <asm/export.h> +#include <linux/export.h> .globl csum_ipv6_magic .align 4 .ent csum_ipv6_magic diff --git a/arch/alpha/lib/ev6-divide.S b/arch/alpha/lib/ev6-divide.S index 137ff1a07356..b73a6d26362e 100644 --- a/arch/alpha/lib/ev6-divide.S +++ b/arch/alpha/lib/ev6-divide.S @@ -56,7 +56,7 @@ * Try not to change the actual algorithm if possible for consistency. */ -#include <asm/export.h> +#include <linux/export.h> #define halt .long 0 /* diff --git a/arch/alpha/lib/ev6-memchr.S b/arch/alpha/lib/ev6-memchr.S index 56bf9e14eeee..f75ba43e61e3 100644 --- a/arch/alpha/lib/ev6-memchr.S +++ b/arch/alpha/lib/ev6-memchr.S @@ -28,7 +28,7 @@ * L - lower subcluster; L0 - subcluster L0; L1 - subcluster L1 * Try not to change the actual algorithm if possible for consistency. 
*/ -#include <asm/export.h> +#include <linux/export.h> .set noreorder .set noat diff --git a/arch/alpha/lib/ev6-memcpy.S b/arch/alpha/lib/ev6-memcpy.S index ffbd056b6eb2..3ef43c26c8af 100644 --- a/arch/alpha/lib/ev6-memcpy.S +++ b/arch/alpha/lib/ev6-memcpy.S @@ -20,7 +20,7 @@ * Temp usage notes: * $1,$2, - scratch */ -#include <asm/export.h> +#include <linux/export.h> .set noreorder .set noat diff --git a/arch/alpha/lib/ev6-memset.S b/arch/alpha/lib/ev6-memset.S index 1cfcfbbea6f0..89d7809da4cc 100644 --- a/arch/alpha/lib/ev6-memset.S +++ b/arch/alpha/lib/ev6-memset.S @@ -27,7 +27,7 @@ * as fixes will need to be made in multiple places. The performance gain * is worth it. */ -#include <asm/export.h> +#include <linux/export.h> .set noat .set noreorder .text diff --git a/arch/alpha/lib/ev67-strcat.S b/arch/alpha/lib/ev67-strcat.S index ec3096a9e8d4..f8c7305b11d6 100644 --- a/arch/alpha/lib/ev67-strcat.S +++ b/arch/alpha/lib/ev67-strcat.S @@ -20,7 +20,7 @@ * string once. */ -#include <asm/export.h> +#include <linux/export.h> .text .align 4 diff --git a/arch/alpha/lib/ev67-strchr.S b/arch/alpha/lib/ev67-strchr.S index fbf89e0b6dc3..97a7cb475309 100644 --- a/arch/alpha/lib/ev67-strchr.S +++ b/arch/alpha/lib/ev67-strchr.S @@ -16,7 +16,7 @@ * L - lower subcluster; L0 - subcluster L0; L1 - subcluster L1 * Try not to change the actual algorithm if possible for consistency. 
*/ -#include <asm/export.h> +#include <linux/export.h> #include <asm/regdef.h> .set noreorder diff --git a/arch/alpha/lib/ev67-strlen.S b/arch/alpha/lib/ev67-strlen.S index b73106ffbbc7..3d9078807ab4 100644 --- a/arch/alpha/lib/ev67-strlen.S +++ b/arch/alpha/lib/ev67-strlen.S @@ -18,7 +18,7 @@ * U - upper subcluster; U0 - subcluster U0; U1 - subcluster U1 * L - lower subcluster; L0 - subcluster L0; L1 - subcluster L1 */ -#include <asm/export.h> +#include <linux/export.h> .set noreorder .set noat diff --git a/arch/alpha/lib/ev67-strncat.S b/arch/alpha/lib/ev67-strncat.S index ceb0ca528789..8f313233e3a7 100644 --- a/arch/alpha/lib/ev67-strncat.S +++ b/arch/alpha/lib/ev67-strncat.S @@ -21,7 +21,7 @@ * Try not to change the actual algorithm if possible for consistency. */ -#include <asm/export.h> +#include <linux/export.h> .text .align 4 diff --git a/arch/alpha/lib/ev67-strrchr.S b/arch/alpha/lib/ev67-strrchr.S index 7f80e398530f..ae7355f9ec56 100644 --- a/arch/alpha/lib/ev67-strrchr.S +++ b/arch/alpha/lib/ev67-strrchr.S @@ -19,7 +19,7 @@ * L - lower subcluster; L0 - subcluster L0; L1 - subcluster L1 */ -#include <asm/export.h> +#include <linux/export.h> #include <asm/regdef.h> .set noreorder diff --git a/arch/alpha/lib/memchr.S b/arch/alpha/lib/memchr.S index c13d3eca2e05..45366e32feee 100644 --- a/arch/alpha/lib/memchr.S +++ b/arch/alpha/lib/memchr.S @@ -31,7 +31,7 @@ For correctness consider that: - only minimum number of quadwords may be accessed - the third argument is an unsigned long */ -#include <asm/export.h> +#include <linux/export.h> .set noreorder .set noat diff --git a/arch/alpha/lib/memmove.S b/arch/alpha/lib/memmove.S index 42d1922d0edf..3a27689e3390 100644 --- a/arch/alpha/lib/memmove.S +++ b/arch/alpha/lib/memmove.S @@ -7,7 +7,7 @@ * This is hand-massaged output from the original memcpy.c. We defer to * memcpy whenever possible; the backwards copy loops are not unrolled. 
*/ -#include <asm/export.h> +#include <linux/export.h> .set noat .set noreorder .text diff --git a/arch/alpha/lib/memset.S b/arch/alpha/lib/memset.S index 00393e30df25..9075d6918346 100644 --- a/arch/alpha/lib/memset.S +++ b/arch/alpha/lib/memset.S @@ -14,7 +14,7 @@ * The scheduling comments are according to the EV5 documentation (and done by * hand, so they might well be incorrect, please do tell me about it..) */ -#include <asm/export.h> +#include <linux/export.h> .set noat .set noreorder .text diff --git a/arch/alpha/lib/strcat.S b/arch/alpha/lib/strcat.S index 055877dccd27..62b90ebbcf44 100644 --- a/arch/alpha/lib/strcat.S +++ b/arch/alpha/lib/strcat.S @@ -5,7 +5,7 @@ * * Append a null-terminated string from SRC to DST. */ -#include <asm/export.h> +#include <linux/export.h> .text diff --git a/arch/alpha/lib/strchr.S b/arch/alpha/lib/strchr.S index 17871dd00280..68c54ff50dfe 100644 --- a/arch/alpha/lib/strchr.S +++ b/arch/alpha/lib/strchr.S @@ -6,7 +6,7 @@ * Return the address of a given character within a null-terminated * string, or null if it is not found. */ -#include <asm/export.h> +#include <linux/export.h> #include <asm/regdef.h> .set noreorder diff --git a/arch/alpha/lib/strcpy.S b/arch/alpha/lib/strcpy.S index cb74ad23a90d..d8773ba77525 100644 --- a/arch/alpha/lib/strcpy.S +++ b/arch/alpha/lib/strcpy.S @@ -6,7 +6,7 @@ * Copy a null-terminated string from SRC to DST. Return a pointer * to the null-terminator in the source. */ -#include <asm/export.h> +#include <linux/export.h> .text .align 3 diff --git a/arch/alpha/lib/strlen.S b/arch/alpha/lib/strlen.S index dd882fe4d7e3..4fc6a6ff24cd 100644 --- a/arch/alpha/lib/strlen.S +++ b/arch/alpha/lib/strlen.S @@ -12,7 +12,7 @@ * do this instead of the 9 instructions that * binary search needs). 
*/ -#include <asm/export.h> +#include <linux/export.h> .set noreorder .set noat diff --git a/arch/alpha/lib/strncat.S b/arch/alpha/lib/strncat.S index 522fee3e26ac..a913a7c84a39 100644 --- a/arch/alpha/lib/strncat.S +++ b/arch/alpha/lib/strncat.S @@ -10,7 +10,7 @@ * past count, whereas libc may write to count+1. This follows the generic * implementation in lib/string.c and is, IMHO, more sensible. */ -#include <asm/export.h> +#include <linux/export.h> .text .align 3 diff --git a/arch/alpha/lib/strncpy.S b/arch/alpha/lib/strncpy.S index cc57fad8b7ca..cb90cf022df3 100644 --- a/arch/alpha/lib/strncpy.S +++ b/arch/alpha/lib/strncpy.S @@ -11,7 +11,7 @@ * version has cropped that bit o' nastiness as well as assuming that * __stxncpy is in range of a branch. */ -#include <asm/export.h> +#include <linux/export.h> .set noat .set noreorder diff --git a/arch/alpha/lib/strrchr.S b/arch/alpha/lib/strrchr.S index 7650ba99b7e2..dd8e073b6cf2 100644 --- a/arch/alpha/lib/strrchr.S +++ b/arch/alpha/lib/strrchr.S @@ -6,7 +6,7 @@ * Return the address of the last occurrence of a given character * within a null-terminated string, or null if it is not found. */ -#include <asm/export.h> +#include <linux/export.h> #include <asm/regdef.h> .set noreorder diff --git a/arch/alpha/lib/udiv-qrnnd.S b/arch/alpha/lib/udiv-qrnnd.S index b887aa5428e5..96f05918bffe 100644 --- a/arch/alpha/lib/udiv-qrnnd.S +++ b/arch/alpha/lib/udiv-qrnnd.S @@ -25,7 +25,7 @@ # along with GCC; see the file COPYING. If not, write to the # Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, # MA 02111-1307, USA. 
-#include <asm/export.h> +#include <linux/export.h> .set noreorder .set noat diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index 6f4995ad9873..3162db540ee9 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -27,6 +27,8 @@ config ARC select GENERIC_SCHED_CLOCK select GENERIC_SMP_IDLE_THREAD select GENERIC_IOREMAP + select GENERIC_STRNCPY_FROM_USER if MMU + select GENERIC_STRNLEN_USER if MMU select HAVE_ARCH_KGDB select HAVE_ARCH_TRACEHOOK select HAVE_ARCH_TRANSPARENT_HUGEPAGE if ARC_MMU_V4 @@ -491,11 +493,11 @@ config ARC_KVADDR_SIZE kernel-user gutter) config ARC_CURR_IN_REG - bool "Dedicate Register r25 for current_task pointer" + bool "cache current task pointer in gp" default y help - This reserved Register R25 to point to Current Task in - kernel mode. This saves memory access for each such access + This reserves gp register to point to Current Task in + kernel mode eliding memory access for each access config ARC_EMUL_UNALIGNED diff --git a/arch/arc/Makefile b/arch/arc/Makefile index 329400a1c355..2390dd042e36 100644 --- a/arch/arc/Makefile +++ b/arch/arc/Makefile @@ -28,14 +28,14 @@ cflags-y += $(tune-mcpu-def-y) endif endif - ifdef CONFIG_ARC_CURR_IN_REG # For a global register definition, make sure it gets passed to every file # We had a customer reported bug where some code built in kernel was NOT using -# any kernel headers, and missing the r25 global register +# any kernel headers, and missing the global register # Can't do unconditionally because of recursive include issues # due to <linux/thread_info.h> LINUXINCLUDE += -include $(srctree)/arch/arc/include/asm/current.h +cflags-y += -ffixed-gp endif cflags-y += -fsection-anchors @@ -67,7 +67,7 @@ cflags-$(CONFIG_ARC_DW2_UNWIND) += -fasynchronous-unwind-tables $(cfi) # small data is default for elf32 tool-chain. 
If not usable, disable it # This also allows repurposing GP as scratch reg to gcc reg allocator disable_small_data := y -cflags-$(disable_small_data) += -mno-sdata -fcall-used-gp +cflags-$(disable_small_data) += -mno-sdata cflags-$(CONFIG_CPU_BIG_ENDIAN) += -mbig-endian ldflags-$(CONFIG_CPU_BIG_ENDIAN) += -EB diff --git a/arch/arc/include/asm/arcregs.h b/arch/arc/include/asm/arcregs.h index 2162023195c5..4b13f60fe7ca 100644 --- a/arch/arc/include/asm/arcregs.h +++ b/arch/arc/include/asm/arcregs.h @@ -23,7 +23,7 @@ #define ARC_REG_ICCM_BUILD 0x78 /* ICCM size (common) */ #define ARC_REG_XY_MEM_BCR 0x79 #define ARC_REG_MAC_BCR 0x7a -#define ARC_REG_MUL_BCR 0x7b +#define ARC_REG_MPY_BCR 0x7b #define ARC_REG_SWAP_BCR 0x7c #define ARC_REG_NORM_BCR 0x7d #define ARC_REG_MIXMAX_BCR 0x7e @@ -177,7 +177,7 @@ struct bcr_isa_arcv2 { #endif }; -struct bcr_uarch_build_arcv2 { +struct bcr_uarch_build { #ifdef CONFIG_CPU_BIG_ENDIAN unsigned int pad:8, prod:8, maj:8, min:8; #else @@ -185,6 +185,59 @@ struct bcr_uarch_build_arcv2 { #endif }; +struct bcr_mmu_3 { +#ifdef CONFIG_CPU_BIG_ENDIAN + unsigned int ver:8, ways:4, sets:4, res:3, sasid:1, pg_sz:4, + u_itlb:4, u_dtlb:4; +#else + unsigned int u_dtlb:4, u_itlb:4, pg_sz:4, sasid:1, res:3, sets:4, + ways:4, ver:8; +#endif +}; + +struct bcr_mmu_4 { +#ifdef CONFIG_CPU_BIG_ENDIAN + unsigned int ver:8, sasid:1, sz1:4, sz0:4, res:2, pae:1, + n_ways:2, n_entry:2, n_super:2, u_itlb:3, u_dtlb:3; +#else + /* DTLB ITLB JES JE JA */ + unsigned int u_dtlb:3, u_itlb:3, n_super:2, n_entry:2, n_ways:2, + pae:1, res:2, sz0:4, sz1:4, sasid:1, ver:8; +#endif +}; + +struct bcr_cache { +#ifdef CONFIG_CPU_BIG_ENDIAN + unsigned int pad:12, line_len:4, sz:4, config:4, ver:8; +#else + unsigned int ver:8, config:4, sz:4, line_len:4, pad:12; +#endif +}; + +struct bcr_slc_cfg { +#ifdef CONFIG_CPU_BIG_ENDIAN + unsigned int pad:24, way:2, lsz:2, sz:4; +#else + unsigned int sz:4, lsz:2, way:2, pad:24; +#endif +}; + +struct bcr_clust_cfg { +#ifdef 
CONFIG_CPU_BIG_ENDIAN + unsigned int pad:7, c:1, num_entries:8, num_cores:8, ver:8; +#else + unsigned int ver:8, num_cores:8, num_entries:8, c:1, pad:7; +#endif +}; + +struct bcr_volatile { +#ifdef CONFIG_CPU_BIG_ENDIAN + unsigned int start:4, limit:4, pad:22, order:1, disable:1; +#else + unsigned int disable:1, order:1, pad:22, limit:4, start:4; +#endif +}; + struct bcr_mpy { #ifdef CONFIG_CPU_BIG_ENDIAN unsigned int pad:8, x1616:8, dsp:4, cycles:2, type:2, ver:8; @@ -302,48 +355,6 @@ struct bcr_generic { #endif }; -/* - ******************************************************************* - * Generic structures to hold build configuration used at runtime - */ - -struct cpuinfo_arc_mmu { - unsigned int ver:4, pg_sz_k:8, s_pg_sz_m:8, pad:10, sasid:1, pae:1; - unsigned int sets:12, ways:4, u_dtlb:8, u_itlb:8; -}; - -struct cpuinfo_arc_cache { - unsigned int sz_k:14, line_len:8, assoc:4, alias:1, vipt:1, pad:4; -}; - -struct cpuinfo_arc_bpu { - unsigned int ver, full, num_cache, num_pred, ret_stk; -}; - -struct cpuinfo_arc_ccm { - unsigned int base_addr, sz; -}; - -struct cpuinfo_arc { - struct cpuinfo_arc_cache icache, dcache, slc; - struct cpuinfo_arc_mmu mmu; - struct cpuinfo_arc_bpu bpu; - struct bcr_identity core; - struct bcr_isa_arcv2 isa; - const char *release, *name; - unsigned int vec_base; - struct cpuinfo_arc_ccm iccm, dccm; - struct { - unsigned int swap:1, norm:1, minmax:1, barrel:1, crc:1, swape:1, pad1:2, - fpu_sp:1, fpu_dp:1, dual:1, dual_enb:1, pad2:4, - ap_num:4, ap_full:1, smart:1, rtt:1, pad3:1, - timer0:1, timer1:1, rtc:1, gfrc:1, pad4:4; - } extn; - struct bcr_mpy extn_mpy; -}; - -extern struct cpuinfo_arc cpuinfo_arc700[]; - static inline int is_isa_arcv2(void) { return IS_ENABLED(CONFIG_ISA_ARCV2); diff --git a/arch/arc/include/asm/atomic-llsc.h b/arch/arc/include/asm/atomic-llsc.h index 1b0ffaeee16d..5258cb81a16b 100644 --- a/arch/arc/include/asm/atomic-llsc.h +++ b/arch/arc/include/asm/atomic-llsc.h @@ -18,7 +18,7 @@ static inline void 
arch_atomic_##op(int i, atomic_t *v) \ : [val] "=&r" (val) /* Early clobber to prevent reg reuse */ \ : [ctr] "r" (&v->counter), /* Not "m": llock only supports reg direct addr mode */ \ [i] "ir" (i) \ - : "cc"); \ + : "cc", "memory"); \ } \ #define ATOMIC_OP_RETURN(op, asm_op) \ @@ -34,7 +34,7 @@ static inline int arch_atomic_##op##_return_relaxed(int i, atomic_t *v) \ : [val] "=&r" (val) \ : [ctr] "r" (&v->counter), \ [i] "ir" (i) \ - : "cc"); \ + : "cc", "memory"); \ \ return val; \ } @@ -56,7 +56,7 @@ static inline int arch_atomic_fetch_##op##_relaxed(int i, atomic_t *v) \ [orig] "=&r" (orig) \ : [ctr] "r" (&v->counter), \ [i] "ir" (i) \ - : "cc"); \ + : "cc", "memory"); \ \ return orig; \ } diff --git a/arch/arc/include/asm/atomic64-arcv2.h b/arch/arc/include/asm/atomic64-arcv2.h index 6b6db981967a..9b5791b85471 100644 --- a/arch/arc/include/asm/atomic64-arcv2.h +++ b/arch/arc/include/asm/atomic64-arcv2.h @@ -60,7 +60,7 @@ static inline void arch_atomic64_##op(s64 a, atomic64_t *v) \ " bnz 1b \n" \ : "=&r"(val) \ : "r"(&v->counter), "ir"(a) \ - : "cc"); \ + : "cc", "memory"); \ } \ #define ATOMIC64_OP_RETURN(op, op1, op2) \ @@ -77,7 +77,7 @@ static inline s64 arch_atomic64_##op##_return_relaxed(s64 a, atomic64_t *v) \ " bnz 1b \n" \ : [val] "=&r"(val) \ : "r"(&v->counter), "ir"(a) \ - : "cc"); /* memory clobber comes from smp_mb() */ \ + : "cc", "memory"); \ \ return val; \ } @@ -99,7 +99,7 @@ static inline s64 arch_atomic64_fetch_##op##_relaxed(s64 a, atomic64_t *v) \ " bnz 1b \n" \ : "=&r"(orig), "=&r"(val) \ : "r"(&v->counter), "ir"(a) \ - : "cc"); /* memory clobber comes from smp_mb() */ \ + : "cc", "memory"); \ \ return orig; \ } diff --git a/arch/arc/include/asm/current.h b/arch/arc/include/asm/current.h index 9b9bdd3e6538..06be89f6f2f0 100644 --- a/arch/arc/include/asm/current.h +++ b/arch/arc/include/asm/current.h @@ -13,7 +13,7 @@ #ifdef CONFIG_ARC_CURR_IN_REG -register struct task_struct *curr_arc asm("r25"); +register struct task_struct *curr_arc 
asm("gp"); #define current (curr_arc) #else diff --git a/arch/arc/include/asm/dwarf.h b/arch/arc/include/asm/dwarf.h index 5f4de05bd4ee..a0d5ebe1bc3f 100644 --- a/arch/arc/include/asm/dwarf.h +++ b/arch/arc/include/asm/dwarf.h @@ -10,23 +10,31 @@ #ifdef ARC_DW2_UNWIND_AS_CFI -#define CFI_STARTPROC .cfi_startproc -#define CFI_ENDPROC .cfi_endproc -#define CFI_DEF_CFA .cfi_def_cfa -#define CFI_REGISTER .cfi_register -#define CFI_REL_OFFSET .cfi_rel_offset -#define CFI_UNDEFINED .cfi_undefined +#define CFI_STARTPROC .cfi_startproc +#define CFI_ENDPROC .cfi_endproc +#define CFI_DEF_CFA .cfi_def_cfa +#define CFI_DEF_CFA_OFFSET .cfi_def_cfa_offset +#define CFI_DEF_CFA_REGISTER .cfi_def_cfa_register +#define CFI_OFFSET .cfi_offset +#define CFI_REL_OFFSET .cfi_rel_offset +#define CFI_REGISTER .cfi_register +#define CFI_RESTORE .cfi_restore +#define CFI_UNDEFINED .cfi_undefined #else #define CFI_IGNORE # -#define CFI_STARTPROC CFI_IGNORE -#define CFI_ENDPROC CFI_IGNORE -#define CFI_DEF_CFA CFI_IGNORE -#define CFI_REGISTER CFI_IGNORE -#define CFI_REL_OFFSET CFI_IGNORE -#define CFI_UNDEFINED CFI_IGNORE +#define CFI_STARTPROC CFI_IGNORE +#define CFI_ENDPROC CFI_IGNORE +#define CFI_DEF_CFA CFI_IGNORE +#define CFI_DEF_CFA_OFFSET CFI_IGNORE +#define CFI_DEF_CFA_REGISTER CFI_IGNORE +#define CFI_OFFSET CFI_IGNORE +#define CFI_REL_OFFSET CFI_IGNORE +#define CFI_REGISTER CFI_IGNORE +#define CFI_RESTORE CFI_IGNORE +#define CFI_UNDEFINED CFI_IGNORE #endif /* !ARC_DW2_UNWIND_AS_CFI */ diff --git a/arch/arc/include/asm/entry-arcv2.h b/arch/arc/include/asm/entry-arcv2.h index 0ff4c0610561..4d13320e0c1b 100644 --- a/arch/arc/include/asm/entry-arcv2.h +++ b/arch/arc/include/asm/entry-arcv2.h @@ -18,7 +18,6 @@ * | orig_r0 | * | event/ECR | * | bta | - * | user_r25 | * | gp | * | fp | * | sp | @@ -49,14 +48,18 @@ /*------------------------------------------------------------------------*/ .macro INTERRUPT_PROLOGUE - ; (A) Before jumping to Interrupt Vector, hardware micro-ops did following: + 
; Before jumping to Interrupt Vector, hardware micro-ops did following: ; 1. SP auto-switched to kernel mode stack ; 2. STATUS32.Z flag set if in U mode at time of interrupt (U:1,K:0) ; 3. Auto save: (mandatory) Push PC and STAT32 on stack ; hardware does even if CONFIG_ARC_IRQ_NO_AUTOSAVE - ; 4. Auto save: (optional) r0-r11, blink, LPE,LPS,LPC, JLI,LDI,EI + ; 4a. Auto save: (optional) r0-r11, blink, LPE,LPS,LPC, JLI,LDI,EI ; - ; (B) Manually saved some regs: r12,r25,r30, sp,fp,gp, ACCL pair + ; Now + ; 4b. If Auto-save (optional) not enabled in hw, manually save them + ; 5. Manually save: r12,r30, sp,fp,gp, ACCL pair + ; + ; At the end, SP points to pt_regs #ifdef CONFIG_ARC_IRQ_NO_AUTOSAVE ; carve pt_regs on stack (case #3), PC/STAT32 already on stack @@ -72,15 +75,16 @@ .endm /*------------------------------------------------------------------------*/ -.macro EXCEPTION_PROLOGUE +.macro EXCEPTION_PROLOGUE_KEEP_AE - ; (A) Before jumping to Exception Vector, hardware micro-ops did following: + ; Before jumping to Exception Vector, hardware micro-ops did following: ; 1. SP auto-switched to kernel mode stack ; 2. STATUS32.Z flag set if in U mode at time of exception (U:1,K:0) ; - ; (B) Manually save the complete reg file below + ; Now manually save rest of reg file + ; At the end, SP points to pt_regs - sub sp, sp, SZ_PT_REGS ; carve pt_regs + sub sp, sp, SZ_PT_REGS ; carve space for pt_regs ; _HARD saves r10 clobbered by _SOFT as scratch hence comes first @@ -100,6 +104,16 @@ ; OUTPUT: r10 has ECR expected by EV_Trap .endm +.macro EXCEPTION_PROLOGUE + + EXCEPTION_PROLOGUE_KEEP_AE ; return ECR in r10 + + lr r0, [efa] + mov r1, sp + + FAKE_RET_FROM_EXCPN ; clobbers r9 +.endm + /*------------------------------------------------------------------------ * This macro saves the registers manually which would normally be autosaved * by hardware on taken interrupts. 
It is used by @@ -135,10 +149,10 @@ */ .macro __SAVE_REGFILE_SOFT - ST2 gp, fp, PT_r26 ; gp (r26), fp (r27) - - st r12, [sp, PT_sp + 4] - st r30, [sp, PT_sp + 8] + st fp, [sp, PT_fp] ; r27 + st r30, [sp, PT_r30] + st r12, [sp, PT_r12] + st r26, [sp, PT_r26] ; gp ; Saving pt_regs->sp correctly requires some extra work due to the way ; Auto stack switch works @@ -153,30 +167,30 @@ ; ISA requires ADD.nz to have same dest and src reg operands mov.nz r10, sp - add.nz r10, r10, SZ_PT_REGS ; K mode SP + add2.nz r10, r10, SZ_PT_REGS/4 ; K mode SP st r10, [sp, PT_sp] ; SP (pt_regs->sp) -#ifdef CONFIG_ARC_CURR_IN_REG - st r25, [sp, PT_user_r25] - GET_CURR_TASK_ON_CPU r25 -#endif - #ifdef CONFIG_ARC_HAS_ACCL_REGS ST2 r58, r59, PT_r58 #endif /* clobbers r10, r11 registers pair */ DSP_SAVE_REGFILE_IRQ + +#ifdef CONFIG_ARC_CURR_IN_REG + GET_CURR_TASK_ON_CPU gp +#endif + .endm /*------------------------------------------------------------------------*/ .macro __RESTORE_REGFILE_SOFT - LD2 gp, fp, PT_r26 ; gp (r26), fp (r27) - - ld r12, [sp, PT_r12] + ld fp, [sp, PT_fp] ld r30, [sp, PT_r30] + ld r12, [sp, PT_r12] + ld r26, [sp, PT_r26] ; Restore SP (into AUX_USER_SP) only if returning to U mode ; - for K mode, it will be implicitly restored as stack is unwound @@ -188,10 +202,6 @@ sr r10, [AUX_USER_SP] 1: -#ifdef CONFIG_ARC_CURR_IN_REG - ld r25, [sp, PT_user_r25] -#endif - /* clobbers r10, r11 registers pair */ DSP_RESTORE_REGFILE_IRQ @@ -249,7 +259,7 @@ btst r0, STATUS_U_BIT ; Z flag set if K, used in restoring SP - ld r10, [sp, PT_event + 4] + ld r10, [sp, PT_bta] sr r10, [erbta] LD2 r10, r11, PT_ret @@ -264,8 +274,8 @@ .macro FAKE_RET_FROM_EXCPN lr r9, [status32] - bic r9, r9, STATUS_AE_MASK - or r9, r9, STATUS_IE_MASK + bclr r9, r9, STATUS_AE_BIT + bset r9, r9, STATUS_IE_BIT kflag r9 .endm diff --git a/arch/arc/include/asm/entry-compact.h b/arch/arc/include/asm/entry-compact.h index 67ff06e15cea..a0e760eb35a8 100644 --- a/arch/arc/include/asm/entry-compact.h +++ 
b/arch/arc/include/asm/entry-compact.h @@ -140,7 +140,7 @@ * * After this it is safe to call the "C" handlers *-------------------------------------------------------------*/ -.macro EXCEPTION_PROLOGUE +.macro EXCEPTION_PROLOGUE_KEEP_AE /* Need at least 1 reg to code the early exception prologue */ PROLOG_FREEUP_REG r9, @ex_saved_reg1 @@ -151,14 +151,6 @@ /* ARC700 doesn't provide auto-stack switching */ SWITCH_TO_KERNEL_STK -#ifdef CONFIG_ARC_CURR_IN_REG - /* Treat r25 as scratch reg (save on stack) and load with "current" */ - PUSH r25 - GET_CURR_TASK_ON_CPU r25 -#else - sub sp, sp, 4 -#endif - st.a r0, [sp, -8] /* orig_r0 needed for syscall (skip ECR slot) */ sub sp, sp, 4 /* skip pt_regs->sp, already saved above */ @@ -178,7 +170,23 @@ PUSHAX erbta lr r10, [ecr] - st r10, [sp, PT_event] /* EV_Trap expects r10 to have ECR */ + st r10, [sp, PT_event] + +#ifdef CONFIG_ARC_CURR_IN_REG + /* gp already saved on stack: now load with "current" */ + GET_CURR_TASK_ON_CPU gp +#endif + ; OUTPUT: r10 has ECR expected by EV_Trap +.endm + +.macro EXCEPTION_PROLOGUE + + EXCEPTION_PROLOGUE_KEEP_AE ; return ECR in r10 + + lr r0, [efa] + mov r1, sp + + FAKE_RET_FROM_EXCPN ; clobbers r9 .endm /*-------------------------------------------------------------- @@ -208,11 +216,8 @@ POP gp RESTORE_R12_TO_R0 -#ifdef CONFIG_ARC_CURR_IN_REG - ld r25, [sp, 12] -#endif ld sp, [sp] /* restore original sp */ - /* orig_r0, ECR, user_r25 skipped automatically */ + /* orig_r0, ECR skipped automatically */ .endm /* Dummy ECR values for Interrupts */ @@ -229,13 +234,6 @@ SWITCH_TO_KERNEL_STK -#ifdef CONFIG_ARC_CURR_IN_REG - /* Treat r25 as scratch reg (save on stack) and load with "current" */ - PUSH r25 - GET_CURR_TASK_ON_CPU r25 -#else - sub sp, sp, 4 -#endif PUSH 0x003\LVL\()abcd /* Dummy ECR */ sub sp, sp, 8 /* skip orig_r0 (not needed) @@ -255,6 +253,10 @@ PUSHAX lp_start PUSHAX bta_l\LVL\() +#ifdef CONFIG_ARC_CURR_IN_REG + /* gp already saved on stack: now load with "current" */ + 
GET_CURR_TASK_ON_CPU gp +#endif .endm /*-------------------------------------------------------------- @@ -282,11 +284,7 @@ POP gp RESTORE_R12_TO_R0 -#ifdef CONFIG_ARC_CURR_IN_REG - ld r25, [sp, 12] -#endif - ld sp, [sp] /* restore original sp */ - /* orig_r0, ECR, user_r25 skipped automatically */ + ld sp, [sp] /* restore original sp; orig_r0, ECR skipped implicitly */ .endm /* Get thread_info of "current" tsk */ diff --git a/arch/arc/include/asm/entry.h b/arch/arc/include/asm/entry.h index fcdd59d77f42..49c2e090cb5c 100644 --- a/arch/arc/include/asm/entry.h +++ b/arch/arc/include/asm/entry.h @@ -13,6 +13,8 @@ #include <asm/processor.h> /* For VMALLOC_START */ #include <asm/mmu.h> +#ifdef __ASSEMBLY__ + #ifdef CONFIG_ISA_ARCOMPACT #include <asm/entry-compact.h> /* ISA specific bits */ #else @@ -89,7 +91,7 @@ * Helpers to save/restore callee-saved regs: * used by several macros below *-------------------------------------------------------------*/ -.macro SAVE_R13_TO_R24 +.macro SAVE_R13_TO_R25 PUSH r13 PUSH r14 PUSH r15 @@ -102,9 +104,11 @@ PUSH r22 PUSH r23 PUSH r24 + PUSH r25 .endm -.macro RESTORE_R24_TO_R13 +.macro RESTORE_R25_TO_R13 + POP r25 POP r24 POP r23 POP r22 @@ -119,81 +123,31 @@ POP r13 .endm -/*-------------------------------------------------------------- - * Collect User Mode callee regs as struct callee_regs - needed by - * fork/do_signal/unaligned-access-emulation. - * (By default only scratch regs are saved on entry to kernel) - * - * Special handling for r25 if used for caching Task Pointer. - * It would have been saved in task->thread.user_r25 already, but to keep - * the interface same it is copied into regular r25 placeholder in - * struct callee_regs. - *-------------------------------------------------------------*/ +/* + * save user mode callee regs as struct callee_regs + * - needed by fork/do_signal/unaligned-access-emulation. 
+ */ .macro SAVE_CALLEE_SAVED_USER + SAVE_R13_TO_R25 +.endm - mov r12, sp ; save SP as ref to pt_regs - SAVE_R13_TO_R24 - -#ifdef CONFIG_ARC_CURR_IN_REG - ; Retrieve orig r25 and save it with rest of callee_regs - ld r12, [r12, PT_user_r25] - PUSH r12 -#else - PUSH r25 -#endif - +/* + * restore user mode callee regs as struct callee_regs + * - could have been changed by ptrace tracer or unaligned-access fixup + */ +.macro RESTORE_CALLEE_SAVED_USER + RESTORE_R25_TO_R13 .endm -/*-------------------------------------------------------------- - * Save kernel Mode callee regs at the time of Contect Switch. - * - * Special handling for r25 if used for caching Task Pointer. - * Kernel simply skips saving it since it will be loaded with - * incoming task pointer anyways - *-------------------------------------------------------------*/ +/* + * save/restore kernel mode callee regs at the time of context switch + */ .macro SAVE_CALLEE_SAVED_KERNEL - - SAVE_R13_TO_R24 - -#ifdef CONFIG_ARC_CURR_IN_REG - sub sp, sp, 4 -#else - PUSH r25 -#endif + SAVE_R13_TO_R25 .endm -/*-------------------------------------------------------------- - * Opposite of SAVE_CALLEE_SAVED_KERNEL - *-------------------------------------------------------------*/ .macro RESTORE_CALLEE_SAVED_KERNEL - -#ifdef CONFIG_ARC_CURR_IN_REG - add sp, sp, 4 /* skip usual r25 placeholder */ -#else - POP r25 -#endif - RESTORE_R24_TO_R13 -.endm - -/*-------------------------------------------------------------- - * Opposite of SAVE_CALLEE_SAVED_USER - * - * ptrace tracer or unaligned-access fixup might have changed a user mode - * callee reg which is saved back to usual r25 storage location - *-------------------------------------------------------------*/ -.macro RESTORE_CALLEE_SAVED_USER - -#ifdef CONFIG_ARC_CURR_IN_REG - POP r12 -#else - POP r25 -#endif - RESTORE_R24_TO_R13 - - ; SP is back to start of pt_regs -#ifdef CONFIG_ARC_CURR_IN_REG - st r12, [sp, PT_user_r25] -#endif + RESTORE_R25_TO_R13 .endm 
/*-------------------------------------------------------------- @@ -229,10 +183,10 @@ #ifdef CONFIG_SMP -/*------------------------------------------------- +/* * Retrieve the current running task on this CPU - * 1. Determine curr CPU id. - * 2. Use it to index into _current_task[ ] + * - loads it from backing _current_task[] (and can't use the + * caching reg for current task */ .macro GET_CURR_TASK_ON_CPU reg GET_CPU_ID \reg @@ -254,7 +208,7 @@ add2 \tmp, @_current_task, \tmp st \tsk, [\tmp] #ifdef CONFIG_ARC_CURR_IN_REG - mov r25, \tsk + mov gp, \tsk #endif .endm @@ -269,21 +223,20 @@ .macro SET_CURR_TASK_ON_CPU tsk, tmp st \tsk, [@_current_task] #ifdef CONFIG_ARC_CURR_IN_REG - mov r25, \tsk + mov gp, \tsk #endif .endm #endif /* SMP / UNI */ -/* ------------------------------------------------------------------ +/* * Get the ptr to some field of Current Task at @off in task struct - * -Uses r25 for Current task ptr if that is enabled + * - Uses current task cached in reg if enabled */ - #ifdef CONFIG_ARC_CURR_IN_REG .macro GET_CURR_TASK_FIELD_PTR off, reg - add \reg, r25, \off + add \reg, gp, \off .endm #else @@ -295,4 +248,23 @@ #endif /* CONFIG_ARC_CURR_IN_REG */ +#else /* !__ASSEMBLY__ */ + +extern void do_signal(struct pt_regs *); +extern void do_notify_resume(struct pt_regs *); +extern int do_privilege_fault(unsigned long, struct pt_regs *); +extern int do_extension_fault(unsigned long, struct pt_regs *); +extern int insterror_is_error(unsigned long, struct pt_regs *); +extern int do_memory_error(unsigned long, struct pt_regs *); +extern int trap_is_brkpt(unsigned long, struct pt_regs *); +extern int do_misaligned_error(unsigned long, struct pt_regs *); +extern int do_trap5_error(unsigned long, struct pt_regs *); +extern int do_misaligned_access(unsigned long, struct pt_regs *, struct callee_regs *); +extern void do_machine_check_fault(unsigned long, struct pt_regs *); +extern void do_non_swi_trap(unsigned long, struct pt_regs *); +extern void 
do_insterror_or_kprobe(unsigned long, struct pt_regs *); +extern void do_page_fault(unsigned long, struct pt_regs *); + +#endif + #endif /* __ASM_ARC_ENTRY_H */ diff --git a/arch/arc/include/asm/irq.h b/arch/arc/include/asm/irq.h index 0309cb405cfb..c574712ad865 100644 --- a/arch/arc/include/asm/irq.h +++ b/arch/arc/include/asm/irq.h @@ -25,5 +25,6 @@ #include <asm-generic/irq.h> extern void arc_init_IRQ(void); +extern void arch_do_IRQ(unsigned int, struct pt_regs *); #endif diff --git a/arch/arc/include/asm/mmu.h b/arch/arc/include/asm/mmu.h index ca427c30f70e..9febf5bc3de6 100644 --- a/arch/arc/include/asm/mmu.h +++ b/arch/arc/include/asm/mmu.h @@ -14,6 +14,8 @@ typedef struct { unsigned long asid[NR_CPUS]; /* 8 bit MMU PID + Generation cycle */ } mm_context_t; +extern void do_tlb_overlap_fault(unsigned long, unsigned long, struct pt_regs *); + #endif #include <asm/mmu-arcv2.h> diff --git a/arch/arc/include/asm/processor.h b/arch/arc/include/asm/processor.h index fb844fce1ab6..d606658e2fe7 100644 --- a/arch/arc/include/asm/processor.h +++ b/arch/arc/include/asm/processor.h @@ -22,7 +22,6 @@ * struct thread_info */ struct thread_struct { - unsigned long ksp; /* kernel mode stack pointer */ unsigned long callee_reg; /* pointer to callee regs */ unsigned long fault_address; /* dbls as brkpt holder as well */ #ifdef CONFIG_ARC_DSP_SAVE_RESTORE_REGS @@ -33,9 +32,7 @@ struct thread_struct { #endif }; -#define INIT_THREAD { \ - .ksp = sizeof(init_stack) + (unsigned long) init_stack, \ -} +#define INIT_THREAD { } /* Forward declaration, a strange C thing */ struct task_struct; @@ -56,7 +53,7 @@ struct task_struct; * Where about of Task's sp, fp, blink when it was last seen in kernel mode. 
* Look in process.c for details of kernel stack layout */ -#define TSK_K_ESP(tsk) (tsk->thread.ksp) +#define TSK_K_ESP(tsk) (task_thread_info(tsk)->ksp) #define TSK_K_REG(tsk, off) (*((unsigned long *)(TSK_K_ESP(tsk) + \ sizeof(struct callee_regs) + off))) diff --git a/arch/arc/include/asm/ptrace.h b/arch/arc/include/asm/ptrace.h index 5869a74c0db2..4a2b30fb5a98 100644 --- a/arch/arc/include/asm/ptrace.h +++ b/arch/arc/include/asm/ptrace.h @@ -12,6 +12,17 @@ #ifndef __ASSEMBLY__ +typedef union { + struct { +#ifdef CONFIG_CPU_BIG_ENDIAN + unsigned long state:8, vec:8, cause:8, param:8; +#else + unsigned long param:8, cause:8, vec:8, state:8; +#endif + }; + unsigned long full; +} ecr_reg; + /* THE pt_regs: Defines how regs are saved during entry into kernel */ #ifdef CONFIG_ISA_ARCOMPACT @@ -40,23 +51,10 @@ struct pt_regs { * Last word used by Linux for extra state mgmt (syscall-restart) * For interrupts, use artificial ECR values to note current prio-level */ - union { - struct { -#ifdef CONFIG_CPU_BIG_ENDIAN - unsigned long state:8, ecr_vec:8, - ecr_cause:8, ecr_param:8; -#else - unsigned long ecr_param:8, ecr_cause:8, - ecr_vec:8, state:8; -#endif - }; - unsigned long event; - }; - - unsigned long user_r25; + ecr_reg ecr; }; -#define MAX_REG_OFFSET offsetof(struct pt_regs, user_r25) +#define MAX_REG_OFFSET offsetof(struct pt_regs, ecr) #else @@ -64,28 +62,14 @@ struct pt_regs { unsigned long orig_r0; - union { - struct { -#ifdef CONFIG_CPU_BIG_ENDIAN - unsigned long state:8, ecr_vec:8, - ecr_cause:8, ecr_param:8; -#else - unsigned long ecr_param:8, ecr_cause:8, - ecr_vec:8, state:8; -#endif - }; - unsigned long event; - }; - - unsigned long bta; /* bta_l1, bta_l2, erbta */ + ecr_reg ecr; /* Exception Cause Reg */ - unsigned long user_r25; + unsigned long bta; /* erbta */ - unsigned long r26; /* gp */ unsigned long fp; - unsigned long sp; /* user/kernel sp depending on where we came from */ - - unsigned long r12, r30; + unsigned long r30; + unsigned long r12; + 
unsigned long r26; /* gp */ #ifdef CONFIG_ARC_HAS_ACCL_REGS unsigned long r58, r59; /* ACCL/ACCH used by FPU / DSP MPY */ @@ -94,6 +78,8 @@ struct pt_regs { unsigned long DSP_CTRL; #endif + unsigned long sp; /* user/kernel sp depending on entry */ + /*------- Below list auto saved by h/w -----------*/ unsigned long r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11; @@ -134,13 +120,13 @@ struct callee_regs { /* return 1 if PC in delay slot */ #define delay_mode(regs) ((regs->status32 & STATUS_DE_MASK) == STATUS_DE_MASK) -#define in_syscall(regs) ((regs->ecr_vec == ECR_V_TRAP) && !regs->ecr_param) -#define in_brkpt_trap(regs) ((regs->ecr_vec == ECR_V_TRAP) && regs->ecr_param) +#define in_syscall(regs) ((regs->ecr.vec == ECR_V_TRAP) && !regs->ecr.param) +#define in_brkpt_trap(regs) ((regs->ecr.vec == ECR_V_TRAP) && regs->ecr.param) #define STATE_SCALL_RESTARTED 0x01 -#define syscall_wont_restart(reg) (reg->state |= STATE_SCALL_RESTARTED) -#define syscall_restartable(reg) !(reg->state & STATE_SCALL_RESTARTED) +#define syscall_wont_restart(regs) (regs->ecr.state |= STATE_SCALL_RESTARTED) +#define syscall_restartable(regs) !(regs->ecr.state & STATE_SCALL_RESTARTED) #define current_pt_regs() \ ({ \ @@ -181,6 +167,9 @@ static inline unsigned long regs_get_register(struct pt_regs *regs, return *(unsigned long *)((unsigned long)regs + offset); } +extern int syscall_trace_entry(struct pt_regs *); +extern void syscall_trace_exit(struct pt_regs *); + #endif /* !__ASSEMBLY__ */ #endif /* __ASM_PTRACE_H */ diff --git a/arch/arc/include/asm/setup.h b/arch/arc/include/asm/setup.h index 028a8cf76206..1c6db599e1fc 100644 --- a/arch/arc/include/asm/setup.h +++ b/arch/arc/include/asm/setup.h @@ -35,11 +35,11 @@ long __init arc_get_mem_sz(void); #define IS_AVAIL3(v, v2, s) IS_AVAIL1(v, s), IS_AVAIL1(v, IS_DISABLED_RUN(v2)) extern void arc_mmu_init(void); -extern char *arc_mmu_mumbojumbo(int cpu_id, char *buf, int len); -extern void read_decode_mmu_bcr(void); +extern int 
arc_mmu_mumbojumbo(int cpu_id, char *buf, int len); extern void arc_cache_init(void); -extern char *arc_cache_mumbojumbo(int cpu_id, char *buf, int len); -extern void read_decode_cache_bcr(void); +extern int arc_cache_mumbojumbo(int cpu_id, char *buf, int len); + +extern void __init handle_uboot_args(void); #endif /* __ASMARC_SETUP_H */ diff --git a/arch/arc/include/asm/smp.h b/arch/arc/include/asm/smp.h index d856491606ac..e0913f52c2cd 100644 --- a/arch/arc/include/asm/smp.h +++ b/arch/arc/include/asm/smp.h @@ -29,6 +29,8 @@ extern void arch_send_call_function_ipi_mask(const struct cpumask *mask); extern void __init smp_init_cpus(void); extern void first_lines_of_secondary(void); extern const char *arc_platform_smp_cpuinfo(void); +extern void arc_platform_smp_wait_to_boot(int); +extern void start_kernel_secondary(void); /* * API expected BY platform smp code (FROM arch smp code) diff --git a/arch/arc/include/asm/thread_info.h b/arch/arc/include/asm/thread_info.h index 6ba7fe417095..4c530cf131f3 100644 --- a/arch/arc/include/asm/thread_info.h +++ b/arch/arc/include/asm/thread_info.h @@ -37,16 +37,16 @@ */ struct thread_info { unsigned long flags; /* low level flags */ + unsigned long ksp; /* kernel mode stack top in __switch_to */ int preempt_count; /* 0 => preemptable, <0 => BUG */ - struct task_struct *task; /* main task structure */ - __u32 cpu; /* current CPU */ + int cpu; /* current CPU */ unsigned long thr_ptr; /* TLS ptr */ + struct task_struct *task; /* main task structure */ }; /* - * macros/functions for gaining access to the thread information structure - * - * preempt_count needs to be 1 initially, until the scheduler is functional. 
+ * initilaize thread_info for any @tsk + * - this is not related to init_task per se */ #define INIT_THREAD_INFO(tsk) \ { \ diff --git a/arch/arc/include/asm/uaccess.h b/arch/arc/include/asm/uaccess.h index 99712471c96a..1e8809ea000a 100644 --- a/arch/arc/include/asm/uaccess.h +++ b/arch/arc/include/asm/uaccess.h @@ -146,8 +146,9 @@ raw_copy_from_user(void *to, const void __user *from, unsigned long n) if (n == 0) return 0; - /* unaligned */ - if (((unsigned long)to & 0x3) || ((unsigned long)from & 0x3)) { + /* fallback for unaligned access when hardware doesn't support */ + if (!IS_ENABLED(CONFIG_ARC_USE_UNALIGNED_MEM_ACCESS) && + (((unsigned long)to & 0x3) || ((unsigned long)from & 0x3))) { unsigned char tmp; @@ -373,8 +374,9 @@ raw_copy_to_user(void __user *to, const void *from, unsigned long n) if (n == 0) return 0; - /* unaligned */ - if (((unsigned long)to & 0x3) || ((unsigned long)from & 0x3)) { + /* fallback for unaligned access when hardware doesn't support */ + if (!IS_ENABLED(CONFIG_ARC_USE_UNALIGNED_MEM_ACCESS) && + (((unsigned long)to & 0x3) || ((unsigned long)from & 0x3))) { unsigned char tmp; @@ -584,7 +586,7 @@ raw_copy_to_user(void __user *to, const void *from, unsigned long n) return res; } -static inline unsigned long __arc_clear_user(void __user *to, unsigned long n) +static inline unsigned long __clear_user(void __user *to, unsigned long n) { long res = n; unsigned char *d_char = to; @@ -626,17 +628,10 @@ static inline unsigned long __arc_clear_user(void __user *to, unsigned long n) return res; } -#ifndef CONFIG_CC_OPTIMIZE_FOR_SIZE - #define INLINE_COPY_TO_USER #define INLINE_COPY_FROM_USER -#define __clear_user(d, n) __arc_clear_user(d, n) -#else -extern unsigned long arc_clear_user_noinline(void __user *to, - unsigned long n); -#define __clear_user(d, n) arc_clear_user_noinline(d, n) -#endif +#define __clear_user __clear_user #include <asm-generic/uaccess.h> diff --git a/arch/arc/kernel/Makefile b/arch/arc/kernel/Makefile index 
0723d888ac44..95fbf9364c67 100644 --- a/arch/arc/kernel/Makefile +++ b/arch/arc/kernel/Makefile @@ -5,6 +5,8 @@ obj-y := head.o arcksyms.o setup.o irq.o reset.o ptrace.o process.o devtree.o obj-y += signal.o traps.o sys.o troubleshoot.o stacktrace.o disasm.o +obj-y += ctx_sw_asm.o + obj-$(CONFIG_ISA_ARCOMPACT) += entry-compact.o intc-compact.o obj-$(CONFIG_ISA_ARCV2) += entry-arcv2.o intc-arcv2.o @@ -24,11 +26,4 @@ ifdef CONFIG_ISA_ARCOMPACT CFLAGS_fpu.o += -mdpfp endif -ifdef CONFIG_ARC_DW2_UNWIND -CFLAGS_ctx_sw.o += -fno-omit-frame-pointer -obj-y += ctx_sw.o -else -obj-y += ctx_sw_asm.o -endif - extra-y := vmlinux.lds diff --git a/arch/arc/kernel/asm-offsets.c b/arch/arc/kernel/asm-offsets.c index 0e884036ab74..f77deb799175 100644 --- a/arch/arc/kernel/asm-offsets.c +++ b/arch/arc/kernel/asm-offsets.c @@ -20,13 +20,13 @@ int main(void) BLANK(); - DEFINE(THREAD_KSP, offsetof(struct thread_struct, ksp)); DEFINE(THREAD_CALLEE_REG, offsetof(struct thread_struct, callee_reg)); DEFINE(THREAD_FAULT_ADDR, offsetof(struct thread_struct, fault_address)); BLANK(); + DEFINE(THREAD_INFO_KSP, offsetof(struct thread_info, ksp)); DEFINE(THREAD_INFO_FLAGS, offsetof(struct thread_info, flags)); DEFINE(THREAD_INFO_PREEMPT_COUNT, offsetof(struct thread_info, preempt_count)); @@ -46,7 +46,8 @@ int main(void) BLANK(); DEFINE(PT_status32, offsetof(struct pt_regs, status32)); - DEFINE(PT_event, offsetof(struct pt_regs, event)); + DEFINE(PT_event, offsetof(struct pt_regs, ecr)); + DEFINE(PT_bta, offsetof(struct pt_regs, bta)); DEFINE(PT_sp, offsetof(struct pt_regs, sp)); DEFINE(PT_r0, offsetof(struct pt_regs, r0)); DEFINE(PT_r1, offsetof(struct pt_regs, r1)); @@ -61,13 +62,9 @@ int main(void) DEFINE(PT_r26, offsetof(struct pt_regs, r26)); DEFINE(PT_ret, offsetof(struct pt_regs, ret)); DEFINE(PT_blink, offsetof(struct pt_regs, blink)); + OFFSET(PT_fp, pt_regs, fp); DEFINE(PT_lpe, offsetof(struct pt_regs, lp_end)); DEFINE(PT_lpc, offsetof(struct pt_regs, lp_count)); - DEFINE(PT_user_r25, 
offsetof(struct pt_regs, user_r25)); - - DEFINE(SZ_CALLEE_REGS, sizeof(struct callee_regs)); - DEFINE(SZ_PT_REGS, sizeof(struct pt_regs)); - #ifdef CONFIG_ISA_ARCV2 OFFSET(PT_r12, pt_regs, r12); OFFSET(PT_r30, pt_regs, r30); @@ -80,5 +77,8 @@ int main(void) OFFSET(PT_DSP_CTRL, pt_regs, DSP_CTRL); #endif + DEFINE(SZ_CALLEE_REGS, sizeof(struct callee_regs)); + DEFINE(SZ_PT_REGS, sizeof(struct pt_regs)); + return 0; } diff --git a/arch/arc/kernel/ctx_sw.c b/arch/arc/kernel/ctx_sw.c deleted file mode 100644 index 1a76f2d6f694..000000000000 --- a/arch/arc/kernel/ctx_sw.c +++ /dev/null @@ -1,112 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) - * - * Vineetg: Aug 2009 - * -"C" version of lowest level context switch asm macro called by schedular - * gcc doesn't generate the dward CFI info for hand written asm, hence can't - * backtrace out of it (e.g. tasks sleeping in kernel). - * So we cheat a bit by writing almost similar code in inline-asm. - * -This is a hacky way of doing things, but there is no other simple way. 
- * I don't want/intend to extend unwinding code to understand raw asm - */ - -#include <asm/asm-offsets.h> -#include <linux/sched.h> -#include <linux/sched/debug.h> - -#define KSP_WORD_OFF ((TASK_THREAD + THREAD_KSP) / 4) - -struct task_struct *__sched -__switch_to(struct task_struct *prev_task, struct task_struct *next_task) -{ - unsigned int tmp; - unsigned int prev = (unsigned int)prev_task; - unsigned int next = (unsigned int)next_task; - - __asm__ __volatile__( - /* FP/BLINK save generated by gcc (standard function prologue */ - "st.a r13, [sp, -4] \n\t" - "st.a r14, [sp, -4] \n\t" - "st.a r15, [sp, -4] \n\t" - "st.a r16, [sp, -4] \n\t" - "st.a r17, [sp, -4] \n\t" - "st.a r18, [sp, -4] \n\t" - "st.a r19, [sp, -4] \n\t" - "st.a r20, [sp, -4] \n\t" - "st.a r21, [sp, -4] \n\t" - "st.a r22, [sp, -4] \n\t" - "st.a r23, [sp, -4] \n\t" - "st.a r24, [sp, -4] \n\t" -#ifndef CONFIG_ARC_CURR_IN_REG - "st.a r25, [sp, -4] \n\t" -#else - "sub sp, sp, 4 \n\t" /* usual r25 placeholder */ -#endif - - /* set ksp of outgoing task in tsk->thread.ksp */ -#if KSP_WORD_OFF <= 255 - "st.as sp, [%3, %1] \n\t" -#else - /* - * Workaround for NR_CPUS=4k - * %1 is bigger than 255 (S9 offset for st.as) - */ - "add2 r24, %3, %1 \n\t" - "st sp, [r24] \n\t" -#endif - - /* - * setup _current_task with incoming tsk. 
- * optionally, set r25 to that as well - * For SMP extra work to get to &_current_task[cpu] - * (open coded SET_CURR_TASK_ON_CPU) - */ -#ifndef CONFIG_SMP - "st %2, [@_current_task] \n\t" -#else - "lr r24, [identity] \n\t" - "lsr r24, r24, 8 \n\t" - "bmsk r24, r24, 7 \n\t" - "add2 r24, @_current_task, r24 \n\t" - "st %2, [r24] \n\t" -#endif -#ifdef CONFIG_ARC_CURR_IN_REG - "mov r25, %2 \n\t" -#endif - - /* get ksp of incoming task from tsk->thread.ksp */ - "ld.as sp, [%2, %1] \n\t" - - /* start loading it's CALLEE reg file */ - -#ifndef CONFIG_ARC_CURR_IN_REG - "ld.ab r25, [sp, 4] \n\t" -#else - "add sp, sp, 4 \n\t" -#endif - "ld.ab r24, [sp, 4] \n\t" - "ld.ab r23, [sp, 4] \n\t" - "ld.ab r22, [sp, 4] \n\t" - "ld.ab r21, [sp, 4] \n\t" - "ld.ab r20, [sp, 4] \n\t" - "ld.ab r19, [sp, 4] \n\t" - "ld.ab r18, [sp, 4] \n\t" - "ld.ab r17, [sp, 4] \n\t" - "ld.ab r16, [sp, 4] \n\t" - "ld.ab r15, [sp, 4] \n\t" - "ld.ab r14, [sp, 4] \n\t" - "ld.ab r13, [sp, 4] \n\t" - - /* last (ret value) = prev : although for ARC it mov r0, r0 */ - "mov %0, %3 \n\t" - - /* FP/BLINK restore generated by gcc (standard func epilogue */ - - : "=r"(tmp) - : "n"(KSP_WORD_OFF), "r"(next), "r"(prev) - : "blink" - ); - - return (struct task_struct *)tmp; -} diff --git a/arch/arc/kernel/ctx_sw_asm.S b/arch/arc/kernel/ctx_sw_asm.S index 02c461484761..48e1f21976ed 100644 --- a/arch/arc/kernel/ctx_sw_asm.S +++ b/arch/arc/kernel/ctx_sw_asm.S @@ -11,50 +11,54 @@ #include <asm/entry.h> /* For the SAVE_* macros */ #include <asm/asm-offsets.h> -#define KSP_WORD_OFF ((TASK_THREAD + THREAD_KSP) / 4) - -;################### Low Level Context Switch ########################## +; IN +; - r0: prev task (also current) +; - r1: next task +; OUT +; - r0: prev task (so r0 not touched) .section .sched.text,"ax",@progbits - .align 4 - .global __switch_to - .type __switch_to, @function -__switch_to: - CFI_STARTPROC - - /* Save regs on kernel mode stack of task */ - st.a blink, [sp, -4] - st.a fp, [sp, -4] - 
SAVE_CALLEE_SAVED_KERNEL +ENTRY_CFI(__switch_to) - /* Save the now KSP in task->thread.ksp */ -#if KSP_WORD_OFF <= 255 - st.as sp, [r0, KSP_WORD_OFF] -#else - /* Workaround for NR_CPUS=4k as ST.as can only take s9 offset */ - add2 r24, r0, KSP_WORD_OFF - st sp, [r24] -#endif - /* - * Return last task in r0 (return reg) - * On ARC, Return reg = First Arg reg = r0. - * Since we already have last task in r0, - * don't need to do anything special to return it - */ + /* save kernel stack frame regs of @prev task */ + push blink + CFI_DEF_CFA_OFFSET 4 + CFI_OFFSET r31, -4 + + push fp + CFI_DEF_CFA_OFFSET 8 + CFI_OFFSET r27, -8 + + mov fp, sp + CFI_DEF_CFA_REGISTER r27 + + /* kernel mode callee regs of @prev */ + SAVE_CALLEE_SAVED_KERNEL /* - * switch to new task, contained in r1 - * Temp reg r3 is required to get the ptr to store val + * save final SP to @prev->thread_info.ksp + * @prev is "current" so thread_info derived from SP */ - SET_CURR_TASK_ON_CPU r1, r3 + GET_CURR_THR_INFO_FROM_SP r10 + st sp, [r10, THREAD_INFO_KSP] + + /* update @next in _current_task[] and GP register caching it */ + SET_CURR_TASK_ON_CPU r1, r10 - /* reload SP with kernel mode stack pointer in task->thread.ksp */ - ld.as sp, [r1, (TASK_THREAD + THREAD_KSP)/4] + /* load SP from @next->thread_info.ksp */ + ld r10, [r1, TASK_THREAD_INFO] + ld sp, [r10, THREAD_INFO_KSP] - /* restore the registers */ + /* restore callee regs, stack frame regs of @next */ RESTORE_CALLEE_SAVED_KERNEL - ld.ab fp, [sp, 4] - ld.ab blink, [sp, 4] - j [blink] + pop fp + CFI_RESTORE r27 + CFI_DEF_CFA r28, 4 + + pop blink + CFI_RESTORE r31 + CFI_DEF_CFA_OFFSET 0 + + j [blink] END_CFI(__switch_to) diff --git a/arch/arc/kernel/devtree.c b/arch/arc/kernel/devtree.c index 721d465f1580..4c9e61457b2f 100644 --- a/arch/arc/kernel/devtree.c +++ b/arch/arc/kernel/devtree.c @@ -12,6 +12,7 @@ #include <linux/of.h> #include <linux/of_fdt.h> #include <asm/mach_desc.h> +#include <asm/serial.h> #ifdef CONFIG_SERIAL_EARLYCON diff --git 
a/arch/arc/kernel/entry-arcv2.S b/arch/arc/kernel/entry-arcv2.S index a7e6a2174187..2e49c81c8086 100644 --- a/arch/arc/kernel/entry-arcv2.S +++ b/arch/arc/kernel/entry-arcv2.S @@ -125,11 +125,6 @@ ENTRY(mem_service) EXCEPTION_PROLOGUE - lr r0, [efa] - mov r1, sp - - FAKE_RET_FROM_EXCPN - bl do_memory_error b ret_from_exception END(mem_service) @@ -138,11 +133,6 @@ ENTRY(EV_Misaligned) EXCEPTION_PROLOGUE - lr r0, [efa] ; Faulting Data address - mov r1, sp - - FAKE_RET_FROM_EXCPN - SAVE_CALLEE_SAVED_USER mov r2, sp ; callee_regs @@ -163,11 +153,6 @@ ENTRY(EV_TLBProtV) EXCEPTION_PROLOGUE - lr r0, [efa] ; Faulting Data address - mov r1, sp ; pt_regs - - FAKE_RET_FROM_EXCPN - mov blink, ret_from_exception b do_page_fault diff --git a/arch/arc/kernel/entry-compact.S b/arch/arc/kernel/entry-compact.S index 5cb0cd7e4eab..774c03cc1d1a 100644 --- a/arch/arc/kernel/entry-compact.S +++ b/arch/arc/kernel/entry-compact.S @@ -254,18 +254,7 @@ END(handle_interrupt_level1) ENTRY(EV_TLBProtV) - EXCEPTION_PROLOGUE - - mov r2, r10 ; ECR set into r10 already - lr r0, [efa] ; Faulting Data address (not part of pt_regs saved above) - - ; Exception auto-disables further Intr/exceptions. - ; Re-enable them by pretending to return from exception - ; (so rest of handler executes in pure K mode) - - FAKE_RET_FROM_EXCPN - - mov r1, sp ; Handle to pt_regs + EXCEPTION_PROLOGUE ; ECR returned in r10 ;------ (5) Type of Protection Violation? 
---------- ; @@ -273,8 +262,7 @@ ENTRY(EV_TLBProtV) ; -Access Violation : 00_23_(00|01|02|03)_00 ; x r w r+w ; -Unaligned Access : 00_23_04_00 - ; - bbit1 r2, ECR_C_BIT_PROTV_MISALIG_DATA, 4f + bbit1 r10, ECR_C_BIT_PROTV_MISALIG_DATA, 4f ;========= (6a) Access Violation Processing ======== bl do_page_fault @@ -303,9 +291,6 @@ END(EV_TLBProtV) ENTRY(call_do_page_fault) EXCEPTION_PROLOGUE - lr r0, [efa] ; Faulting Data address - mov r1, sp - FAKE_RET_FROM_EXCPN mov blink, ret_from_exception b do_page_fault diff --git a/arch/arc/kernel/entry.S b/arch/arc/kernel/entry.S index 54e91df678dd..089f6680518f 100644 --- a/arch/arc/kernel/entry.S +++ b/arch/arc/kernel/entry.S @@ -80,11 +80,6 @@ ENTRY(instr_service) EXCEPTION_PROLOGUE - lr r0, [efa] - mov r1, sp - - FAKE_RET_FROM_EXCPN - bl do_insterror_or_kprobe b ret_from_exception END(instr_service) @@ -95,16 +90,15 @@ END(instr_service) ENTRY(EV_MachineCheck) - EXCEPTION_PROLOGUE + EXCEPTION_PROLOGUE_KEEP_AE ; ECR returned in r10 - lr r2, [ecr] lr r0, [efa] mov r1, sp ; MC excpetions disable MMU ARC_MMU_REENABLE r3 - lsr r3, r2, 8 + lsr r3, r10, 8 bmsk r3, r3, 7 brne r3, ECR_C_MCHK_DUP_TLB, 1f @@ -129,11 +123,6 @@ ENTRY(EV_PrivilegeV) EXCEPTION_PROLOGUE - lr r0, [efa] - mov r1, sp - - FAKE_RET_FROM_EXCPN - bl do_privilege_fault b ret_from_exception END(EV_PrivilegeV) @@ -145,11 +134,6 @@ ENTRY(EV_Extension) EXCEPTION_PROLOGUE - lr r0, [efa] - mov r1, sp - - FAKE_RET_FROM_EXCPN - bl do_extension_fault b ret_from_exception END(EV_Extension) @@ -160,20 +144,19 @@ END(EV_Extension) ; syscall Tracing ; --------------------------------------------- tracesys: - ; save EFA in case tracer wants the PC of traced task - ; using ERET won't work since next-PC has already committed + ; safekeep EFA (r12) if syscall tracer wanted PC + ; for traps, ERET is pre-commit so points to next-PC GET_CURR_TASK_FIELD_PTR TASK_THREAD, r11 st r12, [r11, THREAD_FAULT_ADDR] ; thread.fault_address - ; PRE Sys Call Ptrace hook - mov r0, sp ; pt_regs 
needed - bl @syscall_trace_entry + ; PRE syscall trace hook + mov r0, sp ; pt_regs + bl @syscall_trace_enter ; Tracing code now returns the syscall num (orig or modif) mov r8, r0 ; Do the Sys Call as we normally would. - ; Validate the Sys Call number cmp r8, NR_syscalls - 1 mov.hi r0, -ENOSYS bhi tracesys_exit @@ -190,37 +173,36 @@ tracesys: ld r6, [sp, PT_r6] ld r7, [sp, PT_r7] ld.as r9, [sys_call_table, r8] - jl [r9] ; Entry into Sys Call Handler + jl [r9] tracesys_exit: - st r0, [sp, PT_r0] ; sys call return value in pt_regs + st r0, [sp, PT_r0] - ;POST Sys Call Ptrace Hook + ; POST syscall trace hook mov r0, sp ; pt_regs needed bl @syscall_trace_exit - b ret_from_exception ; NOT ret_from_system_call at is saves r0 which - ; we'd done before calling post hook above + + ; don't call ret_from_system_call as it saves r0, already done above + b ret_from_exception ; --------------------------------------------- ; Breakpoint TRAP ; --------------------------------------------- trap_with_param: mov r0, r12 ; EFA in case ptracer/gdb wants stop_pc - mov r1, sp + mov r1, sp ; pt_regs - ; Save callee regs in case gdb wants to have a look - ; SP will grow up by size of CALLEE Reg-File - ; NOTE: clobbers r12 + ; save callee regs in case tracer/gdb wants to peek SAVE_CALLEE_SAVED_USER - ; save location of saved Callee Regs @ thread_struct->pc + ; safekeep ref to callee regs GET_CURR_TASK_FIELD_PTR TASK_THREAD, r10 st sp, [r10, THREAD_CALLEE_REG] - ; Call the trap handler + ; call the non syscall trap handler bl do_non_swi_trap - ; unwind stack to discard Callee saved Regs + ; unwind stack to discard callee regs DISCARD_CALLEE_SAVED_USER b ret_from_exception @@ -232,37 +214,33 @@ trap_with_param: ENTRY(EV_Trap) - EXCEPTION_PROLOGUE + EXCEPTION_PROLOGUE_KEEP_AE lr r12, [efa] FAKE_RET_FROM_EXCPN - ;============ TRAP 1 :breakpoints - ; Check ECR for trap with arg (PROLOGUE ensures r10 has ECR) + ;============ TRAP N : breakpoints, kprobes etc bmsk.f 0, r10, 7 bnz trap_with_param 
- ;============ TRAP (no param): syscall top level + ;============ TRAP 0 (no param): syscall - ; If syscall tracing ongoing, invoke pre-post-hooks + ; syscall tracing ongoing, invoke pre-post-hooks around syscall GET_CURR_THR_INFO_FLAGS r10 and.f 0, r10, _TIF_SYSCALL_WORK bnz tracesys ; this never comes back ;============ Normal syscall case - ; syscall num shd not exceed the total system calls avail cmp r8, NR_syscalls - 1 mov.hi r0, -ENOSYS bhi .Lret_from_system_call - ; Offset into the syscall_table and call handler ld.as r9,[sys_call_table, r8] - jl [r9] ; Entry into Sys Call Handler + jl [r9] .Lret_from_system_call: - st r0, [sp, PT_r0] ; sys call return value in pt_regs ; fall through to ret_from_exception @@ -318,7 +296,7 @@ resume_user_mode_begin: ; tracer might call PEEKUSR(CALLEE reg) ; ; NOTE: SP will grow up by size of CALLEE Reg-File - SAVE_CALLEE_SAVED_USER ; clobbers r12 + SAVE_CALLEE_SAVED_USER ; save location of saved Callee Regs @ thread_struct->callee GET_CURR_TASK_FIELD_PTR TASK_THREAD, r10 diff --git a/arch/arc/kernel/intc-arcv2.c b/arch/arc/kernel/intc-arcv2.c index 5cda19d0aa91..678898757e47 100644 --- a/arch/arc/kernel/intc-arcv2.c +++ b/arch/arc/kernel/intc-arcv2.c @@ -108,7 +108,7 @@ static void arcv2_irq_unmask(struct irq_data *data) write_aux_reg(AUX_IRQ_ENABLE, 1); } -void arcv2_irq_enable(struct irq_data *data) +static void arcv2_irq_enable(struct irq_data *data) { /* set default priority */ write_aux_reg(AUX_IRQ_SELECT, data->hwirq); diff --git a/arch/arc/kernel/kgdb.c b/arch/arc/kernel/kgdb.c index 345a0000554c..4f2b5951454f 100644 --- a/arch/arc/kernel/kgdb.c +++ b/arch/arc/kernel/kgdb.c @@ -175,7 +175,7 @@ void kgdb_trap(struct pt_regs *regs) * with trap_s 4 (compiled) breakpoints, continuation needs to * start after the breakpoint. 
*/ - if (regs->ecr_param == 3) + if (regs->ecr.param == 3) instruction_pointer(regs) -= BREAK_INSTR_SIZE; kgdb_handle_exception(1, SIGTRAP, 0, regs); diff --git a/arch/arc/kernel/mcip.c b/arch/arc/kernel/mcip.c index f9fdb557c263..55373ca0d28b 100644 --- a/arch/arc/kernel/mcip.c +++ b/arch/arc/kernel/mcip.c @@ -165,8 +165,6 @@ static void mcip_probe_n_setup(void) IS_AVAIL1(mp.idu, "IDU "), IS_AVAIL1(mp.dbg, "DEBUG "), IS_AVAIL1(mp.gfrc, "GFRC")); - - cpuinfo_arc700[0].extn.gfrc = mp.gfrc; } struct plat_smp_ops plat_smp_ops = { diff --git a/arch/arc/kernel/process.c b/arch/arc/kernel/process.c index 980b71da2f61..186ceab661eb 100644 --- a/arch/arc/kernel/process.c +++ b/arch/arc/kernel/process.c @@ -141,7 +141,7 @@ asmlinkage void ret_from_fork(void); * | unused | * | | * ------------------ - * | r25 | <==== top of Stack (thread.ksp) + * | r25 | <==== top of Stack (thread_info.ksp) * ~ ~ * | --to-- | (CALLEE Regs of kernel mode) * | r13 | @@ -162,7 +162,6 @@ asmlinkage void ret_from_fork(void); * | SP | * | orig_r0 | * | event/ECR | - * | user_r25 | * ------------------ <===== END of PAGE */ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) @@ -182,14 +181,14 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) c_callee = ((struct callee_regs *)childksp) - 1; /* - * __switch_to() uses thread.ksp to start unwinding stack + * __switch_to() uses thread_info.ksp to start unwinding stack * For kernel threads we don't need to create callee regs, the * stack layout nevertheless needs to remain the same. * Also, since __switch_to anyways unwinds callee regs, we use * this to populate kernel thread entry-pt/args into callee regs, * so that ret_from_kernel_thread() becomes simpler. 
*/ - p->thread.ksp = (unsigned long)c_callee; /* THREAD_KSP */ + task_thread_info(p)->ksp = (unsigned long)c_callee; /* THREAD_INFO_KSP */ /* __switch_to expects FP(0), BLINK(return addr) at top */ childksp[0] = 0; /* fp */ @@ -243,16 +242,6 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) */ c_callee->r25 = task_thread_info(p)->thr_ptr; -#ifdef CONFIG_ARC_CURR_IN_REG - /* - * setup usermode thread pointer #2: - * however for this special use of r25 in kernel, __switch_to() sets - * r25 for kernel needs and only in the final return path is usermode - * r25 setup, from pt_regs->user_r25. So set that up as well - */ - c_regs->user_r25 = c_callee->r25; -#endif - return 0; } diff --git a/arch/arc/kernel/ptrace.c b/arch/arc/kernel/ptrace.c index 2abdcd9b09e8..e0c233c178b1 100644 --- a/arch/arc/kernel/ptrace.c +++ b/arch/arc/kernel/ptrace.c @@ -46,8 +46,7 @@ static const struct pt_regs_offset regoffset_table[] = { REG_OFFSET_NAME(r0), REG_OFFSET_NAME(sp), REG_OFFSET_NAME(orig_r0), - REG_OFFSET_NAME(event), - REG_OFFSET_NAME(user_r25), + REG_OFFSET_NAME(ecr), REG_OFFSET_END, }; @@ -55,9 +54,8 @@ static const struct pt_regs_offset regoffset_table[] = { static const struct pt_regs_offset regoffset_table[] = { REG_OFFSET_NAME(orig_r0), - REG_OFFSET_NAME(event), + REG_OFFSET_NAME(ecr), REG_OFFSET_NAME(bta), - REG_OFFSET_NAME(user_r25), REG_OFFSET_NAME(r26), REG_OFFSET_NAME(fp), REG_OFFSET_NAME(sp), @@ -341,7 +339,7 @@ long arch_ptrace(struct task_struct *child, long request, return ret; } -asmlinkage int syscall_trace_entry(struct pt_regs *regs) +asmlinkage int syscall_trace_enter(struct pt_regs *regs) { if (test_thread_flag(TIF_SYSCALL_TRACE)) if (ptrace_report_syscall_entry(regs)) diff --git a/arch/arc/kernel/setup.c b/arch/arc/kernel/setup.c index 41f07b3e594e..4dcf8589b708 100644 --- a/arch/arc/kernel/setup.c +++ b/arch/arc/kernel/setup.c @@ -29,6 +29,7 @@ #include <asm/mach_desc.h> #include <asm/smp.h> #include <asm/dsp-impl.h> +#include 
<soc/arc/mcip.h> #define FIX_PTR(x) __asm__ __volatile__(";" : "+r"(x)) @@ -43,19 +44,22 @@ const struct machine_desc *machine_desc; struct task_struct *_current_task[NR_CPUS]; /* For stack switching */ -struct cpuinfo_arc cpuinfo_arc700[NR_CPUS]; +struct cpuinfo_arc { + int arcver; + unsigned int t0:1, t1:1; + struct { + unsigned long base; + unsigned int sz; + } iccm, dccm; +}; + +#ifdef CONFIG_ISA_ARCV2 -static const struct id_to_str arc_legacy_rel[] = { +static const struct id_to_str arc_hs_rel[] = { /* ID.ARCVER, Release */ -#ifdef CONFIG_ISA_ARCOMPACT - { 0x34, "R4.10"}, - { 0x35, "R4.11"}, -#else { 0x51, "R2.0" }, { 0x52, "R2.1" }, { 0x53, "R3.0" }, -#endif - { 0x00, NULL } }; static const struct id_to_str arc_hs_ver54_rel[] = { @@ -66,323 +70,296 @@ static const struct id_to_str arc_hs_ver54_rel[] = { { 3, "R4.00a"}, { 0xFF, NULL } }; +#endif -static void read_decode_ccm_bcr(struct cpuinfo_arc *cpu) +static int +arcompact_mumbojumbo(int c, struct cpuinfo_arc *info, char *buf, int len) { - if (is_isa_arcompact()) { - struct bcr_iccm_arcompact iccm; - struct bcr_dccm_arcompact dccm; + int n = 0; +#ifdef CONFIG_ISA_ARCOMPACT + char *cpu_nm, *isa_nm = "ARCompact"; + struct bcr_fp_arcompact fpu_sp, fpu_dp; + int atomic = 0, be, present; + int bpu_full, bpu_cache, bpu_pred; + struct bcr_bpu_arcompact bpu; + struct bcr_iccm_arcompact iccm; + struct bcr_dccm_arcompact dccm; + struct bcr_generic isa; - READ_BCR(ARC_REG_ICCM_BUILD, iccm); - if (iccm.ver) { - cpu->iccm.sz = 4096 << iccm.sz; /* 8K to 512K */ - cpu->iccm.base_addr = iccm.base << 16; - } + READ_BCR(ARC_REG_ISA_CFG_BCR, isa); - READ_BCR(ARC_REG_DCCM_BUILD, dccm); - if (dccm.ver) { - unsigned long base; - cpu->dccm.sz = 2048 << dccm.sz; /* 2K to 256K */ + if (!isa.ver) /* ISA BCR absent, use Kconfig info */ + atomic = IS_ENABLED(CONFIG_ARC_HAS_LLSC); + else { + /* ARC700_BUILD only has 2 bits of isa info */ + atomic = isa.info & 1; + } - base = read_aux_reg(ARC_REG_DCCM_BASE_BUILD); - cpu->dccm.base_addr = 
base & ~0xF; - } - } else { - struct bcr_iccm_arcv2 iccm; - struct bcr_dccm_arcv2 dccm; - unsigned long region; - - READ_BCR(ARC_REG_ICCM_BUILD, iccm); - if (iccm.ver) { - cpu->iccm.sz = 256 << iccm.sz00; /* 512B to 16M */ - if (iccm.sz00 == 0xF && iccm.sz01 > 0) - cpu->iccm.sz <<= iccm.sz01; - - region = read_aux_reg(ARC_REG_AUX_ICCM); - cpu->iccm.base_addr = region & 0xF0000000; - } + be = IS_ENABLED(CONFIG_CPU_BIG_ENDIAN); - READ_BCR(ARC_REG_DCCM_BUILD, dccm); - if (dccm.ver) { - cpu->dccm.sz = 256 << dccm.sz0; - if (dccm.sz0 == 0xF && dccm.sz1 > 0) - cpu->dccm.sz <<= dccm.sz1; + if (info->arcver < 0x34) + cpu_nm = "ARC750"; + else + cpu_nm = "ARC770"; - region = read_aux_reg(ARC_REG_AUX_DCCM); - cpu->dccm.base_addr = region & 0xF0000000; - } - } -} + n += scnprintf(buf + n, len - n, "processor [%d]\t: %s (%s ISA) %s%s%s\n", + c, cpu_nm, isa_nm, + IS_AVAIL2(atomic, "atomic ", CONFIG_ARC_HAS_LLSC), + IS_AVAIL1(be, "[Big-Endian]")); -static void decode_arc_core(struct cpuinfo_arc *cpu) -{ - struct bcr_uarch_build_arcv2 uarch; - const struct id_to_str *tbl; - - if (cpu->core.family < 0x54) { /* includes arc700 */ + READ_BCR(ARC_REG_FP_BCR, fpu_sp); + READ_BCR(ARC_REG_DPFP_BCR, fpu_dp); - for (tbl = &arc_legacy_rel[0]; tbl->id != 0; tbl++) { - if (cpu->core.family == tbl->id) { - cpu->release = tbl->str; - break; - } - } + if (fpu_sp.ver | fpu_dp.ver) + n += scnprintf(buf + n, len - n, "FPU\t\t: %s%s\n", + IS_AVAIL1(fpu_sp.ver, "SP "), + IS_AVAIL1(fpu_dp.ver, "DP ")); - if (is_isa_arcompact()) - cpu->name = "ARC700"; - else if (tbl->str) - cpu->name = "HS38"; - else - cpu->name = cpu->release = "Unknown"; + READ_BCR(ARC_REG_BPU_BCR, bpu); + bpu_full = bpu.fam ? 
1 : 0; + bpu_cache = 256 << (bpu.ent - 1); + bpu_pred = 256 << (bpu.ent - 1); - return; + n += scnprintf(buf + n, len - n, + "BPU\t\t: %s%s match, cache:%d, Predict Table:%d\n", + IS_AVAIL1(bpu_full, "full"), + IS_AVAIL1(!bpu_full, "partial"), + bpu_cache, bpu_pred); + + READ_BCR(ARC_REG_ICCM_BUILD, iccm); + if (iccm.ver) { + info->iccm.sz = 4096 << iccm.sz; /* 8K to 512K */ + info->iccm.base = iccm.base << 16; } - /* - * Initial HS cores bumped AUX IDENTITY.ARCVER for each release until - * ARCVER 0x54 which introduced AUX MICRO_ARCH_BUILD and subsequent - * releases only update it. - */ - READ_BCR(ARC_REG_MICRO_ARCH_BCR, uarch); - - if (uarch.prod == 4) { - cpu->name = "HS48"; - cpu->extn.dual = 1; + READ_BCR(ARC_REG_DCCM_BUILD, dccm); + if (dccm.ver) { + unsigned long base; + info->dccm.sz = 2048 << dccm.sz; /* 2K to 256K */ - } else { - cpu->name = "HS38"; + base = read_aux_reg(ARC_REG_DCCM_BASE_BUILD); + info->dccm.base = base & ~0xF; } - for (tbl = &arc_hs_ver54_rel[0]; tbl->id != 0xFF; tbl++) { - if (uarch.maj == tbl->id) { - cpu->release = tbl->str; - break; - } - } + /* ARCompact ISA specific sanity checks */ + present = fpu_dp.ver; /* SP has no arch visible regs */ + CHK_OPT_STRICT(CONFIG_ARC_FPU_SAVE_RESTORE, present); +#endif + return n; + } -static void read_arc_build_cfg_regs(void) +static int arcv2_mumbojumbo(int c, struct cpuinfo_arc *info, char *buf, int len) { - struct bcr_timer timer; - struct bcr_generic bcr; - struct cpuinfo_arc *cpu = &cpuinfo_arc700[smp_processor_id()]; + int n = 0; +#ifdef CONFIG_ISA_ARCV2 + const char *release, *cpu_nm, *isa_nm = "ARCv2"; + int dual_issue = 0, dual_enb = 0, mpy_opt, present; + int bpu_full, bpu_cache, bpu_pred, bpu_ret_stk; + char mpy_nm[16], lpb_nm[32]; struct bcr_isa_arcv2 isa; - struct bcr_actionpoint ap; - - FIX_PTR(cpu); + struct bcr_mpy mpy; + struct bcr_fp_arcv2 fpu; + struct bcr_bpu_arcv2 bpu; + struct bcr_lpb lpb; + struct bcr_iccm_arcv2 iccm; + struct bcr_dccm_arcv2 dccm; + struct bcr_erp erp; - 
READ_BCR(AUX_IDENTITY, cpu->core); - decode_arc_core(cpu); - - READ_BCR(ARC_REG_TIMERS_BCR, timer); - cpu->extn.timer0 = timer.t0; - cpu->extn.timer1 = timer.t1; - cpu->extn.rtc = timer.rtc; - - cpu->vec_base = read_aux_reg(AUX_INTR_VEC_BASE); + /* + * Initial HS cores bumped AUX IDENTITY.ARCVER for each release until + * ARCVER 0x54 which introduced AUX MICRO_ARCH_BUILD and subsequent + * releases only update it. + */ - READ_BCR(ARC_REG_MUL_BCR, cpu->extn_mpy); + cpu_nm = "HS38"; - /* Read CCM BCRs for boot reporting even if not enabled in Kconfig */ - read_decode_ccm_bcr(cpu); + if (info->arcver > 0x50 && info->arcver <= 0x53) { + release = arc_hs_rel[info->arcver - 0x51].str; + } else { + const struct id_to_str *tbl; + struct bcr_uarch_build uarch; - read_decode_mmu_bcr(); - read_decode_cache_bcr(); + READ_BCR(ARC_REG_MICRO_ARCH_BCR, uarch); - if (is_isa_arcompact()) { - struct bcr_fp_arcompact sp, dp; - struct bcr_bpu_arcompact bpu; - - READ_BCR(ARC_REG_FP_BCR, sp); - READ_BCR(ARC_REG_DPFP_BCR, dp); - cpu->extn.fpu_sp = sp.ver ? 1 : 0; - cpu->extn.fpu_dp = dp.ver ? 1 : 0; - - READ_BCR(ARC_REG_BPU_BCR, bpu); - cpu->bpu.ver = bpu.ver; - cpu->bpu.full = bpu.fam ? 1 : 0; - if (bpu.ent) { - cpu->bpu.num_cache = 256 << (bpu.ent - 1); - cpu->bpu.num_pred = 256 << (bpu.ent - 1); + for (tbl = &arc_hs_ver54_rel[0]; tbl->id != 0xFF; tbl++) { + if (uarch.maj == tbl->id) { + release = tbl->str; + break; + } } - } else { - struct bcr_fp_arcv2 spdp; - struct bcr_bpu_arcv2 bpu; - - READ_BCR(ARC_REG_FP_V2_BCR, spdp); - cpu->extn.fpu_sp = spdp.sp ? 1 : 0; - cpu->extn.fpu_dp = spdp.dp ? 1 : 0; - - READ_BCR(ARC_REG_BPU_BCR, bpu); - cpu->bpu.ver = bpu.ver; - cpu->bpu.full = bpu.ft; - cpu->bpu.num_cache = 256 << bpu.bce; - cpu->bpu.num_pred = 2048 << bpu.pte; - cpu->bpu.ret_stk = 4 << bpu.rse; - - /* if dual issue hardware, is it enabled ? 
*/ - if (cpu->extn.dual) { + if (uarch.prod == 4) { unsigned int exec_ctrl; + cpu_nm = "HS48"; + dual_issue = 1; + /* if dual issue hardware, is it enabled ? */ READ_BCR(AUX_EXEC_CTRL, exec_ctrl); - cpu->extn.dual_enb = !(exec_ctrl & 1); + dual_enb = !(exec_ctrl & 1); } } - READ_BCR(ARC_REG_AP_BCR, ap); - if (ap.ver) { - cpu->extn.ap_num = 2 << ap.num; - cpu->extn.ap_full = !ap.min; - } - - READ_BCR(ARC_REG_SMART_BCR, bcr); - cpu->extn.smart = bcr.ver ? 1 : 0; - - READ_BCR(ARC_REG_RTT_BCR, bcr); - cpu->extn.rtt = bcr.ver ? 1 : 0; - READ_BCR(ARC_REG_ISA_CFG_BCR, isa); - /* some hacks for lack of feature BCR info in old ARC700 cores */ - if (is_isa_arcompact()) { - if (!isa.ver) /* ISA BCR absent, use Kconfig info */ - cpu->isa.atomic = IS_ENABLED(CONFIG_ARC_HAS_LLSC); - else { - /* ARC700_BUILD only has 2 bits of isa info */ - struct bcr_generic bcr = *(struct bcr_generic *)&isa; - cpu->isa.atomic = bcr.info & 1; - } - - cpu->isa.be = IS_ENABLED(CONFIG_CPU_BIG_ENDIAN); + n += scnprintf(buf + n, len - n, "processor [%d]\t: %s %s (%s ISA) %s%s%s\n", + c, cpu_nm, release, isa_nm, + IS_AVAIL1(isa.be, "[Big-Endian]"), + IS_AVAIL3(dual_issue, dual_enb, " Dual-Issue ")); + + READ_BCR(ARC_REG_MPY_BCR, mpy); + mpy_opt = 2; /* stock MPY/MPYH */ + if (mpy.dsp) /* OPT 7-9 */ + mpy_opt = mpy.dsp + 6; + + scnprintf(mpy_nm, 16, "mpy[opt %d] ", mpy_opt); + + READ_BCR(ARC_REG_FP_V2_BCR, fpu); + + n += scnprintf(buf + n, len - n, "ISA Extn\t: %s%s%s%s%s%s%s%s%s%s%s\n", + IS_AVAIL2(isa.atomic, "atomic ", CONFIG_ARC_HAS_LLSC), + IS_AVAIL2(isa.ldd, "ll64 ", CONFIG_ARC_HAS_LL64), + IS_AVAIL2(isa.unalign, "unalign ", CONFIG_ARC_USE_UNALIGNED_MEM_ACCESS), + IS_AVAIL1(mpy.ver, mpy_nm), + IS_AVAIL1(isa.div_rem, "div_rem "), + IS_AVAIL1((fpu.sp | fpu.dp), " FPU:"), + IS_AVAIL1(fpu.sp, " sp"), + IS_AVAIL1(fpu.dp, " dp")); + + READ_BCR(ARC_REG_BPU_BCR, bpu); + bpu_full = bpu.ft; + bpu_cache = 256 << bpu.bce; + bpu_pred = 2048 << bpu.pte; + bpu_ret_stk = 4 << bpu.rse; + + 
READ_BCR(ARC_REG_LPB_BUILD, lpb); + if (lpb.ver) { + unsigned int ctl; + ctl = read_aux_reg(ARC_REG_LPB_CTRL); + + scnprintf(lpb_nm, sizeof(lpb_nm), " Loop Buffer:%d %s", + lpb.entries, IS_DISABLED_RUN(!ctl)); + } - /* there's no direct way to distinguish 750 vs. 770 */ - if (unlikely(cpu->core.family < 0x34 || cpu->mmu.ver < 3)) - cpu->name = "ARC750"; - } else { - cpu->isa = isa; + n += scnprintf(buf + n, len - n, + "BPU\t\t: %s%s match, cache:%d, Predict Table:%d Return stk: %d%s\n", + IS_AVAIL1(bpu_full, "full"), + IS_AVAIL1(!bpu_full, "partial"), + bpu_cache, bpu_pred, bpu_ret_stk, + lpb_nm); + + READ_BCR(ARC_REG_ICCM_BUILD, iccm); + if (iccm.ver) { + unsigned long base; + info->iccm.sz = 256 << iccm.sz00; /* 512B to 16M */ + if (iccm.sz00 == 0xF && iccm.sz01 > 0) + info->iccm.sz <<= iccm.sz01; + base = read_aux_reg(ARC_REG_AUX_ICCM); + info->iccm.base = base & 0xF0000000; } -} -static char *arc_cpu_mumbojumbo(int cpu_id, char *buf, int len) -{ - struct cpuinfo_arc *cpu = &cpuinfo_arc700[cpu_id]; - struct bcr_identity *core = &cpu->core; - char mpy_opt[16]; - int n = 0; + READ_BCR(ARC_REG_DCCM_BUILD, dccm); + if (dccm.ver) { + unsigned long base; + info->dccm.sz = 256 << dccm.sz0; + if (dccm.sz0 == 0xF && dccm.sz1 > 0) + info->dccm.sz <<= dccm.sz1; + base = read_aux_reg(ARC_REG_AUX_DCCM); + info->dccm.base = base & 0xF0000000; + } - FIX_PTR(cpu); + /* Error Protection: ECC/Parity */ + READ_BCR(ARC_REG_ERP_BUILD, erp); + if (erp.ver) { + struct ctl_erp ctl; + READ_BCR(ARC_REG_ERP_CTRL, ctl); + /* inverted bits: 0 means enabled */ + n += scnprintf(buf + n, len - n, "Extn [ECC]\t: %s%s%s%s%s%s\n", + IS_AVAIL3(erp.ic, !ctl.dpi, "IC "), + IS_AVAIL3(erp.dc, !ctl.dpd, "DC "), + IS_AVAIL3(erp.mmu, !ctl.mpd, "MMU ")); + } - n += scnprintf(buf + n, len - n, - "\nIDENTITY\t: ARCVER [%#02x] ARCNUM [%#02x] CHIPID [%#4x]\n", - core->family, core->cpu_id, core->chip_id); + /* ARCv2 ISA specific sanity checks */ + present = fpu.sp | fpu.dp | mpy.dsp; /* DSP and/or FPU */ + 
CHK_OPT_STRICT(CONFIG_ARC_HAS_ACCL_REGS, present); - n += scnprintf(buf + n, len - n, "processor [%d]\t: %s %s (%s ISA) %s%s%s\n", - cpu_id, cpu->name, cpu->release, - is_isa_arcompact() ? "ARCompact" : "ARCv2", - IS_AVAIL1(cpu->isa.be, "[Big-Endian]"), - IS_AVAIL3(cpu->extn.dual, cpu->extn.dual_enb, " Dual-Issue ")); + dsp_config_check(); +#endif + return n; +} - n += scnprintf(buf + n, len - n, "Timers\t\t: %s%s%s%s%s%s\nISA Extn\t: ", - IS_AVAIL1(cpu->extn.timer0, "Timer0 "), - IS_AVAIL1(cpu->extn.timer1, "Timer1 "), - IS_AVAIL2(cpu->extn.rtc, "RTC [UP 64-bit] ", CONFIG_ARC_TIMERS_64BIT), - IS_AVAIL2(cpu->extn.gfrc, "GFRC [SMP 64-bit] ", CONFIG_ARC_TIMERS_64BIT)); +static char *arc_cpu_mumbojumbo(int c, struct cpuinfo_arc *info, char *buf, int len) +{ + struct bcr_identity ident; + struct bcr_timer timer; + struct bcr_generic bcr; + struct mcip_bcr mp; + struct bcr_actionpoint ap; + unsigned long vec_base; + int ap_num, ap_full, smart, rtt, n; - if (cpu->extn_mpy.ver) { - if (is_isa_arcompact()) { - scnprintf(mpy_opt, 16, "mpy"); - } else { + memset(info, 0, sizeof(struct cpuinfo_arc)); - int opt = 2; /* stock MPY/MPYH */ + READ_BCR(AUX_IDENTITY, ident); + info->arcver = ident.family; - if (cpu->extn_mpy.dsp) /* OPT 7-9 */ - opt = cpu->extn_mpy.dsp + 6; + n = scnprintf(buf, len, + "\nIDENTITY\t: ARCVER [%#02x] ARCNUM [%#02x] CHIPID [%#4x]\n", + ident.family, ident.cpu_id, ident.chip_id); - scnprintf(mpy_opt, 16, "mpy[opt %d] ", opt); - } + if (is_isa_arcompact()) { + n += arcompact_mumbojumbo(c, info, buf + n, len - n); + } else if (is_isa_arcv2()){ + n += arcv2_mumbojumbo(c, info, buf + n, len - n); } - n += scnprintf(buf + n, len - n, "%s%s%s%s%s%s%s%s\n", - IS_AVAIL2(cpu->isa.atomic, "atomic ", CONFIG_ARC_HAS_LLSC), - IS_AVAIL2(cpu->isa.ldd, "ll64 ", CONFIG_ARC_HAS_LL64), - IS_AVAIL2(cpu->isa.unalign, "unalign ", CONFIG_ARC_USE_UNALIGNED_MEM_ACCESS), - IS_AVAIL1(cpu->extn_mpy.ver, mpy_opt), - IS_AVAIL1(cpu->isa.div_rem, "div_rem ")); + n += 
arc_mmu_mumbojumbo(c, buf + n, len - n); + n += arc_cache_mumbojumbo(c, buf + n, len - n); - if (cpu->bpu.ver) { - n += scnprintf(buf + n, len - n, - "BPU\t\t: %s%s match, cache:%d, Predict Table:%d Return stk: %d", - IS_AVAIL1(cpu->bpu.full, "full"), - IS_AVAIL1(!cpu->bpu.full, "partial"), - cpu->bpu.num_cache, cpu->bpu.num_pred, cpu->bpu.ret_stk); - - if (is_isa_arcv2()) { - struct bcr_lpb lpb; - - READ_BCR(ARC_REG_LPB_BUILD, lpb); - if (lpb.ver) { - unsigned int ctl; - ctl = read_aux_reg(ARC_REG_LPB_CTRL); - - n += scnprintf(buf + n, len - n, " Loop Buffer:%d %s", - lpb.entries, - IS_DISABLED_RUN(!ctl)); - } - } - n += scnprintf(buf + n, len - n, "\n"); - } + READ_BCR(ARC_REG_TIMERS_BCR, timer); + info->t0 = timer.t0; + info->t1 = timer.t1; - return buf; -} + READ_BCR(ARC_REG_MCIP_BCR, mp); + vec_base = read_aux_reg(AUX_INTR_VEC_BASE); -static char *arc_extn_mumbojumbo(int cpu_id, char *buf, int len) -{ - int n = 0; - struct cpuinfo_arc *cpu = &cpuinfo_arc700[cpu_id]; + n += scnprintf(buf + n, len - n, + "Timers\t\t: %s%s%s%s%s%s\nVector Table\t: %#lx\n", + IS_AVAIL1(timer.t0, "Timer0 "), + IS_AVAIL1(timer.t1, "Timer1 "), + IS_AVAIL2(timer.rtc, "RTC [UP 64-bit] ", CONFIG_ARC_TIMERS_64BIT), + IS_AVAIL2(mp.gfrc, "GFRC [SMP 64-bit] ", CONFIG_ARC_TIMERS_64BIT), + vec_base); - FIX_PTR(cpu); + READ_BCR(ARC_REG_AP_BCR, ap); + if (ap.ver) { + ap_num = 2 << ap.num; + ap_full = !ap.min; + } - n += scnprintf(buf + n, len - n, "Vector Table\t: %#x\n", cpu->vec_base); + READ_BCR(ARC_REG_SMART_BCR, bcr); + smart = bcr.ver ? 1 : 0; - if (cpu->extn.fpu_sp || cpu->extn.fpu_dp) - n += scnprintf(buf + n, len - n, "FPU\t\t: %s%s\n", - IS_AVAIL1(cpu->extn.fpu_sp, "SP "), - IS_AVAIL1(cpu->extn.fpu_dp, "DP ")); + READ_BCR(ARC_REG_RTT_BCR, bcr); + rtt = bcr.ver ? 
1 : 0; - if (cpu->extn.ap_num | cpu->extn.smart | cpu->extn.rtt) { + if (ap.ver | smart | rtt) { n += scnprintf(buf + n, len - n, "DEBUG\t\t: %s%s", - IS_AVAIL1(cpu->extn.smart, "smaRT "), - IS_AVAIL1(cpu->extn.rtt, "RTT ")); - if (cpu->extn.ap_num) { + IS_AVAIL1(smart, "smaRT "), + IS_AVAIL1(rtt, "RTT ")); + if (ap.ver) { n += scnprintf(buf + n, len - n, "ActionPoint %d/%s", - cpu->extn.ap_num, - cpu->extn.ap_full ? "full":"min"); + ap_num, + ap_full ? "full":"min"); } n += scnprintf(buf + n, len - n, "\n"); } - if (cpu->dccm.sz || cpu->iccm.sz) - n += scnprintf(buf + n, len - n, "Extn [CCM]\t: DCCM @ %x, %d KB / ICCM: @ %x, %d KB\n", - cpu->dccm.base_addr, TO_KB(cpu->dccm.sz), - cpu->iccm.base_addr, TO_KB(cpu->iccm.sz)); - - if (is_isa_arcv2()) { - - /* Error Protection: ECC/Parity */ - struct bcr_erp erp; - READ_BCR(ARC_REG_ERP_BUILD, erp); - - if (erp.ver) { - struct ctl_erp ctl; - READ_BCR(ARC_REG_ERP_CTRL, ctl); - - /* inverted bits: 0 means enabled */ - n += scnprintf(buf + n, len - n, "Extn [ECC]\t: %s%s%s%s%s%s\n", - IS_AVAIL3(erp.ic, !ctl.dpi, "IC "), - IS_AVAIL3(erp.dc, !ctl.dpd, "DC "), - IS_AVAIL3(erp.mmu, !ctl.mpd, "MMU ")); - } - } + if (info->dccm.sz || info->iccm.sz) + n += scnprintf(buf + n, len - n, + "Extn [CCM]\t: DCCM @ %lx, %d KB / ICCM: @ %lx, %d KB\n", + info->dccm.base, TO_KB(info->dccm.sz), + info->iccm.base, TO_KB(info->iccm.sz)); return buf; } @@ -401,15 +378,15 @@ void chk_opt_weak(char *opt_name, bool hw_exists, bool opt_ena) panic("Disable %s, hardware NOT present\n", opt_name); } -static void arc_chk_core_config(void) +/* + * ISA agnostic sanity checks + */ +static void arc_chk_core_config(struct cpuinfo_arc *info) { - struct cpuinfo_arc *cpu = &cpuinfo_arc700[smp_processor_id()]; - int present = 0; - - if (!cpu->extn.timer0) + if (!info->t0) panic("Timer0 is not present!\n"); - if (!cpu->extn.timer1) + if (!info->t1) panic("Timer1 is not present!\n"); #ifdef CONFIG_ARC_HAS_DCCM @@ -417,35 +394,17 @@ static void 
arc_chk_core_config(void) * DCCM can be arbit placed in hardware. * Make sure it's placement/sz matches what Linux is built with */ - if ((unsigned int)__arc_dccm_base != cpu->dccm.base_addr) + if ((unsigned int)__arc_dccm_base != info->dccm.base) panic("Linux built with incorrect DCCM Base address\n"); - if (CONFIG_ARC_DCCM_SZ * SZ_1K != cpu->dccm.sz) + if (CONFIG_ARC_DCCM_SZ * SZ_1K != info->dccm.sz) panic("Linux built with incorrect DCCM Size\n"); #endif #ifdef CONFIG_ARC_HAS_ICCM - if (CONFIG_ARC_ICCM_SZ * SZ_1K != cpu->iccm.sz) + if (CONFIG_ARC_ICCM_SZ * SZ_1K != info->iccm.sz) panic("Linux built with incorrect ICCM Size\n"); #endif - - /* - * FP hardware/software config sanity - * -If hardware present, kernel needs to save/restore FPU state - * -If not, it will crash trying to save/restore the non-existant regs - */ - - if (is_isa_arcompact()) { - /* only DPDP checked since SP has no arch visible regs */ - present = cpu->extn.fpu_dp; - CHK_OPT_STRICT(CONFIG_ARC_FPU_SAVE_RESTORE, present); - } else { - /* Accumulator Low:High pair (r58:59) present if DSP MPY or FPU */ - present = cpu->extn_mpy.dsp | cpu->extn.fpu_sp | cpu->extn.fpu_dp; - CHK_OPT_STRICT(CONFIG_ARC_HAS_ACCL_REGS, present); - - dsp_config_check(); - } } /* @@ -456,21 +415,19 @@ static void arc_chk_core_config(void) void setup_processor(void) { + struct cpuinfo_arc info; + int c = smp_processor_id(); char str[512]; - int cpu_id = smp_processor_id(); - read_arc_build_cfg_regs(); - arc_init_IRQ(); + pr_info("%s", arc_cpu_mumbojumbo(c, &info, str, sizeof(str))); + pr_info("%s", arc_platform_smp_cpuinfo()); - pr_info("%s", arc_cpu_mumbojumbo(cpu_id, str, sizeof(str))); + arc_chk_core_config(&info); + arc_init_IRQ(); arc_mmu_init(); arc_cache_init(); - pr_info("%s", arc_extn_mumbojumbo(cpu_id, str, sizeof(str))); - pr_info("%s", arc_platform_smp_cpuinfo()); - - arc_chk_core_config(); } static inline bool uboot_arg_invalid(unsigned long addr) @@ -617,6 +574,7 @@ static int show_cpuinfo(struct seq_file 
*m, void *v) char *str; int cpu_id = ptr_to_cpu(v); struct device *cpu_dev = get_cpu_device(cpu_id); + struct cpuinfo_arc info; struct clk *cpu_clk; unsigned long freq = 0; @@ -629,7 +587,7 @@ static int show_cpuinfo(struct seq_file *m, void *v) if (!str) goto done; - seq_printf(m, arc_cpu_mumbojumbo(cpu_id, str, PAGE_SIZE)); + seq_printf(m, arc_cpu_mumbojumbo(cpu_id, &info, str, PAGE_SIZE)); cpu_clk = clk_get(cpu_dev, NULL); if (IS_ERR(cpu_clk)) { @@ -646,9 +604,6 @@ static int show_cpuinfo(struct seq_file *m, void *v) loops_per_jiffy / (500000 / HZ), (loops_per_jiffy / (5000 / HZ)) % 100); - seq_printf(m, arc_mmu_mumbojumbo(cpu_id, str, PAGE_SIZE)); - seq_printf(m, arc_cache_mumbojumbo(cpu_id, str, PAGE_SIZE)); - seq_printf(m, arc_extn_mumbojumbo(cpu_id, str, PAGE_SIZE)); seq_printf(m, arc_platform_smp_cpuinfo()); free_page((unsigned long)str); diff --git a/arch/arc/kernel/signal.c b/arch/arc/kernel/signal.c index 3c1590c27fae..0b3bb529d246 100644 --- a/arch/arc/kernel/signal.c +++ b/arch/arc/kernel/signal.c @@ -53,6 +53,7 @@ #include <linux/sched/task_stack.h> #include <asm/ucontext.h> +#include <asm/entry.h> struct rt_sigframe { struct siginfo info; diff --git a/arch/arc/kernel/smp.c b/arch/arc/kernel/smp.c index 409cfa4675b4..8d9b188caa27 100644 --- a/arch/arc/kernel/smp.c +++ b/arch/arc/kernel/smp.c @@ -23,9 +23,10 @@ #include <linux/export.h> #include <linux/of_fdt.h> -#include <asm/processor.h> -#include <asm/setup.h> #include <asm/mach_desc.h> +#include <asm/setup.h> +#include <asm/smp.h> +#include <asm/processor.h> #ifndef CONFIG_ARC_HAS_LLSC arch_spinlock_t smp_atomic_ops_lock = __ARCH_SPIN_LOCK_UNLOCKED; @@ -351,7 +352,7 @@ static inline int __do_IPI(unsigned long msg) * arch-common ISR to handle for inter-processor interrupts * Has hooks for platform specific IPI */ -irqreturn_t do_IPI(int irq, void *dev_id) +static irqreturn_t do_IPI(int irq, void *dev_id) { unsigned long pending; unsigned long __maybe_unused copy; diff --git 
a/arch/arc/kernel/stacktrace.c b/arch/arc/kernel/stacktrace.c index 5372dc04e784..ea99c066ef25 100644 --- a/arch/arc/kernel/stacktrace.c +++ b/arch/arc/kernel/stacktrace.c @@ -29,6 +29,7 @@ #include <asm/arcregs.h> #include <asm/unwind.h> +#include <asm/stacktrace.h> #include <asm/switch_to.h> /*------------------------------------------------------------------------- diff --git a/arch/arc/kernel/traps.c b/arch/arc/kernel/traps.c index 6b83e3f2b41c..9b9570b79362 100644 --- a/arch/arc/kernel/traps.c +++ b/arch/arc/kernel/traps.c @@ -16,6 +16,7 @@ #include <linux/ptrace.h> #include <linux/kprobes.h> #include <linux/kgdb.h> +#include <asm/entry.h> #include <asm/setup.h> #include <asm/unaligned.h> #include <asm/kprobes.h> @@ -109,9 +110,7 @@ void do_machine_check_fault(unsigned long address, struct pt_regs *regs) */ void do_non_swi_trap(unsigned long address, struct pt_regs *regs) { - unsigned int param = regs->ecr_param; - - switch (param) { + switch (regs->ecr.param) { case 1: trap_is_brkpt(address, regs); break; diff --git a/arch/arc/kernel/troubleshoot.c b/arch/arc/kernel/troubleshoot.c index 7654c2e42dc0..d5b3ed2c58f5 100644 --- a/arch/arc/kernel/troubleshoot.c +++ b/arch/arc/kernel/troubleshoot.c @@ -115,8 +115,8 @@ static void show_ecr_verbose(struct pt_regs *regs) /* For Data fault, this is data address not instruction addr */ address = current->thread.fault_address; - vec = regs->ecr_vec; - cause_code = regs->ecr_cause; + vec = regs->ecr.vec; + cause_code = regs->ecr.cause; /* For DTLB Miss or ProtV, display the memory involved too */ if (vec == ECR_V_DTLB_MISS) { @@ -154,7 +154,7 @@ static void show_ecr_verbose(struct pt_regs *regs) pr_cont("Misaligned r/w from 0x%08lx\n", address); #endif } else if (vec == ECR_V_TRAP) { - if (regs->ecr_param == 5) + if (regs->ecr.param == 5) pr_cont("gcc generated __builtin_trap\n"); } else { pr_cont("Check Programmer's Manual\n"); @@ -184,9 +184,10 @@ void show_regs(struct pt_regs *regs) if (user_mode(regs)) 
show_faulting_vma(regs->ret); /* faulting code, not data */ - pr_info("ECR: 0x%08lx EFA: 0x%08lx ERET: 0x%08lx\nSTAT: 0x%08lx", - regs->event, current->thread.fault_address, regs->ret, - regs->status32); + pr_info("ECR: 0x%08lx EFA: 0x%08lx ERET: 0x%08lx\n", + regs->ecr.full, current->thread.fault_address, regs->ret); + + pr_info("STAT32: 0x%08lx", regs->status32); #define STS_BIT(r, bit) r->status32 & STATUS_##bit##_MASK ? #bit" " : "" diff --git a/arch/arc/lib/memset-archs.S b/arch/arc/lib/memset-archs.S index d2e09fece5bc..d0a5cec4cdca 100644 --- a/arch/arc/lib/memset-archs.S +++ b/arch/arc/lib/memset-archs.S @@ -36,12 +36,13 @@ #endif ENTRY_CFI(memset) - PREFETCHW_INSTR r0, 0 ; Prefetch the first write location mov.f 0, r2 ;;; if size is zero jz.d [blink] mov r3, r0 ; don't clobber ret val + PREFETCHW_INSTR r0, 0 ; Prefetch the first write location + ;;; if length < 8 brls.d.nt r2, 8, .Lsmallchunk mov.f lp_count,r2 diff --git a/arch/arc/mm/cache.c b/arch/arc/mm/cache.c index 3c16ee942a5c..f7e05c146637 100644 --- a/arch/arc/mm/cache.c +++ b/arch/arc/mm/cache.c @@ -28,6 +28,10 @@ int slc_enable = 1, ioc_enable = 1; unsigned long perip_base = ARC_UNCACHED_ADDR_SPACE; /* legacy value for boot */ unsigned long perip_end = 0xFFFFFFFF; /* legacy value */ +static struct cpuinfo_arc_cache { + unsigned int sz_k, line_len, colors; +} ic_info, dc_info, slc_info; + void (*_cache_line_loop_ic_fn)(phys_addr_t paddr, unsigned long vaddr, unsigned long sz, const int op, const int full_page); @@ -35,78 +39,24 @@ void (*__dma_cache_wback_inv)(phys_addr_t start, unsigned long sz); void (*__dma_cache_inv)(phys_addr_t start, unsigned long sz); void (*__dma_cache_wback)(phys_addr_t start, unsigned long sz); -char *arc_cache_mumbojumbo(int c, char *buf, int len) -{ - int n = 0; - struct cpuinfo_arc_cache *p; - -#define PR_CACHE(p, cfg, str) \ - if (!(p)->line_len) \ - n += scnprintf(buf + n, len - n, str"\t\t: N/A\n"); \ - else \ - n += scnprintf(buf + n, len - n, \ - str"\t\t: %uK, 
%dway/set, %uB Line, %s%s%s\n", \ - (p)->sz_k, (p)->assoc, (p)->line_len, \ - (p)->vipt ? "VIPT" : "PIPT", \ - (p)->alias ? " aliasing" : "", \ - IS_USED_CFG(cfg)); - - PR_CACHE(&cpuinfo_arc700[c].icache, CONFIG_ARC_HAS_ICACHE, "I-Cache"); - PR_CACHE(&cpuinfo_arc700[c].dcache, CONFIG_ARC_HAS_DCACHE, "D-Cache"); - - p = &cpuinfo_arc700[c].slc; - if (p->line_len) - n += scnprintf(buf + n, len - n, - "SLC\t\t: %uK, %uB Line%s\n", - p->sz_k, p->line_len, IS_USED_RUN(slc_enable)); - - n += scnprintf(buf + n, len - n, "Peripherals\t: %#lx%s%s\n", - perip_base, - IS_AVAIL3(ioc_exists, ioc_enable, ", IO-Coherency (per-device) ")); - - return buf; -} - -/* - * Read the Cache Build Confuration Registers, Decode them and save into - * the cpuinfo structure for later use. - * No Validation done here, simply read/convert the BCRs - */ -static void read_decode_cache_bcr_arcv2(int cpu) +static int read_decode_cache_bcr_arcv2(int c, char *buf, int len) { - struct cpuinfo_arc_cache *p_slc = &cpuinfo_arc700[cpu].slc; + struct cpuinfo_arc_cache *p_slc = &slc_info; + struct bcr_identity ident; struct bcr_generic sbcr; - - struct bcr_slc_cfg { -#ifdef CONFIG_CPU_BIG_ENDIAN - unsigned int pad:24, way:2, lsz:2, sz:4; -#else - unsigned int sz:4, lsz:2, way:2, pad:24; -#endif - } slc_cfg; - - struct bcr_clust_cfg { -#ifdef CONFIG_CPU_BIG_ENDIAN - unsigned int pad:7, c:1, num_entries:8, num_cores:8, ver:8; -#else - unsigned int ver:8, num_cores:8, num_entries:8, c:1, pad:7; -#endif - } cbcr; - - struct bcr_volatile { -#ifdef CONFIG_CPU_BIG_ENDIAN - unsigned int start:4, limit:4, pad:22, order:1, disable:1; -#else - unsigned int disable:1, order:1, pad:22, limit:4, start:4; -#endif - } vol; - + struct bcr_clust_cfg cbcr; + struct bcr_volatile vol; + int n = 0; READ_BCR(ARC_REG_SLC_BCR, sbcr); if (sbcr.ver) { + struct bcr_slc_cfg slc_cfg; READ_BCR(ARC_REG_SLC_CFG, slc_cfg); p_slc->sz_k = 128 << slc_cfg.sz; l2_line_sz = p_slc->line_len = (slc_cfg.lsz == 0) ? 
128 : 64; + n += scnprintf(buf + n, len - n, + "SLC\t\t: %uK, %uB Line%s\n", + p_slc->sz_k, p_slc->line_len, IS_USED_RUN(slc_enable)); } READ_BCR(ARC_REG_CLUSTER_BCR, cbcr); @@ -129,70 +79,83 @@ static void read_decode_cache_bcr_arcv2(int cpu) ioc_enable = 0; } + READ_BCR(AUX_IDENTITY, ident); + /* HS 2.0 didn't have AUX_VOL */ - if (cpuinfo_arc700[cpu].core.family > 0x51) { + if (ident.family > 0x51) { READ_BCR(AUX_VOL, vol); perip_base = vol.start << 28; /* HS 3.0 has limit and strict-ordering fields */ - if (cpuinfo_arc700[cpu].core.family > 0x52) + if (ident.family > 0x52) perip_end = (vol.limit << 28) - 1; } + + n += scnprintf(buf + n, len - n, "Peripherals\t: %#lx%s%s\n", + perip_base, + IS_AVAIL3(ioc_exists, ioc_enable, ", IO-Coherency (per-device) ")); + + return n; } -void read_decode_cache_bcr(void) +int arc_cache_mumbojumbo(int c, char *buf, int len) { - struct cpuinfo_arc_cache *p_ic, *p_dc; - unsigned int cpu = smp_processor_id(); - struct bcr_cache { -#ifdef CONFIG_CPU_BIG_ENDIAN - unsigned int pad:12, line_len:4, sz:4, config:4, ver:8; -#else - unsigned int ver:8, config:4, sz:4, line_len:4, pad:12; -#endif - } ibcr, dbcr; + struct cpuinfo_arc_cache *p_ic = &ic_info, *p_dc = &dc_info; + struct bcr_cache ibcr, dbcr; + int vipt, assoc; + int n = 0; - p_ic = &cpuinfo_arc700[cpu].icache; READ_BCR(ARC_REG_IC_BCR, ibcr); - if (!ibcr.ver) goto dc_chk; - if (ibcr.ver <= 3) { + if (is_isa_arcompact() && (ibcr.ver <= 3)) { BUG_ON(ibcr.config != 3); - p_ic->assoc = 2; /* Fixed to 2w set assoc */ - } else if (ibcr.ver >= 4) { - p_ic->assoc = 1 << ibcr.config; /* 1,2,4,8 */ + assoc = 2; /* Fixed to 2w set assoc */ + } else if (is_isa_arcv2() && (ibcr.ver >= 4)) { + assoc = 1 << ibcr.config; /* 1,2,4,8 */ } p_ic->line_len = 8 << ibcr.line_len; p_ic->sz_k = 1 << (ibcr.sz - 1); - p_ic->vipt = 1; - p_ic->alias = p_ic->sz_k/p_ic->assoc/TO_KB(PAGE_SIZE) > 1; + p_ic->colors = p_ic->sz_k/assoc/TO_KB(PAGE_SIZE); + + n += scnprintf(buf + n, len - n, + "I-Cache\t\t: %uK, 
%dway/set, %uB Line, VIPT%s%s\n", + p_ic->sz_k, assoc, p_ic->line_len, + p_ic->colors > 1 ? " aliasing" : "", + IS_USED_CFG(CONFIG_ARC_HAS_ICACHE)); dc_chk: - p_dc = &cpuinfo_arc700[cpu].dcache; READ_BCR(ARC_REG_DC_BCR, dbcr); - if (!dbcr.ver) goto slc_chk; - if (dbcr.ver <= 3) { + if (is_isa_arcompact() && (dbcr.ver <= 3)) { BUG_ON(dbcr.config != 2); - p_dc->assoc = 4; /* Fixed to 4w set assoc */ - p_dc->vipt = 1; - p_dc->alias = p_dc->sz_k/p_dc->assoc/TO_KB(PAGE_SIZE) > 1; - } else if (dbcr.ver >= 4) { - p_dc->assoc = 1 << dbcr.config; /* 1,2,4,8 */ - p_dc->vipt = 0; - p_dc->alias = 0; /* PIPT so can't VIPT alias */ + vipt = 1; + assoc = 4; /* Fixed to 4w set assoc */ + p_dc->colors = p_dc->sz_k/assoc/TO_KB(PAGE_SIZE); + } else if (is_isa_arcv2() && (dbcr.ver >= 4)) { + vipt = 0; + assoc = 1 << dbcr.config; /* 1,2,4,8 */ + p_dc->colors = 1; /* PIPT so can't VIPT alias */ } p_dc->line_len = 16 << dbcr.line_len; p_dc->sz_k = 1 << (dbcr.sz - 1); + n += scnprintf(buf + n, len - n, + "D-Cache\t\t: %uK, %dway/set, %uB Line, %s%s%s\n", + p_dc->sz_k, assoc, p_dc->line_len, + vipt ? "VIPT" : "PIPT", + p_dc->colors > 1 ? 
" aliasing" : "", + IS_USED_CFG(CONFIG_ARC_HAS_DCACHE)); + slc_chk: if (is_isa_arcv2()) - read_decode_cache_bcr_arcv2(cpu); + n += read_decode_cache_bcr_arcv2(c, buf + n, len - n); + + return n; } /* @@ -581,7 +544,7 @@ static void __ic_line_inv_vaddr(phys_addr_t paddr, unsigned long vaddr, #endif /* CONFIG_ARC_HAS_ICACHE */ -noinline void slc_op_rgn(phys_addr_t paddr, unsigned long sz, const int op) +static noinline void slc_op_rgn(phys_addr_t paddr, unsigned long sz, const int op) { #ifdef CONFIG_ISA_ARCV2 /* @@ -644,7 +607,7 @@ noinline void slc_op_rgn(phys_addr_t paddr, unsigned long sz, const int op) #endif } -noinline void slc_op_line(phys_addr_t paddr, unsigned long sz, const int op) +static __maybe_unused noinline void slc_op_line(phys_addr_t paddr, unsigned long sz, const int op) { #ifdef CONFIG_ISA_ARCV2 /* @@ -1082,7 +1045,7 @@ SYSCALL_DEFINE3(cacheflush, uint32_t, start, uint32_t, sz, uint32_t, flags) * 3. All Caches need to be disabled when setting up IOC to elide any in-flight * Coherency transactions */ -noinline void __init arc_ioc_setup(void) +static noinline void __init arc_ioc_setup(void) { unsigned int ioc_base, mem_sz; @@ -1144,12 +1107,10 @@ noinline void __init arc_ioc_setup(void) * one core suffices for all * - IOC setup / dma callbacks only need to be done once */ -void __init arc_cache_init_master(void) +static noinline void __init arc_cache_init_master(void) { - unsigned int __maybe_unused cpu = smp_processor_id(); - if (IS_ENABLED(CONFIG_ARC_HAS_ICACHE)) { - struct cpuinfo_arc_cache *ic = &cpuinfo_arc700[cpu].icache; + struct cpuinfo_arc_cache *ic = &ic_info; if (!ic->line_len) panic("cache support enabled but non-existent cache\n"); @@ -1162,14 +1123,14 @@ void __init arc_cache_init_master(void) * In MMU v4 (HS38x) the aliasing icache config uses IVIL/PTAG * pair to provide vaddr/paddr respectively, just as in MMU v3 */ - if (is_isa_arcv2() && ic->alias) + if (is_isa_arcv2() && ic->colors > 1) _cache_line_loop_ic_fn = 
__cache_line_loop_v3; else _cache_line_loop_ic_fn = __cache_line_loop; } if (IS_ENABLED(CONFIG_ARC_HAS_DCACHE)) { - struct cpuinfo_arc_cache *dc = &cpuinfo_arc700[cpu].dcache; + struct cpuinfo_arc_cache *dc = &dc_info; if (!dc->line_len) panic("cache support enabled but non-existent cache\n"); @@ -1181,14 +1142,13 @@ void __init arc_cache_init_master(void) /* check for D-Cache aliasing on ARCompact: ARCv2 has PIPT */ if (is_isa_arcompact()) { int handled = IS_ENABLED(CONFIG_ARC_CACHE_VIPT_ALIASING); - int num_colors = dc->sz_k/dc->assoc/TO_KB(PAGE_SIZE); - if (dc->alias) { + if (dc->colors > 1) { if (!handled) panic("Enable CONFIG_ARC_CACHE_VIPT_ALIASING\n"); - if (CACHE_COLORS_NUM != num_colors) + if (CACHE_COLORS_NUM != dc->colors) panic("CACHE_COLORS_NUM not optimized for config\n"); - } else if (!dc->alias && handled) { + } else if (handled && dc->colors == 1) { panic("Disable CONFIG_ARC_CACHE_VIPT_ALIASING\n"); } } @@ -1231,9 +1191,6 @@ void __init arc_cache_init_master(void) void __ref arc_cache_init(void) { unsigned int __maybe_unused cpu = smp_processor_id(); - char str[256]; - - pr_info("%s", arc_cache_mumbojumbo(0, str, sizeof(str))); if (!cpu) arc_cache_init_master(); diff --git a/arch/arc/mm/extable.c b/arch/arc/mm/extable.c index 4e14c4244ea2..88fa3a4d4906 100644 --- a/arch/arc/mm/extable.c +++ b/arch/arc/mm/extable.c @@ -22,14 +22,3 @@ int fixup_exception(struct pt_regs *regs) return 0; } - -#ifdef CONFIG_CC_OPTIMIZE_FOR_SIZE - -unsigned long arc_clear_user_noinline(void __user *to, - unsigned long n) -{ - return __arc_clear_user(to, n); -} -EXPORT_SYMBOL(arc_clear_user_noinline); - -#endif diff --git a/arch/arc/mm/fault.c b/arch/arc/mm/fault.c index f59e722d147f..95119a5e7761 100644 --- a/arch/arc/mm/fault.c +++ b/arch/arc/mm/fault.c @@ -13,6 +13,7 @@ #include <linux/kdebug.h> #include <linux/perf_event.h> #include <linux/mm_types.h> +#include <asm/entry.h> #include <asm/mmu.h> /* @@ -99,10 +100,10 @@ void do_page_fault(unsigned long address, struct 
pt_regs *regs) if (faulthandler_disabled() || !mm) goto no_context; - if (regs->ecr_cause & ECR_C_PROTV_STORE) /* ST/EX */ + if (regs->ecr.cause & ECR_C_PROTV_STORE) /* ST/EX */ write = 1; - else if ((regs->ecr_vec == ECR_V_PROTV) && - (regs->ecr_cause == ECR_C_PROTV_INST_FETCH)) + else if ((regs->ecr.vec == ECR_V_PROTV) && + (regs->ecr.cause == ECR_C_PROTV_INST_FETCH)) exec = 1; flags = FAULT_FLAG_DEFAULT; diff --git a/arch/arc/mm/init.c b/arch/arc/mm/init.c index 9f64d729c9f8..6a71b23f1383 100644 --- a/arch/arc/mm/init.c +++ b/arch/arc/mm/init.c @@ -15,6 +15,7 @@ #include <linux/highmem.h> #include <asm/page.h> #include <asm/sections.h> +#include <asm/setup.h> #include <asm/arcregs.h> pgd_t swapper_pg_dir[PTRS_PER_PGD] __aligned(PAGE_SIZE); diff --git a/arch/arc/mm/tlb.c b/arch/arc/mm/tlb.c index 6f40f37e6550..e536b2dcd4b0 100644 --- a/arch/arc/mm/tlb.c +++ b/arch/arc/mm/tlb.c @@ -18,7 +18,9 @@ /* A copy of the ASID from the PID reg is kept in asid_cache */ DEFINE_PER_CPU(unsigned int, asid_cache) = MM_CTXT_FIRST_CYCLE; -static int __read_mostly pae_exists; +static struct cpuinfo_arc_mmu { + unsigned int ver, pg_sz_k, s_pg_sz_m, pae, sets, ways; +} mmuinfo; /* * Utility Routine to erase a J-TLB entry @@ -131,7 +133,7 @@ static void tlb_entry_insert(unsigned int pd0, phys_addr_t pd1) noinline void local_flush_tlb_all(void) { - struct cpuinfo_arc_mmu *mmu = &cpuinfo_arc700[smp_processor_id()].mmu; + struct cpuinfo_arc_mmu *mmu = &mmuinfo; unsigned long flags; unsigned int entry; int num_tlb = mmu->sets * mmu->ways; @@ -389,7 +391,7 @@ void flush_tlb_kernel_range(unsigned long start, unsigned long end) /* * Routine to create a TLB entry */ -void create_tlb(struct vm_area_struct *vma, unsigned long vaddr, pte_t *ptep) +static void create_tlb(struct vm_area_struct *vma, unsigned long vaddr, pte_t *ptep) { unsigned long flags; unsigned int asid_or_sasid, rwx; @@ -564,89 +566,64 @@ void local_flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start, * the 
cpuinfo structure for later use. * No Validation is done here, simply read/convert the BCRs */ -void read_decode_mmu_bcr(void) +int arc_mmu_mumbojumbo(int c, char *buf, int len) { - struct cpuinfo_arc_mmu *mmu = &cpuinfo_arc700[smp_processor_id()].mmu; - unsigned int tmp; - struct bcr_mmu_3 { -#ifdef CONFIG_CPU_BIG_ENDIAN - unsigned int ver:8, ways:4, sets:4, res:3, sasid:1, pg_sz:4, - u_itlb:4, u_dtlb:4; -#else - unsigned int u_dtlb:4, u_itlb:4, pg_sz:4, sasid:1, res:3, sets:4, - ways:4, ver:8; -#endif - } *mmu3; - - struct bcr_mmu_4 { -#ifdef CONFIG_CPU_BIG_ENDIAN - unsigned int ver:8, sasid:1, sz1:4, sz0:4, res:2, pae:1, - n_ways:2, n_entry:2, n_super:2, u_itlb:3, u_dtlb:3; -#else - /* DTLB ITLB JES JE JA */ - unsigned int u_dtlb:3, u_itlb:3, n_super:2, n_entry:2, n_ways:2, - pae:1, res:2, sz0:4, sz1:4, sasid:1, ver:8; -#endif - } *mmu4; + struct cpuinfo_arc_mmu *mmu = &mmuinfo; + unsigned int bcr, u_dtlb, u_itlb, sasid; + struct bcr_mmu_3 *mmu3; + struct bcr_mmu_4 *mmu4; + char super_pg[64] = ""; + int n = 0; - tmp = read_aux_reg(ARC_REG_MMU_BCR); - mmu->ver = (tmp >> 24); + bcr = read_aux_reg(ARC_REG_MMU_BCR); + mmu->ver = (bcr >> 24); if (is_isa_arcompact() && mmu->ver == 3) { - mmu3 = (struct bcr_mmu_3 *)&tmp; + mmu3 = (struct bcr_mmu_3 *)&bcr; mmu->pg_sz_k = 1 << (mmu3->pg_sz - 1); mmu->sets = 1 << mmu3->sets; mmu->ways = 1 << mmu3->ways; - mmu->u_dtlb = mmu3->u_dtlb; - mmu->u_itlb = mmu3->u_itlb; - mmu->sasid = mmu3->sasid; + u_dtlb = mmu3->u_dtlb; + u_itlb = mmu3->u_itlb; + sasid = mmu3->sasid; } else { - mmu4 = (struct bcr_mmu_4 *)&tmp; + mmu4 = (struct bcr_mmu_4 *)&bcr; mmu->pg_sz_k = 1 << (mmu4->sz0 - 1); mmu->s_pg_sz_m = 1 << (mmu4->sz1 - 11); mmu->sets = 64 << mmu4->n_entry; mmu->ways = mmu4->n_ways * 2; - mmu->u_dtlb = mmu4->u_dtlb * 4; - mmu->u_itlb = mmu4->u_itlb * 4; - mmu->sasid = mmu4->sasid; - pae_exists = mmu->pae = mmu4->pae; + u_dtlb = mmu4->u_dtlb * 4; + u_itlb = mmu4->u_itlb * 4; + sasid = mmu4->sasid; + mmu->pae = mmu4->pae; } -} -char 
*arc_mmu_mumbojumbo(int cpu_id, char *buf, int len) -{ - int n = 0; - struct cpuinfo_arc_mmu *p_mmu = &cpuinfo_arc700[cpu_id].mmu; - char super_pg[64] = ""; - - if (p_mmu->s_pg_sz_m) - scnprintf(super_pg, 64, "%dM Super Page %s", - p_mmu->s_pg_sz_m, - IS_USED_CFG(CONFIG_TRANSPARENT_HUGEPAGE)); + if (mmu->s_pg_sz_m) + scnprintf(super_pg, 64, "/%dM%s", + mmu->s_pg_sz_m, + IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) ? " (THP enabled)":""); n += scnprintf(buf + n, len - n, - "MMU [v%x]\t: %dk PAGE, %s, swalk %d lvl, JTLB %d (%dx%d), uDTLB %d, uITLB %d%s%s\n", - p_mmu->ver, p_mmu->pg_sz_k, super_pg, CONFIG_PGTABLE_LEVELS, - p_mmu->sets * p_mmu->ways, p_mmu->sets, p_mmu->ways, - p_mmu->u_dtlb, p_mmu->u_itlb, - IS_AVAIL2(p_mmu->pae, ", PAE40 ", CONFIG_ARC_HAS_PAE40)); - - return buf; + "MMU [v%x]\t: %dk%s, swalk %d lvl, JTLB %dx%d, uDTLB %d, uITLB %d%s%s%s\n", + mmu->ver, mmu->pg_sz_k, super_pg, CONFIG_PGTABLE_LEVELS, + mmu->sets, mmu->ways, + u_dtlb, u_itlb, + IS_AVAIL1(sasid, ", SASID"), + IS_AVAIL2(mmu->pae, ", PAE40 ", CONFIG_ARC_HAS_PAE40)); + + return n; } int pae40_exist_but_not_enab(void) { - return pae_exists && !is_pae40_enabled(); + return mmuinfo.pae && !is_pae40_enabled(); } void arc_mmu_init(void) { - struct cpuinfo_arc_mmu *mmu = &cpuinfo_arc700[smp_processor_id()].mmu; - char str[256]; + struct cpuinfo_arc_mmu *mmu = &mmuinfo; int compat = 0; - pr_info("%s", arc_mmu_mumbojumbo(0, str, sizeof(str))); - /* * Can't be done in processor.h due to header include dependencies */ @@ -723,7 +700,7 @@ volatile int dup_pd_silent; /* Be silent abt it or complain (default) */ void do_tlb_overlap_fault(unsigned long cause, unsigned long address, struct pt_regs *regs) { - struct cpuinfo_arc_mmu *mmu = &cpuinfo_arc700[smp_processor_id()].mmu; + struct cpuinfo_arc_mmu *mmu = &mmuinfo; unsigned long flags; int set, n_ways = mmu->ways; diff --git a/arch/arc/plat-axs10x/axs10x.c b/arch/arc/plat-axs10x/axs10x.c index b821df7b0089..1feb990a56bc 100644 --- 
a/arch/arc/plat-axs10x/axs10x.c +++ b/arch/arc/plat-axs10x/axs10x.c @@ -6,7 +6,6 @@ */ #include <linux/of_fdt.h> -#include <linux/of_platform.h> #include <linux/libfdt.h> #include <asm/asm-offsets.h> diff --git a/arch/arm/boot/dts/nuvoton/nuvoton-common-npcm7xx.dtsi b/arch/arm/boot/dts/nuvoton/nuvoton-common-npcm7xx.dtsi index c7b5ef15b716..868454ae6bde 100644 --- a/arch/arm/boot/dts/nuvoton/nuvoton-common-npcm7xx.dtsi +++ b/arch/arm/boot/dts/nuvoton/nuvoton-common-npcm7xx.dtsi @@ -220,6 +220,15 @@ }; }; + peci: peci-controller@f0100000 { + compatible = "nuvoton,npcm750-peci"; + reg = <0xf0100000 0x200>; + interrupts = <GIC_SPI 6 IRQ_TYPE_LEVEL_HIGH>; + clocks = <&clk NPCM7XX_CLK_APB3>; + cmd-timeout-ms = <1000>; + status = "disabled"; + }; + spi0: spi@200000 { compatible = "nuvoton,npcm750-pspi"; reg = <0x200000 0x1000>; diff --git a/arch/arm/configs/dram_0x00000000.config b/arch/arm/configs/dram_0x00000000.config index db96dcb420ce..8803a0f58343 100644 --- a/arch/arm/configs/dram_0x00000000.config +++ b/arch/arm/configs/dram_0x00000000.config @@ -1 +1,2 @@ +# Help: DRAM base at 0x00000000 CONFIG_DRAM_BASE=0x00000000 diff --git a/arch/arm/configs/dram_0xc0000000.config b/arch/arm/configs/dram_0xc0000000.config index 343d5333d973..aab8f864686b 100644 --- a/arch/arm/configs/dram_0xc0000000.config +++ b/arch/arm/configs/dram_0xc0000000.config @@ -1 +1,2 @@ +# Help: DRAM base at 0xc0000000 CONFIG_DRAM_BASE=0xc0000000 diff --git a/arch/arm/configs/dram_0xd0000000.config b/arch/arm/configs/dram_0xd0000000.config index 61ba7045f8a1..4aabce4ea3d4 100644 --- a/arch/arm/configs/dram_0xd0000000.config +++ b/arch/arm/configs/dram_0xd0000000.config @@ -1 +1,2 @@ +# Help: DRAM base at 0xd0000000 CONFIG_DRAM_BASE=0xd0000000 diff --git a/arch/arm/configs/lpae.config b/arch/arm/configs/lpae.config index a6d6f7ab3c01..1ab94da8345d 100644 --- a/arch/arm/configs/lpae.config +++ b/arch/arm/configs/lpae.config @@ -1,2 +1,3 @@ +# Help: Enable Large Physical Address Extension mode 
CONFIG_ARM_LPAE=y CONFIG_VMSPLIT_2G=y diff --git a/arch/arm/include/asm/arm_pmuv3.h b/arch/arm/include/asm/arm_pmuv3.h index f3cd04ff022d..72529f5e2bed 100644 --- a/arch/arm/include/asm/arm_pmuv3.h +++ b/arch/arm/include/asm/arm_pmuv3.h @@ -227,6 +227,8 @@ static inline bool kvm_set_pmuserenr(u64 val) return false; } +static inline void kvm_vcpu_pmu_resync_el0(void) {} + /* PMU Version in DFR Register */ #define ARMV8_PMU_DFR_VER_NI 0 #define ARMV8_PMU_DFR_VER_V3P4 0x5 diff --git a/arch/arm/include/asm/ide.h b/arch/arm/include/asm/ide.h deleted file mode 100644 index a81e0b0d6747..000000000000 --- a/arch/arm/include/asm/ide.h +++ /dev/null @@ -1,24 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * arch/arm/include/asm/ide.h - * - * Copyright (C) 1994-1996 Linus Torvalds & authors - */ - -/* - * This file contains the ARM architecture specific IDE code. - */ - -#ifndef __ASMARM_IDE_H -#define __ASMARM_IDE_H - -#ifdef __KERNEL__ - -#define __ide_mm_insw(port,addr,len) readsw(port,addr,len) -#define __ide_mm_insl(port,addr,len) readsl(port,addr,len) -#define __ide_mm_outsw(port,addr,len) writesw(port,addr,len) -#define __ide_mm_outsl(port,addr,len) writesl(port,addr,len) - -#endif /* __KERNEL__ */ - -#endif /* __ASMARM_IDE_H */ diff --git a/arch/arm64/boot/dts/amlogic/meson-a1.dtsi b/arch/arm64/boot/dts/amlogic/meson-a1.dtsi index c8f344596285..96225c421194 100644 --- a/arch/arm64/boot/dts/amlogic/meson-a1.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-a1.dtsi @@ -108,7 +108,7 @@ }; uart_AO: serial@1c00 { - compatible = "amlogic,meson-gx-uart", + compatible = "amlogic,meson-a1-uart", "amlogic,meson-ao-uart"; reg = <0x0 0x1c00 0x0 0x18>; interrupts = <GIC_SPI 25 IRQ_TYPE_EDGE_RISING>; @@ -118,7 +118,7 @@ }; uart_AO_B: serial@2000 { - compatible = "amlogic,meson-gx-uart", + compatible = "amlogic,meson-a1-uart", "amlogic,meson-ao-uart"; reg = <0x0 0x2000 0x0 0x18>; interrupts = <GIC_SPI 26 IRQ_TYPE_EDGE_RISING>; diff --git 
a/arch/arm64/boot/dts/nuvoton/nuvoton-common-npcm8xx.dtsi b/arch/arm64/boot/dts/nuvoton/nuvoton-common-npcm8xx.dtsi index aa7aac8c3774..ecd171b2feba 100644 --- a/arch/arm64/boot/dts/nuvoton/nuvoton-common-npcm8xx.dtsi +++ b/arch/arm64/boot/dts/nuvoton/nuvoton-common-npcm8xx.dtsi @@ -68,6 +68,15 @@ ranges = <0x0 0x0 0xf0000000 0x00300000>, <0xfff00000 0x0 0xfff00000 0x00016000>; + peci: peci-controller@100000 { + compatible = "nuvoton,npcm845-peci"; + reg = <0x100000 0x1000>; + interrupts = <GIC_SPI 6 IRQ_TYPE_LEVEL_HIGH>; + clocks = <&clk NPCM8XX_CLK_APB3>; + cmd-timeout-ms = <1000>; + status = "disabled"; + }; + timer0: timer@8000 { compatible = "nuvoton,npcm845-timer"; interrupts = <GIC_SPI 32 IRQ_TYPE_LEVEL_HIGH>; diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig index 2e9636da2147..5315789f4868 100644 --- a/arch/arm64/configs/defconfig +++ b/arch/arm64/configs/defconfig @@ -1159,7 +1159,6 @@ CONFIG_XEN_GNTDEV=y CONFIG_XEN_GRANT_DEV_ALLOC=y CONFIG_STAGING=y CONFIG_STAGING_MEDIA=y -CONFIG_VIDEO_IMX_MEDIA=m CONFIG_VIDEO_MAX96712=m CONFIG_CHROME_PLATFORMS=y CONFIG_CROS_EC=y diff --git a/arch/arm64/configs/virt.config b/arch/arm64/configs/virt.config index 6865d54e68f8..c47c36f8f67b 100644 --- a/arch/arm64/configs/virt.config +++ b/arch/arm64/configs/virt.config @@ -1,3 +1,4 @@ +# Help: Virtualization guest # # Base options for platforms # diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index 58e5eb27da68..5882b2415596 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -18,10 +18,19 @@ #define HCR_DCT (UL(1) << 57) #define HCR_ATA_SHIFT 56 #define HCR_ATA (UL(1) << HCR_ATA_SHIFT) +#define HCR_TTLBOS (UL(1) << 55) +#define HCR_TTLBIS (UL(1) << 54) +#define HCR_ENSCXT (UL(1) << 53) +#define HCR_TOCU (UL(1) << 52) #define HCR_AMVOFFEN (UL(1) << 51) +#define HCR_TICAB (UL(1) << 50) #define HCR_TID4 (UL(1) << 49) #define HCR_FIEN (UL(1) << 47) #define HCR_FWB (UL(1) << 46) 
+#define HCR_NV2 (UL(1) << 45) +#define HCR_AT (UL(1) << 44) +#define HCR_NV1 (UL(1) << 43) +#define HCR_NV (UL(1) << 42) #define HCR_API (UL(1) << 41) #define HCR_APK (UL(1) << 40) #define HCR_TEA (UL(1) << 37) @@ -89,7 +98,6 @@ HCR_BSU_IS | HCR_FB | HCR_TACR | \ HCR_AMO | HCR_SWIO | HCR_TIDCP | HCR_RW | HCR_TLOR | \ HCR_FMO | HCR_IMO | HCR_PTW | HCR_TID3) -#define HCR_VIRT_EXCP_MASK (HCR_VSE | HCR_VI | HCR_VF) #define HCR_HOST_NVHE_FLAGS (HCR_RW | HCR_API | HCR_APK | HCR_ATA) #define HCR_HOST_NVHE_PROTECTED_FLAGS (HCR_HOST_NVHE_FLAGS | HCR_TSC) #define HCR_HOST_VHE_FLAGS (HCR_RW | HCR_TGE | HCR_E2H) @@ -324,6 +332,47 @@ BIT(18) | \ GENMASK(16, 15)) +/* + * FGT register definitions + * + * RES0 and polarity masks as of DDI0487J.a, to be updated as needed. + * We're not using the generated masks as they are usually ahead of + * the published ARM ARM, which we use as a reference. + * + * Once we get to a point where the two describe the same thing, we'll + * merge the definitions. One day. 
+ */ +#define __HFGRTR_EL2_RES0 (GENMASK(63, 56) | GENMASK(53, 51)) +#define __HFGRTR_EL2_MASK GENMASK(49, 0) +#define __HFGRTR_EL2_nMASK (GENMASK(55, 54) | BIT(50)) + +#define __HFGWTR_EL2_RES0 (GENMASK(63, 56) | GENMASK(53, 51) | \ + BIT(46) | BIT(42) | BIT(40) | BIT(28) | \ + GENMASK(26, 25) | BIT(21) | BIT(18) | \ + GENMASK(15, 14) | GENMASK(10, 9) | BIT(2)) +#define __HFGWTR_EL2_MASK GENMASK(49, 0) +#define __HFGWTR_EL2_nMASK (GENMASK(55, 54) | BIT(50)) + +#define __HFGITR_EL2_RES0 GENMASK(63, 57) +#define __HFGITR_EL2_MASK GENMASK(54, 0) +#define __HFGITR_EL2_nMASK GENMASK(56, 55) + +#define __HDFGRTR_EL2_RES0 (BIT(49) | BIT(42) | GENMASK(39, 38) | \ + GENMASK(21, 20) | BIT(8)) +#define __HDFGRTR_EL2_MASK ~__HDFGRTR_EL2_nMASK +#define __HDFGRTR_EL2_nMASK GENMASK(62, 59) + +#define __HDFGWTR_EL2_RES0 (BIT(63) | GENMASK(59, 58) | BIT(51) | BIT(47) | \ + BIT(43) | GENMASK(40, 38) | BIT(34) | BIT(30) | \ + BIT(22) | BIT(9) | BIT(6)) +#define __HDFGWTR_EL2_MASK ~__HDFGWTR_EL2_nMASK +#define __HDFGWTR_EL2_nMASK GENMASK(62, 60) + +/* Similar definitions for HCRX_EL2 */ +#define __HCRX_EL2_RES0 (GENMASK(63, 16) | GENMASK(13, 12)) +#define __HCRX_EL2_MASK (0) +#define __HCRX_EL2_nMASK (GENMASK(15, 14) | GENMASK(4, 0)) + /* Hyp Prefetch Fault Address Register (HPFAR/HDFAR) */ #define HPFAR_MASK (~UL(0xf)) /* diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index 24e28bb2d95b..24b5e6b23417 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -70,6 +70,7 @@ enum __kvm_host_smccc_func { __KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_vmid_ipa, __KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_vmid_ipa_nsh, __KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_vmid, + __KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_vmid_range, __KVM_HOST_SMCCC_FUNC___kvm_flush_cpu_context, __KVM_HOST_SMCCC_FUNC___kvm_timer_set_cntvoff, __KVM_HOST_SMCCC_FUNC___vgic_v3_read_vmcr, @@ -229,6 +230,8 @@ extern void __kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu, phys_addr_t 
ipa, extern void __kvm_tlb_flush_vmid_ipa_nsh(struct kvm_s2_mmu *mmu, phys_addr_t ipa, int level); +extern void __kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu, + phys_addr_t start, unsigned long pages); extern void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu); extern void __kvm_timer_set_cntvoff(u64 cntvoff); diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index d3dd05bbfe23..af06ccb7ee34 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -49,6 +49,7 @@ #define KVM_REQ_RELOAD_GICv4 KVM_ARCH_REQ(4) #define KVM_REQ_RELOAD_PMU KVM_ARCH_REQ(5) #define KVM_REQ_SUSPEND KVM_ARCH_REQ(6) +#define KVM_REQ_RESYNC_PMU_EL0 KVM_ARCH_REQ(7) #define KVM_DIRTY_LOG_MANUAL_CAPS (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE | \ KVM_DIRTY_LOG_INITIALLY_SET) @@ -380,6 +381,7 @@ enum vcpu_sysreg { CPTR_EL2, /* Architectural Feature Trap Register (EL2) */ HSTR_EL2, /* Hypervisor System Trap Register */ HACR_EL2, /* Hypervisor Auxiliary Control Register */ + HCRX_EL2, /* Extended Hypervisor Configuration Register */ TTBR0_EL2, /* Translation Table Base Register 0 (EL2) */ TTBR1_EL2, /* Translation Table Base Register 1 (EL2) */ TCR_EL2, /* Translation Control Register (EL2) */ @@ -400,6 +402,11 @@ enum vcpu_sysreg { TPIDR_EL2, /* EL2 Software Thread ID Register */ CNTHCTL_EL2, /* Counter-timer Hypervisor Control register */ SP_EL2, /* EL2 Stack Pointer */ + HFGRTR_EL2, + HFGWTR_EL2, + HFGITR_EL2, + HDFGRTR_EL2, + HDFGWTR_EL2, CNTHP_CTL_EL2, CNTHP_CVAL_EL2, CNTHV_CTL_EL2, @@ -567,8 +574,7 @@ struct kvm_vcpu_arch { /* Cache some mmu pages needed inside spinlock regions */ struct kvm_mmu_memory_cache mmu_page_cache; - /* Target CPU and feature flags */ - int target; + /* feature flags */ DECLARE_BITMAP(features, KVM_VCPU_MAX_FEATURES); /* Virtual SError ESR to restore when HCR_EL2.VSE is set */ @@ -669,6 +675,8 @@ struct kvm_vcpu_arch { #define VCPU_SVE_FINALIZED __vcpu_single_flag(cflags, BIT(1)) /* PTRAUTH exposed to guest 
*/ #define GUEST_HAS_PTRAUTH __vcpu_single_flag(cflags, BIT(2)) +/* KVM_ARM_VCPU_INIT completed */ +#define VCPU_INITIALIZED __vcpu_single_flag(cflags, BIT(3)) /* Exception pending */ #define PENDING_EXCEPTION __vcpu_single_flag(iflags, BIT(0)) @@ -899,7 +907,6 @@ struct kvm_vcpu_stat { u64 exits; }; -void kvm_vcpu_preferred_target(struct kvm_vcpu_init *init); unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu); int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices); int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg); @@ -967,8 +974,6 @@ void kvm_arm_resume_guest(struct kvm *kvm); #define kvm_call_hyp_nvhe(f, ...) f(__VA_ARGS__) #endif /* __KVM_NVHE_HYPERVISOR__ */ -void force_vm_exit(const cpumask_t *mask); - int handle_exit(struct kvm_vcpu *vcpu, int exception_index); void handle_exit_early(struct kvm_vcpu *vcpu, int exception_index); @@ -983,6 +988,7 @@ int kvm_handle_cp10_id(struct kvm_vcpu *vcpu); void kvm_reset_sys_regs(struct kvm_vcpu *vcpu); int __init kvm_sys_reg_table_init(void); +int __init populate_nv_trap_config(void); bool lock_all_vcpus(struct kvm *kvm); void unlock_all_vcpus(struct kvm *kvm); @@ -1049,8 +1055,6 @@ static inline bool kvm_system_needs_idmapped_vectors(void) return cpus_have_const_cap(ARM64_SPECTRE_V3A); } -void kvm_arm_vcpu_ptrauth_trap(struct kvm_vcpu *vcpu); - static inline void kvm_arch_sync_events(struct kvm *kvm) {} static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {} @@ -1113,13 +1117,15 @@ int __init kvm_set_ipa_limit(void); #define __KVM_HAVE_ARCH_VM_ALLOC struct kvm *kvm_arch_alloc_vm(void); +#define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS + +#define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS_RANGE + static inline bool kvm_vm_is_protected(struct kvm *kvm) { return false; } -void kvm_init_protected_traps(struct kvm_vcpu *vcpu); - int kvm_arm_vcpu_finalize(struct kvm_vcpu *vcpu, int feature); bool kvm_arm_vcpu_is_finalized(struct kvm_vcpu *vcpu); diff --git 
a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index 0e1e1ab17b4d..96a80e8f6226 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -168,6 +168,7 @@ int create_hyp_io_mappings(phys_addr_t phys_addr, size_t size, void __iomem **haddr); int create_hyp_exec_mappings(phys_addr_t phys_addr, size_t size, void **haddr); +int create_hyp_stack(phys_addr_t phys_addr, unsigned long *haddr); void __init free_hyp_pgds(void); void stage2_unmap_vm(struct kvm *kvm); diff --git a/arch/arm64/include/asm/kvm_nested.h b/arch/arm64/include/asm/kvm_nested.h index 8fb67f032fd1..fa23cc9c2adc 100644 --- a/arch/arm64/include/asm/kvm_nested.h +++ b/arch/arm64/include/asm/kvm_nested.h @@ -11,6 +11,8 @@ static inline bool vcpu_has_nv(const struct kvm_vcpu *vcpu) test_bit(KVM_ARM_VCPU_HAS_EL2, vcpu->arch.features)); } +extern bool __check_nv_sr_forward(struct kvm_vcpu *vcpu); + struct sys_reg_params; struct sys_reg_desc; diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h index 929d355eae0a..d3e354bb8351 100644 --- a/arch/arm64/include/asm/kvm_pgtable.h +++ b/arch/arm64/include/asm/kvm_pgtable.h @@ -746,4 +746,14 @@ enum kvm_pgtable_prot kvm_pgtable_stage2_pte_prot(kvm_pte_t pte); * kvm_pgtable_prot format. 
*/ enum kvm_pgtable_prot kvm_pgtable_hyp_pte_prot(kvm_pte_t pte); + +/** + * kvm_tlb_flush_vmid_range() - Invalidate/flush a range of TLB entries + * + * @mmu: Stage-2 KVM MMU struct + * @addr: The base Intermediate physical address from which to invalidate + * @size: Size of the range from the base to invalidate + */ +void kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu, + phys_addr_t addr, size_t size); #endif /* __ARM64_KVM_PGTABLE_H__ */ diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 16464bf9a8aa..38296579a4fd 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -124,6 +124,37 @@ #define SYS_DC_CIGSW sys_insn(1, 0, 7, 14, 4) #define SYS_DC_CIGDSW sys_insn(1, 0, 7, 14, 6) +#define SYS_IC_IALLUIS sys_insn(1, 0, 7, 1, 0) +#define SYS_IC_IALLU sys_insn(1, 0, 7, 5, 0) +#define SYS_IC_IVAU sys_insn(1, 3, 7, 5, 1) + +#define SYS_DC_IVAC sys_insn(1, 0, 7, 6, 1) +#define SYS_DC_IGVAC sys_insn(1, 0, 7, 6, 3) +#define SYS_DC_IGDVAC sys_insn(1, 0, 7, 6, 5) + +#define SYS_DC_CVAC sys_insn(1, 3, 7, 10, 1) +#define SYS_DC_CGVAC sys_insn(1, 3, 7, 10, 3) +#define SYS_DC_CGDVAC sys_insn(1, 3, 7, 10, 5) + +#define SYS_DC_CVAU sys_insn(1, 3, 7, 11, 1) + +#define SYS_DC_CVAP sys_insn(1, 3, 7, 12, 1) +#define SYS_DC_CGVAP sys_insn(1, 3, 7, 12, 3) +#define SYS_DC_CGDVAP sys_insn(1, 3, 7, 12, 5) + +#define SYS_DC_CVADP sys_insn(1, 3, 7, 13, 1) +#define SYS_DC_CGVADP sys_insn(1, 3, 7, 13, 3) +#define SYS_DC_CGDVADP sys_insn(1, 3, 7, 13, 5) + +#define SYS_DC_CIVAC sys_insn(1, 3, 7, 14, 1) +#define SYS_DC_CIGVAC sys_insn(1, 3, 7, 14, 3) +#define SYS_DC_CIGDVAC sys_insn(1, 3, 7, 14, 5) + +/* Data cache zero operations */ +#define SYS_DC_ZVA sys_insn(1, 3, 7, 4, 1) +#define SYS_DC_GVA sys_insn(1, 3, 7, 4, 3) +#define SYS_DC_GZVA sys_insn(1, 3, 7, 4, 4) + /* * Automatically generated definitions for system registers, the * manual encodings below are in the process of being converted to @@ -163,6 +194,82 @@ #define 
SYS_DBGDTRTX_EL0 sys_reg(2, 3, 0, 5, 0) #define SYS_DBGVCR32_EL2 sys_reg(2, 4, 0, 7, 0) +#define SYS_BRBINF_EL1(n) sys_reg(2, 1, 8, (n & 15), (((n & 16) >> 2) | 0)) +#define SYS_BRBINFINJ_EL1 sys_reg(2, 1, 9, 1, 0) +#define SYS_BRBSRC_EL1(n) sys_reg(2, 1, 8, (n & 15), (((n & 16) >> 2) | 1)) +#define SYS_BRBSRCINJ_EL1 sys_reg(2, 1, 9, 1, 1) +#define SYS_BRBTGT_EL1(n) sys_reg(2, 1, 8, (n & 15), (((n & 16) >> 2) | 2)) +#define SYS_BRBTGTINJ_EL1 sys_reg(2, 1, 9, 1, 2) +#define SYS_BRBTS_EL1 sys_reg(2, 1, 9, 0, 2) + +#define SYS_BRBCR_EL1 sys_reg(2, 1, 9, 0, 0) +#define SYS_BRBFCR_EL1 sys_reg(2, 1, 9, 0, 1) +#define SYS_BRBIDR0_EL1 sys_reg(2, 1, 9, 2, 0) + +#define SYS_TRCITECR_EL1 sys_reg(3, 0, 1, 2, 3) +#define SYS_TRCACATR(m) sys_reg(2, 1, 2, ((m & 7) << 1), (2 | (m >> 3))) +#define SYS_TRCACVR(m) sys_reg(2, 1, 2, ((m & 7) << 1), (0 | (m >> 3))) +#define SYS_TRCAUTHSTATUS sys_reg(2, 1, 7, 14, 6) +#define SYS_TRCAUXCTLR sys_reg(2, 1, 0, 6, 0) +#define SYS_TRCBBCTLR sys_reg(2, 1, 0, 15, 0) +#define SYS_TRCCCCTLR sys_reg(2, 1, 0, 14, 0) +#define SYS_TRCCIDCCTLR0 sys_reg(2, 1, 3, 0, 2) +#define SYS_TRCCIDCCTLR1 sys_reg(2, 1, 3, 1, 2) +#define SYS_TRCCIDCVR(m) sys_reg(2, 1, 3, ((m & 7) << 1), 0) +#define SYS_TRCCLAIMCLR sys_reg(2, 1, 7, 9, 6) +#define SYS_TRCCLAIMSET sys_reg(2, 1, 7, 8, 6) +#define SYS_TRCCNTCTLR(m) sys_reg(2, 1, 0, (4 | (m & 3)), 5) +#define SYS_TRCCNTRLDVR(m) sys_reg(2, 1, 0, (0 | (m & 3)), 5) +#define SYS_TRCCNTVR(m) sys_reg(2, 1, 0, (8 | (m & 3)), 5) +#define SYS_TRCCONFIGR sys_reg(2, 1, 0, 4, 0) +#define SYS_TRCDEVARCH sys_reg(2, 1, 7, 15, 6) +#define SYS_TRCDEVID sys_reg(2, 1, 7, 2, 7) +#define SYS_TRCEVENTCTL0R sys_reg(2, 1, 0, 8, 0) +#define SYS_TRCEVENTCTL1R sys_reg(2, 1, 0, 9, 0) +#define SYS_TRCEXTINSELR(m) sys_reg(2, 1, 0, (8 | (m & 3)), 4) +#define SYS_TRCIDR0 sys_reg(2, 1, 0, 8, 7) +#define SYS_TRCIDR10 sys_reg(2, 1, 0, 2, 6) +#define SYS_TRCIDR11 sys_reg(2, 1, 0, 3, 6) +#define SYS_TRCIDR12 sys_reg(2, 1, 0, 4, 6) +#define SYS_TRCIDR13 
sys_reg(2, 1, 0, 5, 6) +#define SYS_TRCIDR1 sys_reg(2, 1, 0, 9, 7) +#define SYS_TRCIDR2 sys_reg(2, 1, 0, 10, 7) +#define SYS_TRCIDR3 sys_reg(2, 1, 0, 11, 7) +#define SYS_TRCIDR4 sys_reg(2, 1, 0, 12, 7) +#define SYS_TRCIDR5 sys_reg(2, 1, 0, 13, 7) +#define SYS_TRCIDR6 sys_reg(2, 1, 0, 14, 7) +#define SYS_TRCIDR7 sys_reg(2, 1, 0, 15, 7) +#define SYS_TRCIDR8 sys_reg(2, 1, 0, 0, 6) +#define SYS_TRCIDR9 sys_reg(2, 1, 0, 1, 6) +#define SYS_TRCIMSPEC(m) sys_reg(2, 1, 0, (m & 7), 7) +#define SYS_TRCITEEDCR sys_reg(2, 1, 0, 2, 1) +#define SYS_TRCOSLSR sys_reg(2, 1, 1, 1, 4) +#define SYS_TRCPRGCTLR sys_reg(2, 1, 0, 1, 0) +#define SYS_TRCQCTLR sys_reg(2, 1, 0, 1, 1) +#define SYS_TRCRSCTLR(m) sys_reg(2, 1, 1, (m & 15), (0 | (m >> 4))) +#define SYS_TRCRSR sys_reg(2, 1, 0, 10, 0) +#define SYS_TRCSEQEVR(m) sys_reg(2, 1, 0, (m & 3), 4) +#define SYS_TRCSEQRSTEVR sys_reg(2, 1, 0, 6, 4) +#define SYS_TRCSEQSTR sys_reg(2, 1, 0, 7, 4) +#define SYS_TRCSSCCR(m) sys_reg(2, 1, 1, (m & 7), 2) +#define SYS_TRCSSCSR(m) sys_reg(2, 1, 1, (8 | (m & 7)), 2) +#define SYS_TRCSSPCICR(m) sys_reg(2, 1, 1, (m & 7), 3) +#define SYS_TRCSTALLCTLR sys_reg(2, 1, 0, 11, 0) +#define SYS_TRCSTATR sys_reg(2, 1, 0, 3, 0) +#define SYS_TRCSYNCPR sys_reg(2, 1, 0, 13, 0) +#define SYS_TRCTRACEIDR sys_reg(2, 1, 0, 0, 1) +#define SYS_TRCTSCTLR sys_reg(2, 1, 0, 12, 0) +#define SYS_TRCVICTLR sys_reg(2, 1, 0, 0, 2) +#define SYS_TRCVIIECTLR sys_reg(2, 1, 0, 1, 2) +#define SYS_TRCVIPCSSCTLR sys_reg(2, 1, 0, 3, 2) +#define SYS_TRCVISSCTLR sys_reg(2, 1, 0, 2, 2) +#define SYS_TRCVMIDCCTLR0 sys_reg(2, 1, 3, 2, 2) +#define SYS_TRCVMIDCCTLR1 sys_reg(2, 1, 3, 3, 2) +#define SYS_TRCVMIDCVR(m) sys_reg(2, 1, 3, ((m & 7) << 1), 1) + +/* ETM */ +#define SYS_TRCOSLAR sys_reg(2, 1, 1, 0, 4) + #define SYS_MIDR_EL1 sys_reg(3, 0, 0, 0, 0) #define SYS_MPIDR_EL1 sys_reg(3, 0, 0, 0, 5) #define SYS_REVIDR_EL1 sys_reg(3, 0, 0, 0, 6) @@ -203,8 +310,13 @@ #define SYS_ERXCTLR_EL1 sys_reg(3, 0, 5, 4, 1) #define SYS_ERXSTATUS_EL1 sys_reg(3, 0, 5, 4, 
2) #define SYS_ERXADDR_EL1 sys_reg(3, 0, 5, 4, 3) +#define SYS_ERXPFGF_EL1 sys_reg(3, 0, 5, 4, 4) +#define SYS_ERXPFGCTL_EL1 sys_reg(3, 0, 5, 4, 5) +#define SYS_ERXPFGCDN_EL1 sys_reg(3, 0, 5, 4, 6) #define SYS_ERXMISC0_EL1 sys_reg(3, 0, 5, 5, 0) #define SYS_ERXMISC1_EL1 sys_reg(3, 0, 5, 5, 1) +#define SYS_ERXMISC2_EL1 sys_reg(3, 0, 5, 5, 2) +#define SYS_ERXMISC3_EL1 sys_reg(3, 0, 5, 5, 3) #define SYS_TFSR_EL1 sys_reg(3, 0, 5, 6, 0) #define SYS_TFSRE0_EL1 sys_reg(3, 0, 5, 6, 1) @@ -275,6 +387,8 @@ #define SYS_ICC_IGRPEN0_EL1 sys_reg(3, 0, 12, 12, 6) #define SYS_ICC_IGRPEN1_EL1 sys_reg(3, 0, 12, 12, 7) +#define SYS_ACCDATA_EL1 sys_reg(3, 0, 13, 0, 5) + #define SYS_CNTKCTL_EL1 sys_reg(3, 0, 14, 1, 0) #define SYS_AIDR_EL1 sys_reg(3, 1, 0, 0, 7) @@ -383,8 +497,6 @@ #define SYS_VTCR_EL2 sys_reg(3, 4, 2, 1, 2) #define SYS_TRFCR_EL2 sys_reg(3, 4, 1, 2, 1) -#define SYS_HDFGRTR_EL2 sys_reg(3, 4, 3, 1, 4) -#define SYS_HDFGWTR_EL2 sys_reg(3, 4, 3, 1, 5) #define SYS_HAFGRTR_EL2 sys_reg(3, 4, 3, 1, 6) #define SYS_SPSR_EL2 sys_reg(3, 4, 4, 0, 0) #define SYS_ELR_EL2 sys_reg(3, 4, 4, 0, 1) @@ -478,6 +590,158 @@ #define SYS_SP_EL2 sys_reg(3, 6, 4, 1, 0) +/* AT instructions */ +#define AT_Op0 1 +#define AT_CRn 7 + +#define OP_AT_S1E1R sys_insn(AT_Op0, 0, AT_CRn, 8, 0) +#define OP_AT_S1E1W sys_insn(AT_Op0, 0, AT_CRn, 8, 1) +#define OP_AT_S1E0R sys_insn(AT_Op0, 0, AT_CRn, 8, 2) +#define OP_AT_S1E0W sys_insn(AT_Op0, 0, AT_CRn, 8, 3) +#define OP_AT_S1E1RP sys_insn(AT_Op0, 0, AT_CRn, 9, 0) +#define OP_AT_S1E1WP sys_insn(AT_Op0, 0, AT_CRn, 9, 1) +#define OP_AT_S1E2R sys_insn(AT_Op0, 4, AT_CRn, 8, 0) +#define OP_AT_S1E2W sys_insn(AT_Op0, 4, AT_CRn, 8, 1) +#define OP_AT_S12E1R sys_insn(AT_Op0, 4, AT_CRn, 8, 4) +#define OP_AT_S12E1W sys_insn(AT_Op0, 4, AT_CRn, 8, 5) +#define OP_AT_S12E0R sys_insn(AT_Op0, 4, AT_CRn, 8, 6) +#define OP_AT_S12E0W sys_insn(AT_Op0, 4, AT_CRn, 8, 7) + +/* TLBI instructions */ +#define OP_TLBI_VMALLE1OS sys_insn(1, 0, 8, 1, 0) +#define OP_TLBI_VAE1OS sys_insn(1, 0, 
8, 1, 1) +#define OP_TLBI_ASIDE1OS sys_insn(1, 0, 8, 1, 2) +#define OP_TLBI_VAAE1OS sys_insn(1, 0, 8, 1, 3) +#define OP_TLBI_VALE1OS sys_insn(1, 0, 8, 1, 5) +#define OP_TLBI_VAALE1OS sys_insn(1, 0, 8, 1, 7) +#define OP_TLBI_RVAE1IS sys_insn(1, 0, 8, 2, 1) +#define OP_TLBI_RVAAE1IS sys_insn(1, 0, 8, 2, 3) +#define OP_TLBI_RVALE1IS sys_insn(1, 0, 8, 2, 5) +#define OP_TLBI_RVAALE1IS sys_insn(1, 0, 8, 2, 7) +#define OP_TLBI_VMALLE1IS sys_insn(1, 0, 8, 3, 0) +#define OP_TLBI_VAE1IS sys_insn(1, 0, 8, 3, 1) +#define OP_TLBI_ASIDE1IS sys_insn(1, 0, 8, 3, 2) +#define OP_TLBI_VAAE1IS sys_insn(1, 0, 8, 3, 3) +#define OP_TLBI_VALE1IS sys_insn(1, 0, 8, 3, 5) +#define OP_TLBI_VAALE1IS sys_insn(1, 0, 8, 3, 7) +#define OP_TLBI_RVAE1OS sys_insn(1, 0, 8, 5, 1) +#define OP_TLBI_RVAAE1OS sys_insn(1, 0, 8, 5, 3) +#define OP_TLBI_RVALE1OS sys_insn(1, 0, 8, 5, 5) +#define OP_TLBI_RVAALE1OS sys_insn(1, 0, 8, 5, 7) +#define OP_TLBI_RVAE1 sys_insn(1, 0, 8, 6, 1) +#define OP_TLBI_RVAAE1 sys_insn(1, 0, 8, 6, 3) +#define OP_TLBI_RVALE1 sys_insn(1, 0, 8, 6, 5) +#define OP_TLBI_RVAALE1 sys_insn(1, 0, 8, 6, 7) +#define OP_TLBI_VMALLE1 sys_insn(1, 0, 8, 7, 0) +#define OP_TLBI_VAE1 sys_insn(1, 0, 8, 7, 1) +#define OP_TLBI_ASIDE1 sys_insn(1, 0, 8, 7, 2) +#define OP_TLBI_VAAE1 sys_insn(1, 0, 8, 7, 3) +#define OP_TLBI_VALE1 sys_insn(1, 0, 8, 7, 5) +#define OP_TLBI_VAALE1 sys_insn(1, 0, 8, 7, 7) +#define OP_TLBI_VMALLE1OSNXS sys_insn(1, 0, 9, 1, 0) +#define OP_TLBI_VAE1OSNXS sys_insn(1, 0, 9, 1, 1) +#define OP_TLBI_ASIDE1OSNXS sys_insn(1, 0, 9, 1, 2) +#define OP_TLBI_VAAE1OSNXS sys_insn(1, 0, 9, 1, 3) +#define OP_TLBI_VALE1OSNXS sys_insn(1, 0, 9, 1, 5) +#define OP_TLBI_VAALE1OSNXS sys_insn(1, 0, 9, 1, 7) +#define OP_TLBI_RVAE1ISNXS sys_insn(1, 0, 9, 2, 1) +#define OP_TLBI_RVAAE1ISNXS sys_insn(1, 0, 9, 2, 3) +#define OP_TLBI_RVALE1ISNXS sys_insn(1, 0, 9, 2, 5) +#define OP_TLBI_RVAALE1ISNXS sys_insn(1, 0, 9, 2, 7) +#define OP_TLBI_VMALLE1ISNXS sys_insn(1, 0, 9, 3, 0) +#define OP_TLBI_VAE1ISNXS 
sys_insn(1, 0, 9, 3, 1) +#define OP_TLBI_ASIDE1ISNXS sys_insn(1, 0, 9, 3, 2) +#define OP_TLBI_VAAE1ISNXS sys_insn(1, 0, 9, 3, 3) +#define OP_TLBI_VALE1ISNXS sys_insn(1, 0, 9, 3, 5) +#define OP_TLBI_VAALE1ISNXS sys_insn(1, 0, 9, 3, 7) +#define OP_TLBI_RVAE1OSNXS sys_insn(1, 0, 9, 5, 1) +#define OP_TLBI_RVAAE1OSNXS sys_insn(1, 0, 9, 5, 3) +#define OP_TLBI_RVALE1OSNXS sys_insn(1, 0, 9, 5, 5) +#define OP_TLBI_RVAALE1OSNXS sys_insn(1, 0, 9, 5, 7) +#define OP_TLBI_RVAE1NXS sys_insn(1, 0, 9, 6, 1) +#define OP_TLBI_RVAAE1NXS sys_insn(1, 0, 9, 6, 3) +#define OP_TLBI_RVALE1NXS sys_insn(1, 0, 9, 6, 5) +#define OP_TLBI_RVAALE1NXS sys_insn(1, 0, 9, 6, 7) +#define OP_TLBI_VMALLE1NXS sys_insn(1, 0, 9, 7, 0) +#define OP_TLBI_VAE1NXS sys_insn(1, 0, 9, 7, 1) +#define OP_TLBI_ASIDE1NXS sys_insn(1, 0, 9, 7, 2) +#define OP_TLBI_VAAE1NXS sys_insn(1, 0, 9, 7, 3) +#define OP_TLBI_VALE1NXS sys_insn(1, 0, 9, 7, 5) +#define OP_TLBI_VAALE1NXS sys_insn(1, 0, 9, 7, 7) +#define OP_TLBI_IPAS2E1IS sys_insn(1, 4, 8, 0, 1) +#define OP_TLBI_RIPAS2E1IS sys_insn(1, 4, 8, 0, 2) +#define OP_TLBI_IPAS2LE1IS sys_insn(1, 4, 8, 0, 5) +#define OP_TLBI_RIPAS2LE1IS sys_insn(1, 4, 8, 0, 6) +#define OP_TLBI_ALLE2OS sys_insn(1, 4, 8, 1, 0) +#define OP_TLBI_VAE2OS sys_insn(1, 4, 8, 1, 1) +#define OP_TLBI_ALLE1OS sys_insn(1, 4, 8, 1, 4) +#define OP_TLBI_VALE2OS sys_insn(1, 4, 8, 1, 5) +#define OP_TLBI_VMALLS12E1OS sys_insn(1, 4, 8, 1, 6) +#define OP_TLBI_RVAE2IS sys_insn(1, 4, 8, 2, 1) +#define OP_TLBI_RVALE2IS sys_insn(1, 4, 8, 2, 5) +#define OP_TLBI_ALLE2IS sys_insn(1, 4, 8, 3, 0) +#define OP_TLBI_VAE2IS sys_insn(1, 4, 8, 3, 1) +#define OP_TLBI_ALLE1IS sys_insn(1, 4, 8, 3, 4) +#define OP_TLBI_VALE2IS sys_insn(1, 4, 8, 3, 5) +#define OP_TLBI_VMALLS12E1IS sys_insn(1, 4, 8, 3, 6) +#define OP_TLBI_IPAS2E1OS sys_insn(1, 4, 8, 4, 0) +#define OP_TLBI_IPAS2E1 sys_insn(1, 4, 8, 4, 1) +#define OP_TLBI_RIPAS2E1 sys_insn(1, 4, 8, 4, 2) +#define OP_TLBI_RIPAS2E1OS sys_insn(1, 4, 8, 4, 3) +#define OP_TLBI_IPAS2LE1OS sys_insn(1, 
4, 8, 4, 4) +#define OP_TLBI_IPAS2LE1 sys_insn(1, 4, 8, 4, 5) +#define OP_TLBI_RIPAS2LE1 sys_insn(1, 4, 8, 4, 6) +#define OP_TLBI_RIPAS2LE1OS sys_insn(1, 4, 8, 4, 7) +#define OP_TLBI_RVAE2OS sys_insn(1, 4, 8, 5, 1) +#define OP_TLBI_RVALE2OS sys_insn(1, 4, 8, 5, 5) +#define OP_TLBI_RVAE2 sys_insn(1, 4, 8, 6, 1) +#define OP_TLBI_RVALE2 sys_insn(1, 4, 8, 6, 5) +#define OP_TLBI_ALLE2 sys_insn(1, 4, 8, 7, 0) +#define OP_TLBI_VAE2 sys_insn(1, 4, 8, 7, 1) +#define OP_TLBI_ALLE1 sys_insn(1, 4, 8, 7, 4) +#define OP_TLBI_VALE2 sys_insn(1, 4, 8, 7, 5) +#define OP_TLBI_VMALLS12E1 sys_insn(1, 4, 8, 7, 6) +#define OP_TLBI_IPAS2E1ISNXS sys_insn(1, 4, 9, 0, 1) +#define OP_TLBI_RIPAS2E1ISNXS sys_insn(1, 4, 9, 0, 2) +#define OP_TLBI_IPAS2LE1ISNXS sys_insn(1, 4, 9, 0, 5) +#define OP_TLBI_RIPAS2LE1ISNXS sys_insn(1, 4, 9, 0, 6) +#define OP_TLBI_ALLE2OSNXS sys_insn(1, 4, 9, 1, 0) +#define OP_TLBI_VAE2OSNXS sys_insn(1, 4, 9, 1, 1) +#define OP_TLBI_ALLE1OSNXS sys_insn(1, 4, 9, 1, 4) +#define OP_TLBI_VALE2OSNXS sys_insn(1, 4, 9, 1, 5) +#define OP_TLBI_VMALLS12E1OSNXS sys_insn(1, 4, 9, 1, 6) +#define OP_TLBI_RVAE2ISNXS sys_insn(1, 4, 9, 2, 1) +#define OP_TLBI_RVALE2ISNXS sys_insn(1, 4, 9, 2, 5) +#define OP_TLBI_ALLE2ISNXS sys_insn(1, 4, 9, 3, 0) +#define OP_TLBI_VAE2ISNXS sys_insn(1, 4, 9, 3, 1) +#define OP_TLBI_ALLE1ISNXS sys_insn(1, 4, 9, 3, 4) +#define OP_TLBI_VALE2ISNXS sys_insn(1, 4, 9, 3, 5) +#define OP_TLBI_VMALLS12E1ISNXS sys_insn(1, 4, 9, 3, 6) +#define OP_TLBI_IPAS2E1OSNXS sys_insn(1, 4, 9, 4, 0) +#define OP_TLBI_IPAS2E1NXS sys_insn(1, 4, 9, 4, 1) +#define OP_TLBI_RIPAS2E1NXS sys_insn(1, 4, 9, 4, 2) +#define OP_TLBI_RIPAS2E1OSNXS sys_insn(1, 4, 9, 4, 3) +#define OP_TLBI_IPAS2LE1OSNXS sys_insn(1, 4, 9, 4, 4) +#define OP_TLBI_IPAS2LE1NXS sys_insn(1, 4, 9, 4, 5) +#define OP_TLBI_RIPAS2LE1NXS sys_insn(1, 4, 9, 4, 6) +#define OP_TLBI_RIPAS2LE1OSNXS sys_insn(1, 4, 9, 4, 7) +#define OP_TLBI_RVAE2OSNXS sys_insn(1, 4, 9, 5, 1) +#define OP_TLBI_RVALE2OSNXS sys_insn(1, 4, 9, 5, 5) +#define 
OP_TLBI_RVAE2NXS sys_insn(1, 4, 9, 6, 1) +#define OP_TLBI_RVALE2NXS sys_insn(1, 4, 9, 6, 5) +#define OP_TLBI_ALLE2NXS sys_insn(1, 4, 9, 7, 0) +#define OP_TLBI_VAE2NXS sys_insn(1, 4, 9, 7, 1) +#define OP_TLBI_ALLE1NXS sys_insn(1, 4, 9, 7, 4) +#define OP_TLBI_VALE2NXS sys_insn(1, 4, 9, 7, 5) +#define OP_TLBI_VMALLS12E1NXS sys_insn(1, 4, 9, 7, 6) + +/* Misc instructions */ +#define OP_BRB_IALL sys_insn(1, 1, 7, 2, 4) +#define OP_BRB_INJ sys_insn(1, 1, 7, 2, 5) +#define OP_CFP_RCTX sys_insn(1, 3, 7, 3, 4) +#define OP_DVP_RCTX sys_insn(1, 3, 7, 3, 5) +#define OP_CPP_RCTX sys_insn(1, 3, 7, 3, 7) + /* Common SCTLR_ELx flags. */ #define SCTLR_ELx_ENTP2 (BIT(60)) #define SCTLR_ELx_DSSBS (BIT(44)) diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h index 55b50e1d4a84..b149cf9f91bc 100644 --- a/arch/arm64/include/asm/tlbflush.h +++ b/arch/arm64/include/asm/tlbflush.h @@ -335,14 +335,77 @@ static inline void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch) */ #define MAX_TLBI_OPS PTRS_PER_PTE +/* + * __flush_tlb_range_op - Perform TLBI operation upon a range + * + * @op: TLBI instruction that operates on a range (has 'r' prefix) + * @start: The start address of the range + * @pages: Range as the number of pages from 'start' + * @stride: Flush granularity + * @asid: The ASID of the task (0 for IPA instructions) + * @tlb_level: Translation Table level hint, if known + * @tlbi_user: If 'true', call an additional __tlbi_user() + * (typically for user ASIDs). 'false' for IPA instructions + * + * When the CPU does not support TLB range operations, flush the TLB + * entries one by one at the granularity of 'stride'. If the TLB + * range ops are supported, then: + * + * 1. If 'pages' is odd, flush the first page through non-range + * operations; + * + * 2. For remaining pages: the minimum range granularity is decided + * by 'scale', so multiple range TLBI operations may be required.
+ * Start from scale = 0, flush the corresponding number of pages + * ((num+1)*2^(5*scale+1) starting from 'addr'), then increase it + * until no pages left. + * + * Note that certain ranges can be represented by either num = 31 and + * scale or num = 0 and scale + 1. The loop below favours the latter + * since num is limited to 30 by the __TLBI_RANGE_NUM() macro. + */ +#define __flush_tlb_range_op(op, start, pages, stride, \ + asid, tlb_level, tlbi_user) \ +do { \ + int num = 0; \ + int scale = 0; \ + unsigned long addr; \ + \ + while (pages > 0) { \ + if (!system_supports_tlb_range() || \ + pages % 2 == 1) { \ + addr = __TLBI_VADDR(start, asid); \ + __tlbi_level(op, addr, tlb_level); \ + if (tlbi_user) \ + __tlbi_user_level(op, addr, tlb_level); \ + start += stride; \ + pages -= stride >> PAGE_SHIFT; \ + continue; \ + } \ + \ + num = __TLBI_RANGE_NUM(pages, scale); \ + if (num >= 0) { \ + addr = __TLBI_VADDR_RANGE(start, asid, scale, \ + num, tlb_level); \ + __tlbi(r##op, addr); \ + if (tlbi_user) \ + __tlbi_user(r##op, addr); \ + start += __TLBI_RANGE_PAGES(num, scale) << PAGE_SHIFT; \ + pages -= __TLBI_RANGE_PAGES(num, scale); \ + } \ + scale++; \ + } \ +} while (0) + +#define __flush_s2_tlb_range_op(op, start, pages, stride, tlb_level) \ + __flush_tlb_range_op(op, start, pages, stride, 0, tlb_level, false) + static inline void __flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end, unsigned long stride, bool last_level, int tlb_level) { - int num = 0; - int scale = 0; - unsigned long asid, addr, pages; + unsigned long asid, pages; start = round_down(start, stride); end = round_up(end, stride); @@ -364,56 +427,11 @@ static inline void __flush_tlb_range(struct vm_area_struct *vma, dsb(ishst); asid = ASID(vma->vm_mm); - /* - * When the CPU does not support TLB range operations, flush the TLB - * entries one by one at the granularity of 'stride'. If the TLB - * range ops are supported, then: - * - * 1. 
If 'pages' is odd, flush the first page through non-range - * operations; - * - * 2. For remaining pages: the minimum range granularity is decided - * by 'scale', so multiple range TLBI operations may be required. - * Start from scale = 0, flush the corresponding number of pages - * ((num+1)*2^(5*scale+1) starting from 'addr'), then increase it - * until no pages left. - * - * Note that certain ranges can be represented by either num = 31 and - * scale or num = 0 and scale + 1. The loop below favours the latter - * since num is limited to 30 by the __TLBI_RANGE_NUM() macro. - */ - while (pages > 0) { - if (!system_supports_tlb_range() || - pages % 2 == 1) { - addr = __TLBI_VADDR(start, asid); - if (last_level) { - __tlbi_level(vale1is, addr, tlb_level); - __tlbi_user_level(vale1is, addr, tlb_level); - } else { - __tlbi_level(vae1is, addr, tlb_level); - __tlbi_user_level(vae1is, addr, tlb_level); - } - start += stride; - pages -= stride >> PAGE_SHIFT; - continue; - } - - num = __TLBI_RANGE_NUM(pages, scale); - if (num >= 0) { - addr = __TLBI_VADDR_RANGE(start, asid, scale, - num, tlb_level); - if (last_level) { - __tlbi(rvale1is, addr); - __tlbi_user(rvale1is, addr); - } else { - __tlbi(rvae1is, addr); - __tlbi_user(rvae1is, addr); - } - start += __TLBI_RANGE_PAGES(num, scale) << PAGE_SHIFT; - pages -= __TLBI_RANGE_PAGES(num, scale); - } - scale++; - } + if (last_level) + __flush_tlb_range_op(vale1is, start, pages, stride, asid, tlb_level, true); + else + __flush_tlb_range_op(vae1is, start, pages, stride, asid, tlb_level, true); + dsb(ish); mmu_notifier_arch_invalidate_secondary_tlbs(vma->vm_mm, start, end); } diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index a5f533f63b60..b018ae12ff5f 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -2627,6 +2627,13 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .matches = has_cpuid_feature, ARM64_CPUID_FIELDS(ID_AA64ISAR1_EL1, LRCPC, IMP) }, + 
{ + .desc = "Fine Grained Traps", + .type = ARM64_CPUCAP_SYSTEM_FEATURE, + .capability = ARM64_HAS_FGT, + .matches = has_cpuid_feature, + ARM64_CPUID_FIELDS(ID_AA64MMFR0_EL1, FGT, IMP) + }, #ifdef CONFIG_ARM64_SME { .desc = "Scalable Matrix Extension", diff --git a/arch/arm64/kernel/idreg-override.c b/arch/arm64/kernel/idreg-override.c index aee12c75b738..3addc09f8746 100644 --- a/arch/arm64/kernel/idreg-override.c +++ b/arch/arm64/kernel/idreg-override.c @@ -262,9 +262,9 @@ static __init void __parse_cmdline(const char *cmdline, bool parse_aliases) if (!len) return; - len = strscpy(buf, cmdline, ARRAY_SIZE(buf)); - if (len == -E2BIG) - len = ARRAY_SIZE(buf) - 1; + len = min(len, ARRAY_SIZE(buf) - 1); + memcpy(buf, cmdline, len); + buf[len] = '\0'; if (strcmp(buf, "--") == 0) return; diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig index f531da6b362e..83c1e09be42e 100644 --- a/arch/arm64/kvm/Kconfig +++ b/arch/arm64/kvm/Kconfig @@ -25,7 +25,6 @@ menuconfig KVM select MMU_NOTIFIER select PREEMPT_NOTIFIERS select HAVE_KVM_CPU_RELAX_INTERCEPT - select HAVE_KVM_ARCH_TLB_FLUSH_ALL select KVM_MMIO select KVM_GENERIC_DIRTYLOG_READ_PROTECT select KVM_XFER_TO_GUEST_WORK @@ -43,6 +42,7 @@ menuconfig KVM select SCHED_INFO select GUEST_PERF_EVENTS if PERF_EVENTS select INTERVAL_TREE + select XARRAY_MULTI help Support hosting virtualized guest machines. 
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index d1cb298a58a0..4866b3f7b4ea 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -36,6 +36,7 @@ #include <asm/kvm_arm.h> #include <asm/kvm_asm.h> #include <asm/kvm_mmu.h> +#include <asm/kvm_nested.h> #include <asm/kvm_pkvm.h> #include <asm/kvm_emulate.h> #include <asm/sections.h> @@ -365,7 +366,7 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) #endif /* Force users to call KVM_ARM_VCPU_INIT */ - vcpu->arch.target = -1; + vcpu_clear_flag(vcpu, VCPU_INITIALIZED); bitmap_zero(vcpu->arch.features, KVM_VCPU_MAX_FEATURES); vcpu->arch.mmu_page_cache.gfp_zero = __GFP_ZERO; @@ -462,7 +463,7 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) vcpu_ptrauth_disable(vcpu); kvm_arch_vcpu_load_debug_state_flags(vcpu); - if (!cpumask_test_cpu(smp_processor_id(), vcpu->kvm->arch.supported_cpus)) + if (!cpumask_test_cpu(cpu, vcpu->kvm->arch.supported_cpus)) vcpu_set_on_unsupported_cpu(vcpu); } @@ -574,7 +575,7 @@ unsigned long kvm_arch_vcpu_get_ip(struct kvm_vcpu *vcpu) static int kvm_vcpu_initialized(struct kvm_vcpu *vcpu) { - return vcpu->arch.target >= 0; + return vcpu_get_flag(vcpu, VCPU_INITIALIZED); } /* @@ -803,6 +804,9 @@ static int check_vcpu_requests(struct kvm_vcpu *vcpu) kvm_pmu_handle_pmcr(vcpu, __vcpu_sys_reg(vcpu, PMCR_EL0)); + if (kvm_check_request(KVM_REQ_RESYNC_PMU_EL0, vcpu)) + kvm_vcpu_pmu_restore_guest(vcpu); + if (kvm_check_request(KVM_REQ_SUSPEND, vcpu)) return kvm_vcpu_suspend(vcpu); @@ -818,6 +822,9 @@ static bool vcpu_mode_is_bad_32bit(struct kvm_vcpu *vcpu) if (likely(!vcpu_mode_is_32bit(vcpu))) return false; + if (vcpu_has_nv(vcpu)) + return true; + return !kvm_supports_32bit_el0(); } @@ -1058,7 +1065,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) * invalid. The VMM can try and fix it by issuing a * KVM_ARM_VCPU_INIT if it really wants to. 
*/ - vcpu->arch.target = -1; + vcpu_clear_flag(vcpu, VCPU_INITIALIZED); ret = ARM_EXCEPTION_IL; } @@ -1219,8 +1226,7 @@ static bool kvm_vcpu_init_changed(struct kvm_vcpu *vcpu, { unsigned long features = init->features[0]; - return !bitmap_equal(vcpu->arch.features, &features, KVM_VCPU_MAX_FEATURES) || - vcpu->arch.target != init->target; + return !bitmap_equal(vcpu->arch.features, &features, KVM_VCPU_MAX_FEATURES); } static int __kvm_vcpu_set_target(struct kvm_vcpu *vcpu, @@ -1236,20 +1242,18 @@ static int __kvm_vcpu_set_target(struct kvm_vcpu *vcpu, !bitmap_equal(kvm->arch.vcpu_features, &features, KVM_VCPU_MAX_FEATURES)) goto out_unlock; - vcpu->arch.target = init->target; bitmap_copy(vcpu->arch.features, &features, KVM_VCPU_MAX_FEATURES); /* Now we know what it is, we can reset it. */ ret = kvm_reset_vcpu(vcpu); if (ret) { - vcpu->arch.target = -1; bitmap_zero(vcpu->arch.features, KVM_VCPU_MAX_FEATURES); goto out_unlock; } bitmap_copy(kvm->arch.vcpu_features, &features, KVM_VCPU_MAX_FEATURES); set_bit(KVM_ARCH_FLAG_VCPU_FEATURES_CONFIGURED, &kvm->arch.flags); - + vcpu_set_flag(vcpu, VCPU_INITIALIZED); out_unlock: mutex_unlock(&kvm->arch.config_lock); return ret; @@ -1260,14 +1264,15 @@ static int kvm_vcpu_set_target(struct kvm_vcpu *vcpu, { int ret; - if (init->target != kvm_target_cpu()) + if (init->target != KVM_ARM_TARGET_GENERIC_V8 && + init->target != kvm_target_cpu()) return -EINVAL; ret = kvm_vcpu_init_check_features(vcpu, init); if (ret) return ret; - if (vcpu->arch.target == -1) + if (!kvm_vcpu_initialized(vcpu)) return __kvm_vcpu_set_target(vcpu, init); if (kvm_vcpu_init_changed(vcpu, init)) @@ -1532,12 +1537,6 @@ void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot) } -void kvm_arch_flush_remote_tlbs_memslot(struct kvm *kvm, - const struct kvm_memory_slot *memslot) -{ - kvm_flush_remote_tlbs(kvm); -} - static int kvm_vm_ioctl_set_device_addr(struct kvm *kvm, struct kvm_arm_device_addr *dev_addr) { @@ -1595,9 +1594,9 @@ int 
kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) return kvm_vm_ioctl_set_device_addr(kvm, &dev_addr); } case KVM_ARM_PREFERRED_TARGET: { - struct kvm_vcpu_init init; - - kvm_vcpu_preferred_target(&init); + struct kvm_vcpu_init init = { + .target = KVM_ARM_TARGET_GENERIC_V8, + }; if (copy_to_user(argp, &init, sizeof(init))) return -EFAULT; @@ -2276,30 +2275,8 @@ static int __init init_hyp_mode(void) for_each_possible_cpu(cpu) { struct kvm_nvhe_init_params *params = per_cpu_ptr_nvhe_sym(kvm_init_params, cpu); char *stack_page = (char *)per_cpu(kvm_arm_hyp_stack_page, cpu); - unsigned long hyp_addr; - /* - * Allocate a contiguous HYP private VA range for the stack - * and guard page. The allocation is also aligned based on - * the order of its size. - */ - err = hyp_alloc_private_va_range(PAGE_SIZE * 2, &hyp_addr); - if (err) { - kvm_err("Cannot allocate hyp stack guard page\n"); - goto out_err; - } - - /* - * Since the stack grows downwards, map the stack to the page - * at the higher address and leave the lower guard page - * unbacked. - * - * Any valid stack address now has the PAGE_SHIFT bit as 1 - * and addresses corresponding to the guard page have the - * PAGE_SHIFT bit as 0 - this is used for overflow detection. - */ - err = __create_hyp_mappings(hyp_addr + PAGE_SIZE, PAGE_SIZE, - __pa(stack_page), PAGE_HYP); + err = create_hyp_stack(__pa(stack_page), ¶ms->stack_hyp_va); if (err) { kvm_err("Cannot map hyp stack\n"); goto out_err; @@ -2312,8 +2289,6 @@ static int __init init_hyp_mode(void) * has been mapped in the flexible private VA space. 
*/ params->stack_pa = __pa(stack_page); - - params->stack_hyp_va = hyp_addr + (2 * PAGE_SIZE); } for_each_possible_cpu(cpu) { diff --git a/arch/arm64/kvm/emulate-nested.c b/arch/arm64/kvm/emulate-nested.c index b96662029fb1..9ced1bf0c2b7 100644 --- a/arch/arm64/kvm/emulate-nested.c +++ b/arch/arm64/kvm/emulate-nested.c @@ -14,6 +14,1858 @@ #include "trace.h" +enum trap_behaviour { + BEHAVE_HANDLE_LOCALLY = 0, + BEHAVE_FORWARD_READ = BIT(0), + BEHAVE_FORWARD_WRITE = BIT(1), + BEHAVE_FORWARD_ANY = BEHAVE_FORWARD_READ | BEHAVE_FORWARD_WRITE, +}; + +struct trap_bits { + const enum vcpu_sysreg index; + const enum trap_behaviour behaviour; + const u64 value; + const u64 mask; +}; + +/* Coarse Grained Trap definitions */ +enum cgt_group_id { + /* Indicates no coarse trap control */ + __RESERVED__, + + /* + * The first batch of IDs denote coarse trapping that are used + * on their own instead of being part of a combination of + * trap controls. + */ + CGT_HCR_TID1, + CGT_HCR_TID2, + CGT_HCR_TID3, + CGT_HCR_IMO, + CGT_HCR_FMO, + CGT_HCR_TIDCP, + CGT_HCR_TACR, + CGT_HCR_TSW, + CGT_HCR_TPC, + CGT_HCR_TPU, + CGT_HCR_TTLB, + CGT_HCR_TVM, + CGT_HCR_TDZ, + CGT_HCR_TRVM, + CGT_HCR_TLOR, + CGT_HCR_TERR, + CGT_HCR_APK, + CGT_HCR_NV, + CGT_HCR_NV_nNV2, + CGT_HCR_NV1_nNV2, + CGT_HCR_AT, + CGT_HCR_nFIEN, + CGT_HCR_TID4, + CGT_HCR_TICAB, + CGT_HCR_TOCU, + CGT_HCR_ENSCXT, + CGT_HCR_TTLBIS, + CGT_HCR_TTLBOS, + + CGT_MDCR_TPMCR, + CGT_MDCR_TPM, + CGT_MDCR_TDE, + CGT_MDCR_TDA, + CGT_MDCR_TDOSA, + CGT_MDCR_TDRA, + CGT_MDCR_E2PB, + CGT_MDCR_TPMS, + CGT_MDCR_TTRF, + CGT_MDCR_E2TB, + CGT_MDCR_TDCC, + + /* + * Anything after this point is a combination of coarse trap + * controls, which must all be evaluated to decide what to do. 
+ */ + __MULTIPLE_CONTROL_BITS__, + CGT_HCR_IMO_FMO = __MULTIPLE_CONTROL_BITS__, + CGT_HCR_TID2_TID4, + CGT_HCR_TTLB_TTLBIS, + CGT_HCR_TTLB_TTLBOS, + CGT_HCR_TVM_TRVM, + CGT_HCR_TPU_TICAB, + CGT_HCR_TPU_TOCU, + CGT_HCR_NV1_nNV2_ENSCXT, + CGT_MDCR_TPM_TPMCR, + CGT_MDCR_TDE_TDA, + CGT_MDCR_TDE_TDOSA, + CGT_MDCR_TDE_TDRA, + CGT_MDCR_TDCC_TDE_TDA, + + /* + * Anything after this point requires a callback evaluating a + * complex trap condition. Ugly stuff. + */ + __COMPLEX_CONDITIONS__, + CGT_CNTHCTL_EL1PCTEN = __COMPLEX_CONDITIONS__, + CGT_CNTHCTL_EL1PTEN, + + /* Must be last */ + __NR_CGT_GROUP_IDS__ +}; + +static const struct trap_bits coarse_trap_bits[] = { + [CGT_HCR_TID1] = { + .index = HCR_EL2, + .value = HCR_TID1, + .mask = HCR_TID1, + .behaviour = BEHAVE_FORWARD_READ, + }, + [CGT_HCR_TID2] = { + .index = HCR_EL2, + .value = HCR_TID2, + .mask = HCR_TID2, + .behaviour = BEHAVE_FORWARD_ANY, + }, + [CGT_HCR_TID3] = { + .index = HCR_EL2, + .value = HCR_TID3, + .mask = HCR_TID3, + .behaviour = BEHAVE_FORWARD_READ, + }, + [CGT_HCR_IMO] = { + .index = HCR_EL2, + .value = HCR_IMO, + .mask = HCR_IMO, + .behaviour = BEHAVE_FORWARD_WRITE, + }, + [CGT_HCR_FMO] = { + .index = HCR_EL2, + .value = HCR_FMO, + .mask = HCR_FMO, + .behaviour = BEHAVE_FORWARD_WRITE, + }, + [CGT_HCR_TIDCP] = { + .index = HCR_EL2, + .value = HCR_TIDCP, + .mask = HCR_TIDCP, + .behaviour = BEHAVE_FORWARD_ANY, + }, + [CGT_HCR_TACR] = { + .index = HCR_EL2, + .value = HCR_TACR, + .mask = HCR_TACR, + .behaviour = BEHAVE_FORWARD_ANY, + }, + [CGT_HCR_TSW] = { + .index = HCR_EL2, + .value = HCR_TSW, + .mask = HCR_TSW, + .behaviour = BEHAVE_FORWARD_ANY, + }, + [CGT_HCR_TPC] = { /* Also called TCPC when FEAT_DPB is implemented */ + .index = HCR_EL2, + .value = HCR_TPC, + .mask = HCR_TPC, + .behaviour = BEHAVE_FORWARD_ANY, + }, + [CGT_HCR_TPU] = { + .index = HCR_EL2, + .value = HCR_TPU, + .mask = HCR_TPU, + .behaviour = BEHAVE_FORWARD_ANY, + }, + [CGT_HCR_TTLB] = { + .index = HCR_EL2, + .value = HCR_TTLB, + 
.mask = HCR_TTLB, + .behaviour = BEHAVE_FORWARD_ANY, + }, + [CGT_HCR_TVM] = { + .index = HCR_EL2, + .value = HCR_TVM, + .mask = HCR_TVM, + .behaviour = BEHAVE_FORWARD_WRITE, + }, + [CGT_HCR_TDZ] = { + .index = HCR_EL2, + .value = HCR_TDZ, + .mask = HCR_TDZ, + .behaviour = BEHAVE_FORWARD_ANY, + }, + [CGT_HCR_TRVM] = { + .index = HCR_EL2, + .value = HCR_TRVM, + .mask = HCR_TRVM, + .behaviour = BEHAVE_FORWARD_READ, + }, + [CGT_HCR_TLOR] = { + .index = HCR_EL2, + .value = HCR_TLOR, + .mask = HCR_TLOR, + .behaviour = BEHAVE_FORWARD_ANY, + }, + [CGT_HCR_TERR] = { + .index = HCR_EL2, + .value = HCR_TERR, + .mask = HCR_TERR, + .behaviour = BEHAVE_FORWARD_ANY, + }, + [CGT_HCR_APK] = { + .index = HCR_EL2, + .value = 0, + .mask = HCR_APK, + .behaviour = BEHAVE_FORWARD_ANY, + }, + [CGT_HCR_NV] = { + .index = HCR_EL2, + .value = HCR_NV, + .mask = HCR_NV, + .behaviour = BEHAVE_FORWARD_ANY, + }, + [CGT_HCR_NV_nNV2] = { + .index = HCR_EL2, + .value = HCR_NV, + .mask = HCR_NV | HCR_NV2, + .behaviour = BEHAVE_FORWARD_ANY, + }, + [CGT_HCR_NV1_nNV2] = { + .index = HCR_EL2, + .value = HCR_NV | HCR_NV1, + .mask = HCR_NV | HCR_NV1 | HCR_NV2, + .behaviour = BEHAVE_FORWARD_ANY, + }, + [CGT_HCR_AT] = { + .index = HCR_EL2, + .value = HCR_AT, + .mask = HCR_AT, + .behaviour = BEHAVE_FORWARD_ANY, + }, + [CGT_HCR_nFIEN] = { + .index = HCR_EL2, + .value = 0, + .mask = HCR_FIEN, + .behaviour = BEHAVE_FORWARD_ANY, + }, + [CGT_HCR_TID4] = { + .index = HCR_EL2, + .value = HCR_TID4, + .mask = HCR_TID4, + .behaviour = BEHAVE_FORWARD_ANY, + }, + [CGT_HCR_TICAB] = { + .index = HCR_EL2, + .value = HCR_TICAB, + .mask = HCR_TICAB, + .behaviour = BEHAVE_FORWARD_ANY, + }, + [CGT_HCR_TOCU] = { + .index = HCR_EL2, + .value = HCR_TOCU, + .mask = HCR_TOCU, + .behaviour = BEHAVE_FORWARD_ANY, + }, + [CGT_HCR_ENSCXT] = { + .index = HCR_EL2, + .value = 0, + .mask = HCR_ENSCXT, + .behaviour = BEHAVE_FORWARD_ANY, + }, + [CGT_HCR_TTLBIS] = { + .index = HCR_EL2, + .value = HCR_TTLBIS, + .mask = HCR_TTLBIS, + .behaviour = 
BEHAVE_FORWARD_ANY, + }, + [CGT_HCR_TTLBOS] = { + .index = HCR_EL2, + .value = HCR_TTLBOS, + .mask = HCR_TTLBOS, + .behaviour = BEHAVE_FORWARD_ANY, + }, + [CGT_MDCR_TPMCR] = { + .index = MDCR_EL2, + .value = MDCR_EL2_TPMCR, + .mask = MDCR_EL2_TPMCR, + .behaviour = BEHAVE_FORWARD_ANY, + }, + [CGT_MDCR_TPM] = { + .index = MDCR_EL2, + .value = MDCR_EL2_TPM, + .mask = MDCR_EL2_TPM, + .behaviour = BEHAVE_FORWARD_ANY, + }, + [CGT_MDCR_TDE] = { + .index = MDCR_EL2, + .value = MDCR_EL2_TDE, + .mask = MDCR_EL2_TDE, + .behaviour = BEHAVE_FORWARD_ANY, + }, + [CGT_MDCR_TDA] = { + .index = MDCR_EL2, + .value = MDCR_EL2_TDA, + .mask = MDCR_EL2_TDA, + .behaviour = BEHAVE_FORWARD_ANY, + }, + [CGT_MDCR_TDOSA] = { + .index = MDCR_EL2, + .value = MDCR_EL2_TDOSA, + .mask = MDCR_EL2_TDOSA, + .behaviour = BEHAVE_FORWARD_ANY, + }, + [CGT_MDCR_TDRA] = { + .index = MDCR_EL2, + .value = MDCR_EL2_TDRA, + .mask = MDCR_EL2_TDRA, + .behaviour = BEHAVE_FORWARD_ANY, + }, + [CGT_MDCR_E2PB] = { + .index = MDCR_EL2, + .value = 0, + .mask = BIT(MDCR_EL2_E2PB_SHIFT), + .behaviour = BEHAVE_FORWARD_ANY, + }, + [CGT_MDCR_TPMS] = { + .index = MDCR_EL2, + .value = MDCR_EL2_TPMS, + .mask = MDCR_EL2_TPMS, + .behaviour = BEHAVE_FORWARD_ANY, + }, + [CGT_MDCR_TTRF] = { + .index = MDCR_EL2, + .value = MDCR_EL2_TTRF, + .mask = MDCR_EL2_TTRF, + .behaviour = BEHAVE_FORWARD_ANY, + }, + [CGT_MDCR_E2TB] = { + .index = MDCR_EL2, + .value = 0, + .mask = BIT(MDCR_EL2_E2TB_SHIFT), + .behaviour = BEHAVE_FORWARD_ANY, + }, + [CGT_MDCR_TDCC] = { + .index = MDCR_EL2, + .value = MDCR_EL2_TDCC, + .mask = MDCR_EL2_TDCC, + .behaviour = BEHAVE_FORWARD_ANY, + }, +}; + +#define MCB(id, ...) 
\ + [id - __MULTIPLE_CONTROL_BITS__] = \ + (const enum cgt_group_id[]){ \ + __VA_ARGS__, __RESERVED__ \ + } + +static const enum cgt_group_id *coarse_control_combo[] = { + MCB(CGT_HCR_IMO_FMO, CGT_HCR_IMO, CGT_HCR_FMO), + MCB(CGT_HCR_TID2_TID4, CGT_HCR_TID2, CGT_HCR_TID4), + MCB(CGT_HCR_TTLB_TTLBIS, CGT_HCR_TTLB, CGT_HCR_TTLBIS), + MCB(CGT_HCR_TTLB_TTLBOS, CGT_HCR_TTLB, CGT_HCR_TTLBOS), + MCB(CGT_HCR_TVM_TRVM, CGT_HCR_TVM, CGT_HCR_TRVM), + MCB(CGT_HCR_TPU_TICAB, CGT_HCR_TPU, CGT_HCR_TICAB), + MCB(CGT_HCR_TPU_TOCU, CGT_HCR_TPU, CGT_HCR_TOCU), + MCB(CGT_HCR_NV1_nNV2_ENSCXT, CGT_HCR_NV1_nNV2, CGT_HCR_ENSCXT), + MCB(CGT_MDCR_TPM_TPMCR, CGT_MDCR_TPM, CGT_MDCR_TPMCR), + MCB(CGT_MDCR_TDE_TDA, CGT_MDCR_TDE, CGT_MDCR_TDA), + MCB(CGT_MDCR_TDE_TDOSA, CGT_MDCR_TDE, CGT_MDCR_TDOSA), + MCB(CGT_MDCR_TDE_TDRA, CGT_MDCR_TDE, CGT_MDCR_TDRA), + MCB(CGT_MDCR_TDCC_TDE_TDA, CGT_MDCR_TDCC, CGT_MDCR_TDE, CGT_MDCR_TDA), +}; + +typedef enum trap_behaviour (*complex_condition_check)(struct kvm_vcpu *); + +/* + * Warning, maximum confusion ahead. + * + * When E2H=0, CNTHCTL_EL2[1:0] are defined as EL1PCEN:EL1PCTEN + * When E2H=1, CNTHCTL_EL2[11:10] are defined as EL1PTEN:EL1PCTEN + * + * Note the single letter difference? Yet, the bits have the same + * function despite a different layout and a different name. + * + * We don't try to reconcile this mess. We just use the E2H=0 bits + * to generate something that is in the E2H=1 format, and live with + * it. You're welcome. 
+ */ +static u64 get_sanitized_cnthctl(struct kvm_vcpu *vcpu) +{ + u64 val = __vcpu_sys_reg(vcpu, CNTHCTL_EL2); + + if (!vcpu_el2_e2h_is_set(vcpu)) + val = (val & (CNTHCTL_EL1PCEN | CNTHCTL_EL1PCTEN)) << 10; + + return val & ((CNTHCTL_EL1PCEN | CNTHCTL_EL1PCTEN) << 10); +} + +static enum trap_behaviour check_cnthctl_el1pcten(struct kvm_vcpu *vcpu) +{ + if (get_sanitized_cnthctl(vcpu) & (CNTHCTL_EL1PCTEN << 10)) + return BEHAVE_HANDLE_LOCALLY; + + return BEHAVE_FORWARD_ANY; +} + +static enum trap_behaviour check_cnthctl_el1pten(struct kvm_vcpu *vcpu) +{ + if (get_sanitized_cnthctl(vcpu) & (CNTHCTL_EL1PCEN << 10)) + return BEHAVE_HANDLE_LOCALLY; + + return BEHAVE_FORWARD_ANY; +} + +#define CCC(id, fn) \ + [id - __COMPLEX_CONDITIONS__] = fn + +static const complex_condition_check ccc[] = { + CCC(CGT_CNTHCTL_EL1PCTEN, check_cnthctl_el1pcten), + CCC(CGT_CNTHCTL_EL1PTEN, check_cnthctl_el1pten), +}; + +/* + * Bit assignment for the trap controls. We use a 64bit word with the + * following layout for each trapped sysreg: + * + * [9:0] enum cgt_group_id (10 bits) + * [13:10] enum fgt_group_id (4 bits) + * [19:14] bit number in the FGT register (6 bits) + * [20] trap polarity (1 bit) + * [25:21] FG filter (5 bits) + * [62:26] Unused (37 bits) + * [63] RES0 - Must be zero, as lost on insertion in the xarray + */ +#define TC_CGT_BITS 10 +#define TC_FGT_BITS 4 +#define TC_FGF_BITS 5 + +union trap_config { + u64 val; + struct { + unsigned long cgt:TC_CGT_BITS; /* Coarse Grained Trap id */ + unsigned long fgt:TC_FGT_BITS; /* Fine Grained Trap id */ + unsigned long bit:6; /* Bit number */ + unsigned long pol:1; /* Polarity */ + unsigned long fgf:TC_FGF_BITS; /* Fine Grained Filter */ + unsigned long unused:37; /* Unused, should be zero */ + unsigned long mbz:1; /* Must Be Zero */ + }; +}; + +struct encoding_to_trap_config { + const u32 encoding; + const u32 end; + const union trap_config tc; + const unsigned int line; +}; + +#define SR_RANGE_TRAP(sr_start, sr_end, trap_id) \ + { \ 
+ .encoding = sr_start, \ + .end = sr_end, \ + .tc = { \ + .cgt = trap_id, \ + }, \ + .line = __LINE__, \ + } + +#define SR_TRAP(sr, trap_id) SR_RANGE_TRAP(sr, sr, trap_id) + +/* + * Map encoding to trap bits for exceptions reported with EC=0x18. + * These must only be evaluated when running a nested hypervisor, and + * only when the current context is not a hypervisor context. When the + * trapped access matches one of the trap controls, the exception is + * re-injected in the nested hypervisor. + */ +static const struct encoding_to_trap_config encoding_to_cgt[] __initconst = { + SR_TRAP(SYS_REVIDR_EL1, CGT_HCR_TID1), + SR_TRAP(SYS_AIDR_EL1, CGT_HCR_TID1), + SR_TRAP(SYS_SMIDR_EL1, CGT_HCR_TID1), + SR_TRAP(SYS_CTR_EL0, CGT_HCR_TID2), + SR_TRAP(SYS_CCSIDR_EL1, CGT_HCR_TID2_TID4), + SR_TRAP(SYS_CCSIDR2_EL1, CGT_HCR_TID2_TID4), + SR_TRAP(SYS_CLIDR_EL1, CGT_HCR_TID2_TID4), + SR_TRAP(SYS_CSSELR_EL1, CGT_HCR_TID2_TID4), + SR_RANGE_TRAP(SYS_ID_PFR0_EL1, + sys_reg(3, 0, 0, 7, 7), CGT_HCR_TID3), + SR_TRAP(SYS_ICC_SGI0R_EL1, CGT_HCR_IMO_FMO), + SR_TRAP(SYS_ICC_ASGI1R_EL1, CGT_HCR_IMO_FMO), + SR_TRAP(SYS_ICC_SGI1R_EL1, CGT_HCR_IMO_FMO), + SR_RANGE_TRAP(sys_reg(3, 0, 11, 0, 0), + sys_reg(3, 0, 11, 15, 7), CGT_HCR_TIDCP), + SR_RANGE_TRAP(sys_reg(3, 1, 11, 0, 0), + sys_reg(3, 1, 11, 15, 7), CGT_HCR_TIDCP), + SR_RANGE_TRAP(sys_reg(3, 2, 11, 0, 0), + sys_reg(3, 2, 11, 15, 7), CGT_HCR_TIDCP), + SR_RANGE_TRAP(sys_reg(3, 3, 11, 0, 0), + sys_reg(3, 3, 11, 15, 7), CGT_HCR_TIDCP), + SR_RANGE_TRAP(sys_reg(3, 4, 11, 0, 0), + sys_reg(3, 4, 11, 15, 7), CGT_HCR_TIDCP), + SR_RANGE_TRAP(sys_reg(3, 5, 11, 0, 0), + sys_reg(3, 5, 11, 15, 7), CGT_HCR_TIDCP), + SR_RANGE_TRAP(sys_reg(3, 6, 11, 0, 0), + sys_reg(3, 6, 11, 15, 7), CGT_HCR_TIDCP), + SR_RANGE_TRAP(sys_reg(3, 7, 11, 0, 0), + sys_reg(3, 7, 11, 15, 7), CGT_HCR_TIDCP), + SR_RANGE_TRAP(sys_reg(3, 0, 15, 0, 0), + sys_reg(3, 0, 15, 15, 7), CGT_HCR_TIDCP), + SR_RANGE_TRAP(sys_reg(3, 1, 15, 0, 0), + sys_reg(3, 1, 15, 15, 7), CGT_HCR_TIDCP), +
SR_RANGE_TRAP(sys_reg(3, 2, 15, 0, 0), + sys_reg(3, 2, 15, 15, 7), CGT_HCR_TIDCP), + SR_RANGE_TRAP(sys_reg(3, 3, 15, 0, 0), + sys_reg(3, 3, 15, 15, 7), CGT_HCR_TIDCP), + SR_RANGE_TRAP(sys_reg(3, 4, 15, 0, 0), + sys_reg(3, 4, 15, 15, 7), CGT_HCR_TIDCP), + SR_RANGE_TRAP(sys_reg(3, 5, 15, 0, 0), + sys_reg(3, 5, 15, 15, 7), CGT_HCR_TIDCP), + SR_RANGE_TRAP(sys_reg(3, 6, 15, 0, 0), + sys_reg(3, 6, 15, 15, 7), CGT_HCR_TIDCP), + SR_RANGE_TRAP(sys_reg(3, 7, 15, 0, 0), + sys_reg(3, 7, 15, 15, 7), CGT_HCR_TIDCP), + SR_TRAP(SYS_ACTLR_EL1, CGT_HCR_TACR), + SR_TRAP(SYS_DC_ISW, CGT_HCR_TSW), + SR_TRAP(SYS_DC_CSW, CGT_HCR_TSW), + SR_TRAP(SYS_DC_CISW, CGT_HCR_TSW), + SR_TRAP(SYS_DC_IGSW, CGT_HCR_TSW), + SR_TRAP(SYS_DC_IGDSW, CGT_HCR_TSW), + SR_TRAP(SYS_DC_CGSW, CGT_HCR_TSW), + SR_TRAP(SYS_DC_CGDSW, CGT_HCR_TSW), + SR_TRAP(SYS_DC_CIGSW, CGT_HCR_TSW), + SR_TRAP(SYS_DC_CIGDSW, CGT_HCR_TSW), + SR_TRAP(SYS_DC_CIVAC, CGT_HCR_TPC), + SR_TRAP(SYS_DC_CVAC, CGT_HCR_TPC), + SR_TRAP(SYS_DC_CVAP, CGT_HCR_TPC), + SR_TRAP(SYS_DC_CVADP, CGT_HCR_TPC), + SR_TRAP(SYS_DC_IVAC, CGT_HCR_TPC), + SR_TRAP(SYS_DC_CIGVAC, CGT_HCR_TPC), + SR_TRAP(SYS_DC_CIGDVAC, CGT_HCR_TPC), + SR_TRAP(SYS_DC_IGVAC, CGT_HCR_TPC), + SR_TRAP(SYS_DC_IGDVAC, CGT_HCR_TPC), + SR_TRAP(SYS_DC_CGVAC, CGT_HCR_TPC), + SR_TRAP(SYS_DC_CGDVAC, CGT_HCR_TPC), + SR_TRAP(SYS_DC_CGVAP, CGT_HCR_TPC), + SR_TRAP(SYS_DC_CGDVAP, CGT_HCR_TPC), + SR_TRAP(SYS_DC_CGVADP, CGT_HCR_TPC), + SR_TRAP(SYS_DC_CGDVADP, CGT_HCR_TPC), + SR_TRAP(SYS_IC_IVAU, CGT_HCR_TPU_TOCU), + SR_TRAP(SYS_IC_IALLU, CGT_HCR_TPU_TOCU), + SR_TRAP(SYS_IC_IALLUIS, CGT_HCR_TPU_TICAB), + SR_TRAP(SYS_DC_CVAU, CGT_HCR_TPU_TOCU), + SR_TRAP(OP_TLBI_RVAE1, CGT_HCR_TTLB), + SR_TRAP(OP_TLBI_RVAAE1, CGT_HCR_TTLB), + SR_TRAP(OP_TLBI_RVALE1, CGT_HCR_TTLB), + SR_TRAP(OP_TLBI_RVAALE1, CGT_HCR_TTLB), + SR_TRAP(OP_TLBI_VMALLE1, CGT_HCR_TTLB), + SR_TRAP(OP_TLBI_VAE1, CGT_HCR_TTLB), + SR_TRAP(OP_TLBI_ASIDE1, CGT_HCR_TTLB), + SR_TRAP(OP_TLBI_VAAE1, CGT_HCR_TTLB), + SR_TRAP(OP_TLBI_VALE1, CGT_HCR_TTLB), 
+ SR_TRAP(OP_TLBI_VAALE1, CGT_HCR_TTLB), + SR_TRAP(OP_TLBI_RVAE1NXS, CGT_HCR_TTLB), + SR_TRAP(OP_TLBI_RVAAE1NXS, CGT_HCR_TTLB), + SR_TRAP(OP_TLBI_RVALE1NXS, CGT_HCR_TTLB), + SR_TRAP(OP_TLBI_RVAALE1NXS, CGT_HCR_TTLB), + SR_TRAP(OP_TLBI_VMALLE1NXS, CGT_HCR_TTLB), + SR_TRAP(OP_TLBI_VAE1NXS, CGT_HCR_TTLB), + SR_TRAP(OP_TLBI_ASIDE1NXS, CGT_HCR_TTLB), + SR_TRAP(OP_TLBI_VAAE1NXS, CGT_HCR_TTLB), + SR_TRAP(OP_TLBI_VALE1NXS, CGT_HCR_TTLB), + SR_TRAP(OP_TLBI_VAALE1NXS, CGT_HCR_TTLB), + SR_TRAP(OP_TLBI_RVAE1IS, CGT_HCR_TTLB_TTLBIS), + SR_TRAP(OP_TLBI_RVAAE1IS, CGT_HCR_TTLB_TTLBIS), + SR_TRAP(OP_TLBI_RVALE1IS, CGT_HCR_TTLB_TTLBIS), + SR_TRAP(OP_TLBI_RVAALE1IS, CGT_HCR_TTLB_TTLBIS), + SR_TRAP(OP_TLBI_VMALLE1IS, CGT_HCR_TTLB_TTLBIS), + SR_TRAP(OP_TLBI_VAE1IS, CGT_HCR_TTLB_TTLBIS), + SR_TRAP(OP_TLBI_ASIDE1IS, CGT_HCR_TTLB_TTLBIS), + SR_TRAP(OP_TLBI_VAAE1IS, CGT_HCR_TTLB_TTLBIS), + SR_TRAP(OP_TLBI_VALE1IS, CGT_HCR_TTLB_TTLBIS), + SR_TRAP(OP_TLBI_VAALE1IS, CGT_HCR_TTLB_TTLBIS), + SR_TRAP(OP_TLBI_RVAE1ISNXS, CGT_HCR_TTLB_TTLBIS), + SR_TRAP(OP_TLBI_RVAAE1ISNXS, CGT_HCR_TTLB_TTLBIS), + SR_TRAP(OP_TLBI_RVALE1ISNXS, CGT_HCR_TTLB_TTLBIS), + SR_TRAP(OP_TLBI_RVAALE1ISNXS, CGT_HCR_TTLB_TTLBIS), + SR_TRAP(OP_TLBI_VMALLE1ISNXS, CGT_HCR_TTLB_TTLBIS), + SR_TRAP(OP_TLBI_VAE1ISNXS, CGT_HCR_TTLB_TTLBIS), + SR_TRAP(OP_TLBI_ASIDE1ISNXS, CGT_HCR_TTLB_TTLBIS), + SR_TRAP(OP_TLBI_VAAE1ISNXS, CGT_HCR_TTLB_TTLBIS), + SR_TRAP(OP_TLBI_VALE1ISNXS, CGT_HCR_TTLB_TTLBIS), + SR_TRAP(OP_TLBI_VAALE1ISNXS, CGT_HCR_TTLB_TTLBIS), + SR_TRAP(OP_TLBI_VMALLE1OS, CGT_HCR_TTLB_TTLBOS), + SR_TRAP(OP_TLBI_VAE1OS, CGT_HCR_TTLB_TTLBOS), + SR_TRAP(OP_TLBI_ASIDE1OS, CGT_HCR_TTLB_TTLBOS), + SR_TRAP(OP_TLBI_VAAE1OS, CGT_HCR_TTLB_TTLBOS), + SR_TRAP(OP_TLBI_VALE1OS, CGT_HCR_TTLB_TTLBOS), + SR_TRAP(OP_TLBI_VAALE1OS, CGT_HCR_TTLB_TTLBOS), + SR_TRAP(OP_TLBI_RVAE1OS, CGT_HCR_TTLB_TTLBOS), + SR_TRAP(OP_TLBI_RVAAE1OS, CGT_HCR_TTLB_TTLBOS), + SR_TRAP(OP_TLBI_RVALE1OS, CGT_HCR_TTLB_TTLBOS), + SR_TRAP(OP_TLBI_RVAALE1OS, CGT_HCR_TTLB_TTLBOS), 
+ SR_TRAP(OP_TLBI_VMALLE1OSNXS, CGT_HCR_TTLB_TTLBOS), + SR_TRAP(OP_TLBI_VAE1OSNXS, CGT_HCR_TTLB_TTLBOS), + SR_TRAP(OP_TLBI_ASIDE1OSNXS, CGT_HCR_TTLB_TTLBOS), + SR_TRAP(OP_TLBI_VAAE1OSNXS, CGT_HCR_TTLB_TTLBOS), + SR_TRAP(OP_TLBI_VALE1OSNXS, CGT_HCR_TTLB_TTLBOS), + SR_TRAP(OP_TLBI_VAALE1OSNXS, CGT_HCR_TTLB_TTLBOS), + SR_TRAP(OP_TLBI_RVAE1OSNXS, CGT_HCR_TTLB_TTLBOS), + SR_TRAP(OP_TLBI_RVAAE1OSNXS, CGT_HCR_TTLB_TTLBOS), + SR_TRAP(OP_TLBI_RVALE1OSNXS, CGT_HCR_TTLB_TTLBOS), + SR_TRAP(OP_TLBI_RVAALE1OSNXS, CGT_HCR_TTLB_TTLBOS), + SR_TRAP(SYS_SCTLR_EL1, CGT_HCR_TVM_TRVM), + SR_TRAP(SYS_TTBR0_EL1, CGT_HCR_TVM_TRVM), + SR_TRAP(SYS_TTBR1_EL1, CGT_HCR_TVM_TRVM), + SR_TRAP(SYS_TCR_EL1, CGT_HCR_TVM_TRVM), + SR_TRAP(SYS_ESR_EL1, CGT_HCR_TVM_TRVM), + SR_TRAP(SYS_FAR_EL1, CGT_HCR_TVM_TRVM), + SR_TRAP(SYS_AFSR0_EL1, CGT_HCR_TVM_TRVM), + SR_TRAP(SYS_AFSR1_EL1, CGT_HCR_TVM_TRVM), + SR_TRAP(SYS_MAIR_EL1, CGT_HCR_TVM_TRVM), + SR_TRAP(SYS_AMAIR_EL1, CGT_HCR_TVM_TRVM), + SR_TRAP(SYS_CONTEXTIDR_EL1, CGT_HCR_TVM_TRVM), + SR_TRAP(SYS_DC_ZVA, CGT_HCR_TDZ), + SR_TRAP(SYS_DC_GVA, CGT_HCR_TDZ), + SR_TRAP(SYS_DC_GZVA, CGT_HCR_TDZ), + SR_TRAP(SYS_LORSA_EL1, CGT_HCR_TLOR), + SR_TRAP(SYS_LOREA_EL1, CGT_HCR_TLOR), + SR_TRAP(SYS_LORN_EL1, CGT_HCR_TLOR), + SR_TRAP(SYS_LORC_EL1, CGT_HCR_TLOR), + SR_TRAP(SYS_LORID_EL1, CGT_HCR_TLOR), + SR_TRAP(SYS_ERRIDR_EL1, CGT_HCR_TERR), + SR_TRAP(SYS_ERRSELR_EL1, CGT_HCR_TERR), + SR_TRAP(SYS_ERXADDR_EL1, CGT_HCR_TERR), + SR_TRAP(SYS_ERXCTLR_EL1, CGT_HCR_TERR), + SR_TRAP(SYS_ERXFR_EL1, CGT_HCR_TERR), + SR_TRAP(SYS_ERXMISC0_EL1, CGT_HCR_TERR), + SR_TRAP(SYS_ERXMISC1_EL1, CGT_HCR_TERR), + SR_TRAP(SYS_ERXMISC2_EL1, CGT_HCR_TERR), + SR_TRAP(SYS_ERXMISC3_EL1, CGT_HCR_TERR), + SR_TRAP(SYS_ERXSTATUS_EL1, CGT_HCR_TERR), + SR_TRAP(SYS_APIAKEYLO_EL1, CGT_HCR_APK), + SR_TRAP(SYS_APIAKEYHI_EL1, CGT_HCR_APK), + SR_TRAP(SYS_APIBKEYLO_EL1, CGT_HCR_APK), + SR_TRAP(SYS_APIBKEYHI_EL1, CGT_HCR_APK), + SR_TRAP(SYS_APDAKEYLO_EL1, CGT_HCR_APK), + SR_TRAP(SYS_APDAKEYHI_EL1, CGT_HCR_APK), + 
SR_TRAP(SYS_APDBKEYLO_EL1, CGT_HCR_APK), + SR_TRAP(SYS_APDBKEYHI_EL1, CGT_HCR_APK), + SR_TRAP(SYS_APGAKEYLO_EL1, CGT_HCR_APK), + SR_TRAP(SYS_APGAKEYHI_EL1, CGT_HCR_APK), + /* All _EL2 registers */ + SR_RANGE_TRAP(sys_reg(3, 4, 0, 0, 0), + sys_reg(3, 4, 3, 15, 7), CGT_HCR_NV), + /* Skip the SP_EL1 encoding... */ + SR_TRAP(SYS_SPSR_EL2, CGT_HCR_NV), + SR_TRAP(SYS_ELR_EL2, CGT_HCR_NV), + SR_RANGE_TRAP(sys_reg(3, 4, 4, 1, 1), + sys_reg(3, 4, 10, 15, 7), CGT_HCR_NV), + SR_RANGE_TRAP(sys_reg(3, 4, 12, 0, 0), + sys_reg(3, 4, 14, 15, 7), CGT_HCR_NV), + /* All _EL02, _EL12 registers */ + SR_RANGE_TRAP(sys_reg(3, 5, 0, 0, 0), + sys_reg(3, 5, 10, 15, 7), CGT_HCR_NV), + SR_RANGE_TRAP(sys_reg(3, 5, 12, 0, 0), + sys_reg(3, 5, 14, 15, 7), CGT_HCR_NV), + SR_TRAP(OP_AT_S1E2R, CGT_HCR_NV), + SR_TRAP(OP_AT_S1E2W, CGT_HCR_NV), + SR_TRAP(OP_AT_S12E1R, CGT_HCR_NV), + SR_TRAP(OP_AT_S12E1W, CGT_HCR_NV), + SR_TRAP(OP_AT_S12E0R, CGT_HCR_NV), + SR_TRAP(OP_AT_S12E0W, CGT_HCR_NV), + SR_TRAP(OP_TLBI_IPAS2E1, CGT_HCR_NV), + SR_TRAP(OP_TLBI_RIPAS2E1, CGT_HCR_NV), + SR_TRAP(OP_TLBI_IPAS2LE1, CGT_HCR_NV), + SR_TRAP(OP_TLBI_RIPAS2LE1, CGT_HCR_NV), + SR_TRAP(OP_TLBI_RVAE2, CGT_HCR_NV), + SR_TRAP(OP_TLBI_RVALE2, CGT_HCR_NV), + SR_TRAP(OP_TLBI_ALLE2, CGT_HCR_NV), + SR_TRAP(OP_TLBI_VAE2, CGT_HCR_NV), + SR_TRAP(OP_TLBI_ALLE1, CGT_HCR_NV), + SR_TRAP(OP_TLBI_VALE2, CGT_HCR_NV), + SR_TRAP(OP_TLBI_VMALLS12E1, CGT_HCR_NV), + SR_TRAP(OP_TLBI_IPAS2E1NXS, CGT_HCR_NV), + SR_TRAP(OP_TLBI_RIPAS2E1NXS, CGT_HCR_NV), + SR_TRAP(OP_TLBI_IPAS2LE1NXS, CGT_HCR_NV), + SR_TRAP(OP_TLBI_RIPAS2LE1NXS, CGT_HCR_NV), + SR_TRAP(OP_TLBI_RVAE2NXS, CGT_HCR_NV), + SR_TRAP(OP_TLBI_RVALE2NXS, CGT_HCR_NV), + SR_TRAP(OP_TLBI_ALLE2NXS, CGT_HCR_NV), + SR_TRAP(OP_TLBI_VAE2NXS, CGT_HCR_NV), + SR_TRAP(OP_TLBI_ALLE1NXS, CGT_HCR_NV), + SR_TRAP(OP_TLBI_VALE2NXS, CGT_HCR_NV), + SR_TRAP(OP_TLBI_VMALLS12E1NXS, CGT_HCR_NV), + SR_TRAP(OP_TLBI_IPAS2E1IS, CGT_HCR_NV), + SR_TRAP(OP_TLBI_RIPAS2E1IS, CGT_HCR_NV), + SR_TRAP(OP_TLBI_IPAS2LE1IS, CGT_HCR_NV), + 
SR_TRAP(OP_TLBI_RIPAS2LE1IS, CGT_HCR_NV), + SR_TRAP(OP_TLBI_RVAE2IS, CGT_HCR_NV), + SR_TRAP(OP_TLBI_RVALE2IS, CGT_HCR_NV), + SR_TRAP(OP_TLBI_ALLE2IS, CGT_HCR_NV), + SR_TRAP(OP_TLBI_VAE2IS, CGT_HCR_NV), + SR_TRAP(OP_TLBI_ALLE1IS, CGT_HCR_NV), + SR_TRAP(OP_TLBI_VALE2IS, CGT_HCR_NV), + SR_TRAP(OP_TLBI_VMALLS12E1IS, CGT_HCR_NV), + SR_TRAP(OP_TLBI_IPAS2E1ISNXS, CGT_HCR_NV), + SR_TRAP(OP_TLBI_RIPAS2E1ISNXS, CGT_HCR_NV), + SR_TRAP(OP_TLBI_IPAS2LE1ISNXS, CGT_HCR_NV), + SR_TRAP(OP_TLBI_RIPAS2LE1ISNXS, CGT_HCR_NV), + SR_TRAP(OP_TLBI_RVAE2ISNXS, CGT_HCR_NV), + SR_TRAP(OP_TLBI_RVALE2ISNXS, CGT_HCR_NV), + SR_TRAP(OP_TLBI_ALLE2ISNXS, CGT_HCR_NV), + SR_TRAP(OP_TLBI_VAE2ISNXS, CGT_HCR_NV), + SR_TRAP(OP_TLBI_ALLE1ISNXS, CGT_HCR_NV), + SR_TRAP(OP_TLBI_VALE2ISNXS, CGT_HCR_NV), + SR_TRAP(OP_TLBI_VMALLS12E1ISNXS,CGT_HCR_NV), + SR_TRAP(OP_TLBI_ALLE2OS, CGT_HCR_NV), + SR_TRAP(OP_TLBI_VAE2OS, CGT_HCR_NV), + SR_TRAP(OP_TLBI_ALLE1OS, CGT_HCR_NV), + SR_TRAP(OP_TLBI_VALE2OS, CGT_HCR_NV), + SR_TRAP(OP_TLBI_VMALLS12E1OS, CGT_HCR_NV), + SR_TRAP(OP_TLBI_IPAS2E1OS, CGT_HCR_NV), + SR_TRAP(OP_TLBI_RIPAS2E1OS, CGT_HCR_NV), + SR_TRAP(OP_TLBI_IPAS2LE1OS, CGT_HCR_NV), + SR_TRAP(OP_TLBI_RIPAS2LE1OS, CGT_HCR_NV), + SR_TRAP(OP_TLBI_RVAE2OS, CGT_HCR_NV), + SR_TRAP(OP_TLBI_RVALE2OS, CGT_HCR_NV), + SR_TRAP(OP_TLBI_ALLE2OSNXS, CGT_HCR_NV), + SR_TRAP(OP_TLBI_VAE2OSNXS, CGT_HCR_NV), + SR_TRAP(OP_TLBI_ALLE1OSNXS, CGT_HCR_NV), + SR_TRAP(OP_TLBI_VALE2OSNXS, CGT_HCR_NV), + SR_TRAP(OP_TLBI_VMALLS12E1OSNXS,CGT_HCR_NV), + SR_TRAP(OP_TLBI_IPAS2E1OSNXS, CGT_HCR_NV), + SR_TRAP(OP_TLBI_RIPAS2E1OSNXS, CGT_HCR_NV), + SR_TRAP(OP_TLBI_IPAS2LE1OSNXS, CGT_HCR_NV), + SR_TRAP(OP_TLBI_RIPAS2LE1OSNXS, CGT_HCR_NV), + SR_TRAP(OP_TLBI_RVAE2OSNXS, CGT_HCR_NV), + SR_TRAP(OP_TLBI_RVALE2OSNXS, CGT_HCR_NV), + SR_TRAP(OP_CPP_RCTX, CGT_HCR_NV), + SR_TRAP(OP_DVP_RCTX, CGT_HCR_NV), + SR_TRAP(OP_CFP_RCTX, CGT_HCR_NV), + SR_TRAP(SYS_SP_EL1, CGT_HCR_NV_nNV2), + SR_TRAP(SYS_VBAR_EL1, CGT_HCR_NV1_nNV2), + SR_TRAP(SYS_ELR_EL1, CGT_HCR_NV1_nNV2), + 
SR_TRAP(SYS_SPSR_EL1, CGT_HCR_NV1_nNV2), + SR_TRAP(SYS_SCXTNUM_EL1, CGT_HCR_NV1_nNV2_ENSCXT), + SR_TRAP(SYS_SCXTNUM_EL0, CGT_HCR_ENSCXT), + SR_TRAP(OP_AT_S1E1R, CGT_HCR_AT), + SR_TRAP(OP_AT_S1E1W, CGT_HCR_AT), + SR_TRAP(OP_AT_S1E0R, CGT_HCR_AT), + SR_TRAP(OP_AT_S1E0W, CGT_HCR_AT), + SR_TRAP(OP_AT_S1E1RP, CGT_HCR_AT), + SR_TRAP(OP_AT_S1E1WP, CGT_HCR_AT), + SR_TRAP(SYS_ERXPFGF_EL1, CGT_HCR_nFIEN), + SR_TRAP(SYS_ERXPFGCTL_EL1, CGT_HCR_nFIEN), + SR_TRAP(SYS_ERXPFGCDN_EL1, CGT_HCR_nFIEN), + SR_TRAP(SYS_PMCR_EL0, CGT_MDCR_TPM_TPMCR), + SR_TRAP(SYS_PMCNTENSET_EL0, CGT_MDCR_TPM), + SR_TRAP(SYS_PMCNTENCLR_EL0, CGT_MDCR_TPM), + SR_TRAP(SYS_PMOVSSET_EL0, CGT_MDCR_TPM), + SR_TRAP(SYS_PMOVSCLR_EL0, CGT_MDCR_TPM), + SR_TRAP(SYS_PMCEID0_EL0, CGT_MDCR_TPM), + SR_TRAP(SYS_PMCEID1_EL0, CGT_MDCR_TPM), + SR_TRAP(SYS_PMXEVTYPER_EL0, CGT_MDCR_TPM), + SR_TRAP(SYS_PMSWINC_EL0, CGT_MDCR_TPM), + SR_TRAP(SYS_PMSELR_EL0, CGT_MDCR_TPM), + SR_TRAP(SYS_PMXEVCNTR_EL0, CGT_MDCR_TPM), + SR_TRAP(SYS_PMCCNTR_EL0, CGT_MDCR_TPM), + SR_TRAP(SYS_PMUSERENR_EL0, CGT_MDCR_TPM), + SR_TRAP(SYS_PMINTENSET_EL1, CGT_MDCR_TPM), + SR_TRAP(SYS_PMINTENCLR_EL1, CGT_MDCR_TPM), + SR_TRAP(SYS_PMMIR_EL1, CGT_MDCR_TPM), + SR_TRAP(SYS_PMEVCNTRn_EL0(0), CGT_MDCR_TPM), + SR_TRAP(SYS_PMEVCNTRn_EL0(1), CGT_MDCR_TPM), + SR_TRAP(SYS_PMEVCNTRn_EL0(2), CGT_MDCR_TPM), + SR_TRAP(SYS_PMEVCNTRn_EL0(3), CGT_MDCR_TPM), + SR_TRAP(SYS_PMEVCNTRn_EL0(4), CGT_MDCR_TPM), + SR_TRAP(SYS_PMEVCNTRn_EL0(5), CGT_MDCR_TPM), + SR_TRAP(SYS_PMEVCNTRn_EL0(6), CGT_MDCR_TPM), + SR_TRAP(SYS_PMEVCNTRn_EL0(7), CGT_MDCR_TPM), + SR_TRAP(SYS_PMEVCNTRn_EL0(8), CGT_MDCR_TPM), + SR_TRAP(SYS_PMEVCNTRn_EL0(9), CGT_MDCR_TPM), + SR_TRAP(SYS_PMEVCNTRn_EL0(10), CGT_MDCR_TPM), + SR_TRAP(SYS_PMEVCNTRn_EL0(11), CGT_MDCR_TPM), + SR_TRAP(SYS_PMEVCNTRn_EL0(12), CGT_MDCR_TPM), + SR_TRAP(SYS_PMEVCNTRn_EL0(13), CGT_MDCR_TPM), + SR_TRAP(SYS_PMEVCNTRn_EL0(14), CGT_MDCR_TPM), + SR_TRAP(SYS_PMEVCNTRn_EL0(15), CGT_MDCR_TPM), + SR_TRAP(SYS_PMEVCNTRn_EL0(16), CGT_MDCR_TPM), + 
SR_TRAP(SYS_PMEVCNTRn_EL0(17), CGT_MDCR_TPM), + SR_TRAP(SYS_PMEVCNTRn_EL0(18), CGT_MDCR_TPM), + SR_TRAP(SYS_PMEVCNTRn_EL0(19), CGT_MDCR_TPM), + SR_TRAP(SYS_PMEVCNTRn_EL0(20), CGT_MDCR_TPM), + SR_TRAP(SYS_PMEVCNTRn_EL0(21), CGT_MDCR_TPM), + SR_TRAP(SYS_PMEVCNTRn_EL0(22), CGT_MDCR_TPM), + SR_TRAP(SYS_PMEVCNTRn_EL0(23), CGT_MDCR_TPM), + SR_TRAP(SYS_PMEVCNTRn_EL0(24), CGT_MDCR_TPM), + SR_TRAP(SYS_PMEVCNTRn_EL0(25), CGT_MDCR_TPM), + SR_TRAP(SYS_PMEVCNTRn_EL0(26), CGT_MDCR_TPM), + SR_TRAP(SYS_PMEVCNTRn_EL0(27), CGT_MDCR_TPM), + SR_TRAP(SYS_PMEVCNTRn_EL0(28), CGT_MDCR_TPM), + SR_TRAP(SYS_PMEVCNTRn_EL0(29), CGT_MDCR_TPM), + SR_TRAP(SYS_PMEVCNTRn_EL0(30), CGT_MDCR_TPM), + SR_TRAP(SYS_PMEVTYPERn_EL0(0), CGT_MDCR_TPM), + SR_TRAP(SYS_PMEVTYPERn_EL0(1), CGT_MDCR_TPM), + SR_TRAP(SYS_PMEVTYPERn_EL0(2), CGT_MDCR_TPM), + SR_TRAP(SYS_PMEVTYPERn_EL0(3), CGT_MDCR_TPM), + SR_TRAP(SYS_PMEVTYPERn_EL0(4), CGT_MDCR_TPM), + SR_TRAP(SYS_PMEVTYPERn_EL0(5), CGT_MDCR_TPM), + SR_TRAP(SYS_PMEVTYPERn_EL0(6), CGT_MDCR_TPM), + SR_TRAP(SYS_PMEVTYPERn_EL0(7), CGT_MDCR_TPM), + SR_TRAP(SYS_PMEVTYPERn_EL0(8), CGT_MDCR_TPM), + SR_TRAP(SYS_PMEVTYPERn_EL0(9), CGT_MDCR_TPM), + SR_TRAP(SYS_PMEVTYPERn_EL0(10), CGT_MDCR_TPM), + SR_TRAP(SYS_PMEVTYPERn_EL0(11), CGT_MDCR_TPM), + SR_TRAP(SYS_PMEVTYPERn_EL0(12), CGT_MDCR_TPM), + SR_TRAP(SYS_PMEVTYPERn_EL0(13), CGT_MDCR_TPM), + SR_TRAP(SYS_PMEVTYPERn_EL0(14), CGT_MDCR_TPM), + SR_TRAP(SYS_PMEVTYPERn_EL0(15), CGT_MDCR_TPM), + SR_TRAP(SYS_PMEVTYPERn_EL0(16), CGT_MDCR_TPM), + SR_TRAP(SYS_PMEVTYPERn_EL0(17), CGT_MDCR_TPM), + SR_TRAP(SYS_PMEVTYPERn_EL0(18), CGT_MDCR_TPM), + SR_TRAP(SYS_PMEVTYPERn_EL0(19), CGT_MDCR_TPM), + SR_TRAP(SYS_PMEVTYPERn_EL0(20), CGT_MDCR_TPM), + SR_TRAP(SYS_PMEVTYPERn_EL0(21), CGT_MDCR_TPM), + SR_TRAP(SYS_PMEVTYPERn_EL0(22), CGT_MDCR_TPM), + SR_TRAP(SYS_PMEVTYPERn_EL0(23), CGT_MDCR_TPM), + SR_TRAP(SYS_PMEVTYPERn_EL0(24), CGT_MDCR_TPM), + SR_TRAP(SYS_PMEVTYPERn_EL0(25), CGT_MDCR_TPM), + SR_TRAP(SYS_PMEVTYPERn_EL0(26), CGT_MDCR_TPM), + 
SR_TRAP(SYS_PMEVTYPERn_EL0(27), CGT_MDCR_TPM), + SR_TRAP(SYS_PMEVTYPERn_EL0(28), CGT_MDCR_TPM), + SR_TRAP(SYS_PMEVTYPERn_EL0(29), CGT_MDCR_TPM), + SR_TRAP(SYS_PMEVTYPERn_EL0(30), CGT_MDCR_TPM), + SR_TRAP(SYS_PMCCFILTR_EL0, CGT_MDCR_TPM), + SR_TRAP(SYS_MDCCSR_EL0, CGT_MDCR_TDCC_TDE_TDA), + SR_TRAP(SYS_MDCCINT_EL1, CGT_MDCR_TDCC_TDE_TDA), + SR_TRAP(SYS_OSDTRRX_EL1, CGT_MDCR_TDCC_TDE_TDA), + SR_TRAP(SYS_OSDTRTX_EL1, CGT_MDCR_TDCC_TDE_TDA), + SR_TRAP(SYS_DBGDTR_EL0, CGT_MDCR_TDCC_TDE_TDA), + /* + * Also covers DBGDTRRX_EL0, which has the same encoding as + * SYS_DBGDTRTX_EL0... + */ + SR_TRAP(SYS_DBGDTRTX_EL0, CGT_MDCR_TDCC_TDE_TDA), + SR_TRAP(SYS_MDSCR_EL1, CGT_MDCR_TDE_TDA), + SR_TRAP(SYS_OSECCR_EL1, CGT_MDCR_TDE_TDA), + SR_TRAP(SYS_DBGBVRn_EL1(0), CGT_MDCR_TDE_TDA), + SR_TRAP(SYS_DBGBVRn_EL1(1), CGT_MDCR_TDE_TDA), + SR_TRAP(SYS_DBGBVRn_EL1(2), CGT_MDCR_TDE_TDA), + SR_TRAP(SYS_DBGBVRn_EL1(3), CGT_MDCR_TDE_TDA), + SR_TRAP(SYS_DBGBVRn_EL1(4), CGT_MDCR_TDE_TDA), + SR_TRAP(SYS_DBGBVRn_EL1(5), CGT_MDCR_TDE_TDA), + SR_TRAP(SYS_DBGBVRn_EL1(6), CGT_MDCR_TDE_TDA), + SR_TRAP(SYS_DBGBVRn_EL1(7), CGT_MDCR_TDE_TDA), + SR_TRAP(SYS_DBGBVRn_EL1(8), CGT_MDCR_TDE_TDA), + SR_TRAP(SYS_DBGBVRn_EL1(9), CGT_MDCR_TDE_TDA), + SR_TRAP(SYS_DBGBVRn_EL1(10), CGT_MDCR_TDE_TDA), + SR_TRAP(SYS_DBGBVRn_EL1(11), CGT_MDCR_TDE_TDA), + SR_TRAP(SYS_DBGBVRn_EL1(12), CGT_MDCR_TDE_TDA), + SR_TRAP(SYS_DBGBVRn_EL1(13), CGT_MDCR_TDE_TDA), + SR_TRAP(SYS_DBGBVRn_EL1(14), CGT_MDCR_TDE_TDA), + SR_TRAP(SYS_DBGBVRn_EL1(15), CGT_MDCR_TDE_TDA), + SR_TRAP(SYS_DBGBCRn_EL1(0), CGT_MDCR_TDE_TDA), + SR_TRAP(SYS_DBGBCRn_EL1(1), CGT_MDCR_TDE_TDA), + SR_TRAP(SYS_DBGBCRn_EL1(2), CGT_MDCR_TDE_TDA), + SR_TRAP(SYS_DBGBCRn_EL1(3), CGT_MDCR_TDE_TDA), + SR_TRAP(SYS_DBGBCRn_EL1(4), CGT_MDCR_TDE_TDA), + SR_TRAP(SYS_DBGBCRn_EL1(5), CGT_MDCR_TDE_TDA), + SR_TRAP(SYS_DBGBCRn_EL1(6), CGT_MDCR_TDE_TDA), + SR_TRAP(SYS_DBGBCRn_EL1(7), CGT_MDCR_TDE_TDA), + SR_TRAP(SYS_DBGBCRn_EL1(8), CGT_MDCR_TDE_TDA), + SR_TRAP(SYS_DBGBCRn_EL1(9), 
CGT_MDCR_TDE_TDA), + SR_TRAP(SYS_DBGBCRn_EL1(10), CGT_MDCR_TDE_TDA), + SR_TRAP(SYS_DBGBCRn_EL1(11), CGT_MDCR_TDE_TDA), + SR_TRAP(SYS_DBGBCRn_EL1(12), CGT_MDCR_TDE_TDA), + SR_TRAP(SYS_DBGBCRn_EL1(13), CGT_MDCR_TDE_TDA), + SR_TRAP(SYS_DBGBCRn_EL1(14), CGT_MDCR_TDE_TDA), + SR_TRAP(SYS_DBGBCRn_EL1(15), CGT_MDCR_TDE_TDA), + SR_TRAP(SYS_DBGWVRn_EL1(0), CGT_MDCR_TDE_TDA), + SR_TRAP(SYS_DBGWVRn_EL1(1), CGT_MDCR_TDE_TDA), + SR_TRAP(SYS_DBGWVRn_EL1(2), CGT_MDCR_TDE_TDA), + SR_TRAP(SYS_DBGWVRn_EL1(3), CGT_MDCR_TDE_TDA), + SR_TRAP(SYS_DBGWVRn_EL1(4), CGT_MDCR_TDE_TDA), + SR_TRAP(SYS_DBGWVRn_EL1(5), CGT_MDCR_TDE_TDA), + SR_TRAP(SYS_DBGWVRn_EL1(6), CGT_MDCR_TDE_TDA), + SR_TRAP(SYS_DBGWVRn_EL1(7), CGT_MDCR_TDE_TDA), + SR_TRAP(SYS_DBGWVRn_EL1(8), CGT_MDCR_TDE_TDA), + SR_TRAP(SYS_DBGWVRn_EL1(9), CGT_MDCR_TDE_TDA), + SR_TRAP(SYS_DBGWVRn_EL1(10), CGT_MDCR_TDE_TDA), + SR_TRAP(SYS_DBGWVRn_EL1(11), CGT_MDCR_TDE_TDA), + SR_TRAP(SYS_DBGWVRn_EL1(12), CGT_MDCR_TDE_TDA), + SR_TRAP(SYS_DBGWVRn_EL1(13), CGT_MDCR_TDE_TDA), + SR_TRAP(SYS_DBGWVRn_EL1(14), CGT_MDCR_TDE_TDA), + SR_TRAP(SYS_DBGWVRn_EL1(15), CGT_MDCR_TDE_TDA), + SR_TRAP(SYS_DBGWCRn_EL1(0), CGT_MDCR_TDE_TDA), + SR_TRAP(SYS_DBGWCRn_EL1(1), CGT_MDCR_TDE_TDA), + SR_TRAP(SYS_DBGWCRn_EL1(2), CGT_MDCR_TDE_TDA), + SR_TRAP(SYS_DBGWCRn_EL1(3), CGT_MDCR_TDE_TDA), + SR_TRAP(SYS_DBGWCRn_EL1(4), CGT_MDCR_TDE_TDA), + SR_TRAP(SYS_DBGWCRn_EL1(5), CGT_MDCR_TDE_TDA), + SR_TRAP(SYS_DBGWCRn_EL1(6), CGT_MDCR_TDE_TDA), + SR_TRAP(SYS_DBGWCRn_EL1(7), CGT_MDCR_TDE_TDA), + SR_TRAP(SYS_DBGWCRn_EL1(8), CGT_MDCR_TDE_TDA), + SR_TRAP(SYS_DBGWCRn_EL1(9), CGT_MDCR_TDE_TDA), + SR_TRAP(SYS_DBGWCRn_EL1(10), CGT_MDCR_TDE_TDA), + SR_TRAP(SYS_DBGWCRn_EL1(11), CGT_MDCR_TDE_TDA), + SR_TRAP(SYS_DBGWCRn_EL1(12), CGT_MDCR_TDE_TDA), + SR_TRAP(SYS_DBGWCRn_EL1(13), CGT_MDCR_TDE_TDA), + SR_TRAP(SYS_DBGWCRn_EL1(14), CGT_MDCR_TDE_TDA), + SR_TRAP(SYS_DBGCLAIMSET_EL1, CGT_MDCR_TDE_TDA), + SR_TRAP(SYS_DBGCLAIMCLR_EL1, CGT_MDCR_TDE_TDA), + SR_TRAP(SYS_DBGAUTHSTATUS_EL1, 
CGT_MDCR_TDE_TDA), + SR_TRAP(SYS_OSLAR_EL1, CGT_MDCR_TDE_TDOSA), + SR_TRAP(SYS_OSLSR_EL1, CGT_MDCR_TDE_TDOSA), + SR_TRAP(SYS_OSDLR_EL1, CGT_MDCR_TDE_TDOSA), + SR_TRAP(SYS_DBGPRCR_EL1, CGT_MDCR_TDE_TDOSA), + SR_TRAP(SYS_MDRAR_EL1, CGT_MDCR_TDE_TDRA), + SR_TRAP(SYS_PMBLIMITR_EL1, CGT_MDCR_E2PB), + SR_TRAP(SYS_PMBPTR_EL1, CGT_MDCR_E2PB), + SR_TRAP(SYS_PMBSR_EL1, CGT_MDCR_E2PB), + SR_TRAP(SYS_PMSCR_EL1, CGT_MDCR_TPMS), + SR_TRAP(SYS_PMSEVFR_EL1, CGT_MDCR_TPMS), + SR_TRAP(SYS_PMSFCR_EL1, CGT_MDCR_TPMS), + SR_TRAP(SYS_PMSICR_EL1, CGT_MDCR_TPMS), + SR_TRAP(SYS_PMSIDR_EL1, CGT_MDCR_TPMS), + SR_TRAP(SYS_PMSIRR_EL1, CGT_MDCR_TPMS), + SR_TRAP(SYS_PMSLATFR_EL1, CGT_MDCR_TPMS), + SR_TRAP(SYS_PMSNEVFR_EL1, CGT_MDCR_TPMS), + SR_TRAP(SYS_TRFCR_EL1, CGT_MDCR_TTRF), + SR_TRAP(SYS_TRBBASER_EL1, CGT_MDCR_E2TB), + SR_TRAP(SYS_TRBLIMITR_EL1, CGT_MDCR_E2TB), + SR_TRAP(SYS_TRBMAR_EL1, CGT_MDCR_E2TB), + SR_TRAP(SYS_TRBPTR_EL1, CGT_MDCR_E2TB), + SR_TRAP(SYS_TRBSR_EL1, CGT_MDCR_E2TB), + SR_TRAP(SYS_TRBTRG_EL1, CGT_MDCR_E2TB), + SR_TRAP(SYS_CNTP_TVAL_EL0, CGT_CNTHCTL_EL1PTEN), + SR_TRAP(SYS_CNTP_CVAL_EL0, CGT_CNTHCTL_EL1PTEN), + SR_TRAP(SYS_CNTP_CTL_EL0, CGT_CNTHCTL_EL1PTEN), + SR_TRAP(SYS_CNTPCT_EL0, CGT_CNTHCTL_EL1PCTEN), + SR_TRAP(SYS_CNTPCTSS_EL0, CGT_CNTHCTL_EL1PCTEN), +}; + /* File-local XArray declared with DEFINE_XARRAY(); the code that stores to and loads from it is outside this part of the patch. NOTE(review): presumably keyed by sysreg encoding to hold trap-forwarding configuration - confirm against the code that populates it. */ +static DEFINE_XARRAY(sr_forward_xa); + /* Identifies the fine-grained trap (FGT) register group an entry belongs to. SR_FGF() forms these names by pasting g ## _GROUP, so each enumerator prefix must match the group argument used in encoding_to_fgt[]. __NO_FGT_GROUP__ is listed first and is therefore 0; __NR_FGT_GROUP_IDS__ is listed last and so equals the number of ids before it. */ +enum fgt_group_id { + __NO_FGT_GROUP__, + HFGxTR_GROUP, + HDFGRTR_GROUP, + HDFGWTR_GROUP, + HFGITR_GROUP, + + /* Must be last */ + __NR_FGT_GROUP_IDS__ +}; + /* Optional extra condition recorded in the .fgf field of an FGT entry. __NO_FGF__ (0) is what SR_FGT() passes; HCRX_FGTnXS is passed by the nXS TLBI entries, which per the comment in encoding_to_fgt[] must also be checked against HCRX_EL2.FGTnXS. */ +enum fg_filter_id { + __NO_FGF__, + HCRX_FGTnXS, + + /* Must be last */ + __NR_FG_FILTER_IDS__ +}; + /* SR_FGF(sr, g, b, p, f): expands to one struct encoding_to_trap_config initializer covering the single encoding sr (.encoding and .end are both sr). The group id is g ## _GROUP and the bit position is g ## _EL2_ ## b ## _SHIFT, so g and b must spell an existing group enumerator and an existing <g>_EL2_<b>_SHIFT definition. p is stored as .pol (the table passes 0 for n-prefixed bits and 1 otherwise) and f is stored as .fgf. .line records __LINE__ at the point where the macro is used. */ +#define SR_FGF(sr, g, b, p, f) \ + { \ + .encoding = sr, \ + .end = sr, \ + .tc = { \ + .fgt = g ## _GROUP, \ + .bit = g ## _EL2_ ## b ## _SHIFT, \ + .pol = p, \ + .fgf = f, \ + }, \ + .line = __LINE__, \ + } + /* SR_FGT(sr, g, b, p): SR_FGF() with no extra filter, i.e. .fgf = __NO_FGF__. */ +#define SR_FGT(sr, g, b, p) SR_FGF(sr, g, b, p, __NO_FGF__) + +static const struct encoding_to_trap_config encoding_to_fgt[] __initconst = { + /* HFGRTR_EL2, HFGWTR_EL2 */ + SR_FGT(SYS_TPIDR2_EL0, HFGxTR, 
nTPIDR2_EL0, 0), + SR_FGT(SYS_SMPRI_EL1, HFGxTR, nSMPRI_EL1, 0), + SR_FGT(SYS_ACCDATA_EL1, HFGxTR, nACCDATA_EL1, 0), + SR_FGT(SYS_ERXADDR_EL1, HFGxTR, ERXADDR_EL1, 1), + SR_FGT(SYS_ERXPFGCDN_EL1, HFGxTR, ERXPFGCDN_EL1, 1), + SR_FGT(SYS_ERXPFGCTL_EL1, HFGxTR, ERXPFGCTL_EL1, 1), + SR_FGT(SYS_ERXPFGF_EL1, HFGxTR, ERXPFGF_EL1, 1), + SR_FGT(SYS_ERXMISC0_EL1, HFGxTR, ERXMISCn_EL1, 1), + SR_FGT(SYS_ERXMISC1_EL1, HFGxTR, ERXMISCn_EL1, 1), + SR_FGT(SYS_ERXMISC2_EL1, HFGxTR, ERXMISCn_EL1, 1), + SR_FGT(SYS_ERXMISC3_EL1, HFGxTR, ERXMISCn_EL1, 1), + SR_FGT(SYS_ERXSTATUS_EL1, HFGxTR, ERXSTATUS_EL1, 1), + SR_FGT(SYS_ERXCTLR_EL1, HFGxTR, ERXCTLR_EL1, 1), + SR_FGT(SYS_ERXFR_EL1, HFGxTR, ERXFR_EL1, 1), + SR_FGT(SYS_ERRSELR_EL1, HFGxTR, ERRSELR_EL1, 1), + SR_FGT(SYS_ERRIDR_EL1, HFGxTR, ERRIDR_EL1, 1), + SR_FGT(SYS_ICC_IGRPEN0_EL1, HFGxTR, ICC_IGRPENn_EL1, 1), + SR_FGT(SYS_ICC_IGRPEN1_EL1, HFGxTR, ICC_IGRPENn_EL1, 1), + SR_FGT(SYS_VBAR_EL1, HFGxTR, VBAR_EL1, 1), + SR_FGT(SYS_TTBR1_EL1, HFGxTR, TTBR1_EL1, 1), + SR_FGT(SYS_TTBR0_EL1, HFGxTR, TTBR0_EL1, 1), + SR_FGT(SYS_TPIDR_EL0, HFGxTR, TPIDR_EL0, 1), + SR_FGT(SYS_TPIDRRO_EL0, HFGxTR, TPIDRRO_EL0, 1), + SR_FGT(SYS_TPIDR_EL1, HFGxTR, TPIDR_EL1, 1), + SR_FGT(SYS_TCR_EL1, HFGxTR, TCR_EL1, 1), + SR_FGT(SYS_SCXTNUM_EL0, HFGxTR, SCXTNUM_EL0, 1), + SR_FGT(SYS_SCXTNUM_EL1, HFGxTR, SCXTNUM_EL1, 1), + SR_FGT(SYS_SCTLR_EL1, HFGxTR, SCTLR_EL1, 1), + SR_FGT(SYS_REVIDR_EL1, HFGxTR, REVIDR_EL1, 1), + SR_FGT(SYS_PAR_EL1, HFGxTR, PAR_EL1, 1), + SR_FGT(SYS_MPIDR_EL1, HFGxTR, MPIDR_EL1, 1), + SR_FGT(SYS_MIDR_EL1, HFGxTR, MIDR_EL1, 1), + SR_FGT(SYS_MAIR_EL1, HFGxTR, MAIR_EL1, 1), + SR_FGT(SYS_LORSA_EL1, HFGxTR, LORSA_EL1, 1), + SR_FGT(SYS_LORN_EL1, HFGxTR, LORN_EL1, 1), + SR_FGT(SYS_LORID_EL1, HFGxTR, LORID_EL1, 1), + SR_FGT(SYS_LOREA_EL1, HFGxTR, LOREA_EL1, 1), + SR_FGT(SYS_LORC_EL1, HFGxTR, LORC_EL1, 1), + SR_FGT(SYS_ISR_EL1, HFGxTR, ISR_EL1, 1), + SR_FGT(SYS_FAR_EL1, HFGxTR, FAR_EL1, 1), + SR_FGT(SYS_ESR_EL1, HFGxTR, ESR_EL1, 1), + 
SR_FGT(SYS_DCZID_EL0, HFGxTR, DCZID_EL0, 1), + SR_FGT(SYS_CTR_EL0, HFGxTR, CTR_EL0, 1), + SR_FGT(SYS_CSSELR_EL1, HFGxTR, CSSELR_EL1, 1), + SR_FGT(SYS_CPACR_EL1, HFGxTR, CPACR_EL1, 1), + SR_FGT(SYS_CONTEXTIDR_EL1, HFGxTR, CONTEXTIDR_EL1, 1), + SR_FGT(SYS_CLIDR_EL1, HFGxTR, CLIDR_EL1, 1), + SR_FGT(SYS_CCSIDR_EL1, HFGxTR, CCSIDR_EL1, 1), + SR_FGT(SYS_APIBKEYLO_EL1, HFGxTR, APIBKey, 1), + SR_FGT(SYS_APIBKEYHI_EL1, HFGxTR, APIBKey, 1), + SR_FGT(SYS_APIAKEYLO_EL1, HFGxTR, APIAKey, 1), + SR_FGT(SYS_APIAKEYHI_EL1, HFGxTR, APIAKey, 1), + SR_FGT(SYS_APGAKEYLO_EL1, HFGxTR, APGAKey, 1), + SR_FGT(SYS_APGAKEYHI_EL1, HFGxTR, APGAKey, 1), + SR_FGT(SYS_APDBKEYLO_EL1, HFGxTR, APDBKey, 1), + SR_FGT(SYS_APDBKEYHI_EL1, HFGxTR, APDBKey, 1), + SR_FGT(SYS_APDAKEYLO_EL1, HFGxTR, APDAKey, 1), + SR_FGT(SYS_APDAKEYHI_EL1, HFGxTR, APDAKey, 1), + SR_FGT(SYS_AMAIR_EL1, HFGxTR, AMAIR_EL1, 1), + SR_FGT(SYS_AIDR_EL1, HFGxTR, AIDR_EL1, 1), + SR_FGT(SYS_AFSR1_EL1, HFGxTR, AFSR1_EL1, 1), + SR_FGT(SYS_AFSR0_EL1, HFGxTR, AFSR0_EL1, 1), + /* HFGITR_EL2 */ + SR_FGT(OP_BRB_IALL, HFGITR, nBRBIALL, 0), + SR_FGT(OP_BRB_INJ, HFGITR, nBRBINJ, 0), + SR_FGT(SYS_DC_CVAC, HFGITR, DCCVAC, 1), + SR_FGT(SYS_DC_CGVAC, HFGITR, DCCVAC, 1), + SR_FGT(SYS_DC_CGDVAC, HFGITR, DCCVAC, 1), + SR_FGT(OP_CPP_RCTX, HFGITR, CPPRCTX, 1), + SR_FGT(OP_DVP_RCTX, HFGITR, DVPRCTX, 1), + SR_FGT(OP_CFP_RCTX, HFGITR, CFPRCTX, 1), + SR_FGT(OP_TLBI_VAALE1, HFGITR, TLBIVAALE1, 1), + SR_FGT(OP_TLBI_VALE1, HFGITR, TLBIVALE1, 1), + SR_FGT(OP_TLBI_VAAE1, HFGITR, TLBIVAAE1, 1), + SR_FGT(OP_TLBI_ASIDE1, HFGITR, TLBIASIDE1, 1), + SR_FGT(OP_TLBI_VAE1, HFGITR, TLBIVAE1, 1), + SR_FGT(OP_TLBI_VMALLE1, HFGITR, TLBIVMALLE1, 1), + SR_FGT(OP_TLBI_RVAALE1, HFGITR, TLBIRVAALE1, 1), + SR_FGT(OP_TLBI_RVALE1, HFGITR, TLBIRVALE1, 1), + SR_FGT(OP_TLBI_RVAAE1, HFGITR, TLBIRVAAE1, 1), + SR_FGT(OP_TLBI_RVAE1, HFGITR, TLBIRVAE1, 1), + SR_FGT(OP_TLBI_RVAALE1IS, HFGITR, TLBIRVAALE1IS, 1), + SR_FGT(OP_TLBI_RVALE1IS, HFGITR, TLBIRVALE1IS, 1), + SR_FGT(OP_TLBI_RVAAE1IS, 
HFGITR, TLBIRVAAE1IS, 1), + SR_FGT(OP_TLBI_RVAE1IS, HFGITR, TLBIRVAE1IS, 1), + SR_FGT(OP_TLBI_VAALE1IS, HFGITR, TLBIVAALE1IS, 1), + SR_FGT(OP_TLBI_VALE1IS, HFGITR, TLBIVALE1IS, 1), + SR_FGT(OP_TLBI_VAAE1IS, HFGITR, TLBIVAAE1IS, 1), + SR_FGT(OP_TLBI_ASIDE1IS, HFGITR, TLBIASIDE1IS, 1), + SR_FGT(OP_TLBI_VAE1IS, HFGITR, TLBIVAE1IS, 1), + SR_FGT(OP_TLBI_VMALLE1IS, HFGITR, TLBIVMALLE1IS, 1), + SR_FGT(OP_TLBI_RVAALE1OS, HFGITR, TLBIRVAALE1OS, 1), + SR_FGT(OP_TLBI_RVALE1OS, HFGITR, TLBIRVALE1OS, 1), + SR_FGT(OP_TLBI_RVAAE1OS, HFGITR, TLBIRVAAE1OS, 1), + SR_FGT(OP_TLBI_RVAE1OS, HFGITR, TLBIRVAE1OS, 1), + SR_FGT(OP_TLBI_VAALE1OS, HFGITR, TLBIVAALE1OS, 1), + SR_FGT(OP_TLBI_VALE1OS, HFGITR, TLBIVALE1OS, 1), + SR_FGT(OP_TLBI_VAAE1OS, HFGITR, TLBIVAAE1OS, 1), + SR_FGT(OP_TLBI_ASIDE1OS, HFGITR, TLBIASIDE1OS, 1), + SR_FGT(OP_TLBI_VAE1OS, HFGITR, TLBIVAE1OS, 1), + SR_FGT(OP_TLBI_VMALLE1OS, HFGITR, TLBIVMALLE1OS, 1), + /* nXS variants must be checked against HCRX_EL2.FGTnXS */ + SR_FGF(OP_TLBI_VAALE1NXS, HFGITR, TLBIVAALE1, 1, HCRX_FGTnXS), + SR_FGF(OP_TLBI_VALE1NXS, HFGITR, TLBIVALE1, 1, HCRX_FGTnXS), + SR_FGF(OP_TLBI_VAAE1NXS, HFGITR, TLBIVAAE1, 1, HCRX_FGTnXS), + SR_FGF(OP_TLBI_ASIDE1NXS, HFGITR, TLBIASIDE1, 1, HCRX_FGTnXS), + SR_FGF(OP_TLBI_VAE1NXS, HFGITR, TLBIVAE1, 1, HCRX_FGTnXS), + SR_FGF(OP_TLBI_VMALLE1NXS, HFGITR, TLBIVMALLE1, 1, HCRX_FGTnXS), + SR_FGF(OP_TLBI_RVAALE1NXS, HFGITR, TLBIRVAALE1, 1, HCRX_FGTnXS), + SR_FGF(OP_TLBI_RVALE1NXS, HFGITR, TLBIRVALE1, 1, HCRX_FGTnXS), + SR_FGF(OP_TLBI_RVAAE1NXS, HFGITR, TLBIRVAAE1, 1, HCRX_FGTnXS), + SR_FGF(OP_TLBI_RVAE1NXS, HFGITR, TLBIRVAE1, 1, HCRX_FGTnXS), + SR_FGF(OP_TLBI_RVAALE1ISNXS, HFGITR, TLBIRVAALE1IS, 1, HCRX_FGTnXS), + SR_FGF(OP_TLBI_RVALE1ISNXS, HFGITR, TLBIRVALE1IS, 1, HCRX_FGTnXS), + SR_FGF(OP_TLBI_RVAAE1ISNXS, HFGITR, TLBIRVAAE1IS, 1, HCRX_FGTnXS), + SR_FGF(OP_TLBI_RVAE1ISNXS, HFGITR, TLBIRVAE1IS, 1, HCRX_FGTnXS), + SR_FGF(OP_TLBI_VAALE1ISNXS, HFGITR, TLBIVAALE1IS, 1, HCRX_FGTnXS), + SR_FGF(OP_TLBI_VALE1ISNXS, HFGITR, 
TLBIVALE1IS, 1, HCRX_FGTnXS), + SR_FGF(OP_TLBI_VAAE1ISNXS, HFGITR, TLBIVAAE1IS, 1, HCRX_FGTnXS), + SR_FGF(OP_TLBI_ASIDE1ISNXS, HFGITR, TLBIASIDE1IS, 1, HCRX_FGTnXS), + SR_FGF(OP_TLBI_VAE1ISNXS, HFGITR, TLBIVAE1IS, 1, HCRX_FGTnXS), + SR_FGF(OP_TLBI_VMALLE1ISNXS, HFGITR, TLBIVMALLE1IS, 1, HCRX_FGTnXS), + SR_FGF(OP_TLBI_RVAALE1OSNXS, HFGITR, TLBIRVAALE1OS, 1, HCRX_FGTnXS), + SR_FGF(OP_TLBI_RVALE1OSNXS, HFGITR, TLBIRVALE1OS, 1, HCRX_FGTnXS), + SR_FGF(OP_TLBI_RVAAE1OSNXS, HFGITR, TLBIRVAAE1OS, 1, HCRX_FGTnXS), + SR_FGF(OP_TLBI_RVAE1OSNXS, HFGITR, TLBIRVAE1OS, 1, HCRX_FGTnXS), + SR_FGF(OP_TLBI_VAALE1OSNXS, HFGITR, TLBIVAALE1OS, 1, HCRX_FGTnXS), + SR_FGF(OP_TLBI_VALE1OSNXS, HFGITR, TLBIVALE1OS, 1, HCRX_FGTnXS), + SR_FGF(OP_TLBI_VAAE1OSNXS, HFGITR, TLBIVAAE1OS, 1, HCRX_FGTnXS), + SR_FGF(OP_TLBI_ASIDE1OSNXS, HFGITR, TLBIASIDE1OS, 1, HCRX_FGTnXS), + SR_FGF(OP_TLBI_VAE1OSNXS, HFGITR, TLBIVAE1OS, 1, HCRX_FGTnXS), + SR_FGF(OP_TLBI_VMALLE1OSNXS, HFGITR, TLBIVMALLE1OS, 1, HCRX_FGTnXS), + SR_FGT(OP_AT_S1E1WP, HFGITR, ATS1E1WP, 1), + SR_FGT(OP_AT_S1E1RP, HFGITR, ATS1E1RP, 1), + SR_FGT(OP_AT_S1E0W, HFGITR, ATS1E0W, 1), + SR_FGT(OP_AT_S1E0R, HFGITR, ATS1E0R, 1), + SR_FGT(OP_AT_S1E1W, HFGITR, ATS1E1W, 1), + SR_FGT(OP_AT_S1E1R, HFGITR, ATS1E1R, 1), + SR_FGT(SYS_DC_ZVA, HFGITR, DCZVA, 1), + SR_FGT(SYS_DC_GVA, HFGITR, DCZVA, 1), + SR_FGT(SYS_DC_GZVA, HFGITR, DCZVA, 1), + SR_FGT(SYS_DC_CIVAC, HFGITR, DCCIVAC, 1), + SR_FGT(SYS_DC_CIGVAC, HFGITR, DCCIVAC, 1), + SR_FGT(SYS_DC_CIGDVAC, HFGITR, DCCIVAC, 1), + SR_FGT(SYS_DC_CVADP, HFGITR, DCCVADP, 1), + SR_FGT(SYS_DC_CGVADP, HFGITR, DCCVADP, 1), + SR_FGT(SYS_DC_CGDVADP, HFGITR, DCCVADP, 1), + SR_FGT(SYS_DC_CVAP, HFGITR, DCCVAP, 1), + SR_FGT(SYS_DC_CGVAP, HFGITR, DCCVAP, 1), + SR_FGT(SYS_DC_CGDVAP, HFGITR, DCCVAP, 1), + SR_FGT(SYS_DC_CVAU, HFGITR, DCCVAU, 1), + SR_FGT(SYS_DC_CISW, HFGITR, DCCISW, 1), + SR_FGT(SYS_DC_CIGSW, HFGITR, DCCISW, 1), + SR_FGT(SYS_DC_CIGDSW, HFGITR, DCCISW, 1), + SR_FGT(SYS_DC_CSW, HFGITR, DCCSW, 1), + 
SR_FGT(SYS_DC_CGSW, HFGITR, DCCSW, 1), + SR_FGT(SYS_DC_CGDSW, HFGITR, DCCSW, 1), + SR_FGT(SYS_DC_ISW, HFGITR, DCISW, 1), + SR_FGT(SYS_DC_IGSW, HFGITR, DCISW, 1), + SR_FGT(SYS_DC_IGDSW, HFGITR, DCISW, 1), + SR_FGT(SYS_DC_IVAC, HFGITR, DCIVAC, 1), + SR_FGT(SYS_DC_IGVAC, HFGITR, DCIVAC, 1), + SR_FGT(SYS_DC_IGDVAC, HFGITR, DCIVAC, 1), + SR_FGT(SYS_IC_IVAU, HFGITR, ICIVAU, 1), + SR_FGT(SYS_IC_IALLU, HFGITR, ICIALLU, 1), + SR_FGT(SYS_IC_IALLUIS, HFGITR, ICIALLUIS, 1), + /* HDFGRTR_EL2 */ + SR_FGT(SYS_PMBIDR_EL1, HDFGRTR, PMBIDR_EL1, 1), + SR_FGT(SYS_PMSNEVFR_EL1, HDFGRTR, nPMSNEVFR_EL1, 0), + SR_FGT(SYS_BRBINF_EL1(0), HDFGRTR, nBRBDATA, 0), + SR_FGT(SYS_BRBINF_EL1(1), HDFGRTR, nBRBDATA, 0), + SR_FGT(SYS_BRBINF_EL1(2), HDFGRTR, nBRBDATA, 0), + SR_FGT(SYS_BRBINF_EL1(3), HDFGRTR, nBRBDATA, 0), + SR_FGT(SYS_BRBINF_EL1(4), HDFGRTR, nBRBDATA, 0), + SR_FGT(SYS_BRBINF_EL1(5), HDFGRTR, nBRBDATA, 0), + SR_FGT(SYS_BRBINF_EL1(6), HDFGRTR, nBRBDATA, 0), + SR_FGT(SYS_BRBINF_EL1(7), HDFGRTR, nBRBDATA, 0), + SR_FGT(SYS_BRBINF_EL1(8), HDFGRTR, nBRBDATA, 0), + SR_FGT(SYS_BRBINF_EL1(9), HDFGRTR, nBRBDATA, 0), + SR_FGT(SYS_BRBINF_EL1(10), HDFGRTR, nBRBDATA, 0), + SR_FGT(SYS_BRBINF_EL1(11), HDFGRTR, nBRBDATA, 0), + SR_FGT(SYS_BRBINF_EL1(12), HDFGRTR, nBRBDATA, 0), + SR_FGT(SYS_BRBINF_EL1(13), HDFGRTR, nBRBDATA, 0), + SR_FGT(SYS_BRBINF_EL1(14), HDFGRTR, nBRBDATA, 0), + SR_FGT(SYS_BRBINF_EL1(15), HDFGRTR, nBRBDATA, 0), + SR_FGT(SYS_BRBINF_EL1(16), HDFGRTR, nBRBDATA, 0), + SR_FGT(SYS_BRBINF_EL1(17), HDFGRTR, nBRBDATA, 0), + SR_FGT(SYS_BRBINF_EL1(18), HDFGRTR, nBRBDATA, 0), + SR_FGT(SYS_BRBINF_EL1(19), HDFGRTR, nBRBDATA, 0), + SR_FGT(SYS_BRBINF_EL1(20), HDFGRTR, nBRBDATA, 0), + SR_FGT(SYS_BRBINF_EL1(21), HDFGRTR, nBRBDATA, 0), + SR_FGT(SYS_BRBINF_EL1(22), HDFGRTR, nBRBDATA, 0), + SR_FGT(SYS_BRBINF_EL1(23), HDFGRTR, nBRBDATA, 0), + SR_FGT(SYS_BRBINF_EL1(24), HDFGRTR, nBRBDATA, 0), + SR_FGT(SYS_BRBINF_EL1(25), HDFGRTR, nBRBDATA, 0), + SR_FGT(SYS_BRBINF_EL1(26), HDFGRTR, nBRBDATA, 0), + 
SR_FGT(SYS_BRBINF_EL1(27), HDFGRTR, nBRBDATA, 0), + SR_FGT(SYS_BRBINF_EL1(28), HDFGRTR, nBRBDATA, 0), + SR_FGT(SYS_BRBINF_EL1(29), HDFGRTR, nBRBDATA, 0), + SR_FGT(SYS_BRBINF_EL1(30), HDFGRTR, nBRBDATA, 0), + SR_FGT(SYS_BRBINF_EL1(31), HDFGRTR, nBRBDATA, 0), + SR_FGT(SYS_BRBINFINJ_EL1, HDFGRTR, nBRBDATA, 0), + SR_FGT(SYS_BRBSRC_EL1(0), HDFGRTR, nBRBDATA, 0), + SR_FGT(SYS_BRBSRC_EL1(1), HDFGRTR, nBRBDATA, 0), + SR_FGT(SYS_BRBSRC_EL1(2), HDFGRTR, nBRBDATA, 0), + SR_FGT(SYS_BRBSRC_EL1(3), HDFGRTR, nBRBDATA, 0), + SR_FGT(SYS_BRBSRC_EL1(4), HDFGRTR, nBRBDATA, 0), + SR_FGT(SYS_BRBSRC_EL1(5), HDFGRTR, nBRBDATA, 0), + SR_FGT(SYS_BRBSRC_EL1(6), HDFGRTR, nBRBDATA, 0), + SR_FGT(SYS_BRBSRC_EL1(7), HDFGRTR, nBRBDATA, 0), + SR_FGT(SYS_BRBSRC_EL1(8), HDFGRTR, nBRBDATA, 0), + SR_FGT(SYS_BRBSRC_EL1(9), HDFGRTR, nBRBDATA, 0), + SR_FGT(SYS_BRBSRC_EL1(10), HDFGRTR, nBRBDATA, 0), + SR_FGT(SYS_BRBSRC_EL1(11), HDFGRTR, nBRBDATA, 0), + SR_FGT(SYS_BRBSRC_EL1(12), HDFGRTR, nBRBDATA, 0), + SR_FGT(SYS_BRBSRC_EL1(13), HDFGRTR, nBRBDATA, 0), + SR_FGT(SYS_BRBSRC_EL1(14), HDFGRTR, nBRBDATA, 0), + SR_FGT(SYS_BRBSRC_EL1(15), HDFGRTR, nBRBDATA, 0), + SR_FGT(SYS_BRBSRC_EL1(16), HDFGRTR, nBRBDATA, 0), + SR_FGT(SYS_BRBSRC_EL1(17), HDFGRTR, nBRBDATA, 0), + SR_FGT(SYS_BRBSRC_EL1(18), HDFGRTR, nBRBDATA, 0), + SR_FGT(SYS_BRBSRC_EL1(19), HDFGRTR, nBRBDATA, 0), + SR_FGT(SYS_BRBSRC_EL1(20), HDFGRTR, nBRBDATA, 0), + SR_FGT(SYS_BRBSRC_EL1(21), HDFGRTR, nBRBDATA, 0), + SR_FGT(SYS_BRBSRC_EL1(22), HDFGRTR, nBRBDATA, 0), + SR_FGT(SYS_BRBSRC_EL1(23), HDFGRTR, nBRBDATA, 0), + SR_FGT(SYS_BRBSRC_EL1(24), HDFGRTR, nBRBDATA, 0), + SR_FGT(SYS_BRBSRC_EL1(25), HDFGRTR, nBRBDATA, 0), + SR_FGT(SYS_BRBSRC_EL1(26), HDFGRTR, nBRBDATA, 0), + SR_FGT(SYS_BRBSRC_EL1(27), HDFGRTR, nBRBDATA, 0), + SR_FGT(SYS_BRBSRC_EL1(28), HDFGRTR, nBRBDATA, 0), + SR_FGT(SYS_BRBSRC_EL1(29), HDFGRTR, nBRBDATA, 0), + SR_FGT(SYS_BRBSRC_EL1(30), HDFGRTR, nBRBDATA, 0), + SR_FGT(SYS_BRBSRC_EL1(31), HDFGRTR, nBRBDATA, 0), + SR_FGT(SYS_BRBSRCINJ_EL1, HDFGRTR, 
nBRBDATA, 0), + SR_FGT(SYS_BRBTGT_EL1(0), HDFGRTR, nBRBDATA, 0), + SR_FGT(SYS_BRBTGT_EL1(1), HDFGRTR, nBRBDATA, 0), + SR_FGT(SYS_BRBTGT_EL1(2), HDFGRTR, nBRBDATA, 0), + SR_FGT(SYS_BRBTGT_EL1(3), HDFGRTR, nBRBDATA, 0), + SR_FGT(SYS_BRBTGT_EL1(4), HDFGRTR, nBRBDATA, 0), + SR_FGT(SYS_BRBTGT_EL1(5), HDFGRTR, nBRBDATA, 0), + SR_FGT(SYS_BRBTGT_EL1(6), HDFGRTR, nBRBDATA, 0), + SR_FGT(SYS_BRBTGT_EL1(7), HDFGRTR, nBRBDATA, 0), + SR_FGT(SYS_BRBTGT_EL1(8), HDFGRTR, nBRBDATA, 0), + SR_FGT(SYS_BRBTGT_EL1(9), HDFGRTR, nBRBDATA, 0), + SR_FGT(SYS_BRBTGT_EL1(10), HDFGRTR, nBRBDATA, 0), + SR_FGT(SYS_BRBTGT_EL1(11), HDFGRTR, nBRBDATA, 0), + SR_FGT(SYS_BRBTGT_EL1(12), HDFGRTR, nBRBDATA, 0), + SR_FGT(SYS_BRBTGT_EL1(13), HDFGRTR, nBRBDATA, 0), + SR_FGT(SYS_BRBTGT_EL1(14), HDFGRTR, nBRBDATA, 0), + SR_FGT(SYS_BRBTGT_EL1(15), HDFGRTR, nBRBDATA, 0), + SR_FGT(SYS_BRBTGT_EL1(16), HDFGRTR, nBRBDATA, 0), + SR_FGT(SYS_BRBTGT_EL1(17), HDFGRTR, nBRBDATA, 0), + SR_FGT(SYS_BRBTGT_EL1(18), HDFGRTR, nBRBDATA, 0), + SR_FGT(SYS_BRBTGT_EL1(19), HDFGRTR, nBRBDATA, 0), + SR_FGT(SYS_BRBTGT_EL1(20), HDFGRTR, nBRBDATA, 0), + SR_FGT(SYS_BRBTGT_EL1(21), HDFGRTR, nBRBDATA, 0), + SR_FGT(SYS_BRBTGT_EL1(22), HDFGRTR, nBRBDATA, 0), + SR_FGT(SYS_BRBTGT_EL1(23), HDFGRTR, nBRBDATA, 0), + SR_FGT(SYS_BRBTGT_EL1(24), HDFGRTR, nBRBDATA, 0), + SR_FGT(SYS_BRBTGT_EL1(25), HDFGRTR, nBRBDATA, 0), + SR_FGT(SYS_BRBTGT_EL1(26), HDFGRTR, nBRBDATA, 0), + SR_FGT(SYS_BRBTGT_EL1(27), HDFGRTR, nBRBDATA, 0), + SR_FGT(SYS_BRBTGT_EL1(28), HDFGRTR, nBRBDATA, 0), + SR_FGT(SYS_BRBTGT_EL1(29), HDFGRTR, nBRBDATA, 0), + SR_FGT(SYS_BRBTGT_EL1(30), HDFGRTR, nBRBDATA, 0), + SR_FGT(SYS_BRBTGT_EL1(31), HDFGRTR, nBRBDATA, 0), + SR_FGT(SYS_BRBTGTINJ_EL1, HDFGRTR, nBRBDATA, 0), + SR_FGT(SYS_BRBTS_EL1, HDFGRTR, nBRBDATA, 0), + SR_FGT(SYS_BRBCR_EL1, HDFGRTR, nBRBCTL, 0), + SR_FGT(SYS_BRBFCR_EL1, HDFGRTR, nBRBCTL, 0), + SR_FGT(SYS_BRBIDR0_EL1, HDFGRTR, nBRBIDR, 0), + SR_FGT(SYS_PMCEID0_EL0, HDFGRTR, PMCEIDn_EL0, 1), + SR_FGT(SYS_PMCEID1_EL0, HDFGRTR, 
PMCEIDn_EL0, 1), + SR_FGT(SYS_PMUSERENR_EL0, HDFGRTR, PMUSERENR_EL0, 1), + SR_FGT(SYS_TRBTRG_EL1, HDFGRTR, TRBTRG_EL1, 1), + SR_FGT(SYS_TRBSR_EL1, HDFGRTR, TRBSR_EL1, 1), + SR_FGT(SYS_TRBPTR_EL1, HDFGRTR, TRBPTR_EL1, 1), + SR_FGT(SYS_TRBMAR_EL1, HDFGRTR, TRBMAR_EL1, 1), + SR_FGT(SYS_TRBLIMITR_EL1, HDFGRTR, TRBLIMITR_EL1, 1), + SR_FGT(SYS_TRBIDR_EL1, HDFGRTR, TRBIDR_EL1, 1), + SR_FGT(SYS_TRBBASER_EL1, HDFGRTR, TRBBASER_EL1, 1), + SR_FGT(SYS_TRCVICTLR, HDFGRTR, TRCVICTLR, 1), + SR_FGT(SYS_TRCSTATR, HDFGRTR, TRCSTATR, 1), + SR_FGT(SYS_TRCSSCSR(0), HDFGRTR, TRCSSCSRn, 1), + SR_FGT(SYS_TRCSSCSR(1), HDFGRTR, TRCSSCSRn, 1), + SR_FGT(SYS_TRCSSCSR(2), HDFGRTR, TRCSSCSRn, 1), + SR_FGT(SYS_TRCSSCSR(3), HDFGRTR, TRCSSCSRn, 1), + SR_FGT(SYS_TRCSSCSR(4), HDFGRTR, TRCSSCSRn, 1), + SR_FGT(SYS_TRCSSCSR(5), HDFGRTR, TRCSSCSRn, 1), + SR_FGT(SYS_TRCSSCSR(6), HDFGRTR, TRCSSCSRn, 1), + SR_FGT(SYS_TRCSSCSR(7), HDFGRTR, TRCSSCSRn, 1), + SR_FGT(SYS_TRCSEQSTR, HDFGRTR, TRCSEQSTR, 1), + SR_FGT(SYS_TRCPRGCTLR, HDFGRTR, TRCPRGCTLR, 1), + SR_FGT(SYS_TRCOSLSR, HDFGRTR, TRCOSLSR, 1), + SR_FGT(SYS_TRCIMSPEC(0), HDFGRTR, TRCIMSPECn, 1), + SR_FGT(SYS_TRCIMSPEC(1), HDFGRTR, TRCIMSPECn, 1), + SR_FGT(SYS_TRCIMSPEC(2), HDFGRTR, TRCIMSPECn, 1), + SR_FGT(SYS_TRCIMSPEC(3), HDFGRTR, TRCIMSPECn, 1), + SR_FGT(SYS_TRCIMSPEC(4), HDFGRTR, TRCIMSPECn, 1), + SR_FGT(SYS_TRCIMSPEC(5), HDFGRTR, TRCIMSPECn, 1), + SR_FGT(SYS_TRCIMSPEC(6), HDFGRTR, TRCIMSPECn, 1), + SR_FGT(SYS_TRCIMSPEC(7), HDFGRTR, TRCIMSPECn, 1), + SR_FGT(SYS_TRCDEVARCH, HDFGRTR, TRCID, 1), + SR_FGT(SYS_TRCDEVID, HDFGRTR, TRCID, 1), + SR_FGT(SYS_TRCIDR0, HDFGRTR, TRCID, 1), + SR_FGT(SYS_TRCIDR1, HDFGRTR, TRCID, 1), + SR_FGT(SYS_TRCIDR2, HDFGRTR, TRCID, 1), + SR_FGT(SYS_TRCIDR3, HDFGRTR, TRCID, 1), + SR_FGT(SYS_TRCIDR4, HDFGRTR, TRCID, 1), + SR_FGT(SYS_TRCIDR5, HDFGRTR, TRCID, 1), + SR_FGT(SYS_TRCIDR6, HDFGRTR, TRCID, 1), + SR_FGT(SYS_TRCIDR7, HDFGRTR, TRCID, 1), + SR_FGT(SYS_TRCIDR8, HDFGRTR, TRCID, 1), + SR_FGT(SYS_TRCIDR9, HDFGRTR, TRCID, 1), + 
SR_FGT(SYS_TRCIDR10, HDFGRTR, TRCID, 1), + SR_FGT(SYS_TRCIDR11, HDFGRTR, TRCID, 1), + SR_FGT(SYS_TRCIDR12, HDFGRTR, TRCID, 1), + SR_FGT(SYS_TRCIDR13, HDFGRTR, TRCID, 1), + SR_FGT(SYS_TRCCNTVR(0), HDFGRTR, TRCCNTVRn, 1), + SR_FGT(SYS_TRCCNTVR(1), HDFGRTR, TRCCNTVRn, 1), + SR_FGT(SYS_TRCCNTVR(2), HDFGRTR, TRCCNTVRn, 1), + SR_FGT(SYS_TRCCNTVR(3), HDFGRTR, TRCCNTVRn, 1), + SR_FGT(SYS_TRCCLAIMCLR, HDFGRTR, TRCCLAIM, 1), + SR_FGT(SYS_TRCCLAIMSET, HDFGRTR, TRCCLAIM, 1), + SR_FGT(SYS_TRCAUXCTLR, HDFGRTR, TRCAUXCTLR, 1), + SR_FGT(SYS_TRCAUTHSTATUS, HDFGRTR, TRCAUTHSTATUS, 1), + SR_FGT(SYS_TRCACATR(0), HDFGRTR, TRC, 1), + SR_FGT(SYS_TRCACATR(1), HDFGRTR, TRC, 1), + SR_FGT(SYS_TRCACATR(2), HDFGRTR, TRC, 1), + SR_FGT(SYS_TRCACATR(3), HDFGRTR, TRC, 1), + SR_FGT(SYS_TRCACATR(4), HDFGRTR, TRC, 1), + SR_FGT(SYS_TRCACATR(5), HDFGRTR, TRC, 1), + SR_FGT(SYS_TRCACATR(6), HDFGRTR, TRC, 1), + SR_FGT(SYS_TRCACATR(7), HDFGRTR, TRC, 1), + SR_FGT(SYS_TRCACATR(8), HDFGRTR, TRC, 1), + SR_FGT(SYS_TRCACATR(9), HDFGRTR, TRC, 1), + SR_FGT(SYS_TRCACATR(10), HDFGRTR, TRC, 1), + SR_FGT(SYS_TRCACATR(11), HDFGRTR, TRC, 1), + SR_FGT(SYS_TRCACATR(12), HDFGRTR, TRC, 1), + SR_FGT(SYS_TRCACATR(13), HDFGRTR, TRC, 1), + SR_FGT(SYS_TRCACATR(14), HDFGRTR, TRC, 1), + SR_FGT(SYS_TRCACATR(15), HDFGRTR, TRC, 1), + SR_FGT(SYS_TRCACVR(0), HDFGRTR, TRC, 1), + SR_FGT(SYS_TRCACVR(1), HDFGRTR, TRC, 1), + SR_FGT(SYS_TRCACVR(2), HDFGRTR, TRC, 1), + SR_FGT(SYS_TRCACVR(3), HDFGRTR, TRC, 1), + SR_FGT(SYS_TRCACVR(4), HDFGRTR, TRC, 1), + SR_FGT(SYS_TRCACVR(5), HDFGRTR, TRC, 1), + SR_FGT(SYS_TRCACVR(6), HDFGRTR, TRC, 1), + SR_FGT(SYS_TRCACVR(7), HDFGRTR, TRC, 1), + SR_FGT(SYS_TRCACVR(8), HDFGRTR, TRC, 1), + SR_FGT(SYS_TRCACVR(9), HDFGRTR, TRC, 1), + SR_FGT(SYS_TRCACVR(10), HDFGRTR, TRC, 1), + SR_FGT(SYS_TRCACVR(11), HDFGRTR, TRC, 1), + SR_FGT(SYS_TRCACVR(12), HDFGRTR, TRC, 1), + SR_FGT(SYS_TRCACVR(13), HDFGRTR, TRC, 1), + SR_FGT(SYS_TRCACVR(14), HDFGRTR, TRC, 1), + SR_FGT(SYS_TRCACVR(15), HDFGRTR, TRC, 1), + 
SR_FGT(SYS_TRCBBCTLR, HDFGRTR, TRC, 1), + SR_FGT(SYS_TRCCCCTLR, HDFGRTR, TRC, 1), + SR_FGT(SYS_TRCCIDCCTLR0, HDFGRTR, TRC, 1), + SR_FGT(SYS_TRCCIDCCTLR1, HDFGRTR, TRC, 1), + SR_FGT(SYS_TRCCIDCVR(0), HDFGRTR, TRC, 1), + SR_FGT(SYS_TRCCIDCVR(1), HDFGRTR, TRC, 1), + SR_FGT(SYS_TRCCIDCVR(2), HDFGRTR, TRC, 1), + SR_FGT(SYS_TRCCIDCVR(3), HDFGRTR, TRC, 1), + SR_FGT(SYS_TRCCIDCVR(4), HDFGRTR, TRC, 1), + SR_FGT(SYS_TRCCIDCVR(5), HDFGRTR, TRC, 1), + SR_FGT(SYS_TRCCIDCVR(6), HDFGRTR, TRC, 1), + SR_FGT(SYS_TRCCIDCVR(7), HDFGRTR, TRC, 1), + SR_FGT(SYS_TRCCNTCTLR(0), HDFGRTR, TRC, 1), + SR_FGT(SYS_TRCCNTCTLR(1), HDFGRTR, TRC, 1), + SR_FGT(SYS_TRCCNTCTLR(2), HDFGRTR, TRC, 1), + SR_FGT(SYS_TRCCNTCTLR(3), HDFGRTR, TRC, 1), + SR_FGT(SYS_TRCCNTRLDVR(0), HDFGRTR, TRC, 1), + SR_FGT(SYS_TRCCNTRLDVR(1), HDFGRTR, TRC, 1), + SR_FGT(SYS_TRCCNTRLDVR(2), HDFGRTR, TRC, 1), + SR_FGT(SYS_TRCCNTRLDVR(3), HDFGRTR, TRC, 1), + SR_FGT(SYS_TRCCONFIGR, HDFGRTR, TRC, 1), + SR_FGT(SYS_TRCEVENTCTL0R, HDFGRTR, TRC, 1), + SR_FGT(SYS_TRCEVENTCTL1R, HDFGRTR, TRC, 1), + SR_FGT(SYS_TRCEXTINSELR(0), HDFGRTR, TRC, 1), + SR_FGT(SYS_TRCEXTINSELR(1), HDFGRTR, TRC, 1), + SR_FGT(SYS_TRCEXTINSELR(2), HDFGRTR, TRC, 1), + SR_FGT(SYS_TRCEXTINSELR(3), HDFGRTR, TRC, 1), + SR_FGT(SYS_TRCQCTLR, HDFGRTR, TRC, 1), + SR_FGT(SYS_TRCRSCTLR(2), HDFGRTR, TRC, 1), + SR_FGT(SYS_TRCRSCTLR(3), HDFGRTR, TRC, 1), + SR_FGT(SYS_TRCRSCTLR(4), HDFGRTR, TRC, 1), + SR_FGT(SYS_TRCRSCTLR(5), HDFGRTR, TRC, 1), + SR_FGT(SYS_TRCRSCTLR(6), HDFGRTR, TRC, 1), + SR_FGT(SYS_TRCRSCTLR(7), HDFGRTR, TRC, 1), + SR_FGT(SYS_TRCRSCTLR(8), HDFGRTR, TRC, 1), + SR_FGT(SYS_TRCRSCTLR(9), HDFGRTR, TRC, 1), + SR_FGT(SYS_TRCRSCTLR(10), HDFGRTR, TRC, 1), + SR_FGT(SYS_TRCRSCTLR(11), HDFGRTR, TRC, 1), + SR_FGT(SYS_TRCRSCTLR(12), HDFGRTR, TRC, 1), + SR_FGT(SYS_TRCRSCTLR(13), HDFGRTR, TRC, 1), + SR_FGT(SYS_TRCRSCTLR(14), HDFGRTR, TRC, 1), + SR_FGT(SYS_TRCRSCTLR(15), HDFGRTR, TRC, 1), + SR_FGT(SYS_TRCRSCTLR(16), HDFGRTR, TRC, 1), + SR_FGT(SYS_TRCRSCTLR(17), HDFGRTR, TRC, 1), 
+ SR_FGT(SYS_TRCRSCTLR(18), HDFGRTR, TRC, 1), + SR_FGT(SYS_TRCRSCTLR(19), HDFGRTR, TRC, 1), + SR_FGT(SYS_TRCRSCTLR(20), HDFGRTR, TRC, 1), + SR_FGT(SYS_TRCRSCTLR(21), HDFGRTR, TRC, 1), + SR_FGT(SYS_TRCRSCTLR(22), HDFGRTR, TRC, 1), + SR_FGT(SYS_TRCRSCTLR(23), HDFGRTR, TRC, 1), + SR_FGT(SYS_TRCRSCTLR(24), HDFGRTR, TRC, 1), + SR_FGT(SYS_TRCRSCTLR(25), HDFGRTR, TRC, 1), + SR_FGT(SYS_TRCRSCTLR(26), HDFGRTR, TRC, 1), + SR_FGT(SYS_TRCRSCTLR(27), HDFGRTR, TRC, 1), + SR_FGT(SYS_TRCRSCTLR(28), HDFGRTR, TRC, 1), + SR_FGT(SYS_TRCRSCTLR(29), HDFGRTR, TRC, 1), + SR_FGT(SYS_TRCRSCTLR(30), HDFGRTR, TRC, 1), + SR_FGT(SYS_TRCRSCTLR(31), HDFGRTR, TRC, 1), + SR_FGT(SYS_TRCRSR, HDFGRTR, TRC, 1), + SR_FGT(SYS_TRCSEQEVR(0), HDFGRTR, TRC, 1), + SR_FGT(SYS_TRCSEQEVR(1), HDFGRTR, TRC, 1), + SR_FGT(SYS_TRCSEQEVR(2), HDFGRTR, TRC, 1), + SR_FGT(SYS_TRCSEQRSTEVR, HDFGRTR, TRC, 1), + SR_FGT(SYS_TRCSSCCR(0), HDFGRTR, TRC, 1), + SR_FGT(SYS_TRCSSCCR(1), HDFGRTR, TRC, 1), + SR_FGT(SYS_TRCSSCCR(2), HDFGRTR, TRC, 1), + SR_FGT(SYS_TRCSSCCR(3), HDFGRTR, TRC, 1), + SR_FGT(SYS_TRCSSCCR(4), HDFGRTR, TRC, 1), + SR_FGT(SYS_TRCSSCCR(5), HDFGRTR, TRC, 1), + SR_FGT(SYS_TRCSSCCR(6), HDFGRTR, TRC, 1), + SR_FGT(SYS_TRCSSCCR(7), HDFGRTR, TRC, 1), + SR_FGT(SYS_TRCSSPCICR(0), HDFGRTR, TRC, 1), + SR_FGT(SYS_TRCSSPCICR(1), HDFGRTR, TRC, 1), + SR_FGT(SYS_TRCSSPCICR(2), HDFGRTR, TRC, 1), + SR_FGT(SYS_TRCSSPCICR(3), HDFGRTR, TRC, 1), + SR_FGT(SYS_TRCSSPCICR(4), HDFGRTR, TRC, 1), + SR_FGT(SYS_TRCSSPCICR(5), HDFGRTR, TRC, 1), + SR_FGT(SYS_TRCSSPCICR(6), HDFGRTR, TRC, 1), + SR_FGT(SYS_TRCSSPCICR(7), HDFGRTR, TRC, 1), + SR_FGT(SYS_TRCSTALLCTLR, HDFGRTR, TRC, 1), + SR_FGT(SYS_TRCSYNCPR, HDFGRTR, TRC, 1), + SR_FGT(SYS_TRCTRACEIDR, HDFGRTR, TRC, 1), + SR_FGT(SYS_TRCTSCTLR, HDFGRTR, TRC, 1), + SR_FGT(SYS_TRCVIIECTLR, HDFGRTR, TRC, 1), + SR_FGT(SYS_TRCVIPCSSCTLR, HDFGRTR, TRC, 1), + SR_FGT(SYS_TRCVISSCTLR, HDFGRTR, TRC, 1), + SR_FGT(SYS_TRCVMIDCCTLR0, HDFGRTR, TRC, 1), + SR_FGT(SYS_TRCVMIDCCTLR1, HDFGRTR, TRC, 1), + 
SR_FGT(SYS_TRCVMIDCVR(0), HDFGRTR, TRC, 1), + SR_FGT(SYS_TRCVMIDCVR(1), HDFGRTR, TRC, 1), + SR_FGT(SYS_TRCVMIDCVR(2), HDFGRTR, TRC, 1), + SR_FGT(SYS_TRCVMIDCVR(3), HDFGRTR, TRC, 1), + SR_FGT(SYS_TRCVMIDCVR(4), HDFGRTR, TRC, 1), + SR_FGT(SYS_TRCVMIDCVR(5), HDFGRTR, TRC, 1), + SR_FGT(SYS_TRCVMIDCVR(6), HDFGRTR, TRC, 1), + SR_FGT(SYS_TRCVMIDCVR(7), HDFGRTR, TRC, 1), + SR_FGT(SYS_PMSLATFR_EL1, HDFGRTR, PMSLATFR_EL1, 1), + SR_FGT(SYS_PMSIRR_EL1, HDFGRTR, PMSIRR_EL1, 1), + SR_FGT(SYS_PMSIDR_EL1, HDFGRTR, PMSIDR_EL1, 1), + SR_FGT(SYS_PMSICR_EL1, HDFGRTR, PMSICR_EL1, 1), + SR_FGT(SYS_PMSFCR_EL1, HDFGRTR, PMSFCR_EL1, 1), + SR_FGT(SYS_PMSEVFR_EL1, HDFGRTR, PMSEVFR_EL1, 1), + SR_FGT(SYS_PMSCR_EL1, HDFGRTR, PMSCR_EL1, 1), + SR_FGT(SYS_PMBSR_EL1, HDFGRTR, PMBSR_EL1, 1), + SR_FGT(SYS_PMBPTR_EL1, HDFGRTR, PMBPTR_EL1, 1), + SR_FGT(SYS_PMBLIMITR_EL1, HDFGRTR, PMBLIMITR_EL1, 1), + SR_FGT(SYS_PMMIR_EL1, HDFGRTR, PMMIR_EL1, 1), + SR_FGT(SYS_PMSELR_EL0, HDFGRTR, PMSELR_EL0, 1), + SR_FGT(SYS_PMOVSCLR_EL0, HDFGRTR, PMOVS, 1), + SR_FGT(SYS_PMOVSSET_EL0, HDFGRTR, PMOVS, 1), + SR_FGT(SYS_PMINTENCLR_EL1, HDFGRTR, PMINTEN, 1), + SR_FGT(SYS_PMINTENSET_EL1, HDFGRTR, PMINTEN, 1), + SR_FGT(SYS_PMCNTENCLR_EL0, HDFGRTR, PMCNTEN, 1), + SR_FGT(SYS_PMCNTENSET_EL0, HDFGRTR, PMCNTEN, 1), + SR_FGT(SYS_PMCCNTR_EL0, HDFGRTR, PMCCNTR_EL0, 1), + SR_FGT(SYS_PMCCFILTR_EL0, HDFGRTR, PMCCFILTR_EL0, 1), + SR_FGT(SYS_PMEVTYPERn_EL0(0), HDFGRTR, PMEVTYPERn_EL0, 1), + SR_FGT(SYS_PMEVTYPERn_EL0(1), HDFGRTR, PMEVTYPERn_EL0, 1), + SR_FGT(SYS_PMEVTYPERn_EL0(2), HDFGRTR, PMEVTYPERn_EL0, 1), + SR_FGT(SYS_PMEVTYPERn_EL0(3), HDFGRTR, PMEVTYPERn_EL0, 1), + SR_FGT(SYS_PMEVTYPERn_EL0(4), HDFGRTR, PMEVTYPERn_EL0, 1), + SR_FGT(SYS_PMEVTYPERn_EL0(5), HDFGRTR, PMEVTYPERn_EL0, 1), + SR_FGT(SYS_PMEVTYPERn_EL0(6), HDFGRTR, PMEVTYPERn_EL0, 1), + SR_FGT(SYS_PMEVTYPERn_EL0(7), HDFGRTR, PMEVTYPERn_EL0, 1), + SR_FGT(SYS_PMEVTYPERn_EL0(8), HDFGRTR, PMEVTYPERn_EL0, 1), + SR_FGT(SYS_PMEVTYPERn_EL0(9), HDFGRTR, PMEVTYPERn_EL0, 1), + 
SR_FGT(SYS_PMEVTYPERn_EL0(10), HDFGRTR, PMEVTYPERn_EL0, 1), + SR_FGT(SYS_PMEVTYPERn_EL0(11), HDFGRTR, PMEVTYPERn_EL0, 1), + SR_FGT(SYS_PMEVTYPERn_EL0(12), HDFGRTR, PMEVTYPERn_EL0, 1), + SR_FGT(SYS_PMEVTYPERn_EL0(13), HDFGRTR, PMEVTYPERn_EL0, 1), + SR_FGT(SYS_PMEVTYPERn_EL0(14), HDFGRTR, PMEVTYPERn_EL0, 1), + SR_FGT(SYS_PMEVTYPERn_EL0(15), HDFGRTR, PMEVTYPERn_EL0, 1), + SR_FGT(SYS_PMEVTYPERn_EL0(16), HDFGRTR, PMEVTYPERn_EL0, 1), + SR_FGT(SYS_PMEVTYPERn_EL0(17), HDFGRTR, PMEVTYPERn_EL0, 1), + SR_FGT(SYS_PMEVTYPERn_EL0(18), HDFGRTR, PMEVTYPERn_EL0, 1), + SR_FGT(SYS_PMEVTYPERn_EL0(19), HDFGRTR, PMEVTYPERn_EL0, 1), + SR_FGT(SYS_PMEVTYPERn_EL0(20), HDFGRTR, PMEVTYPERn_EL0, 1), + SR_FGT(SYS_PMEVTYPERn_EL0(21), HDFGRTR, PMEVTYPERn_EL0, 1), + SR_FGT(SYS_PMEVTYPERn_EL0(22), HDFGRTR, PMEVTYPERn_EL0, 1), + SR_FGT(SYS_PMEVTYPERn_EL0(23), HDFGRTR, PMEVTYPERn_EL0, 1), + SR_FGT(SYS_PMEVTYPERn_EL0(24), HDFGRTR, PMEVTYPERn_EL0, 1), + SR_FGT(SYS_PMEVTYPERn_EL0(25), HDFGRTR, PMEVTYPERn_EL0, 1), + SR_FGT(SYS_PMEVTYPERn_EL0(26), HDFGRTR, PMEVTYPERn_EL0, 1), + SR_FGT(SYS_PMEVTYPERn_EL0(27), HDFGRTR, PMEVTYPERn_EL0, 1), + SR_FGT(SYS_PMEVTYPERn_EL0(28), HDFGRTR, PMEVTYPERn_EL0, 1), + SR_FGT(SYS_PMEVTYPERn_EL0(29), HDFGRTR, PMEVTYPERn_EL0, 1), + SR_FGT(SYS_PMEVTYPERn_EL0(30), HDFGRTR, PMEVTYPERn_EL0, 1), + SR_FGT(SYS_PMEVCNTRn_EL0(0), HDFGRTR, PMEVCNTRn_EL0, 1), + SR_FGT(SYS_PMEVCNTRn_EL0(1), HDFGRTR, PMEVCNTRn_EL0, 1), + SR_FGT(SYS_PMEVCNTRn_EL0(2), HDFGRTR, PMEVCNTRn_EL0, 1), + SR_FGT(SYS_PMEVCNTRn_EL0(3), HDFGRTR, PMEVCNTRn_EL0, 1), + SR_FGT(SYS_PMEVCNTRn_EL0(4), HDFGRTR, PMEVCNTRn_EL0, 1), + SR_FGT(SYS_PMEVCNTRn_EL0(5), HDFGRTR, PMEVCNTRn_EL0, 1), + SR_FGT(SYS_PMEVCNTRn_EL0(6), HDFGRTR, PMEVCNTRn_EL0, 1), + SR_FGT(SYS_PMEVCNTRn_EL0(7), HDFGRTR, PMEVCNTRn_EL0, 1), + SR_FGT(SYS_PMEVCNTRn_EL0(8), HDFGRTR, PMEVCNTRn_EL0, 1), + SR_FGT(SYS_PMEVCNTRn_EL0(9), HDFGRTR, PMEVCNTRn_EL0, 1), + SR_FGT(SYS_PMEVCNTRn_EL0(10), HDFGRTR, PMEVCNTRn_EL0, 1), + SR_FGT(SYS_PMEVCNTRn_EL0(11), HDFGRTR, 
PMEVCNTRn_EL0, 1), + SR_FGT(SYS_PMEVCNTRn_EL0(12), HDFGRTR, PMEVCNTRn_EL0, 1), + SR_FGT(SYS_PMEVCNTRn_EL0(13), HDFGRTR, PMEVCNTRn_EL0, 1), + SR_FGT(SYS_PMEVCNTRn_EL0(14), HDFGRTR, PMEVCNTRn_EL0, 1), + SR_FGT(SYS_PMEVCNTRn_EL0(15), HDFGRTR, PMEVCNTRn_EL0, 1), + SR_FGT(SYS_PMEVCNTRn_EL0(16), HDFGRTR, PMEVCNTRn_EL0, 1), + SR_FGT(SYS_PMEVCNTRn_EL0(17), HDFGRTR, PMEVCNTRn_EL0, 1), + SR_FGT(SYS_PMEVCNTRn_EL0(18), HDFGRTR, PMEVCNTRn_EL0, 1), + SR_FGT(SYS_PMEVCNTRn_EL0(19), HDFGRTR, PMEVCNTRn_EL0, 1), + SR_FGT(SYS_PMEVCNTRn_EL0(20), HDFGRTR, PMEVCNTRn_EL0, 1), + SR_FGT(SYS_PMEVCNTRn_EL0(21), HDFGRTR, PMEVCNTRn_EL0, 1), + SR_FGT(SYS_PMEVCNTRn_EL0(22), HDFGRTR, PMEVCNTRn_EL0, 1), + SR_FGT(SYS_PMEVCNTRn_EL0(23), HDFGRTR, PMEVCNTRn_EL0, 1), + SR_FGT(SYS_PMEVCNTRn_EL0(24), HDFGRTR, PMEVCNTRn_EL0, 1), + SR_FGT(SYS_PMEVCNTRn_EL0(25), HDFGRTR, PMEVCNTRn_EL0, 1), + SR_FGT(SYS_PMEVCNTRn_EL0(26), HDFGRTR, PMEVCNTRn_EL0, 1), + SR_FGT(SYS_PMEVCNTRn_EL0(27), HDFGRTR, PMEVCNTRn_EL0, 1), + SR_FGT(SYS_PMEVCNTRn_EL0(28), HDFGRTR, PMEVCNTRn_EL0, 1), + SR_FGT(SYS_PMEVCNTRn_EL0(29), HDFGRTR, PMEVCNTRn_EL0, 1), + SR_FGT(SYS_PMEVCNTRn_EL0(30), HDFGRTR, PMEVCNTRn_EL0, 1), + SR_FGT(SYS_OSDLR_EL1, HDFGRTR, OSDLR_EL1, 1), + SR_FGT(SYS_OSECCR_EL1, HDFGRTR, OSECCR_EL1, 1), + SR_FGT(SYS_OSLSR_EL1, HDFGRTR, OSLSR_EL1, 1), + SR_FGT(SYS_DBGPRCR_EL1, HDFGRTR, DBGPRCR_EL1, 1), + SR_FGT(SYS_DBGAUTHSTATUS_EL1, HDFGRTR, DBGAUTHSTATUS_EL1, 1), + SR_FGT(SYS_DBGCLAIMSET_EL1, HDFGRTR, DBGCLAIM, 1), + SR_FGT(SYS_DBGCLAIMCLR_EL1, HDFGRTR, DBGCLAIM, 1), + SR_FGT(SYS_MDSCR_EL1, HDFGRTR, MDSCR_EL1, 1), + /* + * The trap bits capture *64* debug registers per bit, but the + * ARM ARM only describes the encoding for the first 16, and + * we don't really support more than that anyway. 
+ */ + SR_FGT(SYS_DBGWVRn_EL1(0), HDFGRTR, DBGWVRn_EL1, 1), + SR_FGT(SYS_DBGWVRn_EL1(1), HDFGRTR, DBGWVRn_EL1, 1), + SR_FGT(SYS_DBGWVRn_EL1(2), HDFGRTR, DBGWVRn_EL1, 1), + SR_FGT(SYS_DBGWVRn_EL1(3), HDFGRTR, DBGWVRn_EL1, 1), + SR_FGT(SYS_DBGWVRn_EL1(4), HDFGRTR, DBGWVRn_EL1, 1), + SR_FGT(SYS_DBGWVRn_EL1(5), HDFGRTR, DBGWVRn_EL1, 1), + SR_FGT(SYS_DBGWVRn_EL1(6), HDFGRTR, DBGWVRn_EL1, 1), + SR_FGT(SYS_DBGWVRn_EL1(7), HDFGRTR, DBGWVRn_EL1, 1), + SR_FGT(SYS_DBGWVRn_EL1(8), HDFGRTR, DBGWVRn_EL1, 1), + SR_FGT(SYS_DBGWVRn_EL1(9), HDFGRTR, DBGWVRn_EL1, 1), + SR_FGT(SYS_DBGWVRn_EL1(10), HDFGRTR, DBGWVRn_EL1, 1), + SR_FGT(SYS_DBGWVRn_EL1(11), HDFGRTR, DBGWVRn_EL1, 1), + SR_FGT(SYS_DBGWVRn_EL1(12), HDFGRTR, DBGWVRn_EL1, 1), + SR_FGT(SYS_DBGWVRn_EL1(13), HDFGRTR, DBGWVRn_EL1, 1), + SR_FGT(SYS_DBGWVRn_EL1(14), HDFGRTR, DBGWVRn_EL1, 1), + SR_FGT(SYS_DBGWVRn_EL1(15), HDFGRTR, DBGWVRn_EL1, 1), + SR_FGT(SYS_DBGWCRn_EL1(0), HDFGRTR, DBGWCRn_EL1, 1), + SR_FGT(SYS_DBGWCRn_EL1(1), HDFGRTR, DBGWCRn_EL1, 1), + SR_FGT(SYS_DBGWCRn_EL1(2), HDFGRTR, DBGWCRn_EL1, 1), + SR_FGT(SYS_DBGWCRn_EL1(3), HDFGRTR, DBGWCRn_EL1, 1), + SR_FGT(SYS_DBGWCRn_EL1(4), HDFGRTR, DBGWCRn_EL1, 1), + SR_FGT(SYS_DBGWCRn_EL1(5), HDFGRTR, DBGWCRn_EL1, 1), + SR_FGT(SYS_DBGWCRn_EL1(6), HDFGRTR, DBGWCRn_EL1, 1), + SR_FGT(SYS_DBGWCRn_EL1(7), HDFGRTR, DBGWCRn_EL1, 1), + SR_FGT(SYS_DBGWCRn_EL1(8), HDFGRTR, DBGWCRn_EL1, 1), + SR_FGT(SYS_DBGWCRn_EL1(9), HDFGRTR, DBGWCRn_EL1, 1), + SR_FGT(SYS_DBGWCRn_EL1(10), HDFGRTR, DBGWCRn_EL1, 1), + SR_FGT(SYS_DBGWCRn_EL1(11), HDFGRTR, DBGWCRn_EL1, 1), + SR_FGT(SYS_DBGWCRn_EL1(12), HDFGRTR, DBGWCRn_EL1, 1), + SR_FGT(SYS_DBGWCRn_EL1(13), HDFGRTR, DBGWCRn_EL1, 1), + SR_FGT(SYS_DBGWCRn_EL1(14), HDFGRTR, DBGWCRn_EL1, 1), + SR_FGT(SYS_DBGWCRn_EL1(15), HDFGRTR, DBGWCRn_EL1, 1), + SR_FGT(SYS_DBGBVRn_EL1(0), HDFGRTR, DBGBVRn_EL1, 1), + SR_FGT(SYS_DBGBVRn_EL1(1), HDFGRTR, DBGBVRn_EL1, 1), + SR_FGT(SYS_DBGBVRn_EL1(2), HDFGRTR, DBGBVRn_EL1, 1), + SR_FGT(SYS_DBGBVRn_EL1(3), HDFGRTR, DBGBVRn_EL1, 1), + 
SR_FGT(SYS_DBGBVRn_EL1(4), HDFGRTR, DBGBVRn_EL1, 1), + SR_FGT(SYS_DBGBVRn_EL1(5), HDFGRTR, DBGBVRn_EL1, 1), + SR_FGT(SYS_DBGBVRn_EL1(6), HDFGRTR, DBGBVRn_EL1, 1), + SR_FGT(SYS_DBGBVRn_EL1(7), HDFGRTR, DBGBVRn_EL1, 1), + SR_FGT(SYS_DBGBVRn_EL1(8), HDFGRTR, DBGBVRn_EL1, 1), + SR_FGT(SYS_DBGBVRn_EL1(9), HDFGRTR, DBGBVRn_EL1, 1), + SR_FGT(SYS_DBGBVRn_EL1(10), HDFGRTR, DBGBVRn_EL1, 1), + SR_FGT(SYS_DBGBVRn_EL1(11), HDFGRTR, DBGBVRn_EL1, 1), + SR_FGT(SYS_DBGBVRn_EL1(12), HDFGRTR, DBGBVRn_EL1, 1), + SR_FGT(SYS_DBGBVRn_EL1(13), HDFGRTR, DBGBVRn_EL1, 1), + SR_FGT(SYS_DBGBVRn_EL1(14), HDFGRTR, DBGBVRn_EL1, 1), + SR_FGT(SYS_DBGBVRn_EL1(15), HDFGRTR, DBGBVRn_EL1, 1), + SR_FGT(SYS_DBGBCRn_EL1(0), HDFGRTR, DBGBCRn_EL1, 1), + SR_FGT(SYS_DBGBCRn_EL1(1), HDFGRTR, DBGBCRn_EL1, 1), + SR_FGT(SYS_DBGBCRn_EL1(2), HDFGRTR, DBGBCRn_EL1, 1), + SR_FGT(SYS_DBGBCRn_EL1(3), HDFGRTR, DBGBCRn_EL1, 1), + SR_FGT(SYS_DBGBCRn_EL1(4), HDFGRTR, DBGBCRn_EL1, 1), + SR_FGT(SYS_DBGBCRn_EL1(5), HDFGRTR, DBGBCRn_EL1, 1), + SR_FGT(SYS_DBGBCRn_EL1(6), HDFGRTR, DBGBCRn_EL1, 1), + SR_FGT(SYS_DBGBCRn_EL1(7), HDFGRTR, DBGBCRn_EL1, 1), + SR_FGT(SYS_DBGBCRn_EL1(8), HDFGRTR, DBGBCRn_EL1, 1), + SR_FGT(SYS_DBGBCRn_EL1(9), HDFGRTR, DBGBCRn_EL1, 1), + SR_FGT(SYS_DBGBCRn_EL1(10), HDFGRTR, DBGBCRn_EL1, 1), + SR_FGT(SYS_DBGBCRn_EL1(11), HDFGRTR, DBGBCRn_EL1, 1), + SR_FGT(SYS_DBGBCRn_EL1(12), HDFGRTR, DBGBCRn_EL1, 1), + SR_FGT(SYS_DBGBCRn_EL1(13), HDFGRTR, DBGBCRn_EL1, 1), + SR_FGT(SYS_DBGBCRn_EL1(14), HDFGRTR, DBGBCRn_EL1, 1), + SR_FGT(SYS_DBGBCRn_EL1(15), HDFGRTR, DBGBCRn_EL1, 1), + /* + * HDFGWTR_EL2 + * + * Although HDFGRTR_EL2 and HDFGWTR_EL2 registers largely + * overlap in their bit assignment, there are a number of bits + * that are RES0 on one side, and an actual trap bit on the + * other. The policy chosen here is to describe all the + * read-side mappings, and only the write-side mappings that + * differ from the read side, and the trap handler will pick + * the correct shadow register based on the access type. 
+ */ + SR_FGT(SYS_TRFCR_EL1, HDFGWTR, TRFCR_EL1, 1), + SR_FGT(SYS_TRCOSLAR, HDFGWTR, TRCOSLAR, 1), + SR_FGT(SYS_PMCR_EL0, HDFGWTR, PMCR_EL0, 1), + SR_FGT(SYS_PMSWINC_EL0, HDFGWTR, PMSWINC_EL0, 1), + SR_FGT(SYS_OSLAR_EL1, HDFGWTR, OSLAR_EL1, 1), +}; + +static union trap_config get_trap_config(u32 sysreg) +{ + return (union trap_config) { + .val = xa_to_value(xa_load(&sr_forward_xa, sysreg)), + }; +} + +static __init void print_nv_trap_error(const struct encoding_to_trap_config *tc, + const char *type, int err) +{ + kvm_err("%s line %d encoding range " + "(%d, %d, %d, %d, %d) - (%d, %d, %d, %d, %d) (err=%d)\n", + type, tc->line, + sys_reg_Op0(tc->encoding), sys_reg_Op1(tc->encoding), + sys_reg_CRn(tc->encoding), sys_reg_CRm(tc->encoding), + sys_reg_Op2(tc->encoding), + sys_reg_Op0(tc->end), sys_reg_Op1(tc->end), + sys_reg_CRn(tc->end), sys_reg_CRm(tc->end), + sys_reg_Op2(tc->end), + err); +} + +int __init populate_nv_trap_config(void) +{ + int ret = 0; + + BUILD_BUG_ON(sizeof(union trap_config) != sizeof(void *)); + BUILD_BUG_ON(__NR_CGT_GROUP_IDS__ > BIT(TC_CGT_BITS)); + BUILD_BUG_ON(__NR_FGT_GROUP_IDS__ > BIT(TC_FGT_BITS)); + BUILD_BUG_ON(__NR_FG_FILTER_IDS__ > BIT(TC_FGF_BITS)); + + for (int i = 0; i < ARRAY_SIZE(encoding_to_cgt); i++) { + const struct encoding_to_trap_config *cgt = &encoding_to_cgt[i]; + void *prev; + + if (cgt->tc.val & BIT(63)) { + kvm_err("CGT[%d] has MBZ bit set\n", i); + ret = -EINVAL; + } + + if (cgt->encoding != cgt->end) { + prev = xa_store_range(&sr_forward_xa, + cgt->encoding, cgt->end, + xa_mk_value(cgt->tc.val), + GFP_KERNEL); + } else { + prev = xa_store(&sr_forward_xa, cgt->encoding, + xa_mk_value(cgt->tc.val), GFP_KERNEL); + if (prev && !xa_is_err(prev)) { + ret = -EINVAL; + print_nv_trap_error(cgt, "Duplicate CGT", ret); + } + } + + if (xa_is_err(prev)) { + ret = xa_err(prev); + print_nv_trap_error(cgt, "Failed CGT insertion", ret); + } + } + + kvm_info("nv: %ld coarse grained trap handlers\n", + ARRAY_SIZE(encoding_to_cgt)); + + 
if (!cpus_have_final_cap(ARM64_HAS_FGT)) + goto check_mcb; + + for (int i = 0; i < ARRAY_SIZE(encoding_to_fgt); i++) { + const struct encoding_to_trap_config *fgt = &encoding_to_fgt[i]; + union trap_config tc; + + if (fgt->tc.fgt >= __NR_FGT_GROUP_IDS__) { + ret = -EINVAL; + print_nv_trap_error(fgt, "Invalid FGT", ret); + } + + tc = get_trap_config(fgt->encoding); + + if (tc.fgt) { + ret = -EINVAL; + print_nv_trap_error(fgt, "Duplicate FGT", ret); + } + + tc.val |= fgt->tc.val; + xa_store(&sr_forward_xa, fgt->encoding, + xa_mk_value(tc.val), GFP_KERNEL); + } + + kvm_info("nv: %ld fine grained trap handlers\n", + ARRAY_SIZE(encoding_to_fgt)); + +check_mcb: + for (int id = __MULTIPLE_CONTROL_BITS__; id < __COMPLEX_CONDITIONS__; id++) { + const enum cgt_group_id *cgids; + + cgids = coarse_control_combo[id - __MULTIPLE_CONTROL_BITS__]; + + for (int i = 0; cgids[i] != __RESERVED__; i++) { + if (cgids[i] >= __MULTIPLE_CONTROL_BITS__) { + kvm_err("Recursive MCB %d/%d\n", id, cgids[i]); + ret = -EINVAL; + } + } + } + + if (ret) + xa_destroy(&sr_forward_xa); + + return ret; +} + +static enum trap_behaviour get_behaviour(struct kvm_vcpu *vcpu, + const struct trap_bits *tb) +{ + enum trap_behaviour b = BEHAVE_HANDLE_LOCALLY; + u64 val; + + val = __vcpu_sys_reg(vcpu, tb->index); + if ((val & tb->mask) == tb->value) + b |= tb->behaviour; + + return b; +} + +static enum trap_behaviour __compute_trap_behaviour(struct kvm_vcpu *vcpu, + const enum cgt_group_id id, + enum trap_behaviour b) +{ + switch (id) { + const enum cgt_group_id *cgids; + + case __RESERVED__ ... __MULTIPLE_CONTROL_BITS__ - 1: + if (likely(id != __RESERVED__)) + b |= get_behaviour(vcpu, &coarse_trap_bits[id]); + break; + case __MULTIPLE_CONTROL_BITS__ ... __COMPLEX_CONDITIONS__ - 1: + /* Yes, this is recursive. Don't do anything stupid. 
*/ + cgids = coarse_control_combo[id - __MULTIPLE_CONTROL_BITS__]; + for (int i = 0; cgids[i] != __RESERVED__; i++) + b |= __compute_trap_behaviour(vcpu, cgids[i], b); + break; + default: + if (ARRAY_SIZE(ccc)) + b |= ccc[id - __COMPLEX_CONDITIONS__](vcpu); + break; + } + + return b; +} + +static enum trap_behaviour compute_trap_behaviour(struct kvm_vcpu *vcpu, + const union trap_config tc) +{ + enum trap_behaviour b = BEHAVE_HANDLE_LOCALLY; + + return __compute_trap_behaviour(vcpu, tc.cgt, b); +} + +static bool check_fgt_bit(u64 val, const union trap_config tc) +{ + return ((val >> tc.bit) & 1) == tc.pol; +} + +#define sanitised_sys_reg(vcpu, reg) \ + ({ \ + u64 __val; \ + __val = __vcpu_sys_reg(vcpu, reg); \ + __val &= ~__ ## reg ## _RES0; \ + (__val); \ + }) + +bool __check_nv_sr_forward(struct kvm_vcpu *vcpu) +{ + union trap_config tc; + enum trap_behaviour b; + bool is_read; + u32 sysreg; + u64 esr, val; + + if (!vcpu_has_nv(vcpu) || is_hyp_ctxt(vcpu)) + return false; + + esr = kvm_vcpu_get_esr(vcpu); + sysreg = esr_sys64_to_sysreg(esr); + is_read = (esr & ESR_ELx_SYS64_ISS_DIR_MASK) == ESR_ELx_SYS64_ISS_DIR_READ; + + tc = get_trap_config(sysreg); + + /* + * A value of 0 for the whole entry means that we know nothing + * for this sysreg, and that it cannot be re-injected into the + * nested hypervisor. In this situation, let's cut it short. + * + * Note that ultimately, we could also make use of the xarray + * to store the index of the sysreg in the local descriptor + * array, avoiding another search... Hint, hint... 
+ */ + if (!tc.val) + return false; + + switch ((enum fgt_group_id)tc.fgt) { + case __NO_FGT_GROUP__: + break; + + case HFGxTR_GROUP: + if (is_read) + val = sanitised_sys_reg(vcpu, HFGRTR_EL2); + else + val = sanitised_sys_reg(vcpu, HFGWTR_EL2); + break; + + case HDFGRTR_GROUP: + case HDFGWTR_GROUP: + if (is_read) + val = sanitised_sys_reg(vcpu, HDFGRTR_EL2); + else + val = sanitised_sys_reg(vcpu, HDFGWTR_EL2); + break; + + case HFGITR_GROUP: + val = sanitised_sys_reg(vcpu, HFGITR_EL2); + switch (tc.fgf) { + u64 tmp; + + case __NO_FGF__: + break; + + case HCRX_FGTnXS: + tmp = sanitised_sys_reg(vcpu, HCRX_EL2); + if (tmp & HCRX_EL2_FGTnXS) + tc.fgt = __NO_FGT_GROUP__; + } + break; + + case __NR_FGT_GROUP_IDS__: + /* Something is really wrong, bail out */ + WARN_ONCE(1, "__NR_FGT_GROUP_IDS__"); + return false; + } + + if (tc.fgt != __NO_FGT_GROUP__ && check_fgt_bit(val, tc)) + goto inject; + + b = compute_trap_behaviour(vcpu, tc); + + if (((b & BEHAVE_FORWARD_READ) && is_read) || + ((b & BEHAVE_FORWARD_WRITE) && !is_read)) + goto inject; + + return false; + +inject: + trace_kvm_forward_sysreg_trap(vcpu, sysreg, is_read); + + kvm_inject_nested_sync(vcpu, kvm_vcpu_get_esr(vcpu)); + return true; +} + static u64 kvm_check_illegal_exception_return(struct kvm_vcpu *vcpu, u64 spsr) { u64 mode = spsr & PSR_MODE_MASK; diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index 20280a5233f6..95f6945c4432 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -884,21 +884,6 @@ u32 __attribute_const__ kvm_target_cpu(void) return KVM_ARM_TARGET_GENERIC_V8; } -void kvm_vcpu_preferred_target(struct kvm_vcpu_init *init) -{ - u32 target = kvm_target_cpu(); - - memset(init, 0, sizeof(*init)); - - /* - * For now, we don't return any features. - * In future, we might use features to return target - * specific features available for the preferred - * target type. 
- */ - init->target = (__u32)target; -} - int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) { return -EINVAL; diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c index 6dcd6604b6bc..617ae6dea5d5 100644 --- a/arch/arm64/kvm/handle_exit.c +++ b/arch/arm64/kvm/handle_exit.c @@ -222,7 +222,33 @@ static int kvm_handle_eret(struct kvm_vcpu *vcpu) if (kvm_vcpu_get_esr(vcpu) & ESR_ELx_ERET_ISS_ERET) return kvm_handle_ptrauth(vcpu); - kvm_emulate_nested_eret(vcpu); + /* + * If we got here, two possibilities: + * + * - the guest is in EL2, and we need to fully emulate ERET + * + * - the guest is in EL1, and we need to reinject the + * exception into the L1 hypervisor. + * + * If KVM ever traps ERET for its own use, we'll have to + * revisit this. + */ + if (is_hyp_ctxt(vcpu)) + kvm_emulate_nested_eret(vcpu); + else + kvm_inject_nested_sync(vcpu, kvm_vcpu_get_esr(vcpu)); + + return 1; +} + +static int handle_svc(struct kvm_vcpu *vcpu) +{ + /* + * So far, SVC traps only for NV via HFGITR_EL2. A SVC from a + * 32bit guest would be caught by vpcu_mode_is_bad_32bit(), so + * we should only have to deal with a 64 bit exception. 
+ */ + kvm_inject_nested_sync(vcpu, kvm_vcpu_get_esr(vcpu)); return 1; } @@ -239,6 +265,7 @@ static exit_handle_fn arm_exit_handlers[] = { [ESR_ELx_EC_SMC32] = handle_smc, [ESR_ELx_EC_HVC64] = handle_hvc, [ESR_ELx_EC_SMC64] = handle_smc, + [ESR_ELx_EC_SVC64] = handle_svc, [ESR_ELx_EC_SYS64] = kvm_handle_sys_reg, [ESR_ELx_EC_SVE] = handle_sve, [ESR_ELx_EC_ERET] = kvm_handle_eret, diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h index 34f222af6165..9cfe6bd1dbe4 100644 --- a/arch/arm64/kvm/hyp/include/hyp/switch.h +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -70,20 +70,26 @@ static inline void __activate_traps_fpsimd32(struct kvm_vcpu *vcpu) } } -static inline bool __hfgxtr_traps_required(void) -{ - if (cpus_have_final_cap(ARM64_SME)) - return true; - - if (cpus_have_final_cap(ARM64_WORKAROUND_AMPERE_AC03_CPU_38)) - return true; +#define compute_clr_set(vcpu, reg, clr, set) \ + do { \ + u64 hfg; \ + hfg = __vcpu_sys_reg(vcpu, reg) & ~__ ## reg ## _RES0; \ + set |= hfg & __ ## reg ## _MASK; \ + clr |= ~hfg & __ ## reg ## _nMASK; \ + } while(0) - return false; -} -static inline void __activate_traps_hfgxtr(void) +static inline void __activate_traps_hfgxtr(struct kvm_vcpu *vcpu) { + struct kvm_cpu_context *hctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt; u64 r_clr = 0, w_clr = 0, r_set = 0, w_set = 0, tmp; + u64 r_val, w_val; + + if (!cpus_have_final_cap(ARM64_HAS_FGT)) + return; + + ctxt_sys_reg(hctxt, HFGRTR_EL2) = read_sysreg_s(SYS_HFGRTR_EL2); + ctxt_sys_reg(hctxt, HFGWTR_EL2) = read_sysreg_s(SYS_HFGWTR_EL2); if (cpus_have_final_cap(ARM64_SME)) { tmp = HFGxTR_EL2_nSMPRI_EL1_MASK | HFGxTR_EL2_nTPIDR2_EL0_MASK; @@ -98,26 +104,72 @@ static inline void __activate_traps_hfgxtr(void) if (cpus_have_final_cap(ARM64_WORKAROUND_AMPERE_AC03_CPU_38)) w_set |= HFGxTR_EL2_TCR_EL1_MASK; - sysreg_clear_set_s(SYS_HFGRTR_EL2, r_clr, r_set); - sysreg_clear_set_s(SYS_HFGWTR_EL2, w_clr, w_set); + if (vcpu_has_nv(vcpu) && 
!is_hyp_ctxt(vcpu)) { + compute_clr_set(vcpu, HFGRTR_EL2, r_clr, r_set); + compute_clr_set(vcpu, HFGWTR_EL2, w_clr, w_set); + } + + /* The default is not to trap anything but ACCDATA_EL1 */ + r_val = __HFGRTR_EL2_nMASK & ~HFGxTR_EL2_nACCDATA_EL1; + r_val |= r_set; + r_val &= ~r_clr; + + w_val = __HFGWTR_EL2_nMASK & ~HFGxTR_EL2_nACCDATA_EL1; + w_val |= w_set; + w_val &= ~w_clr; + + write_sysreg_s(r_val, SYS_HFGRTR_EL2); + write_sysreg_s(w_val, SYS_HFGWTR_EL2); + + if (!vcpu_has_nv(vcpu) || is_hyp_ctxt(vcpu)) + return; + + ctxt_sys_reg(hctxt, HFGITR_EL2) = read_sysreg_s(SYS_HFGITR_EL2); + + r_set = r_clr = 0; + compute_clr_set(vcpu, HFGITR_EL2, r_clr, r_set); + r_val = __HFGITR_EL2_nMASK; + r_val |= r_set; + r_val &= ~r_clr; + + write_sysreg_s(r_val, SYS_HFGITR_EL2); + + ctxt_sys_reg(hctxt, HDFGRTR_EL2) = read_sysreg_s(SYS_HDFGRTR_EL2); + ctxt_sys_reg(hctxt, HDFGWTR_EL2) = read_sysreg_s(SYS_HDFGWTR_EL2); + + r_clr = r_set = w_clr = w_set = 0; + + compute_clr_set(vcpu, HDFGRTR_EL2, r_clr, r_set); + compute_clr_set(vcpu, HDFGWTR_EL2, w_clr, w_set); + + r_val = __HDFGRTR_EL2_nMASK; + r_val |= r_set; + r_val &= ~r_clr; + + w_val = __HDFGWTR_EL2_nMASK; + w_val |= w_set; + w_val &= ~w_clr; + + write_sysreg_s(r_val, SYS_HDFGRTR_EL2); + write_sysreg_s(w_val, SYS_HDFGWTR_EL2); } -static inline void __deactivate_traps_hfgxtr(void) +static inline void __deactivate_traps_hfgxtr(struct kvm_vcpu *vcpu) { - u64 r_clr = 0, w_clr = 0, r_set = 0, w_set = 0, tmp; + struct kvm_cpu_context *hctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt; - if (cpus_have_final_cap(ARM64_SME)) { - tmp = HFGxTR_EL2_nSMPRI_EL1_MASK | HFGxTR_EL2_nTPIDR2_EL0_MASK; + if (!cpus_have_final_cap(ARM64_HAS_FGT)) + return; - r_set |= tmp; - w_set |= tmp; - } + write_sysreg_s(ctxt_sys_reg(hctxt, HFGRTR_EL2), SYS_HFGRTR_EL2); + write_sysreg_s(ctxt_sys_reg(hctxt, HFGWTR_EL2), SYS_HFGWTR_EL2); - if (cpus_have_final_cap(ARM64_WORKAROUND_AMPERE_AC03_CPU_38)) - w_clr |= HFGxTR_EL2_TCR_EL1_MASK; + if (!vcpu_has_nv(vcpu) || 
is_hyp_ctxt(vcpu)) + return; - sysreg_clear_set_s(SYS_HFGRTR_EL2, r_clr, r_set); - sysreg_clear_set_s(SYS_HFGWTR_EL2, w_clr, w_set); + write_sysreg_s(ctxt_sys_reg(hctxt, HFGITR_EL2), SYS_HFGITR_EL2); + write_sysreg_s(ctxt_sys_reg(hctxt, HDFGRTR_EL2), SYS_HDFGRTR_EL2); + write_sysreg_s(ctxt_sys_reg(hctxt, HDFGWTR_EL2), SYS_HDFGWTR_EL2); } static inline void __activate_traps_common(struct kvm_vcpu *vcpu) @@ -145,8 +197,21 @@ static inline void __activate_traps_common(struct kvm_vcpu *vcpu) vcpu->arch.mdcr_el2_host = read_sysreg(mdcr_el2); write_sysreg(vcpu->arch.mdcr_el2, mdcr_el2); - if (__hfgxtr_traps_required()) - __activate_traps_hfgxtr(); + if (cpus_have_final_cap(ARM64_HAS_HCX)) { + u64 hcrx = HCRX_GUEST_FLAGS; + if (vcpu_has_nv(vcpu) && !is_hyp_ctxt(vcpu)) { + u64 clr = 0, set = 0; + + compute_clr_set(vcpu, HCRX_EL2, clr, set); + + hcrx |= set; + hcrx &= ~clr; + } + + write_sysreg_s(hcrx, SYS_HCRX_EL2); + } + + __activate_traps_hfgxtr(vcpu); } static inline void __deactivate_traps_common(struct kvm_vcpu *vcpu) @@ -162,8 +227,10 @@ static inline void __deactivate_traps_common(struct kvm_vcpu *vcpu) vcpu_clear_flag(vcpu, PMUSERENR_ON_CPU); } - if (__hfgxtr_traps_required()) - __deactivate_traps_hfgxtr(); + if (cpus_have_final_cap(ARM64_HAS_HCX)) + write_sysreg_s(HCRX_HOST_FLAGS, SYS_HCRX_EL2); + + __deactivate_traps_hfgxtr(vcpu); } static inline void ___activate_traps(struct kvm_vcpu *vcpu) @@ -177,9 +244,6 @@ static inline void ___activate_traps(struct kvm_vcpu *vcpu) if (cpus_have_final_cap(ARM64_HAS_RAS_EXTN) && (hcr & HCR_VSE)) write_sysreg_s(vcpu->arch.vsesr_el2, SYS_VSESR_EL2); - - if (cpus_have_final_cap(ARM64_HAS_HCX)) - write_sysreg_s(HCRX_GUEST_FLAGS, SYS_HCRX_EL2); } static inline void ___deactivate_traps(struct kvm_vcpu *vcpu) @@ -194,9 +258,6 @@ static inline void ___deactivate_traps(struct kvm_vcpu *vcpu) vcpu->arch.hcr_el2 &= ~HCR_VSE; vcpu->arch.hcr_el2 |= read_sysreg(hcr_el2) & HCR_VSE; } - - if (cpus_have_final_cap(ARM64_HAS_HCX)) - 
write_sysreg_s(HCRX_HOST_FLAGS, SYS_HCRX_EL2); } static inline bool __populate_fault_info(struct kvm_vcpu *vcpu) diff --git a/arch/arm64/kvm/hyp/include/nvhe/mm.h b/arch/arm64/kvm/hyp/include/nvhe/mm.h index d5ec972b5c1e..230e4f2527de 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/mm.h +++ b/arch/arm64/kvm/hyp/include/nvhe/mm.h @@ -26,6 +26,7 @@ int pkvm_create_mappings_locked(void *from, void *to, enum kvm_pgtable_prot prot int __pkvm_create_private_mapping(phys_addr_t phys, size_t size, enum kvm_pgtable_prot prot, unsigned long *haddr); +int pkvm_create_stack(phys_addr_t phys, unsigned long *haddr); int pkvm_alloc_private_va_range(size_t size, unsigned long *haddr); #endif /* __KVM_HYP_MM_H */ diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c index a169c619db60..857d9bc04fd4 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c +++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c @@ -135,6 +135,16 @@ static void handle___kvm_tlb_flush_vmid_ipa_nsh(struct kvm_cpu_context *host_ctx __kvm_tlb_flush_vmid_ipa_nsh(kern_hyp_va(mmu), ipa, level); } +static void +handle___kvm_tlb_flush_vmid_range(struct kvm_cpu_context *host_ctxt) +{ + DECLARE_REG(struct kvm_s2_mmu *, mmu, host_ctxt, 1); + DECLARE_REG(phys_addr_t, start, host_ctxt, 2); + DECLARE_REG(unsigned long, pages, host_ctxt, 3); + + __kvm_tlb_flush_vmid_range(kern_hyp_va(mmu), start, pages); +} + static void handle___kvm_tlb_flush_vmid(struct kvm_cpu_context *host_ctxt) { DECLARE_REG(struct kvm_s2_mmu *, mmu, host_ctxt, 1); @@ -327,6 +337,7 @@ static const hcall_t host_hcall[] = { HANDLE_FUNC(__kvm_tlb_flush_vmid_ipa), HANDLE_FUNC(__kvm_tlb_flush_vmid_ipa_nsh), HANDLE_FUNC(__kvm_tlb_flush_vmid), + HANDLE_FUNC(__kvm_tlb_flush_vmid_range), HANDLE_FUNC(__kvm_flush_cpu_context), HANDLE_FUNC(__kvm_timer_set_cntvoff), HANDLE_FUNC(__vgic_v3_read_vmcr), diff --git a/arch/arm64/kvm/hyp/nvhe/mm.c b/arch/arm64/kvm/hyp/nvhe/mm.c index 318298eb3d6b..65a7a186d7b2 100644 --- a/arch/arm64/kvm/hyp/nvhe/mm.c +++ 
b/arch/arm64/kvm/hyp/nvhe/mm.c @@ -44,6 +44,27 @@ static int __pkvm_create_mappings(unsigned long start, unsigned long size, return err; } +static int __pkvm_alloc_private_va_range(unsigned long start, size_t size) +{ + unsigned long cur; + + hyp_assert_lock_held(&pkvm_pgd_lock); + + if (!start || start < __io_map_base) + return -EINVAL; + + /* The allocated size is always a multiple of PAGE_SIZE */ + cur = start + PAGE_ALIGN(size); + + /* Are we overflowing on the vmemmap ? */ + if (cur > __hyp_vmemmap) + return -ENOMEM; + + __io_map_base = cur; + + return 0; +} + /** * pkvm_alloc_private_va_range - Allocates a private VA range. * @size: The size of the VA range to reserve. @@ -56,27 +77,16 @@ static int __pkvm_create_mappings(unsigned long start, unsigned long size, */ int pkvm_alloc_private_va_range(size_t size, unsigned long *haddr) { - unsigned long base, addr; - int ret = 0; + unsigned long addr; + int ret; hyp_spin_lock(&pkvm_pgd_lock); - - /* Align the allocation based on the order of its size */ - addr = ALIGN(__io_map_base, PAGE_SIZE << get_order(size)); - - /* The allocated size is always a multiple of PAGE_SIZE */ - base = addr + PAGE_ALIGN(size); - - /* Are we overflowing on the vmemmap ? */ - if (!addr || base > __hyp_vmemmap) - ret = -ENOMEM; - else { - __io_map_base = base; - *haddr = addr; - } - + addr = __io_map_base; + ret = __pkvm_alloc_private_va_range(addr, size); hyp_spin_unlock(&pkvm_pgd_lock); + *haddr = addr; + return ret; } @@ -340,6 +350,45 @@ int hyp_create_idmap(u32 hyp_va_bits) return __pkvm_create_mappings(start, end - start, start, PAGE_HYP_EXEC); } +int pkvm_create_stack(phys_addr_t phys, unsigned long *haddr) +{ + unsigned long addr, prev_base; + size_t size; + int ret; + + hyp_spin_lock(&pkvm_pgd_lock); + + prev_base = __io_map_base; + /* + * Efficient stack verification using the PAGE_SHIFT bit implies + * an alignment of our allocation on the order of the size. 
+ */ + size = PAGE_SIZE * 2; + addr = ALIGN(__io_map_base, size); + + ret = __pkvm_alloc_private_va_range(addr, size); + if (!ret) { + /* + * Since the stack grows downwards, map the stack to the page + * at the higher address and leave the lower guard page + * unbacked. + * + * Any valid stack address now has the PAGE_SHIFT bit as 1 + * and addresses corresponding to the guard page have the + * PAGE_SHIFT bit as 0 - this is used for overflow detection. + */ + ret = kvm_pgtable_hyp_map(&pkvm_pgtable, addr + PAGE_SIZE, + PAGE_SIZE, phys, PAGE_HYP); + if (ret) + __io_map_base = prev_base; + } + hyp_spin_unlock(&pkvm_pgd_lock); + + *haddr = addr + size; + + return ret; +} + static void *admit_host_page(void *arg) { struct kvm_hyp_memcache *host_mc = arg; diff --git a/arch/arm64/kvm/hyp/nvhe/setup.c b/arch/arm64/kvm/hyp/nvhe/setup.c index bb98630dfeaf..0d5e0a89ddce 100644 --- a/arch/arm64/kvm/hyp/nvhe/setup.c +++ b/arch/arm64/kvm/hyp/nvhe/setup.c @@ -113,7 +113,6 @@ static int recreate_hyp_mappings(phys_addr_t phys, unsigned long size, for (i = 0; i < hyp_nr_cpus; i++) { struct kvm_nvhe_init_params *params = per_cpu_ptr(&kvm_init_params, i); - unsigned long hyp_addr; start = (void *)kern_hyp_va(per_cpu_base[i]); end = start + PAGE_ALIGN(hyp_percpu_size); @@ -121,33 +120,9 @@ static int recreate_hyp_mappings(phys_addr_t phys, unsigned long size, if (ret) return ret; - /* - * Allocate a contiguous HYP private VA range for the stack - * and guard page. The allocation is also aligned based on - * the order of its size. - */ - ret = pkvm_alloc_private_va_range(PAGE_SIZE * 2, &hyp_addr); + ret = pkvm_create_stack(params->stack_pa, ¶ms->stack_hyp_va); if (ret) return ret; - - /* - * Since the stack grows downwards, map the stack to the page - * at the higher address and leave the lower guard page - * unbacked. 
- * - * Any valid stack address now has the PAGE_SHIFT bit as 1 - * and addresses corresponding to the guard page have the - * PAGE_SHIFT bit as 0 - this is used for overflow detection. - */ - hyp_spin_lock(&pkvm_pgd_lock); - ret = kvm_pgtable_hyp_map(&pkvm_pgtable, hyp_addr + PAGE_SIZE, - PAGE_SIZE, params->stack_pa, PAGE_HYP); - hyp_spin_unlock(&pkvm_pgd_lock); - if (ret) - return ret; - - /* Update stack_hyp_va to end of the stack's private VA range */ - params->stack_hyp_va = hyp_addr + (2 * PAGE_SIZE); } /* diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c index e89a23153e85..c353a06ee7e6 100644 --- a/arch/arm64/kvm/hyp/nvhe/switch.c +++ b/arch/arm64/kvm/hyp/nvhe/switch.c @@ -236,7 +236,7 @@ static void early_exit_filter(struct kvm_vcpu *vcpu, u64 *exit_code) * KVM_ARM_VCPU_INIT, however, this is likely not possible for * protected VMs. */ - vcpu->arch.target = -1; + vcpu_clear_flag(vcpu, VCPU_INITIALIZED); *exit_code &= BIT(ARM_EXIT_WITH_SERROR_BIT); *exit_code |= ARM_EXCEPTION_IL; } diff --git a/arch/arm64/kvm/hyp/nvhe/tlb.c b/arch/arm64/kvm/hyp/nvhe/tlb.c index b9991bbd8e3f..1b265713d6be 100644 --- a/arch/arm64/kvm/hyp/nvhe/tlb.c +++ b/arch/arm64/kvm/hyp/nvhe/tlb.c @@ -182,6 +182,36 @@ void __kvm_tlb_flush_vmid_ipa_nsh(struct kvm_s2_mmu *mmu, __tlb_switch_to_host(&cxt); } +void __kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu, + phys_addr_t start, unsigned long pages) +{ + struct tlb_inv_context cxt; + unsigned long stride; + + /* + * Since the range of addresses may not be mapped at + * the same level, assume the worst case as PAGE_SIZE + */ + stride = PAGE_SIZE; + start = round_down(start, stride); + + /* Switch to requested VMID */ + __tlb_switch_to_guest(mmu, &cxt, false); + + __flush_s2_tlb_range_op(ipas2e1is, start, pages, stride, 0); + + dsb(ish); + __tlbi(vmalle1is); + dsb(ish); + isb(); + + /* See the comment in __kvm_tlb_flush_vmid_ipa() */ + if (icache_is_vpipt()) + icache_inval_all_pou(); + + 
__tlb_switch_to_host(&cxt); +} + void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu) { struct tlb_inv_context cxt; diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index f7a93ef29250..f155b8c9e98c 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -670,6 +670,26 @@ static bool stage2_has_fwb(struct kvm_pgtable *pgt) return !(pgt->flags & KVM_PGTABLE_S2_NOFWB); } +void kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu, + phys_addr_t addr, size_t size) +{ + unsigned long pages, inval_pages; + + if (!system_supports_tlb_range()) { + kvm_call_hyp(__kvm_tlb_flush_vmid, mmu); + return; + } + + pages = size >> PAGE_SHIFT; + while (pages > 0) { + inval_pages = min(pages, MAX_TLBI_RANGE_PAGES); + kvm_call_hyp(__kvm_tlb_flush_vmid_range, mmu, addr, inval_pages); + + addr += inval_pages << PAGE_SHIFT; + pages -= inval_pages; + } +} + #define KVM_S2_MEMATTR(pgt, attr) PAGE_S2_MEMATTR(attr, stage2_has_fwb(pgt)) static int stage2_set_prot_attr(struct kvm_pgtable *pgt, enum kvm_pgtable_prot prot, @@ -786,7 +806,8 @@ static bool stage2_try_break_pte(const struct kvm_pgtable_visit_ctx *ctx, * evicted pte value (if any). */ if (kvm_pte_table(ctx->old, ctx->level)) - kvm_call_hyp(__kvm_tlb_flush_vmid, mmu); + kvm_tlb_flush_vmid_range(mmu, ctx->addr, + kvm_granule_size(ctx->level)); else if (kvm_pte_valid(ctx->old)) kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu, ctx->addr, ctx->level); @@ -810,16 +831,36 @@ static void stage2_make_pte(const struct kvm_pgtable_visit_ctx *ctx, kvm_pte_t n smp_store_release(ctx->ptep, new); } -static void stage2_put_pte(const struct kvm_pgtable_visit_ctx *ctx, struct kvm_s2_mmu *mmu, - struct kvm_pgtable_mm_ops *mm_ops) +static bool stage2_unmap_defer_tlb_flush(struct kvm_pgtable *pgt) +{ + /* + * If FEAT_TLBIRANGE is implemented, defer the individual + * TLB invalidations until the entire walk is finished, and + * then use the range-based TLBI instructions to do the + * invalidations. 
Condition deferred TLB invalidation on the + * system supporting FWB as the optimization is entirely + * pointless when the unmap walker needs to perform CMOs. + */ + return system_supports_tlb_range() && stage2_has_fwb(pgt); +} + +static void stage2_unmap_put_pte(const struct kvm_pgtable_visit_ctx *ctx, + struct kvm_s2_mmu *mmu, + struct kvm_pgtable_mm_ops *mm_ops) { + struct kvm_pgtable *pgt = ctx->arg; + /* - * Clear the existing PTE, and perform break-before-make with - * TLB maintenance if it was valid. + * Clear the existing PTE, and perform break-before-make if it was + * valid. Depending on the system support, defer the TLB maintenance + * for the same until the entire unmap walk is completed. */ if (kvm_pte_valid(ctx->old)) { kvm_clear_pte(ctx->ptep); - kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu, ctx->addr, ctx->level); + + if (!stage2_unmap_defer_tlb_flush(pgt)) + kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu, + ctx->addr, ctx->level); } mm_ops->put_page(ctx->ptep); @@ -1077,7 +1118,7 @@ static int stage2_unmap_walker(const struct kvm_pgtable_visit_ctx *ctx, * block entry and rely on the remaining portions being faulted * back lazily. 
*/ - stage2_put_pte(ctx, mmu, mm_ops); + stage2_unmap_put_pte(ctx, mmu, mm_ops); if (need_flush && mm_ops->dcache_clean_inval_poc) mm_ops->dcache_clean_inval_poc(kvm_pte_follow(ctx->old, mm_ops), @@ -1091,13 +1132,19 @@ static int stage2_unmap_walker(const struct kvm_pgtable_visit_ctx *ctx, int kvm_pgtable_stage2_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size) { + int ret; struct kvm_pgtable_walker walker = { .cb = stage2_unmap_walker, .arg = pgt, .flags = KVM_PGTABLE_WALK_LEAF | KVM_PGTABLE_WALK_TABLE_POST, }; - return kvm_pgtable_walk(pgt, addr, size, &walker); + ret = kvm_pgtable_walk(pgt, addr, size, &walker); + if (stage2_unmap_defer_tlb_flush(pgt)) + /* Perform the deferred TLB invalidations */ + kvm_tlb_flush_vmid_range(pgt->mmu, addr, size); + + return ret; } struct stage2_attr_data { diff --git a/arch/arm64/kvm/hyp/vhe/tlb.c b/arch/arm64/kvm/hyp/vhe/tlb.c index e69da550cdc5..46bd43f61d76 100644 --- a/arch/arm64/kvm/hyp/vhe/tlb.c +++ b/arch/arm64/kvm/hyp/vhe/tlb.c @@ -143,6 +143,34 @@ void __kvm_tlb_flush_vmid_ipa_nsh(struct kvm_s2_mmu *mmu, __tlb_switch_to_host(&cxt); } +void __kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu, + phys_addr_t start, unsigned long pages) +{ + struct tlb_inv_context cxt; + unsigned long stride; + + /* + * Since the range of addresses may not be mapped at + * the same level, assume the worst case as PAGE_SIZE + */ + stride = PAGE_SIZE; + start = round_down(start, stride); + + dsb(ishst); + + /* Switch to requested VMID */ + __tlb_switch_to_guest(mmu, &cxt); + + __flush_s2_tlb_range_op(ipas2e1is, start, pages, stride, 0); + + dsb(ish); + __tlbi(vmalle1is); + dsb(ish); + isb(); + + __tlb_switch_to_host(&cxt); +} + void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu) { struct tlb_inv_context cxt; diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index d3b4feed460c..587a104f66c3 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -161,15 +161,23 @@ static bool memslot_is_logging(struct kvm_memory_slot *memslot) } 
/** - * kvm_flush_remote_tlbs() - flush all VM TLB entries for v7/8 + * kvm_arch_flush_remote_tlbs() - flush all VM TLB entries for v7/8 * @kvm: pointer to kvm structure. * * Interface to HYP function to flush all VM TLB entries */ -void kvm_flush_remote_tlbs(struct kvm *kvm) +int kvm_arch_flush_remote_tlbs(struct kvm *kvm) { - ++kvm->stat.generic.remote_tlb_flush_requests; kvm_call_hyp(__kvm_tlb_flush_vmid, &kvm->arch.mmu); + return 0; +} + +int kvm_arch_flush_remote_tlbs_range(struct kvm *kvm, + gfn_t gfn, u64 nr_pages) +{ + kvm_tlb_flush_vmid_range(&kvm->arch.mmu, + gfn << PAGE_SHIFT, nr_pages << PAGE_SHIFT); + return 0; } static bool kvm_is_device_pfn(unsigned long pfn) @@ -592,6 +600,25 @@ int create_hyp_mappings(void *from, void *to, enum kvm_pgtable_prot prot) return 0; } +static int __hyp_alloc_private_va_range(unsigned long base) +{ + lockdep_assert_held(&kvm_hyp_pgd_mutex); + + if (!PAGE_ALIGNED(base)) + return -EINVAL; + + /* + * Verify that BIT(VA_BITS - 1) hasn't been flipped by + * allocating the new area, as it would indicate we've + * overflowed the idmap/IO address range. + */ + if ((base ^ io_map_base) & BIT(VA_BITS - 1)) + return -ENOMEM; + + io_map_base = base; + + return 0; +} /** * hyp_alloc_private_va_range - Allocates a private VA range. @@ -612,26 +639,16 @@ int hyp_alloc_private_va_range(size_t size, unsigned long *haddr) /* * This assumes that we have enough space below the idmap - * page to allocate our VAs. If not, the check below will - * kick. A potential alternative would be to detect that - * overflow and switch to an allocation above the idmap. + * page to allocate our VAs. If not, the check in + * __hyp_alloc_private_va_range() will kick. A potential + * alternative would be to detect that overflow and switch + * to an allocation above the idmap. * * The allocated size is always a multiple of PAGE_SIZE. 
*/ - base = io_map_base - PAGE_ALIGN(size); - - /* Align the allocation based on the order of its size */ - base = ALIGN_DOWN(base, PAGE_SIZE << get_order(size)); - - /* - * Verify that BIT(VA_BITS - 1) hasn't been flipped by - * allocating the new area, as it would indicate we've - * overflowed the idmap/IO address range. - */ - if ((base ^ io_map_base) & BIT(VA_BITS - 1)) - ret = -ENOMEM; - else - *haddr = io_map_base = base; + size = PAGE_ALIGN(size); + base = io_map_base - size; + ret = __hyp_alloc_private_va_range(base); mutex_unlock(&kvm_hyp_pgd_mutex); @@ -668,6 +685,48 @@ static int __create_hyp_private_mapping(phys_addr_t phys_addr, size_t size, return ret; } +int create_hyp_stack(phys_addr_t phys_addr, unsigned long *haddr) +{ + unsigned long base; + size_t size; + int ret; + + mutex_lock(&kvm_hyp_pgd_mutex); + /* + * Efficient stack verification using the PAGE_SHIFT bit implies + * an alignment of our allocation on the order of the size. + */ + size = PAGE_SIZE * 2; + base = ALIGN_DOWN(io_map_base - size, size); + + ret = __hyp_alloc_private_va_range(base); + + mutex_unlock(&kvm_hyp_pgd_mutex); + + if (ret) { + kvm_err("Cannot allocate hyp stack guard page\n"); + return ret; + } + + /* + * Since the stack grows downwards, map the stack to the page + * at the higher address and leave the lower guard page + * unbacked. + * + * Any valid stack address now has the PAGE_SHIFT bit as 1 + * and addresses corresponding to the guard page have the + * PAGE_SHIFT bit as 0 - this is used for overflow detection. 
+ */ + ret = __create_hyp_mappings(base + PAGE_SIZE, PAGE_SIZE, phys_addr, + PAGE_HYP); + if (ret) + kvm_err("Cannot map hyp stack\n"); + + *haddr = base + size; + + return ret; +} + /** * create_hyp_io_mappings - Map IO into both kernel and HYP * @phys_addr: The physical start address which gets mapped @@ -1075,7 +1134,7 @@ static void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot) write_lock(&kvm->mmu_lock); stage2_wp_range(&kvm->arch.mmu, start, end); write_unlock(&kvm->mmu_lock); - kvm_flush_remote_tlbs(kvm); + kvm_flush_remote_tlbs_memslot(kvm, memslot); } /** @@ -1541,7 +1600,6 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, out_unlock: read_unlock(&kvm->mmu_lock); - kvm_set_pfn_accessed(pfn); kvm_release_pfn_clean(pfn); return ret != -EAGAIN ? ret : 0; } @@ -1721,7 +1779,7 @@ bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range) bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range) { - kvm_pfn_t pfn = pte_pfn(range->pte); + kvm_pfn_t pfn = pte_pfn(range->arg.pte); if (!kvm->arch.mmu.pgt) return false; diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c index 315354d27978..042695a210ce 100644 --- a/arch/arm64/kvm/nested.c +++ b/arch/arm64/kvm/nested.c @@ -71,8 +71,9 @@ void access_nested_id_reg(struct kvm_vcpu *v, struct sys_reg_params *p, break; case SYS_ID_AA64MMFR0_EL1: - /* Hide ECV, FGT, ExS, Secure Memory */ - val &= ~(GENMASK_ULL(63, 43) | + /* Hide ECV, ExS, Secure Memory */ + val &= ~(NV_FTR(MMFR0, ECV) | + NV_FTR(MMFR0, EXS) | NV_FTR(MMFR0, TGRAN4_2) | NV_FTR(MMFR0, TGRAN16_2) | NV_FTR(MMFR0, TGRAN64_2) | @@ -116,7 +117,8 @@ void access_nested_id_reg(struct kvm_vcpu *v, struct sys_reg_params *p, break; case SYS_ID_AA64MMFR1_EL1: - val &= (NV_FTR(MMFR1, PAN) | + val &= (NV_FTR(MMFR1, HCX) | + NV_FTR(MMFR1, PAN) | NV_FTR(MMFR1, LO) | NV_FTR(MMFR1, HPDS) | NV_FTR(MMFR1, VH) | @@ -124,8 +126,7 @@ void access_nested_id_reg(struct kvm_vcpu *v, struct sys_reg_params *p, break; case 
SYS_ID_AA64MMFR2_EL1: - val &= ~(NV_FTR(MMFR2, EVT) | - NV_FTR(MMFR2, BBM) | + val &= ~(NV_FTR(MMFR2, BBM) | NV_FTR(MMFR2, TTL) | GENMASK_ULL(47, 44) | NV_FTR(MMFR2, ST) | diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c index 560650972478..6b066e04dc5d 100644 --- a/arch/arm64/kvm/pmu-emul.c +++ b/arch/arm64/kvm/pmu-emul.c @@ -14,6 +14,7 @@ #include <asm/kvm_emulate.h> #include <kvm/arm_pmu.h> #include <kvm/arm_vgic.h> +#include <asm/arm_pmuv3.h> #define PERF_ATTR_CFG1_COUNTER_64BIT BIT(0) @@ -35,12 +36,8 @@ static struct kvm_pmc *kvm_vcpu_idx_to_pmc(struct kvm_vcpu *vcpu, int cnt_idx) return &vcpu->arch.pmu.pmc[cnt_idx]; } -static u32 kvm_pmu_event_mask(struct kvm *kvm) +static u32 __kvm_pmu_event_mask(unsigned int pmuver) { - unsigned int pmuver; - - pmuver = kvm->arch.arm_pmu->pmuver; - switch (pmuver) { case ID_AA64DFR0_EL1_PMUVer_IMP: return GENMASK(9, 0); @@ -55,6 +52,14 @@ static u32 kvm_pmu_event_mask(struct kvm *kvm) } } +static u32 kvm_pmu_event_mask(struct kvm *kvm) +{ + u64 dfr0 = IDREG(kvm, SYS_ID_AA64DFR0_EL1); + u8 pmuver = SYS_FIELD_GET(ID_AA64DFR0_EL1, PMUVer, dfr0); + + return __kvm_pmu_event_mask(pmuver); +} + /** * kvm_pmc_is_64bit - determine if counter is 64bit * @pmc: counter context @@ -672,8 +677,11 @@ void kvm_host_pmu_init(struct arm_pmu *pmu) { struct arm_pmu_entry *entry; - if (pmu->pmuver == ID_AA64DFR0_EL1_PMUVer_NI || - pmu->pmuver == ID_AA64DFR0_EL1_PMUVer_IMP_DEF) + /* + * Check the sanitised PMU version for the system, as KVM does not + * support implementations where PMUv3 exists on a subset of CPUs. 
+ */ + if (!pmuv3_implemented(kvm_arm_pmu_get_pmuver_limit())) return; mutex_lock(&arm_pmus_lock); @@ -750,11 +758,12 @@ u64 kvm_pmu_get_pmceid(struct kvm_vcpu *vcpu, bool pmceid1) } else { val = read_sysreg(pmceid1_el0); /* - * Don't advertise STALL_SLOT, as PMMIR_EL0 is handled + * Don't advertise STALL_SLOT*, as PMMIR_EL0 is handled * as RAZ */ - if (vcpu->kvm->arch.arm_pmu->pmuver >= ID_AA64DFR0_EL1_PMUVer_V3P4) - val &= ~BIT_ULL(ARMV8_PMUV3_PERFCTR_STALL_SLOT - 32); + val &= ~(BIT_ULL(ARMV8_PMUV3_PERFCTR_STALL_SLOT - 32) | + BIT_ULL(ARMV8_PMUV3_PERFCTR_STALL_SLOT_FRONTEND - 32) | + BIT_ULL(ARMV8_PMUV3_PERFCTR_STALL_SLOT_BACKEND - 32)); base = 32; } @@ -950,11 +959,17 @@ int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) return 0; } case KVM_ARM_VCPU_PMU_V3_FILTER: { + u8 pmuver = kvm_arm_pmu_get_pmuver_limit(); struct kvm_pmu_event_filter __user *uaddr; struct kvm_pmu_event_filter filter; int nr_events; - nr_events = kvm_pmu_event_mask(kvm) + 1; + /* + * Allow userspace to specify an event filter for the entire + * event range supported by PMUVer of the hardware, rather + * than the guest's PMUVer for KVM backward compatibility. + */ + nr_events = __kvm_pmu_event_mask(pmuver) + 1; uaddr = (struct kvm_pmu_event_filter __user *)(long)attr->addr; diff --git a/arch/arm64/kvm/pmu.c b/arch/arm64/kvm/pmu.c index 121f1a14c829..0eea225fd09a 100644 --- a/arch/arm64/kvm/pmu.c +++ b/arch/arm64/kvm/pmu.c @@ -236,3 +236,21 @@ bool kvm_set_pmuserenr(u64 val) ctxt_sys_reg(hctxt, PMUSERENR_EL0) = val; return true; } + +/* + * If we interrupted the guest to update the host PMU context, make + * sure we re-apply the guest EL0 state. 
+ */ +void kvm_vcpu_pmu_resync_el0(void) +{ + struct kvm_vcpu *vcpu; + + if (!has_vhe() || !in_interrupt()) + return; + + vcpu = kvm_get_running_vcpu(); + if (!vcpu) + return; + + kvm_make_request(KVM_REQ_RESYNC_PMU_EL0, vcpu); +} diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c index bc8556b6f459..7a65a35ee4ac 100644 --- a/arch/arm64/kvm/reset.c +++ b/arch/arm64/kvm/reset.c @@ -248,21 +248,16 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu) } } - switch (vcpu->arch.target) { - default: - if (vcpu_el1_is_32bit(vcpu)) { - pstate = VCPU_RESET_PSTATE_SVC; - } else if (vcpu_has_nv(vcpu)) { - pstate = VCPU_RESET_PSTATE_EL2; - } else { - pstate = VCPU_RESET_PSTATE_EL1; - } - - if (kvm_vcpu_has_pmu(vcpu) && !kvm_arm_support_pmu_v3()) { - ret = -EINVAL; - goto out; - } - break; + if (vcpu_el1_is_32bit(vcpu)) + pstate = VCPU_RESET_PSTATE_SVC; + else if (vcpu_has_nv(vcpu)) + pstate = VCPU_RESET_PSTATE_EL2; + else + pstate = VCPU_RESET_PSTATE_EL1; + + if (kvm_vcpu_has_pmu(vcpu) && !kvm_arm_support_pmu_v3()) { + ret = -EINVAL; + goto out; } /* Reset core registers */ diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 2ca2973abe66..e92ec810d449 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -2151,6 +2151,8 @@ static const struct sys_reg_desc sys_reg_descs[] = { { SYS_DESC(SYS_CONTEXTIDR_EL1), access_vm_reg, reset_val, CONTEXTIDR_EL1, 0 }, { SYS_DESC(SYS_TPIDR_EL1), NULL, reset_unknown, TPIDR_EL1 }, + { SYS_DESC(SYS_ACCDATA_EL1), undef_access }, + { SYS_DESC(SYS_SCXTNUM_EL1), undef_access }, { SYS_DESC(SYS_CNTKCTL_EL1), NULL, reset_val, CNTKCTL_EL1, 0}, @@ -2365,8 +2367,13 @@ static const struct sys_reg_desc sys_reg_descs[] = { EL2_REG(MDCR_EL2, access_rw, reset_val, 0), EL2_REG(CPTR_EL2, access_rw, reset_val, CPTR_NVHE_EL2_RES1), EL2_REG(HSTR_EL2, access_rw, reset_val, 0), + EL2_REG(HFGRTR_EL2, access_rw, reset_val, 0), + EL2_REG(HFGWTR_EL2, access_rw, reset_val, 0), + EL2_REG(HFGITR_EL2, access_rw, reset_val, 0), 
EL2_REG(HACR_EL2, access_rw, reset_val, 0), + EL2_REG(HCRX_EL2, access_rw, reset_val, 0), + EL2_REG(TTBR0_EL2, access_rw, reset_val, 0), EL2_REG(TTBR1_EL2, access_rw, reset_val, 0), EL2_REG(TCR_EL2, access_rw, reset_val, TCR_EL2_RES1), @@ -2374,6 +2381,8 @@ static const struct sys_reg_desc sys_reg_descs[] = { EL2_REG(VTCR_EL2, access_rw, reset_val, 0), { SYS_DESC(SYS_DACR32_EL2), NULL, reset_unknown, DACR32_EL2 }, + EL2_REG(HDFGRTR_EL2, access_rw, reset_val, 0), + EL2_REG(HDFGWTR_EL2, access_rw, reset_val, 0), EL2_REG(SPSR_EL2, access_rw, reset_val, 0), EL2_REG(ELR_EL2, access_rw, reset_val, 0), { SYS_DESC(SYS_SP_EL1), access_sp_el1}, @@ -3170,6 +3179,9 @@ int kvm_handle_sys_reg(struct kvm_vcpu *vcpu) trace_kvm_handle_sys_reg(esr); + if (__check_nv_sr_forward(vcpu)) + return 1; + params = esr_sys64_to_params(esr); params.regval = vcpu_get_reg(vcpu, Rt); @@ -3587,5 +3599,8 @@ int __init kvm_sys_reg_table_init(void) if (!first_idreg) return -EINVAL; + if (kvm_get_mode() == KVM_MODE_NV) + return populate_nv_trap_config(); + return 0; } diff --git a/arch/arm64/kvm/trace_arm.h b/arch/arm64/kvm/trace_arm.h index 6ce5c025218d..8ad53104934d 100644 --- a/arch/arm64/kvm/trace_arm.h +++ b/arch/arm64/kvm/trace_arm.h @@ -364,6 +364,32 @@ TRACE_EVENT(kvm_inject_nested_exception, __entry->hcr_el2) ); +TRACE_EVENT(kvm_forward_sysreg_trap, + TP_PROTO(struct kvm_vcpu *vcpu, u32 sysreg, bool is_read), + TP_ARGS(vcpu, sysreg, is_read), + + TP_STRUCT__entry( + __field(u64, pc) + __field(u32, sysreg) + __field(bool, is_read) + ), + + TP_fast_assign( + __entry->pc = *vcpu_pc(vcpu); + __entry->sysreg = sysreg; + __entry->is_read = is_read; + ), + + TP_printk("%llx %c (%d,%d,%d,%d,%d)", + __entry->pc, + __entry->is_read ? 
'R' : 'W', + sys_reg_Op0(__entry->sysreg), + sys_reg_Op1(__entry->sysreg), + sys_reg_CRn(__entry->sysreg), + sys_reg_CRm(__entry->sysreg), + sys_reg_Op2(__entry->sysreg)) +); + #endif /* _TRACE_ARM_ARM64_KVM_H */ #undef TRACE_INCLUDE_PATH diff --git a/arch/arm64/kvm/vgic/vgic.h b/arch/arm64/kvm/vgic/vgic.h index f9923beedd27..0ab09b0d4440 100644 --- a/arch/arm64/kvm/vgic/vgic.h +++ b/arch/arm64/kvm/vgic/vgic.h @@ -199,7 +199,6 @@ void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu); void vgic_v2_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr); void vgic_v2_clear_lr(struct kvm_vcpu *vcpu, int lr); void vgic_v2_set_underflow(struct kvm_vcpu *vcpu); -void vgic_v2_set_npie(struct kvm_vcpu *vcpu); int vgic_v2_has_attr_regs(struct kvm_device *dev, struct kvm_device_attr *attr); int vgic_v2_dist_uaccess(struct kvm_vcpu *vcpu, bool is_write, int offset, u32 *val); @@ -233,7 +232,6 @@ void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu); void vgic_v3_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr); void vgic_v3_clear_lr(struct kvm_vcpu *vcpu, int lr); void vgic_v3_set_underflow(struct kvm_vcpu *vcpu); -void vgic_v3_set_npie(struct kvm_vcpu *vcpu); void vgic_v3_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr); void vgic_v3_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr); void vgic_v3_enable(struct kvm_vcpu *vcpu); diff --git a/arch/arm64/lib/csum.c b/arch/arm64/lib/csum.c index 78b87a64ca0a..2432683e48a6 100644 --- a/arch/arm64/lib/csum.c +++ b/arch/arm64/lib/csum.c @@ -24,7 +24,7 @@ unsigned int __no_sanitize_address do_csum(const unsigned char *buff, int len) const u64 *ptr; u64 data, sum64 = 0; - if (unlikely(len == 0)) + if (unlikely(len <= 0)) return 0; offset = (unsigned long)buff & 7; diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps index c80ed4f3cbce..c3f06fdef609 100644 --- a/arch/arm64/tools/cpucaps +++ b/arch/arm64/tools/cpucaps @@ -26,6 +26,7 @@ HAS_ECV HAS_ECV_CNTPOFF HAS_EPAN HAS_EVT +HAS_FGT 
HAS_GENERIC_AUTH HAS_GENERIC_AUTH_ARCH_QARMA3 HAS_GENERIC_AUTH_ARCH_QARMA5 diff --git a/arch/arm64/tools/sysreg b/arch/arm64/tools/sysreg index 65866bf819c3..2517ef7c21cf 100644 --- a/arch/arm64/tools/sysreg +++ b/arch/arm64/tools/sysreg @@ -2156,6 +2156,135 @@ Field 1 ICIALLU Field 0 ICIALLUIS EndSysreg +Sysreg HDFGRTR_EL2 3 4 3 1 4 +Field 63 PMBIDR_EL1 +Field 62 nPMSNEVFR_EL1 +Field 61 nBRBDATA +Field 60 nBRBCTL +Field 59 nBRBIDR +Field 58 PMCEIDn_EL0 +Field 57 PMUSERENR_EL0 +Field 56 TRBTRG_EL1 +Field 55 TRBSR_EL1 +Field 54 TRBPTR_EL1 +Field 53 TRBMAR_EL1 +Field 52 TRBLIMITR_EL1 +Field 51 TRBIDR_EL1 +Field 50 TRBBASER_EL1 +Res0 49 +Field 48 TRCVICTLR +Field 47 TRCSTATR +Field 46 TRCSSCSRn +Field 45 TRCSEQSTR +Field 44 TRCPRGCTLR +Field 43 TRCOSLSR +Res0 42 +Field 41 TRCIMSPECn +Field 40 TRCID +Res0 39:38 +Field 37 TRCCNTVRn +Field 36 TRCCLAIM +Field 35 TRCAUXCTLR +Field 34 TRCAUTHSTATUS +Field 33 TRC +Field 32 PMSLATFR_EL1 +Field 31 PMSIRR_EL1 +Field 30 PMSIDR_EL1 +Field 29 PMSICR_EL1 +Field 28 PMSFCR_EL1 +Field 27 PMSEVFR_EL1 +Field 26 PMSCR_EL1 +Field 25 PMBSR_EL1 +Field 24 PMBPTR_EL1 +Field 23 PMBLIMITR_EL1 +Field 22 PMMIR_EL1 +Res0 21:20 +Field 19 PMSELR_EL0 +Field 18 PMOVS +Field 17 PMINTEN +Field 16 PMCNTEN +Field 15 PMCCNTR_EL0 +Field 14 PMCCFILTR_EL0 +Field 13 PMEVTYPERn_EL0 +Field 12 PMEVCNTRn_EL0 +Field 11 OSDLR_EL1 +Field 10 OSECCR_EL1 +Field 9 OSLSR_EL1 +Res0 8 +Field 7 DBGPRCR_EL1 +Field 6 DBGAUTHSTATUS_EL1 +Field 5 DBGCLAIM +Field 4 MDSCR_EL1 +Field 3 DBGWVRn_EL1 +Field 2 DBGWCRn_EL1 +Field 1 DBGBVRn_EL1 +Field 0 DBGBCRn_EL1 +EndSysreg + +Sysreg HDFGWTR_EL2 3 4 3 1 5 +Res0 63 +Field 62 nPMSNEVFR_EL1 +Field 61 nBRBDATA +Field 60 nBRBCTL +Res0 59:58 +Field 57 PMUSERENR_EL0 +Field 56 TRBTRG_EL1 +Field 55 TRBSR_EL1 +Field 54 TRBPTR_EL1 +Field 53 TRBMAR_EL1 +Field 52 TRBLIMITR_EL1 +Res0 51 +Field 50 TRBBASER_EL1 +Field 49 TRFCR_EL1 +Field 48 TRCVICTLR +Res0 47 +Field 46 TRCSSCSRn +Field 45 TRCSEQSTR +Field 44 TRCPRGCTLR +Res0 43 +Field 42 TRCOSLAR 
+Field 41 TRCIMSPECn +Res0 40:38 +Field 37 TRCCNTVRn +Field 36 TRCCLAIM +Field 35 TRCAUXCTLR +Res0 34 +Field 33 TRC +Field 32 PMSLATFR_EL1 +Field 31 PMSIRR_EL1 +Res0 30 +Field 29 PMSICR_EL1 +Field 28 PMSFCR_EL1 +Field 27 PMSEVFR_EL1 +Field 26 PMSCR_EL1 +Field 25 PMBSR_EL1 +Field 24 PMBPTR_EL1 +Field 23 PMBLIMITR_EL1 +Res0 22 +Field 21 PMCR_EL0 +Field 20 PMSWINC_EL0 +Field 19 PMSELR_EL0 +Field 18 PMOVS +Field 17 PMINTEN +Field 16 PMCNTEN +Field 15 PMCCNTR_EL0 +Field 14 PMCCFILTR_EL0 +Field 13 PMEVTYPERn_EL0 +Field 12 PMEVCNTRn_EL0 +Field 11 OSDLR_EL1 +Field 10 OSECCR_EL1 +Res0 9 +Field 8 OSLAR_EL1 +Field 7 DBGPRCR_EL1 +Res0 6 +Field 5 DBGCLAIM +Field 4 MDSCR_EL1 +Field 3 DBGWVRn_EL1 +Field 2 DBGWCRn_EL1 +Field 1 DBGBVRn_EL1 +Field 0 DBGBCRn_EL1 +EndSysreg + Sysreg ZCR_EL2 3 4 1 2 0 Fields ZCR_ELx EndSysreg diff --git a/arch/ia64/include/asm/Kbuild b/arch/ia64/include/asm/Kbuild index 33733245f42b..aefae2efde9f 100644 --- a/arch/ia64/include/asm/Kbuild +++ b/arch/ia64/include/asm/Kbuild @@ -1,7 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 generated-y += syscall_table.h generic-y += agp.h -generic-y += export.h generic-y += kvm_para.h generic-y += mcs_spinlock.h generic-y += vtime.h diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S index 5eba3fb2e311..ac06d44b9b27 100644 --- a/arch/ia64/kernel/entry.S +++ b/arch/ia64/kernel/entry.S @@ -37,7 +37,7 @@ * pNonSys: !pSys */ - +#include <linux/export.h> #include <linux/pgtable.h> #include <asm/asmmacro.h> #include <asm/cache.h> @@ -49,7 +49,6 @@ #include <asm/thread_info.h> #include <asm/unistd.h> #include <asm/ftrace.h> -#include <asm/export.h> #include "minstate.h" diff --git a/arch/ia64/kernel/esi_stub.S b/arch/ia64/kernel/esi_stub.S index 821e68d10598..9928c5b2957c 100644 --- a/arch/ia64/kernel/esi_stub.S +++ b/arch/ia64/kernel/esi_stub.S @@ -34,9 +34,9 @@ #define PSR_BITS_TO_SET \ (IA64_PSR_BN) +#include <linux/export.h> #include <asm/processor.h> #include <asm/asmmacro.h> -#include <asm/export.h> /* * 
Inputs: diff --git a/arch/ia64/kernel/head.S b/arch/ia64/kernel/head.S index c096500590e9..85c8a57da402 100644 --- a/arch/ia64/kernel/head.S +++ b/arch/ia64/kernel/head.S @@ -20,7 +20,7 @@ * Support for CPU Hotplug */ - +#include <linux/export.h> #include <linux/pgtable.h> #include <asm/asmmacro.h> #include <asm/fpu.h> @@ -33,7 +33,6 @@ #include <asm/mca_asm.h> #include <linux/init.h> #include <linux/linkage.h> -#include <asm/export.h> #ifdef CONFIG_HOTPLUG_CPU #define SAL_PSR_BITS_TO_SET \ diff --git a/arch/ia64/kernel/ivt.S b/arch/ia64/kernel/ivt.S index 7a418e324d30..da90c49df628 100644 --- a/arch/ia64/kernel/ivt.S +++ b/arch/ia64/kernel/ivt.S @@ -47,7 +47,7 @@ * Table is based upon EAS2.6 (Oct 1999) */ - +#include <linux/export.h> #include <linux/pgtable.h> #include <asm/asmmacro.h> #include <asm/break.h> @@ -58,7 +58,6 @@ #include <asm/thread_info.h> #include <asm/unistd.h> #include <asm/errno.h> -#include <asm/export.h> #if 0 # define PSR_DEFAULT_BITS psr.ac diff --git a/arch/ia64/kernel/pal.S b/arch/ia64/kernel/pal.S index 06d01a070aae..fb6db6966f70 100644 --- a/arch/ia64/kernel/pal.S +++ b/arch/ia64/kernel/pal.S @@ -13,9 +13,9 @@ * 05/24/2000 eranian Added support for physical mode static calls */ +#include <linux/export.h> #include <asm/asmmacro.h> #include <asm/processor.h> -#include <asm/export.h> .data pal_entry_point: diff --git a/arch/ia64/lib/clear_page.S b/arch/ia64/lib/clear_page.S index 65b75085c8f4..ba0dd2538fa5 100644 --- a/arch/ia64/lib/clear_page.S +++ b/arch/ia64/lib/clear_page.S @@ -10,9 +10,9 @@ * 3/08/02 davidm Some more tweaking */ +#include <linux/export.h> #include <asm/asmmacro.h> #include <asm/page.h> -#include <asm/export.h> #ifdef CONFIG_ITANIUM # define L3_LINE_SIZE 64 // Itanium L3 line size diff --git a/arch/ia64/lib/clear_user.S b/arch/ia64/lib/clear_user.S index a28f39d349eb..1d9e45ccf8e5 100644 --- a/arch/ia64/lib/clear_user.S +++ b/arch/ia64/lib/clear_user.S @@ -12,8 +12,8 @@ * Stephane Eranian <eranian@hpl.hp.com> */ 
+#include <linux/export.h> #include <asm/asmmacro.h> -#include <asm/export.h> // // arguments diff --git a/arch/ia64/lib/copy_page.S b/arch/ia64/lib/copy_page.S index 176f857c522e..c0a0e6b2af00 100644 --- a/arch/ia64/lib/copy_page.S +++ b/arch/ia64/lib/copy_page.S @@ -15,9 +15,9 @@ * * 4/06/01 davidm Tuned to make it perform well both for cached and uncached copies. */ +#include <linux/export.h> #include <asm/asmmacro.h> #include <asm/page.h> -#include <asm/export.h> #define PIPE_DEPTH 3 #define EPI p[PIPE_DEPTH-1] diff --git a/arch/ia64/lib/copy_page_mck.S b/arch/ia64/lib/copy_page_mck.S index d6fd56e4f1c1..5e8bb4b4b535 100644 --- a/arch/ia64/lib/copy_page_mck.S +++ b/arch/ia64/lib/copy_page_mck.S @@ -60,9 +60,9 @@ * to fetch the second-half of the L2 cache line into L1, and the tX words are copied in * an order that avoids bank conflicts. */ +#include <linux/export.h> #include <asm/asmmacro.h> #include <asm/page.h> -#include <asm/export.h> #define PREFETCH_DIST 8 // McKinley sustains 16 outstanding L2 misses (8 ld, 8 st) diff --git a/arch/ia64/lib/copy_user.S b/arch/ia64/lib/copy_user.S index f681556c6b86..8daab72cfe77 100644 --- a/arch/ia64/lib/copy_user.S +++ b/arch/ia64/lib/copy_user.S @@ -30,8 +30,8 @@ * - fix extraneous stop bit introduced by the EX() macro. 
*/ +#include <linux/export.h> #include <asm/asmmacro.h> -#include <asm/export.h> // // Tuneable parameters diff --git a/arch/ia64/lib/flush.S b/arch/ia64/lib/flush.S index 8573d59c9ed1..f8e795fe45cb 100644 --- a/arch/ia64/lib/flush.S +++ b/arch/ia64/lib/flush.S @@ -8,9 +8,8 @@ * 05/28/05 Zoltan Menyhart Dynamic stride size */ +#include <linux/export.h> #include <asm/asmmacro.h> -#include <asm/export.h> - /* * flush_icache_range(start,end) diff --git a/arch/ia64/lib/idiv32.S b/arch/ia64/lib/idiv32.S index def92b708e6e..83586fbc51ff 100644 --- a/arch/ia64/lib/idiv32.S +++ b/arch/ia64/lib/idiv32.S @@ -15,8 +15,8 @@ * (http://www.goodreads.com/book/show/2019887.Ia_64_and_Elementary_Functions) */ +#include <linux/export.h> #include <asm/asmmacro.h> -#include <asm/export.h> #ifdef MODULO # define OP mod diff --git a/arch/ia64/lib/idiv64.S b/arch/ia64/lib/idiv64.S index a8ba3bd3d4d8..5c9113691f72 100644 --- a/arch/ia64/lib/idiv64.S +++ b/arch/ia64/lib/idiv64.S @@ -15,8 +15,8 @@ * (http://www.goodreads.com/book/show/2019887.Ia_64_and_Elementary_Functions) */ +#include <linux/export.h> #include <asm/asmmacro.h> -#include <asm/export.h> #ifdef MODULO # define OP mod diff --git a/arch/ia64/lib/ip_fast_csum.S b/arch/ia64/lib/ip_fast_csum.S index dc9e6e6fe876..fcc0b812ce2e 100644 --- a/arch/ia64/lib/ip_fast_csum.S +++ b/arch/ia64/lib/ip_fast_csum.S @@ -13,8 +13,8 @@ * Copyright (C) 2002, 2006 Ken Chen <kenneth.w.chen@intel.com> */ +#include <linux/export.h> #include <asm/asmmacro.h> -#include <asm/export.h> /* * Since we know that most likely this function is called with buf aligned diff --git a/arch/ia64/lib/memcpy.S b/arch/ia64/lib/memcpy.S index 91a625fddbf0..35c9069a8345 100644 --- a/arch/ia64/lib/memcpy.S +++ b/arch/ia64/lib/memcpy.S @@ -14,8 +14,8 @@ * Stephane Eranian <eranian@hpl.hp.com> * David Mosberger-Tang <davidm@hpl.hp.com> */ +#include <linux/export.h> #include <asm/asmmacro.h> -#include <asm/export.h> GLOBAL_ENTRY(memcpy) diff --git a/arch/ia64/lib/memcpy_mck.S 
b/arch/ia64/lib/memcpy_mck.S index cc4e6ac914b6..c0d4362217ae 100644 --- a/arch/ia64/lib/memcpy_mck.S +++ b/arch/ia64/lib/memcpy_mck.S @@ -14,9 +14,9 @@ * Copyright (C) 2002 Intel Corp. * Copyright (C) 2002 Ken Chen <kenneth.w.chen@intel.com> */ +#include <linux/export.h> #include <asm/asmmacro.h> #include <asm/page.h> -#include <asm/export.h> #define EK(y...) EX(y) diff --git a/arch/ia64/lib/memset.S b/arch/ia64/lib/memset.S index 07a8b92c6496..552c5c7e4d06 100644 --- a/arch/ia64/lib/memset.S +++ b/arch/ia64/lib/memset.S @@ -18,8 +18,8 @@ Since a stf.spill f0 can store 16B in one go, we use this instruction to get peak speed when value = 0. */ +#include <linux/export.h> #include <asm/asmmacro.h> -#include <asm/export.h> #undef ret #define dest in0 diff --git a/arch/ia64/lib/strlen.S b/arch/ia64/lib/strlen.S index d66de5966974..1f4a46c15127 100644 --- a/arch/ia64/lib/strlen.S +++ b/arch/ia64/lib/strlen.S @@ -17,8 +17,8 @@ * 09/24/99 S.Eranian add speculation recovery code */ +#include <linux/export.h> #include <asm/asmmacro.h> -#include <asm/export.h> // // diff --git a/arch/ia64/lib/strncpy_from_user.S b/arch/ia64/lib/strncpy_from_user.S index 49eb81b69cd2..a287169bd953 100644 --- a/arch/ia64/lib/strncpy_from_user.S +++ b/arch/ia64/lib/strncpy_from_user.S @@ -17,8 +17,8 @@ * by Andreas Schwab <schwab@suse.de>). 
*/ +#include <linux/export.h> #include <asm/asmmacro.h> -#include <asm/export.h> GLOBAL_ENTRY(__strncpy_from_user) alloc r2=ar.pfs,3,0,0,0 diff --git a/arch/ia64/lib/strnlen_user.S b/arch/ia64/lib/strnlen_user.S index 4b684d4da106..a7eb56e840a9 100644 --- a/arch/ia64/lib/strnlen_user.S +++ b/arch/ia64/lib/strnlen_user.S @@ -13,8 +13,8 @@ * Copyright (C) 1999, 2001 David Mosberger-Tang <davidm@hpl.hp.com> */ +#include <linux/export.h> #include <asm/asmmacro.h> -#include <asm/export.h> GLOBAL_ENTRY(__strnlen_user) .prologue diff --git a/arch/ia64/lib/xor.S b/arch/ia64/lib/xor.S index 5413dafe6b2e..6e2a69662c06 100644 --- a/arch/ia64/lib/xor.S +++ b/arch/ia64/lib/xor.S @@ -5,8 +5,8 @@ * Optimized RAID-5 checksumming functions for IA-64. */ +#include <linux/export.h> #include <asm/asmmacro.h> -#include <asm/export.h> GLOBAL_ENTRY(xor_ia64_2) .prologue diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index ecf282dee513..e14396a2ddcb 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -8,11 +8,13 @@ config LOONGARCH select ACPI_PPTT if ACPI select ACPI_SYSTEM_POWER_STATES_SUPPORT if ACPI select ARCH_BINFMT_ELF_STATE + select ARCH_DISABLE_KASAN_INLINE select ARCH_ENABLE_MEMORY_HOTPLUG select ARCH_ENABLE_MEMORY_HOTREMOVE select ARCH_HAS_ACPI_TABLE_UPGRADE if ACPI select ARCH_HAS_CPU_FINALIZE_INIT select ARCH_HAS_FORTIFY_SOURCE + select ARCH_HAS_KCOV select ARCH_HAS_NMI_SAFE_THIS_CPU_OPS select ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE select ARCH_HAS_PTE_SPECIAL @@ -91,6 +93,9 @@ config LOONGARCH select HAVE_ARCH_AUDITSYSCALL select HAVE_ARCH_JUMP_LABEL select HAVE_ARCH_JUMP_LABEL_RELATIVE + select HAVE_ARCH_KASAN + select HAVE_ARCH_KFENCE + select HAVE_ARCH_KGDB if PERF_EVENTS select HAVE_ARCH_MMAP_RND_BITS if MMU select HAVE_ARCH_SECCOMP_FILTER select HAVE_ARCH_TRACEHOOK @@ -115,6 +120,7 @@ config LOONGARCH select HAVE_FUNCTION_GRAPH_RETVAL if HAVE_FUNCTION_GRAPH_TRACER select HAVE_FUNCTION_GRAPH_TRACER select HAVE_FUNCTION_TRACER + select 
HAVE_GCC_PLUGINS select HAVE_GENERIC_VDSO select HAVE_HW_BREAKPOINT if PERF_EVENTS select HAVE_IOREMAP_PROT @@ -254,6 +260,9 @@ config AS_HAS_LSX_EXTENSION config AS_HAS_LASX_EXTENSION def_bool $(as-instr,xvld \$xr0$(comma)\$a0$(comma)0) +config AS_HAS_LBT_EXTENSION + def_bool $(as-instr,movscr2gr \$a0$(comma)\$scr0) + menu "Kernel type and options" source "kernel/Kconfig.hz" @@ -534,6 +543,18 @@ config CPU_HAS_LASX If unsure, say Y. +config CPU_HAS_LBT + bool "Support for the Loongson Binary Translation Extension" + depends on AS_HAS_LBT_EXTENSION + help + Loongson Binary Translation (LBT) introduces 4 scratch registers (SCR0 + to SCR3), x86/ARM eflags (eflags) and x87 fpu stack pointer (ftop). + Enabling this option allows the kernel to allocate and switch registers + specific to LBT. + + If you want to use this feature, such as the Loongson Architecture + Translator (LAT), say Y. + config CPU_HAS_PREFETCH bool default y @@ -638,6 +659,11 @@ config ARCH_MMAP_RND_BITS_MAX config ARCH_SUPPORTS_UPROBES def_bool y +config KASAN_SHADOW_OFFSET + hex + default 0x0 + depends on KASAN + menu "Power management options" config ARCH_SUSPEND_POSSIBLE diff --git a/arch/loongarch/Makefile b/arch/loongarch/Makefile index ef87bab46754..fb0fada43197 100644 --- a/arch/loongarch/Makefile +++ b/arch/loongarch/Makefile @@ -84,7 +84,10 @@ LDFLAGS_vmlinux += -static -pie --no-dynamic-linker -z notext endif cflags-y += $(call cc-option, -mno-check-zero-division) + +ifndef CONFIG_KASAN cflags-y += -fno-builtin-memcpy -fno-builtin-memmove -fno-builtin-memset +endif load-y = 0x9000000000200000 bootvars-y = VMLINUX_LOAD_ADDRESS=$(load-y) diff --git a/arch/loongarch/configs/loongson3_defconfig b/arch/loongarch/configs/loongson3_defconfig index d64849b4cba1..a3b52aaa83b3 100644 --- a/arch/loongarch/configs/loongson3_defconfig +++ b/arch/loongarch/configs/loongson3_defconfig @@ -30,7 +30,6 @@ CONFIG_NAMESPACES=y CONFIG_USER_NS=y CONFIG_CHECKPOINT_RESTORE=y CONFIG_SCHED_AUTOGROUP=y 
-CONFIG_SYSFS_DEPRECATED=y CONFIG_RELAY=y CONFIG_BLK_DEV_INITRD=y CONFIG_EXPERT=y @@ -47,8 +46,12 @@ CONFIG_SMP=y CONFIG_HOTPLUG_CPU=y CONFIG_NR_CPUS=64 CONFIG_NUMA=y +CONFIG_CPU_HAS_FPU=y +CONFIG_CPU_HAS_LSX=y +CONFIG_CPU_HAS_LASX=y CONFIG_KEXEC=y CONFIG_CRASH_DUMP=y +CONFIG_RANDOMIZE_BASE=y CONFIG_SUSPEND=y CONFIG_HIBERNATION=y CONFIG_ACPI=y @@ -63,6 +66,7 @@ CONFIG_EFI_ZBOOT=y CONFIG_EFI_GENERIC_STUB_INITRD_CMDLINE_LOADER=y CONFIG_EFI_CAPSULE_LOADER=m CONFIG_EFI_TEST=m +CONFIG_JUMP_LABEL=y CONFIG_MODULES=y CONFIG_MODULE_FORCE_LOAD=y CONFIG_MODULE_UNLOAD=y @@ -108,7 +112,12 @@ CONFIG_IP_PNP_BOOTP=y CONFIG_IP_PNP_RARP=y CONFIG_NET_IPIP=m CONFIG_NET_IPGRE_DEMUX=m +CONFIG_NET_IPGRE=m +CONFIG_NET_IPGRE_BROADCAST=y CONFIG_IP_MROUTE=y +CONFIG_IP_MROUTE_MULTIPLE_TABLES=y +CONFIG_IP_PIMSM_V1=y +CONFIG_IP_PIMSM_V2=y CONFIG_INET_ESP=m CONFIG_INET_UDP_DIAG=y CONFIG_TCP_CONG_ADVANCED=y @@ -137,7 +146,6 @@ CONFIG_NFT_MASQ=m CONFIG_NFT_REDIR=m CONFIG_NFT_NAT=m CONFIG_NFT_TUNNEL=m -CONFIG_NFT_OBJREF=m CONFIG_NFT_QUEUE=m CONFIG_NFT_QUOTA=m CONFIG_NFT_REJECT=m @@ -208,7 +216,11 @@ CONFIG_IP_VS=m CONFIG_IP_VS_IPV6=y CONFIG_IP_VS_PROTO_TCP=y CONFIG_IP_VS_PROTO_UDP=y +CONFIG_IP_VS_PROTO_ESP=y +CONFIG_IP_VS_PROTO_AH=y +CONFIG_IP_VS_PROTO_SCTP=y CONFIG_IP_VS_RR=m +CONFIG_IP_VS_WRR=m CONFIG_IP_VS_NFCT=y CONFIG_NF_TABLES_IPV4=y CONFIG_NFT_DUP_IPV4=m @@ -227,7 +239,6 @@ CONFIG_IP_NF_TARGET_MASQUERADE=m CONFIG_IP_NF_TARGET_NETMAP=m CONFIG_IP_NF_TARGET_REDIRECT=m CONFIG_IP_NF_MANGLE=m -CONFIG_IP_NF_TARGET_CLUSTERIP=m CONFIG_IP_NF_TARGET_ECN=m CONFIG_IP_NF_TARGET_TTL=m CONFIG_IP_NF_RAW=m @@ -363,6 +374,8 @@ CONFIG_MTD_CFI_AMDSTD=m CONFIG_MTD_CFI_STAA=m CONFIG_MTD_RAM=m CONFIG_MTD_ROM=m +CONFIG_MTD_UBI=m +CONFIG_MTD_UBI_BLOCK=y CONFIG_PARPORT=y CONFIG_PARPORT_PC=y CONFIG_PARPORT_SERIAL=y @@ -370,6 +383,7 @@ CONFIG_PARPORT_PC_FIFO=y CONFIG_ZRAM=m CONFIG_ZRAM_DEF_COMP_ZSTD=y CONFIG_BLK_DEV_LOOP=y +CONFIG_BLK_DEV_DRBD=m CONFIG_BLK_DEV_NBD=m CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=8192 @@ 
-516,6 +530,8 @@ CONFIG_STMMAC_ETH=y # CONFIG_NET_VENDOR_TEHUTI is not set # CONFIG_NET_VENDOR_TI is not set # CONFIG_NET_VENDOR_VIA is not set +CONFIG_NGBE=y +CONFIG_TXGBE=y # CONFIG_NET_VENDOR_WIZNET is not set # CONFIG_NET_VENDOR_XILINX is not set CONFIG_PPP=m @@ -602,9 +618,15 @@ CONFIG_HW_RANDOM_VIRTIO=m CONFIG_I2C_CHARDEV=y CONFIG_I2C_PIIX4=y CONFIG_I2C_GPIO=y +CONFIG_I2C_LS2X=y CONFIG_SPI=y +CONFIG_SPI_LOONGSON_PCI=m +CONFIG_SPI_LOONGSON_PLATFORM=m +CONFIG_PINCTRL=y +CONFIG_PINCTRL_LOONGSON2=y CONFIG_GPIO_SYSFS=y CONFIG_GPIO_LOONGSON=y +CONFIG_GPIO_LOONGSON_64BIT=y CONFIG_POWER_RESET=y CONFIG_POWER_RESET_RESTART=y CONFIG_POWER_RESET_SYSCON=y @@ -614,6 +636,7 @@ CONFIG_SENSORS_LM75=m CONFIG_SENSORS_LM93=m CONFIG_SENSORS_W83795=m CONFIG_SENSORS_W83627HF=m +CONFIG_LOONGSON2_THERMAL=m CONFIG_RC_CORE=m CONFIG_LIRC=y CONFIG_RC_DECODERS=y @@ -643,6 +666,7 @@ CONFIG_DRM_AMDGPU_USERPTR=y CONFIG_DRM_AST=y CONFIG_DRM_QXL=m CONFIG_DRM_VIRTIO_GPU=m +CONFIG_DRM_LOONGSON=y CONFIG_FB=y CONFIG_FB_EFI=y CONFIG_FB_RADEON=y @@ -712,6 +736,7 @@ CONFIG_UCSI_ACPI=m CONFIG_INFINIBAND=m CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_EFI=y +CONFIG_RTC_DRV_LOONGSON=y CONFIG_DMADEVICES=y CONFIG_UIO=m CONFIG_UIO_PDRV_GENIRQ=m @@ -745,7 +770,9 @@ CONFIG_COMEDI_NI_LABPC_PCI=m CONFIG_COMEDI_NI_PCIDIO=m CONFIG_COMEDI_NI_PCIMIO=m CONFIG_STAGING=y -CONFIG_R8188EU=m +CONFIG_COMMON_CLK_LOONGSON2=y +CONFIG_LOONGSON2_GUTS=y +CONFIG_LOONGSON2_PM=y CONFIG_PM_DEVFREQ=y CONFIG_DEVFREQ_GOV_SIMPLE_ONDEMAND=y CONFIG_DEVFREQ_GOV_PERFORMANCE=y @@ -759,10 +786,17 @@ CONFIG_EXT2_FS_SECURITY=y CONFIG_EXT3_FS=y CONFIG_EXT3_FS_POSIX_ACL=y CONFIG_EXT3_FS_SECURITY=y +CONFIG_JFS_FS=m +CONFIG_JFS_POSIX_ACL=y +CONFIG_JFS_SECURITY=y CONFIG_XFS_FS=y CONFIG_XFS_QUOTA=y CONFIG_XFS_POSIX_ACL=y +CONFIG_GFS2_FS=m +CONFIG_GFS2_FS_LOCKING_DLM=y +CONFIG_OCFS2_FS=m CONFIG_BTRFS_FS=y +CONFIG_BTRFS_FS_POSIX_ACL=y CONFIG_FANOTIFY=y CONFIG_FANOTIFY_ACCESS_PERMISSIONS=y CONFIG_QUOTA=y @@ -771,11 +805,14 @@ CONFIG_QFMT_V1=m CONFIG_QFMT_V2=m 
CONFIG_AUTOFS_FS=y CONFIG_FUSE_FS=m +CONFIG_CUSE=m +CONFIG_VIRTIO_FS=m CONFIG_OVERLAY_FS=y CONFIG_OVERLAY_FS_INDEX=y CONFIG_OVERLAY_FS_XINO_AUTO=y CONFIG_OVERLAY_FS_METACOPY=y CONFIG_FSCACHE=y +CONFIG_CACHEFILES=m CONFIG_ISO9660_FS=y CONFIG_JOLIET=y CONFIG_ZISOFS=y @@ -784,19 +821,42 @@ CONFIG_MSDOS_FS=m CONFIG_VFAT_FS=m CONFIG_FAT_DEFAULT_CODEPAGE=936 CONFIG_FAT_DEFAULT_IOCHARSET="gb2312" +CONFIG_EXFAT_FS=m +CONFIG_NTFS3_FS=m +CONFIG_NTFS3_64BIT_CLUSTER=y +CONFIG_NTFS3_LZX_XPRESS=y CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_TMPFS_POSIX_ACL=y CONFIG_HUGETLBFS=y CONFIG_CONFIGFS_FS=y +CONFIG_ORANGEFS_FS=m +CONFIG_ECRYPT_FS=m +CONFIG_ECRYPT_FS_MESSAGING=y CONFIG_HFS_FS=m CONFIG_HFSPLUS_FS=m +CONFIG_UBIFS_FS=m +CONFIG_UBIFS_FS_ADVANCED_COMPR=y CONFIG_CRAMFS=m CONFIG_SQUASHFS=y CONFIG_SQUASHFS_XATTR=y CONFIG_SQUASHFS_LZ4=y CONFIG_SQUASHFS_LZO=y CONFIG_SQUASHFS_XZ=y +CONFIG_MINIX_FS=m +CONFIG_ROMFS_FS=m +CONFIG_PSTORE=m +CONFIG_PSTORE_LZO_COMPRESS=m +CONFIG_PSTORE_LZ4_COMPRESS=m +CONFIG_PSTORE_LZ4HC_COMPRESS=m +CONFIG_PSTORE_842_COMPRESS=y +CONFIG_PSTORE_ZSTD_COMPRESS=y +CONFIG_PSTORE_ZSTD_COMPRESS_DEFAULT=y +CONFIG_SYSV_FS=m +CONFIG_UFS_FS=m +CONFIG_EROFS_FS=m +CONFIG_EROFS_FS_ZIP_LZMA=y +CONFIG_EROFS_FS_PCPU_KTHREAD=y CONFIG_NFS_FS=y CONFIG_NFS_V3_ACL=y CONFIG_NFS_V4=y @@ -807,6 +867,10 @@ CONFIG_NFSD=y CONFIG_NFSD_V3_ACL=y CONFIG_NFSD_V4=y CONFIG_NFSD_BLOCKLAYOUT=y +CONFIG_CEPH_FS=m +CONFIG_CEPH_FSCACHE=y +CONFIG_CEPH_FS_POSIX_ACL=y +CONFIG_CEPH_FS_SECURITY_LABEL=y CONFIG_CIFS=m # CONFIG_CIFS_DEBUG is not set CONFIG_9P_FS=y @@ -814,6 +878,7 @@ CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_CODEPAGE_936=y CONFIG_NLS_ASCII=y CONFIG_NLS_UTF8=y +CONFIG_DLM=m CONFIG_KEY_DH_OPERATIONS=y CONFIG_SECURITY=y CONFIG_SECURITY_SELINUX=y @@ -847,6 +912,7 @@ CONFIG_CRYPTO_USER_API_HASH=m CONFIG_CRYPTO_USER_API_SKCIPHER=m CONFIG_CRYPTO_USER_API_RNG=m CONFIG_CRYPTO_USER_API_AEAD=m +CONFIG_CRYPTO_CRC32_LOONGARCH=m CONFIG_CRYPTO_DEV_VIRTIO=m CONFIG_PRINTK_TIME=y CONFIG_STRIP_ASM_SYMS=y diff --git 
a/arch/loongarch/include/asm/asm-prototypes.h b/arch/loongarch/include/asm/asm-prototypes.h index ed06d3997420..cf8e1a4e7c19 100644 --- a/arch/loongarch/include/asm/asm-prototypes.h +++ b/arch/loongarch/include/asm/asm-prototypes.h @@ -1,6 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ #include <linux/uaccess.h> #include <asm/fpu.h> +#include <asm/lbt.h> #include <asm/mmu_context.h> #include <asm/page.h> #include <asm/ftrace.h> diff --git a/arch/loongarch/include/asm/asmmacro.h b/arch/loongarch/include/asm/asmmacro.h index 79e1d53fea89..c9544f358c33 100644 --- a/arch/loongarch/include/asm/asmmacro.h +++ b/arch/loongarch/include/asm/asmmacro.h @@ -10,113 +10,6 @@ #include <asm/fpregdef.h> #include <asm/loongarch.h> - .macro parse_v var val - \var = \val - .endm - - .macro parse_r var r - \var = -1 - .ifc \r, $r0 - \var = 0 - .endif - .ifc \r, $r1 - \var = 1 - .endif - .ifc \r, $r2 - \var = 2 - .endif - .ifc \r, $r3 - \var = 3 - .endif - .ifc \r, $r4 - \var = 4 - .endif - .ifc \r, $r5 - \var = 5 - .endif - .ifc \r, $r6 - \var = 6 - .endif - .ifc \r, $r7 - \var = 7 - .endif - .ifc \r, $r8 - \var = 8 - .endif - .ifc \r, $r9 - \var = 9 - .endif - .ifc \r, $r10 - \var = 10 - .endif - .ifc \r, $r11 - \var = 11 - .endif - .ifc \r, $r12 - \var = 12 - .endif - .ifc \r, $r13 - \var = 13 - .endif - .ifc \r, $r14 - \var = 14 - .endif - .ifc \r, $r15 - \var = 15 - .endif - .ifc \r, $r16 - \var = 16 - .endif - .ifc \r, $r17 - \var = 17 - .endif - .ifc \r, $r18 - \var = 18 - .endif - .ifc \r, $r19 - \var = 19 - .endif - .ifc \r, $r20 - \var = 20 - .endif - .ifc \r, $r21 - \var = 21 - .endif - .ifc \r, $r22 - \var = 22 - .endif - .ifc \r, $r23 - \var = 23 - .endif - .ifc \r, $r24 - \var = 24 - .endif - .ifc \r, $r25 - \var = 25 - .endif - .ifc \r, $r26 - \var = 26 - .endif - .ifc \r, $r27 - \var = 27 - .endif - .ifc \r, $r28 - \var = 28 - .endif - .ifc \r, $r29 - \var = 29 - .endif - .ifc \r, $r30 - \var = 30 - .endif - .ifc \r, $r31 - \var = 31 - .endif - .iflt \var - .error 
"Unable to parse register name \r" - .endif - .endm - .macro cpu_save_nonscratch thread stptr.d s0, \thread, THREAD_REG23 stptr.d s1, \thread, THREAD_REG24 @@ -148,12 +41,51 @@ .macro fpu_save_csr thread tmp movfcsr2gr \tmp, fcsr0 - stptr.w \tmp, \thread, THREAD_FCSR + stptr.w \tmp, \thread, THREAD_FCSR +#ifdef CONFIG_CPU_HAS_LBT + /* TM bit is always 0 if LBT not supported */ + andi \tmp, \tmp, FPU_CSR_TM + beqz \tmp, 1f + /* Save FTOP */ + x86mftop \tmp + stptr.w \tmp, \thread, THREAD_FTOP + /* Turn off TM to ensure the order of FPR in memory independent of TM */ + x86clrtm +1: +#endif .endm - .macro fpu_restore_csr thread tmp - ldptr.w \tmp, \thread, THREAD_FCSR - movgr2fcsr fcsr0, \tmp + .macro fpu_restore_csr thread tmp0 tmp1 + ldptr.w \tmp0, \thread, THREAD_FCSR + movgr2fcsr fcsr0, \tmp0 +#ifdef CONFIG_CPU_HAS_LBT + /* TM bit is always 0 if LBT not supported */ + andi \tmp0, \tmp0, FPU_CSR_TM + beqz \tmp0, 2f + /* Restore FTOP */ + ldptr.w \tmp0, \thread, THREAD_FTOP + andi \tmp0, \tmp0, 0x7 + la.pcrel \tmp1, 1f + alsl.d \tmp1, \tmp0, \tmp1, 3 + jr \tmp1 +1: + x86mttop 0 + b 2f + x86mttop 1 + b 2f + x86mttop 2 + b 2f + x86mttop 3 + b 2f + x86mttop 4 + b 2f + x86mttop 5 + b 2f + x86mttop 6 + b 2f + x86mttop 7 +2: +#endif .endm .macro fpu_save_cc thread tmp0 tmp1 @@ -353,7 +285,7 @@ .macro lsx_restore_all thread tmp0 tmp1 lsx_restore_data \thread, \tmp0 fpu_restore_cc \thread, \tmp0, \tmp1 - fpu_restore_csr \thread, \tmp0 + fpu_restore_csr \thread, \tmp0, \tmp1 .endm .macro lsx_save_upper vd base tmp off @@ -563,7 +495,7 @@ .macro lasx_restore_all thread tmp0 tmp1 lasx_restore_data \thread, \tmp0 fpu_restore_cc \thread, \tmp0, \tmp1 - fpu_restore_csr \thread, \tmp0 + fpu_restore_csr \thread, \tmp0, \tmp1 .endm .macro lasx_save_upper xd base tmp off diff --git a/arch/loongarch/include/asm/kasan.h b/arch/loongarch/include/asm/kasan.h new file mode 100644 index 000000000000..deeff8158f45 --- /dev/null +++ b/arch/loongarch/include/asm/kasan.h @@ -0,0 +1,126 @@ +/* 
SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ASM_KASAN_H +#define __ASM_KASAN_H + +#ifndef __ASSEMBLY__ + +#include <linux/linkage.h> +#include <linux/mmzone.h> +#include <asm/addrspace.h> +#include <asm/io.h> +#include <asm/pgtable.h> + +#define __HAVE_ARCH_SHADOW_MAP + +#define KASAN_SHADOW_SCALE_SHIFT 3 +#define KASAN_SHADOW_OFFSET _AC(CONFIG_KASAN_SHADOW_OFFSET, UL) + +#define XRANGE_SHIFT (48) + +/* Valid address length */ +#define XRANGE_SHADOW_SHIFT (PGDIR_SHIFT + PAGE_SHIFT - 3) +/* Used for taking out the valid address */ +#define XRANGE_SHADOW_MASK GENMASK_ULL(XRANGE_SHADOW_SHIFT - 1, 0) +/* One segment whole address space size */ +#define XRANGE_SIZE (XRANGE_SHADOW_MASK + 1) + +/* 64-bit segment value. */ +#define XKPRANGE_UC_SEG (0x8000) +#define XKPRANGE_CC_SEG (0x9000) +#define XKVRANGE_VC_SEG (0xffff) + +/* Cached */ +#define XKPRANGE_CC_START CACHE_BASE +#define XKPRANGE_CC_SIZE XRANGE_SIZE +#define XKPRANGE_CC_KASAN_OFFSET (0) +#define XKPRANGE_CC_SHADOW_SIZE (XKPRANGE_CC_SIZE >> KASAN_SHADOW_SCALE_SHIFT) +#define XKPRANGE_CC_SHADOW_END (XKPRANGE_CC_KASAN_OFFSET + XKPRANGE_CC_SHADOW_SIZE) + +/* UnCached */ +#define XKPRANGE_UC_START UNCACHE_BASE +#define XKPRANGE_UC_SIZE XRANGE_SIZE +#define XKPRANGE_UC_KASAN_OFFSET XKPRANGE_CC_SHADOW_END +#define XKPRANGE_UC_SHADOW_SIZE (XKPRANGE_UC_SIZE >> KASAN_SHADOW_SCALE_SHIFT) +#define XKPRANGE_UC_SHADOW_END (XKPRANGE_UC_KASAN_OFFSET + XKPRANGE_UC_SHADOW_SIZE) + +/* VMALLOC (Cached or UnCached) */ +#define XKVRANGE_VC_START MODULES_VADDR +#define XKVRANGE_VC_SIZE round_up(KFENCE_AREA_END - MODULES_VADDR + 1, PGDIR_SIZE) +#define XKVRANGE_VC_KASAN_OFFSET XKPRANGE_UC_SHADOW_END +#define XKVRANGE_VC_SHADOW_SIZE (XKVRANGE_VC_SIZE >> KASAN_SHADOW_SCALE_SHIFT) +#define XKVRANGE_VC_SHADOW_END (XKVRANGE_VC_KASAN_OFFSET + XKVRANGE_VC_SHADOW_SIZE) + +/* KAsan shadow memory start right after vmalloc. 
*/ +#define KASAN_SHADOW_START round_up(KFENCE_AREA_END, PGDIR_SIZE) +#define KASAN_SHADOW_SIZE (XKVRANGE_VC_SHADOW_END - XKPRANGE_CC_KASAN_OFFSET) +#define KASAN_SHADOW_END round_up(KASAN_SHADOW_START + KASAN_SHADOW_SIZE, PGDIR_SIZE) + +#define XKPRANGE_CC_SHADOW_OFFSET (KASAN_SHADOW_START + XKPRANGE_CC_KASAN_OFFSET) +#define XKPRANGE_UC_SHADOW_OFFSET (KASAN_SHADOW_START + XKPRANGE_UC_KASAN_OFFSET) +#define XKVRANGE_VC_SHADOW_OFFSET (KASAN_SHADOW_START + XKVRANGE_VC_KASAN_OFFSET) + +extern bool kasan_early_stage; +extern unsigned char kasan_early_shadow_page[PAGE_SIZE]; + +#define kasan_arch_is_ready kasan_arch_is_ready +static __always_inline bool kasan_arch_is_ready(void) +{ + return !kasan_early_stage; +} + +static inline void *kasan_mem_to_shadow(const void *addr) +{ + if (!kasan_arch_is_ready()) { + return (void *)(kasan_early_shadow_page); + } else { + unsigned long maddr = (unsigned long)addr; + unsigned long xrange = (maddr >> XRANGE_SHIFT) & 0xffff; + unsigned long offset = 0; + + maddr &= XRANGE_SHADOW_MASK; + switch (xrange) { + case XKPRANGE_CC_SEG: + offset = XKPRANGE_CC_SHADOW_OFFSET; + break; + case XKPRANGE_UC_SEG: + offset = XKPRANGE_UC_SHADOW_OFFSET; + break; + case XKVRANGE_VC_SEG: + offset = XKVRANGE_VC_SHADOW_OFFSET; + break; + default: + WARN_ON(1); + return NULL; + } + + return (void *)((maddr >> KASAN_SHADOW_SCALE_SHIFT) + offset); + } +} + +static inline const void *kasan_shadow_to_mem(const void *shadow_addr) +{ + unsigned long addr = (unsigned long)shadow_addr; + + if (unlikely(addr > KASAN_SHADOW_END) || + unlikely(addr < KASAN_SHADOW_START)) { + WARN_ON(1); + return NULL; + } + + if (addr >= XKVRANGE_VC_SHADOW_OFFSET) + return (void *)(((addr - XKVRANGE_VC_SHADOW_OFFSET) << KASAN_SHADOW_SCALE_SHIFT) + XKVRANGE_VC_START); + else if (addr >= XKPRANGE_UC_SHADOW_OFFSET) + return (void *)(((addr - XKPRANGE_UC_SHADOW_OFFSET) << KASAN_SHADOW_SCALE_SHIFT) + XKPRANGE_UC_START); + else if (addr >= XKPRANGE_CC_SHADOW_OFFSET) + return (void 
*)(((addr - XKPRANGE_CC_SHADOW_OFFSET) << KASAN_SHADOW_SCALE_SHIFT) + XKPRANGE_CC_START); + else { + WARN_ON(1); + return NULL; + } +} + +void kasan_init(void); +asmlinkage void kasan_early_init(void); + +#endif +#endif diff --git a/arch/loongarch/include/asm/kfence.h b/arch/loongarch/include/asm/kfence.h new file mode 100644 index 000000000000..6c82aea1c993 --- /dev/null +++ b/arch/loongarch/include/asm/kfence.h @@ -0,0 +1,61 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * KFENCE support for LoongArch. + * + * Author: Enze Li <lienze@kylinos.cn> + * Copyright (C) 2022-2023 KylinSoft Corporation. + */ + +#ifndef _ASM_LOONGARCH_KFENCE_H +#define _ASM_LOONGARCH_KFENCE_H + +#include <linux/kfence.h> +#include <asm/pgtable.h> +#include <asm/tlb.h> + +static inline bool arch_kfence_init_pool(void) +{ + int err; + char *kfence_pool = __kfence_pool; + struct vm_struct *area; + + area = __get_vm_area_caller(KFENCE_POOL_SIZE, VM_IOREMAP, + KFENCE_AREA_START, KFENCE_AREA_END, + __builtin_return_address(0)); + if (!area) + return false; + + __kfence_pool = (char *)area->addr; + err = ioremap_page_range((unsigned long)__kfence_pool, + (unsigned long)__kfence_pool + KFENCE_POOL_SIZE, + virt_to_phys((void *)kfence_pool), PAGE_KERNEL); + if (err) { + free_vm_area(area); + __kfence_pool = kfence_pool; + return false; + } + + return true; +} + +/* Protect the given page and flush TLB. 
*/ +static inline bool kfence_protect_page(unsigned long addr, bool protect) +{ + pte_t *pte = virt_to_kpte(addr); + + if (WARN_ON(!pte) || pte_none(*pte)) + return false; + + if (protect) + set_pte(pte, __pte(pte_val(*pte) & ~(_PAGE_VALID | _PAGE_PRESENT))); + else + set_pte(pte, __pte(pte_val(*pte) | (_PAGE_VALID | _PAGE_PRESENT))); + + preempt_disable(); + local_flush_tlb_one(addr); + preempt_enable(); + + return true; +} + +#endif /* _ASM_LOONGARCH_KFENCE_H */ diff --git a/arch/loongarch/include/asm/kgdb.h b/arch/loongarch/include/asm/kgdb.h new file mode 100644 index 000000000000..2041ae58b161 --- /dev/null +++ b/arch/loongarch/include/asm/kgdb.h @@ -0,0 +1,97 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2023 Loongson Technology Corporation Limited + */ + +#ifndef _ASM_LOONGARCH_KGDB_H +#define _ASM_LOONGARCH_KGDB_H + +#define GDB_SIZEOF_REG sizeof(u64) + +/* gdb remote procotol expects the following register layout. */ + +/* + * General purpose registers: + * r0-r31: 64 bit + * orig_a0: 64 bit + * pc : 64 bit + * csr_badvaddr: 64 bit + */ +#define DBG_PT_REGS_BASE 0 +#define DBG_PT_REGS_NUM 35 +#define DBG_PT_REGS_END (DBG_PT_REGS_BASE + DBG_PT_REGS_NUM - 1) + +/* + * Floating point registers: + * f0-f31: 64 bit + */ +#define DBG_FPR_BASE (DBG_PT_REGS_END + 1) +#define DBG_FPR_NUM 32 +#define DBG_FPR_END (DBG_FPR_BASE + DBG_FPR_NUM - 1) + +/* + * Condition Flag registers: + * fcc0-fcc8: 8 bit + */ +#define DBG_FCC_BASE (DBG_FPR_END + 1) +#define DBG_FCC_NUM 8 +#define DBG_FCC_END (DBG_FCC_BASE + DBG_FCC_NUM - 1) + +/* + * Floating-point Control and Status registers: + * fcsr: 32 bit + */ +#define DBG_FCSR_NUM 1 +#define DBG_FCSR (DBG_FCC_END + 1) + +#define DBG_MAX_REG_NUM (DBG_FCSR + 1) + +/* + * Size of I/O buffer for gdb packet. + * considering to hold all register contents, size is set + */ +#define BUFMAX 2048 + +/* + * Number of bytes required for gdb_regs buffer. + * PT_REGS and FPR: 8 bytes; FCSR: 4 bytes; FCC: 1 bytes. 
+ * GDB fails to connect for size beyond this with error + * "'g' packet reply is too long" + */ +#define NUMREGBYTES ((DBG_PT_REGS_NUM + DBG_FPR_NUM) * GDB_SIZEOF_REG + DBG_FCC_NUM * 1 + DBG_FCSR_NUM * 4) + +#define BREAK_INSTR_SIZE 4 +#define CACHE_FLUSH_IS_SAFE 0 + +/* Register numbers of various important registers. */ +enum dbg_loongarch_regnum { + DBG_LOONGARCH_ZERO = 0, + DBG_LOONGARCH_RA, + DBG_LOONGARCH_TP, + DBG_LOONGARCH_SP, + DBG_LOONGARCH_A0, + DBG_LOONGARCH_FP = 22, + DBG_LOONGARCH_S0, + DBG_LOONGARCH_S1, + DBG_LOONGARCH_S2, + DBG_LOONGARCH_S3, + DBG_LOONGARCH_S4, + DBG_LOONGARCH_S5, + DBG_LOONGARCH_S6, + DBG_LOONGARCH_S7, + DBG_LOONGARCH_S8, + DBG_LOONGARCH_ORIG_A0, + DBG_LOONGARCH_PC, + DBG_LOONGARCH_BADV +}; + +void kgdb_breakinst(void); +void arch_kgdb_breakpoint(void); + +#ifdef CONFIG_KGDB +bool kgdb_breakpoint_handler(struct pt_regs *regs); +#else /* !CONFIG_KGDB */ +static inline bool kgdb_breakpoint_handler(struct pt_regs *regs) { return false; } +#endif /* CONFIG_KGDB */ + +#endif /* __ASM_KGDB_H_ */ diff --git a/arch/loongarch/include/asm/lbt.h b/arch/loongarch/include/asm/lbt.h new file mode 100644 index 000000000000..e671978bf552 --- /dev/null +++ b/arch/loongarch/include/asm/lbt.h @@ -0,0 +1,109 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Author: Qi Hu <huqi@loongson.cn> + * Huacai Chen <chenhuacai@loongson.cn> + * Copyright (C) 2020-2023 Loongson Technology Corporation Limited + */ +#ifndef _ASM_LBT_H +#define _ASM_LBT_H + +#include <asm/cpu.h> +#include <asm/current.h> +#include <asm/loongarch.h> +#include <asm/processor.h> + +extern void _init_lbt(void); +extern void _save_lbt(struct loongarch_lbt *); +extern void _restore_lbt(struct loongarch_lbt *); + +static inline int is_lbt_enabled(void) +{ + if (!cpu_has_lbt) + return 0; + + return (csr_read32(LOONGARCH_CSR_EUEN) & CSR_EUEN_LBTEN) ? 
+ 1 : 0; +} + +static inline int is_lbt_owner(void) +{ + return test_thread_flag(TIF_USEDLBT); +} + +#ifdef CONFIG_CPU_HAS_LBT + +static inline void enable_lbt(void) +{ + if (cpu_has_lbt) + csr_xchg32(CSR_EUEN_LBTEN, CSR_EUEN_LBTEN, LOONGARCH_CSR_EUEN); +} + +static inline void disable_lbt(void) +{ + if (cpu_has_lbt) + csr_xchg32(0, CSR_EUEN_LBTEN, LOONGARCH_CSR_EUEN); +} + +static inline void __own_lbt(void) +{ + enable_lbt(); + set_thread_flag(TIF_USEDLBT); + KSTK_EUEN(current) |= CSR_EUEN_LBTEN; +} + +static inline void own_lbt_inatomic(int restore) +{ + if (cpu_has_lbt && !is_lbt_owner()) { + __own_lbt(); + if (restore) + _restore_lbt(&current->thread.lbt); + } +} + +static inline void own_lbt(int restore) +{ + preempt_disable(); + own_lbt_inatomic(restore); + preempt_enable(); +} + +static inline void lose_lbt_inatomic(int save, struct task_struct *tsk) +{ + if (cpu_has_lbt && is_lbt_owner()) { + if (save) + _save_lbt(&tsk->thread.lbt); + + disable_lbt(); + clear_tsk_thread_flag(tsk, TIF_USEDLBT); + } + KSTK_EUEN(tsk) &= ~(CSR_EUEN_LBTEN); +} + +static inline void lose_lbt(int save) +{ + preempt_disable(); + lose_lbt_inatomic(save, current); + preempt_enable(); +} + +static inline void init_lbt(void) +{ + __own_lbt(); + _init_lbt(); +} +#else +static inline void own_lbt_inatomic(int restore) {} +static inline void lose_lbt_inatomic(int save, struct task_struct *tsk) {} +static inline void init_lbt(void) {} +static inline void lose_lbt(int save) {} +#endif + +static inline int thread_lbt_context_live(void) +{ + if (!cpu_has_lbt) + return 0; + + return test_thread_flag(TIF_LBT_CTX_LIVE); +} + +#endif /* _ASM_LBT_H */ diff --git a/arch/loongarch/include/asm/loongarch.h b/arch/loongarch/include/asm/loongarch.h index 10748a20a2ab..33531d432b49 100644 --- a/arch/loongarch/include/asm/loongarch.h +++ b/arch/loongarch/include/asm/loongarch.h @@ -12,49 +12,6 @@ #ifndef __ASSEMBLY__ #include <larchintrin.h> -/* - * parse_r var, r - Helper assembler macro for parsing register
names. - * - * This converts the register name in $n form provided in \r to the - * corresponding register number, which is assigned to the variable \var. It is - * needed to allow explicit encoding of instructions in inline assembly where - * registers are chosen by the compiler in $n form, allowing us to avoid using - * fixed register numbers. - * - * It also allows newer instructions (not implemented by the assembler) to be - * transparently implemented using assembler macros, instead of needing separate - * cases depending on toolchain support. - * - * Simple usage example: - * __asm__ __volatile__("parse_r addr, %0\n\t" - * "#invtlb op, 0, %0\n\t" - * ".word ((0x6498000) | (addr << 10) | (0 << 5) | op)" - * : "=r" (status); - */ - -/* Match an individual register number and assign to \var */ -#define _IFC_REG(n) \ - ".ifc \\r, $r" #n "\n\t" \ - "\\var = " #n "\n\t" \ - ".endif\n\t" - -__asm__(".macro parse_r var r\n\t" - "\\var = -1\n\t" - _IFC_REG(0) _IFC_REG(1) _IFC_REG(2) _IFC_REG(3) - _IFC_REG(4) _IFC_REG(5) _IFC_REG(6) _IFC_REG(7) - _IFC_REG(8) _IFC_REG(9) _IFC_REG(10) _IFC_REG(11) - _IFC_REG(12) _IFC_REG(13) _IFC_REG(14) _IFC_REG(15) - _IFC_REG(16) _IFC_REG(17) _IFC_REG(18) _IFC_REG(19) - _IFC_REG(20) _IFC_REG(21) _IFC_REG(22) _IFC_REG(23) - _IFC_REG(24) _IFC_REG(25) _IFC_REG(26) _IFC_REG(27) - _IFC_REG(28) _IFC_REG(29) _IFC_REG(30) _IFC_REG(31) - ".iflt \\var\n\t" - ".error \"Unable to parse register name \\r\"\n\t" - ".endif\n\t" - ".endm"); - -#undef _IFC_REG - /* CPUCFG */ #define read_cpucfg(reg) __cpucfg(reg) @@ -1453,6 +1410,10 @@ __BUILD_CSR_OP(tlbidx) #define FPU_CSR_RU 0x200 /* towards +Infinity */ #define FPU_CSR_RD 0x300 /* towards -Infinity */ +/* Bit 6 of FPU Status Register specify the LBT TOP simulation mode */ +#define FPU_CSR_TM_SHIFT 0x6 +#define FPU_CSR_TM (_ULCAST_(1) << FPU_CSR_TM_SHIFT) + #define read_fcsr(source) \ ({ \ unsigned int __res; \ diff --git a/arch/loongarch/include/asm/mmzone.h b/arch/loongarch/include/asm/mmzone.h 
index fe67d0b4b33d..2b9a90727e19 100644 --- a/arch/loongarch/include/asm/mmzone.h +++ b/arch/loongarch/include/asm/mmzone.h @@ -13,6 +13,4 @@ extern struct pglist_data *node_data[]; #define NODE_DATA(nid) (node_data[(nid)]) -extern void setup_zero_pages(void); - #endif /* _ASM_MMZONE_H_ */ diff --git a/arch/loongarch/include/asm/page.h b/arch/loongarch/include/asm/page.h index 26e8dccb6619..63f137ce82a4 100644 --- a/arch/loongarch/include/asm/page.h +++ b/arch/loongarch/include/asm/page.h @@ -84,7 +84,12 @@ typedef struct { unsigned long pgprot; } pgprot_t; #define sym_to_pfn(x) __phys_to_pfn(__pa_symbol(x)) #define virt_to_pfn(kaddr) PFN_DOWN(PHYSADDR(kaddr)) -#define virt_to_page(kaddr) pfn_to_page(virt_to_pfn(kaddr)) + +#define virt_to_page(kaddr) \ +({ \ + (likely((unsigned long)kaddr < vm_map_base)) ? \ + dmw_virt_to_page((unsigned long)kaddr) : tlb_virt_to_page((unsigned long)kaddr);\ +}) extern int __virt_addr_valid(volatile void *kaddr); #define virt_addr_valid(kaddr) __virt_addr_valid((volatile void *)(kaddr)) diff --git a/arch/loongarch/include/asm/pgalloc.h b/arch/loongarch/include/asm/pgalloc.h index 23f5b1107246..79470f0b4f1d 100644 --- a/arch/loongarch/include/asm/pgalloc.h +++ b/arch/loongarch/include/asm/pgalloc.h @@ -94,4 +94,5 @@ static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long address) #endif /* __PAGETABLE_PUD_FOLDED */ +extern pte_t * __init populate_kernel_pte(unsigned long addr); #endif /* _ASM_PGALLOC_H */ diff --git a/arch/loongarch/include/asm/pgtable.h b/arch/loongarch/include/asm/pgtable.h index 06963a172319..29d9b12298bc 100644 --- a/arch/loongarch/include/asm/pgtable.h +++ b/arch/loongarch/include/asm/pgtable.h @@ -70,12 +70,9 @@ struct vm_area_struct; * for zero-mapped memory areas etc.. 
*/ -extern unsigned long empty_zero_page; -extern unsigned long zero_page_mask; +extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]; -#define ZERO_PAGE(vaddr) \ - (virt_to_page((void *)(empty_zero_page + (((unsigned long)(vaddr)) & zero_page_mask)))) -#define __HAVE_COLOR_ZERO_PAGE +#define ZERO_PAGE(vaddr) virt_to_page(empty_zero_page) /* * TLB refill handlers may also map the vmalloc area into xkvrange. @@ -85,14 +82,30 @@ extern unsigned long zero_page_mask; #define MODULES_VADDR (vm_map_base + PCI_IOSIZE + (2 * PAGE_SIZE)) #define MODULES_END (MODULES_VADDR + SZ_256M) +#ifdef CONFIG_KFENCE +#define KFENCE_AREA_SIZE (((CONFIG_KFENCE_NUM_OBJECTS + 1) * 2 + 2) * PAGE_SIZE) +#else +#define KFENCE_AREA_SIZE 0 +#endif + #define VMALLOC_START MODULES_END + +#ifndef CONFIG_KASAN #define VMALLOC_END \ (vm_map_base + \ - min(PTRS_PER_PGD * PTRS_PER_PUD * PTRS_PER_PMD * PTRS_PER_PTE * PAGE_SIZE, (1UL << cpu_vabits)) - PMD_SIZE - VMEMMAP_SIZE) + min(PTRS_PER_PGD * PTRS_PER_PUD * PTRS_PER_PMD * PTRS_PER_PTE * PAGE_SIZE, (1UL << cpu_vabits)) - PMD_SIZE - VMEMMAP_SIZE - KFENCE_AREA_SIZE) +#else +#define VMALLOC_END \ + (vm_map_base + \ + min(PTRS_PER_PGD * PTRS_PER_PUD * PTRS_PER_PMD * PTRS_PER_PTE * PAGE_SIZE, (1UL << cpu_vabits) / 2) - PMD_SIZE - VMEMMAP_SIZE - KFENCE_AREA_SIZE) +#endif #define vmemmap ((struct page *)((VMALLOC_END + PMD_SIZE) & PMD_MASK)) #define VMEMMAP_END ((unsigned long)vmemmap + VMEMMAP_SIZE - 1) +#define KFENCE_AREA_START (VMEMMAP_END + 1) +#define KFENCE_AREA_END (KFENCE_AREA_START + KFENCE_AREA_SIZE - 1) + #define pte_ERROR(e) \ pr_err("%s:%d: bad pte %016lx.\n", __FILE__, __LINE__, pte_val(e)) #ifndef __PAGETABLE_PMD_FOLDED @@ -350,6 +363,9 @@ static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *pt extern pgd_t swapper_pg_dir[]; extern pgd_t invalid_pg_dir[]; +struct page *dmw_virt_to_page(unsigned long kaddr); +struct page *tlb_virt_to_page(unsigned long kaddr); + /* * The following only work if 
pte_present() is true. * Undefined behaviour if not.. @@ -596,6 +612,9 @@ static inline long pmd_protnone(pmd_t pmd) } #endif /* CONFIG_NUMA_BALANCING */ +#define pmd_leaf(pmd) ((pmd_val(pmd) & _PAGE_HUGE) != 0) +#define pud_leaf(pud) ((pud_val(pud) & _PAGE_HUGE) != 0) + /* * We provide our own get_unmapped area to cope with the virtual aliasing * constraints placed on us by the cache architecture. diff --git a/arch/loongarch/include/asm/processor.h b/arch/loongarch/include/asm/processor.h index 636e1c66398c..c3bc44b5f5b3 100644 --- a/arch/loongarch/include/asm/processor.h +++ b/arch/loongarch/include/asm/processor.h @@ -80,11 +80,22 @@ BUILD_FPR_ACCESS(32) BUILD_FPR_ACCESS(64) struct loongarch_fpu { - unsigned int fcsr; uint64_t fcc; /* 8x8 */ + uint32_t fcsr; + uint32_t ftop; union fpureg fpr[NUM_FPU_REGS]; }; +struct loongarch_lbt { + /* Scratch registers */ + unsigned long scr0; + unsigned long scr1; + unsigned long scr2; + unsigned long scr3; + /* Eflags register */ + unsigned long eflags; +}; + #define INIT_CPUMASK { \ {0,} \ } @@ -113,15 +124,6 @@ struct thread_struct { unsigned long csr_ecfg; unsigned long csr_badvaddr; /* Last user fault */ - /* Scratch registers */ - unsigned long scr0; - unsigned long scr1; - unsigned long scr2; - unsigned long scr3; - - /* Eflags register */ - unsigned long eflags; - /* Other stuff associated with the thread. */ unsigned long trap_nr; unsigned long error_code; @@ -133,6 +135,7 @@ struct thread_struct { * context because they are conditionally copied at fork(). */ struct loongarch_fpu fpu FPU_ALIGN; + struct loongarch_lbt lbt; /* Also conditionally copied */ /* Hardware breakpoints pinned to this task. 
*/ struct perf_event *hbp_break[LOONGARCH_MAX_BRP]; @@ -174,8 +177,9 @@ struct thread_struct { * FPU & vector registers \ */ \ .fpu = { \ - .fcsr = 0, \ .fcc = 0, \ + .fcsr = 0, \ + .ftop = 0, \ .fpr = {{{0,},},}, \ }, \ .hbp_break = {0}, \ diff --git a/arch/loongarch/include/asm/setup.h b/arch/loongarch/include/asm/setup.h index be05c0e706a2..a0bc159ce8bd 100644 --- a/arch/loongarch/include/asm/setup.h +++ b/arch/loongarch/include/asm/setup.h @@ -7,6 +7,7 @@ #define _LOONGARCH_SETUP_H #include <linux/types.h> +#include <asm/sections.h> #include <uapi/asm/setup.h> #define VECSIZE 0x200 @@ -33,8 +34,13 @@ extern long __la_abs_end; extern long __rela_dyn_begin; extern long __rela_dyn_end; -extern void * __init relocate_kernel(void); +extern unsigned long __init relocate_kernel(void); #endif +static inline unsigned long kaslr_offset(void) +{ + return (unsigned long)&_text - VMLINUX_LOAD_ADDRESS; +} + #endif /* __SETUP_H */ diff --git a/arch/loongarch/include/asm/stackframe.h b/arch/loongarch/include/asm/stackframe.h index 7df80e6ae9d2..4fb1e6408b98 100644 --- a/arch/loongarch/include/asm/stackframe.h +++ b/arch/loongarch/include/asm/stackframe.h @@ -158,6 +158,10 @@ cfi_st u0, PT_R21, \docfi csrrd u0, PERCPU_BASE_KS 9: +#ifdef CONFIG_KGDB + li.w t0, CSR_CRMD_WE + csrxchg t0, t0, LOONGARCH_CSR_CRMD +#endif .endm .macro SAVE_ALL docfi=0 diff --git a/arch/loongarch/include/asm/string.h b/arch/loongarch/include/asm/string.h index 7b29cc9c70aa..5bb5a90d2681 100644 --- a/arch/loongarch/include/asm/string.h +++ b/arch/loongarch/include/asm/string.h @@ -7,11 +7,31 @@ #define __HAVE_ARCH_MEMSET extern void *memset(void *__s, int __c, size_t __count); +extern void *__memset(void *__s, int __c, size_t __count); #define __HAVE_ARCH_MEMCPY extern void *memcpy(void *__to, __const__ void *__from, size_t __n); +extern void *__memcpy(void *__to, __const__ void *__from, size_t __n); #define __HAVE_ARCH_MEMMOVE extern void *memmove(void *__dest, __const__ void *__src, size_t __n); 
+extern void *__memmove(void *__dest, __const__ void *__src, size_t __n); + +#if defined(CONFIG_KASAN) && !defined(__SANITIZE_ADDRESS__) + +/* + * For files that are not instrumented (e.g. mm/slub.c) we + * should use not instrumented version of mem* functions. + */ + +#define memset(s, c, n) __memset(s, c, n) +#define memcpy(dst, src, len) __memcpy(dst, src, len) +#define memmove(dst, src, len) __memmove(dst, src, len) + +#ifndef __NO_FORTIFY +#define __NO_FORTIFY /* FORTIFY_SOURCE uses __builtin_memcpy, etc. */ +#endif + +#endif #endif /* _ASM_STRING_H */ diff --git a/arch/loongarch/include/asm/switch_to.h b/arch/loongarch/include/asm/switch_to.h index 24e3094bebab..5b225aff3ba2 100644 --- a/arch/loongarch/include/asm/switch_to.h +++ b/arch/loongarch/include/asm/switch_to.h @@ -7,6 +7,7 @@ #include <asm/cpu-features.h> #include <asm/fpu.h> +#include <asm/lbt.h> struct task_struct; @@ -34,6 +35,7 @@ extern asmlinkage struct task_struct *__switch_to(struct task_struct *prev, #define switch_to(prev, next, last) \ do { \ lose_fpu_inatomic(1, prev); \ + lose_lbt_inatomic(1, prev); \ hw_breakpoint_thread_switch(next); \ (last) = __switch_to(prev, next, task_thread_info(next), \ __builtin_return_address(0), __builtin_frame_address(0)); \ diff --git a/arch/loongarch/include/asm/thread_info.h b/arch/loongarch/include/asm/thread_info.h index 1a3354ca056e..8cb653d49a54 100644 --- a/arch/loongarch/include/asm/thread_info.h +++ b/arch/loongarch/include/asm/thread_info.h @@ -84,6 +84,8 @@ register unsigned long current_stack_pointer __asm__("$sp"); #define TIF_SINGLESTEP 16 /* Single Step */ #define TIF_LSX_CTX_LIVE 17 /* LSX context must be preserved */ #define TIF_LASX_CTX_LIVE 18 /* LASX context must be preserved */ +#define TIF_USEDLBT 19 /* LBT was used by this task this quantum (SMP) */ +#define TIF_LBT_CTX_LIVE 20 /* LBT context must be preserved */ #define _TIF_SIGPENDING (1<<TIF_SIGPENDING) #define _TIF_NEED_RESCHED (1<<TIF_NEED_RESCHED) @@ -101,6 +103,8 @@ register 
unsigned long current_stack_pointer __asm__("$sp"); #define _TIF_SINGLESTEP (1<<TIF_SINGLESTEP) #define _TIF_LSX_CTX_LIVE (1<<TIF_LSX_CTX_LIVE) #define _TIF_LASX_CTX_LIVE (1<<TIF_LASX_CTX_LIVE) +#define _TIF_USEDLBT (1<<TIF_USEDLBT) +#define _TIF_LBT_CTX_LIVE (1<<TIF_LBT_CTX_LIVE) #endif /* __KERNEL__ */ #endif /* _ASM_THREAD_INFO_H */ diff --git a/arch/loongarch/include/asm/xor.h b/arch/loongarch/include/asm/xor.h new file mode 100644 index 000000000000..12467fffee46 --- /dev/null +++ b/arch/loongarch/include/asm/xor.h @@ -0,0 +1,68 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (C) 2023 WANG Xuerui <git@xen0n.name> + */ +#ifndef _ASM_LOONGARCH_XOR_H +#define _ASM_LOONGARCH_XOR_H + +#include <asm/cpu-features.h> +#include <asm/xor_simd.h> + +#ifdef CONFIG_CPU_HAS_LSX +static struct xor_block_template xor_block_lsx = { + .name = "lsx", + .do_2 = xor_lsx_2, + .do_3 = xor_lsx_3, + .do_4 = xor_lsx_4, + .do_5 = xor_lsx_5, +}; + +#define XOR_SPEED_LSX() \ + do { \ + if (cpu_has_lsx) \ + xor_speed(&xor_block_lsx); \ + } while (0) +#else /* CONFIG_CPU_HAS_LSX */ +#define XOR_SPEED_LSX() +#endif /* CONFIG_CPU_HAS_LSX */ + +#ifdef CONFIG_CPU_HAS_LASX +static struct xor_block_template xor_block_lasx = { + .name = "lasx", + .do_2 = xor_lasx_2, + .do_3 = xor_lasx_3, + .do_4 = xor_lasx_4, + .do_5 = xor_lasx_5, +}; + +#define XOR_SPEED_LASX() \ + do { \ + if (cpu_has_lasx) \ + xor_speed(&xor_block_lasx); \ + } while (0) +#else /* CONFIG_CPU_HAS_LASX */ +#define XOR_SPEED_LASX() +#endif /* CONFIG_CPU_HAS_LASX */ + +/* + * For grins, also test the generic routines. + * + * More importantly: it cannot be ruled out at this point of time, that some + * future (maybe reduced) models could run the vector algorithms slower than + * the scalar ones, maybe for errata or micro-op reasons. It may be + * appropriate to revisit this after one or two more uarch generations. 
+ */ +#include <asm-generic/xor.h> + +#undef XOR_TRY_TEMPLATES +#define XOR_TRY_TEMPLATES \ +do { \ + xor_speed(&xor_block_8regs); \ + xor_speed(&xor_block_8regs_p); \ + xor_speed(&xor_block_32regs); \ + xor_speed(&xor_block_32regs_p); \ + XOR_SPEED_LSX(); \ + XOR_SPEED_LASX(); \ +} while (0) + +#endif /* _ASM_LOONGARCH_XOR_H */ diff --git a/arch/loongarch/include/asm/xor_simd.h b/arch/loongarch/include/asm/xor_simd.h new file mode 100644 index 000000000000..471b96332f38 --- /dev/null +++ b/arch/loongarch/include/asm/xor_simd.h @@ -0,0 +1,34 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (C) 2023 WANG Xuerui <git@xen0n.name> + */ +#ifndef _ASM_LOONGARCH_XOR_SIMD_H +#define _ASM_LOONGARCH_XOR_SIMD_H + +#ifdef CONFIG_CPU_HAS_LSX +void xor_lsx_2(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2); +void xor_lsx_3(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, const unsigned long * __restrict p3); +void xor_lsx_4(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, const unsigned long * __restrict p3, + const unsigned long * __restrict p4); +void xor_lsx_5(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, const unsigned long * __restrict p3, + const unsigned long * __restrict p4, const unsigned long * __restrict p5); +#endif /* CONFIG_CPU_HAS_LSX */ + +#ifdef CONFIG_CPU_HAS_LASX +void xor_lasx_2(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2); +void xor_lasx_3(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, const unsigned long * __restrict p3); +void xor_lasx_4(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, const unsigned long * __restrict p3, + const unsigned long * __restrict p4); +void xor_lasx_5(unsigned long bytes, unsigned long * __restrict p1, + const 
unsigned long * __restrict p2, const unsigned long * __restrict p3, + const unsigned long * __restrict p4, const unsigned long * __restrict p5); +#endif /* CONFIG_CPU_HAS_LASX */ + +#endif /* _ASM_LOONGARCH_XOR_SIMD_H */ diff --git a/arch/loongarch/include/uapi/asm/ptrace.h b/arch/loongarch/include/uapi/asm/ptrace.h index 06e3be52cb04..ac915f841650 100644 --- a/arch/loongarch/include/uapi/asm/ptrace.h +++ b/arch/loongarch/include/uapi/asm/ptrace.h @@ -56,6 +56,12 @@ struct user_lasx_state { uint64_t vregs[32*4]; }; +struct user_lbt_state { + uint64_t scr[4]; + uint32_t eflags; + uint32_t ftop; +}; + struct user_watch_state { uint64_t dbg_info; struct { diff --git a/arch/loongarch/include/uapi/asm/sigcontext.h b/arch/loongarch/include/uapi/asm/sigcontext.h index 4cd7d16f7037..6c22f616b8f1 100644 --- a/arch/loongarch/include/uapi/asm/sigcontext.h +++ b/arch/loongarch/include/uapi/asm/sigcontext.h @@ -59,4 +59,14 @@ struct lasx_context { __u32 fcsr; }; +/* LBT context */ +#define LBT_CTX_MAGIC 0x42540001 +#define LBT_CTX_ALIGN 8 +struct lbt_context { + __u64 regs[4]; + __u32 eflags; + __u32 ftop; +}; + + #endif /* _UAPI_ASM_SIGCONTEXT_H */ diff --git a/arch/loongarch/kernel/Makefile b/arch/loongarch/kernel/Makefile index 8e279f04f9e7..c56ea0b75448 100644 --- a/arch/loongarch/kernel/Makefile +++ b/arch/loongarch/kernel/Makefile @@ -15,6 +15,8 @@ obj-$(CONFIG_EFI) += efi.o obj-$(CONFIG_CPU_HAS_FPU) += fpu.o kfpu.o +obj-$(CONFIG_CPU_HAS_LBT) += lbt.o + obj-$(CONFIG_ARCH_STRICT_ALIGN) += unaligned.o ifdef CONFIG_FUNCTION_TRACER @@ -32,6 +34,12 @@ ifdef CONFIG_FUNCTION_TRACER CFLAGS_REMOVE_rethook_trampoline.o = $(CC_FLAGS_FTRACE) endif +KASAN_SANITIZE_efi.o := n +KASAN_SANITIZE_cpu-probe.o := n +KASAN_SANITIZE_traps.o := n +KASAN_SANITIZE_smp.o := n +KASAN_SANITIZE_vdso.o := n + obj-$(CONFIG_MODULES) += module.o module-sections.o obj-$(CONFIG_STACKTRACE) += stacktrace.o @@ -54,6 +62,7 @@ obj-$(CONFIG_UNWINDER_PROLOGUE) += unwind_prologue.o obj-$(CONFIG_PERF_EVENTS) += 
perf_event.o perf_regs.o obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o +obj-$(CONFIG_KGDB) += kgdb.o obj-$(CONFIG_KPROBES) += kprobes.o obj-$(CONFIG_RETHOOK) += rethook.o rethook_trampoline.o obj-$(CONFIG_UPROBES) += uprobes.o diff --git a/arch/loongarch/kernel/asm-offsets.c b/arch/loongarch/kernel/asm-offsets.c index 505e4bf59603..8da0726777ed 100644 --- a/arch/loongarch/kernel/asm-offsets.c +++ b/arch/loongarch/kernel/asm-offsets.c @@ -118,13 +118,6 @@ void output_thread_defines(void) OFFSET(THREAD_CSRECFG, task_struct, thread.csr_ecfg); - OFFSET(THREAD_SCR0, task_struct, thread.scr0); - OFFSET(THREAD_SCR1, task_struct, thread.scr1); - OFFSET(THREAD_SCR2, task_struct, thread.scr2); - OFFSET(THREAD_SCR3, task_struct, thread.scr3); - - OFFSET(THREAD_EFLAGS, task_struct, thread.eflags); - OFFSET(THREAD_FPU, task_struct, thread.fpu); OFFSET(THREAD_BVADDR, task_struct, \ @@ -172,6 +165,17 @@ void output_thread_fpu_defines(void) OFFSET(THREAD_FCSR, loongarch_fpu, fcsr); OFFSET(THREAD_FCC, loongarch_fpu, fcc); + OFFSET(THREAD_FTOP, loongarch_fpu, ftop); + BLANK(); +} + +void output_thread_lbt_defines(void) +{ + OFFSET(THREAD_SCR0, loongarch_lbt, scr0); + OFFSET(THREAD_SCR1, loongarch_lbt, scr1); + OFFSET(THREAD_SCR2, loongarch_lbt, scr2); + OFFSET(THREAD_SCR3, loongarch_lbt, scr3); + OFFSET(THREAD_EFLAGS, loongarch_lbt, eflags); BLANK(); } diff --git a/arch/loongarch/kernel/cpu-probe.c b/arch/loongarch/kernel/cpu-probe.c index e925579c7a71..55320813ee08 100644 --- a/arch/loongarch/kernel/cpu-probe.c +++ b/arch/loongarch/kernel/cpu-probe.c @@ -144,6 +144,20 @@ static void cpu_probe_common(struct cpuinfo_loongarch *c) c->options |= LOONGARCH_CPU_LVZ; elf_hwcap |= HWCAP_LOONGARCH_LVZ; } +#ifdef CONFIG_CPU_HAS_LBT + if (config & CPUCFG2_X86BT) { + c->options |= LOONGARCH_CPU_LBT_X86; + elf_hwcap |= HWCAP_LOONGARCH_LBT_X86; + } + if (config & CPUCFG2_ARMBT) { + c->options |= LOONGARCH_CPU_LBT_ARM; + elf_hwcap |= HWCAP_LOONGARCH_LBT_ARM; + } + if (config & 
CPUCFG2_MIPSBT) { + c->options |= LOONGARCH_CPU_LBT_MIPS; + elf_hwcap |= HWCAP_LOONGARCH_LBT_MIPS; + } +#endif config = read_cpucfg(LOONGARCH_CPUCFG6); if (config & CPUCFG6_PMP) diff --git a/arch/loongarch/kernel/entry.S b/arch/loongarch/kernel/entry.S index d737e3cf42d3..65518bb8f472 100644 --- a/arch/loongarch/kernel/entry.S +++ b/arch/loongarch/kernel/entry.S @@ -58,6 +58,11 @@ SYM_FUNC_START(handle_syscall) SAVE_STATIC +#ifdef CONFIG_KGDB + li.w t1, CSR_CRMD_WE + csrxchg t1, t1, LOONGARCH_CSR_CRMD +#endif + move u0, t0 li.d tp, ~_THREAD_MASK and tp, tp, sp diff --git a/arch/loongarch/kernel/fpu.S b/arch/loongarch/kernel/fpu.S index 501094a09f5d..d53ab10f4644 100644 --- a/arch/loongarch/kernel/fpu.S +++ b/arch/loongarch/kernel/fpu.S @@ -22,7 +22,7 @@ .macro EX insn, reg, src, offs .ex\@: \insn \reg, \src, \offs - _asm_extable .ex\@, fault + _asm_extable .ex\@, .L_fpu_fault .endm .macro sc_save_fp base @@ -138,6 +138,13 @@ .macro sc_save_fcsr base, tmp0 movfcsr2gr \tmp0, fcsr0 EX st.w \tmp0, \base, 0 +#if defined(CONFIG_CPU_HAS_LBT) + /* TM bit is always 0 if LBT not supported */ + andi \tmp0, \tmp0, FPU_CSR_TM + beqz \tmp0, 1f + x86clrtm +1: +#endif .endm .macro sc_restore_fcsr base, tmp0 @@ -309,7 +316,7 @@ EXPORT_SYMBOL(_save_fp) */ SYM_FUNC_START(_restore_fp) fpu_restore_double a0 t1 # clobbers t1 - fpu_restore_csr a0 t1 + fpu_restore_csr a0 t1 t2 fpu_restore_cc a0 t1 t2 # clobbers t1, t2 jr ra SYM_FUNC_END(_restore_fp) @@ -514,7 +521,6 @@ SYM_FUNC_START(_restore_lasx_context) jr ra SYM_FUNC_END(_restore_lasx_context) -SYM_FUNC_START(fault) +.L_fpu_fault: li.w a0, -EFAULT # failure jr ra -SYM_FUNC_END(fault) diff --git a/arch/loongarch/kernel/head.S b/arch/loongarch/kernel/head.S index 5e828a8bc0a0..53b883db0786 100644 --- a/arch/loongarch/kernel/head.S +++ b/arch/loongarch/kernel/head.S @@ -95,12 +95,17 @@ SYM_CODE_START(kernel_entry) # kernel entry point PTR_LI sp, (_THREAD_SIZE - PT_SIZE) PTR_ADD sp, sp, tp set_saved_sp sp, t0, t1 -#endif - /* 
relocate_kernel() returns the new kernel entry point */
-	jr		a0
-	ASM_BUG()
+	/* Jump to the new kernel: new_pc = current_pc + random_offset */
+	pcaddi		t0, 0
+	add.d		t0, t0, a0
+	jirl		zero, t0, 0xc
+#endif /* CONFIG_RANDOMIZE_BASE */
+
+#endif /* CONFIG_RELOCATABLE */
+#ifdef CONFIG_KASAN
+	bl		kasan_early_init
 #endif
 	bl		start_kernel
diff --git a/arch/loongarch/kernel/kfpu.c b/arch/loongarch/kernel/kfpu.c
index 5c46ae8c6cac..ec5b28e570c9 100644
--- a/arch/loongarch/kernel/kfpu.c
+++ b/arch/loongarch/kernel/kfpu.c
@@ -8,19 +8,40 @@
 #include <asm/fpu.h>
 #include <asm/smp.h>
+static unsigned int euen_mask = CSR_EUEN_FPEN;
+
+/*
+ * The critical section between kernel_fpu_begin() and kernel_fpu_end()
+ * is non-reentrant. It is the caller's responsibility to avoid reentrance.
+ * See drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c as an example.
+ */
 static DEFINE_PER_CPU(bool, in_kernel_fpu);
+static DEFINE_PER_CPU(unsigned int, euen_current);
 void kernel_fpu_begin(void)
 {
+	unsigned int *euen_curr;
+
 	preempt_disable();
 	WARN_ON(this_cpu_read(in_kernel_fpu));
 	this_cpu_write(in_kernel_fpu, true);
+	euen_curr = this_cpu_ptr(&euen_current);
-	if (!is_fpu_owner())
-		enable_fpu();
+	/* Set every bit of euen_mask in EUEN; the value kept in *euen_curr is tested below */
+	*euen_curr = csr_xchg32(euen_mask, euen_mask, LOONGARCH_CSR_EUEN);
+
+	/*
+	 * The #ifdef blocks below form a single if / else-if chain:
+	 * LASX, then LSX, then plain FP; only the first match is saved.
+	 */
+#ifdef CONFIG_CPU_HAS_LASX
+	if (*euen_curr & CSR_EUEN_LASXEN)
+		_save_lasx(&current->thread.fpu);
+	else
+#endif
+#ifdef CONFIG_CPU_HAS_LSX
+	if (*euen_curr & CSR_EUEN_LSXEN)
+		_save_lsx(&current->thread.fpu);
 	else
+#endif
+	if (*euen_curr & CSR_EUEN_FPEN)
 		_save_fp(&current->thread.fpu);
 	write_fcsr(LOONGARCH_FCSR0, 0);
@@ -29,15 +50,41 @@ EXPORT_SYMBOL_GPL(kernel_fpu_begin);
 void kernel_fpu_end(void)
 {
+	unsigned int *euen_curr;
+
 	WARN_ON(!this_cpu_read(in_kernel_fpu));
-	if (!is_fpu_owner())
-		disable_fpu();
+	euen_curr = this_cpu_ptr(&euen_current);
+
+	/* Mirror of the chain in kernel_fpu_begin(): restore what was saved there */
+#ifdef CONFIG_CPU_HAS_LASX
+	if (*euen_curr & CSR_EUEN_LASXEN)
+		_restore_lasx(&current->thread.fpu);
 	else
+#endif
+#ifdef CONFIG_CPU_HAS_LSX
+	if (*euen_curr & CSR_EUEN_LSXEN)
+		_restore_lsx(&current->thread.fpu);
+	else
+#endif
+	if (*euen_curr & CSR_EUEN_FPEN)
 		_restore_fp(&current->thread.fpu);
+	/* Write the remembered EUEN bits back (limited to euen_mask) */
+	*euen_curr = csr_xchg32(*euen_curr, euen_mask, LOONGARCH_CSR_EUEN);
+
 	this_cpu_write(in_kernel_fpu, false);
 	preempt_enable();
 }
 EXPORT_SYMBOL_GPL(kernel_fpu_end);
+
+/* Extend euen_mask with the LSX/LASX enable bits when the CPU reports them. */
+static int __init init_euen_mask(void)
+{
+	if (cpu_has_lsx)
+		euen_mask |= CSR_EUEN_LSXEN;
+
+	if (cpu_has_lasx)
+		euen_mask |= CSR_EUEN_LASXEN;
+
+	return 0;
+}
+arch_initcall(init_euen_mask);
diff --git a/arch/loongarch/kernel/kgdb.c b/arch/loongarch/kernel/kgdb.c
new file mode 100644
index 000000000000..445c452d72a7
--- /dev/null
+++ b/arch/loongarch/kernel/kgdb.c
@@ -0,0 +1,727 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * LoongArch KGDB support
+ *
+ * Copyright (C) 2023 Loongson Technology Corporation Limited
+ */
+
+#include <linux/hw_breakpoint.h>
+#include <linux/kdebug.h>
+#include <linux/kgdb.h>
+#include <linux/processor.h>
+#include <linux/ptrace.h>
+#include <linux/sched.h>
+#include <linux/smp.h>
+
+#include <asm/cacheflush.h>
+#include <asm/fpu.h>
+#include <asm/hw_breakpoint.h>
+#include <asm/inst.h>
+#include <asm/irq_regs.h>
+#include <asm/ptrace.h>
+#include <asm/sigcontext.h>
+
+int kgdb_watch_activated;
+static unsigned int stepped_opcode;
+static unsigned long stepped_address;
+
+struct dbg_reg_def_t dbg_reg_def[DBG_MAX_REG_NUM] = {
+	{ "r0", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[0]) },
+	{ "r1", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[1]) },
+	{ "r2", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[2]) },
+	{ "r3", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[3]) },
+	{ "r4", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[4]) },
+	{ "r5", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[5]) },
+	{ "r6", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[6]) },
+	{ "r7", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[7]) },
+	{ "r8", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[8]) },
+	{ "r9", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[9]) },
+	{ "r10",
GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[10]) },
+	{ "r11", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[11]) },
+	{ "r12", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[12]) },
+	{ "r13", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[13]) },
+	{ "r14", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[14]) },
+	{ "r15", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[15]) },
+	{ "r16", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[16]) },
+	{ "r17", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[17]) },
+	{ "r18", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[18]) },
+	{ "r19", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[19]) },
+	{ "r20", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[20]) },
+	{ "r21", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[21]) },
+	{ "r22", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[22]) },
+	{ "r23", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[23]) },
+	{ "r24", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[24]) },
+	{ "r25", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[25]) },
+	{ "r26", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[26]) },
+	{ "r27", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[27]) },
+	{ "r28", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[28]) },
+	{ "r29", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[29]) },
+	{ "r30", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[30]) },
+	{ "r31", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[31]) },
+	{ "orig_a0", GDB_SIZEOF_REG, offsetof(struct pt_regs, orig_a0) },
+	{ "pc", GDB_SIZEOF_REG, offsetof(struct pt_regs, csr_era) },
+	{ "badv", GDB_SIZEOF_REG, offsetof(struct pt_regs, csr_badvaddr) },
+	{ "f0", GDB_SIZEOF_REG, 0 },
+	{ "f1", GDB_SIZEOF_REG, 1 },
+	{ "f2", GDB_SIZEOF_REG, 2 },
+	{ "f3", GDB_SIZEOF_REG, 3 },
+	{ "f4", GDB_SIZEOF_REG, 4 },
+	{ "f5", GDB_SIZEOF_REG, 5 },
+	{ "f6", GDB_SIZEOF_REG, 6 },
+	{ "f7", GDB_SIZEOF_REG, 7 },
+	{ "f8", GDB_SIZEOF_REG, 8 },
+	{ "f9", GDB_SIZEOF_REG, 9 },
+	{ "f10", GDB_SIZEOF_REG, 10 },
+	{ "f11", GDB_SIZEOF_REG, 11 },
+	{ "f12", GDB_SIZEOF_REG, 12 },
+	{ "f13", GDB_SIZEOF_REG, 13 },
+	{ "f14", GDB_SIZEOF_REG, 14 },
+	{ "f15", GDB_SIZEOF_REG, 15 },
+	{ "f16", GDB_SIZEOF_REG, 16 },
+	{ "f17", GDB_SIZEOF_REG, 17 },
+	{ "f18", GDB_SIZEOF_REG, 18 },
+	{ "f19", GDB_SIZEOF_REG, 19 },
+	{ "f20", GDB_SIZEOF_REG, 20 },
+	{ "f21", GDB_SIZEOF_REG, 21 },
+	{ "f22", GDB_SIZEOF_REG, 22 },
+	{ "f23", GDB_SIZEOF_REG, 23 },
+	{ "f24", GDB_SIZEOF_REG, 24 },
+	{ "f25", GDB_SIZEOF_REG, 25 },
+	{ "f26", GDB_SIZEOF_REG, 26 },
+	{ "f27", GDB_SIZEOF_REG, 27 },
+	{ "f28", GDB_SIZEOF_REG, 28 },
+	{ "f29", GDB_SIZEOF_REG, 29 },
+	{ "f30", GDB_SIZEOF_REG, 30 },
+	{ "f31", GDB_SIZEOF_REG, 31 },
+	{ "fcc0", 1, 0 },
+	{ "fcc1", 1, 1 },
+	{ "fcc2", 1, 2 },
+	{ "fcc3", 1, 3 },
+	{ "fcc4", 1, 4 },
+	{ "fcc5", 1, 5 },
+	{ "fcc6", 1, 6 },
+	{ "fcc7", 1, 7 },
+	{ "fcsr", 4, 0 },
+};
+
+/*
+ * Copy the value of debugger register @regno into @mem.
+ *
+ * Returns the register's name from dbg_reg_def[], or NULL when @regno is
+ * out of range. @mem is left untouched when the table offset is -1, or
+ * when an FP register is requested while CSR_EUEN_FPEN is clear in
+ * regs->csr_euen; callers that need a defined value must pre-initialise
+ * @mem.
+ */
+char *dbg_get_reg(int regno, void *mem, struct pt_regs *regs)
+{
+	int reg_offset, reg_size;
+
+	if (regno < 0 || regno >= DBG_MAX_REG_NUM)
+		return NULL;
+
+	reg_offset = dbg_reg_def[regno].offset;
+	reg_size = dbg_reg_def[regno].size;
+
+	if (reg_offset == -1)
+		goto out;
+
+	/* Handle general-purpose/orig_a0/pc/badv registers */
+	if (regno <= DBG_PT_REGS_END) {
+		memcpy(mem, (void *)regs + reg_offset, reg_size);
+		goto out;
+	}
+
+	if (!(regs->csr_euen & CSR_EUEN_FPEN))
+		goto out;
+
+	save_fp(current);
+
+	/* Handle FP registers: for these, reg_offset is an index, not a byte offset */
+	switch (regno) {
+	case DBG_FCSR:				/* Process the fcsr */
+		memcpy(mem, (void *)&current->thread.fpu.fcsr, reg_size);
+		break;
+	case DBG_FCC_BASE ... DBG_FCC_END:	/* Process the fcc */
+		memcpy(mem, (void *)&current->thread.fpu.fcc + reg_offset, reg_size);
+		break;
+	case DBG_FPR_BASE ... DBG_FPR_END:	/* Process the fpr */
+		memcpy(mem, (void *)&current->thread.fpu.fpr[reg_offset], reg_size);
+		break;
+	default:
+		break;
+	}
+
+out:
+	return dbg_reg_def[regno].name;
+}
+
+/*
+ * Store the value at @mem into debugger register @regno.
+ *
+ * Returns -EINVAL when @regno is out of range and 0 otherwise. The write
+ * is silently skipped when the table offset is -1, or when an FP register
+ * is addressed while CSR_EUEN_FPEN is clear in regs->csr_euen.
+ */
+int dbg_set_reg(int regno, void *mem, struct pt_regs *regs)
+{
+	int reg_offset, reg_size;
+
+	if (regno < 0 || regno >= DBG_MAX_REG_NUM)
+		return -EINVAL;
+
+	reg_offset = dbg_reg_def[regno].offset;
+	reg_size = dbg_reg_def[regno].size;
+
+	if (reg_offset == -1)
+		return 0;
+
+	/* Handle general-purpose/orig_a0/pc/badv registers */
+	if (regno <= DBG_PT_REGS_END) {
+		memcpy((void *)regs + reg_offset, mem, reg_size);
+		return 0;
+	}
+
+	if (!(regs->csr_euen & CSR_EUEN_FPEN))
+		return 0;
+
+	/* Handle FP registers: for these, reg_offset is an index, not a byte offset */
+	switch (regno) {
+	case DBG_FCSR:				/* Process the fcsr */
+		memcpy((void *)&current->thread.fpu.fcsr, mem, reg_size);
+		break;
+	case DBG_FCC_BASE ... DBG_FCC_END:	/* Process the fcc */
+		memcpy((void *)&current->thread.fpu.fcc + reg_offset, mem, reg_size);
+		break;
+	case DBG_FPR_BASE ... DBG_FPR_END:	/* Process the fpr */
+		memcpy((void *)&current->thread.fpu.fpr[reg_offset], mem, reg_size);
+		break;
+	default:
+		break;
+	}
+
+	/*
+	 * NOTE(review): presumably restore_fp() reloads the hardware FP state
+	 * from current->thread.fpu; no save_fp() is done in this function
+	 * first - confirm thread.fpu is up to date on this path.
+	 */
+	restore_fp(current);
+
+	return 0;
+}
+
+/*
+ * Similar to regs_to_gdb_regs() except that process is sleeping and so
+ * we may not be able to get all the info.
+ */ +void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p) +{ + /* Initialize to zero */ + memset((char *)gdb_regs, 0, NUMREGBYTES); + + gdb_regs[DBG_LOONGARCH_RA] = p->thread.reg01; + gdb_regs[DBG_LOONGARCH_TP] = (long)p; + gdb_regs[DBG_LOONGARCH_SP] = p->thread.reg03; + + /* S0 - S8 */ + gdb_regs[DBG_LOONGARCH_S0] = p->thread.reg23; + gdb_regs[DBG_LOONGARCH_S1] = p->thread.reg24; + gdb_regs[DBG_LOONGARCH_S2] = p->thread.reg25; + gdb_regs[DBG_LOONGARCH_S3] = p->thread.reg26; + gdb_regs[DBG_LOONGARCH_S4] = p->thread.reg27; + gdb_regs[DBG_LOONGARCH_S5] = p->thread.reg28; + gdb_regs[DBG_LOONGARCH_S6] = p->thread.reg29; + gdb_regs[DBG_LOONGARCH_S7] = p->thread.reg30; + gdb_regs[DBG_LOONGARCH_S8] = p->thread.reg31; + + /* + * PC use return address (RA), i.e. the moment after return from __switch_to() + */ + gdb_regs[DBG_LOONGARCH_PC] = p->thread.reg01; +} + +void kgdb_arch_set_pc(struct pt_regs *regs, unsigned long pc) +{ + regs->csr_era = pc; +} + +void arch_kgdb_breakpoint(void) +{ + __asm__ __volatile__ ( \ + ".globl kgdb_breakinst\n\t" \ + "nop\n" \ + "kgdb_breakinst:\tbreak 2\n\t"); /* BRK_KDB = 2 */ +} + +/* + * Calls linux_debug_hook before the kernel dies. If KGDB is enabled, + * then try to fall into the debugger + */ +static int kgdb_loongarch_notify(struct notifier_block *self, unsigned long cmd, void *ptr) +{ + struct die_args *args = (struct die_args *)ptr; + struct pt_regs *regs = args->regs; + + /* Userspace events, ignore. 
*/ + if (user_mode(regs)) + return NOTIFY_DONE; + + if (!kgdb_io_module_registered) + return NOTIFY_DONE; + + if (atomic_read(&kgdb_active) != -1) + kgdb_nmicallback(smp_processor_id(), regs); + + if (kgdb_handle_exception(args->trapnr, args->signr, cmd, regs)) + return NOTIFY_DONE; + + if (atomic_read(&kgdb_setting_breakpoint)) + if (regs->csr_era == (unsigned long)&kgdb_breakinst) + regs->csr_era += LOONGARCH_INSN_SIZE; + + return NOTIFY_STOP; +} + +bool kgdb_breakpoint_handler(struct pt_regs *regs) +{ + struct die_args args = { + .regs = regs, + .str = "Break", + .err = BRK_KDB, + .trapnr = read_csr_excode(), + .signr = SIGTRAP, + + }; + + return (kgdb_loongarch_notify(NULL, DIE_TRAP, &args) == NOTIFY_STOP) ? true : false; +} + +static struct notifier_block kgdb_notifier = { + .notifier_call = kgdb_loongarch_notify, +}; + +static inline void kgdb_arch_update_addr(struct pt_regs *regs, + char *remcom_in_buffer) +{ + unsigned long addr; + char *ptr; + + ptr = &remcom_in_buffer[1]; + if (kgdb_hex2long(&ptr, &addr)) + regs->csr_era = addr; +} + +/* Calculate the new address for after a step */ +static int get_step_address(struct pt_regs *regs, unsigned long *next_addr) +{ + char cj_val; + unsigned int si, si_l, si_h, rd, rj, cj; + unsigned long pc = instruction_pointer(regs); + union loongarch_instruction *ip = (union loongarch_instruction *)pc; + + if (pc & 3) { + pr_warn("%s: invalid pc 0x%lx\n", __func__, pc); + return -EINVAL; + } + + *next_addr = pc + LOONGARCH_INSN_SIZE; + + si_h = ip->reg0i26_format.immediate_h; + si_l = ip->reg0i26_format.immediate_l; + switch (ip->reg0i26_format.opcode) { + case b_op: + *next_addr = pc + sign_extend64((si_h << 16 | si_l) << 2, 27); + return 0; + case bl_op: + *next_addr = pc + sign_extend64((si_h << 16 | si_l) << 2, 27); + regs->regs[1] = pc + LOONGARCH_INSN_SIZE; + return 0; + } + + rj = ip->reg1i21_format.rj; + cj = (rj & 0x07) + DBG_FCC_BASE; + si_l = ip->reg1i21_format.immediate_l; + si_h = 
ip->reg1i21_format.immediate_h; + dbg_get_reg(cj, &cj_val, regs); + switch (ip->reg1i21_format.opcode) { + case beqz_op: + if (regs->regs[rj] == 0) + *next_addr = pc + sign_extend64((si_h << 16 | si_l) << 2, 22); + return 0; + case bnez_op: + if (regs->regs[rj] != 0) + *next_addr = pc + sign_extend64((si_h << 16 | si_l) << 2, 22); + return 0; + case bceqz_op: /* bceqz_op = bcnez_op */ + if (((rj & 0x18) == 0x00) && !cj_val) /* bceqz */ + *next_addr = pc + sign_extend64((si_h << 16 | si_l) << 2, 22); + if (((rj & 0x18) == 0x08) && cj_val) /* bcnez */ + *next_addr = pc + sign_extend64((si_h << 16 | si_l) << 2, 22); + return 0; + } + + rj = ip->reg2i16_format.rj; + rd = ip->reg2i16_format.rd; + si = ip->reg2i16_format.immediate; + switch (ip->reg2i16_format.opcode) { + case beq_op: + if (regs->regs[rj] == regs->regs[rd]) + *next_addr = pc + sign_extend64(si << 2, 17); + return 0; + case bne_op: + if (regs->regs[rj] != regs->regs[rd]) + *next_addr = pc + sign_extend64(si << 2, 17); + return 0; + case blt_op: + if ((long)regs->regs[rj] < (long)regs->regs[rd]) + *next_addr = pc + sign_extend64(si << 2, 17); + return 0; + case bge_op: + if ((long)regs->regs[rj] >= (long)regs->regs[rd]) + *next_addr = pc + sign_extend64(si << 2, 17); + return 0; + case bltu_op: + if (regs->regs[rj] < regs->regs[rd]) + *next_addr = pc + sign_extend64(si << 2, 17); + return 0; + case bgeu_op: + if (regs->regs[rj] >= regs->regs[rd]) + *next_addr = pc + sign_extend64(si << 2, 17); + return 0; + case jirl_op: + regs->regs[rd] = pc + LOONGARCH_INSN_SIZE; + *next_addr = regs->regs[rj] + sign_extend64(si << 2, 17); + return 0; + } + + return 0; +} + +static int do_single_step(struct pt_regs *regs) +{ + int error = 0; + unsigned long addr = 0; /* Determine where the target instruction will send us to */ + + error = get_step_address(regs, &addr); + if (error) + return error; + + /* Store the opcode in the stepped address */ + error = get_kernel_nofault(stepped_opcode, (void *)addr); + if (error) + 
return error; + + stepped_address = addr; + + /* Replace the opcode with the break instruction */ + error = copy_to_kernel_nofault((void *)stepped_address, + arch_kgdb_ops.gdb_bpt_instr, BREAK_INSTR_SIZE); + flush_icache_range(addr, addr + BREAK_INSTR_SIZE); + + if (error) { + stepped_opcode = 0; + stepped_address = 0; + } else { + kgdb_single_step = 1; + atomic_set(&kgdb_cpu_doing_single_step, raw_smp_processor_id()); + } + + return error; +} + +/* Undo a single step */ +static void undo_single_step(struct pt_regs *regs) +{ + if (stepped_opcode) { + copy_to_kernel_nofault((void *)stepped_address, + (void *)&stepped_opcode, BREAK_INSTR_SIZE); + flush_icache_range(stepped_address, stepped_address + BREAK_INSTR_SIZE); + } + + stepped_opcode = 0; + stepped_address = 0; + kgdb_single_step = 0; + atomic_set(&kgdb_cpu_doing_single_step, -1); +} + +int kgdb_arch_handle_exception(int vector, int signo, int err_code, + char *remcom_in_buffer, char *remcom_out_buffer, + struct pt_regs *regs) +{ + int ret = 0; + + undo_single_step(regs); + regs->csr_prmd |= CSR_PRMD_PWE; + + switch (remcom_in_buffer[0]) { + case 'D': + case 'k': + regs->csr_prmd &= ~CSR_PRMD_PWE; + fallthrough; + case 'c': + kgdb_arch_update_addr(regs, remcom_in_buffer); + break; + case 's': + kgdb_arch_update_addr(regs, remcom_in_buffer); + ret = do_single_step(regs); + break; + default: + ret = -1; + } + + return ret; +} + +static struct hw_breakpoint { + unsigned int enabled; + unsigned long addr; + int len; + int type; + struct perf_event * __percpu *pev; +} breakinfo[LOONGARCH_MAX_BRP]; + +static int hw_break_reserve_slot(int breakno) +{ + int cpu, cnt = 0; + struct perf_event **pevent; + + for_each_online_cpu(cpu) { + cnt++; + pevent = per_cpu_ptr(breakinfo[breakno].pev, cpu); + if (dbg_reserve_bp_slot(*pevent)) + goto fail; + } + + return 0; + +fail: + for_each_online_cpu(cpu) { + cnt--; + if (!cnt) + break; + pevent = per_cpu_ptr(breakinfo[breakno].pev, cpu); + dbg_release_bp_slot(*pevent); + } + + 
return -1; +} + +static int hw_break_release_slot(int breakno) +{ + int cpu; + struct perf_event **pevent; + + if (dbg_is_early) + return 0; + + for_each_online_cpu(cpu) { + pevent = per_cpu_ptr(breakinfo[breakno].pev, cpu); + if (dbg_release_bp_slot(*pevent)) + /* + * The debugger is responsible for handing the retry on + * remove failure. + */ + return -1; + } + + return 0; +} + +static int kgdb_set_hw_break(unsigned long addr, int len, enum kgdb_bptype bptype) +{ + int i; + + for (i = 0; i < LOONGARCH_MAX_BRP; i++) + if (!breakinfo[i].enabled) + break; + + if (i == LOONGARCH_MAX_BRP) + return -1; + + switch (bptype) { + case BP_HARDWARE_BREAKPOINT: + breakinfo[i].type = HW_BREAKPOINT_X; + break; + case BP_READ_WATCHPOINT: + breakinfo[i].type = HW_BREAKPOINT_R; + break; + case BP_WRITE_WATCHPOINT: + breakinfo[i].type = HW_BREAKPOINT_W; + break; + case BP_ACCESS_WATCHPOINT: + breakinfo[i].type = HW_BREAKPOINT_RW; + break; + default: + return -1; + } + + switch (len) { + case 1: + breakinfo[i].len = HW_BREAKPOINT_LEN_1; + break; + case 2: + breakinfo[i].len = HW_BREAKPOINT_LEN_2; + break; + case 4: + breakinfo[i].len = HW_BREAKPOINT_LEN_4; + break; + case 8: + breakinfo[i].len = HW_BREAKPOINT_LEN_8; + break; + default: + return -1; + } + + breakinfo[i].addr = addr; + if (hw_break_reserve_slot(i)) { + breakinfo[i].addr = 0; + return -1; + } + breakinfo[i].enabled = 1; + + return 0; +} + +static int kgdb_remove_hw_break(unsigned long addr, int len, enum kgdb_bptype bptype) +{ + int i; + + for (i = 0; i < LOONGARCH_MAX_BRP; i++) + if (breakinfo[i].addr == addr && breakinfo[i].enabled) + break; + + if (i == LOONGARCH_MAX_BRP) + return -1; + + if (hw_break_release_slot(i)) { + pr_err("Cannot remove hw breakpoint at %lx\n", addr); + return -1; + } + breakinfo[i].enabled = 0; + + return 0; +} + +static void kgdb_disable_hw_break(struct pt_regs *regs) +{ + int i; + int cpu = raw_smp_processor_id(); + struct perf_event *bp; + + for (i = 0; i < LOONGARCH_MAX_BRP; i++) { + if 
(!breakinfo[i].enabled) + continue; + + bp = *per_cpu_ptr(breakinfo[i].pev, cpu); + if (bp->attr.disabled == 1) + continue; + + arch_uninstall_hw_breakpoint(bp); + bp->attr.disabled = 1; + } + + /* Disable hardware debugging while we are in kgdb */ + csr_xchg32(0, CSR_CRMD_WE, LOONGARCH_CSR_CRMD); +} + +static void kgdb_remove_all_hw_break(void) +{ + int i; + int cpu = raw_smp_processor_id(); + struct perf_event *bp; + + for (i = 0; i < LOONGARCH_MAX_BRP; i++) { + if (!breakinfo[i].enabled) + continue; + + bp = *per_cpu_ptr(breakinfo[i].pev, cpu); + if (!bp->attr.disabled) { + arch_uninstall_hw_breakpoint(bp); + bp->attr.disabled = 1; + continue; + } + + if (hw_break_release_slot(i)) + pr_err("KGDB: hw bpt remove failed %lx\n", breakinfo[i].addr); + breakinfo[i].enabled = 0; + } + + csr_xchg32(0, CSR_CRMD_WE, LOONGARCH_CSR_CRMD); + kgdb_watch_activated = 0; +} + +static void kgdb_correct_hw_break(void) +{ + int i, activated = 0; + + for (i = 0; i < LOONGARCH_MAX_BRP; i++) { + struct perf_event *bp; + int val; + int cpu = raw_smp_processor_id(); + + if (!breakinfo[i].enabled) + continue; + + bp = *per_cpu_ptr(breakinfo[i].pev, cpu); + if (bp->attr.disabled != 1) + continue; + + bp->attr.bp_addr = breakinfo[i].addr; + bp->attr.bp_len = breakinfo[i].len; + bp->attr.bp_type = breakinfo[i].type; + + val = hw_breakpoint_arch_parse(bp, &bp->attr, counter_arch_bp(bp)); + if (val) + return; + + val = arch_install_hw_breakpoint(bp); + if (!val) + bp->attr.disabled = 0; + activated = 1; + } + + csr_xchg32(activated ? 
CSR_CRMD_WE : 0, CSR_CRMD_WE, LOONGARCH_CSR_CRMD); + kgdb_watch_activated = activated; +} + +const struct kgdb_arch arch_kgdb_ops = { + .gdb_bpt_instr = {0x02, 0x00, break_op >> 1, 0x00}, /* BRK_KDB = 2 */ + .flags = KGDB_HW_BREAKPOINT, + .set_hw_breakpoint = kgdb_set_hw_break, + .remove_hw_breakpoint = kgdb_remove_hw_break, + .disable_hw_break = kgdb_disable_hw_break, + .remove_all_hw_break = kgdb_remove_all_hw_break, + .correct_hw_break = kgdb_correct_hw_break, +}; + +int kgdb_arch_init(void) +{ + return register_die_notifier(&kgdb_notifier); +} + +void kgdb_arch_late(void) +{ + int i, cpu; + struct perf_event_attr attr; + struct perf_event **pevent; + + hw_breakpoint_init(&attr); + + attr.bp_addr = (unsigned long)kgdb_arch_init; + attr.bp_len = HW_BREAKPOINT_LEN_4; + attr.bp_type = HW_BREAKPOINT_W; + attr.disabled = 1; + + for (i = 0; i < LOONGARCH_MAX_BRP; i++) { + if (breakinfo[i].pev) + continue; + + breakinfo[i].pev = register_wide_hw_breakpoint(&attr, NULL, NULL); + if (IS_ERR((void * __force)breakinfo[i].pev)) { + pr_err("kgdb: Could not allocate hw breakpoints.\n"); + breakinfo[i].pev = NULL; + return; + } + + for_each_online_cpu(cpu) { + pevent = per_cpu_ptr(breakinfo[i].pev, cpu); + if (pevent[0]->destroy) { + pevent[0]->destroy = NULL; + release_bp_slot(*pevent); + } + } + } +} + +void kgdb_arch_exit(void) +{ + int i; + + for (i = 0; i < LOONGARCH_MAX_BRP; i++) { + if (breakinfo[i].pev) { + unregister_wide_hw_breakpoint(breakinfo[i].pev); + breakinfo[i].pev = NULL; + } + } + + unregister_die_notifier(&kgdb_notifier); +} diff --git a/arch/loongarch/kernel/lbt.S b/arch/loongarch/kernel/lbt.S new file mode 100644 index 000000000000..9c75120a26d8 --- /dev/null +++ b/arch/loongarch/kernel/lbt.S @@ -0,0 +1,155 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Author: Qi Hu <huqi@loongson.cn> + * Huacai Chen <chenhuacai@loongson.cn> + * + * Copyright (C) 2020-2023 Loongson Technology Corporation Limited + */ +#include <asm/asm.h> +#include <asm/asmmacro.h> 
+#include <asm/asm-extable.h> +#include <asm/asm-offsets.h> +#include <asm/errno.h> +#include <asm/regdef.h> + +#define SCR_REG_WIDTH 8 + + .macro EX insn, reg, src, offs +.ex\@: \insn \reg, \src, \offs + _asm_extable .ex\@, .L_lbt_fault + .endm + +/* + * Save a thread's lbt context. + */ +SYM_FUNC_START(_save_lbt) + movscr2gr t1, $scr0 # save scr + stptr.d t1, a0, THREAD_SCR0 + movscr2gr t1, $scr1 + stptr.d t1, a0, THREAD_SCR1 + movscr2gr t1, $scr2 + stptr.d t1, a0, THREAD_SCR2 + movscr2gr t1, $scr3 + stptr.d t1, a0, THREAD_SCR3 + + x86mfflag t1, 0x3f # save eflags + stptr.d t1, a0, THREAD_EFLAGS + jr ra +SYM_FUNC_END(_save_lbt) +EXPORT_SYMBOL(_save_lbt) + +/* + * Restore a thread's lbt context. + */ +SYM_FUNC_START(_restore_lbt) + ldptr.d t1, a0, THREAD_SCR0 # restore scr + movgr2scr $scr0, t1 + ldptr.d t1, a0, THREAD_SCR1 + movgr2scr $scr1, t1 + ldptr.d t1, a0, THREAD_SCR2 + movgr2scr $scr2, t1 + ldptr.d t1, a0, THREAD_SCR3 + movgr2scr $scr3, t1 + + ldptr.d t1, a0, THREAD_EFLAGS # restore eflags + x86mtflag t1, 0x3f + jr ra +SYM_FUNC_END(_restore_lbt) +EXPORT_SYMBOL(_restore_lbt) + +/* + * Load scr/eflag with zero. 
+ */ +SYM_FUNC_START(_init_lbt) + movgr2scr $scr0, zero + movgr2scr $scr1, zero + movgr2scr $scr2, zero + movgr2scr $scr3, zero + + x86mtflag zero, 0x3f + jr ra +SYM_FUNC_END(_init_lbt) + +/* + * a0: scr + * a1: eflag + */ +SYM_FUNC_START(_save_lbt_context) + movscr2gr t1, $scr0 # save scr + EX st.d t1, a0, (0 * SCR_REG_WIDTH) + movscr2gr t1, $scr1 + EX st.d t1, a0, (1 * SCR_REG_WIDTH) + movscr2gr t1, $scr2 + EX st.d t1, a0, (2 * SCR_REG_WIDTH) + movscr2gr t1, $scr3 + EX st.d t1, a0, (3 * SCR_REG_WIDTH) + + x86mfflag t1, 0x3f # save eflags + EX st.w t1, a1, 0 + li.w a0, 0 # success + jr ra +SYM_FUNC_END(_save_lbt_context) + +/* + * a0: scr + * a1: eflag + */ +SYM_FUNC_START(_restore_lbt_context) + EX ld.d t1, a0, (0 * SCR_REG_WIDTH) # restore scr + movgr2scr $scr0, t1 + EX ld.d t1, a0, (1 * SCR_REG_WIDTH) + movgr2scr $scr1, t1 + EX ld.d t1, a0, (2 * SCR_REG_WIDTH) + movgr2scr $scr2, t1 + EX ld.d t1, a0, (3 * SCR_REG_WIDTH) + movgr2scr $scr3, t1 + + EX ld.w t1, a1, 0 # restore eflags + x86mtflag t1, 0x3f + li.w a0, 0 # success + jr ra +SYM_FUNC_END(_restore_lbt_context) + +/* + * a0: ftop + */ +SYM_FUNC_START(_save_ftop_context) + x86mftop t1 + st.w t1, a0, 0 + li.w a0, 0 # success + jr ra +SYM_FUNC_END(_save_ftop_context) + +/* + * a0: ftop + */ +SYM_FUNC_START(_restore_ftop_context) + ld.w t1, a0, 0 + andi t1, t1, 0x7 + la.pcrel a0, 1f + alsl.d a0, t1, a0, 3 + jr a0 +1: + x86mttop 0 + b 2f + x86mttop 1 + b 2f + x86mttop 2 + b 2f + x86mttop 3 + b 2f + x86mttop 4 + b 2f + x86mttop 5 + b 2f + x86mttop 6 + b 2f + x86mttop 7 +2: + li.w a0, 0 # success + jr ra +SYM_FUNC_END(_restore_ftop_context) + +.L_lbt_fault: + li.w a0, -EFAULT # failure + jr ra diff --git a/arch/loongarch/kernel/numa.c b/arch/loongarch/kernel/numa.c index 708665895b47..c7d33c489e04 100644 --- a/arch/loongarch/kernel/numa.c +++ b/arch/loongarch/kernel/numa.c @@ -67,39 +67,7 @@ static int __init pcpu_cpu_distance(unsigned int from, unsigned int to) void __init pcpu_populate_pte(unsigned long addr) { 
- pgd_t *pgd = pgd_offset_k(addr); - p4d_t *p4d = p4d_offset(pgd, addr); - pud_t *pud; - pmd_t *pmd; - - if (p4d_none(*p4d)) { - pud_t *new; - - new = memblock_alloc(PAGE_SIZE, PAGE_SIZE); - pgd_populate(&init_mm, pgd, new); -#ifndef __PAGETABLE_PUD_FOLDED - pud_init(new); -#endif - } - - pud = pud_offset(p4d, addr); - if (pud_none(*pud)) { - pmd_t *new; - - new = memblock_alloc(PAGE_SIZE, PAGE_SIZE); - pud_populate(&init_mm, pud, new); -#ifndef __PAGETABLE_PMD_FOLDED - pmd_init(new); -#endif - } - - pmd = pmd_offset(pud, addr); - if (!pmd_present(*pmd)) { - pte_t *new; - - new = memblock_alloc(PAGE_SIZE, PAGE_SIZE); - pmd_populate_kernel(&init_mm, pmd, new); - } + populate_kernel_pte(addr); } void __init setup_per_cpu_areas(void) @@ -470,7 +438,6 @@ void __init mem_init(void) { high_memory = (void *) __va(get_num_physpages() << PAGE_SHIFT); memblock_free_all(); - setup_zero_pages(); /* This comes from node 0 */ } int pcibus_to_node(struct pci_bus *bus) diff --git a/arch/loongarch/kernel/process.c b/arch/loongarch/kernel/process.c index ba457e43f5be..3cb082e0c992 100644 --- a/arch/loongarch/kernel/process.c +++ b/arch/loongarch/kernel/process.c @@ -38,6 +38,7 @@ #include <asm/cpu.h> #include <asm/elf.h> #include <asm/fpu.h> +#include <asm/lbt.h> #include <asm/io.h> #include <asm/irq.h> #include <asm/irq_regs.h> @@ -82,9 +83,11 @@ void start_thread(struct pt_regs *regs, unsigned long pc, unsigned long sp) euen = regs->csr_euen & ~(CSR_EUEN_FPEN); regs->csr_euen = euen; lose_fpu(0); + lose_lbt(0); clear_thread_flag(TIF_LSX_CTX_LIVE); clear_thread_flag(TIF_LASX_CTX_LIVE); + clear_thread_flag(TIF_LBT_CTX_LIVE); clear_used_math(); regs->csr_era = pc; regs->regs[3] = sp; @@ -121,10 +124,14 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) preempt_enable(); - if (used_math()) - memcpy(dst, src, sizeof(struct task_struct)); - else + if (!used_math()) memcpy(dst, src, offsetof(struct task_struct, thread.fpu.fpr)); + else + memcpy(dst, src, 
offsetof(struct task_struct, thread.lbt.scr0)); + +#ifdef CONFIG_CPU_HAS_LBT + memcpy(&dst->thread.lbt, &src->thread.lbt, sizeof(struct loongarch_lbt)); +#endif return 0; } @@ -189,8 +196,10 @@ out: ptrace_hw_copy_thread(p); clear_tsk_thread_flag(p, TIF_USEDFPU); clear_tsk_thread_flag(p, TIF_USEDSIMD); + clear_tsk_thread_flag(p, TIF_USEDLBT); clear_tsk_thread_flag(p, TIF_LSX_CTX_LIVE); clear_tsk_thread_flag(p, TIF_LASX_CTX_LIVE); + clear_tsk_thread_flag(p, TIF_LBT_CTX_LIVE); return 0; } diff --git a/arch/loongarch/kernel/ptrace.c b/arch/loongarch/kernel/ptrace.c index f72adbf530c6..c114c5ef1332 100644 --- a/arch/loongarch/kernel/ptrace.c +++ b/arch/loongarch/kernel/ptrace.c @@ -38,6 +38,7 @@ #include <asm/cpu.h> #include <asm/cpu-info.h> #include <asm/fpu.h> +#include <asm/lbt.h> #include <asm/loongarch.h> #include <asm/page.h> #include <asm/pgtable.h> @@ -338,6 +339,46 @@ static int simd_set(struct task_struct *target, #endif /* CONFIG_CPU_HAS_LSX */ +#ifdef CONFIG_CPU_HAS_LBT +static int lbt_get(struct task_struct *target, + const struct user_regset *regset, + struct membuf to) +{ + int r; + + r = membuf_write(&to, &target->thread.lbt.scr0, sizeof(target->thread.lbt.scr0)); + r = membuf_write(&to, &target->thread.lbt.scr1, sizeof(target->thread.lbt.scr1)); + r = membuf_write(&to, &target->thread.lbt.scr2, sizeof(target->thread.lbt.scr2)); + r = membuf_write(&to, &target->thread.lbt.scr3, sizeof(target->thread.lbt.scr3)); + r = membuf_write(&to, &target->thread.lbt.eflags, sizeof(u32)); + r = membuf_write(&to, &target->thread.fpu.ftop, sizeof(u32)); + + return r; +} + +static int lbt_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + int err = 0; + const int eflags_start = 4 * sizeof(target->thread.lbt.scr0); + const int ftop_start = eflags_start + sizeof(u32); + + err |= user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &target->thread.lbt.scr0, + 0, 4 * 
sizeof(target->thread.lbt.scr0)); + err |= user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &target->thread.lbt.eflags, + eflags_start, ftop_start); + err |= user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &target->thread.fpu.ftop, + ftop_start, ftop_start + sizeof(u32)); + + return err; +} +#endif /* CONFIG_CPU_HAS_LBT */ + #ifdef CONFIG_HAVE_HW_BREAKPOINT /* @@ -802,6 +843,9 @@ enum loongarch_regset { #ifdef CONFIG_CPU_HAS_LASX REGSET_LASX, #endif +#ifdef CONFIG_CPU_HAS_LBT + REGSET_LBT, +#endif #ifdef CONFIG_HAVE_HW_BREAKPOINT REGSET_HW_BREAK, REGSET_HW_WATCH, @@ -853,6 +897,16 @@ static const struct user_regset loongarch64_regsets[] = { .set = simd_set, }, #endif +#ifdef CONFIG_CPU_HAS_LBT + [REGSET_LBT] = { + .core_note_type = NT_LOONGARCH_LBT, + .n = 5, + .size = sizeof(u64), + .align = sizeof(u64), + .regset_get = lbt_get, + .set = lbt_set, + }, +#endif #ifdef CONFIG_HAVE_HW_BREAKPOINT [REGSET_HW_BREAK] = { .core_note_type = NT_LOONGARCH_HW_BREAK, diff --git a/arch/loongarch/kernel/relocate.c b/arch/loongarch/kernel/relocate.c index 01f94d1e3edf..6c3eff9af9fb 100644 --- a/arch/loongarch/kernel/relocate.c +++ b/arch/loongarch/kernel/relocate.c @@ -157,12 +157,11 @@ static inline void __init update_reloc_offset(unsigned long *addr, long random_o *new_addr = (unsigned long)reloc_offset; } -void * __init relocate_kernel(void) +unsigned long __init relocate_kernel(void) { unsigned long kernel_length; unsigned long random_offset = 0; void *location_new = _text; /* Default to original kernel start */ - void *kernel_entry = start_kernel; /* Default to original kernel entry point */ char *cmdline = early_ioremap(fw_arg1, COMMAND_LINE_SIZE); /* Boot command line is passed in fw_arg1 */ strscpy(boot_command_line, cmdline, COMMAND_LINE_SIZE); @@ -190,9 +189,6 @@ void * __init relocate_kernel(void) reloc_offset += random_offset; - /* Return the new kernel's entry point */ - kernel_entry = RELOCATED_KASLR(start_kernel); - /* The current thread is now within the relocated 
kernel */ __current_thread_info = RELOCATED_KASLR(__current_thread_info); @@ -204,7 +200,7 @@ void * __init relocate_kernel(void) relocate_absolute(random_offset); - return kernel_entry; + return random_offset; } /* diff --git a/arch/loongarch/kernel/setup.c b/arch/loongarch/kernel/setup.c index 9d830ab4e302..7783f0a3d742 100644 --- a/arch/loongarch/kernel/setup.c +++ b/arch/loongarch/kernel/setup.c @@ -626,4 +626,8 @@ void __init setup_arch(char **cmdline_p) #endif paging_init(); + +#ifdef CONFIG_KASAN + kasan_init(); +#endif } diff --git a/arch/loongarch/kernel/signal.c b/arch/loongarch/kernel/signal.c index ceb899366c0a..504fdfe85203 100644 --- a/arch/loongarch/kernel/signal.c +++ b/arch/loongarch/kernel/signal.c @@ -32,6 +32,7 @@ #include <asm/cacheflush.h> #include <asm/cpu-features.h> #include <asm/fpu.h> +#include <asm/lbt.h> #include <asm/ucontext.h> #include <asm/vdso.h> @@ -44,6 +45,9 @@ /* Make sure we will not lose FPU ownership */ #define lock_fpu_owner() ({ preempt_disable(); pagefault_disable(); }) #define unlock_fpu_owner() ({ pagefault_enable(); preempt_enable(); }) +/* Make sure we will not lose LBT ownership */ +#define lock_lbt_owner() ({ preempt_disable(); pagefault_disable(); }) +#define unlock_lbt_owner() ({ pagefault_enable(); preempt_enable(); }) /* Assembly functions to move context to/from the FPU */ extern asmlinkage int @@ -59,6 +63,13 @@ _save_lasx_context(void __user *fpregs, void __user *fcc, void __user *fcsr); extern asmlinkage int _restore_lasx_context(void __user *fpregs, void __user *fcc, void __user *fcsr); +#ifdef CONFIG_CPU_HAS_LBT +extern asmlinkage int _save_lbt_context(void __user *regs, void __user *eflags); +extern asmlinkage int _restore_lbt_context(void __user *regs, void __user *eflags); +extern asmlinkage int _save_ftop_context(void __user *ftop); +extern asmlinkage int _restore_ftop_context(void __user *ftop); +#endif + struct rt_sigframe { struct siginfo rs_info; struct ucontext rs_uctx; @@ -75,6 +86,7 @@ struct 
extctx_layout { struct _ctx_layout fpu; struct _ctx_layout lsx; struct _ctx_layout lasx; + struct _ctx_layout lbt; struct _ctx_layout end; }; @@ -215,6 +227,52 @@ static int copy_lasx_from_sigcontext(struct lasx_context __user *ctx) return err; } +#ifdef CONFIG_CPU_HAS_LBT +static int copy_lbt_to_sigcontext(struct lbt_context __user *ctx) +{ + int err = 0; + uint64_t __user *regs = (uint64_t *)&ctx->regs; + uint32_t __user *eflags = (uint32_t *)&ctx->eflags; + + err |= __put_user(current->thread.lbt.scr0, &regs[0]); + err |= __put_user(current->thread.lbt.scr1, &regs[1]); + err |= __put_user(current->thread.lbt.scr2, &regs[2]); + err |= __put_user(current->thread.lbt.scr3, &regs[3]); + err |= __put_user(current->thread.lbt.eflags, eflags); + + return err; +} + +static int copy_lbt_from_sigcontext(struct lbt_context __user *ctx) +{ + int err = 0; + uint64_t __user *regs = (uint64_t *)&ctx->regs; + uint32_t __user *eflags = (uint32_t *)&ctx->eflags; + + err |= __get_user(current->thread.lbt.scr0, &regs[0]); + err |= __get_user(current->thread.lbt.scr1, &regs[1]); + err |= __get_user(current->thread.lbt.scr2, &regs[2]); + err |= __get_user(current->thread.lbt.scr3, &regs[3]); + err |= __get_user(current->thread.lbt.eflags, eflags); + + return err; +} + +static int copy_ftop_to_sigcontext(struct lbt_context __user *ctx) +{ + uint32_t __user *ftop = &ctx->ftop; + + return __put_user(current->thread.fpu.ftop, ftop); +} + +static int copy_ftop_from_sigcontext(struct lbt_context __user *ctx) +{ + uint32_t __user *ftop = &ctx->ftop; + + return __get_user(current->thread.fpu.ftop, ftop); +} +#endif + /* * Wrappers for the assembly _{save,restore}_fp_context functions. */ @@ -272,6 +330,41 @@ static int restore_hw_lasx_context(struct lasx_context __user *ctx) return _restore_lasx_context(regs, fcc, fcsr); } +/* + * Wrappers for the assembly _{save,restore}_lbt_context functions. 
+ */ +#ifdef CONFIG_CPU_HAS_LBT +static int save_hw_lbt_context(struct lbt_context __user *ctx) +{ + uint64_t __user *regs = (uint64_t *)&ctx->regs; + uint32_t __user *eflags = (uint32_t *)&ctx->eflags; + + return _save_lbt_context(regs, eflags); +} + +static int restore_hw_lbt_context(struct lbt_context __user *ctx) +{ + uint64_t __user *regs = (uint64_t *)&ctx->regs; + uint32_t __user *eflags = (uint32_t *)&ctx->eflags; + + return _restore_lbt_context(regs, eflags); +} + +static int save_hw_ftop_context(struct lbt_context __user *ctx) +{ + uint32_t __user *ftop = &ctx->ftop; + + return _save_ftop_context(ftop); +} + +static int restore_hw_ftop_context(struct lbt_context __user *ctx) +{ + uint32_t __user *ftop = &ctx->ftop; + + return _restore_ftop_context(ftop); +} +#endif + static int fcsr_pending(unsigned int __user *fcsr) { int err, sig = 0; @@ -519,6 +612,77 @@ static int protected_restore_lasx_context(struct extctx_layout *extctx) return err ?: sig; } +#ifdef CONFIG_CPU_HAS_LBT +static int protected_save_lbt_context(struct extctx_layout *extctx) +{ + int err = 0; + struct sctx_info __user *info = extctx->lbt.addr; + struct lbt_context __user *lbt_ctx = + (struct lbt_context *)get_ctx_through_ctxinfo(info); + uint64_t __user *regs = (uint64_t *)&lbt_ctx->regs; + uint32_t __user *eflags = (uint32_t *)&lbt_ctx->eflags; + + while (1) { + lock_lbt_owner(); + if (is_lbt_owner()) + err |= save_hw_lbt_context(lbt_ctx); + else + err |= copy_lbt_to_sigcontext(lbt_ctx); + if (is_fpu_owner()) + err |= save_hw_ftop_context(lbt_ctx); + else + err |= copy_ftop_to_sigcontext(lbt_ctx); + unlock_lbt_owner(); + + err |= __put_user(LBT_CTX_MAGIC, &info->magic); + err |= __put_user(extctx->lbt.size, &info->size); + + if (likely(!err)) + break; + /* Touch the LBT context and try again */ + err = __put_user(0, &regs[0]) | __put_user(0, eflags); + + if (err) + return err; + } + + return err; +} + +static int protected_restore_lbt_context(struct extctx_layout *extctx) +{ + int err = 0, 
tmp __maybe_unused; + struct sctx_info __user *info = extctx->lbt.addr; + struct lbt_context __user *lbt_ctx = + (struct lbt_context *)get_ctx_through_ctxinfo(info); + uint64_t __user *regs = (uint64_t *)&lbt_ctx->regs; + uint32_t __user *eflags = (uint32_t *)&lbt_ctx->eflags; + + while (1) { + lock_lbt_owner(); + if (is_lbt_owner()) + err |= restore_hw_lbt_context(lbt_ctx); + else + err |= copy_lbt_from_sigcontext(lbt_ctx); + if (is_fpu_owner()) + err |= restore_hw_ftop_context(lbt_ctx); + else + err |= copy_ftop_from_sigcontext(lbt_ctx); + unlock_lbt_owner(); + + if (likely(!err)) + break; + /* Touch the LBT context and try again */ + err = __get_user(tmp, &regs[0]) | __get_user(tmp, eflags); + + if (err) + return err; + } + + return err; +} +#endif + static int setup_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, struct extctx_layout *extctx) { @@ -539,6 +703,11 @@ static int setup_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, else if (extctx->fpu.addr) err |= protected_save_fpu_context(extctx); +#ifdef CONFIG_CPU_HAS_LBT + if (extctx->lbt.addr) + err |= protected_save_lbt_context(extctx); +#endif + /* Set the "end" magic */ info = (struct sctx_info *)extctx->end.addr; err |= __put_user(0, &info->magic); @@ -584,6 +753,13 @@ static int parse_extcontext(struct sigcontext __user *sc, struct extctx_layout * extctx->lasx.addr = info; break; + case LBT_CTX_MAGIC: + if (size < (sizeof(struct sctx_info) + + sizeof(struct lbt_context))) + goto invalid; + extctx->lbt.addr = info; + break; + default: goto invalid; } @@ -636,6 +812,11 @@ static int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc else if (extctx.fpu.addr) err |= protected_restore_fpu_context(&extctx); +#ifdef CONFIG_CPU_HAS_LBT + if (extctx.lbt.addr) + err |= protected_restore_lbt_context(&extctx); +#endif + bad: return err; } @@ -700,6 +881,13 @@ static unsigned long setup_extcontext(struct extctx_layout *extctx, unsigned lon sizeof(struct fpu_context), 
FPU_CTX_ALIGN, new_sp); } +#ifdef CONFIG_CPU_HAS_LBT + if (cpu_has_lbt && thread_lbt_context_live()) { + new_sp = extframe_alloc(extctx, &extctx->lbt, + sizeof(struct lbt_context), LBT_CTX_ALIGN, new_sp); + } +#endif + return new_sp; } diff --git a/arch/loongarch/kernel/stacktrace.c b/arch/loongarch/kernel/stacktrace.c index 2463d2fea21f..92270f14db94 100644 --- a/arch/loongarch/kernel/stacktrace.c +++ b/arch/loongarch/kernel/stacktrace.c @@ -18,17 +18,19 @@ void arch_stack_walk(stack_trace_consume_fn consume_entry, void *cookie, struct pt_regs dummyregs; struct unwind_state state; - regs = &dummyregs; + if (!regs) { + regs = &dummyregs; - if (task == current) { - regs->regs[3] = (unsigned long)__builtin_frame_address(0); - regs->csr_era = (unsigned long)__builtin_return_address(0); - } else { - regs->regs[3] = thread_saved_fp(task); - regs->csr_era = thread_saved_ra(task); + if (task == current) { + regs->regs[3] = (unsigned long)__builtin_frame_address(0); + regs->csr_era = (unsigned long)__builtin_return_address(0); + } else { + regs->regs[3] = thread_saved_fp(task); + regs->csr_era = thread_saved_ra(task); + } + regs->regs[1] = 0; } - regs->regs[1] = 0; for (unwind_start(&state, task, regs); !unwind_done(&state) && !unwind_error(&state); unwind_next_frame(&state)) { addr = unwind_get_return_address(&state); diff --git a/arch/loongarch/kernel/sysrq.c b/arch/loongarch/kernel/sysrq.c index 366baef72d29..e663c10fa39c 100644 --- a/arch/loongarch/kernel/sysrq.c +++ b/arch/loongarch/kernel/sysrq.c @@ -43,7 +43,7 @@ static void sysrq_tlbdump_othercpus(struct work_struct *dummy) static DECLARE_WORK(sysrq_tlbdump, sysrq_tlbdump_othercpus); #endif -static void sysrq_handle_tlbdump(int key) +static void sysrq_handle_tlbdump(u8 key) { sysrq_tlbdump_single(NULL); #ifdef CONFIG_SMP diff --git a/arch/loongarch/kernel/traps.c b/arch/loongarch/kernel/traps.c index 89699db45cec..65214774ef7c 100644 --- a/arch/loongarch/kernel/traps.c +++ b/arch/loongarch/kernel/traps.c @@ -36,7 
+36,9 @@ #include <asm/break.h> #include <asm/cpu.h> #include <asm/fpu.h> +#include <asm/lbt.h> #include <asm/inst.h> +#include <asm/kgdb.h> #include <asm/loongarch.h> #include <asm/mmu_context.h> #include <asm/pgtable.h> @@ -702,6 +704,11 @@ asmlinkage void noinstr do_bp(struct pt_regs *regs) * pertain to them. */ switch (bcode) { + case BRK_KDB: + if (kgdb_breakpoint_handler(regs)) + goto out; + else + break; case BRK_KPROBE_BP: if (kprobe_breakpoint_handler(regs)) goto out; @@ -768,6 +775,9 @@ asmlinkage void noinstr do_watch(struct pt_regs *regs) #ifndef CONFIG_HAVE_HW_BREAKPOINT pr_warn("Hardware watch point handler not implemented!\n"); #else + if (kgdb_breakpoint_handler(regs)) + goto out; + if (test_tsk_thread_flag(current, TIF_SINGLESTEP)) { int llbit = (csr_read32(LOONGARCH_CSR_LLBCTL) & 0x1); unsigned long pc = instruction_pointer(regs); @@ -966,13 +976,47 @@ out: irqentry_exit(regs, state); } +static void init_restore_lbt(void) +{ + if (!thread_lbt_context_live()) { + /* First time LBT context user */ + init_lbt(); + set_thread_flag(TIF_LBT_CTX_LIVE); + } else { + if (!is_lbt_owner()) + own_lbt_inatomic(1); + } + + BUG_ON(!is_lbt_enabled()); +} + asmlinkage void noinstr do_lbt(struct pt_regs *regs) { irqentry_state_t state = irqentry_enter(regs); - local_irq_enable(); - force_sig(SIGILL); - local_irq_disable(); + /* + * BTD (Binary Translation Disable exception) can be triggered + * during FP save/restore if TM (Top Mode) is on, which may + * cause irq_enable during 'switch_to'. To avoid this situation + * (including the user using 'MOVGR2GCSR' to turn on TM, which + * will not trigger the BTE), we need to check PRMD first. 
+ */ + if (regs->csr_prmd & CSR_PRMD_PIE) + local_irq_enable(); + + if (!cpu_has_lbt) { + force_sig(SIGILL); + goto out; + } + BUG_ON(is_lbt_enabled()); + + preempt_disable(); + init_restore_lbt(); + preempt_enable(); + +out: + if (regs->csr_prmd & CSR_PRMD_PIE) + local_irq_disable(); irqentry_exit(regs, state); } diff --git a/arch/loongarch/lib/Makefile b/arch/loongarch/lib/Makefile index d60d4e096cfa..a77bf160bfc4 100644 --- a/arch/loongarch/lib/Makefile +++ b/arch/loongarch/lib/Makefile @@ -6,4 +6,6 @@ lib-y += delay.o memset.o memcpy.o memmove.o \ clear_user.o copy_user.o csum.o dump_tlb.o unaligned.o +obj-$(CONFIG_CPU_HAS_LSX) += xor_simd.o xor_simd_glue.o + obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o diff --git a/arch/loongarch/lib/clear_user.S b/arch/loongarch/lib/clear_user.S index 0790eadce166..be741544e62b 100644 --- a/arch/loongarch/lib/clear_user.S +++ b/arch/loongarch/lib/clear_user.S @@ -11,19 +11,6 @@ #include <asm/cpu.h> #include <asm/regdef.h> -.irp to, 0, 1, 2, 3, 4, 5, 6, 7 -.L_fixup_handle_\to\(): - sub.d a0, a2, a0 - addi.d a0, a0, (\to) * (-8) - jr ra -.endr - -.irp to, 0, 2, 4 -.L_fixup_handle_s\to\(): - addi.d a0, a1, -\to - jr ra -.endr - SYM_FUNC_START(__clear_user) /* * Some CPUs support hardware unaligned access @@ -51,7 +38,7 @@ SYM_FUNC_START(__clear_user_generic) 2: move a0, a1 jr ra - _asm_extable 1b, .L_fixup_handle_s0 + _asm_extable 1b, 2b SYM_FUNC_END(__clear_user_generic) /* @@ -173,33 +160,47 @@ SYM_FUNC_START(__clear_user_fast) jr ra /* fixup and ex_table */ - _asm_extable 0b, .L_fixup_handle_0 - _asm_extable 1b, .L_fixup_handle_0 - _asm_extable 2b, .L_fixup_handle_1 - _asm_extable 3b, .L_fixup_handle_2 - _asm_extable 4b, .L_fixup_handle_3 - _asm_extable 5b, .L_fixup_handle_4 - _asm_extable 6b, .L_fixup_handle_5 - _asm_extable 7b, .L_fixup_handle_6 - _asm_extable 8b, .L_fixup_handle_7 - _asm_extable 9b, .L_fixup_handle_0 - _asm_extable 10b, .L_fixup_handle_1 - _asm_extable 11b, .L_fixup_handle_2 - _asm_extable 12b, 
.L_fixup_handle_3 - _asm_extable 13b, .L_fixup_handle_0 - _asm_extable 14b, .L_fixup_handle_1 - _asm_extable 15b, .L_fixup_handle_0 - _asm_extable 16b, .L_fixup_handle_0 - _asm_extable 17b, .L_fixup_handle_s0 - _asm_extable 18b, .L_fixup_handle_s0 - _asm_extable 19b, .L_fixup_handle_s0 - _asm_extable 20b, .L_fixup_handle_s2 - _asm_extable 21b, .L_fixup_handle_s0 - _asm_extable 22b, .L_fixup_handle_s0 - _asm_extable 23b, .L_fixup_handle_s4 - _asm_extable 24b, .L_fixup_handle_s0 - _asm_extable 25b, .L_fixup_handle_s4 - _asm_extable 26b, .L_fixup_handle_s0 - _asm_extable 27b, .L_fixup_handle_s4 - _asm_extable 28b, .L_fixup_handle_s0 +.Llarge_fixup: + sub.d a1, a2, a0 + +.Lsmall_fixup: +29: st.b zero, a0, 0 + addi.d a0, a0, 1 + addi.d a1, a1, -1 + bgt a1, zero, 29b + +.Lexit: + move a0, a1 + jr ra + + _asm_extable 0b, .Lsmall_fixup + _asm_extable 1b, .Llarge_fixup + _asm_extable 2b, .Llarge_fixup + _asm_extable 3b, .Llarge_fixup + _asm_extable 4b, .Llarge_fixup + _asm_extable 5b, .Llarge_fixup + _asm_extable 6b, .Llarge_fixup + _asm_extable 7b, .Llarge_fixup + _asm_extable 8b, .Llarge_fixup + _asm_extable 9b, .Llarge_fixup + _asm_extable 10b, .Llarge_fixup + _asm_extable 11b, .Llarge_fixup + _asm_extable 12b, .Llarge_fixup + _asm_extable 13b, .Llarge_fixup + _asm_extable 14b, .Llarge_fixup + _asm_extable 15b, .Llarge_fixup + _asm_extable 16b, .Llarge_fixup + _asm_extable 17b, .Lexit + _asm_extable 18b, .Lsmall_fixup + _asm_extable 19b, .Lsmall_fixup + _asm_extable 20b, .Lsmall_fixup + _asm_extable 21b, .Lsmall_fixup + _asm_extable 22b, .Lsmall_fixup + _asm_extable 23b, .Lsmall_fixup + _asm_extable 24b, .Lsmall_fixup + _asm_extable 25b, .Lsmall_fixup + _asm_extable 26b, .Lsmall_fixup + _asm_extable 27b, .Lsmall_fixup + _asm_extable 28b, .Lsmall_fixup + _asm_extable 29b, .Lexit SYM_FUNC_END(__clear_user_fast) diff --git a/arch/loongarch/lib/copy_user.S b/arch/loongarch/lib/copy_user.S index bfe3d2793d00..feec3d362803 100644 --- a/arch/loongarch/lib/copy_user.S +++ 
b/arch/loongarch/lib/copy_user.S @@ -11,19 +11,6 @@ #include <asm/cpu.h> #include <asm/regdef.h> -.irp to, 0, 1, 2, 3, 4, 5, 6, 7 -.L_fixup_handle_\to\(): - sub.d a0, a2, a0 - addi.d a0, a0, (\to) * (-8) - jr ra -.endr - -.irp to, 0, 2, 4 -.L_fixup_handle_s\to\(): - addi.d a0, a2, -\to - jr ra -.endr - SYM_FUNC_START(__copy_user) /* * Some CPUs support hardware unaligned access @@ -54,8 +41,8 @@ SYM_FUNC_START(__copy_user_generic) 3: move a0, a2 jr ra - _asm_extable 1b, .L_fixup_handle_s0 - _asm_extable 2b, .L_fixup_handle_s0 + _asm_extable 1b, 3b + _asm_extable 2b, 3b SYM_FUNC_END(__copy_user_generic) /* @@ -69,10 +56,10 @@ SYM_FUNC_START(__copy_user_fast) sltui t0, a2, 9 bnez t0, .Lsmall - add.d a3, a1, a2 - add.d a2, a0, a2 0: ld.d t0, a1, 0 1: st.d t0, a0, 0 + add.d a3, a1, a2 + add.d a2, a0, a2 /* align up destination address */ andi t1, a0, 7 @@ -94,7 +81,6 @@ SYM_FUNC_START(__copy_user_fast) 7: ld.d t5, a1, 40 8: ld.d t6, a1, 48 9: ld.d t7, a1, 56 - addi.d a1, a1, 64 10: st.d t0, a0, 0 11: st.d t1, a0, 8 12: st.d t2, a0, 16 @@ -103,6 +89,7 @@ SYM_FUNC_START(__copy_user_fast) 15: st.d t5, a0, 40 16: st.d t6, a0, 48 17: st.d t7, a0, 56 + addi.d a1, a1, 64 addi.d a0, a0, 64 bltu a1, a4, .Lloop64 @@ -114,11 +101,11 @@ SYM_FUNC_START(__copy_user_fast) 19: ld.d t1, a1, 8 20: ld.d t2, a1, 16 21: ld.d t3, a1, 24 - addi.d a1, a1, 32 22: st.d t0, a0, 0 23: st.d t1, a0, 8 24: st.d t2, a0, 16 25: st.d t3, a0, 24 + addi.d a1, a1, 32 addi.d a0, a0, 32 .Llt32: @@ -126,9 +113,9 @@ SYM_FUNC_START(__copy_user_fast) bgeu a1, a4, .Llt16 26: ld.d t0, a1, 0 27: ld.d t1, a1, 8 - addi.d a1, a1, 16 28: st.d t0, a0, 0 29: st.d t1, a0, 8 + addi.d a1, a1, 16 addi.d a0, a0, 16 .Llt16: @@ -136,6 +123,7 @@ SYM_FUNC_START(__copy_user_fast) bgeu a1, a4, .Llt8 30: ld.d t0, a1, 0 31: st.d t0, a0, 0 + addi.d a1, a1, 8 addi.d a0, a0, 8 .Llt8: @@ -214,62 +202,79 @@ SYM_FUNC_START(__copy_user_fast) jr ra /* fixup and ex_table */ - _asm_extable 0b, .L_fixup_handle_0 - _asm_extable 1b, 
.L_fixup_handle_0 - _asm_extable 2b, .L_fixup_handle_0 - _asm_extable 3b, .L_fixup_handle_0 - _asm_extable 4b, .L_fixup_handle_0 - _asm_extable 5b, .L_fixup_handle_0 - _asm_extable 6b, .L_fixup_handle_0 - _asm_extable 7b, .L_fixup_handle_0 - _asm_extable 8b, .L_fixup_handle_0 - _asm_extable 9b, .L_fixup_handle_0 - _asm_extable 10b, .L_fixup_handle_0 - _asm_extable 11b, .L_fixup_handle_1 - _asm_extable 12b, .L_fixup_handle_2 - _asm_extable 13b, .L_fixup_handle_3 - _asm_extable 14b, .L_fixup_handle_4 - _asm_extable 15b, .L_fixup_handle_5 - _asm_extable 16b, .L_fixup_handle_6 - _asm_extable 17b, .L_fixup_handle_7 - _asm_extable 18b, .L_fixup_handle_0 - _asm_extable 19b, .L_fixup_handle_0 - _asm_extable 20b, .L_fixup_handle_0 - _asm_extable 21b, .L_fixup_handle_0 - _asm_extable 22b, .L_fixup_handle_0 - _asm_extable 23b, .L_fixup_handle_1 - _asm_extable 24b, .L_fixup_handle_2 - _asm_extable 25b, .L_fixup_handle_3 - _asm_extable 26b, .L_fixup_handle_0 - _asm_extable 27b, .L_fixup_handle_0 - _asm_extable 28b, .L_fixup_handle_0 - _asm_extable 29b, .L_fixup_handle_1 - _asm_extable 30b, .L_fixup_handle_0 - _asm_extable 31b, .L_fixup_handle_0 - _asm_extable 32b, .L_fixup_handle_0 - _asm_extable 33b, .L_fixup_handle_0 - _asm_extable 34b, .L_fixup_handle_s0 - _asm_extable 35b, .L_fixup_handle_s0 - _asm_extable 36b, .L_fixup_handle_s0 - _asm_extable 37b, .L_fixup_handle_s0 - _asm_extable 38b, .L_fixup_handle_s0 - _asm_extable 39b, .L_fixup_handle_s0 - _asm_extable 40b, .L_fixup_handle_s0 - _asm_extable 41b, .L_fixup_handle_s2 - _asm_extable 42b, .L_fixup_handle_s0 - _asm_extable 43b, .L_fixup_handle_s0 - _asm_extable 44b, .L_fixup_handle_s0 - _asm_extable 45b, .L_fixup_handle_s0 - _asm_extable 46b, .L_fixup_handle_s0 - _asm_extable 47b, .L_fixup_handle_s4 - _asm_extable 48b, .L_fixup_handle_s0 - _asm_extable 49b, .L_fixup_handle_s0 - _asm_extable 50b, .L_fixup_handle_s0 - _asm_extable 51b, .L_fixup_handle_s4 - _asm_extable 52b, .L_fixup_handle_s0 - _asm_extable 53b, 
.L_fixup_handle_s0 - _asm_extable 54b, .L_fixup_handle_s0 - _asm_extable 55b, .L_fixup_handle_s4 - _asm_extable 56b, .L_fixup_handle_s0 - _asm_extable 57b, .L_fixup_handle_s0 +.Llarge_fixup: + sub.d a2, a2, a0 + +.Lsmall_fixup: +58: ld.b t0, a1, 0 +59: st.b t0, a0, 0 + addi.d a0, a0, 1 + addi.d a1, a1, 1 + addi.d a2, a2, -1 + bgt a2, zero, 58b + +.Lexit: + move a0, a2 + jr ra + + _asm_extable 0b, .Lsmall_fixup + _asm_extable 1b, .Lsmall_fixup + _asm_extable 2b, .Llarge_fixup + _asm_extable 3b, .Llarge_fixup + _asm_extable 4b, .Llarge_fixup + _asm_extable 5b, .Llarge_fixup + _asm_extable 6b, .Llarge_fixup + _asm_extable 7b, .Llarge_fixup + _asm_extable 8b, .Llarge_fixup + _asm_extable 9b, .Llarge_fixup + _asm_extable 10b, .Llarge_fixup + _asm_extable 11b, .Llarge_fixup + _asm_extable 12b, .Llarge_fixup + _asm_extable 13b, .Llarge_fixup + _asm_extable 14b, .Llarge_fixup + _asm_extable 15b, .Llarge_fixup + _asm_extable 16b, .Llarge_fixup + _asm_extable 17b, .Llarge_fixup + _asm_extable 18b, .Llarge_fixup + _asm_extable 19b, .Llarge_fixup + _asm_extable 20b, .Llarge_fixup + _asm_extable 21b, .Llarge_fixup + _asm_extable 22b, .Llarge_fixup + _asm_extable 23b, .Llarge_fixup + _asm_extable 24b, .Llarge_fixup + _asm_extable 25b, .Llarge_fixup + _asm_extable 26b, .Llarge_fixup + _asm_extable 27b, .Llarge_fixup + _asm_extable 28b, .Llarge_fixup + _asm_extable 29b, .Llarge_fixup + _asm_extable 30b, .Llarge_fixup + _asm_extable 31b, .Llarge_fixup + _asm_extable 32b, .Llarge_fixup + _asm_extable 33b, .Llarge_fixup + _asm_extable 34b, .Lexit + _asm_extable 35b, .Lexit + _asm_extable 36b, .Lsmall_fixup + _asm_extable 37b, .Lsmall_fixup + _asm_extable 38b, .Lsmall_fixup + _asm_extable 39b, .Lsmall_fixup + _asm_extable 40b, .Lsmall_fixup + _asm_extable 41b, .Lsmall_fixup + _asm_extable 42b, .Lsmall_fixup + _asm_extable 43b, .Lsmall_fixup + _asm_extable 44b, .Lsmall_fixup + _asm_extable 45b, .Lsmall_fixup + _asm_extable 46b, .Lsmall_fixup + _asm_extable 47b, .Lsmall_fixup + 
_asm_extable 48b, .Lsmall_fixup + _asm_extable 49b, .Lsmall_fixup + _asm_extable 50b, .Lsmall_fixup + _asm_extable 51b, .Lsmall_fixup + _asm_extable 52b, .Lsmall_fixup + _asm_extable 53b, .Lsmall_fixup + _asm_extable 54b, .Lsmall_fixup + _asm_extable 55b, .Lsmall_fixup + _asm_extable 56b, .Lsmall_fixup + _asm_extable 57b, .Lsmall_fixup + _asm_extable 58b, .Lexit + _asm_extable 59b, .Lexit SYM_FUNC_END(__copy_user_fast) diff --git a/arch/loongarch/lib/memcpy.S b/arch/loongarch/lib/memcpy.S index cc30b3b6252f..fa1148878d2b 100644 --- a/arch/loongarch/lib/memcpy.S +++ b/arch/loongarch/lib/memcpy.S @@ -10,6 +10,8 @@ #include <asm/cpu.h> #include <asm/regdef.h> +.section .noinstr.text, "ax" + SYM_FUNC_START(memcpy) /* * Some CPUs support hardware unaligned access @@ -17,9 +19,13 @@ SYM_FUNC_START(memcpy) ALTERNATIVE "b __memcpy_generic", \ "b __memcpy_fast", CPU_FEATURE_UAL SYM_FUNC_END(memcpy) -_ASM_NOKPROBE(memcpy) +SYM_FUNC_ALIAS(__memcpy, memcpy) EXPORT_SYMBOL(memcpy) +EXPORT_SYMBOL(__memcpy) + +_ASM_NOKPROBE(memcpy) +_ASM_NOKPROBE(__memcpy) /* * void *__memcpy_generic(void *dst, const void *src, size_t n) diff --git a/arch/loongarch/lib/memmove.S b/arch/loongarch/lib/memmove.S index 7dc76d1484b6..82dae062fec8 100644 --- a/arch/loongarch/lib/memmove.S +++ b/arch/loongarch/lib/memmove.S @@ -10,23 +10,29 @@ #include <asm/cpu.h> #include <asm/regdef.h> +.section .noinstr.text, "ax" + SYM_FUNC_START(memmove) - blt a0, a1, memcpy /* dst < src, memcpy */ - blt a1, a0, rmemcpy /* src < dst, rmemcpy */ - jr ra /* dst == src, return */ + blt a0, a1, __memcpy /* dst < src, memcpy */ + blt a1, a0, __rmemcpy /* src < dst, rmemcpy */ + jr ra /* dst == src, return */ SYM_FUNC_END(memmove) -_ASM_NOKPROBE(memmove) +SYM_FUNC_ALIAS(__memmove, memmove) EXPORT_SYMBOL(memmove) +EXPORT_SYMBOL(__memmove) + +_ASM_NOKPROBE(memmove) +_ASM_NOKPROBE(__memmove) -SYM_FUNC_START(rmemcpy) +SYM_FUNC_START(__rmemcpy) /* * Some CPUs support hardware unaligned access */ ALTERNATIVE "b 
__rmemcpy_generic", \ "b __rmemcpy_fast", CPU_FEATURE_UAL -SYM_FUNC_END(rmemcpy) -_ASM_NOKPROBE(rmemcpy) +SYM_FUNC_END(__rmemcpy) +_ASM_NOKPROBE(__rmemcpy) /* * void *__rmemcpy_generic(void *dst, const void *src, size_t n) diff --git a/arch/loongarch/lib/memset.S b/arch/loongarch/lib/memset.S index 3f20f7996e8e..06d3ca54cbfe 100644 --- a/arch/loongarch/lib/memset.S +++ b/arch/loongarch/lib/memset.S @@ -16,6 +16,8 @@ bstrins.d \r0, \r0, 63, 32 .endm +.section .noinstr.text, "ax" + SYM_FUNC_START(memset) /* * Some CPUs support hardware unaligned access @@ -23,9 +25,13 @@ SYM_FUNC_START(memset) ALTERNATIVE "b __memset_generic", \ "b __memset_fast", CPU_FEATURE_UAL SYM_FUNC_END(memset) -_ASM_NOKPROBE(memset) +SYM_FUNC_ALIAS(__memset, memset) EXPORT_SYMBOL(memset) +EXPORT_SYMBOL(__memset) + +_ASM_NOKPROBE(memset) +_ASM_NOKPROBE(__memset) /* * void *__memset_generic(void *s, int c, size_t n) diff --git a/arch/loongarch/lib/xor_simd.c b/arch/loongarch/lib/xor_simd.c new file mode 100644 index 000000000000..84cd24b728c4 --- /dev/null +++ b/arch/loongarch/lib/xor_simd.c @@ -0,0 +1,93 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * LoongArch SIMD XOR operations + * + * Copyright (C) 2023 WANG Xuerui <git@xen0n.name> + */ + +#include "xor_simd.h" + +/* + * Process one cache line (64 bytes) per loop. This is assuming all future + * popular LoongArch cores are similar performance-characteristics-wise to the + * current models. 
+ */ +#define LINE_WIDTH 64 + +#ifdef CONFIG_CPU_HAS_LSX + +#define LD(reg, base, offset) \ + "vld $vr" #reg ", %[" #base "], " #offset "\n\t" +#define ST(reg, base, offset) \ + "vst $vr" #reg ", %[" #base "], " #offset "\n\t" +#define XOR(dj, k) "vxor.v $vr" #dj ", $vr" #dj ", $vr" #k "\n\t" + +#define LD_INOUT_LINE(base) \ + LD(0, base, 0) \ + LD(1, base, 16) \ + LD(2, base, 32) \ + LD(3, base, 48) + +#define LD_AND_XOR_LINE(base) \ + LD(4, base, 0) \ + LD(5, base, 16) \ + LD(6, base, 32) \ + LD(7, base, 48) \ + XOR(0, 4) \ + XOR(1, 5) \ + XOR(2, 6) \ + XOR(3, 7) + +#define ST_LINE(base) \ + ST(0, base, 0) \ + ST(1, base, 16) \ + ST(2, base, 32) \ + ST(3, base, 48) + +#define XOR_FUNC_NAME(nr) __xor_lsx_##nr +#include "xor_template.c" + +#undef LD +#undef ST +#undef XOR +#undef LD_INOUT_LINE +#undef LD_AND_XOR_LINE +#undef ST_LINE +#undef XOR_FUNC_NAME + +#endif /* CONFIG_CPU_HAS_LSX */ + +#ifdef CONFIG_CPU_HAS_LASX + +#define LD(reg, base, offset) \ + "xvld $xr" #reg ", %[" #base "], " #offset "\n\t" +#define ST(reg, base, offset) \ + "xvst $xr" #reg ", %[" #base "], " #offset "\n\t" +#define XOR(dj, k) "xvxor.v $xr" #dj ", $xr" #dj ", $xr" #k "\n\t" + +#define LD_INOUT_LINE(base) \ + LD(0, base, 0) \ + LD(1, base, 32) + +#define LD_AND_XOR_LINE(base) \ + LD(2, base, 0) \ + LD(3, base, 32) \ + XOR(0, 2) \ + XOR(1, 3) + +#define ST_LINE(base) \ + ST(0, base, 0) \ + ST(1, base, 32) + +#define XOR_FUNC_NAME(nr) __xor_lasx_##nr +#include "xor_template.c" + +#undef LD +#undef ST +#undef XOR +#undef LD_INOUT_LINE +#undef LD_AND_XOR_LINE +#undef ST_LINE +#undef XOR_FUNC_NAME + +#endif /* CONFIG_CPU_HAS_LASX */ diff --git a/arch/loongarch/lib/xor_simd.h b/arch/loongarch/lib/xor_simd.h new file mode 100644 index 000000000000..f50f32514d80 --- /dev/null +++ b/arch/loongarch/lib/xor_simd.h @@ -0,0 +1,38 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Simple interface to link xor_simd.c and xor_simd_glue.c + * + * Separating these files ensures that no SIMD 
instructions are run outside of + * the kfpu critical section. + */ + +#ifndef __LOONGARCH_LIB_XOR_SIMD_H +#define __LOONGARCH_LIB_XOR_SIMD_H + +#ifdef CONFIG_CPU_HAS_LSX +void __xor_lsx_2(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2); +void __xor_lsx_3(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, const unsigned long * __restrict p3); +void __xor_lsx_4(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, const unsigned long * __restrict p3, + const unsigned long * __restrict p4); +void __xor_lsx_5(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, const unsigned long * __restrict p3, + const unsigned long * __restrict p4, const unsigned long * __restrict p5); +#endif /* CONFIG_CPU_HAS_LSX */ + +#ifdef CONFIG_CPU_HAS_LASX +void __xor_lasx_2(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2); +void __xor_lasx_3(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, const unsigned long * __restrict p3); +void __xor_lasx_4(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, const unsigned long * __restrict p3, + const unsigned long * __restrict p4); +void __xor_lasx_5(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, const unsigned long * __restrict p3, + const unsigned long * __restrict p4, const unsigned long * __restrict p5); +#endif /* CONFIG_CPU_HAS_LASX */ + +#endif /* __LOONGARCH_LIB_XOR_SIMD_H */ diff --git a/arch/loongarch/lib/xor_simd_glue.c b/arch/loongarch/lib/xor_simd_glue.c new file mode 100644 index 000000000000..393f689dbcf6 --- /dev/null +++ b/arch/loongarch/lib/xor_simd_glue.c @@ -0,0 +1,72 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * LoongArch SIMD XOR operations + * + * Copyright (C) 2023 WANG Xuerui <git@xen0n.name> + 
*/ + +#include <linux/export.h> +#include <linux/sched.h> +#include <asm/fpu.h> +#include <asm/xor_simd.h> +#include "xor_simd.h" + +#define MAKE_XOR_GLUE_2(flavor) \ +void xor_##flavor##_2(unsigned long bytes, unsigned long * __restrict p1, \ + const unsigned long * __restrict p2) \ +{ \ + kernel_fpu_begin(); \ + __xor_##flavor##_2(bytes, p1, p2); \ + kernel_fpu_end(); \ +} \ +EXPORT_SYMBOL_GPL(xor_##flavor##_2) + +#define MAKE_XOR_GLUE_3(flavor) \ +void xor_##flavor##_3(unsigned long bytes, unsigned long * __restrict p1, \ + const unsigned long * __restrict p2, \ + const unsigned long * __restrict p3) \ +{ \ + kernel_fpu_begin(); \ + __xor_##flavor##_3(bytes, p1, p2, p3); \ + kernel_fpu_end(); \ +} \ +EXPORT_SYMBOL_GPL(xor_##flavor##_3) + +#define MAKE_XOR_GLUE_4(flavor) \ +void xor_##flavor##_4(unsigned long bytes, unsigned long * __restrict p1, \ + const unsigned long * __restrict p2, \ + const unsigned long * __restrict p3, \ + const unsigned long * __restrict p4) \ +{ \ + kernel_fpu_begin(); \ + __xor_##flavor##_4(bytes, p1, p2, p3, p4); \ + kernel_fpu_end(); \ +} \ +EXPORT_SYMBOL_GPL(xor_##flavor##_4) + +#define MAKE_XOR_GLUE_5(flavor) \ +void xor_##flavor##_5(unsigned long bytes, unsigned long * __restrict p1, \ + const unsigned long * __restrict p2, \ + const unsigned long * __restrict p3, \ + const unsigned long * __restrict p4, \ + const unsigned long * __restrict p5) \ +{ \ + kernel_fpu_begin(); \ + __xor_##flavor##_5(bytes, p1, p2, p3, p4, p5); \ + kernel_fpu_end(); \ +} \ +EXPORT_SYMBOL_GPL(xor_##flavor##_5) + +#define MAKE_XOR_GLUES(flavor) \ + MAKE_XOR_GLUE_2(flavor); \ + MAKE_XOR_GLUE_3(flavor); \ + MAKE_XOR_GLUE_4(flavor); \ + MAKE_XOR_GLUE_5(flavor) + +#ifdef CONFIG_CPU_HAS_LSX +MAKE_XOR_GLUES(lsx); +#endif + +#ifdef CONFIG_CPU_HAS_LASX +MAKE_XOR_GLUES(lasx); +#endif diff --git a/arch/loongarch/lib/xor_template.c b/arch/loongarch/lib/xor_template.c new file mode 100644 index 000000000000..0358ced7fe33 --- /dev/null +++ 
b/arch/loongarch/lib/xor_template.c @@ -0,0 +1,110 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (C) 2023 WANG Xuerui <git@xen0n.name> + * + * Template for XOR operations, instantiated in xor_simd.c. + * + * Expected preprocessor definitions: + * + * - LINE_WIDTH + * - XOR_FUNC_NAME(nr) + * - LD_INOUT_LINE(buf) + * - LD_AND_XOR_LINE(buf) + * - ST_LINE(buf) + */ + +void XOR_FUNC_NAME(2)(unsigned long bytes, + unsigned long * __restrict v1, + const unsigned long * __restrict v2) +{ + unsigned long lines = bytes / LINE_WIDTH; + + do { + __asm__ __volatile__ ( + LD_INOUT_LINE(v1) + LD_AND_XOR_LINE(v2) + ST_LINE(v1) + : : [v1] "r"(v1), [v2] "r"(v2) : "memory" + ); + + v1 += LINE_WIDTH / sizeof(unsigned long); + v2 += LINE_WIDTH / sizeof(unsigned long); + } while (--lines > 0); +} + +void XOR_FUNC_NAME(3)(unsigned long bytes, + unsigned long * __restrict v1, + const unsigned long * __restrict v2, + const unsigned long * __restrict v3) +{ + unsigned long lines = bytes / LINE_WIDTH; + + do { + __asm__ __volatile__ ( + LD_INOUT_LINE(v1) + LD_AND_XOR_LINE(v2) + LD_AND_XOR_LINE(v3) + ST_LINE(v1) + : : [v1] "r"(v1), [v2] "r"(v2), [v3] "r"(v3) : "memory" + ); + + v1 += LINE_WIDTH / sizeof(unsigned long); + v2 += LINE_WIDTH / sizeof(unsigned long); + v3 += LINE_WIDTH / sizeof(unsigned long); + } while (--lines > 0); +} + +void XOR_FUNC_NAME(4)(unsigned long bytes, + unsigned long * __restrict v1, + const unsigned long * __restrict v2, + const unsigned long * __restrict v3, + const unsigned long * __restrict v4) +{ + unsigned long lines = bytes / LINE_WIDTH; + + do { + __asm__ __volatile__ ( + LD_INOUT_LINE(v1) + LD_AND_XOR_LINE(v2) + LD_AND_XOR_LINE(v3) + LD_AND_XOR_LINE(v4) + ST_LINE(v1) + : : [v1] "r"(v1), [v2] "r"(v2), [v3] "r"(v3), [v4] "r"(v4) + : "memory" + ); + + v1 += LINE_WIDTH / sizeof(unsigned long); + v2 += LINE_WIDTH / sizeof(unsigned long); + v3 += LINE_WIDTH / sizeof(unsigned long); + v4 += LINE_WIDTH / sizeof(unsigned long); + } while 
(--lines > 0); +} + +void XOR_FUNC_NAME(5)(unsigned long bytes, + unsigned long * __restrict v1, + const unsigned long * __restrict v2, + const unsigned long * __restrict v3, + const unsigned long * __restrict v4, + const unsigned long * __restrict v5) +{ + unsigned long lines = bytes / LINE_WIDTH; + + do { + __asm__ __volatile__ ( + LD_INOUT_LINE(v1) + LD_AND_XOR_LINE(v2) + LD_AND_XOR_LINE(v3) + LD_AND_XOR_LINE(v4) + LD_AND_XOR_LINE(v5) + ST_LINE(v1) + : : [v1] "r"(v1), [v2] "r"(v2), [v3] "r"(v3), [v4] "r"(v4), + [v5] "r"(v5) : "memory" + ); + + v1 += LINE_WIDTH / sizeof(unsigned long); + v2 += LINE_WIDTH / sizeof(unsigned long); + v3 += LINE_WIDTH / sizeof(unsigned long); + v4 += LINE_WIDTH / sizeof(unsigned long); + v5 += LINE_WIDTH / sizeof(unsigned long); + } while (--lines > 0); +} diff --git a/arch/loongarch/mm/Makefile b/arch/loongarch/mm/Makefile index 8ffc6383f836..e4d1e581dbae 100644 --- a/arch/loongarch/mm/Makefile +++ b/arch/loongarch/mm/Makefile @@ -7,3 +7,6 @@ obj-y += init.o cache.o tlb.o tlbex.o extable.o \ fault.o ioremap.o maccess.o mmap.o pgtable.o page.o obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o +obj-$(CONFIG_KASAN) += kasan_init.o + +KASAN_SANITIZE_kasan_init.o := n diff --git a/arch/loongarch/mm/cache.c b/arch/loongarch/mm/cache.c index 72685a48eaf0..6be04d36ca07 100644 --- a/arch/loongarch/mm/cache.c +++ b/arch/loongarch/mm/cache.c @@ -156,7 +156,6 @@ void cpu_cache_init(void) current_cpu_data.cache_leaves_present = leaf; current_cpu_data.options |= LOONGARCH_CPU_PREFETCH; - shm_align_mask = PAGE_SIZE - 1; } static const pgprot_t protection_map[16] = { diff --git a/arch/loongarch/mm/fault.c b/arch/loongarch/mm/fault.c index da5b6d518cdb..e6376e3dce86 100644 --- a/arch/loongarch/mm/fault.c +++ b/arch/loongarch/mm/fault.c @@ -23,6 +23,7 @@ #include <linux/kprobes.h> #include <linux/perf_event.h> #include <linux/uaccess.h> +#include <linux/kfence.h> #include <asm/branch.h> #include <asm/mmu_context.h> @@ -30,7 +31,8 @@ int 
show_unhandled_signals = 1; -static void __kprobes no_context(struct pt_regs *regs, unsigned long address) +static void __kprobes no_context(struct pt_regs *regs, + unsigned long write, unsigned long address) { const int field = sizeof(unsigned long) * 2; @@ -38,6 +40,9 @@ static void __kprobes no_context(struct pt_regs *regs, unsigned long address) if (fixup_exception(regs)) return; + if (kfence_handle_page_fault(address, write, regs)) + return; + /* * Oops. The kernel tried to access some bad page. We'll have to * terminate things with extreme prejudice. @@ -51,14 +56,15 @@ static void __kprobes no_context(struct pt_regs *regs, unsigned long address) die("Oops", regs); } -static void __kprobes do_out_of_memory(struct pt_regs *regs, unsigned long address) +static void __kprobes do_out_of_memory(struct pt_regs *regs, + unsigned long write, unsigned long address) { /* * We ran out of memory, call the OOM killer, and return the userspace * (which will retry the fault, or kill us if we got oom-killed). */ if (!user_mode(regs)) { - no_context(regs, address); + no_context(regs, write, address); return; } pagefault_out_of_memory(); @@ -69,7 +75,7 @@ static void __kprobes do_sigbus(struct pt_regs *regs, { /* Kernel mode? Handle exceptions or die */ if (!user_mode(regs)) { - no_context(regs, address); + no_context(regs, write, address); return; } @@ -90,7 +96,7 @@ static void __kprobes do_sigsegv(struct pt_regs *regs, /* Kernel mode? 
Handle exceptions or die */ if (!user_mode(regs)) { - no_context(regs, address); + no_context(regs, write, address); return; } @@ -149,7 +155,7 @@ static void __kprobes __do_page_fault(struct pt_regs *regs, */ if (address & __UA_LIMIT) { if (!user_mode(regs)) - no_context(regs, address); + no_context(regs, write, address); else do_sigsegv(regs, write, address, si_code); return; @@ -211,7 +217,7 @@ good_area: if (fault_signal_pending(fault, regs)) { if (!user_mode(regs)) - no_context(regs, address); + no_context(regs, write, address); return; } @@ -232,7 +238,7 @@ good_area: if (unlikely(fault & VM_FAULT_ERROR)) { mmap_read_unlock(mm); if (fault & VM_FAULT_OOM) { - do_out_of_memory(regs, address); + do_out_of_memory(regs, write, address); return; } else if (fault & VM_FAULT_SIGSEGV) { do_sigsegv(regs, write, address, si_code); diff --git a/arch/loongarch/mm/init.c b/arch/loongarch/mm/init.c index 3b7d8129570b..f3fe8c06ba4d 100644 --- a/arch/loongarch/mm/init.c +++ b/arch/loongarch/mm/init.c @@ -35,33 +35,8 @@ #include <asm/pgalloc.h> #include <asm/tlb.h> -/* - * We have up to 8 empty zeroed pages so we can map one of the right colour - * when needed. Since page is never written to after the initialization we - * don't have to care about aliases on other CPUs. 
- */ -unsigned long empty_zero_page, zero_page_mask; +unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)] __page_aligned_bss; EXPORT_SYMBOL(empty_zero_page); -EXPORT_SYMBOL(zero_page_mask); - -void setup_zero_pages(void) -{ - unsigned int order, i; - struct page *page; - - order = 0; - - empty_zero_page = __get_free_pages(GFP_KERNEL | __GFP_ZERO, order); - if (!empty_zero_page) - panic("Oh boy, that early out of memory?"); - - page = virt_to_page((void *)empty_zero_page); - split_page(page, order); - for (i = 0; i < (1 << order); i++, page++) - mark_page_reserved(page); - - zero_page_mask = ((PAGE_SIZE << order) - 1) & PAGE_MASK; -} void copy_user_highpage(struct page *to, struct page *from, unsigned long vaddr, struct vm_area_struct *vma) @@ -106,7 +81,6 @@ void __init mem_init(void) high_memory = (void *) __va(max_low_pfn << PAGE_SHIFT); memblock_free_all(); - setup_zero_pages(); /* Setup zeroed pages. */ } #endif /* !CONFIG_NUMA */ @@ -191,43 +165,42 @@ void vmemmap_free(unsigned long start, unsigned long end, struct vmem_altmap *al #endif #endif -static pte_t *fixmap_pte(unsigned long addr) +pte_t * __init populate_kernel_pte(unsigned long addr) { - pgd_t *pgd; - p4d_t *p4d; + pgd_t *pgd = pgd_offset_k(addr); + p4d_t *p4d = p4d_offset(pgd, addr); pud_t *pud; pmd_t *pmd; - pgd = pgd_offset_k(addr); - p4d = p4d_offset(pgd, addr); - - if (pgd_none(*pgd)) { - pud_t *new __maybe_unused; - - new = memblock_alloc_low(PAGE_SIZE, PAGE_SIZE); - pgd_populate(&init_mm, pgd, new); + if (p4d_none(*p4d)) { + pud = memblock_alloc(PAGE_SIZE, PAGE_SIZE); + if (!pud) + panic("%s: Failed to allocate memory\n", __func__); + p4d_populate(&init_mm, p4d, pud); #ifndef __PAGETABLE_PUD_FOLDED - pud_init(new); + pud_init(pud); #endif } pud = pud_offset(p4d, addr); if (pud_none(*pud)) { - pmd_t *new __maybe_unused; - - new = memblock_alloc_low(PAGE_SIZE, PAGE_SIZE); - pud_populate(&init_mm, pud, new); + pmd = memblock_alloc(PAGE_SIZE, PAGE_SIZE); + if (!pmd) + panic("%s: 
Failed to allocate memory\n", __func__); + pud_populate(&init_mm, pud, pmd); #ifndef __PAGETABLE_PMD_FOLDED - pmd_init(new); + pmd_init(pmd); #endif } pmd = pmd_offset(pud, addr); - if (pmd_none(*pmd)) { - pte_t *new __maybe_unused; + if (!pmd_present(*pmd)) { + pte_t *pte; - new = memblock_alloc_low(PAGE_SIZE, PAGE_SIZE); - pmd_populate_kernel(&init_mm, pmd, new); + pte = memblock_alloc(PAGE_SIZE, PAGE_SIZE); + if (!pte) + panic("%s: Failed to allocate memory\n", __func__); + pmd_populate_kernel(&init_mm, pmd, pte); } return pte_offset_kernel(pmd, addr); @@ -241,7 +214,7 @@ void __init __set_fixmap(enum fixed_addresses idx, BUG_ON(idx <= FIX_HOLE || idx >= __end_of_fixed_addresses); - ptep = fixmap_pte(addr); + ptep = populate_kernel_pte(addr); if (!pte_none(*ptep)) { pte_ERROR(*ptep); return; diff --git a/arch/loongarch/mm/kasan_init.c b/arch/loongarch/mm/kasan_init.c new file mode 100644 index 000000000000..da68bc1a4643 --- /dev/null +++ b/arch/loongarch/mm/kasan_init.c @@ -0,0 +1,243 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2023 Loongson Technology Corporation Limited + */ +#define pr_fmt(fmt) "kasan: " fmt +#include <linux/kasan.h> +#include <linux/memblock.h> +#include <linux/sched/task.h> + +#include <asm/tlbflush.h> +#include <asm/pgalloc.h> +#include <asm-generic/sections.h> + +static pgd_t kasan_pg_dir[PTRS_PER_PGD] __initdata __aligned(PAGE_SIZE); + +#ifdef __PAGETABLE_PUD_FOLDED +#define __p4d_none(early, p4d) (0) +#else +#define __p4d_none(early, p4d) (early ? (p4d_val(p4d) == 0) : \ +(__pa(p4d_val(p4d)) == (unsigned long)__pa(kasan_early_shadow_pud))) +#endif + +#ifdef __PAGETABLE_PMD_FOLDED +#define __pud_none(early, pud) (0) +#else +#define __pud_none(early, pud) (early ? (pud_val(pud) == 0) : \ +(__pa(pud_val(pud)) == (unsigned long)__pa(kasan_early_shadow_pmd))) +#endif + +#define __pmd_none(early, pmd) (early ? 
(pmd_val(pmd) == 0) : \ +(__pa(pmd_val(pmd)) == (unsigned long)__pa(kasan_early_shadow_pte))) + +#define __pte_none(early, pte) (early ? pte_none(pte) : \ +((pte_val(pte) & _PFN_MASK) == (unsigned long)__pa(kasan_early_shadow_page))) + +bool kasan_early_stage = true; + +/* + * Alloc memory for shadow memory page table. + */ +static phys_addr_t __init kasan_alloc_zeroed_page(int node) +{ + void *p = memblock_alloc_try_nid(PAGE_SIZE, PAGE_SIZE, + __pa(MAX_DMA_ADDRESS), MEMBLOCK_ALLOC_ACCESSIBLE, node); + if (!p) + panic("%s: Failed to allocate %lu bytes align=0x%lx nid=%d from=%llx\n", + __func__, PAGE_SIZE, PAGE_SIZE, node, __pa(MAX_DMA_ADDRESS)); + + return __pa(p); +} + +static pte_t *__init kasan_pte_offset(pmd_t *pmdp, unsigned long addr, int node, bool early) +{ + if (__pmd_none(early, READ_ONCE(*pmdp))) { + phys_addr_t pte_phys = early ? + __pa_symbol(kasan_early_shadow_pte) : kasan_alloc_zeroed_page(node); + if (!early) + memcpy(__va(pte_phys), kasan_early_shadow_pte, sizeof(kasan_early_shadow_pte)); + pmd_populate_kernel(NULL, pmdp, (pte_t *)__va(pte_phys)); + } + + return pte_offset_kernel(pmdp, addr); +} + +static pmd_t *__init kasan_pmd_offset(pud_t *pudp, unsigned long addr, int node, bool early) +{ + if (__pud_none(early, READ_ONCE(*pudp))) { + phys_addr_t pmd_phys = early ? + __pa_symbol(kasan_early_shadow_pmd) : kasan_alloc_zeroed_page(node); + if (!early) + memcpy(__va(pmd_phys), kasan_early_shadow_pmd, sizeof(kasan_early_shadow_pmd)); + pud_populate(&init_mm, pudp, (pmd_t *)__va(pmd_phys)); + } + + return pmd_offset(pudp, addr); +} + +static pud_t *__init kasan_pud_offset(p4d_t *p4dp, unsigned long addr, int node, bool early) +{ + if (__p4d_none(early, READ_ONCE(*p4dp))) { + phys_addr_t pud_phys = early ? 
+ __pa_symbol(kasan_early_shadow_pud) : kasan_alloc_zeroed_page(node); + if (!early) + memcpy(__va(pud_phys), kasan_early_shadow_pud, sizeof(kasan_early_shadow_pud)); + p4d_populate(&init_mm, p4dp, (pud_t *)__va(pud_phys)); + } + + return pud_offset(p4dp, addr); +} + +static void __init kasan_pte_populate(pmd_t *pmdp, unsigned long addr, + unsigned long end, int node, bool early) +{ + unsigned long next; + pte_t *ptep = kasan_pte_offset(pmdp, addr, node, early); + + do { + phys_addr_t page_phys = early ? + __pa_symbol(kasan_early_shadow_page) + : kasan_alloc_zeroed_page(node); + next = addr + PAGE_SIZE; + set_pte(ptep, pfn_pte(__phys_to_pfn(page_phys), PAGE_KERNEL)); + } while (ptep++, addr = next, addr != end && __pte_none(early, READ_ONCE(*ptep))); +} + +static void __init kasan_pmd_populate(pud_t *pudp, unsigned long addr, + unsigned long end, int node, bool early) +{ + unsigned long next; + pmd_t *pmdp = kasan_pmd_offset(pudp, addr, node, early); + + do { + next = pmd_addr_end(addr, end); + kasan_pte_populate(pmdp, addr, next, node, early); + } while (pmdp++, addr = next, addr != end && __pmd_none(early, READ_ONCE(*pmdp))); +} + +static void __init kasan_pud_populate(p4d_t *p4dp, unsigned long addr, + unsigned long end, int node, bool early) +{ + unsigned long next; + pud_t *pudp = kasan_pud_offset(p4dp, addr, node, early); + + do { + next = pud_addr_end(addr, end); + kasan_pmd_populate(pudp, addr, next, node, early); + } while (pudp++, addr = next, addr != end); +} + +static void __init kasan_p4d_populate(pgd_t *pgdp, unsigned long addr, + unsigned long end, int node, bool early) +{ + unsigned long next; + p4d_t *p4dp = p4d_offset(pgdp, addr); + + do { + next = p4d_addr_end(addr, end); + kasan_pud_populate(p4dp, addr, next, node, early); + } while (p4dp++, addr = next, addr != end); +} + +static void __init kasan_pgd_populate(unsigned long addr, unsigned long end, + int node, bool early) +{ + unsigned long next; + pgd_t *pgdp; + + pgdp = pgd_offset_k(addr); + 
+ do { + next = pgd_addr_end(addr, end); + kasan_p4d_populate(pgdp, addr, next, node, early); + } while (pgdp++, addr = next, addr != end); + +} + +/* Set up full kasan mappings, ensuring that the mapped pages are zeroed */ +static void __init kasan_map_populate(unsigned long start, unsigned long end, + int node) +{ + kasan_pgd_populate(start & PAGE_MASK, PAGE_ALIGN(end), node, false); +} + +asmlinkage void __init kasan_early_init(void) +{ + BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_START, PGDIR_SIZE)); + BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_END, PGDIR_SIZE)); +} + +static inline void kasan_set_pgd(pgd_t *pgdp, pgd_t pgdval) +{ + WRITE_ONCE(*pgdp, pgdval); +} + +static void __init clear_pgds(unsigned long start, unsigned long end) +{ + /* + * Remove references to kasan page tables from + * swapper_pg_dir. pgd_clear() can't be used + * here because it's nop on 2,3-level pagetable setups + */ + for (; start < end; start += PGDIR_SIZE) + kasan_set_pgd((pgd_t *)pgd_offset_k(start), __pgd(0)); +} + +void __init kasan_init(void) +{ + u64 i; + phys_addr_t pa_start, pa_end; + + /* + * PGD was populated as invalid_pmd_table or invalid_pud_table + * in pagetable_init() which depends on how many levels of page + * table you are using, but we had to clean the gpd of kasan + * shadow memory, as the pgd value is none-zero. + * The assertion pgd_none is going to be false and the formal populate + * afterwards is not going to create any new pgd at all. 
+ */ + memcpy(kasan_pg_dir, swapper_pg_dir, sizeof(kasan_pg_dir)); + csr_write64(__pa_symbol(kasan_pg_dir), LOONGARCH_CSR_PGDH); + local_flush_tlb_all(); + + clear_pgds(KASAN_SHADOW_START, KASAN_SHADOW_END); + + /* Maps everything to a single page of zeroes */ + kasan_pgd_populate(KASAN_SHADOW_START, KASAN_SHADOW_END, NUMA_NO_NODE, true); + + kasan_populate_early_shadow(kasan_mem_to_shadow((void *)VMALLOC_START), + kasan_mem_to_shadow((void *)KFENCE_AREA_END)); + + kasan_early_stage = false; + + /* Populate the linear mapping */ + for_each_mem_range(i, &pa_start, &pa_end) { + void *start = (void *)phys_to_virt(pa_start); + void *end = (void *)phys_to_virt(pa_end); + + if (start >= end) + break; + + kasan_map_populate((unsigned long)kasan_mem_to_shadow(start), + (unsigned long)kasan_mem_to_shadow(end), NUMA_NO_NODE); + } + + /* Populate modules mapping */ + kasan_map_populate((unsigned long)kasan_mem_to_shadow((void *)MODULES_VADDR), + (unsigned long)kasan_mem_to_shadow((void *)MODULES_END), NUMA_NO_NODE); + /* + * KAsan may reuse the contents of kasan_early_shadow_pte directly, so we + * should make sure that it maps the zero page read-only. + */ + for (i = 0; i < PTRS_PER_PTE; i++) + set_pte(&kasan_early_shadow_pte[i], + pfn_pte(__phys_to_pfn(__pa_symbol(kasan_early_shadow_page)), PAGE_KERNEL_RO)); + + memset(kasan_early_shadow_page, 0, PAGE_SIZE); + csr_write64(__pa_symbol(swapper_pg_dir), LOONGARCH_CSR_PGDH); + local_flush_tlb_all(); + + /* At this point kasan is fully initialized. 
Enable error messages */ + init_task.kasan_depth = 0; + pr_info("KernelAddressSanitizer initialized.\n"); +} diff --git a/arch/loongarch/mm/mmap.c b/arch/loongarch/mm/mmap.c index fbe1a4856fc4..a9630a81b38a 100644 --- a/arch/loongarch/mm/mmap.c +++ b/arch/loongarch/mm/mmap.c @@ -8,12 +8,11 @@ #include <linux/mm.h> #include <linux/mman.h> -unsigned long shm_align_mask = PAGE_SIZE - 1; /* Sane caches */ -EXPORT_SYMBOL(shm_align_mask); +#define SHM_ALIGN_MASK (SHMLBA - 1) -#define COLOUR_ALIGN(addr, pgoff) \ - ((((addr) + shm_align_mask) & ~shm_align_mask) + \ - (((pgoff) << PAGE_SHIFT) & shm_align_mask)) +#define COLOUR_ALIGN(addr, pgoff) \ + ((((addr) + SHM_ALIGN_MASK) & ~SHM_ALIGN_MASK) \ + + (((pgoff) << PAGE_SHIFT) & SHM_ALIGN_MASK)) enum mmap_allocation_direction {UP, DOWN}; @@ -40,7 +39,7 @@ static unsigned long arch_get_unmapped_area_common(struct file *filp, * cache aliasing constraints. */ if ((flags & MAP_SHARED) && - ((addr - (pgoff << PAGE_SHIFT)) & shm_align_mask)) + ((addr - (pgoff << PAGE_SHIFT)) & SHM_ALIGN_MASK)) return -EINVAL; return addr; } @@ -63,7 +62,7 @@ static unsigned long arch_get_unmapped_area_common(struct file *filp, } info.length = len; - info.align_mask = do_color_align ? (PAGE_MASK & shm_align_mask) : 0; + info.align_mask = do_color_align ? 
(PAGE_MASK & SHM_ALIGN_MASK) : 0; info.align_offset = pgoff << PAGE_SHIFT; if (dir == DOWN) { diff --git a/arch/loongarch/mm/pgtable.c b/arch/loongarch/mm/pgtable.c index b14343e211b6..71d0539e2d0b 100644 --- a/arch/loongarch/mm/pgtable.c +++ b/arch/loongarch/mm/pgtable.c @@ -9,6 +9,18 @@ #include <asm/pgtable.h> #include <asm/tlbflush.h> +struct page *dmw_virt_to_page(unsigned long kaddr) +{ + return pfn_to_page(virt_to_pfn(kaddr)); +} +EXPORT_SYMBOL_GPL(dmw_virt_to_page); + +struct page *tlb_virt_to_page(unsigned long kaddr) +{ + return pfn_to_page(pte_pfn(*virt_to_kpte(kaddr))); +} +EXPORT_SYMBOL_GPL(tlb_virt_to_page); + pgd_t *pgd_alloc(struct mm_struct *mm) { pgd_t *init, *ret = NULL; diff --git a/arch/loongarch/vdso/Makefile b/arch/loongarch/vdso/Makefile index a50308b6fc25..5c97d1463328 100644 --- a/arch/loongarch/vdso/Makefile +++ b/arch/loongarch/vdso/Makefile @@ -1,6 +1,9 @@ # SPDX-License-Identifier: GPL-2.0 # Objects to go into the VDSO. +KASAN_SANITIZE := n +KCOV_INSTRUMENT := n + # Include the generic Makefile to check the built vdso. include $(srctree)/lib/vdso/Makefile diff --git a/arch/m68k/coldfire/dma_timer.c b/arch/m68k/coldfire/dma_timer.c index cbb289439606..91e6728f51ed 100644 --- a/arch/m68k/coldfire/dma_timer.c +++ b/arch/m68k/coldfire/dma_timer.c @@ -48,7 +48,7 @@ static struct clocksource clocksource_cf_dt = { .flags = CLOCK_SOURCE_IS_CONTINUOUS, }; -static int __init init_cf_dt_clocksource(void) +static int __init init_cf_dt_clocksource(void) { /* * We setup DMA timer 0 in free run mode. 
This incrementing counter is diff --git a/arch/m68k/emu/nfcon.c b/arch/m68k/emu/nfcon.c index 6fdc13610565..3a74d493eb3e 100644 --- a/arch/m68k/emu/nfcon.c +++ b/arch/m68k/emu/nfcon.c @@ -70,16 +70,16 @@ static void nfcon_tty_close(struct tty_struct *tty, struct file *filp) { } -static int nfcon_tty_write(struct tty_struct *tty, const unsigned char *buf, - int count) +static ssize_t nfcon_tty_write(struct tty_struct *tty, const u8 *buf, + size_t count) { nfputs(buf, count); return count; } -static int nfcon_tty_put_char(struct tty_struct *tty, unsigned char ch) +static int nfcon_tty_put_char(struct tty_struct *tty, u8 ch) { - char temp[2] = { ch, 0 }; + u8 temp[2] = { ch, 0 }; nf_call(stderr_id, virt_to_phys(temp)); return 1; diff --git a/arch/m68k/include/asm/ide.h b/arch/m68k/include/asm/ide.h deleted file mode 100644 index 05cc7dc00e0c..000000000000 --- a/arch/m68k/include/asm/ide.h +++ /dev/null @@ -1,67 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Copyright (C) 1994-1996 Linus Torvalds & authors - */ - -/* Copyright(c) 1996 Kars de Jong */ -/* Based on the ide driver from 1.2.13pl8 */ - -/* - * Credits (alphabetical): - * - * - Bjoern Brauel - * - Kars de Jong - * - Torsten Ebeling - * - Dwight Engen - * - Thorsten Floeck - * - Roman Hodek - * - Guenther Kelleter - * - Chris Lawrence - * - Michael Rausch - * - Christian Sauer - * - Michael Schmitz - * - Jes Soerensen - * - Michael Thurm - * - Geert Uytterhoeven - */ - -#ifndef _M68K_IDE_H -#define _M68K_IDE_H - -#ifdef __KERNEL__ -#include <asm/setup.h> -#include <asm/io.h> -#include <asm/irq.h> - -#ifdef CONFIG_MMU - -/* - * Get rid of defs from io.h - ide has its private and conflicting versions - * Since so far no single m68k platform uses ISA/PCI I/O space for IDE, we - * always use the `raw' MMIO versions - */ -#undef readb -#undef readw -#undef writeb -#undef writew - -#define readb in_8 -#define readw in_be16 -#define __ide_mm_insw(port, addr, n) raw_insw((u16 *)port, addr, n) -#define 
__ide_mm_insl(port, addr, n) raw_insl((u32 *)port, addr, n) -#define writeb(val, port) out_8(port, val) -#define writew(val, port) out_be16(port, val) -#define __ide_mm_outsw(port, addr, n) raw_outsw((u16 *)port, addr, n) -#define __ide_mm_outsl(port, addr, n) raw_outsl((u32 *)port, addr, n) - -#else - -#define __ide_mm_insw(port, addr, n) io_insw((unsigned int)port, addr, n) -#define __ide_mm_insl(port, addr, n) io_insl((unsigned int)port, addr, n) -#define __ide_mm_outsw(port, addr, n) io_outsw((unsigned int)port, addr, n) -#define __ide_mm_outsl(port, addr, n) io_outsl((unsigned int)port, addr, n) - -#endif /* CONFIG_MMU */ - -#endif /* __KERNEL__ */ -#endif /* _M68K_IDE_H */ diff --git a/arch/m68k/kernel/pcibios.c b/arch/m68k/kernel/pcibios.c index b0e110d3d2e6..9504eb19d73a 100644 --- a/arch/m68k/kernel/pcibios.c +++ b/arch/m68k/kernel/pcibios.c @@ -92,9 +92,3 @@ void pcibios_fixup_bus(struct pci_bus *bus) pci_write_config_byte(dev, PCI_LATENCY_TIMER, 32); } } - -char *pcibios_setup(char *str) -{ - return str; -} - diff --git a/arch/microblaze/include/asm/page.h b/arch/microblaze/include/asm/page.h index 337f23eabc71..86a4ce07c192 100644 --- a/arch/microblaze/include/asm/page.h +++ b/arch/microblaze/include/asm/page.h @@ -99,9 +99,6 @@ extern int page_is_ram(unsigned long pfn); # define phys_to_pfn(phys) (PFN_DOWN(phys)) # define pfn_to_phys(pfn) (PFN_PHYS(pfn)) -# define virt_to_pfn(vaddr) (phys_to_pfn((__pa(vaddr)))) -# define pfn_to_virt(pfn) __va(pfn_to_phys((pfn))) - # define virt_to_page(kaddr) (pfn_to_page(__pa(kaddr) >> PAGE_SHIFT)) # define page_to_virt(page) __va(page_to_pfn(page) << PAGE_SHIFT) # define page_to_phys(page) (page_to_pfn(page) << PAGE_SHIFT) @@ -109,11 +106,6 @@ extern int page_is_ram(unsigned long pfn); # define ARCH_PFN_OFFSET (memory_start >> PAGE_SHIFT) # endif /* __ASSEMBLY__ */ -#define virt_addr_valid(vaddr) (pfn_valid(virt_to_pfn(vaddr))) - -# define __pa(x) __virt_to_phys((unsigned long)(x)) -# define __va(x) ((void 
*)__phys_to_virt((unsigned long)(x))) - /* Convert between virtual and physical address for MMU. */ /* Handle MicroBlaze processor with virtual memory. */ #define __virt_to_phys(addr) \ @@ -125,6 +117,25 @@ extern int page_is_ram(unsigned long pfn); #define tovirt(rd, rs) \ addik rd, rs, (CONFIG_KERNEL_START - CONFIG_KERNEL_BASE_ADDR) +#ifndef __ASSEMBLY__ + +# define __pa(x) __virt_to_phys((unsigned long)(x)) +# define __va(x) ((void *)__phys_to_virt((unsigned long)(x))) + +static inline unsigned long virt_to_pfn(const void *vaddr) +{ + return phys_to_pfn(__pa(vaddr)); +} + +static inline const void *pfn_to_virt(unsigned long pfn) +{ + return __va(pfn_to_phys((pfn))); +} + +#define virt_addr_valid(vaddr) (pfn_valid(virt_to_pfn(vaddr))) + +#endif /* __ASSEMBLY__ */ + #define TOPHYS(addr) __virt_to_phys(addr) #endif /* __KERNEL__ */ diff --git a/arch/microblaze/include/asm/setup.h b/arch/microblaze/include/asm/setup.h index 3657f5e78a3d..bf2600f75959 100644 --- a/arch/microblaze/include/asm/setup.h +++ b/arch/microblaze/include/asm/setup.h @@ -25,7 +25,5 @@ void machine_shutdown(void); void machine_halt(void); void machine_power_off(void); -extern void *zalloc_maybe_bootmem(size_t size, gfp_t mask); - # endif /* __ASSEMBLY__ */ #endif /* _ASM_MICROBLAZE_SETUP_H */ diff --git a/arch/microblaze/kernel/reset.c b/arch/microblaze/kernel/reset.c index 5f4722908164..2f66c7963084 100644 --- a/arch/microblaze/kernel/reset.c +++ b/arch/microblaze/kernel/reset.c @@ -9,7 +9,6 @@ #include <linux/init.h> #include <linux/delay.h> -#include <linux/of_platform.h> #include <linux/reboot.h> void machine_shutdown(void) diff --git a/arch/microblaze/mm/init.c b/arch/microblaze/mm/init.c index 353fabdfcbc5..3827dc76edd8 100644 --- a/arch/microblaze/mm/init.c +++ b/arch/microblaze/mm/init.c @@ -270,22 +270,6 @@ asmlinkage void __init mmu_init(void) memblock_dump_all(); } -void * __ref zalloc_maybe_bootmem(size_t size, gfp_t mask) -{ - void *p; - - if (mem_init_done) { - p = kzalloc(size, 
mask); - } else { - p = memblock_alloc(size, SMP_CACHE_BYTES); - if (!p) - panic("%s: Failed to allocate %zu bytes\n", - __func__, size); - } - - return p; -} - static const pgprot_t protection_map[16] = { [VM_NONE] = PAGE_NONE, [VM_READ] = PAGE_READONLY_X, diff --git a/arch/mips/Makefile b/arch/mips/Makefile index a47593d72f6f..f49807e1f19b 100644 --- a/arch/mips/Makefile +++ b/arch/mips/Makefile @@ -181,12 +181,16 @@ endif cflags-$(CONFIG_CAVIUM_CN63XXP1) += -Wa,-mfix-cn63xxp1 cflags-$(CONFIG_CPU_BMIPS) += -march=mips32 -Wa,-mips32 -Wa,--trap -cflags-$(CONFIG_CPU_LOONGSON2E) += $(call cc-option,-march=loongson2e) -Wa,--trap -cflags-$(CONFIG_CPU_LOONGSON2F) += $(call cc-option,-march=loongson2f) -Wa,--trap -cflags-$(CONFIG_CPU_LOONGSON64) += $(call cc-option,-march=loongson3a,-march=mips64r2) -Wa,--trap +cflags-$(CONFIG_CPU_LOONGSON2E) += -march=loongson2e -Wa,--trap +cflags-$(CONFIG_CPU_LOONGSON2F) += -march=loongson2f -Wa,--trap # Some -march= flags enable MMI instructions, and GCC complains about that # support being enabled alongside -msoft-float. Thus explicitly disable MMI. 
cflags-$(CONFIG_CPU_LOONGSON2EF) += $(call cc-option,-mno-loongson-mmi) +ifdef CONFIG_CPU_LOONGSON64 +cflags-$(CONFIG_CPU_LOONGSON64) += -Wa,--trap +cflags-$(CONFIG_CC_IS_GCC) += -march=loongson3a +cflags-$(CONFIG_CC_IS_CLANG) += -march=mips64r2 +endif cflags-$(CONFIG_CPU_LOONGSON64) += $(call cc-option,-mno-loongson-mmi) cflags-$(CONFIG_CPU_R4000_WORKAROUNDS) += $(call cc-option,-mfix-r4000,) @@ -299,8 +303,8 @@ ifdef CONFIG_64BIT endif endif - ifeq ($(KBUILD_SYM32)$(call cc-option-yn,-msym32), yy) - cflags-y += -msym32 -DKBUILD_64BIT_SYM32 + ifeq ($(KBUILD_SYM32), y) + cflags-$(KBUILD_SYM32) += -msym32 -DKBUILD_64BIT_SYM32 else ifeq ($(CONFIG_CPU_DADDI_WORKAROUNDS), y) $(error CONFIG_CPU_DADDI_WORKAROUNDS unsupported without -msym32) @@ -341,7 +345,7 @@ KBUILD_CFLAGS += -fno-asynchronous-unwind-tables KBUILD_LDFLAGS += -m $(ld-emul) -ifdef CONFIG_MIPS +ifdef need-compiler CHECKFLAGS += $(shell $(CC) $(KBUILD_CPPFLAGS) $(KBUILD_CFLAGS) -dM -E -x c /dev/null | \ grep -E -vw '__GNUC_(MINOR_|PATCHLEVEL_)?_' | \ sed -e "s/^\#define /-D'/" -e "s/ /'='/" -e "s/$$/'/" -e 's/\$$/&&/g') diff --git a/arch/mips/bmips/setup.c b/arch/mips/bmips/setup.c index 053805cb741c..ec180ab92eaa 100644 --- a/arch/mips/bmips/setup.c +++ b/arch/mips/bmips/setup.c @@ -16,7 +16,6 @@ #include <linux/of.h> #include <linux/of_clk.h> #include <linux/of_fdt.h> -#include <linux/of_platform.h> #include <linux/libfdt.h> #include <linux/smp.h> #include <asm/addrspace.h> diff --git a/arch/mips/cavium-octeon/Makefile b/arch/mips/cavium-octeon/Makefile index 7c02e542959a..2a5926578841 100644 --- a/arch/mips/cavium-octeon/Makefile +++ b/arch/mips/cavium-octeon/Makefile @@ -18,4 +18,3 @@ obj-y += crypto/ obj-$(CONFIG_MTD) += flash_setup.o obj-$(CONFIG_SMP) += smp.o obj-$(CONFIG_OCTEON_ILM) += oct_ilm.o -obj-$(CONFIG_USB) += octeon-usb.o diff --git a/arch/mips/cavium-octeon/flash_setup.c b/arch/mips/cavium-octeon/flash_setup.c index c8a8c6d359b9..3395acde4d60 100644 --- 
a/arch/mips/cavium-octeon/flash_setup.c +++ b/arch/mips/cavium-octeon/flash_setup.c @@ -12,7 +12,8 @@ #include <linux/semaphore.h> #include <linux/mtd/mtd.h> #include <linux/mtd/map.h> -#include <linux/of_platform.h> +#include <linux/of.h> +#include <linux/platform_device.h> #include <linux/mtd/partitions.h> #include <asm/octeon/octeon.h> diff --git a/arch/mips/cavium-octeon/octeon-memcpy.S b/arch/mips/cavium-octeon/octeon-memcpy.S index 25860fba6218..fef0c6de3fa1 100644 --- a/arch/mips/cavium-octeon/octeon-memcpy.S +++ b/arch/mips/cavium-octeon/octeon-memcpy.S @@ -13,9 +13,9 @@ * Mnemonic names for arguments to memcpy/__copy_user */ +#include <linux/export.h> #include <asm/asm.h> #include <asm/asm-offsets.h> -#include <asm/export.h> #include <asm/regdef.h> #define dst a0 diff --git a/arch/mips/cavium-octeon/octeon-platform.c b/arch/mips/cavium-octeon/octeon-platform.c index ce05c0dd3acd..f76783c24338 100644 --- a/arch/mips/cavium-octeon/octeon-platform.c +++ b/arch/mips/cavium-octeon/octeon-platform.c @@ -8,8 +8,10 @@ */ #include <linux/etherdevice.h> +#include <linux/of.h> #include <linux/of_platform.h> #include <linux/of_fdt.h> +#include <linux/platform_device.h> #include <linux/libfdt.h> #include <asm/octeon/octeon.h> @@ -450,7 +452,6 @@ static const struct of_device_id octeon_ids[] __initconst = { { .compatible = "cavium,octeon-3860-bootbus", }, { .compatible = "cavium,mdio-mux", }, { .compatible = "gpio-leds", }, - { .compatible = "cavium,octeon-7130-usb-uctl", }, {}, }; diff --git a/arch/mips/cavium-octeon/octeon-usb.c b/arch/mips/cavium-octeon/octeon-usb.c deleted file mode 100644 index 2add435ad038..000000000000 --- a/arch/mips/cavium-octeon/octeon-usb.c +++ /dev/null @@ -1,551 +0,0 @@ -/* - * XHCI HCD glue for Cavium Octeon III SOCs. - * - * Copyright (C) 2010-2017 Cavium Networks - * - * This file is subject to the terms and conditions of the GNU General Public - * License. 
See the file "COPYING" in the main directory of this archive - * for more details. - */ - -#include <linux/bitfield.h> -#include <linux/bits.h> -#include <linux/device.h> -#include <linux/delay.h> -#include <linux/io.h> -#include <linux/module.h> -#include <linux/mutex.h> -#include <linux/of_platform.h> - -/* - * USB Control Register - */ -#define USBDRD_UCTL_CTL 0x00 -/* BIST fast-clear mode select. A BIST run with this bit set - * clears all entries in USBH RAMs to 0x0. - */ -# define USBDRD_UCTL_CTL_CLEAR_BIST BIT(63) -/* 1 = Start BIST and cleared by hardware */ -# define USBDRD_UCTL_CTL_START_BIST BIT(62) -/* Reference clock select for SuperSpeed and HighSpeed PLLs: - * 0x0 = Both PLLs use DLMC_REF_CLK0 for reference clock - * 0x1 = Both PLLs use DLMC_REF_CLK1 for reference clock - * 0x2 = SuperSpeed PLL uses DLMC_REF_CLK0 for reference clock & - * HighSpeed PLL uses PLL_REF_CLK for reference clck - * 0x3 = SuperSpeed PLL uses DLMC_REF_CLK1 for reference clock & - * HighSpeed PLL uses PLL_REF_CLK for reference clck - */ -# define USBDRD_UCTL_CTL_REF_CLK_SEL GENMASK(61, 60) -/* 1 = Spread-spectrum clock enable, 0 = SS clock disable */ -# define USBDRD_UCTL_CTL_SSC_EN BIT(59) -/* Spread-spectrum clock modulation range: - * 0x0 = -4980 ppm downspread - * 0x1 = -4492 ppm downspread - * 0x2 = -4003 ppm downspread - * 0x3 - 0x7 = Reserved - */ -# define USBDRD_UCTL_CTL_SSC_RANGE GENMASK(58, 56) -/* Enable non-standard oscillator frequencies: - * [55:53] = modules -1 - * [52:47] = 2's complement push amount, 0 = Feature disabled - */ -# define USBDRD_UCTL_CTL_SSC_REF_CLK_SEL GENMASK(55, 47) -/* Reference clock multiplier for non-standard frequencies: - * 0x19 = 100MHz on DLMC_REF_CLK* if REF_CLK_SEL = 0x0 or 0x1 - * 0x28 = 125MHz on DLMC_REF_CLK* if REF_CLK_SEL = 0x0 or 0x1 - * 0x32 = 50MHz on DLMC_REF_CLK* if REF_CLK_SEL = 0x0 or 0x1 - * Other Values = Reserved - */ -# define USBDRD_UCTL_CTL_MPLL_MULTIPLIER GENMASK(46, 40) -/* Enable reference clock to prescaler for 
SuperSpeed functionality. - * Should always be set to "1" - */ -# define USBDRD_UCTL_CTL_REF_SSP_EN BIT(39) -/* Divide the reference clock by 2 before entering the - * REF_CLK_FSEL divider: - * If REF_CLK_SEL = 0x0 or 0x1, then only 0x0 is legal - * If REF_CLK_SEL = 0x2 or 0x3, then: - * 0x1 = DLMC_REF_CLK* is 125MHz - * 0x0 = DLMC_REF_CLK* is another supported frequency - */ -# define USBDRD_UCTL_CTL_REF_CLK_DIV2 BIT(38) -/* Select reference clock freqnuency for both PLL blocks: - * 0x27 = REF_CLK_SEL is 0x0 or 0x1 - * 0x07 = REF_CLK_SEL is 0x2 or 0x3 - */ -# define USBDRD_UCTL_CTL_REF_CLK_FSEL GENMASK(37, 32) -/* Controller clock enable. */ -# define USBDRD_UCTL_CTL_H_CLK_EN BIT(30) -/* Select bypass input to controller clock divider: - * 0x0 = Use divided coprocessor clock from H_CLKDIV - * 0x1 = Use clock from GPIO pins - */ -# define USBDRD_UCTL_CTL_H_CLK_BYP_SEL BIT(29) -/* Reset controller clock divider. */ -# define USBDRD_UCTL_CTL_H_CLKDIV_RST BIT(28) -/* Clock divider select: - * 0x0 = divide by 1 - * 0x1 = divide by 2 - * 0x2 = divide by 4 - * 0x3 = divide by 6 - * 0x4 = divide by 8 - * 0x5 = divide by 16 - * 0x6 = divide by 24 - * 0x7 = divide by 32 - */ -# define USBDRD_UCTL_CTL_H_CLKDIV_SEL GENMASK(26, 24) -/* USB3 port permanently attached: 0x0 = No, 0x1 = Yes */ -# define USBDRD_UCTL_CTL_USB3_PORT_PERM_ATTACH BIT(21) -/* USB2 port permanently attached: 0x0 = No, 0x1 = Yes */ -# define USBDRD_UCTL_CTL_USB2_PORT_PERM_ATTACH BIT(20) -/* Disable SuperSpeed PHY: 0x0 = No, 0x1 = Yes */ -# define USBDRD_UCTL_CTL_USB3_PORT_DISABLE BIT(18) -/* Disable HighSpeed PHY: 0x0 = No, 0x1 = Yes */ -# define USBDRD_UCTL_CTL_USB2_PORT_DISABLE BIT(16) -/* Enable PHY SuperSpeed block power: 0x0 = No, 0x1 = Yes */ -# define USBDRD_UCTL_CTL_SS_POWER_EN BIT(14) -/* Enable PHY HighSpeed block power: 0x0 = No, 0x1 = Yes */ -# define USBDRD_UCTL_CTL_HS_POWER_EN BIT(12) -/* Enable USB UCTL interface clock: 0xx = No, 0x1 = Yes */ -# define USBDRD_UCTL_CTL_CSCLK_EN BIT(4) -/* 
Controller mode: 0x0 = Host, 0x1 = Device */ -# define USBDRD_UCTL_CTL_DRD_MODE BIT(3) -/* PHY reset */ -# define USBDRD_UCTL_CTL_UPHY_RST BIT(2) -/* Software reset UAHC */ -# define USBDRD_UCTL_CTL_UAHC_RST BIT(1) -/* Software resets UCTL */ -# define USBDRD_UCTL_CTL_UCTL_RST BIT(0) - -#define USBDRD_UCTL_BIST_STATUS 0x08 -#define USBDRD_UCTL_SPARE0 0x10 -#define USBDRD_UCTL_INTSTAT 0x30 -#define USBDRD_UCTL_PORT_CFG_HS(port) (0x40 + (0x20 * port)) -#define USBDRD_UCTL_PORT_CFG_SS(port) (0x48 + (0x20 * port)) -#define USBDRD_UCTL_PORT_CR_DBG_CFG(port) (0x50 + (0x20 * port)) -#define USBDRD_UCTL_PORT_CR_DBG_STATUS(port) (0x58 + (0x20 * port)) - -/* - * UCTL Configuration Register - */ -#define USBDRD_UCTL_HOST_CFG 0xe0 -/* Indicates minimum value of all received BELT values */ -# define USBDRD_UCTL_HOST_CFG_HOST_CURRENT_BELT GENMASK(59, 48) -/* HS jitter adjustment */ -# define USBDRD_UCTL_HOST_CFG_FLA GENMASK(37, 32) -/* Bus-master enable: 0x0 = Disabled (stall DMAs), 0x1 = enabled */ -# define USBDRD_UCTL_HOST_CFG_BME BIT(28) -/* Overcurrent protection enable: 0x0 = unavailable, 0x1 = available */ -# define USBDRD_UCTL_HOST_OCI_EN BIT(27) -/* Overcurrent sene selection: - * 0x0 = Overcurrent indication from off-chip is active-low - * 0x1 = Overcurrent indication from off-chip is active-high - */ -# define USBDRD_UCTL_HOST_OCI_ACTIVE_HIGH_EN BIT(26) -/* Port power control enable: 0x0 = unavailable, 0x1 = available */ -# define USBDRD_UCTL_HOST_PPC_EN BIT(25) -/* Port power control sense selection: - * 0x0 = Port power to off-chip is active-low - * 0x1 = Port power to off-chip is active-high - */ -# define USBDRD_UCTL_HOST_PPC_ACTIVE_HIGH_EN BIT(24) - -/* - * UCTL Shim Features Register - */ -#define USBDRD_UCTL_SHIM_CFG 0xe8 -/* Out-of-bound UAHC register access: 0 = read, 1 = write */ -# define USBDRD_UCTL_SHIM_CFG_XS_NCB_OOB_WRN BIT(63) -/* SRCID error log for out-of-bound UAHC register access: - * [59:58] = chipID - * [57] = Request source: 0 = core, 1 = 
NCB-device - * [56:51] = Core/NCB-device number, [56] always 0 for NCB devices - * [50:48] = SubID - */ -# define USBDRD_UCTL_SHIM_CFG_XS_NCB_OOB_OSRC GENMASK(59, 48) -/* Error log for bad UAHC DMA access: 0 = Read log, 1 = Write log */ -# define USBDRD_UCTL_SHIM_CFG_XM_BAD_DMA_WRN BIT(47) -/* Encoded error type for bad UAHC DMA */ -# define USBDRD_UCTL_SHIM_CFG_XM_BAD_DMA_TYPE GENMASK(43, 40) -/* Select the IOI read command used by DMA accesses */ -# define USBDRD_UCTL_SHIM_CFG_DMA_READ_CMD BIT(12) -/* Select endian format for DMA accesses to the L2C: - * 0x0 = Little endian - * 0x1 = Big endian - * 0x2 = Reserved - * 0x3 = Reserved - */ -# define USBDRD_UCTL_SHIM_CFG_DMA_ENDIAN_MODE GENMASK(9, 8) -/* Select endian format for IOI CSR access to UAHC: - * 0x0 = Little endian - * 0x1 = Big endian - * 0x2 = Reserved - * 0x3 = Reserved - */ -# define USBDRD_UCTL_SHIM_CFG_CSR_ENDIAN_MODE GENMASK(1, 0) - -#define USBDRD_UCTL_ECC 0xf0 -#define USBDRD_UCTL_SPARE1 0xf8 - -static DEFINE_MUTEX(dwc3_octeon_clocks_mutex); - -#ifdef CONFIG_CAVIUM_OCTEON_SOC -#include <asm/octeon/octeon.h> -static inline uint64_t dwc3_octeon_readq(void __iomem *addr) -{ - return cvmx_readq_csr(addr); -} - -static inline void dwc3_octeon_writeq(void __iomem *base, uint64_t val) -{ - cvmx_writeq_csr(base, val); -} - -static void dwc3_octeon_config_gpio(int index, int gpio) -{ - union cvmx_gpio_bit_cfgx gpio_bit; - - if ((OCTEON_IS_MODEL(OCTEON_CN73XX) || - OCTEON_IS_MODEL(OCTEON_CNF75XX)) - && gpio <= 31) { - gpio_bit.u64 = cvmx_read_csr(CVMX_GPIO_BIT_CFGX(gpio)); - gpio_bit.s.tx_oe = 1; - gpio_bit.s.output_sel = (index == 0 ? 0x14 : 0x15); - cvmx_write_csr(CVMX_GPIO_BIT_CFGX(gpio), gpio_bit.u64); - } else if (gpio <= 15) { - gpio_bit.u64 = cvmx_read_csr(CVMX_GPIO_BIT_CFGX(gpio)); - gpio_bit.s.tx_oe = 1; - gpio_bit.s.output_sel = (index == 0 ? 
0x14 : 0x19); - cvmx_write_csr(CVMX_GPIO_BIT_CFGX(gpio), gpio_bit.u64); - } else { - gpio_bit.u64 = cvmx_read_csr(CVMX_GPIO_XBIT_CFGX(gpio)); - gpio_bit.s.tx_oe = 1; - gpio_bit.s.output_sel = (index == 0 ? 0x14 : 0x19); - cvmx_write_csr(CVMX_GPIO_XBIT_CFGX(gpio), gpio_bit.u64); - } -} -#else -static inline uint64_t dwc3_octeon_readq(void __iomem *addr) -{ - return 0; -} - -static inline void dwc3_octeon_writeq(void __iomem *base, uint64_t val) { } - -static inline void dwc3_octeon_config_gpio(int index, int gpio) { } -#endif - -static int dwc3_octeon_get_divider(void) -{ - static const uint8_t clk_div[] = { 1, 2, 4, 6, 8, 16, 24, 32 }; - int div = 0; - - while (div < ARRAY_SIZE(clk_div)) { - uint64_t rate = octeon_get_io_clock_rate() / clk_div[div]; - if (rate <= 300000000 && rate >= 150000000) - break; - div++; - } - - return div; -} - -static int dwc3_octeon_config_power(struct device *dev, void __iomem *base) -{ - uint32_t gpio_pwr[3]; - int gpio, len, power_active_low; - struct device_node *node = dev->of_node; - u64 val; - void __iomem *uctl_host_cfg_reg = base + USBDRD_UCTL_HOST_CFG; - - if (of_find_property(node, "power", &len) != NULL) { - if (len == 12) { - of_property_read_u32_array(node, "power", gpio_pwr, 3); - power_active_low = gpio_pwr[2] & 0x01; - gpio = gpio_pwr[1]; - } else if (len == 8) { - of_property_read_u32_array(node, "power", gpio_pwr, 2); - power_active_low = 0; - gpio = gpio_pwr[1]; - } else { - dev_err(dev, "invalid power configuration\n"); - return -EINVAL; - } - dwc3_octeon_config_gpio(((u64)base >> 24) & 1, gpio); - - /* Enable XHCI power control and set if active high or low. */ - val = dwc3_octeon_readq(uctl_host_cfg_reg); - val |= USBDRD_UCTL_HOST_PPC_EN; - if (power_active_low) - val &= ~USBDRD_UCTL_HOST_PPC_ACTIVE_HIGH_EN; - else - val |= USBDRD_UCTL_HOST_PPC_ACTIVE_HIGH_EN; - dwc3_octeon_writeq(uctl_host_cfg_reg, val); - } else { - /* Disable XHCI power control and set if active high. 
*/ - val = dwc3_octeon_readq(uctl_host_cfg_reg); - val &= ~USBDRD_UCTL_HOST_PPC_EN; - val &= ~USBDRD_UCTL_HOST_PPC_ACTIVE_HIGH_EN; - dwc3_octeon_writeq(uctl_host_cfg_reg, val); - dev_info(dev, "power control disabled\n"); - } - return 0; -} - -static int dwc3_octeon_clocks_start(struct device *dev, void __iomem *base) -{ - int i, div, mpll_mul, ref_clk_fsel, ref_clk_sel = 2; - u32 clock_rate; - u64 val; - void __iomem *uctl_ctl_reg = base + USBDRD_UCTL_CTL; - - if (dev->of_node) { - const char *ss_clock_type; - const char *hs_clock_type; - - i = of_property_read_u32(dev->of_node, - "refclk-frequency", &clock_rate); - if (i) { - dev_err(dev, "No UCTL \"refclk-frequency\"\n"); - return -EINVAL; - } - i = of_property_read_string(dev->of_node, - "refclk-type-ss", &ss_clock_type); - if (i) { - dev_err(dev, "No UCTL \"refclk-type-ss\"\n"); - return -EINVAL; - } - i = of_property_read_string(dev->of_node, - "refclk-type-hs", &hs_clock_type); - if (i) { - dev_err(dev, "No UCTL \"refclk-type-hs\"\n"); - return -EINVAL; - } - if (strcmp("dlmc_ref_clk0", ss_clock_type) == 0) { - if (strcmp(hs_clock_type, "dlmc_ref_clk0") == 0) - ref_clk_sel = 0; - else if (strcmp(hs_clock_type, "pll_ref_clk") == 0) - ref_clk_sel = 2; - else - dev_warn(dev, "Invalid HS clock type %s, using pll_ref_clk instead\n", - hs_clock_type); - } else if (strcmp(ss_clock_type, "dlmc_ref_clk1") == 0) { - if (strcmp(hs_clock_type, "dlmc_ref_clk1") == 0) - ref_clk_sel = 1; - else if (strcmp(hs_clock_type, "pll_ref_clk") == 0) - ref_clk_sel = 3; - else { - dev_warn(dev, "Invalid HS clock type %s, using pll_ref_clk instead\n", - hs_clock_type); - ref_clk_sel = 3; - } - } else - dev_warn(dev, "Invalid SS clock type %s, using dlmc_ref_clk0 instead\n", - ss_clock_type); - - if ((ref_clk_sel == 0 || ref_clk_sel == 1) && - (clock_rate != 100000000)) - dev_warn(dev, "Invalid UCTL clock rate of %u, using 100000000 instead\n", - clock_rate); - - } else { - dev_err(dev, "No USB UCTL device node\n"); - return -EINVAL; - 
} - - /* - * Step 1: Wait for all voltages to be stable...that surely - * happened before starting the kernel. SKIP - */ - - /* Step 2: Select GPIO for overcurrent indication, if desired. SKIP */ - - /* Step 3: Assert all resets. */ - val = dwc3_octeon_readq(uctl_ctl_reg); - val |= USBDRD_UCTL_CTL_UPHY_RST | - USBDRD_UCTL_CTL_UAHC_RST | - USBDRD_UCTL_CTL_UCTL_RST; - dwc3_octeon_writeq(uctl_ctl_reg, val); - - /* Step 4a: Reset the clock dividers. */ - val = dwc3_octeon_readq(uctl_ctl_reg); - val |= USBDRD_UCTL_CTL_H_CLKDIV_RST; - dwc3_octeon_writeq(uctl_ctl_reg, val); - - /* Step 4b: Select controller clock frequency. */ - div = dwc3_octeon_get_divider(); - val = dwc3_octeon_readq(uctl_ctl_reg); - val &= ~USBDRD_UCTL_CTL_H_CLKDIV_SEL; - val |= FIELD_PREP(USBDRD_UCTL_CTL_H_CLKDIV_SEL, div); - val |= USBDRD_UCTL_CTL_H_CLK_EN; - dwc3_octeon_writeq(uctl_ctl_reg, val); - val = dwc3_octeon_readq(uctl_ctl_reg); - if ((div != FIELD_GET(USBDRD_UCTL_CTL_H_CLKDIV_SEL, val)) || - (!(FIELD_GET(USBDRD_UCTL_CTL_H_CLK_EN, val)))) { - dev_err(dev, "dwc3 controller clock init failure.\n"); - return -EINVAL; - } - - /* Step 4c: Deassert the controller clock divider reset. */ - val &= ~USBDRD_UCTL_CTL_H_CLKDIV_RST; - dwc3_octeon_writeq(uctl_ctl_reg, val); - - /* Step 5a: Reference clock configuration. 
*/ - val = dwc3_octeon_readq(uctl_ctl_reg); - val &= ~USBDRD_UCTL_CTL_REF_CLK_DIV2; - val &= ~USBDRD_UCTL_CTL_REF_CLK_SEL; - val |= FIELD_PREP(USBDRD_UCTL_CTL_REF_CLK_SEL, ref_clk_sel); - - ref_clk_fsel = 0x07; - switch (clock_rate) { - default: - dev_warn(dev, "Invalid ref_clk %u, using 100000000 instead\n", - clock_rate); - fallthrough; - case 100000000: - mpll_mul = 0x19; - if (ref_clk_sel < 2) - ref_clk_fsel = 0x27; - break; - case 50000000: - mpll_mul = 0x32; - break; - case 125000000: - mpll_mul = 0x28; - break; - } - val &= ~USBDRD_UCTL_CTL_REF_CLK_FSEL; - val |= FIELD_PREP(USBDRD_UCTL_CTL_REF_CLK_FSEL, ref_clk_fsel); - - val &= ~USBDRD_UCTL_CTL_MPLL_MULTIPLIER; - val |= FIELD_PREP(USBDRD_UCTL_CTL_MPLL_MULTIPLIER, mpll_mul); - - /* Step 5b: Configure and enable spread-spectrum for SuperSpeed. */ - val |= USBDRD_UCTL_CTL_SSC_EN; - - /* Step 5c: Enable SuperSpeed. */ - val |= USBDRD_UCTL_CTL_REF_SSP_EN; - - /* Step 5d: Configure PHYs. SKIP */ - - /* Step 6a & 6b: Power up PHYs. */ - val |= USBDRD_UCTL_CTL_HS_POWER_EN; - val |= USBDRD_UCTL_CTL_SS_POWER_EN; - dwc3_octeon_writeq(uctl_ctl_reg, val); - - /* Step 7: Wait 10 controller-clock cycles to take effect. */ - udelay(10); - - /* Step 8a: Deassert UCTL reset signal. */ - val = dwc3_octeon_readq(uctl_ctl_reg); - val &= ~USBDRD_UCTL_CTL_UCTL_RST; - dwc3_octeon_writeq(uctl_ctl_reg, val); - - /* Step 8b: Wait 10 controller-clock cycles. */ - udelay(10); - - /* Steo 8c: Setup power-power control. */ - if (dwc3_octeon_config_power(dev, base)) - return -EINVAL; - - /* Step 8d: Deassert UAHC reset signal. */ - val = dwc3_octeon_readq(uctl_ctl_reg); - val &= ~USBDRD_UCTL_CTL_UAHC_RST; - dwc3_octeon_writeq(uctl_ctl_reg, val); - - /* Step 8e: Wait 10 controller-clock cycles. */ - udelay(10); - - /* Step 9: Enable conditional coprocessor clock of UCTL. */ - val = dwc3_octeon_readq(uctl_ctl_reg); - val |= USBDRD_UCTL_CTL_CSCLK_EN; - dwc3_octeon_writeq(uctl_ctl_reg, val); - - /*Step 10: Set for host mode only. 
*/ - val = dwc3_octeon_readq(uctl_ctl_reg); - val &= ~USBDRD_UCTL_CTL_DRD_MODE; - dwc3_octeon_writeq(uctl_ctl_reg, val); - - return 0; -} - -static void __init dwc3_octeon_set_endian_mode(void __iomem *base) -{ - u64 val; - void __iomem *uctl_shim_cfg_reg = base + USBDRD_UCTL_SHIM_CFG; - - val = dwc3_octeon_readq(uctl_shim_cfg_reg); - val &= ~USBDRD_UCTL_SHIM_CFG_DMA_ENDIAN_MODE; - val &= ~USBDRD_UCTL_SHIM_CFG_CSR_ENDIAN_MODE; -#ifdef __BIG_ENDIAN - val |= FIELD_PREP(USBDRD_UCTL_SHIM_CFG_DMA_ENDIAN_MODE, 1); - val |= FIELD_PREP(USBDRD_UCTL_SHIM_CFG_CSR_ENDIAN_MODE, 1); -#endif - dwc3_octeon_writeq(uctl_shim_cfg_reg, val); -} - -static void __init dwc3_octeon_phy_reset(void __iomem *base) -{ - u64 val; - void __iomem *uctl_ctl_reg = base + USBDRD_UCTL_CTL; - - val = dwc3_octeon_readq(uctl_ctl_reg); - val &= ~USBDRD_UCTL_CTL_UPHY_RST; - dwc3_octeon_writeq(uctl_ctl_reg, val); -} - -static int __init dwc3_octeon_device_init(void) -{ - const char compat_node_name[] = "cavium,octeon-7130-usb-uctl"; - struct platform_device *pdev; - struct device_node *node; - struct resource *res; - void __iomem *base; - - /* - * There should only be three universal controllers, "uctl" - * in the device tree. Two USB and a SATA, which we ignore. - */ - node = NULL; - do { - node = of_find_node_by_name(node, "uctl"); - if (!node) - return -ENODEV; - - if (of_device_is_compatible(node, compat_node_name)) { - pdev = of_find_device_by_node(node); - if (!pdev) - return -ENODEV; - - /* - * The code below maps in the registers necessary for - * setting up the clocks and reseting PHYs. We must - * release the resources so the dwc3 subsystem doesn't - * know the difference. 
- */ - base = devm_platform_get_and_ioremap_resource(pdev, 0, &res); - if (IS_ERR(base)) { - put_device(&pdev->dev); - return PTR_ERR(base); - } - - mutex_lock(&dwc3_octeon_clocks_mutex); - if (dwc3_octeon_clocks_start(&pdev->dev, base) == 0) - dev_info(&pdev->dev, "clocks initialized.\n"); - dwc3_octeon_set_endian_mode(base); - dwc3_octeon_phy_reset(base); - mutex_unlock(&dwc3_octeon_clocks_mutex); - devm_iounmap(&pdev->dev, base); - devm_release_mem_region(&pdev->dev, res->start, - resource_size(res)); - put_device(&pdev->dev); - } - } while (node != NULL); - - return 0; -} -device_initcall(dwc3_octeon_device_init); - -MODULE_AUTHOR("David Daney <david.daney@cavium.com>"); -MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("USB driver for OCTEON III SoC"); diff --git a/arch/mips/configs/ip22_defconfig b/arch/mips/configs/ip22_defconfig index 44821f497261..dc49b09d492b 100644 --- a/arch/mips/configs/ip22_defconfig +++ b/arch/mips/configs/ip22_defconfig @@ -127,7 +127,6 @@ CONFIG_IP_NF_MATCH_TTL=m CONFIG_IP_NF_FILTER=m CONFIG_IP_NF_TARGET_REJECT=m CONFIG_IP_NF_MANGLE=m -CONFIG_IP_NF_TARGET_CLUSTERIP=m CONFIG_IP_NF_TARGET_ECN=m CONFIG_IP_NF_TARGET_TTL=m CONFIG_IP_NF_RAW=m diff --git a/arch/mips/configs/loongson3_defconfig b/arch/mips/configs/loongson3_defconfig index 2b4133176930..07839a4b397e 100644 --- a/arch/mips/configs/loongson3_defconfig +++ b/arch/mips/configs/loongson3_defconfig @@ -283,6 +283,7 @@ CONFIG_DRM_AMDGPU_USERPTR=y CONFIG_DRM_AMD_ACP=y CONFIG_DRM_AMD_DC=y CONFIG_DRM_AMD_DC_SI=y +CONFIG_DRM_AST=m CONFIG_DRM_RADEON=m CONFIG_DRM_QXL=y CONFIG_DRM_VIRTIO_GPU=y diff --git a/arch/mips/configs/malta_defconfig b/arch/mips/configs/malta_defconfig index 743209047792..ae1a7793e810 100644 --- a/arch/mips/configs/malta_defconfig +++ b/arch/mips/configs/malta_defconfig @@ -127,7 +127,6 @@ CONFIG_IP_NF_MATCH_TTL=m CONFIG_IP_NF_FILTER=m CONFIG_IP_NF_TARGET_REJECT=m CONFIG_IP_NF_MANGLE=m -CONFIG_IP_NF_TARGET_CLUSTERIP=m CONFIG_IP_NF_TARGET_ECN=m CONFIG_IP_NF_TARGET_TTL=m 
CONFIG_IP_NF_RAW=m diff --git a/arch/mips/configs/malta_kvm_defconfig b/arch/mips/configs/malta_kvm_defconfig index dd2b9c181f32..c07e30f63d8b 100644 --- a/arch/mips/configs/malta_kvm_defconfig +++ b/arch/mips/configs/malta_kvm_defconfig @@ -131,7 +131,6 @@ CONFIG_IP_NF_MATCH_TTL=m CONFIG_IP_NF_FILTER=m CONFIG_IP_NF_TARGET_REJECT=m CONFIG_IP_NF_MANGLE=m -CONFIG_IP_NF_TARGET_CLUSTERIP=m CONFIG_IP_NF_TARGET_ECN=m CONFIG_IP_NF_TARGET_TTL=m CONFIG_IP_NF_RAW=m diff --git a/arch/mips/configs/maltaup_xpa_defconfig b/arch/mips/configs/maltaup_xpa_defconfig index 97c2d7f530b3..0a5701020d3f 100644 --- a/arch/mips/configs/maltaup_xpa_defconfig +++ b/arch/mips/configs/maltaup_xpa_defconfig @@ -128,7 +128,6 @@ CONFIG_IP_NF_MATCH_TTL=m CONFIG_IP_NF_FILTER=m CONFIG_IP_NF_TARGET_REJECT=m CONFIG_IP_NF_MANGLE=m -CONFIG_IP_NF_TARGET_CLUSTERIP=m CONFIG_IP_NF_TARGET_ECN=m CONFIG_IP_NF_TARGET_TTL=m CONFIG_IP_NF_RAW=m diff --git a/arch/mips/configs/rm200_defconfig b/arch/mips/configs/rm200_defconfig index e0e312dd968a..5c5e2186210c 100644 --- a/arch/mips/configs/rm200_defconfig +++ b/arch/mips/configs/rm200_defconfig @@ -90,7 +90,6 @@ CONFIG_IP_NF_MATCH_TTL=m CONFIG_IP_NF_FILTER=m CONFIG_IP_NF_TARGET_REJECT=m CONFIG_IP_NF_MANGLE=m -CONFIG_IP_NF_TARGET_CLUSTERIP=m CONFIG_IP_NF_TARGET_ECN=m CONFIG_IP_NF_TARGET_TTL=m CONFIG_IP_NF_RAW=m diff --git a/arch/mips/include/asm/Kbuild b/arch/mips/include/asm/Kbuild index dee172716581..7ba67a0d6c97 100644 --- a/arch/mips/include/asm/Kbuild +++ b/arch/mips/include/asm/Kbuild @@ -7,7 +7,6 @@ generated-y += unistd_nr_n32.h generated-y += unistd_nr_n64.h generated-y += unistd_nr_o32.h -generic-y += export.h generic-y += kvm_para.h generic-y += mcs_spinlock.h generic-y += parport.h diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h index 04cedf9f8811..54a85f1d4f2c 100644 --- a/arch/mips/include/asm/kvm_host.h +++ b/arch/mips/include/asm/kvm_host.h @@ -896,7 +896,6 @@ static inline void kvm_arch_sched_in(struct kvm_vcpu 
*vcpu, int cpu) {} static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {} static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {} -#define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLB -int kvm_arch_flush_remote_tlb(struct kvm *kvm); +#define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS #endif /* __MIPS_KVM_HOST_H__ */ diff --git a/arch/mips/include/asm/mach-loongson32/loongson1.h b/arch/mips/include/asm/mach-loongson32/loongson1.h index 7971272345d3..84f45461c832 100644 --- a/arch/mips/include/asm/mach-loongson32/loongson1.h +++ b/arch/mips/include/asm/mach-loongson32/loongson1.h @@ -45,8 +45,6 @@ #define LS1X_NAND_BASE 0x1fe78000 #define LS1X_CLK_BASE 0x1fe78030 -#include <regs-clk.h> #include <regs-mux.h> -#include <regs-rtc.h> #endif /* __ASM_MACH_LOONGSON32_LOONGSON1_H */ diff --git a/arch/mips/include/asm/mach-loongson32/regs-clk.h b/arch/mips/include/asm/mach-loongson32/regs-clk.h deleted file mode 100644 index 98136fa8bee1..000000000000 --- a/arch/mips/include/asm/mach-loongson32/regs-clk.h +++ /dev/null @@ -1,81 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Copyright (c) 2011 Zhang, Keguang <keguang.zhang@gmail.com> - * - * Loongson 1 Clock Register Definitions. 
- */ - -#ifndef __ASM_MACH_LOONGSON32_REGS_CLK_H -#define __ASM_MACH_LOONGSON32_REGS_CLK_H - -#define LS1X_CLK_REG(x) \ - ((void __iomem *)KSEG1ADDR(LS1X_CLK_BASE + (x))) - -#define LS1X_CLK_PLL_FREQ LS1X_CLK_REG(0x0) -#define LS1X_CLK_PLL_DIV LS1X_CLK_REG(0x4) - -#if defined(CONFIG_LOONGSON1_LS1B) -/* Clock PLL Divisor Register Bits */ -#define DIV_DC_EN BIT(31) -#define DIV_DC_RST BIT(30) -#define DIV_CPU_EN BIT(25) -#define DIV_CPU_RST BIT(24) -#define DIV_DDR_EN BIT(19) -#define DIV_DDR_RST BIT(18) -#define RST_DC_EN BIT(5) -#define RST_DC BIT(4) -#define RST_DDR_EN BIT(3) -#define RST_DDR BIT(2) -#define RST_CPU_EN BIT(1) -#define RST_CPU BIT(0) - -#define DIV_DC_SHIFT 26 -#define DIV_CPU_SHIFT 20 -#define DIV_DDR_SHIFT 14 - -#define DIV_DC_WIDTH 4 -#define DIV_CPU_WIDTH 4 -#define DIV_DDR_WIDTH 4 - -#define BYPASS_DC_SHIFT 12 -#define BYPASS_DDR_SHIFT 10 -#define BYPASS_CPU_SHIFT 8 - -#define BYPASS_DC_WIDTH 1 -#define BYPASS_DDR_WIDTH 1 -#define BYPASS_CPU_WIDTH 1 - -#elif defined(CONFIG_LOONGSON1_LS1C) -/* PLL/SDRAM Frequency configuration register Bits */ -#define PLL_VALID BIT(31) -#define FRAC_N GENMASK(23, 16) -#define RST_TIME GENMASK(3, 2) -#define SDRAM_DIV GENMASK(1, 0) - -/* CPU/CAMERA/DC Frequency configuration register Bits */ -#define DIV_DC_EN BIT(31) -#define DIV_DC GENMASK(30, 24) -#define DIV_CAM_EN BIT(23) -#define DIV_CAM GENMASK(22, 16) -#define DIV_CPU_EN BIT(15) -#define DIV_CPU GENMASK(14, 8) -#define DIV_DC_SEL_EN BIT(5) -#define DIV_DC_SEL BIT(4) -#define DIV_CAM_SEL_EN BIT(3) -#define DIV_CAM_SEL BIT(2) -#define DIV_CPU_SEL_EN BIT(1) -#define DIV_CPU_SEL BIT(0) - -#define DIV_DC_SHIFT 24 -#define DIV_CAM_SHIFT 16 -#define DIV_CPU_SHIFT 8 -#define DIV_DDR_SHIFT 0 - -#define DIV_DC_WIDTH 7 -#define DIV_CAM_WIDTH 7 -#define DIV_CPU_WIDTH 7 -#define DIV_DDR_WIDTH 2 - -#endif - -#endif /* __ASM_MACH_LOONGSON32_REGS_CLK_H */ diff --git a/arch/mips/include/asm/mach-loongson32/regs-rtc.h b/arch/mips/include/asm/mach-loongson32/regs-rtc.h 
deleted file mode 100644 index a3d096be1607..000000000000 --- a/arch/mips/include/asm/mach-loongson32/regs-rtc.h +++ /dev/null @@ -1,19 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Copyright (c) 2016 Yang Ling <gnaygnil@gmail.com> - * - * Loongson 1 RTC timer Register Definitions. - */ - -#ifndef __ASM_MACH_LOONGSON32_REGS_RTC_H -#define __ASM_MACH_LOONGSON32_REGS_RTC_H - -#define LS1X_RTC_REG(x) \ - ((void __iomem *)KSEG1ADDR(LS1X_RTC_BASE + (x))) - -#define LS1X_RTC_CTRL LS1X_RTC_REG(0x40) - -#define RTC_EXTCLK_OK (BIT(5) | BIT(8)) -#define RTC_EXTCLK_EN BIT(8) - -#endif /* __ASM_MACH_LOONGSON32_REGS_RTC_H */ diff --git a/arch/mips/kernel/mcount.S b/arch/mips/kernel/mcount.S index cff52b283e03..fcec579f64e9 100644 --- a/arch/mips/kernel/mcount.S +++ b/arch/mips/kernel/mcount.S @@ -10,7 +10,7 @@ * Author: Wu Zhangjin <wuzhangjin@gmail.com> */ -#include <asm/export.h> +#include <linux/export.h> #include <asm/regdef.h> #include <asm/stackframe.h> #include <asm/ftrace.h> diff --git a/arch/mips/kernel/octeon_switch.S b/arch/mips/kernel/octeon_switch.S index 9b7c8ab6f08c..447a3ea14aa1 100644 --- a/arch/mips/kernel/octeon_switch.S +++ b/arch/mips/kernel/octeon_switch.S @@ -11,7 +11,6 @@ * written by Carsten Langgaard, carstenl@mips.com */ #include <asm/asm.h> -#include <asm/export.h> #include <asm/asm-offsets.h> #include <asm/mipsregs.h> #include <asm/regdef.h> diff --git a/arch/mips/kernel/r2300_fpu.S b/arch/mips/kernel/r2300_fpu.S index 6c745aa9e825..c000b22e3fcd 100644 --- a/arch/mips/kernel/r2300_fpu.S +++ b/arch/mips/kernel/r2300_fpu.S @@ -11,10 +11,10 @@ * Further modifications to make this work: * Copyright (c) 1998 Harald Koerfgen */ +#include <linux/export.h> #include <asm/asm.h> #include <asm/asmmacro.h> #include <asm/errno.h> -#include <asm/export.h> #include <asm/fpregdef.h> #include <asm/mipsregs.h> #include <asm/asm-offsets.h> diff --git a/arch/mips/kernel/r2300_switch.S b/arch/mips/kernel/r2300_switch.S index 71b1aafae1bb..48e63943e6f7 
100644 --- a/arch/mips/kernel/r2300_switch.S +++ b/arch/mips/kernel/r2300_switch.S @@ -13,7 +13,6 @@ */ #include <asm/asm.h> #include <asm/cachectl.h> -#include <asm/export.h> #include <asm/fpregdef.h> #include <asm/mipsregs.h> #include <asm/asm-offsets.h> diff --git a/arch/mips/kernel/r4k_fpu.S b/arch/mips/kernel/r4k_fpu.S index 4e8c98517d9d..4bb97ee89904 100644 --- a/arch/mips/kernel/r4k_fpu.S +++ b/arch/mips/kernel/r4k_fpu.S @@ -12,10 +12,10 @@ * Copyright (C) 2000 MIPS Technologies, Inc. * Copyright (C) 1999, 2001 Silicon Graphics, Inc. */ +#include <linux/export.h> #include <asm/asm.h> #include <asm/asmmacro.h> #include <asm/errno.h> -#include <asm/export.h> #include <asm/fpregdef.h> #include <asm/mipsregs.h> #include <asm/asm-offsets.h> diff --git a/arch/mips/kernel/sysrq.c b/arch/mips/kernel/sysrq.c index 9c1a2019113b..2e98049fe783 100644 --- a/arch/mips/kernel/sysrq.c +++ b/arch/mips/kernel/sysrq.c @@ -44,7 +44,7 @@ static void sysrq_tlbdump_othercpus(struct work_struct *dummy) static DECLARE_WORK(sysrq_tlbdump, sysrq_tlbdump_othercpus); #endif -static void sysrq_handle_tlbdump(int key) +static void sysrq_handle_tlbdump(u8 key) { sysrq_tlbdump_single(NULL); #ifdef CONFIG_SMP diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c index aa5583a7b05b..231ac052b506 100644 --- a/arch/mips/kvm/mips.c +++ b/arch/mips/kvm/mips.c @@ -199,7 +199,7 @@ void kvm_arch_flush_shadow_memslot(struct kvm *kvm, /* Flush slot from GPA */ kvm_mips_flush_gpa_pt(kvm, slot->base_gfn, slot->base_gfn + slot->npages - 1); - kvm_arch_flush_remote_tlbs_memslot(kvm, slot); + kvm_flush_remote_tlbs_memslot(kvm, slot); spin_unlock(&kvm->mmu_lock); } @@ -235,7 +235,7 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, needs_flush = kvm_mips_mkclean_gpa_pt(kvm, new->base_gfn, new->base_gfn + new->npages - 1); if (needs_flush) - kvm_arch_flush_remote_tlbs_memslot(kvm, new); + kvm_flush_remote_tlbs_memslot(kvm, new); spin_unlock(&kvm->mmu_lock); } } @@ -981,18 +981,12 @@ void 
kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot) } -int kvm_arch_flush_remote_tlb(struct kvm *kvm) +int kvm_arch_flush_remote_tlbs(struct kvm *kvm) { kvm_mips_callbacks->prepare_flush_shadow(kvm); return 1; } -void kvm_arch_flush_remote_tlbs_memslot(struct kvm *kvm, - const struct kvm_memory_slot *memslot) -{ - kvm_flush_remote_tlbs(kvm); -} - int kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { int r; diff --git a/arch/mips/kvm/mmu.c b/arch/mips/kvm/mmu.c index e8c08988ed37..7b2ac1319d70 100644 --- a/arch/mips/kvm/mmu.c +++ b/arch/mips/kvm/mmu.c @@ -447,7 +447,7 @@ bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range) bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range) { gpa_t gpa = range->start << PAGE_SHIFT; - pte_t hva_pte = range->pte; + pte_t hva_pte = range->arg.pte; pte_t *gpa_pte = kvm_mips_pte_for_gpa(kvm, NULL, gpa); pte_t old_pte; diff --git a/arch/mips/lantiq/irq.c b/arch/mips/lantiq/irq.c index 20622bf0a9b3..8f208007b8e8 100644 --- a/arch/mips/lantiq/irq.c +++ b/arch/mips/lantiq/irq.c @@ -10,7 +10,7 @@ #include <linux/sched.h> #include <linux/irqchip.h> #include <linux/irqdomain.h> -#include <linux/of_platform.h> +#include <linux/of.h> #include <linux/of_address.h> #include <linux/of_irq.h> diff --git a/arch/mips/lantiq/xway/dcdc.c b/arch/mips/lantiq/xway/dcdc.c index 96199966a350..4a808f8c5beb 100644 --- a/arch/mips/lantiq/xway/dcdc.c +++ b/arch/mips/lantiq/xway/dcdc.c @@ -6,7 +6,8 @@ */ #include <linux/ioport.h> -#include <linux/of_platform.h> +#include <linux/mod_devicetable.h> +#include <linux/platform_device.h> #include <lantiq_soc.h> diff --git a/arch/mips/lantiq/xway/gptu.c b/arch/mips/lantiq/xway/gptu.c index a492b1eb1925..8d52001301de 100644 --- a/arch/mips/lantiq/xway/gptu.c +++ b/arch/mips/lantiq/xway/gptu.c @@ -8,8 +8,9 @@ #include <linux/interrupt.h> #include <linux/ioport.h> #include <linux/init.h> -#include <linux/of_platform.h> +#include 
<linux/mod_devicetable.h> #include <linux/of_irq.h> +#include <linux/platform_device.h> #include <lantiq_soc.h> #include "../clk.h" diff --git a/arch/mips/lantiq/xway/sysctrl.c b/arch/mips/lantiq/xway/sysctrl.c index d444a1b98a72..3ed078225222 100644 --- a/arch/mips/lantiq/xway/sysctrl.c +++ b/arch/mips/lantiq/xway/sysctrl.c @@ -10,7 +10,6 @@ #include <linux/clkdev.h> #include <linux/spinlock.h> #include <linux/of.h> -#include <linux/of_platform.h> #include <linux/of_address.h> #include <lantiq_soc.h> diff --git a/arch/mips/lantiq/xway/vmmc.c b/arch/mips/lantiq/xway/vmmc.c index 2796e87dfcae..37c133052ef7 100644 --- a/arch/mips/lantiq/xway/vmmc.c +++ b/arch/mips/lantiq/xway/vmmc.c @@ -7,7 +7,8 @@ #include <linux/err.h> #include <linux/export.h> #include <linux/gpio/consumer.h> -#include <linux/of_platform.h> +#include <linux/mod_devicetable.h> +#include <linux/platform_device.h> #include <linux/dma-mapping.h> #include <lantiq_soc.h> diff --git a/arch/mips/lib/csum_partial.S b/arch/mips/lib/csum_partial.S index 7767137c3e49..3d2ff4118d79 100644 --- a/arch/mips/lib/csum_partial.S +++ b/arch/mips/lib/csum_partial.S @@ -11,9 +11,9 @@ * Copyright (C) 2014 Imagination Technologies Ltd. */ #include <linux/errno.h> +#include <linux/export.h> #include <asm/asm.h> #include <asm/asm-offsets.h> -#include <asm/export.h> #include <asm/regdef.h> #ifdef CONFIG_64BIT diff --git a/arch/mips/lib/memcpy.S b/arch/mips/lib/memcpy.S index 18a43f2e29c8..a4b4e805ff13 100644 --- a/arch/mips/lib/memcpy.S +++ b/arch/mips/lib/memcpy.S @@ -32,9 +32,9 @@ #undef CONFIG_CPU_HAS_PREFETCH #endif +#include <linux/export.h> #include <asm/asm.h> #include <asm/asm-offsets.h> -#include <asm/export.h> #include <asm/regdef.h> #define dst a0 diff --git a/arch/mips/lib/memset.S b/arch/mips/lib/memset.S index 0b342bae9a98..79405c32cc85 100644 --- a/arch/mips/lib/memset.S +++ b/arch/mips/lib/memset.S @@ -8,9 +8,9 @@ * Copyright (C) 2007 by Maciej W. Rozycki * Copyright (C) 2011, 2012 MIPS Technologies, Inc. 
*/ +#include <linux/export.h> #include <asm/asm.h> #include <asm/asm-offsets.h> -#include <asm/export.h> #include <asm/regdef.h> #if LONGSIZE == 4 diff --git a/arch/mips/lib/strncpy_user.S b/arch/mips/lib/strncpy_user.S index 13aaa9927ad1..94f4203563c1 100644 --- a/arch/mips/lib/strncpy_user.S +++ b/arch/mips/lib/strncpy_user.S @@ -7,9 +7,9 @@ * Copyright (C) 2011 MIPS Technologies, Inc. */ #include <linux/errno.h> +#include <linux/export.h> #include <asm/asm.h> #include <asm/asm-offsets.h> -#include <asm/export.h> #include <asm/regdef.h> #define EX(insn,reg,addr,handler) \ diff --git a/arch/mips/lib/strnlen_user.S b/arch/mips/lib/strnlen_user.S index 6de31b616f9c..c192a6f6cd84 100644 --- a/arch/mips/lib/strnlen_user.S +++ b/arch/mips/lib/strnlen_user.S @@ -6,9 +6,9 @@ * Copyright (c) 1996, 1998, 1999, 2004 by Ralf Baechle * Copyright (c) 1999 Silicon Graphics, Inc. */ +#include <linux/export.h> #include <asm/asm.h> #include <asm/asm-offsets.h> -#include <asm/export.h> #include <asm/regdef.h> #define EX(insn,reg,addr,handler) \ diff --git a/arch/mips/loongson32/common/platform.c b/arch/mips/loongson32/common/platform.c index 64d7979394e6..8075590a9f83 100644 --- a/arch/mips/loongson32/common/platform.c +++ b/arch/mips/loongson32/common/platform.c @@ -265,14 +265,6 @@ struct platform_device ls1x_ehci_pdev = { }; /* Real Time Clock */ -void __init ls1x_rtc_set_extclk(struct platform_device *pdev) -{ - u32 val = __raw_readl(LS1X_RTC_CTRL); - - if (!(val & RTC_EXTCLK_OK)) - __raw_writel(val | RTC_EXTCLK_EN, LS1X_RTC_CTRL); -} - struct platform_device ls1x_rtc_pdev = { .name = "ls1x-rtc", .id = -1, diff --git a/arch/mips/loongson64/smp.c b/arch/mips/loongson64/smp.c index cdecd7af11a6..e015a26a40f7 100644 --- a/arch/mips/loongson64/smp.c +++ b/arch/mips/loongson64/smp.c @@ -187,181 +187,181 @@ static void csr_ipi_probe(void) static void ipi_set0_regs_init(void) { - ipi_set0_regs[0] = (void *) + ipi_set0_regs[0] = (void __iomem *) (SMP_CORE_GROUP0_BASE + SMP_CORE0_OFFSET 
+ SET0); - ipi_set0_regs[1] = (void *) + ipi_set0_regs[1] = (void __iomem *) (SMP_CORE_GROUP0_BASE + SMP_CORE1_OFFSET + SET0); - ipi_set0_regs[2] = (void *) + ipi_set0_regs[2] = (void __iomem *) (SMP_CORE_GROUP0_BASE + SMP_CORE2_OFFSET + SET0); - ipi_set0_regs[3] = (void *) + ipi_set0_regs[3] = (void __iomem *) (SMP_CORE_GROUP0_BASE + SMP_CORE3_OFFSET + SET0); - ipi_set0_regs[4] = (void *) + ipi_set0_regs[4] = (void __iomem *) (SMP_CORE_GROUP1_BASE + SMP_CORE0_OFFSET + SET0); - ipi_set0_regs[5] = (void *) + ipi_set0_regs[5] = (void __iomem *) (SMP_CORE_GROUP1_BASE + SMP_CORE1_OFFSET + SET0); - ipi_set0_regs[6] = (void *) + ipi_set0_regs[6] = (void __iomem *) (SMP_CORE_GROUP1_BASE + SMP_CORE2_OFFSET + SET0); - ipi_set0_regs[7] = (void *) + ipi_set0_regs[7] = (void __iomem *) (SMP_CORE_GROUP1_BASE + SMP_CORE3_OFFSET + SET0); - ipi_set0_regs[8] = (void *) + ipi_set0_regs[8] = (void __iomem *) (SMP_CORE_GROUP2_BASE + SMP_CORE0_OFFSET + SET0); - ipi_set0_regs[9] = (void *) + ipi_set0_regs[9] = (void __iomem *) (SMP_CORE_GROUP2_BASE + SMP_CORE1_OFFSET + SET0); - ipi_set0_regs[10] = (void *) + ipi_set0_regs[10] = (void __iomem *) (SMP_CORE_GROUP2_BASE + SMP_CORE2_OFFSET + SET0); - ipi_set0_regs[11] = (void *) + ipi_set0_regs[11] = (void __iomem *) (SMP_CORE_GROUP2_BASE + SMP_CORE3_OFFSET + SET0); - ipi_set0_regs[12] = (void *) + ipi_set0_regs[12] = (void __iomem *) (SMP_CORE_GROUP3_BASE + SMP_CORE0_OFFSET + SET0); - ipi_set0_regs[13] = (void *) + ipi_set0_regs[13] = (void __iomem *) (SMP_CORE_GROUP3_BASE + SMP_CORE1_OFFSET + SET0); - ipi_set0_regs[14] = (void *) + ipi_set0_regs[14] = (void __iomem *) (SMP_CORE_GROUP3_BASE + SMP_CORE2_OFFSET + SET0); - ipi_set0_regs[15] = (void *) + ipi_set0_regs[15] = (void __iomem *) (SMP_CORE_GROUP3_BASE + SMP_CORE3_OFFSET + SET0); } static void ipi_clear0_regs_init(void) { - ipi_clear0_regs[0] = (void *) + ipi_clear0_regs[0] = (void __iomem *) (SMP_CORE_GROUP0_BASE + SMP_CORE0_OFFSET + CLEAR0); - ipi_clear0_regs[1] = (void *) + 
ipi_clear0_regs[1] = (void __iomem *) (SMP_CORE_GROUP0_BASE + SMP_CORE1_OFFSET + CLEAR0); - ipi_clear0_regs[2] = (void *) + ipi_clear0_regs[2] = (void __iomem *) (SMP_CORE_GROUP0_BASE + SMP_CORE2_OFFSET + CLEAR0); - ipi_clear0_regs[3] = (void *) + ipi_clear0_regs[3] = (void __iomem *) (SMP_CORE_GROUP0_BASE + SMP_CORE3_OFFSET + CLEAR0); - ipi_clear0_regs[4] = (void *) + ipi_clear0_regs[4] = (void __iomem *) (SMP_CORE_GROUP1_BASE + SMP_CORE0_OFFSET + CLEAR0); - ipi_clear0_regs[5] = (void *) + ipi_clear0_regs[5] = (void __iomem *) (SMP_CORE_GROUP1_BASE + SMP_CORE1_OFFSET + CLEAR0); - ipi_clear0_regs[6] = (void *) + ipi_clear0_regs[6] = (void __iomem *) (SMP_CORE_GROUP1_BASE + SMP_CORE2_OFFSET + CLEAR0); - ipi_clear0_regs[7] = (void *) + ipi_clear0_regs[7] = (void __iomem *) (SMP_CORE_GROUP1_BASE + SMP_CORE3_OFFSET + CLEAR0); - ipi_clear0_regs[8] = (void *) + ipi_clear0_regs[8] = (void __iomem *) (SMP_CORE_GROUP2_BASE + SMP_CORE0_OFFSET + CLEAR0); - ipi_clear0_regs[9] = (void *) + ipi_clear0_regs[9] = (void __iomem *) (SMP_CORE_GROUP2_BASE + SMP_CORE1_OFFSET + CLEAR0); - ipi_clear0_regs[10] = (void *) + ipi_clear0_regs[10] = (void __iomem *) (SMP_CORE_GROUP2_BASE + SMP_CORE2_OFFSET + CLEAR0); - ipi_clear0_regs[11] = (void *) + ipi_clear0_regs[11] = (void __iomem *) (SMP_CORE_GROUP2_BASE + SMP_CORE3_OFFSET + CLEAR0); - ipi_clear0_regs[12] = (void *) + ipi_clear0_regs[12] = (void __iomem *) (SMP_CORE_GROUP3_BASE + SMP_CORE0_OFFSET + CLEAR0); - ipi_clear0_regs[13] = (void *) + ipi_clear0_regs[13] = (void __iomem *) (SMP_CORE_GROUP3_BASE + SMP_CORE1_OFFSET + CLEAR0); - ipi_clear0_regs[14] = (void *) + ipi_clear0_regs[14] = (void __iomem *) (SMP_CORE_GROUP3_BASE + SMP_CORE2_OFFSET + CLEAR0); - ipi_clear0_regs[15] = (void *) + ipi_clear0_regs[15] = (void __iomem *) (SMP_CORE_GROUP3_BASE + SMP_CORE3_OFFSET + CLEAR0); } static void ipi_status0_regs_init(void) { - ipi_status0_regs[0] = (void *) + ipi_status0_regs[0] = (void __iomem *) (SMP_CORE_GROUP0_BASE + SMP_CORE0_OFFSET + 
STATUS0); - ipi_status0_regs[1] = (void *) + ipi_status0_regs[1] = (void __iomem *) (SMP_CORE_GROUP0_BASE + SMP_CORE1_OFFSET + STATUS0); - ipi_status0_regs[2] = (void *) + ipi_status0_regs[2] = (void __iomem *) (SMP_CORE_GROUP0_BASE + SMP_CORE2_OFFSET + STATUS0); - ipi_status0_regs[3] = (void *) + ipi_status0_regs[3] = (void __iomem *) (SMP_CORE_GROUP0_BASE + SMP_CORE3_OFFSET + STATUS0); - ipi_status0_regs[4] = (void *) + ipi_status0_regs[4] = (void __iomem *) (SMP_CORE_GROUP1_BASE + SMP_CORE0_OFFSET + STATUS0); - ipi_status0_regs[5] = (void *) + ipi_status0_regs[5] = (void __iomem *) (SMP_CORE_GROUP1_BASE + SMP_CORE1_OFFSET + STATUS0); - ipi_status0_regs[6] = (void *) + ipi_status0_regs[6] = (void __iomem *) (SMP_CORE_GROUP1_BASE + SMP_CORE2_OFFSET + STATUS0); - ipi_status0_regs[7] = (void *) + ipi_status0_regs[7] = (void __iomem *) (SMP_CORE_GROUP1_BASE + SMP_CORE3_OFFSET + STATUS0); - ipi_status0_regs[8] = (void *) + ipi_status0_regs[8] = (void __iomem *) (SMP_CORE_GROUP2_BASE + SMP_CORE0_OFFSET + STATUS0); - ipi_status0_regs[9] = (void *) + ipi_status0_regs[9] = (void __iomem *) (SMP_CORE_GROUP2_BASE + SMP_CORE1_OFFSET + STATUS0); - ipi_status0_regs[10] = (void *) + ipi_status0_regs[10] = (void __iomem *) (SMP_CORE_GROUP2_BASE + SMP_CORE2_OFFSET + STATUS0); - ipi_status0_regs[11] = (void *) + ipi_status0_regs[11] = (void __iomem *) (SMP_CORE_GROUP2_BASE + SMP_CORE3_OFFSET + STATUS0); - ipi_status0_regs[12] = (void *) + ipi_status0_regs[12] = (void __iomem *) (SMP_CORE_GROUP3_BASE + SMP_CORE0_OFFSET + STATUS0); - ipi_status0_regs[13] = (void *) + ipi_status0_regs[13] = (void __iomem *) (SMP_CORE_GROUP3_BASE + SMP_CORE1_OFFSET + STATUS0); - ipi_status0_regs[14] = (void *) + ipi_status0_regs[14] = (void __iomem *) (SMP_CORE_GROUP3_BASE + SMP_CORE2_OFFSET + STATUS0); - ipi_status0_regs[15] = (void *) + ipi_status0_regs[15] = (void __iomem *) (SMP_CORE_GROUP3_BASE + SMP_CORE3_OFFSET + STATUS0); } static void ipi_en0_regs_init(void) { - ipi_en0_regs[0] = (void *) + 
ipi_en0_regs[0] = (void __iomem *) (SMP_CORE_GROUP0_BASE + SMP_CORE0_OFFSET + EN0); - ipi_en0_regs[1] = (void *) + ipi_en0_regs[1] = (void __iomem *) (SMP_CORE_GROUP0_BASE + SMP_CORE1_OFFSET + EN0); - ipi_en0_regs[2] = (void *) + ipi_en0_regs[2] = (void __iomem *) (SMP_CORE_GROUP0_BASE + SMP_CORE2_OFFSET + EN0); - ipi_en0_regs[3] = (void *) + ipi_en0_regs[3] = (void __iomem *) (SMP_CORE_GROUP0_BASE + SMP_CORE3_OFFSET + EN0); - ipi_en0_regs[4] = (void *) + ipi_en0_regs[4] = (void __iomem *) (SMP_CORE_GROUP1_BASE + SMP_CORE0_OFFSET + EN0); - ipi_en0_regs[5] = (void *) + ipi_en0_regs[5] = (void __iomem *) (SMP_CORE_GROUP1_BASE + SMP_CORE1_OFFSET + EN0); - ipi_en0_regs[6] = (void *) + ipi_en0_regs[6] = (void __iomem *) (SMP_CORE_GROUP1_BASE + SMP_CORE2_OFFSET + EN0); - ipi_en0_regs[7] = (void *) + ipi_en0_regs[7] = (void __iomem *) (SMP_CORE_GROUP1_BASE + SMP_CORE3_OFFSET + EN0); - ipi_en0_regs[8] = (void *) + ipi_en0_regs[8] = (void __iomem *) (SMP_CORE_GROUP2_BASE + SMP_CORE0_OFFSET + EN0); - ipi_en0_regs[9] = (void *) + ipi_en0_regs[9] = (void __iomem *) (SMP_CORE_GROUP2_BASE + SMP_CORE1_OFFSET + EN0); - ipi_en0_regs[10] = (void *) + ipi_en0_regs[10] = (void __iomem *) (SMP_CORE_GROUP2_BASE + SMP_CORE2_OFFSET + EN0); - ipi_en0_regs[11] = (void *) + ipi_en0_regs[11] = (void __iomem *) (SMP_CORE_GROUP2_BASE + SMP_CORE3_OFFSET + EN0); - ipi_en0_regs[12] = (void *) + ipi_en0_regs[12] = (void __iomem *) (SMP_CORE_GROUP3_BASE + SMP_CORE0_OFFSET + EN0); - ipi_en0_regs[13] = (void *) + ipi_en0_regs[13] = (void __iomem *) (SMP_CORE_GROUP3_BASE + SMP_CORE1_OFFSET + EN0); - ipi_en0_regs[14] = (void *) + ipi_en0_regs[14] = (void __iomem *) (SMP_CORE_GROUP3_BASE + SMP_CORE2_OFFSET + EN0); - ipi_en0_regs[15] = (void *) + ipi_en0_regs[15] = (void __iomem *) (SMP_CORE_GROUP3_BASE + SMP_CORE3_OFFSET + EN0); } static void ipi_mailbox_buf_init(void) { - ipi_mailbox_buf[0] = (void *) + ipi_mailbox_buf[0] = (void __iomem *) (SMP_CORE_GROUP0_BASE + SMP_CORE0_OFFSET + BUF); - 
ipi_mailbox_buf[1] = (void *) + ipi_mailbox_buf[1] = (void __iomem *) (SMP_CORE_GROUP0_BASE + SMP_CORE1_OFFSET + BUF); - ipi_mailbox_buf[2] = (void *) + ipi_mailbox_buf[2] = (void __iomem *) (SMP_CORE_GROUP0_BASE + SMP_CORE2_OFFSET + BUF); - ipi_mailbox_buf[3] = (void *) + ipi_mailbox_buf[3] = (void __iomem *) (SMP_CORE_GROUP0_BASE + SMP_CORE3_OFFSET + BUF); - ipi_mailbox_buf[4] = (void *) + ipi_mailbox_buf[4] = (void __iomem *) (SMP_CORE_GROUP1_BASE + SMP_CORE0_OFFSET + BUF); - ipi_mailbox_buf[5] = (void *) + ipi_mailbox_buf[5] = (void __iomem *) (SMP_CORE_GROUP1_BASE + SMP_CORE1_OFFSET + BUF); - ipi_mailbox_buf[6] = (void *) + ipi_mailbox_buf[6] = (void __iomem *) (SMP_CORE_GROUP1_BASE + SMP_CORE2_OFFSET + BUF); - ipi_mailbox_buf[7] = (void *) + ipi_mailbox_buf[7] = (void __iomem *) (SMP_CORE_GROUP1_BASE + SMP_CORE3_OFFSET + BUF); - ipi_mailbox_buf[8] = (void *) + ipi_mailbox_buf[8] = (void __iomem *) (SMP_CORE_GROUP2_BASE + SMP_CORE0_OFFSET + BUF); - ipi_mailbox_buf[9] = (void *) + ipi_mailbox_buf[9] = (void __iomem *) (SMP_CORE_GROUP2_BASE + SMP_CORE1_OFFSET + BUF); - ipi_mailbox_buf[10] = (void *) + ipi_mailbox_buf[10] = (void __iomem *) (SMP_CORE_GROUP2_BASE + SMP_CORE2_OFFSET + BUF); - ipi_mailbox_buf[11] = (void *) + ipi_mailbox_buf[11] = (void __iomem *) (SMP_CORE_GROUP2_BASE + SMP_CORE3_OFFSET + BUF); - ipi_mailbox_buf[12] = (void *) + ipi_mailbox_buf[12] = (void __iomem *) (SMP_CORE_GROUP3_BASE + SMP_CORE0_OFFSET + BUF); - ipi_mailbox_buf[13] = (void *) + ipi_mailbox_buf[13] = (void __iomem *) (SMP_CORE_GROUP3_BASE + SMP_CORE1_OFFSET + BUF); - ipi_mailbox_buf[14] = (void *) + ipi_mailbox_buf[14] = (void __iomem *) (SMP_CORE_GROUP3_BASE + SMP_CORE2_OFFSET + BUF); - ipi_mailbox_buf[15] = (void *) + ipi_mailbox_buf[15] = (void __iomem *) (SMP_CORE_GROUP3_BASE + SMP_CORE3_OFFSET + BUF); } diff --git a/arch/mips/mm/page-funcs.S b/arch/mips/mm/page-funcs.S index 43181ac0a1af..42d0516ca18a 100644 --- a/arch/mips/mm/page-funcs.S +++ b/arch/mips/mm/page-funcs.S 
@@ -8,8 +8,8 @@ * Copyright (C) 2012 MIPS Technologies, Inc. * Copyright (C) 2012 Ralf Baechle <ralf@linux-mips.org> */ +#include <linux/export.h> #include <asm/asm.h> -#include <asm/export.h> #include <asm/regdef.h> #ifdef CONFIG_SIBYTE_DMA_PAGEOPS diff --git a/arch/mips/mm/tlb-funcs.S b/arch/mips/mm/tlb-funcs.S index 00fef578c8cd..2705d7dcb33e 100644 --- a/arch/mips/mm/tlb-funcs.S +++ b/arch/mips/mm/tlb-funcs.S @@ -11,8 +11,8 @@ * Copyright (C) 2012 MIPS Technologies, Inc. * Copyright (C) 2012 Ralf Baechle <ralf@linux-mips.org> */ +#include <linux/export.h> #include <asm/asm.h> -#include <asm/export.h> #include <asm/regdef.h> #define FASTPATH_SIZE 128 diff --git a/arch/mips/pci/pci-lantiq.c b/arch/mips/pci/pci-lantiq.c index 79e29bf42a24..80f7293166bb 100644 --- a/arch/mips/pci/pci-lantiq.c +++ b/arch/mips/pci/pci-lantiq.c @@ -13,9 +13,9 @@ #include <linux/mm.h> #include <linux/vmalloc.h> #include <linux/clk.h> -#include <linux/of_platform.h> -#include <linux/of_irq.h> +#include <linux/of.h> #include <linux/of_pci.h> +#include <linux/platform_device.h> #include <asm/addrspace.h> diff --git a/arch/mips/pci/pci-rt2880.c b/arch/mips/pci/pci-rt2880.c index e9dd01431f21..1cada09fa5db 100644 --- a/arch/mips/pci/pci-rt2880.c +++ b/arch/mips/pci/pci-rt2880.c @@ -13,9 +13,8 @@ #include <linux/pci.h> #include <linux/io.h> #include <linux/init.h> -#include <linux/of_platform.h> -#include <linux/of_irq.h> -#include <linux/of_pci.h> +#include <linux/mod_devicetable.h> +#include <linux/platform_device.h> #include <asm/mach-ralink/rt288x.h> diff --git a/arch/mips/pic32/pic32mzda/config.c b/arch/mips/pic32/pic32mzda/config.c index f69532007717..73be5689e0df 100644 --- a/arch/mips/pic32/pic32mzda/config.c +++ b/arch/mips/pic32/pic32mzda/config.c @@ -5,7 +5,7 @@ */ #include <linux/init.h> #include <linux/io.h> -#include <linux/of_platform.h> +#include <linux/spinlock.h> #include <asm/mach-pic32/pic32.h> diff --git a/arch/mips/ralink/ill_acc.c b/arch/mips/ralink/ill_acc.c index 
f395ae218470..25341b2319d0 100644 --- a/arch/mips/ralink/ill_acc.c +++ b/arch/mips/ralink/ill_acc.c @@ -5,8 +5,10 @@ */ #include <linux/interrupt.h> +#include <linux/of.h> #include <linux/of_platform.h> #include <linux/of_irq.h> +#include <linux/platform_device.h> #include <asm/mach-ralink/ralink_regs.h> diff --git a/arch/mips/ralink/irq.c b/arch/mips/ralink/irq.c index fa353bc13947..46aef0a1b22a 100644 --- a/arch/mips/ralink/irq.c +++ b/arch/mips/ralink/irq.c @@ -7,7 +7,7 @@ #include <linux/io.h> #include <linux/bitops.h> -#include <linux/of_platform.h> +#include <linux/of.h> #include <linux/of_address.h> #include <linux/of_irq.h> #include <linux/irqdomain.h> diff --git a/arch/mips/ralink/of.c b/arch/mips/ralink/of.c index 45d60c094496..7f90068c68f2 100644 --- a/arch/mips/ralink/of.c +++ b/arch/mips/ralink/of.c @@ -14,7 +14,7 @@ #include <linux/of_fdt.h> #include <linux/kernel.h> #include <linux/memblock.h> -#include <linux/of_platform.h> +#include <linux/of.h> #include <linux/of_address.h> #include <asm/reboot.h> diff --git a/arch/mips/ralink/prom.c b/arch/mips/ralink/prom.c index aaac1e6ec7d9..c3b96861844c 100644 --- a/arch/mips/ralink/prom.c +++ b/arch/mips/ralink/prom.c @@ -7,8 +7,6 @@ */ #include <linux/string.h> -#include <linux/of_fdt.h> -#include <linux/of_platform.h> #include <asm/bootinfo.h> #include <asm/addrspace.h> diff --git a/arch/mips/txx9/generic/pci.c b/arch/mips/txx9/generic/pci.c index e98845543b77..5ae30b78d38d 100644 --- a/arch/mips/txx9/generic/pci.c +++ b/arch/mips/txx9/generic/pci.c @@ -51,6 +51,7 @@ int __init txx9_pci66_check(struct pci_controller *hose, int top_bus, unsigned short vid; int cap66 = -1; u16 stat; + int ret; /* It seems SLC90E66 needs some time after PCI reset... 
*/ mdelay(80); @@ -60,9 +61,9 @@ int __init txx9_pci66_check(struct pci_controller *hose, int top_bus, for (pci_devfn = 0; pci_devfn < 0xff; pci_devfn++) { if (PCI_FUNC(pci_devfn)) continue; - if (early_read_config_word(hose, top_bus, current_bus, - pci_devfn, PCI_VENDOR_ID, &vid) != - PCIBIOS_SUCCESSFUL) + ret = early_read_config_word(hose, top_bus, current_bus, + pci_devfn, PCI_VENDOR_ID, &vid); + if (ret != PCIBIOS_SUCCESSFUL) continue; if (vid == 0xffff) continue; @@ -343,26 +344,28 @@ static void tc35815_fixup(struct pci_dev *dev) static void final_fixup(struct pci_dev *dev) { + unsigned long timeout; unsigned char bist; + int ret; /* Do build-in self test */ - if (pci_read_config_byte(dev, PCI_BIST, &bist) == PCIBIOS_SUCCESSFUL && - (bist & PCI_BIST_CAPABLE)) { - unsigned long timeout; - pci_set_power_state(dev, PCI_D0); - pr_info("PCI: %s BIST...", pci_name(dev)); - pci_write_config_byte(dev, PCI_BIST, PCI_BIST_START); - timeout = jiffies + HZ * 2; /* timeout after 2 sec */ - do { - pci_read_config_byte(dev, PCI_BIST, &bist); - if (time_after(jiffies, timeout)) - break; - } while (bist & PCI_BIST_START); - if (bist & (PCI_BIST_CODE_MASK | PCI_BIST_START)) - pr_cont("failed. (0x%x)\n", bist); - else - pr_cont("OK.\n"); - } + ret = pci_read_config_byte(dev, PCI_BIST, &bist); + if ((ret != PCIBIOS_SUCCESSFUL) || !(bist & PCI_BIST_CAPABLE)) + return; + + pci_set_power_state(dev, PCI_D0); + pr_info("PCI: %s BIST...", pci_name(dev)); + pci_write_config_byte(dev, PCI_BIST, PCI_BIST_START); + timeout = jiffies + HZ * 2; /* timeout after 2 sec */ + do { + pci_read_config_byte(dev, PCI_BIST, &bist); + if (time_after(jiffies, timeout)) + break; + } while (bist & PCI_BIST_START); + if (bist & (PCI_BIST_CODE_MASK | PCI_BIST_START)) + pr_cont("failed. 
(0x%x)\n", bist); + else + pr_cont("OK.\n"); } #ifdef CONFIG_TOSHIBA_FPCIB0 diff --git a/arch/mips/vdso/vdso.lds.S b/arch/mips/vdso/vdso.lds.S index d90b65724d78..836465e3bcb8 100644 --- a/arch/mips/vdso/vdso.lds.S +++ b/arch/mips/vdso/vdso.lds.S @@ -94,7 +94,9 @@ VERSION #ifndef CONFIG_MIPS_DISABLE_VDSO global: __vdso_clock_gettime; +#ifdef CONFIG_MIPS_CLOCK_VSYSCALL __vdso_gettimeofday; +#endif __vdso_clock_getres; #if _MIPS_SIM != _MIPS_SIM_ABI64 __vdso_clock_gettime64; diff --git a/arch/openrisc/include/asm/bug.h b/arch/openrisc/include/asm/bug.h new file mode 100644 index 000000000000..6d04776eaf10 --- /dev/null +++ b/arch/openrisc/include/asm/bug.h @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef __ASM_OPENRISC_BUG_H +#define __ASM_OPENRISC_BUG_H + +#include <asm-generic/bug.h> + +struct pt_regs; + +void __noreturn die(const char *str, struct pt_regs *regs, long err); + +#endif /* __ASM_OPENRISC_BUG_H */ diff --git a/arch/openrisc/include/asm/page.h b/arch/openrisc/include/asm/page.h index 52b0d7e76446..44fc1fd56717 100644 --- a/arch/openrisc/include/asm/page.h +++ b/arch/openrisc/include/asm/page.h @@ -72,8 +72,15 @@ typedef struct page *pgtable_t; #define __va(x) ((void *)((unsigned long)(x) + PAGE_OFFSET)) #define __pa(x) ((unsigned long) (x) - PAGE_OFFSET) -#define virt_to_pfn(kaddr) (__pa(kaddr) >> PAGE_SHIFT) -#define pfn_to_virt(pfn) __va((pfn) << PAGE_SHIFT) +static inline unsigned long virt_to_pfn(const void *kaddr) +{ + return __pa(kaddr) >> PAGE_SHIFT; +} + +static inline void *pfn_to_virt(unsigned long pfn) +{ + return __va(pfn << PAGE_SHIFT); /* pfn -> phys first, then __va() */ +} #define virt_to_page(addr) \ (mem_map + (((unsigned long)(addr)-PAGE_OFFSET) >> PAGE_SHIFT)) diff --git a/arch/openrisc/include/asm/processor.h b/arch/openrisc/include/asm/processor.h index ed9efb430afa..3b736e74e6ed 100644 --- a/arch/openrisc/include/asm/processor.h +++ b/arch/openrisc/include/asm/processor.h @@ -73,6 +73,7 @@ struct thread_struct { void
start_thread(struct pt_regs *regs, unsigned long nip, unsigned long sp); unsigned long __get_wchan(struct task_struct *p); +void show_registers(struct pt_regs *regs); #define cpu_relax() barrier() diff --git a/arch/openrisc/kernel/process.c b/arch/openrisc/kernel/process.c index dfa558f98ed8..86e02929f3ac 100644 --- a/arch/openrisc/kernel/process.c +++ b/arch/openrisc/kernel/process.c @@ -14,6 +14,7 @@ */ #define __KERNEL_SYSCALLS__ +#include <linux/cpu.h> #include <linux/errno.h> #include <linux/sched.h> #include <linux/sched/debug.h> @@ -38,6 +39,7 @@ #include <asm/io.h> #include <asm/processor.h> #include <asm/spr_defs.h> +#include <asm/switch_to.h> #include <linux/smp.h> @@ -119,8 +121,6 @@ void flush_thread(void) void show_regs(struct pt_regs *regs) { - extern void show_registers(struct pt_regs *regs); - show_regs_print_info(KERN_DEFAULT); /* __PHX__ cleanup this mess */ show_registers(regs); diff --git a/arch/openrisc/kernel/ptrace.c b/arch/openrisc/kernel/ptrace.c index 0b7d2ca6ba3b..1eeac3b62e9d 100644 --- a/arch/openrisc/kernel/ptrace.c +++ b/arch/openrisc/kernel/ptrace.c @@ -27,6 +27,10 @@ #include <asm/thread_info.h> #include <asm/page.h> +asmlinkage long do_syscall_trace_enter(struct pt_regs *regs); + +asmlinkage void do_syscall_trace_leave(struct pt_regs *regs); + /* * Copy the thread state to a regset that can be interpreted by userspace. * diff --git a/arch/openrisc/kernel/signal.c b/arch/openrisc/kernel/signal.c index 2e7257a433ff..e2f21a5d8ad9 100644 --- a/arch/openrisc/kernel/signal.c +++ b/arch/openrisc/kernel/signal.c @@ -34,6 +34,11 @@ struct rt_sigframe { unsigned char retcode[16]; /* trampoline code */ }; +asmlinkage long _sys_rt_sigreturn(struct pt_regs *regs); + +asmlinkage int do_work_pending(struct pt_regs *regs, unsigned int thread_flags, + int syscall); + static int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc) { @@ -224,7 +229,7 @@ handle_signal(struct ksignal *ksig, struct pt_regs *regs) * mode below. 
*/ -int do_signal(struct pt_regs *regs, int syscall) +static int do_signal(struct pt_regs *regs, int syscall) { struct ksignal ksig; unsigned long continue_addr = 0; diff --git a/arch/openrisc/kernel/smp.c b/arch/openrisc/kernel/smp.c index 0a7a059e2dff..1c5a2d71d675 100644 --- a/arch/openrisc/kernel/smp.c +++ b/arch/openrisc/kernel/smp.c @@ -23,6 +23,8 @@ #include <asm/cacheflush.h> #include <asm/time.h> +asmlinkage __init void secondary_start_kernel(void); + static void (*smp_cross_call)(const struct cpumask *, unsigned int); unsigned long secondary_release = -1; diff --git a/arch/openrisc/kernel/time.c b/arch/openrisc/kernel/time.c index 8e26c1af5441..764c7bfb5df3 100644 --- a/arch/openrisc/kernel/time.c +++ b/arch/openrisc/kernel/time.c @@ -25,6 +25,8 @@ #include <asm/cpuinfo.h> #include <asm/time.h> +irqreturn_t __irq_entry timer_interrupt(struct pt_regs *regs); + /* Test the timer ticks to count, used in sync routine */ inline void openrisc_timer_set(unsigned long count) { diff --git a/arch/openrisc/kernel/traps.c b/arch/openrisc/kernel/traps.c index 0aa6b07efda1..9370888c9a7e 100644 --- a/arch/openrisc/kernel/traps.c +++ b/arch/openrisc/kernel/traps.c @@ -30,14 +30,23 @@ #include <linux/kallsyms.h> #include <linux/uaccess.h> +#include <asm/bug.h> #include <asm/io.h> +#include <asm/processor.h> #include <asm/unwinder.h> #include <asm/sections.h> -static int kstack_depth_to_print = 0x180; int lwa_flag; static unsigned long __user *lwa_addr; +asmlinkage void unhandled_exception(struct pt_regs *regs, int ea, int vector); +asmlinkage void do_trap(struct pt_regs *regs, unsigned long address); +asmlinkage void do_fpe_trap(struct pt_regs *regs, unsigned long address); +asmlinkage void do_unaligned_access(struct pt_regs *regs, unsigned long address); +asmlinkage void do_bus_fault(struct pt_regs *regs, unsigned long address); +asmlinkage void do_illegal_instruction(struct pt_regs *regs, + unsigned long address); + static void print_trace(void *data, unsigned long 
addr, int reliable) { const char *loglvl = data; @@ -143,80 +152,6 @@ bad: printk("\n"); } -void nommu_dump_state(struct pt_regs *regs, - unsigned long ea, unsigned long vector) -{ - int i; - unsigned long addr, stack = regs->sp; - - printk("\n\r[nommu_dump_state] :: ea %lx, vector %lx\n\r", ea, vector); - - printk("CPU #: %d\n" - " PC: %08lx SR: %08lx SP: %08lx\n", - 0, regs->pc, regs->sr, regs->sp); - printk("GPR00: %08lx GPR01: %08lx GPR02: %08lx GPR03: %08lx\n", - 0L, regs->gpr[1], regs->gpr[2], regs->gpr[3]); - printk("GPR04: %08lx GPR05: %08lx GPR06: %08lx GPR07: %08lx\n", - regs->gpr[4], regs->gpr[5], regs->gpr[6], regs->gpr[7]); - printk("GPR08: %08lx GPR09: %08lx GPR10: %08lx GPR11: %08lx\n", - regs->gpr[8], regs->gpr[9], regs->gpr[10], regs->gpr[11]); - printk("GPR12: %08lx GPR13: %08lx GPR14: %08lx GPR15: %08lx\n", - regs->gpr[12], regs->gpr[13], regs->gpr[14], regs->gpr[15]); - printk("GPR16: %08lx GPR17: %08lx GPR18: %08lx GPR19: %08lx\n", - regs->gpr[16], regs->gpr[17], regs->gpr[18], regs->gpr[19]); - printk("GPR20: %08lx GPR21: %08lx GPR22: %08lx GPR23: %08lx\n", - regs->gpr[20], regs->gpr[21], regs->gpr[22], regs->gpr[23]); - printk("GPR24: %08lx GPR25: %08lx GPR26: %08lx GPR27: %08lx\n", - regs->gpr[24], regs->gpr[25], regs->gpr[26], regs->gpr[27]); - printk("GPR28: %08lx GPR29: %08lx GPR30: %08lx GPR31: %08lx\n", - regs->gpr[28], regs->gpr[29], regs->gpr[30], regs->gpr[31]); - printk(" RES: %08lx oGPR11: %08lx\n", - regs->gpr[11], regs->orig_gpr11); - - printk("Process %s (pid: %d, stackpage=%08lx)\n", - ((struct task_struct *)(__pa(current)))->comm, - ((struct task_struct *)(__pa(current)))->pid, - (unsigned long)current); - - printk("\nStack: "); - printk("Stack dump [0x%08lx]:\n", (unsigned long)stack); - for (i = 0; i < kstack_depth_to_print; i++) { - if (((long)stack & (THREAD_SIZE - 1)) == 0) - break; - stack++; - - printk("%lx :: sp + %02d: 0x%08lx\n", stack, i * 4, - *((unsigned long *)(__pa(stack)))); - } - printk("\n"); - - printk("Call 
Trace: "); - i = 1; - while (((long)stack & (THREAD_SIZE - 1)) != 0) { - addr = *((unsigned long *)__pa(stack)); - stack++; - - if (kernel_text_address(addr)) { - if (i && ((i % 6) == 0)) - printk("\n "); - printk(" [<%08lx>]", addr); - i++; - } - } - printk("\n"); - - printk("\nCode: "); - - for (i = -24; i < 24; i++) { - unsigned long word; - - word = ((unsigned long *)(__pa(regs->pc)))[i]; - - print_data(regs->pc, word, i); - } - printk("\n"); -} - /* This is normally the 'Oops' routine */ void __noreturn die(const char *str, struct pt_regs *regs, long err) { diff --git a/arch/openrisc/mm/fault.c b/arch/openrisc/mm/fault.c index a9dcd4381d1a..29e232d78d82 100644 --- a/arch/openrisc/mm/fault.c +++ b/arch/openrisc/mm/fault.c @@ -18,6 +18,7 @@ #include <linux/perf_event.h> #include <linux/uaccess.h> +#include <asm/bug.h> #include <asm/mmu_context.h> #include <asm/siginfo.h> #include <asm/signal.h> @@ -30,7 +31,8 @@ */ volatile pgd_t *current_pgd[NR_CPUS]; -extern void __noreturn die(char *, struct pt_regs *, long); +asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long address, + unsigned long vector, int write_acc); /* * This routine handles page faults. It determines the address, diff --git a/arch/openrisc/mm/init.c b/arch/openrisc/mm/init.c index d531ab82be12..1dcd78c8f0e9 100644 --- a/arch/openrisc/mm/init.c +++ b/arch/openrisc/mm/init.c @@ -123,8 +123,6 @@ static void __init map_ram(void) void __init paging_init(void) { - extern void tlb_init(void); - int i; printk(KERN_INFO "Setting up paging and PTEs.\n"); diff --git a/arch/openrisc/mm/ioremap.c b/arch/openrisc/mm/ioremap.c index 91c8259d4b7e..f59ea4c10b0f 100644 --- a/arch/openrisc/mm/ioremap.c +++ b/arch/openrisc/mm/ioremap.c @@ -22,7 +22,7 @@ extern int mem_init_done; -/** +/* * OK, this one's a bit tricky... ioremap can get called before memory is * initialized (early serial console does this) and will want to alloc a page * for its mapping. 
No userspace pages will ever get allocated before memory diff --git a/arch/openrisc/mm/tlb.c b/arch/openrisc/mm/tlb.c index e2f2a3c3bb22..3115f2e4f864 100644 --- a/arch/openrisc/mm/tlb.c +++ b/arch/openrisc/mm/tlb.c @@ -182,12 +182,3 @@ void destroy_context(struct mm_struct *mm) flush_tlb_mm(mm); } - -/* called once during VM initialization, from init.c */ - -void __init tlb_init(void) -{ - /* Do nothing... */ - /* invalidate the entire TLB */ - /* flush_tlb_all(); */ -} diff --git a/arch/parisc/include/asm/ide.h b/arch/parisc/include/asm/ide.h deleted file mode 100644 index 7aa75b93a1b6..000000000000 --- a/arch/parisc/include/asm/ide.h +++ /dev/null @@ -1,54 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * linux/include/asm-parisc/ide.h - * - * Copyright (C) 1994-1996 Linus Torvalds & authors - */ - -/* - * This file contains the PARISC architecture specific IDE code. - */ - -#ifndef __ASM_PARISC_IDE_H -#define __ASM_PARISC_IDE_H - -/* Generic I/O and MEMIO string operations. */ - -#define __ide_insw insw -#define __ide_insl insl -#define __ide_outsw outsw -#define __ide_outsl outsl - -static __inline__ void __ide_mm_insw(void __iomem *port, void *addr, u32 count) -{ - while (count--) { - *(u16 *)addr = __raw_readw(port); - addr += 2; - } -} - -static __inline__ void __ide_mm_insl(void __iomem *port, void *addr, u32 count) -{ - while (count--) { - *(u32 *)addr = __raw_readl(port); - addr += 4; - } -} - -static __inline__ void __ide_mm_outsw(void __iomem *port, void *addr, u32 count) -{ - while (count--) { - __raw_writew(*(u16 *)addr, port); - addr += 2; - } -} - -static __inline__ void __ide_mm_outsl(void __iomem *port, void *addr, u32 count) -{ - while (count--) { - __raw_writel(*(u32 *)addr, port); - addr += 4; - } -} - -#endif /* __ASM_PARISC_IDE_H */ diff --git a/arch/powerpc/configs/disable-werror.config b/arch/powerpc/configs/disable-werror.config index 6ea12a12432c..7776b91da37f 100644 --- a/arch/powerpc/configs/disable-werror.config +++ 
b/arch/powerpc/configs/disable-werror.config @@ -1 +1,2 @@ +# Help: Disable -Werror CONFIG_PPC_DISABLE_WERROR=y diff --git a/arch/powerpc/configs/security.config b/arch/powerpc/configs/security.config index 1c91a35c6a73..0d54e29e2cdf 100644 --- a/arch/powerpc/configs/security.config +++ b/arch/powerpc/configs/security.config @@ -1,3 +1,5 @@ +# Help: Common security options for PowerPC builds + # This is the equivalent of booting with lockdown=integrity CONFIG_SECURITY=y CONFIG_SECURITYFS=y @@ -12,4 +14,4 @@ CONFIG_INIT_ON_ALLOC_DEFAULT_ON=y # UBSAN bounds checking is very cheap and good for hardening CONFIG_UBSAN=y -# CONFIG_UBSAN_MISC is not set
\ No newline at end of file +# CONFIG_UBSAN_MISC is not set diff --git a/arch/powerpc/crypto/Kconfig b/arch/powerpc/crypto/Kconfig index 803da4a8a9a2..6fc2248ca561 100644 --- a/arch/powerpc/crypto/Kconfig +++ b/arch/powerpc/crypto/Kconfig @@ -113,7 +113,7 @@ config CRYPTO_AES_GCM_P10 config CRYPTO_CHACHA20_P10 tristate "Ciphers: ChaCha20, XChacha20, XChacha12 (P10 or later)" - depends on PPC64 && CPU_LITTLE_ENDIAN + depends on PPC64 && CPU_LITTLE_ENDIAN && VSX select CRYPTO_SKCIPHER select CRYPTO_LIB_CHACHA_GENERIC select CRYPTO_ARCH_HAVE_LIB_CHACHA @@ -127,7 +127,7 @@ config CRYPTO_CHACHA20_P10 config CRYPTO_POLY1305_P10 tristate "Hash functions: Poly1305 (P10 or later)" - depends on PPC64 && CPU_LITTLE_ENDIAN + depends on PPC64 && CPU_LITTLE_ENDIAN && VSX select CRYPTO_HASH select CRYPTO_LIB_POLY1305_GENERIC help diff --git a/arch/powerpc/include/asm/fs_pd.h b/arch/powerpc/include/asm/fs_pd.h deleted file mode 100644 index d530f68b4eef..000000000000 --- a/arch/powerpc/include/asm/fs_pd.h +++ /dev/null @@ -1,27 +0,0 @@ -/* - * Platform information definitions. - * - * 2006 (c) MontaVista Software, Inc. - * Vitaly Bordug <vbordug@ru.mvista.com> - * - * This file is licensed under the terms of the GNU General Public License - * version 2. This program is licensed "as is" without any warranty of any - * kind, whether express or implied. - */ - -#ifndef FS_PD_H -#define FS_PD_H -#include <sysdev/fsl_soc.h> -#include <asm/time.h> - -static inline int uart_baudrate(void) -{ - return get_baudrate(); -} - -static inline int uart_clock(void) -{ - return ppc_proc_freq; -} - -#endif diff --git a/arch/powerpc/include/asm/ide.h b/arch/powerpc/include/asm/ide.h deleted file mode 100644 index ce87a4441ca3..000000000000 --- a/arch/powerpc/include/asm/ide.h +++ /dev/null @@ -1,18 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Copyright (C) 1994-1996 Linus Torvalds & authors - * - * This file contains the powerpc architecture specific IDE code. 
- */ -#ifndef _ASM_POWERPC_IDE_H -#define _ASM_POWERPC_IDE_H - -#include <linux/compiler.h> -#include <asm/io.h> - -#define __ide_mm_insw(p, a, c) readsw((void __iomem *)(p), (a), (c)) -#define __ide_mm_insl(p, a, c) readsl((void __iomem *)(p), (a), (c)) -#define __ide_mm_outsw(p, a, c) writesw((void __iomem *)(p), (a), (c)) -#define __ide_mm_outsl(p, a, c) writesl((void __iomem *)(p), (a), (c)) - -#endif /* _ASM_POWERPC_IDE_H */ diff --git a/arch/powerpc/platforms/8xx/mpc885ads_setup.c b/arch/powerpc/platforms/8xx/mpc885ads_setup.c index 76c7cd78c17e..2d899be746eb 100644 --- a/arch/powerpc/platforms/8xx/mpc885ads_setup.c +++ b/arch/powerpc/platforms/8xx/mpc885ads_setup.c @@ -21,7 +21,6 @@ #include <linux/device.h> #include <linux/delay.h> -#include <linux/fs_uart_pd.h> #include <linux/fsl_devices.h> #include <linux/mii.h> #include <linux/of_address.h> diff --git a/arch/powerpc/platforms/8xx/tqm8xx_setup.c b/arch/powerpc/platforms/8xx/tqm8xx_setup.c index 1670dfd30809..d97a7910c594 100644 --- a/arch/powerpc/platforms/8xx/tqm8xx_setup.c +++ b/arch/powerpc/platforms/8xx/tqm8xx_setup.c @@ -24,7 +24,6 @@ #include <linux/device.h> #include <linux/delay.h> -#include <linux/fs_uart_pd.h> #include <linux/fsl_devices.h> #include <linux/mii.h> #include <linux/of_fdt.h> diff --git a/arch/powerpc/sysdev/fsl_soc.c b/arch/powerpc/sysdev/fsl_soc.c index 528506f6e2b8..3949ceb79e64 100644 --- a/arch/powerpc/sysdev/fsl_soc.c +++ b/arch/powerpc/sysdev/fsl_soc.c @@ -22,7 +22,6 @@ #include <linux/phy.h> #include <linux/spi/spi.h> #include <linux/fsl_devices.h> -#include <linux/fs_uart_pd.h> #include <linux/reboot.h> #include <linux/atomic.h> @@ -35,7 +34,6 @@ #include <asm/cpm2.h> #include <asm/fsl_hcalls.h> /* For the Freescale hypervisor */ -extern void init_smc_ioports(struct fs_uart_platform_info*); static phys_addr_t immrbase = -1; phys_addr_t get_immrbase(void) diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index 5888fcd8e408..b3b94cd37713 100644 --- 
a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -3988,7 +3988,7 @@ static void xmon_init(int enable) } #ifdef CONFIG_MAGIC_SYSRQ -static void sysrq_handle_xmon(int key) +static void sysrq_handle_xmon(u8 key) { if (xmon_is_locked_down()) { clear_all_bpt(); diff --git a/arch/riscv/configs/32-bit.config b/arch/riscv/configs/32-bit.config index f6af0f708df4..16ee163847b4 100644 --- a/arch/riscv/configs/32-bit.config +++ b/arch/riscv/configs/32-bit.config @@ -1,3 +1,4 @@ +# Help: Build a 32-bit image CONFIG_ARCH_RV32I=y CONFIG_32BIT=y # CONFIG_PORTABLE is not set diff --git a/arch/riscv/configs/64-bit.config b/arch/riscv/configs/64-bit.config index 313edc554d84..d872a2d533f2 100644 --- a/arch/riscv/configs/64-bit.config +++ b/arch/riscv/configs/64-bit.config @@ -1,2 +1,3 @@ +# Help: Build a 64-bit image CONFIG_ARCH_RV64I=y CONFIG_64BIT=y diff --git a/arch/riscv/include/asm/csr.h b/arch/riscv/include/asm/csr.h index 7bac43a3176e..777cb8299551 100644 --- a/arch/riscv/include/asm/csr.h +++ b/arch/riscv/include/asm/csr.h @@ -54,6 +54,7 @@ #ifndef CONFIG_64BIT #define SATP_PPN _AC(0x003FFFFF, UL) #define SATP_MODE_32 _AC(0x80000000, UL) +#define SATP_MODE_SHIFT 31 #define SATP_ASID_BITS 9 #define SATP_ASID_SHIFT 22 #define SATP_ASID_MASK _AC(0x1FF, UL) @@ -62,6 +63,7 @@ #define SATP_MODE_39 _AC(0x8000000000000000, UL) #define SATP_MODE_48 _AC(0x9000000000000000, UL) #define SATP_MODE_57 _AC(0xa000000000000000, UL) +#define SATP_MODE_SHIFT 60 #define SATP_ASID_BITS 16 #define SATP_ASID_SHIFT 44 #define SATP_ASID_MASK _AC(0xFFFF, UL) diff --git a/arch/riscv/include/asm/kvm_host.h b/arch/riscv/include/asm/kvm_host.h index 2d8ee53b66c7..1ebf20dfbaa6 100644 --- a/arch/riscv/include/asm/kvm_host.h +++ b/arch/riscv/include/asm/kvm_host.h @@ -337,6 +337,15 @@ int kvm_riscv_vcpu_exit(struct kvm_vcpu *vcpu, struct kvm_run *run, void __kvm_riscv_switch_to(struct kvm_vcpu_arch *vcpu_arch); +void kvm_riscv_vcpu_setup_isa(struct kvm_vcpu *vcpu); +unsigned long 
kvm_riscv_vcpu_num_regs(struct kvm_vcpu *vcpu); +int kvm_riscv_vcpu_copy_reg_indices(struct kvm_vcpu *vcpu, + u64 __user *uindices); +int kvm_riscv_vcpu_get_reg(struct kvm_vcpu *vcpu, + const struct kvm_one_reg *reg); +int kvm_riscv_vcpu_set_reg(struct kvm_vcpu *vcpu, + const struct kvm_one_reg *reg); + int kvm_riscv_vcpu_set_interrupt(struct kvm_vcpu *vcpu, unsigned int irq); int kvm_riscv_vcpu_unset_interrupt(struct kvm_vcpu *vcpu, unsigned int irq); void kvm_riscv_vcpu_flush_interrupts(struct kvm_vcpu *vcpu); diff --git a/arch/riscv/include/asm/kvm_vcpu_vector.h b/arch/riscv/include/asm/kvm_vcpu_vector.h index ff994fdd6d0d..27f5bccdd8b0 100644 --- a/arch/riscv/include/asm/kvm_vcpu_vector.h +++ b/arch/riscv/include/asm/kvm_vcpu_vector.h @@ -74,9 +74,7 @@ static inline void kvm_riscv_vcpu_free_vector_context(struct kvm_vcpu *vcpu) #endif int kvm_riscv_vcpu_get_reg_vector(struct kvm_vcpu *vcpu, - const struct kvm_one_reg *reg, - unsigned long rtype); + const struct kvm_one_reg *reg); int kvm_riscv_vcpu_set_reg_vector(struct kvm_vcpu *vcpu, - const struct kvm_one_reg *reg, - unsigned long rtype); + const struct kvm_one_reg *reg); #endif diff --git a/arch/riscv/include/uapi/asm/kvm.h b/arch/riscv/include/uapi/asm/kvm.h index 930fdc4101cd..992c5e407104 100644 --- a/arch/riscv/include/uapi/asm/kvm.h +++ b/arch/riscv/include/uapi/asm/kvm.h @@ -55,6 +55,7 @@ struct kvm_riscv_config { unsigned long marchid; unsigned long mimpid; unsigned long zicboz_block_size; + unsigned long satp_mode; }; /* CORE registers for KVM_GET_ONE_REG and KVM_SET_ONE_REG */ @@ -124,6 +125,12 @@ enum KVM_RISCV_ISA_EXT_ID { KVM_RISCV_ISA_EXT_SSAIA, KVM_RISCV_ISA_EXT_V, KVM_RISCV_ISA_EXT_SVNAPOT, + KVM_RISCV_ISA_EXT_ZBA, + KVM_RISCV_ISA_EXT_ZBS, + KVM_RISCV_ISA_EXT_ZICNTR, + KVM_RISCV_ISA_EXT_ZICSR, + KVM_RISCV_ISA_EXT_ZIFENCEI, + KVM_RISCV_ISA_EXT_ZIHPM, KVM_RISCV_ISA_EXT_MAX, }; @@ -193,6 +200,15 @@ enum KVM_RISCV_SBI_EXT_ID { /* ISA Extension registers are mapped as type 7 */ #define 
KVM_REG_RISCV_ISA_EXT (0x07 << KVM_REG_RISCV_TYPE_SHIFT) +#define KVM_REG_RISCV_ISA_SINGLE (0x0 << KVM_REG_RISCV_SUBTYPE_SHIFT) +#define KVM_REG_RISCV_ISA_MULTI_EN (0x1 << KVM_REG_RISCV_SUBTYPE_SHIFT) +#define KVM_REG_RISCV_ISA_MULTI_DIS (0x2 << KVM_REG_RISCV_SUBTYPE_SHIFT) +#define KVM_REG_RISCV_ISA_MULTI_REG(__ext_id) \ + ((__ext_id) / __BITS_PER_LONG) +#define KVM_REG_RISCV_ISA_MULTI_MASK(__ext_id) \ + (1UL << ((__ext_id) % __BITS_PER_LONG)) +#define KVM_REG_RISCV_ISA_MULTI_REG_LAST \ + KVM_REG_RISCV_ISA_MULTI_REG(KVM_RISCV_ISA_EXT_MAX - 1) /* SBI extension registers are mapped as type 8 */ #define KVM_REG_RISCV_SBI_EXT (0x08 << KVM_REG_RISCV_TYPE_SHIFT) diff --git a/arch/riscv/kvm/Makefile b/arch/riscv/kvm/Makefile index fee0671e2dc1..4c2067fc59fc 100644 --- a/arch/riscv/kvm/Makefile +++ b/arch/riscv/kvm/Makefile @@ -19,6 +19,7 @@ kvm-y += vcpu_exit.o kvm-y += vcpu_fp.o kvm-y += vcpu_vector.o kvm-y += vcpu_insn.o +kvm-y += vcpu_onereg.o kvm-y += vcpu_switch.o kvm-y += vcpu_sbi.o kvm-$(CONFIG_RISCV_SBI_V01) += vcpu_sbi_v01.o diff --git a/arch/riscv/kvm/aia.c b/arch/riscv/kvm/aia.c index 585a3b42c52c..74bb27440527 100644 --- a/arch/riscv/kvm/aia.c +++ b/arch/riscv/kvm/aia.c @@ -176,7 +176,7 @@ int kvm_riscv_vcpu_aia_get_csr(struct kvm_vcpu *vcpu, struct kvm_vcpu_aia_csr *csr = &vcpu->arch.aia_context.guest_csr; if (reg_num >= sizeof(struct kvm_riscv_aia_csr) / sizeof(unsigned long)) - return -EINVAL; + return -ENOENT; *out_val = 0; if (kvm_riscv_aia_available()) @@ -192,7 +192,7 @@ int kvm_riscv_vcpu_aia_set_csr(struct kvm_vcpu *vcpu, struct kvm_vcpu_aia_csr *csr = &vcpu->arch.aia_context.guest_csr; if (reg_num >= sizeof(struct kvm_riscv_aia_csr) / sizeof(unsigned long)) - return -EINVAL; + return -ENOENT; if (kvm_riscv_aia_available()) { ((unsigned long *)csr)[reg_num] = val; diff --git a/arch/riscv/kvm/mmu.c b/arch/riscv/kvm/mmu.c index f2eb47925806..068c74593871 100644 --- a/arch/riscv/kvm/mmu.c +++ b/arch/riscv/kvm/mmu.c @@ -406,12 +406,6 @@ void 
kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot) { } -void kvm_arch_flush_remote_tlbs_memslot(struct kvm *kvm, - const struct kvm_memory_slot *memslot) -{ - kvm_flush_remote_tlbs(kvm); -} - void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free) { } @@ -559,7 +553,7 @@ bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range) bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range) { int ret; - kvm_pfn_t pfn = pte_pfn(range->pte); + kvm_pfn_t pfn = pte_pfn(range->arg.pte); if (!kvm->arch.pgd) return false; diff --git a/arch/riscv/kvm/vcpu.c b/arch/riscv/kvm/vcpu.c index d12ef99901fc..82229db1ce73 100644 --- a/arch/riscv/kvm/vcpu.c +++ b/arch/riscv/kvm/vcpu.c @@ -13,16 +13,12 @@ #include <linux/kdebug.h> #include <linux/module.h> #include <linux/percpu.h> -#include <linux/uaccess.h> #include <linux/vmalloc.h> #include <linux/sched/signal.h> #include <linux/fs.h> #include <linux/kvm_host.h> #include <asm/csr.h> #include <asm/cacheflush.h> -#include <asm/hwcap.h> -#include <asm/sbi.h> -#include <asm/vector.h> #include <asm/kvm_vcpu_vector.h> const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = { @@ -46,79 +42,6 @@ const struct kvm_stats_header kvm_vcpu_stats_header = { sizeof(kvm_vcpu_stats_desc), }; -#define KVM_RISCV_BASE_ISA_MASK GENMASK(25, 0) - -#define KVM_ISA_EXT_ARR(ext) [KVM_RISCV_ISA_EXT_##ext] = RISCV_ISA_EXT_##ext - -/* Mapping between KVM ISA Extension ID & Host ISA extension ID */ -static const unsigned long kvm_isa_ext_arr[] = { - [KVM_RISCV_ISA_EXT_A] = RISCV_ISA_EXT_a, - [KVM_RISCV_ISA_EXT_C] = RISCV_ISA_EXT_c, - [KVM_RISCV_ISA_EXT_D] = RISCV_ISA_EXT_d, - [KVM_RISCV_ISA_EXT_F] = RISCV_ISA_EXT_f, - [KVM_RISCV_ISA_EXT_H] = RISCV_ISA_EXT_h, - [KVM_RISCV_ISA_EXT_I] = RISCV_ISA_EXT_i, - [KVM_RISCV_ISA_EXT_M] = RISCV_ISA_EXT_m, - [KVM_RISCV_ISA_EXT_V] = RISCV_ISA_EXT_v, - - KVM_ISA_EXT_ARR(SSAIA), - KVM_ISA_EXT_ARR(SSTC), - KVM_ISA_EXT_ARR(SVINVAL), - KVM_ISA_EXT_ARR(SVNAPOT), - 
KVM_ISA_EXT_ARR(SVPBMT), - KVM_ISA_EXT_ARR(ZBB), - KVM_ISA_EXT_ARR(ZIHINTPAUSE), - KVM_ISA_EXT_ARR(ZICBOM), - KVM_ISA_EXT_ARR(ZICBOZ), -}; - -static unsigned long kvm_riscv_vcpu_base2isa_ext(unsigned long base_ext) -{ - unsigned long i; - - for (i = 0; i < KVM_RISCV_ISA_EXT_MAX; i++) { - if (kvm_isa_ext_arr[i] == base_ext) - return i; - } - - return KVM_RISCV_ISA_EXT_MAX; -} - -static bool kvm_riscv_vcpu_isa_enable_allowed(unsigned long ext) -{ - switch (ext) { - case KVM_RISCV_ISA_EXT_H: - return false; - case KVM_RISCV_ISA_EXT_V: - return riscv_v_vstate_ctrl_user_allowed(); - default: - break; - } - - return true; -} - -static bool kvm_riscv_vcpu_isa_disable_allowed(unsigned long ext) -{ - switch (ext) { - case KVM_RISCV_ISA_EXT_A: - case KVM_RISCV_ISA_EXT_C: - case KVM_RISCV_ISA_EXT_I: - case KVM_RISCV_ISA_EXT_M: - case KVM_RISCV_ISA_EXT_SSAIA: - case KVM_RISCV_ISA_EXT_SSTC: - case KVM_RISCV_ISA_EXT_SVINVAL: - case KVM_RISCV_ISA_EXT_SVNAPOT: - case KVM_RISCV_ISA_EXT_ZIHINTPAUSE: - case KVM_RISCV_ISA_EXT_ZBB: - return false; - default: - break; - } - - return true; -} - static void kvm_riscv_reset_vcpu(struct kvm_vcpu *vcpu) { struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr; @@ -176,7 +99,6 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) int rc; struct kvm_cpu_context *cntx; struct kvm_vcpu_csr *reset_csr = &vcpu->arch.guest_reset_csr; - unsigned long host_isa, i; /* Mark this VCPU never ran */ vcpu->arch.ran_atleast_once = false; @@ -184,12 +106,7 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) bitmap_zero(vcpu->arch.isa, RISCV_ISA_EXT_MAX); /* Setup ISA features available to VCPU */ - for (i = 0; i < ARRAY_SIZE(kvm_isa_ext_arr); i++) { - host_isa = kvm_isa_ext_arr[i]; - if (__riscv_isa_extension_available(NULL, host_isa) && - kvm_riscv_vcpu_isa_enable_allowed(i)) - set_bit(host_isa, vcpu->arch.isa); - } + kvm_riscv_vcpu_setup_isa(vcpu); /* Setup vendor, arch, and implementation details */ vcpu->arch.mvendorid = sbi_get_mvendorid(); @@ -294,450 +211,6 @@ 
vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf) return VM_FAULT_SIGBUS; } -static int kvm_riscv_vcpu_get_reg_config(struct kvm_vcpu *vcpu, - const struct kvm_one_reg *reg) -{ - unsigned long __user *uaddr = - (unsigned long __user *)(unsigned long)reg->addr; - unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK | - KVM_REG_SIZE_MASK | - KVM_REG_RISCV_CONFIG); - unsigned long reg_val; - - if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long)) - return -EINVAL; - - switch (reg_num) { - case KVM_REG_RISCV_CONFIG_REG(isa): - reg_val = vcpu->arch.isa[0] & KVM_RISCV_BASE_ISA_MASK; - break; - case KVM_REG_RISCV_CONFIG_REG(zicbom_block_size): - if (!riscv_isa_extension_available(vcpu->arch.isa, ZICBOM)) - return -EINVAL; - reg_val = riscv_cbom_block_size; - break; - case KVM_REG_RISCV_CONFIG_REG(zicboz_block_size): - if (!riscv_isa_extension_available(vcpu->arch.isa, ZICBOZ)) - return -EINVAL; - reg_val = riscv_cboz_block_size; - break; - case KVM_REG_RISCV_CONFIG_REG(mvendorid): - reg_val = vcpu->arch.mvendorid; - break; - case KVM_REG_RISCV_CONFIG_REG(marchid): - reg_val = vcpu->arch.marchid; - break; - case KVM_REG_RISCV_CONFIG_REG(mimpid): - reg_val = vcpu->arch.mimpid; - break; - default: - return -EINVAL; - } - - if (copy_to_user(uaddr, &reg_val, KVM_REG_SIZE(reg->id))) - return -EFAULT; - - return 0; -} - -static int kvm_riscv_vcpu_set_reg_config(struct kvm_vcpu *vcpu, - const struct kvm_one_reg *reg) -{ - unsigned long __user *uaddr = - (unsigned long __user *)(unsigned long)reg->addr; - unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK | - KVM_REG_SIZE_MASK | - KVM_REG_RISCV_CONFIG); - unsigned long i, isa_ext, reg_val; - - if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long)) - return -EINVAL; - - if (copy_from_user(&reg_val, uaddr, KVM_REG_SIZE(reg->id))) - return -EFAULT; - - switch (reg_num) { - case KVM_REG_RISCV_CONFIG_REG(isa): - /* - * This ONE REG interface is only defined for - * single letter extensions. 
- */ - if (fls(reg_val) >= RISCV_ISA_EXT_BASE) - return -EINVAL; - - if (!vcpu->arch.ran_atleast_once) { - /* Ignore the enable/disable request for certain extensions */ - for (i = 0; i < RISCV_ISA_EXT_BASE; i++) { - isa_ext = kvm_riscv_vcpu_base2isa_ext(i); - if (isa_ext >= KVM_RISCV_ISA_EXT_MAX) { - reg_val &= ~BIT(i); - continue; - } - if (!kvm_riscv_vcpu_isa_enable_allowed(isa_ext)) - if (reg_val & BIT(i)) - reg_val &= ~BIT(i); - if (!kvm_riscv_vcpu_isa_disable_allowed(isa_ext)) - if (!(reg_val & BIT(i))) - reg_val |= BIT(i); - } - reg_val &= riscv_isa_extension_base(NULL); - /* Do not modify anything beyond single letter extensions */ - reg_val = (vcpu->arch.isa[0] & ~KVM_RISCV_BASE_ISA_MASK) | - (reg_val & KVM_RISCV_BASE_ISA_MASK); - vcpu->arch.isa[0] = reg_val; - kvm_riscv_vcpu_fp_reset(vcpu); - } else { - return -EOPNOTSUPP; - } - break; - case KVM_REG_RISCV_CONFIG_REG(zicbom_block_size): - return -EOPNOTSUPP; - case KVM_REG_RISCV_CONFIG_REG(zicboz_block_size): - return -EOPNOTSUPP; - case KVM_REG_RISCV_CONFIG_REG(mvendorid): - if (!vcpu->arch.ran_atleast_once) - vcpu->arch.mvendorid = reg_val; - else - return -EBUSY; - break; - case KVM_REG_RISCV_CONFIG_REG(marchid): - if (!vcpu->arch.ran_atleast_once) - vcpu->arch.marchid = reg_val; - else - return -EBUSY; - break; - case KVM_REG_RISCV_CONFIG_REG(mimpid): - if (!vcpu->arch.ran_atleast_once) - vcpu->arch.mimpid = reg_val; - else - return -EBUSY; - break; - default: - return -EINVAL; - } - - return 0; -} - -static int kvm_riscv_vcpu_get_reg_core(struct kvm_vcpu *vcpu, - const struct kvm_one_reg *reg) -{ - struct kvm_cpu_context *cntx = &vcpu->arch.guest_context; - unsigned long __user *uaddr = - (unsigned long __user *)(unsigned long)reg->addr; - unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK | - KVM_REG_SIZE_MASK | - KVM_REG_RISCV_CORE); - unsigned long reg_val; - - if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long)) - return -EINVAL; - if (reg_num >= sizeof(struct kvm_riscv_core) / sizeof(unsigned 
long)) - return -EINVAL; - - if (reg_num == KVM_REG_RISCV_CORE_REG(regs.pc)) - reg_val = cntx->sepc; - else if (KVM_REG_RISCV_CORE_REG(regs.pc) < reg_num && - reg_num <= KVM_REG_RISCV_CORE_REG(regs.t6)) - reg_val = ((unsigned long *)cntx)[reg_num]; - else if (reg_num == KVM_REG_RISCV_CORE_REG(mode)) - reg_val = (cntx->sstatus & SR_SPP) ? - KVM_RISCV_MODE_S : KVM_RISCV_MODE_U; - else - return -EINVAL; - - if (copy_to_user(uaddr, &reg_val, KVM_REG_SIZE(reg->id))) - return -EFAULT; - - return 0; -} - -static int kvm_riscv_vcpu_set_reg_core(struct kvm_vcpu *vcpu, - const struct kvm_one_reg *reg) -{ - struct kvm_cpu_context *cntx = &vcpu->arch.guest_context; - unsigned long __user *uaddr = - (unsigned long __user *)(unsigned long)reg->addr; - unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK | - KVM_REG_SIZE_MASK | - KVM_REG_RISCV_CORE); - unsigned long reg_val; - - if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long)) - return -EINVAL; - if (reg_num >= sizeof(struct kvm_riscv_core) / sizeof(unsigned long)) - return -EINVAL; - - if (copy_from_user(&reg_val, uaddr, KVM_REG_SIZE(reg->id))) - return -EFAULT; - - if (reg_num == KVM_REG_RISCV_CORE_REG(regs.pc)) - cntx->sepc = reg_val; - else if (KVM_REG_RISCV_CORE_REG(regs.pc) < reg_num && - reg_num <= KVM_REG_RISCV_CORE_REG(regs.t6)) - ((unsigned long *)cntx)[reg_num] = reg_val; - else if (reg_num == KVM_REG_RISCV_CORE_REG(mode)) { - if (reg_val == KVM_RISCV_MODE_S) - cntx->sstatus |= SR_SPP; - else - cntx->sstatus &= ~SR_SPP; - } else - return -EINVAL; - - return 0; -} - -static int kvm_riscv_vcpu_general_get_csr(struct kvm_vcpu *vcpu, - unsigned long reg_num, - unsigned long *out_val) -{ - struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr; - - if (reg_num >= sizeof(struct kvm_riscv_csr) / sizeof(unsigned long)) - return -EINVAL; - - if (reg_num == KVM_REG_RISCV_CSR_REG(sip)) { - kvm_riscv_vcpu_flush_interrupts(vcpu); - *out_val = (csr->hvip >> VSIP_TO_HVIP_SHIFT) & VSIP_VALID_MASK; - *out_val |= csr->hvip & ~IRQ_LOCAL_MASK; - 
} else - *out_val = ((unsigned long *)csr)[reg_num]; - - return 0; -} - -static inline int kvm_riscv_vcpu_general_set_csr(struct kvm_vcpu *vcpu, - unsigned long reg_num, - unsigned long reg_val) -{ - struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr; - - if (reg_num >= sizeof(struct kvm_riscv_csr) / sizeof(unsigned long)) - return -EINVAL; - - if (reg_num == KVM_REG_RISCV_CSR_REG(sip)) { - reg_val &= VSIP_VALID_MASK; - reg_val <<= VSIP_TO_HVIP_SHIFT; - } - - ((unsigned long *)csr)[reg_num] = reg_val; - - if (reg_num == KVM_REG_RISCV_CSR_REG(sip)) - WRITE_ONCE(vcpu->arch.irqs_pending_mask[0], 0); - - return 0; -} - -static int kvm_riscv_vcpu_get_reg_csr(struct kvm_vcpu *vcpu, - const struct kvm_one_reg *reg) -{ - int rc; - unsigned long __user *uaddr = - (unsigned long __user *)(unsigned long)reg->addr; - unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK | - KVM_REG_SIZE_MASK | - KVM_REG_RISCV_CSR); - unsigned long reg_val, reg_subtype; - - if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long)) - return -EINVAL; - - reg_subtype = reg_num & KVM_REG_RISCV_SUBTYPE_MASK; - reg_num &= ~KVM_REG_RISCV_SUBTYPE_MASK; - switch (reg_subtype) { - case KVM_REG_RISCV_CSR_GENERAL: - rc = kvm_riscv_vcpu_general_get_csr(vcpu, reg_num, &reg_val); - break; - case KVM_REG_RISCV_CSR_AIA: - rc = kvm_riscv_vcpu_aia_get_csr(vcpu, reg_num, &reg_val); - break; - default: - rc = -EINVAL; - break; - } - if (rc) - return rc; - - if (copy_to_user(uaddr, &reg_val, KVM_REG_SIZE(reg->id))) - return -EFAULT; - - return 0; -} - -static int kvm_riscv_vcpu_set_reg_csr(struct kvm_vcpu *vcpu, - const struct kvm_one_reg *reg) -{ - int rc; - unsigned long __user *uaddr = - (unsigned long __user *)(unsigned long)reg->addr; - unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK | - KVM_REG_SIZE_MASK | - KVM_REG_RISCV_CSR); - unsigned long reg_val, reg_subtype; - - if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long)) - return -EINVAL; - - if (copy_from_user(&reg_val, uaddr, KVM_REG_SIZE(reg->id))) - return -EFAULT; - - 
reg_subtype = reg_num & KVM_REG_RISCV_SUBTYPE_MASK; - reg_num &= ~KVM_REG_RISCV_SUBTYPE_MASK; - switch (reg_subtype) { - case KVM_REG_RISCV_CSR_GENERAL: - rc = kvm_riscv_vcpu_general_set_csr(vcpu, reg_num, reg_val); - break; - case KVM_REG_RISCV_CSR_AIA: - rc = kvm_riscv_vcpu_aia_set_csr(vcpu, reg_num, reg_val); - break; - default: - rc = -EINVAL; - break; - } - if (rc) - return rc; - - return 0; -} - -static int kvm_riscv_vcpu_get_reg_isa_ext(struct kvm_vcpu *vcpu, - const struct kvm_one_reg *reg) -{ - unsigned long __user *uaddr = - (unsigned long __user *)(unsigned long)reg->addr; - unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK | - KVM_REG_SIZE_MASK | - KVM_REG_RISCV_ISA_EXT); - unsigned long reg_val = 0; - unsigned long host_isa_ext; - - if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long)) - return -EINVAL; - - if (reg_num >= KVM_RISCV_ISA_EXT_MAX || - reg_num >= ARRAY_SIZE(kvm_isa_ext_arr)) - return -EINVAL; - - host_isa_ext = kvm_isa_ext_arr[reg_num]; - if (__riscv_isa_extension_available(vcpu->arch.isa, host_isa_ext)) - reg_val = 1; /* Mark the given extension as available */ - - if (copy_to_user(uaddr, &reg_val, KVM_REG_SIZE(reg->id))) - return -EFAULT; - - return 0; -} - -static int kvm_riscv_vcpu_set_reg_isa_ext(struct kvm_vcpu *vcpu, - const struct kvm_one_reg *reg) -{ - unsigned long __user *uaddr = - (unsigned long __user *)(unsigned long)reg->addr; - unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK | - KVM_REG_SIZE_MASK | - KVM_REG_RISCV_ISA_EXT); - unsigned long reg_val; - unsigned long host_isa_ext; - - if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long)) - return -EINVAL; - - if (reg_num >= KVM_RISCV_ISA_EXT_MAX || - reg_num >= ARRAY_SIZE(kvm_isa_ext_arr)) - return -EINVAL; - - if (copy_from_user(&reg_val, uaddr, KVM_REG_SIZE(reg->id))) - return -EFAULT; - - host_isa_ext = kvm_isa_ext_arr[reg_num]; - if (!__riscv_isa_extension_available(NULL, host_isa_ext)) - return -EOPNOTSUPP; - - if (!vcpu->arch.ran_atleast_once) { - /* - * All multi-letter 
extension and a few single letter - * extension can be disabled - */ - if (reg_val == 1 && - kvm_riscv_vcpu_isa_enable_allowed(reg_num)) - set_bit(host_isa_ext, vcpu->arch.isa); - else if (!reg_val && - kvm_riscv_vcpu_isa_disable_allowed(reg_num)) - clear_bit(host_isa_ext, vcpu->arch.isa); - else - return -EINVAL; - kvm_riscv_vcpu_fp_reset(vcpu); - } else { - return -EOPNOTSUPP; - } - - return 0; -} - -static int kvm_riscv_vcpu_set_reg(struct kvm_vcpu *vcpu, - const struct kvm_one_reg *reg) -{ - switch (reg->id & KVM_REG_RISCV_TYPE_MASK) { - case KVM_REG_RISCV_CONFIG: - return kvm_riscv_vcpu_set_reg_config(vcpu, reg); - case KVM_REG_RISCV_CORE: - return kvm_riscv_vcpu_set_reg_core(vcpu, reg); - case KVM_REG_RISCV_CSR: - return kvm_riscv_vcpu_set_reg_csr(vcpu, reg); - case KVM_REG_RISCV_TIMER: - return kvm_riscv_vcpu_set_reg_timer(vcpu, reg); - case KVM_REG_RISCV_FP_F: - return kvm_riscv_vcpu_set_reg_fp(vcpu, reg, - KVM_REG_RISCV_FP_F); - case KVM_REG_RISCV_FP_D: - return kvm_riscv_vcpu_set_reg_fp(vcpu, reg, - KVM_REG_RISCV_FP_D); - case KVM_REG_RISCV_ISA_EXT: - return kvm_riscv_vcpu_set_reg_isa_ext(vcpu, reg); - case KVM_REG_RISCV_SBI_EXT: - return kvm_riscv_vcpu_set_reg_sbi_ext(vcpu, reg); - case KVM_REG_RISCV_VECTOR: - return kvm_riscv_vcpu_set_reg_vector(vcpu, reg, - KVM_REG_RISCV_VECTOR); - default: - break; - } - - return -EINVAL; -} - -static int kvm_riscv_vcpu_get_reg(struct kvm_vcpu *vcpu, - const struct kvm_one_reg *reg) -{ - switch (reg->id & KVM_REG_RISCV_TYPE_MASK) { - case KVM_REG_RISCV_CONFIG: - return kvm_riscv_vcpu_get_reg_config(vcpu, reg); - case KVM_REG_RISCV_CORE: - return kvm_riscv_vcpu_get_reg_core(vcpu, reg); - case KVM_REG_RISCV_CSR: - return kvm_riscv_vcpu_get_reg_csr(vcpu, reg); - case KVM_REG_RISCV_TIMER: - return kvm_riscv_vcpu_get_reg_timer(vcpu, reg); - case KVM_REG_RISCV_FP_F: - return kvm_riscv_vcpu_get_reg_fp(vcpu, reg, - KVM_REG_RISCV_FP_F); - case KVM_REG_RISCV_FP_D: - return kvm_riscv_vcpu_get_reg_fp(vcpu, reg, - 
KVM_REG_RISCV_FP_D); - case KVM_REG_RISCV_ISA_EXT: - return kvm_riscv_vcpu_get_reg_isa_ext(vcpu, reg); - case KVM_REG_RISCV_SBI_EXT: - return kvm_riscv_vcpu_get_reg_sbi_ext(vcpu, reg); - case KVM_REG_RISCV_VECTOR: - return kvm_riscv_vcpu_get_reg_vector(vcpu, reg, - KVM_REG_RISCV_VECTOR); - default: - break; - } - - return -EINVAL; -} - long kvm_arch_vcpu_async_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { @@ -781,6 +254,24 @@ long kvm_arch_vcpu_ioctl(struct file *filp, r = kvm_riscv_vcpu_get_reg(vcpu, &reg); break; } + case KVM_GET_REG_LIST: { + struct kvm_reg_list __user *user_list = argp; + struct kvm_reg_list reg_list; + unsigned int n; + + r = -EFAULT; + if (copy_from_user(&reg_list, user_list, sizeof(reg_list))) + break; + n = reg_list.n; + reg_list.n = kvm_riscv_vcpu_num_regs(vcpu); + if (copy_to_user(user_list, &reg_list, sizeof(reg_list))) + break; + r = -E2BIG; + if (n < reg_list.n) + break; + r = kvm_riscv_vcpu_copy_reg_indices(vcpu, user_list->reg); + break; + } default: break; } diff --git a/arch/riscv/kvm/vcpu_fp.c b/arch/riscv/kvm/vcpu_fp.c index 9d8cbc42057a..08ba48a395aa 100644 --- a/arch/riscv/kvm/vcpu_fp.c +++ b/arch/riscv/kvm/vcpu_fp.c @@ -96,7 +96,7 @@ int kvm_riscv_vcpu_get_reg_fp(struct kvm_vcpu *vcpu, reg_num <= KVM_REG_RISCV_FP_F_REG(f[31])) reg_val = &cntx->fp.f.f[reg_num]; else - return -EINVAL; + return -ENOENT; } else if ((rtype == KVM_REG_RISCV_FP_D) && riscv_isa_extension_available(vcpu->arch.isa, d)) { if (reg_num == KVM_REG_RISCV_FP_D_REG(fcsr)) { @@ -109,9 +109,9 @@ int kvm_riscv_vcpu_get_reg_fp(struct kvm_vcpu *vcpu, return -EINVAL; reg_val = &cntx->fp.d.f[reg_num]; } else - return -EINVAL; + return -ENOENT; } else - return -EINVAL; + return -ENOENT; if (copy_to_user(uaddr, reg_val, KVM_REG_SIZE(reg->id))) return -EFAULT; @@ -141,7 +141,7 @@ int kvm_riscv_vcpu_set_reg_fp(struct kvm_vcpu *vcpu, reg_num <= KVM_REG_RISCV_FP_F_REG(f[31])) reg_val = &cntx->fp.f.f[reg_num]; else - return -EINVAL; + return -ENOENT; } else if 
((rtype == KVM_REG_RISCV_FP_D) && riscv_isa_extension_available(vcpu->arch.isa, d)) { if (reg_num == KVM_REG_RISCV_FP_D_REG(fcsr)) { @@ -154,9 +154,9 @@ int kvm_riscv_vcpu_set_reg_fp(struct kvm_vcpu *vcpu, return -EINVAL; reg_val = &cntx->fp.d.f[reg_num]; } else - return -EINVAL; + return -ENOENT; } else - return -EINVAL; + return -ENOENT; if (copy_from_user(reg_val, uaddr, KVM_REG_SIZE(reg->id))) return -EFAULT; diff --git a/arch/riscv/kvm/vcpu_onereg.c b/arch/riscv/kvm/vcpu_onereg.c new file mode 100644 index 000000000000..1b7e9fa265cb --- /dev/null +++ b/arch/riscv/kvm/vcpu_onereg.c @@ -0,0 +1,1051 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2019 Western Digital Corporation or its affiliates. + * Copyright (C) 2023 Ventana Micro Systems Inc. + * + * Authors: + * Anup Patel <apatel@ventanamicro.com> + */ + +#include <linux/bitops.h> +#include <linux/errno.h> +#include <linux/err.h> +#include <linux/uaccess.h> +#include <linux/kvm_host.h> +#include <asm/cacheflush.h> +#include <asm/hwcap.h> +#include <asm/kvm_vcpu_vector.h> +#include <asm/vector.h> + +#define KVM_RISCV_BASE_ISA_MASK GENMASK(25, 0) + +#define KVM_ISA_EXT_ARR(ext) \ +[KVM_RISCV_ISA_EXT_##ext] = RISCV_ISA_EXT_##ext + +/* Mapping between KVM ISA Extension ID & Host ISA extension ID */ +static const unsigned long kvm_isa_ext_arr[] = { + /* Single letter extensions (alphabetically sorted) */ + [KVM_RISCV_ISA_EXT_A] = RISCV_ISA_EXT_a, + [KVM_RISCV_ISA_EXT_C] = RISCV_ISA_EXT_c, + [KVM_RISCV_ISA_EXT_D] = RISCV_ISA_EXT_d, + [KVM_RISCV_ISA_EXT_F] = RISCV_ISA_EXT_f, + [KVM_RISCV_ISA_EXT_H] = RISCV_ISA_EXT_h, + [KVM_RISCV_ISA_EXT_I] = RISCV_ISA_EXT_i, + [KVM_RISCV_ISA_EXT_M] = RISCV_ISA_EXT_m, + [KVM_RISCV_ISA_EXT_V] = RISCV_ISA_EXT_v, + /* Multi letter extensions (alphabetically sorted) */ + KVM_ISA_EXT_ARR(SSAIA), + KVM_ISA_EXT_ARR(SSTC), + KVM_ISA_EXT_ARR(SVINVAL), + KVM_ISA_EXT_ARR(SVNAPOT), + KVM_ISA_EXT_ARR(SVPBMT), + KVM_ISA_EXT_ARR(ZBA), + KVM_ISA_EXT_ARR(ZBB), + KVM_ISA_EXT_ARR(ZBS), 
+ KVM_ISA_EXT_ARR(ZICBOM), + KVM_ISA_EXT_ARR(ZICBOZ), + KVM_ISA_EXT_ARR(ZICNTR), + KVM_ISA_EXT_ARR(ZICSR), + KVM_ISA_EXT_ARR(ZIFENCEI), + KVM_ISA_EXT_ARR(ZIHINTPAUSE), + KVM_ISA_EXT_ARR(ZIHPM), +}; + +static unsigned long kvm_riscv_vcpu_base2isa_ext(unsigned long base_ext) +{ + unsigned long i; + + for (i = 0; i < KVM_RISCV_ISA_EXT_MAX; i++) { + if (kvm_isa_ext_arr[i] == base_ext) + return i; + } + + return KVM_RISCV_ISA_EXT_MAX; +} + +static bool kvm_riscv_vcpu_isa_enable_allowed(unsigned long ext) +{ + switch (ext) { + case KVM_RISCV_ISA_EXT_H: + return false; + case KVM_RISCV_ISA_EXT_V: + return riscv_v_vstate_ctrl_user_allowed(); + default: + break; + } + + return true; +} + +static bool kvm_riscv_vcpu_isa_disable_allowed(unsigned long ext) +{ + switch (ext) { + case KVM_RISCV_ISA_EXT_A: + case KVM_RISCV_ISA_EXT_C: + case KVM_RISCV_ISA_EXT_I: + case KVM_RISCV_ISA_EXT_M: + case KVM_RISCV_ISA_EXT_SSAIA: + case KVM_RISCV_ISA_EXT_SSTC: + case KVM_RISCV_ISA_EXT_SVINVAL: + case KVM_RISCV_ISA_EXT_SVNAPOT: + case KVM_RISCV_ISA_EXT_ZBA: + case KVM_RISCV_ISA_EXT_ZBB: + case KVM_RISCV_ISA_EXT_ZBS: + case KVM_RISCV_ISA_EXT_ZICNTR: + case KVM_RISCV_ISA_EXT_ZICSR: + case KVM_RISCV_ISA_EXT_ZIFENCEI: + case KVM_RISCV_ISA_EXT_ZIHINTPAUSE: + case KVM_RISCV_ISA_EXT_ZIHPM: + return false; + default: + break; + } + + return true; +} + +void kvm_riscv_vcpu_setup_isa(struct kvm_vcpu *vcpu) +{ + unsigned long host_isa, i; + + for (i = 0; i < ARRAY_SIZE(kvm_isa_ext_arr); i++) { + host_isa = kvm_isa_ext_arr[i]; + if (__riscv_isa_extension_available(NULL, host_isa) && + kvm_riscv_vcpu_isa_enable_allowed(i)) + set_bit(host_isa, vcpu->arch.isa); + } +} + +static int kvm_riscv_vcpu_get_reg_config(struct kvm_vcpu *vcpu, + const struct kvm_one_reg *reg) +{ + unsigned long __user *uaddr = + (unsigned long __user *)(unsigned long)reg->addr; + unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK | + KVM_REG_SIZE_MASK | + KVM_REG_RISCV_CONFIG); + unsigned long reg_val; + + if 
(KVM_REG_SIZE(reg->id) != sizeof(unsigned long)) + return -EINVAL; + + switch (reg_num) { + case KVM_REG_RISCV_CONFIG_REG(isa): + reg_val = vcpu->arch.isa[0] & KVM_RISCV_BASE_ISA_MASK; + break; + case KVM_REG_RISCV_CONFIG_REG(zicbom_block_size): + if (!riscv_isa_extension_available(vcpu->arch.isa, ZICBOM)) + return -ENOENT; + reg_val = riscv_cbom_block_size; + break; + case KVM_REG_RISCV_CONFIG_REG(zicboz_block_size): + if (!riscv_isa_extension_available(vcpu->arch.isa, ZICBOZ)) + return -ENOENT; + reg_val = riscv_cboz_block_size; + break; + case KVM_REG_RISCV_CONFIG_REG(mvendorid): + reg_val = vcpu->arch.mvendorid; + break; + case KVM_REG_RISCV_CONFIG_REG(marchid): + reg_val = vcpu->arch.marchid; + break; + case KVM_REG_RISCV_CONFIG_REG(mimpid): + reg_val = vcpu->arch.mimpid; + break; + case KVM_REG_RISCV_CONFIG_REG(satp_mode): + reg_val = satp_mode >> SATP_MODE_SHIFT; + break; + default: + return -ENOENT; + } + + if (copy_to_user(uaddr, &reg_val, KVM_REG_SIZE(reg->id))) + return -EFAULT; + + return 0; +} + +static int kvm_riscv_vcpu_set_reg_config(struct kvm_vcpu *vcpu, + const struct kvm_one_reg *reg) +{ + unsigned long __user *uaddr = + (unsigned long __user *)(unsigned long)reg->addr; + unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK | + KVM_REG_SIZE_MASK | + KVM_REG_RISCV_CONFIG); + unsigned long i, isa_ext, reg_val; + + if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long)) + return -EINVAL; + + if (copy_from_user(&reg_val, uaddr, KVM_REG_SIZE(reg->id))) + return -EFAULT; + + switch (reg_num) { + case KVM_REG_RISCV_CONFIG_REG(isa): + /* + * This ONE REG interface is only defined for + * single letter extensions. + */ + if (fls(reg_val) >= RISCV_ISA_EXT_BASE) + return -EINVAL; + + /* + * Return early (i.e. do nothing) if reg_val is the same + * value retrievable via kvm_riscv_vcpu_get_reg_config(). 
+ */ + if (reg_val == (vcpu->arch.isa[0] & KVM_RISCV_BASE_ISA_MASK)) + break; + + if (!vcpu->arch.ran_atleast_once) { + /* Ignore the enable/disable request for certain extensions */ + for (i = 0; i < RISCV_ISA_EXT_BASE; i++) { + isa_ext = kvm_riscv_vcpu_base2isa_ext(i); + if (isa_ext >= KVM_RISCV_ISA_EXT_MAX) { + reg_val &= ~BIT(i); + continue; + } + if (!kvm_riscv_vcpu_isa_enable_allowed(isa_ext)) + if (reg_val & BIT(i)) + reg_val &= ~BIT(i); + if (!kvm_riscv_vcpu_isa_disable_allowed(isa_ext)) + if (!(reg_val & BIT(i))) + reg_val |= BIT(i); + } + reg_val &= riscv_isa_extension_base(NULL); + /* Do not modify anything beyond single letter extensions */ + reg_val = (vcpu->arch.isa[0] & ~KVM_RISCV_BASE_ISA_MASK) | + (reg_val & KVM_RISCV_BASE_ISA_MASK); + vcpu->arch.isa[0] = reg_val; + kvm_riscv_vcpu_fp_reset(vcpu); + } else { + return -EBUSY; + } + break; + case KVM_REG_RISCV_CONFIG_REG(zicbom_block_size): + if (!riscv_isa_extension_available(vcpu->arch.isa, ZICBOM)) + return -ENOENT; + if (reg_val != riscv_cbom_block_size) + return -EINVAL; + break; + case KVM_REG_RISCV_CONFIG_REG(zicboz_block_size): + if (!riscv_isa_extension_available(vcpu->arch.isa, ZICBOZ)) + return -ENOENT; + if (reg_val != riscv_cboz_block_size) + return -EINVAL; + break; + case KVM_REG_RISCV_CONFIG_REG(mvendorid): + if (reg_val == vcpu->arch.mvendorid) + break; + if (!vcpu->arch.ran_atleast_once) + vcpu->arch.mvendorid = reg_val; + else + return -EBUSY; + break; + case KVM_REG_RISCV_CONFIG_REG(marchid): + if (reg_val == vcpu->arch.marchid) + break; + if (!vcpu->arch.ran_atleast_once) + vcpu->arch.marchid = reg_val; + else + return -EBUSY; + break; + case KVM_REG_RISCV_CONFIG_REG(mimpid): + if (reg_val == vcpu->arch.mimpid) + break; + if (!vcpu->arch.ran_atleast_once) + vcpu->arch.mimpid = reg_val; + else + return -EBUSY; + break; + case KVM_REG_RISCV_CONFIG_REG(satp_mode): + if (reg_val != (satp_mode >> SATP_MODE_SHIFT)) + return -EINVAL; + break; + default: + return -ENOENT; + } + + return 
0; +} + +static int kvm_riscv_vcpu_get_reg_core(struct kvm_vcpu *vcpu, + const struct kvm_one_reg *reg) +{ + struct kvm_cpu_context *cntx = &vcpu->arch.guest_context; + unsigned long __user *uaddr = + (unsigned long __user *)(unsigned long)reg->addr; + unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK | + KVM_REG_SIZE_MASK | + KVM_REG_RISCV_CORE); + unsigned long reg_val; + + if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long)) + return -EINVAL; + if (reg_num >= sizeof(struct kvm_riscv_core) / sizeof(unsigned long)) + return -ENOENT; + + if (reg_num == KVM_REG_RISCV_CORE_REG(regs.pc)) + reg_val = cntx->sepc; + else if (KVM_REG_RISCV_CORE_REG(regs.pc) < reg_num && + reg_num <= KVM_REG_RISCV_CORE_REG(regs.t6)) + reg_val = ((unsigned long *)cntx)[reg_num]; + else if (reg_num == KVM_REG_RISCV_CORE_REG(mode)) + reg_val = (cntx->sstatus & SR_SPP) ? + KVM_RISCV_MODE_S : KVM_RISCV_MODE_U; + else + return -ENOENT; + + if (copy_to_user(uaddr, &reg_val, KVM_REG_SIZE(reg->id))) + return -EFAULT; + + return 0; +} + +static int kvm_riscv_vcpu_set_reg_core(struct kvm_vcpu *vcpu, + const struct kvm_one_reg *reg) +{ + struct kvm_cpu_context *cntx = &vcpu->arch.guest_context; + unsigned long __user *uaddr = + (unsigned long __user *)(unsigned long)reg->addr; + unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK | + KVM_REG_SIZE_MASK | + KVM_REG_RISCV_CORE); + unsigned long reg_val; + + if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long)) + return -EINVAL; + if (reg_num >= sizeof(struct kvm_riscv_core) / sizeof(unsigned long)) + return -ENOENT; + + if (copy_from_user(&reg_val, uaddr, KVM_REG_SIZE(reg->id))) + return -EFAULT; + + if (reg_num == KVM_REG_RISCV_CORE_REG(regs.pc)) + cntx->sepc = reg_val; + else if (KVM_REG_RISCV_CORE_REG(regs.pc) < reg_num && + reg_num <= KVM_REG_RISCV_CORE_REG(regs.t6)) + ((unsigned long *)cntx)[reg_num] = reg_val; + else if (reg_num == KVM_REG_RISCV_CORE_REG(mode)) { + if (reg_val == KVM_RISCV_MODE_S) + cntx->sstatus |= SR_SPP; + else + cntx->sstatus &= 
~SR_SPP; + } else + return -ENOENT; + + return 0; +} + +static int kvm_riscv_vcpu_general_get_csr(struct kvm_vcpu *vcpu, + unsigned long reg_num, + unsigned long *out_val) +{ + struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr; + + if (reg_num >= sizeof(struct kvm_riscv_csr) / sizeof(unsigned long)) + return -ENOENT; + + if (reg_num == KVM_REG_RISCV_CSR_REG(sip)) { + kvm_riscv_vcpu_flush_interrupts(vcpu); + *out_val = (csr->hvip >> VSIP_TO_HVIP_SHIFT) & VSIP_VALID_MASK; + *out_val |= csr->hvip & ~IRQ_LOCAL_MASK; + } else + *out_val = ((unsigned long *)csr)[reg_num]; + + return 0; +} + +static int kvm_riscv_vcpu_general_set_csr(struct kvm_vcpu *vcpu, + unsigned long reg_num, + unsigned long reg_val) +{ + struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr; + + if (reg_num >= sizeof(struct kvm_riscv_csr) / sizeof(unsigned long)) + return -ENOENT; + + if (reg_num == KVM_REG_RISCV_CSR_REG(sip)) { + reg_val &= VSIP_VALID_MASK; + reg_val <<= VSIP_TO_HVIP_SHIFT; + } + + ((unsigned long *)csr)[reg_num] = reg_val; + + if (reg_num == KVM_REG_RISCV_CSR_REG(sip)) + WRITE_ONCE(vcpu->arch.irqs_pending_mask[0], 0); + + return 0; +} + +static int kvm_riscv_vcpu_get_reg_csr(struct kvm_vcpu *vcpu, + const struct kvm_one_reg *reg) +{ + int rc; + unsigned long __user *uaddr = + (unsigned long __user *)(unsigned long)reg->addr; + unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK | + KVM_REG_SIZE_MASK | + KVM_REG_RISCV_CSR); + unsigned long reg_val, reg_subtype; + + if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long)) + return -EINVAL; + + reg_subtype = reg_num & KVM_REG_RISCV_SUBTYPE_MASK; + reg_num &= ~KVM_REG_RISCV_SUBTYPE_MASK; + switch (reg_subtype) { + case KVM_REG_RISCV_CSR_GENERAL: + rc = kvm_riscv_vcpu_general_get_csr(vcpu, reg_num, &reg_val); + break; + case KVM_REG_RISCV_CSR_AIA: + rc = kvm_riscv_vcpu_aia_get_csr(vcpu, reg_num, &reg_val); + break; + default: + rc = -ENOENT; + break; + } + if (rc) + return rc; + + if (copy_to_user(uaddr, &reg_val, KVM_REG_SIZE(reg->id))) + return 
-EFAULT; + + return 0; +} + +static int kvm_riscv_vcpu_set_reg_csr(struct kvm_vcpu *vcpu, + const struct kvm_one_reg *reg) +{ + int rc; + unsigned long __user *uaddr = + (unsigned long __user *)(unsigned long)reg->addr; + unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK | + KVM_REG_SIZE_MASK | + KVM_REG_RISCV_CSR); + unsigned long reg_val, reg_subtype; + + if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long)) + return -EINVAL; + + if (copy_from_user(&reg_val, uaddr, KVM_REG_SIZE(reg->id))) + return -EFAULT; + + reg_subtype = reg_num & KVM_REG_RISCV_SUBTYPE_MASK; + reg_num &= ~KVM_REG_RISCV_SUBTYPE_MASK; + switch (reg_subtype) { + case KVM_REG_RISCV_CSR_GENERAL: + rc = kvm_riscv_vcpu_general_set_csr(vcpu, reg_num, reg_val); + break; + case KVM_REG_RISCV_CSR_AIA: + rc = kvm_riscv_vcpu_aia_set_csr(vcpu, reg_num, reg_val); + break; + default: + rc = -ENOENT; + break; + } + if (rc) + return rc; + + return 0; +} + +static int riscv_vcpu_get_isa_ext_single(struct kvm_vcpu *vcpu, + unsigned long reg_num, + unsigned long *reg_val) +{ + unsigned long host_isa_ext; + + if (reg_num >= KVM_RISCV_ISA_EXT_MAX || + reg_num >= ARRAY_SIZE(kvm_isa_ext_arr)) + return -ENOENT; + + *reg_val = 0; + host_isa_ext = kvm_isa_ext_arr[reg_num]; + if (__riscv_isa_extension_available(vcpu->arch.isa, host_isa_ext)) + *reg_val = 1; /* Mark the given extension as available */ + + return 0; +} + +static int riscv_vcpu_set_isa_ext_single(struct kvm_vcpu *vcpu, + unsigned long reg_num, + unsigned long reg_val) +{ + unsigned long host_isa_ext; + + if (reg_num >= KVM_RISCV_ISA_EXT_MAX || + reg_num >= ARRAY_SIZE(kvm_isa_ext_arr)) + return -ENOENT; + + host_isa_ext = kvm_isa_ext_arr[reg_num]; + if (!__riscv_isa_extension_available(NULL, host_isa_ext)) + return -ENOENT; + + if (reg_val == test_bit(host_isa_ext, vcpu->arch.isa)) + return 0; + + if (!vcpu->arch.ran_atleast_once) { + /* + * All multi-letter extension and a few single letter + * extension can be disabled + */ + if (reg_val == 1 && + 
kvm_riscv_vcpu_isa_enable_allowed(reg_num)) + set_bit(host_isa_ext, vcpu->arch.isa); + else if (!reg_val && + kvm_riscv_vcpu_isa_disable_allowed(reg_num)) + clear_bit(host_isa_ext, vcpu->arch.isa); + else + return -EINVAL; + kvm_riscv_vcpu_fp_reset(vcpu); + } else { + return -EBUSY; + } + + return 0; +} + +static int riscv_vcpu_get_isa_ext_multi(struct kvm_vcpu *vcpu, + unsigned long reg_num, + unsigned long *reg_val) +{ + unsigned long i, ext_id, ext_val; + + if (reg_num > KVM_REG_RISCV_ISA_MULTI_REG_LAST) + return -ENOENT; + + for (i = 0; i < BITS_PER_LONG; i++) { + ext_id = i + reg_num * BITS_PER_LONG; + if (ext_id >= KVM_RISCV_ISA_EXT_MAX) + break; + + ext_val = 0; + riscv_vcpu_get_isa_ext_single(vcpu, ext_id, &ext_val); + if (ext_val) + *reg_val |= KVM_REG_RISCV_ISA_MULTI_MASK(ext_id); + } + + return 0; +} + +static int riscv_vcpu_set_isa_ext_multi(struct kvm_vcpu *vcpu, + unsigned long reg_num, + unsigned long reg_val, bool enable) +{ + unsigned long i, ext_id; + + if (reg_num > KVM_REG_RISCV_ISA_MULTI_REG_LAST) + return -ENOENT; + + for_each_set_bit(i, &reg_val, BITS_PER_LONG) { + ext_id = i + reg_num * BITS_PER_LONG; + if (ext_id >= KVM_RISCV_ISA_EXT_MAX) + break; + + riscv_vcpu_set_isa_ext_single(vcpu, ext_id, enable); + } + + return 0; +} + +static int kvm_riscv_vcpu_get_reg_isa_ext(struct kvm_vcpu *vcpu, + const struct kvm_one_reg *reg) +{ + int rc; + unsigned long __user *uaddr = + (unsigned long __user *)(unsigned long)reg->addr; + unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK | + KVM_REG_SIZE_MASK | + KVM_REG_RISCV_ISA_EXT); + unsigned long reg_val, reg_subtype; + + if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long)) + return -EINVAL; + + reg_subtype = reg_num & KVM_REG_RISCV_SUBTYPE_MASK; + reg_num &= ~KVM_REG_RISCV_SUBTYPE_MASK; + + reg_val = 0; + switch (reg_subtype) { + case KVM_REG_RISCV_ISA_SINGLE: + rc = riscv_vcpu_get_isa_ext_single(vcpu, reg_num, &reg_val); + break; + case KVM_REG_RISCV_ISA_MULTI_EN: + case KVM_REG_RISCV_ISA_MULTI_DIS: + rc 
= riscv_vcpu_get_isa_ext_multi(vcpu, reg_num, &reg_val); + if (!rc && reg_subtype == KVM_REG_RISCV_ISA_MULTI_DIS) + reg_val = ~reg_val; + break; + default: + rc = -ENOENT; + } + if (rc) + return rc; + + if (copy_to_user(uaddr, &reg_val, KVM_REG_SIZE(reg->id))) + return -EFAULT; + + return 0; +} + +static int kvm_riscv_vcpu_set_reg_isa_ext(struct kvm_vcpu *vcpu, + const struct kvm_one_reg *reg) +{ + unsigned long __user *uaddr = + (unsigned long __user *)(unsigned long)reg->addr; + unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK | + KVM_REG_SIZE_MASK | + KVM_REG_RISCV_ISA_EXT); + unsigned long reg_val, reg_subtype; + + if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long)) + return -EINVAL; + + reg_subtype = reg_num & KVM_REG_RISCV_SUBTYPE_MASK; + reg_num &= ~KVM_REG_RISCV_SUBTYPE_MASK; + + if (copy_from_user(&reg_val, uaddr, KVM_REG_SIZE(reg->id))) + return -EFAULT; + + switch (reg_subtype) { + case KVM_REG_RISCV_ISA_SINGLE: + return riscv_vcpu_set_isa_ext_single(vcpu, reg_num, reg_val); + case KVM_REG_RISCV_SBI_MULTI_EN: + return riscv_vcpu_set_isa_ext_multi(vcpu, reg_num, reg_val, true); + case KVM_REG_RISCV_SBI_MULTI_DIS: + return riscv_vcpu_set_isa_ext_multi(vcpu, reg_num, reg_val, false); + default: + return -ENOENT; + } + + return 0; +} + +static int copy_config_reg_indices(const struct kvm_vcpu *vcpu, + u64 __user *uindices) +{ + int n = 0; + + for (int i = 0; i < sizeof(struct kvm_riscv_config)/sizeof(unsigned long); + i++) { + u64 size; + u64 reg; + + /* + * Avoid reporting config reg if the corresponding extension + * was not available. + */ + if (i == KVM_REG_RISCV_CONFIG_REG(zicbom_block_size) && + !riscv_isa_extension_available(vcpu->arch.isa, ZICBOM)) + continue; + else if (i == KVM_REG_RISCV_CONFIG_REG(zicboz_block_size) && + !riscv_isa_extension_available(vcpu->arch.isa, ZICBOZ)) + continue; + + size = IS_ENABLED(CONFIG_32BIT) ? 
KVM_REG_SIZE_U32 : KVM_REG_SIZE_U64; + reg = KVM_REG_RISCV | size | KVM_REG_RISCV_CONFIG | i; + + if (uindices) { + if (put_user(reg, uindices)) + return -EFAULT; + uindices++; + } + + n++; + } + + return n; +} + +static unsigned long num_config_regs(const struct kvm_vcpu *vcpu) +{ + return copy_config_reg_indices(vcpu, NULL); +} + +static inline unsigned long num_core_regs(void) +{ + return sizeof(struct kvm_riscv_core) / sizeof(unsigned long); +} + +static int copy_core_reg_indices(u64 __user *uindices) +{ + int n = num_core_regs(); + + for (int i = 0; i < n; i++) { + u64 size = IS_ENABLED(CONFIG_32BIT) ? + KVM_REG_SIZE_U32 : KVM_REG_SIZE_U64; + u64 reg = KVM_REG_RISCV | size | KVM_REG_RISCV_CORE | i; + + if (uindices) { + if (put_user(reg, uindices)) + return -EFAULT; + uindices++; + } + } + + return n; +} + +static inline unsigned long num_csr_regs(const struct kvm_vcpu *vcpu) +{ + unsigned long n = sizeof(struct kvm_riscv_csr) / sizeof(unsigned long); + + if (riscv_isa_extension_available(vcpu->arch.isa, SSAIA)) + n += sizeof(struct kvm_riscv_aia_csr) / sizeof(unsigned long); + + return n; +} + +static int copy_csr_reg_indices(const struct kvm_vcpu *vcpu, + u64 __user *uindices) +{ + int n1 = sizeof(struct kvm_riscv_csr) / sizeof(unsigned long); + int n2 = 0; + + /* copy general csr regs */ + for (int i = 0; i < n1; i++) { + u64 size = IS_ENABLED(CONFIG_32BIT) ? + KVM_REG_SIZE_U32 : KVM_REG_SIZE_U64; + u64 reg = KVM_REG_RISCV | size | KVM_REG_RISCV_CSR | + KVM_REG_RISCV_CSR_GENERAL | i; + + if (uindices) { + if (put_user(reg, uindices)) + return -EFAULT; + uindices++; + } + } + + /* copy AIA csr regs */ + if (riscv_isa_extension_available(vcpu->arch.isa, SSAIA)) { + n2 = sizeof(struct kvm_riscv_aia_csr) / sizeof(unsigned long); + + for (int i = 0; i < n2; i++) { + u64 size = IS_ENABLED(CONFIG_32BIT) ? 
+ KVM_REG_SIZE_U32 : KVM_REG_SIZE_U64; + u64 reg = KVM_REG_RISCV | size | KVM_REG_RISCV_CSR | + KVM_REG_RISCV_CSR_AIA | i; + + if (uindices) { + if (put_user(reg, uindices)) + return -EFAULT; + uindices++; + } + } + } + + return n1 + n2; +} + +static inline unsigned long num_timer_regs(void) +{ + return sizeof(struct kvm_riscv_timer) / sizeof(u64); +} + +static int copy_timer_reg_indices(u64 __user *uindices) +{ + int n = num_timer_regs(); + + for (int i = 0; i < n; i++) { + u64 reg = KVM_REG_RISCV | KVM_REG_SIZE_U64 | + KVM_REG_RISCV_TIMER | i; + + if (uindices) { + if (put_user(reg, uindices)) + return -EFAULT; + uindices++; + } + } + + return n; +} + +static inline unsigned long num_fp_f_regs(const struct kvm_vcpu *vcpu) +{ + const struct kvm_cpu_context *cntx = &vcpu->arch.guest_context; + + if (riscv_isa_extension_available(vcpu->arch.isa, f)) + return sizeof(cntx->fp.f) / sizeof(u32); + else + return 0; +} + +static int copy_fp_f_reg_indices(const struct kvm_vcpu *vcpu, + u64 __user *uindices) +{ + int n = num_fp_f_regs(vcpu); + + for (int i = 0; i < n; i++) { + u64 reg = KVM_REG_RISCV | KVM_REG_SIZE_U32 | + KVM_REG_RISCV_FP_F | i; + + if (uindices) { + if (put_user(reg, uindices)) + return -EFAULT; + uindices++; + } + } + + return n; +} + +static inline unsigned long num_fp_d_regs(const struct kvm_vcpu *vcpu) +{ + const struct kvm_cpu_context *cntx = &vcpu->arch.guest_context; + + if (riscv_isa_extension_available(vcpu->arch.isa, d)) + return sizeof(cntx->fp.d.f) / sizeof(u64) + 1; + else + return 0; +} + +static int copy_fp_d_reg_indices(const struct kvm_vcpu *vcpu, + u64 __user *uindices) +{ + int i; + int n = num_fp_d_regs(vcpu); + u64 reg; + + /* copy fp.d.f indices */ + for (i = 0; i < n-1; i++) { + reg = KVM_REG_RISCV | KVM_REG_SIZE_U64 | + KVM_REG_RISCV_FP_D | i; + + if (uindices) { + if (put_user(reg, uindices)) + return -EFAULT; + uindices++; + } + } + + /* copy fp.d.fcsr indices */ + reg = KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_D | i; 
+ if (uindices) { + if (put_user(reg, uindices)) + return -EFAULT; + uindices++; + } + + return n; +} + +static int copy_isa_ext_reg_indices(const struct kvm_vcpu *vcpu, + u64 __user *uindices) +{ + unsigned int n = 0; + unsigned long isa_ext; + + for (int i = 0; i < KVM_RISCV_ISA_EXT_MAX; i++) { + u64 size = IS_ENABLED(CONFIG_32BIT) ? + KVM_REG_SIZE_U32 : KVM_REG_SIZE_U64; + u64 reg = KVM_REG_RISCV | size | KVM_REG_RISCV_ISA_EXT | i; + + isa_ext = kvm_isa_ext_arr[i]; + if (!__riscv_isa_extension_available(vcpu->arch.isa, isa_ext)) + continue; + + if (uindices) { + if (put_user(reg, uindices)) + return -EFAULT; + uindices++; + } + + n++; + } + + return n; +} + +static inline unsigned long num_isa_ext_regs(const struct kvm_vcpu *vcpu) +{ + return copy_isa_ext_reg_indices(vcpu, NULL);; +} + +static inline unsigned long num_sbi_ext_regs(void) +{ + /* + * number of KVM_REG_RISCV_SBI_SINGLE + + * 2 x (number of KVM_REG_RISCV_SBI_MULTI) + */ + return KVM_RISCV_SBI_EXT_MAX + 2*(KVM_REG_RISCV_SBI_MULTI_REG_LAST+1); +} + +static int copy_sbi_ext_reg_indices(u64 __user *uindices) +{ + int n; + + /* copy KVM_REG_RISCV_SBI_SINGLE */ + n = KVM_RISCV_SBI_EXT_MAX; + for (int i = 0; i < n; i++) { + u64 size = IS_ENABLED(CONFIG_32BIT) ? + KVM_REG_SIZE_U32 : KVM_REG_SIZE_U64; + u64 reg = KVM_REG_RISCV | size | KVM_REG_RISCV_SBI_EXT | + KVM_REG_RISCV_SBI_SINGLE | i; + + if (uindices) { + if (put_user(reg, uindices)) + return -EFAULT; + uindices++; + } + } + + /* copy KVM_REG_RISCV_SBI_MULTI */ + n = KVM_REG_RISCV_SBI_MULTI_REG_LAST + 1; + for (int i = 0; i < n; i++) { + u64 size = IS_ENABLED(CONFIG_32BIT) ? 
+ KVM_REG_SIZE_U32 : KVM_REG_SIZE_U64; + u64 reg = KVM_REG_RISCV | size | KVM_REG_RISCV_SBI_EXT | + KVM_REG_RISCV_SBI_MULTI_EN | i; + + if (uindices) { + if (put_user(reg, uindices)) + return -EFAULT; + uindices++; + } + + reg = KVM_REG_RISCV | size | KVM_REG_RISCV_SBI_EXT | + KVM_REG_RISCV_SBI_MULTI_DIS | i; + + if (uindices) { + if (put_user(reg, uindices)) + return -EFAULT; + uindices++; + } + } + + return num_sbi_ext_regs(); +} + +/* + * kvm_riscv_vcpu_num_regs - how many registers do we present via KVM_GET/SET_ONE_REG + * + * This is for all registers. + */ +unsigned long kvm_riscv_vcpu_num_regs(struct kvm_vcpu *vcpu) +{ + unsigned long res = 0; + + res += num_config_regs(vcpu); + res += num_core_regs(); + res += num_csr_regs(vcpu); + res += num_timer_regs(); + res += num_fp_f_regs(vcpu); + res += num_fp_d_regs(vcpu); + res += num_isa_ext_regs(vcpu); + res += num_sbi_ext_regs(); + + return res; +} + +/* + * kvm_riscv_vcpu_copy_reg_indices - get indices of all registers. + */ +int kvm_riscv_vcpu_copy_reg_indices(struct kvm_vcpu *vcpu, + u64 __user *uindices) +{ + int ret; + + ret = copy_config_reg_indices(vcpu, uindices); + if (ret < 0) + return ret; + uindices += ret; + + ret = copy_core_reg_indices(uindices); + if (ret < 0) + return ret; + uindices += ret; + + ret = copy_csr_reg_indices(vcpu, uindices); + if (ret < 0) + return ret; + uindices += ret; + + ret = copy_timer_reg_indices(uindices); + if (ret < 0) + return ret; + uindices += ret; + + ret = copy_fp_f_reg_indices(vcpu, uindices); + if (ret < 0) + return ret; + uindices += ret; + + ret = copy_fp_d_reg_indices(vcpu, uindices); + if (ret < 0) + return ret; + uindices += ret; + + ret = copy_isa_ext_reg_indices(vcpu, uindices); + if (ret < 0) + return ret; + uindices += ret; + + ret = copy_sbi_ext_reg_indices(uindices); + if (ret < 0) + return ret; + + return 0; +} + +int kvm_riscv_vcpu_set_reg(struct kvm_vcpu *vcpu, + const struct kvm_one_reg *reg) +{ + switch (reg->id & KVM_REG_RISCV_TYPE_MASK) { + case 
KVM_REG_RISCV_CONFIG: + return kvm_riscv_vcpu_set_reg_config(vcpu, reg); + case KVM_REG_RISCV_CORE: + return kvm_riscv_vcpu_set_reg_core(vcpu, reg); + case KVM_REG_RISCV_CSR: + return kvm_riscv_vcpu_set_reg_csr(vcpu, reg); + case KVM_REG_RISCV_TIMER: + return kvm_riscv_vcpu_set_reg_timer(vcpu, reg); + case KVM_REG_RISCV_FP_F: + return kvm_riscv_vcpu_set_reg_fp(vcpu, reg, + KVM_REG_RISCV_FP_F); + case KVM_REG_RISCV_FP_D: + return kvm_riscv_vcpu_set_reg_fp(vcpu, reg, + KVM_REG_RISCV_FP_D); + case KVM_REG_RISCV_ISA_EXT: + return kvm_riscv_vcpu_set_reg_isa_ext(vcpu, reg); + case KVM_REG_RISCV_SBI_EXT: + return kvm_riscv_vcpu_set_reg_sbi_ext(vcpu, reg); + case KVM_REG_RISCV_VECTOR: + return kvm_riscv_vcpu_set_reg_vector(vcpu, reg); + default: + break; + } + + return -ENOENT; +} + +int kvm_riscv_vcpu_get_reg(struct kvm_vcpu *vcpu, + const struct kvm_one_reg *reg) +{ + switch (reg->id & KVM_REG_RISCV_TYPE_MASK) { + case KVM_REG_RISCV_CONFIG: + return kvm_riscv_vcpu_get_reg_config(vcpu, reg); + case KVM_REG_RISCV_CORE: + return kvm_riscv_vcpu_get_reg_core(vcpu, reg); + case KVM_REG_RISCV_CSR: + return kvm_riscv_vcpu_get_reg_csr(vcpu, reg); + case KVM_REG_RISCV_TIMER: + return kvm_riscv_vcpu_get_reg_timer(vcpu, reg); + case KVM_REG_RISCV_FP_F: + return kvm_riscv_vcpu_get_reg_fp(vcpu, reg, + KVM_REG_RISCV_FP_F); + case KVM_REG_RISCV_FP_D: + return kvm_riscv_vcpu_get_reg_fp(vcpu, reg, + KVM_REG_RISCV_FP_D); + case KVM_REG_RISCV_ISA_EXT: + return kvm_riscv_vcpu_get_reg_isa_ext(vcpu, reg); + case KVM_REG_RISCV_SBI_EXT: + return kvm_riscv_vcpu_get_reg_sbi_ext(vcpu, reg); + case KVM_REG_RISCV_VECTOR: + return kvm_riscv_vcpu_get_reg_vector(vcpu, reg); + default: + break; + } + + return -ENOENT; +} diff --git a/arch/riscv/kvm/vcpu_sbi.c b/arch/riscv/kvm/vcpu_sbi.c index 7b46e04fb667..9cd97091c723 100644 --- a/arch/riscv/kvm/vcpu_sbi.c +++ b/arch/riscv/kvm/vcpu_sbi.c @@ -140,8 +140,10 @@ static int riscv_vcpu_set_sbi_ext_single(struct kvm_vcpu *vcpu, const struct 
kvm_riscv_sbi_extension_entry *sext = NULL; struct kvm_vcpu_sbi_context *scontext = &vcpu->arch.sbi_context; - if (reg_num >= KVM_RISCV_SBI_EXT_MAX || - (reg_val != 1 && reg_val != 0)) + if (reg_num >= KVM_RISCV_SBI_EXT_MAX) + return -ENOENT; + + if (reg_val != 1 && reg_val != 0) return -EINVAL; for (i = 0; i < ARRAY_SIZE(sbi_ext); i++) { @@ -175,7 +177,7 @@ static int riscv_vcpu_get_sbi_ext_single(struct kvm_vcpu *vcpu, struct kvm_vcpu_sbi_context *scontext = &vcpu->arch.sbi_context; if (reg_num >= KVM_RISCV_SBI_EXT_MAX) - return -EINVAL; + return -ENOENT; for (i = 0; i < ARRAY_SIZE(sbi_ext); i++) { if (sbi_ext[i].ext_idx == reg_num) { @@ -206,7 +208,7 @@ static int riscv_vcpu_set_sbi_ext_multi(struct kvm_vcpu *vcpu, unsigned long i, ext_id; if (reg_num > KVM_REG_RISCV_SBI_MULTI_REG_LAST) - return -EINVAL; + return -ENOENT; for_each_set_bit(i, &reg_val, BITS_PER_LONG) { ext_id = i + reg_num * BITS_PER_LONG; @@ -226,7 +228,7 @@ static int riscv_vcpu_get_sbi_ext_multi(struct kvm_vcpu *vcpu, unsigned long i, ext_id, ext_val; if (reg_num > KVM_REG_RISCV_SBI_MULTI_REG_LAST) - return -EINVAL; + return -ENOENT; for (i = 0; i < BITS_PER_LONG; i++) { ext_id = i + reg_num * BITS_PER_LONG; @@ -272,7 +274,7 @@ int kvm_riscv_vcpu_set_reg_sbi_ext(struct kvm_vcpu *vcpu, case KVM_REG_RISCV_SBI_MULTI_DIS: return riscv_vcpu_set_sbi_ext_multi(vcpu, reg_num, reg_val, false); default: - return -EINVAL; + return -ENOENT; } return 0; @@ -307,7 +309,7 @@ int kvm_riscv_vcpu_get_reg_sbi_ext(struct kvm_vcpu *vcpu, reg_val = ~reg_val; break; default: - rc = -EINVAL; + rc = -ENOENT; } if (rc) return rc; diff --git a/arch/riscv/kvm/vcpu_timer.c b/arch/riscv/kvm/vcpu_timer.c index 3ac2ff6a65da..75486b25ac45 100644 --- a/arch/riscv/kvm/vcpu_timer.c +++ b/arch/riscv/kvm/vcpu_timer.c @@ -170,7 +170,7 @@ int kvm_riscv_vcpu_get_reg_timer(struct kvm_vcpu *vcpu, if (KVM_REG_SIZE(reg->id) != sizeof(u64)) return -EINVAL; if (reg_num >= sizeof(struct kvm_riscv_timer) / sizeof(u64)) - return -EINVAL; + 
return -ENOENT; switch (reg_num) { case KVM_REG_RISCV_TIMER_REG(frequency): @@ -187,7 +187,7 @@ int kvm_riscv_vcpu_get_reg_timer(struct kvm_vcpu *vcpu, KVM_RISCV_TIMER_STATE_OFF; break; default: - return -EINVAL; + return -ENOENT; } if (copy_to_user(uaddr, &reg_val, KVM_REG_SIZE(reg->id))) @@ -211,14 +211,15 @@ int kvm_riscv_vcpu_set_reg_timer(struct kvm_vcpu *vcpu, if (KVM_REG_SIZE(reg->id) != sizeof(u64)) return -EINVAL; if (reg_num >= sizeof(struct kvm_riscv_timer) / sizeof(u64)) - return -EINVAL; + return -ENOENT; if (copy_from_user(&reg_val, uaddr, KVM_REG_SIZE(reg->id))) return -EFAULT; switch (reg_num) { case KVM_REG_RISCV_TIMER_REG(frequency): - ret = -EOPNOTSUPP; + if (reg_val != riscv_timebase) + return -EINVAL; break; case KVM_REG_RISCV_TIMER_REG(time): gt->time_delta = reg_val - get_cycles64(); @@ -233,7 +234,7 @@ int kvm_riscv_vcpu_set_reg_timer(struct kvm_vcpu *vcpu, ret = kvm_riscv_vcpu_timer_cancel(t); break; default: - ret = -EINVAL; + ret = -ENOENT; break; } diff --git a/arch/riscv/kvm/vcpu_vector.c b/arch/riscv/kvm/vcpu_vector.c index edd2eecbddc2..b430cbb69521 100644 --- a/arch/riscv/kvm/vcpu_vector.c +++ b/arch/riscv/kvm/vcpu_vector.c @@ -91,95 +91,93 @@ void kvm_riscv_vcpu_free_vector_context(struct kvm_vcpu *vcpu) } #endif -static void *kvm_riscv_vcpu_vreg_addr(struct kvm_vcpu *vcpu, - unsigned long reg_num, - size_t reg_size) +static int kvm_riscv_vcpu_vreg_addr(struct kvm_vcpu *vcpu, + unsigned long reg_num, + size_t reg_size, + void **reg_addr) { struct kvm_cpu_context *cntx = &vcpu->arch.guest_context; - void *reg_val; size_t vlenb = riscv_v_vsize / 32; if (reg_num < KVM_REG_RISCV_VECTOR_REG(0)) { if (reg_size != sizeof(unsigned long)) - return NULL; + return -EINVAL; switch (reg_num) { case KVM_REG_RISCV_VECTOR_CSR_REG(vstart): - reg_val = &cntx->vector.vstart; + *reg_addr = &cntx->vector.vstart; break; case KVM_REG_RISCV_VECTOR_CSR_REG(vl): - reg_val = &cntx->vector.vl; + *reg_addr = &cntx->vector.vl; break; case 
KVM_REG_RISCV_VECTOR_CSR_REG(vtype): - reg_val = &cntx->vector.vtype; + *reg_addr = &cntx->vector.vtype; break; case KVM_REG_RISCV_VECTOR_CSR_REG(vcsr): - reg_val = &cntx->vector.vcsr; + *reg_addr = &cntx->vector.vcsr; break; case KVM_REG_RISCV_VECTOR_CSR_REG(datap): default: - return NULL; + return -ENOENT; } } else if (reg_num <= KVM_REG_RISCV_VECTOR_REG(31)) { if (reg_size != vlenb) - return NULL; - reg_val = cntx->vector.datap - + (reg_num - KVM_REG_RISCV_VECTOR_REG(0)) * vlenb; + return -EINVAL; + *reg_addr = cntx->vector.datap + + (reg_num - KVM_REG_RISCV_VECTOR_REG(0)) * vlenb; } else { - return NULL; + return -ENOENT; } - return reg_val; + return 0; } int kvm_riscv_vcpu_get_reg_vector(struct kvm_vcpu *vcpu, - const struct kvm_one_reg *reg, - unsigned long rtype) + const struct kvm_one_reg *reg) { unsigned long *isa = vcpu->arch.isa; unsigned long __user *uaddr = (unsigned long __user *)(unsigned long)reg->addr; unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK | KVM_REG_SIZE_MASK | - rtype); - void *reg_val = NULL; + KVM_REG_RISCV_VECTOR); size_t reg_size = KVM_REG_SIZE(reg->id); + void *reg_addr; + int rc; - if (rtype == KVM_REG_RISCV_VECTOR && - riscv_isa_extension_available(isa, v)) { - reg_val = kvm_riscv_vcpu_vreg_addr(vcpu, reg_num, reg_size); - } + if (!riscv_isa_extension_available(isa, v)) + return -ENOENT; - if (!reg_val) - return -EINVAL; + rc = kvm_riscv_vcpu_vreg_addr(vcpu, reg_num, reg_size, &reg_addr); + if (rc) + return rc; - if (copy_to_user(uaddr, reg_val, reg_size)) + if (copy_to_user(uaddr, reg_addr, reg_size)) return -EFAULT; return 0; } int kvm_riscv_vcpu_set_reg_vector(struct kvm_vcpu *vcpu, - const struct kvm_one_reg *reg, - unsigned long rtype) + const struct kvm_one_reg *reg) { unsigned long *isa = vcpu->arch.isa; unsigned long __user *uaddr = (unsigned long __user *)(unsigned long)reg->addr; unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK | KVM_REG_SIZE_MASK | - rtype); - void *reg_val = NULL; + KVM_REG_RISCV_VECTOR); 
size_t reg_size = KVM_REG_SIZE(reg->id); + void *reg_addr; + int rc; - if (rtype == KVM_REG_RISCV_VECTOR && - riscv_isa_extension_available(isa, v)) { - reg_val = kvm_riscv_vcpu_vreg_addr(vcpu, reg_num, reg_size); - } + if (!riscv_isa_extension_available(isa, v)) + return -ENOENT; - if (!reg_val) - return -EINVAL; + rc = kvm_riscv_vcpu_vreg_addr(vcpu, reg_num, reg_size, &reg_addr); + if (rc) + return rc; - if (copy_from_user(reg_val, uaddr, reg_size)) + if (copy_from_user(reg_addr, uaddr, reg_size)) return -EFAULT; return 0; diff --git a/arch/s390/boot/ipl_parm.c b/arch/s390/boot/ipl_parm.c index 8753cb0339e5..7b7521762633 100644 --- a/arch/s390/boot/ipl_parm.c +++ b/arch/s390/boot/ipl_parm.c @@ -19,7 +19,6 @@ struct parmarea parmarea __section(".parmarea") = { }; char __bootdata(early_command_line)[COMMAND_LINE_SIZE]; -int __bootdata(noexec_disabled); unsigned int __bootdata_preserved(zlib_dfltcc_support) = ZLIB_DFLTCC_FULL; struct ipl_parameter_block __bootdata_preserved(ipl_block); @@ -290,12 +289,6 @@ void parse_boot_command_line(void) zlib_dfltcc_support = ZLIB_DFLTCC_FULL_DEBUG; } - if (!strcmp(param, "noexec")) { - rc = kstrtobool(val, &enabled); - if (!rc && !enabled) - noexec_disabled = 1; - } - if (!strcmp(param, "facilities") && val) modify_fac_list(val); diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c index b9681cb22753..d3e48bd9c394 100644 --- a/arch/s390/boot/startup.c +++ b/arch/s390/boot/startup.c @@ -53,10 +53,8 @@ static void detect_facilities(void) } if (test_facility(78)) machine.has_edat2 = 1; - if (!noexec_disabled && test_facility(130)) { + if (test_facility(130)) machine.has_nx = 1; - __ctl_set_bit(0, 20); - } } static void setup_lpp(void) diff --git a/arch/s390/boot/vmem.c b/arch/s390/boot/vmem.c index c67f59db7a51..01257ce3b89c 100644 --- a/arch/s390/boot/vmem.c +++ b/arch/s390/boot/vmem.c @@ -287,7 +287,9 @@ static void pgtable_pte_populate(pmd_t *pmd, unsigned long addr, unsigned long e if 
(kasan_pte_populate_zero_shadow(pte, mode)) continue; entry = __pte(_pa(addr, PAGE_SIZE, mode)); - entry = set_pte_bit(entry, PAGE_KERNEL_EXEC); + entry = set_pte_bit(entry, PAGE_KERNEL); + if (!machine.has_nx) + entry = clear_pte_bit(entry, __pgprot(_PAGE_NOEXEC)); set_pte(pte, entry); pages++; } @@ -311,7 +313,9 @@ static void pgtable_pmd_populate(pud_t *pud, unsigned long addr, unsigned long e continue; if (can_large_pmd(pmd, addr, next)) { entry = __pmd(_pa(addr, _SEGMENT_SIZE, mode)); - entry = set_pmd_bit(entry, SEGMENT_KERNEL_EXEC); + entry = set_pmd_bit(entry, SEGMENT_KERNEL); + if (!machine.has_nx) + entry = clear_pmd_bit(entry, __pgprot(_SEGMENT_ENTRY_NOEXEC)); set_pmd(pmd, entry); pages++; continue; @@ -342,7 +346,9 @@ static void pgtable_pud_populate(p4d_t *p4d, unsigned long addr, unsigned long e continue; if (can_large_pud(pud, addr, next)) { entry = __pud(_pa(addr, _REGION3_SIZE, mode)); - entry = set_pud_bit(entry, REGION3_KERNEL_EXEC); + entry = set_pud_bit(entry, REGION3_KERNEL); + if (!machine.has_nx) + entry = clear_pud_bit(entry, __pgprot(_REGION_ENTRY_NOEXEC)); set_pud(pud, entry); pages++; continue; diff --git a/arch/s390/configs/btf.config b/arch/s390/configs/btf.config index 39227b4511af..eb7f84f5925c 100644 --- a/arch/s390/configs/btf.config +++ b/arch/s390/configs/btf.config @@ -1 +1,2 @@ +# Help: Enable BTF debug info CONFIG_DEBUG_INFO_BTF=y diff --git a/arch/s390/configs/kasan.config b/arch/s390/configs/kasan.config index 700a8b25c3ff..84c2b551e992 100644 --- a/arch/s390/configs/kasan.config +++ b/arch/s390/configs/kasan.config @@ -1,3 +1,4 @@ +# Help: Enable KASan for debugging CONFIG_KASAN=y CONFIG_KASAN_INLINE=y CONFIG_KASAN_VMALLOC=y diff --git a/arch/s390/include/asm/airq.h b/arch/s390/include/asm/airq.h index e82e5626e139..c4c28c2609a5 100644 --- a/arch/s390/include/asm/airq.h +++ b/arch/s390/include/asm/airq.h @@ -18,7 +18,6 @@ struct airq_struct { struct hlist_node list; /* Handler queueing. 
*/ void (*handler)(struct airq_struct *airq, struct tpi_info *tpi_info); u8 *lsi_ptr; /* Local-Summary-Indicator pointer */ - u8 lsi_mask; /* Local-Summary-Indicator mask */ u8 isc; /* Interrupt-subclass */ u8 flags; }; diff --git a/arch/s390/include/asm/dma.h b/arch/s390/include/asm/dma.h index c260adb25997..7fe3e31956d7 100644 --- a/arch/s390/include/asm/dma.h +++ b/arch/s390/include/asm/dma.h @@ -9,6 +9,6 @@ * to DMA. It _is_ used for the s390 memory zone split at 2GB caused * by the 31 bit heritage. */ -#define MAX_DMA_ADDRESS 0x80000000 +#define MAX_DMA_ADDRESS __va(0x80000000) #endif /* _ASM_S390_DMA_H */ diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 91bfecb91321..427f9528a7b6 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -817,6 +817,8 @@ struct kvm_s390_cpu_model { __u64 *fac_list; u64 cpuid; unsigned short ibc; + /* subset of available UV-features for pv-guests enabled by user space */ + struct kvm_s390_vm_cpu_uv_feat uv_feat_guest; }; typedef int (*crypto_hook)(struct kvm_vcpu *vcpu); diff --git a/arch/s390/include/asm/sections.h b/arch/s390/include/asm/sections.h index 3fecaa4e8b74..0486e6ef62bf 100644 --- a/arch/s390/include/asm/sections.h +++ b/arch/s390/include/asm/sections.h @@ -23,7 +23,7 @@ */ #define __bootdata_preserved(var) __section(".boot.preserved.data." 
#var) var -extern unsigned long __samode31, __eamode31; -extern unsigned long __stext_amode31, __etext_amode31; +extern char *__samode31, *__eamode31; +extern char *__stext_amode31, *__etext_amode31; #endif diff --git a/arch/s390/include/asm/set_memory.h b/arch/s390/include/asm/set_memory.h index 7a3eefd7a242..06fbabe2f66c 100644 --- a/arch/s390/include/asm/set_memory.h +++ b/arch/s390/include/asm/set_memory.h @@ -24,43 +24,41 @@ enum { #define SET_MEMORY_INV BIT(_SET_MEMORY_INV_BIT) #define SET_MEMORY_DEF BIT(_SET_MEMORY_DEF_BIT) -int __set_memory(unsigned long addr, int numpages, unsigned long flags); - -static inline int set_memory_ro(unsigned long addr, int numpages) -{ - return __set_memory(addr, numpages, SET_MEMORY_RO); -} - -static inline int set_memory_rw(unsigned long addr, int numpages) -{ - return __set_memory(addr, numpages, SET_MEMORY_RW); -} - -static inline int set_memory_nx(unsigned long addr, int numpages) -{ - return __set_memory(addr, numpages, SET_MEMORY_NX); -} - -static inline int set_memory_x(unsigned long addr, int numpages) -{ - return __set_memory(addr, numpages, SET_MEMORY_X); -} +int __set_memory(unsigned long addr, unsigned long numpages, unsigned long flags); #define set_memory_rox set_memory_rox -static inline int set_memory_rox(unsigned long addr, int numpages) -{ - return __set_memory(addr, numpages, SET_MEMORY_RO | SET_MEMORY_X); -} -static inline int set_memory_rwnx(unsigned long addr, int numpages) -{ - return __set_memory(addr, numpages, SET_MEMORY_RW | SET_MEMORY_NX); +/* + * Generate two variants of each set_memory() function: + * + * set_memory_yy(unsigned long addr, int numpages); + * __set_memory_yy(void *start, void *end); + * + * The second variant exists for both convenience to avoid the usual + * (unsigned long) casts, but unlike the first variant it can also be used + * for areas larger than 8TB, which may happen at memory initialization. 
+ */ +#define __SET_MEMORY_FUNC(fname, flags) \ +static inline int fname(unsigned long addr, int numpages) \ +{ \ + return __set_memory(addr, numpages, (flags)); \ +} \ + \ +static inline int __##fname(void *start, void *end) \ +{ \ + unsigned long numpages; \ + \ + numpages = (end - start) >> PAGE_SHIFT; \ + return __set_memory((unsigned long)start, numpages, (flags)); \ } -static inline int set_memory_4k(unsigned long addr, int numpages) -{ - return __set_memory(addr, numpages, SET_MEMORY_4K); -} +__SET_MEMORY_FUNC(set_memory_ro, SET_MEMORY_RO) +__SET_MEMORY_FUNC(set_memory_rw, SET_MEMORY_RW) +__SET_MEMORY_FUNC(set_memory_nx, SET_MEMORY_NX) +__SET_MEMORY_FUNC(set_memory_x, SET_MEMORY_X) +__SET_MEMORY_FUNC(set_memory_rox, SET_MEMORY_RO | SET_MEMORY_X) +__SET_MEMORY_FUNC(set_memory_rwnx, SET_MEMORY_RW | SET_MEMORY_NX) +__SET_MEMORY_FUNC(set_memory_4k, SET_MEMORY_4K) int set_direct_map_invalid_noflush(struct page *page); int set_direct_map_default_noflush(struct page *page); diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h index b30fe91166e3..25cadc2b9cff 100644 --- a/arch/s390/include/asm/setup.h +++ b/arch/s390/include/asm/setup.h @@ -72,7 +72,6 @@ extern unsigned int zlib_dfltcc_support; #define ZLIB_DFLTCC_INFLATE_ONLY 3 #define ZLIB_DFLTCC_FULL_DEBUG 4 -extern int noexec_disabled; extern unsigned long ident_map_size; extern unsigned long max_mappable; diff --git a/arch/s390/include/asm/uv.h b/arch/s390/include/asm/uv.h index d2cd42bb2c26..0e7bd3873907 100644 --- a/arch/s390/include/asm/uv.h +++ b/arch/s390/include/asm/uv.h @@ -99,6 +99,8 @@ enum uv_cmds_inst { enum uv_feat_ind { BIT_UV_FEAT_MISC = 0, BIT_UV_FEAT_AIV = 1, + BIT_UV_FEAT_AP = 4, + BIT_UV_FEAT_AP_INTR = 5, }; struct uv_cb_header { @@ -159,7 +161,15 @@ struct uv_cb_cgc { u64 guest_handle; u64 conf_base_stor_origin; u64 conf_virt_stor_origin; - u64 reserved30; + u8 reserved30[6]; + union { + struct { + u16 : 14; + u16 ap_instr_intr : 1; + u16 ap_allow_instr : 1; + }; + u16 
raw; + } flags; u64 guest_stor_origin; u64 guest_stor_len; u64 guest_sca; @@ -397,6 +407,13 @@ struct uv_info { extern struct uv_info uv_info; +static inline bool uv_has_feature(u8 feature_bit) +{ + if (feature_bit >= sizeof(uv_info.uv_feature_indications) * 8) + return false; + return test_bit_inv(feature_bit, &uv_info.uv_feature_indications); +} + #ifdef CONFIG_PROTECTED_VIRTUALIZATION_GUEST extern int prot_virt_guest; diff --git a/arch/s390/include/uapi/asm/kvm.h b/arch/s390/include/uapi/asm/kvm.h index a73cf01a1606..abe926d43cbe 100644 --- a/arch/s390/include/uapi/asm/kvm.h +++ b/arch/s390/include/uapi/asm/kvm.h @@ -159,6 +159,22 @@ struct kvm_s390_vm_cpu_subfunc { __u8 reserved[1728]; }; +#define KVM_S390_VM_CPU_PROCESSOR_UV_FEAT_GUEST 6 +#define KVM_S390_VM_CPU_MACHINE_UV_FEAT_GUEST 7 + +#define KVM_S390_VM_CPU_UV_FEAT_NR_BITS 64 +struct kvm_s390_vm_cpu_uv_feat { + union { + struct { + __u64 : 4; + __u64 ap : 1; /* bit 4 */ + __u64 ap_intr : 1; /* bit 5 */ + __u64 : 58; + }; + __u64 feat; + }; +}; + /* kvm attributes for crypto */ #define KVM_S390_VM_CRYPTO_ENABLE_AES_KW 0 #define KVM_S390_VM_CRYPTO_ENABLE_DEA_KW 1 diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index 2dd5976a55ac..442ce0489e1a 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -44,7 +44,6 @@ early_param(#param, ignore_decompressor_param_##param) decompressor_handled_param(mem); decompressor_handled_param(vmalloc); decompressor_handled_param(dfltcc); -decompressor_handled_param(noexec); decompressor_handled_param(facilities); decompressor_handled_param(nokaslr); #if IS_ENABLED(CONFIG_KVM) @@ -233,10 +232,8 @@ static __init void detect_machine_facilities(void) S390_lowcore.machine_flags |= MACHINE_FLAG_VX; __ctl_set_bit(0, 17); } - if (test_facility(130) && !noexec_disabled) { + if (test_facility(130)) S390_lowcore.machine_flags |= MACHINE_FLAG_NX; - __ctl_set_bit(0, 20); - } if (test_facility(133)) S390_lowcore.machine_flags |= MACHINE_FLAG_GS; if 
(test_facility(139) && (tod_clock_base.tod >> 63)) { diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c index 12a2bd4fc88c..ce65fc01671f 100644 --- a/arch/s390/kernel/machine_kexec.c +++ b/arch/s390/kernel/machine_kexec.c @@ -216,8 +216,8 @@ void arch_crash_save_vmcoreinfo(void) VMCOREINFO_SYMBOL(lowcore_ptr); VMCOREINFO_SYMBOL(high_memory); VMCOREINFO_LENGTH(lowcore_ptr, NR_CPUS); - vmcoreinfo_append_str("SAMODE31=%lx\n", __samode31); - vmcoreinfo_append_str("EAMODE31=%lx\n", __eamode31); + vmcoreinfo_append_str("SAMODE31=%lx\n", (unsigned long)__samode31); + vmcoreinfo_append_str("EAMODE31=%lx\n", (unsigned long)__eamode31); vmcoreinfo_append_str("KERNELOFFSET=%lx\n", kaslr_offset()); abs_lc = get_abs_lowcore(); abs_lc->vmcore_info = paddr_vmcoreinfo_note(); diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index c744104e4a9c..de6ad0fb2328 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -97,10 +97,10 @@ EXPORT_SYMBOL(console_irq); * relocated above 2 GB, because it has to use 31 bit addresses. * Such code and data is part of the .amode31 section. 
*/ -unsigned long __amode31_ref __samode31 = (unsigned long)&_samode31; -unsigned long __amode31_ref __eamode31 = (unsigned long)&_eamode31; -unsigned long __amode31_ref __stext_amode31 = (unsigned long)&_stext_amode31; -unsigned long __amode31_ref __etext_amode31 = (unsigned long)&_etext_amode31; +char __amode31_ref *__samode31 = _samode31; +char __amode31_ref *__eamode31 = _eamode31; +char __amode31_ref *__stext_amode31 = _stext_amode31; +char __amode31_ref *__etext_amode31 = _etext_amode31; struct exception_table_entry __amode31_ref *__start_amode31_ex_table = _start_amode31_ex_table; struct exception_table_entry __amode31_ref *__stop_amode31_ex_table = _stop_amode31_ex_table; @@ -145,7 +145,6 @@ static u32 __amode31_ref *__ctl_duald = __ctl_duald_amode31; static u32 __amode31_ref *__ctl_linkage_stack = __ctl_linkage_stack_amode31; static u32 __amode31_ref *__ctl_duct = __ctl_duct_amode31; -int __bootdata(noexec_disabled); unsigned long __bootdata_preserved(max_mappable); unsigned long __bootdata(ident_map_size); struct physmem_info __bootdata(physmem_info); @@ -771,15 +770,15 @@ static void __init setup_memory(void) static void __init relocate_amode31_section(void) { unsigned long amode31_size = __eamode31 - __samode31; - long amode31_offset = physmem_info.reserved[RR_AMODE31].start - __samode31; - long *ptr; + long amode31_offset, *ptr; + amode31_offset = physmem_info.reserved[RR_AMODE31].start - (unsigned long)__samode31; pr_info("Relocating AMODE31 section of size 0x%08lx\n", amode31_size); /* Move original AMODE31 section to the new one */ - memmove((void *)physmem_info.reserved[RR_AMODE31].start, (void *)__samode31, amode31_size); + memmove((void *)physmem_info.reserved[RR_AMODE31].start, __samode31, amode31_size); /* Zero out the old AMODE31 section to catch invalid accesses within it */ - memset((void *)__samode31, 0, amode31_size); + memset(__samode31, 0, amode31_size); /* Update all AMODE31 region references */ for (ptr = _start_amode31_refs; ptr != 
_end_amode31_refs; ptr++) diff --git a/arch/s390/kernel/uv.c b/arch/s390/kernel/uv.c index b771f1b4cdd1..fc07bc39e698 100644 --- a/arch/s390/kernel/uv.c +++ b/arch/s390/kernel/uv.c @@ -258,7 +258,7 @@ static bool should_export_before_import(struct uv_cb_header *uvcb, struct mm_str * shared page from a different protected VM will automatically also * transfer its ownership. */ - if (test_bit_inv(BIT_UV_FEAT_MISC, &uv_info.uv_feature_indications)) + if (uv_has_feature(BIT_UV_FEAT_MISC)) return false; if (uvcb->cmd == UVC_CMD_UNPIN_PAGE_SHARED) return false; diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c index 341abafb96e4..b16352083ff9 100644 --- a/arch/s390/kvm/intercept.c +++ b/arch/s390/kvm/intercept.c @@ -228,6 +228,21 @@ static int handle_itdb(struct kvm_vcpu *vcpu) #define per_event(vcpu) (vcpu->arch.sie_block->iprcc & PGM_PER) +static bool should_handle_per_event(const struct kvm_vcpu *vcpu) +{ + if (!guestdbg_enabled(vcpu) || !per_event(vcpu)) + return false; + if (guestdbg_sstep_enabled(vcpu) && + vcpu->arch.sie_block->iprcc != PGM_PER) { + /* + * __vcpu_run() will exit after delivering the concurrently + * indicated condition. + */ + return false; + } + return true; +} + static int handle_prog(struct kvm_vcpu *vcpu) { psw_t psw; @@ -242,7 +257,7 @@ static int handle_prog(struct kvm_vcpu *vcpu) if (kvm_s390_pv_cpu_is_protected(vcpu)) return -EOPNOTSUPP; - if (guestdbg_enabled(vcpu) && per_event(vcpu)) { + if (should_handle_per_event(vcpu)) { rc = kvm_s390_handle_per_event(vcpu); if (rc) return rc; @@ -571,6 +586,19 @@ static int handle_pv_notification(struct kvm_vcpu *vcpu) return handle_instruction(vcpu); } +static bool should_handle_per_ifetch(const struct kvm_vcpu *vcpu, int rc) +{ + /* Process PER, also if the instruction is processed in user space. 
*/ + if (!(vcpu->arch.sie_block->icptstatus & 0x02)) + return false; + if (rc != 0 && rc != -EOPNOTSUPP) + return false; + if (guestdbg_sstep_enabled(vcpu) && vcpu->arch.local_int.pending_irqs) + /* __vcpu_run() will exit after delivering the interrupt. */ + return false; + return true; +} + int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu) { int rc, per_rc = 0; @@ -605,8 +633,8 @@ int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu) rc = handle_partial_execution(vcpu); break; case ICPT_KSS: - rc = kvm_s390_skey_check_enable(vcpu); - break; + /* Instruction will be redriven, skip the PER check. */ + return kvm_s390_skey_check_enable(vcpu); case ICPT_MCHKREQ: case ICPT_INT_ENABLE: /* @@ -633,9 +661,7 @@ int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu) return -EOPNOTSUPP; } - /* process PER, also if the instruction is processed in user space */ - if (vcpu->arch.sie_block->icptstatus & 0x02 && - (!rc || rc == -EOPNOTSUPP)) + if (should_handle_per_ifetch(vcpu, rc)) per_rc = kvm_s390_handle_per_ifetch_icpt(vcpu); return per_rc ? per_rc : rc; } diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 9bd0a873f3b1..c1b47d608a2b 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -1392,6 +1392,7 @@ int __must_check kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu) { struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; int rc = 0; + bool delivered = false; unsigned long irq_type; unsigned long irqs; @@ -1465,6 +1466,19 @@ int __must_check kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu) WARN_ONCE(1, "Unknown pending irq type %ld", irq_type); clear_bit(irq_type, &li->pending_irqs); } + delivered |= !rc; + } + + /* + * We delivered at least one interrupt and modified the PC. Force a + * singlestep event now. 
+ */ + if (delivered && guestdbg_sstep_enabled(vcpu)) { + struct kvm_debug_exit_arch *debug_exit = &vcpu->run->debug.arch; + + debug_exit->addr = vcpu->arch.sie_block->gpsw.addr; + debug_exit->type = KVM_SINGLESTEP; + vcpu->guest_debug |= KVM_GUESTDBG_EXIT_PENDING; } set_intercept_indicators(vcpu); @@ -3398,7 +3412,6 @@ static void gib_alert_irq_handler(struct airq_struct *airq, static struct airq_struct gib_alert_irq = { .handler = gib_alert_irq_handler, - .lsi_ptr = &gib_alert_irq.lsi_mask, }; void kvm_s390_gib_destroy(void) @@ -3438,6 +3451,8 @@ int __init kvm_s390_gib_init(u8 nisc) rc = -EIO; goto out_free_gib; } + /* adapter interrupts used for AP (applicable here) don't use the LSI */ + *gib_alert_irq.lsi_ptr = 0xff; gib->nisc = nisc; gib_origin = virt_to_phys(gib); diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index d1e768bcfe1d..b3f17e014cab 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -1531,6 +1531,39 @@ static int kvm_s390_set_processor_subfunc(struct kvm *kvm, return 0; } +#define KVM_S390_VM_CPU_UV_FEAT_GUEST_MASK \ +( \ + ((struct kvm_s390_vm_cpu_uv_feat){ \ + .ap = 1, \ + .ap_intr = 1, \ + }) \ + .feat \ +) + +static int kvm_s390_set_uv_feat(struct kvm *kvm, struct kvm_device_attr *attr) +{ + struct kvm_s390_vm_cpu_uv_feat __user *ptr = (void __user *)attr->addr; + unsigned long data, filter; + + filter = uv_info.uv_feature_indications & KVM_S390_VM_CPU_UV_FEAT_GUEST_MASK; + if (get_user(data, &ptr->feat)) + return -EFAULT; + if (!bitmap_subset(&data, &filter, KVM_S390_VM_CPU_UV_FEAT_NR_BITS)) + return -EINVAL; + + mutex_lock(&kvm->lock); + if (kvm->created_vcpus) { + mutex_unlock(&kvm->lock); + return -EBUSY; + } + kvm->arch.model.uv_feat_guest.feat = data; + mutex_unlock(&kvm->lock); + + VM_EVENT(kvm, 3, "SET: guest UV-feat: 0x%16.16lx", data); + + return 0; +} + static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr) { int ret = -ENXIO; @@ -1545,6 +1578,9 @@ static int 
kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr) case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC: ret = kvm_s390_set_processor_subfunc(kvm, attr); break; + case KVM_S390_VM_CPU_PROCESSOR_UV_FEAT_GUEST: + ret = kvm_s390_set_uv_feat(kvm, attr); + break; } return ret; } @@ -1777,6 +1813,33 @@ static int kvm_s390_get_machine_subfunc(struct kvm *kvm, return 0; } +static int kvm_s390_get_processor_uv_feat(struct kvm *kvm, struct kvm_device_attr *attr) +{ + struct kvm_s390_vm_cpu_uv_feat __user *dst = (void __user *)attr->addr; + unsigned long feat = kvm->arch.model.uv_feat_guest.feat; + + if (put_user(feat, &dst->feat)) + return -EFAULT; + VM_EVENT(kvm, 3, "GET: guest UV-feat: 0x%16.16lx", feat); + + return 0; +} + +static int kvm_s390_get_machine_uv_feat(struct kvm *kvm, struct kvm_device_attr *attr) +{ + struct kvm_s390_vm_cpu_uv_feat __user *dst = (void __user *)attr->addr; + unsigned long feat; + + BUILD_BUG_ON(sizeof(*dst) != sizeof(uv_info.uv_feature_indications)); + + feat = uv_info.uv_feature_indications & KVM_S390_VM_CPU_UV_FEAT_GUEST_MASK; + if (put_user(feat, &dst->feat)) + return -EFAULT; + VM_EVENT(kvm, 3, "GET: guest UV-feat: 0x%16.16lx", feat); + + return 0; +} + static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr) { int ret = -ENXIO; @@ -1800,6 +1863,12 @@ static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr) case KVM_S390_VM_CPU_MACHINE_SUBFUNC: ret = kvm_s390_get_machine_subfunc(kvm, attr); break; + case KVM_S390_VM_CPU_PROCESSOR_UV_FEAT_GUEST: + ret = kvm_s390_get_processor_uv_feat(kvm, attr); + break; + case KVM_S390_VM_CPU_MACHINE_UV_FEAT_GUEST: + ret = kvm_s390_get_machine_uv_feat(kvm, attr); + break; } return ret; } @@ -1952,6 +2021,8 @@ static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr) case KVM_S390_VM_CPU_MACHINE_FEAT: case KVM_S390_VM_CPU_MACHINE_SUBFUNC: case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC: + case KVM_S390_VM_CPU_MACHINE_UV_FEAT_GUEST: + case 
KVM_S390_VM_CPU_PROCESSOR_UV_FEAT_GUEST: ret = 0; break; default: @@ -2406,7 +2477,7 @@ static int kvm_s390_cpus_to_pv(struct kvm *kvm, u16 *rc, u16 *rrc) struct kvm_vcpu *vcpu; /* Disable the GISA if the ultravisor does not support AIV. */ - if (!test_bit_inv(BIT_UV_FEAT_AIV, &uv_info.uv_feature_indications)) + if (!uv_has_feature(BIT_UV_FEAT_AIV)) kvm_s390_gisa_disable(kvm); kvm_for_each_vcpu(i, vcpu, kvm) { @@ -3296,6 +3367,8 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid(); kvm->arch.model.ibc = sclp.ibc & 0x0fff; + kvm->arch.model.uv_feat_guest.feat = 0; + kvm_s390_crypto_init(kvm); if (IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM)) { @@ -4611,7 +4684,7 @@ static int vcpu_pre_run(struct kvm_vcpu *vcpu) if (!kvm_is_ucontrol(vcpu->kvm)) { rc = kvm_s390_deliver_pending_interrupts(vcpu); - if (rc) + if (rc || guestdbg_exit_pending(vcpu)) return rc; } @@ -4738,7 +4811,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu) do { rc = vcpu_pre_run(vcpu); - if (rc) + if (rc || guestdbg_exit_pending(vcpu)) break; kvm_vcpu_srcu_read_unlock(vcpu); @@ -5383,6 +5456,7 @@ long kvm_arch_vcpu_async_ioctl(struct file *filp, { struct kvm_vcpu *vcpu = filp->private_data; void __user *argp = (void __user *)arg; + int rc; switch (ioctl) { case KVM_S390_IRQ: { @@ -5390,7 +5464,8 @@ long kvm_arch_vcpu_async_ioctl(struct file *filp, if (copy_from_user(&s390irq, argp, sizeof(s390irq))) return -EFAULT; - return kvm_s390_inject_vcpu(vcpu, &s390irq); + rc = kvm_s390_inject_vcpu(vcpu, &s390irq); + break; } case KVM_S390_INTERRUPT: { struct kvm_s390_interrupt s390int; @@ -5400,10 +5475,25 @@ long kvm_arch_vcpu_async_ioctl(struct file *filp, return -EFAULT; if (s390int_to_s390irq(&s390int, &s390irq)) return -EINVAL; - return kvm_s390_inject_vcpu(vcpu, &s390irq); + rc = kvm_s390_inject_vcpu(vcpu, &s390irq); + break; } + default: + rc = -ENOIOCTLCMD; + break; } - return -ENOIOCTLCMD; + + /* + * To simplify single stepping of userspace-emulated 
instructions, + * KVM_EXIT_S390_SIEIC exit sets KVM_GUESTDBG_EXIT_PENDING (see + * should_handle_per_ifetch()). However, if userspace emulation injects + * an interrupt, it needs to be cleared, so that KVM_EXIT_DEBUG happens + * after (and not before) the interrupt delivery. + */ + if (!rc) + vcpu->guest_debug &= ~KVM_GUESTDBG_EXIT_PENDING; + + return rc; } static int kvm_s390_handle_pv_vcpu_dump(struct kvm_vcpu *vcpu, diff --git a/arch/s390/kvm/pv.c b/arch/s390/kvm/pv.c index 8d3f39a8a11e..75e81ba26d04 100644 --- a/arch/s390/kvm/pv.c +++ b/arch/s390/kvm/pv.c @@ -285,7 +285,8 @@ static int kvm_s390_pv_deinit_vm_fast(struct kvm *kvm, u16 *rc, u16 *rrc) WRITE_ONCE(kvm->arch.gmap->guest_handle, 0); KVM_UV_EVENT(kvm, 3, "PROTVIRT DESTROY VM FAST: rc %x rrc %x", uvcb.header.rc, uvcb.header.rrc); - WARN_ONCE(cc, "protvirt destroy vm fast failed handle %llx rc %x rrc %x", + WARN_ONCE(cc && uvcb.header.rc != 0x104, + "protvirt destroy vm fast failed handle %llx rc %x rrc %x", kvm_s390_pv_get_handle(kvm), uvcb.header.rc, uvcb.header.rrc); /* Intended memory leak on "impossible" error */ if (!cc) @@ -575,12 +576,14 @@ int kvm_s390_pv_init_vm(struct kvm *kvm, u16 *rc, u16 *rrc) uvcb.conf_base_stor_origin = virt_to_phys((void *)kvm->arch.pv.stor_base); uvcb.conf_virt_stor_origin = (u64)kvm->arch.pv.stor_var; + uvcb.flags.ap_allow_instr = kvm->arch.model.uv_feat_guest.ap; + uvcb.flags.ap_instr_intr = kvm->arch.model.uv_feat_guest.ap_intr; cc = uv_call_sched(0, (u64)&uvcb); *rc = uvcb.header.rc; *rrc = uvcb.header.rrc; - KVM_UV_EVENT(kvm, 3, "PROTVIRT CREATE VM: handle %llx len %llx rc %x rrc %x", - uvcb.guest_handle, uvcb.guest_stor_len, *rc, *rrc); + KVM_UV_EVENT(kvm, 3, "PROTVIRT CREATE VM: handle %llx len %llx rc %x rrc %x flags %04x", + uvcb.guest_handle, uvcb.guest_stor_len, *rc, *rrc, uvcb.flags.raw); /* Outputs */ kvm->arch.pv.handle = uvcb.guest_handle; diff --git a/arch/s390/mm/dump_pagetables.c b/arch/s390/mm/dump_pagetables.c index afa5db750d92..b51666967aa1 100644 
--- a/arch/s390/mm/dump_pagetables.c +++ b/arch/s390/mm/dump_pagetables.c @@ -290,8 +290,8 @@ static int pt_dump_init(void) max_addr = (S390_lowcore.kernel_asce & _REGION_ENTRY_TYPE_MASK) >> 2; max_addr = 1UL << (max_addr * 11 + 31); address_markers[IDENTITY_AFTER_END_NR].start_address = ident_map_size; - address_markers[AMODE31_START_NR].start_address = __samode31; - address_markers[AMODE31_END_NR].start_address = __eamode31; + address_markers[AMODE31_START_NR].start_address = (unsigned long)__samode31; + address_markers[AMODE31_END_NR].start_address = (unsigned long)__eamode31; address_markers[MODULES_NR].start_address = MODULES_VADDR; address_markers[MODULES_END_NR].start_address = MODULES_END; address_markers[ABS_LOWCORE_NR].start_address = __abs_lowcore; diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index 099c4824dd8a..b678295931c3 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -598,7 +598,7 @@ void do_secure_storage_access(struct pt_regs *regs) * reliable without the misc UV feature so we need to check * for that as well. 
*/ - if (test_bit_inv(BIT_UV_FEAT_MISC, &uv_info.uv_feature_indications) && + if (uv_has_feature(BIT_UV_FEAT_MISC) && !test_bit_inv(61, &regs->int_parm_long)) { /* * When this happens, userspace did something that it diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index 8d94e29adcdb..8b94d2212d33 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -98,7 +98,7 @@ void __init paging_init(void) sparse_init(); zone_dma_bits = 31; memset(max_zone_pfns, 0, sizeof(max_zone_pfns)); - max_zone_pfns[ZONE_DMA] = PFN_DOWN(MAX_DMA_ADDRESS); + max_zone_pfns[ZONE_DMA] = virt_to_pfn(MAX_DMA_ADDRESS); max_zone_pfns[ZONE_NORMAL] = max_low_pfn; free_area_init(max_zone_pfns); } @@ -107,7 +107,7 @@ void mark_rodata_ro(void) { unsigned long size = __end_ro_after_init - __start_ro_after_init; - set_memory_ro((unsigned long)__start_ro_after_init, size >> PAGE_SHIFT); + __set_memory_ro(__start_ro_after_init, __end_ro_after_init); pr_info("Write protected read-only-after-init data: %luk\n", size >> 10); debug_checkwx(); } diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c index e5ec76271b16..b87e96c64b61 100644 --- a/arch/s390/mm/pageattr.c +++ b/arch/s390/mm/pageattr.c @@ -373,7 +373,7 @@ static int change_page_attr_alias(unsigned long addr, unsigned long end, return rc; } -int __set_memory(unsigned long addr, int numpages, unsigned long flags) +int __set_memory(unsigned long addr, unsigned long numpages, unsigned long flags) { unsigned long end; int rc; diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index e44243b9c0a4..6957d2ed97bf 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -5,7 +5,6 @@ #include <linux/memory_hotplug.h> #include <linux/memblock.h> -#include <linux/kasan.h> #include <linux/pfn.h> #include <linux/mm.h> #include <linux/init.h> @@ -291,14 +290,9 @@ out: static void try_free_pmd_table(pud_t *pud, unsigned long start) { - const unsigned long end = start + PUD_SIZE; pmd_t *pmd; int i; - /* Don't mess with any tables not fully
in 1:1 mapping & vmemmap area */ - if (end > VMALLOC_START) - return; - pmd = pmd_offset(pud, start); for (i = 0; i < PTRS_PER_PMD; i++, pmd++) if (!pmd_none(*pmd)) @@ -363,14 +357,9 @@ out: static void try_free_pud_table(p4d_t *p4d, unsigned long start) { - const unsigned long end = start + P4D_SIZE; pud_t *pud; int i; - /* Don't mess with any tables not fully in 1:1 mapping & vmemmap area */ - if (end > VMALLOC_START) - return; - pud = pud_offset(p4d, start); for (i = 0; i < PTRS_PER_PUD; i++, pud++) { if (!pud_none(*pud)) @@ -413,14 +402,9 @@ out: static void try_free_p4d_table(pgd_t *pgd, unsigned long start) { - const unsigned long end = start + PGDIR_SIZE; p4d_t *p4d; int i; - /* Don't mess with any tables not fully in 1:1 mapping & vmemmap area */ - if (end > VMALLOC_START) - return; - p4d = p4d_offset(pgd, start); for (i = 0; i < PTRS_PER_P4D; i++, p4d++) { if (!p4d_none(*p4d)) @@ -440,6 +424,9 @@ static int modify_pagetable(unsigned long start, unsigned long end, bool add, if (WARN_ON_ONCE(!PAGE_ALIGNED(start | end))) return -EINVAL; + /* Don't mess with any tables not fully in 1:1 mapping & vmemmap area */ + if (WARN_ON_ONCE(end > VMALLOC_START)) + return -EINVAL; for (addr = start; addr < end; addr = next) { next = pgd_addr_end(addr, end); pgd = pgd_offset_k(addr); @@ -650,122 +637,29 @@ void vmem_unmap_4k_page(unsigned long addr) mutex_unlock(&vmem_mutex); } -static int __init memblock_region_cmp(const void *a, const void *b) -{ - const struct memblock_region *r1 = a; - const struct memblock_region *r2 = b; - - if (r1->base < r2->base) - return -1; - if (r1->base > r2->base) - return 1; - return 0; -} - -static void __init memblock_region_swap(void *a, void *b, int size) -{ - swap(*(struct memblock_region *)a, *(struct memblock_region *)b); -} - -#ifdef CONFIG_KASAN -#define __sha(x) ((unsigned long)kasan_mem_to_shadow((void *)x)) - -static inline int set_memory_kasan(unsigned long start, unsigned long end) -{ - start = PAGE_ALIGN_DOWN(__sha(start)); - 
end = PAGE_ALIGN(__sha(end)); - return set_memory_rwnx(start, (end - start) >> PAGE_SHIFT); -} -#endif - -/* - * map whole physical memory to virtual memory (identity mapping) - * we reserve enough space in the vmalloc area for vmemmap to hotplug - * additional memory segments. - */ void __init vmem_map_init(void) { - struct memblock_region memory_rwx_regions[] = { - { - .base = 0, - .size = sizeof(struct lowcore), - .flags = MEMBLOCK_NONE, -#ifdef CONFIG_NUMA - .nid = NUMA_NO_NODE, -#endif - }, - { - .base = __pa(_stext), - .size = _etext - _stext, - .flags = MEMBLOCK_NONE, -#ifdef CONFIG_NUMA - .nid = NUMA_NO_NODE, -#endif - }, - { - .base = __pa(_sinittext), - .size = _einittext - _sinittext, - .flags = MEMBLOCK_NONE, -#ifdef CONFIG_NUMA - .nid = NUMA_NO_NODE, -#endif - }, - { - .base = __stext_amode31, - .size = __etext_amode31 - __stext_amode31, - .flags = MEMBLOCK_NONE, -#ifdef CONFIG_NUMA - .nid = NUMA_NO_NODE, -#endif - }, - }; - struct memblock_type memory_rwx = { - .regions = memory_rwx_regions, - .cnt = ARRAY_SIZE(memory_rwx_regions), - .max = ARRAY_SIZE(memory_rwx_regions), - }; - phys_addr_t base, end; - u64 i; - + __set_memory_rox(_stext, _etext); + __set_memory_ro(_etext, __end_rodata); + __set_memory_rox(_sinittext, _einittext); + __set_memory_rox(__stext_amode31, __etext_amode31); /* - * Set RW+NX attribute on all memory, except regions enumerated with - * memory_rwx exclude type. These regions need different attributes, - * which are enforced afterwards. - * - * __for_each_mem_range() iterate and exclude types should be sorted. - * The relative location of _stext and _sinittext is hardcoded in the - * linker script. However a location of __stext_amode31 and the kernel - * image itself are chosen dynamically. Thus, sort the exclude type. + * If the BEAR-enhancement facility is not installed the first + * prefix page is used to return to the previous context with + * an LPSWE instruction and therefore must be executable. 
*/ - sort(&memory_rwx_regions, - ARRAY_SIZE(memory_rwx_regions), sizeof(memory_rwx_regions[0]), - memblock_region_cmp, memblock_region_swap); - __for_each_mem_range(i, &memblock.memory, &memory_rwx, - NUMA_NO_NODE, MEMBLOCK_NONE, &base, &end, NULL) { - set_memory_rwnx((unsigned long)__va(base), - (end - base) >> PAGE_SHIFT); + if (!static_key_enabled(&cpu_has_bear)) + set_memory_x(0, 1); + if (debug_pagealloc_enabled()) { + /* + * Use RELOC_HIDE() as long as __va(0) translates to NULL, + * since performing pointer arithmetic on a NULL pointer + * has undefined behavior and generates compiler warnings. + */ + __set_memory_4k(__va(0), RELOC_HIDE(__va(0), ident_map_size)); } - -#ifdef CONFIG_KASAN - for_each_mem_range(i, &base, &end) - set_memory_kasan(base, end); -#endif - set_memory_rox((unsigned long)_stext, - (unsigned long)(_etext - _stext) >> PAGE_SHIFT); - set_memory_ro((unsigned long)_etext, - (unsigned long)(__end_rodata - _etext) >> PAGE_SHIFT); - set_memory_rox((unsigned long)_sinittext, - (unsigned long)(_einittext - _sinittext) >> PAGE_SHIFT); - set_memory_rox(__stext_amode31, - (__etext_amode31 - __stext_amode31) >> PAGE_SHIFT); - - /* lowcore must be executable for LPSWE */ - if (static_key_enabled(&cpu_has_bear)) - set_memory_nx(0, 1); - set_memory_nx(PAGE_SIZE, 1); - if (debug_pagealloc_enabled()) - set_memory_4k(0, ident_map_size >> PAGE_SHIFT); - + if (MACHINE_HAS_NX) + ctl_set_bit(0, 20); pr_info("Write protected kernel read-only data: %luk\n", (unsigned long)(__end_rodata - _stext) >> 10); } diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index 5e9371fbf3d5..de2fb12120d2 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -2088,6 +2088,7 @@ struct bpf_tramp_jit { */ int r14_off; /* Offset of saved %r14 */ int run_ctx_off; /* Offset of struct bpf_tramp_run_ctx */ + int tccnt_off; /* Offset of saved tailcall counter */ int do_fexit; /* do_fexit: label */ }; @@ -2258,12 +2259,16 @@ static int 
__arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, tjit->r14_off = alloc_stack(tjit, sizeof(u64)); tjit->run_ctx_off = alloc_stack(tjit, sizeof(struct bpf_tramp_run_ctx)); + tjit->tccnt_off = alloc_stack(tjit, sizeof(u64)); /* The caller has already reserved STACK_FRAME_OVERHEAD bytes. */ tjit->stack_size -= STACK_FRAME_OVERHEAD; tjit->orig_stack_args_off = tjit->stack_size + STACK_FRAME_OVERHEAD; /* aghi %r15,-stack_size */ EMIT4_IMM(0xa70b0000, REG_15, -tjit->stack_size); + /* mvc tccnt_off(4,%r15),stack_size+STK_OFF_TCCNT(%r15) */ + _EMIT6(0xd203f000 | tjit->tccnt_off, + 0xf000 | (tjit->stack_size + STK_OFF_TCCNT)); /* stmg %r2,%rN,fwd_reg_args_off(%r15) */ if (nr_reg_args) EMIT6_DISP_LH(0xeb000000, 0x0024, REG_2, @@ -2400,6 +2405,8 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, (nr_stack_args * sizeof(u64) - 1) << 16 | tjit->stack_args_off, 0xf000 | tjit->orig_stack_args_off); + /* mvc STK_OFF_TCCNT(4,%r15),tccnt_off(%r15) */ + _EMIT6(0xd203f000 | STK_OFF_TCCNT, 0xf000 | tjit->tccnt_off); /* lgr %r1,%r8 */ EMIT4(0xb9040000, REG_1, REG_8); /* %r1() */ @@ -2456,6 +2463,9 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, if (flags & (BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_RET_FENTRY_RET)) EMIT6_DISP_LH(0xe3000000, 0x0004, REG_2, REG_0, REG_15, tjit->retval_off); + /* mvc stack_size+STK_OFF_TCCNT(4,%r15),tccnt_off(%r15) */ + _EMIT6(0xd203f000 | (tjit->stack_size + STK_OFF_TCCNT), + 0xf000 | tjit->tccnt_off); /* aghi %r15,stack_size */ EMIT4_IMM(0xa70b0000, REG_15, tjit->stack_size); /* Emit an expoline for the following indirect jump. 
*/ diff --git a/arch/sparc/include/asm/Kbuild b/arch/sparc/include/asm/Kbuild index 595ca0be286b..43b0ae4c2c21 100644 --- a/arch/sparc/include/asm/Kbuild +++ b/arch/sparc/include/asm/Kbuild @@ -2,6 +2,5 @@ generated-y += syscall_table_32.h generated-y += syscall_table_64.h generic-y += agp.h -generic-y += export.h generic-y += kvm_para.h generic-y += mcs_spinlock.h diff --git a/arch/sparc/include/asm/ide.h b/arch/sparc/include/asm/ide.h deleted file mode 100644 index 904cc6cbc155..000000000000 --- a/arch/sparc/include/asm/ide.h +++ /dev/null @@ -1,97 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* ide.h: SPARC PCI specific IDE glue. - * - * Copyright (C) 1997 David S. Miller (davem@davemloft.net) - * Copyright (C) 1998 Eddie C. Dost (ecd@skynet.be) - * Adaptation from sparc64 version to sparc by Pete Zaitcev. - */ - -#ifndef _SPARC_IDE_H -#define _SPARC_IDE_H - -#ifdef __KERNEL__ - -#include <asm/io.h> -#ifdef CONFIG_SPARC64 -#include <asm/spitfire.h> -#include <asm/cacheflush.h> -#include <asm/page.h> -#else -#include <linux/pgtable.h> -#include <asm/psr.h> -#endif - -#define __ide_insl(data_reg, buffer, wcount) \ - __ide_insw(data_reg, buffer, (wcount)<<1) -#define __ide_outsl(data_reg, buffer, wcount) \ - __ide_outsw(data_reg, buffer, (wcount)<<1) - -/* On sparc, I/O ports and MMIO registers are accessed identically. 
*/ -#define __ide_mm_insw __ide_insw -#define __ide_mm_insl __ide_insl -#define __ide_mm_outsw __ide_outsw -#define __ide_mm_outsl __ide_outsl - -static inline void __ide_insw(void __iomem *port, void *dst, u32 count) -{ -#if defined(CONFIG_SPARC64) && defined(DCACHE_ALIASING_POSSIBLE) - unsigned long end = (unsigned long)dst + (count << 1); -#endif - u16 *ps = dst; - u32 *pi; - - if(((unsigned long)ps) & 0x2) { - *ps++ = __raw_readw(port); - count--; - } - pi = (u32 *)ps; - while(count >= 2) { - u32 w; - - w = __raw_readw(port) << 16; - w |= __raw_readw(port); - *pi++ = w; - count -= 2; - } - ps = (u16 *)pi; - if(count) - *ps++ = __raw_readw(port); - -#if defined(CONFIG_SPARC64) && defined(DCACHE_ALIASING_POSSIBLE) - __flush_dcache_range((unsigned long)dst, end); -#endif -} - -static inline void __ide_outsw(void __iomem *port, const void *src, u32 count) -{ -#if defined(CONFIG_SPARC64) && defined(DCACHE_ALIASING_POSSIBLE) - unsigned long end = (unsigned long)src + (count << 1); -#endif - const u16 *ps = src; - const u32 *pi; - - if(((unsigned long)src) & 0x2) { - __raw_writew(*ps++, port); - count--; - } - pi = (const u32 *)ps; - while(count >= 2) { - u32 w; - - w = *pi++; - __raw_writew((w >> 16), port); - __raw_writew(w, port); - count -= 2; - } - ps = (const u16 *)pi; - if(count) - __raw_writew(*ps, port); - -#if defined(CONFIG_SPARC64) && defined(DCACHE_ALIASING_POSSIBLE) - __flush_dcache_range((unsigned long)src, end); -#endif -} - -#endif /* __KERNEL__ */ - -#endif /* _SPARC_IDE_H */ diff --git a/arch/sparc/include/asm/vio.h b/arch/sparc/include/asm/vio.h index 8a0c3c11c9ce..587fb7841096 100644 --- a/arch/sparc/include/asm/vio.h +++ b/arch/sparc/include/asm/vio.h @@ -284,7 +284,7 @@ struct vio_dring_state { struct ldc_trans_cookie cookies[VIO_MAX_RING_COOKIES]; }; -#define VIO_TAG_SIZE ((int)sizeof(struct vio_msg_tag)) +#define VIO_TAG_SIZE (sizeof(struct vio_msg_tag)) #define VIO_VCC_MTU_SIZE (LDC_PACKET_SIZE - VIO_TAG_SIZE) struct vio_vcc { diff --git 
a/arch/sparc/kernel/entry.S b/arch/sparc/kernel/entry.S index a269ad2fe6df..a3fdee4cd6fa 100644 --- a/arch/sparc/kernel/entry.S +++ b/arch/sparc/kernel/entry.S @@ -8,6 +8,7 @@ * Copyright (C) 1997 Anton Blanchard (anton@progsoc.uts.edu.au) */ +#include <linux/export.h> #include <linux/linkage.h> #include <linux/errno.h> #include <linux/pgtable.h> @@ -30,7 +31,6 @@ #include <asm/unistd.h> #include <asm/asmmacro.h> -#include <asm/export.h> #define curptr g6 diff --git a/arch/sparc/kernel/head_32.S b/arch/sparc/kernel/head_32.S index 6044b82b9767..964c61b5cd03 100644 --- a/arch/sparc/kernel/head_32.S +++ b/arch/sparc/kernel/head_32.S @@ -11,6 +11,7 @@ * CompactPCI platform by Eric Brower, 1999. */ +#include <linux/export.h> #include <linux/version.h> #include <linux/init.h> @@ -25,7 +26,6 @@ #include <asm/thread_info.h> /* TI_UWINMASK */ #include <asm/errno.h> #include <asm/pgtable.h> /* PGDIR_SHIFT */ -#include <asm/export.h> .data /* The following are used with the prom_vector node-ops to figure out diff --git a/arch/sparc/kernel/head_64.S b/arch/sparc/kernel/head_64.S index 72a5bdc833ea..cf0549134234 100644 --- a/arch/sparc/kernel/head_64.S +++ b/arch/sparc/kernel/head_64.S @@ -9,6 +9,7 @@ #include <linux/version.h> #include <linux/errno.h> +#include <linux/export.h> #include <linux/threads.h> #include <linux/init.h> #include <linux/linkage.h> @@ -33,7 +34,6 @@ #include <asm/estate.h> #include <asm/sfafsr.h> #include <asm/unistd.h> -#include <asm/export.h> /* This section from from _start to sparc64_boot_end should fit into * 0x0000000000404000 to 0x0000000000408000. 
diff --git a/arch/sparc/kernel/process_64.c b/arch/sparc/kernel/process_64.c index 1ea3f37fa985..529adfecd58c 100644 --- a/arch/sparc/kernel/process_64.c +++ b/arch/sparc/kernel/process_64.c @@ -295,7 +295,7 @@ void arch_trigger_cpumask_backtrace(const cpumask_t *mask, int exclude_cpu) #ifdef CONFIG_MAGIC_SYSRQ -static void sysrq_handle_globreg(int key) +static void sysrq_handle_globreg(u8 key) { trigger_all_cpu_backtrace(); } @@ -370,7 +370,7 @@ static void pmu_snapshot_all_cpus(void) spin_unlock_irqrestore(&global_cpu_snapshot_lock, flags); } -static void sysrq_handle_globpmu(int key) +static void sysrq_handle_globpmu(u8 key) { pmu_snapshot_all_cpus(); } diff --git a/arch/sparc/lib/U1memcpy.S b/arch/sparc/lib/U1memcpy.S index a6f4ee391897..635398ec7540 100644 --- a/arch/sparc/lib/U1memcpy.S +++ b/arch/sparc/lib/U1memcpy.S @@ -6,10 +6,10 @@ */ #ifdef __KERNEL__ +#include <linux/export.h> #include <linux/linkage.h> #include <asm/visasm.h> #include <asm/asi.h> -#include <asm/export.h> #define GLOBAL_SPARE g7 #else #define GLOBAL_SPARE g5 diff --git a/arch/sparc/lib/VISsave.S b/arch/sparc/lib/VISsave.S index 9c8eb2017d5b..31a0c336c185 100644 --- a/arch/sparc/lib/VISsave.S +++ b/arch/sparc/lib/VISsave.S @@ -7,6 +7,7 @@ * Copyright (C) 1998 Jakub Jelinek (jj@ultra.linux.cz) */ +#include <linux/export.h> #include <linux/linkage.h> #include <asm/asi.h> @@ -14,7 +15,6 @@ #include <asm/ptrace.h> #include <asm/visasm.h> #include <asm/thread_info.h> -#include <asm/export.h> /* On entry: %o5=current FPRS value, %g7 is callers address */ /* May clobber %o5, %g1, %g2, %g3, %g7, %icc, %xcc */ diff --git a/arch/sparc/lib/ashldi3.S b/arch/sparc/lib/ashldi3.S index 2d72de88af90..2a9e7c4fb260 100644 --- a/arch/sparc/lib/ashldi3.S +++ b/arch/sparc/lib/ashldi3.S @@ -6,8 +6,8 @@ * Copyright (C) 1999 David S. 
Miller (davem@redhat.com) */ +#include <linux/export.h> #include <linux/linkage.h> -#include <asm/export.h> .text ENTRY(__ashldi3) diff --git a/arch/sparc/lib/ashrdi3.S b/arch/sparc/lib/ashrdi3.S index 05dfda9f5005..8fd0b311722f 100644 --- a/arch/sparc/lib/ashrdi3.S +++ b/arch/sparc/lib/ashrdi3.S @@ -6,8 +6,8 @@ * Copyright (C) 1995 David S. Miller (davem@caip.rutgers.edu) */ +#include <linux/export.h> #include <linux/linkage.h> -#include <asm/export.h> .text ENTRY(__ashrdi3) diff --git a/arch/sparc/lib/atomic_64.S b/arch/sparc/lib/atomic_64.S index 8245d4a97301..4f8cab2fb9cd 100644 --- a/arch/sparc/lib/atomic_64.S +++ b/arch/sparc/lib/atomic_64.S @@ -4,10 +4,10 @@ * Copyright (C) 1999, 2007 2012 David S. Miller (davem@davemloft.net) */ +#include <linux/export.h> #include <linux/linkage.h> #include <asm/asi.h> #include <asm/backoff.h> -#include <asm/export.h> .text diff --git a/arch/sparc/lib/bitops.S b/arch/sparc/lib/bitops.S index 9d647f977618..9c91cbb310e7 100644 --- a/arch/sparc/lib/bitops.S +++ b/arch/sparc/lib/bitops.S @@ -4,10 +4,10 @@ * Copyright (C) 2000, 2007 David S. Miller (davem@davemloft.net) */ +#include <linux/export.h> #include <linux/linkage.h> #include <asm/asi.h> #include <asm/backoff.h> -#include <asm/export.h> .text diff --git a/arch/sparc/lib/blockops.S b/arch/sparc/lib/blockops.S index 76ddd1ff6833..5b92959a4d48 100644 --- a/arch/sparc/lib/blockops.S +++ b/arch/sparc/lib/blockops.S @@ -5,9 +5,9 @@ * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu) */ +#include <linux/export.h> #include <linux/linkage.h> #include <asm/page.h> -#include <asm/export.h> /* Zero out 64 bytes of memory at (buf + offset). * Assumes %g1 contains zero. diff --git a/arch/sparc/lib/bzero.S b/arch/sparc/lib/bzero.S index 87fec4cbe10c..2bfa44a6b25e 100644 --- a/arch/sparc/lib/bzero.S +++ b/arch/sparc/lib/bzero.S @@ -5,8 +5,8 @@ * Copyright (C) 2005 David S. 
Miller <davem@davemloft.net> */ +#include <linux/export.h> #include <linux/linkage.h> -#include <asm/export.h> .text diff --git a/arch/sparc/lib/checksum_32.S b/arch/sparc/lib/checksum_32.S index 781e39b3c009..84ad709cbecb 100644 --- a/arch/sparc/lib/checksum_32.S +++ b/arch/sparc/lib/checksum_32.S @@ -14,8 +14,8 @@ * BSD4.4 portable checksum routine */ +#include <linux/export.h> #include <asm/errno.h> -#include <asm/export.h> #define CSUM_BIGCHUNK(buf, offset, sum, t0, t1, t2, t3, t4, t5) \ ldd [buf + offset + 0x00], t0; \ diff --git a/arch/sparc/lib/checksum_64.S b/arch/sparc/lib/checksum_64.S index 9700ef1730df..32b626f3fe4d 100644 --- a/arch/sparc/lib/checksum_64.S +++ b/arch/sparc/lib/checksum_64.S @@ -14,7 +14,7 @@ * BSD4.4 portable checksum routine */ -#include <asm/export.h> +#include <linux/export.h> .text csum_partial_fix_alignment: diff --git a/arch/sparc/lib/clear_page.S b/arch/sparc/lib/clear_page.S index 302d3454a994..e63458194f5a 100644 --- a/arch/sparc/lib/clear_page.S +++ b/arch/sparc/lib/clear_page.S @@ -5,13 +5,13 @@ * Copyright (C) 1997 Jakub Jelinek (jakub@redhat.com) */ +#include <linux/export.h> #include <linux/pgtable.h> #include <asm/visasm.h> #include <asm/thread_info.h> #include <asm/page.h> #include <asm/spitfire.h> #include <asm/head.h> -#include <asm/export.h> /* What we used to do was lock a TLB entry into a specific * TLB slot, clear the page with interrupts disabled, then diff --git a/arch/sparc/lib/copy_in_user.S b/arch/sparc/lib/copy_in_user.S index 66e90bf528e2..e23e6a69ff92 100644 --- a/arch/sparc/lib/copy_in_user.S +++ b/arch/sparc/lib/copy_in_user.S @@ -4,9 +4,9 @@ * Copyright (C) 1999, 2000, 2004 David S. 
Miller (davem@redhat.com) */ +#include <linux/export.h> #include <linux/linkage.h> #include <asm/asi.h> -#include <asm/export.h> #define XCC xcc diff --git a/arch/sparc/lib/copy_page.S b/arch/sparc/lib/copy_page.S index 5ebcfd479f4f..7a041f3ebc58 100644 --- a/arch/sparc/lib/copy_page.S +++ b/arch/sparc/lib/copy_page.S @@ -5,13 +5,13 @@ * Copyright (C) 1997 Jakub Jelinek (jakub@redhat.com) */ +#include <linux/export.h> #include <asm/visasm.h> #include <asm/thread_info.h> #include <asm/page.h> #include <linux/pgtable.h> #include <asm/spitfire.h> #include <asm/head.h> -#include <asm/export.h> /* What we used to do was lock a TLB entry into a specific * TLB slot, clear the page with interrupts disabled, then diff --git a/arch/sparc/lib/copy_user.S b/arch/sparc/lib/copy_user.S index 954572c78539..7bb2ef68881d 100644 --- a/arch/sparc/lib/copy_user.S +++ b/arch/sparc/lib/copy_user.S @@ -12,11 +12,11 @@ * Returns 0 if successful, otherwise count of bytes not copied yet */ +#include <linux/export.h> #include <asm/ptrace.h> #include <asm/asmmacro.h> #include <asm/page.h> #include <asm/thread_info.h> -#include <asm/export.h> /* Work around cpp -rob */ #define ALLOC #alloc diff --git a/arch/sparc/lib/csum_copy.S b/arch/sparc/lib/csum_copy.S index d839956407a7..f968e83bc93b 100644 --- a/arch/sparc/lib/csum_copy.S +++ b/arch/sparc/lib/csum_copy.S @@ -4,7 +4,7 @@ * Copyright (C) 2005 David S. Miller <davem@davemloft.net> */ -#include <asm/export.h> +#include <linux/export.h> #ifdef __KERNEL__ #define GLOBAL_SPARE %g7 diff --git a/arch/sparc/lib/divdi3.S b/arch/sparc/lib/divdi3.S index a7389409d9fa..4ba901acd572 100644 --- a/arch/sparc/lib/divdi3.S +++ b/arch/sparc/lib/divdi3.S @@ -5,7 +5,7 @@ This file is part of GNU CC. 
*/ -#include <asm/export.h> +#include <linux/export.h> .text .align 4 .globl __divdi3 diff --git a/arch/sparc/lib/ffs.S b/arch/sparc/lib/ffs.S index 5a11d864fa05..3a9ad8ffdfe8 100644 --- a/arch/sparc/lib/ffs.S +++ b/arch/sparc/lib/ffs.S @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ +#include <linux/export.h> #include <linux/linkage.h> -#include <asm/export.h> .register %g2,#scratch diff --git a/arch/sparc/lib/fls.S b/arch/sparc/lib/fls.S index 06b8d300bcae..ccf97fb7d8cd 100644 --- a/arch/sparc/lib/fls.S +++ b/arch/sparc/lib/fls.S @@ -5,8 +5,8 @@ * and onward. */ +#include <linux/export.h> #include <linux/linkage.h> -#include <asm/export.h> .text .register %g2, #scratch diff --git a/arch/sparc/lib/fls64.S b/arch/sparc/lib/fls64.S index c83e22ae9586..87005b67d378 100644 --- a/arch/sparc/lib/fls64.S +++ b/arch/sparc/lib/fls64.S @@ -5,8 +5,8 @@ * and onward. */ +#include <linux/export.h> #include <linux/linkage.h> -#include <asm/export.h> .text .register %g2, #scratch diff --git a/arch/sparc/lib/hweight.S b/arch/sparc/lib/hweight.S index 0ddbbb031822..eebee59b0655 100644 --- a/arch/sparc/lib/hweight.S +++ b/arch/sparc/lib/hweight.S @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ +#include <linux/export.h> #include <linux/linkage.h> -#include <asm/export.h> .text .align 32 diff --git a/arch/sparc/lib/ipcsum.S b/arch/sparc/lib/ipcsum.S index 531d89c9d5d9..7fa8fd4b795a 100644 --- a/arch/sparc/lib/ipcsum.S +++ b/arch/sparc/lib/ipcsum.S @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ +#include <linux/export.h> #include <linux/linkage.h> -#include <asm/export.h> .text ENTRY(ip_fast_csum) /* %o0 = iph, %o1 = ihl */ diff --git a/arch/sparc/lib/locks.S b/arch/sparc/lib/locks.S index 9a1289a3fb28..47a39f4384a2 100644 --- a/arch/sparc/lib/locks.S +++ b/arch/sparc/lib/locks.S @@ -7,11 +7,11 @@ * Copyright (C) 1998 Jakub Jelinek (jj@ultra.linux.cz) */ +#include <linux/export.h> #include <asm/ptrace.h> #include <asm/psr.h> #include <asm/smp.h> #include 
<asm/spinlock.h> -#include <asm/export.h> .text .align 4 diff --git a/arch/sparc/lib/lshrdi3.S b/arch/sparc/lib/lshrdi3.S index 509ca6682da8..09bf581a0ba5 100644 --- a/arch/sparc/lib/lshrdi3.S +++ b/arch/sparc/lib/lshrdi3.S @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ +#include <linux/export.h> #include <linux/linkage.h> -#include <asm/export.h> ENTRY(__lshrdi3) cmp %o2, 0 diff --git a/arch/sparc/lib/mcount.S b/arch/sparc/lib/mcount.S index deba6fa0bc78..f7f7910eb41e 100644 --- a/arch/sparc/lib/mcount.S +++ b/arch/sparc/lib/mcount.S @@ -6,8 +6,8 @@ * This can also be tweaked for kernel stack overflow detection. */ +#include <linux/export.h> #include <linux/linkage.h> -#include <asm/export.h> /* * This is the main variant and is called by C code. GCC's -pg option diff --git a/arch/sparc/lib/memcmp.S b/arch/sparc/lib/memcmp.S index a18076ef5af1..c87e8000feba 100644 --- a/arch/sparc/lib/memcmp.S +++ b/arch/sparc/lib/memcmp.S @@ -5,9 +5,9 @@ * Copyright (C) 2000, 2008 David S. Miller (davem@davemloft.net) */ +#include <linux/export.h> #include <linux/linkage.h> #include <asm/asm.h> -#include <asm/export.h> .text ENTRY(memcmp) diff --git a/arch/sparc/lib/memcpy.S b/arch/sparc/lib/memcpy.S index ee823d8c9215..57b1ae0f5924 100644 --- a/arch/sparc/lib/memcpy.S +++ b/arch/sparc/lib/memcpy.S @@ -8,7 +8,8 @@ * Copyright (C) 1996 Jakub Jelinek (jj@sunsite.mff.cuni.cz) */ -#include <asm/export.h> +#include <linux/export.h> + #define FUNC(x) \ .globl x; \ .type x,@function; \ diff --git a/arch/sparc/lib/memmove.S b/arch/sparc/lib/memmove.S index 3132b6316144..543dda7b9dac 100644 --- a/arch/sparc/lib/memmove.S +++ b/arch/sparc/lib/memmove.S @@ -5,8 +5,8 @@ * Copyright (C) 1996, 1997, 1998, 1999 Jakub Jelinek (jj@ultra.linux.cz) */ +#include <linux/export.h> #include <linux/linkage.h> -#include <asm/export.h> .text ENTRY(memmove) /* o0=dst o1=src o2=len */ diff --git a/arch/sparc/lib/memscan_32.S b/arch/sparc/lib/memscan_32.S index c4c2d5b3a2e9..5386a3a20019 100644 --- 
a/arch/sparc/lib/memscan_32.S +++ b/arch/sparc/lib/memscan_32.S @@ -5,7 +5,7 @@ * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu) */ -#include <asm/export.h> +#include <linux/export.h> /* In essence, this is just a fancy strlen. */ diff --git a/arch/sparc/lib/memscan_64.S b/arch/sparc/lib/memscan_64.S index 36dd638905c3..70a4f21057f2 100644 --- a/arch/sparc/lib/memscan_64.S +++ b/arch/sparc/lib/memscan_64.S @@ -6,7 +6,7 @@ * Copyright (C) 1998 David S. Miller (davem@redhat.com) */ - #include <asm/export.h> +#include <linux/export.h> #define HI_MAGIC 0x8080808080808080 #define LO_MAGIC 0x0101010101010101 diff --git a/arch/sparc/lib/memset.S b/arch/sparc/lib/memset.S index eaff68213fdf..a33419dbb464 100644 --- a/arch/sparc/lib/memset.S +++ b/arch/sparc/lib/memset.S @@ -9,8 +9,8 @@ * clear_user. */ +#include <linux/export.h> #include <asm/ptrace.h> -#include <asm/export.h> /* Work around cpp -rob */ #define ALLOC #alloc diff --git a/arch/sparc/lib/muldi3.S b/arch/sparc/lib/muldi3.S index 53054dee66d6..7e1e8cd30a22 100644 --- a/arch/sparc/lib/muldi3.S +++ b/arch/sparc/lib/muldi3.S @@ -5,7 +5,7 @@ This file is part of GNU CC. 
*/ -#include <asm/export.h> +#include <linux/export.h> .text .align 4 .globl __muldi3 diff --git a/arch/sparc/lib/multi3.S b/arch/sparc/lib/multi3.S index 2f187b299345..5bb4c122a2cf 100644 --- a/arch/sparc/lib/multi3.S +++ b/arch/sparc/lib/multi3.S @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ +#include <linux/export.h> #include <linux/linkage.h> -#include <asm/export.h> .text .align 4 diff --git a/arch/sparc/lib/strlen.S b/arch/sparc/lib/strlen.S index dd111bbad5df..27478b3f1647 100644 --- a/arch/sparc/lib/strlen.S +++ b/arch/sparc/lib/strlen.S @@ -6,9 +6,9 @@ * Copyright (C) 1996, 1997 Jakub Jelinek (jj@sunsite.mff.cuni.cz) */ +#include <linux/export.h> #include <linux/linkage.h> #include <asm/asm.h> -#include <asm/export.h> #define LO_MAGIC 0x01010101 #define HI_MAGIC 0x80808080 diff --git a/arch/sparc/lib/strncmp_32.S b/arch/sparc/lib/strncmp_32.S index 794733f036b6..387bbf621548 100644 --- a/arch/sparc/lib/strncmp_32.S +++ b/arch/sparc/lib/strncmp_32.S @@ -4,8 +4,8 @@ * generic strncmp routine. */ +#include <linux/export.h> #include <linux/linkage.h> -#include <asm/export.h> .text ENTRY(strncmp) diff --git a/arch/sparc/lib/strncmp_64.S b/arch/sparc/lib/strncmp_64.S index 3d37d65f674c..76c1207ecf5a 100644 --- a/arch/sparc/lib/strncmp_64.S +++ b/arch/sparc/lib/strncmp_64.S @@ -5,9 +5,9 @@ * Copyright (C) 1997 Jakub Jelinek (jj@sunsite.mff.cuni.cz) */ +#include <linux/export.h> #include <linux/linkage.h> #include <asm/asi.h> -#include <asm/export.h> .text ENTRY(strncmp) diff --git a/arch/sparc/lib/xor.S b/arch/sparc/lib/xor.S index f6af7c7ee6fc..35461e3b2a9b 100644 --- a/arch/sparc/lib/xor.S +++ b/arch/sparc/lib/xor.S @@ -9,12 +9,12 @@ * Copyright (C) 2006 David S. 
Miller <davem@davemloft.net> */ +#include <linux/export.h> #include <linux/linkage.h> #include <asm/visasm.h> #include <asm/asi.h> #include <asm/dcu.h> #include <asm/spitfire.h> -#include <asm/export.h> /* * Requirements: diff --git a/arch/sparc/mm/tlb.c b/arch/sparc/mm/tlb.c index 0d41c94ec3ac..b44d79d778c7 100644 --- a/arch/sparc/mm/tlb.c +++ b/arch/sparc/mm/tlb.c @@ -128,6 +128,7 @@ void tlb_batch_add(struct mm_struct *mm, unsigned long vaddr, goto no_cache_flush; /* A real file page? */ + folio = page_folio(page); mapping = folio_flush_mapping(folio); if (!mapping) goto no_cache_flush; diff --git a/arch/um/Kbuild b/arch/um/Kbuild index a4e40e534e6a..6cf0c1e5927b 100644 --- a/arch/um/Kbuild +++ b/arch/um/Kbuild @@ -1 +1,3 @@ # SPDX-License-Identifier: GPL-2.0-only + +obj-y += kernel/ drivers/ os-Linux/ diff --git a/arch/um/Makefile b/arch/um/Makefile index da4d5256af2f..82f05f250634 100644 --- a/arch/um/Makefile +++ b/arch/um/Makefile @@ -18,15 +18,10 @@ else endif ARCH_DIR := arch/um -OS := $(shell uname -s) # We require bash because the vmlinux link and loader script cpp use bash # features. SHELL := /bin/bash -core-y += $(ARCH_DIR)/kernel/ \ - $(ARCH_DIR)/drivers/ \ - $(ARCH_DIR)/os-$(OS)/ - MODE_INCLUDE += -I$(srctree)/$(ARCH_DIR)/include/shared/skas HEADER_ARCH := $(SUBARCH) @@ -78,7 +73,7 @@ USER_CFLAGS = $(patsubst $(KERNEL_DEFINES),,$(patsubst -I%,,$(KBUILD_CFLAGS))) \ -idirafter $(objtree)/include -D__KERNEL__ -D__UM_HOST__ #This will adjust *FLAGS accordingly to the platform. -include $(srctree)/$(ARCH_DIR)/Makefile-os-$(OS) +include $(srctree)/$(ARCH_DIR)/Makefile-os-Linux KBUILD_CPPFLAGS += -I$(srctree)/$(HOST_DIR)/include \ -I$(srctree)/$(HOST_DIR)/include/uapi \ @@ -155,4 +150,4 @@ archclean: @find . 
\( -name '*.bb' -o -name '*.bbg' -o -name '*.da' \ -o -name '*.gcov' \) -type f -print | xargs rm -f -export HEADER_ARCH SUBARCH USER_CFLAGS CFLAGS_NO_HARDENING OS DEV_NULL_PATH +export HEADER_ARCH SUBARCH USER_CFLAGS CFLAGS_NO_HARDENING DEV_NULL_PATH diff --git a/arch/um/configs/i386_defconfig b/arch/um/configs/i386_defconfig index 630be793759e..e543cbac8792 100644 --- a/arch/um/configs/i386_defconfig +++ b/arch/um/configs/i386_defconfig @@ -34,6 +34,7 @@ CONFIG_TTY_CHAN=y CONFIG_XTERM_CHAN=y CONFIG_CON_CHAN="pts" CONFIG_SSL_CHAN="pts" +CONFIG_SOUND=m CONFIG_UML_SOUND=m CONFIG_DEVTMPFS=y CONFIG_DEVTMPFS_MOUNT=y diff --git a/arch/um/configs/x86_64_defconfig b/arch/um/configs/x86_64_defconfig index 8540d3370272..939cb12318ca 100644 --- a/arch/um/configs/x86_64_defconfig +++ b/arch/um/configs/x86_64_defconfig @@ -32,6 +32,7 @@ CONFIG_TTY_CHAN=y CONFIG_XTERM_CHAN=y CONFIG_CON_CHAN="pts" CONFIG_SSL_CHAN="pts" +CONFIG_SOUND=m CONFIG_UML_SOUND=m CONFIG_DEVTMPFS=y CONFIG_DEVTMPFS_MOUNT=y diff --git a/arch/um/drivers/Kconfig b/arch/um/drivers/Kconfig index 36911b1fddcf..b94b2618e7d8 100644 --- a/arch/um/drivers/Kconfig +++ b/arch/um/drivers/Kconfig @@ -111,24 +111,14 @@ config SSL_CHAN config UML_SOUND tristate "Sound support" + depends on SOUND + select SOUND_OSS_CORE help This option enables UML sound support. If enabled, it will pull in - soundcore and the UML hostaudio relay, which acts as a intermediary + the UML hostaudio relay, which acts as a intermediary between the host's dsp and mixer devices and the UML sound system. It is safe to say 'Y' here. 
-config SOUND - tristate - default UML_SOUND - -config SOUND_OSS_CORE - bool - default UML_SOUND - -config HOSTAUDIO - tristate - default UML_SOUND - endmenu menu "UML Network Devices" diff --git a/arch/um/drivers/Makefile b/arch/um/drivers/Makefile index a461a950f051..0e6af81096fd 100644 --- a/arch/um/drivers/Makefile +++ b/arch/um/drivers/Makefile @@ -54,7 +54,7 @@ obj-$(CONFIG_UML_NET) += net.o obj-$(CONFIG_MCONSOLE) += mconsole.o obj-$(CONFIG_MMAPPER) += mmapper_kern.o obj-$(CONFIG_BLK_DEV_UBD) += ubd.o -obj-$(CONFIG_HOSTAUDIO) += hostaudio.o +obj-$(CONFIG_UML_SOUND) += hostaudio.o obj-$(CONFIG_NULL_CHAN) += null.o obj-$(CONFIG_PORT_CHAN) += port.o obj-$(CONFIG_PTY_CHAN) += pty.o diff --git a/arch/um/drivers/hostaudio_kern.c b/arch/um/drivers/hostaudio_kern.c index 5b064d360cb7..c42b793bce65 100644 --- a/arch/um/drivers/hostaudio_kern.c +++ b/arch/um/drivers/hostaudio_kern.c @@ -310,7 +310,7 @@ static const struct file_operations hostmixer_fops = { .release = hostmixer_release, }; -struct { +static struct { int dev_audio; int dev_mixer; } module_data; diff --git a/arch/um/drivers/line.c b/arch/um/drivers/line.c index 02b0befd6763..b98545f3edb5 100644 --- a/arch/um/drivers/line.c +++ b/arch/um/drivers/line.c @@ -184,7 +184,7 @@ void line_flush_chars(struct tty_struct *tty) line_flush_buffer(tty); } -int line_write(struct tty_struct *tty, const unsigned char *buf, int len) +ssize_t line_write(struct tty_struct *tty, const u8 *buf, size_t len) { struct line *line = tty->driver_data; unsigned long flags; diff --git a/arch/um/drivers/line.h b/arch/um/drivers/line.h index f15be75a3bf3..e84fb9b4165e 100644 --- a/arch/um/drivers/line.h +++ b/arch/um/drivers/line.h @@ -64,8 +64,7 @@ extern void line_cleanup(struct tty_struct *tty); extern void line_hangup(struct tty_struct *tty); extern int line_setup(char **conf, unsigned nlines, char **def, char *init, char *name); -extern int line_write(struct tty_struct *tty, const unsigned char *buf, - int len); +extern ssize_t 
line_write(struct tty_struct *tty, const u8 *buf, size_t len); extern unsigned int line_chars_in_buffer(struct tty_struct *tty); extern void line_flush_buffer(struct tty_struct *tty); extern void line_flush_chars(struct tty_struct *tty); diff --git a/arch/um/drivers/port_kern.c b/arch/um/drivers/port_kern.c index efa8b7304090..c52b3ff3c092 100644 --- a/arch/um/drivers/port_kern.c +++ b/arch/um/drivers/port_kern.c @@ -144,7 +144,7 @@ static void port_work_proc(struct work_struct *unused) local_irq_restore(flags); } -DECLARE_WORK(port_work, port_work_proc); +static DECLARE_WORK(port_work, port_work_proc); static irqreturn_t port_interrupt(int irq, void *data) { diff --git a/arch/um/drivers/slirp_kern.c b/arch/um/drivers/slirp_kern.c index 2d9769237f08..0a6151ee9572 100644 --- a/arch/um/drivers/slirp_kern.c +++ b/arch/um/drivers/slirp_kern.c @@ -15,7 +15,7 @@ struct slirp_init { struct arg_list_dummy_wrapper argw; /* XXX should be simpler... */ }; -void slirp_init(struct net_device *dev, void *data) +static void slirp_init(struct net_device *dev, void *data) { struct uml_net_private *private; struct slirp_data *spri; diff --git a/arch/um/drivers/virt-pci.c b/arch/um/drivers/virt-pci.c index 7699ca5f35d4..ffe2ee8a0246 100644 --- a/arch/um/drivers/virt-pci.c +++ b/arch/um/drivers/virt-pci.c @@ -544,6 +544,7 @@ static void um_pci_irq_vq_cb(struct virtqueue *vq) } } +#ifdef CONFIG_OF /* Copied from arch/x86/kernel/devicetree.c */ struct device_node *pcibios_get_phb_of_node(struct pci_bus *bus) { @@ -562,6 +563,7 @@ struct device_node *pcibios_get_phb_of_node(struct pci_bus *bus) } return NULL; } +#endif static int um_pci_init_vqs(struct um_pci_device *dev) { diff --git a/arch/um/drivers/xterm_kern.c b/arch/um/drivers/xterm_kern.c index 50f11b7b4774..8011e51993d5 100644 --- a/arch/um/drivers/xterm_kern.c +++ b/arch/um/drivers/xterm_kern.c @@ -9,6 +9,7 @@ #include <asm/irq.h> #include <irq_kern.h> #include <os.h> +#include "xterm.h" struct xterm_wait { struct completion 
ready; diff --git a/arch/um/include/shared/irq_kern.h b/arch/um/include/shared/irq_kern.h index f2dc817abb7c..44357fa6ee29 100644 --- a/arch/um/include/shared/irq_kern.h +++ b/arch/um/include/shared/irq_kern.h @@ -76,4 +76,5 @@ static inline bool um_irq_timetravel_handler_used(void) } void um_free_irq(int irq, void *dev_id); +void free_irqs(void); #endif diff --git a/arch/um/kernel/irq.c b/arch/um/kernel/irq.c index a8873d9bc28b..635d44606bfe 100644 --- a/arch/um/kernel/irq.c +++ b/arch/um/kernel/irq.c @@ -23,8 +23,6 @@ #include <linux/time-internal.h> -extern void free_irqs(void); - /* When epoll triggers we do not know why it did so * we can also have different IRQs for read and write. * This is why we keep a small irq_reg array for each fd - diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 95315d3474a2..5bfe5caaa444 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -335,9 +335,5 @@ define archhelp echo ' bzdisk/fdimage*/hdimage/isoimage also accept:' echo ' FDARGS="..." arguments for the booted kernel' echo ' FDINITRD=file initrd for the booted kernel' - echo '' - echo ' kvm_guest.config - Enable Kconfig items for running this kernel as a KVM guest' - echo ' xen.config - Enable Kconfig items for running this kernel as a Xen guest' - echo ' x86_debug.config - Enable tip tree debugging options for testing' endef diff --git a/arch/x86/hyperv/hv_apic.c b/arch/x86/hyperv/hv_apic.c index 187e13b15e9a..97bfe5f0531f 100644 --- a/arch/x86/hyperv/hv_apic.c +++ b/arch/x86/hyperv/hv_apic.c @@ -175,8 +175,11 @@ static bool __send_ipi_mask(const struct cpumask *mask, int vector, (exclude_self && weight == 1 && cpumask_test_cpu(this_cpu, mask))) return true; - if (!hv_hypercall_pg) - return false; + /* A fully enlightened TDX VM uses GHCI rather than hv_hypercall_pg. 
*/ + if (!hv_hypercall_pg) { + if (ms_hyperv.paravisor_present || !hv_isolation_type_tdx()) + return false; + } if ((vector < HV_IPI_LOW_VECTOR) || (vector > HV_IPI_HIGH_VECTOR)) return false; @@ -229,9 +232,15 @@ static bool __send_ipi_one(int cpu, int vector) trace_hyperv_send_ipi_one(cpu, vector); - if (!hv_hypercall_pg || (vp == VP_INVAL)) + if (vp == VP_INVAL) return false; + /* A fully enlightened TDX VM uses GHCI rather than hv_hypercall_pg. */ + if (!hv_hypercall_pg) { + if (ms_hyperv.paravisor_present || !hv_isolation_type_tdx()) + return false; + } + if ((vector < HV_IPI_LOW_VECTOR) || (vector > HV_IPI_HIGH_VECTOR)) return false; diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index 507d98331e7c..783ed339f341 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -19,6 +19,7 @@ #include <asm/hyperv-tlfs.h> #include <asm/mshyperv.h> #include <asm/idtentry.h> +#include <asm/set_memory.h> #include <linux/kexec.h> #include <linux/version.h> #include <linux/vmalloc.h> @@ -52,7 +53,7 @@ static int hyperv_init_ghcb(void) void *ghcb_va; void **ghcb_base; - if (!hv_isolation_type_snp()) + if (!ms_hyperv.paravisor_present || !hv_isolation_type_snp()) return 0; if (!hv_ghcb_pg) @@ -80,7 +81,7 @@ static int hyperv_init_ghcb(void) static int hv_cpu_init(unsigned int cpu) { union hv_vp_assist_msr_contents msr = { 0 }; - struct hv_vp_assist_page **hvp = &hv_vp_assist_page[cpu]; + struct hv_vp_assist_page **hvp; int ret; ret = hv_common_cpu_init(cpu); @@ -90,6 +91,7 @@ static int hv_cpu_init(unsigned int cpu) if (!hv_vp_assist_page) return 0; + hvp = &hv_vp_assist_page[cpu]; if (hv_root_partition) { /* * For root partition we get the hypervisor provided VP assist @@ -107,8 +109,21 @@ static int hv_cpu_init(unsigned int cpu) * in hv_cpu_die(), otherwise a CPU may not be stopped in the * case of CPU offlining and the VM will hang. 
*/ - if (!*hvp) + if (!*hvp) { *hvp = __vmalloc(PAGE_SIZE, GFP_KERNEL | __GFP_ZERO); + + /* + * Hyper-V should never specify a VM that is a Confidential + * VM and also running in the root partition. Root partition + * is blocked to run in Confidential VM. So only decrypt assist + * page in non-root partition here. + */ + if (*hvp && !ms_hyperv.paravisor_present && hv_isolation_type_snp()) { + WARN_ON_ONCE(set_memory_decrypted((unsigned long)(*hvp), 1)); + memset(*hvp, 0, PAGE_SIZE); + } + } + if (*hvp) msr.pfn = vmalloc_to_pfn(*hvp); @@ -379,6 +394,36 @@ static void __init hv_get_partition_id(void) local_irq_restore(flags); } +static u8 __init get_vtl(void) +{ + u64 control = HV_HYPERCALL_REP_COMP_1 | HVCALL_GET_VP_REGISTERS; + struct hv_get_vp_registers_input *input; + struct hv_get_vp_registers_output *output; + unsigned long flags; + u64 ret; + + local_irq_save(flags); + input = *this_cpu_ptr(hyperv_pcpu_input_arg); + output = (struct hv_get_vp_registers_output *)input; + + memset(input, 0, struct_size(input, element, 1)); + input->header.partitionid = HV_PARTITION_ID_SELF; + input->header.vpindex = HV_VP_INDEX_SELF; + input->header.inputvtl = 0; + input->element[0].name0 = HV_X64_REGISTER_VSM_VP_STATUS; + + ret = hv_do_hypercall(control, input, output); + if (hv_result_success(ret)) { + ret = output->as64.low & HV_X64_VTL_MASK; + } else { + pr_err("Failed to get VTL(%lld) and set VTL to zero by default.\n", ret); + ret = 0; + } + + local_irq_restore(flags); + return ret; +} + /* * This function is to be invoked early in the boot sequence after the * hypervisor has been detected. @@ -399,14 +444,24 @@ void __init hyperv_init(void) if (hv_common_init()) return; - hv_vp_assist_page = kcalloc(num_possible_cpus(), - sizeof(*hv_vp_assist_page), GFP_KERNEL); + /* + * The VP assist page is useless to a TDX guest: the only use we + * would have for it is lazy EOI, which can not be used with TDX. 
+ */ + if (hv_isolation_type_tdx()) + hv_vp_assist_page = NULL; + else + hv_vp_assist_page = kcalloc(num_possible_cpus(), + sizeof(*hv_vp_assist_page), + GFP_KERNEL); if (!hv_vp_assist_page) { ms_hyperv.hints &= ~HV_X64_ENLIGHTENED_VMCS_RECOMMENDED; - goto common_free; + + if (!hv_isolation_type_tdx()) + goto common_free; } - if (hv_isolation_type_snp()) { + if (ms_hyperv.paravisor_present && hv_isolation_type_snp()) { /* Negotiate GHCB Version. */ if (!hv_ghcb_negotiate_protocol()) hv_ghcb_terminate(SEV_TERM_SET_GEN, @@ -426,12 +481,32 @@ void __init hyperv_init(void) * Setup the hypercall page and enable hypercalls. * 1. Register the guest ID * 2. Enable the hypercall and register the hypercall page + * + * A TDX VM with no paravisor only uses TDX GHCI rather than hv_hypercall_pg: + * when the hypercall input is a page, such a VM must pass a decrypted + * page to Hyper-V, e.g. hv_post_message() uses the per-CPU page + * hyperv_pcpu_input_arg, which is decrypted if no paravisor is present. + * + * A TDX VM with the paravisor uses hv_hypercall_pg for most hypercalls, + * which are handled by the paravisor and the VM must use an encrypted + * input page: in such a VM, the hyperv_pcpu_input_arg is encrypted and + * used in the hypercalls, e.g. see hv_mark_gpa_visibility() and + * hv_arch_irq_unmask(). Such a VM uses TDX GHCI for two hypercalls: + * 1. HVCALL_SIGNAL_EVENT: see vmbus_set_event() and _hv_do_fast_hypercall8(). + * 2. HVCALL_POST_MESSAGE: the input page must be a decrypted page, i.e. + * hv_post_message() in such a VM can't use the encrypted hyperv_pcpu_input_arg; + * instead, hv_post_message() uses the post_msg_page, which is decrypted + * in such a VM and is only used in such a VM. */ guest_id = hv_generate_guest_id(LINUX_VERSION_CODE); wrmsrl(HV_X64_MSR_GUEST_OS_ID, guest_id); - /* Hyper-V requires to write guest os id via ghcb in SNP IVM. 
*/ - hv_ghcb_msr_write(HV_X64_MSR_GUEST_OS_ID, guest_id); + /* With the paravisor, the VM must also write the ID via GHCB/GHCI */ + hv_ivm_msr_write(HV_X64_MSR_GUEST_OS_ID, guest_id); + + /* A TDX VM with no paravisor only uses TDX GHCI rather than hv_hypercall_pg */ + if (hv_isolation_type_tdx() && !ms_hyperv.paravisor_present) + goto skip_hypercall_pg_init; hv_hypercall_pg = __vmalloc_node_range(PAGE_SIZE, 1, VMALLOC_START, VMALLOC_END, GFP_KERNEL, PAGE_KERNEL_ROX, @@ -472,6 +547,7 @@ void __init hyperv_init(void) wrmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64); } +skip_hypercall_pg_init: /* * Some versions of Hyper-V that provide IBT in guest VMs have a bug * in that there's no ENDBR64 instruction at the entry to the @@ -527,11 +603,15 @@ void __init hyperv_init(void) /* Query the VMs extended capability once, so that it can be cached. */ hv_query_ext_cap(0); + /* Find the VTL */ + if (!ms_hyperv.paravisor_present && hv_isolation_type_snp()) + ms_hyperv.vtl = get_vtl(); + return; clean_guest_os_id: wrmsrl(HV_X64_MSR_GUEST_OS_ID, 0); - hv_ghcb_msr_write(HV_X64_MSR_GUEST_OS_ID, 0); + hv_ivm_msr_write(HV_X64_MSR_GUEST_OS_ID, 0); cpuhp_remove_state(cpuhp); free_ghcb_page: free_percpu(hv_ghcb_pg); @@ -552,7 +632,7 @@ void hyperv_cleanup(void) /* Reset our OS id */ wrmsrl(HV_X64_MSR_GUEST_OS_ID, 0); - hv_ghcb_msr_write(HV_X64_MSR_GUEST_OS_ID, 0); + hv_ivm_msr_write(HV_X64_MSR_GUEST_OS_ID, 0); /* * Reset hypercall page reference before reset the page, @@ -615,6 +695,9 @@ bool hv_is_hyperv_initialized(void) if (x86_hyper_type != X86_HYPER_MS_HYPERV) return false; + /* A TDX VM with no paravisor uses TDX GHCI call rather than hv_hypercall_pg */ + if (hv_isolation_type_tdx() && !ms_hyperv.paravisor_present) + return true; /* * Verify that earlier initialization succeeded by checking * that the hypercall page is setup diff --git a/arch/x86/hyperv/ivm.c b/arch/x86/hyperv/ivm.c index 28be6df88063..8c6bf07f7d2b 100644 --- a/arch/x86/hyperv/ivm.c +++ 
b/arch/x86/hyperv/ivm.c @@ -18,6 +18,11 @@ #include <asm/mshyperv.h> #include <asm/hypervisor.h> #include <asm/mtrr.h> +#include <asm/io_apic.h> +#include <asm/realmode.h> +#include <asm/e820/api.h> +#include <asm/desc.h> +#include <uapi/asm/vmx.h> #ifdef CONFIG_AMD_MEM_ENCRYPT @@ -56,8 +61,10 @@ union hv_ghcb { } hypercall; } __packed __aligned(HV_HYP_PAGE_SIZE); +/* Only used in an SNP VM with the paravisor */ static u16 hv_ghcb_version __ro_after_init; +/* Functions only used in an SNP VM with the paravisor go here. */ u64 hv_ghcb_hypercall(u64 control, void *input, void *output, u32 input_size) { union hv_ghcb *hv_ghcb; @@ -175,7 +182,7 @@ bool hv_ghcb_negotiate_protocol(void) return true; } -void hv_ghcb_msr_write(u64 msr, u64 value) +static void hv_ghcb_msr_write(u64 msr, u64 value) { union hv_ghcb *hv_ghcb; void **ghcb_base; @@ -203,9 +210,8 @@ void hv_ghcb_msr_write(u64 msr, u64 value) local_irq_restore(flags); } -EXPORT_SYMBOL_GPL(hv_ghcb_msr_write); -void hv_ghcb_msr_read(u64 msr, u64 *value) +static void hv_ghcb_msr_read(u64 msr, u64 *value) { union hv_ghcb *hv_ghcb; void **ghcb_base; @@ -235,7 +241,217 @@ void hv_ghcb_msr_read(u64 msr, u64 *value) | ((u64)lower_32_bits(hv_ghcb->ghcb.save.rdx) << 32); local_irq_restore(flags); } -EXPORT_SYMBOL_GPL(hv_ghcb_msr_read); + +/* Only used in a fully enlightened SNP VM, i.e. without the paravisor */ +static u8 ap_start_input_arg[PAGE_SIZE] __bss_decrypted __aligned(PAGE_SIZE); +static u8 ap_start_stack[PAGE_SIZE] __aligned(PAGE_SIZE); +static DEFINE_PER_CPU(struct sev_es_save_area *, hv_sev_vmsa); + +/* Functions only used in an SNP VM without the paravisor go here. 
*/ + +#define hv_populate_vmcb_seg(seg, gdtr_base) \ +do { \ + if (seg.selector) { \ + seg.base = 0; \ + seg.limit = HV_AP_SEGMENT_LIMIT; \ + seg.attrib = *(u16 *)(gdtr_base + seg.selector + 5); \ + seg.attrib = (seg.attrib & 0xFF) | ((seg.attrib >> 4) & 0xF00); \ + } \ +} while (0) \ + +static int snp_set_vmsa(void *va, bool vmsa) +{ + u64 attrs; + + /* + * Running at VMPL0 allows the kernel to change the VMSA bit for a page + * using the RMPADJUST instruction. However, for the instruction to + * succeed it must target the permissions of a lesser privileged + * (higher numbered) VMPL level, so use VMPL1 (refer to the RMPADJUST + * instruction in the AMD64 APM Volume 3). + */ + attrs = 1; + if (vmsa) + attrs |= RMPADJUST_VMSA_PAGE_BIT; + + return rmpadjust((unsigned long)va, RMP_PG_SIZE_4K, attrs); +} + +static void snp_cleanup_vmsa(struct sev_es_save_area *vmsa) +{ + int err; + + err = snp_set_vmsa(vmsa, false); + if (err) + pr_err("clear VMSA page failed (%u), leaking page\n", err); + else + free_page((unsigned long)vmsa); +} + +int hv_snp_boot_ap(int cpu, unsigned long start_ip) +{ + struct sev_es_save_area *vmsa = (struct sev_es_save_area *) + __get_free_page(GFP_KERNEL | __GFP_ZERO); + struct sev_es_save_area *cur_vmsa; + struct desc_ptr gdtr; + u64 ret, retry = 5; + struct hv_enable_vp_vtl *start_vp_input; + unsigned long flags; + + if (!vmsa) + return -ENOMEM; + + native_store_gdt(&gdtr); + + vmsa->gdtr.base = gdtr.address; + vmsa->gdtr.limit = gdtr.size; + + asm volatile("movl %%es, %%eax;" : "=a" (vmsa->es.selector)); + hv_populate_vmcb_seg(vmsa->es, vmsa->gdtr.base); + + asm volatile("movl %%cs, %%eax;" : "=a" (vmsa->cs.selector)); + hv_populate_vmcb_seg(vmsa->cs, vmsa->gdtr.base); + + asm volatile("movl %%ss, %%eax;" : "=a" (vmsa->ss.selector)); + hv_populate_vmcb_seg(vmsa->ss, vmsa->gdtr.base); + + asm volatile("movl %%ds, %%eax;" : "=a" (vmsa->ds.selector)); + hv_populate_vmcb_seg(vmsa->ds, vmsa->gdtr.base); + + vmsa->efer = native_read_msr(MSR_EFER); 
+ + asm volatile("movq %%cr4, %%rax;" : "=a" (vmsa->cr4)); + asm volatile("movq %%cr3, %%rax;" : "=a" (vmsa->cr3)); + asm volatile("movq %%cr0, %%rax;" : "=a" (vmsa->cr0)); + + vmsa->xcr0 = 1; + vmsa->g_pat = HV_AP_INIT_GPAT_DEFAULT; + vmsa->rip = (u64)secondary_startup_64_no_verify; + vmsa->rsp = (u64)&ap_start_stack[PAGE_SIZE]; + + /* + * Set the SNP-specific fields for this VMSA: + * VMPL level + * SEV_FEATURES (matches the SEV STATUS MSR right shifted 2 bits) + */ + vmsa->vmpl = 0; + vmsa->sev_features = sev_status >> 2; + + ret = snp_set_vmsa(vmsa, true); + if (!ret) { + pr_err("RMPADJUST(%llx) failed: %llx\n", (u64)vmsa, ret); + free_page((u64)vmsa); + return ret; + } + + local_irq_save(flags); + start_vp_input = (struct hv_enable_vp_vtl *)ap_start_input_arg; + memset(start_vp_input, 0, sizeof(*start_vp_input)); + start_vp_input->partition_id = -1; + start_vp_input->vp_index = cpu; + start_vp_input->target_vtl.target_vtl = ms_hyperv.vtl; + *(u64 *)&start_vp_input->vp_context = __pa(vmsa) | 1; + + do { + ret = hv_do_hypercall(HVCALL_START_VP, + start_vp_input, NULL); + } while (hv_result(ret) == HV_STATUS_TIME_OUT && retry--); + + local_irq_restore(flags); + + if (!hv_result_success(ret)) { + pr_err("HvCallStartVirtualProcessor failed: %llx\n", ret); + snp_cleanup_vmsa(vmsa); + vmsa = NULL; + } + + cur_vmsa = per_cpu(hv_sev_vmsa, cpu); + /* Free up any previous VMSA page */ + if (cur_vmsa) + snp_cleanup_vmsa(cur_vmsa); + + /* Record the current VMSA page */ + per_cpu(hv_sev_vmsa, cpu) = vmsa; + + return ret; +} + +#else +static inline void hv_ghcb_msr_write(u64 msr, u64 value) {} +static inline void hv_ghcb_msr_read(u64 msr, u64 *value) {} +#endif /* CONFIG_AMD_MEM_ENCRYPT */ + +#ifdef CONFIG_INTEL_TDX_GUEST +static void hv_tdx_msr_write(u64 msr, u64 val) +{ + struct tdx_hypercall_args args = { + .r10 = TDX_HYPERCALL_STANDARD, + .r11 = EXIT_REASON_MSR_WRITE, + .r12 = msr, + .r13 = val, + }; + + u64 ret = __tdx_hypercall(&args); + + WARN_ONCE(ret, "Failed to 
emulate MSR write: %lld\n", ret); +} + +static void hv_tdx_msr_read(u64 msr, u64 *val) +{ + struct tdx_hypercall_args args = { + .r10 = TDX_HYPERCALL_STANDARD, + .r11 = EXIT_REASON_MSR_READ, + .r12 = msr, + }; + + u64 ret = __tdx_hypercall_ret(&args); + + if (WARN_ONCE(ret, "Failed to emulate MSR read: %lld\n", ret)) + *val = 0; + else + *val = args.r11; +} + +u64 hv_tdx_hypercall(u64 control, u64 param1, u64 param2) +{ + struct tdx_hypercall_args args = { }; + + args.r10 = control; + args.rdx = param1; + args.r8 = param2; + + (void)__tdx_hypercall_ret(&args); + + return args.r11; +} + +#else +static inline void hv_tdx_msr_write(u64 msr, u64 value) {} +static inline void hv_tdx_msr_read(u64 msr, u64 *value) {} +#endif /* CONFIG_INTEL_TDX_GUEST */ + +#if defined(CONFIG_AMD_MEM_ENCRYPT) || defined(CONFIG_INTEL_TDX_GUEST) +void hv_ivm_msr_write(u64 msr, u64 value) +{ + if (!ms_hyperv.paravisor_present) + return; + + if (hv_isolation_type_tdx()) + hv_tdx_msr_write(msr, value); + else if (hv_isolation_type_snp()) + hv_ghcb_msr_write(msr, value); +} + +void hv_ivm_msr_read(u64 msr, u64 *value) +{ + if (!ms_hyperv.paravisor_present) + return; + + if (hv_isolation_type_tdx()) + hv_tdx_msr_read(msr, value); + else if (hv_isolation_type_snp()) + hv_ghcb_msr_read(msr, value); +} /* * hv_mark_gpa_visibility - Set pages visible to host via hvcall. @@ -358,13 +574,34 @@ static bool hv_is_private_mmio(u64 addr) void __init hv_vtom_init(void) { + enum hv_isolation_type type = hv_get_isolation_type(); + + switch (type) { + case HV_ISOLATION_TYPE_VBS: + fallthrough; /* * By design, a VM using vTOM doesn't see the SEV setting, * so SEV initialization is bypassed and sev_status isn't set. * Set it here to indicate a vTOM VM. + * + * Note: if CONFIG_AMD_MEM_ENCRYPT is not set, sev_status is + * defined as 0ULL, to which we can't assigned a value. 
*/ - sev_status = MSR_AMD64_SNP_VTOM; - cc_vendor = CC_VENDOR_AMD; +#ifdef CONFIG_AMD_MEM_ENCRYPT + case HV_ISOLATION_TYPE_SNP: + sev_status = MSR_AMD64_SNP_VTOM; + cc_vendor = CC_VENDOR_AMD; + break; +#endif + + case HV_ISOLATION_TYPE_TDX: + cc_vendor = CC_VENDOR_INTEL; + break; + + default: + panic("hv_vtom_init: unsupported isolation type %d\n", type); + } + cc_set_mask(ms_hyperv.shared_gpa_boundary); physical_mask &= ms_hyperv.shared_gpa_boundary - 1; @@ -377,7 +614,7 @@ void __init hv_vtom_init(void) mtrr_overwrite_state(NULL, 0, MTRR_TYPE_WRBACK); } -#endif /* CONFIG_AMD_MEM_ENCRYPT */ +#endif /* defined(CONFIG_AMD_MEM_ENCRYPT) || defined(CONFIG_INTEL_TDX_GUEST) */ enum hv_isolation_type hv_get_isolation_type(void) { @@ -405,10 +642,20 @@ bool hv_is_isolation_supported(void) DEFINE_STATIC_KEY_FALSE(isolation_type_snp); /* - * hv_isolation_type_snp - Check system runs in the AMD SEV-SNP based + * hv_isolation_type_snp - Check if the system runs in an AMD SEV-SNP based * isolation VM. */ bool hv_isolation_type_snp(void) { return static_branch_unlikely(&isolation_type_snp); } + +DEFINE_STATIC_KEY_FALSE(isolation_type_tdx); +/* + * hv_isolation_type_tdx - Check if the system runs in an Intel TDX based + * isolated VM. 
+ */ +bool hv_isolation_type_tdx(void) +{ + return static_branch_unlikely(&isolation_type_tdx); +} diff --git a/arch/x86/include/asm/audit.h b/arch/x86/include/asm/audit.h index 36aec57ea7a3..fa918f01333e 100644 --- a/arch/x86/include/asm/audit.h +++ b/arch/x86/include/asm/audit.h @@ -4,4 +4,11 @@ int ia32_classify_syscall(unsigned int syscall); +extern unsigned ia32_dir_class[]; +extern unsigned ia32_write_class[]; +extern unsigned ia32_read_class[]; +extern unsigned ia32_chattr_class[]; +extern unsigned ia32_signal_class[]; + + #endif /* _ASM_X86_AUDIT_H */ diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 2061ed1c398f..58cb9495e40f 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -439,6 +439,7 @@ #define X86_FEATURE_SEV_ES (19*32+ 3) /* AMD Secure Encrypted Virtualization - Encrypted State */ #define X86_FEATURE_V_TSC_AUX (19*32+ 9) /* "" Virtual TSC_AUX */ #define X86_FEATURE_SME_COHERENT (19*32+10) /* "" AMD hardware-enforced cache coherency */ +#define X86_FEATURE_DEBUG_SWAP (19*32+14) /* AMD SEV-ES full debug state swap support */ /* AMD-defined Extended Feature 2 EAX, CPUID level 0x80000021 (EAX), word 20 */ #define X86_FEATURE_NO_NESTED_DATA_BP (20*32+ 0) /* "" No Nested Data Breakpoints */ diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h index cea95dcd27c2..2ff26f53cd62 100644 --- a/arch/x86/include/asm/hyperv-tlfs.h +++ b/arch/x86/include/asm/hyperv-tlfs.h @@ -169,7 +169,8 @@ enum hv_isolation_type { HV_ISOLATION_TYPE_NONE = 0, HV_ISOLATION_TYPE_VBS = 1, - HV_ISOLATION_TYPE_SNP = 2 + HV_ISOLATION_TYPE_SNP = 2, + HV_ISOLATION_TYPE_TDX = 3 }; /* Hyper-V specific model specific registers (MSRs) */ @@ -301,6 +302,13 @@ enum hv_isolation_type { #define HV_X64_MSR_TIME_REF_COUNT HV_REGISTER_TIME_REF_COUNT #define HV_X64_MSR_REFERENCE_TSC HV_REGISTER_REFERENCE_TSC +/* + * Registers are only accessible via HVCALL_GET_VP_REGISTERS hvcall and + * 
there is not associated MSR address. + */ +#define HV_X64_REGISTER_VSM_VP_STATUS 0x000D0003 +#define HV_X64_VTL_MASK GENMASK(3, 0) + /* Hyper-V memory host visibility */ enum hv_mem_host_visibility { VMBUS_PAGE_NOT_VISIBLE = 0, diff --git a/arch/x86/include/asm/kexec.h b/arch/x86/include/asm/kexec.h index 3be6a98751f0..c9f6a6c5de3c 100644 --- a/arch/x86/include/asm/kexec.h +++ b/arch/x86/include/asm/kexec.h @@ -205,8 +205,6 @@ int arch_kimage_file_post_load_cleanup(struct kimage *image); #endif #endif -typedef void crash_vmclear_fn(void); -extern crash_vmclear_fn __rcu *crash_vmclear_loaded_vmcss; extern void kdump_nmi_shootdown_cpus(void); #ifdef CONFIG_CRASH_HOTPLUG diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 3bc146dfd38d..1a4def36d5bb 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -288,13 +288,13 @@ struct kvm_kernel_irq_routing_entry; * kvm_mmu_page_role tracks the properties of a shadow page (where shadow page * also includes TDP pages) to determine whether or not a page can be used in * the given MMU context. This is a subset of the overall kvm_cpu_role to - * minimize the size of kvm_memory_slot.arch.gfn_track, i.e. allows allocating - * 2 bytes per gfn instead of 4 bytes per gfn. + * minimize the size of kvm_memory_slot.arch.gfn_write_track, i.e. allows + * allocating 2 bytes per gfn instead of 4 bytes per gfn. * * Upper-level shadow pages having gptes are tracked for write-protection via - * gfn_track. As above, gfn_track is a 16 bit counter, so KVM must not create - * more than 2^16-1 upper-level shadow pages at a single gfn, otherwise - * gfn_track will overflow and explosions will ensure. + * gfn_write_track. As above, gfn_write_track is a 16 bit counter, so KVM must + * not create more than 2^16-1 upper-level shadow pages at a single gfn, + * otherwise gfn_write_track will overflow and explosions will ensue. 
* * A unique shadow page (SP) for a gfn is created if and only if an existing SP * cannot be reused. The ability to reuse a SP is tracked by its role, which @@ -746,7 +746,6 @@ struct kvm_vcpu_arch { u64 smi_count; bool at_instruction_boundary; bool tpr_access_reporting; - bool xsaves_enabled; bool xfd_no_write_intercept; u64 ia32_xss; u64 microcode_version; @@ -831,6 +830,25 @@ struct kvm_vcpu_arch { struct kvm_cpuid_entry2 *cpuid_entries; struct kvm_hypervisor_cpuid kvm_cpuid; + /* + * FIXME: Drop this macro and use KVM_NR_GOVERNED_FEATURES directly + * when "struct kvm_vcpu_arch" is no longer defined in an + * arch/x86/include/asm header. The max is mostly arbitrary, i.e. + * can be increased as necessary. + */ +#define KVM_MAX_NR_GOVERNED_FEATURES BITS_PER_LONG + + /* + * Track whether or not the guest is allowed to use features that are + * governed by KVM, where "governed" means KVM needs to manage state + * and/or explicitly enable the feature in hardware. Typically, but + * not always, governed features can be used by the guest if and only + * if both KVM and userspace want to expose the feature to the guest. + */ + struct { + DECLARE_BITMAP(enabled, KVM_MAX_NR_GOVERNED_FEATURES); + } governed_features; + u64 reserved_gpa_bits; int maxphyaddr; @@ -1005,7 +1023,7 @@ struct kvm_lpage_info { struct kvm_arch_memory_slot { struct kvm_rmap_head *rmap[KVM_NR_PAGE_SIZES]; struct kvm_lpage_info *lpage_info[KVM_NR_PAGE_SIZES - 1]; - unsigned short *gfn_track[KVM_PAGE_TRACK_MAX]; + unsigned short *gfn_write_track; }; /* @@ -1247,8 +1265,9 @@ struct kvm_arch { * create an NX huge page (without hanging the guest). */ struct list_head possible_nx_huge_pages; - struct kvm_page_track_notifier_node mmu_sp_tracker; +#ifdef CONFIG_KVM_EXTERNAL_WRITE_TRACKING struct kvm_page_track_notifier_head track_notifier_head; +#endif /* * Protects marking pages unsync during page faults, as TDP MMU page * faults only take mmu_lock for read. 
For simplicity, the unsync @@ -1655,8 +1674,8 @@ struct kvm_x86_ops { u64 (*get_l2_tsc_offset)(struct kvm_vcpu *vcpu); u64 (*get_l2_tsc_multiplier)(struct kvm_vcpu *vcpu); - void (*write_tsc_offset)(struct kvm_vcpu *vcpu, u64 offset); - void (*write_tsc_multiplier)(struct kvm_vcpu *vcpu, u64 multiplier); + void (*write_tsc_offset)(struct kvm_vcpu *vcpu); + void (*write_tsc_multiplier)(struct kvm_vcpu *vcpu); /* * Retrieve somewhat arbitrary exit information. Intended to @@ -1795,8 +1814,8 @@ static inline struct kvm *kvm_arch_alloc_vm(void) #define __KVM_HAVE_ARCH_VM_FREE void kvm_arch_free_vm(struct kvm *kvm); -#define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLB -static inline int kvm_arch_flush_remote_tlb(struct kvm *kvm) +#define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS +static inline int kvm_arch_flush_remote_tlbs(struct kvm *kvm) { if (kvm_x86_ops.flush_remote_tlbs && !static_call(kvm_x86_flush_remote_tlbs)(kvm)) @@ -1805,6 +1824,8 @@ static inline int kvm_arch_flush_remote_tlb(struct kvm *kvm) return -ENOTSUPP; } +#define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS_RANGE + #define kvm_arch_pmi_in_guest(vcpu) \ ((vcpu) && (vcpu)->arch.handling_intr_from_guest) @@ -1833,7 +1854,6 @@ void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm, const struct kvm_memory_slot *memslot); void kvm_mmu_slot_leaf_clear_dirty(struct kvm *kvm, const struct kvm_memory_slot *memslot); -void kvm_mmu_zap_all(struct kvm *kvm); void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm, u64 gen); void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned long kvm_nr_mmu_pages); diff --git a/arch/x86/include/asm/kvm_page_track.h b/arch/x86/include/asm/kvm_page_track.h index eb186bc57f6a..3d040741044b 100644 --- a/arch/x86/include/asm/kvm_page_track.h +++ b/arch/x86/include/asm/kvm_page_track.h @@ -2,11 +2,9 @@ #ifndef _ASM_X86_KVM_PAGE_TRACK_H #define _ASM_X86_KVM_PAGE_TRACK_H -enum kvm_page_track_mode { - KVM_PAGE_TRACK_WRITE, - KVM_PAGE_TRACK_MAX, -}; +#include <linux/kvm_types.h> +#ifdef CONFIG_KVM_EXTERNAL_WRITE_TRACKING 
/* * The notifier represented by @kvm_page_track_notifier_node is linked into * the head which will be notified when guest is triggering the track event. @@ -26,54 +24,39 @@ struct kvm_page_track_notifier_node { * It is called when guest is writing the write-tracked page * and write emulation is finished at that time. * - * @vcpu: the vcpu where the write access happened. * @gpa: the physical address written by guest. * @new: the data was written to the address. * @bytes: the written length. * @node: this node */ - void (*track_write)(struct kvm_vcpu *vcpu, gpa_t gpa, const u8 *new, - int bytes, struct kvm_page_track_notifier_node *node); + void (*track_write)(gpa_t gpa, const u8 *new, int bytes, + struct kvm_page_track_notifier_node *node); + /* - * It is called when memory slot is being moved or removed - * users can drop write-protection for the pages in that memory slot + * Invoked when a memory region is removed from the guest. Or in KVM + * terms, when a memslot is deleted. * - * @kvm: the kvm where memory slot being moved or removed - * @slot: the memory slot being moved or removed - * @node: this node + * @gfn: base gfn of the region being removed + * @nr_pages: number of pages in the to-be-removed region + * @node: this node */ - void (*track_flush_slot)(struct kvm *kvm, struct kvm_memory_slot *slot, - struct kvm_page_track_notifier_node *node); + void (*track_remove_region)(gfn_t gfn, unsigned long nr_pages, + struct kvm_page_track_notifier_node *node); }; -int kvm_page_track_init(struct kvm *kvm); -void kvm_page_track_cleanup(struct kvm *kvm); +int kvm_page_track_register_notifier(struct kvm *kvm, + struct kvm_page_track_notifier_node *n); +void kvm_page_track_unregister_notifier(struct kvm *kvm, + struct kvm_page_track_notifier_node *n); -bool kvm_page_track_write_tracking_enabled(struct kvm *kvm); -int kvm_page_track_write_tracking_alloc(struct kvm_memory_slot *slot); - -void kvm_page_track_free_memslot(struct kvm_memory_slot *slot); -int 
kvm_page_track_create_memslot(struct kvm *kvm, - struct kvm_memory_slot *slot, - unsigned long npages); - -void kvm_slot_page_track_add_page(struct kvm *kvm, - struct kvm_memory_slot *slot, gfn_t gfn, - enum kvm_page_track_mode mode); -void kvm_slot_page_track_remove_page(struct kvm *kvm, - struct kvm_memory_slot *slot, gfn_t gfn, - enum kvm_page_track_mode mode); -bool kvm_slot_page_track_is_active(struct kvm *kvm, - const struct kvm_memory_slot *slot, - gfn_t gfn, enum kvm_page_track_mode mode); +int kvm_write_track_add_gfn(struct kvm *kvm, gfn_t gfn); +int kvm_write_track_remove_gfn(struct kvm *kvm, gfn_t gfn); +#else +/* + * Allow defining a node in a structure even if page tracking is disabled, e.g. + * to play nice with testing headers via direct inclusion from the command line. + */ +struct kvm_page_track_notifier_node {}; +#endif /* CONFIG_KVM_EXTERNAL_WRITE_TRACKING */ -void -kvm_page_track_register_notifier(struct kvm *kvm, - struct kvm_page_track_notifier_node *n); -void -kvm_page_track_unregister_notifier(struct kvm *kvm, - struct kvm_page_track_notifier_node *n); -void kvm_page_track_write(struct kvm_vcpu *vcpu, gpa_t gpa, const u8 *new, - int bytes); -void kvm_page_track_flush_slot(struct kvm *kvm, struct kvm_memory_slot *slot); #endif diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index fa83d88e4c99..033b53f993c6 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -26,6 +26,7 @@ union hv_ghcb; DECLARE_STATIC_KEY_FALSE(isolation_type_snp); +DECLARE_STATIC_KEY_FALSE(isolation_type_tdx); typedef int (*hyperv_fill_flush_list_func)( struct hv_guest_mapping_flush_list *flush, @@ -40,6 +41,7 @@ static inline unsigned char hv_get_nmi_reason(void) #if IS_ENABLED(CONFIG_HYPERV) extern int hyperv_init_cpuhp; +extern bool hyperv_paravisor_present; extern void *hv_hypercall_pg; @@ -47,10 +49,25 @@ extern u64 hv_current_partition_id; extern union hv_ghcb * __percpu *hv_ghcb_pg; +bool 
hv_isolation_type_snp(void); +bool hv_isolation_type_tdx(void); +u64 hv_tdx_hypercall(u64 control, u64 param1, u64 param2); + +/* + * DEFAULT INIT GPAT and SEGMENT LIMIT value in struct VMSA + * to start AP in enlightened SEV guest. + */ +#define HV_AP_INIT_GPAT_DEFAULT 0x0007040600070406ULL +#define HV_AP_SEGMENT_LIMIT 0xffffffff + int hv_call_deposit_pages(int node, u64 partition_id, u32 num_pages); int hv_call_add_logical_proc(int node, u32 lp_index, u32 acpi_id); int hv_call_create_vp(int node, u64 partition_id, u32 vp_index, u32 flags); +/* + * If the hypercall involves no input or output parameters, the hypervisor + * ignores the corresponding GPA pointer. + */ static inline u64 hv_do_hypercall(u64 control, void *input, void *output) { u64 input_address = input ? virt_to_phys(input) : 0; @@ -58,6 +75,19 @@ static inline u64 hv_do_hypercall(u64 control, void *input, void *output) u64 hv_status; #ifdef CONFIG_X86_64 + if (hv_isolation_type_tdx() && !hyperv_paravisor_present) + return hv_tdx_hypercall(control, input_address, output_address); + + if (hv_isolation_type_snp() && !hyperv_paravisor_present) { + __asm__ __volatile__("mov %4, %%r8\n" + "vmmcall" + : "=a" (hv_status), ASM_CALL_CONSTRAINT, + "+c" (control), "+d" (input_address) + : "r" (output_address) + : "cc", "memory", "r8", "r9", "r10", "r11"); + return hv_status; + } + if (!hv_hypercall_pg) return U64_MAX; @@ -101,7 +131,16 @@ static inline u64 _hv_do_fast_hypercall8(u64 control, u64 input1) u64 hv_status; #ifdef CONFIG_X86_64 - { + if (hv_isolation_type_tdx() && !hyperv_paravisor_present) + return hv_tdx_hypercall(control, input1, 0); + + if (hv_isolation_type_snp() && !hyperv_paravisor_present) { + __asm__ __volatile__( + "vmmcall" + : "=a" (hv_status), ASM_CALL_CONSTRAINT, + "+c" (control), "+d" (input1) + :: "cc", "r8", "r9", "r10", "r11"); + } else { __asm__ __volatile__(CALL_NOSPEC : "=a" (hv_status), ASM_CALL_CONSTRAINT, "+c" (control), "+d" (input1) @@ -146,7 +185,17 @@ static inline u64 
_hv_do_fast_hypercall16(u64 control, u64 input1, u64 input2) u64 hv_status; #ifdef CONFIG_X86_64 - { + if (hv_isolation_type_tdx() && !hyperv_paravisor_present) + return hv_tdx_hypercall(control, input1, input2); + + if (hv_isolation_type_snp() && !hyperv_paravisor_present) { + __asm__ __volatile__("mov %4, %%r8\n" + "vmmcall" + : "=a" (hv_status), ASM_CALL_CONSTRAINT, + "+c" (control), "+d" (input1) + : "r" (input2) + : "cc", "r8", "r9", "r10", "r11"); + } else { __asm__ __volatile__("mov %4, %%r8\n" CALL_NOSPEC : "=a" (hv_status), ASM_CALL_CONSTRAINT, @@ -225,20 +274,24 @@ int hv_map_ioapic_interrupt(int ioapic_id, bool level, int vcpu, int vector, int hv_unmap_ioapic_interrupt(int ioapic_id, struct hv_interrupt_entry *entry); #ifdef CONFIG_AMD_MEM_ENCRYPT -void hv_ghcb_msr_write(u64 msr, u64 value); -void hv_ghcb_msr_read(u64 msr, u64 *value); bool hv_ghcb_negotiate_protocol(void); void __noreturn hv_ghcb_terminate(unsigned int set, unsigned int reason); -void hv_vtom_init(void); +int hv_snp_boot_ap(int cpu, unsigned long start_ip); #else -static inline void hv_ghcb_msr_write(u64 msr, u64 value) {} -static inline void hv_ghcb_msr_read(u64 msr, u64 *value) {} static inline bool hv_ghcb_negotiate_protocol(void) { return false; } static inline void hv_ghcb_terminate(unsigned int set, unsigned int reason) {} -static inline void hv_vtom_init(void) {} +static inline int hv_snp_boot_ap(int cpu, unsigned long start_ip) { return 0; } #endif -extern bool hv_isolation_type_snp(void); +#if defined(CONFIG_AMD_MEM_ENCRYPT) || defined(CONFIG_INTEL_TDX_GUEST) +void hv_vtom_init(void); +void hv_ivm_msr_write(u64 msr, u64 value); +void hv_ivm_msr_read(u64 msr, u64 *value); +#else +static inline void hv_vtom_init(void) {} +static inline void hv_ivm_msr_write(u64 msr, u64 value) {} +static inline void hv_ivm_msr_read(u64 msr, u64 *value) {} +#endif static inline bool hv_is_synic_reg(unsigned int reg) { diff --git a/arch/x86/include/asm/reboot.h b/arch/x86/include/asm/reboot.h index 
9177b4354c3f..6536873f8fc0 100644 --- a/arch/x86/include/asm/reboot.h +++ b/arch/x86/include/asm/reboot.h @@ -25,7 +25,14 @@ void __noreturn machine_real_restart(unsigned int type); #define MRR_BIOS 0 #define MRR_APM 1 +#if IS_ENABLED(CONFIG_KVM_INTEL) || IS_ENABLED(CONFIG_KVM_AMD) +typedef void (cpu_emergency_virt_cb)(void); +void cpu_emergency_register_virt_callback(cpu_emergency_virt_cb *callback); +void cpu_emergency_unregister_virt_callback(cpu_emergency_virt_cb *callback); void cpu_emergency_disable_virtualization(void); +#else +static inline void cpu_emergency_disable_virtualization(void) {} +#endif /* CONFIG_KVM_INTEL || CONFIG_KVM_AMD */ typedef void (*nmi_shootdown_cb)(int, struct pt_regs*); void nmi_shootdown_cpus(nmi_shootdown_cb callback); diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h index e7c7379d6ac7..19bf955b67e0 100644 --- a/arch/x86/include/asm/svm.h +++ b/arch/x86/include/asm/svm.h @@ -288,6 +288,7 @@ static_assert((X2AVIC_MAX_PHYSICAL_ID & AVIC_PHYSICAL_MAX_INDEX_MASK) == X2AVIC_ #define AVIC_HPA_MASK ~((0xFFFULL << 52) | 0xFFF) +#define SVM_SEV_FEAT_DEBUG_SWAP BIT(5) struct vmcb_seg { u16 selector; @@ -345,7 +346,7 @@ struct vmcb_save_area { u64 last_excp_from; u64 last_excp_to; u8 reserved_0x298[72]; - u32 spec_ctrl; /* Guest version of SPEC_CTRL at 0x2E0 */ + u64 spec_ctrl; /* Guest version of SPEC_CTRL at 0x2E0 */ } __packed; /* Save area definition for SEV-ES and SEV-SNP guests */ @@ -512,7 +513,7 @@ struct ghcb { } __packed; -#define EXPECTED_VMCB_SAVE_AREA_SIZE 740 +#define EXPECTED_VMCB_SAVE_AREA_SIZE 744 #define EXPECTED_GHCB_SAVE_AREA_SIZE 1032 #define EXPECTED_SEV_ES_SAVE_AREA_SIZE 1648 #define EXPECTED_VMCB_CONTROL_AREA_SIZE 1024 diff --git a/arch/x86/include/asm/virtext.h b/arch/x86/include/asm/virtext.h deleted file mode 100644 index 3b12e6b99412..000000000000 --- a/arch/x86/include/asm/virtext.h +++ /dev/null @@ -1,154 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* CPU virtualization extensions 
handling - * - * This should carry the code for handling CPU virtualization extensions - * that needs to live in the kernel core. - * - * Author: Eduardo Habkost <ehabkost@redhat.com> - * - * Copyright (C) 2008, Red Hat Inc. - * - * Contains code from KVM, Copyright (C) 2006 Qumranet, Inc. - */ -#ifndef _ASM_X86_VIRTEX_H -#define _ASM_X86_VIRTEX_H - -#include <asm/processor.h> - -#include <asm/vmx.h> -#include <asm/svm.h> -#include <asm/tlbflush.h> - -/* - * VMX functions: - */ - -static inline int cpu_has_vmx(void) -{ - unsigned long ecx = cpuid_ecx(1); - return test_bit(5, &ecx); /* CPUID.1:ECX.VMX[bit 5] -> VT */ -} - - -/** - * cpu_vmxoff() - Disable VMX on the current CPU - * - * Disable VMX and clear CR4.VMXE (even if VMXOFF faults) - * - * Note, VMXOFF causes a #UD if the CPU is !post-VMXON, but it's impossible to - * atomically track post-VMXON state, e.g. this may be called in NMI context. - * Eat all faults as all other faults on VMXOFF faults are mode related, i.e. - * faults are guaranteed to be due to the !post-VMXON check unless the CPU is - * magically in RM, VM86, compat mode, or at CPL>0. - */ -static inline int cpu_vmxoff(void) -{ - asm_volatile_goto("1: vmxoff\n\t" - _ASM_EXTABLE(1b, %l[fault]) - ::: "cc", "memory" : fault); - - cr4_clear_bits(X86_CR4_VMXE); - return 0; - -fault: - cr4_clear_bits(X86_CR4_VMXE); - return -EIO; -} - -static inline int cpu_vmx_enabled(void) -{ - return __read_cr4() & X86_CR4_VMXE; -} - -/** Disable VMX if it is enabled on the current CPU - * - * You shouldn't call this if cpu_has_vmx() returns 0. 
- */ -static inline void __cpu_emergency_vmxoff(void) -{ - if (cpu_vmx_enabled()) - cpu_vmxoff(); -} - -/** Disable VMX if it is supported and enabled on the current CPU - */ -static inline void cpu_emergency_vmxoff(void) -{ - if (cpu_has_vmx()) - __cpu_emergency_vmxoff(); -} - - - - -/* - * SVM functions: - */ - -/** Check if the CPU has SVM support - * - * You can use the 'msg' arg to get a message describing the problem, - * if the function returns zero. Simply pass NULL if you are not interested - * on the messages; gcc should take care of not generating code for - * the messages on this case. - */ -static inline int cpu_has_svm(const char **msg) -{ - if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD && - boot_cpu_data.x86_vendor != X86_VENDOR_HYGON) { - if (msg) - *msg = "not amd or hygon"; - return 0; - } - - if (boot_cpu_data.extended_cpuid_level < SVM_CPUID_FUNC) { - if (msg) - *msg = "can't execute cpuid_8000000a"; - return 0; - } - - if (!boot_cpu_has(X86_FEATURE_SVM)) { - if (msg) - *msg = "svm not available"; - return 0; - } - return 1; -} - - -/** Disable SVM on the current CPU - * - * You should call this only if cpu_has_svm() returned true. - */ -static inline void cpu_svm_disable(void) -{ - uint64_t efer; - - wrmsrl(MSR_VM_HSAVE_PA, 0); - rdmsrl(MSR_EFER, efer); - if (efer & EFER_SVME) { - /* - * Force GIF=1 prior to disabling SVM to ensure INIT and NMI - * aren't blocked, e.g. if a fatal error occurred between CLGI - * and STGI. Note, STGI may #UD if SVM is disabled from NMI - * context between reading EFER and executing STGI. In that - * case, GIF must already be set, otherwise the NMI would have - * been blocked, so just eat the fault. 
- */ - asm_volatile_goto("1: stgi\n\t" - _ASM_EXTABLE(1b, %l[fault]) - ::: "memory" : fault); -fault: - wrmsrl(MSR_EFER, efer & ~EFER_SVME); - } -} - -/** Makes sure SVM is disabled, if it is supported on the CPU - */ -static inline void cpu_emergency_svm_disable(void) -{ - if (cpu_has_svm(NULL)) - cpu_svm_disable(); -} - -#endif /* _ASM_X86_VIRTEX_H */ diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index 0d02c4aafa6f..0e73616b82f3 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -71,7 +71,7 @@ #define SECONDARY_EXEC_RDSEED_EXITING VMCS_CONTROL_BIT(RDSEED_EXITING) #define SECONDARY_EXEC_ENABLE_PML VMCS_CONTROL_BIT(PAGE_MOD_LOGGING) #define SECONDARY_EXEC_PT_CONCEAL_VMX VMCS_CONTROL_BIT(PT_CONCEAL_VMX) -#define SECONDARY_EXEC_XSAVES VMCS_CONTROL_BIT(XSAVES) +#define SECONDARY_EXEC_ENABLE_XSAVES VMCS_CONTROL_BIT(XSAVES) #define SECONDARY_EXEC_MODE_BASED_EPT_EXEC VMCS_CONTROL_BIT(MODE_BASED_EPT_EXEC) #define SECONDARY_EXEC_PT_USE_GPA VMCS_CONTROL_BIT(PT_USE_GPA) #define SECONDARY_EXEC_TSC_SCALING VMCS_CONTROL_BIT(TSC_SCALING) diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c index c6c15ce1952f..5934ee5bc087 100644 --- a/arch/x86/kernel/apm_32.c +++ b/arch/x86/kernel/apm_32.c @@ -239,12 +239,6 @@ extern int (*console_blank_hook)(int); #endif /* - * The apm_bios device is one of the misc char devices. - * This is its minor number. 
- */ -#define APM_MINOR_DEV 134 - -/* * Various options can be changed at boot time as follows: * (We allow underscores for compatibility with the modules code) * apm=on/off enable/disable APM diff --git a/arch/x86/kernel/audit_64.c b/arch/x86/kernel/audit_64.c index 44c3601cfdc4..190c120f4285 100644 --- a/arch/x86/kernel/audit_64.c +++ b/arch/x86/kernel/audit_64.c @@ -63,11 +63,6 @@ int audit_classify_syscall(int abi, unsigned syscall) static int __init audit_classes_init(void) { #ifdef CONFIG_IA32_EMULATION - extern __u32 ia32_dir_class[]; - extern __u32 ia32_write_class[]; - extern __u32 ia32_read_class[]; - extern __u32 ia32_chattr_class[]; - extern __u32 ia32_signal_class[]; audit_register_class(AUDIT_CLASS_WRITE_32, ia32_write_class); audit_register_class(AUDIT_CLASS_READ_32, ia32_read_class); audit_register_class(AUDIT_CLASS_DIR_WRITE_32, ia32_dir_class); diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 6d75fab10161..382d4e6b848d 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1280,11 +1280,11 @@ static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = { VULNBL_INTEL_STEPPINGS(BROADWELL_G, X86_STEPPING_ANY, SRBDS), VULNBL_INTEL_STEPPINGS(BROADWELL_X, X86_STEPPING_ANY, MMIO), VULNBL_INTEL_STEPPINGS(BROADWELL, X86_STEPPING_ANY, SRBDS), - VULNBL_INTEL_STEPPINGS(SKYLAKE_L, X86_STEPPING_ANY, SRBDS | MMIO | RETBLEED), VULNBL_INTEL_STEPPINGS(SKYLAKE_X, X86_STEPPING_ANY, MMIO | RETBLEED | GDS), - VULNBL_INTEL_STEPPINGS(SKYLAKE, X86_STEPPING_ANY, SRBDS | MMIO | RETBLEED), - VULNBL_INTEL_STEPPINGS(KABYLAKE_L, X86_STEPPING_ANY, SRBDS | MMIO | RETBLEED | GDS), - VULNBL_INTEL_STEPPINGS(KABYLAKE, X86_STEPPING_ANY, SRBDS | MMIO | RETBLEED | GDS), + VULNBL_INTEL_STEPPINGS(SKYLAKE_L, X86_STEPPING_ANY, MMIO | RETBLEED | GDS | SRBDS), + VULNBL_INTEL_STEPPINGS(SKYLAKE, X86_STEPPING_ANY, MMIO | RETBLEED | GDS | SRBDS), + VULNBL_INTEL_STEPPINGS(KABYLAKE_L, X86_STEPPING_ANY, MMIO | RETBLEED | GDS | SRBDS), + 
VULNBL_INTEL_STEPPINGS(KABYLAKE, X86_STEPPING_ANY, MMIO | RETBLEED | GDS | SRBDS), VULNBL_INTEL_STEPPINGS(CANNONLAKE_L, X86_STEPPING_ANY, RETBLEED), VULNBL_INTEL_STEPPINGS(ICELAKE_L, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED | GDS), VULNBL_INTEL_STEPPINGS(ICELAKE_D, X86_STEPPING_ANY, MMIO | GDS), diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index 0100468e72ca..e6bba12c759c 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -32,6 +32,7 @@ #include <asm/nmi.h> #include <clocksource/hyperv_timer.h> #include <asm/numa.h> +#include <asm/svm.h> /* Is Linux running as the root partition? */ bool hv_root_partition; @@ -39,6 +40,10 @@ bool hv_root_partition; bool hv_nested; struct ms_hyperv_info ms_hyperv; +/* Used in modules via hv_do_hypercall(): see arch/x86/include/asm/mshyperv.h */ +bool hyperv_paravisor_present __ro_after_init; +EXPORT_SYMBOL_GPL(hyperv_paravisor_present); + #if IS_ENABLED(CONFIG_HYPERV) static inline unsigned int hv_get_nested_reg(unsigned int reg) { @@ -65,8 +70,8 @@ u64 hv_get_non_nested_register(unsigned int reg) { u64 value; - if (hv_is_synic_reg(reg) && hv_isolation_type_snp()) - hv_ghcb_msr_read(reg, &value); + if (hv_is_synic_reg(reg) && ms_hyperv.paravisor_present) + hv_ivm_msr_read(reg, &value); else rdmsrl(reg, value); return value; @@ -75,8 +80,8 @@ EXPORT_SYMBOL_GPL(hv_get_non_nested_register); void hv_set_non_nested_register(unsigned int reg, u64 value) { - if (hv_is_synic_reg(reg) && hv_isolation_type_snp()) { - hv_ghcb_msr_write(reg, value); + if (hv_is_synic_reg(reg) && ms_hyperv.paravisor_present) { + hv_ivm_msr_write(reg, value); /* Write proxy bit via wrmsl instruction */ if (hv_is_sint_reg(reg)) @@ -295,6 +300,15 @@ static void __init hv_smp_prepare_cpus(unsigned int max_cpus) native_smp_prepare_cpus(max_cpus); + /* + * Override wakeup_secondary_cpu_64 callback for SEV-SNP + * enlightened guest. 
+ */ + if (!ms_hyperv.paravisor_present && hv_isolation_type_snp()) { + apic->wakeup_secondary_cpu_64 = hv_snp_boot_ap; + return; + } + #ifdef CONFIG_X86_64 for_each_present_cpu(i) { if (i == 0) @@ -313,6 +327,26 @@ static void __init hv_smp_prepare_cpus(unsigned int max_cpus) } #endif +/* + * When a fully enlightened TDX VM runs on Hyper-V, the firmware sets the + * HW_REDUCED flag: refer to acpi_tb_create_local_fadt(). Consequently ttyS0 + * interrupts can't work because request_irq() -> ... -> irq_to_desc() returns + * NULL for ttyS0. This happens because mp_config_acpi_legacy_irqs() sees a + * nr_legacy_irqs() of 0, so it doesn't initialize the array 'mp_irqs[]', and + * later setup_IO_APIC_irqs() -> find_irq_entry() fails to find the legacy irqs + * from the array and hence doesn't create the necessary irq description info. + * + * Clone arch/x86/kernel/acpi/boot.c: acpi_generic_reduced_hw_init() here, + * except don't change 'legacy_pic', which keeps its default value + * 'default_legacy_pic'. This way, mp_config_acpi_legacy_irqs() sees a non-zero + * nr_legacy_irqs() and eventually serial console interrupts works properly. 
+ */ +static void __init reduced_hw_init(void) +{ + x86_init.timers.timer_init = x86_init_noop; + x86_init.irqs.pre_vector_init = x86_init_noop; +} + static void __init ms_hyperv_init_platform(void) { int hv_max_functions_eax; @@ -399,11 +433,33 @@ static void __init ms_hyperv_init_platform(void) ms_hyperv.shared_gpa_boundary = BIT_ULL(ms_hyperv.shared_gpa_boundary_bits); + hyperv_paravisor_present = !!ms_hyperv.paravisor_present; + pr_info("Hyper-V: Isolation Config: Group A 0x%x, Group B 0x%x\n", ms_hyperv.isolation_config_a, ms_hyperv.isolation_config_b); - if (hv_get_isolation_type() == HV_ISOLATION_TYPE_SNP) + + if (hv_get_isolation_type() == HV_ISOLATION_TYPE_SNP) { static_branch_enable(&isolation_type_snp); + } else if (hv_get_isolation_type() == HV_ISOLATION_TYPE_TDX) { + static_branch_enable(&isolation_type_tdx); + + /* A TDX VM must use x2APIC and doesn't use lazy EOI. */ + ms_hyperv.hints &= ~HV_X64_APIC_ACCESS_RECOMMENDED; + + if (!ms_hyperv.paravisor_present) { + /* To be supported: more work is required. */ + ms_hyperv.features &= ~HV_MSR_REFERENCE_TSC_AVAILABLE; + + /* HV_REGISTER_CRASH_CTL is unsupported. */ + ms_hyperv.misc_features &= ~HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE; + + /* Don't trust Hyper-V's TLB-flushing hypercalls. */ + ms_hyperv.hints &= ~HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED; + + x86_init.acpi.reduced_hw_early_init = reduced_hw_init; + } + } } if (hv_max_functions_eax >= HYPERV_CPUID_NESTED_FEATURES) { @@ -473,7 +529,7 @@ static void __init ms_hyperv_init_platform(void) #if IS_ENABLED(CONFIG_HYPERV) if ((hv_get_isolation_type() == HV_ISOLATION_TYPE_VBS) || - (hv_get_isolation_type() == HV_ISOLATION_TYPE_SNP)) + ms_hyperv.paravisor_present) hv_vtom_init(); /* * Setup the hook to get control post apic initialization. 
@@ -497,7 +553,8 @@ static void __init ms_hyperv_init_platform(void) # ifdef CONFIG_SMP smp_ops.smp_prepare_boot_cpu = hv_smp_prepare_boot_cpu; - if (hv_root_partition) + if (hv_root_partition || + (!ms_hyperv.paravisor_present && hv_isolation_type_snp())) smp_ops.smp_prepare_cpus = hv_smp_prepare_cpus; # endif @@ -560,6 +617,22 @@ static bool __init ms_hyperv_msi_ext_dest_id(void) return eax & HYPERV_VS_PROPERTIES_EAX_EXTENDED_IOAPIC_RTE; } +#ifdef CONFIG_AMD_MEM_ENCRYPT +static void hv_sev_es_hcall_prepare(struct ghcb *ghcb, struct pt_regs *regs) +{ + /* RAX and CPL are already in the GHCB */ + ghcb_set_rcx(ghcb, regs->cx); + ghcb_set_rdx(ghcb, regs->dx); + ghcb_set_r8(ghcb, regs->r8); +} + +static bool hv_sev_es_hcall_finish(struct ghcb *ghcb, struct pt_regs *regs) +{ + /* No checking of the return state needed */ + return true; +} +#endif + const __initconst struct hypervisor_x86 x86_hyper_ms_hyperv = { .name = "Microsoft Hyper-V", .detect = ms_hyperv_platform, @@ -567,4 +640,8 @@ const __initconst struct hypervisor_x86 x86_hyper_ms_hyperv = { .init.x2apic_available = ms_hyperv_x2apic_available, .init.msi_ext_dest_id = ms_hyperv_msi_ext_dest_id, .init.init_platform = ms_hyperv_init_platform, +#ifdef CONFIG_AMD_MEM_ENCRYPT + .runtime.sev_es_hcall_prepare = hv_sev_es_hcall_prepare, + .runtime.sev_es_hcall_finish = hv_sev_es_hcall_finish, +#endif }; diff --git a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c index 458cb7419502..8f559eeae08e 100644 --- a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c +++ b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c @@ -45,7 +45,21 @@ static u64 prefetch_disable_bits; */ static unsigned int pseudo_lock_major; static unsigned long pseudo_lock_minor_avail = GENMASK(MINORBITS, 0); -static struct class *pseudo_lock_class; + +static char *pseudo_lock_devnode(const struct device *dev, umode_t *mode) +{ + const struct rdtgroup *rdtgrp; + + rdtgrp = dev_get_drvdata(dev); + if (mode) + *mode = 0600; + return 
kasprintf(GFP_KERNEL, "pseudo_lock/%s", rdtgrp->kn->name); +} + +static const struct class pseudo_lock_class = { + .name = "pseudo_lock", + .devnode = pseudo_lock_devnode, +}; /** * get_prefetch_disable_bits - prefetch disable bits of supported platforms @@ -1353,7 +1367,7 @@ int rdtgroup_pseudo_lock_create(struct rdtgroup *rdtgrp) &pseudo_measure_fops); } - dev = device_create(pseudo_lock_class, NULL, + dev = device_create(&pseudo_lock_class, NULL, MKDEV(pseudo_lock_major, new_minor), rdtgrp, "%s", rdtgrp->kn->name); @@ -1383,7 +1397,7 @@ int rdtgroup_pseudo_lock_create(struct rdtgroup *rdtgrp) goto out; out_device: - device_destroy(pseudo_lock_class, MKDEV(pseudo_lock_major, new_minor)); + device_destroy(&pseudo_lock_class, MKDEV(pseudo_lock_major, new_minor)); out_debugfs: debugfs_remove_recursive(plr->debugfs_dir); pseudo_lock_minor_release(new_minor); @@ -1424,7 +1438,7 @@ void rdtgroup_pseudo_lock_remove(struct rdtgroup *rdtgrp) pseudo_lock_cstates_relax(plr); debugfs_remove_recursive(rdtgrp->plr->debugfs_dir); - device_destroy(pseudo_lock_class, MKDEV(pseudo_lock_major, plr->minor)); + device_destroy(&pseudo_lock_class, MKDEV(pseudo_lock_major, plr->minor)); pseudo_lock_minor_release(plr->minor); free: @@ -1560,16 +1574,6 @@ static const struct file_operations pseudo_lock_dev_fops = { .mmap = pseudo_lock_dev_mmap, }; -static char *pseudo_lock_devnode(const struct device *dev, umode_t *mode) -{ - const struct rdtgroup *rdtgrp; - - rdtgrp = dev_get_drvdata(dev); - if (mode) - *mode = 0600; - return kasprintf(GFP_KERNEL, "pseudo_lock/%s", rdtgrp->kn->name); -} - int rdt_pseudo_lock_init(void) { int ret; @@ -1580,21 +1584,18 @@ int rdt_pseudo_lock_init(void) pseudo_lock_major = ret; - pseudo_lock_class = class_create("pseudo_lock"); - if (IS_ERR(pseudo_lock_class)) { - ret = PTR_ERR(pseudo_lock_class); + ret = class_register(&pseudo_lock_class); + if (ret) { unregister_chrdev(pseudo_lock_major, "pseudo_lock"); return ret; } - pseudo_lock_class->devnode = 
pseudo_lock_devnode; return 0; } void rdt_pseudo_lock_release(void) { - class_destroy(pseudo_lock_class); - pseudo_lock_class = NULL; + class_unregister(&pseudo_lock_class); unregister_chrdev(pseudo_lock_major, "pseudo_lock"); pseudo_lock_major = 0; } diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c index bdc0d5539b57..dae436253de4 100644 --- a/arch/x86/kernel/cpuid.c +++ b/arch/x86/kernel/cpuid.c @@ -40,7 +40,6 @@ #include <asm/processor.h> #include <asm/msr.h> -static struct class *cpuid_class; static enum cpuhp_state cpuhp_cpuid_state; struct cpuid_regs_done { @@ -124,26 +123,31 @@ static const struct file_operations cpuid_fops = { .open = cpuid_open, }; +static char *cpuid_devnode(const struct device *dev, umode_t *mode) +{ + return kasprintf(GFP_KERNEL, "cpu/%u/cpuid", MINOR(dev->devt)); +} + +static const struct class cpuid_class = { + .name = "cpuid", + .devnode = cpuid_devnode, +}; + static int cpuid_device_create(unsigned int cpu) { struct device *dev; - dev = device_create(cpuid_class, NULL, MKDEV(CPUID_MAJOR, cpu), NULL, + dev = device_create(&cpuid_class, NULL, MKDEV(CPUID_MAJOR, cpu), NULL, "cpu%d", cpu); return PTR_ERR_OR_ZERO(dev); } static int cpuid_device_destroy(unsigned int cpu) { - device_destroy(cpuid_class, MKDEV(CPUID_MAJOR, cpu)); + device_destroy(&cpuid_class, MKDEV(CPUID_MAJOR, cpu)); return 0; } -static char *cpuid_devnode(const struct device *dev, umode_t *mode) -{ - return kasprintf(GFP_KERNEL, "cpu/%u/cpuid", MINOR(dev->devt)); -} - static int __init cpuid_init(void) { int err; @@ -154,12 +158,9 @@ static int __init cpuid_init(void) CPUID_MAJOR); return -EBUSY; } - cpuid_class = class_create("cpuid"); - if (IS_ERR(cpuid_class)) { - err = PTR_ERR(cpuid_class); + err = class_register(&cpuid_class); + if (err) goto out_chrdev; - } - cpuid_class->devnode = cpuid_devnode; err = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "x86/cpuid:online", cpuid_device_create, cpuid_device_destroy); @@ -170,7 +171,7 @@ static int __init 
cpuid_init(void) return 0; out_class: - class_destroy(cpuid_class); + class_unregister(&cpuid_class); out_chrdev: __unregister_chrdev(CPUID_MAJOR, 0, NR_CPUS, "cpu/cpuid"); return err; @@ -180,7 +181,7 @@ module_init(cpuid_init); static void __exit cpuid_exit(void) { cpuhp_remove_state(cpuhp_cpuid_state); - class_destroy(cpuid_class); + class_unregister(&cpuid_class); __unregister_chrdev(CPUID_MAJOR, 0, NR_CPUS, "cpu/cpuid"); } module_exit(cpuid_exit); diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c index 587c7743fd21..c92d88680dbf 100644 --- a/arch/x86/kernel/crash.c +++ b/arch/x86/kernel/crash.c @@ -48,27 +48,6 @@ struct crash_memmap_data { unsigned int type; }; -/* - * This is used to VMCLEAR all VMCSs loaded on the - * processor. And when loading kvm_intel module, the - * callback function pointer will be assigned. - * - * protected by rcu. - */ -crash_vmclear_fn __rcu *crash_vmclear_loaded_vmcss = NULL; -EXPORT_SYMBOL_GPL(crash_vmclear_loaded_vmcss); - -static inline void cpu_crash_vmclear_loaded_vmcss(void) -{ - crash_vmclear_fn *do_vmclear_operation = NULL; - - rcu_read_lock(); - do_vmclear_operation = rcu_dereference(crash_vmclear_loaded_vmcss); - if (do_vmclear_operation) - do_vmclear_operation(); - rcu_read_unlock(); -} - #if defined(CONFIG_SMP) && defined(CONFIG_X86_LOCAL_APIC) static void kdump_nmi_callback(int cpu, struct pt_regs *regs) @@ -76,11 +55,6 @@ static void kdump_nmi_callback(int cpu, struct pt_regs *regs) crash_save_cpu(regs, cpu); /* - * VMCLEAR VMCSs loaded on all cpus if needed. - */ - cpu_crash_vmclear_loaded_vmcss(); - - /* * Disable Intel PT to stop its logging */ cpu_emergency_stop_pt(); @@ -133,11 +107,6 @@ void native_machine_crash_shutdown(struct pt_regs *regs) crash_smp_send_stop(); - /* - * VMCLEAR VMCSs loaded on this cpu if needed. 
- */ - cpu_crash_vmclear_loaded_vmcss(); - cpu_emergency_disable_virtualization(); /* diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 41dac93b8ea4..cadf68737e6b 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -71,7 +71,7 @@ static unsigned short xsave_cpuid_features[] __initdata = { [XFEATURE_ZMM_Hi256] = X86_FEATURE_AVX512F, [XFEATURE_Hi16_ZMM] = X86_FEATURE_AVX512F, [XFEATURE_PT_UNIMPLEMENTED_SO_FAR] = X86_FEATURE_INTEL_PT, - [XFEATURE_PKRU] = X86_FEATURE_PKU, + [XFEATURE_PKRU] = X86_FEATURE_OSPKE, [XFEATURE_PASID] = X86_FEATURE_ENQCMD, [XFEATURE_CET_USER] = X86_FEATURE_SHSTK, [XFEATURE_XTILE_CFG] = X86_FEATURE_AMX_TILE, diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c index 4d8aff05a509..30a55207c000 100644 --- a/arch/x86/kernel/i8259.c +++ b/arch/x86/kernel/i8259.c @@ -231,9 +231,7 @@ struct irq_chip i8259A_chip = { }; static char irq_trigger[2]; -/** - * ELCR registers (0x4d0, 0x4d1) control edge/level of IRQ - */ +/* ELCR registers (0x4d0, 0x4d1) control edge/level of IRQ */ static void restore_ELCR(char *trigger) { outb(trigger[0], PIC_ELCR1); diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c index 7bb17d37db01..e17c16c54a37 100644 --- a/arch/x86/kernel/msr.c +++ b/arch/x86/kernel/msr.c @@ -39,7 +39,6 @@ #include <asm/cpufeature.h> #include <asm/msr.h> -static struct class *msr_class; static enum cpuhp_state cpuhp_msr_state; enum allow_write_msrs { @@ -235,26 +234,31 @@ static const struct file_operations msr_fops = { .compat_ioctl = msr_ioctl, }; +static char *msr_devnode(const struct device *dev, umode_t *mode) +{ + return kasprintf(GFP_KERNEL, "cpu/%u/msr", MINOR(dev->devt)); +} + +static const struct class msr_class = { + .name = "msr", + .devnode = msr_devnode, +}; + static int msr_device_create(unsigned int cpu) { struct device *dev; - dev = device_create(msr_class, NULL, MKDEV(MSR_MAJOR, cpu), NULL, + dev = device_create(&msr_class, NULL, MKDEV(MSR_MAJOR, cpu), NULL, 
"msr%d", cpu); return PTR_ERR_OR_ZERO(dev); } static int msr_device_destroy(unsigned int cpu) { - device_destroy(msr_class, MKDEV(MSR_MAJOR, cpu)); + device_destroy(&msr_class, MKDEV(MSR_MAJOR, cpu)); return 0; } -static char *msr_devnode(const struct device *dev, umode_t *mode) -{ - return kasprintf(GFP_KERNEL, "cpu/%u/msr", MINOR(dev->devt)); -} - static int __init msr_init(void) { int err; @@ -263,12 +267,9 @@ static int __init msr_init(void) pr_err("unable to get major %d for msr\n", MSR_MAJOR); return -EBUSY; } - msr_class = class_create("msr"); - if (IS_ERR(msr_class)) { - err = PTR_ERR(msr_class); + err = class_register(&msr_class); + if (err) goto out_chrdev; - } - msr_class->devnode = msr_devnode; err = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "x86/msr:online", msr_device_create, msr_device_destroy); @@ -278,7 +279,7 @@ static int __init msr_init(void) return 0; out_class: - class_destroy(msr_class); + class_unregister(&msr_class); out_chrdev: __unregister_chrdev(MSR_MAJOR, 0, NR_CPUS, "cpu/msr"); return err; @@ -288,7 +289,7 @@ module_init(msr_init); static void __exit msr_exit(void) { cpuhp_remove_state(cpuhp_msr_state); - class_destroy(msr_class); + class_unregister(&msr_class); __unregister_chrdev(MSR_MAJOR, 0, NR_CPUS, "cpu/msr"); } module_exit(msr_exit) diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 3adbe97015c1..830425e6d38e 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -22,7 +22,6 @@ #include <asm/reboot_fixups.h> #include <asm/reboot.h> #include <asm/pci_x86.h> -#include <asm/virtext.h> #include <asm/cpu.h> #include <asm/nmi.h> #include <asm/smp.h> @@ -530,9 +529,54 @@ static inline void kb_wait(void) static inline void nmi_shootdown_cpus_on_restart(void); +#if IS_ENABLED(CONFIG_KVM_INTEL) || IS_ENABLED(CONFIG_KVM_AMD) +/* RCU-protected callback to disable virtualization prior to reboot. 
*/ +static cpu_emergency_virt_cb __rcu *cpu_emergency_virt_callback; + +void cpu_emergency_register_virt_callback(cpu_emergency_virt_cb *callback) +{ + if (WARN_ON_ONCE(rcu_access_pointer(cpu_emergency_virt_callback))) + return; + + rcu_assign_pointer(cpu_emergency_virt_callback, callback); +} +EXPORT_SYMBOL_GPL(cpu_emergency_register_virt_callback); + +void cpu_emergency_unregister_virt_callback(cpu_emergency_virt_cb *callback) +{ + if (WARN_ON_ONCE(rcu_access_pointer(cpu_emergency_virt_callback) != callback)) + return; + + rcu_assign_pointer(cpu_emergency_virt_callback, NULL); + synchronize_rcu(); +} +EXPORT_SYMBOL_GPL(cpu_emergency_unregister_virt_callback); + +/* + * Disable virtualization, i.e. VMX or SVM, to ensure INIT is recognized during + * reboot. VMX blocks INIT if the CPU is post-VMXON, and SVM blocks INIT if + * GIF=0, i.e. if the crash occurred between CLGI and STGI. + */ +void cpu_emergency_disable_virtualization(void) +{ + cpu_emergency_virt_cb *callback; + + /* + * IRQs must be disabled as KVM enables virtualization in hardware via + * function call IPIs, i.e. IRQs need to be disabled to guarantee + * virtualization stays disabled. + */ + lockdep_assert_irqs_disabled(); + + rcu_read_lock(); + callback = rcu_dereference(cpu_emergency_virt_callback); + if (callback) + callback(); + rcu_read_unlock(); +} + static void emergency_reboot_disable_virtualization(void) { - /* Just make sure we won't change CPUs while doing this */ local_irq_disable(); /* @@ -545,7 +589,7 @@ static void emergency_reboot_disable_virtualization(void) * Do the NMI shootdown even if virtualization is off on _this_ CPU, as * other CPUs may have virtualization enabled. */ - if (cpu_has_vmx() || cpu_has_svm(NULL)) { + if (rcu_access_pointer(cpu_emergency_virt_callback)) { /* Safely force _this_ CPU out of VMX/SVM operation. 
*/ cpu_emergency_disable_virtualization(); @@ -553,7 +597,9 @@ static void emergency_reboot_disable_virtualization(void) nmi_shootdown_cpus_on_restart(); } } - +#else +static void emergency_reboot_disable_virtualization(void) { } +#endif /* CONFIG_KVM_INTEL || CONFIG_KVM_AMD */ void __attribute__((weak)) mach_reboot_fixups(void) { @@ -787,21 +833,9 @@ void machine_crash_shutdown(struct pt_regs *regs) } #endif - /* This is the CPU performing the emergency shutdown work. */ int crashing_cpu = -1; -/* - * Disable virtualization, i.e. VMX or SVM, to ensure INIT is recognized during - * reboot. VMX blocks INIT if the CPU is post-VMXON, and SVM blocks INIT if - * GIF=0, i.e. if the crash occurred between CLGI and STGI. - */ -void cpu_emergency_disable_virtualization(void) -{ - cpu_emergency_vmxoff(); - cpu_emergency_svm_disable(); -} - #if defined(CONFIG_SMP) static nmi_shootdown_cb shootdown_callback; diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig index 89ca7f4c1464..ed90f148140d 100644 --- a/arch/x86/kvm/Kconfig +++ b/arch/x86/kvm/Kconfig @@ -101,7 +101,7 @@ config X86_SGX_KVM config KVM_AMD tristate "KVM for AMD processors support" - depends on KVM + depends on KVM && (CPU_SUP_AMD || CPU_SUP_HYGON) help Provides support for KVM on AMD processors equipped with the AMD-V (SVM) extensions. @@ -138,6 +138,19 @@ config KVM_XEN If in doubt, say "N". +config KVM_PROVE_MMU + bool "Prove KVM MMU correctness" + depends on DEBUG_KERNEL + depends on KVM + depends on EXPERT + help + Enables runtime assertions in KVM's MMU that are too costly to enable + in anything remotely resembling a production environment, e.g. this + gates code that verifies a to-be-freed page table doesn't have any + present SPTEs. + + If in doubt, say "N". 
+ config KVM_EXTERNAL_WRITE_TRACKING bool diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index d3432687c9e6..0544e30b4946 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -11,6 +11,7 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/kvm_host.h> +#include "linux/lockdep.h" #include <linux/export.h> #include <linux/vmalloc.h> #include <linux/uaccess.h> @@ -84,6 +85,18 @@ static inline struct kvm_cpuid_entry2 *cpuid_entry2_find( struct kvm_cpuid_entry2 *e; int i; + /* + * KVM has a semi-arbitrary rule that querying the guest's CPUID model + * with IRQs disabled is disallowed. The CPUID model can legitimately + * have over one hundred entries, i.e. the lookup is slow, and IRQs are + * typically disabled in KVM only when KVM is in a performance critical + * path, e.g. the core VM-Enter/VM-Exit run loop. Nothing will break + * if this rule is violated, this assertion is purely to flag potential + * performance issues. If this fires, consider moving the lookup out + * of the hotpath, e.g. by caching information during CPUID updates. + */ + lockdep_assert_irqs_enabled(); + for (i = 0; i < nent; i++) { e = &entries[i]; @@ -312,6 +325,27 @@ static void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu) { struct kvm_lapic *apic = vcpu->arch.apic; struct kvm_cpuid_entry2 *best; + bool allow_gbpages; + + BUILD_BUG_ON(KVM_NR_GOVERNED_FEATURES > KVM_MAX_NR_GOVERNED_FEATURES); + bitmap_zero(vcpu->arch.governed_features.enabled, + KVM_MAX_NR_GOVERNED_FEATURES); + + /* + * If TDP is enabled, let the guest use GBPAGES if they're supported in + * hardware. The hardware page walker doesn't let KVM disable GBPAGES, + * i.e. won't treat them as reserved, and KVM doesn't redo the GVA->GPA + * walk for performance and complexity reasons. Not to mention KVM + * _can't_ solve the problem because GVA->GPA walks aren't visible to + * KVM once a TDP translation is installed. Mimic hardware behavior so + * that KVM's is at least consistent, i.e. 
doesn't randomly inject #PF. + * If TDP is disabled, honor *only* guest CPUID as KVM has full control + * and can install smaller shadow pages if the host lacks 1GiB support. + */ + allow_gbpages = tdp_enabled ? boot_cpu_has(X86_FEATURE_GBPAGES) : + guest_cpuid_has(vcpu, X86_FEATURE_GBPAGES); + if (allow_gbpages) + kvm_governed_feature_set(vcpu, X86_FEATURE_GBPAGES); best = kvm_find_cpuid_entry(vcpu, 1); if (best && apic) { @@ -647,7 +681,8 @@ void kvm_set_cpu_caps(void) ); kvm_cpu_cap_init_kvm_defined(CPUID_7_1_EDX, - F(AVX_VNNI_INT8) | F(AVX_NE_CONVERT) | F(PREFETCHITI) + F(AVX_VNNI_INT8) | F(AVX_NE_CONVERT) | F(PREFETCHITI) | + F(AMX_COMPLEX) ); kvm_cpu_cap_mask(CPUID_D_1_EAX, @@ -1154,6 +1189,9 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function) cpuid_entry_override(entry, CPUID_8000_0001_EDX); cpuid_entry_override(entry, CPUID_8000_0001_ECX); break; + case 0x80000005: + /* Pass host L1 cache and TLB info. */ + break; case 0x80000006: /* Drop reserved bits, pass host L2 cache and TLB info. 
*/ entry->edx &= ~GENMASK(17, 16); diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h index b1658c0de847..284fa4704553 100644 --- a/arch/x86/kvm/cpuid.h +++ b/arch/x86/kvm/cpuid.h @@ -232,4 +232,50 @@ static __always_inline bool guest_pv_has(struct kvm_vcpu *vcpu, return vcpu->arch.pv_cpuid.features & (1u << kvm_feature); } +enum kvm_governed_features { +#define KVM_GOVERNED_FEATURE(x) KVM_GOVERNED_##x, +#include "governed_features.h" + KVM_NR_GOVERNED_FEATURES +}; + +static __always_inline int kvm_governed_feature_index(unsigned int x86_feature) +{ + switch (x86_feature) { +#define KVM_GOVERNED_FEATURE(x) case x: return KVM_GOVERNED_##x; +#include "governed_features.h" + default: + return -1; + } +} + +static __always_inline bool kvm_is_governed_feature(unsigned int x86_feature) +{ + return kvm_governed_feature_index(x86_feature) >= 0; +} + +static __always_inline void kvm_governed_feature_set(struct kvm_vcpu *vcpu, + unsigned int x86_feature) +{ + BUILD_BUG_ON(!kvm_is_governed_feature(x86_feature)); + + __set_bit(kvm_governed_feature_index(x86_feature), + vcpu->arch.governed_features.enabled); +} + +static __always_inline void kvm_governed_feature_check_and_set(struct kvm_vcpu *vcpu, + unsigned int x86_feature) +{ + if (kvm_cpu_cap_has(x86_feature) && guest_cpuid_has(vcpu, x86_feature)) + kvm_governed_feature_set(vcpu, x86_feature); +} + +static __always_inline bool guest_can_use(struct kvm_vcpu *vcpu, + unsigned int x86_feature) +{ + BUILD_BUG_ON(!kvm_is_governed_feature(x86_feature)); + + return test_bit(kvm_governed_feature_index(x86_feature), + vcpu->arch.governed_features.enabled); +} + #endif diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 936a397a08cd..2673cd5c46cb 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -1799,13 +1799,11 @@ static int writeback(struct x86_emulate_ctxt *ctxt, struct operand *op) op->addr.mem, &op->val, op->bytes); - break; case OP_MEM_STR: return segmented_write(ctxt, op->addr.mem, 
op->data, op->bytes * op->count); - break; case OP_XMM: kvm_write_sse_reg(op->addr.xmm, &op->vec_val); break; diff --git a/arch/x86/kvm/governed_features.h b/arch/x86/kvm/governed_features.h new file mode 100644 index 000000000000..423a73395c10 --- /dev/null +++ b/arch/x86/kvm/governed_features.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#if !defined(KVM_GOVERNED_FEATURE) || defined(KVM_GOVERNED_X86_FEATURE) +BUILD_BUG() +#endif + +#define KVM_GOVERNED_X86_FEATURE(x) KVM_GOVERNED_FEATURE(X86_FEATURE_##x) + +KVM_GOVERNED_X86_FEATURE(GBPAGES) +KVM_GOVERNED_X86_FEATURE(XSAVES) +KVM_GOVERNED_X86_FEATURE(VMX) +KVM_GOVERNED_X86_FEATURE(NRIPS) +KVM_GOVERNED_X86_FEATURE(TSCRATEMSR) +KVM_GOVERNED_X86_FEATURE(V_VMSAVE_VMLOAD) +KVM_GOVERNED_X86_FEATURE(LBRV) +KVM_GOVERNED_X86_FEATURE(PAUSEFILTER) +KVM_GOVERNED_X86_FEATURE(PFTHRESHOLD) +KVM_GOVERNED_X86_FEATURE(VGIF) +KVM_GOVERNED_X86_FEATURE(VNMI) + +#undef KVM_GOVERNED_X86_FEATURE +#undef KVM_GOVERNED_FEATURE diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index b28fd020066f..7c2dac6824e2 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -1293,7 +1293,6 @@ static bool hv_check_msr_access(struct kvm_vcpu_hv *hv_vcpu, u32 msr) case HV_X64_MSR_VP_ASSIST_PAGE: return hv_vcpu->cpuid_cache.features_eax & HV_MSR_APIC_ACCESS_AVAILABLE; - break; case HV_X64_MSR_TSC_FREQUENCY: case HV_X64_MSR_APIC_FREQUENCY: return hv_vcpu->cpuid_cache.features_eax & diff --git a/arch/x86/kvm/kvm_emulate.h b/arch/x86/kvm/kvm_emulate.h index ab65f3a47dfd..be7aeb9b8ea3 100644 --- a/arch/x86/kvm/kvm_emulate.h +++ b/arch/x86/kvm/kvm_emulate.h @@ -213,7 +213,6 @@ struct x86_emulate_ops { bool (*get_cpuid)(struct x86_emulate_ctxt *ctxt, u32 *eax, u32 *ebx, u32 *ecx, u32 *edx, bool exact_only); - bool (*guest_has_long_mode)(struct x86_emulate_ctxt *ctxt); bool (*guest_has_movbe)(struct x86_emulate_ctxt *ctxt); bool (*guest_has_fxsr)(struct x86_emulate_ctxt *ctxt); bool (*guest_has_rdpid)(struct x86_emulate_ctxt 
*ctxt); diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index a983a16163b1..dcd60b39e794 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -376,7 +376,8 @@ void kvm_recalculate_apic_map(struct kvm *kvm) struct kvm_vcpu *vcpu; unsigned long i; u32 max_id = 255; /* enough space for any xAPIC ID */ - bool xapic_id_mismatch = false; + bool xapic_id_mismatch; + int r; /* Read kvm->arch.apic_map_dirty before kvm->arch.apic_map. */ if (atomic_read_acquire(&kvm->arch.apic_map_dirty) == CLEAN) @@ -386,9 +387,14 @@ void kvm_recalculate_apic_map(struct kvm *kvm) "Dirty APIC map without an in-kernel local APIC"); mutex_lock(&kvm->arch.apic_map_lock); + +retry: /* - * Read kvm->arch.apic_map_dirty before kvm->arch.apic_map - * (if clean) or the APIC registers (if dirty). + * Read kvm->arch.apic_map_dirty before kvm->arch.apic_map (if clean) + * or the APIC registers (if dirty). Note, on retry the map may have + * not yet been marked dirty by whatever task changed a vCPU's x2APIC + * ID, i.e. the map may still show up as in-progress. In that case + * this task still needs to retry and complete its calculation. */ if (atomic_cmpxchg_acquire(&kvm->arch.apic_map_dirty, DIRTY, UPDATE_IN_PROGRESS) == CLEAN) { @@ -397,6 +403,15 @@ void kvm_recalculate_apic_map(struct kvm *kvm) return; } + /* + * Reset the mismatch flag between attempts so that KVM does the right + * thing if a vCPU changes its xAPIC ID, but do NOT reset max_id, i.e. + * keep max_id strictly increasing. Disallowing max_id from shrinking + * ensures KVM won't get stuck in an infinite loop, e.g. if the vCPU + * with the highest x2APIC ID is toggling its APIC on and off. 
+ */ + xapic_id_mismatch = false; + kvm_for_each_vcpu(i, vcpu, kvm) if (kvm_apic_present(vcpu)) max_id = max(max_id, kvm_x2apic_id(vcpu->arch.apic)); @@ -415,9 +430,15 @@ void kvm_recalculate_apic_map(struct kvm *kvm) if (!kvm_apic_present(vcpu)) continue; - if (kvm_recalculate_phys_map(new, vcpu, &xapic_id_mismatch)) { + r = kvm_recalculate_phys_map(new, vcpu, &xapic_id_mismatch); + if (r) { kvfree(new); new = NULL; + if (r == -E2BIG) { + cond_resched(); + goto retry; + } + goto out; } diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h index 92d5a1924fc1..253fb2093d5d 100644 --- a/arch/x86/kvm/mmu.h +++ b/arch/x86/kvm/mmu.h @@ -121,6 +121,8 @@ void kvm_mmu_unload(struct kvm_vcpu *vcpu); void kvm_mmu_free_obsolete_roots(struct kvm_vcpu *vcpu); void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu); void kvm_mmu_sync_prev_roots(struct kvm_vcpu *vcpu); +void kvm_mmu_track_write(struct kvm_vcpu *vcpu, gpa_t gpa, const u8 *new, + int bytes); static inline int kvm_mmu_reload(struct kvm_vcpu *vcpu) { diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index ec169f5c7dce..e1d011c67cc6 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -25,6 +25,7 @@ #include "kvm_cache_regs.h" #include "smm.h" #include "kvm_emulate.h" +#include "page_track.h" #include "cpuid.h" #include "spte.h" @@ -53,7 +54,7 @@ #include <asm/io.h> #include <asm/set_memory.h> #include <asm/vmx.h> -#include <asm/kvm_page_track.h> + #include "trace.h" extern bool itlb_multihit_kvm_mitigation; @@ -115,11 +116,6 @@ static int max_huge_page_level __read_mostly; static int tdp_root_level __read_mostly; static int max_tdp_level __read_mostly; -#ifdef MMU_DEBUG -bool dbg = 0; -module_param(dbg, bool, 0644); -#endif - #define PTE_PREFETCH_NUM 8 #include <trace/events/kvm.h> @@ -278,16 +274,12 @@ static inline bool kvm_available_flush_remote_tlbs_range(void) return kvm_x86_ops.flush_remote_tlbs_range; } -void kvm_flush_remote_tlbs_range(struct kvm *kvm, gfn_t start_gfn, - gfn_t nr_pages) +int 
kvm_arch_flush_remote_tlbs_range(struct kvm *kvm, gfn_t gfn, u64 nr_pages) { - int ret = -EOPNOTSUPP; + if (!kvm_x86_ops.flush_remote_tlbs_range) + return -EOPNOTSUPP; - if (kvm_x86_ops.flush_remote_tlbs_range) - ret = static_call(kvm_x86_flush_remote_tlbs_range)(kvm, start_gfn, - nr_pages); - if (ret) - kvm_flush_remote_tlbs(kvm); + return static_call(kvm_x86_flush_remote_tlbs_range)(kvm, gfn, nr_pages); } static gfn_t kvm_mmu_page_get_gfn(struct kvm_mmu_page *sp, int index); @@ -490,7 +482,7 @@ retry: */ static void mmu_spte_set(u64 *sptep, u64 new_spte) { - WARN_ON(is_shadow_present_pte(*sptep)); + WARN_ON_ONCE(is_shadow_present_pte(*sptep)); __set_spte(sptep, new_spte); } @@ -502,7 +494,7 @@ static u64 mmu_spte_update_no_track(u64 *sptep, u64 new_spte) { u64 old_spte = *sptep; - WARN_ON(!is_shadow_present_pte(new_spte)); + WARN_ON_ONCE(!is_shadow_present_pte(new_spte)); check_spte_writable_invariants(new_spte); if (!is_shadow_present_pte(old_spte)) { @@ -515,7 +507,7 @@ static u64 mmu_spte_update_no_track(u64 *sptep, u64 new_spte) else old_spte = __update_clear_spte_slow(sptep, new_spte); - WARN_ON(spte_to_pfn(old_spte) != spte_to_pfn(new_spte)); + WARN_ON_ONCE(spte_to_pfn(old_spte) != spte_to_pfn(new_spte)); return old_spte; } @@ -597,7 +589,7 @@ static u64 mmu_spte_clear_track_bits(struct kvm *kvm, u64 *sptep) * by a refcounted page, the refcount is elevated. 
*/ page = kvm_pfn_to_refcounted_page(pfn); - WARN_ON(page && !page_count(page)); + WARN_ON_ONCE(page && !page_count(page)); if (is_accessed_spte(old_spte)) kvm_set_pfn_accessed(pfn); @@ -812,7 +804,7 @@ static void update_gfn_disallow_lpage_count(const struct kvm_memory_slot *slot, for (i = PG_LEVEL_2M; i <= KVM_MAX_HUGEPAGE_LEVEL; ++i) { linfo = lpage_info_slot(gfn, slot, i); linfo->disallow_lpage += count; - WARN_ON(linfo->disallow_lpage < 0); + WARN_ON_ONCE(linfo->disallow_lpage < 0); } } @@ -839,8 +831,7 @@ static void account_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp) /* the non-leaf shadow pages are keeping readonly. */ if (sp->role.level > PG_LEVEL_4K) - return kvm_slot_page_track_add_page(kvm, slot, gfn, - KVM_PAGE_TRACK_WRITE); + return __kvm_write_track_add_gfn(kvm, slot, gfn); kvm_mmu_gfn_disallow_lpage(slot, gfn); @@ -886,8 +877,7 @@ static void unaccount_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp) slots = kvm_memslots_for_spte_role(kvm, sp->role); slot = __gfn_to_memslot(slots, gfn); if (sp->role.level > PG_LEVEL_4K) - return kvm_slot_page_track_remove_page(kvm, slot, gfn, - KVM_PAGE_TRACK_WRITE); + return __kvm_write_track_remove_gfn(kvm, slot, gfn); kvm_mmu_gfn_allow_lpage(slot, gfn); } @@ -941,10 +931,8 @@ static int pte_list_add(struct kvm_mmu_memory_cache *cache, u64 *spte, int count = 0; if (!rmap_head->val) { - rmap_printk("%p %llx 0->1\n", spte, *spte); rmap_head->val = (unsigned long)spte; } else if (!(rmap_head->val & 1)) { - rmap_printk("%p %llx 1->many\n", spte, *spte); desc = kvm_mmu_memory_cache_alloc(cache); desc->sptes[0] = (u64 *)rmap_head->val; desc->sptes[1] = spte; @@ -953,7 +941,6 @@ static int pte_list_add(struct kvm_mmu_memory_cache *cache, u64 *spte, rmap_head->val = (unsigned long)desc | 1; ++count; } else { - rmap_printk("%p %llx many->many\n", spte, *spte); desc = (struct pte_list_desc *)(rmap_head->val & ~1ul); count = desc->tail_count + desc->spte_count; @@ -973,7 +960,8 @@ static int pte_list_add(struct 
kvm_mmu_memory_cache *cache, u64 *spte, return count; } -static void pte_list_desc_remove_entry(struct kvm_rmap_head *rmap_head, +static void pte_list_desc_remove_entry(struct kvm *kvm, + struct kvm_rmap_head *rmap_head, struct pte_list_desc *desc, int i) { struct pte_list_desc *head_desc = (struct pte_list_desc *)(rmap_head->val & ~1ul); @@ -984,7 +972,7 @@ static void pte_list_desc_remove_entry(struct kvm_rmap_head *rmap_head, * when adding an entry and the previous head is full, and heads are * removed (this flow) when they become empty. */ - BUG_ON(j < 0); + KVM_BUG_ON_DATA_CORRUPTION(j < 0, kvm); /* * Replace the to-be-freed SPTE with the last valid entry from the head @@ -1009,35 +997,34 @@ static void pte_list_desc_remove_entry(struct kvm_rmap_head *rmap_head, mmu_free_pte_list_desc(head_desc); } -static void pte_list_remove(u64 *spte, struct kvm_rmap_head *rmap_head) +static void pte_list_remove(struct kvm *kvm, u64 *spte, + struct kvm_rmap_head *rmap_head) { struct pte_list_desc *desc; int i; - if (!rmap_head->val) { - pr_err("%s: %p 0->BUG\n", __func__, spte); - BUG(); - } else if (!(rmap_head->val & 1)) { - rmap_printk("%p 1->0\n", spte); - if ((u64 *)rmap_head->val != spte) { - pr_err("%s: %p 1->BUG\n", __func__, spte); - BUG(); - } + if (KVM_BUG_ON_DATA_CORRUPTION(!rmap_head->val, kvm)) + return; + + if (!(rmap_head->val & 1)) { + if (KVM_BUG_ON_DATA_CORRUPTION((u64 *)rmap_head->val != spte, kvm)) + return; + rmap_head->val = 0; } else { - rmap_printk("%p many->many\n", spte); desc = (struct pte_list_desc *)(rmap_head->val & ~1ul); while (desc) { for (i = 0; i < desc->spte_count; ++i) { if (desc->sptes[i] == spte) { - pte_list_desc_remove_entry(rmap_head, desc, i); + pte_list_desc_remove_entry(kvm, rmap_head, + desc, i); return; } } desc = desc->more; } - pr_err("%s: %p many->many\n", __func__, spte); - BUG(); + + KVM_BUG_ON_DATA_CORRUPTION(true, kvm); } } @@ -1045,7 +1032,7 @@ static void kvm_zap_one_rmap_spte(struct kvm *kvm, struct kvm_rmap_head 
*rmap_head, u64 *sptep) { mmu_spte_clear_track_bits(kvm, sptep); - pte_list_remove(sptep, rmap_head); + pte_list_remove(kvm, sptep, rmap_head); } /* Return true if at least one SPTE was zapped, false otherwise */ @@ -1120,7 +1107,7 @@ static void rmap_remove(struct kvm *kvm, u64 *spte) slot = __gfn_to_memslot(slots, gfn); rmap_head = gfn_to_rmap(gfn, sp->role.level, slot); - pte_list_remove(spte, rmap_head); + pte_list_remove(kvm, spte, rmap_head); } /* @@ -1212,7 +1199,7 @@ static void drop_large_spte(struct kvm *kvm, u64 *sptep, bool flush) struct kvm_mmu_page *sp; sp = sptep_to_sp(sptep); - WARN_ON(sp->role.level == PG_LEVEL_4K); + WARN_ON_ONCE(sp->role.level == PG_LEVEL_4K); drop_spte(kvm, sptep); @@ -1241,8 +1228,6 @@ static bool spte_write_protect(u64 *sptep, bool pt_protect) !(pt_protect && is_mmu_writable_spte(spte))) return false; - rmap_printk("spte %p %llx\n", sptep, *sptep); - if (pt_protect) spte &= ~shadow_mmu_writable_mask; spte = spte & ~PT_WRITABLE_MASK; @@ -1267,9 +1252,7 @@ static bool spte_clear_dirty(u64 *sptep) { u64 spte = *sptep; - rmap_printk("spte %p %llx\n", sptep, *sptep); - - MMU_WARN_ON(!spte_ad_enabled(spte)); + KVM_MMU_WARN_ON(!spte_ad_enabled(spte)); spte &= ~shadow_dirty_mask; return mmu_spte_update(sptep, spte); } @@ -1475,14 +1458,11 @@ static bool kvm_set_pte_rmap(struct kvm *kvm, struct kvm_rmap_head *rmap_head, u64 new_spte; kvm_pfn_t new_pfn; - WARN_ON(pte_huge(pte)); + WARN_ON_ONCE(pte_huge(pte)); new_pfn = pte_pfn(pte); restart: for_each_rmap_spte(rmap_head, &iter, sptep) { - rmap_printk("spte %p %llx gfn %llx (%d)\n", - sptep, *sptep, gfn, level); - need_flush = true; if (pte_write(pte)) { @@ -1588,7 +1568,7 @@ static __always_inline bool kvm_handle_gfn_range(struct kvm *kvm, for_each_slot_rmap_range(range->slot, PG_LEVEL_4K, KVM_MAX_HUGEPAGE_LEVEL, range->start, range->end - 1, &iterator) ret |= handler(kvm, iterator.rmap, range->slot, iterator.gfn, - iterator.level, range->pte); + iterator.level, range->arg.pte); return 
ret; } @@ -1710,21 +1690,19 @@ bool kvm_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range) return young; } -#ifdef MMU_DEBUG -static int is_empty_shadow_page(u64 *spt) +static void kvm_mmu_check_sptes_at_free(struct kvm_mmu_page *sp) { - u64 *pos; - u64 *end; +#ifdef CONFIG_KVM_PROVE_MMU + int i; - for (pos = spt, end = pos + SPTE_ENT_PER_PAGE; pos != end; pos++) - if (is_shadow_present_pte(*pos)) { - printk(KERN_ERR "%s: %p %llx\n", __func__, - pos, *pos); - return 0; - } - return 1; -} + for (i = 0; i < SPTE_ENT_PER_PAGE; i++) { + if (KVM_MMU_WARN_ON(is_shadow_present_pte(sp->spt[i]))) + pr_err_ratelimited("SPTE %llx (@ %p) for gfn %llx shadow-present at free", + sp->spt[i], &sp->spt[i], + kvm_mmu_page_get_gfn(sp, i)); + } #endif +} /* * This value is the sum of all of the kvm instances's @@ -1752,7 +1730,8 @@ static void kvm_unaccount_mmu_page(struct kvm *kvm, struct kvm_mmu_page *sp) static void kvm_mmu_free_shadow_page(struct kvm_mmu_page *sp) { - MMU_WARN_ON(!is_empty_shadow_page(sp->spt)); + kvm_mmu_check_sptes_at_free(sp); + hlist_del(&sp->hash_link); list_del(&sp->link); free_page((unsigned long)sp->spt); @@ -1775,16 +1754,16 @@ static void mmu_page_add_parent_pte(struct kvm_mmu_memory_cache *cache, pte_list_add(cache, parent_pte, &sp->parent_ptes); } -static void mmu_page_remove_parent_pte(struct kvm_mmu_page *sp, +static void mmu_page_remove_parent_pte(struct kvm *kvm, struct kvm_mmu_page *sp, u64 *parent_pte) { - pte_list_remove(parent_pte, &sp->parent_ptes); + pte_list_remove(kvm, parent_pte, &sp->parent_ptes); } -static void drop_parent_pte(struct kvm_mmu_page *sp, +static void drop_parent_pte(struct kvm *kvm, struct kvm_mmu_page *sp, u64 *parent_pte) { - mmu_page_remove_parent_pte(sp, parent_pte); + mmu_page_remove_parent_pte(kvm, sp, parent_pte); mmu_spte_clear_no_track(parent_pte); } @@ -1840,7 +1819,7 @@ static int mmu_pages_add(struct kvm_mmu_pages *pvec, struct kvm_mmu_page *sp, static inline void clear_unsync_child_bit(struct 
kvm_mmu_page *sp, int idx) { --sp->unsync_children; - WARN_ON((int)sp->unsync_children < 0); + WARN_ON_ONCE((int)sp->unsync_children < 0); __clear_bit(idx, sp->unsync_child_bitmap); } @@ -1898,7 +1877,7 @@ static int mmu_unsync_walk(struct kvm_mmu_page *sp, static void kvm_unlink_unsync_page(struct kvm *kvm, struct kvm_mmu_page *sp) { - WARN_ON(!sp->unsync); + WARN_ON_ONCE(!sp->unsync); trace_kvm_mmu_sync_page(sp); sp->unsync = 0; --kvm->stat.mmu_unsync; @@ -2073,11 +2052,11 @@ static int mmu_pages_first(struct kvm_mmu_pages *pvec, if (pvec->nr == 0) return 0; - WARN_ON(pvec->page[0].idx != INVALID_INDEX); + WARN_ON_ONCE(pvec->page[0].idx != INVALID_INDEX); sp = pvec->page[0].sp; level = sp->role.level; - WARN_ON(level == PG_LEVEL_4K); + WARN_ON_ONCE(level == PG_LEVEL_4K); parents->parent[level-2] = sp; @@ -2099,7 +2078,7 @@ static void mmu_pages_clear_parents(struct mmu_page_path *parents) if (!sp) return; - WARN_ON(idx == INVALID_INDEX); + WARN_ON_ONCE(idx == INVALID_INDEX); clear_unsync_child_bit(sp, idx); level++; } while (!sp->unsync_children); @@ -2220,7 +2199,7 @@ static struct kvm_mmu_page *kvm_mmu_find_shadow_page(struct kvm *kvm, if (ret < 0) break; - WARN_ON(!list_empty(&invalid_list)); + WARN_ON_ONCE(!list_empty(&invalid_list)); if (ret > 0) kvm_flush_remote_tlbs(kvm); } @@ -2499,7 +2478,7 @@ static void validate_direct_spte(struct kvm_vcpu *vcpu, u64 *sptep, if (child->role.access == direct_access) return; - drop_parent_pte(child, sptep); + drop_parent_pte(vcpu->kvm, child, sptep); kvm_flush_remote_tlbs_sptep(vcpu->kvm, sptep); } } @@ -2517,7 +2496,7 @@ static int mmu_page_zap_pte(struct kvm *kvm, struct kvm_mmu_page *sp, drop_spte(kvm, spte); } else { child = spte_to_child_sp(pte); - drop_parent_pte(child, spte); + drop_parent_pte(kvm, child, spte); /* * Recursively zap nested TDP SPs, parentless SPs are @@ -2548,13 +2527,13 @@ static int kvm_mmu_page_unlink_children(struct kvm *kvm, return zapped; } -static void kvm_mmu_unlink_parents(struct 
kvm_mmu_page *sp) +static void kvm_mmu_unlink_parents(struct kvm *kvm, struct kvm_mmu_page *sp) { u64 *sptep; struct rmap_iterator iter; while ((sptep = rmap_get_first(&sp->parent_ptes, &iter))) - drop_parent_pte(sp, sptep); + drop_parent_pte(kvm, sp, sptep); } static int mmu_zap_unsync_children(struct kvm *kvm, @@ -2593,7 +2572,7 @@ static bool __kvm_mmu_prepare_zap_page(struct kvm *kvm, ++kvm->stat.mmu_shadow_zapped; *nr_zapped = mmu_zap_unsync_children(kvm, sp, invalid_list); *nr_zapped += kvm_mmu_page_unlink_children(kvm, sp, invalid_list); - kvm_mmu_unlink_parents(sp); + kvm_mmu_unlink_parents(kvm, sp); /* Zapping children means active_mmu_pages has become unstable. */ list_unstable = *nr_zapped; @@ -2675,7 +2654,7 @@ static void kvm_mmu_commit_zap_page(struct kvm *kvm, kvm_flush_remote_tlbs(kvm); list_for_each_entry_safe(sp, nsp, invalid_list, link) { - WARN_ON(!sp->role.invalid || sp->root_count); + WARN_ON_ONCE(!sp->role.invalid || sp->root_count); kvm_mmu_free_shadow_page(sp); } } @@ -2775,12 +2754,9 @@ int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn) LIST_HEAD(invalid_list); int r; - pgprintk("%s: looking for gfn %llx\n", __func__, gfn); r = 0; write_lock(&kvm->mmu_lock); for_each_gfn_valid_sp_with_gptes(kvm, sp, gfn) { - pgprintk("%s: gfn %llx role %x\n", __func__, gfn, - sp->role.word); r = 1; kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list); } @@ -2831,7 +2807,7 @@ int mmu_try_to_unsync_pages(struct kvm *kvm, const struct kvm_memory_slot *slot, * track machinery is used to write-protect upper-level shadow pages, * i.e. this guards the role.level == 4K assertion below! 
*/ - if (kvm_slot_page_track_is_active(kvm, slot, gfn, KVM_PAGE_TRACK_WRITE)) + if (kvm_gfn_is_write_tracked(kvm, slot, gfn)) return -EPERM; /* @@ -2873,7 +2849,7 @@ int mmu_try_to_unsync_pages(struct kvm *kvm, const struct kvm_memory_slot *slot, continue; } - WARN_ON(sp->role.level != PG_LEVEL_4K); + WARN_ON_ONCE(sp->role.level != PG_LEVEL_4K); kvm_unsync_page(kvm, sp); } if (locked) @@ -2938,9 +2914,6 @@ static int mmu_set_spte(struct kvm_vcpu *vcpu, struct kvm_memory_slot *slot, bool prefetch = !fault || fault->prefetch; bool write_fault = fault && fault->write; - pgprintk("%s: spte %llx write_fault %d gfn %llx\n", __func__, - *sptep, write_fault, gfn); - if (unlikely(is_noslot_pfn(pfn))) { vcpu->stat.pf_mmio_spte_created++; mark_mmio_spte(vcpu, sptep, gfn, pte_access); @@ -2957,11 +2930,9 @@ static int mmu_set_spte(struct kvm_vcpu *vcpu, struct kvm_memory_slot *slot, u64 pte = *sptep; child = spte_to_child_sp(pte); - drop_parent_pte(child, sptep); + drop_parent_pte(vcpu->kvm, child, sptep); flush = true; } else if (pfn != spte_to_pfn(*sptep)) { - pgprintk("hfn old %llx new %llx\n", - spte_to_pfn(*sptep), pfn); drop_spte(vcpu->kvm, sptep); flush = true; } else @@ -2986,8 +2957,6 @@ static int mmu_set_spte(struct kvm_vcpu *vcpu, struct kvm_memory_slot *slot, if (flush) kvm_flush_remote_tlbs_gfn(vcpu->kvm, gfn, level); - pgprintk("%s: setting spte %llx\n", __func__, *sptep); - if (!was_rmapped) { WARN_ON_ONCE(ret == RET_PF_SPURIOUS); rmap_add(vcpu, slot, sptep, gfn, pte_access); @@ -3033,7 +3002,7 @@ static void __direct_pte_prefetch(struct kvm_vcpu *vcpu, u64 *spte, *start = NULL; int i; - WARN_ON(!sp->role.direct); + WARN_ON_ONCE(!sp->role.direct); i = spte_index(sptep) & ~(PTE_PREFETCH_NUM - 1); spte = sp->spt + i; @@ -3574,12 +3543,8 @@ static void mmu_free_root_page(struct kvm *kvm, hpa_t *root_hpa, if (!VALID_PAGE(*root_hpa)) return; - /* - * The "root" may be a special root, e.g. a PAE entry, treat it as a - * SPTE to ensure any non-PA bits are dropped. 
- */ - sp = spte_to_child_sp(*root_hpa); - if (WARN_ON(!sp)) + sp = root_to_sp(*root_hpa); + if (WARN_ON_ONCE(!sp)) return; if (is_tdp_mmu_page(sp)) @@ -3624,7 +3589,9 @@ void kvm_mmu_free_roots(struct kvm *kvm, struct kvm_mmu *mmu, &invalid_list); if (free_active_root) { - if (to_shadow_page(mmu->root.hpa)) { + if (kvm_mmu_is_dummy_root(mmu->root.hpa)) { + /* Nothing to cleanup for dummy roots. */ + } else if (root_to_sp(mmu->root.hpa)) { mmu_free_root_page(kvm, &mmu->root.hpa, &invalid_list); } else if (mmu->pae_root) { for (i = 0; i < 4; ++i) { @@ -3648,6 +3615,7 @@ EXPORT_SYMBOL_GPL(kvm_mmu_free_roots); void kvm_mmu_free_guest_mode_roots(struct kvm *kvm, struct kvm_mmu *mmu) { unsigned long roots_to_free = 0; + struct kvm_mmu_page *sp; hpa_t root_hpa; int i; @@ -3662,8 +3630,8 @@ void kvm_mmu_free_guest_mode_roots(struct kvm *kvm, struct kvm_mmu *mmu) if (!VALID_PAGE(root_hpa)) continue; - if (!to_shadow_page(root_hpa) || - to_shadow_page(root_hpa)->role.guest_mode) + sp = root_to_sp(root_hpa); + if (!sp || sp->role.guest_mode) roots_to_free |= KVM_MMU_ROOT_PREVIOUS(i); } @@ -3671,19 +3639,6 @@ void kvm_mmu_free_guest_mode_roots(struct kvm *kvm, struct kvm_mmu *mmu) } EXPORT_SYMBOL_GPL(kvm_mmu_free_guest_mode_roots); - -static int mmu_check_root(struct kvm_vcpu *vcpu, gfn_t root_gfn) -{ - int ret = 0; - - if (!kvm_vcpu_is_visible_gfn(vcpu, root_gfn)) { - kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu); - ret = 1; - } - - return ret; -} - static hpa_t mmu_alloc_root(struct kvm_vcpu *vcpu, gfn_t gfn, int quadrant, u8 level) { @@ -3821,8 +3776,10 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu) root_pgd = kvm_mmu_get_guest_pgd(vcpu, mmu); root_gfn = root_pgd >> PAGE_SHIFT; - if (mmu_check_root(vcpu, root_gfn)) - return 1; + if (!kvm_vcpu_is_visible_gfn(vcpu, root_gfn)) { + mmu->root.hpa = kvm_mmu_get_dummy_root(); + return 0; + } /* * On SVM, reading PDPTRs might access guest memory, which might fault @@ -3834,8 +3791,8 @@ static int 
mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu) if (!(pdptrs[i] & PT_PRESENT_MASK)) continue; - if (mmu_check_root(vcpu, pdptrs[i] >> PAGE_SHIFT)) - return 1; + if (!kvm_vcpu_is_visible_gfn(vcpu, pdptrs[i] >> PAGE_SHIFT)) + pdptrs[i] = 0; } } @@ -4002,7 +3959,7 @@ static bool is_unsync_root(hpa_t root) { struct kvm_mmu_page *sp; - if (!VALID_PAGE(root)) + if (!VALID_PAGE(root) || kvm_mmu_is_dummy_root(root)) return false; /* @@ -4018,7 +3975,7 @@ static bool is_unsync_root(hpa_t root) * requirement isn't satisfied. */ smp_rmb(); - sp = to_shadow_page(root); + sp = root_to_sp(root); /* * PAE roots (somewhat arbitrarily) aren't backed by shadow pages, the @@ -4048,11 +4005,12 @@ void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu) if (vcpu->arch.mmu->cpu_role.base.level >= PT64_ROOT_4LEVEL) { hpa_t root = vcpu->arch.mmu->root.hpa; - sp = to_shadow_page(root); if (!is_unsync_root(root)) return; + sp = root_to_sp(root); + write_lock(&vcpu->kvm->mmu_lock); mmu_sync_children(vcpu, sp, true); write_unlock(&vcpu->kvm->mmu_lock); @@ -4194,7 +4152,7 @@ static int handle_mmio_page_fault(struct kvm_vcpu *vcpu, u64 addr, bool direct) return RET_PF_EMULATE; reserved = get_mmio_spte(vcpu, addr, &spte); - if (WARN_ON(reserved)) + if (WARN_ON_ONCE(reserved)) return -EINVAL; if (is_mmio_spte(spte)) { @@ -4232,7 +4190,7 @@ static bool page_fault_handle_page_track(struct kvm_vcpu *vcpu, * guest is writing the page which is write tracked which can * not be fixed by page fault handler. 
*/ - if (kvm_slot_page_track_is_active(vcpu->kvm, fault->slot, fault->gfn, KVM_PAGE_TRACK_WRITE)) + if (kvm_gfn_is_write_tracked(vcpu->kvm, fault->slot, fault->gfn)) return true; return false; @@ -4382,7 +4340,7 @@ static int kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault, static bool is_page_fault_stale(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault) { - struct kvm_mmu_page *sp = to_shadow_page(vcpu->arch.mmu->root.hpa); + struct kvm_mmu_page *sp = root_to_sp(vcpu->arch.mmu->root.hpa); /* Special roots, e.g. pae_root, are not backed by shadow pages. */ if (sp && is_obsolete_sp(vcpu->kvm, sp)) @@ -4407,6 +4365,10 @@ static int direct_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault { int r; + /* Dummy roots are used only for shadowing bad guest roots. */ + if (WARN_ON_ONCE(kvm_mmu_is_dummy_root(vcpu->arch.mmu->root.hpa))) + return RET_PF_RETRY; + if (page_fault_handle_page_track(vcpu, fault)) return RET_PF_EMULATE; @@ -4443,8 +4405,6 @@ out_unlock: static int nonpaging_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault) { - pgprintk("%s: gva %lx error %x\n", __func__, fault->addr, fault->error_code); - /* This path builds a PAE pagetable, we can map 2mb pages at maximum. 
*/ fault->max_level = PG_LEVEL_2M; return direct_page_fault(vcpu, fault); @@ -4562,9 +4522,19 @@ static void nonpaging_init_context(struct kvm_mmu *context) static inline bool is_root_usable(struct kvm_mmu_root_info *root, gpa_t pgd, union kvm_mmu_page_role role) { - return (role.direct || pgd == root->pgd) && - VALID_PAGE(root->hpa) && - role.word == to_shadow_page(root->hpa)->role.word; + struct kvm_mmu_page *sp; + + if (!VALID_PAGE(root->hpa)) + return false; + + if (!role.direct && pgd != root->pgd) + return false; + + sp = root_to_sp(root->hpa); + if (WARN_ON_ONCE(!sp)) + return false; + + return role.word == sp->role.word; } /* @@ -4634,11 +4604,10 @@ static bool fast_pgd_switch(struct kvm *kvm, struct kvm_mmu *mmu, gpa_t new_pgd, union kvm_mmu_page_role new_role) { /* - * For now, limit the caching to 64-bit hosts+VMs in order to avoid - * having to deal with PDPTEs. We may add support for 32-bit hosts/VMs - * later if necessary. + * Limit reuse to 64-bit hosts+VMs without "special" roots in order to + * avoid having to deal with PDPTEs and other complexities. */ - if (VALID_PAGE(mmu->root.hpa) && !to_shadow_page(mmu->root.hpa)) + if (VALID_PAGE(mmu->root.hpa) && !root_to_sp(mmu->root.hpa)) kvm_mmu_free_roots(kvm, mmu, KVM_MMU_ROOT_CURRENT); if (VALID_PAGE(mmu->root.hpa)) @@ -4684,9 +4653,12 @@ void kvm_mmu_new_pgd(struct kvm_vcpu *vcpu, gpa_t new_pgd) * If this is a direct root page, it doesn't have a write flooding * count. Otherwise, clear the write flooding count. 
*/ - if (!new_role.direct) - __clear_sp_write_flooding_count( - to_shadow_page(vcpu->arch.mmu->root.hpa)); + if (!new_role.direct) { + struct kvm_mmu_page *sp = root_to_sp(vcpu->arch.mmu->root.hpa); + + if (!WARN_ON_ONCE(!sp)) + __clear_sp_write_flooding_count(sp); + } } EXPORT_SYMBOL_GPL(kvm_mmu_new_pgd); @@ -4808,28 +4780,13 @@ static void __reset_rsvds_bits_mask(struct rsvd_bits_validate *rsvd_check, } } -static bool guest_can_use_gbpages(struct kvm_vcpu *vcpu) -{ - /* - * If TDP is enabled, let the guest use GBPAGES if they're supported in - * hardware. The hardware page walker doesn't let KVM disable GBPAGES, - * i.e. won't treat them as reserved, and KVM doesn't redo the GVA->GPA - * walk for performance and complexity reasons. Not to mention KVM - * _can't_ solve the problem because GVA->GPA walks aren't visible to - * KVM once a TDP translation is installed. Mimic hardware behavior so - * that KVM's is at least consistent, i.e. doesn't randomly inject #PF. - */ - return tdp_enabled ? boot_cpu_has(X86_FEATURE_GBPAGES) : - guest_cpuid_has(vcpu, X86_FEATURE_GBPAGES); -} - static void reset_guest_rsvds_bits_mask(struct kvm_vcpu *vcpu, struct kvm_mmu *context) { __reset_rsvds_bits_mask(&context->guest_rsvd_check, vcpu->arch.reserved_gpa_bits, context->cpu_role.base.level, is_efer_nx(context), - guest_can_use_gbpages(vcpu), + guest_can_use(vcpu, X86_FEATURE_GBPAGES), is_cr4_pse(context), guest_cpuid_is_amd_or_hygon(vcpu)); } @@ -4906,7 +4863,8 @@ static void reset_shadow_zero_bits_mask(struct kvm_vcpu *vcpu, __reset_rsvds_bits_mask(shadow_zero_check, reserved_hpa_bits(), context->root_role.level, context->root_role.efer_nx, - guest_can_use_gbpages(vcpu), is_pse, is_amd); + guest_can_use(vcpu, X86_FEATURE_GBPAGES), + is_pse, is_amd); if (!shadow_me_mask) return; @@ -5467,8 +5425,8 @@ void kvm_mmu_after_set_cpuid(struct kvm_vcpu *vcpu) * physical address properties) in a single VM would require tracking * all relevant CPUID information in kvm_mmu_page_role. 
That is very * undesirable as it would increase the memory requirements for - * gfn_track (see struct kvm_mmu_page_role comments). For now that - * problem is swept under the rug; KVM's CPUID API is horrific and + * gfn_write_track (see struct kvm_mmu_page_role comments). For now + * that problem is swept under the rug; KVM's CPUID API is horrific and * it's all but impossible to solve it without introducing a new API. */ vcpu->arch.root_mmu.root_role.word = 0; @@ -5531,9 +5489,9 @@ void kvm_mmu_unload(struct kvm_vcpu *vcpu) struct kvm *kvm = vcpu->kvm; kvm_mmu_free_roots(kvm, &vcpu->arch.root_mmu, KVM_MMU_ROOTS_ALL); - WARN_ON(VALID_PAGE(vcpu->arch.root_mmu.root.hpa)); + WARN_ON_ONCE(VALID_PAGE(vcpu->arch.root_mmu.root.hpa)); kvm_mmu_free_roots(kvm, &vcpu->arch.guest_mmu, KVM_MMU_ROOTS_ALL); - WARN_ON(VALID_PAGE(vcpu->arch.guest_mmu.root.hpa)); + WARN_ON_ONCE(VALID_PAGE(vcpu->arch.guest_mmu.root.hpa)); vcpu_clear_mmio_info(vcpu, MMIO_GVA_ANY); } @@ -5546,16 +5504,21 @@ static bool is_obsolete_root(struct kvm *kvm, hpa_t root_hpa) /* * When freeing obsolete roots, treat roots as obsolete if they don't - * have an associated shadow page. This does mean KVM will get false + * have an associated shadow page, as it's impossible to determine if + * such roots are fresh or stale. This does mean KVM will get false * positives and free roots that don't strictly need to be freed, but * such false positives are relatively rare: * - * (a) only PAE paging and nested NPT has roots without shadow pages + * (a) only PAE paging and nested NPT have roots without shadow pages + * (or any shadow paging flavor with a dummy root, see note below) * (b) remote reloads due to a memslot update obsoletes _all_ roots * (c) KVM doesn't track previous roots for PAE paging, and the guest * is unlikely to zap an in-use PGD. + * + * Note! Dummy roots are unique in that they are obsoleted by memslot + * _creation_! See also FNAME(fetch). 
*/ - sp = to_shadow_page(root_hpa); + sp = root_to_sp(root_hpa); return !sp || is_obsolete_sp(kvm, sp); } @@ -5634,9 +5597,6 @@ static bool detect_write_misaligned(struct kvm_mmu_page *sp, gpa_t gpa, { unsigned offset, pte_size, misaligned; - pgprintk("misaligned: gpa %llx bytes %d role %x\n", - gpa, bytes, sp->role.word); - offset = offset_in_page(gpa); pte_size = sp->role.has_4_byte_gpte ? 4 : 8; @@ -5684,9 +5644,8 @@ static u64 *get_written_sptes(struct kvm_mmu_page *sp, gpa_t gpa, int *nspte) return spte; } -static void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, - const u8 *new, int bytes, - struct kvm_page_track_notifier_node *node) +void kvm_mmu_track_write(struct kvm_vcpu *vcpu, gpa_t gpa, const u8 *new, + int bytes) { gfn_t gfn = gpa >> PAGE_SHIFT; struct kvm_mmu_page *sp; @@ -5702,8 +5661,6 @@ static void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, if (!READ_ONCE(vcpu->kvm->arch.indirect_shadow_pages)) return; - pgprintk("%s: gpa %llx bytes %d\n", __func__, gpa, bytes); - write_lock(&vcpu->kvm->mmu_lock); gentry = mmu_pte_write_fetch_gpte(vcpu, &gpa, &bytes); @@ -5742,7 +5699,18 @@ int noinline kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u64 err int r, emulation_type = EMULTYPE_PF; bool direct = vcpu->arch.mmu->root_role.direct; - if (WARN_ON(!VALID_PAGE(vcpu->arch.mmu->root.hpa))) + /* + * IMPLICIT_ACCESS is a KVM-defined flag used to correctly perform SMAP + * checks when emulating instructions that triggers implicit access. + * WARN if hardware generates a fault with an error code that collides + * with the KVM-defined value. Clear the flag and continue on, i.e. + * don't terminate the VM, as KVM can't possibly be relying on a flag + * that KVM doesn't know about. + */ + if (WARN_ON_ONCE(error_code & PFERR_IMPLICIT_ACCESS)) + error_code &= ~PFERR_IMPLICIT_ACCESS; + + if (WARN_ON_ONCE(!VALID_PAGE(vcpu->arch.mmu->root.hpa))) return RET_PF_RETRY; r = RET_PF_INVALID; @@ -6099,7 +6067,7 @@ restart: * pages. 
Skip the bogus page, otherwise we'll get stuck in an * infinite loop if the page gets put back on the list (again). */ - if (WARN_ON(sp->role.invalid)) + if (WARN_ON_ONCE(sp->role.invalid)) continue; /* @@ -6199,16 +6167,8 @@ static bool kvm_has_zapped_obsolete_pages(struct kvm *kvm) return unlikely(!list_empty_careful(&kvm->arch.zapped_obsolete_pages)); } -static void kvm_mmu_invalidate_zap_pages_in_memslot(struct kvm *kvm, - struct kvm_memory_slot *slot, - struct kvm_page_track_notifier_node *node) -{ - kvm_mmu_zap_all_fast(kvm); -} - int kvm_mmu_init_vm(struct kvm *kvm) { - struct kvm_page_track_notifier_node *node = &kvm->arch.mmu_sp_tracker; int r; INIT_LIST_HEAD(&kvm->arch.active_mmu_pages); @@ -6222,10 +6182,6 @@ int kvm_mmu_init_vm(struct kvm *kvm) return r; } - node->track_write = kvm_mmu_pte_write; - node->track_flush_slot = kvm_mmu_invalidate_zap_pages_in_memslot; - kvm_page_track_register_notifier(kvm, node); - kvm->arch.split_page_header_cache.kmem_cache = mmu_page_header_cache; kvm->arch.split_page_header_cache.gfp_zero = __GFP_ZERO; @@ -6246,10 +6202,6 @@ static void mmu_free_vm_memory_caches(struct kvm *kvm) void kvm_mmu_uninit_vm(struct kvm *kvm) { - struct kvm_page_track_notifier_node *node = &kvm->arch.mmu_sp_tracker; - - kvm_page_track_unregister_notifier(kvm, node); - if (tdp_mmu_enabled) kvm_mmu_uninit_tdp_mmu(kvm); @@ -6670,7 +6622,7 @@ static void kvm_rmap_zap_collapsible_sptes(struct kvm *kvm, */ if (walk_slot_rmaps(kvm, slot, kvm_mmu_zap_collapsible_spte, PG_LEVEL_4K, KVM_MAX_HUGEPAGE_LEVEL - 1, true)) - kvm_arch_flush_remote_tlbs_memslot(kvm, slot); + kvm_flush_remote_tlbs_memslot(kvm, slot); } void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm, @@ -6689,20 +6641,6 @@ void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm, } } -void kvm_arch_flush_remote_tlbs_memslot(struct kvm *kvm, - const struct kvm_memory_slot *memslot) -{ - /* - * All current use cases for flushing the TLBs for a specific memslot - * related to dirty logging, and many do 
the TLB flush out of mmu_lock. - * The interaction between the various operations on memslot must be - * serialized by slots_locks to ensure the TLB flush from one operation - * is observed by any other operation on the same memslot. - */ - lockdep_assert_held(&kvm->slots_lock); - kvm_flush_remote_tlbs_range(kvm, memslot->base_gfn, memslot->npages); -} - void kvm_mmu_slot_leaf_clear_dirty(struct kvm *kvm, const struct kvm_memory_slot *memslot) { @@ -6732,7 +6670,7 @@ void kvm_mmu_slot_leaf_clear_dirty(struct kvm *kvm, */ } -void kvm_mmu_zap_all(struct kvm *kvm) +static void kvm_mmu_zap_all(struct kvm *kvm) { struct kvm_mmu_page *sp, *node; LIST_HEAD(invalid_list); @@ -6741,7 +6679,7 @@ void kvm_mmu_zap_all(struct kvm *kvm) write_lock(&kvm->mmu_lock); restart: list_for_each_entry_safe(sp, node, &kvm->arch.active_mmu_pages, link) { - if (WARN_ON(sp->role.invalid)) + if (WARN_ON_ONCE(sp->role.invalid)) continue; if (__kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list, &ign)) goto restart; @@ -6757,9 +6695,20 @@ restart: write_unlock(&kvm->mmu_lock); } +void kvm_arch_flush_shadow_all(struct kvm *kvm) +{ + kvm_mmu_zap_all(kvm); +} + +void kvm_arch_flush_shadow_memslot(struct kvm *kvm, + struct kvm_memory_slot *slot) +{ + kvm_mmu_zap_all_fast(kvm); +} + void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm, u64 gen) { - WARN_ON(gen & KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS); + WARN_ON_ONCE(gen & KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS); gen &= MMIO_SPTE_GEN_MASK; @@ -6862,7 +6811,7 @@ static void mmu_destroy_caches(void) static int get_nx_huge_pages(char *buffer, const struct kernel_param *kp) { if (nx_hugepage_mitigation_hard_disabled) - return sprintf(buffer, "never\n"); + return sysfs_emit(buffer, "never\n"); return param_get_bool(buffer, kp); } diff --git a/arch/x86/kvm/mmu/mmu_internal.h b/arch/x86/kvm/mmu/mmu_internal.h index d39af5639ce9..b102014e2c60 100644 --- a/arch/x86/kvm/mmu/mmu_internal.h +++ b/arch/x86/kvm/mmu/mmu_internal.h @@ -6,18 +6,10 @@ #include <linux/kvm_host.h> 
#include <asm/kvm_host.h> -#undef MMU_DEBUG - -#ifdef MMU_DEBUG -extern bool dbg; - -#define pgprintk(x...) do { if (dbg) printk(x); } while (0) -#define rmap_printk(fmt, args...) do { if (dbg) printk("%s: " fmt, __func__, ## args); } while (0) -#define MMU_WARN_ON(x) WARN_ON(x) +#ifdef CONFIG_KVM_PROVE_MMU +#define KVM_MMU_WARN_ON(x) WARN_ON_ONCE(x) #else -#define pgprintk(x...) do { } while (0) -#define rmap_printk(x...) do { } while (0) -#define MMU_WARN_ON(x) do { } while (0) +#define KVM_MMU_WARN_ON(x) BUILD_BUG_ON_INVALID(x) #endif /* Page table builder macros common to shadow (host) PTEs and guest PTEs. */ @@ -44,6 +36,16 @@ extern bool dbg; #define INVALID_PAE_ROOT 0 #define IS_VALID_PAE_ROOT(x) (!!(x)) +static inline hpa_t kvm_mmu_get_dummy_root(void) +{ + return my_zero_pfn(0) << PAGE_SHIFT; +} + +static inline bool kvm_mmu_is_dummy_root(hpa_t shadow_page) +{ + return is_zero_pfn(shadow_page >> PAGE_SHIFT); +} + typedef u64 __rcu *tdp_ptep_t; struct kvm_mmu_page { @@ -170,9 +172,6 @@ bool kvm_mmu_slot_gfn_write_protect(struct kvm *kvm, struct kvm_memory_slot *slot, u64 gfn, int min_level); -void kvm_flush_remote_tlbs_range(struct kvm *kvm, gfn_t start_gfn, - gfn_t nr_pages); - /* Flush the given page (huge or not) of guest memory. 
*/ static inline void kvm_flush_remote_tlbs_gfn(struct kvm *kvm, gfn_t gfn, int level) { diff --git a/arch/x86/kvm/mmu/page_track.c b/arch/x86/kvm/mmu/page_track.c index 0a2ac438d647..c87da11f3a04 100644 --- a/arch/x86/kvm/mmu/page_track.c +++ b/arch/x86/kvm/mmu/page_track.c @@ -12,13 +12,13 @@ */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include <linux/lockdep.h> #include <linux/kvm_host.h> #include <linux/rculist.h> -#include <asm/kvm_page_track.h> - #include "mmu.h" #include "mmu_internal.h" +#include "page_track.h" bool kvm_page_track_write_tracking_enabled(struct kvm *kvm) { @@ -28,103 +28,64 @@ bool kvm_page_track_write_tracking_enabled(struct kvm *kvm) void kvm_page_track_free_memslot(struct kvm_memory_slot *slot) { - int i; - - for (i = 0; i < KVM_PAGE_TRACK_MAX; i++) { - kvfree(slot->arch.gfn_track[i]); - slot->arch.gfn_track[i] = NULL; - } + kvfree(slot->arch.gfn_write_track); + slot->arch.gfn_write_track = NULL; } -int kvm_page_track_create_memslot(struct kvm *kvm, - struct kvm_memory_slot *slot, - unsigned long npages) +static int __kvm_page_track_write_tracking_alloc(struct kvm_memory_slot *slot, + unsigned long npages) { - int i; - - for (i = 0; i < KVM_PAGE_TRACK_MAX; i++) { - if (i == KVM_PAGE_TRACK_WRITE && - !kvm_page_track_write_tracking_enabled(kvm)) - continue; - - slot->arch.gfn_track[i] = - __vcalloc(npages, sizeof(*slot->arch.gfn_track[i]), - GFP_KERNEL_ACCOUNT); - if (!slot->arch.gfn_track[i]) - goto track_free; - } + const size_t size = sizeof(*slot->arch.gfn_write_track); - return 0; + if (!slot->arch.gfn_write_track) + slot->arch.gfn_write_track = __vcalloc(npages, size, + GFP_KERNEL_ACCOUNT); -track_free: - kvm_page_track_free_memslot(slot); - return -ENOMEM; + return slot->arch.gfn_write_track ? 
0 : -ENOMEM; } -static inline bool page_track_mode_is_valid(enum kvm_page_track_mode mode) +int kvm_page_track_create_memslot(struct kvm *kvm, + struct kvm_memory_slot *slot, + unsigned long npages) { - if (mode < 0 || mode >= KVM_PAGE_TRACK_MAX) - return false; + if (!kvm_page_track_write_tracking_enabled(kvm)) + return 0; - return true; + return __kvm_page_track_write_tracking_alloc(slot, npages); } int kvm_page_track_write_tracking_alloc(struct kvm_memory_slot *slot) { - unsigned short *gfn_track; - - if (slot->arch.gfn_track[KVM_PAGE_TRACK_WRITE]) - return 0; - - gfn_track = __vcalloc(slot->npages, sizeof(*gfn_track), - GFP_KERNEL_ACCOUNT); - if (gfn_track == NULL) - return -ENOMEM; - - slot->arch.gfn_track[KVM_PAGE_TRACK_WRITE] = gfn_track; - return 0; + return __kvm_page_track_write_tracking_alloc(slot, slot->npages); } -static void update_gfn_track(struct kvm_memory_slot *slot, gfn_t gfn, - enum kvm_page_track_mode mode, short count) +static void update_gfn_write_track(struct kvm_memory_slot *slot, gfn_t gfn, + short count) { int index, val; index = gfn_to_index(gfn, slot->base_gfn, PG_LEVEL_4K); - val = slot->arch.gfn_track[mode][index]; + val = slot->arch.gfn_write_track[index]; - if (WARN_ON(val + count < 0 || val + count > USHRT_MAX)) + if (WARN_ON_ONCE(val + count < 0 || val + count > USHRT_MAX)) return; - slot->arch.gfn_track[mode][index] += count; + slot->arch.gfn_write_track[index] += count; } -/* - * add guest page to the tracking pool so that corresponding access on that - * page will be intercepted. - * - * It should be called under the protection both of mmu-lock and kvm->srcu - * or kvm->slots_lock. - * - * @kvm: the guest instance we are interested in. - * @slot: the @gfn belongs to. - * @gfn: the guest page. - * @mode: tracking mode, currently only write track is supported. 
- */ -void kvm_slot_page_track_add_page(struct kvm *kvm, - struct kvm_memory_slot *slot, gfn_t gfn, - enum kvm_page_track_mode mode) +void __kvm_write_track_add_gfn(struct kvm *kvm, struct kvm_memory_slot *slot, + gfn_t gfn) { + lockdep_assert_held_write(&kvm->mmu_lock); - if (WARN_ON(!page_track_mode_is_valid(mode))) - return; + lockdep_assert_once(lockdep_is_held(&kvm->slots_lock) || + srcu_read_lock_held(&kvm->srcu)); - if (WARN_ON(mode == KVM_PAGE_TRACK_WRITE && - !kvm_page_track_write_tracking_enabled(kvm))) + if (KVM_BUG_ON(!kvm_page_track_write_tracking_enabled(kvm), kvm)) return; - update_gfn_track(slot, gfn, mode, 1); + update_gfn_write_track(slot, gfn, 1); /* * new track stops large page mapping for the @@ -132,37 +93,22 @@ void kvm_slot_page_track_add_page(struct kvm *kvm, */ kvm_mmu_gfn_disallow_lpage(slot, gfn); - if (mode == KVM_PAGE_TRACK_WRITE) - if (kvm_mmu_slot_gfn_write_protect(kvm, slot, gfn, PG_LEVEL_4K)) - kvm_flush_remote_tlbs(kvm); + if (kvm_mmu_slot_gfn_write_protect(kvm, slot, gfn, PG_LEVEL_4K)) + kvm_flush_remote_tlbs(kvm); } -EXPORT_SYMBOL_GPL(kvm_slot_page_track_add_page); -/* - * remove the guest page from the tracking pool which stops the interception - * of corresponding access on that page. It is the opposed operation of - * kvm_slot_page_track_add_page(). - * - * It should be called under the protection both of mmu-lock and kvm->srcu - * or kvm->slots_lock. - * - * @kvm: the guest instance we are interested in. - * @slot: the @gfn belongs to. - * @gfn: the guest page. - * @mode: tracking mode, currently only write track is supported. 
- */ -void kvm_slot_page_track_remove_page(struct kvm *kvm, - struct kvm_memory_slot *slot, gfn_t gfn, - enum kvm_page_track_mode mode) +void __kvm_write_track_remove_gfn(struct kvm *kvm, + struct kvm_memory_slot *slot, gfn_t gfn) { - if (WARN_ON(!page_track_mode_is_valid(mode))) - return; + lockdep_assert_held_write(&kvm->mmu_lock); - if (WARN_ON(mode == KVM_PAGE_TRACK_WRITE && - !kvm_page_track_write_tracking_enabled(kvm))) + lockdep_assert_once(lockdep_is_held(&kvm->slots_lock) || + srcu_read_lock_held(&kvm->srcu)); + + if (KVM_BUG_ON(!kvm_page_track_write_tracking_enabled(kvm), kvm)) return; - update_gfn_track(slot, gfn, mode, -1); + update_gfn_write_track(slot, gfn, -1); /* * allow large page mapping for the tracked page @@ -170,31 +116,26 @@ void kvm_slot_page_track_remove_page(struct kvm *kvm, */ kvm_mmu_gfn_allow_lpage(slot, gfn); } -EXPORT_SYMBOL_GPL(kvm_slot_page_track_remove_page); /* * check if the corresponding access on the specified guest page is tracked. */ -bool kvm_slot_page_track_is_active(struct kvm *kvm, - const struct kvm_memory_slot *slot, - gfn_t gfn, enum kvm_page_track_mode mode) +bool kvm_gfn_is_write_tracked(struct kvm *kvm, + const struct kvm_memory_slot *slot, gfn_t gfn) { int index; - if (WARN_ON(!page_track_mode_is_valid(mode))) - return false; - if (!slot) return false; - if (mode == KVM_PAGE_TRACK_WRITE && - !kvm_page_track_write_tracking_enabled(kvm)) + if (!kvm_page_track_write_tracking_enabled(kvm)) return false; index = gfn_to_index(gfn, slot->base_gfn, PG_LEVEL_4K); - return !!READ_ONCE(slot->arch.gfn_track[mode][index]); + return !!READ_ONCE(slot->arch.gfn_write_track[index]); } +#ifdef CONFIG_KVM_EXTERNAL_WRITE_TRACKING void kvm_page_track_cleanup(struct kvm *kvm) { struct kvm_page_track_notifier_head *head; @@ -216,17 +157,22 @@ int kvm_page_track_init(struct kvm *kvm) * register the notifier so that event interception for the tracked guest * pages can be received. 
*/ -void -kvm_page_track_register_notifier(struct kvm *kvm, - struct kvm_page_track_notifier_node *n) +int kvm_page_track_register_notifier(struct kvm *kvm, + struct kvm_page_track_notifier_node *n) { struct kvm_page_track_notifier_head *head; + if (!kvm || kvm->mm != current->mm) + return -ESRCH; + + kvm_get_kvm(kvm); + head = &kvm->arch.track_notifier_head; write_lock(&kvm->mmu_lock); hlist_add_head_rcu(&n->node, &head->track_notifier_list); write_unlock(&kvm->mmu_lock); + return 0; } EXPORT_SYMBOL_GPL(kvm_page_track_register_notifier); @@ -234,9 +180,8 @@ EXPORT_SYMBOL_GPL(kvm_page_track_register_notifier); * stop receiving the event interception. It is the opposed operation of * kvm_page_track_register_notifier(). */ -void -kvm_page_track_unregister_notifier(struct kvm *kvm, - struct kvm_page_track_notifier_node *n) +void kvm_page_track_unregister_notifier(struct kvm *kvm, + struct kvm_page_track_notifier_node *n) { struct kvm_page_track_notifier_head *head; @@ -246,6 +191,8 @@ kvm_page_track_unregister_notifier(struct kvm *kvm, hlist_del_rcu(&n->node); write_unlock(&kvm->mmu_lock); synchronize_srcu(&head->track_srcu); + + kvm_put_kvm(kvm); } EXPORT_SYMBOL_GPL(kvm_page_track_unregister_notifier); @@ -256,34 +203,30 @@ EXPORT_SYMBOL_GPL(kvm_page_track_unregister_notifier); * The node should figure out if the written page is the one that node is * interested in by itself. 
*/ -void kvm_page_track_write(struct kvm_vcpu *vcpu, gpa_t gpa, const u8 *new, - int bytes) +void __kvm_page_track_write(struct kvm *kvm, gpa_t gpa, const u8 *new, int bytes) { struct kvm_page_track_notifier_head *head; struct kvm_page_track_notifier_node *n; int idx; - head = &vcpu->kvm->arch.track_notifier_head; + head = &kvm->arch.track_notifier_head; if (hlist_empty(&head->track_notifier_list)) return; idx = srcu_read_lock(&head->track_srcu); hlist_for_each_entry_srcu(n, &head->track_notifier_list, node, - srcu_read_lock_held(&head->track_srcu)) + srcu_read_lock_held(&head->track_srcu)) if (n->track_write) - n->track_write(vcpu, gpa, new, bytes, n); + n->track_write(gpa, new, bytes, n); srcu_read_unlock(&head->track_srcu, idx); } /* - * Notify the node that memory slot is being removed or moved so that it can - * drop write-protection for the pages in the memory slot. - * - * The node should figure out it has any write-protected pages in this slot - * by itself. + * Notify external page track nodes that a memory region is being removed from + * the VM, e.g. so that users can free any associated metadata. */ -void kvm_page_track_flush_slot(struct kvm *kvm, struct kvm_memory_slot *slot) +void kvm_page_track_delete_slot(struct kvm *kvm, struct kvm_memory_slot *slot) { struct kvm_page_track_notifier_head *head; struct kvm_page_track_notifier_node *n; @@ -296,8 +239,69 @@ void kvm_page_track_flush_slot(struct kvm *kvm, struct kvm_memory_slot *slot) idx = srcu_read_lock(&head->track_srcu); hlist_for_each_entry_srcu(n, &head->track_notifier_list, node, - srcu_read_lock_held(&head->track_srcu)) - if (n->track_flush_slot) - n->track_flush_slot(kvm, slot, n); + srcu_read_lock_held(&head->track_srcu)) + if (n->track_remove_region) + n->track_remove_region(slot->base_gfn, slot->npages, n); srcu_read_unlock(&head->track_srcu, idx); } + +/* + * add guest page to the tracking pool so that corresponding access on that + * page will be intercepted. 
+ * + * @kvm: the guest instance we are interested in. + * @gfn: the guest page. + */ +int kvm_write_track_add_gfn(struct kvm *kvm, gfn_t gfn) +{ + struct kvm_memory_slot *slot; + int idx; + + idx = srcu_read_lock(&kvm->srcu); + + slot = gfn_to_memslot(kvm, gfn); + if (!slot) { + srcu_read_unlock(&kvm->srcu, idx); + return -EINVAL; + } + + write_lock(&kvm->mmu_lock); + __kvm_write_track_add_gfn(kvm, slot, gfn); + write_unlock(&kvm->mmu_lock); + + srcu_read_unlock(&kvm->srcu, idx); + + return 0; +} +EXPORT_SYMBOL_GPL(kvm_write_track_add_gfn); + +/* + * remove the guest page from the tracking pool which stops the interception + * of corresponding access on that page. + * + * @kvm: the guest instance we are interested in. + * @gfn: the guest page. + */ +int kvm_write_track_remove_gfn(struct kvm *kvm, gfn_t gfn) +{ + struct kvm_memory_slot *slot; + int idx; + + idx = srcu_read_lock(&kvm->srcu); + + slot = gfn_to_memslot(kvm, gfn); + if (!slot) { + srcu_read_unlock(&kvm->srcu, idx); + return -EINVAL; + } + + write_lock(&kvm->mmu_lock); + __kvm_write_track_remove_gfn(kvm, slot, gfn); + write_unlock(&kvm->mmu_lock); + + srcu_read_unlock(&kvm->srcu, idx); + + return 0; +} +EXPORT_SYMBOL_GPL(kvm_write_track_remove_gfn); +#endif diff --git a/arch/x86/kvm/mmu/page_track.h b/arch/x86/kvm/mmu/page_track.h new file mode 100644 index 000000000000..d4d72ed999b1 --- /dev/null +++ b/arch/x86/kvm/mmu/page_track.h @@ -0,0 +1,58 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __KVM_X86_PAGE_TRACK_H +#define __KVM_X86_PAGE_TRACK_H + +#include <linux/kvm_host.h> + +#include <asm/kvm_page_track.h> + + +bool kvm_page_track_write_tracking_enabled(struct kvm *kvm); +int kvm_page_track_write_tracking_alloc(struct kvm_memory_slot *slot); + +void kvm_page_track_free_memslot(struct kvm_memory_slot *slot); +int kvm_page_track_create_memslot(struct kvm *kvm, + struct kvm_memory_slot *slot, + unsigned long npages); + +void __kvm_write_track_add_gfn(struct kvm *kvm, struct kvm_memory_slot *slot, 
+ gfn_t gfn); +void __kvm_write_track_remove_gfn(struct kvm *kvm, + struct kvm_memory_slot *slot, gfn_t gfn); + +bool kvm_gfn_is_write_tracked(struct kvm *kvm, + const struct kvm_memory_slot *slot, gfn_t gfn); + +#ifdef CONFIG_KVM_EXTERNAL_WRITE_TRACKING +int kvm_page_track_init(struct kvm *kvm); +void kvm_page_track_cleanup(struct kvm *kvm); + +void __kvm_page_track_write(struct kvm *kvm, gpa_t gpa, const u8 *new, int bytes); +void kvm_page_track_delete_slot(struct kvm *kvm, struct kvm_memory_slot *slot); + +static inline bool kvm_page_track_has_external_user(struct kvm *kvm) +{ + return !hlist_empty(&kvm->arch.track_notifier_head.track_notifier_list); +} +#else +static inline int kvm_page_track_init(struct kvm *kvm) { return 0; } +static inline void kvm_page_track_cleanup(struct kvm *kvm) { } + +static inline void __kvm_page_track_write(struct kvm *kvm, gpa_t gpa, + const u8 *new, int bytes) { } +static inline void kvm_page_track_delete_slot(struct kvm *kvm, + struct kvm_memory_slot *slot) { } + +static inline bool kvm_page_track_has_external_user(struct kvm *kvm) { return false; } + +#endif /* CONFIG_KVM_EXTERNAL_WRITE_TRACKING */ + +static inline void kvm_page_track_write(struct kvm_vcpu *vcpu, gpa_t gpa, + const u8 *new, int bytes) +{ + __kvm_page_track_write(vcpu->kvm, gpa, new, bytes); + + kvm_mmu_track_write(vcpu, gpa, new, bytes); +} + +#endif /* __KVM_X86_PAGE_TRACK_H */ diff --git a/arch/x86/kvm/mmu/paging_tmpl.h b/arch/x86/kvm/mmu/paging_tmpl.h index 0662e0278e70..c85255073f67 100644 --- a/arch/x86/kvm/mmu/paging_tmpl.h +++ b/arch/x86/kvm/mmu/paging_tmpl.h @@ -338,7 +338,6 @@ retry_walk: } #endif walker->max_level = walker->level; - ASSERT(!(is_long_mode(vcpu) && !is_pae(vcpu))); /* * FIXME: on Intel processors, loads of the PDPTE registers for PAE paging @@ -348,9 +347,21 @@ retry_walk: nested_access = (have_ad ? 
PFERR_WRITE_MASK : 0) | PFERR_USER_MASK; pte_access = ~0; + + /* + * Queue a page fault for injection if this assertion fails, as callers + * assume that walker.fault contains sane info on a walk failure. I.e. + * avoid making the situation worse by inducing even worse badness + * between when the assertion fails and when KVM kicks the vCPU out to + * userspace (because the VM is bugged). + */ + if (KVM_BUG_ON(is_long_mode(vcpu) && !is_pae(vcpu), vcpu->kvm)) + goto error; + ++walker->level; do { + struct kvm_memory_slot *slot; unsigned long host_addr; pt_access = pte_access; @@ -381,7 +392,11 @@ retry_walk: if (unlikely(real_gpa == INVALID_GPA)) return 0; - host_addr = kvm_vcpu_gfn_to_hva_prot(vcpu, gpa_to_gfn(real_gpa), + slot = kvm_vcpu_gfn_to_memslot(vcpu, gpa_to_gfn(real_gpa)); + if (!kvm_is_visible_memslot(slot)) + goto error; + + host_addr = gfn_to_hva_memslot_prot(slot, gpa_to_gfn(real_gpa), &walker->pte_writable[walker->level - 1]); if (unlikely(kvm_is_error_hva(host_addr))) goto error; @@ -456,9 +471,6 @@ retry_walk: goto retry_walk; } - pgprintk("%s: pte %llx pte_access %x pt_access %x\n", - __func__, (u64)pte, walker->pte_access, - walker->pt_access[walker->level - 1]); return 1; error: @@ -529,8 +541,6 @@ FNAME(prefetch_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, if (FNAME(prefetch_invalid_gpte)(vcpu, sp, spte, gpte)) return false; - pgprintk("%s: gpte %llx spte %p\n", __func__, (u64)gpte, spte); - gfn = gpte_to_gfn(gpte); pte_access = sp->role.access & FNAME(gpte_access)(gpte); FNAME(protect_clean_gpte)(vcpu->arch.mmu, &pte_access, gpte); @@ -638,8 +648,19 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault, if (FNAME(gpte_changed)(vcpu, gw, top_level)) goto out_gpte_changed; - if (WARN_ON(!VALID_PAGE(vcpu->arch.mmu->root.hpa))) + if (WARN_ON_ONCE(!VALID_PAGE(vcpu->arch.mmu->root.hpa))) + goto out_gpte_changed; + + /* + * Load a new root and retry the faulting instruction in the extremely + * unlikely scenario that 
the guest root gfn became visible between + * loading a dummy root and handling the resulting page fault, e.g. if + * userspace creates a memslot in the interim. + */ + if (unlikely(kvm_mmu_is_dummy_root(vcpu->arch.mmu->root.hpa))) { + kvm_make_request(KVM_REQ_MMU_FREE_OBSOLETE_ROOTS, vcpu); goto out_gpte_changed; + } for_each_shadow_entry(vcpu, fault->addr, it) { gfn_t table_gfn; @@ -758,7 +779,6 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault struct guest_walker walker; int r; - pgprintk("%s: addr %lx err %x\n", __func__, fault->addr, fault->error_code); WARN_ON_ONCE(fault->is_tdp); /* @@ -773,7 +793,6 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault * The page is not mapped by the guest. Let the guest handle it. */ if (!r) { - pgprintk("%s: guest page fault\n", __func__); if (!fault->prefetch) kvm_inject_emulated_page_fault(vcpu, &walker.fault); @@ -837,7 +856,7 @@ static gpa_t FNAME(get_level1_sp_gpa)(struct kvm_mmu_page *sp) { int offset = 0; - WARN_ON(sp->role.level != PG_LEVEL_4K); + WARN_ON_ONCE(sp->role.level != PG_LEVEL_4K); if (PTTYPE == 32) offset = sp->role.quadrant << SPTE_LEVEL_BITS; diff --git a/arch/x86/kvm/mmu/spte.c b/arch/x86/kvm/mmu/spte.c index cf2c6426a6fc..4a599130e9c9 100644 --- a/arch/x86/kvm/mmu/spte.c +++ b/arch/x86/kvm/mmu/spte.c @@ -61,7 +61,7 @@ static u64 generation_mmio_spte_mask(u64 gen) { u64 mask; - WARN_ON(gen & ~MMIO_SPTE_GEN_MASK); + WARN_ON_ONCE(gen & ~MMIO_SPTE_GEN_MASK); mask = (gen << MMIO_SPTE_GEN_LOW_SHIFT) & MMIO_SPTE_GEN_LOW_MASK; mask |= (gen << MMIO_SPTE_GEN_HIGH_SHIFT) & MMIO_SPTE_GEN_HIGH_MASK; @@ -221,8 +221,6 @@ bool make_spte(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, * shadow pages and unsync'ing pages is not allowed. 
*/ if (mmu_try_to_unsync_pages(vcpu->kvm, slot, gfn, can_unsync, prefetch)) { - pgprintk("%s: found shadow page for %llx, marking ro\n", - __func__, gfn); wrprot = true; pte_access &= ~ACC_WRITE_MASK; spte &= ~(PT_WRITABLE_MASK | shadow_mmu_writable_mask); @@ -242,7 +240,7 @@ out: if ((spte & PT_WRITABLE_MASK) && kvm_slot_dirty_track_enabled(slot)) { /* Enforced by kvm_mmu_hugepage_adjust. */ - WARN_ON(level > PG_LEVEL_4K); + WARN_ON_ONCE(level > PG_LEVEL_4K); mark_page_dirty_in_slot(vcpu->kvm, slot, gfn); } diff --git a/arch/x86/kvm/mmu/spte.h b/arch/x86/kvm/mmu/spte.h index 1279db2eab44..a129951c9a88 100644 --- a/arch/x86/kvm/mmu/spte.h +++ b/arch/x86/kvm/mmu/spte.h @@ -3,6 +3,7 @@ #ifndef KVM_X86_MMU_SPTE_H #define KVM_X86_MMU_SPTE_H +#include "mmu.h" #include "mmu_internal.h" /* @@ -236,6 +237,18 @@ static inline struct kvm_mmu_page *sptep_to_sp(u64 *sptep) return to_shadow_page(__pa(sptep)); } +static inline struct kvm_mmu_page *root_to_sp(hpa_t root) +{ + if (kvm_mmu_is_dummy_root(root)) + return NULL; + + /* + * The "root" may be a special root, e.g. a PAE entry, treat it as a + * SPTE to ensure any non-PA bits are dropped. + */ + return spte_to_child_sp(root); +} + static inline bool is_mmio_spte(u64 spte) { return (spte & shadow_mmio_mask) == shadow_mmio_value && @@ -265,13 +278,13 @@ static inline bool sp_ad_disabled(struct kvm_mmu_page *sp) static inline bool spte_ad_enabled(u64 spte) { - MMU_WARN_ON(!is_shadow_present_pte(spte)); + KVM_MMU_WARN_ON(!is_shadow_present_pte(spte)); return (spte & SPTE_TDP_AD_MASK) != SPTE_TDP_AD_DISABLED; } static inline bool spte_ad_need_write_protect(u64 spte) { - MMU_WARN_ON(!is_shadow_present_pte(spte)); + KVM_MMU_WARN_ON(!is_shadow_present_pte(spte)); /* * This is benign for non-TDP SPTEs as SPTE_TDP_AD_ENABLED is '0', * and non-TDP SPTEs will never set these bits. 
Optimize for 64-bit @@ -282,13 +295,13 @@ static inline bool spte_ad_need_write_protect(u64 spte) static inline u64 spte_shadow_accessed_mask(u64 spte) { - MMU_WARN_ON(!is_shadow_present_pte(spte)); + KVM_MMU_WARN_ON(!is_shadow_present_pte(spte)); return spte_ad_enabled(spte) ? shadow_accessed_mask : 0; } static inline u64 spte_shadow_dirty_mask(u64 spte) { - MMU_WARN_ON(!is_shadow_present_pte(spte)); + KVM_MMU_WARN_ON(!is_shadow_present_pte(spte)); return spte_ad_enabled(spte) ? shadow_dirty_mask : 0; } diff --git a/arch/x86/kvm/mmu/tdp_iter.c b/arch/x86/kvm/mmu/tdp_iter.c index d2eb0d4f8710..bd30ebfb2f2c 100644 --- a/arch/x86/kvm/mmu/tdp_iter.c +++ b/arch/x86/kvm/mmu/tdp_iter.c @@ -39,13 +39,14 @@ void tdp_iter_restart(struct tdp_iter *iter) void tdp_iter_start(struct tdp_iter *iter, struct kvm_mmu_page *root, int min_level, gfn_t next_last_level_gfn) { - int root_level = root->role.level; - - WARN_ON(root_level < 1); - WARN_ON(root_level > PT64_ROOT_MAX_LEVEL); + if (WARN_ON_ONCE(!root || (root->role.level < 1) || + (root->role.level > PT64_ROOT_MAX_LEVEL))) { + iter->valid = false; + return; + } iter->next_last_level_gfn = next_last_level_gfn; - iter->root_level = root_level; + iter->root_level = root->role.level; iter->min_level = min_level; iter->pt_path[iter->root_level - 1] = (tdp_ptep_t)root->spt; iter->as_id = kvm_mmu_page_as_id(root); diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c index 512163d52194..6c63f2d1675f 100644 --- a/arch/x86/kvm/mmu/tdp_mmu.c +++ b/arch/x86/kvm/mmu/tdp_mmu.c @@ -475,9 +475,9 @@ static void handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn, bool is_leaf = is_present && is_last_spte(new_spte, level); bool pfn_changed = spte_to_pfn(old_spte) != spte_to_pfn(new_spte); - WARN_ON(level > PT64_ROOT_MAX_LEVEL); - WARN_ON(level < PG_LEVEL_4K); - WARN_ON(gfn & (KVM_PAGES_PER_HPAGE(level) - 1)); + WARN_ON_ONCE(level > PT64_ROOT_MAX_LEVEL); + WARN_ON_ONCE(level < PG_LEVEL_4K); + WARN_ON_ONCE(gfn & 
(KVM_PAGES_PER_HPAGE(level) - 1)); /* * If this warning were to trigger it would indicate that there was a @@ -522,9 +522,9 @@ static void handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn, * impact the guest since both the former and current SPTEs * are nonpresent. */ - if (WARN_ON(!is_mmio_spte(old_spte) && - !is_mmio_spte(new_spte) && - !is_removed_spte(new_spte))) + if (WARN_ON_ONCE(!is_mmio_spte(old_spte) && + !is_mmio_spte(new_spte) && + !is_removed_spte(new_spte))) pr_err("Unexpected SPTE change! Nonpresent SPTEs\n" "should not be replaced with another,\n" "different nonpresent SPTE, unless one or both\n" @@ -661,7 +661,7 @@ static u64 tdp_mmu_set_spte(struct kvm *kvm, int as_id, tdp_ptep_t sptep, * should be used. If operating under the MMU lock in write mode, the * use of the removed SPTE should not be necessary. */ - WARN_ON(is_removed_spte(old_spte) || is_removed_spte(new_spte)); + WARN_ON_ONCE(is_removed_spte(old_spte) || is_removed_spte(new_spte)); old_spte = kvm_tdp_mmu_write_spte(sptep, old_spte, new_spte, level); @@ -689,7 +689,7 @@ static inline void tdp_mmu_iter_set_spte(struct kvm *kvm, struct tdp_iter *iter, else #define tdp_mmu_for_each_pte(_iter, _mmu, _start, _end) \ - for_each_tdp_pte(_iter, to_shadow_page(_mmu->root.hpa), _start, _end) + for_each_tdp_pte(_iter, root_to_sp(_mmu->root.hpa), _start, _end) /* * Yield if the MMU lock is contended or this thread needs to return control @@ -709,7 +709,7 @@ static inline bool __must_check tdp_mmu_iter_cond_resched(struct kvm *kvm, struct tdp_iter *iter, bool flush, bool shared) { - WARN_ON(iter->yielded); + WARN_ON_ONCE(iter->yielded); /* Ensure forward progress has been made before yielding. 
*/ if (iter->next_last_level_gfn == iter->yielded_gfn) @@ -728,7 +728,7 @@ static inline bool __must_check tdp_mmu_iter_cond_resched(struct kvm *kvm, rcu_read_lock(); - WARN_ON(iter->gfn > iter->next_last_level_gfn); + WARN_ON_ONCE(iter->gfn > iter->next_last_level_gfn); iter->yielded = true; } @@ -1241,7 +1241,7 @@ static bool set_spte_gfn(struct kvm *kvm, struct tdp_iter *iter, u64 new_spte; /* Huge pages aren't expected to be modified without first being zapped. */ - WARN_ON(pte_huge(range->pte) || range->start + 1 != range->end); + WARN_ON_ONCE(pte_huge(range->arg.pte) || range->start + 1 != range->end); if (iter->level != PG_LEVEL_4K || !is_shadow_present_pte(iter->old_spte)) @@ -1255,9 +1255,9 @@ static bool set_spte_gfn(struct kvm *kvm, struct tdp_iter *iter, */ tdp_mmu_iter_set_spte(kvm, iter, 0); - if (!pte_write(range->pte)) { + if (!pte_write(range->arg.pte)) { new_spte = kvm_mmu_changed_pte_notifier_make_spte(iter->old_spte, - pte_pfn(range->pte)); + pte_pfn(range->arg.pte)); tdp_mmu_iter_set_spte(kvm, iter, new_spte); } @@ -1548,8 +1548,8 @@ retry: if (!is_shadow_present_pte(iter.old_spte)) continue; - MMU_WARN_ON(kvm_ad_enabled() && - spte_ad_need_write_protect(iter.old_spte)); + KVM_MMU_WARN_ON(kvm_ad_enabled() && + spte_ad_need_write_protect(iter.old_spte)); if (!(iter.old_spte & dbit)) continue; @@ -1600,6 +1600,8 @@ static void clear_dirty_pt_masked(struct kvm *kvm, struct kvm_mmu_page *root, shadow_dirty_mask; struct tdp_iter iter; + lockdep_assert_held_write(&kvm->mmu_lock); + rcu_read_lock(); tdp_root_for_each_leaf_pte(iter, root, gfn + __ffs(mask), @@ -1607,8 +1609,8 @@ static void clear_dirty_pt_masked(struct kvm *kvm, struct kvm_mmu_page *root, if (!mask) break; - MMU_WARN_ON(kvm_ad_enabled() && - spte_ad_need_write_protect(iter.old_spte)); + KVM_MMU_WARN_ON(kvm_ad_enabled() && + spte_ad_need_write_protect(iter.old_spte)); if (iter.level > PG_LEVEL_4K || !(mask & (1UL << (iter.gfn - gfn)))) @@ -1646,7 +1648,6 @@ void 
kvm_tdp_mmu_clear_dirty_pt_masked(struct kvm *kvm, { struct kvm_mmu_page *root; - lockdep_assert_held_write(&kvm->mmu_lock); for_each_tdp_mmu_root(kvm, root, slot->as_id) clear_dirty_pt_masked(kvm, root, gfn, mask, wrprot); } diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c index bf653df86112..edb89b51b383 100644 --- a/arch/x86/kvm/pmu.c +++ b/arch/x86/kvm/pmu.c @@ -382,9 +382,6 @@ static bool check_pmu_event_filter(struct kvm_pmc *pmc) struct kvm_x86_pmu_event_filter *filter; struct kvm *kvm = pmc->vcpu->kvm; - if (!static_call(kvm_x86_pmu_hw_event_available)(pmc)) - return false; - filter = srcu_dereference(kvm->arch.pmu_event_filter, &kvm->srcu); if (!filter) return true; @@ -398,6 +395,7 @@ static bool check_pmu_event_filter(struct kvm_pmc *pmc) static bool pmc_event_is_allowed(struct kvm_pmc *pmc) { return pmc_is_globally_enabled(pmc) && pmc_speculative_in_use(pmc) && + static_call(kvm_x86_pmu_hw_event_available)(pmc) && check_pmu_event_filter(pmc); } diff --git a/arch/x86/kvm/reverse_cpuid.h b/arch/x86/kvm/reverse_cpuid.h index 56cbdb24400a..b81650678375 100644 --- a/arch/x86/kvm/reverse_cpuid.h +++ b/arch/x86/kvm/reverse_cpuid.h @@ -43,6 +43,7 @@ enum kvm_only_cpuid_leafs { /* Intel-defined sub-features, CPUID level 0x00000007:1 (EDX) */ #define X86_FEATURE_AVX_VNNI_INT8 KVM_X86_FEATURE(CPUID_7_1_EDX, 4) #define X86_FEATURE_AVX_NE_CONVERT KVM_X86_FEATURE(CPUID_7_1_EDX, 5) +#define X86_FEATURE_AMX_COMPLEX KVM_X86_FEATURE(CPUID_7_1_EDX, 8) #define X86_FEATURE_PREFETCHITI KVM_X86_FEATURE(CPUID_7_1_EDX, 14) /* CPUID level 0x80000007 (EDX). 
 */ diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c index cfc8ab773025..2092db892d7d 100644 --- a/arch/x86/kvm/svm/avic.c +++ b/arch/x86/kvm/svm/avic.c @@ -791,6 +791,7 @@ static int svm_ir_list_add(struct vcpu_svm *svm, struct amd_iommu_pi_data *pi) int ret = 0; unsigned long flags; struct amd_svm_iommu_ir *ir; + u64 entry; /** * In some cases, the existing irte is updated and re-set, @@ -824,6 +825,18 @@ static int svm_ir_list_add(struct vcpu_svm *svm, struct amd_iommu_pi_data *pi) ir->data = pi->ir_data; spin_lock_irqsave(&svm->ir_list_lock, flags); + + /* + * Update the target pCPU for IOMMU doorbells if the vCPU is running. + * If the vCPU is NOT running, i.e. is blocking or scheduled out, KVM + * will update the pCPU info when the vCPU is awakened and/or scheduled in. + * See also avic_vcpu_load(). + */ + entry = READ_ONCE(*(svm->avic_physical_id_cache)); + if (entry & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK) + amd_iommu_update_ga(entry & AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK, + true, pi->ir_data); + list_add(&ir->node, &svm->ir_list); spin_unlock_irqrestore(&svm->ir_list_lock, flags); out: @@ -986,10 +999,11 @@ static inline int avic_update_iommu_vcpu_affinity(struct kvm_vcpu *vcpu, int cpu, bool r) { int ret = 0; - unsigned long flags; struct amd_svm_iommu_ir *ir; struct vcpu_svm *svm = to_svm(vcpu); + lockdep_assert_held(&svm->ir_list_lock); + if (!kvm_arch_has_assigned_device(vcpu->kvm)) return 0; @@ -997,19 +1011,15 @@ avic_update_iommu_vcpu_affinity(struct kvm_vcpu *vcpu, int cpu, bool r) * Here, we go through the per-vcpu ir_list to update all existing * interrupt remapping table entry targeting this vcpu. 
*/ - spin_lock_irqsave(&svm->ir_list_lock, flags); - if (list_empty(&svm->ir_list)) - goto out; + return 0; list_for_each_entry(ir, &svm->ir_list, node) { ret = amd_iommu_update_ga(cpu, r, ir->data); if (ret) - break; + return ret; } -out: - spin_unlock_irqrestore(&svm->ir_list_lock, flags); - return ret; + return 0; } void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu) @@ -1017,6 +1027,7 @@ void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu) u64 entry; int h_physical_id = kvm_cpu_get_apicid(cpu); struct vcpu_svm *svm = to_svm(vcpu); + unsigned long flags; lockdep_assert_preemption_disabled(); @@ -1033,6 +1044,15 @@ void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu) if (kvm_vcpu_is_blocking(vcpu)) return; + /* + * Grab the per-vCPU interrupt remapping lock even if the VM doesn't + * _currently_ have assigned devices, as that can change. Holding + * ir_list_lock ensures that either svm_ir_list_add() will consume + * up-to-date entry information, or that this task will wait until + * svm_ir_list_add() completes to set the new target pCPU. + */ + spin_lock_irqsave(&svm->ir_list_lock, flags); + entry = READ_ONCE(*(svm->avic_physical_id_cache)); WARN_ON_ONCE(entry & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK); @@ -1042,25 +1062,48 @@ void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu) WRITE_ONCE(*(svm->avic_physical_id_cache), entry); avic_update_iommu_vcpu_affinity(vcpu, h_physical_id, true); + + spin_unlock_irqrestore(&svm->ir_list_lock, flags); } void avic_vcpu_put(struct kvm_vcpu *vcpu) { u64 entry; struct vcpu_svm *svm = to_svm(vcpu); + unsigned long flags; lockdep_assert_preemption_disabled(); + /* + * Note, reading the Physical ID entry outside of ir_list_lock is safe + * as only the pCPU that has loaded (or is loading) the vCPU is allowed + * to modify the entry, and preemption is disabled. I.e. the vCPU + * can't be scheduled out and thus avic_vcpu_{put,load}() can't run + * recursively. 
+ */ entry = READ_ONCE(*(svm->avic_physical_id_cache)); /* Nothing to do if IsRunning == '0' due to vCPU blocking. */ if (!(entry & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK)) return; + /* + * Take and hold the per-vCPU interrupt remapping lock while updating + * the Physical ID entry even though the lock doesn't protect against + * multiple writers (see above). Holding ir_list_lock ensures that + * either svm_ir_list_add() will consume up-to-date entry information, + * or that this task will wait until svm_ir_list_add() completes to + * mark the vCPU as not running. + */ + spin_lock_irqsave(&svm->ir_list_lock, flags); + avic_update_iommu_vcpu_affinity(vcpu, -1, 0); entry &= ~AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK; WRITE_ONCE(*(svm->avic_physical_id_cache), entry); + + spin_unlock_irqrestore(&svm->ir_list_lock, flags); + } void avic_refresh_virtual_apic_mode(struct kvm_vcpu *vcpu) diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index 96936ddf1b3c..dd496c9e5f91 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -107,7 +107,7 @@ static void nested_svm_uninit_mmu_context(struct kvm_vcpu *vcpu) static bool nested_vmcb_needs_vls_intercept(struct vcpu_svm *svm) { - if (!svm->v_vmload_vmsave_enabled) + if (!guest_can_use(&svm->vcpu, X86_FEATURE_V_VMSAVE_VMLOAD)) return true; if (!nested_npt_enabled(svm)) @@ -552,6 +552,7 @@ static void nested_vmcb02_prepare_save(struct vcpu_svm *svm, struct vmcb *vmcb12 bool new_vmcb12 = false; struct vmcb *vmcb01 = svm->vmcb01.ptr; struct vmcb *vmcb02 = svm->nested.vmcb02.ptr; + struct kvm_vcpu *vcpu = &svm->vcpu; nested_vmcb02_compute_g_pat(svm); @@ -577,18 +578,18 @@ static void nested_vmcb02_prepare_save(struct vcpu_svm *svm, struct vmcb *vmcb12 vmcb_mark_dirty(vmcb02, VMCB_DT); } - kvm_set_rflags(&svm->vcpu, vmcb12->save.rflags | X86_EFLAGS_FIXED); + kvm_set_rflags(vcpu, vmcb12->save.rflags | X86_EFLAGS_FIXED); - svm_set_efer(&svm->vcpu, svm->nested.save.efer); + svm_set_efer(vcpu, 
svm->nested.save.efer); - svm_set_cr0(&svm->vcpu, svm->nested.save.cr0); - svm_set_cr4(&svm->vcpu, svm->nested.save.cr4); + svm_set_cr0(vcpu, svm->nested.save.cr0); + svm_set_cr4(vcpu, svm->nested.save.cr4); svm->vcpu.arch.cr2 = vmcb12->save.cr2; - kvm_rax_write(&svm->vcpu, vmcb12->save.rax); - kvm_rsp_write(&svm->vcpu, vmcb12->save.rsp); - kvm_rip_write(&svm->vcpu, vmcb12->save.rip); + kvm_rax_write(vcpu, vmcb12->save.rax); + kvm_rsp_write(vcpu, vmcb12->save.rsp); + kvm_rip_write(vcpu, vmcb12->save.rip); /* In case we don't even reach vcpu_run, the fields are not updated */ vmcb02->save.rax = vmcb12->save.rax; @@ -602,7 +603,8 @@ static void nested_vmcb02_prepare_save(struct vcpu_svm *svm, struct vmcb *vmcb12 vmcb_mark_dirty(vmcb02, VMCB_DR); } - if (unlikely(svm->lbrv_enabled && (svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK))) { + if (unlikely(guest_can_use(vcpu, X86_FEATURE_LBRV) && + (svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK))) { /* * Reserved bits of DEBUGCTL are ignored. Be consistent with * svm_set_msr's definition of reserved bits. @@ -658,7 +660,8 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm, * exit_int_info, exit_int_info_err, next_rip, insn_len, insn_bytes. 
*/ - if (svm->vgif_enabled && (svm->nested.ctl.int_ctl & V_GIF_ENABLE_MASK)) + if (guest_can_use(vcpu, X86_FEATURE_VGIF) && + (svm->nested.ctl.int_ctl & V_GIF_ENABLE_MASK)) int_ctl_vmcb12_bits |= (V_GIF_MASK | V_GIF_ENABLE_MASK); else int_ctl_vmcb01_bits |= (V_GIF_MASK | V_GIF_ENABLE_MASK); @@ -695,10 +698,9 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm, vmcb02->control.tsc_offset = vcpu->arch.tsc_offset; - if (svm->tsc_ratio_msr != kvm_caps.default_tsc_scaling_ratio) { - WARN_ON(!svm->tsc_scaling_enabled); + if (guest_can_use(vcpu, X86_FEATURE_TSCRATEMSR) && + svm->tsc_ratio_msr != kvm_caps.default_tsc_scaling_ratio) nested_svm_update_tsc_ratio_msr(vcpu); - } vmcb02->control.int_ctl = (svm->nested.ctl.int_ctl & int_ctl_vmcb12_bits) | @@ -717,7 +719,7 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm, * what a nrips=0 CPU would do (L1 is responsible for advancing RIP * prior to injecting the event). */ - if (svm->nrips_enabled) + if (guest_can_use(vcpu, X86_FEATURE_NRIPS)) vmcb02->control.next_rip = svm->nested.ctl.next_rip; else if (boot_cpu_has(X86_FEATURE_NRIPS)) vmcb02->control.next_rip = vmcb12_rip; @@ -727,7 +729,7 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm, svm->soft_int_injected = true; svm->soft_int_csbase = vmcb12_csbase; svm->soft_int_old_rip = vmcb12_rip; - if (svm->nrips_enabled) + if (guest_can_use(vcpu, X86_FEATURE_NRIPS)) svm->soft_int_next_rip = svm->nested.ctl.next_rip; else svm->soft_int_next_rip = vmcb12_rip; @@ -735,15 +737,21 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm, vmcb02->control.virt_ext = vmcb01->control.virt_ext & LBR_CTL_ENABLE_MASK; - if (svm->lbrv_enabled) + if (guest_can_use(vcpu, X86_FEATURE_LBRV)) vmcb02->control.virt_ext |= (svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK); if (!nested_vmcb_needs_vls_intercept(svm)) vmcb02->control.virt_ext |= VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK; - pause_count12 = svm->pause_filter_enabled ? 
svm->nested.ctl.pause_filter_count : 0; - pause_thresh12 = svm->pause_threshold_enabled ? svm->nested.ctl.pause_filter_thresh : 0; + if (guest_can_use(vcpu, X86_FEATURE_PAUSEFILTER)) + pause_count12 = svm->nested.ctl.pause_filter_count; + else + pause_count12 = 0; + if (guest_can_use(vcpu, X86_FEATURE_PFTHRESHOLD)) + pause_thresh12 = svm->nested.ctl.pause_filter_thresh; + else + pause_thresh12 = 0; if (kvm_pause_in_guest(svm->vcpu.kvm)) { /* use guest values since host doesn't intercept PAUSE */ vmcb02->control.pause_filter_count = pause_count12; @@ -1027,7 +1035,7 @@ int nested_svm_vmexit(struct vcpu_svm *svm) if (vmcb12->control.exit_code != SVM_EXIT_ERR) nested_save_pending_event_to_vmcb12(svm, vmcb12); - if (svm->nrips_enabled) + if (guest_can_use(vcpu, X86_FEATURE_NRIPS)) vmcb12->control.next_rip = vmcb02->control.next_rip; vmcb12->control.int_ctl = svm->nested.ctl.int_ctl; @@ -1066,7 +1074,8 @@ int nested_svm_vmexit(struct vcpu_svm *svm) if (!nested_exit_on_intr(svm)) kvm_make_request(KVM_REQ_EVENT, &svm->vcpu); - if (unlikely(svm->lbrv_enabled && (svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK))) { + if (unlikely(guest_can_use(vcpu, X86_FEATURE_LBRV) && + (svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK))) { svm_copy_lbrs(vmcb12, vmcb02); svm_update_lbrv(vcpu); } else if (unlikely(vmcb01->control.virt_ext & LBR_CTL_ENABLE_MASK)) { @@ -1101,10 +1110,10 @@ int nested_svm_vmexit(struct vcpu_svm *svm) vmcb_mark_dirty(vmcb01, VMCB_INTERCEPTS); } - if (svm->tsc_ratio_msr != kvm_caps.default_tsc_scaling_ratio) { - WARN_ON(!svm->tsc_scaling_enabled); + if (kvm_caps.has_tsc_control && + vcpu->arch.tsc_scaling_ratio != vcpu->arch.l1_tsc_scaling_ratio) { vcpu->arch.tsc_scaling_ratio = vcpu->arch.l1_tsc_scaling_ratio; - __svm_write_tsc_multiplier(vcpu->arch.tsc_scaling_ratio); + svm_write_tsc_multiplier(vcpu); } svm->nested.ctl.nested_cr3 = 0; @@ -1537,7 +1546,7 @@ void nested_svm_update_tsc_ratio_msr(struct kvm_vcpu *vcpu) vcpu->arch.tsc_scaling_ratio = 
kvm_calc_nested_tsc_multiplier(vcpu->arch.l1_tsc_scaling_ratio, svm->tsc_ratio_msr); - __svm_write_tsc_multiplier(vcpu->arch.tsc_scaling_ratio); + svm_write_tsc_multiplier(vcpu); } /* Inverse operation of nested_copy_vmcb_control_to_cache(). asid is copied too. */ diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index d3aec1f2cad2..b9a0a939d59f 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -23,6 +23,7 @@ #include <asm/pkru.h> #include <asm/trapnr.h> #include <asm/fpu/xcr.h> +#include <asm/debugreg.h> #include "mmu.h" #include "x86.h" @@ -54,9 +55,14 @@ module_param_named(sev, sev_enabled, bool, 0444); /* enable/disable SEV-ES support */ static bool sev_es_enabled = true; module_param_named(sev_es, sev_es_enabled, bool, 0444); + +/* enable/disable SEV-ES DebugSwap support */ +static bool sev_es_debug_swap_enabled = true; +module_param_named(debug_swap, sev_es_debug_swap_enabled, bool, 0444); #else #define sev_enabled false #define sev_es_enabled false +#define sev_es_debug_swap_enabled false #endif /* CONFIG_KVM_AMD_SEV */ static u8 sev_enc_bit; @@ -606,6 +612,9 @@ static int sev_es_sync_vmsa(struct vcpu_svm *svm) save->xss = svm->vcpu.arch.ia32_xss; save->dr6 = svm->vcpu.arch.dr6; + if (sev_es_debug_swap_enabled) + save->sev_features |= SVM_SEV_FEAT_DEBUG_SWAP; + pr_debug("Virtual Machine Save Area (VMSA):\n"); print_hex_dump_debug("", DUMP_PREFIX_NONE, 16, 1, save, sizeof(*save), false); @@ -619,6 +628,11 @@ static int __sev_launch_update_vmsa(struct kvm *kvm, struct kvm_vcpu *vcpu, struct vcpu_svm *svm = to_svm(vcpu); int ret; + if (vcpu->guest_debug) { + pr_warn_once("KVM_SET_GUEST_DEBUG for SEV-ES guest is not supported"); + return -EINVAL; + } + /* Perform some pre-encryption checks against the VMSA */ ret = sev_es_sync_vmsa(svm); if (ret) @@ -1725,7 +1739,7 @@ static void sev_migrate_from(struct kvm *dst_kvm, struct kvm *src_kvm) * Note, the source is not required to have the same number of * vCPUs as the destination when 
migrating a vanilla SEV VM. */ - src_vcpu = kvm_get_vcpu(dst_kvm, i); + src_vcpu = kvm_get_vcpu(src_kvm, i); src_svm = to_svm(src_vcpu); /* @@ -2171,7 +2185,7 @@ void __init sev_hardware_setup(void) bool sev_es_supported = false; bool sev_supported = false; - if (!sev_enabled || !npt_enabled) + if (!sev_enabled || !npt_enabled || !nrips) goto out; /* @@ -2256,6 +2270,9 @@ out: sev_enabled = sev_supported; sev_es_enabled = sev_es_supported; + if (!sev_es_enabled || !cpu_feature_enabled(X86_FEATURE_DEBUG_SWAP) || + !cpu_feature_enabled(X86_FEATURE_NO_NESTED_DATA_BP)) + sev_es_debug_swap_enabled = false; #endif } @@ -2881,7 +2898,10 @@ int sev_handle_vmgexit(struct kvm_vcpu *vcpu) svm->sev_es.ghcb_sa); break; case SVM_VMGEXIT_NMI_COMPLETE: - ret = svm_invoke_exit_handler(vcpu, SVM_EXIT_IRET); + ++vcpu->stat.nmi_window_exits; + svm->nmi_masked = false; + kvm_make_request(KVM_REQ_EVENT, vcpu); + ret = 1; break; case SVM_VMGEXIT_AP_HLT_LOOP: ret = kvm_emulate_ap_reset_hold(vcpu); @@ -2944,6 +2964,7 @@ int sev_es_string_io(struct vcpu_svm *svm, int size, unsigned int port, int in) static void sev_es_init_vmcb(struct vcpu_svm *svm) { + struct vmcb *vmcb = svm->vmcb01.ptr; struct kvm_vcpu *vcpu = &svm->vcpu; svm->vmcb->control.nested_ctl |= SVM_NESTED_CTL_SEV_ES_ENABLE; @@ -2952,9 +2973,12 @@ static void sev_es_init_vmcb(struct vcpu_svm *svm) /* * An SEV-ES guest requires a VMSA area that is a separate from the * VMCB page. Do not include the encryption mask on the VMSA physical - * address since hardware will access it using the guest key. + * address since hardware will access it using the guest key. Note, + * the VMSA will be NULL if this vCPU is the destination for intrahost + * migration, and will be copied later. 
*/ - svm->vmcb->control.vmsa_pa = __pa(svm->sev_es.vmsa); + if (svm->sev_es.vmsa) + svm->vmcb->control.vmsa_pa = __pa(svm->sev_es.vmsa); /* Can't intercept CR register access, HV can't modify CR registers */ svm_clr_intercept(svm, INTERCEPT_CR0_READ); @@ -2972,8 +2996,23 @@ static void sev_es_init_vmcb(struct vcpu_svm *svm) svm_set_intercept(svm, TRAP_CR4_WRITE); svm_set_intercept(svm, TRAP_CR8_WRITE); - /* No support for enable_vmware_backdoor */ - clr_exception_intercept(svm, GP_VECTOR); + vmcb->control.intercepts[INTERCEPT_DR] = 0; + if (!sev_es_debug_swap_enabled) { + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_READ); + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_WRITE); + recalc_intercepts(svm); + } else { + /* + * Disable #DB intercept iff DebugSwap is enabled. KVM doesn't + * allow debugging SEV-ES guests, and enables DebugSwap iff + * NO_NESTED_DATA_BP is supported, so there's no reason to + * intercept #DB when DebugSwap is enabled. For simplicity + * with respect to guest debug, intercept #DB for other VMs + * even if NO_NESTED_DATA_BP is supported, i.e. even if the + * guest can't DoS the CPU with infinite #DB vectoring. + */ + clr_exception_intercept(svm, DB_VECTOR); + } /* Can't intercept XSETBV, HV can't modify XCR0 directly */ svm_clr_intercept(svm, INTERCEPT_XSETBV); @@ -3000,6 +3039,12 @@ void sev_init_vmcb(struct vcpu_svm *svm) svm->vmcb->control.nested_ctl |= SVM_NESTED_CTL_SEV_ENABLE; clr_exception_intercept(svm, UD_VECTOR); + /* + * Don't intercept #GP for SEV guests, e.g. for the VMware backdoor, as + * KVM can't decrypt guest memory to decode the faulting instruction. + */ + clr_exception_intercept(svm, GP_VECTOR); + if (sev_es_guest(svm->vcpu.kvm)) sev_es_init_vmcb(svm); } @@ -3018,20 +3063,41 @@ void sev_es_vcpu_reset(struct vcpu_svm *svm) void sev_es_prepare_switch_to_guest(struct sev_es_save_area *hostsa) { /* - * As an SEV-ES guest, hardware will restore the host state on VMEXIT, - * of which one step is to perform a VMLOAD. 
KVM performs the - * corresponding VMSAVE in svm_prepare_guest_switch for both - * traditional and SEV-ES guests. + * All host state for SEV-ES guests is categorized into three swap types + * based on how it is handled by hardware during a world switch: + * + * A: VMRUN: Host state saved in host save area + * VMEXIT: Host state loaded from host save area + * + * B: VMRUN: Host state _NOT_ saved in host save area + * VMEXIT: Host state loaded from host save area + * + * C: VMRUN: Host state _NOT_ saved in host save area + * VMEXIT: Host state initialized to default(reset) values + * + * Manually save type-B state, i.e. state that is loaded by VMEXIT but + * isn't saved by VMRUN, that isn't already saved by VMSAVE (performed + * by common SVM code). */ - - /* XCR0 is restored on VMEXIT, save the current host value */ hostsa->xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK); - - /* PKRU is restored on VMEXIT, save the current host value */ hostsa->pkru = read_pkru(); - - /* MSR_IA32_XSS is restored on VMEXIT, save the currnet host value */ hostsa->xss = host_xss; + + /* + * If DebugSwap is enabled, debug registers are loaded but NOT saved by + * the CPU (Type-B). If DebugSwap is disabled/unsupported, the CPU both + * saves and loads debug registers (Type-A). 
+ */ + if (sev_es_debug_swap_enabled) { + hostsa->dr0 = native_get_debugreg(0); + hostsa->dr1 = native_get_debugreg(1); + hostsa->dr2 = native_get_debugreg(2); + hostsa->dr3 = native_get_debugreg(3); + hostsa->dr0_addr_mask = amd_get_dr_addr_mask(0); + hostsa->dr1_addr_mask = amd_get_dr_addr_mask(1); + hostsa->dr2_addr_mask = amd_get_dr_addr_mask(2); + hostsa->dr3_addr_mask = amd_get_dr_addr_mask(3); + } } void sev_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector) diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index d4bfdc607fe7..f283eb47f6ac 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -39,10 +39,9 @@ #include <asm/spec-ctrl.h> #include <asm/cpu_device_id.h> #include <asm/traps.h> +#include <asm/reboot.h> #include <asm/fpu/api.h> -#include <asm/virtext.h> - #include <trace/events/ipi.h> #include "trace.h" @@ -203,7 +202,7 @@ static int nested = true; module_param(nested, int, S_IRUGO); /* enable/disable Next RIP Save */ -static int nrips = true; +int nrips = true; module_param(nrips, int, 0444); /* enable/disable Virtual VMLOAD VMSAVE */ @@ -365,6 +364,8 @@ static void svm_set_interrupt_shadow(struct kvm_vcpu *vcpu, int mask) svm->vmcb->control.int_state |= SVM_INTERRUPT_SHADOW_MASK; } +static bool svm_can_emulate_instruction(struct kvm_vcpu *vcpu, int emul_type, + void *insn, int insn_len); static int __svm_skip_emulated_instruction(struct kvm_vcpu *vcpu, bool commit_side_effects) @@ -385,6 +386,14 @@ static int __svm_skip_emulated_instruction(struct kvm_vcpu *vcpu, } if (!svm->next_rip) { + /* + * FIXME: Drop this when kvm_emulate_instruction() does the + * right thing and treats "can't emulate" as outright failure + * for EMULTYPE_SKIP. 
+ */ + if (!svm_can_emulate_instruction(vcpu, EMULTYPE_SKIP, NULL, 0)) + return 0; + if (unlikely(!commit_side_effects)) old_rflags = svm->vmcb->save.rflags; @@ -517,14 +526,21 @@ static void svm_init_osvw(struct kvm_vcpu *vcpu) vcpu->arch.osvw.status |= 1; } -static bool kvm_is_svm_supported(void) +static bool __kvm_is_svm_supported(void) { - int cpu = raw_smp_processor_id(); - const char *msg; + int cpu = smp_processor_id(); + struct cpuinfo_x86 *c = &cpu_data(cpu); + u64 vm_cr; - if (!cpu_has_svm(&msg)) { - pr_err("SVM not supported by CPU %d, %s\n", cpu, msg); + if (c->x86_vendor != X86_VENDOR_AMD && + c->x86_vendor != X86_VENDOR_HYGON) { + pr_err("CPU %d isn't AMD or Hygon\n", cpu); + return false; + } + + if (!cpu_has(c, X86_FEATURE_SVM)) { + pr_err("SVM not supported by CPU %d\n", cpu); return false; } @@ -542,25 +558,55 @@ static bool kvm_is_svm_supported(void) return true; } +static bool kvm_is_svm_supported(void) +{ + bool supported; + + migrate_disable(); + supported = __kvm_is_svm_supported(); + migrate_enable(); + + return supported; +} + static int svm_check_processor_compat(void) { - if (!kvm_is_svm_supported()) + if (!__kvm_is_svm_supported()) return -EIO; return 0; } -void __svm_write_tsc_multiplier(u64 multiplier) +static void __svm_write_tsc_multiplier(u64 multiplier) { - preempt_disable(); - if (multiplier == __this_cpu_read(current_tsc_ratio)) - goto out; + return; wrmsrl(MSR_AMD64_TSC_RATIO, multiplier); __this_cpu_write(current_tsc_ratio, multiplier); -out: - preempt_enable(); +} + +static inline void kvm_cpu_svm_disable(void) +{ + uint64_t efer; + + wrmsrl(MSR_VM_HSAVE_PA, 0); + rdmsrl(MSR_EFER, efer); + if (efer & EFER_SVME) { + /* + * Force GIF=1 prior to disabling SVM, e.g. to ensure INIT and + * NMI aren't blocked. 
+ */ + stgi(); + wrmsrl(MSR_EFER, efer & ~EFER_SVME); + } +} + +static void svm_emergency_disable(void) +{ + kvm_rebooting = true; + + kvm_cpu_svm_disable(); } static void svm_hardware_disable(void) @@ -569,7 +615,7 @@ static void svm_hardware_disable(void) if (tsc_scaling) __svm_write_tsc_multiplier(SVM_TSC_RATIO_DEFAULT); - cpu_svm_disable(); + kvm_cpu_svm_disable(); amd_pmu_disable_virt(); } @@ -677,6 +723,39 @@ free_save_area: } +static void set_dr_intercepts(struct vcpu_svm *svm) +{ + struct vmcb *vmcb = svm->vmcb01.ptr; + + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR0_READ); + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR1_READ); + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR2_READ); + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR3_READ); + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR4_READ); + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR5_READ); + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR6_READ); + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR0_WRITE); + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR1_WRITE); + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR2_WRITE); + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR3_WRITE); + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR4_WRITE); + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR5_WRITE); + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR6_WRITE); + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_READ); + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_WRITE); + + recalc_intercepts(svm); +} + +static void clr_dr_intercepts(struct vcpu_svm *svm) +{ + struct vmcb *vmcb = svm->vmcb01.ptr; + + vmcb->control.intercepts[INTERCEPT_DR] = 0; + + recalc_intercepts(svm); +} + static int direct_access_msr_slot(u32 msr) { u32 i; @@ -947,50 +1026,24 @@ static void svm_disable_lbrv(struct kvm_vcpu *vcpu) svm_copy_lbrs(svm->vmcb01.ptr, svm->vmcb); } -static int svm_get_lbr_msr(struct vcpu_svm *svm, u32 index) +static struct vmcb *svm_get_lbr_vmcb(struct vcpu_svm *svm) { /* - * If the LBR 
virtualization is disabled, the LBR msrs are always - * kept in the vmcb01 to avoid copying them on nested guest entries. - * - * If nested, and the LBR virtualization is enabled/disabled, the msrs - * are moved between the vmcb01 and vmcb02 as needed. + * If LBR virtualization is disabled, the LBR MSRs are always kept in + * vmcb01. If LBR virtualization is enabled and L1 is running VMs of + * its own, the MSRs are moved between vmcb01 and vmcb02 as needed. */ - struct vmcb *vmcb = - (svm->vmcb->control.virt_ext & LBR_CTL_ENABLE_MASK) ? - svm->vmcb : svm->vmcb01.ptr; - - switch (index) { - case MSR_IA32_DEBUGCTLMSR: - return vmcb->save.dbgctl; - case MSR_IA32_LASTBRANCHFROMIP: - return vmcb->save.br_from; - case MSR_IA32_LASTBRANCHTOIP: - return vmcb->save.br_to; - case MSR_IA32_LASTINTFROMIP: - return vmcb->save.last_excp_from; - case MSR_IA32_LASTINTTOIP: - return vmcb->save.last_excp_to; - default: - KVM_BUG(false, svm->vcpu.kvm, - "%s: Unknown MSR 0x%x", __func__, index); - return 0; - } + return svm->vmcb->control.virt_ext & LBR_CTL_ENABLE_MASK ? 
svm->vmcb : + svm->vmcb01.ptr; } void svm_update_lbrv(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); - - bool enable_lbrv = svm_get_lbr_msr(svm, MSR_IA32_DEBUGCTLMSR) & - DEBUGCTLMSR_LBR; - - bool current_enable_lbrv = !!(svm->vmcb->control.virt_ext & - LBR_CTL_ENABLE_MASK); - - if (unlikely(is_guest_mode(vcpu) && svm->lbrv_enabled)) - if (unlikely(svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK)) - enable_lbrv = true; + bool current_enable_lbrv = svm->vmcb->control.virt_ext & LBR_CTL_ENABLE_MASK; + bool enable_lbrv = (svm_get_lbr_vmcb(svm)->save.dbgctl & DEBUGCTLMSR_LBR) || + (is_guest_mode(vcpu) && guest_can_use(vcpu, X86_FEATURE_LBRV) && + (svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK)); if (enable_lbrv == current_enable_lbrv) return; @@ -1101,21 +1154,23 @@ static u64 svm_get_l2_tsc_multiplier(struct kvm_vcpu *vcpu) return svm->tsc_ratio_msr; } -static void svm_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset) +static void svm_write_tsc_offset(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); svm->vmcb01.ptr->control.tsc_offset = vcpu->arch.l1_tsc_offset; - svm->vmcb->control.tsc_offset = offset; + svm->vmcb->control.tsc_offset = vcpu->arch.tsc_offset; vmcb_mark_dirty(svm->vmcb, VMCB_INTERCEPTS); } -static void svm_write_tsc_multiplier(struct kvm_vcpu *vcpu, u64 multiplier) +void svm_write_tsc_multiplier(struct kvm_vcpu *vcpu) { - __svm_write_tsc_multiplier(multiplier); + preempt_disable(); + if (to_svm(vcpu)->guest_state_loaded) + __svm_write_tsc_multiplier(vcpu->arch.tsc_scaling_ratio); + preempt_enable(); } - /* Evaluate instruction intercepts that depend on guest CPUID features. 
*/ static void svm_recalc_instruction_intercepts(struct kvm_vcpu *vcpu, struct vcpu_svm *svm) @@ -1156,8 +1211,6 @@ static inline void init_vmcb_after_set_cpuid(struct kvm_vcpu *vcpu) set_msr_interception(vcpu, svm->msrpm, MSR_IA32_SYSENTER_EIP, 0, 0); set_msr_interception(vcpu, svm->msrpm, MSR_IA32_SYSENTER_ESP, 0, 0); - - svm->v_vmload_vmsave_enabled = false; } else { /* * If hardware supports Virtual VMLOAD VMSAVE then enable it @@ -1201,10 +1254,9 @@ static void init_vmcb(struct kvm_vcpu *vcpu) * Guest access to VMware backdoor ports could legitimately * trigger #GP because of TSS I/O permission bitmap. * We intercept those #GP and allow access to them anyway - * as VMware does. Don't intercept #GP for SEV guests as KVM can't - * decrypt guest memory to decode the faulting instruction. + * as VMware does. */ - if (enable_vmware_backdoor && !sev_guest(vcpu->kvm)) + if (enable_vmware_backdoor) set_exception_intercept(svm, GP_VECTOR); svm_set_intercept(svm, INTERCEPT_INTR); @@ -1949,7 +2001,7 @@ static void svm_sync_dirty_debug_regs(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); - if (vcpu->arch.guest_state_protected) + if (WARN_ON_ONCE(sev_es_guest(vcpu->kvm))) return; get_debugreg(vcpu->arch.db[0], 0); @@ -2510,12 +2562,13 @@ static int iret_interception(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); + WARN_ON_ONCE(sev_es_guest(vcpu->kvm)); + ++vcpu->stat.nmi_window_exits; svm->awaiting_iret_completion = true; svm_clr_iret_intercept(svm); - if (!sev_es_guest(vcpu->kvm)) - svm->nmi_iret_rip = kvm_rip_read(vcpu); + svm->nmi_iret_rip = kvm_rip_read(vcpu); kvm_make_request(KVM_REQ_EVENT, vcpu); return 1; @@ -2680,6 +2733,13 @@ static int dr_interception(struct kvm_vcpu *vcpu) unsigned long val; int err = 0; + /* + * SEV-ES intercepts DR7 only to disable guest debugging and the guest issues a VMGEXIT + * for DR7 write only. KVM cannot change DR7 (always swapped as type 'A') so return early. 
+ */ + if (sev_es_guest(vcpu->kvm)) + return 1; + if (vcpu->guest_debug == 0) { /* * No more DR vmexits; force a reload of the debug registers @@ -2764,7 +2824,8 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) switch (msr_info->index) { case MSR_AMD64_TSC_RATIO: - if (!msr_info->host_initiated && !svm->tsc_scaling_enabled) + if (!msr_info->host_initiated && + !guest_can_use(vcpu, X86_FEATURE_TSCRATEMSR)) return 1; msr_info->data = svm->tsc_ratio_msr; break; @@ -2802,11 +2863,19 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) msr_info->data = svm->tsc_aux; break; case MSR_IA32_DEBUGCTLMSR: + msr_info->data = svm_get_lbr_vmcb(svm)->save.dbgctl; + break; case MSR_IA32_LASTBRANCHFROMIP: + msr_info->data = svm_get_lbr_vmcb(svm)->save.br_from; + break; case MSR_IA32_LASTBRANCHTOIP: + msr_info->data = svm_get_lbr_vmcb(svm)->save.br_to; + break; case MSR_IA32_LASTINTFROMIP: + msr_info->data = svm_get_lbr_vmcb(svm)->save.last_excp_from; + break; case MSR_IA32_LASTINTTOIP: - msr_info->data = svm_get_lbr_msr(svm, msr_info->index); + msr_info->data = svm_get_lbr_vmcb(svm)->save.last_excp_to; break; case MSR_VM_HSAVE_PA: msr_info->data = svm->nested.hsave_msr; @@ -2906,7 +2975,7 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) switch (ecx) { case MSR_AMD64_TSC_RATIO: - if (!svm->tsc_scaling_enabled) { + if (!guest_can_use(vcpu, X86_FEATURE_TSCRATEMSR)) { if (!msr->host_initiated) return 1; @@ -2928,7 +2997,8 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) svm->tsc_ratio_msr = data; - if (svm->tsc_scaling_enabled && is_guest_mode(vcpu)) + if (guest_can_use(vcpu, X86_FEATURE_TSCRATEMSR) && + is_guest_mode(vcpu)) nested_svm_update_tsc_ratio_msr(vcpu); break; @@ -3037,13 +3107,8 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) if (data & DEBUGCTL_RESERVED_BITS) return 1; - if (svm->vmcb->control.virt_ext & LBR_CTL_ENABLE_MASK) - svm->vmcb->save.dbgctl = data; - 
else - svm->vmcb01.ptr->save.dbgctl = data; - + svm_get_lbr_vmcb(svm)->save.dbgctl = data; svm_update_lbrv(vcpu); - break; case MSR_VM_HSAVE_PA: /* @@ -3769,6 +3834,19 @@ static void svm_enable_nmi_window(struct kvm_vcpu *vcpu) if (svm_get_nmi_mask(vcpu) && !svm->awaiting_iret_completion) return; /* IRET will cause a vm exit */ + /* + * SEV-ES guests are responsible for signaling when a vCPU is ready to + * receive a new NMI, as SEV-ES guests can't be single-stepped, i.e. + * KVM can't intercept and single-step IRET to detect when NMIs are + * unblocked (architecturally speaking). See SVM_VMGEXIT_NMI_COMPLETE. + * + * Note, GIF is guaranteed to be '1' for SEV-ES guests as hardware + * ignores SEV-ES guest writes to EFER.SVME *and* CLGI/STGI are not + * supported NAEs in the GHCB protocol. + */ + if (sev_es_guest(vcpu->kvm)) + return; + if (!gif_set(svm)) { if (vgif) svm_set_intercept(svm, INTERCEPT_STGI); @@ -3918,12 +3996,11 @@ static void svm_complete_interrupts(struct kvm_vcpu *vcpu) svm->soft_int_injected = false; /* - * If we've made progress since setting HF_IRET_MASK, we've + * If we've made progress since setting awaiting_iret_completion, we've * executed an IRET and can allow NMI injection. 
*/ if (svm->awaiting_iret_completion && - (sev_es_guest(vcpu->kvm) || - kvm_rip_read(vcpu) != svm->nmi_iret_rip)) { + kvm_rip_read(vcpu) != svm->nmi_iret_rip) { svm->awaiting_iret_completion = false; svm->nmi_masked = false; kvm_make_request(KVM_REQ_EVENT, vcpu); @@ -4209,28 +4286,37 @@ static void svm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu) struct vcpu_svm *svm = to_svm(vcpu); struct kvm_cpuid_entry2 *best; - vcpu->arch.xsaves_enabled = guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) && - boot_cpu_has(X86_FEATURE_XSAVE) && - boot_cpu_has(X86_FEATURE_XSAVES); - - /* Update nrips enabled cache */ - svm->nrips_enabled = kvm_cpu_cap_has(X86_FEATURE_NRIPS) && - guest_cpuid_has(vcpu, X86_FEATURE_NRIPS); - - svm->tsc_scaling_enabled = tsc_scaling && guest_cpuid_has(vcpu, X86_FEATURE_TSCRATEMSR); - svm->lbrv_enabled = lbrv && guest_cpuid_has(vcpu, X86_FEATURE_LBRV); - - svm->v_vmload_vmsave_enabled = vls && guest_cpuid_has(vcpu, X86_FEATURE_V_VMSAVE_VMLOAD); - - svm->pause_filter_enabled = kvm_cpu_cap_has(X86_FEATURE_PAUSEFILTER) && - guest_cpuid_has(vcpu, X86_FEATURE_PAUSEFILTER); + /* + * SVM doesn't provide a way to disable just XSAVES in the guest, KVM + * can only disable all variants of XSAVE by disallowing CR4.OSXSAVE from + * being set. As a result, if the host has XSAVE and XSAVES, and the + * guest has XSAVE enabled, the guest can execute XSAVES without + * faulting. Treat XSAVES as enabled in this case regardless of + * whether it's advertised to the guest so that KVM context switches + * XSS on VM-Enter/VM-Exit. Failure to do so would effectively give + * the guest read/write access to the host's XSS.
+ */ + if (boot_cpu_has(X86_FEATURE_XSAVE) && + boot_cpu_has(X86_FEATURE_XSAVES) && + guest_cpuid_has(vcpu, X86_FEATURE_XSAVE)) + kvm_governed_feature_set(vcpu, X86_FEATURE_XSAVES); - svm->pause_threshold_enabled = kvm_cpu_cap_has(X86_FEATURE_PFTHRESHOLD) && - guest_cpuid_has(vcpu, X86_FEATURE_PFTHRESHOLD); + kvm_governed_feature_check_and_set(vcpu, X86_FEATURE_NRIPS); + kvm_governed_feature_check_and_set(vcpu, X86_FEATURE_TSCRATEMSR); + kvm_governed_feature_check_and_set(vcpu, X86_FEATURE_LBRV); - svm->vgif_enabled = vgif && guest_cpuid_has(vcpu, X86_FEATURE_VGIF); + /* + * Intercept VMLOAD if the vCPU mode is Intel in order to emulate that + * VMLOAD drops bits 63:32 of SYSENTER (ignoring the fact that exposing + * SVM on Intel is bonkers and extremely unlikely to work). + */ + if (!guest_cpuid_is_intel(vcpu)) + kvm_governed_feature_check_and_set(vcpu, X86_FEATURE_V_VMSAVE_VMLOAD); - svm->vnmi_enabled = vnmi && guest_cpuid_has(vcpu, X86_FEATURE_VNMI); + kvm_governed_feature_check_and_set(vcpu, X86_FEATURE_PAUSEFILTER); + kvm_governed_feature_check_and_set(vcpu, X86_FEATURE_PFTHRESHOLD); + kvm_governed_feature_check_and_set(vcpu, X86_FEATURE_VGIF); + kvm_governed_feature_check_and_set(vcpu, X86_FEATURE_VNMI); svm_recalc_instruction_intercepts(vcpu, svm); @@ -4651,16 +4737,25 @@ static bool svm_can_emulate_instruction(struct kvm_vcpu *vcpu, int emul_type, * and cannot be decrypted by KVM, i.e. KVM would read cyphertext and * decode garbage. * - * Inject #UD if KVM reached this point without an instruction buffer. - * In practice, this path should never be hit by a well-behaved guest, - * e.g. KVM doesn't intercept #UD or #GP for SEV guests, but this path - * is still theoretically reachable, e.g. via unaccelerated fault-like - * AVIC access, and needs to be handled by KVM to avoid putting the - * guest into an infinite loop. Injecting #UD is somewhat arbitrary, - * but its the least awful option given lack of insight into the guest. 
+ * If KVM is NOT trying to simply skip an instruction, inject #UD if + * KVM reached this point without an instruction buffer. In practice, + * this path should never be hit by a well-behaved guest, e.g. KVM + * doesn't intercept #UD or #GP for SEV guests, but this path is still + * theoretically reachable, e.g. via unaccelerated fault-like AVIC + * access, and needs to be handled by KVM to avoid putting the guest + * into an infinite loop. Injecting #UD is somewhat arbitrary, but + * it's the least awful option given lack of insight into the guest. + * + * If KVM is trying to skip an instruction, simply resume the guest. + * If a #NPF occurs while the guest is vectoring an INT3/INTO, then KVM + * will attempt to re-inject the INT3/INTO and skip the instruction. + * In that scenario, retrying the INT3/INTO and hoping the guest will + * make forward progress is the only option that has a chance of + * success (and in practice it will work the vast majority of the time). */ if (unlikely(!insn)) { - kvm_queue_exception(vcpu, UD_VECTOR); + if (!(emul_type & EMULTYPE_SKIP)) + kvm_queue_exception(vcpu, UD_VECTOR); return false; } @@ -5112,9 +5207,11 @@ static __init int svm_hardware_setup(void) svm_adjust_mmio_mask(); + nrips = nrips && boot_cpu_has(X86_FEATURE_NRIPS); + /* * Note, SEV setup consumes npt_enabled and enable_mmio_caching (which - * may be modified by svm_adjust_mmio_mask()). + * may be modified by svm_adjust_mmio_mask()), as well as nrips.
*/ sev_hardware_setup(); @@ -5126,11 +5223,6 @@ static __init int svm_hardware_setup(void) goto err; } - if (nrips) { - if (!boot_cpu_has(X86_FEATURE_NRIPS)) - nrips = false; - } - enable_apicv = avic = avic && avic_hardware_setup(); if (!enable_apicv) { @@ -5213,6 +5305,13 @@ static struct kvm_x86_init_ops svm_init_ops __initdata = { .pmu_ops = &amd_pmu_ops, }; +static void __svm_exit(void) +{ + kvm_x86_vendor_exit(); + + cpu_emergency_unregister_virt_callback(svm_emergency_disable); +} + static int __init svm_init(void) { int r; @@ -5226,6 +5325,8 @@ static int __init svm_init(void) if (r) return r; + cpu_emergency_register_virt_callback(svm_emergency_disable); + /* * Common KVM initialization _must_ come last, after this, /dev/kvm is * exposed to userspace! @@ -5238,14 +5339,14 @@ static int __init svm_init(void) return 0; err_kvm_init: - kvm_x86_vendor_exit(); + __svm_exit(); return r; } static void __exit svm_exit(void) { kvm_exit(); - kvm_x86_vendor_exit(); + __svm_exit(); } module_init(svm_init) diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index 8239c8de45ac..f41253958357 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -22,6 +22,7 @@ #include <asm/svm.h> #include <asm/sev-common.h> +#include "cpuid.h" #include "kvm_cache_regs.h" #define __sme_page_pa(x) __sme_set(page_to_pfn(x) << PAGE_SHIFT) @@ -33,6 +34,7 @@ #define MSRPM_OFFSETS 32 extern u32 msrpm_offsets[MSRPM_OFFSETS] __read_mostly; extern bool npt_enabled; +extern int nrips; extern int vgif; extern bool intercept_smi; extern bool x2avic_enabled; @@ -260,16 +262,6 @@ struct vcpu_svm { unsigned long soft_int_next_rip; bool soft_int_injected; - /* optional nested SVM features that are enabled for this guest */ - bool nrips_enabled : 1; - bool tsc_scaling_enabled : 1; - bool v_vmload_vmsave_enabled : 1; - bool lbrv_enabled : 1; - bool pause_filter_enabled : 1; - bool pause_threshold_enabled : 1; - bool vgif_enabled : 1; - bool vnmi_enabled : 1; - u32 ldr_reg; u32 
dfr_reg; struct page *avic_backing_page; @@ -406,48 +398,6 @@ static inline bool vmcb12_is_intercept(struct vmcb_ctrl_area_cached *control, u3 return test_bit(bit, (unsigned long *)&control->intercepts); } -static inline void set_dr_intercepts(struct vcpu_svm *svm) -{ - struct vmcb *vmcb = svm->vmcb01.ptr; - - if (!sev_es_guest(svm->vcpu.kvm)) { - vmcb_set_intercept(&vmcb->control, INTERCEPT_DR0_READ); - vmcb_set_intercept(&vmcb->control, INTERCEPT_DR1_READ); - vmcb_set_intercept(&vmcb->control, INTERCEPT_DR2_READ); - vmcb_set_intercept(&vmcb->control, INTERCEPT_DR3_READ); - vmcb_set_intercept(&vmcb->control, INTERCEPT_DR4_READ); - vmcb_set_intercept(&vmcb->control, INTERCEPT_DR5_READ); - vmcb_set_intercept(&vmcb->control, INTERCEPT_DR6_READ); - vmcb_set_intercept(&vmcb->control, INTERCEPT_DR0_WRITE); - vmcb_set_intercept(&vmcb->control, INTERCEPT_DR1_WRITE); - vmcb_set_intercept(&vmcb->control, INTERCEPT_DR2_WRITE); - vmcb_set_intercept(&vmcb->control, INTERCEPT_DR3_WRITE); - vmcb_set_intercept(&vmcb->control, INTERCEPT_DR4_WRITE); - vmcb_set_intercept(&vmcb->control, INTERCEPT_DR5_WRITE); - vmcb_set_intercept(&vmcb->control, INTERCEPT_DR6_WRITE); - } - - vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_READ); - vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_WRITE); - - recalc_intercepts(svm); -} - -static inline void clr_dr_intercepts(struct vcpu_svm *svm) -{ - struct vmcb *vmcb = svm->vmcb01.ptr; - - vmcb->control.intercepts[INTERCEPT_DR] = 0; - - /* DR7 access must remain intercepted for an SEV-ES guest */ - if (sev_es_guest(svm->vcpu.kvm)) { - vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_READ); - vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_WRITE); - } - - recalc_intercepts(svm); -} - static inline void set_exception_intercept(struct vcpu_svm *svm, u32 bit) { struct vmcb *vmcb = svm->vmcb01.ptr; @@ -493,7 +443,8 @@ static inline bool svm_is_intercept(struct vcpu_svm *svm, int bit) static inline bool nested_vgif_enabled(struct vcpu_svm *svm) { - 
return svm->vgif_enabled && (svm->nested.ctl.int_ctl & V_GIF_ENABLE_MASK); + return guest_can_use(&svm->vcpu, X86_FEATURE_VGIF) && + (svm->nested.ctl.int_ctl & V_GIF_ENABLE_MASK); } static inline struct vmcb *get_vgif_vmcb(struct vcpu_svm *svm) @@ -544,7 +495,7 @@ static inline bool nested_npt_enabled(struct vcpu_svm *svm) static inline bool nested_vnmi_enabled(struct vcpu_svm *svm) { - return svm->vnmi_enabled && + return guest_can_use(&svm->vcpu, X86_FEATURE_VNMI) && (svm->nested.ctl.int_ctl & V_NMI_ENABLE_MASK); } @@ -660,7 +611,7 @@ int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr, bool has_error_code, u32 error_code); int nested_svm_exit_special(struct vcpu_svm *svm); void nested_svm_update_tsc_ratio_msr(struct kvm_vcpu *vcpu); -void __svm_write_tsc_multiplier(u64 multiplier); +void svm_write_tsc_multiplier(struct kvm_vcpu *vcpu); void nested_copy_vmcb_control_to_cache(struct vcpu_svm *svm, struct vmcb_control_area *control); void nested_copy_vmcb_save_to_cache(struct vcpu_svm *svm, diff --git a/arch/x86/kvm/vmx/capabilities.h b/arch/x86/kvm/vmx/capabilities.h index d0abee35d7ba..41a4533f9989 100644 --- a/arch/x86/kvm/vmx/capabilities.h +++ b/arch/x86/kvm/vmx/capabilities.h @@ -252,7 +252,7 @@ static inline bool cpu_has_vmx_pml(void) static inline bool cpu_has_vmx_xsaves(void) { return vmcs_config.cpu_based_2nd_exec_ctrl & - SECONDARY_EXEC_XSAVES; + SECONDARY_EXEC_ENABLE_XSAVES; } static inline bool cpu_has_vmx_waitpkg(void) diff --git a/arch/x86/kvm/vmx/hyperv.c b/arch/x86/kvm/vmx/hyperv.c index 79450e1ed7cf..313b8bb5b8a7 100644 --- a/arch/x86/kvm/vmx/hyperv.c +++ b/arch/x86/kvm/vmx/hyperv.c @@ -78,7 +78,7 @@ SECONDARY_EXEC_DESC | \ SECONDARY_EXEC_ENABLE_RDTSCP | \ SECONDARY_EXEC_ENABLE_INVPCID | \ - SECONDARY_EXEC_XSAVES | \ + SECONDARY_EXEC_ENABLE_XSAVES | \ SECONDARY_EXEC_RDSEED_EXITING | \ SECONDARY_EXEC_RDRAND_EXITING | \ SECONDARY_EXEC_TSC_SCALING | \ diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 
516391cc0d64..c5ec0ef51ff7 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -2307,7 +2307,7 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct loaded_vmcs *vmcs0 SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE | SECONDARY_EXEC_ENABLE_INVPCID | SECONDARY_EXEC_ENABLE_RDTSCP | - SECONDARY_EXEC_XSAVES | + SECONDARY_EXEC_ENABLE_XSAVES | SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE | SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | SECONDARY_EXEC_APIC_REGISTER_VIRT | @@ -6331,7 +6331,7 @@ static bool nested_vmx_l1_wants_exit(struct kvm_vcpu *vcpu, * If if it were, XSS would have to be checked against * the XSS exit bitmap in vmcs12. */ - return nested_cpu_has2(vmcs12, SECONDARY_EXEC_XSAVES); + return nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_XSAVES); case EXIT_REASON_UMWAIT: case EXIT_REASON_TPAUSE: return nested_cpu_has2(vmcs12, @@ -6426,7 +6426,7 @@ static int vmx_get_nested_state(struct kvm_vcpu *vcpu, vmx = to_vmx(vcpu); vmcs12 = get_vmcs12(vcpu); - if (nested_vmx_allowed(vcpu) && + if (guest_can_use(vcpu, X86_FEATURE_VMX) && (vmx->nested.vmxon || vmx->nested.smm.vmxon)) { kvm_state.hdr.vmx.vmxon_pa = vmx->nested.vmxon_ptr; kvm_state.hdr.vmx.vmcs12_pa = vmx->nested.current_vmptr; @@ -6567,7 +6567,7 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu, if (kvm_state->flags & ~KVM_STATE_NESTED_EVMCS) return -EINVAL; } else { - if (!nested_vmx_allowed(vcpu)) + if (!guest_can_use(vcpu, X86_FEATURE_VMX)) return -EINVAL; if (!page_address_valid(vcpu, kvm_state->hdr.vmx.vmxon_pa)) @@ -6601,7 +6601,8 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu, return -EINVAL; if ((kvm_state->flags & KVM_STATE_NESTED_EVMCS) && - (!nested_vmx_allowed(vcpu) || !vmx->nested.enlightened_vmcs_enabled)) + (!guest_can_use(vcpu, X86_FEATURE_VMX) || + !vmx->nested.enlightened_vmcs_enabled)) return -EINVAL; vmx_leave_nested(vcpu); @@ -6874,7 +6875,7 @@ static void nested_vmx_setup_secondary_ctls(u32 ept_caps, SECONDARY_EXEC_ENABLE_INVPCID | SECONDARY_EXEC_ENABLE_VMFUNC 
| SECONDARY_EXEC_RDSEED_EXITING | - SECONDARY_EXEC_XSAVES | + SECONDARY_EXEC_ENABLE_XSAVES | SECONDARY_EXEC_TSC_SCALING | SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE; diff --git a/arch/x86/kvm/vmx/nested.h b/arch/x86/kvm/vmx/nested.h index 96952263b029..b4b9d51438c6 100644 --- a/arch/x86/kvm/vmx/nested.h +++ b/arch/x86/kvm/vmx/nested.h @@ -168,7 +168,7 @@ static inline int nested_cpu_has_ept(struct vmcs12 *vmcs12) static inline bool nested_cpu_has_xsaves(struct vmcs12 *vmcs12) { - return nested_cpu_has2(vmcs12, SECONDARY_EXEC_XSAVES); + return nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_XSAVES); } static inline bool nested_cpu_has_pml(struct vmcs12 *vmcs12) diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c index 80c769c58a87..f2efa0bf7ae8 100644 --- a/arch/x86/kvm/vmx/pmu_intel.c +++ b/arch/x86/kvm/vmx/pmu_intel.c @@ -22,23 +22,51 @@ #define MSR_PMC_FULL_WIDTH_BIT (MSR_IA32_PMC0 - MSR_IA32_PERFCTR0) +enum intel_pmu_architectural_events { + /* + * The order of the architectural events matters as support for each + * event is enumerated via CPUID using the index of the event. + */ + INTEL_ARCH_CPU_CYCLES, + INTEL_ARCH_INSTRUCTIONS_RETIRED, + INTEL_ARCH_REFERENCE_CYCLES, + INTEL_ARCH_LLC_REFERENCES, + INTEL_ARCH_LLC_MISSES, + INTEL_ARCH_BRANCHES_RETIRED, + INTEL_ARCH_BRANCHES_MISPREDICTED, + + NR_REAL_INTEL_ARCH_EVENTS, + + /* + * Pseudo-architectural event used to implement IA32_FIXED_CTR2, a.k.a. + * TSC reference cycles. The architectural reference cycles event may + * or may not actually use the TSC as the reference, e.g. might use the + * core crystal clock or the bus clock (yeah, "architectural"). 
+ */ + PSEUDO_ARCH_REFERENCE_CYCLES = NR_REAL_INTEL_ARCH_EVENTS, + NR_INTEL_ARCH_EVENTS, +}; + static struct { u8 eventsel; u8 unit_mask; } const intel_arch_events[] = { - [0] = { 0x3c, 0x00 }, - [1] = { 0xc0, 0x00 }, - [2] = { 0x3c, 0x01 }, - [3] = { 0x2e, 0x4f }, - [4] = { 0x2e, 0x41 }, - [5] = { 0xc4, 0x00 }, - [6] = { 0xc5, 0x00 }, - /* The above index must match CPUID 0x0A.EBX bit vector */ - [7] = { 0x00, 0x03 }, + [INTEL_ARCH_CPU_CYCLES] = { 0x3c, 0x00 }, + [INTEL_ARCH_INSTRUCTIONS_RETIRED] = { 0xc0, 0x00 }, + [INTEL_ARCH_REFERENCE_CYCLES] = { 0x3c, 0x01 }, + [INTEL_ARCH_LLC_REFERENCES] = { 0x2e, 0x4f }, + [INTEL_ARCH_LLC_MISSES] = { 0x2e, 0x41 }, + [INTEL_ARCH_BRANCHES_RETIRED] = { 0xc4, 0x00 }, + [INTEL_ARCH_BRANCHES_MISPREDICTED] = { 0xc5, 0x00 }, + [PSEUDO_ARCH_REFERENCE_CYCLES] = { 0x00, 0x03 }, }; /* mapping between fixed pmc index and intel_arch_events array */ -static int fixed_pmc_events[] = {1, 0, 7}; +static int fixed_pmc_events[] = { + [0] = INTEL_ARCH_INSTRUCTIONS_RETIRED, + [1] = INTEL_ARCH_CPU_CYCLES, + [2] = PSEUDO_ARCH_REFERENCE_CYCLES, +}; static void reprogram_fixed_counters(struct kvm_pmu *pmu, u64 data) { @@ -80,16 +108,18 @@ static bool intel_hw_event_available(struct kvm_pmc *pmc) u8 unit_mask = (pmc->eventsel & ARCH_PERFMON_EVENTSEL_UMASK) >> 8; int i; - for (i = 0; i < ARRAY_SIZE(intel_arch_events); i++) { + BUILD_BUG_ON(ARRAY_SIZE(intel_arch_events) != NR_INTEL_ARCH_EVENTS); + + /* + * Disallow events reported as unavailable in guest CPUID. Note, this + * doesn't apply to pseudo-architectural events. 
+ */ + for (i = 0; i < NR_REAL_INTEL_ARCH_EVENTS; i++) { if (intel_arch_events[i].eventsel != event_select || intel_arch_events[i].unit_mask != unit_mask) continue; - /* disable event that reported as not present by cpuid */ - if ((i < 7) && !(pmu->available_event_types & (1 << i))) - return false; - - break; + return pmu->available_event_types & BIT(i); } return true; @@ -438,16 +468,17 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) static void setup_fixed_pmc_eventsel(struct kvm_pmu *pmu) { - size_t size = ARRAY_SIZE(fixed_pmc_events); - struct kvm_pmc *pmc; - u32 event; int i; + BUILD_BUG_ON(ARRAY_SIZE(fixed_pmc_events) != KVM_PMC_MAX_FIXED); + for (i = 0; i < pmu->nr_arch_fixed_counters; i++) { - pmc = &pmu->fixed_counters[i]; - event = fixed_pmc_events[array_index_nospec(i, size)]; + int index = array_index_nospec(i, KVM_PMC_MAX_FIXED); + struct kvm_pmc *pmc = &pmu->fixed_counters[index]; + u32 event = fixed_pmc_events[index]; + pmc->eventsel = (intel_arch_events[event].unit_mask << 8) | - intel_arch_events[event].eventsel; + intel_arch_events[event].eventsel; } } @@ -508,10 +539,8 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu) if (pmu->version == 1) { pmu->nr_arch_fixed_counters = 0; } else { - pmu->nr_arch_fixed_counters = - min3(ARRAY_SIZE(fixed_pmc_events), - (size_t) edx.split.num_counters_fixed, - (size_t)kvm_pmu_cap.num_counters_fixed); + pmu->nr_arch_fixed_counters = min_t(int, edx.split.num_counters_fixed, + kvm_pmu_cap.num_counters_fixed); edx.split.bit_width_fixed = min_t(int, edx.split.bit_width_fixed, kvm_pmu_cap.bit_width_fixed); pmu->counter_bitmask[KVM_PMC_FIXED] = diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index b483a8baaacf..72e3943f3693 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -41,13 +41,12 @@ #include <asm/idtentry.h> #include <asm/io.h> #include <asm/irq_remapping.h> -#include <asm/kexec.h> +#include <asm/reboot.h> #include <asm/perf_event.h> #include 
<asm/mmu_context.h> #include <asm/mshyperv.h> #include <asm/mwait.h> #include <asm/spec-ctrl.h> -#include <asm/virtext.h> #include <asm/vmx.h> #include "capabilities.h" @@ -237,9 +236,6 @@ static const struct { #define L1D_CACHE_ORDER 4 static void *vmx_l1d_flush_pages; -/* Control for disabling CPU Fill buffer clear */ -static bool __read_mostly vmx_fb_clear_ctrl_available; - static int vmx_setup_l1d_flush(enum vmx_l1d_flush_state l1tf) { struct page *page; @@ -255,14 +251,9 @@ static int vmx_setup_l1d_flush(enum vmx_l1d_flush_state l1tf) return 0; } - if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES)) { - u64 msr; - - rdmsrl(MSR_IA32_ARCH_CAPABILITIES, msr); - if (msr & ARCH_CAP_SKIP_VMENTRY_L1DFLUSH) { - l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_NOT_REQUIRED; - return 0; - } + if (host_arch_capabilities & ARCH_CAP_SKIP_VMENTRY_L1DFLUSH) { + l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_NOT_REQUIRED; + return 0; } /* If set to auto use the default l1tf mitigation method */ @@ -366,22 +357,9 @@ static int vmentry_l1d_flush_set(const char *s, const struct kernel_param *kp) static int vmentry_l1d_flush_get(char *s, const struct kernel_param *kp) { if (WARN_ON_ONCE(l1tf_vmx_mitigation >= ARRAY_SIZE(vmentry_l1d_param))) - return sprintf(s, "???\n"); + return sysfs_emit(s, "???\n"); - return sprintf(s, "%s\n", vmentry_l1d_param[l1tf_vmx_mitigation].option); -} - -static void vmx_setup_fb_clear_ctrl(void) -{ - u64 msr; - - if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES) && - !boot_cpu_has_bug(X86_BUG_MDS) && - !boot_cpu_has_bug(X86_BUG_TAA)) { - rdmsrl(MSR_IA32_ARCH_CAPABILITIES, msr); - if (msr & ARCH_CAP_FB_CLEAR_CTRL) - vmx_fb_clear_ctrl_available = true; - } + return sysfs_emit(s, "%s\n", vmentry_l1d_param[l1tf_vmx_mitigation].option); } static __always_inline void vmx_disable_fb_clear(struct vcpu_vmx *vmx) @@ -409,7 +387,9 @@ static __always_inline void vmx_enable_fb_clear(struct vcpu_vmx *vmx) static void vmx_update_fb_clear_dis(struct kvm_vcpu *vcpu, struct vcpu_vmx *vmx) 
{ - vmx->disable_fb_clear = vmx_fb_clear_ctrl_available; + vmx->disable_fb_clear = (host_arch_capabilities & ARCH_CAP_FB_CLEAR_CTRL) && + !boot_cpu_has_bug(X86_BUG_MDS) && + !boot_cpu_has_bug(X86_BUG_TAA); /* * If guest will not execute VERW, there is no need to set FB_CLEAR_DIS @@ -754,17 +734,51 @@ static int vmx_set_guest_uret_msr(struct vcpu_vmx *vmx, return ret; } -#ifdef CONFIG_KEXEC_CORE -static void crash_vmclear_local_loaded_vmcss(void) +/* + * Disable VMX and clear CR4.VMXE (even if VMXOFF faults) + * + * Note, VMXOFF causes a #UD if the CPU is !post-VMXON, but it's impossible to + * atomically track post-VMXON state, e.g. this may be called in NMI context. + * Eat all faults as all other faults on VMXOFF faults are mode related, i.e. + * faults are guaranteed to be due to the !post-VMXON check unless the CPU is + * magically in RM, VM86, compat mode, or at CPL>0. + */ +static int kvm_cpu_vmxoff(void) +{ + asm_volatile_goto("1: vmxoff\n\t" + _ASM_EXTABLE(1b, %l[fault]) + ::: "cc", "memory" : fault); + + cr4_clear_bits(X86_CR4_VMXE); + return 0; + +fault: + cr4_clear_bits(X86_CR4_VMXE); + return -EIO; +} + +static void vmx_emergency_disable(void) { int cpu = raw_smp_processor_id(); struct loaded_vmcs *v; + kvm_rebooting = true; + + /* + * Note, CR4.VMXE can be _cleared_ in NMI context, but it can only be + * set in task context. If this races with VMX is disabled by an NMI, + * VMCLEAR and VMXOFF may #UD, but KVM will eat those faults due to + * kvm_rebooting set. 
+ */ + if (!(__read_cr4() & X86_CR4_VMXE)) + return; + list_for_each_entry(v, &per_cpu(loaded_vmcss_on_cpu, cpu), loaded_vmcss_on_cpu_link) vmcs_clear(v->vmcs); + + kvm_cpu_vmxoff(); } -#endif /* CONFIG_KEXEC_CORE */ static void __loaded_vmcs_clear(void *arg) { @@ -1899,25 +1913,14 @@ u64 vmx_get_l2_tsc_multiplier(struct kvm_vcpu *vcpu) return kvm_caps.default_tsc_scaling_ratio; } -static void vmx_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset) +static void vmx_write_tsc_offset(struct kvm_vcpu *vcpu) { - vmcs_write64(TSC_OFFSET, offset); + vmcs_write64(TSC_OFFSET, vcpu->arch.tsc_offset); } -static void vmx_write_tsc_multiplier(struct kvm_vcpu *vcpu, u64 multiplier) +static void vmx_write_tsc_multiplier(struct kvm_vcpu *vcpu) { - vmcs_write64(TSC_MULTIPLIER, multiplier); -} - -/* - * nested_vmx_allowed() checks whether a guest should be allowed to use VMX - * instructions and MSRs (i.e., nested VMX). Nested VMX is disabled for - * all guests if the "nested" module option is off, and can also be disabled - * for a single guest by disabling its VMX cpuid bit. - */ -bool nested_vmx_allowed(struct kvm_vcpu *vcpu) -{ - return nested && guest_cpuid_has(vcpu, X86_FEATURE_VMX); + vmcs_write64(TSC_MULTIPLIER, vcpu->arch.tsc_scaling_ratio); } /* @@ -2047,7 +2050,7 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) [msr_info->index - MSR_IA32_SGXLEPUBKEYHASH0]; break; case KVM_FIRST_EMULATED_VMX_MSR ... KVM_LAST_EMULATED_VMX_MSR: - if (!nested_vmx_allowed(vcpu)) + if (!guest_can_use(vcpu, X86_FEATURE_VMX)) return 1; if (vmx_get_vmx_msr(&vmx->nested.msrs, msr_info->index, &msr_info->data)) @@ -2355,7 +2358,7 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case KVM_FIRST_EMULATED_VMX_MSR ... 
KVM_LAST_EMULATED_VMX_MSR: if (!msr_info->host_initiated) return 1; /* they are read-only */ - if (!nested_vmx_allowed(vcpu)) + if (!guest_can_use(vcpu, X86_FEATURE_VMX)) return 1; return vmx_set_vmx_msr(vcpu, msr_index, data); case MSR_IA32_RTIT_CTL: @@ -2729,11 +2732,11 @@ static int setup_vmcs_config(struct vmcs_config *vmcs_conf, return 0; } -static bool kvm_is_vmx_supported(void) +static bool __kvm_is_vmx_supported(void) { - int cpu = raw_smp_processor_id(); + int cpu = smp_processor_id(); - if (!cpu_has_vmx()) { + if (!(cpuid_ecx(1) & feature_bit(VMX))) { pr_err("VMX not supported by CPU %d\n", cpu); return false; } @@ -2747,13 +2750,24 @@ static bool kvm_is_vmx_supported(void) return true; } +static bool kvm_is_vmx_supported(void) +{ + bool supported; + + migrate_disable(); + supported = __kvm_is_vmx_supported(); + migrate_enable(); + + return supported; +} + static int vmx_check_processor_compat(void) { int cpu = raw_smp_processor_id(); struct vmcs_config vmcs_conf; struct vmx_capability vmx_cap; - if (!kvm_is_vmx_supported()) + if (!__kvm_is_vmx_supported()) return -EIO; if (setup_vmcs_config(&vmcs_conf, &vmx_cap) < 0) { @@ -2833,7 +2847,7 @@ static void vmx_hardware_disable(void) { vmclear_local_loaded_vmcss(); - if (cpu_vmxoff()) + if (kvm_cpu_vmxoff()) kvm_spurious_fault(); hv_reset_evmcs(); @@ -3071,13 +3085,6 @@ static void enter_rmode(struct kvm_vcpu *vcpu) vmx->rmode.vm86_active = 1; - /* - * Very old userspace does not call KVM_SET_TSS_ADDR before entering - * vcpu. Warn the user that an update is overdue. 
- */ - if (!kvm_vmx->tss_addr) - pr_warn_once("KVM_SET_TSS_ADDR needs to be called before running vCPU\n"); - vmx_segment_cache_clear(vmx); vmcs_writel(GUEST_TR_BASE, kvm_vmx->tss_addr); @@ -3350,7 +3357,7 @@ void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) vmx->emulation_required = vmx_emulation_required(vcpu); } -static int vmx_get_max_tdp_level(void) +static int vmx_get_max_ept_level(void) { if (cpu_has_vmx_ept_5levels()) return 5; @@ -4553,16 +4560,19 @@ vmx_adjust_secondary_exec_control(struct vcpu_vmx *vmx, u32 *exec_control, * based on a single guest CPUID bit, with a dedicated feature bit. This also * verifies that the control is actually supported by KVM and hardware. */ -#define vmx_adjust_sec_exec_control(vmx, exec_control, name, feat_name, ctrl_name, exiting) \ -({ \ - bool __enabled; \ - \ - if (cpu_has_vmx_##name()) { \ - __enabled = guest_cpuid_has(&(vmx)->vcpu, \ - X86_FEATURE_##feat_name); \ - vmx_adjust_secondary_exec_control(vmx, exec_control, \ - SECONDARY_EXEC_##ctrl_name, __enabled, exiting); \ - } \ +#define vmx_adjust_sec_exec_control(vmx, exec_control, name, feat_name, ctrl_name, exiting) \ +({ \ + struct kvm_vcpu *__vcpu = &(vmx)->vcpu; \ + bool __enabled; \ + \ + if (cpu_has_vmx_##name()) { \ + if (kvm_is_governed_feature(X86_FEATURE_##feat_name)) \ + __enabled = guest_can_use(__vcpu, X86_FEATURE_##feat_name); \ + else \ + __enabled = guest_cpuid_has(__vcpu, X86_FEATURE_##feat_name); \ + vmx_adjust_secondary_exec_control(vmx, exec_control, SECONDARY_EXEC_##ctrl_name,\ + __enabled, exiting); \ + } \ }) /* More macro magic for ENABLE_/opt-in versus _EXITING/opt-out controls. 
*/ @@ -4622,19 +4632,7 @@ static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx) if (!enable_pml || !atomic_read(&vcpu->kvm->nr_memslots_dirty_logging)) exec_control &= ~SECONDARY_EXEC_ENABLE_PML; - if (cpu_has_vmx_xsaves()) { - /* Exposing XSAVES only when XSAVE is exposed */ - bool xsaves_enabled = - boot_cpu_has(X86_FEATURE_XSAVE) && - guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) && - guest_cpuid_has(vcpu, X86_FEATURE_XSAVES); - - vcpu->arch.xsaves_enabled = xsaves_enabled; - - vmx_adjust_secondary_exec_control(vmx, &exec_control, - SECONDARY_EXEC_XSAVES, - xsaves_enabled, false); - } + vmx_adjust_sec_exec_feature(vmx, &exec_control, xsaves, XSAVES); /* * RDPID is also gated by ENABLE_RDTSCP, turn on the control if either @@ -4653,6 +4651,7 @@ static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx) SECONDARY_EXEC_ENABLE_RDTSCP, rdpid_or_rdtscp_enabled, false); } + vmx_adjust_sec_exec_feature(vmx, &exec_control, invpcid, INVPCID); vmx_adjust_sec_exec_exiting(vmx, &exec_control, rdrand, RDRAND); @@ -6796,8 +6795,10 @@ static void vmx_set_apic_access_page_addr(struct kvm_vcpu *vcpu) vmcs_write64(APIC_ACCESS_ADDR, pfn_to_hpa(pfn)); read_unlock(&vcpu->kvm->mmu_lock); - vmx_flush_tlb_current(vcpu); - + /* + * No need for a manual TLB flush at this point, KVM has already done a + * flush if there were SPTEs pointing at the previous page. 
+ */ out: /* * Do not pin apic access page in memory, the MMU notifier @@ -7243,13 +7244,20 @@ static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu, flags); vcpu->arch.cr2 = native_read_cr2(); + vcpu->arch.regs_avail &= ~VMX_REGS_LAZY_LOAD_SET; + + vmx->idt_vectoring_info = 0; vmx_enable_fb_clear(vmx); - if (unlikely(vmx->fail)) + if (unlikely(vmx->fail)) { vmx->exit_reason.full = 0xdead; - else - vmx->exit_reason.full = vmcs_read32(VM_EXIT_REASON); + goto out; + } + + vmx->exit_reason.full = vmcs_read32(VM_EXIT_REASON); + if (likely(!vmx->exit_reason.failed_vmentry)) + vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD); if ((u16)vmx->exit_reason.basic == EXIT_REASON_EXCEPTION_NMI && is_nmi(vmx_get_intr_info(vcpu))) { @@ -7258,6 +7266,7 @@ static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu, kvm_after_interrupt(vcpu); } +out: guest_state_exit_irqoff(); } @@ -7379,8 +7388,6 @@ static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu) loadsegment(es, __USER_DS); #endif - vcpu->arch.regs_avail &= ~VMX_REGS_LAZY_LOAD_SET; - pt_guest_exit(vmx); kvm_load_host_xsave_state(vcpu); @@ -7397,17 +7404,12 @@ static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu) vmx->nested.nested_run_pending = 0; } - vmx->idt_vectoring_info = 0; - if (unlikely(vmx->fail)) return EXIT_FASTPATH_NONE; if (unlikely((u16)vmx->exit_reason.basic == EXIT_REASON_MCE_DURING_VMENTRY)) kvm_machine_check(); - if (likely(!vmx->exit_reason.failed_vmentry)) - vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD); - trace_kvm_exit(vcpu, KVM_ISA_VMX); if (unlikely(vmx->exit_reason.failed_vmentry)) @@ -7751,8 +7753,16 @@ static void vmx_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); - /* xsaves_enabled is recomputed in vmx_compute_secondary_exec_control(). */ - vcpu->arch.xsaves_enabled = false; + /* + * XSAVES is effectively enabled if and only if XSAVE is also exposed + * to the guest. 
XSAVES depends on CR4.OSXSAVE, and CR4.OSXSAVE can be + * set if and only if XSAVE is supported. + */ + if (boot_cpu_has(X86_FEATURE_XSAVE) && + guest_cpuid_has(vcpu, X86_FEATURE_XSAVE)) + kvm_governed_feature_check_and_set(vcpu, X86_FEATURE_XSAVES); + + kvm_governed_feature_check_and_set(vcpu, X86_FEATURE_VMX); vmx_setup_uret_msrs(vmx); @@ -7760,7 +7770,7 @@ static void vmx_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu) vmcs_set_secondary_exec_control(vmx, vmx_secondary_exec_control(vmx)); - if (nested_vmx_allowed(vcpu)) + if (guest_can_use(vcpu, X86_FEATURE_VMX)) vmx->msr_ia32_feature_control_valid_bits |= FEAT_CTL_VMX_ENABLED_INSIDE_SMX | FEAT_CTL_VMX_ENABLED_OUTSIDE_SMX; @@ -7769,7 +7779,7 @@ static void vmx_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu) ~(FEAT_CTL_VMX_ENABLED_INSIDE_SMX | FEAT_CTL_VMX_ENABLED_OUTSIDE_SMX); - if (nested_vmx_allowed(vcpu)) + if (guest_can_use(vcpu, X86_FEATURE_VMX)) nested_vmx_cr_fixed1_bits_update(vcpu); if (boot_cpu_has(X86_FEATURE_INTEL_PT) && @@ -8526,7 +8536,7 @@ static __init int hardware_setup(void) */ vmx_setup_me_spte_mask(); - kvm_configure_mmu(enable_ept, 0, vmx_get_max_tdp_level(), + kvm_configure_mmu(enable_ept, 0, vmx_get_max_ept_level(), ept_caps_to_lpage_level(vmx_capability.ept)); /* @@ -8622,10 +8632,8 @@ static void __vmx_exit(void) { allow_smaller_maxphyaddr = false; -#ifdef CONFIG_KEXEC_CORE - RCU_INIT_POINTER(crash_vmclear_loaded_vmcss, NULL); - synchronize_rcu(); -#endif + cpu_emergency_unregister_virt_callback(vmx_emergency_disable); + vmx_cleanup_l1d_flush(); } @@ -8666,18 +8674,14 @@ static int __init vmx_init(void) if (r) goto err_l1d_flush; - vmx_setup_fb_clear_ctrl(); - for_each_possible_cpu(cpu) { INIT_LIST_HEAD(&per_cpu(loaded_vmcss_on_cpu, cpu)); pi_init_cpu(cpu); } -#ifdef CONFIG_KEXEC_CORE - rcu_assign_pointer(crash_vmclear_loaded_vmcss, - crash_vmclear_local_loaded_vmcss); -#endif + cpu_emergency_register_virt_callback(vmx_emergency_disable); + vmx_check_vmcs12_offsets(); /* diff --git 
a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h index 32384ba38499..c2130d2c8e24 100644 --- a/arch/x86/kvm/vmx/vmx.h +++ b/arch/x86/kvm/vmx/vmx.h @@ -374,7 +374,6 @@ struct kvm_vmx { u64 *pid_table; }; -bool nested_vmx_allowed(struct kvm_vcpu *vcpu); void vmx_vcpu_load_vmcs(struct kvm_vcpu *vcpu, int cpu, struct loaded_vmcs *buddy); int allocate_vpid(void); @@ -562,7 +561,7 @@ static inline u8 vmx_get_rvi(void) SECONDARY_EXEC_APIC_REGISTER_VIRT | \ SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | \ SECONDARY_EXEC_SHADOW_VMCS | \ - SECONDARY_EXEC_XSAVES | \ + SECONDARY_EXEC_ENABLE_XSAVES | \ SECONDARY_EXEC_RDSEED_EXITING | \ SECONDARY_EXEC_RDRAND_EXITING | \ SECONDARY_EXEC_ENABLE_PML | \ diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index c381770bcbf1..6c9c81e82e65 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -25,6 +25,7 @@ #include "tss.h" #include "kvm_cache_regs.h" #include "kvm_emulate.h" +#include "mmu/page_track.h" #include "x86.h" #include "cpuid.h" #include "pmu.h" @@ -237,6 +238,9 @@ EXPORT_SYMBOL_GPL(enable_apicv); u64 __read_mostly host_xss; EXPORT_SYMBOL_GPL(host_xss); +u64 __read_mostly host_arch_capabilities; +EXPORT_SYMBOL_GPL(host_arch_capabilities); + const struct _kvm_stats_desc kvm_vm_stats_desc[] = { KVM_GENERIC_VM_STATS(), STATS_DESC_COUNTER(VM, mmu_shadow_zapped), @@ -1021,7 +1025,7 @@ void kvm_load_guest_xsave_state(struct kvm_vcpu *vcpu) if (vcpu->arch.xcr0 != host_xcr0) xsetbv(XCR_XFEATURE_ENABLED_MASK, vcpu->arch.xcr0); - if (vcpu->arch.xsaves_enabled && + if (guest_can_use(vcpu, X86_FEATURE_XSAVES) && vcpu->arch.ia32_xss != host_xss) wrmsrl(MSR_IA32_XSS, vcpu->arch.ia32_xss); } @@ -1052,7 +1056,7 @@ void kvm_load_host_xsave_state(struct kvm_vcpu *vcpu) if (vcpu->arch.xcr0 != host_xcr0) xsetbv(XCR_XFEATURE_ENABLED_MASK, host_xcr0); - if (vcpu->arch.xsaves_enabled && + if (guest_can_use(vcpu, X86_FEATURE_XSAVES) && vcpu->arch.ia32_xss != host_xss) wrmsrl(MSR_IA32_XSS, host_xss); } @@ -1620,12 +1624,7 @@ static bool 
kvm_is_immutable_feature_msr(u32 msr) static u64 kvm_get_arch_capabilities(void) { - u64 data = 0; - - if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES)) { - rdmsrl(MSR_IA32_ARCH_CAPABILITIES, data); - data &= KVM_SUPPORTED_ARCH_CAP; - } + u64 data = host_arch_capabilities & KVM_SUPPORTED_ARCH_CAP; /* * If nx_huge_pages is enabled, KVM's shadow paging will ensure that @@ -2631,7 +2630,7 @@ static void kvm_vcpu_write_tsc_offset(struct kvm_vcpu *vcpu, u64 l1_offset) else vcpu->arch.tsc_offset = l1_offset; - static_call(kvm_x86_write_tsc_offset)(vcpu, vcpu->arch.tsc_offset); + static_call(kvm_x86_write_tsc_offset)(vcpu); } static void kvm_vcpu_write_tsc_multiplier(struct kvm_vcpu *vcpu, u64 l1_multiplier) @@ -2647,8 +2646,7 @@ static void kvm_vcpu_write_tsc_multiplier(struct kvm_vcpu *vcpu, u64 l1_multipli vcpu->arch.tsc_scaling_ratio = l1_multiplier; if (kvm_caps.has_tsc_control) - static_call(kvm_x86_write_tsc_multiplier)( - vcpu, vcpu->arch.tsc_scaling_ratio); + static_call(kvm_x86_write_tsc_multiplier)(vcpu); } static inline bool kvm_check_tsc_unstable(void) @@ -4665,7 +4663,6 @@ static int kvm_x86_dev_get_attr(struct kvm_device_attr *attr) return 0; default: return -ENXIO; - break; } } @@ -6532,7 +6529,7 @@ static void kvm_free_msr_filter(struct kvm_x86_msr_filter *msr_filter) static int kvm_add_msr_filter(struct kvm_x86_msr_filter *msr_filter, struct kvm_msr_filter_range *user_range) { - unsigned long *bitmap = NULL; + unsigned long *bitmap; size_t bitmap_size; if (!user_range->nmsrs) @@ -8245,11 +8242,6 @@ static bool emulator_get_cpuid(struct x86_emulate_ctxt *ctxt, return kvm_cpuid(emul_to_vcpu(ctxt), eax, ebx, ecx, edx, exact_only); } -static bool emulator_guest_has_long_mode(struct x86_emulate_ctxt *ctxt) -{ - return guest_cpuid_has(emul_to_vcpu(ctxt), X86_FEATURE_LM); -} - static bool emulator_guest_has_movbe(struct x86_emulate_ctxt *ctxt) { return guest_cpuid_has(emul_to_vcpu(ctxt), X86_FEATURE_MOVBE); @@ -8351,7 +8343,6 @@ static const struct 
x86_emulate_ops emulate_ops = { .fix_hypercall = emulator_fix_hypercall, .intercept = emulator_intercept, .get_cpuid = emulator_get_cpuid, - .guest_has_long_mode = emulator_guest_has_long_mode, .guest_has_movbe = emulator_guest_has_movbe, .guest_has_fxsr = emulator_guest_has_fxsr, .guest_has_rdpid = emulator_guest_has_rdpid, @@ -9172,7 +9163,7 @@ static int kvmclock_cpu_down_prep(unsigned int cpu) static void tsc_khz_changed(void *data) { struct cpufreq_freqs *freq = data; - unsigned long khz = 0; + unsigned long khz; WARN_ON_ONCE(boot_cpu_has(X86_FEATURE_CONSTANT_TSC)); @@ -9512,6 +9503,9 @@ static int __kvm_x86_vendor_init(struct kvm_x86_init_ops *ops) kvm_init_pmu_capability(ops->pmu_ops); + if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES)) + rdmsrl(MSR_IA32_ARCH_CAPABILITIES, host_arch_capabilities); + r = ops->hardware_setup(); if (r != 0) goto out_mmu_exit; @@ -11111,12 +11105,17 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) r = -EINTR; goto out; } + /* - * It should be impossible for the hypervisor timer to be in - * use before KVM has ever run the vCPU. + * Don't bother switching APIC timer emulation from the + * hypervisor timer to the software timer, the only way for the + * APIC timer to be active is if userspace stuffed vCPU state, + * i.e. put the vCPU into a nonsensical state. Only an INIT + * will transition the vCPU out of UNINITIALIZED (without more + * state stuffing from userspace), which will reset the local + * APIC and thus cancel the timer or drop the IRQ (if the timer + * already expired). 
*/ - WARN_ON_ONCE(kvm_lapic_hv_timer_in_use(vcpu)); - kvm_vcpu_srcu_read_unlock(vcpu); kvm_vcpu_block(vcpu); kvm_vcpu_srcu_read_lock(vcpu); @@ -11798,15 +11797,22 @@ static int sync_regs(struct kvm_vcpu *vcpu) __set_regs(vcpu, &vcpu->run->s.regs.regs); vcpu->run->kvm_dirty_regs &= ~KVM_SYNC_X86_REGS; } + if (vcpu->run->kvm_dirty_regs & KVM_SYNC_X86_SREGS) { - if (__set_sregs(vcpu, &vcpu->run->s.regs.sregs)) + struct kvm_sregs sregs = vcpu->run->s.regs.sregs; + + if (__set_sregs(vcpu, &sregs)) return -EINVAL; + vcpu->run->kvm_dirty_regs &= ~KVM_SYNC_X86_SREGS; } + if (vcpu->run->kvm_dirty_regs & KVM_SYNC_X86_EVENTS) { - if (kvm_vcpu_ioctl_x86_set_vcpu_events( - vcpu, &vcpu->run->s.regs.events)) + struct kvm_vcpu_events events = vcpu->run->s.regs.events; + + if (kvm_vcpu_ioctl_x86_set_vcpu_events(vcpu, &events)) return -EINVAL; + vcpu->run->kvm_dirty_regs &= ~KVM_SYNC_X86_EVENTS; } @@ -12627,6 +12633,13 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, struct kvm_memory_slot *new, enum kvm_mr_change change) { + /* + * KVM doesn't support moving memslots when there are external page + * trackers attached to the VM, i.e. if KVMGT is in use. + */ + if (change == KVM_MR_MOVE && kvm_page_track_has_external_user(kvm)) + return -EINVAL; + if (change == KVM_MR_CREATE || change == KVM_MR_MOVE) { if ((new->base_gfn + new->npages - 1) > kvm_mmu_max_gfn()) return -EINVAL; @@ -12772,7 +12785,7 @@ static void kvm_mmu_slot_apply_flags(struct kvm *kvm, * See is_writable_pte() for more details (the case involving * access-tracked SPTEs is particularly relevant). 
*/ - kvm_arch_flush_remote_tlbs_memslot(kvm, new); + kvm_flush_remote_tlbs_memslot(kvm, new); } } @@ -12781,6 +12794,9 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, const struct kvm_memory_slot *new, enum kvm_mr_change change) { + if (change == KVM_MR_DELETE) + kvm_page_track_delete_slot(kvm, old); + if (!kvm->arch.n_requested_mmu_pages && (change == KVM_MR_CREATE || change == KVM_MR_DELETE)) { unsigned long nr_mmu_pages; @@ -12797,17 +12813,6 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, kvm_arch_free_memslot(kvm, old); } -void kvm_arch_flush_shadow_all(struct kvm *kvm) -{ - kvm_mmu_zap_all(kvm); -} - -void kvm_arch_flush_shadow_memslot(struct kvm *kvm, - struct kvm_memory_slot *slot) -{ - kvm_page_track_flush_slot(kvm, slot); -} - static inline bool kvm_guest_apic_has_interrupt(struct kvm_vcpu *vcpu) { return (is_guest_mode(vcpu) && diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index 82e3dafc5453..1e7be1f6ab29 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h @@ -323,6 +323,7 @@ fastpath_t handle_fastpath_set_msr_irqoff(struct kvm_vcpu *vcpu); extern u64 host_xcr0; extern u64 host_xss; +extern u64 host_arch_capabilities; extern struct kvm_caps kvm_caps; diff --git a/arch/x86/um/Makefile b/arch/x86/um/Makefile index ee89f6bb9242..8bc72a51b257 100644 --- a/arch/x86/um/Makefile +++ b/arch/x86/um/Makefile @@ -13,15 +13,16 @@ obj-y = bugs_$(BITS).o delay.o fault.o ldt.o \ ptrace_$(BITS).o ptrace_user.o setjmp_$(BITS).o signal.o \ stub_$(BITS).o stub_segv.o \ sys_call_table_$(BITS).o sysrq_$(BITS).o tls_$(BITS).o \ - mem_$(BITS).o subarch.o os-$(OS)/ + mem_$(BITS).o subarch.o os-Linux/ ifeq ($(CONFIG_X86_32),y) -obj-y += checksum_32.o syscalls_32.o +obj-y += syscalls_32.o obj-$(CONFIG_ELF_CORE) += elfcore.o subarch-y = ../lib/string_32.o ../lib/atomic64_32.o ../lib/atomic64_cx8_32.o subarch-y += ../lib/cmpxchg8b_emu.o ../lib/atomic64_386_32.o +subarch-y += ../lib/checksum_32.o subarch-y += ../kernel/sys_ia32.o else diff --git 
a/arch/x86/um/asm/mm_context.h b/arch/x86/um/asm/mm_context.h index 4a73d63e4760..dc32dc023c2f 100644 --- a/arch/x86/um/asm/mm_context.h +++ b/arch/x86/um/asm/mm_context.h @@ -11,8 +11,6 @@ #include <linux/mutex.h> #include <asm/ldt.h> -extern void ldt_host_info(void); - #define LDT_PAGES_MAX \ ((LDT_ENTRIES * LDT_ENTRY_SIZE)/PAGE_SIZE) #define LDT_ENTRIES_PER_PAGE \ diff --git a/arch/x86/um/checksum_32.S b/arch/x86/um/checksum_32.S deleted file mode 100644 index aed782ab7721..000000000000 --- a/arch/x86/um/checksum_32.S +++ /dev/null @@ -1,214 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * INET An implementation of the TCP/IP protocol suite for the LINUX - * operating system. INET is implemented using the BSD Socket - * interface as the means of communication with the user level. - * - * IP/TCP/UDP checksumming routines - * - * Authors: Jorge Cwik, <jorge@laser.satlink.net> - * Arnt Gulbrandsen, <agulbra@nvg.unit.no> - * Tom May, <ftom@netcom.com> - * Pentium Pro/II routines: - * Alexander Kjeldaas <astor@guardian.no> - * Finn Arne Gangstad <finnag@guardian.no> - * Lots of code moved from tcp.c and ip.c; see those files - * for more names. - * - * Changes: Ingo Molnar, converted csum_partial_copy() to 2.1 exception - * handling. - * Andi Kleen, add zeroing on error - * converted to pure assembler - */ - -#include <asm/errno.h> -#include <asm/asm.h> -#include <asm/export.h> - -/* - * computes a partial checksum, e.g. for TCP/UDP fragments - */ - -/* -unsigned int csum_partial(const unsigned char * buff, int len, unsigned int sum) - */ - -.text -.align 4 -.globl csum_partial - -#ifndef CONFIG_X86_USE_PPRO_CHECKSUM - - /* - * Experiments with Ethernet and SLIP connections show that buff - * is aligned on either a 2-byte or 4-byte boundary. We get at - * least a twofold speedup on 486 and Pentium if it is 4-byte aligned. - * Fortunately, it is easy to convert 2-byte alignment to 4-byte - * alignment for the unrolled loop. 
- */ -csum_partial: - pushl %esi - pushl %ebx - movl 20(%esp),%eax # Function arg: unsigned int sum - movl 16(%esp),%ecx # Function arg: int len - movl 12(%esp),%esi # Function arg: unsigned char *buff - testl $2, %esi # Check alignment. - jz 2f # Jump if alignment is ok. - subl $2, %ecx # Alignment uses up two bytes. - jae 1f # Jump if we had at least two bytes. - addl $2, %ecx # ecx was < 2. Deal with it. - jmp 4f -1: movw (%esi), %bx - addl $2, %esi - addw %bx, %ax - adcl $0, %eax -2: - movl %ecx, %edx - shrl $5, %ecx - jz 2f - testl %esi, %esi -1: movl (%esi), %ebx - adcl %ebx, %eax - movl 4(%esi), %ebx - adcl %ebx, %eax - movl 8(%esi), %ebx - adcl %ebx, %eax - movl 12(%esi), %ebx - adcl %ebx, %eax - movl 16(%esi), %ebx - adcl %ebx, %eax - movl 20(%esi), %ebx - adcl %ebx, %eax - movl 24(%esi), %ebx - adcl %ebx, %eax - movl 28(%esi), %ebx - adcl %ebx, %eax - lea 32(%esi), %esi - dec %ecx - jne 1b - adcl $0, %eax -2: movl %edx, %ecx - andl $0x1c, %edx - je 4f - shrl $2, %edx # This clears CF -3: adcl (%esi), %eax - lea 4(%esi), %esi - dec %edx - jne 3b - adcl $0, %eax -4: andl $3, %ecx - jz 7f - cmpl $2, %ecx - jb 5f - movw (%esi),%cx - leal 2(%esi),%esi - je 6f - shll $16,%ecx -5: movb (%esi),%cl -6: addl %ecx,%eax - adcl $0, %eax -7: - popl %ebx - popl %esi - RET - -#else - -/* Version for PentiumII/PPro */ - -csum_partial: - pushl %esi - pushl %ebx - movl 20(%esp),%eax # Function arg: unsigned int sum - movl 16(%esp),%ecx # Function arg: int len - movl 12(%esp),%esi # Function arg: const unsigned char *buf - - testl $2, %esi - jnz 30f -10: - movl %ecx, %edx - movl %ecx, %ebx - andl $0x7c, %ebx - shrl $7, %ecx - addl %ebx,%esi - shrl $2, %ebx - negl %ebx - lea 45f(%ebx,%ebx,2), %ebx - testl %esi, %esi - jmp *%ebx - - # Handle 2-byte-aligned regions -20: addw (%esi), %ax - lea 2(%esi), %esi - adcl $0, %eax - jmp 10b - -30: subl $2, %ecx - ja 20b - je 32f - movzbl (%esi),%ebx # csumming 1 byte, 2-aligned - addl %ebx, %eax - adcl $0, %eax - jmp 80f -32: - addw 
(%esi), %ax # csumming 2 bytes, 2-aligned - adcl $0, %eax - jmp 80f - -40: - addl -128(%esi), %eax - adcl -124(%esi), %eax - adcl -120(%esi), %eax - adcl -116(%esi), %eax - adcl -112(%esi), %eax - adcl -108(%esi), %eax - adcl -104(%esi), %eax - adcl -100(%esi), %eax - adcl -96(%esi), %eax - adcl -92(%esi), %eax - adcl -88(%esi), %eax - adcl -84(%esi), %eax - adcl -80(%esi), %eax - adcl -76(%esi), %eax - adcl -72(%esi), %eax - adcl -68(%esi), %eax - adcl -64(%esi), %eax - adcl -60(%esi), %eax - adcl -56(%esi), %eax - adcl -52(%esi), %eax - adcl -48(%esi), %eax - adcl -44(%esi), %eax - adcl -40(%esi), %eax - adcl -36(%esi), %eax - adcl -32(%esi), %eax - adcl -28(%esi), %eax - adcl -24(%esi), %eax - adcl -20(%esi), %eax - adcl -16(%esi), %eax - adcl -12(%esi), %eax - adcl -8(%esi), %eax - adcl -4(%esi), %eax -45: - lea 128(%esi), %esi - adcl $0, %eax - dec %ecx - jge 40b - movl %edx, %ecx -50: andl $3, %ecx - jz 80f - - # Handle the last 1-3 bytes without jumping - notl %ecx # 1->2, 2->1, 3->0, higher bits are masked - movl $0xffffff,%ebx # by the shll and shrl instructions - shll $3,%ecx - shrl %cl,%ebx - andl -128(%esi),%ebx # esi is 4-aligned so should be ok - addl %ebx,%eax - adcl $0,%eax -80: - popl %ebx - popl %esi - RET - -#endif - EXPORT_SYMBOL(csum_partial) diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig index a5488cc40f58..7d792077e5fd 100644 --- a/arch/xtensa/Kconfig +++ b/arch/xtensa/Kconfig @@ -71,6 +71,9 @@ config ARCH_HAS_ILOG2_U32 config ARCH_HAS_ILOG2_U64 def_bool n +config ARCH_MTD_XIP + def_bool y + config NO_IOPORT_MAP def_bool n diff --git a/arch/xtensa/include/asm/core.h b/arch/xtensa/include/asm/core.h index 0e1bb6f019d6..3f5ffae89b58 100644 --- a/arch/xtensa/include/asm/core.h +++ b/arch/xtensa/include/asm/core.h @@ -52,4 +52,13 @@ #define XTENSA_STACK_ALIGNMENT 16 #endif +#ifndef XCHAL_HW_MIN_VERSION +#if defined(XCHAL_HW_MIN_VERSION_MAJOR) && defined(XCHAL_HW_MIN_VERSION_MINOR) +#define XCHAL_HW_MIN_VERSION (XCHAL_HW_MIN_VERSION_MAJOR 
* 100 + \ + XCHAL_HW_MIN_VERSION_MINOR) +#else +#define XCHAL_HW_MIN_VERSION 0 +#endif +#endif + #endif diff --git a/arch/xtensa/include/asm/mtd-xip.h b/arch/xtensa/include/asm/mtd-xip.h new file mode 100644 index 000000000000..514325155cf8 --- /dev/null +++ b/arch/xtensa/include/asm/mtd-xip.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef _ASM_MTD_XIP_H +#define _ASM_MTD_XIP_H + +#include <asm/processor.h> + +#define xip_irqpending() (xtensa_get_sr(interrupt) & xtensa_get_sr(intenable)) +#define xip_currtime() (xtensa_get_sr(ccount)) +#define xip_elapsed_since(x) ((xtensa_get_sr(ccount) - (x)) / 1000) /* should work up to 1GHz */ +#define xip_cpu_idle() do { asm volatile ("waiti 0"); } while (0) + +#endif /* _ASM_MTD_XIP_H */ + diff --git a/arch/xtensa/include/asm/sections.h b/arch/xtensa/include/asm/sections.h index 3bc6b9afa993..e5da6d7092be 100644 --- a/arch/xtensa/include/asm/sections.h +++ b/arch/xtensa/include/asm/sections.h @@ -34,6 +34,10 @@ extern char _SecondaryResetVector_text_start[]; extern char _SecondaryResetVector_text_end[]; #endif #ifdef CONFIG_XIP_KERNEL +#ifdef CONFIG_VECTORS_ADDR +extern char _xip_text_start[]; +extern char _xip_text_end[]; +#endif extern char _xip_start[]; extern char _xip_end[]; #endif diff --git a/arch/xtensa/kernel/perf_event.c b/arch/xtensa/kernel/perf_event.c index a0d05c8598d0..183618090d05 100644 --- a/arch/xtensa/kernel/perf_event.c +++ b/arch/xtensa/kernel/perf_event.c @@ -13,17 +13,26 @@ #include <linux/perf_event.h> #include <linux/platform_device.h> +#include <asm/core.h> #include <asm/processor.h> #include <asm/stacktrace.h> +#define XTENSA_HWVERSION_RG_2015_0 260000 + +#if XCHAL_HW_MIN_VERSION >= XTENSA_HWVERSION_RG_2015_0 +#define XTENSA_PMU_ERI_BASE 0x00101000 +#else +#define XTENSA_PMU_ERI_BASE 0x00001000 +#endif + /* Global control/status for all perf counters */ -#define XTENSA_PMU_PMG 0x1000 +#define XTENSA_PMU_PMG XTENSA_PMU_ERI_BASE /* Perf counter values */ -#define 
XTENSA_PMU_PM(i) (0x1080 + (i) * 4) +#define XTENSA_PMU_PM(i) (XTENSA_PMU_ERI_BASE + 0x80 + (i) * 4) /* Perf counter control registers */ -#define XTENSA_PMU_PMCTRL(i) (0x1100 + (i) * 4) +#define XTENSA_PMU_PMCTRL(i) (XTENSA_PMU_ERI_BASE + 0x100 + (i) * 4) /* Perf counter status registers */ -#define XTENSA_PMU_PMSTAT(i) (0x1180 + (i) * 4) +#define XTENSA_PMU_PMSTAT(i) (XTENSA_PMU_ERI_BASE + 0x180 + (i) * 4) #define XTENSA_PMU_PMG_PMEN 0x1 diff --git a/arch/xtensa/kernel/setup.c b/arch/xtensa/kernel/setup.c index aba3ff4e60d8..52d6e4870a04 100644 --- a/arch/xtensa/kernel/setup.c +++ b/arch/xtensa/kernel/setup.c @@ -311,6 +311,9 @@ void __init setup_arch(char **cmdline_p) mem_reserve(__pa(_stext), __pa(_end)); #ifdef CONFIG_XIP_KERNEL +#ifdef CONFIG_VECTORS_ADDR + mem_reserve(__pa(_xip_text_start), __pa(_xip_text_end)); +#endif mem_reserve(__pa(_xip_start), __pa(_xip_end)); #endif diff --git a/arch/xtensa/kernel/vmlinux.lds.S b/arch/xtensa/kernel/vmlinux.lds.S index c14fd96f459d..f47e9bbbd291 100644 --- a/arch/xtensa/kernel/vmlinux.lds.S +++ b/arch/xtensa/kernel/vmlinux.lds.S @@ -118,6 +118,7 @@ SECTIONS SECTION_VECTOR2 (.DoubleExceptionVector.text, DOUBLEEXC_VECTOR_VADDR) *(.exception.text) + *(.xiptext) #endif IRQENTRY_TEXT @@ -201,6 +202,9 @@ SECTIONS .DebugInterruptVector.text); RELOCATE_ENTRY(_exception_text, .exception.text); +#ifdef CONFIG_XIP_KERNEL + RELOCATE_ENTRY(_xip_text, .xiptext); +#endif #endif #ifdef CONFIG_XIP_KERNEL RELOCATE_ENTRY(_xip_data, .data); @@ -319,7 +323,12 @@ SECTIONS LAST) #undef LAST #define LAST .exception.text - + SECTION_VECTOR4 (_xip_text, + .xiptext, + , + LAST) +#undef LAST +#define LAST .xiptext #endif . 
= (LOADADDR(LAST) + SIZEOF(LAST) + 3) & ~ 3; diff --git a/arch/xtensa/platforms/iss/console.c b/arch/xtensa/platforms/iss/console.c index 10b79d3c74e0..7d1f8b398a46 100644 --- a/arch/xtensa/platforms/iss/console.c +++ b/arch/xtensa/platforms/iss/console.c @@ -52,8 +52,7 @@ static void rs_close(struct tty_struct *tty, struct file * filp) } -static int rs_write(struct tty_struct * tty, - const unsigned char *buf, int count) +static ssize_t rs_write(struct tty_struct * tty, const u8 *buf, size_t count) { /* see drivers/char/serialX.c to reference original version */ @@ -82,32 +81,12 @@ static void rs_poll(struct timer_list *unused) mod_timer(&serial_timer, jiffies + SERIAL_TIMER_VALUE); } - -static int rs_put_char(struct tty_struct *tty, unsigned char ch) -{ - return rs_write(tty, &ch, 1); -} - -static void rs_flush_chars(struct tty_struct *tty) -{ -} - static unsigned int rs_write_room(struct tty_struct *tty) { /* Let's say iss can always accept 2K characters.. */ return 2 * 1024; } -static void rs_hangup(struct tty_struct *tty) -{ - /* Stub, once again.. */ -} - -static void rs_wait_until_sent(struct tty_struct *tty, int timeout) -{ - /* Stub, once again.. */ -} - static int rs_proc_show(struct seq_file *m, void *v) { seq_printf(m, "serinfo:1.0 driver:0.1\n"); @@ -118,11 +97,7 @@ static const struct tty_operations serial_ops = { .open = rs_open, .close = rs_close, .write = rs_write, - .put_char = rs_put_char, - .flush_chars = rs_flush_chars, .write_room = rs_write_room, - .hangup = rs_hangup, - .wait_until_sent = rs_wait_until_sent, .proc_show = rs_proc_show, }; |