437 files changed, 19350 insertions, 3336 deletions
diff --git a/tools/arch/powerpc/include/uapi/asm/kvm.h b/tools/arch/powerpc/include/uapi/asm/kvm.h
index 264e266a85bf..c3af3f324c5a 100644
--- a/tools/arch/powerpc/include/uapi/asm/kvm.h
+++ b/tools/arch/powerpc/include/uapi/asm/kvm.h
@@ -640,6 +640,11 @@ struct kvm_ppc_cpu_char {
 #define KVM_REG_PPC_ONLINE	(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xbf)
 #define KVM_REG_PPC_PTCR	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc0)
 
+/* POWER10 registers */
+#define KVM_REG_PPC_MMCR3	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc1)
+#define KVM_REG_PPC_SIER2	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc2)
+#define KVM_REG_PPC_SIER3	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc3)
+
 /* Transactional Memory checkpointed state:
  * This is all GPRs, all VSX regs and a subset of SPRs
  */
diff --git a/tools/arch/powerpc/include/uapi/asm/perf_regs.h b/tools/arch/powerpc/include/uapi/asm/perf_regs.h
index f599064dd8dc..bdf5f10f8b9f 100644
--- a/tools/arch/powerpc/include/uapi/asm/perf_regs.h
+++ b/tools/arch/powerpc/include/uapi/asm/perf_regs.h
@@ -48,6 +48,24 @@ enum perf_event_powerpc_regs {
 	PERF_REG_POWERPC_DSISR,
 	PERF_REG_POWERPC_SIER,
 	PERF_REG_POWERPC_MMCRA,
-	PERF_REG_POWERPC_MAX,
+	/* Extended registers */
+	PERF_REG_POWERPC_MMCR0,
+	PERF_REG_POWERPC_MMCR1,
+	PERF_REG_POWERPC_MMCR2,
+	PERF_REG_POWERPC_MMCR3,
+	PERF_REG_POWERPC_SIER2,
+	PERF_REG_POWERPC_SIER3,
+	/* Max regs without the extended regs */
+	PERF_REG_POWERPC_MAX = PERF_REG_POWERPC_MMCRA + 1,
 };
+
+#define PERF_REG_PMU_MASK	((1ULL << PERF_REG_POWERPC_MAX) - 1)
+
+/* PERF_REG_EXTENDED_MASK value for CPU_FTR_ARCH_300 */
+#define PERF_REG_PMU_MASK_300   (((1ULL << (PERF_REG_POWERPC_MMCR2 + 1)) - 1) - PERF_REG_PMU_MASK)
+/* PERF_REG_EXTENDED_MASK value for CPU_FTR_ARCH_31 */
+#define PERF_REG_PMU_MASK_31   (((1ULL << (PERF_REG_POWERPC_SIER3 + 1)) - 1) - PERF_REG_PMU_MASK)
+
+#define PERF_REG_MAX_ISA_300   (PERF_REG_POWERPC_MMCR2 + 1)
+#define PERF_REG_MAX_ISA_31    (PERF_REG_POWERPC_SIER3 + 1)
 #endif /* _UAPI_ASM_POWERPC_PERF_REGS_H */
diff --git a/tools/arch/riscv/include/uapi/asm/unistd.h b/tools/arch/riscv/include/uapi/asm/unistd.h
index 0e2eeeb1fd27..f506cca520b0 100644
--- a/tools/arch/riscv/include/uapi/asm/unistd.h
+++ b/tools/arch/riscv/include/uapi/asm/unistd.h
@@ -12,7 +12,7 @@
  * GNU General Public License for more details.
  *
  * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ * along with this program.  If not, see <https://www.gnu.org/licenses/>.
  */
 
 #ifdef __LP64__
diff --git a/tools/arch/s390/include/uapi/asm/kvm.h b/tools/arch/s390/include/uapi/asm/kvm.h
index 436ec7636927..7a6b14874d65 100644
--- a/tools/arch/s390/include/uapi/asm/kvm.h
+++ b/tools/arch/s390/include/uapi/asm/kvm.h
@@ -231,11 +231,13 @@ struct kvm_guest_debug_arch {
 #define KVM_SYNC_GSCB   (1UL << 9)
 #define KVM_SYNC_BPBC   (1UL << 10)
 #define KVM_SYNC_ETOKEN (1UL << 11)
+#define KVM_SYNC_DIAG318 (1UL << 12)
 
 #define KVM_SYNC_S390_VALID_FIELDS \
 	(KVM_SYNC_PREFIX | KVM_SYNC_GPRS | KVM_SYNC_ACRS | KVM_SYNC_CRS | \
 	 KVM_SYNC_ARCH0 | KVM_SYNC_PFAULT | KVM_SYNC_VRS | KVM_SYNC_RICCB | \
-	 KVM_SYNC_FPRS | KVM_SYNC_GSCB | KVM_SYNC_BPBC | KVM_SYNC_ETOKEN)
+	 KVM_SYNC_FPRS | KVM_SYNC_GSCB | KVM_SYNC_BPBC | KVM_SYNC_ETOKEN | \
+	 KVM_SYNC_DIAG318)
 
 /* length and alignment of the sdnx as a power of two */
 #define SDNXC 8
@@ -264,7 +266,8 @@ struct kvm_sync_regs {
 	__u8 reserved2 : 7;
 	__u8 padding1[51];	/* riccb needs to be 64byte aligned */
 	__u8 riccb[64];		/* runtime instrumentation controls block */
-	__u8 padding2[192];	/* sdnx needs to be 256byte aligned */
+	__u64 diag318;		/* diagnose 0x318 info */
+	__u8 padding2[184];	/* sdnx needs to be 256byte aligned */
 	union {
 		__u8 sdnx[SDNXL];  /* state description annex */
 		struct {
diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h
index 02dabc9e77b0..2901d5df4366 100644
--- a/tools/arch/x86/include/asm/cpufeatures.h
+++ b/tools/arch/x86/include/asm/cpufeatures.h
@@ -96,6 +96,7 @@
 #define X86_FEATURE_SYSCALL32		( 3*32+14) /* "" syscall in IA32 userspace */
 #define X86_FEATURE_SYSENTER32		( 3*32+15) /* "" sysenter in IA32 userspace */
 #define X86_FEATURE_REP_GOOD		( 3*32+16) /* REP microcode works well */
+/* free					( 3*32+17) */
 #define X86_FEATURE_LFENCE_RDTSC	( 3*32+18) /* "" LFENCE synchronizes RDTSC */
 #define X86_FEATURE_ACC_POWER		( 3*32+19) /* AMD Accumulated Power Mechanism */
 #define X86_FEATURE_NOPL		( 3*32+20) /* The NOPL (0F 1F) instructions */
@@ -107,6 +108,7 @@
 #define X86_FEATURE_EXTD_APICID		( 3*32+26) /* Extended APICID (8 bits) */
 #define X86_FEATURE_AMD_DCM		( 3*32+27) /* AMD multi-node processor */
 #define X86_FEATURE_APERFMPERF		( 3*32+28) /* P-State hardware coordination feedback capability (APERF/MPERF MSRs) */
+/* free					( 3*32+29) */
 #define X86_FEATURE_NONSTOP_TSC_S3	( 3*32+30) /* TSC doesn't stop in S3 state */
 #define X86_FEATURE_TSC_KNOWN_FREQ	( 3*32+31) /* TSC has known frequency */
 
@@ -365,7 +367,9 @@
 #define X86_FEATURE_SRBDS_CTRL		(18*32+ 9) /* "" SRBDS mitigation MSR available */
 #define X86_FEATURE_MD_CLEAR		(18*32+10) /* VERW clears CPU buffers */
 #define X86_FEATURE_TSX_FORCE_ABORT	(18*32+13) /* "" TSX_FORCE_ABORT */
+#define X86_FEATURE_SERIALIZE		(18*32+14) /* SERIALIZE instruction */
 #define X86_FEATURE_PCONFIG		(18*32+18) /* Intel PCONFIG */
+#define X86_FEATURE_ARCH_LBR		(18*32+19) /* Intel ARCH LBR */
 #define X86_FEATURE_SPEC_CTRL		(18*32+26) /* "" Speculation Control (IBRS + IBPB) */
 #define X86_FEATURE_INTEL_STIBP		(18*32+27) /* "" Single Thread Indirect Branch Predictors */
 #define X86_FEATURE_FLUSH_L1D		(18*32+28) /* Flush L1D cache */
diff --git a/tools/arch/x86/include/asm/mcsafe_test.h b/tools/arch/x86/include/asm/mcsafe_test.h
deleted file mode 100644
index 2ccd588fbad4..000000000000
--- a/tools/arch/x86/include/asm/mcsafe_test.h
+++ /dev/null
@@ -1,13 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _MCSAFE_TEST_H_
-#define _MCSAFE_TEST_H_
-
-.macro MCSAFE_TEST_CTL
-.endm
-
-.macro MCSAFE_TEST_SRC reg count target
-.endm
-
-.macro MCSAFE_TEST_DST reg count target
-.endm
-#endif /* _MCSAFE_TEST_H_ */
diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h
index e8370e64a155..2859ee4f39a8 100644
--- a/tools/arch/x86/include/asm/msr-index.h
+++ b/tools/arch/x86/include/asm/msr-index.h
@@ -149,6 +149,10 @@
 
 #define MSR_LBR_SELECT			0x000001c8
 #define MSR_LBR_TOS			0x000001c9
+
+#define MSR_IA32_POWER_CTL		0x000001fc
+#define MSR_IA32_POWER_CTL_BIT_EE	19
+
 #define MSR_LBR_NHM_FROM		0x00000680
 #define MSR_LBR_NHM_TO			0x000006c0
 #define MSR_LBR_CORE_FROM		0x00000040
@@ -158,7 +162,23 @@
 #define LBR_INFO_MISPRED		BIT_ULL(63)
 #define LBR_INFO_IN_TX			BIT_ULL(62)
 #define LBR_INFO_ABORT			BIT_ULL(61)
+#define LBR_INFO_CYC_CNT_VALID		BIT_ULL(60)
 #define LBR_INFO_CYCLES			0xffff
+#define LBR_INFO_BR_TYPE_OFFSET		56
+#define LBR_INFO_BR_TYPE		(0xfull << LBR_INFO_BR_TYPE_OFFSET)
+
+#define MSR_ARCH_LBR_CTL		0x000014ce
+#define ARCH_LBR_CTL_LBREN		BIT(0)
+#define ARCH_LBR_CTL_CPL_OFFSET		1
+#define ARCH_LBR_CTL_CPL		(0x3ull << ARCH_LBR_CTL_CPL_OFFSET)
+#define ARCH_LBR_CTL_STACK_OFFSET	3
+#define ARCH_LBR_CTL_STACK		(0x1ull << ARCH_LBR_CTL_STACK_OFFSET)
+#define ARCH_LBR_CTL_FILTER_OFFSET	16
+#define ARCH_LBR_CTL_FILTER		(0x7full << ARCH_LBR_CTL_FILTER_OFFSET)
+#define MSR_ARCH_LBR_DEPTH		0x000014cf
+#define MSR_ARCH_LBR_FROM_0		0x00001500
+#define MSR_ARCH_LBR_TO_0		0x00001600
+#define MSR_ARCH_LBR_INFO_0		0x00001200
 
 #define MSR_IA32_PEBS_ENABLE		0x000003f1
 #define MSR_PEBS_DATA_CFG		0x000003f2
@@ -253,8 +273,6 @@
 
 #define MSR_PEBS_FRONTEND		0x000003f7
 
-#define MSR_IA32_POWER_CTL		0x000001fc
-
 #define MSR_IA32_MC0_CTL		0x00000400
 #define MSR_IA32_MC0_STATUS		0x00000401
 #define MSR_IA32_MC0_ADDR		0x00000402
@@ -418,7 +436,6 @@
 #define MSR_AMD64_PATCH_LEVEL		0x0000008b
 #define MSR_AMD64_TSC_RATIO		0xc0000104
 #define MSR_AMD64_NB_CFG		0xc001001f
-#define MSR_AMD64_CPUID_FN_1		0xc0011004
 #define MSR_AMD64_PATCH_LOADER		0xc0010020
 #define MSR_AMD_PERF_CTL		0xc0010062
 #define MSR_AMD_PERF_STATUS		0xc0010063
@@ -427,6 +444,7 @@
 #define MSR_AMD64_OSVW_STATUS		0xc0010141
 #define MSR_AMD_PPIN_CTL		0xc00102f0
 #define MSR_AMD_PPIN			0xc00102f1
+#define MSR_AMD64_CPUID_FN_1		0xc0011004
 #define MSR_AMD64_LS_CFG		0xc0011020
 #define MSR_AMD64_DC_CFG		0xc0011022
 #define MSR_AMD64_BU_CFG2		0xc001102a
@@ -466,6 +484,8 @@
 #define MSR_F16H_DR0_ADDR_MASK		0xc0011027
 
 /* Fam 15h MSRs */
+#define MSR_F15H_CU_PWR_ACCUMULATOR     0xc001007a
+#define MSR_F15H_CU_MAX_PWR_ACCUMULATOR 0xc001007b
 #define MSR_F15H_PERF_CTL		0xc0010200
 #define MSR_F15H_PERF_CTL0		MSR_F15H_PERF_CTL
 #define MSR_F15H_PERF_CTL1		(MSR_F15H_PERF_CTL + 2)
diff --git a/tools/arch/x86/include/asm/orc_types.h b/tools/arch/x86/include/asm/orc_types.h
index d25534940bde..fdbffec4cfde 100644
--- a/tools/arch/x86/include/asm/orc_types.h
+++ b/tools/arch/x86/include/asm/orc_types.h
@@ -39,27 +39,6 @@
 #define ORC_REG_SP_INDIRECT		9
 #define ORC_REG_MAX			15
 
-/*
- * ORC_TYPE_CALL: Indicates that sp_reg+sp_offset resolves to PREV_SP (the
- * caller's SP right before it made the call).  Used for all callable
- * functions, i.e. all C code and all callable asm functions.
- *
- * ORC_TYPE_REGS: Used in entry code to indicate that sp_reg+sp_offset points
- * to a fully populated pt_regs from a syscall, interrupt, or exception.
- *
- * ORC_TYPE_REGS_IRET: Used in entry code to indicate that sp_reg+sp_offset
- * points to the iret return frame.
- *
- * The UNWIND_HINT macros are used only for the unwind_hint struct.  They
- * aren't used in struct orc_entry due to size and complexity constraints.
- * Objtool converts them to real types when it converts the hints to orc
- * entries.
- */
-#define ORC_TYPE_CALL			0
-#define ORC_TYPE_REGS			1
-#define ORC_TYPE_REGS_IRET		2
-#define UNWIND_HINT_TYPE_RET_OFFSET	3
-
 #ifndef __ASSEMBLY__
 /*
  * This struct is more or less a vastly simplified version of the DWARF Call
@@ -78,19 +57,6 @@ struct orc_entry {
 	unsigned	end:1;
 } __packed;
 
-/*
- * This struct is used by asm and inline asm code to manually annotate the
- * location of registers on the stack for the ORC unwinder.
- *
- * Type can be either ORC_TYPE_* or UNWIND_HINT_TYPE_*.
- */
-struct unwind_hint {
-	u32		ip;
-	s16		sp_offset;
-	u8		sp_reg;
-	u8		type;
-	u8		end;
-};
 #endif /* __ASSEMBLY__ */
 
 #endif /* _ORC_TYPES_H */
diff --git a/tools/arch/x86/lib/memcpy_64.S b/tools/arch/x86/lib/memcpy_64.S
index 45f8e1b02241..0b5b8ae56bd9 100644
--- a/tools/arch/x86/lib/memcpy_64.S
+++ b/tools/arch/x86/lib/memcpy_64.S
@@ -4,7 +4,6 @@
 #include <linux/linkage.h>
 #include <asm/errno.h>
 #include <asm/cpufeatures.h>
-#include <asm/mcsafe_test.h>
 #include <asm/alternative-asm.h>
 #include <asm/export.h>
 
@@ -187,117 +186,3 @@ SYM_FUNC_START(memcpy_orig)
 SYM_FUNC_END(memcpy_orig)
 
 .popsection
-
-#ifndef CONFIG_UML
-
-MCSAFE_TEST_CTL
-
-/*
- * __memcpy_mcsafe - memory copy with machine check exception handling
- * Note that we only catch machine checks when reading the source addresses.
- * Writes to target are posted and don't generate machine checks.
- */
-SYM_FUNC_START(__memcpy_mcsafe)
-	cmpl $8, %edx
-	/* Less than 8 bytes? Go to byte copy loop */
-	jb .L_no_whole_words
-
-	/* Check for bad alignment of source */
-	testl $7, %esi
-	/* Already aligned */
-	jz .L_8byte_aligned
-
-	/* Copy one byte at a time until source is 8-byte aligned */
-	movl %esi, %ecx
-	andl $7, %ecx
-	subl $8, %ecx
-	negl %ecx
-	subl %ecx, %edx
-.L_read_leading_bytes:
-	movb (%rsi), %al
-	MCSAFE_TEST_SRC %rsi 1 .E_leading_bytes
-	MCSAFE_TEST_DST %rdi 1 .E_leading_bytes
-.L_write_leading_bytes:
-	movb %al, (%rdi)
-	incq %rsi
-	incq %rdi
-	decl %ecx
-	jnz .L_read_leading_bytes
-
-.L_8byte_aligned:
-	movl %edx, %ecx
-	andl $7, %edx
-	shrl $3, %ecx
-	jz .L_no_whole_words
-
-.L_read_words:
-	movq (%rsi), %r8
-	MCSAFE_TEST_SRC %rsi 8 .E_read_words
-	MCSAFE_TEST_DST %rdi 8 .E_write_words
-.L_write_words:
-	movq %r8, (%rdi)
-	addq $8, %rsi
-	addq $8, %rdi
-	decl %ecx
-	jnz .L_read_words
-
-	/* Any trailing bytes? */
-.L_no_whole_words:
-	andl %edx, %edx
-	jz .L_done_memcpy_trap
-
-	/* Copy trailing bytes */
-	movl %edx, %ecx
-.L_read_trailing_bytes:
-	movb (%rsi), %al
-	MCSAFE_TEST_SRC %rsi 1 .E_trailing_bytes
-	MCSAFE_TEST_DST %rdi 1 .E_trailing_bytes
-.L_write_trailing_bytes:
-	movb %al, (%rdi)
-	incq %rsi
-	incq %rdi
-	decl %ecx
-	jnz .L_read_trailing_bytes
-
-	/* Copy successful. Return zero */
-.L_done_memcpy_trap:
-	xorl %eax, %eax
-.L_done:
-	ret
-SYM_FUNC_END(__memcpy_mcsafe)
-EXPORT_SYMBOL_GPL(__memcpy_mcsafe)
-
-	.section .fixup, "ax"
-	/*
-	 * Return number of bytes not copied for any failure. Note that
-	 * there is no "tail" handling since the source buffer is 8-byte
-	 * aligned and poison is cacheline aligned.
-	 */
-.E_read_words:
-	shll	$3, %ecx
-.E_leading_bytes:
-	addl	%edx, %ecx
-.E_trailing_bytes:
-	mov	%ecx, %eax
-	jmp	.L_done
-
-	/*
-	 * For write fault handling, given the destination is unaligned,
-	 * we handle faults on multi-byte writes with a byte-by-byte
-	 * copy up to the write-protected page.
-	 */
-.E_write_words:
-	shll	$3, %ecx
-	addl	%edx, %ecx
-	movl	%ecx, %edx
-	jmp mcsafe_handle_tail
-
-	.previous
-
-	_ASM_EXTABLE_FAULT(.L_read_leading_bytes, .E_leading_bytes)
-	_ASM_EXTABLE_FAULT(.L_read_words, .E_read_words)
-	_ASM_EXTABLE_FAULT(.L_read_trailing_bytes, .E_trailing_bytes)
-	_ASM_EXTABLE(.L_write_leading_bytes, .E_leading_bytes)
-	_ASM_EXTABLE(.L_write_words, .E_write_words)
-	_ASM_EXTABLE(.L_write_trailing_bytes, .E_trailing_bytes)
-#endif
diff --git a/tools/arch/x86/tools/gen-insn-attr-x86.awk b/tools/arch/x86/tools/gen-insn-attr-x86.awk
index a42015b305f4..af38469afd14 100644
--- a/tools/arch/x86/tools/gen-insn-attr-x86.awk
+++ b/tools/arch/x86/tools/gen-insn-attr-x86.awk
@@ -362,6 +362,9 @@ function convert_operands(count,opnd,       i,j,imm,mod)
 END {
 	if (awkchecked != "")
 		exit 1
+
+	print "#ifndef __BOOT_COMPRESSED\n"
+
 	# print escape opcode map's array
 	print "/* Escape opcode map array */"
 	print "const insn_attr_t * const inat_escape_tables[INAT_ESC_MAX + 1]" \
@@ -388,6 +391,51 @@ END {
 		for (j = 0; j < max_lprefix; j++)
 			if (atable[i,j])
 				print "	["i"]["j"] = "atable[i,j]","
-	print "};"
+	print "};\n"
+
+	print "#else /* !__BOOT_COMPRESSED */\n"
+
+	print "/* Escape opcode map array */"
+	print "static const insn_attr_t *inat_escape_tables[INAT_ESC_MAX + 1]" \
+	      "[INAT_LSTPFX_MAX + 1];"
+	print ""
+
+	print "/* Group opcode map array */"
+	print "static const insn_attr_t *inat_group_tables[INAT_GRP_MAX + 1]"\
+	      "[INAT_LSTPFX_MAX + 1];"
+	print ""
+
+	print "/* AVX opcode map array */"
+	print "static const insn_attr_t *inat_avx_tables[X86_VEX_M_MAX + 1]"\
+	      "[INAT_LSTPFX_MAX + 1];"
+	print ""
+
+	print "static void inat_init_tables(void)"
+	print "{"
+
+	# print escape opcode map's array
+	print "\t/* Print Escape opcode map array */"
+	for (i = 0; i < geid; i++)
+		for (j = 0; j < max_lprefix; j++)
+			if (etable[i,j])
+				print "\tinat_escape_tables["i"]["j"] = "etable[i,j]";"
+	print ""
+
+	# print group opcode map's array
+	print "\t/* Print Group opcode map array */"
+	for (i = 0; i < ggid; i++)
+		for (j = 0; j < max_lprefix; j++)
+			if (gtable[i,j])
+				print "\tinat_group_tables["i"]["j"] = "gtable[i,j]";"
+	print ""
+	# print AVX opcode map's array
+	print "\t/* Print AVX opcode map array */"
+	for (i = 0; i < gaid; i++)
+		for (j = 0; j < max_lprefix; j++)
+			if (atable[i,j])
+				print "\tinat_avx_tables["i"]["j"] = "atable[i,j]";"
+
+	print "}"
+	print "#endif"
 }
 
diff --git a/tools/bootconfig/samples/bad-override.bconf b/tools/bootconfig/samples/bad-override.bconf
new file mode 100644
index 000000000000..fde6c561512e
--- /dev/null
+++ b/tools/bootconfig/samples/bad-override.bconf
@@ -0,0 +1,3 @@
+key.subkey = value
+# We can not override pre-defined subkeys with value
+key := value
diff --git a/tools/bootconfig/samples/bad-override2.bconf b/tools/bootconfig/samples/bad-override2.bconf
new file mode 100644
index 000000000000..688587cb023c
--- /dev/null
+++ b/tools/bootconfig/samples/bad-override2.bconf
@@ -0,0 +1,3 @@
+key = value
+# We can not override pre-defined value with subkey
+key.subkey := value
diff --git a/tools/bootconfig/samples/good-override.bconf b/tools/bootconfig/samples/good-override.bconf
new file mode 100644
index 000000000000..7d31d5f8fbd8
--- /dev/null
+++ b/tools/bootconfig/samples/good-override.bconf
@@ -0,0 +1,6 @@
+# Override the value
+key.word = 1,2,4
+key.word := 2,3
+
+# No pre-defined key
+key.new.word := "new"
diff --git a/tools/bootconfig/test-bootconfig.sh b/tools/bootconfig/test-bootconfig.sh
index 3c2ab9e75730..d295e406a756 100755
--- a/tools/bootconfig/test-bootconfig.sh
+++ b/tools/bootconfig/test-bootconfig.sh
@@ -117,6 +117,19 @@ xpass grep -q "bar" $OUTFILE
 xpass grep -q "baz" $OUTFILE
 xpass grep -q "qux" $OUTFILE
 
+echo "Override same-key values"
+cat > $TEMPCONF << EOF
+key = bar, baz
+key := qux
+EOF
+echo > $INITRD
+
+xpass $BOOTCONF -a $TEMPCONF $INITRD
+$BOOTCONF $INITRD > $OUTFILE
+xfail grep -q "bar" $OUTFILE
+xfail grep -q "baz" $OUTFILE
+xpass grep -q "qux" $OUTFILE
+
 echo "Double/single quotes test"
 echo "key = '\"string\"';" > $TEMPCONF
 $BOOTCONF -a $TEMPCONF $INITRD
@@ -124,6 +137,31 @@ $BOOTCONF $INITRD > $TEMPCONF
 cat $TEMPCONF
 xpass grep \'\"string\"\' $TEMPCONF
 
+echo "Repeat same-key tree"
+cat > $TEMPCONF << EOF
+foo
+bar
+foo { buz }
+EOF
+echo > $INITRD
+
+xpass $BOOTCONF -a $TEMPCONF $INITRD
+$BOOTCONF $INITRD > $OUTFILE
+xpass grep -q "bar" $OUTFILE
+
+
+echo "Remove/keep tailing spaces"
+cat > $TEMPCONF << EOF
+foo = val     # comment
+bar = "val2 " # comment
+EOF
+echo > $INITRD
+
+xpass $BOOTCONF -a $TEMPCONF $INITRD
+$BOOTCONF $INITRD > $OUTFILE
+xfail grep -q val[[:space:]] $OUTFILE
+xpass grep -q val2[[:space:]] $OUTFILE
+
 echo "=== expected failure cases ==="
 for i in samples/bad-* ; do
   xfail $BOOTCONF -a $i $INITRD
diff --git a/tools/bpf/Makefile b/tools/bpf/Makefile
index 0a6d09a3e91f..39bb322707b4 100644
--- a/tools/bpf/Makefile
+++ b/tools/bpf/Makefile
@@ -38,7 +38,7 @@ FEATURE_TESTS = libbfd disassembler-four-args
 FEATURE_DISPLAY = libbfd disassembler-four-args
 
 check_feat := 1
-NON_CHECK_FEAT_TARGETS := clean bpftool_clean runqslower_clean
+NON_CHECK_FEAT_TARGETS := clean bpftool_clean runqslower_clean resolve_btfids_clean
 ifdef MAKECMDGOALS
 ifeq ($(filter-out $(NON_CHECK_FEAT_TARGETS),$(MAKECMDGOALS)),)
   check_feat := 0
@@ -89,7 +89,7 @@ $(OUTPUT)bpf_exp.lex.c: $(OUTPUT)bpf_exp.yacc.c
 $(OUTPUT)bpf_exp.yacc.o: $(OUTPUT)bpf_exp.yacc.c
 $(OUTPUT)bpf_exp.lex.o: $(OUTPUT)bpf_exp.lex.c
 
-clean: bpftool_clean runqslower_clean
+clean: bpftool_clean runqslower_clean resolve_btfids_clean
 	$(call QUIET_CLEAN, bpf-progs)
 	$(Q)$(RM) -r -- $(OUTPUT)*.o $(OUTPUT)bpf_jit_disasm $(OUTPUT)bpf_dbg \
 	       $(OUTPUT)bpf_asm $(OUTPUT)bpf_exp.yacc.* $(OUTPUT)bpf_exp.lex.*
diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile
index 8462690a039b..4828913703b6 100644
--- a/tools/bpf/bpftool/Makefile
+++ b/tools/bpf/bpftool/Makefile
@@ -25,7 +25,7 @@ endif
 
 LIBBPF = $(LIBBPF_PATH)libbpf.a
 
-BPFTOOL_VERSION := $(shell make -rR --no-print-directory -sC ../../.. kernelversion)
+BPFTOOL_VERSION ?= $(shell make -rR --no-print-directory -sC ../../.. kernelversion)
 
 $(LIBBPF): FORCE
 	$(if $(LIBBPF_OUTPUT),@mkdir -p $(LIBBPF_OUTPUT))
diff --git a/tools/bpf/bpftool/btf_dumper.c b/tools/bpf/bpftool/btf_dumper.c
index ede162f83eea..0e9310727281 100644
--- a/tools/bpf/bpftool/btf_dumper.c
+++ b/tools/bpf/bpftool/btf_dumper.c
@@ -67,7 +67,7 @@ static int dump_prog_id_as_func_ptr(const struct btf_dumper *d,
 	if (!info->btf_id || !info->nr_func_info ||
 	    btf__get_from_id(info->btf_id, &prog_btf))
 		goto print;
-	finfo = (struct bpf_func_info *)info->func_info;
+	finfo = u64_to_ptr(info->func_info);
 	func_type = btf__type_by_id(prog_btf, finfo->type_id);
 	if (!func_type || !btf_is_func(func_type))
 		goto print;
diff --git a/tools/bpf/bpftool/gen.c b/tools/bpf/bpftool/gen.c
index 8a4c2b3b0cd6..f61184653633 100644
--- a/tools/bpf/bpftool/gen.c
+++ b/tools/bpf/bpftool/gen.c
@@ -143,6 +143,20 @@ static int codegen_datasec_def(struct bpf_object *obj,
 			      var_name, align);
 			return -EINVAL;
 		}
+		/* Assume 32-bit architectures when generating data section
+		 * struct memory layout. Given bpftool can't know which target
+		 * host architecture it's emitting skeleton for, we need to be
+		 * conservative and assume 32-bit one to ensure enough padding
+		 * bytes are generated for pointer and long types. This will
+		 * still work correctly for 64-bit architectures, because in
+		 * the worst case we'll generate unnecessary padding field,
+		 * which on 64-bit architectures is not strictly necessary and
+		 * would be handled by natural 8-byte alignment. But it still
+		 * will be a correct memory layout, based on recorded offsets
+		 * in BTF.
+		 */
+		if (align > 4)
+			align = 4;
 
 		align_off = (off + align - 1) / align * align;
 		if (align_off != need_off) {
@@ -397,7 +411,7 @@ static int do_skeleton(int argc, char **argv)
 		{							    \n\
 			struct %1$s *obj;				    \n\
 									    \n\
-			obj = (typeof(obj))calloc(1, sizeof(*obj));	    \n\
+			obj = (struct %1$s *)calloc(1, sizeof(*obj));	    \n\
 			if (!obj)					    \n\
 				return NULL;				    \n\
 			if (%1$s__create_skeleton(obj))			    \n\
@@ -461,7 +475,7 @@ static int do_skeleton(int argc, char **argv)
 		{							    \n\
 			struct bpf_object_skeleton *s;			    \n\
 									    \n\
-			s = (typeof(s))calloc(1, sizeof(*s));		    \n\
+			s = (struct bpf_object_skeleton *)calloc(1, sizeof(*s));\n\
 			if (!s)						    \n\
 				return -1;				    \n\
 			obj->skeleton = s;				    \n\
@@ -479,7 +493,7 @@ static int do_skeleton(int argc, char **argv)
 				/* maps */				    \n\
 				s->map_cnt = %zu;			    \n\
 				s->map_skel_sz = sizeof(*s->maps);	    \n\
-				s->maps = (typeof(s->maps))calloc(s->map_cnt, s->map_skel_sz);\n\
+				s->maps = (struct bpf_map_skeleton *)calloc(s->map_cnt, s->map_skel_sz);\n\
 				if (!s->maps)				    \n\
 					goto err;			    \n\
 			",
@@ -515,7 +529,7 @@ static int do_skeleton(int argc, char **argv)
 				/* programs */				    \n\
 				s->prog_cnt = %zu;			    \n\
 				s->prog_skel_sz = sizeof(*s->progs);	    \n\
-				s->progs = (typeof(s->progs))calloc(s->prog_cnt, s->prog_skel_sz);\n\
+				s->progs = (struct bpf_prog_skeleton *)calloc(s->prog_cnt, s->prog_skel_sz);\n\
 				if (!s->progs)				    \n\
 					goto err;			    \n\
 			",
diff --git a/tools/bpf/bpftool/iter.c b/tools/bpf/bpftool/iter.c
index c9dba7543dba..3b1aad7535dd 100644
--- a/tools/bpf/bpftool/iter.c
+++ b/tools/bpf/bpftool/iter.c
@@ -11,6 +11,7 @@
 static int do_pin(int argc, char **argv)
 {
 	DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, iter_opts);
+	union bpf_iter_link_info linfo;
 	const char *objfile, *path;
 	struct bpf_program *prog;
 	struct bpf_object *obj;
@@ -36,6 +37,11 @@ static int do_pin(int argc, char **argv)
 			map_fd = map_parse_fd(&argc, &argv);
 			if (map_fd < 0)
 				return -1;
+
+			memset(&linfo, 0, sizeof(linfo));
+			linfo.map.map_fd = map_fd;
+			iter_opts.link_info = &linfo;
+			iter_opts.link_info_len = sizeof(linfo);
 		}
 	}
 
@@ -57,9 +63,6 @@ static int do_pin(int argc, char **argv)
 		goto close_obj;
 	}
 
-	if (map_fd >= 0)
-		iter_opts.map_fd = map_fd;
-
 	link = bpf_program__attach_iter(prog, &iter_opts);
 	if (IS_ERR(link)) {
 		err = PTR_ERR(link);
diff --git a/tools/bpf/bpftool/link.c b/tools/bpf/bpftool/link.c
index 1b793759170e..a89f09e3c848 100644
--- a/tools/bpf/bpftool/link.c
+++ b/tools/bpf/bpftool/link.c
@@ -106,7 +106,7 @@ static int show_link_close_json(int fd, struct bpf_link_info *info)
 	switch (info->type) {
 	case BPF_LINK_TYPE_RAW_TRACEPOINT:
 		jsonw_string_field(json_wtr, "tp_name",
-				   (const char *)info->raw_tracepoint.tp_name);
+				   u64_to_ptr(info->raw_tracepoint.tp_name));
 		break;
 	case BPF_LINK_TYPE_TRACING:
 		err = get_prog_info(info->prog_id, &prog_info);
@@ -185,7 +185,7 @@ static int show_link_close_plain(int fd, struct bpf_link_info *info)
 	switch (info->type) {
 	case BPF_LINK_TYPE_RAW_TRACEPOINT:
 		printf("\n\ttp '%s'  ",
-		       (const char *)info->raw_tracepoint.tp_name);
+		       (const char *)u64_to_ptr(info->raw_tracepoint.tp_name));
 		break;
 	case BPF_LINK_TYPE_TRACING:
 		err = get_prog_info(info->prog_id, &prog_info);
diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h
index e3a79b5a9960..c46e52137b87 100644
--- a/tools/bpf/bpftool/main.h
+++ b/tools/bpf/bpftool/main.h
@@ -21,7 +21,15 @@
 /* Make sure we do not use kernel-only integer typedefs */
 #pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64
 
-#define ptr_to_u64(ptr)	((__u64)(unsigned long)(ptr))
+static inline __u64 ptr_to_u64(const void *ptr)
+{
+	return (__u64)(unsigned long)ptr;
+}
+
+static inline void *u64_to_ptr(__u64 ptr)
+{
+	return (void *)(unsigned long)ptr;
+}
 
 #define NEXT_ARG()	({ argc--; argv++; if (argc < 0) usage(); })
 #define NEXT_ARGP()	({ (*argc)--; (*argv)++; if (*argc < 0) usage(); })
diff --git a/tools/bpf/bpftool/pids.c b/tools/bpf/bpftool/pids.c
index e3b116325403..df7d8ec76036 100644
--- a/tools/bpf/bpftool/pids.c
+++ b/tools/bpf/bpftool/pids.c
@@ -134,6 +134,8 @@ int build_obj_refs_table(struct obj_refs_table *table, enum bpf_obj_type type)
 	while (true) {
 		ret = read(fd, buf, sizeof(buf));
 		if (ret < 0) {
+			if (errno == EAGAIN)
+				continue;
 			err = -errno;
 			p_err("failed to read PID iterator output: %d", err);
 			goto out;
diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c
index 158995d853b0..d393eb8263a6 100644
--- a/tools/bpf/bpftool/prog.c
+++ b/tools/bpf/bpftool/prog.c
@@ -428,14 +428,14 @@ prog_dump(struct bpf_prog_info *info, enum dump_mode mode,
 			p_info("no instructions returned");
 			return -1;
 		}
-		buf = (unsigned char *)(info->jited_prog_insns);
+		buf = u64_to_ptr(info->jited_prog_insns);
 		member_len = info->jited_prog_len;
 	} else {	/* DUMP_XLATED */
 		if (info->xlated_prog_len == 0 || !info->xlated_prog_insns) {
 			p_err("error retrieving insn dump: kernel.kptr_restrict set?");
 			return -1;
 		}
-		buf = (unsigned char *)info->xlated_prog_insns;
+		buf = u64_to_ptr(info->xlated_prog_insns);
 		member_len = info->xlated_prog_len;
 	}
 
@@ -444,7 +444,7 @@ prog_dump(struct bpf_prog_info *info, enum dump_mode mode,
 		return -1;
 	}
 
-	func_info = (void *)info->func_info;
+	func_info = u64_to_ptr(info->func_info);
 
 	if (info->nr_line_info) {
 		prog_linfo = bpf_prog_linfo__new(info);
@@ -462,7 +462,7 @@ prog_dump(struct bpf_prog_info *info, enum dump_mode mode,
 
 		n = write(fd, buf, member_len);
 		close(fd);
-		if (n != member_len) {
+		if (n != (ssize_t)member_len) {
 			p_err("error writing output file: %s",
 			      n < 0 ? strerror(errno) : "short write");
 			return -1;
@@ -492,13 +492,13 @@ prog_dump(struct bpf_prog_info *info, enum dump_mode mode,
 			__u32 i;
 			if (info->nr_jited_ksyms) {
 				kernel_syms_load(&dd);
-				ksyms = (__u64 *) info->jited_ksyms;
+				ksyms = u64_to_ptr(info->jited_ksyms);
 			}
 
 			if (json_output)
 				jsonw_start_array(json_wtr);
 
-			lens = (__u32 *) info->jited_func_lens;
+			lens = u64_to_ptr(info->jited_func_lens);
 			for (i = 0; i < info->nr_jited_func_lens; i++) {
 				if (ksyms) {
 					sym = kernel_syms_search(&dd, ksyms[i]);
@@ -559,7 +559,7 @@ prog_dump(struct bpf_prog_info *info, enum dump_mode mode,
 	} else {
 		kernel_syms_load(&dd);
 		dd.nr_jited_ksyms = info->nr_jited_ksyms;
-		dd.jited_ksyms = (__u64 *) info->jited_ksyms;
+		dd.jited_ksyms = u64_to_ptr(info->jited_ksyms);
 		dd.btf = btf;
 		dd.func_info = func_info;
 		dd.finfo_rec_size = info->func_info_rec_size;
@@ -1681,7 +1681,7 @@ static char *profile_target_name(int tgt_fd)
 		goto out;
 	}
 
-	func_info = (struct bpf_func_info *)(info_linear->info.func_info);
+	func_info = u64_to_ptr(info_linear->info.func_info);
 	t = btf__type_by_id(btf, func_info[0].type_id);
 	if (!t) {
 		p_err("btf %d doesn't have type %d",
diff --git a/tools/bpf/resolve_btfids/Makefile b/tools/bpf/resolve_btfids/Makefile
index a88cd4426398..fe8eb537688b 100644
--- a/tools/bpf/resolve_btfids/Makefile
+++ b/tools/bpf/resolve_btfids/Makefile
@@ -80,6 +80,7 @@ libbpf-clean:
 clean: libsubcmd-clean libbpf-clean fixdep-clean
 	$(call msg,CLEAN,$(BINARY))
 	$(Q)$(RM) -f $(BINARY); \
+	$(RM) -rf $(if $(OUTPUT),$(OUTPUT),.)/feature; \
 	find $(if $(OUTPUT),$(OUTPUT),.) -name \*.o -or -name \*.o.cmd -or -name \*.o.d | xargs $(RM)
 
 tags:
diff --git a/tools/bpf/resolve_btfids/main.c b/tools/bpf/resolve_btfids/main.c
index 52d883325a23..0def0bb1f783 100644
--- a/tools/bpf/resolve_btfids/main.c
+++ b/tools/bpf/resolve_btfids/main.c
@@ -233,6 +233,39 @@ static struct btf_id *add_symbol(struct rb_root *root, char *name, size_t size)
 	return btf_id__add(root, id, false);
 }
 
+/*
+ * The data of compressed section should be aligned to 4
+ * (for 32bit) or 8 (for 64 bit) bytes. The binutils ld
+ * sets sh_addralign to 1, which makes libelf fail with
+ * misaligned section error during the update:
+ *    FAILED elf_update(WRITE): invalid section alignment
+ *
+ * While waiting for ld fix, we fix the compressed sections
+ * sh_addralign value manualy.
+ */
+static int compressed_section_fix(Elf *elf, Elf_Scn *scn, GElf_Shdr *sh)
+{
+	int expected = gelf_getclass(elf) == ELFCLASS32 ? 4 : 8;
+
+	if (!(sh->sh_flags & SHF_COMPRESSED))
+		return 0;
+
+	if (sh->sh_addralign == expected)
+		return 0;
+
+	pr_debug2(" - fixing wrong alignment sh_addralign %u, expected %u\n",
+		  sh->sh_addralign, expected);
+
+	sh->sh_addralign = expected;
+
+	if (gelf_update_shdr(scn, sh) == 0) {
+		printf("FAILED cannot update section header: %s\n",
+			elf_errmsg(-1));
+		return -1;
+	}
+	return 0;
+}
+
 static int elf_collect(struct object *obj)
 {
 	Elf_Scn *scn = NULL;
@@ -309,6 +342,9 @@ static int elf_collect(struct object *obj)
 			obj->efile.idlist_shndx = idx;
 			obj->efile.idlist_addr  = sh.sh_addr;
 		}
+
+		if (compressed_section_fix(elf, scn, &sh))
+			return -1;
 	}
 
 	return 0;
@@ -566,6 +602,7 @@ static int sets_patch(struct object *obj)
 
 		next = rb_next(next);
 	}
+	return 0;
 }
 
 static int symbols_patch(struct object *obj)
diff --git a/tools/build/Makefile.feature b/tools/build/Makefile.feature
index cb152370fdef..c1daf4d57518 100644
--- a/tools/build/Makefile.feature
+++ b/tools/build/Makefile.feature
@@ -8,7 +8,7 @@ endif
 
 feature_check = $(eval $(feature_check_code))
 define feature_check_code
-  feature-$(1) := $(shell $(MAKE) OUTPUT=$(OUTPUT_FEATURES) CFLAGS="$(EXTRA_CFLAGS) $(FEATURE_CHECK_CFLAGS-$(1))" CXXFLAGS="$(EXTRA_CXXFLAGS) $(FEATURE_CHECK_CXXFLAGS-$(1))" LDFLAGS="$(LDFLAGS) $(FEATURE_CHECK_LDFLAGS-$(1))" -C $(feature_dir) $(OUTPUT_FEATURES)test-$1.bin >/dev/null 2>/dev/null && echo 1 || echo 0)
+  feature-$(1) := $(shell $(MAKE) OUTPUT=$(OUTPUT_FEATURES) CC="$(CC)" CXX="$(CXX)" CFLAGS="$(EXTRA_CFLAGS) $(FEATURE_CHECK_CFLAGS-$(1))" CXXFLAGS="$(EXTRA_CXXFLAGS) $(FEATURE_CHECK_CXXFLAGS-$(1))" LDFLAGS="$(LDFLAGS) $(FEATURE_CHECK_LDFLAGS-$(1))" -C $(feature_dir) $(OUTPUT_FEATURES)test-$1.bin >/dev/null 2>/dev/null && echo 1 || echo 0)
 endef
 
 feature_set = $(eval $(feature_set_code))
@@ -98,7 +98,8 @@ FEATURE_TESTS_EXTRA :=                  \
          llvm-version                   \
          clang                          \
          libbpf                         \
-         libpfm4
+         libpfm4                        \
+         libdebuginfod
 
 FEATURE_TESTS ?= $(FEATURE_TESTS_BASIC)
 
diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile
index 88371f7f0369..d220fe952747 100644
--- a/tools/build/feature/Makefile
+++ b/tools/build/feature/Makefile
@@ -26,6 +26,7 @@ FILES=                                          \
          test-libelf-gelf_getnote.bin           \
          test-libelf-getshdrstrndx.bin          \
          test-libelf-mmap.bin                   \
+         test-libdebuginfod.bin                 \
          test-libnuma.bin                       \
          test-numa_num_possible_cpus.bin        \
          test-libperl.bin                       \
@@ -74,8 +75,6 @@ FILES=                                          \
 
 FILES := $(addprefix $(OUTPUT),$(FILES))
 
-CC ?= $(CROSS_COMPILE)gcc
-CXX ?= $(CROSS_COMPILE)g++
 PKG_CONFIG ?= $(CROSS_COMPILE)pkg-config
 LLVM_CONFIG ?= llvm-config
 CLANG ?= clang
@@ -159,6 +158,9 @@ $(OUTPUT)test-libelf-gelf_getnote.bin:
 $(OUTPUT)test-libelf-getshdrstrndx.bin:
 	$(BUILD) -lelf
 
+$(OUTPUT)test-libdebuginfod.bin:
+	$(BUILD) -ldebuginfod
+
 $(OUTPUT)test-libnuma.bin:
 	$(BUILD) -lnuma
 
diff --git a/tools/build/feature/test-libdebuginfod.c b/tools/build/feature/test-libdebuginfod.c
new file mode 100644
index 000000000000..da22548b8413
--- /dev/null
+++ b/tools/build/feature/test-libdebuginfod.c
@@ -0,0 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <elfutils/debuginfod.h>
+
+int main(void)
+{
+	debuginfod_client* c = debuginfod_begin();
+	return (long)c;
+}
diff --git a/tools/cgroup/iocost_monitor.py b/tools/cgroup/iocost_monitor.py
index f4699f9b46ba..c4ff907c078b 100644
--- a/tools/cgroup/iocost_monitor.py
+++ b/tools/cgroup/iocost_monitor.py
@@ -45,8 +45,7 @@ except:
     err('The kernel does not have iocost enabled')
 
 IOC_RUNNING     = prog['IOC_RUNNING'].value_()
-NR_USAGE_SLOTS  = prog['NR_USAGE_SLOTS'].value_()
-HWEIGHT_WHOLE   = prog['HWEIGHT_WHOLE'].value_()
+WEIGHT_ONE      = prog['WEIGHT_ONE'].value_()
 VTIME_PER_SEC   = prog['VTIME_PER_SEC'].value_()
 VTIME_PER_USEC  = prog['VTIME_PER_USEC'].value_()
 AUTOP_SSD_FAST  = prog['AUTOP_SSD_FAST'].value_()
@@ -100,7 +99,7 @@ class IocStat:
         self.period_ms = ioc.period_us.value_() / 1_000
         self.period_at = ioc.period_at.value_() / 1_000_000
         self.vperiod_at = ioc.period_at_vtime.value_() / VTIME_PER_SEC
-        self.vrate_pct = ioc.vtime_rate.counter.value_() * 100 / VTIME_PER_USEC
+        self.vrate_pct = ioc.vtime_base_rate.value_() * 100 / VTIME_PER_USEC
         self.busy_level = ioc.busy_level.value_()
         self.autop_idx = ioc.autop_idx.value_()
         self.user_cost_model = ioc.user_cost_model.value_()
@@ -136,7 +135,7 @@ class IocStat:
 
     def table_header_str(self):
         return f'{"":25} active {"weight":>9} {"hweight%":>13} {"inflt%":>6} ' \
-               f'{"dbt":>3} {"delay":>6} {"usages%"}'
+               f'{"debt":>7} {"delay":>7} {"usage%"}'
 
 class IocgStat:
     def __init__(self, iocg):
@@ -144,11 +143,11 @@ class IocgStat:
         blkg = iocg.pd.blkg
 
         self.is_active = not list_empty(iocg.active_list.address_of_())
-        self.weight = iocg.weight.value_()
-        self.active = iocg.active.value_()
-        self.inuse = iocg.inuse.value_()
-        self.hwa_pct = iocg.hweight_active.value_() * 100 / HWEIGHT_WHOLE
-        self.hwi_pct = iocg.hweight_inuse.value_() * 100 / HWEIGHT_WHOLE
+        self.weight = iocg.weight.value_() / WEIGHT_ONE
+        self.active = iocg.active.value_() / WEIGHT_ONE
+        self.inuse = iocg.inuse.value_() / WEIGHT_ONE
+        self.hwa_pct = iocg.hweight_active.value_() * 100 / WEIGHT_ONE
+        self.hwi_pct = iocg.hweight_inuse.value_() * 100 / WEIGHT_ONE
         self.address = iocg.value_()
 
         vdone = iocg.done_vtime.counter.value_()
@@ -160,23 +159,13 @@ class IocgStat:
         else:
             self.inflight_pct = 0
 
-        # vdebt used to be an atomic64_t and is now u64, support both
-        try:
-            self.debt_ms = iocg.abs_vdebt.counter.value_() / VTIME_PER_USEC / 1000
-        except:
-            self.debt_ms = iocg.abs_vdebt.value_() / VTIME_PER_USEC / 1000
-
-        self.use_delay = blkg.use_delay.counter.value_()
-        self.delay_ms = blkg.delay_nsec.counter.value_() / 1_000_000
-
-        usage_idx = iocg.usage_idx.value_()
-        self.usages = []
-        self.usage = 0
-        for i in range(NR_USAGE_SLOTS):
-            usage = iocg.usages[(usage_idx + 1 + i) % NR_USAGE_SLOTS].value_()
-            upct = usage * 100 / HWEIGHT_WHOLE
-            self.usages.append(upct)
-            self.usage = max(self.usage, upct)
+        self.usage = (100 * iocg.usage_delta_us.value_() /
+                      ioc.period_us.value_()) if self.active else 0
+        self.debt_ms = iocg.abs_vdebt.value_() / VTIME_PER_USEC / 1000
+        if blkg.use_delay.counter.value_() != 0:
+            self.delay_ms = blkg.delay_nsec.counter.value_() / 1_000_000
+        else:
+            self.delay_ms = 0
 
     def dict(self, now, path):
         out = { 'cgroup'                : path,
@@ -189,25 +178,20 @@ class IocgStat:
                 'hweight_inuse_pct'     : self.hwi_pct,
                 'inflight_pct'          : self.inflight_pct,
                 'debt_ms'               : self.debt_ms,
-                'use_delay'             : self.use_delay,
                 'delay_ms'              : self.delay_ms,
                 'usage_pct'             : self.usage,
                 'address'               : self.address }
-        for i in range(len(self.usages)):
-            out[f'usage_pct_{i}'] = str(self.usages[i])
         return out
 
     def table_row_str(self, path):
         out = f'{path[-28:]:28} ' \
               f'{"*" if self.is_active else " "} ' \
-              f'{self.inuse:5}/{self.active:5} ' \
+              f'{round(self.inuse):5}/{round(self.active):5} ' \
               f'{self.hwi_pct:6.2f}/{self.hwa_pct:6.2f} ' \
               f'{self.inflight_pct:6.2f} ' \
-              f'{min(math.ceil(self.debt_ms), 999):3} ' \
-              f'{min(self.use_delay, 99):2}*'\
-              f'{min(math.ceil(self.delay_ms), 999):03} '
-        for u in self.usages:
-            out += f'{min(round(u), 999):03d}:'
+              f'{self.debt_ms:7.2f} ' \
+              f'{self.delay_ms:7.2f} '\
+              f'{min(self.usage, 999):6.2f}'
         out = out.rstrip(':')
         return out
 
diff --git a/tools/cgroup/memcg_slabinfo.py b/tools/cgroup/memcg_slabinfo.py
new file mode 100644
index 000000000000..c4225ed63565
--- /dev/null
+++ b/tools/cgroup/memcg_slabinfo.py
@@ -0,0 +1,226 @@
+#!/usr/bin/env drgn
+#
+# Copyright (C) 2020 Roman Gushchin <guro@fb.com>
+# Copyright (C) 2020 Facebook
+
+from os import stat
+import argparse
+import sys
+
+from drgn.helpers.linux import list_for_each_entry, list_empty
+from drgn.helpers.linux import for_each_page
+from drgn.helpers.linux.cpumask import for_each_online_cpu
+from drgn.helpers.linux.percpu import per_cpu_ptr
+from drgn import container_of, FaultError, Object
+
+
+DESC = """
+This is a drgn script to provide slab statistics for memory cgroups.
+It supports cgroup v2 and v1 and can emulate memory.kmem.slabinfo
+interface of cgroup v1.
+For drgn, visit https://github.com/osandov/drgn.
+"""
+
+
+MEMCGS = {}
+
+OO_SHIFT = 16
+OO_MASK = ((1 << OO_SHIFT) - 1)
+
+
+def err(s):
+    print('slabinfo.py: error: %s' % s, file=sys.stderr, flush=True)
+    sys.exit(1)
+
+
+def find_memcg_ids(css=prog['root_mem_cgroup'].css, prefix=''):
+    if not list_empty(css.children.address_of_()):
+        for css in list_for_each_entry('struct cgroup_subsys_state',
+                                       css.children.address_of_(),
+                                       'sibling'):
+            name = prefix + '/' + css.cgroup.kn.name.string_().decode('utf-8')
+            memcg = container_of(css, 'struct mem_cgroup', 'css')
+            MEMCGS[css.cgroup.kn.id.value_()] = memcg
+            find_memcg_ids(css, name)
+
+
+def is_root_cache(s):
+    try:
+        return False if s.memcg_params.root_cache else True
+    except AttributeError:
+        return True
+
+
+def cache_name(s):
+    if is_root_cache(s):
+        return s.name.string_().decode('utf-8')
+    else:
+        return s.memcg_params.root_cache.name.string_().decode('utf-8')
+
+
+# SLUB
+
+def oo_order(s):
+    return s.oo.x >> OO_SHIFT
+
+
+def oo_objects(s):
+    return s.oo.x & OO_MASK
+
+
+def count_partial(n, fn):
+    nr_pages = 0
+    for page in list_for_each_entry('struct page', n.partial.address_of_(),
+                                    'lru'):
+         nr_pages += fn(page)
+    return nr_pages
+
+
+def count_free(page):
+    return page.objects - page.inuse
+
+
+def slub_get_slabinfo(s, cfg):
+    nr_slabs = 0
+    nr_objs = 0
+    nr_free = 0
+
+    for node in range(cfg['nr_nodes']):
+        n = s.node[node]
+        nr_slabs += n.nr_slabs.counter.value_()
+        nr_objs += n.total_objects.counter.value_()
+        nr_free += count_partial(n, count_free)
+
+    return {'active_objs': nr_objs - nr_free,
+            'num_objs': nr_objs,
+            'active_slabs': nr_slabs,
+            'num_slabs': nr_slabs,
+            'objects_per_slab': oo_objects(s),
+            'cache_order': oo_order(s),
+            'limit': 0,
+            'batchcount': 0,
+            'shared': 0,
+            'shared_avail': 0}
+
+
+def cache_show(s, cfg, objs):
+    if cfg['allocator'] == 'SLUB':
+        sinfo = slub_get_slabinfo(s, cfg)
+    else:
+        err('SLAB isn\'t supported yet')
+
+    if cfg['shared_slab_pages']:
+        sinfo['active_objs'] = objs
+        sinfo['num_objs'] = objs
+
+    print('%-17s %6lu %6lu %6u %4u %4d'
+          ' : tunables %4u %4u %4u'
+          ' : slabdata %6lu %6lu %6lu' % (
+              cache_name(s), sinfo['active_objs'], sinfo['num_objs'],
+              s.size, sinfo['objects_per_slab'], 1 << sinfo['cache_order'],
+              sinfo['limit'], sinfo['batchcount'], sinfo['shared'],
+              sinfo['active_slabs'], sinfo['num_slabs'],
+              sinfo['shared_avail']))
+
+
+def detect_kernel_config():
+    cfg = {}
+
+    cfg['nr_nodes'] = prog['nr_online_nodes'].value_()
+
+    if prog.type('struct kmem_cache').members[1][1] == 'flags':
+        cfg['allocator'] = 'SLUB'
+    elif prog.type('struct kmem_cache').members[1][1] == 'batchcount':
+        cfg['allocator'] = 'SLAB'
+    else:
+        err('Can\'t determine the slab allocator')
+
+    cfg['shared_slab_pages'] = False
+    try:
+        if prog.type('struct obj_cgroup'):
+            cfg['shared_slab_pages'] = True
+    except:
+        pass
+
+    return cfg
+
+
+def for_each_slab_page(prog):
+    PGSlab = 1 << prog.constant('PG_slab')
+    PGHead = 1 << prog.constant('PG_head')
+
+    for page in for_each_page(prog):
+        try:
+            if page.flags.value_() & PGSlab:
+                yield page
+        except FaultError:
+            pass
+
+
+def main():
+    parser = argparse.ArgumentParser(description=DESC,
+                                     formatter_class=
+                                     argparse.RawTextHelpFormatter)
+    parser.add_argument('cgroup', metavar='CGROUP',
+                        help='Target memory cgroup')
+    args = parser.parse_args()
+
+    try:
+        cgroup_id = stat(args.cgroup).st_ino
+        find_memcg_ids()
+        memcg = MEMCGS[cgroup_id]
+    except KeyError:
+        err('Can\'t find the memory cgroup')
+
+    cfg = detect_kernel_config()
+
+    print('# name            <active_objs> <num_objs> <objsize> <objperslab> <pagesperslab>'
+          ' : tunables <limit> <batchcount> <sharedfactor>'
+          ' : slabdata <active_slabs> <num_slabs> <sharedavail>')
+
+    if cfg['shared_slab_pages']:
+        obj_cgroups = set()
+        stats = {}
+        caches = {}
+
+        # find memcg pointers belonging to the specified cgroup
+        obj_cgroups.add(memcg.objcg.value_())
+        for ptr in list_for_each_entry('struct obj_cgroup',
+                                       memcg.objcg_list.address_of_(),
+                                       'list'):
+            obj_cgroups.add(ptr.value_())
+
+        # look over all slab pages, belonging to non-root memcgs
+        # and look for objects belonging to the given memory cgroup
+        for page in for_each_slab_page(prog):
+            objcg_vec_raw = page.obj_cgroups.value_()
+            if objcg_vec_raw == 0:
+                continue
+            cache = page.slab_cache
+            if not cache:
+                continue
+            addr = cache.value_()
+            caches[addr] = cache
+            # clear the lowest bit to get the true obj_cgroups
+            objcg_vec = Object(prog, page.obj_cgroups.type_,
+                               value=objcg_vec_raw & ~1)
+
+            if addr not in stats:
+                stats[addr] = 0
+
+            for i in range(oo_objects(cache)):
+                if objcg_vec[i].value_() in obj_cgroups:
+                    stats[addr] += 1
+
+        for addr in caches:
+            if stats[addr] > 0:
+                cache_show(caches[addr], cfg, stats[addr])
+
+    else:
+        for s in list_for_each_entry('struct kmem_cache',
+                                     memcg.kmem_caches.address_of_(),
+                                     'memcg_params.kmem_caches_node'):
+            cache_show(s, cfg, None)
+
+
+main()
diff --git a/tools/gpio/gpio-event-mon.c b/tools/gpio/gpio-event-mon.c
index 1a303a81aeef..90c3155f05b1 100644
--- a/tools/gpio/gpio-event-mon.c
+++ b/tools/gpio/gpio-event-mon.c
@@ -23,17 +23,17 @@
 #include <sys/ioctl.h>
 #include <sys/types.h>
 #include <linux/gpio.h>
+#include "gpio-utils.h"
 
 int monitor_device(const char *device_name,
-		   unsigned int line,
-		   uint32_t handleflags,
-		   uint32_t eventflags,
+		   unsigned int *lines,
+		   unsigned int num_lines,
+		   struct gpio_v2_line_config *config,
 		   unsigned int loops)
 {
-	struct gpioevent_request req;
-	struct gpiohandle_data data;
+	struct gpio_v2_line_values values;
 	char *chrdev_name;
-	int fd;
+	int cfd, lfd;
 	int ret;
 	int i = 0;
 
@@ -41,44 +41,55 @@ int monitor_device(const char *device_name,
 	if (ret < 0)
 		return -ENOMEM;
 
-	fd = open(chrdev_name, 0);
-	if (fd == -1) {
+	cfd = open(chrdev_name, 0);
+	if (cfd == -1) {
 		ret = -errno;
 		fprintf(stderr, "Failed to open %s\n", chrdev_name);
 		goto exit_free_name;
 	}
 
-	req.lineoffset = line;
-	req.handleflags = handleflags;
-	req.eventflags = eventflags;
-	strcpy(req.consumer_label, "gpio-event-mon");
-
-	ret = ioctl(fd, GPIO_GET_LINEEVENT_IOCTL, &req);
-	if (ret == -1) {
-		ret = -errno;
-		fprintf(stderr, "Failed to issue GET EVENT "
-			"IOCTL (%d)\n",
-			ret);
-		goto exit_close_error;
-	}
+	ret = gpiotools_request_line(device_name, lines, num_lines, config,
+				     "gpio-event-mon");
+	if (ret < 0)
+		goto exit_device_close;
+	else
+		lfd = ret;
 
 	/* Read initial states */
-	ret = ioctl(req.fd, GPIOHANDLE_GET_LINE_VALUES_IOCTL, &data);
-	if (ret == -1) {
-		ret = -errno;
-		fprintf(stderr, "Failed to issue GPIOHANDLE GET LINE "
-			"VALUES IOCTL (%d)\n",
+	values.mask = 0;
+	values.bits = 0;
+	for (i = 0; i < num_lines; i++)
+		gpiotools_set_bit(&values.mask, i);
+	ret = gpiotools_get_values(lfd, &values);
+	if (ret < 0) {
+		fprintf(stderr,
+			"Failed to issue GPIO LINE GET VALUES IOCTL (%d)\n",
 			ret);
-		goto exit_close_error;
+		goto exit_line_close;
 	}
 
-	fprintf(stdout, "Monitoring line %d on %s\n", line, device_name);
-	fprintf(stdout, "Initial line value: %d\n", data.values[0]);
+	if (num_lines == 1) {
+		fprintf(stdout, "Monitoring line %d on %s\n", lines[0], device_name);
+		fprintf(stdout, "Initial line value: %d\n",
+			gpiotools_test_bit(values.bits, 0));
+	} else {
+		fprintf(stdout, "Monitoring lines %d", lines[0]);
+		for (i = 1; i < num_lines - 1; i++)
+			fprintf(stdout, ", %d", lines[i]);
+		fprintf(stdout, " and %d on %s\n", lines[i], device_name);
+		fprintf(stdout, "Initial line values: %d",
+			gpiotools_test_bit(values.bits, 0));
+		for (i = 1; i < num_lines - 1; i++)
+			fprintf(stdout, ", %d",
+				gpiotools_test_bit(values.bits, i));
+		fprintf(stdout, " and %d\n",
+			gpiotools_test_bit(values.bits, i));
+	}
 
 	while (1) {
-		struct gpioevent_data event;
+		struct gpio_v2_line_event event;
 
-		ret = read(req.fd, &event, sizeof(event));
+		ret = read(lfd, &event, sizeof(event));
 		if (ret == -1) {
 			if (errno == -EAGAIN) {
 				fprintf(stderr, "nothing available\n");
@@ -96,12 +107,14 @@ int monitor_device(const char *device_name,
 			ret = -EIO;
 			break;
 		}
-		fprintf(stdout, "GPIO EVENT %llu: ", event.timestamp);
+		fprintf(stdout, "GPIO EVENT at %llu on line %d (%d|%d) ",
+			event.timestamp_ns, event.offset, event.line_seqno,
+			event.seqno);
 		switch (event.id) {
-		case GPIOEVENT_EVENT_RISING_EDGE:
+		case GPIO_V2_LINE_EVENT_RISING_EDGE:
 			fprintf(stdout, "rising edge");
 			break;
-		case GPIOEVENT_EVENT_FALLING_EDGE:
+		case GPIO_V2_LINE_EVENT_FALLING_EDGE:
 			fprintf(stdout, "falling edge");
 			break;
 		default:
@@ -114,8 +127,11 @@ int monitor_device(const char *device_name,
 			break;
 	}
 
-exit_close_error:
-	if (close(fd) == -1)
+exit_line_close:
+	if (close(lfd) == -1)
+		perror("Failed to close line file");
+exit_device_close:
+	if (close(cfd) == -1)
 		perror("Failed to close GPIO character device file");
 exit_free_name:
 	free(chrdev_name);
@@ -127,29 +143,37 @@ void print_usage(void)
 	fprintf(stderr, "Usage: gpio-event-mon [options]...\n"
 		"Listen to events on GPIO lines, 0->1 1->0\n"
 		"  -n <name>  Listen on GPIOs on a named device (must be stated)\n"
-		"  -o <n>     Offset to monitor\n"
+		"  -o <n>     Offset of line to monitor (may be repeated)\n"
 		"  -d         Set line as open drain\n"
 		"  -s         Set line as open source\n"
 		"  -r         Listen for rising edges\n"
 		"  -f         Listen for falling edges\n"
+		"  -b <n>     Debounce the line with period n microseconds\n"
 		" [-c <n>]    Do <n> loops (optional, infinite loop if not stated)\n"
 		"  -?         This helptext\n"
 		"\n"
 		"Example:\n"
-		"gpio-event-mon -n gpiochip0 -o 4 -r -f\n"
+		"gpio-event-mon -n gpiochip0 -o 4 -r -f -b 10000\n"
 	);
 }
 
+#define EDGE_FLAGS \
+	(GPIO_V2_LINE_FLAG_EDGE_RISING | \
+	 GPIO_V2_LINE_FLAG_EDGE_FALLING)
+
 int main(int argc, char **argv)
 {
 	const char *device_name = NULL;
-	unsigned int line = -1;
+	unsigned int lines[GPIO_V2_LINES_MAX];
+	unsigned int num_lines = 0;
 	unsigned int loops = 0;
-	uint32_t handleflags = GPIOHANDLE_REQUEST_INPUT;
-	uint32_t eventflags = 0;
-	int c;
+	struct gpio_v2_line_config config;
+	int c, attr, i;
+	unsigned long debounce_period_us = 0;
 
-	while ((c = getopt(argc, argv, "c:n:o:dsrf?")) != -1) {
+	memset(&config, 0, sizeof(config));
+	config.flags = GPIO_V2_LINE_FLAG_INPUT;
+	while ((c = getopt(argc, argv, "c:n:o:b:dsrf?")) != -1) {
 		switch (c) {
 		case 'c':
 			loops = strtoul(optarg, NULL, 10);
@@ -158,19 +182,27 @@ int main(int argc, char **argv)
 			device_name = optarg;
 			break;
 		case 'o':
-			line = strtoul(optarg, NULL, 10);
+			if (num_lines >= GPIO_V2_LINES_MAX) {
+				print_usage();
+				return -1;
+			}
+			lines[num_lines] = strtoul(optarg, NULL, 10);
+			num_lines++;
+			break;
+		case 'b':
+			debounce_period_us = strtoul(optarg, NULL, 10);
 			break;
 		case 'd':
-			handleflags |= GPIOHANDLE_REQUEST_OPEN_DRAIN;
+			config.flags |= GPIO_V2_LINE_FLAG_OPEN_DRAIN;
 			break;
 		case 's':
-			handleflags |= GPIOHANDLE_REQUEST_OPEN_SOURCE;
+			config.flags |= GPIO_V2_LINE_FLAG_OPEN_SOURCE;
 			break;
 		case 'r':
-			eventflags |= GPIOEVENT_REQUEST_RISING_EDGE;
+			config.flags |= GPIO_V2_LINE_FLAG_EDGE_RISING;
 			break;
 		case 'f':
-			eventflags |= GPIOEVENT_REQUEST_FALLING_EDGE;
+			config.flags |= GPIO_V2_LINE_FLAG_EDGE_FALLING;
 			break;
 		case '?':
 			print_usage();
@@ -178,15 +210,23 @@ int main(int argc, char **argv)
 		}
 	}
 
-	if (!device_name || line == -1) {
+	if (debounce_period_us) {
+		attr = config.num_attrs;
+		config.num_attrs++;
+		for (i = 0; i < num_lines; i++)
+			gpiotools_set_bit(&config.attrs[attr].mask, i);
+		config.attrs[attr].attr.id = GPIO_V2_LINE_ATTR_ID_DEBOUNCE;
+		config.attrs[attr].attr.debounce_period_us = debounce_period_us;
+	}
+
+	if (!device_name || num_lines == 0) {
 		print_usage();
 		return -1;
 	}
-	if (!eventflags) {
+	if (!(config.flags & EDGE_FLAGS)) {
 		printf("No flags specified, listening on both rising and "
 		       "falling edges\n");
-		eventflags = GPIOEVENT_REQUEST_BOTH_EDGES;
+		config.flags |= EDGE_FLAGS;
 	}
-	return monitor_device(device_name, line, handleflags,
-			      eventflags, loops);
+	return monitor_device(device_name, lines, num_lines, &config, loops);
 }
diff --git a/tools/gpio/gpio-hammer.c b/tools/gpio/gpio-hammer.c
index 9fd926e8cb52..54fdf59dd320 100644
--- a/tools/gpio/gpio-hammer.c
+++ b/tools/gpio/gpio-hammer.c
@@ -22,39 +22,46 @@
 #include <linux/gpio.h>
 #include "gpio-utils.h"
 
-int hammer_device(const char *device_name, unsigned int *lines, int nlines,
+int hammer_device(const char *device_name, unsigned int *lines, int num_lines,
 		  unsigned int loops)
 {
-	struct gpiohandle_data data;
+	struct gpio_v2_line_values values;
+	struct gpio_v2_line_config config;
 	char swirr[] = "-\\|/";
 	int fd;
 	int ret;
 	int i, j;
 	unsigned int iteration = 0;
 
-	memset(&data.values, 0, sizeof(data.values));
-	ret = gpiotools_request_linehandle(device_name, lines, nlines,
-					   GPIOHANDLE_REQUEST_OUTPUT, &data,
-					   "gpio-hammer");
+	memset(&config, 0, sizeof(config));
+	config.flags = GPIO_V2_LINE_FLAG_OUTPUT;
+
+	ret = gpiotools_request_line(device_name, lines, num_lines,
+				     &config, "gpio-hammer");
 	if (ret < 0)
 		goto exit_error;
 	else
 		fd = ret;
 
-	ret = gpiotools_get_values(fd, &data);
+	values.mask = 0;
+	values.bits = 0;
+	for (i = 0; i < num_lines; i++)
+		gpiotools_set_bit(&values.mask, i);
+
+	ret = gpiotools_get_values(fd, &values);
 	if (ret < 0)
 		goto exit_close_error;
 
 	fprintf(stdout, "Hammer lines [");
-	for (i = 0; i < nlines; i++) {
+	for (i = 0; i < num_lines; i++) {
 		fprintf(stdout, "%d", lines[i]);
-		if (i != (nlines - 1))
+		if (i != (num_lines - 1))
 			fprintf(stdout, ", ");
 	}
 	fprintf(stdout, "] on %s, initial states: [", device_name);
-	for (i = 0; i < nlines; i++) {
-		fprintf(stdout, "%d", data.values[i]);
-		if (i != (nlines - 1))
+	for (i = 0; i < num_lines; i++) {
+		fprintf(stdout, "%d", gpiotools_test_bit(values.bits, i));
+		if (i != (num_lines - 1))
 			fprintf(stdout, ", ");
 	}
 	fprintf(stdout, "]\n");
@@ -63,15 +70,15 @@ int hammer_device(const char *device_name, unsigned int *lines, int nlines,
 	j = 0;
 	while (1) {
 		/* Invert all lines so we blink */
-		for (i = 0; i < nlines; i++)
-			data.values[i] = !data.values[i];
+		for (i = 0; i < num_lines; i++)
+			gpiotools_change_bit(&values.bits, i);
 
-		ret = gpiotools_set_values(fd, &data);
+		ret = gpiotools_set_values(fd, &values);
 		if (ret < 0)
 			goto exit_close_error;
 
 		/* Re-read values to get status */
-		ret = gpiotools_get_values(fd, &data);
+		ret = gpiotools_get_values(fd, &values);
 		if (ret < 0)
 			goto exit_close_error;
 
@@ -81,9 +88,10 @@ int hammer_device(const char *device_name, unsigned int *lines, int nlines,
 			j = 0;
 
 		fprintf(stdout, "[");
-		for (i = 0; i < nlines; i++) {
-			fprintf(stdout, "%d: %d", lines[i], data.values[i]);
-			if (i != (nlines - 1))
+		for (i = 0; i < num_lines; i++) {
+			fprintf(stdout, "%d: %d", lines[i],
+				gpiotools_test_bit(values.bits, i));
+			if (i != (num_lines - 1))
 				fprintf(stdout, ", ");
 		}
 		fprintf(stdout, "]\r");
@@ -97,7 +105,7 @@ int hammer_device(const char *device_name, unsigned int *lines, int nlines,
 	ret = 0;
 
 exit_close_error:
-	gpiotools_release_linehandle(fd);
+	gpiotools_release_line(fd);
 exit_error:
 	return ret;
 }
@@ -121,7 +129,7 @@ int main(int argc, char **argv)
 	const char *device_name = NULL;
 	unsigned int lines[GPIOHANDLES_MAX];
 	unsigned int loops = 0;
-	int nlines;
+	int num_lines;
 	int c;
 	int i;
 
@@ -158,11 +166,11 @@ int main(int argc, char **argv)
 		return -1;
 	}
 
-	nlines = i;
+	num_lines = i;
 
-	if (!device_name || !nlines) {
+	if (!device_name || !num_lines) {
 		print_usage();
 		return -1;
 	}
-	return hammer_device(device_name, lines, nlines, loops);
+	return hammer_device(device_name, lines, num_lines, loops);
 }
diff --git a/tools/gpio/gpio-utils.c b/tools/gpio/gpio-utils.c
index 16a5d9cb9da2..37187e056c8b 100644
--- a/tools/gpio/gpio-utils.c
+++ b/tools/gpio/gpio-utils.c
@@ -38,7 +38,7 @@
  *			such as "gpiochip0"
  * @lines:		An array desired lines, specified by offset
  *			index for the associated GPIO device.
- * @nline:		The number of lines to request.
+ * @num_lines:		The number of lines to request.
  * @flag:		The new flag for requsted gpio. Reference
  *			"linux/gpio.h" for the meaning of flag.
  * @data:		Default value will be set to gpio when flag is
@@ -56,7 +56,7 @@
  *			On failure return the errno.
  */
 int gpiotools_request_linehandle(const char *device_name, unsigned int *lines,
-				 unsigned int nlines, unsigned int flag,
+				 unsigned int num_lines, unsigned int flag,
 				 struct gpiohandle_data *data,
 				 const char *consumer_label)
 {
@@ -78,12 +78,12 @@ int gpiotools_request_linehandle(const char *device_name, unsigned int *lines,
 		goto exit_free_name;
 	}
 
-	for (i = 0; i < nlines; i++)
+	for (i = 0; i < num_lines; i++)
 		req.lineoffsets[i] = lines[i];
 
 	req.flags = flag;
 	strcpy(req.consumer_label, consumer_label);
-	req.lines = nlines;
+	req.lines = num_lines;
 	if (flag & GPIOHANDLE_REQUEST_OUTPUT)
 		memcpy(req.default_values, data, sizeof(req.default_values));
 
@@ -100,20 +100,87 @@ exit_free_name:
 	free(chrdev_name);
 	return ret < 0 ? ret : req.fd;
 }
+
+/**
+ * gpiotools_request_line() - request gpio lines in a gpiochip
+ * @device_name:	The name of gpiochip without prefix "/dev/",
+ *			such as "gpiochip0"
+ * @lines:		An array desired lines, specified by offset
+ *			index for the associated GPIO device.
+ * @num_lines:		The number of lines to request.
+ * @config:		The new config for requested gpio. Reference
+ *			"linux/gpio.h" for config details.
+ * @consumer:		The name of consumer, such as "sysfs",
+ *			"powerkey". This is useful for other users to
+ *			know who is using.
+ *
+ * Request gpio lines through the ioctl provided by chardev. User
+ * could call gpiotools_set_values() and gpiotools_get_values() to
+ * read and write respectively through the returned fd. Call
+ * gpiotools_release_line() to release these lines after that.
+ *
+ * Return:		On success return the fd;
+ *			On failure return the errno.
+ */
+int gpiotools_request_line(const char *device_name, unsigned int *lines,
+			   unsigned int num_lines,
+			   struct gpio_v2_line_config *config,
+			   const char *consumer)
+{
+	struct gpio_v2_line_request req;
+	char *chrdev_name;
+	int fd;
+	int i;
+	int ret;
+
+	ret = asprintf(&chrdev_name, "/dev/%s", device_name);
+	if (ret < 0)
+		return -ENOMEM;
+
+	fd = open(chrdev_name, 0);
+	if (fd == -1) {
+		ret = -errno;
+		fprintf(stderr, "Failed to open %s, %s\n",
+			chrdev_name, strerror(errno));
+		goto exit_free_name;
+	}
+
+	memset(&req, 0, sizeof(req));
+	for (i = 0; i < num_lines; i++)
+		req.offsets[i] = lines[i];
+
+	req.config = *config;
+	strcpy(req.consumer, consumer);
+	req.num_lines = num_lines;
+
+	ret = ioctl(fd, GPIO_V2_GET_LINE_IOCTL, &req);
+	if (ret == -1) {
+		ret = -errno;
+		fprintf(stderr, "Failed to issue %s (%d), %s\n",
+			"GPIO_GET_LINE_IOCTL", ret, strerror(errno));
+	}
+
+	if (close(fd) == -1)
+		perror("Failed to close GPIO character device file");
+exit_free_name:
+	free(chrdev_name);
+	return ret < 0 ? ret : req.fd;
+}
+
 /**
  * gpiotools_set_values(): Set the value of gpio(s)
  * @fd:			The fd returned by
- *			gpiotools_request_linehandle().
- * @data:		The array of values want to set.
+ *			gpiotools_request_line().
+ * @values:		The array of values want to set.
  *
  * Return:		On success return 0;
  *			On failure return the errno.
  */
-int gpiotools_set_values(const int fd, struct gpiohandle_data *data)
+int gpiotools_set_values(const int fd, struct gpio_v2_line_values *values)
 {
 	int ret;
 
-	ret = ioctl(fd, GPIOHANDLE_SET_LINE_VALUES_IOCTL, data);
+	ret = ioctl(fd, GPIO_V2_LINE_SET_VALUES_IOCTL, values);
 	if (ret == -1) {
 		ret = -errno;
 		fprintf(stderr, "Failed to issue %s (%d), %s\n",
@@ -127,17 +194,17 @@ int gpiotools_set_values(const int fd, struct gpiohandle_data *data)
 /**
  * gpiotools_get_values(): Get the value of gpio(s)
  * @fd:			The fd returned by
- *			gpiotools_request_linehandle().
- * @data:		The array of values get from hardware.
+ *			gpiotools_request_line().
+ * @values:		The array of values get from hardware.
  *
  * Return:		On success return 0;
  *			On failure return the errno.
  */
-int gpiotools_get_values(const int fd, struct gpiohandle_data *data)
+int gpiotools_get_values(const int fd, struct gpio_v2_line_values *values)
 {
 	int ret;
 
-	ret = ioctl(fd, GPIOHANDLE_GET_LINE_VALUES_IOCTL, data);
+	ret = ioctl(fd, GPIO_V2_LINE_GET_VALUES_IOCTL, values);
 	if (ret == -1) {
 		ret = -errno;
 		fprintf(stderr, "Failed to issue %s (%d), %s\n",
@@ -170,6 +237,27 @@ int gpiotools_release_linehandle(const int fd)
 }
 
 /**
+ * gpiotools_release_line(): Release the line(s) of gpiochip
+ * @fd:			The fd returned by
+ *			gpiotools_request_line().
+ *
+ * Return:		On success return 0;
+ *			On failure return the errno.
+ */
+int gpiotools_release_line(const int fd)
+{
+	int ret;
+
+	ret = close(fd);
+	if (ret == -1) {
+		perror("Failed to close GPIO LINE device file");
+		ret = -errno;
+	}
+
+	return ret;
+}
+
+/**
  * gpiotools_get(): Get value from specific line
  * @device_name:	The name of gpiochip without prefix "/dev/",
  *			such as "gpiochip0"
@@ -180,11 +268,14 @@ int gpiotools_release_linehandle(const int fd)
  */
 int gpiotools_get(const char *device_name, unsigned int line)
 {
-	struct gpiohandle_data data;
+	int ret;
+	unsigned int value;
 	unsigned int lines[] = {line};
 
-	gpiotools_gets(device_name, lines, 1, &data);
-	return data.values[0];
+	ret = gpiotools_gets(device_name, lines, 1, &value);
+	if (ret)
+		return ret;
+	return value;
 }
 
 
@@ -194,28 +285,36 @@ int gpiotools_get(const char *device_name, unsigned int line)
  *			such as "gpiochip0".
  * @lines:		An array desired lines, specified by offset
  *			index for the associated GPIO device.
- * @nline:		The number of lines to request.
- * @data:		The array of values get from gpiochip.
+ * @num_lines:		The number of lines to request.
+ * @values:		The array of values get from gpiochip.
  *
  * Return:		On success return 0;
  *			On failure return the errno.
  */
 int gpiotools_gets(const char *device_name, unsigned int *lines,
-		   unsigned int nlines, struct gpiohandle_data *data)
+		   unsigned int num_lines, unsigned int *values)
 {
-	int fd;
+	int fd, i;
 	int ret;
 	int ret_close;
+	struct gpio_v2_line_config config;
+	struct gpio_v2_line_values lv;
 
-	ret = gpiotools_request_linehandle(device_name, lines, nlines,
-					   GPIOHANDLE_REQUEST_INPUT, data,
-					   CONSUMER);
+	memset(&config, 0, sizeof(config));
+	config.flags = GPIO_V2_LINE_FLAG_INPUT;
+	ret = gpiotools_request_line(device_name, lines, num_lines,
+				     &config, CONSUMER);
 	if (ret < 0)
 		return ret;
 
 	fd = ret;
-	ret = gpiotools_get_values(fd, data);
-	ret_close = gpiotools_release_linehandle(fd);
+	for (i = 0; i < num_lines; i++)
+		gpiotools_set_bit(&lv.mask, i);
+	ret = gpiotools_get_values(fd, &lv);
+	if (!ret)
+		for (i = 0; i < num_lines; i++)
+			values[i] = gpiotools_test_bit(lv.bits, i);
+	ret_close = gpiotools_release_line(fd);
 	return ret < 0 ? ret : ret_close;
 }
 
@@ -232,11 +331,9 @@ int gpiotools_gets(const char *device_name, unsigned int *lines,
 int gpiotools_set(const char *device_name, unsigned int line,
 		  unsigned int value)
 {
-	struct gpiohandle_data data;
 	unsigned int lines[] = {line};
 
-	data.values[0] = value;
-	return gpiotools_sets(device_name, lines, 1, &data);
+	return gpiotools_sets(device_name, lines, 1, &value);
 }
 
 /**
@@ -245,23 +342,32 @@ int gpiotools_set(const char *device_name, unsigned int line,
  *			such as "gpiochip0".
  * @lines:		An array desired lines, specified by offset
  *			index for the associated GPIO device.
- * @nline:		The number of lines to request.
- * @data:		The array of values set to gpiochip, must be
+ * @num_lines:		The number of lines to request.
+ * @value:		The array of values set to gpiochip, must be
  *			0(low) or 1(high).
  *
  * Return:		On success return 0;
  *			On failure return the errno.
  */
 int gpiotools_sets(const char *device_name, unsigned int *lines,
-		   unsigned int nlines, struct gpiohandle_data *data)
+		   unsigned int num_lines, unsigned int *values)
 {
-	int ret;
+	int ret, i;
+	struct gpio_v2_line_config config;
 
-	ret = gpiotools_request_linehandle(device_name, lines, nlines,
-					   GPIOHANDLE_REQUEST_OUTPUT, data,
-					   CONSUMER);
+	memset(&config, 0, sizeof(config));
+	config.flags = GPIO_V2_LINE_FLAG_OUTPUT;
+	config.num_attrs = 1;
+	config.attrs[0].attr.id = GPIO_V2_LINE_ATTR_ID_OUTPUT_VALUES;
+	for (i = 0; i < num_lines; i++) {
+		gpiotools_set_bit(&config.attrs[0].mask, i);
+		gpiotools_assign_bit(&config.attrs[0].attr.values,
+				     i, values[i]);
+	}
+	ret = gpiotools_request_line(device_name, lines, num_lines,
+				     &config, CONSUMER);
 	if (ret < 0)
 		return ret;
 
-	return gpiotools_release_linehandle(ret);
+	return gpiotools_release_line(ret);
 }
diff --git a/tools/gpio/gpio-utils.h b/tools/gpio/gpio-utils.h
index cf37f13f3dcb..6c69a9f1c253 100644
--- a/tools/gpio/gpio-utils.h
+++ b/tools/gpio/gpio-utils.h
@@ -12,7 +12,9 @@
 #ifndef _GPIO_UTILS_H_
 #define _GPIO_UTILS_H_
 
+#include <stdbool.h>
 #include <string.h>
+#include <linux/types.h>
 
 #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
 
@@ -23,19 +25,55 @@ static inline int check_prefix(const char *str, const char *prefix)
 }
 
 int gpiotools_request_linehandle(const char *device_name, unsigned int *lines,
-				 unsigned int nlines, unsigned int flag,
+				 unsigned int num_lines, unsigned int flag,
 				 struct gpiohandle_data *data,
 				 const char *consumer_label);
-int gpiotools_set_values(const int fd, struct gpiohandle_data *data);
-int gpiotools_get_values(const int fd, struct gpiohandle_data *data);
 int gpiotools_release_linehandle(const int fd);
 
+int gpiotools_request_line(const char *device_name,
+			   unsigned int *lines,
+			   unsigned int num_lines,
+			   struct gpio_v2_line_config *config,
+			   const char *consumer);
+int gpiotools_set_values(const int fd, struct gpio_v2_line_values *values);
+int gpiotools_get_values(const int fd, struct gpio_v2_line_values *values);
+int gpiotools_release_line(const int fd);
+
 int gpiotools_get(const char *device_name, unsigned int line);
 int gpiotools_gets(const char *device_name, unsigned int *lines,
-		   unsigned int nlines, struct gpiohandle_data *data);
+		   unsigned int num_lines, unsigned int *values);
 int gpiotools_set(const char *device_name, unsigned int line,
 		  unsigned int value);
 int gpiotools_sets(const char *device_name, unsigned int *lines,
-		   unsigned int nlines, struct gpiohandle_data *data);
+		   unsigned int num_lines, unsigned int *values);
+
+/* helper functions for gpio_v2_line_values bits */
+static inline void gpiotools_set_bit(__u64 *b, int n)
+{
+	*b |= _BITULL(n);
+}
+
+static inline void gpiotools_change_bit(__u64 *b, int n)
+{
+	*b ^= _BITULL(n);
+}
+
+static inline void gpiotools_clear_bit(__u64 *b, int n)
+{
+	*b &= ~_BITULL(n);
+}
+
+static inline int gpiotools_test_bit(__u64 b, int n)
+{
+	return !!(b & _BITULL(n));
+}
+
+static inline void gpiotools_assign_bit(__u64 *b, int n, bool value)
+{
+	if (value)
+		gpiotools_set_bit(b, n);
+	else
+		gpiotools_clear_bit(b, n);
+}
 
 #endif /* _GPIO_UTILS_H_ */
diff --git a/tools/gpio/gpio-watch.c b/tools/gpio/gpio-watch.c
index 5cea24fddfa7..f229ec62301b 100644
--- a/tools/gpio/gpio-watch.c
+++ b/tools/gpio/gpio-watch.c
@@ -21,8 +21,8 @@
 
 int main(int argc, char **argv)
 {
-	struct gpioline_info_changed chg;
-	struct gpioline_info req;
+	struct gpio_v2_line_info_changed chg;
+	struct gpio_v2_line_info req;
 	struct pollfd pfd;
 	int fd, i, j, ret;
 	char *event, *end;
@@ -40,11 +40,11 @@ int main(int argc, char **argv)
 	for (i = 0, j = 2; i < argc - 2; i++, j++) {
 		memset(&req, 0, sizeof(req));
 
-		req.line_offset = strtoul(argv[j], &end, 0);
+		req.offset = strtoul(argv[j], &end, 0);
 		if (*end != '\0')
 			goto err_usage;
 
-		ret = ioctl(fd, GPIO_GET_LINEINFO_WATCH_IOCTL, &req);
+		ret = ioctl(fd, GPIO_V2_GET_LINEINFO_WATCH_IOCTL, &req);
 		if (ret) {
 			perror("unable to set up line watch");
 			return EXIT_FAILURE;
@@ -71,13 +71,13 @@ int main(int argc, char **argv)
 			}
 
 			switch (chg.event_type) {
-			case GPIOLINE_CHANGED_REQUESTED:
+			case GPIO_V2_LINE_CHANGED_REQUESTED:
 				event = "requested";
 				break;
-			case GPIOLINE_CHANGED_RELEASED:
+			case GPIO_V2_LINE_CHANGED_RELEASED:
 				event = "released";
 				break;
-			case GPIOLINE_CHANGED_CONFIG:
+			case GPIO_V2_LINE_CHANGED_CONFIG:
 				event = "config changed";
 				break;
 			default:
@@ -87,7 +87,7 @@ int main(int argc, char **argv)
 			}
 
 			printf("line %u: %s at %llu\n",
-			       chg.info.line_offset, event, chg.timestamp);
+			       chg.info.offset, event, chg.timestamp_ns);
 		}
 	}
 
diff --git a/tools/gpio/lsgpio.c b/tools/gpio/lsgpio.c
index b08d7a5e779b..5a05a454d0c9 100644
--- a/tools/gpio/lsgpio.c
+++ b/tools/gpio/lsgpio.c
@@ -25,57 +25,73 @@
 
 struct gpio_flag {
 	char *name;
-	unsigned long mask;
+	unsigned long long mask;
 };
 
 struct gpio_flag flagnames[] = {
 	{
-		.name = "kernel",
-		.mask = GPIOLINE_FLAG_KERNEL,
+		.name = "used",
+		.mask = GPIO_V2_LINE_FLAG_USED,
+	},
+	{
+		.name = "input",
+		.mask = GPIO_V2_LINE_FLAG_INPUT,
 	},
 	{
 		.name = "output",
-		.mask = GPIOLINE_FLAG_IS_OUT,
+		.mask = GPIO_V2_LINE_FLAG_OUTPUT,
 	},
 	{
 		.name = "active-low",
-		.mask = GPIOLINE_FLAG_ACTIVE_LOW,
+		.mask = GPIO_V2_LINE_FLAG_ACTIVE_LOW,
 	},
 	{
 		.name = "open-drain",
-		.mask = GPIOLINE_FLAG_OPEN_DRAIN,
+		.mask = GPIO_V2_LINE_FLAG_OPEN_DRAIN,
 	},
 	{
 		.name = "open-source",
-		.mask = GPIOLINE_FLAG_OPEN_SOURCE,
+		.mask = GPIO_V2_LINE_FLAG_OPEN_SOURCE,
 	},
 	{
 		.name = "pull-up",
-		.mask = GPIOLINE_FLAG_BIAS_PULL_UP,
+		.mask = GPIO_V2_LINE_FLAG_BIAS_PULL_UP,
 	},
 	{
 		.name = "pull-down",
-		.mask = GPIOLINE_FLAG_BIAS_PULL_DOWN,
+		.mask = GPIO_V2_LINE_FLAG_BIAS_PULL_DOWN,
 	},
 	{
 		.name = "bias-disabled",
-		.mask = GPIOLINE_FLAG_BIAS_DISABLE,
+		.mask = GPIO_V2_LINE_FLAG_BIAS_DISABLED,
 	},
 };
 
-void print_flags(unsigned long flags)
+static void print_attributes(struct gpio_v2_line_info *info)
 {
 	int i;
-	int printed = 0;
+	const char *field_format = "%s";
 
 	for (i = 0; i < ARRAY_SIZE(flagnames); i++) {
-		if (flags & flagnames[i].mask) {
-			if (printed)
-				fprintf(stdout, " ");
-			fprintf(stdout, "%s", flagnames[i].name);
-			printed++;
+		if (info->flags & flagnames[i].mask) {
+			fprintf(stdout, field_format, flagnames[i].name);
+			field_format = ", %s";
 		}
 	}
+
+	if ((info->flags & GPIO_V2_LINE_FLAG_EDGE_RISING) &&
+	    (info->flags & GPIO_V2_LINE_FLAG_EDGE_FALLING))
+		fprintf(stdout, field_format, "both-edges");
+	else if (info->flags & GPIO_V2_LINE_FLAG_EDGE_RISING)
+		fprintf(stdout, field_format, "rising-edge");
+	else if (info->flags & GPIO_V2_LINE_FLAG_EDGE_FALLING)
+		fprintf(stdout, field_format, "falling-edge");
+
+	for (i = 0; i < info->num_attrs; i++) {
+		if (info->attrs[i].id == GPIO_V2_LINE_ATTR_ID_DEBOUNCE)
+			fprintf(stdout, ", debounce_period=%dusec",
+				info->attrs[0].debounce_period_us);
+	}
 }
 
 int list_device(const char *device_name)
@@ -109,18 +125,18 @@ int list_device(const char *device_name)
 
 	/* Loop over the lines and print info */
 	for (i = 0; i < cinfo.lines; i++) {
-		struct gpioline_info linfo;
+		struct gpio_v2_line_info linfo;
 
 		memset(&linfo, 0, sizeof(linfo));
-		linfo.line_offset = i;
+		linfo.offset = i;
 
-		ret = ioctl(fd, GPIO_GET_LINEINFO_IOCTL, &linfo);
+		ret = ioctl(fd, GPIO_V2_GET_LINEINFO_IOCTL, &linfo);
 		if (ret == -1) {
 			ret = -errno;
 			perror("Failed to issue LINEINFO IOCTL\n");
 			goto exit_close_error;
 		}
-		fprintf(stdout, "\tline %2d:", linfo.line_offset);
+		fprintf(stdout, "\tline %2d:", linfo.offset);
 		if (linfo.name[0])
 			fprintf(stdout, " \"%s\"", linfo.name);
 		else
@@ -131,7 +147,7 @@ int list_device(const char *device_name)
 			fprintf(stdout, " unused");
 		if (linfo.flags) {
 			fprintf(stdout, " [");
-			print_flags(linfo.flags);
+			print_attributes(&linfo);
 			fprintf(stdout, "]");
 		}
 		fprintf(stdout, "\n");
diff --git a/tools/iio/iio_event_monitor.c b/tools/iio/iio_event_monitor.c
index f115d166c985..bb03859db89d 100644
--- a/tools/iio/iio_event_monitor.c
+++ b/tools/iio/iio_event_monitor.c
@@ -119,6 +119,7 @@ static const char * const iio_modifier_names[] = {
 	[IIO_MOD_PM2P5] = "pm2p5",
 	[IIO_MOD_PM4] = "pm4",
 	[IIO_MOD_PM10] = "pm10",
+	[IIO_MOD_O2] = "o2",
 };
 
 static bool event_is_known(struct iio_event_data *event)
@@ -211,6 +212,7 @@ static bool event_is_known(struct iio_event_data *event)
 	case IIO_MOD_PM2P5:
 	case IIO_MOD_PM4:
 	case IIO_MOD_PM10:
+	case IIO_MOD_O2:
 		break;
 	default:
 		return false;
diff --git a/tools/include/linux/jhash.h b/tools/include/linux/jhash.h
index 348c6f47e4cc..af8d0fe1c6ce 100644
--- a/tools/include/linux/jhash.h
+++ b/tools/include/linux/jhash.h
@@ -5,7 +5,7 @@
  *
  * Copyright (C) 2006. Bob Jenkins (bob_jenkins@burtleburtle.net)
  *
- * http://burtleburtle.net/bob/hash/
+ * https://burtleburtle.net/bob/hash/
  *
  * These are the credits from Bob's sources:
  *
diff --git a/tools/include/linux/objtool.h b/tools/include/linux/objtool.h
new file mode 100644
index 000000000000..ab82c793c897
--- /dev/null
+++ b/tools/include/linux/objtool.h
@@ -0,0 +1,129 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_OBJTOOL_H
+#define _LINUX_OBJTOOL_H
+
+#ifndef __ASSEMBLY__
+
+#include <linux/types.h>
+
+/*
+ * This struct is used by asm and inline asm code to manually annotate the
+ * location of registers on the stack.
+ */
+struct unwind_hint {
+	u32		ip;
+	s16		sp_offset;
+	u8		sp_reg;
+	u8		type;
+	u8		end;
+};
+#endif
+
+/*
+ * UNWIND_HINT_TYPE_CALL: Indicates that sp_reg+sp_offset resolves to PREV_SP
+ * (the caller's SP right before it made the call).  Used for all callable
+ * functions, i.e. all C code and all callable asm functions.
+ *
+ * UNWIND_HINT_TYPE_REGS: Used in entry code to indicate that sp_reg+sp_offset
+ * points to a fully populated pt_regs from a syscall, interrupt, or exception.
+ *
+ * UNWIND_HINT_TYPE_REGS_PARTIAL: Used in entry code to indicate that
+ * sp_reg+sp_offset points to the iret return frame.
+ */
+#define UNWIND_HINT_TYPE_CALL		0
+#define UNWIND_HINT_TYPE_REGS		1
+#define UNWIND_HINT_TYPE_REGS_PARTIAL	2
+#define UNWIND_HINT_TYPE_RET_OFFSET	3
+
+#ifdef CONFIG_STACK_VALIDATION
+
+#ifndef __ASSEMBLY__
+
+#define UNWIND_HINT(sp_reg, sp_offset, type, end)		\
+	"987: \n\t"						\
+	".pushsection .discard.unwind_hints\n\t"		\
+	/* struct unwind_hint */				\
+	".long 987b - .\n\t"					\
+	".short " __stringify(sp_offset) "\n\t"			\
+	".byte " __stringify(sp_reg) "\n\t"			\
+	".byte " __stringify(type) "\n\t"			\
+	".byte " __stringify(end) "\n\t"			\
+	".balign 4 \n\t"					\
+	".popsection\n\t"
+
+/*
+ * This macro marks the given function's stack frame as "non-standard", which
+ * tells objtool to ignore the function when doing stack metadata validation.
+ * It should only be used in special cases where you're 100% sure it won't
+ * affect the reliability of frame pointers and kernel stack traces.
+ *
+ * For more information, see tools/objtool/Documentation/stack-validation.txt.
+ */
+#define STACK_FRAME_NON_STANDARD(func) \
+	static void __used __section(.discard.func_stack_frame_non_standard) \
+		*__func_stack_frame_non_standard_##func = func
+
+#else /* __ASSEMBLY__ */
+
+/*
+ * This macro indicates that the following intra-function call is valid.
+ * Any non-annotated intra-function call will cause objtool to issue a warning.
+ */
+#define ANNOTATE_INTRA_FUNCTION_CALL				\
+	999:							\
+	.pushsection .discard.intra_function_calls;		\
+	.long 999b;						\
+	.popsection;
+
+/*
+ * In asm, there are two kinds of code: normal C-type callable functions and
+ * the rest.  The normal callable functions can be called by other code, and
+ * don't do anything unusual with the stack.  Such normal callable functions
+ * are annotated with the ENTRY/ENDPROC macros.  Most asm code falls in this
+ * category.  In this case, no special debugging annotations are needed because
+ * objtool can automatically generate the ORC data for the ORC unwinder to read
+ * at runtime.
+ *
+ * Anything which doesn't fall into the above category, such as syscall and
+ * interrupt handlers, tends to not be called directly by other functions, and
+ * often does unusual non-C-function-type things with the stack pointer.  Such
+ * code needs to be annotated such that objtool can understand it.  The
+ * following CFI hint macros are for this type of code.
+ *
+ * These macros provide hints to objtool about the state of the stack at each
+ * instruction.  Objtool starts from the hints and follows the code flow,
+ * making automatic CFI adjustments when it sees pushes and pops, filling out
+ * the debuginfo as necessary.  It will also warn if it sees any
+ * inconsistencies.
+ */
+.macro UNWIND_HINT sp_reg:req sp_offset=0 type:req end=0
+.Lunwind_hint_ip_\@:
+	.pushsection .discard.unwind_hints
+		/* struct unwind_hint */
+		.long .Lunwind_hint_ip_\@ - .
+		.short \sp_offset
+		.byte \sp_reg
+		.byte \type
+		.byte \end
+		.balign 4
+	.popsection
+.endm
+
+#endif /* __ASSEMBLY__ */
+
+#else /* !CONFIG_STACK_VALIDATION */
+
+#ifndef __ASSEMBLY__
+
+#define UNWIND_HINT(sp_reg, sp_offset, type, end)	\
+	"\n\t"
+#define STACK_FRAME_NON_STANDARD(func)
+#else
+#define ANNOTATE_INTRA_FUNCTION_CALL
+.macro UNWIND_HINT sp_reg:req sp_offset=0 type:req end=0
+.endm
+#endif
+
+#endif /* CONFIG_STACK_VALIDATION */
+
+#endif /* _LINUX_OBJTOOL_H */
diff --git a/tools/include/linux/static_call_types.h b/tools/include/linux/static_call_types.h
new file mode 100644
index 000000000000..89135bb35bf7
--- /dev/null
+++ b/tools/include/linux/static_call_types.h
@@ -0,0 +1,35 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _STATIC_CALL_TYPES_H
+#define _STATIC_CALL_TYPES_H
+
+#include <linux/types.h>
+#include <linux/stringify.h>
+
+#define STATIC_CALL_KEY_PREFIX		__SCK__
+#define STATIC_CALL_KEY_PREFIX_STR	__stringify(STATIC_CALL_KEY_PREFIX)
+#define STATIC_CALL_KEY_PREFIX_LEN	(sizeof(STATIC_CALL_KEY_PREFIX_STR) - 1)
+#define STATIC_CALL_KEY(name)		__PASTE(STATIC_CALL_KEY_PREFIX, name)
+
+#define STATIC_CALL_TRAMP_PREFIX	__SCT__
+#define STATIC_CALL_TRAMP_PREFIX_STR	__stringify(STATIC_CALL_TRAMP_PREFIX)
+#define STATIC_CALL_TRAMP_PREFIX_LEN	(sizeof(STATIC_CALL_TRAMP_PREFIX_STR) - 1)
+#define STATIC_CALL_TRAMP(name)		__PASTE(STATIC_CALL_TRAMP_PREFIX, name)
+#define STATIC_CALL_TRAMP_STR(name)	__stringify(STATIC_CALL_TRAMP(name))
+
+/*
+ * Flags in the low bits of static_call_site::key.
+ */
+#define STATIC_CALL_SITE_TAIL 1UL	/* tail call */
+#define STATIC_CALL_SITE_INIT 2UL	/* init section */
+#define STATIC_CALL_SITE_FLAGS 3UL
+
+/*
+ * The static call site table needs to be created by external tooling (objtool
+ * or a compiler plugin).
+ */
+struct static_call_site {
+	s32 addr;
+	s32 key;
+};
+
+#endif /* _STATIC_CALL_TYPES_H */
diff --git a/tools/include/uapi/asm-generic/unistd.h b/tools/include/uapi/asm-generic/unistd.h
index c8c189a5f0a6..f2b5d72a46c2 100644
--- a/tools/include/uapi/asm-generic/unistd.h
+++ b/tools/include/uapi/asm-generic/unistd.h
@@ -140,7 +140,7 @@ __SYSCALL(__NR_renameat, sys_renameat)
 #define __NR_umount2 39
 __SYSCALL(__NR_umount2, sys_umount)
 #define __NR_mount 40
-__SC_COMP(__NR_mount, sys_mount, compat_sys_mount)
+__SYSCALL(__NR_mount, sys_mount)
 #define __NR_pivot_root 41
 __SYSCALL(__NR_pivot_root, sys_pivot_root)
 
@@ -207,9 +207,9 @@ __SYSCALL(__NR_read, sys_read)
 #define __NR_write 64
 __SYSCALL(__NR_write, sys_write)
 #define __NR_readv 65
-__SC_COMP(__NR_readv, sys_readv, compat_sys_readv)
+__SC_COMP(__NR_readv, sys_readv, sys_readv)
 #define __NR_writev 66
-__SC_COMP(__NR_writev, sys_writev, compat_sys_writev)
+__SC_COMP(__NR_writev, sys_writev, sys_writev)
 #define __NR_pread64 67
 __SC_COMP(__NR_pread64, sys_pread64, compat_sys_pread64)
 #define __NR_pwrite64 68
@@ -237,7 +237,7 @@ __SC_COMP(__NR_signalfd4, sys_signalfd4, compat_sys_signalfd4)
 
 /* fs/splice.c */
 #define __NR_vmsplice 75
-__SC_COMP(__NR_vmsplice, sys_vmsplice, compat_sys_vmsplice)
+__SYSCALL(__NR_vmsplice, sys_vmsplice)
 #define __NR_splice 76
 __SYSCALL(__NR_splice, sys_splice)
 #define __NR_tee 77
@@ -727,11 +727,9 @@ __SYSCALL(__NR_setns, sys_setns)
 #define __NR_sendmmsg 269
 __SC_COMP(__NR_sendmmsg, sys_sendmmsg, compat_sys_sendmmsg)
 #define __NR_process_vm_readv 270
-__SC_COMP(__NR_process_vm_readv, sys_process_vm_readv, \
-          compat_sys_process_vm_readv)
+__SYSCALL(__NR_process_vm_readv, sys_process_vm_readv)
 #define __NR_process_vm_writev 271
-__SC_COMP(__NR_process_vm_writev, sys_process_vm_writev, \
-          compat_sys_process_vm_writev)
+__SYSCALL(__NR_process_vm_writev, sys_process_vm_writev)
 #define __NR_kcmp 272
 __SYSCALL(__NR_kcmp, sys_kcmp)
 #define __NR_finit_module 273
@@ -850,6 +848,8 @@ __SYSCALL(__NR_pidfd_open, sys_pidfd_open)
 #define __NR_clone3 435
 __SYSCALL(__NR_clone3, sys_clone3)
 #endif
+#define __NR_close_range 436
+__SYSCALL(__NR_close_range, sys_close_range)
 
 #define __NR_openat2 437
 __SYSCALL(__NR_openat2, sys_openat2)
diff --git a/tools/include/uapi/drm/i915_drm.h b/tools/include/uapi/drm/i915_drm.h
index 14b67cd6b54b..00546062e023 100644
--- a/tools/include/uapi/drm/i915_drm.h
+++ b/tools/include/uapi/drm/i915_drm.h
@@ -55,7 +55,7 @@ extern "C" {
  *	cause the related events to not be seen.
  *
  * I915_RESET_UEVENT - Event is generated just before an attempt to reset the
- *	the GPU. The value supplied with the event is always 1. NOTE: Disable
+ *	GPU. The value supplied with the event is always 1. NOTE: Disable
  *	reset via module parameter will cause this event to not be seen.
  */
 #define I915_L3_PARITY_UEVENT		"L3_PARITY_ERROR"
@@ -1934,7 +1934,7 @@ enum drm_i915_perf_property_id {
 
 	/**
 	 * The value specifies which set of OA unit metrics should be
-	 * be configured, defining the contents of any OA unit reports.
+	 * configured, defining the contents of any OA unit reports.
 	 *
 	 * This property is available in perf revision 1.
 	 */
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index b134e679e9db..b6238b2209b7 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -81,6 +81,12 @@ struct bpf_cgroup_storage_key {
 	__u32	attach_type;		/* program attach type */
 };
 
+union bpf_iter_link_info {
+	struct {
+		__u32	map_fd;
+	} map;
+};
+
 /* BPF syscall commands, see bpf(2) man-page for details. */
 enum bpf_cmd {
 	BPF_MAP_CREATE,
@@ -249,13 +255,6 @@ enum bpf_link_type {
 	MAX_BPF_LINK_TYPE,
 };
 
-enum bpf_iter_link_info {
-	BPF_ITER_LINK_UNSPEC = 0,
-	BPF_ITER_LINK_MAP_FD = 1,
-
-	MAX_BPF_ITER_LINK_INFO,
-};
-
 /* cgroup-bpf attach flags used in BPF_PROG_ATTACH command
  *
  * NONE(default): No further bpf programs allowed in the subtree.
@@ -623,6 +622,8 @@ union bpf_attr {
 		};
 		__u32		attach_type;	/* attach type */
 		__u32		flags;		/* extra flags */
+		__aligned_u64	iter_info;	/* extra bpf_iter_link_info */
+		__u32		iter_info_len;	/* iter_info length */
 	} link_create;
 
 	struct { /* struct used by BPF_LINK_UPDATE command */
@@ -766,7 +767,7 @@ union bpf_attr {
  *
  * 		Also, note that **bpf_trace_printk**\ () is slow, and should
  * 		only be used for debugging purposes. For this reason, a notice
- * 		bloc (spanning several lines) is printed to kernel logs and
+ * 		block (spanning several lines) is printed to kernel logs and
  * 		states that the helper should not be used "for production use"
  * 		the first time this helper is used (or more precisely, when
  * 		**trace_printk**\ () buffers are allocated). For passing values
@@ -1032,14 +1033,14 @@ union bpf_attr {
  *
  * 			int ret;
  * 			struct bpf_tunnel_key key = {};
- * 			
+ *
  * 			ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0);
  * 			if (ret < 0)
  * 				return TC_ACT_SHOT;	// drop packet
- * 			
+ *
  * 			if (key.remote_ipv4 != 0x0a000001)
  * 				return TC_ACT_SHOT;	// drop packet
- * 			
+ *
  * 			return TC_ACT_OK;		// accept packet
  *
  * 		This interface can also be used with all encapsulation devices
@@ -1146,7 +1147,7 @@ union bpf_attr {
  * 	Description
  * 		Retrieve the realm or the route, that is to say the
  * 		**tclassid** field of the destination for the *skb*. The
- * 		indentifier retrieved is a user-provided tag, similar to the
+ * 		identifier retrieved is a user-provided tag, similar to the
  * 		one used with the net_cls cgroup (see description for
  * 		**bpf_get_cgroup_classid**\ () helper), but here this tag is
  * 		held by a route (a destination entry), not by a task.
diff --git a/tools/include/uapi/linux/in.h b/tools/include/uapi/linux/in.h
index 8533bf07450f..7d6687618d80 100644
--- a/tools/include/uapi/linux/in.h
+++ b/tools/include/uapi/linux/in.h
@@ -123,6 +123,7 @@ struct in_addr {
 #define IP_CHECKSUM	23
 #define IP_BIND_ADDRESS_NO_PORT	24
 #define IP_RECVFRAGSIZE	25
+#define IP_RECVERR_RFC4884	26
 
 /* IP_MTU_DISCOVER values */
 #define IP_PMTUDISC_DONT		0	/* Never send DF frames */
@@ -134,7 +135,7 @@ struct in_addr {
  * this socket to prevent accepting spoofed ones.
  */
 #define IP_PMTUDISC_INTERFACE		4
-/* weaker version of IP_PMTUDISC_INTERFACE, which allos packets to get
+/* weaker version of IP_PMTUDISC_INTERFACE, which allows packets to get
  * fragmented if they exeed the interface mtu
  */
 #define IP_PMTUDISC_OMIT		5
diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h
index 4fdf30316582..7d8eced6f459 100644
--- a/tools/include/uapi/linux/kvm.h
+++ b/tools/include/uapi/linux/kvm.h
@@ -289,6 +289,7 @@ struct kvm_run {
 		/* KVM_EXIT_FAIL_ENTRY */
 		struct {
 			__u64 hardware_entry_failure_reason;
+			__u32 cpu;
 		} fail_entry;
 		/* KVM_EXIT_EXCEPTION */
 		struct {
@@ -789,9 +790,10 @@ struct kvm_ppc_resize_hpt {
 #define KVM_VM_PPC_HV 1
 #define KVM_VM_PPC_PR 2
 
-/* on MIPS, 0 forces trap & emulate, 1 forces VZ ASE */
-#define KVM_VM_MIPS_TE		0
+/* on MIPS, 0 indicates auto, 1 forces VZ ASE, 2 forces trap & emulate */
+#define KVM_VM_MIPS_AUTO	0
 #define KVM_VM_MIPS_VZ		1
+#define KVM_VM_MIPS_TE		2
 
 #define KVM_S390_SIE_PAGE_OFFSET 1
 
@@ -1031,6 +1033,10 @@ struct kvm_ppc_resize_hpt {
 #define KVM_CAP_PPC_SECURE_GUEST 181
 #define KVM_CAP_HALT_POLL 182
 #define KVM_CAP_ASYNC_PF_INT 183
+#define KVM_CAP_LAST_CPU 184
+#define KVM_CAP_SMALLER_MAXPHYADDR 185
+#define KVM_CAP_S390_DIAG318 186
+#define KVM_CAP_STEAL_TIME 187
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h
index 21a1edd08cbe..3e5dcdd48a49 100644
--- a/tools/include/uapi/linux/perf_event.h
+++ b/tools/include/uapi/linux/perf_event.h
@@ -383,7 +383,8 @@ struct perf_event_attr {
 				bpf_event      :  1, /* include bpf events */
 				aux_output     :  1, /* generate AUX records instead of events */
 				cgroup         :  1, /* include cgroup events */
-				__reserved_1   : 31;
+				text_poke      :  1, /* include text poke events */
+				__reserved_1   : 30;
 
 	union {
 		__u32		wakeup_events;	  /* wakeup every n events */
@@ -1041,12 +1042,35 @@ enum perf_event_type {
 	 */
 	PERF_RECORD_CGROUP			= 19,
 
+	/*
+	 * Records changes to kernel text i.e. self-modified code. 'old_len' is
+	 * the number of old bytes, 'new_len' is the number of new bytes. Either
+	 * 'old_len' or 'new_len' may be zero to indicate, for example, the
+	 * addition or removal of a trampoline. 'bytes' contains the old bytes
+	 * followed immediately by the new bytes.
+	 *
+	 * struct {
+	 *	struct perf_event_header	header;
+	 *	u64				addr;
+	 *	u16				old_len;
+	 *	u16				new_len;
+	 *	u8				bytes[];
+	 *	struct sample_id		sample_id;
+	 * };
+	 */
+	PERF_RECORD_TEXT_POKE			= 20,
+
 	PERF_RECORD_MAX,			/* non-ABI */
 };
 
 enum perf_record_ksymbol_type {
 	PERF_RECORD_KSYMBOL_TYPE_UNKNOWN	= 0,
 	PERF_RECORD_KSYMBOL_TYPE_BPF		= 1,
+	/*
+	 * Out of line code such as kprobe-replaced instructions or optimized
+	 * kprobes or ftrace trampolines.
+	 */
+	PERF_RECORD_KSYMBOL_TYPE_OOL		= 2,
 	PERF_RECORD_KSYMBOL_TYPE_MAX		/* non-ABI */
 };
 
@@ -1172,7 +1196,7 @@ union perf_mem_data_src {
 
 #define PERF_MEM_SNOOPX_FWD	0x01 /* forward */
 /* 1 free */
-#define PERF_MEM_SNOOPX_SHIFT	37
+#define PERF_MEM_SNOOPX_SHIFT	38
 
 /* locked instruction */
 #define PERF_MEM_LOCK_NA	0x01 /* not available */
diff --git a/tools/include/uapi/linux/vhost.h b/tools/include/uapi/linux/vhost.h
index 0c2349612e77..75232185324a 100644
--- a/tools/include/uapi/linux/vhost.h
+++ b/tools/include/uapi/linux/vhost.h
@@ -91,6 +91,8 @@
 
 /* Use message type V2 */
 #define VHOST_BACKEND_F_IOTLB_MSG_V2 0x1
+/* IOTLB can accept batching hints */
+#define VHOST_BACKEND_F_IOTLB_BATCH  0x2
 
 #define VHOST_SET_BACKEND_FEATURES _IOW(VHOST_VIRTIO, 0x25, __u64)
 #define VHOST_GET_BACKEND_FEATURES _IOR(VHOST_VIRTIO, 0x26, __u64)
diff --git a/tools/io_uring/io_uring-bench.c b/tools/io_uring/io_uring-bench.c
index 0f257139b003..7703f0118385 100644
--- a/tools/io_uring/io_uring-bench.c
+++ b/tools/io_uring/io_uring-bench.c
@@ -130,7 +130,7 @@ static int io_uring_register_files(struct submitter *s)
 					s->nr_files);
 }
 
-static int gettid(void)
+static int lk_gettid(void)
 {
 	return syscall(__NR_gettid);
 }
@@ -281,7 +281,7 @@ static void *submitter_fn(void *data)
 	struct io_sq_ring *ring = &s->sq_ring;
 	int ret, prepped;
 
-	printf("submitter=%d\n", gettid());
+	printf("submitter=%d\n", lk_gettid());
 
 	srand48_r(pthread_self(), &s->rand);
 
diff --git a/tools/lib/api/fd/array.c b/tools/lib/api/fd/array.c
index 58d44d5eee31..5e6cb9debe37 100644
--- a/tools/lib/api/fd/array.c
+++ b/tools/lib/api/fd/array.c
@@ -8,6 +8,7 @@
 #include <poll.h>
 #include <stdlib.h>
 #include <unistd.h>
+#include <string.h>
 
 void fdarray__init(struct fdarray *fda, int nr_autogrow)
 {
@@ -19,7 +20,7 @@ void fdarray__init(struct fdarray *fda, int nr_autogrow)
 
 int fdarray__grow(struct fdarray *fda, int nr)
 {
-	void *priv;
+	struct priv *priv;
 	int nr_alloc = fda->nr_alloc + nr;
 	size_t psize = sizeof(fda->priv[0]) * nr_alloc;
 	size_t size  = sizeof(struct pollfd) * nr_alloc;
@@ -34,6 +35,9 @@ int fdarray__grow(struct fdarray *fda, int nr)
 		return -ENOMEM;
 	}
 
+	memset(&entries[fda->nr_alloc], 0, sizeof(struct pollfd) * nr);
+	memset(&priv[fda->nr_alloc], 0, sizeof(fda->priv[0]) * nr);
+
 	fda->nr_alloc = nr_alloc;
 	fda->entries  = entries;
 	fda->priv     = priv;
@@ -69,7 +73,7 @@ void fdarray__delete(struct fdarray *fda)
 	free(fda);
 }
 
-int fdarray__add(struct fdarray *fda, int fd, short revents)
+int fdarray__add(struct fdarray *fda, int fd, short revents, enum fdarray_flags flags)
 {
 	int pos = fda->nr;
 
@@ -79,6 +83,7 @@ int fdarray__add(struct fdarray *fda, int fd, short revents)
 
 	fda->entries[fda->nr].fd     = fd;
 	fda->entries[fda->nr].events = revents;
+	fda->priv[fda->nr].flags = flags;
 	fda->nr++;
 	return pos;
 }
@@ -93,22 +98,22 @@ int fdarray__filter(struct fdarray *fda, short revents,
 		return 0;
 
 	for (fd = 0; fd < fda->nr; ++fd) {
+		if (!fda->entries[fd].events)
+			continue;
+
 		if (fda->entries[fd].revents & revents) {
 			if (entry_destructor)
 				entry_destructor(fda, fd, arg);
 
+			fda->entries[fd].revents = fda->entries[fd].events = 0;
 			continue;
 		}
 
-		if (fd != nr) {
-			fda->entries[nr] = fda->entries[fd];
-			fda->priv[nr]	 = fda->priv[fd];
-		}
-
-		++nr;
+		if (!(fda->priv[fd].flags & fdarray_flag__nonfilterable))
+			++nr;
 	}
 
-	return fda->nr = nr;
+	return nr;
 }
 
 int fdarray__poll(struct fdarray *fda, int timeout)
diff --git a/tools/lib/api/fd/array.h b/tools/lib/api/fd/array.h
index b39557d1a88f..7fcf21a33c0c 100644
--- a/tools/lib/api/fd/array.h
+++ b/tools/lib/api/fd/array.h
@@ -21,19 +21,27 @@ struct fdarray {
 	int	       nr_alloc;
 	int	       nr_autogrow;
 	struct pollfd *entries;
-	union {
-		int    idx;
-		void   *ptr;
+	struct priv {
+		union {
+			int    idx;
+			void   *ptr;
+		};
+		unsigned int flags;
 	} *priv;
 };
 
+enum fdarray_flags {
+	fdarray_flag__default	    = 0x00000000,
+	fdarray_flag__nonfilterable = 0x00000001
+};
+
 void fdarray__init(struct fdarray *fda, int nr_autogrow);
 void fdarray__exit(struct fdarray *fda);
 
 struct fdarray *fdarray__new(int nr_alloc, int nr_autogrow);
 void fdarray__delete(struct fdarray *fda);
 
-int fdarray__add(struct fdarray *fda, int fd, short revents);
+int fdarray__add(struct fdarray *fda, int fd, short revents, enum fdarray_flags flags);
 int fdarray__poll(struct fdarray *fda, int timeout);
 int fdarray__filter(struct fdarray *fda, short revents,
 		    void (*entry_destructor)(struct fdarray *fda, int fd, void *arg),
diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile
index bf8ed134cb8a..9ae8f4ef0aac 100644
--- a/tools/lib/bpf/Makefile
+++ b/tools/lib/bpf/Makefile
@@ -59,7 +59,7 @@ FEATURE_USER = .libbpf
 FEATURE_TESTS = libelf libelf-mmap zlib bpf reallocarray
 FEATURE_DISPLAY = libelf zlib bpf
 
-INCLUDES = -I. -I$(srctree)/tools/include -I$(srctree)/tools/arch/$(ARCH)/include/uapi -I$(srctree)/tools/include/uapi
+INCLUDES = -I. -I$(srctree)/tools/include -I$(srctree)/tools/include/uapi
 FEATURE_CHECK_CFLAGS-bpf = $(INCLUDES)
 
 check_feat := 1
@@ -152,6 +152,7 @@ GLOBAL_SYM_COUNT = $(shell readelf -s --wide $(BPF_IN_SHARED) | \
 			   awk '/GLOBAL/ && /DEFAULT/ && !/UND/ {print $$NF}' | \
 			   sort -u | wc -l)
 VERSIONED_SYM_COUNT = $(shell readelf --dyn-syms --wide $(OUTPUT)libbpf.so | \
+			      awk '/GLOBAL/ && /DEFAULT/ && !/UND/ {print $$NF}' | \
 			      grep -Eo '[^ ]+@LIBBPF_' | cut -d@ -f1 | sort -u | wc -l)
 
 CMD_TARGETS = $(LIB_TARGET) $(PC_FILE)
@@ -219,6 +220,7 @@ check_abi: $(OUTPUT)libbpf.so
 		    awk '/GLOBAL/ && /DEFAULT/ && !/UND/ {print $$NF}'|  \
 		    sort -u > $(OUTPUT)libbpf_global_syms.tmp;		 \
 		readelf --dyn-syms --wide $(OUTPUT)libbpf.so |		 \
+		    awk '/GLOBAL/ && /DEFAULT/ && !/UND/ {print $$NF}'|  \
 		    grep -Eo '[^ ]+@LIBBPF_' | cut -d@ -f1 |		 \
 		    sort -u > $(OUTPUT)libbpf_versioned_syms.tmp; 	 \
 		diff -u $(OUTPUT)libbpf_global_syms.tmp			 \
diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
index eab14c97c15d..0750681057c2 100644
--- a/tools/lib/bpf/bpf.c
+++ b/tools/lib/bpf/bpf.c
@@ -599,6 +599,9 @@ int bpf_link_create(int prog_fd, int target_fd,
 	attr.link_create.target_fd = target_fd;
 	attr.link_create.attach_type = attach_type;
 	attr.link_create.flags = OPTS_GET(opts, flags, 0);
+	attr.link_create.iter_info =
+		ptr_to_u64(OPTS_GET(opts, iter_info, (void *)0));
+	attr.link_create.iter_info_len = OPTS_GET(opts, iter_info_len, 0);
 
 	return sys_bpf(BPF_LINK_CREATE, &attr, sizeof(attr));
 }
diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h
index 28855fd5b5f4..015d13f25fcc 100644
--- a/tools/lib/bpf/bpf.h
+++ b/tools/lib/bpf/bpf.h
@@ -168,11 +168,14 @@ LIBBPF_API int bpf_prog_detach(int attachable_fd, enum bpf_attach_type type);
 LIBBPF_API int bpf_prog_detach2(int prog_fd, int attachable_fd,
 				enum bpf_attach_type type);
 
+union bpf_iter_link_info; /* defined in up-to-date linux/bpf.h */
 struct bpf_link_create_opts {
 	size_t sz; /* size of this struct for forward/backward compatibility */
 	__u32 flags;
+	union bpf_iter_link_info *iter_info;
+	__u32 iter_info_len;
 };
-#define bpf_link_create_opts__last_field flags
+#define bpf_link_create_opts__last_field iter_info_len
 
 LIBBPF_API int bpf_link_create(int prog_fd, int target_fd,
 			       enum bpf_attach_type attach_type,
diff --git a/tools/lib/bpf/bpf_helpers.h b/tools/lib/bpf/bpf_helpers.h
index bc14db706b88..e9a4ecddb7a5 100644
--- a/tools/lib/bpf/bpf_helpers.h
+++ b/tools/lib/bpf/bpf_helpers.h
@@ -40,7 +40,7 @@
  * Helper macro to manipulate data structures
  */
 #ifndef offsetof
-#define offsetof(TYPE, MEMBER)  __builtin_offsetof(TYPE, MEMBER)
+#define offsetof(TYPE, MEMBER)	((unsigned long)&((TYPE *)0)->MEMBER)
 #endif
 #ifndef container_of
 #define container_of(ptr, type, member)				\
diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
index 856b09a04563..6bdbc389b493 100644
--- a/tools/lib/bpf/btf.c
+++ b/tools/lib/bpf/btf.c
@@ -41,6 +41,7 @@ struct btf {
 	__u32 types_size;
 	__u32 data_size;
 	int fd;
+	int ptr_sz;
 };
 
 static inline __u64 ptr_to_u64(const void *ptr)
@@ -221,6 +222,70 @@ const struct btf_type *btf__type_by_id(const struct btf *btf, __u32 type_id)
 	return btf->types[type_id];
 }
 
+static int determine_ptr_size(const struct btf *btf)
+{
+	const struct btf_type *t;
+	const char *name;
+	int i;
+
+	for (i = 1; i <= btf->nr_types; i++) {
+		t = btf__type_by_id(btf, i);
+		if (!btf_is_int(t))
+			continue;
+
+		name = btf__name_by_offset(btf, t->name_off);
+		if (!name)
+			continue;
+
+		if (strcmp(name, "long int") == 0 ||
+		    strcmp(name, "long unsigned int") == 0) {
+			if (t->size != 4 && t->size != 8)
+				continue;
+			return t->size;
+		}
+	}
+
+	return -1;
+}
+
+static size_t btf_ptr_sz(const struct btf *btf)
+{
+	if (!btf->ptr_sz)
+		((struct btf *)btf)->ptr_sz = determine_ptr_size(btf);
+	return btf->ptr_sz < 0 ? sizeof(void *) : btf->ptr_sz;
+}
+
+/* Return pointer size this BTF instance assumes. The size is heuristically
+ * determined by looking for 'long' or 'unsigned long' integer type and
+ * recording its size in bytes. If BTF type information doesn't have any such
+ * type, this function returns 0. In the latter case, native architecture's
+ * pointer size is assumed, so will be either 4 or 8, depending on
+ * architecture that libbpf was compiled for. It's possible to override
+ * guessed value by using btf__set_pointer_size() API.
+ */
+size_t btf__pointer_size(const struct btf *btf)
+{
+	if (!btf->ptr_sz)
+		((struct btf *)btf)->ptr_sz = determine_ptr_size(btf);
+
+	if (btf->ptr_sz < 0)
+		/* not enough BTF type info to guess */
+		return 0;
+
+	return btf->ptr_sz;
+}
+
+/* Override or set pointer size in bytes. Only values of 4 and 8 are
+ * supported.
+ */
+int btf__set_pointer_size(struct btf *btf, size_t ptr_sz)
+{
+	if (ptr_sz != 4 && ptr_sz != 8)
+		return -EINVAL;
+	btf->ptr_sz = ptr_sz;
+	return 0;
+}
+
 static bool btf_type_is_void(const struct btf_type *t)
 {
 	return t == &btf_void || btf_is_fwd(t);
@@ -253,7 +318,7 @@ __s64 btf__resolve_size(const struct btf *btf, __u32 type_id)
 			size = t->size;
 			goto done;
 		case BTF_KIND_PTR:
-			size = sizeof(void *);
+			size = btf_ptr_sz(btf);
 			goto done;
 		case BTF_KIND_TYPEDEF:
 		case BTF_KIND_VOLATILE:
@@ -293,9 +358,9 @@ int btf__align_of(const struct btf *btf, __u32 id)
 	switch (kind) {
 	case BTF_KIND_INT:
 	case BTF_KIND_ENUM:
-		return min(sizeof(void *), (size_t)t->size);
+		return min(btf_ptr_sz(btf), (size_t)t->size);
 	case BTF_KIND_PTR:
-		return sizeof(void *);
+		return btf_ptr_sz(btf);
 	case BTF_KIND_TYPEDEF:
 	case BTF_KIND_VOLATILE:
 	case BTF_KIND_CONST:
@@ -533,6 +598,18 @@ struct btf *btf__parse_elf(const char *path, struct btf_ext **btf_ext)
 	if (IS_ERR(btf))
 		goto done;
 
+	switch (gelf_getclass(elf)) {
+	case ELFCLASS32:
+		btf__set_pointer_size(btf, 4);
+		break;
+	case ELFCLASS64:
+		btf__set_pointer_size(btf, 8);
+		break;
+	default:
+		pr_warn("failed to get ELF class (bitness) for %s\n", path);
+		break;
+	}
+
 	if (btf_ext && btf_ext_data) {
 		*btf_ext = btf_ext__new(btf_ext_data->d_buf,
 					btf_ext_data->d_size);
@@ -564,8 +641,8 @@ done:
 
 struct btf *btf__parse_raw(const char *path)
 {
+	struct btf *btf = NULL;
 	void *data = NULL;
-	struct btf *btf;
 	FILE *f = NULL;
 	__u16 magic;
 	int err = 0;
@@ -582,6 +659,12 @@ struct btf *btf__parse_raw(const char *path)
 		err = -EIO;
 		goto err_out;
 	}
+	if (magic == __bswap_16(BTF_MAGIC)) {
+		/* non-native endian raw BTF */
+		pr_warn("non-native BTF endianness is not supported\n");
+		err = -LIBBPF_ERRNO__ENDIAN;
+		goto err_out;
+	}
 	if (magic != BTF_MAGIC) {
 		/* definitely not a raw BTF */
 		err = -EPROTO;
diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h
index f4a1a1d2b9a3..1ca14448df4c 100644
--- a/tools/lib/bpf/btf.h
+++ b/tools/lib/bpf/btf.h
@@ -76,6 +76,8 @@ LIBBPF_API __s32 btf__find_by_name_kind(const struct btf *btf,
 LIBBPF_API __u32 btf__get_nr_types(const struct btf *btf);
 LIBBPF_API const struct btf_type *btf__type_by_id(const struct btf *btf,
 						  __u32 id);
+LIBBPF_API size_t btf__pointer_size(const struct btf *btf);
+LIBBPF_API int btf__set_pointer_size(struct btf *btf, size_t ptr_sz);
 LIBBPF_API __s64 btf__resolve_size(const struct btf *btf, __u32 type_id);
 LIBBPF_API int btf__resolve_type(const struct btf *btf, __u32 type_id);
 LIBBPF_API int btf__align_of(const struct btf *btf, __u32 id);
diff --git a/tools/lib/bpf/btf_dump.c b/tools/lib/bpf/btf_dump.c
index cf711168d34a..57c00fa63932 100644
--- a/tools/lib/bpf/btf_dump.c
+++ b/tools/lib/bpf/btf_dump.c
@@ -13,6 +13,7 @@
 #include <errno.h>
 #include <linux/err.h>
 #include <linux/btf.h>
+#include <linux/kernel.h>
 #include "btf.h"
 #include "hashmap.h"
 #include "libbpf.h"
@@ -60,6 +61,7 @@ struct btf_dump {
 	const struct btf_ext *btf_ext;
 	btf_dump_printf_fn_t printf_fn;
 	struct btf_dump_opts opts;
+	int ptr_sz;
 	bool strip_mods;
 
 	/* per-type auxiliary state */
@@ -138,6 +140,7 @@ struct btf_dump *btf_dump__new(const struct btf *btf,
 	d->btf_ext = btf_ext;
 	d->printf_fn = printf_fn;
 	d->opts.ctx = opts ? opts->ctx : NULL;
+	d->ptr_sz = btf__pointer_size(btf) ? : sizeof(void *);
 
 	d->type_names = hashmap__new(str_hash_fn, str_equal_fn, NULL);
 	if (IS_ERR(d->type_names)) {
@@ -549,6 +552,9 @@ static int btf_dump_order_type(struct btf_dump *d, __u32 id, bool through_ptr)
 	}
 }
 
+static void btf_dump_emit_missing_aliases(struct btf_dump *d, __u32 id,
+					  const struct btf_type *t);
+
 static void btf_dump_emit_struct_fwd(struct btf_dump *d, __u32 id,
 				     const struct btf_type *t);
 static void btf_dump_emit_struct_def(struct btf_dump *d, __u32 id,
@@ -671,6 +677,9 @@ static void btf_dump_emit_type(struct btf_dump *d, __u32 id, __u32 cont_id)
 
 	switch (kind) {
 	case BTF_KIND_INT:
+		/* Emit type alias definitions if necessary */
+		btf_dump_emit_missing_aliases(d, id, t);
+
 		tstate->emit_state = EMITTED;
 		break;
 	case BTF_KIND_ENUM:
@@ -797,7 +806,7 @@ static void btf_dump_emit_bit_padding(const struct btf_dump *d,
 				      int align, int lvl)
 {
 	int off_diff = m_off - cur_off;
-	int ptr_bits = sizeof(void *) * 8;
+	int ptr_bits = d->ptr_sz * 8;
 
 	if (off_diff <= 0)
 		/* no gap */
@@ -870,7 +879,7 @@ static void btf_dump_emit_struct_def(struct btf_dump *d,
 			btf_dump_printf(d, ": %d", m_sz);
 			off = m_off + m_sz;
 		} else {
-			m_sz = max(0, btf__resolve_size(d->btf, m->type));
+			m_sz = max((__s64)0, btf__resolve_size(d->btf, m->type));
 			off = m_off + m_sz * 8;
 		}
 		btf_dump_printf(d, ";");
@@ -890,6 +899,32 @@ static void btf_dump_emit_struct_def(struct btf_dump *d,
 		btf_dump_printf(d, " __attribute__((packed))");
 }
 
+static const char *missing_base_types[][2] = {
+	/*
+	 * GCC emits typedefs to its internal __PolyX_t types when compiling Arm
+	 * SIMD intrinsics. Alias them to standard base types.
+	 */
+	{ "__Poly8_t",		"unsigned char" },
+	{ "__Poly16_t",		"unsigned short" },
+	{ "__Poly64_t",		"unsigned long long" },
+	{ "__Poly128_t",	"unsigned __int128" },
+};
+
+static void btf_dump_emit_missing_aliases(struct btf_dump *d, __u32 id,
+					  const struct btf_type *t)
+{
+	const char *name = btf_dump_type_name(d, id);
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(missing_base_types); i++) {
+		if (strcmp(name, missing_base_types[i][0]) == 0) {
+			btf_dump_printf(d, "typedef %s %s;\n\n",
+					missing_base_types[i][1], name);
+			break;
+		}
+	}
+}
+
 static void btf_dump_emit_enum_fwd(struct btf_dump *d, __u32 id,
 				   const struct btf_type *t)
 {
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 7be04e45d29c..e493d6048143 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -2264,7 +2264,7 @@ static int bpf_object__init_user_btf_maps(struct bpf_object *obj, bool strict,
 		data = elf_getdata(scn, NULL);
 	if (!scn || !data) {
 		pr_warn("failed to get Elf_Data from map section %d (%s)\n",
-			obj->efile.maps_shndx, MAPS_ELF_SEC);
+			obj->efile.btf_maps_shndx, MAPS_ELF_SEC);
 		return -EINVAL;
 	}
 
@@ -2434,6 +2434,8 @@ static int bpf_object__init_btf(struct bpf_object *obj,
 				BTF_ELF_SEC, err);
 			goto out;
 		}
+		/* enforce 8-byte pointers for BPF-targeted BTFs */
+		btf__set_pointer_size(obj->btf, 8);
 		err = 0;
 	}
 	if (btf_ext_data) {
@@ -2542,6 +2544,8 @@ static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj)
 		if (IS_ERR(kern_btf))
 			return PTR_ERR(kern_btf);
 
+		/* enforce 8-byte pointers for BPF-targeted BTFs */
+		btf__set_pointer_size(obj->btf, 8);
 		bpf_object__sanitize_btf(obj, kern_btf);
 	}
 
@@ -3478,10 +3482,11 @@ bpf_object__probe_global_data(struct bpf_object *obj)
 
 	map = bpf_create_map_xattr(&map_attr);
 	if (map < 0) {
-		cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg));
+		ret = -errno;
+		cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg));
 		pr_warn("Error in %s():%s(%d). Couldn't create simple array map.\n",
-			__func__, cp, errno);
-		return -errno;
+			__func__, cp, -ret);
+		return ret;
 	}
 
 	insns[0].imm = map;
@@ -5194,11 +5199,12 @@ static int bpf_object__collect_st_ops_relos(struct bpf_object *obj,
 static int bpf_object__collect_map_relos(struct bpf_object *obj,
 					 GElf_Shdr *shdr, Elf_Data *data)
 {
-	int i, j, nrels, new_sz, ptr_sz = sizeof(void *);
+	const int bpf_ptr_sz = 8, host_ptr_sz = sizeof(void *);
+	int i, j, nrels, new_sz;
 	const struct btf_var_secinfo *vi = NULL;
 	const struct btf_type *sec, *var, *def;
+	struct bpf_map *map = NULL, *targ_map;
 	const struct btf_member *member;
-	struct bpf_map *map, *targ_map;
 	const char *name, *mname;
 	Elf_Data *symbols;
 	unsigned int moff;
@@ -5243,7 +5249,7 @@ static int bpf_object__collect_map_relos(struct bpf_object *obj,
 
 			vi = btf_var_secinfos(sec) + map->btf_var_idx;
 			if (vi->offset <= rel.r_offset &&
-			    rel.r_offset + sizeof(void *) <= vi->offset + vi->size)
+			    rel.r_offset + bpf_ptr_sz <= vi->offset + vi->size)
 				break;
 		}
 		if (j == obj->nr_maps) {
@@ -5279,17 +5285,20 @@ static int bpf_object__collect_map_relos(struct bpf_object *obj,
 			return -EINVAL;
 
 		moff = rel.r_offset - vi->offset - moff;
-		if (moff % ptr_sz)
+		/* here we use BPF pointer size, which is always 64 bit, as we
+		 * are parsing ELF that was built for BPF target
+		 */
+		if (moff % bpf_ptr_sz)
 			return -EINVAL;
-		moff /= ptr_sz;
+		moff /= bpf_ptr_sz;
 		if (moff >= map->init_slots_sz) {
 			new_sz = moff + 1;
-			tmp = realloc(map->init_slots, new_sz * ptr_sz);
+			tmp = realloc(map->init_slots, new_sz * host_ptr_sz);
 			if (!tmp)
 				return -ENOMEM;
 			map->init_slots = tmp;
 			memset(map->init_slots + map->init_slots_sz, 0,
-			       (new_sz - map->init_slots_sz) * ptr_sz);
+			       (new_sz - map->init_slots_sz) * host_ptr_sz);
 			map->init_slots_sz = new_sz;
 		}
 		map->init_slots[moff] = targ_map;
@@ -6012,9 +6021,10 @@ int bpf_program__pin_instance(struct bpf_program *prog, const char *path,
 	}
 
 	if (bpf_obj_pin(prog->instances.fds[instance], path)) {
-		cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg));
+		err = -errno;
+		cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
 		pr_warn("failed to pin program: %s\n", cp);
-		return -errno;
+		return err;
 	}
 	pr_debug("pinned program '%s'\n", path);
 
@@ -6915,7 +6925,7 @@ static const struct bpf_sec_def section_defs[] = {
 						BPF_XDP_DEVMAP),
 	BPF_EAPROG_SEC("xdp_cpumap/",		BPF_PROG_TYPE_XDP,
 						BPF_XDP_CPUMAP),
-	BPF_EAPROG_SEC("xdp",			BPF_PROG_TYPE_XDP,
+	BPF_APROG_SEC("xdp",			BPF_PROG_TYPE_XDP,
 						BPF_XDP),
 	BPF_PROG_SEC("perf_event",		BPF_PROG_TYPE_PERF_EVENT),
 	BPF_PROG_SEC("lwt_in",			BPF_PROG_TYPE_LWT_IN),
@@ -8306,10 +8316,8 @@ bpf_program__attach_iter(struct bpf_program *prog,
 	if (!OPTS_VALID(opts, bpf_iter_attach_opts))
 		return ERR_PTR(-EINVAL);
 
-	if (OPTS_HAS(opts, map_fd)) {
-		target_fd = opts->map_fd;
-		link_create_opts.flags = BPF_ITER_LINK_MAP_FD;
-	}
+	link_create_opts.iter_info = OPTS_GET(opts, link_info, (void *)0);
+	link_create_opts.iter_info_len = OPTS_GET(opts, link_info_len, 0);
 
 	prog_fd = bpf_program__fd(prog);
 	if (prog_fd < 0) {
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index 3ed1399bfbbc..5ecb4069a9f0 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -267,9 +267,10 @@ LIBBPF_API struct bpf_link *bpf_map__attach_struct_ops(struct bpf_map *map);
 
 struct bpf_iter_attach_opts {
 	size_t sz; /* size of this struct for forward/backward compatibility */
-	__u32 map_fd;
+	union bpf_iter_link_info *link_info;
+	__u32 link_info_len;
 };
-#define bpf_iter_attach_opts__last_field map_fd
+#define bpf_iter_attach_opts__last_field link_info_len
 
 LIBBPF_API struct bpf_link *
 bpf_program__attach_iter(struct bpf_program *prog,
diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
index 0c4722bfdd0a..e35bd6cdbdbf 100644
--- a/tools/lib/bpf/libbpf.map
+++ b/tools/lib/bpf/libbpf.map
@@ -295,5 +295,7 @@ LIBBPF_0.1.0 {
 		bpf_program__set_sk_lookup;
 		btf__parse;
 		btf__parse_raw;
+		btf__pointer_size;
 		btf__set_fd;
+		btf__set_pointer_size;
 } LIBBPF_0.0.9;
diff --git a/tools/lib/perf/Documentation/libperf-counting.txt b/tools/lib/perf/Documentation/libperf-counting.txt
index cae9757f49c1..8b75efcd67ce 100644
--- a/tools/lib/perf/Documentation/libperf-counting.txt
+++ b/tools/lib/perf/Documentation/libperf-counting.txt
@@ -7,13 +7,13 @@ libperf-counting - counting interface
 
 DESCRIPTION
 -----------
-The counting interface provides API to meassure and get count for specific perf events.
+The counting interface provides API to measure and get count for specific perf events.
 
 The following test tries to explain count on `counting.c` example.
 
 It is by no means complete guide to counting, but shows libperf basic API for counting.
 
-The `counting.c` comes with libbperf package and can be compiled and run like:
+The `counting.c` comes with libperf package and can be compiled and run like:
 
 [source,bash]
 --
@@ -26,7 +26,8 @@ count 176242, enabled 176242, run 176242
 It requires root access, because of the `PERF_COUNT_SW_CPU_CLOCK` event,
 which is available only for root.
 
-The `counting.c` example monitors two events on the current process and displays their count, in a nutshel it:
+The `counting.c` example monitors two events on the current process and displays
+their count, in a nutshell it:
 
 * creates events
 * adds them to the event list
@@ -152,7 +153,7 @@ Configure event list with the thread map and open events:
 --
 
 Both events are created as disabled (note the `disabled = 1` assignment above),
-so we need to enable the whole list explicitely (both events).
+so we need to enable the whole list explicitly (both events).
 
 From this moment events are counting and we can do our workload.
 
@@ -167,7 +168,8 @@ When we are done we disable the events list.
  79         perf_evlist__disable(evlist);
 --
 
-Now we need to get the counts from events, following code iterates throught the events list and read counts:
+Now we need to get the counts from events, following code iterates through the
+events list and read counts:
 
 [source,c]
 --
@@ -178,7 +180,7 @@ Now we need to get the counts from events, following code iterates throught the
  85         }
 --
 
-And finaly cleanup.
+And finally cleanup.
 
 We close the whole events list (both events) and remove it together with the threads map:
 
diff --git a/tools/lib/perf/Documentation/libperf-sampling.txt b/tools/lib/perf/Documentation/libperf-sampling.txt
index d71a7b4fcf5f..d6ca24f6ef78 100644
--- a/tools/lib/perf/Documentation/libperf-sampling.txt
+++ b/tools/lib/perf/Documentation/libperf-sampling.txt
@@ -8,13 +8,13 @@ libperf-sampling - sampling interface
 
 DESCRIPTION
 -----------
-The sampling interface provides API to meassure and get count for specific perf events.
+The sampling interface provides API to measure and get count for specific perf events.
 
 The following test tries to explain count on `sampling.c` example.
 
 It is by no means complete guide to sampling, but shows libperf basic API for sampling.
 
-The `sampling.c` comes with libbperf package and can be compiled and run like:
+The `sampling.c` comes with libperf package and can be compiled and run like:
 
 [source,bash]
 --
@@ -33,7 +33,8 @@ cpu   0, pid   4465, tid   4470, ip         7f84fe0ebebf, period             176
 
 It requires root access, because it uses hardware cycles event.
 
-The `sampling.c` example profiles/samples all CPUs with hardware cycles, in a nutshel it:
+The `sampling.c` example profiles/samples all CPUs with hardware cycles, in a
+nutshell it:
 
 - creates events
 - adds them to the event list
@@ -90,7 +91,7 @@ Once the setup is complete we start by defining cycles event using the `struct p
  36         };
 --
 
-Next step is to prepare cpus map.
+Next step is to prepare CPUs map.
 
 In this case we will monitor all the available CPUs:
 
@@ -152,7 +153,7 @@ Once the events list is open, we can create memory maps AKA perf ring buffers:
 --
 
 The event is created as disabled (note the `disabled = 1` assignment above),
-so we need to enable the events list explicitely.
+so we need to enable the events list explicitly.
 
 From this moment the cycles event is sampling.
 
@@ -212,7 +213,7 @@ Each sample needs to get parsed:
 106                                 cpu, pid, tid, ip, period);
 --
 
-And finaly cleanup.
+And finally cleanup.
 
 We close the whole events list (both events) and remove it together with the threads map:
 
diff --git a/tools/lib/perf/Documentation/libperf.txt b/tools/lib/perf/Documentation/libperf.txt
index 5a6bb512789d..0c74c30ed23a 100644
--- a/tools/lib/perf/Documentation/libperf.txt
+++ b/tools/lib/perf/Documentation/libperf.txt
@@ -29,7 +29,7 @@ SYNOPSIS
   void libperf_init(libperf_print_fn_t fn);
 --
 
-*API to handle cpu maps:*
+*API to handle CPU maps:*
 
 [source,c]
 --
@@ -217,7 +217,7 @@ Following objects are key to the libperf interface:
 
 [horizontal]
 
-struct perf_cpu_map:: Provides a cpu list abstraction.
+struct perf_cpu_map:: Provides a CPU list abstraction.
 
 struct perf_thread_map:: Provides a thread list abstraction.
 
diff --git a/tools/lib/perf/evlist.c b/tools/lib/perf/evlist.c
index 6a875a0f01bb..2208444ecb44 100644
--- a/tools/lib/perf/evlist.c
+++ b/tools/lib/perf/evlist.c
@@ -305,9 +305,9 @@ int perf_evlist__alloc_pollfd(struct perf_evlist *evlist)
 }
 
 int perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd,
-			    void *ptr, short revent)
+			    void *ptr, short revent, enum fdarray_flags flags)
 {
-	int pos = fdarray__add(&evlist->pollfd, fd, revent | POLLERR | POLLHUP);
+	int pos = fdarray__add(&evlist->pollfd, fd, revent | POLLERR | POLLHUP, flags);
 
 	if (pos >= 0) {
 		evlist->pollfd.priv[pos].ptr = ptr;
@@ -488,7 +488,7 @@ mmap_per_evsel(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops,
 		revent = !overwrite ? POLLIN : 0;
 
 		if (!evsel->system_wide &&
-		    perf_evlist__add_pollfd(evlist, fd, map, revent) < 0) {
+		    perf_evlist__add_pollfd(evlist, fd, map, revent, fdarray_flag__default) < 0) {
 			perf_mmap__put(map);
 			return -1;
 		}
diff --git a/tools/lib/perf/include/internal/evlist.h b/tools/lib/perf/include/internal/evlist.h
index 74dc8c3f0b66..2d0fa02b036f 100644
--- a/tools/lib/perf/include/internal/evlist.h
+++ b/tools/lib/perf/include/internal/evlist.h
@@ -45,7 +45,7 @@ struct perf_evlist_mmap_ops {
 
 int perf_evlist__alloc_pollfd(struct perf_evlist *evlist);
 int perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd,
-			    void *ptr, short revent);
+			    void *ptr, short revent, enum fdarray_flags flags);
 
 int perf_evlist__mmap_ops(struct perf_evlist *evlist,
 			  struct perf_evlist_mmap_ops *ops,
diff --git a/tools/lib/perf/include/perf/event.h b/tools/lib/perf/include/perf/event.h
index 69b44d2cc0f5..842028858d66 100644
--- a/tools/lib/perf/include/perf/event.h
+++ b/tools/lib/perf/include/perf/event.h
@@ -111,6 +111,14 @@ struct perf_record_cgroup {
 	char			 path[PATH_MAX];
 };
 
+struct perf_record_text_poke_event {
+	struct perf_event_header header;
+	__u64			addr;
+	__u16			old_len;
+	__u16			new_len;
+	__u8			bytes[];
+};
+
 struct perf_record_sample {
 	struct perf_event_header header;
 	__u64			 array[];
@@ -367,6 +375,7 @@ union perf_event {
 	struct perf_record_sample		sample;
 	struct perf_record_bpf_event		bpf;
 	struct perf_record_ksymbol		ksymbol;
+	struct perf_record_text_poke_event	text_poke;
 	struct perf_record_header_attr		attr;
 	struct perf_record_event_update		event_update;
 	struct perf_record_header_event_type	event_type;
diff --git a/tools/lib/rbtree.c b/tools/lib/rbtree.c
index 06ac7bd2144b..727396de6be5 100644
--- a/tools/lib/rbtree.c
+++ b/tools/lib/rbtree.c
@@ -13,7 +13,7 @@
 #include <linux/export.h>
 
 /*
- * red-black trees properties:  http://en.wikipedia.org/wiki/Rbtree
+ * red-black trees properties:  https://en.wikipedia.org/wiki/Rbtree
  *
  *  1) A node is either red or black
  *  2) The root is black
diff --git a/tools/lib/subcmd/help.c b/tools/lib/subcmd/help.c
index 2859f107abc8..bf02d62a3b2b 100644
--- a/tools/lib/subcmd/help.c
+++ b/tools/lib/subcmd/help.c
@@ -65,12 +65,14 @@ void exclude_cmds(struct cmdnames *cmds, struct cmdnames *excludes)
 	ci = cj = ei = 0;
 	while (ci < cmds->cnt && ei < excludes->cnt) {
 		cmp = strcmp(cmds->names[ci]->name, excludes->names[ei]->name);
-		if (cmp < 0)
+		if (cmp < 0) {
 			cmds->names[cj++] = cmds->names[ci++];
-		else if (cmp == 0)
-			ci++, ei++;
-		else if (cmp > 0)
+		} else if (cmp == 0) {
+			ci++;
 			ei++;
+		} else if (cmp > 0) {
+			ei++;
+		}
 	}
 
 	while (ci < cmds->cnt)
diff --git a/tools/lib/traceevent/Documentation/libtraceevent-plugins.txt b/tools/lib/traceevent/Documentation/libtraceevent-plugins.txt
index 596032ade31f..4d6394397d92 100644
--- a/tools/lib/traceevent/Documentation/libtraceevent-plugins.txt
+++ b/tools/lib/traceevent/Documentation/libtraceevent-plugins.txt
@@ -3,7 +3,7 @@ libtraceevent(3)
 
 NAME
 ----
-tep_load_plugins, tep_unload_plugins - Load / unload traceevent plugins.
+tep_load_plugins, tep_unload_plugins, tep_load_plugins_hook - Load / unload traceevent plugins.
 
 SYNOPSIS
 --------
@@ -13,6 +13,12 @@ SYNOPSIS
 
 struct tep_plugin_list pass:[*]*tep_load_plugins*(struct tep_handle pass:[*]_tep_);
 void *tep_unload_plugins*(struct tep_plugin_list pass:[*]_plugin_list_, struct tep_handle pass:[*]_tep_);
+void *tep_load_plugins_hook*(struct tep_handle pass:[*]_tep_, const char pass:[*]_suffix_,
+			   void (pass:[*]_load_plugin_)(struct tep_handle pass:[*]tep,
+					       const char pass:[*]path,
+					       const char pass:[*]name,
+					       void pass:[*]data),
+			   void pass:[*]_data_);
 --
 
 DESCRIPTION
@@ -22,11 +28,13 @@ directories. The _tep_ argument is trace event parser context.
 The plugin directories are :
 [verse]
 --
+	- Directories, specified in _tep_->plugins_dir with priority TEP_PLUGIN_FIRST
 	- System's plugin directory, defined at the library compile time. It
 	  depends on the library installation prefix and usually is
 	  _(install_preffix)/lib/traceevent/plugins_
 	- Directory, defined by the environment variable _TRACEEVENT_PLUGIN_DIR_
 	- User's plugin directory, located at _~/.local/lib/traceevent/plugins_
+	- Directories, specified in _tep_->plugins_dir with priority TEP_PLUGIN_LAST
 --
 Loading of plugins can be controlled by the _tep_flags_, using the
 _tep_set_flag()_ API:
@@ -44,6 +52,12 @@ _tep_load_plugins()_. The _tep_ argument is trace event parser context. The
 _plugin_list_ is the list of loaded plugins, returned by
 the _tep_load_plugins()_ function.
 
+The _tep_load_plugins_hook_ function walks through all directories with plugins
+and calls user specified _load_plugin()_ hook for each plugin file. Only files
+with given _suffix_ are considered to be plugins. The _data_ is a user specified
+context, passed to _load_plugin()_. Directories and the walk order are the same
+as in _tep_load_plugins()_ API.
+
 RETURN VALUE
 ------------
 The _tep_load_plugins()_ function returns a list of successfully loaded plugins,
@@ -63,6 +77,15 @@ if (plugins == NULL) {
 }
 ...
 tep_unload_plugins(plugins, tep);
+...
+void print_plugin(struct tep_handle *tep, const char *path,
+		  const char *name, void *data)
+{
+	pritnf("Found libtraceevent plugin %s/%s\n", path, name);
+}
+...
+tep_load_plugins_hook(tep, ".so", print_plugin, NULL);
+...
 --
 
 FILES
diff --git a/tools/lib/traceevent/event-parse-local.h b/tools/lib/traceevent/event-parse-local.h
index cee469803a34..d805a920af6f 100644
--- a/tools/lib/traceevent/event-parse-local.h
+++ b/tools/lib/traceevent/event-parse-local.h
@@ -13,6 +13,7 @@ struct func_map;
 struct func_list;
 struct event_handler;
 struct func_resolver;
+struct tep_plugins_dir;
 
 struct tep_handle {
 	int ref_count;
@@ -47,7 +48,6 @@ struct tep_handle {
 	struct printk_list *printklist;
 	unsigned int printk_count;
 
-
 	struct tep_event **events;
 	int nr_events;
 	struct tep_event **sort_events;
@@ -81,10 +81,30 @@ struct tep_handle {
 
 	/* cache */
 	struct tep_event *last_event;
+
+	struct tep_plugins_dir *plugins_dir;
+};
+
+enum tep_print_parse_type {
+	PRINT_FMT_STRING,
+	PRINT_FMT_ARG_DIGIT,
+	PRINT_FMT_ARG_POINTER,
+	PRINT_FMT_ARG_STRING,
+};
+
+struct tep_print_parse {
+	struct tep_print_parse	*next;
+
+	char				*format;
+	int				ls;
+	enum tep_print_parse_type	type;
+	struct tep_print_arg		*arg;
+	struct tep_print_arg		*len_as_arg;
 };
 
 void tep_free_event(struct tep_event *event);
 void tep_free_format_field(struct tep_format_field *field);
+void tep_free_plugin_paths(struct tep_handle *tep);
 
 unsigned short tep_data2host2(struct tep_handle *tep, unsigned short data);
 unsigned int tep_data2host4(struct tep_handle *tep, unsigned int data);
diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c
index ba4f33804af1..5acc18b32606 100644
--- a/tools/lib/traceevent/event-parse.c
+++ b/tools/lib/traceevent/event-parse.c
@@ -4565,43 +4565,93 @@ get_bprint_format(void *data, int size __maybe_unused,
 	return format;
 }
 
-static void print_mac_arg(struct trace_seq *s, int mac, void *data, int size,
-			  struct tep_event *event, struct tep_print_arg *arg)
+static int print_mac_arg(struct trace_seq *s, const char *format,
+			 void *data, int size, struct tep_event *event,
+			 struct tep_print_arg *arg)
 {
-	unsigned char *buf;
 	const char *fmt = "%.2x:%.2x:%.2x:%.2x:%.2x:%.2x";
+	bool reverse = false;
+	unsigned char *buf;
+	int ret = 0;
 
 	if (arg->type == TEP_PRINT_FUNC) {
 		process_defined_func(s, data, size, event, arg);
-		return;
+		return 0;
 	}
 
 	if (arg->type != TEP_PRINT_FIELD) {
 		trace_seq_printf(s, "ARG TYPE NOT FIELD BUT %d",
 				 arg->type);
-		return;
+		return 0;
 	}
 
-	if (mac == 'm')
+	if (format[0] == 'm') {
 		fmt = "%.2x%.2x%.2x%.2x%.2x%.2x";
+	} else if (format[0] == 'M' && format[1] == 'F') {
+		fmt = "%.2x-%.2x-%.2x-%.2x-%.2x-%.2x";
+		ret++;
+	}
+	if (format[1] == 'R') {
+		reverse = true;
+		ret++;
+	}
+
 	if (!arg->field.field) {
 		arg->field.field =
 			tep_find_any_field(event, arg->field.name);
 		if (!arg->field.field) {
 			do_warning_event(event, "%s: field %s not found",
 					 __func__, arg->field.name);
-			return;
+			return ret;
 		}
 	}
 	if (arg->field.field->size != 6) {
 		trace_seq_printf(s, "INVALIDMAC");
-		return;
+		return ret;
 	}
+
 	buf = data + arg->field.field->offset;
-	trace_seq_printf(s, fmt, buf[0], buf[1], buf[2], buf[3], buf[4], buf[5]);
+	if (reverse)
+		trace_seq_printf(s, fmt, buf[5], buf[4], buf[3], buf[2], buf[1], buf[0]);
+	else
+		trace_seq_printf(s, fmt, buf[0], buf[1], buf[2], buf[3], buf[4], buf[5]);
+
+	return ret;
 }
 
-static void print_ip4_addr(struct trace_seq *s, char i, unsigned char *buf)
+static int parse_ip4_print_args(struct tep_handle *tep,
+				const char *ptr, bool *reverse)
+{
+	int ret = 0;
+
+	*reverse = false;
+
+	/* hnbl */
+	switch (*ptr) {
+	case 'h':
+		if (tep->file_bigendian)
+			*reverse = false;
+		else
+			*reverse = true;
+		ret++;
+		break;
+	case 'l':
+		*reverse = true;
+		ret++;
+		break;
+	case 'n':
+	case 'b':
+		ret++;
+		/* fall through */
+	default:
+		*reverse = false;
+		break;
+	}
+
+	return ret;
+}
+
+static void print_ip4_addr(struct trace_seq *s, char i, bool reverse, unsigned char *buf)
 {
 	const char *fmt;
 
@@ -4610,7 +4660,11 @@ static void print_ip4_addr(struct trace_seq *s, char i, unsigned char *buf)
 	else
 		fmt = "%d.%d.%d.%d";
 
-	trace_seq_printf(s, fmt, buf[0], buf[1], buf[2], buf[3]);
+	if (reverse)
+		trace_seq_printf(s, fmt, buf[3], buf[2], buf[1], buf[0]);
+	else
+		trace_seq_printf(s, fmt, buf[0], buf[1], buf[2], buf[3]);
+
 }
 
 static inline bool ipv6_addr_v4mapped(const struct in6_addr *a)
@@ -4693,7 +4747,7 @@ static void print_ip6c_addr(struct trace_seq *s, unsigned char *addr)
 	if (useIPv4) {
 		if (needcolon)
 			trace_seq_printf(s, ":");
-		print_ip4_addr(s, 'I', &in6.s6_addr[12]);
+		print_ip4_addr(s, 'I', false, &in6.s6_addr[12]);
 	}
 
 	return;
@@ -4722,16 +4776,20 @@ static int print_ipv4_arg(struct trace_seq *s, const char *ptr, char i,
 			  void *data, int size, struct tep_event *event,
 			  struct tep_print_arg *arg)
 {
+	bool reverse = false;
 	unsigned char *buf;
+	int ret;
+
+	ret = parse_ip4_print_args(event->tep, ptr, &reverse);
 
 	if (arg->type == TEP_PRINT_FUNC) {
 		process_defined_func(s, data, size, event, arg);
-		return 0;
+		return ret;
 	}
 
 	if (arg->type != TEP_PRINT_FIELD) {
 		trace_seq_printf(s, "ARG TYPE NOT FIELD BUT %d", arg->type);
-		return 0;
+		return ret;
 	}
 
 	if (!arg->field.field) {
@@ -4740,7 +4798,7 @@ static int print_ipv4_arg(struct trace_seq *s, const char *ptr, char i,
 		if (!arg->field.field) {
 			do_warning("%s: field %s not found",
 				   __func__, arg->field.name);
-			return 0;
+			return ret;
 		}
 	}
 
@@ -4748,11 +4806,12 @@ static int print_ipv4_arg(struct trace_seq *s, const char *ptr, char i,
 
 	if (arg->field.field->size != 4) {
 		trace_seq_printf(s, "INVALIDIPv4");
-		return 0;
+		return ret;
 	}
-	print_ip4_addr(s, i, buf);
 
-	return 0;
+	print_ip4_addr(s, i, reverse, buf);
+	return ret;
+
 }
 
 static int print_ipv6_arg(struct trace_seq *s, const char *ptr, char i,
@@ -4812,7 +4871,9 @@ static int print_ipsa_arg(struct trace_seq *s, const char *ptr, char i,
 	char have_c = 0, have_p = 0;
 	unsigned char *buf;
 	struct sockaddr_storage *sa;
+	bool reverse = false;
 	int rc = 0;
+	int ret;
 
 	/* pISpc */
 	if (i == 'I') {
@@ -4827,6 +4888,9 @@ static int print_ipsa_arg(struct trace_seq *s, const char *ptr, char i,
 			rc++;
 		}
 	}
+	ret = parse_ip4_print_args(event->tep, ptr, &reverse);
+	ptr += ret;
+	rc += ret;
 
 	if (arg->type == TEP_PRINT_FUNC) {
 		process_defined_func(s, data, size, event, arg);
@@ -4858,7 +4922,7 @@ static int print_ipsa_arg(struct trace_seq *s, const char *ptr, char i,
 			return rc;
 		}
 
-		print_ip4_addr(s, i, (unsigned char *) &sa4->sin_addr);
+		print_ip4_addr(s, i, reverse, (unsigned char *) &sa4->sin_addr);
 		if (have_p)
 			trace_seq_printf(s, ":%d", ntohs(sa4->sin_port));
 
@@ -4892,25 +4956,20 @@ static int print_ip_arg(struct trace_seq *s, const char *ptr,
 			struct tep_print_arg *arg)
 {
 	char i = *ptr;  /* 'i' or 'I' */
-	char ver;
-	int rc = 0;
+	int rc = 1;
 
+	/* IP version */
 	ptr++;
-	rc++;
 
-	ver = *ptr;
-	ptr++;
-	rc++;
-
-	switch (ver) {
+	switch (*ptr) {
 	case '4':
-		rc += print_ipv4_arg(s, ptr, i, data, size, event, arg);
+		rc += print_ipv4_arg(s, ptr + 1, i, data, size, event, arg);
 		break;
 	case '6':
-		rc += print_ipv6_arg(s, ptr, i, data, size, event, arg);
+		rc += print_ipv6_arg(s, ptr + 1, i, data, size, event, arg);
 		break;
 	case 'S':
-		rc += print_ipsa_arg(s, ptr, i, data, size, event, arg);
+		rc += print_ipsa_arg(s, ptr + 1, i, data, size, event, arg);
 		break;
 	default:
 		return 0;
@@ -4919,6 +4978,133 @@ static int print_ip_arg(struct trace_seq *s, const char *ptr,
 	return rc;
 }
 
+static const int guid_index[16] = {3, 2, 1, 0, 5, 4, 7, 6, 8, 9, 10, 11, 12, 13, 14, 15};
+static const int uuid_index[16] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
+
+static int print_uuid_arg(struct trace_seq *s, const char *ptr,
+			void *data, int size, struct tep_event *event,
+			struct tep_print_arg *arg)
+{
+	const int *index = uuid_index;
+	char *format = "%02x";
+	int ret = 0;
+	char *buf;
+	int i;
+
+	switch (*(ptr + 1)) {
+	case 'L':
+		format = "%02X";
+		/* fall through */
+	case 'l':
+		index = guid_index;
+		ret++;
+		break;
+	case 'B':
+		format = "%02X";
+		/* fall through */
+	case 'b':
+		ret++;
+		break;
+	}
+
+	if (arg->type == TEP_PRINT_FUNC) {
+		process_defined_func(s, data, size, event, arg);
+		return ret;
+	}
+
+	if (arg->type != TEP_PRINT_FIELD) {
+		trace_seq_printf(s, "ARG TYPE NOT FIELD BUT %d", arg->type);
+		return ret;
+	}
+
+	if (!arg->field.field) {
+		arg->field.field =
+			tep_find_any_field(event, arg->field.name);
+		if (!arg->field.field) {
+			do_warning("%s: field %s not found",
+				   __func__, arg->field.name);
+			return ret;
+		}
+	}
+
+	if (arg->field.field->size != 16) {
+		trace_seq_printf(s, "INVALIDUUID");
+		return ret;
+	}
+
+	buf = data + arg->field.field->offset;
+
+	for (i = 0; i < 16; i++) {
+		trace_seq_printf(s, format, buf[index[i]] & 0xff);
+		switch (i) {
+		case 3:
+		case 5:
+		case 7:
+		case 9:
+			trace_seq_printf(s, "-");
+			break;
+		}
+	}
+
+	return ret;
+}
+
+static int print_raw_buff_arg(struct trace_seq *s, const char *ptr,
+			      void *data, int size, struct tep_event *event,
+			      struct tep_print_arg *arg, int print_len)
+{
+	int plen = print_len;
+	char *delim = " ";
+	int ret = 0;
+	char *buf;
+	int i;
+	unsigned long offset;
+	int arr_len;
+
+	switch (*(ptr + 1)) {
+	case 'C':
+		delim = ":";
+		ret++;
+		break;
+	case 'D':
+		delim = "-";
+		ret++;
+		break;
+	case 'N':
+		delim = "";
+		ret++;
+		break;
+	}
+
+	if (arg->type == TEP_PRINT_FUNC) {
+		process_defined_func(s, data, size, event, arg);
+		return ret;
+	}
+
+	if (arg->type != TEP_PRINT_DYNAMIC_ARRAY) {
+		trace_seq_printf(s, "ARG TYPE NOT FIELD BUT %d", arg->type);
+		return ret;
+	}
+
+	offset = tep_read_number(event->tep,
+				 data + arg->dynarray.field->offset,
+				 arg->dynarray.field->size);
+	arr_len = (unsigned long long)(offset >> 16);
+	buf = data + (offset & 0xffff);
+
+	if (arr_len < plen)
+		plen = arr_len;
+
+	if (plen < 1)
+		return ret;
+
+	trace_seq_printf(s, "%02x", buf[0] & 0xff);
+	for (i = 1; i < plen; i++)
+		trace_seq_printf(s, "%s%02x", delim, buf[i] & 0xff);
+
+	return ret;
+}
+
 static int is_printable_array(char *p, unsigned int len)
 {
 	unsigned int i;
@@ -5007,264 +5193,567 @@ void tep_print_fields(struct trace_seq *s, void *data,
 	}
 }
 
-static void pretty_print(struct trace_seq *s, void *data, int size, struct tep_event *event)
+static int print_function(struct trace_seq *s, const char *format,
+			  void *data, int size, struct tep_event *event,
+			  struct tep_print_arg *arg)
 {
-	struct tep_handle *tep = event->tep;
-	struct tep_print_fmt *print_fmt = &event->print_fmt;
-	struct tep_print_arg *arg = print_fmt->args;
-	struct tep_print_arg *args = NULL;
-	const char *ptr = print_fmt->format;
-	unsigned long long val;
 	struct func_map *func;
-	const char *saveptr;
-	struct trace_seq p;
-	char *bprint_fmt = NULL;
-	char format[32];
-	int show_func;
-	int len_as_arg;
-	int len_arg = 0;
-	int len;
-	int ls;
+	unsigned long long val;
 
-	if (event->flags & TEP_EVENT_FL_FAILED) {
-		trace_seq_printf(s, "[FAILED TO PARSE]");
-		tep_print_fields(s, data, size, event);
-		return;
+	val = eval_num_arg(data, size, event, arg);
+	func = find_func(event->tep, val);
+	if (func) {
+		trace_seq_puts(s, func->func);
+		if (*format == 'F' || *format == 'S')
+			trace_seq_printf(s, "+0x%llx", val - func->addr);
+	} else {
+		if (event->tep->long_size == 4)
+			trace_seq_printf(s, "0x%lx", (long)val);
+		else
+			trace_seq_printf(s, "0x%llx", (long long)val);
 	}
 
-	if (event->flags & TEP_EVENT_FL_ISBPRINT) {
-		bprint_fmt = get_bprint_format(data, size, event);
-		args = make_bprint_args(bprint_fmt, data, size, event);
-		arg = args;
-		ptr = bprint_fmt;
+	return 0;
+}
+
+static int print_arg_pointer(struct trace_seq *s, const char *format, int plen,
+			     void *data, int size,
+			     struct tep_event *event, struct tep_print_arg *arg)
+{
+	unsigned long long val;
+	int ret = 1;
+
+	if (arg->type == TEP_PRINT_BSTRING) {
+		trace_seq_puts(s, arg->string.string);
+		return 0;
+	}
+	while (*format) {
+		if (*format == 'p') {
+			format++;
+			break;
+		}
+		format++;
 	}
 
-	for (; *ptr; ptr++) {
-		ls = 0;
-		if (*ptr == '\\') {
-			ptr++;
-			switch (*ptr) {
+	switch (*format) {
+	case 'F':
+	case 'f':
+	case 'S':
+	case 's':
+		ret += print_function(s, format, data, size, event, arg);
+		break;
+	case 'M':
+	case 'm':
+		ret += print_mac_arg(s, format, data, size, event, arg);
+		break;
+	case 'I':
+	case 'i':
+		ret += print_ip_arg(s, format, data, size, event, arg);
+		break;
+	case 'U':
+		ret += print_uuid_arg(s, format, data, size, event, arg);
+		break;
+	case 'h':
+		ret += print_raw_buff_arg(s, format, data, size, event, arg, plen);
+		break;
+	default:
+		ret = 0;
+		val = eval_num_arg(data, size, event, arg);
+		trace_seq_printf(s, "%p", (void *)(intptr_t)val);
+		break;
+	}
+
+	return ret;
+
+}
+
+static int print_arg_number(struct trace_seq *s, const char *format, int plen,
+			    void *data, int size, int ls,
+			    struct tep_event *event, struct tep_print_arg *arg)
+{
+	unsigned long long val;
+
+	val = eval_num_arg(data, size, event, arg);
+
+	switch (ls) {
+	case -2:
+		if (plen >= 0)
+			trace_seq_printf(s, format, plen, (char)val);
+		else
+			trace_seq_printf(s, format, (char)val);
+		break;
+	case -1:
+		if (plen >= 0)
+			trace_seq_printf(s, format, plen, (short)val);
+		else
+			trace_seq_printf(s, format, (short)val);
+		break;
+	case 0:
+		if (plen >= 0)
+			trace_seq_printf(s, format, plen, (int)val);
+		else
+			trace_seq_printf(s, format, (int)val);
+		break;
+	case 1:
+		if (plen >= 0)
+			trace_seq_printf(s, format, plen, (long)val);
+		else
+			trace_seq_printf(s, format, (long)val);
+		break;
+	case 2:
+		if (plen >= 0)
+			trace_seq_printf(s, format, plen, (long long)val);
+		else
+			trace_seq_printf(s, format, (long long)val);
+		break;
+	default:
+		do_warning_event(event, "bad count (%d)", ls);
+		event->flags |= TEP_EVENT_FL_FAILED;
+	}
+	return 0;
+}
+
+
+static void print_arg_string(struct trace_seq *s, const char *format, int plen,
+			     void *data, int size,
+			     struct tep_event *event, struct tep_print_arg *arg)
+{
+	struct trace_seq p;
+
+	/* Use helper trace_seq */
+	trace_seq_init(&p);
+	print_str_arg(&p, data, size, event,
+		      format, plen, arg);
+	trace_seq_terminate(&p);
+	trace_seq_puts(s, p.buffer);
+	trace_seq_destroy(&p);
+}
+
+static int parse_arg_format_pointer(const char *format)
+{
+	int ret = 0;
+	int index;
+	int loop;
+
+	switch (*format) {
+	case 'F':
+	case 'S':
+	case 'f':
+	case 's':
+		ret++;
+		break;
+	case 'M':
+	case 'm':
+		/* [mM]R , [mM]F */
+		switch (format[1]) {
+		case 'R':
+		case 'F':
+			ret++;
+			break;
+		}
+		ret++;
+		break;
+	case 'I':
+	case 'i':
+		index = 2;
+		loop = 1;
+		switch (format[1]) {
+		case 'S':
+			/*[S][pfs]*/
+			while (loop) {
+				switch (format[index]) {
+				case 'p':
+				case 'f':
+				case 's':
+					ret++;
+					index++;
+					break;
+				default:
+					loop = 0;
+					break;
+				}
+			}
+			/* fall through */
+		case '4':
+			/* [4S][hnbl] */
+			switch (format[index]) {
+			case 'h':
 			case 'n':
-				trace_seq_putc(s, '\n');
-				break;
-			case 't':
-				trace_seq_putc(s, '\t');
-				break;
-			case 'r':
-				trace_seq_putc(s, '\r');
-				break;
-			case '\\':
-				trace_seq_putc(s, '\\');
+			case 'l':
+			case 'b':
+				ret++;
+				index++;
 				break;
-			default:
-				trace_seq_putc(s, *ptr);
+			}
+			if (format[1] == '4') {
+				ret++;
 				break;
 			}
+			/* fall through */
+		case '6':
+			/* [6S]c */
+			if (format[index] == 'c')
+				ret++;
+			ret++;
+			break;
+		}
+		ret++;
+		break;
+	case 'U':
+		switch (format[1]) {
+		case 'L':
+		case 'l':
+		case 'B':
+		case 'b':
+			ret++;
+			break;
+		}
+		ret++;
+		break;
+	case 'h':
+		switch (format[1]) {
+		case 'C':
+		case 'D':
+		case 'N':
+			ret++;
+			break;
+		}
+		ret++;
+		break;
+	default:
+		break;
+	}
 
-		} else if (*ptr == '%') {
-			saveptr = ptr;
-			show_func = 0;
-			len_as_arg = 0;
- cont_process:
-			ptr++;
-			switch (*ptr) {
-			case '%':
-				trace_seq_putc(s, '%');
-				break;
-			case '#':
-				/* FIXME: need to handle properly */
-				goto cont_process;
-			case 'h':
-				ls--;
-				goto cont_process;
-			case 'l':
-				ls++;
-				goto cont_process;
-			case 'L':
-				ls = 2;
-				goto cont_process;
-			case '*':
-				/* The argument is the length. */
-				if (!arg) {
-					do_warning_event(event, "no argument match");
-					event->flags |= TEP_EVENT_FL_FAILED;
-					goto out_failed;
-				}
-				len_arg = eval_num_arg(data, size, event, arg);
-				len_as_arg = 1;
-				arg = arg->next;
-				goto cont_process;
-			case '.':
-			case 'z':
-			case 'Z':
-			case '0' ... '9':
-			case '-':
-				goto cont_process;
-			case 'p':
-				if (tep->long_size == 4)
-					ls = 1;
-				else
-					ls = 2;
+	return ret;
+}
 
-				if (isalnum(ptr[1]))
-					ptr++;
+static void free_parse_args(struct tep_print_parse *arg)
+{
+	struct tep_print_parse *del;
 
-				if (arg->type == TEP_PRINT_BSTRING) {
-					trace_seq_puts(s, arg->string.string);
-					arg = arg->next;
-					break;
-				}
+	while (arg) {
+		del = arg;
+		arg = del->next;
+		free(del->format);
+		free(del);
+	}
+}
 
-				if (*ptr == 'F' || *ptr == 'f' ||
-				    *ptr == 'S' || *ptr == 's') {
-					show_func = *ptr;
-				} else if (*ptr == 'M' || *ptr == 'm') {
-					print_mac_arg(s, *ptr, data, size, event, arg);
-					arg = arg->next;
-					break;
-				} else if (*ptr == 'I' || *ptr == 'i') {
-					int n;
+static int parse_arg_add(struct tep_print_parse **parse, char *format,
+			 enum tep_print_parse_type type,
+			 struct tep_print_arg *arg,
+			 struct tep_print_arg *len_as_arg,
+			 int ls)
+{
+	struct tep_print_parse *parg = NULL;
 
-					n = print_ip_arg(s, ptr, data, size, event, arg);
-					if (n > 0) {
-						ptr += n - 1;
-						arg = arg->next;
-						break;
-					}
-				}
+	parg = calloc(1, sizeof(*parg));
+	if (!parg)
+		goto error;
+	parg->format = strdup(format);
+	if (!parg->format)
+		goto error;
+	parg->type = type;
+	parg->arg = arg;
+	parg->len_as_arg = len_as_arg;
+	parg->ls = ls;
+	*parse = parg;
+	return 0;
+error:
+	if (parg) {
+		free(parg->format);
+		free(parg);
+	}
+	return -1;
+}
 
-				/* fall through */
-			case 'd':
-			case 'u':
-			case 'i':
-			case 'x':
-			case 'X':
-			case 'o':
-				if (!arg) {
-					do_warning_event(event, "no argument match");
-					event->flags |= TEP_EVENT_FL_FAILED;
-					goto out_failed;
-				}
+static int parse_arg_format(struct tep_print_parse **parse,
+			    struct tep_event *event,
+			    const char *format, struct tep_print_arg **arg)
+{
+	struct tep_print_arg *len_arg = NULL;
+	char print_format[32];
+	const char *start = format;
+	int ret = 0;
+	int ls = 0;
+	int res;
+	int len;
 
-				len = ((unsigned long)ptr + 1) -
-					(unsigned long)saveptr;
+	format++;
+	ret++;
+	for (; *format; format++) {
+		switch (*format) {
+		case '#':
+			/* FIXME: need to handle properly */
+			break;
+		case 'h':
+			ls--;
+			break;
+		case 'l':
+			ls++;
+			break;
+		case 'L':
+			ls = 2;
+			break;
+		case '.':
+		case 'z':
+		case 'Z':
+		case '0' ... '9':
+		case '-':
+			break;
+		case '*':
+			/* The argument is the length. */
+			if (!*arg) {
+				do_warning_event(event, "no argument match");
+				event->flags |= TEP_EVENT_FL_FAILED;
+				goto out_failed;
+			}
+			if (len_arg) {
+				do_warning_event(event, "argument already matched");
+				event->flags |= TEP_EVENT_FL_FAILED;
+				goto out_failed;
+			}
+			len_arg = *arg;
+			*arg = (*arg)->next;
+			break;
+		case 'p':
+			if (!*arg) {
+				do_warning_event(event, "no argument match");
+				event->flags |= TEP_EVENT_FL_FAILED;
+				goto out_failed;
+			}
+			res = parse_arg_format_pointer(format + 1);
+			if (res > 0) {
+				format += res;
+				ret += res;
+			}
+			len = ((unsigned long)format + 1) -
+				(unsigned long)start;
+			/* should never happen */
+			if (len > 31) {
+				do_warning_event(event, "bad format!");
+				event->flags |= TEP_EVENT_FL_FAILED;
+				len = 31;
+			}
+			memcpy(print_format, start, len);
+			print_format[len] = 0;
 
-				/* should never happen */
-				if (len > 31) {
-					do_warning_event(event, "bad format!");
-					event->flags |= TEP_EVENT_FL_FAILED;
-					len = 31;
-				}
+			parse_arg_add(parse, print_format,
+				      PRINT_FMT_ARG_POINTER, *arg, len_arg, ls);
+			*arg = (*arg)->next;
+			ret++;
+			return ret;
+		case 'd':
+		case 'u':
+		case 'i':
+		case 'x':
+		case 'X':
+		case 'o':
+			if (!*arg) {
+				do_warning_event(event, "no argument match");
+				event->flags |= TEP_EVENT_FL_FAILED;
+				goto out_failed;
+			}
 
-				memcpy(format, saveptr, len);
-				format[len] = 0;
+			len = ((unsigned long)format + 1) -
+				(unsigned long)start;
 
-				val = eval_num_arg(data, size, event, arg);
-				arg = arg->next;
+			/* should never happen */
+			if (len > 30) {
+				do_warning_event(event, "bad format!");
+				event->flags |= TEP_EVENT_FL_FAILED;
+				len = 31;
+			}
+			memcpy(print_format, start, len);
+			print_format[len] = 0;
 
-				if (show_func) {
-					func = find_func(tep, val);
-					if (func) {
-						trace_seq_puts(s, func->func);
-						if (show_func == 'F')
-							trace_seq_printf(s,
-							       "+0x%llx",
-							       val - func->addr);
-						break;
-					}
-				}
-				if (tep->long_size == 8 && ls == 1 &&
-				    sizeof(long) != 8) {
-					char *p;
-
-					/* make %l into %ll */
-					if (ls == 1 && (p = strchr(format, 'l')))
-						memmove(p+1, p, strlen(p)+1);
-					else if (strcmp(format, "%p") == 0)
-						strcpy(format, "0x%llx");
-					ls = 2;
-				}
-				switch (ls) {
-				case -2:
-					if (len_as_arg)
-						trace_seq_printf(s, format, len_arg, (char)val);
-					else
-						trace_seq_printf(s, format, (char)val);
-					break;
-				case -1:
-					if (len_as_arg)
-						trace_seq_printf(s, format, len_arg, (short)val);
-					else
-						trace_seq_printf(s, format, (short)val);
-					break;
-				case 0:
-					if (len_as_arg)
-						trace_seq_printf(s, format, len_arg, (int)val);
-					else
-						trace_seq_printf(s, format, (int)val);
-					break;
-				case 1:
-					if (len_as_arg)
-						trace_seq_printf(s, format, len_arg, (long)val);
-					else
-						trace_seq_printf(s, format, (long)val);
-					break;
-				case 2:
-					if (len_as_arg)
-						trace_seq_printf(s, format, len_arg,
-								 (long long)val);
-					else
-						trace_seq_printf(s, format, (long long)val);
-					break;
-				default:
-					do_warning_event(event, "bad count (%d)", ls);
-					event->flags |= TEP_EVENT_FL_FAILED;
-				}
-				break;
-			case 's':
-				if (!arg) {
-					do_warning_event(event, "no matching argument");
-					event->flags |= TEP_EVENT_FL_FAILED;
-					goto out_failed;
-				}
+			if (event->tep->long_size == 8 && ls == 1 &&
+			    sizeof(long) != 8) {
+				char *p;
+
+				/* make %l into %ll */
+				if (ls == 1 && (p = strchr(print_format, 'l')))
+					memmove(p+1, p, strlen(p)+1);
+				ls = 2;
+			}
+			if (ls < -2 || ls > 2) {
+				do_warning_event(event, "bad count (%d)", ls);
+				event->flags |= TEP_EVENT_FL_FAILED;
+			}
+			parse_arg_add(parse, print_format,
+				      PRINT_FMT_ARG_DIGIT, *arg, len_arg, ls);
+			*arg = (*arg)->next;
+			ret++;
+			return ret;
+		case 's':
+			if (!*arg) {
+				do_warning_event(event, "no matching argument");
+				event->flags |= TEP_EVENT_FL_FAILED;
+				goto out_failed;
+			}
 
-				len = ((unsigned long)ptr + 1) -
-					(unsigned long)saveptr;
+			len = ((unsigned long)format + 1) -
+				(unsigned long)start;
 
-				/* should never happen */
-				if (len > 31) {
-					do_warning_event(event, "bad format!");
-					event->flags |= TEP_EVENT_FL_FAILED;
-					len = 31;
-				}
+			/* should never happen */
+			if (len > 31) {
+				do_warning_event(event, "bad format!");
+				event->flags |= TEP_EVENT_FL_FAILED;
+				len = 31;
+			}
+
+			memcpy(print_format, start, len);
+			print_format[len] = 0;
+
+			parse_arg_add(parse, print_format,
+					PRINT_FMT_ARG_STRING, *arg, len_arg, 0);
+			*arg = (*arg)->next;
+			ret++;
+			return ret;
+		default:
+			snprintf(print_format, 32, ">%c<", *format);
+			parse_arg_add(parse, print_format,
+					PRINT_FMT_STRING, NULL, NULL, 0);
+			ret++;
+			return ret;
+		}
+		ret++;
+	}
+
+out_failed:
+	return ret;
 
-				memcpy(format, saveptr, len);
-				format[len] = 0;
-				if (!len_as_arg)
-					len_arg = -1;
-				/* Use helper trace_seq */
-				trace_seq_init(&p);
-				print_str_arg(&p, data, size, event,
-					      format, len_arg, arg);
-				trace_seq_terminate(&p);
-				trace_seq_puts(s, p.buffer);
-				trace_seq_destroy(&p);
-				arg = arg->next;
+}
+
+static int parse_arg_string(struct tep_print_parse **parse, const char *format)
+{
+	struct trace_seq s;
+	int ret = 0;
+
+	trace_seq_init(&s);
+	for (; *format; format++) {
+		if (*format == '\\') {
+			format++;
+			ret++;
+			switch (*format) {
+			case 'n':
+				trace_seq_putc(&s, '\n');
+				break;
+			case 't':
+				trace_seq_putc(&s, '\t');
+				break;
+			case 'r':
+				trace_seq_putc(&s, '\r');
+				break;
+			case '\\':
+				trace_seq_putc(&s, '\\');
 				break;
 			default:
-				trace_seq_printf(s, ">%c<", *ptr);
-
+				trace_seq_putc(&s, *format);
+				break;
 			}
+		} else if (*format == '%') {
+			if (*(format + 1) == '%') {
+				trace_seq_putc(&s, '%');
+				format++;
+				ret++;
+			} else
+				break;
 		} else
-			trace_seq_putc(s, *ptr);
+			trace_seq_putc(&s, *format);
+
+		ret++;
+	}
+	trace_seq_terminate(&s);
+	parse_arg_add(parse, s.buffer, PRINT_FMT_STRING, NULL, NULL, 0);
+	trace_seq_destroy(&s);
+
+	return ret;
+}
+
+static struct tep_print_parse *
+parse_args(struct tep_event *event, const char *format, struct tep_print_arg *arg)
+{
+	struct tep_print_parse *parse_ret = NULL;
+	struct tep_print_parse **parse = NULL;
+	int ret;
+	int len;
+
+	len = strlen(format);
+	while (*format) {
+		if (!parse_ret)
+			parse = &parse_ret;
+		if (*format == '%' && *(format + 1) != '%')
+			ret = parse_arg_format(parse, event, format, &arg);
+		else
+			ret = parse_arg_string(parse, format);
+		if (*parse)
+			parse = &((*parse)->next);
+
+		len -= ret;
+		if (len > 0)
+			format += ret;
+		else
+			break;
+	}
+	return parse_ret;
+}
+
+static void print_event_cache(struct tep_print_parse *parse, struct trace_seq *s,
+			      void *data, int size, struct tep_event *event)
+{
+	int len_arg;
+
+	while (parse) {
+		if (parse->len_as_arg)
+			len_arg = eval_num_arg(data, size, event, parse->len_as_arg);
+		switch (parse->type) {
+		case PRINT_FMT_ARG_DIGIT:
+			print_arg_number(s, parse->format,
+					parse->len_as_arg ? len_arg : -1, data,
+					 size, parse->ls, event, parse->arg);
+			break;
+		case PRINT_FMT_ARG_POINTER:
+			print_arg_pointer(s, parse->format,
+					  parse->len_as_arg ? len_arg : 1,
+					  data, size, event, parse->arg);
+			break;
+		case PRINT_FMT_ARG_STRING:
+			print_arg_string(s, parse->format,
+					 parse->len_as_arg ? len_arg : -1,
+					 data, size, event, parse->arg);
+			break;
+		case PRINT_FMT_STRING:
+		default:
+			trace_seq_printf(s, "%s", parse->format);
+			break;
+		}
+		parse = parse->next;
 	}
+}
+
+static void pretty_print(struct trace_seq *s, void *data, int size, struct tep_event *event)
+{
+	struct tep_print_parse *parse = event->print_fmt.print_cache;
+	struct tep_print_arg *args = NULL;
+	char *bprint_fmt = NULL;
 
 	if (event->flags & TEP_EVENT_FL_FAILED) {
-out_failed:
 		trace_seq_printf(s, "[FAILED TO PARSE]");
+		tep_print_fields(s, data, size, event);
+		return;
 	}
 
-	if (args) {
+	if (event->flags & TEP_EVENT_FL_ISBPRINT) {
+		bprint_fmt = get_bprint_format(data, size, event);
+		args = make_bprint_args(bprint_fmt, data, size, event);
+		parse = parse_args(event, bprint_fmt, args);
+	}
+
+	print_event_cache(parse, s, data, size, event);
+
+	if (event->flags & TEP_EVENT_FL_ISBPRINT) {
+		free_parse_args(parse);
 		free_args(args);
 		free(bprint_fmt);
 	}
@@ -6363,9 +6852,13 @@ enum tep_errno __tep_parse_format(struct tep_event **eventp,
 			*list = arg;
 			list = &arg->next;
 		}
-		return 0;
 	}
 
+	if (!(event->flags & TEP_EVENT_FL_ISBPRINT))
+		event->print_fmt.print_cache = parse_args(event,
+							  event->print_fmt.format,
+							  event->print_fmt.args);
+
 	return 0;
 
  event_parse_failed:
@@ -7032,7 +7525,7 @@ void tep_free_event(struct tep_event *event)
 
 	free(event->print_fmt.format);
 	free_args(event->print_fmt.args);
-
+	free_parse_args(event->print_fmt.print_cache);
 	free(event);
 }
 
@@ -7120,6 +7613,7 @@ void tep_free(struct tep_handle *tep)
 	free(tep->events);
 	free(tep->sort_events);
 	free(tep->func_resolver);
+	tep_free_plugin_paths(tep);
 
 	free(tep);
 }
diff --git a/tools/lib/traceevent/event-parse.h b/tools/lib/traceevent/event-parse.h
index b77837f75a0d..c29b693e31ee 100644
--- a/tools/lib/traceevent/event-parse.h
+++ b/tools/lib/traceevent/event-parse.h
@@ -1,21 +1,7 @@
+/* SPDX-License-Identifier: LGPL-2.1 */
 /*
  * Copyright (C) 2009, 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
  *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation;
- * version 2.1 of the License (not later!)
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not,  see <http://www.gnu.org/licenses>
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  */
 #ifndef _PARSE_EVENTS_H
 #define _PARSE_EVENTS_H
@@ -272,9 +258,12 @@ struct tep_print_arg {
 	};
 };
 
+struct tep_print_parse;
+
 struct tep_print_fmt {
 	char			*format;
 	struct tep_print_arg	*args;
+	struct tep_print_parse	*print_cache;
 };
 
 struct tep_event {
@@ -379,7 +368,7 @@ enum tep_errno {
 	 * errno since SUS requires the errno has distinct positive values.
 	 * See 'Issue 6' in the link below.
 	 *
-	 * http://pubs.opengroup.org/onlinepubs/9699919799/basedefs/errno.h.html
+	 * https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/errno.h.html
 	 */
 	__TEP_ERRNO__START			= -100000,
 
@@ -393,14 +382,29 @@ struct tep_plugin_list;
 
 #define INVALID_PLUGIN_LIST_OPTION	((char **)((unsigned long)-1))
 
+enum tep_plugin_load_priority {
+	TEP_PLUGIN_FIRST,
+	TEP_PLUGIN_LAST,
+};
+
+int tep_add_plugin_path(struct tep_handle *tep, char *path,
+			enum tep_plugin_load_priority prio);
 struct tep_plugin_list *tep_load_plugins(struct tep_handle *tep);
 void tep_unload_plugins(struct tep_plugin_list *plugin_list,
 			struct tep_handle *tep);
+void tep_load_plugins_hook(struct tep_handle *tep, const char *suffix,
+			   void (*load_plugin)(struct tep_handle *tep,
+					       const char *path,
+					       const char *name,
+					       void *data),
+			   void *data);
 char **tep_plugin_list_options(void);
 void tep_plugin_free_options_list(char **list);
 int tep_plugin_add_options(const char *name,
 			   struct tep_plugin_option *options);
+int tep_plugin_add_option(const char *name, const char *val);
 void tep_plugin_remove_options(struct tep_plugin_option *options);
+void tep_plugin_print_options(struct trace_seq *s);
 void tep_print_plugins(struct trace_seq *s,
 			const char *prefix, const char *suffix,
 			const struct tep_plugin_list *list);
diff --git a/tools/lib/traceevent/event-plugin.c b/tools/lib/traceevent/event-plugin.c
index e1f7ddd5a6cf..e7c2acb8680f 100644
--- a/tools/lib/traceevent/event-plugin.c
+++ b/tools/lib/traceevent/event-plugin.c
@@ -13,6 +13,7 @@
 #include <sys/stat.h>
 #include <unistd.h>
 #include <dirent.h>
+#include <errno.h>
 #include "event-parse.h"
 #include "event-parse-local.h"
 #include "event-utils.h"
@@ -38,6 +39,12 @@ struct tep_plugin_list {
 	void			*handle;
 };
 
+struct tep_plugins_dir {
+	struct tep_plugins_dir		*next;
+	char				*path;
+	enum tep_plugin_load_priority	prio;
+};
+
 static void lower_case(char *str)
 {
 	if (!str)
@@ -247,6 +254,170 @@ void tep_plugin_remove_options(struct tep_plugin_option *options)
 	}
 }
 
+static int parse_option_name(char **option, char **plugin)
+{
+	char *p;
+
+	*plugin = NULL;
+
+	if ((p = strstr(*option, ":"))) {
+		*plugin = *option;
+		*p = '\0';
+		*option = strdup(p + 1);
+		if (!*option)
+			return -1;
+	}
+	return 0;
+}
+
+static struct tep_plugin_option *
+find_registered_option(const char *plugin, const char *option)
+{
+	struct registered_plugin_options *reg;
+	struct tep_plugin_option *op;
+	const char *op_plugin;
+
+	for (reg = registered_options; reg; reg = reg->next) {
+		for (op = reg->options; op->name; op++) {
+			if (op->plugin_alias)
+				op_plugin = op->plugin_alias;
+			else
+				op_plugin = op->file;
+
+			if (plugin && strcmp(plugin, op_plugin) != 0)
+				continue;
+			if (strcmp(option, op->name) != 0)
+				continue;
+
+			return op;
+		}
+	}
+
+	return NULL;
+}
+
+static int process_option(const char *plugin, const char *option, const char *val)
+{
+	struct tep_plugin_option *op;
+
+	op = find_registered_option(plugin, option);
+	if (!op)
+		return 0;
+
+	return update_option_value(op, val);
+}
+
+/**
+ * tep_plugin_add_option - add an option/val pair to set plugin options
+ * @name: The name of the option (format: <plugin>:<option> or just <option>)
+ * @val: (optional) the value for the option
+ *
+ * Modify a plugin option. If @val is given than the value of the option
+ * is set (note, some options just take a boolean, so @val must be either
+ * "1" or "0" or "true" or "false").
+ */
+int tep_plugin_add_option(const char *name, const char *val)
+{
+	struct trace_plugin_options *op;
+	char *option_str;
+	char *plugin;
+
+	option_str = strdup(name);
+	if (!option_str)
+		return -ENOMEM;
+
+	if (parse_option_name(&option_str, &plugin) < 0)
+		return -ENOMEM;
+
+	/* If the option exists, update the val */
+	for (op = trace_plugin_options; op; op = op->next) {
+		/* Both must be NULL or not NULL */
+		if ((!plugin || !op->plugin) && plugin != op->plugin)
+			continue;
+		if (plugin && strcmp(plugin, op->plugin) != 0)
+			continue;
+		if (strcmp(op->option, option_str) != 0)
+			continue;
+
+		/* update option */
+		free(op->value);
+		if (val) {
+			op->value = strdup(val);
+			if (!op->value)
+				goto out_free;
+		} else
+			op->value = NULL;
+
+		/* plugin and option_str don't get freed at the end */
+		free(plugin);
+		free(option_str);
+
+		plugin = op->plugin;
+		option_str = op->option;
+		break;
+	}
+
+	/* If not found, create */
+	if (!op) {
+		op = malloc(sizeof(*op));
+		if (!op)
+			goto out_free;
+		memset(op, 0, sizeof(*op));
+		op->plugin = plugin;
+		op->option = option_str;
+		if (val) {
+			op->value = strdup(val);
+			if (!op->value) {
+				free(op);
+				goto out_free;
+			}
+		}
+		op->next = trace_plugin_options;
+		trace_plugin_options = op;
+	}
+
+	return process_option(plugin, option_str, val);
+
+out_free:
+	free(plugin);
+	free(option_str);
+	return -ENOMEM;
+}
+
+static void print_op_data(struct trace_seq *s, const char *name,
+			  const char *op)
+{
+	if (op)
+		trace_seq_printf(s, "%8s:\t%s\n", name, op);
+}
+
+/**
+ * tep_plugin_print_options - print out the registered plugin options
+ * @s: The trace_seq descriptor to write the plugin options into
+ *
+ * Writes a list of options into trace_seq @s.
+ */
+void tep_plugin_print_options(struct trace_seq *s)
+{
+	struct registered_plugin_options *reg;
+	struct tep_plugin_option *op;
+
+	for (reg = registered_options; reg; reg = reg->next) {
+		if (reg != registered_options)
+			trace_seq_printf(s, "============\n");
+		for (op = reg->options; op->name; op++) {
+			if (op != reg->options)
+				trace_seq_printf(s, "------------\n");
+			print_op_data(s, "file", op->file);
+			print_op_data(s, "plugin", op->plugin_alias);
+			print_op_data(s, "option", op->name);
+			print_op_data(s, "desc", op->description);
+			print_op_data(s, "value", op->value);
+			trace_seq_printf(s, "%8s:\t%d\n", "set", op->set);
+		}
+	}
+}
+
 /**
  * tep_print_plugins - print out the list of plugins loaded
  * @s: the trace_seq descripter to write to
@@ -273,6 +444,7 @@ load_plugin(struct tep_handle *tep, const char *path,
 	    const char *file, void *data)
 {
 	struct tep_plugin_list **plugin_list = data;
+	struct tep_plugin_option *options;
 	tep_plugin_load_func func;
 	struct tep_plugin_list *list;
 	const char *alias;
@@ -297,6 +469,16 @@ load_plugin(struct tep_handle *tep, const char *path,
 	if (!alias)
 		alias = file;
 
+	options = dlsym(handle, TEP_PLUGIN_OPTIONS_NAME);
+	if (options) {
+		while (options->name) {
+			ret = update_option(alias, options);
+			if (ret < 0)
+				goto out_free;
+			options++;
+		}
+	}
+
 	func = dlsym(handle, TEP_PLUGIN_LOADER_NAME);
 	if (!func) {
 		warning("could not find func '%s' in plugin '%s'\n%s\n",
@@ -365,28 +547,53 @@ load_plugins_dir(struct tep_handle *tep, const char *suffix,
 	closedir(dir);
 }
 
-static void
-load_plugins(struct tep_handle *tep, const char *suffix,
-	     void (*load_plugin)(struct tep_handle *tep,
-				 const char *path,
-				 const char *name,
-				 void *data),
-	     void *data)
+/**
+ * tep_load_plugins_hook - call a user specified callback to load a plugin
+ * @tep: handler to traceevent context
+ * @suffix: filter only plugin files with given suffix
+ * @load_plugin: user specified callback, called for each plugin file
+ * @data: custom context, passed to @load_plugin
+ *
+ * Searches for traceevent plugin files and calls @load_plugin for each
+ * The order of plugins search is:
+ *  - Directories, specified in @tep->plugins_dir and priority TEP_PLUGIN_FIRST
+ *  - Directory, specified at compile time with PLUGIN_TRACEEVENT_DIR
+ *  - Directory, specified by environment variable TRACEEVENT_PLUGIN_DIR
+ *  - In user's home: ~/.local/lib/traceevent/plugins/
+ *  - Directories, specified in @tep->plugins_dir and priority TEP_PLUGIN_LAST
+ *
+ */
+void tep_load_plugins_hook(struct tep_handle *tep, const char *suffix,
+			   void (*load_plugin)(struct tep_handle *tep,
+					       const char *path,
+					       const char *name,
+					       void *data),
+			   void *data)
 {
+	struct tep_plugins_dir *dir = NULL;
 	char *home;
 	char *path;
 	char *envdir;
 	int ret;
 
-	if (tep->flags & TEP_DISABLE_PLUGINS)
+	if (tep && tep->flags & TEP_DISABLE_PLUGINS)
 		return;
 
+	if (tep)
+		dir = tep->plugins_dir;
+	while (dir) {
+		if (dir->prio == TEP_PLUGIN_FIRST)
+			load_plugins_dir(tep, suffix, dir->path,
+					 load_plugin, data);
+		dir = dir->next;
+	}
+
 	/*
 	 * If a system plugin directory was defined,
 	 * check that first.
 	 */
 #ifdef PLUGIN_DIR
-	if (!(tep->flags & TEP_DISABLE_SYS_PLUGINS))
+	if (!tep || !(tep->flags & TEP_DISABLE_SYS_PLUGINS))
 		load_plugins_dir(tep, suffix, PLUGIN_DIR,
 				 load_plugin, data);
 #endif
@@ -415,6 +622,15 @@ load_plugins(struct tep_handle *tep, const char *suffix,
 
 	load_plugins_dir(tep, suffix, path, load_plugin, data);
 
+	if (tep)
+		dir = tep->plugins_dir;
+	while (dir) {
+		if (dir->prio == TEP_PLUGIN_LAST)
+			load_plugins_dir(tep, suffix, dir->path,
+					 load_plugin, data);
+		dir = dir->next;
+	}
+
 	free(path);
 }
 
@@ -423,10 +639,59 @@ tep_load_plugins(struct tep_handle *tep)
 {
 	struct tep_plugin_list *list = NULL;
 
-	load_plugins(tep, ".so", load_plugin, &list);
+	tep_load_plugins_hook(tep, ".so", load_plugin, &list);
 	return list;
 }
 
+/**
+ * tep_add_plugin_path - Add a new plugin directory.
+ * @tep: Trace event handler.
+ * @path: Path to a directory. All plugin files in that
+ *	  directory will be loaded.
+ *@prio: Load priority of the plugins in that directory.
+ *
+ * Returns -1 in case of an error, 0 otherwise.
+ */
+int tep_add_plugin_path(struct tep_handle *tep, char *path,
+			enum tep_plugin_load_priority prio)
+{
+	struct tep_plugins_dir *dir;
+
+	if (!tep || !path)
+		return -1;
+
+	dir = calloc(1, sizeof(*dir));
+	if (!dir)
+		return -1;
+
+	dir->path = strdup(path);
+	if (!dir->path) {
+		free(dir);
+		return -1;
+	}
+	dir->prio = prio;
+	dir->next = tep->plugins_dir;
+	tep->plugins_dir = dir;
+
+	return 0;
+}
+
+void tep_free_plugin_paths(struct tep_handle *tep)
+{
+	struct tep_plugins_dir *dir;
+
+	if (!tep)
+		return;
+
+	dir = tep->plugins_dir;
+	while (dir) {
+		tep->plugins_dir = tep->plugins_dir->next;
+		free(dir->path);
+		free(dir);
+		dir = tep->plugins_dir;
+	}
+}
+
 void
 tep_unload_plugins(struct tep_plugin_list *plugin_list, struct tep_handle *tep)
 {
diff --git a/tools/lib/traceevent/kbuffer.h b/tools/lib/traceevent/kbuffer.h
index 5fa8292e341b..a2b522093cfd 100644
--- a/tools/lib/traceevent/kbuffer.h
+++ b/tools/lib/traceevent/kbuffer.h
@@ -1,22 +1,7 @@
+/* SPDX-License-Identifier: LGPL-2.1 */
 /*
  * Copyright (C) 2012 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
  *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation;
- * version 2.1 of the License (not later!)
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  */
 #ifndef _KBUFFER_H
 #define _KBUFFER_H
diff --git a/tools/lib/traceevent/plugins/Build b/tools/lib/traceevent/plugins/Build
index 210d26910613..dd4da823c38f 100644
--- a/tools/lib/traceevent/plugins/Build
+++ b/tools/lib/traceevent/plugins/Build
@@ -5,6 +5,8 @@ plugin_kvm-y          += plugin_kvm.o
 plugin_mac80211-y     += plugin_mac80211.o
 plugin_sched_switch-y += plugin_sched_switch.o
 plugin_function-y     += plugin_function.o
+plugin_futex-y        += plugin_futex.o
 plugin_xen-y          += plugin_xen.o
 plugin_scsi-y         += plugin_scsi.o
 plugin_cfg80211-y     += plugin_cfg80211.o
+plugin_tlb-y          += plugin_tlb.o
+\ No newline at end of file
diff --git a/tools/lib/traceevent/plugins/Makefile b/tools/lib/traceevent/plugins/Makefile
index 680d883efe05..47e802553250 100644
--- a/tools/lib/traceevent/plugins/Makefile
+++ b/tools/lib/traceevent/plugins/Makefile
@@ -134,9 +134,11 @@ PLUGINS += plugin_kvm.so
 PLUGINS += plugin_mac80211.so
 PLUGINS += plugin_sched_switch.so
 PLUGINS += plugin_function.so
+PLUGINS += plugin_futex.so
 PLUGINS += plugin_xen.so
 PLUGINS += plugin_scsi.so
 PLUGINS += plugin_cfg80211.so
+PLUGINS += plugin_tlb.so
 
 PLUGINS    := $(addprefix $(OUTPUT),$(PLUGINS))
 PLUGINS_IN := $(PLUGINS:.so=-in.o)
diff --git a/tools/lib/traceevent/plugins/plugin_function.c b/tools/lib/traceevent/plugins/plugin_function.c
index 7770fcb78e0f..807b16e1bf0f 100644
--- a/tools/lib/traceevent/plugins/plugin_function.c
+++ b/tools/lib/traceevent/plugins/plugin_function.c
@@ -1,21 +1,6 @@
+// SPDX-License-Identifier: LGPL-2.1
 /*
  * Copyright (C) 2009, 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation;
- * version 2.1 of the License (not later!)
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not,  see <http://www.gnu.org/licenses>
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  */
 #include <stdio.h>
 #include <stdlib.h>
@@ -50,12 +35,20 @@ struct tep_plugin_option plugin_options[] =
 		.set = 1,
 	},
 	{
+		.name = "offset",
+		.plugin_alias = "ftrace",
+		.description =
+		"Show function names as well as their offsets",
+		.set = 0,
+	},
+	{
 		.name = NULL,
 	}
 };
 
 static struct tep_plugin_option *ftrace_parent = &plugin_options[0];
 static struct tep_plugin_option *ftrace_indent = &plugin_options[1];
+static struct tep_plugin_option *ftrace_offset = &plugin_options[2];
 
 static void add_child(struct func_stack *stack, const char *child, int pos)
 {
@@ -123,6 +116,18 @@ static int add_and_get_index(const char *parent, const char *child, int cpu)
 	return 0;
 }
 
+static void show_function(struct trace_seq *s, struct tep_handle *tep,
+			  const char *func, unsigned long long function)
+{
+	unsigned long long offset;
+
+	trace_seq_printf(s, "%s", func);
+	if (ftrace_offset->set) {
+		offset = tep_find_function_address(tep, function);
+		trace_seq_printf(s, "+0x%x ", (int)(function - offset));
+	}
+}
+
 static int function_handler(struct trace_seq *s, struct tep_record *record,
 			    struct tep_event *event, void *context)
 {
@@ -149,14 +154,14 @@ static int function_handler(struct trace_seq *s, struct tep_record *record,
 	trace_seq_printf(s, "%*s", index*3, "");
 
 	if (func)
-		trace_seq_printf(s, "%s", func);
+		show_function(s, tep, func, function);
 	else
 		trace_seq_printf(s, "0x%llx", function);
 
 	if (ftrace_parent->set) {
 		trace_seq_printf(s, " <-- ");
 		if (parent)
-			trace_seq_printf(s, "%s", parent);
+			show_function(s, tep, parent, pfunction);
 		else
 			trace_seq_printf(s, "0x%llx", pfunction);
 	}
@@ -164,11 +169,93 @@ static int function_handler(struct trace_seq *s, struct tep_record *record,
 	return 0;
 }
 
+static int
+trace_stack_handler(struct trace_seq *s, struct tep_record *record,
+		    struct tep_event *event, void *context)
+{
+	struct tep_format_field *field;
+	unsigned long long addr;
+	const char *func;
+	int long_size;
+	void *data = record->data;
+
+	field = tep_find_any_field(event, "caller");
+	if (!field) {
+		trace_seq_printf(s, "<CANT FIND FIELD %s>", "caller");
+		return 0;
+	}
+
+	trace_seq_puts(s, "<stack trace >\n");
+
+	long_size = tep_get_long_size(event->tep);
+
+	for (data += field->offset; data < record->data + record->size;
+	     data += long_size) {
+		addr = tep_read_number(event->tep, data, long_size);
+
+		if ((long_size == 8 && addr == (unsigned long long)-1) ||
+		    ((int)addr == -1))
+			break;
+
+		func = tep_find_function(event->tep, addr);
+		if (func)
+			trace_seq_printf(s, "=> %s (%llx)\n", func, addr);
+		else
+			trace_seq_printf(s, "=> %llx\n", addr);
+	}
+
+	return 0;
+}
+
+static int
+trace_raw_data_handler(struct trace_seq *s, struct tep_record *record,
+		    struct tep_event *event, void *context)
+{
+	struct tep_format_field *field;
+	unsigned long long id;
+	int long_size;
+	void *data = record->data;
+
+	if (tep_get_field_val(s, event, "id", record, &id, 1))
+		return trace_seq_putc(s, '!');
+
+	trace_seq_printf(s, "# %llx", id);
+
+	field = tep_find_any_field(event, "buf");
+	if (!field) {
+		trace_seq_printf(s, "<CANT FIND FIELD %s>", "buf");
+		return 0;
+	}
+
+	long_size = tep_get_long_size(event->tep);
+
+	for (data += field->offset; data < record->data + record->size;
+	     data += long_size) {
+		int size = sizeof(long);
+		int left = (record->data + record->size) - data;
+		int i;
+
+		if (size > left)
+			size = left;
+
+		for (i = 0; i < size; i++)
+			trace_seq_printf(s, " %02x", *(unsigned char *)(data + i));
+	}
+
+	return 0;
+}
+
 int TEP_PLUGIN_LOADER(struct tep_handle *tep)
 {
 	tep_register_event_handler(tep, -1, "ftrace", "function",
 				   function_handler, NULL);
 
+	tep_register_event_handler(tep, -1, "ftrace", "kernel_stack",
+				      trace_stack_handler, NULL);
+
+	tep_register_event_handler(tep, -1, "ftrace", "raw_data",
+				      trace_raw_data_handler, NULL);
+
 	tep_plugin_add_options("ftrace", plugin_options);
 
 	return 0;
diff --git a/tools/lib/traceevent/plugins/plugin_futex.c b/tools/lib/traceevent/plugins/plugin_futex.c
new file mode 100644
index 000000000000..eb7c9f8a850a
--- /dev/null
+++ b/tools/lib/traceevent/plugins/plugin_futex.c
@@ -0,0 +1,123 @@
+// SPDX-License-Identifier: LGPL-2.1
+/*
+ * Copyright (C) 2017 National Instruments Corp.
+ *
+ * Author: Julia Cartwright <julia@ni.com>
+ *
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <linux/futex.h>
+
+#include "event-parse.h"
+
+#define ARRAY_SIZE(_a) (sizeof(_a) / sizeof((_a)[0]))
+
+struct futex_args {
+	unsigned long long	uaddr;
+	unsigned long long	op;
+	unsigned long long	val;
+	unsigned long long	utime; /* or val2 */
+	unsigned long long	uaddr2;
+	unsigned long long	val3;
+};
+
+struct futex_op {
+	const char	*name;
+	const char	*fmt_val;
+	const char	*fmt_utime;
+	const char	*fmt_uaddr2;
+	const char	*fmt_val3;
+};
+
+static const struct futex_op futex_op_tbl[] = {
+	{            "FUTEX_WAIT", " val=0x%08llx", " utime=0x%08llx",               NULL,             NULL },
+	{            "FUTEX_WAKE",     " val=%llu",              NULL,               NULL,             NULL },
+	{              "FUTEX_FD",     " val=%llu",              NULL,               NULL,             NULL },
+	{         "FUTEX_REQUEUE",     " val=%llu",      " val2=%llu", " uaddr2=0x%08llx",             NULL },
+	{     "FUTEX_CMP_REQUEUE",     " val=%llu",      " val2=%llu", " uaddr2=0x%08llx", " val3=0x%08llx" },
+	{         "FUTEX_WAKE_OP",     " val=%llu",      " val2=%llu", " uaddr2=0x%08llx", " val3=0x%08llx" },
+	{         "FUTEX_LOCK_PI",            NULL, " utime=0x%08llx",               NULL,             NULL },
+	{       "FUTEX_UNLOCK_PI",            NULL,              NULL,               NULL,             NULL },
+	{      "FUTEX_TRYLOCK_PI",            NULL,              NULL,               NULL,             NULL },
+	{     "FUTEX_WAIT_BITSET", " val=0x%08llx", " utime=0x%08llx",               NULL, " val3=0x%08llx" },
+	{     "FUTEX_WAKE_BITSET",     " val=%llu",              NULL,               NULL, " val3=0x%08llx" },
+	{ "FUTEX_WAIT_REQUEUE_PI", " val=0x%08llx", " utime=0x%08llx", " uaddr2=0x%08llx", " val3=0x%08llx" },
+	{  "FUTEX_CMP_REQUEUE_PI",     " val=%llu",      " val2=%llu", " uaddr2=0x%08llx", " val3=0x%08llx" },
+};
+
+
+static void futex_print(struct trace_seq *s, const struct futex_args *args,
+			const struct futex_op *fop)
+{
+	trace_seq_printf(s, " uaddr=0x%08llx", args->uaddr);
+
+	if (fop->fmt_val)
+		trace_seq_printf(s, fop->fmt_val, args->val);
+
+	if (fop->fmt_utime)
+		trace_seq_printf(s,fop->fmt_utime, args->utime);
+
+	if (fop->fmt_uaddr2)
+		trace_seq_printf(s, fop->fmt_uaddr2, args->uaddr2);
+
+	if (fop->fmt_val3)
+		trace_seq_printf(s, fop->fmt_val3, args->val3);
+}
+
+static int futex_handler(struct trace_seq *s, struct tep_record *record,
+			 struct tep_event *event, void *context)
+{
+	const struct futex_op *fop;
+	struct futex_args args;
+	unsigned long long cmd;
+
+	if (tep_get_field_val(s, event, "uaddr", record, &args.uaddr, 1))
+		return 1;
+
+	if (tep_get_field_val(s, event, "op", record, &args.op, 1))
+		return 1;
+
+	if (tep_get_field_val(s, event, "val", record, &args.val, 1))
+		return 1;
+
+	if (tep_get_field_val(s, event, "utime", record, &args.utime, 1))
+		return 1;
+
+	if (tep_get_field_val(s, event, "uaddr2", record, &args.uaddr2, 1))
+		return 1;
+
+	if (tep_get_field_val(s, event, "val3", record, &args.val3, 1))
+		return 1;
+
+	cmd = args.op & FUTEX_CMD_MASK;
+	if (cmd >= ARRAY_SIZE(futex_op_tbl))
+		return 1;
+
+	fop = &futex_op_tbl[cmd];
+
+	trace_seq_printf(s, "op=%s", fop->name);
+
+	if (args.op & FUTEX_PRIVATE_FLAG)
+		trace_seq_puts(s, "|FUTEX_PRIVATE_FLAG");
+
+	if (args.op & FUTEX_CLOCK_REALTIME)
+		trace_seq_puts(s, "|FUTEX_CLOCK_REALTIME");
+
+	futex_print(s, &args, fop);
+	return 0;
+}
+
+int TEP_PLUGIN_LOADER(struct tep_handle *tep)
+{
+	tep_register_event_handler(tep, -1, "syscalls", "sys_enter_futex",
+				   futex_handler, NULL);
+	return 0;
+}
+
+void TEP_PLUGIN_UNLOADER(struct tep_handle *tep)
+{
+	tep_unregister_event_handler(tep, -1, "syscalls", "sys_enter_futex",
+				     futex_handler, NULL);
+}
diff --git a/tools/lib/traceevent/plugins/plugin_hrtimer.c b/tools/lib/traceevent/plugins/plugin_hrtimer.c
index bb434e0ed03a..d98466788f14 100644
--- a/tools/lib/traceevent/plugins/plugin_hrtimer.c
+++ b/tools/lib/traceevent/plugins/plugin_hrtimer.c
@@ -1,22 +1,7 @@
+// SPDX-License-Identifier: LGPL-2.1
 /*
  * Copyright (C) 2009 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
  * Copyright (C) 2009 Johannes Berg <johannes@sipsolutions.net>
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation;
- * version 2.1 of the License (not later!)
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not,  see <http://www.gnu.org/licenses>
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  */
 #include <stdio.h>
 #include <stdlib.h>
diff --git a/tools/lib/traceevent/plugins/plugin_jbd2.c b/tools/lib/traceevent/plugins/plugin_jbd2.c
index 04fc125f38cb..69111a68d3cf 100644
--- a/tools/lib/traceevent/plugins/plugin_jbd2.c
+++ b/tools/lib/traceevent/plugins/plugin_jbd2.c
@@ -1,21 +1,6 @@
+// SPDX-License-Identifier: LGPL-2.1
 /*
  * Copyright (C) 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation;
- * version 2.1 of the License (not later!)
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not,  see <http://www.gnu.org/licenses>
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  */
 #include <stdio.h>
 #include <stdlib.h>
diff --git a/tools/lib/traceevent/plugins/plugin_kmem.c b/tools/lib/traceevent/plugins/plugin_kmem.c
index edaec5d962c3..4b4f7f9616e3 100644
--- a/tools/lib/traceevent/plugins/plugin_kmem.c
+++ b/tools/lib/traceevent/plugins/plugin_kmem.c
@@ -1,21 +1,6 @@
+// SPDX-License-Identifier: LGPL-2.1
 /*
  * Copyright (C) 2009 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation;
- * version 2.1 of the License (not later!)
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not,  see <http://www.gnu.org/licenses>
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  */
 #include <stdio.h>
 #include <stdlib.h>
diff --git a/tools/lib/traceevent/plugins/plugin_kvm.c b/tools/lib/traceevent/plugins/plugin_kvm.c
index c8e623065a7e..51ceeb9147eb 100644
--- a/tools/lib/traceevent/plugins/plugin_kvm.c
+++ b/tools/lib/traceevent/plugins/plugin_kvm.c
@@ -1,21 +1,6 @@
+// SPDX-License-Identifier: LGPL-2.1
 /*
  * Copyright (C) 2009 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation;
- * version 2.1 of the License (not later!)
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not,  see <http://www.gnu.org/licenses>
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  */
 #include <stdio.h>
 #include <stdlib.h>
@@ -155,7 +140,23 @@ static const char *disassemble(unsigned char *insn, int len, uint64_t rip,
 	_ER(EXIT_WRITE_DR5,	0x035)		\
 	_ER(EXIT_WRITE_DR6,	0x036)		\
 	_ER(EXIT_WRITE_DR7,	0x037)		\
-	_ER(EXIT_EXCP_BASE,     0x040)		\
+	_ER(EXIT_EXCP_DE,	0x040)		\
+	_ER(EXIT_EXCP_DB,	0x041)		\
+	_ER(EXIT_EXCP_BP,	0x043)		\
+	_ER(EXIT_EXCP_OF,	0x044)		\
+	_ER(EXIT_EXCP_BR,	0x045)		\
+	_ER(EXIT_EXCP_UD,	0x046)		\
+	_ER(EXIT_EXCP_NM,	0x047)		\
+	_ER(EXIT_EXCP_DF,	0x048)		\
+	_ER(EXIT_EXCP_TS,	0x04a)		\
+	_ER(EXIT_EXCP_NP,	0x04b)		\
+	_ER(EXIT_EXCP_SS,	0x04c)		\
+	_ER(EXIT_EXCP_GP,	0x04d)		\
+	_ER(EXIT_EXCP_PF,	0x04e)		\
+	_ER(EXIT_EXCP_MF,	0x050)		\
+	_ER(EXIT_EXCP_AC,	0x051)		\
+	_ER(EXIT_EXCP_MC,	0x052)		\
+	_ER(EXIT_EXCP_XF,	0x053)		\
 	_ER(EXIT_INTR,		0x060)		\
 	_ER(EXIT_NMI,		0x061)		\
 	_ER(EXIT_SMI,		0x062)		\
@@ -201,7 +202,10 @@ static const char *disassemble(unsigned char *insn, int len, uint64_t rip,
 	_ER(EXIT_MONITOR,	0x08a)		\
 	_ER(EXIT_MWAIT,		0x08b)		\
 	_ER(EXIT_MWAIT_COND,	0x08c)		\
-	_ER(EXIT_NPF,		0x400)		\
+	_ER(EXIT_XSETBV,	0x08d)		\
+	_ER(EXIT_NPF, 		0x400)		\
+	_ER(EXIT_AVIC_INCOMPLETE_IPI,		0x401)	\
+	_ER(EXIT_AVIC_UNACCELERATED_ACCESS,	0x402)	\
 	_ER(EXIT_ERR,		-1)
 
 #define _ER(reason, val)	{ #reason, val },
@@ -241,7 +245,7 @@ static const char *find_exit_reason(unsigned isa, int val)
 		}
 	if (!strings)
 		return "UNKNOWN-ISA";
-	for (i = 0; strings[i].val >= 0; i++)
+	for (i = 0; strings[i].str; i++)
 		if (strings[i].val == val)
 			break;
 
diff --git a/tools/lib/traceevent/plugins/plugin_mac80211.c b/tools/lib/traceevent/plugins/plugin_mac80211.c
index 884303c26b5c..f48071e3cfb8 100644
--- a/tools/lib/traceevent/plugins/plugin_mac80211.c
+++ b/tools/lib/traceevent/plugins/plugin_mac80211.c
@@ -1,21 +1,6 @@
+// SPDX-License-Identifier: LGPL-2.1
 /*
  * Copyright (C) 2009 Johannes Berg <johannes@sipsolutions.net>
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation;
- * version 2.1 of the License (not later!)
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not,  see <http://www.gnu.org/licenses>
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  */
 #include <stdio.h>
 #include <stdlib.h>
diff --git a/tools/lib/traceevent/plugins/plugin_sched_switch.c b/tools/lib/traceevent/plugins/plugin_sched_switch.c
index 957389a0ff7a..e12fa103820a 100644
--- a/tools/lib/traceevent/plugins/plugin_sched_switch.c
+++ b/tools/lib/traceevent/plugins/plugin_sched_switch.c
@@ -1,21 +1,6 @@
+// SPDX-License-Identifier: LGPL-2.1
 /*
  * Copyright (C) 2009, 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation;
- * version 2.1 of the License (not later!)
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not,  see <http://www.gnu.org/licenses>
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  */
 #include <stdio.h>
 #include <stdlib.h>
diff --git a/tools/lib/traceevent/plugins/plugin_tlb.c b/tools/lib/traceevent/plugins/plugin_tlb.c
new file mode 100644
index 000000000000..43657fb60504
--- /dev/null
+++ b/tools/lib/traceevent/plugins/plugin_tlb.c
@@ -0,0 +1,66 @@
+// SPDX-License-Identifier: LGPL-2.1
+/*
+ * Copyright (C) 2015 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "event-parse.h"
+
+enum tlb_flush_reason {
+	TLB_FLUSH_ON_TASK_SWITCH,
+	TLB_REMOTE_SHOOTDOWN,
+	TLB_LOCAL_SHOOTDOWN,
+	TLB_LOCAL_MM_SHOOTDOWN,
+	NR_TLB_FLUSH_REASONS,
+};
+
+static int tlb_flush_handler(struct trace_seq *s, struct tep_record *record,
+			     struct tep_event *event, void *context)
+{
+	unsigned long long val;
+
+	trace_seq_printf(s, "pages=");
+
+	tep_print_num_field(s, "%ld", event, "pages", record, 1);
+
+	if (tep_get_field_val(s, event, "reason", record, &val, 1) < 0)
+		return -1;
+
+	trace_seq_puts(s, " reason=");
+
+	switch (val) {
+	case TLB_FLUSH_ON_TASK_SWITCH:
+		trace_seq_puts(s, "flush on task switch");
+		break;
+	case TLB_REMOTE_SHOOTDOWN:
+		trace_seq_puts(s, "remote shootdown");
+		break;
+	case TLB_LOCAL_SHOOTDOWN:
+		trace_seq_puts(s, "local shootdown");
+		break;
+	case TLB_LOCAL_MM_SHOOTDOWN:
+		trace_seq_puts(s, "local mm shootdown");
+		break;
+	}
+
+	trace_seq_printf(s, " (%lld)", val);
+
+	return 0;
+}
+
+int TEP_PLUGIN_LOADER(struct tep_handle *tep)
+{
+	tep_register_event_handler(tep, -1, "tlb", "tlb_flush",
+				   tlb_flush_handler, NULL);
+
+	return 0;
+}
+
+void TEP_PLUGIN_UNLOADER(struct tep_handle *tep)
+{
+	tep_unregister_event_handler(tep, -1,
+				     "tlb", "tlb_flush",
+				     tlb_flush_handler, NULL);
+}
diff --git a/tools/memory-model/Documentation/cheatsheet.txt b/tools/memory-model/Documentation/cheatsheet.txt
index 33ba98d72b16..99d00870b160 100644
--- a/tools/memory-model/Documentation/cheatsheet.txt
+++ b/tools/memory-model/Documentation/cheatsheet.txt
@@ -3,9 +3,9 @@
                                C  Self  R  W  RMW  Self  R  W  DR  DW  RMW  SV
                               --  ----  -  -  ---  ----  -  -  --  --  ---  --
 
-Store, e.g., WRITE_ONCE()            Y                                       Y
-Load, e.g., READ_ONCE()              Y                          Y   Y        Y
-Unsuccessful RMW operation           Y                          Y   Y        Y
+Relaxed store                        Y                                       Y
+Relaxed load                         Y                          Y   Y        Y
+Relaxed RMW operation                Y                          Y   Y        Y
 rcu_dereference()                    Y                          Y   Y        Y
 Successful *_acquire()               R                   Y  Y   Y   Y    Y   Y
 Successful *_release()         C        Y  Y    Y     W                      Y
@@ -17,14 +17,19 @@ smp_mb__before_atomic()       CP        Y  Y    Y        a  a   a   a    Y
 smp_mb__after_atomic()        CP        a  a    Y        Y  Y   Y   Y    Y
 
 
-Key:	C:	Ordering is cumulative
-	P:	Ordering propagates
-	R:	Read, for example, READ_ONCE(), or read portion of RMW
-	W:	Write, for example, WRITE_ONCE(), or write portion of RMW
-	Y:	Provides ordering
-	a:	Provides ordering given intervening RMW atomic operation
-	DR:	Dependent read (address dependency)
-	DW:	Dependent write (address, data, or control dependency)
-	RMW:	Atomic read-modify-write operation
-	SELF:	Orders self, as opposed to accesses before and/or after
-	SV:	Orders later accesses to the same variable
+Key:	Relaxed:  A relaxed operation is either READ_ONCE(), WRITE_ONCE(),
+		  a *_relaxed() RMW operation, an unsuccessful RMW
+		  operation, a non-value-returning RMW operation such
+		  as atomic_inc(), or one of the atomic*_read() and
+		  atomic*_set() family of operations.
+	C:	  Ordering is cumulative
+	P:	  Ordering propagates
+	R:	  Read, for example, READ_ONCE(), or read portion of RMW
+	W:	  Write, for example, WRITE_ONCE(), or write portion of RMW
+	Y:	  Provides ordering
+	a:	  Provides ordering given intervening RMW atomic operation
+	DR:	  Dependent read (address dependency)
+	DW:	  Dependent write (address, data, or control dependency)
+	RMW:	  Atomic read-modify-write operation
+	SELF:	  Orders self, as opposed to accesses before and/or after
+	SV:	  Orders later accesses to the same variable
diff --git a/tools/memory-model/Documentation/litmus-tests.txt b/tools/memory-model/Documentation/litmus-tests.txt
new file mode 100644
index 000000000000..2f840dcd15cf
--- /dev/null
+++ b/tools/memory-model/Documentation/litmus-tests.txt
@@ -0,0 +1,1074 @@
+Linux-Kernel Memory Model Litmus Tests
+======================================
+
+This file describes the LKMM litmus-test format by example, describes
+some tricks and traps, and finally outlines LKMM's limitations.  Earlier
+versions of this material appeared in a number of LWN articles, including:
+
+https://lwn.net/Articles/720550/
+	A formal kernel memory-ordering model (part 2)
+https://lwn.net/Articles/608550/
+	Axiomatic validation of memory barriers and atomic instructions
+https://lwn.net/Articles/470681/
+	Validating Memory Barriers and Atomic Instructions
+
+This document presents information in decreasing order of applicability,
+so that, where possible, the information that has proven more commonly
+useful is shown near the beginning.
+
+For information on installing LKMM, including the underlying "herd7"
+tool, please see tools/memory-model/README.
+
+
+Copy-Pasta
+==========
+
+As with other software, it is often better (if less macho) to adapt an
+existing litmus test than it is to create one from scratch.  A number
+of litmus tests may be found in the kernel source tree:
+
+	tools/memory-model/litmus-tests/
+	Documentation/litmus-tests/
+
+Several thousand more example litmus tests are available on github
+and kernel.org:
+
+	https://github.com/paulmckrcu/litmus
+	https://git.kernel.org/pub/scm/linux/kernel/git/paulmck/perfbook.git/tree/CodeSamples/formal/herd
+	https://git.kernel.org/pub/scm/linux/kernel/git/paulmck/perfbook.git/tree/CodeSamples/formal/litmus
+
+The -l and -L arguments to "git grep" can be quite helpful in identifying
+existing litmus tests that are similar to the one you need.  But even if
+you start with an existing litmus test, it is still helpful to have a
+good understanding of the litmus-test format.
+
+
+Examples and Format
+===================
+
+This section describes the overall format of litmus tests, starting
+with a small example of the message-passing pattern and moving on to
+more complex examples that illustrate explicit initialization and LKMM's
+minimalistic set of flow-control statements.
+
+
+Message-Passing Example
+-----------------------
+
+This section gives an overview of the format of a litmus test using an
+example based on the common message-passing use case.  This use case
+appears often in the Linux kernel.  For example, a flag (modeled by "y"
+below) indicates that a buffer (modeled by "x" below) is now completely
+filled in and ready for use.  It would be very bad if the consumer saw the
+flag set, but, due to memory misordering, saw old values in the buffer.
+
+This example asks whether smp_store_release() and smp_load_acquire()
+suffices to avoid this bad outcome:
+
+ 1 C MP+pooncerelease+poacquireonce
+ 2
+ 3 {}
+ 4
+ 5 P0(int *x, int *y)
+ 6 {
+ 7   WRITE_ONCE(*x, 1);
+ 8   smp_store_release(y, 1);
+ 9 }
+10
+11 P1(int *x, int *y)
+12 {
+13   int r0;
+14   int r1;
+15
+16   r0 = smp_load_acquire(y);
+17   r1 = READ_ONCE(*x);
+18 }
+19
+20 exists (1:r0=1 /\ 1:r1=0)
+
+Line 1 starts with "C", which identifies this file as being in the
+LKMM C-language format (which, as we will see, is a small fragment
+of the full C language).  The remainder of line 1 is the name of
+the test, which by convention is the filename with the ".litmus"
+suffix stripped.  In this case, the actual test may be found in
+tools/memory-model/litmus-tests/MP+pooncerelease+poacquireonce.litmus
+in the Linux-kernel source tree.
+
+Mechanically generated litmus tests will often have an optional
+double-quoted comment string on the second line.  Such strings are ignored
+when running the test.  Yes, you can add your own comments to litmus
+tests, but this is a bit involved due to the use of multiple parsers.
+For now, you can use C-language comments in the C code, and these comments
+may be in either the "/* */" or the "//" style.  A later section will
+cover the full litmus-test commenting story.
+
+Line 3 is the initialization section.  Because the default initialization
+to zero suffices for this test, the "{}" syntax is used, which mean the
+initialization section is empty.  Litmus tests requiring non-default
+initialization must have non-empty initialization sections, as in the
+example that will be presented later in this document.
+
+Lines 5-9 show the first process and lines 11-18 the second process.  Each
+process corresponds to a Linux-kernel task (or kthread, workqueue, thread,
+and so on; LKMM discussions often use these terms interchangeably).
+The name of the first process is "P0" and that of the second "P1".
+You can name your processes anything you like as long as the names consist
+of a single "P" followed by a number, and as long as the numbers are
+consecutive starting with zero.  This can actually be quite helpful,
+for example, a .litmus file matching "^P1(" but not matching "^P2("
+must contain a two-process litmus test.
+
+The argument list for each function are pointers to the global variables
+used by that function.  Unlike normal C-language function parameters, the
+names are significant.  The fact that both P0() and P1() have a formal
+parameter named "x" means that these two processes are working with the
+same global variable, also named "x".  So the "int *x, int *y" on P0()
+and P1() mean that both processes are working with two shared global
+variables, "x" and "y".  Global variables are always passed to processes
+by reference, hence "P0(int *x, int *y)", but *never* "P0(int x, int y)".
+
+P0() has no local variables, but P1() has two of them named "r0" and "r1".
+These names may be freely chosen, but for historical reasons stemming from
+other litmus-test formats, it is conventional to use names consisting of
+"r" followed by a number as shown here.  A common bug in litmus tests
+is forgetting to add a global variable to a process's parameter list.
+This will sometimes result in an error message, but can also cause the
+intended global to instead be silently treated as an undeclared local
+variable.
+
+Each process's code is similar to Linux-kernel C, as can be seen on lines
+7-8 and 13-17.  This code may use many of the Linux kernel's atomic
+operations, some of its exclusive-lock functions, and some of its RCU
+and SRCU functions.  An approximate list of the currently supported
+functions may be found in the linux-kernel.def file.
+
+The P0() process does "WRITE_ONCE(*x, 1)" on line 7.  Because "x" is a
+pointer in P0()'s parameter list, this does an unordered store to global
+variable "x".  Line 8 does "smp_store_release(y, 1)", and because "y"
+is also in P0()'s parameter list, this does a release store to global
+variable "y".
+
+The P1() process declares two local variables on lines 13 and 14.
+Line 16 does "r0 = smp_load_acquire(y)" which does an acquire load
+from global variable "y" into local variable "r0".  Line 17 does a
+"r1 = READ_ONCE(*x)", which does an unordered load from "*x" into local
+variable "r1".  Both "x" and "y" are in P1()'s parameter list, so both
+reference the same global variables that are used by P0().
+
+Line 20 is the "exists" assertion expression to evaluate the final state.
+This final state is evaluated after the dust has settled: both processes
+have completed and all of their memory references and memory barriers
+have propagated to all parts of the system.  The references to the local
+variables "r0" and "r1" in line 24 must be prefixed with "1:" to specify
+which process they are local to.
+
+Note that the assertion expression is written in the litmus-test
+language rather than in C.  For example, single "=" is an equality
+operator rather than an assignment.  The "/\" character combination means
+"and".  Similarly, "\/" stands for "or".  Both of these are ASCII-art
+representations of the corresponding mathematical symbols.  Finally,
+"~" stands for "logical not", which is "!" in C, and not to be confused
+with the C-language "~" operator which instead stands for "bitwise not".
+Parentheses may be used to override precedence.
+
+The "exists" assertion on line 20 is satisfied if the consumer sees the
+flag ("y") set but the buffer ("x") as not yet filled in, that is, if P1()
+loaded a value from "x" that was equal to 1 but loaded a value from "y"
+that was still equal to zero.
+
+This example can be checked by running the following command, which
+absolutely must be run from the tools/memory-model directory and from
+this directory only:
+
+herd7 -conf linux-kernel.cfg litmus-tests/MP+pooncerelease+poacquireonce.litmus
+
+The output is the result of something similar to a full state-space
+search, and is as follows:
+
+ 1 Test MP+pooncerelease+poacquireonce Allowed
+ 2 States 3
+ 3 1:r0=0; 1:r1=0;
+ 4 1:r0=0; 1:r1=1;
+ 5 1:r0=1; 1:r1=1;
+ 6 No
+ 7 Witnesses
+ 8 Positive: 0 Negative: 3
+ 9 Condition exists (1:r0=1 /\ 1:r1=0)
+10 Observation MP+pooncerelease+poacquireonce Never 0 3
+11 Time MP+pooncerelease+poacquireonce 0.00
+12 Hash=579aaa14d8c35a39429b02e698241d09
+
+The most pertinent line is line 10, which contains "Never 0 3", which
+indicates that the bad result flagged by the "exists" clause never
+happens.  This line might instead say "Sometimes" to indicate that the
+bad result happened in some but not all executions, or it might say
+"Always" to indicate that the bad result happened in all executions.
+(The herd7 tool doesn't judge, so it is only an LKMM convention that the
+"exists" clause indicates a bad result.  To see this, invert the "exists"
+clause's condition and run the test.)  The numbers ("0 3") at the end
+of this line indicate the number of end states satisfying the "exists"
+clause (0) and the number not not satisfying that clause (3).
+
+Another important part of this output is shown in lines 2-5, repeated here:
+
+ 2 States 3
+ 3 1:r0=0; 1:r1=0;
+ 4 1:r0=0; 1:r1=1;
+ 5 1:r0=1; 1:r1=1;
+
+Line 2 gives the total number of end states, and each of lines 3-5 list
+one of these states, with the first ("1:r0=0; 1:r1=0;") indicating that
+both of P1()'s loads returned the value "0".  As expected, given the
+"Never" on line 10, the state flagged by the "exists" clause is not
+listed.  This full list of states can be helpful when debugging a new
+litmus test.
+
+The rest of the output is not normally needed, either due to irrelevance
+or due to being redundant with the lines discussed above.  However, the
+following paragraph lists them for the benefit of readers possessed of
+an insatiable curiosity.  Other readers should feel free to skip ahead.
+
+Line 1 echos the test name, along with the "Test" and "Allowed".  Line 6's
+"No" says that the "exists" clause was not satisfied by any execution,
+and as such it has the same meaning as line 10's "Never".  Line 7 is a
+lead-in to line 8's "Positive: 0 Negative: 3", which lists the number
+of end states satisfying and not satisfying the "exists" clause, just
+like the two numbers at the end of line 10.  Line 9 repeats the "exists"
+clause so that you don't have to look it up in the litmus-test file.
+The number at the end of line 11 (which begins with "Time") gives the
+time in seconds required to analyze the litmus test.  Small tests such
+as this one complete in a few milliseconds, so "0.00" is quite common.
+Line 12 gives a hash of the contents for the litmus-test file, and is used
+by tooling that manages litmus tests and their output.  This tooling is
+used by people modifying LKMM itself, and among other things lets such
+people know which of the several thousand relevant litmus tests were
+affected by a given change to LKMM.
+
+
+Initialization
+--------------
+
+The previous example relied on the default zero initialization for
+"x" and "y", but a similar litmus test could instead initialize them
+to some other value:
+
+ 1 C MP+pooncerelease+poacquireonce
+ 2
+ 3 {
+ 4   x=42;
+ 5   y=42;
+ 6 }
+ 7
+ 8 P0(int *x, int *y)
+ 9 {
+10   WRITE_ONCE(*x, 1);
+11   smp_store_release(y, 1);
+12 }
+13
+14 P1(int *x, int *y)
+15 {
+16   int r0;
+17   int r1;
+18
+19   r0 = smp_load_acquire(y);
+20   r1 = READ_ONCE(*x);
+21 }
+22
+23 exists (1:r0=1 /\ 1:r1=42)
+
+Lines 3-6 now initialize both "x" and "y" to the value 42.  This also
+means that the "exists" clause on line 23 must change "1:r1=0" to
+"1:r1=42".
+
+Running the test gives the same overall result as before, but with the
+value 42 appearing in place of the value zero:
+
+ 1 Test MP+pooncerelease+poacquireonce Allowed
+ 2 States 3
+ 3 1:r0=1; 1:r1=1;
+ 4 1:r0=42; 1:r1=1;
+ 5 1:r0=42; 1:r1=42;
+ 6 No
+ 7 Witnesses
+ 8 Positive: 0 Negative: 3
+ 9 Condition exists (1:r0=1 /\ 1:r1=42)
+10 Observation MP+pooncerelease+poacquireonce Never 0 3
+11 Time MP+pooncerelease+poacquireonce 0.02
+12 Hash=ab9a9b7940a75a792266be279a980156
+
+It is tempting to avoid the open-coded repetitions of the value "42"
+by defining another global variable "initval=42" and replacing all
+occurrences of "42" with "initval".  This will not, repeat *not*,
+initialize "x" and "y" to 42, but instead to the address of "initval"
+(try it!).  See the section below on linked lists to learn more about
+why this approach to initialization can be useful.
+
+
+Control Structures
+------------------
+
+LKMM supports the C-language "if" statement, which allows modeling of
+conditional branches.  In LKMM, conditional branches can affect ordering,
+but only if you are *very* careful (compilers are surprisingly able
+to optimize away conditional branches).  The following example shows
+the "load buffering" (LB) use case that is used in the Linux kernel to
+synchronize between ring-buffer producers and consumers.  In the example
+below, P0() is one side checking to see if an operation may proceed and
+P1() is the other side completing its update.
+
+ 1 C LB+fencembonceonce+ctrlonceonce
+ 2
+ 3 {}
+ 4
+ 5 P0(int *x, int *y)
+ 6 {
+ 7   int r0;
+ 8
+ 9   r0 = READ_ONCE(*x);
+10   if (r0)
+11     WRITE_ONCE(*y, 1);
+12 }
+13
+14 P1(int *x, int *y)
+15 {
+16   int r0;
+17
+18   r0 = READ_ONCE(*y);
+19   smp_mb();
+20   WRITE_ONCE(*x, 1);
+21 }
+22
+23 exists (0:r0=1 /\ 1:r0=1)
+
+P1()'s "if" statement on line 10 works as expected, so that line 11 is
+executed only if line 9 loads a non-zero value from "x".  Because P1()'s
+write of "1" to "x" happens only after P1()'s read from "y", one would
+hope that the "exists" clause cannot be satisfied.  LKMM agrees:
+
+ 1 Test LB+fencembonceonce+ctrlonceonce Allowed
+ 2 States 2
+ 3 0:r0=0; 1:r0=0;
+ 4 0:r0=1; 1:r0=0;
+ 5 No
+ 6 Witnesses
+ 7 Positive: 0 Negative: 2
+ 8 Condition exists (0:r0=1 /\ 1:r0=1)
+ 9 Observation LB+fencembonceonce+ctrlonceonce Never 0 2
+10 Time LB+fencembonceonce+ctrlonceonce 0.00
+11 Hash=e5260556f6de495fd39b556d1b831c3b
+
+However, there is no "while" statement due to the fact that full
+state-space search has some difficulty with iteration.  However, there
+are tricks that may be used to handle some special cases, which are
+discussed below.  In addition, loop-unrolling tricks may be applied,
+albeit sparingly.
+
+
+Tricks and Traps
+================
+
+This section covers extracting debug output from herd7, emulating
+spin loops, handling trivial linked lists, adding comments to litmus tests,
+emulating call_rcu(), and finally tricks to improve herd7 performance
+in order to better handle large litmus tests.
+
+
+Debug Output
+------------
+
+By default, the herd7 state output includes all variables mentioned
+in the "exists" clause.  But sometimes debugging efforts are greatly
+aided by the values of other variables.  Consider this litmus test
+(tools/memory-order/litmus-tests/SB+rfionceonce-poonceonces.litmus but
+slightly modified), which probes an obscure corner of hardware memory
+ordering:
+
+ 1 C SB+rfionceonce-poonceonces
+ 2
+ 3 {}
+ 4
+ 5 P0(int *x, int *y)
+ 6 {
+ 7   int r1;
+ 8   int r2;
+ 9
+10   WRITE_ONCE(*x, 1);
+11   r1 = READ_ONCE(*x);
+12   r2 = READ_ONCE(*y);
+13 }
+14
+15 P1(int *x, int *y)
+16 {
+17   int r3;
+18   int r4;
+19
+20   WRITE_ONCE(*y, 1);
+21   r3 = READ_ONCE(*y);
+22   r4 = READ_ONCE(*x);
+23 }
+24
+25 exists (0:r2=0 /\ 1:r4=0)
+
+The herd7 output is as follows:
+
+ 1 Test SB+rfionceonce-poonceonces Allowed
+ 2 States 4
+ 3 0:r2=0; 1:r4=0;
+ 4 0:r2=0; 1:r4=1;
+ 5 0:r2=1; 1:r4=0;
+ 6 0:r2=1; 1:r4=1;
+ 7 Ok
+ 8 Witnesses
+ 9 Positive: 1 Negative: 3
+10 Condition exists (0:r2=0 /\ 1:r4=0)
+11 Observation SB+rfionceonce-poonceonces Sometimes 1 3
+12 Time SB+rfionceonce-poonceonces 0.01
+13 Hash=c7f30fe0faebb7d565405d55b7318ada
+
+(This output indicates that CPUs are permitted to "snoop their own
+store buffers", which all of Linux's CPU families other than s390 will
+happily do.  Such snooping results in disagreement among CPUs on the
+order of stores from different CPUs, which is rarely an issue.)
+
+But the herd7 output shows only the two variables mentioned in the
+"exists" clause.  Someone modifying this test might wish to know the
+values of "x", "y", "0:r1", and "0:r3" as well.  The "locations"
+statement on line 25 shows how to cause herd7 to display additional
+variables:
+
+ 1 C SB+rfionceonce-poonceonces
+ 2
+ 3 {}
+ 4
+ 5 P0(int *x, int *y)
+ 6 {
+ 7   int r1;
+ 8   int r2;
+ 9
+10   WRITE_ONCE(*x, 1);
+11   r1 = READ_ONCE(*x);
+12   r2 = READ_ONCE(*y);
+13 }
+14
+15 P1(int *x, int *y)
+16 {
+17   int r3;
+18   int r4;
+19
+20   WRITE_ONCE(*y, 1);
+21   r3 = READ_ONCE(*y);
+22   r4 = READ_ONCE(*x);
+23 }
+24
+25 locations [0:r1; 1:r3; x; y]
+26 exists (0:r2=0 /\ 1:r4=0)
+
+The herd7 output then displays the values of all the variables:
+
+ 1 Test SB+rfionceonce-poonceonces Allowed
+ 2 States 4
+ 3 0:r1=1; 0:r2=0; 1:r3=1; 1:r4=0; x=1; y=1;
+ 4 0:r1=1; 0:r2=0; 1:r3=1; 1:r4=1; x=1; y=1;
+ 5 0:r1=1; 0:r2=1; 1:r3=1; 1:r4=0; x=1; y=1;
+ 6 0:r1=1; 0:r2=1; 1:r3=1; 1:r4=1; x=1; y=1;
+ 7 Ok
+ 8 Witnesses
+ 9 Positive: 1 Negative: 3
+10 Condition exists (0:r2=0 /\ 1:r4=0)
+11 Observation SB+rfionceonce-poonceonces Sometimes 1 3
+12 Time SB+rfionceonce-poonceonces 0.01
+13 Hash=40de8418c4b395388f6501cafd1ed38d
+
+What if you would like to know the value of a particular global variable
+at some particular point in a given process's execution?  One approach
+is to use a READ_ONCE() to load that global variable into a new local
+variable, then add that local variable to the "locations" clause.
+But be careful:  In some litmus tests, adding a READ_ONCE() will change
+the outcome!  For one example, please see the C-READ_ONCE.litmus and
+C-READ_ONCE-omitted.litmus tests located here:
+
+	https://github.com/paulmckrcu/litmus/blob/master/manual/kernel/
+
+
+Spin Loops
+----------
+
+The analysis carried out by herd7 explores full state space, which is
+at best of exponential time complexity.  Adding processes and increasing
+the amount of code in a give process can greatly increase execution time.
+Potentially infinite loops, such as those used to wait for locks to
+become available, are clearly problematic.
+
+Fortunately, it is possible to avoid state-space explosion by specially
+modeling such loops.  For example, the following litmus tests emulates
+locking using xchg_acquire(), but instead of enclosing xchg_acquire()
+in a spin loop, it instead excludes executions that fail to acquire the
+lock using a herd7 "filter" clause.  Note that for exclusive locking, you
+are better off using the spin_lock() and spin_unlock() that LKMM directly
+models, if for no other reason that these are much faster.  However, the
+techniques illustrated in this section can be used for other purposes,
+such as emulating reader-writer locking, which LKMM does not yet model.
+
+ 1 C C-SB+l-o-o-u+l-o-o-u-X
+ 2
+ 3 {
+ 4 }
+ 5
+ 6 P0(int *sl, int *x0, int *x1)
+ 7 {
+ 8   int r2;
+ 9   int r1;
+10
+11   r2 = xchg_acquire(sl, 1);
+12   WRITE_ONCE(*x0, 1);
+13   r1 = READ_ONCE(*x1);
+14   smp_store_release(sl, 0);
+15 }
+16
+17 P1(int *sl, int *x0, int *x1)
+18 {
+19   int r2;
+20   int r1;
+21
+22   r2 = xchg_acquire(sl, 1);
+23   WRITE_ONCE(*x1, 1);
+24   r1 = READ_ONCE(*x0);
+25   smp_store_release(sl, 0);
+26 }
+27
+28 filter (0:r2=0 /\ 1:r2=0)
+29 exists (0:r1=0 /\ 1:r1=0)
+
+This litmus test may be found here:
+
+https://git.kernel.org/pub/scm/linux/kernel/git/paulmck/perfbook.git/tree/CodeSamples/formal/herd/C-SB+l-o-o-u+l-o-o-u-X.litmus
+
+This test uses two global variables, "x1" and "x2", and also emulates a
+single global spinlock named "sl".  This spinlock is held by whichever
+process changes the value of "sl" from "0" to "1", and is released when
+that process sets "sl" back to "0".  P0()'s lock acquisition is emulated
+on line 11 using xchg_acquire(), which unconditionally stores the value
+"1" to "sl" and stores either "0" or "1" to "r2", depending on whether
+the lock acquisition was successful or unsuccessful (due to "sl" already
+having the value "1"), respectively.  P1() operates in a similar manner.
+
+Rather unconventionally, execution appears to proceed to the critical
+section on lines 12 and 13 in either case.  Line 14 then uses an
+smp_store_release() to store zero to "sl", thus emulating lock release.
+
+The case where xchg_acquire() fails to acquire the lock is handled by
+the "filter" clause on line 28, which tells herd7 to keep only those
+executions in which both "0:r2" and "1:r2" are zero, that is to pay
+attention only to those executions in which both locks are actually
+acquired.  Thus, the bogus executions that would execute the critical
+sections are discarded and any effects that they might have had are
+ignored.  Note well that the "filter" clause keeps those executions
+for which its expression is satisfied, that is, for which the expression
+evaluates to true.  In other words, the "filter" clause says what to
+keep, not what to discard.
+
+The result of running this test is as follows:
+
+ 1 Test C-SB+l-o-o-u+l-o-o-u-X Allowed
+ 2 States 2
+ 3 0:r1=0; 1:r1=1;
+ 4 0:r1=1; 1:r1=0;
+ 5 No
+ 6 Witnesses
+ 7 Positive: 0 Negative: 2
+ 8 Condition exists (0:r1=0 /\ 1:r1=0)
+ 9 Observation C-SB+l-o-o-u+l-o-o-u-X Never 0 2
+10 Time C-SB+l-o-o-u+l-o-o-u-X 0.03
+
+The "Never" on line 9 indicates that this use of xchg_acquire() and
+smp_store_release() really does correctly emulate locking.
+
+Why doesn't the litmus test take the simpler approach of using a spin loop
+to handle failed spinlock acquisitions, like the kernel does?  The key
+insight behind this litmus test is that spin loops have no effect on the
+possible "exists"-clause outcomes of program execution in the absence
+of deadlock.  In other words, given a high-quality lock-acquisition
+primitive in a deadlock-free program running on high-quality hardware,
+each lock acquisition will eventually succeed.  Because herd7 already
+explores the full state space, the length of time required to actually
+acquire the lock does not matter.  After all, herd7 already models all
+possible durations of the xchg_acquire() statements.
+
+Why not just add the "filter" clause to the "exists" clause, thus
+avoiding the "filter" clause entirely?  This does work, but is slower.
+The reason that the "filter" clause is faster is that (in the common case)
+herd7 knows to abandon an execution as soon as the "filter" expression
+fails to be satisfied.  In contrast, the "exists" clause is evaluated
+only at the end of time, thus requiring herd7 to waste time on bogus
+executions in which both critical sections proceed concurrently.  In
+addition, some LKMM users like the separation of concerns provided by
+using the both the "filter" and "exists" clauses.
+
+Readers lacking a pathological interest in odd corner cases should feel
+free to skip the remainder of this section.
+
+But what if the litmus test were to temporarily set "0:r2" to a non-zero
+value?  Wouldn't that cause herd7 to abandon the execution prematurely
+due to an early mismatch of the "filter" clause?
+
+Why not just try it?  Line 4 of the following modified litmus test
+introduces a new global variable "x2" that is initialized to "1".  Line 23
+of P1() reads that variable into "1:r2" to force an early mismatch with
+the "filter" clause.  Line 24 does a known-true "if" condition to avoid
+and static analysis that herd7 might do.  Finally the "exists" clause
+on line 32 is updated to a condition that is alway satisfied at the end
+of the test.
+
+ 1 C C-SB+l-o-o-u+l-o-o-u-X
+ 2
+ 3 {
+ 4   x2=1;
+ 5 }
+ 6
+ 7 P0(int *sl, int *x0, int *x1)
+ 8 {
+ 9   int r2;
+10   int r1;
+11
+12   r2 = xchg_acquire(sl, 1);
+13   WRITE_ONCE(*x0, 1);
+14   r1 = READ_ONCE(*x1);
+15   smp_store_release(sl, 0);
+16 }
+17
+18 P1(int *sl, int *x0, int *x1, int *x2)
+19 {
+20   int r2;
+21   int r1;
+22
+23   r2 = READ_ONCE(*x2);
+24   if (r2)
+25     r2 = xchg_acquire(sl, 1);
+26   WRITE_ONCE(*x1, 1);
+27   r1 = READ_ONCE(*x0);
+28   smp_store_release(sl, 0);
+29 }
+30
+31 filter (0:r2=0 /\ 1:r2=0)
+32 exists (x1=1)
+
+If the "filter" clause were to check each variable at each point in the
+execution, running this litmus test would display no executions because
+all executions would be filtered out at line 23.  However, the output
+is instead as follows:
+
+ 1 Test C-SB+l-o-o-u+l-o-o-u-X Allowed
+ 2 States 1
+ 3 x1=1;
+ 4 Ok
+ 5 Witnesses
+ 6 Positive: 2 Negative: 0
+ 7 Condition exists (x1=1)
+ 8 Observation C-SB+l-o-o-u+l-o-o-u-X Always 2 0
+ 9 Time C-SB+l-o-o-u+l-o-o-u-X 0.04
+10 Hash=080bc508da7f291e122c6de76c0088e3
+
+Line 3 shows that there is one execution that did not get filtered out,
+so the "filter" clause is evaluated only on the last assignment to
+the variables that it checks.  In this case, the "filter" clause is a
+disjunction, so it might be evaluated twice, once at the final (and only)
+assignment to "0:r2" and once at the final assignment to "1:r2".
+
+
+Linked Lists
+------------
+
+LKMM can handle linked lists, but only linked lists in which each node
+contains nothing except a pointer to the next node in the list.  This is
+of course quite restrictive, but there is nevertheless quite a bit that
+can be done within these confines, as can be seen in the litmus test
+at tools/memory-model/litmus-tests/MP+onceassign+derefonce.litmus:
+
+ 1 C MP+onceassign+derefonce
+ 2
+ 3 {
+ 4 y=z;
+ 5 z=0;
+ 6 }
+ 7
+ 8 P0(int *x, int **y)
+ 9 {
+10   WRITE_ONCE(*x, 1);
+11   rcu_assign_pointer(*y, x);
+12 }
+13
+14 P1(int *x, int **y)
+15 {
+16   int *r0;
+17   int r1;
+18
+19   rcu_read_lock();
+20   r0 = rcu_dereference(*y);
+21   r1 = READ_ONCE(*r0);
+22   rcu_read_unlock();
+23 }
+24
+25 exists (1:r0=x /\ 1:r1=0)
+
+Line 4's "y=z" may seem odd, given that "z" has not yet been initialized.
+But "y=z" does not set the value of "y" to that of "z", but instead
+sets the value of "y" to the *address* of "z".  Lines 4 and 5 therefore
+create a simple linked list, with "y" pointing to "z" and "z" having a
+NULL pointer.  A much longer linked list could be created if desired,
+and circular singly linked lists can also be created and manipulated.
+
+The "exists" clause works the same way, with the "1:r0=x" comparing P1()'s
+"r0" not to the value of "x", but again to its address.  This term of the
+"exists" clause therefore tests whether line 20's load from "y" saw the
+value stored by line 11, which is in fact what is required in this case.
+
+P0()'s line 10 initializes "x" to the value 1 then line 11 links to "x"
+from "y", replacing "z".
+
+P1()'s line 20 loads a pointer from "y", and line 21 dereferences that
+pointer.  The RCU read-side critical section spanning lines 19-22 is just
+for show in this example.  Note that the address used for line 21's load
+depends on (in this case, "is exactly the same as") the value loaded by
+line 20.  This is an example of what is called an "address dependency".
+This particular address dependency extends from the load on line 20 to the
+load on line 21.  Address dependencies provide a weak form of ordering.
+
+Running this test results in the following:
+
+ 1 Test MP+onceassign+derefonce Allowed
+ 2 States 2
+ 3 1:r0=x; 1:r1=1;
+ 4 1:r0=z; 1:r1=0;
+ 5 No
+ 6 Witnesses
+ 7 Positive: 0 Negative: 2
+ 8 Condition exists (1:r0=x /\ 1:r1=0)
+ 9 Observation MP+onceassign+derefonce Never 0 2
+10 Time MP+onceassign+derefonce 0.00
+11 Hash=49ef7a741563570102448a256a0c8568
+
+The only possible outcomes feature P1() loading a pointer to "z"
+(which contains zero) on the one hand and P1() loading a pointer to "x"
+(which contains the value one) on the other.  This should be reassuring
+because it says that RCU readers cannot see the old preinitialization
+values when accessing a newly inserted list node.  This undesirable
+scenario is flagged by the "exists" clause, and would occur if P1()
+loaded a pointer to "x", but obtained the pre-initialization value of
+zero after dereferencing that pointer.
+
+
+Comments
+--------
+
+Different portions of a litmus test are processed by different parsers,
+which has the charming effect of requiring different comment syntax in
+different portions of the litmus test.  The C-syntax portions use
+C-language comments (either "/* */" or "//"), while the other portions
+use Ocaml comments "(* *)".
+
+The following litmus test illustrates the comment style corresponding
+to each syntactic unit of the test:
+
+ 1 C MP+onceassign+derefonce (* A *)
+ 2
+ 3 (* B *)
+ 4
+ 5 {
+ 6 y=z; (* C *)
+ 7 z=0;
+ 8 } // D
+ 9
+10 // E
+11
+12 P0(int *x, int **y) // F
+13 {
+14   WRITE_ONCE(*x, 1);  // G
+15   rcu_assign_pointer(*y, x);
+16 }
+17
+18 // H
+19
+20 P1(int *x, int **y)
+21 {
+22   int *r0;
+23   int r1;
+24
+25   rcu_read_lock();
+26   r0 = rcu_dereference(*y);
+27   r1 = READ_ONCE(*r0);
+28   rcu_read_unlock();
+29 }
+30
+31 // I
+32
+33 exists (* J *) (1:r0=x /\ (* K *) 1:r1=0) (* L *)
+
+In short, use C-language comments in the C code and Ocaml comments in
+the rest of the litmus test.
+
+On the other hand, if you prefer C-style comments everywhere, the
+C preprocessor is your friend.
+
+
+Asynchronous RCU Grace Periods
+------------------------------
+
+The following litmus test is derived from the example show in
+Documentation/litmus-tests/rcu/RCU+sync+free.litmus, but converted to
+emulate call_rcu():
+
+ 1 C RCU+sync+free
+ 2
+ 3 {
+ 4 int x = 1;
+ 5 int *y = &x;
+ 6 int z = 1;
+ 7 }
+ 8
+ 9 P0(int *x, int *z, int **y)
+10 {
+11   int *r0;
+12   int r1;
+13
+14   rcu_read_lock();
+15   r0 = rcu_dereference(*y);
+16   r1 = READ_ONCE(*r0);
+17   rcu_read_unlock();
+18 }
+19
+20 P1(int *z, int **y, int *c)
+21 {
+22   rcu_assign_pointer(*y, z);
+23   smp_store_release(*c, 1); // Emulate call_rcu().
+24 }
+25
+26 P2(int *x, int *z, int **y, int *c)
+27 {
+28   int r0;
+29
+30   r0 = smp_load_acquire(*c); // Note call_rcu() request.
+31   synchronize_rcu(); // Wait one grace period.
+32   WRITE_ONCE(*x, 0); // Emulate the RCU callback.
+33 }
+34
+35 filter (2:r0=1) (* Reject too-early starts. *)
+36 exists (0:r0=x /\ 0:r1=0)
+
+Lines 4-6 initialize a linked list headed by "y" that initially contains
+"x".  In addition, "z" is pre-initialized to prepare for P1(), which
+will replace "x" with "z" in this list.
+
+P0() on lines 9-18 enters an RCU read-side critical section, loads the
+list header "y" and dereferences it, leaving the node in "0:r0" and
+the node's value in "0:r1".
+
+P1() on lines 20-24 updates the list header to instead reference "z",
+then emulates call_rcu() by doing a release store into "c".
+
+P2() on lines 27-33 emulates the behind-the-scenes effect of doing a
+call_rcu().  Line 30 first does an acquire load from "c", then line 31
+waits for an RCU grace period to elapse, and finally line 32 emulates
+the RCU callback, which in turn emulates a call to kfree().
+
+Of course, it is possible for P2() to start too soon, so that the
+value of "2:r0" is zero rather than the required value of "1".
+The "filter" clause on line 35 handles this possibility, rejecting
+all executions in which "2:r0" is not equal to the value "1".
+
+
+Performance
+-----------
+
+LKMM's exploration of the full state-space can be extremely helpful,
+but it does not come for free.  The price is exponential computational
+complexity in terms of the number of processes, the average number
+of statements in each process, and the total number of stores in the
+litmus test.
+
+So it is best to start small and then work up.  Where possible, break
+your code down into small pieces each representing a core concurrency
+requirement.
+
+That said, herd7 is quite fast.  On an unprepossessing x86 laptop, it
+was able to analyze the following 10-process RCU litmus test in about
+six seconds.
+
+https://github.com/paulmckrcu/litmus/blob/master/auto/C-RW-R+RW-R+RW-G+RW-G+RW-G+RW-G+RW-R+RW-R+RW-R+RW-R.litmus
+
+One way to make herd7 run faster is to use the "-speedcheck true" option.
+This option prevents herd7 from generating all possible end states,
+instead causing it to focus solely on whether or not the "exists"
+clause can be satisfied.  With this option, herd7 evaluates the above
+litmus test in about 300 milliseconds, for more than an order of magnitude
+improvement in performance.
+
+Larger 16-process litmus tests that would normally consume 15 minutes
+of time complete in about 40 seconds with this option.  To be fair,
+you do get an extra 65,535 states when you leave off the "-speedcheck
+true" option.
+
+https://github.com/paulmckrcu/litmus/blob/master/auto/C-RW-R+RW-R+RW-G+RW-G+RW-G+RW-G+RW-R+RW-R+RW-R+RW-R+RW-G+RW-G+RW-G+RW-G+RW-R+RW-R.litmus
+
+Nevertheless, litmus-test analysis really is of exponential complexity,
+whether with or without "-speedcheck true".  Increasing by just three
+processes to a 19-process litmus test requires 2 hours and 40 minutes
+without, and about 8 minutes with "-speedcheck true".  Each of these
+results represent roughly an order of magnitude slowdown compared to the
+16-process litmus test.  Again, to be fair, the multi-hour run explores
+no fewer than 524,287 additional states compared to the shorter one.
+
+https://github.com/paulmckrcu/litmus/blob/master/auto/C-RW-R+RW-R+RW-G+RW-G+RW-G+RW-G+RW-R+RW-R+RW-R+RW-R+RW-R+RW-R+RW-G+RW-G+RW-G+RW-G+RW-R+RW-R+RW-R.litmus
+
+If you don't like command-line arguments, you can obtain a similar speedup
+by adding a "filter" clause with exactly the same expression as your
+"exists" clause.
+
+However, please note that seeing the full set of states can be extremely
+helpful when developing and debugging litmus tests.
+
+
+LIMITATIONS
+===========
+
+Limitations of the Linux-kernel memory model (LKMM) include:
+
+1.	Compiler optimizations are not accurately modeled.  Of course,
+	the use of READ_ONCE() and WRITE_ONCE() limits the compiler's
+	ability to optimize, but under some circumstances it is possible
+	for the compiler to undermine the memory model.  For more
+	information, see Documentation/explanation.txt (in particular,
+	the "THE PROGRAM ORDER RELATION: po AND po-loc" and "A WARNING"
+	sections).
+
+	Note that this limitation in turn limits LKMM's ability to
+	accurately model address, control, and data dependencies.
+	For example, if the compiler can deduce the value of some variable
+	carrying a dependency, then the compiler can break that dependency
+	by substituting a constant of that value.
+
+2.	Multiple access sizes for a single variable are not supported,
+	and neither are misaligned or partially overlapping accesses.
+
+3.	Exceptions and interrupts are not modeled.  In some cases,
+	this limitation can be overcome by modeling the interrupt or
+	exception with an additional process.
+
+4.	I/O such as MMIO or DMA is not supported.
+
+5.	Self-modifying code (such as that found in the kernel's
+	alternatives mechanism, function tracer, Berkeley Packet Filter
+	JIT compiler, and module loader) is not supported.
+
+6.	Complete modeling of all variants of atomic read-modify-write
+	operations, locking primitives, and RCU is not provided.
+	For example, call_rcu() and rcu_barrier() are not supported.
+	However, a substantial amount of support is provided for these
+	operations, as shown in the linux-kernel.def file.
+
+	Here are specific limitations:
+
+	a.	When rcu_assign_pointer() is passed NULL, the Linux
+		kernel provides no ordering, but LKMM models this
+		case as a store release.
+
+	b.	The "unless" RMW operations are not currently modeled:
+		atomic_long_add_unless(), atomic_inc_unless_negative(),
+		and atomic_dec_unless_positive().  These can be emulated
+		in litmus tests, for example, by using atomic_cmpxchg().
+
+		One exception of this limitation is atomic_add_unless(),
+		which is provided directly by herd7 (so no corresponding
+		definition in linux-kernel.def).  atomic_add_unless() is
+		modeled by herd7 therefore it can be used in litmus tests.
+
+	c.	The call_rcu() function is not modeled.  As was shown above,
+		it can be emulated in litmus tests by adding another
+		process that invokes synchronize_rcu() and the body of the
+		callback function, with (for example) a release-acquire
+		from the site of the emulated call_rcu() to the beginning
+		of the additional process.
+
+	d.	The rcu_barrier() function is not modeled.  It can be
+		emulated in litmus tests emulating call_rcu() via
+		(for example) a release-acquire from the end of each
+		additional call_rcu() process to the site of the
+		emulated rcu-barrier().
+
+	e.	Although sleepable RCU (SRCU) is now modeled, there
+		are some subtle differences between its semantics and
+		those in the Linux kernel.  For example, the kernel
+		might interpret the following sequence as two partially
+		overlapping SRCU read-side critical sections:
+
+			 1  r1 = srcu_read_lock(&my_srcu);
+			 2  do_something_1();
+			 3  r2 = srcu_read_lock(&my_srcu);
+			 4  do_something_2();
+			 5  srcu_read_unlock(&my_srcu, r1);
+			 6  do_something_3();
+			 7  srcu_read_unlock(&my_srcu, r2);
+
+		In contrast, LKMM will interpret this as a nested pair of
+		SRCU read-side critical sections, with the outer critical
+		section spanning lines 1-7 and the inner critical section
+		spanning lines 3-5.
+
+		This difference would be more of a concern had anyone
+		identified a reasonable use case for partially overlapping
+		SRCU read-side critical sections.  For more information
+		on the trickiness of such overlapping, please see:
+		https://paulmck.livejournal.com/40593.html
+
+	f.	Reader-writer locking is not modeled.  It can be
+		emulated in litmus tests using atomic read-modify-write
+		operations.
+
+The fragment of the C language supported by these litmus tests is quite
+limited and in some ways non-standard:
+
+1.	There is no automatic C-preprocessor pass.  You can of course
+	run it manually, if you choose.
+
+2.	There is no way to create functions other than the Pn() functions
+	that model the concurrent processes.
+
+3.	The Pn() functions' formal parameters must be pointers to the
+	global shared variables.  Nothing can be passed by value into
+	these functions.
+
+4.	The only functions that can be invoked are those built directly
+	into herd7 or that are defined in the linux-kernel.def file.
+
+5.	The "switch", "do", "for", "while", and "goto" C statements are
+	not supported.	The "switch" statement can be emulated by the
+	"if" statement.  The "do", "for", and "while" statements can
+	often be emulated by manually unrolling the loop, or perhaps by
+	enlisting the aid of the C preprocessor to minimize the resulting
+	code duplication.  Some uses of "goto" can be emulated by "if",
+	and some others by unrolling.
+
+6.	Although you can use a wide variety of types in litmus-test
+	variable declarations, and especially in global-variable
+	declarations, the "herd7" tool understands only int and
+	pointer types.	There is no support for floating-point types,
+	enumerations, characters, strings, arrays, or structures.
+
+7.	Parsing of variable declarations is very loose, with almost no
+	type checking.
+
+8.	Initializers differ from their C-language counterparts.
+	For example, when an initializer contains the name of a shared
+	variable, that name denotes a pointer to that variable, not
+	the current value of that variable.  For example, "int x = y"
+	is interpreted the way "int x = &y" would be in C.
+
+9.	Dynamic memory allocation is not supported, although this can
+	be worked around in some cases by supplying multiple statically
+	allocated variables.
+
+Some of these limitations may be overcome in the future, but others are
+more likely to be addressed by incorporating the Linux-kernel memory model
+into other tools.
+
+Finally, please note that LKMM is subject to change as hardware, use cases,
+and compilers evolve.
diff --git a/tools/memory-model/Documentation/recipes.txt b/tools/memory-model/Documentation/recipes.txt
index 63c4adfed884..03f58b11c252 100644
--- a/tools/memory-model/Documentation/recipes.txt
+++ b/tools/memory-model/Documentation/recipes.txt
@@ -1,7 +1,7 @@
 This document provides "recipes", that is, litmus tests for commonly
 occurring situations, as well as a few that illustrate subtly broken but
 attractive nuisances.  Many of these recipes include example code from
-v4.13 of the Linux kernel.
+v5.7 of the Linux kernel.
 
 The first section covers simple special cases, the second section
 takes off the training wheels to cover more involved examples,
@@ -278,7 +278,7 @@ is present if the value loaded determines the address of a later access
 first place (control dependency).  Note that the term "data dependency"
 is sometimes casually used to cover both address and data dependencies.
 
-In lib/prime_numbers.c, the expand_to_next_prime() function invokes
+In lib/math/prime_numbers.c, the expand_to_next_prime() function invokes
 rcu_assign_pointer(), and the next_prime_number() function invokes
 rcu_dereference().  This combination mediates access to a bit vector
 that is expanded as additional primes are needed.
diff --git a/tools/memory-model/Documentation/references.txt b/tools/memory-model/Documentation/references.txt
index ecbbaa5396d4..c5fdfd19df24 100644
--- a/tools/memory-model/Documentation/references.txt
+++ b/tools/memory-model/Documentation/references.txt
@@ -120,7 +120,7 @@ o	Jade Alglave, Luc Maranget, and Michael Tautschnig. 2014. "Herding
 
 o	Jade Alglave, Patrick Cousot, and Luc Maranget. 2016. "Syntax and
 	semantics of the weak consistency model specification language
-	cat". CoRR abs/1608.07531 (2016). http://arxiv.org/abs/1608.07531
+	cat". CoRR abs/1608.07531 (2016). https://arxiv.org/abs/1608.07531
 
 
 Memory-model comparisons
diff --git a/tools/memory-model/Documentation/simple.txt b/tools/memory-model/Documentation/simple.txt
new file mode 100644
index 000000000000..81e1a0ec5342
--- /dev/null
+++ b/tools/memory-model/Documentation/simple.txt
@@ -0,0 +1,271 @@
+This document provides options for those wishing to keep their
+memory-ordering lives simple, as is necessary for those whose domain
+is complex.  After all, there are bugs other than memory-ordering bugs,
+and the time spent gaining memory-ordering knowledge is not available
+for gaining domain knowledge.  Furthermore Linux-kernel memory model
+(LKMM) is quite complex, with subtle differences in code often having
+dramatic effects on correctness.
+
+The options near the beginning of this list are quite simple.  The idea
+is not that kernel hackers don't already know about them, but rather
+that they might need the occasional reminder.
+
+Please note that this is a generic guide, and that specific subsystems
+will often have special requirements or idioms.  For example, developers
+of MMIO-based device drivers will often need to use mb(), rmb(), and
+wmb(), and therefore might find smp_mb(), smp_rmb(), and smp_wmb()
+to be more natural than smp_load_acquire() and smp_store_release().
+On the other hand, those coming in from other environments will likely
+be more familiar with these last two.
+
+
+Single-threaded code
+====================
+
+In single-threaded code, there is no reordering, at least assuming
+that your toolchain and hardware are working correctly.  In addition,
+it is generally a mistake to assume your code will only run in a single
+threaded context as the kernel can enter the same code path on multiple
+CPUs at the same time.  One important exception is a function that makes
+no external data references.
+
+In the general case, you will need to take explicit steps to ensure that
+your code really is executed within a single thread that does not access
+shared variables.  A simple way to achieve this is to define a global lock
+that you acquire at the beginning of your code and release at the end,
+taking care to ensure that all references to your code's shared data are
+also carried out under that same lock.  Because only one thread can hold
+this lock at a given time, your code will be executed single-threaded.
+This approach is called "code locking".
+
+Code locking can severely limit both performance and scalability, so it
+should be used with caution, and only on code paths that execute rarely.
+After all, a huge amount of effort was required to remove the Linux
+kernel's old "Big Kernel Lock", so let's please be very careful about
+adding new "little kernel locks".
+
+One of the advantages of locking is that, in happy contrast with the
+year 1981, almost all kernel developers are very familiar with locking.
+The Linux kernel's lockdep (CONFIG_PROVE_LOCKING=y) is very helpful with
+the formerly feared deadlock scenarios.
+
+Please use the standard locking primitives provided by the kernel rather
+than rolling your own.  For one thing, the standard primitives interact
+properly with lockdep.  For another thing, these primitives have been
+tuned to deal better with high contention.  And for one final thing, it is
+surprisingly hard to correctly code production-quality lock acquisition
+and release functions.  After all, even simple non-production-quality
+locking functions must carefully prevent both the CPU and the compiler
+from moving code in either direction across the locking function.
+
+Despite the scalability limitations of single-threaded code, RCU
+takes this approach for much of its grace-period processing and also
+for early-boot operation.  The reason RCU is able to scale despite
+single-threaded grace-period processing is use of batching, where all
+updates that accumulated during one grace period are handled by the
+next one.  In other words, slowing down grace-period processing makes
+it more efficient.  Nor is RCU unique:  Similar batching optimizations
+are used in many I/O operations.
+
+
+Packaged code
+=============
+
+Even if performance and scalability concerns prevent your code from
+being completely single-threaded, it is often possible to use library
+functions that handle the concurrency nearly or entirely on their own.
+This approach delegates any LKMM worries to the library maintainer.
+
+In the kernel, what is the "library"?  Quite a bit.  It includes the
+contents of the lib/ directory, much of the include/linux/ directory along
+with a lot of other heavily used APIs.  But heavily used examples include
+the list macros (for example, include/linux/{,rcu}list.h), workqueues,
+smp_call_function(), and the various hash tables and search trees.
+
+
+Data locking
+============
+
+With code locking, we use single-threaded code execution to guarantee
+serialized access to the data that the code is accessing.  However,
+we can also achieve this by instead associating the lock with specific
+instances of the data structures.  This creates a "critical section"
+in the code execution that will execute as though it is single threaded.
+By placing all the accesses and modifications to a shared data structure
+inside a critical section, we ensure that the execution context that
+holds the lock has exclusive access to the shared data.
+
+The poster boy for this approach is the hash table, where placing a lock
+in each hash bucket allows operations on different buckets to proceed
+concurrently.  This works because the buckets do not overlap with each
+other, so that an operation on one bucket does not interfere with any
+other bucket.
+
+As the number of buckets increases, data locking scales naturally.
+In particular, if the amount of data increases with the number of CPUs,
+increasing the number of buckets as the number of CPUs increase results
+in a naturally scalable data structure.
+
+
+Per-CPU processing
+==================
+
+Partitioning processing and data over CPUs allows each CPU to take
+a single-threaded approach while providing excellent performance and
+scalability.  Of course, there is no free lunch:  The dark side of this
+excellence is substantially increased memory footprint.
+
+In addition, it is sometimes necessary to occasionally update some global
+view of this processing and data, in which case something like locking
+must be used to protect this global view.  This is the approach taken
+by the percpu_counter infrastructure. In many cases, there are already
+generic/library variants of commonly used per-cpu constructs available.
+Please use them rather than rolling your own.
+
+RCU uses DEFINE_PER_CPU*() declaration to create a number of per-CPU
+data sets.  For example, each CPU does private quiescent-state processing
+within its instance of the per-CPU rcu_data structure, and then uses data
+locking to report quiescent states up the grace-period combining tree.
+
+
+Packaged primitives: Sequence locking
+=====================================
+
+Lockless programming is considered by many to be more difficult than
+lock-based programming, but there are a few lockless design patterns that
+have been built out into an API.  One of these APIs is sequence locking.
+Although this APIs can be used in extremely complex ways, there are simple
+and effective ways of using it that avoid the need to pay attention to
+memory ordering.
+
+The basic keep-things-simple rule for sequence locking is "do not write
+in read-side code".  Yes, you can do writes from within sequence-locking
+readers, but it won't be so simple.  For example, such writes will be
+lockless and should be idempotent.
+
+For more sophisticated use cases, LKMM can guide you, including use
+cases involving combining sequence locking with other synchronization
+primitives.  (LKMM does not yet know about sequence locking, so it is
+currently necessary to open-code it in your litmus tests.)
+
+Additional information may be found in include/linux/seqlock.h.
+
+Packaged primitives: RCU
+========================
+
+Another lockless design pattern that has been baked into an API
+is RCU.  The Linux kernel makes sophisticated use of RCU, but the
+keep-things-simple rules for RCU are "do not write in read-side code"
+and "do not update anything that is visible to and accessed by readers",
+and "protect updates with locking".
+
+These rules are illustrated by the functions foo_update_a() and
+foo_get_a() shown in Documentation/RCU/whatisRCU.rst.  Additional
+RCU usage patterns maybe found in Documentation/RCU and in the
+source code.
+
+
+Packaged primitives: Atomic operations
+======================================
+
+Back in the day, the Linux kernel had three types of atomic operations:
+
+1.	Initialization and read-out, such as atomic_set() and atomic_read().
+
+2.	Operations that did not return a value and provided no ordering,
+	such as atomic_inc() and atomic_dec().
+
+3.	Operations that returned a value and provided full ordering, such as
+	atomic_add_return() and atomic_dec_and_test().  Note that some
+	value-returning operations provide full ordering only conditionally.
+	For example, cmpxchg() provides ordering only upon success.
+
+More recent kernels have operations that return a value but do not
+provide full ordering.  These are flagged with either a _relaxed()
+suffix (providing no ordering), or an _acquire() or _release() suffix
+(providing limited ordering).
+
+Additional information may be found in these files:
+
+Documentation/atomic_t.txt
+Documentation/atomic_bitops.txt
+Documentation/core-api/atomic_ops.rst
+Documentation/core-api/refcount-vs-atomic.rst
+
+Reading code using these primitives is often also quite helpful.
+
+
+Lockless, fully ordered
+=======================
+
+When using locking, there often comes a time when it is necessary
+to access some variable or another without holding the data lock
+that serializes access to that variable.
+
+If you want to keep things simple, use the initialization and read-out
+operations from the previous section only when there are no racing
+accesses.  Otherwise, use only fully ordered operations when accessing
+or modifying the variable.  This approach guarantees that code prior
+to a given access to that variable will be seen by all CPUs has having
+happened before any code following any later access to that same variable.
+
+Please note that per-CPU functions are not atomic operations and
+hence they do not provide any ordering guarantees at all.
+
+If the lockless accesses are frequently executed reads that are used
+only for heuristics, or if they are frequently executed writes that
+are used only for statistics, please see the next section.
+
+
+Lockless statistics and heuristics
+==================================
+
+Unordered primitives such as atomic_read(), atomic_set(), READ_ONCE(), and
+WRITE_ONCE() can safely be used in some cases.  These primitives provide
+no ordering, but they do prevent the compiler from carrying out a number
+of destructive optimizations (for which please see the next section).
+One example use for these primitives is statistics, such as per-CPU
+counters exemplified by the rt_cache_stat structure's routing-cache
+statistics counters.  Another example use case is heuristics, such as
+the jiffies_till_first_fqs and jiffies_till_next_fqs kernel parameters
+controlling how often RCU scans for idle CPUs.
+
+But be careful.  "Unordered" really does mean "unordered".  It is all
+too easy to assume ordering, and this assumption must be avoided when
+using these primitives.
+
+
+Don't let the compiler trip you up
+==================================
+
+It can be quite tempting to use plain C-language accesses for lockless
+loads from and stores to shared variables.  Although this is both
+possible and quite common in the Linux kernel, it does require a
+surprising amount of analysis, care, and knowledge about the compiler.
+Yes, some decades ago it was not unfair to consider a C compiler to be
+an assembler with added syntax and better portability, but the advent of
+sophisticated optimizing compilers mean that those days are long gone.
+Today's optimizing compilers can profoundly rewrite your code during the
+translation process, and have long been ready, willing, and able to do so.
+
+Therefore, if you really need to use C-language assignments instead of
+READ_ONCE(), WRITE_ONCE(), and so on, you will need to have a very good
+understanding of both the C standard and your compiler.  Here are some
+introductory references and some tooling to start you on this noble quest:
+
+Who's afraid of a big bad optimizing compiler?
+	https://lwn.net/Articles/793253/
+Calibrating your fear of big bad optimizing compilers
+	https://lwn.net/Articles/799218/
+Concurrency bugs should fear the big bad data-race detector (part 1)
+	https://lwn.net/Articles/816850/
+Concurrency bugs should fear the big bad data-race detector (part 2)
+	https://lwn.net/Articles/816854/
+
+
+More complex use cases
+======================
+
+If the alternatives above do not do what you need, please look at the
+recipes-pairs.txt file to peel off the next layer of the memory-ordering
+onion.
diff --git a/tools/memory-model/README b/tools/memory-model/README
index ecb7385376bf..c8144d4aafa0 100644
--- a/tools/memory-model/README
+++ b/tools/memory-model/README
@@ -63,10 +63,32 @@ BASIC USAGE: HERD7
 ==================
 
 The memory model is used, in conjunction with "herd7", to exhaustively
-explore the state space of small litmus tests.
+explore the state space of small litmus tests.  Documentation describing
+the format, features, capabilities and limitations of these litmus
+tests is available in tools/memory-model/Documentation/litmus-tests.txt.
 
-For example, to run SB+fencembonceonces.litmus against the memory model:
+Example litmus tests may be found in the Linux-kernel source tree:
 
+	tools/memory-model/litmus-tests/
+	Documentation/litmus-tests/
+
+Several thousand more example litmus tests are available here:
+
+	https://github.com/paulmckrcu/litmus
+	https://git.kernel.org/pub/scm/linux/kernel/git/paulmck/perfbook.git/tree/CodeSamples/formal/herd
+	https://git.kernel.org/pub/scm/linux/kernel/git/paulmck/perfbook.git/tree/CodeSamples/formal/litmus
+
+Documentation describing litmus tests and now to use them may be found
+here:
+
+	tools/memory-model/Documentation/litmus-tests.txt
+
+The remainder of this section uses the SB+fencembonceonces.litmus test
+located in the tools/memory-model directory.
+
+To run SB+fencembonceonces.litmus against the memory model:
+
+  $ cd $LINUX_SOURCE_TREE/tools/memory-model
   $ herd7 -conf linux-kernel.cfg litmus-tests/SB+fencembonceonces.litmus
 
 Here is the corresponding output:
@@ -87,7 +109,11 @@ Here is the corresponding output:
 The "Positive: 0 Negative: 3" and the "Never 0 3" each indicate that
 this litmus test's "exists" clause can not be satisfied.
 
-See "herd7 -help" or "herdtools7/doc/" for more information.
+See "herd7 -help" or "herdtools7/doc/" for more information on running the
+tool itself, but please be aware that this documentation is intended for
+people who work on the memory model itself, that is, people making changes
+to the tools/memory-model/linux-kernel.* files.  It is not intended for
+people focusing on writing, understanding, and running LKMM litmus tests.
 
 
 =====================
@@ -124,7 +150,11 @@ that during two million trials, the state specified in this litmus
 test's "exists" clause was not reached.
 
 And, as with "herd7", please see "klitmus7 -help" or "herdtools7/doc/"
-for more information.
+for more information.  And again, please be aware that this documentation
+is intended for people who work on the memory model itself, that is,
+people making changes to the tools/memory-model/linux-kernel.* files.
+It is not intended for people focusing on writing, understanding, and
+running LKMM litmus tests.
 
 
 ====================
@@ -137,12 +167,21 @@ Documentation/cheatsheet.txt
 Documentation/explanation.txt
 	Describes the memory model in detail.
 
+Documentation/litmus-tests.txt
+	Describes the format, features, capabilities, and limitations
+	of the litmus tests that LKMM can evaluate.
+
 Documentation/recipes.txt
 	Lists common memory-ordering patterns.
 
 Documentation/references.txt
 	Provides background reading.
 
+Documentation/simple.txt
+	Starting point for someone new to Linux-kernel concurrency.
+	And also for those needing a reminder of the simpler approaches
+	to concurrency!
+
 linux-kernel.bell
 	Categorizes the relevant instructions, including memory
 	references, memory barriers, atomic read-modify-write operations,
@@ -187,116 +226,3 @@ README
 	This file.
 
 scripts	Various scripts, see scripts/README.
-
-
-===========
-LIMITATIONS
-===========
-
-The Linux-kernel memory model (LKMM) has the following limitations:
-
-1.	Compiler optimizations are not accurately modeled.  Of course,
-	the use of READ_ONCE() and WRITE_ONCE() limits the compiler's
-	ability to optimize, but under some circumstances it is possible
-	for the compiler to undermine the memory model.  For more
-	information, see Documentation/explanation.txt (in particular,
-	the "THE PROGRAM ORDER RELATION: po AND po-loc" and "A WARNING"
-	sections).
-
-	Note that this limitation in turn limits LKMM's ability to
-	accurately model address, control, and data dependencies.
-	For example, if the compiler can deduce the value of some variable
-	carrying a dependency, then the compiler can break that dependency
-	by substituting a constant of that value.
-
-2.	Multiple access sizes for a single variable are not supported,
-	and neither are misaligned or partially overlapping accesses.
-
-3.	Exceptions and interrupts are not modeled.  In some cases,
-	this limitation can be overcome by modeling the interrupt or
-	exception with an additional process.
-
-4.	I/O such as MMIO or DMA is not supported.
-
-5.	Self-modifying code (such as that found in the kernel's
-	alternatives mechanism, function tracer, Berkeley Packet Filter
-	JIT compiler, and module loader) is not supported.
-
-6.	Complete modeling of all variants of atomic read-modify-write
-	operations, locking primitives, and RCU is not provided.
-	For example, call_rcu() and rcu_barrier() are not supported.
-	However, a substantial amount of support is provided for these
-	operations, as shown in the linux-kernel.def file.
-
-	a.	When rcu_assign_pointer() is passed NULL, the Linux
-		kernel provides no ordering, but LKMM models this
-		case as a store release.
-
-	b.	The "unless" RMW operations are not currently modeled:
-		atomic_long_add_unless(), atomic_inc_unless_negative(),
-		and atomic_dec_unless_positive().  These can be emulated
-		in litmus tests, for example, by using atomic_cmpxchg().
-
-		One exception of this limitation is atomic_add_unless(),
-		which is provided directly by herd7 (so no corresponding
-		definition in linux-kernel.def).  atomic_add_unless() is
-		modeled by herd7 therefore it can be used in litmus tests.
-
-	c.	The call_rcu() function is not modeled.  It can be
-		emulated in litmus tests by adding another process that
-		invokes synchronize_rcu() and the body of the callback
-		function, with (for example) a release-acquire from
-		the site of the emulated call_rcu() to the beginning
-		of the additional process.
-
-	d.	The rcu_barrier() function is not modeled.  It can be
-		emulated in litmus tests emulating call_rcu() via
-		(for example) a release-acquire from the end of each
-		additional call_rcu() process to the site of the
-		emulated rcu-barrier().
-
-	e.	Although sleepable RCU (SRCU) is now modeled, there
-		are some subtle differences between its semantics and
-		those in the Linux kernel.  For example, the kernel
-		might interpret the following sequence as two partially
-		overlapping SRCU read-side critical sections:
-
-			 1  r1 = srcu_read_lock(&my_srcu);
-			 2  do_something_1();
-			 3  r2 = srcu_read_lock(&my_srcu);
-			 4  do_something_2();
-			 5  srcu_read_unlock(&my_srcu, r1);
-			 6  do_something_3();
-			 7  srcu_read_unlock(&my_srcu, r2);
-
-		In contrast, LKMM will interpret this as a nested pair of
-		SRCU read-side critical sections, with the outer critical
-		section spanning lines 1-7 and the inner critical section
-		spanning lines 3-5.
-
-		This difference would be more of a concern had anyone
-		identified a reasonable use case for partially overlapping
-		SRCU read-side critical sections.  For more information,
-		please see: https://paulmck.livejournal.com/40593.html
-
-	f.	Reader-writer locking is not modeled.  It can be
-		emulated in litmus tests using atomic read-modify-write
-		operations.
-
-The "herd7" tool has some additional limitations of its own, apart from
-the memory model:
-
-1.	Non-trivial data structures such as arrays or structures are
-	not supported.	However, pointers are supported, allowing trivial
-	linked lists to be constructed.
-
-2.	Dynamic memory allocation is not supported, although this can
-	be worked around in some cases by supplying multiple statically
-	allocated variables.
-
-Some of these limitations may be overcome in the future, but others are
-more likely to be addressed by incorporating the Linux-kernel memory model
-into other tools.
-
-Finally, please note that LKMM is subject to change as hardware, use cases,
-and compilers evolve.
diff --git a/tools/objtool/Makefile b/tools/objtool/Makefile
index 7770edcda3a0..4ea9a833dde7 100644
--- a/tools/objtool/Makefile
+++ b/tools/objtool/Makefile
@@ -37,7 +37,7 @@ INCLUDES := -I$(srctree)/tools/include \
 	    -I$(srctree)/tools/arch/$(HOSTARCH)/include/uapi \
 	    -I$(srctree)/tools/arch/$(SRCARCH)/include	\
 	    -I$(srctree)/tools/objtool/arch/$(SRCARCH)/include
-WARNINGS := $(EXTRA_WARNINGS) -Wno-switch-default -Wno-switch-enum -Wno-packed
+WARNINGS := $(EXTRA_WARNINGS) -Wno-switch-default -Wno-switch-enum -Wno-packed -Wno-nested-externs
 CFLAGS   := -Werror $(WARNINGS) $(KBUILD_HOSTCFLAGS) -g $(INCLUDES) $(LIBELF_FLAGS)
 LDFLAGS  += $(LIBELF_LIBS) $(LIBSUBCMD) $(KBUILD_HOSTLDFLAGS)
 
@@ -55,6 +55,10 @@ ifeq ($(SRCARCH),x86)
 	SUBCMD_ORC := y
 endif
 
+ifeq ($(SUBCMD_ORC),y)
+	CFLAGS += -DINSN_USE_ORC
+endif
+
 export SUBCMD_CHECK SUBCMD_ORC
 export srctree OUTPUT CFLAGS SRCARCH AWK
 include $(srctree)/tools/build/Makefile.include
diff --git a/tools/objtool/arch.h b/tools/objtool/arch.h
index 2e2ce089b0e9..4a84c3081b8e 100644
--- a/tools/objtool/arch.h
+++ b/tools/objtool/arch.h
@@ -11,7 +11,9 @@
 #include "objtool.h"
 #include "cfi.h"
 
+#ifdef INSN_USE_ORC
 #include <asm/orc_types.h>
+#endif
 
 enum insn_type {
 	INSN_JUMP_CONDITIONAL,
@@ -86,4 +88,6 @@ unsigned long arch_dest_reloc_offset(int addend);
 
 const char *arch_nop_insn(int len);
 
+int arch_decode_hint_reg(struct instruction *insn, u8 sp_reg);
+
 #endif /* _ARCH_H */
diff --git a/tools/objtool/arch/x86/Build b/tools/objtool/arch/x86/Build
index 7c5004008e97..9f7869b5c5e0 100644
--- a/tools/objtool/arch/x86/Build
+++ b/tools/objtool/arch/x86/Build
@@ -1,3 +1,4 @@
+objtool-y += special.o
 objtool-y += decode.o
 
 inat_tables_script = ../arch/x86/tools/gen-insn-attr-x86.awk
diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c
index 1967370440b3..cde9c36e40ae 100644
--- a/tools/objtool/arch/x86/decode.c
+++ b/tools/objtool/arch/x86/decode.c
@@ -15,6 +15,7 @@
 #include "../../elf.h"
 #include "../../arch.h"
 #include "../../warn.h"
+#include <asm/orc_types.h>
 
 static unsigned char op_to_cfi_reg[][2] = {
 	{CFI_AX, CFI_R8},
@@ -583,3 +584,39 @@ const char *arch_nop_insn(int len)
 
 	return nops[len-1];
 }
+
+int arch_decode_hint_reg(struct instruction *insn, u8 sp_reg)
+{
+	struct cfi_reg *cfa = &insn->cfi.cfa;
+
+	switch (sp_reg) {
+	case ORC_REG_UNDEFINED:
+		cfa->base = CFI_UNDEFINED;
+		break;
+	case ORC_REG_SP:
+		cfa->base = CFI_SP;
+		break;
+	case ORC_REG_BP:
+		cfa->base = CFI_BP;
+		break;
+	case ORC_REG_SP_INDIRECT:
+		cfa->base = CFI_SP_INDIRECT;
+		break;
+	case ORC_REG_R10:
+		cfa->base = CFI_R10;
+		break;
+	case ORC_REG_R13:
+		cfa->base = CFI_R13;
+		break;
+	case ORC_REG_DI:
+		cfa->base = CFI_DI;
+		break;
+	case ORC_REG_DX:
+		cfa->base = CFI_DX;
+		break;
+	default:
+		return -1;
+	}
+
+	return 0;
+}
diff --git a/tools/objtool/arch/x86/include/arch_special.h b/tools/objtool/arch/x86/include/arch_special.h
new file mode 100644
index 000000000000..d818b2bffa02
--- /dev/null
+++ b/tools/objtool/arch/x86/include/arch_special.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _X86_ARCH_SPECIAL_H
+#define _X86_ARCH_SPECIAL_H
+
+#define EX_ENTRY_SIZE		12
+#define EX_ORIG_OFFSET		0
+#define EX_NEW_OFFSET		4
+
+#define JUMP_ENTRY_SIZE		16
+#define JUMP_ORIG_OFFSET	0
+#define JUMP_NEW_OFFSET		4
+
+#define ALT_ENTRY_SIZE		13
+#define ALT_ORIG_OFFSET		0
+#define ALT_NEW_OFFSET		4
+#define ALT_FEATURE_OFFSET	8
+#define ALT_ORIG_LEN_OFFSET	10
+#define ALT_NEW_LEN_OFFSET	11
+
+#endif /* _X86_ARCH_SPECIAL_H */
diff --git a/tools/objtool/arch/x86/special.c b/tools/objtool/arch/x86/special.c
new file mode 100644
index 000000000000..fd4af88c0ea5
--- /dev/null
+++ b/tools/objtool/arch/x86/special.c
@@ -0,0 +1,145 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+#include <string.h>
+
+#include "../../special.h"
+#include "../../builtin.h"
+
+#define X86_FEATURE_POPCNT (4 * 32 + 23)
+#define X86_FEATURE_SMAP   (9 * 32 + 20)
+
+void arch_handle_alternative(unsigned short feature, struct special_alt *alt)
+{
+	switch (feature) {
+	case X86_FEATURE_SMAP:
+		/*
+		 * If UACCESS validation is enabled; force that alternative;
+		 * otherwise force it the other way.
+		 *
+		 * What we want to avoid is having both the original and the
+		 * alternative code flow at the same time, in that case we can
+		 * find paths that see the STAC but take the NOP instead of
+		 * CLAC and the other way around.
+		 */
+		if (uaccess)
+			alt->skip_orig = true;
+		else
+			alt->skip_alt = true;
+		break;
+	case X86_FEATURE_POPCNT:
+		/*
+		 * It has been requested that we don't validate the !POPCNT
+		 * feature path which is a "very very small percentage of
+		 * machines".
+		 */
+		alt->skip_orig = true;
+		break;
+	default:
+		break;
+	}
+}
+
+bool arch_support_alt_relocation(struct special_alt *special_alt,
+				 struct instruction *insn,
+				 struct reloc *reloc)
+{
+	/*
+	 * The x86 alternatives code adjusts the offsets only when it
+	 * encounters a branch instruction at the very beginning of the
+	 * replacement group.
+	 */
+	return insn->offset == special_alt->new_off &&
+	       (insn->type == INSN_CALL || is_static_jump(insn));
+}
+
+/*
+ * There are 3 basic jump table patterns:
+ *
+ * 1. jmpq *[rodata addr](,%reg,8)
+ *
+ *    This is the most common case by far.  It jumps to an address in a simple
+ *    jump table which is stored in .rodata.
+ *
+ * 2. jmpq *[rodata addr](%rip)
+ *
+ *    This is caused by a rare GCC quirk, currently only seen in three driver
+ *    functions in the kernel, only with certain obscure non-distro configs.
+ *
+ *    As part of an optimization, GCC makes a copy of an existing switch jump
+ *    table, modifies it, and then hard-codes the jump (albeit with an indirect
+ *    jump) to use a single entry in the table.  The rest of the jump table and
+ *    some of its jump targets remain as dead code.
+ *
+ *    In such a case we can just crudely ignore all unreachable instruction
+ *    warnings for the entire object file.  Ideally we would just ignore them
+ *    for the function, but that would require redesigning the code quite a
+ *    bit.  And honestly that's just not worth doing: unreachable instruction
+ *    warnings are of questionable value anyway, and this is such a rare issue.
+ *
+ * 3. mov [rodata addr],%reg1
+ *    ... some instructions ...
+ *    jmpq *(%reg1,%reg2,8)
+ *
+ *    This is a fairly uncommon pattern which is new for GCC 6.  As of this
+ *    writing, there are 11 occurrences of it in the allmodconfig kernel.
+ *
+ *    As of GCC 7 there are quite a few more of these and the 'in between' code
+ *    is significant. Esp. with KASAN enabled some of the code between the mov
+ *    and jmpq uses .rodata itself, which can confuse things.
+ *
+ *    TODO: Once we have DWARF CFI and smarter instruction decoding logic,
+ *    ensure the same register is used in the mov and jump instructions.
+ *
+ *    NOTE: RETPOLINE made it harder still to decode dynamic jumps.
+ */
+struct reloc *arch_find_switch_table(struct objtool_file *file,
+				    struct instruction *insn)
+{
+	struct reloc  *text_reloc, *rodata_reloc;
+	struct section *table_sec;
+	unsigned long table_offset;
+
+	/* look for a relocation which references .rodata */
+	text_reloc = find_reloc_by_dest_range(file->elf, insn->sec,
+					      insn->offset, insn->len);
+	if (!text_reloc || text_reloc->sym->type != STT_SECTION ||
+	    !text_reloc->sym->sec->rodata)
+		return NULL;
+
+	table_offset = text_reloc->addend;
+	table_sec = text_reloc->sym->sec;
+
+	if (text_reloc->type == R_X86_64_PC32)
+		table_offset += 4;
+
+	/*
+	 * Make sure the .rodata address isn't associated with a
+	 * symbol.  GCC jump tables are anonymous data.
+	 *
+	 * Also support C jump tables which are in the same format as
+	 * switch jump tables.  For objtool to recognize them, they
+	 * need to be placed in the C_JUMP_TABLE_SECTION section.  They
+	 * have symbols associated with them.
+	 */
+	if (find_symbol_containing(table_sec, table_offset) &&
+	    strcmp(table_sec->name, C_JUMP_TABLE_SECTION))
+		return NULL;
+
+	/*
+	 * Each table entry has a rela associated with it.  The rela
+	 * should reference text in the same function as the original
+	 * instruction.
+	 */
+	rodata_reloc = find_reloc_by_dest(file->elf, table_sec, table_offset);
+	if (!rodata_reloc)
+		return NULL;
+
+	/*
+	 * Use of RIP-relative switch jumps is quite rare, and
+	 * indicates a rare GCC quirk/bug which can leave dead
+	 * code behind.
+	 */
+	if (text_reloc->type == R_X86_64_PC32)
+		file->ignore_unreachables = true;
+
+	return rodata_reloc;
+}
diff --git a/tools/objtool/builtin-check.c b/tools/objtool/builtin-check.c
index 7a44174967b5..c6d199bfd0ae 100644
--- a/tools/objtool/builtin-check.c
+++ b/tools/objtool/builtin-check.c
@@ -41,6 +41,8 @@ const struct option check_options[] = {
 int cmd_check(int argc, const char **argv)
 {
 	const char *objname, *s;
+	struct objtool_file *file;
+	int ret;
 
 	argc = parse_options(argc, argv, check_options, check_usage, 0);
 
@@ -53,5 +55,16 @@ int cmd_check(int argc, const char **argv)
 	if (s && !s[9])
 		vmlinux = true;
 
-	return check(objname, false);
+	file = objtool_open_read(objname);
+	if (!file)
+		return 1;
+
+	ret = check(file);
+	if (ret)
+		return ret;
+
+	if (file->elf->changed)
+		return elf_write(file->elf);
+
+	return 0;
 }
diff --git a/tools/objtool/builtin-orc.c b/tools/objtool/builtin-orc.c
index b1dfe2007962..7b31121fa60b 100644
--- a/tools/objtool/builtin-orc.c
+++ b/tools/objtool/builtin-orc.c
@@ -31,13 +31,38 @@ int cmd_orc(int argc, const char **argv)
 		usage_with_options(orc_usage, check_options);
 
 	if (!strncmp(argv[0], "gen", 3)) {
+		struct objtool_file *file;
+		int ret;
+
 		argc = parse_options(argc, argv, check_options, orc_usage, 0);
 		if (argc != 1)
 			usage_with_options(orc_usage, check_options);
 
 		objname = argv[0];
 
-		return check(objname, true);
+		file = objtool_open_read(objname);
+		if (!file)
+			return 1;
+
+		ret = check(file);
+		if (ret)
+			return ret;
+
+		if (list_empty(&file->insn_list))
+			return 0;
+
+		ret = create_orc(file);
+		if (ret)
+			return ret;
+
+		ret = create_orc_sections(file);
+		if (ret)
+			return ret;
+
+		if (!file->elf->changed)
+			return 0;
+
+		return elf_write(file->elf);
 	}
 
 	if (!strcmp(argv[0], "dump")) {
diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index e034a8f24f46..c6ab44543c92 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -14,20 +14,19 @@
 #include "warn.h"
 #include "arch_elf.h"
 
+#include <linux/objtool.h>
 #include <linux/hashtable.h>
 #include <linux/kernel.h>
+#include <linux/static_call_types.h>
 
 #define FAKE_JUMP_OFFSET -1
 
-#define C_JUMP_TABLE_SECTION ".rodata..c_jump_table"
-
 struct alternative {
 	struct list_head list;
 	struct instruction *insn;
 	bool skip_orig;
 };
 
-const char *objname;
 struct cfi_init_state initial_func_cfi;
 
 struct instruction *find_insn(struct objtool_file *file,
@@ -110,12 +109,6 @@ static struct instruction *prev_insn_same_sym(struct objtool_file *file,
 	for (insn = next_insn_same_sec(file, insn); insn;		\
 	     insn = next_insn_same_sec(file, insn))
 
-static bool is_static_jump(struct instruction *insn)
-{
-	return insn->type == INSN_JUMP_CONDITIONAL ||
-	       insn->type == INSN_JUMP_UNCONDITIONAL;
-}
-
 static bool is_sibling_call(struct instruction *insn)
 {
 	/* An indirect jump is either a sibling call or a jump to a table. */
@@ -433,6 +426,103 @@ reachable:
 	return 0;
 }
 
+static int create_static_call_sections(struct objtool_file *file)
+{
+	struct section *sec, *reloc_sec;
+	struct reloc *reloc;
+	struct static_call_site *site;
+	struct instruction *insn;
+	struct symbol *key_sym;
+	char *key_name, *tmp;
+	int idx;
+
+	sec = find_section_by_name(file->elf, ".static_call_sites");
+	if (sec) {
+		INIT_LIST_HEAD(&file->static_call_list);
+		WARN("file already has .static_call_sites section, skipping");
+		return 0;
+	}
+
+	if (list_empty(&file->static_call_list))
+		return 0;
+
+	idx = 0;
+	list_for_each_entry(insn, &file->static_call_list, static_call_node)
+		idx++;
+
+	sec = elf_create_section(file->elf, ".static_call_sites", SHF_WRITE,
+				 sizeof(struct static_call_site), idx);
+	if (!sec)
+		return -1;
+
+	reloc_sec = elf_create_reloc_section(file->elf, sec, SHT_RELA);
+	if (!reloc_sec)
+		return -1;
+
+	idx = 0;
+	list_for_each_entry(insn, &file->static_call_list, static_call_node) {
+
+		site = (struct static_call_site *)sec->data->d_buf + idx;
+		memset(site, 0, sizeof(struct static_call_site));
+
+		/* populate reloc for 'addr' */
+		reloc = malloc(sizeof(*reloc));
+		if (!reloc) {
+			perror("malloc");
+			return -1;
+		}
+		memset(reloc, 0, sizeof(*reloc));
+		reloc->sym = insn->sec->sym;
+		reloc->addend = insn->offset;
+		reloc->type = R_X86_64_PC32;
+		reloc->offset = idx * sizeof(struct static_call_site);
+		reloc->sec = reloc_sec;
+		elf_add_reloc(file->elf, reloc);
+
+		/* find key symbol */
+		key_name = strdup(insn->call_dest->name);
+		if (!key_name) {
+			perror("strdup");
+			return -1;
+		}
+		if (strncmp(key_name, STATIC_CALL_TRAMP_PREFIX_STR,
+			    STATIC_CALL_TRAMP_PREFIX_LEN)) {
+			WARN("static_call: trampoline name malformed: %s", key_name);
+			return -1;
+		}
+		tmp = key_name + STATIC_CALL_TRAMP_PREFIX_LEN - STATIC_CALL_KEY_PREFIX_LEN;
+		memcpy(tmp, STATIC_CALL_KEY_PREFIX_STR, STATIC_CALL_KEY_PREFIX_LEN);
+
+		key_sym = find_symbol_by_name(file->elf, tmp);
+		if (!key_sym) {
+			WARN("static_call: can't find static_call_key symbol: %s", tmp);
+			return -1;
+		}
+		free(key_name);
+
+		/* populate reloc for 'key' */
+		reloc = malloc(sizeof(*reloc));
+		if (!reloc) {
+			perror("malloc");
+			return -1;
+		}
+		memset(reloc, 0, sizeof(*reloc));
+		reloc->sym = key_sym;
+		reloc->addend = is_sibling_call(insn) ? STATIC_CALL_SITE_TAIL : 0;
+		reloc->type = R_X86_64_PC32;
+		reloc->offset = idx * sizeof(struct static_call_site) + 4;
+		reloc->sec = reloc_sec;
+		elf_add_reloc(file->elf, reloc);
+
+		idx++;
+	}
+
+	if (elf_rebuild_reloc_section(file->elf, reloc_sec))
+		return -1;
+
+	return 0;
+}
+
 /*
  * Warnings shouldn't be reported for ignored functions.
  */
@@ -493,6 +583,8 @@ static const char *uaccess_safe_builtin[] = {
 	"__asan_store4_noabort",
 	"__asan_store8_noabort",
 	"__asan_store16_noabort",
+	"__kasan_check_read",
+	"__kasan_check_write",
 	/* KASAN in-line */
 	"__asan_report_load_n_noabort",
 	"__asan_report_load1_noabort",
@@ -528,6 +620,61 @@ static const char *uaccess_safe_builtin[] = {
 	"__tsan_write4",
 	"__tsan_write8",
 	"__tsan_write16",
+	"__tsan_read_write1",
+	"__tsan_read_write2",
+	"__tsan_read_write4",
+	"__tsan_read_write8",
+	"__tsan_read_write16",
+	"__tsan_atomic8_load",
+	"__tsan_atomic16_load",
+	"__tsan_atomic32_load",
+	"__tsan_atomic64_load",
+	"__tsan_atomic8_store",
+	"__tsan_atomic16_store",
+	"__tsan_atomic32_store",
+	"__tsan_atomic64_store",
+	"__tsan_atomic8_exchange",
+	"__tsan_atomic16_exchange",
+	"__tsan_atomic32_exchange",
+	"__tsan_atomic64_exchange",
+	"__tsan_atomic8_fetch_add",
+	"__tsan_atomic16_fetch_add",
+	"__tsan_atomic32_fetch_add",
+	"__tsan_atomic64_fetch_add",
+	"__tsan_atomic8_fetch_sub",
+	"__tsan_atomic16_fetch_sub",
+	"__tsan_atomic32_fetch_sub",
+	"__tsan_atomic64_fetch_sub",
+	"__tsan_atomic8_fetch_and",
+	"__tsan_atomic16_fetch_and",
+	"__tsan_atomic32_fetch_and",
+	"__tsan_atomic64_fetch_and",
+	"__tsan_atomic8_fetch_or",
+	"__tsan_atomic16_fetch_or",
+	"__tsan_atomic32_fetch_or",
+	"__tsan_atomic64_fetch_or",
+	"__tsan_atomic8_fetch_xor",
+	"__tsan_atomic16_fetch_xor",
+	"__tsan_atomic32_fetch_xor",
+	"__tsan_atomic64_fetch_xor",
+	"__tsan_atomic8_fetch_nand",
+	"__tsan_atomic16_fetch_nand",
+	"__tsan_atomic32_fetch_nand",
+	"__tsan_atomic64_fetch_nand",
+	"__tsan_atomic8_compare_exchange_strong",
+	"__tsan_atomic16_compare_exchange_strong",
+	"__tsan_atomic32_compare_exchange_strong",
+	"__tsan_atomic64_compare_exchange_strong",
+	"__tsan_atomic8_compare_exchange_weak",
+	"__tsan_atomic16_compare_exchange_weak",
+	"__tsan_atomic32_compare_exchange_weak",
+	"__tsan_atomic64_compare_exchange_weak",
+	"__tsan_atomic8_compare_exchange_val",
+	"__tsan_atomic16_compare_exchange_val",
+	"__tsan_atomic32_compare_exchange_val",
+	"__tsan_atomic64_compare_exchange_val",
+	"__tsan_atomic_thread_fence",
+	"__tsan_atomic_signal_fence",
 	/* KCOV */
 	"write_comp_data",
 	"check_kcov_mode",
@@ -548,8 +695,9 @@ static const char *uaccess_safe_builtin[] = {
 	"__ubsan_handle_shift_out_of_bounds",
 	/* misc */
 	"csum_partial_copy_generic",
-	"__memcpy_mcsafe",
-	"mcsafe_handle_tail",
+	"copy_mc_fragile",
+	"copy_mc_fragile_handle_tail",
+	"copy_mc_enhanced_fast_string",
 	"ftrace_likely_update", /* CONFIG_TRACE_BRANCH_PROFILING */
 	NULL
 };
@@ -619,7 +767,7 @@ static int add_jump_destinations(struct objtool_file *file)
 		if (!is_static_jump(insn))
 			continue;
 
-		if (insn->ignore || insn->offset == FAKE_JUMP_OFFSET)
+		if (insn->offset == FAKE_JUMP_OFFSET)
 			continue;
 
 		reloc = find_reloc_by_dest_range(file->elf, insn->sec,
@@ -649,6 +797,10 @@ static int add_jump_destinations(struct objtool_file *file)
 		} else {
 			/* external sibling call */
 			insn->call_dest = reloc->sym;
+			if (insn->call_dest->static_call_tramp) {
+				list_add_tail(&insn->static_call_node,
+					      &file->static_call_list);
+			}
 			continue;
 		}
 
@@ -700,6 +852,10 @@ static int add_jump_destinations(struct objtool_file *file)
 
 				/* internal sibling call */
 				insn->call_dest = insn->jump_dest->func;
+				if (insn->call_dest->static_call_tramp) {
+					list_add_tail(&insn->static_call_node,
+						      &file->static_call_list);
+				}
 			}
 		}
 	}
@@ -717,6 +873,17 @@ static void remove_insn_ops(struct instruction *insn)
 	}
 }
 
+static struct symbol *find_call_destination(struct section *sec, unsigned long offset)
+{
+	struct symbol *call_dest;
+
+	call_dest = find_func_by_offset(sec, offset);
+	if (!call_dest)
+		call_dest = find_symbol_by_offset(sec, offset);
+
+	return call_dest;
+}
+
 /*
  * Find the destination instructions for all calls.
  */
@@ -734,9 +901,7 @@ static int add_call_destinations(struct objtool_file *file)
 					       insn->offset, insn->len);
 		if (!reloc) {
 			dest_off = arch_jump_destination(insn);
-			insn->call_dest = find_func_by_offset(insn->sec, dest_off);
-			if (!insn->call_dest)
-				insn->call_dest = find_symbol_by_offset(insn->sec, dest_off);
+			insn->call_dest = find_call_destination(insn->sec, dest_off);
 
 			if (insn->ignore)
 				continue;
@@ -754,8 +919,8 @@ static int add_call_destinations(struct objtool_file *file)
 
 		} else if (reloc->sym->type == STT_SECTION) {
 			dest_off = arch_dest_reloc_offset(reloc->addend);
-			insn->call_dest = find_func_by_offset(reloc->sym->sec,
-							      dest_off);
+			insn->call_dest = find_call_destination(reloc->sym->sec,
+								dest_off);
 			if (!insn->call_dest) {
 				WARN_FUNC("can't find call dest symbol at %s+0x%lx",
 					  insn->sec, insn->offset,
@@ -867,6 +1032,8 @@ static int handle_group_alt(struct objtool_file *file,
 	alt_group = alt_group_next_index++;
 	insn = *new_insn;
 	sec_for_each_insn_from(file, insn) {
+		struct reloc *alt_reloc;
+
 		if (insn->offset >= special_alt->new_off + special_alt->new_len)
 			break;
 
@@ -883,14 +1050,11 @@ static int handle_group_alt(struct objtool_file *file,
 		 * .altinstr_replacement section, unless the arch's
 		 * alternatives code can adjust the relative offsets
 		 * accordingly.
-		 *
-		 * The x86 alternatives code adjusts the offsets only when it
-		 * encounters a branch instruction at the very beginning of the
-		 * replacement group.
 		 */
-		if ((insn->offset != special_alt->new_off ||
-		    (insn->type != INSN_CALL && !is_static_jump(insn))) &&
-		    find_reloc_by_dest_range(file->elf, insn->sec, insn->offset, insn->len)) {
+		alt_reloc = find_reloc_by_dest_range(file->elf, insn->sec,
+						   insn->offset, insn->len);
+		if (alt_reloc &&
+		    !arch_support_alt_relocation(special_alt, insn, alt_reloc)) {
 
 			WARN_FUNC("unsupported relocation in alternatives section",
 				  insn->sec, insn->offset);
@@ -1092,56 +1256,15 @@ static int add_jump_table(struct objtool_file *file, struct instruction *insn,
 }
 
 /*
- * find_jump_table() - Given a dynamic jump, find the switch jump table in
- * .rodata associated with it.
- *
- * There are 3 basic patterns:
- *
- * 1. jmpq *[rodata addr](,%reg,8)
- *
- *    This is the most common case by far.  It jumps to an address in a simple
- *    jump table which is stored in .rodata.
- *
- * 2. jmpq *[rodata addr](%rip)
- *
- *    This is caused by a rare GCC quirk, currently only seen in three driver
- *    functions in the kernel, only with certain obscure non-distro configs.
- *
- *    As part of an optimization, GCC makes a copy of an existing switch jump
- *    table, modifies it, and then hard-codes the jump (albeit with an indirect
- *    jump) to use a single entry in the table.  The rest of the jump table and
- *    some of its jump targets remain as dead code.
- *
- *    In such a case we can just crudely ignore all unreachable instruction
- *    warnings for the entire object file.  Ideally we would just ignore them
- *    for the function, but that would require redesigning the code quite a
- *    bit.  And honestly that's just not worth doing: unreachable instruction
- *    warnings are of questionable value anyway, and this is such a rare issue.
- *
- * 3. mov [rodata addr],%reg1
- *    ... some instructions ...
- *    jmpq *(%reg1,%reg2,8)
- *
- *    This is a fairly uncommon pattern which is new for GCC 6.  As of this
- *    writing, there are 11 occurrences of it in the allmodconfig kernel.
- *
- *    As of GCC 7 there are quite a few more of these and the 'in between' code
- *    is significant. Esp. with KASAN enabled some of the code between the mov
- *    and jmpq uses .rodata itself, which can confuse things.
- *
- *    TODO: Once we have DWARF CFI and smarter instruction decoding logic,
- *    ensure the same register is used in the mov and jump instructions.
- *
- *    NOTE: RETPOLINE made it harder still to decode dynamic jumps.
+ * find_jump_table() - Given a dynamic jump, find the switch jump table
+ * associated with it.
  */
 static struct reloc *find_jump_table(struct objtool_file *file,
 				      struct symbol *func,
 				      struct instruction *insn)
 {
-	struct reloc *text_reloc, *table_reloc;
+	struct reloc *table_reloc;
 	struct instruction *dest_insn, *orig_insn = insn;
-	struct section *table_sec;
-	unsigned long table_offset;
 
 	/*
 	 * Backward search using the @first_jump_src links, these help avoid
@@ -1162,52 +1285,13 @@ static struct reloc *find_jump_table(struct objtool_file *file,
 		     insn->jump_dest->offset > orig_insn->offset))
 		    break;
 
-		/* look for a relocation which references .rodata */
-		text_reloc = find_reloc_by_dest_range(file->elf, insn->sec,
-						    insn->offset, insn->len);
-		if (!text_reloc || text_reloc->sym->type != STT_SECTION ||
-		    !text_reloc->sym->sec->rodata)
-			continue;
-
-		table_offset = text_reloc->addend;
-		table_sec = text_reloc->sym->sec;
-
-		if (text_reloc->type == R_X86_64_PC32)
-			table_offset += 4;
-
-		/*
-		 * Make sure the .rodata address isn't associated with a
-		 * symbol.  GCC jump tables are anonymous data.
-		 *
-		 * Also support C jump tables which are in the same format as
-		 * switch jump tables.  For objtool to recognize them, they
-		 * need to be placed in the C_JUMP_TABLE_SECTION section.  They
-		 * have symbols associated with them.
-		 */
-		if (find_symbol_containing(table_sec, table_offset) &&
-		    strcmp(table_sec->name, C_JUMP_TABLE_SECTION))
-			continue;
-
-		/*
-		 * Each table entry has a reloc associated with it.  The reloc
-		 * should reference text in the same function as the original
-		 * instruction.
-		 */
-		table_reloc = find_reloc_by_dest(file->elf, table_sec, table_offset);
+		table_reloc = arch_find_switch_table(file, insn);
 		if (!table_reloc)
 			continue;
 		dest_insn = find_insn(file, table_reloc->sym->sec, table_reloc->addend);
 		if (!dest_insn || !dest_insn->func || dest_insn->func->pfunc != func)
 			continue;
 
-		/*
-		 * Use of RIP-relative switch jumps is quite rare, and
-		 * indicates a rare GCC quirk/bug which can leave dead code
-		 * behind.
-		 */
-		if (text_reloc->type == R_X86_64_PC32)
-			file->ignore_unreachables = true;
-
 		return table_reloc;
 	}
 
@@ -1350,32 +1434,7 @@ static int read_unwind_hints(struct objtool_file *file)
 
 		insn->hint = true;
 
-		switch (hint->sp_reg) {
-		case ORC_REG_UNDEFINED:
-			cfa->base = CFI_UNDEFINED;
-			break;
-		case ORC_REG_SP:
-			cfa->base = CFI_SP;
-			break;
-		case ORC_REG_BP:
-			cfa->base = CFI_BP;
-			break;
-		case ORC_REG_SP_INDIRECT:
-			cfa->base = CFI_SP_INDIRECT;
-			break;
-		case ORC_REG_R10:
-			cfa->base = CFI_R10;
-			break;
-		case ORC_REG_R13:
-			cfa->base = CFI_R13;
-			break;
-		case ORC_REG_DI:
-			cfa->base = CFI_DI;
-			break;
-		case ORC_REG_DX:
-			cfa->base = CFI_DX;
-			break;
-		default:
+		if (arch_decode_hint_reg(insn, hint->sp_reg)) {
 			WARN_FUNC("unsupported unwind_hint sp base reg %d",
 				  insn->sec, insn->offset, hint->sp_reg);
 			return -1;
@@ -1522,6 +1581,23 @@ static int read_intra_function_calls(struct objtool_file *file)
 	return 0;
 }
 
+static int read_static_call_tramps(struct objtool_file *file)
+{
+	struct section *sec;
+	struct symbol *func;
+
+	for_each_sec(file, sec) {
+		list_for_each_entry(func, &sec->symbol_list, list) {
+			if (func->bind == STB_GLOBAL &&
+			    !strncmp(func->name, STATIC_CALL_TRAMP_PREFIX_STR,
+				     strlen(STATIC_CALL_TRAMP_PREFIX_STR)))
+				func->static_call_tramp = true;
+		}
+	}
+
+	return 0;
+}
+
 static void mark_rodata(struct objtool_file *file)
 {
 	struct section *sec;
@@ -1569,6 +1645,10 @@ static int decode_sections(struct objtool_file *file)
 	if (ret)
 		return ret;
 
+	ret = read_static_call_tramps(file);
+	if (ret)
+		return ret;
+
 	ret = add_jump_destinations(file);
 	if (ret)
 		return ret;
@@ -1768,7 +1848,8 @@ static int update_cfi_state(struct instruction *insn, struct cfi_state *cfi,
 		return 0;
 	}
 
-	if (cfi->type == ORC_TYPE_REGS || cfi->type == ORC_TYPE_REGS_IRET)
+	if (cfi->type == UNWIND_HINT_TYPE_REGS ||
+	    cfi->type == UNWIND_HINT_TYPE_REGS_PARTIAL)
 		return update_cfi_state_regs(insn, cfi, op);
 
 	switch (op->dest.type) {
@@ -2016,7 +2097,7 @@ static int update_cfi_state(struct instruction *insn, struct cfi_state *cfi,
 				/* drap: push %rbp */
 				cfi->stack_size = 0;
 
-			} else if (regs[op->src.reg].base == CFI_UNDEFINED) {
+			} else {
 
 				/* drap: push %reg */
 				save_reg(cfi, op->src.reg, CFI_BP, -cfi->stack_size);
@@ -2045,9 +2126,7 @@ static int update_cfi_state(struct instruction *insn, struct cfi_state *cfi,
 
 				/* save drap offset so we know when to restore it */
 				cfi->drap_offset = op->dest.offset;
-			}
-
-			else if (regs[op->src.reg].base == CFI_UNDEFINED) {
+			} else {
 
 				/* drap: mov reg, disp(%rbp) */
 				save_reg(cfi, op->src.reg, CFI_BP, op->dest.offset);
@@ -2432,6 +2511,11 @@ static int validate_branch(struct objtool_file *file, struct symbol *func,
 			if (dead_end_function(file, insn->call_dest))
 				return 0;
 
+			if (insn->type == INSN_CALL && insn->call_dest->static_call_tramp) {
+				list_add_tail(&insn->static_call_node,
+					      &file->static_call_list);
+			}
+
 			break;
 
 		case INSN_JUMP_CONDITIONAL:
@@ -2612,9 +2696,10 @@ static bool is_ubsan_insn(struct instruction *insn)
 			"__ubsan_handle_builtin_unreachable"));
 }
 
-static bool ignore_unreachable_insn(struct instruction *insn)
+static bool ignore_unreachable_insn(struct objtool_file *file, struct instruction *insn)
 {
 	int i;
+	struct instruction *prev_insn;
 
 	if (insn->ignore || insn->type == INSN_NOP)
 		return true;
@@ -2631,6 +2716,9 @@ static bool ignore_unreachable_insn(struct instruction *insn)
 	    !strcmp(insn->sec->name, ".altinstr_aux"))
 		return true;
 
+	if (insn->type == INSN_JUMP_UNCONDITIONAL && insn->offset == FAKE_JUMP_OFFSET)
+		return true;
+
 	if (!insn->func)
 		return false;
 
@@ -2639,8 +2727,11 @@ static bool ignore_unreachable_insn(struct instruction *insn)
 	 * __builtin_unreachable().  The BUG() macro has an unreachable() after
 	 * the UD2, which causes GCC's undefined trap logic to emit another UD2
 	 * (or occasionally a JMP to UD2).
+	 *
+	 * It may also insert a UD2 after calling a __noreturn function.
 	 */
-	if (list_prev_entry(insn, list)->dead_end &&
+	prev_insn = list_prev_entry(insn, list);
+	if ((prev_insn->dead_end || dead_end_function(file, prev_insn->call_dest)) &&
 	    (insn->type == INSN_BUG ||
 	     (insn->type == INSN_JUMP_UNCONDITIONAL &&
 	      insn->jump_dest && insn->jump_dest->type == INSN_BUG)))
@@ -2767,7 +2858,7 @@ static int validate_reachable_instructions(struct objtool_file *file)
 		return 0;
 
 	for_each_insn(file, insn) {
-		if (insn->visited || ignore_unreachable_insn(insn))
+		if (insn->visited || ignore_unreachable_insn(file, insn))
 			continue;
 
 		WARN_FUNC("unreachable instruction", insn->sec, insn->offset);
@@ -2777,36 +2868,22 @@ static int validate_reachable_instructions(struct objtool_file *file)
 	return 0;
 }
 
-static struct objtool_file file;
-
-int check(const char *_objname, bool orc)
+int check(struct objtool_file *file)
 {
 	int ret, warnings = 0;
 
-	objname = _objname;
-
-	file.elf = elf_open_read(objname, O_RDWR);
-	if (!file.elf)
-		return 1;
-
-	INIT_LIST_HEAD(&file.insn_list);
-	hash_init(file.insn_hash);
-	file.c_file = !vmlinux && find_section_by_name(file.elf, ".comment");
-	file.ignore_unreachables = no_unreachable;
-	file.hints = false;
-
 	arch_initial_func_cfi_state(&initial_func_cfi);
 
-	ret = decode_sections(&file);
+	ret = decode_sections(file);
 	if (ret < 0)
 		goto out;
 	warnings += ret;
 
-	if (list_empty(&file.insn_list))
+	if (list_empty(&file->insn_list))
 		goto out;
 
 	if (vmlinux && !validate_dup) {
-		ret = validate_vmlinux_functions(&file);
+		ret = validate_vmlinux_functions(file);
 		if (ret < 0)
 			goto out;
 
@@ -2815,44 +2892,33 @@ int check(const char *_objname, bool orc)
 	}
 
 	if (retpoline) {
-		ret = validate_retpoline(&file);
+		ret = validate_retpoline(file);
 		if (ret < 0)
 			return ret;
 		warnings += ret;
 	}
 
-	ret = validate_functions(&file);
+	ret = validate_functions(file);
 	if (ret < 0)
 		goto out;
 	warnings += ret;
 
-	ret = validate_unwind_hints(&file, NULL);
+	ret = validate_unwind_hints(file, NULL);
 	if (ret < 0)
 		goto out;
 	warnings += ret;
 
 	if (!warnings) {
-		ret = validate_reachable_instructions(&file);
+		ret = validate_reachable_instructions(file);
 		if (ret < 0)
 			goto out;
 		warnings += ret;
 	}
 
-	if (orc) {
-		ret = create_orc(&file);
-		if (ret < 0)
-			goto out;
-
-		ret = create_orc_sections(&file);
-		if (ret < 0)
-			goto out;
-	}
-
-	if (file.elf->changed) {
-		ret = elf_write(file.elf);
-		if (ret < 0)
-			goto out;
-	}
+	ret = create_static_call_sections(file);
+	if (ret < 0)
+		goto out;
+	warnings += ret;
 
 out:
 	if (ret < 0) {
diff --git a/tools/objtool/check.h b/tools/objtool/check.h
index 061aa96e15d3..5ec00a4b891b 100644
--- a/tools/objtool/check.h
+++ b/tools/objtool/check.h
@@ -22,6 +22,7 @@ struct insn_state {
 struct instruction {
 	struct list_head list;
 	struct hlist_node hash;
+	struct list_head static_call_node;
 	struct section *sec;
 	unsigned long offset;
 	unsigned int len;
@@ -42,9 +43,17 @@ struct instruction {
 	struct symbol *func;
 	struct list_head stack_ops;
 	struct cfi_state cfi;
+#ifdef INSN_USE_ORC
 	struct orc_entry orc;
+#endif
 };
 
+static inline bool is_static_jump(struct instruction *insn)
+{
+	return insn->type == INSN_JUMP_CONDITIONAL ||
+	       insn->type == INSN_JUMP_UNCONDITIONAL;
+}
+
 struct instruction *find_insn(struct objtool_file *file,
 			      struct section *sec, unsigned long offset);
 
@@ -57,5 +66,4 @@ struct instruction *find_insn(struct objtool_file *file,
 			insn->sec == sec;				\
 	     insn = list_next_entry(insn, list))
 
-
 #endif /* _CHECK_H */
diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c
index 3ddbd66f1a37..4e1d7460574b 100644
--- a/tools/objtool/elf.c
+++ b/tools/objtool/elf.c
@@ -652,7 +652,7 @@ err:
 }
 
 struct section *elf_create_section(struct elf *elf, const char *name,
-				   size_t entsize, int nr)
+				   unsigned int sh_flags, size_t entsize, int nr)
 {
 	struct section *sec, *shstrtab;
 	size_t size = entsize * nr;
@@ -712,7 +712,7 @@ struct section *elf_create_section(struct elf *elf, const char *name,
 	sec->sh.sh_entsize = entsize;
 	sec->sh.sh_type = SHT_PROGBITS;
 	sec->sh.sh_addralign = 1;
-	sec->sh.sh_flags = SHF_ALLOC;
+	sec->sh.sh_flags = SHF_ALLOC | sh_flags;
 
 
 	/* Add section name to .shstrtab (or .strtab for Clang) */
@@ -767,7 +767,7 @@ static struct section *elf_create_rel_reloc_section(struct elf *elf, struct sect
 	strcpy(relocname, ".rel");
 	strcat(relocname, base->name);
 
-	sec = elf_create_section(elf, relocname, sizeof(GElf_Rel), 0);
+	sec = elf_create_section(elf, relocname, 0, sizeof(GElf_Rel), 0);
 	free(relocname);
 	if (!sec)
 		return NULL;
@@ -797,7 +797,7 @@ static struct section *elf_create_rela_reloc_section(struct elf *elf, struct sec
 	strcpy(relocname, ".rela");
 	strcat(relocname, base->name);
 
-	sec = elf_create_section(elf, relocname, sizeof(GElf_Rela), 0);
+	sec = elf_create_section(elf, relocname, 0, sizeof(GElf_Rela), 0);
 	free(relocname);
 	if (!sec)
 		return NULL;
diff --git a/tools/objtool/elf.h b/tools/objtool/elf.h
index 6cc80a075166..807f8c670097 100644
--- a/tools/objtool/elf.h
+++ b/tools/objtool/elf.h
@@ -56,6 +56,7 @@ struct symbol {
 	unsigned int len;
 	struct symbol *pfunc, *cfunc, *alias;
 	bool uaccess_safe;
+	bool static_call_tramp;
 };
 
 struct reloc {
@@ -120,7 +121,7 @@ static inline u32 reloc_hash(struct reloc *reloc)
 }
 
 struct elf *elf_open_read(const char *name, int flags);
-struct section *elf_create_section(struct elf *elf, const char *name, size_t entsize, int nr);
+struct section *elf_create_section(struct elf *elf, const char *name, unsigned int sh_flags, size_t entsize, int nr);
 struct section *elf_create_reloc_section(struct elf *elf, struct section *base, int reltype);
 void elf_add_reloc(struct elf *elf, struct reloc *reloc);
 int elf_write_insn(struct elf *elf, struct section *sec,
diff --git a/tools/objtool/objtool.c b/tools/objtool/objtool.c
index 58fdda510653..9df0cd86d310 100644
--- a/tools/objtool/objtool.c
+++ b/tools/objtool/objtool.c
@@ -22,6 +22,8 @@
 #include <linux/kernel.h>
 
 #include "builtin.h"
+#include "objtool.h"
+#include "warn.h"
 
 struct cmd_struct {
 	const char *name;
@@ -39,6 +41,34 @@ static struct cmd_struct objtool_cmds[] = {
 
 bool help;
 
+const char *objname;
+static struct objtool_file file;
+
+struct objtool_file *objtool_open_read(const char *_objname)
+{
+	if (objname) {
+		if (strcmp(objname, _objname)) {
+			WARN("won't handle more than one file at a time");
+			return NULL;
+		}
+		return &file;
+	}
+	objname = _objname;
+
+	file.elf = elf_open_read(objname, O_RDWR);
+	if (!file.elf)
+		return NULL;
+
+	INIT_LIST_HEAD(&file.insn_list);
+	hash_init(file.insn_hash);
+	INIT_LIST_HEAD(&file.static_call_list);
+	file.c_file = !vmlinux && find_section_by_name(file.elf, ".comment");
+	file.ignore_unreachables = no_unreachable;
+	file.hints = false;
+
+	return &file;
+}
+
 static void cmd_usage(void)
 {
 	unsigned int i, longest = 0;
diff --git a/tools/objtool/objtool.h b/tools/objtool/objtool.h
index 528028a66816..4125d4578b23 100644
--- a/tools/objtool/objtool.h
+++ b/tools/objtool/objtool.h
@@ -12,14 +12,19 @@
 
 #include "elf.h"
 
+#define __weak __attribute__((weak))
+
 struct objtool_file {
 	struct elf *elf;
 	struct list_head insn_list;
 	DECLARE_HASHTABLE(insn_hash, 20);
+	struct list_head static_call_list;
 	bool ignore_unreachables, c_file, hints, rodata;
 };
 
-int check(const char *objname, bool orc);
+struct objtool_file *objtool_open_read(const char *_objname);
+
+int check(struct objtool_file *file);
 int orc_dump(const char *objname);
 int create_orc(struct objtool_file *file);
 int create_orc_sections(struct objtool_file *file);
diff --git a/tools/objtool/orc_dump.c b/tools/objtool/orc_dump.c
index fca46e006fc2..5e6a95368d35 100644
--- a/tools/objtool/orc_dump.c
+++ b/tools/objtool/orc_dump.c
@@ -4,6 +4,7 @@
  */
 
 #include <unistd.h>
+#include <linux/objtool.h>
 #include <asm/orc_types.h>
 #include "objtool.h"
 #include "warn.h"
@@ -37,12 +38,12 @@ static const char *reg_name(unsigned int reg)
 static const char *orc_type_name(unsigned int type)
 {
 	switch (type) {
-	case ORC_TYPE_CALL:
+	case UNWIND_HINT_TYPE_CALL:
 		return "call";
-	case ORC_TYPE_REGS:
+	case UNWIND_HINT_TYPE_REGS:
 		return "regs";
-	case ORC_TYPE_REGS_IRET:
-		return "iret";
+	case UNWIND_HINT_TYPE_REGS_PARTIAL:
+		return "regs (partial)";
 	default:
 		return "?";
 	}
diff --git a/tools/objtool/orc_gen.c b/tools/objtool/orc_gen.c
index 968f55e6dd94..235663b96adc 100644
--- a/tools/objtool/orc_gen.c
+++ b/tools/objtool/orc_gen.c
@@ -6,6 +6,9 @@
 #include <stdlib.h>
 #include <string.h>
 
+#include <linux/objtool.h>
+#include <asm/orc_types.h>
+
 #include "check.h"
 #include "warn.h"
 
@@ -18,6 +21,9 @@ int create_orc(struct objtool_file *file)
 		struct cfi_reg *cfa = &insn->cfi.cfa;
 		struct cfi_reg *bp = &insn->cfi.regs[CFI_BP];
 
+		if (!insn->sec->text)
+			continue;
+
 		orc->end = insn->cfi.end;
 
 		if (cfa->base == CFI_UNDEFINED) {
@@ -143,7 +149,7 @@ int create_orc_sections(struct objtool_file *file)
 	struct orc_entry empty = {
 		.sp_reg = ORC_REG_UNDEFINED,
 		.bp_reg  = ORC_REG_UNDEFINED,
-		.type    = ORC_TYPE_CALL,
+		.type    = UNWIND_HINT_TYPE_CALL,
 	};
 
 	sec = find_section_by_name(file->elf, ".orc_unwind");
@@ -177,7 +183,7 @@ int create_orc_sections(struct objtool_file *file)
 
 
 	/* create .orc_unwind_ip and .rela.orc_unwind_ip sections */
-	sec = elf_create_section(file->elf, ".orc_unwind_ip", sizeof(int), idx);
+	sec = elf_create_section(file->elf, ".orc_unwind_ip", 0, sizeof(int), idx);
 	if (!sec)
 		return -1;
 
@@ -186,7 +192,7 @@ int create_orc_sections(struct objtool_file *file)
 		return -1;
 
 	/* create .orc_unwind section */
-	u_sec = elf_create_section(file->elf, ".orc_unwind",
+	u_sec = elf_create_section(file->elf, ".orc_unwind", 0,
 				   sizeof(struct orc_entry), idx);
 
 	/* populate sections */
diff --git a/tools/objtool/special.c b/tools/objtool/special.c
index e893f1e48e44..1a2420febd08 100644
--- a/tools/objtool/special.c
+++ b/tools/objtool/special.c
@@ -14,24 +14,7 @@
 #include "builtin.h"
 #include "special.h"
 #include "warn.h"
-
-#define EX_ENTRY_SIZE		12
-#define EX_ORIG_OFFSET		0
-#define EX_NEW_OFFSET		4
-
-#define JUMP_ENTRY_SIZE		16
-#define JUMP_ORIG_OFFSET	0
-#define JUMP_NEW_OFFSET		4
-
-#define ALT_ENTRY_SIZE		13
-#define ALT_ORIG_OFFSET		0
-#define ALT_NEW_OFFSET		4
-#define ALT_FEATURE_OFFSET	8
-#define ALT_ORIG_LEN_OFFSET	10
-#define ALT_NEW_LEN_OFFSET	11
-
-#define X86_FEATURE_POPCNT (4*32+23)
-#define X86_FEATURE_SMAP   (9*32+20)
+#include "arch_special.h"
 
 struct special_entry {
 	const char *sec;
@@ -68,6 +51,10 @@ struct special_entry entries[] = {
 	{},
 };
 
+void __weak arch_handle_alternative(unsigned short feature, struct special_alt *alt)
+{
+}
+
 static int get_alt_entry(struct elf *elf, struct special_entry *entry,
 			 struct section *sec, int idx,
 			 struct special_alt *alt)
@@ -92,30 +79,7 @@ static int get_alt_entry(struct elf *elf, struct special_entry *entry,
 
 		feature = *(unsigned short *)(sec->data->d_buf + offset +
 					      entry->feature);
-
-		/*
-		 * It has been requested that we don't validate the !POPCNT
-		 * feature path which is a "very very small percentage of
-		 * machines".
-		 */
-		if (feature == X86_FEATURE_POPCNT)
-			alt->skip_orig = true;
-
-		/*
-		 * If UACCESS validation is enabled; force that alternative;
-		 * otherwise force it the other way.
-		 *
-		 * What we want to avoid is having both the original and the
-		 * alternative code flow at the same time, in that case we can
-		 * find paths that see the STAC but take the NOP instead of
-		 * CLAC and the other way around.
-		 */
-		if (feature == X86_FEATURE_SMAP) {
-			if (uaccess)
-				alt->skip_orig = true;
-			else
-				alt->skip_alt = true;
-		}
+		arch_handle_alternative(feature, alt);
 	}
 
 	orig_reloc = find_reloc_by_dest(elf, sec, offset + entry->orig);
diff --git a/tools/objtool/special.h b/tools/objtool/special.h
index 35061530e46e..abddf38ef334 100644
--- a/tools/objtool/special.h
+++ b/tools/objtool/special.h
@@ -7,8 +7,11 @@
 #define _SPECIAL_H
 
 #include <stdbool.h>
+#include "check.h"
 #include "elf.h"
 
+#define C_JUMP_TABLE_SECTION ".rodata..c_jump_table"
+
 struct special_alt {
 	struct list_head list;
 
@@ -28,4 +31,11 @@ struct special_alt {
 
 int special_get_alts(struct elf *elf, struct list_head *alts);
 
+void arch_handle_alternative(unsigned short feature, struct special_alt *alt);
+
+bool arch_support_alt_relocation(struct special_alt *special_alt,
+				 struct instruction *insn,
+				 struct reloc *reloc);
+struct reloc *arch_find_switch_table(struct objtool_file *file,
+				    struct instruction *insn);
 #endif /* _SPECIAL_H */
diff --git a/tools/objtool/sync-check.sh b/tools/objtool/sync-check.sh
index 2a1261bfbb62..606a4b5e929f 100755
--- a/tools/objtool/sync-check.sh
+++ b/tools/objtool/sync-check.sh
@@ -1,13 +1,27 @@
 #!/bin/sh
 # SPDX-License-Identifier: GPL-2.0
 
-FILES='
+if [ -z "$SRCARCH" ]; then
+	echo 'sync-check.sh: error: missing $SRCARCH environment variable' >&2
+	exit 1
+fi
+
+FILES="include/linux/objtool.h"
+
+if [ "$SRCARCH" = "x86" ]; then
+FILES="$FILES
 arch/x86/include/asm/inat_types.h
 arch/x86/include/asm/orc_types.h
 arch/x86/include/asm/emulate_prefix.h
 arch/x86/lib/x86-opcode-map.txt
 arch/x86/tools/gen-insn-attr-x86.awk
-'
+include/linux/static_call_types.h
+arch/x86/include/asm/inat.h     -I '^#include [\"<]\(asm/\)*inat_types.h[\">]'
+arch/x86/include/asm/insn.h     -I '^#include [\"<]\(asm/\)*inat.h[\">]'
+arch/x86/lib/inat.c             -I '^#include [\"<]\(../include/\)*asm/insn.h[\">]'
+arch/x86/lib/insn.c             -I '^#include [\"<]\(../include/\)*asm/in\(at\|sn\).h[\">]' -I '^#include [\"<]\(../include/\)*asm/emulate_prefix.h[\">]'
+"
+fi
 
 check_2 () {
   file1=$1
@@ -40,11 +54,12 @@ fi
 
 cd ../..
 
-for i in $FILES; do
-  check $i
-done
+while read -r file_entry; do
+    if [ -z "$file_entry" ]; then
+	continue
+    fi
 
-check arch/x86/include/asm/inat.h     '-I "^#include [\"<]\(asm/\)*inat_types.h[\">]"'
-check arch/x86/include/asm/insn.h     '-I "^#include [\"<]\(asm/\)*inat.h[\">]"'
-check arch/x86/lib/inat.c             '-I "^#include [\"<]\(../include/\)*asm/insn.h[\">]"'
-check arch/x86/lib/insn.c             '-I "^#include [\"<]\(../include/\)*asm/in\(at\|sn\).h[\">]" -I "^#include [\"<]\(../include/\)*asm/emulate_prefix.h[\">]"'
+    check $file_entry
+done <<EOF
+$FILES
+EOF
diff --git a/tools/objtool/weak.c b/tools/objtool/weak.c
index 942ea5e8ac36..7843e9a7a72f 100644
--- a/tools/objtool/weak.c
+++ b/tools/objtool/weak.c
@@ -9,17 +9,13 @@
 #include <errno.h>
 #include "objtool.h"
 
-#define __weak __attribute__((weak))
-
 #define UNSUPPORTED(name)						\
 ({									\
 	fprintf(stderr, "error: objtool: " name " not implemented\n");	\
 	return ENOSYS;							\
 })
 
-const char __weak *objname;
-
-int __weak check(const char *_objname, bool orc)
+int __weak check(struct objtool_file *file)
 {
 	UNSUPPORTED("check subcommand");
 }
diff --git a/tools/perf/Documentation/itrace.txt b/tools/perf/Documentation/itrace.txt
index e817179c5027..d3740c8f399b 100644
--- a/tools/perf/Documentation/itrace.txt
+++ b/tools/perf/Documentation/itrace.txt
@@ -18,6 +18,7 @@
 		l	synthesize last branch entries (use with i or x)
 		L	synthesize last branch entries on existing event records
 		s       skip initial number of events
+		q	quicker (less detailed) decoding
 
 	The default is all events i.e. the same as --itrace=ibxwpe,
 	except for perf script where it is --itrace=ce
@@ -47,3 +48,16 @@
 	--itrace=i0nss1000000
 
 	skips the first million instructions.
+
+	The 'e' option may be followed by flags which affect what errors will or
+	will not be reported. Each flag must be preceded by either '+' or '-'.
+	The flags are:
+		o	overflow
+		l	trace data lost
+
+	If supported, the 'd' option may be followed by flags which affect what
+	debug messages will or will not be logged. Each flag must be preceded
+	by either '+' or '-'. The flags are:
+		a	all perf events
+
+	If supported, the 'q' option may be repeated to increase the effect.
diff --git a/tools/perf/Documentation/perf-bench.txt b/tools/perf/Documentation/perf-bench.txt
index bad16512c48d..a0529c7fa5ef 100644
--- a/tools/perf/Documentation/perf-bench.txt
+++ b/tools/perf/Documentation/perf-bench.txt
@@ -49,6 +49,9 @@ SUBSYSTEM
 'sched'::
 	Scheduler and IPC mechanisms.
 
+'syscall'::
+	System call performance (throughput).
+
 'mem'::
 	Memory access performance.
 
@@ -137,6 +140,14 @@ Example of *pipe*
                 59004 ops/sec
 ---------------------
 
+SUITES FOR 'syscall'
+~~~~~~~~~~~~~~~~~~
+*basic*::
+Suite for evaluating performance of core system call throughput (both usecs/op and ops/sec metrics).
+This uses a single thread simply doing getppid(2), which is a simple syscall where the result is not
+cached by glibc.
+
+
 SUITES FOR 'mem'
 ~~~~~~~~~~~~~~~~
 *memcpy*::
diff --git a/tools/perf/Documentation/perf-config.txt b/tools/perf/Documentation/perf-config.txt
index c7d3df5798e2..76408d986aed 100644
--- a/tools/perf/Documentation/perf-config.txt
+++ b/tools/perf/Documentation/perf-config.txt
@@ -614,8 +614,9 @@ trace.*::
 
 ftrace.*::
 	ftrace.tracer::
-		Can be used to select the default tracer. Possible values are
-		'function' and 'function_graph'.
+		Can be used to select the default tracer when neither -G nor
+		-F option is not specified. Possible values are 'function' and
+		'function_graph'.
 
 llvm.*::
 	llvm.clang-path::
diff --git a/tools/perf/Documentation/perf-data.txt b/tools/perf/Documentation/perf-data.txt
index c87180764829..726b9bc9e1a7 100644
--- a/tools/perf/Documentation/perf-data.txt
+++ b/tools/perf/Documentation/perf-data.txt
@@ -27,6 +27,9 @@ OPTIONS for 'convert'
 --to-ctf::
 	Triggers the CTF conversion, specify the path of CTF data directory.
 
+--tod::
+	Convert time to wall clock time.
+
 -i::
 	Specify input perf data file path.
 
diff --git a/tools/perf/Documentation/perf-ftrace.txt b/tools/perf/Documentation/perf-ftrace.txt
index b80c84307dc9..78358af9a1c4 100644
--- a/tools/perf/Documentation/perf-ftrace.txt
+++ b/tools/perf/Documentation/perf-ftrace.txt
@@ -24,16 +24,28 @@ OPTIONS
 
 -t::
 --tracer=::
-	Tracer to use: function_graph or function.
+	Tracer to use when neither -G nor -F option is not
+	specified: function_graph or function.
 
 -v::
 --verbose=::
         Verbosity level.
 
+-F::
+--funcs::
+        List all available functions to trace.
+
 -p::
 --pid=::
 	Trace on existing process id (comma separated list).
 
+--tid=::
+	Trace on existing thread id (comma separated list).
+
+-D::
+--delay::
+	Time (ms) to wait before starting tracing after program start.
+
 -a::
 --all-cpus::
 	Force system-wide collection.  Scripts run without a <command>
@@ -48,39 +60,58 @@ OPTIONS
 	Ranges of CPUs are specified with -: 0-2.
 	Default is to trace on all online CPUs.
 
+-m::
+--buffer-size::
+	Set the size of per-cpu tracing buffer, <size> is expected to
+	be a number with appended unit character - B/K/M/G.
+
+--inherit::
+	Trace children processes spawned by our target.
+
 -T::
 --trace-funcs=::
-	Only trace functions given by the argument.  Multiple functions
-	can be given by using this option more than once.  The function
-	argument also can be a glob pattern.  It will be passed to
-	'set_ftrace_filter' in tracefs.
+	Select function tracer and set function filter on the given
+	function (or a glob pattern). Multiple functions can be given
+	by using this option more than once. The function argument also
+	can be a glob pattern. It will be passed to 'set_ftrace_filter'
+	in tracefs.
 
 -N::
 --notrace-funcs=::
-	Do not trace functions given by the argument.  Like -T option,
-	this can be used more than once to specify multiple functions
-	(or glob patterns).  It will be passed to 'set_ftrace_notrace'
-	in tracefs.
+	Select function tracer and do not trace functions given by the
+	argument.  Like -T option, this can be used more than once to
+	specify multiple functions (or glob patterns).  It will be
+	passed to 'set_ftrace_notrace' in tracefs.
+
+--func-opts::
+	List of options allowed to set:
+	  call-graph - Display kernel stack trace for function tracer.
+	  irq-info   - Display irq context info for function tracer.
 
 -G::
 --graph-funcs=::
-	Set graph filter on the given function (or a glob pattern).
-	This is useful for the function_graph tracer only and enables
-	tracing for functions executed from the given function.
-	This can be used more than once to specify multiple functions.
-	It will be passed to 'set_graph_function' in tracefs.
+	Select function_graph tracer and set graph filter on the given
+	function (or a glob pattern). This is useful to trace for
+	functions executed from the given function. This can be used more
+	than once to specify multiple functions. It will be passed to
+	'set_graph_function' in tracefs.
 
 -g::
 --nograph-funcs=::
-	Set graph notrace filter on the given function (or a glob pattern).
-	Like -G option, this is useful for the function_graph tracer only
-	and disables tracing for function executed from the given function.
-	This can be used more than once to specify multiple functions.
-	It will be passed to 'set_graph_notrace' in tracefs.
+	Select function_graph tracer and set graph notrace filter on the
+	given function (or a glob pattern). Like -G option, this is useful
+	for the function_graph tracer only and disables tracing for function
+	executed from the given function. This can be used more than once to
+	specify multiple functions. It will be passed to 'set_graph_notrace'
+	in tracefs.
 
--D::
---graph-depth=::
-	Set max depth for function graph tracer to follow
+--graph-opts::
+	List of options allowed to set:
+	  nosleep-time - Measure on-CPU time only for function_graph tracer.
+	  noirqs       - Ignore functions that happen inside interrupt.
+	  verbose      - Show process names, PIDs, timestamps, etc.
+	  thresh=<n>   - Setup trace duration threshold in microseconds.
+	  depth=<n>    - Set max depth for function graph tracer to follow.
 
 SEE ALSO
 --------
diff --git a/tools/perf/Documentation/perf-intel-pt.txt b/tools/perf/Documentation/perf-intel-pt.txt
index f4cd49a7fcdb..d5a266d7f15b 100644
--- a/tools/perf/Documentation/perf-intel-pt.txt
+++ b/tools/perf/Documentation/perf-intel-pt.txt
@@ -825,6 +825,7 @@ The letters are:
 	l	synthesize last branch entries (use with i or x)
 	L	synthesize last branch entries on existing event records
 	s	skip initial number of events
+	q	quicker (less detailed) decoding
 
 "Instructions" events look like they were recorded by "perf record -e
 instructions".
@@ -871,11 +872,24 @@ Developer Manuals.
 
 Error events show where the decoder lost the trace.  Error events
 are quite important.  Users must know if what they are seeing is a complete
-picture or not.
+picture or not. The "e" option may be followed by flags which affect what errors
+will or will not be reported.  Each flag must be preceded by either '+' or '-'.
+The flags supported by Intel PT are:
+		-o	Suppress overflow errors
+		-l	Suppress trace data lost errors
+For example, for errors but not overflow or data lost errors:
+
+	--itrace=e-o-l
 
 The "d" option will cause the creation of a file "intel_pt.log" containing all
 decoded packets and instructions.  Note that this option slows down the decoder
-and that the resulting file may be very large.
+and that the resulting file may be very large.  The "d" option may be followed
+by flags which affect what debug messages will or will not be logged. Each flag
+must be preceded by either '+' or '-'. The flags support by Intel PT are:
+		-a	Suppress logging of perf events
+		+a	Log all perf events
+By default, logged perf events are filtered by any specified time ranges, but
+flag +a overrides that.
 
 In addition, the period of the "instructions" event can be specified. e.g.
 
@@ -956,6 +970,51 @@ at the beginning. This is useful to ignore initialization code.
 
 skips the first million instructions.
 
+The q option changes the way the trace is decoded.  The decoding is much faster
+but much less detailed.  Specifically, with the q option, the decoder does not
+decode TNT packets, and does not walk object code, but gets the ip from FUP and
+TIP packets.  The q option can be used with the b and i options but the period
+is not used.  The q option decodes more quickly, but is useful only if the
+control flow of interest is represented or indicated by FUP, TIP, TIP.PGE, or
+TIP.PGD packets (refer below).  However the q option could be used to find time
+ranges that could then be decoded fully using the --time option.
+
+What will *not* be decoded with the (single) q option:
+
+	- direct calls and jmps
+	- conditional branches
+	- non-branch instructions
+
+What *will* be decoded with the (single) q option:
+
+	- asynchronous branches such as interrupts
+	- indirect branches
+	- function return target address *if* the noretcomp config term (refer
+	config terms section) was used
+	- start of (control-flow) tracing
+	- end of (control-flow) tracing, if it is not out of context
+	- power events, ptwrite, transaction start and abort
+	- instruction pointer associated with PSB packets
+
+Note the q option does not specify what events will be synthesized e.g. the p
+option must be used also to show power events.
+
+Repeating the q option (double-q i.e. qq) results in even faster decoding and even
+less detail.  The decoder decodes only extended PSB (PSB+) packets, getting the
+instruction pointer if there is a FUP packet within PSB+ (i.e. between PSB and
+PSBEND).  Note PSB packets occur regularly in the trace based on the psb_period
+config term (refer config terms section).  There will be a FUP packet if the
+PSB+ occurs while control flow is being traced.
+
+What will *not* be decoded with the qq option:
+
+	- everything except instruction pointer associated with PSB packets
+
+What *will* be decoded with the qq option:
+
+	- instruction pointer associated with PSB packets
+
+
 dump option
 ~~~~~~~~~~~
 
diff --git a/tools/perf/Documentation/perf-list.txt b/tools/perf/Documentation/perf-list.txt
index 376a50b3452d..10ed539a8859 100644
--- a/tools/perf/Documentation/perf-list.txt
+++ b/tools/perf/Documentation/perf-list.txt
@@ -119,6 +119,7 @@ It's also possible to use pmu syntax:
 
  perf record -e r1a8 -a sleep 1
  perf record -e cpu/r1a8/ ...
+ perf record -e cpu/r0x1a8/ ...
 
 You should refer to the processor specific documentation for getting these
 details. Some of them are referenced in the SEE ALSO section below.
diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index fa8a5fcd27ab..bd50cdff08a8 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -33,6 +33,10 @@ OPTIONS
         - a raw PMU event (eventsel+umask) in the form of rNNN where NNN is a
 	  hexadecimal event descriptor.
 
+        - a symbolic or raw PMU event followed by an optional colon
+	  and a list of event modifiers, e.g., cpu-cycles:p.  See the
+	  linkperf:perf-list[1] man page for details on event modifiers.
+
 	- a symbolically formed PMU event like 'pmu/param1=0x3,param2/' where
 	  'param1', 'param2', etc are defined as formats for the PMU in
 	  /sys/bus/event_source/devices/<pmu>/format/*.
@@ -407,8 +411,9 @@ if combined with -a or -C options.
 
 -D::
 --delay=::
-After starting the program, wait msecs before measuring. This is useful to
-filter out the startup phase of the program, which is often very different.
+After starting the program, wait msecs before measuring (-1: start with events
+disabled). This is useful to filter out the startup phase of the program, which
+is often very different.
 
 -I::
 --intr-regs::
@@ -626,6 +631,45 @@ option. The -e option and this one can be mixed and matched.  Events
 can be grouped using the {} notation.
 endif::HAVE_LIBPFM[]
 
+--control fd:ctl-fd[,ack-fd]
+Listen on ctl-fd descriptor for command to control measurement ('enable': enable events,
+'disable': disable events). Measurements can be started with events disabled using
+--delay=-1 option. Optionally send control command completion ('ack\n') to ack-fd descriptor
+to synchronize with the controlling process. Example of bash shell script to enable and
+disable events during measurements:
+
+#!/bin/bash
+
+ctl_dir=/tmp/
+
+ctl_fifo=${ctl_dir}perf_ctl.fifo
+test -p ${ctl_fifo} && unlink ${ctl_fifo}
+mkfifo ${ctl_fifo}
+exec {ctl_fd}<>${ctl_fifo}
+
+ctl_ack_fifo=${ctl_dir}perf_ctl_ack.fifo
+test -p ${ctl_ack_fifo} && unlink ${ctl_ack_fifo}
+mkfifo ${ctl_ack_fifo}
+exec {ctl_fd_ack}<>${ctl_ack_fifo}
+
+perf record -D -1 -e cpu-cycles -a               \
+            --control fd:${ctl_fd},${ctl_fd_ack} \
+            -- sleep 30 &
+perf_pid=$!
+
+sleep 5  && echo 'enable' >&${ctl_fd} && read -u ${ctl_fd_ack} e1 && echo "enabled(${e1})"
+sleep 10 && echo 'disable' >&${ctl_fd} && read -u ${ctl_fd_ack} d1 && echo "disabled(${d1})"
+
+exec {ctl_fd_ack}>&-
+unlink ${ctl_ack_fifo}
+
+exec {ctl_fd}>&-
+unlink ${ctl_fifo}
+
+wait -n ${perf_pid}
+exit $?
+
+
 SEE ALSO
 --------
 linkperf:perf-stat[1], linkperf:perf-list[1], linkperf:perf-intel-pt[1]
diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt
index 372dfd110e6d..4f712fb8f175 100644
--- a/tools/perf/Documentation/perf-script.txt
+++ b/tools/perf/Documentation/perf-script.txt
@@ -322,6 +322,10 @@ OPTIONS
 --show-cgroup-events
 	Display cgroup events i.e. events of type PERF_RECORD_CGROUP.
 
+--show-text-poke-events
+	Display text poke events i.e. events of type PERF_RECORD_TEXT_POKE and
+	PERF_RECORD_KSYMBOL.
+
 --demangle::
 	Demangle symbol names to human readable form. It's enabled by default,
 	disable with --no-demangle.
diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt
index b029ee728a0b..db420dd75e43 100644
--- a/tools/perf/Documentation/perf-stat.txt
+++ b/tools/perf/Documentation/perf-stat.txt
@@ -39,6 +39,10 @@ report::
 	- a raw PMU event (eventsel+umask) in the form of rNNN where NNN is a
 	  hexadecimal event descriptor.
 
+        - a symbolic or raw PMU event followed by an optional colon
+	  and a list of event modifiers, e.g., cpu-cycles:p.  See the
+	  linkperf:perf-list[1] man page for details on event modifiers.
+
 	- a symbolically formed event like 'pmu/param1=0x3,param2/' where
 	  param1 and param2 are defined as formats for the PMU in
 	  /sys/bus/event_source/devices/<pmu>/format/*
@@ -176,6 +180,45 @@ with it.  --append may be used here.  Examples:
      3>results  perf stat --log-fd 3          -- $cmd
      3>>results perf stat --log-fd 3 --append -- $cmd
 
+--control fd:ctl-fd[,ack-fd]
+Listen on ctl-fd descriptor for command to control measurement ('enable': enable events,
+'disable': disable events). Measurements can be started with events disabled using
+--delay=-1 option. Optionally send control command completion ('ack\n') to ack-fd descriptor
+to synchronize with the controlling process. Example of bash shell script to enable and
+disable events during measurements:
+
+#!/bin/bash
+
+ctl_dir=/tmp/
+
+ctl_fifo=${ctl_dir}perf_ctl.fifo
+test -p ${ctl_fifo} && unlink ${ctl_fifo}
+mkfifo ${ctl_fifo}
+exec {ctl_fd}<>${ctl_fifo}
+
+ctl_ack_fifo=${ctl_dir}perf_ctl_ack.fifo
+test -p ${ctl_ack_fifo} && unlink ${ctl_ack_fifo}
+mkfifo ${ctl_ack_fifo}
+exec {ctl_fd_ack}<>${ctl_ack_fifo}
+
+perf stat -D -1 -e cpu-cycles -a -I 1000       \
+          --control fd:${ctl_fd},${ctl_fd_ack} \
+          -- sleep 30 &
+perf_pid=$!
+
+sleep 5  && echo 'enable' >&${ctl_fd} && read -u ${ctl_fd_ack} e1 && echo "enabled(${e1})"
+sleep 10 && echo 'disable' >&${ctl_fd} && read -u ${ctl_fd_ack} d1 && echo "disabled(${d1})"
+
+exec {ctl_fd_ack}>&-
+unlink ${ctl_ack_fifo}
+
+exec {ctl_fd}>&-
+unlink ${ctl_fifo}
+
+wait -n ${perf_pid}
+exit $?
+
+
 --pre::
 --post::
 	Pre and post measurement hooks, e.g.:
@@ -238,8 +281,9 @@ mode, use --per-node in addition to -a. (system-wide).
 
 -D msecs::
 --delay msecs::
-After starting the program, wait msecs before measuring. This is useful to
-filter out the startup phase of the program, which is often very different.
+After starting the program, wait msecs before measuring (-1: start with events
+disabled). This is useful to filter out the startup phase of the program,
+which is often very different.
 
 -T::
 --transaction::
@@ -376,6 +420,9 @@ counts for all hardware threads in a core but show the sum counts per
 hardware thread. This is essentially a replacement for the any bit and
 convenient for post processing.
 
+--summary::
+Print summary for interval mode (-I).
+
 EXAMPLES
 --------
 
diff --git a/tools/perf/Documentation/perf.data-file-format.txt b/tools/perf/Documentation/perf.data-file-format.txt
index b6472e463284..9ee96640744e 100644
--- a/tools/perf/Documentation/perf.data-file-format.txt
+++ b/tools/perf/Documentation/perf.data-file-format.txt
@@ -389,6 +389,19 @@ struct {
 Example:
  cpu pmu capabilities: branches=32, max_precise=3, pmu_name=icelake
 
+	HEADER_CLOCK_DATA = 29,
+
+	Contains clock id and its reference time together with wall clock
+	time taken at the 'same time', both values are in nanoseconds.
+	The format of data is as below.
+
+struct {
+	u32 version;  /* version = 1 */
+	u32 clockid;
+	u64 wall_clock_ns;
+	u64 clockid_time_ns;
+};
+
 	other bits are reserved and should ignored for now
 	HEADER_FEAT_BITS	= 256,
 
diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config
index 513633809c81..8137a6046a47 100644
--- a/tools/perf/Makefile.config
+++ b/tools/perf/Makefile.config
@@ -16,7 +16,7 @@ $(shell printf "" > $(OUTPUT).config-detected)
 detected     = $(shell echo "$(1)=y"       >> $(OUTPUT).config-detected)
 detected_var = $(shell echo "$(1)=$($(1))" >> $(OUTPUT).config-detected)
 
-CFLAGS := $(EXTRA_CFLAGS) $(EXTRA_WARNINGS)
+CFLAGS := $(EXTRA_CFLAGS) $(filter-out -Wnested-externs,$(EXTRA_WARNINGS))
 
 include $(srctree)/tools/scripts/Makefile.arch
 
@@ -501,6 +501,14 @@ ifndef NO_LIBELF
     CFLAGS += -DHAVE_ELF_GETSHDRSTRNDX_SUPPORT
   endif
 
+  ifndef NO_LIBDEBUGINFOD
+    $(call feature_check,libdebuginfod)
+    ifeq ($(feature-libdebuginfod), 1)
+      CFLAGS += -DHAVE_DEBUGINFOD_SUPPORT
+      EXTLIBS += -ldebuginfod
+    endif
+  endif
+
   ifndef NO_DWARF
     ifeq ($(origin PERF_HAVE_DWARF_REGS), undefined)
       msg := $(warning DWARF register mappings have not been defined for architecture $(SRCARCH), DWARF support disabled);
diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
index 86dbb51bb272..6031167939ae 100644
--- a/tools/perf/Makefile.perf
+++ b/tools/perf/Makefile.perf
@@ -124,6 +124,8 @@ include ../scripts/utilities.mak
 #
 # Define LIBPFM4 to enable libpfm4 events extension.
 #
+# Define NO_LIBDEBUGINFOD if you do not want support debuginfod
+#
 
 # As per kernel Makefile, avoid funny character set dependencies
 unexport LC_ALL
@@ -418,6 +420,7 @@ export INSTALL SHELL_PATH
 
 SHELL = $(SHELL_PATH)
 
+beauty_linux_dir := $(srctree)/tools/perf/trace/beauty/include/linux/
 linux_uapi_dir := $(srctree)/tools/include/uapi/linux
 asm_generic_uapi_dir := $(srctree)/tools/include/uapi/asm-generic
 arch_asm_uapi_dir := $(srctree)/tools/arch/$(SRCARCH)/include/uapi/asm/
@@ -501,6 +504,12 @@ socket_ipproto_tbl := $(srctree)/tools/perf/trace/beauty/socket_ipproto.sh
 $(socket_ipproto_array): $(linux_uapi_dir)/in.h $(socket_ipproto_tbl)
 	$(Q)$(SHELL) '$(socket_ipproto_tbl)' $(linux_uapi_dir) > $@
 
+socket_arrays := $(beauty_outdir)/socket_arrays.c
+socket_tbl := $(srctree)/tools/perf/trace/beauty/socket.sh
+
+$(socket_arrays): $(beauty_linux_dir)/socket.h $(socket_tbl)
+	$(Q)$(SHELL) '$(socket_tbl)' $(beauty_linux_dir) > $@
+
 vhost_virtio_ioctl_array := $(beauty_ioctl_outdir)/vhost_virtio_ioctl_array.c
 vhost_virtio_hdr_dir := $(srctree)/tools/include/uapi/linux
 vhost_virtio_ioctl_tbl := $(srctree)/tools/perf/trace/beauty/vhost_virtio_ioctl.sh
@@ -697,6 +706,7 @@ prepare: $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h archheaders $(drm_ioc
 	$(kcmp_type_array) \
 	$(kvm_ioctl_array) \
 	$(socket_ipproto_array) \
+	$(socket_arrays) \
 	$(vhost_virtio_ioctl_array) \
 	$(madvise_behavior_array) \
 	$(mmap_flags_array) \
@@ -1006,6 +1016,7 @@ clean:: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clea
 		$(OUTPUT)$(kvm_ioctl_array) \
 		$(OUTPUT)$(kcmp_type_array) \
 		$(OUTPUT)$(socket_ipproto_array) \
+		$(OUTPUT)$(socket_arrays) \
 		$(OUTPUT)$(vhost_virtio_ioctl_array) \
 		$(OUTPUT)$(perf_ioctl_array) \
 		$(OUTPUT)$(prctl_option_array) \
diff --git a/tools/perf/arch/arm/util/auxtrace.c b/tools/perf/arch/arm/util/auxtrace.c
index 28a5d0c18b1d..b187bddbd01a 100644
--- a/tools/perf/arch/arm/util/auxtrace.c
+++ b/tools/perf/arch/arm/util/auxtrace.c
@@ -57,17 +57,15 @@ struct auxtrace_record
 	struct evsel *evsel;
 	bool found_etm = false;
 	struct perf_pmu *found_spe = NULL;
-	static struct perf_pmu **arm_spe_pmus = NULL;
-	static int nr_spes = 0;
+	struct perf_pmu **arm_spe_pmus = NULL;
+	int nr_spes = 0;
 	int i = 0;
 
 	if (!evlist)
 		return NULL;
 
 	cs_etm_pmu = perf_pmu__find(CORESIGHT_ETM_PMU_NAME);
-
-	if (!arm_spe_pmus)
-		arm_spe_pmus = find_all_arm_spe_pmus(&nr_spes, err);
+	arm_spe_pmus = find_all_arm_spe_pmus(&nr_spes, err);
 
 	evlist__for_each_entry(evlist, evsel) {
 		if (cs_etm_pmu &&
@@ -84,6 +82,7 @@ struct auxtrace_record
 			}
 		}
 	}
+	free(arm_spe_pmus);
 
 	if (found_etm && found_spe) {
 		pr_err("Concurrent ARM Coresight ETM and SPE operation not currently supported\n");
diff --git a/tools/perf/arch/arm/util/cs-etm.c b/tools/perf/arch/arm/util/cs-etm.c
index cea5e33d61d2..cad7bf783413 100644
--- a/tools/perf/arch/arm/util/cs-etm.c
+++ b/tools/perf/arch/arm/util/cs-etm.c
@@ -243,10 +243,10 @@ static int cs_etm_set_sink_attr(struct perf_pmu *pmu,
 	}
 
 	/*
-	 * No sink was provided on the command line - for _now_ treat
-	 * this as an error.
+	 * No sink was provided on the command line - allow the CoreSight
+	 * system to look for a default
 	 */
-	return ret;
+	return 0;
 }
 
 static int cs_etm_recording_options(struct auxtrace_record *itr,
diff --git a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
index b190f2eb2611..b168364ac050 100644
--- a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
+++ b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
@@ -32,7 +32,7 @@
 18	spu	oldstat				sys_ni_syscall
 19	common	lseek				sys_lseek			compat_sys_lseek
 20	common	getpid				sys_getpid
-21	nospu	mount				sys_mount			compat_sys_mount
+21	nospu	mount				sys_mount
 22	32	umount				sys_oldumount
 22	64	umount				sys_ni_syscall
 22	spu	umount				sys_ni_syscall
@@ -189,11 +189,11 @@
 142	common	_newselect			sys_select			compat_sys_select
 143	common	flock				sys_flock
 144	common	msync				sys_msync
-145	common	readv				sys_readv			compat_sys_readv
-146	common	writev				sys_writev			compat_sys_writev
+145	common	readv				sys_readv
+146	common	writev				sys_writev
 147	common	getsid				sys_getsid
 148	common	fdatasync			sys_fdatasync
-149	nospu	_sysctl				sys_sysctl			compat_sys_sysctl
+149	nospu	_sysctl				sys_ni_syscall
 150	common	mlock				sys_mlock
 151	common	munlock				sys_munlock
 152	common	mlockall			sys_mlockall
@@ -363,7 +363,7 @@
 282	common	unshare				sys_unshare
 283	common	splice				sys_splice
 284	common	tee				sys_tee
-285	common	vmsplice			sys_vmsplice			compat_sys_vmsplice
+285	common	vmsplice			sys_vmsplice
 286	common	openat				sys_openat			compat_sys_openat
 287	common	mkdirat				sys_mkdirat
 288	common	mknodat				sys_mknodat
@@ -443,8 +443,8 @@
 348	common	syncfs				sys_syncfs
 349	common	sendmmsg			sys_sendmmsg			compat_sys_sendmmsg
 350	common	setns				sys_setns
-351	nospu	process_vm_readv		sys_process_vm_readv		compat_sys_process_vm_readv
-352	nospu	process_vm_writev		sys_process_vm_writev		compat_sys_process_vm_writev
+351	nospu	process_vm_readv		sys_process_vm_readv
+352	nospu	process_vm_writev		sys_process_vm_writev
 353	nospu	finit_module			sys_finit_module
 354	nospu	kcmp				sys_kcmp
 355	common	sched_setattr			sys_sched_setattr
diff --git a/tools/perf/arch/powerpc/include/perf_regs.h b/tools/perf/arch/powerpc/include/perf_regs.h
index e18a3556f5e3..63f3ac91049f 100644
--- a/tools/perf/arch/powerpc/include/perf_regs.h
+++ b/tools/perf/arch/powerpc/include/perf_regs.h
@@ -64,7 +64,13 @@ static const char *reg_names[] = {
 	[PERF_REG_POWERPC_DAR] = "dar",
 	[PERF_REG_POWERPC_DSISR] = "dsisr",
 	[PERF_REG_POWERPC_SIER] = "sier",
-	[PERF_REG_POWERPC_MMCRA] = "mmcra"
+	[PERF_REG_POWERPC_MMCRA] = "mmcra",
+	[PERF_REG_POWERPC_MMCR0] = "mmcr0",
+	[PERF_REG_POWERPC_MMCR1] = "mmcr1",
+	[PERF_REG_POWERPC_MMCR2] = "mmcr2",
+	[PERF_REG_POWERPC_MMCR3] = "mmcr3",
+	[PERF_REG_POWERPC_SIER2] = "sier2",
+	[PERF_REG_POWERPC_SIER3] = "sier3",
 };
 
 static inline const char *perf_reg_name(int id)
diff --git a/tools/perf/arch/powerpc/util/book3s_hcalls.h b/tools/perf/arch/powerpc/util/book3s_hcalls.h
index 54cfa0530e86..488f4339b83c 100644
--- a/tools/perf/arch/powerpc/util/book3s_hcalls.h
+++ b/tools/perf/arch/powerpc/util/book3s_hcalls.h
@@ -84,7 +84,7 @@
 	{0x1a4, "H_CREATE_RPT"},				\
 	{0x1a8, "H_REMOVE_RPT"},				\
 	{0x1ac, "H_REGISTER_RPAGES"},				\
-	{0x1b0, "H_DISABLE_AND_GETC"},				\
+	{0x1b0, "H_DISABLE_AND_GET"},				\
 	{0x1b4, "H_ERROR_DATA"},				\
 	{0x1b8, "H_GET_HCA_INFO"},				\
 	{0x1bc, "H_GET_PERF_COUNT"},				\
diff --git a/tools/perf/arch/powerpc/util/header.c b/tools/perf/arch/powerpc/util/header.c
index d4870074f14c..1a950171a66f 100644
--- a/tools/perf/arch/powerpc/util/header.c
+++ b/tools/perf/arch/powerpc/util/header.c
@@ -7,17 +7,10 @@
 #include <string.h>
 #include <linux/stringify.h>
 #include "header.h"
+#include "utils_header.h"
 #include "metricgroup.h"
 #include <api/fs/fs.h>
 
-#define mfspr(rn)       ({unsigned long rval; \
-			 asm volatile("mfspr %0," __stringify(rn) \
-				      : "=r" (rval)); rval; })
-
-#define SPRN_PVR        0x11F	/* Processor Version Register */
-#define PVR_VER(pvr)    (((pvr) >>  16) & 0xFFFF) /* Version field */
-#define PVR_REV(pvr)    (((pvr) >>   0) & 0xFFFF) /* Revison field */
-
 int
 get_cpuid(char *buffer, size_t sz)
 {
diff --git a/tools/perf/arch/powerpc/util/perf_regs.c b/tools/perf/arch/powerpc/util/perf_regs.c
index 0a5242900248..2b6d4704e3aa 100644
--- a/tools/perf/arch/powerpc/util/perf_regs.c
+++ b/tools/perf/arch/powerpc/util/perf_regs.c
@@ -6,9 +6,16 @@
 
 #include "../../../util/perf_regs.h"
 #include "../../../util/debug.h"
+#include "../../../util/event.h"
+#include "../../../util/header.h"
+#include "../../../perf-sys.h"
+#include "utils_header.h"
 
 #include <linux/kernel.h>
 
+#define PVR_POWER9		0x004E
+#define PVR_POWER10		0x0080
+
 const struct sample_reg sample_reg_masks[] = {
 	SMPL_REG(r0, PERF_REG_POWERPC_R0),
 	SMPL_REG(r1, PERF_REG_POWERPC_R1),
@@ -55,6 +62,12 @@ const struct sample_reg sample_reg_masks[] = {
 	SMPL_REG(dsisr, PERF_REG_POWERPC_DSISR),
 	SMPL_REG(sier, PERF_REG_POWERPC_SIER),
 	SMPL_REG(mmcra, PERF_REG_POWERPC_MMCRA),
+	SMPL_REG(mmcr0, PERF_REG_POWERPC_MMCR0),
+	SMPL_REG(mmcr1, PERF_REG_POWERPC_MMCR1),
+	SMPL_REG(mmcr2, PERF_REG_POWERPC_MMCR2),
+	SMPL_REG(mmcr3, PERF_REG_POWERPC_MMCR3),
+	SMPL_REG(sier2, PERF_REG_POWERPC_SIER2),
+	SMPL_REG(sier3, PERF_REG_POWERPC_SIER3),
 	SMPL_REG_END
 };
 
@@ -163,3 +176,45 @@ int arch_sdt_arg_parse_op(char *old_op, char **new_op)
 
 	return SDT_ARG_VALID;
 }
+
+uint64_t arch__intr_reg_mask(void)
+{
+	struct perf_event_attr attr = {
+		.type                   = PERF_TYPE_HARDWARE,
+		.config                 = PERF_COUNT_HW_CPU_CYCLES,
+		.sample_type            = PERF_SAMPLE_REGS_INTR,
+		.precise_ip             = 1,
+		.disabled               = 1,
+		.exclude_kernel         = 1,
+	};
+	int fd;
+	u32 version;
+	u64 extended_mask = 0, mask = PERF_REGS_MASK;
+
+	/*
+	 * Get the PVR value to set the extended
+	 * mask specific to platform.
+	 */
+	version = (((mfspr(SPRN_PVR)) >>  16) & 0xFFFF);
+	if (version == PVR_POWER9)
+		extended_mask = PERF_REG_PMU_MASK_300;
+	else if (version == PVR_POWER10)
+		extended_mask = PERF_REG_PMU_MASK_31;
+	else
+		return mask;
+
+	attr.sample_regs_intr = extended_mask;
+	attr.sample_period = 1;
+	event_attr_init(&attr);
+
+	/*
+	 * check if the pmu supports perf extended regs, before
+	 * returning the register mask to sample.
+	 */
+	fd = sys_perf_event_open(&attr, 0, -1, -1, 0);
+	if (fd != -1) {
+		close(fd);
+		mask |= extended_mask;
+	}
+	return mask;
+}
diff --git a/tools/perf/arch/powerpc/util/utils_header.h b/tools/perf/arch/powerpc/util/utils_header.h
new file mode 100644
index 000000000000..5788eb1f1fe3
--- /dev/null
+++ b/tools/perf/arch/powerpc/util/utils_header.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_UTIL_HEADER_H
+#define __PERF_UTIL_HEADER_H
+
+#include <linux/stringify.h>
+
+#define mfspr(rn)       ({unsigned long rval; \
+			asm volatile("mfspr %0," __stringify(rn) \
+				: "=r" (rval)); rval; })
+
+#define SPRN_PVR        0x11F   /* Processor Version Register */
+#define PVR_VER(pvr)    (((pvr) >>  16) & 0xFFFF) /* Version field */
+#define PVR_REV(pvr)    (((pvr) >>   0) & 0xFFFF) /* Revison field */
+
+#endif /* __PERF_UTIL_HEADER_H */
diff --git a/tools/perf/arch/s390/entry/syscalls/syscall.tbl b/tools/perf/arch/s390/entry/syscalls/syscall.tbl
index 56ae24b6e4be..d2fa9647ce25 100644
--- a/tools/perf/arch/s390/entry/syscalls/syscall.tbl
+++ b/tools/perf/arch/s390/entry/syscalls/syscall.tbl
@@ -26,7 +26,7 @@
 16   32		lchown			-				compat_sys_s390_lchown16
 19   common	lseek			sys_lseek			compat_sys_lseek
 20   common	getpid			sys_getpid			sys_getpid
-21   common	mount			sys_mount			compat_sys_mount
+21   common	mount			sys_mount
 22   common	umount			sys_oldumount			compat_sys_oldumount
 23   32		setuid			-				compat_sys_s390_setuid16
 24   32		getuid			-				compat_sys_s390_getuid16
@@ -134,11 +134,11 @@
 142  64		select			sys_select			-
 143  common	flock			sys_flock			sys_flock
 144  common	msync			sys_msync			compat_sys_msync
-145  common	readv			sys_readv			compat_sys_readv
-146  common	writev			sys_writev			compat_sys_writev
+145  common	readv			sys_readv
+146  common	writev			sys_writev
 147  common	getsid			sys_getsid			sys_getsid
 148  common	fdatasync		sys_fdatasync			sys_fdatasync
-149  common	_sysctl			sys_sysctl			compat_sys_sysctl
+149  common	_sysctl			-				-
 150  common	mlock			sys_mlock			compat_sys_mlock
 151  common	munlock			sys_munlock			compat_sys_munlock
 152  common	mlockall		sys_mlockall			sys_mlockall
@@ -316,7 +316,7 @@
 306  common	splice			sys_splice			compat_sys_splice
 307  common	sync_file_range		sys_sync_file_range		compat_sys_s390_sync_file_range
 308  common	tee			sys_tee				compat_sys_tee
-309  common	vmsplice		sys_vmsplice			compat_sys_vmsplice
+309  common	vmsplice		sys_vmsplice			sys_vmsplice
 310  common	move_pages		sys_move_pages			compat_sys_move_pages
 311  common	getcpu			sys_getcpu			compat_sys_getcpu
 312  common	epoll_pwait		sys_epoll_pwait			compat_sys_epoll_pwait
@@ -347,8 +347,8 @@
 337  common	clock_adjtime		sys_clock_adjtime		compat_sys_clock_adjtime
 338  common	syncfs			sys_syncfs			sys_syncfs
 339  common	setns			sys_setns			sys_setns
-340  common	process_vm_readv	sys_process_vm_readv		compat_sys_process_vm_readv
-341  common	process_vm_writev	sys_process_vm_writev		compat_sys_process_vm_writev
+340  common	process_vm_readv	sys_process_vm_readv		sys_process_vm_readv
+341  common	process_vm_writev	sys_process_vm_writev		sys_process_vm_writev
 342  common	s390_runtime_instr	sys_s390_runtime_instr		sys_s390_runtime_instr
 343  common	kcmp			sys_kcmp			compat_sys_kcmp
 344  common	finit_module		sys_finit_module		compat_sys_finit_module
diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
index e008d638e641..347809649ba2 100644
--- a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
@@ -164,7 +164,7 @@
 153	common	vhangup			sys_vhangup
 154	common	modify_ldt		sys_modify_ldt
 155	common	pivot_root		sys_pivot_root
-156	64	_sysctl			sys_sysctl
+156	64	_sysctl			sys_ni_syscall
 157	common	prctl			sys_prctl
 158	common	arch_prctl		sys_arch_prctl
 159	common	adjtimex		sys_adjtimex
@@ -357,6 +357,7 @@
 433	common	fspick			sys_fspick
 434	common	pidfd_open		sys_pidfd_open
 435	common	clone3			sys_clone3
+436	common	close_range		sys_close_range
 437	common	openat2			sys_openat2
 438	common	pidfd_getfd		sys_pidfd_getfd
 439	common	faccessat2		sys_faccessat2
@@ -370,8 +371,8 @@
 512	x32	rt_sigaction		compat_sys_rt_sigaction
 513	x32	rt_sigreturn		compat_sys_x32_rt_sigreturn
 514	x32	ioctl			compat_sys_ioctl
-515	x32	readv			compat_sys_readv
-516	x32	writev			compat_sys_writev
+515	x32	readv			sys_readv
+516	x32	writev			sys_writev
 517	x32	recvfrom		compat_sys_recvfrom
 518	x32	sendmsg			compat_sys_sendmsg
 519	x32	recvmsg			compat_sys_recvmsg
@@ -387,15 +388,15 @@
 529	x32	waitid			compat_sys_waitid
 530	x32	set_robust_list		compat_sys_set_robust_list
 531	x32	get_robust_list		compat_sys_get_robust_list
-532	x32	vmsplice		compat_sys_vmsplice
+532	x32	vmsplice		sys_vmsplice
 533	x32	move_pages		compat_sys_move_pages
 534	x32	preadv			compat_sys_preadv64
 535	x32	pwritev			compat_sys_pwritev64
 536	x32	rt_tgsigqueueinfo	compat_sys_rt_tgsigqueueinfo
 537	x32	recvmmsg		compat_sys_recvmmsg_time64
 538	x32	sendmmsg		compat_sys_sendmmsg
-539	x32	process_vm_readv	compat_sys_process_vm_readv
-540	x32	process_vm_writev	compat_sys_process_vm_writev
+539	x32	process_vm_readv	sys_process_vm_readv
+540	x32	process_vm_writev	sys_process_vm_writev
 541	x32	setsockopt		sys_setsockopt
 542	x32	getsockopt		sys_getsockopt
 543	x32	io_setup		compat_sys_io_setup
diff --git a/tools/perf/arch/x86/util/intel-pt.c b/tools/perf/arch/x86/util/intel-pt.c
index 6ce451293634..082e5f2a415a 100644
--- a/tools/perf/arch/x86/util/intel-pt.c
+++ b/tools/perf/arch/x86/util/intel-pt.c
@@ -837,6 +837,10 @@ static int intel_pt_recording_options(struct auxtrace_record *itr,
 		}
 	}
 
+	if (have_timing_info && !intel_pt_evsel->core.attr.exclude_kernel &&
+	    perf_can_record_text_poke_events() && perf_can_record_cpu_wide())
+		opts->text_poke = true;
+
 	if (intel_pt_evsel) {
 		/*
 		 * To obtain the auxtrace buffer file descriptor, the auxtrace
diff --git a/tools/perf/bench/Build b/tools/perf/bench/Build
index 768e408757a0..878db6a59a41 100644
--- a/tools/perf/bench/Build
+++ b/tools/perf/bench/Build
@@ -1,5 +1,6 @@
 perf-y += sched-messaging.o
 perf-y += sched-pipe.o
+perf-y += syscall.o
 perf-y += mem-functions.o
 perf-y += futex-hash.o
 perf-y += futex-wake.o
@@ -10,8 +11,8 @@ perf-y += epoll-wait.o
 perf-y += epoll-ctl.o
 perf-y += synthesize.o
 perf-y += kallsyms-parse.o
+perf-y += find-bit-bench.o
 
-perf-$(CONFIG_X86_64) += mem-memcpy-x86-64-lib.o
 perf-$(CONFIG_X86_64) += mem-memcpy-x86-64-asm.o
 perf-$(CONFIG_X86_64) += mem-memset-x86-64-asm.o
 
diff --git a/tools/perf/bench/bench.h b/tools/perf/bench/bench.h
index 61cae4966cae..2804812d4154 100644
--- a/tools/perf/bench/bench.h
+++ b/tools/perf/bench/bench.h
@@ -33,8 +33,10 @@ extern struct timeval bench__start, bench__end, bench__runtime;
 int bench_numa(int argc, const char **argv);
 int bench_sched_messaging(int argc, const char **argv);
 int bench_sched_pipe(int argc, const char **argv);
+int bench_syscall_basic(int argc, const char **argv);
 int bench_mem_memcpy(int argc, const char **argv);
 int bench_mem_memset(int argc, const char **argv);
+int bench_mem_find_bit(int argc, const char **argv);
 int bench_futex_hash(int argc, const char **argv);
 int bench_futex_wake(int argc, const char **argv);
 int bench_futex_wake_parallel(int argc, const char **argv);
diff --git a/tools/perf/bench/find-bit-bench.c b/tools/perf/bench/find-bit-bench.c
new file mode 100644
index 000000000000..73b5bcc5946a
--- /dev/null
+++ b/tools/perf/bench/find-bit-bench.c
@@ -0,0 +1,135 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Benchmark find_next_bit and related bit operations.
+ *
+ * Copyright 2020 Google LLC.
+ */
+#include <stdlib.h>
+#include "bench.h"
+#include "../util/stat.h"
+#include <linux/bitmap.h>
+#include <linux/bitops.h>
+#include <linux/time64.h>
+#include <subcmd/parse-options.h>
+
+static unsigned int outer_iterations = 5;
+static unsigned int inner_iterations = 100000;
+
+static const struct option options[] = {
+	OPT_UINTEGER('i', "outer-iterations", &outer_iterations,
+		"Number of outer iterations used"),
+	OPT_UINTEGER('j', "inner-iterations", &inner_iterations,
+		"Number of inner iterations used"),
+	OPT_END()
+};
+
+static const char *const bench_usage[] = {
+	"perf bench mem find_bit <options>",
+	NULL
+};
+
+static unsigned int accumulator;
+static unsigned int use_of_val;
+
+static noinline void workload(int val)
+{
+	use_of_val += val;
+	accumulator++;
+}
+
+#if (defined(__i386__) || defined(__x86_64__)) && defined(__GCC_ASM_FLAG_OUTPUTS__)
+static bool asm_test_bit(long nr, const unsigned long *addr)
+{
+	bool oldbit;
+
+	asm volatile("bt %2,%1"
+		     : "=@ccc" (oldbit)
+		     : "m" (*(unsigned long *)addr), "Ir" (nr) : "memory");
+
+	return oldbit;
+}
+#else
+#define asm_test_bit test_bit
+#endif
+
+static int do_for_each_set_bit(unsigned int num_bits)
+{
+	unsigned long *to_test = bitmap_alloc(num_bits);
+	struct timeval start, end, diff;
+	u64 runtime_us;
+	struct stats fb_time_stats, tb_time_stats;
+	double time_average, time_stddev;
+	unsigned int bit, i, j;
+	unsigned int set_bits, skip;
+	unsigned int old;
+
+	init_stats(&fb_time_stats);
+	init_stats(&tb_time_stats);
+
+	for (set_bits = 1; set_bits <= num_bits; set_bits <<= 1) {
+		bitmap_zero(to_test, num_bits);
+		skip = num_bits / set_bits;
+		for (i = 0; i < num_bits; i += skip)
+			set_bit(i, to_test);
+
+		for (i = 0; i < outer_iterations; i++) {
+			old = accumulator;
+			gettimeofday(&start, NULL);
+			for (j = 0; j < inner_iterations; j++) {
+				for_each_set_bit(bit, to_test, num_bits)
+					workload(bit);
+			}
+			gettimeofday(&end, NULL);
+			assert(old + (inner_iterations * set_bits) == accumulator);
+			timersub(&end, &start, &diff);
+			runtime_us = diff.tv_sec * USEC_PER_SEC + diff.tv_usec;
+			update_stats(&fb_time_stats, runtime_us);
+
+			old = accumulator;
+			gettimeofday(&start, NULL);
+			for (j = 0; j < inner_iterations; j++) {
+				for (bit = 0; bit < num_bits; bit++) {
+					if (asm_test_bit(bit, to_test))
+						workload(bit);
+				}
+			}
+			gettimeofday(&end, NULL);
+			assert(old + (inner_iterations * set_bits) == accumulator);
+			timersub(&end, &start, &diff);
+			runtime_us = diff.tv_sec * USEC_PER_SEC + diff.tv_usec;
+			update_stats(&tb_time_stats, runtime_us);
+		}
+
+		printf("%d operations %d bits set of %d bits\n",
+			inner_iterations, set_bits, num_bits);
+		time_average = avg_stats(&fb_time_stats);
+		time_stddev = stddev_stats(&fb_time_stats);
+		printf("  Average for_each_set_bit took: %.3f usec (+- %.3f usec)\n",
+			time_average, time_stddev);
+		time_average = avg_stats(&tb_time_stats);
+		time_stddev = stddev_stats(&tb_time_stats);
+		printf("  Average test_bit loop took:    %.3f usec (+- %.3f usec)\n",
+			time_average, time_stddev);
+
+		if (use_of_val == accumulator)  /* Try to avoid compiler tricks. */
+			printf("\n");
+	}
+	bitmap_free(to_test);
+	return 0;
+}
+
+int bench_mem_find_bit(int argc, const char **argv)
+{
+	int err = 0, i;
+
+	argc = parse_options(argc, argv, options, bench_usage, 0);
+	if (argc) {
+		usage_with_options(bench_usage, options);
+		exit(EXIT_FAILURE);
+	}
+
+	for (i = 1; i <= 2048; i <<= 1)
+		do_for_each_set_bit(i);
+
+	return err;
+}
diff --git a/tools/perf/bench/mem-functions.c b/tools/perf/bench/mem-functions.c
index 9235b76501be..19d45c377ac1 100644
--- a/tools/perf/bench/mem-functions.c
+++ b/tools/perf/bench/mem-functions.c
@@ -223,12 +223,8 @@ static int bench_mem_common(int argc, const char **argv, struct bench_mem_info *
 	return 0;
 }
 
-static u64 do_memcpy_cycles(const struct function *r, size_t size, void *src, void *dst)
+static void memcpy_prefault(memcpy_t fn, size_t size, void *src, void *dst)
 {
-	u64 cycle_start = 0ULL, cycle_end = 0ULL;
-	memcpy_t fn = r->fn.memcpy;
-	int i;
-
 	/* Make sure to always prefault zero pages even if MMAP_THRESH is crossed: */
 	memset(src, 0, size);
 
@@ -237,6 +233,15 @@ static u64 do_memcpy_cycles(const struct function *r, size_t size, void *src, vo
 	 * to not measure page fault overhead:
 	 */
 	fn(dst, src, size);
+}
+
+static u64 do_memcpy_cycles(const struct function *r, size_t size, void *src, void *dst)
+{
+	u64 cycle_start = 0ULL, cycle_end = 0ULL;
+	memcpy_t fn = r->fn.memcpy;
+	int i;
+
+	memcpy_prefault(fn, size, src, dst);
 
 	cycle_start = get_cycles();
 	for (i = 0; i < nr_loops; ++i)
@@ -252,11 +257,7 @@ static double do_memcpy_gettimeofday(const struct function *r, size_t size, void
 	memcpy_t fn = r->fn.memcpy;
 	int i;
 
-	/*
-	 * We prefault the freshly allocated memory range here,
-	 * to not measure page fault overhead:
-	 */
-	fn(dst, src, size);
+	memcpy_prefault(fn, size, src, dst);
 
 	BUG_ON(gettimeofday(&tv_start, NULL));
 	for (i = 0; i < nr_loops; ++i)
diff --git a/tools/perf/bench/mem-memcpy-x86-64-lib.c b/tools/perf/bench/mem-memcpy-x86-64-lib.c
deleted file mode 100644
index 4130734dde84..000000000000
--- a/tools/perf/bench/mem-memcpy-x86-64-lib.c
+++ /dev/null
@@ -1,24 +0,0 @@
-/*
- * From code in arch/x86/lib/usercopy_64.c, copied to keep tools/ copy
- * of the kernel's arch/x86/lib/memcpy_64.s used in 'perf bench mem memcpy'
- * happy.
- */
-#include <linux/types.h>
-
-unsigned long __memcpy_mcsafe(void *dst, const void *src, size_t cnt);
-unsigned long mcsafe_handle_tail(char *to, char *from, unsigned len);
-
-unsigned long mcsafe_handle_tail(char *to, char *from, unsigned len)
-{
-	for (; len; --len, to++, from++) {
-		/*
-		 * Call the assembly routine back directly since
-		 * memcpy_mcsafe() may silently fallback to memcpy.
-		 */
-		unsigned long rem = __memcpy_mcsafe(to, from, 1);
-
-		if (rem)
-			break;
-	}
-	return len;
-}
diff --git a/tools/perf/bench/numa.c b/tools/perf/bench/numa.c
index 5797253b9700..f85bceccc459 100644
--- a/tools/perf/bench/numa.c
+++ b/tools/perf/bench/numa.c
@@ -247,17 +247,22 @@ static int is_node_present(int node)
  */
 static bool node_has_cpus(int node)
 {
-	struct bitmask *cpu = numa_allocate_cpumask();
-	unsigned int i;
+	struct bitmask *cpumask = numa_allocate_cpumask();
+	bool ret = false; /* fall back to nocpus */
+	int cpu;
 
-	if (cpu && !numa_node_to_cpus(node, cpu)) {
-		for (i = 0; i < cpu->size; i++) {
-			if (numa_bitmask_isbitset(cpu, i))
-				return true;
+	BUG_ON(!cpumask);
+	if (!numa_node_to_cpus(node, cpumask)) {
+		for (cpu = 0; cpu < (int)cpumask->size; cpu++) {
+			if (numa_bitmask_isbitset(cpumask, cpu)) {
+				ret = true;
+				break;
+			}
 		}
 	}
+	numa_free_cpumask(cpumask);
 
-	return false; /* lets fall back to nocpus safely */
+	return ret;
 }
 
 static cpu_set_t bind_to_cpu(int target_cpu)
@@ -288,14 +293,10 @@ static cpu_set_t bind_to_cpu(int target_cpu)
 
 static cpu_set_t bind_to_node(int target_node)
 {
-	int cpus_per_node = g->p.nr_cpus / nr_numa_nodes();
 	cpu_set_t orig_mask, mask;
 	int cpu;
 	int ret;
 
-	BUG_ON(cpus_per_node * nr_numa_nodes() != g->p.nr_cpus);
-	BUG_ON(!cpus_per_node);
-
 	ret = sched_getaffinity(0, sizeof(orig_mask), &orig_mask);
 	BUG_ON(ret);
 
@@ -305,13 +306,16 @@ static cpu_set_t bind_to_node(int target_node)
 		for (cpu = 0; cpu < g->p.nr_cpus; cpu++)
 			CPU_SET(cpu, &mask);
 	} else {
-		int cpu_start = (target_node + 0) * cpus_per_node;
-		int cpu_stop  = (target_node + 1) * cpus_per_node;
+		struct bitmask *cpumask = numa_allocate_cpumask();
 
-		BUG_ON(cpu_stop > g->p.nr_cpus);
-
-		for (cpu = cpu_start; cpu < cpu_stop; cpu++)
-			CPU_SET(cpu, &mask);
+		BUG_ON(!cpumask);
+		if (!numa_node_to_cpus(target_node, cpumask)) {
+			for (cpu = 0; cpu < (int)cpumask->size; cpu++) {
+				if (numa_bitmask_isbitset(cpumask, cpu))
+					CPU_SET(cpu, &mask);
+			}
+		}
+		numa_free_cpumask(cpumask);
 	}
 
 	ret = sched_setaffinity(0, sizeof(mask), &mask);
@@ -729,8 +733,6 @@ static int parse_nodes_opt(const struct option *opt __maybe_unused,
 		return -1;
 
 	return parse_node_list(arg);
-
-	return 0;
 }
 
 #define BIT(x) (1ul << x)
@@ -813,12 +815,12 @@ static u64 do_work(u8 *__data, long bytes, int nr, int nr_max, int loop, u64 val
 			}
 		}
 	} else if (!g->p.data_backwards || (nr + loop) & 1) {
+		/* Process data forwards: */
 
 		d0 = data + off;
 		d  = data + off + 1;
 		d1 = data + words;
 
-		/* Process data forwards: */
 		for (;;) {
 			if (unlikely(d >= d1))
 				d = data;
@@ -836,7 +838,6 @@ static u64 do_work(u8 *__data, long bytes, int nr, int nr_max, int loop, u64 val
 		d  = data + off - 1;
 		d1 = data + words;
 
-		/* Process data forwards: */
 		for (;;) {
 			if (unlikely(d < data))
 				d = data + words-1;
@@ -1733,12 +1734,12 @@ err:
  */
 static const char *tests[][MAX_ARGS] = {
    /* Basic single-stream NUMA bandwidth measurements: */
-   { "RAM-bw-local,",	  "mem",  "-p",  "1",  "-t",  "1", "-P", "1024",
+   { "RAM-bw-local,",     "mem",  "-p",  "1",  "-t",  "1", "-P", "1024",
 			  "-C" ,   "0", "-M",   "0", OPT_BW_RAM },
    { "RAM-bw-local-NOTHP,",
 			  "mem",  "-p",  "1",  "-t",  "1", "-P", "1024",
 			  "-C" ,   "0", "-M",   "0", OPT_BW_RAM_NOTHP },
-   { "RAM-bw-remote,",	  "mem",  "-p",  "1",  "-t",  "1", "-P", "1024",
+   { "RAM-bw-remote,",    "mem",  "-p",  "1",  "-t",  "1", "-P", "1024",
 			  "-C" ,   "0", "-M",   "1", OPT_BW_RAM },
 
    /* 2-stream NUMA bandwidth measurements: */
@@ -1755,7 +1756,7 @@ static const char *tests[][MAX_ARGS] = {
    { " 1x3-convergence,", "mem",  "-p",  "1", "-t",  "3", "-P",  "512", OPT_CONV },
    { " 1x4-convergence,", "mem",  "-p",  "1", "-t",  "4", "-P",  "512", OPT_CONV },
    { " 1x6-convergence,", "mem",  "-p",  "1", "-t",  "6", "-P", "1020", OPT_CONV },
-   { " 2x3-convergence,", "mem",  "-p",  "3", "-t",  "3", "-P", "1020", OPT_CONV },
+   { " 2x3-convergence,", "mem",  "-p",  "2", "-t",  "3", "-P", "1020", OPT_CONV },
    { " 3x3-convergence,", "mem",  "-p",  "3", "-t",  "3", "-P", "1020", OPT_CONV },
    { " 4x4-convergence,", "mem",  "-p",  "4", "-t",  "4", "-P",  "512", OPT_CONV },
    { " 4x4-convergence-NOTHP,",
@@ -1780,24 +1781,24 @@ static const char *tests[][MAX_ARGS] = {
 			  "mem",  "-p",  "8", "-t",  "1", "-P", " 512", OPT_BW_NOTHP },
    { "16x1-bw-process,",  "mem",  "-p", "16", "-t",  "1", "-P",  "256", OPT_BW },
 
-   { " 4x1-bw-thread,",	  "mem",  "-p",  "1", "-t",  "4", "-T",  "256", OPT_BW },
-   { " 8x1-bw-thread,",	  "mem",  "-p",  "1", "-t",  "8", "-T",  "256", OPT_BW },
-   { "16x1-bw-thread,",   "mem",  "-p",  "1", "-t", "16", "-T",  "128", OPT_BW },
-   { "32x1-bw-thread,",   "mem",  "-p",  "1", "-t", "32", "-T",   "64", OPT_BW },
+   { " 1x4-bw-thread,",   "mem",  "-p",  "1", "-t",  "4", "-T",  "256", OPT_BW },
+   { " 1x8-bw-thread,",   "mem",  "-p",  "1", "-t",  "8", "-T",  "256", OPT_BW },
+   { "1x16-bw-thread,",   "mem",  "-p",  "1", "-t", "16", "-T",  "128", OPT_BW },
+   { "1x32-bw-thread,",   "mem",  "-p",  "1", "-t", "32", "-T",   "64", OPT_BW },
 
-   { " 2x3-bw-thread,",	  "mem",  "-p",  "2", "-t",  "3", "-P",  "512", OPT_BW },
-   { " 4x4-bw-thread,",	  "mem",  "-p",  "4", "-t",  "4", "-P",  "512", OPT_BW },
-   { " 4x6-bw-thread,",	  "mem",  "-p",  "4", "-t",  "6", "-P",  "512", OPT_BW },
-   { " 4x8-bw-thread,",	  "mem",  "-p",  "4", "-t",  "8", "-P",  "512", OPT_BW },
-   { " 4x8-bw-thread-NOTHP,",
+   { " 2x3-bw-process,",  "mem",  "-p",  "2", "-t",  "3", "-P",  "512", OPT_BW },
+   { " 4x4-bw-process,",  "mem",  "-p",  "4", "-t",  "4", "-P",  "512", OPT_BW },
+   { " 4x6-bw-process,",  "mem",  "-p",  "4", "-t",  "6", "-P",  "512", OPT_BW },
+   { " 4x8-bw-process,",  "mem",  "-p",  "4", "-t",  "8", "-P",  "512", OPT_BW },
+   { " 4x8-bw-process-NOTHP,",
 			  "mem",  "-p",  "4", "-t",  "8", "-P",  "512", OPT_BW_NOTHP },
-   { " 3x3-bw-thread,",	  "mem",  "-p",  "3", "-t",  "3", "-P",  "512", OPT_BW },
-   { " 5x5-bw-thread,",	  "mem",  "-p",  "5", "-t",  "5", "-P",  "512", OPT_BW },
+   { " 3x3-bw-process,",  "mem",  "-p",  "3", "-t",  "3", "-P",  "512", OPT_BW },
+   { " 5x5-bw-process,",  "mem",  "-p",  "5", "-t",  "5", "-P",  "512", OPT_BW },
 
-   { "2x16-bw-thread,",   "mem",  "-p",  "2", "-t", "16", "-P",  "512", OPT_BW },
-   { "1x32-bw-thread,",   "mem",  "-p",  "1", "-t", "32", "-P", "2048", OPT_BW },
+   { "2x16-bw-process,",  "mem",  "-p",  "2", "-t", "16", "-P",  "512", OPT_BW },
+   { "1x32-bw-process,",  "mem",  "-p",  "1", "-t", "32", "-P", "2048", OPT_BW },
 
-   { "numa02-bw,",	  "mem",  "-p",  "1", "-t", "32", "-T",   "32", OPT_BW },
+   { "numa02-bw,",        "mem",  "-p",  "1", "-t", "32", "-T",   "32", OPT_BW },
    { "numa02-bw-NOTHP,",  "mem",  "-p",  "1", "-t", "32", "-T",   "32", OPT_BW_NOTHP },
    { "numa01-bw-thread,", "mem",  "-p",  "2", "-t", "16", "-T",  "192", OPT_BW },
    { "numa01-bw-thread-NOTHP,",
diff --git a/tools/perf/bench/sched-messaging.c b/tools/perf/bench/sched-messaging.c
index 71d830d7b923..cecce93ccc63 100644
--- a/tools/perf/bench/sched-messaging.c
+++ b/tools/perf/bench/sched-messaging.c
@@ -66,11 +66,10 @@ static void fdpair(int fds[2])
 /* Block until we're ready to go */
 static void ready(int ready_out, int wakefd)
 {
-	char dummy;
 	struct pollfd pollfd = { .fd = wakefd, .events = POLLIN };
 
 	/* Tell them we're ready. */
-	if (write(ready_out, &dummy, 1) != 1)
+	if (write(ready_out, "R", 1) != 1)
 		err(EXIT_FAILURE, "CLIENT: ready write");
 
 	/* Wait for "GO" signal */
@@ -85,6 +84,7 @@ static void *sender(struct sender_context *ctx)
 	unsigned int i, j;
 
 	ready(ctx->ready_out, ctx->wakefd);
+	memset(data, 'S', sizeof(data));
 
 	/* Now pump to every receiver. */
 	for (i = 0; i < nr_loops; i++) {
diff --git a/tools/perf/bench/synthesize.c b/tools/perf/bench/synthesize.c
index 8d624aea1c5e..b2924e3181dc 100644
--- a/tools/perf/bench/synthesize.c
+++ b/tools/perf/bench/synthesize.c
@@ -162,8 +162,8 @@ static int do_run_multi_threaded(struct target *target,
 	init_stats(&event_stats);
 	for (i = 0; i < multi_iterations; i++) {
 		session = perf_session__new(NULL, false, NULL);
-		if (!session)
-			return -ENOMEM;
+		if (IS_ERR(session))
+			return PTR_ERR(session);
 
 		atomic_set(&event_count, 0);
 		gettimeofday(&start, NULL);
diff --git a/tools/perf/bench/syscall.c b/tools/perf/bench/syscall.c
new file mode 100644
index 000000000000..5fe621cff8e9
--- /dev/null
+++ b/tools/perf/bench/syscall.c
@@ -0,0 +1,81 @@
+/*
+ *
+ * syscall.c
+ *
+ * syscall: Benchmark for system call performance
+ */
+#include "../perf.h"
+#include "../util/util.h"
+#include <subcmd/parse-options.h>
+#include "../builtin.h"
+#include "bench.h"
+
+#include <stdio.h>
+#include <sys/time.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <unistd.h>
+#include <stdlib.h>
+
+#define LOOPS_DEFAULT 10000000
+static	int loops = LOOPS_DEFAULT;
+
+static const struct option options[] = {
+	OPT_INTEGER('l', "loop",	&loops,		"Specify number of loops"),
+	OPT_END()
+};
+
+static const char * const bench_syscall_usage[] = {
+	"perf bench syscall <options>",
+	NULL
+};
+
+int bench_syscall_basic(int argc, const char **argv)
+{
+	struct timeval start, stop, diff;
+	unsigned long long result_usec = 0;
+	int i;
+
+	argc = parse_options(argc, argv, options, bench_syscall_usage, 0);
+
+	gettimeofday(&start, NULL);
+
+	for (i = 0; i < loops; i++)
+		getppid();
+
+	gettimeofday(&stop, NULL);
+	timersub(&stop, &start, &diff);
+
+	switch (bench_format) {
+	case BENCH_FORMAT_DEFAULT:
+		printf("# Executed %'d getppid() calls\n", loops);
+
+		result_usec = diff.tv_sec * 1000000;
+		result_usec += diff.tv_usec;
+
+		printf(" %14s: %lu.%03lu [sec]\n\n", "Total time",
+		       diff.tv_sec,
+		       (unsigned long) (diff.tv_usec/1000));
+
+		printf(" %14lf usecs/op\n",
+		       (double)result_usec / (double)loops);
+		printf(" %'14d ops/sec\n",
+		       (int)((double)loops /
+			     ((double)result_usec / (double)1000000)));
+		break;
+
+	case BENCH_FORMAT_SIMPLE:
+		printf("%lu.%03lu\n",
+		       diff.tv_sec,
+		       (unsigned long) (diff.tv_usec / 1000));
+		break;
+
+	default:
+		/* reaching here is something disaster */
+		fprintf(stderr, "Unknown format:%d\n", bench_format);
+		exit(1);
+		break;
+	}
+
+	return 0;
+}
diff --git a/tools/perf/builtin-bench.c b/tools/perf/builtin-bench.c
index cad31b1d3438..4f176039fc8f 100644
--- a/tools/perf/builtin-bench.c
+++ b/tools/perf/builtin-bench.c
@@ -11,6 +11,7 @@
  * Available benchmark collection list:
  *
  *  sched ... scheduler and IPC performance
+ *  syscall ... System call performance
  *  mem   ... memory access performance
  *  numa  ... NUMA scheduling and MM performance
  *  futex ... Futex performance
@@ -49,9 +50,16 @@ static struct bench sched_benchmarks[] = {
 	{ NULL,		NULL,						NULL			}
 };
 
+static struct bench syscall_benchmarks[] = {
+	{ "basic",	"Benchmark for basic getppid(2) calls",		bench_syscall_basic	},
+	{ "all",	"Run all syscall benchmarks",			NULL			},
+	{ NULL,		NULL,						NULL			},
+};
+
 static struct bench mem_benchmarks[] = {
 	{ "memcpy",	"Benchmark for memcpy() functions",		bench_mem_memcpy	},
 	{ "memset",	"Benchmark for memset() functions",		bench_mem_memset	},
+	{ "find_bit",	"Benchmark for find_bit() functions",		bench_mem_find_bit	},
 	{ "all",	"Run all memory access benchmarks",		NULL			},
 	{ NULL,		NULL,						NULL			}
 };
@@ -90,6 +98,7 @@ struct collection {
 
 static struct collection collections[] = {
 	{ "sched",	"Scheduler and IPC benchmarks",			sched_benchmarks	},
+	{ "syscall",	"System call benchmarks",			syscall_benchmarks	},
 	{ "mem",	"Memory access benchmarks",			mem_benchmarks		},
 #ifdef HAVE_LIBNUMA_SUPPORT
 	{ "numa",	"NUMA scheduling and MM benchmarks",		numa_benchmarks		},
diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c
index d617d5682c68..5938b100eaf4 100644
--- a/tools/perf/builtin-c2c.c
+++ b/tools/perf/builtin-c2c.c
@@ -2582,7 +2582,7 @@ parse_callchain_opt(const struct option *opt, const char *arg, int unset)
 
 static int setup_callchain(struct evlist *evlist)
 {
-	u64 sample_type = perf_evlist__combined_sample_type(evlist);
+	u64 sample_type = evlist__combined_sample_type(evlist);
 	enum perf_call_graph_mode mode = CALLCHAIN_NONE;
 
 	if ((sample_type & PERF_SAMPLE_REGS_USER) &&
diff --git a/tools/perf/builtin-data.c b/tools/perf/builtin-data.c
index ca2fb44874e4..8d23b8d6ee8e 100644
--- a/tools/perf/builtin-data.c
+++ b/tools/perf/builtin-data.c
@@ -65,6 +65,7 @@ static int cmd_data_convert(int argc, const char **argv)
 		OPT_STRING('i', "input", &input_name, "file", "input file name"),
 #ifdef HAVE_LIBBABELTRACE_SUPPORT
 		OPT_STRING(0, "to-ctf", &to_ctf, NULL, "Convert to CTF format"),
+		OPT_BOOLEAN(0, "tod", &opts.tod, "Convert time to wall clock time"),
 #endif
 		OPT_BOOLEAN('f', "force", &opts.force, "don't complain, do it"),
 		OPT_BOOLEAN(0, "all", &opts.all, "Convert all events"),
diff --git a/tools/perf/builtin-ftrace.c b/tools/perf/builtin-ftrace.c
index 2bfc1b0db536..1d44bc2f63d8 100644
--- a/tools/perf/builtin-ftrace.c
+++ b/tools/perf/builtin-ftrace.c
@@ -3,6 +3,7 @@
  * builtin-ftrace.c
  *
  * Copyright (c) 2013  LG Electronics,  Namhyung Kim <namhyung@kernel.org>
+ * Copyright (c) 2020  Changbin Du <changbin.du@gmail.com>, significant enhancement.
  */
 
 #include "builtin.h"
@@ -26,6 +27,8 @@
 #include "thread_map.h"
 #include "util/cap.h"
 #include "util/config.h"
+#include "util/units.h"
+#include "util/parse-sublevel-options.h"
 
 #define DEFAULT_TRACER  "function_graph"
 
@@ -33,11 +36,21 @@ struct perf_ftrace {
 	struct evlist		*evlist;
 	struct target		target;
 	const char		*tracer;
+	bool			list_avail_functions;
 	struct list_head	filters;
 	struct list_head	notrace;
 	struct list_head	graph_funcs;
 	struct list_head	nograph_funcs;
 	int			graph_depth;
+	unsigned long		percpu_buffer_size;
+	bool			inherit;
+	int			func_stack_trace;
+	int			func_irq_info;
+	int			graph_nosleep_time;
+	int			graph_noirqs;
+	int			graph_verbose;
+	int			graph_thresh;
+	unsigned int		initial_delay;
 };
 
 struct filter_entry {
@@ -128,9 +141,85 @@ static int append_tracing_file(const char *name, const char *val)
 	return __write_tracing_file(name, val, true);
 }
 
+static int read_tracing_file_to_stdout(const char *name)
+{
+	char buf[4096];
+	char *file;
+	int fd;
+	int ret = -1;
+
+	file = get_tracing_file(name);
+	if (!file) {
+		pr_debug("cannot get tracing file: %s\n", name);
+		return -1;
+	}
+
+	fd = open(file, O_RDONLY);
+	if (fd < 0) {
+		pr_debug("cannot open tracing file: %s: %s\n",
+			 name, str_error_r(errno, buf, sizeof(buf)));
+		goto out;
+	}
+
+	/* read contents to stdout */
+	while (true) {
+		int n = read(fd, buf, sizeof(buf));
+		if (n == 0)
+			break;
+		else if (n < 0)
+			goto out_close;
+
+		if (fwrite(buf, n, 1, stdout) != 1)
+			goto out_close;
+	}
+	ret = 0;
+
+out_close:
+	close(fd);
+out:
+	put_tracing_file(file);
+	return ret;
+}
+
+static int write_tracing_file_int(const char *name, int value)
+{
+	char buf[16];
+
+	snprintf(buf, sizeof(buf), "%d", value);
+	if (write_tracing_file(name, buf) < 0)
+		return -1;
+
+	return 0;
+}
+
+static int write_tracing_option_file(const char *name, const char *val)
+{
+	char *file;
+	int ret;
+
+	if (asprintf(&file, "options/%s", name) < 0)
+		return -1;
+
+	ret = __write_tracing_file(file, val, false);
+	free(file);
+	return ret;
+}
+
 static int reset_tracing_cpu(void);
 static void reset_tracing_filters(void);
 
+static void reset_tracing_options(struct perf_ftrace *ftrace __maybe_unused)
+{
+	write_tracing_option_file("function-fork", "0");
+	write_tracing_option_file("func_stack_trace", "0");
+	write_tracing_option_file("sleep-time", "1");
+	write_tracing_option_file("funcgraph-irqs", "1");
+	write_tracing_option_file("funcgraph-proc", "0");
+	write_tracing_option_file("funcgraph-abstime", "0");
+	write_tracing_option_file("latency-format", "0");
+	write_tracing_option_file("irq-info", "0");
+}
+
 static int reset_tracing_files(struct perf_ftrace *ftrace __maybe_unused)
 {
 	if (write_tracing_file("tracing_on", "0") < 0)
@@ -148,7 +237,11 @@ static int reset_tracing_files(struct perf_ftrace *ftrace __maybe_unused)
 	if (write_tracing_file("max_graph_depth", "0") < 0)
 		return -1;
 
+	if (write_tracing_file("tracing_thresh", "0") < 0)
+		return -1;
+
 	reset_tracing_filters();
+	reset_tracing_options(ftrace);
 	return 0;
 }
 
@@ -204,6 +297,28 @@ static int set_tracing_cpu(struct perf_ftrace *ftrace)
 	return set_tracing_cpumask(cpumap);
 }
 
+static int set_tracing_func_stack_trace(struct perf_ftrace *ftrace)
+{
+	if (!ftrace->func_stack_trace)
+		return 0;
+
+	if (write_tracing_option_file("func_stack_trace", "1") < 0)
+		return -1;
+
+	return 0;
+}
+
+static int set_tracing_func_irqinfo(struct perf_ftrace *ftrace)
+{
+	if (!ftrace->func_irq_info)
+		return 0;
+
+	if (write_tracing_option_file("irq-info", "1") < 0)
+		return -1;
+
+	return 0;
+}
+
 static int reset_tracing_cpu(void)
 {
 	struct perf_cpu_map *cpumap = perf_cpu_map__new(NULL);
@@ -258,8 +373,6 @@ static void reset_tracing_filters(void)
 
 static int set_tracing_depth(struct perf_ftrace *ftrace)
 {
-	char buf[16];
-
 	if (ftrace->graph_depth == 0)
 		return 0;
 
@@ -268,10 +381,152 @@ static int set_tracing_depth(struct perf_ftrace *ftrace)
 		return -1;
 	}
 
-	snprintf(buf, sizeof(buf), "%d", ftrace->graph_depth);
+	if (write_tracing_file_int("max_graph_depth", ftrace->graph_depth) < 0)
+		return -1;
+
+	return 0;
+}
+
+static int set_tracing_percpu_buffer_size(struct perf_ftrace *ftrace)
+{
+	int ret;
+
+	if (ftrace->percpu_buffer_size == 0)
+		return 0;
+
+	ret = write_tracing_file_int("buffer_size_kb",
+				     ftrace->percpu_buffer_size / 1024);
+	if (ret < 0)
+		return ret;
+
+	return 0;
+}
+
+static int set_tracing_trace_inherit(struct perf_ftrace *ftrace)
+{
+	if (!ftrace->inherit)
+		return 0;
+
+	if (write_tracing_option_file("function-fork", "1") < 0)
+		return -1;
+
+	return 0;
+}
+
+static int set_tracing_sleep_time(struct perf_ftrace *ftrace)
+{
+	if (!ftrace->graph_nosleep_time)
+		return 0;
+
+	if (write_tracing_option_file("sleep-time", "0") < 0)
+		return -1;
+
+	return 0;
+}
+
+static int set_tracing_funcgraph_irqs(struct perf_ftrace *ftrace)
+{
+	if (!ftrace->graph_noirqs)
+		return 0;
+
+	if (write_tracing_option_file("funcgraph-irqs", "0") < 0)
+		return -1;
+
+	return 0;
+}
+
+static int set_tracing_funcgraph_verbose(struct perf_ftrace *ftrace)
+{
+	if (!ftrace->graph_verbose)
+		return 0;
+
+	if (write_tracing_option_file("funcgraph-proc", "1") < 0)
+		return -1;
+
+	if (write_tracing_option_file("funcgraph-abstime", "1") < 0)
+		return -1;
+
+	if (write_tracing_option_file("latency-format", "1") < 0)
+		return -1;
+
+	return 0;
+}
+
+static int set_tracing_thresh(struct perf_ftrace *ftrace)
+{
+	int ret;
+
+	if (ftrace->graph_thresh == 0)
+		return 0;
+
+	ret = write_tracing_file_int("tracing_thresh", ftrace->graph_thresh);
+	if (ret < 0)
+		return ret;
+
+	return 0;
+}
+
+static int set_tracing_options(struct perf_ftrace *ftrace)
+{
+	if (set_tracing_pid(ftrace) < 0) {
+		pr_err("failed to set ftrace pid\n");
+		return -1;
+	}
+
+	if (set_tracing_cpu(ftrace) < 0) {
+		pr_err("failed to set tracing cpumask\n");
+		return -1;
+	}
+
+	if (set_tracing_func_stack_trace(ftrace) < 0) {
+		pr_err("failed to set tracing option func_stack_trace\n");
+		return -1;
+	}
+
+	if (set_tracing_func_irqinfo(ftrace) < 0) {
+		pr_err("failed to set tracing option irq-info\n");
+		return -1;
+	}
+
+	if (set_tracing_filters(ftrace) < 0) {
+		pr_err("failed to set tracing filters\n");
+		return -1;
+	}
+
+	if (set_tracing_depth(ftrace) < 0) {
+		pr_err("failed to set graph depth\n");
+		return -1;
+	}
+
+	if (set_tracing_percpu_buffer_size(ftrace) < 0) {
+		pr_err("failed to set tracing per-cpu buffer size\n");
+		return -1;
+	}
+
+	if (set_tracing_trace_inherit(ftrace) < 0) {
+		pr_err("failed to set tracing option function-fork\n");
+		return -1;
+	}
+
+	if (set_tracing_sleep_time(ftrace) < 0) {
+		pr_err("failed to set tracing option sleep-time\n");
+		return -1;
+	}
+
+	if (set_tracing_funcgraph_irqs(ftrace) < 0) {
+		pr_err("failed to set tracing option funcgraph-irqs\n");
+		return -1;
+	}
 
-	if (write_tracing_file("max_graph_depth", buf) < 0)
+	if (set_tracing_funcgraph_verbose(ftrace) < 0) {
+		pr_err("failed to set tracing option funcgraph-proc/funcgraph-abstime\n");
 		return -1;
+	}
+
+	if (set_tracing_thresh(ftrace) < 0) {
+		pr_err("failed to set tracing thresh\n");
+		return -1;
+	}
 
 	return 0;
 }
@@ -302,6 +557,9 @@ static int __cmd_ftrace(struct perf_ftrace *ftrace, int argc, const char **argv)
 	signal(SIGCHLD, sig_handler);
 	signal(SIGPIPE, sig_handler);
 
+	if (ftrace->list_avail_functions)
+		return read_tracing_file_to_stdout("available_filter_functions");
+
 	if (reset_tracing_files(ftrace) < 0) {
 		pr_err("failed to reset ftrace\n");
 		goto out;
@@ -317,25 +575,8 @@ static int __cmd_ftrace(struct perf_ftrace *ftrace, int argc, const char **argv)
 		goto out;
 	}
 
-	if (set_tracing_pid(ftrace) < 0) {
-		pr_err("failed to set ftrace pid\n");
+	if (set_tracing_options(ftrace) < 0)
 		goto out_reset;
-	}
-
-	if (set_tracing_cpu(ftrace) < 0) {
-		pr_err("failed to set tracing cpumask\n");
-		goto out_reset;
-	}
-
-	if (set_tracing_filters(ftrace) < 0) {
-		pr_err("failed to set tracing filters\n");
-		goto out_reset;
-	}
-
-	if (set_tracing_depth(ftrace) < 0) {
-		pr_err("failed to set graph depth\n");
-		goto out_reset;
-	}
 
 	if (write_tracing_file("current_tracer", ftrace->tracer) < 0) {
 		pr_err("failed to set current_tracer to %s\n", ftrace->tracer);
@@ -362,13 +603,26 @@ static int __cmd_ftrace(struct perf_ftrace *ftrace, int argc, const char **argv)
 	fcntl(trace_fd, F_SETFL, O_NONBLOCK);
 	pollfd.fd = trace_fd;
 
-	if (write_tracing_file("tracing_on", "1") < 0) {
-		pr_err("can't enable tracing\n");
-		goto out_close_fd;
+	/* display column headers */
+	read_tracing_file_to_stdout("trace");
+
+	if (!ftrace->initial_delay) {
+		if (write_tracing_file("tracing_on", "1") < 0) {
+			pr_err("can't enable tracing\n");
+			goto out_close_fd;
+		}
 	}
 
 	perf_evlist__start_workload(ftrace->evlist);
 
+	if (ftrace->initial_delay) {
+		usleep(ftrace->initial_delay * 1000);
+		if (write_tracing_file("tracing_on", "1") < 0) {
+			pr_err("can't enable tracing\n");
+			goto out_close_fd;
+		}
+	}
+
 	while (!done) {
 		if (poll(&pollfd, 1, -1) < 0)
 			break;
@@ -455,6 +709,99 @@ static void delete_filter_func(struct list_head *head)
 	}
 }
 
+static int parse_buffer_size(const struct option *opt,
+			     const char *str, int unset)
+{
+	unsigned long *s = (unsigned long *)opt->value;
+	static struct parse_tag tags_size[] = {
+		{ .tag  = 'B', .mult = 1       },
+		{ .tag  = 'K', .mult = 1 << 10 },
+		{ .tag  = 'M', .mult = 1 << 20 },
+		{ .tag  = 'G', .mult = 1 << 30 },
+		{ .tag  = 0 },
+	};
+	unsigned long val;
+
+	if (unset) {
+		*s = 0;
+		return 0;
+	}
+
+	val = parse_tag_value(str, tags_size);
+	if (val != (unsigned long) -1) {
+		if (val < 1024) {
+			pr_err("buffer size too small, must larger than 1KB.");
+			return -1;
+		}
+		*s = val;
+		return 0;
+	}
+
+	return -1;
+}
+
+static int parse_func_tracer_opts(const struct option *opt,
+				  const char *str, int unset)
+{
+	int ret;
+	struct perf_ftrace *ftrace = (struct perf_ftrace *) opt->value;
+	struct sublevel_option func_tracer_opts[] = {
+		{ .name = "call-graph",	.value_ptr = &ftrace->func_stack_trace },
+		{ .name = "irq-info",	.value_ptr = &ftrace->func_irq_info },
+		{ .name = NULL, }
+	};
+
+	if (unset)
+		return 0;
+
+	ret = perf_parse_sublevel_options(str, func_tracer_opts);
+	if (ret)
+		return ret;
+
+	return 0;
+}
+
+static int parse_graph_tracer_opts(const struct option *opt,
+				  const char *str, int unset)
+{
+	int ret;
+	struct perf_ftrace *ftrace = (struct perf_ftrace *) opt->value;
+	struct sublevel_option graph_tracer_opts[] = {
+		{ .name = "nosleep-time",	.value_ptr = &ftrace->graph_nosleep_time },
+		{ .name = "noirqs",		.value_ptr = &ftrace->graph_noirqs },
+		{ .name = "verbose",		.value_ptr = &ftrace->graph_verbose },
+		{ .name = "thresh",		.value_ptr = &ftrace->graph_thresh },
+		{ .name = "depth",		.value_ptr = &ftrace->graph_depth },
+		{ .name = NULL, }
+	};
+
+	if (unset)
+		return 0;
+
+	ret = perf_parse_sublevel_options(str, graph_tracer_opts);
+	if (ret)
+		return ret;
+
+	return 0;
+}
+
+static void select_tracer(struct perf_ftrace *ftrace)
+{
+	bool graph = !list_empty(&ftrace->graph_funcs) ||
+		     !list_empty(&ftrace->nograph_funcs);
+	bool func = !list_empty(&ftrace->filters) ||
+		    !list_empty(&ftrace->notrace);
+
+	/* The function_graph has priority over function tracer. */
+	if (graph)
+		ftrace->tracer = "function_graph";
+	else if (func)
+		ftrace->tracer = "function";
+	/* Otherwise, the default tracer is used. */
+
+	pr_debug("%s tracer is used\n", ftrace->tracer);
+}
+
 int cmd_ftrace(int argc, const char **argv)
 {
 	int ret;
@@ -469,25 +816,42 @@ int cmd_ftrace(int argc, const char **argv)
 	};
 	const struct option ftrace_options[] = {
 	OPT_STRING('t', "tracer", &ftrace.tracer, "tracer",
-		   "tracer to use: function_graph(default) or function"),
+		   "Tracer to use: function_graph(default) or function"),
+	OPT_BOOLEAN('F', "funcs", &ftrace.list_avail_functions,
+		    "Show available functions to filter"),
 	OPT_STRING('p', "pid", &ftrace.target.pid, "pid",
-		   "trace on existing process id"),
+		   "Trace on existing process id"),
+	/* TODO: Add short option -t after -t/--tracer can be removed. */
+	OPT_STRING(0, "tid", &ftrace.target.tid, "tid",
+		   "Trace on existing thread id (exclusive to --pid)"),
 	OPT_INCR('v', "verbose", &verbose,
-		 "be more verbose"),
+		 "Be more verbose"),
 	OPT_BOOLEAN('a', "all-cpus", &ftrace.target.system_wide,
-		    "system-wide collection from all CPUs"),
+		    "System-wide collection from all CPUs"),
 	OPT_STRING('C', "cpu", &ftrace.target.cpu_list, "cpu",
-		    "list of cpus to monitor"),
+		    "List of cpus to monitor"),
 	OPT_CALLBACK('T', "trace-funcs", &ftrace.filters, "func",
-		     "trace given functions only", parse_filter_func),
+		     "Trace given functions using function tracer",
+		     parse_filter_func),
 	OPT_CALLBACK('N', "notrace-funcs", &ftrace.notrace, "func",
-		     "do not trace given functions", parse_filter_func),
+		     "Do not trace given functions", parse_filter_func),
+	OPT_CALLBACK(0, "func-opts", &ftrace, "options",
+		     "Function tracer options, available options: call-graph,irq-info",
+		     parse_func_tracer_opts),
 	OPT_CALLBACK('G', "graph-funcs", &ftrace.graph_funcs, "func",
-		     "Set graph filter on given functions", parse_filter_func),
+		     "Trace given functions using function_graph tracer",
+		     parse_filter_func),
 	OPT_CALLBACK('g', "nograph-funcs", &ftrace.nograph_funcs, "func",
 		     "Set nograph filter on given functions", parse_filter_func),
-	OPT_INTEGER('D', "graph-depth", &ftrace.graph_depth,
-		    "Max depth for function graph tracer"),
+	OPT_CALLBACK(0, "graph-opts", &ftrace, "options",
+		     "Graph tracer options, available options: nosleep-time,noirqs,verbose,thresh=<n>,depth=<n>",
+		     parse_graph_tracer_opts),
+	OPT_CALLBACK('m', "buffer-size", &ftrace.percpu_buffer_size, "size",
+		     "Size of per cpu buffer, needs to use a B, K, M or G suffix.", parse_buffer_size),
+	OPT_BOOLEAN(0, "inherit", &ftrace.inherit,
+		    "Trace children processes"),
+	OPT_UINTEGER('D', "delay", &ftrace.initial_delay,
+		     "Number of milliseconds to wait before starting tracing after program start"),
 	OPT_END()
 	};
 
@@ -505,6 +869,8 @@ int cmd_ftrace(int argc, const char **argv)
 	if (!argc && target__none(&ftrace.target))
 		ftrace.target.system_wide = true;
 
+	select_tracer(&ftrace);
+
 	ret = target__validate(&ftrace.target);
 	if (ret) {
 		char errbuf[512];
diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c
index 4a6de4b03ac0..6d2f410d773a 100644
--- a/tools/perf/builtin-inject.c
+++ b/tools/perf/builtin-inject.c
@@ -292,7 +292,7 @@ static int perf_event__jit_repipe_mmap(struct perf_tool *tool,
 	 * if jit marker, then inject jit mmaps and generate ELF images
 	 */
 	ret = jit_process(inject->session, &inject->output, machine,
-			  event->mmap.filename, sample->pid, &n);
+			  event->mmap.filename, event->mmap.pid, &n);
 	if (ret < 0)
 		return ret;
 	if (ret) {
@@ -330,7 +330,7 @@ static int perf_event__jit_repipe_mmap2(struct perf_tool *tool,
 	 * if jit marker, then inject jit mmaps and generate ELF images
 	 */
 	ret = jit_process(inject->session, &inject->output, machine,
-			  event->mmap2.filename, sample->pid, &n);
+			  event->mmap2.filename, event->mmap2.pid, &n);
 	if (ret < 0)
 		return ret;
 	if (ret) {
diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c
index 38a5ab683ebc..a50dae2c4ae9 100644
--- a/tools/perf/builtin-kmem.c
+++ b/tools/perf/builtin-kmem.c
@@ -1933,7 +1933,8 @@ int cmd_kmem(int argc, const char **argv)
 		return ret;
 
 	argc = parse_options_subcommand(argc, argv, kmem_options,
-					kmem_subcommands, kmem_usage, 0);
+					kmem_subcommands, kmem_usage,
+					PARSE_OPT_STOP_AT_NON_OPTION);
 
 	if (!argc)
 		usage_with_options(kmem_usage, kmem_options);
diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c
index 95a77058023e..460945ded6dd 100644
--- a/tools/perf/builtin-kvm.c
+++ b/tools/perf/builtin-kvm.c
@@ -1319,7 +1319,7 @@ static struct evlist *kvm_live_event_list(void)
 		*name = '\0';
 		name++;
 
-		if (perf_evlist__add_newtp(evlist, sys, name, NULL)) {
+		if (evlist__add_newtp(evlist, sys, name, NULL)) {
 			pr_err("Failed to add %s tracepoint to the list\n", *events_tp);
 			free(tp);
 			goto out;
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index a37e7910e9e9..772f1057647f 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -46,6 +46,7 @@
 #include "util/bpf-event.h"
 #include "util/util.h"
 #include "util/pfm.h"
+#include "util/clockid.h"
 #include "asm/bug.h"
 #include "perf.h"
 
@@ -70,6 +71,7 @@
 #include <linux/time64.h>
 #include <linux/zalloc.h>
 #include <linux/bitmap.h>
+#include <sys/time.h>
 
 struct switch_output {
 	bool		 enabled;
@@ -765,6 +767,43 @@ static int record__auxtrace_init(struct record *rec __maybe_unused)
 
 #endif
 
+static int record__config_text_poke(struct evlist *evlist)
+{
+	struct evsel *evsel;
+	int err;
+
+	/* Nothing to do if text poke is already configured */
+	evlist__for_each_entry(evlist, evsel) {
+		if (evsel->core.attr.text_poke)
+			return 0;
+	}
+
+	err = parse_events(evlist, "dummy:u", NULL);
+	if (err)
+		return err;
+
+	evsel = evlist__last(evlist);
+
+	evsel->core.attr.freq = 0;
+	evsel->core.attr.sample_period = 1;
+	evsel->core.attr.text_poke = 1;
+	evsel->core.attr.ksymbol = 1;
+
+	evsel->core.system_wide = true;
+	evsel->no_aux_samples = true;
+	evsel->immediate = true;
+
+	/* Text poke must be collected on all CPUs */
+	perf_cpu_map__put(evsel->core.own_cpus);
+	evsel->core.own_cpus = perf_cpu_map__new(NULL);
+	perf_cpu_map__put(evsel->core.cpus);
+	evsel->core.cpus = perf_cpu_map__get(evsel->core.own_cpus);
+
+	evsel__set_sample_bit(evsel, TIME);
+
+	return 0;
+}
+
 static bool record__kcore_readable(struct machine *machine)
 {
 	char kcore[PATH_MAX];
@@ -855,7 +894,7 @@ static int record__open(struct record *rec)
 		pos = perf_evlist__get_tracking_event(evlist);
 		if (!evsel__is_dummy_event(pos)) {
 			/* Set up dummy event. */
-			if (perf_evlist__add_dummy(evlist))
+			if (evlist__add_dummy(evlist))
 				return -ENOMEM;
 			pos = evlist__last(evlist);
 			perf_evlist__set_tracking_event(evlist, pos);
@@ -1166,6 +1205,9 @@ static void record__init_features(struct record *rec)
 	if (!(rec->opts.use_clockid && rec->opts.clockid_res_ns))
 		perf_header__clear_feat(&session->header, HEADER_CLOCKID);
 
+	if (!rec->opts.use_clockid)
+		perf_header__clear_feat(&session->header, HEADER_CLOCK_DATA);
+
 	perf_header__clear_feat(&session->header, HEADER_DIR_FORMAT);
 	if (!record__comp_enabled(rec))
 		perf_header__clear_feat(&session->header, HEADER_COMPRESSED);
@@ -1489,7 +1531,7 @@ static int record__setup_sb_evlist(struct record *rec)
 		evlist__set_cb(rec->sb_evlist, record__process_signal_event, rec);
 		rec->thread_id = pthread_self();
 	}
-
+#ifdef HAVE_LIBBPF_SUPPORT
 	if (!opts->no_bpf_event) {
 		if (rec->sb_evlist == NULL) {
 			rec->sb_evlist = evlist__new();
@@ -1505,7 +1547,7 @@ static int record__setup_sb_evlist(struct record *rec)
 			return -1;
 		}
 	}
-
+#endif
 	if (perf_evlist__start_sb_thread(rec->sb_evlist, &rec->opts.target)) {
 		pr_debug("Couldn't start the BPF side band thread:\nBPF programs starting from now on won't be annotatable\n");
 		opts->no_bpf_event = true;
@@ -1514,6 +1556,43 @@ static int record__setup_sb_evlist(struct record *rec)
 	return 0;
 }
 
+static int record__init_clock(struct record *rec)
+{
+	struct perf_session *session = rec->session;
+	struct timespec ref_clockid;
+	struct timeval ref_tod;
+	u64 ref;
+
+	if (!rec->opts.use_clockid)
+		return 0;
+
+	if (rec->opts.use_clockid && rec->opts.clockid_res_ns)
+		session->header.env.clock.clockid_res_ns = rec->opts.clockid_res_ns;
+
+	session->header.env.clock.clockid = rec->opts.clockid;
+
+	if (gettimeofday(&ref_tod, NULL) != 0) {
+		pr_err("gettimeofday failed, cannot set reference time.\n");
+		return -1;
+	}
+
+	if (clock_gettime(rec->opts.clockid, &ref_clockid)) {
+		pr_err("clock_gettime failed, cannot set reference time.\n");
+		return -1;
+	}
+
+	ref = (u64) ref_tod.tv_sec * NSEC_PER_SEC +
+	      (u64) ref_tod.tv_usec * NSEC_PER_USEC;
+
+	session->header.env.clock.tod_ns = ref;
+
+	ref = (u64) ref_clockid.tv_sec * NSEC_PER_SEC +
+	      (u64) ref_clockid.tv_nsec;
+
+	session->header.env.clock.clockid_ns = ref;
+	return 0;
+}
+
 static int __cmd_record(struct record *rec, int argc, const char **argv)
 {
 	int err;
@@ -1527,6 +1606,7 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
 	bool disabled = false, draining = false;
 	int fd;
 	float ratio = 0;
+	enum evlist_ctl_cmd cmd = EVLIST_CTL_CMD_UNSUPPORTED;
 
 	atexit(record__sig_exit);
 	signal(SIGCHLD, sig_handler);
@@ -1593,10 +1673,10 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
 		return -1;
 	}
 
-	record__init_features(rec);
+	if (record__init_clock(rec))
+		return -1;
 
-	if (rec->opts.use_clockid && rec->opts.clockid_res_ns)
-		session->header.env.clockid_res_ns = rec->opts.clockid_res_ns;
+	record__init_features(rec);
 
 	if (forks) {
 		err = perf_evlist__prepare_workload(rec->evlist, &opts->target,
@@ -1646,7 +1726,7 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
 	 * Normally perf_session__new would do this, but it doesn't have the
 	 * evlist.
 	 */
-	if (rec->tool.ordered_events && !perf_evlist__sample_id_all(rec->evlist)) {
+	if (rec->tool.ordered_events && !evlist__sample_id_all(rec->evlist)) {
 		pr_warning("WARNING: No sample_id_all support, falling back to unordered processing\n");
 		rec->tool.ordered_events = false;
 	}
@@ -1748,9 +1828,16 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
 		perf_evlist__start_workload(rec->evlist);
 	}
 
+	if (evlist__initialize_ctlfd(rec->evlist, opts->ctl_fd, opts->ctl_fd_ack))
+		goto out_child;
+
 	if (opts->initial_delay) {
-		usleep(opts->initial_delay * USEC_PER_MSEC);
-		evlist__enable(rec->evlist);
+		pr_info(EVLIST_DISABLED_MSG);
+		if (opts->initial_delay > 0) {
+			usleep(opts->initial_delay * USEC_PER_MSEC);
+			evlist__enable(rec->evlist);
+			pr_info(EVLIST_ENABLED_MSG);
+		}
 	}
 
 	trigger_ready(&auxtrace_snapshot_trigger);
@@ -1842,6 +1929,21 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
 				draining = true;
 		}
 
+		if (evlist__ctlfd_process(rec->evlist, &cmd) > 0) {
+			switch (cmd) {
+			case EVLIST_CTL_CMD_ENABLE:
+				pr_info(EVLIST_ENABLED_MSG);
+				break;
+			case EVLIST_CTL_CMD_DISABLE:
+				pr_info(EVLIST_DISABLED_MSG);
+				break;
+			case EVLIST_CTL_CMD_ACK:
+			case EVLIST_CTL_CMD_UNSUPPORTED:
+			default:
+				break;
+			}
+		}
+
 		/*
 		 * When perf is starting the traced process, at the end events
 		 * die with the process and we wait for that. Thus no need to
@@ -1875,6 +1977,7 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
 		record__synthesize_workload(rec, true);
 
 out_child:
+	evlist__finalize_ctlfd(rec->evlist);
 	record__mmap_read_all(rec, true);
 	record__aio_mmap_read_sync(rec);
 
@@ -2041,103 +2144,6 @@ static int perf_record_config(const char *var, const char *value, void *cb)
 	return 0;
 }
 
-struct clockid_map {
-	const char *name;
-	int clockid;
-};
-
-#define CLOCKID_MAP(n, c)	\
-	{ .name = n, .clockid = (c), }
-
-#define CLOCKID_END	{ .name = NULL, }
-
-
-/*
- * Add the missing ones, we need to build on many distros...
- */
-#ifndef CLOCK_MONOTONIC_RAW
-#define CLOCK_MONOTONIC_RAW 4
-#endif
-#ifndef CLOCK_BOOTTIME
-#define CLOCK_BOOTTIME 7
-#endif
-#ifndef CLOCK_TAI
-#define CLOCK_TAI 11
-#endif
-
-static const struct clockid_map clockids[] = {
-	/* available for all events, NMI safe */
-	CLOCKID_MAP("monotonic", CLOCK_MONOTONIC),
-	CLOCKID_MAP("monotonic_raw", CLOCK_MONOTONIC_RAW),
-
-	/* available for some events */
-	CLOCKID_MAP("realtime", CLOCK_REALTIME),
-	CLOCKID_MAP("boottime", CLOCK_BOOTTIME),
-	CLOCKID_MAP("tai", CLOCK_TAI),
-
-	/* available for the lazy */
-	CLOCKID_MAP("mono", CLOCK_MONOTONIC),
-	CLOCKID_MAP("raw", CLOCK_MONOTONIC_RAW),
-	CLOCKID_MAP("real", CLOCK_REALTIME),
-	CLOCKID_MAP("boot", CLOCK_BOOTTIME),
-
-	CLOCKID_END,
-};
-
-static int get_clockid_res(clockid_t clk_id, u64 *res_ns)
-{
-	struct timespec res;
-
-	*res_ns = 0;
-	if (!clock_getres(clk_id, &res))
-		*res_ns = res.tv_nsec + res.tv_sec * NSEC_PER_SEC;
-	else
-		pr_warning("WARNING: Failed to determine specified clock resolution.\n");
-
-	return 0;
-}
-
-static int parse_clockid(const struct option *opt, const char *str, int unset)
-{
-	struct record_opts *opts = (struct record_opts *)opt->value;
-	const struct clockid_map *cm;
-	const char *ostr = str;
-
-	if (unset) {
-		opts->use_clockid = 0;
-		return 0;
-	}
-
-	/* no arg passed */
-	if (!str)
-		return 0;
-
-	/* no setting it twice */
-	if (opts->use_clockid)
-		return -1;
-
-	opts->use_clockid = true;
-
-	/* if its a number, we're done */
-	if (sscanf(str, "%d", &opts->clockid) == 1)
-		return get_clockid_res(opts->clockid, &opts->clockid_res_ns);
-
-	/* allow a "CLOCK_" prefix to the name */
-	if (!strncasecmp(str, "CLOCK_", 6))
-		str += 6;
-
-	for (cm = clockids; cm->name; cm++) {
-		if (!strcasecmp(str, cm->name)) {
-			opts->clockid = cm->clockid;
-			return get_clockid_res(opts->clockid,
-					       &opts->clockid_res_ns);
-		}
-	}
-
-	opts->use_clockid = false;
-	ui__warning("unknown clockid %s, check man page\n", ostr);
-	return -1;
-}
 
 static int record__parse_affinity(const struct option *opt, const char *str, int unset)
 {
@@ -2224,6 +2230,33 @@ out_free:
 	return ret;
 }
 
+static int parse_control_option(const struct option *opt,
+				const char *str,
+				int unset __maybe_unused)
+{
+	char *comma = NULL, *endptr = NULL;
+	struct record_opts *config = (struct record_opts *)opt->value;
+
+	if (strncmp(str, "fd:", 3))
+		return -EINVAL;
+
+	config->ctl_fd = strtoul(&str[3], &endptr, 0);
+	if (endptr == &str[3])
+		return -EINVAL;
+
+	comma = strchr(str, ',');
+	if (comma) {
+		if (endptr != comma)
+			return -EINVAL;
+
+		config->ctl_fd_ack = strtoul(comma + 1, &endptr, 0);
+		if (endptr == comma + 1 || *endptr != '\0')
+			return -EINVAL;
+	}
+
+	return 0;
+}
+
 static void switch_output_size_warn(struct record *rec)
 {
 	u64 wakeup_size = evlist__mmap_size(rec->opts.mmap_pages);
@@ -2360,6 +2393,8 @@ static struct record record = {
 		},
 		.mmap_flush          = MMAP_FLUSH_DEFAULT,
 		.nr_threads_synthesize = 1,
+		.ctl_fd              = -1,
+		.ctl_fd_ack          = -1,
 	},
 	.tool = {
 		.sample		= process_sample_event,
@@ -2417,7 +2452,7 @@ static struct option __record_options[] = {
 	OPT_BOOLEAN(0, "tail-synthesize", &record.opts.tail_synthesize,
 		    "synthesize non-sample events at the end of output"),
 	OPT_BOOLEAN(0, "overwrite", &record.opts.overwrite, "use overwrite mode"),
-	OPT_BOOLEAN(0, "no-bpf-event", &record.opts.no_bpf_event, "record bpf events"),
+	OPT_BOOLEAN(0, "no-bpf-event", &record.opts.no_bpf_event, "do not record bpf events"),
 	OPT_BOOLEAN(0, "strict-freq", &record.opts.strict_freq,
 		    "Fail if the specified frequency can't be used"),
 	OPT_CALLBACK('F', "freq", &record.opts, "freq or 'max'",
@@ -2462,8 +2497,8 @@ static struct option __record_options[] = {
 	OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
 		     "monitor event in cgroup name only",
 		     parse_cgroups),
-	OPT_UINTEGER('D', "delay", &record.opts.initial_delay,
-		  "ms to wait before starting measurement after program start"),
+	OPT_INTEGER('D', "delay", &record.opts.initial_delay,
+		  "ms to wait before starting measurement after program start (-1: start with events disabled)"),
 	OPT_BOOLEAN(0, "kcore", &record.opts.kcore, "copy /proc/kcore"),
 	OPT_STRING('u', "uid", &record.opts.target.uid_str, "user",
 		   "user to profile"),
@@ -2561,6 +2596,10 @@ static struct option __record_options[] = {
 		"libpfm4 event selector. use 'perf list' to list available events",
 		parse_libpfm_events_option),
 #endif
+	OPT_CALLBACK(0, "control", &record.opts, "fd:ctl-fd[,ack-fd]",
+		     "Listen on ctl-fd descriptor for command to control measurement ('enable': enable events, 'disable': disable events).\n"
+		     "\t\t\t  Optionally send control command completion ('ack\\n') to ack-fd descriptor.",
+		      parse_control_option),
 	OPT_END()
 };
 
@@ -2722,7 +2761,7 @@ int cmd_record(int argc, const char **argv)
 		record.opts.tail_synthesize = true;
 
 	if (rec->evlist->core.nr_entries == 0 &&
-	    __perf_evlist__add_default(rec->evlist, !record.opts.no_samples) < 0) {
+	    __evlist__add_default(rec->evlist, !record.opts.no_samples) < 0) {
 		pr_err("Not enough memory for event selector list\n");
 		goto out;
 	}
@@ -2766,6 +2805,14 @@ int cmd_record(int argc, const char **argv)
 	if (rec->opts.full_auxtrace)
 		rec->buildid_all = true;
 
+	if (rec->opts.text_poke) {
+		err = record__config_text_poke(rec->evlist);
+		if (err) {
+			pr_err("record__config_text_poke failed, error %d\n", err);
+			goto out;
+		}
+	}
+
 	if (record_opts__config(&rec->opts)) {
 		err = -EINVAL;
 		goto out;
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 5f1d2a878fad..3c74c9c0f3c3 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -338,7 +338,7 @@ static int process_read_event(struct perf_tool *tool,
 static int report__setup_sample_type(struct report *rep)
 {
 	struct perf_session *session = rep->session;
-	u64 sample_type = perf_evlist__combined_sample_type(session->evlist);
+	u64 sample_type = evlist__combined_sample_type(session->evlist);
 	bool is_pipe = perf_data__is_pipe(session->data);
 
 	if (session->itrace_synth_opts->callchain ||
@@ -410,8 +410,7 @@ static int report__setup_sample_type(struct report *rep)
 	}
 
 	/* ??? handle more cases than just ANY? */
-	if (!(perf_evlist__combined_branch_type(session->evlist) &
-				PERF_SAMPLE_BRANCH_ANY))
+	if (!(evlist__combined_branch_type(session->evlist) & PERF_SAMPLE_BRANCH_ANY))
 		rep->nonany_branch_mode = true;
 
 #if !defined(HAVE_LIBUNWIND_SUPPORT) && !defined(HAVE_DWARF_SUPPORT)
@@ -1093,7 +1092,7 @@ static int process_attr(struct perf_tool *tool __maybe_unused,
 	 * Check if we need to enable callchains based
 	 * on events sample_type.
 	 */
-	sample_type = perf_evlist__combined_sample_type(*pevlist);
+	sample_type = evlist__combined_sample_type(*pevlist);
 	callchain_param_setup(sample_type);
 	return 0;
 }
@@ -1333,6 +1332,9 @@ int cmd_report(int argc, const char **argv)
 	if (report.mmaps_mode)
 		report.tasks_mode = true;
 
+	if (dump_trace)
+		report.tool.ordered_events = false;
+
 	if (quiet)
 		perf_quiet_option();
 
@@ -1389,7 +1391,7 @@ repeat:
 
 	has_br_stack = perf_header__has_feat(&session->header,
 					     HEADER_BRANCH_STACK);
-	if (perf_evlist__combined_sample_type(session->evlist) & PERF_SAMPLE_STACK_USER)
+	if (evlist__combined_sample_type(session->evlist) & PERF_SAMPLE_STACK_USER)
 		has_br_stack = false;
 
 	setup_forced_leader(&report, session->evlist);
diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
index 459e4229945e..e6fc297cee91 100644
--- a/tools/perf/builtin-sched.c
+++ b/tools/perf/builtin-sched.c
@@ -2398,6 +2398,15 @@ static void timehist_print_wakeup_event(struct perf_sched *sched,
 	printf("\n");
 }
 
+static int timehist_sched_wakeup_ignore(struct perf_tool *tool __maybe_unused,
+					union perf_event *event __maybe_unused,
+					struct evsel *evsel __maybe_unused,
+					struct perf_sample *sample __maybe_unused,
+					struct machine *machine __maybe_unused)
+{
+	return 0;
+}
+
 static int timehist_sched_wakeup_event(struct perf_tool *tool,
 				       union perf_event *event __maybe_unused,
 				       struct evsel *evsel,
@@ -2575,7 +2584,8 @@ static int timehist_sched_change_event(struct perf_tool *tool,
 	}
 
 	if (!sched->idle_hist || thread->tid == 0) {
-		timehist_update_runtime_stats(tr, t, tprev);
+		if (!cpu_list || test_bit(sample->cpu, cpu_bitmap))
+			timehist_update_runtime_stats(tr, t, tprev);
 
 		if (sched->idle_hist) {
 			struct idle_thread_runtime *itr = (void *)tr;
@@ -2848,6 +2858,9 @@ static void timehist_print_summary(struct perf_sched *sched,
 
 	printf("\nIdle stats:\n");
 	for (i = 0; i < idle_max_cpu; ++i) {
+		if (cpu_list && !test_bit(i, cpu_bitmap))
+			continue;
+
 		t = idle_threads[i];
 		if (!t)
 			continue;
@@ -2958,9 +2971,10 @@ static int timehist_check_attr(struct perf_sched *sched,
 
 static int perf_sched__timehist(struct perf_sched *sched)
 {
-	const struct evsel_str_handler handlers[] = {
+	struct evsel_str_handler handlers[] = {
 		{ "sched:sched_switch",       timehist_sched_switch_event, },
 		{ "sched:sched_wakeup",	      timehist_sched_wakeup_event, },
+		{ "sched:sched_waking",       timehist_sched_wakeup_event, },
 		{ "sched:sched_wakeup_new",   timehist_sched_wakeup_event, },
 	};
 	const struct evsel_str_handler migrate_handlers[] = {
@@ -3018,6 +3032,11 @@ static int perf_sched__timehist(struct perf_sched *sched)
 
 	setup_pager();
 
+	/* prefer sched_waking if it is captured */
+	if (perf_evlist__find_tracepoint_by_name(session->evlist,
+						  "sched:sched_waking"))
+		handlers[1].handler = timehist_sched_wakeup_ignore;
+
 	/* setup per-evsel handlers */
 	if (perf_session__set_tracepoints_handlers(session, handlers))
 		goto out;
@@ -3330,12 +3349,16 @@ static int __cmd_record(int argc, const char **argv)
 		"-e", "sched:sched_stat_iowait",
 		"-e", "sched:sched_stat_runtime",
 		"-e", "sched:sched_process_fork",
-		"-e", "sched:sched_wakeup",
 		"-e", "sched:sched_wakeup_new",
 		"-e", "sched:sched_migrate_task",
 	};
+	struct tep_event *waking_event;
 
-	rec_argc = ARRAY_SIZE(record_args) + argc - 1;
+	/*
+	 * +2 for either "-e", "sched:sched_wakeup" or
+	 * "-e", "sched:sched_waking"
+	 */
+	rec_argc = ARRAY_SIZE(record_args) + 2 + argc - 1;
 	rec_argv = calloc(rec_argc + 1, sizeof(char *));
 
 	if (rec_argv == NULL)
@@ -3344,6 +3367,13 @@ static int __cmd_record(int argc, const char **argv)
 	for (i = 0; i < ARRAY_SIZE(record_args); i++)
 		rec_argv[i] = strdup(record_args[i]);
 
+	rec_argv[i++] = "-e";
+	waking_event = trace_event__tp_format("sched", "sched_waking");
+	if (!IS_ERR(waking_event))
+		rec_argv[i++] = strdup("sched:sched_waking");
+	else
+		rec_argv[i++] = strdup("sched:sched_wakeup");
+
 	for (j = 1; j < (unsigned int)argc; j++, i++)
 		rec_argv[i] = argv[j];
 
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index 447457786362..484ce6067d23 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -82,38 +82,64 @@ static bool			native_arch;
 unsigned int scripting_max_stack = PERF_MAX_STACK_DEPTH;
 
 enum perf_output_field {
-	PERF_OUTPUT_COMM            = 1U << 0,
-	PERF_OUTPUT_TID             = 1U << 1,
-	PERF_OUTPUT_PID             = 1U << 2,
-	PERF_OUTPUT_TIME            = 1U << 3,
-	PERF_OUTPUT_CPU             = 1U << 4,
-	PERF_OUTPUT_EVNAME          = 1U << 5,
-	PERF_OUTPUT_TRACE           = 1U << 6,
-	PERF_OUTPUT_IP              = 1U << 7,
-	PERF_OUTPUT_SYM             = 1U << 8,
-	PERF_OUTPUT_DSO             = 1U << 9,
-	PERF_OUTPUT_ADDR            = 1U << 10,
-	PERF_OUTPUT_SYMOFFSET       = 1U << 11,
-	PERF_OUTPUT_SRCLINE         = 1U << 12,
-	PERF_OUTPUT_PERIOD          = 1U << 13,
-	PERF_OUTPUT_IREGS	    = 1U << 14,
-	PERF_OUTPUT_BRSTACK	    = 1U << 15,
-	PERF_OUTPUT_BRSTACKSYM	    = 1U << 16,
-	PERF_OUTPUT_DATA_SRC	    = 1U << 17,
-	PERF_OUTPUT_WEIGHT	    = 1U << 18,
-	PERF_OUTPUT_BPF_OUTPUT	    = 1U << 19,
-	PERF_OUTPUT_CALLINDENT	    = 1U << 20,
-	PERF_OUTPUT_INSN	    = 1U << 21,
-	PERF_OUTPUT_INSNLEN	    = 1U << 22,
-	PERF_OUTPUT_BRSTACKINSN	    = 1U << 23,
-	PERF_OUTPUT_BRSTACKOFF	    = 1U << 24,
-	PERF_OUTPUT_SYNTH           = 1U << 25,
-	PERF_OUTPUT_PHYS_ADDR       = 1U << 26,
-	PERF_OUTPUT_UREGS	    = 1U << 27,
-	PERF_OUTPUT_METRIC	    = 1U << 28,
-	PERF_OUTPUT_MISC            = 1U << 29,
-	PERF_OUTPUT_SRCCODE	    = 1U << 30,
-	PERF_OUTPUT_IPC             = 1U << 31,
+	PERF_OUTPUT_COMM            = 1ULL << 0,
+	PERF_OUTPUT_TID             = 1ULL << 1,
+	PERF_OUTPUT_PID             = 1ULL << 2,
+	PERF_OUTPUT_TIME            = 1ULL << 3,
+	PERF_OUTPUT_CPU             = 1ULL << 4,
+	PERF_OUTPUT_EVNAME          = 1ULL << 5,
+	PERF_OUTPUT_TRACE           = 1ULL << 6,
+	PERF_OUTPUT_IP              = 1ULL << 7,
+	PERF_OUTPUT_SYM             = 1ULL << 8,
+	PERF_OUTPUT_DSO             = 1ULL << 9,
+	PERF_OUTPUT_ADDR            = 1ULL << 10,
+	PERF_OUTPUT_SYMOFFSET       = 1ULL << 11,
+	PERF_OUTPUT_SRCLINE         = 1ULL << 12,
+	PERF_OUTPUT_PERIOD          = 1ULL << 13,
+	PERF_OUTPUT_IREGS	    = 1ULL << 14,
+	PERF_OUTPUT_BRSTACK	    = 1ULL << 15,
+	PERF_OUTPUT_BRSTACKSYM	    = 1ULL << 16,
+	PERF_OUTPUT_DATA_SRC	    = 1ULL << 17,
+	PERF_OUTPUT_WEIGHT	    = 1ULL << 18,
+	PERF_OUTPUT_BPF_OUTPUT	    = 1ULL << 19,
+	PERF_OUTPUT_CALLINDENT	    = 1ULL << 20,
+	PERF_OUTPUT_INSN	    = 1ULL << 21,
+	PERF_OUTPUT_INSNLEN	    = 1ULL << 22,
+	PERF_OUTPUT_BRSTACKINSN	    = 1ULL << 23,
+	PERF_OUTPUT_BRSTACKOFF	    = 1ULL << 24,
+	PERF_OUTPUT_SYNTH           = 1ULL << 25,
+	PERF_OUTPUT_PHYS_ADDR       = 1ULL << 26,
+	PERF_OUTPUT_UREGS	    = 1ULL << 27,
+	PERF_OUTPUT_METRIC	    = 1ULL << 28,
+	PERF_OUTPUT_MISC            = 1ULL << 29,
+	PERF_OUTPUT_SRCCODE	    = 1ULL << 30,
+	PERF_OUTPUT_IPC             = 1ULL << 31,
+	PERF_OUTPUT_TOD             = 1ULL << 32,
+};
+
+struct perf_script {
+	struct perf_tool	tool;
+	struct perf_session	*session;
+	bool			show_task_events;
+	bool			show_mmap_events;
+	bool			show_switch_events;
+	bool			show_namespace_events;
+	bool			show_lost_events;
+	bool			show_round_events;
+	bool			show_bpf_events;
+	bool			show_cgroup_events;
+	bool			show_text_poke_events;
+	bool			allocated;
+	bool			per_event_dump;
+	bool			stitch_lbr;
+	struct evswitch		evswitch;
+	struct perf_cpu_map	*cpus;
+	struct perf_thread_map *threads;
+	int			name_width;
+	const char              *time_str;
+	struct perf_time_interval *ptime_range;
+	int			range_size;
+	int			range_num;
 };
 
 struct output_option {
@@ -152,6 +178,7 @@ struct output_option {
 	{.str = "misc", .field = PERF_OUTPUT_MISC},
 	{.str = "srccode", .field = PERF_OUTPUT_SRCCODE},
 	{.str = "ipc", .field = PERF_OUTPUT_IPC},
+	{.str = "tod", .field = PERF_OUTPUT_TOD},
 };
 
 enum {
@@ -388,7 +415,7 @@ static int evsel__check_stype(struct evsel *evsel, u64 sample_type, const char *
 	return evsel__do_check_stype(evsel, sample_type, sample_msg, field, false);
 }
 
-static int perf_evsel__check_attr(struct evsel *evsel, struct perf_session *session)
+static int evsel__check_attr(struct evsel *evsel, struct perf_session *session)
 {
 	struct perf_event_attr *attr = &evsel->core.attr;
 	bool allow_user_set;
@@ -443,8 +470,7 @@ static int perf_evsel__check_attr(struct evsel *evsel, struct perf_session *sess
 		return -EINVAL;
 	}
 	if (PRINT_FIELD(BRSTACKINSN) && !allow_user_set &&
-	    !(perf_evlist__combined_branch_type(session->evlist) &
-	      PERF_SAMPLE_BRANCH_ANY)) {
+	    !(evlist__combined_branch_type(session->evlist) & PERF_SAMPLE_BRANCH_ANY)) {
 		pr_err("Display of branch stack assembler requested, but non all-branch filter set\n"
 		       "Hint: run 'perf record -b ...'\n");
 		return -EINVAL;
@@ -503,6 +529,7 @@ static void set_print_ip_opts(struct perf_event_attr *attr)
  */
 static int perf_session__check_output_opt(struct perf_session *session)
 {
+	bool tod = false;
 	unsigned int j;
 	struct evsel *evsel;
 
@@ -522,13 +549,14 @@ static int perf_session__check_output_opt(struct perf_session *session)
 		}
 
 		if (evsel && output[j].fields &&
-			perf_evsel__check_attr(evsel, session))
+			evsel__check_attr(evsel, session))
 			return -1;
 
 		if (evsel == NULL)
 			continue;
 
 		set_print_ip_opts(&evsel->core.attr);
+		tod |= output[j].fields & PERF_OUTPUT_TOD;
 	}
 
 	if (!no_callchain) {
@@ -569,13 +597,17 @@ static int perf_session__check_output_opt(struct perf_session *session)
 		}
 	}
 
+	if (tod && !session->header.env.clock.enabled) {
+		pr_err("Can't provide 'tod' time, missing clock data. "
+		       "Please record with -k/--clockid option.\n");
+		return -1;
+	}
 out:
 	return 0;
 }
 
 static int perf_sample__fprintf_regs(struct regs_dump *regs, uint64_t mask,
-				     FILE *fp
-)
+				     FILE *fp)
 {
 	unsigned i = 0, r;
 	int printed = 0;
@@ -593,6 +625,56 @@ static int perf_sample__fprintf_regs(struct regs_dump *regs, uint64_t mask,
 	return printed;
 }
 
+#define DEFAULT_TOD_FMT "%F %H:%M:%S"
+
+static char*
+tod_scnprintf(struct perf_script *script, char *buf, int buflen,
+	     u64 timestamp)
+{
+	u64 tod_ns, clockid_ns;
+	struct perf_env *env;
+	unsigned long nsec;
+	struct tm ltime;
+	char date[64];
+	time_t sec;
+
+	buf[0] = '\0';
+	if (buflen < 64 || !script)
+		return buf;
+
+	env = &script->session->header.env;
+	if (!env->clock.enabled) {
+		scnprintf(buf, buflen, "disabled");
+		return buf;
+	}
+
+	clockid_ns = env->clock.clockid_ns;
+	tod_ns     = env->clock.tod_ns;
+
+	if (timestamp > clockid_ns)
+		tod_ns += timestamp - clockid_ns;
+	else
+		tod_ns -= clockid_ns - timestamp;
+
+	sec  = (time_t) (tod_ns / NSEC_PER_SEC);
+	nsec = tod_ns - sec * NSEC_PER_SEC;
+
+	if (localtime_r(&sec, &ltime) == NULL) {
+		scnprintf(buf, buflen, "failed");
+	} else {
+		strftime(date, sizeof(date), DEFAULT_TOD_FMT, &ltime);
+
+		if (symbol_conf.nanosecs) {
+			snprintf(buf, buflen, "%s.%09lu", date, nsec);
+		} else {
+			snprintf(buf, buflen, "%s.%06lu",
+				 date, nsec / NSEC_PER_USEC);
+		}
+	}
+
+	return buf;
+}
+
 static int perf_sample__fprintf_iregs(struct perf_sample *sample,
 				      struct perf_event_attr *attr, FILE *fp)
 {
@@ -607,7 +689,8 @@ static int perf_sample__fprintf_uregs(struct perf_sample *sample,
 					 attr->sample_regs_user, fp);
 }
 
-static int perf_sample__fprintf_start(struct perf_sample *sample,
+static int perf_sample__fprintf_start(struct perf_script *script,
+				      struct perf_sample *sample,
 				      struct thread *thread,
 				      struct evsel *evsel,
 				      u32 type, FILE *fp)
@@ -616,6 +699,7 @@ static int perf_sample__fprintf_start(struct perf_sample *sample,
 	unsigned long secs;
 	unsigned long long nsecs;
 	int printed = 0;
+	char tstr[128];
 
 	if (PRINT_FIELD(COMM)) {
 		if (latency_format)
@@ -684,6 +768,11 @@ static int perf_sample__fprintf_start(struct perf_sample *sample,
 		printed += ret;
 	}
 
+	if (PRINT_FIELD(TOD)) {
+		tod_scnprintf(script, tstr, sizeof(tstr), sample->time);
+		printed += fprintf(fp, "%s ", tstr);
+	}
+
 	if (PRINT_FIELD(TIME)) {
 		u64 t = sample->time;
 		if (reltime) {
@@ -1668,31 +1757,7 @@ static int perf_sample__fprintf_synth(struct perf_sample *sample,
 	return 0;
 }
 
-struct perf_script {
-	struct perf_tool	tool;
-	struct perf_session	*session;
-	bool			show_task_events;
-	bool			show_mmap_events;
-	bool			show_switch_events;
-	bool			show_namespace_events;
-	bool			show_lost_events;
-	bool			show_round_events;
-	bool			show_bpf_events;
-	bool			show_cgroup_events;
-	bool			allocated;
-	bool			per_event_dump;
-	bool			stitch_lbr;
-	struct evswitch		evswitch;
-	struct perf_cpu_map	*cpus;
-	struct perf_thread_map *threads;
-	int			name_width;
-	const char              *time_str;
-	struct perf_time_interval *ptime_range;
-	int			range_size;
-	int			range_num;
-};
-
-static int perf_evlist__max_name_len(struct evlist *evlist)
+static int evlist__max_name_len(struct evlist *evlist)
 {
 	struct evsel *evsel;
 	int max = 0;
@@ -1739,7 +1804,7 @@ static void script_print_metric(struct perf_stat_config *config __maybe_unused,
 
 	if (!fmt)
 		return;
-	perf_sample__fprintf_start(mctx->sample, mctx->thread, mctx->evsel,
+	perf_sample__fprintf_start(NULL, mctx->sample, mctx->thread, mctx->evsel,
 				   PERF_RECORD_SAMPLE, mctx->fp);
 	fputs("\tmetric: ", mctx->fp);
 	if (color)
@@ -1754,7 +1819,7 @@ static void script_new_line(struct perf_stat_config *config __maybe_unused,
 {
 	struct metric_ctx *mctx = ctx;
 
-	perf_sample__fprintf_start(mctx->sample, mctx->thread, mctx->evsel,
+	perf_sample__fprintf_start(NULL, mctx->sample, mctx->thread, mctx->evsel,
 				   PERF_RECORD_SAMPLE, mctx->fp);
 	fputs("\tmetric: ", mctx->fp);
 }
@@ -1865,7 +1930,7 @@ static void process_event(struct perf_script *script,
 
 	++es->samples;
 
-	perf_sample__fprintf_start(sample, thread, evsel,
+	perf_sample__fprintf_start(script, sample, thread, evsel,
 				   PERF_RECORD_SAMPLE, fp);
 
 	if (PRINT_FIELD(PERIOD))
@@ -1875,7 +1940,7 @@ static void process_event(struct perf_script *script,
 		const char *evname = evsel__name(evsel);
 
 		if (!script->name_width)
-			script->name_width = perf_evlist__max_name_len(script->session->evlist);
+			script->name_width = evlist__max_name_len(script->session->evlist);
 
 		fprintf(fp, "%*s: ", script->name_width, evname ?: "[unknown]");
 	}
@@ -2120,7 +2185,7 @@ static int process_attr(struct perf_tool *tool, union perf_event *event,
 	}
 
 	if (evsel->core.attr.sample_type) {
-		err = perf_evsel__check_attr(evsel, scr->session);
+		err = evsel__check_attr(evsel, scr->session);
 		if (err)
 			return err;
 	}
@@ -2129,7 +2194,7 @@ static int process_attr(struct perf_tool *tool, union perf_event *event,
 	 * Check if we need to enable callchains based
 	 * on events sample_type.
 	 */
-	sample_type = perf_evlist__combined_sample_type(evlist);
+	sample_type = evlist__combined_sample_type(evlist);
 	callchain_param_setup(sample_type);
 
 	/* Enable fields for callchain entries */
@@ -2174,11 +2239,11 @@ static int print_event_with_time(struct perf_tool *tool,
 		thread = machine__findnew_thread(machine, pid, tid);
 
 	if (thread && evsel) {
-		perf_sample__fprintf_start(sample, thread, evsel,
+		perf_sample__fprintf_start(script, sample, thread, evsel,
 					   event->header.type, stdout);
 	}
 
-	perf_event__fprintf(event, stdout);
+	perf_event__fprintf(event, machine, stdout);
 
 	thread__put(thread);
 
@@ -2313,7 +2378,7 @@ process_finished_round_event(struct perf_tool *tool __maybe_unused,
 			     struct ordered_events *oe __maybe_unused)
 
 {
-	perf_event__fprintf(event, stdout);
+	perf_event__fprintf(event, NULL, stdout);
 	return 0;
 }
 
@@ -2330,6 +2395,18 @@ process_bpf_events(struct perf_tool *tool __maybe_unused,
 			   sample->tid);
 }
 
+static int process_text_poke_events(struct perf_tool *tool,
+				    union perf_event *event,
+				    struct perf_sample *sample,
+				    struct machine *machine)
+{
+	if (perf_event__process_text_poke(tool, event, sample, machine) < 0)
+		return -1;
+
+	return print_event(tool, event, sample, machine, sample->pid,
+			   sample->tid);
+}
+
 static void sig_handler(int sig __maybe_unused)
 {
 	session_done = 1;
@@ -2438,6 +2515,10 @@ static int __cmd_script(struct perf_script *script)
 		script->tool.ksymbol = process_bpf_events;
 		script->tool.bpf     = process_bpf_events;
 	}
+	if (script->show_text_poke_events) {
+		script->tool.ksymbol   = process_bpf_events;
+		script->tool.text_poke = process_text_poke_events;
+	}
 
 	if (perf_script__setup_per_event_dump(script)) {
 		pr_err("Couldn't create the per event dump files\n");
@@ -3171,7 +3252,7 @@ static int have_cmd(int argc, const char **argv)
 static void script__setup_sample_type(struct perf_script *script)
 {
 	struct perf_session *session = script->session;
-	u64 sample_type = perf_evlist__combined_sample_type(session->evlist);
+	u64 sample_type = evlist__combined_sample_type(session->evlist);
 
 	if (symbol_conf.use_callchain || symbol_conf.cumulate_callchain) {
 		if ((sample_type & PERF_SAMPLE_REGS_USER) &&
@@ -3423,7 +3504,7 @@ int cmd_script(int argc, const char **argv)
 		     "Fields: comm,tid,pid,time,cpu,event,trace,ip,sym,dso,"
 		     "addr,symoff,srcline,period,iregs,uregs,brstack,"
 		     "brstacksym,flags,bpf-output,brstackinsn,brstackoff,"
-		     "callindent,insn,insnlen,synth,phys_addr,metric,misc,ipc",
+		     "callindent,insn,insnlen,synth,phys_addr,metric,misc,ipc,tod",
 		     parse_output_fields),
 	OPT_BOOLEAN('a', "all-cpus", &system_wide,
 		    "system-wide collection from all CPUs"),
@@ -3474,6 +3555,8 @@ int cmd_script(int argc, const char **argv)
 		    "Show round events (if recorded)"),
 	OPT_BOOLEAN('\0', "show-bpf-events", &script.show_bpf_events,
 		    "Show bpf related events (if recorded)"),
+	OPT_BOOLEAN('\0', "show-text-poke-events", &script.show_text_poke_events,
+		    "Show text poke related events (if recorded)"),
 	OPT_BOOLEAN('\0', "per-event-dump", &script.per_event_dump,
 		    "Dump trace output to files named by the monitored events"),
 	OPT_BOOLEAN('f', "force", &symbol_conf.force, "don't complain, do it"),
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 9be020e0098a..fddc97cac984 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -188,6 +188,8 @@ static struct perf_stat_config stat_config = {
 	.metric_only_len	= METRIC_ONLY_LEN,
 	.walltime_nsecs_stats	= &walltime_nsecs_stats,
 	.big_num		= true,
+	.ctl_fd			= -1,
+	.ctl_fd_ack		= -1
 };
 
 static bool cpus_map_matched(struct evsel *a, struct evsel *b)
@@ -402,7 +404,7 @@ static void read_counters(struct timespec *rs)
 {
 	struct evsel *counter;
 
-	if (!stat_config.summary && (read_affinity_counters(rs) < 0))
+	if (!stat_config.stop_read_counter && (read_affinity_counters(rs) < 0))
 		return;
 
 	evlist__for_each_entry(evsel_list, counter) {
@@ -475,18 +477,38 @@ static void process_interval(void)
 	print_counters(&rs, 0, NULL);
 }
 
+static bool handle_interval(unsigned int interval, int *times)
+{
+	if (interval) {
+		process_interval();
+		if (interval_count && !(--(*times)))
+			return true;
+	}
+	return false;
+}
+
 static void enable_counters(void)
 {
-	if (stat_config.initial_delay)
+	if (stat_config.initial_delay < 0) {
+		pr_info(EVLIST_DISABLED_MSG);
+		return;
+	}
+
+	if (stat_config.initial_delay > 0) {
+		pr_info(EVLIST_DISABLED_MSG);
 		usleep(stat_config.initial_delay * USEC_PER_MSEC);
+	}
 
 	/*
 	 * We need to enable counters only if:
 	 * - we don't have tracee (attaching to task or cpu)
 	 * - we have initial delay configured
 	 */
-	if (!target__none(&target) || stat_config.initial_delay)
+	if (!target__none(&target) || stat_config.initial_delay) {
 		evlist__enable(evsel_list);
+		if (stat_config.initial_delay > 0)
+			pr_info(EVLIST_ENABLED_MSG);
+	}
 }
 
 static void disable_counters(void)
@@ -540,6 +562,86 @@ static bool is_target_alive(struct target *_target,
 	return false;
 }
 
+static void process_evlist(struct evlist *evlist, unsigned int interval)
+{
+	enum evlist_ctl_cmd cmd = EVLIST_CTL_CMD_UNSUPPORTED;
+
+	if (evlist__ctlfd_process(evlist, &cmd) > 0) {
+		switch (cmd) {
+		case EVLIST_CTL_CMD_ENABLE:
+			pr_info(EVLIST_ENABLED_MSG);
+			if (interval)
+				process_interval();
+			break;
+		case EVLIST_CTL_CMD_DISABLE:
+			if (interval)
+				process_interval();
+			pr_info(EVLIST_DISABLED_MSG);
+			break;
+		case EVLIST_CTL_CMD_ACK:
+		case EVLIST_CTL_CMD_UNSUPPORTED:
+		default:
+			break;
+		}
+	}
+}
+
+static void compute_tts(struct timespec *time_start, struct timespec *time_stop,
+			int *time_to_sleep)
+{
+	int tts = *time_to_sleep;
+	struct timespec time_diff;
+
+	diff_timespec(&time_diff, time_stop, time_start);
+
+	tts -= time_diff.tv_sec * MSEC_PER_SEC +
+	       time_diff.tv_nsec / NSEC_PER_MSEC;
+
+	if (tts < 0)
+		tts = 0;
+
+	*time_to_sleep = tts;
+}
+
+static int dispatch_events(bool forks, int timeout, int interval, int *times)
+{
+	int child_exited = 0, status = 0;
+	int time_to_sleep, sleep_time;
+	struct timespec time_start, time_stop;
+
+	if (interval)
+		sleep_time = interval;
+	else if (timeout)
+		sleep_time = timeout;
+	else
+		sleep_time = 1000;
+
+	time_to_sleep = sleep_time;
+
+	while (!done) {
+		if (forks)
+			child_exited = waitpid(child_pid, &status, WNOHANG);
+		else
+			child_exited = !is_target_alive(&target, evsel_list->core.threads) ? 1 : 0;
+
+		if (child_exited)
+			break;
+
+		clock_gettime(CLOCK_MONOTONIC, &time_start);
+		if (!(evlist__poll(evsel_list, time_to_sleep) > 0)) { /* poll timeout or EINTR */
+			if (timeout || handle_interval(interval, times))
+				break;
+			time_to_sleep = sleep_time;
+		} else { /* fd revent */
+			process_evlist(evsel_list, interval);
+			clock_gettime(CLOCK_MONOTONIC, &time_stop);
+			compute_tts(&time_start, &time_stop, &time_to_sleep);
+		}
+	}
+
+	return status;
+}
+
 enum counter_recovery {
 	COUNTER_SKIP,
 	COUNTER_RETRY,
@@ -603,7 +705,6 @@ static int __run_perf_stat(int argc, const char **argv, int run_idx)
 	char msg[BUFSIZ];
 	unsigned long long t0, t1;
 	struct evsel *counter;
-	struct timespec ts;
 	size_t l;
 	int status = 0;
 	const bool forks = (argc > 0);
@@ -612,17 +713,6 @@ static int __run_perf_stat(int argc, const char **argv, int run_idx)
 	int i, cpu;
 	bool second_pass = false;
 
-	if (interval) {
-		ts.tv_sec  = interval / USEC_PER_MSEC;
-		ts.tv_nsec = (interval % USEC_PER_MSEC) * NSEC_PER_MSEC;
-	} else if (timeout) {
-		ts.tv_sec  = timeout / USEC_PER_MSEC;
-		ts.tv_nsec = (timeout % USEC_PER_MSEC) * NSEC_PER_MSEC;
-	} else {
-		ts.tv_sec  = 1;
-		ts.tv_nsec = 0;
-	}
-
 	if (forks) {
 		if (perf_evlist__prepare_workload(evsel_list, &target, argv, is_pipe,
 						  workload_exec_failed_signal) < 0) {
@@ -779,16 +869,8 @@ try_again_reset:
 		perf_evlist__start_workload(evsel_list);
 		enable_counters();
 
-		if (interval || timeout) {
-			while (!waitpid(child_pid, &status, WNOHANG)) {
-				nanosleep(&ts, NULL);
-				if (timeout)
-					break;
-				process_interval();
-				if (interval_count && !(--times))
-					break;
-			}
-		}
+		if (interval || timeout || evlist__ctlfd_initialized(evsel_list))
+			status = dispatch_events(forks, timeout, interval, &times);
 		if (child_pid != -1) {
 			if (timeout)
 				kill(child_pid, SIGTERM);
@@ -805,18 +887,7 @@ try_again_reset:
 			psignal(WTERMSIG(status), argv[0]);
 	} else {
 		enable_counters();
-		while (!done) {
-			nanosleep(&ts, NULL);
-			if (!is_target_alive(&target, evsel_list->core.threads))
-				break;
-			if (timeout)
-				break;
-			if (interval) {
-				process_interval();
-				if (interval_count && !(--times))
-					break;
-			}
-		}
+		status = dispatch_events(forks, timeout, interval, &times);
 	}
 
 	disable_counters();
@@ -826,9 +897,9 @@ try_again_reset:
 	if (stat_config.walltime_run_table)
 		stat_config.walltime_run[run_idx] = t1 - t0;
 
-	if (interval) {
+	if (interval && stat_config.summary) {
 		stat_config.interval = 0;
-		stat_config.summary = true;
+		stat_config.stop_read_counter = true;
 		init_stats(&walltime_nsecs_stats);
 		update_stats(&walltime_nsecs_stats, t1 - t0);
 
@@ -970,6 +1041,33 @@ static int parse_metric_groups(const struct option *opt,
 					 &stat_config.metric_events);
 }
 
+static int parse_control_option(const struct option *opt,
+				const char *str,
+				int unset __maybe_unused)
+{
+	char *comma = NULL, *endptr = NULL;
+	struct perf_stat_config *config = (struct perf_stat_config *)opt->value;
+
+	if (strncmp(str, "fd:", 3))
+		return -EINVAL;
+
+	config->ctl_fd = strtoul(&str[3], &endptr, 0);
+	if (endptr == &str[3])
+		return -EINVAL;
+
+	comma = strchr(str, ',');
+	if (comma) {
+		if (endptr != comma)
+			return -EINVAL;
+
+		config->ctl_fd_ack = strtoul(comma + 1, &endptr, 0);
+		if (endptr == comma + 1 || *endptr != '\0')
+			return -EINVAL;
+	}
+
+	return 0;
+}
+
 static struct option stat_options[] = {
 	OPT_BOOLEAN('T', "transaction", &transaction_run,
 		    "hardware transaction statistics"),
@@ -1041,8 +1139,8 @@ static struct option stat_options[] = {
 		     "aggregate counts per thread", AGGR_THREAD),
 	OPT_SET_UINT(0, "per-node", &stat_config.aggr_mode,
 		     "aggregate counts per numa node", AGGR_NODE),
-	OPT_UINTEGER('D', "delay", &stat_config.initial_delay,
-		     "ms to wait before starting measurement after program start"),
+	OPT_INTEGER('D', "delay", &stat_config.initial_delay,
+		    "ms to wait before starting measurement after program start (-1: start with events disabled)"),
 	OPT_CALLBACK_NOOPT(0, "metric-only", &stat_config.metric_only, NULL,
 			"Only print computed metrics. No raw values", enable_metric_only),
 	OPT_BOOLEAN(0, "metric-no-group", &stat_config.metric_no_group,
@@ -1066,11 +1164,17 @@ static struct option stat_options[] = {
 		    "Use with 'percore' event qualifier to show the event "
 		    "counts of one hardware thread by sum up total hardware "
 		    "threads of same physical core"),
+	OPT_BOOLEAN(0, "summary", &stat_config.summary,
+		       "print summary for interval mode"),
 #ifdef HAVE_LIBPFM
 	OPT_CALLBACK(0, "pfm-events", &evsel_list, "event",
 		"libpfm4 event selector. use 'perf list' to list available events",
 		parse_libpfm_events_option),
 #endif
+	OPT_CALLBACK(0, "control", &stat_config, "fd:ctl-fd[,ack-fd]",
+		     "Listen on ctl-fd descriptor for command to control measurement ('enable': enable events, 'disable': disable events).\n"
+		     "\t\t\t  Optionally send control command completion ('ack\\n') to ack-fd descriptor.",
+		      parse_control_option),
 	OPT_END()
 };
 
@@ -1679,19 +1783,17 @@ static int add_default_attributes(void)
 		if (target__has_cpu(&target))
 			default_attrs0[0].config = PERF_COUNT_SW_CPU_CLOCK;
 
-		if (perf_evlist__add_default_attrs(evsel_list, default_attrs0) < 0)
+		if (evlist__add_default_attrs(evsel_list, default_attrs0) < 0)
 			return -1;
 		if (pmu_have_event("cpu", "stalled-cycles-frontend")) {
-			if (perf_evlist__add_default_attrs(evsel_list,
-						frontend_attrs) < 0)
+			if (evlist__add_default_attrs(evsel_list, frontend_attrs) < 0)
 				return -1;
 		}
 		if (pmu_have_event("cpu", "stalled-cycles-backend")) {
-			if (perf_evlist__add_default_attrs(evsel_list,
-						backend_attrs) < 0)
+			if (evlist__add_default_attrs(evsel_list, backend_attrs) < 0)
 				return -1;
 		}
-		if (perf_evlist__add_default_attrs(evsel_list, default_attrs1) < 0)
+		if (evlist__add_default_attrs(evsel_list, default_attrs1) < 0)
 			return -1;
 	}
 
@@ -1701,21 +1803,21 @@ static int add_default_attributes(void)
 		return 0;
 
 	/* Append detailed run extra attributes: */
-	if (perf_evlist__add_default_attrs(evsel_list, detailed_attrs) < 0)
+	if (evlist__add_default_attrs(evsel_list, detailed_attrs) < 0)
 		return -1;
 
 	if (detailed_run < 2)
 		return 0;
 
 	/* Append very detailed run extra attributes: */
-	if (perf_evlist__add_default_attrs(evsel_list, very_detailed_attrs) < 0)
+	if (evlist__add_default_attrs(evsel_list, very_detailed_attrs) < 0)
 		return -1;
 
 	if (detailed_run < 3)
 		return 0;
 
 	/* Append very, very detailed run extra attributes: */
-	return perf_evlist__add_default_attrs(evsel_list, very_very_detailed_attrs);
+	return evlist__add_default_attrs(evsel_list, very_very_detailed_attrs);
 }
 
 static const char * const stat_record_usage[] = {
@@ -2242,6 +2344,9 @@ int cmd_stat(int argc, const char **argv)
 	signal(SIGALRM, skip_signal);
 	signal(SIGABRT, skip_signal);
 
+	if (evlist__initialize_ctlfd(evsel_list, stat_config.ctl_fd, stat_config.ctl_fd_ack))
+		goto out;
+
 	status = 0;
 	for (run_idx = 0; forever || run_idx < stat_config.run_count; run_idx++) {
 		if (stat_config.run_count != 1 && verbose > 0)
@@ -2261,6 +2366,8 @@ int cmd_stat(int argc, const char **argv)
 	if (!forever && status != -1 && (!interval || stat_config.summary))
 		print_counters(NULL, argc, argv);
 
+	evlist__finalize_ctlfd(evsel_list);
+
 	if (STAT_RECORD) {
 		/*
 		 * We synthesize the kernel mmap record just so that older tools
@@ -2307,6 +2414,7 @@ out:
 
 	evlist__delete(evsel_list);
 
+	metricgroup__rblist_exit(&stat_config.metric_events);
 	runtime_stat_delete(&stat_config);
 
 	return status;
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index 13889d73f8dd..7c64134472c7 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -1627,7 +1627,7 @@ int cmd_top(int argc, const char **argv)
 		goto out_delete_evlist;
 
 	if (!top.evlist->core.nr_entries &&
-	    perf_evlist__add_default(top.evlist) < 0) {
+	    evlist__add_default(top.evlist) < 0) {
 		pr_err("Not enough memory for event selector list\n");
 		goto out_delete_evlist;
 	}
@@ -1746,6 +1746,7 @@ int cmd_top(int argc, const char **argv)
 		goto out_delete_evlist;
 	}
 
+#ifdef HAVE_LIBBPF_SUPPORT
 	if (!top.record_opts.no_bpf_event) {
 		top.sb_evlist = evlist__new();
 
@@ -1759,6 +1760,7 @@ int cmd_top(int argc, const char **argv)
 			goto out_delete_evlist;
 		}
 	}
+#endif
 
 	if (perf_evlist__start_sb_thread(top.sb_evlist, target)) {
 		pr_debug("Couldn't start the BPF side band thread:\nBPF programs starting from now on won't be annotatable\n");
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 4cbb64edc998..bea461b6f937 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -3917,8 +3917,7 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
 	}
 
 	if (trace->sched &&
-	    perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
-				   trace__sched_stat_runtime))
+	    evlist__add_newtp(evlist, "sched", "sched_stat_runtime", trace__sched_stat_runtime))
 		goto out_error_sched_stat_runtime;
 	/*
 	 * If a global cgroup was set, apply it to all the events without an
@@ -4150,11 +4149,11 @@ out_error_raw_syscalls:
 	goto out_error;
 
 out_error_mmap:
-	perf_evlist__strerror_mmap(evlist, errno, errbuf, sizeof(errbuf));
+	evlist__strerror_mmap(evlist, errno, errbuf, sizeof(errbuf));
 	goto out_error;
 
 out_error_open:
-	perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));
+	evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));
 
 out_error:
 	fprintf(trace->output, "%s\n", errbuf);
@@ -4813,7 +4812,7 @@ int cmd_trace(int argc, const char **argv)
 			"per thread proc mmap processing timeout in ms"),
 	OPT_CALLBACK('G', "cgroup", &trace, "name", "monitor event in cgroup name only",
 		     trace__parse_cgroups),
-	OPT_UINTEGER('D', "delay", &trace.opts.initial_delay,
+	OPT_INTEGER('D', "delay", &trace.opts.initial_delay,
 		     "ms to wait before starting measurement after program "
 		     "start"),
 	OPTS_EVSWITCH(&trace.evswitch),
diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh
index 94c2bc22c2bb..0b4d6431b072 100755
--- a/tools/perf/check-headers.sh
+++ b/tools/perf/check-headers.sh
@@ -128,6 +128,9 @@ check arch/x86/lib/insn.c             '-I "^#include [\"<]\(../include/\)*asm/in
 # diff non-symmetric files
 check_2 tools/perf/arch/x86/entry/syscalls/syscall_64.tbl arch/x86/entry/syscalls/syscall_64.tbl
 
+# These will require a beauty_check when we get some more like that
+check_2 tools/perf/trace/beauty/include/linux/socket.h include/linux/socket.h
+
 # check duplicated library files
 check_2 tools/perf/util/hashmap.h tools/lib/bpf/hashmap.h
 check_2 tools/perf/util/hashmap.c tools/lib/bpf/hashmap.c
diff --git a/tools/perf/pmu-events/arch/powerpc/power9/metrics.json b/tools/perf/pmu-events/arch/powerpc/power9/metrics.json
index 80816d6402e9..f8784c608479 100644
--- a/tools/perf/pmu-events/arch/powerpc/power9/metrics.json
+++ b/tools/perf/pmu-events/arch/powerpc/power9/metrics.json
@@ -60,7 +60,7 @@
     },
     {
         "BriefDescription": "Stalls due to short latency decimal floating ops.",
-        "MetricExpr": "(PM_CMPLU_STALL_DFU - PM_CMPLU_STALL_DFLONG)/PM_RUN_INST_CMPL",
+        "MetricExpr": "dfu_stall_cpi - dflong_stall_cpi",
         "MetricGroup": "cpi_breakdown",
         "MetricName": "dfu_other_stall_cpi"
     },
@@ -72,7 +72,7 @@
     },
     {
         "BriefDescription": "Completion stall by Dcache miss which resolved off node memory/cache",
-        "MetricExpr": "(PM_CMPLU_STALL_DMISS_L3MISS - PM_CMPLU_STALL_DMISS_L21_L31 - PM_CMPLU_STALL_DMISS_LMEM - PM_CMPLU_STALL_DMISS_REMOTE)/PM_RUN_INST_CMPL",
+        "MetricExpr": "dmiss_non_local_stall_cpi - dmiss_remote_stall_cpi",
         "MetricGroup": "cpi_breakdown",
         "MetricName": "dmiss_distant_stall_cpi"
     },
@@ -90,7 +90,7 @@
     },
     {
         "BriefDescription": "Completion stall due to cache miss that resolves in the L2 or L3 without conflict",
-        "MetricExpr": "(PM_CMPLU_STALL_DMISS_L2L3 - PM_CMPLU_STALL_DMISS_L2L3_CONFLICT)/PM_RUN_INST_CMPL",
+        "MetricExpr": "dmiss_l2l3_stall_cpi - dmiss_l2l3_conflict_stall_cpi",
         "MetricGroup": "cpi_breakdown",
         "MetricName": "dmiss_l2l3_noconflict_stall_cpi"
     },
@@ -114,7 +114,7 @@
     },
     {
         "BriefDescription": "Completion stall by Dcache miss which resolved outside of local memory",
-        "MetricExpr": "(PM_CMPLU_STALL_DMISS_L3MISS - PM_CMPLU_STALL_DMISS_L21_L31 - PM_CMPLU_STALL_DMISS_LMEM)/PM_RUN_INST_CMPL",
+        "MetricExpr": "dmiss_l3miss_stall_cpi - dmiss_l21_l31_stall_cpi - dmiss_lmem_stall_cpi",
         "MetricGroup": "cpi_breakdown",
         "MetricName": "dmiss_non_local_stall_cpi"
     },
@@ -126,7 +126,7 @@
     },
     {
         "BriefDescription": "Stalls due to short latency double precision ops.",
-        "MetricExpr": "(PM_CMPLU_STALL_DP - PM_CMPLU_STALL_DPLONG)/PM_RUN_INST_CMPL",
+        "MetricExpr": "dp_stall_cpi - dplong_stall_cpi",
         "MetricGroup": "cpi_breakdown",
         "MetricName": "dp_other_stall_cpi"
     },
@@ -155,7 +155,7 @@
         "MetricName": "emq_full_stall_cpi"
     },
     {
-        "MetricExpr": "(PM_CMPLU_STALL_ERAT_MISS + PM_CMPLU_STALL_EMQ_FULL)/PM_RUN_INST_CMPL",
+        "MetricExpr": "erat_miss_stall_cpi + emq_full_stall_cpi",
         "MetricGroup": "cpi_breakdown",
         "MetricName": "emq_stall_cpi"
     },
@@ -173,7 +173,7 @@
     },
     {
         "BriefDescription": "Completion stall due to execution units for other reasons.",
-        "MetricExpr": "(PM_CMPLU_STALL_EXEC_UNIT - PM_CMPLU_STALL_FXU - PM_CMPLU_STALL_DP - PM_CMPLU_STALL_DFU - PM_CMPLU_STALL_PM - PM_CMPLU_STALL_CRYPTO - PM_CMPLU_STALL_VFXU - PM_CMPLU_STALL_VDP)/PM_RUN_INST_CMPL",
+        "MetricExpr": "exec_unit_stall_cpi - scalar_stall_cpi - vector_stall_cpi",
         "MetricGroup": "cpi_breakdown",
         "MetricName": "exec_unit_other_stall_cpi"
     },
@@ -197,7 +197,7 @@
     },
     {
         "BriefDescription": "Stalls due to short latency integer ops",
-        "MetricExpr": "(PM_CMPLU_STALL_FXU - PM_CMPLU_STALL_FXLONG)/PM_RUN_INST_CMPL",
+        "MetricExpr": "fxu_stall_cpi - fxlong_stall_cpi",
         "MetricGroup": "cpi_breakdown",
         "MetricName": "fxu_other_stall_cpi"
     },
@@ -221,7 +221,7 @@
     },
     {
         "BriefDescription": "Instruction Completion Table other stalls",
-        "MetricExpr": "(PM_ICT_NOSLOT_CYC - PM_ICT_NOSLOT_IC_MISS - PM_ICT_NOSLOT_BR_MPRED_ICMISS - PM_ICT_NOSLOT_BR_MPRED - PM_ICT_NOSLOT_DISP_HELD)/PM_RUN_INST_CMPL",
+        "MetricExpr": "nothing_dispatched_cpi - ict_noslot_ic_miss_cpi - ict_noslot_br_mpred_icmiss_cpi - ict_noslot_br_mpred_cpi - ict_noslot_disp_held_cpi",
         "MetricGroup": "cpi_breakdown",
         "MetricName": "ict_noslot_cyc_other_cpi"
     },
@@ -245,7 +245,7 @@
     },
     {
         "BriefDescription": "ICT_NOSLOT_DISP_HELD_OTHER_CPI",
-        "MetricExpr": "(PM_ICT_NOSLOT_DISP_HELD - PM_ICT_NOSLOT_DISP_HELD_HB_FULL - PM_ICT_NOSLOT_DISP_HELD_SYNC - PM_ICT_NOSLOT_DISP_HELD_TBEGIN - PM_ICT_NOSLOT_DISP_HELD_ISSQ)/PM_RUN_INST_CMPL",
+        "MetricExpr": "ict_noslot_disp_held_cpi - ict_noslot_disp_held_hb_full_cpi - ict_noslot_disp_held_sync_cpi - ict_noslot_disp_held_tbegin_cpi - ict_noslot_disp_held_issq_cpi",
         "MetricGroup": "cpi_breakdown",
         "MetricName": "ict_noslot_disp_held_other_cpi"
     },
@@ -263,7 +263,7 @@
     },
     {
         "BriefDescription": "ICT_NOSLOT_IC_L2_CPI",
-        "MetricExpr": "(PM_ICT_NOSLOT_IC_MISS - PM_ICT_NOSLOT_IC_L3 - PM_ICT_NOSLOT_IC_L3MISS)/PM_RUN_INST_CMPL",
+        "MetricExpr": "ict_noslot_ic_miss_cpi - ict_noslot_ic_l3_cpi - ict_noslot_ic_l3miss_cpi",
         "MetricGroup": "cpi_breakdown",
         "MetricName": "ict_noslot_ic_l2_cpi"
     },
@@ -286,7 +286,7 @@
         "MetricName": "ict_noslot_ic_miss_cpi"
     },
     {
-        "MetricExpr": "(PM_NTC_ISSUE_HELD_DARQ_FULL + PM_NTC_ISSUE_HELD_ARB + PM_NTC_ISSUE_HELD_OTHER)/PM_RUN_INST_CMPL",
+        "MetricExpr": "ntc_issue_held_darq_full_cpi + ntc_issue_held_arb_cpi + ntc_issue_held_other_cpi",
         "MetricGroup": "cpi_breakdown",
         "MetricName": "issue_hold_cpi"
     },
@@ -327,7 +327,7 @@
         "MetricName": "lrq_other_stall_cpi"
     },
     {
-        "MetricExpr": "(PM_CMPLU_STALL_LMQ_FULL + PM_CMPLU_STALL_ST_FWD + PM_CMPLU_STALL_LHS + PM_CMPLU_STALL_LSU_MFSPR + PM_CMPLU_STALL_LARX + PM_CMPLU_STALL_LRQ_OTHER)/PM_RUN_INST_CMPL",
+        "MetricExpr": "lmq_full_stall_cpi + st_fwd_stall_cpi + lhs_stall_cpi + lsu_mfspr_stall_cpi + larx_stall_cpi + lrq_other_stall_cpi",
         "MetricGroup": "cpi_breakdown",
         "MetricName": "lrq_stall_cpi"
     },
@@ -338,7 +338,7 @@
         "MetricName": "lsaq_arb_stall_cpi"
     },
     {
-        "MetricExpr": "(PM_CMPLU_STALL_LRQ_FULL + PM_CMPLU_STALL_SRQ_FULL + PM_CMPLU_STALL_LSAQ_ARB)/PM_RUN_INST_CMPL",
+        "MetricExpr": "lrq_full_stall_cpi + srq_full_stall_cpi + lsaq_arb_stall_cpi",
         "MetricGroup": "cpi_breakdown",
         "MetricName": "lsaq_stall_cpi"
     },
@@ -362,7 +362,7 @@
     },
     {
         "BriefDescription": "Completion LSU stall for other reasons",
-        "MetricExpr": "(PM_CMPLU_STALL_LSU - PM_CMPLU_STALL_LSU_FIN - PM_CMPLU_STALL_STORE_FINISH - PM_CMPLU_STALL_STORE_DATA - PM_CMPLU_STALL_EIEIO - PM_CMPLU_STALL_STCX - PM_CMPLU_STALL_SLB - PM_CMPLU_STALL_TEND - PM_CMPLU_STALL_PASTE - PM_CMPLU_STALL_TLBIE - PM_CMPLU_STALL_STORE_PIPE_ARB - PM_CMPLU_STALL_STORE_FIN_ARB - PM_CMPLU_STALL_LOAD_FINISH + PM_CMPLU_STALL_DCACHE_MISS - PM_CMPLU_STALL_LMQ_FULL - PM_CMPLU_STALL_ST_FWD - PM_CMPLU_STALL_LHS - PM_CMPLU_STALL_LSU_MFSPR - PM_CMPLU_STALL_LARX - PM_CMPLU_STALL_LRQ_OTHER + PM_CMPLU_STALL_ERAT_MISS + PM_CMPLU_STALL_EMQ_FULL - PM_CMPLU_STALL_LRQ_FULL - PM_CMPLU_STALL_SRQ_FULL - PM_CMPLU_STALL_LSAQ_ARB) / PM_RUN_INST_CMPL",
+        "MetricExpr": "lsu_stall_cpi - lsu_fin_stall_cpi - store_finish_stall_cpi - srq_stall_cpi - load_finish_stall_cpi + lsu_stall_dcache_miss_cpi - lrq_stall_cpi + emq_stall_cpi - lsaq_stall_cpi",
         "MetricGroup": "cpi_breakdown",
         "MetricName": "lsu_other_stall_cpi"
     },
@@ -434,13 +434,13 @@
     },
     {
         "BriefDescription": "Cycles unaccounted for.",
-        "MetricExpr": "(PM_RUN_CYC - PM_1PLUS_PPC_CMPL - PM_CMPLU_STALL_THRD - PM_CMPLU_STALL - PM_ICT_NOSLOT_CYC)/PM_RUN_INST_CMPL",
+        "MetricExpr": "run_cpi - completion_cpi - thread_block_stall_cpi - stall_cpi - nothing_dispatched_cpi",
         "MetricGroup": "cpi_breakdown",
         "MetricName": "other_cpi"
     },
     {
         "BriefDescription": "Completion stall for other reasons",
-        "MetricExpr": "(PM_CMPLU_STALL - PM_CMPLU_STALL_NTC_DISP_FIN - PM_CMPLU_STALL_NTC_FLUSH - PM_CMPLU_STALL_LSU - PM_CMPLU_STALL_EXEC_UNIT - PM_CMPLU_STALL_BRU)/PM_RUN_INST_CMPL",
+        "MetricExpr": "stall_cpi - ntc_disp_fin_stall_cpi - ntc_flush_stall_cpi - lsu_stall_cpi - exec_unit_stall_cpi - bru_stall_cpi",
         "MetricGroup": "cpi_breakdown",
         "MetricName": "other_stall_cpi"
     },
@@ -469,7 +469,7 @@
         "MetricName": "run_cyc_cpi"
     },
     {
-        "MetricExpr": "(PM_CMPLU_STALL_FXU + PM_CMPLU_STALL_DP + PM_CMPLU_STALL_DFU + PM_CMPLU_STALL_PM + PM_CMPLU_STALL_CRYPTO)/PM_RUN_INST_CMPL",
+        "MetricExpr": "fxu_stall_cpi + dp_stall_cpi + dfu_stall_cpi + pm_stall_cpi + crypto_stall_cpi",
         "MetricGroup": "cpi_breakdown",
         "MetricName": "scalar_stall_cpi"
     },
@@ -492,7 +492,7 @@
         "MetricName": "srq_full_stall_cpi"
     },
     {
-        "MetricExpr": "(PM_CMPLU_STALL_STORE_DATA + PM_CMPLU_STALL_EIEIO + PM_CMPLU_STALL_STCX + PM_CMPLU_STALL_SLB + PM_CMPLU_STALL_TEND + PM_CMPLU_STALL_PASTE + PM_CMPLU_STALL_TLBIE + PM_CMPLU_STALL_STORE_PIPE_ARB + PM_CMPLU_STALL_STORE_FIN_ARB)/PM_RUN_INST_CMPL",
+        "MetricExpr": "store_data_stall_cpi + eieio_stall_cpi + stcx_stall_cpi + slb_stall_cpi + tend_stall_cpi + paste_stall_cpi + tlbie_stall_cpi + store_pipe_arb_stall_cpi + store_fin_arb_stall_cpi",
         "MetricGroup": "cpi_breakdown",
         "MetricName": "srq_stall_cpi"
     },
@@ -558,7 +558,7 @@
     },
     {
         "BriefDescription": "Vector stalls due to small latency double precision ops",
-        "MetricExpr": "(PM_CMPLU_STALL_VDP - PM_CMPLU_STALL_VDPLONG)/PM_RUN_INST_CMPL",
+        "MetricExpr": "vdp_stall_cpi - vdplong_stall_cpi",
         "MetricGroup": "cpi_breakdown",
         "MetricName": "vdp_other_stall_cpi"
     },
@@ -575,7 +575,7 @@
         "MetricName": "vdplong_stall_cpi"
     },
     {
-        "MetricExpr": "(PM_CMPLU_STALL_VFXU + PM_CMPLU_STALL_VDP)/PM_RUN_INST_CMPL",
+        "MetricExpr": "vfxu_stall_cpi + vdp_stall_cpi",
         "MetricGroup": "cpi_breakdown",
         "MetricName": "vector_stall_cpi"
     },
@@ -587,7 +587,7 @@
     },
     {
         "BriefDescription": "Vector stalls due to small latency integer ops",
-        "MetricExpr": "(PM_CMPLU_STALL_VFXU - PM_CMPLU_STALL_VFXLONG)/PM_RUN_INST_CMPL",
+        "MetricExpr": "vfxu_stall_cpi - vfxlong_stall_cpi",
         "MetricGroup": "cpi_breakdown",
         "MetricName": "vfxu_other_stall_cpi"
     },
@@ -1844,7 +1844,7 @@
     },
     {
         "BriefDescription": "% of DL1 reloads from Private L3, other core per Inst",
-        "MetricExpr": "(PM_DATA_FROM_L31_MOD + PM_DATA_FROM_L31_SHR) * 100 / PM_RUN_INST_CMPL",
+        "MetricExpr": "dl1_reload_from_l31_mod_rate_percent + dl1_reload_from_l31_shr_rate_percent",
         "MetricName": "dl1_reload_from_l31_rate_percent"
     },
     {
@@ -1979,7 +1979,7 @@
     },
     {
         "BriefDescription": "Completion stall because a different thread was using the completion pipe",
-        "MetricExpr": "(PM_CMPLU_STALL_THRD - PM_CMPLU_STALL_EXCEPTION - PM_CMPLU_STALL_ANY_SYNC - PM_CMPLU_STALL_SYNC_PMU_INT - PM_CMPLU_STALL_SPEC_FINISH - PM_CMPLU_STALL_FLUSH_ANY_THREAD - PM_CMPLU_STALL_LSU_FLUSH_NEXT - PM_CMPLU_STALL_NESTED_TBEGIN - PM_CMPLU_STALL_NESTED_TEND - PM_CMPLU_STALL_MTFPSCR)/PM_RUN_INST_CMPL",
+        "MetricExpr": "thread_block_stall_cpi - exception_stall_cpi - any_sync_stall_cpi - sync_pmu_int_stall_cpi - spec_finish_stall_cpi - flush_any_thread_stall_cpi - lsu_flush_next_stall_cpi - nested_tbegin_stall_cpi - nested_tend_stall_cpi - mtfpscr_stall_cpi",
         "MetricName": "other_thread_cmpl_stall"
     },
     {
diff --git a/tools/perf/pmu-events/arch/powerpc/power9/nest_metrics.json b/tools/perf/pmu-events/arch/powerpc/power9/nest_metrics.json
index c121e526442a..8383a37647ad 100644
--- a/tools/perf/pmu-events/arch/powerpc/power9/nest_metrics.json
+++ b/tools/perf/pmu-events/arch/powerpc/power9/nest_metrics.json
@@ -15,5 +15,40 @@
 	"MetricExpr": "(hv_24x7@PM_PB_CYC\\,chip\\=?@ )",
         "MetricName": "PowerBUS_Frequency",
         "ScaleUnit": "2.5e-7GHz"
+    },
+    {
+	"MetricExpr" : "nest_mcs01_imc@PM_MCS01_128B_RD_DISP_PORT01@ + nest_mcs01_imc@PM_MCS01_128B_RD_DISP_PORT23@",
+	"MetricName" : "mcs01-read",
+	"MetricGroup" : "memory_bw",
+	"ScaleUnit": "6.1e-5MB"
+    },
+    {
+	"MetricExpr" : "nest_mcs23_imc@PM_MCS23_128B_RD_DISP_PORT01@ + nest_mcs23_imc@PM_MCS23_128B_RD_DISP_PORT23@",
+	"MetricName" : "mcs23-read",
+	"MetricGroup" : "memory_bw",
+	"ScaleUnit": "6.1e-5MB"
+    },
+    {
+	"MetricExpr" : "nest_mcs01_imc@PM_MCS01_128B_WR_DISP_PORT01@ + nest_mcs01_imc@PM_MCS01_128B_WR_DISP_PORT23@",
+	"MetricName" : "mcs01-write",
+	"MetricGroup" : "memory_bw",
+	"ScaleUnit": "6.1e-5MB"
+    },
+    {
+	"MetricExpr" : "nest_mcs23_imc@PM_MCS23_128B_WR_DISP_PORT01@ + nest_mcs23_imc@PM_MCS23_128B_WR_DISP_PORT23@",
+	"MetricName" : "mcs23-write",
+	"MetricGroup" : "memory-bandwidth",
+	"ScaleUnit": "6.1e-5MB"
+    },
+    {
+	"MetricExpr" : "nest_powerbus0_imc@PM_PB_CYC@",
+	"MetricName" : "powerbus_freq",
+	"ScaleUnit": "1e-9GHz"
+    },
+    {
+	"MetricExpr" : "(nest_mcs01_imc@PM_MCS01_128B_RD_DISP_PORT01@ + nest_mcs01_imc@PM_MCS01_128B_RD_DISP_PORT23@ + nest_mcs23_imc@PM_MCS23_128B_RD_DISP_PORT01@ + nest_mcs23_imc@PM_MCS23_128B_RD_DISP_PORT23@ + nest_mcs01_imc@PM_MCS01_128B_WR_DISP_PORT01@ + nest_mcs01_imc@PM_MCS01_128B_WR_DISP_PORT23@ + nest_mcs23_imc@PM_MCS23_128B_WR_DISP_PORT01@ + nest_mcs23_imc@PM_MCS23_128B_WR_DISP_PORT23@)",
+	"MetricName" : "Memory-bandwidth-MCS",
+	"MetricGroup" : "memory_bw",
+	"ScaleUnit": "6.1e-5MB"
     }
 ]
diff --git a/tools/perf/pmu-events/arch/x86/amdzen1/core.json b/tools/perf/pmu-events/arch/x86/amdzen1/core.json
index 7e1aa8273935..653b11b23399 100644
--- a/tools/perf/pmu-events/arch/x86/amdzen1/core.json
+++ b/tools/perf/pmu-events/arch/x86/amdzen1/core.json
@@ -61,7 +61,7 @@
   {
     "EventName": "ex_ret_brn_ind_misp",
     "EventCode": "0xca",
-    "BriefDescription": "Retired Indirect Branch Instructions Mispredicted.",
+    "BriefDescription": "Retired Indirect Branch Instructions Mispredicted."
   },
   {
     "EventName": "ex_ret_mmx_fp_instr.sse_instr",
diff --git a/tools/perf/pmu-events/arch/x86/amdzen2/core.json b/tools/perf/pmu-events/arch/x86/amdzen2/core.json
index de89e5a44ff1..4b75183da94a 100644
--- a/tools/perf/pmu-events/arch/x86/amdzen2/core.json
+++ b/tools/perf/pmu-events/arch/x86/amdzen2/core.json
@@ -125,6 +125,6 @@
   {
     "EventName": "ex_ret_fus_brnch_inst",
     "EventCode": "0x1d0",
-    "BriefDescription": "Retired Fused Instructions. The number of fuse-branch instructions retired per cycle. The number of events logged per cycle can vary from 0-8.",
+    "BriefDescription": "Retired Fused Instructions. The number of fuse-branch instructions retired per cycle. The number of events logged per cycle can vary from 0-8."
   }
 ]
diff --git a/tools/perf/pmu-events/jevents.c b/tools/perf/pmu-events/jevents.c
index fa86c5f997cc..fc9c158bfa13 100644
--- a/tools/perf/pmu-events/jevents.c
+++ b/tools/perf/pmu-events/jevents.c
@@ -137,7 +137,7 @@ static char *fixregex(char *s)
 		return s;
 
 	/* allocate space for a new string */
-	fixed = (char *) malloc(len + 1);
+	fixed = (char *) malloc(len + esc_count + 1);
 	if (!fixed)
 		return NULL;
 
diff --git a/tools/perf/tests/Build b/tools/perf/tests/Build
index cd00498a5dce..84352fc49a20 100644
--- a/tools/perf/tests/Build
+++ b/tools/perf/tests/Build
@@ -59,6 +59,7 @@ perf-y += genelf.o
 perf-y += api-io.o
 perf-y += demangle-java-test.o
 perf-y += pfm.o
+perf-y += parse-metric.o
 
 $(OUTPUT)tests/llvm-src-base.c: tests/bpf-script-example.c tests/Build
 	$(call rule_mkdir)
diff --git a/tools/perf/tests/attr/README b/tools/perf/tests/attr/README
index 430024f618f1..a36f49fb4dbe 100644
--- a/tools/perf/tests/attr/README
+++ b/tools/perf/tests/attr/README
@@ -49,10 +49,12 @@ Following tests are defined (with perf commands):
   perf record --call-graph fp kill              (test-record-graph-fp)
   perf record --group -e cycles,instructions kill (test-record-group)
   perf record -e '{cycles,instructions}' kill   (test-record-group1)
+  perf record -e '{cycles/period=1/,instructions/period=2/}:S' kill (test-record-group2)
   perf record -D kill                           (test-record-no-delay)
   perf record -i kill                           (test-record-no-inherit)
   perf record -n kill                           (test-record-no-samples)
   perf record -c 100 -P kill                    (test-record-period)
+  perf record -c 1 --pfm-events=cycles:period=2 (test-record-pfm-period)
   perf record -R kill                           (test-record-raw)
   perf stat -e cycles kill                      (test-stat-basic)
   perf stat kill                                (test-stat-default)
diff --git a/tools/perf/tests/attr/test-record-group2 b/tools/perf/tests/attr/test-record-group2
new file mode 100644
index 000000000000..6b9f8d182ce1
--- /dev/null
+++ b/tools/perf/tests/attr/test-record-group2
@@ -0,0 +1,29 @@
+[config]
+command = record
+args    = --no-bpf-event -e '{cycles/period=1234000/,instructions/period=6789000/}:S' kill >/dev/null 2>&1
+ret     = 1
+
+[event-1:base-record]
+fd=1
+group_fd=-1
+config=0|1
+sample_period=1234000
+sample_type=87
+read_format=12
+inherit=0
+freq=0
+
+[event-2:base-record]
+fd=2
+group_fd=1
+config=0|1
+sample_period=6789000
+sample_type=87
+read_format=12
+disabled=0
+inherit=0
+mmap=0
+comm=0
+freq=0
+enable_on_exec=0
+task=0
diff --git a/tools/perf/tests/attr/test-record-pfm-period b/tools/perf/tests/attr/test-record-pfm-period
new file mode 100644
index 000000000000..368f5b814094
--- /dev/null
+++ b/tools/perf/tests/attr/test-record-pfm-period
@@ -0,0 +1,9 @@
+[config]
+command = record
+args    = --no-bpf-event -c 10000 --pfm-events=cycles:period=77777 kill >/dev/null 2>&1
+ret     = 1
+
+[event:base-record]
+sample_period=77777
+sample_type=7
+freq=0
diff --git a/tools/perf/tests/bp_signal.c b/tools/perf/tests/bp_signal.c
index da8ec1e8e064..cc9fbcedb364 100644
--- a/tools/perf/tests/bp_signal.c
+++ b/tools/perf/tests/bp_signal.c
@@ -45,10 +45,13 @@ volatile long the_var;
 #if defined (__x86_64__)
 extern void __test_function(volatile long *ptr);
 asm (
+	".pushsection .text;"
 	".globl __test_function\n"
+	".type __test_function, @function;"
 	"__test_function:\n"
 	"incq (%rdi)\n"
-	"ret\n");
+	"ret\n"
+	".popsection\n");
 #else
 static void __test_function(volatile long *ptr)
 {
diff --git a/tools/perf/tests/bpf.c b/tools/perf/tests/bpf.c
index 5d20bf8397f0..cd77e334e577 100644
--- a/tools/perf/tests/bpf.c
+++ b/tools/perf/tests/bpf.c
@@ -197,7 +197,7 @@ static int do_test(struct bpf_object *obj, int (*func)(void),
 		perf_mmap__read_done(&md->core);
 	}
 
-	if (count != expect) {
+	if (count != expect * evlist->core.nr_entries) {
 		pr_debug("BPF filter result incorrect, expected %d, got %d samples\n", expect, count);
 		goto out_delete_evlist;
 	}
diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c
index da5b6cc23f25..d328caaba45d 100644
--- a/tools/perf/tests/builtin-test.c
+++ b/tools/perf/tests/builtin-test.c
@@ -338,6 +338,10 @@ static struct test generic_tests[] = {
 		.func = test__demangle_java,
 	},
 	{
+		.desc = "Parse and process metrics",
+		.func = test__parse_metric,
+	},
+	{
 		.func = NULL,
 	},
 };
diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c
index 6fe221d31f07..035c9123549a 100644
--- a/tools/perf/tests/code-reading.c
+++ b/tools/perf/tests/code-reading.c
@@ -678,7 +678,7 @@ static int do_test_code_reading(bool try_kcore)
 
 			if (verbose > 0) {
 				char errbuf[512];
-				perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));
+				evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));
 				pr_debug("perf_evlist__open() failed!\n%s\n", errbuf);
 			}
 
diff --git a/tools/perf/tests/expr.c b/tools/perf/tests/expr.c
index 1cb02ca2b15f..4d01051951cd 100644
--- a/tools/perf/tests/expr.c
+++ b/tools/perf/tests/expr.c
@@ -18,14 +18,15 @@ static int test(struct expr_parse_ctx *ctx, const char *e, double val2)
 
 int test__expr(struct test *t __maybe_unused, int subtest __maybe_unused)
 {
+	struct expr_id_data *val_ptr;
 	const char *p;
-	double val, *val_ptr;
+	double val;
 	int ret;
 	struct expr_parse_ctx ctx;
 
 	expr__ctx_init(&ctx);
-	expr__add_id(&ctx, strdup("FOO"), 1);
-	expr__add_id(&ctx, strdup("BAR"), 2);
+	expr__add_id_val(&ctx, strdup("FOO"), 1);
+	expr__add_id_val(&ctx, strdup("BAR"), 2);
 
 	ret = test(&ctx, "1+1", 2);
 	ret |= test(&ctx, "FOO+BAR", 3);
@@ -39,6 +40,14 @@ int test__expr(struct test *t __maybe_unused, int subtest __maybe_unused)
 	ret |= test(&ctx, "1+1 if 3*4 else 0", 2);
 	ret |= test(&ctx, "1.1 + 2.1", 3.2);
 	ret |= test(&ctx, ".1 + 2.", 2.1);
+	ret |= test(&ctx, "d_ratio(1, 2)", 0.5);
+	ret |= test(&ctx, "d_ratio(2.5, 0)", 0);
+	ret |= test(&ctx, "1.1 < 2.2", 1);
+	ret |= test(&ctx, "2.2 > 1.1", 1);
+	ret |= test(&ctx, "1.1 < 1.1", 0);
+	ret |= test(&ctx, "2.2 > 2.2", 0);
+	ret |= test(&ctx, "2.2 < 1.1", 0);
+	ret |= test(&ctx, "1.1 > 2.2", 0);
 
 	if (ret)
 		return ret;
diff --git a/tools/perf/tests/fdarray.c b/tools/perf/tests/fdarray.c
index c7c81c4a5b2b..d9eca8e86a6b 100644
--- a/tools/perf/tests/fdarray.c
+++ b/tools/perf/tests/fdarray.c
@@ -12,6 +12,7 @@ static void fdarray__init_revents(struct fdarray *fda, short revents)
 
 	for (fd = 0; fd < fda->nr; ++fd) {
 		fda->entries[fd].fd	 = fda->nr - fd;
+		fda->entries[fd].events  = revents;
 		fda->entries[fd].revents = revents;
 	}
 }
@@ -29,7 +30,7 @@ static int fdarray__fprintf_prefix(struct fdarray *fda, const char *prefix, FILE
 
 int test__fdarray__filter(struct test *test __maybe_unused, int subtest __maybe_unused)
 {
-	int nr_fds, expected_fd[2], fd, err = TEST_FAIL;
+	int nr_fds, err = TEST_FAIL;
 	struct fdarray *fda = fdarray__new(5, 5);
 
 	if (fda == NULL) {
@@ -55,7 +56,6 @@ int test__fdarray__filter(struct test *test __maybe_unused, int subtest __maybe_
 
 	fdarray__init_revents(fda, POLLHUP);
 	fda->entries[2].revents = POLLIN;
-	expected_fd[0] = fda->entries[2].fd;
 
 	pr_debug("\nfiltering all but fda->entries[2]:");
 	fdarray__fprintf_prefix(fda, "before", stderr);
@@ -66,17 +66,9 @@ int test__fdarray__filter(struct test *test __maybe_unused, int subtest __maybe_
 		goto out_delete;
 	}
 
-	if (fda->entries[0].fd != expected_fd[0]) {
-		pr_debug("\nfda->entries[0].fd=%d != %d\n",
-			 fda->entries[0].fd, expected_fd[0]);
-		goto out_delete;
-	}
-
 	fdarray__init_revents(fda, POLLHUP);
 	fda->entries[0].revents = POLLIN;
-	expected_fd[0] = fda->entries[0].fd;
 	fda->entries[3].revents = POLLIN;
-	expected_fd[1] = fda->entries[3].fd;
 
 	pr_debug("\nfiltering all but (fda->entries[0], fda->entries[3]):");
 	fdarray__fprintf_prefix(fda, "before", stderr);
@@ -88,14 +80,6 @@ int test__fdarray__filter(struct test *test __maybe_unused, int subtest __maybe_
 		goto out_delete;
 	}
 
-	for (fd = 0; fd < 2; ++fd) {
-		if (fda->entries[fd].fd != expected_fd[fd]) {
-			pr_debug("\nfda->entries[%d].fd=%d != %d\n", fd,
-				 fda->entries[fd].fd, expected_fd[fd]);
-			goto out_delete;
-		}
-	}
-
 	pr_debug("\n");
 
 	err = 0;
@@ -128,7 +112,7 @@ int test__fdarray__add(struct test *test __maybe_unused, int subtest __maybe_unu
 	}
 
 #define FDA_ADD(_idx, _fd, _revents, _nr)				   \
-	if (fdarray__add(fda, _fd, _revents) < 0) {			   \
+	if (fdarray__add(fda, _fd, _revents, fdarray_flag__default) < 0) { \
 		pr_debug("\n%d: fdarray__add(fda, %d, %d) failed!",	   \
 			 __LINE__,_fd, _revents);			   \
 		goto out_delete;					   \
diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c
index 895188b63f96..aae0fd9045c1 100644
--- a/tools/perf/tests/parse-events.c
+++ b/tools/perf/tests/parse-events.c
@@ -631,6 +631,34 @@ static int test__checkterms_simple(struct list_head *terms)
 	TEST_ASSERT_VAL("wrong val", term->val.num == 1);
 	TEST_ASSERT_VAL("wrong config", !strcmp(term->config, "umask"));
 
+	/*
+	 * read
+	 *
+	 * The perf_pmu__test_parse_init injects 'read' term into
+	 * perf_pmu_events_list, so 'read' is evaluated as read term
+	 * and not as raw event with 'ead' hex value.
+	 */
+	term = list_entry(term->list.next, struct parse_events_term, list);
+	TEST_ASSERT_VAL("wrong type term",
+			term->type_term == PARSE_EVENTS__TERM_TYPE_USER);
+	TEST_ASSERT_VAL("wrong type val",
+			term->type_val == PARSE_EVENTS__TERM_TYPE_NUM);
+	TEST_ASSERT_VAL("wrong val", term->val.num == 1);
+	TEST_ASSERT_VAL("wrong config", !strcmp(term->config, "read"));
+
+	/*
+	 * r0xead
+	 *
+	 * To be still able to pass 'ead' value with 'r' syntax,
+	 * we added support to parse 'r0xHEX' event.
+	 */
+	term = list_entry(term->list.next, struct parse_events_term, list);
+	TEST_ASSERT_VAL("wrong type term",
+			term->type_term == PARSE_EVENTS__TERM_TYPE_CONFIG);
+	TEST_ASSERT_VAL("wrong type val",
+			term->type_val == PARSE_EVENTS__TERM_TYPE_NUM);
+	TEST_ASSERT_VAL("wrong val", term->val.num == 0xead);
+	TEST_ASSERT_VAL("wrong config", !term->config);
 	return 0;
 }
 
@@ -691,7 +719,7 @@ static int test__group2(struct evlist *evlist)
 	TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user);
 	TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel);
 	TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv);
-	TEST_ASSERT_VAL("wrong exclude guest", !evsel->core.attr.exclude_guest);
+	TEST_ASSERT_VAL("wrong exclude guest", evsel->core.attr.exclude_guest);
 	TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host);
 	TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
 	TEST_ASSERT_VAL("wrong leader", evsel__is_group_leader(evsel));
@@ -814,7 +842,7 @@ static int test__group3(struct evlist *evlist __maybe_unused)
 	TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user);
 	TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel);
 	TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv);
-	TEST_ASSERT_VAL("wrong exclude guest", !evsel->core.attr.exclude_guest);
+	TEST_ASSERT_VAL("wrong exclude guest", evsel->core.attr.exclude_guest);
 	TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host);
 	TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
 	TEST_ASSERT_VAL("wrong leader", evsel__is_group_leader(evsel));
@@ -1766,6 +1794,11 @@ static struct evlist_test test__events_pmu[] = {
 		.check = test__checkevent_raw_pmu,
 		.id    = 4,
 	},
+	{
+		.name  = "software/r0x1a/",
+		.check = test__checkevent_raw_pmu,
+		.id    = 4,
+	},
 };
 
 struct terms_test {
@@ -1776,7 +1809,7 @@ struct terms_test {
 
 static struct terms_test test__terms[] = {
 	[0] = {
-		.str   = "config=10,config1,config2=3,umask=1",
+		.str   = "config=10,config1,config2=3,umask=1,read,r0xead",
 		.check = test__checkterms_simple,
 	},
 };
@@ -1836,6 +1869,13 @@ static int test_term(struct terms_test *t)
 
 	INIT_LIST_HEAD(&terms);
 
+	/*
+	 * The perf_pmu__test_parse_init prepares perf_pmu_events_list
+	 * which gets freed in parse_events_terms.
+	 */
+	if (perf_pmu__test_parse_init())
+		return -1;
+
 	ret = parse_events_terms(&terms, t->str);
 	if (ret) {
 		pr_debug("failed to parse terms '%s', err %d\n",
diff --git a/tools/perf/tests/parse-metric.c b/tools/perf/tests/parse-metric.c
new file mode 100644
index 000000000000..cd7331aac3bd
--- /dev/null
+++ b/tools/perf/tests/parse-metric.c
@@ -0,0 +1,359 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/compiler.h>
+#include <string.h>
+#include <perf/cpumap.h>
+#include <perf/evlist.h>
+#include "metricgroup.h"
+#include "tests.h"
+#include "pmu-events/pmu-events.h"
+#include "evlist.h"
+#include "rblist.h"
+#include "debug.h"
+#include "expr.h"
+#include "stat.h"
+#include <perf/cpumap.h>
+#include <perf/evlist.h>
+
+static struct pmu_event pme_test[] = {
+{
+	.metric_expr	= "inst_retired.any / cpu_clk_unhalted.thread",
+	.metric_name	= "IPC",
+	.metric_group	= "group1",
+},
+{
+	.metric_expr	= "idq_uops_not_delivered.core / (4 * (( ( cpu_clk_unhalted.thread / 2 ) * "
+			  "( 1 + cpu_clk_unhalted.one_thread_active / cpu_clk_unhalted.ref_xclk ) )))",
+	.metric_name	= "Frontend_Bound_SMT",
+},
+{
+	.metric_expr	= "l1d\\-loads\\-misses / inst_retired.any",
+	.metric_name	= "dcache_miss_cpi",
+},
+{
+	.metric_expr	= "l1i\\-loads\\-misses / inst_retired.any",
+	.metric_name	= "icache_miss_cycles",
+},
+{
+	.metric_expr	= "(dcache_miss_cpi + icache_miss_cycles)",
+	.metric_name	= "cache_miss_cycles",
+	.metric_group	= "group1",
+},
+{
+	.metric_expr	= "l2_rqsts.demand_data_rd_hit + l2_rqsts.pf_hit + l2_rqsts.rfo_hit",
+	.metric_name	= "DCache_L2_All_Hits",
+},
+{
+	.metric_expr	= "max(l2_rqsts.all_demand_data_rd - l2_rqsts.demand_data_rd_hit, 0) + "
+			  "l2_rqsts.pf_miss + l2_rqsts.rfo_miss",
+	.metric_name	= "DCache_L2_All_Miss",
+},
+{
+	.metric_expr	= "dcache_l2_all_hits + dcache_l2_all_miss",
+	.metric_name	= "DCache_L2_All",
+},
+{
+	.metric_expr	= "d_ratio(dcache_l2_all_hits, dcache_l2_all)",
+	.metric_name	= "DCache_L2_Hits",
+},
+{
+	.metric_expr	= "d_ratio(dcache_l2_all_miss, dcache_l2_all)",
+	.metric_name	= "DCache_L2_Misses",
+},
+{
+	.metric_expr	= "ipc + m2",
+	.metric_name	= "M1",
+},
+{
+	.metric_expr	= "ipc + m1",
+	.metric_name	= "M2",
+},
+{
+	.metric_expr	= "1/m3",
+	.metric_name	= "M3",
+},
+{
+	.name	= NULL,
+}
+};
+
+static struct pmu_events_map map = {
+	.cpuid		= "test",
+	.version	= "1",
+	.type		= "core",
+	.table		= pme_test,
+};
+
+struct value {
+	const char	*event;
+	u64		 val;
+};
+
+static u64 find_value(const char *name, struct value *values)
+{
+	struct value *v = values;
+
+	while (v->event) {
+		if (!strcmp(name, v->event))
+			return v->val;
+		v++;
+	};
+	return 0;
+}
+
+static void load_runtime_stat(struct runtime_stat *st, struct evlist *evlist,
+			      struct value *vals)
+{
+	struct evsel *evsel;
+	u64 count;
+
+	evlist__for_each_entry(evlist, evsel) {
+		count = find_value(evsel->name, vals);
+		perf_stat__update_shadow_stats(evsel, count, 0, st);
+	}
+}
+
+static double compute_single(struct rblist *metric_events, struct evlist *evlist,
+			     struct runtime_stat *st, const char *name)
+{
+	struct metric_expr *mexp;
+	struct metric_event *me;
+	struct evsel *evsel;
+
+	evlist__for_each_entry(evlist, evsel) {
+		me = metricgroup__lookup(metric_events, evsel, false);
+		if (me != NULL) {
+			list_for_each_entry (mexp, &me->head, nd) {
+				if (strcmp(mexp->metric_name, name))
+					continue;
+				return test_generic_metric(mexp, 0, st);
+			}
+		}
+	}
+	return 0.;
+}
+
+static int __compute_metric(const char *name, struct value *vals,
+			    const char *name1, double *ratio1,
+			    const char *name2, double *ratio2)
+{
+	struct rblist metric_events = {
+		.nr_entries = 0,
+	};
+	struct perf_cpu_map *cpus;
+	struct runtime_stat st;
+	struct evlist *evlist;
+	int err;
+
+	/*
+	 * We need to prepare evlist for stat mode running on CPU 0
+	 * because that's where all the stats are going to be created.
+	 */
+	evlist = evlist__new();
+	if (!evlist)
+		return -ENOMEM;
+
+	cpus = perf_cpu_map__new("0");
+	if (!cpus) {
+		evlist__delete(evlist);
+		return -ENOMEM;
+	}
+
+	perf_evlist__set_maps(&evlist->core, cpus, NULL);
+
+	/* Parse the metric into metric_events list. */
+	err = metricgroup__parse_groups_test(evlist, &map, name,
+					     false, false,
+					     &metric_events);
+	if (err)
+		goto out;
+
+	err = perf_evlist__alloc_stats(evlist, false);
+	if (err)
+		goto out;
+
+	/* Load the runtime stats with given numbers for events. */
+	runtime_stat__init(&st);
+	load_runtime_stat(&st, evlist, vals);
+
+	/* And execute the metric */
+	if (name1 && ratio1)
+		*ratio1 = compute_single(&metric_events, evlist, &st, name1);
+	if (name2 && ratio2)
+		*ratio2 = compute_single(&metric_events, evlist, &st, name2);
+
+out:
+	/* ... clenup. */
+	metricgroup__rblist_exit(&metric_events);
+	runtime_stat__exit(&st);
+	perf_evlist__free_stats(evlist);
+	perf_cpu_map__put(cpus);
+	evlist__delete(evlist);
+	return err;
+}
+
+static int compute_metric(const char *name, struct value *vals, double *ratio)
+{
+	return __compute_metric(name, vals, name, ratio, NULL, NULL);
+}
+
+static int compute_metric_group(const char *name, struct value *vals,
+				const char *name1, double *ratio1,
+				const char *name2, double *ratio2)
+{
+	return __compute_metric(name, vals, name1, ratio1, name2, ratio2);
+}
+
+static int test_ipc(void)
+{
+	double ratio;
+	struct value vals[] = {
+		{ .event = "inst_retired.any",        .val = 300 },
+		{ .event = "cpu_clk_unhalted.thread", .val = 200 },
+		{ .event = NULL, },
+	};
+
+	TEST_ASSERT_VAL("failed to compute metric",
+			compute_metric("IPC", vals, &ratio) == 0);
+
+	TEST_ASSERT_VAL("IPC failed, wrong ratio",
+			ratio == 1.5);
+	return 0;
+}
+
+static int test_frontend(void)
+{
+	double ratio;
+	struct value vals[] = {
+		{ .event = "idq_uops_not_delivered.core",        .val = 300 },
+		{ .event = "cpu_clk_unhalted.thread",            .val = 200 },
+		{ .event = "cpu_clk_unhalted.one_thread_active", .val = 400 },
+		{ .event = "cpu_clk_unhalted.ref_xclk",          .val = 600 },
+		{ .event = NULL, },
+	};
+
+	TEST_ASSERT_VAL("failed to compute metric",
+			compute_metric("Frontend_Bound_SMT", vals, &ratio) == 0);
+
+	TEST_ASSERT_VAL("Frontend_Bound_SMT failed, wrong ratio",
+			ratio == 0.45);
+	return 0;
+}
+
+static int test_cache_miss_cycles(void)
+{
+	double ratio;
+	struct value vals[] = {
+		{ .event = "l1d-loads-misses",  .val = 300 },
+		{ .event = "l1i-loads-misses",  .val = 200 },
+		{ .event = "inst_retired.any",  .val = 400 },
+		{ .event = NULL, },
+	};
+
+	TEST_ASSERT_VAL("failed to compute metric",
+			compute_metric("cache_miss_cycles", vals, &ratio) == 0);
+
+	TEST_ASSERT_VAL("cache_miss_cycles failed, wrong ratio",
+			ratio == 1.25);
+	return 0;
+}
+
+
+/*
+ * DCache_L2_All_Hits = l2_rqsts.demand_data_rd_hit + l2_rqsts.pf_hit + l2_rqsts.rfo_hi
+ * DCache_L2_All_Miss = max(l2_rqsts.all_demand_data_rd - l2_rqsts.demand_data_rd_hit, 0) +
+ *                      l2_rqsts.pf_miss + l2_rqsts.rfo_miss
+ * DCache_L2_All      = dcache_l2_all_hits + dcache_l2_all_miss
+ * DCache_L2_Hits     = d_ratio(dcache_l2_all_hits, dcache_l2_all)
+ * DCache_L2_Misses   = d_ratio(dcache_l2_all_miss, dcache_l2_all)
+ *
+ * l2_rqsts.demand_data_rd_hit = 100
+ * l2_rqsts.pf_hit             = 200
+ * l2_rqsts.rfo_hi             = 300
+ * l2_rqsts.all_demand_data_rd = 400
+ * l2_rqsts.pf_miss            = 500
+ * l2_rqsts.rfo_miss           = 600
+ *
+ * DCache_L2_All_Hits = 600
+ * DCache_L2_All_Miss = MAX(400 - 100, 0) + 500 + 600 = 1400
+ * DCache_L2_All      = 600 + 1400  = 2000
+ * DCache_L2_Hits     = 600 / 2000  = 0.3
+ * DCache_L2_Misses   = 1400 / 2000 = 0.7
+ */
+static int test_dcache_l2(void)
+{
+	double ratio;
+	struct value vals[] = {
+		{ .event = "l2_rqsts.demand_data_rd_hit", .val = 100 },
+		{ .event = "l2_rqsts.pf_hit",             .val = 200 },
+		{ .event = "l2_rqsts.rfo_hit",            .val = 300 },
+		{ .event = "l2_rqsts.all_demand_data_rd", .val = 400 },
+		{ .event = "l2_rqsts.pf_miss",            .val = 500 },
+		{ .event = "l2_rqsts.rfo_miss",           .val = 600 },
+		{ .event = NULL, },
+	};
+
+	TEST_ASSERT_VAL("failed to compute metric",
+			compute_metric("DCache_L2_Hits", vals, &ratio) == 0);
+
+	TEST_ASSERT_VAL("DCache_L2_Hits failed, wrong ratio",
+			ratio == 0.3);
+
+	TEST_ASSERT_VAL("failed to compute metric",
+			compute_metric("DCache_L2_Misses", vals, &ratio) == 0);
+
+	TEST_ASSERT_VAL("DCache_L2_Misses failed, wrong ratio",
+			ratio == 0.7);
+	return 0;
+}
+
+static int test_recursion_fail(void)
+{
+	double ratio;
+	struct value vals[] = {
+		{ .event = "inst_retired.any",        .val = 300 },
+		{ .event = "cpu_clk_unhalted.thread", .val = 200 },
+		{ .event = NULL, },
+	};
+
+	TEST_ASSERT_VAL("failed to find recursion",
+			compute_metric("M1", vals, &ratio) == -1);
+
+	TEST_ASSERT_VAL("failed to find recursion",
+			compute_metric("M3", vals, &ratio) == -1);
+	return 0;
+}
+
+static int test_metric_group(void)
+{
+	double ratio1, ratio2;
+	struct value vals[] = {
+		{ .event = "cpu_clk_unhalted.thread", .val = 200 },
+		{ .event = "l1d-loads-misses",        .val = 300 },
+		{ .event = "l1i-loads-misses",        .val = 200 },
+		{ .event = "inst_retired.any",        .val = 400 },
+		{ .event = NULL, },
+	};
+
+	TEST_ASSERT_VAL("failed to find recursion",
+			compute_metric_group("group1", vals,
+					     "IPC", &ratio1,
+					     "cache_miss_cycles", &ratio2) == 0);
+
+	TEST_ASSERT_VAL("group IPC failed, wrong ratio",
+			ratio1 == 2.0);
+
+	TEST_ASSERT_VAL("group cache_miss_cycles failed, wrong ratio",
+			ratio2 == 1.25);
+	return 0;
+}
+
+int test__parse_metric(struct test *test __maybe_unused, int subtest __maybe_unused)
+{
+	TEST_ASSERT_VAL("IPC failed", test_ipc() == 0);
+	TEST_ASSERT_VAL("frontend failed", test_frontend() == 0);
+	TEST_ASSERT_VAL("cache_miss_cycles failed", test_cache_miss_cycles() == 0);
+	TEST_ASSERT_VAL("DCache_L2 failed", test_dcache_l2() == 0);
+	TEST_ASSERT_VAL("recursion fail failed", test_recursion_fail() == 0);
+	TEST_ASSERT_VAL("test metric group", test_metric_group() == 0);
+	return 0;
+}
diff --git a/tools/perf/tests/perf-record.c b/tools/perf/tests/perf-record.c
index 83adfd846ccd..67d3f5aad016 100644
--- a/tools/perf/tests/perf-record.c
+++ b/tools/perf/tests/perf-record.c
@@ -185,14 +185,14 @@ int test__PERF_RECORD(struct test *test __maybe_unused, int subtest __maybe_unus
 				err = perf_evlist__parse_sample(evlist, event, &sample);
 				if (err < 0) {
 					if (verbose > 0)
-						perf_event__fprintf(event, stderr);
+						perf_event__fprintf(event, NULL, stderr);
 					pr_debug("Couldn't parse sample\n");
 					goto out_delete_evlist;
 				}
 
 				if (verbose > 0) {
 					pr_info("%" PRIu64" %d ", sample.time, sample.cpu);
-					perf_event__fprintf(event, stderr);
+					perf_event__fprintf(event, NULL, stderr);
 				}
 
 				if (prev_time > sample.time) {
diff --git a/tools/perf/tests/pmu-events.c b/tools/perf/tests/pmu-events.c
index ab64b4a4e284..d3517a74d95e 100644
--- a/tools/perf/tests/pmu-events.c
+++ b/tools/perf/tests/pmu-events.c
@@ -274,6 +274,7 @@ static int __test__pmu_event_aliases(char *pmu_name, int *count)
 	int res = 0;
 	bool use_uncore_table;
 	struct pmu_events_map *map = __test_pmu_get_events_map();
+	struct perf_pmu_alias *a, *tmp;
 
 	if (!map)
 		return -1;
@@ -347,6 +348,10 @@ static int __test__pmu_event_aliases(char *pmu_name, int *count)
 			  pmu_name, alias->name);
 	}
 
+	list_for_each_entry_safe(a, tmp, &aliases, list) {
+		list_del(&a->list);
+		perf_pmu_free_alias(a);
+	}
 	free(pmu);
 	return res;
 }
@@ -390,9 +395,9 @@ static bool is_number(const char *str)
 	return errno == 0 && end_ptr != str;
 }
 
-static int check_parse_id(const char *id, bool same_cpu, struct pmu_event *pe)
+static int check_parse_id(const char *id, struct parse_events_error *error,
+			  struct perf_pmu *fake_pmu)
 {
-	struct parse_events_error error;
 	struct evlist *evlist;
 	int ret;
 
@@ -401,8 +406,18 @@ static int check_parse_id(const char *id, bool same_cpu, struct pmu_event *pe)
 		return 0;
 
 	evlist = evlist__new();
-	memset(&error, 0, sizeof(error));
-	ret = parse_events(evlist, id, &error);
+	if (!evlist)
+		return -ENOMEM;
+	ret = __parse_events(evlist, id, error, fake_pmu);
+	evlist__delete(evlist);
+	return ret;
+}
+
+static int check_parse_cpu(const char *id, bool same_cpu, struct pmu_event *pe)
+{
+	struct parse_events_error error = { .idx = 0, };
+
+	int ret = check_parse_id(id, &error, NULL);
 	if (ret && same_cpu) {
 		pr_warning("Parse event failed metric '%s' id '%s' expr '%s'\n",
 			pe->metric_name, id, pe->metric_expr);
@@ -413,7 +428,18 @@ static int check_parse_id(const char *id, bool same_cpu, struct pmu_event *pe)
 			  id, pe->metric_name, pe->metric_expr);
 		ret = 0;
 	}
-	evlist__delete(evlist);
+	free(error.str);
+	free(error.help);
+	free(error.first_str);
+	free(error.first_help);
+	return ret;
+}
+
+static int check_parse_fake(const char *id)
+{
+	struct parse_events_error error = { .idx = 0, };
+	int ret = check_parse_id(id, &error, &perf_pmu__fake);
+
 	free(error.str);
 	free(error.help);
 	free(error.first_str);
@@ -471,10 +497,10 @@ static int test_parsing(void)
 			 */
 			k = 1;
 			hashmap__for_each_entry((&ctx.ids), cur, bkt)
-				expr__add_id(&ctx, strdup(cur->key), k++);
+				expr__add_id_val(&ctx, strdup(cur->key), k++);
 
 			hashmap__for_each_entry((&ctx.ids), cur, bkt) {
-				if (check_parse_id(cur->key, map == cpus_map,
+				if (check_parse_cpu(cur->key, map == cpus_map,
 						   pe))
 					ret++;
 			}
@@ -490,6 +516,100 @@ static int test_parsing(void)
 	return ret == 0 ? TEST_OK : TEST_SKIP;
 }
 
+struct test_metric {
+	const char *str;
+};
+
+static struct test_metric metrics[] = {
+	{ "(unc_p_power_state_occupancy.cores_c0 / unc_p_clockticks) * 100." },
+	{ "imx8_ddr0@read\\-cycles@ * 4 * 4", },
+	{ "imx8_ddr0@axid\\-read\\,axi_mask\\=0xffff\\,axi_id\\=0x0000@ * 4", },
+	{ "(cstate_pkg@c2\\-residency@ / msr@tsc@) * 100", },
+	{ "(imx8_ddr0@read\\-cycles@ + imx8_ddr0@write\\-cycles@)", },
+};
+
+static int metric_parse_fake(const char *str)
+{
+	struct expr_parse_ctx ctx;
+	struct hashmap_entry *cur;
+	double result;
+	int ret = -1;
+	size_t bkt;
+	int i;
+
+	pr_debug("parsing '%s'\n", str);
+
+	expr__ctx_init(&ctx);
+	if (expr__find_other(str, NULL, &ctx, 0) < 0) {
+		pr_err("expr__find_other failed\n");
+		return -1;
+	}
+
+	/*
+	 * Add all ids with a made up value. The value may
+	 * trigger divide by zero when subtracted and so try to
+	 * make them unique.
+	 */
+	i = 1;
+	hashmap__for_each_entry((&ctx.ids), cur, bkt)
+		expr__add_id_val(&ctx, strdup(cur->key), i++);
+
+	hashmap__for_each_entry((&ctx.ids), cur, bkt) {
+		if (check_parse_fake(cur->key)) {
+			pr_err("check_parse_fake failed\n");
+			goto out;
+		}
+	}
+
+	if (expr__parse(&result, &ctx, str, 1))
+		pr_err("expr__parse failed\n");
+	else
+		ret = 0;
+
+out:
+	expr__ctx_clear(&ctx);
+	return ret;
+}
+
+/*
+ * Parse all the metrics for current architecture,
+ * or all defined cpus via the 'fake_pmu'
+ * in parse_events.
+ */
+static int test_parsing_fake(void)
+{
+	struct pmu_events_map *map;
+	struct pmu_event *pe;
+	unsigned int i, j;
+	int err = 0;
+
+	for (i = 0; i < ARRAY_SIZE(metrics); i++) {
+		err = metric_parse_fake(metrics[i].str);
+		if (err)
+			return err;
+	}
+
+	i = 0;
+	for (;;) {
+		map = &pmu_events_map[i++];
+		if (!map->table)
+			break;
+		j = 0;
+		for (;;) {
+			pe = &map->table[j++];
+			if (!pe->name && !pe->metric_group && !pe->metric_name)
+				break;
+			if (!pe->metric_expr)
+				continue;
+			err = metric_parse_fake(pe->metric_expr);
+			if (err)
+				return err;
+		}
+	}
+
+	return 0;
+}
+
 static const struct {
 	int (*func)(void);
 	const char *desc;
@@ -506,6 +626,10 @@ static const struct {
 		.func = test_parsing,
 		.desc = "Parsing of PMU event table metrics",
 	},
+	{
+		.func = test_parsing_fake,
+		.desc = "Parsing of PMU event table metrics with fake PMUs",
+	},
 };
 
 const char *test__pmu_events_subtest_get_desc(int subtest)
diff --git a/tools/perf/tests/pmu.c b/tools/perf/tests/pmu.c
index 5c11fe2b3040..714e6830a758 100644
--- a/tools/perf/tests/pmu.c
+++ b/tools/perf/tests/pmu.c
@@ -173,6 +173,7 @@ int test__pmu(struct test *test __maybe_unused, int subtest __maybe_unused)
 		ret = 0;
 	} while (0);
 
+	perf_pmu__del_formats(&formats);
 	test_format_dir_put(format);
 	return ret;
 }
diff --git a/tools/perf/tests/shell/record+script_probe_vfs_getname.sh b/tools/perf/tests/shell/record+script_probe_vfs_getname.sh
index 54030c18bfc2..bf9e729b3ecf 100755
--- a/tools/perf/tests/shell/record+script_probe_vfs_getname.sh
+++ b/tools/perf/tests/shell/record+script_probe_vfs_getname.sh
@@ -20,13 +20,13 @@ file=$(mktemp /tmp/temporary_file.XXXXX)
 
 record_open_file() {
 	echo "Recording open file:"
-	perf record -o ${perfdata} -e probe:vfs_getname touch $file
+	perf record -o ${perfdata} -e probe:vfs_getname\* touch $file
 }
 
 perf_script_filenames() {
 	echo "Looking at perf.data file for vfs_getname records for the file we touched:"
 	perf script -i ${perfdata} | \
-	egrep " +touch +[0-9]+ +\[[0-9]+\] +[0-9]+\.[0-9]+: +probe:vfs_getname: +\([[:xdigit:]]+\) +pathname=\"${file}\""
+	egrep " +touch +[0-9]+ +\[[0-9]+\] +[0-9]+\.[0-9]+: +probe:vfs_getname[_0-9]*: +\([[:xdigit:]]+\) +pathname=\"${file}\""
 }
 
 add_probe_vfs_getname || skip_if_no_debuginfo
diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h
index 76a4e352eaaf..4447a516c689 100644
--- a/tools/perf/tests/tests.h
+++ b/tools/perf/tests/tests.h
@@ -121,6 +121,7 @@ int test__demangle_java(struct test *test, int subtest);
 int test__pfm(struct test *test, int subtest);
 const char *test__pfm_subtest_get_desc(int subtest);
 int test__pfm_subtest_get_nr(void);
+int test__parse_metric(struct test *test, int subtest);
 
 bool test__bp_signal_is_supported(void);
 bool test__bp_account_is_supported(void);
diff --git a/tools/perf/trace/beauty/include/linux/socket.h b/tools/perf/trace/beauty/include/linux/socket.h
new file mode 100644
index 000000000000..e9cb30d8cbfb
--- /dev/null
+++ b/tools/perf/trace/beauty/include/linux/socket.h
@@ -0,0 +1,442 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_SOCKET_H
+#define _LINUX_SOCKET_H
+
+
+#include <asm/socket.h>			/* arch-dependent defines	*/
+#include <linux/sockios.h>		/* the SIOCxxx I/O controls	*/
+#include <linux/uio.h>			/* iovec support		*/
+#include <linux/types.h>		/* pid_t			*/
+#include <linux/compiler.h>		/* __user			*/
+#include <uapi/linux/socket.h>
+
+struct file;
+struct pid;
+struct cred;
+struct socket;
+
+#define __sockaddr_check_size(size)	\
+	BUILD_BUG_ON(((size) > sizeof(struct __kernel_sockaddr_storage)))
+
+#ifdef CONFIG_PROC_FS
+struct seq_file;
+extern void socket_seq_show(struct seq_file *seq);
+#endif
+
+typedef __kernel_sa_family_t	sa_family_t;
+
+/*
+ *	1003.1g requires sa_family_t and that sa_data is char.
+ */
+
+struct sockaddr {
+	sa_family_t	sa_family;	/* address family, AF_xxx	*/
+	char		sa_data[14];	/* 14 bytes of protocol address	*/
+};
+
+struct linger {
+	int		l_onoff;	/* Linger active		*/
+	int		l_linger;	/* How long to linger for	*/
+};
+
+#define sockaddr_storage __kernel_sockaddr_storage
+
+/*
+ *	As we do 4.4BSD message passing we use a 4.4BSD message passing
+ *	system, not 4.3. Thus msg_accrights(len) are now missing. They
+ *	belong in an obscure libc emulation or the bin.
+ */
+
+struct msghdr {
+	void		*msg_name;	/* ptr to socket address structure */
+	int		msg_namelen;	/* size of socket address structure */
+	struct iov_iter	msg_iter;	/* data */
+
+	/*
+	 * Ancillary data. msg_control_user is the user buffer used for the
+	 * recv* side when msg_control_is_user is set, msg_control is the kernel
+	 * buffer used for all other cases.
+	 */
+	union {
+		void		*msg_control;
+		void __user	*msg_control_user;
+	};
+	bool		msg_control_is_user : 1;
+	__kernel_size_t	msg_controllen;	/* ancillary data buffer length */
+	unsigned int	msg_flags;	/* flags on received message */
+	struct kiocb	*msg_iocb;	/* ptr to iocb for async requests */
+};
+
+struct user_msghdr {
+	void		__user *msg_name;	/* ptr to socket address structure */
+	int		msg_namelen;		/* size of socket address structure */
+	struct iovec	__user *msg_iov;	/* scatter/gather array */
+	__kernel_size_t	msg_iovlen;		/* # elements in msg_iov */
+	void		__user *msg_control;	/* ancillary data */
+	__kernel_size_t	msg_controllen;		/* ancillary data buffer length */
+	unsigned int	msg_flags;		/* flags on received message */
+};
+
+/* For recvmmsg/sendmmsg */
+struct mmsghdr {
+	struct user_msghdr  msg_hdr;
+	unsigned int        msg_len;
+};
+
+/*
+ *	POSIX 1003.1g - ancillary data object information
+ *	Ancillary data consits of a sequence of pairs of
+ *	(cmsghdr, cmsg_data[])
+ */
+
+struct cmsghdr {
+	__kernel_size_t	cmsg_len;	/* data byte count, including hdr */
+        int		cmsg_level;	/* originating protocol */
+        int		cmsg_type;	/* protocol-specific type */
+};
+
+/*
+ *	Ancillary data object information MACROS
+ *	Table 5-14 of POSIX 1003.1g
+ */
+
+#define __CMSG_NXTHDR(ctl, len, cmsg) __cmsg_nxthdr((ctl),(len),(cmsg))
+#define CMSG_NXTHDR(mhdr, cmsg) cmsg_nxthdr((mhdr), (cmsg))
+
+#define CMSG_ALIGN(len) ( ((len)+sizeof(long)-1) & ~(sizeof(long)-1) )
+
+#define CMSG_DATA(cmsg) \
+	((void *)(cmsg) + sizeof(struct cmsghdr))
+#define CMSG_USER_DATA(cmsg) \
+	((void __user *)(cmsg) + sizeof(struct cmsghdr))
+#define CMSG_SPACE(len) (sizeof(struct cmsghdr) + CMSG_ALIGN(len))
+#define CMSG_LEN(len) (sizeof(struct cmsghdr) + (len))
+
+#define __CMSG_FIRSTHDR(ctl,len) ((len) >= sizeof(struct cmsghdr) ? \
+				  (struct cmsghdr *)(ctl) : \
+				  (struct cmsghdr *)NULL)
+#define CMSG_FIRSTHDR(msg)	__CMSG_FIRSTHDR((msg)->msg_control, (msg)->msg_controllen)
+#define CMSG_OK(mhdr, cmsg) ((cmsg)->cmsg_len >= sizeof(struct cmsghdr) && \
+			     (cmsg)->cmsg_len <= (unsigned long) \
+			     ((mhdr)->msg_controllen - \
+			      ((char *)(cmsg) - (char *)(mhdr)->msg_control)))
+#define for_each_cmsghdr(cmsg, msg) \
+	for (cmsg = CMSG_FIRSTHDR(msg); \
+	     cmsg; \
+	     cmsg = CMSG_NXTHDR(msg, cmsg))
+
+/*
+ *	Get the next cmsg header
+ *
+ *	PLEASE, do not touch this function. If you think, that it is
+ *	incorrect, grep kernel sources and think about consequences
+ *	before trying to improve it.
+ *
+ *	Now it always returns valid, not truncated ancillary object
+ *	HEADER. But caller still MUST check, that cmsg->cmsg_len is
+ *	inside range, given by msg->msg_controllen before using
+ *	ancillary object DATA.				--ANK (980731)
+ */
+
+static inline struct cmsghdr * __cmsg_nxthdr(void *__ctl, __kernel_size_t __size,
+					       struct cmsghdr *__cmsg)
+{
+	struct cmsghdr * __ptr;
+
+	__ptr = (struct cmsghdr*)(((unsigned char *) __cmsg) +  CMSG_ALIGN(__cmsg->cmsg_len));
+	if ((unsigned long)((char*)(__ptr+1) - (char *) __ctl) > __size)
+		return (struct cmsghdr *)0;
+
+	return __ptr;
+}
+
+static inline struct cmsghdr * cmsg_nxthdr (struct msghdr *__msg, struct cmsghdr *__cmsg)
+{
+	return __cmsg_nxthdr(__msg->msg_control, __msg->msg_controllen, __cmsg);
+}
+
+static inline size_t msg_data_left(struct msghdr *msg)
+{
+	return iov_iter_count(&msg->msg_iter);
+}
+
+/* "Socket"-level control message types: */
+
+#define	SCM_RIGHTS	0x01		/* rw: access rights (array of int) */
+#define SCM_CREDENTIALS 0x02		/* rw: struct ucred		*/
+#define SCM_SECURITY	0x03		/* rw: security label		*/
+
+struct ucred {
+	__u32	pid;
+	__u32	uid;
+	__u32	gid;
+};
+
+/* Supported address families. */
+#define AF_UNSPEC	0
+#define AF_UNIX		1	/* Unix domain sockets 		*/
+#define AF_LOCAL	1	/* POSIX name for AF_UNIX	*/
+#define AF_INET		2	/* Internet IP Protocol 	*/
+#define AF_AX25		3	/* Amateur Radio AX.25 		*/
+#define AF_IPX		4	/* Novell IPX 			*/
+#define AF_APPLETALK	5	/* AppleTalk DDP 		*/
+#define AF_NETROM	6	/* Amateur Radio NET/ROM 	*/
+#define AF_BRIDGE	7	/* Multiprotocol bridge 	*/
+#define AF_ATMPVC	8	/* ATM PVCs			*/
+#define AF_X25		9	/* Reserved for X.25 project 	*/
+#define AF_INET6	10	/* IP version 6			*/
+#define AF_ROSE		11	/* Amateur Radio X.25 PLP	*/
+#define AF_DECnet	12	/* Reserved for DECnet project	*/
+#define AF_NETBEUI	13	/* Reserved for 802.2LLC project*/
+#define AF_SECURITY	14	/* Security callback pseudo AF */
+#define AF_KEY		15      /* PF_KEY key management API */
+#define AF_NETLINK	16
+#define AF_ROUTE	AF_NETLINK /* Alias to emulate 4.4BSD */
+#define AF_PACKET	17	/* Packet family		*/
+#define AF_ASH		18	/* Ash				*/
+#define AF_ECONET	19	/* Acorn Econet			*/
+#define AF_ATMSVC	20	/* ATM SVCs			*/
+#define AF_RDS		21	/* RDS sockets 			*/
+#define AF_SNA		22	/* Linux SNA Project (nutters!) */
+#define AF_IRDA		23	/* IRDA sockets			*/
+#define AF_PPPOX	24	/* PPPoX sockets		*/
+#define AF_WANPIPE	25	/* Wanpipe API Sockets */
+#define AF_LLC		26	/* Linux LLC			*/
+#define AF_IB		27	/* Native InfiniBand address	*/
+#define AF_MPLS		28	/* MPLS */
+#define AF_CAN		29	/* Controller Area Network      */
+#define AF_TIPC		30	/* TIPC sockets			*/
+#define AF_BLUETOOTH	31	/* Bluetooth sockets 		*/
+#define AF_IUCV		32	/* IUCV sockets			*/
+#define AF_RXRPC	33	/* RxRPC sockets 		*/
+#define AF_ISDN		34	/* mISDN sockets 		*/
+#define AF_PHONET	35	/* Phonet sockets		*/
+#define AF_IEEE802154	36	/* IEEE802154 sockets		*/
+#define AF_CAIF		37	/* CAIF sockets			*/
+#define AF_ALG		38	/* Algorithm sockets		*/
+#define AF_NFC		39	/* NFC sockets			*/
+#define AF_VSOCK	40	/* vSockets			*/
+#define AF_KCM		41	/* Kernel Connection Multiplexor*/
+#define AF_QIPCRTR	42	/* Qualcomm IPC Router          */
+#define AF_SMC		43	/* smc sockets: reserve number for
+				 * PF_SMC protocol family that
+				 * reuses AF_INET address family
+				 */
+#define AF_XDP		44	/* XDP sockets			*/
+
+#define AF_MAX		45	/* For now.. */
+
+/* Protocol families, same as address families. */
+#define PF_UNSPEC	AF_UNSPEC
+#define PF_UNIX		AF_UNIX
+#define PF_LOCAL	AF_LOCAL
+#define PF_INET		AF_INET
+#define PF_AX25		AF_AX25
+#define PF_IPX		AF_IPX
+#define PF_APPLETALK	AF_APPLETALK
+#define	PF_NETROM	AF_NETROM
+#define PF_BRIDGE	AF_BRIDGE
+#define PF_ATMPVC	AF_ATMPVC
+#define PF_X25		AF_X25
+#define PF_INET6	AF_INET6
+#define PF_ROSE		AF_ROSE
+#define PF_DECnet	AF_DECnet
+#define PF_NETBEUI	AF_NETBEUI
+#define PF_SECURITY	AF_SECURITY
+#define PF_KEY		AF_KEY
+#define PF_NETLINK	AF_NETLINK
+#define PF_ROUTE	AF_ROUTE
+#define PF_PACKET	AF_PACKET
+#define PF_ASH		AF_ASH
+#define PF_ECONET	AF_ECONET
+#define PF_ATMSVC	AF_ATMSVC
+#define PF_RDS		AF_RDS
+#define PF_SNA		AF_SNA
+#define PF_IRDA		AF_IRDA
+#define PF_PPPOX	AF_PPPOX
+#define PF_WANPIPE	AF_WANPIPE
+#define PF_LLC		AF_LLC
+#define PF_IB		AF_IB
+#define PF_MPLS		AF_MPLS
+#define PF_CAN		AF_CAN
+#define PF_TIPC		AF_TIPC
+#define PF_BLUETOOTH	AF_BLUETOOTH
+#define PF_IUCV		AF_IUCV
+#define PF_RXRPC	AF_RXRPC
+#define PF_ISDN		AF_ISDN
+#define PF_PHONET	AF_PHONET
+#define PF_IEEE802154	AF_IEEE802154
+#define PF_CAIF		AF_CAIF
+#define PF_ALG		AF_ALG
+#define PF_NFC		AF_NFC
+#define PF_VSOCK	AF_VSOCK
+#define PF_KCM		AF_KCM
+#define PF_QIPCRTR	AF_QIPCRTR
+#define PF_SMC		AF_SMC
+#define PF_XDP		AF_XDP
+#define PF_MAX		AF_MAX
+
+/* Maximum queue length specifiable by listen.  */
+#define SOMAXCONN	4096
+
+/* Flags we can use with send/ and recv.
+   Added those for 1003.1g not all are supported yet
+ */
+
+#define MSG_OOB		1
+#define MSG_PEEK	2
+#define MSG_DONTROUTE	4
+#define MSG_TRYHARD     4       /* Synonym for MSG_DONTROUTE for DECnet */
+#define MSG_CTRUNC	8
+#define MSG_PROBE	0x10	/* Do not send. Only probe path f.e. for MTU */
+#define MSG_TRUNC	0x20
+#define MSG_DONTWAIT	0x40	/* Nonblocking io		 */
+#define MSG_EOR         0x80	/* End of record */
+#define MSG_WAITALL	0x100	/* Wait for a full request */
+#define MSG_FIN         0x200
+#define MSG_SYN		0x400
+#define MSG_CONFIRM	0x800	/* Confirm path validity */
+#define MSG_RST		0x1000
+#define MSG_ERRQUEUE	0x2000	/* Fetch message from error queue */
+#define MSG_NOSIGNAL	0x4000	/* Do not generate SIGPIPE */
+#define MSG_MORE	0x8000	/* Sender will send more */
+#define MSG_WAITFORONE	0x10000	/* recvmmsg(): block until 1+ packets avail */
+#define MSG_SENDPAGE_NOPOLICY 0x10000 /* sendpage() internal : do no apply policy */
+#define MSG_SENDPAGE_NOTLAST 0x20000 /* sendpage() internal : not the last page */
+#define MSG_BATCH	0x40000 /* sendmmsg(): more messages coming */
+#define MSG_EOF         MSG_FIN
+#define MSG_NO_SHARED_FRAGS 0x80000 /* sendpage() internal : page frags are not shared */
+#define MSG_SENDPAGE_DECRYPTED	0x100000 /* sendpage() internal : page may carry
+					  * plain text and require encryption
+					  */
+
+#define MSG_ZEROCOPY	0x4000000	/* Use user data in kernel path */
+#define MSG_FASTOPEN	0x20000000	/* Send data in TCP SYN */
+#define MSG_CMSG_CLOEXEC 0x40000000	/* Set close_on_exec for file
+					   descriptor received through
+					   SCM_RIGHTS */
+#if defined(CONFIG_COMPAT)
+#define MSG_CMSG_COMPAT	0x80000000	/* This message needs 32 bit fixups */
+#else
+#define MSG_CMSG_COMPAT	0		/* We never have 32 bit fixups */
+#endif
+
+
+/* Setsockoptions(2) level. Thanks to BSD these must match IPPROTO_xxx */
+#define SOL_IP		0
+/* #define SOL_ICMP	1	No-no-no! Due to Linux :-) we cannot use SOL_ICMP=1 */
+#define SOL_TCP		6
+#define SOL_UDP		17
+#define SOL_IPV6	41
+#define SOL_ICMPV6	58
+#define SOL_SCTP	132
+#define SOL_UDPLITE	136     /* UDP-Lite (RFC 3828) */
+#define SOL_RAW		255
+#define SOL_IPX		256
+#define SOL_AX25	257
+#define SOL_ATALK	258
+#define SOL_NETROM	259
+#define SOL_ROSE	260
+#define SOL_DECNET	261
+#define	SOL_X25		262
+#define SOL_PACKET	263
+#define SOL_ATM		264	/* ATM layer (cell level) */
+#define SOL_AAL		265	/* ATM Adaption Layer (packet level) */
+#define SOL_IRDA        266
+#define SOL_NETBEUI	267
+#define SOL_LLC		268
+#define SOL_DCCP	269
+#define SOL_NETLINK	270
+#define SOL_TIPC	271
+#define SOL_RXRPC	272
+#define SOL_PPPOL2TP	273
+#define SOL_BLUETOOTH	274
+#define SOL_PNPIPE	275
+#define SOL_RDS		276
+#define SOL_IUCV	277
+#define SOL_CAIF	278
+#define SOL_ALG		279
+#define SOL_NFC		280
+#define SOL_KCM		281
+#define SOL_TLS		282
+#define SOL_XDP		283
+
+/* IPX options */
+#define IPX_TYPE	1
+
+extern int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr_storage *kaddr);
+extern int put_cmsg(struct msghdr*, int level, int type, int len, void *data);
+
+struct timespec64;
+struct __kernel_timespec;
+struct old_timespec32;
+
+struct scm_timestamping_internal {
+	struct timespec64 ts[3];
+};
+
+extern void put_cmsg_scm_timestamping64(struct msghdr *msg, struct scm_timestamping_internal *tss);
+extern void put_cmsg_scm_timestamping(struct msghdr *msg, struct scm_timestamping_internal *tss);
+
+/* The __sys_...msg variants allow MSG_CMSG_COMPAT iff
+ * forbid_cmsg_compat==false
+ */
+extern long __sys_recvmsg(int fd, struct user_msghdr __user *msg,
+			  unsigned int flags, bool forbid_cmsg_compat);
+extern long __sys_sendmsg(int fd, struct user_msghdr __user *msg,
+			  unsigned int flags, bool forbid_cmsg_compat);
+extern int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg,
+			  unsigned int vlen, unsigned int flags,
+			  struct __kernel_timespec __user *timeout,
+			  struct old_timespec32 __user *timeout32);
+extern int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg,
+			  unsigned int vlen, unsigned int flags,
+			  bool forbid_cmsg_compat);
+extern long __sys_sendmsg_sock(struct socket *sock, struct msghdr *msg,
+			       unsigned int flags);
+extern long __sys_recvmsg_sock(struct socket *sock, struct msghdr *msg,
+			       struct user_msghdr __user *umsg,
+			       struct sockaddr __user *uaddr,
+			       unsigned int flags);
+extern int sendmsg_copy_msghdr(struct msghdr *msg,
+			       struct user_msghdr __user *umsg, unsigned flags,
+			       struct iovec **iov);
+extern int recvmsg_copy_msghdr(struct msghdr *msg,
+			       struct user_msghdr __user *umsg, unsigned flags,
+			       struct sockaddr __user **uaddr,
+			       struct iovec **iov);
+extern int __copy_msghdr_from_user(struct msghdr *kmsg,
+				   struct user_msghdr __user *umsg,
+				   struct sockaddr __user **save_addr,
+				   struct iovec __user **uiov, size_t *nsegs);
+
+/* helpers which do the actual work for syscalls */
+extern int __sys_recvfrom(int fd, void __user *ubuf, size_t size,
+			  unsigned int flags, struct sockaddr __user *addr,
+			  int __user *addr_len);
+extern int __sys_sendto(int fd, void __user *buff, size_t len,
+			unsigned int flags, struct sockaddr __user *addr,
+			int addr_len);
+extern int __sys_accept4_file(struct file *file, unsigned file_flags,
+			struct sockaddr __user *upeer_sockaddr,
+			 int __user *upeer_addrlen, int flags,
+			 unsigned long nofile);
+extern int __sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr,
+			 int __user *upeer_addrlen, int flags);
+extern int __sys_socket(int family, int type, int protocol);
+extern int __sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen);
+extern int __sys_connect_file(struct file *file, struct sockaddr_storage *addr,
+			      int addrlen, int file_flags);
+extern int __sys_connect(int fd, struct sockaddr __user *uservaddr,
+			 int addrlen);
+extern int __sys_listen(int fd, int backlog);
+extern int __sys_getsockname(int fd, struct sockaddr __user *usockaddr,
+			     int __user *usockaddr_len);
+extern int __sys_getpeername(int fd, struct sockaddr __user *usockaddr,
+			     int __user *usockaddr_len);
+extern int __sys_socketpair(int family, int type, int protocol,
+			    int __user *usockvec);
+extern int __sys_shutdown(int fd, int how);
+
+extern struct ns_common *get_net_ns(struct ns_common *ns);
+#endif /* _LINUX_SOCKET_H */
diff --git a/tools/perf/trace/beauty/sockaddr.c b/tools/perf/trace/beauty/sockaddr.c
index e0c13e6a5788..cd110634ab09 100644
--- a/tools/perf/trace/beauty/sockaddr.c
+++ b/tools/perf/trace/beauty/sockaddr.c
@@ -7,14 +7,7 @@
 #include <sys/un.h>
 #include <arpa/inet.h>
 
-static const char *socket_families[] = {
-	"UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM",
-	"BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI",
-	"SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC",
-	"RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "CAN", "TIPC",
-	"BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154", "CAIF",
-	"ALG", "NFC", "VSOCK",
-};
+#include "trace/beauty/generated/socket_arrays.c"
 DEFINE_STRARRAY(socket_families, "PF_");
 
 static size_t af_inet__scnprintf(struct sockaddr *sa, char *bf, size_t size)
diff --git a/tools/perf/trace/beauty/socket.sh b/tools/perf/trace/beauty/socket.sh
new file mode 100755
index 000000000000..3820e5c82293
--- /dev/null
+++ b/tools/perf/trace/beauty/socket.sh
@@ -0,0 +1,24 @@
+#!/bin/sh
+# SPDX-License-Identifier: LGPL-2.1
+
+# This one uses a copy from the kernel sources headers that is in a
+# place used just for these tools/perf/beauty/ usage, we shouldn't not
+# put it in tools/include/linux otherwise they would be used in the
+# normal compiler building process and would drag needless stuff from the
+# kernel.
+
+# When what these scripts need is already in tools/include/ then use it,
+# otherwise grab and check the copy from the kernel sources just for these
+# string table building scripts.
+
+[ $# -eq 1 ] && header_dir=$1 || header_dir=tools/perf/trace/beauty/include/linux/
+
+printf "static const char *socket_families[] = {\n"
+# #define AF_LOCAL	1	/* POSIX name for AF_UNIX	*/
+regex='^#define[[:space:]]+AF_(\w+)[[:space:]]+([[:digit:]]+).*'
+
+egrep $regex ${header_dir}/socket.h | \
+	sed -r "s/$regex/\2 \1/g"	| \
+	xargs printf "\t[%s] = \"%s\",\n" | \
+	egrep -v "\"(UNIX|MAX)\""
+printf "};\n"
diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c
index 9023267e5643..bd77825fd5a1 100644
--- a/tools/perf/ui/browsers/annotate.c
+++ b/tools/perf/ui/browsers/annotate.c
@@ -209,7 +209,7 @@ static void annotate_browser__draw_current_jump(struct ui_browser *browser)
 		ui_browser__mark_fused(browser,
 				       pcnt_width + 3 + notes->widths.addr + width,
 				       from - 1,
-				       to > from ? true : false);
+				       to > from);
 	}
 }
 
diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c
index be9c4c0549bc..a07626f07208 100644
--- a/tools/perf/ui/browsers/hists.c
+++ b/tools/perf/ui/browsers/hists.c
@@ -3629,8 +3629,8 @@ int perf_evlist__tui_browse_hists(struct evlist *evlist, const char *help,
 {
 	int nr_entries = evlist->core.nr_entries;
 
-single_entry:
 	if (perf_evlist__single_entry(evlist)) {
+single_entry: {
 		struct evsel *first = evlist__first(evlist);
 
 		return perf_evsel__hists_browse(first, nr_entries, help,
@@ -3638,6 +3638,7 @@ single_entry:
 						env, warn_lost_event,
 						annotation_opts);
 	}
+	}
 
 	if (symbol_conf.event_group) {
 		struct evsel *pos;
diff --git a/tools/perf/util/Build b/tools/perf/util/Build
index 8d18380ecd10..cd5e41960e64 100644
--- a/tools/perf/util/Build
+++ b/tools/perf/util/Build
@@ -117,6 +117,7 @@ endif
 perf-y += parse-branch-options.o
 perf-y += dump-insn.o
 perf-y += parse-regs-options.o
+perf-y += parse-sublevel-options.o
 perf-y += term.o
 perf-y += help-unknown-cmd.o
 perf-y += mem-events.o
@@ -128,6 +129,7 @@ perf-y += expr-bison.o
 perf-y += expr.o
 perf-y += branch.o
 perf-y += mem2node.o
+perf-y += clockid.o
 
 perf-$(CONFIG_LIBBPF) += bpf-loader.o
 perf-$(CONFIG_LIBBPF) += bpf_map.o
@@ -191,36 +193,60 @@ CFLAGS_llvm-utils.o += -DPERF_INCLUDE_DIR="BUILD_STR($(perf_include_dir_SQ))"
 # avoid compiler warnings in 32-bit mode
 CFLAGS_genelf_debug.o  += -Wno-packed
 
-$(OUTPUT)util/parse-events-flex.c: util/parse-events.l $(OUTPUT)util/parse-events-bison.c
+$(OUTPUT)util/parse-events-flex.c $(OUTPUT)util/parse-events-flex.h: util/parse-events.l $(OUTPUT)util/parse-events-bison.c
 	$(call rule_mkdir)
-	$(Q)$(call echo-cmd,flex)$(FLEX) -o $@ --header-file=$(OUTPUT)util/parse-events-flex.h $(PARSER_DEBUG_FLEX) util/parse-events.l
+	$(Q)$(call echo-cmd,flex)$(FLEX) -o $(OUTPUT)util/parse-events-flex.c \
+		--header-file=$(OUTPUT)util/parse-events-flex.h $(PARSER_DEBUG_FLEX) $<
 
-$(OUTPUT)util/parse-events-bison.c: util/parse-events.y
+$(OUTPUT)util/parse-events-bison.c $(OUTPUT)util/parse-events-bison.h: util/parse-events.y
 	$(call rule_mkdir)
-	$(Q)$(call echo-cmd,bison)$(BISON) -v util/parse-events.y -d $(PARSER_DEBUG_BISON) -o $@ -p parse_events_
+	$(Q)$(call echo-cmd,bison)$(BISON) -v $< -d $(PARSER_DEBUG_BISON) \
+		-o $(OUTPUT)util/parse-events-bison.c -p parse_events_
 
-$(OUTPUT)util/expr-flex.c: util/expr.l $(OUTPUT)util/expr-bison.c
+$(OUTPUT)util/expr-flex.c $(OUTPUT)util/expr-flex.h: util/expr.l $(OUTPUT)util/expr-bison.c
 	$(call rule_mkdir)
-	$(Q)$(call echo-cmd,flex)$(FLEX) -o $@ --header-file=$(OUTPUT)util/expr-flex.h $(PARSER_DEBUG_FLEX) util/expr.l
+	$(Q)$(call echo-cmd,flex)$(FLEX) -o $(OUTPUT)util/expr-flex.c \
+		--header-file=$(OUTPUT)util/expr-flex.h $(PARSER_DEBUG_FLEX) $<
 
-$(OUTPUT)util/expr-bison.c: util/expr.y
+$(OUTPUT)util/expr-bison.c $(OUTPUT)util/expr-bison.h: util/expr.y
 	$(call rule_mkdir)
-	$(Q)$(call echo-cmd,bison)$(BISON) -v util/expr.y -d $(PARSER_DEBUG_BISON) -o $@ -p expr_
+	$(Q)$(call echo-cmd,bison)$(BISON) -v $< -d $(PARSER_DEBUG_BISON) \
+		-o $(OUTPUT)util/expr-bison.c -p expr_
 
-$(OUTPUT)util/pmu-flex.c: util/pmu.l $(OUTPUT)util/pmu-bison.c
+$(OUTPUT)util/pmu-flex.c $(OUTPUT)util/pmu-flex.h: util/pmu.l $(OUTPUT)util/pmu-bison.c
 	$(call rule_mkdir)
-	$(Q)$(call echo-cmd,flex)$(FLEX) -o $@ --header-file=$(OUTPUT)util/pmu-flex.h util/pmu.l
+	$(Q)$(call echo-cmd,flex)$(FLEX) -o $(OUTPUT)util/pmu-flex.c \
+		--header-file=$(OUTPUT)util/pmu-flex.h $(PARSER_DEBUG_FLEX) $<
 
-$(OUTPUT)util/pmu-bison.c: util/pmu.y
+$(OUTPUT)util/pmu-bison.c $(OUTPUT)util/pmu-bison.h: util/pmu.y
 	$(call rule_mkdir)
-	$(Q)$(call echo-cmd,bison)$(BISON) -v util/pmu.y -d -o $@ -p perf_pmu_
-
-CFLAGS_parse-events-flex.o  += -w
-CFLAGS_pmu-flex.o           += -w
-CFLAGS_expr-flex.o          += -w
-CFLAGS_parse-events-bison.o += -DYYENABLE_NLS=0 -w
-CFLAGS_pmu-bison.o          += -DYYENABLE_NLS=0 -DYYLTYPE_IS_TRIVIAL=0 -w
-CFLAGS_expr-bison.o         += -DYYENABLE_NLS=0 -DYYLTYPE_IS_TRIVIAL=0 -w
+	$(Q)$(call echo-cmd,bison)$(BISON) -v $< -d $(PARSER_DEBUG_BISON) \
+		-o $(OUTPUT)util/pmu-bison.c -p perf_pmu_
+
+FLEX_GE_26 := $(shell expr $(shell $(FLEX) --version | sed -e  's/flex \([0-9]\+\).\([0-9]\+\)/\1\2/g') \>\= 26)
+ifeq ($(FLEX_GE_26),1)
+  flex_flags := -Wno-switch-enum -Wno-switch-default -Wno-unused-function -Wno-redundant-decls -Wno-sign-compare -Wno-unused-parameter -Wno-missing-prototypes -Wno-missing-declarations
+  CC_HASNT_MISLEADING_INDENTATION := $(shell echo "int main(void) { return 0 }" | $(CC) -Werror -Wno-misleading-indentation -o /dev/null -xc - 2>&1 | grep -q -- -Wno-misleading-indentation ; echo $$?)
+  ifeq ($(CC_HASNT_MISLEADING_INDENTATION), 1)
+    flex_flags += -Wno-misleading-indentation
+  endif
+else
+  flex_flags := -w
+endif
+CFLAGS_parse-events-flex.o  += $(flex_flags)
+CFLAGS_pmu-flex.o           += $(flex_flags)
+CFLAGS_expr-flex.o          += $(flex_flags)
+
+bison_flags := -DYYENABLE_NLS=0
+BISON_GE_35 := $(shell expr $(shell $(BISON) --version | grep bison | sed -e 's/.\+ \([0-9]\+\).\([0-9]\+\)/\1\2/g') \>\= 35)
+ifeq ($(BISON_GE_35),1)
+  bison_flags += -Wno-unused-parameter -Wno-nested-externs -Wno-implicit-function-declaration -Wno-switch-enum
+else
+  bison_flags += -w
+endif
+CFLAGS_parse-events-bison.o += $(bison_flags)
+CFLAGS_pmu-bison.o          += -DYYLTYPE_IS_TRIVIAL=0 $(bison_flags)
+CFLAGS_expr-bison.o         += -DYYLTYPE_IS_TRIVIAL=0 $(bison_flags)
 
 $(OUTPUT)util/parse-events.o: $(OUTPUT)util/parse-events-flex.c $(OUTPUT)util/parse-events-bison.c
 $(OUTPUT)util/pmu.o: $(OUTPUT)util/pmu-flex.c $(OUTPUT)util/pmu-bison.c
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index 76bfb4a9d94e..0a1fcf787538 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -1621,6 +1621,7 @@ static int dso__disassemble_filename(struct dso *dso, char *filename, size_t fil
 	char *build_id_filename;
 	char *build_id_path = NULL;
 	char *pos;
+	int len;
 
 	if (dso->symtab_type == DSO_BINARY_TYPE__KALLSYMS &&
 	    !dso__is_kcore(dso))
@@ -1649,10 +1650,16 @@ static int dso__disassemble_filename(struct dso *dso, char *filename, size_t fil
 	if (pos && strlen(pos) < SBUILD_ID_SIZE - 2)
 		dirname(build_id_path);
 
-	if (dso__is_kcore(dso) ||
-	    readlink(build_id_path, linkname, sizeof(linkname)) < 0 ||
-	    strstr(linkname, DSO__NAME_KALLSYMS) ||
-	    access(filename, R_OK)) {
+	if (dso__is_kcore(dso))
+		goto fallback;
+
+	len = readlink(build_id_path, linkname, sizeof(linkname) - 1);
+	if (len < 0)
+		goto fallback;
+
+	linkname[len] = '\0';
+	if (strstr(linkname, DSO__NAME_KALLSYMS) ||
+		access(filename, R_OK)) {
 fallback:
 		/*
 		 * If we don't have build-ids or the build-id file isn't in the
diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c
index 302a14d0aca9..93e063f22be5 100644
--- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c
@@ -182,15 +182,15 @@ static int arm_spe_read_record(struct arm_spe_decoder *decoder)
 			if (payload & BIT(EV_TLB_ACCESS))
 				decoder->record.type |= ARM_SPE_TLB_ACCESS;
 
-			if ((idx == 1 || idx == 2 || idx == 3) &&
+			if ((idx == 2 || idx == 4 || idx == 8) &&
 			    (payload & BIT(EV_LLC_MISS)))
 				decoder->record.type |= ARM_SPE_LLC_MISS;
 
-			if ((idx == 1 || idx == 2 || idx == 3) &&
+			if ((idx == 2 || idx == 4 || idx == 8) &&
 			    (payload & BIT(EV_LLC_ACCESS)))
 				decoder->record.type |= ARM_SPE_LLC_ACCESS;
 
-			if ((idx == 1 || idx == 2 || idx == 3) &&
+			if ((idx == 2 || idx == 4 || idx == 8) &&
 			    (payload & BIT(EV_REMOTE_ACCESS)))
 				decoder->record.type |= ARM_SPE_REMOTE_ACCESS;
 
diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c
index 25c639ac4ad4..42a85c86421d 100644
--- a/tools/perf/util/auxtrace.c
+++ b/tools/perf/util/auxtrace.c
@@ -1349,6 +1349,47 @@ void itrace_synth_opts__set_default(struct itrace_synth_opts *synth_opts,
 	synth_opts->initial_skip = 0;
 }
 
+static int get_flag(const char **ptr, unsigned int *flags)
+{
+	while (1) {
+		char c = **ptr;
+
+		if (c >= 'a' && c <= 'z') {
+			*flags |= 1 << (c - 'a');
+			++*ptr;
+			return 0;
+		} else if (c == ' ') {
+			++*ptr;
+			continue;
+		} else {
+			return -1;
+		}
+	}
+}
+
+static int get_flags(const char **ptr, unsigned int *plus_flags, unsigned int *minus_flags)
+{
+	while (1) {
+		switch (**ptr) {
+		case '+':
+			++*ptr;
+			if (get_flag(ptr, plus_flags))
+				return -1;
+			break;
+		case '-':
+			++*ptr;
+			if (get_flag(ptr, minus_flags))
+				return -1;
+			break;
+		case ' ':
+			++*ptr;
+			break;
+		default:
+			return 0;
+		}
+	}
+}
+
 /*
  * Please check tools/perf/Documentation/perf-script.txt for information
  * about the options parsed here, which is introduced after this cset,
@@ -1436,9 +1477,15 @@ int itrace_parse_synth_opts(const struct option *opt, const char *str,
 			break;
 		case 'e':
 			synth_opts->errors = true;
+			if (get_flags(&p, &synth_opts->error_plus_flags,
+				      &synth_opts->error_minus_flags))
+				goto out_err;
 			break;
 		case 'd':
 			synth_opts->log = true;
+			if (get_flags(&p, &synth_opts->log_plus_flags,
+				      &synth_opts->log_minus_flags))
+				goto out_err;
 			break;
 		case 'c':
 			synth_opts->branches = true;
@@ -1507,6 +1554,9 @@ int itrace_parse_synth_opts(const struct option *opt, const char *str,
 		case 'a':
 			synth_opts->remote_access = true;
 			break;
+		case 'q':
+			synth_opts->quick += 1;
+			break;
 		case ' ':
 		case ',':
 			break;
diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h
index 142ccf7d34df..951d2d14cf24 100644
--- a/tools/perf/util/auxtrace.h
+++ b/tools/perf/util/auxtrace.h
@@ -55,6 +55,11 @@ enum itrace_period_type {
 	PERF_ITRACE_PERIOD_NANOSECS,
 };
 
+#define AUXTRACE_ERR_FLG_OVERFLOW	(1 << ('o' - 'a'))
+#define AUXTRACE_ERR_FLG_DATA_LOST	(1 << ('l' - 'a'))
+
+#define AUXTRACE_LOG_FLG_ALL_PERF_EVTS	(1 << ('a' - 'a'))
+
 /**
  * struct itrace_synth_opts - AUX area tracing synthesis options.
  * @set: indicates whether or not options have been set
@@ -91,6 +96,11 @@ enum itrace_period_type {
  * @cpu_bitmap: CPUs for which to synthesize events, or NULL for all
  * @ptime_range: time intervals to trace or NULL
  * @range_num: number of time intervals to trace
+ * @error_plus_flags: flags to affect what errors are reported
+ * @error_minus_flags: flags to affect what errors are reported
+ * @log_plus_flags: flags to affect what is logged
+ * @log_minus_flags: flags to affect what is logged
+ * @quick: quicker (less detailed) decoding
  */
 struct itrace_synth_opts {
 	bool			set;
@@ -124,6 +134,11 @@ struct itrace_synth_opts {
 	unsigned long		*cpu_bitmap;
 	struct perf_time_interval *ptime_range;
 	int			range_num;
+	unsigned int		error_plus_flags;
+	unsigned int		error_minus_flags;
+	unsigned int		log_plus_flags;
+	unsigned int		log_minus_flags;
+	unsigned int		quick;
 };
 
 /**
@@ -604,22 +619,32 @@ bool auxtrace__evsel_is_auxtrace(struct perf_session *session,
 				 struct evsel *evsel);
 
 #define ITRACE_HELP \
-"				i:	    		synthesize instructions events\n"		\
+"				i[period]:    		synthesize instructions events\n" \
 "				b:	    		synthesize branches events (branch misses for Arm SPE)\n" \
 "				c:	    		synthesize branches events (calls only)\n"	\
 "				r:	    		synthesize branches events (returns only)\n" \
 "				x:	    		synthesize transactions events\n"		\
 "				w:	    		synthesize ptwrite events\n"		\
 "				p:	    		synthesize power events\n"			\
-"				e:	    		synthesize error events\n"			\
-"				d:	    		create a debug log\n"			\
+"				o:			synthesize other events recorded due to the use\n" \
+"							of aux-output (refer to perf record)\n"	\
+"				e[flags]:		synthesize error events\n" \
+"							each flag must be preceded by + or -\n" \
+"							error flags are: o (overflow)\n" \
+"									 l (data lost)\n" \
+"				d[flags]:		create a debug log\n" \
+"							each flag must be preceded by + or -\n" \
+"							log flags are: a (all perf events)\n" \
 "				f:	    		synthesize first level cache events\n" \
 "				m:	    		synthesize last level cache events\n" \
 "				t:	    		synthesize TLB events\n" \
 "				a:	    		synthesize remote access events\n" \
 "				g[len]:     		synthesize a call chain (use with i or x)\n" \
+"				G[len]:			synthesize a call chain on existing event records\n" \
 "				l[len]:     		synthesize last branch entries (use with i or x)\n" \
+"				L[len]:			synthesize last branch entries on existing event records\n" \
 "				sNUMBER:    		skip initial number of events\n"		\
+"				q:			quicker (less detailed) decoding\n" \
 "				PERIOD[ns|us|ms|i|t]:   specify period to sample stream\n" \
 "				concatenate multiple options. Default is ibxwpe or cewp\n"
 
diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c
index c076fc7fe025..31207b6e2066 100644
--- a/tools/perf/util/build-id.c
+++ b/tools/perf/util/build-id.c
@@ -31,6 +31,10 @@
 #include "probe-file.h"
 #include "strlist.h"
 
+#ifdef HAVE_DEBUGINFOD_SUPPORT
+#include <elfutils/debuginfod.h>
+#endif
+
 #include <linux/ctype.h>
 #include <linux/zalloc.h>
 
@@ -636,6 +640,21 @@ static char *build_id_cache__find_debug(const char *sbuild_id,
 	if (realname && access(realname, R_OK))
 		zfree(&realname);
 	nsinfo__mountns_exit(&nsc);
+
+#ifdef HAVE_DEBUGINFOD_SUPPORT
+        if (realname == NULL) {
+                debuginfod_client* c = debuginfod_begin();
+                if (c != NULL) {
+                        int fd = debuginfod_find_debuginfo(c,
+                                                           (const unsigned char*)sbuild_id, 0,
+                                                           &realname);
+                        if (fd >= 0)
+                                close(fd); /* retaining reference by realname */
+                        debuginfod_end(c);
+                }
+        }
+#endif
+
 out:
 	free(debugfile);
 	return realname;
diff --git a/tools/perf/util/clockid.c b/tools/perf/util/clockid.c
new file mode 100644
index 000000000000..74365a5d99c1
--- /dev/null
+++ b/tools/perf/util/clockid.c
@@ -0,0 +1,119 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <subcmd/parse-options.h>
+#include <stdio.h>
+#include <time.h>
+#include <strings.h>
+#include <linux/time64.h>
+#include "debug.h"
+#include "clockid.h"
+#include "record.h"
+
+struct clockid_map {
+	const char *name;
+	int clockid;
+};
+
+#define CLOCKID_MAP(n, c)	\
+	{ .name = n, .clockid = (c), }
+
+#define CLOCKID_END	{ .name = NULL, }
+
+
+/*
+ * Add the missing ones, we need to build on many distros...
+ */
+#ifndef CLOCK_MONOTONIC_RAW
+#define CLOCK_MONOTONIC_RAW 4
+#endif
+#ifndef CLOCK_BOOTTIME
+#define CLOCK_BOOTTIME 7
+#endif
+#ifndef CLOCK_TAI
+#define CLOCK_TAI 11
+#endif
+
+static const struct clockid_map clockids[] = {
+	/* available for all events, NMI safe */
+	CLOCKID_MAP("monotonic", CLOCK_MONOTONIC),
+	CLOCKID_MAP("monotonic_raw", CLOCK_MONOTONIC_RAW),
+
+	/* available for some events */
+	CLOCKID_MAP("realtime", CLOCK_REALTIME),
+	CLOCKID_MAP("boottime", CLOCK_BOOTTIME),
+	CLOCKID_MAP("tai", CLOCK_TAI),
+
+	/* available for the lazy */
+	CLOCKID_MAP("mono", CLOCK_MONOTONIC),
+	CLOCKID_MAP("raw", CLOCK_MONOTONIC_RAW),
+	CLOCKID_MAP("real", CLOCK_REALTIME),
+	CLOCKID_MAP("boot", CLOCK_BOOTTIME),
+
+	CLOCKID_END,
+};
+
+static int get_clockid_res(clockid_t clk_id, u64 *res_ns)
+{
+	struct timespec res;
+
+	*res_ns = 0;
+	if (!clock_getres(clk_id, &res))
+		*res_ns = res.tv_nsec + res.tv_sec * NSEC_PER_SEC;
+	else
+		pr_warning("WARNING: Failed to determine specified clock resolution.\n");
+
+	return 0;
+}
+
+int parse_clockid(const struct option *opt, const char *str, int unset)
+{
+	struct record_opts *opts = (struct record_opts *)opt->value;
+	const struct clockid_map *cm;
+	const char *ostr = str;
+
+	if (unset) {
+		opts->use_clockid = 0;
+		return 0;
+	}
+
+	/* no arg passed */
+	if (!str)
+		return 0;
+
+	/* no setting it twice */
+	if (opts->use_clockid)
+		return -1;
+
+	opts->use_clockid = true;
+
+	/* if its a number, we're done */
+	if (sscanf(str, "%d", &opts->clockid) == 1)
+		return get_clockid_res(opts->clockid, &opts->clockid_res_ns);
+
+	/* allow a "CLOCK_" prefix to the name */
+	if (!strncasecmp(str, "CLOCK_", 6))
+		str += 6;
+
+	for (cm = clockids; cm->name; cm++) {
+		if (!strcasecmp(str, cm->name)) {
+			opts->clockid = cm->clockid;
+			return get_clockid_res(opts->clockid,
+					       &opts->clockid_res_ns);
+		}
+	}
+
+	opts->use_clockid = false;
+	ui__warning("unknown clockid %s, check man page\n", ostr);
+	return -1;
+}
+
+const char *clockid_name(clockid_t clk_id)
+{
+	const struct clockid_map *cm;
+
+	for (cm = clockids; cm->name; cm++) {
+		if (cm->clockid == clk_id)
+			return cm->name;
+	}
+	return "(not found)";
+}
diff --git a/tools/perf/util/clockid.h b/tools/perf/util/clockid.h
new file mode 100644
index 000000000000..9b49b4711c76
--- /dev/null
+++ b/tools/perf/util/clockid.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __PERF_CLOCKID_H
+#define __PERF_CLOCKID_H
+
+struct option;
+int parse_clockid(const struct option *opt, const char *str, int unset);
+
+const char *clockid_name(clockid_t clk_id);
+
+#endif
diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c
index c283223fb31f..a2a369e2fbb6 100644
--- a/tools/perf/util/cs-etm.c
+++ b/tools/perf/util/cs-etm.c
@@ -1344,8 +1344,15 @@ static int cs_etm__synth_events(struct cs_etm_auxtrace *etm,
 		attr.sample_type &= ~(u64)PERF_SAMPLE_ADDR;
 	}
 
-	if (etm->synth_opts.last_branch)
+	if (etm->synth_opts.last_branch) {
 		attr.sample_type |= PERF_SAMPLE_BRANCH_STACK;
+		/*
+		 * We don't use the hardware index, but the sample generation
+		 * code uses the new format branch_stack with this field,
+		 * so the event attributes must indicate that it's present.
+		 */
+		attr.branch_sample_type |= PERF_SAMPLE_BRANCH_HW_INDEX;
+	}
 
 	if (etm->synth_opts.instructions) {
 		attr.config = PERF_COUNT_HW_INSTRUCTIONS;
diff --git a/tools/perf/util/data-convert-bt.c b/tools/perf/util/data-convert-bt.c
index 5f36fc6a5578..27c5fef9ad54 100644
--- a/tools/perf/util/data-convert-bt.c
+++ b/tools/perf/util/data-convert-bt.c
@@ -31,6 +31,9 @@
 #include "config.h"
 #include <linux/ctype.h>
 #include <linux/err.h>
+#include <linux/time64.h>
+#include "util.h"
+#include "clockid.h"
 
 #define pr_N(n, fmt, ...) \
 	eprintf(n, debug_data_convert, fmt, ##__VA_ARGS__)
@@ -1381,11 +1384,26 @@ do {									\
 	return 0;
 }
 
-static int ctf_writer__setup_clock(struct ctf_writer *cw)
+static int ctf_writer__setup_clock(struct ctf_writer *cw,
+				   struct perf_session *session,
+				   bool tod)
 {
 	struct bt_ctf_clock *clock = cw->clock;
+	const char *desc = "perf clock";
+	int64_t offset = 0;
 
-	bt_ctf_clock_set_description(clock, "perf clock");
+	if (tod) {
+		struct perf_env *env = &session->header.env;
+
+		if (!env->clock.enabled) {
+			pr_err("Can't provide --tod time, missing clock data. "
+			       "Please record with -k/--clockid option.\n");
+			return -1;
+		}
+
+		desc   = clockid_name(env->clock.clockid);
+		offset = env->clock.tod_ns - env->clock.clockid_ns;
+	}
 
 #define SET(__n, __v)				\
 do {						\
@@ -1394,8 +1412,8 @@ do {						\
 } while (0)
 
 	SET(frequency,   1000000000);
-	SET(offset_s,    0);
-	SET(offset,      0);
+	SET(offset,      offset);
+	SET(description, desc);
 	SET(precision,   10);
 	SET(is_absolute, 0);
 
@@ -1481,7 +1499,8 @@ static void ctf_writer__cleanup(struct ctf_writer *cw)
 	memset(cw, 0, sizeof(*cw));
 }
 
-static int ctf_writer__init(struct ctf_writer *cw, const char *path)
+static int ctf_writer__init(struct ctf_writer *cw, const char *path,
+			    struct perf_session *session, bool tod)
 {
 	struct bt_ctf_writer		*writer;
 	struct bt_ctf_stream_class	*stream_class;
@@ -1505,7 +1524,7 @@ static int ctf_writer__init(struct ctf_writer *cw, const char *path)
 
 	cw->clock = clock;
 
-	if (ctf_writer__setup_clock(cw)) {
+	if (ctf_writer__setup_clock(cw, session, tod)) {
 		pr("Failed to setup CTF clock.\n");
 		goto err_cleanup;
 	}
@@ -1613,17 +1632,15 @@ int bt_convert__perf2ctf(const char *input, const char *path,
 	if (err)
 		return err;
 
-	/* CTF writer */
-	if (ctf_writer__init(cw, path))
-		return -1;
-
 	err = -1;
 	/* perf.data session */
 	session = perf_session__new(&data, 0, &c.tool);
-	if (IS_ERR(session)) {
-		err = PTR_ERR(session);
-		goto free_writer;
-	}
+	if (IS_ERR(session))
+		return PTR_ERR(session);
+
+	/* CTF writer */
+	if (ctf_writer__init(cw, path, session, opts->tod))
+		goto free_session;
 
 	if (c.queue_size) {
 		ordered_events__set_alloc_size(&session->ordered_events,
@@ -1632,17 +1649,17 @@ int bt_convert__perf2ctf(const char *input, const char *path,
 
 	/* CTF writer env/clock setup  */
 	if (ctf_writer__setup_env(cw, session))
-		goto free_session;
+		goto free_writer;
 
 	/* CTF events setup */
 	if (setup_events(cw, session))
-		goto free_session;
+		goto free_writer;
 
 	if (opts->all && setup_non_sample_events(cw, session))
-		goto free_session;
+		goto free_writer;
 
 	if (setup_streams(cw, session))
-		goto free_session;
+		goto free_writer;
 
 	err = perf_session__process_events(session);
 	if (!err)
@@ -1670,10 +1687,10 @@ int bt_convert__perf2ctf(const char *input, const char *path,
 
 	return err;
 
-free_session:
-	perf_session__delete(session);
 free_writer:
 	ctf_writer__cleanup(cw);
+free_session:
+	perf_session__delete(session);
 	pr_err("Error during conversion setup.\n");
 	return err;
 }
diff --git a/tools/perf/util/data-convert.h b/tools/perf/util/data-convert.h
index af90b6076c06..feab5f114e37 100644
--- a/tools/perf/util/data-convert.h
+++ b/tools/perf/util/data-convert.h
@@ -5,6 +5,7 @@
 struct perf_data_convert_opts {
 	bool force;
 	bool all;
+	bool tod;
 };
 
 #endif /* __DATA_CONVERT_H */
diff --git a/tools/perf/util/debug.c b/tools/perf/util/debug.c
index adb656745ecc..5cda5565777a 100644
--- a/tools/perf/util/debug.c
+++ b/tools/perf/util/debug.c
@@ -20,6 +20,7 @@
 #include "target.h"
 #include "ui/helpline.h"
 #include "ui/ui.h"
+#include "util/parse-sublevel-options.h"
 
 #include <linux/ctype.h>
 
@@ -173,65 +174,37 @@ void trace_event(union perf_event *event)
 		     trace_event_printer, event);
 }
 
-static struct debug_variable {
-	const char *name;
-	int *ptr;
-} debug_variables[] = {
-	{ .name = "verbose",		.ptr = &verbose },
-	{ .name = "ordered-events",	.ptr = &debug_ordered_events},
-	{ .name = "stderr",		.ptr = &redirect_to_stderr},
-	{ .name = "data-convert",	.ptr = &debug_data_convert },
-	{ .name = "perf-event-open",	.ptr = &debug_peo_args },
+static struct sublevel_option debug_opts[] = {
+	{ .name = "verbose",		.value_ptr = &verbose },
+	{ .name = "ordered-events",	.value_ptr = &debug_ordered_events},
+	{ .name = "stderr",		.value_ptr = &redirect_to_stderr},
+	{ .name = "data-convert",	.value_ptr = &debug_data_convert },
+	{ .name = "perf-event-open",	.value_ptr = &debug_peo_args },
 	{ .name = NULL, }
 };
 
 int perf_debug_option(const char *str)
 {
-	struct debug_variable *var = &debug_variables[0];
-	char *vstr, *s = strdup(str);
-	int v = 1;
-
-	vstr = strchr(s, '=');
-	if (vstr)
-		*vstr++ = 0;
-
-	while (var->name) {
-		if (!strcmp(s, var->name))
-			break;
-		var++;
-	}
-
-	if (!var->name) {
-		pr_err("Unknown debug variable name '%s'\n", s);
-		free(s);
-		return -1;
-	}
+	int ret;
 
-	if (vstr) {
-		v = atoi(vstr);
-		/*
-		 * Allow only values in range (0, 10),
-		 * otherwise set 0.
-		 */
-		v = (v < 0) || (v > 10) ? 0 : v;
-	}
+	ret = perf_parse_sublevel_options(str, debug_opts);
+	if (ret)
+		return ret;
 
-	if (quiet)
-		v = -1;
+	/* Allow only verbose value in range (0, 10), otherwise set 0. */
+	verbose = (verbose < 0) || (verbose > 10) ? 0 : verbose;
 
-	*var->ptr = v;
-	free(s);
 	return 0;
 }
 
 int perf_quiet_option(void)
 {
-	struct debug_variable *var = &debug_variables[0];
+	struct sublevel_option *opt = &debug_opts[0];
 
 	/* disable all debug messages */
-	while (var->name) {
-		*var->ptr = -1;
-		var++;
+	while (opt->name) {
+		*opt->value_ptr = -1;
+		opt++;
 	}
 
 	return 0;
diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c
index 99f0a39c3c59..5a3b4755f0b3 100644
--- a/tools/perf/util/dso.c
+++ b/tools/perf/util/dso.c
@@ -208,6 +208,7 @@ int dso__read_binary_type_filename(const struct dso *dso,
 	case DSO_BINARY_TYPE__JAVA_JIT:
 	case DSO_BINARY_TYPE__BPF_PROG_INFO:
 	case DSO_BINARY_TYPE__BPF_IMAGE:
+	case DSO_BINARY_TYPE__OOL:
 	case DSO_BINARY_TYPE__NOT_FOUND:
 		ret = -1;
 		break;
@@ -898,6 +899,8 @@ static struct dso_cache *dso_cache__populate(struct dso *dso,
 
 	if (dso->binary_type == DSO_BINARY_TYPE__BPF_PROG_INFO)
 		*ret = bpf_read(dso, cache_offset, cache->data);
+	else if (dso->binary_type == DSO_BINARY_TYPE__OOL)
+		*ret = DSO__DATA_CACHE_SIZE;
 	else
 		*ret = file_read(dso, machine, cache_offset, cache->data);
 
@@ -1262,7 +1265,7 @@ struct dso *dso__new_id(const char *name, struct dso_id *id)
 		dso->has_build_id = 0;
 		dso->has_srcline = 1;
 		dso->a2l_fails = 1;
-		dso->kernel = DSO_TYPE_USER;
+		dso->kernel = DSO_SPACE__USER;
 		dso->needs_swap = DSO_SWAP__UNSET;
 		dso->comp = COMP_ID__NONE;
 		RB_CLEAR_NODE(&dso->rb_node);
diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h
index d3d03274b0d1..8ad17f395a19 100644
--- a/tools/perf/util/dso.h
+++ b/tools/perf/util/dso.h
@@ -42,13 +42,14 @@ enum dso_binary_type {
 	DSO_BINARY_TYPE__OPENEMBEDDED_DEBUGINFO,
 	DSO_BINARY_TYPE__BPF_PROG_INFO,
 	DSO_BINARY_TYPE__BPF_IMAGE,
+	DSO_BINARY_TYPE__OOL,
 	DSO_BINARY_TYPE__NOT_FOUND,
 };
 
-enum dso_kernel_type {
-	DSO_TYPE_USER = 0,
-	DSO_TYPE_KERNEL,
-	DSO_TYPE_GUEST_KERNEL
+enum dso_space_type {
+	DSO_SPACE__USER = 0,
+	DSO_SPACE__KERNEL,
+	DSO_SPACE__KERNEL_GUEST
 };
 
 enum dso_swap_type {
@@ -159,7 +160,7 @@ struct dso {
 	void		 *a2l;
 	char		 *symsrc_filename;
 	unsigned int	 a2l_fails;
-	enum dso_kernel_type	kernel;
+	enum dso_space_type	kernel;
 	enum dso_swap_type	needs_swap;
 	enum dso_binary_type	symtab_type;
 	enum dso_binary_type	binary_type;
diff --git a/tools/perf/util/env.h b/tools/perf/util/env.h
index 1ab2682d5d2b..a12972652006 100644
--- a/tools/perf/util/env.h
+++ b/tools/perf/util/env.h
@@ -77,7 +77,6 @@ struct perf_env {
 	struct numa_node	*numa_nodes;
 	struct memory_node	*memory_nodes;
 	unsigned long long	 memory_bsize;
-	u64                     clockid_res_ns;
 
 	/*
 	 * bpf_info_lock protects bpf rbtrees. This is needed because the
@@ -100,6 +99,19 @@ struct perf_env {
 	/* For fast cpu to numa node lookup via perf_env__numa_node */
 	int			*numa_map;
 	int			 nr_numa_map;
+
+	/* For real clock time reference. */
+	struct {
+		u64	tod_ns;
+		u64	clockid_ns;
+		u64     clockid_res_ns;
+		int	clockid;
+		/*
+		 * enabled is valid for report mode, and is true if above
+		 * values are set, it's set in process_clock_data
+		 */
+		bool	enabled;
+	} clock;
 };
 
 enum perf_compress_type {
diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index f581550a3015..317a26571845 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -31,6 +31,7 @@
 #include "stat.h"
 #include "session.h"
 #include "bpf-event.h"
+#include "print_binary.h"
 #include "tool.h"
 #include "../perf.h"
 
@@ -55,6 +56,7 @@ static const char *perf_event__names[] = {
 	[PERF_RECORD_KSYMBOL]			= "KSYMBOL",
 	[PERF_RECORD_BPF_EVENT]			= "BPF_EVENT",
 	[PERF_RECORD_CGROUP]			= "CGROUP",
+	[PERF_RECORD_TEXT_POKE]			= "TEXT_POKE",
 	[PERF_RECORD_HEADER_ATTR]		= "ATTR",
 	[PERF_RECORD_HEADER_EVENT_TYPE]		= "EVENT_TYPE",
 	[PERF_RECORD_HEADER_TRACING_DATA]	= "TRACING_DATA",
@@ -267,6 +269,14 @@ int perf_event__process_bpf(struct perf_tool *tool __maybe_unused,
 	return machine__process_bpf(machine, event, sample);
 }
 
+int perf_event__process_text_poke(struct perf_tool *tool __maybe_unused,
+				  union perf_event *event,
+				  struct perf_sample *sample,
+				  struct machine *machine)
+{
+	return machine__process_text_poke(machine, event, sample);
+}
+
 size_t perf_event__fprintf_mmap(union perf_event *event, FILE *fp)
 {
 	return fprintf(fp, " %d/%d: [%#" PRI_lx64 "(%#" PRI_lx64 ") @ %#" PRI_lx64 "]: %c %s\n",
@@ -413,7 +423,52 @@ size_t perf_event__fprintf_bpf(union perf_event *event, FILE *fp)
 		       event->bpf.type, event->bpf.flags, event->bpf.id);
 }
 
-size_t perf_event__fprintf(union perf_event *event, FILE *fp)
+static int text_poke_printer(enum binary_printer_ops op, unsigned int val,
+			     void *extra, FILE *fp)
+{
+	bool old = *(bool *)extra;
+
+	switch ((int)op) {
+	case BINARY_PRINT_LINE_BEGIN:
+		return fprintf(fp, "            %s bytes:", old ? "Old" : "New");
+	case BINARY_PRINT_NUM_DATA:
+		return fprintf(fp, " %02x", val);
+	case BINARY_PRINT_LINE_END:
+		return fprintf(fp, "\n");
+	default:
+		return 0;
+	}
+}
+
+size_t perf_event__fprintf_text_poke(union perf_event *event, struct machine *machine, FILE *fp)
+{
+	struct perf_record_text_poke_event *tp = &event->text_poke;
+	size_t ret;
+	bool old;
+
+	ret = fprintf(fp, " %" PRI_lx64 " ", tp->addr);
+	if (machine) {
+		struct addr_location al;
+
+		al.map = maps__find(&machine->kmaps, tp->addr);
+		if (al.map && map__load(al.map) >= 0) {
+			al.addr = al.map->map_ip(al.map, tp->addr);
+			al.sym = map__find_symbol(al.map, al.addr);
+			if (al.sym)
+				ret += symbol__fprintf_symname_offs(al.sym, &al, fp);
+		}
+	}
+	ret += fprintf(fp, " old len %u new len %u\n", tp->old_len, tp->new_len);
+	old = true;
+	ret += binary__fprintf(tp->bytes, tp->old_len, 16, text_poke_printer,
+			       &old, fp);
+	old = false;
+	ret += binary__fprintf(tp->bytes + tp->old_len, tp->new_len, 16,
+			       text_poke_printer, &old, fp);
+	return ret;
+}
+
+size_t perf_event__fprintf(union perf_event *event, struct machine *machine, FILE *fp)
 {
 	size_t ret = fprintf(fp, "PERF_RECORD_%s",
 			     perf_event__name(event->header.type));
@@ -457,6 +512,9 @@ size_t perf_event__fprintf(union perf_event *event, FILE *fp)
 	case PERF_RECORD_BPF_EVENT:
 		ret += perf_event__fprintf_bpf(event, fp);
 		break;
+	case PERF_RECORD_TEXT_POKE:
+		ret += perf_event__fprintf_text_poke(event, machine, fp);
+		break;
 	default:
 		ret += fprintf(fp, "\n");
 	}
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index 6ae01c3c2ffa..b828b99176f4 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -351,6 +351,10 @@ int perf_event__process_bpf(struct perf_tool *tool,
 			    union perf_event *event,
 			    struct perf_sample *sample,
 			    struct machine *machine);
+int perf_event__process_text_poke(struct perf_tool *tool,
+				  union perf_event *event,
+				  struct perf_sample *sample,
+				  struct machine *machine);
 int perf_event__process(struct perf_tool *tool,
 			union perf_event *event,
 			struct perf_sample *sample,
@@ -385,7 +389,8 @@ size_t perf_event__fprintf_namespaces(union perf_event *event, FILE *fp);
 size_t perf_event__fprintf_cgroup(union perf_event *event, FILE *fp);
 size_t perf_event__fprintf_ksymbol(union perf_event *event, FILE *fp);
 size_t perf_event__fprintf_bpf(union perf_event *event, FILE *fp);
-size_t perf_event__fprintf(union perf_event *event, FILE *fp);
+size_t perf_event__fprintf_text_poke(union perf_event *event, struct machine *machine,FILE *fp);
+size_t perf_event__fprintf(union perf_event *event, struct machine *machine, FILE *fp);
 
 int kallsyms__get_function_start(const char *kallsyms_filename,
 				 const char *symbol_name, u64 *addr);
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index ab48be4cf258..c0768c61eb43 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -63,6 +63,9 @@ void evlist__init(struct evlist *evlist, struct perf_cpu_map *cpus,
 	perf_evlist__set_maps(&evlist->core, cpus, threads);
 	evlist->workload.pid = -1;
 	evlist->bkw_mmap_state = BKW_MMAP_NOTREADY;
+	evlist->ctl_fd.fd = -1;
+	evlist->ctl_fd.ack = -1;
+	evlist->ctl_fd.pos = -1;
 }
 
 struct evlist *evlist__new(void)
@@ -79,7 +82,7 @@ struct evlist *perf_evlist__new_default(void)
 {
 	struct evlist *evlist = evlist__new();
 
-	if (evlist && perf_evlist__add_default(evlist)) {
+	if (evlist && evlist__add_default(evlist)) {
 		evlist__delete(evlist);
 		evlist = NULL;
 	}
@@ -91,7 +94,7 @@ struct evlist *perf_evlist__new_dummy(void)
 {
 	struct evlist *evlist = evlist__new();
 
-	if (evlist && perf_evlist__add_dummy(evlist)) {
+	if (evlist && evlist__add_dummy(evlist)) {
 		evlist__delete(evlist);
 		evlist = NULL;
 	}
@@ -231,7 +234,7 @@ void perf_evlist__set_leader(struct evlist *evlist)
 	}
 }
 
-int __perf_evlist__add_default(struct evlist *evlist, bool precise)
+int __evlist__add_default(struct evlist *evlist, bool precise)
 {
 	struct evsel *evsel = evsel__new_cycles(precise);
 
@@ -242,7 +245,7 @@ int __perf_evlist__add_default(struct evlist *evlist, bool precise)
 	return 0;
 }
 
-int perf_evlist__add_dummy(struct evlist *evlist)
+int evlist__add_dummy(struct evlist *evlist)
 {
 	struct perf_event_attr attr = {
 		.type	= PERF_TYPE_SOFTWARE,
@@ -258,8 +261,7 @@ int perf_evlist__add_dummy(struct evlist *evlist)
 	return 0;
 }
 
-static int evlist__add_attrs(struct evlist *evlist,
-				  struct perf_event_attr *attrs, size_t nr_attrs)
+static int evlist__add_attrs(struct evlist *evlist, struct perf_event_attr *attrs, size_t nr_attrs)
 {
 	struct evsel *evsel, *n;
 	LIST_HEAD(head);
@@ -282,8 +284,7 @@ out_delete_partial_list:
 	return -1;
 }
 
-int __perf_evlist__add_default_attrs(struct evlist *evlist,
-				     struct perf_event_attr *attrs, size_t nr_attrs)
+int __evlist__add_default_attrs(struct evlist *evlist, struct perf_event_attr *attrs, size_t nr_attrs)
 {
 	size_t i;
 
@@ -322,8 +323,7 @@ perf_evlist__find_tracepoint_by_name(struct evlist *evlist,
 	return NULL;
 }
 
-int perf_evlist__add_newtp(struct evlist *evlist,
-			   const char *sys, const char *name, void *handler)
+int evlist__add_newtp(struct evlist *evlist, const char *sys, const char *name, void *handler)
 {
 	struct evsel *evsel = evsel__newtp(sys, name);
 
@@ -500,7 +500,7 @@ int perf_evlist__enable_event_idx(struct evlist *evlist,
 
 int evlist__add_pollfd(struct evlist *evlist, int fd)
 {
-	return perf_evlist__add_pollfd(&evlist->core, fd, NULL, POLLIN);
+	return perf_evlist__add_pollfd(&evlist->core, fd, NULL, POLLIN, fdarray_flag__default);
 }
 
 int evlist__filter_pollfd(struct evlist *evlist, short revents_and_mask)
@@ -540,7 +540,7 @@ struct evsel *perf_evlist__id2evsel(struct evlist *evlist, u64 id)
 	if (sid)
 		return container_of(sid->evsel, struct evsel, core);
 
-	if (!perf_evlist__sample_id_all(evlist))
+	if (!evlist__sample_id_all(evlist))
 		return evlist__first(evlist);
 
 	return NULL;
@@ -946,6 +946,10 @@ int perf_evlist__create_maps(struct evlist *evlist, struct target *target)
 
 	perf_evlist__set_maps(&evlist->core, cpus, threads);
 
+	/* as evlist now has references, put count here */
+	perf_cpu_map__put(cpus);
+	perf_thread_map__put(threads);
+
 	return 0;
 
 out_delete_threads:
@@ -1088,7 +1092,7 @@ int perf_evlist__append_tp_filter_pid(struct evlist *evlist, pid_t pid)
 	return perf_evlist__append_tp_filter_pids(evlist, 1, &pid);
 }
 
-bool perf_evlist__valid_sample_type(struct evlist *evlist)
+bool evlist__valid_sample_type(struct evlist *evlist)
 {
 	struct evsel *pos;
 
@@ -1107,7 +1111,7 @@ bool perf_evlist__valid_sample_type(struct evlist *evlist)
 	return true;
 }
 
-u64 __perf_evlist__combined_sample_type(struct evlist *evlist)
+u64 __evlist__combined_sample_type(struct evlist *evlist)
 {
 	struct evsel *evsel;
 
@@ -1120,13 +1124,13 @@ u64 __perf_evlist__combined_sample_type(struct evlist *evlist)
 	return evlist->combined_sample_type;
 }
 
-u64 perf_evlist__combined_sample_type(struct evlist *evlist)
+u64 evlist__combined_sample_type(struct evlist *evlist)
 {
 	evlist->combined_sample_type = 0;
-	return __perf_evlist__combined_sample_type(evlist);
+	return __evlist__combined_sample_type(evlist);
 }
 
-u64 perf_evlist__combined_branch_type(struct evlist *evlist)
+u64 evlist__combined_branch_type(struct evlist *evlist)
 {
 	struct evsel *evsel;
 	u64 branch_type = 0;
@@ -1191,7 +1195,7 @@ out:
 	return size;
 }
 
-bool perf_evlist__valid_sample_id_all(struct evlist *evlist)
+bool evlist__valid_sample_id_all(struct evlist *evlist)
 {
 	struct evsel *first = evlist__first(evlist), *pos = first;
 
@@ -1203,7 +1207,7 @@ bool perf_evlist__valid_sample_id_all(struct evlist *evlist)
 	return true;
 }
 
-bool perf_evlist__sample_id_all(struct evlist *evlist)
+bool evlist__sample_id_all(struct evlist *evlist)
 {
 	struct evsel *first = evlist__first(evlist);
 	return first->core.attr.sample_id_all;
@@ -1273,11 +1277,12 @@ static int perf_evlist__create_syswide_maps(struct evlist *evlist)
 		goto out_put;
 
 	perf_evlist__set_maps(&evlist->core, cpus, threads);
-out:
-	return err;
+
+	perf_thread_map__put(threads);
 out_put:
 	perf_cpu_map__put(cpus);
-	goto out;
+out:
+	return err;
 }
 
 int evlist__open(struct evlist *evlist)
@@ -1464,8 +1469,7 @@ int perf_evlist__parse_sample_timestamp(struct evlist *evlist,
 	return evsel__parse_sample_timestamp(evsel, event, timestamp);
 }
 
-int perf_evlist__strerror_open(struct evlist *evlist,
-			       int err, char *buf, size_t size)
+int evlist__strerror_open(struct evlist *evlist, int err, char *buf, size_t size)
 {
 	int printed, value;
 	char sbuf[STRERR_BUFSIZE], *emsg = str_error_r(err, sbuf, sizeof(sbuf));
@@ -1518,7 +1522,7 @@ out_default:
 	return 0;
 }
 
-int perf_evlist__strerror_mmap(struct evlist *evlist, int err, char *buf, size_t size)
+int evlist__strerror_mmap(struct evlist *evlist, int err, char *buf, size_t size)
 {
 	char sbuf[STRERR_BUFSIZE], *emsg = str_error_r(err, sbuf, sizeof(sbuf));
 	int pages_attempted = evlist->core.mmap_len / 1024, pages_max_per_user, printed = 0;
@@ -1727,3 +1731,143 @@ struct evsel *perf_evlist__reset_weak_group(struct evlist *evsel_list,
 	}
 	return leader;
 }
+
+int evlist__initialize_ctlfd(struct evlist *evlist, int fd, int ack)
+{
+	if (fd == -1) {
+		pr_debug("Control descriptor is not initialized\n");
+		return 0;
+	}
+
+	evlist->ctl_fd.pos = perf_evlist__add_pollfd(&evlist->core, fd, NULL, POLLIN,
+						     fdarray_flag__nonfilterable);
+	if (evlist->ctl_fd.pos < 0) {
+		evlist->ctl_fd.pos = -1;
+		pr_err("Failed to add ctl fd entry: %m\n");
+		return -1;
+	}
+
+	evlist->ctl_fd.fd = fd;
+	evlist->ctl_fd.ack = ack;
+
+	return 0;
+}
+
+bool evlist__ctlfd_initialized(struct evlist *evlist)
+{
+	return evlist->ctl_fd.pos >= 0;
+}
+
+int evlist__finalize_ctlfd(struct evlist *evlist)
+{
+	struct pollfd *entries = evlist->core.pollfd.entries;
+
+	if (!evlist__ctlfd_initialized(evlist))
+		return 0;
+
+	entries[evlist->ctl_fd.pos].fd = -1;
+	entries[evlist->ctl_fd.pos].events = 0;
+	entries[evlist->ctl_fd.pos].revents = 0;
+
+	evlist->ctl_fd.pos = -1;
+	evlist->ctl_fd.ack = -1;
+	evlist->ctl_fd.fd = -1;
+
+	return 0;
+}
+
+static int evlist__ctlfd_recv(struct evlist *evlist, enum evlist_ctl_cmd *cmd,
+			      char *cmd_data, size_t data_size)
+{
+	int err;
+	char c;
+	size_t bytes_read = 0;
+
+	memset(cmd_data, 0, data_size);
+	data_size--;
+
+	do {
+		err = read(evlist->ctl_fd.fd, &c, 1);
+		if (err > 0) {
+			if (c == '\n' || c == '\0')
+				break;
+			cmd_data[bytes_read++] = c;
+			if (bytes_read == data_size)
+				break;
+		} else {
+			if (err == -1)
+				pr_err("Failed to read from ctlfd %d: %m\n", evlist->ctl_fd.fd);
+			break;
+		}
+	} while (1);
+
+	pr_debug("Message from ctl_fd: \"%s%s\"\n", cmd_data,
+		 bytes_read == data_size ? "" : c == '\n' ? "\\n" : "\\0");
+
+	if (err > 0) {
+		if (!strncmp(cmd_data, EVLIST_CTL_CMD_ENABLE_TAG,
+			     (sizeof(EVLIST_CTL_CMD_ENABLE_TAG)-1))) {
+			*cmd = EVLIST_CTL_CMD_ENABLE;
+		} else if (!strncmp(cmd_data, EVLIST_CTL_CMD_DISABLE_TAG,
+				    (sizeof(EVLIST_CTL_CMD_DISABLE_TAG)-1))) {
+			*cmd = EVLIST_CTL_CMD_DISABLE;
+		}
+	}
+
+	return err;
+}
+
+static int evlist__ctlfd_ack(struct evlist *evlist)
+{
+	int err;
+
+	if (evlist->ctl_fd.ack == -1)
+		return 0;
+
+	err = write(evlist->ctl_fd.ack, EVLIST_CTL_CMD_ACK_TAG,
+		    sizeof(EVLIST_CTL_CMD_ACK_TAG));
+	if (err == -1)
+		pr_err("failed to write to ctl_ack_fd %d: %m\n", evlist->ctl_fd.ack);
+
+	return err;
+}
+
+int evlist__ctlfd_process(struct evlist *evlist, enum evlist_ctl_cmd *cmd)
+{
+	int err = 0;
+	char cmd_data[EVLIST_CTL_CMD_MAX_LEN];
+	int ctlfd_pos = evlist->ctl_fd.pos;
+	struct pollfd *entries = evlist->core.pollfd.entries;
+
+	if (!evlist__ctlfd_initialized(evlist) || !entries[ctlfd_pos].revents)
+		return 0;
+
+	if (entries[ctlfd_pos].revents & POLLIN) {
+		err = evlist__ctlfd_recv(evlist, cmd, cmd_data,
+					 EVLIST_CTL_CMD_MAX_LEN);
+		if (err > 0) {
+			switch (*cmd) {
+			case EVLIST_CTL_CMD_ENABLE:
+				evlist__enable(evlist);
+				break;
+			case EVLIST_CTL_CMD_DISABLE:
+				evlist__disable(evlist);
+				break;
+			case EVLIST_CTL_CMD_ACK:
+			case EVLIST_CTL_CMD_UNSUPPORTED:
+			default:
+				pr_debug("ctlfd: unsupported %d\n", *cmd);
+				break;
+			}
+			if (!(*cmd == EVLIST_CTL_CMD_ACK || *cmd == EVLIST_CTL_CMD_UNSUPPORTED))
+				evlist__ctlfd_ack(evlist);
+		}
+	}
+
+	if (entries[ctlfd_pos].revents & (POLLHUP | POLLERR))
+		evlist__finalize_ctlfd(evlist);
+	else
+		entries[ctlfd_pos].revents = 0;
+
+	return err;
+}
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index a8081dfc19cf..c73f7f7f120b 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -74,6 +74,11 @@ struct evlist {
 		pthread_t		th;
 		volatile int		done;
 	} thread;
+	struct {
+		int	fd;	/* control file descriptor */
+		int	ack;	/* ack file descriptor for control commands */
+		int	pos;	/* index at evlist core object to check signals */
+	} ctl_fd;
 };
 
 struct evsel_str_handler {
@@ -92,20 +97,20 @@ void evlist__delete(struct evlist *evlist);
 void evlist__add(struct evlist *evlist, struct evsel *entry);
 void evlist__remove(struct evlist *evlist, struct evsel *evsel);
 
-int __perf_evlist__add_default(struct evlist *evlist, bool precise);
+int __evlist__add_default(struct evlist *evlist, bool precise);
 
-static inline int perf_evlist__add_default(struct evlist *evlist)
+static inline int evlist__add_default(struct evlist *evlist)
 {
-	return __perf_evlist__add_default(evlist, true);
+	return __evlist__add_default(evlist, true);
 }
 
-int __perf_evlist__add_default_attrs(struct evlist *evlist,
+int __evlist__add_default_attrs(struct evlist *evlist,
 				     struct perf_event_attr *attrs, size_t nr_attrs);
 
-#define perf_evlist__add_default_attrs(evlist, array) \
-	__perf_evlist__add_default_attrs(evlist, array, ARRAY_SIZE(array))
+#define evlist__add_default_attrs(evlist, array) \
+	__evlist__add_default_attrs(evlist, array, ARRAY_SIZE(array))
 
-int perf_evlist__add_dummy(struct evlist *evlist);
+int evlist__add_dummy(struct evlist *evlist);
 
 int perf_evlist__add_sb_event(struct evlist *evlist,
 			      struct perf_event_attr *attr,
@@ -116,8 +121,7 @@ int perf_evlist__start_sb_thread(struct evlist *evlist,
 				 struct target *target);
 void perf_evlist__stop_sb_thread(struct evlist *evlist);
 
-int perf_evlist__add_newtp(struct evlist *evlist,
-			   const char *sys, const char *name, void *handler);
+int evlist__add_newtp(struct evlist *evlist, const char *sys, const char *name, void *handler);
 
 int __evlist__set_tracepoints_handlers(struct evlist *evlist,
 				       const struct evsel_str_handler *assocs,
@@ -219,10 +223,10 @@ int perf_evlist__apply_filters(struct evlist *evlist, struct evsel **err_evsel);
 void __perf_evlist__set_leader(struct list_head *list);
 void perf_evlist__set_leader(struct evlist *evlist);
 
-u64 __perf_evlist__combined_sample_type(struct evlist *evlist);
-u64 perf_evlist__combined_sample_type(struct evlist *evlist);
-u64 perf_evlist__combined_branch_type(struct evlist *evlist);
-bool perf_evlist__sample_id_all(struct evlist *evlist);
+u64 __evlist__combined_sample_type(struct evlist *evlist);
+u64 evlist__combined_sample_type(struct evlist *evlist);
+u64 evlist__combined_branch_type(struct evlist *evlist);
+bool evlist__sample_id_all(struct evlist *evlist);
 u16 perf_evlist__id_hdr_size(struct evlist *evlist);
 
 int perf_evlist__parse_sample(struct evlist *evlist, union perf_event *event,
@@ -232,8 +236,8 @@ int perf_evlist__parse_sample_timestamp(struct evlist *evlist,
 					union perf_event *event,
 					u64 *timestamp);
 
-bool perf_evlist__valid_sample_type(struct evlist *evlist);
-bool perf_evlist__valid_sample_id_all(struct evlist *evlist);
+bool evlist__valid_sample_type(struct evlist *evlist);
+bool evlist__valid_sample_id_all(struct evlist *evlist);
 bool perf_evlist__valid_read_format(struct evlist *evlist);
 
 void perf_evlist__splice_list_tail(struct evlist *evlist,
@@ -258,8 +262,8 @@ static inline struct evsel *evlist__last(struct evlist *evlist)
 	return container_of(evsel, struct evsel, core);
 }
 
-int perf_evlist__strerror_open(struct evlist *evlist, int err, char *buf, size_t size);
-int perf_evlist__strerror_mmap(struct evlist *evlist, int err, char *buf, size_t size);
+int evlist__strerror_open(struct evlist *evlist, int err, char *buf, size_t size);
+int evlist__strerror_mmap(struct evlist *evlist, int err, char *buf, size_t size);
 
 bool perf_evlist__can_select_event(struct evlist *evlist, const char *str);
 void perf_evlist__to_front(struct evlist *evlist,
@@ -356,4 +360,25 @@ void perf_evlist__force_leader(struct evlist *evlist);
 struct evsel *perf_evlist__reset_weak_group(struct evlist *evlist,
 						 struct evsel *evsel,
 						bool close);
+#define EVLIST_CTL_CMD_ENABLE_TAG  "enable"
+#define EVLIST_CTL_CMD_DISABLE_TAG "disable"
+#define EVLIST_CTL_CMD_ACK_TAG     "ack\n"
+
+#define EVLIST_CTL_CMD_MAX_LEN 64
+
+enum evlist_ctl_cmd {
+	EVLIST_CTL_CMD_UNSUPPORTED = 0,
+	EVLIST_CTL_CMD_ENABLE,
+	EVLIST_CTL_CMD_DISABLE,
+	EVLIST_CTL_CMD_ACK
+};
+
+int evlist__initialize_ctlfd(struct evlist *evlist, int ctl_fd, int ctl_fd_ack);
+int evlist__finalize_ctlfd(struct evlist *evlist);
+bool evlist__ctlfd_initialized(struct evlist *evlist);
+int evlist__ctlfd_process(struct evlist *evlist, enum evlist_ctl_cmd *cmd);
+
+#define EVLIST_ENABLED_MSG "Events enabled\n"
+#define EVLIST_DISABLED_MSG "Events disabled\n"
+
 #endif /* __PERF_EVLIST_H */
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index ef802f6d40c1..459b51e90063 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -976,16 +976,20 @@ void evsel__config(struct evsel *evsel, struct record_opts *opts,
 	 * We default some events to have a default interval. But keep
 	 * it a weak assumption overridable by the user.
 	 */
-	if (!attr->sample_period || (opts->user_freq != UINT_MAX ||
-				     opts->user_interval != ULLONG_MAX)) {
+	if (!attr->sample_period) {
 		if (opts->freq) {
-			evsel__set_sample_bit(evsel, PERIOD);
 			attr->freq		= 1;
 			attr->sample_freq	= opts->freq;
 		} else {
 			attr->sample_period = opts->default_interval;
 		}
 	}
+	/*
+	 * If attr->freq was set (here or earlier), ask for period
+	 * to be sampled.
+	 */
+	if (attr->freq)
+		evsel__set_sample_bit(evsel, PERIOD);
 
 	if (opts->no_samples)
 		attr->sample_freq = 0;
@@ -1014,12 +1018,14 @@ void evsel__config(struct evsel *evsel, struct record_opts *opts,
 	if (callchain && callchain->enabled && !evsel->no_aux_samples)
 		evsel__config_callchain(evsel, opts, callchain);
 
-	if (opts->sample_intr_regs && !evsel->no_aux_samples) {
+	if (opts->sample_intr_regs && !evsel->no_aux_samples &&
+	    !evsel__is_dummy_event(evsel)) {
 		attr->sample_regs_intr = opts->sample_intr_regs;
 		evsel__set_sample_bit(evsel, REGS_INTR);
 	}
 
-	if (opts->sample_user_regs && !evsel->no_aux_samples) {
+	if (opts->sample_user_regs && !evsel->no_aux_samples &&
+	    !evsel__is_dummy_event(evsel)) {
 		attr->sample_regs_user |= opts->sample_user_regs;
 		evsel__set_sample_bit(evsel, REGS_USER);
 	}
@@ -1064,7 +1070,12 @@ void evsel__config(struct evsel *evsel, struct record_opts *opts,
 	attr->mmap  = track;
 	attr->mmap2 = track && !perf_missing_features.mmap2;
 	attr->comm  = track;
-	attr->ksymbol = track && !perf_missing_features.ksymbol;
+	/*
+	 * ksymbol is tracked separately with text poke because it needs to be
+	 * system wide and enabled immediately.
+	 */
+	if (!opts->text_poke)
+		attr->ksymbol = track && !perf_missing_features.ksymbol;
 	attr->bpf_event = track && !opts->no_bpf_event && !perf_missing_features.bpf;
 
 	if (opts->record_namespaces)
@@ -2495,8 +2506,10 @@ int evsel__open_strerror(struct evsel *evsel, struct target *target,
 
 		return scnprintf(msg + printed, size - printed,
 		 "Consider adjusting /proc/sys/kernel/perf_event_paranoid setting to open\n"
-		 "access to performance monitoring and observability operations for users\n"
-		 "without CAP_PERFMON or CAP_SYS_ADMIN Linux capability.\n"
+		 "access to performance monitoring and observability operations for processes\n"
+		 "without CAP_PERFMON, CAP_SYS_PTRACE or CAP_SYS_ADMIN Linux capability.\n"
+		 "More information can be found at 'Perf events and tool security' document:\n"
+		 "https://www.kernel.org/doc/html/latest/admin-guide/perf-security.html\n"
 		 "perf_event_paranoid setting is %d:\n"
 		 "  -1: Allow use of (almost) all events by all users\n"
 		 "      Ignore mlock limit after perf_event_mlock_kb without CAP_IPC_LOCK\n"
@@ -2528,6 +2541,10 @@ int evsel__open_strerror(struct evsel *evsel, struct target *target,
 	 "No such device - did you specify an out-of-range profile CPU?");
 		break;
 	case EOPNOTSUPP:
+		if (evsel->core.attr.aux_output)
+			return scnprintf(msg, size,
+	"%s: PMU Hardware doesn't support 'aux_output' feature",
+					 evsel__name(evsel));
 		if (evsel->core.attr.sample_period != 0)
 			return scnprintf(msg, size,
 	"%s: PMU Hardware doesn't support sampling/overflow-interrupts. Try 'perf stat'",
diff --git a/tools/perf/util/expr.c b/tools/perf/util/expr.c
index f64ab91c432b..53482ef53c41 100644
--- a/tools/perf/util/expr.c
+++ b/tools/perf/util/expr.c
@@ -1,10 +1,17 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <stdbool.h>
 #include <assert.h>
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+#include "metricgroup.h"
+#include "debug.h"
 #include "expr.h"
 #include "expr-bison.h"
 #include "expr-flex.h"
 #include <linux/kernel.h>
+#include <linux/zalloc.h>
+#include <ctype.h>
 
 #ifdef PARSER_DEBUG
 extern int expr_debug;
@@ -30,35 +37,144 @@ static bool key_equal(const void *key1, const void *key2,
 }
 
 /* Caller must make sure id is allocated */
-int expr__add_id(struct expr_parse_ctx *ctx, const char *name, double val)
+int expr__add_id(struct expr_parse_ctx *ctx, const char *id)
 {
-	double *val_ptr = NULL, *old_val = NULL;
+	struct expr_id_data *data_ptr = NULL, *old_data = NULL;
 	char *old_key = NULL;
 	int ret;
 
-	if (val != 0.0) {
-		val_ptr = malloc(sizeof(double));
-		if (!val_ptr)
-			return -ENOMEM;
-		*val_ptr = val;
+	data_ptr = malloc(sizeof(*data_ptr));
+	if (!data_ptr)
+		return -ENOMEM;
+
+	data_ptr->parent = ctx->parent;
+
+	ret = hashmap__set(&ctx->ids, id, data_ptr,
+			   (const void **)&old_key, (void **)&old_data);
+	if (ret)
+		free(data_ptr);
+	free(old_key);
+	free(old_data);
+	return ret;
+}
+
+/* Caller must make sure id is allocated */
+int expr__add_id_val(struct expr_parse_ctx *ctx, const char *id, double val)
+{
+	struct expr_id_data *data_ptr = NULL, *old_data = NULL;
+	char *old_key = NULL;
+	int ret;
+
+	data_ptr = malloc(sizeof(*data_ptr));
+	if (!data_ptr)
+		return -ENOMEM;
+	data_ptr->val = val;
+	data_ptr->is_ref = false;
+
+	ret = hashmap__set(&ctx->ids, id, data_ptr,
+			   (const void **)&old_key, (void **)&old_data);
+	if (ret)
+		free(data_ptr);
+	free(old_key);
+	free(old_data);
+	return ret;
+}
+
+int expr__add_ref(struct expr_parse_ctx *ctx, struct metric_ref *ref)
+{
+	struct expr_id_data *data_ptr = NULL, *old_data = NULL;
+	char *old_key = NULL;
+	char *name, *p;
+	int ret;
+
+	data_ptr = zalloc(sizeof(*data_ptr));
+	if (!data_ptr)
+		return -ENOMEM;
+
+	name = strdup(ref->metric_name);
+	if (!name) {
+		free(data_ptr);
+		return -ENOMEM;
 	}
-	ret = hashmap__set(&ctx->ids, name, val_ptr,
-			   (const void **)&old_key, (void **)&old_val);
+
+	/*
+	 * The jevents tool converts all metric expressions
+	 * to lowercase, including metric references, hence
+	 * we need to add lowercase name for metric, so it's
+	 * properly found.
+	 */
+	for (p = name; *p; p++)
+		*p = tolower(*p);
+
+	/*
+	 * Intentionally passing just const char pointers,
+	 * originally from 'struct pmu_event' object.
+	 * We don't need to change them, so there's no
+	 * need to create our own copy.
+	 */
+	data_ptr->ref.metric_name = ref->metric_name;
+	data_ptr->ref.metric_expr = ref->metric_expr;
+	data_ptr->ref.counted = false;
+	data_ptr->is_ref = true;
+
+	ret = hashmap__set(&ctx->ids, name, data_ptr,
+			   (const void **)&old_key, (void **)&old_data);
+	if (ret)
+		free(data_ptr);
+
+	pr_debug2("adding ref metric %s: %s\n",
+		  ref->metric_name, ref->metric_expr);
+
 	free(old_key);
-	free(old_val);
+	free(old_data);
 	return ret;
 }
 
-int expr__get_id(struct expr_parse_ctx *ctx, const char *id, double *val_ptr)
+int expr__get_id(struct expr_parse_ctx *ctx, const char *id,
+		 struct expr_id_data **data)
+{
+	return hashmap__find(&ctx->ids, id, (void **)data) ? 0 : -1;
+}
+
+int expr__resolve_id(struct expr_parse_ctx *ctx, const char *id,
+		     struct expr_id_data **datap)
 {
-	double *data;
+	struct expr_id_data *data;
 
-	if (!hashmap__find(&ctx->ids, id, (void **)&data))
+	if (expr__get_id(ctx, id, datap) || !*datap) {
+		pr_debug("%s not found\n", id);
 		return -1;
-	*val_ptr = (data == NULL) ?  0.0 : *data;
+	}
+
+	data = *datap;
+
+	pr_debug2("lookup: is_ref %d, counted %d, val %f: %s\n",
+		  data->is_ref, data->ref.counted, data->val, id);
+
+	if (data->is_ref && !data->ref.counted) {
+		data->ref.counted = true;
+		pr_debug("processing metric: %s ENTRY\n", id);
+		if (expr__parse(&data->val, ctx, data->ref.metric_expr, 1)) {
+			pr_debug("%s failed to count\n", id);
+			return -1;
+		}
+		pr_debug("processing metric: %s EXIT: %f\n", id, data->val);
+	}
+
 	return 0;
 }
 
+void expr__del_id(struct expr_parse_ctx *ctx, const char *id)
+{
+	struct expr_id_data *old_val = NULL;
+	char *old_key = NULL;
+
+	hashmap__delete(&ctx->ids, id,
+			(const void **)&old_key, (void **)&old_val);
+	free(old_key);
+	free(old_val);
+}
+
 void expr__ctx_init(struct expr_parse_ctx *ctx)
 {
 	hashmap__init(&ctx->ids, key_hash, key_equal, NULL);
@@ -88,6 +204,8 @@ __expr__parse(double *val, struct expr_parse_ctx *ctx, const char *expr,
 	void *scanner;
 	int ret;
 
+	pr_debug2("parsing metric: %s\n", expr);
+
 	ret = expr_lex_init_extra(&scanner_ctx, &scanner);
 	if (ret)
 		return ret;
@@ -116,16 +234,10 @@ int expr__parse(double *final_val, struct expr_parse_ctx *ctx,
 int expr__find_other(const char *expr, const char *one,
 		     struct expr_parse_ctx *ctx, int runtime)
 {
-	double *old_val = NULL;
-	char *old_key = NULL;
 	int ret = __expr__parse(NULL, ctx, expr, EXPR_OTHER, runtime);
 
-	if (one) {
-		hashmap__delete(&ctx->ids, one,
-				(const void **)&old_key, (void **)&old_val);
-		free(old_key);
-		free(old_val);
-	}
+	if (one)
+		expr__del_id(ctx, one);
 
 	return ret;
 }
diff --git a/tools/perf/util/expr.h b/tools/perf/util/expr.h
index 8a2c1074f90f..fc2b5e824a66 100644
--- a/tools/perf/util/expr.h
+++ b/tools/perf/util/expr.h
@@ -11,8 +11,30 @@
 #include "util/hashmap.h"
 //#endif
 
+struct metric_ref;
+
+struct expr_id {
+	char		*id;
+	struct expr_id	*parent;
+};
+
 struct expr_parse_ctx {
-	struct hashmap ids;
+	struct hashmap	 ids;
+	struct expr_id	*parent;
+};
+
+struct expr_id_data {
+	union {
+		double val;
+		struct {
+			const char *metric_name;
+			const char *metric_expr;
+			bool counted;
+		} ref;
+		struct expr_id	*parent;
+	};
+
+	bool is_ref;
 };
 
 struct expr_scanner_ctx {
@@ -22,8 +44,14 @@ struct expr_scanner_ctx {
 
 void expr__ctx_init(struct expr_parse_ctx *ctx);
 void expr__ctx_clear(struct expr_parse_ctx *ctx);
-int expr__add_id(struct expr_parse_ctx *ctx, const char *id, double val);
-int expr__get_id(struct expr_parse_ctx *ctx, const char *id, double *val_ptr);
+void expr__del_id(struct expr_parse_ctx *ctx, const char *id);
+int expr__add_id(struct expr_parse_ctx *ctx, const char *id);
+int expr__add_id_val(struct expr_parse_ctx *ctx, const char *id, double val);
+int expr__add_ref(struct expr_parse_ctx *ctx, struct metric_ref *ref);
+int expr__get_id(struct expr_parse_ctx *ctx, const char *id,
+		 struct expr_id_data **data);
+int expr__resolve_id(struct expr_parse_ctx *ctx, const char *id,
+		     struct expr_id_data **datap);
 int expr__parse(double *final_val, struct expr_parse_ctx *ctx,
 		const char *expr, int runtime);
 int expr__find_other(const char *expr, const char *one,
diff --git a/tools/perf/util/expr.l b/tools/perf/util/expr.l
index f397bf8b1a48..13e5e3c75f56 100644
--- a/tools/perf/util/expr.l
+++ b/tools/perf/util/expr.l
@@ -100,6 +100,7 @@ symbol		({spec}|{sym})+
 		}
 	}
 
+d_ratio		{ return D_RATIO; }
 max		{ return MAX; }
 min		{ return MIN; }
 if		{ return IF; }
@@ -110,6 +111,8 @@ else		{ return ELSE; }
 "|"		{ return '|'; }
 "^"		{ return '^'; }
 "&"		{ return '&'; }
+"<"		{ return '<'; }
+">"		{ return '>'; }
 "-"		{ return '-'; }
 "+"		{ return '+'; }
 "*"		{ return '*'; }
diff --git a/tools/perf/util/expr.y b/tools/perf/util/expr.y
index bf3e898e3055..d34b370391c6 100644
--- a/tools/perf/util/expr.y
+++ b/tools/perf/util/expr.y
@@ -10,6 +10,14 @@
 #include "smt.h"
 #include <string.h>
 
+static double d_ratio(double val0, double val1)
+{
+	if (val1 == 0) {
+		return 0;
+	}
+	return  val0 / val1;
+}
+
 %}
 
 %define api.pure full
@@ -28,11 +36,12 @@
 %token <num> NUMBER
 %token <str> ID
 %destructor { free ($$); } <str>
-%token MIN MAX IF ELSE SMT_ON
+%token MIN MAX IF ELSE SMT_ON D_RATIO
 %left MIN MAX IF
 %left '|'
 %left '^'
 %left '&'
+%left '<' '>'
 %left '-' '+'
 %left '*' '/' '%'
 %left NEG NOT
@@ -60,11 +69,12 @@ all_other: all_other other
 
 other: ID
 {
-	expr__add_id(ctx, $1, 0.0);
+	expr__add_id(ctx, $1);
 }
 |
 MIN | MAX | IF | ELSE | SMT_ON | NUMBER | '|' | '^' | '&' | '-' | '+' | '*' | '/' | '%' | '(' | ')' | ','
-
+|
+'<' | '>' | D_RATIO
 
 all_expr: if_expr			{ *final_val = $1; }
 	;
@@ -75,16 +85,22 @@ if_expr:
 	;
 
 expr:	  NUMBER
-	| ID			{ if (expr__get_id(ctx, $1, &$$)) {
-					pr_debug("%s not found\n", $1);
+	| ID			{
+					struct expr_id_data *data;
+
+					if (expr__resolve_id(ctx, $1, &data)) {
+						free($1);
+						YYABORT;
+					}
+
+					$$ = data->val;
 					free($1);
-					YYABORT;
-				  }
-				  free($1);
 				}
 	| expr '|' expr		{ $$ = (long)$1 | (long)$3; }
 	| expr '&' expr		{ $$ = (long)$1 & (long)$3; }
 	| expr '^' expr		{ $$ = (long)$1 ^ (long)$3; }
+	| expr '<' expr		{ $$ = $1 < $3; }
+	| expr '>' expr		{ $$ = $1 > $3; }
 	| expr '+' expr		{ $$ = $1 + $3; }
 	| expr '-' expr		{ $$ = $1 - $3; }
 	| expr '*' expr		{ $$ = $1 * $3; }
@@ -105,6 +121,7 @@ expr:	  NUMBER
 	| MIN '(' expr ',' expr ')' { $$ = $3 < $5 ? $3 : $5; }
 	| MAX '(' expr ',' expr ')' { $$ = $3 > $5 ? $3 : $5; }
 	| SMT_ON		 { $$ = smt_on() > 0; }
+	| D_RATIO '(' expr ',' expr ')' { $$ = d_ratio($3,$5); }
 	;
 
 %%
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index 7a67d017d72c..9cf4efdcbbbd 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -46,6 +46,7 @@
 #include "util/util.h" // perf_exe()
 #include "cputopo.h"
 #include "bpf-event.h"
+#include "clockid.h"
 
 #include <linux/ctype.h>
 #include <internal/lib.h>
@@ -891,8 +892,42 @@ static int write_auxtrace(struct feat_fd *ff,
 static int write_clockid(struct feat_fd *ff,
 			 struct evlist *evlist __maybe_unused)
 {
-	return do_write(ff, &ff->ph->env.clockid_res_ns,
-			sizeof(ff->ph->env.clockid_res_ns));
+	return do_write(ff, &ff->ph->env.clock.clockid_res_ns,
+			sizeof(ff->ph->env.clock.clockid_res_ns));
+}
+
+static int write_clock_data(struct feat_fd *ff,
+			    struct evlist *evlist __maybe_unused)
+{
+	u64 *data64;
+	u32 data32;
+	int ret;
+
+	/* version */
+	data32 = 1;
+
+	ret = do_write(ff, &data32, sizeof(data32));
+	if (ret < 0)
+		return ret;
+
+	/* clockid */
+	data32 = ff->ph->env.clock.clockid;
+
+	ret = do_write(ff, &data32, sizeof(data32));
+	if (ret < 0)
+		return ret;
+
+	/* TOD ref time */
+	data64 = &ff->ph->env.clock.tod_ns;
+
+	ret = do_write(ff, data64, sizeof(*data64));
+	if (ret < 0)
+		return ret;
+
+	/* clockid ref time */
+	data64 = &ff->ph->env.clock.clockid_ns;
+
+	return do_write(ff, data64, sizeof(*data64));
 }
 
 static int write_dir_format(struct feat_fd *ff,
@@ -1546,7 +1581,50 @@ static void print_cpu_topology(struct feat_fd *ff, FILE *fp)
 static void print_clockid(struct feat_fd *ff, FILE *fp)
 {
 	fprintf(fp, "# clockid frequency: %"PRIu64" MHz\n",
-		ff->ph->env.clockid_res_ns * 1000);
+		ff->ph->env.clock.clockid_res_ns * 1000);
+}
+
+static void print_clock_data(struct feat_fd *ff, FILE *fp)
+{
+	struct timespec clockid_ns;
+	char tstr[64], date[64];
+	struct timeval tod_ns;
+	clockid_t clockid;
+	struct tm ltime;
+	u64 ref;
+
+	if (!ff->ph->env.clock.enabled) {
+		fprintf(fp, "# reference time disabled\n");
+		return;
+	}
+
+	/* Compute TOD time. */
+	ref = ff->ph->env.clock.tod_ns;
+	tod_ns.tv_sec = ref / NSEC_PER_SEC;
+	ref -= tod_ns.tv_sec * NSEC_PER_SEC;
+	tod_ns.tv_usec = ref / NSEC_PER_USEC;
+
+	/* Compute clockid time. */
+	ref = ff->ph->env.clock.clockid_ns;
+	clockid_ns.tv_sec = ref / NSEC_PER_SEC;
+	ref -= clockid_ns.tv_sec * NSEC_PER_SEC;
+	clockid_ns.tv_nsec = ref;
+
+	clockid = ff->ph->env.clock.clockid;
+
+	if (localtime_r(&tod_ns.tv_sec, &ltime) == NULL)
+		snprintf(tstr, sizeof(tstr), "<error>");
+	else {
+		strftime(date, sizeof(date), "%F %T", &ltime);
+		scnprintf(tstr, sizeof(tstr), "%s.%06d",
+			  date, (int) tod_ns.tv_usec);
+	}
+
+	fprintf(fp, "# clockid: %s (%u)\n", clockid_name(clockid), clockid);
+	fprintf(fp, "# reference time: %s = %ld.%06d (TOD) = %ld.%09ld (%s)\n",
+		    tstr, tod_ns.tv_sec, (int) tod_ns.tv_usec,
+		    clockid_ns.tv_sec, clockid_ns.tv_nsec,
+		    clockid_name(clockid));
 }
 
 static void print_dir_format(struct feat_fd *ff, FILE *fp)
@@ -1978,7 +2056,7 @@ static int __event_process_build_id(struct perf_record_header_build_id *bev,
 	struct machine *machine;
 	u16 cpumode;
 	struct dso *dso;
-	enum dso_kernel_type dso_type;
+	enum dso_space_type dso_space;
 
 	machine = perf_session__findnew_machine(session, bev->pid);
 	if (!machine)
@@ -1988,14 +2066,14 @@ static int __event_process_build_id(struct perf_record_header_build_id *bev,
 
 	switch (cpumode) {
 	case PERF_RECORD_MISC_KERNEL:
-		dso_type = DSO_TYPE_KERNEL;
+		dso_space = DSO_SPACE__KERNEL;
 		break;
 	case PERF_RECORD_MISC_GUEST_KERNEL:
-		dso_type = DSO_TYPE_GUEST_KERNEL;
+		dso_space = DSO_SPACE__KERNEL_GUEST;
 		break;
 	case PERF_RECORD_MISC_USER:
 	case PERF_RECORD_MISC_GUEST_USER:
-		dso_type = DSO_TYPE_USER;
+		dso_space = DSO_SPACE__USER;
 		break;
 	default:
 		goto out;
@@ -2007,14 +2085,13 @@ static int __event_process_build_id(struct perf_record_header_build_id *bev,
 
 		dso__set_build_id(dso, &bev->build_id);
 
-		if (dso_type != DSO_TYPE_USER) {
+		if (dso_space != DSO_SPACE__USER) {
 			struct kmod_path m = { .name = NULL, };
 
 			if (!kmod_path__parse_name(&m, filename) && m.kmod)
 				dso__set_module_info(dso, &m, machine);
-			else
-				dso->kernel = dso_type;
 
+			dso->kernel = dso_space;
 			free(m.name);
 		}
 
@@ -2732,9 +2809,43 @@ out:
 static int process_clockid(struct feat_fd *ff,
 			   void *data __maybe_unused)
 {
-	if (do_read_u64(ff, &ff->ph->env.clockid_res_ns))
+	if (do_read_u64(ff, &ff->ph->env.clock.clockid_res_ns))
+		return -1;
+
+	return 0;
+}
+
+static int process_clock_data(struct feat_fd *ff,
+			      void *_data __maybe_unused)
+{
+	u32 data32;
+	u64 data64;
+
+	/* version */
+	if (do_read_u32(ff, &data32))
+		return -1;
+
+	if (data32 != 1)
+		return -1;
+
+	/* clockid */
+	if (do_read_u32(ff, &data32))
+		return -1;
+
+	ff->ph->env.clock.clockid = data32;
+
+	/* TOD ref time */
+	if (do_read_u64(ff, &data64))
+		return -1;
+
+	ff->ph->env.clock.tod_ns = data64;
+
+	/* clockid ref time */
+	if (do_read_u64(ff, &data64))
 		return -1;
 
+	ff->ph->env.clock.clockid_ns = data64;
+	ff->ph->env.clock.enabled = true;
 	return 0;
 }
 
@@ -3008,6 +3119,7 @@ const struct perf_header_feature_ops feat_ops[HEADER_LAST_FEATURE] = {
 	FEAT_OPR(BPF_BTF,       bpf_btf,        false),
 	FEAT_OPR(COMPRESSED,	compressed,	false),
 	FEAT_OPR(CPU_PMU_CAPS,	cpu_pmu_caps,	false),
+	FEAT_OPR(CLOCK_DATA,	clock_data,	false),
 };
 
 struct header_print_data {
diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h
index 650bd1c7a99b..2aca71763ecf 100644
--- a/tools/perf/util/header.h
+++ b/tools/perf/util/header.h
@@ -44,6 +44,7 @@ enum {
 	HEADER_BPF_BTF,
 	HEADER_COMPRESSED,
 	HEADER_CPU_PMU_CAPS,
+	HEADER_CLOCK_DATA,
 	HEADER_LAST_FEATURE,
 	HEADER_FEAT_BITS	= 256,
 };
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
index f8ccfd6be0ee..697513f35154 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
@@ -55,6 +55,7 @@ enum intel_pt_pkt_state {
 	INTEL_PT_STATE_TIP_PGD,
 	INTEL_PT_STATE_FUP,
 	INTEL_PT_STATE_FUP_NO_TIP,
+	INTEL_PT_STATE_RESAMPLE,
 };
 
 static inline bool intel_pt_sample_time(enum intel_pt_pkt_state pkt_state)
@@ -65,6 +66,7 @@ static inline bool intel_pt_sample_time(enum intel_pt_pkt_state pkt_state)
 	case INTEL_PT_STATE_ERR_RESYNC:
 	case INTEL_PT_STATE_IN_SYNC:
 	case INTEL_PT_STATE_TNT_CONT:
+	case INTEL_PT_STATE_RESAMPLE:
 		return true;
 	case INTEL_PT_STATE_TNT:
 	case INTEL_PT_STATE_TIP:
@@ -109,6 +111,9 @@ struct intel_pt_decoder {
 	bool fixup_last_mtc;
 	bool have_last_ip;
 	bool in_psb;
+	bool hop;
+	bool hop_psb_fup;
+	bool leap;
 	enum intel_pt_param_flags flags;
 	uint64_t pos;
 	uint64_t last_ip;
@@ -235,6 +240,8 @@ struct intel_pt_decoder *intel_pt_decoder_new(struct intel_pt_params *params)
 	decoder->data               = params->data;
 	decoder->return_compression = params->return_compression;
 	decoder->branch_enable      = params->branch_enable;
+	decoder->hop                = params->quick >= 1;
+	decoder->leap               = params->quick >= 2;
 
 	decoder->flags              = params->flags;
 
@@ -275,6 +282,9 @@ struct intel_pt_decoder *intel_pt_decoder_new(struct intel_pt_params *params)
 	intel_pt_log("timestamp: tsc_ctc_mult %u\n", decoder->tsc_ctc_mult);
 	intel_pt_log("timestamp: tsc_slip %#x\n", decoder->tsc_slip);
 
+	if (decoder->hop)
+		intel_pt_log("Hop mode: decoding FUP and TIPs, but not TNT\n");
+
 	return decoder;
 }
 
@@ -1164,6 +1174,7 @@ static int intel_pt_walk_fup(struct intel_pt_decoder *decoder)
 			return 0;
 		if (err == -EAGAIN ||
 		    intel_pt_fup_with_nlip(decoder, &intel_pt_insn, ip, err)) {
+			decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
 			if (intel_pt_fup_event(decoder))
 				return 0;
 			return -EAGAIN;
@@ -1729,8 +1740,14 @@ static int intel_pt_walk_psbend(struct intel_pt_decoder *decoder)
 
 		case INTEL_PT_FUP:
 			decoder->pge = true;
-			if (decoder->packet.count)
+			if (decoder->packet.count) {
 				intel_pt_set_last_ip(decoder);
+				if (decoder->hop) {
+					/* Act on FUP at PSBEND */
+					decoder->ip = decoder->last_ip;
+					decoder->hop_psb_fup = true;
+				}
+			}
 			break;
 
 		case INTEL_PT_MODE_TSX:
@@ -1874,6 +1891,127 @@ static int intel_pt_walk_fup_tip(struct intel_pt_decoder *decoder)
 	}
 }
 
+static int intel_pt_resample(struct intel_pt_decoder *decoder)
+{
+	decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
+	decoder->state.type = INTEL_PT_INSTRUCTION;
+	decoder->state.from_ip = decoder->ip;
+	decoder->state.to_ip = 0;
+	return 0;
+}
+
+#define HOP_PROCESS	0
+#define HOP_IGNORE	1
+#define HOP_RETURN	2
+#define HOP_AGAIN	3
+
+static int intel_pt_scan_for_psb(struct intel_pt_decoder *decoder);
+
+/* Hop mode: Ignore TNT, do not walk code, but get ip from FUPs and TIPs */
+static int intel_pt_hop_trace(struct intel_pt_decoder *decoder, bool *no_tip, int *err)
+{
+	/* Leap from PSB to PSB, getting ip from FUP within PSB+ */
+	if (decoder->leap && !decoder->in_psb && decoder->packet.type != INTEL_PT_PSB) {
+		*err = intel_pt_scan_for_psb(decoder);
+		if (*err)
+			return HOP_RETURN;
+	}
+
+	switch (decoder->packet.type) {
+	case INTEL_PT_TNT:
+		return HOP_IGNORE;
+
+	case INTEL_PT_TIP_PGD:
+		if (!decoder->packet.count)
+			return HOP_IGNORE;
+		intel_pt_set_ip(decoder);
+		decoder->state.type |= INTEL_PT_TRACE_END;
+		decoder->state.from_ip = 0;
+		decoder->state.to_ip = decoder->ip;
+		return HOP_RETURN;
+
+	case INTEL_PT_TIP:
+		if (!decoder->packet.count)
+			return HOP_IGNORE;
+		intel_pt_set_ip(decoder);
+		decoder->state.type = INTEL_PT_INSTRUCTION;
+		decoder->state.from_ip = decoder->ip;
+		decoder->state.to_ip = 0;
+		return HOP_RETURN;
+
+	case INTEL_PT_FUP:
+		if (!decoder->packet.count)
+			return HOP_IGNORE;
+		intel_pt_set_ip(decoder);
+		if (intel_pt_fup_event(decoder))
+			return HOP_RETURN;
+		if (!decoder->branch_enable)
+			*no_tip = true;
+		if (*no_tip) {
+			decoder->state.type = INTEL_PT_INSTRUCTION;
+			decoder->state.from_ip = decoder->ip;
+			decoder->state.to_ip = 0;
+			return HOP_RETURN;
+		}
+		*err = intel_pt_walk_fup_tip(decoder);
+		if (!*err)
+			decoder->pkt_state = INTEL_PT_STATE_RESAMPLE;
+		return HOP_RETURN;
+
+	case INTEL_PT_PSB:
+		decoder->last_ip = 0;
+		decoder->have_last_ip = true;
+		decoder->hop_psb_fup = false;
+		*err = intel_pt_walk_psbend(decoder);
+		if (*err == -EAGAIN)
+			return HOP_AGAIN;
+		if (*err)
+			return HOP_RETURN;
+		if (decoder->hop_psb_fup) {
+			decoder->hop_psb_fup = false;
+			decoder->state.type = INTEL_PT_INSTRUCTION;
+			decoder->state.from_ip = decoder->ip;
+			decoder->state.to_ip = 0;
+			return HOP_RETURN;
+		}
+		if (decoder->cbr != decoder->cbr_seen) {
+			decoder->state.type = 0;
+			return HOP_RETURN;
+		}
+		return HOP_IGNORE;
+
+	case INTEL_PT_BAD:
+	case INTEL_PT_PAD:
+	case INTEL_PT_TIP_PGE:
+	case INTEL_PT_TSC:
+	case INTEL_PT_TMA:
+	case INTEL_PT_MODE_EXEC:
+	case INTEL_PT_MODE_TSX:
+	case INTEL_PT_MTC:
+	case INTEL_PT_CYC:
+	case INTEL_PT_VMCS:
+	case INTEL_PT_PSBEND:
+	case INTEL_PT_CBR:
+	case INTEL_PT_TRACESTOP:
+	case INTEL_PT_PIP:
+	case INTEL_PT_OVF:
+	case INTEL_PT_MNT:
+	case INTEL_PT_PTWRITE:
+	case INTEL_PT_PTWRITE_IP:
+	case INTEL_PT_EXSTOP:
+	case INTEL_PT_EXSTOP_IP:
+	case INTEL_PT_MWAIT:
+	case INTEL_PT_PWRE:
+	case INTEL_PT_PWRX:
+	case INTEL_PT_BBP:
+	case INTEL_PT_BIP:
+	case INTEL_PT_BEP:
+	case INTEL_PT_BEP_IP:
+	default:
+		return HOP_PROCESS;
+	}
+}
+
 static int intel_pt_walk_trace(struct intel_pt_decoder *decoder)
 {
 	bool no_tip = false;
@@ -1884,6 +2022,19 @@ static int intel_pt_walk_trace(struct intel_pt_decoder *decoder)
 		if (err)
 			return err;
 next:
+		if (decoder->hop) {
+			switch (intel_pt_hop_trace(decoder, &no_tip, &err)) {
+			case HOP_IGNORE:
+				continue;
+			case HOP_RETURN:
+				return err;
+			case HOP_AGAIN:
+				goto next;
+			default:
+				break;
+			}
+		}
+
 		switch (decoder->packet.type) {
 		case INTEL_PT_TNT:
 			if (!decoder->packet.count)
@@ -1913,6 +2064,12 @@ next:
 			decoder->state.from_ip = 0;
 			decoder->state.to_ip = decoder->ip;
 			decoder->state.type |= INTEL_PT_TRACE_BEGIN;
+			/*
+			 * In hop mode, resample to get the to_ip as an
+			 * "instruction" sample.
+			 */
+			if (decoder->hop)
+				decoder->pkt_state = INTEL_PT_STATE_RESAMPLE;
 			return 0;
 		}
 
@@ -1942,17 +2099,13 @@ next:
 			}
 			if (decoder->set_fup_mwait)
 				no_tip = true;
+			if (no_tip)
+				decoder->pkt_state = INTEL_PT_STATE_FUP_NO_TIP;
+			else
+				decoder->pkt_state = INTEL_PT_STATE_FUP;
 			err = intel_pt_walk_fup(decoder);
-			if (err != -EAGAIN) {
-				if (err)
-					return err;
-				if (no_tip)
-					decoder->pkt_state =
-						INTEL_PT_STATE_FUP_NO_TIP;
-				else
-					decoder->pkt_state = INTEL_PT_STATE_FUP;
-				return 0;
-			}
+			if (err != -EAGAIN)
+				return err;
 			if (no_tip) {
 				no_tip = false;
 				break;
@@ -1980,8 +2133,10 @@ next:
 			 * possibility of another CBR change that gets caught up
 			 * in the PSB+.
 			 */
-			if (decoder->cbr != decoder->cbr_seen)
+			if (decoder->cbr != decoder->cbr_seen) {
+				decoder->state.type = 0;
 				return 0;
+			}
 			break;
 
 		case INTEL_PT_PIP:
@@ -2022,8 +2177,10 @@ next:
 
 		case INTEL_PT_CBR:
 			intel_pt_calc_cbr(decoder);
-			if (decoder->cbr != decoder->cbr_seen)
+			if (decoder->cbr != decoder->cbr_seen) {
+				decoder->state.type = 0;
 				return 0;
+			}
 			break;
 
 		case INTEL_PT_MODE_EXEC:
@@ -2032,7 +2189,7 @@ next:
 
 		case INTEL_PT_MODE_TSX:
 			/* MODE_TSX need not be followed by FUP */
-			if (!decoder->pge) {
+			if (!decoder->pge || decoder->in_psb) {
 				intel_pt_update_in_tx(decoder);
 				break;
 			}
@@ -2423,7 +2580,11 @@ static int intel_pt_sync_ip(struct intel_pt_decoder *decoder)
 	if (err)
 		return err;
 
-	decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
+	/* In hop mode, resample to get the to_ip as an "instruction" sample */
+	if (decoder->hop)
+		decoder->pkt_state = INTEL_PT_STATE_RESAMPLE;
+	else
+		decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
 	decoder->overflow = false;
 
 	decoder->state.from_ip = 0;
@@ -2531,6 +2692,7 @@ static int intel_pt_sync(struct intel_pt_decoder *decoder)
 	decoder->ip = 0;
 	intel_pt_clear_stack(&decoder->stack);
 
+leap:
 	err = intel_pt_scan_for_psb(decoder);
 	if (err)
 		return err;
@@ -2544,7 +2706,20 @@ static int intel_pt_sync(struct intel_pt_decoder *decoder)
 
 	if (decoder->ip) {
 		decoder->state.type = 0; /* Do not have a sample */
-		decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
+		/*
+		 * In hop mode, resample to get the PSB FUP ip as an
+		 * "instruction" sample.
+		 */
+		if (decoder->hop)
+			decoder->pkt_state = INTEL_PT_STATE_RESAMPLE;
+		else
+			decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
+	} else if (decoder->leap) {
+		/*
+		 * In leap mode, only PSB+ is decoded, so keeping leaping to the
+		 * next PSB until there is an ip.
+		 */
+		goto leap;
 	} else {
 		return intel_pt_sync_ip(decoder);
 	}
@@ -2599,19 +2774,18 @@ const struct intel_pt_state *intel_pt_decode(struct intel_pt_decoder *decoder)
 			err = intel_pt_walk_tip(decoder);
 			break;
 		case INTEL_PT_STATE_FUP:
-			decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
 			err = intel_pt_walk_fup(decoder);
 			if (err == -EAGAIN)
 				err = intel_pt_walk_fup_tip(decoder);
-			else if (!err)
-				decoder->pkt_state = INTEL_PT_STATE_FUP;
 			break;
 		case INTEL_PT_STATE_FUP_NO_TIP:
-			decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
 			err = intel_pt_walk_fup(decoder);
 			if (err == -EAGAIN)
 				err = intel_pt_walk_trace(decoder);
 			break;
+		case INTEL_PT_STATE_RESAMPLE:
+			err = intel_pt_resample(decoder);
+			break;
 		default:
 			err = intel_pt_bug(decoder);
 			break;
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h
index e289e463d635..8645fc265481 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h
@@ -250,6 +250,7 @@ struct intel_pt_params {
 	uint32_t tsc_ctc_ratio_n;
 	uint32_t tsc_ctc_ratio_d;
 	enum intel_pt_param_flags flags;
+	unsigned int quick;
 };
 
 struct intel_pt_decoder;
diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c
index cb3c1e569a2d..0af4e81c46e2 100644
--- a/tools/perf/util/intel-pt.c
+++ b/tools/perf/util/intel-pt.c
@@ -236,7 +236,7 @@ static void intel_pt_log_event(union perf_event *event)
 	if (!intel_pt_enable_logging || !f)
 		return;
 
-	perf_event__fprintf(event, f);
+	perf_event__fprintf(event, NULL, f);
 }
 
 static void intel_pt_dump_sample(struct perf_session *session,
@@ -249,6 +249,24 @@ static void intel_pt_dump_sample(struct perf_session *session,
 	intel_pt_dump(pt, sample->aux_sample.data, sample->aux_sample.size);
 }
 
+static bool intel_pt_log_events(struct intel_pt *pt, u64 tm)
+{
+	struct perf_time_interval *range = pt->synth_opts.ptime_range;
+	int n = pt->synth_opts.range_num;
+
+	if (pt->synth_opts.log_plus_flags & AUXTRACE_LOG_FLG_ALL_PERF_EVTS)
+		return true;
+
+	if (pt->synth_opts.log_minus_flags & AUXTRACE_LOG_FLG_ALL_PERF_EVTS)
+		return false;
+
+	/* perf_time__ranges_skip_sample does not work if time is zero */
+	if (!tm)
+		tm = 1;
+
+	return !n || !perf_time__ranges_skip_sample(range, n, tm);
+}
+
 static int intel_pt_do_fix_overlap(struct intel_pt *pt, struct auxtrace_buffer *a,
 				   struct auxtrace_buffer *b)
 {
@@ -520,6 +538,17 @@ intel_pt_cache_lookup(struct dso *dso, struct machine *machine, u64 offset)
 	return auxtrace_cache__lookup(dso->auxtrace_cache, offset);
 }
 
+static void intel_pt_cache_invalidate(struct dso *dso, struct machine *machine,
+				      u64 offset)
+{
+	struct auxtrace_cache *c = intel_pt_cache(dso, machine);
+
+	if (!c)
+		return;
+
+	auxtrace_cache__remove(dso->auxtrace_cache, offset);
+}
+
 static inline u8 intel_pt_cpumode(struct intel_pt *pt, uint64_t ip)
 {
 	return ip >= pt->kernel_start ?
@@ -1001,6 +1030,7 @@ static struct intel_pt_queue *intel_pt_alloc_queue(struct intel_pt *pt,
 	params.mtc_period = intel_pt_mtc_period(pt);
 	params.tsc_ctc_ratio_n = pt->tsc_ctc_ratio_n;
 	params.tsc_ctc_ratio_d = pt->tsc_ctc_ratio_d;
+	params.quick = pt->synth_opts.quick;
 
 	if (pt->filts.cnt > 0)
 		params.pgd_ip = intel_pt_pgd_ip;
@@ -1394,7 +1424,10 @@ static int intel_pt_synth_instruction_sample(struct intel_pt_queue *ptq)
 
 	sample.id = ptq->pt->instructions_id;
 	sample.stream_id = ptq->pt->instructions_id;
-	sample.period = ptq->state->tot_insn_cnt - ptq->last_insn_cnt;
+	if (pt->synth_opts.quick)
+		sample.period = 1;
+	else
+		sample.period = ptq->state->tot_insn_cnt - ptq->last_insn_cnt;
 
 	sample.cyc_cnt = ptq->ipc_cyc_cnt - ptq->last_in_cyc_cnt;
 	if (sample.cyc_cnt) {
@@ -1852,6 +1885,15 @@ static int intel_pt_synth_error(struct intel_pt *pt, int code, int cpu,
 	char msg[MAX_AUXTRACE_ERROR_MSG];
 	int err;
 
+	if (pt->synth_opts.error_minus_flags) {
+		if (code == INTEL_PT_ERR_OVR &&
+		    pt->synth_opts.error_minus_flags & AUXTRACE_ERR_FLG_OVERFLOW)
+			return 0;
+		if (code == INTEL_PT_ERR_LOST &&
+		    pt->synth_opts.error_minus_flags & AUXTRACE_ERR_FLG_DATA_LOST)
+			return 0;
+	}
+
 	intel_pt__strerror(code, msg, MAX_AUXTRACE_ERROR_MSG);
 
 	auxtrace_synth_error(&event.auxtrace_error, PERF_AUXTRACE_ERROR_ITRACE,
@@ -2566,10 +2608,6 @@ static int intel_pt_context_switch(struct intel_pt *pt, union perf_event *event,
 		return -EINVAL;
 	}
 
-	intel_pt_log("context_switch: cpu %d pid %d tid %d time %"PRIu64" tsc %#"PRIx64"\n",
-		     cpu, pid, tid, sample->time, perf_time_to_tsc(sample->time,
-		     &pt->tc));
-
 	ret = intel_pt_sync_switch(pt, cpu, tid, sample->time);
 	if (ret <= 0)
 		return ret;
@@ -2594,6 +2632,67 @@ static int intel_pt_process_itrace_start(struct intel_pt *pt,
 					event->itrace_start.tid);
 }
 
+static int intel_pt_find_map(struct thread *thread, u8 cpumode, u64 addr,
+			     struct addr_location *al)
+{
+	if (!al->map || addr < al->map->start || addr >= al->map->end) {
+		if (!thread__find_map(thread, cpumode, addr, al))
+			return -1;
+	}
+
+	return 0;
+}
+
+/* Invalidate all instruction cache entries that overlap the text poke */
+static int intel_pt_text_poke(struct intel_pt *pt, union perf_event *event)
+{
+	u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
+	u64 addr = event->text_poke.addr + event->text_poke.new_len - 1;
+	/* Assume text poke begins in a basic block no more than 4096 bytes */
+	int cnt = 4096 + event->text_poke.new_len;
+	struct thread *thread = pt->unknown_thread;
+	struct addr_location al = { .map = NULL };
+	struct machine *machine = pt->machine;
+	struct intel_pt_cache_entry *e;
+	u64 offset;
+
+	if (!event->text_poke.new_len)
+		return 0;
+
+	for (; cnt; cnt--, addr--) {
+		if (intel_pt_find_map(thread, cpumode, addr, &al)) {
+			if (addr < event->text_poke.addr)
+				return 0;
+			continue;
+		}
+
+		if (!al.map->dso || !al.map->dso->auxtrace_cache)
+			continue;
+
+		offset = al.map->map_ip(al.map, addr);
+
+		e = intel_pt_cache_lookup(al.map->dso, machine, offset);
+		if (!e)
+			continue;
+
+		if (addr + e->byte_cnt + e->length <= event->text_poke.addr) {
+			/*
+			 * No overlap. Working backwards there cannot be another
+			 * basic block that overlaps the text poke if there is a
+			 * branch instruction before the text poke address.
+			 */
+			if (e->branch != INTEL_PT_BR_NO_BRANCH)
+				return 0;
+		} else {
+			intel_pt_cache_invalidate(al.map->dso, machine, offset);
+			intel_pt_log("Invalidated instruction cache for %s at %#"PRIx64"\n",
+				     al.map->dso->long_name, addr);
+		}
+	}
+
+	return 0;
+}
+
 static int intel_pt_process_event(struct perf_session *session,
 				  union perf_event *event,
 				  struct perf_sample *sample,
@@ -2662,9 +2761,14 @@ static int intel_pt_process_event(struct perf_session *session,
 		 event->header.type == PERF_RECORD_SWITCH_CPU_WIDE)
 		err = intel_pt_context_switch(pt, event, sample);
 
-	intel_pt_log("event %u: cpu %d time %"PRIu64" tsc %#"PRIx64" ",
-		     event->header.type, sample->cpu, sample->time, timestamp);
-	intel_pt_log_event(event);
+	if (!err && event->header.type == PERF_RECORD_TEXT_POKE)
+		err = intel_pt_text_poke(pt, event);
+
+	if (intel_pt_enable_logging && intel_pt_log_events(pt, sample->time)) {
+		intel_pt_log("event %u: cpu %d time %"PRIu64" tsc %#"PRIx64" ",
+			     event->header.type, sample->cpu, sample->time, timestamp);
+		intel_pt_log_event(event);
+	}
 
 	return err;
 }
@@ -2913,8 +3017,15 @@ static int intel_pt_synth_events(struct intel_pt *pt,
 
 	if (pt->synth_opts.callchain)
 		attr.sample_type |= PERF_SAMPLE_CALLCHAIN;
-	if (pt->synth_opts.last_branch)
+	if (pt->synth_opts.last_branch) {
 		attr.sample_type |= PERF_SAMPLE_BRANCH_STACK;
+		/*
+		 * We don't use the hardware index, but the sample generation
+		 * code uses the new format branch_stack with this field,
+		 * so the event attributes must indicate that it's present.
+		 */
+		attr.branch_sample_type |= PERF_SAMPLE_BRANCH_HW_INDEX;
+	}
 
 	if (pt->synth_opts.instructions) {
 		attr.config = PERF_COUNT_HW_INSTRUCTIONS;
diff --git a/tools/perf/util/jitdump.c b/tools/perf/util/jitdump.c
index 32bb05e03fb2..0804308ef285 100644
--- a/tools/perf/util/jitdump.c
+++ b/tools/perf/util/jitdump.c
@@ -26,6 +26,7 @@
 #include "jit.h"
 #include "jitdump.h"
 #include "genelf.h"
+#include "thread.h"
 
 #include <linux/ctype.h>
 #include <linux/zalloc.h>
@@ -749,6 +750,28 @@ jit_detect(char *mmap_name, pid_t pid)
 	return 0;
 }
 
+static void jit_add_pid(struct machine *machine, pid_t pid)
+{
+	struct thread *thread = machine__findnew_thread(machine, pid, pid);
+
+	if (!thread) {
+		pr_err("%s: thread %d not found or created\n", __func__, pid);
+		return;
+	}
+
+	thread->priv = (void *)1;
+}
+
+static bool jit_has_pid(struct machine *machine, pid_t pid)
+{
+	struct thread *thread = machine__find_thread(machine, pid, pid);
+
+	if (!thread)
+		return 0;
+
+	return (bool)thread->priv;
+}
+
 int
 jit_process(struct perf_session *session,
 	    struct perf_data *output,
@@ -764,8 +787,13 @@ jit_process(struct perf_session *session,
 	/*
 	 * first, detect marker mmap (i.e., the jitdump mmap)
 	 */
-	if (jit_detect(filename, pid))
+	if (jit_detect(filename, pid)) {
+		// Strip //anon* mmaps if we processed a jitdump for this pid
+		if (jit_has_pid(machine, pid) && (strncmp(filename, "//anon", 6) == 0))
+			return 1;
+
 		return 0;
+	}
 
 	memset(&jd, 0, sizeof(jd));
 
@@ -784,6 +812,7 @@ jit_process(struct perf_session *session,
 
 	ret = jit_inject(&jd, filename);
 	if (!ret) {
+		jit_add_pid(machine, pid);
 		*nbytes = jd.bytes_written;
 		ret = 1;
 	}
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index d5384807372b..85587de027a5 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -703,7 +703,7 @@ static struct dso *machine__findnew_module_dso(struct machine *machine,
 
 		dso__set_module_info(dso, m, machine);
 		dso__set_long_name(dso, strdup(filename), true);
-		dso->kernel = DSO_TYPE_KERNEL;
+		dso->kernel = DSO_SPACE__KERNEL;
 	}
 
 	dso__get(dso);
@@ -736,12 +736,6 @@ int machine__process_switch_event(struct machine *machine __maybe_unused,
 	return 0;
 }
 
-static int is_bpf_image(const char *name)
-{
-	return strncmp(name, "bpf_trampoline_", sizeof("bpf_trampoline_") - 1) == 0 ||
-	       strncmp(name, "bpf_dispatcher_", sizeof("bpf_dispatcher_") - 1) == 0;
-}
-
 static int machine__process_ksymbol_register(struct machine *machine,
 					     union perf_event *event,
 					     struct perf_sample *sample __maybe_unused)
@@ -753,7 +747,7 @@ static int machine__process_ksymbol_register(struct machine *machine,
 		struct dso *dso = dso__new(event->ksymbol.name);
 
 		if (dso) {
-			dso->kernel = DSO_TYPE_KERNEL;
+			dso->kernel = DSO_SPACE__KERNEL;
 			map = map__new2(0, dso);
 		}
 
@@ -762,6 +756,12 @@ static int machine__process_ksymbol_register(struct machine *machine,
 			return -ENOMEM;
 		}
 
+		if (event->ksymbol.ksym_type == PERF_RECORD_KSYMBOL_TYPE_OOL) {
+			map->dso->binary_type = DSO_BINARY_TYPE__OOL;
+			map->dso->data.file_size = event->ksymbol.len;
+			dso__set_loaded(map->dso);
+		}
+
 		map->start = event->ksymbol.addr;
 		map->end = map->start + event->ksymbol.len;
 		maps__insert(&machine->kmaps, map);
@@ -808,6 +808,47 @@ int machine__process_ksymbol(struct machine *machine __maybe_unused,
 	return machine__process_ksymbol_register(machine, event, sample);
 }
 
+int machine__process_text_poke(struct machine *machine, union perf_event *event,
+			       struct perf_sample *sample __maybe_unused)
+{
+	struct map *map = maps__find(&machine->kmaps, event->text_poke.addr);
+	u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
+
+	if (dump_trace)
+		perf_event__fprintf_text_poke(event, machine, stdout);
+
+	if (!event->text_poke.new_len)
+		return 0;
+
+	if (cpumode != PERF_RECORD_MISC_KERNEL) {
+		pr_debug("%s: unsupported cpumode - ignoring\n", __func__);
+		return 0;
+	}
+
+	if (map && map->dso) {
+		u8 *new_bytes = event->text_poke.bytes + event->text_poke.old_len;
+		int ret;
+
+		/*
+		 * Kernel maps might be changed when loading symbols so loading
+		 * must be done prior to using kernel maps.
+		 */
+		map__load(map);
+		ret = dso__data_write_cache_addr(map->dso, map, machine,
+						 event->text_poke.addr,
+						 new_bytes,
+						 event->text_poke.new_len);
+		if (ret != event->text_poke.new_len)
+			pr_debug("Failed to write kernel text poke at %#" PRI_lx64 "\n",
+				 event->text_poke.addr);
+	} else {
+		pr_debug("Failed to find kernel text poke address map for %#" PRI_lx64 "\n",
+			 event->text_poke.addr);
+	}
+
+	return 0;
+}
+
 static struct map *machine__addnew_module_map(struct machine *machine, u64 start,
 					      const char *filename)
 {
@@ -924,14 +965,14 @@ static struct dso *machine__get_kernel(struct machine *machine)
 			vmlinux_name = symbol_conf.vmlinux_name;
 
 		kernel = machine__findnew_kernel(machine, vmlinux_name,
-						 "[kernel]", DSO_TYPE_KERNEL);
+						 "[kernel]", DSO_SPACE__KERNEL);
 	} else {
 		if (symbol_conf.default_guest_vmlinux_name)
 			vmlinux_name = symbol_conf.default_guest_vmlinux_name;
 
 		kernel = machine__findnew_kernel(machine, vmlinux_name,
 						 "[guest.kernel]",
-						 DSO_TYPE_GUEST_KERNEL);
+						 DSO_SPACE__KERNEL_GUEST);
 	}
 
 	if (kernel != NULL && (!kernel->has_build_id))
@@ -1559,7 +1600,7 @@ static int machine__process_kernel_mmap_event(struct machine *machine,
 					      union perf_event *event)
 {
 	struct map *map;
-	enum dso_kernel_type kernel_type;
+	enum dso_space_type dso_space;
 	bool is_kernel_mmap;
 
 	/* If we have maps from kcore then we do not need or want any others */
@@ -1567,9 +1608,9 @@ static int machine__process_kernel_mmap_event(struct machine *machine,
 		return 0;
 
 	if (machine__is_host(machine))
-		kernel_type = DSO_TYPE_KERNEL;
+		dso_space = DSO_SPACE__KERNEL;
 	else
-		kernel_type = DSO_TYPE_GUEST_KERNEL;
+		dso_space = DSO_SPACE__KERNEL_GUEST;
 
 	is_kernel_mmap = memcmp(event->mmap.filename,
 				machine->mmap_name,
@@ -1629,7 +1670,7 @@ static int machine__process_kernel_mmap_event(struct machine *machine,
 		if (kernel == NULL)
 			goto out_problem;
 
-		kernel->kernel = kernel_type;
+		kernel->kernel = dso_space;
 		if (__machine__create_kernel_maps(machine, kernel) < 0) {
 			dso__put(kernel);
 			goto out_problem;
@@ -1930,6 +1971,8 @@ int machine__process_event(struct machine *machine, union perf_event *event,
 		ret = machine__process_ksymbol(machine, event, sample); break;
 	case PERF_RECORD_BPF_EVENT:
 		ret = machine__process_bpf(machine, event, sample); break;
+	case PERF_RECORD_TEXT_POKE:
+		ret = machine__process_text_poke(machine, event, sample); break;
 	default:
 		ret = -1;
 		break;
diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h
index fa1be9ea00fa..062c36a8433c 100644
--- a/tools/perf/util/machine.h
+++ b/tools/perf/util/machine.h
@@ -138,6 +138,9 @@ int machine__process_mmap2_event(struct machine *machine, union perf_event *even
 int machine__process_ksymbol(struct machine *machine,
 			     union perf_event *event,
 			     struct perf_sample *sample);
+int machine__process_text_poke(struct machine *machine,
+			       union perf_event *event,
+			       struct perf_sample *sample);
 int machine__process_event(struct machine *machine, union perf_event *event,
 				struct perf_sample *sample);
 
diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c
index 53d96611e6a6..cc0faf8f1321 100644
--- a/tools/perf/util/map.c
+++ b/tools/perf/util/map.c
@@ -267,6 +267,27 @@ bool __map__is_bpf_prog(const struct map *map)
 	return name && (strstr(name, "bpf_prog_") == name);
 }
 
+bool __map__is_bpf_image(const struct map *map)
+{
+	const char *name;
+
+	if (map->dso->binary_type == DSO_BINARY_TYPE__BPF_IMAGE)
+		return true;
+
+	/*
+	 * If PERF_RECORD_KSYMBOL is not included, the dso will not have
+	 * type of DSO_BINARY_TYPE__BPF_IMAGE. In such cases, we can
+	 * guess the type based on name.
+	 */
+	name = map->dso->short_name;
+	return name && is_bpf_image(name);
+}
+
+bool __map__is_ool(const struct map *map)
+{
+	return map->dso && map->dso->binary_type == DSO_BINARY_TYPE__OOL;
+}
+
 bool map__has_symbols(const struct map *map)
 {
 	return dso__has_symbols(map->dso);
@@ -481,7 +502,7 @@ u64 map__rip_2objdump(struct map *map, u64 rip)
 	 * kernel modules also have DSO_TYPE_USER in dso->kernel,
 	 * but all kernel modules are ET_REL, so won't get here.
 	 */
-	if (map->dso->kernel == DSO_TYPE_USER)
+	if (map->dso->kernel == DSO_SPACE__USER)
 		return rip + map->dso->text_offset;
 
 	return map->unmap_ip(map, rip) - map->reloc;
@@ -511,7 +532,7 @@ u64 map__objdump_2mem(struct map *map, u64 ip)
 	 * kernel modules also have DSO_TYPE_USER in dso->kernel,
 	 * but all kernel modules are ET_REL, so won't get here.
 	 */
-	if (map->dso->kernel == DSO_TYPE_USER)
+	if (map->dso->kernel == DSO_SPACE__USER)
 		return map->unmap_ip(map, ip - map->dso->text_offset);
 
 	return ip + map->reloc;
diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h
index 067036e8970c..c2f5d28fe73a 100644
--- a/tools/perf/util/map.h
+++ b/tools/perf/util/map.h
@@ -147,11 +147,14 @@ int map__set_kallsyms_ref_reloc_sym(struct map *map, const char *symbol_name,
 bool __map__is_kernel(const struct map *map);
 bool __map__is_extra_kernel_map(const struct map *map);
 bool __map__is_bpf_prog(const struct map *map);
+bool __map__is_bpf_image(const struct map *map);
+bool __map__is_ool(const struct map *map);
 
 static inline bool __map__is_kmodule(const struct map *map)
 {
 	return !__map__is_kernel(map) && !__map__is_extra_kernel_map(map) &&
-	       !__map__is_bpf_prog(map);
+	       !__map__is_bpf_prog(map) && !__map__is_ool(map) &&
+	       !__map__is_bpf_image(map);
 }
 
 bool map__has_symbols(const struct map *map);
@@ -163,4 +166,9 @@ static inline bool is_entry_trampoline(const char *name)
 	return !strcmp(name, ENTRY_TRAMPOLINE_NAME);
 }
 
+static inline bool is_bpf_image(const char *name)
+{
+	return strncmp(name, "bpf_trampoline_", sizeof("bpf_trampoline_") - 1) == 0 ||
+	       strncmp(name, "bpf_dispatcher_", sizeof("bpf_dispatcher_") - 1) == 0;
+}
 #endif /* __PERF_MAP_H */
diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c
index 9e21aa767e41..ab5030fcfed4 100644
--- a/tools/perf/util/metricgroup.c
+++ b/tools/perf/util/metricgroup.c
@@ -24,6 +24,7 @@
 #include <subcmd/parse-options.h>
 #include <api/fs/fs.h>
 #include "util.h"
+#include <asm/bug.h>
 
 struct metric_event *metricgroup__lookup(struct rblist *metric_events,
 					 struct evsel *evsel,
@@ -76,23 +77,79 @@ static struct rb_node *metric_event_new(struct rblist *rblist __maybe_unused,
 	return &me->nd;
 }
 
+static void metric_event_delete(struct rblist *rblist __maybe_unused,
+				struct rb_node *rb_node)
+{
+	struct metric_event *me = container_of(rb_node, struct metric_event, nd);
+	struct metric_expr *expr, *tmp;
+
+	list_for_each_entry_safe(expr, tmp, &me->head, nd) {
+		free(expr->metric_refs);
+		free(expr->metric_events);
+		free(expr);
+	}
+
+	free(me);
+}
+
 static void metricgroup__rblist_init(struct rblist *metric_events)
 {
 	rblist__init(metric_events);
 	metric_events->node_cmp = metric_event_cmp;
 	metric_events->node_new = metric_event_new;
+	metric_events->node_delete = metric_event_delete;
 }
 
-struct egroup {
+void metricgroup__rblist_exit(struct rblist *metric_events)
+{
+	rblist__exit(metric_events);
+}
+
+/*
+ * A node in the list of referenced metrics. metric_expr
+ * is held as a convenience to avoid a search through the
+ * metric list.
+ */
+struct metric_ref_node {
+	const char *metric_name;
+	const char *metric_expr;
+	struct list_head list;
+};
+
+struct metric {
 	struct list_head nd;
 	struct expr_parse_ctx pctx;
 	const char *metric_name;
 	const char *metric_expr;
 	const char *metric_unit;
+	struct list_head metric_refs;
+	int metric_refs_cnt;
 	int runtime;
 	bool has_constraint;
 };
 
+#define RECURSION_ID_MAX 1000
+
+struct expr_ids {
+	struct expr_id	id[RECURSION_ID_MAX];
+	int		cnt;
+};
+
+static struct expr_id *expr_ids__alloc(struct expr_ids *ids)
+{
+	if (ids->cnt >= RECURSION_ID_MAX)
+		return NULL;
+	return &ids->id[ids->cnt++];
+}
+
+static void expr_ids__exit(struct expr_ids *ids)
+{
+	int i;
+
+	for (i = 0; i < ids->cnt; i++)
+		free(ids->id[i].id);
+}
+
 /**
  * Find a group of events in perf_evlist that correpond to those from a parsed
  * metric expression. Note, as find_evsel_group is called in the same order as
@@ -119,7 +176,7 @@ static struct evsel *find_evsel_group(struct evlist *perf_evlist,
 				      unsigned long *evlist_used)
 {
 	struct evsel *ev, *current_leader = NULL;
-	double *val_ptr;
+	struct expr_id_data *val_ptr;
 	int i = 0, matched_events = 0, events_to_match;
 	const int idnum = (int)hashmap__size(&pctx->ids);
 
@@ -206,7 +263,7 @@ static int metricgroup__setup_events(struct list_head *groups,
 	struct metric_expr *expr;
 	int i = 0;
 	int ret = 0;
-	struct egroup *eg;
+	struct metric *m;
 	struct evsel *evsel, *tmp;
 	unsigned long *evlist_used;
 
@@ -214,22 +271,23 @@ static int metricgroup__setup_events(struct list_head *groups,
 	if (!evlist_used)
 		return -ENOMEM;
 
-	list_for_each_entry (eg, groups, nd) {
+	list_for_each_entry (m, groups, nd) {
 		struct evsel **metric_events;
+		struct metric_ref *metric_refs = NULL;
 
 		metric_events = calloc(sizeof(void *),
-				hashmap__size(&eg->pctx.ids) + 1);
+				hashmap__size(&m->pctx.ids) + 1);
 		if (!metric_events) {
 			ret = -ENOMEM;
 			break;
 		}
-		evsel = find_evsel_group(perf_evlist, &eg->pctx,
+		evsel = find_evsel_group(perf_evlist, &m->pctx,
 					 metric_no_merge,
-					 eg->has_constraint, metric_events,
+					 m->has_constraint, metric_events,
 					 evlist_used);
 		if (!evsel) {
 			pr_debug("Cannot resolve %s: %s\n",
-					eg->metric_name, eg->metric_expr);
+					m->metric_name, m->metric_expr);
 			free(metric_events);
 			continue;
 		}
@@ -247,11 +305,42 @@ static int metricgroup__setup_events(struct list_head *groups,
 			free(metric_events);
 			break;
 		}
-		expr->metric_expr = eg->metric_expr;
-		expr->metric_name = eg->metric_name;
-		expr->metric_unit = eg->metric_unit;
+
+		/*
+		 * Collect and store collected nested expressions
+		 * for metric processing.
+		 */
+		if (m->metric_refs_cnt) {
+			struct metric_ref_node *ref;
+
+			metric_refs = zalloc(sizeof(struct metric_ref) * (m->metric_refs_cnt + 1));
+			if (!metric_refs) {
+				ret = -ENOMEM;
+				free(metric_events);
+				free(expr);
+				break;
+			}
+
+			i = 0;
+			list_for_each_entry(ref, &m->metric_refs, list) {
+				/*
+				 * Intentionally passing just const char pointers,
+				 * originally from 'struct pmu_event' object.
+				 * We don't need to change them, so there's no
+				 * need to create our own copy.
+				 */
+				metric_refs[i].metric_name = ref->metric_name;
+				metric_refs[i].metric_expr = ref->metric_expr;
+				i++;
+			}
+		};
+
+		expr->metric_refs = metric_refs;
+		expr->metric_expr = m->metric_expr;
+		expr->metric_name = m->metric_name;
+		expr->metric_unit = m->metric_unit;
 		expr->metric_events = metric_events;
-		expr->runtime = eg->runtime;
+		expr->runtime = m->runtime;
 		list_add(&expr->nd, &me->head);
 	}
 
@@ -443,6 +532,9 @@ void metricgroup__print(bool metrics, bool metricgroups, char *filter,
 						continue;
 					strlist__add(me->metrics, s);
 				}
+
+				if (!raw)
+					free(s);
 			}
 			free(omg);
 		}
@@ -552,123 +644,349 @@ int __weak arch_get_runtimeparam(void)
 	return 1;
 }
 
-static int __metricgroup__add_metric(struct list_head *group_list,
-				     struct pmu_event *pe,
-				     bool metric_no_group,
-				     int runtime)
+static int __add_metric(struct list_head *metric_list,
+			struct pmu_event *pe,
+			bool metric_no_group,
+			int runtime,
+			struct metric **mp,
+			struct expr_id *parent,
+			struct expr_ids *ids)
 {
-	struct egroup *eg;
+	struct metric_ref_node *ref;
+	struct metric *m;
 
-	eg = malloc(sizeof(*eg));
-	if (!eg)
-		return -ENOMEM;
+	if (*mp == NULL) {
+		/*
+		 * We got in here for the parent group,
+		 * allocate it and put it on the list.
+		 */
+		m = zalloc(sizeof(*m));
+		if (!m)
+			return -ENOMEM;
+
+		expr__ctx_init(&m->pctx);
+		m->metric_name = pe->metric_name;
+		m->metric_expr = pe->metric_expr;
+		m->metric_unit = pe->unit;
+		m->runtime = runtime;
+		m->has_constraint = metric_no_group || metricgroup__has_constraint(pe);
+		INIT_LIST_HEAD(&m->metric_refs);
+		m->metric_refs_cnt = 0;
+
+		parent = expr_ids__alloc(ids);
+		if (!parent) {
+			free(m);
+			return -EINVAL;
+		}
 
-	expr__ctx_init(&eg->pctx);
-	eg->metric_name = pe->metric_name;
-	eg->metric_expr = pe->metric_expr;
-	eg->metric_unit = pe->unit;
-	eg->runtime = runtime;
-	eg->has_constraint = metric_no_group || metricgroup__has_constraint(pe);
+		parent->id = strdup(pe->metric_name);
+		if (!parent->id) {
+			free(m);
+			return -ENOMEM;
+		}
+		*mp = m;
+	} else {
+		/*
+		 * We got here for the referenced metric, via the
+		 * recursive metricgroup__add_metric call, add
+		 * it to the parent group.
+		 */
+		m = *mp;
+
+		ref = malloc(sizeof(*ref));
+		if (!ref)
+			return -ENOMEM;
+
+		/*
+		 * Intentionally passing just const char pointers,
+		 * from 'pe' object, so they never go away. We don't
+		 * need to change them, so there's no need to create
+		 * our own copy.
+		 */
+		ref->metric_name = pe->metric_name;
+		ref->metric_expr = pe->metric_expr;
 
-	if (expr__find_other(pe->metric_expr, NULL, &eg->pctx, runtime) < 0) {
-		expr__ctx_clear(&eg->pctx);
-		free(eg);
+		list_add(&ref->list, &m->metric_refs);
+		m->metric_refs_cnt++;
+	}
+
+	/* Force all found IDs in metric to have us as parent ID. */
+	WARN_ON_ONCE(!parent);
+	m->pctx.parent = parent;
+
+	/*
+	 * For both the parent and referenced metrics, we parse
+	 * all the metric's IDs and add it to the parent context.
+	 */
+	if (expr__find_other(pe->metric_expr, NULL, &m->pctx, runtime) < 0) {
+		if (m->metric_refs_cnt == 0) {
+			expr__ctx_clear(&m->pctx);
+			free(m);
+			*mp = NULL;
+		}
 		return -EINVAL;
 	}
 
-	if (list_empty(group_list))
-		list_add(&eg->nd, group_list);
+	/*
+	 * We add new group only in the 'parent' call,
+	 * so bail out for referenced metric case.
+	 */
+	if (m->metric_refs_cnt)
+		return 0;
+
+	if (list_empty(metric_list))
+		list_add(&m->nd, metric_list);
 	else {
 		struct list_head *pos;
 
 		/* Place the largest groups at the front. */
-		list_for_each_prev(pos, group_list) {
-			struct egroup *old = list_entry(pos, struct egroup, nd);
+		list_for_each_prev(pos, metric_list) {
+			struct metric *old = list_entry(pos, struct metric, nd);
 
-			if (hashmap__size(&eg->pctx.ids) <=
+			if (hashmap__size(&m->pctx.ids) <=
 			    hashmap__size(&old->pctx.ids))
 				break;
 		}
-		list_add(&eg->nd, pos);
+		list_add(&m->nd, pos);
 	}
 
 	return 0;
 }
 
+#define map_for_each_event(__pe, __idx, __map)				\
+	for (__idx = 0, __pe = &__map->table[__idx];			\
+	     __pe->name || __pe->metric_group || __pe->metric_name;	\
+	     __pe = &__map->table[++__idx])
+
+#define map_for_each_metric(__pe, __idx, __map, __metric)		\
+	map_for_each_event(__pe, __idx, __map)				\
+		if (__pe->metric_expr &&				\
+		    (match_metric(__pe->metric_group, __metric) ||	\
+		     match_metric(__pe->metric_name, __metric)))
+
+static struct pmu_event *find_metric(const char *metric, struct pmu_events_map *map)
+{
+	struct pmu_event *pe;
+	int i;
+
+	map_for_each_event(pe, i, map) {
+		if (match_metric(pe->metric_name, metric))
+			return pe;
+	}
+
+	return NULL;
+}
+
+static int recursion_check(struct metric *m, const char *id, struct expr_id **parent,
+			   struct expr_ids *ids)
+{
+	struct expr_id_data *data;
+	struct expr_id *p;
+	int ret;
+
+	/*
+	 * We get the parent referenced by 'id' argument and
+	 * traverse through all the parent object IDs to check
+	 * if we already processed 'id', if we did, it's recursion
+	 * and we fail.
+	 */
+	ret = expr__get_id(&m->pctx, id, &data);
+	if (ret)
+		return ret;
+
+	p = data->parent;
+
+	while (p->parent) {
+		if (!strcmp(p->id, id)) {
+			pr_err("failed: recursion detected for %s\n", id);
+			return -1;
+		}
+		p = p->parent;
+	}
+
+	/*
+	 * If we are over the limit of static entris, the metric
+	 * is too difficult/nested to process, fail as well.
+	 */
+	p = expr_ids__alloc(ids);
+	if (!p) {
+		pr_err("failed: too many nested metrics\n");
+		return -EINVAL;
+	}
+
+	p->id     = strdup(id);
+	p->parent = data->parent;
+	*parent   = p;
+
+	return p->id ? 0 : -ENOMEM;
+}
+
+static int add_metric(struct list_head *metric_list,
+		      struct pmu_event *pe,
+		      bool metric_no_group,
+		      struct metric **mp,
+		      struct expr_id *parent,
+		      struct expr_ids *ids);
+
+static int __resolve_metric(struct metric *m,
+			    bool metric_no_group,
+			    struct list_head *metric_list,
+			    struct pmu_events_map *map,
+			    struct expr_ids *ids)
+{
+	struct hashmap_entry *cur;
+	size_t bkt;
+	bool all;
+	int ret;
+
+	/*
+	 * Iterate all the parsed IDs and if there's metric,
+	 * add it to the context.
+	 */
+	do {
+		all = true;
+		hashmap__for_each_entry((&m->pctx.ids), cur, bkt) {
+			struct expr_id *parent;
+			struct pmu_event *pe;
+
+			pe = find_metric(cur->key, map);
+			if (!pe)
+				continue;
+
+			ret = recursion_check(m, cur->key, &parent, ids);
+			if (ret)
+				return ret;
+
+			all = false;
+			/* The metric key itself needs to go out.. */
+			expr__del_id(&m->pctx, cur->key);
+
+			/* ... and it gets resolved to the parent context. */
+			ret = add_metric(metric_list, pe, metric_no_group, &m, parent, ids);
+			if (ret)
+				return ret;
+
+			/*
+			 * We added new metric to hashmap, so we need
+			 * to break the iteration and start over.
+			 */
+			break;
+		}
+	} while (!all);
+
+	return 0;
+}
+
+static int resolve_metric(bool metric_no_group,
+			  struct list_head *metric_list,
+			  struct pmu_events_map *map,
+			  struct expr_ids *ids)
+{
+	struct metric *m;
+	int err;
+
+	list_for_each_entry(m, metric_list, nd) {
+		err = __resolve_metric(m, metric_no_group, metric_list, map, ids);
+		if (err)
+			return err;
+	}
+	return 0;
+}
+
+static int add_metric(struct list_head *metric_list,
+		      struct pmu_event *pe,
+		      bool metric_no_group,
+		      struct metric **m,
+		      struct expr_id *parent,
+		      struct expr_ids *ids)
+{
+	struct metric *orig = *m;
+	int ret = 0;
+
+	pr_debug("metric expr %s for %s\n", pe->metric_expr, pe->metric_name);
+
+	if (!strstr(pe->metric_expr, "?")) {
+		ret = __add_metric(metric_list, pe, metric_no_group, 1, m, parent, ids);
+	} else {
+		int j, count;
+
+		count = arch_get_runtimeparam();
+
+		/* This loop is added to create multiple
+		 * events depend on count value and add
+		 * those events to metric_list.
+		 */
+
+		for (j = 0; j < count && !ret; j++, *m = orig)
+			ret = __add_metric(metric_list, pe, metric_no_group, j, m, parent, ids);
+	}
+
+	return ret;
+}
+
 static int metricgroup__add_metric(const char *metric, bool metric_no_group,
 				   struct strbuf *events,
-				   struct list_head *group_list)
+				   struct list_head *metric_list,
+				   struct pmu_events_map *map)
 {
-	struct pmu_events_map *map = perf_pmu__find_map(NULL);
+	struct expr_ids ids = { .cnt = 0, };
 	struct pmu_event *pe;
-	struct egroup *eg;
+	struct metric *m;
+	LIST_HEAD(list);
 	int i, ret;
 	bool has_match = false;
 
-	if (!map)
-		return 0;
+	map_for_each_metric(pe, i, map, metric) {
+		has_match = true;
+		m = NULL;
 
-	for (i = 0; ; i++) {
-		pe = &map->table[i];
+		ret = add_metric(&list, pe, metric_no_group, &m, NULL, &ids);
+		if (ret)
+			goto out;
 
-		if (!pe->name && !pe->metric_group && !pe->metric_name) {
-			/* End of pmu events. */
-			if (!has_match)
-				return -EINVAL;
-			break;
-		}
-		if (!pe->metric_expr)
-			continue;
-		if (match_metric(pe->metric_group, metric) ||
-		    match_metric(pe->metric_name, metric)) {
-			has_match = true;
-			pr_debug("metric expr %s for %s\n", pe->metric_expr, pe->metric_name);
-
-			if (!strstr(pe->metric_expr, "?")) {
-				ret = __metricgroup__add_metric(group_list,
-								pe,
-								metric_no_group,
-								1);
-				if (ret)
-					return ret;
-			} else {
-				int j, count;
-
-				count = arch_get_runtimeparam();
-
-				/* This loop is added to create multiple
-				 * events depend on count value and add
-				 * those events to group_list.
-				 */
+		/*
+		 * Process any possible referenced metrics
+		 * included in the expression.
+		 */
+		ret = resolve_metric(metric_no_group,
+				     &list, map, &ids);
+		if (ret)
+			goto out;
+	}
 
-				for (j = 0; j < count; j++) {
-					ret = __metricgroup__add_metric(
-						group_list, pe,
-						metric_no_group, j);
-					if (ret)
-						return ret;
-				}
-			}
-		}
+	/* End of pmu events. */
+	if (!has_match) {
+		ret = -EINVAL;
+		goto out;
 	}
-	list_for_each_entry(eg, group_list, nd) {
+
+	list_for_each_entry(m, &list, nd) {
 		if (events->len > 0)
 			strbuf_addf(events, ",");
 
-		if (eg->has_constraint) {
+		if (m->has_constraint) {
 			metricgroup__add_metric_non_group(events,
-							  &eg->pctx);
+							  &m->pctx);
 		} else {
 			metricgroup__add_metric_weak_group(events,
-							   &eg->pctx);
+							   &m->pctx);
 		}
 	}
-	return 0;
+
+out:
+	/*
+	 * add to metric_list so that they can be released
+	 * even if it's failed
+	 */
+	list_splice(&list, metric_list);
+	expr_ids__exit(&ids);
+	return ret;
 }
 
 static int metricgroup__add_metric_list(const char *list, bool metric_no_group,
 					struct strbuf *events,
-				        struct list_head *group_list)
+					struct list_head *metric_list,
+					struct pmu_events_map *map)
 {
 	char *llist, *nlist, *p;
 	int ret = -EINVAL;
@@ -683,7 +1001,7 @@ static int metricgroup__add_metric_list(const char *list, bool metric_no_group,
 
 	while ((p = strsep(&llist, ",")) != NULL) {
 		ret = metricgroup__add_metric(p, metric_no_group, events,
-					      group_list);
+					      metric_list, map);
 		if (ret == -EINVAL) {
 			fprintf(stderr, "Cannot find metric or group `%s'\n",
 					p);
@@ -698,50 +1016,88 @@ static int metricgroup__add_metric_list(const char *list, bool metric_no_group,
 	return ret;
 }
 
-static void metricgroup__free_egroups(struct list_head *group_list)
+static void metric__free_refs(struct metric *metric)
 {
-	struct egroup *eg, *egtmp;
+	struct metric_ref_node *ref, *tmp;
 
-	list_for_each_entry_safe (eg, egtmp, group_list, nd) {
-		expr__ctx_clear(&eg->pctx);
-		list_del_init(&eg->nd);
-		free(eg);
+	list_for_each_entry_safe(ref, tmp, &metric->metric_refs, list) {
+		list_del(&ref->list);
+		free(ref);
 	}
 }
 
-int metricgroup__parse_groups(const struct option *opt,
-			      const char *str,
-			      bool metric_no_group,
-			      bool metric_no_merge,
-			      struct rblist *metric_events)
+static void metricgroup__free_metrics(struct list_head *metric_list)
+{
+	struct metric *m, *tmp;
+
+	list_for_each_entry_safe (m, tmp, metric_list, nd) {
+		metric__free_refs(m);
+		expr__ctx_clear(&m->pctx);
+		list_del_init(&m->nd);
+		free(m);
+	}
+}
+
+static int parse_groups(struct evlist *perf_evlist, const char *str,
+			bool metric_no_group,
+			bool metric_no_merge,
+			struct perf_pmu *fake_pmu,
+			struct rblist *metric_events,
+			struct pmu_events_map *map)
 {
 	struct parse_events_error parse_error;
-	struct evlist *perf_evlist = *(struct evlist **)opt->value;
 	struct strbuf extra_events;
-	LIST_HEAD(group_list);
+	LIST_HEAD(metric_list);
 	int ret;
 
 	if (metric_events->nr_entries == 0)
 		metricgroup__rblist_init(metric_events);
 	ret = metricgroup__add_metric_list(str, metric_no_group,
-					   &extra_events, &group_list);
+					   &extra_events, &metric_list, map);
 	if (ret)
-		return ret;
+		goto out;
 	pr_debug("adding %s\n", extra_events.buf);
 	bzero(&parse_error, sizeof(parse_error));
-	ret = parse_events(perf_evlist, extra_events.buf, &parse_error);
+	ret = __parse_events(perf_evlist, extra_events.buf, &parse_error, fake_pmu);
 	if (ret) {
 		parse_events_print_error(&parse_error, extra_events.buf);
 		goto out;
 	}
-	strbuf_release(&extra_events);
-	ret = metricgroup__setup_events(&group_list, metric_no_merge,
+	ret = metricgroup__setup_events(&metric_list, metric_no_merge,
 					perf_evlist, metric_events);
 out:
-	metricgroup__free_egroups(&group_list);
+	metricgroup__free_metrics(&metric_list);
+	strbuf_release(&extra_events);
 	return ret;
 }
 
+int metricgroup__parse_groups(const struct option *opt,
+			      const char *str,
+			      bool metric_no_group,
+			      bool metric_no_merge,
+			      struct rblist *metric_events)
+{
+	struct evlist *perf_evlist = *(struct evlist **)opt->value;
+	struct pmu_events_map *map = perf_pmu__find_map(NULL);
+
+	if (!map)
+		return 0;
+
+	return parse_groups(perf_evlist, str, metric_no_group,
+			    metric_no_merge, NULL, metric_events, map);
+}
+
+int metricgroup__parse_groups_test(struct evlist *evlist,
+				   struct pmu_events_map *map,
+				   const char *str,
+				   bool metric_no_group,
+				   bool metric_no_merge,
+				   struct rblist *metric_events)
+{
+	return parse_groups(evlist, str, metric_no_group,
+			    metric_no_merge, &perf_pmu__fake, metric_events, map);
+}
+
 bool metricgroup__has_metric(const char *metric)
 {
 	struct pmu_events_map *map = perf_pmu__find_map(NULL);
diff --git a/tools/perf/util/metricgroup.h b/tools/perf/util/metricgroup.h
index 287850bcdeca..62623a39cbec 100644
--- a/tools/perf/util/metricgroup.h
+++ b/tools/perf/util/metricgroup.h
@@ -7,8 +7,10 @@
 #include <stdbool.h>
 
 struct evsel;
+struct evlist;
 struct option;
 struct rblist;
+struct pmu_events_map;
 
 struct metric_event {
 	struct rb_node nd;
@@ -16,12 +18,18 @@ struct metric_event {
 	struct list_head head; /* list of metric_expr */
 };
 
+struct metric_ref {
+	const char *metric_name;
+	const char *metric_expr;
+};
+
 struct metric_expr {
 	struct list_head nd;
 	const char *metric_expr;
 	const char *metric_name;
 	const char *metric_unit;
 	struct evsel **metric_events;
+	struct metric_ref *metric_refs;
 	int runtime;
 };
 
@@ -34,8 +42,16 @@ int metricgroup__parse_groups(const struct option *opt,
 			      bool metric_no_merge,
 			      struct rblist *metric_events);
 
+int metricgroup__parse_groups_test(struct evlist *evlist,
+				   struct pmu_events_map *map,
+				   const char *str,
+				   bool metric_no_group,
+				   bool metric_no_merge,
+				   struct rblist *metric_events);
+
 void metricgroup__print(bool metrics, bool groups, char *filter,
 			bool raw, bool details);
 bool metricgroup__has_metric(const char *metric);
 int arch_get_runtimeparam(void);
+void metricgroup__rblist_exit(struct rblist *metric_events);
 #endif
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 3decbb203846..667cbca1547a 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -37,6 +37,7 @@
 #include "util/evsel_config.h"
 #include "util/event.h"
 #include "util/pfm.h"
+#include "perf.h"
 
 #define MAX_NAME_LEN 100
 
@@ -410,7 +411,7 @@ static int add_event_tool(struct list_head *list, int *idx,
 		return -ENOMEM;
 	evsel->tool_event = tool_event;
 	if (tool_event == PERF_TOOL_DURATION_TIME)
-		evsel->unit = strdup("ns");
+		evsel->unit = "ns";
 	return 0;
 }
 
@@ -767,8 +768,8 @@ int parse_events_load_bpf_obj(struct parse_events_state *parse_state,
 
 	return 0;
 errout:
-	parse_state->error->help = strdup("(add -v to see detail)");
-	parse_state->error->str = strdup(errbuf);
+	parse_events__handle_error(parse_state->error, 0,
+				strdup(errbuf), strdup("(add -v to see detail)"));
 	return err;
 }
 
@@ -784,36 +785,38 @@ parse_events_config_bpf(struct parse_events_state *parse_state,
 		return 0;
 
 	list_for_each_entry(term, head_config, list) {
-		char errbuf[BUFSIZ];
 		int err;
 
 		if (term->type_term != PARSE_EVENTS__TERM_TYPE_USER) {
-			snprintf(errbuf, sizeof(errbuf),
-				 "Invalid config term for BPF object");
-			errbuf[BUFSIZ - 1] = '\0';
-
-			parse_state->error->idx = term->err_term;
-			parse_state->error->str = strdup(errbuf);
+			parse_events__handle_error(parse_state->error, term->err_term,
+						strdup("Invalid config term for BPF object"),
+						NULL);
 			return -EINVAL;
 		}
 
 		err = bpf__config_obj(obj, term, parse_state->evlist, &error_pos);
 		if (err) {
+			char errbuf[BUFSIZ];
+			int idx;
+
 			bpf__strerror_config_obj(obj, term, parse_state->evlist,
 						 &error_pos, err, errbuf,
 						 sizeof(errbuf));
-			parse_state->error->help = strdup(
+
+			if (err == -BPF_LOADER_ERRNO__OBJCONF_MAP_VALUE)
+				idx = term->err_val;
+			else
+				idx = term->err_term + error_pos;
+
+			parse_events__handle_error(parse_state->error, idx,
+						strdup(errbuf),
+						strdup(
 "Hint:\tValid config terms:\n"
 "     \tmap:[<arraymap>].value<indices>=[value]\n"
 "     \tmap:[<eventmap>].event<indices>=[event]\n"
 "\n"
 "     \twhere <indices> is something like [0,3...5] or [all]\n"
-"     \t(add -v to see detail)");
-			parse_state->error->str = strdup(errbuf);
-			if (err == -BPF_LOADER_ERRNO__OBJCONF_MAP_VALUE)
-				parse_state->error->idx = term->err_val;
-			else
-				parse_state->error->idx = term->err_term + error_pos;
+"     \t(add -v to see detail)"));
 			return err;
 		}
 	}
@@ -877,8 +880,8 @@ int parse_events_load_bpf(struct parse_events_state *parse_state,
 						   -err, errbuf,
 						   sizeof(errbuf));
 
-		parse_state->error->help = strdup("(add -v to see detail)");
-		parse_state->error->str = strdup(errbuf);
+		parse_events__handle_error(parse_state->error, 0,
+					strdup(errbuf), strdup("(add -v to see detail)"));
 		return err;
 	}
 
@@ -1450,7 +1453,7 @@ int parse_events_add_pmu(struct parse_events_state *parse_state,
 		fprintf(stderr, "' that may result in non-fatal errors\n");
 	}
 
-	pmu = perf_pmu__find(name);
+	pmu = parse_state->fake_pmu ?: perf_pmu__find(name);
 	if (!pmu) {
 		char *err_str;
 
@@ -1483,7 +1486,7 @@ int parse_events_add_pmu(struct parse_events_state *parse_state,
 		}
 	}
 
-	if (perf_pmu__check_alias(pmu, head_config, &info))
+	if (!parse_state->fake_pmu && perf_pmu__check_alias(pmu, head_config, &info))
 		return -EINVAL;
 
 	if (verbose > 1) {
@@ -1516,7 +1519,7 @@ int parse_events_add_pmu(struct parse_events_state *parse_state,
 	if (pmu->default_config && get_config_chgs(pmu, head_config, &config_terms))
 		return -ENOMEM;
 
-	if (perf_pmu__config(pmu, &attr, head_config, parse_state->error)) {
+	if (!parse_state->fake_pmu && perf_pmu__config(pmu, &attr, head_config, parse_state->error)) {
 		struct evsel_config_term *pos, *tmp;
 
 		list_for_each_entry_safe(pos, tmp, &config_terms, list) {
@@ -1531,19 +1534,23 @@ int parse_events_add_pmu(struct parse_events_state *parse_state,
 	evsel = __add_event(list, &parse_state->idx, &attr, true,
 			    get_config_name(head_config), pmu,
 			    &config_terms, auto_merge_stats, NULL);
-	if (evsel) {
-		evsel->unit = info.unit;
-		evsel->scale = info.scale;
-		evsel->per_pkg = info.per_pkg;
-		evsel->snapshot = info.snapshot;
-		evsel->metric_expr = info.metric_expr;
-		evsel->metric_name = info.metric_name;
-		evsel->pmu_name = name ? strdup(name) : NULL;
-		evsel->use_uncore_alias = use_uncore_alias;
-		evsel->percore = config_term_percore(&evsel->config_terms);
-	}
+	if (!evsel)
+		return -ENOMEM;
 
-	return evsel ? 0 : -ENOMEM;
+	evsel->pmu_name = name ? strdup(name) : NULL;
+	evsel->use_uncore_alias = use_uncore_alias;
+	evsel->percore = config_term_percore(&evsel->config_terms);
+
+	if (parse_state->fake_pmu)
+		return 0;
+
+	evsel->unit = info.unit;
+	evsel->scale = info.scale;
+	evsel->per_pkg = info.per_pkg;
+	evsel->snapshot = info.snapshot;
+	evsel->metric_expr = info.metric_expr;
+	evsel->metric_name = info.metric_name;
+	return 0;
 }
 
 int parse_events_multi_pmu_add(struct parse_events_state *parse_state,
@@ -1792,6 +1799,8 @@ static int get_event_modifier(struct event_modifier *mod, char *str,
 		if (*str == 'u') {
 			if (!exclude)
 				exclude = eu = ek = eh = 1;
+			if (!exclude_GH && !perf_guest)
+				eG = 1;
 			eu = 0;
 		} else if (*str == 'k') {
 			if (!exclude)
@@ -2017,6 +2026,32 @@ err:
 	perf_pmu__parse_cleanup();
 }
 
+/*
+ * This function injects special term in
+ * perf_pmu_events_list so the test code
+ * can check on this functionality.
+ */
+int perf_pmu__test_parse_init(void)
+{
+	struct perf_pmu_event_symbol *list;
+
+	list = malloc(sizeof(*list) * 1);
+	if (!list)
+		return -ENOMEM;
+
+	list->type   = PMU_EVENT_SYMBOL;
+	list->symbol = strdup("read");
+
+	if (!list->symbol) {
+		free(list);
+		return -ENOMEM;
+	}
+
+	perf_pmu_events_list = list;
+	perf_pmu_events_list_num = 1;
+	return 0;
+}
+
 enum perf_pmu_event_symbol_type
 perf_pmu__parse_check(const char *name)
 {
@@ -2078,6 +2113,8 @@ int parse_events_terms(struct list_head *terms, const char *str)
 	int ret;
 
 	ret = parse_events__scanner(str, &parse_state);
+	perf_pmu__parse_cleanup();
+
 	if (!ret) {
 		list_splice(parse_state.terms, terms);
 		zfree(&parse_state.terms);
@@ -2088,15 +2125,16 @@ int parse_events_terms(struct list_head *terms, const char *str)
 	return ret;
 }
 
-int parse_events(struct evlist *evlist, const char *str,
-		 struct parse_events_error *err)
+int __parse_events(struct evlist *evlist, const char *str,
+		   struct parse_events_error *err, struct perf_pmu *fake_pmu)
 {
 	struct parse_events_state parse_state = {
-		.list   = LIST_HEAD_INIT(parse_state.list),
-		.idx    = evlist->core.nr_entries,
-		.error  = err,
-		.evlist = evlist,
-		.stoken = PE_START_EVENTS,
+		.list	  = LIST_HEAD_INIT(parse_state.list),
+		.idx	  = evlist->core.nr_entries,
+		.error	  = err,
+		.evlist	  = evlist,
+		.stoken	  = PE_START_EVENTS,
+		.fake_pmu = fake_pmu,
 	};
 	int ret;
 
diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h
index 1fe23a2f9b36..00cde7d2e30c 100644
--- a/tools/perf/util/parse-events.h
+++ b/tools/perf/util/parse-events.h
@@ -33,8 +33,15 @@ const char *event_type(int type);
 
 int parse_events_option(const struct option *opt, const char *str, int unset);
 int parse_events_option_new_evlist(const struct option *opt, const char *str, int unset);
-int parse_events(struct evlist *evlist, const char *str,
-		 struct parse_events_error *error);
+int __parse_events(struct evlist *evlist, const char *str, struct parse_events_error *error,
+		   struct perf_pmu *fake_pmu);
+
+static inline int parse_events(struct evlist *evlist, const char *str,
+			       struct parse_events_error *err)
+{
+	return __parse_events(evlist, str, err, NULL);
+}
+
 int parse_events_terms(struct list_head *terms, const char *str);
 int parse_filter(const struct option *opt, const char *str, int unset);
 int exclude_perf(const struct option *opt, const char *arg, int unset);
@@ -127,9 +134,10 @@ struct parse_events_state {
 	int			   idx;
 	int			   nr_groups;
 	struct parse_events_error *error;
-	struct evlist	  *evlist;
+	struct evlist		  *evlist;
 	struct list_head	  *terms;
 	int			   stoken;
+	struct perf_pmu		  *fake_pmu;
 };
 
 void parse_events__handle_error(struct parse_events_error *err, int idx,
@@ -253,4 +261,6 @@ static inline bool is_sdt_event(char *str __maybe_unused)
 }
 #endif /* HAVE_LIBELF_SUPPORT */
 
+int perf_pmu__test_parse_init(void);
+
 #endif /* __PERF_PARSE_EVENTS_H */
diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l
index 002802e17059..3ca5fd2829ca 100644
--- a/tools/perf/util/parse-events.l
+++ b/tools/perf/util/parse-events.l
@@ -41,14 +41,6 @@ static int value(yyscan_t scanner, int base)
 	return __value(yylval, text, base, PE_VALUE);
 }
 
-static int raw(yyscan_t scanner)
-{
-	YYSTYPE *yylval = parse_events_get_lval(scanner);
-	char *text = parse_events_get_text(scanner);
-
-	return __value(yylval, text + 1, 16, PE_RAW);
-}
-
 static int str(yyscan_t scanner, int token)
 {
 	YYSTYPE *yylval = parse_events_get_lval(scanner);
@@ -72,6 +64,17 @@ static int str(yyscan_t scanner, int token)
 	return token;
 }
 
+static int raw(yyscan_t scanner)
+{
+	YYSTYPE *yylval = parse_events_get_lval(scanner);
+	char *text = parse_events_get_text(scanner);
+
+	if (perf_pmu__parse_check(text) == PMU_EVENT_SYMBOL)
+		return str(scanner, PE_NAME);
+
+	return __value(yylval, text + 1, 16, PE_RAW);
+}
+
 static bool isbpf_suffix(char *text)
 {
 	int len = strlen(text);
@@ -129,12 +132,16 @@ do {								\
 	yyless(0);						\
 } while (0)
 
-static int pmu_str_check(yyscan_t scanner)
+static int pmu_str_check(yyscan_t scanner, struct parse_events_state *parse_state)
 {
 	YYSTYPE *yylval = parse_events_get_lval(scanner);
 	char *text = parse_events_get_text(scanner);
 
 	yylval->str = strdup(text);
+
+	if (parse_state->fake_pmu)
+		return PE_PMU_EVENT_FAKE;
+
 	switch (perf_pmu__parse_check(text)) {
 		case PMU_EVENT_SYMBOL_PREFIX:
 			return PE_PMU_EVENT_PRE;
@@ -289,6 +296,7 @@ percore			{ return term(yyscanner, PARSE_EVENTS__TERM_TYPE_PERCORE); }
 aux-output		{ return term(yyscanner, PARSE_EVENTS__TERM_TYPE_AUX_OUTPUT); }
 aux-sample-size		{ return term(yyscanner, PARSE_EVENTS__TERM_TYPE_AUX_SAMPLE_SIZE); }
 r{num_raw_hex}		{ return raw(yyscanner); }
+r0x{num_raw_hex}	{ return raw(yyscanner); }
 ,			{ return ','; }
 "/"			{ BEGIN(INITIAL); return '/'; }
 {name_minus}		{ return str(yyscanner, PE_NAME); }
@@ -376,7 +384,7 @@ r{num_raw_hex}		{ return raw(yyscanner); }
 {modifier_event}	{ return str(yyscanner, PE_MODIFIER_EVENT); }
 {bpf_object}		{ if (!isbpf(yyscanner)) { USER_REJECT }; return str(yyscanner, PE_BPF_OBJECT); }
 {bpf_source}		{ if (!isbpf(yyscanner)) { USER_REJECT }; return str(yyscanner, PE_BPF_SOURCE); }
-{name}			{ return pmu_str_check(yyscanner); }
+{name}			{ return pmu_str_check(yyscanner, _parse_state); }
 {name_tag}		{ return str(yyscanner, PE_NAME); }
 "/"			{ BEGIN(config); return '/'; }
 -			{ return '-'; }
diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y
index acef87d9af58..645bf4f1859f 100644
--- a/tools/perf/util/parse-events.y
+++ b/tools/perf/util/parse-events.y
@@ -69,7 +69,7 @@ static void inc_group_count(struct list_head *list,
 %token PE_NAME_CACHE_TYPE PE_NAME_CACHE_OP_RESULT
 %token PE_PREFIX_MEM PE_PREFIX_RAW PE_PREFIX_GROUP
 %token PE_ERROR
-%token PE_PMU_EVENT_PRE PE_PMU_EVENT_SUF PE_KERNEL_PMU_EVENT
+%token PE_PMU_EVENT_PRE PE_PMU_EVENT_SUF PE_KERNEL_PMU_EVENT PE_PMU_EVENT_FAKE
 %token PE_ARRAY_ALL PE_ARRAY_RANGE
 %token PE_DRV_CFG_TERM
 %type <num> PE_VALUE
@@ -87,7 +87,7 @@ static void inc_group_count(struct list_head *list,
 %type <str> PE_MODIFIER_EVENT
 %type <str> PE_MODIFIER_BP
 %type <str> PE_EVENT_NAME
-%type <str> PE_PMU_EVENT_PRE PE_PMU_EVENT_SUF PE_KERNEL_PMU_EVENT
+%type <str> PE_PMU_EVENT_PRE PE_PMU_EVENT_SUF PE_KERNEL_PMU_EVENT PE_PMU_EVENT_FAKE
 %type <str> PE_DRV_CFG_TERM
 %destructor { free ($$); } <str>
 %type <term> event_term
@@ -356,6 +356,43 @@ PE_PMU_EVENT_PRE '-' PE_PMU_EVENT_SUF sep_dc
 		YYABORT;
 	$$ = list;
 }
+|
+PE_PMU_EVENT_FAKE sep_dc
+{
+	struct list_head *list;
+	int err;
+
+	list = alloc_list();
+	if (!list)
+		YYABORT;
+
+	err = parse_events_add_pmu(_parse_state, list, $1, NULL, false, false);
+	free($1);
+	if (err < 0) {
+		free(list);
+		YYABORT;
+	}
+	$$ = list;
+}
+|
+PE_PMU_EVENT_FAKE opt_pmu_config
+{
+	struct list_head *list;
+	int err;
+
+	list = alloc_list();
+	if (!list)
+		YYABORT;
+
+	err = parse_events_add_pmu(_parse_state, list, $1, $2, false, false);
+	free($1);
+	parse_events_terms__delete($2);
+	if (err < 0) {
+		free(list);
+		YYABORT;
+	}
+	$$ = list;
+}
 
 value_sym:
 PE_VALUE_SYM_HW
@@ -474,7 +511,7 @@ PE_PREFIX_MEM PE_VALUE '/' PE_VALUE ':' PE_MODIFIER_BP sep_dc
 	list = alloc_list();
 	ABORT_ON(!list);
 	err = parse_events_add_breakpoint(list, &parse_state->idx,
-					(void *) $2, $6, $4);
+					(void *)(uintptr_t) $2, $6, $4);
 	free($6);
 	if (err) {
 		free(list);
@@ -491,7 +528,7 @@ PE_PREFIX_MEM PE_VALUE '/' PE_VALUE sep_dc
 	list = alloc_list();
 	ABORT_ON(!list);
 	if (parse_events_add_breakpoint(list, &parse_state->idx,
-						(void *) $2, NULL, $4)) {
+						(void *)(uintptr_t) $2, NULL, $4)) {
 		free(list);
 		YYABORT;
 	}
@@ -507,7 +544,7 @@ PE_PREFIX_MEM PE_VALUE ':' PE_MODIFIER_BP sep_dc
 	list = alloc_list();
 	ABORT_ON(!list);
 	err = parse_events_add_breakpoint(list, &parse_state->idx,
-					(void *) $2, $4, 0);
+					(void *)(uintptr_t) $2, $4, 0);
 	free($4);
 	if (err) {
 		free(list);
@@ -524,7 +561,7 @@ PE_PREFIX_MEM PE_VALUE sep_dc
 	list = alloc_list();
 	ABORT_ON(!list);
 	if (parse_events_add_breakpoint(list, &parse_state->idx,
-						(void *) $2, NULL, 0)) {
+						(void *)(uintptr_t) $2, NULL, 0)) {
 		free(list);
 		YYABORT;
 	}
diff --git a/tools/perf/util/parse-sublevel-options.c b/tools/perf/util/parse-sublevel-options.c
new file mode 100644
index 000000000000..a841d17ffd57
--- /dev/null
+++ b/tools/perf/util/parse-sublevel-options.c
@@ -0,0 +1,70 @@
+#include <stdlib.h>
+#include <stdint.h>
+#include <string.h>
+#include <stdio.h>
+
+#include "util/debug.h"
+#include "util/parse-sublevel-options.h"
+
+static int parse_one_sublevel_option(const char *str,
+				     struct sublevel_option *opts)
+{
+	struct sublevel_option *opt = opts;
+	char *vstr, *s = strdup(str);
+	int v = 1;
+
+	if (!s) {
+		pr_err("no memory\n");
+		return -1;
+	}
+
+	vstr = strchr(s, '=');
+	if (vstr)
+		*vstr++ = 0;
+
+	while (opt->name) {
+		if (!strcmp(s, opt->name))
+			break;
+		opt++;
+	}
+
+	if (!opt->name) {
+		pr_err("Unknown option name '%s'\n", s);
+		free(s);
+		return -1;
+	}
+
+	if (vstr)
+		v = atoi(vstr);
+
+	*opt->value_ptr = v;
+	free(s);
+	return 0;
+}
+
+/* parse options like --foo a=<n>,b,c... */
+int perf_parse_sublevel_options(const char *str, struct sublevel_option *opts)
+{
+	char *s = strdup(str);
+	char *p = NULL;
+	int ret;
+
+	if (!s) {
+		pr_err("no memory\n");
+		return -1;
+	}
+
+	p = strtok(s, ",");
+	while (p) {
+		ret = parse_one_sublevel_option(p, opts);
+		if (ret) {
+			free(s);
+			return ret;
+		}
+
+		p = strtok(NULL, ",");
+	}
+
+	free(s);
+	return 0;
+}
diff --git a/tools/perf/util/parse-sublevel-options.h b/tools/perf/util/parse-sublevel-options.h
new file mode 100644
index 000000000000..9b9efcc2aaad
--- /dev/null
+++ b/tools/perf/util/parse-sublevel-options.h
@@ -0,0 +1,11 @@
+#ifndef _PERF_PARSE_SUBLEVEL_OPTIONS_H
+#define _PERF_PARSE_SUBLEVEL_OPTIONS_H
+
+struct sublevel_option {
+	const char *name;
+	int *value_ptr;
+};
+
+int perf_parse_sublevel_options(const char *str, struct sublevel_option *opts);
+
+#endif
+\ No newline at end of file
diff --git a/tools/perf/util/perf_api_probe.c b/tools/perf/util/perf_api_probe.c
index 1337965673d7..3840d02f0f7b 100644
--- a/tools/perf/util/perf_api_probe.c
+++ b/tools/perf/util/perf_api_probe.c
@@ -93,6 +93,11 @@ static void perf_probe_context_switch(struct evsel *evsel)
 	evsel->core.attr.context_switch = 1;
 }
 
+static void perf_probe_text_poke(struct evsel *evsel)
+{
+	evsel->core.attr.text_poke = 1;
+}
+
 bool perf_can_sample_identifier(void)
 {
 	return perf_probe_api(perf_probe_sample_identifier);
@@ -108,6 +113,11 @@ bool perf_can_record_switch_events(void)
 	return perf_probe_api(perf_probe_context_switch);
 }
 
+bool perf_can_record_text_poke_events(void)
+{
+	return perf_probe_api(perf_probe_text_poke);
+}
+
 bool perf_can_record_cpu_wide(void)
 {
 	struct perf_event_attr attr = {
diff --git a/tools/perf/util/perf_api_probe.h b/tools/perf/util/perf_api_probe.h
index 706c3c6426e2..d5506a983a94 100644
--- a/tools/perf/util/perf_api_probe.h
+++ b/tools/perf/util/perf_api_probe.h
@@ -9,6 +9,7 @@ bool perf_can_aux_sample(void);
 bool perf_can_comm_exec(void);
 bool perf_can_record_cpu_wide(void);
 bool perf_can_record_switch_events(void);
+bool perf_can_record_text_poke_events(void);
 bool perf_can_sample_identifier(void);
 
 #endif // __PERF_API_PROBE_H
diff --git a/tools/perf/util/perf_event_attr_fprintf.c b/tools/perf/util/perf_event_attr_fprintf.c
index b94fa07f5d32..e67a227c0ce7 100644
--- a/tools/perf/util/perf_event_attr_fprintf.c
+++ b/tools/perf/util/perf_event_attr_fprintf.c
@@ -147,6 +147,7 @@ int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr,
 	PRINT_ATTRf(aux_watermark, p_unsigned);
 	PRINT_ATTRf(sample_max_stack, p_unsigned);
 	PRINT_ATTRf(aux_sample_size, p_unsigned);
+	PRINT_ATTRf(text_poke, p_unsigned);
 
 	return ret;
 }
diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
index 93fe72a9dc0b..d41caeb35cf6 100644
--- a/tools/perf/util/pmu.c
+++ b/tools/perf/util/pmu.c
@@ -26,6 +26,8 @@
 #include "strbuf.h"
 #include "fncache.h"
 
+struct perf_pmu perf_pmu__fake;
+
 struct perf_pmu_format {
 	char *name;
 	int value;
@@ -272,7 +274,7 @@ static void perf_pmu_update_alias(struct perf_pmu_alias *old,
 }
 
 /* Delete an alias entry. */
-static void perf_pmu_free_alias(struct perf_pmu_alias *newalias)
+void perf_pmu_free_alias(struct perf_pmu_alias *newalias)
 {
 	zfree(&newalias->name);
 	zfree(&newalias->desc);
@@ -1352,6 +1354,17 @@ void perf_pmu__set_format(unsigned long *bits, long from, long to)
 		set_bit(b, bits);
 }
 
+void perf_pmu__del_formats(struct list_head *formats)
+{
+	struct perf_pmu_format *fmt, *tmp;
+
+	list_for_each_entry_safe(fmt, tmp, formats, list) {
+		list_del(&fmt->list);
+		free(fmt->name);
+		free(fmt);
+	}
+}
+
 static int sub_non_neg(int a, int b)
 {
 	if (b > a)
@@ -1400,6 +1413,7 @@ struct sevent {
 	char *pmu;
 	char *metric_expr;
 	char *metric_name;
+	int is_cpu;
 };
 
 static int cmp_sevent(const void *a, const void *b)
@@ -1416,6 +1430,11 @@ static int cmp_sevent(const void *a, const void *b)
 		if (n)
 			return n;
 	}
+
+	/* Order CPU core events to be first */
+	if (as->is_cpu != bs->is_cpu)
+		return bs->is_cpu - as->is_cpu;
+
 	return strcmp(as->name, bs->name);
 }
 
@@ -1475,7 +1494,7 @@ void print_pmu_events(const char *event_glob, bool name_only, bool quiet_flag,
 		list_for_each_entry(alias, &pmu->aliases, list) {
 			char *name = alias->desc ? alias->name :
 				format_alias(buf, sizeof(buf), pmu, alias);
-			bool is_cpu = !strcmp(pmu->name, "cpu");
+			bool is_cpu = is_pmu_core(pmu->name);
 
 			if (alias->deprecated && !deprecated)
 				continue;
@@ -1507,6 +1526,7 @@ void print_pmu_events(const char *event_glob, bool name_only, bool quiet_flag,
 			aliases[j].pmu = pmu->name;
 			aliases[j].metric_expr = alias->metric_expr;
 			aliases[j].metric_name = alias->metric_name;
+			aliases[j].is_cpu = is_cpu;
 			j++;
 		}
 		if (pmu->selectable &&
diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h
index f971d9aa4570..a64e9c9ce731 100644
--- a/tools/perf/util/pmu.h
+++ b/tools/perf/util/pmu.h
@@ -43,6 +43,8 @@ struct perf_pmu {
 	struct list_head list;    /* ELEM */
 };
 
+extern struct perf_pmu perf_pmu__fake;
+
 struct perf_pmu_info {
 	const char *unit;
 	const char *metric_expr;
@@ -92,6 +94,7 @@ int perf_pmu__new_format(struct list_head *list, char *name,
 			 int config, unsigned long *bits);
 void perf_pmu__set_format(unsigned long *bits, long from, long to);
 int perf_pmu__format_parse(char *dir, struct list_head *head);
+void perf_pmu__del_formats(struct list_head *formats);
 
 struct perf_pmu *perf_pmu__scan(struct perf_pmu *pmu);
 
@@ -111,6 +114,7 @@ void pmu_add_cpu_aliases_map(struct list_head *head, struct perf_pmu *pmu,
 
 struct pmu_events_map *perf_pmu__find_map(struct perf_pmu *pmu);
 bool pmu_uncore_alias_match(const char *pmu_name, const char *name);
+void perf_pmu_free_alias(struct perf_pmu_alias *alias);
 
 int perf_pmu__convert_scale(const char *scale, char **end, double *sval);
 
diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
index df713a5d1e26..99d36ac77c08 100644
--- a/tools/perf/util/probe-event.c
+++ b/tools/perf/util/probe-event.c
@@ -375,9 +375,13 @@ static int find_alternative_probe_point(struct debuginfo *dinfo,
 
 	/* Find the address of given function */
 	map__for_each_symbol_by_name(map, pp->function, sym) {
-		if (uprobes)
+		if (uprobes) {
 			address = sym->start;
-		else
+			if (sym->type == STT_GNU_IFUNC)
+				pr_warning("Warning: The probe function (%s) is a GNU indirect function.\n"
+					   "Consider identifying the final function used at run time and set the probe directly on that.\n",
+					   pp->function);
+		} else
 			address = map->unmap_ip(map, sym->start) - map->reloc;
 		break;
 	}
@@ -2968,6 +2972,16 @@ static int find_probe_trace_events_from_map(struct perf_probe_event *pev,
 	for (j = 0; j < num_matched_functions; j++) {
 		sym = syms[j];
 
+		/* There can be duplicated symbols in the map */
+		for (i = 0; i < j; i++)
+			if (sym->start == syms[i]->start) {
+				pr_debug("Found duplicated symbol %s @ %" PRIx64 "\n",
+					 sym->name, sym->start);
+				break;
+			}
+		if (i != j)
+			continue;
+
 		tev = (*tevs) + ret;
 		tp = &tev->point;
 		if (ret == num_matched_functions) {
diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c
index 55924255c535..659024342e9a 100644
--- a/tools/perf/util/probe-finder.c
+++ b/tools/perf/util/probe-finder.c
@@ -1408,6 +1408,9 @@ static int fill_empty_trace_arg(struct perf_probe_event *pev,
 	char *type;
 	int i, j, ret;
 
+	if (!ntevs)
+		return -ENOENT;
+
 	for (i = 0; i < pev->nargs; i++) {
 		type = NULL;
 		for (j = 0; j < ntevs; j++) {
@@ -1464,7 +1467,7 @@ int debuginfo__find_trace_events(struct debuginfo *dbg,
 	if (ret >= 0 && tf.pf.skip_empty_arg)
 		ret = fill_empty_trace_arg(pev, tf.tevs, tf.ntevs);
 
-	if (ret < 0) {
+	if (ret < 0 || tf.ntevs == 0) {
 		for (i = 0; i < tf.ntevs; i++)
 			clear_probe_trace_event(&tf.tevs[i]);
 		zfree(tevs);
diff --git a/tools/perf/util/record.c b/tools/perf/util/record.c
index a4cc11592f6b..ea9aa1d7cf50 100644
--- a/tools/perf/util/record.c
+++ b/tools/perf/util/record.c
@@ -2,6 +2,7 @@
 #include "debug.h"
 #include "evlist.h"
 #include "evsel.h"
+#include "evsel_config.h"
 #include "parse-events.h"
 #include <errno.h>
 #include <limits.h>
@@ -33,11 +34,24 @@ static struct evsel *evsel__read_sampler(struct evsel *evsel, struct evlist *evl
 	return leader;
 }
 
+static u64 evsel__config_term_mask(struct evsel *evsel)
+{
+	struct evsel_config_term *term;
+	struct list_head *config_terms = &evsel->config_terms;
+	u64 term_types = 0;
+
+	list_for_each_entry(term, config_terms, list) {
+		term_types |= 1 << term->type;
+	}
+	return term_types;
+}
+
 static void evsel__config_leader_sampling(struct evsel *evsel, struct evlist *evlist)
 {
 	struct perf_event_attr *attr = &evsel->core.attr;
 	struct evsel *leader = evsel->leader;
 	struct evsel *read_sampler;
+	u64 term_types, freq_mask;
 
 	if (!leader->sample_read)
 		return;
@@ -47,16 +61,20 @@ static void evsel__config_leader_sampling(struct evsel *evsel, struct evlist *ev
 	if (evsel == read_sampler)
 		return;
 
+	term_types = evsel__config_term_mask(evsel);
 	/*
-	 * Disable sampling for all group members other than the leader in
-	 * case the leader 'leads' the sampling, except when the leader is an
-	 * AUX area event, in which case the 2nd event in the group is the one
-	 * that 'leads' the sampling.
+	 * Disable sampling for all group members except those with explicit
+	 * config terms or the leader. In the case of an AUX area event, the 2nd
+	 * event in the group is the one that 'leads' the sampling.
 	 */
-	attr->freq           = 0;
-	attr->sample_freq    = 0;
-	attr->sample_period  = 0;
-	attr->write_backward = 0;
+	freq_mask = (1 << EVSEL__CONFIG_TERM_FREQ) | (1 << EVSEL__CONFIG_TERM_PERIOD);
+	if ((term_types & freq_mask) == 0) {
+		attr->freq           = 0;
+		attr->sample_freq    = 0;
+		attr->sample_period  = 0;
+	}
+	if ((term_types & (1 << EVSEL__CONFIG_TERM_OVERWRITE)) == 0)
+		attr->write_backward = 0;
 
 	/*
 	 * We don't get a sample for slave events, we make them when delivering
diff --git a/tools/perf/util/record.h b/tools/perf/util/record.h
index 39d1de4b2a36..03678ff25539 100644
--- a/tools/perf/util/record.h
+++ b/tools/perf/util/record.h
@@ -48,6 +48,7 @@ struct record_opts {
 	bool	      sample_id;
 	bool	      no_bpf_event;
 	bool	      kcore;
+	bool	      text_poke;
 	unsigned int  freq;
 	unsigned int  mmap_pages;
 	unsigned int  auxtrace_mmap_pages;
@@ -61,7 +62,7 @@ struct record_opts {
 	const char    *auxtrace_snapshot_opts;
 	const char    *auxtrace_sample_opts;
 	bool	      sample_transaction;
-	unsigned      initial_delay;
+	int	      initial_delay;
 	bool	      use_clockid;
 	clockid_t     clockid;
 	u64	      clockid_res_ns;
@@ -70,6 +71,8 @@ struct record_opts {
 	int	      mmap_flush;
 	unsigned int  comp_level;
 	unsigned int  nr_threads_synthesize;
+	int	      ctl_fd;
+	int	      ctl_fd_ack;
 };
 
 extern const char * const *record_usage;
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 1a157e84a04a..7a5f03764702 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -87,7 +87,7 @@ static int perf_session__process_compressed_event(struct perf_session *session,
 		session->decomp_last = decomp;
 	}
 
-	pr_debug("decomp (B): %ld to %ld\n", src_size, decomp_size);
+	pr_debug("decomp (B): %zd to %zd\n", src_size, decomp_size);
 
 	return 0;
 }
@@ -115,12 +115,12 @@ static int perf_session__open(struct perf_session *session)
 	if (perf_header__has_feat(&session->header, HEADER_STAT))
 		return 0;
 
-	if (!perf_evlist__valid_sample_type(session->evlist)) {
+	if (!evlist__valid_sample_type(session->evlist)) {
 		pr_err("non matching sample_type\n");
 		return -1;
 	}
 
-	if (!perf_evlist__valid_sample_id_all(session->evlist)) {
+	if (!evlist__valid_sample_id_all(session->evlist)) {
 		pr_err("non matching sample_id_all\n");
 		return -1;
 	}
@@ -252,10 +252,10 @@ struct perf_session *perf_session__new(struct perf_data *data,
 
 	/*
 	 * In pipe-mode, evlist is empty until PERF_RECORD_HEADER_ATTR is
-	 * processed, so perf_evlist__sample_id_all is not meaningful here.
+	 * processed, so evlist__sample_id_all is not meaningful here.
 	 */
 	if ((!data || !data->is_pipe) && tool && tool->ordering_requires_timestamps &&
-	    tool->ordered_events && !perf_evlist__sample_id_all(session->evlist)) {
+	    tool->ordered_events && !evlist__sample_id_all(session->evlist)) {
 		dump_printf("WARNING: No sample_id_all support, falling back to unordered processing\n");
 		tool->ordered_events = false;
 	}
@@ -490,6 +490,8 @@ void perf_tool__fill_defaults(struct perf_tool *tool)
 		tool->ksymbol = perf_event__process_ksymbol;
 	if (tool->bpf == NULL)
 		tool->bpf = perf_event__process_bpf;
+	if (tool->text_poke == NULL)
+		tool->text_poke = perf_event__process_text_poke;
 	if (tool->read == NULL)
 		tool->read = process_event_sample_stub;
 	if (tool->throttle == NULL)
@@ -659,6 +661,24 @@ static void perf_event__switch_swap(union perf_event *event, bool sample_id_all)
 		swap_sample_id_all(event, &event->context_switch + 1);
 }
 
+static void perf_event__text_poke_swap(union perf_event *event, bool sample_id_all)
+{
+	event->text_poke.addr    = bswap_64(event->text_poke.addr);
+	event->text_poke.old_len = bswap_16(event->text_poke.old_len);
+	event->text_poke.new_len = bswap_16(event->text_poke.new_len);
+
+	if (sample_id_all) {
+		size_t len = sizeof(event->text_poke.old_len) +
+			     sizeof(event->text_poke.new_len) +
+			     event->text_poke.old_len +
+			     event->text_poke.new_len;
+		void *data = &event->text_poke.old_len;
+
+		data += PERF_ALIGN(len, sizeof(u64));
+		swap_sample_id_all(event, data);
+	}
+}
+
 static void perf_event__throttle_swap(union perf_event *event,
 				      bool sample_id_all)
 {
@@ -932,6 +952,7 @@ static perf_event__swap_op perf_event__swap_ops[] = {
 	[PERF_RECORD_SWITCH]		  = perf_event__switch_swap,
 	[PERF_RECORD_SWITCH_CPU_WIDE]	  = perf_event__switch_swap,
 	[PERF_RECORD_NAMESPACES]	  = perf_event__namespaces_swap,
+	[PERF_RECORD_TEXT_POKE]		  = perf_event__text_poke_swap,
 	[PERF_RECORD_HEADER_ATTR]	  = perf_event__hdr_attr_swap,
 	[PERF_RECORD_HEADER_EVENT_TYPE]	  = perf_event__event_type_swap,
 	[PERF_RECORD_HEADER_TRACING_DATA] = perf_event__tracing_data_swap,
@@ -1160,10 +1181,10 @@ static void perf_evlist__print_tstamp(struct evlist *evlist,
 				       union perf_event *event,
 				       struct perf_sample *sample)
 {
-	u64 sample_type = __perf_evlist__combined_sample_type(evlist);
+	u64 sample_type = __evlist__combined_sample_type(evlist);
 
 	if (event->header.type != PERF_RECORD_SAMPLE &&
-	    !perf_evlist__sample_id_all(evlist)) {
+	    !evlist__sample_id_all(evlist)) {
 		fputs("-1 -1 ", stdout);
 		return;
 	}
@@ -1474,6 +1495,8 @@ static int machines__deliver_event(struct machines *machines,
 		return tool->ksymbol(tool, event, sample, machine);
 	case PERF_RECORD_BPF_EVENT:
 		return tool->bpf(tool, event, sample, machine);
+	case PERF_RECORD_TEXT_POKE:
+		return tool->text_poke(tool, event, sample, machine);
 	default:
 		++evlist->stats.nr_unknown_events;
 		return -1;
@@ -1655,7 +1678,7 @@ int perf_session__peek_event(struct perf_session *session, off_t file_offset,
 		return -1;
 
 	if (session->header.needs_swap)
-		event_swap(event, perf_evlist__sample_id_all(session->evlist));
+		event_swap(event, evlist__sample_id_all(session->evlist));
 
 out_parse_sample:
 
@@ -1704,7 +1727,7 @@ static s64 perf_session__process_event(struct perf_session *session,
 	int ret;
 
 	if (session->header.needs_swap)
-		event_swap(event, perf_evlist__sample_id_all(evlist));
+		event_swap(event, evlist__sample_id_all(evlist));
 
 	if (event->header.type >= PERF_RECORD_HEADER_MAX)
 		return -EINVAL;
diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c
index 57d0706e1330..493ec372fdec 100644
--- a/tools/perf/util/stat-display.c
+++ b/tools/perf/util/stat-display.c
@@ -117,7 +117,7 @@ static void aggr_printout(struct perf_stat_config *config,
 				cpu_map__id_to_die(id),
 				config->csv_output ? 0 : -3,
 				cpu_map__id_to_cpu(id), config->csv_sep);
-		} else {
+		} else if (id > -1) {
 			fprintf(config->output, "CPU%*d%s",
 				config->csv_output ? 0 : -7,
 				evsel__cpus(evsel)->map[id],
diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c
index a7c13a88ecb9..924b54d15d54 100644
--- a/tools/perf/util/stat-shadow.c
+++ b/tools/perf/util/stat-shadow.c
@@ -517,7 +517,7 @@ static void print_l1_dcache_misses(struct perf_stat_config *config,
 
 	color = get_ratio_color(GRC_CACHE_MISSES, ratio);
 
-	out->print_metric(config, out->ctx, color, "%7.2f%%", "of all L1-dcache hits", ratio);
+	out->print_metric(config, out->ctx, color, "%7.2f%%", "of all L1-dcache accesses", ratio);
 }
 
 static void print_l1_icache_misses(struct perf_stat_config *config,
@@ -538,7 +538,7 @@ static void print_l1_icache_misses(struct perf_stat_config *config,
 		ratio = avg / total * 100.0;
 
 	color = get_ratio_color(GRC_CACHE_MISSES, ratio);
-	out->print_metric(config, out->ctx, color, "%7.2f%%", "of all L1-icache hits", ratio);
+	out->print_metric(config, out->ctx, color, "%7.2f%%", "of all L1-icache accesses", ratio);
 }
 
 static void print_dtlb_cache_misses(struct perf_stat_config *config,
@@ -558,7 +558,7 @@ static void print_dtlb_cache_misses(struct perf_stat_config *config,
 		ratio = avg / total * 100.0;
 
 	color = get_ratio_color(GRC_CACHE_MISSES, ratio);
-	out->print_metric(config, out->ctx, color, "%7.2f%%", "of all dTLB cache hits", ratio);
+	out->print_metric(config, out->ctx, color, "%7.2f%%", "of all dTLB cache accesses", ratio);
 }
 
 static void print_itlb_cache_misses(struct perf_stat_config *config,
@@ -578,7 +578,7 @@ static void print_itlb_cache_misses(struct perf_stat_config *config,
 		ratio = avg / total * 100.0;
 
 	color = get_ratio_color(GRC_CACHE_MISSES, ratio);
-	out->print_metric(config, out->ctx, color, "%7.2f%%", "of all iTLB cache hits", ratio);
+	out->print_metric(config, out->ctx, color, "%7.2f%%", "of all iTLB cache accesses", ratio);
 }
 
 static void print_ll_cache_misses(struct perf_stat_config *config,
@@ -598,7 +598,7 @@ static void print_ll_cache_misses(struct perf_stat_config *config,
 		ratio = avg / total * 100.0;
 
 	color = get_ratio_color(GRC_CACHE_MISSES, ratio);
-	out->print_metric(config, out->ctx, color, "%7.2f%%", "of all LL-cache hits", ratio);
+	out->print_metric(config, out->ctx, color, "%7.2f%%", "of all LL-cache accesses", ratio);
 }
 
 /*
@@ -730,25 +730,17 @@ static void print_smi_cost(struct perf_stat_config *config,
 	out->print_metric(config, out->ctx, NULL, "%4.0f", "SMI#", smi_num);
 }
 
-static void generic_metric(struct perf_stat_config *config,
-			   const char *metric_expr,
-			   struct evsel **metric_events,
-			   char *name,
-			   const char *metric_name,
-			   const char *metric_unit,
-			   int runtime,
-			   int cpu,
-			   struct perf_stat_output_ctx *out,
-			   struct runtime_stat *st)
+static int prepare_metric(struct evsel **metric_events,
+			  struct metric_ref *metric_refs,
+			  struct expr_parse_ctx *pctx,
+			  int cpu,
+			  struct runtime_stat *st)
 {
-	print_metric_t print_metric = out->print_metric;
-	struct expr_parse_ctx pctx;
-	double ratio, scale;
-	int i;
-	void *ctxp = out->ctx;
+	double scale;
 	char *n, *pn;
+	int i, j, ret;
 
-	expr__ctx_init(&pctx);
+	expr__ctx_init(pctx);
 	for (i = 0; metric_events[i]; i++) {
 		struct saved_value *v;
 		struct stats *stats;
@@ -771,7 +763,7 @@ static void generic_metric(struct perf_stat_config *config,
 
 		n = strdup(metric_events[i]->name);
 		if (!n)
-			return;
+			return -ENOMEM;
 		/*
 		 * This display code with --no-merge adds [cpu] postfixes.
 		 * These are not supported by the parser. Remove everything
@@ -782,11 +774,42 @@ static void generic_metric(struct perf_stat_config *config,
 			*pn = 0;
 
 		if (metric_total)
-			expr__add_id(&pctx, n, metric_total);
+			expr__add_id_val(pctx, n, metric_total);
 		else
-			expr__add_id(&pctx, n, avg_stats(stats)*scale);
+			expr__add_id_val(pctx, n, avg_stats(stats)*scale);
 	}
 
+	for (j = 0; metric_refs && metric_refs[j].metric_name; j++) {
+		ret = expr__add_ref(pctx, &metric_refs[j]);
+		if (ret)
+			return ret;
+	}
+
+	return i;
+}
+
+static void generic_metric(struct perf_stat_config *config,
+			   const char *metric_expr,
+			   struct evsel **metric_events,
+			   struct metric_ref *metric_refs,
+			   char *name,
+			   const char *metric_name,
+			   const char *metric_unit,
+			   int runtime,
+			   int cpu,
+			   struct perf_stat_output_ctx *out,
+			   struct runtime_stat *st)
+{
+	print_metric_t print_metric = out->print_metric;
+	struct expr_parse_ctx pctx;
+	double ratio, scale;
+	int i;
+	void *ctxp = out->ctx;
+
+	i = prepare_metric(metric_events, metric_refs, &pctx, cpu, st);
+	if (i < 0)
+		return;
+
 	if (!metric_events[i]) {
 		if (expr__parse(&ratio, &pctx, metric_expr, runtime) == 0) {
 			char *unit;
@@ -827,6 +850,22 @@ static void generic_metric(struct perf_stat_config *config,
 	expr__ctx_clear(&pctx);
 }
 
+double test_generic_metric(struct metric_expr *mexp, int cpu, struct runtime_stat *st)
+{
+	struct expr_parse_ctx pctx;
+	double ratio = 0.0;
+
+	if (prepare_metric(mexp->metric_events, mexp->metric_refs, &pctx, cpu, st) < 0)
+		goto out;
+
+	if (expr__parse(&ratio, &pctx, mexp->metric_expr, 1))
+		ratio = 0.0;
+
+out:
+	expr__ctx_clear(&pctx);
+	return ratio;
+}
+
 void perf_stat__print_shadow_stats(struct perf_stat_config *config,
 				   struct evsel *evsel,
 				   double avg, int cpu,
@@ -881,7 +920,7 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
 		if (runtime_stat_n(st, STAT_L1_DCACHE, ctx, cpu) != 0)
 			print_l1_dcache_misses(config, cpu, evsel, avg, out, st);
 		else
-			print_metric(config, ctxp, NULL, NULL, "of all L1-dcache hits", 0);
+			print_metric(config, ctxp, NULL, NULL, "of all L1-dcache accesses", 0);
 	} else if (
 		evsel->core.attr.type == PERF_TYPE_HW_CACHE &&
 		evsel->core.attr.config ==  ( PERF_COUNT_HW_CACHE_L1I |
@@ -891,7 +930,7 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
 		if (runtime_stat_n(st, STAT_L1_ICACHE, ctx, cpu) != 0)
 			print_l1_icache_misses(config, cpu, evsel, avg, out, st);
 		else
-			print_metric(config, ctxp, NULL, NULL, "of all L1-icache hits", 0);
+			print_metric(config, ctxp, NULL, NULL, "of all L1-icache accesses", 0);
 	} else if (
 		evsel->core.attr.type == PERF_TYPE_HW_CACHE &&
 		evsel->core.attr.config ==  ( PERF_COUNT_HW_CACHE_DTLB |
@@ -901,7 +940,7 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
 		if (runtime_stat_n(st, STAT_DTLB_CACHE, ctx, cpu) != 0)
 			print_dtlb_cache_misses(config, cpu, evsel, avg, out, st);
 		else
-			print_metric(config, ctxp, NULL, NULL, "of all dTLB cache hits", 0);
+			print_metric(config, ctxp, NULL, NULL, "of all dTLB cache accesses", 0);
 	} else if (
 		evsel->core.attr.type == PERF_TYPE_HW_CACHE &&
 		evsel->core.attr.config ==  ( PERF_COUNT_HW_CACHE_ITLB |
@@ -911,7 +950,7 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
 		if (runtime_stat_n(st, STAT_ITLB_CACHE, ctx, cpu) != 0)
 			print_itlb_cache_misses(config, cpu, evsel, avg, out, st);
 		else
-			print_metric(config, ctxp, NULL, NULL, "of all iTLB cache hits", 0);
+			print_metric(config, ctxp, NULL, NULL, "of all iTLB cache accesses", 0);
 	} else if (
 		evsel->core.attr.type == PERF_TYPE_HW_CACHE &&
 		evsel->core.attr.config ==  ( PERF_COUNT_HW_CACHE_LL |
@@ -921,7 +960,7 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
 		if (runtime_stat_n(st, STAT_LL_CACHE, ctx, cpu) != 0)
 			print_ll_cache_misses(config, cpu, evsel, avg, out, st);
 		else
-			print_metric(config, ctxp, NULL, NULL, "of all LL-cache hits", 0);
+			print_metric(config, ctxp, NULL, NULL, "of all LL-cache accesses", 0);
 	} else if (evsel__match(evsel, HARDWARE, HW_CACHE_MISSES)) {
 		total = runtime_stat_avg(st, STAT_CACHEREFS, ctx, cpu);
 
@@ -1035,8 +1074,8 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
 		else
 			print_metric(config, ctxp, NULL, NULL, name, 0);
 	} else if (evsel->metric_expr) {
-		generic_metric(config, evsel->metric_expr, evsel->metric_events, evsel->name,
-				evsel->metric_name, NULL, 1, cpu, out, st);
+		generic_metric(config, evsel->metric_expr, evsel->metric_events, NULL,
+				evsel->name, evsel->metric_name, NULL, 1, cpu, out, st);
 	} else if (runtime_stat_n(st, STAT_NSECS, 0, cpu) != 0) {
 		char unit = 'M';
 		char unit_buf[10];
@@ -1064,7 +1103,7 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
 			if (num++ > 0)
 				out->new_line(config, ctxp);
 			generic_metric(config, mexp->metric_expr, mexp->metric_events,
-					evsel->name, mexp->metric_name,
+					mexp->metric_refs, evsel->name, mexp->metric_name,
 					mexp->metric_unit, mexp->runtime, cpu, out, st);
 		}
 	}
diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h
index f75ae679eb28..aa3bed48511b 100644
--- a/tools/perf/util/stat.h
+++ b/tools/perf/util/stat.h
@@ -113,10 +113,11 @@ struct perf_stat_config {
 	bool			 summary;
 	bool			 metric_no_group;
 	bool			 metric_no_merge;
+	bool			 stop_read_counter;
 	FILE			*output;
 	unsigned int		 interval;
 	unsigned int		 timeout;
-	unsigned int		 initial_delay;
+	int			 initial_delay;
 	unsigned int		 unit_width;
 	unsigned int		 metric_only_len;
 	int			 times;
@@ -133,6 +134,8 @@ struct perf_stat_config {
 	struct perf_cpu_map		*cpus_aggr_map;
 	u64			*walltime_run;
 	struct rblist		 metric_events;
+	int			 ctl_fd;
+	int			 ctl_fd_ack;
 };
 
 void perf_stat__set_big_num(int set);
@@ -230,4 +233,7 @@ perf_evlist__print_counters(struct evlist *evlist,
 			    struct target *_target,
 			    struct timespec *ts,
 			    int argc, const char **argv);
+
+struct metric_expr;
+double test_generic_metric(struct metric_expr *mexp, int cpu, struct runtime_stat *st);
 #endif
diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c
index 5e43054bffea..8cc4b0059fb0 100644
--- a/tools/perf/util/symbol-elf.c
+++ b/tools/perf/util/symbol-elf.c
@@ -789,7 +789,7 @@ int symsrc__init(struct symsrc *ss, struct dso *dso, const char *name,
 	if (ss->opdshdr.sh_type != SHT_PROGBITS)
 		ss->opdsec = NULL;
 
-	if (dso->kernel == DSO_TYPE_USER)
+	if (dso->kernel == DSO_SPACE__USER)
 		ss->adjust_symbols = true;
 	else
 		ss->adjust_symbols = elf__needs_adjust_symbols(ehdr);
@@ -872,7 +872,7 @@ static int dso__process_kernel_symbol(struct dso *dso, struct map *map,
 		 * kallsyms and identity maps.  Overwrite it to
 		 * map to the kernel dso.
 		 */
-		if (*remap_kernel && dso->kernel) {
+		if (*remap_kernel && dso->kernel && !kmodule) {
 			*remap_kernel = false;
 			map->start = shdr->sh_addr + ref_reloc(kmap);
 			map->end = map->start + shdr->sh_size;
@@ -1068,7 +1068,7 @@ int dso__load_sym(struct dso *dso, struct map *map, struct symsrc *syms_ss,
 	 * Initial kernel and module mappings do not map to the dso.
 	 * Flag the fixups.
 	 */
-	if (dso->kernel || kmodule) {
+	if (dso->kernel) {
 		remap_kernel = true;
 		adjust_kernel_syms = dso->adjust_symbols;
 	}
@@ -1130,7 +1130,7 @@ int dso__load_sym(struct dso *dso, struct map *map, struct symsrc *syms_ss,
 		    (sym.st_value & 1))
 			--sym.st_value;
 
-		if (dso->kernel || kmodule) {
+		if (dso->kernel) {
 			if (dso__process_kernel_symbol(dso, map, &sym, &shdr, kmaps, kmap, &curr_dso, &curr_map,
 						       section_name, adjust_kernel_syms, kmodule, &remap_kernel))
 				goto out_elf_end;
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index 5ddf84dcbae7..5151a8c0b791 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -663,9 +663,12 @@ static bool symbol__is_idle(const char *name)
 		"exit_idle",
 		"mwait_idle",
 		"mwait_idle_with_hints",
+		"mwait_idle_with_hints.constprop.0",
 		"poll_idle",
 		"ppc64_runlatch_off",
 		"pseries_dedicated_idle_sleep",
+		"psw_idle",
+		"psw_idle_exit",
 		NULL
 	};
 	int i;
@@ -806,7 +809,7 @@ static int maps__split_kallsyms(struct maps *kmaps, struct dso *dso, u64 delta,
 
 			if (strcmp(curr_map->dso->short_name, module)) {
 				if (curr_map != initial_map &&
-				    dso->kernel == DSO_TYPE_GUEST_KERNEL &&
+				    dso->kernel == DSO_SPACE__KERNEL_GUEST &&
 				    machine__is_default_guest(machine)) {
 					/*
 					 * We assume all symbols of a module are
@@ -863,7 +866,7 @@ static int maps__split_kallsyms(struct maps *kmaps, struct dso *dso, u64 delta,
 				goto add_symbol;
 			}
 
-			if (dso->kernel == DSO_TYPE_GUEST_KERNEL)
+			if (dso->kernel == DSO_SPACE__KERNEL_GUEST)
 				snprintf(dso_name, sizeof(dso_name),
 					"[guest.kernel].%d",
 					kernel_range++);
@@ -907,7 +910,7 @@ discard_symbol:
 	}
 
 	if (curr_map != initial_map &&
-	    dso->kernel == DSO_TYPE_GUEST_KERNEL &&
+	    dso->kernel == DSO_SPACE__KERNEL_GUEST &&
 	    machine__is_default_guest(kmaps->machine)) {
 		dso__set_loaded(curr_map->dso);
 	}
@@ -1385,7 +1388,7 @@ static int dso__load_kcore(struct dso *dso, struct map *map,
 	 * Set the data type and long name so that kcore can be read via
 	 * dso__data_read_addr().
 	 */
-	if (dso->kernel == DSO_TYPE_GUEST_KERNEL)
+	if (dso->kernel == DSO_SPACE__KERNEL_GUEST)
 		dso->binary_type = DSO_BINARY_TYPE__GUEST_KCORE;
 	else
 		dso->binary_type = DSO_BINARY_TYPE__KCORE;
@@ -1449,7 +1452,7 @@ int __dso__load_kallsyms(struct dso *dso, const char *filename,
 	symbols__fixup_end(&dso->symbols);
 	symbols__fixup_duplicate(&dso->symbols);
 
-	if (dso->kernel == DSO_TYPE_GUEST_KERNEL)
+	if (dso->kernel == DSO_SPACE__KERNEL_GUEST)
 		dso->symtab_type = DSO_BINARY_TYPE__GUEST_KALLSYMS;
 	else
 		dso->symtab_type = DSO_BINARY_TYPE__KALLSYMS;
@@ -1535,17 +1538,17 @@ static bool dso__is_compatible_symtab_type(struct dso *dso, bool kmod,
 	case DSO_BINARY_TYPE__MIXEDUP_UBUNTU_DEBUGINFO:
 	case DSO_BINARY_TYPE__BUILDID_DEBUGINFO:
 	case DSO_BINARY_TYPE__OPENEMBEDDED_DEBUGINFO:
-		return !kmod && dso->kernel == DSO_TYPE_USER;
+		return !kmod && dso->kernel == DSO_SPACE__USER;
 
 	case DSO_BINARY_TYPE__KALLSYMS:
 	case DSO_BINARY_TYPE__VMLINUX:
 	case DSO_BINARY_TYPE__KCORE:
-		return dso->kernel == DSO_TYPE_KERNEL;
+		return dso->kernel == DSO_SPACE__KERNEL;
 
 	case DSO_BINARY_TYPE__GUEST_KALLSYMS:
 	case DSO_BINARY_TYPE__GUEST_VMLINUX:
 	case DSO_BINARY_TYPE__GUEST_KCORE:
-		return dso->kernel == DSO_TYPE_GUEST_KERNEL;
+		return dso->kernel == DSO_SPACE__KERNEL_GUEST;
 
 	case DSO_BINARY_TYPE__GUEST_KMODULE:
 	case DSO_BINARY_TYPE__GUEST_KMODULE_COMP:
@@ -1563,6 +1566,7 @@ static bool dso__is_compatible_symtab_type(struct dso *dso, bool kmod,
 
 	case DSO_BINARY_TYPE__BPF_PROG_INFO:
 	case DSO_BINARY_TYPE__BPF_IMAGE:
+	case DSO_BINARY_TYPE__OOL:
 	case DSO_BINARY_TYPE__NOT_FOUND:
 	default:
 		return false;
@@ -1647,9 +1651,9 @@ int dso__load(struct dso *dso, struct map *map)
 		dso->symtab_type == DSO_BINARY_TYPE__GUEST_KMODULE_COMP;
 
 	if (dso->kernel && !kmod) {
-		if (dso->kernel == DSO_TYPE_KERNEL)
+		if (dso->kernel == DSO_SPACE__KERNEL)
 			ret = dso__load_kernel_sym(dso, map);
-		else if (dso->kernel == DSO_TYPE_GUEST_KERNEL)
+		else if (dso->kernel == DSO_SPACE__KERNEL_GUEST)
 			ret = dso__load_guest_kernel_sym(dso, map);
 
 		machine = map__kmaps(map)->machine;
@@ -1879,7 +1883,7 @@ int dso__load_vmlinux(struct dso *dso, struct map *map,
 	else
 		symbol__join_symfs(symfs_vmlinux, vmlinux);
 
-	if (dso->kernel == DSO_TYPE_GUEST_KERNEL)
+	if (dso->kernel == DSO_SPACE__KERNEL_GUEST)
 		symtab_type = DSO_BINARY_TYPE__GUEST_VMLINUX;
 	else
 		symtab_type = DSO_BINARY_TYPE__VMLINUX;
@@ -1891,7 +1895,7 @@ int dso__load_vmlinux(struct dso *dso, struct map *map,
 	symsrc__destroy(&ss);
 
 	if (err > 0) {
-		if (dso->kernel == DSO_TYPE_GUEST_KERNEL)
+		if (dso->kernel == DSO_SPACE__KERNEL_GUEST)
 			dso->binary_type = DSO_BINARY_TYPE__GUEST_VMLINUX;
 		else
 			dso->binary_type = DSO_BINARY_TYPE__VMLINUX;
diff --git a/tools/perf/util/tool.h b/tools/perf/util/tool.h
index 3fb67bd31e4a..bbbc0dcd461f 100644
--- a/tools/perf/util/tool.h
+++ b/tools/perf/util/tool.h
@@ -57,7 +57,8 @@ struct perf_tool {
 			throttle,
 			unthrottle,
 			ksymbol,
-			bpf;
+			bpf,
+			text_poke;
 
 	event_attr_op	attr;
 	event_attr_op	event_update;
diff --git a/tools/perf/util/zstd.c b/tools/perf/util/zstd.c
index d2202392ffdb..48dd2b018c47 100644
--- a/tools/perf/util/zstd.c
+++ b/tools/perf/util/zstd.c
@@ -99,7 +99,7 @@ size_t zstd_decompress_stream(struct zstd_data *data, void *src, size_t src_size
 	while (input.pos < input.size) {
 		ret = ZSTD_decompressStream(data->dstream, &output, &input);
 		if (ZSTD_isError(ret)) {
-			pr_err("failed to decompress (B): %ld -> %ld, dst_size %ld : %s\n",
+			pr_err("failed to decompress (B): %zd -> %zd, dst_size %zd : %s\n",
 			       src_size, output.size, dst_size, ZSTD_getErrorName(ret));
 			break;
 		}
diff --git a/tools/power/acpi/Makefile b/tools/power/acpi/Makefile
index ebd3e1a1c28e..a249c50ebf55 100644
--- a/tools/power/acpi/Makefile
+++ b/tools/power/acpi/Makefile
@@ -7,6 +7,8 @@
 
 include ../../scripts/Makefile.include
 
+.NOTPARALLEL:
+
 all: acpidbg acpidump ec
 clean: acpidbg_clean acpidump_clean ec_clean
 install: acpidbg_install acpidump_install ec_install
diff --git a/tools/power/acpi/os_specific/service_layers/oslinuxtbl.c b/tools/power/acpi/os_specific/service_layers/oslinuxtbl.c
index dd38c2b2e1b4..11c5046dce16 100644
--- a/tools/power/acpi/os_specific/service_layers/oslinuxtbl.c
+++ b/tools/power/acpi/os_specific/service_layers/oslinuxtbl.c
@@ -110,7 +110,7 @@ u32 gbl_table_count = 0;
  *
  * RETURN:      Status; Converted from errno.
  *
- * DESCRIPTION: Get last errno and conver it to acpi_status.
+ * DESCRIPTION: Get last errno and convert it to acpi_status.
  *
  *****************************************************************************/
 
diff --git a/tools/power/cpupower/utils/cpufreq-set.c b/tools/power/cpupower/utils/cpufreq-set.c
index 6ed82fba5aaa..7b2164e07057 100644
--- a/tools/power/cpupower/utils/cpufreq-set.c
+++ b/tools/power/cpupower/utils/cpufreq-set.c
@@ -99,13 +99,17 @@ static unsigned long string_to_frequency(const char *str)
 		continue;
 
 	if (str[cp] == '.') {
-		while (power > -1 && isdigit(str[cp+1]))
-			cp++, power--;
+		while (power > -1 && isdigit(str[cp+1])) {
+			cp++;
+			power--;
+		}
 	}
-	if (power >= -1)	/* not enough => pad */
+	if (power >= -1) {		/* not enough => pad */
 		pad = power + 1;
-	else			/* to much => strip */
-		pad = 0, cp += power + 1;
+	} else {			/* too much => strip */
+		pad = 0;
+		cp += power + 1;
+	}
 	/* check bounds */
 	if (cp <= 0 || cp + pad > NORM_FREQ_LEN - 1)
 		return 0;
diff --git a/tools/power/pm-graph/sleepgraph.py b/tools/power/pm-graph/sleepgraph.py
index 46ff97e909c6..1bc36a1db14f 100755
--- a/tools/power/pm-graph/sleepgraph.py
+++ b/tools/power/pm-graph/sleepgraph.py
@@ -171,7 +171,7 @@ class SystemValues:
 	tracefuncs = {
 		'sys_sync': {},
 		'ksys_sync': {},
-		'__pm_notifier_call_chain': {},
+		'pm_notifier_call_chain_robust': {},
 		'pm_prepare_console': {},
 		'pm_notifier_call_chain': {},
 		'freeze_processes': {},
diff --git a/tools/power/x86/intel-speed-select/isst-config.c b/tools/power/x86/intel-speed-select/isst-config.c
index 9f4b190f1d74..cd089a505859 100644
--- a/tools/power/x86/intel-speed-select/isst-config.c
+++ b/tools/power/x86/intel-speed-select/isst-config.c
@@ -15,7 +15,7 @@ struct process_cmd_struct {
 	int arg;
 };
 
-static const char *version_str = "v1.5";
+static const char *version_str = "v1.6";
 static const int supported_api_ver = 1;
 static struct isst_if_platform_info isst_platform_info;
 static char *progname;
@@ -545,20 +545,23 @@ static void set_cpu_present_cpu_mask(void)
 	}
 }
 
-int get_core_count(int pkg_id, int die_id)
+int get_max_punit_core_id(int pkg_id, int die_id)
 {
-	int cnt = 0;
+	int max_id = 0;
+	int i;
 
-	if (pkg_id < MAX_PACKAGE_COUNT && die_id < MAX_DIE_PER_PACKAGE) {
-		int i;
+	for (i = 0; i < topo_max_cpus; ++i)
+	{
+		if (!CPU_ISSET_S(i, present_cpumask_size, present_cpumask))
+			continue;
 
-		for (i = 0; i < sizeof(long long) * 8; ++i) {
-			if (core_mask[pkg_id][die_id] & (1ULL << i))
-				cnt++;
-		}
+		if (cpu_map[i].pkg_id == pkg_id &&
+			cpu_map[i].die_id == die_id &&
+			cpu_map[i].punit_cpu_core > max_id)
+			max_id = cpu_map[i].punit_cpu_core;
 	}
 
-	return cnt;
+	return max_id;
 }
 
 int get_cpu_count(int pkg_id, int die_id)
diff --git a/tools/power/x86/intel-speed-select/isst-core.c b/tools/power/x86/intel-speed-select/isst-core.c
index a7f4337c5777..1d7ecb54352e 100644
--- a/tools/power/x86/intel-speed-select/isst-core.c
+++ b/tools/power/x86/intel-speed-select/isst-core.c
@@ -396,7 +396,7 @@ int isst_get_pbf_info(int cpu, int level, struct isst_pbf_info *pbf_info)
 {
 	struct isst_pkg_ctdp_level_info ctdp_level;
 	struct isst_pkg_ctdp pkg_dev;
-	int i, ret, core_cnt, max;
+	int i, ret, max_punit_core, max_mask_index;
 	unsigned int req, resp;
 
 	ret = isst_get_ctdp_levels(cpu, &pkg_dev);
@@ -421,10 +421,10 @@ int isst_get_pbf_info(int cpu, int level, struct isst_pbf_info *pbf_info)
 
 	pbf_info->core_cpumask_size = alloc_cpu_set(&pbf_info->core_cpumask);
 
-	core_cnt = get_core_count(get_physical_package_id(cpu), get_physical_die_id(cpu));
-	max = core_cnt > 32 ? 2 : 1;
+	max_punit_core = get_max_punit_core_id(get_physical_package_id(cpu), get_physical_die_id(cpu));
+	max_mask_index = max_punit_core > 32 ? 2 : 1;
 
-	for (i = 0; i < max; ++i) {
+	for (i = 0; i < max_mask_index; ++i) {
 		unsigned long long mask;
 		int count;
 
diff --git a/tools/power/x86/intel-speed-select/isst.h b/tools/power/x86/intel-speed-select/isst.h
index 094ba4589a9c..29715e9c2e06 100644
--- a/tools/power/x86/intel-speed-select/isst.h
+++ b/tools/power/x86/intel-speed-select/isst.h
@@ -170,7 +170,7 @@ struct isst_pkg_ctdp {
 
 extern int get_topo_max_cpus(void);
 extern int get_cpu_count(int pkg_id, int die_id);
-extern int get_core_count(int pkg_id, int die_id);
+extern int get_max_punit_core_id(int pkg_id, int die_id);
 
 /* Common interfaces */
 FILE *get_output_file(void);
diff --git a/tools/testing/ktest/examples/README b/tools/testing/ktest/examples/README
index a12d295a09d8..4f048789b260 100644
--- a/tools/testing/ktest/examples/README
+++ b/tools/testing/ktest/examples/README
@@ -11,7 +11,7 @@ crosstests.conf - this config shows an example of testing a git repo against
     lots of different architectures. It only does build tests, but makes
     it easy to compile test different archs. You can download the arch
     cross compilers from:
-  http://kernel.org/pub/tools/crosstool/files/bin/x86_64/
+  https://kernel.org/pub/tools/crosstool/files/bin/x86_64/
 
 test.conf - A generic example of a config. This is based on an actual config
      used to perform real testing.
diff --git a/tools/testing/ktest/examples/crosstests.conf b/tools/testing/ktest/examples/crosstests.conf
index 6907f32590b2..3b15e85f26bd 100644
--- a/tools/testing/ktest/examples/crosstests.conf
+++ b/tools/testing/ktest/examples/crosstests.conf
@@ -3,7 +3,7 @@
 #
 # In this config, it is expected that the tool chains from:
 #
-#   http://kernel.org/pub/tools/crosstool/files/bin/x86_64/
+#   https://kernel.org/pub/tools/crosstool/files/bin/x86_64/
 #
 # running on a x86_64 system have been downloaded and installed into:
 #
diff --git a/tools/testing/ktest/ktest.pl b/tools/testing/ktest/ktest.pl
index 7570e36d636d..cb16d2aac51c 100755
--- a/tools/testing/ktest/ktest.pl
+++ b/tools/testing/ktest/ktest.pl
@@ -11,6 +11,7 @@ use File::Path qw(mkpath);
 use File::Copy qw(cp);
 use FileHandle;
 use FindBin;
+use IO::Handle;
 
 my $VERSION = "0.2";
 
@@ -81,6 +82,8 @@ my %default = (
     "IGNORE_UNUSED"		=> 0,
 );
 
+my $test_log_start = 0;
+
 my $ktest_config = "ktest.conf";
 my $version;
 my $have_version = 0;
@@ -98,6 +101,7 @@ my $final_post_ktest;
 my $pre_ktest;
 my $post_ktest;
 my $pre_test;
+my $pre_test_die;
 my $post_test;
 my $pre_build;
 my $post_build;
@@ -223,6 +227,7 @@ my $dirname = $FindBin::Bin;
 my $mailto;
 my $mailer;
 my $mail_path;
+my $mail_max_size;
 my $mail_command;
 my $email_on_error;
 my $email_when_finished;
@@ -259,6 +264,7 @@ my %option_map = (
     "MAILTO"			=> \$mailto,
     "MAILER"			=> \$mailer,
     "MAIL_PATH"			=> \$mail_path,
+    "MAIL_MAX_SIZE"		=> \$mail_max_size,
     "MAIL_COMMAND"		=> \$mail_command,
     "EMAIL_ON_ERROR"		=> \$email_on_error,
     "EMAIL_WHEN_FINISHED"	=> \$email_when_finished,
@@ -273,6 +279,7 @@ my %option_map = (
     "PRE_KTEST"			=> \$pre_ktest,
     "POST_KTEST"		=> \$post_ktest,
     "PRE_TEST"			=> \$pre_test,
+    "PRE_TEST_DIE"		=> \$pre_test_die,
     "POST_TEST"			=> \$post_test,
     "BUILD_TYPE"		=> \$build_type,
     "BUILD_OPTIONS"		=> \$build_options,
@@ -507,9 +514,7 @@ EOF
 
 sub _logit {
     if (defined($opt{"LOG_FILE"})) {
-	open(OUT, ">> $opt{LOG_FILE}") or die "Can't write to $opt{LOG_FILE}";
-	print OUT @_;
-	close(OUT);
+	print LOG @_;
     }
 }
 
@@ -909,6 +914,12 @@ sub process_expression {
 	}
     }
 
+    if ($val =~ s/^\s*NOT\s+(.*)//) {
+	my $express = $1;
+	my $ret = process_expression($name, $express);
+	return !$ret;
+    }
+
     if ($val =~ /^\s*0\s*$/) {
 	return 0;
     } elsif ($val =~ /^\s*\d+\s*$/) {
@@ -1485,8 +1496,32 @@ sub dodie {
 
     if ($email_on_error) {
 	my $name = get_test_name;
+	my $log_file;
+
+	if (defined($opt{"LOG_FILE"})) {
+	    my $whence = 0; # beginning of file
+	    my $pos = $test_log_start;
+
+	    if (defined($mail_max_size)) {
+		my $log_size = tell LOG;
+		$log_size -= $test_log_start;
+		if ($log_size > $mail_max_size) {
+		    $whence = 2; # end of file
+		    $pos = - $mail_max_size;
+		}
+	    }
+	    $log_file = "$tmpdir/log";
+	    open (L, "$opt{LOG_FILE}") or die "Can't open $opt{LOG_FILE} to read)";
+	    open (O, "> $tmpdir/log") or die "Can't open $tmpdir/log\n";
+	    seek(L, $pos, $whence);
+	    while (<L>) {
+		print O;
+	    }
+	    close O;
+	    close L;
+	}
         send_email("KTEST: critical failure for test $i [$name]",
-                "Your test started at $script_start_time has failed with:\n@_\n");
+                "Your test started at $script_start_time has failed with:\n@_\n", $log_file);
     }
 
     if ($monitor_cnt) {
@@ -1508,7 +1543,7 @@ sub create_pty {
     my $TIOCGPTN = 0x80045430;
 
     sysopen($ptm, "/dev/ptmx", O_RDWR | O_NONBLOCK) or
-	dodie "Cant open /dev/ptmx";
+	dodie "Can't open /dev/ptmx";
 
     # unlockpt()
     $tmp = pack("i", 0);
@@ -1772,8 +1807,6 @@ sub run_command {
 	(fail "unable to exec $command" and return 0);
 
     if (defined($opt{"LOG_FILE"})) {
-	open(LOG, ">>$opt{LOG_FILE}") or
-	    dodie "failed to write to log";
 	$dolog = 1;
     }
 
@@ -1821,7 +1854,6 @@ sub run_command {
     }
 
     close(CMD);
-    close(LOG) if ($dolog);
     close(RD)  if ($dord);
 
     $end_time = time;
@@ -3188,6 +3220,8 @@ sub config_bisect_end {
     doprint "***************************************\n\n";
 }
 
+my $pass = 1;
+
 sub run_config_bisect {
     my ($good, $bad, $last_result) = @_;
     my $reset = "";
@@ -3210,11 +3244,15 @@ sub run_config_bisect {
 
     $ret = run_config_bisect_test $config_bisect_type;
     if ($ret) {
-        doprint "NEW GOOD CONFIG\n";
+        doprint "NEW GOOD CONFIG ($pass)\n";
+	system("cp $output_config $tmpdir/good_config.tmp.$pass");
+	$pass++;
 	# Return 3 for good config
 	return 3;
     } else {
-        doprint "NEW BAD CONFIG\n";
+        doprint "NEW BAD CONFIG ($pass)\n";
+	system("cp $output_config $tmpdir/bad_config.tmp.$pass");
+	$pass++;
 	# Return 4 for bad config
 	return 4;
     }
@@ -4077,8 +4115,12 @@ if ($#new_configs >= 0) {
     }
 }
 
-if ($opt{"CLEAR_LOG"} && defined($opt{"LOG_FILE"})) {
-    unlink $opt{"LOG_FILE"};
+if (defined($opt{"LOG_FILE"})) {
+    if ($opt{"CLEAR_LOG"}) {
+	unlink $opt{"LOG_FILE"};
+    }
+    open(LOG, ">> $opt{LOG_FILE}") or die "Can't write to $opt{LOG_FILE}";
+    LOG->autoflush(1);
 }
 
 doprint "\n\nSTARTING AUTOMATED TESTS\n\n";
@@ -4171,7 +4213,7 @@ sub find_mailer {
 }
 
 sub do_send_mail {
-    my ($subject, $message) = @_;
+    my ($subject, $message, $file) = @_;
 
     if (!defined($mail_path)) {
 	# find the mailer
@@ -4181,16 +4223,30 @@ sub do_send_mail {
 	}
     }
 
+    my $header_file = "$tmpdir/header";
+    open (HEAD, ">$header_file") or die "Can not create $header_file\n";
+    print HEAD "To: $mailto\n";
+    print HEAD "Subject: $subject\n\n";
+    print HEAD "$message\n";
+    close HEAD;
+
     if (!defined($mail_command)) {
 	if ($mailer eq "mail" || $mailer eq "mailx") {
-	    $mail_command = "\$MAIL_PATH/\$MAILER -s \'\$SUBJECT\' \$MAILTO <<< \'\$MESSAGE\'";
+	    $mail_command = "cat \$HEADER_FILE \$BODY_FILE | \$MAIL_PATH/\$MAILER -s \'\$SUBJECT\' \$MAILTO";
 	} elsif ($mailer eq "sendmail" ) {
-	    $mail_command =  "echo \'Subject: \$SUBJECT\n\n\$MESSAGE\' | \$MAIL_PATH/\$MAILER -t \$MAILTO";
+	    $mail_command =  "cat \$HEADER_FILE \$BODY_FILE | \$MAIL_PATH/\$MAILER -t \$MAILTO";
 	} else {
 	    die "\nYour mailer: $mailer is not supported.\n";
 	}
     }
 
+    if (defined($file)) {
+	$mail_command =~ s/\$BODY_FILE/$file/g;
+    } else {
+	$mail_command =~ s/\$BODY_FILE//g;
+    }
+
+    $mail_command =~ s/\$HEADER_FILE/$header_file/g;
     $mail_command =~ s/\$MAILER/$mailer/g;
     $mail_command =~ s/\$MAIL_PATH/$mail_path/g;
     $mail_command =~ s/\$MAILTO/$mailto/g;
@@ -4338,10 +4394,19 @@ for (my $i = 1; $i <= $opt{"NUM_TESTS"}; $i++) {
     }
 
     doprint "\n\n";
+
+    if (defined($opt{"LOG_FILE"})) {
+	$test_log_start = tell(LOG);
+    }
+
     doprint "RUNNING TEST $i of $opt{NUM_TESTS}$name with option $test_type $run_type$installme\n\n";
 
     if (defined($pre_test)) {
-	run_command $pre_test;
+	my $ret = run_command $pre_test;
+	if (!$ret && defined($pre_test_die) &&
+	    $pre_test_die) {
+	    dodie "failed to pre_test\n";
+	}
     }
 
     unlink $dmesg;
@@ -4441,4 +4506,10 @@ if ($email_when_finished) {
     send_email("KTEST: Your test has finished!",
             "$successes of $opt{NUM_TESTS} tests started at $script_start_time were successful!");
 }
+
+if (defined($opt{"LOG_FILE"})) {
+    print "\n See $opt{LOG_FILE} for the record of results.\n\n";
+    close LOG;
+}
+
 exit 0;
diff --git a/tools/testing/ktest/sample.conf b/tools/testing/ktest/sample.conf
index 27666b8007ed..5e7d1d729752 100644
--- a/tools/testing/ktest/sample.conf
+++ b/tools/testing/ktest/sample.conf
@@ -442,6 +442,19 @@
 # Users can cancel the test by Ctrl^C
 # (default 0)
 #EMAIL_WHEN_CANCELED = 1
+#
+# If a test ends with an error and EMAIL_ON_ERROR is set as well
+# as a LOG_FILE is defined, then the log of the failing test will
+# be included in the email that is sent.
+# It is possible that the log may be very large, in which case,
+# only the last amount of the log should be sent. To limit how
+# much of the log is sent, set MAIL_MAX_SIZE. This will be the
+# size in bytes of the last portion of the log of the failed
+# test file. That is, if this is set to 100000, then only the
+# last 100 thousand bytes of the log file will be included in
+# the email.
+# (default undef)
+#MAIL_MAX_SIZE = 1000000
 
 # Start a test setup. If you leave this off, all options
 # will be default and the test will run once.
@@ -557,6 +570,11 @@
 # default (undefined)
 #PRE_TEST = ${SSH} reboot_to_special_kernel
 
+# To kill the entire test if PRE_TEST is defined but fails set this
+# to 1.
+# (default 0)
+#PRE_TEST_DIE = 1
+
 # If there is a command you want to run after the individual test case
 # completes, then you can set this option.
 #
diff --git a/tools/testing/nvdimm/dax-dev.c b/tools/testing/nvdimm/dax-dev.c
index 7e5d979e73cb..fb342a8c98d3 100644
--- a/tools/testing/nvdimm/dax-dev.c
+++ b/tools/testing/nvdimm/dax-dev.c
@@ -9,12 +9,19 @@
 phys_addr_t dax_pgoff_to_phys(struct dev_dax *dev_dax, pgoff_t pgoff,
 		unsigned long size)
 {
-	struct resource *res = &dev_dax->region->res;
-	phys_addr_t addr;
+	int i;
 
-	addr = pgoff * PAGE_SIZE + res->start;
-	if (addr >= res->start && addr <= res->end) {
-		if (addr + size - 1 <= res->end) {
+	for (i = 0; i < dev_dax->nr_range; i++) {
+		struct dev_dax_range *dax_range = &dev_dax->ranges[i];
+		struct range *range = &dax_range->range;
+		unsigned long long pgoff_end;
+		phys_addr_t addr;
+
+		pgoff_end = dax_range->pgoff + PHYS_PFN(range_len(range)) - 1;
+		if (pgoff < dax_range->pgoff || pgoff > pgoff_end)
+			continue;
+		addr = PFN_PHYS(pgoff - dax_range->pgoff) + range->start;
+		if (addr + size - 1 <= range->end) {
 			if (get_nfit_res(addr)) {
 				struct page *page;
 
@@ -23,9 +30,10 @@ phys_addr_t dax_pgoff_to_phys(struct dev_dax *dev_dax, pgoff_t pgoff,
 
 				page = vmalloc_to_page((void *)addr);
 				return PFN_PHYS(page_to_pfn(page));
-			} else
-				return addr;
+			}
+			return addr;
 		}
+		break;
 	}
 	return -1;
 }
diff --git a/tools/testing/nvdimm/test/iomap.c b/tools/testing/nvdimm/test/iomap.c
index 03e40b3b0106..c62d372d426f 100644
--- a/tools/testing/nvdimm/test/iomap.c
+++ b/tools/testing/nvdimm/test/iomap.c
@@ -126,7 +126,7 @@ static void dev_pagemap_percpu_release(struct percpu_ref *ref)
 void *__wrap_devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap)
 {
 	int error;
-	resource_size_t offset = pgmap->res.start;
+	resource_size_t offset = pgmap->range.start;
 	struct nfit_test_resource *nfit_res = get_nfit_res(offset);
 
 	if (!nfit_res)
diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c
index a8ee5c4d41eb..2ac0fff6dad8 100644
--- a/tools/testing/nvdimm/test/nfit.c
+++ b/tools/testing/nvdimm/test/nfit.c
@@ -23,7 +23,8 @@
 #include "nfit_test.h"
 #include "../watermark.h"
 
-#include <asm/mcsafe_test.h>
+#include <asm/copy_mc_test.h>
+#include <asm/mce.h>
 
 /*
  * Generate an NFIT table to describe the following topology:
@@ -173,6 +174,9 @@ struct nfit_test_fw {
 	u64 version;
 	u32 size_received;
 	u64 end_time;
+	bool armed;
+	bool missed_activate;
+	unsigned long last_activate;
 };
 
 struct nfit_test {
@@ -345,7 +349,7 @@ static int nd_intel_test_finish_fw(struct nfit_test *t,
 			__func__, t, nd_cmd, buf_len, idx);
 
 	if (fw->state == FW_STATE_UPDATED) {
-		/* update already done, need cold boot */
+		/* update already done, need activation */
 		nd_cmd->status = 0x20007;
 		return 0;
 	}
@@ -430,6 +434,7 @@ static int nd_intel_test_finish_query(struct nfit_test *t,
 		}
 		dev_dbg(dev, "%s: transition out verify\n", __func__);
 		fw->state = FW_STATE_UPDATED;
+		fw->missed_activate = false;
 		/* fall through */
 	case FW_STATE_UPDATED:
 		nd_cmd->status = 0;
@@ -1178,6 +1183,134 @@ static int nd_intel_test_cmd_master_secure_erase(struct nfit_test *t,
 	return 0;
 }
 
+static unsigned long last_activate;
+
+static int nvdimm_bus_intel_fw_activate_businfo(struct nfit_test *t,
+		struct nd_intel_bus_fw_activate_businfo *nd_cmd,
+		unsigned int buf_len)
+{
+	int i, armed = 0;
+	int state;
+	u64 tmo;
+
+	for (i = 0; i < NUM_DCR; i++) {
+		struct nfit_test_fw *fw = &t->fw[i];
+
+		if (fw->armed)
+			armed++;
+	}
+
+	/*
+	 * Emulate 3 second activation max, and 1 second incremental
+	 * quiesce time per dimm requiring multiple activates to get all
+	 * DIMMs updated.
+	 */
+	if (armed)
+		state = ND_INTEL_FWA_ARMED;
+	else if (!last_activate || time_after(jiffies, last_activate + 3 * HZ))
+		state = ND_INTEL_FWA_IDLE;
+	else
+		state = ND_INTEL_FWA_BUSY;
+
+	tmo = armed * USEC_PER_SEC;
+	*nd_cmd = (struct nd_intel_bus_fw_activate_businfo) {
+		.capability = ND_INTEL_BUS_FWA_CAP_FWQUIESCE
+			| ND_INTEL_BUS_FWA_CAP_OSQUIESCE
+			| ND_INTEL_BUS_FWA_CAP_RESET,
+		.state = state,
+		.activate_tmo = tmo,
+		.cpu_quiesce_tmo = tmo,
+		.io_quiesce_tmo = tmo,
+		.max_quiesce_tmo = 3 * USEC_PER_SEC,
+	};
+
+	return 0;
+}
+
+static int nvdimm_bus_intel_fw_activate(struct nfit_test *t,
+		struct nd_intel_bus_fw_activate *nd_cmd,
+		unsigned int buf_len)
+{
+	struct nd_intel_bus_fw_activate_businfo info;
+	u32 status = 0;
+	int i;
+
+	nvdimm_bus_intel_fw_activate_businfo(t, &info, sizeof(info));
+	if (info.state == ND_INTEL_FWA_BUSY)
+		status = ND_INTEL_BUS_FWA_STATUS_BUSY;
+	else if (info.activate_tmo > info.max_quiesce_tmo)
+		status = ND_INTEL_BUS_FWA_STATUS_TMO;
+	else if (info.state == ND_INTEL_FWA_IDLE)
+		status = ND_INTEL_BUS_FWA_STATUS_NOARM;
+
+	dev_dbg(&t->pdev.dev, "status: %d\n", status);
+	nd_cmd->status = status;
+	if (status && status != ND_INTEL_BUS_FWA_STATUS_TMO)
+		return 0;
+
+	last_activate = jiffies;
+	for (i = 0; i < NUM_DCR; i++) {
+		struct nfit_test_fw *fw = &t->fw[i];
+
+		if (!fw->armed)
+			continue;
+		if (fw->state != FW_STATE_UPDATED)
+			fw->missed_activate = true;
+		else
+			fw->state = FW_STATE_NEW;
+		fw->armed = false;
+		fw->last_activate = last_activate;
+	}
+
+	return 0;
+}
+
+static int nd_intel_test_cmd_fw_activate_dimminfo(struct nfit_test *t,
+		struct nd_intel_fw_activate_dimminfo *nd_cmd,
+		unsigned int buf_len, int dimm)
+{
+	struct nd_intel_bus_fw_activate_businfo info;
+	struct nfit_test_fw *fw = &t->fw[dimm];
+	u32 result, state;
+
+	nvdimm_bus_intel_fw_activate_businfo(t, &info, sizeof(info));
+
+	if (info.state == ND_INTEL_FWA_BUSY)
+		state = ND_INTEL_FWA_BUSY;
+	else if (info.state == ND_INTEL_FWA_IDLE)
+		state = ND_INTEL_FWA_IDLE;
+	else if (fw->armed)
+		state = ND_INTEL_FWA_ARMED;
+	else
+		state = ND_INTEL_FWA_IDLE;
+
+	result = ND_INTEL_DIMM_FWA_NONE;
+	if (last_activate && fw->last_activate == last_activate &&
+			state == ND_INTEL_FWA_IDLE) {
+		if (fw->missed_activate)
+			result = ND_INTEL_DIMM_FWA_NOTSTAGED;
+		else
+			result = ND_INTEL_DIMM_FWA_SUCCESS;
+	}
+
+	*nd_cmd = (struct nd_intel_fw_activate_dimminfo) {
+		.result = result,
+		.state = state,
+	};
+
+	return 0;
+}
+
+static int nd_intel_test_cmd_fw_activate_arm(struct nfit_test *t,
+		struct nd_intel_fw_activate_arm *nd_cmd,
+		unsigned int buf_len, int dimm)
+{
+	struct nfit_test_fw *fw = &t->fw[dimm];
+
+	fw->armed = nd_cmd->activate_arm == ND_INTEL_DIMM_FWA_ARM;
+	nd_cmd->status = 0;
+	return 0;
+}
 
 static int get_dimm(struct nfit_mem *nfit_mem, unsigned int func)
 {
@@ -1192,6 +1325,29 @@ static int get_dimm(struct nfit_mem *nfit_mem, unsigned int func)
 	return i;
 }
 
+static void nfit_ctl_dbg(struct acpi_nfit_desc *acpi_desc,
+		struct nvdimm *nvdimm, unsigned int cmd, void *buf,
+		unsigned int len)
+{
+	struct nfit_test *t = container_of(acpi_desc, typeof(*t), acpi_desc);
+	unsigned int func = cmd;
+	unsigned int family = 0;
+
+	if (cmd == ND_CMD_CALL) {
+		struct nd_cmd_pkg *pkg = buf;
+
+		len = pkg->nd_size_in;
+		family = pkg->nd_family;
+		buf = pkg->nd_payload;
+		func = pkg->nd_command;
+	}
+	dev_dbg(&t->pdev.dev, "%s family: %d cmd: %d: func: %d input length: %d\n",
+			nvdimm ? nvdimm_name(nvdimm) : "bus", family, cmd, func,
+			len);
+	print_hex_dump_debug("nvdimm in  ", DUMP_PREFIX_OFFSET, 16, 4,
+			buf, min(len, 256u), true);
+}
+
 static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc,
 		struct nvdimm *nvdimm, unsigned int cmd, void *buf,
 		unsigned int buf_len, int *cmd_rc)
@@ -1205,6 +1361,8 @@ static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc,
 		cmd_rc = &__cmd_rc;
 	*cmd_rc = 0;
 
+	nfit_ctl_dbg(acpi_desc, nvdimm, cmd, buf, buf_len);
+
 	if (nvdimm) {
 		struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm);
 		unsigned long cmd_mask = nvdimm_cmd_mask(nvdimm);
@@ -1224,6 +1382,11 @@ static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc,
 			i = get_dimm(nfit_mem, func);
 			if (i < 0)
 				return i;
+			if (i >= NUM_DCR) {
+				dev_WARN_ONCE(&t->pdev.dev, 1,
+						"ND_CMD_CALL only valid for nfit_test0\n");
+				return -EINVAL;
+			}
 
 			switch (func) {
 			case NVDIMM_INTEL_GET_SECURITY_STATE:
@@ -1252,11 +1415,11 @@ static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc,
 				break;
 			case NVDIMM_INTEL_OVERWRITE:
 				rc = nd_intel_test_cmd_overwrite(t,
-						buf, buf_len, i - t->dcr_idx);
+						buf, buf_len, i);
 				break;
 			case NVDIMM_INTEL_QUERY_OVERWRITE:
 				rc = nd_intel_test_cmd_query_overwrite(t,
-						buf, buf_len, i - t->dcr_idx);
+						buf, buf_len, i);
 				break;
 			case NVDIMM_INTEL_SET_MASTER_PASSPHRASE:
 				rc = nd_intel_test_cmd_master_set_pass(t,
@@ -1266,54 +1429,59 @@ static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc,
 				rc = nd_intel_test_cmd_master_secure_erase(t,
 						buf, buf_len, i);
 				break;
+			case NVDIMM_INTEL_FW_ACTIVATE_DIMMINFO:
+				rc = nd_intel_test_cmd_fw_activate_dimminfo(
+					t, buf, buf_len, i);
+				break;
+			case NVDIMM_INTEL_FW_ACTIVATE_ARM:
+				rc = nd_intel_test_cmd_fw_activate_arm(
+					t, buf, buf_len, i);
+				break;
 			case ND_INTEL_ENABLE_LSS_STATUS:
 				rc = nd_intel_test_cmd_set_lss_status(t,
 						buf, buf_len);
 				break;
 			case ND_INTEL_FW_GET_INFO:
 				rc = nd_intel_test_get_fw_info(t, buf,
-						buf_len, i - t->dcr_idx);
+						buf_len, i);
 				break;
 			case ND_INTEL_FW_START_UPDATE:
 				rc = nd_intel_test_start_update(t, buf,
-						buf_len, i - t->dcr_idx);
+						buf_len, i);
 				break;
 			case ND_INTEL_FW_SEND_DATA:
 				rc = nd_intel_test_send_data(t, buf,
-						buf_len, i - t->dcr_idx);
+						buf_len, i);
 				break;
 			case ND_INTEL_FW_FINISH_UPDATE:
 				rc = nd_intel_test_finish_fw(t, buf,
-						buf_len, i - t->dcr_idx);
+						buf_len, i);
 				break;
 			case ND_INTEL_FW_FINISH_QUERY:
 				rc = nd_intel_test_finish_query(t, buf,
-						buf_len, i - t->dcr_idx);
+						buf_len, i);
 				break;
 			case ND_INTEL_SMART:
 				rc = nfit_test_cmd_smart(buf, buf_len,
-						&t->smart[i - t->dcr_idx]);
+						&t->smart[i]);
 				break;
 			case ND_INTEL_SMART_THRESHOLD:
 				rc = nfit_test_cmd_smart_threshold(buf,
 						buf_len,
-						&t->smart_threshold[i -
-							t->dcr_idx]);
+						&t->smart_threshold[i]);
 				break;
 			case ND_INTEL_SMART_SET_THRESHOLD:
 				rc = nfit_test_cmd_smart_set_threshold(buf,
 						buf_len,
-						&t->smart_threshold[i -
-							t->dcr_idx],
-						&t->smart[i - t->dcr_idx],
+						&t->smart_threshold[i],
+						&t->smart[i],
 						&t->pdev.dev, t->dimm_dev[i]);
 				break;
 			case ND_INTEL_SMART_INJECT:
 				rc = nfit_test_cmd_smart_inject(buf,
 						buf_len,
-						&t->smart_threshold[i -
-							t->dcr_idx],
-						&t->smart[i - t->dcr_idx],
+						&t->smart_threshold[i],
+						&t->smart[i],
 						&t->pdev.dev, t->dimm_dev[i]);
 				break;
 			default:
@@ -1353,9 +1521,9 @@ static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc,
 		if (!nd_desc)
 			return -ENOTTY;
 
-		if (cmd == ND_CMD_CALL) {
+		if (cmd == ND_CMD_CALL && call_pkg->nd_family
+				== NVDIMM_BUS_FAMILY_NFIT) {
 			func = call_pkg->nd_command;
-
 			buf_len = call_pkg->nd_size_in + call_pkg->nd_size_out;
 			buf = (void *) call_pkg->nd_payload;
 
@@ -1379,7 +1547,26 @@ static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc,
 			default:
 				return -ENOTTY;
 			}
-		}
+		} else if (cmd == ND_CMD_CALL && call_pkg->nd_family
+				== NVDIMM_BUS_FAMILY_INTEL) {
+			func = call_pkg->nd_command;
+			buf_len = call_pkg->nd_size_in + call_pkg->nd_size_out;
+			buf = (void *) call_pkg->nd_payload;
+
+			switch (func) {
+			case NVDIMM_BUS_INTEL_FW_ACTIVATE_BUSINFO:
+				rc = nvdimm_bus_intel_fw_activate_businfo(t,
+						buf, buf_len);
+				return rc;
+			case NVDIMM_BUS_INTEL_FW_ACTIVATE:
+				rc = nvdimm_bus_intel_fw_activate(t, buf,
+						buf_len);
+				return rc;
+			default:
+				return -ENOTTY;
+			}
+		} else if (cmd == ND_CMD_CALL)
+			return -ENOTTY;
 
 		if (!nd_desc || !test_bit(cmd, &nd_desc->cmd_mask))
 			return -ENOTTY;
@@ -1805,6 +1992,7 @@ static void nfit_test0_setup(struct nfit_test *t)
 	struct acpi_nfit_flush_address *flush;
 	struct acpi_nfit_capabilities *pcap;
 	unsigned int offset = 0, i;
+	unsigned long *acpi_mask;
 
 	/*
 	 * spa0 (interleave first half of dimm0 and dimm1, note storage
@@ -2507,10 +2695,10 @@ static void nfit_test0_setup(struct nfit_test *t)
 	set_bit(ND_CMD_ARS_STATUS, &acpi_desc->bus_cmd_force_en);
 	set_bit(ND_CMD_CLEAR_ERROR, &acpi_desc->bus_cmd_force_en);
 	set_bit(ND_CMD_CALL, &acpi_desc->bus_cmd_force_en);
-	set_bit(NFIT_CMD_TRANSLATE_SPA, &acpi_desc->bus_nfit_cmd_force_en);
-	set_bit(NFIT_CMD_ARS_INJECT_SET, &acpi_desc->bus_nfit_cmd_force_en);
-	set_bit(NFIT_CMD_ARS_INJECT_CLEAR, &acpi_desc->bus_nfit_cmd_force_en);
-	set_bit(NFIT_CMD_ARS_INJECT_GET, &acpi_desc->bus_nfit_cmd_force_en);
+	set_bit(NFIT_CMD_TRANSLATE_SPA, &acpi_desc->bus_dsm_mask);
+	set_bit(NFIT_CMD_ARS_INJECT_SET, &acpi_desc->bus_dsm_mask);
+	set_bit(NFIT_CMD_ARS_INJECT_CLEAR, &acpi_desc->bus_dsm_mask);
+	set_bit(NFIT_CMD_ARS_INJECT_GET, &acpi_desc->bus_dsm_mask);
 	set_bit(ND_INTEL_FW_GET_INFO, &acpi_desc->dimm_cmd_force_en);
 	set_bit(ND_INTEL_FW_START_UPDATE, &acpi_desc->dimm_cmd_force_en);
 	set_bit(ND_INTEL_FW_SEND_DATA, &acpi_desc->dimm_cmd_force_en);
@@ -2531,6 +2719,12 @@ static void nfit_test0_setup(struct nfit_test *t)
 			&acpi_desc->dimm_cmd_force_en);
 	set_bit(NVDIMM_INTEL_MASTER_SECURE_ERASE,
 			&acpi_desc->dimm_cmd_force_en);
+	set_bit(NVDIMM_INTEL_FW_ACTIVATE_DIMMINFO, &acpi_desc->dimm_cmd_force_en);
+	set_bit(NVDIMM_INTEL_FW_ACTIVATE_ARM, &acpi_desc->dimm_cmd_force_en);
+
+	acpi_mask = &acpi_desc->family_dsm_mask[NVDIMM_BUS_FAMILY_INTEL];
+	set_bit(NVDIMM_BUS_INTEL_FW_ACTIVATE_BUSINFO, acpi_mask);
+	set_bit(NVDIMM_BUS_INTEL_FW_ACTIVATE, acpi_mask);
 }
 
 static void nfit_test1_setup(struct nfit_test *t)
@@ -2699,14 +2893,18 @@ static int nfit_ctl_test(struct device *dev)
 	struct acpi_nfit_desc *acpi_desc;
 	const u64 test_val = 0x0123456789abcdefULL;
 	unsigned long mask, cmd_size, offset;
-	union {
-		struct nd_cmd_get_config_size cfg_size;
-		struct nd_cmd_clear_error clear_err;
-		struct nd_cmd_ars_status ars_stat;
-		struct nd_cmd_ars_cap ars_cap;
-		char buf[sizeof(struct nd_cmd_ars_status)
-			+ sizeof(struct nd_ars_record)];
-	} cmds;
+	struct nfit_ctl_test_cmd {
+		struct nd_cmd_pkg pkg;
+		union {
+			struct nd_cmd_get_config_size cfg_size;
+			struct nd_cmd_clear_error clear_err;
+			struct nd_cmd_ars_status ars_stat;
+			struct nd_cmd_ars_cap ars_cap;
+			struct nd_intel_bus_fw_activate_businfo fwa_info;
+			char buf[sizeof(struct nd_cmd_ars_status)
+				+ sizeof(struct nd_ars_record)];
+		};
+	} cmd;
 
 	adev = devm_kzalloc(dev, sizeof(*adev), GFP_KERNEL);
 	if (!adev)
@@ -2731,11 +2929,15 @@ static int nfit_ctl_test(struct device *dev)
 			.module = THIS_MODULE,
 			.provider_name = "ACPI.NFIT",
 			.ndctl = acpi_nfit_ctl,
-			.bus_dsm_mask = 1UL << NFIT_CMD_TRANSLATE_SPA
-				| 1UL << NFIT_CMD_ARS_INJECT_SET
-				| 1UL << NFIT_CMD_ARS_INJECT_CLEAR
-				| 1UL << NFIT_CMD_ARS_INJECT_GET,
+			.bus_family_mask = 1UL << NVDIMM_BUS_FAMILY_NFIT
+				| 1UL << NVDIMM_BUS_FAMILY_INTEL,
 		},
+		.bus_dsm_mask = 1UL << NFIT_CMD_TRANSLATE_SPA
+			| 1UL << NFIT_CMD_ARS_INJECT_SET
+			| 1UL << NFIT_CMD_ARS_INJECT_CLEAR
+			| 1UL << NFIT_CMD_ARS_INJECT_GET,
+		.family_dsm_mask[NVDIMM_BUS_FAMILY_INTEL] =
+			NVDIMM_BUS_INTEL_FW_ACTIVATE_CMDMASK,
 		.dev = &adev->dev,
 	};
 
@@ -2766,21 +2968,21 @@ static int nfit_ctl_test(struct device *dev)
 
 
 	/* basic checkout of a typical 'get config size' command */
-	cmd_size = sizeof(cmds.cfg_size);
-	cmds.cfg_size = (struct nd_cmd_get_config_size) {
+	cmd_size = sizeof(cmd.cfg_size);
+	cmd.cfg_size = (struct nd_cmd_get_config_size) {
 		.status = 0,
 		.config_size = SZ_128K,
 		.max_xfer = SZ_4K,
 	};
-	rc = setup_result(cmds.buf, cmd_size);
+	rc = setup_result(cmd.buf, cmd_size);
 	if (rc)
 		return rc;
 	rc = acpi_nfit_ctl(&acpi_desc->nd_desc, nvdimm, ND_CMD_GET_CONFIG_SIZE,
-			cmds.buf, cmd_size, &cmd_rc);
+			cmd.buf, cmd_size, &cmd_rc);
 
-	if (rc < 0 || cmd_rc || cmds.cfg_size.status != 0
-			|| cmds.cfg_size.config_size != SZ_128K
-			|| cmds.cfg_size.max_xfer != SZ_4K) {
+	if (rc < 0 || cmd_rc || cmd.cfg_size.status != 0
+			|| cmd.cfg_size.config_size != SZ_128K
+			|| cmd.cfg_size.max_xfer != SZ_4K) {
 		dev_dbg(dev, "%s: failed at: %d rc: %d cmd_rc: %d\n",
 				__func__, __LINE__, rc, cmd_rc);
 		return -EIO;
@@ -2789,14 +2991,14 @@ static int nfit_ctl_test(struct device *dev)
 
 	/* test ars_status with zero output */
 	cmd_size = offsetof(struct nd_cmd_ars_status, address);
-	cmds.ars_stat = (struct nd_cmd_ars_status) {
+	cmd.ars_stat = (struct nd_cmd_ars_status) {
 		.out_length = 0,
 	};
-	rc = setup_result(cmds.buf, cmd_size);
+	rc = setup_result(cmd.buf, cmd_size);
 	if (rc)
 		return rc;
 	rc = acpi_nfit_ctl(&acpi_desc->nd_desc, NULL, ND_CMD_ARS_STATUS,
-			cmds.buf, cmd_size, &cmd_rc);
+			cmd.buf, cmd_size, &cmd_rc);
 
 	if (rc < 0 || cmd_rc) {
 		dev_dbg(dev, "%s: failed at: %d rc: %d cmd_rc: %d\n",
@@ -2806,16 +3008,16 @@ static int nfit_ctl_test(struct device *dev)
 
 
 	/* test ars_cap with benign extended status */
-	cmd_size = sizeof(cmds.ars_cap);
-	cmds.ars_cap = (struct nd_cmd_ars_cap) {
+	cmd_size = sizeof(cmd.ars_cap);
+	cmd.ars_cap = (struct nd_cmd_ars_cap) {
 		.status = ND_ARS_PERSISTENT << 16,
 	};
 	offset = offsetof(struct nd_cmd_ars_cap, status);
-	rc = setup_result(cmds.buf + offset, cmd_size - offset);
+	rc = setup_result(cmd.buf + offset, cmd_size - offset);
 	if (rc)
 		return rc;
 	rc = acpi_nfit_ctl(&acpi_desc->nd_desc, NULL, ND_CMD_ARS_CAP,
-			cmds.buf, cmd_size, &cmd_rc);
+			cmd.buf, cmd_size, &cmd_rc);
 
 	if (rc < 0 || cmd_rc) {
 		dev_dbg(dev, "%s: failed at: %d rc: %d cmd_rc: %d\n",
@@ -2825,19 +3027,19 @@ static int nfit_ctl_test(struct device *dev)
 
 
 	/* test ars_status with 'status' trimmed from 'out_length' */
-	cmd_size = sizeof(cmds.ars_stat) + sizeof(struct nd_ars_record);
-	cmds.ars_stat = (struct nd_cmd_ars_status) {
+	cmd_size = sizeof(cmd.ars_stat) + sizeof(struct nd_ars_record);
+	cmd.ars_stat = (struct nd_cmd_ars_status) {
 		.out_length = cmd_size - 4,
 	};
-	record = &cmds.ars_stat.records[0];
+	record = &cmd.ars_stat.records[0];
 	*record = (struct nd_ars_record) {
 		.length = test_val,
 	};
-	rc = setup_result(cmds.buf, cmd_size);
+	rc = setup_result(cmd.buf, cmd_size);
 	if (rc)
 		return rc;
 	rc = acpi_nfit_ctl(&acpi_desc->nd_desc, NULL, ND_CMD_ARS_STATUS,
-			cmds.buf, cmd_size, &cmd_rc);
+			cmd.buf, cmd_size, &cmd_rc);
 
 	if (rc < 0 || cmd_rc || record->length != test_val) {
 		dev_dbg(dev, "%s: failed at: %d rc: %d cmd_rc: %d\n",
@@ -2847,19 +3049,19 @@ static int nfit_ctl_test(struct device *dev)
 
 
 	/* test ars_status with 'Output (Size)' including 'status' */
-	cmd_size = sizeof(cmds.ars_stat) + sizeof(struct nd_ars_record);
-	cmds.ars_stat = (struct nd_cmd_ars_status) {
+	cmd_size = sizeof(cmd.ars_stat) + sizeof(struct nd_ars_record);
+	cmd.ars_stat = (struct nd_cmd_ars_status) {
 		.out_length = cmd_size,
 	};
-	record = &cmds.ars_stat.records[0];
+	record = &cmd.ars_stat.records[0];
 	*record = (struct nd_ars_record) {
 		.length = test_val,
 	};
-	rc = setup_result(cmds.buf, cmd_size);
+	rc = setup_result(cmd.buf, cmd_size);
 	if (rc)
 		return rc;
 	rc = acpi_nfit_ctl(&acpi_desc->nd_desc, NULL, ND_CMD_ARS_STATUS,
-			cmds.buf, cmd_size, &cmd_rc);
+			cmd.buf, cmd_size, &cmd_rc);
 
 	if (rc < 0 || cmd_rc || record->length != test_val) {
 		dev_dbg(dev, "%s: failed at: %d rc: %d cmd_rc: %d\n",
@@ -2869,15 +3071,15 @@ static int nfit_ctl_test(struct device *dev)
 
 
 	/* test extended status for get_config_size results in failure */
-	cmd_size = sizeof(cmds.cfg_size);
-	cmds.cfg_size = (struct nd_cmd_get_config_size) {
+	cmd_size = sizeof(cmd.cfg_size);
+	cmd.cfg_size = (struct nd_cmd_get_config_size) {
 		.status = 1 << 16,
 	};
-	rc = setup_result(cmds.buf, cmd_size);
+	rc = setup_result(cmd.buf, cmd_size);
 	if (rc)
 		return rc;
 	rc = acpi_nfit_ctl(&acpi_desc->nd_desc, nvdimm, ND_CMD_GET_CONFIG_SIZE,
-			cmds.buf, cmd_size, &cmd_rc);
+			cmd.buf, cmd_size, &cmd_rc);
 
 	if (rc < 0 || cmd_rc >= 0) {
 		dev_dbg(dev, "%s: failed at: %d rc: %d cmd_rc: %d\n",
@@ -2886,16 +3088,46 @@ static int nfit_ctl_test(struct device *dev)
 	}
 
 	/* test clear error */
-	cmd_size = sizeof(cmds.clear_err);
-	cmds.clear_err = (struct nd_cmd_clear_error) {
+	cmd_size = sizeof(cmd.clear_err);
+	cmd.clear_err = (struct nd_cmd_clear_error) {
 		.length = 512,
 		.cleared = 512,
 	};
-	rc = setup_result(cmds.buf, cmd_size);
+	rc = setup_result(cmd.buf, cmd_size);
 	if (rc)
 		return rc;
 	rc = acpi_nfit_ctl(&acpi_desc->nd_desc, NULL, ND_CMD_CLEAR_ERROR,
-			cmds.buf, cmd_size, &cmd_rc);
+			cmd.buf, cmd_size, &cmd_rc);
+	if (rc < 0 || cmd_rc) {
+		dev_dbg(dev, "%s: failed at: %d rc: %d cmd_rc: %d\n",
+				__func__, __LINE__, rc, cmd_rc);
+		return -EIO;
+	}
+
+	/* test firmware activate bus info */
+	cmd_size = sizeof(cmd.fwa_info);
+	cmd = (struct nfit_ctl_test_cmd) {
+		.pkg = {
+			.nd_command = NVDIMM_BUS_INTEL_FW_ACTIVATE_BUSINFO,
+			.nd_family = NVDIMM_BUS_FAMILY_INTEL,
+			.nd_size_out = cmd_size,
+			.nd_fw_size = cmd_size,
+		},
+		.fwa_info = {
+			.state = ND_INTEL_FWA_IDLE,
+			.capability = ND_INTEL_BUS_FWA_CAP_FWQUIESCE
+				| ND_INTEL_BUS_FWA_CAP_OSQUIESCE,
+			.activate_tmo = 1,
+			.cpu_quiesce_tmo = 1,
+			.io_quiesce_tmo = 1,
+			.max_quiesce_tmo = 1,
+		},
+	};
+	rc = setup_result(cmd.buf, cmd_size);
+	if (rc)
+		return rc;
+	rc = acpi_nfit_ctl(&acpi_desc->nd_desc, NULL, ND_CMD_CALL,
+			&cmd, sizeof(cmd.pkg) + cmd_size, &cmd_rc);
 	if (rc < 0 || cmd_rc) {
 		dev_dbg(dev, "%s: failed at: %d rc: %d cmd_rc: %d\n",
 				__func__, __LINE__, rc, cmd_rc);
@@ -3052,7 +3284,7 @@ static struct platform_driver nfit_test_driver = {
 	.id_table = nfit_test_id,
 };
 
-static char mcsafe_buf[PAGE_SIZE] __attribute__((__aligned__(PAGE_SIZE)));
+static char copy_mc_buf[PAGE_SIZE] __attribute__((__aligned__(PAGE_SIZE)));
 
 enum INJECT {
 	INJECT_NONE,
@@ -3060,7 +3292,7 @@ enum INJECT {
 	INJECT_DST,
 };
 
-static void mcsafe_test_init(char *dst, char *src, size_t size)
+static void copy_mc_test_init(char *dst, char *src, size_t size)
 {
 	size_t i;
 
@@ -3069,7 +3301,7 @@ static void mcsafe_test_init(char *dst, char *src, size_t size)
 		src[i] = (char) i;
 }
 
-static bool mcsafe_test_validate(unsigned char *dst, unsigned char *src,
+static bool copy_mc_test_validate(unsigned char *dst, unsigned char *src,
 		size_t size, unsigned long rem)
 {
 	size_t i;
@@ -3090,12 +3322,12 @@ static bool mcsafe_test_validate(unsigned char *dst, unsigned char *src,
 	return true;
 }
 
-void mcsafe_test(void)
+void copy_mc_test(void)
 {
 	char *inject_desc[] = { "none", "source", "destination" };
 	enum INJECT inj;
 
-	if (IS_ENABLED(CONFIG_MCSAFE_TEST)) {
+	if (IS_ENABLED(CONFIG_COPY_MC_TEST)) {
 		pr_info("%s: run...\n", __func__);
 	} else {
 		pr_info("%s: disabled, skip.\n", __func__);
@@ -3113,31 +3345,31 @@ void mcsafe_test(void)
 
 			switch (inj) {
 			case INJECT_NONE:
-				mcsafe_inject_src(NULL);
-				mcsafe_inject_dst(NULL);
-				dst = &mcsafe_buf[2048];
-				src = &mcsafe_buf[1024 - i];
+				copy_mc_inject_src(NULL);
+				copy_mc_inject_dst(NULL);
+				dst = &copy_mc_buf[2048];
+				src = &copy_mc_buf[1024 - i];
 				expect = 0;
 				break;
 			case INJECT_SRC:
-				mcsafe_inject_src(&mcsafe_buf[1024]);
-				mcsafe_inject_dst(NULL);
-				dst = &mcsafe_buf[2048];
-				src = &mcsafe_buf[1024 - i];
+				copy_mc_inject_src(&copy_mc_buf[1024]);
+				copy_mc_inject_dst(NULL);
+				dst = &copy_mc_buf[2048];
+				src = &copy_mc_buf[1024 - i];
 				expect = 512 - i;
 				break;
 			case INJECT_DST:
-				mcsafe_inject_src(NULL);
-				mcsafe_inject_dst(&mcsafe_buf[2048]);
-				dst = &mcsafe_buf[2048 - i];
-				src = &mcsafe_buf[1024];
+				copy_mc_inject_src(NULL);
+				copy_mc_inject_dst(&copy_mc_buf[2048]);
+				dst = &copy_mc_buf[2048 - i];
+				src = &copy_mc_buf[1024];
 				expect = 512 - i;
 				break;
 			}
 
-			mcsafe_test_init(dst, src, 512);
-			rem = __memcpy_mcsafe(dst, src, 512);
-			valid = mcsafe_test_validate(dst, src, 512, expect);
+			copy_mc_test_init(dst, src, 512);
+			rem = copy_mc_fragile(dst, src, 512);
+			valid = copy_mc_test_validate(dst, src, 512, expect);
 			if (rem == expect && valid)
 				continue;
 			pr_info("%s: copy(%#lx, %#lx, %d) off: %d rem: %ld %s expect: %ld\n",
@@ -3149,8 +3381,8 @@ void mcsafe_test(void)
 		}
 	}
 
-	mcsafe_inject_src(NULL);
-	mcsafe_inject_dst(NULL);
+	copy_mc_inject_src(NULL);
+	copy_mc_inject_dst(NULL);
 }
 
 static __init int nfit_test_init(void)
@@ -3161,7 +3393,7 @@ static __init int nfit_test_init(void)
 	libnvdimm_test();
 	acpi_nfit_test();
 	device_dax_test();
-	mcsafe_test();
+	copy_mc_test();
 	dax_pmem_test();
 	dax_pmem_core_test();
 #ifdef CONFIG_DEV_DAX_PMEM_COMPAT
diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile
index e03bc15ce731..9018f45d631d 100644
--- a/tools/testing/selftests/Makefile
+++ b/tools/testing/selftests/Makefile
@@ -32,6 +32,7 @@ TARGETS += lkdtm
 TARGETS += membarrier
 TARGETS += memfd
 TARGETS += memory-hotplug
+TARGETS += mincore
 TARGETS += mount
 TARGETS += mqueue
 TARGETS += net
diff --git a/tools/testing/selftests/arm64/Makefile b/tools/testing/selftests/arm64/Makefile
index 93b567d23c8b..2c9d012797a7 100644
--- a/tools/testing/selftests/arm64/Makefile
+++ b/tools/testing/selftests/arm64/Makefile
@@ -4,7 +4,7 @@
 ARCH ?= $(shell uname -m 2>/dev/null || echo not)
 
 ifneq (,$(filter $(ARCH),aarch64 arm64))
-ARM64_SUBTARGETS ?= tags signal
+ARM64_SUBTARGETS ?= tags signal pauth fp mte
 else
 ARM64_SUBTARGETS :=
 endif
diff --git a/tools/testing/selftests/arm64/fp/.gitignore b/tools/testing/selftests/arm64/fp/.gitignore
new file mode 100644
index 000000000000..d66f76d2a650
--- /dev/null
+++ b/tools/testing/selftests/arm64/fp/.gitignore
@@ -0,0 +1,5 @@
+fpsimd-test
+sve-probe-vls
+sve-ptrace
+sve-test
+vlset
diff --git a/tools/testing/selftests/arm64/fp/Makefile b/tools/testing/selftests/arm64/fp/Makefile
new file mode 100644
index 000000000000..a57009d3a0dc
--- /dev/null
+++ b/tools/testing/selftests/arm64/fp/Makefile
@@ -0,0 +1,17 @@
+# SPDX-License-Identifier: GPL-2.0
+
+CFLAGS += -I../../../../../usr/include/
+TEST_GEN_PROGS := sve-ptrace sve-probe-vls
+TEST_PROGS_EXTENDED := fpsimd-test fpsimd-stress sve-test sve-stress vlset
+
+all: $(TEST_GEN_PROGS) $(TEST_PROGS_EXTENDED)
+
+fpsimd-test: fpsimd-test.o
+	$(CC) -nostdlib $^ -o $@
+sve-ptrace: sve-ptrace.o sve-ptrace-asm.o
+sve-probe-vls: sve-probe-vls.o
+sve-test: sve-test.o
+	$(CC) -nostdlib $^ -o $@
+vlset: vlset.o
+
+include ../../lib.mk
diff --git a/tools/testing/selftests/arm64/fp/README b/tools/testing/selftests/arm64/fp/README
new file mode 100644
index 000000000000..03e3dad865d8
--- /dev/null
+++ b/tools/testing/selftests/arm64/fp/README
@@ -0,0 +1,100 @@
+This directory contains a mix of tests integrated with kselftest and
+standalone stress tests.
+
+kselftest tests
+===============
+
+sve-probe-vls - Checks the SVE vector length enumeration interface
+sve-ptrace - Checks the SVE ptrace interface
+
+Running the non-kselftest tests
+===============================
+
+sve-stress performs an SVE context switch stress test, as described
+below.
+
+(The fpsimd-stress test works the same way; just substitute "fpsimd" for
+"sve" in the following commands.)
+
+
+The test runs until killed by the user.
+
+If no context switch error was detected, you will see output such as
+the following:
+
+$ ./sve-stress
+(wait for some time)
+^C
+Vector length:        512 bits
+PID:    1573
+Terminated by signal 15, no error, iterations=9467, signals=1014
+Vector length:  512 bits
+PID:    1575
+Terminated by signal 15, no error, iterations=9448, signals=1028
+Vector length:  512 bits
+PID:    1577
+Terminated by signal 15, no error, iterations=9436, signals=1039
+Vector length:  512 bits
+PID:    1579
+Terminated by signal 15, no error, iterations=9421, signals=1039
+Vector length:  512 bits
+PID:    1581
+Terminated by signal 15, no error, iterations=9403, signals=1039
+Vector length:  512 bits
+PID:    1583
+Terminated by signal 15, no error, iterations=9385, signals=1036
+Vector length:  512 bits
+PID:    1585
+Terminated by signal 15, no error, iterations=9376, signals=1039
+Vector length:  512 bits
+PID:    1587
+Terminated by signal 15, no error, iterations=9361, signals=1039
+Vector length:  512 bits
+PID:    1589
+Terminated by signal 15, no error, iterations=9350, signals=1039
+
+
+If an error was detected, details of the mismatch will be printed
+instead of "no error".
+
+Ideally, the test should be allowed to run for many minutes or hours
+to maximise test coverage.
+
+
+KVM stress testing
+==================
+
+To try to reproduce the bugs that we have been observing, sve-stress
+should be run in parallel in two KVM guests, while simultaneously
+running on the host.
+
+1) Start 2 guests, using the following command for each:
+
+$ lkvm run --console=virtio -pconsole=hvc0 --sve Image
+
+(Depending on the hardware GIC implementation, you may also need
+--irqchip=gicv3.  New kvmtool defaults to that if appropriate, but I
+can't remember whether my branch is new enough for that.  Try without
+the option first.)
+
+Kvmtool occupies the terminal until you kill it (Ctrl+A x),
+or until the guest terminates.  It is therefore recommended to run
+each instance in separate terminal (use screen or ssh etc.)  This
+allows multiple guests to be run in parallel while running other
+commands on the host.
+
+Within the guest, the host filesystem is accessible, mounted on /host.
+
+2) Run the sve-stress on *each* guest with the Vector-Length set to 32:
+guest$ ./vlset --inherit 32 ./sve-stress
+
+3) Run the sve-stress on the host with the maximum Vector-Length:
+host$ ./vlset --inherit --max ./sve-stress
+
+
+Again, the test should be allowed to run for many minutes or hours to
+maximise test coverage.
+
+If no error is detected, you will see output from each sve-stress
+instance similar to that illustrated above; otherwise details of the
+observed mismatches will be printed.
diff --git a/tools/testing/selftests/arm64/fp/asm-offsets.h b/tools/testing/selftests/arm64/fp/asm-offsets.h
new file mode 100644
index 000000000000..a180851496ec
--- /dev/null
+++ b/tools/testing/selftests/arm64/fp/asm-offsets.h
@@ -0,0 +1,11 @@
+#define sa_sz 32
+#define sa_flags 8
+#define sa_handler 0
+#define sa_mask_sz 8
+#define SIGUSR1 10
+#define SIGTERM 15
+#define SIGINT 2
+#define SIGABRT 6
+#define SA_NODEFER 1073741824
+#define SA_SIGINFO 4
+#define ucontext_regs 184
diff --git a/tools/testing/selftests/arm64/fp/assembler.h b/tools/testing/selftests/arm64/fp/assembler.h
new file mode 100644
index 000000000000..8944f2189206
--- /dev/null
+++ b/tools/testing/selftests/arm64/fp/assembler.h
@@ -0,0 +1,57 @@
+// SPDX-License-Identifier: GPL-2.0-only
+// Copyright (C) 2015-2019 ARM Limited.
+// Original author: Dave Martin <Dave.Martin@arm.com>
+
+#ifndef ASSEMBLER_H
+#define ASSEMBLER_H
+
+.macro __for from:req, to:req
+	.if (\from) == (\to)
+		_for__body %\from
+	.else
+		__for \from, %(\from) + ((\to) - (\from)) / 2
+		__for %(\from) + ((\to) - (\from)) / 2 + 1, \to
+	.endif
+.endm
+
+.macro _for var:req, from:req, to:req, insn:vararg
+	.macro _for__body \var:req
+		.noaltmacro
+		\insn
+		.altmacro
+	.endm
+
+	.altmacro
+	__for \from, \to
+	.noaltmacro
+
+	.purgem _for__body
+.endm
+
+.macro function name
+	.macro endfunction
+		.type \name, @function
+		.purgem endfunction
+	.endm
+\name:
+.endm
+
+.macro define_accessor name, num, insn
+	.macro \name\()_entry n
+		\insn \n, 1
+		ret
+	.endm
+
+function \name
+	adr	x2, .L__accessor_tbl\@
+	add	x2, x2, x0, lsl #3
+	br	x2
+
+.L__accessor_tbl\@:
+	_for x, 0, (\num) - 1, \name\()_entry \x
+endfunction
+
+	.purgem \name\()_entry
+.endm
+
+#endif /* ! ASSEMBLER_H */
diff --git a/tools/testing/selftests/arm64/fp/fpsimd-stress b/tools/testing/selftests/arm64/fp/fpsimd-stress
new file mode 100755
index 000000000000..781b5b022eaf
--- /dev/null
+++ b/tools/testing/selftests/arm64/fp/fpsimd-stress
@@ -0,0 +1,60 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0-only
+# Copyright (C) 2015-2019 ARM Limited.
+# Original author: Dave Martin <Dave.Martin@arm.com>
+
+set -ue
+
+NR_CPUS=`nproc`
+
+pids=
+logs=
+
+cleanup () {
+	trap - INT TERM CHLD
+	set +e
+
+	if [ -n "$pids" ]; then
+		kill $pids
+		wait $pids
+		pids=
+	fi
+
+	if [ -n "$logs" ]; then
+		cat $logs
+		rm $logs
+		logs=
+	fi
+}
+
+interrupt () {
+	cleanup
+	exit 0
+}
+
+child_died () {
+	cleanup
+	exit 1
+}
+
+trap interrupt INT TERM EXIT
+trap child_died CHLD
+
+for x in `seq 0 $((NR_CPUS * 4))`; do
+	log=`mktemp`
+	logs=$logs\ $log
+	./fpsimd-test >$log &
+	pids=$pids\ $!
+done
+
+# Wait for all child processes to be created:
+sleep 10
+
+while :; do
+	kill -USR1 $pids
+done &
+pids=$pids\ $!
+
+wait
+
+exit 1
diff --git a/tools/testing/selftests/arm64/fp/fpsimd-test.S b/tools/testing/selftests/arm64/fp/fpsimd-test.S
new file mode 100644
index 000000000000..1c5556bdd11d
--- /dev/null
+++ b/tools/testing/selftests/arm64/fp/fpsimd-test.S
@@ -0,0 +1,482 @@
+// SPDX-License-Identifier: GPL-2.0-only
+// Copyright (C) 2015-2019 ARM Limited.
+// Original author: Dave Martin <Dave.Martin@arm.com>
+//
+// Simple FPSIMD context switch test
+// Repeatedly writes unique test patterns into each FPSIMD register
+// and reads them back to verify integrity.
+//
+// for x in `seq 1 NR_CPUS`; do fpsimd-test & pids=$pids\ $! ; done
+// (leave it running for as long as you want...)
+// kill $pids
+
+#include <asm/unistd.h>
+#include "assembler.h"
+#include "asm-offsets.h"
+
+#define NVR	32
+#define MAXVL_B	(128 / 8)
+
+.macro _vldr Vn:req, Xt:req
+	ld1	{v\Vn\().2d}, [x\Xt]
+.endm
+
+.macro _vstr Vn:req, Xt:req
+	st1	{v\Vn\().2d}, [x\Xt]
+.endm
+
+// Generate accessor functions to read/write programmatically selected
+// FPSIMD registers.
+// x0 is the register index to access
+// x1 is the memory address to read from (getv,setp) or store to (setv,setp)
+// All clobber x0-x2
+define_accessor setv, NVR, _vldr
+define_accessor getv, NVR, _vstr
+
+// Print a single character x0 to stdout
+// Clobbers x0-x2,x8
+function putc
+	str	x0, [sp, #-16]!
+
+	mov	x0, #1			// STDOUT_FILENO
+	mov	x1, sp
+	mov	x2, #1
+	mov	x8, #__NR_write
+	svc	#0
+
+	add	sp, sp, #16
+	ret
+endfunction
+
+// Print a NUL-terminated string starting at address x0 to stdout
+// Clobbers x0-x3,x8
+function puts
+	mov	x1, x0
+
+	mov	x2, #0
+0:	ldrb	w3, [x0], #1
+	cbz	w3, 1f
+	add	x2, x2, #1
+	b	0b
+
+1:	mov	w0, #1			// STDOUT_FILENO
+	mov	x8, #__NR_write
+	svc	#0
+
+	ret
+endfunction
+
+// Utility macro to print a literal string
+// Clobbers x0-x4,x8
+.macro puts string
+	.pushsection .rodata.str1.1, "aMS", 1
+.L__puts_literal\@: .string "\string"
+	.popsection
+
+	ldr	x0, =.L__puts_literal\@
+	bl	puts
+.endm
+
+// Print an unsigned decimal number x0 to stdout
+// Clobbers x0-x4,x8
+function putdec
+	mov	x1, sp
+	str	x30, [sp, #-32]!	// Result can't be > 20 digits
+
+	mov	x2, #0
+	strb	w2, [x1, #-1]!		// Write the NUL terminator
+
+	mov	x2, #10
+0:	udiv	x3, x0, x2		// div-mod loop to generate the digits
+	msub	x0, x3, x2, x0
+	add	w0, w0, #'0'
+	strb	w0, [x1, #-1]!
+	mov	x0, x3
+	cbnz	x3, 0b
+
+	ldrb	w0, [x1]
+	cbnz	w0, 1f
+	mov	w0, #'0'		// Print "0" for 0, not ""
+	strb	w0, [x1, #-1]!
+
+1:	mov	x0, x1
+	bl	puts
+
+	ldr	x30, [sp], #32
+	ret
+endfunction
+
+// Print an unsigned decimal number x0 to stdout, followed by a newline
+// Clobbers x0-x5,x8
+function putdecn
+	mov	x5, x30
+
+	bl	putdec
+	mov	x0, #'\n'
+	bl	putc
+
+	ret	x5
+endfunction
+
+
+// Clobbers x0-x3,x8
+function puthexb
+	str	x30, [sp, #-0x10]!
+
+	mov	w3, w0
+	lsr	w0, w0, #4
+	bl	puthexnibble
+	mov	w0, w3
+
+	ldr	x30, [sp], #0x10
+	// fall through to puthexnibble
+endfunction
+// Clobbers x0-x2,x8
+function puthexnibble
+	and	w0, w0, #0xf
+	cmp	w0, #10
+	blo	1f
+	add	w0, w0, #'a' - ('9' + 1)
+1:	add	w0, w0, #'0'
+	b	putc
+endfunction
+
+// x0=data in, x1=size in, clobbers x0-x5,x8
+function dumphex
+	str	x30, [sp, #-0x10]!
+
+	mov	x4, x0
+	mov	x5, x1
+
+0:	subs	x5, x5, #1
+	b.lo	1f
+	ldrb	w0, [x4], #1
+	bl	puthexb
+	b	0b
+
+1:	ldr	x30, [sp], #0x10
+	ret
+endfunction
+
+// Declare some storate space to shadow the SVE register contents:
+.pushsection .text
+.data
+.align 4
+vref:
+	.space	MAXVL_B * NVR
+scratch:
+	.space	MAXVL_B
+.popsection
+
+// Trivial memory copy: copy x2 bytes, starting at address x1, to address x0.
+// Clobbers x0-x3
+function memcpy
+	cmp	x2, #0
+	b.eq	1f
+0:	ldrb	w3, [x1], #1
+	strb	w3, [x0], #1
+	subs	x2, x2, #1
+	b.ne	0b
+1:	ret
+endfunction
+
+// Generate a test pattern for storage in SVE registers
+// x0: pid	(16 bits)
+// x1: register number (6 bits)
+// x2: generation (4 bits)
+function pattern
+	orr	w1, w0, w1, lsl #16
+	orr	w2, w1, w2, lsl #28
+
+	ldr	x0, =scratch
+	mov	w1, #MAXVL_B / 4
+
+0:	str	w2, [x0], #4
+	add	w2, w2, #(1 << 22)
+	subs	w1, w1, #1
+	bne	0b
+
+	ret
+endfunction
+
+// Get the address of shadow data for FPSIMD V-register V<xn>
+.macro _adrv xd, xn, nrtmp
+	ldr	\xd, =vref
+	mov	x\nrtmp, #16
+	madd	\xd, x\nrtmp, \xn, \xd
+.endm
+
+// Set up test pattern in a FPSIMD V-register
+// x0: pid
+// x1: register number
+// x2: generation
+function setup_vreg
+	mov	x4, x30
+
+	mov	x6, x1
+	bl	pattern
+	_adrv	x0, x6, 2
+	mov	x5, x0
+	ldr	x1, =scratch
+	bl	memcpy
+
+	mov	x0, x6
+	mov	x1, x5
+	bl	setv
+
+	ret	x4
+endfunction
+
+// Fill x1 bytes starting at x0 with 0xae (for canary purposes)
+// Clobbers x1, x2.
+function memfill_ae
+	mov	w2, #0xae
+	b	memfill
+endfunction
+
+// Fill x1 bytes starting at x0 with 0.
+// Clobbers x1, x2.
+function memclr
+	mov	w2, #0
+endfunction
+	// fall through to memfill
+
+// Trivial memory fill: fill x1 bytes starting at address x0 with byte w2
+// Clobbers x1
+function memfill
+	cmp	x1, #0
+	b.eq	1f
+
+0:	strb	w2, [x0], #1
+	subs	x1, x1, #1
+	b.ne	0b
+
+1:	ret
+endfunction
+
+// Trivial memory compare: compare x2 bytes starting at address x0 with
+// bytes starting at address x1.
+// Returns only if all bytes match; otherwise, the program is aborted.
+// Clobbers x0-x5.
+function memcmp
+	cbz	x2, 1f
+
+	mov	x5, #0
+0:	ldrb	w3, [x0, x5]
+	ldrb	w4, [x1, x5]
+	add	x5, x5, #1
+	cmp	w3, w4
+	b.ne	barf
+	subs	x2, x2, #1
+	b.ne	0b
+
+1:	ret
+endfunction
+
+// Verify that a FPSIMD V-register matches its shadow in memory, else abort
+// x0: reg number
+// Clobbers x0-x5.
+function check_vreg
+	mov	x3, x30
+
+	_adrv	x5, x0, 6
+	mov	x4, x0
+	ldr	x7, =scratch
+
+	mov	x0, x7
+	mov	x1, x6
+	bl	memfill_ae
+
+	mov	x0, x4
+	mov	x1, x7
+	bl	getv
+
+	mov	x0, x5
+	mov	x1, x7
+	mov	x2, x6
+	mov	x30, x3
+	b	memcmp
+endfunction
+
+// Any SVE register modified here can cause corruption in the main
+// thread -- but *only* the registers modified here.
+function irritator_handler
+	// Increment the irritation signal count (x23):
+	ldr	x0, [x2, #ucontext_regs + 8 * 23]
+	add	x0, x0, #1
+	str	x0, [x2, #ucontext_regs + 8 * 23]
+
+	// Corrupt some random V-regs
+	adr	x0, .text + (irritator_handler - .text) / 16 * 16
+	movi	v0.8b, #7
+	movi	v9.16b, #9
+	movi	v31.8b, #31
+
+	ret
+endfunction
+
+function terminate_handler
+	mov	w21, w0
+	mov	x20, x2
+
+	puts	"Terminated by signal "
+	mov	w0, w21
+	bl	putdec
+	puts	", no error, iterations="
+	ldr	x0, [x20, #ucontext_regs + 8 * 22]
+	bl	putdec
+	puts	", signals="
+	ldr	x0, [x20, #ucontext_regs + 8 * 23]
+	bl	putdecn
+
+	mov	x0, #0
+	mov	x8, #__NR_exit
+	svc	#0
+endfunction
+
+// w0: signal number
+// x1: sa_action
+// w2: sa_flags
+// Clobbers x0-x6,x8
+function setsignal
+	str	x30, [sp, #-((sa_sz + 15) / 16 * 16 + 16)]!
+
+	mov	w4, w0
+	mov	x5, x1
+	mov	w6, w2
+
+	add	x0, sp, #16
+	mov	x1, #sa_sz
+	bl	memclr
+
+	mov	w0, w4
+	add	x1, sp, #16
+	str	w6, [x1, #sa_flags]
+	str	x5, [x1, #sa_handler]
+	mov	x2, #0
+	mov	x3, #sa_mask_sz
+	mov	x8, #__NR_rt_sigaction
+	svc	#0
+
+	cbz	w0, 1f
+
+	puts	"sigaction failure\n"
+	b	.Labort
+
+1:	ldr	x30, [sp], #((sa_sz + 15) / 16 * 16 + 16)
+	ret
+endfunction
+
+// Main program entry point
+.globl _start
+function _start
+_start:
+	// Sanity-check and report the vector length
+
+	mov	x19, #128
+	cmp	x19, #128
+	b.lo	1f
+	cmp	x19, #2048
+	b.hi	1f
+	tst	x19, #(8 - 1)
+	b.eq	2f
+
+1:	puts	"Bad vector length: "
+	mov	x0, x19
+	bl	putdecn
+	b	.Labort
+
+2:	puts	"Vector length:\t"
+	mov	x0, x19
+	bl	putdec
+	puts	" bits\n"
+
+	// Obtain our PID, to ensure test pattern uniqueness between processes
+
+	mov	x8, #__NR_getpid
+	svc	#0
+	mov	x20, x0
+
+	puts	"PID:\t"
+	mov	x0, x20
+	bl	putdecn
+
+	mov	x23, #0		// Irritation signal count
+
+	mov	w0, #SIGINT
+	adr	x1, terminate_handler
+	mov	w2, #SA_SIGINFO
+	bl	setsignal
+
+	mov	w0, #SIGTERM
+	adr	x1, terminate_handler
+	mov	w2, #SA_SIGINFO
+	bl	setsignal
+
+	mov	w0, #SIGUSR1
+	adr	x1, irritator_handler
+	mov	w2, #SA_SIGINFO
+	orr	w2, w2, #SA_NODEFER
+	bl	setsignal
+
+	mov	x22, #0		// generation number, increments per iteration
+.Ltest_loop:
+
+	mov	x21, #0		// Set up V-regs & shadow with test pattern
+0:	mov	x0, x20
+	mov	x1, x21
+	and	x2, x22, #0xf
+	bl	setup_vreg
+	add	x21, x21, #1
+	cmp	x21, #NVR
+	b.lo	0b
+
+// Can't do this when SVE state is volatile across SVC:
+	mov	x8, #__NR_sched_yield	// Encourage preemption
+	svc	#0
+
+	mov	x21, #0
+0:	mov	x0, x21
+	bl	check_vreg
+	add	x21, x21, #1
+	cmp	x21, #NVR
+	b.lo	0b
+
+	add	x22, x22, #1
+	b	.Ltest_loop
+
+.Labort:
+	mov	x0, #0
+	mov	x1, #SIGABRT
+	mov	x8, #__NR_kill
+	svc	#0
+endfunction
+
+function barf
+	mov	x10, x0	// expected data
+	mov	x11, x1	// actual data
+	mov	x12, x2	// data size
+
+	puts	"Mistatch: PID="
+	mov	x0, x20
+	bl	putdec
+	puts	", iteration="
+	mov	x0, x22
+	bl	putdec
+	puts	", reg="
+	mov	x0, x21
+	bl	putdecn
+	puts	"\tExpected ["
+	mov	x0, x10
+	mov	x1, x12
+	bl	dumphex
+	puts	"]\n\tGot      ["
+	mov	x0, x11
+	mov	x1, x12
+	bl	dumphex
+	puts	"]\n"
+
+	mov	x8, #__NR_exit
+	mov	x1, #1
+	svc	#0
+endfunction
diff --git a/tools/testing/selftests/arm64/fp/sve-probe-vls.c b/tools/testing/selftests/arm64/fp/sve-probe-vls.c
new file mode 100644
index 000000000000..b29cbc642c57
--- /dev/null
+++ b/tools/testing/selftests/arm64/fp/sve-probe-vls.c
@@ -0,0 +1,58 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2015-2020 ARM Limited.
+ * Original author: Dave Martin <Dave.Martin@arm.com>
+ */
+#include <assert.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/auxv.h>
+#include <sys/prctl.h>
+#include <asm/sigcontext.h>
+
+#include "../../kselftest.h"
+
+int main(int argc, char **argv)
+{
+	unsigned int vq;
+	int vl;
+	static unsigned int vqs[SVE_VQ_MAX];
+	unsigned int nvqs = 0;
+
+	ksft_print_header();
+	ksft_set_plan(2);
+
+	if (!(getauxval(AT_HWCAP) & HWCAP_SVE))
+		ksft_exit_skip("SVE not available");
+
+	/*
+	 * Enumerate up to SVE_VQ_MAX vector lengths
+	 */
+	for (vq = SVE_VQ_MAX; vq > 0; --vq) {
+		vl = prctl(PR_SVE_SET_VL, vq * 16);
+		if (vl == -1)
+			ksft_exit_fail_msg("PR_SVE_SET_VL failed: %s (%d)\n",
+					   strerror(errno), errno);
+
+		vl &= PR_SVE_VL_LEN_MASK;
+
+		if (!sve_vl_valid(vl))
+			ksft_exit_fail_msg("VL %d invalid\n", vl);
+		vq = sve_vq_from_vl(vl);
+
+		if (!(nvqs < SVE_VQ_MAX))
+			ksft_exit_fail_msg("Too many VLs %u >= SVE_VQ_MAX\n",
+					   nvqs);
+		vqs[nvqs++] = vq;
+	}
+	ksft_test_result_pass("Enumerated %d vector lengths\n", nvqs);
+	ksft_test_result_pass("All vector lengths valid\n");
+
+	/* Print out the vector lengths in ascending order: */
+	while (nvqs--)
+		ksft_print_msg("%u\n", 16 * vqs[nvqs]);
+
+	ksft_exit_pass();
+}
diff --git a/tools/testing/selftests/arm64/fp/sve-ptrace-asm.S b/tools/testing/selftests/arm64/fp/sve-ptrace-asm.S
new file mode 100644
index 000000000000..3e81f9fab574
--- /dev/null
+++ b/tools/testing/selftests/arm64/fp/sve-ptrace-asm.S
@@ -0,0 +1,33 @@
+// SPDX-License-Identifier: GPL-2.0-only
+// Copyright (C) 2015-2019 ARM Limited.
+// Original author: Dave Martin <Dave.Martin@arm.com>
+#include <asm/unistd.h>
+
+.arch_extension sve
+
+.globl sve_store_patterns
+
+sve_store_patterns:
+	mov	x1, x0
+
+	index	z0.b, #0, #1
+	str	q0, [x1]
+
+	mov	w8, #__NR_getpid
+	svc	#0
+	str	q0, [x1, #0x10]
+
+	mov	z1.d, z0.d
+	str	q0, [x1, #0x20]
+
+	mov	w8, #__NR_getpid
+	svc	#0
+	str	q0, [x1, #0x30]
+
+	mov	z1.d, z0.d
+	str	q0, [x1, #0x40]
+
+	ret
+
+.size	sve_store_patterns, . - sve_store_patterns
+.type	sve_store_patterns, @function
diff --git a/tools/testing/selftests/arm64/fp/sve-ptrace.c b/tools/testing/selftests/arm64/fp/sve-ptrace.c
new file mode 100644
index 000000000000..b2282be6f938
--- /dev/null
+++ b/tools/testing/selftests/arm64/fp/sve-ptrace.c
@@ -0,0 +1,336 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2015-2020 ARM Limited.
+ * Original author: Dave Martin <Dave.Martin@arm.com>
+ */
+#include <errno.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/auxv.h>
+#include <sys/ptrace.h>
+#include <sys/types.h>
+#include <sys/uio.h>
+#include <sys/wait.h>
+#include <asm/sigcontext.h>
+#include <asm/ptrace.h>
+
+#include "../../kselftest.h"
+
+/* <linux/elf.h> and <sys/auxv.h> don't like each other, so: */
+#ifndef NT_ARM_SVE
+#define NT_ARM_SVE 0x405
+#endif
+
+/* Number of registers filled in by sve_store_patterns */
+#define NR_VREGS 5
+
+void sve_store_patterns(__uint128_t v[NR_VREGS]);
+
+static void dump(const void *buf, size_t size)
+{
+	size_t i;
+	const unsigned char *p = buf;
+
+	for (i = 0; i < size; ++i)
+		printf(" %.2x", *p++);
+}
+
+static int check_vregs(const __uint128_t vregs[NR_VREGS])
+{
+	int i;
+	int ok = 1;
+
+	for (i = 0; i < NR_VREGS; ++i) {
+		printf("# v[%d]:", i);
+		dump(&vregs[i], sizeof vregs[i]);
+		putchar('\n');
+
+		if (vregs[i] != vregs[0])
+			ok = 0;
+	}
+
+	return ok;
+}
+
+static int do_child(void)
+{
+	if (ptrace(PTRACE_TRACEME, -1, NULL, NULL))
+		ksft_exit_fail_msg("PTRACE_TRACEME", strerror(errno));
+
+	if (raise(SIGSTOP))
+		ksft_exit_fail_msg("raise(SIGSTOP)", strerror(errno));
+
+	return EXIT_SUCCESS;
+}
+
+static struct user_sve_header *get_sve(pid_t pid, void **buf, size_t *size)
+{
+	struct user_sve_header *sve;
+	void *p;
+	size_t sz = sizeof *sve;
+	struct iovec iov;
+
+	while (1) {
+		if (*size < sz) {
+			p = realloc(*buf, sz);
+			if (!p) {
+				errno = ENOMEM;
+				goto error;
+			}
+
+			*buf = p;
+			*size = sz;
+		}
+
+		iov.iov_base = *buf;
+		iov.iov_len = sz;
+		if (ptrace(PTRACE_GETREGSET, pid, NT_ARM_SVE, &iov))
+			goto error;
+
+		sve = *buf;
+		if (sve->size <= sz)
+			break;
+
+		sz = sve->size;
+	}
+
+	return sve;
+
+error:
+	return NULL;
+}
+
+static int set_sve(pid_t pid, const struct user_sve_header *sve)
+{
+	struct iovec iov;
+
+	iov.iov_base = (void *)sve;
+	iov.iov_len = sve->size;
+	return ptrace(PTRACE_SETREGSET, pid, NT_ARM_SVE, &iov);
+}
+
+static void dump_sve_regs(const struct user_sve_header *sve, unsigned int num,
+			  unsigned int vlmax)
+{
+	unsigned int vq;
+	unsigned int i;
+
+	if ((sve->flags & SVE_PT_REGS_MASK) != SVE_PT_REGS_SVE)
+		ksft_exit_fail_msg("Dumping non-SVE register\n");
+
+	if (vlmax > sve->vl)
+		vlmax = sve->vl;
+
+	vq = sve_vq_from_vl(sve->vl);
+	for (i = 0; i < num; ++i) {
+		printf("# z%u:", i);
+		dump((const char *)sve + SVE_PT_SVE_ZREG_OFFSET(vq, i),
+		     vlmax);
+		printf("%s\n", vlmax == sve->vl ? "" : " ...");
+	}
+}
+
+static int do_parent(pid_t child)
+{
+	int ret = EXIT_FAILURE;
+	pid_t pid;
+	int status;
+	siginfo_t si;
+	void *svebuf = NULL, *newsvebuf;
+	size_t svebufsz = 0, newsvebufsz;
+	struct user_sve_header *sve, *new_sve;
+	struct user_fpsimd_state *fpsimd;
+	unsigned int i, j;
+	unsigned char *p;
+	unsigned int vq;
+
+	/* Attach to the child */
+	while (1) {
+		int sig;
+
+		pid = wait(&status);
+		if (pid == -1) {
+			perror("wait");
+			goto error;
+		}
+
+		/*
+		 * This should never happen but it's hard to flag in
+		 * the framework.
+		 */
+		if (pid != child)
+			continue;
+
+		if (WIFEXITED(status) || WIFSIGNALED(status))
+			ksft_exit_fail_msg("Child died unexpectedly\n");
+
+		ksft_test_result(WIFSTOPPED(status), "WIFSTOPPED(%d)\n",
+				 status);
+		if (!WIFSTOPPED(status))
+			goto error;
+
+		sig = WSTOPSIG(status);
+
+		if (ptrace(PTRACE_GETSIGINFO, pid, NULL, &si)) {
+			if (errno == ESRCH)
+				goto disappeared;
+
+			if (errno == EINVAL) {
+				sig = 0; /* bust group-stop */
+				goto cont;
+			}
+
+			ksft_test_result_fail("PTRACE_GETSIGINFO: %s\n",
+					      strerror(errno));
+			goto error;
+		}
+
+		if (sig == SIGSTOP && si.si_code == SI_TKILL &&
+		    si.si_pid == pid)
+			break;
+
+	cont:
+		if (ptrace(PTRACE_CONT, pid, NULL, sig)) {
+			if (errno == ESRCH)
+				goto disappeared;
+
+			ksft_test_result_fail("PTRACE_CONT: %s\n",
+					      strerror(errno));
+			goto error;
+		}
+	}
+
+	sve = get_sve(pid, &svebuf, &svebufsz);
+	if (!sve) {
+		int e = errno;
+
+		ksft_test_result_fail("get_sve: %s\n", strerror(errno));
+		if (e == ESRCH)
+			goto disappeared;
+
+		goto error;
+	} else {
+		ksft_test_result_pass("get_sve\n");
+	}
+
+	ksft_test_result((sve->flags & SVE_PT_REGS_MASK) == SVE_PT_REGS_FPSIMD,
+			 "FPSIMD registers\n");
+	if ((sve->flags & SVE_PT_REGS_MASK) != SVE_PT_REGS_FPSIMD)
+		goto error;
+
+	fpsimd = (struct user_fpsimd_state *)((char *)sve +
+					      SVE_PT_FPSIMD_OFFSET);
+	for (i = 0; i < 32; ++i) {
+		p = (unsigned char *)&fpsimd->vregs[i];
+
+		for (j = 0; j < sizeof fpsimd->vregs[i]; ++j)
+			p[j] = j;
+	}
+
+	if (set_sve(pid, sve)) {
+		int e = errno;
+
+		ksft_test_result_fail("set_sve(FPSIMD): %s\n",
+				      strerror(errno));
+		if (e == ESRCH)
+			goto disappeared;
+
+		goto error;
+	}
+
+	vq = sve_vq_from_vl(sve->vl);
+
+	newsvebufsz = SVE_PT_SVE_ZREG_OFFSET(vq, 1);
+	new_sve = newsvebuf = malloc(newsvebufsz);
+	if (!new_sve) {
+		errno = ENOMEM;
+		perror(NULL);
+		goto error;
+	}
+
+	*new_sve = *sve;
+	new_sve->flags &= ~SVE_PT_REGS_MASK;
+	new_sve->flags |= SVE_PT_REGS_SVE;
+	memset((char *)new_sve + SVE_PT_SVE_ZREG_OFFSET(vq, 0),
+	       0, SVE_PT_SVE_ZREG_SIZE(vq));
+	new_sve->size = SVE_PT_SVE_ZREG_OFFSET(vq, 1);
+	if (set_sve(pid, new_sve)) {
+		int e = errno;
+
+		ksft_test_result_fail("set_sve(ZREG): %s\n", strerror(errno));
+		if (e == ESRCH)
+			goto disappeared;
+
+		goto error;
+	}
+
+	new_sve = get_sve(pid, &newsvebuf, &newsvebufsz);
+	if (!new_sve) {
+		int e = errno;
+
+		ksft_test_result_fail("get_sve(ZREG): %s\n", strerror(errno));
+		if (e == ESRCH)
+			goto disappeared;
+
+		goto error;
+	}
+
+	ksft_test_result((new_sve->flags & SVE_PT_REGS_MASK) == SVE_PT_REGS_SVE,
+			 "SVE registers\n");
+	if ((new_sve->flags & SVE_PT_REGS_MASK) != SVE_PT_REGS_SVE)
+		goto error;
+
+	dump_sve_regs(new_sve, 3, sizeof fpsimd->vregs[0]);
+
+	p = (unsigned char *)new_sve + SVE_PT_SVE_ZREG_OFFSET(vq, 1);
+	for (i = 0; i < sizeof fpsimd->vregs[0]; ++i) {
+		unsigned char expected = i;
+
+		if (__BYTE_ORDER == __BIG_ENDIAN)
+			expected = sizeof fpsimd->vregs[0] - 1 - expected;
+
+		ksft_test_result(p[i] == expected, "p[%d] == expected\n", i);
+		if (p[i] != expected)
+			goto error;
+	}
+
+	ret = EXIT_SUCCESS;
+
+error:
+	kill(child, SIGKILL);
+
+disappeared:
+	return ret;
+}
+
+int main(void)
+{
+	int ret = EXIT_SUCCESS;
+	__uint128_t v[NR_VREGS];
+	pid_t child;
+
+	ksft_print_header();
+	ksft_set_plan(20);
+
+	if (!(getauxval(AT_HWCAP) & HWCAP_SVE))
+		ksft_exit_skip("SVE not available\n");
+
+	sve_store_patterns(v);
+
+	if (!check_vregs(v))
+		ksft_exit_fail_msg("Initial check_vregs() failed\n");
+
+	child = fork();
+	if (!child)
+		return do_child();
+
+	if (do_parent(child))
+		ret = EXIT_FAILURE;
+
+	ksft_print_cnts();
+
+	return 0;
+}
diff --git a/tools/testing/selftests/arm64/fp/sve-stress b/tools/testing/selftests/arm64/fp/sve-stress
new file mode 100755
index 000000000000..24dd0922cc02
--- /dev/null
+++ b/tools/testing/selftests/arm64/fp/sve-stress
@@ -0,0 +1,59 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0-only
+# Copyright (C) 2015-2019 ARM Limited.
+# Original author: Dave Martin <Dave.Martin@arm.com>
+
+set -ue
+
+NR_CPUS=`nproc`
+
+pids=
+logs=
+
+cleanup () {
+	trap - INT TERM CHLD
+	set +e
+
+	if [ -n "$pids" ]; then
+		kill $pids
+		wait $pids
+		pids=
+	fi
+
+	if [ -n "$logs" ]; then
+		cat $logs
+		rm $logs
+		logs=
+	fi
+}
+
+interrupt () {
+	cleanup
+	exit 0
+}
+
+child_died () {
+	cleanup
+	exit 1
+}
+
+trap interrupt INT TERM EXIT
+
+for x in `seq 0 $((NR_CPUS * 4))`; do
+	log=`mktemp`
+	logs=$logs\ $log
+	./sve-test >$log &
+	pids=$pids\ $!
+done
+
+# Wait for all child processes to be created:
+sleep 10
+
+while :; do
+	kill -USR1 $pids
+done &
+pids=$pids\ $!
+
+wait
+
+exit 1
diff --git a/tools/testing/selftests/arm64/fp/sve-test.S b/tools/testing/selftests/arm64/fp/sve-test.S
new file mode 100644
index 000000000000..f95074c9b48b
--- /dev/null
+++ b/tools/testing/selftests/arm64/fp/sve-test.S
@@ -0,0 +1,672 @@
+// SPDX-License-Identifier: GPL-2.0-only
+// Copyright (C) 2015-2019 ARM Limited.
+// Original author: Dave Martin <Dave.Martin@arm.com>
+//
+// Simple Scalable Vector Extension context switch test
+// Repeatedly writes unique test patterns into each SVE register
+// and reads them back to verify integrity.
+//
+// for x in `seq 1 NR_CPUS`; do sve-test & pids=$pids\ $! ; done
+// (leave it running for as long as you want...)
+// kill $pids
+
+#include <asm/unistd.h>
+#include "assembler.h"
+#include "asm-offsets.h"
+
+#define NZR	32
+#define NPR	16
+#define MAXVL_B	(2048 / 8)
+
+.arch_extension sve
+
+.macro _sve_ldr_v zt, xn
+	ldr	z\zt, [x\xn]
+.endm
+
+.macro _sve_str_v zt, xn
+	str	z\zt, [x\xn]
+.endm
+
+.macro _sve_ldr_p pt, xn
+	ldr	p\pt, [x\xn]
+.endm
+
+.macro _sve_str_p pt, xn
+	str	p\pt, [x\xn]
+.endm
+
+// Generate accessor functions to read/write programmatically selected
+// SVE registers.
+// x0 is the register index to access
+// x1 is the memory address to read from (getz,setp) or store to (setz,setp)
+// All clobber x0-x2
+define_accessor setz, NZR, _sve_ldr_v
+define_accessor getz, NZR, _sve_str_v
+define_accessor setp, NPR, _sve_ldr_p
+define_accessor getp, NPR, _sve_str_p
+
+// Print a single character x0 to stdout
+// Clobbers x0-x2,x8
+function putc
+	str	x0, [sp, #-16]!
+
+	mov	x0, #1			// STDOUT_FILENO
+	mov	x1, sp
+	mov	x2, #1
+	mov	x8, #__NR_write
+	svc	#0
+
+	add	sp, sp, #16
+	ret
+endfunction
+
+// Print a NUL-terminated string starting at address x0 to stdout
+// Clobbers x0-x3,x8
+function puts
+	mov	x1, x0
+
+	mov	x2, #0
+0:	ldrb	w3, [x0], #1
+	cbz	w3, 1f
+	add	x2, x2, #1
+	b	0b
+
+1:	mov	w0, #1			// STDOUT_FILENO
+	mov	x8, #__NR_write
+	svc	#0
+
+	ret
+endfunction
+
+// Utility macro to print a literal string
+// Clobbers x0-x4,x8
+.macro puts string
+	.pushsection .rodata.str1.1, "aMS", 1
+.L__puts_literal\@: .string "\string"
+	.popsection
+
+	ldr	x0, =.L__puts_literal\@
+	bl	puts
+.endm
+
+// Print an unsigned decimal number x0 to stdout
+// Clobbers x0-x4,x8
+function putdec
+	mov	x1, sp
+	str	x30, [sp, #-32]!	// Result can't be > 20 digits
+
+	mov	x2, #0
+	strb	w2, [x1, #-1]!		// Write the NUL terminator
+
+	mov	x2, #10
+0:	udiv	x3, x0, x2		// div-mod loop to generate the digits
+	msub	x0, x3, x2, x0
+	add	w0, w0, #'0'
+	strb	w0, [x1, #-1]!
+	mov	x0, x3
+	cbnz	x3, 0b
+
+	ldrb	w0, [x1]
+	cbnz	w0, 1f
+	mov	w0, #'0'		// Print "0" for 0, not ""
+	strb	w0, [x1, #-1]!
+
+1:	mov	x0, x1
+	bl	puts
+
+	ldr	x30, [sp], #32
+	ret
+endfunction
+
+// Print an unsigned decimal number x0 to stdout, followed by a newline
+// Clobbers x0-x5,x8
+function putdecn
+	mov	x5, x30
+
+	bl	putdec
+	mov	x0, #'\n'
+	bl	putc
+
+	ret	x5
+endfunction
+
+// Clobbers x0-x3,x8
+function puthexb
+	str	x30, [sp, #-0x10]!
+
+	mov	w3, w0
+	lsr	w0, w0, #4
+	bl	puthexnibble
+	mov	w0, w3
+
+	ldr	x30, [sp], #0x10
+	// fall through to puthexnibble
+endfunction
+// Clobbers x0-x2,x8
+function puthexnibble
+	and	w0, w0, #0xf
+	cmp	w0, #10
+	blo	1f
+	add	w0, w0, #'a' - ('9' + 1)
+1:	add	w0, w0, #'0'
+	b	putc
+endfunction
+
+// x0=data in, x1=size in, clobbers x0-x5,x8
+function dumphex
+	str	x30, [sp, #-0x10]!
+
+	mov	x4, x0
+	mov	x5, x1
+
+0:	subs	x5, x5, #1
+	b.lo	1f
+	ldrb	w0, [x4], #1
+	bl	puthexb
+	b	0b
+
+1:	ldr	x30, [sp], #0x10
+	ret
+endfunction
+
+// Declare some storate space to shadow the SVE register contents:
+.pushsection .text
+.data
+.align 4
+zref:
+	.space	MAXVL_B * NZR
+pref:
+	.space	MAXVL_B / 8 * NPR
+ffrref:
+	.space	MAXVL_B / 8
+scratch:
+	.space	MAXVL_B
+.popsection
+
+// Trivial memory copy: copy x2 bytes, starting at address x1, to address x0.
+// Clobbers x0-x3
+function memcpy
+	cmp	x2, #0
+	b.eq	1f
+0:	ldrb	w3, [x1], #1
+	strb	w3, [x0], #1
+	subs	x2, x2, #1
+	b.ne	0b
+1:	ret
+endfunction
+
+// Generate a test pattern for storage in SVE registers
+// x0: pid	(16 bits)
+// x1: register number (6 bits)
+// x2: generation (4 bits)
+
+// These values are used to constuct a 32-bit pattern that is repeated in the
+// scratch buffer as many times as will fit:
+// bits 31:28	generation number (increments once per test_loop)
+// bits 27:22	32-bit lane index
+// bits 21:16	register number
+// bits 15: 0	pid
+
+function pattern
+	orr	w1, w0, w1, lsl #16
+	orr	w2, w1, w2, lsl #28
+
+	ldr	x0, =scratch
+	mov	w1, #MAXVL_B / 4
+
+0:	str	w2, [x0], #4
+	add	w2, w2, #(1 << 22)
+	subs	w1, w1, #1
+	bne	0b
+
+	ret
+endfunction
+
+// Get the address of shadow data for SVE Z-register Z<xn>
+.macro _adrz xd, xn, nrtmp
+	ldr	\xd, =zref
+	rdvl	x\nrtmp, #1
+	madd	\xd, x\nrtmp, \xn, \xd
+.endm
+
+// Get the address of shadow data for SVE P-register P<xn - NZR>
+.macro _adrp xd, xn, nrtmp
+	ldr	\xd, =pref
+	rdvl	x\nrtmp, #1
+	lsr	x\nrtmp, x\nrtmp, #3
+	sub	\xn, \xn, #NZR
+	madd	\xd, x\nrtmp, \xn, \xd
+.endm
+
+// Set up test pattern in a SVE Z-register
+// x0: pid
+// x1: register number
+// x2: generation
+function setup_zreg
+	mov	x4, x30
+
+	mov	x6, x1
+	bl	pattern
+	_adrz	x0, x6, 2
+	mov	x5, x0
+	ldr	x1, =scratch
+	bl	memcpy
+
+	mov	x0, x6
+	mov	x1, x5
+	bl	setz
+
+	ret	x4
+endfunction
+
+// Set up test pattern in a SVE P-register
+// x0: pid
+// x1: register number
+// x2: generation
+function setup_preg
+	mov	x4, x30
+
+	mov	x6, x1
+	bl	pattern
+	_adrp	x0, x6, 2
+	mov	x5, x0
+	ldr	x1, =scratch
+	bl	memcpy
+
+	mov	x0, x6
+	mov	x1, x5
+	bl	setp
+
+	ret	x4
+endfunction
+
+// Set up test pattern in the FFR
+// x0: pid
+// x2: generation
+// Beware: corrupts P0.
+function setup_ffr
+	mov	x4, x30
+
+	bl	pattern
+	ldr	x0, =ffrref
+	ldr	x1, =scratch
+	rdvl	x2, #1
+	lsr	x2, x2, #3
+	bl	memcpy
+
+	mov	x0, #0
+	ldr	x1, =ffrref
+	bl	setp
+
+	wrffr	p0.b
+
+	ret	x4
+endfunction
+
+// Fill x1 bytes starting at x0 with 0xae (for canary purposes)
+// Clobbers x1, x2.
+function memfill_ae
+	mov	w2, #0xae
+	b	memfill
+endfunction
+
+// Fill x1 bytes starting at x0 with 0.
+// Clobbers x1, x2.
+function memclr
+	mov	w2, #0
+endfunction
+	// fall through to memfill
+
+// Trivial memory fill: fill x1 bytes starting at address x0 with byte w2
+// Clobbers x1
+function memfill
+	cmp	x1, #0
+	b.eq	1f
+
+0:	strb	w2, [x0], #1
+	subs	x1, x1, #1
+	b.ne	0b
+
+1:	ret
+endfunction
+
+// Trivial memory compare: compare x2 bytes starting at address x0 with
+// bytes starting at address x1.
+// Returns only if all bytes match; otherwise, the program is aborted.
+// Clobbers x0-x5.
+function memcmp
+	cbz	x2, 2f
+
+	stp	x0, x1, [sp, #-0x20]!
+	str	x2, [sp, #0x10]
+
+	mov	x5, #0
+0:	ldrb	w3, [x0, x5]
+	ldrb	w4, [x1, x5]
+	add	x5, x5, #1
+	cmp	w3, w4
+	b.ne	1f
+	subs	x2, x2, #1
+	b.ne	0b
+
+1:	ldr	x2, [sp, #0x10]
+	ldp	x0, x1, [sp], #0x20
+	b.ne	barf
+
+2:	ret
+endfunction
+
+// Verify that a SVE Z-register matches its shadow in memory, else abort
+// x0: reg number
+// Clobbers x0-x7.
+function check_zreg
+	mov	x3, x30
+
+	_adrz	x5, x0, 6
+	mov	x4, x0
+	ldr	x7, =scratch
+
+	mov	x0, x7
+	mov	x1, x6
+	bl	memfill_ae
+
+	mov	x0, x4
+	mov	x1, x7
+	bl	getz
+
+	mov	x0, x5
+	mov	x1, x7
+	mov	x2, x6
+	mov	x30, x3
+	b	memcmp
+endfunction
+
+// Verify that a SVE P-register matches its shadow in memory, else abort
+// x0: reg number
+// Clobbers x0-x7.
+function check_preg
+	mov	x3, x30
+
+	_adrp	x5, x0, 6
+	mov	x4, x0
+	ldr	x7, =scratch
+
+	mov	x0, x7
+	mov	x1, x6
+	bl	memfill_ae
+
+	mov	x0, x4
+	mov	x1, x7
+	bl	getp
+
+	mov	x0, x5
+	mov	x1, x7
+	mov	x2, x6
+	mov	x30, x3
+	b	memcmp
+endfunction
+
+// Verify that the FFR matches its shadow in memory, else abort
+// Beware -- corrupts P0.
+// Clobbers x0-x5.
+function check_ffr
+	mov	x3, x30
+
+	ldr	x4, =scratch
+	rdvl	x5, #1
+	lsr	x5, x5, #3
+
+	mov	x0, x4
+	mov	x1, x5
+	bl	memfill_ae
+
+	rdffr	p0.b
+	mov	x0, #0
+	mov	x1, x4
+	bl	getp
+
+	ldr	x0, =ffrref
+	mov	x1, x4
+	mov	x2, x5
+	mov	x30, x3
+	b	memcmp
+endfunction
+
+// Any SVE register modified here can cause corruption in the main
+// thread -- but *only* the registers modified here.
+function irritator_handler
+	// Increment the irritation signal count (x23):
+	ldr	x0, [x2, #ucontext_regs + 8 * 23]
+	add	x0, x0, #1
+	str	x0, [x2, #ucontext_regs + 8 * 23]
+
+	// Corrupt some random Z-regs
+	adr	x0, .text + (irritator_handler - .text) / 16 * 16
+	movi	v0.8b, #1
+	movi	v9.16b, #2
+	movi	v31.8b, #3
+	// And P0
+	rdffr	p0.b
+	// And FFR
+	wrffr	p15.b
+
+	ret
+endfunction
+
+function terminate_handler
+	mov	w21, w0
+	mov	x20, x2
+
+	puts	"Terminated by signal "
+	mov	w0, w21
+	bl	putdec
+	puts	", no error, iterations="
+	ldr	x0, [x20, #ucontext_regs + 8 * 22]
+	bl	putdec
+	puts	", signals="
+	ldr	x0, [x20, #ucontext_regs + 8 * 23]
+	bl	putdecn
+
+	mov	x0, #0
+	mov	x8, #__NR_exit
+	svc	#0
+endfunction
+
+// w0: signal number
+// x1: sa_action
+// w2: sa_flags
+// Clobbers x0-x6,x8
+function setsignal
+	str	x30, [sp, #-((sa_sz + 15) / 16 * 16 + 16)]!
+
+	mov	w4, w0
+	mov	x5, x1
+	mov	w6, w2
+
+	add	x0, sp, #16
+	mov	x1, #sa_sz
+	bl	memclr
+
+	mov	w0, w4
+	add	x1, sp, #16
+	str	w6, [x1, #sa_flags]
+	str	x5, [x1, #sa_handler]
+	mov	x2, #0
+	mov	x3, #sa_mask_sz
+	mov	x8, #__NR_rt_sigaction
+	svc	#0
+
+	cbz	w0, 1f
+
+	puts	"sigaction failure\n"
+	b	.Labort
+
+1:	ldr	x30, [sp], #((sa_sz + 15) / 16 * 16 + 16)
+	ret
+endfunction
+
+// Main program entry point
+.globl _start
+function _start
+_start:
+	// Sanity-check and report the vector length
+
+	rdvl	x19, #8
+	cmp	x19, #128
+	b.lo	1f
+	cmp	x19, #2048
+	b.hi	1f
+	tst	x19, #(8 - 1)
+	b.eq	2f
+
+1:	puts	"Bad vector length: "
+	mov	x0, x19
+	bl	putdecn
+	b	.Labort
+
+2:	puts	"Vector length:\t"
+	mov	x0, x19
+	bl	putdec
+	puts	" bits\n"
+
+	// Obtain our PID, to ensure test pattern uniqueness between processes
+
+	mov	x8, #__NR_getpid
+	svc	#0
+	mov	x20, x0
+
+	puts	"PID:\t"
+	mov	x0, x20
+	bl	putdecn
+
+	mov	x23, #0		// Irritation signal count
+
+	mov	w0, #SIGINT
+	adr	x1, terminate_handler
+	mov	w2, #SA_SIGINFO
+	bl	setsignal
+
+	mov	w0, #SIGTERM
+	adr	x1, terminate_handler
+	mov	w2, #SA_SIGINFO
+	bl	setsignal
+
+	mov	w0, #SIGUSR1
+	adr	x1, irritator_handler
+	mov	w2, #SA_SIGINFO
+	orr	w2, w2, #SA_NODEFER
+	bl	setsignal
+
+	mov	x22, #0		// generation number, increments per iteration
+.Ltest_loop:
+	rdvl	x0, #8
+	cmp	x0, x19
+	b.ne	vl_barf
+
+	mov	x21, #0		// Set up Z-regs & shadow with test pattern
+0:	mov	x0, x20
+	mov	x1, x21
+	and	x2, x22, #0xf
+	bl	setup_zreg
+	add	x21, x21, #1
+	cmp	x21, #NZR
+	b.lo	0b
+
+	mov	x0, x20		// Set up FFR & shadow with test pattern
+	mov	x1, #NZR + NPR
+	and	x2, x22, #0xf
+	bl	setup_ffr
+
+0:	mov	x0, x20		// Set up P-regs & shadow with test pattern
+	mov	x1, x21
+	and	x2, x22, #0xf
+	bl	setup_preg
+	add	x21, x21, #1
+	cmp	x21, #NZR + NPR
+	b.lo	0b
+
+// Can't do this when SVE state is volatile across SVC:
+//	mov	x8, #__NR_sched_yield	// Encourage preemption
+//	svc	#0
+
+	mov	x21, #0
+0:	mov	x0, x21
+	bl	check_zreg
+	add	x21, x21, #1
+	cmp	x21, #NZR
+	b.lo	0b
+
+0:	mov	x0, x21
+	bl	check_preg
+	add	x21, x21, #1
+	cmp	x21, #NZR + NPR
+	b.lo	0b
+
+	bl	check_ffr
+
+	add	x22, x22, #1
+	b	.Ltest_loop
+
+.Labort:
+	mov	x0, #0
+	mov	x1, #SIGABRT
+	mov	x8, #__NR_kill
+	svc	#0
+endfunction
+
+function barf
+// fpsimd.c acitivty log dump hack
+//	ldr	w0, =0xdeadc0de
+//	mov	w8, #__NR_exit
+//	svc	#0
+// end hack
+	mov	x10, x0	// expected data
+	mov	x11, x1	// actual data
+	mov	x12, x2	// data size
+
+	puts	"Mistatch: PID="
+	mov	x0, x20
+	bl	putdec
+	puts	", iteration="
+	mov	x0, x22
+	bl	putdec
+	puts	", reg="
+	mov	x0, x21
+	bl	putdecn
+	puts	"\tExpected ["
+	mov	x0, x10
+	mov	x1, x12
+	bl	dumphex
+	puts	"]\n\tGot      ["
+	mov	x0, x11
+	mov	x1, x12
+	bl	dumphex
+	puts	"]\n"
+
+	mov	x8, #__NR_getpid
+	svc	#0
+// fpsimd.c acitivty log dump hack
+//	ldr	w0, =0xdeadc0de
+//	mov	w8, #__NR_exit
+//	svc	#0
+// ^ end of hack
+	mov	x1, #SIGABRT
+	mov	x8, #__NR_kill
+	svc	#0
+//	mov	x8, #__NR_exit
+//	mov	x1, #1
+//	svc	#0
+endfunction
+
+function vl_barf
+	mov	x10, x0
+
+	puts	"Bad active VL: "
+	mov	x0, x10
+	bl	putdecn
+
+	mov	x8, #__NR_exit
+	mov	x1, #1
+	svc	#0
+endfunction
diff --git a/tools/testing/selftests/arm64/fp/vlset.c b/tools/testing/selftests/arm64/fp/vlset.c
new file mode 100644
index 000000000000..308d27a68226
--- /dev/null
+++ b/tools/testing/selftests/arm64/fp/vlset.c
@@ -0,0 +1,155 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2015-2019 ARM Limited.
+ * Original author: Dave Martin <Dave.Martin@arm.com>
+ */
+#define _GNU_SOURCE
+#include <assert.h>
+#include <errno.h>
+#include <limits.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <getopt.h>
+#include <unistd.h>
+#include <sys/auxv.h>
+#include <sys/prctl.h>
+#include <asm/hwcap.h>
+#include <asm/sigcontext.h>
+
+static int inherit = 0;
+static int no_inherit = 0;
+static int force = 0;
+static unsigned long vl;
+
+static const struct option options[] = {
+	{ "force",	no_argument, NULL, 'f' },
+	{ "inherit",	no_argument, NULL, 'i' },
+	{ "max",	no_argument, NULL, 'M' },
+	{ "no-inherit",	no_argument, &no_inherit, 1 },
+	{ "help",	no_argument, NULL, '?' },
+	{}
+};
+
+static char const *program_name;
+
+static int parse_options(int argc, char **argv)
+{
+	int c;
+	char *rest;
+
+	program_name = strrchr(argv[0], '/');
+	if (program_name)
+		++program_name;
+	else
+		program_name = argv[0];
+
+	while ((c = getopt_long(argc, argv, "Mfhi", options, NULL)) != -1)
+		switch (c) {
+		case 'M':	vl = SVE_VL_MAX; break;
+		case 'f':	force = 1; break;
+		case 'i':	inherit = 1; break;
+		case 0:		break;
+		default:	goto error;
+		}
+
+	if (inherit && no_inherit)
+		goto error;
+
+	if (!vl) {
+		/* vector length */
+		if (optind >= argc)
+			goto error;
+
+		errno = 0;
+		vl = strtoul(argv[optind], &rest, 0);
+		if (*rest) {
+			vl = ULONG_MAX;
+			errno = EINVAL;
+		}
+		if (vl == ULONG_MAX && errno) {
+			fprintf(stderr, "%s: %s: %s\n",
+				program_name, argv[optind], strerror(errno));
+			goto error;
+		}
+
+		++optind;
+	}
+
+	/* command */
+	if (optind >= argc)
+		goto error;
+
+	return 0;
+
+error:
+	fprintf(stderr,
+		"Usage: %s [-f | --force] "
+		"[-i | --inherit | --no-inherit] "
+		"{-M | --max | <vector length>} "
+		"<command> [<arguments> ...]\n",
+		program_name);
+	return -1;
+}
+
+int main(int argc, char **argv)
+{
+	int ret = 126;	/* same as sh(1) command-not-executable error */
+	long flags;
+	char *path;
+	int t, e;
+
+	if (parse_options(argc, argv))
+		return 2;	/* same as sh(1) builtin incorrect-usage */
+
+	if (vl & ~(vl & PR_SVE_VL_LEN_MASK)) {
+		fprintf(stderr, "%s: Invalid vector length %lu\n",
+			program_name, vl);
+		return 2;	/* same as sh(1) builtin incorrect-usage */
+	}
+
+	if (!(getauxval(AT_HWCAP) & HWCAP_SVE)) {
+		fprintf(stderr, "%s: Scalable Vector Extension not present\n",
+			program_name);
+
+		if (!force)
+			goto error;
+
+		fputs("Going ahead anyway (--force):  "
+		      "This is a debug option.  Don't rely on it.\n",
+		      stderr);
+	}
+
+	flags = PR_SVE_SET_VL_ONEXEC;
+	if (inherit)
+		flags |= PR_SVE_VL_INHERIT;
+
+	t = prctl(PR_SVE_SET_VL, vl | flags);
+	if (t < 0) {
+		fprintf(stderr, "%s: PR_SVE_SET_VL: %s\n",
+			program_name, strerror(errno));
+		goto error;
+	}
+
+	t = prctl(PR_SVE_GET_VL);
+	if (t == -1) {
+		fprintf(stderr, "%s: PR_SVE_GET_VL: %s\n",
+			program_name, strerror(errno));
+		goto error;
+	}
+	flags = PR_SVE_VL_LEN_MASK;
+	flags = t & ~flags;
+
+	assert(optind < argc);
+	path = argv[optind];
+
+	execvp(path, &argv[optind]);
+	e = errno;
+	if (errno == ENOENT)
+		ret = 127;	/* same as sh(1) not-found error */
+	fprintf(stderr, "%s: %s: %s\n", program_name, path, strerror(e));
+
+error:
+	return ret;		/* same as sh(1) not-executable error */
+}
diff --git a/tools/testing/selftests/arm64/mte/.gitignore b/tools/testing/selftests/arm64/mte/.gitignore
new file mode 100644
index 000000000000..bc3ac63f3314
--- /dev/null
+++ b/tools/testing/selftests/arm64/mte/.gitignore
@@ -0,0 +1,6 @@
+check_buffer_fill
+check_tags_inclusion
+check_child_memory
+check_mmap_options
+check_ksm_options
+check_user_mem
diff --git a/tools/testing/selftests/arm64/mte/Makefile b/tools/testing/selftests/arm64/mte/Makefile
new file mode 100644
index 000000000000..2480226dfe57
--- /dev/null
+++ b/tools/testing/selftests/arm64/mte/Makefile
@@ -0,0 +1,29 @@
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (C) 2020 ARM Limited
+
+CFLAGS += -std=gnu99 -I.
+SRCS := $(filter-out mte_common_util.c,$(wildcard *.c))
+PROGS := $(patsubst %.c,%,$(SRCS))
+
+#Add mte compiler option
+ifneq ($(shell $(CC) --version 2>&1 | head -n 1 | grep gcc),)
+CFLAGS += -march=armv8.5-a+memtag
+endif
+
+#check if the compiler works well
+mte_cc_support := $(shell if ($(CC) $(CFLAGS) -E -x c /dev/null -o /dev/null 2>&1) then echo "1"; fi)
+
+ifeq ($(mte_cc_support),1)
+# Generated binaries to be installed by top KSFT script
+TEST_GEN_PROGS := $(PROGS)
+
+# Get Kernel headers installed and use them.
+KSFT_KHDR_INSTALL := 1
+endif
+
+# Include KSFT lib.mk.
+include ../../lib.mk
+
+ifeq ($(mte_cc_support),1)
+$(TEST_GEN_PROGS): mte_common_util.c mte_common_util.h mte_helper.S
+endif
diff --git a/tools/testing/selftests/arm64/mte/check_buffer_fill.c b/tools/testing/selftests/arm64/mte/check_buffer_fill.c
new file mode 100644
index 000000000000..242635d79035
--- /dev/null
+++ b/tools/testing/selftests/arm64/mte/check_buffer_fill.c
@@ -0,0 +1,475 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2020 ARM Limited
+
+#define _GNU_SOURCE
+
+#include <stddef.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "kselftest.h"
+#include "mte_common_util.h"
+#include "mte_def.h"
+
+#define OVERFLOW_RANGE MT_GRANULE_SIZE
+
+static int sizes[] = {
+	1, 555, 1033, MT_GRANULE_SIZE - 1, MT_GRANULE_SIZE,
+	/* page size - 1*/ 0, /* page_size */ 0, /* page size + 1 */ 0
+};
+
+enum mte_block_test_alloc {
+	UNTAGGED_TAGGED,
+	TAGGED_UNTAGGED,
+	TAGGED_TAGGED,
+	BLOCK_ALLOC_MAX,
+};
+
+static int check_buffer_by_byte(int mem_type, int mode)
+{
+	char *ptr;
+	int i, j, item;
+	bool err;
+
+	mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG);
+	item = sizeof(sizes)/sizeof(int);
+
+	for (i = 0; i < item; i++) {
+		ptr = (char *)mte_allocate_memory(sizes[i], mem_type, 0, true);
+		if (check_allocated_memory(ptr, sizes[i], mem_type, true) != KSFT_PASS)
+			return KSFT_FAIL;
+		mte_initialize_current_context(mode, (uintptr_t)ptr, sizes[i]);
+		/* Set some value in tagged memory */
+		for (j = 0; j < sizes[i]; j++)
+			ptr[j] = '1';
+		mte_wait_after_trig();
+		err = cur_mte_cxt.fault_valid;
+		/* Check the buffer whether it is filled. */
+		for (j = 0; j < sizes[i] && !err; j++) {
+			if (ptr[j] != '1')
+				err = true;
+		}
+		mte_free_memory((void *)ptr, sizes[i], mem_type, true);
+
+		if (err)
+			break;
+	}
+	if (!err)
+		return KSFT_PASS;
+	else
+		return KSFT_FAIL;
+}
+
+static int check_buffer_underflow_by_byte(int mem_type, int mode,
+					  int underflow_range)
+{
+	char *ptr;
+	int i, j, item, last_index;
+	bool err;
+	char *und_ptr = NULL;
+
+	mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG);
+	item = sizeof(sizes)/sizeof(int);
+	for (i = 0; i < item; i++) {
+		ptr = (char *)mte_allocate_memory_tag_range(sizes[i], mem_type, 0,
+							    underflow_range, 0);
+		if (check_allocated_memory_range(ptr, sizes[i], mem_type,
+					       underflow_range, 0) != KSFT_PASS)
+			return KSFT_FAIL;
+
+		mte_initialize_current_context(mode, (uintptr_t)ptr, -underflow_range);
+		last_index = 0;
+		/* Set some value in tagged memory and make the buffer underflow */
+		for (j = sizes[i] - 1; (j >= -underflow_range) &&
+				       (cur_mte_cxt.fault_valid == false); j--) {
+			ptr[j] = '1';
+			last_index = j;
+		}
+		mte_wait_after_trig();
+		err = false;
+		/* Check whether the buffer is filled */
+		for (j = 0; j < sizes[i]; j++) {
+			if (ptr[j] != '1') {
+				err = true;
+				ksft_print_msg("Buffer is not filled at index:%d of ptr:0x%lx\n",
+						j, ptr);
+				break;
+			}
+		}
+		if (err)
+			goto check_buffer_underflow_by_byte_err;
+
+		switch (mode) {
+		case MTE_NONE_ERR:
+			if (cur_mte_cxt.fault_valid == true || last_index != -underflow_range) {
+				err = true;
+				break;
+			}
+			/* There were no fault so the underflow area should be filled */
+			und_ptr = (char *) MT_CLEAR_TAG((size_t) ptr - underflow_range);
+			for (j = 0 ; j < underflow_range; j++) {
+				if (und_ptr[j] != '1') {
+					err = true;
+					break;
+				}
+			}
+			break;
+		case MTE_ASYNC_ERR:
+			/* Imprecise fault should occur otherwise return error */
+			if (cur_mte_cxt.fault_valid == false) {
+				err = true;
+				break;
+			}
+			/*
+			 * The imprecise fault is checked after the write to the buffer,
+			 * so the underflow area before the fault should be filled.
+			 */
+			und_ptr = (char *) MT_CLEAR_TAG((size_t) ptr);
+			for (j = last_index ; j < 0 ; j++) {
+				if (und_ptr[j] != '1') {
+					err = true;
+					break;
+				}
+			}
+			break;
+		case MTE_SYNC_ERR:
+			/* Precise fault should occur otherwise return error */
+			if (!cur_mte_cxt.fault_valid || (last_index != (-1))) {
+				err = true;
+				break;
+			}
+			/* Underflow area should not be filled */
+			und_ptr = (char *) MT_CLEAR_TAG((size_t) ptr);
+			if (und_ptr[-1] == '1')
+				err = true;
+			break;
+		default:
+			err = true;
+		break;
+		}
+check_buffer_underflow_by_byte_err:
+		mte_free_memory_tag_range((void *)ptr, sizes[i], mem_type, underflow_range, 0);
+		if (err)
+			break;
+	}
+	return (err ? KSFT_FAIL : KSFT_PASS);
+}
+
+static int check_buffer_overflow_by_byte(int mem_type, int mode,
+					  int overflow_range)
+{
+	char *ptr;
+	int i, j, item, last_index;
+	bool err;
+	size_t tagged_size, overflow_size;
+	char *over_ptr = NULL;
+
+	mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG);
+	item = sizeof(sizes)/sizeof(int);
+	for (i = 0; i < item; i++) {
+		ptr = (char *)mte_allocate_memory_tag_range(sizes[i], mem_type, 0,
+							    0, overflow_range);
+		if (check_allocated_memory_range(ptr, sizes[i], mem_type,
+						 0, overflow_range) != KSFT_PASS)
+			return KSFT_FAIL;
+
+		tagged_size = MT_ALIGN_UP(sizes[i]);
+
+		mte_initialize_current_context(mode, (uintptr_t)ptr, sizes[i] + overflow_range);
+
+		/* Set some value in tagged memory and make the buffer underflow */
+		for (j = 0, last_index = 0 ; (j < (sizes[i] + overflow_range)) &&
+					     (cur_mte_cxt.fault_valid == false); j++) {
+			ptr[j] = '1';
+			last_index = j;
+		}
+		mte_wait_after_trig();
+		err = false;
+		/* Check whether the buffer is filled */
+		for (j = 0; j < sizes[i]; j++) {
+			if (ptr[j] != '1') {
+				err = true;
+				ksft_print_msg("Buffer is not filled at index:%d of ptr:0x%lx\n",
+						j, ptr);
+				break;
+			}
+		}
+		if (err)
+			goto check_buffer_overflow_by_byte_err;
+
+		overflow_size = overflow_range - (tagged_size - sizes[i]);
+
+		switch (mode) {
+		case MTE_NONE_ERR:
+			if ((cur_mte_cxt.fault_valid == true) ||
+			    (last_index != (sizes[i] + overflow_range - 1))) {
+				err = true;
+				break;
+			}
+			/* There were no fault so the overflow area should be filled */
+			over_ptr = (char *) MT_CLEAR_TAG((size_t) ptr + tagged_size);
+			for (j = 0 ; j < overflow_size; j++) {
+				if (over_ptr[j] != '1') {
+					err = true;
+					break;
+				}
+			}
+			break;
+		case MTE_ASYNC_ERR:
+			/* Imprecise fault should occur otherwise return error */
+			if (cur_mte_cxt.fault_valid == false) {
+				err = true;
+				break;
+			}
+			/*
+			 * The imprecise fault is checked after the write to the buffer,
+			 * so the overflow area should be filled before the fault.
+			 */
+			over_ptr = (char *) MT_CLEAR_TAG((size_t) ptr);
+			for (j = tagged_size ; j < last_index; j++) {
+				if (over_ptr[j] != '1') {
+					err = true;
+					break;
+				}
+			}
+			break;
+		case MTE_SYNC_ERR:
+			/* Precise fault should occur otherwise return error */
+			if (!cur_mte_cxt.fault_valid || (last_index != tagged_size)) {
+				err = true;
+				break;
+			}
+			/* Underflow area should not be filled */
+			over_ptr = (char *) MT_CLEAR_TAG((size_t) ptr + tagged_size);
+			for (j = 0 ; j < overflow_size; j++) {
+				if (over_ptr[j] == '1')
+					err = true;
+			}
+			break;
+		default:
+			err = true;
+		break;
+		}
+check_buffer_overflow_by_byte_err:
+		mte_free_memory_tag_range((void *)ptr, sizes[i], mem_type, 0, overflow_range);
+		if (err)
+			break;
+	}
+	return (err ? KSFT_FAIL : KSFT_PASS);
+}
+
+static int check_buffer_by_block_iterate(int mem_type, int mode, size_t size)
+{
+	char *src, *dst;
+	int j, result = KSFT_PASS;
+	enum mte_block_test_alloc alloc_type = UNTAGGED_TAGGED;
+
+	for (alloc_type = UNTAGGED_TAGGED; alloc_type < (int) BLOCK_ALLOC_MAX; alloc_type++) {
+		switch (alloc_type) {
+		case UNTAGGED_TAGGED:
+			src = (char *)mte_allocate_memory(size, mem_type, 0, false);
+			if (check_allocated_memory(src, size, mem_type, false) != KSFT_PASS)
+				return KSFT_FAIL;
+
+			dst = (char *)mte_allocate_memory(size, mem_type, 0, true);
+			if (check_allocated_memory(dst, size, mem_type, true) != KSFT_PASS) {
+				mte_free_memory((void *)src, size, mem_type, false);
+				return KSFT_FAIL;
+			}
+
+			break;
+		case TAGGED_UNTAGGED:
+			dst = (char *)mte_allocate_memory(size, mem_type, 0, false);
+			if (check_allocated_memory(dst, size, mem_type, false) != KSFT_PASS)
+				return KSFT_FAIL;
+
+			src = (char *)mte_allocate_memory(size, mem_type, 0, true);
+			if (check_allocated_memory(src, size, mem_type, true) != KSFT_PASS) {
+				mte_free_memory((void *)dst, size, mem_type, false);
+				return KSFT_FAIL;
+			}
+			break;
+		case TAGGED_TAGGED:
+			src = (char *)mte_allocate_memory(size, mem_type, 0, true);
+			if (check_allocated_memory(src, size, mem_type, true) != KSFT_PASS)
+				return KSFT_FAIL;
+
+			dst = (char *)mte_allocate_memory(size, mem_type, 0, true);
+			if (check_allocated_memory(dst, size, mem_type, true) != KSFT_PASS) {
+				mte_free_memory((void *)src, size, mem_type, true);
+				return KSFT_FAIL;
+			}
+			break;
+		default:
+			return KSFT_FAIL;
+		}
+
+		cur_mte_cxt.fault_valid = false;
+		result = KSFT_PASS;
+		mte_initialize_current_context(mode, (uintptr_t)dst, size);
+		/* Set some value in memory and copy*/
+		memset((void *)src, (int)'1', size);
+		memcpy((void *)dst, (void *)src, size);
+		mte_wait_after_trig();
+		if (cur_mte_cxt.fault_valid) {
+			result = KSFT_FAIL;
+			goto check_buffer_by_block_err;
+		}
+		/* Check the buffer whether it is filled. */
+		for (j = 0; j < size; j++) {
+			if (src[j] != dst[j] || src[j] != '1') {
+				result = KSFT_FAIL;
+				break;
+			}
+		}
+check_buffer_by_block_err:
+		mte_free_memory((void *)src, size, mem_type,
+				MT_FETCH_TAG((uintptr_t)src) ? true : false);
+		mte_free_memory((void *)dst, size, mem_type,
+				MT_FETCH_TAG((uintptr_t)dst) ? true : false);
+		if (result != KSFT_PASS)
+			return result;
+	}
+	return result;
+}
+
+static int check_buffer_by_block(int mem_type, int mode)
+{
+	int i, item, result = KSFT_PASS;
+
+	mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG);
+	item = sizeof(sizes)/sizeof(int);
+	cur_mte_cxt.fault_valid = false;
+	for (i = 0; i < item; i++) {
+		result = check_buffer_by_block_iterate(mem_type, mode, sizes[i]);
+		if (result != KSFT_PASS)
+			break;
+	}
+	return result;
+}
+
+static int compare_memory_tags(char *ptr, size_t size, int tag)
+{
+	int i, new_tag;
+
+	for (i = 0 ; i < size ; i += MT_GRANULE_SIZE) {
+		new_tag = MT_FETCH_TAG((uintptr_t)(mte_get_tag_address(ptr + i)));
+		if (tag != new_tag) {
+			ksft_print_msg("FAIL: child mte tag mismatch\n");
+			return KSFT_FAIL;
+		}
+	}
+	return KSFT_PASS;
+}
+
+static int check_memory_initial_tags(int mem_type, int mode, int mapping)
+{
+	char *ptr;
+	int run, fd;
+	int total = sizeof(sizes)/sizeof(int);
+
+	mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG);
+	for (run = 0; run < total; run++) {
+		/* check initial tags for anonymous mmap */
+		ptr = (char *)mte_allocate_memory(sizes[run], mem_type, mapping, false);
+		if (check_allocated_memory(ptr, sizes[run], mem_type, false) != KSFT_PASS)
+			return KSFT_FAIL;
+		if (compare_memory_tags(ptr, sizes[run], 0) != KSFT_PASS) {
+			mte_free_memory((void *)ptr, sizes[run], mem_type, false);
+			return KSFT_FAIL;
+		}
+		mte_free_memory((void *)ptr, sizes[run], mem_type, false);
+
+		/* check initial tags for file mmap */
+		fd = create_temp_file();
+		if (fd == -1)
+			return KSFT_FAIL;
+		ptr = (char *)mte_allocate_file_memory(sizes[run], mem_type, mapping, false, fd);
+		if (check_allocated_memory(ptr, sizes[run], mem_type, false) != KSFT_PASS) {
+			close(fd);
+			return KSFT_FAIL;
+		}
+		if (compare_memory_tags(ptr, sizes[run], 0) != KSFT_PASS) {
+			mte_free_memory((void *)ptr, sizes[run], mem_type, false);
+			close(fd);
+			return KSFT_FAIL;
+		}
+		mte_free_memory((void *)ptr, sizes[run], mem_type, false);
+		close(fd);
+	}
+	return KSFT_PASS;
+}
+
+int main(int argc, char *argv[])
+{
+	int err;
+	size_t page_size = getpagesize();
+	int item = sizeof(sizes)/sizeof(int);
+
+	sizes[item - 3] = page_size - 1;
+	sizes[item - 2] = page_size;
+	sizes[item - 1] = page_size + 1;
+
+	err = mte_default_setup();
+	if (err)
+		return err;
+
+	/* Register SIGSEGV handler */
+	mte_register_signal(SIGSEGV, mte_default_handler);
+
+	/* Buffer by byte tests */
+	evaluate_test(check_buffer_by_byte(USE_MMAP, MTE_SYNC_ERR),
+	"Check buffer correctness by byte with sync err mode and mmap memory\n");
+	evaluate_test(check_buffer_by_byte(USE_MMAP, MTE_ASYNC_ERR),
+	"Check buffer correctness by byte with async err mode and mmap memory\n");
+	evaluate_test(check_buffer_by_byte(USE_MPROTECT, MTE_SYNC_ERR),
+	"Check buffer correctness by byte with sync err mode and mmap/mprotect memory\n");
+	evaluate_test(check_buffer_by_byte(USE_MPROTECT, MTE_ASYNC_ERR),
+	"Check buffer correctness by byte with async err mode and mmap/mprotect memory\n");
+
+	/* Check buffer underflow with underflow size as 16 */
+	evaluate_test(check_buffer_underflow_by_byte(USE_MMAP, MTE_SYNC_ERR, MT_GRANULE_SIZE),
+	"Check buffer write underflow by byte with sync mode and mmap memory\n");
+	evaluate_test(check_buffer_underflow_by_byte(USE_MMAP, MTE_ASYNC_ERR, MT_GRANULE_SIZE),
+	"Check buffer write underflow by byte with async mode and mmap memory\n");
+	evaluate_test(check_buffer_underflow_by_byte(USE_MMAP, MTE_NONE_ERR, MT_GRANULE_SIZE),
+	"Check buffer write underflow by byte with tag check fault ignore and mmap memory\n");
+
+	/* Check buffer underflow with underflow size as page size */
+	evaluate_test(check_buffer_underflow_by_byte(USE_MMAP, MTE_SYNC_ERR, page_size),
+	"Check buffer write underflow by byte with sync mode and mmap memory\n");
+	evaluate_test(check_buffer_underflow_by_byte(USE_MMAP, MTE_ASYNC_ERR, page_size),
+	"Check buffer write underflow by byte with async mode and mmap memory\n");
+	evaluate_test(check_buffer_underflow_by_byte(USE_MMAP, MTE_NONE_ERR, page_size),
+	"Check buffer write underflow by byte with tag check fault ignore and mmap memory\n");
+
+	/* Check buffer overflow with overflow size as 16 */
+	evaluate_test(check_buffer_overflow_by_byte(USE_MMAP, MTE_SYNC_ERR, MT_GRANULE_SIZE),
+	"Check buffer write overflow by byte with sync mode and mmap memory\n");
+	evaluate_test(check_buffer_overflow_by_byte(USE_MMAP, MTE_ASYNC_ERR, MT_GRANULE_SIZE),
+	"Check buffer write overflow by byte with async mode and mmap memory\n");
+	evaluate_test(check_buffer_overflow_by_byte(USE_MMAP, MTE_NONE_ERR, MT_GRANULE_SIZE),
+	"Check buffer write overflow by byte with tag fault ignore mode and mmap memory\n");
+
+	/* Buffer by block tests */
+	evaluate_test(check_buffer_by_block(USE_MMAP, MTE_SYNC_ERR),
+	"Check buffer write correctness by block with sync mode and mmap memory\n");
+	evaluate_test(check_buffer_by_block(USE_MMAP, MTE_ASYNC_ERR),
+	"Check buffer write correctness by block with async mode and mmap memory\n");
+	evaluate_test(check_buffer_by_block(USE_MMAP, MTE_NONE_ERR),
+	"Check buffer write correctness by block with tag fault ignore and mmap memory\n");
+
+	/* Initial tags are supposed to be 0 */
+	evaluate_test(check_memory_initial_tags(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE),
+	"Check initial tags with private mapping, sync error mode and mmap memory\n");
+	evaluate_test(check_memory_initial_tags(USE_MPROTECT, MTE_SYNC_ERR, MAP_PRIVATE),
+	"Check initial tags with private mapping, sync error mode and mmap/mprotect memory\n");
+	evaluate_test(check_memory_initial_tags(USE_MMAP, MTE_SYNC_ERR, MAP_SHARED),
+	"Check initial tags with shared mapping, sync error mode and mmap memory\n");
+	evaluate_test(check_memory_initial_tags(USE_MPROTECT, MTE_SYNC_ERR, MAP_SHARED),
+	"Check initial tags with shared mapping, sync error mode and mmap/mprotect memory\n");
+
+	mte_restore_setup();
+	ksft_print_cnts();
+	return ksft_get_fail_cnt() == 0 ? KSFT_PASS : KSFT_FAIL;
+}
diff --git a/tools/testing/selftests/arm64/mte/check_child_memory.c b/tools/testing/selftests/arm64/mte/check_child_memory.c
new file mode 100644
index 000000000000..97bebdecd29e
--- /dev/null
+++ b/tools/testing/selftests/arm64/mte/check_child_memory.c
@@ -0,0 +1,195 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2020 ARM Limited
+
+#define _GNU_SOURCE
+
+#include <errno.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ucontext.h>
+#include <sys/wait.h>
+
+#include "kselftest.h"
+#include "mte_common_util.h"
+#include "mte_def.h"
+
+#define BUFFER_SIZE		(5 * MT_GRANULE_SIZE)
+#define RUNS			(MT_TAG_COUNT)
+#define UNDERFLOW		MT_GRANULE_SIZE
+#define OVERFLOW		MT_GRANULE_SIZE
+
+static size_t page_size;
+static int sizes[] = {
+	1, 537, 989, 1269, MT_GRANULE_SIZE - 1, MT_GRANULE_SIZE,
+	/* page size - 1*/ 0, /* page_size */ 0, /* page size + 1 */ 0
+};
+
+static int check_child_tag_inheritance(char *ptr, int size, int mode)
+{
+	int i, parent_tag, child_tag, fault, child_status;
+	pid_t child;
+
+	parent_tag = MT_FETCH_TAG((uintptr_t)ptr);
+	fault = 0;
+
+	child = fork();
+	if (child == -1) {
+		ksft_print_msg("FAIL: child process creation\n");
+		return KSFT_FAIL;
+	} else if (child == 0) {
+		mte_initialize_current_context(mode, (uintptr_t)ptr, size);
+		/* Do copy on write */
+		memset(ptr, '1', size);
+		mte_wait_after_trig();
+		if (cur_mte_cxt.fault_valid == true) {
+			fault = 1;
+			goto check_child_tag_inheritance_err;
+		}
+		for (i = 0 ; i < size ; i += MT_GRANULE_SIZE) {
+			child_tag = MT_FETCH_TAG((uintptr_t)(mte_get_tag_address(ptr + i)));
+			if (parent_tag != child_tag) {
+				ksft_print_msg("FAIL: child mte tag mismatch\n");
+				fault = 1;
+				goto check_child_tag_inheritance_err;
+			}
+		}
+		mte_initialize_current_context(mode, (uintptr_t)ptr, -UNDERFLOW);
+		memset(ptr - UNDERFLOW, '2', UNDERFLOW);
+		mte_wait_after_trig();
+		if (cur_mte_cxt.fault_valid == false) {
+			fault = 1;
+			goto check_child_tag_inheritance_err;
+		}
+		mte_initialize_current_context(mode, (uintptr_t)ptr, size + OVERFLOW);
+		memset(ptr + size, '3', OVERFLOW);
+		mte_wait_after_trig();
+		if (cur_mte_cxt.fault_valid == false) {
+			fault = 1;
+			goto check_child_tag_inheritance_err;
+		}
+check_child_tag_inheritance_err:
+		_exit(fault);
+	}
+	/* Wait for child process to terminate */
+	wait(&child_status);
+	if (WIFEXITED(child_status))
+		fault = WEXITSTATUS(child_status);
+	else
+		fault = 1;
+	return (fault) ? KSFT_FAIL : KSFT_PASS;
+}
+
+static int check_child_memory_mapping(int mem_type, int mode, int mapping)
+{
+	char *ptr;
+	int run, result;
+	int item = sizeof(sizes)/sizeof(int);
+
+	item = sizeof(sizes)/sizeof(int);
+	mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG);
+	for (run = 0; run < item; run++) {
+		ptr = (char *)mte_allocate_memory_tag_range(sizes[run], mem_type, mapping,
+							    UNDERFLOW, OVERFLOW);
+		if (check_allocated_memory_range(ptr, sizes[run], mem_type,
+						 UNDERFLOW, OVERFLOW) != KSFT_PASS)
+			return KSFT_FAIL;
+		result = check_child_tag_inheritance(ptr, sizes[run], mode);
+		mte_free_memory_tag_range((void *)ptr, sizes[run], mem_type, UNDERFLOW, OVERFLOW);
+		if (result == KSFT_FAIL)
+			return result;
+	}
+	return KSFT_PASS;
+}
+
+static int check_child_file_mapping(int mem_type, int mode, int mapping)
+{
+	char *ptr, *map_ptr;
+	int run, fd, map_size, result = KSFT_PASS;
+	int total = sizeof(sizes)/sizeof(int);
+
+	mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG);
+	for (run = 0; run < total; run++) {
+		fd = create_temp_file();
+		if (fd == -1)
+			return KSFT_FAIL;
+
+		map_size = sizes[run] + OVERFLOW + UNDERFLOW;
+		map_ptr = (char *)mte_allocate_file_memory(map_size, mem_type, mapping, false, fd);
+		if (check_allocated_memory(map_ptr, map_size, mem_type, false) != KSFT_PASS) {
+			close(fd);
+			return KSFT_FAIL;
+		}
+		ptr = map_ptr + UNDERFLOW;
+		mte_initialize_current_context(mode, (uintptr_t)ptr, sizes[run]);
+		/* Only mte enabled memory will allow tag insertion */
+		ptr = mte_insert_tags((void *)ptr, sizes[run]);
+		if (!ptr || cur_mte_cxt.fault_valid == true) {
+			ksft_print_msg("FAIL: Insert tags on file based memory\n");
+			munmap((void *)map_ptr, map_size);
+			close(fd);
+			return KSFT_FAIL;
+		}
+		result = check_child_tag_inheritance(ptr, sizes[run], mode);
+		mte_clear_tags((void *)ptr, sizes[run]);
+		munmap((void *)map_ptr, map_size);
+		close(fd);
+		if (result != KSFT_PASS)
+			return KSFT_FAIL;
+	}
+	return KSFT_PASS;
+}
+
+int main(int argc, char *argv[])
+{
+	int err;
+	int item = sizeof(sizes)/sizeof(int);
+
+	page_size = getpagesize();
+	if (!page_size) {
+		ksft_print_msg("ERR: Unable to get page size\n");
+		return KSFT_FAIL;
+	}
+	sizes[item - 3] = page_size - 1;
+	sizes[item - 2] = page_size;
+	sizes[item - 1] = page_size + 1;
+
+	err = mte_default_setup();
+	if (err)
+		return err;
+
+	/* Register SIGSEGV handler */
+	mte_register_signal(SIGSEGV, mte_default_handler);
+	mte_register_signal(SIGBUS, mte_default_handler);
+
+	evaluate_test(check_child_memory_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE),
+		"Check child anonymous memory with private mapping, precise mode and mmap memory\n");
+	evaluate_test(check_child_memory_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_SHARED),
+		"Check child anonymous memory with shared mapping, precise mode and mmap memory\n");
+	evaluate_test(check_child_memory_mapping(USE_MMAP, MTE_ASYNC_ERR, MAP_PRIVATE),
+		"Check child anonymous memory with private mapping, imprecise mode and mmap memory\n");
+	evaluate_test(check_child_memory_mapping(USE_MMAP, MTE_ASYNC_ERR, MAP_SHARED),
+		"Check child anonymous memory with shared mapping, imprecise mode and mmap memory\n");
+	evaluate_test(check_child_memory_mapping(USE_MPROTECT, MTE_SYNC_ERR, MAP_PRIVATE),
+		"Check child anonymous memory with private mapping, precise mode and mmap/mprotect memory\n");
+	evaluate_test(check_child_memory_mapping(USE_MPROTECT, MTE_SYNC_ERR, MAP_SHARED),
+		"Check child anonymous memory with shared mapping, precise mode and mmap/mprotect memory\n");
+
+	evaluate_test(check_child_file_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE),
+		"Check child file memory with private mapping, precise mode and mmap memory\n");
+	evaluate_test(check_child_file_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_SHARED),
+		"Check child file memory with shared mapping, precise mode and mmap memory\n");
+	evaluate_test(check_child_memory_mapping(USE_MMAP, MTE_ASYNC_ERR, MAP_PRIVATE),
+		"Check child file memory with private mapping, imprecise mode and mmap memory\n");
+	evaluate_test(check_child_memory_mapping(USE_MMAP, MTE_ASYNC_ERR, MAP_SHARED),
+		"Check child file memory with shared mapping, imprecise mode and mmap memory\n");
+	evaluate_test(check_child_memory_mapping(USE_MPROTECT, MTE_SYNC_ERR, MAP_PRIVATE),
+		"Check child file memory with private mapping, precise mode and mmap/mprotect memory\n");
+	evaluate_test(check_child_memory_mapping(USE_MPROTECT, MTE_SYNC_ERR, MAP_SHARED),
+		"Check child file memory with shared mapping, precise mode and mmap/mprotect memory\n");
+
+	mte_restore_setup();
+	ksft_print_cnts();
+	return ksft_get_fail_cnt() == 0 ? KSFT_PASS : KSFT_FAIL;
+}
diff --git a/tools/testing/selftests/arm64/mte/check_ksm_options.c b/tools/testing/selftests/arm64/mte/check_ksm_options.c
new file mode 100644
index 000000000000..bc41ae630c86
--- /dev/null
+++ b/tools/testing/selftests/arm64/mte/check_ksm_options.c
@@ -0,0 +1,159 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2020 ARM Limited
+
+#define _GNU_SOURCE
+
+#include <errno.h>
+#include <fcntl.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ucontext.h>
+#include <sys/mman.h>
+
+#include "kselftest.h"
+#include "mte_common_util.h"
+#include "mte_def.h"
+
+#define TEST_UNIT	10
+#define PATH_KSM	"/sys/kernel/mm/ksm/"
+#define MAX_LOOP	4
+
+static size_t page_sz;
+static unsigned long ksm_sysfs[5];
+
+static unsigned long read_sysfs(char *str)
+{
+	FILE *f;
+	unsigned long val = 0;
+
+	f = fopen(str, "r");
+	if (!f) {
+		ksft_print_msg("ERR: missing %s\n", str);
+		return 0;
+	}
+	fscanf(f, "%lu", &val);
+	fclose(f);
+	return val;
+}
+
+static void write_sysfs(char *str, unsigned long val)
+{
+	FILE *f;
+
+	f = fopen(str, "w");
+	if (!f) {
+		ksft_print_msg("ERR: missing %s\n", str);
+		return;
+	}
+	fprintf(f, "%lu", val);
+	fclose(f);
+}
+
+static void mte_ksm_setup(void)
+{
+	ksm_sysfs[0] = read_sysfs(PATH_KSM "merge_across_nodes");
+	write_sysfs(PATH_KSM "merge_across_nodes", 1);
+	ksm_sysfs[1] = read_sysfs(PATH_KSM "sleep_millisecs");
+	write_sysfs(PATH_KSM "sleep_millisecs", 0);
+	ksm_sysfs[2] = read_sysfs(PATH_KSM "run");
+	write_sysfs(PATH_KSM "run", 1);
+	ksm_sysfs[3] = read_sysfs(PATH_KSM "max_page_sharing");
+	write_sysfs(PATH_KSM "max_page_sharing", ksm_sysfs[3] + TEST_UNIT);
+	ksm_sysfs[4] = read_sysfs(PATH_KSM "pages_to_scan");
+	write_sysfs(PATH_KSM "pages_to_scan", ksm_sysfs[4] + TEST_UNIT);
+}
+
+static void mte_ksm_restore(void)
+{
+	write_sysfs(PATH_KSM "merge_across_nodes", ksm_sysfs[0]);
+	write_sysfs(PATH_KSM "sleep_millisecs", ksm_sysfs[1]);
+	write_sysfs(PATH_KSM "run", ksm_sysfs[2]);
+	write_sysfs(PATH_KSM "max_page_sharing", ksm_sysfs[3]);
+	write_sysfs(PATH_KSM "pages_to_scan", ksm_sysfs[4]);
+}
+
+static void mte_ksm_scan(void)
+{
+	int cur_count = read_sysfs(PATH_KSM "full_scans");
+	int scan_count = cur_count + 1;
+	int max_loop_count = MAX_LOOP;
+
+	while ((cur_count < scan_count) && max_loop_count) {
+		sleep(1);
+		cur_count = read_sysfs(PATH_KSM "full_scans");
+		max_loop_count--;
+	}
+#ifdef DEBUG
+	ksft_print_msg("INFO: pages_shared=%lu pages_sharing=%lu\n",
+			read_sysfs(PATH_KSM "pages_shared"),
+			read_sysfs(PATH_KSM "pages_sharing"));
+#endif
+}
+
+static int check_madvise_options(int mem_type, int mode, int mapping)
+{
+	char *ptr;
+	int err, ret;
+
+	err = KSFT_FAIL;
+	if (access(PATH_KSM, F_OK) == -1) {
+		ksft_print_msg("ERR: Kernel KSM config not enabled\n");
+		return err;
+	}
+
+	mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG);
+	ptr = mte_allocate_memory(TEST_UNIT * page_sz, mem_type, mapping, true);
+	if (check_allocated_memory(ptr, TEST_UNIT * page_sz, mem_type, false) != KSFT_PASS)
+		return KSFT_FAIL;
+
+	/* Insert same data in all the pages */
+	memset(ptr, 'A', TEST_UNIT * page_sz);
+	ret = madvise(ptr, TEST_UNIT * page_sz, MADV_MERGEABLE);
+	if (ret) {
+		ksft_print_msg("ERR: madvise failed to set MADV_UNMERGEABLE\n");
+		goto madvise_err;
+	}
+	mte_ksm_scan();
+	/* Tagged pages should not merge */
+	if ((read_sysfs(PATH_KSM "pages_shared") < 1) ||
+	    (read_sysfs(PATH_KSM "pages_sharing") < (TEST_UNIT - 1)))
+		err = KSFT_PASS;
+madvise_err:
+	mte_free_memory(ptr, TEST_UNIT * page_sz, mem_type, true);
+	return err;
+}
+
+int main(int argc, char *argv[])
+{
+	int err;
+
+	err = mte_default_setup();
+	if (err)
+		return err;
+	page_sz = getpagesize();
+	if (!page_sz) {
+		ksft_print_msg("ERR: Unable to get page size\n");
+		return KSFT_FAIL;
+	}
+	/* Register signal handlers */
+	mte_register_signal(SIGBUS, mte_default_handler);
+	mte_register_signal(SIGSEGV, mte_default_handler);
+	/* Enable KSM */
+	mte_ksm_setup();
+
+	evaluate_test(check_madvise_options(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE),
+		"Check KSM mte page merge for private mapping, sync mode and mmap memory\n");
+	evaluate_test(check_madvise_options(USE_MMAP, MTE_ASYNC_ERR, MAP_PRIVATE),
+		"Check KSM mte page merge for private mapping, async mode and mmap memory\n");
+	evaluate_test(check_madvise_options(USE_MMAP, MTE_SYNC_ERR, MAP_SHARED),
+		"Check KSM mte page merge for shared mapping, sync mode and mmap memory\n");
+	evaluate_test(check_madvise_options(USE_MMAP, MTE_ASYNC_ERR, MAP_SHARED),
+		"Check KSM mte page merge for shared mapping, async mode and mmap memory\n");
+
+	mte_ksm_restore();
+	mte_restore_setup();
+	ksft_print_cnts();
+	return ksft_get_fail_cnt() == 0 ? KSFT_PASS : KSFT_FAIL;
+}
diff --git a/tools/testing/selftests/arm64/mte/check_mmap_options.c b/tools/testing/selftests/arm64/mte/check_mmap_options.c
new file mode 100644
index 000000000000..33b13b86199b
--- /dev/null
+++ b/tools/testing/selftests/arm64/mte/check_mmap_options.c
@@ -0,0 +1,262 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2020 ARM Limited
+
+#define _GNU_SOURCE
+
+#include <errno.h>
+#include <fcntl.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ucontext.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+
+#include "kselftest.h"
+#include "mte_common_util.h"
+#include "mte_def.h"
+
+#define RUNS			(MT_TAG_COUNT)
+#define UNDERFLOW		MT_GRANULE_SIZE
+#define OVERFLOW		MT_GRANULE_SIZE
+#define TAG_CHECK_ON		0
+#define TAG_CHECK_OFF		1
+
+static size_t page_size;
+static int sizes[] = {
+	1, 537, 989, 1269, MT_GRANULE_SIZE - 1, MT_GRANULE_SIZE,
+	/* page size - 1*/ 0, /* page_size */ 0, /* page size + 1 */ 0
+};
+
+static int check_mte_memory(char *ptr, int size, int mode, int tag_check)
+{
+	mte_initialize_current_context(mode, (uintptr_t)ptr, size);
+	memset(ptr, '1', size);
+	mte_wait_after_trig();
+	if (cur_mte_cxt.fault_valid == true)
+		return KSFT_FAIL;
+
+	mte_initialize_current_context(mode, (uintptr_t)ptr, -UNDERFLOW);
+	memset(ptr - UNDERFLOW, '2', UNDERFLOW);
+	mte_wait_after_trig();
+	if (cur_mte_cxt.fault_valid == false && tag_check == TAG_CHECK_ON)
+		return KSFT_FAIL;
+	if (cur_mte_cxt.fault_valid == true && tag_check == TAG_CHECK_OFF)
+		return KSFT_FAIL;
+
+	mte_initialize_current_context(mode, (uintptr_t)ptr, size + OVERFLOW);
+	memset(ptr + size, '3', OVERFLOW);
+	mte_wait_after_trig();
+	if (cur_mte_cxt.fault_valid == false && tag_check == TAG_CHECK_ON)
+		return KSFT_FAIL;
+	if (cur_mte_cxt.fault_valid == true && tag_check == TAG_CHECK_OFF)
+		return KSFT_FAIL;
+
+	return KSFT_PASS;
+}
+
+static int check_anonymous_memory_mapping(int mem_type, int mode, int mapping, int tag_check)
+{
+	char *ptr, *map_ptr;
+	int run, result, map_size;
+	int item = sizeof(sizes)/sizeof(int);
+
+	item = sizeof(sizes)/sizeof(int);
+	mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG);
+	for (run = 0; run < item; run++) {
+		map_size = sizes[run] + OVERFLOW + UNDERFLOW;
+		map_ptr = (char *)mte_allocate_memory(map_size, mem_type, mapping, false);
+		if (check_allocated_memory(map_ptr, map_size, mem_type, false) != KSFT_PASS)
+			return KSFT_FAIL;
+
+		ptr = map_ptr + UNDERFLOW;
+		mte_initialize_current_context(mode, (uintptr_t)ptr, sizes[run]);
+		/* Only mte enabled memory will allow tag insertion */
+		ptr = mte_insert_tags((void *)ptr, sizes[run]);
+		if (!ptr || cur_mte_cxt.fault_valid == true) {
+			ksft_print_msg("FAIL: Insert tags on anonymous mmap memory\n");
+			munmap((void *)map_ptr, map_size);
+			return KSFT_FAIL;
+		}
+		result = check_mte_memory(ptr, sizes[run], mode, tag_check);
+		mte_clear_tags((void *)ptr, sizes[run]);
+		mte_free_memory((void *)map_ptr, map_size, mem_type, false);
+		if (result == KSFT_FAIL)
+			return KSFT_FAIL;
+	}
+	return KSFT_PASS;
+}
+
+static int check_file_memory_mapping(int mem_type, int mode, int mapping, int tag_check)
+{
+	char *ptr, *map_ptr;
+	int run, fd, map_size;
+	int total = sizeof(sizes)/sizeof(int);
+	int result = KSFT_PASS;
+
+	mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG);
+	for (run = 0; run < total; run++) {
+		fd = create_temp_file();
+		if (fd == -1)
+			return KSFT_FAIL;
+
+		map_size = sizes[run] + UNDERFLOW + OVERFLOW;
+		map_ptr = (char *)mte_allocate_file_memory(map_size, mem_type, mapping, false, fd);
+		if (check_allocated_memory(map_ptr, map_size, mem_type, false) != KSFT_PASS) {
+			close(fd);
+			return KSFT_FAIL;
+		}
+		ptr = map_ptr + UNDERFLOW;
+		mte_initialize_current_context(mode, (uintptr_t)ptr, sizes[run]);
+		/* Only mte enabled memory will allow tag insertion */
+		ptr = mte_insert_tags((void *)ptr, sizes[run]);
+		if (!ptr || cur_mte_cxt.fault_valid == true) {
+			ksft_print_msg("FAIL: Insert tags on file based memory\n");
+			munmap((void *)map_ptr, map_size);
+			close(fd);
+			return KSFT_FAIL;
+		}
+		result = check_mte_memory(ptr, sizes[run], mode, tag_check);
+		mte_clear_tags((void *)ptr, sizes[run]);
+		munmap((void *)map_ptr, map_size);
+		close(fd);
+		if (result == KSFT_FAIL)
+			break;
+	}
+	return result;
+}
+
+static int check_clear_prot_mte_flag(int mem_type, int mode, int mapping)
+{
+	char *ptr, *map_ptr;
+	int run, prot_flag, result, fd, map_size;
+	int total = sizeof(sizes)/sizeof(int);
+
+	prot_flag = PROT_READ | PROT_WRITE;
+	mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG);
+	for (run = 0; run < total; run++) {
+		map_size = sizes[run] + OVERFLOW + UNDERFLOW;
+		ptr = (char *)mte_allocate_memory_tag_range(sizes[run], mem_type, mapping,
+							    UNDERFLOW, OVERFLOW);
+		if (check_allocated_memory_range(ptr, sizes[run], mem_type,
+						 UNDERFLOW, OVERFLOW) != KSFT_PASS)
+			return KSFT_FAIL;
+		map_ptr = ptr - UNDERFLOW;
+		/* Try to clear PROT_MTE property and verify it by tag checking */
+		if (mprotect(map_ptr, map_size, prot_flag)) {
+			mte_free_memory_tag_range((void *)ptr, sizes[run], mem_type,
+						  UNDERFLOW, OVERFLOW);
+			ksft_print_msg("FAIL: mprotect not ignoring clear PROT_MTE property\n");
+			return KSFT_FAIL;
+		}
+		result = check_mte_memory(ptr, sizes[run], mode, TAG_CHECK_ON);
+		mte_free_memory_tag_range((void *)ptr, sizes[run], mem_type, UNDERFLOW, OVERFLOW);
+		if (result != KSFT_PASS)
+			return KSFT_FAIL;
+
+		fd = create_temp_file();
+		if (fd == -1)
+			return KSFT_FAIL;
+		ptr = (char *)mte_allocate_file_memory_tag_range(sizes[run], mem_type, mapping,
+								 UNDERFLOW, OVERFLOW, fd);
+		if (check_allocated_memory_range(ptr, sizes[run], mem_type,
+						 UNDERFLOW, OVERFLOW) != KSFT_PASS) {
+			close(fd);
+			return KSFT_FAIL;
+		}
+		map_ptr = ptr - UNDERFLOW;
+		/* Try to clear PROT_MTE property and verify it by tag checking */
+		if (mprotect(map_ptr, map_size, prot_flag)) {
+			ksft_print_msg("FAIL: mprotect not ignoring clear PROT_MTE property\n");
+			mte_free_memory_tag_range((void *)ptr, sizes[run], mem_type,
+						  UNDERFLOW, OVERFLOW);
+			close(fd);
+			return KSFT_FAIL;
+		}
+		result = check_mte_memory(ptr, sizes[run], mode, TAG_CHECK_ON);
+		mte_free_memory_tag_range((void *)ptr, sizes[run], mem_type, UNDERFLOW, OVERFLOW);
+		close(fd);
+		if (result != KSFT_PASS)
+			return KSFT_FAIL;
+	}
+	return KSFT_PASS;
+}
+
+int main(int argc, char *argv[])
+{
+	int err;
+	int item = sizeof(sizes)/sizeof(int);
+
+	err = mte_default_setup();
+	if (err)
+		return err;
+	page_size = getpagesize();
+	if (!page_size) {
+		ksft_print_msg("ERR: Unable to get page size\n");
+		return KSFT_FAIL;
+	}
+	sizes[item - 3] = page_size - 1;
+	sizes[item - 2] = page_size;
+	sizes[item - 1] = page_size + 1;
+
+	/* Register signal handlers */
+	mte_register_signal(SIGBUS, mte_default_handler);
+	mte_register_signal(SIGSEGV, mte_default_handler);
+
+	mte_enable_pstate_tco();
+	evaluate_test(check_anonymous_memory_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE, TAG_CHECK_OFF),
+	"Check anonymous memory with private mapping, sync error mode, mmap memory and tag check off\n");
+	evaluate_test(check_file_memory_mapping(USE_MPROTECT, MTE_SYNC_ERR, MAP_PRIVATE, TAG_CHECK_OFF),
+	"Check file memory with private mapping, sync error mode, mmap/mprotect memory and tag check off\n");
+
+	mte_disable_pstate_tco();
+	evaluate_test(check_anonymous_memory_mapping(USE_MMAP, MTE_NONE_ERR, MAP_PRIVATE, TAG_CHECK_OFF),
+	"Check anonymous memory with private mapping, no error mode, mmap memory and tag check off\n");
+	evaluate_test(check_file_memory_mapping(USE_MPROTECT, MTE_NONE_ERR, MAP_PRIVATE, TAG_CHECK_OFF),
+	"Check file memory with private mapping, no error mode, mmap/mprotect memory and tag check off\n");
+
+	evaluate_test(check_anonymous_memory_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE, TAG_CHECK_ON),
+	"Check anonymous memory with private mapping, sync error mode, mmap memory and tag check on\n");
+	evaluate_test(check_anonymous_memory_mapping(USE_MPROTECT, MTE_SYNC_ERR, MAP_PRIVATE, TAG_CHECK_ON),
+	"Check anonymous memory with private mapping, sync error mode, mmap/mprotect memory and tag check on\n");
+	evaluate_test(check_anonymous_memory_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_SHARED, TAG_CHECK_ON),
+	"Check anonymous memory with shared mapping, sync error mode, mmap memory and tag check on\n");
+	evaluate_test(check_anonymous_memory_mapping(USE_MPROTECT, MTE_SYNC_ERR, MAP_SHARED, TAG_CHECK_ON),
+	"Check anonymous memory with shared mapping, sync error mode, mmap/mprotect memory and tag check on\n");
+	evaluate_test(check_anonymous_memory_mapping(USE_MMAP, MTE_ASYNC_ERR, MAP_PRIVATE, TAG_CHECK_ON),
+	"Check anonymous memory with private mapping, async error mode, mmap memory and tag check on\n");
+	evaluate_test(check_anonymous_memory_mapping(USE_MPROTECT, MTE_ASYNC_ERR, MAP_PRIVATE, TAG_CHECK_ON),
+	"Check anonymous memory with private mapping, async error mode, mmap/mprotect memory and tag check on\n");
+	evaluate_test(check_anonymous_memory_mapping(USE_MMAP, MTE_ASYNC_ERR, MAP_SHARED, TAG_CHECK_ON),
+	"Check anonymous memory with shared mapping, async error mode, mmap memory and tag check on\n");
+	evaluate_test(check_anonymous_memory_mapping(USE_MPROTECT, MTE_ASYNC_ERR, MAP_SHARED, TAG_CHECK_ON),
+	"Check anonymous memory with shared mapping, async error mode, mmap/mprotect memory and tag check on\n");
+
+	evaluate_test(check_file_memory_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE, TAG_CHECK_ON),
+	"Check file memory with private mapping, sync error mode, mmap memory and tag check on\n");
+	evaluate_test(check_file_memory_mapping(USE_MPROTECT, MTE_SYNC_ERR, MAP_PRIVATE, TAG_CHECK_ON),
+	"Check file memory with private mapping, sync error mode, mmap/mprotect memory and tag check on\n");
+	evaluate_test(check_file_memory_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_SHARED, TAG_CHECK_ON),
+	"Check file memory with shared mapping, sync error mode, mmap memory and tag check on\n");
+	evaluate_test(check_file_memory_mapping(USE_MPROTECT, MTE_SYNC_ERR, MAP_SHARED, TAG_CHECK_ON),
+	"Check file memory with shared mapping, sync error mode, mmap/mprotect memory and tag check on\n");
+	evaluate_test(check_file_memory_mapping(USE_MMAP, MTE_ASYNC_ERR, MAP_PRIVATE, TAG_CHECK_ON),
+	"Check file memory with private mapping, async error mode, mmap memory and tag check on\n");
+	evaluate_test(check_file_memory_mapping(USE_MPROTECT, MTE_ASYNC_ERR, MAP_PRIVATE, TAG_CHECK_ON),
+	"Check file memory with private mapping, async error mode, mmap/mprotect memory and tag check on\n");
+	evaluate_test(check_file_memory_mapping(USE_MMAP, MTE_ASYNC_ERR, MAP_SHARED, TAG_CHECK_ON),
+	"Check file memory with shared mapping, async error mode, mmap memory and tag check on\n");
+	evaluate_test(check_file_memory_mapping(USE_MPROTECT, MTE_ASYNC_ERR, MAP_SHARED, TAG_CHECK_ON),
+	"Check file memory with shared mapping, async error mode, mmap/mprotect memory and tag check on\n");
+
+	evaluate_test(check_clear_prot_mte_flag(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE),
+	"Check clear PROT_MTE flags with private mapping, sync error mode and mmap memory\n");
+	evaluate_test(check_clear_prot_mte_flag(USE_MPROTECT, MTE_SYNC_ERR, MAP_PRIVATE),
+	"Check clear PROT_MTE flags with private mapping and sync error mode and mmap/mprotect memory\n");
+
+	mte_restore_setup();
+	ksft_print_cnts();
+	return ksft_get_fail_cnt() == 0 ? KSFT_PASS : KSFT_FAIL;
+}
diff --git a/tools/testing/selftests/arm64/mte/check_tags_inclusion.c b/tools/testing/selftests/arm64/mte/check_tags_inclusion.c
new file mode 100644
index 000000000000..94d245a0ed56
--- /dev/null
+++ b/tools/testing/selftests/arm64/mte/check_tags_inclusion.c
@@ -0,0 +1,185 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2020 ARM Limited
+
+#define _GNU_SOURCE
+
+#include <errno.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ucontext.h>
+#include <sys/wait.h>
+
+#include "kselftest.h"
+#include "mte_common_util.h"
+#include "mte_def.h"
+
+#define BUFFER_SIZE		(5 * MT_GRANULE_SIZE)
+#define RUNS			(MT_TAG_COUNT * 2)
+#define MTE_LAST_TAG_MASK	(0x7FFF)
+
+static int verify_mte_pointer_validity(char *ptr, int mode)
+{
+	mte_initialize_current_context(mode, (uintptr_t)ptr, BUFFER_SIZE);
+	/* Check the validity of the tagged pointer */
+	memset((void *)ptr, '1', BUFFER_SIZE);
+	mte_wait_after_trig();
+	if (cur_mte_cxt.fault_valid)
+		return KSFT_FAIL;
+	/* Proceed further for nonzero tags */
+	if (!MT_FETCH_TAG((uintptr_t)ptr))
+		return KSFT_PASS;
+	mte_initialize_current_context(mode, (uintptr_t)ptr, BUFFER_SIZE + 1);
+	/* Check the validity outside the range */
+	ptr[BUFFER_SIZE] = '2';
+	mte_wait_after_trig();
+	if (!cur_mte_cxt.fault_valid)
+		return KSFT_FAIL;
+	else
+		return KSFT_PASS;
+}
+
+static int check_single_included_tags(int mem_type, int mode)
+{
+	char *ptr;
+	int tag, run, result = KSFT_PASS;
+
+	ptr = (char *)mte_allocate_memory(BUFFER_SIZE + MT_GRANULE_SIZE, mem_type, 0, false);
+	if (check_allocated_memory(ptr, BUFFER_SIZE + MT_GRANULE_SIZE,
+				   mem_type, false) != KSFT_PASS)
+		return KSFT_FAIL;
+
+	for (tag = 0; (tag < MT_TAG_COUNT) && (result == KSFT_PASS); tag++) {
+		mte_switch_mode(mode, MT_INCLUDE_VALID_TAG(tag));
+		/* Try to catch a excluded tag by a number of tries. */
+		for (run = 0; (run < RUNS) && (result == KSFT_PASS); run++) {
+			ptr = (char *)mte_insert_tags(ptr, BUFFER_SIZE);
+			/* Check tag value */
+			if (MT_FETCH_TAG((uintptr_t)ptr) == tag) {
+				ksft_print_msg("FAIL: wrong tag = 0x%x with include mask=0x%x\n",
+					       MT_FETCH_TAG((uintptr_t)ptr),
+					       MT_INCLUDE_VALID_TAG(tag));
+				result = KSFT_FAIL;
+				break;
+			}
+			result = verify_mte_pointer_validity(ptr, mode);
+		}
+	}
+	mte_free_memory_tag_range((void *)ptr, BUFFER_SIZE, mem_type, 0, MT_GRANULE_SIZE);
+	return result;
+}
+
+static int check_multiple_included_tags(int mem_type, int mode)
+{
+	char *ptr;
+	int tag, run, result = KSFT_PASS;
+	unsigned long excl_mask = 0;
+
+	ptr = (char *)mte_allocate_memory(BUFFER_SIZE + MT_GRANULE_SIZE, mem_type, 0, false);
+	if (check_allocated_memory(ptr, BUFFER_SIZE + MT_GRANULE_SIZE,
+				   mem_type, false) != KSFT_PASS)
+		return KSFT_FAIL;
+
+	for (tag = 0; (tag < MT_TAG_COUNT - 1) && (result == KSFT_PASS); tag++) {
+		excl_mask |= 1 << tag;
+		mte_switch_mode(mode, MT_INCLUDE_VALID_TAGS(excl_mask));
+		/* Try to catch a excluded tag by a number of tries. */
+		for (run = 0; (run < RUNS) && (result == KSFT_PASS); run++) {
+			ptr = (char *)mte_insert_tags(ptr, BUFFER_SIZE);
+			/* Check tag value */
+			if (MT_FETCH_TAG((uintptr_t)ptr) < tag) {
+				ksft_print_msg("FAIL: wrong tag = 0x%x with include mask=0x%x\n",
+					       MT_FETCH_TAG((uintptr_t)ptr),
+					       MT_INCLUDE_VALID_TAGS(excl_mask));
+				result = KSFT_FAIL;
+				break;
+			}
+			result = verify_mte_pointer_validity(ptr, mode);
+		}
+	}
+	mte_free_memory_tag_range((void *)ptr, BUFFER_SIZE, mem_type, 0, MT_GRANULE_SIZE);
+	return result;
+}
+
+static int check_all_included_tags(int mem_type, int mode)
+{
+	char *ptr;
+	int run, result = KSFT_PASS;
+
+	ptr = (char *)mte_allocate_memory(BUFFER_SIZE + MT_GRANULE_SIZE, mem_type, 0, false);
+	if (check_allocated_memory(ptr, BUFFER_SIZE + MT_GRANULE_SIZE,
+				   mem_type, false) != KSFT_PASS)
+		return KSFT_FAIL;
+
+	mte_switch_mode(mode, MT_INCLUDE_TAG_MASK);
+	/* Try to catch a excluded tag by a number of tries. */
+	for (run = 0; (run < RUNS) && (result == KSFT_PASS); run++) {
+		ptr = (char *)mte_insert_tags(ptr, BUFFER_SIZE);
+		/*
+		 * Here tag byte can be between 0x0 to 0xF (full allowed range)
+		 * so no need to match so just verify if it is writable.
+		 */
+		result = verify_mte_pointer_validity(ptr, mode);
+	}
+	mte_free_memory_tag_range((void *)ptr, BUFFER_SIZE, mem_type, 0, MT_GRANULE_SIZE);
+	return result;
+}
+
+static int check_none_included_tags(int mem_type, int mode)
+{
+	char *ptr;
+	int run;
+
+	ptr = (char *)mte_allocate_memory(BUFFER_SIZE, mem_type, 0, false);
+	if (check_allocated_memory(ptr, BUFFER_SIZE, mem_type, false) != KSFT_PASS)
+		return KSFT_FAIL;
+
+	mte_switch_mode(mode, MT_EXCLUDE_TAG_MASK);
+	/* Try to catch a excluded tag by a number of tries. */
+	for (run = 0; run < RUNS; run++) {
+		ptr = (char *)mte_insert_tags(ptr, BUFFER_SIZE);
+		/* Here all tags exluded so tag value generated should be 0 */
+		if (MT_FETCH_TAG((uintptr_t)ptr)) {
+			ksft_print_msg("FAIL: included tag value found\n");
+			mte_free_memory((void *)ptr, BUFFER_SIZE, mem_type, true);
+			return KSFT_FAIL;
+		}
+		mte_initialize_current_context(mode, (uintptr_t)ptr, BUFFER_SIZE);
+		/* Check the write validity of the untagged pointer */
+		memset((void *)ptr, '1', BUFFER_SIZE);
+		mte_wait_after_trig();
+		if (cur_mte_cxt.fault_valid)
+			break;
+	}
+	mte_free_memory((void *)ptr, BUFFER_SIZE, mem_type, false);
+	if (cur_mte_cxt.fault_valid)
+		return KSFT_FAIL;
+	else
+		return KSFT_PASS;
+}
+
+int main(int argc, char *argv[])
+{
+	int err;
+
+	err = mte_default_setup();
+	if (err)
+		return err;
+
+	/* Register SIGSEGV handler */
+	mte_register_signal(SIGSEGV, mte_default_handler);
+
+	evaluate_test(check_single_included_tags(USE_MMAP, MTE_SYNC_ERR),
+		      "Check an included tag value with sync mode\n");
+	evaluate_test(check_multiple_included_tags(USE_MMAP, MTE_SYNC_ERR),
+		      "Check different included tags value with sync mode\n");
+	evaluate_test(check_none_included_tags(USE_MMAP, MTE_SYNC_ERR),
+		      "Check none included tags value with sync mode\n");
+	evaluate_test(check_all_included_tags(USE_MMAP, MTE_SYNC_ERR),
+		      "Check all included tags value with sync mode\n");
+
+	mte_restore_setup();
+	ksft_print_cnts();
+	return ksft_get_fail_cnt() == 0 ? KSFT_PASS : KSFT_FAIL;
+}
diff --git a/tools/testing/selftests/arm64/mte/check_user_mem.c b/tools/testing/selftests/arm64/mte/check_user_mem.c
new file mode 100644
index 000000000000..594e98e76880
--- /dev/null
+++ b/tools/testing/selftests/arm64/mte/check_user_mem.c
@@ -0,0 +1,111 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2020 ARM Limited
+
+#define _GNU_SOURCE
+
+#include <errno.h>
+#include <fcntl.h>
+#include <signal.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <ucontext.h>
+#include <unistd.h>
+#include <sys/mman.h>
+
+#include "kselftest.h"
+#include "mte_common_util.h"
+#include "mte_def.h"
+
+static size_t page_sz;
+
+static int check_usermem_access_fault(int mem_type, int mode, int mapping)
+{
+	int fd, i, err;
+	char val = 'A';
+	size_t len, read_len;
+	void *ptr, *ptr_next;
+
+	err = KSFT_FAIL;
+	len = 2 * page_sz;
+	mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG);
+	fd = create_temp_file();
+	if (fd == -1)
+		return KSFT_FAIL;
+	for (i = 0; i < len; i++)
+		write(fd, &val, sizeof(val));
+	lseek(fd, 0, 0);
+	ptr = mte_allocate_memory(len, mem_type, mapping, true);
+	if (check_allocated_memory(ptr, len, mem_type, true) != KSFT_PASS) {
+		close(fd);
+		return KSFT_FAIL;
+	}
+	mte_initialize_current_context(mode, (uintptr_t)ptr, len);
+	/* Copy from file into buffer with valid tag */
+	read_len = read(fd, ptr, len);
+	mte_wait_after_trig();
+	if (cur_mte_cxt.fault_valid || read_len < len)
+		goto usermem_acc_err;
+	/* Verify same pattern is read */
+	for (i = 0; i < len; i++)
+		if (*(char *)(ptr + i) != val)
+			break;
+	if (i < len)
+		goto usermem_acc_err;
+
+	/* Tag the next half of memory with different value */
+	ptr_next = (void *)((unsigned long)ptr + page_sz);
+	ptr_next = mte_insert_new_tag(ptr_next);
+	mte_set_tag_address_range(ptr_next, page_sz);
+
+	lseek(fd, 0, 0);
+	/* Copy from file into buffer with invalid tag */
+	read_len = read(fd, ptr, len);
+	mte_wait_after_trig();
+	/*
+	 * Accessing user memory in kernel with invalid tag should fail in sync
+	 * mode without fault but may not fail in async mode as per the
+	 * implemented MTE userspace support in Arm64 kernel.
+	 */
+	if (mode == MTE_SYNC_ERR &&
+	    !cur_mte_cxt.fault_valid && read_len < len) {
+		err = KSFT_PASS;
+	} else if (mode == MTE_ASYNC_ERR &&
+		   !cur_mte_cxt.fault_valid && read_len == len) {
+		err = KSFT_PASS;
+	}
+usermem_acc_err:
+	mte_free_memory((void *)ptr, len, mem_type, true);
+	close(fd);
+	return err;
+}
+
+int main(int argc, char *argv[])
+{
+	int err;
+
+	page_sz = getpagesize();
+	if (!page_sz) {
+		ksft_print_msg("ERR: Unable to get page size\n");
+		return KSFT_FAIL;
+	}
+	err = mte_default_setup();
+	if (err)
+		return err;
+	/* Register signal handlers */
+	mte_register_signal(SIGSEGV, mte_default_handler);
+
+	evaluate_test(check_usermem_access_fault(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE),
+		"Check memory access from kernel in sync mode, private mapping and mmap memory\n");
+	evaluate_test(check_usermem_access_fault(USE_MMAP, MTE_SYNC_ERR, MAP_SHARED),
+		"Check memory access from kernel in sync mode, shared mapping and mmap memory\n");
+
+	evaluate_test(check_usermem_access_fault(USE_MMAP, MTE_ASYNC_ERR, MAP_PRIVATE),
+		"Check memory access from kernel in async mode, private mapping and mmap memory\n");
+	evaluate_test(check_usermem_access_fault(USE_MMAP, MTE_ASYNC_ERR, MAP_SHARED),
+		"Check memory access from kernel in async mode, shared mapping and mmap memory\n");
+
+	mte_restore_setup();
+	ksft_print_cnts();
+	return ksft_get_fail_cnt() == 0 ? KSFT_PASS : KSFT_FAIL;
+}
diff --git a/tools/testing/selftests/arm64/mte/mte_common_util.c b/tools/testing/selftests/arm64/mte/mte_common_util.c
new file mode 100644
index 000000000000..39f8908988ea
--- /dev/null
+++ b/tools/testing/selftests/arm64/mte/mte_common_util.c
@@ -0,0 +1,341 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2020 ARM Limited
+
+#include <fcntl.h>
+#include <sched.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include <linux/auxvec.h>
+#include <sys/auxv.h>
+#include <sys/mman.h>
+#include <sys/prctl.h>
+
+#include <asm/hwcap.h>
+
+#include "kselftest.h"
+#include "mte_common_util.h"
+#include "mte_def.h"
+
+#define INIT_BUFFER_SIZE       256
+
+struct mte_fault_cxt cur_mte_cxt;
+static unsigned int mte_cur_mode;
+static unsigned int mte_cur_pstate_tco;
+
+void mte_default_handler(int signum, siginfo_t *si, void *uc)
+{
+	unsigned long addr = (unsigned long)si->si_addr;
+
+	if (signum == SIGSEGV) {
+#ifdef DEBUG
+		ksft_print_msg("INFO: SIGSEGV signal at pc=%lx, fault addr=%lx, si_code=%lx\n",
+				((ucontext_t *)uc)->uc_mcontext.pc, addr, si->si_code);
+#endif
+		if (si->si_code == SEGV_MTEAERR) {
+			if (cur_mte_cxt.trig_si_code == si->si_code)
+				cur_mte_cxt.fault_valid = true;
+			return;
+		}
+		/* Compare the context for precise error */
+		else if (si->si_code == SEGV_MTESERR) {
+			if (cur_mte_cxt.trig_si_code == si->si_code &&
+			    ((cur_mte_cxt.trig_range >= 0 &&
+			      addr >= MT_CLEAR_TAG(cur_mte_cxt.trig_addr) &&
+			      addr <= (MT_CLEAR_TAG(cur_mte_cxt.trig_addr) + cur_mte_cxt.trig_range)) ||
+			     (cur_mte_cxt.trig_range < 0 &&
+			      addr <= MT_CLEAR_TAG(cur_mte_cxt.trig_addr) &&
+			      addr >= (MT_CLEAR_TAG(cur_mte_cxt.trig_addr) + cur_mte_cxt.trig_range)))) {
+				cur_mte_cxt.fault_valid = true;
+				/* Adjust the pc by 4 */
+				((ucontext_t *)uc)->uc_mcontext.pc += 4;
+			} else {
+				ksft_print_msg("Invalid MTE synchronous exception caught!\n");
+				exit(1);
+			}
+		} else {
+			ksft_print_msg("Unknown SIGSEGV exception caught!\n");
+			exit(1);
+		}
+	} else if (signum == SIGBUS) {
+		ksft_print_msg("INFO: SIGBUS signal at pc=%lx, fault addr=%lx, si_code=%lx\n",
+				((ucontext_t *)uc)->uc_mcontext.pc, addr, si->si_code);
+		if ((cur_mte_cxt.trig_range >= 0 &&
+		     addr >= MT_CLEAR_TAG(cur_mte_cxt.trig_addr) &&
+		     addr <= (MT_CLEAR_TAG(cur_mte_cxt.trig_addr) + cur_mte_cxt.trig_range)) ||
+		    (cur_mte_cxt.trig_range < 0 &&
+		     addr <= MT_CLEAR_TAG(cur_mte_cxt.trig_addr) &&
+		     addr >= (MT_CLEAR_TAG(cur_mte_cxt.trig_addr) + cur_mte_cxt.trig_range))) {
+			cur_mte_cxt.fault_valid = true;
+			/* Adjust the pc by 4 */
+			((ucontext_t *)uc)->uc_mcontext.pc += 4;
+		}
+	}
+}
+
+void mte_register_signal(int signal, void (*handler)(int, siginfo_t *, void *))
+{
+	struct sigaction sa;
+
+	sa.sa_sigaction = handler;
+	sa.sa_flags = SA_SIGINFO;
+	sigemptyset(&sa.sa_mask);
+	sigaction(signal, &sa, NULL);
+}
+
+void mte_wait_after_trig(void)
+{
+	sched_yield();
+}
+
+void *mte_insert_tags(void *ptr, size_t size)
+{
+	void *tag_ptr;
+	int align_size;
+
+	if (!ptr || (unsigned long)(ptr) & MT_ALIGN_GRANULE) {
+		ksft_print_msg("FAIL: Addr=%lx: invalid\n", ptr);
+		return NULL;
+	}
+	align_size = MT_ALIGN_UP(size);
+	tag_ptr = mte_insert_random_tag(ptr);
+	mte_set_tag_address_range(tag_ptr, align_size);
+	return tag_ptr;
+}
+
+void mte_clear_tags(void *ptr, size_t size)
+{
+	if (!ptr || (unsigned long)(ptr) & MT_ALIGN_GRANULE) {
+		ksft_print_msg("FAIL: Addr=%lx: invalid\n", ptr);
+		return;
+	}
+	size = MT_ALIGN_UP(size);
+	ptr = (void *)MT_CLEAR_TAG((unsigned long)ptr);
+	mte_clear_tag_address_range(ptr, size);
+}
+
+static void *__mte_allocate_memory_range(size_t size, int mem_type, int mapping,
+					 size_t range_before, size_t range_after,
+					 bool tags, int fd)
+{
+	void *ptr;
+	int prot_flag, map_flag;
+	size_t entire_size = size + range_before + range_after;
+
+	if (mem_type != USE_MALLOC && mem_type != USE_MMAP &&
+	    mem_type != USE_MPROTECT) {
+		ksft_print_msg("FAIL: Invalid allocate request\n");
+		return NULL;
+	}
+	if (mem_type == USE_MALLOC)
+		return malloc(entire_size) + range_before;
+
+	prot_flag = PROT_READ | PROT_WRITE;
+	if (mem_type == USE_MMAP)
+		prot_flag |= PROT_MTE;
+
+	map_flag = mapping;
+	if (fd == -1)
+		map_flag = MAP_ANONYMOUS | map_flag;
+	if (!(mapping & MAP_SHARED))
+		map_flag |= MAP_PRIVATE;
+	ptr = mmap(NULL, entire_size, prot_flag, map_flag, fd, 0);
+	if (ptr == MAP_FAILED) {
+		ksft_print_msg("FAIL: mmap allocation\n");
+		return NULL;
+	}
+	if (mem_type == USE_MPROTECT) {
+		if (mprotect(ptr, entire_size, prot_flag | PROT_MTE)) {
+			munmap(ptr, size);
+			ksft_print_msg("FAIL: mprotect PROT_MTE property\n");
+			return NULL;
+		}
+	}
+	if (tags)
+		ptr = mte_insert_tags(ptr + range_before, size);
+	return ptr;
+}
+
+void *mte_allocate_memory_tag_range(size_t size, int mem_type, int mapping,
+				    size_t range_before, size_t range_after)
+{
+	return __mte_allocate_memory_range(size, mem_type, mapping, range_before,
+					   range_after, true, -1);
+}
+
+void *mte_allocate_memory(size_t size, int mem_type, int mapping, bool tags)
+{
+	return __mte_allocate_memory_range(size, mem_type, mapping, 0, 0, tags, -1);
+}
+
+void *mte_allocate_file_memory(size_t size, int mem_type, int mapping, bool tags, int fd)
+{
+	int index;
+	char buffer[INIT_BUFFER_SIZE];
+
+	if (mem_type != USE_MPROTECT && mem_type != USE_MMAP) {
+		ksft_print_msg("FAIL: Invalid mmap file request\n");
+		return NULL;
+	}
+	/* Initialize the file for mappable size */
+	lseek(fd, 0, SEEK_SET);
+	for (index = INIT_BUFFER_SIZE; index < size; index += INIT_BUFFER_SIZE)
+		write(fd, buffer, INIT_BUFFER_SIZE);
+	index -= INIT_BUFFER_SIZE;
+	write(fd, buffer, size - index);
+	return __mte_allocate_memory_range(size, mem_type, mapping, 0, 0, tags, fd);
+}
+
+void *mte_allocate_file_memory_tag_range(size_t size, int mem_type, int mapping,
+					 size_t range_before, size_t range_after, int fd)
+{
+	int index;
+	char buffer[INIT_BUFFER_SIZE];
+	int map_size = size + range_before + range_after;
+
+	if (mem_type != USE_MPROTECT && mem_type != USE_MMAP) {
+		ksft_print_msg("FAIL: Invalid mmap file request\n");
+		return NULL;
+	}
+	/* Initialize the file for mappable size */
+	lseek(fd, 0, SEEK_SET);
+	for (index = INIT_BUFFER_SIZE; index < map_size; index += INIT_BUFFER_SIZE)
+		write(fd, buffer, INIT_BUFFER_SIZE);
+	index -= INIT_BUFFER_SIZE;
+	write(fd, buffer, map_size - index);
+	return __mte_allocate_memory_range(size, mem_type, mapping, range_before,
+					   range_after, true, fd);
+}
+
+static void __mte_free_memory_range(void *ptr, size_t size, int mem_type,
+				    size_t range_before, size_t range_after, bool tags)
+{
+	switch (mem_type) {
+	case USE_MALLOC:
+		free(ptr - range_before);
+		break;
+	case USE_MMAP:
+	case USE_MPROTECT:
+		if (tags)
+			mte_clear_tags(ptr, size);
+		munmap(ptr - range_before, size + range_before + range_after);
+		break;
+	default:
+		ksft_print_msg("FAIL: Invalid free request\n");
+		break;
+	}
+}
+
+void mte_free_memory_tag_range(void *ptr, size_t size, int mem_type,
+			       size_t range_before, size_t range_after)
+{
+	__mte_free_memory_range(ptr, size, mem_type, range_before, range_after, true);
+}
+
+void mte_free_memory(void *ptr, size_t size, int mem_type, bool tags)
+{
+	__mte_free_memory_range(ptr, size, mem_type, 0, 0, tags);
+}
+
+void mte_initialize_current_context(int mode, uintptr_t ptr, ssize_t range)
+{
+	cur_mte_cxt.fault_valid = false;
+	cur_mte_cxt.trig_addr = ptr;
+	cur_mte_cxt.trig_range = range;
+	if (mode == MTE_SYNC_ERR)
+		cur_mte_cxt.trig_si_code = SEGV_MTESERR;
+	else if (mode == MTE_ASYNC_ERR)
+		cur_mte_cxt.trig_si_code = SEGV_MTEAERR;
+	else
+		cur_mte_cxt.trig_si_code = 0;
+}
+
+int mte_switch_mode(int mte_option, unsigned long incl_mask)
+{
+	unsigned long en = 0;
+
+	if (!(mte_option == MTE_SYNC_ERR || mte_option == MTE_ASYNC_ERR ||
+	      mte_option == MTE_NONE_ERR || incl_mask <= MTE_ALLOW_NON_ZERO_TAG)) {
+		ksft_print_msg("FAIL: Invalid mte config option\n");
+		return -EINVAL;
+	}
+	en = PR_TAGGED_ADDR_ENABLE;
+	if (mte_option == MTE_SYNC_ERR)
+		en |= PR_MTE_TCF_SYNC;
+	else if (mte_option == MTE_ASYNC_ERR)
+		en |= PR_MTE_TCF_ASYNC;
+	else if (mte_option == MTE_NONE_ERR)
+		en |= PR_MTE_TCF_NONE;
+
+	en |= (incl_mask << PR_MTE_TAG_SHIFT);
+	/* Enable address tagging ABI, mte error reporting mode and tag inclusion mask. */
+	if (!prctl(PR_SET_TAGGED_ADDR_CTRL, en, 0, 0, 0) == 0) {
+		ksft_print_msg("FAIL:prctl PR_SET_TAGGED_ADDR_CTRL for mte mode\n");
+		return -EINVAL;
+	}
+	return 0;
+}
+
+#define ID_AA64PFR1_MTE_SHIFT		8
+#define ID_AA64PFR1_MTE			2
+
+int mte_default_setup(void)
+{
+	unsigned long hwcaps = getauxval(AT_HWCAP);
+	unsigned long en = 0;
+	int ret;
+
+	if (!(hwcaps & HWCAP_CPUID)) {
+		ksft_print_msg("FAIL: CPUID registers unavailable\n");
+		return KSFT_FAIL;
+	}
+	/* Read ID_AA64PFR1_EL1 register */
+	asm volatile("mrs %0, id_aa64pfr1_el1" : "=r"(hwcaps) : : "memory");
+	if (((hwcaps >> ID_AA64PFR1_MTE_SHIFT) & MT_TAG_MASK) != ID_AA64PFR1_MTE) {
+		ksft_print_msg("FAIL: MTE features unavailable\n");
+		return KSFT_SKIP;
+	}
+	/* Get current mte mode */
+	ret = prctl(PR_GET_TAGGED_ADDR_CTRL, en, 0, 0, 0);
+	if (ret < 0) {
+		ksft_print_msg("FAIL:prctl PR_GET_TAGGED_ADDR_CTRL with error =%d\n", ret);
+		return KSFT_FAIL;
+	}
+	if (ret & PR_MTE_TCF_SYNC)
+		mte_cur_mode = MTE_SYNC_ERR;
+	else if (ret & PR_MTE_TCF_ASYNC)
+		mte_cur_mode = MTE_ASYNC_ERR;
+	else if (ret & PR_MTE_TCF_NONE)
+		mte_cur_mode = MTE_NONE_ERR;
+
+	mte_cur_pstate_tco = mte_get_pstate_tco();
+	/* Disable PSTATE.TCO */
+	mte_disable_pstate_tco();
+	return 0;
+}
+
+void mte_restore_setup(void)
+{
+	mte_switch_mode(mte_cur_mode, MTE_ALLOW_NON_ZERO_TAG);
+	if (mte_cur_pstate_tco == MT_PSTATE_TCO_EN)
+		mte_enable_pstate_tco();
+	else if (mte_cur_pstate_tco == MT_PSTATE_TCO_DIS)
+		mte_disable_pstate_tco();
+}
+
+int create_temp_file(void)
+{
+	int fd;
+	char filename[] = "/dev/shm/tmp_XXXXXX";
+
+	/* Create a file in the tmpfs filesystem */
+	fd = mkstemp(&filename[0]);
+	if (fd == -1) {
+		ksft_print_msg("FAIL: Unable to open temporary file\n");
+		return 0;
+	}
+	unlink(&filename[0]);
+	return fd;
+}
diff --git a/tools/testing/selftests/arm64/mte/mte_common_util.h b/tools/testing/selftests/arm64/mte/mte_common_util.h
new file mode 100644
index 000000000000..195a7d1879e6
--- /dev/null
+++ b/tools/testing/selftests/arm64/mte/mte_common_util.h
@@ -0,0 +1,118 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2020 ARM Limited */
+
+#ifndef _MTE_COMMON_UTIL_H
+#define _MTE_COMMON_UTIL_H
+
+#include <signal.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <sys/auxv.h>
+#include <sys/mman.h>
+#include <sys/prctl.h>
+#include "mte_def.h"
+#include "kselftest.h"
+
+enum mte_mem_type {
+	USE_MALLOC,
+	USE_MMAP,
+	USE_MPROTECT,
+};
+
+enum mte_mode {
+	MTE_NONE_ERR,
+	MTE_SYNC_ERR,
+	MTE_ASYNC_ERR,
+};
+
+struct mte_fault_cxt {
+	/* Address start which triggers mte tag fault */
+	unsigned long trig_addr;
+	/* Address range for mte tag fault and negative value means underflow */
+	ssize_t trig_range;
+	/* siginfo si code */
+	unsigned long trig_si_code;
+	/* Flag to denote if correct fault caught */
+	bool fault_valid;
+};
+
+extern struct mte_fault_cxt cur_mte_cxt;
+
+/* MTE utility functions */
+void mte_default_handler(int signum, siginfo_t *si, void *uc);
+void mte_register_signal(int signal, void (*handler)(int, siginfo_t *, void *));
+void mte_wait_after_trig(void);
+void *mte_allocate_memory(size_t size, int mem_type, int mapping, bool tags);
+void *mte_allocate_memory_tag_range(size_t size, int mem_type, int mapping,
+				    size_t range_before, size_t range_after);
+void *mte_allocate_file_memory(size_t size, int mem_type, int mapping,
+			       bool tags, int fd);
+void *mte_allocate_file_memory_tag_range(size_t size, int mem_type, int mapping,
+					 size_t range_before, size_t range_after, int fd);
+void mte_free_memory(void *ptr, size_t size, int mem_type, bool tags);
+void mte_free_memory_tag_range(void *ptr, size_t size, int mem_type,
+			       size_t range_before, size_t range_after);
+void *mte_insert_tags(void *ptr, size_t size);
+void mte_clear_tags(void *ptr, size_t size);
+int mte_default_setup(void);
+void mte_restore_setup(void);
+int mte_switch_mode(int mte_option, unsigned long incl_mask);
+void mte_initialize_current_context(int mode, uintptr_t ptr, ssize_t range);
+
+/* Common utility functions */
+int create_temp_file(void);
+
+/* Assembly MTE utility functions */
+void *mte_insert_random_tag(void *ptr);
+void *mte_insert_new_tag(void *ptr);
+void *mte_get_tag_address(void *ptr);
+void mte_set_tag_address_range(void *ptr, int range);
+void mte_clear_tag_address_range(void *ptr, int range);
+void mte_disable_pstate_tco(void);
+void mte_enable_pstate_tco(void);
+unsigned int mte_get_pstate_tco(void);
+
+/* Test framework static inline functions/macros */
+static inline void evaluate_test(int err, const char *msg)
+{
+	if (err == KSFT_PASS)
+		ksft_test_result_pass(msg);
+	else if (err == KSFT_FAIL)
+		ksft_test_result_fail(msg);
+}
+
+static inline int check_allocated_memory(void *ptr, size_t size,
+					 int mem_type, bool tags)
+{
+	if (ptr == NULL) {
+		ksft_print_msg("FAIL: memory allocation\n");
+		return KSFT_FAIL;
+	}
+
+	if (tags && !MT_FETCH_TAG((uintptr_t)ptr)) {
+		ksft_print_msg("FAIL: tag not found at addr(%p)\n", ptr);
+		mte_free_memory((void *)ptr, size, mem_type, false);
+		return KSFT_FAIL;
+	}
+
+	return KSFT_PASS;
+}
+
+static inline int check_allocated_memory_range(void *ptr, size_t size, int mem_type,
+					       size_t range_before, size_t range_after)
+{
+	if (ptr == NULL) {
+		ksft_print_msg("FAIL: memory allocation\n");
+		return KSFT_FAIL;
+	}
+
+	if (!MT_FETCH_TAG((uintptr_t)ptr)) {
+		ksft_print_msg("FAIL: tag not found at addr(%p)\n", ptr);
+		mte_free_memory_tag_range((void *)ptr, size, mem_type, range_before,
+					  range_after);
+		return KSFT_FAIL;
+	}
+	return KSFT_PASS;
+}
+
+#endif /* _MTE_COMMON_UTIL_H */
diff --git a/tools/testing/selftests/arm64/mte/mte_def.h b/tools/testing/selftests/arm64/mte/mte_def.h
new file mode 100644
index 000000000000..9b188254b61a
--- /dev/null
+++ b/tools/testing/selftests/arm64/mte/mte_def.h
@@ -0,0 +1,60 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2020 ARM Limited */
+
+/*
+ * Below definitions may be found in kernel headers, However, they are
+ * redefined here to decouple the MTE selftests compilations from them.
+ */
+#ifndef SEGV_MTEAERR
+#define	SEGV_MTEAERR	8
+#endif
+#ifndef SEGV_MTESERR
+#define	SEGV_MTESERR	9
+#endif
+#ifndef PROT_MTE
+#define PROT_MTE	 0x20
+#endif
+#ifndef HWCAP2_MTE
+#define HWCAP2_MTE	(1 << 18)
+#endif
+
+#ifndef PR_MTE_TCF_SHIFT
+#define PR_MTE_TCF_SHIFT	1
+#endif
+#ifndef PR_MTE_TCF_NONE
+#define PR_MTE_TCF_NONE		(0UL << PR_MTE_TCF_SHIFT)
+#endif
+#ifndef PR_MTE_TCF_SYNC
+#define	PR_MTE_TCF_SYNC		(1UL << PR_MTE_TCF_SHIFT)
+#endif
+#ifndef PR_MTE_TCF_ASYNC
+#define PR_MTE_TCF_ASYNC	(2UL << PR_MTE_TCF_SHIFT)
+#endif
+#ifndef PR_MTE_TAG_SHIFT
+#define	PR_MTE_TAG_SHIFT	3
+#endif
+
+/* MTE Hardware feature definitions below. */
+#define MT_TAG_SHIFT		56
+#define MT_TAG_MASK		0xFUL
+#define MT_FREE_TAG		0x0UL
+#define MT_GRANULE_SIZE         16
+#define MT_TAG_COUNT		16
+#define MT_INCLUDE_TAG_MASK	0xFFFF
+#define MT_EXCLUDE_TAG_MASK	0x0
+
+#define MT_ALIGN_GRANULE	(MT_GRANULE_SIZE - 1)
+#define MT_CLEAR_TAG(x)		((x) & ~(MT_TAG_MASK << MT_TAG_SHIFT))
+#define MT_SET_TAG(x, y)	((x) | (y << MT_TAG_SHIFT))
+#define MT_FETCH_TAG(x)		((x >> MT_TAG_SHIFT) & (MT_TAG_MASK))
+#define MT_ALIGN_UP(x)		((x + MT_ALIGN_GRANULE) & ~(MT_ALIGN_GRANULE))
+
+#define MT_PSTATE_TCO_SHIFT	25
+#define MT_PSTATE_TCO_MASK	~(0x1 << MT_PSTATE_TCO_SHIFT)
+#define MT_PSTATE_TCO_EN	1
+#define MT_PSTATE_TCO_DIS	0
+
+#define MT_EXCLUDE_TAG(x)		(1 << (x))
+#define MT_INCLUDE_VALID_TAG(x)		(MT_INCLUDE_TAG_MASK ^ MT_EXCLUDE_TAG(x))
+#define MT_INCLUDE_VALID_TAGS(x)	(MT_INCLUDE_TAG_MASK ^ (x))
+#define MTE_ALLOW_NON_ZERO_TAG		MT_INCLUDE_VALID_TAG(0)
diff --git a/tools/testing/selftests/arm64/mte/mte_helper.S b/tools/testing/selftests/arm64/mte/mte_helper.S
new file mode 100644
index 000000000000..a02c04cd0aac
--- /dev/null
+++ b/tools/testing/selftests/arm64/mte/mte_helper.S
@@ -0,0 +1,128 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2020 ARM Limited */
+
+#include "mte_def.h"
+
+#define ENTRY(name) \
+	.globl name ;\
+	.p2align 2;\
+	.type name, @function ;\
+name:
+
+#define ENDPROC(name) \
+	.size name, .-name ;
+
+	.text
+/*
+ * mte_insert_random_tag: Insert random tag and might be same as the source tag if
+ *			  the source pointer has it.
+ * Input:
+ *		x0 - source pointer with a tag/no-tag
+ * Return:
+ *		x0 - pointer with random tag
+ */
+ENTRY(mte_insert_random_tag)
+	irg	x0, x0, xzr
+	ret
+ENDPROC(mte_insert_random_tag)
+
+/*
+ * mte_insert_new_tag: Insert new tag and different from the source tag if
+ *		       source pointer has it.
+ * Input:
+ *		x0 - source pointer with a tag/no-tag
+ * Return:
+ *		x0 - pointer with random tag
+ */
+ENTRY(mte_insert_new_tag)
+	gmi	x1, x0, xzr
+	irg	x0, x0, x1
+	ret
+ENDPROC(mte_insert_new_tag)
+
+/*
+ * mte_get_tag_address: Get the tag from given address.
+ * Input:
+ *		x0 - source pointer
+ * Return:
+ *		x0 - pointer with appended tag
+ */
+ENTRY(mte_get_tag_address)
+	ldg	x0, [x0]
+	ret
+ENDPROC(mte_get_tag_address)
+
+/*
+ * mte_set_tag_address_range: Set the tag range from the given address
+ * Input:
+ *		x0 - source pointer with tag data
+ *		x1 - range
+ * Return:
+ *		none
+ */
+ENTRY(mte_set_tag_address_range)
+	cbz	x1, 2f
+1:
+	stg	x0, [x0, #0x0]
+	add	x0, x0, #MT_GRANULE_SIZE
+	sub	x1, x1, #MT_GRANULE_SIZE
+	cbnz	x1, 1b
+2:
+	ret
+ENDPROC(mte_set_tag_address_range)
+
+/*
+ * mt_clear_tag_address_range: Clear the tag range from the given address
+ * Input:
+ *		x0 - source pointer with tag data
+ *		x1 - range
+ * Return:
+ *		none
+ */
+ENTRY(mte_clear_tag_address_range)
+	cbz	x1, 2f
+1:
+	stzg	x0, [x0, #0x0]
+	add	x0, x0, #MT_GRANULE_SIZE
+	sub	x1, x1, #MT_GRANULE_SIZE
+	cbnz	x1, 1b
+2:
+	ret
+ENDPROC(mte_clear_tag_address_range)
+
+/*
+ * mte_enable_pstate_tco: Enable PSTATE.TCO (tag check override) field
+ * Input:
+ *		none
+ * Return:
+ *		none
+ */
+ENTRY(mte_enable_pstate_tco)
+	msr	tco, #MT_PSTATE_TCO_EN
+	ret
+ENDPROC(mte_enable_pstate_tco)
+
+/*
+ * mte_disable_pstate_tco: Disable PSTATE.TCO (tag check override) field
+ * Input:
+ *		none
+ * Return:
+ *		none
+ */
+ENTRY(mte_disable_pstate_tco)
+	msr	tco, #MT_PSTATE_TCO_DIS
+	ret
+ENDPROC(mte_disable_pstate_tco)
+
+/*
+ * mte_get_pstate_tco: Get PSTATE.TCO (tag check override) field
+ * Input:
+ *		none
+ * Return:
+ *		x0
+ */
+ENTRY(mte_get_pstate_tco)
+	mrs	x0, tco
+	ubfx	x0, x0, #MT_PSTATE_TCO_SHIFT, #1
+	ret
+ENDPROC(mte_get_pstate_tco)
diff --git a/tools/testing/selftests/arm64/pauth/.gitignore b/tools/testing/selftests/arm64/pauth/.gitignore
new file mode 100644
index 000000000000..155137d92722
--- /dev/null
+++ b/tools/testing/selftests/arm64/pauth/.gitignore
@@ -0,0 +1,2 @@
+exec_target
+pac
diff --git a/tools/testing/selftests/arm64/pauth/Makefile b/tools/testing/selftests/arm64/pauth/Makefile
new file mode 100644
index 000000000000..72e290b0b10c
--- /dev/null
+++ b/tools/testing/selftests/arm64/pauth/Makefile
@@ -0,0 +1,39 @@
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (C) 2020 ARM Limited
+
+# preserve CC value from top level Makefile
+ifeq ($(CC),cc)
+CC := $(CROSS_COMPILE)gcc
+endif
+
+CFLAGS += -mbranch-protection=pac-ret
+# check if the compiler supports ARMv8.3 and branch protection with PAuth
+pauth_cc_support := $(shell if ($(CC) $(CFLAGS) -march=armv8.3-a -E -x c /dev/null -o /dev/null 2>&1) then echo "1"; fi)
+
+ifeq ($(pauth_cc_support),1)
+TEST_GEN_PROGS := pac
+TEST_GEN_FILES := pac_corruptor.o helper.o
+TEST_GEN_PROGS_EXTENDED := exec_target
+endif
+
+include ../../lib.mk
+
+ifeq ($(pauth_cc_support),1)
+# pac* and aut* instructions are not available on architectures berfore
+# ARMv8.3. Therefore target ARMv8.3 wherever they are used directly
+$(OUTPUT)/pac_corruptor.o: pac_corruptor.S
+	$(CC) -c $^ -o $@ $(CFLAGS) -march=armv8.3-a
+
+$(OUTPUT)/helper.o: helper.c
+	$(CC) -c $^ -o $@ $(CFLAGS) -march=armv8.3-a
+
+# when -mbranch-protection is enabled and the target architecture is ARMv8.3 or
+# greater, gcc emits pac* instructions which are not in HINT NOP space,
+# preventing the tests from occurring at all. Compile for ARMv8.2 so tests can
+# run on earlier targets and print a meaningful error messages
+$(OUTPUT)/exec_target: exec_target.c $(OUTPUT)/helper.o
+	$(CC) $^ -o $@ $(CFLAGS) -march=armv8.2-a
+
+$(OUTPUT)/pac: pac.c $(OUTPUT)/pac_corruptor.o $(OUTPUT)/helper.o
+	$(CC) $^ -o $@ $(CFLAGS) -march=armv8.2-a
+endif
diff --git a/tools/testing/selftests/arm64/pauth/exec_target.c b/tools/testing/selftests/arm64/pauth/exec_target.c
new file mode 100644
index 000000000000..4435600ca400
--- /dev/null
+++ b/tools/testing/selftests/arm64/pauth/exec_target.c
@@ -0,0 +1,34 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2020 ARM Limited
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/auxv.h>
+
+#include "helper.h"
+
+int main(void)
+{
+	struct signatures signed_vals;
+	unsigned long hwcaps;
+	size_t val;
+
+	fread(&val, sizeof(size_t), 1, stdin);
+
+	/* don't try to execute illegal (unimplemented) instructions) caller
+	 * should have checked this and keep worker simple
+	 */
+	hwcaps = getauxval(AT_HWCAP);
+
+	if (hwcaps & HWCAP_PACA) {
+		signed_vals.keyia = keyia_sign(val);
+		signed_vals.keyib = keyib_sign(val);
+		signed_vals.keyda = keyda_sign(val);
+		signed_vals.keydb = keydb_sign(val);
+	}
+	signed_vals.keyg = (hwcaps & HWCAP_PACG) ?  keyg_sign(val) : 0;
+
+	fwrite(&signed_vals, sizeof(struct signatures), 1, stdout);
+
+	return 0;
+}
diff --git a/tools/testing/selftests/arm64/pauth/helper.c b/tools/testing/selftests/arm64/pauth/helper.c
new file mode 100644
index 000000000000..2c201e7d0d50
--- /dev/null
+++ b/tools/testing/selftests/arm64/pauth/helper.c
@@ -0,0 +1,39 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2020 ARM Limited
+
+#include "helper.h"
+
+size_t keyia_sign(size_t ptr)
+{
+	asm volatile("paciza %0" : "+r" (ptr));
+	return ptr;
+}
+
+size_t keyib_sign(size_t ptr)
+{
+	asm volatile("pacizb %0" : "+r" (ptr));
+	return ptr;
+}
+
+size_t keyda_sign(size_t ptr)
+{
+	asm volatile("pacdza %0" : "+r" (ptr));
+	return ptr;
+}
+
+size_t keydb_sign(size_t ptr)
+{
+	asm volatile("pacdzb %0" : "+r" (ptr));
+	return ptr;
+}
+
+size_t keyg_sign(size_t ptr)
+{
+	/* output is encoded in the upper 32 bits */
+	size_t dest = 0;
+	size_t modifier = 0;
+
+	asm volatile("pacga %0, %1, %2" : "=r" (dest) : "r" (ptr), "r" (modifier));
+
+	return dest;
+}
diff --git a/tools/testing/selftests/arm64/pauth/helper.h b/tools/testing/selftests/arm64/pauth/helper.h
new file mode 100644
index 000000000000..652496c7b411
--- /dev/null
+++ b/tools/testing/selftests/arm64/pauth/helper.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2020 ARM Limited */
+
+#ifndef _HELPER_H_
+#define _HELPER_H_
+
+#include <stdlib.h>
+
+#define NKEYS 5
+
+struct signatures {
+	size_t keyia;
+	size_t keyib;
+	size_t keyda;
+	size_t keydb;
+	size_t keyg;
+};
+
+void pac_corruptor(void);
+
+/* PAuth sign a value with key ia and modifier value 0 */
+size_t keyia_sign(size_t val);
+size_t keyib_sign(size_t val);
+size_t keyda_sign(size_t val);
+size_t keydb_sign(size_t val);
+size_t keyg_sign(size_t val);
+
+#endif
diff --git a/tools/testing/selftests/arm64/pauth/pac.c b/tools/testing/selftests/arm64/pauth/pac.c
new file mode 100644
index 000000000000..592fe538506e
--- /dev/null
+++ b/tools/testing/selftests/arm64/pauth/pac.c
@@ -0,0 +1,368 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2020 ARM Limited
+
+#define _GNU_SOURCE
+
+#include <sys/auxv.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <signal.h>
+#include <setjmp.h>
+#include <sched.h>
+
+#include "../../kselftest_harness.h"
+#include "helper.h"
+
+#define PAC_COLLISION_ATTEMPTS 10
+/*
+ * The kernel sets TBID by default. So bits 55 and above should remain
+ * untouched no matter what.
+ * The VA space size is 48 bits. Bigger is opt-in.
+ */
+#define PAC_MASK (~0xff80ffffffffffff)
+#define ARBITRARY_VALUE (0x1234)
+#define ASSERT_PAUTH_ENABLED() \
+do { \
+	unsigned long hwcaps = getauxval(AT_HWCAP); \
+	/* data key instructions are not in NOP space. This prevents a SIGILL */ \
+	ASSERT_NE(0, hwcaps & HWCAP_PACA) TH_LOG("PAUTH not enabled"); \
+} while (0)
+#define ASSERT_GENERIC_PAUTH_ENABLED() \
+do { \
+	unsigned long hwcaps = getauxval(AT_HWCAP); \
+	/* generic key instructions are not in NOP space. This prevents a SIGILL */ \
+	ASSERT_NE(0, hwcaps & HWCAP_PACG) TH_LOG("Generic PAUTH not enabled"); \
+} while (0)
+
+void sign_specific(struct signatures *sign, size_t val)
+{
+	sign->keyia = keyia_sign(val);
+	sign->keyib = keyib_sign(val);
+	sign->keyda = keyda_sign(val);
+	sign->keydb = keydb_sign(val);
+}
+
+void sign_all(struct signatures *sign, size_t val)
+{
+	sign->keyia = keyia_sign(val);
+	sign->keyib = keyib_sign(val);
+	sign->keyda = keyda_sign(val);
+	sign->keydb = keydb_sign(val);
+	sign->keyg  = keyg_sign(val);
+}
+
+int n_same(struct signatures *old, struct signatures *new, int nkeys)
+{
+	int res = 0;
+
+	res += old->keyia == new->keyia;
+	res += old->keyib == new->keyib;
+	res += old->keyda == new->keyda;
+	res += old->keydb == new->keydb;
+	if (nkeys == NKEYS)
+		res += old->keyg == new->keyg;
+
+	return res;
+}
+
+int n_same_single_set(struct signatures *sign, int nkeys)
+{
+	size_t vals[nkeys];
+	int same = 0;
+
+	vals[0] = sign->keyia & PAC_MASK;
+	vals[1] = sign->keyib & PAC_MASK;
+	vals[2] = sign->keyda & PAC_MASK;
+	vals[3] = sign->keydb & PAC_MASK;
+
+	if (nkeys >= 4)
+		vals[4] = sign->keyg & PAC_MASK;
+
+	for (int i = 0; i < nkeys - 1; i++) {
+		for (int j = i + 1; j < nkeys; j++) {
+			if (vals[i] == vals[j])
+				same += 1;
+		}
+	}
+	return same;
+}
+
+int exec_sign_all(struct signatures *signed_vals, size_t val)
+{
+	int new_stdin[2];
+	int new_stdout[2];
+	int status;
+	int i;
+	ssize_t ret;
+	pid_t pid;
+	cpu_set_t mask;
+
+	ret = pipe(new_stdin);
+	if (ret == -1) {
+		perror("pipe returned error");
+		return -1;
+	}
+
+	ret = pipe(new_stdout);
+	if (ret == -1) {
+		perror("pipe returned error");
+		return -1;
+	}
+
+	/*
+	 * pin this process and all its children to a single CPU, so it can also
+	 * guarantee a context switch with its child
+	 */
+	sched_getaffinity(0, sizeof(mask), &mask);
+
+	for (i = 0; i < sizeof(cpu_set_t); i++)
+		if (CPU_ISSET(i, &mask))
+			break;
+
+	CPU_ZERO(&mask);
+	CPU_SET(i, &mask);
+	sched_setaffinity(0, sizeof(mask), &mask);
+
+	pid = fork();
+	// child
+	if (pid == 0) {
+		dup2(new_stdin[0], STDIN_FILENO);
+		if (ret == -1) {
+			perror("dup2 returned error");
+			exit(1);
+		}
+
+		dup2(new_stdout[1], STDOUT_FILENO);
+		if (ret == -1) {
+			perror("dup2 returned error");
+			exit(1);
+		}
+
+		close(new_stdin[0]);
+		close(new_stdin[1]);
+		close(new_stdout[0]);
+		close(new_stdout[1]);
+
+		ret = execl("exec_target", "exec_target", (char *)NULL);
+		if (ret == -1) {
+			perror("exec returned error");
+			exit(1);
+		}
+	}
+
+	close(new_stdin[0]);
+	close(new_stdout[1]);
+
+	ret = write(new_stdin[1], &val, sizeof(size_t));
+	if (ret == -1) {
+		perror("write returned error");
+		return -1;
+	}
+
+	/*
+	 * wait for the worker to finish, so that read() reads all data
+	 * will also context switch with worker so that this function can be used
+	 * for context switch tests
+	 */
+	waitpid(pid, &status, 0);
+	if (WIFEXITED(status) == 0) {
+		fprintf(stderr, "worker exited unexpectedly\n");
+		return -1;
+	}
+	if (WEXITSTATUS(status) != 0) {
+		fprintf(stderr, "worker exited with error\n");
+		return -1;
+	}
+
+	ret = read(new_stdout[0], signed_vals, sizeof(struct signatures));
+	if (ret == -1) {
+		perror("read returned error");
+		return -1;
+	}
+
+	return 0;
+}
+
+sigjmp_buf jmpbuf;
+void pac_signal_handler(int signum, siginfo_t *si, void *uc)
+{
+	if (signum == SIGSEGV || signum == SIGILL)
+		siglongjmp(jmpbuf, 1);
+}
+
+/* check that a corrupted PAC results in SIGSEGV or SIGILL */
+TEST(corrupt_pac)
+{
+	struct sigaction sa;
+
+	ASSERT_PAUTH_ENABLED();
+	if (sigsetjmp(jmpbuf, 1) == 0) {
+		sa.sa_sigaction = pac_signal_handler;
+		sa.sa_flags = SA_SIGINFO | SA_RESETHAND;
+		sigemptyset(&sa.sa_mask);
+
+		sigaction(SIGSEGV, &sa, NULL);
+		sigaction(SIGILL, &sa, NULL);
+
+		pac_corruptor();
+		ASSERT_TRUE(0) TH_LOG("SIGSEGV/SIGILL signal did not occur");
+	}
+}
+
+/*
+ * There are no separate pac* and aut* controls so checking only the pac*
+ * instructions is sufficient
+ */
+TEST(pac_instructions_not_nop)
+{
+	size_t keyia = 0;
+	size_t keyib = 0;
+	size_t keyda = 0;
+	size_t keydb = 0;
+
+	ASSERT_PAUTH_ENABLED();
+
+	for (int i = 0; i < PAC_COLLISION_ATTEMPTS; i++) {
+		keyia |= keyia_sign(i) & PAC_MASK;
+		keyib |= keyib_sign(i) & PAC_MASK;
+		keyda |= keyda_sign(i) & PAC_MASK;
+		keydb |= keydb_sign(i) & PAC_MASK;
+	}
+
+	ASSERT_NE(0, keyia) TH_LOG("keyia instructions did nothing");
+	ASSERT_NE(0, keyib) TH_LOG("keyib instructions did nothing");
+	ASSERT_NE(0, keyda) TH_LOG("keyda instructions did nothing");
+	ASSERT_NE(0, keydb) TH_LOG("keydb instructions did nothing");
+}
+
+TEST(pac_instructions_not_nop_generic)
+{
+	size_t keyg = 0;
+
+	ASSERT_GENERIC_PAUTH_ENABLED();
+
+	for (int i = 0; i < PAC_COLLISION_ATTEMPTS; i++)
+		keyg |= keyg_sign(i) & PAC_MASK;
+
+	ASSERT_NE(0, keyg)  TH_LOG("keyg instructions did nothing");
+}
+
+TEST(single_thread_different_keys)
+{
+	int same = 10;
+	int nkeys = NKEYS;
+	int tmp;
+	struct signatures signed_vals;
+	unsigned long hwcaps = getauxval(AT_HWCAP);
+
+	/* generic and data key instructions are not in NOP space. This prevents a SIGILL */
+	ASSERT_NE(0, hwcaps & HWCAP_PACA) TH_LOG("PAUTH not enabled");
+	if (!(hwcaps & HWCAP_PACG)) {
+		TH_LOG("WARNING: Generic PAUTH not enabled. Skipping generic key checks");
+		nkeys = NKEYS - 1;
+	}
+
+	/*
+	 * In Linux the PAC field can be up to 7 bits wide. Even if keys are
+	 * different, there is about 5% chance for PACs to collide with
+	 * different addresses. This chance rapidly increases with fewer bits
+	 * allocated for the PAC (e.g. wider address). A comparison of the keys
+	 * directly will be more reliable.
+	 * All signed values need to be different at least once out of n
+	 * attempts to be certain that the keys are different
+	 */
+	for (int i = 0; i < PAC_COLLISION_ATTEMPTS; i++) {
+		if (nkeys == NKEYS)
+			sign_all(&signed_vals, i);
+		else
+			sign_specific(&signed_vals, i);
+
+		tmp = n_same_single_set(&signed_vals, nkeys);
+		if (tmp < same)
+			same = tmp;
+	}
+
+	ASSERT_EQ(0, same) TH_LOG("%d keys clashed every time", same);
+}
+
+/*
+ * fork() does not change keys. Only exec() does so call a worker program.
+ * Its only job is to sign a value and report back the resutls
+ */
+TEST(exec_changed_keys)
+{
+	struct signatures new_keys;
+	struct signatures old_keys;
+	int ret;
+	int same = 10;
+	int nkeys = NKEYS;
+	unsigned long hwcaps = getauxval(AT_HWCAP);
+
+	/* generic and data key instructions are not in NOP space. This prevents a SIGILL */
+	ASSERT_NE(0, hwcaps & HWCAP_PACA) TH_LOG("PAUTH not enabled");
+	if (!(hwcaps & HWCAP_PACG)) {
+		TH_LOG("WARNING: Generic PAUTH not enabled. Skipping generic key checks");
+		nkeys = NKEYS - 1;
+	}
+
+	for (int i = 0; i < PAC_COLLISION_ATTEMPTS; i++) {
+		ret = exec_sign_all(&new_keys, i);
+		ASSERT_EQ(0, ret) TH_LOG("failed to run worker");
+
+		if (nkeys == NKEYS)
+			sign_all(&old_keys, i);
+		else
+			sign_specific(&old_keys, i);
+
+		ret = n_same(&old_keys, &new_keys, nkeys);
+		if (ret < same)
+			same = ret;
+	}
+
+	ASSERT_EQ(0, same) TH_LOG("exec() did not change %d keys", same);
+}
+
+TEST(context_switch_keep_keys)
+{
+	int ret;
+	struct signatures trash;
+	struct signatures before;
+	struct signatures after;
+
+	ASSERT_PAUTH_ENABLED();
+
+	sign_specific(&before, ARBITRARY_VALUE);
+
+	/* will context switch with a process with different keys at least once */
+	ret = exec_sign_all(&trash, ARBITRARY_VALUE);
+	ASSERT_EQ(0, ret) TH_LOG("failed to run worker");
+
+	sign_specific(&after, ARBITRARY_VALUE);
+
+	ASSERT_EQ(before.keyia, after.keyia) TH_LOG("keyia changed after context switching");
+	ASSERT_EQ(before.keyib, after.keyib) TH_LOG("keyib changed after context switching");
+	ASSERT_EQ(before.keyda, after.keyda) TH_LOG("keyda changed after context switching");
+	ASSERT_EQ(before.keydb, after.keydb) TH_LOG("keydb changed after context switching");
+}
+
+TEST(context_switch_keep_keys_generic)
+{
+	int ret;
+	struct signatures trash;
+	size_t before;
+	size_t after;
+
+	ASSERT_GENERIC_PAUTH_ENABLED();
+
+	before = keyg_sign(ARBITRARY_VALUE);
+
+	/* will context switch with a process with different keys at least once */
+	ret = exec_sign_all(&trash, ARBITRARY_VALUE);
+	ASSERT_EQ(0, ret) TH_LOG("failed to run worker");
+
+	after = keyg_sign(ARBITRARY_VALUE);
+
+	ASSERT_EQ(before, after) TH_LOG("keyg changed after context switching");
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/arm64/pauth/pac_corruptor.S b/tools/testing/selftests/arm64/pauth/pac_corruptor.S
new file mode 100644
index 000000000000..aa6588050752
--- /dev/null
+++ b/tools/testing/selftests/arm64/pauth/pac_corruptor.S
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2020 ARM Limited */
+
+.global pac_corruptor
+
+.text
+/*
+ * Corrupting a single bit of the PAC ensures the authentication will fail.  It
+ * also guarantees no possible collision. TCR_EL1.TBI0 is set by default so no
+ * top byte PAC is tested
+ */
+ pac_corruptor:
+	paciasp
+
+	/* corrupt the top bit of the PAC */
+	eor lr, lr, #1 << 53
+
+	autiasp
+	ret
diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore
index 1bb204cee853..9a0946ddb705 100644
--- a/tools/testing/selftests/bpf/.gitignore
+++ b/tools/testing/selftests/bpf/.gitignore
@@ -6,7 +6,6 @@ test_lpm_map
 test_tag
 FEATURE-DUMP.libbpf
 fixdep
-test_align
 test_dev_cgroup
 /test_progs*
 test_tcpbpf_user
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index e7a8cf83ba48..fc946b7ac288 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -32,7 +32,7 @@ LDLIBS += -lcap -lelf -lz -lrt -lpthread
 
 # Order correspond to 'make run_tests' order
 TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test_progs \
-	test_align test_verifier_log test_dev_cgroup test_tcpbpf_user \
+	test_verifier_log test_dev_cgroup test_tcpbpf_user \
 	test_sock test_btf test_sockmap get_cgroup_id_user test_socket_cookie \
 	test_cgroup_storage \
 	test_netcnt test_tcpnotify_user test_sock_fields test_sysctl \
@@ -102,7 +102,7 @@ endif
 OVERRIDE_TARGETS := 1
 override define CLEAN
 	$(call msg,CLEAN)
-	$(RM) -r $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES) $(EXTRA_CLEAN)
+	$(Q)$(RM) -r $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES) $(EXTRA_CLEAN)
 endef
 
 include ../lib.mk
@@ -123,17 +123,21 @@ $(notdir $(TEST_GEN_PROGS)						\
 	 $(TEST_GEN_PROGS_EXTENDED)					\
 	 $(TEST_CUSTOM_PROGS)): %: $(OUTPUT)/% ;
 
+$(OUTPUT)/%.o: %.c
+	$(call msg,CC,,$@)
+	$(Q)$(CC) $(CFLAGS) -c $(filter %.c,$^) $(LDLIBS) -o $@
+
 $(OUTPUT)/%:%.c
 	$(call msg,BINARY,,$@)
-	$(LINK.c) $^ $(LDLIBS) -o $@
+	$(Q)$(LINK.c) $^ $(LDLIBS) -o $@
 
 $(OUTPUT)/urandom_read: urandom_read.c
 	$(call msg,BINARY,,$@)
-	$(CC) $(LDFLAGS) -o $@ $< $(LDLIBS) -Wl,--build-id
+	$(Q)$(CC) $(LDFLAGS) -o $@ $< $(LDLIBS) -Wl,--build-id
 
 $(OUTPUT)/test_stub.o: test_stub.c $(BPFOBJ)
 	$(call msg,CC,,$@)
-	$(CC) -c $(CFLAGS) -o $@ $<
+	$(Q)$(CC) -c $(CFLAGS) -o $@ $<
 
 VMLINUX_BTF_PATHS ?= $(if $(O),$(O)/vmlinux)				\
 		     $(if $(KBUILD_OUTPUT),$(KBUILD_OUTPUT)/vmlinux)	\
@@ -142,7 +146,9 @@ VMLINUX_BTF_PATHS ?= $(if $(O),$(O)/vmlinux)				\
 		     /boot/vmlinux-$(shell uname -r)
 VMLINUX_BTF ?= $(abspath $(firstword $(wildcard $(VMLINUX_BTF_PATHS))))
 
-$(OUTPUT)/runqslower: $(BPFOBJ)
+DEFAULT_BPFTOOL := $(SCRATCH_DIR)/sbin/bpftool
+
+$(OUTPUT)/runqslower: $(BPFOBJ) | $(DEFAULT_BPFTOOL)
 	$(Q)$(MAKE) $(submake_extras) -C $(TOOLSDIR)/bpf/runqslower	\
 		    OUTPUT=$(SCRATCH_DIR)/ VMLINUX_BTF=$(VMLINUX_BTF)   \
 		    BPFOBJ=$(BPFOBJ) BPF_INCLUDE=$(INCLUDE_DIR) &&	\
@@ -164,7 +170,6 @@ $(OUTPUT)/test_netcnt: cgroup_helpers.c
 $(OUTPUT)/test_sock_fields: cgroup_helpers.c
 $(OUTPUT)/test_sysctl: cgroup_helpers.c
 
-DEFAULT_BPFTOOL := $(SCRATCH_DIR)/sbin/bpftool
 BPFTOOL ?= $(DEFAULT_BPFTOOL)
 $(DEFAULT_BPFTOOL): $(wildcard $(BPFTOOLDIR)/*.[ch] $(BPFTOOLDIR)/Makefile)    \
 		    $(BPFOBJ) | $(BUILD_DIR)/bpftool
@@ -180,15 +185,15 @@ $(BPFOBJ): $(wildcard $(BPFDIR)/*.[ch] $(BPFDIR)/Makefile)		       \
 
 $(BUILD_DIR)/libbpf $(BUILD_DIR)/bpftool $(BUILD_DIR)/resolve_btfids $(INCLUDE_DIR):
 	$(call msg,MKDIR,,$@)
-	mkdir -p $@
+	$(Q)mkdir -p $@
 
 $(INCLUDE_DIR)/vmlinux.h: $(VMLINUX_BTF) | $(BPFTOOL) $(INCLUDE_DIR)
 ifeq ($(VMLINUX_H),)
 	$(call msg,GEN,,$@)
-	$(BPFTOOL) btf dump file $(VMLINUX_BTF) format c > $@
+	$(Q)$(BPFTOOL) btf dump file $(VMLINUX_BTF) format c > $@
 else
 	$(call msg,CP,,$@)
-	cp "$(VMLINUX_H)" $@
+	$(Q)cp "$(VMLINUX_H)" $@
 endif
 
 $(RESOLVE_BTFIDS): $(BPFOBJ) | $(BUILD_DIR)/resolve_btfids	\
@@ -237,28 +242,28 @@ $(OUTPUT)/flow_dissector_load.o: flow_dissector_load.h
 # $4 - LDFLAGS
 define CLANG_BPF_BUILD_RULE
 	$(call msg,CLNG-LLC,$(TRUNNER_BINARY),$2)
-	($(CLANG) $3 -O2 -target bpf -emit-llvm				\
+	$(Q)($(CLANG) $3 -O2 -target bpf -emit-llvm			\
 		-c $1 -o - || echo "BPF obj compilation failed") | 	\
 	$(LLC) -mattr=dwarfris -march=bpf -mcpu=v3 $4 -filetype=obj -o $2
 endef
 # Similar to CLANG_BPF_BUILD_RULE, but with disabled alu32
 define CLANG_NOALU32_BPF_BUILD_RULE
 	$(call msg,CLNG-LLC,$(TRUNNER_BINARY),$2)
-	($(CLANG) $3 -O2 -target bpf -emit-llvm				\
+	$(Q)($(CLANG) $3 -O2 -target bpf -emit-llvm			\
 		-c $1 -o - || echo "BPF obj compilation failed") | 	\
 	$(LLC) -march=bpf -mcpu=v2 $4 -filetype=obj -o $2
 endef
 # Similar to CLANG_BPF_BUILD_RULE, but using native Clang and bpf LLC
 define CLANG_NATIVE_BPF_BUILD_RULE
 	$(call msg,CLNG-BPF,$(TRUNNER_BINARY),$2)
-	($(CLANG) $3 -O2 -emit-llvm					\
+	$(Q)($(CLANG) $3 -O2 -emit-llvm					\
 		-c $1 -o - || echo "BPF obj compilation failed") | 	\
 	$(LLC) -march=bpf -mcpu=v3 $4 -filetype=obj -o $2
 endef
 # Build BPF object using GCC
 define GCC_BPF_BUILD_RULE
 	$(call msg,GCC-BPF,$(TRUNNER_BINARY),$2)
-	$(BPF_GCC) $3 $4 -O2 -c $1 -o $2
+	$(Q)$(BPF_GCC) $3 $4 -O2 -c $1 -o $2
 endef
 
 SKEL_BLACKLIST := btf__% test_pinning_invalid.c test_sk_assign.c
@@ -300,7 +305,7 @@ ifeq ($($(TRUNNER_OUTPUT)-dir),)
 $(TRUNNER_OUTPUT)-dir := y
 $(TRUNNER_OUTPUT):
 	$$(call msg,MKDIR,,$$@)
-	mkdir -p $$@
+	$(Q)mkdir -p $$@
 endif
 
 # ensure we set up BPF objects generation rule just once for a given
@@ -320,7 +325,7 @@ $(TRUNNER_BPF_SKELS): $(TRUNNER_OUTPUT)/%.skel.h:			\
 		      $(TRUNNER_OUTPUT)/%.o				\
 		      | $(BPFTOOL) $(TRUNNER_OUTPUT)
 	$$(call msg,GEN-SKEL,$(TRUNNER_BINARY),$$@)
-	$$(BPFTOOL) gen skeleton $$< > $$@
+	$(Q)$$(BPFTOOL) gen skeleton $$< > $$@
 endif
 
 # ensure we set up tests.h header generation rule just once
@@ -344,7 +349,7 @@ $(TRUNNER_TEST_OBJS): $(TRUNNER_OUTPUT)/%.test.o:			\
 		      $(TRUNNER_BPF_SKELS)				\
 		      $$(BPFOBJ) | $(TRUNNER_OUTPUT)
 	$$(call msg,TEST-OBJ,$(TRUNNER_BINARY),$$@)
-	cd $$(@D) && $$(CC) -I. $$(CFLAGS) -c $(CURDIR)/$$< $$(LDLIBS) -o $$(@F)
+	$(Q)cd $$(@D) && $$(CC) -I. $$(CFLAGS) -c $(CURDIR)/$$< $$(LDLIBS) -o $$(@F)
 
 $(TRUNNER_EXTRA_OBJS): $(TRUNNER_OUTPUT)/%.o:				\
 		       %.c						\
@@ -352,13 +357,13 @@ $(TRUNNER_EXTRA_OBJS): $(TRUNNER_OUTPUT)/%.o:				\
 		       $(TRUNNER_TESTS_HDR)				\
 		       $$(BPFOBJ) | $(TRUNNER_OUTPUT)
 	$$(call msg,EXT-OBJ,$(TRUNNER_BINARY),$$@)
-	$$(CC) $$(CFLAGS) -c $$< $$(LDLIBS) -o $$@
+	$(Q)$$(CC) $$(CFLAGS) -c $$< $$(LDLIBS) -o $$@
 
 # only copy extra resources if in flavored build
 $(TRUNNER_BINARY)-extras: $(TRUNNER_EXTRA_FILES) | $(TRUNNER_OUTPUT)
 ifneq ($2,)
 	$$(call msg,EXT-COPY,$(TRUNNER_BINARY),$(TRUNNER_EXTRA_FILES))
-	cp -a $$^ $(TRUNNER_OUTPUT)/
+	$(Q)cp -a $$^ $(TRUNNER_OUTPUT)/
 endif
 
 $(OUTPUT)/$(TRUNNER_BINARY): $(TRUNNER_TEST_OBJS)			\
@@ -366,8 +371,8 @@ $(OUTPUT)/$(TRUNNER_BINARY): $(TRUNNER_TEST_OBJS)			\
 			     $(RESOLVE_BTFIDS)				\
 			     | $(TRUNNER_BINARY)-extras
 	$$(call msg,BINARY,,$$@)
-	$$(CC) $$(CFLAGS) $$(filter %.a %.o,$$^) $$(LDLIBS) -o $$@
-	$(RESOLVE_BTFIDS) --no-fail --btf btf_data.o $$@
+	$(Q)$$(CC) $$(CFLAGS) $$(filter %.a %.o,$$^) $$(LDLIBS) -o $$@
+	$(Q)$(RESOLVE_BTFIDS) --no-fail --btf btf_data.o $$@
 
 endef
 
@@ -420,17 +425,17 @@ verifier/tests.h: verifier/*.c
 		) > verifier/tests.h)
 $(OUTPUT)/test_verifier: test_verifier.c verifier/tests.h $(BPFOBJ) | $(OUTPUT)
 	$(call msg,BINARY,,$@)
-	$(CC) $(CFLAGS) $(filter %.a %.o %.c,$^) $(LDLIBS) -o $@
+	$(Q)$(CC) $(CFLAGS) $(filter %.a %.o %.c,$^) $(LDLIBS) -o $@
 
 # Make sure we are able to include and link libbpf against c++.
 $(OUTPUT)/test_cpp: test_cpp.cpp $(OUTPUT)/test_core_extern.skel.h $(BPFOBJ)
 	$(call msg,CXX,,$@)
-	$(CXX) $(CFLAGS) $^ $(LDLIBS) -o $@
+	$(Q)$(CXX) $(CFLAGS) $^ $(LDLIBS) -o $@
 
 # Benchmark runner
 $(OUTPUT)/bench_%.o: benchs/bench_%.c bench.h
 	$(call msg,CC,,$@)
-	$(CC) $(CFLAGS) -c $(filter %.c,$^) $(LDLIBS) -o $@
+	$(Q)$(CC) $(CFLAGS) -c $(filter %.c,$^) $(LDLIBS) -o $@
 $(OUTPUT)/bench_rename.o: $(OUTPUT)/test_overhead.skel.h
 $(OUTPUT)/bench_trigger.o: $(OUTPUT)/trigger_bench.skel.h
 $(OUTPUT)/bench_ringbufs.o: $(OUTPUT)/ringbuf_bench.skel.h \
@@ -443,7 +448,7 @@ $(OUTPUT)/bench: $(OUTPUT)/bench.o $(OUTPUT)/testing_helpers.o \
 		 $(OUTPUT)/bench_trigger.o \
 		 $(OUTPUT)/bench_ringbufs.o
 	$(call msg,BINARY,,$@)
-	$(CC) $(LDFLAGS) -o $@ $(filter %.a %.o,$^) $(LDLIBS)
+	$(Q)$(CC) $(LDFLAGS) -o $@ $(filter %.a %.o,$^) $(LDLIBS)
 
 EXTRA_CLEAN := $(TEST_CUSTOM_PROGS) $(SCRATCH_DIR)			\
 	prog_tests/tests.h map_tests/tests.h verifier/tests.h		\
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
index 4ffefdc1130f..7375d9a6d242 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
@@ -468,6 +468,7 @@ static void test_bpf_hash_map(void)
 	DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts);
 	struct bpf_iter_bpf_hash_map *skel;
 	int err, i, len, map_fd, iter_fd;
+	union bpf_iter_link_info linfo;
 	__u64 val, expected_val = 0;
 	struct bpf_link *link;
 	struct key_t {
@@ -490,13 +491,16 @@ static void test_bpf_hash_map(void)
 		goto out;
 
 	/* iterator with hashmap2 and hashmap3 should fail */
-	opts.map_fd = bpf_map__fd(skel->maps.hashmap2);
+	memset(&linfo, 0, sizeof(linfo));
+	linfo.map.map_fd = bpf_map__fd(skel->maps.hashmap2);
+	opts.link_info = &linfo;
+	opts.link_info_len = sizeof(linfo);
 	link = bpf_program__attach_iter(skel->progs.dump_bpf_hash_map, &opts);
 	if (CHECK(!IS_ERR(link), "attach_iter",
 		  "attach_iter for hashmap2 unexpected succeeded\n"))
 		goto out;
 
-	opts.map_fd = bpf_map__fd(skel->maps.hashmap3);
+	linfo.map.map_fd = bpf_map__fd(skel->maps.hashmap3);
 	link = bpf_program__attach_iter(skel->progs.dump_bpf_hash_map, &opts);
 	if (CHECK(!IS_ERR(link), "attach_iter",
 		  "attach_iter for hashmap3 unexpected succeeded\n"))
@@ -519,7 +523,7 @@ static void test_bpf_hash_map(void)
 			goto out;
 	}
 
-	opts.map_fd = map_fd;
+	linfo.map.map_fd = map_fd;
 	link = bpf_program__attach_iter(skel->progs.dump_bpf_hash_map, &opts);
 	if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n"))
 		goto out;
@@ -562,6 +566,7 @@ static void test_bpf_percpu_hash_map(void)
 	DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts);
 	struct bpf_iter_bpf_percpu_hash_map *skel;
 	int err, i, j, len, map_fd, iter_fd;
+	union bpf_iter_link_info linfo;
 	__u32 expected_val = 0;
 	struct bpf_link *link;
 	struct key_t {
@@ -606,7 +611,10 @@ static void test_bpf_percpu_hash_map(void)
 			goto out;
 	}
 
-	opts.map_fd = map_fd;
+	memset(&linfo, 0, sizeof(linfo));
+	linfo.map.map_fd = map_fd;
+	opts.link_info = &linfo;
+	opts.link_info_len = sizeof(linfo);
 	link = bpf_program__attach_iter(skel->progs.dump_bpf_percpu_hash_map, &opts);
 	if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n"))
 		goto out;
@@ -649,6 +657,7 @@ static void test_bpf_array_map(void)
 	DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts);
 	__u32 expected_key = 0, res_first_key;
 	struct bpf_iter_bpf_array_map *skel;
+	union bpf_iter_link_info linfo;
 	int err, i, map_fd, iter_fd;
 	struct bpf_link *link;
 	char buf[64] = {};
@@ -673,7 +682,10 @@ static void test_bpf_array_map(void)
 			goto out;
 	}
 
-	opts.map_fd = map_fd;
+	memset(&linfo, 0, sizeof(linfo));
+	linfo.map.map_fd = map_fd;
+	opts.link_info = &linfo;
+	opts.link_info_len = sizeof(linfo);
 	link = bpf_program__attach_iter(skel->progs.dump_bpf_array_map, &opts);
 	if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n"))
 		goto out;
@@ -730,6 +742,7 @@ static void test_bpf_percpu_array_map(void)
 	DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts);
 	struct bpf_iter_bpf_percpu_array_map *skel;
 	__u32 expected_key = 0, expected_val = 0;
+	union bpf_iter_link_info linfo;
 	int err, i, j, map_fd, iter_fd;
 	struct bpf_link *link;
 	char buf[64];
@@ -765,7 +778,10 @@ static void test_bpf_percpu_array_map(void)
 			goto out;
 	}
 
-	opts.map_fd = map_fd;
+	memset(&linfo, 0, sizeof(linfo));
+	linfo.map.map_fd = map_fd;
+	opts.link_info = &linfo;
+	opts.link_info_len = sizeof(linfo);
 	link = bpf_program__attach_iter(skel->progs.dump_bpf_percpu_array_map, &opts);
 	if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n"))
 		goto out;
@@ -803,6 +819,7 @@ static void test_bpf_sk_storage_map(void)
 	DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts);
 	int err, i, len, map_fd, iter_fd, num_sockets;
 	struct bpf_iter_bpf_sk_storage_map *skel;
+	union bpf_iter_link_info linfo;
 	int sock_fd[3] = {-1, -1, -1};
 	__u32 val, expected_val = 0;
 	struct bpf_link *link;
@@ -829,7 +846,10 @@ static void test_bpf_sk_storage_map(void)
 			goto out;
 	}
 
-	opts.map_fd = map_fd;
+	memset(&linfo, 0, sizeof(linfo));
+	linfo.map.map_fd = map_fd;
+	opts.link_info = &linfo;
+	opts.link_info_len = sizeof(linfo);
 	link = bpf_program__attach_iter(skel->progs.dump_bpf_sk_storage_map, &opts);
 	if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n"))
 		goto out;
@@ -871,6 +891,7 @@ static void test_rdonly_buf_out_of_bound(void)
 {
 	DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts);
 	struct bpf_iter_test_kern5 *skel;
+	union bpf_iter_link_info linfo;
 	struct bpf_link *link;
 
 	skel = bpf_iter_test_kern5__open_and_load();
@@ -878,7 +899,10 @@ static void test_rdonly_buf_out_of_bound(void)
 		  "skeleton open_and_load failed\n"))
 		return;
 
-	opts.map_fd = bpf_map__fd(skel->maps.hashmap1);
+	memset(&linfo, 0, sizeof(linfo));
+	linfo.map.map_fd = bpf_map__fd(skel->maps.hashmap1);
+	opts.link_info = &linfo;
+	opts.link_info_len = sizeof(linfo);
 	link = bpf_program__attach_iter(skel->progs.dump_bpf_hash_map, &opts);
 	if (CHECK(!IS_ERR(link), "attach_iter", "unexpected success\n"))
 		bpf_link__destroy(link);
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c b/tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c
index 7afa4160416f..284d5921c345 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c
@@ -159,15 +159,15 @@ void test_bpf_obj_id(void)
 		/* Check getting link info */
 		info_len = sizeof(struct bpf_link_info) * 2;
 		bzero(&link_infos[i], info_len);
-		link_infos[i].raw_tracepoint.tp_name = (__u64)&tp_name;
+		link_infos[i].raw_tracepoint.tp_name = ptr_to_u64(&tp_name);
 		link_infos[i].raw_tracepoint.tp_name_len = sizeof(tp_name);
 		err = bpf_obj_get_info_by_fd(bpf_link__fd(links[i]),
 					     &link_infos[i], &info_len);
 		if (CHECK(err ||
 			  link_infos[i].type != BPF_LINK_TYPE_RAW_TRACEPOINT ||
 			  link_infos[i].prog_id != prog_infos[i].id ||
-			  link_infos[i].raw_tracepoint.tp_name != (__u64)&tp_name ||
-			  strcmp((char *)link_infos[i].raw_tracepoint.tp_name,
+			  link_infos[i].raw_tracepoint.tp_name != ptr_to_u64(&tp_name) ||
+			  strcmp(u64_to_ptr(link_infos[i].raw_tracepoint.tp_name),
 				 "sys_enter") ||
 			  info_len != sizeof(struct bpf_link_info),
 			  "get-link-info(fd)",
@@ -178,7 +178,7 @@ void test_bpf_obj_id(void)
 			  link_infos[i].type, BPF_LINK_TYPE_RAW_TRACEPOINT,
 			  link_infos[i].id,
 			  link_infos[i].prog_id, prog_infos[i].id,
-			  (char *)link_infos[i].raw_tracepoint.tp_name,
+			  (const char *)u64_to_ptr(link_infos[i].raw_tracepoint.tp_name),
 			  "sys_enter"))
 			goto done;
 
diff --git a/tools/testing/selftests/bpf/prog_tests/btf_dump.c b/tools/testing/selftests/bpf/prog_tests/btf_dump.c
index cb33a7ee4e04..39fb81d9daeb 100644
--- a/tools/testing/selftests/bpf/prog_tests/btf_dump.c
+++ b/tools/testing/selftests/bpf/prog_tests/btf_dump.c
@@ -12,15 +12,16 @@ void btf_dump_printf(void *ctx, const char *fmt, va_list args)
 static struct btf_dump_test_case {
 	const char *name;
 	const char *file;
+	bool known_ptr_sz;
 	struct btf_dump_opts opts;
 } btf_dump_test_cases[] = {
-	{"btf_dump: syntax", "btf_dump_test_case_syntax", {}},
-	{"btf_dump: ordering", "btf_dump_test_case_ordering", {}},
-	{"btf_dump: padding", "btf_dump_test_case_padding", {}},
-	{"btf_dump: packing", "btf_dump_test_case_packing", {}},
-	{"btf_dump: bitfields", "btf_dump_test_case_bitfields", {}},
-	{"btf_dump: multidim", "btf_dump_test_case_multidim", {}},
-	{"btf_dump: namespacing", "btf_dump_test_case_namespacing", {}},
+	{"btf_dump: syntax", "btf_dump_test_case_syntax", true, {}},
+	{"btf_dump: ordering", "btf_dump_test_case_ordering", false, {}},
+	{"btf_dump: padding", "btf_dump_test_case_padding", true, {}},
+	{"btf_dump: packing", "btf_dump_test_case_packing", true, {}},
+	{"btf_dump: bitfields", "btf_dump_test_case_bitfields", true, {}},
+	{"btf_dump: multidim", "btf_dump_test_case_multidim", false, {}},
+	{"btf_dump: namespacing", "btf_dump_test_case_namespacing", false, {}},
 };
 
 static int btf_dump_all_types(const struct btf *btf,
@@ -62,6 +63,18 @@ static int test_btf_dump_case(int n, struct btf_dump_test_case *t)
 		goto done;
 	}
 
+	/* tests with t->known_ptr_sz have no "long" or "unsigned long" type,
+	 * so it's impossible to determine correct pointer size; but if they
+	 * do, it should be 8 regardless of host architecture, becaues BPF
+	 * target is always 64-bit
+	 */
+	if (!t->known_ptr_sz) {
+		btf__set_pointer_size(btf, 8);
+	} else {
+		CHECK(btf__pointer_size(btf) != 8, "ptr_sz", "exp %d, got %zu\n",
+		      8, btf__pointer_size(btf));
+	}
+
 	snprintf(out_file, sizeof(out_file), "/tmp/%s.output.XXXXXX", t->file);
 	fd = mkstemp(out_file);
 	if (CHECK(fd < 0, "create_tmp", "failed to create file: %d\n", fd)) {
diff --git a/tools/testing/selftests/bpf/prog_tests/core_extern.c b/tools/testing/selftests/bpf/prog_tests/core_extern.c
index b093787e9448..1931a158510e 100644
--- a/tools/testing/selftests/bpf/prog_tests/core_extern.c
+++ b/tools/testing/selftests/bpf/prog_tests/core_extern.c
@@ -159,8 +159,8 @@ void test_core_extern(void)
 		exp = (uint64_t *)&t->data;
 		for (j = 0; j < n; j++) {
 			CHECK(got[j] != exp[j], "check_res",
-			      "result #%d: expected %lx, but got %lx\n",
-			       j, exp[j], got[j]);
+			      "result #%d: expected %llx, but got %llx\n",
+			       j, (__u64)exp[j], (__u64)got[j]);
 		}
 cleanup:
 		test_core_extern__destroy(skel);
diff --git a/tools/testing/selftests/bpf/prog_tests/core_reloc.c b/tools/testing/selftests/bpf/prog_tests/core_reloc.c
index 084ed26a7d78..a54eafc5e4b3 100644
--- a/tools/testing/selftests/bpf/prog_tests/core_reloc.c
+++ b/tools/testing/selftests/bpf/prog_tests/core_reloc.c
@@ -237,8 +237,8 @@
 		.union_sz = sizeof(((type *)0)->union_field),		\
 		.arr_sz = sizeof(((type *)0)->arr_field),		\
 		.arr_elem_sz = sizeof(((type *)0)->arr_field[0]),	\
-		.ptr_sz = sizeof(((type *)0)->ptr_field),		\
-		.enum_sz = sizeof(((type *)0)->enum_field),	\
+		.ptr_sz = 8, /* always 8-byte pointer for BPF */	\
+		.enum_sz = sizeof(((type *)0)->enum_field),		\
 	}
 
 #define SIZE_CASE(name) {						\
@@ -432,20 +432,20 @@ static struct core_reloc_test_case test_cases[] = {
 		.sb4 = -1,
 		.sb20 = -0x17654321,
 		.u32 = 0xBEEF,
-		.s32 = -0x3FEDCBA987654321,
+		.s32 = -0x3FEDCBA987654321LL,
 	}),
 	BITFIELDS_CASE(bitfields___bitfield_vs_int, {
-		.ub1 = 0xFEDCBA9876543210,
+		.ub1 = 0xFEDCBA9876543210LL,
 		.ub2 = 0xA6,
-		.ub7 = -0x7EDCBA987654321,
-		.sb4 = -0x6123456789ABCDE,
-		.sb20 = 0xD00D,
+		.ub7 = -0x7EDCBA987654321LL,
+		.sb4 = -0x6123456789ABCDELL,
+		.sb20 = 0xD00DLL,
 		.u32 = -0x76543,
-		.s32 = 0x0ADEADBEEFBADB0B,
+		.s32 = 0x0ADEADBEEFBADB0BLL,
 	}),
 	BITFIELDS_CASE(bitfields___just_big_enough, {
-		.ub1 = 0xF,
-		.ub2 = 0x0812345678FEDCBA,
+		.ub1 = 0xFLL,
+		.ub2 = 0x0812345678FEDCBALL,
 	}),
 	BITFIELDS_ERR_CASE(bitfields___err_too_big_bitfield),
 
diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c b/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c
index a895bfed55db..197d0d217b56 100644
--- a/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c
+++ b/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c
@@ -16,7 +16,7 @@ static void test_fexit_bpf2bpf_common(const char *obj_file,
 	__u32 duration = 0, retval;
 	struct bpf_map *data_map;
 	const int zero = 0;
-	u64 *result = NULL;
+	__u64 *result = NULL;
 
 	err = bpf_prog_load(target_obj_file, BPF_PROG_TYPE_UNSPEC,
 			    &pkt_obj, &pkt_fd);
@@ -29,7 +29,7 @@ static void test_fexit_bpf2bpf_common(const char *obj_file,
 
 	link = calloc(sizeof(struct bpf_link *), prog_cnt);
 	prog = calloc(sizeof(struct bpf_program *), prog_cnt);
-	result = malloc((prog_cnt + 32 /* spare */) * sizeof(u64));
+	result = malloc((prog_cnt + 32 /* spare */) * sizeof(__u64));
 	if (CHECK(!link || !prog || !result, "alloc_memory",
 		  "failed to alloc memory"))
 		goto close_prog;
@@ -72,7 +72,7 @@ static void test_fexit_bpf2bpf_common(const char *obj_file,
 		goto close_prog;
 
 	for (i = 0; i < prog_cnt; i++)
-		if (CHECK(result[i] != 1, "result", "fexit_bpf2bpf failed err %ld\n",
+		if (CHECK(result[i] != 1, "result", "fexit_bpf2bpf failed err %llu\n",
 			  result[i]))
 			goto close_prog;
 
diff --git a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c
index f11f187990e9..cd6dc80edf18 100644
--- a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c
+++ b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c
@@ -591,7 +591,7 @@ void test_flow_dissector(void)
 		CHECK_ATTR(tattr.data_size_out != sizeof(flow_keys) ||
 			   err || tattr.retval != 1,
 			   tests[i].name,
-			   "err %d errno %d retval %d duration %d size %u/%lu\n",
+			   "err %d errno %d retval %d duration %d size %u/%zu\n",
 			   err, errno, tattr.retval, tattr.duration,
 			   tattr.data_size_out, sizeof(flow_keys));
 		CHECK_FLOW_KEYS(tests[i].name, flow_keys, tests[i].keys);
diff --git a/tools/testing/selftests/bpf/prog_tests/global_data.c b/tools/testing/selftests/bpf/prog_tests/global_data.c
index e3cb62b0a110..9efa7e50eab2 100644
--- a/tools/testing/selftests/bpf/prog_tests/global_data.c
+++ b/tools/testing/selftests/bpf/prog_tests/global_data.c
@@ -5,7 +5,7 @@
 static void test_global_data_number(struct bpf_object *obj, __u32 duration)
 {
 	int i, err, map_fd;
-	uint64_t num;
+	__u64 num;
 
 	map_fd = bpf_find_map(__func__, obj, "result_number");
 	if (CHECK_FAIL(map_fd < 0))
@@ -14,7 +14,7 @@ static void test_global_data_number(struct bpf_object *obj, __u32 duration)
 	struct {
 		char *name;
 		uint32_t key;
-		uint64_t num;
+		__u64 num;
 	} tests[] = {
 		{ "relocate .bss reference",     0, 0 },
 		{ "relocate .data reference",    1, 42 },
@@ -32,7 +32,7 @@ static void test_global_data_number(struct bpf_object *obj, __u32 duration)
 	for (i = 0; i < sizeof(tests) / sizeof(tests[0]); i++) {
 		err = bpf_map_lookup_elem(map_fd, &tests[i].key, &num);
 		CHECK(err || num != tests[i].num, tests[i].name,
-		      "err %d result %lx expected %lx\n",
+		      "err %d result %llx expected %llx\n",
 		      err, num, tests[i].num);
 	}
 }
diff --git a/tools/testing/selftests/bpf/prog_tests/mmap.c b/tools/testing/selftests/bpf/prog_tests/mmap.c
index 43d0b5578f46..9c3c5c0f068f 100644
--- a/tools/testing/selftests/bpf/prog_tests/mmap.c
+++ b/tools/testing/selftests/bpf/prog_tests/mmap.c
@@ -21,7 +21,7 @@ void test_mmap(void)
 	const long page_size = sysconf(_SC_PAGE_SIZE);
 	int err, duration = 0, i, data_map_fd, data_map_id, tmp_fd, rdmap_fd;
 	struct bpf_map *data_map, *bss_map;
-	void *bss_mmaped = NULL, *map_mmaped = NULL, *tmp1, *tmp2;
+	void *bss_mmaped = NULL, *map_mmaped = NULL, *tmp0, *tmp1, *tmp2;
 	struct test_mmap__bss *bss_data;
 	struct bpf_map_info map_info;
 	__u32 map_info_sz = sizeof(map_info);
@@ -183,16 +183,23 @@ void test_mmap(void)
 
 	/* check some more advanced mmap() manipulations */
 
+	tmp0 = mmap(NULL, 4 * page_size, PROT_READ, MAP_SHARED | MAP_ANONYMOUS,
+			  -1, 0);
+	if (CHECK(tmp0 == MAP_FAILED, "adv_mmap0", "errno %d\n", errno))
+		goto cleanup;
+
 	/* map all but last page: pages 1-3 mapped */
-	tmp1 = mmap(NULL, 3 * page_size, PROT_READ, MAP_SHARED,
+	tmp1 = mmap(tmp0, 3 * page_size, PROT_READ, MAP_SHARED | MAP_FIXED,
 			  data_map_fd, 0);
-	if (CHECK(tmp1 == MAP_FAILED, "adv_mmap1", "errno %d\n", errno))
+	if (CHECK(tmp0 != tmp1, "adv_mmap1", "tmp0: %p, tmp1: %p\n", tmp0, tmp1)) {
+		munmap(tmp0, 4 * page_size);
 		goto cleanup;
+	}
 
 	/* unmap second page: pages 1, 3 mapped */
 	err = munmap(tmp1 + page_size, page_size);
 	if (CHECK(err, "adv_mmap2", "errno %d\n", errno)) {
-		munmap(tmp1, map_sz);
+		munmap(tmp1, 4 * page_size);
 		goto cleanup;
 	}
 
@@ -201,7 +208,7 @@ void test_mmap(void)
 		    MAP_SHARED | MAP_FIXED, data_map_fd, 0);
 	if (CHECK(tmp2 == MAP_FAILED, "adv_mmap3", "errno %d\n", errno)) {
 		munmap(tmp1, page_size);
-		munmap(tmp1 + 2*page_size, page_size);
+		munmap(tmp1 + 2*page_size, 2 * page_size);
 		goto cleanup;
 	}
 	CHECK(tmp1 + page_size != tmp2, "adv_mmap4",
@@ -211,7 +218,7 @@ void test_mmap(void)
 	tmp2 = mmap(tmp1, 4 * page_size, PROT_READ, MAP_SHARED | MAP_FIXED,
 		    data_map_fd, 0);
 	if (CHECK(tmp2 == MAP_FAILED, "adv_mmap5", "errno %d\n", errno)) {
-		munmap(tmp1, 3 * page_size); /* unmap page 1 */
+		munmap(tmp1, 4 * page_size); /* unmap page 1 */
 		goto cleanup;
 	}
 	CHECK(tmp1 != tmp2, "adv_mmap6", "tmp1: %p, tmp2: %p\n", tmp1, tmp2);
diff --git a/tools/testing/selftests/bpf/prog_tests/prog_run_xattr.c b/tools/testing/selftests/bpf/prog_tests/prog_run_xattr.c
index dde2b7ae7bc9..935a294f049a 100644
--- a/tools/testing/selftests/bpf/prog_tests/prog_run_xattr.c
+++ b/tools/testing/selftests/bpf/prog_tests/prog_run_xattr.c
@@ -28,7 +28,7 @@ void test_prog_run_xattr(void)
 	      "err %d errno %d retval %d\n", err, errno, tattr.retval);
 
 	CHECK_ATTR(tattr.data_size_out != sizeof(pkt_v4), "data_size_out",
-	      "incorrect output size, want %lu have %u\n",
+	      "incorrect output size, want %zu have %u\n",
 	      sizeof(pkt_v4), tattr.data_size_out);
 
 	CHECK_ATTR(buf[5] != 0, "overflow",
diff --git a/tools/testing/selftests/bpf/prog_tests/send_signal.c b/tools/testing/selftests/bpf/prog_tests/send_signal.c
index 504abb7bfb95..7043e6ded0e6 100644
--- a/tools/testing/selftests/bpf/prog_tests/send_signal.c
+++ b/tools/testing/selftests/bpf/prog_tests/send_signal.c
@@ -48,21 +48,19 @@ static void test_send_signal_common(struct perf_event_attr *attr,
 		close(pipe_p2c[1]); /* close write */
 
 		/* notify parent signal handler is installed */
-		write(pipe_c2p[1], buf, 1);
+		CHECK(write(pipe_c2p[1], buf, 1) != 1, "pipe_write", "err %d\n", -errno);
 
 		/* make sure parent enabled bpf program to send_signal */
-		read(pipe_p2c[0], buf, 1);
+		CHECK(read(pipe_p2c[0], buf, 1) != 1, "pipe_read", "err %d\n", -errno);
 
 		/* wait a little for signal handler */
 		sleep(1);
 
-		if (sigusr1_received)
-			write(pipe_c2p[1], "2", 1);
-		else
-			write(pipe_c2p[1], "0", 1);
+		buf[0] = sigusr1_received ? '2' : '0';
+		CHECK(write(pipe_c2p[1], buf, 1) != 1, "pipe_write", "err %d\n", -errno);
 
 		/* wait for parent notification and exit */
-		read(pipe_p2c[0], buf, 1);
+		CHECK(read(pipe_p2c[0], buf, 1) != 1, "pipe_read", "err %d\n", -errno);
 
 		close(pipe_c2p[1]);
 		close(pipe_p2c[0]);
@@ -99,7 +97,7 @@ static void test_send_signal_common(struct perf_event_attr *attr,
 	}
 
 	/* wait until child signal handler installed */
-	read(pipe_c2p[0], buf, 1);
+	CHECK(read(pipe_c2p[0], buf, 1) != 1, "pipe_read", "err %d\n", -errno);
 
 	/* trigger the bpf send_signal */
 	skel->bss->pid = pid;
@@ -107,7 +105,7 @@ static void test_send_signal_common(struct perf_event_attr *attr,
 	skel->bss->signal_thread = signal_thread;
 
 	/* notify child that bpf program can send_signal now */
-	write(pipe_p2c[1], buf, 1);
+	CHECK(write(pipe_p2c[1], buf, 1) != 1, "pipe_write", "err %d\n", -errno);
 
 	/* wait for result */
 	err = read(pipe_c2p[0], buf, 1);
@@ -121,7 +119,7 @@ static void test_send_signal_common(struct perf_event_attr *attr,
 	CHECK(buf[0] != '2', test_name, "incorrect result\n");
 
 	/* notify child safe to exit */
-	write(pipe_p2c[1], buf, 1);
+	CHECK(write(pipe_p2c[1], buf, 1) != 1, "pipe_write", "err %d\n", -errno);
 
 disable_pmu:
 	close(pmu_fd);
diff --git a/tools/testing/selftests/bpf/prog_tests/sk_lookup.c b/tools/testing/selftests/bpf/prog_tests/sk_lookup.c
index c571584c00f5..9ff0412e1fd3 100644
--- a/tools/testing/selftests/bpf/prog_tests/sk_lookup.c
+++ b/tools/testing/selftests/bpf/prog_tests/sk_lookup.c
@@ -309,6 +309,7 @@ static void v4_to_v6(struct sockaddr_storage *ss)
 	v6->sin6_addr.s6_addr[10] = 0xff;
 	v6->sin6_addr.s6_addr[11] = 0xff;
 	memcpy(&v6->sin6_addr.s6_addr[12], &v4.sin_addr.s_addr, 4);
+	memset(&v6->sin6_addr.s6_addr[0], 0, 10);
 }
 
 static int udp_recv_send(int server_fd)
diff --git a/tools/testing/selftests/bpf/prog_tests/skb_ctx.c b/tools/testing/selftests/bpf/prog_tests/skb_ctx.c
index 25de86af2d03..fafeddaad6a9 100644
--- a/tools/testing/selftests/bpf/prog_tests/skb_ctx.c
+++ b/tools/testing/selftests/bpf/prog_tests/skb_ctx.c
@@ -81,7 +81,7 @@ void test_skb_ctx(void)
 
 	CHECK_ATTR(tattr.ctx_size_out != sizeof(skb),
 		   "ctx_size_out",
-		   "incorrect output size, want %lu have %u\n",
+		   "incorrect output size, want %zu have %u\n",
 		   sizeof(skb), tattr.ctx_size_out);
 
 	for (i = 0; i < 5; i++)
diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c
index f002e3090d92..11a769e18f5d 100644
--- a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c
+++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c
@@ -6,11 +6,13 @@ static __u64 read_perf_max_sample_freq(void)
 {
 	__u64 sample_freq = 5000; /* fallback to 5000 on error */
 	FILE *f;
+	__u32 duration = 0;
 
 	f = fopen("/proc/sys/kernel/perf_event_max_sample_rate", "r");
 	if (f == NULL)
 		return sample_freq;
-	fscanf(f, "%llu", &sample_freq);
+	CHECK(fscanf(f, "%llu", &sample_freq) != 1, "Get max sample rate",
+		  "return default value: 5000,err %d\n", -errno);
 	fclose(f);
 	return sample_freq;
 }
diff --git a/tools/testing/selftests/bpf/prog_tests/test_global_funcs.c b/tools/testing/selftests/bpf/prog_tests/test_global_funcs.c
index 25b068591e9a..193002b14d7f 100644
--- a/tools/testing/selftests/bpf/prog_tests/test_global_funcs.c
+++ b/tools/testing/selftests/bpf/prog_tests/test_global_funcs.c
@@ -19,7 +19,7 @@ static int libbpf_debug_print(enum libbpf_print_level level,
 	log_buf = va_arg(args, char *);
 	if (!log_buf)
 		goto out;
-	if (strstr(log_buf, err_str) == 0)
+	if (err_str && strstr(log_buf, err_str) == 0)
 		found = true;
 out:
 	printf(format, log_buf);
diff --git a/tools/testing/selftests/bpf/prog_tests/varlen.c b/tools/testing/selftests/bpf/prog_tests/varlen.c
index c75525eab02c..dd324b4933db 100644
--- a/tools/testing/selftests/bpf/prog_tests/varlen.c
+++ b/tools/testing/selftests/bpf/prog_tests/varlen.c
@@ -44,25 +44,25 @@ void test_varlen(void)
 	CHECK_VAL(bss->payload1_len2, size2);
 	CHECK_VAL(bss->total1, size1 + size2);
 	CHECK(memcmp(bss->payload1, exp_str, size1 + size2), "content_check",
-	      "doesn't match!");
+	      "doesn't match!\n");
 
 	CHECK_VAL(data->payload2_len1, size1);
 	CHECK_VAL(data->payload2_len2, size2);
 	CHECK_VAL(data->total2, size1 + size2);
 	CHECK(memcmp(data->payload2, exp_str, size1 + size2), "content_check",
-	      "doesn't match!");
+	      "doesn't match!\n");
 
 	CHECK_VAL(data->payload3_len1, size1);
 	CHECK_VAL(data->payload3_len2, size2);
 	CHECK_VAL(data->total3, size1 + size2);
 	CHECK(memcmp(data->payload3, exp_str, size1 + size2), "content_check",
-	      "doesn't match!");
+	      "doesn't match!\n");
 
 	CHECK_VAL(data->payload4_len1, size1);
 	CHECK_VAL(data->payload4_len2, size2);
 	CHECK_VAL(data->total4, size1 + size2);
 	CHECK(memcmp(data->payload4, exp_str, size1 + size2), "content_check",
-	      "doesn't match!");
+	      "doesn't match!\n");
 cleanup:
 	test_varlen__destroy(skel);
 }
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_hash_map.c b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_hash_map.c
index 07ddbfdbcab7..6dfce3fd68bc 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_hash_map.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_hash_map.c
@@ -47,7 +47,10 @@ int dump_bpf_hash_map(struct bpf_iter__bpf_map_elem *ctx)
 	__u32 seq_num = ctx->meta->seq_num;
 	struct bpf_map *map = ctx->map;
 	struct key_t *key = ctx->key;
+	struct key_t tmp_key;
 	__u64 *val = ctx->value;
+	__u64 tmp_val = 0;
+	int ret;
 
 	if (in_test_mode) {
 		/* test mode is used by selftests to
@@ -61,6 +64,18 @@ int dump_bpf_hash_map(struct bpf_iter__bpf_map_elem *ctx)
 		if (key == (void *)0 || val == (void *)0)
 			return 0;
 
+		/* update the value and then delete the <key, value> pair.
+		 * it should not impact the existing 'val' which is still
+		 * accessible under rcu.
+		 */
+		__builtin_memcpy(&tmp_key, key, sizeof(struct key_t));
+		ret = bpf_map_update_elem(&hashmap1, &tmp_key, &tmp_val, 0);
+		if (ret)
+			return 0;
+		ret = bpf_map_delete_elem(&hashmap1, &tmp_key);
+		if (ret)
+			return 0;
+
 		key_sum_a += key->a;
 		key_sum_b += key->b;
 		key_sum_c += key->c;
diff --git a/tools/testing/selftests/bpf/progs/core_reloc_types.h b/tools/testing/selftests/bpf/progs/core_reloc_types.h
index 34d84717c946..69139ed66216 100644
--- a/tools/testing/selftests/bpf/progs/core_reloc_types.h
+++ b/tools/testing/selftests/bpf/progs/core_reloc_types.h
@@ -1,5 +1,10 @@
 #include <stdint.h>
 #include <stdbool.h>
+
+void preserce_ptr_sz_fn(long x) {}
+
+#define __bpf_aligned __attribute__((aligned(8)))
+
 /*
  * KERNEL
  */
@@ -444,51 +449,51 @@ struct core_reloc_primitives {
 	char a;
 	int b;
 	enum core_reloc_primitives_enum c;
-	void *d;
-	int (*f)(const char *);
+	void *d __bpf_aligned;
+	int (*f)(const char *) __bpf_aligned;
 };
 
 struct core_reloc_primitives___diff_enum_def {
 	char a;
 	int b;
-	void *d;
-	int (*f)(const char *);
+	void *d __bpf_aligned;
+	int (*f)(const char *) __bpf_aligned;
 	enum {
 		X = 100,
 		Y = 200,
-	} c; /* inline enum def with differing set of values */
+	} c __bpf_aligned; /* inline enum def with differing set of values */
 };
 
 struct core_reloc_primitives___diff_func_proto {
-	void (*f)(int); /* incompatible function prototype */
-	void *d;
-	enum core_reloc_primitives_enum c;
+	void (*f)(int) __bpf_aligned; /* incompatible function prototype */
+	void *d __bpf_aligned;
+	enum core_reloc_primitives_enum c __bpf_aligned;
 	int b;
 	char a;
 };
 
 struct core_reloc_primitives___diff_ptr_type {
-	const char * const d; /* different pointee type + modifiers */
-	char a;
+	const char * const d __bpf_aligned; /* different pointee type + modifiers */
+	char a __bpf_aligned;
 	int b;
 	enum core_reloc_primitives_enum c;
-	int (*f)(const char *);
+	int (*f)(const char *) __bpf_aligned;
 };
 
 struct core_reloc_primitives___err_non_enum {
 	char a[1];
 	int b;
 	int c; /* int instead of enum */
-	void *d;
-	int (*f)(const char *);
+	void *d __bpf_aligned;
+	int (*f)(const char *) __bpf_aligned;
 };
 
 struct core_reloc_primitives___err_non_int {
 	char a[1];
-	int *b; /* ptr instead of int */
-	enum core_reloc_primitives_enum c;
-	void *d;
-	int (*f)(const char *);
+	int *b __bpf_aligned; /* ptr instead of int */
+	enum core_reloc_primitives_enum c __bpf_aligned;
+	void *d __bpf_aligned;
+	int (*f)(const char *) __bpf_aligned;
 };
 
 struct core_reloc_primitives___err_non_ptr {
@@ -496,7 +501,7 @@ struct core_reloc_primitives___err_non_ptr {
 	int b;
 	enum core_reloc_primitives_enum c;
 	int d; /* int instead of ptr */
-	int (*f)(const char *);
+	int (*f)(const char *) __bpf_aligned;
 };
 
 /*
@@ -507,7 +512,7 @@ struct core_reloc_mods_output {
 };
 
 typedef const int int_t;
-typedef const char *char_ptr_t;
+typedef const char *char_ptr_t __bpf_aligned;
 typedef const int arr_t[7];
 
 struct core_reloc_mods_substruct {
@@ -523,9 +528,9 @@ typedef struct {
 struct core_reloc_mods {
 	int a;
 	int_t b;
-	char *c;
+	char *c __bpf_aligned;
 	char_ptr_t d;
-	int e[3];
+	int e[3] __bpf_aligned;
 	arr_t f;
 	struct core_reloc_mods_substruct g;
 	core_reloc_mods_substruct_t h;
@@ -535,9 +540,9 @@ struct core_reloc_mods {
 struct core_reloc_mods___mod_swap {
 	int b;
 	int_t a;
-	char *d;
+	char *d __bpf_aligned;
 	char_ptr_t c;
-	int f[3];
+	int f[3] __bpf_aligned;
 	arr_t e;
 	struct {
 		int y;
@@ -555,7 +560,7 @@ typedef arr1_t arr2_t;
 typedef arr2_t arr3_t;
 typedef arr3_t arr4_t;
 
-typedef const char * const volatile fancy_char_ptr_t;
+typedef const char * const volatile fancy_char_ptr_t __bpf_aligned;
 
 typedef core_reloc_mods_substruct_t core_reloc_mods_substruct_tt;
 
@@ -567,7 +572,7 @@ struct core_reloc_mods___typedefs {
 	arr4_t e;
 	fancy_char_ptr_t d;
 	fancy_char_ptr_t c;
-	int3_t b;
+	int3_t b __bpf_aligned;
 	int3_t a;
 };
 
@@ -739,19 +744,19 @@ struct core_reloc_bitfields___bit_sz_change {
 	int8_t		sb4: 1;		/*  4 ->  1 */
 	int32_t		sb20: 30;	/* 20 -> 30 */
 	/* non-bitfields */
-	uint16_t	u32;		/* 32 -> 16 */
-	int64_t		s32;		/* 32 -> 64 */
+	uint16_t	u32;			/* 32 -> 16 */
+	int64_t		s32 __bpf_aligned;	/* 32 -> 64 */
 };
 
 /* turn bitfield into non-bitfield and vice versa */
 struct core_reloc_bitfields___bitfield_vs_int {
 	uint64_t	ub1;		/*  3 -> 64 non-bitfield */
 	uint8_t		ub2;		/* 20 ->  8 non-bitfield */
-	int64_t		ub7;		/*  7 -> 64 non-bitfield signed */
-	int64_t		sb4;		/*  4 -> 64 non-bitfield signed */
-	uint64_t	sb20;		/* 20 -> 16 non-bitfield unsigned */
-	int32_t		u32: 20;	/* 32 non-bitfield -> 20 bitfield */
-	uint64_t	s32: 60;	/* 32 non-bitfield -> 60 bitfield */
+	int64_t		ub7 __bpf_aligned;	/*  7 -> 64 non-bitfield signed */
+	int64_t		sb4 __bpf_aligned;	/*  4 -> 64 non-bitfield signed */
+	uint64_t	sb20 __bpf_aligned;	/* 20 -> 16 non-bitfield unsigned */
+	int32_t		u32: 20;		/* 32 non-bitfield -> 20 bitfield */
+	uint64_t	s32: 60 __bpf_aligned;	/* 32 non-bitfield -> 60 bitfield */
 };
 
 struct core_reloc_bitfields___just_big_enough {
diff --git a/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c b/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c
index 1f1966e86e9f..3e6912e4df3d 100644
--- a/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c
+++ b/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c
@@ -54,6 +54,7 @@ SEC("sockops")
 int bpf_testcb(struct bpf_sock_ops *skops)
 {
 	char header[sizeof(struct ipv6hdr) + sizeof(struct tcphdr)];
+	struct bpf_sock_ops *reuse = skops;
 	struct tcphdr *thdr;
 	int good_call_rv = 0;
 	int bad_call_rv = 0;
@@ -62,6 +63,46 @@ int bpf_testcb(struct bpf_sock_ops *skops)
 	int v = 0;
 	int op;
 
+	/* Test reading fields in bpf_sock_ops using single register */
+	asm volatile (
+		"%[reuse] = *(u32 *)(%[reuse] +96)"
+		: [reuse] "+r"(reuse)
+		:);
+
+	asm volatile (
+		"%[op] = *(u32 *)(%[skops] +96)"
+		: [op] "+r"(op)
+		: [skops] "r"(skops)
+		:);
+
+	asm volatile (
+		"r9 = %[skops];\n"
+		"r8 = *(u32 *)(r9 +164);\n"
+		"*(u32 *)(r9 +164) = r8;\n"
+		:: [skops] "r"(skops)
+		: "r9", "r8");
+
+	asm volatile (
+		"r1 = %[skops];\n"
+		"r1 = *(u64 *)(r1 +184);\n"
+		"if r1 == 0 goto +1;\n"
+		"r1 = *(u32 *)(r1 +4);\n"
+		:: [skops] "r"(skops):"r1");
+
+	asm volatile (
+		"r9 = %[skops];\n"
+		"r9 = *(u64 *)(r9 +184);\n"
+		"if r9 == 0 goto +1;\n"
+		"r9 = *(u32 *)(r9 +4);\n"
+		:: [skops] "r"(skops):"r9");
+
+	asm volatile (
+		"r1 = %[skops];\n"
+		"r2 = *(u64 *)(r1 +184);\n"
+		"if r2 == 0 goto +1;\n"
+		"r2 = *(u32 *)(r2 +4);\n"
+		:: [skops] "r"(skops):"r1", "r2");
+
 	op = (int) skops->op;
 
 	update_event_map(op);
diff --git a/tools/testing/selftests/bpf/progs/test_varlen.c b/tools/testing/selftests/bpf/progs/test_varlen.c
index cd4b72c55dfe..913acdffd90f 100644
--- a/tools/testing/selftests/bpf/progs/test_varlen.c
+++ b/tools/testing/selftests/bpf/progs/test_varlen.c
@@ -15,9 +15,9 @@ int test_pid = 0;
 bool capture = false;
 
 /* .bss */
-long payload1_len1 = 0;
-long payload1_len2 = 0;
-long total1 = 0;
+__u64 payload1_len1 = 0;
+__u64 payload1_len2 = 0;
+__u64 total1 = 0;
 char payload1[MAX_LEN + MAX_LEN] = {};
 
 /* .data */
diff --git a/tools/testing/selftests/bpf/settings b/tools/testing/selftests/bpf/settings
new file mode 100644
index 000000000000..e7b9417537fb
--- /dev/null
+++ b/tools/testing/selftests/bpf/settings
@@ -0,0 +1 @@
+timeout=0
diff --git a/tools/testing/selftests/bpf/test_btf.c b/tools/testing/selftests/bpf/test_btf.c
index 305fae8f80a9..c75fc6447186 100644
--- a/tools/testing/selftests/bpf/test_btf.c
+++ b/tools/testing/selftests/bpf/test_btf.c
@@ -3883,7 +3883,7 @@ static int test_big_btf_info(unsigned int test_num)
 	info_garbage.garbage = 0;
 	err = bpf_obj_get_info_by_fd(btf_fd, info, &info_len);
 	if (CHECK(err || info_len != sizeof(*info),
-		  "err:%d errno:%d info_len:%u sizeof(*info):%lu",
+		  "err:%d errno:%d info_len:%u sizeof(*info):%zu",
 		  err, errno, info_len, sizeof(*info))) {
 		err = -1;
 		goto done;
@@ -4094,7 +4094,7 @@ static int do_test_get_info(unsigned int test_num)
 	if (CHECK(err || !info.id || info_len != sizeof(info) ||
 		  info.btf_size != raw_btf_size ||
 		  (ret = memcmp(raw_btf, user_btf, expected_nbytes)),
-		  "err:%d errno:%d info.id:%u info_len:%u sizeof(info):%lu raw_btf_size:%u info.btf_size:%u expected_nbytes:%u memcmp:%d",
+		  "err:%d errno:%d info.id:%u info_len:%u sizeof(info):%zu raw_btf_size:%u info.btf_size:%u expected_nbytes:%u memcmp:%d",
 		  err, errno, info.id, info_len, sizeof(info),
 		  raw_btf_size, info.btf_size, expected_nbytes, ret)) {
 		err = -1;
@@ -4730,7 +4730,7 @@ ssize_t get_pprint_expected_line(enum pprint_mapv_kind_t mapv_kind,
 
 		nexpected_line = snprintf(expected_line, line_size,
 					  "%s%u: {%u,0,%d,0x%x,0x%x,0x%x,"
-					  "{%lu|[%u,%u,%u,%u,%u,%u,%u,%u]},%s,"
+					  "{%llu|[%u,%u,%u,%u,%u,%u,%u,%u]},%s,"
 					  "%u,0x%x,[[%d,%d],[%d,%d]]}\n",
 					  percpu_map ? "\tcpu" : "",
 					  percpu_map ? cpu : next_key,
@@ -4738,7 +4738,7 @@ ssize_t get_pprint_expected_line(enum pprint_mapv_kind_t mapv_kind,
 					  v->unused_bits2a,
 					  v->bits28,
 					  v->unused_bits2b,
-					  v->ui64,
+					  (__u64)v->ui64,
 					  v->ui8a[0], v->ui8a[1],
 					  v->ui8a[2], v->ui8a[3],
 					  v->ui8a[4], v->ui8a[5],
diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c
index 754cf611723e..0d92ebcb335d 100644
--- a/tools/testing/selftests/bpf/test_maps.c
+++ b/tools/testing/selftests/bpf/test_maps.c
@@ -1274,6 +1274,8 @@ static void __run_parallel(unsigned int tasks,
 	pid_t pid[tasks];
 	int i;
 
+	fflush(stdout);
+
 	for (i = 0; i < tasks; i++) {
 		pid[i] = fork();
 		if (pid[i] == 0) {
diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c
index b1e4dadacd9b..22943b58d752 100644
--- a/tools/testing/selftests/bpf/test_progs.c
+++ b/tools/testing/selftests/bpf/test_progs.c
@@ -618,7 +618,9 @@ int cd_flavor_subdir(const char *exec_name)
 	if (!flavor)
 		return 0;
 	flavor++;
-	fprintf(stdout, "Switching to flavor '%s' subdirectory...\n", flavor);
+	if (env.verbosity > VERBOSE_NONE)
+		fprintf(stdout,	"Switching to flavor '%s' subdirectory...\n", flavor);
+
 	return chdir(flavor);
 }
 
diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h
index 6e09bf738473..dbb820dde138 100644
--- a/tools/testing/selftests/bpf/test_progs.h
+++ b/tools/testing/selftests/bpf/test_progs.h
@@ -135,6 +135,11 @@ static inline __u64 ptr_to_u64(const void *ptr)
 	return (__u64) (unsigned long) ptr;
 }
 
+static inline void *u64_to_ptr(__u64 ptr)
+{
+	return (void *) (unsigned long) ptr;
+}
+
 int bpf_find_map(const char *test, struct bpf_object *obj, const char *name);
 int compare_map_keys(int map1_fd, int map2_fd);
 int compare_stack_ips(int smap_fd, int amap_fd, int stack_trace_len);
diff --git a/tools/testing/selftests/bpf/test_tcpnotify_user.c b/tools/testing/selftests/bpf/test_tcpnotify_user.c
index 8549b31716ab..73da7fe8c152 100644
--- a/tools/testing/selftests/bpf/test_tcpnotify_user.c
+++ b/tools/testing/selftests/bpf/test_tcpnotify_user.c
@@ -124,17 +124,24 @@ int main(int argc, char **argv)
 	sprintf(test_script,
 		"iptables -A INPUT -p tcp --dport %d -j DROP",
 		TESTPORT);
-	system(test_script);
+	if (system(test_script)) {
+		printf("FAILED: execute command: %s, err %d\n", test_script, -errno);
+		goto err;
+	}
 
 	sprintf(test_script,
 		"nc 127.0.0.1 %d < /etc/passwd > /dev/null 2>&1 ",
 		TESTPORT);
-	system(test_script);
+	if (system(test_script))
+		printf("execute command: %s, err %d\n", test_script, -errno);
 
 	sprintf(test_script,
 		"iptables -D INPUT -p tcp --dport %d -j DROP",
 		TESTPORT);
-	system(test_script);
+	if (system(test_script)) {
+		printf("FAILED: execute command: %s, err %d\n", test_script, -errno);
+		goto err;
+	}
 
 	rv = bpf_map_lookup_elem(bpf_map__fd(global_map), &key, &g);
 	if (rv != 0) {
diff --git a/tools/testing/selftests/cgroup/.gitignore b/tools/testing/selftests/cgroup/.gitignore
index aa6de65b0838..84cfcabea838 100644
--- a/tools/testing/selftests/cgroup/.gitignore
+++ b/tools/testing/selftests/cgroup/.gitignore
@@ -2,3 +2,4 @@
 test_memcontrol
 test_core
 test_freezer
+test_kmem
+\ No newline at end of file
diff --git a/tools/testing/selftests/cgroup/Makefile b/tools/testing/selftests/cgroup/Makefile
index 967f268fde74..f027d933595b 100644
--- a/tools/testing/selftests/cgroup/Makefile
+++ b/tools/testing/selftests/cgroup/Makefile
@@ -6,11 +6,13 @@ all:
 TEST_FILES     := with_stress.sh
 TEST_PROGS     := test_stress.sh
 TEST_GEN_PROGS = test_memcontrol
+TEST_GEN_PROGS += test_kmem
 TEST_GEN_PROGS += test_core
 TEST_GEN_PROGS += test_freezer
 
 include ../lib.mk
 
 $(OUTPUT)/test_memcontrol: cgroup_util.c ../clone3/clone3_selftests.h
+$(OUTPUT)/test_kmem: cgroup_util.c ../clone3/clone3_selftests.h
 $(OUTPUT)/test_core: cgroup_util.c ../clone3/clone3_selftests.h
 $(OUTPUT)/test_freezer: cgroup_util.c ../clone3/clone3_selftests.h
diff --git a/tools/testing/selftests/cgroup/cgroup_util.c b/tools/testing/selftests/cgroup/cgroup_util.c
index 8a637ca7d73a..05853b0b8831 100644
--- a/tools/testing/selftests/cgroup/cgroup_util.c
+++ b/tools/testing/selftests/cgroup/cgroup_util.c
@@ -106,7 +106,7 @@ int cg_read_strcmp(const char *cgroup, const char *control,
 
 	/* Handle the case of comparing against empty string */
 	if (!expected)
-		size = 32;
+		return -1;
 	else
 		size = strlen(expected) + 1;
 
diff --git a/tools/testing/selftests/cgroup/test_kmem.c b/tools/testing/selftests/cgroup/test_kmem.c
new file mode 100644
index 000000000000..0941aa16157e
--- /dev/null
+++ b/tools/testing/selftests/cgroup/test_kmem.c
@@ -0,0 +1,450 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+
+#include <linux/limits.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+#include <sys/wait.h>
+#include <errno.h>
+#include <sys/sysinfo.h>
+#include <pthread.h>
+
+#include "../kselftest.h"
+#include "cgroup_util.h"
+
+
+/*
+ * Memory cgroup charging and vmstat data aggregation is performed using
+ * percpu batches 32 pages big (look at MEMCG_CHARGE_BATCH). So the maximum
+ * discrepancy between charge and vmstat entries is number of cpus multiplied
+ * by 32 pages multiplied by 2.
+ */
+#define MAX_VMSTAT_ERROR (4096 * 32 * 2 * get_nprocs())
+
+
+static int alloc_dcache(const char *cgroup, void *arg)
+{
+	unsigned long i;
+	struct stat st;
+	char buf[128];
+
+	for (i = 0; i < (unsigned long)arg; i++) {
+		snprintf(buf, sizeof(buf),
+			"/something-non-existent-with-a-long-name-%64lu-%d",
+			 i, getpid());
+		stat(buf, &st);
+	}
+
+	return 0;
+}
+
+/*
+ * This test allocates 100000 of negative dentries with long names.
+ * Then it checks that "slab" in memory.stat is larger than 1M.
+ * Then it sets memory.high to 1M and checks that at least 1/2
+ * of slab memory has been reclaimed.
+ */
+static int test_kmem_basic(const char *root)
+{
+	int ret = KSFT_FAIL;
+	char *cg = NULL;
+	long slab0, slab1, current;
+
+	cg = cg_name(root, "kmem_basic_test");
+	if (!cg)
+		goto cleanup;
+
+	if (cg_create(cg))
+		goto cleanup;
+
+	if (cg_run(cg, alloc_dcache, (void *)100000))
+		goto cleanup;
+
+	slab0 = cg_read_key_long(cg, "memory.stat", "slab ");
+	if (slab0 < (1 << 20))
+		goto cleanup;
+
+	cg_write(cg, "memory.high", "1M");
+	slab1 = cg_read_key_long(cg, "memory.stat", "slab ");
+	if (slab1 <= 0)
+		goto cleanup;
+
+	current = cg_read_long(cg, "memory.current");
+	if (current <= 0)
+		goto cleanup;
+
+	if (slab1 < slab0 / 2 && current < slab0 / 2)
+		ret = KSFT_PASS;
+cleanup:
+	cg_destroy(cg);
+	free(cg);
+
+	return ret;
+}
+
+static void *alloc_kmem_fn(void *arg)
+{
+	alloc_dcache(NULL, (void *)100);
+	return NULL;
+}
+
+static int alloc_kmem_smp(const char *cgroup, void *arg)
+{
+	int nr_threads = 2 * get_nprocs();
+	pthread_t *tinfo;
+	unsigned long i;
+	int ret = -1;
+
+	tinfo = calloc(nr_threads, sizeof(pthread_t));
+	if (tinfo == NULL)
+		return -1;
+
+	for (i = 0; i < nr_threads; i++) {
+		if (pthread_create(&tinfo[i], NULL, &alloc_kmem_fn,
+				   (void *)i)) {
+			free(tinfo);
+			return -1;
+		}
+	}
+
+	for (i = 0; i < nr_threads; i++) {
+		ret = pthread_join(tinfo[i], NULL);
+		if (ret)
+			break;
+	}
+
+	free(tinfo);
+	return ret;
+}
+
+static int cg_run_in_subcgroups(const char *parent,
+				int (*fn)(const char *cgroup, void *arg),
+				void *arg, int times)
+{
+	char *child;
+	int i;
+
+	for (i = 0; i < times; i++) {
+		child = cg_name_indexed(parent, "child", i);
+		if (!child)
+			return -1;
+
+		if (cg_create(child)) {
+			cg_destroy(child);
+			free(child);
+			return -1;
+		}
+
+		if (cg_run(child, fn, NULL)) {
+			cg_destroy(child);
+			free(child);
+			return -1;
+		}
+
+		cg_destroy(child);
+		free(child);
+	}
+
+	return 0;
+}
+
+/*
+ * The test creates and destroys a large number of cgroups. In each cgroup it
+ * allocates some slab memory (mostly negative dentries) using 2 * NR_CPUS
+ * threads. Then it checks the sanity of numbers on the parent level:
+ * the total size of the cgroups should be roughly equal to
+ * anon + file + slab + kernel_stack.
+ */
+static int test_kmem_memcg_deletion(const char *root)
+{
+	long current, slab, anon, file, kernel_stack, sum;
+	int ret = KSFT_FAIL;
+	char *parent;
+
+	parent = cg_name(root, "kmem_memcg_deletion_test");
+	if (!parent)
+		goto cleanup;
+
+	if (cg_create(parent))
+		goto cleanup;
+
+	if (cg_write(parent, "cgroup.subtree_control", "+memory"))
+		goto cleanup;
+
+	if (cg_run_in_subcgroups(parent, alloc_kmem_smp, NULL, 100))
+		goto cleanup;
+
+	current = cg_read_long(parent, "memory.current");
+	slab = cg_read_key_long(parent, "memory.stat", "slab ");
+	anon = cg_read_key_long(parent, "memory.stat", "anon ");
+	file = cg_read_key_long(parent, "memory.stat", "file ");
+	kernel_stack = cg_read_key_long(parent, "memory.stat", "kernel_stack ");
+	if (current < 0 || slab < 0 || anon < 0 || file < 0 ||
+	    kernel_stack < 0)
+		goto cleanup;
+
+	sum = slab + anon + file + kernel_stack;
+	if (abs(sum - current) < MAX_VMSTAT_ERROR) {
+		ret = KSFT_PASS;
+	} else {
+		printf("memory.current = %ld\n", current);
+		printf("slab + anon + file + kernel_stack = %ld\n", sum);
+		printf("slab = %ld\n", slab);
+		printf("anon = %ld\n", anon);
+		printf("file = %ld\n", file);
+		printf("kernel_stack = %ld\n", kernel_stack);
+	}
+
+cleanup:
+	cg_destroy(parent);
+	free(parent);
+
+	return ret;
+}
+
+/*
+ * The test reads the entire /proc/kpagecgroup. If the operation went
+ * successfully (and the kernel didn't panic), the test is treated as passed.
+ */
+static int test_kmem_proc_kpagecgroup(const char *root)
+{
+	unsigned long buf[128];
+	int ret = KSFT_FAIL;
+	ssize_t len;
+	int fd;
+
+	fd = open("/proc/kpagecgroup", O_RDONLY);
+	if (fd < 0)
+		return ret;
+
+	do {
+		len = read(fd, buf, sizeof(buf));
+	} while (len > 0);
+
+	if (len == 0)
+		ret = KSFT_PASS;
+
+	close(fd);
+	return ret;
+}
+
+static void *pthread_wait_fn(void *arg)
+{
+	sleep(100);
+	return NULL;
+}
+
+static int spawn_1000_threads(const char *cgroup, void *arg)
+{
+	int nr_threads = 1000;
+	pthread_t *tinfo;
+	unsigned long i;
+	long stack;
+	int ret = -1;
+
+	tinfo = calloc(nr_threads, sizeof(pthread_t));
+	if (tinfo == NULL)
+		return -1;
+
+	for (i = 0; i < nr_threads; i++) {
+		if (pthread_create(&tinfo[i], NULL, &pthread_wait_fn,
+				   (void *)i)) {
+			free(tinfo);
+			return(-1);
+		}
+	}
+
+	stack = cg_read_key_long(cgroup, "memory.stat", "kernel_stack ");
+	if (stack >= 4096 * 1000)
+		ret = 0;
+
+	free(tinfo);
+	return ret;
+}
+
+/*
+ * The test spawns a process, which spawns 1000 threads. Then it checks
+ * that memory.stat's kernel_stack is at least 1000 pages large.
+ */
+static int test_kmem_kernel_stacks(const char *root)
+{
+	int ret = KSFT_FAIL;
+	char *cg = NULL;
+
+	cg = cg_name(root, "kmem_kernel_stacks_test");
+	if (!cg)
+		goto cleanup;
+
+	if (cg_create(cg))
+		goto cleanup;
+
+	if (cg_run(cg, spawn_1000_threads, NULL))
+		goto cleanup;
+
+	ret = KSFT_PASS;
+cleanup:
+	cg_destroy(cg);
+	free(cg);
+
+	return ret;
+}
+
+/*
+ * This test sequentionally creates 30 child cgroups, allocates some
+ * kernel memory in each of them, and deletes them. Then it checks
+ * that the number of dying cgroups on the parent level is 0.
+ */
+static int test_kmem_dead_cgroups(const char *root)
+{
+	int ret = KSFT_FAIL;
+	char *parent;
+	long dead;
+	int i;
+
+	parent = cg_name(root, "kmem_dead_cgroups_test");
+	if (!parent)
+		goto cleanup;
+
+	if (cg_create(parent))
+		goto cleanup;
+
+	if (cg_write(parent, "cgroup.subtree_control", "+memory"))
+		goto cleanup;
+
+	if (cg_run_in_subcgroups(parent, alloc_dcache, (void *)100, 30))
+		goto cleanup;
+
+	for (i = 0; i < 5; i++) {
+		dead = cg_read_key_long(parent, "cgroup.stat",
+					"nr_dying_descendants ");
+		if (dead == 0) {
+			ret = KSFT_PASS;
+			break;
+		}
+		/*
+		 * Reclaiming cgroups might take some time,
+		 * let's wait a bit and repeat.
+		 */
+		sleep(1);
+	}
+
+cleanup:
+	cg_destroy(parent);
+	free(parent);
+
+	return ret;
+}
+
+/*
+ * This test creates a sub-tree with 1000 memory cgroups.
+ * Then it checks that the memory.current on the parent level
+ * is greater than 0 and approximates matches the percpu value
+ * from memory.stat.
+ */
+static int test_percpu_basic(const char *root)
+{
+	int ret = KSFT_FAIL;
+	char *parent, *child;
+	long current, percpu;
+	int i;
+
+	parent = cg_name(root, "percpu_basic_test");
+	if (!parent)
+		goto cleanup;
+
+	if (cg_create(parent))
+		goto cleanup;
+
+	if (cg_write(parent, "cgroup.subtree_control", "+memory"))
+		goto cleanup;
+
+	for (i = 0; i < 1000; i++) {
+		child = cg_name_indexed(parent, "child", i);
+		if (!child)
+			return -1;
+
+		if (cg_create(child))
+			goto cleanup_children;
+
+		free(child);
+	}
+
+	current = cg_read_long(parent, "memory.current");
+	percpu = cg_read_key_long(parent, "memory.stat", "percpu ");
+
+	if (current > 0 && percpu > 0 && abs(current - percpu) <
+	    MAX_VMSTAT_ERROR)
+		ret = KSFT_PASS;
+	else
+		printf("memory.current %ld\npercpu %ld\n",
+		       current, percpu);
+
+cleanup_children:
+	for (i = 0; i < 1000; i++) {
+		child = cg_name_indexed(parent, "child", i);
+		cg_destroy(child);
+		free(child);
+	}
+
+cleanup:
+	cg_destroy(parent);
+	free(parent);
+
+	return ret;
+}
+
+#define T(x) { x, #x }
+struct kmem_test {
+	int (*fn)(const char *root);
+	const char *name;
+} tests[] = {
+	T(test_kmem_basic),
+	T(test_kmem_memcg_deletion),
+	T(test_kmem_proc_kpagecgroup),
+	T(test_kmem_kernel_stacks),
+	T(test_kmem_dead_cgroups),
+	T(test_percpu_basic),
+};
+#undef T
+
+int main(int argc, char **argv)
+{
+	char root[PATH_MAX];
+	int i, ret = EXIT_SUCCESS;
+
+	if (cg_find_unified_root(root, sizeof(root)))
+		ksft_exit_skip("cgroup v2 isn't mounted\n");
+
+	/*
+	 * Check that memory controller is available:
+	 * memory is listed in cgroup.controllers
+	 */
+	if (cg_read_strstr(root, "cgroup.controllers", "memory"))
+		ksft_exit_skip("memory controller isn't available\n");
+
+	if (cg_read_strstr(root, "cgroup.subtree_control", "memory"))
+		if (cg_write(root, "cgroup.subtree_control", "+memory"))
+			ksft_exit_skip("Failed to set memory controller\n");
+
+	for (i = 0; i < ARRAY_SIZE(tests); i++) {
+		switch (tests[i].fn(root)) {
+		case KSFT_PASS:
+			ksft_test_result_pass("%s\n", tests[i].name);
+			break;
+		case KSFT_SKIP:
+			ksft_test_result_skip("%s\n", tests[i].name);
+			break;
+		default:
+			ret = EXIT_FAILURE;
+			ksft_test_result_fail("%s\n", tests[i].name);
+			break;
+		}
+	}
+
+	return ret;
+}
diff --git a/tools/testing/selftests/clone3/clone3.c b/tools/testing/selftests/clone3/clone3.c
index b7e6dec36173..42be3b925830 100644
--- a/tools/testing/selftests/clone3/clone3.c
+++ b/tools/testing/selftests/clone3/clone3.c
@@ -20,13 +20,6 @@
 #include "../kselftest.h"
 #include "clone3_selftests.h"
 
-/*
- * Different sizes of struct clone_args
- */
-#ifndef CLONE3_ARGS_SIZE_V0
-#define CLONE3_ARGS_SIZE_V0 64
-#endif
-
 enum test_mode {
 	CLONE3_ARGS_NO_TEST,
 	CLONE3_ARGS_ALL_0,
@@ -38,13 +31,13 @@ enum test_mode {
 
 static int call_clone3(uint64_t flags, size_t size, enum test_mode test_mode)
 {
-	struct clone_args args = {
+	struct __clone_args args = {
 		.flags = flags,
 		.exit_signal = SIGCHLD,
 	};
 
 	struct clone_args_extended {
-		struct clone_args args;
+		struct __clone_args args;
 		__aligned_u64 excess_space[2];
 	} args_ext;
 
@@ -52,11 +45,11 @@ static int call_clone3(uint64_t flags, size_t size, enum test_mode test_mode)
 	int status;
 
 	memset(&args_ext, 0, sizeof(args_ext));
-	if (size > sizeof(struct clone_args))
+	if (size > sizeof(struct __clone_args))
 		args_ext.excess_space[1] = 1;
 
 	if (size == 0)
-		size = sizeof(struct clone_args);
+		size = sizeof(struct __clone_args);
 
 	switch (test_mode) {
 	case CLONE3_ARGS_ALL_0:
@@ -77,9 +70,9 @@ static int call_clone3(uint64_t flags, size_t size, enum test_mode test_mode)
 		break;
 	}
 
-	memcpy(&args_ext.args, &args, sizeof(struct clone_args));
+	memcpy(&args_ext.args, &args, sizeof(struct __clone_args));
 
-	pid = sys_clone3((struct clone_args *)&args_ext, size);
+	pid = sys_clone3((struct __clone_args *)&args_ext, size);
 	if (pid < 0) {
 		ksft_print_msg("%s - Failed to create new process\n",
 				strerror(errno));
@@ -144,14 +137,14 @@ int main(int argc, char *argv[])
 	else
 		ksft_test_result_skip("Skipping clone3() with CLONE_NEWPID\n");
 
-	/* Do a clone3() with CLONE3_ARGS_SIZE_V0. */
-	test_clone3(0, CLONE3_ARGS_SIZE_V0, 0, CLONE3_ARGS_NO_TEST);
+	/* Do a clone3() with CLONE_ARGS_SIZE_VER0. */
+	test_clone3(0, CLONE_ARGS_SIZE_VER0, 0, CLONE3_ARGS_NO_TEST);
 
-	/* Do a clone3() with CLONE3_ARGS_SIZE_V0 - 8 */
-	test_clone3(0, CLONE3_ARGS_SIZE_V0 - 8, -EINVAL, CLONE3_ARGS_NO_TEST);
+	/* Do a clone3() with CLONE_ARGS_SIZE_VER0 - 8 */
+	test_clone3(0, CLONE_ARGS_SIZE_VER0 - 8, -EINVAL, CLONE3_ARGS_NO_TEST);
 
 	/* Do a clone3() with sizeof(struct clone_args) + 8 */
-	test_clone3(0, sizeof(struct clone_args) + 8, 0, CLONE3_ARGS_NO_TEST);
+	test_clone3(0, sizeof(struct __clone_args) + 8, 0, CLONE3_ARGS_NO_TEST);
 
 	/* Do a clone3() with exit_signal having highest 32 bits non-zero */
 	test_clone3(0, 0, -EINVAL, CLONE3_ARGS_INVAL_EXIT_SIGNAL_BIG);
@@ -165,31 +158,31 @@ int main(int argc, char *argv[])
 	/* Do a clone3() with NSIG < exit_signal < CSIG */
 	test_clone3(0, 0, -EINVAL, CLONE3_ARGS_INVAL_EXIT_SIGNAL_NSIG);
 
-	test_clone3(0, sizeof(struct clone_args) + 8, 0, CLONE3_ARGS_ALL_0);
+	test_clone3(0, sizeof(struct __clone_args) + 8, 0, CLONE3_ARGS_ALL_0);
 
-	test_clone3(0, sizeof(struct clone_args) + 16, -E2BIG,
+	test_clone3(0, sizeof(struct __clone_args) + 16, -E2BIG,
 			CLONE3_ARGS_ALL_0);
 
-	test_clone3(0, sizeof(struct clone_args) * 2, -E2BIG,
+	test_clone3(0, sizeof(struct __clone_args) * 2, -E2BIG,
 			CLONE3_ARGS_ALL_0);
 
 	/* Do a clone3() with > page size */
 	test_clone3(0, getpagesize() + 8, -E2BIG, CLONE3_ARGS_NO_TEST);
 
-	/* Do a clone3() with CLONE3_ARGS_SIZE_V0 in a new PID NS. */
+	/* Do a clone3() with CLONE_ARGS_SIZE_VER0 in a new PID NS. */
 	if (uid == 0)
-		test_clone3(CLONE_NEWPID, CLONE3_ARGS_SIZE_V0, 0,
+		test_clone3(CLONE_NEWPID, CLONE_ARGS_SIZE_VER0, 0,
 				CLONE3_ARGS_NO_TEST);
 	else
 		ksft_test_result_skip("Skipping clone3() with CLONE_NEWPID\n");
 
-	/* Do a clone3() with CLONE3_ARGS_SIZE_V0 - 8 in a new PID NS */
-	test_clone3(CLONE_NEWPID, CLONE3_ARGS_SIZE_V0 - 8, -EINVAL,
+	/* Do a clone3() with CLONE_ARGS_SIZE_VER0 - 8 in a new PID NS */
+	test_clone3(CLONE_NEWPID, CLONE_ARGS_SIZE_VER0 - 8, -EINVAL,
 			CLONE3_ARGS_NO_TEST);
 
 	/* Do a clone3() with sizeof(struct clone_args) + 8 in a new PID NS */
 	if (uid == 0)
-		test_clone3(CLONE_NEWPID, sizeof(struct clone_args) + 8, 0,
+		test_clone3(CLONE_NEWPID, sizeof(struct __clone_args) + 8, 0,
 				CLONE3_ARGS_NO_TEST);
 	else
 		ksft_test_result_skip("Skipping clone3() with CLONE_NEWPID\n");
diff --git a/tools/testing/selftests/clone3/clone3_cap_checkpoint_restore.c b/tools/testing/selftests/clone3/clone3_cap_checkpoint_restore.c
index 9562425aa0a9..55bd387ce7ec 100644
--- a/tools/testing/selftests/clone3/clone3_cap_checkpoint_restore.c
+++ b/tools/testing/selftests/clone3/clone3_cap_checkpoint_restore.c
@@ -44,13 +44,13 @@ static int call_clone3_set_tid(struct __test_metadata *_metadata,
 	int status;
 	pid_t pid = -1;
 
-	struct clone_args args = {
+	struct __clone_args args = {
 		.exit_signal = SIGCHLD,
 		.set_tid = ptr_to_u64(set_tid),
 		.set_tid_size = set_tid_size,
 	};
 
-	pid = sys_clone3(&args, sizeof(struct clone_args));
+	pid = sys_clone3(&args, sizeof(args));
 	if (pid < 0) {
 		TH_LOG("%s - Failed to create new process", strerror(errno));
 		return -errno;
diff --git a/tools/testing/selftests/clone3/clone3_clear_sighand.c b/tools/testing/selftests/clone3/clone3_clear_sighand.c
index db5fc9c5edcf..47a8c0fc3676 100644
--- a/tools/testing/selftests/clone3/clone3_clear_sighand.c
+++ b/tools/testing/selftests/clone3/clone3_clear_sighand.c
@@ -47,7 +47,7 @@ static void test_clone3_clear_sighand(void)
 {
 	int ret;
 	pid_t pid;
-	struct clone_args args = {};
+	struct __clone_args args = {};
 	struct sigaction act;
 
 	/*
diff --git a/tools/testing/selftests/clone3/clone3_selftests.h b/tools/testing/selftests/clone3/clone3_selftests.h
index 91c1a78ddb39..e81ffaaee02b 100644
--- a/tools/testing/selftests/clone3/clone3_selftests.h
+++ b/tools/testing/selftests/clone3/clone3_selftests.h
@@ -19,13 +19,11 @@
 #define CLONE_INTO_CGROUP 0x200000000ULL /* Clone into a specific cgroup given the right permissions. */
 #endif
 
-#ifndef CLONE_ARGS_SIZE_VER0
-#define CLONE_ARGS_SIZE_VER0 64
-#endif
-
 #ifndef __NR_clone3
 #define __NR_clone3 -1
-struct clone_args {
+#endif
+
+struct __clone_args {
 	__aligned_u64 flags;
 	__aligned_u64 pidfd;
 	__aligned_u64 child_tid;
@@ -34,15 +32,21 @@ struct clone_args {
 	__aligned_u64 stack;
 	__aligned_u64 stack_size;
 	__aligned_u64 tls;
-#define CLONE_ARGS_SIZE_VER1 80
+#ifndef CLONE_ARGS_SIZE_VER0
+#define CLONE_ARGS_SIZE_VER0 64	/* sizeof first published struct */
+#endif
 	__aligned_u64 set_tid;
 	__aligned_u64 set_tid_size;
-#define CLONE_ARGS_SIZE_VER2 88
+#ifndef CLONE_ARGS_SIZE_VER1
+#define CLONE_ARGS_SIZE_VER1 80	/* sizeof second published struct */
+#endif
 	__aligned_u64 cgroup;
+#ifndef CLONE_ARGS_SIZE_VER2
+#define CLONE_ARGS_SIZE_VER2 88	/* sizeof third published struct */
+#endif
 };
-#endif /* __NR_clone3 */
 
-static pid_t sys_clone3(struct clone_args *args, size_t size)
+static pid_t sys_clone3(struct __clone_args *args, size_t size)
 {
 	fflush(stdout);
 	fflush(stderr);
@@ -52,7 +56,7 @@ static pid_t sys_clone3(struct clone_args *args, size_t size)
 static inline void test_clone3_supported(void)
 {
 	pid_t pid;
-	struct clone_args args = {};
+	struct __clone_args args = {};
 
 	if (__NR_clone3 < 0)
 		ksft_exit_skip("clone3() syscall is not supported\n");
diff --git a/tools/testing/selftests/clone3/clone3_set_tid.c b/tools/testing/selftests/clone3/clone3_set_tid.c
index 5831c1082d6d..0229e9ebb995 100644
--- a/tools/testing/selftests/clone3/clone3_set_tid.c
+++ b/tools/testing/selftests/clone3/clone3_set_tid.c
@@ -46,14 +46,14 @@ static int call_clone3_set_tid(pid_t *set_tid,
 	int status;
 	pid_t pid = -1;
 
-	struct clone_args args = {
+	struct __clone_args args = {
 		.flags = flags,
 		.exit_signal = SIGCHLD,
 		.set_tid = ptr_to_u64(set_tid),
 		.set_tid_size = set_tid_size,
 	};
 
-	pid = sys_clone3(&args, sizeof(struct clone_args));
+	pid = sys_clone3(&args, sizeof(args));
 	if (pid < 0) {
 		ksft_print_msg("%s - Failed to create new process\n",
 			       strerror(errno));
diff --git a/tools/testing/selftests/exec/.gitignore b/tools/testing/selftests/exec/.gitignore
index 94b02a18f230..344a99c6da1b 100644
--- a/tools/testing/selftests/exec/.gitignore
+++ b/tools/testing/selftests/exec/.gitignore
@@ -10,3 +10,4 @@ execveat.denatured
 /recursion-depth
 xxxxxxxx*
 pipe
+S_I*.test
diff --git a/tools/testing/selftests/exec/Makefile b/tools/testing/selftests/exec/Makefile
index 4453b8f8def3..0a13b110c1e6 100644
--- a/tools/testing/selftests/exec/Makefile
+++ b/tools/testing/selftests/exec/Makefile
@@ -3,7 +3,7 @@ CFLAGS = -Wall
 CFLAGS += -Wno-nonnull
 CFLAGS += -D_GNU_SOURCE
 
-TEST_PROGS := binfmt_script
+TEST_PROGS := binfmt_script non-regular
 TEST_GEN_PROGS := execveat
 TEST_GEN_FILES := execveat.symlink execveat.denatured script subdir pipe
 # Makefile is a run-time dependency, since it's accessed by the execveat test
@@ -11,7 +11,8 @@ TEST_FILES := Makefile
 
 TEST_GEN_PROGS += recursion-depth
 
-EXTRA_CLEAN := $(OUTPUT)/subdir.moved $(OUTPUT)/execveat.moved $(OUTPUT)/xxxxx*
+EXTRA_CLEAN := $(OUTPUT)/subdir.moved $(OUTPUT)/execveat.moved $(OUTPUT)/xxxxx*	\
+	       $(OUTPUT)/S_I*.test
 
 include ../lib.mk
 
diff --git a/tools/testing/selftests/exec/non-regular.c b/tools/testing/selftests/exec/non-regular.c
new file mode 100644
index 000000000000..cd3a34aca93e
--- /dev/null
+++ b/tools/testing/selftests/exec/non-regular.c
@@ -0,0 +1,196 @@
+// SPDX-License-Identifier: GPL-2.0+
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/sysmacros.h>
+#include <sys/types.h>
+
+#include "../kselftest_harness.h"
+
+/* Remove a file, ignoring the result if it didn't exist. */
+void rm(struct __test_metadata *_metadata, const char *pathname,
+	int is_dir)
+{
+	int rc;
+
+	if (is_dir)
+		rc = rmdir(pathname);
+	else
+		rc = unlink(pathname);
+
+	if (rc < 0) {
+		ASSERT_EQ(errno, ENOENT) {
+			TH_LOG("Not ENOENT: %s", pathname);
+		}
+	} else {
+		ASSERT_EQ(rc, 0) {
+			TH_LOG("Failed to remove: %s", pathname);
+		}
+	}
+}
+
+FIXTURE(file) {
+	char *pathname;
+	int is_dir;
+};
+
+FIXTURE_VARIANT(file)
+{
+	const char *name;
+	int expected;
+	int is_dir;
+	void (*setup)(struct __test_metadata *_metadata,
+		      FIXTURE_DATA(file) *self,
+		      const FIXTURE_VARIANT(file) *variant);
+	int major, minor, mode; /* for mknod() */
+};
+
+void setup_link(struct __test_metadata *_metadata,
+		FIXTURE_DATA(file) *self,
+		const FIXTURE_VARIANT(file) *variant)
+{
+	const char * const paths[] = {
+		"/bin/true",
+		"/usr/bin/true",
+	};
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(paths); i++) {
+		if (access(paths[i], X_OK) == 0) {
+			ASSERT_EQ(symlink(paths[i], self->pathname), 0);
+			return;
+		}
+	}
+	ASSERT_EQ(1, 0) {
+		TH_LOG("Could not find viable 'true' binary");
+	}
+}
+
+FIXTURE_VARIANT_ADD(file, S_IFLNK)
+{
+	.name = "S_IFLNK",
+	.expected = ELOOP,
+	.setup = setup_link,
+};
+
+void setup_dir(struct __test_metadata *_metadata,
+	       FIXTURE_DATA(file) *self,
+	       const FIXTURE_VARIANT(file) *variant)
+{
+	ASSERT_EQ(mkdir(self->pathname, 0755), 0);
+}
+
+FIXTURE_VARIANT_ADD(file, S_IFDIR)
+{
+	.name = "S_IFDIR",
+	.is_dir = 1,
+	.expected = EACCES,
+	.setup = setup_dir,
+};
+
+void setup_node(struct __test_metadata *_metadata,
+		FIXTURE_DATA(file) *self,
+		const FIXTURE_VARIANT(file) *variant)
+{
+	dev_t dev;
+	int rc;
+
+	dev = makedev(variant->major, variant->minor);
+	rc = mknod(self->pathname, 0755 | variant->mode, dev);
+	ASSERT_EQ(rc, 0) {
+		if (errno == EPERM)
+			SKIP(return, "Please run as root; cannot mknod(%s)",
+				variant->name);
+	}
+}
+
+FIXTURE_VARIANT_ADD(file, S_IFBLK)
+{
+	.name = "S_IFBLK",
+	.expected = EACCES,
+	.setup = setup_node,
+	/* /dev/loop0 */
+	.major = 7,
+	.minor = 0,
+	.mode = S_IFBLK,
+};
+
+FIXTURE_VARIANT_ADD(file, S_IFCHR)
+{
+	.name = "S_IFCHR",
+	.expected = EACCES,
+	.setup = setup_node,
+	/* /dev/zero */
+	.major = 1,
+	.minor = 5,
+	.mode = S_IFCHR,
+};
+
+void setup_fifo(struct __test_metadata *_metadata,
+		FIXTURE_DATA(file) *self,
+		const FIXTURE_VARIANT(file) *variant)
+{
+	ASSERT_EQ(mkfifo(self->pathname, 0755), 0);
+}
+
+FIXTURE_VARIANT_ADD(file, S_IFIFO)
+{
+	.name = "S_IFIFO",
+	.expected = EACCES,
+	.setup = setup_fifo,
+};
+
+FIXTURE_SETUP(file)
+{
+	ASSERT_GT(asprintf(&self->pathname, "%s.test", variant->name), 6);
+	self->is_dir = variant->is_dir;
+
+	rm(_metadata, self->pathname, variant->is_dir);
+	variant->setup(_metadata, self, variant);
+}
+
+FIXTURE_TEARDOWN(file)
+{
+	rm(_metadata, self->pathname, self->is_dir);
+}
+
+TEST_F(file, exec_errno)
+{
+	char * const argv[2] = { (char * const)self->pathname, NULL };
+
+	EXPECT_LT(execv(argv[0], argv), 0);
+	EXPECT_EQ(errno, variant->expected);
+}
+
+/* S_IFSOCK */
+FIXTURE(sock)
+{
+	int fd;
+};
+
+FIXTURE_SETUP(sock)
+{
+	self->fd = socket(AF_INET, SOCK_STREAM, 0);
+	ASSERT_GE(self->fd, 0);
+}
+
+FIXTURE_TEARDOWN(sock)
+{
+	if (self->fd >= 0)
+		ASSERT_EQ(close(self->fd), 0);
+}
+
+TEST_F(sock, exec_errno)
+{
+	char * const argv[2] = { " magic socket ", NULL };
+	char * const envp[1] = { NULL };
+
+	EXPECT_LT(fexecve(self->fd, argv, envp), 0);
+	EXPECT_EQ(errno, EACCES);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/firmware/fw_filesystem.sh b/tools/testing/selftests/firmware/fw_filesystem.sh
index fcc281373b4d..c2a2a100114b 100755
--- a/tools/testing/selftests/firmware/fw_filesystem.sh
+++ b/tools/testing/selftests/firmware/fw_filesystem.sh
@@ -149,6 +149,26 @@ config_unset_into_buf()
 	echo 0 >  $DIR/config_into_buf
 }
 
+config_set_buf_size()
+{
+	echo $1 >  $DIR/config_buf_size
+}
+
+config_set_file_offset()
+{
+	echo $1 >  $DIR/config_file_offset
+}
+
+config_set_partial()
+{
+	echo 1 >  $DIR/config_partial
+}
+
+config_unset_partial()
+{
+	echo 0 >  $DIR/config_partial
+}
+
 config_set_sync_direct()
 {
 	echo 1 >  $DIR/config_sync_direct
@@ -207,6 +227,35 @@ read_firmwares()
 	done
 }
 
+read_partial_firmwares()
+{
+	if [ "$(cat $DIR/config_into_buf)" == "1" ]; then
+		fwfile="${FW_INTO_BUF}"
+	else
+		fwfile="${FW}"
+	fi
+
+	if [ "$1" = "xzonly" ]; then
+		fwfile="${fwfile}-orig"
+	fi
+
+	# Strip fwfile down to match partial offset and length
+	partial_data="$(cat $fwfile)"
+	partial_data="${partial_data:$2:$3}"
+
+	for i in $(seq 0 3); do
+		config_set_read_fw_idx $i
+
+		read_firmware="$(cat $DIR/read_firmware)"
+
+		# Verify the contents are what we expect.
+		if [ $read_firmware != $partial_data ]; then
+			echo "request #$i: partial firmware was not loaded" >&2
+			exit 1
+		fi
+	done
+}
+
 read_firmwares_expect_nofile()
 {
 	for i in $(seq 0 3); do
@@ -242,6 +291,21 @@ test_batched_request_firmware_into_buf_nofile()
 	echo "OK"
 }
 
+test_request_partial_firmware_into_buf_nofile()
+{
+	echo -n "Test request_partial_firmware_into_buf() off=$1 size=$2 nofile: "
+	config_reset
+	config_set_name nope-test-firmware.bin
+	config_set_into_buf
+	config_set_partial
+	config_set_buf_size $2
+	config_set_file_offset $1
+	config_trigger_sync
+	read_firmwares_expect_nofile
+	release_all_firmware
+	echo "OK"
+}
+
 test_batched_request_firmware_direct_nofile()
 {
 	echo -n "Batched request_firmware_direct() nofile try #$1: "
@@ -356,6 +420,21 @@ test_request_firmware_nowait_custom()
 	echo "OK"
 }
 
+test_request_partial_firmware_into_buf()
+{
+	echo -n "Test request_partial_firmware_into_buf() off=$1 size=$2: "
+	config_reset
+	config_set_name $TEST_FIRMWARE_INTO_BUF_FILENAME
+	config_set_into_buf
+	config_set_partial
+	config_set_buf_size $2
+	config_set_file_offset $1
+	config_trigger_sync
+	read_partial_firmwares normal $1 $2
+	release_all_firmware
+	echo "OK"
+}
+
 # Only continue if batched request triggers are present on the
 # test-firmware driver
 test_config_present
@@ -383,6 +462,12 @@ for i in $(seq 1 5); do
 	test_request_firmware_nowait_custom $i normal
 done
 
+# Partial loads cannot use fallback, so do not repeat tests.
+test_request_partial_firmware_into_buf 0 10
+test_request_partial_firmware_into_buf 0 5
+test_request_partial_firmware_into_buf 1 6
+test_request_partial_firmware_into_buf 2 10
+
 # Test for file not found, errors are expected, the failure would be
 # a hung task, which would require a hard reset.
 echo
@@ -407,6 +492,12 @@ for i in $(seq 1 5); do
 	test_request_firmware_nowait_custom_nofile $i
 done
 
+# Partial loads cannot use fallback, so do not repeat tests.
+test_request_partial_firmware_into_buf_nofile 0 10
+test_request_partial_firmware_into_buf_nofile 0 5
+test_request_partial_firmware_into_buf_nofile 1 6
+test_request_partial_firmware_into_buf_nofile 2 10
+
 test "$HAS_FW_LOADER_COMPRESS" != "yes" && exit 0
 
 # test with both files present
diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_kprobe.tc b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_kprobe.tc
index 68550f97d3c3..3bcd4c3624ee 100644
--- a/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_kprobe.tc
+++ b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_kprobe.tc
@@ -6,7 +6,7 @@
 echo 0 > events/enable
 echo > dynamic_events
 
-PLACE=_do_fork
+PLACE=kernel_clone
 
 echo "p:myevent1 $PLACE" >> dynamic_events
 echo "r:myevent2 $PLACE" >> dynamic_events
diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/clear_select_events.tc b/tools/testing/selftests/ftrace/test.d/dynevent/clear_select_events.tc
index c969be9eb7de..438961971b7e 100644
--- a/tools/testing/selftests/ftrace/test.d/dynevent/clear_select_events.tc
+++ b/tools/testing/selftests/ftrace/test.d/dynevent/clear_select_events.tc
@@ -6,7 +6,7 @@
 echo 0 > events/enable
 echo > dynamic_events
 
-PLACE=_do_fork
+PLACE=kernel_clone
 
 setup_events() {
 echo "p:myevent1 $PLACE" >> dynamic_events
diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/generic_clear_event.tc b/tools/testing/selftests/ftrace/test.d/dynevent/generic_clear_event.tc
index 16d543eaac88..a8603bd23e0d 100644
--- a/tools/testing/selftests/ftrace/test.d/dynevent/generic_clear_event.tc
+++ b/tools/testing/selftests/ftrace/test.d/dynevent/generic_clear_event.tc
@@ -6,7 +6,7 @@
 echo 0 > events/enable
 echo > dynamic_events
 
-PLACE=_do_fork
+PLACE=kernel_clone
 
 setup_events() {
 echo "p:myevent1 $PLACE" >> dynamic_events
diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-stacktrace.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-stacktrace.tc
index 0f41e441c203..98305d76bd04 100644
--- a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-stacktrace.tc
+++ b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-stacktrace.tc
@@ -4,9 +4,9 @@
 # requires: set_ftrace_filter
 # flags: instance
 
-echo _do_fork:stacktrace >> set_ftrace_filter
+echo kernel_clone:stacktrace >> set_ftrace_filter
 
-grep -q "_do_fork:stacktrace:unlimited" set_ftrace_filter
+grep -q "kernel_clone:stacktrace:unlimited" set_ftrace_filter
 
 (echo "forked"; sleep 1)
 
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/add_and_remove.tc b/tools/testing/selftests/ftrace/test.d/kprobe/add_and_remove.tc
index eba858c21815..9737cd0578a7 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/add_and_remove.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/add_and_remove.tc
@@ -3,7 +3,7 @@
 # description: Kprobe dynamic event - adding and removing
 # requires: kprobe_events
 
-echo p:myevent _do_fork > kprobe_events
+echo p:myevent kernel_clone > kprobe_events
 grep myevent kprobe_events
 test -d events/kprobes/myevent
 echo > kprobe_events
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/busy_check.tc b/tools/testing/selftests/ftrace/test.d/kprobe/busy_check.tc
index d10bf4f05bc8..f9a40af76888 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/busy_check.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/busy_check.tc
@@ -3,7 +3,7 @@
 # description: Kprobe dynamic event - busy event check
 # requires: kprobe_events
 
-echo p:myevent _do_fork > kprobe_events
+echo p:myevent kernel_clone > kprobe_events
 test -d events/kprobes/myevent
 echo 1 > events/kprobes/myevent/enable
 echo > kprobe_events && exit_fail # this must fail
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args.tc
index 61f2ac441aec..eb543d3cfe5f 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args.tc
@@ -3,13 +3,13 @@
 # description: Kprobe dynamic event with arguments
 # requires: kprobe_events
 
-echo 'p:testprobe _do_fork $stack $stack0 +0($stack)' > kprobe_events
+echo 'p:testprobe kernel_clone $stack $stack0 +0($stack)' > kprobe_events
 grep testprobe kprobe_events | grep -q 'arg1=\$stack arg2=\$stack0 arg3=+0(\$stack)'
 test -d events/kprobes/testprobe
 
 echo 1 > events/kprobes/testprobe/enable
 ( echo "forked")
-grep testprobe trace | grep '_do_fork' | \
+grep testprobe trace | grep 'kernel_clone' | \
   grep -q 'arg1=0x[[:xdigit:]]* arg2=0x[[:xdigit:]]* arg3=0x[[:xdigit:]]*$'
 
 echo 0 > events/kprobes/testprobe/enable
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_comm.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_comm.tc
index 05aaeed6987f..4e5b63be51c9 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_comm.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_comm.tc
@@ -5,7 +5,7 @@
 
 grep -A1 "fetcharg:" README | grep -q "\$comm" || exit_unsupported # this is too old
 
-echo 'p:testprobe _do_fork comm=$comm ' > kprobe_events
+echo 'p:testprobe kernel_clone comm=$comm ' > kprobe_events
 grep testprobe kprobe_events | grep -q 'comm=$comm'
 test -d events/kprobes/testprobe
 
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc
index b5fa05443b39..a1d70588ab21 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc
@@ -30,13 +30,13 @@ esac
 : "Test get argument (1)"
 echo "p:testprobe tracefs_create_dir arg1=+0(${ARG1}):string" > kprobe_events
 echo 1 > events/kprobes/testprobe/enable
-echo "p:test _do_fork" >> kprobe_events
+echo "p:test kernel_clone" >> kprobe_events
 grep -qe "testprobe.* arg1=\"test\"" trace
 
 echo 0 > events/kprobes/testprobe/enable
 : "Test get argument (2)"
 echo "p:testprobe tracefs_create_dir arg1=+0(${ARG1}):string arg2=+0(${ARG1}):string" > kprobe_events
 echo 1 > events/kprobes/testprobe/enable
-echo "p:test _do_fork" >> kprobe_events
+echo "p:test kernel_clone" >> kprobe_events
 grep -qe "testprobe.* arg1=\"test\" arg2=\"test\"" trace
 
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_symbol.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_symbol.tc
index b8c75a3d003c..bd25dd0ba0d0 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_symbol.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_symbol.tc
@@ -14,12 +14,12 @@ elif ! grep "$SYMBOL\$" /proc/kallsyms; then
 fi
 
 : "Test get basic types symbol argument"
-echo "p:testprobe_u _do_fork arg1=@linux_proc_banner:u64 arg2=@linux_proc_banner:u32 arg3=@linux_proc_banner:u16 arg4=@linux_proc_banner:u8" > kprobe_events
-echo "p:testprobe_s _do_fork arg1=@linux_proc_banner:s64 arg2=@linux_proc_banner:s32 arg3=@linux_proc_banner:s16 arg4=@linux_proc_banner:s8" >> kprobe_events
+echo "p:testprobe_u kernel_clone arg1=@linux_proc_banner:u64 arg2=@linux_proc_banner:u32 arg3=@linux_proc_banner:u16 arg4=@linux_proc_banner:u8" > kprobe_events
+echo "p:testprobe_s kernel_clone arg1=@linux_proc_banner:s64 arg2=@linux_proc_banner:s32 arg3=@linux_proc_banner:s16 arg4=@linux_proc_banner:s8" >> kprobe_events
 if grep -q "x8/16/32/64" README; then
-  echo "p:testprobe_x _do_fork arg1=@linux_proc_banner:x64 arg2=@linux_proc_banner:x32 arg3=@linux_proc_banner:x16 arg4=@linux_proc_banner:x8" >> kprobe_events
+  echo "p:testprobe_x kernel_clone arg1=@linux_proc_banner:x64 arg2=@linux_proc_banner:x32 arg3=@linux_proc_banner:x16 arg4=@linux_proc_banner:x8" >> kprobe_events
 fi
-echo "p:testprobe_bf _do_fork arg1=@linux_proc_banner:b8@4/32" >> kprobe_events
+echo "p:testprobe_bf kernel_clone arg1=@linux_proc_banner:b8@4/32" >> kprobe_events
 echo 1 > events/kprobes/enable
 (echo "forked")
 echo 0 > events/kprobes/enable
@@ -27,7 +27,7 @@ grep "testprobe_[usx]:.* arg1=.* arg2=.* arg3=.* arg4=.*" trace
 grep "testprobe_bf:.* arg1=.*" trace
 
 : "Test get string symbol argument"
-echo "p:testprobe_str _do_fork arg1=@linux_proc_banner:string" > kprobe_events
+echo "p:testprobe_str kernel_clone arg1=@linux_proc_banner:string" > kprobe_events
 echo 1 > events/kprobes/enable
 (echo "forked")
 echo 0 > events/kprobes/enable
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_type.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_type.tc
index 0610e0b5587c..91fcce1c241c 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_type.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_type.tc
@@ -4,7 +4,7 @@
 # requires: kprobe_events "x8/16/32/64":README
 
 gen_event() { # Bitsize
-  echo "p:testprobe _do_fork \$stack0:s$1 \$stack0:u$1 \$stack0:x$1 \$stack0:b4@4/$1"
+  echo "p:testprobe kernel_clone \$stack0:s$1 \$stack0:u$1 \$stack0:x$1 \$stack0:b4@4/$1"
 }
 
 check_types() { # s-type u-type x-type bf-type width
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_ftrace.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_ftrace.tc
index 81d8b58c03bc..0d179094191f 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_ftrace.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_ftrace.tc
@@ -5,29 +5,29 @@
 
 # prepare
 echo nop > current_tracer
-echo _do_fork > set_ftrace_filter
-echo 'p:testprobe _do_fork' > kprobe_events
+echo kernel_clone > set_ftrace_filter
+echo 'p:testprobe kernel_clone' > kprobe_events
 
 # kprobe on / ftrace off
 echo 1 > events/kprobes/testprobe/enable
 echo > trace
 ( echo "forked")
 grep testprobe trace
-! grep '_do_fork <-' trace
+! grep 'kernel_clone <-' trace
 
 # kprobe on / ftrace on
 echo function > current_tracer
 echo > trace
 ( echo "forked")
 grep testprobe trace
-grep '_do_fork <-' trace
+grep 'kernel_clone <-' trace
 
 # kprobe off / ftrace on
 echo 0 > events/kprobes/testprobe/enable
 echo > trace
 ( echo "forked")
 ! grep testprobe trace
-grep '_do_fork <-' trace
+grep 'kernel_clone <-' trace
 
 # kprobe on / ftrace on
 echo 1 > events/kprobes/testprobe/enable
@@ -35,11 +35,11 @@ echo function > current_tracer
 echo > trace
 ( echo "forked")
 grep testprobe trace
-grep '_do_fork <-' trace
+grep 'kernel_clone <-' trace
 
 # kprobe on / ftrace off
 echo nop > current_tracer
 echo > trace
 ( echo "forked")
 grep testprobe trace
-! grep '_do_fork <-' trace
+! grep 'kernel_clone <-' trace
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_multiprobe.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_multiprobe.tc
index 366b7e1b6718..45d90b6c763d 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_multiprobe.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_multiprobe.tc
@@ -4,7 +4,7 @@
 # requires: kprobe_events "Create/append/":README
 
 # Choose 2 symbols for target
-SYM1=_do_fork
+SYM1=kernel_clone
 SYM2=do_exit
 EVENT_NAME=kprobes/testevent
 
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc
index b4d834675e59..c02ea50d63ea 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc
@@ -86,15 +86,15 @@ esac
 
 # multiprobe errors
 if grep -q "Create/append/" README && grep -q "imm-value" README; then
-echo 'p:kprobes/testevent _do_fork' > kprobe_events
+echo 'p:kprobes/testevent kernel_clone' > kprobe_events
 check_error '^r:kprobes/testevent do_exit'	# DIFF_PROBE_TYPE
 
 # Explicitly use printf "%s" to not interpret \1
-printf "%s" 'p:kprobes/testevent _do_fork abcd=\1' > kprobe_events
-check_error 'p:kprobes/testevent _do_fork ^bcd=\1'	# DIFF_ARG_TYPE
-check_error 'p:kprobes/testevent _do_fork ^abcd=\1:u8'	# DIFF_ARG_TYPE
-check_error 'p:kprobes/testevent _do_fork ^abcd=\"foo"'	# DIFF_ARG_TYPE
-check_error '^p:kprobes/testevent _do_fork abcd=\1'	# SAME_PROBE
+printf "%s" 'p:kprobes/testevent kernel_clone abcd=\1' > kprobe_events
+check_error 'p:kprobes/testevent kernel_clone ^bcd=\1'	# DIFF_ARG_TYPE
+check_error 'p:kprobes/testevent kernel_clone ^abcd=\1:u8'	# DIFF_ARG_TYPE
+check_error 'p:kprobes/testevent kernel_clone ^abcd=\"foo"'	# DIFF_ARG_TYPE
+check_error '^p:kprobes/testevent kernel_clone abcd=\1'	# SAME_PROBE
 fi
 
 exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_args.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_args.tc
index 523fde6d1aa5..7ae492c204a4 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_args.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_args.tc
@@ -4,14 +4,14 @@
 # requires: kprobe_events
 
 # Add new kretprobe event
-echo 'r:testprobe2 _do_fork $retval' > kprobe_events
+echo 'r:testprobe2 kernel_clone $retval' > kprobe_events
 grep testprobe2 kprobe_events | grep -q 'arg1=\$retval'
 test -d events/kprobes/testprobe2
 
 echo 1 > events/kprobes/testprobe2/enable
 ( echo "forked")
 
-cat trace | grep testprobe2 | grep -q '<- _do_fork'
+cat trace | grep testprobe2 | grep -q '<- kernel_clone'
 
 echo 0 > events/kprobes/testprobe2/enable
 echo '-:testprobe2' >> kprobe_events
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/profile.tc b/tools/testing/selftests/ftrace/test.d/kprobe/profile.tc
index ff6c44adc8a0..c4093fc1a773 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/profile.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/profile.tc
@@ -4,7 +4,7 @@
 # requires: kprobe_events
 
 ! grep -q 'myevent' kprobe_profile
-echo p:myevent _do_fork > kprobe_events
+echo p:myevent kernel_clone > kprobe_events
 grep -q 'myevent[[:space:]]*0[[:space:]]*0$' kprobe_profile
 echo 1 > events/kprobes/myevent/enable
 ( echo "forked" )
diff --git a/tools/testing/selftests/kmod/kmod.sh b/tools/testing/selftests/kmod/kmod.sh
index ea2147248ebe..afd42387e8b2 100755
--- a/tools/testing/selftests/kmod/kmod.sh
+++ b/tools/testing/selftests/kmod/kmod.sh
@@ -343,7 +343,7 @@ kmod_test_0001_driver()
 
 	kmod_defaults_driver
 	config_num_threads 1
-	printf '\000' >"$DIR"/config_test_driver
+	printf $NAME >"$DIR"/config_test_driver
 	config_trigger ${FUNCNAME[0]}
 	config_expect_result ${FUNCNAME[0]} MODULE_NOT_FOUND
 }
@@ -354,7 +354,7 @@ kmod_test_0001_fs()
 
 	kmod_defaults_fs
 	config_num_threads 1
-	printf '\000' >"$DIR"/config_test_fs
+	printf $NAME >"$DIR"/config_test_fs
 	config_trigger ${FUNCNAME[0]}
 	config_expect_result ${FUNCNAME[0]} -EINVAL
 }
diff --git a/tools/testing/selftests/kselftest_harness.h b/tools/testing/selftests/kselftest_harness.h
index 4f78e4805633..f19804df244c 100644
--- a/tools/testing/selftests/kselftest_harness.h
+++ b/tools/testing/selftests/kselftest_harness.h
@@ -971,6 +971,11 @@ void __run_test(struct __fixture_metadata *f,
 
 	ksft_print_msg(" RUN           %s%s%s.%s ...\n",
 	       f->name, variant->name[0] ? "." : "", variant->name, t->name);
+
+	/* Make sure output buffers are flushed before fork */
+	fflush(stdout);
+	fflush(stderr);
+
 	t->pid = fork();
 	if (t->pid < 0) {
 		ksft_print_msg("ERROR SPAWNING TEST CHILD\n");
diff --git a/tools/testing/selftests/kvm/x86_64/debug_regs.c b/tools/testing/selftests/kvm/x86_64/debug_regs.c
index 8162c58a1234..2fc6b3af81a1 100644
--- a/tools/testing/selftests/kvm/x86_64/debug_regs.c
+++ b/tools/testing/selftests/kvm/x86_64/debug_regs.c
@@ -40,11 +40,11 @@ static void guest_code(void)
 
 	/* Single step test, covers 2 basic instructions and 2 emulated */
 	asm volatile("ss_start: "
-		     "xor %%rax,%%rax\n\t"
+		     "xor %%eax,%%eax\n\t"
 		     "cpuid\n\t"
 		     "movl $0x1a0,%%ecx\n\t"
 		     "rdmsr\n\t"
-		     : : : "rax", "ecx");
+		     : : : "eax", "ebx", "ecx", "edx");
 
 	/* DR6.BD test */
 	asm volatile("bd_start: mov %%dr0, %%rax" : : : "rax");
@@ -73,7 +73,7 @@ int main(void)
 	int i;
 	/* Instruction lengths starting at ss_start */
 	int ss_size[4] = {
-		3,		/* xor */
+		2,		/* xor */
 		2,		/* cpuid */
 		5,		/* mov */
 		2,		/* rdmsr */
diff --git a/tools/testing/selftests/lkdtm/run.sh b/tools/testing/selftests/lkdtm/run.sh
index 8383eb89d88a..bb7a1775307b 100755
--- a/tools/testing/selftests/lkdtm/run.sh
+++ b/tools/testing/selftests/lkdtm/run.sh
@@ -82,7 +82,7 @@ dmesg > "$DMESG"
 ($SHELL -c 'cat <(echo '"$test"') >'"$TRIGGER" 2>/dev/null) || true
 
 # Record and dump the results
-dmesg | diff --changed-group-format='%>' --unchanged-group-format='' "$DMESG" - > "$LOG" || true
+dmesg | comm --nocheck-order -13 "$DMESG" - > "$LOG" || true
 
 cat "$LOG"
 # Check for expected output
diff --git a/tools/testing/selftests/mincore/.gitignore b/tools/testing/selftests/mincore/.gitignore
new file mode 100644
index 000000000000..15c4dfc2df00
--- /dev/null
+++ b/tools/testing/selftests/mincore/.gitignore
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0+
+mincore_selftest
diff --git a/tools/testing/selftests/mincore/Makefile b/tools/testing/selftests/mincore/Makefile
new file mode 100644
index 000000000000..38c7db1e8926
--- /dev/null
+++ b/tools/testing/selftests/mincore/Makefile
@@ -0,0 +1,6 @@
+# SPDX-License-Identifier: GPL-2.0+
+
+CFLAGS += -Wall
+
+TEST_GEN_PROGS := mincore_selftest
+include ../lib.mk
diff --git a/tools/testing/selftests/mincore/mincore_selftest.c b/tools/testing/selftests/mincore/mincore_selftest.c
new file mode 100644
index 000000000000..5a1e85ff5d32
--- /dev/null
+++ b/tools/testing/selftests/mincore/mincore_selftest.c
@@ -0,0 +1,361 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * kselftest suite for mincore().
+ *
+ * Copyright (C) 2020 Collabora, Ltd.
+ */
+
+#define _GNU_SOURCE
+
+#include <stdio.h>
+#include <errno.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <sys/mman.h>
+#include <string.h>
+#include <fcntl.h>
+#include <string.h>
+
+#include "../kselftest.h"
+#include "../kselftest_harness.h"
+
+/* Default test file size: 4MB */
+#define MB (1UL << 20)
+#define FILE_SIZE (4 * MB)
+
+
+/*
+ * Tests the user interface. This test triggers most of the documented
+ * error conditions in mincore().
+ */
+TEST(basic_interface)
+{
+	int retval;
+	int page_size;
+	unsigned char vec[1];
+	char *addr;
+
+	page_size = sysconf(_SC_PAGESIZE);
+
+	/* Query a 0 byte sized range */
+	retval = mincore(0, 0, vec);
+	EXPECT_EQ(0, retval);
+
+	/* Addresses in the specified range are invalid or unmapped */
+	errno = 0;
+	retval = mincore(NULL, page_size, vec);
+	EXPECT_EQ(-1, retval);
+	EXPECT_EQ(ENOMEM, errno);
+
+	errno = 0;
+	addr = mmap(NULL, page_size, PROT_READ | PROT_WRITE,
+		MAP_SHARED | MAP_ANONYMOUS, -1, 0);
+	ASSERT_NE(MAP_FAILED, addr) {
+		TH_LOG("mmap error: %s", strerror(errno));
+	}
+
+	/* <addr> argument is not page-aligned */
+	errno = 0;
+	retval = mincore(addr + 1, page_size, vec);
+	EXPECT_EQ(-1, retval);
+	EXPECT_EQ(EINVAL, errno);
+
+	/* <length> argument is too large */
+	errno = 0;
+	retval = mincore(addr, -1, vec);
+	EXPECT_EQ(-1, retval);
+	EXPECT_EQ(ENOMEM, errno);
+
+	/* <vec> argument points to an illegal address */
+	errno = 0;
+	retval = mincore(addr, page_size, NULL);
+	EXPECT_EQ(-1, retval);
+	EXPECT_EQ(EFAULT, errno);
+	munmap(addr, page_size);
+}
+
+
+/*
+ * Test mincore() behavior on a private anonymous page mapping.
+ * Check that the page is not loaded into memory right after the mapping
+ * but after accessing it (on-demand allocation).
+ * Then free the page and check that it's not memory-resident.
+ */
+TEST(check_anonymous_locked_pages)
+{
+	unsigned char vec[1];
+	char *addr;
+	int retval;
+	int page_size;
+
+	page_size = sysconf(_SC_PAGESIZE);
+
+	/* Map one page and check it's not memory-resident */
+	errno = 0;
+	addr = mmap(NULL, page_size, PROT_READ | PROT_WRITE,
+			MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+	ASSERT_NE(MAP_FAILED, addr) {
+		TH_LOG("mmap error: %s", strerror(errno));
+	}
+	retval = mincore(addr, page_size, vec);
+	ASSERT_EQ(0, retval);
+	ASSERT_EQ(0, vec[0]) {
+		TH_LOG("Page found in memory before use");
+	}
+
+	/* Touch the page and check again. It should now be in memory */
+	addr[0] = 1;
+	mlock(addr, page_size);
+	retval = mincore(addr, page_size, vec);
+	ASSERT_EQ(0, retval);
+	ASSERT_EQ(1, vec[0]) {
+		TH_LOG("Page not found in memory after use");
+	}
+
+	/*
+	 * It shouldn't be memory-resident after unlocking it and
+	 * marking it as unneeded.
+	 */
+	munlock(addr, page_size);
+	madvise(addr, page_size, MADV_DONTNEED);
+	retval = mincore(addr, page_size, vec);
+	ASSERT_EQ(0, retval);
+	ASSERT_EQ(0, vec[0]) {
+		TH_LOG("Page in memory after being zapped");
+	}
+	munmap(addr, page_size);
+}
+
+
+/*
+ * Check mincore() behavior on huge pages.
+ * This test will be skipped if the mapping fails (ie. if there are no
+ * huge pages available).
+ *
+ * Make sure the system has at least one free huge page, check
+ * "HugePages_Free" in /proc/meminfo.
+ * Increment /sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages if
+ * needed.
+ */
+TEST(check_huge_pages)
+{
+	unsigned char vec[1];
+	char *addr;
+	int retval;
+	int page_size;
+
+	page_size = sysconf(_SC_PAGESIZE);
+
+	errno = 0;
+	addr = mmap(NULL, page_size, PROT_READ | PROT_WRITE,
+		MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB,
+		-1, 0);
+	if (addr == MAP_FAILED) {
+		if (errno == ENOMEM)
+			SKIP(return, "No huge pages available.");
+		else
+			TH_LOG("mmap error: %s", strerror(errno));
+	}
+	retval = mincore(addr, page_size, vec);
+	ASSERT_EQ(0, retval);
+	ASSERT_EQ(0, vec[0]) {
+		TH_LOG("Page found in memory before use");
+	}
+
+	addr[0] = 1;
+	mlock(addr, page_size);
+	retval = mincore(addr, page_size, vec);
+	ASSERT_EQ(0, retval);
+	ASSERT_EQ(1, vec[0]) {
+		TH_LOG("Page not found in memory after use");
+	}
+
+	munlock(addr, page_size);
+	munmap(addr, page_size);
+}
+
+
+/*
+ * Test mincore() behavior on a file-backed page.
+ * No pages should be loaded into memory right after the mapping. Then,
+ * accessing any address in the mapping range should load the page
+ * containing the address and a number of subsequent pages (readahead).
+ *
+ * The actual readahead settings depend on the test environment, so we
+ * can't make a lot of assumptions about that. This test covers the most
+ * general cases.
+ */
+TEST(check_file_mmap)
+{
+	unsigned char *vec;
+	int vec_size;
+	char *addr;
+	int retval;
+	int page_size;
+	int fd;
+	int i;
+	int ra_pages = 0;
+
+	page_size = sysconf(_SC_PAGESIZE);
+	vec_size = FILE_SIZE / page_size;
+	if (FILE_SIZE % page_size)
+		vec_size++;
+
+	vec = calloc(vec_size, sizeof(unsigned char));
+	ASSERT_NE(NULL, vec) {
+		TH_LOG("Can't allocate array");
+	}
+
+	errno = 0;
+	fd = open(".", O_TMPFILE | O_RDWR, 0600);
+	ASSERT_NE(-1, fd) {
+		TH_LOG("Can't create temporary file: %s",
+			strerror(errno));
+	}
+	errno = 0;
+	retval = fallocate(fd, 0, 0, FILE_SIZE);
+	ASSERT_EQ(0, retval) {
+		TH_LOG("Error allocating space for the temporary file: %s",
+			strerror(errno));
+	}
+
+	/*
+	 * Map the whole file, the pages shouldn't be fetched yet.
+	 */
+	errno = 0;
+	addr = mmap(NULL, FILE_SIZE, PROT_READ | PROT_WRITE,
+			MAP_SHARED, fd, 0);
+	ASSERT_NE(MAP_FAILED, addr) {
+		TH_LOG("mmap error: %s", strerror(errno));
+	}
+	retval = mincore(addr, FILE_SIZE, vec);
+	ASSERT_EQ(0, retval);
+	for (i = 0; i < vec_size; i++) {
+		ASSERT_EQ(0, vec[i]) {
+			TH_LOG("Unexpected page in memory");
+		}
+	}
+
+	/*
+	 * Touch a page in the middle of the mapping. We expect the next
+	 * few pages (the readahead window) to be populated too.
+	 */
+	addr[FILE_SIZE / 2] = 1;
+	retval = mincore(addr, FILE_SIZE, vec);
+	ASSERT_EQ(0, retval);
+	ASSERT_EQ(1, vec[FILE_SIZE / 2 / page_size]) {
+		TH_LOG("Page not found in memory after use");
+	}
+
+	i = FILE_SIZE / 2 / page_size + 1;
+	while (i < vec_size && vec[i]) {
+		ra_pages++;
+		i++;
+	}
+	EXPECT_GT(ra_pages, 0) {
+		TH_LOG("No read-ahead pages found in memory");
+	}
+
+	EXPECT_LT(i, vec_size) {
+		TH_LOG("Read-ahead pages reached the end of the file");
+	}
+	/*
+	 * End of the readahead window. The rest of the pages shouldn't
+	 * be in memory.
+	 */
+	if (i < vec_size) {
+		while (i < vec_size && !vec[i])
+			i++;
+		EXPECT_EQ(vec_size, i) {
+			TH_LOG("Unexpected page in memory beyond readahead window");
+		}
+	}
+
+	munmap(addr, FILE_SIZE);
+	close(fd);
+	free(vec);
+}
+
+
+/*
+ * Test mincore() behavior on a page backed by a tmpfs file.  This test
+ * performs the same steps as the previous one. However, we don't expect
+ * any readahead in this case.
+ */
+TEST(check_tmpfs_mmap)
+{
+	unsigned char *vec;
+	int vec_size;
+	char *addr;
+	int retval;
+	int page_size;
+	int fd;
+	int i;
+	int ra_pages = 0;
+
+	page_size = sysconf(_SC_PAGESIZE);
+	vec_size = FILE_SIZE / page_size;
+	if (FILE_SIZE % page_size)
+		vec_size++;
+
+	vec = calloc(vec_size, sizeof(unsigned char));
+	ASSERT_NE(NULL, vec) {
+		TH_LOG("Can't allocate array");
+	}
+
+	errno = 0;
+	fd = open("/dev/shm", O_TMPFILE | O_RDWR, 0600);
+	ASSERT_NE(-1, fd) {
+		TH_LOG("Can't create temporary file: %s",
+			strerror(errno));
+	}
+	errno = 0;
+	retval = fallocate(fd, 0, 0, FILE_SIZE);
+	ASSERT_EQ(0, retval) {
+		TH_LOG("Error allocating space for the temporary file: %s",
+			strerror(errno));
+	}
+
+	/*
+	 * Map the whole file, the pages shouldn't be fetched yet.
+	 */
+	errno = 0;
+	addr = mmap(NULL, FILE_SIZE, PROT_READ | PROT_WRITE,
+			MAP_SHARED, fd, 0);
+	ASSERT_NE(MAP_FAILED, addr) {
+		TH_LOG("mmap error: %s", strerror(errno));
+	}
+	retval = mincore(addr, FILE_SIZE, vec);
+	ASSERT_EQ(0, retval);
+	for (i = 0; i < vec_size; i++) {
+		ASSERT_EQ(0, vec[i]) {
+			TH_LOG("Unexpected page in memory");
+		}
+	}
+
+	/*
+	 * Touch a page in the middle of the mapping. We expect only
+	 * that page to be fetched into memory.
+	 */
+	addr[FILE_SIZE / 2] = 1;
+	retval = mincore(addr, FILE_SIZE, vec);
+	ASSERT_EQ(0, retval);
+	ASSERT_EQ(1, vec[FILE_SIZE / 2 / page_size]) {
+		TH_LOG("Page not found in memory after use");
+	}
+
+	i = FILE_SIZE / 2 / page_size + 1;
+	while (i < vec_size && vec[i]) {
+		ra_pages++;
+		i++;
+	}
+	ASSERT_EQ(ra_pages, 0) {
+		TH_LOG("Read-ahead pages found in memory");
+	}
+
+	munmap(addr, FILE_SIZE);
+	close(fd);
+	free(vec);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/net/icmp_redirect.sh b/tools/testing/selftests/net/icmp_redirect.sh
index 18c5de53558a..bf361f30d6ef 100755
--- a/tools/testing/selftests/net/icmp_redirect.sh
+++ b/tools/testing/selftests/net/icmp_redirect.sh
@@ -180,6 +180,8 @@ setup()
 			;;
 		r[12]) ip netns exec $ns sysctl -q -w net.ipv4.ip_forward=1
 		       ip netns exec $ns sysctl -q -w net.ipv4.conf.all.send_redirects=1
+		       ip netns exec $ns sysctl -q -w net.ipv4.conf.default.rp_filter=0
+		       ip netns exec $ns sysctl -q -w net.ipv4.conf.all.rp_filter=0
 
 		       ip netns exec $ns sysctl -q -w net.ipv6.conf.all.forwarding=1
 		       ip netns exec $ns sysctl -q -w net.ipv6.route.mtu_expires=10
diff --git a/tools/testing/selftests/net/mptcp/config b/tools/testing/selftests/net/mptcp/config
index 2499824d9e1c..8df5cb8f71ff 100644
--- a/tools/testing/selftests/net/mptcp/config
+++ b/tools/testing/selftests/net/mptcp/config
@@ -1,4 +1,6 @@
 CONFIG_MPTCP=y
 CONFIG_MPTCP_IPV6=y
+CONFIG_INET_DIAG=m
+CONFIG_INET_MPTCP_DIAG=m
 CONFIG_VETH=y
 CONFIG_NET_SCH_NETEM=m
diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.c b/tools/testing/selftests/net/mptcp/mptcp_connect.c
index cad6f73a5fd0..090620c3e10c 100644
--- a/tools/testing/selftests/net/mptcp/mptcp_connect.c
+++ b/tools/testing/selftests/net/mptcp/mptcp_connect.c
@@ -406,10 +406,11 @@ static int copyfd_io_poll(int infd, int peerfd, int outfd)
 
 				/* ... but we still receive.
 				 * Close our write side, ev. give some time
-				 * for address notification
+				 * for address notification and/or checking
+				 * the current status
 				 */
-				if (cfg_join)
-					usleep(400000);
+				if (cfg_wait)
+					usleep(cfg_wait);
 				shutdown(peerfd, SHUT_WR);
 			} else {
 				if (errno == EINTR)
@@ -427,7 +428,7 @@ static int copyfd_io_poll(int infd, int peerfd, int outfd)
 	}
 
 	/* leave some time for late join/announce */
-	if (cfg_wait)
+	if (cfg_join)
 		usleep(cfg_wait);
 
 	close(peerfd);
diff --git a/tools/testing/selftests/net/rtnetlink.sh b/tools/testing/selftests/net/rtnetlink.sh
index 7c38a909f8b8..8a2fe6d64bf2 100755
--- a/tools/testing/selftests/net/rtnetlink.sh
+++ b/tools/testing/selftests/net/rtnetlink.sh
@@ -1175,6 +1175,51 @@ kci_test_neigh_get()
 	echo "PASS: neigh get"
 }
 
+kci_test_bridge_parent_id()
+{
+	local ret=0
+	sysfsnet=/sys/bus/netdevsim/devices/netdevsim
+	probed=false
+
+	if [ ! -w /sys/bus/netdevsim/new_device ] ; then
+		modprobe -q netdevsim
+		check_err $?
+		if [ $ret -ne 0 ]; then
+			echo "SKIP: bridge_parent_id can't load netdevsim"
+			return $ksft_skip
+		fi
+		probed=true
+	fi
+
+	echo "10 1" > /sys/bus/netdevsim/new_device
+	while [ ! -d ${sysfsnet}10 ] ; do :; done
+	echo "20 1" > /sys/bus/netdevsim/new_device
+	while [ ! -d ${sysfsnet}20 ] ; do :; done
+	udevadm settle
+	dev10=`ls ${sysfsnet}10/net/`
+	dev20=`ls ${sysfsnet}20/net/`
+
+	ip link add name test-bond0 type bond mode 802.3ad
+	ip link set dev $dev10 master test-bond0
+	ip link set dev $dev20 master test-bond0
+	ip link add name test-br0 type bridge
+	ip link set dev test-bond0 master test-br0
+	check_err $?
+
+	# clean up any leftovers
+	ip link del dev test-br0
+	ip link del dev test-bond0
+	echo 20 > /sys/bus/netdevsim/del_device
+	echo 10 > /sys/bus/netdevsim/del_device
+	$probed && rmmod netdevsim
+
+	if [ $ret -ne 0 ]; then
+		echo "FAIL: bridge_parent_id"
+		return 1
+	fi
+	echo "PASS: bridge_parent_id"
+}
+
 kci_test_rtnl()
 {
 	local ret=0
@@ -1224,6 +1269,8 @@ kci_test_rtnl()
 	check_err $?
 	kci_test_neigh_get
 	check_err $?
+	kci_test_bridge_parent_id
+	check_err $?
 
 	kci_del_dummy
 	return $ret
diff --git a/tools/testing/selftests/netfilter/nft_flowtable.sh b/tools/testing/selftests/netfilter/nft_flowtable.sh
index d3e0809ab368..431296c0f91c 100755
--- a/tools/testing/selftests/netfilter/nft_flowtable.sh
+++ b/tools/testing/selftests/netfilter/nft_flowtable.sh
@@ -2,13 +2,18 @@
 # SPDX-License-Identifier: GPL-2.0
 #
 # This tests basic flowtable functionality.
-# Creates following topology:
+# Creates following default topology:
 #
 # Originator (MTU 9000) <-Router1-> MTU 1500 <-Router2-> Responder (MTU 2000)
 # Router1 is the one doing flow offloading, Router2 has no special
 # purpose other than having a link that is smaller than either Originator
 # and responder, i.e. TCPMSS announced values are too large and will still
 # result in fragmentation and/or PMTU discovery.
+#
+# You can check with different Orgininator/Link/Responder MTU eg:
+# nft_flowtable.sh -o8000 -l1500 -r2000
+#
+
 
 # Kselftest framework requirement - SKIP code is 4.
 ksft_skip=4
@@ -21,29 +26,17 @@ ns2out=""
 
 log_netns=$(sysctl -n net.netfilter.nf_log_all_netns)
 
-nft --version > /dev/null 2>&1
-if [ $? -ne 0 ];then
-	echo "SKIP: Could not run test without nft tool"
-	exit $ksft_skip
-fi
-
-ip -Version > /dev/null 2>&1
-if [ $? -ne 0 ];then
-	echo "SKIP: Could not run test without ip tool"
-	exit $ksft_skip
-fi
-
-which nc > /dev/null 2>&1
-if [ $? -ne 0 ];then
-	echo "SKIP: Could not run test without nc (netcat)"
-	exit $ksft_skip
-fi
+checktool (){
+	if ! $1 > /dev/null 2>&1; then
+		echo "SKIP: Could not $2"
+		exit $ksft_skip
+	fi
+}
 
-ip netns add nsr1
-if [ $? -ne 0 ];then
-	echo "SKIP: Could not create net namespace"
-	exit $ksft_skip
-fi
+checktool "nft --version" "run test without nft tool"
+checktool "ip -Version" "run test without ip tool"
+checktool "which nc" "run test without nc (netcat)"
+checktool "ip netns add nsr1" "create net namespace"
 
 ip netns add ns1
 ip netns add ns2
@@ -89,11 +82,41 @@ ip -net nsr2 addr add dead:2::1/64 dev veth1
 # ns2 is going via nsr2 with a smaller mtu, so that TCPMSS announced by both peers
 # is NOT the lowest link mtu.
 
-ip -net nsr1 link set veth0 mtu 9000
-ip -net ns1 link set eth0 mtu 9000
+omtu=9000
+lmtu=1500
+rmtu=2000
+
+usage(){
+	echo "nft_flowtable.sh [OPTIONS]"
+	echo
+	echo "MTU options"
+	echo "   -o originator"
+	echo "   -l link"
+	echo "   -r responder"
+	exit 1
+}
+
+while getopts "o:l:r:" o
+do
+	case $o in
+		o) omtu=$OPTARG;;
+		l) lmtu=$OPTARG;;
+		r) rmtu=$OPTARG;;
+		*) usage;;
+	esac
+done
+
+if ! ip -net nsr1 link set veth0 mtu $omtu; then
+	exit 1
+fi
+
+ip -net ns1 link set eth0 mtu $omtu
+
+if ! ip -net nsr2 link set veth1 mtu $rmtu; then
+	exit 1
+fi
 
-ip -net nsr2 link set veth1 mtu 2000
-ip -net ns2 link set eth0 mtu 2000
+ip -net ns2 link set eth0 mtu $rmtu
 
 # transfer-net between nsr1 and nsr2.
 # these addresses are not used for connections.
@@ -113,7 +136,10 @@ for i in 1 2; do
   ip -net ns$i route add default via 10.0.$i.1
   ip -net ns$i addr add dead:$i::99/64 dev eth0
   ip -net ns$i route add default via dead:$i::1
-  ip netns exec ns$i sysctl net.ipv4.tcp_no_metrics_save=1 > /dev/null
+  if ! ip netns exec ns$i sysctl net.ipv4.tcp_no_metrics_save=1 > /dev/null; then
+	echo "ERROR: Check Originator/Responder values (problem during address addition)"
+	exit 1
+  fi
 
   # don't set ip DF bit for first two tests
   ip netns exec ns$i sysctl net.ipv4.ip_no_pmtu_disc=1 > /dev/null
@@ -147,7 +173,7 @@ table inet filter {
       # as PMTUd is off.
       # This rule is deleted for the last test, when we expect PMTUd
       # to kick in and ensure all packets meet mtu requirements.
-      meta length gt 1500 accept comment something-to-grep-for
+      meta length gt $lmtu accept comment something-to-grep-for
 
       # next line blocks connection w.o. working offload.
       # we only do this for reverse dir, because we expect packets to
@@ -171,15 +197,13 @@ if [ $? -ne 0 ]; then
 fi
 
 # test basic connectivity
-ip netns exec ns1 ping -c 1 -q 10.0.2.99 > /dev/null
-if [ $? -ne 0 ];then
+if ! ip netns exec ns1 ping -c 1 -q 10.0.2.99 > /dev/null; then
   echo "ERROR: ns1 cannot reach ns2" 1>&2
   bash
   exit 1
 fi
 
-ip netns exec ns2 ping -c 1 -q 10.0.1.99 > /dev/null
-if [ $? -ne 0 ];then
+if ! ip netns exec ns2 ping -c 1 -q 10.0.1.99 > /dev/null; then
   echo "ERROR: ns2 cannot reach ns1" 1>&2
   exit 1
 fi
@@ -196,7 +220,6 @@ ns2out=$(mktemp)
 make_file()
 {
 	name=$1
-	who=$2
 
 	SIZE=$((RANDOM % (1024 * 8)))
 	TSIZE=$((SIZE * 1024))
@@ -215,8 +238,7 @@ check_transfer()
 	out=$2
 	what=$3
 
-	cmp "$in" "$out" > /dev/null 2>&1
-	if [ $? -ne 0 ] ;then
+	if ! cmp "$in" "$out" > /dev/null 2>&1; then
 		echo "FAIL: file mismatch for $what" 1>&2
 		ls -l "$in"
 		ls -l "$out"
@@ -243,17 +265,21 @@ test_tcp_forwarding_ip()
 
 	sleep 3
 
-	kill $lpid
-	kill $cpid
+	if ps -p $lpid > /dev/null;then
+		kill $lpid
+	fi
+
+	if ps -p $cpid > /dev/null;then
+		kill $cpid
+	fi
+
 	wait
 
-	check_transfer "$ns1in" "$ns2out" "ns1 -> ns2"
-	if [ $? -ne 0 ];then
+	if ! check_transfer "$ns1in" "$ns2out" "ns1 -> ns2"; then
 		lret=1
 	fi
 
-	check_transfer "$ns2in" "$ns1out" "ns1 <- ns2"
-	if [ $? -ne 0 ];then
+	if ! check_transfer "$ns2in" "$ns1out" "ns1 <- ns2"; then
 		lret=1
 	fi
 
@@ -282,13 +308,12 @@ test_tcp_forwarding_nat()
 	return $lret
 }
 
-make_file "$ns1in" "ns1"
-make_file "$ns2in" "ns2"
+make_file "$ns1in"
+make_file "$ns2in"
 
 # First test:
 # No PMTU discovery, nsr1 is expected to fragment packets from ns1 to ns2 as needed.
-test_tcp_forwarding ns1 ns2
-if [ $? -eq 0 ] ;then
+if test_tcp_forwarding ns1 ns2; then
 	echo "PASS: flow offloaded for ns1/ns2"
 else
 	echo "FAIL: flow offload for ns1/ns2:" 1>&2
@@ -319,9 +344,7 @@ table ip nat {
 }
 EOF
 
-test_tcp_forwarding_nat ns1 ns2
-
-if [ $? -eq 0 ] ;then
+if test_tcp_forwarding_nat ns1 ns2; then
 	echo "PASS: flow offloaded for ns1/ns2 with NAT"
 else
 	echo "FAIL: flow offload for ns1/ns2 with NAT" 1>&2
@@ -333,8 +356,7 @@ fi
 # Same as second test, but with PMTU discovery enabled.
 handle=$(ip netns exec nsr1 nft -a list table inet filter | grep something-to-grep-for | cut -d \# -f 2)
 
-ip netns exec nsr1 nft delete rule inet filter forward $handle
-if [ $? -ne 0 ] ;then
+if ! ip netns exec nsr1 nft delete rule inet filter forward $handle; then
 	echo "FAIL: Could not delete large-packet accept rule"
 	exit 1
 fi
@@ -342,8 +364,7 @@ fi
 ip netns exec ns1 sysctl net.ipv4.ip_no_pmtu_disc=0 > /dev/null
 ip netns exec ns2 sysctl net.ipv4.ip_no_pmtu_disc=0 > /dev/null
 
-test_tcp_forwarding_nat ns1 ns2
-if [ $? -eq 0 ] ;then
+if test_tcp_forwarding_nat ns1 ns2; then
 	echo "PASS: flow offloaded for ns1/ns2 with NAT and pmtu discovery"
 else
 	echo "FAIL: flow offload for ns1/ns2 with NAT and pmtu discovery" 1>&2
@@ -389,8 +410,7 @@ ip -net ns2 route del 192.168.10.1 via 10.0.2.1
 ip -net ns2 route add default via 10.0.2.1
 ip -net ns2 route add default via dead:2::1
 
-test_tcp_forwarding ns1 ns2
-if [ $? -eq 0 ] ;then
+if test_tcp_forwarding ns1 ns2; then
 	echo "PASS: ipsec tunnel mode for ns1/ns2"
 else
 	echo "FAIL: ipsec tunnel mode for ns1/ns2"
diff --git a/tools/testing/selftests/pidfd/pidfd.h b/tools/testing/selftests/pidfd/pidfd.h
index a2c80914e3dc..01f8d3c0cf2c 100644
--- a/tools/testing/selftests/pidfd/pidfd.h
+++ b/tools/testing/selftests/pidfd/pidfd.h
@@ -46,6 +46,10 @@
 #define __NR_pidfd_getfd -1
 #endif
 
+#ifndef PIDFD_NONBLOCK
+#define PIDFD_NONBLOCK O_NONBLOCK
+#endif
+
 /*
  * The kernel reserves 300 pids via RESERVED_PIDS in kernel/pid.c
  * That means, when it wraps around any pid < 300 will be skipped.
diff --git a/tools/testing/selftests/pidfd/pidfd_setns_test.c b/tools/testing/selftests/pidfd/pidfd_setns_test.c
index 7dca1aa4672d..1f085b922c6e 100644
--- a/tools/testing/selftests/pidfd/pidfd_setns_test.c
+++ b/tools/testing/selftests/pidfd/pidfd_setns_test.c
@@ -75,7 +75,7 @@ static int sys_waitid(int which, pid_t pid, int options)
 
 pid_t create_child(int *pidfd, unsigned flags)
 {
-	struct clone_args args = {
+	struct __clone_args args = {
 		.flags		= CLONE_PIDFD | flags,
 		.exit_signal	= SIGCHLD,
 		.pidfd		= ptr_to_u64(pidfd),
diff --git a/tools/testing/selftests/pidfd/pidfd_wait.c b/tools/testing/selftests/pidfd/pidfd_wait.c
index 7079f8eef792..be2943f072f6 100644
--- a/tools/testing/selftests/pidfd/pidfd_wait.c
+++ b/tools/testing/selftests/pidfd/pidfd_wait.c
@@ -17,10 +17,15 @@
 #include <unistd.h>
 
 #include "pidfd.h"
-#include "../kselftest.h"
+#include "../kselftest_harness.h"
 
 #define ptr_to_u64(ptr) ((__u64)((uintptr_t)(ptr)))
 
+/* Attempt to de-conflict with the selftests tree. */
+#ifndef SKIP
+#define SKIP(s, ...)	XFAIL(s, ##__VA_ARGS__)
+#endif
+
 static pid_t sys_clone3(struct clone_args *args)
 {
 	return syscall(__NR_clone3, args, sizeof(struct clone_args));
@@ -32,9 +37,8 @@ static int sys_waitid(int which, pid_t pid, siginfo_t *info, int options,
 	return syscall(__NR_waitid, which, pid, info, options, ru);
 }
 
-static int test_pidfd_wait_simple(void)
+TEST(wait_simple)
 {
-	const char *test_name = "pidfd wait simple";
 	int pidfd = -1, status = 0;
 	pid_t parent_tid = -1;
 	struct clone_args args = {
@@ -50,76 +54,40 @@ static int test_pidfd_wait_simple(void)
 	};
 
 	pidfd = open("/proc/self", O_DIRECTORY | O_RDONLY | O_CLOEXEC);
-	if (pidfd < 0)
-		ksft_exit_fail_msg("%s test: failed to open /proc/self %s\n",
-				   test_name, strerror(errno));
+	ASSERT_GE(pidfd, 0);
 
 	pid = sys_waitid(P_PIDFD, pidfd, &info, WEXITED, NULL);
-	if (pid == 0)
-		ksft_exit_fail_msg(
-			"%s test: succeeded to wait on invalid pidfd %s\n",
-			test_name, strerror(errno));
-	close(pidfd);
+	ASSERT_NE(pid, 0);
+	EXPECT_EQ(close(pidfd), 0);
 	pidfd = -1;
 
 	pidfd = open("/dev/null", O_RDONLY | O_CLOEXEC);
-	if (pidfd == 0)
-		ksft_exit_fail_msg("%s test: failed to open /dev/null %s\n",
-				   test_name, strerror(errno));
+	ASSERT_GE(pidfd, 0);
 
 	pid = sys_waitid(P_PIDFD, pidfd, &info, WEXITED, NULL);
-	if (pid == 0)
-		ksft_exit_fail_msg(
-			"%s test: succeeded to wait on invalid pidfd %s\n",
-			test_name, strerror(errno));
-	close(pidfd);
+	ASSERT_NE(pid, 0);
+	EXPECT_EQ(close(pidfd), 0);
 	pidfd = -1;
 
 	pid = sys_clone3(&args);
-	if (pid < 0)
-		ksft_exit_fail_msg("%s test: failed to create new process %s\n",
-				   test_name, strerror(errno));
+	ASSERT_GE(pid, 0);
 
 	if (pid == 0)
 		exit(EXIT_SUCCESS);
 
 	pid = sys_waitid(P_PIDFD, pidfd, &info, WEXITED, NULL);
-	if (pid < 0)
-		ksft_exit_fail_msg(
-			"%s test: failed to wait on process with pid %d and pidfd %d: %s\n",
-			test_name, parent_tid, pidfd, strerror(errno));
-
-	if (!WIFEXITED(info.si_status) || WEXITSTATUS(info.si_status))
-		ksft_exit_fail_msg(
-			"%s test: unexpected status received after waiting on process with pid %d and pidfd %d: %s\n",
-			test_name, parent_tid, pidfd, strerror(errno));
-	close(pidfd);
-
-	if (info.si_signo != SIGCHLD)
-		ksft_exit_fail_msg(
-			"%s test: unexpected si_signo value %d received after waiting on process with pid %d and pidfd %d: %s\n",
-			test_name, info.si_signo, parent_tid, pidfd,
-			strerror(errno));
-
-	if (info.si_code != CLD_EXITED)
-		ksft_exit_fail_msg(
-			"%s test: unexpected si_code value %d received after waiting on process with pid %d and pidfd %d: %s\n",
-			test_name, info.si_code, parent_tid, pidfd,
-			strerror(errno));
-
-	if (info.si_pid != parent_tid)
-		ksft_exit_fail_msg(
-			"%s test: unexpected si_pid value %d received after waiting on process with pid %d and pidfd %d: %s\n",
-			test_name, info.si_pid, parent_tid, pidfd,
-			strerror(errno));
-
-	ksft_test_result_pass("%s test: Passed\n", test_name);
-	return 0;
+	ASSERT_GE(pid, 0);
+	ASSERT_EQ(WIFEXITED(info.si_status), true);
+	ASSERT_EQ(WEXITSTATUS(info.si_status), 0);
+	EXPECT_EQ(close(pidfd), 0);
+
+	ASSERT_EQ(info.si_signo, SIGCHLD);
+	ASSERT_EQ(info.si_code, CLD_EXITED);
+	ASSERT_EQ(info.si_pid, parent_tid);
 }
 
-static int test_pidfd_wait_states(void)
+TEST(wait_states)
 {
-	const char *test_name = "pidfd wait states";
 	int pidfd = -1, status = 0;
 	pid_t parent_tid = -1;
 	struct clone_args args = {
@@ -135,9 +103,7 @@ static int test_pidfd_wait_states(void)
 	};
 
 	pid = sys_clone3(&args);
-	if (pid < 0)
-		ksft_exit_fail_msg("%s test: failed to create new process %s\n",
-				   test_name, strerror(errno));
+	ASSERT_GE(pid, 0);
 
 	if (pid == 0) {
 		kill(getpid(), SIGSTOP);
@@ -145,127 +111,115 @@ static int test_pidfd_wait_states(void)
 		exit(EXIT_SUCCESS);
 	}
 
-	ret = sys_waitid(P_PIDFD, pidfd, &info, WSTOPPED, NULL);
-	if (ret < 0)
-		ksft_exit_fail_msg(
-			"%s test: failed to wait on WSTOPPED process with pid %d and pidfd %d: %s\n",
-			test_name, parent_tid, pidfd, strerror(errno));
-
-	if (info.si_signo != SIGCHLD)
-		ksft_exit_fail_msg(
-			"%s test: unexpected si_signo value %d received after waiting on process with pid %d and pidfd %d: %s\n",
-			test_name, info.si_signo, parent_tid, pidfd,
-			strerror(errno));
-
-	if (info.si_code != CLD_STOPPED)
-		ksft_exit_fail_msg(
-			"%s test: unexpected si_code value %d received after waiting on process with pid %d and pidfd %d: %s\n",
-			test_name, info.si_code, parent_tid, pidfd,
-			strerror(errno));
-
-	if (info.si_pid != parent_tid)
-		ksft_exit_fail_msg(
-			"%s test: unexpected si_pid value %d received after waiting on process with pid %d and pidfd %d: %s\n",
-			test_name, info.si_pid, parent_tid, pidfd,
-			strerror(errno));
-
-	ret = sys_pidfd_send_signal(pidfd, SIGCONT, NULL, 0);
-	if (ret < 0)
-		ksft_exit_fail_msg(
-			"%s test: failed to send signal to process with pid %d and pidfd %d: %s\n",
-			test_name, parent_tid, pidfd, strerror(errno));
-
-	ret = sys_waitid(P_PIDFD, pidfd, &info, WCONTINUED, NULL);
-	if (ret < 0)
-		ksft_exit_fail_msg(
-			"%s test: failed to wait WCONTINUED on process with pid %d and pidfd %d: %s\n",
-			test_name, parent_tid, pidfd, strerror(errno));
-
-	if (info.si_signo != SIGCHLD)
-		ksft_exit_fail_msg(
-			"%s test: unexpected si_signo value %d received after waiting on process with pid %d and pidfd %d: %s\n",
-			test_name, info.si_signo, parent_tid, pidfd,
-			strerror(errno));
-
-	if (info.si_code != CLD_CONTINUED)
-		ksft_exit_fail_msg(
-			"%s test: unexpected si_code value %d received after waiting on process with pid %d and pidfd %d: %s\n",
-			test_name, info.si_code, parent_tid, pidfd,
-			strerror(errno));
-
-	if (info.si_pid != parent_tid)
-		ksft_exit_fail_msg(
-			"%s test: unexpected si_pid value %d received after waiting on process with pid %d and pidfd %d: %s\n",
-			test_name, info.si_pid, parent_tid, pidfd,
-			strerror(errno));
-
-	ret = sys_waitid(P_PIDFD, pidfd, &info, WUNTRACED, NULL);
-	if (ret < 0)
-		ksft_exit_fail_msg(
-			"%s test: failed to wait on WUNTRACED process with pid %d and pidfd %d: %s\n",
-			test_name, parent_tid, pidfd, strerror(errno));
-
-	if (info.si_signo != SIGCHLD)
-		ksft_exit_fail_msg(
-			"%s test: unexpected si_signo value %d received after waiting on process with pid %d and pidfd %d: %s\n",
-			test_name, info.si_signo, parent_tid, pidfd,
-			strerror(errno));
-
-	if (info.si_code != CLD_STOPPED)
-		ksft_exit_fail_msg(
-			"%s test: unexpected si_code value %d received after waiting on process with pid %d and pidfd %d: %s\n",
-			test_name, info.si_code, parent_tid, pidfd,
-			strerror(errno));
-
-	if (info.si_pid != parent_tid)
-		ksft_exit_fail_msg(
-			"%s test: unexpected si_pid value %d received after waiting on process with pid %d and pidfd %d: %s\n",
-			test_name, info.si_pid, parent_tid, pidfd,
-			strerror(errno));
-
-	ret = sys_pidfd_send_signal(pidfd, SIGKILL, NULL, 0);
-	if (ret < 0)
-		ksft_exit_fail_msg(
-			"%s test: failed to send SIGKILL to process with pid %d and pidfd %d: %s\n",
-			test_name, parent_tid, pidfd, strerror(errno));
+	ASSERT_EQ(sys_waitid(P_PIDFD, pidfd, &info, WSTOPPED, NULL), 0);
+	ASSERT_EQ(info.si_signo, SIGCHLD);
+	ASSERT_EQ(info.si_code, CLD_STOPPED);
+	ASSERT_EQ(info.si_pid, parent_tid);
 
-	ret = sys_waitid(P_PIDFD, pidfd, &info, WEXITED, NULL);
-	if (ret < 0)
-		ksft_exit_fail_msg(
-			"%s test: failed to wait on WEXITED process with pid %d and pidfd %d: %s\n",
-			test_name, parent_tid, pidfd, strerror(errno));
-
-	if (info.si_signo != SIGCHLD)
-		ksft_exit_fail_msg(
-			"%s test: unexpected si_signo value %d received after waiting on process with pid %d and pidfd %d: %s\n",
-			test_name, info.si_signo, parent_tid, pidfd,
-			strerror(errno));
-
-	if (info.si_code != CLD_KILLED)
-		ksft_exit_fail_msg(
-			"%s test: unexpected si_code value %d received after waiting on process with pid %d and pidfd %d: %s\n",
-			test_name, info.si_code, parent_tid, pidfd,
-			strerror(errno));
-
-	if (info.si_pid != parent_tid)
-		ksft_exit_fail_msg(
-			"%s test: unexpected si_pid value %d received after waiting on process with pid %d and pidfd %d: %s\n",
-			test_name, info.si_pid, parent_tid, pidfd,
-			strerror(errno));
-
-	close(pidfd);
-
-	ksft_test_result_pass("%s test: Passed\n", test_name);
-	return 0;
+	ASSERT_EQ(sys_pidfd_send_signal(pidfd, SIGCONT, NULL, 0), 0);
+
+	ASSERT_EQ(sys_waitid(P_PIDFD, pidfd, &info, WCONTINUED, NULL), 0);
+	ASSERT_EQ(info.si_signo, SIGCHLD);
+	ASSERT_EQ(info.si_code, CLD_CONTINUED);
+	ASSERT_EQ(info.si_pid, parent_tid);
+
+	ASSERT_EQ(sys_waitid(P_PIDFD, pidfd, &info, WUNTRACED, NULL), 0);
+	ASSERT_EQ(info.si_signo, SIGCHLD);
+	ASSERT_EQ(info.si_code, CLD_STOPPED);
+	ASSERT_EQ(info.si_pid, parent_tid);
+
+	ASSERT_EQ(sys_pidfd_send_signal(pidfd, SIGKILL, NULL, 0), 0);
+
+	ASSERT_EQ(sys_waitid(P_PIDFD, pidfd, &info, WEXITED, NULL), 0);
+	ASSERT_EQ(info.si_signo, SIGCHLD);
+	ASSERT_EQ(info.si_code, CLD_KILLED);
+	ASSERT_EQ(info.si_pid, parent_tid);
+
+	EXPECT_EQ(close(pidfd), 0);
 }
 
-int main(int argc, char **argv)
+TEST(wait_nonblock)
 {
-	ksft_print_header();
-	ksft_set_plan(2);
+	int pidfd, status = 0;
+	unsigned int flags = 0;
+	pid_t parent_tid = -1;
+	struct clone_args args = {
+		.parent_tid = ptr_to_u64(&parent_tid),
+		.flags = CLONE_PARENT_SETTID,
+		.exit_signal = SIGCHLD,
+	};
+	int ret;
+	pid_t pid;
+	siginfo_t info = {
+		.si_signo = 0,
+	};
+
+	/*
+	 * Callers need to see ECHILD with non-blocking pidfds when no child
+	 * processes exists.
+	 */
+	pidfd = sys_pidfd_open(getpid(), PIDFD_NONBLOCK);
+	EXPECT_GE(pidfd, 0) {
+		/* pidfd_open() doesn't support PIDFD_NONBLOCK. */
+		ASSERT_EQ(errno, EINVAL);
+		SKIP(return, "Skipping PIDFD_NONBLOCK test");
+	}
+
+	ret = sys_waitid(P_PIDFD, pidfd, &info, WEXITED, NULL);
+	ASSERT_LT(ret, 0);
+	ASSERT_EQ(errno, ECHILD);
+	EXPECT_EQ(close(pidfd), 0);
+
+	pid = sys_clone3(&args);
+	ASSERT_GE(pid, 0);
+
+	if (pid == 0) {
+		kill(getpid(), SIGSTOP);
+		exit(EXIT_SUCCESS);
+	}
 
-	test_pidfd_wait_simple();
-	test_pidfd_wait_states();
+	pidfd = sys_pidfd_open(pid, PIDFD_NONBLOCK);
+	EXPECT_GE(pidfd, 0) {
+		/* pidfd_open() doesn't support PIDFD_NONBLOCK. */
+		ASSERT_EQ(errno, EINVAL);
+		SKIP(return, "Skipping PIDFD_NONBLOCK test");
+	}
+
+	flags = fcntl(pidfd, F_GETFL, 0);
+	ASSERT_GT(flags, 0);
+	ASSERT_GT((flags & O_NONBLOCK), 0);
+
+	/*
+	 * Callers need to see EAGAIN/EWOULDBLOCK with non-blocking pidfd when
+	 * child processes exist but none have exited.
+	 */
+	ret = sys_waitid(P_PIDFD, pidfd, &info, WEXITED, NULL);
+	ASSERT_LT(ret, 0);
+	ASSERT_EQ(errno, EAGAIN);
+
+	/*
+	 * Callers need to continue seeing 0 with non-blocking pidfd and
+	 * WNOHANG raised explicitly when child processes exist but none have
+	 * exited.
+	 */
+	ret = sys_waitid(P_PIDFD, pidfd, &info, WEXITED | WNOHANG, NULL);
+	ASSERT_EQ(ret, 0);
 
-	return ksft_exit_pass();
+	ASSERT_EQ(fcntl(pidfd, F_SETFL, (flags & ~O_NONBLOCK)), 0);
+
+	ASSERT_EQ(sys_waitid(P_PIDFD, pidfd, &info, WSTOPPED, NULL), 0);
+	ASSERT_EQ(info.si_signo, SIGCHLD);
+	ASSERT_EQ(info.si_code, CLD_STOPPED);
+	ASSERT_EQ(info.si_pid, parent_tid);
+
+	ASSERT_EQ(sys_pidfd_send_signal(pidfd, SIGCONT, NULL, 0), 0);
+
+	ASSERT_EQ(sys_waitid(P_PIDFD, pidfd, &info, WEXITED, NULL), 0);
+	ASSERT_EQ(info.si_signo, SIGCHLD);
+	ASSERT_EQ(info.si_code, CLD_EXITED);
+	ASSERT_EQ(info.si_pid, parent_tid);
+
+	EXPECT_EQ(close(pidfd), 0);
 }
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/powerpc/alignment/alignment_handler.c b/tools/testing/selftests/powerpc/alignment/alignment_handler.c
index 0453c50c949c..55ef15184057 100644
--- a/tools/testing/selftests/powerpc/alignment/alignment_handler.c
+++ b/tools/testing/selftests/powerpc/alignment/alignment_handler.c
@@ -9,7 +9,17 @@
  * This selftest exercises the powerpc alignment fault handler.
  *
  * We create two sets of source and destination buffers, one in regular memory,
- * the other cache-inhibited (we use /dev/fb0 for this).
+ * the other cache-inhibited (by default we use /dev/fb0 for this, but an
+ * alterative path for cache-inhibited memory may be provided).
+ *
+ * One way to get cache-inhibited memory is to use the "mem" kernel parameter
+ * to limit the kernel to less memory than actually exists.  Addresses above
+ * the limit may still be accessed but will be treated as cache-inhibited. For
+ * example, if there is actually 4GB of memory and the parameter "mem=3GB" is
+ * used, memory from address 0xC0000000 onwards is treated as cache-inhibited.
+ * To access this region /dev/mem is used. The kernel should be configured
+ * without CONFIG_STRICT_DEVMEM. In this case use:
+ *         ./alignment_handler /dev/mem 0xc0000000
  *
  * We initialise the source buffers, then use whichever set of load/store
  * instructions is under test to copy bytes from the source buffers to the
@@ -48,11 +58,14 @@
 #include <asm/cputable.h>
 
 #include "utils.h"
+#include "instructions.h"
 
 int bufsize;
 int debug;
 int testing;
 volatile int gotsig;
+char *cipath = "/dev/fb0";
+long cioffset;
 
 void sighandler(int sig, siginfo_t *info, void *ctx)
 {
@@ -84,6 +97,17 @@ void sighandler(int sig, siginfo_t *info, void *ctx)
 	}							\
 	rc |= do_test(#name, test_##name)
 
+#define TESTP(name, ld_op, st_op, ld_reg, st_reg)		\
+	void test_##name(char *s, char *d)			\
+	{							\
+		asm volatile(					\
+			ld_op(ld_reg, %0, 0, 0)			\
+			st_op(st_reg, %1, 0, 0)			\
+			:: "r"(s), "r"(d), "r"(0)		\
+			: "memory", "vs0", "vs32", "r31");	\
+	}							\
+	rc |= do_test(#name, test_##name)
+
 #define LOAD_VSX_XFORM_TEST(op) TEST(op, op, stxvd2x, XFORM, 32, 32)
 #define STORE_VSX_XFORM_TEST(op) TEST(op, lxvd2x, op, XFORM, 32, 32)
 #define LOAD_VSX_DFORM_TEST(op) TEST(op, op, stxv, DFORM, 32, 32)
@@ -103,6 +127,17 @@ void sighandler(int sig, siginfo_t *info, void *ctx)
 #define LOAD_FLOAT_XFORM_TEST(op)  TEST(op, op, stfdx, XFORM, 0, 0)
 #define STORE_FLOAT_XFORM_TEST(op) TEST(op, lfdx, op, XFORM, 0, 0)
 
+#define LOAD_MLS_PREFIX_TEST(op) TESTP(op, op, PSTD, 31, 31)
+#define STORE_MLS_PREFIX_TEST(op) TESTP(op, PLD, op, 31, 31)
+
+#define LOAD_8LS_PREFIX_TEST(op) TESTP(op, op, PSTD, 31, 31)
+#define STORE_8LS_PREFIX_TEST(op) TESTP(op, PLD, op, 31, 31)
+
+#define LOAD_FLOAT_MLS_PREFIX_TEST(op) TESTP(op, op, PSTFD, 0, 0)
+#define STORE_FLOAT_MLS_PREFIX_TEST(op) TESTP(op, PLFD, op, 0, 0)
+
+#define LOAD_VSX_8LS_PREFIX_TEST(op, tail) TESTP(op, op, PSTXV ## tail, 0, 32)
+#define STORE_VSX_8LS_PREFIX_TEST(op, tail) TESTP(op, PLXV ## tail, op, 32, 0)
 
 /* FIXME: Unimplemented tests: */
 // STORE_DFORM_TEST(stq)   /* FIXME: need two registers for quad */
@@ -195,17 +230,18 @@ int do_test(char *test_name, void (*test_func)(char *, char *))
 
 	printf("\tDoing %s:\t", test_name);
 
-	fd = open("/dev/fb0", O_RDWR);
+	fd = open(cipath, O_RDWR);
 	if (fd < 0) {
 		printf("\n");
-		perror("Can't open /dev/fb0 now?");
+		perror("Can't open ci file now?");
 		return 1;
 	}
 
-	ci0 = mmap(NULL, bufsize, PROT_WRITE, MAP_SHARED,
-		   fd, 0x0);
-	ci1 = mmap(NULL, bufsize, PROT_WRITE, MAP_SHARED,
-		   fd, bufsize);
+	ci0 = mmap(NULL, bufsize, PROT_WRITE | PROT_READ, MAP_SHARED,
+		   fd, cioffset);
+	ci1 = mmap(NULL, bufsize, PROT_WRITE | PROT_READ, MAP_SHARED,
+		   fd, cioffset + bufsize);
+
 	if ((ci0 == MAP_FAILED) || (ci1 == MAP_FAILED)) {
 		printf("\n");
 		perror("mmap failed");
@@ -270,11 +306,11 @@ int do_test(char *test_name, void (*test_func)(char *, char *))
 	return rc;
 }
 
-static bool can_open_fb0(void)
+static bool can_open_cifile(void)
 {
 	int fd;
 
-	fd = open("/dev/fb0", O_RDWR);
+	fd = open(cipath, O_RDWR);
 	if (fd < 0)
 		return false;
 
@@ -286,7 +322,7 @@ int test_alignment_handler_vsx_206(void)
 {
 	int rc = 0;
 
-	SKIP_IF(!can_open_fb0());
+	SKIP_IF(!can_open_cifile());
 	SKIP_IF(!have_hwcap(PPC_FEATURE_ARCH_2_06));
 
 	printf("VSX: 2.06B\n");
@@ -304,7 +340,7 @@ int test_alignment_handler_vsx_207(void)
 {
 	int rc = 0;
 
-	SKIP_IF(!can_open_fb0());
+	SKIP_IF(!can_open_cifile());
 	SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_2_07));
 
 	printf("VSX: 2.07B\n");
@@ -320,7 +356,7 @@ int test_alignment_handler_vsx_300(void)
 {
 	int rc = 0;
 
-	SKIP_IF(!can_open_fb0());
+	SKIP_IF(!can_open_cifile());
 
 	SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_3_00));
 	printf("VSX: 3.00B\n");
@@ -348,11 +384,30 @@ int test_alignment_handler_vsx_300(void)
 	return rc;
 }
 
+int test_alignment_handler_vsx_prefix(void)
+{
+	int rc = 0;
+
+	SKIP_IF(!can_open_cifile());
+	SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_3_1));
+
+	printf("VSX: PREFIX\n");
+	LOAD_VSX_8LS_PREFIX_TEST(PLXSD, 0);
+	LOAD_VSX_8LS_PREFIX_TEST(PLXSSP, 0);
+	LOAD_VSX_8LS_PREFIX_TEST(PLXV0, 0);
+	LOAD_VSX_8LS_PREFIX_TEST(PLXV1, 1);
+	STORE_VSX_8LS_PREFIX_TEST(PSTXSD, 0);
+	STORE_VSX_8LS_PREFIX_TEST(PSTXSSP, 0);
+	STORE_VSX_8LS_PREFIX_TEST(PSTXV0, 0);
+	STORE_VSX_8LS_PREFIX_TEST(PSTXV1, 1);
+	return rc;
+}
+
 int test_alignment_handler_integer(void)
 {
 	int rc = 0;
 
-	SKIP_IF(!can_open_fb0());
+	SKIP_IF(!can_open_cifile());
 
 	printf("Integer\n");
 	LOAD_DFORM_TEST(lbz);
@@ -408,7 +463,7 @@ int test_alignment_handler_integer_206(void)
 {
 	int rc = 0;
 
-	SKIP_IF(!can_open_fb0());
+	SKIP_IF(!can_open_cifile());
 	SKIP_IF(!have_hwcap(PPC_FEATURE_ARCH_2_06));
 
 	printf("Integer: 2.06\n");
@@ -419,11 +474,32 @@ int test_alignment_handler_integer_206(void)
 	return rc;
 }
 
+int test_alignment_handler_integer_prefix(void)
+{
+	int rc = 0;
+
+	SKIP_IF(!can_open_cifile());
+	SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_3_1));
+
+	printf("Integer: PREFIX\n");
+	LOAD_MLS_PREFIX_TEST(PLBZ);
+	LOAD_MLS_PREFIX_TEST(PLHZ);
+	LOAD_MLS_PREFIX_TEST(PLHA);
+	LOAD_MLS_PREFIX_TEST(PLWZ);
+	LOAD_8LS_PREFIX_TEST(PLWA);
+	LOAD_8LS_PREFIX_TEST(PLD);
+	STORE_MLS_PREFIX_TEST(PSTB);
+	STORE_MLS_PREFIX_TEST(PSTH);
+	STORE_MLS_PREFIX_TEST(PSTW);
+	STORE_8LS_PREFIX_TEST(PSTD);
+	return rc;
+}
+
 int test_alignment_handler_vmx(void)
 {
 	int rc = 0;
 
-	SKIP_IF(!can_open_fb0());
+	SKIP_IF(!can_open_cifile());
 	SKIP_IF(!have_hwcap(PPC_FEATURE_HAS_ALTIVEC));
 
 	printf("VMX\n");
@@ -451,7 +527,7 @@ int test_alignment_handler_fp(void)
 {
 	int rc = 0;
 
-	SKIP_IF(!can_open_fb0());
+	SKIP_IF(!can_open_cifile());
 
 	printf("Floating point\n");
 	LOAD_FLOAT_DFORM_TEST(lfd);
@@ -479,7 +555,7 @@ int test_alignment_handler_fp_205(void)
 {
 	int rc = 0;
 
-	SKIP_IF(!can_open_fb0());
+	SKIP_IF(!can_open_cifile());
 	SKIP_IF(!have_hwcap(PPC_FEATURE_ARCH_2_05));
 
 	printf("Floating point: 2.05\n");
@@ -497,7 +573,7 @@ int test_alignment_handler_fp_206(void)
 {
 	int rc = 0;
 
-	SKIP_IF(!can_open_fb0());
+	SKIP_IF(!can_open_cifile());
 	SKIP_IF(!have_hwcap(PPC_FEATURE_ARCH_2_06));
 
 	printf("Floating point: 2.06\n");
@@ -507,13 +583,32 @@ int test_alignment_handler_fp_206(void)
 	return rc;
 }
 
+
+int test_alignment_handler_fp_prefix(void)
+{
+	int rc = 0;
+
+	SKIP_IF(!can_open_cifile());
+	SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_3_1));
+
+	printf("Floating point: PREFIX\n");
+	LOAD_FLOAT_DFORM_TEST(lfs);
+	LOAD_FLOAT_MLS_PREFIX_TEST(PLFS);
+	LOAD_FLOAT_MLS_PREFIX_TEST(PLFD);
+	STORE_FLOAT_MLS_PREFIX_TEST(PSTFS);
+	STORE_FLOAT_MLS_PREFIX_TEST(PSTFD);
+	return rc;
+}
+
 void usage(char *prog)
 {
-	printf("Usage: %s [options]\n", prog);
+	printf("Usage: %s [options] [path [offset]]\n", prog);
 	printf("  -d	Enable debug error output\n");
 	printf("\n");
-	printf("This test requires a POWER8 or POWER9 CPU and a usable ");
-	printf("framebuffer at /dev/fb0.\n");
+	printf("This test requires a POWER8, POWER9 or POWER10 CPU ");
+	printf("and either a usable framebuffer at /dev/fb0 or ");
+	printf("the path to usable cache inhibited memory and optional ");
+	printf("offset to be provided\n");
 }
 
 int main(int argc, char *argv[])
@@ -533,6 +628,13 @@ int main(int argc, char *argv[])
 			exit(1);
 		}
 	}
+	argc -= optind;
+	argv += optind;
+
+	if (argc > 0)
+		cipath = argv[0];
+	if (argc > 1)
+		cioffset = strtol(argv[1], 0, 0x10);
 
 	bufsize = getpagesize();
 
@@ -552,10 +654,14 @@ int main(int argc, char *argv[])
 			   "test_alignment_handler_vsx_207");
 	rc |= test_harness(test_alignment_handler_vsx_300,
 			   "test_alignment_handler_vsx_300");
+	rc |= test_harness(test_alignment_handler_vsx_prefix,
+			   "test_alignment_handler_vsx_prefix");
 	rc |= test_harness(test_alignment_handler_integer,
 			   "test_alignment_handler_integer");
 	rc |= test_harness(test_alignment_handler_integer_206,
 			   "test_alignment_handler_integer_206");
+	rc |= test_harness(test_alignment_handler_integer_prefix,
+			   "test_alignment_handler_integer_prefix");
 	rc |= test_harness(test_alignment_handler_vmx,
 			   "test_alignment_handler_vmx");
 	rc |= test_harness(test_alignment_handler_fp,
@@ -564,5 +670,7 @@ int main(int argc, char *argv[])
 			   "test_alignment_handler_fp_205");
 	rc |= test_harness(test_alignment_handler_fp_206,
 			   "test_alignment_handler_fp_206");
+	rc |= test_harness(test_alignment_handler_fp_prefix,
+			   "test_alignment_handler_fp_prefix");
 	return rc;
 }
diff --git a/tools/testing/selftests/powerpc/benchmarks/context_switch.c b/tools/testing/selftests/powerpc/benchmarks/context_switch.c
index a2e8c9da7fa5..d50cc05df495 100644
--- a/tools/testing/selftests/powerpc/benchmarks/context_switch.c
+++ b/tools/testing/selftests/powerpc/benchmarks/context_switch.c
@@ -19,6 +19,7 @@
 #include <limits.h>
 #include <sys/time.h>
 #include <sys/syscall.h>
+#include <sys/sysinfo.h>
 #include <sys/types.h>
 #include <sys/shm.h>
 #include <linux/futex.h>
@@ -104,8 +105,9 @@ static void start_thread_on(void *(*fn)(void *), void *arg, unsigned long cpu)
 
 static void start_process_on(void *(*fn)(void *), void *arg, unsigned long cpu)
 {
-	int pid;
-	cpu_set_t cpuset;
+	int pid, ncpus;
+	cpu_set_t *cpuset;
+	size_t size;
 
 	pid = fork();
 	if (pid == -1) {
@@ -116,14 +118,23 @@ static void start_process_on(void *(*fn)(void *), void *arg, unsigned long cpu)
 	if (pid)
 		return;
 
-	CPU_ZERO(&cpuset);
-	CPU_SET(cpu, &cpuset);
+	ncpus = get_nprocs();
+	size = CPU_ALLOC_SIZE(ncpus);
+	cpuset = CPU_ALLOC(ncpus);
+	if (!cpuset) {
+		perror("malloc");
+		exit(1);
+	}
+	CPU_ZERO_S(size, cpuset);
+	CPU_SET_S(cpu, size, cpuset);
 
-	if (sched_setaffinity(0, sizeof(cpuset), &cpuset)) {
+	if (sched_setaffinity(0, size, cpuset)) {
 		perror("sched_setaffinity");
+		CPU_FREE(cpuset);
 		exit(1);
 	}
 
+	CPU_FREE(cpuset);
 	fn(arg);
 
 	exit(0);
diff --git a/tools/testing/selftests/powerpc/copyloops/.gitignore b/tools/testing/selftests/powerpc/copyloops/.gitignore
index ddaf140b8255..994b11af765c 100644
--- a/tools/testing/selftests/powerpc/copyloops/.gitignore
+++ b/tools/testing/selftests/powerpc/copyloops/.gitignore
@@ -12,4 +12,4 @@ memcpy_p7_t1
 copyuser_64_exc_t0
 copyuser_64_exc_t1
 copyuser_64_exc_t2
-memcpy_mcsafe_64
+copy_mc_64
diff --git a/tools/testing/selftests/powerpc/copyloops/Makefile b/tools/testing/selftests/powerpc/copyloops/Makefile
index 0917983a1c78..3095b1f1c02b 100644
--- a/tools/testing/selftests/powerpc/copyloops/Makefile
+++ b/tools/testing/selftests/powerpc/copyloops/Makefile
@@ -12,7 +12,7 @@ ASFLAGS = $(CFLAGS) -Wa,-mpower4
 TEST_GEN_PROGS := copyuser_64_t0 copyuser_64_t1 copyuser_64_t2 \
 		copyuser_p7_t0 copyuser_p7_t1 \
 		memcpy_64_t0 memcpy_64_t1 memcpy_64_t2 \
-		memcpy_p7_t0 memcpy_p7_t1 memcpy_mcsafe_64 \
+		memcpy_p7_t0 memcpy_p7_t1 copy_mc_64 \
 		copyuser_64_exc_t0 copyuser_64_exc_t1 copyuser_64_exc_t2
 
 EXTRA_SOURCES := validate.c ../harness.c stubs.S
@@ -45,9 +45,9 @@ $(OUTPUT)/memcpy_p7_t%:	memcpy_power7.S $(EXTRA_SOURCES)
 		-D SELFTEST_CASE=$(subst memcpy_p7_t,,$(notdir $@)) \
 		-o $@ $^
 
-$(OUTPUT)/memcpy_mcsafe_64: memcpy_mcsafe_64.S $(EXTRA_SOURCES)
+$(OUTPUT)/copy_mc_64: copy_mc_64.S $(EXTRA_SOURCES)
 	$(CC) $(CPPFLAGS) $(CFLAGS) \
-		-D COPY_LOOP=test_memcpy_mcsafe \
+		-D COPY_LOOP=test_copy_mc_generic \
 		-o $@ $^
 
 $(OUTPUT)/copyuser_64_exc_t%: copyuser_64.S exc_validate.c ../harness.c \
diff --git a/tools/testing/selftests/powerpc/copyloops/copy_mc_64.S b/tools/testing/selftests/powerpc/copyloops/copy_mc_64.S
new file mode 120000
index 000000000000..dcbe06d500fb
--- /dev/null
+++ b/tools/testing/selftests/powerpc/copyloops/copy_mc_64.S
@@ -0,0 +1 @@
+../../../../../arch/powerpc/lib/copy_mc_64.S
+\ No newline at end of file
diff --git a/tools/testing/selftests/powerpc/copyloops/memcpy_mcsafe_64.S b/tools/testing/selftests/powerpc/copyloops/memcpy_mcsafe_64.S
deleted file mode 120000
index f0feef3062f6..000000000000
--- a/tools/testing/selftests/powerpc/copyloops/memcpy_mcsafe_64.S
+++ /dev/null
@@ -1 +0,0 @@
-../../../../../arch/powerpc/lib/memcpy_mcsafe_64.S
-\ No newline at end of file
diff --git a/tools/testing/selftests/powerpc/eeh/eeh-functions.sh b/tools/testing/selftests/powerpc/eeh/eeh-functions.sh
index f52ed92b53e7..00dc32c0ed75 100755
--- a/tools/testing/selftests/powerpc/eeh/eeh-functions.sh
+++ b/tools/testing/selftests/powerpc/eeh/eeh-functions.sh
@@ -5,12 +5,17 @@ pe_ok() {
 	local dev="$1"
 	local path="/sys/bus/pci/devices/$dev/eeh_pe_state"
 
-	if ! [ -e "$path" ] ; then
+	# if a driver doesn't support the error handling callbacks then the
+	# device is recovered by removing and re-probing it. This causes the
+	# sysfs directory to disappear so read the PE state once and squash
+	# any potential error messages
+	local eeh_state="$(cat $path 2>/dev/null)"
+	if [ -z "$eeh_state" ]; then
 		return 1;
 	fi
 
-	local fw_state="$(cut -d' ' -f1 < $path)"
-	local sw_state="$(cut -d' ' -f2 < $path)"
+	local fw_state="$(echo $eeh_state | cut -d' ' -f1)"
+	local sw_state="$(echo $eeh_state | cut -d' ' -f2)"
 
 	# If EEH_PE_ISOLATED or EEH_PE_RECOVERING are set then the PE is in an
 	# error state or being recovered. Either way, not ok.
diff --git a/tools/testing/selftests/powerpc/include/instructions.h b/tools/testing/selftests/powerpc/include/instructions.h
index f36061eb6f0f..4efa6314bd96 100644
--- a/tools/testing/selftests/powerpc/include/instructions.h
+++ b/tools/testing/selftests/powerpc/include/instructions.h
@@ -66,4 +66,81 @@ static inline int paste_last(void *i)
 #define PPC_INST_PASTE                 __PASTE(0, 0, 0, 0)
 #define PPC_INST_PASTE_LAST            __PASTE(0, 0, 1, 1)
 
+/* This defines the prefixed load/store instructions */
+#ifdef __ASSEMBLY__
+#  define stringify_in_c(...)	__VA_ARGS__
+#else
+#  define __stringify_in_c(...)	#__VA_ARGS__
+#  define stringify_in_c(...)	__stringify_in_c(__VA_ARGS__) " "
+#endif
+
+#define __PPC_RA(a)	(((a) & 0x1f) << 16)
+#define __PPC_RS(s)	(((s) & 0x1f) << 21)
+#define __PPC_RT(t)	__PPC_RS(t)
+#define __PPC_PREFIX_R(r)	(((r) & 0x1) << 20)
+
+#define PPC_PREFIX_MLS			0x06000000
+#define PPC_PREFIX_8LS			0x04000000
+
+#define PPC_INST_LBZ			0x88000000
+#define PPC_INST_LHZ			0xa0000000
+#define PPC_INST_LHA			0xa8000000
+#define PPC_INST_LWZ			0x80000000
+#define PPC_INST_STB			0x98000000
+#define PPC_INST_STH			0xb0000000
+#define PPC_INST_STW			0x90000000
+#define PPC_INST_STD			0xf8000000
+#define PPC_INST_LFS			0xc0000000
+#define PPC_INST_LFD			0xc8000000
+#define PPC_INST_STFS			0xd0000000
+#define PPC_INST_STFD			0xd8000000
+
+#define PREFIX_MLS(instr, t, a, r, d)	stringify_in_c(.balign 64, , 4;)		\
+					stringify_in_c(.long PPC_PREFIX_MLS |		\
+						       __PPC_PREFIX_R(r) |		\
+						       (((d) >> 16) & 0x3ffff);)	\
+					stringify_in_c(.long (instr)  |			\
+						       __PPC_RT(t) |			\
+						       __PPC_RA(a) |			\
+						       ((d) & 0xffff);\n)
+
+#define PREFIX_8LS(instr, t, a, r, d)	stringify_in_c(.balign 64, , 4;)		\
+					stringify_in_c(.long PPC_PREFIX_8LS |		\
+						       __PPC_PREFIX_R(r) |		\
+						       (((d) >> 16) & 0x3ffff);)	\
+					stringify_in_c(.long (instr)  |			\
+						       __PPC_RT(t) |			\
+						       __PPC_RA(a) |			\
+						       ((d) & 0xffff);\n)
+
+/* Prefixed Integer Load/Store instructions */
+#define PLBZ(t, a, r, d)		PREFIX_MLS(PPC_INST_LBZ, t, a, r, d)
+#define PLHZ(t, a, r, d)		PREFIX_MLS(PPC_INST_LHZ, t, a, r, d)
+#define PLHA(t, a, r, d)		PREFIX_MLS(PPC_INST_LHA, t, a, r, d)
+#define PLWZ(t, a, r, d)		PREFIX_MLS(PPC_INST_LWZ, t, a, r, d)
+#define PLWA(t, a, r, d)		PREFIX_8LS(0xa4000000, t, a, r, d)
+#define PLD(t, a, r, d)			PREFIX_8LS(0xe4000000, t, a, r, d)
+#define PLQ(t, a, r, d)			PREFIX_8LS(0xe0000000, t, a, r, d)
+#define PSTB(s, a, r, d)		PREFIX_MLS(PPC_INST_STB, s, a, r, d)
+#define PSTH(s, a, r, d)		PREFIX_MLS(PPC_INST_STH, s, a, r, d)
+#define PSTW(s, a, r, d)		PREFIX_MLS(PPC_INST_STW, s, a, r, d)
+#define PSTD(s, a, r, d)		PREFIX_8LS(0xf4000000, s, a, r, d)
+#define PSTQ(s, a, r, d)		PREFIX_8LS(0xf0000000, s, a, r, d)
+
+/* Prefixed Floating-Point Load/Store Instructions */
+#define PLFS(frt, a, r, d)		PREFIX_MLS(PPC_INST_LFS, frt, a, r, d)
+#define PLFD(frt, a, r, d)		PREFIX_MLS(PPC_INST_LFD, frt, a, r, d)
+#define PSTFS(frs, a, r, d)		PREFIX_MLS(PPC_INST_STFS, frs, a, r, d)
+#define PSTFD(frs, a, r, d)		PREFIX_MLS(PPC_INST_STFD, frs, a, r, d)
+
+/* Prefixed VSX Load/Store Instructions */
+#define PLXSD(vrt, a, r, d)		PREFIX_8LS(0xa8000000, vrt, a, r, d)
+#define PLXSSP(vrt, a, r, d)		PREFIX_8LS(0xac000000, vrt, a, r, d)
+#define PLXV0(s, a, r, d)		PREFIX_8LS(0xc8000000, s, a, r, d)
+#define PLXV1(s, a, r, d)		PREFIX_8LS(0xcc000000, s, a, r, d)
+#define PSTXSD(vrs, a, r, d)		PREFIX_8LS(0xb8000000, vrs, a, r, d)
+#define PSTXSSP(vrs, a, r, d)		PREFIX_8LS(0xbc000000, vrs, a, r, d)
+#define PSTXV0(s, a, r, d)		PREFIX_8LS(0xd8000000, s, a, r, d)
+#define PSTXV1(s, a, r, d)		PREFIX_8LS(0xdc000000, s, a, r, d)
+
 #endif /* _SELFTESTS_POWERPC_INSTRUCTIONS_H */
diff --git a/tools/testing/selftests/powerpc/include/pkeys.h b/tools/testing/selftests/powerpc/include/pkeys.h
new file mode 100644
index 000000000000..3312cb1b058d
--- /dev/null
+++ b/tools/testing/selftests/powerpc/include/pkeys.h
@@ -0,0 +1,136 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright 2020, Sandipan Das, IBM Corp.
+ */
+
+#ifndef _SELFTESTS_POWERPC_PKEYS_H
+#define _SELFTESTS_POWERPC_PKEYS_H
+
+#include <sys/mman.h>
+
+#include "reg.h"
+#include "utils.h"
+
+/*
+ * Older versions of libc use the Intel-specific access rights.
+ * Hence, override the definitions as they might be incorrect.
+ */
+#undef PKEY_DISABLE_ACCESS
+#define PKEY_DISABLE_ACCESS	0x3
+
+#undef PKEY_DISABLE_WRITE
+#define PKEY_DISABLE_WRITE	0x2
+
+#undef PKEY_DISABLE_EXECUTE
+#define PKEY_DISABLE_EXECUTE	0x4
+
+/* Older versions of libc do not not define this */
+#ifndef SEGV_PKUERR
+#define SEGV_PKUERR	4
+#endif
+
+#define SI_PKEY_OFFSET	0x20
+
+#define __NR_pkey_mprotect	386
+#define __NR_pkey_alloc		384
+#define __NR_pkey_free		385
+
+#define PKEY_BITS_PER_PKEY	2
+#define NR_PKEYS		32
+#define PKEY_BITS_MASK		((1UL << PKEY_BITS_PER_PKEY) - 1)
+
+inline unsigned long pkeyreg_get(void)
+{
+	return mfspr(SPRN_AMR);
+}
+
+inline void pkeyreg_set(unsigned long amr)
+{
+	set_amr(amr);
+}
+
+void pkey_set_rights(int pkey, unsigned long rights)
+{
+	unsigned long amr, shift;
+
+	shift = (NR_PKEYS - pkey - 1) * PKEY_BITS_PER_PKEY;
+	amr = pkeyreg_get();
+	amr &= ~(PKEY_BITS_MASK << shift);
+	amr |= (rights & PKEY_BITS_MASK) << shift;
+	pkeyreg_set(amr);
+}
+
+int sys_pkey_mprotect(void *addr, size_t len, int prot, int pkey)
+{
+	return syscall(__NR_pkey_mprotect, addr, len, prot, pkey);
+}
+
+int sys_pkey_alloc(unsigned long flags, unsigned long rights)
+{
+	return syscall(__NR_pkey_alloc, flags, rights);
+}
+
+int sys_pkey_free(int pkey)
+{
+	return syscall(__NR_pkey_free, pkey);
+}
+
+int pkeys_unsupported(void)
+{
+	bool hash_mmu = false;
+	int pkey;
+
+	/* Protection keys are currently supported on Hash MMU only */
+	FAIL_IF(using_hash_mmu(&hash_mmu));
+	SKIP_IF(!hash_mmu);
+
+	/* Check if the system call is supported */
+	pkey = sys_pkey_alloc(0, 0);
+	SKIP_IF(pkey < 0);
+	sys_pkey_free(pkey);
+
+	return 0;
+}
+
+int siginfo_pkey(siginfo_t *si)
+{
+	/*
+	 * In older versions of libc, siginfo_t does not have si_pkey as
+	 * a member.
+	 */
+#ifdef si_pkey
+	return si->si_pkey;
+#else
+	return *((int *)(((char *) si) + SI_PKEY_OFFSET));
+#endif
+}
+
+#define pkey_rights(r) ({						\
+	static char buf[4] = "rwx";					\
+	unsigned int amr_bits;						\
+	if ((r) & PKEY_DISABLE_EXECUTE)					\
+		buf[2] = '-';						\
+	amr_bits = (r) & PKEY_BITS_MASK;				\
+	if (amr_bits & PKEY_DISABLE_WRITE)				\
+		buf[1] = '-';						\
+	if (amr_bits & PKEY_DISABLE_ACCESS & ~PKEY_DISABLE_WRITE)	\
+		buf[0] = '-';						\
+	buf;								\
+})
+
+unsigned long next_pkey_rights(unsigned long rights)
+{
+	if (rights == PKEY_DISABLE_ACCESS)
+		return PKEY_DISABLE_EXECUTE;
+	else if (rights == (PKEY_DISABLE_ACCESS | PKEY_DISABLE_EXECUTE))
+		return 0;
+
+	if ((rights & PKEY_BITS_MASK) == 0)
+		rights |= PKEY_DISABLE_WRITE;
+	else if ((rights & PKEY_BITS_MASK) == PKEY_DISABLE_WRITE)
+		rights |= PKEY_DISABLE_ACCESS;
+
+	return rights;
+}
+
+#endif /* _SELFTESTS_POWERPC_PKEYS_H */
diff --git a/tools/testing/selftests/powerpc/include/reg.h b/tools/testing/selftests/powerpc/include/reg.h
index 022c5076b2c5..c0f2742a3a59 100644
--- a/tools/testing/selftests/powerpc/include/reg.h
+++ b/tools/testing/selftests/powerpc/include/reg.h
@@ -57,6 +57,12 @@
 #define SPRN_PPR       896	/* Program Priority Register */
 #define SPRN_AMR       13	/* Authority Mask Register - problem state */
 
+#define set_amr(v)	asm volatile("isync;" \
+				     "mtspr " __stringify(SPRN_AMR) ",%0;" \
+				     "isync" : \
+				    : "r" ((unsigned long)(v)) \
+				    : "memory")
+
 /* TEXASR register bits */
 #define TEXASR_FC	0xFE00000000000000
 #define TEXASR_FP	0x0100000000000000
diff --git a/tools/testing/selftests/powerpc/include/utils.h b/tools/testing/selftests/powerpc/include/utils.h
index e089a0c30d9a..71d2924f5b8b 100644
--- a/tools/testing/selftests/powerpc/include/utils.h
+++ b/tools/testing/selftests/powerpc/include/utils.h
@@ -42,6 +42,16 @@ int perf_event_enable(int fd);
 int perf_event_disable(int fd);
 int perf_event_reset(int fd);
 
+#if !defined(__GLIBC_PREREQ) || !__GLIBC_PREREQ(2, 30)
+#include <unistd.h>
+#include <sys/syscall.h>
+
+static inline pid_t gettid(void)
+{
+	return syscall(SYS_gettid);
+}
+#endif
+
 static inline bool have_hwcap(unsigned long ftr)
 {
 	return ((unsigned long)get_auxv_entry(AT_HWCAP) & ftr) == ftr;
@@ -60,6 +70,7 @@ static inline bool have_hwcap2(unsigned long ftr2)
 #endif
 
 bool is_ppc64le(void);
+int using_hash_mmu(bool *using_hash);
 
 /* Yes, this is evil */
 #define FAIL_IF(x)						\
@@ -71,6 +82,15 @@ do {								\
 	}							\
 } while (0)
 
+#define FAIL_IF_EXIT(x)						\
+do {								\
+	if ((x)) {						\
+		fprintf(stderr,					\
+		"[FAIL] Test FAILED on line %d\n", __LINE__);	\
+		_exit(1);					\
+	}							\
+} while (0)
+
 /* The test harness uses this, yes it's gross */
 #define MAGIC_SKIP_RETURN_VALUE	99
 
@@ -96,11 +116,20 @@ do {								\
 #define _str(s) #s
 #define str(s) _str(s)
 
+#define sigsafe_err(msg)	({ \
+		ssize_t nbytes __attribute__((unused)); \
+		nbytes = write(STDERR_FILENO, msg, strlen(msg)); })
+
 /* POWER9 feature */
 #ifndef PPC_FEATURE2_ARCH_3_00
 #define PPC_FEATURE2_ARCH_3_00 0x00800000
 #endif
 
+/* POWER10 feature */
+#ifndef PPC_FEATURE2_ARCH_3_1
+#define PPC_FEATURE2_ARCH_3_1 0x00040000
+#endif
+
 #if defined(__powerpc64__)
 #define UCONTEXT_NIA(UC)	(UC)->uc_mcontext.gp_regs[PT_NIP]
 #define UCONTEXT_MSR(UC)	(UC)->uc_mcontext.gp_regs[PT_MSR]
diff --git a/tools/testing/selftests/powerpc/math/.gitignore b/tools/testing/selftests/powerpc/math/.gitignore
index e31ca6f453ed..d0c23b2e4b60 100644
--- a/tools/testing/selftests/powerpc/math/.gitignore
+++ b/tools/testing/selftests/powerpc/math/.gitignore
@@ -6,3 +6,4 @@ vmx_preempt
 fpu_signal
 vmx_signal
 vsx_preempt
+fpu_denormal
diff --git a/tools/testing/selftests/powerpc/math/Makefile b/tools/testing/selftests/powerpc/math/Makefile
index 11a10d7a2bbd..fcc91c205984 100644
--- a/tools/testing/selftests/powerpc/math/Makefile
+++ b/tools/testing/selftests/powerpc/math/Makefile
@@ -1,5 +1,5 @@
 # SPDX-License-Identifier: GPL-2.0
-TEST_GEN_PROGS := fpu_syscall fpu_preempt fpu_signal vmx_syscall vmx_preempt vmx_signal vsx_preempt
+TEST_GEN_PROGS := fpu_syscall fpu_preempt fpu_signal fpu_denormal vmx_syscall vmx_preempt vmx_signal vsx_preempt
 
 top_srcdir = ../../../../..
 include ../../lib.mk
@@ -11,9 +11,9 @@ $(OUTPUT)/fpu_syscall: fpu_asm.S
 $(OUTPUT)/fpu_preempt: fpu_asm.S
 $(OUTPUT)/fpu_signal:  fpu_asm.S
 
-$(OUTPUT)/vmx_syscall: vmx_asm.S
-$(OUTPUT)/vmx_preempt: vmx_asm.S
-$(OUTPUT)/vmx_signal: vmx_asm.S
+$(OUTPUT)/vmx_syscall: vmx_asm.S ../utils.c
+$(OUTPUT)/vmx_preempt: vmx_asm.S ../utils.c
+$(OUTPUT)/vmx_signal: vmx_asm.S ../utils.c
 
 $(OUTPUT)/vsx_preempt: CFLAGS += -mvsx
-$(OUTPUT)/vsx_preempt: vsx_asm.S
+$(OUTPUT)/vsx_preempt: vsx_asm.S ../utils.c
diff --git a/tools/testing/selftests/powerpc/math/fpu_denormal.c b/tools/testing/selftests/powerpc/math/fpu_denormal.c
new file mode 100644
index 000000000000..5f96682abaa8
--- /dev/null
+++ b/tools/testing/selftests/powerpc/math/fpu_denormal.c
@@ -0,0 +1,38 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright IBM Corp. 2020
+ *
+ * This test attempts to cause a FP denormal exception on POWER8 CPUs. Unfortunately
+ * if the denormal handler is not configured or working properly, this can cause a bad
+ * crash in kernel mode when the kernel tries to save FP registers when the process
+ * exits.
+ */
+
+#include <stdio.h>
+#include <string.h>
+
+#include "utils.h"
+
+static int test_denormal_fpu(void)
+{
+	unsigned int m32;
+	unsigned long m64;
+	volatile float f;
+	volatile double d;
+
+	/* try to induce lfs <denormal> ; stfd */
+
+	m32 = 0x00715fcf; /* random denormal */
+	memcpy((float *)&f, &m32, sizeof(f));
+	d = f;
+	memcpy(&m64, (double *)&d, sizeof(d));
+
+	FAIL_IF((long)(m64 != 0x380c57f3c0000000)); /* renormalised value */
+
+	return 0;
+}
+
+int main(int argc, char *argv[])
+{
+	return test_harness(test_denormal_fpu, "fpu_denormal");
+}
diff --git a/tools/testing/selftests/powerpc/math/vmx_preempt.c b/tools/testing/selftests/powerpc/math/vmx_preempt.c
index 2e059f154e77..6761d6ce30ec 100644
--- a/tools/testing/selftests/powerpc/math/vmx_preempt.c
+++ b/tools/testing/selftests/powerpc/math/vmx_preempt.c
@@ -57,6 +57,9 @@ int test_preempt_vmx(void)
 	int i, rc, threads;
 	pthread_t *tids;
 
+	// vcmpequd used in vmx_asm.S is v2.07
+	SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_2_07));
+
 	threads = sysconf(_SC_NPROCESSORS_ONLN) * THREAD_FACTOR;
 	tids = malloc(threads * sizeof(pthread_t));
 	FAIL_IF(!tids);
diff --git a/tools/testing/selftests/powerpc/math/vmx_signal.c b/tools/testing/selftests/powerpc/math/vmx_signal.c
index 785a48e0976f..b340a5c4e79d 100644
--- a/tools/testing/selftests/powerpc/math/vmx_signal.c
+++ b/tools/testing/selftests/powerpc/math/vmx_signal.c
@@ -96,6 +96,9 @@ int test_signal_vmx(void)
 	void *rc_p;
 	pthread_t *tids;
 
+	// vcmpequd used in vmx_asm.S is v2.07
+	SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_2_07));
+
 	threads = sysconf(_SC_NPROCESSORS_ONLN) * THREAD_FACTOR;
 	tids = malloc(threads * sizeof(pthread_t));
 	FAIL_IF(!tids);
diff --git a/tools/testing/selftests/powerpc/math/vmx_syscall.c b/tools/testing/selftests/powerpc/math/vmx_syscall.c
index 9ee293cc868e..03c78dfe3444 100644
--- a/tools/testing/selftests/powerpc/math/vmx_syscall.c
+++ b/tools/testing/selftests/powerpc/math/vmx_syscall.c
@@ -49,9 +49,14 @@ int test_vmx_syscall(void)
 	 * Setup an environment with much context switching
 	 */
 	pid_t pid2;
-	pid_t pid = fork();
+	pid_t pid;
 	int ret;
 	int child_ret;
+
+	// vcmpequd used in vmx_asm.S is v2.07
+	SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_2_07));
+
+	pid = fork();
 	FAIL_IF(pid == -1);
 
 	pid2 = fork();
diff --git a/tools/testing/selftests/powerpc/math/vsx_preempt.c b/tools/testing/selftests/powerpc/math/vsx_preempt.c
index 63de9c6e2cd3..d1601bb889d4 100644
--- a/tools/testing/selftests/powerpc/math/vsx_preempt.c
+++ b/tools/testing/selftests/powerpc/math/vsx_preempt.c
@@ -92,6 +92,8 @@ int test_preempt_vsx(void)
 	int i, rc, threads;
 	pthread_t *tids;
 
+	SKIP_IF(!have_hwcap(PPC_FEATURE_HAS_VSX));
+
 	threads = sysconf(_SC_NPROCESSORS_ONLN) * THREAD_FACTOR;
 	tids = malloc(threads * sizeof(pthread_t));
 	FAIL_IF(!tids);
diff --git a/tools/testing/selftests/powerpc/mm/.gitignore b/tools/testing/selftests/powerpc/mm/.gitignore
index 2ca523255b1b..aac4a59f9e28 100644
--- a/tools/testing/selftests/powerpc/mm/.gitignore
+++ b/tools/testing/selftests/powerpc/mm/.gitignore
@@ -8,3 +8,7 @@ wild_bctr
 large_vm_fork_separation
 bad_accesses
 tlbie_test
+pkey_exec_prot
+pkey_siginfo
+stack_expansion_ldst
+stack_expansion_signal
diff --git a/tools/testing/selftests/powerpc/mm/Makefile b/tools/testing/selftests/powerpc/mm/Makefile
index b9103c4bb414..defe488d6bf1 100644
--- a/tools/testing/selftests/powerpc/mm/Makefile
+++ b/tools/testing/selftests/powerpc/mm/Makefile
@@ -3,22 +3,32 @@ noarg:
 	$(MAKE) -C ../
 
 TEST_GEN_PROGS := hugetlb_vs_thp_test subpage_prot prot_sao segv_errors wild_bctr \
-		  large_vm_fork_separation bad_accesses
+		  large_vm_fork_separation bad_accesses pkey_exec_prot \
+		  pkey_siginfo stack_expansion_signal stack_expansion_ldst
+
 TEST_GEN_PROGS_EXTENDED := tlbie_test
 TEST_GEN_FILES := tempfile
 
 top_srcdir = ../../../../..
 include ../../lib.mk
 
-$(TEST_GEN_PROGS): ../harness.c
+$(TEST_GEN_PROGS): ../harness.c ../utils.c
 
 $(OUTPUT)/prot_sao: ../utils.c
 
 $(OUTPUT)/wild_bctr: CFLAGS += -m64
 $(OUTPUT)/large_vm_fork_separation: CFLAGS += -m64
 $(OUTPUT)/bad_accesses: CFLAGS += -m64
+$(OUTPUT)/pkey_exec_prot: CFLAGS += -m64
+$(OUTPUT)/pkey_siginfo: CFLAGS += -m64
+
+$(OUTPUT)/stack_expansion_signal: ../utils.c ../pmu/lib.c
+
+$(OUTPUT)/stack_expansion_ldst: CFLAGS += -fno-stack-protector
+$(OUTPUT)/stack_expansion_ldst: ../utils.c
 
 $(OUTPUT)/tempfile:
 	dd if=/dev/zero of=$@ bs=64k count=1
 
 $(OUTPUT)/tlbie_test: LDLIBS += -lpthread
+$(OUTPUT)/pkey_siginfo: LDLIBS += -lpthread
diff --git a/tools/testing/selftests/powerpc/mm/bad_accesses.c b/tools/testing/selftests/powerpc/mm/bad_accesses.c
index adc465f499ef..a864ed7e2008 100644
--- a/tools/testing/selftests/powerpc/mm/bad_accesses.c
+++ b/tools/testing/selftests/powerpc/mm/bad_accesses.c
@@ -64,34 +64,6 @@ int bad_access(char *p, bool write)
 	return 0;
 }
 
-static int using_hash_mmu(bool *using_hash)
-{
-	char line[128];
-	FILE *f;
-	int rc;
-
-	f = fopen("/proc/cpuinfo", "r");
-	FAIL_IF(!f);
-
-	rc = 0;
-	while (fgets(line, sizeof(line), f) != NULL) {
-		if (strcmp(line, "MMU		: Hash\n") == 0) {
-			*using_hash = true;
-			goto out;
-		}
-
-		if (strcmp(line, "MMU		: Radix\n") == 0) {
-			*using_hash = false;
-			goto out;
-		}
-	}
-
-	rc = -1;
-out:
-	fclose(f);
-	return rc;
-}
-
 static int test(void)
 {
 	unsigned long i, j, addr, region_shift, page_shift, page_size;
diff --git a/tools/testing/selftests/powerpc/mm/pkey_exec_prot.c b/tools/testing/selftests/powerpc/mm/pkey_exec_prot.c
new file mode 100644
index 000000000000..9e5c7f3f498a
--- /dev/null
+++ b/tools/testing/selftests/powerpc/mm/pkey_exec_prot.c
@@ -0,0 +1,294 @@
+// SPDX-License-Identifier: GPL-2.0+
+
+/*
+ * Copyright 2020, Sandipan Das, IBM Corp.
+ *
+ * Test if applying execute protection on pages using memory
+ * protection keys works as expected.
+ */
+
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <signal.h>
+
+#include <unistd.h>
+
+#include "pkeys.h"
+
+#define PPC_INST_NOP	0x60000000
+#define PPC_INST_TRAP	0x7fe00008
+#define PPC_INST_BLR	0x4e800020
+
+static volatile sig_atomic_t fault_pkey, fault_code, fault_type;
+static volatile sig_atomic_t remaining_faults;
+static volatile unsigned int *fault_addr;
+static unsigned long pgsize, numinsns;
+static unsigned int *insns;
+
+static void trap_handler(int signum, siginfo_t *sinfo, void *ctx)
+{
+	/* Check if this fault originated from the expected address */
+	if (sinfo->si_addr != (void *) fault_addr)
+		sigsafe_err("got a fault for an unexpected address\n");
+
+	_exit(1);
+}
+
+static void segv_handler(int signum, siginfo_t *sinfo, void *ctx)
+{
+	int signal_pkey;
+
+	signal_pkey = siginfo_pkey(sinfo);
+	fault_code = sinfo->si_code;
+
+	/* Check if this fault originated from the expected address */
+	if (sinfo->si_addr != (void *) fault_addr) {
+		sigsafe_err("got a fault for an unexpected address\n");
+		_exit(1);
+	}
+
+	/* Check if too many faults have occurred for a single test case */
+	if (!remaining_faults) {
+		sigsafe_err("got too many faults for the same address\n");
+		_exit(1);
+	}
+
+
+	/* Restore permissions in order to continue */
+	switch (fault_code) {
+	case SEGV_ACCERR:
+		if (mprotect(insns, pgsize, PROT_READ | PROT_WRITE)) {
+			sigsafe_err("failed to set access permissions\n");
+			_exit(1);
+		}
+		break;
+	case SEGV_PKUERR:
+		if (signal_pkey != fault_pkey) {
+			sigsafe_err("got a fault for an unexpected pkey\n");
+			_exit(1);
+		}
+
+		switch (fault_type) {
+		case PKEY_DISABLE_ACCESS:
+			pkey_set_rights(fault_pkey, 0);
+			break;
+		case PKEY_DISABLE_EXECUTE:
+			/*
+			 * Reassociate the exec-only pkey with the region
+			 * to be able to continue. Unlike AMR, we cannot
+			 * set IAMR directly from userspace to restore the
+			 * permissions.
+			 */
+			if (mprotect(insns, pgsize, PROT_EXEC)) {
+				sigsafe_err("failed to set execute permissions\n");
+				_exit(1);
+			}
+			break;
+		default:
+			sigsafe_err("got a fault with an unexpected type\n");
+			_exit(1);
+		}
+		break;
+	default:
+		sigsafe_err("got a fault with an unexpected code\n");
+		_exit(1);
+	}
+
+	remaining_faults--;
+}
+
+static int test(void)
+{
+	struct sigaction segv_act, trap_act;
+	unsigned long rights;
+	int pkey, ret, i;
+
+	ret = pkeys_unsupported();
+	if (ret)
+		return ret;
+
+	/* Setup SIGSEGV handler */
+	segv_act.sa_handler = 0;
+	segv_act.sa_sigaction = segv_handler;
+	FAIL_IF(sigprocmask(SIG_SETMASK, 0, &segv_act.sa_mask) != 0);
+	segv_act.sa_flags = SA_SIGINFO;
+	segv_act.sa_restorer = 0;
+	FAIL_IF(sigaction(SIGSEGV, &segv_act, NULL) != 0);
+
+	/* Setup SIGTRAP handler */
+	trap_act.sa_handler = 0;
+	trap_act.sa_sigaction = trap_handler;
+	FAIL_IF(sigprocmask(SIG_SETMASK, 0, &trap_act.sa_mask) != 0);
+	trap_act.sa_flags = SA_SIGINFO;
+	trap_act.sa_restorer = 0;
+	FAIL_IF(sigaction(SIGTRAP, &trap_act, NULL) != 0);
+
+	/* Setup executable region */
+	pgsize = getpagesize();
+	numinsns = pgsize / sizeof(unsigned int);
+	insns = (unsigned int *) mmap(NULL, pgsize, PROT_READ | PROT_WRITE,
+				      MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+	FAIL_IF(insns == MAP_FAILED);
+
+	/* Write the instruction words */
+	for (i = 1; i < numinsns - 1; i++)
+		insns[i] = PPC_INST_NOP;
+
+	/*
+	 * Set the first instruction as an unconditional trap. If
+	 * the last write to this address succeeds, this should
+	 * get overwritten by a no-op.
+	 */
+	insns[0] = PPC_INST_TRAP;
+
+	/*
+	 * Later, to jump to the executable region, we use a branch
+	 * and link instruction (bctrl) which sets the return address
+	 * automatically in LR. Use that to return back.
+	 */
+	insns[numinsns - 1] = PPC_INST_BLR;
+
+	/* Allocate a pkey that restricts execution */
+	rights = PKEY_DISABLE_EXECUTE;
+	pkey = sys_pkey_alloc(0, rights);
+	FAIL_IF(pkey < 0);
+
+	/*
+	 * Pick the first instruction's address from the executable
+	 * region.
+	 */
+	fault_addr = insns;
+
+	/* The following two cases will avoid SEGV_PKUERR */
+	fault_type = -1;
+	fault_pkey = -1;
+
+	/*
+	 * Read an instruction word from the address when AMR bits
+	 * are not set i.e. the pkey permits both read and write
+	 * access.
+	 *
+	 * This should not generate a fault as having PROT_EXEC
+	 * implies PROT_READ on GNU systems. The pkey currently
+	 * restricts execution only based on the IAMR bits. The
+	 * AMR bits are cleared.
+	 */
+	remaining_faults = 0;
+	FAIL_IF(sys_pkey_mprotect(insns, pgsize, PROT_EXEC, pkey) != 0);
+	printf("read from %p, pkey permissions are %s\n", fault_addr,
+	       pkey_rights(rights));
+	i = *fault_addr;
+	FAIL_IF(remaining_faults != 0);
+
+	/*
+	 * Write an instruction word to the address when AMR bits
+	 * are not set i.e. the pkey permits both read and write
+	 * access.
+	 *
+	 * This should generate an access fault as having just
+	 * PROT_EXEC also restricts writes. The pkey currently
+	 * restricts execution only based on the IAMR bits. The
+	 * AMR bits are cleared.
+	 */
+	remaining_faults = 1;
+	FAIL_IF(sys_pkey_mprotect(insns, pgsize, PROT_EXEC, pkey) != 0);
+	printf("write to %p, pkey permissions are %s\n", fault_addr,
+	       pkey_rights(rights));
+	*fault_addr = PPC_INST_TRAP;
+	FAIL_IF(remaining_faults != 0 || fault_code != SEGV_ACCERR);
+
+	/* The following three cases will generate SEGV_PKUERR */
+	rights |= PKEY_DISABLE_ACCESS;
+	fault_type = PKEY_DISABLE_ACCESS;
+	fault_pkey = pkey;
+
+	/*
+	 * Read an instruction word from the address when AMR bits
+	 * are set i.e. the pkey permits neither read nor write
+	 * access.
+	 *
+	 * This should generate a pkey fault based on AMR bits only
+	 * as having PROT_EXEC implicitly allows reads.
+	 */
+	remaining_faults = 1;
+	FAIL_IF(sys_pkey_mprotect(insns, pgsize, PROT_EXEC, pkey) != 0);
+	pkey_set_rights(pkey, rights);
+	printf("read from %p, pkey permissions are %s\n", fault_addr,
+	       pkey_rights(rights));
+	i = *fault_addr;
+	FAIL_IF(remaining_faults != 0 || fault_code != SEGV_PKUERR);
+
+	/*
+	 * Write an instruction word to the address when AMR bits
+	 * are set i.e. the pkey permits neither read nor write
+	 * access.
+	 *
+	 * This should generate two faults. First, a pkey fault
+	 * based on AMR bits and then an access fault since
+	 * PROT_EXEC does not allow writes.
+	 */
+	remaining_faults = 2;
+	FAIL_IF(sys_pkey_mprotect(insns, pgsize, PROT_EXEC, pkey) != 0);
+	pkey_set_rights(pkey, rights);
+	printf("write to %p, pkey permissions are %s\n", fault_addr,
+	       pkey_rights(rights));
+	*fault_addr = PPC_INST_NOP;
+	FAIL_IF(remaining_faults != 0 || fault_code != SEGV_ACCERR);
+
+	/* Free the current pkey */
+	sys_pkey_free(pkey);
+
+	rights = 0;
+	do {
+		/*
+		 * Allocate pkeys with all valid combinations of read,
+		 * write and execute restrictions.
+		 */
+		pkey = sys_pkey_alloc(0, rights);
+		FAIL_IF(pkey < 0);
+
+		/*
+		 * Jump to the executable region. AMR bits may or may not
+		 * be set but they should not affect execution.
+		 *
+		 * This should generate pkey faults based on IAMR bits which
+		 * may be set to restrict execution.
+		 *
+		 * The first iteration also checks if the overwrite of the
+		 * first instruction word from a trap to a no-op succeeded.
+		 */
+		fault_pkey = pkey;
+		fault_type = -1;
+		remaining_faults = 0;
+		if (rights & PKEY_DISABLE_EXECUTE) {
+			fault_type = PKEY_DISABLE_EXECUTE;
+			remaining_faults = 1;
+		}
+
+		FAIL_IF(sys_pkey_mprotect(insns, pgsize, PROT_EXEC, pkey) != 0);
+		printf("execute at %p, pkey permissions are %s\n", fault_addr,
+		       pkey_rights(rights));
+		asm volatile("mtctr	%0; bctrl" : : "r"(insns));
+		FAIL_IF(remaining_faults != 0);
+		if (rights & PKEY_DISABLE_EXECUTE)
+			FAIL_IF(fault_code != SEGV_PKUERR);
+
+		/* Free the current pkey */
+		sys_pkey_free(pkey);
+
+		/* Find next valid combination of pkey rights */
+		rights = next_pkey_rights(rights);
+	} while (rights);
+
+	/* Cleanup */
+	munmap((void *) insns, pgsize);
+
+	return 0;
+}
+
+int main(void)
+{
+	test_harness(test, "pkey_exec_prot");
+}
diff --git a/tools/testing/selftests/powerpc/mm/pkey_siginfo.c b/tools/testing/selftests/powerpc/mm/pkey_siginfo.c
new file mode 100644
index 000000000000..4f815d7c1214
--- /dev/null
+++ b/tools/testing/selftests/powerpc/mm/pkey_siginfo.c
@@ -0,0 +1,333 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright 2020, Sandipan Das, IBM Corp.
+ *
+ * Test if the signal information reports the correct memory protection
+ * key upon getting a key access violation fault for a page that was
+ * attempted to be protected by two different keys from two competing
+ * threads at the same time.
+ */
+
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <signal.h>
+
+#include <unistd.h>
+#include <pthread.h>
+#include <sys/mman.h>
+
+#include "pkeys.h"
+
+#define PPC_INST_NOP	0x60000000
+#define PPC_INST_BLR	0x4e800020
+#define PROT_RWX	(PROT_READ | PROT_WRITE | PROT_EXEC)
+
+#define NUM_ITERATIONS	1000000
+
+static volatile sig_atomic_t perm_pkey, rest_pkey;
+static volatile sig_atomic_t rights, fault_count;
+static volatile unsigned int *volatile fault_addr;
+static pthread_barrier_t iteration_barrier;
+
+static void segv_handler(int signum, siginfo_t *sinfo, void *ctx)
+{
+	void *pgstart;
+	size_t pgsize;
+	int pkey;
+
+	pkey = siginfo_pkey(sinfo);
+
+	/* Check if this fault originated from a pkey access violation */
+	if (sinfo->si_code != SEGV_PKUERR) {
+		sigsafe_err("got a fault for an unexpected reason\n");
+		_exit(1);
+	}
+
+	/* Check if this fault originated from the expected address */
+	if (sinfo->si_addr != (void *) fault_addr) {
+		sigsafe_err("got a fault for an unexpected address\n");
+		_exit(1);
+	}
+
+	/* Check if this fault originated from the restrictive pkey */
+	if (pkey != rest_pkey) {
+		sigsafe_err("got a fault for an unexpected pkey\n");
+		_exit(1);
+	}
+
+	/* Check if too many faults have occurred for the same iteration */
+	if (fault_count > 0) {
+		sigsafe_err("got too many faults for the same address\n");
+		_exit(1);
+	}
+
+	pgsize = getpagesize();
+	pgstart = (void *) ((unsigned long) fault_addr & ~(pgsize - 1));
+
+	/*
+	 * If the current fault occurred due to lack of execute rights,
+	 * reassociate the page with the exec-only pkey since execute
+	 * rights cannot be changed directly for the faulting pkey as
+	 * IAMR is inaccessible from userspace.
+	 *
+	 * Otherwise, if the current fault occurred due to lack of
+	 * read-write rights, change the AMR permission bits for the
+	 * pkey.
+	 *
+	 * This will let the test continue.
+	 */
+	if (rights == PKEY_DISABLE_EXECUTE &&
+	    mprotect(pgstart, pgsize, PROT_EXEC))
+		_exit(1);
+	else
+		pkey_set_rights(pkey, 0);
+
+	fault_count++;
+}
+
+struct region {
+	unsigned long rights;
+	unsigned int *base;
+	size_t size;
+};
+
+static void *protect(void *p)
+{
+	unsigned long rights;
+	unsigned int *base;
+	size_t size;
+	int tid, i;
+
+	tid = gettid();
+	base = ((struct region *) p)->base;
+	size = ((struct region *) p)->size;
+	FAIL_IF_EXIT(!base);
+
+	/* No read, write and execute restrictions */
+	rights = 0;
+
+	printf("tid %d, pkey permissions are %s\n", tid, pkey_rights(rights));
+
+	/* Allocate the permissive pkey */
+	perm_pkey = sys_pkey_alloc(0, rights);
+	FAIL_IF_EXIT(perm_pkey < 0);
+
+	/*
+	 * Repeatedly try to protect the common region with a permissive
+	 * pkey
+	 */
+	for (i = 0; i < NUM_ITERATIONS; i++) {
+		/*
+		 * Wait until the other thread has finished allocating the
+		 * restrictive pkey or until the next iteration has begun
+		 */
+		pthread_barrier_wait(&iteration_barrier);
+
+		/* Try to associate the permissive pkey with the region */
+		FAIL_IF_EXIT(sys_pkey_mprotect(base, size, PROT_RWX,
+					       perm_pkey));
+	}
+
+	/* Free the permissive pkey */
+	sys_pkey_free(perm_pkey);
+
+	return NULL;
+}
+
+static void *protect_access(void *p)
+{
+	size_t size, numinsns;
+	unsigned int *base;
+	int tid, i;
+
+	tid = gettid();
+	base = ((struct region *) p)->base;
+	size = ((struct region *) p)->size;
+	rights = ((struct region *) p)->rights;
+	numinsns = size / sizeof(base[0]);
+	FAIL_IF_EXIT(!base);
+
+	/* Allocate the restrictive pkey */
+	rest_pkey = sys_pkey_alloc(0, rights);
+	FAIL_IF_EXIT(rest_pkey < 0);
+
+	printf("tid %d, pkey permissions are %s\n", tid, pkey_rights(rights));
+	printf("tid %d, %s randomly in range [%p, %p]\n", tid,
+	       (rights == PKEY_DISABLE_EXECUTE) ? "execute" :
+	       (rights == PKEY_DISABLE_WRITE)  ? "write" : "read",
+	       base, base + numinsns);
+
+	/*
+	 * Repeatedly try to protect the common region with a restrictive
+	 * pkey and read, write or execute from it
+	 */
+	for (i = 0; i < NUM_ITERATIONS; i++) {
+		/*
+		 * Wait until the other thread has finished allocating the
+		 * permissive pkey or until the next iteration has begun
+		 */
+		pthread_barrier_wait(&iteration_barrier);
+
+		/* Try to associate the restrictive pkey with the region */
+		FAIL_IF_EXIT(sys_pkey_mprotect(base, size, PROT_RWX,
+					       rest_pkey));
+
+		/* Choose a random instruction word address from the region */
+		fault_addr = base + (rand() % numinsns);
+		fault_count = 0;
+
+		switch (rights) {
+		/* Read protection test */
+		case PKEY_DISABLE_ACCESS:
+			/*
+			 * Read an instruction word from the region and
+			 * verify if it has not been overwritten to
+			 * something unexpected
+			 */
+			FAIL_IF_EXIT(*fault_addr != PPC_INST_NOP &&
+				     *fault_addr != PPC_INST_BLR);
+			break;
+
+		/* Write protection test */
+		case PKEY_DISABLE_WRITE:
+			/*
+			 * Write an instruction word to the region and
+			 * verify if the overwrite has succeeded
+			 */
+			*fault_addr = PPC_INST_BLR;
+			FAIL_IF_EXIT(*fault_addr != PPC_INST_BLR);
+			break;
+
+		/* Execute protection test */
+		case PKEY_DISABLE_EXECUTE:
+			/* Jump to the region and execute instructions */
+			asm volatile(
+				"mtctr	%0; bctrl"
+				: : "r"(fault_addr) : "ctr", "lr");
+			break;
+		}
+
+		/*
+		 * Restore the restrictions originally imposed by the
+		 * restrictive pkey as the signal handler would have
+		 * cleared out the corresponding AMR bits
+		 */
+		pkey_set_rights(rest_pkey, rights);
+	}
+
+	/* Free restrictive pkey */
+	sys_pkey_free(rest_pkey);
+
+	return NULL;
+}
+
+static void reset_pkeys(unsigned long rights)
+{
+	int pkeys[NR_PKEYS], i;
+
+	/* Exhaustively allocate all available pkeys */
+	for (i = 0; i < NR_PKEYS; i++)
+		pkeys[i] = sys_pkey_alloc(0, rights);
+
+	/* Free all allocated pkeys */
+	for (i = 0; i < NR_PKEYS; i++)
+		sys_pkey_free(pkeys[i]);
+}
+
+static int test(void)
+{
+	pthread_t prot_thread, pacc_thread;
+	struct sigaction act;
+	pthread_attr_t attr;
+	size_t numinsns;
+	struct region r;
+	int ret, i;
+
+	srand(time(NULL));
+	ret = pkeys_unsupported();
+	if (ret)
+		return ret;
+
+	/* Allocate the region */
+	r.size = getpagesize();
+	r.base = mmap(NULL, r.size, PROT_RWX,
+		      MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+	FAIL_IF(r.base == MAP_FAILED);
+
+	/*
+	 * Fill the region with no-ops with a branch at the end
+	 * for returning to the caller
+	 */
+	numinsns = r.size / sizeof(r.base[0]);
+	for (i = 0; i < numinsns - 1; i++)
+		r.base[i] = PPC_INST_NOP;
+	r.base[i] = PPC_INST_BLR;
+
+	/* Setup SIGSEGV handler */
+	act.sa_handler = 0;
+	act.sa_sigaction = segv_handler;
+	FAIL_IF(sigprocmask(SIG_SETMASK, 0, &act.sa_mask) != 0);
+	act.sa_flags = SA_SIGINFO;
+	act.sa_restorer = 0;
+	FAIL_IF(sigaction(SIGSEGV, &act, NULL) != 0);
+
+	/*
+	 * For these tests, the parent process should clear all bits of
+	 * AMR and IAMR, i.e. impose no restrictions, for all available
+	 * pkeys. This will be the base for the initial AMR and IAMR
+	 * values for all the test thread pairs.
+	 *
+	 * If the AMR and IAMR bits of all available pkeys are cleared
+	 * before running the tests and a fault is generated when
+	 * attempting to read, write or execute instructions from a
+	 * pkey protected region, the pkey responsible for this must be
+	 * the one from the protect-and-access thread since the other
+	 * one is fully permissive. Despite that, if the pkey reported
+	 * by siginfo is not the restrictive pkey, then there must be a
+	 * kernel bug.
+	 */
+	reset_pkeys(0);
+
+	/* Setup barrier for protect and protect-and-access threads */
+	FAIL_IF(pthread_attr_init(&attr) != 0);
+	FAIL_IF(pthread_barrier_init(&iteration_barrier, NULL, 2) != 0);
+
+	/* Setup and start protect and protect-and-read threads */
+	puts("starting thread pair (protect, protect-and-read)");
+	r.rights = PKEY_DISABLE_ACCESS;
+	FAIL_IF(pthread_create(&prot_thread, &attr, &protect, &r) != 0);
+	FAIL_IF(pthread_create(&pacc_thread, &attr, &protect_access, &r) != 0);
+	FAIL_IF(pthread_join(prot_thread, NULL) != 0);
+	FAIL_IF(pthread_join(pacc_thread, NULL) != 0);
+
+	/* Setup and start protect and protect-and-write threads */
+	puts("starting thread pair (protect, protect-and-write)");
+	r.rights = PKEY_DISABLE_WRITE;
+	FAIL_IF(pthread_create(&prot_thread, &attr, &protect, &r) != 0);
+	FAIL_IF(pthread_create(&pacc_thread, &attr, &protect_access, &r) != 0);
+	FAIL_IF(pthread_join(prot_thread, NULL) != 0);
+	FAIL_IF(pthread_join(pacc_thread, NULL) != 0);
+
+	/* Setup and start protect and protect-and-execute threads */
+	puts("starting thread pair (protect, protect-and-execute)");
+	r.rights = PKEY_DISABLE_EXECUTE;
+	FAIL_IF(pthread_create(&prot_thread, &attr, &protect, &r) != 0);
+	FAIL_IF(pthread_create(&pacc_thread, &attr, &protect_access, &r) != 0);
+	FAIL_IF(pthread_join(prot_thread, NULL) != 0);
+	FAIL_IF(pthread_join(pacc_thread, NULL) != 0);
+
+	/* Cleanup */
+	FAIL_IF(pthread_attr_destroy(&attr) != 0);
+	FAIL_IF(pthread_barrier_destroy(&iteration_barrier) != 0);
+	munmap(r.base, r.size);
+
+	return 0;
+}
+
+int main(void)
+{
+	test_harness(test, "pkey_siginfo");
+}
diff --git a/tools/testing/selftests/powerpc/mm/prot_sao.c b/tools/testing/selftests/powerpc/mm/prot_sao.c
index e2eed65b7735..30b71b1d78d5 100644
--- a/tools/testing/selftests/powerpc/mm/prot_sao.c
+++ b/tools/testing/selftests/powerpc/mm/prot_sao.c
@@ -7,6 +7,7 @@
 #include <stdlib.h>
 #include <string.h>
 #include <sys/mman.h>
+#include <unistd.h>
 
 #include <asm/cputable.h>
 
@@ -18,8 +19,13 @@ int test_prot_sao(void)
 {
 	char *p;
 
-	/* 2.06 or later should support SAO */
-	SKIP_IF(!have_hwcap(PPC_FEATURE_ARCH_2_06));
+	/*
+	 * SAO was introduced in 2.06 and removed in 3.1. It's disabled in
+	 * guests/LPARs by default, so also skip if we are running in a guest.
+	 */
+	SKIP_IF(!have_hwcap(PPC_FEATURE_ARCH_2_06) ||
+		have_hwcap2(PPC_FEATURE2_ARCH_3_1) ||
+		access("/proc/device-tree/rtas/ibm,hypertas-functions", F_OK) == 0);
 
 	/*
 	 * Ensure we can ask for PROT_SAO.
diff --git a/tools/testing/selftests/powerpc/mm/stack_expansion_ldst.c b/tools/testing/selftests/powerpc/mm/stack_expansion_ldst.c
new file mode 100644
index 000000000000..ed9143990888
--- /dev/null
+++ b/tools/testing/selftests/powerpc/mm/stack_expansion_ldst.c
@@ -0,0 +1,202 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Test that loads/stores expand the stack segment, or trigger a SEGV, in
+ * various conditions.
+ *
+ * Based on test code by Tom Lane.
+ */
+
+#undef NDEBUG
+#include <assert.h>
+
+#include <err.h>
+#include <errno.h>
+#include <stdio.h>
+#include <signal.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/resource.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#define _KB (1024)
+#define _MB (1024 * 1024)
+
+volatile char *stack_top_ptr;
+volatile unsigned long stack_top_sp;
+volatile char c;
+
+enum access_type {
+	LOAD,
+	STORE,
+};
+
+/*
+ * Consume stack until the stack pointer is below @target_sp, then do an access
+ * (load or store) at offset @delta from either the base of the stack or the
+ * current stack pointer.
+ */
+__attribute__ ((noinline))
+int consume_stack(unsigned long target_sp, unsigned long stack_high, int delta, enum access_type type)
+{
+	unsigned long target;
+	char stack_cur;
+
+	if ((unsigned long)&stack_cur > target_sp)
+		return consume_stack(target_sp, stack_high, delta, type);
+	else {
+		// We don't really need this, but without it GCC might not
+		// generate a recursive call above.
+		stack_top_ptr = &stack_cur;
+
+#ifdef __powerpc__
+		asm volatile ("mr %[sp], %%r1" : [sp] "=r" (stack_top_sp));
+#else
+		asm volatile ("mov %%rsp, %[sp]" : [sp] "=r" (stack_top_sp));
+#endif
+		target = stack_high - delta + 1;
+		volatile char *p = (char *)target;
+
+		if (type == STORE)
+			*p = c;
+		else
+			c = *p;
+
+		// Do something to prevent the stack frame being popped prior to
+		// our access above.
+		getpid();
+	}
+
+	return 0;
+}
+
+static int search_proc_maps(char *needle, unsigned long *low, unsigned long *high)
+{
+	unsigned long start, end;
+	static char buf[4096];
+	char name[128];
+	FILE *f;
+	int rc;
+
+	f = fopen("/proc/self/maps", "r");
+	if (!f) {
+		perror("fopen");
+		return -1;
+	}
+
+	while (fgets(buf, sizeof(buf), f)) {
+		rc = sscanf(buf, "%lx-%lx %*c%*c%*c%*c %*x %*d:%*d %*d %127s\n",
+			    &start, &end, name);
+		if (rc == 2)
+			continue;
+
+		if (rc != 3) {
+			printf("sscanf errored\n");
+			rc = -1;
+			break;
+		}
+
+		if (strstr(name, needle)) {
+			*low = start;
+			*high = end - 1;
+			rc = 0;
+			break;
+		}
+	}
+
+	fclose(f);
+
+	return rc;
+}
+
+int child(unsigned int stack_used, int delta, enum access_type type)
+{
+	unsigned long low, stack_high;
+
+	assert(search_proc_maps("[stack]", &low, &stack_high) == 0);
+
+	assert(consume_stack(stack_high - stack_used, stack_high, delta, type) == 0);
+
+	printf("Access OK: %s delta %-7d used size 0x%06x stack high 0x%lx top_ptr %p top sp 0x%lx actual used 0x%lx\n",
+	       type == LOAD ? "load" : "store", delta, stack_used, stack_high,
+	       stack_top_ptr, stack_top_sp, stack_high - stack_top_sp + 1);
+
+	return 0;
+}
+
+static int test_one(unsigned int stack_used, int delta, enum access_type type)
+{
+	pid_t pid;
+	int rc;
+
+	pid = fork();
+	if (pid == 0)
+		exit(child(stack_used, delta, type));
+
+	assert(waitpid(pid, &rc, 0) != -1);
+
+	if (WIFEXITED(rc) && WEXITSTATUS(rc) == 0)
+		return 0;
+
+	// We don't expect a non-zero exit that's not a signal
+	assert(!WIFEXITED(rc));
+
+	printf("Faulted:   %s delta %-7d used size 0x%06x signal %d\n",
+	       type == LOAD ? "load" : "store", delta, stack_used,
+	       WTERMSIG(rc));
+
+	return 1;
+}
+
+// This is fairly arbitrary but is well below any of the targets below,
+// so that the delta between the stack pointer and the target is large.
+#define DEFAULT_SIZE	(32 * _KB)
+
+static void test_one_type(enum access_type type, unsigned long page_size, unsigned long rlim_cur)
+{
+	unsigned long delta;
+
+	// We should be able to access anywhere within the rlimit
+	for (delta = page_size; delta <= rlim_cur; delta += page_size)
+		assert(test_one(DEFAULT_SIZE, delta, type) == 0);
+
+	assert(test_one(DEFAULT_SIZE, rlim_cur, type) == 0);
+
+	// But if we go past the rlimit it should fail
+	assert(test_one(DEFAULT_SIZE, rlim_cur + 1, type) != 0);
+}
+
+static int test(void)
+{
+	unsigned long page_size;
+	struct rlimit rlimit;
+
+	page_size = getpagesize();
+	getrlimit(RLIMIT_STACK, &rlimit);
+	printf("Stack rlimit is 0x%lx\n", rlimit.rlim_cur);
+
+	printf("Testing loads ...\n");
+	test_one_type(LOAD, page_size, rlimit.rlim_cur);
+	printf("Testing stores ...\n");
+	test_one_type(STORE, page_size, rlimit.rlim_cur);
+
+	printf("All OK\n");
+
+	return 0;
+}
+
+#ifdef __powerpc__
+#include "utils.h"
+
+int main(void)
+{
+	return test_harness(test, "stack_expansion_ldst");
+}
+#else
+int main(void)
+{
+	return test();
+}
+#endif
diff --git a/tools/testing/selftests/powerpc/mm/stack_expansion_signal.c b/tools/testing/selftests/powerpc/mm/stack_expansion_signal.c
new file mode 100644
index 000000000000..c8b32a29e274
--- /dev/null
+++ b/tools/testing/selftests/powerpc/mm/stack_expansion_signal.c
@@ -0,0 +1,118 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Test that signal delivery is able to expand the stack segment without
+ * triggering a SEGV.
+ *
+ * Based on test code by Tom Lane.
+ */
+
+#include <err.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "../pmu/lib.h"
+#include "utils.h"
+
+#define _KB (1024)
+#define _MB (1024 * 1024)
+
+static char *stack_base_ptr;
+static char *stack_top_ptr;
+
+static volatile sig_atomic_t sig_occurred = 0;
+
+static void sigusr1_handler(int signal_arg)
+{
+	sig_occurred = 1;
+}
+
+static int consume_stack(unsigned int stack_size, union pipe write_pipe)
+{
+	char stack_cur;
+
+	if ((stack_base_ptr - &stack_cur) < stack_size)
+		return consume_stack(stack_size, write_pipe);
+	else {
+		stack_top_ptr = &stack_cur;
+
+		FAIL_IF(notify_parent(write_pipe));
+
+		while (!sig_occurred)
+			barrier();
+	}
+
+	return 0;
+}
+
+static int child(unsigned int stack_size, union pipe write_pipe)
+{
+	struct sigaction act;
+	char stack_base;
+
+	act.sa_handler = sigusr1_handler;
+	sigemptyset(&act.sa_mask);
+	act.sa_flags = 0;
+	if (sigaction(SIGUSR1, &act, NULL) < 0)
+		err(1, "sigaction");
+
+	stack_base_ptr = (char *) (((size_t) &stack_base + 65535) & ~65535UL);
+
+	FAIL_IF(consume_stack(stack_size, write_pipe));
+
+	printf("size 0x%06x: OK, stack base %p top %p (%zx used)\n",
+		stack_size, stack_base_ptr, stack_top_ptr,
+		stack_base_ptr - stack_top_ptr);
+
+	return 0;
+}
+
+static int test_one_size(unsigned int stack_size)
+{
+	union pipe read_pipe, write_pipe;
+	pid_t pid;
+
+	FAIL_IF(pipe(read_pipe.fds) == -1);
+	FAIL_IF(pipe(write_pipe.fds) == -1);
+
+	pid = fork();
+	if (pid == 0) {
+		close(read_pipe.read_fd);
+		close(write_pipe.write_fd);
+		exit(child(stack_size, read_pipe));
+	}
+
+	close(read_pipe.write_fd);
+	close(write_pipe.read_fd);
+	FAIL_IF(sync_with_child(read_pipe, write_pipe));
+
+	kill(pid, SIGUSR1);
+
+	FAIL_IF(wait_for_child(pid));
+
+	close(read_pipe.read_fd);
+	close(write_pipe.write_fd);
+
+	return 0;
+}
+
+int test(void)
+{
+	unsigned int i, size;
+
+	// Test with used stack from 1MB - 64K to 1MB + 64K
+	// Increment by 64 to get more coverage of odd sizes
+	for (i = 0; i < (128 * _KB); i += 64) {
+		size = i + (1 * _MB) - (64 * _KB);
+		FAIL_IF(test_one_size(size));
+	}
+
+	return 0;
+}
+
+int main(void)
+{
+	return test_harness(test, "stack_expansion_signal");
+}
diff --git a/tools/testing/selftests/powerpc/pmu/count_stcx_fail.c b/tools/testing/selftests/powerpc/pmu/count_stcx_fail.c
index 7b4ac4537702..2980abca31e0 100644
--- a/tools/testing/selftests/powerpc/pmu/count_stcx_fail.c
+++ b/tools/testing/selftests/powerpc/pmu/count_stcx_fail.c
@@ -9,6 +9,7 @@
 #include <stdbool.h>
 #include <string.h>
 #include <sys/prctl.h>
+#include <asm/cputable.h>
 
 #include "event.h"
 #include "utils.h"
@@ -104,6 +105,9 @@ static int test_body(void)
 	struct event events[3];
 	u64 overhead;
 
+	// The STCX_FAIL event we use works on Power8 or later
+	SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_2_07));
+
 	setup_event(&events[0], PERF_COUNT_HW_INSTRUCTIONS, PERF_TYPE_HARDWARE, "instructions");
 	setup_event(&events[1], PERF_COUNT_HW_CPU_CYCLES, PERF_TYPE_HARDWARE, "cycles");
 	setup_event(&events[2], PM_STCX_FAIL, PERF_TYPE_RAW, "stcx_fail");
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/back_to_back_ebbs_test.c b/tools/testing/selftests/powerpc/pmu/ebb/back_to_back_ebbs_test.c
index a2d7b0e3dca9..a26ac122c759 100644
--- a/tools/testing/selftests/powerpc/pmu/ebb/back_to_back_ebbs_test.c
+++ b/tools/testing/selftests/powerpc/pmu/ebb/back_to_back_ebbs_test.c
@@ -91,8 +91,6 @@ int back_to_back_ebbs(void)
 	ebb_global_disable();
 	ebb_freeze_pmcs();
 
-	count_pmc(1, sample_period);
-
 	dump_ebb_state();
 
 	event_close(&event);
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/cycles_test.c b/tools/testing/selftests/powerpc/pmu/ebb/cycles_test.c
index bc893813483e..bb9f587fa76e 100644
--- a/tools/testing/selftests/powerpc/pmu/ebb/cycles_test.c
+++ b/tools/testing/selftests/powerpc/pmu/ebb/cycles_test.c
@@ -42,8 +42,6 @@ int cycles(void)
 	ebb_global_disable();
 	ebb_freeze_pmcs();
 
-	count_pmc(1, sample_period);
-
 	dump_ebb_state();
 
 	event_close(&event);
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/cycles_with_freeze_test.c b/tools/testing/selftests/powerpc/pmu/ebb/cycles_with_freeze_test.c
index dcd351d20328..9ae795ce314e 100644
--- a/tools/testing/selftests/powerpc/pmu/ebb/cycles_with_freeze_test.c
+++ b/tools/testing/selftests/powerpc/pmu/ebb/cycles_with_freeze_test.c
@@ -99,8 +99,6 @@ int cycles_with_freeze(void)
 	ebb_global_disable();
 	ebb_freeze_pmcs();
 
-	count_pmc(1, sample_period);
-
 	dump_ebb_state();
 
 	printf("EBBs while frozen %d\n", ebbs_while_frozen);
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/cycles_with_mmcr2_test.c b/tools/testing/selftests/powerpc/pmu/ebb/cycles_with_mmcr2_test.c
index 94c99c12c0f2..4b45a2e70f62 100644
--- a/tools/testing/selftests/powerpc/pmu/ebb/cycles_with_mmcr2_test.c
+++ b/tools/testing/selftests/powerpc/pmu/ebb/cycles_with_mmcr2_test.c
@@ -71,8 +71,6 @@ int cycles_with_mmcr2(void)
 	ebb_global_disable();
 	ebb_freeze_pmcs();
 
-	count_pmc(1, sample_period);
-
 	dump_ebb_state();
 
 	event_close(&event);
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/ebb.c b/tools/testing/selftests/powerpc/pmu/ebb/ebb.c
index dfbc5c3ad52d..21537d6eb6b7 100644
--- a/tools/testing/selftests/powerpc/pmu/ebb/ebb.c
+++ b/tools/testing/selftests/powerpc/pmu/ebb/ebb.c
@@ -396,8 +396,6 @@ int ebb_child(union pipe read_pipe, union pipe write_pipe)
 	ebb_global_disable();
 	ebb_freeze_pmcs();
 
-	count_pmc(1, sample_period);
-
 	dump_ebb_state();
 
 	event_close(&event);
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/ebb_on_willing_child_test.c b/tools/testing/selftests/powerpc/pmu/ebb/ebb_on_willing_child_test.c
index ca2f7d729155..b208bf6ad58d 100644
--- a/tools/testing/selftests/powerpc/pmu/ebb/ebb_on_willing_child_test.c
+++ b/tools/testing/selftests/powerpc/pmu/ebb/ebb_on_willing_child_test.c
@@ -38,8 +38,6 @@ static int victim_child(union pipe read_pipe, union pipe write_pipe)
 	ebb_global_disable();
 	ebb_freeze_pmcs();
 
-	count_pmc(1, sample_period);
-
 	dump_ebb_state();
 
 	FAIL_IF(ebb_state.stats.ebb_count == 0);
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/lost_exception_test.c b/tools/testing/selftests/powerpc/pmu/ebb/lost_exception_test.c
index ac3e6e182614..ba2681a12cc7 100644
--- a/tools/testing/selftests/powerpc/pmu/ebb/lost_exception_test.c
+++ b/tools/testing/selftests/powerpc/pmu/ebb/lost_exception_test.c
@@ -75,7 +75,6 @@ static int test_body(void)
 	ebb_freeze_pmcs();
 	ebb_global_disable();
 
-	count_pmc(4, sample_period);
 	mtspr(SPRN_PMC4, 0xdead);
 
 	dump_summary_ebb_state();
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/multi_counter_test.c b/tools/testing/selftests/powerpc/pmu/ebb/multi_counter_test.c
index b8242e9d97d2..791d37ba327b 100644
--- a/tools/testing/selftests/powerpc/pmu/ebb/multi_counter_test.c
+++ b/tools/testing/selftests/powerpc/pmu/ebb/multi_counter_test.c
@@ -70,13 +70,6 @@ int multi_counter(void)
 	ebb_global_disable();
 	ebb_freeze_pmcs();
 
-	count_pmc(1, sample_period);
-	count_pmc(2, sample_period);
-	count_pmc(3, sample_period);
-	count_pmc(4, sample_period);
-	count_pmc(5, sample_period);
-	count_pmc(6, sample_period);
-
 	dump_ebb_state();
 
 	for (i = 0; i < 6; i++)
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/multi_ebb_procs_test.c b/tools/testing/selftests/powerpc/pmu/ebb/multi_ebb_procs_test.c
index a05c0e18ded6..9b0f70d59702 100644
--- a/tools/testing/selftests/powerpc/pmu/ebb/multi_ebb_procs_test.c
+++ b/tools/testing/selftests/powerpc/pmu/ebb/multi_ebb_procs_test.c
@@ -61,8 +61,6 @@ static int cycles_child(void)
 	ebb_global_disable();
 	ebb_freeze_pmcs();
 
-	count_pmc(1, sample_period);
-
 	dump_summary_ebb_state();
 
 	event_close(&event);
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/pmae_handling_test.c b/tools/testing/selftests/powerpc/pmu/ebb/pmae_handling_test.c
index 153ebc92234f..2904c741e04e 100644
--- a/tools/testing/selftests/powerpc/pmu/ebb/pmae_handling_test.c
+++ b/tools/testing/selftests/powerpc/pmu/ebb/pmae_handling_test.c
@@ -82,8 +82,6 @@ static int test_body(void)
 	ebb_global_disable();
 	ebb_freeze_pmcs();
 
-	count_pmc(1, sample_period);
-
 	dump_ebb_state();
 
 	if (mmcr0_mismatch)
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/pmc56_overflow_test.c b/tools/testing/selftests/powerpc/pmu/ebb/pmc56_overflow_test.c
index eadad75ed7e6..b29f8ba22d1e 100644
--- a/tools/testing/selftests/powerpc/pmu/ebb/pmc56_overflow_test.c
+++ b/tools/testing/selftests/powerpc/pmu/ebb/pmc56_overflow_test.c
@@ -76,8 +76,6 @@ int pmc56_overflow(void)
 	ebb_global_disable();
 	ebb_freeze_pmcs();
 
-	count_pmc(2, sample_period);
-
 	dump_ebb_state();
 
 	printf("PMC5/6 overflow %d\n", pmc56_overflowed);
diff --git a/tools/testing/selftests/powerpc/pmu/lib.h b/tools/testing/selftests/powerpc/pmu/lib.h
index fa12e7d0b4d3..bf1bec013bbb 100644
--- a/tools/testing/selftests/powerpc/pmu/lib.h
+++ b/tools/testing/selftests/powerpc/pmu/lib.h
@@ -6,6 +6,7 @@
 #ifndef __SELFTESTS_POWERPC_PMU_LIB_H
 #define __SELFTESTS_POWERPC_PMU_LIB_H
 
+#include <stdbool.h>
 #include <stdio.h>
 #include <stdint.h>
 #include <string.h>
diff --git a/tools/testing/selftests/powerpc/pmu/per_event_excludes.c b/tools/testing/selftests/powerpc/pmu/per_event_excludes.c
index 2756fe2efdc5..2d37942bf72b 100644
--- a/tools/testing/selftests/powerpc/pmu/per_event_excludes.c
+++ b/tools/testing/selftests/powerpc/pmu/per_event_excludes.c
@@ -12,6 +12,8 @@
 #include <string.h>
 #include <sys/prctl.h>
 
+#include <asm/cputable.h>
+
 #include "event.h"
 #include "lib.h"
 #include "utils.h"
@@ -23,12 +25,9 @@
 static int per_event_excludes(void)
 {
 	struct event *e, events[4];
-	char *platform;
 	int i;
 
-	platform = (char *)get_auxv_entry(AT_BASE_PLATFORM);
-	FAIL_IF(!platform);
-	SKIP_IF(strcmp(platform, "power8") != 0);
+	SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_2_07));
 
 	/*
 	 * We need to create the events disabled, otherwise the running/enabled
diff --git a/tools/testing/selftests/powerpc/ptrace/core-pkey.c b/tools/testing/selftests/powerpc/ptrace/core-pkey.c
index d5c64fee032d..bbc05ffc5860 100644
--- a/tools/testing/selftests/powerpc/ptrace/core-pkey.c
+++ b/tools/testing/selftests/powerpc/ptrace/core-pkey.c
@@ -150,7 +150,7 @@ static int child(struct shared_info *info)
 	printf("%-30s AMR: %016lx pkey1: %d pkey2: %d pkey3: %d\n",
 	       user_write, info->amr, pkey1, pkey2, pkey3);
 
-	mtspr(SPRN_AMR, info->amr);
+	set_amr(info->amr);
 
 	/*
 	 * We won't use pkey3. This tests whether the kernel restores the UAMOR
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c b/tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c
index bdbbbe8431e0..bc454f899124 100644
--- a/tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c
@@ -44,7 +44,7 @@ struct shared_info {
 	unsigned long amr2;
 
 	/* AMR value that ptrace should refuse to write to the child. */
-	unsigned long amr3;
+	unsigned long invalid_amr;
 
 	/* IAMR value the parent expects to read from the child. */
 	unsigned long expected_iamr;
@@ -57,8 +57,8 @@ struct shared_info {
 	 * (even though they're valid ones) because userspace doesn't have
 	 * access to those registers.
 	 */
-	unsigned long new_iamr;
-	unsigned long new_uamor;
+	unsigned long invalid_iamr;
+	unsigned long invalid_uamor;
 };
 
 static int sys_pkey_alloc(unsigned long flags, unsigned long init_access_rights)
@@ -66,11 +66,6 @@ static int sys_pkey_alloc(unsigned long flags, unsigned long init_access_rights)
 	return syscall(__NR_pkey_alloc, flags, init_access_rights);
 }
 
-static int sys_pkey_free(int pkey)
-{
-	return syscall(__NR_pkey_free, pkey);
-}
-
 static int child(struct shared_info *info)
 {
 	unsigned long reg;
@@ -100,33 +95,37 @@ static int child(struct shared_info *info)
 
 	info->amr1 |= 3ul << pkeyshift(pkey1);
 	info->amr2 |= 3ul << pkeyshift(pkey2);
-	info->amr3 |= info->amr2 | 3ul << pkeyshift(pkey3);
+	/*
+	 * invalid amr value where we try to force write
+	 * things which are deined by a uamor setting.
+	 */
+	info->invalid_amr = info->amr2 | (~0x0UL & ~info->expected_uamor);
 
+	/*
+	 * if PKEY_DISABLE_EXECUTE succeeded we should update the expected_iamr
+	 */
 	if (disable_execute)
 		info->expected_iamr |= 1ul << pkeyshift(pkey1);
 	else
 		info->expected_iamr &= ~(1ul << pkeyshift(pkey1));
 
-	info->expected_iamr &= ~(1ul << pkeyshift(pkey2) | 1ul << pkeyshift(pkey3));
-
-	info->expected_uamor |= 3ul << pkeyshift(pkey1) |
-				3ul << pkeyshift(pkey2);
-	info->new_iamr |= 1ul << pkeyshift(pkey1) | 1ul << pkeyshift(pkey2);
-	info->new_uamor |= 3ul << pkeyshift(pkey1);
+	/*
+	 * We allocated pkey2 and pkey 3 above. Clear the IAMR bits.
+	 */
+	info->expected_iamr &= ~(1ul << pkeyshift(pkey2));
+	info->expected_iamr &= ~(1ul << pkeyshift(pkey3));
 
 	/*
-	 * We won't use pkey3. We just want a plausible but invalid key to test
-	 * whether ptrace will let us write to AMR bits we are not supposed to.
-	 *
-	 * This also tests whether the kernel restores the UAMOR permissions
-	 * after a key is freed.
+	 * Create an IAMR value different from expected value.
+	 * Kernel will reject an IAMR and UAMOR change.
 	 */
-	sys_pkey_free(pkey3);
+	info->invalid_iamr = info->expected_iamr | (1ul << pkeyshift(pkey1) | 1ul << pkeyshift(pkey2));
+	info->invalid_uamor = info->expected_uamor & ~(0x3ul << pkeyshift(pkey1));
 
 	printf("%-30s AMR: %016lx pkey1: %d pkey2: %d pkey3: %d\n",
 	       user_write, info->amr1, pkey1, pkey2, pkey3);
 
-	mtspr(SPRN_AMR, info->amr1);
+	set_amr(info->amr1);
 
 	/* Wait for parent to read our AMR value and write a new one. */
 	ret = prod_parent(&info->child_sync);
@@ -196,9 +195,9 @@ static int parent(struct shared_info *info, pid_t pid)
 	PARENT_SKIP_IF_UNSUPPORTED(ret, &info->child_sync);
 	PARENT_FAIL_IF(ret, &info->child_sync);
 
-	info->amr1 = info->amr2 = info->amr3 = regs[0];
-	info->expected_iamr = info->new_iamr = regs[1];
-	info->expected_uamor = info->new_uamor = regs[2];
+	info->amr1 = info->amr2 = regs[0];
+	info->expected_iamr = regs[1];
+	info->expected_uamor = regs[2];
 
 	/* Wake up child so that it can set itself up. */
 	ret = prod_child(&info->child_sync);
@@ -234,10 +233,10 @@ static int parent(struct shared_info *info, pid_t pid)
 		return ret;
 
 	/* Write invalid AMR value in child. */
-	ret = ptrace_write_regs(pid, NT_PPC_PKEY, &info->amr3, 1);
+	ret = ptrace_write_regs(pid, NT_PPC_PKEY, &info->invalid_amr, 1);
 	PARENT_FAIL_IF(ret, &info->child_sync);
 
-	printf("%-30s AMR: %016lx\n", ptrace_write_running, info->amr3);
+	printf("%-30s AMR: %016lx\n", ptrace_write_running, info->invalid_amr);
 
 	/* Wake up child so that it can verify it didn't change. */
 	ret = prod_child(&info->child_sync);
@@ -249,7 +248,7 @@ static int parent(struct shared_info *info, pid_t pid)
 
 	/* Try to write to IAMR. */
 	regs[0] = info->amr1;
-	regs[1] = info->new_iamr;
+	regs[1] = info->invalid_iamr;
 	ret = ptrace_write_regs(pid, NT_PPC_PKEY, regs, 2);
 	PARENT_FAIL_IF(!ret, &info->child_sync);
 
@@ -257,7 +256,7 @@ static int parent(struct shared_info *info, pid_t pid)
 	       ptrace_write_running, regs[0], regs[1]);
 
 	/* Try to write to IAMR and UAMOR. */
-	regs[2] = info->new_uamor;
+	regs[2] = info->invalid_uamor;
 	ret = ptrace_write_regs(pid, NT_PPC_PKEY, regs, 3);
 	PARENT_FAIL_IF(!ret, &info->child_sync);
 
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-tar.c b/tools/testing/selftests/powerpc/ptrace/ptrace-tar.c
index 58cb1a860cc9..4436ca9d3caf 100644
--- a/tools/testing/selftests/powerpc/ptrace/ptrace-tar.c
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace-tar.c
@@ -78,6 +78,9 @@ int ptrace_tar(void)
 	pid_t pid;
 	int ret, status;
 
+	// TAR was added in v2.07
+	SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_2_07));
+
 	shm_id = shmget(IPC_PRIVATE, sizeof(int) * 3, 0777|IPC_CREAT);
 	pid = fork();
 	if (pid < 0) {
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-vsx.c b/tools/testing/selftests/powerpc/ptrace/ptrace-vsx.c
index c4fe0e893306..cb9875f764ca 100644
--- a/tools/testing/selftests/powerpc/ptrace/ptrace-vsx.c
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace-vsx.c
@@ -61,6 +61,8 @@ int ptrace_vsx(void)
 	pid_t pid;
 	int ret, status, i;
 
+	SKIP_IF(!have_hwcap(PPC_FEATURE_HAS_VSX));
+
 	shm_id = shmget(IPC_PRIVATE, sizeof(int) * 2, 0777|IPC_CREAT);
 
 	for (i = 0; i < VEC_MAX; i++)
diff --git a/tools/testing/selftests/powerpc/security/spectre_v2.c b/tools/testing/selftests/powerpc/security/spectre_v2.c
index 8c6b982af2a8..c8d82b784102 100644
--- a/tools/testing/selftests/powerpc/security/spectre_v2.c
+++ b/tools/testing/selftests/powerpc/security/spectre_v2.c
@@ -183,6 +183,16 @@ int spectre_v2_test(void)
 		if (miss_percent > 15) {
 			printf("Branch misses > 15%% unexpected in this configuration!\n");
 			printf("Possible mis-match between reported & actual mitigation\n");
+			/*
+			 * Such a mismatch may be caused by a guest system
+			 * reporting as vulnerable when the host is mitigated.
+			 * Return skip code to avoid detecting this as an error.
+			 * We are not vulnerable and reporting otherwise, so
+			 * missing such a mismatch is safe.
+			 */
+			if (state == VULNERABLE)
+				return 4;
+
 			return 1;
 		}
 		break;
diff --git a/tools/testing/selftests/powerpc/stringloops/Makefile b/tools/testing/selftests/powerpc/stringloops/Makefile
index 7fc0623d85c3..9c39f55a58ff 100644
--- a/tools/testing/selftests/powerpc/stringloops/Makefile
+++ b/tools/testing/selftests/powerpc/stringloops/Makefile
@@ -8,7 +8,7 @@ build_32bit = $(shell if ($(CC) $(CFLAGS) -m32 -o /dev/null memcmp.c >/dev/null
 
 TEST_GEN_PROGS := memcmp_64 strlen
 
-$(OUTPUT)/memcmp_64: memcmp.c
+$(OUTPUT)/memcmp_64: memcmp.c ../utils.c
 $(OUTPUT)/memcmp_64: CFLAGS += -m64 -maltivec
 
 ifeq ($(build_32bit),1)
diff --git a/tools/testing/selftests/powerpc/stringloops/memcmp.c b/tools/testing/selftests/powerpc/stringloops/memcmp.c
index b1fa7546957f..979df3d98368 100644
--- a/tools/testing/selftests/powerpc/stringloops/memcmp.c
+++ b/tools/testing/selftests/powerpc/stringloops/memcmp.c
@@ -2,7 +2,9 @@
 #include <malloc.h>
 #include <stdlib.h>
 #include <string.h>
+#include <sys/mman.h>
 #include <time.h>
+#include <asm/cputable.h>
 #include "utils.h"
 
 #define SIZE 256
@@ -13,6 +15,9 @@
 #define LARGE_MAX_OFFSET 32
 #define LARGE_SIZE_START 4096
 
+/* This is big enough to fit LARGE_SIZE and works on 4K & 64K kernels */
+#define MAP_SIZE (64 * 1024)
+
 #define MAX_OFFSET_DIFF_S1_S2 48
 
 int vmx_count;
@@ -68,25 +73,25 @@ static void test_one(char *s1, char *s2, unsigned long max_offset,
 
 static int testcase(bool islarge)
 {
-	char *s1;
-	char *s2;
-	unsigned long i;
-
-	unsigned long comp_size = (islarge ? LARGE_SIZE : SIZE);
-	unsigned long alloc_size = comp_size + MAX_OFFSET_DIFF_S1_S2;
-	int iterations = islarge ? LARGE_ITERATIONS : ITERATIONS;
-
-	s1 = memalign(128, alloc_size);
-	if (!s1) {
-		perror("memalign");
-		exit(1);
-	}
+	unsigned long i, comp_size, alloc_size;
+	char *p, *s1, *s2;
+	int iterations;
 
-	s2 = memalign(128, alloc_size);
-	if (!s2) {
-		perror("memalign");
-		exit(1);
-	}
+	comp_size = (islarge ? LARGE_SIZE : SIZE);
+	alloc_size = comp_size + MAX_OFFSET_DIFF_S1_S2;
+	iterations = islarge ? LARGE_ITERATIONS : ITERATIONS;
+
+	p = mmap(NULL, 4 * MAP_SIZE, PROT_READ | PROT_WRITE,
+		 MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+	FAIL_IF(p == MAP_FAILED);
+
+	/* Put s1/s2 at the end of a page */
+	s1 = p + MAP_SIZE - alloc_size;
+	s2 = p + 3 * MAP_SIZE - alloc_size;
+
+	/* And unmap the subsequent page to force a fault if we overread */
+	munmap(p + MAP_SIZE, MAP_SIZE);
+	munmap(p + 3 * MAP_SIZE, MAP_SIZE);
 
 	srandom(time(0));
 
@@ -147,6 +152,11 @@ static int testcase(bool islarge)
 
 static int testcases(void)
 {
+#ifdef __powerpc64__
+	// vcmpequd used in memcmp_64.S is v2.07
+	SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_2_07));
+#endif
+
 	testcase(0);
 	testcase(1);
 	return 0;
diff --git a/tools/testing/selftests/powerpc/utils.c b/tools/testing/selftests/powerpc/utils.c
index 5ee0e98c4896..18b6a773d5c7 100644
--- a/tools/testing/selftests/powerpc/utils.c
+++ b/tools/testing/selftests/powerpc/utils.c
@@ -16,6 +16,7 @@
 #include <string.h>
 #include <sys/ioctl.h>
 #include <sys/stat.h>
+#include <sys/sysinfo.h>
 #include <sys/types.h>
 #include <sys/utsname.h>
 #include <unistd.h>
@@ -88,28 +89,40 @@ void *get_auxv_entry(int type)
 
 int pick_online_cpu(void)
 {
-	cpu_set_t mask;
-	int cpu;
+	int ncpus, cpu = -1;
+	cpu_set_t *mask;
+	size_t size;
+
+	ncpus = get_nprocs_conf();
+	size = CPU_ALLOC_SIZE(ncpus);
+	mask = CPU_ALLOC(ncpus);
+	if (!mask) {
+		perror("malloc");
+		return -1;
+	}
 
-	CPU_ZERO(&mask);
+	CPU_ZERO_S(size, mask);
 
-	if (sched_getaffinity(0, sizeof(mask), &mask)) {
+	if (sched_getaffinity(0, size, mask)) {
 		perror("sched_getaffinity");
-		return -1;
+		goto done;
 	}
 
 	/* We prefer a primary thread, but skip 0 */
-	for (cpu = 8; cpu < CPU_SETSIZE; cpu += 8)
-		if (CPU_ISSET(cpu, &mask))
-			return cpu;
+	for (cpu = 8; cpu < ncpus; cpu += 8)
+		if (CPU_ISSET_S(cpu, size, mask))
+			goto done;
 
 	/* Search for anything, but in reverse */
-	for (cpu = CPU_SETSIZE - 1; cpu >= 0; cpu--)
-		if (CPU_ISSET(cpu, &mask))
-			return cpu;
+	for (cpu = ncpus - 1; cpu >= 0; cpu--)
+		if (CPU_ISSET_S(cpu, size, mask))
+			goto done;
 
 	printf("No cpus in affinity mask?!\n");
-	return -1;
+
+done:
+	CPU_FREE(mask);
+	return cpu;
 }
 
 bool is_ppc64le(void)
@@ -293,3 +306,31 @@ void set_dscr(unsigned long val)
 
 	asm volatile("mtspr %1,%0" : : "r" (val), "i" (SPRN_DSCR));
 }
+
+int using_hash_mmu(bool *using_hash)
+{
+	char line[128];
+	FILE *f;
+	int rc;
+
+	f = fopen("/proc/cpuinfo", "r");
+	FAIL_IF(!f);
+
+	rc = 0;
+	while (fgets(line, sizeof(line), f) != NULL) {
+		if (strcmp(line, "MMU		: Hash\n") == 0) {
+			*using_hash = true;
+			goto out;
+		}
+
+		if (strcmp(line, "MMU		: Radix\n") == 0) {
+			*using_hash = false;
+			goto out;
+		}
+	}
+
+	rc = -1;
+out:
+	fclose(f);
+	return rc;
+}
diff --git a/tools/testing/selftests/rseq/param_test.c b/tools/testing/selftests/rseq/param_test.c
index e8a657a5f48a..384589095864 100644
--- a/tools/testing/selftests/rseq/param_test.c
+++ b/tools/testing/selftests/rseq/param_test.c
@@ -1,8 +1,10 @@
 // SPDX-License-Identifier: LGPL-2.1
 #define _GNU_SOURCE
 #include <assert.h>
+#include <linux/membarrier.h>
 #include <pthread.h>
 #include <sched.h>
+#include <stdatomic.h>
 #include <stdint.h>
 #include <stdio.h>
 #include <stdlib.h>
@@ -1131,6 +1133,220 @@ static int set_signal_handler(void)
 	return ret;
 }
 
+struct test_membarrier_thread_args {
+	int stop;
+	intptr_t percpu_list_ptr;
+};
+
+/* Worker threads modify data in their "active" percpu lists. */
+void *test_membarrier_worker_thread(void *arg)
+{
+	struct test_membarrier_thread_args *args =
+		(struct test_membarrier_thread_args *)arg;
+	const int iters = opt_reps;
+	int i;
+
+	if (rseq_register_current_thread()) {
+		fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n",
+			errno, strerror(errno));
+		abort();
+	}
+
+	/* Wait for initialization. */
+	while (!atomic_load(&args->percpu_list_ptr)) {}
+
+	for (i = 0; i < iters; ++i) {
+		int ret;
+
+		do {
+			int cpu = rseq_cpu_start();
+
+			ret = rseq_offset_deref_addv(&args->percpu_list_ptr,
+				sizeof(struct percpu_list_entry) * cpu, 1, cpu);
+		} while (rseq_unlikely(ret));
+	}
+
+	if (rseq_unregister_current_thread()) {
+		fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n",
+			errno, strerror(errno));
+		abort();
+	}
+	return NULL;
+}
+
+void test_membarrier_init_percpu_list(struct percpu_list *list)
+{
+	int i;
+
+	memset(list, 0, sizeof(*list));
+	for (i = 0; i < CPU_SETSIZE; i++) {
+		struct percpu_list_node *node;
+
+		node = malloc(sizeof(*node));
+		assert(node);
+		node->data = 0;
+		node->next = NULL;
+		list->c[i].head = node;
+	}
+}
+
+void test_membarrier_free_percpu_list(struct percpu_list *list)
+{
+	int i;
+
+	for (i = 0; i < CPU_SETSIZE; i++)
+		free(list->c[i].head);
+}
+
+static int sys_membarrier(int cmd, int flags, int cpu_id)
+{
+	return syscall(__NR_membarrier, cmd, flags, cpu_id);
+}
+
+/*
+ * The manager thread swaps per-cpu lists that worker threads see,
+ * and validates that there are no unexpected modifications.
+ */
+void *test_membarrier_manager_thread(void *arg)
+{
+	struct test_membarrier_thread_args *args =
+		(struct test_membarrier_thread_args *)arg;
+	struct percpu_list list_a, list_b;
+	intptr_t expect_a = 0, expect_b = 0;
+	int cpu_a = 0, cpu_b = 0;
+
+	if (rseq_register_current_thread()) {
+		fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n",
+			errno, strerror(errno));
+		abort();
+	}
+
+	/* Init lists. */
+	test_membarrier_init_percpu_list(&list_a);
+	test_membarrier_init_percpu_list(&list_b);
+
+	atomic_store(&args->percpu_list_ptr, (intptr_t)&list_a);
+
+	while (!atomic_load(&args->stop)) {
+		/* list_a is "active". */
+		cpu_a = rand() % CPU_SETSIZE;
+		/*
+		 * As list_b is "inactive", we should never see changes
+		 * to list_b.
+		 */
+		if (expect_b != atomic_load(&list_b.c[cpu_b].head->data)) {
+			fprintf(stderr, "Membarrier test failed\n");
+			abort();
+		}
+
+		/* Make list_b "active". */
+		atomic_store(&args->percpu_list_ptr, (intptr_t)&list_b);
+		if (sys_membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ,
+					MEMBARRIER_CMD_FLAG_CPU, cpu_a) &&
+				errno != ENXIO /* missing CPU */) {
+			perror("sys_membarrier");
+			abort();
+		}
+		/*
+		 * Cpu A should now only modify list_b, so the values
+		 * in list_a should be stable.
+		 */
+		expect_a = atomic_load(&list_a.c[cpu_a].head->data);
+
+		cpu_b = rand() % CPU_SETSIZE;
+		/*
+		 * As list_a is "inactive", we should never see changes
+		 * to list_a.
+		 */
+		if (expect_a != atomic_load(&list_a.c[cpu_a].head->data)) {
+			fprintf(stderr, "Membarrier test failed\n");
+			abort();
+		}
+
+		/* Make list_a "active". */
+		atomic_store(&args->percpu_list_ptr, (intptr_t)&list_a);
+		if (sys_membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ,
+					MEMBARRIER_CMD_FLAG_CPU, cpu_b) &&
+				errno != ENXIO /* missing CPU*/) {
+			perror("sys_membarrier");
+			abort();
+		}
+		/* Remember a value from list_b. */
+		expect_b = atomic_load(&list_b.c[cpu_b].head->data);
+	}
+
+	test_membarrier_free_percpu_list(&list_a);
+	test_membarrier_free_percpu_list(&list_b);
+
+	if (rseq_unregister_current_thread()) {
+		fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n",
+			errno, strerror(errno));
+		abort();
+	}
+	return NULL;
+}
+
+/* Test MEMBARRIER_CMD_PRIVATE_RESTART_RSEQ_ON_CPU membarrier command. */
+#ifdef RSEQ_ARCH_HAS_OFFSET_DEREF_ADDV
+void test_membarrier(void)
+{
+	const int num_threads = opt_threads;
+	struct test_membarrier_thread_args thread_args;
+	pthread_t worker_threads[num_threads];
+	pthread_t manager_thread;
+	int i, ret;
+
+	if (sys_membarrier(MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ, 0, 0)) {
+		perror("sys_membarrier");
+		abort();
+	}
+
+	thread_args.stop = 0;
+	thread_args.percpu_list_ptr = 0;
+	ret = pthread_create(&manager_thread, NULL,
+			test_membarrier_manager_thread, &thread_args);
+	if (ret) {
+		errno = ret;
+		perror("pthread_create");
+		abort();
+	}
+
+	for (i = 0; i < num_threads; i++) {
+		ret = pthread_create(&worker_threads[i], NULL,
+				test_membarrier_worker_thread, &thread_args);
+		if (ret) {
+			errno = ret;
+			perror("pthread_create");
+			abort();
+		}
+	}
+
+
+	for (i = 0; i < num_threads; i++) {
+		ret = pthread_join(worker_threads[i], NULL);
+		if (ret) {
+			errno = ret;
+			perror("pthread_join");
+			abort();
+		}
+	}
+
+	atomic_store(&thread_args.stop, 1);
+	ret = pthread_join(manager_thread, NULL);
+	if (ret) {
+		errno = ret;
+		perror("pthread_join");
+		abort();
+	}
+}
+#else /* RSEQ_ARCH_HAS_OFFSET_DEREF_ADDV */
+void test_membarrier(void)
+{
+	fprintf(stderr, "rseq_offset_deref_addv is not implemented on this architecture. "
+			"Skipping membarrier test.\n");
+}
+#endif
+
 static void show_usage(int argc, char **argv)
 {
 	printf("Usage : %s <OPTIONS>\n",
@@ -1153,7 +1369,7 @@ static void show_usage(int argc, char **argv)
 	printf("	[-r N] Number of repetitions per thread (default 5000)\n");
 	printf("	[-d] Disable rseq system call (no initialization)\n");
 	printf("	[-D M] Disable rseq for each M threads\n");
-	printf("	[-T test] Choose test: (s)pinlock, (l)ist, (b)uffer, (m)emcpy, (i)ncrement\n");
+	printf("	[-T test] Choose test: (s)pinlock, (l)ist, (b)uffer, (m)emcpy, (i)ncrement, membarrie(r)\n");
 	printf("	[-M] Push into buffer and memcpy buffer with memory barriers.\n");
 	printf("	[-v] Verbose output.\n");
 	printf("	[-h] Show this help.\n");
@@ -1268,6 +1484,7 @@ int main(int argc, char **argv)
 			case 'i':
 			case 'b':
 			case 'm':
+			case 'r':
 				break;
 			default:
 				show_usage(argc, argv);
@@ -1320,6 +1537,10 @@ int main(int argc, char **argv)
 		printf_verbose("counter increment\n");
 		test_percpu_inc();
 		break;
+	case 'r':
+		printf_verbose("membarrier\n");
+		test_membarrier();
+		break;
 	}
 	if (!opt_disable_rseq && rseq_unregister_current_thread())
 		abort();
diff --git a/tools/testing/selftests/rseq/rseq-x86.h b/tools/testing/selftests/rseq/rseq-x86.h
index b2da6004fe30..640411518e46 100644
--- a/tools/testing/selftests/rseq/rseq-x86.h
+++ b/tools/testing/selftests/rseq/rseq-x86.h
@@ -279,6 +279,63 @@ error1:
 #endif
 }
 
+#define RSEQ_ARCH_HAS_OFFSET_DEREF_ADDV
+
+/*
+ *   pval = *(ptr+off)
+ *  *pval += inc;
+ */
+static inline __attribute__((always_inline))
+int rseq_offset_deref_addv(intptr_t *ptr, off_t off, intptr_t inc, int cpu)
+{
+	RSEQ_INJECT_C(9)
+
+	__asm__ __volatile__ goto (
+		RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+#ifdef RSEQ_COMPARE_TWICE
+		RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
+#endif
+		/* Start rseq by storing table entry pointer into rseq_cs. */
+		RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi]))
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f)
+		RSEQ_INJECT_ASM(3)
+#ifdef RSEQ_COMPARE_TWICE
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), %l[error1])
+#endif
+		/* get p+v */
+		"movq %[ptr], %%rbx\n\t"
+		"addq %[off], %%rbx\n\t"
+		/* get pv */
+		"movq (%%rbx), %%rcx\n\t"
+		/* *pv += inc */
+		"addq %[inc], (%%rcx)\n\t"
+		"2:\n\t"
+		RSEQ_INJECT_ASM(4)
+		RSEQ_ASM_DEFINE_ABORT(4, "", abort)
+		: /* gcc asm goto does not allow outputs */
+		: [cpu_id]		"r" (cpu),
+		  [rseq_abi]		"r" (&__rseq_abi),
+		  /* final store input */
+		  [ptr]			"m" (*ptr),
+		  [off]			"er" (off),
+		  [inc]			"er" (inc)
+		: "memory", "cc", "rax", "rbx", "rcx"
+		  RSEQ_INJECT_CLOBBER
+		: abort
+#ifdef RSEQ_COMPARE_TWICE
+		  , error1
+#endif
+	);
+	return 0;
+abort:
+	RSEQ_INJECT_FAILED
+	return -1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+	rseq_bug("cpu_id comparison failed");
+#endif
+}
+
 static inline __attribute__((always_inline))
 int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect,
 				 intptr_t *v2, intptr_t newv2,
diff --git a/tools/testing/selftests/rseq/run_param_test.sh b/tools/testing/selftests/rseq/run_param_test.sh
index e426304fd4a0..f51bc83c9e41 100755
--- a/tools/testing/selftests/rseq/run_param_test.sh
+++ b/tools/testing/selftests/rseq/run_param_test.sh
@@ -15,6 +15,7 @@ TEST_LIST=(
 	"-T m"
 	"-T m -M"
 	"-T i"
+	"-T r"
 )
 
 TEST_NAME=(
@@ -25,6 +26,7 @@ TEST_NAME=(
 	"memcpy"
 	"memcpy with barrier"
 	"increment"
+	"membarrier"
 )
 IFS="$OLDIFS"
 
diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c
index 535720b2592a..4a180439ee9e 100644
--- a/tools/testing/selftests/seccomp/seccomp_bpf.c
+++ b/tools/testing/selftests/seccomp/seccomp_bpf.c
@@ -133,6 +133,8 @@ struct seccomp_data {
 #  define __NR_seccomp 348
 # elif defined(__xtensa__)
 #  define __NR_seccomp 337
+# elif defined(__sh__)
+#  define __NR_seccomp 372
 # else
 #  warning "seccomp syscall number unknown for this architecture"
 #  define __NR_seccomp 0xffff
@@ -772,8 +774,15 @@ void *kill_thread(void *data)
 	return (void *)SIBLING_EXIT_UNKILLED;
 }
 
+enum kill_t {
+	KILL_THREAD,
+	KILL_PROCESS,
+	RET_UNKNOWN
+};
+
 /* Prepare a thread that will kill itself or both of us. */
-void kill_thread_or_group(struct __test_metadata *_metadata, bool kill_process)
+void kill_thread_or_group(struct __test_metadata *_metadata,
+			  enum kill_t kill_how)
 {
 	pthread_t thread;
 	void *status;
@@ -789,11 +798,12 @@ void kill_thread_or_group(struct __test_metadata *_metadata, bool kill_process)
 		.len = (unsigned short)ARRAY_SIZE(filter_thread),
 		.filter = filter_thread,
 	};
+	int kill = kill_how == KILL_PROCESS ? SECCOMP_RET_KILL_PROCESS : 0xAAAAAAAAA;
 	struct sock_filter filter_process[] = {
 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
 			offsetof(struct seccomp_data, nr)),
 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_prctl, 0, 1),
-		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL_PROCESS),
+		BPF_STMT(BPF_RET|BPF_K, kill),
 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
 	};
 	struct sock_fprog prog_process = {
@@ -806,13 +816,15 @@ void kill_thread_or_group(struct __test_metadata *_metadata, bool kill_process)
 	}
 
 	ASSERT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0,
-			     kill_process ? &prog_process : &prog_thread));
+			     kill_how == KILL_THREAD ? &prog_thread
+						     : &prog_process));
 
 	/*
 	 * Add the KILL_THREAD rule again to make sure that the KILL_PROCESS
 	 * flag cannot be downgraded by a new filter.
 	 */
-	ASSERT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog_thread));
+	if (kill_how == KILL_PROCESS)
+		ASSERT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog_thread));
 
 	/* Start a thread that will exit immediately. */
 	ASSERT_EQ(0, pthread_create(&thread, NULL, kill_thread, (void *)false));
@@ -840,7 +852,7 @@ TEST(KILL_thread)
 	child_pid = fork();
 	ASSERT_LE(0, child_pid);
 	if (child_pid == 0) {
-		kill_thread_or_group(_metadata, false);
+		kill_thread_or_group(_metadata, KILL_THREAD);
 		_exit(38);
 	}
 
@@ -859,7 +871,7 @@ TEST(KILL_process)
 	child_pid = fork();
 	ASSERT_LE(0, child_pid);
 	if (child_pid == 0) {
-		kill_thread_or_group(_metadata, true);
+		kill_thread_or_group(_metadata, KILL_PROCESS);
 		_exit(38);
 	}
 
@@ -870,6 +882,27 @@ TEST(KILL_process)
 	ASSERT_EQ(SIGSYS, WTERMSIG(status));
 }
 
+TEST(KILL_unknown)
+{
+	int status;
+	pid_t child_pid;
+
+	child_pid = fork();
+	ASSERT_LE(0, child_pid);
+	if (child_pid == 0) {
+		kill_thread_or_group(_metadata, RET_UNKNOWN);
+		_exit(38);
+	}
+
+	ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
+
+	/* If the entire process was killed, we'll see SIGSYS. */
+	EXPECT_TRUE(WIFSIGNALED(status)) {
+		TH_LOG("Unknown SECCOMP_RET is only killing the thread?");
+	}
+	ASSERT_EQ(SIGSYS, WTERMSIG(status));
+}
+
 /* TODO(wad) add 64-bit versus 32-bit arg tests. */
 TEST(arg_out_of_range)
 {
@@ -1665,66 +1698,148 @@ TEST_F(TRACE_poke, getpid_runs_normally)
 }
 
 #if defined(__x86_64__)
-# define ARCH_REGS	struct user_regs_struct
-# define SYSCALL_NUM	orig_rax
-# define SYSCALL_RET	rax
+# define ARCH_REGS		struct user_regs_struct
+# define SYSCALL_NUM(_regs)	(_regs).orig_rax
+# define SYSCALL_RET(_regs)	(_regs).rax
 #elif defined(__i386__)
-# define ARCH_REGS	struct user_regs_struct
-# define SYSCALL_NUM	orig_eax
-# define SYSCALL_RET	eax
+# define ARCH_REGS		struct user_regs_struct
+# define SYSCALL_NUM(_regs)	(_regs).orig_eax
+# define SYSCALL_RET(_regs)	(_regs).eax
 #elif defined(__arm__)
-# define ARCH_REGS	struct pt_regs
-# define SYSCALL_NUM	ARM_r7
-# define SYSCALL_RET	ARM_r0
+# define ARCH_REGS		struct pt_regs
+# define SYSCALL_NUM(_regs)	(_regs).ARM_r7
+# ifndef PTRACE_SET_SYSCALL
+#  define PTRACE_SET_SYSCALL   23
+# endif
+# define SYSCALL_NUM_SET(_regs, _nr)	\
+		EXPECT_EQ(0, ptrace(PTRACE_SET_SYSCALL, tracee, NULL, _nr))
+# define SYSCALL_RET(_regs)	(_regs).ARM_r0
 #elif defined(__aarch64__)
-# define ARCH_REGS	struct user_pt_regs
-# define SYSCALL_NUM	regs[8]
-# define SYSCALL_RET	regs[0]
+# define ARCH_REGS		struct user_pt_regs
+# define SYSCALL_NUM(_regs)	(_regs).regs[8]
+# ifndef NT_ARM_SYSTEM_CALL
+#  define NT_ARM_SYSTEM_CALL 0x404
+# endif
+# define SYSCALL_NUM_SET(_regs, _nr)				\
+	do {							\
+		struct iovec __v;				\
+		typeof(_nr) __nr = (_nr);			\
+		__v.iov_base = &__nr;				\
+		__v.iov_len = sizeof(__nr);			\
+		EXPECT_EQ(0, ptrace(PTRACE_SETREGSET, tracee,	\
+				    NT_ARM_SYSTEM_CALL, &__v));	\
+	} while (0)
+# define SYSCALL_RET(_regs)	(_regs).regs[0]
 #elif defined(__riscv) && __riscv_xlen == 64
-# define ARCH_REGS	struct user_regs_struct
-# define SYSCALL_NUM	a7
-# define SYSCALL_RET	a0
+# define ARCH_REGS		struct user_regs_struct
+# define SYSCALL_NUM(_regs)	(_regs).a7
+# define SYSCALL_RET(_regs)	(_regs).a0
 #elif defined(__csky__)
-# define ARCH_REGS	struct pt_regs
-#if defined(__CSKYABIV2__)
-# define SYSCALL_NUM	regs[3]
-#else
-# define SYSCALL_NUM	regs[9]
-#endif
-# define SYSCALL_RET	a0
+# define ARCH_REGS		struct pt_regs
+#  if defined(__CSKYABIV2__)
+#   define SYSCALL_NUM(_regs)	(_regs).regs[3]
+#  else
+#   define SYSCALL_NUM(_regs)	(_regs).regs[9]
+#  endif
+# define SYSCALL_RET(_regs)	(_regs).a0
 #elif defined(__hppa__)
-# define ARCH_REGS	struct user_regs_struct
-# define SYSCALL_NUM	gr[20]
-# define SYSCALL_RET	gr[28]
+# define ARCH_REGS		struct user_regs_struct
+# define SYSCALL_NUM(_regs)	(_regs).gr[20]
+# define SYSCALL_RET(_regs)	(_regs).gr[28]
 #elif defined(__powerpc__)
-# define ARCH_REGS	struct pt_regs
-# define SYSCALL_NUM	gpr[0]
-# define SYSCALL_RET	gpr[3]
+# define ARCH_REGS		struct pt_regs
+# define SYSCALL_NUM(_regs)	(_regs).gpr[0]
+# define SYSCALL_RET(_regs)	(_regs).gpr[3]
+# define SYSCALL_RET_SET(_regs, _val)				\
+	do {							\
+		typeof(_val) _result = (_val);			\
+		/*						\
+		 * A syscall error is signaled by CR0 SO bit	\
+		 * and the code is stored as a positive value.	\
+		 */						\
+		if (_result < 0) {				\
+			SYSCALL_RET(_regs) = -result;		\
+			(_regs).ccr |= 0x10000000;		\
+		} else {					\
+			SYSCALL_RET(_regs) = result;		\
+			(_regs).ccr &= ~0x10000000;		\
+		}						\
+	} while (0)
+# define SYSCALL_RET_SET_ON_PTRACE_EXIT
 #elif defined(__s390__)
-# define ARCH_REGS     s390_regs
-# define SYSCALL_NUM   gprs[2]
-# define SYSCALL_RET   gprs[2]
-# define SYSCALL_NUM_RET_SHARE_REG
+# define ARCH_REGS		s390_regs
+# define SYSCALL_NUM(_regs)	(_regs).gprs[2]
+# define SYSCALL_RET_SET(_regs, _val)			\
+		TH_LOG("Can't modify syscall return on this architecture")
 #elif defined(__mips__)
-# define ARCH_REGS	struct pt_regs
-# define SYSCALL_NUM	regs[2]
-# define SYSCALL_SYSCALL_NUM regs[4]
-# define SYSCALL_RET	regs[2]
-# define SYSCALL_NUM_RET_SHARE_REG
+# include <asm/unistd_nr_n32.h>
+# include <asm/unistd_nr_n64.h>
+# include <asm/unistd_nr_o32.h>
+# define ARCH_REGS		struct pt_regs
+# define SYSCALL_NUM(_regs)				\
+	({						\
+		typeof((_regs).regs[2]) _nr;		\
+		if ((_regs).regs[2] == __NR_O32_Linux)	\
+			_nr = (_regs).regs[4];		\
+		else					\
+			_nr = (_regs).regs[2];		\
+		_nr;					\
+	})
+# define SYSCALL_NUM_SET(_regs, _nr)			\
+	do {						\
+		if ((_regs).regs[2] == __NR_O32_Linux)	\
+			(_regs).regs[4] = _nr;		\
+		else					\
+			(_regs).regs[2] = _nr;		\
+	} while (0)
+# define SYSCALL_RET_SET(_regs, _val)			\
+		TH_LOG("Can't modify syscall return on this architecture")
 #elif defined(__xtensa__)
-# define ARCH_REGS	struct user_pt_regs
-# define SYSCALL_NUM	syscall
+# define ARCH_REGS		struct user_pt_regs
+# define SYSCALL_NUM(_regs)	(_regs).syscall
 /*
  * On xtensa syscall return value is in the register
  * a2 of the current window which is not fixed.
  */
-#define SYSCALL_RET(reg) a[(reg).windowbase * 4 + 2]
+#define SYSCALL_RET(_regs)	(_regs).a[(_regs).windowbase * 4 + 2]
+#elif defined(__sh__)
+# define ARCH_REGS		struct pt_regs
+# define SYSCALL_NUM(_regs)	(_regs).gpr[3]
+# define SYSCALL_RET(_regs)	(_regs).gpr[0]
 #else
 # error "Do not know how to find your architecture's registers and syscalls"
 #endif
 
+/*
+ * Most architectures can change the syscall by just updating the
+ * associated register. This is the default if not defined above.
+ */
+#ifndef SYSCALL_NUM_SET
+# define SYSCALL_NUM_SET(_regs, _nr)		\
+	do {					\
+		SYSCALL_NUM(_regs) = (_nr);	\
+	} while (0)
+#endif
+/*
+ * Most architectures can change the syscall return value by just
+ * writing to the SYSCALL_RET register. This is the default if not
+ * defined above. If an architecture cannot set the return value
+ * (for example when the syscall and return value register is
+ * shared), report it with TH_LOG() in an arch-specific definition
+ * of SYSCALL_RET_SET() above, and leave SYSCALL_RET undefined.
+ */
+#if !defined(SYSCALL_RET) && !defined(SYSCALL_RET_SET)
+# error "One of SYSCALL_RET or SYSCALL_RET_SET is needed for this arch"
+#endif
+#ifndef SYSCALL_RET_SET
+# define SYSCALL_RET_SET(_regs, _val)		\
+	do {					\
+		SYSCALL_RET(_regs) = (_val);	\
+	} while (0)
+#endif
+
 /* When the syscall return can't be changed, stub out the tests for it. */
-#ifdef SYSCALL_NUM_RET_SHARE_REG
+#ifndef SYSCALL_RET
 # define EXPECT_SYSCALL_RETURN(val, action)	EXPECT_EQ(-1, action)
 #else
 # define EXPECT_SYSCALL_RETURN(val, action)		\
@@ -1739,116 +1854,92 @@ TEST_F(TRACE_poke, getpid_runs_normally)
 	} while (0)
 #endif
 
-/* Use PTRACE_GETREGS and PTRACE_SETREGS when available. This is useful for
+/*
+ * Some architectures (e.g. powerpc) can only set syscall
+ * return values on syscall exit during ptrace.
+ */
+const bool ptrace_entry_set_syscall_nr = true;
+const bool ptrace_entry_set_syscall_ret =
+#ifndef SYSCALL_RET_SET_ON_PTRACE_EXIT
+	true;
+#else
+	false;
+#endif
+
+/*
+ * Use PTRACE_GETREGS and PTRACE_SETREGS when available. This is useful for
  * architectures without HAVE_ARCH_TRACEHOOK (e.g. User-mode Linux).
  */
 #if defined(__x86_64__) || defined(__i386__) || defined(__mips__)
-#define HAVE_GETREGS
+# define ARCH_GETREGS(_regs)	ptrace(PTRACE_GETREGS, tracee, 0, &(_regs))
+# define ARCH_SETREGS(_regs)	ptrace(PTRACE_SETREGS, tracee, 0, &(_regs))
+#else
+# define ARCH_GETREGS(_regs)	({					\
+		struct iovec __v;					\
+		__v.iov_base = &(_regs);				\
+		__v.iov_len = sizeof(_regs);				\
+		ptrace(PTRACE_GETREGSET, tracee, NT_PRSTATUS, &__v);	\
+	})
+# define ARCH_SETREGS(_regs)	({					\
+		struct iovec __v;					\
+		__v.iov_base = &(_regs);				\
+		__v.iov_len = sizeof(_regs);				\
+		ptrace(PTRACE_SETREGSET, tracee, NT_PRSTATUS, &__v);	\
+	})
 #endif
 
 /* Architecture-specific syscall fetching routine. */
 int get_syscall(struct __test_metadata *_metadata, pid_t tracee)
 {
 	ARCH_REGS regs;
-#ifdef HAVE_GETREGS
-	EXPECT_EQ(0, ptrace(PTRACE_GETREGS, tracee, 0, &regs)) {
-		TH_LOG("PTRACE_GETREGS failed");
-		return -1;
-	}
-#else
-	struct iovec iov;
 
-	iov.iov_base = &regs;
-	iov.iov_len = sizeof(regs);
-	EXPECT_EQ(0, ptrace(PTRACE_GETREGSET, tracee, NT_PRSTATUS, &iov)) {
-		TH_LOG("PTRACE_GETREGSET failed");
+	EXPECT_EQ(0, ARCH_GETREGS(regs)) {
 		return -1;
 	}
-#endif
 
-#if defined(__mips__)
-	if (regs.SYSCALL_NUM == __NR_O32_Linux)
-		return regs.SYSCALL_SYSCALL_NUM;
-#endif
-	return regs.SYSCALL_NUM;
+	return SYSCALL_NUM(regs);
 }
 
 /* Architecture-specific syscall changing routine. */
-void change_syscall(struct __test_metadata *_metadata,
-		    pid_t tracee, int syscall, int result)
+void __change_syscall(struct __test_metadata *_metadata,
+		    pid_t tracee, long *syscall, long *ret)
 {
-	int ret;
-	ARCH_REGS regs;
-#ifdef HAVE_GETREGS
-	ret = ptrace(PTRACE_GETREGS, tracee, 0, &regs);
-#else
-	struct iovec iov;
-	iov.iov_base = &regs;
-	iov.iov_len = sizeof(regs);
-	ret = ptrace(PTRACE_GETREGSET, tracee, NT_PRSTATUS, &iov);
-#endif
-	EXPECT_EQ(0, ret) {}
+	ARCH_REGS orig, regs;
 
-#if defined(__x86_64__) || defined(__i386__) || defined(__powerpc__) || \
-	defined(__s390__) || defined(__hppa__) || defined(__riscv) || \
-	defined(__xtensa__) || defined(__csky__)
-	{
-		regs.SYSCALL_NUM = syscall;
-	}
-#elif defined(__mips__)
-	{
-		if (regs.SYSCALL_NUM == __NR_O32_Linux)
-			regs.SYSCALL_SYSCALL_NUM = syscall;
-		else
-			regs.SYSCALL_NUM = syscall;
-	}
+	/* Do not get/set registers if we have nothing to do. */
+	if (!syscall && !ret)
+		return;
 
-#elif defined(__arm__)
-# ifndef PTRACE_SET_SYSCALL
-#  define PTRACE_SET_SYSCALL   23
-# endif
-	{
-		ret = ptrace(PTRACE_SET_SYSCALL, tracee, NULL, syscall);
-		EXPECT_EQ(0, ret);
+	EXPECT_EQ(0, ARCH_GETREGS(regs)) {
+		return;
 	}
+	orig = regs;
 
-#elif defined(__aarch64__)
-# ifndef NT_ARM_SYSTEM_CALL
-#  define NT_ARM_SYSTEM_CALL 0x404
-# endif
-	{
-		iov.iov_base = &syscall;
-		iov.iov_len = sizeof(syscall);
-		ret = ptrace(PTRACE_SETREGSET, tracee, NT_ARM_SYSTEM_CALL,
-			     &iov);
-		EXPECT_EQ(0, ret);
-	}
+	if (syscall)
+		SYSCALL_NUM_SET(regs, *syscall);
 
-#else
-	ASSERT_EQ(1, 0) {
-		TH_LOG("How is the syscall changed on this architecture?");
-	}
-#endif
+	if (ret)
+		SYSCALL_RET_SET(regs, *ret);
 
-	/* If syscall is skipped, change return value. */
-	if (syscall == -1)
-#ifdef SYSCALL_NUM_RET_SHARE_REG
-		TH_LOG("Can't modify syscall return on this architecture");
+	/* Flush any register changes made. */
+	if (memcmp(&orig, &regs, sizeof(orig)) != 0)
+		EXPECT_EQ(0, ARCH_SETREGS(regs));
+}
 
-#elif defined(__xtensa__)
-		regs.SYSCALL_RET(regs) = result;
-#else
-		regs.SYSCALL_RET = result;
-#endif
+/* Change only syscall number. */
+void change_syscall_nr(struct __test_metadata *_metadata,
+		       pid_t tracee, long syscall)
+{
+	__change_syscall(_metadata, tracee, &syscall, NULL);
+}
 
-#ifdef HAVE_GETREGS
-	ret = ptrace(PTRACE_SETREGS, tracee, 0, &regs);
-#else
-	iov.iov_base = &regs;
-	iov.iov_len = sizeof(regs);
-	ret = ptrace(PTRACE_SETREGSET, tracee, NT_PRSTATUS, &iov);
-#endif
-	EXPECT_EQ(0, ret);
+/* Change syscall return value (and set syscall number to -1). */
+void change_syscall_ret(struct __test_metadata *_metadata,
+			pid_t tracee, long ret)
+{
+	long syscall = -1;
+
+	__change_syscall(_metadata, tracee, &syscall, &ret);
 }
 
 void tracer_seccomp(struct __test_metadata *_metadata, pid_t tracee,
@@ -1866,17 +1957,17 @@ void tracer_seccomp(struct __test_metadata *_metadata, pid_t tracee,
 	case 0x1002:
 		/* change getpid to getppid. */
 		EXPECT_EQ(__NR_getpid, get_syscall(_metadata, tracee));
-		change_syscall(_metadata, tracee, __NR_getppid, 0);
+		change_syscall_nr(_metadata, tracee, __NR_getppid);
 		break;
 	case 0x1003:
 		/* skip gettid with valid return code. */
 		EXPECT_EQ(__NR_gettid, get_syscall(_metadata, tracee));
-		change_syscall(_metadata, tracee, -1, 45000);
+		change_syscall_ret(_metadata, tracee, 45000);
 		break;
 	case 0x1004:
 		/* skip openat with error. */
 		EXPECT_EQ(__NR_openat, get_syscall(_metadata, tracee));
-		change_syscall(_metadata, tracee, -1, -ESRCH);
+		change_syscall_ret(_metadata, tracee, -ESRCH);
 		break;
 	case 0x1005:
 		/* do nothing (allow getppid) */
@@ -1891,12 +1982,21 @@ void tracer_seccomp(struct __test_metadata *_metadata, pid_t tracee,
 
 }
 
+FIXTURE(TRACE_syscall) {
+	struct sock_fprog prog;
+	pid_t tracer, mytid, mypid, parent;
+	long syscall_nr;
+};
+
 void tracer_ptrace(struct __test_metadata *_metadata, pid_t tracee,
 		   int status, void *args)
 {
-	int ret, nr;
+	int ret;
 	unsigned long msg;
 	static bool entry;
+	long syscall_nr_val, syscall_ret_val;
+	long *syscall_nr = NULL, *syscall_ret = NULL;
+	FIXTURE_DATA(TRACE_syscall) *self = args;
 
 	/*
 	 * The traditional way to tell PTRACE_SYSCALL entry/exit
@@ -1910,24 +2010,48 @@ void tracer_ptrace(struct __test_metadata *_metadata, pid_t tracee,
 	EXPECT_EQ(entry ? PTRACE_EVENTMSG_SYSCALL_ENTRY
 			: PTRACE_EVENTMSG_SYSCALL_EXIT, msg);
 
-	if (!entry)
-		return;
+	/*
+	 * Some architectures only support setting return values during
+	 * syscall exit under ptrace, and on exit the syscall number may
+	 * no longer be available. Therefore, save the initial sycall
+	 * number here, so it can be examined during both entry and exit
+	 * phases.
+	 */
+	if (entry)
+		self->syscall_nr = get_syscall(_metadata, tracee);
 
-	nr = get_syscall(_metadata, tracee);
+	/*
+	 * Depending on the architecture's syscall setting abilities, we
+	 * pick which things to set during this phase (entry or exit).
+	 */
+	if (entry == ptrace_entry_set_syscall_nr)
+		syscall_nr = &syscall_nr_val;
+	if (entry == ptrace_entry_set_syscall_ret)
+		syscall_ret = &syscall_ret_val;
+
+	/* Now handle the actual rewriting cases. */
+	switch (self->syscall_nr) {
+	case __NR_getpid:
+		syscall_nr_val = __NR_getppid;
+		/* Never change syscall return for this case. */
+		syscall_ret = NULL;
+		break;
+	case __NR_gettid:
+		syscall_nr_val = -1;
+		syscall_ret_val = 45000;
+		break;
+	case __NR_openat:
+		syscall_nr_val = -1;
+		syscall_ret_val = -ESRCH;
+		break;
+	default:
+		/* Unhandled, do nothing. */
+		return;
+	}
 
-	if (nr == __NR_getpid)
-		change_syscall(_metadata, tracee, __NR_getppid, 0);
-	if (nr == __NR_gettid)
-		change_syscall(_metadata, tracee, -1, 45000);
-	if (nr == __NR_openat)
-		change_syscall(_metadata, tracee, -1, -ESRCH);
+	__change_syscall(_metadata, tracee, syscall_nr, syscall_ret);
 }
 
-FIXTURE(TRACE_syscall) {
-	struct sock_fprog prog;
-	pid_t tracer, mytid, mypid, parent;
-};
-
 FIXTURE_VARIANT(TRACE_syscall) {
 	/*
 	 * All of the SECCOMP_RET_TRACE behaviors can be tested with either
@@ -1986,7 +2110,7 @@ FIXTURE_SETUP(TRACE_syscall)
 	self->tracer = setup_trace_fixture(_metadata,
 					   variant->use_ptrace ? tracer_ptrace
 							       : tracer_seccomp,
-					   NULL, variant->use_ptrace);
+					   self, variant->use_ptrace);
 
 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 	ASSERT_EQ(0, ret);
@@ -3136,11 +3260,11 @@ skip:
 static int user_notif_syscall(int nr, unsigned int flags)
 {
 	struct sock_filter filter[] = {
-		BPF_STMT(BPF_LD+BPF_W+BPF_ABS,
+		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
 			offsetof(struct seccomp_data, nr)),
-		BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, nr, 0, 1),
-		BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_USER_NOTIF),
-		BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW),
+		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, nr, 0, 1),
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_USER_NOTIF),
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
 	};
 
 	struct sock_fprog prog = {
@@ -3693,7 +3817,7 @@ TEST(user_notification_filter_empty)
 	long ret;
 	int status;
 	struct pollfd pollfd;
-	struct clone_args args = {
+	struct __clone_args args = {
 		.flags = CLONE_FILES,
 		.exit_signal = SIGCHLD,
 	};
@@ -3709,7 +3833,7 @@ TEST(user_notification_filter_empty)
 	if (pid == 0) {
 		int listener;
 
-		listener = user_notif_syscall(__NR_mknod, SECCOMP_FILTER_FLAG_NEW_LISTENER);
+		listener = user_notif_syscall(__NR_mknodat, SECCOMP_FILTER_FLAG_NEW_LISTENER);
 		if (listener < 0)
 			_exit(EXIT_FAILURE);
 
@@ -3747,7 +3871,7 @@ TEST(user_notification_filter_empty_threaded)
 	long ret;
 	int status;
 	struct pollfd pollfd;
-	struct clone_args args = {
+	struct __clone_args args = {
 		.flags = CLONE_FILES,
 		.exit_signal = SIGCHLD,
 	};
diff --git a/tools/testing/selftests/splice/.gitignore b/tools/testing/selftests/splice/.gitignore
index d5a2da428752..be8266f5d04c 100644
--- a/tools/testing/selftests/splice/.gitignore
+++ b/tools/testing/selftests/splice/.gitignore
@@ -1,2 +1,3 @@
 # SPDX-License-Identifier: GPL-2.0-only
 default_file_splice_read
+splice_read
diff --git a/tools/testing/selftests/splice/Makefile b/tools/testing/selftests/splice/Makefile
index e519b159b60d..541cd826d5a5 100644
--- a/tools/testing/selftests/splice/Makefile
+++ b/tools/testing/selftests/splice/Makefile
@@ -1,5 +1,5 @@
 # SPDX-License-Identifier: GPL-2.0
-TEST_PROGS := default_file_splice_read.sh
-TEST_GEN_PROGS_EXTENDED := default_file_splice_read
+TEST_PROGS := default_file_splice_read.sh short_splice_read.sh
+TEST_GEN_PROGS_EXTENDED := default_file_splice_read splice_read
 
 include ../lib.mk
diff --git a/tools/testing/selftests/splice/config b/tools/testing/selftests/splice/config
new file mode 100644
index 000000000000..058c928368b8
--- /dev/null
+++ b/tools/testing/selftests/splice/config
@@ -0,0 +1 @@
+CONFIG_TEST_LKM=m
diff --git a/tools/testing/selftests/splice/settings b/tools/testing/selftests/splice/settings
new file mode 100644
index 000000000000..89cedfc0d12b
--- /dev/null
+++ b/tools/testing/selftests/splice/settings
@@ -0,0 +1 @@
+timeout=5
diff --git a/tools/testing/selftests/splice/short_splice_read.sh b/tools/testing/selftests/splice/short_splice_read.sh
new file mode 100755
index 000000000000..7810d3589d9a
--- /dev/null
+++ b/tools/testing/selftests/splice/short_splice_read.sh
@@ -0,0 +1,56 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+set -e
+
+ret=0
+
+do_splice()
+{
+	filename="$1"
+	bytes="$2"
+	expected="$3"
+
+	out=$(./splice_read "$filename" "$bytes" | cat)
+	if [ "$out" = "$expected" ] ; then
+		echo "ok: $filename $bytes"
+	else
+		echo "FAIL: $filename $bytes"
+		ret=1
+	fi
+}
+
+test_splice()
+{
+	filename="$1"
+
+	full=$(cat "$filename")
+	two=$(echo "$full" | grep -m1 . | cut -c-2)
+
+	# Make sure full splice has the same contents as a standard read.
+	do_splice "$filename" 4096 "$full"
+
+	# Make sure a partial splice see the first two characters.
+	do_splice "$filename" 2 "$two"
+}
+
+# proc_single_open(), seq_read()
+test_splice /proc/$$/limits
+# special open, seq_read()
+test_splice /proc/$$/comm
+
+# proc_handler, proc_dointvec_minmax
+test_splice /proc/sys/fs/nr_open
+# proc_handler, proc_dostring
+test_splice /proc/sys/kernel/modprobe
+# proc_handler, special read
+test_splice /proc/sys/kernel/version
+
+if ! [ -d /sys/module/test_module/sections ] ; then
+	modprobe test_module
+fi
+# kernfs, attr
+test_splice /sys/module/test_module/coresize
+# kernfs, binattr
+test_splice /sys/module/test_module/sections/.init.text
+
+exit $ret
diff --git a/tools/testing/selftests/splice/splice_read.c b/tools/testing/selftests/splice/splice_read.c
new file mode 100644
index 000000000000..46dae6a25cfb
--- /dev/null
+++ b/tools/testing/selftests/splice/splice_read.c
@@ -0,0 +1,57 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+int main(int argc, char *argv[])
+{
+	int fd;
+	size_t size;
+	ssize_t spliced;
+
+	if (argc < 2) {
+		fprintf(stderr, "Usage: %s INPUT [BYTES]\n", argv[0]);
+		return EXIT_FAILURE;
+	}
+
+	fd = open(argv[1], O_RDONLY);
+	if (fd < 0) {
+		perror(argv[1]);
+		return EXIT_FAILURE;
+	}
+
+	if (argc == 3)
+		size = atol(argv[2]);
+	else {
+		struct stat statbuf;
+
+		if (fstat(fd, &statbuf) < 0) {
+			perror(argv[1]);
+			return EXIT_FAILURE;
+		}
+
+		if (statbuf.st_size > INT_MAX) {
+			fprintf(stderr, "%s: Too big\n", argv[1]);
+			return EXIT_FAILURE;
+		}
+
+		size = statbuf.st_size;
+	}
+
+	/* splice(2) file to stdout. */
+	spliced = splice(fd, NULL, STDOUT_FILENO, NULL,
+		      size, SPLICE_F_MOVE);
+	if (spliced < 0) {
+		perror("splice");
+		return EXIT_FAILURE;
+	}
+
+	close(fd);
+	return EXIT_SUCCESS;
+}
diff --git a/tools/testing/selftests/timers/Makefile b/tools/testing/selftests/timers/Makefile
index 7656c7ce79d9..0e73a16874c4 100644
--- a/tools/testing/selftests/timers/Makefile
+++ b/tools/testing/selftests/timers/Makefile
@@ -13,6 +13,7 @@ DESTRUCTIVE_TESTS = alarmtimer-suspend valid-adjtimex adjtick change_skew \
 
 TEST_GEN_PROGS_EXTENDED = $(DESTRUCTIVE_TESTS)
 
+TEST_FILES := settings
 
 include ../lib.mk
 
diff --git a/tools/testing/selftests/timers/settings b/tools/testing/selftests/timers/settings
new file mode 100644
index 000000000000..e7b9417537fb
--- /dev/null
+++ b/tools/testing/selftests/timers/settings
@@ -0,0 +1 @@
+timeout=0
diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile
index a9026706d597..30873b19d04b 100644
--- a/tools/testing/selftests/vm/Makefile
+++ b/tools/testing/selftests/vm/Makefile
@@ -3,6 +3,23 @@
 uname_M := $(shell uname -m 2>/dev/null || echo not)
 MACHINE ?= $(shell echo $(uname_M) | sed -e 's/aarch64.*/arm64/')
 
+# Without this, failed build products remain, with up-to-date timestamps,
+# thus tricking Make (and you!) into believing that All Is Well, in subsequent
+# make invocations:
+.DELETE_ON_ERROR:
+
+# Avoid accidental wrong builds, due to built-in rules working just a little
+# bit too well--but not quite as well as required for our situation here.
+#
+# In other words, "make userfaultfd" is supposed to fail to build at all,
+# because this Makefile only supports either "make" (all), or "make /full/path".
+# However,  the built-in rules, if not suppressed, will pick up CFLAGS and the
+# initial LDLIBS (but not the target-specific LDLIBS, because those are only
+# set for the full path target!). This causes it to get pretty far into building
+# things despite using incorrect values such as an *occasionally* incomplete
+# LDLIBS.
+MAKEFLAGS += --no-builtin-rules
+
 CFLAGS = -Wall -I ../../../../usr/include $(EXTRA_CFLAGS)
 LDLIBS = -lrt
 TEST_GEN_FILES = compaction_test
diff --git a/tools/testing/selftests/vm/compaction_test.c b/tools/testing/selftests/vm/compaction_test.c
index bcec71250873..9b420140ba2b 100644
--- a/tools/testing/selftests/vm/compaction_test.c
+++ b/tools/testing/selftests/vm/compaction_test.c
@@ -18,7 +18,8 @@
 
 #include "../kselftest.h"
 
-#define MAP_SIZE 1048576
+#define MAP_SIZE_MB	100
+#define MAP_SIZE	(MAP_SIZE_MB * 1024 * 1024)
 
 struct map_list {
 	void *map;
@@ -165,7 +166,7 @@ int main(int argc, char **argv)
 	void *map = NULL;
 	unsigned long mem_free = 0;
 	unsigned long hugepage_size = 0;
-	unsigned long mem_fragmentable = 0;
+	long mem_fragmentable_MB = 0;
 
 	if (prereq() != 0) {
 		printf("Either the sysctl compact_unevictable_allowed is not\n"
@@ -190,9 +191,9 @@ int main(int argc, char **argv)
 		return -1;
 	}
 
-	mem_fragmentable = mem_free * 0.8 / 1024;
+	mem_fragmentable_MB = mem_free * 0.8 / 1024;
 
-	while (mem_fragmentable > 0) {
+	while (mem_fragmentable_MB > 0) {
 		map = mmap(NULL, MAP_SIZE, PROT_READ | PROT_WRITE,
 			   MAP_ANONYMOUS | MAP_PRIVATE | MAP_LOCKED, -1, 0);
 		if (map == MAP_FAILED)
@@ -213,7 +214,7 @@ int main(int argc, char **argv)
 		for (i = 0; i < MAP_SIZE; i += page_size)
 			*(unsigned long *)(map + i) = (unsigned long)map + i;
 
-		mem_fragmentable--;
+		mem_fragmentable_MB -= MAP_SIZE_MB;
 	}
 
 	for (entry = list; entry != NULL; entry = entry->next) {
diff --git a/tools/testing/selftests/vm/gup_benchmark.c b/tools/testing/selftests/vm/gup_benchmark.c
index 43b4dfe161a2..1d4359341e44 100644
--- a/tools/testing/selftests/vm/gup_benchmark.c
+++ b/tools/testing/selftests/vm/gup_benchmark.c
@@ -15,12 +15,12 @@
 #define PAGE_SIZE sysconf(_SC_PAGESIZE)
 
 #define GUP_FAST_BENCHMARK	_IOWR('g', 1, struct gup_benchmark)
-#define GUP_LONGTERM_BENCHMARK	_IOWR('g', 2, struct gup_benchmark)
-#define GUP_BENCHMARK		_IOWR('g', 3, struct gup_benchmark)
+#define GUP_BENCHMARK		_IOWR('g', 2, struct gup_benchmark)
 
 /* Similar to above, but use FOLL_PIN instead of FOLL_GET. */
-#define PIN_FAST_BENCHMARK	_IOWR('g', 4, struct gup_benchmark)
-#define PIN_BENCHMARK		_IOWR('g', 5, struct gup_benchmark)
+#define PIN_FAST_BENCHMARK	_IOWR('g', 3, struct gup_benchmark)
+#define PIN_BENCHMARK		_IOWR('g', 4, struct gup_benchmark)
+#define PIN_LONGTERM_BENCHMARK	_IOWR('g', 5, struct gup_benchmark)
 
 /* Just the flags we need, copied from mm.h: */
 #define FOLL_WRITE	0x01	/* check pte is writable */
@@ -52,6 +52,9 @@ int main(int argc, char **argv)
 		case 'b':
 			cmd = PIN_BENCHMARK;
 			break;
+		case 'L':
+			cmd = PIN_LONGTERM_BENCHMARK;
+			break;
 		case 'm':
 			size = atoi(optarg) * MB;
 			break;
@@ -67,9 +70,6 @@ int main(int argc, char **argv)
 		case 'T':
 			thp = 0;
 			break;
-		case 'L':
-			cmd = GUP_LONGTERM_BENCHMARK;
-			break;
 		case 'U':
 			cmd = GUP_BENCHMARK;
 			break;
@@ -105,12 +105,16 @@ int main(int argc, char **argv)
 		gup.flags |= FOLL_WRITE;
 
 	fd = open("/sys/kernel/debug/gup_benchmark", O_RDWR);
-	if (fd == -1)
-		perror("open"), exit(1);
+	if (fd == -1) {
+		perror("open");
+		exit(1);
+	}
 
 	p = mmap(NULL, size, PROT_READ | PROT_WRITE, flags, filed, 0);
-	if (p == MAP_FAILED)
-		perror("mmap"), exit(1);
+	if (p == MAP_FAILED) {
+		perror("mmap");
+		exit(1);
+	}
 	gup.addr = (unsigned long)p;
 
 	if (thp == 1)
@@ -123,8 +127,10 @@ int main(int argc, char **argv)
 
 	for (i = 0; i < repeats; i++) {
 		gup.size = size;
-		if (ioctl(fd, cmd, &gup))
-			perror("ioctl"), exit(1);
+		if (ioctl(fd, cmd, &gup)) {
+			perror("ioctl");
+			exit(1);
+		}
 
 		printf("Time: get:%lld put:%lld us", gup.get_delta_usec,
 			gup.put_delta_usec);
diff --git a/tools/testing/selftests/vm/hmm-tests.c b/tools/testing/selftests/vm/hmm-tests.c
index 91d38a29956b..0a28a6a29581 100644
--- a/tools/testing/selftests/vm/hmm-tests.c
+++ b/tools/testing/selftests/vm/hmm-tests.c
@@ -680,7 +680,7 @@ TEST_F(hmm, anon_write_hugetlbfs)
 
 	n = gethugepagesizes(pagesizes, 4);
 	if (n <= 0)
-		return;
+		SKIP(return, "Huge page size could not be determined");
 	for (idx = 0; --n > 0; ) {
 		if (pagesizes[n] < pagesizes[idx])
 			idx = n;
@@ -694,7 +694,7 @@ TEST_F(hmm, anon_write_hugetlbfs)
 	buffer->ptr = get_hugepage_region(size, GHR_STRICT);
 	if (buffer->ptr == NULL) {
 		free(buffer);
-		return;
+		SKIP(return, "Huge page could not be allocated");
 	}
 
 	buffer->fd = -1;
@@ -942,6 +942,41 @@ TEST_F(hmm, migrate_fault)
 }
 
 /*
+ * Migrate anonymous shared memory to device private memory.
+ */
+TEST_F(hmm, migrate_shared)
+{
+	struct hmm_buffer *buffer;
+	unsigned long npages;
+	unsigned long size;
+	int ret;
+
+	npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift;
+	ASSERT_NE(npages, 0);
+	size = npages << self->page_shift;
+
+	buffer = malloc(sizeof(*buffer));
+	ASSERT_NE(buffer, NULL);
+
+	buffer->fd = -1;
+	buffer->size = size;
+	buffer->mirror = malloc(size);
+	ASSERT_NE(buffer->mirror, NULL);
+
+	buffer->ptr = mmap(NULL, size,
+			   PROT_READ | PROT_WRITE,
+			   MAP_SHARED | MAP_ANONYMOUS,
+			   buffer->fd, 0);
+	ASSERT_NE(buffer->ptr, MAP_FAILED);
+
+	/* Migrate memory to device. */
+	ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_MIGRATE, buffer, npages);
+	ASSERT_EQ(ret, -ENOENT);
+
+	hmm_buffer_free(buffer);
+}
+
+/*
  * Try to migrate various memory types to device private memory.
  */
 TEST_F(hmm2, migrate_mixed)
diff --git a/tools/testing/selftests/vm/map_hugetlb.c b/tools/testing/selftests/vm/map_hugetlb.c
index 6af951900aa3..312889edb84a 100644
--- a/tools/testing/selftests/vm/map_hugetlb.c
+++ b/tools/testing/selftests/vm/map_hugetlb.c
@@ -83,7 +83,7 @@ int main(int argc, char **argv)
 	}
 
 	if (shift)
-		printf("%u kB hugepages\n", 1 << shift);
+		printf("%u kB hugepages\n", 1 << (shift - 10));
 	else
 		printf("Default size hugepages\n");
 	printf("Mapping %lu Mbytes\n", (unsigned long)length >> 20);
diff --git a/tools/testing/selftests/vm/userfaultfd.c b/tools/testing/selftests/vm/userfaultfd.c
index 61e5cfeb1350..9b0912a01777 100644
--- a/tools/testing/selftests/vm/userfaultfd.c
+++ b/tools/testing/selftests/vm/userfaultfd.c
@@ -227,8 +227,10 @@ static void hugetlb_allocate_area(void **alloc_area)
 				  huge_fd, *alloc_area == area_src ? 0 :
 				  nr_pages * page_size);
 		if (area_alias == MAP_FAILED) {
-			if (munmap(*alloc_area, nr_pages * page_size) < 0)
-				perror("hugetlb munmap"), exit(1);
+			if (munmap(*alloc_area, nr_pages * page_size) < 0) {
+				perror("hugetlb munmap");
+				exit(1);
+			}
 			*alloc_area = NULL;
 			return;
 		}
@@ -337,9 +339,10 @@ static void wp_range(int ufd, __u64 start, __u64 len, bool wp)
 	/* Undo write-protect, do wakeup after that */
 	prms.mode = wp ? UFFDIO_WRITEPROTECT_MODE_WP : 0;
 
-	if (ioctl(ufd, UFFDIO_WRITEPROTECT, &prms))
-		fprintf(stderr, "clear WP failed for address 0x%Lx\n",
-			start), exit(1);
+	if (ioctl(ufd, UFFDIO_WRITEPROTECT, &prms)) {
+		fprintf(stderr, "clear WP failed for address 0x%Lx\n", start);
+		exit(1);
+	}
 }
 
 static void *locking_thread(void *arg)
@@ -359,8 +362,10 @@ static void *locking_thread(void *arg)
 			seed += cpu;
 		bzero(&rand, sizeof(rand));
 		bzero(&randstate, sizeof(randstate));
-		if (initstate_r(seed, randstate, sizeof(randstate), &rand))
-			fprintf(stderr, "srandom_r error\n"), exit(1);
+		if (initstate_r(seed, randstate, sizeof(randstate), &rand)) {
+			fprintf(stderr, "srandom_r error\n");
+			exit(1);
+		}
 	} else {
 		page_nr = -bounces;
 		if (!(bounces & BOUNCE_RACINGFAULTS))
@@ -369,12 +374,16 @@ static void *locking_thread(void *arg)
 
 	while (!finished) {
 		if (bounces & BOUNCE_RANDOM) {
-			if (random_r(&rand, &rand_nr))
-				fprintf(stderr, "random_r 1 error\n"), exit(1);
+			if (random_r(&rand, &rand_nr)) {
+				fprintf(stderr, "random_r 1 error\n");
+				exit(1);
+			}
 			page_nr = rand_nr;
 			if (sizeof(page_nr) > sizeof(rand_nr)) {
-				if (random_r(&rand, &rand_nr))
-					fprintf(stderr, "random_r 2 error\n"), exit(1);
+				if (random_r(&rand, &rand_nr)) {
+					fprintf(stderr, "random_r 2 error\n");
+					exit(1);
+				}
 				page_nr |= (((unsigned long) rand_nr) << 16) <<
 					   16;
 			}
@@ -385,11 +394,13 @@ static void *locking_thread(void *arg)
 		start = time(NULL);
 		if (bounces & BOUNCE_VERIFY) {
 			count = *area_count(area_dst, page_nr);
-			if (!count)
+			if (!count) {
 				fprintf(stderr,
 					"page_nr %lu wrong count %Lu %Lu\n",
 					page_nr, count,
-					count_verify[page_nr]), exit(1);
+					count_verify[page_nr]);
+				exit(1);
+			}
 
 
 			/*
@@ -401,11 +412,12 @@ static void *locking_thread(void *arg)
 			 */
 #if 1
 			if (!my_bcmp(area_dst + page_nr * page_size, zeropage,
-				     page_size))
+				     page_size)) {
 				fprintf(stderr,
 					"my_bcmp page_nr %lu wrong count %Lu %Lu\n",
-					page_nr, count,
-					count_verify[page_nr]), exit(1);
+					page_nr, count, count_verify[page_nr]);
+				exit(1);
+			}
 #else
 			unsigned long loops;
 
@@ -437,7 +449,7 @@ static void *locking_thread(void *arg)
 			fprintf(stderr,
 				"page_nr %lu memory corruption %Lu %Lu\n",
 				page_nr, count,
-				count_verify[page_nr]), exit(1);
+				count_verify[page_nr]); exit(1);
 		}
 		count++;
 		*area_count(area_dst, page_nr) = count_verify[page_nr] = count;
@@ -461,12 +473,14 @@ static void retry_copy_page(int ufd, struct uffdio_copy *uffdio_copy,
 				     offset);
 	if (ioctl(ufd, UFFDIO_COPY, uffdio_copy)) {
 		/* real retval in ufdio_copy.copy */
-		if (uffdio_copy->copy != -EEXIST)
+		if (uffdio_copy->copy != -EEXIST) {
 			fprintf(stderr, "UFFDIO_COPY retry error %Ld\n",
-				uffdio_copy->copy), exit(1);
+				uffdio_copy->copy);
+			exit(1);
+		}
 	} else {
 		fprintf(stderr,	"UFFDIO_COPY retry unexpected %Ld\n",
-			uffdio_copy->copy), exit(1);
+			uffdio_copy->copy); exit(1);
 	}
 }
 
@@ -474,9 +488,10 @@ static int __copy_page(int ufd, unsigned long offset, bool retry)
 {
 	struct uffdio_copy uffdio_copy;
 
-	if (offset >= nr_pages * page_size)
-		fprintf(stderr, "unexpected offset %lu\n",
-			offset), exit(1);
+	if (offset >= nr_pages * page_size) {
+		fprintf(stderr, "unexpected offset %lu\n", offset);
+		exit(1);
+	}
 	uffdio_copy.dst = (unsigned long) area_dst + offset;
 	uffdio_copy.src = (unsigned long) area_src + offset;
 	uffdio_copy.len = page_size;
@@ -487,12 +502,14 @@ static int __copy_page(int ufd, unsigned long offset, bool retry)
 	uffdio_copy.copy = 0;
 	if (ioctl(ufd, UFFDIO_COPY, &uffdio_copy)) {
 		/* real retval in ufdio_copy.copy */
-		if (uffdio_copy.copy != -EEXIST)
+		if (uffdio_copy.copy != -EEXIST) {
 			fprintf(stderr, "UFFDIO_COPY error %Ld\n",
-				uffdio_copy.copy), exit(1);
+				uffdio_copy.copy);
+			exit(1);
+		}
 	} else if (uffdio_copy.copy != page_size) {
 		fprintf(stderr, "UFFDIO_COPY unexpected copy %Ld\n",
-			uffdio_copy.copy), exit(1);
+			uffdio_copy.copy); exit(1);
 	} else {
 		if (test_uffdio_copy_eexist && retry) {
 			test_uffdio_copy_eexist = false;
@@ -521,11 +538,11 @@ static int uffd_read_msg(int ufd, struct uffd_msg *msg)
 		if (ret < 0) {
 			if (errno == EAGAIN)
 				return 1;
-			else
-				perror("blocking read error"), exit(1);
+			perror("blocking read error");
 		} else {
-			fprintf(stderr, "short read\n"), exit(1);
+			fprintf(stderr, "short read\n");
 		}
+		exit(1);
 	}
 
 	return 0;
@@ -536,9 +553,10 @@ static void uffd_handle_page_fault(struct uffd_msg *msg,
 {
 	unsigned long offset;
 
-	if (msg->event != UFFD_EVENT_PAGEFAULT)
-		fprintf(stderr, "unexpected msg event %u\n",
-			msg->event), exit(1);
+	if (msg->event != UFFD_EVENT_PAGEFAULT) {
+		fprintf(stderr, "unexpected msg event %u\n", msg->event);
+		exit(1);
+	}
 
 	if (msg->arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_WP) {
 		wp_range(uffd, msg->arg.pagefault.address, page_size, false);
@@ -546,8 +564,10 @@ static void uffd_handle_page_fault(struct uffd_msg *msg,
 	} else {
 		/* Missing page faults */
 		if (bounces & BOUNCE_VERIFY &&
-		    msg->arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_WRITE)
-			fprintf(stderr, "unexpected write fault\n"), exit(1);
+		    msg->arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_WRITE) {
+			fprintf(stderr, "unexpected write fault\n");
+			exit(1);
+		}
 
 		offset = (char *)(unsigned long)msg->arg.pagefault.address - area_dst;
 		offset &= ~(page_size-1);
@@ -574,25 +594,32 @@ static void *uffd_poll_thread(void *arg)
 
 	for (;;) {
 		ret = poll(pollfd, 2, -1);
-		if (!ret)
-			fprintf(stderr, "poll error %d\n", ret), exit(1);
-		if (ret < 0)
-			perror("poll"), exit(1);
+		if (!ret) {
+			fprintf(stderr, "poll error %d\n", ret);
+			exit(1);
+		}
+		if (ret < 0) {
+			perror("poll");
+			exit(1);
+		}
 		if (pollfd[1].revents & POLLIN) {
-			if (read(pollfd[1].fd, &tmp_chr, 1) != 1)
-				fprintf(stderr, "read pipefd error\n"),
-					exit(1);
+			if (read(pollfd[1].fd, &tmp_chr, 1) != 1) {
+				fprintf(stderr, "read pipefd error\n");
+				exit(1);
+			}
 			break;
 		}
-		if (!(pollfd[0].revents & POLLIN))
+		if (!(pollfd[0].revents & POLLIN)) {
 			fprintf(stderr, "pollfd[0].revents %d\n",
-				pollfd[0].revents), exit(1);
+				pollfd[0].revents);
+			exit(1);
+		}
 		if (uffd_read_msg(uffd, &msg))
 			continue;
 		switch (msg.event) {
 		default:
 			fprintf(stderr, "unexpected msg event %u\n",
-				msg.event), exit(1);
+				msg.event); exit(1);
 			break;
 		case UFFD_EVENT_PAGEFAULT:
 			uffd_handle_page_fault(&msg, stats);
@@ -606,8 +633,10 @@ static void *uffd_poll_thread(void *arg)
 			uffd_reg.range.start = msg.arg.remove.start;
 			uffd_reg.range.len = msg.arg.remove.end -
 				msg.arg.remove.start;
-			if (ioctl(uffd, UFFDIO_UNREGISTER, &uffd_reg.range))
-				fprintf(stderr, "remove failure\n"), exit(1);
+			if (ioctl(uffd, UFFDIO_UNREGISTER, &uffd_reg.range)) {
+				fprintf(stderr, "remove failure\n");
+				exit(1);
+			}
 			break;
 		case UFFD_EVENT_REMAP:
 			area_dst = (char *)(unsigned long)msg.arg.remap.to;
@@ -879,8 +908,10 @@ static int faulting_process(int signal_test)
 
 	area_dst = mremap(area_dst, nr_pages * page_size,  nr_pages * page_size,
 			  MREMAP_MAYMOVE | MREMAP_FIXED, area_src);
-	if (area_dst == MAP_FAILED)
-		perror("mremap"), exit(1);
+	if (area_dst == MAP_FAILED) {
+		perror("mremap");
+		exit(1);
+	}
 
 	for (; nr < nr_pages; nr++) {
 		count = *area_count(area_dst, nr);
@@ -888,7 +919,7 @@ static int faulting_process(int signal_test)
 			fprintf(stderr,
 				"nr %lu memory corruption %Lu %Lu\n",
 				nr, count,
-				count_verify[nr]), exit(1);
+				count_verify[nr]); exit(1);
 		}
 		/*
 		 * Trigger write protection if there is by writting
@@ -901,8 +932,10 @@ static int faulting_process(int signal_test)
 		return 1;
 
 	for (nr = 0; nr < nr_pages; nr++) {
-		if (my_bcmp(area_dst + nr * page_size, zeropage, page_size))
-			fprintf(stderr, "nr %lu is not zero\n", nr), exit(1);
+		if (my_bcmp(area_dst + nr * page_size, zeropage, page_size)) {
+			fprintf(stderr, "nr %lu is not zero\n", nr);
+			exit(1);
+		}
 	}
 
 	return 0;
@@ -916,12 +949,14 @@ static void retry_uffdio_zeropage(int ufd,
 				     uffdio_zeropage->range.len,
 				     offset);
 	if (ioctl(ufd, UFFDIO_ZEROPAGE, uffdio_zeropage)) {
-		if (uffdio_zeropage->zeropage != -EEXIST)
+		if (uffdio_zeropage->zeropage != -EEXIST) {
 			fprintf(stderr, "UFFDIO_ZEROPAGE retry error %Ld\n",
-				uffdio_zeropage->zeropage), exit(1);
+				uffdio_zeropage->zeropage);
+			exit(1);
+		}
 	} else {
 		fprintf(stderr, "UFFDIO_ZEROPAGE retry unexpected %Ld\n",
-			uffdio_zeropage->zeropage), exit(1);
+			uffdio_zeropage->zeropage); exit(1);
 	}
 }
 
@@ -933,9 +968,10 @@ static int __uffdio_zeropage(int ufd, unsigned long offset, bool retry)
 
 	has_zeropage = uffd_test_ops->expected_ioctls & (1 << _UFFDIO_ZEROPAGE);
 
-	if (offset >= nr_pages * page_size)
-		fprintf(stderr, "unexpected offset %lu\n",
-			offset), exit(1);
+	if (offset >= nr_pages * page_size) {
+		fprintf(stderr, "unexpected offset %lu\n", offset);
+		exit(1);
+	}
 	uffdio_zeropage.range.start = (unsigned long) area_dst + offset;
 	uffdio_zeropage.range.len = page_size;
 	uffdio_zeropage.mode = 0;
@@ -943,22 +979,26 @@ static int __uffdio_zeropage(int ufd, unsigned long offset, bool retry)
 	if (ret) {
 		/* real retval in ufdio_zeropage.zeropage */
 		if (has_zeropage) {
-			if (uffdio_zeropage.zeropage == -EEXIST)
-				fprintf(stderr, "UFFDIO_ZEROPAGE -EEXIST\n"),
-					exit(1);
-			else
+			if (uffdio_zeropage.zeropage == -EEXIST) {
+				fprintf(stderr, "UFFDIO_ZEROPAGE -EEXIST\n");
+				exit(1);
+			} else {
 				fprintf(stderr, "UFFDIO_ZEROPAGE error %Ld\n",
-					uffdio_zeropage.zeropage), exit(1);
+					uffdio_zeropage.zeropage);
+				exit(1);
+			}
 		} else {
-			if (uffdio_zeropage.zeropage != -EINVAL)
+			if (uffdio_zeropage.zeropage != -EINVAL) {
 				fprintf(stderr,
 					"UFFDIO_ZEROPAGE not -EINVAL %Ld\n",
-					uffdio_zeropage.zeropage), exit(1);
+					uffdio_zeropage.zeropage);
+				exit(1);
+			}
 		}
 	} else if (has_zeropage) {
 		if (uffdio_zeropage.zeropage != page_size) {
 			fprintf(stderr, "UFFDIO_ZEROPAGE unexpected %Ld\n",
-				uffdio_zeropage.zeropage), exit(1);
+				uffdio_zeropage.zeropage); exit(1);
 		} else {
 			if (test_uffdio_zeropage_eexist && retry) {
 				test_uffdio_zeropage_eexist = false;
@@ -970,7 +1010,7 @@ static int __uffdio_zeropage(int ufd, unsigned long offset, bool retry)
 	} else {
 		fprintf(stderr,
 			"UFFDIO_ZEROPAGE succeeded %Ld\n",
-			uffdio_zeropage.zeropage), exit(1);
+			uffdio_zeropage.zeropage); exit(1);
 	}
 
 	return 0;
@@ -1000,19 +1040,24 @@ static int userfaultfd_zeropage_test(void)
 	uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING;
 	if (test_uffdio_wp)
 		uffdio_register.mode |= UFFDIO_REGISTER_MODE_WP;
-	if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register))
-		fprintf(stderr, "register failure\n"), exit(1);
+	if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register)) {
+		fprintf(stderr, "register failure\n");
+		exit(1);
+	}
 
 	expected_ioctls = uffd_test_ops->expected_ioctls;
 	if ((uffdio_register.ioctls & expected_ioctls) !=
-	    expected_ioctls)
+	    expected_ioctls) {
 		fprintf(stderr,
-			"unexpected missing ioctl for anon memory\n"),
-			exit(1);
+			"unexpected missing ioctl for anon memory\n");
+		exit(1);
+	}
 
 	if (uffdio_zeropage(uffd, 0)) {
-		if (my_bcmp(area_dst, zeropage, page_size))
-			fprintf(stderr, "zeropage is not zero\n"), exit(1);
+		if (my_bcmp(area_dst, zeropage, page_size)) {
+			fprintf(stderr, "zeropage is not zero\n");
+			exit(1);
+		}
 	}
 
 	close(uffd);
@@ -1047,32 +1092,41 @@ static int userfaultfd_events_test(void)
 	uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING;
 	if (test_uffdio_wp)
 		uffdio_register.mode |= UFFDIO_REGISTER_MODE_WP;
-	if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register))
-		fprintf(stderr, "register failure\n"), exit(1);
+	if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register)) {
+		fprintf(stderr, "register failure\n");
+		exit(1);
+	}
 
 	expected_ioctls = uffd_test_ops->expected_ioctls;
-	if ((uffdio_register.ioctls & expected_ioctls) !=
-	    expected_ioctls)
-		fprintf(stderr,
-			"unexpected missing ioctl for anon memory\n"),
-			exit(1);
+	if ((uffdio_register.ioctls & expected_ioctls) != expected_ioctls) {
+		fprintf(stderr, "unexpected missing ioctl for anon memory\n");
+		exit(1);
+	}
 
-	if (pthread_create(&uffd_mon, &attr, uffd_poll_thread, &stats))
-		perror("uffd_poll_thread create"), exit(1);
+	if (pthread_create(&uffd_mon, &attr, uffd_poll_thread, &stats)) {
+		perror("uffd_poll_thread create");
+		exit(1);
+	}
 
 	pid = fork();
-	if (pid < 0)
-		perror("fork"), exit(1);
+	if (pid < 0) {
+		perror("fork");
+		exit(1);
+	}
 
 	if (!pid)
 		return faulting_process(0);
 
 	waitpid(pid, &err, 0);
-	if (err)
-		fprintf(stderr, "faulting process failed\n"), exit(1);
+	if (err) {
+		fprintf(stderr, "faulting process failed\n");
+		exit(1);
+	}
 
-	if (write(pipefd[1], &c, sizeof(c)) != sizeof(c))
-		perror("pipe write"), exit(1);
+	if (write(pipefd[1], &c, sizeof(c)) != sizeof(c)) {
+		perror("pipe write");
+		exit(1);
+	}
 	if (pthread_join(uffd_mon, NULL))
 		return 1;
 
@@ -1110,38 +1164,49 @@ static int userfaultfd_sig_test(void)
 	uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING;
 	if (test_uffdio_wp)
 		uffdio_register.mode |= UFFDIO_REGISTER_MODE_WP;
-	if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register))
-		fprintf(stderr, "register failure\n"), exit(1);
+	if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register)) {
+		fprintf(stderr, "register failure\n");
+		exit(1);
+	}
 
 	expected_ioctls = uffd_test_ops->expected_ioctls;
-	if ((uffdio_register.ioctls & expected_ioctls) !=
-	    expected_ioctls)
-		fprintf(stderr,
-			"unexpected missing ioctl for anon memory\n"),
-			exit(1);
+	if ((uffdio_register.ioctls & expected_ioctls) != expected_ioctls) {
+		fprintf(stderr, "unexpected missing ioctl for anon memory\n");
+		exit(1);
+	}
 
-	if (faulting_process(1))
-		fprintf(stderr, "faulting process failed\n"), exit(1);
+	if (faulting_process(1)) {
+		fprintf(stderr, "faulting process failed\n");
+		exit(1);
+	}
 
 	if (uffd_test_ops->release_pages(area_dst))
 		return 1;
 
-	if (pthread_create(&uffd_mon, &attr, uffd_poll_thread, &stats))
-		perror("uffd_poll_thread create"), exit(1);
+	if (pthread_create(&uffd_mon, &attr, uffd_poll_thread, &stats)) {
+		perror("uffd_poll_thread create");
+		exit(1);
+	}
 
 	pid = fork();
-	if (pid < 0)
-		perror("fork"), exit(1);
+	if (pid < 0) {
+		perror("fork");
+		exit(1);
+	}
 
 	if (!pid)
 		exit(faulting_process(2));
 
 	waitpid(pid, &err, 0);
-	if (err)
-		fprintf(stderr, "faulting process failed\n"), exit(1);
+	if (err) {
+		fprintf(stderr, "faulting process failed\n");
+		exit(1);
+	}
 
-	if (write(pipefd[1], &c, sizeof(c)) != sizeof(c))
-		perror("pipe write"), exit(1);
+	if (write(pipefd[1], &c, sizeof(c)) != sizeof(c)) {
+		perror("pipe write");
+		exit(1);
+	}
 	if (pthread_join(uffd_mon, (void **)&userfaults))
 		return 1;
 
@@ -1395,7 +1460,7 @@ static void set_test_type(const char *type)
 		test_type = TEST_SHMEM;
 		uffd_test_ops = &shmem_uffd_test_ops;
 	} else {
-		fprintf(stderr, "Unknown test type: %s\n", type), exit(1);
+		fprintf(stderr, "Unknown test type: %s\n", type); exit(1);
 	}
 
 	if (test_type == TEST_HUGETLB)
@@ -1403,12 +1468,15 @@ static void set_test_type(const char *type)
 	else
 		page_size = sysconf(_SC_PAGE_SIZE);
 
-	if (!page_size)
-		fprintf(stderr, "Unable to determine page size\n"),
-				exit(2);
+	if (!page_size) {
+		fprintf(stderr, "Unable to determine page size\n");
+		exit(2);
+	}
 	if ((unsigned long) area_count(NULL, 0) + sizeof(unsigned long long) * 2
-	    > page_size)
-		fprintf(stderr, "Impossible to run this test\n"), exit(2);
+	    > page_size) {
+		fprintf(stderr, "Impossible to run this test\n");
+		exit(2);
+	}
 }
 
 static void sigalrm(int sig)
@@ -1425,8 +1493,10 @@ int main(int argc, char **argv)
 	if (argc < 4)
 		usage();
 
-	if (signal(SIGALRM, sigalrm) == SIG_ERR)
-		fprintf(stderr, "failed to arm SIGALRM"), exit(1);
+	if (signal(SIGALRM, sigalrm) == SIG_ERR) {
+		fprintf(stderr, "failed to arm SIGALRM");
+		exit(1);
+	}
 	alarm(ALARM_INTERVAL_SECS);
 
 	set_test_type(argv[1]);
diff --git a/tools/testing/selftests/x86/fsgsbase.c b/tools/testing/selftests/x86/fsgsbase.c
index 998319553523..7161cfc2e60b 100644
--- a/tools/testing/selftests/x86/fsgsbase.c
+++ b/tools/testing/selftests/x86/fsgsbase.c
@@ -443,6 +443,68 @@ static void test_unexpected_base(void)
 
 #define USER_REGS_OFFSET(r) offsetof(struct user_regs_struct, r)
 
+static void test_ptrace_write_gs_read_base(void)
+{
+	int status;
+	pid_t child = fork();
+
+	if (child < 0)
+		err(1, "fork");
+
+	if (child == 0) {
+		printf("[RUN]\tPTRACE_POKE GS, read GSBASE back\n");
+
+		printf("[RUN]\tARCH_SET_GS to 1\n");
+		if (syscall(SYS_arch_prctl, ARCH_SET_GS, 1) != 0)
+			err(1, "ARCH_SET_GS");
+
+		if (ptrace(PTRACE_TRACEME, 0, NULL, NULL) != 0)
+			err(1, "PTRACE_TRACEME");
+
+		raise(SIGTRAP);
+		_exit(0);
+	}
+
+	wait(&status);
+
+	if (WSTOPSIG(status) == SIGTRAP) {
+		unsigned long base;
+		unsigned long gs_offset = USER_REGS_OFFSET(gs);
+		unsigned long base_offset = USER_REGS_OFFSET(gs_base);
+
+		/* Read the initial base.  It should be 1. */
+		base = ptrace(PTRACE_PEEKUSER, child, base_offset, NULL);
+		if (base == 1) {
+			printf("[OK]\tGSBASE started at 1\n");
+		} else {
+			nerrs++;
+			printf("[FAIL]\tGSBASE started at 0x%lx\n", base);
+		}
+
+		printf("[RUN]\tSet GS = 0x7, read GSBASE\n");
+
+		/* Poke an LDT selector into GS. */
+		if (ptrace(PTRACE_POKEUSER, child, gs_offset, 0x7) != 0)
+			err(1, "PTRACE_POKEUSER");
+
+		/* And read the base. */
+		base = ptrace(PTRACE_PEEKUSER, child, base_offset, NULL);
+
+		if (base == 0 || base == 1) {
+			printf("[OK]\tGSBASE reads as 0x%lx with invalid GS\n", base);
+		} else {
+			nerrs++;
+			printf("[FAIL]\tGSBASE=0x%lx (should be 0 or 1)\n", base);
+		}
+	}
+
+	ptrace(PTRACE_CONT, child, NULL, NULL);
+
+	wait(&status);
+	if (!WIFEXITED(status))
+		printf("[WARN]\tChild didn't exit cleanly.\n");
+}
+
 static void test_ptrace_write_gsbase(void)
 {
 	int status;
@@ -517,6 +579,9 @@ static void test_ptrace_write_gsbase(void)
 
 END:
 	ptrace(PTRACE_CONT, child, NULL, NULL);
+	wait(&status);
+	if (!WIFEXITED(status))
+		printf("[WARN]\tChild didn't exit cleanly.\n");
 }
 
 int main()
@@ -526,6 +591,9 @@ int main()
 	shared_scratch = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
 			      MAP_ANONYMOUS | MAP_SHARED, -1, 0);
 
+	/* Do these tests before we have an LDT. */
+	test_ptrace_write_gs_read_base();
+
 	/* Probe FSGSBASE */
 	sethandler(SIGILL, sigill, 0);
 	if (sigsetjmp(jmpbuf, 1) == 0) {
diff --git a/tools/testing/selftests/x86/test_vsyscall.c b/tools/testing/selftests/x86/test_vsyscall.c
index c41f24b517f4..65c141ebfbbd 100644
--- a/tools/testing/selftests/x86/test_vsyscall.c
+++ b/tools/testing/selftests/x86/test_vsyscall.c
@@ -462,6 +462,17 @@ static int test_vsys_x(void)
 	return 0;
 }
 
+/*
+ * Debuggers expect ptrace() to be able to peek at the vsyscall page.
+ * Use process_vm_readv() as a proxy for ptrace() to test this.  We
+ * want it to work in the vsyscall=emulate case and to fail in the
+ * vsyscall=xonly case.
+ *
+ * It's worth noting that this ABI is a bit nutty.  write(2) can't
+ * read from the vsyscall page on any kernel version or mode.  The
+ * fact that ptrace() ever worked was a nice courtesy of old kernels,
+ * but the code to support it is fairly gross.
+ */
 static int test_process_vm_readv(void)
 {
 #ifdef __x86_64__
@@ -477,8 +488,12 @@ static int test_process_vm_readv(void)
 	remote.iov_len = 4096;
 	ret = process_vm_readv(getpid(), &local, 1, &remote, 1, 0);
 	if (ret != 4096) {
-		printf("[OK]\tprocess_vm_readv() failed (ret = %d, errno = %d)\n", ret, errno);
-		return 0;
+		/*
+		 * We expect process_vm_readv() to work if and only if the
+		 * vsyscall page is readable.
+		 */
+		printf("[%s]\tprocess_vm_readv() failed (ret = %d, errno = %d)\n", vsyscall_map_r ? "FAIL" : "OK", ret, errno);
+		return vsyscall_map_r ? 1 : 0;
 	}
 
 	if (vsyscall_map_r) {
@@ -488,6 +503,9 @@ static int test_process_vm_readv(void)
 			printf("[FAIL]\tIt worked but returned incorrect data\n");
 			return 1;
 		}
+	} else {
+		printf("[FAIL]\tprocess_rm_readv() succeeded, but it should have failed in this configuration\n");
+		return 1;
 	}
 #endif
 
diff --git a/tools/usb/Build b/tools/usb/Build
new file mode 100644
index 000000000000..2ad6f9745816
--- /dev/null
+++ b/tools/usb/Build
@@ -0,0 +1,2 @@
+testusb-y += testusb.o
+ffs-test-y += ffs-test.o
diff --git a/tools/usb/Makefile b/tools/usb/Makefile
index 01d758d73b6d..1b128e551b2e 100644
--- a/tools/usb/Makefile
+++ b/tools/usb/Makefile
@@ -1,14 +1,51 @@
 # SPDX-License-Identifier: GPL-2.0
 # Makefile for USB tools
+include ../scripts/Makefile.include
 
-PTHREAD_LIBS = -lpthread
-WARNINGS = -Wall -Wextra
-CFLAGS = $(WARNINGS) -g -I../include
-LDFLAGS = $(PTHREAD_LIBS)
+bindir ?= /usr/bin
 
-all: testusb ffs-test
-%: %.c
-	$(CC) $(CFLAGS) -o $@ $^ $(LDFLAGS)
+ifeq ($(srctree),)
+srctree := $(patsubst %/,%,$(dir $(CURDIR)))
+srctree := $(patsubst %/,%,$(dir $(srctree)))
+endif
+
+# Do not use make's built-in rules
+# (this improves performance and avoids hard-to-debug behaviour);
+MAKEFLAGS += -r
+
+override CFLAGS += -O2 -Wall -Wextra -g -D_GNU_SOURCE -I$(OUTPUT)include -I$(srctree)/tools/include
+override LDFLAGS += -lpthread
+
+ALL_TARGETS := testusb ffs-test
+ALL_PROGRAMS := $(patsubst %,$(OUTPUT)%,$(ALL_TARGETS))
+
+all: $(ALL_PROGRAMS)
+
+export srctree OUTPUT CC LD CFLAGS
+include $(srctree)/tools/build/Makefile.include
+
+TESTUSB_IN := $(OUTPUT)testusb-in.o
+$(TESTUSB_IN): FORCE
+	$(Q)$(MAKE) $(build)=testusb
+$(OUTPUT)testusb: $(TESTUSB_IN)
+	$(QUIET_LINK)$(CC) $(CFLAGS) $< -o $@ $(LDFLAGS)
+
+FFS_TEST_IN := $(OUTPUT)ffs-test-in.o
+$(FFS_TEST_IN): FORCE
+	$(Q)$(MAKE) $(build)=ffs-test
+$(OUTPUT)ffs-test: $(FFS_TEST_IN)
+	$(QUIET_LINK)$(CC) $(CFLAGS) $< -o $@ $(LDFLAGS)
 
 clean:
-	$(RM) testusb ffs-test
+	rm -f $(ALL_PROGRAMS)
+	find $(if $(OUTPUT),$(OUTPUT),.) -name '*.o' -delete -o -name '\.*.d' -delete -o -name '\.*.o.cmd' -delete
+
+install: $(ALL_PROGRAMS)
+	install -d -m 755 $(DESTDIR)$(bindir);		\
+	for program in $(ALL_PROGRAMS); do		\
+		install $$program $(DESTDIR)$(bindir);	\
+	done
+
+FORCE:
+
+.PHONY: all install clean FORCE prepare
diff --git a/tools/virtio/linux/virtio_config.h b/tools/virtio/linux/virtio_config.h
index dbf14c1e2188..f2640e505c4e 100644
--- a/tools/virtio/linux/virtio_config.h
+++ b/tools/virtio/linux/virtio_config.h
@@ -42,16 +42,16 @@ static inline void __virtio_clear_bit(struct virtio_device *vdev,
 	(__virtio_test_bit((dev), feature))
 
 /**
- * virtio_has_iommu_quirk - determine whether this device has the iommu quirk
+ * virtio_has_dma_quirk - determine whether this device has the DMA quirk
  * @vdev: the device
  */
-static inline bool virtio_has_iommu_quirk(const struct virtio_device *vdev)
+static inline bool virtio_has_dma_quirk(const struct virtio_device *vdev)
 {
 	/*
 	 * Note the reverse polarity of the quirk feature (compared to most
 	 * other features), this is for compatibility with legacy systems.
 	 */
-	return !virtio_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM);
+	return !virtio_has_feature(vdev, VIRTIO_F_ACCESS_PLATFORM);
 }
 
 static inline bool virtio_is_little_endian(struct virtio_device *vdev)
diff --git a/tools/vm/page-types.c b/tools/vm/page-types.c
index 58c0eab71bca..0517c744b04e 100644
--- a/tools/vm/page-types.c
+++ b/tools/vm/page-types.c
@@ -78,6 +78,7 @@
 #define KPF_ARCH		38
 #define KPF_UNCACHED		39
 #define KPF_SOFTDIRTY		40
+#define KPF_ARCH_2		41
 
 /* [48-] take some arbitrary free slots for expanding overloaded flags
  * not part of kernel API
@@ -135,6 +136,7 @@ static const char * const page_flag_names[] = {
 	[KPF_ARCH]		= "h:arch",
 	[KPF_UNCACHED]		= "c:uncached",
 	[KPF_SOFTDIRTY]		= "f:softdirty",
+	[KPF_ARCH_2]		= "H:arch_2",
 
 	[KPF_READAHEAD]		= "I:readahead",
 	[KPF_SLOB_FREE]		= "P:slob_free",