diff options
Diffstat (limited to 'tools')
437 files changed, 19350 insertions, 3336 deletions
diff --git a/tools/arch/powerpc/include/uapi/asm/kvm.h b/tools/arch/powerpc/include/uapi/asm/kvm.h index 264e266a85bf..c3af3f324c5a 100644 --- a/tools/arch/powerpc/include/uapi/asm/kvm.h +++ b/tools/arch/powerpc/include/uapi/asm/kvm.h @@ -640,6 +640,11 @@ struct kvm_ppc_cpu_char { #define KVM_REG_PPC_ONLINE (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xbf) #define KVM_REG_PPC_PTCR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc0) +/* POWER10 registers */ +#define KVM_REG_PPC_MMCR3 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc1) +#define KVM_REG_PPC_SIER2 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc2) +#define KVM_REG_PPC_SIER3 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc3) + /* Transactional Memory checkpointed state: * This is all GPRs, all VSX regs and a subset of SPRs */ diff --git a/tools/arch/powerpc/include/uapi/asm/perf_regs.h b/tools/arch/powerpc/include/uapi/asm/perf_regs.h index f599064dd8dc..bdf5f10f8b9f 100644 --- a/tools/arch/powerpc/include/uapi/asm/perf_regs.h +++ b/tools/arch/powerpc/include/uapi/asm/perf_regs.h @@ -48,6 +48,24 @@ enum perf_event_powerpc_regs { PERF_REG_POWERPC_DSISR, PERF_REG_POWERPC_SIER, PERF_REG_POWERPC_MMCRA, - PERF_REG_POWERPC_MAX, + /* Extended registers */ + PERF_REG_POWERPC_MMCR0, + PERF_REG_POWERPC_MMCR1, + PERF_REG_POWERPC_MMCR2, + PERF_REG_POWERPC_MMCR3, + PERF_REG_POWERPC_SIER2, + PERF_REG_POWERPC_SIER3, + /* Max regs without the extended regs */ + PERF_REG_POWERPC_MAX = PERF_REG_POWERPC_MMCRA + 1, }; + +#define PERF_REG_PMU_MASK ((1ULL << PERF_REG_POWERPC_MAX) - 1) + +/* PERF_REG_EXTENDED_MASK value for CPU_FTR_ARCH_300 */ +#define PERF_REG_PMU_MASK_300 (((1ULL << (PERF_REG_POWERPC_MMCR2 + 1)) - 1) - PERF_REG_PMU_MASK) +/* PERF_REG_EXTENDED_MASK value for CPU_FTR_ARCH_31 */ +#define PERF_REG_PMU_MASK_31 (((1ULL << (PERF_REG_POWERPC_SIER3 + 1)) - 1) - PERF_REG_PMU_MASK) + +#define PERF_REG_MAX_ISA_300 (PERF_REG_POWERPC_MMCR2 + 1) +#define PERF_REG_MAX_ISA_31 (PERF_REG_POWERPC_SIER3 + 1) #endif /* _UAPI_ASM_POWERPC_PERF_REGS_H */ diff --git a/tools/arch/riscv/include/uapi/asm/unistd.h b/tools/arch/riscv/include/uapi/asm/unistd.h index 0e2eeeb1fd27..f506cca520b0 100644 --- a/tools/arch/riscv/include/uapi/asm/unistd.h +++ b/tools/arch/riscv/include/uapi/asm/unistd.h @@ -12,7 +12,7 @@ * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. + * along with this program. If not, see <https://www.gnu.org/licenses/>. */ #ifdef __LP64__ diff --git a/tools/arch/s390/include/uapi/asm/kvm.h b/tools/arch/s390/include/uapi/asm/kvm.h index 436ec7636927..7a6b14874d65 100644 --- a/tools/arch/s390/include/uapi/asm/kvm.h +++ b/tools/arch/s390/include/uapi/asm/kvm.h @@ -231,11 +231,13 @@ struct kvm_guest_debug_arch { #define KVM_SYNC_GSCB (1UL << 9) #define KVM_SYNC_BPBC (1UL << 10) #define KVM_SYNC_ETOKEN (1UL << 11) +#define KVM_SYNC_DIAG318 (1UL << 12) #define KVM_SYNC_S390_VALID_FIELDS \ (KVM_SYNC_PREFIX | KVM_SYNC_GPRS | KVM_SYNC_ACRS | KVM_SYNC_CRS | \ KVM_SYNC_ARCH0 | KVM_SYNC_PFAULT | KVM_SYNC_VRS | KVM_SYNC_RICCB | \ - KVM_SYNC_FPRS | KVM_SYNC_GSCB | KVM_SYNC_BPBC | KVM_SYNC_ETOKEN) + KVM_SYNC_FPRS | KVM_SYNC_GSCB | KVM_SYNC_BPBC | KVM_SYNC_ETOKEN | \ + KVM_SYNC_DIAG318) /* length and alignment of the sdnx as a power of two */ #define SDNXC 8 @@ -264,7 +266,8 @@ struct kvm_sync_regs { __u8 reserved2 : 7; __u8 padding1[51]; /* riccb needs to be 64byte aligned */ __u8 riccb[64]; /* runtime instrumentation controls block */ - __u8 padding2[192]; /* sdnx needs to be 256byte aligned */ + __u64 diag318; /* diagnose 0x318 info */ + __u8 padding2[184]; /* sdnx needs to be 256byte aligned */ union { __u8 sdnx[SDNXL]; /* state description annex */ struct { diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h index 02dabc9e77b0..2901d5df4366 100644 --- a/tools/arch/x86/include/asm/cpufeatures.h +++ b/tools/arch/x86/include/asm/cpufeatures.h @@ -96,6 +96,7 @@ #define X86_FEATURE_SYSCALL32 ( 3*32+14) /* "" syscall in IA32 userspace */ #define X86_FEATURE_SYSENTER32 ( 3*32+15) /* "" sysenter in IA32 userspace */ #define X86_FEATURE_REP_GOOD ( 3*32+16) /* REP microcode works well */ +/* free ( 3*32+17) */ #define X86_FEATURE_LFENCE_RDTSC ( 3*32+18) /* "" LFENCE synchronizes RDTSC */ #define X86_FEATURE_ACC_POWER ( 3*32+19) /* AMD Accumulated Power Mechanism */ #define X86_FEATURE_NOPL ( 3*32+20) /* The NOPL (0F 1F) instructions */ @@ -107,6 +108,7 @@ #define X86_FEATURE_EXTD_APICID ( 3*32+26) /* Extended APICID (8 bits) */ #define X86_FEATURE_AMD_DCM ( 3*32+27) /* AMD multi-node processor */ #define X86_FEATURE_APERFMPERF ( 3*32+28) /* P-State hardware coordination feedback capability (APERF/MPERF MSRs) */ +/* free ( 3*32+29) */ #define X86_FEATURE_NONSTOP_TSC_S3 ( 3*32+30) /* TSC doesn't stop in S3 state */ #define X86_FEATURE_TSC_KNOWN_FREQ ( 3*32+31) /* TSC has known frequency */ @@ -365,7 +367,9 @@ #define X86_FEATURE_SRBDS_CTRL (18*32+ 9) /* "" SRBDS mitigation MSR available */ #define X86_FEATURE_MD_CLEAR (18*32+10) /* VERW clears CPU buffers */ #define X86_FEATURE_TSX_FORCE_ABORT (18*32+13) /* "" TSX_FORCE_ABORT */ +#define X86_FEATURE_SERIALIZE (18*32+14) /* SERIALIZE instruction */ #define X86_FEATURE_PCONFIG (18*32+18) /* Intel PCONFIG */ +#define X86_FEATURE_ARCH_LBR (18*32+19) /* Intel ARCH LBR */ #define X86_FEATURE_SPEC_CTRL (18*32+26) /* "" Speculation Control (IBRS + IBPB) */ #define X86_FEATURE_INTEL_STIBP (18*32+27) /* "" Single Thread Indirect Branch Predictors */ #define X86_FEATURE_FLUSH_L1D (18*32+28) /* Flush L1D cache */ diff --git a/tools/arch/x86/include/asm/mcsafe_test.h b/tools/arch/x86/include/asm/mcsafe_test.h deleted file mode 100644 index 2ccd588fbad4..000000000000 --- a/tools/arch/x86/include/asm/mcsafe_test.h +++ /dev/null @@ -1,13 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _MCSAFE_TEST_H_ -#define _MCSAFE_TEST_H_ - -.macro MCSAFE_TEST_CTL -.endm - -.macro MCSAFE_TEST_SRC reg count target -.endm - -.macro MCSAFE_TEST_DST reg count target -.endm -#endif /* _MCSAFE_TEST_H_ */ diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h index e8370e64a155..2859ee4f39a8 100644 --- a/tools/arch/x86/include/asm/msr-index.h +++ b/tools/arch/x86/include/asm/msr-index.h @@ -149,6 +149,10 @@ #define MSR_LBR_SELECT 0x000001c8 #define MSR_LBR_TOS 0x000001c9 + +#define MSR_IA32_POWER_CTL 0x000001fc +#define MSR_IA32_POWER_CTL_BIT_EE 19 + #define MSR_LBR_NHM_FROM 0x00000680 #define MSR_LBR_NHM_TO 0x000006c0 #define MSR_LBR_CORE_FROM 0x00000040 @@ -158,7 +162,23 @@ #define LBR_INFO_MISPRED BIT_ULL(63) #define LBR_INFO_IN_TX BIT_ULL(62) #define LBR_INFO_ABORT BIT_ULL(61) +#define LBR_INFO_CYC_CNT_VALID BIT_ULL(60) #define LBR_INFO_CYCLES 0xffff +#define LBR_INFO_BR_TYPE_OFFSET 56 +#define LBR_INFO_BR_TYPE (0xfull << LBR_INFO_BR_TYPE_OFFSET) + +#define MSR_ARCH_LBR_CTL 0x000014ce +#define ARCH_LBR_CTL_LBREN BIT(0) +#define ARCH_LBR_CTL_CPL_OFFSET 1 +#define ARCH_LBR_CTL_CPL (0x3ull << ARCH_LBR_CTL_CPL_OFFSET) +#define ARCH_LBR_CTL_STACK_OFFSET 3 +#define ARCH_LBR_CTL_STACK (0x1ull << ARCH_LBR_CTL_STACK_OFFSET) +#define ARCH_LBR_CTL_FILTER_OFFSET 16 +#define ARCH_LBR_CTL_FILTER (0x7full << ARCH_LBR_CTL_FILTER_OFFSET) +#define MSR_ARCH_LBR_DEPTH 0x000014cf +#define MSR_ARCH_LBR_FROM_0 0x00001500 +#define MSR_ARCH_LBR_TO_0 0x00001600 +#define MSR_ARCH_LBR_INFO_0 0x00001200 #define MSR_IA32_PEBS_ENABLE 0x000003f1 #define MSR_PEBS_DATA_CFG 0x000003f2 @@ -253,8 +273,6 @@ #define MSR_PEBS_FRONTEND 0x000003f7 -#define MSR_IA32_POWER_CTL 0x000001fc - #define MSR_IA32_MC0_CTL 0x00000400 #define MSR_IA32_MC0_STATUS 0x00000401 #define MSR_IA32_MC0_ADDR 0x00000402 @@ -418,7 +436,6 @@ #define MSR_AMD64_PATCH_LEVEL 0x0000008b #define MSR_AMD64_TSC_RATIO 0xc0000104 #define MSR_AMD64_NB_CFG 0xc001001f -#define MSR_AMD64_CPUID_FN_1 0xc0011004 #define MSR_AMD64_PATCH_LOADER 0xc0010020 #define MSR_AMD_PERF_CTL 0xc0010062 #define MSR_AMD_PERF_STATUS 0xc0010063 @@ -427,6 +444,7 @@ #define MSR_AMD64_OSVW_STATUS 0xc0010141 #define MSR_AMD_PPIN_CTL 0xc00102f0 #define MSR_AMD_PPIN 0xc00102f1 +#define MSR_AMD64_CPUID_FN_1 0xc0011004 #define MSR_AMD64_LS_CFG 0xc0011020 #define MSR_AMD64_DC_CFG 0xc0011022 #define MSR_AMD64_BU_CFG2 0xc001102a @@ -466,6 +484,8 @@ #define MSR_F16H_DR0_ADDR_MASK 0xc0011027 /* Fam 15h MSRs */ +#define MSR_F15H_CU_PWR_ACCUMULATOR 0xc001007a +#define MSR_F15H_CU_MAX_PWR_ACCUMULATOR 0xc001007b #define MSR_F15H_PERF_CTL 0xc0010200 #define MSR_F15H_PERF_CTL0 MSR_F15H_PERF_CTL #define MSR_F15H_PERF_CTL1 (MSR_F15H_PERF_CTL + 2) diff --git a/tools/arch/x86/include/asm/orc_types.h b/tools/arch/x86/include/asm/orc_types.h index d25534940bde..fdbffec4cfde 100644 --- a/tools/arch/x86/include/asm/orc_types.h +++ b/tools/arch/x86/include/asm/orc_types.h @@ -39,27 +39,6 @@ #define ORC_REG_SP_INDIRECT 9 #define ORC_REG_MAX 15 -/* - * ORC_TYPE_CALL: Indicates that sp_reg+sp_offset resolves to PREV_SP (the - * caller's SP right before it made the call). Used for all callable - * functions, i.e. all C code and all callable asm functions. - * - * ORC_TYPE_REGS: Used in entry code to indicate that sp_reg+sp_offset points - * to a fully populated pt_regs from a syscall, interrupt, or exception. - * - * ORC_TYPE_REGS_IRET: Used in entry code to indicate that sp_reg+sp_offset - * points to the iret return frame. - * - * The UNWIND_HINT macros are used only for the unwind_hint struct. They - * aren't used in struct orc_entry due to size and complexity constraints. - * Objtool converts them to real types when it converts the hints to orc - * entries. - */ -#define ORC_TYPE_CALL 0 -#define ORC_TYPE_REGS 1 -#define ORC_TYPE_REGS_IRET 2 -#define UNWIND_HINT_TYPE_RET_OFFSET 3 - #ifndef __ASSEMBLY__ /* * This struct is more or less a vastly simplified version of the DWARF Call @@ -78,19 +57,6 @@ struct orc_entry { unsigned end:1; } __packed; -/* - * This struct is used by asm and inline asm code to manually annotate the - * location of registers on the stack for the ORC unwinder. - * - * Type can be either ORC_TYPE_* or UNWIND_HINT_TYPE_*. - */ -struct unwind_hint { - u32 ip; - s16 sp_offset; - u8 sp_reg; - u8 type; - u8 end; -}; #endif /* __ASSEMBLY__ */ #endif /* _ORC_TYPES_H */ diff --git a/tools/arch/x86/lib/memcpy_64.S b/tools/arch/x86/lib/memcpy_64.S index 45f8e1b02241..0b5b8ae56bd9 100644 --- a/tools/arch/x86/lib/memcpy_64.S +++ b/tools/arch/x86/lib/memcpy_64.S @@ -4,7 +4,6 @@ #include <linux/linkage.h> #include <asm/errno.h> #include <asm/cpufeatures.h> -#include <asm/mcsafe_test.h> #include <asm/alternative-asm.h> #include <asm/export.h> @@ -187,117 +186,3 @@ SYM_FUNC_START(memcpy_orig) SYM_FUNC_END(memcpy_orig) .popsection - -#ifndef CONFIG_UML - -MCSAFE_TEST_CTL - -/* - * __memcpy_mcsafe - memory copy with machine check exception handling - * Note that we only catch machine checks when reading the source addresses. - * Writes to target are posted and don't generate machine checks. - */ -SYM_FUNC_START(__memcpy_mcsafe) - cmpl $8, %edx - /* Less than 8 bytes? Go to byte copy loop */ - jb .L_no_whole_words - - /* Check for bad alignment of source */ - testl $7, %esi - /* Already aligned */ - jz .L_8byte_aligned - - /* Copy one byte at a time until source is 8-byte aligned */ - movl %esi, %ecx - andl $7, %ecx - subl $8, %ecx - negl %ecx - subl %ecx, %edx -.L_read_leading_bytes: - movb (%rsi), %al - MCSAFE_TEST_SRC %rsi 1 .E_leading_bytes - MCSAFE_TEST_DST %rdi 1 .E_leading_bytes -.L_write_leading_bytes: - movb %al, (%rdi) - incq %rsi - incq %rdi - decl %ecx - jnz .L_read_leading_bytes - -.L_8byte_aligned: - movl %edx, %ecx - andl $7, %edx - shrl $3, %ecx - jz .L_no_whole_words - -.L_read_words: - movq (%rsi), %r8 - MCSAFE_TEST_SRC %rsi 8 .E_read_words - MCSAFE_TEST_DST %rdi 8 .E_write_words -.L_write_words: - movq %r8, (%rdi) - addq $8, %rsi - addq $8, %rdi - decl %ecx - jnz .L_read_words - - /* Any trailing bytes? */ -.L_no_whole_words: - andl %edx, %edx - jz .L_done_memcpy_trap - - /* Copy trailing bytes */ - movl %edx, %ecx -.L_read_trailing_bytes: - movb (%rsi), %al - MCSAFE_TEST_SRC %rsi 1 .E_trailing_bytes - MCSAFE_TEST_DST %rdi 1 .E_trailing_bytes -.L_write_trailing_bytes: - movb %al, (%rdi) - incq %rsi - incq %rdi - decl %ecx - jnz .L_read_trailing_bytes - - /* Copy successful. Return zero */ -.L_done_memcpy_trap: - xorl %eax, %eax -.L_done: - ret -SYM_FUNC_END(__memcpy_mcsafe) -EXPORT_SYMBOL_GPL(__memcpy_mcsafe) - - .section .fixup, "ax" - /* - * Return number of bytes not copied for any failure. Note that - * there is no "tail" handling since the source buffer is 8-byte - * aligned and poison is cacheline aligned. - */ -.E_read_words: - shll $3, %ecx -.E_leading_bytes: - addl %edx, %ecx -.E_trailing_bytes: - mov %ecx, %eax - jmp .L_done - - /* - * For write fault handling, given the destination is unaligned, - * we handle faults on multi-byte writes with a byte-by-byte - * copy up to the write-protected page. - */ -.E_write_words: - shll $3, %ecx - addl %edx, %ecx - movl %ecx, %edx - jmp mcsafe_handle_tail - - .previous - - _ASM_EXTABLE_FAULT(.L_read_leading_bytes, .E_leading_bytes) - _ASM_EXTABLE_FAULT(.L_read_words, .E_read_words) - _ASM_EXTABLE_FAULT(.L_read_trailing_bytes, .E_trailing_bytes) - _ASM_EXTABLE(.L_write_leading_bytes, .E_leading_bytes) - _ASM_EXTABLE(.L_write_words, .E_write_words) - _ASM_EXTABLE(.L_write_trailing_bytes, .E_trailing_bytes) -#endif diff --git a/tools/arch/x86/tools/gen-insn-attr-x86.awk b/tools/arch/x86/tools/gen-insn-attr-x86.awk index a42015b305f4..af38469afd14 100644 --- a/tools/arch/x86/tools/gen-insn-attr-x86.awk +++ b/tools/arch/x86/tools/gen-insn-attr-x86.awk @@ -362,6 +362,9 @@ function convert_operands(count,opnd, i,j,imm,mod) END { if (awkchecked != "") exit 1 + + print "#ifndef __BOOT_COMPRESSED\n" + # print escape opcode map's array print "/* Escape opcode map array */" print "const insn_attr_t * const inat_escape_tables[INAT_ESC_MAX + 1]" \ @@ -388,6 +391,51 @@ END { for (j = 0; j < max_lprefix; j++) if (atable[i,j]) print " ["i"]["j"] = "atable[i,j]"," - print "};" + print "};\n" + + print "#else /* !__BOOT_COMPRESSED */\n" + + print "/* Escape opcode map array */" + print "static const insn_attr_t *inat_escape_tables[INAT_ESC_MAX + 1]" \ + "[INAT_LSTPFX_MAX + 1];" + print "" + + print "/* Group opcode map array */" + print "static const insn_attr_t *inat_group_tables[INAT_GRP_MAX + 1]"\ + "[INAT_LSTPFX_MAX + 1];" + print "" + + print "/* AVX opcode map array */" + print "static const insn_attr_t *inat_avx_tables[X86_VEX_M_MAX + 1]"\ + "[INAT_LSTPFX_MAX + 1];" + print "" + + print "static void inat_init_tables(void)" + print "{" + + # print escape opcode map's array + print "\t/* Print Escape opcode map array */" + for (i = 0; i < geid; i++) + for (j = 0; j < max_lprefix; j++) + if (etable[i,j]) + print "\tinat_escape_tables["i"]["j"] = "etable[i,j]";" + print "" + + # print group opcode map's array + print "\t/* Print Group opcode map array */" + for (i = 0; i < ggid; i++) + for (j = 0; j < max_lprefix; j++) + if (gtable[i,j]) + print "\tinat_group_tables["i"]["j"] = "gtable[i,j]";" + print "" + # print AVX opcode map's array + print "\t/* Print AVX opcode map array */" + for (i = 0; i < gaid; i++) + for (j = 0; j < max_lprefix; j++) + if (atable[i,j]) + print "\tinat_avx_tables["i"]["j"] = "atable[i,j]";" + + print "}" + print "#endif" } diff --git a/tools/bootconfig/samples/bad-override.bconf b/tools/bootconfig/samples/bad-override.bconf new file mode 100644 index 000000000000..fde6c561512e --- /dev/null +++ b/tools/bootconfig/samples/bad-override.bconf @@ -0,0 +1,3 @@ +key.subkey = value +# We can not override pre-defined subkeys with value +key := value diff --git a/tools/bootconfig/samples/bad-override2.bconf b/tools/bootconfig/samples/bad-override2.bconf new file mode 100644 index 000000000000..688587cb023c --- /dev/null +++ b/tools/bootconfig/samples/bad-override2.bconf @@ -0,0 +1,3 @@ +key = value +# We can not override pre-defined value with subkey +key.subkey := value diff --git a/tools/bootconfig/samples/good-override.bconf b/tools/bootconfig/samples/good-override.bconf new file mode 100644 index 000000000000..7d31d5f8fbd8 --- /dev/null +++ b/tools/bootconfig/samples/good-override.bconf @@ -0,0 +1,6 @@ +# Override the value +key.word = 1,2,4 +key.word := 2,3 + +# No pre-defined key +key.new.word := "new" diff --git a/tools/bootconfig/test-bootconfig.sh b/tools/bootconfig/test-bootconfig.sh index 3c2ab9e75730..d295e406a756 100755 --- a/tools/bootconfig/test-bootconfig.sh +++ b/tools/bootconfig/test-bootconfig.sh @@ -117,6 +117,19 @@ xpass grep -q "bar" $OUTFILE xpass grep -q "baz" $OUTFILE xpass grep -q "qux" $OUTFILE +echo "Override same-key values" +cat > $TEMPCONF << EOF +key = bar, baz +key := qux +EOF +echo > $INITRD + +xpass $BOOTCONF -a $TEMPCONF $INITRD +$BOOTCONF $INITRD > $OUTFILE +xfail grep -q "bar" $OUTFILE +xfail grep -q "baz" $OUTFILE +xpass grep -q "qux" $OUTFILE + echo "Double/single quotes test" echo "key = '\"string\"';" > $TEMPCONF $BOOTCONF -a $TEMPCONF $INITRD @@ -124,6 +137,31 @@ $BOOTCONF $INITRD > $TEMPCONF cat $TEMPCONF xpass grep \'\"string\"\' $TEMPCONF +echo "Repeat same-key tree" +cat > $TEMPCONF << EOF +foo +bar +foo { buz } +EOF +echo > $INITRD + +xpass $BOOTCONF -a $TEMPCONF $INITRD +$BOOTCONF $INITRD > $OUTFILE +xpass grep -q "bar" $OUTFILE + + +echo "Remove/keep tailing spaces" +cat > $TEMPCONF << EOF +foo = val # comment +bar = "val2 " # comment +EOF +echo > $INITRD + +xpass $BOOTCONF -a $TEMPCONF $INITRD +$BOOTCONF $INITRD > $OUTFILE +xfail grep -q val[[:space:]] $OUTFILE +xpass grep -q val2[[:space:]] $OUTFILE + echo "=== expected failure cases ===" for i in samples/bad-* ; do xfail $BOOTCONF -a $i $INITRD diff --git a/tools/bpf/Makefile b/tools/bpf/Makefile index 0a6d09a3e91f..39bb322707b4 100644 --- a/tools/bpf/Makefile +++ b/tools/bpf/Makefile @@ -38,7 +38,7 @@ FEATURE_TESTS = libbfd disassembler-four-args FEATURE_DISPLAY = libbfd disassembler-four-args check_feat := 1 -NON_CHECK_FEAT_TARGETS := clean bpftool_clean runqslower_clean +NON_CHECK_FEAT_TARGETS := clean bpftool_clean runqslower_clean resolve_btfids_clean ifdef MAKECMDGOALS ifeq ($(filter-out $(NON_CHECK_FEAT_TARGETS),$(MAKECMDGOALS)),) check_feat := 0 @@ -89,7 +89,7 @@ $(OUTPUT)bpf_exp.lex.c: $(OUTPUT)bpf_exp.yacc.c $(OUTPUT)bpf_exp.yacc.o: $(OUTPUT)bpf_exp.yacc.c $(OUTPUT)bpf_exp.lex.o: $(OUTPUT)bpf_exp.lex.c -clean: bpftool_clean runqslower_clean +clean: bpftool_clean runqslower_clean resolve_btfids_clean $(call QUIET_CLEAN, bpf-progs) $(Q)$(RM) -r -- $(OUTPUT)*.o $(OUTPUT)bpf_jit_disasm $(OUTPUT)bpf_dbg \ $(OUTPUT)bpf_asm $(OUTPUT)bpf_exp.yacc.* $(OUTPUT)bpf_exp.lex.* diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile index 8462690a039b..4828913703b6 100644 --- a/tools/bpf/bpftool/Makefile +++ b/tools/bpf/bpftool/Makefile @@ -25,7 +25,7 @@ endif LIBBPF = $(LIBBPF_PATH)libbpf.a -BPFTOOL_VERSION := $(shell make -rR --no-print-directory -sC ../../.. kernelversion) +BPFTOOL_VERSION ?= $(shell make -rR --no-print-directory -sC ../../.. kernelversion) $(LIBBPF): FORCE $(if $(LIBBPF_OUTPUT),@mkdir -p $(LIBBPF_OUTPUT)) diff --git a/tools/bpf/bpftool/btf_dumper.c b/tools/bpf/bpftool/btf_dumper.c index ede162f83eea..0e9310727281 100644 --- a/tools/bpf/bpftool/btf_dumper.c +++ b/tools/bpf/bpftool/btf_dumper.c @@ -67,7 +67,7 @@ static int dump_prog_id_as_func_ptr(const struct btf_dumper *d, if (!info->btf_id || !info->nr_func_info || btf__get_from_id(info->btf_id, &prog_btf)) goto print; - finfo = (struct bpf_func_info *)info->func_info; + finfo = u64_to_ptr(info->func_info); func_type = btf__type_by_id(prog_btf, finfo->type_id); if (!func_type || !btf_is_func(func_type)) goto print; diff --git a/tools/bpf/bpftool/gen.c b/tools/bpf/bpftool/gen.c index 8a4c2b3b0cd6..f61184653633 100644 --- a/tools/bpf/bpftool/gen.c +++ b/tools/bpf/bpftool/gen.c @@ -143,6 +143,20 @@ static int codegen_datasec_def(struct bpf_object *obj, var_name, align); return -EINVAL; } + /* Assume 32-bit architectures when generating data section + * struct memory layout. Given bpftool can't know which target + * host architecture it's emitting skeleton for, we need to be + * conservative and assume 32-bit one to ensure enough padding + * bytes are generated for pointer and long types. This will + * still work correctly for 64-bit architectures, because in + * the worst case we'll generate unnecessary padding field, + * which on 64-bit architectures is not strictly necessary and + * would be handled by natural 8-byte alignment. But it still + * will be a correct memory layout, based on recorded offsets + * in BTF. + */ + if (align > 4) + align = 4; align_off = (off + align - 1) / align * align; if (align_off != need_off) { @@ -397,7 +411,7 @@ static int do_skeleton(int argc, char **argv) { \n\ struct %1$s *obj; \n\ \n\ - obj = (typeof(obj))calloc(1, sizeof(*obj)); \n\ + obj = (struct %1$s *)calloc(1, sizeof(*obj)); \n\ if (!obj) \n\ return NULL; \n\ if (%1$s__create_skeleton(obj)) \n\ @@ -461,7 +475,7 @@ static int do_skeleton(int argc, char **argv) { \n\ struct bpf_object_skeleton *s; \n\ \n\ - s = (typeof(s))calloc(1, sizeof(*s)); \n\ + s = (struct bpf_object_skeleton *)calloc(1, sizeof(*s));\n\ if (!s) \n\ return -1; \n\ obj->skeleton = s; \n\ @@ -479,7 +493,7 @@ static int do_skeleton(int argc, char **argv) /* maps */ \n\ s->map_cnt = %zu; \n\ s->map_skel_sz = sizeof(*s->maps); \n\ - s->maps = (typeof(s->maps))calloc(s->map_cnt, s->map_skel_sz);\n\ + s->maps = (struct bpf_map_skeleton *)calloc(s->map_cnt, s->map_skel_sz);\n\ if (!s->maps) \n\ goto err; \n\ ", @@ -515,7 +529,7 @@ static int do_skeleton(int argc, char **argv) /* programs */ \n\ s->prog_cnt = %zu; \n\ s->prog_skel_sz = sizeof(*s->progs); \n\ - s->progs = (typeof(s->progs))calloc(s->prog_cnt, s->prog_skel_sz);\n\ + s->progs = (struct bpf_prog_skeleton *)calloc(s->prog_cnt, s->prog_skel_sz);\n\ if (!s->progs) \n\ goto err; \n\ ", diff --git a/tools/bpf/bpftool/iter.c b/tools/bpf/bpftool/iter.c index c9dba7543dba..3b1aad7535dd 100644 --- a/tools/bpf/bpftool/iter.c +++ b/tools/bpf/bpftool/iter.c @@ -11,6 +11,7 @@ static int do_pin(int argc, char **argv) { DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, iter_opts); + union bpf_iter_link_info linfo; const char *objfile, *path; struct bpf_program *prog; struct bpf_object *obj; @@ -36,6 +37,11 @@ static int do_pin(int argc, char **argv) map_fd = map_parse_fd(&argc, &argv); if (map_fd < 0) return -1; + + memset(&linfo, 0, sizeof(linfo)); + linfo.map.map_fd = map_fd; + iter_opts.link_info = &linfo; + iter_opts.link_info_len = sizeof(linfo); } } @@ -57,9 +63,6 @@ static int do_pin(int argc, char **argv) goto close_obj; } - if (map_fd >= 0) - iter_opts.map_fd = map_fd; - link = bpf_program__attach_iter(prog, &iter_opts); if (IS_ERR(link)) { err = PTR_ERR(link); diff --git a/tools/bpf/bpftool/link.c b/tools/bpf/bpftool/link.c index 1b793759170e..a89f09e3c848 100644 --- a/tools/bpf/bpftool/link.c +++ b/tools/bpf/bpftool/link.c @@ -106,7 +106,7 @@ static int show_link_close_json(int fd, struct bpf_link_info *info) switch (info->type) { case BPF_LINK_TYPE_RAW_TRACEPOINT: jsonw_string_field(json_wtr, "tp_name", - (const char *)info->raw_tracepoint.tp_name); + u64_to_ptr(info->raw_tracepoint.tp_name)); break; case BPF_LINK_TYPE_TRACING: err = get_prog_info(info->prog_id, &prog_info); @@ -185,7 +185,7 @@ static int show_link_close_plain(int fd, struct bpf_link_info *info) switch (info->type) { case BPF_LINK_TYPE_RAW_TRACEPOINT: printf("\n\ttp '%s' ", - (const char *)info->raw_tracepoint.tp_name); + (const char *)u64_to_ptr(info->raw_tracepoint.tp_name)); break; case BPF_LINK_TYPE_TRACING: err = get_prog_info(info->prog_id, &prog_info); diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h index e3a79b5a9960..c46e52137b87 100644 --- a/tools/bpf/bpftool/main.h +++ b/tools/bpf/bpftool/main.h @@ -21,7 +21,15 @@ /* Make sure we do not use kernel-only integer typedefs */ #pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64 -#define ptr_to_u64(ptr) ((__u64)(unsigned long)(ptr)) +static inline __u64 ptr_to_u64(const void *ptr) +{ + return (__u64)(unsigned long)ptr; +} + +static inline void *u64_to_ptr(__u64 ptr) +{ + return (void *)(unsigned long)ptr; +} #define NEXT_ARG() ({ argc--; argv++; if (argc < 0) usage(); }) #define NEXT_ARGP() ({ (*argc)--; (*argv)++; if (*argc < 0) usage(); }) diff --git a/tools/bpf/bpftool/pids.c b/tools/bpf/bpftool/pids.c index e3b116325403..df7d8ec76036 100644 --- a/tools/bpf/bpftool/pids.c +++ b/tools/bpf/bpftool/pids.c @@ -134,6 +134,8 @@ int build_obj_refs_table(struct obj_refs_table *table, enum bpf_obj_type type) while (true) { ret = read(fd, buf, sizeof(buf)); if (ret < 0) { + if (errno == EAGAIN) + continue; err = -errno; p_err("failed to read PID iterator output: %d", err); goto out; diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index 158995d853b0..d393eb8263a6 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -428,14 +428,14 @@ prog_dump(struct bpf_prog_info *info, enum dump_mode mode, p_info("no instructions returned"); return -1; } - buf = (unsigned char *)(info->jited_prog_insns); + buf = u64_to_ptr(info->jited_prog_insns); member_len = info->jited_prog_len; } else { /* DUMP_XLATED */ if (info->xlated_prog_len == 0 || !info->xlated_prog_insns) { p_err("error retrieving insn dump: kernel.kptr_restrict set?"); return -1; } - buf = (unsigned char *)info->xlated_prog_insns; + buf = u64_to_ptr(info->xlated_prog_insns); member_len = info->xlated_prog_len; } @@ -444,7 +444,7 @@ prog_dump(struct bpf_prog_info *info, enum dump_mode mode, return -1; } - func_info = (void *)info->func_info; + func_info = u64_to_ptr(info->func_info); if (info->nr_line_info) { prog_linfo = bpf_prog_linfo__new(info); @@ -462,7 +462,7 @@ prog_dump(struct bpf_prog_info *info, enum dump_mode mode, n = write(fd, buf, member_len); close(fd); - if (n != member_len) { + if (n != (ssize_t)member_len) { p_err("error writing output file: %s", n < 0 ? strerror(errno) : "short write"); return -1; @@ -492,13 +492,13 @@ prog_dump(struct bpf_prog_info *info, enum dump_mode mode, __u32 i; if (info->nr_jited_ksyms) { kernel_syms_load(&dd); - ksyms = (__u64 *) info->jited_ksyms; + ksyms = u64_to_ptr(info->jited_ksyms); } if (json_output) jsonw_start_array(json_wtr); - lens = (__u32 *) info->jited_func_lens; + lens = u64_to_ptr(info->jited_func_lens); for (i = 0; i < info->nr_jited_func_lens; i++) { if (ksyms) { sym = kernel_syms_search(&dd, ksyms[i]); @@ -559,7 +559,7 @@ prog_dump(struct bpf_prog_info *info, enum dump_mode mode, } else { kernel_syms_load(&dd); dd.nr_jited_ksyms = info->nr_jited_ksyms; - dd.jited_ksyms = (__u64 *) info->jited_ksyms; + dd.jited_ksyms = u64_to_ptr(info->jited_ksyms); dd.btf = btf; dd.func_info = func_info; dd.finfo_rec_size = info->func_info_rec_size; @@ -1681,7 +1681,7 @@ static char *profile_target_name(int tgt_fd) goto out; } - func_info = (struct bpf_func_info *)(info_linear->info.func_info); + func_info = u64_to_ptr(info_linear->info.func_info); t = btf__type_by_id(btf, func_info[0].type_id); if (!t) { p_err("btf %d doesn't have type %d", diff --git a/tools/bpf/resolve_btfids/Makefile b/tools/bpf/resolve_btfids/Makefile index a88cd4426398..fe8eb537688b 100644 --- a/tools/bpf/resolve_btfids/Makefile +++ b/tools/bpf/resolve_btfids/Makefile @@ -80,6 +80,7 @@ libbpf-clean: clean: libsubcmd-clean libbpf-clean fixdep-clean $(call msg,CLEAN,$(BINARY)) $(Q)$(RM) -f $(BINARY); \ + $(RM) -rf $(if $(OUTPUT),$(OUTPUT),.)/feature; \ find $(if $(OUTPUT),$(OUTPUT),.) -name \*.o -or -name \*.o.cmd -or -name \*.o.d | xargs $(RM) tags: diff --git a/tools/bpf/resolve_btfids/main.c b/tools/bpf/resolve_btfids/main.c index 52d883325a23..0def0bb1f783 100644 --- a/tools/bpf/resolve_btfids/main.c +++ b/tools/bpf/resolve_btfids/main.c @@ -233,6 +233,39 @@ static struct btf_id *add_symbol(struct rb_root *root, char *name, size_t size) return btf_id__add(root, id, false); } +/* + * The data of compressed section should be aligned to 4 + * (for 32bit) or 8 (for 64 bit) bytes. The binutils ld + * sets sh_addralign to 1, which makes libelf fail with + * misaligned section error during the update: + * FAILED elf_update(WRITE): invalid section alignment + * + * While waiting for ld fix, we fix the compressed sections + * sh_addralign value manualy. + */ +static int compressed_section_fix(Elf *elf, Elf_Scn *scn, GElf_Shdr *sh) +{ + int expected = gelf_getclass(elf) == ELFCLASS32 ? 4 : 8; + + if (!(sh->sh_flags & SHF_COMPRESSED)) + return 0; + + if (sh->sh_addralign == expected) + return 0; + + pr_debug2(" - fixing wrong alignment sh_addralign %u, expected %u\n", + sh->sh_addralign, expected); + + sh->sh_addralign = expected; + + if (gelf_update_shdr(scn, sh) == 0) { + printf("FAILED cannot update section header: %s\n", + elf_errmsg(-1)); + return -1; + } + return 0; +} + static int elf_collect(struct object *obj) { Elf_Scn *scn = NULL; @@ -309,6 +342,9 @@ static int elf_collect(struct object *obj) obj->efile.idlist_shndx = idx; obj->efile.idlist_addr = sh.sh_addr; } + + if (compressed_section_fix(elf, scn, &sh)) + return -1; } return 0; @@ -566,6 +602,7 @@ static int sets_patch(struct object *obj) next = rb_next(next); } + return 0; } static int symbols_patch(struct object *obj) diff --git a/tools/build/Makefile.feature b/tools/build/Makefile.feature index cb152370fdef..c1daf4d57518 100644 --- a/tools/build/Makefile.feature +++ b/tools/build/Makefile.feature @@ -8,7 +8,7 @@ endif feature_check = $(eval $(feature_check_code)) define feature_check_code - feature-$(1) := $(shell $(MAKE) OUTPUT=$(OUTPUT_FEATURES) CFLAGS="$(EXTRA_CFLAGS) $(FEATURE_CHECK_CFLAGS-$(1))" CXXFLAGS="$(EXTRA_CXXFLAGS) $(FEATURE_CHECK_CXXFLAGS-$(1))" LDFLAGS="$(LDFLAGS) $(FEATURE_CHECK_LDFLAGS-$(1))" -C $(feature_dir) $(OUTPUT_FEATURES)test-$1.bin >/dev/null 2>/dev/null && echo 1 || echo 0) + feature-$(1) := $(shell $(MAKE) OUTPUT=$(OUTPUT_FEATURES) CC="$(CC)" CXX="$(CXX)" CFLAGS="$(EXTRA_CFLAGS) $(FEATURE_CHECK_CFLAGS-$(1))" CXXFLAGS="$(EXTRA_CXXFLAGS) $(FEATURE_CHECK_CXXFLAGS-$(1))" LDFLAGS="$(LDFLAGS) $(FEATURE_CHECK_LDFLAGS-$(1))" -C $(feature_dir) $(OUTPUT_FEATURES)test-$1.bin >/dev/null 2>/dev/null && echo 1 || echo 0) endef feature_set = $(eval $(feature_set_code)) @@ -98,7 +98,8 @@ FEATURE_TESTS_EXTRA := \ llvm-version \ clang \ libbpf \ - libpfm4 + libpfm4 \ + libdebuginfod FEATURE_TESTS ?= $(FEATURE_TESTS_BASIC) diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile index 88371f7f0369..d220fe952747 100644 --- a/tools/build/feature/Makefile +++ b/tools/build/feature/Makefile @@ -26,6 +26,7 @@ FILES= \ test-libelf-gelf_getnote.bin \ test-libelf-getshdrstrndx.bin \ test-libelf-mmap.bin \ + test-libdebuginfod.bin \ test-libnuma.bin \ test-numa_num_possible_cpus.bin \ test-libperl.bin \ @@ -74,8 +75,6 @@ FILES= \ FILES := $(addprefix $(OUTPUT),$(FILES)) -CC ?= $(CROSS_COMPILE)gcc -CXX ?= $(CROSS_COMPILE)g++ PKG_CONFIG ?= $(CROSS_COMPILE)pkg-config LLVM_CONFIG ?= llvm-config CLANG ?= clang @@ -159,6 +158,9 @@ $(OUTPUT)test-libelf-gelf_getnote.bin: $(OUTPUT)test-libelf-getshdrstrndx.bin: $(BUILD) -lelf +$(OUTPUT)test-libdebuginfod.bin: + $(BUILD) -ldebuginfod + $(OUTPUT)test-libnuma.bin: $(BUILD) -lnuma diff --git a/tools/build/feature/test-libdebuginfod.c b/tools/build/feature/test-libdebuginfod.c new file mode 100644 index 000000000000..da22548b8413 --- /dev/null +++ b/tools/build/feature/test-libdebuginfod.c @@ -0,0 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <elfutils/debuginfod.h> + +int main(void) +{ + debuginfod_client* c = debuginfod_begin(); + return (long)c; +} diff --git a/tools/cgroup/iocost_monitor.py b/tools/cgroup/iocost_monitor.py index f4699f9b46ba..c4ff907c078b 100644 --- a/tools/cgroup/iocost_monitor.py +++ b/tools/cgroup/iocost_monitor.py @@ -45,8 +45,7 @@ except: err('The kernel does not have iocost enabled') IOC_RUNNING = prog['IOC_RUNNING'].value_() -NR_USAGE_SLOTS = prog['NR_USAGE_SLOTS'].value_() -HWEIGHT_WHOLE = prog['HWEIGHT_WHOLE'].value_() +WEIGHT_ONE = prog['WEIGHT_ONE'].value_() VTIME_PER_SEC = prog['VTIME_PER_SEC'].value_() VTIME_PER_USEC = prog['VTIME_PER_USEC'].value_() AUTOP_SSD_FAST = prog['AUTOP_SSD_FAST'].value_() @@ -100,7 +99,7 @@ class IocStat: self.period_ms = ioc.period_us.value_() / 1_000 self.period_at = ioc.period_at.value_() / 1_000_000 self.vperiod_at = ioc.period_at_vtime.value_() / VTIME_PER_SEC - self.vrate_pct = ioc.vtime_rate.counter.value_() * 100 / VTIME_PER_USEC + self.vrate_pct = ioc.vtime_base_rate.value_() * 100 / VTIME_PER_USEC self.busy_level = ioc.busy_level.value_() self.autop_idx = ioc.autop_idx.value_() self.user_cost_model = ioc.user_cost_model.value_() @@ -136,7 +135,7 @@ class IocStat: def table_header_str(self): return f'{"":25} active {"weight":>9} {"hweight%":>13} {"inflt%":>6} ' \ - f'{"dbt":>3} {"delay":>6} {"usages%"}' + f'{"debt":>7} {"delay":>7} {"usage%"}' class IocgStat: def __init__(self, iocg): @@ -144,11 +143,11 @@ class IocgStat: blkg = iocg.pd.blkg self.is_active = not list_empty(iocg.active_list.address_of_()) - self.weight = iocg.weight.value_() - self.active = iocg.active.value_() - self.inuse = iocg.inuse.value_() - self.hwa_pct = iocg.hweight_active.value_() * 100 / HWEIGHT_WHOLE - self.hwi_pct = iocg.hweight_inuse.value_() * 100 / HWEIGHT_WHOLE + self.weight = iocg.weight.value_() / WEIGHT_ONE + self.active = iocg.active.value_() / WEIGHT_ONE + self.inuse = iocg.inuse.value_() / WEIGHT_ONE + self.hwa_pct = iocg.hweight_active.value_() * 100 / WEIGHT_ONE + self.hwi_pct = iocg.hweight_inuse.value_() * 100 / WEIGHT_ONE self.address = iocg.value_() vdone = iocg.done_vtime.counter.value_() @@ -160,23 +159,13 @@ class IocgStat: else: self.inflight_pct = 0 - # vdebt used to be an atomic64_t and is now u64, support both - try: - self.debt_ms = iocg.abs_vdebt.counter.value_() / VTIME_PER_USEC / 1000 - except: - self.debt_ms = iocg.abs_vdebt.value_() / VTIME_PER_USEC / 1000 - - self.use_delay = blkg.use_delay.counter.value_() - self.delay_ms = blkg.delay_nsec.counter.value_() / 1_000_000 - - usage_idx = iocg.usage_idx.value_() - self.usages = [] - self.usage = 0 - for i in range(NR_USAGE_SLOTS): - usage = iocg.usages[(usage_idx + 1 + i) % NR_USAGE_SLOTS].value_() - upct = usage * 100 / HWEIGHT_WHOLE - self.usages.append(upct) - self.usage = max(self.usage, upct) + self.usage = (100 * iocg.usage_delta_us.value_() / + ioc.period_us.value_()) if self.active else 0 + self.debt_ms = iocg.abs_vdebt.value_() / VTIME_PER_USEC / 1000 + if blkg.use_delay.counter.value_() != 0: + self.delay_ms = blkg.delay_nsec.counter.value_() / 1_000_000 + else: + self.delay_ms = 0 def dict(self, now, path): out = { 'cgroup' : path, @@ -189,25 +178,20 @@ class IocgStat: 'hweight_inuse_pct' : self.hwi_pct, 'inflight_pct' : self.inflight_pct, 'debt_ms' : self.debt_ms, - 'use_delay' : self.use_delay, 'delay_ms' : self.delay_ms, 'usage_pct' : self.usage, 'address' : self.address } - for i in range(len(self.usages)): - out[f'usage_pct_{i}'] = str(self.usages[i]) return out def table_row_str(self, path): out = f'{path[-28:]:28} ' \ f'{"*" if self.is_active else " "} ' \ - f'{self.inuse:5}/{self.active:5} ' \ + f'{round(self.inuse):5}/{round(self.active):5} ' \ f'{self.hwi_pct:6.2f}/{self.hwa_pct:6.2f} ' \ f'{self.inflight_pct:6.2f} ' \ - f'{min(math.ceil(self.debt_ms), 999):3} ' \ - f'{min(self.use_delay, 99):2}*'\ - f'{min(math.ceil(self.delay_ms), 999):03} ' - for u in self.usages: - out += f'{min(round(u), 999):03d}:' + f'{self.debt_ms:7.2f} ' \ + f'{self.delay_ms:7.2f} '\ + f'{min(self.usage, 999):6.2f}' out = out.rstrip(':') return out diff --git a/tools/cgroup/memcg_slabinfo.py b/tools/cgroup/memcg_slabinfo.py new file mode 100644 index 000000000000..c4225ed63565 --- /dev/null +++ b/tools/cgroup/memcg_slabinfo.py @@ -0,0 +1,226 @@ +#!/usr/bin/env drgn +# +# Copyright (C) 2020 Roman Gushchin <guro@fb.com> +# Copyright (C) 2020 Facebook + +from os import stat +import argparse +import sys + +from drgn.helpers.linux import list_for_each_entry, list_empty +from drgn.helpers.linux import for_each_page +from drgn.helpers.linux.cpumask import for_each_online_cpu +from drgn.helpers.linux.percpu import per_cpu_ptr +from drgn import container_of, FaultError, Object + + +DESC = """ +This is a drgn script to provide slab statistics for memory cgroups. +It supports cgroup v2 and v1 and can emulate memory.kmem.slabinfo +interface of cgroup v1. +For drgn, visit https://github.com/osandov/drgn. +""" + + +MEMCGS = {} + +OO_SHIFT = 16 +OO_MASK = ((1 << OO_SHIFT) - 1) + + +def err(s): + print('slabinfo.py: error: %s' % s, file=sys.stderr, flush=True) + sys.exit(1) + + +def find_memcg_ids(css=prog['root_mem_cgroup'].css, prefix=''): + if not list_empty(css.children.address_of_()): + for css in list_for_each_entry('struct cgroup_subsys_state', + css.children.address_of_(), + 'sibling'): + name = prefix + '/' + css.cgroup.kn.name.string_().decode('utf-8') + memcg = container_of(css, 'struct mem_cgroup', 'css') + MEMCGS[css.cgroup.kn.id.value_()] = memcg + find_memcg_ids(css, name) + + +def is_root_cache(s): + try: + return False if s.memcg_params.root_cache else True + except AttributeError: + return True + + +def cache_name(s): + if is_root_cache(s): + return s.name.string_().decode('utf-8') + else: + return s.memcg_params.root_cache.name.string_().decode('utf-8') + + +# SLUB + +def oo_order(s): + return s.oo.x >> OO_SHIFT + + +def oo_objects(s): + return s.oo.x & OO_MASK + + +def count_partial(n, fn): + nr_pages = 0 + for page in list_for_each_entry('struct page', n.partial.address_of_(), + 'lru'): + nr_pages += fn(page) + return nr_pages + + +def count_free(page): + return page.objects - page.inuse + + +def slub_get_slabinfo(s, cfg): + nr_slabs = 0 + nr_objs = 0 + nr_free = 0 + + for node in range(cfg['nr_nodes']): + n = s.node[node] + nr_slabs += n.nr_slabs.counter.value_() + nr_objs += n.total_objects.counter.value_() + nr_free += count_partial(n, count_free) + + return {'active_objs': nr_objs - nr_free, + 'num_objs': nr_objs, + 'active_slabs': nr_slabs, + 'num_slabs': nr_slabs, + 'objects_per_slab': oo_objects(s), + 'cache_order': oo_order(s), + 'limit': 0, + 'batchcount': 0, + 'shared': 0, + 'shared_avail': 0} + + +def cache_show(s, cfg, objs): + if cfg['allocator'] == 'SLUB': + sinfo = slub_get_slabinfo(s, cfg) + else: + err('SLAB isn\'t supported yet') + + if cfg['shared_slab_pages']: + sinfo['active_objs'] = objs + sinfo['num_objs'] = objs + + print('%-17s %6lu %6lu %6u %4u %4d' + ' : tunables %4u %4u %4u' + ' : slabdata %6lu %6lu %6lu' % ( + cache_name(s), sinfo['active_objs'], sinfo['num_objs'], + s.size, sinfo['objects_per_slab'], 1 << sinfo['cache_order'], + sinfo['limit'], sinfo['batchcount'], sinfo['shared'], + sinfo['active_slabs'], sinfo['num_slabs'], + sinfo['shared_avail'])) + + +def detect_kernel_config(): + cfg = {} + + cfg['nr_nodes'] = prog['nr_online_nodes'].value_() + + if prog.type('struct kmem_cache').members[1][1] == 'flags': + cfg['allocator'] = 'SLUB' + elif prog.type('struct kmem_cache').members[1][1] == 'batchcount': + cfg['allocator'] = 'SLAB' + else: + err('Can\'t determine the slab allocator') + + cfg['shared_slab_pages'] = False + try: + if prog.type('struct obj_cgroup'): + cfg['shared_slab_pages'] = True + except: + pass + + return cfg + + +def for_each_slab_page(prog): + PGSlab = 1 << prog.constant('PG_slab') + PGHead = 1 << prog.constant('PG_head') + + for page in for_each_page(prog): + try: + if page.flags.value_() & PGSlab: + yield page + except FaultError: + pass + + +def main(): + parser = argparse.ArgumentParser(description=DESC, + formatter_class= + argparse.RawTextHelpFormatter) + parser.add_argument('cgroup', metavar='CGROUP', + help='Target memory cgroup') + args = parser.parse_args() + + try: + cgroup_id = stat(args.cgroup).st_ino + find_memcg_ids() + memcg = MEMCGS[cgroup_id] + except KeyError: + err('Can\'t find the memory cgroup') + + cfg = detect_kernel_config() + + print('# name <active_objs> <num_objs> <objsize> <objperslab> <pagesperslab>' + ' : tunables <limit> <batchcount> <sharedfactor>' + ' : slabdata <active_slabs> <num_slabs> <sharedavail>') + + if cfg['shared_slab_pages']: + obj_cgroups = set() + stats = {} + caches = {} + + # find memcg pointers belonging to the specified cgroup + obj_cgroups.add(memcg.objcg.value_()) + for ptr in list_for_each_entry('struct obj_cgroup', + memcg.objcg_list.address_of_(), + 'list'): + obj_cgroups.add(ptr.value_()) + + # look over all slab pages, belonging to non-root memcgs + # and look for objects belonging to the given memory cgroup + for page in for_each_slab_page(prog): + objcg_vec_raw = page.obj_cgroups.value_() + if objcg_vec_raw == 0: + continue + cache = page.slab_cache + if not cache: + continue + addr = cache.value_() + caches[addr] = cache + # clear the lowest bit to get the true obj_cgroups + objcg_vec = Object(prog, page.obj_cgroups.type_, + value=objcg_vec_raw & ~1) + + if addr not in stats: + stats[addr] = 0 + + for i in range(oo_objects(cache)): + if objcg_vec[i].value_() in obj_cgroups: + stats[addr] += 1 + + for addr in caches: + if stats[addr] > 0: + cache_show(caches[addr], cfg, stats[addr]) + + else: + for s in list_for_each_entry('struct kmem_cache', + memcg.kmem_caches.address_of_(), + 'memcg_params.kmem_caches_node'): + cache_show(s, cfg, None) + + +main() diff --git a/tools/gpio/gpio-event-mon.c b/tools/gpio/gpio-event-mon.c index 1a303a81aeef..90c3155f05b1 100644 --- a/tools/gpio/gpio-event-mon.c +++ b/tools/gpio/gpio-event-mon.c @@ -23,17 +23,17 @@ #include <sys/ioctl.h> #include <sys/types.h> #include <linux/gpio.h> +#include "gpio-utils.h" int monitor_device(const char *device_name, - unsigned int line, - uint32_t handleflags, - uint32_t eventflags, + unsigned int *lines, + unsigned int num_lines, + struct gpio_v2_line_config *config, unsigned int loops) { - struct gpioevent_request req; - struct gpiohandle_data data; + struct gpio_v2_line_values values; char *chrdev_name; - int fd; + int cfd, lfd; int ret; int i = 0; @@ -41,44 +41,55 @@ int monitor_device(const char *device_name, if (ret < 0) return -ENOMEM; - fd = open(chrdev_name, 0); - if (fd == -1) { + cfd = open(chrdev_name, 0); + if (cfd == -1) { ret = -errno; fprintf(stderr, "Failed to open %s\n", chrdev_name); goto exit_free_name; } - req.lineoffset = line; - req.handleflags = handleflags; - req.eventflags = eventflags; - strcpy(req.consumer_label, "gpio-event-mon"); - - ret = ioctl(fd, GPIO_GET_LINEEVENT_IOCTL, &req); - if (ret == -1) { - ret = -errno; - fprintf(stderr, "Failed to issue GET EVENT " - "IOCTL (%d)\n", - ret); - goto exit_close_error; - } + ret = gpiotools_request_line(device_name, lines, num_lines, config, + "gpio-event-mon"); + if (ret < 0) + goto exit_device_close; + else + lfd = ret; /* Read initial states */ - ret = ioctl(req.fd, GPIOHANDLE_GET_LINE_VALUES_IOCTL, &data); - if (ret == -1) { - ret = -errno; - fprintf(stderr, "Failed to issue GPIOHANDLE GET LINE " - "VALUES IOCTL (%d)\n", + values.mask = 0; + values.bits = 0; + for (i = 0; i < num_lines; i++) + gpiotools_set_bit(&values.mask, i); + ret = gpiotools_get_values(lfd, &values); + if (ret < 0) { + fprintf(stderr, + "Failed to issue GPIO LINE GET VALUES IOCTL (%d)\n", ret); - goto exit_close_error; + goto exit_line_close; } - fprintf(stdout, "Monitoring line %d on %s\n", line, device_name); - fprintf(stdout, "Initial line value: %d\n", data.values[0]); + if (num_lines == 1) { + fprintf(stdout, "Monitoring line %d on %s\n", lines[0], device_name); + fprintf(stdout, "Initial line value: %d\n", + gpiotools_test_bit(values.bits, 0)); + } else { + fprintf(stdout, "Monitoring lines %d", lines[0]); + for (i = 1; i < num_lines - 1; i++) + fprintf(stdout, ", %d", lines[i]); + fprintf(stdout, " and %d on %s\n", lines[i], device_name); + fprintf(stdout, "Initial line values: %d", + gpiotools_test_bit(values.bits, 0)); + for (i = 1; i < num_lines - 1; i++) + fprintf(stdout, ", %d", + gpiotools_test_bit(values.bits, i)); + fprintf(stdout, " and %d\n", + gpiotools_test_bit(values.bits, i)); + } while (1) { - struct gpioevent_data event; + struct gpio_v2_line_event event; - ret = read(req.fd, &event, sizeof(event)); + ret = read(lfd, &event, sizeof(event)); if (ret == -1) { if (errno == -EAGAIN) { fprintf(stderr, "nothing available\n"); @@ -96,12 +107,14 @@ int monitor_device(const char *device_name, ret = -EIO; break; } - fprintf(stdout, "GPIO EVENT %llu: ", event.timestamp); + fprintf(stdout, "GPIO EVENT at %llu on line %d (%d|%d) ", + event.timestamp_ns, event.offset, event.line_seqno, + event.seqno); switch (event.id) { - case GPIOEVENT_EVENT_RISING_EDGE: + case GPIO_V2_LINE_EVENT_RISING_EDGE: fprintf(stdout, "rising edge"); break; - case GPIOEVENT_EVENT_FALLING_EDGE: + case GPIO_V2_LINE_EVENT_FALLING_EDGE: fprintf(stdout, "falling edge"); break; default: @@ -114,8 +127,11 @@ int monitor_device(const char *device_name, break; } -exit_close_error: - if (close(fd) == -1) +exit_line_close: + if (close(lfd) == -1) + perror("Failed to close line file"); +exit_device_close: + if (close(cfd) == -1) perror("Failed to close GPIO character device file"); exit_free_name: free(chrdev_name); @@ -127,29 +143,37 @@ void print_usage(void) fprintf(stderr, "Usage: gpio-event-mon [options]...\n" "Listen to events on GPIO lines, 0->1 1->0\n" " -n <name> Listen on GPIOs on a named device (must be stated)\n" - " -o <n> Offset to monitor\n" + " -o <n> Offset of line to monitor (may be repeated)\n" " -d Set line as open drain\n" " -s Set line as open source\n" " -r Listen for rising edges\n" " -f Listen for falling edges\n" + " -b <n> Debounce the line with period n microseconds\n" " [-c <n>] Do <n> loops (optional, infinite loop if not stated)\n" " -? This helptext\n" "\n" "Example:\n" - "gpio-event-mon -n gpiochip0 -o 4 -r -f\n" + "gpio-event-mon -n gpiochip0 -o 4 -r -f -b 10000\n" ); } +#define EDGE_FLAGS \ + (GPIO_V2_LINE_FLAG_EDGE_RISING | \ + GPIO_V2_LINE_FLAG_EDGE_FALLING) + int main(int argc, char **argv) { const char *device_name = NULL; - unsigned int line = -1; + unsigned int lines[GPIO_V2_LINES_MAX]; + unsigned int num_lines = 0; unsigned int loops = 0; - uint32_t handleflags = GPIOHANDLE_REQUEST_INPUT; - uint32_t eventflags = 0; - int c; + struct gpio_v2_line_config config; + int c, attr, i; + unsigned long debounce_period_us = 0; - while ((c = getopt(argc, argv, "c:n:o:dsrf?")) != -1) { + memset(&config, 0, sizeof(config)); + config.flags = GPIO_V2_LINE_FLAG_INPUT; + while ((c = getopt(argc, argv, "c:n:o:b:dsrf?")) != -1) { switch (c) { case 'c': loops = strtoul(optarg, NULL, 10); @@ -158,19 +182,27 @@ int main(int argc, char **argv) device_name = optarg; break; case 'o': - line = strtoul(optarg, NULL, 10); + if (num_lines >= GPIO_V2_LINES_MAX) { + print_usage(); + return -1; + } + lines[num_lines] = strtoul(optarg, NULL, 10); + num_lines++; + break; + case 'b': + debounce_period_us = strtoul(optarg, NULL, 10); break; case 'd': - handleflags |= GPIOHANDLE_REQUEST_OPEN_DRAIN; + config.flags |= GPIO_V2_LINE_FLAG_OPEN_DRAIN; break; case 's': - handleflags |= GPIOHANDLE_REQUEST_OPEN_SOURCE; + config.flags |= GPIO_V2_LINE_FLAG_OPEN_SOURCE; break; case 'r': - eventflags |= GPIOEVENT_REQUEST_RISING_EDGE; + config.flags |= GPIO_V2_LINE_FLAG_EDGE_RISING; break; case 'f': - eventflags |= GPIOEVENT_REQUEST_FALLING_EDGE; + config.flags |= GPIO_V2_LINE_FLAG_EDGE_FALLING; break; case '?': print_usage(); @@ -178,15 +210,23 @@ int main(int argc, char **argv) } } - if (!device_name || line == -1) { + if (debounce_period_us) { + attr = config.num_attrs; + config.num_attrs++; + for (i = 0; i < num_lines; i++) + gpiotools_set_bit(&config.attrs[attr].mask, i); + config.attrs[attr].attr.id = GPIO_V2_LINE_ATTR_ID_DEBOUNCE; + config.attrs[attr].attr.debounce_period_us = debounce_period_us; + } + + if (!device_name || num_lines == 0) { print_usage(); return -1; } - if (!eventflags) { + if (!(config.flags & EDGE_FLAGS)) { printf("No flags specified, listening on both rising and " "falling edges\n"); - eventflags = GPIOEVENT_REQUEST_BOTH_EDGES; + config.flags |= EDGE_FLAGS; } - return monitor_device(device_name, line, handleflags, - eventflags, loops); + return monitor_device(device_name, lines, num_lines, &config, loops); } diff --git a/tools/gpio/gpio-hammer.c b/tools/gpio/gpio-hammer.c index 9fd926e8cb52..54fdf59dd320 100644 --- a/tools/gpio/gpio-hammer.c +++ b/tools/gpio/gpio-hammer.c @@ -22,39 +22,46 @@ #include <linux/gpio.h> #include "gpio-utils.h" -int hammer_device(const char *device_name, unsigned int *lines, int nlines, +int hammer_device(const char *device_name, unsigned int *lines, int num_lines, unsigned int loops) { - struct gpiohandle_data data; + struct gpio_v2_line_values values; + struct gpio_v2_line_config config; char swirr[] = "-\\|/"; int fd; int ret; int i, j; unsigned int iteration = 0; - memset(&data.values, 0, sizeof(data.values)); - ret = gpiotools_request_linehandle(device_name, lines, nlines, - GPIOHANDLE_REQUEST_OUTPUT, &data, - "gpio-hammer"); + memset(&config, 0, sizeof(config)); + config.flags = GPIO_V2_LINE_FLAG_OUTPUT; + + ret = gpiotools_request_line(device_name, lines, num_lines, + &config, "gpio-hammer"); if (ret < 0) goto exit_error; else fd = ret; - ret = gpiotools_get_values(fd, &data); + values.mask = 0; + values.bits = 0; + for (i = 0; i < num_lines; i++) + gpiotools_set_bit(&values.mask, i); + + ret = gpiotools_get_values(fd, &values); if (ret < 0) goto exit_close_error; fprintf(stdout, "Hammer lines ["); - for (i = 0; i < nlines; i++) { + for (i = 0; i < num_lines; i++) { fprintf(stdout, "%d", lines[i]); - if (i != (nlines - 1)) + if (i != (num_lines - 1)) fprintf(stdout, ", "); } fprintf(stdout, "] on %s, initial states: [", device_name); - for (i = 0; i < nlines; i++) { - fprintf(stdout, "%d", data.values[i]); - if (i != (nlines - 1)) + for (i = 0; i < num_lines; i++) { + fprintf(stdout, "%d", gpiotools_test_bit(values.bits, i)); + if (i != (num_lines - 1)) fprintf(stdout, ", "); } fprintf(stdout, "]\n"); @@ -63,15 +70,15 @@ int hammer_device(const char *device_name, unsigned int *lines, int nlines, j = 0; while (1) { /* Invert all lines so we blink */ - for (i = 0; i < nlines; i++) - data.values[i] = !data.values[i]; + for (i = 0; i < num_lines; i++) + gpiotools_change_bit(&values.bits, i); - ret = gpiotools_set_values(fd, &data); + ret = gpiotools_set_values(fd, &values); if (ret < 0) goto exit_close_error; /* Re-read values to get status */ - ret = gpiotools_get_values(fd, &data); + ret = gpiotools_get_values(fd, &values); if (ret < 0) goto exit_close_error; @@ -81,9 +88,10 @@ int hammer_device(const char *device_name, unsigned int *lines, int nlines, j = 0; fprintf(stdout, "["); - for (i = 0; i < nlines; i++) { - fprintf(stdout, "%d: %d", lines[i], data.values[i]); - if (i != (nlines - 1)) + for (i = 0; i < num_lines; i++) { + fprintf(stdout, "%d: %d", lines[i], + gpiotools_test_bit(values.bits, i)); + if (i != (num_lines - 1)) fprintf(stdout, ", "); } fprintf(stdout, "]\r"); @@ -97,7 +105,7 @@ int hammer_device(const char *device_name, unsigned int *lines, int nlines, ret = 0; exit_close_error: - gpiotools_release_linehandle(fd); + gpiotools_release_line(fd); exit_error: return ret; } @@ -121,7 +129,7 @@ int main(int argc, char **argv) const char *device_name = NULL; unsigned int lines[GPIOHANDLES_MAX]; unsigned int loops = 0; - int nlines; + int num_lines; int c; int i; @@ -158,11 +166,11 @@ int main(int argc, char **argv) return -1; } - nlines = i; + num_lines = i; - if (!device_name || !nlines) { + if (!device_name || !num_lines) { print_usage(); return -1; } - return hammer_device(device_name, lines, nlines, loops); + return hammer_device(device_name, lines, num_lines, loops); } diff --git a/tools/gpio/gpio-utils.c b/tools/gpio/gpio-utils.c index 16a5d9cb9da2..37187e056c8b 100644 --- a/tools/gpio/gpio-utils.c +++ b/tools/gpio/gpio-utils.c @@ -38,7 +38,7 @@ * such as "gpiochip0" * @lines: An array desired lines, specified by offset * index for the associated GPIO device. - * @nline: The number of lines to request. + * @num_lines: The number of lines to request. * @flag: The new flag for requsted gpio. Reference * "linux/gpio.h" for the meaning of flag. * @data: Default value will be set to gpio when flag is @@ -56,7 +56,7 @@ * On failure return the errno. */ int gpiotools_request_linehandle(const char *device_name, unsigned int *lines, - unsigned int nlines, unsigned int flag, + unsigned int num_lines, unsigned int flag, struct gpiohandle_data *data, const char *consumer_label) { @@ -78,12 +78,12 @@ int gpiotools_request_linehandle(const char *device_name, unsigned int *lines, goto exit_free_name; } - for (i = 0; i < nlines; i++) + for (i = 0; i < num_lines; i++) req.lineoffsets[i] = lines[i]; req.flags = flag; strcpy(req.consumer_label, consumer_label); - req.lines = nlines; + req.lines = num_lines; if (flag & GPIOHANDLE_REQUEST_OUTPUT) memcpy(req.default_values, data, sizeof(req.default_values)); @@ -100,20 +100,87 @@ exit_free_name: free(chrdev_name); return ret < 0 ? ret : req.fd; } + +/** + * gpiotools_request_line() - request gpio lines in a gpiochip + * @device_name: The name of gpiochip without prefix "/dev/", + * such as "gpiochip0" + * @lines: An array desired lines, specified by offset + * index for the associated GPIO device. + * @num_lines: The number of lines to request. + * @config: The new config for requested gpio. Reference + * "linux/gpio.h" for config details. + * @consumer: The name of consumer, such as "sysfs", + * "powerkey". This is useful for other users to + * know who is using. + * + * Request gpio lines through the ioctl provided by chardev. User + * could call gpiotools_set_values() and gpiotools_get_values() to + * read and write respectively through the returned fd. Call + * gpiotools_release_line() to release these lines after that. + * + * Return: On success return the fd; + * On failure return the errno. + */ +int gpiotools_request_line(const char *device_name, unsigned int *lines, + unsigned int num_lines, + struct gpio_v2_line_config *config, + const char *consumer) +{ + struct gpio_v2_line_request req; + char *chrdev_name; + int fd; + int i; + int ret; + + ret = asprintf(&chrdev_name, "/dev/%s", device_name); + if (ret < 0) + return -ENOMEM; + + fd = open(chrdev_name, 0); + if (fd == -1) { + ret = -errno; + fprintf(stderr, "Failed to open %s, %s\n", + chrdev_name, strerror(errno)); + goto exit_free_name; + } + + memset(&req, 0, sizeof(req)); + for (i = 0; i < num_lines; i++) + req.offsets[i] = lines[i]; + + req.config = *config; + strcpy(req.consumer, consumer); + req.num_lines = num_lines; + + ret = ioctl(fd, GPIO_V2_GET_LINE_IOCTL, &req); + if (ret == -1) { + ret = -errno; + fprintf(stderr, "Failed to issue %s (%d), %s\n", + "GPIO_GET_LINE_IOCTL", ret, strerror(errno)); + } + + if (close(fd) == -1) + perror("Failed to close GPIO character device file"); +exit_free_name: + free(chrdev_name); + return ret < 0 ? ret : req.fd; +} + /** * gpiotools_set_values(): Set the value of gpio(s) * @fd: The fd returned by - * gpiotools_request_linehandle(). - * @data: The array of values want to set. + * gpiotools_request_line(). + * @values: The array of values want to set. * * Return: On success return 0; * On failure return the errno. */ -int gpiotools_set_values(const int fd, struct gpiohandle_data *data) +int gpiotools_set_values(const int fd, struct gpio_v2_line_values *values) { int ret; - ret = ioctl(fd, GPIOHANDLE_SET_LINE_VALUES_IOCTL, data); + ret = ioctl(fd, GPIO_V2_LINE_SET_VALUES_IOCTL, values); if (ret == -1) { ret = -errno; fprintf(stderr, "Failed to issue %s (%d), %s\n", @@ -127,17 +194,17 @@ int gpiotools_set_values(const int fd, struct gpiohandle_data *data) /** * gpiotools_get_values(): Get the value of gpio(s) * @fd: The fd returned by - * gpiotools_request_linehandle(). - * @data: The array of values get from hardware. + * gpiotools_request_line(). + * @values: The array of values get from hardware. * * Return: On success return 0; * On failure return the errno. */ -int gpiotools_get_values(const int fd, struct gpiohandle_data *data) +int gpiotools_get_values(const int fd, struct gpio_v2_line_values *values) { int ret; - ret = ioctl(fd, GPIOHANDLE_GET_LINE_VALUES_IOCTL, data); + ret = ioctl(fd, GPIO_V2_LINE_GET_VALUES_IOCTL, values); if (ret == -1) { ret = -errno; fprintf(stderr, "Failed to issue %s (%d), %s\n", @@ -170,6 +237,27 @@ int gpiotools_release_linehandle(const int fd) } /** + * gpiotools_release_line(): Release the line(s) of gpiochip + * @fd: The fd returned by + * gpiotools_request_line(). + * + * Return: On success return 0; + * On failure return the errno. + */ +int gpiotools_release_line(const int fd) +{ + int ret; + + ret = close(fd); + if (ret == -1) { + perror("Failed to close GPIO LINE device file"); + ret = -errno; + } + + return ret; +} + +/** * gpiotools_get(): Get value from specific line * @device_name: The name of gpiochip without prefix "/dev/", * such as "gpiochip0" @@ -180,11 +268,14 @@ int gpiotools_release_linehandle(const int fd) */ int gpiotools_get(const char *device_name, unsigned int line) { - struct gpiohandle_data data; + int ret; + unsigned int value; unsigned int lines[] = {line}; - gpiotools_gets(device_name, lines, 1, &data); - return data.values[0]; + ret = gpiotools_gets(device_name, lines, 1, &value); + if (ret) + return ret; + return value; } @@ -194,28 +285,36 @@ int gpiotools_get(const char *device_name, unsigned int line) * such as "gpiochip0". * @lines: An array desired lines, specified by offset * index for the associated GPIO device. - * @nline: The number of lines to request. - * @data: The array of values get from gpiochip. + * @num_lines: The number of lines to request. + * @values: The array of values get from gpiochip. * * Return: On success return 0; * On failure return the errno. */ int gpiotools_gets(const char *device_name, unsigned int *lines, - unsigned int nlines, struct gpiohandle_data *data) + unsigned int num_lines, unsigned int *values) { - int fd; + int fd, i; int ret; int ret_close; + struct gpio_v2_line_config config; + struct gpio_v2_line_values lv; - ret = gpiotools_request_linehandle(device_name, lines, nlines, - GPIOHANDLE_REQUEST_INPUT, data, - CONSUMER); + memset(&config, 0, sizeof(config)); + config.flags = GPIO_V2_LINE_FLAG_INPUT; + ret = gpiotools_request_line(device_name, lines, num_lines, + &config, CONSUMER); if (ret < 0) return ret; fd = ret; - ret = gpiotools_get_values(fd, data); - ret_close = gpiotools_release_linehandle(fd); + for (i = 0; i < num_lines; i++) + gpiotools_set_bit(&lv.mask, i); + ret = gpiotools_get_values(fd, &lv); + if (!ret) + for (i = 0; i < num_lines; i++) + values[i] = gpiotools_test_bit(lv.bits, i); + ret_close = gpiotools_release_line(fd); return ret < 0 ? ret : ret_close; } @@ -232,11 +331,9 @@ int gpiotools_gets(const char *device_name, unsigned int *lines, int gpiotools_set(const char *device_name, unsigned int line, unsigned int value) { - struct gpiohandle_data data; unsigned int lines[] = {line}; - data.values[0] = value; - return gpiotools_sets(device_name, lines, 1, &data); + return gpiotools_sets(device_name, lines, 1, &value); } /** @@ -245,23 +342,32 @@ int gpiotools_set(const char *device_name, unsigned int line, * such as "gpiochip0". * @lines: An array desired lines, specified by offset * index for the associated GPIO device. - * @nline: The number of lines to request. - * @data: The array of values set to gpiochip, must be + * @num_lines: The number of lines to request. + * @value: The array of values set to gpiochip, must be * 0(low) or 1(high). * * Return: On success return 0; * On failure return the errno. */ int gpiotools_sets(const char *device_name, unsigned int *lines, - unsigned int nlines, struct gpiohandle_data *data) + unsigned int num_lines, unsigned int *values) { - int ret; + int ret, i; + struct gpio_v2_line_config config; - ret = gpiotools_request_linehandle(device_name, lines, nlines, - GPIOHANDLE_REQUEST_OUTPUT, data, - CONSUMER); + memset(&config, 0, sizeof(config)); + config.flags = GPIO_V2_LINE_FLAG_OUTPUT; + config.num_attrs = 1; + config.attrs[0].attr.id = GPIO_V2_LINE_ATTR_ID_OUTPUT_VALUES; + for (i = 0; i < num_lines; i++) { + gpiotools_set_bit(&config.attrs[0].mask, i); + gpiotools_assign_bit(&config.attrs[0].attr.values, + i, values[i]); + } + ret = gpiotools_request_line(device_name, lines, num_lines, + &config, CONSUMER); if (ret < 0) return ret; - return gpiotools_release_linehandle(ret); + return gpiotools_release_line(ret); } diff --git a/tools/gpio/gpio-utils.h b/tools/gpio/gpio-utils.h index cf37f13f3dcb..6c69a9f1c253 100644 --- a/tools/gpio/gpio-utils.h +++ b/tools/gpio/gpio-utils.h @@ -12,7 +12,9 @@ #ifndef _GPIO_UTILS_H_ #define _GPIO_UTILS_H_ +#include <stdbool.h> #include <string.h> +#include <linux/types.h> #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0])) @@ -23,19 +25,55 @@ static inline int check_prefix(const char *str, const char *prefix) } int gpiotools_request_linehandle(const char *device_name, unsigned int *lines, - unsigned int nlines, unsigned int flag, + unsigned int num_lines, unsigned int flag, struct gpiohandle_data *data, const char *consumer_label); -int gpiotools_set_values(const int fd, struct gpiohandle_data *data); -int gpiotools_get_values(const int fd, struct gpiohandle_data *data); int gpiotools_release_linehandle(const int fd); +int gpiotools_request_line(const char *device_name, + unsigned int *lines, + unsigned int num_lines, + struct gpio_v2_line_config *config, + const char *consumer); +int gpiotools_set_values(const int fd, struct gpio_v2_line_values *values); +int gpiotools_get_values(const int fd, struct gpio_v2_line_values *values); +int gpiotools_release_line(const int fd); + int gpiotools_get(const char *device_name, unsigned int line); int gpiotools_gets(const char *device_name, unsigned int *lines, - unsigned int nlines, struct gpiohandle_data *data); + unsigned int num_lines, unsigned int *values); int gpiotools_set(const char *device_name, unsigned int line, unsigned int value); int gpiotools_sets(const char *device_name, unsigned int *lines, - unsigned int nlines, struct gpiohandle_data *data); + unsigned int num_lines, unsigned int *values); + +/* helper functions for gpio_v2_line_values bits */ +static inline void gpiotools_set_bit(__u64 *b, int n) +{ + *b |= _BITULL(n); +} + +static inline void gpiotools_change_bit(__u64 *b, int n) +{ + *b ^= _BITULL(n); +} + +static inline void gpiotools_clear_bit(__u64 *b, int n) +{ + *b &= ~_BITULL(n); +} + +static inline int gpiotools_test_bit(__u64 b, int n) +{ + return !!(b & _BITULL(n)); +} + +static inline void gpiotools_assign_bit(__u64 *b, int n, bool value) +{ + if (value) + gpiotools_set_bit(b, n); + else + gpiotools_clear_bit(b, n); +} #endif /* _GPIO_UTILS_H_ */ diff --git a/tools/gpio/gpio-watch.c b/tools/gpio/gpio-watch.c index 5cea24fddfa7..f229ec62301b 100644 --- a/tools/gpio/gpio-watch.c +++ b/tools/gpio/gpio-watch.c @@ -21,8 +21,8 @@ int main(int argc, char **argv) { - struct gpioline_info_changed chg; - struct gpioline_info req; + struct gpio_v2_line_info_changed chg; + struct gpio_v2_line_info req; struct pollfd pfd; int fd, i, j, ret; char *event, *end; @@ -40,11 +40,11 @@ int main(int argc, char **argv) for (i = 0, j = 2; i < argc - 2; i++, j++) { memset(&req, 0, sizeof(req)); - req.line_offset = strtoul(argv[j], &end, 0); + req.offset = strtoul(argv[j], &end, 0); if (*end != '\0') goto err_usage; - ret = ioctl(fd, GPIO_GET_LINEINFO_WATCH_IOCTL, &req); + ret = ioctl(fd, GPIO_V2_GET_LINEINFO_WATCH_IOCTL, &req); if (ret) { perror("unable to set up line watch"); return EXIT_FAILURE; @@ -71,13 +71,13 @@ int main(int argc, char **argv) } switch (chg.event_type) { - case GPIOLINE_CHANGED_REQUESTED: + case GPIO_V2_LINE_CHANGED_REQUESTED: event = "requested"; break; - case GPIOLINE_CHANGED_RELEASED: + case GPIO_V2_LINE_CHANGED_RELEASED: event = "released"; break; - case GPIOLINE_CHANGED_CONFIG: + case GPIO_V2_LINE_CHANGED_CONFIG: event = "config changed"; break; default: @@ -87,7 +87,7 @@ int main(int argc, char **argv) } printf("line %u: %s at %llu\n", - chg.info.line_offset, event, chg.timestamp); + chg.info.offset, event, chg.timestamp_ns); } } diff --git a/tools/gpio/lsgpio.c b/tools/gpio/lsgpio.c index b08d7a5e779b..5a05a454d0c9 100644 --- a/tools/gpio/lsgpio.c +++ b/tools/gpio/lsgpio.c @@ -25,57 +25,73 @@ struct gpio_flag { char *name; - unsigned long mask; + unsigned long long mask; }; struct gpio_flag flagnames[] = { { - .name = "kernel", - .mask = GPIOLINE_FLAG_KERNEL, + .name = "used", + .mask = GPIO_V2_LINE_FLAG_USED, + }, + { + .name = "input", + .mask = GPIO_V2_LINE_FLAG_INPUT, }, { .name = "output", - .mask = GPIOLINE_FLAG_IS_OUT, + .mask = GPIO_V2_LINE_FLAG_OUTPUT, }, { .name = "active-low", - .mask = GPIOLINE_FLAG_ACTIVE_LOW, + .mask = GPIO_V2_LINE_FLAG_ACTIVE_LOW, }, { .name = "open-drain", - .mask = GPIOLINE_FLAG_OPEN_DRAIN, + .mask = GPIO_V2_LINE_FLAG_OPEN_DRAIN, }, { .name = "open-source", - .mask = GPIOLINE_FLAG_OPEN_SOURCE, + .mask = GPIO_V2_LINE_FLAG_OPEN_SOURCE, }, { .name = "pull-up", - .mask = GPIOLINE_FLAG_BIAS_PULL_UP, + .mask = GPIO_V2_LINE_FLAG_BIAS_PULL_UP, }, { .name = "pull-down", - .mask = GPIOLINE_FLAG_BIAS_PULL_DOWN, + .mask = GPIO_V2_LINE_FLAG_BIAS_PULL_DOWN, }, { .name = "bias-disabled", - .mask = GPIOLINE_FLAG_BIAS_DISABLE, + .mask = GPIO_V2_LINE_FLAG_BIAS_DISABLED, }, }; -void print_flags(unsigned long flags) +static void print_attributes(struct gpio_v2_line_info *info) { int i; - int printed = 0; + const char *field_format = "%s"; for (i = 0; i < ARRAY_SIZE(flagnames); i++) { - if (flags & flagnames[i].mask) { - if (printed) - fprintf(stdout, " "); - fprintf(stdout, "%s", flagnames[i].name); - printed++; + if (info->flags & flagnames[i].mask) { + fprintf(stdout, field_format, flagnames[i].name); + field_format = ", %s"; } } + + if ((info->flags & GPIO_V2_LINE_FLAG_EDGE_RISING) && + (info->flags & GPIO_V2_LINE_FLAG_EDGE_FALLING)) + fprintf(stdout, field_format, "both-edges"); + else if (info->flags & GPIO_V2_LINE_FLAG_EDGE_RISING) + fprintf(stdout, field_format, "rising-edge"); + else if (info->flags & GPIO_V2_LINE_FLAG_EDGE_FALLING) + fprintf(stdout, field_format, "falling-edge"); + + for (i = 0; i < info->num_attrs; i++) { + if (info->attrs[i].id == GPIO_V2_LINE_ATTR_ID_DEBOUNCE) + fprintf(stdout, ", debounce_period=%dusec", + info->attrs[0].debounce_period_us); + } } int list_device(const char *device_name) @@ -109,18 +125,18 @@ int list_device(const char *device_name) /* Loop over the lines and print info */ for (i = 0; i < cinfo.lines; i++) { - struct gpioline_info linfo; + struct gpio_v2_line_info linfo; memset(&linfo, 0, sizeof(linfo)); - linfo.line_offset = i; + linfo.offset = i; - ret = ioctl(fd, GPIO_GET_LINEINFO_IOCTL, &linfo); + ret = ioctl(fd, GPIO_V2_GET_LINEINFO_IOCTL, &linfo); if (ret == -1) { ret = -errno; perror("Failed to issue LINEINFO IOCTL\n"); goto exit_close_error; } - fprintf(stdout, "\tline %2d:", linfo.line_offset); + fprintf(stdout, "\tline %2d:", linfo.offset); if (linfo.name[0]) fprintf(stdout, " \"%s\"", linfo.name); else @@ -131,7 +147,7 @@ int list_device(const char *device_name) fprintf(stdout, " unused"); if (linfo.flags) { fprintf(stdout, " ["); - print_flags(linfo.flags); + print_attributes(&linfo); fprintf(stdout, "]"); } fprintf(stdout, "\n"); diff --git a/tools/iio/iio_event_monitor.c b/tools/iio/iio_event_monitor.c index f115d166c985..bb03859db89d 100644 --- a/tools/iio/iio_event_monitor.c +++ b/tools/iio/iio_event_monitor.c @@ -119,6 +119,7 @@ static const char * const iio_modifier_names[] = { [IIO_MOD_PM2P5] = "pm2p5", [IIO_MOD_PM4] = "pm4", [IIO_MOD_PM10] = "pm10", + [IIO_MOD_O2] = "o2", }; static bool event_is_known(struct iio_event_data *event) @@ -211,6 +212,7 @@ static bool event_is_known(struct iio_event_data *event) case IIO_MOD_PM2P5: case IIO_MOD_PM4: case IIO_MOD_PM10: + case IIO_MOD_O2: break; default: return false; diff --git a/tools/include/linux/jhash.h b/tools/include/linux/jhash.h index 348c6f47e4cc..af8d0fe1c6ce 100644 --- a/tools/include/linux/jhash.h +++ b/tools/include/linux/jhash.h @@ -5,7 +5,7 @@ * * Copyright (C) 2006. Bob Jenkins (bob_jenkins@burtleburtle.net) * - * http://burtleburtle.net/bob/hash/ + * https://burtleburtle.net/bob/hash/ * * These are the credits from Bob's sources: * diff --git a/tools/include/linux/objtool.h b/tools/include/linux/objtool.h new file mode 100644 index 000000000000..ab82c793c897 --- /dev/null +++ b/tools/include/linux/objtool.h @@ -0,0 +1,129 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_OBJTOOL_H +#define _LINUX_OBJTOOL_H + +#ifndef __ASSEMBLY__ + +#include <linux/types.h> + +/* + * This struct is used by asm and inline asm code to manually annotate the + * location of registers on the stack. + */ +struct unwind_hint { + u32 ip; + s16 sp_offset; + u8 sp_reg; + u8 type; + u8 end; +}; +#endif + +/* + * UNWIND_HINT_TYPE_CALL: Indicates that sp_reg+sp_offset resolves to PREV_SP + * (the caller's SP right before it made the call). Used for all callable + * functions, i.e. all C code and all callable asm functions. + * + * UNWIND_HINT_TYPE_REGS: Used in entry code to indicate that sp_reg+sp_offset + * points to a fully populated pt_regs from a syscall, interrupt, or exception. + * + * UNWIND_HINT_TYPE_REGS_PARTIAL: Used in entry code to indicate that + * sp_reg+sp_offset points to the iret return frame. + */ +#define UNWIND_HINT_TYPE_CALL 0 +#define UNWIND_HINT_TYPE_REGS 1 +#define UNWIND_HINT_TYPE_REGS_PARTIAL 2 +#define UNWIND_HINT_TYPE_RET_OFFSET 3 + +#ifdef CONFIG_STACK_VALIDATION + +#ifndef __ASSEMBLY__ + +#define UNWIND_HINT(sp_reg, sp_offset, type, end) \ + "987: \n\t" \ + ".pushsection .discard.unwind_hints\n\t" \ + /* struct unwind_hint */ \ + ".long 987b - .\n\t" \ + ".short " __stringify(sp_offset) "\n\t" \ + ".byte " __stringify(sp_reg) "\n\t" \ + ".byte " __stringify(type) "\n\t" \ + ".byte " __stringify(end) "\n\t" \ + ".balign 4 \n\t" \ + ".popsection\n\t" + +/* + * This macro marks the given function's stack frame as "non-standard", which + * tells objtool to ignore the function when doing stack metadata validation. + * It should only be used in special cases where you're 100% sure it won't + * affect the reliability of frame pointers and kernel stack traces. + * + * For more information, see tools/objtool/Documentation/stack-validation.txt. + */ +#define STACK_FRAME_NON_STANDARD(func) \ + static void __used __section(.discard.func_stack_frame_non_standard) \ + *__func_stack_frame_non_standard_##func = func + +#else /* __ASSEMBLY__ */ + +/* + * This macro indicates that the following intra-function call is valid. + * Any non-annotated intra-function call will cause objtool to issue a warning. + */ +#define ANNOTATE_INTRA_FUNCTION_CALL \ + 999: \ + .pushsection .discard.intra_function_calls; \ + .long 999b; \ + .popsection; + +/* + * In asm, there are two kinds of code: normal C-type callable functions and + * the rest. The normal callable functions can be called by other code, and + * don't do anything unusual with the stack. Such normal callable functions + * are annotated with the ENTRY/ENDPROC macros. Most asm code falls in this + * category. In this case, no special debugging annotations are needed because + * objtool can automatically generate the ORC data for the ORC unwinder to read + * at runtime. + * + * Anything which doesn't fall into the above category, such as syscall and + * interrupt handlers, tends to not be called directly by other functions, and + * often does unusual non-C-function-type things with the stack pointer. Such + * code needs to be annotated such that objtool can understand it. The + * following CFI hint macros are for this type of code. + * + * These macros provide hints to objtool about the state of the stack at each + * instruction. Objtool starts from the hints and follows the code flow, + * making automatic CFI adjustments when it sees pushes and pops, filling out + * the debuginfo as necessary. It will also warn if it sees any + * inconsistencies. + */ +.macro UNWIND_HINT sp_reg:req sp_offset=0 type:req end=0 +.Lunwind_hint_ip_\@: + .pushsection .discard.unwind_hints + /* struct unwind_hint */ + .long .Lunwind_hint_ip_\@ - . + .short \sp_offset + .byte \sp_reg + .byte \type + .byte \end + .balign 4 + .popsection +.endm + +#endif /* __ASSEMBLY__ */ + +#else /* !CONFIG_STACK_VALIDATION */ + +#ifndef __ASSEMBLY__ + +#define UNWIND_HINT(sp_reg, sp_offset, type, end) \ + "\n\t" +#define STACK_FRAME_NON_STANDARD(func) +#else +#define ANNOTATE_INTRA_FUNCTION_CALL +.macro UNWIND_HINT sp_reg:req sp_offset=0 type:req end=0 +.endm +#endif + +#endif /* CONFIG_STACK_VALIDATION */ + +#endif /* _LINUX_OBJTOOL_H */ diff --git a/tools/include/linux/static_call_types.h b/tools/include/linux/static_call_types.h new file mode 100644 index 000000000000..89135bb35bf7 --- /dev/null +++ b/tools/include/linux/static_call_types.h @@ -0,0 +1,35 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _STATIC_CALL_TYPES_H +#define _STATIC_CALL_TYPES_H + +#include <linux/types.h> +#include <linux/stringify.h> + +#define STATIC_CALL_KEY_PREFIX __SCK__ +#define STATIC_CALL_KEY_PREFIX_STR __stringify(STATIC_CALL_KEY_PREFIX) +#define STATIC_CALL_KEY_PREFIX_LEN (sizeof(STATIC_CALL_KEY_PREFIX_STR) - 1) +#define STATIC_CALL_KEY(name) __PASTE(STATIC_CALL_KEY_PREFIX, name) + +#define STATIC_CALL_TRAMP_PREFIX __SCT__ +#define STATIC_CALL_TRAMP_PREFIX_STR __stringify(STATIC_CALL_TRAMP_PREFIX) +#define STATIC_CALL_TRAMP_PREFIX_LEN (sizeof(STATIC_CALL_TRAMP_PREFIX_STR) - 1) +#define STATIC_CALL_TRAMP(name) __PASTE(STATIC_CALL_TRAMP_PREFIX, name) +#define STATIC_CALL_TRAMP_STR(name) __stringify(STATIC_CALL_TRAMP(name)) + +/* + * Flags in the low bits of static_call_site::key. + */ +#define STATIC_CALL_SITE_TAIL 1UL /* tail call */ +#define STATIC_CALL_SITE_INIT 2UL /* init section */ +#define STATIC_CALL_SITE_FLAGS 3UL + +/* + * The static call site table needs to be created by external tooling (objtool + * or a compiler plugin). + */ +struct static_call_site { + s32 addr; + s32 key; +}; + +#endif /* _STATIC_CALL_TYPES_H */ diff --git a/tools/include/uapi/asm-generic/unistd.h b/tools/include/uapi/asm-generic/unistd.h index c8c189a5f0a6..f2b5d72a46c2 100644 --- a/tools/include/uapi/asm-generic/unistd.h +++ b/tools/include/uapi/asm-generic/unistd.h @@ -140,7 +140,7 @@ __SYSCALL(__NR_renameat, sys_renameat) #define __NR_umount2 39 __SYSCALL(__NR_umount2, sys_umount) #define __NR_mount 40 -__SC_COMP(__NR_mount, sys_mount, compat_sys_mount) +__SYSCALL(__NR_mount, sys_mount) #define __NR_pivot_root 41 __SYSCALL(__NR_pivot_root, sys_pivot_root) @@ -207,9 +207,9 @@ __SYSCALL(__NR_read, sys_read) #define __NR_write 64 __SYSCALL(__NR_write, sys_write) #define __NR_readv 65 -__SC_COMP(__NR_readv, sys_readv, compat_sys_readv) +__SC_COMP(__NR_readv, sys_readv, sys_readv) #define __NR_writev 66 -__SC_COMP(__NR_writev, sys_writev, compat_sys_writev) +__SC_COMP(__NR_writev, sys_writev, sys_writev) #define __NR_pread64 67 __SC_COMP(__NR_pread64, sys_pread64, compat_sys_pread64) #define __NR_pwrite64 68 @@ -237,7 +237,7 @@ __SC_COMP(__NR_signalfd4, sys_signalfd4, compat_sys_signalfd4) /* fs/splice.c */ #define __NR_vmsplice 75 -__SC_COMP(__NR_vmsplice, sys_vmsplice, compat_sys_vmsplice) +__SYSCALL(__NR_vmsplice, sys_vmsplice) #define __NR_splice 76 __SYSCALL(__NR_splice, sys_splice) #define __NR_tee 77 @@ -727,11 +727,9 @@ __SYSCALL(__NR_setns, sys_setns) #define __NR_sendmmsg 269 __SC_COMP(__NR_sendmmsg, sys_sendmmsg, compat_sys_sendmmsg) #define __NR_process_vm_readv 270 -__SC_COMP(__NR_process_vm_readv, sys_process_vm_readv, \ - compat_sys_process_vm_readv) +__SYSCALL(__NR_process_vm_readv, sys_process_vm_readv) #define __NR_process_vm_writev 271 -__SC_COMP(__NR_process_vm_writev, sys_process_vm_writev, \ - compat_sys_process_vm_writev) +__SYSCALL(__NR_process_vm_writev, sys_process_vm_writev) #define __NR_kcmp 272 __SYSCALL(__NR_kcmp, sys_kcmp) #define __NR_finit_module 273 @@ -850,6 +848,8 @@ __SYSCALL(__NR_pidfd_open, sys_pidfd_open) #define __NR_clone3 435 __SYSCALL(__NR_clone3, sys_clone3) #endif +#define __NR_close_range 436 +__SYSCALL(__NR_close_range, sys_close_range) #define __NR_openat2 437 __SYSCALL(__NR_openat2, sys_openat2) diff --git a/tools/include/uapi/drm/i915_drm.h b/tools/include/uapi/drm/i915_drm.h index 14b67cd6b54b..00546062e023 100644 --- a/tools/include/uapi/drm/i915_drm.h +++ b/tools/include/uapi/drm/i915_drm.h @@ -55,7 +55,7 @@ extern "C" { * cause the related events to not be seen. * * I915_RESET_UEVENT - Event is generated just before an attempt to reset the - * the GPU. The value supplied with the event is always 1. NOTE: Disable + * GPU. The value supplied with the event is always 1. NOTE: Disable * reset via module parameter will cause this event to not be seen. */ #define I915_L3_PARITY_UEVENT "L3_PARITY_ERROR" @@ -1934,7 +1934,7 @@ enum drm_i915_perf_property_id { /** * The value specifies which set of OA unit metrics should be - * be configured, defining the contents of any OA unit reports. + * configured, defining the contents of any OA unit reports. * * This property is available in perf revision 1. */ diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index b134e679e9db..b6238b2209b7 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -81,6 +81,12 @@ struct bpf_cgroup_storage_key { __u32 attach_type; /* program attach type */ }; +union bpf_iter_link_info { + struct { + __u32 map_fd; + } map; +}; + /* BPF syscall commands, see bpf(2) man-page for details. */ enum bpf_cmd { BPF_MAP_CREATE, @@ -249,13 +255,6 @@ enum bpf_link_type { MAX_BPF_LINK_TYPE, }; -enum bpf_iter_link_info { - BPF_ITER_LINK_UNSPEC = 0, - BPF_ITER_LINK_MAP_FD = 1, - - MAX_BPF_ITER_LINK_INFO, -}; - /* cgroup-bpf attach flags used in BPF_PROG_ATTACH command * * NONE(default): No further bpf programs allowed in the subtree. @@ -623,6 +622,8 @@ union bpf_attr { }; __u32 attach_type; /* attach type */ __u32 flags; /* extra flags */ + __aligned_u64 iter_info; /* extra bpf_iter_link_info */ + __u32 iter_info_len; /* iter_info length */ } link_create; struct { /* struct used by BPF_LINK_UPDATE command */ @@ -766,7 +767,7 @@ union bpf_attr { * * Also, note that **bpf_trace_printk**\ () is slow, and should * only be used for debugging purposes. For this reason, a notice - * bloc (spanning several lines) is printed to kernel logs and + * block (spanning several lines) is printed to kernel logs and * states that the helper should not be used "for production use" * the first time this helper is used (or more precisely, when * **trace_printk**\ () buffers are allocated). For passing values @@ -1032,14 +1033,14 @@ union bpf_attr { * * int ret; * struct bpf_tunnel_key key = {}; - * + * * ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0); * if (ret < 0) * return TC_ACT_SHOT; // drop packet - * + * * if (key.remote_ipv4 != 0x0a000001) * return TC_ACT_SHOT; // drop packet - * + * * return TC_ACT_OK; // accept packet * * This interface can also be used with all encapsulation devices @@ -1146,7 +1147,7 @@ union bpf_attr { * Description * Retrieve the realm or the route, that is to say the * **tclassid** field of the destination for the *skb*. The - * indentifier retrieved is a user-provided tag, similar to the + * identifier retrieved is a user-provided tag, similar to the * one used with the net_cls cgroup (see description for * **bpf_get_cgroup_classid**\ () helper), but here this tag is * held by a route (a destination entry), not by a task. diff --git a/tools/include/uapi/linux/in.h b/tools/include/uapi/linux/in.h index 8533bf07450f..7d6687618d80 100644 --- a/tools/include/uapi/linux/in.h +++ b/tools/include/uapi/linux/in.h @@ -123,6 +123,7 @@ struct in_addr { #define IP_CHECKSUM 23 #define IP_BIND_ADDRESS_NO_PORT 24 #define IP_RECVFRAGSIZE 25 +#define IP_RECVERR_RFC4884 26 /* IP_MTU_DISCOVER values */ #define IP_PMTUDISC_DONT 0 /* Never send DF frames */ @@ -134,7 +135,7 @@ struct in_addr { * this socket to prevent accepting spoofed ones. */ #define IP_PMTUDISC_INTERFACE 4 -/* weaker version of IP_PMTUDISC_INTERFACE, which allos packets to get +/* weaker version of IP_PMTUDISC_INTERFACE, which allows packets to get * fragmented if they exeed the interface mtu */ #define IP_PMTUDISC_OMIT 5 diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h index 4fdf30316582..7d8eced6f459 100644 --- a/tools/include/uapi/linux/kvm.h +++ b/tools/include/uapi/linux/kvm.h @@ -289,6 +289,7 @@ struct kvm_run { /* KVM_EXIT_FAIL_ENTRY */ struct { __u64 hardware_entry_failure_reason; + __u32 cpu; } fail_entry; /* KVM_EXIT_EXCEPTION */ struct { @@ -789,9 +790,10 @@ struct kvm_ppc_resize_hpt { #define KVM_VM_PPC_HV 1 #define KVM_VM_PPC_PR 2 -/* on MIPS, 0 forces trap & emulate, 1 forces VZ ASE */ -#define KVM_VM_MIPS_TE 0 +/* on MIPS, 0 indicates auto, 1 forces VZ ASE, 2 forces trap & emulate */ +#define KVM_VM_MIPS_AUTO 0 #define KVM_VM_MIPS_VZ 1 +#define KVM_VM_MIPS_TE 2 #define KVM_S390_SIE_PAGE_OFFSET 1 @@ -1031,6 +1033,10 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_PPC_SECURE_GUEST 181 #define KVM_CAP_HALT_POLL 182 #define KVM_CAP_ASYNC_PF_INT 183 +#define KVM_CAP_LAST_CPU 184 +#define KVM_CAP_SMALLER_MAXPHYADDR 185 +#define KVM_CAP_S390_DIAG318 186 +#define KVM_CAP_STEAL_TIME 187 #ifdef KVM_CAP_IRQ_ROUTING diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h index 21a1edd08cbe..3e5dcdd48a49 100644 --- a/tools/include/uapi/linux/perf_event.h +++ b/tools/include/uapi/linux/perf_event.h @@ -383,7 +383,8 @@ struct perf_event_attr { bpf_event : 1, /* include bpf events */ aux_output : 1, /* generate AUX records instead of events */ cgroup : 1, /* include cgroup events */ - __reserved_1 : 31; + text_poke : 1, /* include text poke events */ + __reserved_1 : 30; union { __u32 wakeup_events; /* wakeup every n events */ @@ -1041,12 +1042,35 @@ enum perf_event_type { */ PERF_RECORD_CGROUP = 19, + /* + * Records changes to kernel text i.e. self-modified code. 'old_len' is + * the number of old bytes, 'new_len' is the number of new bytes. Either + * 'old_len' or 'new_len' may be zero to indicate, for example, the + * addition or removal of a trampoline. 'bytes' contains the old bytes + * followed immediately by the new bytes. + * + * struct { + * struct perf_event_header header; + * u64 addr; + * u16 old_len; + * u16 new_len; + * u8 bytes[]; + * struct sample_id sample_id; + * }; + */ + PERF_RECORD_TEXT_POKE = 20, + PERF_RECORD_MAX, /* non-ABI */ }; enum perf_record_ksymbol_type { PERF_RECORD_KSYMBOL_TYPE_UNKNOWN = 0, PERF_RECORD_KSYMBOL_TYPE_BPF = 1, + /* + * Out of line code such as kprobe-replaced instructions or optimized + * kprobes or ftrace trampolines. + */ + PERF_RECORD_KSYMBOL_TYPE_OOL = 2, PERF_RECORD_KSYMBOL_TYPE_MAX /* non-ABI */ }; @@ -1172,7 +1196,7 @@ union perf_mem_data_src { #define PERF_MEM_SNOOPX_FWD 0x01 /* forward */ /* 1 free */ -#define PERF_MEM_SNOOPX_SHIFT 37 +#define PERF_MEM_SNOOPX_SHIFT 38 /* locked instruction */ #define PERF_MEM_LOCK_NA 0x01 /* not available */ diff --git a/tools/include/uapi/linux/vhost.h b/tools/include/uapi/linux/vhost.h index 0c2349612e77..75232185324a 100644 --- a/tools/include/uapi/linux/vhost.h +++ b/tools/include/uapi/linux/vhost.h @@ -91,6 +91,8 @@ /* Use message type V2 */ #define VHOST_BACKEND_F_IOTLB_MSG_V2 0x1 +/* IOTLB can accept batching hints */ +#define VHOST_BACKEND_F_IOTLB_BATCH 0x2 #define VHOST_SET_BACKEND_FEATURES _IOW(VHOST_VIRTIO, 0x25, __u64) #define VHOST_GET_BACKEND_FEATURES _IOR(VHOST_VIRTIO, 0x26, __u64) diff --git a/tools/io_uring/io_uring-bench.c b/tools/io_uring/io_uring-bench.c index 0f257139b003..7703f0118385 100644 --- a/tools/io_uring/io_uring-bench.c +++ b/tools/io_uring/io_uring-bench.c @@ -130,7 +130,7 @@ static int io_uring_register_files(struct submitter *s) s->nr_files); } -static int gettid(void) +static int lk_gettid(void) { return syscall(__NR_gettid); } @@ -281,7 +281,7 @@ static void *submitter_fn(void *data) struct io_sq_ring *ring = &s->sq_ring; int ret, prepped; - printf("submitter=%d\n", gettid()); + printf("submitter=%d\n", lk_gettid()); srand48_r(pthread_self(), &s->rand); diff --git a/tools/lib/api/fd/array.c b/tools/lib/api/fd/array.c index 58d44d5eee31..5e6cb9debe37 100644 --- a/tools/lib/api/fd/array.c +++ b/tools/lib/api/fd/array.c @@ -8,6 +8,7 @@ #include <poll.h> #include <stdlib.h> #include <unistd.h> +#include <string.h> void fdarray__init(struct fdarray *fda, int nr_autogrow) { @@ -19,7 +20,7 @@ void fdarray__init(struct fdarray *fda, int nr_autogrow) int fdarray__grow(struct fdarray *fda, int nr) { - void *priv; + struct priv *priv; int nr_alloc = fda->nr_alloc + nr; size_t psize = sizeof(fda->priv[0]) * nr_alloc; size_t size = sizeof(struct pollfd) * nr_alloc; @@ -34,6 +35,9 @@ int fdarray__grow(struct fdarray *fda, int nr) return -ENOMEM; } + memset(&entries[fda->nr_alloc], 0, sizeof(struct pollfd) * nr); + memset(&priv[fda->nr_alloc], 0, sizeof(fda->priv[0]) * nr); + fda->nr_alloc = nr_alloc; fda->entries = entries; fda->priv = priv; @@ -69,7 +73,7 @@ void fdarray__delete(struct fdarray *fda) free(fda); } -int fdarray__add(struct fdarray *fda, int fd, short revents) +int fdarray__add(struct fdarray *fda, int fd, short revents, enum fdarray_flags flags) { int pos = fda->nr; @@ -79,6 +83,7 @@ int fdarray__add(struct fdarray *fda, int fd, short revents) fda->entries[fda->nr].fd = fd; fda->entries[fda->nr].events = revents; + fda->priv[fda->nr].flags = flags; fda->nr++; return pos; } @@ -93,22 +98,22 @@ int fdarray__filter(struct fdarray *fda, short revents, return 0; for (fd = 0; fd < fda->nr; ++fd) { + if (!fda->entries[fd].events) + continue; + if (fda->entries[fd].revents & revents) { if (entry_destructor) entry_destructor(fda, fd, arg); + fda->entries[fd].revents = fda->entries[fd].events = 0; continue; } - if (fd != nr) { - fda->entries[nr] = fda->entries[fd]; - fda->priv[nr] = fda->priv[fd]; - } - - ++nr; + if (!(fda->priv[fd].flags & fdarray_flag__nonfilterable)) + ++nr; } - return fda->nr = nr; + return nr; } int fdarray__poll(struct fdarray *fda, int timeout) diff --git a/tools/lib/api/fd/array.h b/tools/lib/api/fd/array.h index b39557d1a88f..7fcf21a33c0c 100644 --- a/tools/lib/api/fd/array.h +++ b/tools/lib/api/fd/array.h @@ -21,19 +21,27 @@ struct fdarray { int nr_alloc; int nr_autogrow; struct pollfd *entries; - union { - int idx; - void *ptr; + struct priv { + union { + int idx; + void *ptr; + }; + unsigned int flags; } *priv; }; +enum fdarray_flags { + fdarray_flag__default = 0x00000000, + fdarray_flag__nonfilterable = 0x00000001 +}; + void fdarray__init(struct fdarray *fda, int nr_autogrow); void fdarray__exit(struct fdarray *fda); struct fdarray *fdarray__new(int nr_alloc, int nr_autogrow); void fdarray__delete(struct fdarray *fda); -int fdarray__add(struct fdarray *fda, int fd, short revents); +int fdarray__add(struct fdarray *fda, int fd, short revents, enum fdarray_flags flags); int fdarray__poll(struct fdarray *fda, int timeout); int fdarray__filter(struct fdarray *fda, short revents, void (*entry_destructor)(struct fdarray *fda, int fd, void *arg), diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile index bf8ed134cb8a..9ae8f4ef0aac 100644 --- a/tools/lib/bpf/Makefile +++ b/tools/lib/bpf/Makefile @@ -59,7 +59,7 @@ FEATURE_USER = .libbpf FEATURE_TESTS = libelf libelf-mmap zlib bpf reallocarray FEATURE_DISPLAY = libelf zlib bpf -INCLUDES = -I. -I$(srctree)/tools/include -I$(srctree)/tools/arch/$(ARCH)/include/uapi -I$(srctree)/tools/include/uapi +INCLUDES = -I. -I$(srctree)/tools/include -I$(srctree)/tools/include/uapi FEATURE_CHECK_CFLAGS-bpf = $(INCLUDES) check_feat := 1 @@ -152,6 +152,7 @@ GLOBAL_SYM_COUNT = $(shell readelf -s --wide $(BPF_IN_SHARED) | \ awk '/GLOBAL/ && /DEFAULT/ && !/UND/ {print $$NF}' | \ sort -u | wc -l) VERSIONED_SYM_COUNT = $(shell readelf --dyn-syms --wide $(OUTPUT)libbpf.so | \ + awk '/GLOBAL/ && /DEFAULT/ && !/UND/ {print $$NF}' | \ grep -Eo '[^ ]+@LIBBPF_' | cut -d@ -f1 | sort -u | wc -l) CMD_TARGETS = $(LIB_TARGET) $(PC_FILE) @@ -219,6 +220,7 @@ check_abi: $(OUTPUT)libbpf.so awk '/GLOBAL/ && /DEFAULT/ && !/UND/ {print $$NF}'| \ sort -u > $(OUTPUT)libbpf_global_syms.tmp; \ readelf --dyn-syms --wide $(OUTPUT)libbpf.so | \ + awk '/GLOBAL/ && /DEFAULT/ && !/UND/ {print $$NF}'| \ grep -Eo '[^ ]+@LIBBPF_' | cut -d@ -f1 | \ sort -u > $(OUTPUT)libbpf_versioned_syms.tmp; \ diff -u $(OUTPUT)libbpf_global_syms.tmp \ diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index eab14c97c15d..0750681057c2 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -599,6 +599,9 @@ int bpf_link_create(int prog_fd, int target_fd, attr.link_create.target_fd = target_fd; attr.link_create.attach_type = attach_type; attr.link_create.flags = OPTS_GET(opts, flags, 0); + attr.link_create.iter_info = + ptr_to_u64(OPTS_GET(opts, iter_info, (void *)0)); + attr.link_create.iter_info_len = OPTS_GET(opts, iter_info_len, 0); return sys_bpf(BPF_LINK_CREATE, &attr, sizeof(attr)); } diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h index 28855fd5b5f4..015d13f25fcc 100644 --- a/tools/lib/bpf/bpf.h +++ b/tools/lib/bpf/bpf.h @@ -168,11 +168,14 @@ LIBBPF_API int bpf_prog_detach(int attachable_fd, enum bpf_attach_type type); LIBBPF_API int bpf_prog_detach2(int prog_fd, int attachable_fd, enum bpf_attach_type type); +union bpf_iter_link_info; /* defined in up-to-date linux/bpf.h */ struct bpf_link_create_opts { size_t sz; /* size of this struct for forward/backward compatibility */ __u32 flags; + union bpf_iter_link_info *iter_info; + __u32 iter_info_len; }; -#define bpf_link_create_opts__last_field flags +#define bpf_link_create_opts__last_field iter_info_len LIBBPF_API int bpf_link_create(int prog_fd, int target_fd, enum bpf_attach_type attach_type, diff --git a/tools/lib/bpf/bpf_helpers.h b/tools/lib/bpf/bpf_helpers.h index bc14db706b88..e9a4ecddb7a5 100644 --- a/tools/lib/bpf/bpf_helpers.h +++ b/tools/lib/bpf/bpf_helpers.h @@ -40,7 +40,7 @@ * Helper macro to manipulate data structures */ #ifndef offsetof -#define offsetof(TYPE, MEMBER) __builtin_offsetof(TYPE, MEMBER) +#define offsetof(TYPE, MEMBER) ((unsigned long)&((TYPE *)0)->MEMBER) #endif #ifndef container_of #define container_of(ptr, type, member) \ diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index 856b09a04563..6bdbc389b493 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -41,6 +41,7 @@ struct btf { __u32 types_size; __u32 data_size; int fd; + int ptr_sz; }; static inline __u64 ptr_to_u64(const void *ptr) @@ -221,6 +222,70 @@ const struct btf_type *btf__type_by_id(const struct btf *btf, __u32 type_id) return btf->types[type_id]; } +static int determine_ptr_size(const struct btf *btf) +{ + const struct btf_type *t; + const char *name; + int i; + + for (i = 1; i <= btf->nr_types; i++) { + t = btf__type_by_id(btf, i); + if (!btf_is_int(t)) + continue; + + name = btf__name_by_offset(btf, t->name_off); + if (!name) + continue; + + if (strcmp(name, "long int") == 0 || + strcmp(name, "long unsigned int") == 0) { + if (t->size != 4 && t->size != 8) + continue; + return t->size; + } + } + + return -1; +} + +static size_t btf_ptr_sz(const struct btf *btf) +{ + if (!btf->ptr_sz) + ((struct btf *)btf)->ptr_sz = determine_ptr_size(btf); + return btf->ptr_sz < 0 ? sizeof(void *) : btf->ptr_sz; +} + +/* Return pointer size this BTF instance assumes. The size is heuristically + * determined by looking for 'long' or 'unsigned long' integer type and + * recording its size in bytes. If BTF type information doesn't have any such + * type, this function returns 0. In the latter case, native architecture's + * pointer size is assumed, so will be either 4 or 8, depending on + * architecture that libbpf was compiled for. It's possible to override + * guessed value by using btf__set_pointer_size() API. + */ +size_t btf__pointer_size(const struct btf *btf) +{ + if (!btf->ptr_sz) + ((struct btf *)btf)->ptr_sz = determine_ptr_size(btf); + + if (btf->ptr_sz < 0) + /* not enough BTF type info to guess */ + return 0; + + return btf->ptr_sz; +} + +/* Override or set pointer size in bytes. Only values of 4 and 8 are + * supported. + */ +int btf__set_pointer_size(struct btf *btf, size_t ptr_sz) +{ + if (ptr_sz != 4 && ptr_sz != 8) + return -EINVAL; + btf->ptr_sz = ptr_sz; + return 0; +} + static bool btf_type_is_void(const struct btf_type *t) { return t == &btf_void || btf_is_fwd(t); @@ -253,7 +318,7 @@ __s64 btf__resolve_size(const struct btf *btf, __u32 type_id) size = t->size; goto done; case BTF_KIND_PTR: - size = sizeof(void *); + size = btf_ptr_sz(btf); goto done; case BTF_KIND_TYPEDEF: case BTF_KIND_VOLATILE: @@ -293,9 +358,9 @@ int btf__align_of(const struct btf *btf, __u32 id) switch (kind) { case BTF_KIND_INT: case BTF_KIND_ENUM: - return min(sizeof(void *), (size_t)t->size); + return min(btf_ptr_sz(btf), (size_t)t->size); case BTF_KIND_PTR: - return sizeof(void *); + return btf_ptr_sz(btf); case BTF_KIND_TYPEDEF: case BTF_KIND_VOLATILE: case BTF_KIND_CONST: @@ -533,6 +598,18 @@ struct btf *btf__parse_elf(const char *path, struct btf_ext **btf_ext) if (IS_ERR(btf)) goto done; + switch (gelf_getclass(elf)) { + case ELFCLASS32: + btf__set_pointer_size(btf, 4); + break; + case ELFCLASS64: + btf__set_pointer_size(btf, 8); + break; + default: + pr_warn("failed to get ELF class (bitness) for %s\n", path); + break; + } + if (btf_ext && btf_ext_data) { *btf_ext = btf_ext__new(btf_ext_data->d_buf, btf_ext_data->d_size); @@ -564,8 +641,8 @@ done: struct btf *btf__parse_raw(const char *path) { + struct btf *btf = NULL; void *data = NULL; - struct btf *btf; FILE *f = NULL; __u16 magic; int err = 0; @@ -582,6 +659,12 @@ struct btf *btf__parse_raw(const char *path) err = -EIO; goto err_out; } + if (magic == __bswap_16(BTF_MAGIC)) { + /* non-native endian raw BTF */ + pr_warn("non-native BTF endianness is not supported\n"); + err = -LIBBPF_ERRNO__ENDIAN; + goto err_out; + } if (magic != BTF_MAGIC) { /* definitely not a raw BTF */ err = -EPROTO; diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h index f4a1a1d2b9a3..1ca14448df4c 100644 --- a/tools/lib/bpf/btf.h +++ b/tools/lib/bpf/btf.h @@ -76,6 +76,8 @@ LIBBPF_API __s32 btf__find_by_name_kind(const struct btf *btf, LIBBPF_API __u32 btf__get_nr_types(const struct btf *btf); LIBBPF_API const struct btf_type *btf__type_by_id(const struct btf *btf, __u32 id); +LIBBPF_API size_t btf__pointer_size(const struct btf *btf); +LIBBPF_API int btf__set_pointer_size(struct btf *btf, size_t ptr_sz); LIBBPF_API __s64 btf__resolve_size(const struct btf *btf, __u32 type_id); LIBBPF_API int btf__resolve_type(const struct btf *btf, __u32 type_id); LIBBPF_API int btf__align_of(const struct btf *btf, __u32 id); diff --git a/tools/lib/bpf/btf_dump.c b/tools/lib/bpf/btf_dump.c index cf711168d34a..57c00fa63932 100644 --- a/tools/lib/bpf/btf_dump.c +++ b/tools/lib/bpf/btf_dump.c @@ -13,6 +13,7 @@ #include <errno.h> #include <linux/err.h> #include <linux/btf.h> +#include <linux/kernel.h> #include "btf.h" #include "hashmap.h" #include "libbpf.h" @@ -60,6 +61,7 @@ struct btf_dump { const struct btf_ext *btf_ext; btf_dump_printf_fn_t printf_fn; struct btf_dump_opts opts; + int ptr_sz; bool strip_mods; /* per-type auxiliary state */ @@ -138,6 +140,7 @@ struct btf_dump *btf_dump__new(const struct btf *btf, d->btf_ext = btf_ext; d->printf_fn = printf_fn; d->opts.ctx = opts ? opts->ctx : NULL; + d->ptr_sz = btf__pointer_size(btf) ? : sizeof(void *); d->type_names = hashmap__new(str_hash_fn, str_equal_fn, NULL); if (IS_ERR(d->type_names)) { @@ -549,6 +552,9 @@ static int btf_dump_order_type(struct btf_dump *d, __u32 id, bool through_ptr) } } +static void btf_dump_emit_missing_aliases(struct btf_dump *d, __u32 id, + const struct btf_type *t); + static void btf_dump_emit_struct_fwd(struct btf_dump *d, __u32 id, const struct btf_type *t); static void btf_dump_emit_struct_def(struct btf_dump *d, __u32 id, @@ -671,6 +677,9 @@ static void btf_dump_emit_type(struct btf_dump *d, __u32 id, __u32 cont_id) switch (kind) { case BTF_KIND_INT: + /* Emit type alias definitions if necessary */ + btf_dump_emit_missing_aliases(d, id, t); + tstate->emit_state = EMITTED; break; case BTF_KIND_ENUM: @@ -797,7 +806,7 @@ static void btf_dump_emit_bit_padding(const struct btf_dump *d, int align, int lvl) { int off_diff = m_off - cur_off; - int ptr_bits = sizeof(void *) * 8; + int ptr_bits = d->ptr_sz * 8; if (off_diff <= 0) /* no gap */ @@ -870,7 +879,7 @@ static void btf_dump_emit_struct_def(struct btf_dump *d, btf_dump_printf(d, ": %d", m_sz); off = m_off + m_sz; } else { - m_sz = max(0, btf__resolve_size(d->btf, m->type)); + m_sz = max((__s64)0, btf__resolve_size(d->btf, m->type)); off = m_off + m_sz * 8; } btf_dump_printf(d, ";"); @@ -890,6 +899,32 @@ static void btf_dump_emit_struct_def(struct btf_dump *d, btf_dump_printf(d, " __attribute__((packed))"); } +static const char *missing_base_types[][2] = { + /* + * GCC emits typedefs to its internal __PolyX_t types when compiling Arm + * SIMD intrinsics. Alias them to standard base types. + */ + { "__Poly8_t", "unsigned char" }, + { "__Poly16_t", "unsigned short" }, + { "__Poly64_t", "unsigned long long" }, + { "__Poly128_t", "unsigned __int128" }, +}; + +static void btf_dump_emit_missing_aliases(struct btf_dump *d, __u32 id, + const struct btf_type *t) +{ + const char *name = btf_dump_type_name(d, id); + int i; + + for (i = 0; i < ARRAY_SIZE(missing_base_types); i++) { + if (strcmp(name, missing_base_types[i][0]) == 0) { + btf_dump_printf(d, "typedef %s %s;\n\n", + missing_base_types[i][1], name); + break; + } + } +} + static void btf_dump_emit_enum_fwd(struct btf_dump *d, __u32 id, const struct btf_type *t) { diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 7be04e45d29c..e493d6048143 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -2264,7 +2264,7 @@ static int bpf_object__init_user_btf_maps(struct bpf_object *obj, bool strict, data = elf_getdata(scn, NULL); if (!scn || !data) { pr_warn("failed to get Elf_Data from map section %d (%s)\n", - obj->efile.maps_shndx, MAPS_ELF_SEC); + obj->efile.btf_maps_shndx, MAPS_ELF_SEC); return -EINVAL; } @@ -2434,6 +2434,8 @@ static int bpf_object__init_btf(struct bpf_object *obj, BTF_ELF_SEC, err); goto out; } + /* enforce 8-byte pointers for BPF-targeted BTFs */ + btf__set_pointer_size(obj->btf, 8); err = 0; } if (btf_ext_data) { @@ -2542,6 +2544,8 @@ static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj) if (IS_ERR(kern_btf)) return PTR_ERR(kern_btf); + /* enforce 8-byte pointers for BPF-targeted BTFs */ + btf__set_pointer_size(obj->btf, 8); bpf_object__sanitize_btf(obj, kern_btf); } @@ -3478,10 +3482,11 @@ bpf_object__probe_global_data(struct bpf_object *obj) map = bpf_create_map_xattr(&map_attr); if (map < 0) { - cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg)); + ret = -errno; + cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg)); pr_warn("Error in %s():%s(%d). Couldn't create simple array map.\n", - __func__, cp, errno); - return -errno; + __func__, cp, -ret); + return ret; } insns[0].imm = map; @@ -5194,11 +5199,12 @@ static int bpf_object__collect_st_ops_relos(struct bpf_object *obj, static int bpf_object__collect_map_relos(struct bpf_object *obj, GElf_Shdr *shdr, Elf_Data *data) { - int i, j, nrels, new_sz, ptr_sz = sizeof(void *); + const int bpf_ptr_sz = 8, host_ptr_sz = sizeof(void *); + int i, j, nrels, new_sz; const struct btf_var_secinfo *vi = NULL; const struct btf_type *sec, *var, *def; + struct bpf_map *map = NULL, *targ_map; const struct btf_member *member; - struct bpf_map *map, *targ_map; const char *name, *mname; Elf_Data *symbols; unsigned int moff; @@ -5243,7 +5249,7 @@ static int bpf_object__collect_map_relos(struct bpf_object *obj, vi = btf_var_secinfos(sec) + map->btf_var_idx; if (vi->offset <= rel.r_offset && - rel.r_offset + sizeof(void *) <= vi->offset + vi->size) + rel.r_offset + bpf_ptr_sz <= vi->offset + vi->size) break; } if (j == obj->nr_maps) { @@ -5279,17 +5285,20 @@ static int bpf_object__collect_map_relos(struct bpf_object *obj, return -EINVAL; moff = rel.r_offset - vi->offset - moff; - if (moff % ptr_sz) + /* here we use BPF pointer size, which is always 64 bit, as we + * are parsing ELF that was built for BPF target + */ + if (moff % bpf_ptr_sz) return -EINVAL; - moff /= ptr_sz; + moff /= bpf_ptr_sz; if (moff >= map->init_slots_sz) { new_sz = moff + 1; - tmp = realloc(map->init_slots, new_sz * ptr_sz); + tmp = realloc(map->init_slots, new_sz * host_ptr_sz); if (!tmp) return -ENOMEM; map->init_slots = tmp; memset(map->init_slots + map->init_slots_sz, 0, - (new_sz - map->init_slots_sz) * ptr_sz); + (new_sz - map->init_slots_sz) * host_ptr_sz); map->init_slots_sz = new_sz; } map->init_slots[moff] = targ_map; @@ -6012,9 +6021,10 @@ int bpf_program__pin_instance(struct bpf_program *prog, const char *path, } if (bpf_obj_pin(prog->instances.fds[instance], path)) { - cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg)); + err = -errno; + cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg)); pr_warn("failed to pin program: %s\n", cp); - return -errno; + return err; } pr_debug("pinned program '%s'\n", path); @@ -6915,7 +6925,7 @@ static const struct bpf_sec_def section_defs[] = { BPF_XDP_DEVMAP), BPF_EAPROG_SEC("xdp_cpumap/", BPF_PROG_TYPE_XDP, BPF_XDP_CPUMAP), - BPF_EAPROG_SEC("xdp", BPF_PROG_TYPE_XDP, + BPF_APROG_SEC("xdp", BPF_PROG_TYPE_XDP, BPF_XDP), BPF_PROG_SEC("perf_event", BPF_PROG_TYPE_PERF_EVENT), BPF_PROG_SEC("lwt_in", BPF_PROG_TYPE_LWT_IN), @@ -8306,10 +8316,8 @@ bpf_program__attach_iter(struct bpf_program *prog, if (!OPTS_VALID(opts, bpf_iter_attach_opts)) return ERR_PTR(-EINVAL); - if (OPTS_HAS(opts, map_fd)) { - target_fd = opts->map_fd; - link_create_opts.flags = BPF_ITER_LINK_MAP_FD; - } + link_create_opts.iter_info = OPTS_GET(opts, link_info, (void *)0); + link_create_opts.iter_info_len = OPTS_GET(opts, link_info_len, 0); prog_fd = bpf_program__fd(prog); if (prog_fd < 0) { diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index 3ed1399bfbbc..5ecb4069a9f0 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -267,9 +267,10 @@ LIBBPF_API struct bpf_link *bpf_map__attach_struct_ops(struct bpf_map *map); struct bpf_iter_attach_opts { size_t sz; /* size of this struct for forward/backward compatibility */ - __u32 map_fd; + union bpf_iter_link_info *link_info; + __u32 link_info_len; }; -#define bpf_iter_attach_opts__last_field map_fd +#define bpf_iter_attach_opts__last_field link_info_len LIBBPF_API struct bpf_link * bpf_program__attach_iter(struct bpf_program *prog, diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index 0c4722bfdd0a..e35bd6cdbdbf 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -295,5 +295,7 @@ LIBBPF_0.1.0 { bpf_program__set_sk_lookup; btf__parse; btf__parse_raw; + btf__pointer_size; btf__set_fd; + btf__set_pointer_size; } LIBBPF_0.0.9; diff --git a/tools/lib/perf/Documentation/libperf-counting.txt b/tools/lib/perf/Documentation/libperf-counting.txt index cae9757f49c1..8b75efcd67ce 100644 --- a/tools/lib/perf/Documentation/libperf-counting.txt +++ b/tools/lib/perf/Documentation/libperf-counting.txt @@ -7,13 +7,13 @@ libperf-counting - counting interface DESCRIPTION ----------- -The counting interface provides API to meassure and get count for specific perf events. +The counting interface provides API to measure and get count for specific perf events. The following test tries to explain count on `counting.c` example. It is by no means complete guide to counting, but shows libperf basic API for counting. -The `counting.c` comes with libbperf package and can be compiled and run like: +The `counting.c` comes with libperf package and can be compiled and run like: [source,bash] -- @@ -26,7 +26,8 @@ count 176242, enabled 176242, run 176242 It requires root access, because of the `PERF_COUNT_SW_CPU_CLOCK` event, which is available only for root. -The `counting.c` example monitors two events on the current process and displays their count, in a nutshel it: +The `counting.c` example monitors two events on the current process and displays +their count, in a nutshell it: * creates events * adds them to the event list @@ -152,7 +153,7 @@ Configure event list with the thread map and open events: -- Both events are created as disabled (note the `disabled = 1` assignment above), -so we need to enable the whole list explicitely (both events). +so we need to enable the whole list explicitly (both events). From this moment events are counting and we can do our workload. @@ -167,7 +168,8 @@ When we are done we disable the events list. 79 perf_evlist__disable(evlist); -- -Now we need to get the counts from events, following code iterates throught the events list and read counts: +Now we need to get the counts from events, following code iterates through the +events list and read counts: [source,c] -- @@ -178,7 +180,7 @@ Now we need to get the counts from events, following code iterates throught the 85 } -- -And finaly cleanup. +And finally cleanup. We close the whole events list (both events) and remove it together with the threads map: diff --git a/tools/lib/perf/Documentation/libperf-sampling.txt b/tools/lib/perf/Documentation/libperf-sampling.txt index d71a7b4fcf5f..d6ca24f6ef78 100644 --- a/tools/lib/perf/Documentation/libperf-sampling.txt +++ b/tools/lib/perf/Documentation/libperf-sampling.txt @@ -8,13 +8,13 @@ libperf-sampling - sampling interface DESCRIPTION ----------- -The sampling interface provides API to meassure and get count for specific perf events. +The sampling interface provides API to measure and get count for specific perf events. The following test tries to explain count on `sampling.c` example. It is by no means complete guide to sampling, but shows libperf basic API for sampling. -The `sampling.c` comes with libbperf package and can be compiled and run like: +The `sampling.c` comes with libperf package and can be compiled and run like: [source,bash] -- @@ -33,7 +33,8 @@ cpu 0, pid 4465, tid 4470, ip 7f84fe0ebebf, period 176 It requires root access, because it uses hardware cycles event. -The `sampling.c` example profiles/samples all CPUs with hardware cycles, in a nutshel it: +The `sampling.c` example profiles/samples all CPUs with hardware cycles, in a +nutshell it: - creates events - adds them to the event list @@ -90,7 +91,7 @@ Once the setup is complete we start by defining cycles event using the `struct p 36 }; -- -Next step is to prepare cpus map. +Next step is to prepare CPUs map. In this case we will monitor all the available CPUs: @@ -152,7 +153,7 @@ Once the events list is open, we can create memory maps AKA perf ring buffers: -- The event is created as disabled (note the `disabled = 1` assignment above), -so we need to enable the events list explicitely. +so we need to enable the events list explicitly. From this moment the cycles event is sampling. @@ -212,7 +213,7 @@ Each sample needs to get parsed: 106 cpu, pid, tid, ip, period); -- -And finaly cleanup. +And finally cleanup. We close the whole events list (both events) and remove it together with the threads map: diff --git a/tools/lib/perf/Documentation/libperf.txt b/tools/lib/perf/Documentation/libperf.txt index 5a6bb512789d..0c74c30ed23a 100644 --- a/tools/lib/perf/Documentation/libperf.txt +++ b/tools/lib/perf/Documentation/libperf.txt @@ -29,7 +29,7 @@ SYNOPSIS void libperf_init(libperf_print_fn_t fn); -- -*API to handle cpu maps:* +*API to handle CPU maps:* [source,c] -- @@ -217,7 +217,7 @@ Following objects are key to the libperf interface: [horizontal] -struct perf_cpu_map:: Provides a cpu list abstraction. +struct perf_cpu_map:: Provides a CPU list abstraction. struct perf_thread_map:: Provides a thread list abstraction. diff --git a/tools/lib/perf/evlist.c b/tools/lib/perf/evlist.c index 6a875a0f01bb..2208444ecb44 100644 --- a/tools/lib/perf/evlist.c +++ b/tools/lib/perf/evlist.c @@ -305,9 +305,9 @@ int perf_evlist__alloc_pollfd(struct perf_evlist *evlist) } int perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd, - void *ptr, short revent) + void *ptr, short revent, enum fdarray_flags flags) { - int pos = fdarray__add(&evlist->pollfd, fd, revent | POLLERR | POLLHUP); + int pos = fdarray__add(&evlist->pollfd, fd, revent | POLLERR | POLLHUP, flags); if (pos >= 0) { evlist->pollfd.priv[pos].ptr = ptr; @@ -488,7 +488,7 @@ mmap_per_evsel(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops, revent = !overwrite ? POLLIN : 0; if (!evsel->system_wide && - perf_evlist__add_pollfd(evlist, fd, map, revent) < 0) { + perf_evlist__add_pollfd(evlist, fd, map, revent, fdarray_flag__default) < 0) { perf_mmap__put(map); return -1; } diff --git a/tools/lib/perf/include/internal/evlist.h b/tools/lib/perf/include/internal/evlist.h index 74dc8c3f0b66..2d0fa02b036f 100644 --- a/tools/lib/perf/include/internal/evlist.h +++ b/tools/lib/perf/include/internal/evlist.h @@ -45,7 +45,7 @@ struct perf_evlist_mmap_ops { int perf_evlist__alloc_pollfd(struct perf_evlist *evlist); int perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd, - void *ptr, short revent); + void *ptr, short revent, enum fdarray_flags flags); int perf_evlist__mmap_ops(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops, diff --git a/tools/lib/perf/include/perf/event.h b/tools/lib/perf/include/perf/event.h index 69b44d2cc0f5..842028858d66 100644 --- a/tools/lib/perf/include/perf/event.h +++ b/tools/lib/perf/include/perf/event.h @@ -111,6 +111,14 @@ struct perf_record_cgroup { char path[PATH_MAX]; }; +struct perf_record_text_poke_event { + struct perf_event_header header; + __u64 addr; + __u16 old_len; + __u16 new_len; + __u8 bytes[]; +}; + struct perf_record_sample { struct perf_event_header header; __u64 array[]; @@ -367,6 +375,7 @@ union perf_event { struct perf_record_sample sample; struct perf_record_bpf_event bpf; struct perf_record_ksymbol ksymbol; + struct perf_record_text_poke_event text_poke; struct perf_record_header_attr attr; struct perf_record_event_update event_update; struct perf_record_header_event_type event_type; diff --git a/tools/lib/rbtree.c b/tools/lib/rbtree.c index 06ac7bd2144b..727396de6be5 100644 --- a/tools/lib/rbtree.c +++ b/tools/lib/rbtree.c @@ -13,7 +13,7 @@ #include <linux/export.h> /* - * red-black trees properties: http://en.wikipedia.org/wiki/Rbtree + * red-black trees properties: https://en.wikipedia.org/wiki/Rbtree * * 1) A node is either red or black * 2) The root is black diff --git a/tools/lib/subcmd/help.c b/tools/lib/subcmd/help.c index 2859f107abc8..bf02d62a3b2b 100644 --- a/tools/lib/subcmd/help.c +++ b/tools/lib/subcmd/help.c @@ -65,12 +65,14 @@ void exclude_cmds(struct cmdnames *cmds, struct cmdnames *excludes) ci = cj = ei = 0; while (ci < cmds->cnt && ei < excludes->cnt) { cmp = strcmp(cmds->names[ci]->name, excludes->names[ei]->name); - if (cmp < 0) + if (cmp < 0) { cmds->names[cj++] = cmds->names[ci++]; - else if (cmp == 0) - ci++, ei++; - else if (cmp > 0) + } else if (cmp == 0) { + ci++; ei++; + } else if (cmp > 0) { + ei++; + } } while (ci < cmds->cnt) diff --git a/tools/lib/traceevent/Documentation/libtraceevent-plugins.txt b/tools/lib/traceevent/Documentation/libtraceevent-plugins.txt index 596032ade31f..4d6394397d92 100644 --- a/tools/lib/traceevent/Documentation/libtraceevent-plugins.txt +++ b/tools/lib/traceevent/Documentation/libtraceevent-plugins.txt @@ -3,7 +3,7 @@ libtraceevent(3) NAME ---- -tep_load_plugins, tep_unload_plugins - Load / unload traceevent plugins. +tep_load_plugins, tep_unload_plugins, tep_load_plugins_hook - Load / unload traceevent plugins. SYNOPSIS -------- @@ -13,6 +13,12 @@ SYNOPSIS struct tep_plugin_list pass:[*]*tep_load_plugins*(struct tep_handle pass:[*]_tep_); void *tep_unload_plugins*(struct tep_plugin_list pass:[*]_plugin_list_, struct tep_handle pass:[*]_tep_); +void *tep_load_plugins_hook*(struct tep_handle pass:[*]_tep_, const char pass:[*]_suffix_, + void (pass:[*]_load_plugin_)(struct tep_handle pass:[*]tep, + const char pass:[*]path, + const char pass:[*]name, + void pass:[*]data), + void pass:[*]_data_); -- DESCRIPTION @@ -22,11 +28,13 @@ directories. The _tep_ argument is trace event parser context. The plugin directories are : [verse] -- + - Directories, specified in _tep_->plugins_dir with priority TEP_PLUGIN_FIRST - System's plugin directory, defined at the library compile time. It depends on the library installation prefix and usually is _(install_preffix)/lib/traceevent/plugins_ - Directory, defined by the environment variable _TRACEEVENT_PLUGIN_DIR_ - User's plugin directory, located at _~/.local/lib/traceevent/plugins_ + - Directories, specified in _tep_->plugins_dir with priority TEP_PLUGIN_LAST -- Loading of plugins can be controlled by the _tep_flags_, using the _tep_set_flag()_ API: @@ -44,6 +52,12 @@ _tep_load_plugins()_. The _tep_ argument is trace event parser context. The _plugin_list_ is the list of loaded plugins, returned by the _tep_load_plugins()_ function. +The _tep_load_plugins_hook_ function walks through all directories with plugins +and calls user specified _load_plugin()_ hook for each plugin file. Only files +with given _suffix_ are considered to be plugins. The _data_ is a user specified +context, passed to _load_plugin()_. Directories and the walk order are the same +as in _tep_load_plugins()_ API. + RETURN VALUE ------------ The _tep_load_plugins()_ function returns a list of successfully loaded plugins, @@ -63,6 +77,15 @@ if (plugins == NULL) { } ... tep_unload_plugins(plugins, tep); +... +void print_plugin(struct tep_handle *tep, const char *path, + const char *name, void *data) +{ + pritnf("Found libtraceevent plugin %s/%s\n", path, name); +} +... +tep_load_plugins_hook(tep, ".so", print_plugin, NULL); +... -- FILES diff --git a/tools/lib/traceevent/event-parse-local.h b/tools/lib/traceevent/event-parse-local.h index cee469803a34..d805a920af6f 100644 --- a/tools/lib/traceevent/event-parse-local.h +++ b/tools/lib/traceevent/event-parse-local.h @@ -13,6 +13,7 @@ struct func_map; struct func_list; struct event_handler; struct func_resolver; +struct tep_plugins_dir; struct tep_handle { int ref_count; @@ -47,7 +48,6 @@ struct tep_handle { struct printk_list *printklist; unsigned int printk_count; - struct tep_event **events; int nr_events; struct tep_event **sort_events; @@ -81,10 +81,30 @@ struct tep_handle { /* cache */ struct tep_event *last_event; + + struct tep_plugins_dir *plugins_dir; +}; + +enum tep_print_parse_type { + PRINT_FMT_STRING, + PRINT_FMT_ARG_DIGIT, + PRINT_FMT_ARG_POINTER, + PRINT_FMT_ARG_STRING, +}; + +struct tep_print_parse { + struct tep_print_parse *next; + + char *format; + int ls; + enum tep_print_parse_type type; + struct tep_print_arg *arg; + struct tep_print_arg *len_as_arg; }; void tep_free_event(struct tep_event *event); void tep_free_format_field(struct tep_format_field *field); +void tep_free_plugin_paths(struct tep_handle *tep); unsigned short tep_data2host2(struct tep_handle *tep, unsigned short data); unsigned int tep_data2host4(struct tep_handle *tep, unsigned int data); diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c index ba4f33804af1..5acc18b32606 100644 --- a/tools/lib/traceevent/event-parse.c +++ b/tools/lib/traceevent/event-parse.c @@ -4565,43 +4565,93 @@ get_bprint_format(void *data, int size __maybe_unused, return format; } -static void print_mac_arg(struct trace_seq *s, int mac, void *data, int size, - struct tep_event *event, struct tep_print_arg *arg) +static int print_mac_arg(struct trace_seq *s, const char *format, + void *data, int size, struct tep_event *event, + struct tep_print_arg *arg) { - unsigned char *buf; const char *fmt = "%.2x:%.2x:%.2x:%.2x:%.2x:%.2x"; + bool reverse = false; + unsigned char *buf; + int ret = 0; if (arg->type == TEP_PRINT_FUNC) { process_defined_func(s, data, size, event, arg); - return; + return 0; } if (arg->type != TEP_PRINT_FIELD) { trace_seq_printf(s, "ARG TYPE NOT FIELD BUT %d", arg->type); - return; + return 0; } - if (mac == 'm') + if (format[0] == 'm') { fmt = "%.2x%.2x%.2x%.2x%.2x%.2x"; + } else if (format[0] == 'M' && format[1] == 'F') { + fmt = "%.2x-%.2x-%.2x-%.2x-%.2x-%.2x"; + ret++; + } + if (format[1] == 'R') { + reverse = true; + ret++; + } + if (!arg->field.field) { arg->field.field = tep_find_any_field(event, arg->field.name); if (!arg->field.field) { do_warning_event(event, "%s: field %s not found", __func__, arg->field.name); - return; + return ret; } } if (arg->field.field->size != 6) { trace_seq_printf(s, "INVALIDMAC"); - return; + return ret; } + buf = data + arg->field.field->offset; - trace_seq_printf(s, fmt, buf[0], buf[1], buf[2], buf[3], buf[4], buf[5]); + if (reverse) + trace_seq_printf(s, fmt, buf[5], buf[4], buf[3], buf[2], buf[1], buf[0]); + else + trace_seq_printf(s, fmt, buf[0], buf[1], buf[2], buf[3], buf[4], buf[5]); + + return ret; } -static void print_ip4_addr(struct trace_seq *s, char i, unsigned char *buf) +static int parse_ip4_print_args(struct tep_handle *tep, + const char *ptr, bool *reverse) +{ + int ret = 0; + + *reverse = false; + + /* hnbl */ + switch (*ptr) { + case 'h': + if (tep->file_bigendian) + *reverse = false; + else + *reverse = true; + ret++; + break; + case 'l': + *reverse = true; + ret++; + break; + case 'n': + case 'b': + ret++; + /* fall through */ + default: + *reverse = false; + break; + } + + return ret; +} + +static void print_ip4_addr(struct trace_seq *s, char i, bool reverse, unsigned char *buf) { const char *fmt; @@ -4610,7 +4660,11 @@ static void print_ip4_addr(struct trace_seq *s, char i, unsigned char *buf) else fmt = "%d.%d.%d.%d"; - trace_seq_printf(s, fmt, buf[0], buf[1], buf[2], buf[3]); + if (reverse) + trace_seq_printf(s, fmt, buf[3], buf[2], buf[1], buf[0]); + else + trace_seq_printf(s, fmt, buf[0], buf[1], buf[2], buf[3]); + } static inline bool ipv6_addr_v4mapped(const struct in6_addr *a) @@ -4693,7 +4747,7 @@ static void print_ip6c_addr(struct trace_seq *s, unsigned char *addr) if (useIPv4) { if (needcolon) trace_seq_printf(s, ":"); - print_ip4_addr(s, 'I', &in6.s6_addr[12]); + print_ip4_addr(s, 'I', false, &in6.s6_addr[12]); } return; @@ -4722,16 +4776,20 @@ static int print_ipv4_arg(struct trace_seq *s, const char *ptr, char i, void *data, int size, struct tep_event *event, struct tep_print_arg *arg) { + bool reverse = false; unsigned char *buf; + int ret; + + ret = parse_ip4_print_args(event->tep, ptr, &reverse); if (arg->type == TEP_PRINT_FUNC) { process_defined_func(s, data, size, event, arg); - return 0; + return ret; } if (arg->type != TEP_PRINT_FIELD) { trace_seq_printf(s, "ARG TYPE NOT FIELD BUT %d", arg->type); - return 0; + return ret; } if (!arg->field.field) { @@ -4740,7 +4798,7 @@ static int print_ipv4_arg(struct trace_seq *s, const char *ptr, char i, if (!arg->field.field) { do_warning("%s: field %s not found", __func__, arg->field.name); - return 0; + return ret; } } @@ -4748,11 +4806,12 @@ static int print_ipv4_arg(struct trace_seq *s, const char *ptr, char i, if (arg->field.field->size != 4) { trace_seq_printf(s, "INVALIDIPv4"); - return 0; + return ret; } - print_ip4_addr(s, i, buf); - return 0; + print_ip4_addr(s, i, reverse, buf); + return ret; + } static int print_ipv6_arg(struct trace_seq *s, const char *ptr, char i, @@ -4812,7 +4871,9 @@ static int print_ipsa_arg(struct trace_seq *s, const char *ptr, char i, char have_c = 0, have_p = 0; unsigned char *buf; struct sockaddr_storage *sa; + bool reverse = false; int rc = 0; + int ret; /* pISpc */ if (i == 'I') { @@ -4827,6 +4888,9 @@ static int print_ipsa_arg(struct trace_seq *s, const char *ptr, char i, rc++; } } + ret = parse_ip4_print_args(event->tep, ptr, &reverse); + ptr += ret; + rc += ret; if (arg->type == TEP_PRINT_FUNC) { process_defined_func(s, data, size, event, arg); @@ -4858,7 +4922,7 @@ static int print_ipsa_arg(struct trace_seq *s, const char *ptr, char i, return rc; } - print_ip4_addr(s, i, (unsigned char *) &sa4->sin_addr); + print_ip4_addr(s, i, reverse, (unsigned char *) &sa4->sin_addr); if (have_p) trace_seq_printf(s, ":%d", ntohs(sa4->sin_port)); @@ -4892,25 +4956,20 @@ static int print_ip_arg(struct trace_seq *s, const char *ptr, struct tep_print_arg *arg) { char i = *ptr; /* 'i' or 'I' */ - char ver; - int rc = 0; + int rc = 1; + /* IP version */ ptr++; - rc++; - ver = *ptr; - ptr++; - rc++; - - switch (ver) { + switch (*ptr) { case '4': - rc += print_ipv4_arg(s, ptr, i, data, size, event, arg); + rc += print_ipv4_arg(s, ptr + 1, i, data, size, event, arg); break; case '6': - rc += print_ipv6_arg(s, ptr, i, data, size, event, arg); + rc += print_ipv6_arg(s, ptr + 1, i, data, size, event, arg); break; case 'S': - rc += print_ipsa_arg(s, ptr, i, data, size, event, arg); + rc += print_ipsa_arg(s, ptr + 1, i, data, size, event, arg); break; default: return 0; @@ -4919,6 +4978,133 @@ static int print_ip_arg(struct trace_seq *s, const char *ptr, return rc; } +static const int guid_index[16] = {3, 2, 1, 0, 5, 4, 7, 6, 8, 9, 10, 11, 12, 13, 14, 15}; +static const int uuid_index[16] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}; + +static int print_uuid_arg(struct trace_seq *s, const char *ptr, + void *data, int size, struct tep_event *event, + struct tep_print_arg *arg) +{ + const int *index = uuid_index; + char *format = "%02x"; + int ret = 0; + char *buf; + int i; + + switch (*(ptr + 1)) { + case 'L': + format = "%02X"; + /* fall through */ + case 'l': + index = guid_index; + ret++; + break; + case 'B': + format = "%02X"; + /* fall through */ + case 'b': + ret++; + break; + } + + if (arg->type == TEP_PRINT_FUNC) { + process_defined_func(s, data, size, event, arg); + return ret; + } + + if (arg->type != TEP_PRINT_FIELD) { + trace_seq_printf(s, "ARG TYPE NOT FIELD BUT %d", arg->type); + return ret; + } + + if (!arg->field.field) { + arg->field.field = + tep_find_any_field(event, arg->field.name); + if (!arg->field.field) { + do_warning("%s: field %s not found", + __func__, arg->field.name); + return ret; + } + } + + if (arg->field.field->size != 16) { + trace_seq_printf(s, "INVALIDUUID"); + return ret; + } + + buf = data + arg->field.field->offset; + + for (i = 0; i < 16; i++) { + trace_seq_printf(s, format, buf[index[i]] & 0xff); + switch (i) { + case 3: + case 5: + case 7: + case 9: + trace_seq_printf(s, "-"); + break; + } + } + + return ret; +} + +static int print_raw_buff_arg(struct trace_seq *s, const char *ptr, + void *data, int size, struct tep_event *event, + struct tep_print_arg *arg, int print_len) +{ + int plen = print_len; + char *delim = " "; + int ret = 0; + char *buf; + int i; + unsigned long offset; + int arr_len; + + switch (*(ptr + 1)) { + case 'C': + delim = ":"; + ret++; + break; + case 'D': + delim = "-"; + ret++; + break; + case 'N': + delim = ""; + ret++; + break; + } + + if (arg->type == TEP_PRINT_FUNC) { + process_defined_func(s, data, size, event, arg); + return ret; + } + + if (arg->type != TEP_PRINT_DYNAMIC_ARRAY) { + trace_seq_printf(s, "ARG TYPE NOT FIELD BUT %d", arg->type); + return ret; + } + + offset = tep_read_number(event->tep, + data + arg->dynarray.field->offset, + arg->dynarray.field->size); + arr_len = (unsigned long long)(offset >> 16); + buf = data + (offset & 0xffff); + + if (arr_len < plen) + plen = arr_len; + + if (plen < 1) + return ret; + + trace_seq_printf(s, "%02x", buf[0] & 0xff); + for (i = 1; i < plen; i++) + trace_seq_printf(s, "%s%02x", delim, buf[i] & 0xff); + + return ret; +} + static int is_printable_array(char *p, unsigned int len) { unsigned int i; @@ -5007,264 +5193,567 @@ void tep_print_fields(struct trace_seq *s, void *data, } } -static void pretty_print(struct trace_seq *s, void *data, int size, struct tep_event *event) +static int print_function(struct trace_seq *s, const char *format, + void *data, int size, struct tep_event *event, + struct tep_print_arg *arg) { - struct tep_handle *tep = event->tep; - struct tep_print_fmt *print_fmt = &event->print_fmt; - struct tep_print_arg *arg = print_fmt->args; - struct tep_print_arg *args = NULL; - const char *ptr = print_fmt->format; - unsigned long long val; struct func_map *func; - const char *saveptr; - struct trace_seq p; - char *bprint_fmt = NULL; - char format[32]; - int show_func; - int len_as_arg; - int len_arg = 0; - int len; - int ls; + unsigned long long val; - if (event->flags & TEP_EVENT_FL_FAILED) { - trace_seq_printf(s, "[FAILED TO PARSE]"); - tep_print_fields(s, data, size, event); - return; + val = eval_num_arg(data, size, event, arg); + func = find_func(event->tep, val); + if (func) { + trace_seq_puts(s, func->func); + if (*format == 'F' || *format == 'S') + trace_seq_printf(s, "+0x%llx", val - func->addr); + } else { + if (event->tep->long_size == 4) + trace_seq_printf(s, "0x%lx", (long)val); + else + trace_seq_printf(s, "0x%llx", (long long)val); } - if (event->flags & TEP_EVENT_FL_ISBPRINT) { - bprint_fmt = get_bprint_format(data, size, event); - args = make_bprint_args(bprint_fmt, data, size, event); - arg = args; - ptr = bprint_fmt; + return 0; +} + +static int print_arg_pointer(struct trace_seq *s, const char *format, int plen, + void *data, int size, + struct tep_event *event, struct tep_print_arg *arg) +{ + unsigned long long val; + int ret = 1; + + if (arg->type == TEP_PRINT_BSTRING) { + trace_seq_puts(s, arg->string.string); + return 0; + } + while (*format) { + if (*format == 'p') { + format++; + break; + } + format++; } - for (; *ptr; ptr++) { - ls = 0; - if (*ptr == '\\') { - ptr++; - switch (*ptr) { + switch (*format) { + case 'F': + case 'f': + case 'S': + case 's': + ret += print_function(s, format, data, size, event, arg); + break; + case 'M': + case 'm': + ret += print_mac_arg(s, format, data, size, event, arg); + break; + case 'I': + case 'i': + ret += print_ip_arg(s, format, data, size, event, arg); + break; + case 'U': + ret += print_uuid_arg(s, format, data, size, event, arg); + break; + case 'h': + ret += print_raw_buff_arg(s, format, data, size, event, arg, plen); + break; + default: + ret = 0; + val = eval_num_arg(data, size, event, arg); + trace_seq_printf(s, "%p", (void *)(intptr_t)val); + break; + } + + return ret; + +} + +static int print_arg_number(struct trace_seq *s, const char *format, int plen, + void *data, int size, int ls, + struct tep_event *event, struct tep_print_arg *arg) +{ + unsigned long long val; + + val = eval_num_arg(data, size, event, arg); + + switch (ls) { + case -2: + if (plen >= 0) + trace_seq_printf(s, format, plen, (char)val); + else + trace_seq_printf(s, format, (char)val); + break; + case -1: + if (plen >= 0) + trace_seq_printf(s, format, plen, (short)val); + else + trace_seq_printf(s, format, (short)val); + break; + case 0: + if (plen >= 0) + trace_seq_printf(s, format, plen, (int)val); + else + trace_seq_printf(s, format, (int)val); + break; + case 1: + if (plen >= 0) + trace_seq_printf(s, format, plen, (long)val); + else + trace_seq_printf(s, format, (long)val); + break; + case 2: + if (plen >= 0) + trace_seq_printf(s, format, plen, (long long)val); + else + trace_seq_printf(s, format, (long long)val); + break; + default: + do_warning_event(event, "bad count (%d)", ls); + event->flags |= TEP_EVENT_FL_FAILED; + } + return 0; +} + + +static void print_arg_string(struct trace_seq *s, const char *format, int plen, + void *data, int size, + struct tep_event *event, struct tep_print_arg *arg) +{ + struct trace_seq p; + + /* Use helper trace_seq */ + trace_seq_init(&p); + print_str_arg(&p, data, size, event, + format, plen, arg); + trace_seq_terminate(&p); + trace_seq_puts(s, p.buffer); + trace_seq_destroy(&p); +} + +static int parse_arg_format_pointer(const char *format) +{ + int ret = 0; + int index; + int loop; + + switch (*format) { + case 'F': + case 'S': + case 'f': + case 's': + ret++; + break; + case 'M': + case 'm': + /* [mM]R , [mM]F */ + switch (format[1]) { + case 'R': + case 'F': + ret++; + break; + } + ret++; + break; + case 'I': + case 'i': + index = 2; + loop = 1; + switch (format[1]) { + case 'S': + /*[S][pfs]*/ + while (loop) { + switch (format[index]) { + case 'p': + case 'f': + case 's': + ret++; + index++; + break; + default: + loop = 0; + break; + } + } + /* fall through */ + case '4': + /* [4S][hnbl] */ + switch (format[index]) { + case 'h': case 'n': - trace_seq_putc(s, '\n'); - break; - case 't': - trace_seq_putc(s, '\t'); - break; - case 'r': - trace_seq_putc(s, '\r'); - break; - case '\\': - trace_seq_putc(s, '\\'); + case 'l': + case 'b': + ret++; + index++; break; - default: - trace_seq_putc(s, *ptr); + } + if (format[1] == '4') { + ret++; break; } + /* fall through */ + case '6': + /* [6S]c */ + if (format[index] == 'c') + ret++; + ret++; + break; + } + ret++; + break; + case 'U': + switch (format[1]) { + case 'L': + case 'l': + case 'B': + case 'b': + ret++; + break; + } + ret++; + break; + case 'h': + switch (format[1]) { + case 'C': + case 'D': + case 'N': + ret++; + break; + } + ret++; + break; + default: + break; + } - } else if (*ptr == '%') { - saveptr = ptr; - show_func = 0; - len_as_arg = 0; - cont_process: - ptr++; - switch (*ptr) { - case '%': - trace_seq_putc(s, '%'); - break; - case '#': - /* FIXME: need to handle properly */ - goto cont_process; - case 'h': - ls--; - goto cont_process; - case 'l': - ls++; - goto cont_process; - case 'L': - ls = 2; - goto cont_process; - case '*': - /* The argument is the length. */ - if (!arg) { - do_warning_event(event, "no argument match"); - event->flags |= TEP_EVENT_FL_FAILED; - goto out_failed; - } - len_arg = eval_num_arg(data, size, event, arg); - len_as_arg = 1; - arg = arg->next; - goto cont_process; - case '.': - case 'z': - case 'Z': - case '0' ... '9': - case '-': - goto cont_process; - case 'p': - if (tep->long_size == 4) - ls = 1; - else - ls = 2; + return ret; +} - if (isalnum(ptr[1])) - ptr++; +static void free_parse_args(struct tep_print_parse *arg) +{ + struct tep_print_parse *del; - if (arg->type == TEP_PRINT_BSTRING) { - trace_seq_puts(s, arg->string.string); - arg = arg->next; - break; - } + while (arg) { + del = arg; + arg = del->next; + free(del->format); + free(del); + } +} - if (*ptr == 'F' || *ptr == 'f' || - *ptr == 'S' || *ptr == 's') { - show_func = *ptr; - } else if (*ptr == 'M' || *ptr == 'm') { - print_mac_arg(s, *ptr, data, size, event, arg); - arg = arg->next; - break; - } else if (*ptr == 'I' || *ptr == 'i') { - int n; +static int parse_arg_add(struct tep_print_parse **parse, char *format, + enum tep_print_parse_type type, + struct tep_print_arg *arg, + struct tep_print_arg *len_as_arg, + int ls) +{ + struct tep_print_parse *parg = NULL; - n = print_ip_arg(s, ptr, data, size, event, arg); - if (n > 0) { - ptr += n - 1; - arg = arg->next; - break; - } - } + parg = calloc(1, sizeof(*parg)); + if (!parg) + goto error; + parg->format = strdup(format); + if (!parg->format) + goto error; + parg->type = type; + parg->arg = arg; + parg->len_as_arg = len_as_arg; + parg->ls = ls; + *parse = parg; + return 0; +error: + if (parg) { + free(parg->format); + free(parg); + } + return -1; +} - /* fall through */ - case 'd': - case 'u': - case 'i': - case 'x': - case 'X': - case 'o': - if (!arg) { - do_warning_event(event, "no argument match"); - event->flags |= TEP_EVENT_FL_FAILED; - goto out_failed; - } +static int parse_arg_format(struct tep_print_parse **parse, + struct tep_event *event, + const char *format, struct tep_print_arg **arg) +{ + struct tep_print_arg *len_arg = NULL; + char print_format[32]; + const char *start = format; + int ret = 0; + int ls = 0; + int res; + int len; - len = ((unsigned long)ptr + 1) - - (unsigned long)saveptr; + format++; + ret++; + for (; *format; format++) { + switch (*format) { + case '#': + /* FIXME: need to handle properly */ + break; + case 'h': + ls--; + break; + case 'l': + ls++; + break; + case 'L': + ls = 2; + break; + case '.': + case 'z': + case 'Z': + case '0' ... '9': + case '-': + break; + case '*': + /* The argument is the length. */ + if (!*arg) { + do_warning_event(event, "no argument match"); + event->flags |= TEP_EVENT_FL_FAILED; + goto out_failed; + } + if (len_arg) { + do_warning_event(event, "argument already matched"); + event->flags |= TEP_EVENT_FL_FAILED; + goto out_failed; + } + len_arg = *arg; + *arg = (*arg)->next; + break; + case 'p': + if (!*arg) { + do_warning_event(event, "no argument match"); + event->flags |= TEP_EVENT_FL_FAILED; + goto out_failed; + } + res = parse_arg_format_pointer(format + 1); + if (res > 0) { + format += res; + ret += res; + } + len = ((unsigned long)format + 1) - + (unsigned long)start; + /* should never happen */ + if (len > 31) { + do_warning_event(event, "bad format!"); + event->flags |= TEP_EVENT_FL_FAILED; + len = 31; + } + memcpy(print_format, start, len); + print_format[len] = 0; - /* should never happen */ - if (len > 31) { - do_warning_event(event, "bad format!"); - event->flags |= TEP_EVENT_FL_FAILED; - len = 31; - } + parse_arg_add(parse, print_format, + PRINT_FMT_ARG_POINTER, *arg, len_arg, ls); + *arg = (*arg)->next; + ret++; + return ret; + case 'd': + case 'u': + case 'i': + case 'x': + case 'X': + case 'o': + if (!*arg) { + do_warning_event(event, "no argument match"); + event->flags |= TEP_EVENT_FL_FAILED; + goto out_failed; + } - memcpy(format, saveptr, len); - format[len] = 0; + len = ((unsigned long)format + 1) - + (unsigned long)start; - val = eval_num_arg(data, size, event, arg); - arg = arg->next; + /* should never happen */ + if (len > 30) { + do_warning_event(event, "bad format!"); + event->flags |= TEP_EVENT_FL_FAILED; + len = 31; + } + memcpy(print_format, start, len); + print_format[len] = 0; - if (show_func) { - func = find_func(tep, val); - if (func) { - trace_seq_puts(s, func->func); - if (show_func == 'F') - trace_seq_printf(s, - "+0x%llx", - val - func->addr); - break; - } - } - if (tep->long_size == 8 && ls == 1 && - sizeof(long) != 8) { - char *p; - - /* make %l into %ll */ - if (ls == 1 && (p = strchr(format, 'l'))) - memmove(p+1, p, strlen(p)+1); - else if (strcmp(format, "%p") == 0) - strcpy(format, "0x%llx"); - ls = 2; - } - switch (ls) { - case -2: - if (len_as_arg) - trace_seq_printf(s, format, len_arg, (char)val); - else - trace_seq_printf(s, format, (char)val); - break; - case -1: - if (len_as_arg) - trace_seq_printf(s, format, len_arg, (short)val); - else - trace_seq_printf(s, format, (short)val); - break; - case 0: - if (len_as_arg) - trace_seq_printf(s, format, len_arg, (int)val); - else - trace_seq_printf(s, format, (int)val); - break; - case 1: - if (len_as_arg) - trace_seq_printf(s, format, len_arg, (long)val); - else - trace_seq_printf(s, format, (long)val); - break; - case 2: - if (len_as_arg) - trace_seq_printf(s, format, len_arg, - (long long)val); - else - trace_seq_printf(s, format, (long long)val); - break; - default: - do_warning_event(event, "bad count (%d)", ls); - event->flags |= TEP_EVENT_FL_FAILED; - } - break; - case 's': - if (!arg) { - do_warning_event(event, "no matching argument"); - event->flags |= TEP_EVENT_FL_FAILED; - goto out_failed; - } + if (event->tep->long_size == 8 && ls == 1 && + sizeof(long) != 8) { + char *p; + + /* make %l into %ll */ + if (ls == 1 && (p = strchr(print_format, 'l'))) + memmove(p+1, p, strlen(p)+1); + ls = 2; + } + if (ls < -2 || ls > 2) { + do_warning_event(event, "bad count (%d)", ls); + event->flags |= TEP_EVENT_FL_FAILED; + } + parse_arg_add(parse, print_format, + PRINT_FMT_ARG_DIGIT, *arg, len_arg, ls); + *arg = (*arg)->next; + ret++; + return ret; + case 's': + if (!*arg) { + do_warning_event(event, "no matching argument"); + event->flags |= TEP_EVENT_FL_FAILED; + goto out_failed; + } - len = ((unsigned long)ptr + 1) - - (unsigned long)saveptr; + len = ((unsigned long)format + 1) - + (unsigned long)start; - /* should never happen */ - if (len > 31) { - do_warning_event(event, "bad format!"); - event->flags |= TEP_EVENT_FL_FAILED; - len = 31; - } + /* should never happen */ + if (len > 31) { + do_warning_event(event, "bad format!"); + event->flags |= TEP_EVENT_FL_FAILED; + len = 31; + } + + memcpy(print_format, start, len); + print_format[len] = 0; + + parse_arg_add(parse, print_format, + PRINT_FMT_ARG_STRING, *arg, len_arg, 0); + *arg = (*arg)->next; + ret++; + return ret; + default: + snprintf(print_format, 32, ">%c<", *format); + parse_arg_add(parse, print_format, + PRINT_FMT_STRING, NULL, NULL, 0); + ret++; + return ret; + } + ret++; + } + +out_failed: + return ret; - memcpy(format, saveptr, len); - format[len] = 0; - if (!len_as_arg) - len_arg = -1; - /* Use helper trace_seq */ - trace_seq_init(&p); - print_str_arg(&p, data, size, event, - format, len_arg, arg); - trace_seq_terminate(&p); - trace_seq_puts(s, p.buffer); - trace_seq_destroy(&p); - arg = arg->next; +} + +static int parse_arg_string(struct tep_print_parse **parse, const char *format) +{ + struct trace_seq s; + int ret = 0; + + trace_seq_init(&s); + for (; *format; format++) { + if (*format == '\\') { + format++; + ret++; + switch (*format) { + case 'n': + trace_seq_putc(&s, '\n'); + break; + case 't': + trace_seq_putc(&s, '\t'); + break; + case 'r': + trace_seq_putc(&s, '\r'); + break; + case '\\': + trace_seq_putc(&s, '\\'); break; default: - trace_seq_printf(s, ">%c<", *ptr); - + trace_seq_putc(&s, *format); + break; } + } else if (*format == '%') { + if (*(format + 1) == '%') { + trace_seq_putc(&s, '%'); + format++; + ret++; + } else + break; } else - trace_seq_putc(s, *ptr); + trace_seq_putc(&s, *format); + + ret++; + } + trace_seq_terminate(&s); + parse_arg_add(parse, s.buffer, PRINT_FMT_STRING, NULL, NULL, 0); + trace_seq_destroy(&s); + + return ret; +} + +static struct tep_print_parse * +parse_args(struct tep_event *event, const char *format, struct tep_print_arg *arg) +{ + struct tep_print_parse *parse_ret = NULL; + struct tep_print_parse **parse = NULL; + int ret; + int len; + + len = strlen(format); + while (*format) { + if (!parse_ret) + parse = &parse_ret; + if (*format == '%' && *(format + 1) != '%') + ret = parse_arg_format(parse, event, format, &arg); + else + ret = parse_arg_string(parse, format); + if (*parse) + parse = &((*parse)->next); + + len -= ret; + if (len > 0) + format += ret; + else + break; + } + return parse_ret; +} + +static void print_event_cache(struct tep_print_parse *parse, struct trace_seq *s, + void *data, int size, struct tep_event *event) +{ + int len_arg; + + while (parse) { + if (parse->len_as_arg) + len_arg = eval_num_arg(data, size, event, parse->len_as_arg); + switch (parse->type) { + case PRINT_FMT_ARG_DIGIT: + print_arg_number(s, parse->format, + parse->len_as_arg ? len_arg : -1, data, + size, parse->ls, event, parse->arg); + break; + case PRINT_FMT_ARG_POINTER: + print_arg_pointer(s, parse->format, + parse->len_as_arg ? len_arg : 1, + data, size, event, parse->arg); + break; + case PRINT_FMT_ARG_STRING: + print_arg_string(s, parse->format, + parse->len_as_arg ? len_arg : -1, + data, size, event, parse->arg); + break; + case PRINT_FMT_STRING: + default: + trace_seq_printf(s, "%s", parse->format); + break; + } + parse = parse->next; } +} + +static void pretty_print(struct trace_seq *s, void *data, int size, struct tep_event *event) +{ + struct tep_print_parse *parse = event->print_fmt.print_cache; + struct tep_print_arg *args = NULL; + char *bprint_fmt = NULL; if (event->flags & TEP_EVENT_FL_FAILED) { -out_failed: trace_seq_printf(s, "[FAILED TO PARSE]"); + tep_print_fields(s, data, size, event); + return; } - if (args) { + if (event->flags & TEP_EVENT_FL_ISBPRINT) { + bprint_fmt = get_bprint_format(data, size, event); + args = make_bprint_args(bprint_fmt, data, size, event); + parse = parse_args(event, bprint_fmt, args); + } + + print_event_cache(parse, s, data, size, event); + + if (event->flags & TEP_EVENT_FL_ISBPRINT) { + free_parse_args(parse); free_args(args); free(bprint_fmt); } @@ -6363,9 +6852,13 @@ enum tep_errno __tep_parse_format(struct tep_event **eventp, *list = arg; list = &arg->next; } - return 0; } + if (!(event->flags & TEP_EVENT_FL_ISBPRINT)) + event->print_fmt.print_cache = parse_args(event, + event->print_fmt.format, + event->print_fmt.args); + return 0; event_parse_failed: @@ -7032,7 +7525,7 @@ void tep_free_event(struct tep_event *event) free(event->print_fmt.format); free_args(event->print_fmt.args); - + free_parse_args(event->print_fmt.print_cache); free(event); } @@ -7120,6 +7613,7 @@ void tep_free(struct tep_handle *tep) free(tep->events); free(tep->sort_events); free(tep->func_resolver); + tep_free_plugin_paths(tep); free(tep); } diff --git a/tools/lib/traceevent/event-parse.h b/tools/lib/traceevent/event-parse.h index b77837f75a0d..c29b693e31ee 100644 --- a/tools/lib/traceevent/event-parse.h +++ b/tools/lib/traceevent/event-parse.h @@ -1,21 +1,7 @@ +/* SPDX-License-Identifier: LGPL-2.1 */ /* * Copyright (C) 2009, 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com> * - * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; - * version 2.1 of the License (not later!) - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this program; if not, see <http://www.gnu.org/licenses> - * - * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ #ifndef _PARSE_EVENTS_H #define _PARSE_EVENTS_H @@ -272,9 +258,12 @@ struct tep_print_arg { }; }; +struct tep_print_parse; + struct tep_print_fmt { char *format; struct tep_print_arg *args; + struct tep_print_parse *print_cache; }; struct tep_event { @@ -379,7 +368,7 @@ enum tep_errno { * errno since SUS requires the errno has distinct positive values. * See 'Issue 6' in the link below. * - * http://pubs.opengroup.org/onlinepubs/9699919799/basedefs/errno.h.html + * https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/errno.h.html */ __TEP_ERRNO__START = -100000, @@ -393,14 +382,29 @@ struct tep_plugin_list; #define INVALID_PLUGIN_LIST_OPTION ((char **)((unsigned long)-1)) +enum tep_plugin_load_priority { + TEP_PLUGIN_FIRST, + TEP_PLUGIN_LAST, +}; + +int tep_add_plugin_path(struct tep_handle *tep, char *path, + enum tep_plugin_load_priority prio); struct tep_plugin_list *tep_load_plugins(struct tep_handle *tep); void tep_unload_plugins(struct tep_plugin_list *plugin_list, struct tep_handle *tep); +void tep_load_plugins_hook(struct tep_handle *tep, const char *suffix, + void (*load_plugin)(struct tep_handle *tep, + const char *path, + const char *name, + void *data), + void *data); char **tep_plugin_list_options(void); void tep_plugin_free_options_list(char **list); int tep_plugin_add_options(const char *name, struct tep_plugin_option *options); +int tep_plugin_add_option(const char *name, const char *val); void tep_plugin_remove_options(struct tep_plugin_option *options); +void tep_plugin_print_options(struct trace_seq *s); void tep_print_plugins(struct trace_seq *s, const char *prefix, const char *suffix, const struct tep_plugin_list *list); diff --git a/tools/lib/traceevent/event-plugin.c b/tools/lib/traceevent/event-plugin.c index e1f7ddd5a6cf..e7c2acb8680f 100644 --- a/tools/lib/traceevent/event-plugin.c +++ b/tools/lib/traceevent/event-plugin.c @@ -13,6 +13,7 @@ #include <sys/stat.h> #include <unistd.h> #include <dirent.h> +#include <errno.h> #include "event-parse.h" #include "event-parse-local.h" #include "event-utils.h" @@ -38,6 +39,12 @@ struct tep_plugin_list { void *handle; }; +struct tep_plugins_dir { + struct tep_plugins_dir *next; + char *path; + enum tep_plugin_load_priority prio; +}; + static void lower_case(char *str) { if (!str) @@ -247,6 +254,170 @@ void tep_plugin_remove_options(struct tep_plugin_option *options) } } +static int parse_option_name(char **option, char **plugin) +{ + char *p; + + *plugin = NULL; + + if ((p = strstr(*option, ":"))) { + *plugin = *option; + *p = '\0'; + *option = strdup(p + 1); + if (!*option) + return -1; + } + return 0; +} + +static struct tep_plugin_option * +find_registered_option(const char *plugin, const char *option) +{ + struct registered_plugin_options *reg; + struct tep_plugin_option *op; + const char *op_plugin; + + for (reg = registered_options; reg; reg = reg->next) { + for (op = reg->options; op->name; op++) { + if (op->plugin_alias) + op_plugin = op->plugin_alias; + else + op_plugin = op->file; + + if (plugin && strcmp(plugin, op_plugin) != 0) + continue; + if (strcmp(option, op->name) != 0) + continue; + + return op; + } + } + + return NULL; +} + +static int process_option(const char *plugin, const char *option, const char *val) +{ + struct tep_plugin_option *op; + + op = find_registered_option(plugin, option); + if (!op) + return 0; + + return update_option_value(op, val); +} + +/** + * tep_plugin_add_option - add an option/val pair to set plugin options + * @name: The name of the option (format: <plugin>:<option> or just <option>) + * @val: (optional) the value for the option + * + * Modify a plugin option. If @val is given than the value of the option + * is set (note, some options just take a boolean, so @val must be either + * "1" or "0" or "true" or "false"). + */ +int tep_plugin_add_option(const char *name, const char *val) +{ + struct trace_plugin_options *op; + char *option_str; + char *plugin; + + option_str = strdup(name); + if (!option_str) + return -ENOMEM; + + if (parse_option_name(&option_str, &plugin) < 0) + return -ENOMEM; + + /* If the option exists, update the val */ + for (op = trace_plugin_options; op; op = op->next) { + /* Both must be NULL or not NULL */ + if ((!plugin || !op->plugin) && plugin != op->plugin) + continue; + if (plugin && strcmp(plugin, op->plugin) != 0) + continue; + if (strcmp(op->option, option_str) != 0) + continue; + + /* update option */ + free(op->value); + if (val) { + op->value = strdup(val); + if (!op->value) + goto out_free; + } else + op->value = NULL; + + /* plugin and option_str don't get freed at the end */ + free(plugin); + free(option_str); + + plugin = op->plugin; + option_str = op->option; + break; + } + + /* If not found, create */ + if (!op) { + op = malloc(sizeof(*op)); + if (!op) + goto out_free; + memset(op, 0, sizeof(*op)); + op->plugin = plugin; + op->option = option_str; + if (val) { + op->value = strdup(val); + if (!op->value) { + free(op); + goto out_free; + } + } + op->next = trace_plugin_options; + trace_plugin_options = op; + } + + return process_option(plugin, option_str, val); + +out_free: + free(plugin); + free(option_str); + return -ENOMEM; +} + +static void print_op_data(struct trace_seq *s, const char *name, + const char *op) +{ + if (op) + trace_seq_printf(s, "%8s:\t%s\n", name, op); +} + +/** + * tep_plugin_print_options - print out the registered plugin options + * @s: The trace_seq descriptor to write the plugin options into + * + * Writes a list of options into trace_seq @s. + */ +void tep_plugin_print_options(struct trace_seq *s) +{ + struct registered_plugin_options *reg; + struct tep_plugin_option *op; + + for (reg = registered_options; reg; reg = reg->next) { + if (reg != registered_options) + trace_seq_printf(s, "============\n"); + for (op = reg->options; op->name; op++) { + if (op != reg->options) + trace_seq_printf(s, "------------\n"); + print_op_data(s, "file", op->file); + print_op_data(s, "plugin", op->plugin_alias); + print_op_data(s, "option", op->name); + print_op_data(s, "desc", op->description); + print_op_data(s, "value", op->value); + trace_seq_printf(s, "%8s:\t%d\n", "set", op->set); + } + } +} + /** * tep_print_plugins - print out the list of plugins loaded * @s: the trace_seq descripter to write to @@ -273,6 +444,7 @@ load_plugin(struct tep_handle *tep, const char *path, const char *file, void *data) { struct tep_plugin_list **plugin_list = data; + struct tep_plugin_option *options; tep_plugin_load_func func; struct tep_plugin_list *list; const char *alias; @@ -297,6 +469,16 @@ load_plugin(struct tep_handle *tep, const char *path, if (!alias) alias = file; + options = dlsym(handle, TEP_PLUGIN_OPTIONS_NAME); + if (options) { + while (options->name) { + ret = update_option(alias, options); + if (ret < 0) + goto out_free; + options++; + } + } + func = dlsym(handle, TEP_PLUGIN_LOADER_NAME); if (!func) { warning("could not find func '%s' in plugin '%s'\n%s\n", @@ -365,28 +547,53 @@ load_plugins_dir(struct tep_handle *tep, const char *suffix, closedir(dir); } -static void -load_plugins(struct tep_handle *tep, const char *suffix, - void (*load_plugin)(struct tep_handle *tep, - const char *path, - const char *name, - void *data), - void *data) +/** + * tep_load_plugins_hook - call a user specified callback to load a plugin + * @tep: handler to traceevent context + * @suffix: filter only plugin files with given suffix + * @load_plugin: user specified callback, called for each plugin file + * @data: custom context, passed to @load_plugin + * + * Searches for traceevent plugin files and calls @load_plugin for each + * The order of plugins search is: + * - Directories, specified in @tep->plugins_dir and priority TEP_PLUGIN_FIRST + * - Directory, specified at compile time with PLUGIN_TRACEEVENT_DIR + * - Directory, specified by environment variable TRACEEVENT_PLUGIN_DIR + * - In user's home: ~/.local/lib/traceevent/plugins/ + * - Directories, specified in @tep->plugins_dir and priority TEP_PLUGIN_LAST + * + */ +void tep_load_plugins_hook(struct tep_handle *tep, const char *suffix, + void (*load_plugin)(struct tep_handle *tep, + const char *path, + const char *name, + void *data), + void *data) { + struct tep_plugins_dir *dir = NULL; char *home; char *path; char *envdir; int ret; - if (tep->flags & TEP_DISABLE_PLUGINS) + if (tep && tep->flags & TEP_DISABLE_PLUGINS) return; + if (tep) + dir = tep->plugins_dir; + while (dir) { + if (dir->prio == TEP_PLUGIN_FIRST) + load_plugins_dir(tep, suffix, dir->path, + load_plugin, data); + dir = dir->next; + } + /* * If a system plugin directory was defined, * check that first. */ #ifdef PLUGIN_DIR - if (!(tep->flags & TEP_DISABLE_SYS_PLUGINS)) + if (!tep || !(tep->flags & TEP_DISABLE_SYS_PLUGINS)) load_plugins_dir(tep, suffix, PLUGIN_DIR, load_plugin, data); #endif @@ -415,6 +622,15 @@ load_plugins(struct tep_handle *tep, const char *suffix, load_plugins_dir(tep, suffix, path, load_plugin, data); + if (tep) + dir = tep->plugins_dir; + while (dir) { + if (dir->prio == TEP_PLUGIN_LAST) + load_plugins_dir(tep, suffix, dir->path, + load_plugin, data); + dir = dir->next; + } + free(path); } @@ -423,10 +639,59 @@ tep_load_plugins(struct tep_handle *tep) { struct tep_plugin_list *list = NULL; - load_plugins(tep, ".so", load_plugin, &list); + tep_load_plugins_hook(tep, ".so", load_plugin, &list); return list; } +/** + * tep_add_plugin_path - Add a new plugin directory. + * @tep: Trace event handler. + * @path: Path to a directory. All plugin files in that + * directory will be loaded. + *@prio: Load priority of the plugins in that directory. + * + * Returns -1 in case of an error, 0 otherwise. + */ +int tep_add_plugin_path(struct tep_handle *tep, char *path, + enum tep_plugin_load_priority prio) +{ + struct tep_plugins_dir *dir; + + if (!tep || !path) + return -1; + + dir = calloc(1, sizeof(*dir)); + if (!dir) + return -1; + + dir->path = strdup(path); + if (!dir->path) { + free(dir); + return -1; + } + dir->prio = prio; + dir->next = tep->plugins_dir; + tep->plugins_dir = dir; + + return 0; +} + +void tep_free_plugin_paths(struct tep_handle *tep) +{ + struct tep_plugins_dir *dir; + + if (!tep) + return; + + dir = tep->plugins_dir; + while (dir) { + tep->plugins_dir = tep->plugins_dir->next; + free(dir->path); + free(dir); + dir = tep->plugins_dir; + } +} + void tep_unload_plugins(struct tep_plugin_list *plugin_list, struct tep_handle *tep) { diff --git a/tools/lib/traceevent/kbuffer.h b/tools/lib/traceevent/kbuffer.h index 5fa8292e341b..a2b522093cfd 100644 --- a/tools/lib/traceevent/kbuffer.h +++ b/tools/lib/traceevent/kbuffer.h @@ -1,22 +1,7 @@ +/* SPDX-License-Identifier: LGPL-2.1 */ /* * Copyright (C) 2012 Red Hat Inc, Steven Rostedt <srostedt@redhat.com> * - * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; - * version 2.1 of the License (not later!) - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - * - * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ #ifndef _KBUFFER_H #define _KBUFFER_H diff --git a/tools/lib/traceevent/plugins/Build b/tools/lib/traceevent/plugins/Build index 210d26910613..dd4da823c38f 100644 --- a/tools/lib/traceevent/plugins/Build +++ b/tools/lib/traceevent/plugins/Build @@ -5,6 +5,8 @@ plugin_kvm-y += plugin_kvm.o plugin_mac80211-y += plugin_mac80211.o plugin_sched_switch-y += plugin_sched_switch.o plugin_function-y += plugin_function.o +plugin_futex-y += plugin_futex.o plugin_xen-y += plugin_xen.o plugin_scsi-y += plugin_scsi.o plugin_cfg80211-y += plugin_cfg80211.o +plugin_tlb-y += plugin_tlb.o
\ No newline at end of file diff --git a/tools/lib/traceevent/plugins/Makefile b/tools/lib/traceevent/plugins/Makefile index 680d883efe05..47e802553250 100644 --- a/tools/lib/traceevent/plugins/Makefile +++ b/tools/lib/traceevent/plugins/Makefile @@ -134,9 +134,11 @@ PLUGINS += plugin_kvm.so PLUGINS += plugin_mac80211.so PLUGINS += plugin_sched_switch.so PLUGINS += plugin_function.so +PLUGINS += plugin_futex.so PLUGINS += plugin_xen.so PLUGINS += plugin_scsi.so PLUGINS += plugin_cfg80211.so +PLUGINS += plugin_tlb.so PLUGINS := $(addprefix $(OUTPUT),$(PLUGINS)) PLUGINS_IN := $(PLUGINS:.so=-in.o) diff --git a/tools/lib/traceevent/plugins/plugin_function.c b/tools/lib/traceevent/plugins/plugin_function.c index 7770fcb78e0f..807b16e1bf0f 100644 --- a/tools/lib/traceevent/plugins/plugin_function.c +++ b/tools/lib/traceevent/plugins/plugin_function.c @@ -1,21 +1,6 @@ +// SPDX-License-Identifier: LGPL-2.1 /* * Copyright (C) 2009, 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com> - * - * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; - * version 2.1 of the License (not later!) - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this program; if not, see <http://www.gnu.org/licenses> - * - * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ #include <stdio.h> #include <stdlib.h> @@ -50,12 +35,20 @@ struct tep_plugin_option plugin_options[] = .set = 1, }, { + .name = "offset", + .plugin_alias = "ftrace", + .description = + "Show function names as well as their offsets", + .set = 0, + }, + { .name = NULL, } }; static struct tep_plugin_option *ftrace_parent = &plugin_options[0]; static struct tep_plugin_option *ftrace_indent = &plugin_options[1]; +static struct tep_plugin_option *ftrace_offset = &plugin_options[2]; static void add_child(struct func_stack *stack, const char *child, int pos) { @@ -123,6 +116,18 @@ static int add_and_get_index(const char *parent, const char *child, int cpu) return 0; } +static void show_function(struct trace_seq *s, struct tep_handle *tep, + const char *func, unsigned long long function) +{ + unsigned long long offset; + + trace_seq_printf(s, "%s", func); + if (ftrace_offset->set) { + offset = tep_find_function_address(tep, function); + trace_seq_printf(s, "+0x%x ", (int)(function - offset)); + } +} + static int function_handler(struct trace_seq *s, struct tep_record *record, struct tep_event *event, void *context) { @@ -149,14 +154,14 @@ static int function_handler(struct trace_seq *s, struct tep_record *record, trace_seq_printf(s, "%*s", index*3, ""); if (func) - trace_seq_printf(s, "%s", func); + show_function(s, tep, func, function); else trace_seq_printf(s, "0x%llx", function); if (ftrace_parent->set) { trace_seq_printf(s, " <-- "); if (parent) - trace_seq_printf(s, "%s", parent); + show_function(s, tep, parent, pfunction); else trace_seq_printf(s, "0x%llx", pfunction); } @@ -164,11 +169,93 @@ static int function_handler(struct trace_seq *s, struct tep_record *record, return 0; } +static int +trace_stack_handler(struct trace_seq *s, struct tep_record *record, + struct tep_event *event, void *context) +{ + struct tep_format_field *field; + unsigned long long addr; + const char *func; + int long_size; + void *data = record->data; + + field = tep_find_any_field(event, "caller"); + if (!field) { + trace_seq_printf(s, "<CANT FIND FIELD %s>", "caller"); + return 0; + } + + trace_seq_puts(s, "<stack trace >\n"); + + long_size = tep_get_long_size(event->tep); + + for (data += field->offset; data < record->data + record->size; + data += long_size) { + addr = tep_read_number(event->tep, data, long_size); + + if ((long_size == 8 && addr == (unsigned long long)-1) || + ((int)addr == -1)) + break; + + func = tep_find_function(event->tep, addr); + if (func) + trace_seq_printf(s, "=> %s (%llx)\n", func, addr); + else + trace_seq_printf(s, "=> %llx\n", addr); + } + + return 0; +} + +static int +trace_raw_data_handler(struct trace_seq *s, struct tep_record *record, + struct tep_event *event, void *context) +{ + struct tep_format_field *field; + unsigned long long id; + int long_size; + void *data = record->data; + + if (tep_get_field_val(s, event, "id", record, &id, 1)) + return trace_seq_putc(s, '!'); + + trace_seq_printf(s, "# %llx", id); + + field = tep_find_any_field(event, "buf"); + if (!field) { + trace_seq_printf(s, "<CANT FIND FIELD %s>", "buf"); + return 0; + } + + long_size = tep_get_long_size(event->tep); + + for (data += field->offset; data < record->data + record->size; + data += long_size) { + int size = sizeof(long); + int left = (record->data + record->size) - data; + int i; + + if (size > left) + size = left; + + for (i = 0; i < size; i++) + trace_seq_printf(s, " %02x", *(unsigned char *)(data + i)); + } + + return 0; +} + int TEP_PLUGIN_LOADER(struct tep_handle *tep) { tep_register_event_handler(tep, -1, "ftrace", "function", function_handler, NULL); + tep_register_event_handler(tep, -1, "ftrace", "kernel_stack", + trace_stack_handler, NULL); + + tep_register_event_handler(tep, -1, "ftrace", "raw_data", + trace_raw_data_handler, NULL); + tep_plugin_add_options("ftrace", plugin_options); return 0; diff --git a/tools/lib/traceevent/plugins/plugin_futex.c b/tools/lib/traceevent/plugins/plugin_futex.c new file mode 100644 index 000000000000..eb7c9f8a850a --- /dev/null +++ b/tools/lib/traceevent/plugins/plugin_futex.c @@ -0,0 +1,123 @@ +// SPDX-License-Identifier: LGPL-2.1 +/* + * Copyright (C) 2017 National Instruments Corp. + * + * Author: Julia Cartwright <julia@ni.com> + * + */ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <linux/futex.h> + +#include "event-parse.h" + +#define ARRAY_SIZE(_a) (sizeof(_a) / sizeof((_a)[0])) + +struct futex_args { + unsigned long long uaddr; + unsigned long long op; + unsigned long long val; + unsigned long long utime; /* or val2 */ + unsigned long long uaddr2; + unsigned long long val3; +}; + +struct futex_op { + const char *name; + const char *fmt_val; + const char *fmt_utime; + const char *fmt_uaddr2; + const char *fmt_val3; +}; + +static const struct futex_op futex_op_tbl[] = { + { "FUTEX_WAIT", " val=0x%08llx", " utime=0x%08llx", NULL, NULL }, + { "FUTEX_WAKE", " val=%llu", NULL, NULL, NULL }, + { "FUTEX_FD", " val=%llu", NULL, NULL, NULL }, + { "FUTEX_REQUEUE", " val=%llu", " val2=%llu", " uaddr2=0x%08llx", NULL }, + { "FUTEX_CMP_REQUEUE", " val=%llu", " val2=%llu", " uaddr2=0x%08llx", " val3=0x%08llx" }, + { "FUTEX_WAKE_OP", " val=%llu", " val2=%llu", " uaddr2=0x%08llx", " val3=0x%08llx" }, + { "FUTEX_LOCK_PI", NULL, " utime=0x%08llx", NULL, NULL }, + { "FUTEX_UNLOCK_PI", NULL, NULL, NULL, NULL }, + { "FUTEX_TRYLOCK_PI", NULL, NULL, NULL, NULL }, + { "FUTEX_WAIT_BITSET", " val=0x%08llx", " utime=0x%08llx", NULL, " val3=0x%08llx" }, + { "FUTEX_WAKE_BITSET", " val=%llu", NULL, NULL, " val3=0x%08llx" }, + { "FUTEX_WAIT_REQUEUE_PI", " val=0x%08llx", " utime=0x%08llx", " uaddr2=0x%08llx", " val3=0x%08llx" }, + { "FUTEX_CMP_REQUEUE_PI", " val=%llu", " val2=%llu", " uaddr2=0x%08llx", " val3=0x%08llx" }, +}; + + +static void futex_print(struct trace_seq *s, const struct futex_args *args, + const struct futex_op *fop) +{ + trace_seq_printf(s, " uaddr=0x%08llx", args->uaddr); + + if (fop->fmt_val) + trace_seq_printf(s, fop->fmt_val, args->val); + + if (fop->fmt_utime) + trace_seq_printf(s,fop->fmt_utime, args->utime); + + if (fop->fmt_uaddr2) + trace_seq_printf(s, fop->fmt_uaddr2, args->uaddr2); + + if (fop->fmt_val3) + trace_seq_printf(s, fop->fmt_val3, args->val3); +} + +static int futex_handler(struct trace_seq *s, struct tep_record *record, + struct tep_event *event, void *context) +{ + const struct futex_op *fop; + struct futex_args args; + unsigned long long cmd; + + if (tep_get_field_val(s, event, "uaddr", record, &args.uaddr, 1)) + return 1; + + if (tep_get_field_val(s, event, "op", record, &args.op, 1)) + return 1; + + if (tep_get_field_val(s, event, "val", record, &args.val, 1)) + return 1; + + if (tep_get_field_val(s, event, "utime", record, &args.utime, 1)) + return 1; + + if (tep_get_field_val(s, event, "uaddr2", record, &args.uaddr2, 1)) + return 1; + + if (tep_get_field_val(s, event, "val3", record, &args.val3, 1)) + return 1; + + cmd = args.op & FUTEX_CMD_MASK; + if (cmd >= ARRAY_SIZE(futex_op_tbl)) + return 1; + + fop = &futex_op_tbl[cmd]; + + trace_seq_printf(s, "op=%s", fop->name); + + if (args.op & FUTEX_PRIVATE_FLAG) + trace_seq_puts(s, "|FUTEX_PRIVATE_FLAG"); + + if (args.op & FUTEX_CLOCK_REALTIME) + trace_seq_puts(s, "|FUTEX_CLOCK_REALTIME"); + + futex_print(s, &args, fop); + return 0; +} + +int TEP_PLUGIN_LOADER(struct tep_handle *tep) +{ + tep_register_event_handler(tep, -1, "syscalls", "sys_enter_futex", + futex_handler, NULL); + return 0; +} + +void TEP_PLUGIN_UNLOADER(struct tep_handle *tep) +{ + tep_unregister_event_handler(tep, -1, "syscalls", "sys_enter_futex", + futex_handler, NULL); +} diff --git a/tools/lib/traceevent/plugins/plugin_hrtimer.c b/tools/lib/traceevent/plugins/plugin_hrtimer.c index bb434e0ed03a..d98466788f14 100644 --- a/tools/lib/traceevent/plugins/plugin_hrtimer.c +++ b/tools/lib/traceevent/plugins/plugin_hrtimer.c @@ -1,22 +1,7 @@ +// SPDX-License-Identifier: LGPL-2.1 /* * Copyright (C) 2009 Red Hat Inc, Steven Rostedt <srostedt@redhat.com> * Copyright (C) 2009 Johannes Berg <johannes@sipsolutions.net> - * - * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; - * version 2.1 of the License (not later!) - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this program; if not, see <http://www.gnu.org/licenses> - * - * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ #include <stdio.h> #include <stdlib.h> diff --git a/tools/lib/traceevent/plugins/plugin_jbd2.c b/tools/lib/traceevent/plugins/plugin_jbd2.c index 04fc125f38cb..69111a68d3cf 100644 --- a/tools/lib/traceevent/plugins/plugin_jbd2.c +++ b/tools/lib/traceevent/plugins/plugin_jbd2.c @@ -1,21 +1,6 @@ +// SPDX-License-Identifier: LGPL-2.1 /* * Copyright (C) 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com> - * - * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; - * version 2.1 of the License (not later!) - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this program; if not, see <http://www.gnu.org/licenses> - * - * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ #include <stdio.h> #include <stdlib.h> diff --git a/tools/lib/traceevent/plugins/plugin_kmem.c b/tools/lib/traceevent/plugins/plugin_kmem.c index edaec5d962c3..4b4f7f9616e3 100644 --- a/tools/lib/traceevent/plugins/plugin_kmem.c +++ b/tools/lib/traceevent/plugins/plugin_kmem.c @@ -1,21 +1,6 @@ +// SPDX-License-Identifier: LGPL-2.1 /* * Copyright (C) 2009 Red Hat Inc, Steven Rostedt <srostedt@redhat.com> - * - * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; - * version 2.1 of the License (not later!) - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this program; if not, see <http://www.gnu.org/licenses> - * - * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ #include <stdio.h> #include <stdlib.h> diff --git a/tools/lib/traceevent/plugins/plugin_kvm.c b/tools/lib/traceevent/plugins/plugin_kvm.c index c8e623065a7e..51ceeb9147eb 100644 --- a/tools/lib/traceevent/plugins/plugin_kvm.c +++ b/tools/lib/traceevent/plugins/plugin_kvm.c @@ -1,21 +1,6 @@ +// SPDX-License-Identifier: LGPL-2.1 /* * Copyright (C) 2009 Red Hat Inc, Steven Rostedt <srostedt@redhat.com> - * - * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; - * version 2.1 of the License (not later!) - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this program; if not, see <http://www.gnu.org/licenses> - * - * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ #include <stdio.h> #include <stdlib.h> @@ -155,7 +140,23 @@ static const char *disassemble(unsigned char *insn, int len, uint64_t rip, _ER(EXIT_WRITE_DR5, 0x035) \ _ER(EXIT_WRITE_DR6, 0x036) \ _ER(EXIT_WRITE_DR7, 0x037) \ - _ER(EXIT_EXCP_BASE, 0x040) \ + _ER(EXIT_EXCP_DE, 0x040) \ + _ER(EXIT_EXCP_DB, 0x041) \ + _ER(EXIT_EXCP_BP, 0x043) \ + _ER(EXIT_EXCP_OF, 0x044) \ + _ER(EXIT_EXCP_BR, 0x045) \ + _ER(EXIT_EXCP_UD, 0x046) \ + _ER(EXIT_EXCP_NM, 0x047) \ + _ER(EXIT_EXCP_DF, 0x048) \ + _ER(EXIT_EXCP_TS, 0x04a) \ + _ER(EXIT_EXCP_NP, 0x04b) \ + _ER(EXIT_EXCP_SS, 0x04c) \ + _ER(EXIT_EXCP_GP, 0x04d) \ + _ER(EXIT_EXCP_PF, 0x04e) \ + _ER(EXIT_EXCP_MF, 0x050) \ + _ER(EXIT_EXCP_AC, 0x051) \ + _ER(EXIT_EXCP_MC, 0x052) \ + _ER(EXIT_EXCP_XF, 0x053) \ _ER(EXIT_INTR, 0x060) \ _ER(EXIT_NMI, 0x061) \ _ER(EXIT_SMI, 0x062) \ @@ -201,7 +202,10 @@ static const char *disassemble(unsigned char *insn, int len, uint64_t rip, _ER(EXIT_MONITOR, 0x08a) \ _ER(EXIT_MWAIT, 0x08b) \ _ER(EXIT_MWAIT_COND, 0x08c) \ - _ER(EXIT_NPF, 0x400) \ + _ER(EXIT_XSETBV, 0x08d) \ + _ER(EXIT_NPF, 0x400) \ + _ER(EXIT_AVIC_INCOMPLETE_IPI, 0x401) \ + _ER(EXIT_AVIC_UNACCELERATED_ACCESS, 0x402) \ _ER(EXIT_ERR, -1) #define _ER(reason, val) { #reason, val }, @@ -241,7 +245,7 @@ static const char *find_exit_reason(unsigned isa, int val) } if (!strings) return "UNKNOWN-ISA"; - for (i = 0; strings[i].val >= 0; i++) + for (i = 0; strings[i].str; i++) if (strings[i].val == val) break; diff --git a/tools/lib/traceevent/plugins/plugin_mac80211.c b/tools/lib/traceevent/plugins/plugin_mac80211.c index 884303c26b5c..f48071e3cfb8 100644 --- a/tools/lib/traceevent/plugins/plugin_mac80211.c +++ b/tools/lib/traceevent/plugins/plugin_mac80211.c @@ -1,21 +1,6 @@ +// SPDX-License-Identifier: LGPL-2.1 /* * Copyright (C) 2009 Johannes Berg <johannes@sipsolutions.net> - * - * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; - * version 2.1 of the License (not later!) - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this program; if not, see <http://www.gnu.org/licenses> - * - * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ #include <stdio.h> #include <stdlib.h> diff --git a/tools/lib/traceevent/plugins/plugin_sched_switch.c b/tools/lib/traceevent/plugins/plugin_sched_switch.c index 957389a0ff7a..e12fa103820a 100644 --- a/tools/lib/traceevent/plugins/plugin_sched_switch.c +++ b/tools/lib/traceevent/plugins/plugin_sched_switch.c @@ -1,21 +1,6 @@ +// SPDX-License-Identifier: LGPL-2.1 /* * Copyright (C) 2009, 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com> - * - * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; - * version 2.1 of the License (not later!) - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this program; if not, see <http://www.gnu.org/licenses> - * - * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ #include <stdio.h> #include <stdlib.h> diff --git a/tools/lib/traceevent/plugins/plugin_tlb.c b/tools/lib/traceevent/plugins/plugin_tlb.c new file mode 100644 index 000000000000..43657fb60504 --- /dev/null +++ b/tools/lib/traceevent/plugins/plugin_tlb.c @@ -0,0 +1,66 @@ +// SPDX-License-Identifier: LGPL-2.1 +/* + * Copyright (C) 2015 Red Hat Inc, Steven Rostedt <srostedt@redhat.com> + */ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "event-parse.h" + +enum tlb_flush_reason { + TLB_FLUSH_ON_TASK_SWITCH, + TLB_REMOTE_SHOOTDOWN, + TLB_LOCAL_SHOOTDOWN, + TLB_LOCAL_MM_SHOOTDOWN, + NR_TLB_FLUSH_REASONS, +}; + +static int tlb_flush_handler(struct trace_seq *s, struct tep_record *record, + struct tep_event *event, void *context) +{ + unsigned long long val; + + trace_seq_printf(s, "pages="); + + tep_print_num_field(s, "%ld", event, "pages", record, 1); + + if (tep_get_field_val(s, event, "reason", record, &val, 1) < 0) + return -1; + + trace_seq_puts(s, " reason="); + + switch (val) { + case TLB_FLUSH_ON_TASK_SWITCH: + trace_seq_puts(s, "flush on task switch"); + break; + case TLB_REMOTE_SHOOTDOWN: + trace_seq_puts(s, "remote shootdown"); + break; + case TLB_LOCAL_SHOOTDOWN: + trace_seq_puts(s, "local shootdown"); + break; + case TLB_LOCAL_MM_SHOOTDOWN: + trace_seq_puts(s, "local mm shootdown"); + break; + } + + trace_seq_printf(s, " (%lld)", val); + + return 0; +} + +int TEP_PLUGIN_LOADER(struct tep_handle *tep) +{ + tep_register_event_handler(tep, -1, "tlb", "tlb_flush", + tlb_flush_handler, NULL); + + return 0; +} + +void TEP_PLUGIN_UNLOADER(struct tep_handle *tep) +{ + tep_unregister_event_handler(tep, -1, + "tlb", "tlb_flush", + tlb_flush_handler, NULL); +} diff --git a/tools/memory-model/Documentation/cheatsheet.txt b/tools/memory-model/Documentation/cheatsheet.txt index 33ba98d72b16..99d00870b160 100644 --- a/tools/memory-model/Documentation/cheatsheet.txt +++ b/tools/memory-model/Documentation/cheatsheet.txt @@ -3,9 +3,9 @@ C Self R W RMW Self R W DR DW RMW SV -- ---- - - --- ---- - - -- -- --- -- -Store, e.g., WRITE_ONCE() Y Y -Load, e.g., READ_ONCE() Y Y Y Y -Unsuccessful RMW operation Y Y Y Y +Relaxed store Y Y +Relaxed load Y Y Y Y +Relaxed RMW operation Y Y Y Y rcu_dereference() Y Y Y Y Successful *_acquire() R Y Y Y Y Y Y Successful *_release() C Y Y Y W Y @@ -17,14 +17,19 @@ smp_mb__before_atomic() CP Y Y Y a a a a Y smp_mb__after_atomic() CP a a Y Y Y Y Y Y -Key: C: Ordering is cumulative - P: Ordering propagates - R: Read, for example, READ_ONCE(), or read portion of RMW - W: Write, for example, WRITE_ONCE(), or write portion of RMW - Y: Provides ordering - a: Provides ordering given intervening RMW atomic operation - DR: Dependent read (address dependency) - DW: Dependent write (address, data, or control dependency) - RMW: Atomic read-modify-write operation - SELF: Orders self, as opposed to accesses before and/or after - SV: Orders later accesses to the same variable +Key: Relaxed: A relaxed operation is either READ_ONCE(), WRITE_ONCE(), + a *_relaxed() RMW operation, an unsuccessful RMW + operation, a non-value-returning RMW operation such + as atomic_inc(), or one of the atomic*_read() and + atomic*_set() family of operations. + C: Ordering is cumulative + P: Ordering propagates + R: Read, for example, READ_ONCE(), or read portion of RMW + W: Write, for example, WRITE_ONCE(), or write portion of RMW + Y: Provides ordering + a: Provides ordering given intervening RMW atomic operation + DR: Dependent read (address dependency) + DW: Dependent write (address, data, or control dependency) + RMW: Atomic read-modify-write operation + SELF: Orders self, as opposed to accesses before and/or after + SV: Orders later accesses to the same variable diff --git a/tools/memory-model/Documentation/litmus-tests.txt b/tools/memory-model/Documentation/litmus-tests.txt new file mode 100644 index 000000000000..2f840dcd15cf --- /dev/null +++ b/tools/memory-model/Documentation/litmus-tests.txt @@ -0,0 +1,1074 @@ +Linux-Kernel Memory Model Litmus Tests +====================================== + +This file describes the LKMM litmus-test format by example, describes +some tricks and traps, and finally outlines LKMM's limitations. Earlier +versions of this material appeared in a number of LWN articles, including: + +https://lwn.net/Articles/720550/ + A formal kernel memory-ordering model (part 2) +https://lwn.net/Articles/608550/ + Axiomatic validation of memory barriers and atomic instructions +https://lwn.net/Articles/470681/ + Validating Memory Barriers and Atomic Instructions + +This document presents information in decreasing order of applicability, +so that, where possible, the information that has proven more commonly +useful is shown near the beginning. + +For information on installing LKMM, including the underlying "herd7" +tool, please see tools/memory-model/README. + + +Copy-Pasta +========== + +As with other software, it is often better (if less macho) to adapt an +existing litmus test than it is to create one from scratch. A number +of litmus tests may be found in the kernel source tree: + + tools/memory-model/litmus-tests/ + Documentation/litmus-tests/ + +Several thousand more example litmus tests are available on github +and kernel.org: + + https://github.com/paulmckrcu/litmus + https://git.kernel.org/pub/scm/linux/kernel/git/paulmck/perfbook.git/tree/CodeSamples/formal/herd + https://git.kernel.org/pub/scm/linux/kernel/git/paulmck/perfbook.git/tree/CodeSamples/formal/litmus + +The -l and -L arguments to "git grep" can be quite helpful in identifying +existing litmus tests that are similar to the one you need. But even if +you start with an existing litmus test, it is still helpful to have a +good understanding of the litmus-test format. + + +Examples and Format +=================== + +This section describes the overall format of litmus tests, starting +with a small example of the message-passing pattern and moving on to +more complex examples that illustrate explicit initialization and LKMM's +minimalistic set of flow-control statements. + + +Message-Passing Example +----------------------- + +This section gives an overview of the format of a litmus test using an +example based on the common message-passing use case. This use case +appears often in the Linux kernel. For example, a flag (modeled by "y" +below) indicates that a buffer (modeled by "x" below) is now completely +filled in and ready for use. It would be very bad if the consumer saw the +flag set, but, due to memory misordering, saw old values in the buffer. + +This example asks whether smp_store_release() and smp_load_acquire() +suffices to avoid this bad outcome: + + 1 C MP+pooncerelease+poacquireonce + 2 + 3 {} + 4 + 5 P0(int *x, int *y) + 6 { + 7 WRITE_ONCE(*x, 1); + 8 smp_store_release(y, 1); + 9 } +10 +11 P1(int *x, int *y) +12 { +13 int r0; +14 int r1; +15 +16 r0 = smp_load_acquire(y); +17 r1 = READ_ONCE(*x); +18 } +19 +20 exists (1:r0=1 /\ 1:r1=0) + +Line 1 starts with "C", which identifies this file as being in the +LKMM C-language format (which, as we will see, is a small fragment +of the full C language). The remainder of line 1 is the name of +the test, which by convention is the filename with the ".litmus" +suffix stripped. In this case, the actual test may be found in +tools/memory-model/litmus-tests/MP+pooncerelease+poacquireonce.litmus +in the Linux-kernel source tree. + +Mechanically generated litmus tests will often have an optional +double-quoted comment string on the second line. Such strings are ignored +when running the test. Yes, you can add your own comments to litmus +tests, but this is a bit involved due to the use of multiple parsers. +For now, you can use C-language comments in the C code, and these comments +may be in either the "/* */" or the "//" style. A later section will +cover the full litmus-test commenting story. + +Line 3 is the initialization section. Because the default initialization +to zero suffices for this test, the "{}" syntax is used, which mean the +initialization section is empty. Litmus tests requiring non-default +initialization must have non-empty initialization sections, as in the +example that will be presented later in this document. + +Lines 5-9 show the first process and lines 11-18 the second process. Each +process corresponds to a Linux-kernel task (or kthread, workqueue, thread, +and so on; LKMM discussions often use these terms interchangeably). +The name of the first process is "P0" and that of the second "P1". +You can name your processes anything you like as long as the names consist +of a single "P" followed by a number, and as long as the numbers are +consecutive starting with zero. This can actually be quite helpful, +for example, a .litmus file matching "^P1(" but not matching "^P2(" +must contain a two-process litmus test. + +The argument list for each function are pointers to the global variables +used by that function. Unlike normal C-language function parameters, the +names are significant. The fact that both P0() and P1() have a formal +parameter named "x" means that these two processes are working with the +same global variable, also named "x". So the "int *x, int *y" on P0() +and P1() mean that both processes are working with two shared global +variables, "x" and "y". Global variables are always passed to processes +by reference, hence "P0(int *x, int *y)", but *never* "P0(int x, int y)". + +P0() has no local variables, but P1() has two of them named "r0" and "r1". +These names may be freely chosen, but for historical reasons stemming from +other litmus-test formats, it is conventional to use names consisting of +"r" followed by a number as shown here. A common bug in litmus tests +is forgetting to add a global variable to a process's parameter list. +This will sometimes result in an error message, but can also cause the +intended global to instead be silently treated as an undeclared local +variable. + +Each process's code is similar to Linux-kernel C, as can be seen on lines +7-8 and 13-17. This code may use many of the Linux kernel's atomic +operations, some of its exclusive-lock functions, and some of its RCU +and SRCU functions. An approximate list of the currently supported +functions may be found in the linux-kernel.def file. + +The P0() process does "WRITE_ONCE(*x, 1)" on line 7. Because "x" is a +pointer in P0()'s parameter list, this does an unordered store to global +variable "x". Line 8 does "smp_store_release(y, 1)", and because "y" +is also in P0()'s parameter list, this does a release store to global +variable "y". + +The P1() process declares two local variables on lines 13 and 14. +Line 16 does "r0 = smp_load_acquire(y)" which does an acquire load +from global variable "y" into local variable "r0". Line 17 does a +"r1 = READ_ONCE(*x)", which does an unordered load from "*x" into local +variable "r1". Both "x" and "y" are in P1()'s parameter list, so both +reference the same global variables that are used by P0(). + +Line 20 is the "exists" assertion expression to evaluate the final state. +This final state is evaluated after the dust has settled: both processes +have completed and all of their memory references and memory barriers +have propagated to all parts of the system. The references to the local +variables "r0" and "r1" in line 24 must be prefixed with "1:" to specify +which process they are local to. + +Note that the assertion expression is written in the litmus-test +language rather than in C. For example, single "=" is an equality +operator rather than an assignment. The "/\" character combination means +"and". Similarly, "\/" stands for "or". Both of these are ASCII-art +representations of the corresponding mathematical symbols. Finally, +"~" stands for "logical not", which is "!" in C, and not to be confused +with the C-language "~" operator which instead stands for "bitwise not". +Parentheses may be used to override precedence. + +The "exists" assertion on line 20 is satisfied if the consumer sees the +flag ("y") set but the buffer ("x") as not yet filled in, that is, if P1() +loaded a value from "x" that was equal to 1 but loaded a value from "y" +that was still equal to zero. + +This example can be checked by running the following command, which +absolutely must be run from the tools/memory-model directory and from +this directory only: + +herd7 -conf linux-kernel.cfg litmus-tests/MP+pooncerelease+poacquireonce.litmus + +The output is the result of something similar to a full state-space +search, and is as follows: + + 1 Test MP+pooncerelease+poacquireonce Allowed + 2 States 3 + 3 1:r0=0; 1:r1=0; + 4 1:r0=0; 1:r1=1; + 5 1:r0=1; 1:r1=1; + 6 No + 7 Witnesses + 8 Positive: 0 Negative: 3 + 9 Condition exists (1:r0=1 /\ 1:r1=0) +10 Observation MP+pooncerelease+poacquireonce Never 0 3 +11 Time MP+pooncerelease+poacquireonce 0.00 +12 Hash=579aaa14d8c35a39429b02e698241d09 + +The most pertinent line is line 10, which contains "Never 0 3", which +indicates that the bad result flagged by the "exists" clause never +happens. This line might instead say "Sometimes" to indicate that the +bad result happened in some but not all executions, or it might say +"Always" to indicate that the bad result happened in all executions. +(The herd7 tool doesn't judge, so it is only an LKMM convention that the +"exists" clause indicates a bad result. To see this, invert the "exists" +clause's condition and run the test.) The numbers ("0 3") at the end +of this line indicate the number of end states satisfying the "exists" +clause (0) and the number not not satisfying that clause (3). + +Another important part of this output is shown in lines 2-5, repeated here: + + 2 States 3 + 3 1:r0=0; 1:r1=0; + 4 1:r0=0; 1:r1=1; + 5 1:r0=1; 1:r1=1; + +Line 2 gives the total number of end states, and each of lines 3-5 list +one of these states, with the first ("1:r0=0; 1:r1=0;") indicating that +both of P1()'s loads returned the value "0". As expected, given the +"Never" on line 10, the state flagged by the "exists" clause is not +listed. This full list of states can be helpful when debugging a new +litmus test. + +The rest of the output is not normally needed, either due to irrelevance +or due to being redundant with the lines discussed above. However, the +following paragraph lists them for the benefit of readers possessed of +an insatiable curiosity. Other readers should feel free to skip ahead. + +Line 1 echos the test name, along with the "Test" and "Allowed". Line 6's +"No" says that the "exists" clause was not satisfied by any execution, +and as such it has the same meaning as line 10's "Never". Line 7 is a +lead-in to line 8's "Positive: 0 Negative: 3", which lists the number +of end states satisfying and not satisfying the "exists" clause, just +like the two numbers at the end of line 10. Line 9 repeats the "exists" +clause so that you don't have to look it up in the litmus-test file. +The number at the end of line 11 (which begins with "Time") gives the +time in seconds required to analyze the litmus test. Small tests such +as this one complete in a few milliseconds, so "0.00" is quite common. +Line 12 gives a hash of the contents for the litmus-test file, and is used +by tooling that manages litmus tests and their output. This tooling is +used by people modifying LKMM itself, and among other things lets such +people know which of the several thousand relevant litmus tests were +affected by a given change to LKMM. + + +Initialization +-------------- + +The previous example relied on the default zero initialization for +"x" and "y", but a similar litmus test could instead initialize them +to some other value: + + 1 C MP+pooncerelease+poacquireonce + 2 + 3 { + 4 x=42; + 5 y=42; + 6 } + 7 + 8 P0(int *x, int *y) + 9 { +10 WRITE_ONCE(*x, 1); +11 smp_store_release(y, 1); +12 } +13 +14 P1(int *x, int *y) +15 { +16 int r0; +17 int r1; +18 +19 r0 = smp_load_acquire(y); +20 r1 = READ_ONCE(*x); +21 } +22 +23 exists (1:r0=1 /\ 1:r1=42) + +Lines 3-6 now initialize both "x" and "y" to the value 42. This also +means that the "exists" clause on line 23 must change "1:r1=0" to +"1:r1=42". + +Running the test gives the same overall result as before, but with the +value 42 appearing in place of the value zero: + + 1 Test MP+pooncerelease+poacquireonce Allowed + 2 States 3 + 3 1:r0=1; 1:r1=1; + 4 1:r0=42; 1:r1=1; + 5 1:r0=42; 1:r1=42; + 6 No + 7 Witnesses + 8 Positive: 0 Negative: 3 + 9 Condition exists (1:r0=1 /\ 1:r1=42) +10 Observation MP+pooncerelease+poacquireonce Never 0 3 +11 Time MP+pooncerelease+poacquireonce 0.02 +12 Hash=ab9a9b7940a75a792266be279a980156 + +It is tempting to avoid the open-coded repetitions of the value "42" +by defining another global variable "initval=42" and replacing all +occurrences of "42" with "initval". This will not, repeat *not*, +initialize "x" and "y" to 42, but instead to the address of "initval" +(try it!). See the section below on linked lists to learn more about +why this approach to initialization can be useful. + + +Control Structures +------------------ + +LKMM supports the C-language "if" statement, which allows modeling of +conditional branches. In LKMM, conditional branches can affect ordering, +but only if you are *very* careful (compilers are surprisingly able +to optimize away conditional branches). The following example shows +the "load buffering" (LB) use case that is used in the Linux kernel to +synchronize between ring-buffer producers and consumers. In the example +below, P0() is one side checking to see if an operation may proceed and +P1() is the other side completing its update. + + 1 C LB+fencembonceonce+ctrlonceonce + 2 + 3 {} + 4 + 5 P0(int *x, int *y) + 6 { + 7 int r0; + 8 + 9 r0 = READ_ONCE(*x); +10 if (r0) +11 WRITE_ONCE(*y, 1); +12 } +13 +14 P1(int *x, int *y) +15 { +16 int r0; +17 +18 r0 = READ_ONCE(*y); +19 smp_mb(); +20 WRITE_ONCE(*x, 1); +21 } +22 +23 exists (0:r0=1 /\ 1:r0=1) + +P1()'s "if" statement on line 10 works as expected, so that line 11 is +executed only if line 9 loads a non-zero value from "x". Because P1()'s +write of "1" to "x" happens only after P1()'s read from "y", one would +hope that the "exists" clause cannot be satisfied. LKMM agrees: + + 1 Test LB+fencembonceonce+ctrlonceonce Allowed + 2 States 2 + 3 0:r0=0; 1:r0=0; + 4 0:r0=1; 1:r0=0; + 5 No + 6 Witnesses + 7 Positive: 0 Negative: 2 + 8 Condition exists (0:r0=1 /\ 1:r0=1) + 9 Observation LB+fencembonceonce+ctrlonceonce Never 0 2 +10 Time LB+fencembonceonce+ctrlonceonce 0.00 +11 Hash=e5260556f6de495fd39b556d1b831c3b + +However, there is no "while" statement due to the fact that full +state-space search has some difficulty with iteration. However, there +are tricks that may be used to handle some special cases, which are +discussed below. In addition, loop-unrolling tricks may be applied, +albeit sparingly. + + +Tricks and Traps +================ + +This section covers extracting debug output from herd7, emulating +spin loops, handling trivial linked lists, adding comments to litmus tests, +emulating call_rcu(), and finally tricks to improve herd7 performance +in order to better handle large litmus tests. + + +Debug Output +------------ + +By default, the herd7 state output includes all variables mentioned +in the "exists" clause. But sometimes debugging efforts are greatly +aided by the values of other variables. Consider this litmus test +(tools/memory-order/litmus-tests/SB+rfionceonce-poonceonces.litmus but +slightly modified), which probes an obscure corner of hardware memory +ordering: + + 1 C SB+rfionceonce-poonceonces + 2 + 3 {} + 4 + 5 P0(int *x, int *y) + 6 { + 7 int r1; + 8 int r2; + 9 +10 WRITE_ONCE(*x, 1); +11 r1 = READ_ONCE(*x); +12 r2 = READ_ONCE(*y); +13 } +14 +15 P1(int *x, int *y) +16 { +17 int r3; +18 int r4; +19 +20 WRITE_ONCE(*y, 1); +21 r3 = READ_ONCE(*y); +22 r4 = READ_ONCE(*x); +23 } +24 +25 exists (0:r2=0 /\ 1:r4=0) + +The herd7 output is as follows: + + 1 Test SB+rfionceonce-poonceonces Allowed + 2 States 4 + 3 0:r2=0; 1:r4=0; + 4 0:r2=0; 1:r4=1; + 5 0:r2=1; 1:r4=0; + 6 0:r2=1; 1:r4=1; + 7 Ok + 8 Witnesses + 9 Positive: 1 Negative: 3 +10 Condition exists (0:r2=0 /\ 1:r4=0) +11 Observation SB+rfionceonce-poonceonces Sometimes 1 3 +12 Time SB+rfionceonce-poonceonces 0.01 +13 Hash=c7f30fe0faebb7d565405d55b7318ada + +(This output indicates that CPUs are permitted to "snoop their own +store buffers", which all of Linux's CPU families other than s390 will +happily do. Such snooping results in disagreement among CPUs on the +order of stores from different CPUs, which is rarely an issue.) + +But the herd7 output shows only the two variables mentioned in the +"exists" clause. Someone modifying this test might wish to know the +values of "x", "y", "0:r1", and "0:r3" as well. The "locations" +statement on line 25 shows how to cause herd7 to display additional +variables: + + 1 C SB+rfionceonce-poonceonces + 2 + 3 {} + 4 + 5 P0(int *x, int *y) + 6 { + 7 int r1; + 8 int r2; + 9 +10 WRITE_ONCE(*x, 1); +11 r1 = READ_ONCE(*x); +12 r2 = READ_ONCE(*y); +13 } +14 +15 P1(int *x, int *y) +16 { +17 int r3; +18 int r4; +19 +20 WRITE_ONCE(*y, 1); +21 r3 = READ_ONCE(*y); +22 r4 = READ_ONCE(*x); +23 } +24 +25 locations [0:r1; 1:r3; x; y] +26 exists (0:r2=0 /\ 1:r4=0) + +The herd7 output then displays the values of all the variables: + + 1 Test SB+rfionceonce-poonceonces Allowed + 2 States 4 + 3 0:r1=1; 0:r2=0; 1:r3=1; 1:r4=0; x=1; y=1; + 4 0:r1=1; 0:r2=0; 1:r3=1; 1:r4=1; x=1; y=1; + 5 0:r1=1; 0:r2=1; 1:r3=1; 1:r4=0; x=1; y=1; + 6 0:r1=1; 0:r2=1; 1:r3=1; 1:r4=1; x=1; y=1; + 7 Ok + 8 Witnesses + 9 Positive: 1 Negative: 3 +10 Condition exists (0:r2=0 /\ 1:r4=0) +11 Observation SB+rfionceonce-poonceonces Sometimes 1 3 +12 Time SB+rfionceonce-poonceonces 0.01 +13 Hash=40de8418c4b395388f6501cafd1ed38d + +What if you would like to know the value of a particular global variable +at some particular point in a given process's execution? One approach +is to use a READ_ONCE() to load that global variable into a new local +variable, then add that local variable to the "locations" clause. +But be careful: In some litmus tests, adding a READ_ONCE() will change +the outcome! For one example, please see the C-READ_ONCE.litmus and +C-READ_ONCE-omitted.litmus tests located here: + + https://github.com/paulmckrcu/litmus/blob/master/manual/kernel/ + + +Spin Loops +---------- + +The analysis carried out by herd7 explores full state space, which is +at best of exponential time complexity. Adding processes and increasing +the amount of code in a give process can greatly increase execution time. +Potentially infinite loops, such as those used to wait for locks to +become available, are clearly problematic. + +Fortunately, it is possible to avoid state-space explosion by specially +modeling such loops. For example, the following litmus tests emulates +locking using xchg_acquire(), but instead of enclosing xchg_acquire() +in a spin loop, it instead excludes executions that fail to acquire the +lock using a herd7 "filter" clause. Note that for exclusive locking, you +are better off using the spin_lock() and spin_unlock() that LKMM directly +models, if for no other reason that these are much faster. However, the +techniques illustrated in this section can be used for other purposes, +such as emulating reader-writer locking, which LKMM does not yet model. + + 1 C C-SB+l-o-o-u+l-o-o-u-X + 2 + 3 { + 4 } + 5 + 6 P0(int *sl, int *x0, int *x1) + 7 { + 8 int r2; + 9 int r1; +10 +11 r2 = xchg_acquire(sl, 1); +12 WRITE_ONCE(*x0, 1); +13 r1 = READ_ONCE(*x1); +14 smp_store_release(sl, 0); +15 } +16 +17 P1(int *sl, int *x0, int *x1) +18 { +19 int r2; +20 int r1; +21 +22 r2 = xchg_acquire(sl, 1); +23 WRITE_ONCE(*x1, 1); +24 r1 = READ_ONCE(*x0); +25 smp_store_release(sl, 0); +26 } +27 +28 filter (0:r2=0 /\ 1:r2=0) +29 exists (0:r1=0 /\ 1:r1=0) + +This litmus test may be found here: + +https://git.kernel.org/pub/scm/linux/kernel/git/paulmck/perfbook.git/tree/CodeSamples/formal/herd/C-SB+l-o-o-u+l-o-o-u-X.litmus + +This test uses two global variables, "x1" and "x2", and also emulates a +single global spinlock named "sl". This spinlock is held by whichever +process changes the value of "sl" from "0" to "1", and is released when +that process sets "sl" back to "0". P0()'s lock acquisition is emulated +on line 11 using xchg_acquire(), which unconditionally stores the value +"1" to "sl" and stores either "0" or "1" to "r2", depending on whether +the lock acquisition was successful or unsuccessful (due to "sl" already +having the value "1"), respectively. P1() operates in a similar manner. + +Rather unconventionally, execution appears to proceed to the critical +section on lines 12 and 13 in either case. Line 14 then uses an +smp_store_release() to store zero to "sl", thus emulating lock release. + +The case where xchg_acquire() fails to acquire the lock is handled by +the "filter" clause on line 28, which tells herd7 to keep only those +executions in which both "0:r2" and "1:r2" are zero, that is to pay +attention only to those executions in which both locks are actually +acquired. Thus, the bogus executions that would execute the critical +sections are discarded and any effects that they might have had are +ignored. Note well that the "filter" clause keeps those executions +for which its expression is satisfied, that is, for which the expression +evaluates to true. In other words, the "filter" clause says what to +keep, not what to discard. + +The result of running this test is as follows: + + 1 Test C-SB+l-o-o-u+l-o-o-u-X Allowed + 2 States 2 + 3 0:r1=0; 1:r1=1; + 4 0:r1=1; 1:r1=0; + 5 No + 6 Witnesses + 7 Positive: 0 Negative: 2 + 8 Condition exists (0:r1=0 /\ 1:r1=0) + 9 Observation C-SB+l-o-o-u+l-o-o-u-X Never 0 2 +10 Time C-SB+l-o-o-u+l-o-o-u-X 0.03 + +The "Never" on line 9 indicates that this use of xchg_acquire() and +smp_store_release() really does correctly emulate locking. + +Why doesn't the litmus test take the simpler approach of using a spin loop +to handle failed spinlock acquisitions, like the kernel does? The key +insight behind this litmus test is that spin loops have no effect on the +possible "exists"-clause outcomes of program execution in the absence +of deadlock. In other words, given a high-quality lock-acquisition +primitive in a deadlock-free program running on high-quality hardware, +each lock acquisition will eventually succeed. Because herd7 already +explores the full state space, the length of time required to actually +acquire the lock does not matter. After all, herd7 already models all +possible durations of the xchg_acquire() statements. + +Why not just add the "filter" clause to the "exists" clause, thus +avoiding the "filter" clause entirely? This does work, but is slower. +The reason that the "filter" clause is faster is that (in the common case) +herd7 knows to abandon an execution as soon as the "filter" expression +fails to be satisfied. In contrast, the "exists" clause is evaluated +only at the end of time, thus requiring herd7 to waste time on bogus +executions in which both critical sections proceed concurrently. In +addition, some LKMM users like the separation of concerns provided by +using the both the "filter" and "exists" clauses. + +Readers lacking a pathological interest in odd corner cases should feel +free to skip the remainder of this section. + +But what if the litmus test were to temporarily set "0:r2" to a non-zero +value? Wouldn't that cause herd7 to abandon the execution prematurely +due to an early mismatch of the "filter" clause? + +Why not just try it? Line 4 of the following modified litmus test +introduces a new global variable "x2" that is initialized to "1". Line 23 +of P1() reads that variable into "1:r2" to force an early mismatch with +the "filter" clause. Line 24 does a known-true "if" condition to avoid +and static analysis that herd7 might do. Finally the "exists" clause +on line 32 is updated to a condition that is alway satisfied at the end +of the test. + + 1 C C-SB+l-o-o-u+l-o-o-u-X + 2 + 3 { + 4 x2=1; + 5 } + 6 + 7 P0(int *sl, int *x0, int *x1) + 8 { + 9 int r2; +10 int r1; +11 +12 r2 = xchg_acquire(sl, 1); +13 WRITE_ONCE(*x0, 1); +14 r1 = READ_ONCE(*x1); +15 smp_store_release(sl, 0); +16 } +17 +18 P1(int *sl, int *x0, int *x1, int *x2) +19 { +20 int r2; +21 int r1; +22 +23 r2 = READ_ONCE(*x2); +24 if (r2) +25 r2 = xchg_acquire(sl, 1); +26 WRITE_ONCE(*x1, 1); +27 r1 = READ_ONCE(*x0); +28 smp_store_release(sl, 0); +29 } +30 +31 filter (0:r2=0 /\ 1:r2=0) +32 exists (x1=1) + +If the "filter" clause were to check each variable at each point in the +execution, running this litmus test would display no executions because +all executions would be filtered out at line 23. However, the output +is instead as follows: + + 1 Test C-SB+l-o-o-u+l-o-o-u-X Allowed + 2 States 1 + 3 x1=1; + 4 Ok + 5 Witnesses + 6 Positive: 2 Negative: 0 + 7 Condition exists (x1=1) + 8 Observation C-SB+l-o-o-u+l-o-o-u-X Always 2 0 + 9 Time C-SB+l-o-o-u+l-o-o-u-X 0.04 +10 Hash=080bc508da7f291e122c6de76c0088e3 + +Line 3 shows that there is one execution that did not get filtered out, +so the "filter" clause is evaluated only on the last assignment to +the variables that it checks. In this case, the "filter" clause is a +disjunction, so it might be evaluated twice, once at the final (and only) +assignment to "0:r2" and once at the final assignment to "1:r2". + + +Linked Lists +------------ + +LKMM can handle linked lists, but only linked lists in which each node +contains nothing except a pointer to the next node in the list. This is +of course quite restrictive, but there is nevertheless quite a bit that +can be done within these confines, as can be seen in the litmus test +at tools/memory-model/litmus-tests/MP+onceassign+derefonce.litmus: + + 1 C MP+onceassign+derefonce + 2 + 3 { + 4 y=z; + 5 z=0; + 6 } + 7 + 8 P0(int *x, int **y) + 9 { +10 WRITE_ONCE(*x, 1); +11 rcu_assign_pointer(*y, x); +12 } +13 +14 P1(int *x, int **y) +15 { +16 int *r0; +17 int r1; +18 +19 rcu_read_lock(); +20 r0 = rcu_dereference(*y); +21 r1 = READ_ONCE(*r0); +22 rcu_read_unlock(); +23 } +24 +25 exists (1:r0=x /\ 1:r1=0) + +Line 4's "y=z" may seem odd, given that "z" has not yet been initialized. +But "y=z" does not set the value of "y" to that of "z", but instead +sets the value of "y" to the *address* of "z". Lines 4 and 5 therefore +create a simple linked list, with "y" pointing to "z" and "z" having a +NULL pointer. A much longer linked list could be created if desired, +and circular singly linked lists can also be created and manipulated. + +The "exists" clause works the same way, with the "1:r0=x" comparing P1()'s +"r0" not to the value of "x", but again to its address. This term of the +"exists" clause therefore tests whether line 20's load from "y" saw the +value stored by line 11, which is in fact what is required in this case. + +P0()'s line 10 initializes "x" to the value 1 then line 11 links to "x" +from "y", replacing "z". + +P1()'s line 20 loads a pointer from "y", and line 21 dereferences that +pointer. The RCU read-side critical section spanning lines 19-22 is just +for show in this example. Note that the address used for line 21's load +depends on (in this case, "is exactly the same as") the value loaded by +line 20. This is an example of what is called an "address dependency". +This particular address dependency extends from the load on line 20 to the +load on line 21. Address dependencies provide a weak form of ordering. + +Running this test results in the following: + + 1 Test MP+onceassign+derefonce Allowed + 2 States 2 + 3 1:r0=x; 1:r1=1; + 4 1:r0=z; 1:r1=0; + 5 No + 6 Witnesses + 7 Positive: 0 Negative: 2 + 8 Condition exists (1:r0=x /\ 1:r1=0) + 9 Observation MP+onceassign+derefonce Never 0 2 +10 Time MP+onceassign+derefonce 0.00 +11 Hash=49ef7a741563570102448a256a0c8568 + +The only possible outcomes feature P1() loading a pointer to "z" +(which contains zero) on the one hand and P1() loading a pointer to "x" +(which contains the value one) on the other. This should be reassuring +because it says that RCU readers cannot see the old preinitialization +values when accessing a newly inserted list node. This undesirable +scenario is flagged by the "exists" clause, and would occur if P1() +loaded a pointer to "x", but obtained the pre-initialization value of +zero after dereferencing that pointer. + + +Comments +-------- + +Different portions of a litmus test are processed by different parsers, +which has the charming effect of requiring different comment syntax in +different portions of the litmus test. The C-syntax portions use +C-language comments (either "/* */" or "//"), while the other portions +use Ocaml comments "(* *)". + +The following litmus test illustrates the comment style corresponding +to each syntactic unit of the test: + + 1 C MP+onceassign+derefonce (* A *) + 2 + 3 (* B *) + 4 + 5 { + 6 y=z; (* C *) + 7 z=0; + 8 } // D + 9 +10 // E +11 +12 P0(int *x, int **y) // F +13 { +14 WRITE_ONCE(*x, 1); // G +15 rcu_assign_pointer(*y, x); +16 } +17 +18 // H +19 +20 P1(int *x, int **y) +21 { +22 int *r0; +23 int r1; +24 +25 rcu_read_lock(); +26 r0 = rcu_dereference(*y); +27 r1 = READ_ONCE(*r0); +28 rcu_read_unlock(); +29 } +30 +31 // I +32 +33 exists (* J *) (1:r0=x /\ (* K *) 1:r1=0) (* L *) + +In short, use C-language comments in the C code and Ocaml comments in +the rest of the litmus test. + +On the other hand, if you prefer C-style comments everywhere, the +C preprocessor is your friend. + + +Asynchronous RCU Grace Periods +------------------------------ + +The following litmus test is derived from the example show in +Documentation/litmus-tests/rcu/RCU+sync+free.litmus, but converted to +emulate call_rcu(): + + 1 C RCU+sync+free + 2 + 3 { + 4 int x = 1; + 5 int *y = &x; + 6 int z = 1; + 7 } + 8 + 9 P0(int *x, int *z, int **y) +10 { +11 int *r0; +12 int r1; +13 +14 rcu_read_lock(); +15 r0 = rcu_dereference(*y); +16 r1 = READ_ONCE(*r0); +17 rcu_read_unlock(); +18 } +19 +20 P1(int *z, int **y, int *c) +21 { +22 rcu_assign_pointer(*y, z); +23 smp_store_release(*c, 1); // Emulate call_rcu(). +24 } +25 +26 P2(int *x, int *z, int **y, int *c) +27 { +28 int r0; +29 +30 r0 = smp_load_acquire(*c); // Note call_rcu() request. +31 synchronize_rcu(); // Wait one grace period. +32 WRITE_ONCE(*x, 0); // Emulate the RCU callback. +33 } +34 +35 filter (2:r0=1) (* Reject too-early starts. *) +36 exists (0:r0=x /\ 0:r1=0) + +Lines 4-6 initialize a linked list headed by "y" that initially contains +"x". In addition, "z" is pre-initialized to prepare for P1(), which +will replace "x" with "z" in this list. + +P0() on lines 9-18 enters an RCU read-side critical section, loads the +list header "y" and dereferences it, leaving the node in "0:r0" and +the node's value in "0:r1". + +P1() on lines 20-24 updates the list header to instead reference "z", +then emulates call_rcu() by doing a release store into "c". + +P2() on lines 27-33 emulates the behind-the-scenes effect of doing a +call_rcu(). Line 30 first does an acquire load from "c", then line 31 +waits for an RCU grace period to elapse, and finally line 32 emulates +the RCU callback, which in turn emulates a call to kfree(). + +Of course, it is possible for P2() to start too soon, so that the +value of "2:r0" is zero rather than the required value of "1". +The "filter" clause on line 35 handles this possibility, rejecting +all executions in which "2:r0" is not equal to the value "1". + + +Performance +----------- + +LKMM's exploration of the full state-space can be extremely helpful, +but it does not come for free. The price is exponential computational +complexity in terms of the number of processes, the average number +of statements in each process, and the total number of stores in the +litmus test. + +So it is best to start small and then work up. Where possible, break +your code down into small pieces each representing a core concurrency +requirement. + +That said, herd7 is quite fast. On an unprepossessing x86 laptop, it +was able to analyze the following 10-process RCU litmus test in about +six seconds. + +https://github.com/paulmckrcu/litmus/blob/master/auto/C-RW-R+RW-R+RW-G+RW-G+RW-G+RW-G+RW-R+RW-R+RW-R+RW-R.litmus + +One way to make herd7 run faster is to use the "-speedcheck true" option. +This option prevents herd7 from generating all possible end states, +instead causing it to focus solely on whether or not the "exists" +clause can be satisfied. With this option, herd7 evaluates the above +litmus test in about 300 milliseconds, for more than an order of magnitude +improvement in performance. + +Larger 16-process litmus tests that would normally consume 15 minutes +of time complete in about 40 seconds with this option. To be fair, +you do get an extra 65,535 states when you leave off the "-speedcheck +true" option. + +https://github.com/paulmckrcu/litmus/blob/master/auto/C-RW-R+RW-R+RW-G+RW-G+RW-G+RW-G+RW-R+RW-R+RW-R+RW-R+RW-G+RW-G+RW-G+RW-G+RW-R+RW-R.litmus + +Nevertheless, litmus-test analysis really is of exponential complexity, +whether with or without "-speedcheck true". Increasing by just three +processes to a 19-process litmus test requires 2 hours and 40 minutes +without, and about 8 minutes with "-speedcheck true". Each of these +results represent roughly an order of magnitude slowdown compared to the +16-process litmus test. Again, to be fair, the multi-hour run explores +no fewer than 524,287 additional states compared to the shorter one. + +https://github.com/paulmckrcu/litmus/blob/master/auto/C-RW-R+RW-R+RW-G+RW-G+RW-G+RW-G+RW-R+RW-R+RW-R+RW-R+RW-R+RW-R+RW-G+RW-G+RW-G+RW-G+RW-R+RW-R+RW-R.litmus + +If you don't like command-line arguments, you can obtain a similar speedup +by adding a "filter" clause with exactly the same expression as your +"exists" clause. + +However, please note that seeing the full set of states can be extremely +helpful when developing and debugging litmus tests. + + +LIMITATIONS +=========== + +Limitations of the Linux-kernel memory model (LKMM) include: + +1. Compiler optimizations are not accurately modeled. Of course, + the use of READ_ONCE() and WRITE_ONCE() limits the compiler's + ability to optimize, but under some circumstances it is possible + for the compiler to undermine the memory model. For more + information, see Documentation/explanation.txt (in particular, + the "THE PROGRAM ORDER RELATION: po AND po-loc" and "A WARNING" + sections). + + Note that this limitation in turn limits LKMM's ability to + accurately model address, control, and data dependencies. + For example, if the compiler can deduce the value of some variable + carrying a dependency, then the compiler can break that dependency + by substituting a constant of that value. + +2. Multiple access sizes for a single variable are not supported, + and neither are misaligned or partially overlapping accesses. + +3. Exceptions and interrupts are not modeled. In some cases, + this limitation can be overcome by modeling the interrupt or + exception with an additional process. + +4. I/O such as MMIO or DMA is not supported. + +5. Self-modifying code (such as that found in the kernel's + alternatives mechanism, function tracer, Berkeley Packet Filter + JIT compiler, and module loader) is not supported. + +6. Complete modeling of all variants of atomic read-modify-write + operations, locking primitives, and RCU is not provided. + For example, call_rcu() and rcu_barrier() are not supported. + However, a substantial amount of support is provided for these + operations, as shown in the linux-kernel.def file. + + Here are specific limitations: + + a. When rcu_assign_pointer() is passed NULL, the Linux + kernel provides no ordering, but LKMM models this + case as a store release. + + b. The "unless" RMW operations are not currently modeled: + atomic_long_add_unless(), atomic_inc_unless_negative(), + and atomic_dec_unless_positive(). These can be emulated + in litmus tests, for example, by using atomic_cmpxchg(). + + One exception of this limitation is atomic_add_unless(), + which is provided directly by herd7 (so no corresponding + definition in linux-kernel.def). atomic_add_unless() is + modeled by herd7 therefore it can be used in litmus tests. + + c. The call_rcu() function is not modeled. As was shown above, + it can be emulated in litmus tests by adding another + process that invokes synchronize_rcu() and the body of the + callback function, with (for example) a release-acquire + from the site of the emulated call_rcu() to the beginning + of the additional process. + + d. The rcu_barrier() function is not modeled. It can be + emulated in litmus tests emulating call_rcu() via + (for example) a release-acquire from the end of each + additional call_rcu() process to the site of the + emulated rcu-barrier(). + + e. Although sleepable RCU (SRCU) is now modeled, there + are some subtle differences between its semantics and + those in the Linux kernel. For example, the kernel + might interpret the following sequence as two partially + overlapping SRCU read-side critical sections: + + 1 r1 = srcu_read_lock(&my_srcu); + 2 do_something_1(); + 3 r2 = srcu_read_lock(&my_srcu); + 4 do_something_2(); + 5 srcu_read_unlock(&my_srcu, r1); + 6 do_something_3(); + 7 srcu_read_unlock(&my_srcu, r2); + + In contrast, LKMM will interpret this as a nested pair of + SRCU read-side critical sections, with the outer critical + section spanning lines 1-7 and the inner critical section + spanning lines 3-5. + + This difference would be more of a concern had anyone + identified a reasonable use case for partially overlapping + SRCU read-side critical sections. For more information + on the trickiness of such overlapping, please see: + https://paulmck.livejournal.com/40593.html + + f. Reader-writer locking is not modeled. It can be + emulated in litmus tests using atomic read-modify-write + operations. + +The fragment of the C language supported by these litmus tests is quite +limited and in some ways non-standard: + +1. There is no automatic C-preprocessor pass. You can of course + run it manually, if you choose. + +2. There is no way to create functions other than the Pn() functions + that model the concurrent processes. + +3. The Pn() functions' formal parameters must be pointers to the + global shared variables. Nothing can be passed by value into + these functions. + +4. The only functions that can be invoked are those built directly + into herd7 or that are defined in the linux-kernel.def file. + +5. The "switch", "do", "for", "while", and "goto" C statements are + not supported. The "switch" statement can be emulated by the + "if" statement. The "do", "for", and "while" statements can + often be emulated by manually unrolling the loop, or perhaps by + enlisting the aid of the C preprocessor to minimize the resulting + code duplication. Some uses of "goto" can be emulated by "if", + and some others by unrolling. + +6. Although you can use a wide variety of types in litmus-test + variable declarations, and especially in global-variable + declarations, the "herd7" tool understands only int and + pointer types. There is no support for floating-point types, + enumerations, characters, strings, arrays, or structures. + +7. Parsing of variable declarations is very loose, with almost no + type checking. + +8. Initializers differ from their C-language counterparts. + For example, when an initializer contains the name of a shared + variable, that name denotes a pointer to that variable, not + the current value of that variable. For example, "int x = y" + is interpreted the way "int x = &y" would be in C. + +9. Dynamic memory allocation is not supported, although this can + be worked around in some cases by supplying multiple statically + allocated variables. + +Some of these limitations may be overcome in the future, but others are +more likely to be addressed by incorporating the Linux-kernel memory model +into other tools. + +Finally, please note that LKMM is subject to change as hardware, use cases, +and compilers evolve. diff --git a/tools/memory-model/Documentation/recipes.txt b/tools/memory-model/Documentation/recipes.txt index 63c4adfed884..03f58b11c252 100644 --- a/tools/memory-model/Documentation/recipes.txt +++ b/tools/memory-model/Documentation/recipes.txt @@ -1,7 +1,7 @@ This document provides "recipes", that is, litmus tests for commonly occurring situations, as well as a few that illustrate subtly broken but attractive nuisances. Many of these recipes include example code from -v4.13 of the Linux kernel. +v5.7 of the Linux kernel. The first section covers simple special cases, the second section takes off the training wheels to cover more involved examples, @@ -278,7 +278,7 @@ is present if the value loaded determines the address of a later access first place (control dependency). Note that the term "data dependency" is sometimes casually used to cover both address and data dependencies. -In lib/prime_numbers.c, the expand_to_next_prime() function invokes +In lib/math/prime_numbers.c, the expand_to_next_prime() function invokes rcu_assign_pointer(), and the next_prime_number() function invokes rcu_dereference(). This combination mediates access to a bit vector that is expanded as additional primes are needed. diff --git a/tools/memory-model/Documentation/references.txt b/tools/memory-model/Documentation/references.txt index ecbbaa5396d4..c5fdfd19df24 100644 --- a/tools/memory-model/Documentation/references.txt +++ b/tools/memory-model/Documentation/references.txt @@ -120,7 +120,7 @@ o Jade Alglave, Luc Maranget, and Michael Tautschnig. 2014. "Herding o Jade Alglave, Patrick Cousot, and Luc Maranget. 2016. "Syntax and semantics of the weak consistency model specification language - cat". CoRR abs/1608.07531 (2016). http://arxiv.org/abs/1608.07531 + cat". CoRR abs/1608.07531 (2016). https://arxiv.org/abs/1608.07531 Memory-model comparisons diff --git a/tools/memory-model/Documentation/simple.txt b/tools/memory-model/Documentation/simple.txt new file mode 100644 index 000000000000..81e1a0ec5342 --- /dev/null +++ b/tools/memory-model/Documentation/simple.txt @@ -0,0 +1,271 @@ +This document provides options for those wishing to keep their +memory-ordering lives simple, as is necessary for those whose domain +is complex. After all, there are bugs other than memory-ordering bugs, +and the time spent gaining memory-ordering knowledge is not available +for gaining domain knowledge. Furthermore Linux-kernel memory model +(LKMM) is quite complex, with subtle differences in code often having +dramatic effects on correctness. + +The options near the beginning of this list are quite simple. The idea +is not that kernel hackers don't already know about them, but rather +that they might need the occasional reminder. + +Please note that this is a generic guide, and that specific subsystems +will often have special requirements or idioms. For example, developers +of MMIO-based device drivers will often need to use mb(), rmb(), and +wmb(), and therefore might find smp_mb(), smp_rmb(), and smp_wmb() +to be more natural than smp_load_acquire() and smp_store_release(). +On the other hand, those coming in from other environments will likely +be more familiar with these last two. + + +Single-threaded code +==================== + +In single-threaded code, there is no reordering, at least assuming +that your toolchain and hardware are working correctly. In addition, +it is generally a mistake to assume your code will only run in a single +threaded context as the kernel can enter the same code path on multiple +CPUs at the same time. One important exception is a function that makes +no external data references. + +In the general case, you will need to take explicit steps to ensure that +your code really is executed within a single thread that does not access +shared variables. A simple way to achieve this is to define a global lock +that you acquire at the beginning of your code and release at the end, +taking care to ensure that all references to your code's shared data are +also carried out under that same lock. Because only one thread can hold +this lock at a given time, your code will be executed single-threaded. +This approach is called "code locking". + +Code locking can severely limit both performance and scalability, so it +should be used with caution, and only on code paths that execute rarely. +After all, a huge amount of effort was required to remove the Linux +kernel's old "Big Kernel Lock", so let's please be very careful about +adding new "little kernel locks". + +One of the advantages of locking is that, in happy contrast with the +year 1981, almost all kernel developers are very familiar with locking. +The Linux kernel's lockdep (CONFIG_PROVE_LOCKING=y) is very helpful with +the formerly feared deadlock scenarios. + +Please use the standard locking primitives provided by the kernel rather +than rolling your own. For one thing, the standard primitives interact +properly with lockdep. For another thing, these primitives have been +tuned to deal better with high contention. And for one final thing, it is +surprisingly hard to correctly code production-quality lock acquisition +and release functions. After all, even simple non-production-quality +locking functions must carefully prevent both the CPU and the compiler +from moving code in either direction across the locking function. + +Despite the scalability limitations of single-threaded code, RCU +takes this approach for much of its grace-period processing and also +for early-boot operation. The reason RCU is able to scale despite +single-threaded grace-period processing is use of batching, where all +updates that accumulated during one grace period are handled by the +next one. In other words, slowing down grace-period processing makes +it more efficient. Nor is RCU unique: Similar batching optimizations +are used in many I/O operations. + + +Packaged code +============= + +Even if performance and scalability concerns prevent your code from +being completely single-threaded, it is often possible to use library +functions that handle the concurrency nearly or entirely on their own. +This approach delegates any LKMM worries to the library maintainer. + +In the kernel, what is the "library"? Quite a bit. It includes the +contents of the lib/ directory, much of the include/linux/ directory along +with a lot of other heavily used APIs. But heavily used examples include +the list macros (for example, include/linux/{,rcu}list.h), workqueues, +smp_call_function(), and the various hash tables and search trees. + + +Data locking +============ + +With code locking, we use single-threaded code execution to guarantee +serialized access to the data that the code is accessing. However, +we can also achieve this by instead associating the lock with specific +instances of the data structures. This creates a "critical section" +in the code execution that will execute as though it is single threaded. +By placing all the accesses and modifications to a shared data structure +inside a critical section, we ensure that the execution context that +holds the lock has exclusive access to the shared data. + +The poster boy for this approach is the hash table, where placing a lock +in each hash bucket allows operations on different buckets to proceed +concurrently. This works because the buckets do not overlap with each +other, so that an operation on one bucket does not interfere with any +other bucket. + +As the number of buckets increases, data locking scales naturally. +In particular, if the amount of data increases with the number of CPUs, +increasing the number of buckets as the number of CPUs increase results +in a naturally scalable data structure. + + +Per-CPU processing +================== + +Partitioning processing and data over CPUs allows each CPU to take +a single-threaded approach while providing excellent performance and +scalability. Of course, there is no free lunch: The dark side of this +excellence is substantially increased memory footprint. + +In addition, it is sometimes necessary to occasionally update some global +view of this processing and data, in which case something like locking +must be used to protect this global view. This is the approach taken +by the percpu_counter infrastructure. In many cases, there are already +generic/library variants of commonly used per-cpu constructs available. +Please use them rather than rolling your own. + +RCU uses DEFINE_PER_CPU*() declaration to create a number of per-CPU +data sets. For example, each CPU does private quiescent-state processing +within its instance of the per-CPU rcu_data structure, and then uses data +locking to report quiescent states up the grace-period combining tree. + + +Packaged primitives: Sequence locking +===================================== + +Lockless programming is considered by many to be more difficult than +lock-based programming, but there are a few lockless design patterns that +have been built out into an API. One of these APIs is sequence locking. +Although this APIs can be used in extremely complex ways, there are simple +and effective ways of using it that avoid the need to pay attention to +memory ordering. + +The basic keep-things-simple rule for sequence locking is "do not write +in read-side code". Yes, you can do writes from within sequence-locking +readers, but it won't be so simple. For example, such writes will be +lockless and should be idempotent. + +For more sophisticated use cases, LKMM can guide you, including use +cases involving combining sequence locking with other synchronization +primitives. (LKMM does not yet know about sequence locking, so it is +currently necessary to open-code it in your litmus tests.) + +Additional information may be found in include/linux/seqlock.h. + +Packaged primitives: RCU +======================== + +Another lockless design pattern that has been baked into an API +is RCU. The Linux kernel makes sophisticated use of RCU, but the +keep-things-simple rules for RCU are "do not write in read-side code" +and "do not update anything that is visible to and accessed by readers", +and "protect updates with locking". + +These rules are illustrated by the functions foo_update_a() and +foo_get_a() shown in Documentation/RCU/whatisRCU.rst. Additional +RCU usage patterns maybe found in Documentation/RCU and in the +source code. + + +Packaged primitives: Atomic operations +====================================== + +Back in the day, the Linux kernel had three types of atomic operations: + +1. Initialization and read-out, such as atomic_set() and atomic_read(). + +2. Operations that did not return a value and provided no ordering, + such as atomic_inc() and atomic_dec(). + +3. Operations that returned a value and provided full ordering, such as + atomic_add_return() and atomic_dec_and_test(). Note that some + value-returning operations provide full ordering only conditionally. + For example, cmpxchg() provides ordering only upon success. + +More recent kernels have operations that return a value but do not +provide full ordering. These are flagged with either a _relaxed() +suffix (providing no ordering), or an _acquire() or _release() suffix +(providing limited ordering). + +Additional information may be found in these files: + +Documentation/atomic_t.txt +Documentation/atomic_bitops.txt +Documentation/core-api/atomic_ops.rst +Documentation/core-api/refcount-vs-atomic.rst + +Reading code using these primitives is often also quite helpful. + + +Lockless, fully ordered +======================= + +When using locking, there often comes a time when it is necessary +to access some variable or another without holding the data lock +that serializes access to that variable. + +If you want to keep things simple, use the initialization and read-out +operations from the previous section only when there are no racing +accesses. Otherwise, use only fully ordered operations when accessing +or modifying the variable. This approach guarantees that code prior +to a given access to that variable will be seen by all CPUs has having +happened before any code following any later access to that same variable. + +Please note that per-CPU functions are not atomic operations and +hence they do not provide any ordering guarantees at all. + +If the lockless accesses are frequently executed reads that are used +only for heuristics, or if they are frequently executed writes that +are used only for statistics, please see the next section. + + +Lockless statistics and heuristics +================================== + +Unordered primitives such as atomic_read(), atomic_set(), READ_ONCE(), and +WRITE_ONCE() can safely be used in some cases. These primitives provide +no ordering, but they do prevent the compiler from carrying out a number +of destructive optimizations (for which please see the next section). +One example use for these primitives is statistics, such as per-CPU +counters exemplified by the rt_cache_stat structure's routing-cache +statistics counters. Another example use case is heuristics, such as +the jiffies_till_first_fqs and jiffies_till_next_fqs kernel parameters +controlling how often RCU scans for idle CPUs. + +But be careful. "Unordered" really does mean "unordered". It is all +too easy to assume ordering, and this assumption must be avoided when +using these primitives. + + +Don't let the compiler trip you up +================================== + +It can be quite tempting to use plain C-language accesses for lockless +loads from and stores to shared variables. Although this is both +possible and quite common in the Linux kernel, it does require a +surprising amount of analysis, care, and knowledge about the compiler. +Yes, some decades ago it was not unfair to consider a C compiler to be +an assembler with added syntax and better portability, but the advent of +sophisticated optimizing compilers mean that those days are long gone. +Today's optimizing compilers can profoundly rewrite your code during the +translation process, and have long been ready, willing, and able to do so. + +Therefore, if you really need to use C-language assignments instead of +READ_ONCE(), WRITE_ONCE(), and so on, you will need to have a very good +understanding of both the C standard and your compiler. Here are some +introductory references and some tooling to start you on this noble quest: + +Who's afraid of a big bad optimizing compiler? + https://lwn.net/Articles/793253/ +Calibrating your fear of big bad optimizing compilers + https://lwn.net/Articles/799218/ +Concurrency bugs should fear the big bad data-race detector (part 1) + https://lwn.net/Articles/816850/ +Concurrency bugs should fear the big bad data-race detector (part 2) + https://lwn.net/Articles/816854/ + + +More complex use cases +====================== + +If the alternatives above do not do what you need, please look at the +recipes-pairs.txt file to peel off the next layer of the memory-ordering +onion. diff --git a/tools/memory-model/README b/tools/memory-model/README index ecb7385376bf..c8144d4aafa0 100644 --- a/tools/memory-model/README +++ b/tools/memory-model/README @@ -63,10 +63,32 @@ BASIC USAGE: HERD7 ================== The memory model is used, in conjunction with "herd7", to exhaustively -explore the state space of small litmus tests. +explore the state space of small litmus tests. Documentation describing +the format, features, capabilities and limitations of these litmus +tests is available in tools/memory-model/Documentation/litmus-tests.txt. -For example, to run SB+fencembonceonces.litmus against the memory model: +Example litmus tests may be found in the Linux-kernel source tree: + tools/memory-model/litmus-tests/ + Documentation/litmus-tests/ + +Several thousand more example litmus tests are available here: + + https://github.com/paulmckrcu/litmus + https://git.kernel.org/pub/scm/linux/kernel/git/paulmck/perfbook.git/tree/CodeSamples/formal/herd + https://git.kernel.org/pub/scm/linux/kernel/git/paulmck/perfbook.git/tree/CodeSamples/formal/litmus + +Documentation describing litmus tests and now to use them may be found +here: + + tools/memory-model/Documentation/litmus-tests.txt + +The remainder of this section uses the SB+fencembonceonces.litmus test +located in the tools/memory-model directory. + +To run SB+fencembonceonces.litmus against the memory model: + + $ cd $LINUX_SOURCE_TREE/tools/memory-model $ herd7 -conf linux-kernel.cfg litmus-tests/SB+fencembonceonces.litmus Here is the corresponding output: @@ -87,7 +109,11 @@ Here is the corresponding output: The "Positive: 0 Negative: 3" and the "Never 0 3" each indicate that this litmus test's "exists" clause can not be satisfied. -See "herd7 -help" or "herdtools7/doc/" for more information. +See "herd7 -help" or "herdtools7/doc/" for more information on running the +tool itself, but please be aware that this documentation is intended for +people who work on the memory model itself, that is, people making changes +to the tools/memory-model/linux-kernel.* files. It is not intended for +people focusing on writing, understanding, and running LKMM litmus tests. ===================== @@ -124,7 +150,11 @@ that during two million trials, the state specified in this litmus test's "exists" clause was not reached. And, as with "herd7", please see "klitmus7 -help" or "herdtools7/doc/" -for more information. +for more information. And again, please be aware that this documentation +is intended for people who work on the memory model itself, that is, +people making changes to the tools/memory-model/linux-kernel.* files. +It is not intended for people focusing on writing, understanding, and +running LKMM litmus tests. ==================== @@ -137,12 +167,21 @@ Documentation/cheatsheet.txt Documentation/explanation.txt Describes the memory model in detail. +Documentation/litmus-tests.txt + Describes the format, features, capabilities, and limitations + of the litmus tests that LKMM can evaluate. + Documentation/recipes.txt Lists common memory-ordering patterns. Documentation/references.txt Provides background reading. +Documentation/simple.txt + Starting point for someone new to Linux-kernel concurrency. + And also for those needing a reminder of the simpler approaches + to concurrency! + linux-kernel.bell Categorizes the relevant instructions, including memory references, memory barriers, atomic read-modify-write operations, @@ -187,116 +226,3 @@ README This file. scripts Various scripts, see scripts/README. - - -=========== -LIMITATIONS -=========== - -The Linux-kernel memory model (LKMM) has the following limitations: - -1. Compiler optimizations are not accurately modeled. Of course, - the use of READ_ONCE() and WRITE_ONCE() limits the compiler's - ability to optimize, but under some circumstances it is possible - for the compiler to undermine the memory model. For more - information, see Documentation/explanation.txt (in particular, - the "THE PROGRAM ORDER RELATION: po AND po-loc" and "A WARNING" - sections). - - Note that this limitation in turn limits LKMM's ability to - accurately model address, control, and data dependencies. - For example, if the compiler can deduce the value of some variable - carrying a dependency, then the compiler can break that dependency - by substituting a constant of that value. - -2. Multiple access sizes for a single variable are not supported, - and neither are misaligned or partially overlapping accesses. - -3. Exceptions and interrupts are not modeled. In some cases, - this limitation can be overcome by modeling the interrupt or - exception with an additional process. - -4. I/O such as MMIO or DMA is not supported. - -5. Self-modifying code (such as that found in the kernel's - alternatives mechanism, function tracer, Berkeley Packet Filter - JIT compiler, and module loader) is not supported. - -6. Complete modeling of all variants of atomic read-modify-write - operations, locking primitives, and RCU is not provided. - For example, call_rcu() and rcu_barrier() are not supported. - However, a substantial amount of support is provided for these - operations, as shown in the linux-kernel.def file. - - a. When rcu_assign_pointer() is passed NULL, the Linux - kernel provides no ordering, but LKMM models this - case as a store release. - - b. The "unless" RMW operations are not currently modeled: - atomic_long_add_unless(), atomic_inc_unless_negative(), - and atomic_dec_unless_positive(). These can be emulated - in litmus tests, for example, by using atomic_cmpxchg(). - - One exception of this limitation is atomic_add_unless(), - which is provided directly by herd7 (so no corresponding - definition in linux-kernel.def). atomic_add_unless() is - modeled by herd7 therefore it can be used in litmus tests. - - c. The call_rcu() function is not modeled. It can be - emulated in litmus tests by adding another process that - invokes synchronize_rcu() and the body of the callback - function, with (for example) a release-acquire from - the site of the emulated call_rcu() to the beginning - of the additional process. - - d. The rcu_barrier() function is not modeled. It can be - emulated in litmus tests emulating call_rcu() via - (for example) a release-acquire from the end of each - additional call_rcu() process to the site of the - emulated rcu-barrier(). - - e. Although sleepable RCU (SRCU) is now modeled, there - are some subtle differences between its semantics and - those in the Linux kernel. For example, the kernel - might interpret the following sequence as two partially - overlapping SRCU read-side critical sections: - - 1 r1 = srcu_read_lock(&my_srcu); - 2 do_something_1(); - 3 r2 = srcu_read_lock(&my_srcu); - 4 do_something_2(); - 5 srcu_read_unlock(&my_srcu, r1); - 6 do_something_3(); - 7 srcu_read_unlock(&my_srcu, r2); - - In contrast, LKMM will interpret this as a nested pair of - SRCU read-side critical sections, with the outer critical - section spanning lines 1-7 and the inner critical section - spanning lines 3-5. - - This difference would be more of a concern had anyone - identified a reasonable use case for partially overlapping - SRCU read-side critical sections. For more information, - please see: https://paulmck.livejournal.com/40593.html - - f. Reader-writer locking is not modeled. It can be - emulated in litmus tests using atomic read-modify-write - operations. - -The "herd7" tool has some additional limitations of its own, apart from -the memory model: - -1. Non-trivial data structures such as arrays or structures are - not supported. However, pointers are supported, allowing trivial - linked lists to be constructed. - -2. Dynamic memory allocation is not supported, although this can - be worked around in some cases by supplying multiple statically - allocated variables. - -Some of these limitations may be overcome in the future, but others are -more likely to be addressed by incorporating the Linux-kernel memory model -into other tools. - -Finally, please note that LKMM is subject to change as hardware, use cases, -and compilers evolve. diff --git a/tools/objtool/Makefile b/tools/objtool/Makefile index 7770edcda3a0..4ea9a833dde7 100644 --- a/tools/objtool/Makefile +++ b/tools/objtool/Makefile @@ -37,7 +37,7 @@ INCLUDES := -I$(srctree)/tools/include \ -I$(srctree)/tools/arch/$(HOSTARCH)/include/uapi \ -I$(srctree)/tools/arch/$(SRCARCH)/include \ -I$(srctree)/tools/objtool/arch/$(SRCARCH)/include -WARNINGS := $(EXTRA_WARNINGS) -Wno-switch-default -Wno-switch-enum -Wno-packed +WARNINGS := $(EXTRA_WARNINGS) -Wno-switch-default -Wno-switch-enum -Wno-packed -Wno-nested-externs CFLAGS := -Werror $(WARNINGS) $(KBUILD_HOSTCFLAGS) -g $(INCLUDES) $(LIBELF_FLAGS) LDFLAGS += $(LIBELF_LIBS) $(LIBSUBCMD) $(KBUILD_HOSTLDFLAGS) @@ -55,6 +55,10 @@ ifeq ($(SRCARCH),x86) SUBCMD_ORC := y endif +ifeq ($(SUBCMD_ORC),y) + CFLAGS += -DINSN_USE_ORC +endif + export SUBCMD_CHECK SUBCMD_ORC export srctree OUTPUT CFLAGS SRCARCH AWK include $(srctree)/tools/build/Makefile.include diff --git a/tools/objtool/arch.h b/tools/objtool/arch.h index 2e2ce089b0e9..4a84c3081b8e 100644 --- a/tools/objtool/arch.h +++ b/tools/objtool/arch.h @@ -11,7 +11,9 @@ #include "objtool.h" #include "cfi.h" +#ifdef INSN_USE_ORC #include <asm/orc_types.h> +#endif enum insn_type { INSN_JUMP_CONDITIONAL, @@ -86,4 +88,6 @@ unsigned long arch_dest_reloc_offset(int addend); const char *arch_nop_insn(int len); +int arch_decode_hint_reg(struct instruction *insn, u8 sp_reg); + #endif /* _ARCH_H */ diff --git a/tools/objtool/arch/x86/Build b/tools/objtool/arch/x86/Build index 7c5004008e97..9f7869b5c5e0 100644 --- a/tools/objtool/arch/x86/Build +++ b/tools/objtool/arch/x86/Build @@ -1,3 +1,4 @@ +objtool-y += special.o objtool-y += decode.o inat_tables_script = ../arch/x86/tools/gen-insn-attr-x86.awk diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c index 1967370440b3..cde9c36e40ae 100644 --- a/tools/objtool/arch/x86/decode.c +++ b/tools/objtool/arch/x86/decode.c @@ -15,6 +15,7 @@ #include "../../elf.h" #include "../../arch.h" #include "../../warn.h" +#include <asm/orc_types.h> static unsigned char op_to_cfi_reg[][2] = { {CFI_AX, CFI_R8}, @@ -583,3 +584,39 @@ const char *arch_nop_insn(int len) return nops[len-1]; } + +int arch_decode_hint_reg(struct instruction *insn, u8 sp_reg) +{ + struct cfi_reg *cfa = &insn->cfi.cfa; + + switch (sp_reg) { + case ORC_REG_UNDEFINED: + cfa->base = CFI_UNDEFINED; + break; + case ORC_REG_SP: + cfa->base = CFI_SP; + break; + case ORC_REG_BP: + cfa->base = CFI_BP; + break; + case ORC_REG_SP_INDIRECT: + cfa->base = CFI_SP_INDIRECT; + break; + case ORC_REG_R10: + cfa->base = CFI_R10; + break; + case ORC_REG_R13: + cfa->base = CFI_R13; + break; + case ORC_REG_DI: + cfa->base = CFI_DI; + break; + case ORC_REG_DX: + cfa->base = CFI_DX; + break; + default: + return -1; + } + + return 0; +} diff --git a/tools/objtool/arch/x86/include/arch_special.h b/tools/objtool/arch/x86/include/arch_special.h new file mode 100644 index 000000000000..d818b2bffa02 --- /dev/null +++ b/tools/objtool/arch/x86/include/arch_special.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +#ifndef _X86_ARCH_SPECIAL_H +#define _X86_ARCH_SPECIAL_H + +#define EX_ENTRY_SIZE 12 +#define EX_ORIG_OFFSET 0 +#define EX_NEW_OFFSET 4 + +#define JUMP_ENTRY_SIZE 16 +#define JUMP_ORIG_OFFSET 0 +#define JUMP_NEW_OFFSET 4 + +#define ALT_ENTRY_SIZE 13 +#define ALT_ORIG_OFFSET 0 +#define ALT_NEW_OFFSET 4 +#define ALT_FEATURE_OFFSET 8 +#define ALT_ORIG_LEN_OFFSET 10 +#define ALT_NEW_LEN_OFFSET 11 + +#endif /* _X86_ARCH_SPECIAL_H */ diff --git a/tools/objtool/arch/x86/special.c b/tools/objtool/arch/x86/special.c new file mode 100644 index 000000000000..fd4af88c0ea5 --- /dev/null +++ b/tools/objtool/arch/x86/special.c @@ -0,0 +1,145 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +#include <string.h> + +#include "../../special.h" +#include "../../builtin.h" + +#define X86_FEATURE_POPCNT (4 * 32 + 23) +#define X86_FEATURE_SMAP (9 * 32 + 20) + +void arch_handle_alternative(unsigned short feature, struct special_alt *alt) +{ + switch (feature) { + case X86_FEATURE_SMAP: + /* + * If UACCESS validation is enabled; force that alternative; + * otherwise force it the other way. + * + * What we want to avoid is having both the original and the + * alternative code flow at the same time, in that case we can + * find paths that see the STAC but take the NOP instead of + * CLAC and the other way around. + */ + if (uaccess) + alt->skip_orig = true; + else + alt->skip_alt = true; + break; + case X86_FEATURE_POPCNT: + /* + * It has been requested that we don't validate the !POPCNT + * feature path which is a "very very small percentage of + * machines". + */ + alt->skip_orig = true; + break; + default: + break; + } +} + +bool arch_support_alt_relocation(struct special_alt *special_alt, + struct instruction *insn, + struct reloc *reloc) +{ + /* + * The x86 alternatives code adjusts the offsets only when it + * encounters a branch instruction at the very beginning of the + * replacement group. + */ + return insn->offset == special_alt->new_off && + (insn->type == INSN_CALL || is_static_jump(insn)); +} + +/* + * There are 3 basic jump table patterns: + * + * 1. jmpq *[rodata addr](,%reg,8) + * + * This is the most common case by far. It jumps to an address in a simple + * jump table which is stored in .rodata. + * + * 2. jmpq *[rodata addr](%rip) + * + * This is caused by a rare GCC quirk, currently only seen in three driver + * functions in the kernel, only with certain obscure non-distro configs. + * + * As part of an optimization, GCC makes a copy of an existing switch jump + * table, modifies it, and then hard-codes the jump (albeit with an indirect + * jump) to use a single entry in the table. The rest of the jump table and + * some of its jump targets remain as dead code. + * + * In such a case we can just crudely ignore all unreachable instruction + * warnings for the entire object file. Ideally we would just ignore them + * for the function, but that would require redesigning the code quite a + * bit. And honestly that's just not worth doing: unreachable instruction + * warnings are of questionable value anyway, and this is such a rare issue. + * + * 3. mov [rodata addr],%reg1 + * ... some instructions ... + * jmpq *(%reg1,%reg2,8) + * + * This is a fairly uncommon pattern which is new for GCC 6. As of this + * writing, there are 11 occurrences of it in the allmodconfig kernel. + * + * As of GCC 7 there are quite a few more of these and the 'in between' code + * is significant. Esp. with KASAN enabled some of the code between the mov + * and jmpq uses .rodata itself, which can confuse things. + * + * TODO: Once we have DWARF CFI and smarter instruction decoding logic, + * ensure the same register is used in the mov and jump instructions. + * + * NOTE: RETPOLINE made it harder still to decode dynamic jumps. + */ +struct reloc *arch_find_switch_table(struct objtool_file *file, + struct instruction *insn) +{ + struct reloc *text_reloc, *rodata_reloc; + struct section *table_sec; + unsigned long table_offset; + + /* look for a relocation which references .rodata */ + text_reloc = find_reloc_by_dest_range(file->elf, insn->sec, + insn->offset, insn->len); + if (!text_reloc || text_reloc->sym->type != STT_SECTION || + !text_reloc->sym->sec->rodata) + return NULL; + + table_offset = text_reloc->addend; + table_sec = text_reloc->sym->sec; + + if (text_reloc->type == R_X86_64_PC32) + table_offset += 4; + + /* + * Make sure the .rodata address isn't associated with a + * symbol. GCC jump tables are anonymous data. + * + * Also support C jump tables which are in the same format as + * switch jump tables. For objtool to recognize them, they + * need to be placed in the C_JUMP_TABLE_SECTION section. They + * have symbols associated with them. + */ + if (find_symbol_containing(table_sec, table_offset) && + strcmp(table_sec->name, C_JUMP_TABLE_SECTION)) + return NULL; + + /* + * Each table entry has a rela associated with it. The rela + * should reference text in the same function as the original + * instruction. + */ + rodata_reloc = find_reloc_by_dest(file->elf, table_sec, table_offset); + if (!rodata_reloc) + return NULL; + + /* + * Use of RIP-relative switch jumps is quite rare, and + * indicates a rare GCC quirk/bug which can leave dead + * code behind. + */ + if (text_reloc->type == R_X86_64_PC32) + file->ignore_unreachables = true; + + return rodata_reloc; +} diff --git a/tools/objtool/builtin-check.c b/tools/objtool/builtin-check.c index 7a44174967b5..c6d199bfd0ae 100644 --- a/tools/objtool/builtin-check.c +++ b/tools/objtool/builtin-check.c @@ -41,6 +41,8 @@ const struct option check_options[] = { int cmd_check(int argc, const char **argv) { const char *objname, *s; + struct objtool_file *file; + int ret; argc = parse_options(argc, argv, check_options, check_usage, 0); @@ -53,5 +55,16 @@ int cmd_check(int argc, const char **argv) if (s && !s[9]) vmlinux = true; - return check(objname, false); + file = objtool_open_read(objname); + if (!file) + return 1; + + ret = check(file); + if (ret) + return ret; + + if (file->elf->changed) + return elf_write(file->elf); + + return 0; } diff --git a/tools/objtool/builtin-orc.c b/tools/objtool/builtin-orc.c index b1dfe2007962..7b31121fa60b 100644 --- a/tools/objtool/builtin-orc.c +++ b/tools/objtool/builtin-orc.c @@ -31,13 +31,38 @@ int cmd_orc(int argc, const char **argv) usage_with_options(orc_usage, check_options); if (!strncmp(argv[0], "gen", 3)) { + struct objtool_file *file; + int ret; + argc = parse_options(argc, argv, check_options, orc_usage, 0); if (argc != 1) usage_with_options(orc_usage, check_options); objname = argv[0]; - return check(objname, true); + file = objtool_open_read(objname); + if (!file) + return 1; + + ret = check(file); + if (ret) + return ret; + + if (list_empty(&file->insn_list)) + return 0; + + ret = create_orc(file); + if (ret) + return ret; + + ret = create_orc_sections(file); + if (ret) + return ret; + + if (!file->elf->changed) + return 0; + + return elf_write(file->elf); } if (!strcmp(argv[0], "dump")) { diff --git a/tools/objtool/check.c b/tools/objtool/check.c index e034a8f24f46..c6ab44543c92 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -14,20 +14,19 @@ #include "warn.h" #include "arch_elf.h" +#include <linux/objtool.h> #include <linux/hashtable.h> #include <linux/kernel.h> +#include <linux/static_call_types.h> #define FAKE_JUMP_OFFSET -1 -#define C_JUMP_TABLE_SECTION ".rodata..c_jump_table" - struct alternative { struct list_head list; struct instruction *insn; bool skip_orig; }; -const char *objname; struct cfi_init_state initial_func_cfi; struct instruction *find_insn(struct objtool_file *file, @@ -110,12 +109,6 @@ static struct instruction *prev_insn_same_sym(struct objtool_file *file, for (insn = next_insn_same_sec(file, insn); insn; \ insn = next_insn_same_sec(file, insn)) -static bool is_static_jump(struct instruction *insn) -{ - return insn->type == INSN_JUMP_CONDITIONAL || - insn->type == INSN_JUMP_UNCONDITIONAL; -} - static bool is_sibling_call(struct instruction *insn) { /* An indirect jump is either a sibling call or a jump to a table. */ @@ -433,6 +426,103 @@ reachable: return 0; } +static int create_static_call_sections(struct objtool_file *file) +{ + struct section *sec, *reloc_sec; + struct reloc *reloc; + struct static_call_site *site; + struct instruction *insn; + struct symbol *key_sym; + char *key_name, *tmp; + int idx; + + sec = find_section_by_name(file->elf, ".static_call_sites"); + if (sec) { + INIT_LIST_HEAD(&file->static_call_list); + WARN("file already has .static_call_sites section, skipping"); + return 0; + } + + if (list_empty(&file->static_call_list)) + return 0; + + idx = 0; + list_for_each_entry(insn, &file->static_call_list, static_call_node) + idx++; + + sec = elf_create_section(file->elf, ".static_call_sites", SHF_WRITE, + sizeof(struct static_call_site), idx); + if (!sec) + return -1; + + reloc_sec = elf_create_reloc_section(file->elf, sec, SHT_RELA); + if (!reloc_sec) + return -1; + + idx = 0; + list_for_each_entry(insn, &file->static_call_list, static_call_node) { + + site = (struct static_call_site *)sec->data->d_buf + idx; + memset(site, 0, sizeof(struct static_call_site)); + + /* populate reloc for 'addr' */ + reloc = malloc(sizeof(*reloc)); + if (!reloc) { + perror("malloc"); + return -1; + } + memset(reloc, 0, sizeof(*reloc)); + reloc->sym = insn->sec->sym; + reloc->addend = insn->offset; + reloc->type = R_X86_64_PC32; + reloc->offset = idx * sizeof(struct static_call_site); + reloc->sec = reloc_sec; + elf_add_reloc(file->elf, reloc); + + /* find key symbol */ + key_name = strdup(insn->call_dest->name); + if (!key_name) { + perror("strdup"); + return -1; + } + if (strncmp(key_name, STATIC_CALL_TRAMP_PREFIX_STR, + STATIC_CALL_TRAMP_PREFIX_LEN)) { + WARN("static_call: trampoline name malformed: %s", key_name); + return -1; + } + tmp = key_name + STATIC_CALL_TRAMP_PREFIX_LEN - STATIC_CALL_KEY_PREFIX_LEN; + memcpy(tmp, STATIC_CALL_KEY_PREFIX_STR, STATIC_CALL_KEY_PREFIX_LEN); + + key_sym = find_symbol_by_name(file->elf, tmp); + if (!key_sym) { + WARN("static_call: can't find static_call_key symbol: %s", tmp); + return -1; + } + free(key_name); + + /* populate reloc for 'key' */ + reloc = malloc(sizeof(*reloc)); + if (!reloc) { + perror("malloc"); + return -1; + } + memset(reloc, 0, sizeof(*reloc)); + reloc->sym = key_sym; + reloc->addend = is_sibling_call(insn) ? STATIC_CALL_SITE_TAIL : 0; + reloc->type = R_X86_64_PC32; + reloc->offset = idx * sizeof(struct static_call_site) + 4; + reloc->sec = reloc_sec; + elf_add_reloc(file->elf, reloc); + + idx++; + } + + if (elf_rebuild_reloc_section(file->elf, reloc_sec)) + return -1; + + return 0; +} + /* * Warnings shouldn't be reported for ignored functions. */ @@ -493,6 +583,8 @@ static const char *uaccess_safe_builtin[] = { "__asan_store4_noabort", "__asan_store8_noabort", "__asan_store16_noabort", + "__kasan_check_read", + "__kasan_check_write", /* KASAN in-line */ "__asan_report_load_n_noabort", "__asan_report_load1_noabort", @@ -528,6 +620,61 @@ static const char *uaccess_safe_builtin[] = { "__tsan_write4", "__tsan_write8", "__tsan_write16", + "__tsan_read_write1", + "__tsan_read_write2", + "__tsan_read_write4", + "__tsan_read_write8", + "__tsan_read_write16", + "__tsan_atomic8_load", + "__tsan_atomic16_load", + "__tsan_atomic32_load", + "__tsan_atomic64_load", + "__tsan_atomic8_store", + "__tsan_atomic16_store", + "__tsan_atomic32_store", + "__tsan_atomic64_store", + "__tsan_atomic8_exchange", + "__tsan_atomic16_exchange", + "__tsan_atomic32_exchange", + "__tsan_atomic64_exchange", + "__tsan_atomic8_fetch_add", + "__tsan_atomic16_fetch_add", + "__tsan_atomic32_fetch_add", + "__tsan_atomic64_fetch_add", + "__tsan_atomic8_fetch_sub", + "__tsan_atomic16_fetch_sub", + "__tsan_atomic32_fetch_sub", + "__tsan_atomic64_fetch_sub", + "__tsan_atomic8_fetch_and", + "__tsan_atomic16_fetch_and", + "__tsan_atomic32_fetch_and", + "__tsan_atomic64_fetch_and", + "__tsan_atomic8_fetch_or", + "__tsan_atomic16_fetch_or", + "__tsan_atomic32_fetch_or", + "__tsan_atomic64_fetch_or", + "__tsan_atomic8_fetch_xor", + "__tsan_atomic16_fetch_xor", + "__tsan_atomic32_fetch_xor", + "__tsan_atomic64_fetch_xor", + "__tsan_atomic8_fetch_nand", + "__tsan_atomic16_fetch_nand", + "__tsan_atomic32_fetch_nand", + "__tsan_atomic64_fetch_nand", + "__tsan_atomic8_compare_exchange_strong", + "__tsan_atomic16_compare_exchange_strong", + "__tsan_atomic32_compare_exchange_strong", + "__tsan_atomic64_compare_exchange_strong", + "__tsan_atomic8_compare_exchange_weak", + "__tsan_atomic16_compare_exchange_weak", + "__tsan_atomic32_compare_exchange_weak", + "__tsan_atomic64_compare_exchange_weak", + "__tsan_atomic8_compare_exchange_val", + "__tsan_atomic16_compare_exchange_val", + "__tsan_atomic32_compare_exchange_val", + "__tsan_atomic64_compare_exchange_val", + "__tsan_atomic_thread_fence", + "__tsan_atomic_signal_fence", /* KCOV */ "write_comp_data", "check_kcov_mode", @@ -548,8 +695,9 @@ static const char *uaccess_safe_builtin[] = { "__ubsan_handle_shift_out_of_bounds", /* misc */ "csum_partial_copy_generic", - "__memcpy_mcsafe", - "mcsafe_handle_tail", + "copy_mc_fragile", + "copy_mc_fragile_handle_tail", + "copy_mc_enhanced_fast_string", "ftrace_likely_update", /* CONFIG_TRACE_BRANCH_PROFILING */ NULL }; @@ -619,7 +767,7 @@ static int add_jump_destinations(struct objtool_file *file) if (!is_static_jump(insn)) continue; - if (insn->ignore || insn->offset == FAKE_JUMP_OFFSET) + if (insn->offset == FAKE_JUMP_OFFSET) continue; reloc = find_reloc_by_dest_range(file->elf, insn->sec, @@ -649,6 +797,10 @@ static int add_jump_destinations(struct objtool_file *file) } else { /* external sibling call */ insn->call_dest = reloc->sym; + if (insn->call_dest->static_call_tramp) { + list_add_tail(&insn->static_call_node, + &file->static_call_list); + } continue; } @@ -700,6 +852,10 @@ static int add_jump_destinations(struct objtool_file *file) /* internal sibling call */ insn->call_dest = insn->jump_dest->func; + if (insn->call_dest->static_call_tramp) { + list_add_tail(&insn->static_call_node, + &file->static_call_list); + } } } } @@ -717,6 +873,17 @@ static void remove_insn_ops(struct instruction *insn) } } +static struct symbol *find_call_destination(struct section *sec, unsigned long offset) +{ + struct symbol *call_dest; + + call_dest = find_func_by_offset(sec, offset); + if (!call_dest) + call_dest = find_symbol_by_offset(sec, offset); + + return call_dest; +} + /* * Find the destination instructions for all calls. */ @@ -734,9 +901,7 @@ static int add_call_destinations(struct objtool_file *file) insn->offset, insn->len); if (!reloc) { dest_off = arch_jump_destination(insn); - insn->call_dest = find_func_by_offset(insn->sec, dest_off); - if (!insn->call_dest) - insn->call_dest = find_symbol_by_offset(insn->sec, dest_off); + insn->call_dest = find_call_destination(insn->sec, dest_off); if (insn->ignore) continue; @@ -754,8 +919,8 @@ static int add_call_destinations(struct objtool_file *file) } else if (reloc->sym->type == STT_SECTION) { dest_off = arch_dest_reloc_offset(reloc->addend); - insn->call_dest = find_func_by_offset(reloc->sym->sec, - dest_off); + insn->call_dest = find_call_destination(reloc->sym->sec, + dest_off); if (!insn->call_dest) { WARN_FUNC("can't find call dest symbol at %s+0x%lx", insn->sec, insn->offset, @@ -867,6 +1032,8 @@ static int handle_group_alt(struct objtool_file *file, alt_group = alt_group_next_index++; insn = *new_insn; sec_for_each_insn_from(file, insn) { + struct reloc *alt_reloc; + if (insn->offset >= special_alt->new_off + special_alt->new_len) break; @@ -883,14 +1050,11 @@ static int handle_group_alt(struct objtool_file *file, * .altinstr_replacement section, unless the arch's * alternatives code can adjust the relative offsets * accordingly. - * - * The x86 alternatives code adjusts the offsets only when it - * encounters a branch instruction at the very beginning of the - * replacement group. */ - if ((insn->offset != special_alt->new_off || - (insn->type != INSN_CALL && !is_static_jump(insn))) && - find_reloc_by_dest_range(file->elf, insn->sec, insn->offset, insn->len)) { + alt_reloc = find_reloc_by_dest_range(file->elf, insn->sec, + insn->offset, insn->len); + if (alt_reloc && + !arch_support_alt_relocation(special_alt, insn, alt_reloc)) { WARN_FUNC("unsupported relocation in alternatives section", insn->sec, insn->offset); @@ -1092,56 +1256,15 @@ static int add_jump_table(struct objtool_file *file, struct instruction *insn, } /* - * find_jump_table() - Given a dynamic jump, find the switch jump table in - * .rodata associated with it. - * - * There are 3 basic patterns: - * - * 1. jmpq *[rodata addr](,%reg,8) - * - * This is the most common case by far. It jumps to an address in a simple - * jump table which is stored in .rodata. - * - * 2. jmpq *[rodata addr](%rip) - * - * This is caused by a rare GCC quirk, currently only seen in three driver - * functions in the kernel, only with certain obscure non-distro configs. - * - * As part of an optimization, GCC makes a copy of an existing switch jump - * table, modifies it, and then hard-codes the jump (albeit with an indirect - * jump) to use a single entry in the table. The rest of the jump table and - * some of its jump targets remain as dead code. - * - * In such a case we can just crudely ignore all unreachable instruction - * warnings for the entire object file. Ideally we would just ignore them - * for the function, but that would require redesigning the code quite a - * bit. And honestly that's just not worth doing: unreachable instruction - * warnings are of questionable value anyway, and this is such a rare issue. - * - * 3. mov [rodata addr],%reg1 - * ... some instructions ... - * jmpq *(%reg1,%reg2,8) - * - * This is a fairly uncommon pattern which is new for GCC 6. As of this - * writing, there are 11 occurrences of it in the allmodconfig kernel. - * - * As of GCC 7 there are quite a few more of these and the 'in between' code - * is significant. Esp. with KASAN enabled some of the code between the mov - * and jmpq uses .rodata itself, which can confuse things. - * - * TODO: Once we have DWARF CFI and smarter instruction decoding logic, - * ensure the same register is used in the mov and jump instructions. - * - * NOTE: RETPOLINE made it harder still to decode dynamic jumps. + * find_jump_table() - Given a dynamic jump, find the switch jump table + * associated with it. */ static struct reloc *find_jump_table(struct objtool_file *file, struct symbol *func, struct instruction *insn) { - struct reloc *text_reloc, *table_reloc; + struct reloc *table_reloc; struct instruction *dest_insn, *orig_insn = insn; - struct section *table_sec; - unsigned long table_offset; /* * Backward search using the @first_jump_src links, these help avoid @@ -1162,52 +1285,13 @@ static struct reloc *find_jump_table(struct objtool_file *file, insn->jump_dest->offset > orig_insn->offset)) break; - /* look for a relocation which references .rodata */ - text_reloc = find_reloc_by_dest_range(file->elf, insn->sec, - insn->offset, insn->len); - if (!text_reloc || text_reloc->sym->type != STT_SECTION || - !text_reloc->sym->sec->rodata) - continue; - - table_offset = text_reloc->addend; - table_sec = text_reloc->sym->sec; - - if (text_reloc->type == R_X86_64_PC32) - table_offset += 4; - - /* - * Make sure the .rodata address isn't associated with a - * symbol. GCC jump tables are anonymous data. - * - * Also support C jump tables which are in the same format as - * switch jump tables. For objtool to recognize them, they - * need to be placed in the C_JUMP_TABLE_SECTION section. They - * have symbols associated with them. - */ - if (find_symbol_containing(table_sec, table_offset) && - strcmp(table_sec->name, C_JUMP_TABLE_SECTION)) - continue; - - /* - * Each table entry has a reloc associated with it. The reloc - * should reference text in the same function as the original - * instruction. - */ - table_reloc = find_reloc_by_dest(file->elf, table_sec, table_offset); + table_reloc = arch_find_switch_table(file, insn); if (!table_reloc) continue; dest_insn = find_insn(file, table_reloc->sym->sec, table_reloc->addend); if (!dest_insn || !dest_insn->func || dest_insn->func->pfunc != func) continue; - /* - * Use of RIP-relative switch jumps is quite rare, and - * indicates a rare GCC quirk/bug which can leave dead code - * behind. - */ - if (text_reloc->type == R_X86_64_PC32) - file->ignore_unreachables = true; - return table_reloc; } @@ -1350,32 +1434,7 @@ static int read_unwind_hints(struct objtool_file *file) insn->hint = true; - switch (hint->sp_reg) { - case ORC_REG_UNDEFINED: - cfa->base = CFI_UNDEFINED; - break; - case ORC_REG_SP: - cfa->base = CFI_SP; - break; - case ORC_REG_BP: - cfa->base = CFI_BP; - break; - case ORC_REG_SP_INDIRECT: - cfa->base = CFI_SP_INDIRECT; - break; - case ORC_REG_R10: - cfa->base = CFI_R10; - break; - case ORC_REG_R13: - cfa->base = CFI_R13; - break; - case ORC_REG_DI: - cfa->base = CFI_DI; - break; - case ORC_REG_DX: - cfa->base = CFI_DX; - break; - default: + if (arch_decode_hint_reg(insn, hint->sp_reg)) { WARN_FUNC("unsupported unwind_hint sp base reg %d", insn->sec, insn->offset, hint->sp_reg); return -1; @@ -1522,6 +1581,23 @@ static int read_intra_function_calls(struct objtool_file *file) return 0; } +static int read_static_call_tramps(struct objtool_file *file) +{ + struct section *sec; + struct symbol *func; + + for_each_sec(file, sec) { + list_for_each_entry(func, &sec->symbol_list, list) { + if (func->bind == STB_GLOBAL && + !strncmp(func->name, STATIC_CALL_TRAMP_PREFIX_STR, + strlen(STATIC_CALL_TRAMP_PREFIX_STR))) + func->static_call_tramp = true; + } + } + + return 0; +} + static void mark_rodata(struct objtool_file *file) { struct section *sec; @@ -1569,6 +1645,10 @@ static int decode_sections(struct objtool_file *file) if (ret) return ret; + ret = read_static_call_tramps(file); + if (ret) + return ret; + ret = add_jump_destinations(file); if (ret) return ret; @@ -1768,7 +1848,8 @@ static int update_cfi_state(struct instruction *insn, struct cfi_state *cfi, return 0; } - if (cfi->type == ORC_TYPE_REGS || cfi->type == ORC_TYPE_REGS_IRET) + if (cfi->type == UNWIND_HINT_TYPE_REGS || + cfi->type == UNWIND_HINT_TYPE_REGS_PARTIAL) return update_cfi_state_regs(insn, cfi, op); switch (op->dest.type) { @@ -2016,7 +2097,7 @@ static int update_cfi_state(struct instruction *insn, struct cfi_state *cfi, /* drap: push %rbp */ cfi->stack_size = 0; - } else if (regs[op->src.reg].base == CFI_UNDEFINED) { + } else { /* drap: push %reg */ save_reg(cfi, op->src.reg, CFI_BP, -cfi->stack_size); @@ -2045,9 +2126,7 @@ static int update_cfi_state(struct instruction *insn, struct cfi_state *cfi, /* save drap offset so we know when to restore it */ cfi->drap_offset = op->dest.offset; - } - - else if (regs[op->src.reg].base == CFI_UNDEFINED) { + } else { /* drap: mov reg, disp(%rbp) */ save_reg(cfi, op->src.reg, CFI_BP, op->dest.offset); @@ -2432,6 +2511,11 @@ static int validate_branch(struct objtool_file *file, struct symbol *func, if (dead_end_function(file, insn->call_dest)) return 0; + if (insn->type == INSN_CALL && insn->call_dest->static_call_tramp) { + list_add_tail(&insn->static_call_node, + &file->static_call_list); + } + break; case INSN_JUMP_CONDITIONAL: @@ -2612,9 +2696,10 @@ static bool is_ubsan_insn(struct instruction *insn) "__ubsan_handle_builtin_unreachable")); } -static bool ignore_unreachable_insn(struct instruction *insn) +static bool ignore_unreachable_insn(struct objtool_file *file, struct instruction *insn) { int i; + struct instruction *prev_insn; if (insn->ignore || insn->type == INSN_NOP) return true; @@ -2631,6 +2716,9 @@ static bool ignore_unreachable_insn(struct instruction *insn) !strcmp(insn->sec->name, ".altinstr_aux")) return true; + if (insn->type == INSN_JUMP_UNCONDITIONAL && insn->offset == FAKE_JUMP_OFFSET) + return true; + if (!insn->func) return false; @@ -2639,8 +2727,11 @@ static bool ignore_unreachable_insn(struct instruction *insn) * __builtin_unreachable(). The BUG() macro has an unreachable() after * the UD2, which causes GCC's undefined trap logic to emit another UD2 * (or occasionally a JMP to UD2). + * + * It may also insert a UD2 after calling a __noreturn function. */ - if (list_prev_entry(insn, list)->dead_end && + prev_insn = list_prev_entry(insn, list); + if ((prev_insn->dead_end || dead_end_function(file, prev_insn->call_dest)) && (insn->type == INSN_BUG || (insn->type == INSN_JUMP_UNCONDITIONAL && insn->jump_dest && insn->jump_dest->type == INSN_BUG))) @@ -2767,7 +2858,7 @@ static int validate_reachable_instructions(struct objtool_file *file) return 0; for_each_insn(file, insn) { - if (insn->visited || ignore_unreachable_insn(insn)) + if (insn->visited || ignore_unreachable_insn(file, insn)) continue; WARN_FUNC("unreachable instruction", insn->sec, insn->offset); @@ -2777,36 +2868,22 @@ static int validate_reachable_instructions(struct objtool_file *file) return 0; } -static struct objtool_file file; - -int check(const char *_objname, bool orc) +int check(struct objtool_file *file) { int ret, warnings = 0; - objname = _objname; - - file.elf = elf_open_read(objname, O_RDWR); - if (!file.elf) - return 1; - - INIT_LIST_HEAD(&file.insn_list); - hash_init(file.insn_hash); - file.c_file = !vmlinux && find_section_by_name(file.elf, ".comment"); - file.ignore_unreachables = no_unreachable; - file.hints = false; - arch_initial_func_cfi_state(&initial_func_cfi); - ret = decode_sections(&file); + ret = decode_sections(file); if (ret < 0) goto out; warnings += ret; - if (list_empty(&file.insn_list)) + if (list_empty(&file->insn_list)) goto out; if (vmlinux && !validate_dup) { - ret = validate_vmlinux_functions(&file); + ret = validate_vmlinux_functions(file); if (ret < 0) goto out; @@ -2815,44 +2892,33 @@ int check(const char *_objname, bool orc) } if (retpoline) { - ret = validate_retpoline(&file); + ret = validate_retpoline(file); if (ret < 0) return ret; warnings += ret; } - ret = validate_functions(&file); + ret = validate_functions(file); if (ret < 0) goto out; warnings += ret; - ret = validate_unwind_hints(&file, NULL); + ret = validate_unwind_hints(file, NULL); if (ret < 0) goto out; warnings += ret; if (!warnings) { - ret = validate_reachable_instructions(&file); + ret = validate_reachable_instructions(file); if (ret < 0) goto out; warnings += ret; } - if (orc) { - ret = create_orc(&file); - if (ret < 0) - goto out; - - ret = create_orc_sections(&file); - if (ret < 0) - goto out; - } - - if (file.elf->changed) { - ret = elf_write(file.elf); - if (ret < 0) - goto out; - } + ret = create_static_call_sections(file); + if (ret < 0) + goto out; + warnings += ret; out: if (ret < 0) { diff --git a/tools/objtool/check.h b/tools/objtool/check.h index 061aa96e15d3..5ec00a4b891b 100644 --- a/tools/objtool/check.h +++ b/tools/objtool/check.h @@ -22,6 +22,7 @@ struct insn_state { struct instruction { struct list_head list; struct hlist_node hash; + struct list_head static_call_node; struct section *sec; unsigned long offset; unsigned int len; @@ -42,9 +43,17 @@ struct instruction { struct symbol *func; struct list_head stack_ops; struct cfi_state cfi; +#ifdef INSN_USE_ORC struct orc_entry orc; +#endif }; +static inline bool is_static_jump(struct instruction *insn) +{ + return insn->type == INSN_JUMP_CONDITIONAL || + insn->type == INSN_JUMP_UNCONDITIONAL; +} + struct instruction *find_insn(struct objtool_file *file, struct section *sec, unsigned long offset); @@ -57,5 +66,4 @@ struct instruction *find_insn(struct objtool_file *file, insn->sec == sec; \ insn = list_next_entry(insn, list)) - #endif /* _CHECK_H */ diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c index 3ddbd66f1a37..4e1d7460574b 100644 --- a/tools/objtool/elf.c +++ b/tools/objtool/elf.c @@ -652,7 +652,7 @@ err: } struct section *elf_create_section(struct elf *elf, const char *name, - size_t entsize, int nr) + unsigned int sh_flags, size_t entsize, int nr) { struct section *sec, *shstrtab; size_t size = entsize * nr; @@ -712,7 +712,7 @@ struct section *elf_create_section(struct elf *elf, const char *name, sec->sh.sh_entsize = entsize; sec->sh.sh_type = SHT_PROGBITS; sec->sh.sh_addralign = 1; - sec->sh.sh_flags = SHF_ALLOC; + sec->sh.sh_flags = SHF_ALLOC | sh_flags; /* Add section name to .shstrtab (or .strtab for Clang) */ @@ -767,7 +767,7 @@ static struct section *elf_create_rel_reloc_section(struct elf *elf, struct sect strcpy(relocname, ".rel"); strcat(relocname, base->name); - sec = elf_create_section(elf, relocname, sizeof(GElf_Rel), 0); + sec = elf_create_section(elf, relocname, 0, sizeof(GElf_Rel), 0); free(relocname); if (!sec) return NULL; @@ -797,7 +797,7 @@ static struct section *elf_create_rela_reloc_section(struct elf *elf, struct sec strcpy(relocname, ".rela"); strcat(relocname, base->name); - sec = elf_create_section(elf, relocname, sizeof(GElf_Rela), 0); + sec = elf_create_section(elf, relocname, 0, sizeof(GElf_Rela), 0); free(relocname); if (!sec) return NULL; diff --git a/tools/objtool/elf.h b/tools/objtool/elf.h index 6cc80a075166..807f8c670097 100644 --- a/tools/objtool/elf.h +++ b/tools/objtool/elf.h @@ -56,6 +56,7 @@ struct symbol { unsigned int len; struct symbol *pfunc, *cfunc, *alias; bool uaccess_safe; + bool static_call_tramp; }; struct reloc { @@ -120,7 +121,7 @@ static inline u32 reloc_hash(struct reloc *reloc) } struct elf *elf_open_read(const char *name, int flags); -struct section *elf_create_section(struct elf *elf, const char *name, size_t entsize, int nr); +struct section *elf_create_section(struct elf *elf, const char *name, unsigned int sh_flags, size_t entsize, int nr); struct section *elf_create_reloc_section(struct elf *elf, struct section *base, int reltype); void elf_add_reloc(struct elf *elf, struct reloc *reloc); int elf_write_insn(struct elf *elf, struct section *sec, diff --git a/tools/objtool/objtool.c b/tools/objtool/objtool.c index 58fdda510653..9df0cd86d310 100644 --- a/tools/objtool/objtool.c +++ b/tools/objtool/objtool.c @@ -22,6 +22,8 @@ #include <linux/kernel.h> #include "builtin.h" +#include "objtool.h" +#include "warn.h" struct cmd_struct { const char *name; @@ -39,6 +41,34 @@ static struct cmd_struct objtool_cmds[] = { bool help; +const char *objname; +static struct objtool_file file; + +struct objtool_file *objtool_open_read(const char *_objname) +{ + if (objname) { + if (strcmp(objname, _objname)) { + WARN("won't handle more than one file at a time"); + return NULL; + } + return &file; + } + objname = _objname; + + file.elf = elf_open_read(objname, O_RDWR); + if (!file.elf) + return NULL; + + INIT_LIST_HEAD(&file.insn_list); + hash_init(file.insn_hash); + INIT_LIST_HEAD(&file.static_call_list); + file.c_file = !vmlinux && find_section_by_name(file.elf, ".comment"); + file.ignore_unreachables = no_unreachable; + file.hints = false; + + return &file; +} + static void cmd_usage(void) { unsigned int i, longest = 0; diff --git a/tools/objtool/objtool.h b/tools/objtool/objtool.h index 528028a66816..4125d4578b23 100644 --- a/tools/objtool/objtool.h +++ b/tools/objtool/objtool.h @@ -12,14 +12,19 @@ #include "elf.h" +#define __weak __attribute__((weak)) + struct objtool_file { struct elf *elf; struct list_head insn_list; DECLARE_HASHTABLE(insn_hash, 20); + struct list_head static_call_list; bool ignore_unreachables, c_file, hints, rodata; }; -int check(const char *objname, bool orc); +struct objtool_file *objtool_open_read(const char *_objname); + +int check(struct objtool_file *file); int orc_dump(const char *objname); int create_orc(struct objtool_file *file); int create_orc_sections(struct objtool_file *file); diff --git a/tools/objtool/orc_dump.c b/tools/objtool/orc_dump.c index fca46e006fc2..5e6a95368d35 100644 --- a/tools/objtool/orc_dump.c +++ b/tools/objtool/orc_dump.c @@ -4,6 +4,7 @@ */ #include <unistd.h> +#include <linux/objtool.h> #include <asm/orc_types.h> #include "objtool.h" #include "warn.h" @@ -37,12 +38,12 @@ static const char *reg_name(unsigned int reg) static const char *orc_type_name(unsigned int type) { switch (type) { - case ORC_TYPE_CALL: + case UNWIND_HINT_TYPE_CALL: return "call"; - case ORC_TYPE_REGS: + case UNWIND_HINT_TYPE_REGS: return "regs"; - case ORC_TYPE_REGS_IRET: - return "iret"; + case UNWIND_HINT_TYPE_REGS_PARTIAL: + return "regs (partial)"; default: return "?"; } diff --git a/tools/objtool/orc_gen.c b/tools/objtool/orc_gen.c index 968f55e6dd94..235663b96adc 100644 --- a/tools/objtool/orc_gen.c +++ b/tools/objtool/orc_gen.c @@ -6,6 +6,9 @@ #include <stdlib.h> #include <string.h> +#include <linux/objtool.h> +#include <asm/orc_types.h> + #include "check.h" #include "warn.h" @@ -18,6 +21,9 @@ int create_orc(struct objtool_file *file) struct cfi_reg *cfa = &insn->cfi.cfa; struct cfi_reg *bp = &insn->cfi.regs[CFI_BP]; + if (!insn->sec->text) + continue; + orc->end = insn->cfi.end; if (cfa->base == CFI_UNDEFINED) { @@ -143,7 +149,7 @@ int create_orc_sections(struct objtool_file *file) struct orc_entry empty = { .sp_reg = ORC_REG_UNDEFINED, .bp_reg = ORC_REG_UNDEFINED, - .type = ORC_TYPE_CALL, + .type = UNWIND_HINT_TYPE_CALL, }; sec = find_section_by_name(file->elf, ".orc_unwind"); @@ -177,7 +183,7 @@ int create_orc_sections(struct objtool_file *file) /* create .orc_unwind_ip and .rela.orc_unwind_ip sections */ - sec = elf_create_section(file->elf, ".orc_unwind_ip", sizeof(int), idx); + sec = elf_create_section(file->elf, ".orc_unwind_ip", 0, sizeof(int), idx); if (!sec) return -1; @@ -186,7 +192,7 @@ int create_orc_sections(struct objtool_file *file) return -1; /* create .orc_unwind section */ - u_sec = elf_create_section(file->elf, ".orc_unwind", + u_sec = elf_create_section(file->elf, ".orc_unwind", 0, sizeof(struct orc_entry), idx); /* populate sections */ diff --git a/tools/objtool/special.c b/tools/objtool/special.c index e893f1e48e44..1a2420febd08 100644 --- a/tools/objtool/special.c +++ b/tools/objtool/special.c @@ -14,24 +14,7 @@ #include "builtin.h" #include "special.h" #include "warn.h" - -#define EX_ENTRY_SIZE 12 -#define EX_ORIG_OFFSET 0 -#define EX_NEW_OFFSET 4 - -#define JUMP_ENTRY_SIZE 16 -#define JUMP_ORIG_OFFSET 0 -#define JUMP_NEW_OFFSET 4 - -#define ALT_ENTRY_SIZE 13 -#define ALT_ORIG_OFFSET 0 -#define ALT_NEW_OFFSET 4 -#define ALT_FEATURE_OFFSET 8 -#define ALT_ORIG_LEN_OFFSET 10 -#define ALT_NEW_LEN_OFFSET 11 - -#define X86_FEATURE_POPCNT (4*32+23) -#define X86_FEATURE_SMAP (9*32+20) +#include "arch_special.h" struct special_entry { const char *sec; @@ -68,6 +51,10 @@ struct special_entry entries[] = { {}, }; +void __weak arch_handle_alternative(unsigned short feature, struct special_alt *alt) +{ +} + static int get_alt_entry(struct elf *elf, struct special_entry *entry, struct section *sec, int idx, struct special_alt *alt) @@ -92,30 +79,7 @@ static int get_alt_entry(struct elf *elf, struct special_entry *entry, feature = *(unsigned short *)(sec->data->d_buf + offset + entry->feature); - - /* - * It has been requested that we don't validate the !POPCNT - * feature path which is a "very very small percentage of - * machines". - */ - if (feature == X86_FEATURE_POPCNT) - alt->skip_orig = true; - - /* - * If UACCESS validation is enabled; force that alternative; - * otherwise force it the other way. - * - * What we want to avoid is having both the original and the - * alternative code flow at the same time, in that case we can - * find paths that see the STAC but take the NOP instead of - * CLAC and the other way around. - */ - if (feature == X86_FEATURE_SMAP) { - if (uaccess) - alt->skip_orig = true; - else - alt->skip_alt = true; - } + arch_handle_alternative(feature, alt); } orig_reloc = find_reloc_by_dest(elf, sec, offset + entry->orig); diff --git a/tools/objtool/special.h b/tools/objtool/special.h index 35061530e46e..abddf38ef334 100644 --- a/tools/objtool/special.h +++ b/tools/objtool/special.h @@ -7,8 +7,11 @@ #define _SPECIAL_H #include <stdbool.h> +#include "check.h" #include "elf.h" +#define C_JUMP_TABLE_SECTION ".rodata..c_jump_table" + struct special_alt { struct list_head list; @@ -28,4 +31,11 @@ struct special_alt { int special_get_alts(struct elf *elf, struct list_head *alts); +void arch_handle_alternative(unsigned short feature, struct special_alt *alt); + +bool arch_support_alt_relocation(struct special_alt *special_alt, + struct instruction *insn, + struct reloc *reloc); +struct reloc *arch_find_switch_table(struct objtool_file *file, + struct instruction *insn); #endif /* _SPECIAL_H */ diff --git a/tools/objtool/sync-check.sh b/tools/objtool/sync-check.sh index 2a1261bfbb62..606a4b5e929f 100755 --- a/tools/objtool/sync-check.sh +++ b/tools/objtool/sync-check.sh @@ -1,13 +1,27 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 -FILES=' +if [ -z "$SRCARCH" ]; then + echo 'sync-check.sh: error: missing $SRCARCH environment variable' >&2 + exit 1 +fi + +FILES="include/linux/objtool.h" + +if [ "$SRCARCH" = "x86" ]; then +FILES="$FILES arch/x86/include/asm/inat_types.h arch/x86/include/asm/orc_types.h arch/x86/include/asm/emulate_prefix.h arch/x86/lib/x86-opcode-map.txt arch/x86/tools/gen-insn-attr-x86.awk -' +include/linux/static_call_types.h +arch/x86/include/asm/inat.h -I '^#include [\"<]\(asm/\)*inat_types.h[\">]' +arch/x86/include/asm/insn.h -I '^#include [\"<]\(asm/\)*inat.h[\">]' +arch/x86/lib/inat.c -I '^#include [\"<]\(../include/\)*asm/insn.h[\">]' +arch/x86/lib/insn.c -I '^#include [\"<]\(../include/\)*asm/in\(at\|sn\).h[\">]' -I '^#include [\"<]\(../include/\)*asm/emulate_prefix.h[\">]' +" +fi check_2 () { file1=$1 @@ -40,11 +54,12 @@ fi cd ../.. -for i in $FILES; do - check $i -done +while read -r file_entry; do + if [ -z "$file_entry" ]; then + continue + fi -check arch/x86/include/asm/inat.h '-I "^#include [\"<]\(asm/\)*inat_types.h[\">]"' -check arch/x86/include/asm/insn.h '-I "^#include [\"<]\(asm/\)*inat.h[\">]"' -check arch/x86/lib/inat.c '-I "^#include [\"<]\(../include/\)*asm/insn.h[\">]"' -check arch/x86/lib/insn.c '-I "^#include [\"<]\(../include/\)*asm/in\(at\|sn\).h[\">]" -I "^#include [\"<]\(../include/\)*asm/emulate_prefix.h[\">]"' + check $file_entry +done <<EOF +$FILES +EOF diff --git a/tools/objtool/weak.c b/tools/objtool/weak.c index 942ea5e8ac36..7843e9a7a72f 100644 --- a/tools/objtool/weak.c +++ b/tools/objtool/weak.c @@ -9,17 +9,13 @@ #include <errno.h> #include "objtool.h" -#define __weak __attribute__((weak)) - #define UNSUPPORTED(name) \ ({ \ fprintf(stderr, "error: objtool: " name " not implemented\n"); \ return ENOSYS; \ }) -const char __weak *objname; - -int __weak check(const char *_objname, bool orc) +int __weak check(struct objtool_file *file) { UNSUPPORTED("check subcommand"); } diff --git a/tools/perf/Documentation/itrace.txt b/tools/perf/Documentation/itrace.txt index e817179c5027..d3740c8f399b 100644 --- a/tools/perf/Documentation/itrace.txt +++ b/tools/perf/Documentation/itrace.txt @@ -18,6 +18,7 @@ l synthesize last branch entries (use with i or x) L synthesize last branch entries on existing event records s skip initial number of events + q quicker (less detailed) decoding The default is all events i.e. the same as --itrace=ibxwpe, except for perf script where it is --itrace=ce @@ -47,3 +48,16 @@ --itrace=i0nss1000000 skips the first million instructions. + + The 'e' option may be followed by flags which affect what errors will or + will not be reported. Each flag must be preceded by either '+' or '-'. + The flags are: + o overflow + l trace data lost + + If supported, the 'd' option may be followed by flags which affect what + debug messages will or will not be logged. Each flag must be preceded + by either '+' or '-'. The flags are: + a all perf events + + If supported, the 'q' option may be repeated to increase the effect. diff --git a/tools/perf/Documentation/perf-bench.txt b/tools/perf/Documentation/perf-bench.txt index bad16512c48d..a0529c7fa5ef 100644 --- a/tools/perf/Documentation/perf-bench.txt +++ b/tools/perf/Documentation/perf-bench.txt @@ -49,6 +49,9 @@ SUBSYSTEM 'sched':: Scheduler and IPC mechanisms. +'syscall':: + System call performance (throughput). + 'mem':: Memory access performance. @@ -137,6 +140,14 @@ Example of *pipe* 59004 ops/sec --------------------- +SUITES FOR 'syscall' +~~~~~~~~~~~~~~~~~~ +*basic*:: +Suite for evaluating performance of core system call throughput (both usecs/op and ops/sec metrics). +This uses a single thread simply doing getppid(2), which is a simple syscall where the result is not +cached by glibc. + + SUITES FOR 'mem' ~~~~~~~~~~~~~~~~ *memcpy*:: diff --git a/tools/perf/Documentation/perf-config.txt b/tools/perf/Documentation/perf-config.txt index c7d3df5798e2..76408d986aed 100644 --- a/tools/perf/Documentation/perf-config.txt +++ b/tools/perf/Documentation/perf-config.txt @@ -614,8 +614,9 @@ trace.*:: ftrace.*:: ftrace.tracer:: - Can be used to select the default tracer. Possible values are - 'function' and 'function_graph'. + Can be used to select the default tracer when neither -G nor + -F option is not specified. Possible values are 'function' and + 'function_graph'. llvm.*:: llvm.clang-path:: diff --git a/tools/perf/Documentation/perf-data.txt b/tools/perf/Documentation/perf-data.txt index c87180764829..726b9bc9e1a7 100644 --- a/tools/perf/Documentation/perf-data.txt +++ b/tools/perf/Documentation/perf-data.txt @@ -27,6 +27,9 @@ OPTIONS for 'convert' --to-ctf:: Triggers the CTF conversion, specify the path of CTF data directory. +--tod:: + Convert time to wall clock time. + -i:: Specify input perf data file path. diff --git a/tools/perf/Documentation/perf-ftrace.txt b/tools/perf/Documentation/perf-ftrace.txt index b80c84307dc9..78358af9a1c4 100644 --- a/tools/perf/Documentation/perf-ftrace.txt +++ b/tools/perf/Documentation/perf-ftrace.txt @@ -24,16 +24,28 @@ OPTIONS -t:: --tracer=:: - Tracer to use: function_graph or function. + Tracer to use when neither -G nor -F option is not + specified: function_graph or function. -v:: --verbose=:: Verbosity level. +-F:: +--funcs:: + List all available functions to trace. + -p:: --pid=:: Trace on existing process id (comma separated list). +--tid=:: + Trace on existing thread id (comma separated list). + +-D:: +--delay:: + Time (ms) to wait before starting tracing after program start. + -a:: --all-cpus:: Force system-wide collection. Scripts run without a <command> @@ -48,39 +60,58 @@ OPTIONS Ranges of CPUs are specified with -: 0-2. Default is to trace on all online CPUs. +-m:: +--buffer-size:: + Set the size of per-cpu tracing buffer, <size> is expected to + be a number with appended unit character - B/K/M/G. + +--inherit:: + Trace children processes spawned by our target. + -T:: --trace-funcs=:: - Only trace functions given by the argument. Multiple functions - can be given by using this option more than once. The function - argument also can be a glob pattern. It will be passed to - 'set_ftrace_filter' in tracefs. + Select function tracer and set function filter on the given + function (or a glob pattern). Multiple functions can be given + by using this option more than once. The function argument also + can be a glob pattern. It will be passed to 'set_ftrace_filter' + in tracefs. -N:: --notrace-funcs=:: - Do not trace functions given by the argument. Like -T option, - this can be used more than once to specify multiple functions - (or glob patterns). It will be passed to 'set_ftrace_notrace' - in tracefs. + Select function tracer and do not trace functions given by the + argument. Like -T option, this can be used more than once to + specify multiple functions (or glob patterns). It will be + passed to 'set_ftrace_notrace' in tracefs. + +--func-opts:: + List of options allowed to set: + call-graph - Display kernel stack trace for function tracer. + irq-info - Display irq context info for function tracer. -G:: --graph-funcs=:: - Set graph filter on the given function (or a glob pattern). - This is useful for the function_graph tracer only and enables - tracing for functions executed from the given function. - This can be used more than once to specify multiple functions. - It will be passed to 'set_graph_function' in tracefs. + Select function_graph tracer and set graph filter on the given + function (or a glob pattern). This is useful to trace for + functions executed from the given function. This can be used more + than once to specify multiple functions. It will be passed to + 'set_graph_function' in tracefs. -g:: --nograph-funcs=:: - Set graph notrace filter on the given function (or a glob pattern). - Like -G option, this is useful for the function_graph tracer only - and disables tracing for function executed from the given function. - This can be used more than once to specify multiple functions. - It will be passed to 'set_graph_notrace' in tracefs. + Select function_graph tracer and set graph notrace filter on the + given function (or a glob pattern). Like -G option, this is useful + for the function_graph tracer only and disables tracing for function + executed from the given function. This can be used more than once to + specify multiple functions. It will be passed to 'set_graph_notrace' + in tracefs. --D:: ---graph-depth=:: - Set max depth for function graph tracer to follow +--graph-opts:: + List of options allowed to set: + nosleep-time - Measure on-CPU time only for function_graph tracer. + noirqs - Ignore functions that happen inside interrupt. + verbose - Show process names, PIDs, timestamps, etc. + thresh=<n> - Setup trace duration threshold in microseconds. + depth=<n> - Set max depth for function graph tracer to follow. SEE ALSO -------- diff --git a/tools/perf/Documentation/perf-intel-pt.txt b/tools/perf/Documentation/perf-intel-pt.txt index f4cd49a7fcdb..d5a266d7f15b 100644 --- a/tools/perf/Documentation/perf-intel-pt.txt +++ b/tools/perf/Documentation/perf-intel-pt.txt @@ -825,6 +825,7 @@ The letters are: l synthesize last branch entries (use with i or x) L synthesize last branch entries on existing event records s skip initial number of events + q quicker (less detailed) decoding "Instructions" events look like they were recorded by "perf record -e instructions". @@ -871,11 +872,24 @@ Developer Manuals. Error events show where the decoder lost the trace. Error events are quite important. Users must know if what they are seeing is a complete -picture or not. +picture or not. The "e" option may be followed by flags which affect what errors +will or will not be reported. Each flag must be preceded by either '+' or '-'. +The flags supported by Intel PT are: + -o Suppress overflow errors + -l Suppress trace data lost errors +For example, for errors but not overflow or data lost errors: + + --itrace=e-o-l The "d" option will cause the creation of a file "intel_pt.log" containing all decoded packets and instructions. Note that this option slows down the decoder -and that the resulting file may be very large. +and that the resulting file may be very large. The "d" option may be followed +by flags which affect what debug messages will or will not be logged. Each flag +must be preceded by either '+' or '-'. The flags support by Intel PT are: + -a Suppress logging of perf events + +a Log all perf events +By default, logged perf events are filtered by any specified time ranges, but +flag +a overrides that. In addition, the period of the "instructions" event can be specified. e.g. @@ -956,6 +970,51 @@ at the beginning. This is useful to ignore initialization code. skips the first million instructions. +The q option changes the way the trace is decoded. The decoding is much faster +but much less detailed. Specifically, with the q option, the decoder does not +decode TNT packets, and does not walk object code, but gets the ip from FUP and +TIP packets. The q option can be used with the b and i options but the period +is not used. The q option decodes more quickly, but is useful only if the +control flow of interest is represented or indicated by FUP, TIP, TIP.PGE, or +TIP.PGD packets (refer below). However the q option could be used to find time +ranges that could then be decoded fully using the --time option. + +What will *not* be decoded with the (single) q option: + + - direct calls and jmps + - conditional branches + - non-branch instructions + +What *will* be decoded with the (single) q option: + + - asynchronous branches such as interrupts + - indirect branches + - function return target address *if* the noretcomp config term (refer + config terms section) was used + - start of (control-flow) tracing + - end of (control-flow) tracing, if it is not out of context + - power events, ptwrite, transaction start and abort + - instruction pointer associated with PSB packets + +Note the q option does not specify what events will be synthesized e.g. the p +option must be used also to show power events. + +Repeating the q option (double-q i.e. qq) results in even faster decoding and even +less detail. The decoder decodes only extended PSB (PSB+) packets, getting the +instruction pointer if there is a FUP packet within PSB+ (i.e. between PSB and +PSBEND). Note PSB packets occur regularly in the trace based on the psb_period +config term (refer config terms section). There will be a FUP packet if the +PSB+ occurs while control flow is being traced. + +What will *not* be decoded with the qq option: + + - everything except instruction pointer associated with PSB packets + +What *will* be decoded with the qq option: + + - instruction pointer associated with PSB packets + + dump option ~~~~~~~~~~~ diff --git a/tools/perf/Documentation/perf-list.txt b/tools/perf/Documentation/perf-list.txt index 376a50b3452d..10ed539a8859 100644 --- a/tools/perf/Documentation/perf-list.txt +++ b/tools/perf/Documentation/perf-list.txt @@ -119,6 +119,7 @@ It's also possible to use pmu syntax: perf record -e r1a8 -a sleep 1 perf record -e cpu/r1a8/ ... + perf record -e cpu/r0x1a8/ ... You should refer to the processor specific documentation for getting these details. Some of them are referenced in the SEE ALSO section below. diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index fa8a5fcd27ab..bd50cdff08a8 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -33,6 +33,10 @@ OPTIONS - a raw PMU event (eventsel+umask) in the form of rNNN where NNN is a hexadecimal event descriptor. + - a symbolic or raw PMU event followed by an optional colon + and a list of event modifiers, e.g., cpu-cycles:p. See the + linkperf:perf-list[1] man page for details on event modifiers. + - a symbolically formed PMU event like 'pmu/param1=0x3,param2/' where 'param1', 'param2', etc are defined as formats for the PMU in /sys/bus/event_source/devices/<pmu>/format/*. @@ -407,8 +411,9 @@ if combined with -a or -C options. -D:: --delay=:: -After starting the program, wait msecs before measuring. This is useful to -filter out the startup phase of the program, which is often very different. +After starting the program, wait msecs before measuring (-1: start with events +disabled). This is useful to filter out the startup phase of the program, which +is often very different. -I:: --intr-regs:: @@ -626,6 +631,45 @@ option. The -e option and this one can be mixed and matched. Events can be grouped using the {} notation. endif::HAVE_LIBPFM[] +--control fd:ctl-fd[,ack-fd] +Listen on ctl-fd descriptor for command to control measurement ('enable': enable events, +'disable': disable events). Measurements can be started with events disabled using +--delay=-1 option. Optionally send control command completion ('ack\n') to ack-fd descriptor +to synchronize with the controlling process. Example of bash shell script to enable and +disable events during measurements: + +#!/bin/bash + +ctl_dir=/tmp/ + +ctl_fifo=${ctl_dir}perf_ctl.fifo +test -p ${ctl_fifo} && unlink ${ctl_fifo} +mkfifo ${ctl_fifo} +exec {ctl_fd}<>${ctl_fifo} + +ctl_ack_fifo=${ctl_dir}perf_ctl_ack.fifo +test -p ${ctl_ack_fifo} && unlink ${ctl_ack_fifo} +mkfifo ${ctl_ack_fifo} +exec {ctl_fd_ack}<>${ctl_ack_fifo} + +perf record -D -1 -e cpu-cycles -a \ + --control fd:${ctl_fd},${ctl_fd_ack} \ + -- sleep 30 & +perf_pid=$! + +sleep 5 && echo 'enable' >&${ctl_fd} && read -u ${ctl_fd_ack} e1 && echo "enabled(${e1})" +sleep 10 && echo 'disable' >&${ctl_fd} && read -u ${ctl_fd_ack} d1 && echo "disabled(${d1})" + +exec {ctl_fd_ack}>&- +unlink ${ctl_ack_fifo} + +exec {ctl_fd}>&- +unlink ${ctl_fifo} + +wait -n ${perf_pid} +exit $? + + SEE ALSO -------- linkperf:perf-stat[1], linkperf:perf-list[1], linkperf:perf-intel-pt[1] diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt index 372dfd110e6d..4f712fb8f175 100644 --- a/tools/perf/Documentation/perf-script.txt +++ b/tools/perf/Documentation/perf-script.txt @@ -322,6 +322,10 @@ OPTIONS --show-cgroup-events Display cgroup events i.e. events of type PERF_RECORD_CGROUP. +--show-text-poke-events + Display text poke events i.e. events of type PERF_RECORD_TEXT_POKE and + PERF_RECORD_KSYMBOL. + --demangle:: Demangle symbol names to human readable form. It's enabled by default, disable with --no-demangle. diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt index b029ee728a0b..db420dd75e43 100644 --- a/tools/perf/Documentation/perf-stat.txt +++ b/tools/perf/Documentation/perf-stat.txt @@ -39,6 +39,10 @@ report:: - a raw PMU event (eventsel+umask) in the form of rNNN where NNN is a hexadecimal event descriptor. + - a symbolic or raw PMU event followed by an optional colon + and a list of event modifiers, e.g., cpu-cycles:p. See the + linkperf:perf-list[1] man page for details on event modifiers. + - a symbolically formed event like 'pmu/param1=0x3,param2/' where param1 and param2 are defined as formats for the PMU in /sys/bus/event_source/devices/<pmu>/format/* @@ -176,6 +180,45 @@ with it. --append may be used here. Examples: 3>results perf stat --log-fd 3 -- $cmd 3>>results perf stat --log-fd 3 --append -- $cmd +--control fd:ctl-fd[,ack-fd] +Listen on ctl-fd descriptor for command to control measurement ('enable': enable events, +'disable': disable events). Measurements can be started with events disabled using +--delay=-1 option. Optionally send control command completion ('ack\n') to ack-fd descriptor +to synchronize with the controlling process. Example of bash shell script to enable and +disable events during measurements: + +#!/bin/bash + +ctl_dir=/tmp/ + +ctl_fifo=${ctl_dir}perf_ctl.fifo +test -p ${ctl_fifo} && unlink ${ctl_fifo} +mkfifo ${ctl_fifo} +exec {ctl_fd}<>${ctl_fifo} + +ctl_ack_fifo=${ctl_dir}perf_ctl_ack.fifo +test -p ${ctl_ack_fifo} && unlink ${ctl_ack_fifo} +mkfifo ${ctl_ack_fifo} +exec {ctl_fd_ack}<>${ctl_ack_fifo} + +perf stat -D -1 -e cpu-cycles -a -I 1000 \ + --control fd:${ctl_fd},${ctl_fd_ack} \ + -- sleep 30 & +perf_pid=$! + +sleep 5 && echo 'enable' >&${ctl_fd} && read -u ${ctl_fd_ack} e1 && echo "enabled(${e1})" +sleep 10 && echo 'disable' >&${ctl_fd} && read -u ${ctl_fd_ack} d1 && echo "disabled(${d1})" + +exec {ctl_fd_ack}>&- +unlink ${ctl_ack_fifo} + +exec {ctl_fd}>&- +unlink ${ctl_fifo} + +wait -n ${perf_pid} +exit $? + + --pre:: --post:: Pre and post measurement hooks, e.g.: @@ -238,8 +281,9 @@ mode, use --per-node in addition to -a. (system-wide). -D msecs:: --delay msecs:: -After starting the program, wait msecs before measuring. This is useful to -filter out the startup phase of the program, which is often very different. +After starting the program, wait msecs before measuring (-1: start with events +disabled). This is useful to filter out the startup phase of the program, +which is often very different. -T:: --transaction:: @@ -376,6 +420,9 @@ counts for all hardware threads in a core but show the sum counts per hardware thread. This is essentially a replacement for the any bit and convenient for post processing. +--summary:: +Print summary for interval mode (-I). + EXAMPLES -------- diff --git a/tools/perf/Documentation/perf.data-file-format.txt b/tools/perf/Documentation/perf.data-file-format.txt index b6472e463284..9ee96640744e 100644 --- a/tools/perf/Documentation/perf.data-file-format.txt +++ b/tools/perf/Documentation/perf.data-file-format.txt @@ -389,6 +389,19 @@ struct { Example: cpu pmu capabilities: branches=32, max_precise=3, pmu_name=icelake + HEADER_CLOCK_DATA = 29, + + Contains clock id and its reference time together with wall clock + time taken at the 'same time', both values are in nanoseconds. + The format of data is as below. + +struct { + u32 version; /* version = 1 */ + u32 clockid; + u64 wall_clock_ns; + u64 clockid_time_ns; +}; + other bits are reserved and should ignored for now HEADER_FEAT_BITS = 256, diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index 513633809c81..8137a6046a47 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -16,7 +16,7 @@ $(shell printf "" > $(OUTPUT).config-detected) detected = $(shell echo "$(1)=y" >> $(OUTPUT).config-detected) detected_var = $(shell echo "$(1)=$($(1))" >> $(OUTPUT).config-detected) -CFLAGS := $(EXTRA_CFLAGS) $(EXTRA_WARNINGS) +CFLAGS := $(EXTRA_CFLAGS) $(filter-out -Wnested-externs,$(EXTRA_WARNINGS)) include $(srctree)/tools/scripts/Makefile.arch @@ -501,6 +501,14 @@ ifndef NO_LIBELF CFLAGS += -DHAVE_ELF_GETSHDRSTRNDX_SUPPORT endif + ifndef NO_LIBDEBUGINFOD + $(call feature_check,libdebuginfod) + ifeq ($(feature-libdebuginfod), 1) + CFLAGS += -DHAVE_DEBUGINFOD_SUPPORT + EXTLIBS += -ldebuginfod + endif + endif + ifndef NO_DWARF ifeq ($(origin PERF_HAVE_DWARF_REGS), undefined) msg := $(warning DWARF register mappings have not been defined for architecture $(SRCARCH), DWARF support disabled); diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 86dbb51bb272..6031167939ae 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -124,6 +124,8 @@ include ../scripts/utilities.mak # # Define LIBPFM4 to enable libpfm4 events extension. # +# Define NO_LIBDEBUGINFOD if you do not want support debuginfod +# # As per kernel Makefile, avoid funny character set dependencies unexport LC_ALL @@ -418,6 +420,7 @@ export INSTALL SHELL_PATH SHELL = $(SHELL_PATH) +beauty_linux_dir := $(srctree)/tools/perf/trace/beauty/include/linux/ linux_uapi_dir := $(srctree)/tools/include/uapi/linux asm_generic_uapi_dir := $(srctree)/tools/include/uapi/asm-generic arch_asm_uapi_dir := $(srctree)/tools/arch/$(SRCARCH)/include/uapi/asm/ @@ -501,6 +504,12 @@ socket_ipproto_tbl := $(srctree)/tools/perf/trace/beauty/socket_ipproto.sh $(socket_ipproto_array): $(linux_uapi_dir)/in.h $(socket_ipproto_tbl) $(Q)$(SHELL) '$(socket_ipproto_tbl)' $(linux_uapi_dir) > $@ +socket_arrays := $(beauty_outdir)/socket_arrays.c +socket_tbl := $(srctree)/tools/perf/trace/beauty/socket.sh + +$(socket_arrays): $(beauty_linux_dir)/socket.h $(socket_tbl) + $(Q)$(SHELL) '$(socket_tbl)' $(beauty_linux_dir) > $@ + vhost_virtio_ioctl_array := $(beauty_ioctl_outdir)/vhost_virtio_ioctl_array.c vhost_virtio_hdr_dir := $(srctree)/tools/include/uapi/linux vhost_virtio_ioctl_tbl := $(srctree)/tools/perf/trace/beauty/vhost_virtio_ioctl.sh @@ -697,6 +706,7 @@ prepare: $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h archheaders $(drm_ioc $(kcmp_type_array) \ $(kvm_ioctl_array) \ $(socket_ipproto_array) \ + $(socket_arrays) \ $(vhost_virtio_ioctl_array) \ $(madvise_behavior_array) \ $(mmap_flags_array) \ @@ -1006,6 +1016,7 @@ clean:: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clea $(OUTPUT)$(kvm_ioctl_array) \ $(OUTPUT)$(kcmp_type_array) \ $(OUTPUT)$(socket_ipproto_array) \ + $(OUTPUT)$(socket_arrays) \ $(OUTPUT)$(vhost_virtio_ioctl_array) \ $(OUTPUT)$(perf_ioctl_array) \ $(OUTPUT)$(prctl_option_array) \ diff --git a/tools/perf/arch/arm/util/auxtrace.c b/tools/perf/arch/arm/util/auxtrace.c index 28a5d0c18b1d..b187bddbd01a 100644 --- a/tools/perf/arch/arm/util/auxtrace.c +++ b/tools/perf/arch/arm/util/auxtrace.c @@ -57,17 +57,15 @@ struct auxtrace_record struct evsel *evsel; bool found_etm = false; struct perf_pmu *found_spe = NULL; - static struct perf_pmu **arm_spe_pmus = NULL; - static int nr_spes = 0; + struct perf_pmu **arm_spe_pmus = NULL; + int nr_spes = 0; int i = 0; if (!evlist) return NULL; cs_etm_pmu = perf_pmu__find(CORESIGHT_ETM_PMU_NAME); - - if (!arm_spe_pmus) - arm_spe_pmus = find_all_arm_spe_pmus(&nr_spes, err); + arm_spe_pmus = find_all_arm_spe_pmus(&nr_spes, err); evlist__for_each_entry(evlist, evsel) { if (cs_etm_pmu && @@ -84,6 +82,7 @@ struct auxtrace_record } } } + free(arm_spe_pmus); if (found_etm && found_spe) { pr_err("Concurrent ARM Coresight ETM and SPE operation not currently supported\n"); diff --git a/tools/perf/arch/arm/util/cs-etm.c b/tools/perf/arch/arm/util/cs-etm.c index cea5e33d61d2..cad7bf783413 100644 --- a/tools/perf/arch/arm/util/cs-etm.c +++ b/tools/perf/arch/arm/util/cs-etm.c @@ -243,10 +243,10 @@ static int cs_etm_set_sink_attr(struct perf_pmu *pmu, } /* - * No sink was provided on the command line - for _now_ treat - * this as an error. + * No sink was provided on the command line - allow the CoreSight + * system to look for a default */ - return ret; + return 0; } static int cs_etm_recording_options(struct auxtrace_record *itr, diff --git a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl index b190f2eb2611..b168364ac050 100644 --- a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl @@ -32,7 +32,7 @@ 18 spu oldstat sys_ni_syscall 19 common lseek sys_lseek compat_sys_lseek 20 common getpid sys_getpid -21 nospu mount sys_mount compat_sys_mount +21 nospu mount sys_mount 22 32 umount sys_oldumount 22 64 umount sys_ni_syscall 22 spu umount sys_ni_syscall @@ -189,11 +189,11 @@ 142 common _newselect sys_select compat_sys_select 143 common flock sys_flock 144 common msync sys_msync -145 common readv sys_readv compat_sys_readv -146 common writev sys_writev compat_sys_writev +145 common readv sys_readv +146 common writev sys_writev 147 common getsid sys_getsid 148 common fdatasync sys_fdatasync -149 nospu _sysctl sys_sysctl compat_sys_sysctl +149 nospu _sysctl sys_ni_syscall 150 common mlock sys_mlock 151 common munlock sys_munlock 152 common mlockall sys_mlockall @@ -363,7 +363,7 @@ 282 common unshare sys_unshare 283 common splice sys_splice 284 common tee sys_tee -285 common vmsplice sys_vmsplice compat_sys_vmsplice +285 common vmsplice sys_vmsplice 286 common openat sys_openat compat_sys_openat 287 common mkdirat sys_mkdirat 288 common mknodat sys_mknodat @@ -443,8 +443,8 @@ 348 common syncfs sys_syncfs 349 common sendmmsg sys_sendmmsg compat_sys_sendmmsg 350 common setns sys_setns -351 nospu process_vm_readv sys_process_vm_readv compat_sys_process_vm_readv -352 nospu process_vm_writev sys_process_vm_writev compat_sys_process_vm_writev +351 nospu process_vm_readv sys_process_vm_readv +352 nospu process_vm_writev sys_process_vm_writev 353 nospu finit_module sys_finit_module 354 nospu kcmp sys_kcmp 355 common sched_setattr sys_sched_setattr diff --git a/tools/perf/arch/powerpc/include/perf_regs.h b/tools/perf/arch/powerpc/include/perf_regs.h index e18a3556f5e3..63f3ac91049f 100644 --- a/tools/perf/arch/powerpc/include/perf_regs.h +++ b/tools/perf/arch/powerpc/include/perf_regs.h @@ -64,7 +64,13 @@ static const char *reg_names[] = { [PERF_REG_POWERPC_DAR] = "dar", [PERF_REG_POWERPC_DSISR] = "dsisr", [PERF_REG_POWERPC_SIER] = "sier", - [PERF_REG_POWERPC_MMCRA] = "mmcra" + [PERF_REG_POWERPC_MMCRA] = "mmcra", + [PERF_REG_POWERPC_MMCR0] = "mmcr0", + [PERF_REG_POWERPC_MMCR1] = "mmcr1", + [PERF_REG_POWERPC_MMCR2] = "mmcr2", + [PERF_REG_POWERPC_MMCR3] = "mmcr3", + [PERF_REG_POWERPC_SIER2] = "sier2", + [PERF_REG_POWERPC_SIER3] = "sier3", }; static inline const char *perf_reg_name(int id) diff --git a/tools/perf/arch/powerpc/util/book3s_hcalls.h b/tools/perf/arch/powerpc/util/book3s_hcalls.h index 54cfa0530e86..488f4339b83c 100644 --- a/tools/perf/arch/powerpc/util/book3s_hcalls.h +++ b/tools/perf/arch/powerpc/util/book3s_hcalls.h @@ -84,7 +84,7 @@ {0x1a4, "H_CREATE_RPT"}, \ {0x1a8, "H_REMOVE_RPT"}, \ {0x1ac, "H_REGISTER_RPAGES"}, \ - {0x1b0, "H_DISABLE_AND_GETC"}, \ + {0x1b0, "H_DISABLE_AND_GET"}, \ {0x1b4, "H_ERROR_DATA"}, \ {0x1b8, "H_GET_HCA_INFO"}, \ {0x1bc, "H_GET_PERF_COUNT"}, \ diff --git a/tools/perf/arch/powerpc/util/header.c b/tools/perf/arch/powerpc/util/header.c index d4870074f14c..1a950171a66f 100644 --- a/tools/perf/arch/powerpc/util/header.c +++ b/tools/perf/arch/powerpc/util/header.c @@ -7,17 +7,10 @@ #include <string.h> #include <linux/stringify.h> #include "header.h" +#include "utils_header.h" #include "metricgroup.h" #include <api/fs/fs.h> -#define mfspr(rn) ({unsigned long rval; \ - asm volatile("mfspr %0," __stringify(rn) \ - : "=r" (rval)); rval; }) - -#define SPRN_PVR 0x11F /* Processor Version Register */ -#define PVR_VER(pvr) (((pvr) >> 16) & 0xFFFF) /* Version field */ -#define PVR_REV(pvr) (((pvr) >> 0) & 0xFFFF) /* Revison field */ - int get_cpuid(char *buffer, size_t sz) { diff --git a/tools/perf/arch/powerpc/util/perf_regs.c b/tools/perf/arch/powerpc/util/perf_regs.c index 0a5242900248..2b6d4704e3aa 100644 --- a/tools/perf/arch/powerpc/util/perf_regs.c +++ b/tools/perf/arch/powerpc/util/perf_regs.c @@ -6,9 +6,16 @@ #include "../../../util/perf_regs.h" #include "../../../util/debug.h" +#include "../../../util/event.h" +#include "../../../util/header.h" +#include "../../../perf-sys.h" +#include "utils_header.h" #include <linux/kernel.h> +#define PVR_POWER9 0x004E +#define PVR_POWER10 0x0080 + const struct sample_reg sample_reg_masks[] = { SMPL_REG(r0, PERF_REG_POWERPC_R0), SMPL_REG(r1, PERF_REG_POWERPC_R1), @@ -55,6 +62,12 @@ const struct sample_reg sample_reg_masks[] = { SMPL_REG(dsisr, PERF_REG_POWERPC_DSISR), SMPL_REG(sier, PERF_REG_POWERPC_SIER), SMPL_REG(mmcra, PERF_REG_POWERPC_MMCRA), + SMPL_REG(mmcr0, PERF_REG_POWERPC_MMCR0), + SMPL_REG(mmcr1, PERF_REG_POWERPC_MMCR1), + SMPL_REG(mmcr2, PERF_REG_POWERPC_MMCR2), + SMPL_REG(mmcr3, PERF_REG_POWERPC_MMCR3), + SMPL_REG(sier2, PERF_REG_POWERPC_SIER2), + SMPL_REG(sier3, PERF_REG_POWERPC_SIER3), SMPL_REG_END }; @@ -163,3 +176,45 @@ int arch_sdt_arg_parse_op(char *old_op, char **new_op) return SDT_ARG_VALID; } + +uint64_t arch__intr_reg_mask(void) +{ + struct perf_event_attr attr = { + .type = PERF_TYPE_HARDWARE, + .config = PERF_COUNT_HW_CPU_CYCLES, + .sample_type = PERF_SAMPLE_REGS_INTR, + .precise_ip = 1, + .disabled = 1, + .exclude_kernel = 1, + }; + int fd; + u32 version; + u64 extended_mask = 0, mask = PERF_REGS_MASK; + + /* + * Get the PVR value to set the extended + * mask specific to platform. + */ + version = (((mfspr(SPRN_PVR)) >> 16) & 0xFFFF); + if (version == PVR_POWER9) + extended_mask = PERF_REG_PMU_MASK_300; + else if (version == PVR_POWER10) + extended_mask = PERF_REG_PMU_MASK_31; + else + return mask; + + attr.sample_regs_intr = extended_mask; + attr.sample_period = 1; + event_attr_init(&attr); + + /* + * check if the pmu supports perf extended regs, before + * returning the register mask to sample. + */ + fd = sys_perf_event_open(&attr, 0, -1, -1, 0); + if (fd != -1) { + close(fd); + mask |= extended_mask; + } + return mask; +} diff --git a/tools/perf/arch/powerpc/util/utils_header.h b/tools/perf/arch/powerpc/util/utils_header.h new file mode 100644 index 000000000000..5788eb1f1fe3 --- /dev/null +++ b/tools/perf/arch/powerpc/util/utils_header.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __PERF_UTIL_HEADER_H +#define __PERF_UTIL_HEADER_H + +#include <linux/stringify.h> + +#define mfspr(rn) ({unsigned long rval; \ + asm volatile("mfspr %0," __stringify(rn) \ + : "=r" (rval)); rval; }) + +#define SPRN_PVR 0x11F /* Processor Version Register */ +#define PVR_VER(pvr) (((pvr) >> 16) & 0xFFFF) /* Version field */ +#define PVR_REV(pvr) (((pvr) >> 0) & 0xFFFF) /* Revison field */ + +#endif /* __PERF_UTIL_HEADER_H */ diff --git a/tools/perf/arch/s390/entry/syscalls/syscall.tbl b/tools/perf/arch/s390/entry/syscalls/syscall.tbl index 56ae24b6e4be..d2fa9647ce25 100644 --- a/tools/perf/arch/s390/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/s390/entry/syscalls/syscall.tbl @@ -26,7 +26,7 @@ 16 32 lchown - compat_sys_s390_lchown16 19 common lseek sys_lseek compat_sys_lseek 20 common getpid sys_getpid sys_getpid -21 common mount sys_mount compat_sys_mount +21 common mount sys_mount 22 common umount sys_oldumount compat_sys_oldumount 23 32 setuid - compat_sys_s390_setuid16 24 32 getuid - compat_sys_s390_getuid16 @@ -134,11 +134,11 @@ 142 64 select sys_select - 143 common flock sys_flock sys_flock 144 common msync sys_msync compat_sys_msync -145 common readv sys_readv compat_sys_readv -146 common writev sys_writev compat_sys_writev +145 common readv sys_readv +146 common writev sys_writev 147 common getsid sys_getsid sys_getsid 148 common fdatasync sys_fdatasync sys_fdatasync -149 common _sysctl sys_sysctl compat_sys_sysctl +149 common _sysctl - - 150 common mlock sys_mlock compat_sys_mlock 151 common munlock sys_munlock compat_sys_munlock 152 common mlockall sys_mlockall sys_mlockall @@ -316,7 +316,7 @@ 306 common splice sys_splice compat_sys_splice 307 common sync_file_range sys_sync_file_range compat_sys_s390_sync_file_range 308 common tee sys_tee compat_sys_tee -309 common vmsplice sys_vmsplice compat_sys_vmsplice +309 common vmsplice sys_vmsplice sys_vmsplice 310 common move_pages sys_move_pages compat_sys_move_pages 311 common getcpu sys_getcpu compat_sys_getcpu 312 common epoll_pwait sys_epoll_pwait compat_sys_epoll_pwait @@ -347,8 +347,8 @@ 337 common clock_adjtime sys_clock_adjtime compat_sys_clock_adjtime 338 common syncfs sys_syncfs sys_syncfs 339 common setns sys_setns sys_setns -340 common process_vm_readv sys_process_vm_readv compat_sys_process_vm_readv -341 common process_vm_writev sys_process_vm_writev compat_sys_process_vm_writev +340 common process_vm_readv sys_process_vm_readv sys_process_vm_readv +341 common process_vm_writev sys_process_vm_writev sys_process_vm_writev 342 common s390_runtime_instr sys_s390_runtime_instr sys_s390_runtime_instr 343 common kcmp sys_kcmp compat_sys_kcmp 344 common finit_module sys_finit_module compat_sys_finit_module diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl index e008d638e641..347809649ba2 100644 --- a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl +++ b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl @@ -164,7 +164,7 @@ 153 common vhangup sys_vhangup 154 common modify_ldt sys_modify_ldt 155 common pivot_root sys_pivot_root -156 64 _sysctl sys_sysctl +156 64 _sysctl sys_ni_syscall 157 common prctl sys_prctl 158 common arch_prctl sys_arch_prctl 159 common adjtimex sys_adjtimex @@ -357,6 +357,7 @@ 433 common fspick sys_fspick 434 common pidfd_open sys_pidfd_open 435 common clone3 sys_clone3 +436 common close_range sys_close_range 437 common openat2 sys_openat2 438 common pidfd_getfd sys_pidfd_getfd 439 common faccessat2 sys_faccessat2 @@ -370,8 +371,8 @@ 512 x32 rt_sigaction compat_sys_rt_sigaction 513 x32 rt_sigreturn compat_sys_x32_rt_sigreturn 514 x32 ioctl compat_sys_ioctl -515 x32 readv compat_sys_readv -516 x32 writev compat_sys_writev +515 x32 readv sys_readv +516 x32 writev sys_writev 517 x32 recvfrom compat_sys_recvfrom 518 x32 sendmsg compat_sys_sendmsg 519 x32 recvmsg compat_sys_recvmsg @@ -387,15 +388,15 @@ 529 x32 waitid compat_sys_waitid 530 x32 set_robust_list compat_sys_set_robust_list 531 x32 get_robust_list compat_sys_get_robust_list -532 x32 vmsplice compat_sys_vmsplice +532 x32 vmsplice sys_vmsplice 533 x32 move_pages compat_sys_move_pages 534 x32 preadv compat_sys_preadv64 535 x32 pwritev compat_sys_pwritev64 536 x32 rt_tgsigqueueinfo compat_sys_rt_tgsigqueueinfo 537 x32 recvmmsg compat_sys_recvmmsg_time64 538 x32 sendmmsg compat_sys_sendmmsg -539 x32 process_vm_readv compat_sys_process_vm_readv -540 x32 process_vm_writev compat_sys_process_vm_writev +539 x32 process_vm_readv sys_process_vm_readv +540 x32 process_vm_writev sys_process_vm_writev 541 x32 setsockopt sys_setsockopt 542 x32 getsockopt sys_getsockopt 543 x32 io_setup compat_sys_io_setup diff --git a/tools/perf/arch/x86/util/intel-pt.c b/tools/perf/arch/x86/util/intel-pt.c index 6ce451293634..082e5f2a415a 100644 --- a/tools/perf/arch/x86/util/intel-pt.c +++ b/tools/perf/arch/x86/util/intel-pt.c @@ -837,6 +837,10 @@ static int intel_pt_recording_options(struct auxtrace_record *itr, } } + if (have_timing_info && !intel_pt_evsel->core.attr.exclude_kernel && + perf_can_record_text_poke_events() && perf_can_record_cpu_wide()) + opts->text_poke = true; + if (intel_pt_evsel) { /* * To obtain the auxtrace buffer file descriptor, the auxtrace diff --git a/tools/perf/bench/Build b/tools/perf/bench/Build index 768e408757a0..878db6a59a41 100644 --- a/tools/perf/bench/Build +++ b/tools/perf/bench/Build @@ -1,5 +1,6 @@ perf-y += sched-messaging.o perf-y += sched-pipe.o +perf-y += syscall.o perf-y += mem-functions.o perf-y += futex-hash.o perf-y += futex-wake.o @@ -10,8 +11,8 @@ perf-y += epoll-wait.o perf-y += epoll-ctl.o perf-y += synthesize.o perf-y += kallsyms-parse.o +perf-y += find-bit-bench.o -perf-$(CONFIG_X86_64) += mem-memcpy-x86-64-lib.o perf-$(CONFIG_X86_64) += mem-memcpy-x86-64-asm.o perf-$(CONFIG_X86_64) += mem-memset-x86-64-asm.o diff --git a/tools/perf/bench/bench.h b/tools/perf/bench/bench.h index 61cae4966cae..2804812d4154 100644 --- a/tools/perf/bench/bench.h +++ b/tools/perf/bench/bench.h @@ -33,8 +33,10 @@ extern struct timeval bench__start, bench__end, bench__runtime; int bench_numa(int argc, const char **argv); int bench_sched_messaging(int argc, const char **argv); int bench_sched_pipe(int argc, const char **argv); +int bench_syscall_basic(int argc, const char **argv); int bench_mem_memcpy(int argc, const char **argv); int bench_mem_memset(int argc, const char **argv); +int bench_mem_find_bit(int argc, const char **argv); int bench_futex_hash(int argc, const char **argv); int bench_futex_wake(int argc, const char **argv); int bench_futex_wake_parallel(int argc, const char **argv); diff --git a/tools/perf/bench/find-bit-bench.c b/tools/perf/bench/find-bit-bench.c new file mode 100644 index 000000000000..73b5bcc5946a --- /dev/null +++ b/tools/perf/bench/find-bit-bench.c @@ -0,0 +1,135 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Benchmark find_next_bit and related bit operations. + * + * Copyright 2020 Google LLC. + */ +#include <stdlib.h> +#include "bench.h" +#include "../util/stat.h" +#include <linux/bitmap.h> +#include <linux/bitops.h> +#include <linux/time64.h> +#include <subcmd/parse-options.h> + +static unsigned int outer_iterations = 5; +static unsigned int inner_iterations = 100000; + +static const struct option options[] = { + OPT_UINTEGER('i', "outer-iterations", &outer_iterations, + "Number of outer iterations used"), + OPT_UINTEGER('j', "inner-iterations", &inner_iterations, + "Number of inner iterations used"), + OPT_END() +}; + +static const char *const bench_usage[] = { + "perf bench mem find_bit <options>", + NULL +}; + +static unsigned int accumulator; +static unsigned int use_of_val; + +static noinline void workload(int val) +{ + use_of_val += val; + accumulator++; +} + +#if (defined(__i386__) || defined(__x86_64__)) && defined(__GCC_ASM_FLAG_OUTPUTS__) +static bool asm_test_bit(long nr, const unsigned long *addr) +{ + bool oldbit; + + asm volatile("bt %2,%1" + : "=@ccc" (oldbit) + : "m" (*(unsigned long *)addr), "Ir" (nr) : "memory"); + + return oldbit; +} +#else +#define asm_test_bit test_bit +#endif + +static int do_for_each_set_bit(unsigned int num_bits) +{ + unsigned long *to_test = bitmap_alloc(num_bits); + struct timeval start, end, diff; + u64 runtime_us; + struct stats fb_time_stats, tb_time_stats; + double time_average, time_stddev; + unsigned int bit, i, j; + unsigned int set_bits, skip; + unsigned int old; + + init_stats(&fb_time_stats); + init_stats(&tb_time_stats); + + for (set_bits = 1; set_bits <= num_bits; set_bits <<= 1) { + bitmap_zero(to_test, num_bits); + skip = num_bits / set_bits; + for (i = 0; i < num_bits; i += skip) + set_bit(i, to_test); + + for (i = 0; i < outer_iterations; i++) { + old = accumulator; + gettimeofday(&start, NULL); + for (j = 0; j < inner_iterations; j++) { + for_each_set_bit(bit, to_test, num_bits) + workload(bit); + } + gettimeofday(&end, NULL); + assert(old + (inner_iterations * set_bits) == accumulator); + timersub(&end, &start, &diff); + runtime_us = diff.tv_sec * USEC_PER_SEC + diff.tv_usec; + update_stats(&fb_time_stats, runtime_us); + + old = accumulator; + gettimeofday(&start, NULL); + for (j = 0; j < inner_iterations; j++) { + for (bit = 0; bit < num_bits; bit++) { + if (asm_test_bit(bit, to_test)) + workload(bit); + } + } + gettimeofday(&end, NULL); + assert(old + (inner_iterations * set_bits) == accumulator); + timersub(&end, &start, &diff); + runtime_us = diff.tv_sec * USEC_PER_SEC + diff.tv_usec; + update_stats(&tb_time_stats, runtime_us); + } + + printf("%d operations %d bits set of %d bits\n", + inner_iterations, set_bits, num_bits); + time_average = avg_stats(&fb_time_stats); + time_stddev = stddev_stats(&fb_time_stats); + printf(" Average for_each_set_bit took: %.3f usec (+- %.3f usec)\n", + time_average, time_stddev); + time_average = avg_stats(&tb_time_stats); + time_stddev = stddev_stats(&tb_time_stats); + printf(" Average test_bit loop took: %.3f usec (+- %.3f usec)\n", + time_average, time_stddev); + + if (use_of_val == accumulator) /* Try to avoid compiler tricks. */ + printf("\n"); + } + bitmap_free(to_test); + return 0; +} + +int bench_mem_find_bit(int argc, const char **argv) +{ + int err = 0, i; + + argc = parse_options(argc, argv, options, bench_usage, 0); + if (argc) { + usage_with_options(bench_usage, options); + exit(EXIT_FAILURE); + } + + for (i = 1; i <= 2048; i <<= 1) + do_for_each_set_bit(i); + + return err; +} diff --git a/tools/perf/bench/mem-functions.c b/tools/perf/bench/mem-functions.c index 9235b76501be..19d45c377ac1 100644 --- a/tools/perf/bench/mem-functions.c +++ b/tools/perf/bench/mem-functions.c @@ -223,12 +223,8 @@ static int bench_mem_common(int argc, const char **argv, struct bench_mem_info * return 0; } -static u64 do_memcpy_cycles(const struct function *r, size_t size, void *src, void *dst) +static void memcpy_prefault(memcpy_t fn, size_t size, void *src, void *dst) { - u64 cycle_start = 0ULL, cycle_end = 0ULL; - memcpy_t fn = r->fn.memcpy; - int i; - /* Make sure to always prefault zero pages even if MMAP_THRESH is crossed: */ memset(src, 0, size); @@ -237,6 +233,15 @@ static u64 do_memcpy_cycles(const struct function *r, size_t size, void *src, vo * to not measure page fault overhead: */ fn(dst, src, size); +} + +static u64 do_memcpy_cycles(const struct function *r, size_t size, void *src, void *dst) +{ + u64 cycle_start = 0ULL, cycle_end = 0ULL; + memcpy_t fn = r->fn.memcpy; + int i; + + memcpy_prefault(fn, size, src, dst); cycle_start = get_cycles(); for (i = 0; i < nr_loops; ++i) @@ -252,11 +257,7 @@ static double do_memcpy_gettimeofday(const struct function *r, size_t size, void memcpy_t fn = r->fn.memcpy; int i; - /* - * We prefault the freshly allocated memory range here, - * to not measure page fault overhead: - */ - fn(dst, src, size); + memcpy_prefault(fn, size, src, dst); BUG_ON(gettimeofday(&tv_start, NULL)); for (i = 0; i < nr_loops; ++i) diff --git a/tools/perf/bench/mem-memcpy-x86-64-lib.c b/tools/perf/bench/mem-memcpy-x86-64-lib.c deleted file mode 100644 index 4130734dde84..000000000000 --- a/tools/perf/bench/mem-memcpy-x86-64-lib.c +++ /dev/null @@ -1,24 +0,0 @@ -/* - * From code in arch/x86/lib/usercopy_64.c, copied to keep tools/ copy - * of the kernel's arch/x86/lib/memcpy_64.s used in 'perf bench mem memcpy' - * happy. - */ -#include <linux/types.h> - -unsigned long __memcpy_mcsafe(void *dst, const void *src, size_t cnt); -unsigned long mcsafe_handle_tail(char *to, char *from, unsigned len); - -unsigned long mcsafe_handle_tail(char *to, char *from, unsigned len) -{ - for (; len; --len, to++, from++) { - /* - * Call the assembly routine back directly since - * memcpy_mcsafe() may silently fallback to memcpy. - */ - unsigned long rem = __memcpy_mcsafe(to, from, 1); - - if (rem) - break; - } - return len; -} diff --git a/tools/perf/bench/numa.c b/tools/perf/bench/numa.c index 5797253b9700..f85bceccc459 100644 --- a/tools/perf/bench/numa.c +++ b/tools/perf/bench/numa.c @@ -247,17 +247,22 @@ static int is_node_present(int node) */ static bool node_has_cpus(int node) { - struct bitmask *cpu = numa_allocate_cpumask(); - unsigned int i; + struct bitmask *cpumask = numa_allocate_cpumask(); + bool ret = false; /* fall back to nocpus */ + int cpu; - if (cpu && !numa_node_to_cpus(node, cpu)) { - for (i = 0; i < cpu->size; i++) { - if (numa_bitmask_isbitset(cpu, i)) - return true; + BUG_ON(!cpumask); + if (!numa_node_to_cpus(node, cpumask)) { + for (cpu = 0; cpu < (int)cpumask->size; cpu++) { + if (numa_bitmask_isbitset(cpumask, cpu)) { + ret = true; + break; + } } } + numa_free_cpumask(cpumask); - return false; /* lets fall back to nocpus safely */ + return ret; } static cpu_set_t bind_to_cpu(int target_cpu) @@ -288,14 +293,10 @@ static cpu_set_t bind_to_cpu(int target_cpu) static cpu_set_t bind_to_node(int target_node) { - int cpus_per_node = g->p.nr_cpus / nr_numa_nodes(); cpu_set_t orig_mask, mask; int cpu; int ret; - BUG_ON(cpus_per_node * nr_numa_nodes() != g->p.nr_cpus); - BUG_ON(!cpus_per_node); - ret = sched_getaffinity(0, sizeof(orig_mask), &orig_mask); BUG_ON(ret); @@ -305,13 +306,16 @@ static cpu_set_t bind_to_node(int target_node) for (cpu = 0; cpu < g->p.nr_cpus; cpu++) CPU_SET(cpu, &mask); } else { - int cpu_start = (target_node + 0) * cpus_per_node; - int cpu_stop = (target_node + 1) * cpus_per_node; + struct bitmask *cpumask = numa_allocate_cpumask(); - BUG_ON(cpu_stop > g->p.nr_cpus); - - for (cpu = cpu_start; cpu < cpu_stop; cpu++) - CPU_SET(cpu, &mask); + BUG_ON(!cpumask); + if (!numa_node_to_cpus(target_node, cpumask)) { + for (cpu = 0; cpu < (int)cpumask->size; cpu++) { + if (numa_bitmask_isbitset(cpumask, cpu)) + CPU_SET(cpu, &mask); + } + } + numa_free_cpumask(cpumask); } ret = sched_setaffinity(0, sizeof(mask), &mask); @@ -729,8 +733,6 @@ static int parse_nodes_opt(const struct option *opt __maybe_unused, return -1; return parse_node_list(arg); - - return 0; } #define BIT(x) (1ul << x) @@ -813,12 +815,12 @@ static u64 do_work(u8 *__data, long bytes, int nr, int nr_max, int loop, u64 val } } } else if (!g->p.data_backwards || (nr + loop) & 1) { + /* Process data forwards: */ d0 = data + off; d = data + off + 1; d1 = data + words; - /* Process data forwards: */ for (;;) { if (unlikely(d >= d1)) d = data; @@ -836,7 +838,6 @@ static u64 do_work(u8 *__data, long bytes, int nr, int nr_max, int loop, u64 val d = data + off - 1; d1 = data + words; - /* Process data forwards: */ for (;;) { if (unlikely(d < data)) d = data + words-1; @@ -1733,12 +1734,12 @@ err: */ static const char *tests[][MAX_ARGS] = { /* Basic single-stream NUMA bandwidth measurements: */ - { "RAM-bw-local,", "mem", "-p", "1", "-t", "1", "-P", "1024", + { "RAM-bw-local,", "mem", "-p", "1", "-t", "1", "-P", "1024", "-C" , "0", "-M", "0", OPT_BW_RAM }, { "RAM-bw-local-NOTHP,", "mem", "-p", "1", "-t", "1", "-P", "1024", "-C" , "0", "-M", "0", OPT_BW_RAM_NOTHP }, - { "RAM-bw-remote,", "mem", "-p", "1", "-t", "1", "-P", "1024", + { "RAM-bw-remote,", "mem", "-p", "1", "-t", "1", "-P", "1024", "-C" , "0", "-M", "1", OPT_BW_RAM }, /* 2-stream NUMA bandwidth measurements: */ @@ -1755,7 +1756,7 @@ static const char *tests[][MAX_ARGS] = { { " 1x3-convergence,", "mem", "-p", "1", "-t", "3", "-P", "512", OPT_CONV }, { " 1x4-convergence,", "mem", "-p", "1", "-t", "4", "-P", "512", OPT_CONV }, { " 1x6-convergence,", "mem", "-p", "1", "-t", "6", "-P", "1020", OPT_CONV }, - { " 2x3-convergence,", "mem", "-p", "3", "-t", "3", "-P", "1020", OPT_CONV }, + { " 2x3-convergence,", "mem", "-p", "2", "-t", "3", "-P", "1020", OPT_CONV }, { " 3x3-convergence,", "mem", "-p", "3", "-t", "3", "-P", "1020", OPT_CONV }, { " 4x4-convergence,", "mem", "-p", "4", "-t", "4", "-P", "512", OPT_CONV }, { " 4x4-convergence-NOTHP,", @@ -1780,24 +1781,24 @@ static const char *tests[][MAX_ARGS] = { "mem", "-p", "8", "-t", "1", "-P", " 512", OPT_BW_NOTHP }, { "16x1-bw-process,", "mem", "-p", "16", "-t", "1", "-P", "256", OPT_BW }, - { " 4x1-bw-thread,", "mem", "-p", "1", "-t", "4", "-T", "256", OPT_BW }, - { " 8x1-bw-thread,", "mem", "-p", "1", "-t", "8", "-T", "256", OPT_BW }, - { "16x1-bw-thread,", "mem", "-p", "1", "-t", "16", "-T", "128", OPT_BW }, - { "32x1-bw-thread,", "mem", "-p", "1", "-t", "32", "-T", "64", OPT_BW }, + { " 1x4-bw-thread,", "mem", "-p", "1", "-t", "4", "-T", "256", OPT_BW }, + { " 1x8-bw-thread,", "mem", "-p", "1", "-t", "8", "-T", "256", OPT_BW }, + { "1x16-bw-thread,", "mem", "-p", "1", "-t", "16", "-T", "128", OPT_BW }, + { "1x32-bw-thread,", "mem", "-p", "1", "-t", "32", "-T", "64", OPT_BW }, - { " 2x3-bw-thread,", "mem", "-p", "2", "-t", "3", "-P", "512", OPT_BW }, - { " 4x4-bw-thread,", "mem", "-p", "4", "-t", "4", "-P", "512", OPT_BW }, - { " 4x6-bw-thread,", "mem", "-p", "4", "-t", "6", "-P", "512", OPT_BW }, - { " 4x8-bw-thread,", "mem", "-p", "4", "-t", "8", "-P", "512", OPT_BW }, - { " 4x8-bw-thread-NOTHP,", + { " 2x3-bw-process,", "mem", "-p", "2", "-t", "3", "-P", "512", OPT_BW }, + { " 4x4-bw-process,", "mem", "-p", "4", "-t", "4", "-P", "512", OPT_BW }, + { " 4x6-bw-process,", "mem", "-p", "4", "-t", "6", "-P", "512", OPT_BW }, + { " 4x8-bw-process,", "mem", "-p", "4", "-t", "8", "-P", "512", OPT_BW }, + { " 4x8-bw-process-NOTHP,", "mem", "-p", "4", "-t", "8", "-P", "512", OPT_BW_NOTHP }, - { " 3x3-bw-thread,", "mem", "-p", "3", "-t", "3", "-P", "512", OPT_BW }, - { " 5x5-bw-thread,", "mem", "-p", "5", "-t", "5", "-P", "512", OPT_BW }, + { " 3x3-bw-process,", "mem", "-p", "3", "-t", "3", "-P", "512", OPT_BW }, + { " 5x5-bw-process,", "mem", "-p", "5", "-t", "5", "-P", "512", OPT_BW }, - { "2x16-bw-thread,", "mem", "-p", "2", "-t", "16", "-P", "512", OPT_BW }, - { "1x32-bw-thread,", "mem", "-p", "1", "-t", "32", "-P", "2048", OPT_BW }, + { "2x16-bw-process,", "mem", "-p", "2", "-t", "16", "-P", "512", OPT_BW }, + { "1x32-bw-process,", "mem", "-p", "1", "-t", "32", "-P", "2048", OPT_BW }, - { "numa02-bw,", "mem", "-p", "1", "-t", "32", "-T", "32", OPT_BW }, + { "numa02-bw,", "mem", "-p", "1", "-t", "32", "-T", "32", OPT_BW }, { "numa02-bw-NOTHP,", "mem", "-p", "1", "-t", "32", "-T", "32", OPT_BW_NOTHP }, { "numa01-bw-thread,", "mem", "-p", "2", "-t", "16", "-T", "192", OPT_BW }, { "numa01-bw-thread-NOTHP,", diff --git a/tools/perf/bench/sched-messaging.c b/tools/perf/bench/sched-messaging.c index 71d830d7b923..cecce93ccc63 100644 --- a/tools/perf/bench/sched-messaging.c +++ b/tools/perf/bench/sched-messaging.c @@ -66,11 +66,10 @@ static void fdpair(int fds[2]) /* Block until we're ready to go */ static void ready(int ready_out, int wakefd) { - char dummy; struct pollfd pollfd = { .fd = wakefd, .events = POLLIN }; /* Tell them we're ready. */ - if (write(ready_out, &dummy, 1) != 1) + if (write(ready_out, "R", 1) != 1) err(EXIT_FAILURE, "CLIENT: ready write"); /* Wait for "GO" signal */ @@ -85,6 +84,7 @@ static void *sender(struct sender_context *ctx) unsigned int i, j; ready(ctx->ready_out, ctx->wakefd); + memset(data, 'S', sizeof(data)); /* Now pump to every receiver. */ for (i = 0; i < nr_loops; i++) { diff --git a/tools/perf/bench/synthesize.c b/tools/perf/bench/synthesize.c index 8d624aea1c5e..b2924e3181dc 100644 --- a/tools/perf/bench/synthesize.c +++ b/tools/perf/bench/synthesize.c @@ -162,8 +162,8 @@ static int do_run_multi_threaded(struct target *target, init_stats(&event_stats); for (i = 0; i < multi_iterations; i++) { session = perf_session__new(NULL, false, NULL); - if (!session) - return -ENOMEM; + if (IS_ERR(session)) + return PTR_ERR(session); atomic_set(&event_count, 0); gettimeofday(&start, NULL); diff --git a/tools/perf/bench/syscall.c b/tools/perf/bench/syscall.c new file mode 100644 index 000000000000..5fe621cff8e9 --- /dev/null +++ b/tools/perf/bench/syscall.c @@ -0,0 +1,81 @@ +/* + * + * syscall.c + * + * syscall: Benchmark for system call performance + */ +#include "../perf.h" +#include "../util/util.h" +#include <subcmd/parse-options.h> +#include "../builtin.h" +#include "bench.h" + +#include <stdio.h> +#include <sys/time.h> +#include <sys/syscall.h> +#include <sys/types.h> +#include <unistd.h> +#include <stdlib.h> + +#define LOOPS_DEFAULT 10000000 +static int loops = LOOPS_DEFAULT; + +static const struct option options[] = { + OPT_INTEGER('l', "loop", &loops, "Specify number of loops"), + OPT_END() +}; + +static const char * const bench_syscall_usage[] = { + "perf bench syscall <options>", + NULL +}; + +int bench_syscall_basic(int argc, const char **argv) +{ + struct timeval start, stop, diff; + unsigned long long result_usec = 0; + int i; + + argc = parse_options(argc, argv, options, bench_syscall_usage, 0); + + gettimeofday(&start, NULL); + + for (i = 0; i < loops; i++) + getppid(); + + gettimeofday(&stop, NULL); + timersub(&stop, &start, &diff); + + switch (bench_format) { + case BENCH_FORMAT_DEFAULT: + printf("# Executed %'d getppid() calls\n", loops); + + result_usec = diff.tv_sec * 1000000; + result_usec += diff.tv_usec; + + printf(" %14s: %lu.%03lu [sec]\n\n", "Total time", + diff.tv_sec, + (unsigned long) (diff.tv_usec/1000)); + + printf(" %14lf usecs/op\n", + (double)result_usec / (double)loops); + printf(" %'14d ops/sec\n", + (int)((double)loops / + ((double)result_usec / (double)1000000))); + break; + + case BENCH_FORMAT_SIMPLE: + printf("%lu.%03lu\n", + diff.tv_sec, + (unsigned long) (diff.tv_usec / 1000)); + break; + + default: + /* reaching here is something disaster */ + fprintf(stderr, "Unknown format:%d\n", bench_format); + exit(1); + break; + } + + return 0; +} diff --git a/tools/perf/builtin-bench.c b/tools/perf/builtin-bench.c index cad31b1d3438..4f176039fc8f 100644 --- a/tools/perf/builtin-bench.c +++ b/tools/perf/builtin-bench.c @@ -11,6 +11,7 @@ * Available benchmark collection list: * * sched ... scheduler and IPC performance + * syscall ... System call performance * mem ... memory access performance * numa ... NUMA scheduling and MM performance * futex ... Futex performance @@ -49,9 +50,16 @@ static struct bench sched_benchmarks[] = { { NULL, NULL, NULL } }; +static struct bench syscall_benchmarks[] = { + { "basic", "Benchmark for basic getppid(2) calls", bench_syscall_basic }, + { "all", "Run all syscall benchmarks", NULL }, + { NULL, NULL, NULL }, +}; + static struct bench mem_benchmarks[] = { { "memcpy", "Benchmark for memcpy() functions", bench_mem_memcpy }, { "memset", "Benchmark for memset() functions", bench_mem_memset }, + { "find_bit", "Benchmark for find_bit() functions", bench_mem_find_bit }, { "all", "Run all memory access benchmarks", NULL }, { NULL, NULL, NULL } }; @@ -90,6 +98,7 @@ struct collection { static struct collection collections[] = { { "sched", "Scheduler and IPC benchmarks", sched_benchmarks }, + { "syscall", "System call benchmarks", syscall_benchmarks }, { "mem", "Memory access benchmarks", mem_benchmarks }, #ifdef HAVE_LIBNUMA_SUPPORT { "numa", "NUMA scheduling and MM benchmarks", numa_benchmarks }, diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index d617d5682c68..5938b100eaf4 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -2582,7 +2582,7 @@ parse_callchain_opt(const struct option *opt, const char *arg, int unset) static int setup_callchain(struct evlist *evlist) { - u64 sample_type = perf_evlist__combined_sample_type(evlist); + u64 sample_type = evlist__combined_sample_type(evlist); enum perf_call_graph_mode mode = CALLCHAIN_NONE; if ((sample_type & PERF_SAMPLE_REGS_USER) && diff --git a/tools/perf/builtin-data.c b/tools/perf/builtin-data.c index ca2fb44874e4..8d23b8d6ee8e 100644 --- a/tools/perf/builtin-data.c +++ b/tools/perf/builtin-data.c @@ -65,6 +65,7 @@ static int cmd_data_convert(int argc, const char **argv) OPT_STRING('i', "input", &input_name, "file", "input file name"), #ifdef HAVE_LIBBABELTRACE_SUPPORT OPT_STRING(0, "to-ctf", &to_ctf, NULL, "Convert to CTF format"), + OPT_BOOLEAN(0, "tod", &opts.tod, "Convert time to wall clock time"), #endif OPT_BOOLEAN('f', "force", &opts.force, "don't complain, do it"), OPT_BOOLEAN(0, "all", &opts.all, "Convert all events"), diff --git a/tools/perf/builtin-ftrace.c b/tools/perf/builtin-ftrace.c index 2bfc1b0db536..1d44bc2f63d8 100644 --- a/tools/perf/builtin-ftrace.c +++ b/tools/perf/builtin-ftrace.c @@ -3,6 +3,7 @@ * builtin-ftrace.c * * Copyright (c) 2013 LG Electronics, Namhyung Kim <namhyung@kernel.org> + * Copyright (c) 2020 Changbin Du <changbin.du@gmail.com>, significant enhancement. */ #include "builtin.h" @@ -26,6 +27,8 @@ #include "thread_map.h" #include "util/cap.h" #include "util/config.h" +#include "util/units.h" +#include "util/parse-sublevel-options.h" #define DEFAULT_TRACER "function_graph" @@ -33,11 +36,21 @@ struct perf_ftrace { struct evlist *evlist; struct target target; const char *tracer; + bool list_avail_functions; struct list_head filters; struct list_head notrace; struct list_head graph_funcs; struct list_head nograph_funcs; int graph_depth; + unsigned long percpu_buffer_size; + bool inherit; + int func_stack_trace; + int func_irq_info; + int graph_nosleep_time; + int graph_noirqs; + int graph_verbose; + int graph_thresh; + unsigned int initial_delay; }; struct filter_entry { @@ -128,9 +141,85 @@ static int append_tracing_file(const char *name, const char *val) return __write_tracing_file(name, val, true); } +static int read_tracing_file_to_stdout(const char *name) +{ + char buf[4096]; + char *file; + int fd; + int ret = -1; + + file = get_tracing_file(name); + if (!file) { + pr_debug("cannot get tracing file: %s\n", name); + return -1; + } + + fd = open(file, O_RDONLY); + if (fd < 0) { + pr_debug("cannot open tracing file: %s: %s\n", + name, str_error_r(errno, buf, sizeof(buf))); + goto out; + } + + /* read contents to stdout */ + while (true) { + int n = read(fd, buf, sizeof(buf)); + if (n == 0) + break; + else if (n < 0) + goto out_close; + + if (fwrite(buf, n, 1, stdout) != 1) + goto out_close; + } + ret = 0; + +out_close: + close(fd); +out: + put_tracing_file(file); + return ret; +} + +static int write_tracing_file_int(const char *name, int value) +{ + char buf[16]; + + snprintf(buf, sizeof(buf), "%d", value); + if (write_tracing_file(name, buf) < 0) + return -1; + + return 0; +} + +static int write_tracing_option_file(const char *name, const char *val) +{ + char *file; + int ret; + + if (asprintf(&file, "options/%s", name) < 0) + return -1; + + ret = __write_tracing_file(file, val, false); + free(file); + return ret; +} + static int reset_tracing_cpu(void); static void reset_tracing_filters(void); +static void reset_tracing_options(struct perf_ftrace *ftrace __maybe_unused) +{ + write_tracing_option_file("function-fork", "0"); + write_tracing_option_file("func_stack_trace", "0"); + write_tracing_option_file("sleep-time", "1"); + write_tracing_option_file("funcgraph-irqs", "1"); + write_tracing_option_file("funcgraph-proc", "0"); + write_tracing_option_file("funcgraph-abstime", "0"); + write_tracing_option_file("latency-format", "0"); + write_tracing_option_file("irq-info", "0"); +} + static int reset_tracing_files(struct perf_ftrace *ftrace __maybe_unused) { if (write_tracing_file("tracing_on", "0") < 0) @@ -148,7 +237,11 @@ static int reset_tracing_files(struct perf_ftrace *ftrace __maybe_unused) if (write_tracing_file("max_graph_depth", "0") < 0) return -1; + if (write_tracing_file("tracing_thresh", "0") < 0) + return -1; + reset_tracing_filters(); + reset_tracing_options(ftrace); return 0; } @@ -204,6 +297,28 @@ static int set_tracing_cpu(struct perf_ftrace *ftrace) return set_tracing_cpumask(cpumap); } +static int set_tracing_func_stack_trace(struct perf_ftrace *ftrace) +{ + if (!ftrace->func_stack_trace) + return 0; + + if (write_tracing_option_file("func_stack_trace", "1") < 0) + return -1; + + return 0; +} + +static int set_tracing_func_irqinfo(struct perf_ftrace *ftrace) +{ + if (!ftrace->func_irq_info) + return 0; + + if (write_tracing_option_file("irq-info", "1") < 0) + return -1; + + return 0; +} + static int reset_tracing_cpu(void) { struct perf_cpu_map *cpumap = perf_cpu_map__new(NULL); @@ -258,8 +373,6 @@ static void reset_tracing_filters(void) static int set_tracing_depth(struct perf_ftrace *ftrace) { - char buf[16]; - if (ftrace->graph_depth == 0) return 0; @@ -268,10 +381,152 @@ static int set_tracing_depth(struct perf_ftrace *ftrace) return -1; } - snprintf(buf, sizeof(buf), "%d", ftrace->graph_depth); + if (write_tracing_file_int("max_graph_depth", ftrace->graph_depth) < 0) + return -1; + + return 0; +} + +static int set_tracing_percpu_buffer_size(struct perf_ftrace *ftrace) +{ + int ret; + + if (ftrace->percpu_buffer_size == 0) + return 0; + + ret = write_tracing_file_int("buffer_size_kb", + ftrace->percpu_buffer_size / 1024); + if (ret < 0) + return ret; + + return 0; +} + +static int set_tracing_trace_inherit(struct perf_ftrace *ftrace) +{ + if (!ftrace->inherit) + return 0; + + if (write_tracing_option_file("function-fork", "1") < 0) + return -1; + + return 0; +} + +static int set_tracing_sleep_time(struct perf_ftrace *ftrace) +{ + if (!ftrace->graph_nosleep_time) + return 0; + + if (write_tracing_option_file("sleep-time", "0") < 0) + return -1; + + return 0; +} + +static int set_tracing_funcgraph_irqs(struct perf_ftrace *ftrace) +{ + if (!ftrace->graph_noirqs) + return 0; + + if (write_tracing_option_file("funcgraph-irqs", "0") < 0) + return -1; + + return 0; +} + +static int set_tracing_funcgraph_verbose(struct perf_ftrace *ftrace) +{ + if (!ftrace->graph_verbose) + return 0; + + if (write_tracing_option_file("funcgraph-proc", "1") < 0) + return -1; + + if (write_tracing_option_file("funcgraph-abstime", "1") < 0) + return -1; + + if (write_tracing_option_file("latency-format", "1") < 0) + return -1; + + return 0; +} + +static int set_tracing_thresh(struct perf_ftrace *ftrace) +{ + int ret; + + if (ftrace->graph_thresh == 0) + return 0; + + ret = write_tracing_file_int("tracing_thresh", ftrace->graph_thresh); + if (ret < 0) + return ret; + + return 0; +} + +static int set_tracing_options(struct perf_ftrace *ftrace) +{ + if (set_tracing_pid(ftrace) < 0) { + pr_err("failed to set ftrace pid\n"); + return -1; + } + + if (set_tracing_cpu(ftrace) < 0) { + pr_err("failed to set tracing cpumask\n"); + return -1; + } + + if (set_tracing_func_stack_trace(ftrace) < 0) { + pr_err("failed to set tracing option func_stack_trace\n"); + return -1; + } + + if (set_tracing_func_irqinfo(ftrace) < 0) { + pr_err("failed to set tracing option irq-info\n"); + return -1; + } + + if (set_tracing_filters(ftrace) < 0) { + pr_err("failed to set tracing filters\n"); + return -1; + } + + if (set_tracing_depth(ftrace) < 0) { + pr_err("failed to set graph depth\n"); + return -1; + } + + if (set_tracing_percpu_buffer_size(ftrace) < 0) { + pr_err("failed to set tracing per-cpu buffer size\n"); + return -1; + } + + if (set_tracing_trace_inherit(ftrace) < 0) { + pr_err("failed to set tracing option function-fork\n"); + return -1; + } + + if (set_tracing_sleep_time(ftrace) < 0) { + pr_err("failed to set tracing option sleep-time\n"); + return -1; + } + + if (set_tracing_funcgraph_irqs(ftrace) < 0) { + pr_err("failed to set tracing option funcgraph-irqs\n"); + return -1; + } - if (write_tracing_file("max_graph_depth", buf) < 0) + if (set_tracing_funcgraph_verbose(ftrace) < 0) { + pr_err("failed to set tracing option funcgraph-proc/funcgraph-abstime\n"); return -1; + } + + if (set_tracing_thresh(ftrace) < 0) { + pr_err("failed to set tracing thresh\n"); + return -1; + } return 0; } @@ -302,6 +557,9 @@ static int __cmd_ftrace(struct perf_ftrace *ftrace, int argc, const char **argv) signal(SIGCHLD, sig_handler); signal(SIGPIPE, sig_handler); + if (ftrace->list_avail_functions) + return read_tracing_file_to_stdout("available_filter_functions"); + if (reset_tracing_files(ftrace) < 0) { pr_err("failed to reset ftrace\n"); goto out; @@ -317,25 +575,8 @@ static int __cmd_ftrace(struct perf_ftrace *ftrace, int argc, const char **argv) goto out; } - if (set_tracing_pid(ftrace) < 0) { - pr_err("failed to set ftrace pid\n"); + if (set_tracing_options(ftrace) < 0) goto out_reset; - } - - if (set_tracing_cpu(ftrace) < 0) { - pr_err("failed to set tracing cpumask\n"); - goto out_reset; - } - - if (set_tracing_filters(ftrace) < 0) { - pr_err("failed to set tracing filters\n"); - goto out_reset; - } - - if (set_tracing_depth(ftrace) < 0) { - pr_err("failed to set graph depth\n"); - goto out_reset; - } if (write_tracing_file("current_tracer", ftrace->tracer) < 0) { pr_err("failed to set current_tracer to %s\n", ftrace->tracer); @@ -362,13 +603,26 @@ static int __cmd_ftrace(struct perf_ftrace *ftrace, int argc, const char **argv) fcntl(trace_fd, F_SETFL, O_NONBLOCK); pollfd.fd = trace_fd; - if (write_tracing_file("tracing_on", "1") < 0) { - pr_err("can't enable tracing\n"); - goto out_close_fd; + /* display column headers */ + read_tracing_file_to_stdout("trace"); + + if (!ftrace->initial_delay) { + if (write_tracing_file("tracing_on", "1") < 0) { + pr_err("can't enable tracing\n"); + goto out_close_fd; + } } perf_evlist__start_workload(ftrace->evlist); + if (ftrace->initial_delay) { + usleep(ftrace->initial_delay * 1000); + if (write_tracing_file("tracing_on", "1") < 0) { + pr_err("can't enable tracing\n"); + goto out_close_fd; + } + } + while (!done) { if (poll(&pollfd, 1, -1) < 0) break; @@ -455,6 +709,99 @@ static void delete_filter_func(struct list_head *head) } } +static int parse_buffer_size(const struct option *opt, + const char *str, int unset) +{ + unsigned long *s = (unsigned long *)opt->value; + static struct parse_tag tags_size[] = { + { .tag = 'B', .mult = 1 }, + { .tag = 'K', .mult = 1 << 10 }, + { .tag = 'M', .mult = 1 << 20 }, + { .tag = 'G', .mult = 1 << 30 }, + { .tag = 0 }, + }; + unsigned long val; + + if (unset) { + *s = 0; + return 0; + } + + val = parse_tag_value(str, tags_size); + if (val != (unsigned long) -1) { + if (val < 1024) { + pr_err("buffer size too small, must larger than 1KB."); + return -1; + } + *s = val; + return 0; + } + + return -1; +} + +static int parse_func_tracer_opts(const struct option *opt, + const char *str, int unset) +{ + int ret; + struct perf_ftrace *ftrace = (struct perf_ftrace *) opt->value; + struct sublevel_option func_tracer_opts[] = { + { .name = "call-graph", .value_ptr = &ftrace->func_stack_trace }, + { .name = "irq-info", .value_ptr = &ftrace->func_irq_info }, + { .name = NULL, } + }; + + if (unset) + return 0; + + ret = perf_parse_sublevel_options(str, func_tracer_opts); + if (ret) + return ret; + + return 0; +} + +static int parse_graph_tracer_opts(const struct option *opt, + const char *str, int unset) +{ + int ret; + struct perf_ftrace *ftrace = (struct perf_ftrace *) opt->value; + struct sublevel_option graph_tracer_opts[] = { + { .name = "nosleep-time", .value_ptr = &ftrace->graph_nosleep_time }, + { .name = "noirqs", .value_ptr = &ftrace->graph_noirqs }, + { .name = "verbose", .value_ptr = &ftrace->graph_verbose }, + { .name = "thresh", .value_ptr = &ftrace->graph_thresh }, + { .name = "depth", .value_ptr = &ftrace->graph_depth }, + { .name = NULL, } + }; + + if (unset) + return 0; + + ret = perf_parse_sublevel_options(str, graph_tracer_opts); + if (ret) + return ret; + + return 0; +} + +static void select_tracer(struct perf_ftrace *ftrace) +{ + bool graph = !list_empty(&ftrace->graph_funcs) || + !list_empty(&ftrace->nograph_funcs); + bool func = !list_empty(&ftrace->filters) || + !list_empty(&ftrace->notrace); + + /* The function_graph has priority over function tracer. */ + if (graph) + ftrace->tracer = "function_graph"; + else if (func) + ftrace->tracer = "function"; + /* Otherwise, the default tracer is used. */ + + pr_debug("%s tracer is used\n", ftrace->tracer); +} + int cmd_ftrace(int argc, const char **argv) { int ret; @@ -469,25 +816,42 @@ int cmd_ftrace(int argc, const char **argv) }; const struct option ftrace_options[] = { OPT_STRING('t', "tracer", &ftrace.tracer, "tracer", - "tracer to use: function_graph(default) or function"), + "Tracer to use: function_graph(default) or function"), + OPT_BOOLEAN('F', "funcs", &ftrace.list_avail_functions, + "Show available functions to filter"), OPT_STRING('p', "pid", &ftrace.target.pid, "pid", - "trace on existing process id"), + "Trace on existing process id"), + /* TODO: Add short option -t after -t/--tracer can be removed. */ + OPT_STRING(0, "tid", &ftrace.target.tid, "tid", + "Trace on existing thread id (exclusive to --pid)"), OPT_INCR('v', "verbose", &verbose, - "be more verbose"), + "Be more verbose"), OPT_BOOLEAN('a', "all-cpus", &ftrace.target.system_wide, - "system-wide collection from all CPUs"), + "System-wide collection from all CPUs"), OPT_STRING('C', "cpu", &ftrace.target.cpu_list, "cpu", - "list of cpus to monitor"), + "List of cpus to monitor"), OPT_CALLBACK('T', "trace-funcs", &ftrace.filters, "func", - "trace given functions only", parse_filter_func), + "Trace given functions using function tracer", + parse_filter_func), OPT_CALLBACK('N', "notrace-funcs", &ftrace.notrace, "func", - "do not trace given functions", parse_filter_func), + "Do not trace given functions", parse_filter_func), + OPT_CALLBACK(0, "func-opts", &ftrace, "options", + "Function tracer options, available options: call-graph,irq-info", + parse_func_tracer_opts), OPT_CALLBACK('G', "graph-funcs", &ftrace.graph_funcs, "func", - "Set graph filter on given functions", parse_filter_func), + "Trace given functions using function_graph tracer", + parse_filter_func), OPT_CALLBACK('g', "nograph-funcs", &ftrace.nograph_funcs, "func", "Set nograph filter on given functions", parse_filter_func), - OPT_INTEGER('D', "graph-depth", &ftrace.graph_depth, - "Max depth for function graph tracer"), + OPT_CALLBACK(0, "graph-opts", &ftrace, "options", + "Graph tracer options, available options: nosleep-time,noirqs,verbose,thresh=<n>,depth=<n>", + parse_graph_tracer_opts), + OPT_CALLBACK('m', "buffer-size", &ftrace.percpu_buffer_size, "size", + "Size of per cpu buffer, needs to use a B, K, M or G suffix.", parse_buffer_size), + OPT_BOOLEAN(0, "inherit", &ftrace.inherit, + "Trace children processes"), + OPT_UINTEGER('D', "delay", &ftrace.initial_delay, + "Number of milliseconds to wait before starting tracing after program start"), OPT_END() }; @@ -505,6 +869,8 @@ int cmd_ftrace(int argc, const char **argv) if (!argc && target__none(&ftrace.target)) ftrace.target.system_wide = true; + select_tracer(&ftrace); + ret = target__validate(&ftrace.target); if (ret) { char errbuf[512]; diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index 4a6de4b03ac0..6d2f410d773a 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -292,7 +292,7 @@ static int perf_event__jit_repipe_mmap(struct perf_tool *tool, * if jit marker, then inject jit mmaps and generate ELF images */ ret = jit_process(inject->session, &inject->output, machine, - event->mmap.filename, sample->pid, &n); + event->mmap.filename, event->mmap.pid, &n); if (ret < 0) return ret; if (ret) { @@ -330,7 +330,7 @@ static int perf_event__jit_repipe_mmap2(struct perf_tool *tool, * if jit marker, then inject jit mmaps and generate ELF images */ ret = jit_process(inject->session, &inject->output, machine, - event->mmap2.filename, sample->pid, &n); + event->mmap2.filename, event->mmap2.pid, &n); if (ret < 0) return ret; if (ret) { diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c index 38a5ab683ebc..a50dae2c4ae9 100644 --- a/tools/perf/builtin-kmem.c +++ b/tools/perf/builtin-kmem.c @@ -1933,7 +1933,8 @@ int cmd_kmem(int argc, const char **argv) return ret; argc = parse_options_subcommand(argc, argv, kmem_options, - kmem_subcommands, kmem_usage, 0); + kmem_subcommands, kmem_usage, + PARSE_OPT_STOP_AT_NON_OPTION); if (!argc) usage_with_options(kmem_usage, kmem_options); diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c index 95a77058023e..460945ded6dd 100644 --- a/tools/perf/builtin-kvm.c +++ b/tools/perf/builtin-kvm.c @@ -1319,7 +1319,7 @@ static struct evlist *kvm_live_event_list(void) *name = '\0'; name++; - if (perf_evlist__add_newtp(evlist, sys, name, NULL)) { + if (evlist__add_newtp(evlist, sys, name, NULL)) { pr_err("Failed to add %s tracepoint to the list\n", *events_tp); free(tp); goto out; diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index a37e7910e9e9..772f1057647f 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -46,6 +46,7 @@ #include "util/bpf-event.h" #include "util/util.h" #include "util/pfm.h" +#include "util/clockid.h" #include "asm/bug.h" #include "perf.h" @@ -70,6 +71,7 @@ #include <linux/time64.h> #include <linux/zalloc.h> #include <linux/bitmap.h> +#include <sys/time.h> struct switch_output { bool enabled; @@ -765,6 +767,43 @@ static int record__auxtrace_init(struct record *rec __maybe_unused) #endif +static int record__config_text_poke(struct evlist *evlist) +{ + struct evsel *evsel; + int err; + + /* Nothing to do if text poke is already configured */ + evlist__for_each_entry(evlist, evsel) { + if (evsel->core.attr.text_poke) + return 0; + } + + err = parse_events(evlist, "dummy:u", NULL); + if (err) + return err; + + evsel = evlist__last(evlist); + + evsel->core.attr.freq = 0; + evsel->core.attr.sample_period = 1; + evsel->core.attr.text_poke = 1; + evsel->core.attr.ksymbol = 1; + + evsel->core.system_wide = true; + evsel->no_aux_samples = true; + evsel->immediate = true; + + /* Text poke must be collected on all CPUs */ + perf_cpu_map__put(evsel->core.own_cpus); + evsel->core.own_cpus = perf_cpu_map__new(NULL); + perf_cpu_map__put(evsel->core.cpus); + evsel->core.cpus = perf_cpu_map__get(evsel->core.own_cpus); + + evsel__set_sample_bit(evsel, TIME); + + return 0; +} + static bool record__kcore_readable(struct machine *machine) { char kcore[PATH_MAX]; @@ -855,7 +894,7 @@ static int record__open(struct record *rec) pos = perf_evlist__get_tracking_event(evlist); if (!evsel__is_dummy_event(pos)) { /* Set up dummy event. */ - if (perf_evlist__add_dummy(evlist)) + if (evlist__add_dummy(evlist)) return -ENOMEM; pos = evlist__last(evlist); perf_evlist__set_tracking_event(evlist, pos); @@ -1166,6 +1205,9 @@ static void record__init_features(struct record *rec) if (!(rec->opts.use_clockid && rec->opts.clockid_res_ns)) perf_header__clear_feat(&session->header, HEADER_CLOCKID); + if (!rec->opts.use_clockid) + perf_header__clear_feat(&session->header, HEADER_CLOCK_DATA); + perf_header__clear_feat(&session->header, HEADER_DIR_FORMAT); if (!record__comp_enabled(rec)) perf_header__clear_feat(&session->header, HEADER_COMPRESSED); @@ -1489,7 +1531,7 @@ static int record__setup_sb_evlist(struct record *rec) evlist__set_cb(rec->sb_evlist, record__process_signal_event, rec); rec->thread_id = pthread_self(); } - +#ifdef HAVE_LIBBPF_SUPPORT if (!opts->no_bpf_event) { if (rec->sb_evlist == NULL) { rec->sb_evlist = evlist__new(); @@ -1505,7 +1547,7 @@ static int record__setup_sb_evlist(struct record *rec) return -1; } } - +#endif if (perf_evlist__start_sb_thread(rec->sb_evlist, &rec->opts.target)) { pr_debug("Couldn't start the BPF side band thread:\nBPF programs starting from now on won't be annotatable\n"); opts->no_bpf_event = true; @@ -1514,6 +1556,43 @@ static int record__setup_sb_evlist(struct record *rec) return 0; } +static int record__init_clock(struct record *rec) +{ + struct perf_session *session = rec->session; + struct timespec ref_clockid; + struct timeval ref_tod; + u64 ref; + + if (!rec->opts.use_clockid) + return 0; + + if (rec->opts.use_clockid && rec->opts.clockid_res_ns) + session->header.env.clock.clockid_res_ns = rec->opts.clockid_res_ns; + + session->header.env.clock.clockid = rec->opts.clockid; + + if (gettimeofday(&ref_tod, NULL) != 0) { + pr_err("gettimeofday failed, cannot set reference time.\n"); + return -1; + } + + if (clock_gettime(rec->opts.clockid, &ref_clockid)) { + pr_err("clock_gettime failed, cannot set reference time.\n"); + return -1; + } + + ref = (u64) ref_tod.tv_sec * NSEC_PER_SEC + + (u64) ref_tod.tv_usec * NSEC_PER_USEC; + + session->header.env.clock.tod_ns = ref; + + ref = (u64) ref_clockid.tv_sec * NSEC_PER_SEC + + (u64) ref_clockid.tv_nsec; + + session->header.env.clock.clockid_ns = ref; + return 0; +} + static int __cmd_record(struct record *rec, int argc, const char **argv) { int err; @@ -1527,6 +1606,7 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) bool disabled = false, draining = false; int fd; float ratio = 0; + enum evlist_ctl_cmd cmd = EVLIST_CTL_CMD_UNSUPPORTED; atexit(record__sig_exit); signal(SIGCHLD, sig_handler); @@ -1593,10 +1673,10 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) return -1; } - record__init_features(rec); + if (record__init_clock(rec)) + return -1; - if (rec->opts.use_clockid && rec->opts.clockid_res_ns) - session->header.env.clockid_res_ns = rec->opts.clockid_res_ns; + record__init_features(rec); if (forks) { err = perf_evlist__prepare_workload(rec->evlist, &opts->target, @@ -1646,7 +1726,7 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) * Normally perf_session__new would do this, but it doesn't have the * evlist. */ - if (rec->tool.ordered_events && !perf_evlist__sample_id_all(rec->evlist)) { + if (rec->tool.ordered_events && !evlist__sample_id_all(rec->evlist)) { pr_warning("WARNING: No sample_id_all support, falling back to unordered processing\n"); rec->tool.ordered_events = false; } @@ -1748,9 +1828,16 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) perf_evlist__start_workload(rec->evlist); } + if (evlist__initialize_ctlfd(rec->evlist, opts->ctl_fd, opts->ctl_fd_ack)) + goto out_child; + if (opts->initial_delay) { - usleep(opts->initial_delay * USEC_PER_MSEC); - evlist__enable(rec->evlist); + pr_info(EVLIST_DISABLED_MSG); + if (opts->initial_delay > 0) { + usleep(opts->initial_delay * USEC_PER_MSEC); + evlist__enable(rec->evlist); + pr_info(EVLIST_ENABLED_MSG); + } } trigger_ready(&auxtrace_snapshot_trigger); @@ -1842,6 +1929,21 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) draining = true; } + if (evlist__ctlfd_process(rec->evlist, &cmd) > 0) { + switch (cmd) { + case EVLIST_CTL_CMD_ENABLE: + pr_info(EVLIST_ENABLED_MSG); + break; + case EVLIST_CTL_CMD_DISABLE: + pr_info(EVLIST_DISABLED_MSG); + break; + case EVLIST_CTL_CMD_ACK: + case EVLIST_CTL_CMD_UNSUPPORTED: + default: + break; + } + } + /* * When perf is starting the traced process, at the end events * die with the process and we wait for that. Thus no need to @@ -1875,6 +1977,7 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) record__synthesize_workload(rec, true); out_child: + evlist__finalize_ctlfd(rec->evlist); record__mmap_read_all(rec, true); record__aio_mmap_read_sync(rec); @@ -2041,103 +2144,6 @@ static int perf_record_config(const char *var, const char *value, void *cb) return 0; } -struct clockid_map { - const char *name; - int clockid; -}; - -#define CLOCKID_MAP(n, c) \ - { .name = n, .clockid = (c), } - -#define CLOCKID_END { .name = NULL, } - - -/* - * Add the missing ones, we need to build on many distros... - */ -#ifndef CLOCK_MONOTONIC_RAW -#define CLOCK_MONOTONIC_RAW 4 -#endif -#ifndef CLOCK_BOOTTIME -#define CLOCK_BOOTTIME 7 -#endif -#ifndef CLOCK_TAI -#define CLOCK_TAI 11 -#endif - -static const struct clockid_map clockids[] = { - /* available for all events, NMI safe */ - CLOCKID_MAP("monotonic", CLOCK_MONOTONIC), - CLOCKID_MAP("monotonic_raw", CLOCK_MONOTONIC_RAW), - - /* available for some events */ - CLOCKID_MAP("realtime", CLOCK_REALTIME), - CLOCKID_MAP("boottime", CLOCK_BOOTTIME), - CLOCKID_MAP("tai", CLOCK_TAI), - - /* available for the lazy */ - CLOCKID_MAP("mono", CLOCK_MONOTONIC), - CLOCKID_MAP("raw", CLOCK_MONOTONIC_RAW), - CLOCKID_MAP("real", CLOCK_REALTIME), - CLOCKID_MAP("boot", CLOCK_BOOTTIME), - - CLOCKID_END, -}; - -static int get_clockid_res(clockid_t clk_id, u64 *res_ns) -{ - struct timespec res; - - *res_ns = 0; - if (!clock_getres(clk_id, &res)) - *res_ns = res.tv_nsec + res.tv_sec * NSEC_PER_SEC; - else - pr_warning("WARNING: Failed to determine specified clock resolution.\n"); - - return 0; -} - -static int parse_clockid(const struct option *opt, const char *str, int unset) -{ - struct record_opts *opts = (struct record_opts *)opt->value; - const struct clockid_map *cm; - const char *ostr = str; - - if (unset) { - opts->use_clockid = 0; - return 0; - } - - /* no arg passed */ - if (!str) - return 0; - - /* no setting it twice */ - if (opts->use_clockid) - return -1; - - opts->use_clockid = true; - - /* if its a number, we're done */ - if (sscanf(str, "%d", &opts->clockid) == 1) - return get_clockid_res(opts->clockid, &opts->clockid_res_ns); - - /* allow a "CLOCK_" prefix to the name */ - if (!strncasecmp(str, "CLOCK_", 6)) - str += 6; - - for (cm = clockids; cm->name; cm++) { - if (!strcasecmp(str, cm->name)) { - opts->clockid = cm->clockid; - return get_clockid_res(opts->clockid, - &opts->clockid_res_ns); - } - } - - opts->use_clockid = false; - ui__warning("unknown clockid %s, check man page\n", ostr); - return -1; -} static int record__parse_affinity(const struct option *opt, const char *str, int unset) { @@ -2224,6 +2230,33 @@ out_free: return ret; } +static int parse_control_option(const struct option *opt, + const char *str, + int unset __maybe_unused) +{ + char *comma = NULL, *endptr = NULL; + struct record_opts *config = (struct record_opts *)opt->value; + + if (strncmp(str, "fd:", 3)) + return -EINVAL; + + config->ctl_fd = strtoul(&str[3], &endptr, 0); + if (endptr == &str[3]) + return -EINVAL; + + comma = strchr(str, ','); + if (comma) { + if (endptr != comma) + return -EINVAL; + + config->ctl_fd_ack = strtoul(comma + 1, &endptr, 0); + if (endptr == comma + 1 || *endptr != '\0') + return -EINVAL; + } + + return 0; +} + static void switch_output_size_warn(struct record *rec) { u64 wakeup_size = evlist__mmap_size(rec->opts.mmap_pages); @@ -2360,6 +2393,8 @@ static struct record record = { }, .mmap_flush = MMAP_FLUSH_DEFAULT, .nr_threads_synthesize = 1, + .ctl_fd = -1, + .ctl_fd_ack = -1, }, .tool = { .sample = process_sample_event, @@ -2417,7 +2452,7 @@ static struct option __record_options[] = { OPT_BOOLEAN(0, "tail-synthesize", &record.opts.tail_synthesize, "synthesize non-sample events at the end of output"), OPT_BOOLEAN(0, "overwrite", &record.opts.overwrite, "use overwrite mode"), - OPT_BOOLEAN(0, "no-bpf-event", &record.opts.no_bpf_event, "record bpf events"), + OPT_BOOLEAN(0, "no-bpf-event", &record.opts.no_bpf_event, "do not record bpf events"), OPT_BOOLEAN(0, "strict-freq", &record.opts.strict_freq, "Fail if the specified frequency can't be used"), OPT_CALLBACK('F', "freq", &record.opts, "freq or 'max'", @@ -2462,8 +2497,8 @@ static struct option __record_options[] = { OPT_CALLBACK('G', "cgroup", &record.evlist, "name", "monitor event in cgroup name only", parse_cgroups), - OPT_UINTEGER('D', "delay", &record.opts.initial_delay, - "ms to wait before starting measurement after program start"), + OPT_INTEGER('D', "delay", &record.opts.initial_delay, + "ms to wait before starting measurement after program start (-1: start with events disabled)"), OPT_BOOLEAN(0, "kcore", &record.opts.kcore, "copy /proc/kcore"), OPT_STRING('u', "uid", &record.opts.target.uid_str, "user", "user to profile"), @@ -2561,6 +2596,10 @@ static struct option __record_options[] = { "libpfm4 event selector. use 'perf list' to list available events", parse_libpfm_events_option), #endif + OPT_CALLBACK(0, "control", &record.opts, "fd:ctl-fd[,ack-fd]", + "Listen on ctl-fd descriptor for command to control measurement ('enable': enable events, 'disable': disable events).\n" + "\t\t\t Optionally send control command completion ('ack\\n') to ack-fd descriptor.", + parse_control_option), OPT_END() }; @@ -2722,7 +2761,7 @@ int cmd_record(int argc, const char **argv) record.opts.tail_synthesize = true; if (rec->evlist->core.nr_entries == 0 && - __perf_evlist__add_default(rec->evlist, !record.opts.no_samples) < 0) { + __evlist__add_default(rec->evlist, !record.opts.no_samples) < 0) { pr_err("Not enough memory for event selector list\n"); goto out; } @@ -2766,6 +2805,14 @@ int cmd_record(int argc, const char **argv) if (rec->opts.full_auxtrace) rec->buildid_all = true; + if (rec->opts.text_poke) { + err = record__config_text_poke(rec->evlist); + if (err) { + pr_err("record__config_text_poke failed, error %d\n", err); + goto out; + } + } + if (record_opts__config(&rec->opts)) { err = -EINVAL; goto out; diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 5f1d2a878fad..3c74c9c0f3c3 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -338,7 +338,7 @@ static int process_read_event(struct perf_tool *tool, static int report__setup_sample_type(struct report *rep) { struct perf_session *session = rep->session; - u64 sample_type = perf_evlist__combined_sample_type(session->evlist); + u64 sample_type = evlist__combined_sample_type(session->evlist); bool is_pipe = perf_data__is_pipe(session->data); if (session->itrace_synth_opts->callchain || @@ -410,8 +410,7 @@ static int report__setup_sample_type(struct report *rep) } /* ??? handle more cases than just ANY? */ - if (!(perf_evlist__combined_branch_type(session->evlist) & - PERF_SAMPLE_BRANCH_ANY)) + if (!(evlist__combined_branch_type(session->evlist) & PERF_SAMPLE_BRANCH_ANY)) rep->nonany_branch_mode = true; #if !defined(HAVE_LIBUNWIND_SUPPORT) && !defined(HAVE_DWARF_SUPPORT) @@ -1093,7 +1092,7 @@ static int process_attr(struct perf_tool *tool __maybe_unused, * Check if we need to enable callchains based * on events sample_type. */ - sample_type = perf_evlist__combined_sample_type(*pevlist); + sample_type = evlist__combined_sample_type(*pevlist); callchain_param_setup(sample_type); return 0; } @@ -1333,6 +1332,9 @@ int cmd_report(int argc, const char **argv) if (report.mmaps_mode) report.tasks_mode = true; + if (dump_trace) + report.tool.ordered_events = false; + if (quiet) perf_quiet_option(); @@ -1389,7 +1391,7 @@ repeat: has_br_stack = perf_header__has_feat(&session->header, HEADER_BRANCH_STACK); - if (perf_evlist__combined_sample_type(session->evlist) & PERF_SAMPLE_STACK_USER) + if (evlist__combined_sample_type(session->evlist) & PERF_SAMPLE_STACK_USER) has_br_stack = false; setup_forced_leader(&report, session->evlist); diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index 459e4229945e..e6fc297cee91 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -2398,6 +2398,15 @@ static void timehist_print_wakeup_event(struct perf_sched *sched, printf("\n"); } +static int timehist_sched_wakeup_ignore(struct perf_tool *tool __maybe_unused, + union perf_event *event __maybe_unused, + struct evsel *evsel __maybe_unused, + struct perf_sample *sample __maybe_unused, + struct machine *machine __maybe_unused) +{ + return 0; +} + static int timehist_sched_wakeup_event(struct perf_tool *tool, union perf_event *event __maybe_unused, struct evsel *evsel, @@ -2575,7 +2584,8 @@ static int timehist_sched_change_event(struct perf_tool *tool, } if (!sched->idle_hist || thread->tid == 0) { - timehist_update_runtime_stats(tr, t, tprev); + if (!cpu_list || test_bit(sample->cpu, cpu_bitmap)) + timehist_update_runtime_stats(tr, t, tprev); if (sched->idle_hist) { struct idle_thread_runtime *itr = (void *)tr; @@ -2848,6 +2858,9 @@ static void timehist_print_summary(struct perf_sched *sched, printf("\nIdle stats:\n"); for (i = 0; i < idle_max_cpu; ++i) { + if (cpu_list && !test_bit(i, cpu_bitmap)) + continue; + t = idle_threads[i]; if (!t) continue; @@ -2958,9 +2971,10 @@ static int timehist_check_attr(struct perf_sched *sched, static int perf_sched__timehist(struct perf_sched *sched) { - const struct evsel_str_handler handlers[] = { + struct evsel_str_handler handlers[] = { { "sched:sched_switch", timehist_sched_switch_event, }, { "sched:sched_wakeup", timehist_sched_wakeup_event, }, + { "sched:sched_waking", timehist_sched_wakeup_event, }, { "sched:sched_wakeup_new", timehist_sched_wakeup_event, }, }; const struct evsel_str_handler migrate_handlers[] = { @@ -3018,6 +3032,11 @@ static int perf_sched__timehist(struct perf_sched *sched) setup_pager(); + /* prefer sched_waking if it is captured */ + if (perf_evlist__find_tracepoint_by_name(session->evlist, + "sched:sched_waking")) + handlers[1].handler = timehist_sched_wakeup_ignore; + /* setup per-evsel handlers */ if (perf_session__set_tracepoints_handlers(session, handlers)) goto out; @@ -3330,12 +3349,16 @@ static int __cmd_record(int argc, const char **argv) "-e", "sched:sched_stat_iowait", "-e", "sched:sched_stat_runtime", "-e", "sched:sched_process_fork", - "-e", "sched:sched_wakeup", "-e", "sched:sched_wakeup_new", "-e", "sched:sched_migrate_task", }; + struct tep_event *waking_event; - rec_argc = ARRAY_SIZE(record_args) + argc - 1; + /* + * +2 for either "-e", "sched:sched_wakeup" or + * "-e", "sched:sched_waking" + */ + rec_argc = ARRAY_SIZE(record_args) + 2 + argc - 1; rec_argv = calloc(rec_argc + 1, sizeof(char *)); if (rec_argv == NULL) @@ -3344,6 +3367,13 @@ static int __cmd_record(int argc, const char **argv) for (i = 0; i < ARRAY_SIZE(record_args); i++) rec_argv[i] = strdup(record_args[i]); + rec_argv[i++] = "-e"; + waking_event = trace_event__tp_format("sched", "sched_waking"); + if (!IS_ERR(waking_event)) + rec_argv[i++] = strdup("sched:sched_waking"); + else + rec_argv[i++] = strdup("sched:sched_wakeup"); + for (j = 1; j < (unsigned int)argc; j++, i++) rec_argv[i] = argv[j]; diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 447457786362..484ce6067d23 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -82,38 +82,64 @@ static bool native_arch; unsigned int scripting_max_stack = PERF_MAX_STACK_DEPTH; enum perf_output_field { - PERF_OUTPUT_COMM = 1U << 0, - PERF_OUTPUT_TID = 1U << 1, - PERF_OUTPUT_PID = 1U << 2, - PERF_OUTPUT_TIME = 1U << 3, - PERF_OUTPUT_CPU = 1U << 4, - PERF_OUTPUT_EVNAME = 1U << 5, - PERF_OUTPUT_TRACE = 1U << 6, - PERF_OUTPUT_IP = 1U << 7, - PERF_OUTPUT_SYM = 1U << 8, - PERF_OUTPUT_DSO = 1U << 9, - PERF_OUTPUT_ADDR = 1U << 10, - PERF_OUTPUT_SYMOFFSET = 1U << 11, - PERF_OUTPUT_SRCLINE = 1U << 12, - PERF_OUTPUT_PERIOD = 1U << 13, - PERF_OUTPUT_IREGS = 1U << 14, - PERF_OUTPUT_BRSTACK = 1U << 15, - PERF_OUTPUT_BRSTACKSYM = 1U << 16, - PERF_OUTPUT_DATA_SRC = 1U << 17, - PERF_OUTPUT_WEIGHT = 1U << 18, - PERF_OUTPUT_BPF_OUTPUT = 1U << 19, - PERF_OUTPUT_CALLINDENT = 1U << 20, - PERF_OUTPUT_INSN = 1U << 21, - PERF_OUTPUT_INSNLEN = 1U << 22, - PERF_OUTPUT_BRSTACKINSN = 1U << 23, - PERF_OUTPUT_BRSTACKOFF = 1U << 24, - PERF_OUTPUT_SYNTH = 1U << 25, - PERF_OUTPUT_PHYS_ADDR = 1U << 26, - PERF_OUTPUT_UREGS = 1U << 27, - PERF_OUTPUT_METRIC = 1U << 28, - PERF_OUTPUT_MISC = 1U << 29, - PERF_OUTPUT_SRCCODE = 1U << 30, - PERF_OUTPUT_IPC = 1U << 31, + PERF_OUTPUT_COMM = 1ULL << 0, + PERF_OUTPUT_TID = 1ULL << 1, + PERF_OUTPUT_PID = 1ULL << 2, + PERF_OUTPUT_TIME = 1ULL << 3, + PERF_OUTPUT_CPU = 1ULL << 4, + PERF_OUTPUT_EVNAME = 1ULL << 5, + PERF_OUTPUT_TRACE = 1ULL << 6, + PERF_OUTPUT_IP = 1ULL << 7, + PERF_OUTPUT_SYM = 1ULL << 8, + PERF_OUTPUT_DSO = 1ULL << 9, + PERF_OUTPUT_ADDR = 1ULL << 10, + PERF_OUTPUT_SYMOFFSET = 1ULL << 11, + PERF_OUTPUT_SRCLINE = 1ULL << 12, + PERF_OUTPUT_PERIOD = 1ULL << 13, + PERF_OUTPUT_IREGS = 1ULL << 14, + PERF_OUTPUT_BRSTACK = 1ULL << 15, + PERF_OUTPUT_BRSTACKSYM = 1ULL << 16, + PERF_OUTPUT_DATA_SRC = 1ULL << 17, + PERF_OUTPUT_WEIGHT = 1ULL << 18, + PERF_OUTPUT_BPF_OUTPUT = 1ULL << 19, + PERF_OUTPUT_CALLINDENT = 1ULL << 20, + PERF_OUTPUT_INSN = 1ULL << 21, + PERF_OUTPUT_INSNLEN = 1ULL << 22, + PERF_OUTPUT_BRSTACKINSN = 1ULL << 23, + PERF_OUTPUT_BRSTACKOFF = 1ULL << 24, + PERF_OUTPUT_SYNTH = 1ULL << 25, + PERF_OUTPUT_PHYS_ADDR = 1ULL << 26, + PERF_OUTPUT_UREGS = 1ULL << 27, + PERF_OUTPUT_METRIC = 1ULL << 28, + PERF_OUTPUT_MISC = 1ULL << 29, + PERF_OUTPUT_SRCCODE = 1ULL << 30, + PERF_OUTPUT_IPC = 1ULL << 31, + PERF_OUTPUT_TOD = 1ULL << 32, +}; + +struct perf_script { + struct perf_tool tool; + struct perf_session *session; + bool show_task_events; + bool show_mmap_events; + bool show_switch_events; + bool show_namespace_events; + bool show_lost_events; + bool show_round_events; + bool show_bpf_events; + bool show_cgroup_events; + bool show_text_poke_events; + bool allocated; + bool per_event_dump; + bool stitch_lbr; + struct evswitch evswitch; + struct perf_cpu_map *cpus; + struct perf_thread_map *threads; + int name_width; + const char *time_str; + struct perf_time_interval *ptime_range; + int range_size; + int range_num; }; struct output_option { @@ -152,6 +178,7 @@ struct output_option { {.str = "misc", .field = PERF_OUTPUT_MISC}, {.str = "srccode", .field = PERF_OUTPUT_SRCCODE}, {.str = "ipc", .field = PERF_OUTPUT_IPC}, + {.str = "tod", .field = PERF_OUTPUT_TOD}, }; enum { @@ -388,7 +415,7 @@ static int evsel__check_stype(struct evsel *evsel, u64 sample_type, const char * return evsel__do_check_stype(evsel, sample_type, sample_msg, field, false); } -static int perf_evsel__check_attr(struct evsel *evsel, struct perf_session *session) +static int evsel__check_attr(struct evsel *evsel, struct perf_session *session) { struct perf_event_attr *attr = &evsel->core.attr; bool allow_user_set; @@ -443,8 +470,7 @@ static int perf_evsel__check_attr(struct evsel *evsel, struct perf_session *sess return -EINVAL; } if (PRINT_FIELD(BRSTACKINSN) && !allow_user_set && - !(perf_evlist__combined_branch_type(session->evlist) & - PERF_SAMPLE_BRANCH_ANY)) { + !(evlist__combined_branch_type(session->evlist) & PERF_SAMPLE_BRANCH_ANY)) { pr_err("Display of branch stack assembler requested, but non all-branch filter set\n" "Hint: run 'perf record -b ...'\n"); return -EINVAL; @@ -503,6 +529,7 @@ static void set_print_ip_opts(struct perf_event_attr *attr) */ static int perf_session__check_output_opt(struct perf_session *session) { + bool tod = false; unsigned int j; struct evsel *evsel; @@ -522,13 +549,14 @@ static int perf_session__check_output_opt(struct perf_session *session) } if (evsel && output[j].fields && - perf_evsel__check_attr(evsel, session)) + evsel__check_attr(evsel, session)) return -1; if (evsel == NULL) continue; set_print_ip_opts(&evsel->core.attr); + tod |= output[j].fields & PERF_OUTPUT_TOD; } if (!no_callchain) { @@ -569,13 +597,17 @@ static int perf_session__check_output_opt(struct perf_session *session) } } + if (tod && !session->header.env.clock.enabled) { + pr_err("Can't provide 'tod' time, missing clock data. " + "Please record with -k/--clockid option.\n"); + return -1; + } out: return 0; } static int perf_sample__fprintf_regs(struct regs_dump *regs, uint64_t mask, - FILE *fp -) + FILE *fp) { unsigned i = 0, r; int printed = 0; @@ -593,6 +625,56 @@ static int perf_sample__fprintf_regs(struct regs_dump *regs, uint64_t mask, return printed; } +#define DEFAULT_TOD_FMT "%F %H:%M:%S" + +static char* +tod_scnprintf(struct perf_script *script, char *buf, int buflen, + u64 timestamp) +{ + u64 tod_ns, clockid_ns; + struct perf_env *env; + unsigned long nsec; + struct tm ltime; + char date[64]; + time_t sec; + + buf[0] = '\0'; + if (buflen < 64 || !script) + return buf; + + env = &script->session->header.env; + if (!env->clock.enabled) { + scnprintf(buf, buflen, "disabled"); + return buf; + } + + clockid_ns = env->clock.clockid_ns; + tod_ns = env->clock.tod_ns; + + if (timestamp > clockid_ns) + tod_ns += timestamp - clockid_ns; + else + tod_ns -= clockid_ns - timestamp; + + sec = (time_t) (tod_ns / NSEC_PER_SEC); + nsec = tod_ns - sec * NSEC_PER_SEC; + + if (localtime_r(&sec, <ime) == NULL) { + scnprintf(buf, buflen, "failed"); + } else { + strftime(date, sizeof(date), DEFAULT_TOD_FMT, <ime); + + if (symbol_conf.nanosecs) { + snprintf(buf, buflen, "%s.%09lu", date, nsec); + } else { + snprintf(buf, buflen, "%s.%06lu", + date, nsec / NSEC_PER_USEC); + } + } + + return buf; +} + static int perf_sample__fprintf_iregs(struct perf_sample *sample, struct perf_event_attr *attr, FILE *fp) { @@ -607,7 +689,8 @@ static int perf_sample__fprintf_uregs(struct perf_sample *sample, attr->sample_regs_user, fp); } -static int perf_sample__fprintf_start(struct perf_sample *sample, +static int perf_sample__fprintf_start(struct perf_script *script, + struct perf_sample *sample, struct thread *thread, struct evsel *evsel, u32 type, FILE *fp) @@ -616,6 +699,7 @@ static int perf_sample__fprintf_start(struct perf_sample *sample, unsigned long secs; unsigned long long nsecs; int printed = 0; + char tstr[128]; if (PRINT_FIELD(COMM)) { if (latency_format) @@ -684,6 +768,11 @@ static int perf_sample__fprintf_start(struct perf_sample *sample, printed += ret; } + if (PRINT_FIELD(TOD)) { + tod_scnprintf(script, tstr, sizeof(tstr), sample->time); + printed += fprintf(fp, "%s ", tstr); + } + if (PRINT_FIELD(TIME)) { u64 t = sample->time; if (reltime) { @@ -1668,31 +1757,7 @@ static int perf_sample__fprintf_synth(struct perf_sample *sample, return 0; } -struct perf_script { - struct perf_tool tool; - struct perf_session *session; - bool show_task_events; - bool show_mmap_events; - bool show_switch_events; - bool show_namespace_events; - bool show_lost_events; - bool show_round_events; - bool show_bpf_events; - bool show_cgroup_events; - bool allocated; - bool per_event_dump; - bool stitch_lbr; - struct evswitch evswitch; - struct perf_cpu_map *cpus; - struct perf_thread_map *threads; - int name_width; - const char *time_str; - struct perf_time_interval *ptime_range; - int range_size; - int range_num; -}; - -static int perf_evlist__max_name_len(struct evlist *evlist) +static int evlist__max_name_len(struct evlist *evlist) { struct evsel *evsel; int max = 0; @@ -1739,7 +1804,7 @@ static void script_print_metric(struct perf_stat_config *config __maybe_unused, if (!fmt) return; - perf_sample__fprintf_start(mctx->sample, mctx->thread, mctx->evsel, + perf_sample__fprintf_start(NULL, mctx->sample, mctx->thread, mctx->evsel, PERF_RECORD_SAMPLE, mctx->fp); fputs("\tmetric: ", mctx->fp); if (color) @@ -1754,7 +1819,7 @@ static void script_new_line(struct perf_stat_config *config __maybe_unused, { struct metric_ctx *mctx = ctx; - perf_sample__fprintf_start(mctx->sample, mctx->thread, mctx->evsel, + perf_sample__fprintf_start(NULL, mctx->sample, mctx->thread, mctx->evsel, PERF_RECORD_SAMPLE, mctx->fp); fputs("\tmetric: ", mctx->fp); } @@ -1865,7 +1930,7 @@ static void process_event(struct perf_script *script, ++es->samples; - perf_sample__fprintf_start(sample, thread, evsel, + perf_sample__fprintf_start(script, sample, thread, evsel, PERF_RECORD_SAMPLE, fp); if (PRINT_FIELD(PERIOD)) @@ -1875,7 +1940,7 @@ static void process_event(struct perf_script *script, const char *evname = evsel__name(evsel); if (!script->name_width) - script->name_width = perf_evlist__max_name_len(script->session->evlist); + script->name_width = evlist__max_name_len(script->session->evlist); fprintf(fp, "%*s: ", script->name_width, evname ?: "[unknown]"); } @@ -2120,7 +2185,7 @@ static int process_attr(struct perf_tool *tool, union perf_event *event, } if (evsel->core.attr.sample_type) { - err = perf_evsel__check_attr(evsel, scr->session); + err = evsel__check_attr(evsel, scr->session); if (err) return err; } @@ -2129,7 +2194,7 @@ static int process_attr(struct perf_tool *tool, union perf_event *event, * Check if we need to enable callchains based * on events sample_type. */ - sample_type = perf_evlist__combined_sample_type(evlist); + sample_type = evlist__combined_sample_type(evlist); callchain_param_setup(sample_type); /* Enable fields for callchain entries */ @@ -2174,11 +2239,11 @@ static int print_event_with_time(struct perf_tool *tool, thread = machine__findnew_thread(machine, pid, tid); if (thread && evsel) { - perf_sample__fprintf_start(sample, thread, evsel, + perf_sample__fprintf_start(script, sample, thread, evsel, event->header.type, stdout); } - perf_event__fprintf(event, stdout); + perf_event__fprintf(event, machine, stdout); thread__put(thread); @@ -2313,7 +2378,7 @@ process_finished_round_event(struct perf_tool *tool __maybe_unused, struct ordered_events *oe __maybe_unused) { - perf_event__fprintf(event, stdout); + perf_event__fprintf(event, NULL, stdout); return 0; } @@ -2330,6 +2395,18 @@ process_bpf_events(struct perf_tool *tool __maybe_unused, sample->tid); } +static int process_text_poke_events(struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + if (perf_event__process_text_poke(tool, event, sample, machine) < 0) + return -1; + + return print_event(tool, event, sample, machine, sample->pid, + sample->tid); +} + static void sig_handler(int sig __maybe_unused) { session_done = 1; @@ -2438,6 +2515,10 @@ static int __cmd_script(struct perf_script *script) script->tool.ksymbol = process_bpf_events; script->tool.bpf = process_bpf_events; } + if (script->show_text_poke_events) { + script->tool.ksymbol = process_bpf_events; + script->tool.text_poke = process_text_poke_events; + } if (perf_script__setup_per_event_dump(script)) { pr_err("Couldn't create the per event dump files\n"); @@ -3171,7 +3252,7 @@ static int have_cmd(int argc, const char **argv) static void script__setup_sample_type(struct perf_script *script) { struct perf_session *session = script->session; - u64 sample_type = perf_evlist__combined_sample_type(session->evlist); + u64 sample_type = evlist__combined_sample_type(session->evlist); if (symbol_conf.use_callchain || symbol_conf.cumulate_callchain) { if ((sample_type & PERF_SAMPLE_REGS_USER) && @@ -3423,7 +3504,7 @@ int cmd_script(int argc, const char **argv) "Fields: comm,tid,pid,time,cpu,event,trace,ip,sym,dso," "addr,symoff,srcline,period,iregs,uregs,brstack," "brstacksym,flags,bpf-output,brstackinsn,brstackoff," - "callindent,insn,insnlen,synth,phys_addr,metric,misc,ipc", + "callindent,insn,insnlen,synth,phys_addr,metric,misc,ipc,tod", parse_output_fields), OPT_BOOLEAN('a', "all-cpus", &system_wide, "system-wide collection from all CPUs"), @@ -3474,6 +3555,8 @@ int cmd_script(int argc, const char **argv) "Show round events (if recorded)"), OPT_BOOLEAN('\0', "show-bpf-events", &script.show_bpf_events, "Show bpf related events (if recorded)"), + OPT_BOOLEAN('\0', "show-text-poke-events", &script.show_text_poke_events, + "Show text poke related events (if recorded)"), OPT_BOOLEAN('\0', "per-event-dump", &script.per_event_dump, "Dump trace output to files named by the monitored events"), OPT_BOOLEAN('f', "force", &symbol_conf.force, "don't complain, do it"), diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 9be020e0098a..fddc97cac984 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -188,6 +188,8 @@ static struct perf_stat_config stat_config = { .metric_only_len = METRIC_ONLY_LEN, .walltime_nsecs_stats = &walltime_nsecs_stats, .big_num = true, + .ctl_fd = -1, + .ctl_fd_ack = -1 }; static bool cpus_map_matched(struct evsel *a, struct evsel *b) @@ -402,7 +404,7 @@ static void read_counters(struct timespec *rs) { struct evsel *counter; - if (!stat_config.summary && (read_affinity_counters(rs) < 0)) + if (!stat_config.stop_read_counter && (read_affinity_counters(rs) < 0)) return; evlist__for_each_entry(evsel_list, counter) { @@ -475,18 +477,38 @@ static void process_interval(void) print_counters(&rs, 0, NULL); } +static bool handle_interval(unsigned int interval, int *times) +{ + if (interval) { + process_interval(); + if (interval_count && !(--(*times))) + return true; + } + return false; +} + static void enable_counters(void) { - if (stat_config.initial_delay) + if (stat_config.initial_delay < 0) { + pr_info(EVLIST_DISABLED_MSG); + return; + } + + if (stat_config.initial_delay > 0) { + pr_info(EVLIST_DISABLED_MSG); usleep(stat_config.initial_delay * USEC_PER_MSEC); + } /* * We need to enable counters only if: * - we don't have tracee (attaching to task or cpu) * - we have initial delay configured */ - if (!target__none(&target) || stat_config.initial_delay) + if (!target__none(&target) || stat_config.initial_delay) { evlist__enable(evsel_list); + if (stat_config.initial_delay > 0) + pr_info(EVLIST_ENABLED_MSG); + } } static void disable_counters(void) @@ -540,6 +562,86 @@ static bool is_target_alive(struct target *_target, return false; } +static void process_evlist(struct evlist *evlist, unsigned int interval) +{ + enum evlist_ctl_cmd cmd = EVLIST_CTL_CMD_UNSUPPORTED; + + if (evlist__ctlfd_process(evlist, &cmd) > 0) { + switch (cmd) { + case EVLIST_CTL_CMD_ENABLE: + pr_info(EVLIST_ENABLED_MSG); + if (interval) + process_interval(); + break; + case EVLIST_CTL_CMD_DISABLE: + if (interval) + process_interval(); + pr_info(EVLIST_DISABLED_MSG); + break; + case EVLIST_CTL_CMD_ACK: + case EVLIST_CTL_CMD_UNSUPPORTED: + default: + break; + } + } +} + +static void compute_tts(struct timespec *time_start, struct timespec *time_stop, + int *time_to_sleep) +{ + int tts = *time_to_sleep; + struct timespec time_diff; + + diff_timespec(&time_diff, time_stop, time_start); + + tts -= time_diff.tv_sec * MSEC_PER_SEC + + time_diff.tv_nsec / NSEC_PER_MSEC; + + if (tts < 0) + tts = 0; + + *time_to_sleep = tts; +} + +static int dispatch_events(bool forks, int timeout, int interval, int *times) +{ + int child_exited = 0, status = 0; + int time_to_sleep, sleep_time; + struct timespec time_start, time_stop; + + if (interval) + sleep_time = interval; + else if (timeout) + sleep_time = timeout; + else + sleep_time = 1000; + + time_to_sleep = sleep_time; + + while (!done) { + if (forks) + child_exited = waitpid(child_pid, &status, WNOHANG); + else + child_exited = !is_target_alive(&target, evsel_list->core.threads) ? 1 : 0; + + if (child_exited) + break; + + clock_gettime(CLOCK_MONOTONIC, &time_start); + if (!(evlist__poll(evsel_list, time_to_sleep) > 0)) { /* poll timeout or EINTR */ + if (timeout || handle_interval(interval, times)) + break; + time_to_sleep = sleep_time; + } else { /* fd revent */ + process_evlist(evsel_list, interval); + clock_gettime(CLOCK_MONOTONIC, &time_stop); + compute_tts(&time_start, &time_stop, &time_to_sleep); + } + } + + return status; +} + enum counter_recovery { COUNTER_SKIP, COUNTER_RETRY, @@ -603,7 +705,6 @@ static int __run_perf_stat(int argc, const char **argv, int run_idx) char msg[BUFSIZ]; unsigned long long t0, t1; struct evsel *counter; - struct timespec ts; size_t l; int status = 0; const bool forks = (argc > 0); @@ -612,17 +713,6 @@ static int __run_perf_stat(int argc, const char **argv, int run_idx) int i, cpu; bool second_pass = false; - if (interval) { - ts.tv_sec = interval / USEC_PER_MSEC; - ts.tv_nsec = (interval % USEC_PER_MSEC) * NSEC_PER_MSEC; - } else if (timeout) { - ts.tv_sec = timeout / USEC_PER_MSEC; - ts.tv_nsec = (timeout % USEC_PER_MSEC) * NSEC_PER_MSEC; - } else { - ts.tv_sec = 1; - ts.tv_nsec = 0; - } - if (forks) { if (perf_evlist__prepare_workload(evsel_list, &target, argv, is_pipe, workload_exec_failed_signal) < 0) { @@ -779,16 +869,8 @@ try_again_reset: perf_evlist__start_workload(evsel_list); enable_counters(); - if (interval || timeout) { - while (!waitpid(child_pid, &status, WNOHANG)) { - nanosleep(&ts, NULL); - if (timeout) - break; - process_interval(); - if (interval_count && !(--times)) - break; - } - } + if (interval || timeout || evlist__ctlfd_initialized(evsel_list)) + status = dispatch_events(forks, timeout, interval, ×); if (child_pid != -1) { if (timeout) kill(child_pid, SIGTERM); @@ -805,18 +887,7 @@ try_again_reset: psignal(WTERMSIG(status), argv[0]); } else { enable_counters(); - while (!done) { - nanosleep(&ts, NULL); - if (!is_target_alive(&target, evsel_list->core.threads)) - break; - if (timeout) - break; - if (interval) { - process_interval(); - if (interval_count && !(--times)) - break; - } - } + status = dispatch_events(forks, timeout, interval, ×); } disable_counters(); @@ -826,9 +897,9 @@ try_again_reset: if (stat_config.walltime_run_table) stat_config.walltime_run[run_idx] = t1 - t0; - if (interval) { + if (interval && stat_config.summary) { stat_config.interval = 0; - stat_config.summary = true; + stat_config.stop_read_counter = true; init_stats(&walltime_nsecs_stats); update_stats(&walltime_nsecs_stats, t1 - t0); @@ -970,6 +1041,33 @@ static int parse_metric_groups(const struct option *opt, &stat_config.metric_events); } +static int parse_control_option(const struct option *opt, + const char *str, + int unset __maybe_unused) +{ + char *comma = NULL, *endptr = NULL; + struct perf_stat_config *config = (struct perf_stat_config *)opt->value; + + if (strncmp(str, "fd:", 3)) + return -EINVAL; + + config->ctl_fd = strtoul(&str[3], &endptr, 0); + if (endptr == &str[3]) + return -EINVAL; + + comma = strchr(str, ','); + if (comma) { + if (endptr != comma) + return -EINVAL; + + config->ctl_fd_ack = strtoul(comma + 1, &endptr, 0); + if (endptr == comma + 1 || *endptr != '\0') + return -EINVAL; + } + + return 0; +} + static struct option stat_options[] = { OPT_BOOLEAN('T', "transaction", &transaction_run, "hardware transaction statistics"), @@ -1041,8 +1139,8 @@ static struct option stat_options[] = { "aggregate counts per thread", AGGR_THREAD), OPT_SET_UINT(0, "per-node", &stat_config.aggr_mode, "aggregate counts per numa node", AGGR_NODE), - OPT_UINTEGER('D', "delay", &stat_config.initial_delay, - "ms to wait before starting measurement after program start"), + OPT_INTEGER('D', "delay", &stat_config.initial_delay, + "ms to wait before starting measurement after program start (-1: start with events disabled)"), OPT_CALLBACK_NOOPT(0, "metric-only", &stat_config.metric_only, NULL, "Only print computed metrics. No raw values", enable_metric_only), OPT_BOOLEAN(0, "metric-no-group", &stat_config.metric_no_group, @@ -1066,11 +1164,17 @@ static struct option stat_options[] = { "Use with 'percore' event qualifier to show the event " "counts of one hardware thread by sum up total hardware " "threads of same physical core"), + OPT_BOOLEAN(0, "summary", &stat_config.summary, + "print summary for interval mode"), #ifdef HAVE_LIBPFM OPT_CALLBACK(0, "pfm-events", &evsel_list, "event", "libpfm4 event selector. use 'perf list' to list available events", parse_libpfm_events_option), #endif + OPT_CALLBACK(0, "control", &stat_config, "fd:ctl-fd[,ack-fd]", + "Listen on ctl-fd descriptor for command to control measurement ('enable': enable events, 'disable': disable events).\n" + "\t\t\t Optionally send control command completion ('ack\\n') to ack-fd descriptor.", + parse_control_option), OPT_END() }; @@ -1679,19 +1783,17 @@ static int add_default_attributes(void) if (target__has_cpu(&target)) default_attrs0[0].config = PERF_COUNT_SW_CPU_CLOCK; - if (perf_evlist__add_default_attrs(evsel_list, default_attrs0) < 0) + if (evlist__add_default_attrs(evsel_list, default_attrs0) < 0) return -1; if (pmu_have_event("cpu", "stalled-cycles-frontend")) { - if (perf_evlist__add_default_attrs(evsel_list, - frontend_attrs) < 0) + if (evlist__add_default_attrs(evsel_list, frontend_attrs) < 0) return -1; } if (pmu_have_event("cpu", "stalled-cycles-backend")) { - if (perf_evlist__add_default_attrs(evsel_list, - backend_attrs) < 0) + if (evlist__add_default_attrs(evsel_list, backend_attrs) < 0) return -1; } - if (perf_evlist__add_default_attrs(evsel_list, default_attrs1) < 0) + if (evlist__add_default_attrs(evsel_list, default_attrs1) < 0) return -1; } @@ -1701,21 +1803,21 @@ static int add_default_attributes(void) return 0; /* Append detailed run extra attributes: */ - if (perf_evlist__add_default_attrs(evsel_list, detailed_attrs) < 0) + if (evlist__add_default_attrs(evsel_list, detailed_attrs) < 0) return -1; if (detailed_run < 2) return 0; /* Append very detailed run extra attributes: */ - if (perf_evlist__add_default_attrs(evsel_list, very_detailed_attrs) < 0) + if (evlist__add_default_attrs(evsel_list, very_detailed_attrs) < 0) return -1; if (detailed_run < 3) return 0; /* Append very, very detailed run extra attributes: */ - return perf_evlist__add_default_attrs(evsel_list, very_very_detailed_attrs); + return evlist__add_default_attrs(evsel_list, very_very_detailed_attrs); } static const char * const stat_record_usage[] = { @@ -2242,6 +2344,9 @@ int cmd_stat(int argc, const char **argv) signal(SIGALRM, skip_signal); signal(SIGABRT, skip_signal); + if (evlist__initialize_ctlfd(evsel_list, stat_config.ctl_fd, stat_config.ctl_fd_ack)) + goto out; + status = 0; for (run_idx = 0; forever || run_idx < stat_config.run_count; run_idx++) { if (stat_config.run_count != 1 && verbose > 0) @@ -2261,6 +2366,8 @@ int cmd_stat(int argc, const char **argv) if (!forever && status != -1 && (!interval || stat_config.summary)) print_counters(NULL, argc, argv); + evlist__finalize_ctlfd(evsel_list); + if (STAT_RECORD) { /* * We synthesize the kernel mmap record just so that older tools @@ -2307,6 +2414,7 @@ out: evlist__delete(evsel_list); + metricgroup__rblist_exit(&stat_config.metric_events); runtime_stat_delete(&stat_config); return status; diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 13889d73f8dd..7c64134472c7 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -1627,7 +1627,7 @@ int cmd_top(int argc, const char **argv) goto out_delete_evlist; if (!top.evlist->core.nr_entries && - perf_evlist__add_default(top.evlist) < 0) { + evlist__add_default(top.evlist) < 0) { pr_err("Not enough memory for event selector list\n"); goto out_delete_evlist; } @@ -1746,6 +1746,7 @@ int cmd_top(int argc, const char **argv) goto out_delete_evlist; } +#ifdef HAVE_LIBBPF_SUPPORT if (!top.record_opts.no_bpf_event) { top.sb_evlist = evlist__new(); @@ -1759,6 +1760,7 @@ int cmd_top(int argc, const char **argv) goto out_delete_evlist; } } +#endif if (perf_evlist__start_sb_thread(top.sb_evlist, target)) { pr_debug("Couldn't start the BPF side band thread:\nBPF programs starting from now on won't be annotatable\n"); diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 4cbb64edc998..bea461b6f937 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -3917,8 +3917,7 @@ static int trace__run(struct trace *trace, int argc, const char **argv) } if (trace->sched && - perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime", - trace__sched_stat_runtime)) + evlist__add_newtp(evlist, "sched", "sched_stat_runtime", trace__sched_stat_runtime)) goto out_error_sched_stat_runtime; /* * If a global cgroup was set, apply it to all the events without an @@ -4150,11 +4149,11 @@ out_error_raw_syscalls: goto out_error; out_error_mmap: - perf_evlist__strerror_mmap(evlist, errno, errbuf, sizeof(errbuf)); + evlist__strerror_mmap(evlist, errno, errbuf, sizeof(errbuf)); goto out_error; out_error_open: - perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf)); + evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf)); out_error: fprintf(trace->output, "%s\n", errbuf); @@ -4813,7 +4812,7 @@ int cmd_trace(int argc, const char **argv) "per thread proc mmap processing timeout in ms"), OPT_CALLBACK('G', "cgroup", &trace, "name", "monitor event in cgroup name only", trace__parse_cgroups), - OPT_UINTEGER('D', "delay", &trace.opts.initial_delay, + OPT_INTEGER('D', "delay", &trace.opts.initial_delay, "ms to wait before starting measurement after program " "start"), OPTS_EVSWITCH(&trace.evswitch), diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh index 94c2bc22c2bb..0b4d6431b072 100755 --- a/tools/perf/check-headers.sh +++ b/tools/perf/check-headers.sh @@ -128,6 +128,9 @@ check arch/x86/lib/insn.c '-I "^#include [\"<]\(../include/\)*asm/in # diff non-symmetric files check_2 tools/perf/arch/x86/entry/syscalls/syscall_64.tbl arch/x86/entry/syscalls/syscall_64.tbl +# These will require a beauty_check when we get some more like that +check_2 tools/perf/trace/beauty/include/linux/socket.h include/linux/socket.h + # check duplicated library files check_2 tools/perf/util/hashmap.h tools/lib/bpf/hashmap.h check_2 tools/perf/util/hashmap.c tools/lib/bpf/hashmap.c diff --git a/tools/perf/pmu-events/arch/powerpc/power9/metrics.json b/tools/perf/pmu-events/arch/powerpc/power9/metrics.json index 80816d6402e9..f8784c608479 100644 --- a/tools/perf/pmu-events/arch/powerpc/power9/metrics.json +++ b/tools/perf/pmu-events/arch/powerpc/power9/metrics.json @@ -60,7 +60,7 @@ }, { "BriefDescription": "Stalls due to short latency decimal floating ops.", - "MetricExpr": "(PM_CMPLU_STALL_DFU - PM_CMPLU_STALL_DFLONG)/PM_RUN_INST_CMPL", + "MetricExpr": "dfu_stall_cpi - dflong_stall_cpi", "MetricGroup": "cpi_breakdown", "MetricName": "dfu_other_stall_cpi" }, @@ -72,7 +72,7 @@ }, { "BriefDescription": "Completion stall by Dcache miss which resolved off node memory/cache", - "MetricExpr": "(PM_CMPLU_STALL_DMISS_L3MISS - PM_CMPLU_STALL_DMISS_L21_L31 - PM_CMPLU_STALL_DMISS_LMEM - PM_CMPLU_STALL_DMISS_REMOTE)/PM_RUN_INST_CMPL", + "MetricExpr": "dmiss_non_local_stall_cpi - dmiss_remote_stall_cpi", "MetricGroup": "cpi_breakdown", "MetricName": "dmiss_distant_stall_cpi" }, @@ -90,7 +90,7 @@ }, { "BriefDescription": "Completion stall due to cache miss that resolves in the L2 or L3 without conflict", - "MetricExpr": "(PM_CMPLU_STALL_DMISS_L2L3 - PM_CMPLU_STALL_DMISS_L2L3_CONFLICT)/PM_RUN_INST_CMPL", + "MetricExpr": "dmiss_l2l3_stall_cpi - dmiss_l2l3_conflict_stall_cpi", "MetricGroup": "cpi_breakdown", "MetricName": "dmiss_l2l3_noconflict_stall_cpi" }, @@ -114,7 +114,7 @@ }, { "BriefDescription": "Completion stall by Dcache miss which resolved outside of local memory", - "MetricExpr": "(PM_CMPLU_STALL_DMISS_L3MISS - PM_CMPLU_STALL_DMISS_L21_L31 - PM_CMPLU_STALL_DMISS_LMEM)/PM_RUN_INST_CMPL", + "MetricExpr": "dmiss_l3miss_stall_cpi - dmiss_l21_l31_stall_cpi - dmiss_lmem_stall_cpi", "MetricGroup": "cpi_breakdown", "MetricName": "dmiss_non_local_stall_cpi" }, @@ -126,7 +126,7 @@ }, { "BriefDescription": "Stalls due to short latency double precision ops.", - "MetricExpr": "(PM_CMPLU_STALL_DP - PM_CMPLU_STALL_DPLONG)/PM_RUN_INST_CMPL", + "MetricExpr": "dp_stall_cpi - dplong_stall_cpi", "MetricGroup": "cpi_breakdown", "MetricName": "dp_other_stall_cpi" }, @@ -155,7 +155,7 @@ "MetricName": "emq_full_stall_cpi" }, { - "MetricExpr": "(PM_CMPLU_STALL_ERAT_MISS + PM_CMPLU_STALL_EMQ_FULL)/PM_RUN_INST_CMPL", + "MetricExpr": "erat_miss_stall_cpi + emq_full_stall_cpi", "MetricGroup": "cpi_breakdown", "MetricName": "emq_stall_cpi" }, @@ -173,7 +173,7 @@ }, { "BriefDescription": "Completion stall due to execution units for other reasons.", - "MetricExpr": "(PM_CMPLU_STALL_EXEC_UNIT - PM_CMPLU_STALL_FXU - PM_CMPLU_STALL_DP - PM_CMPLU_STALL_DFU - PM_CMPLU_STALL_PM - PM_CMPLU_STALL_CRYPTO - PM_CMPLU_STALL_VFXU - PM_CMPLU_STALL_VDP)/PM_RUN_INST_CMPL", + "MetricExpr": "exec_unit_stall_cpi - scalar_stall_cpi - vector_stall_cpi", "MetricGroup": "cpi_breakdown", "MetricName": "exec_unit_other_stall_cpi" }, @@ -197,7 +197,7 @@ }, { "BriefDescription": "Stalls due to short latency integer ops", - "MetricExpr": "(PM_CMPLU_STALL_FXU - PM_CMPLU_STALL_FXLONG)/PM_RUN_INST_CMPL", + "MetricExpr": "fxu_stall_cpi - fxlong_stall_cpi", "MetricGroup": "cpi_breakdown", "MetricName": "fxu_other_stall_cpi" }, @@ -221,7 +221,7 @@ }, { "BriefDescription": "Instruction Completion Table other stalls", - "MetricExpr": "(PM_ICT_NOSLOT_CYC - PM_ICT_NOSLOT_IC_MISS - PM_ICT_NOSLOT_BR_MPRED_ICMISS - PM_ICT_NOSLOT_BR_MPRED - PM_ICT_NOSLOT_DISP_HELD)/PM_RUN_INST_CMPL", + "MetricExpr": "nothing_dispatched_cpi - ict_noslot_ic_miss_cpi - ict_noslot_br_mpred_icmiss_cpi - ict_noslot_br_mpred_cpi - ict_noslot_disp_held_cpi", "MetricGroup": "cpi_breakdown", "MetricName": "ict_noslot_cyc_other_cpi" }, @@ -245,7 +245,7 @@ }, { "BriefDescription": "ICT_NOSLOT_DISP_HELD_OTHER_CPI", - "MetricExpr": "(PM_ICT_NOSLOT_DISP_HELD - PM_ICT_NOSLOT_DISP_HELD_HB_FULL - PM_ICT_NOSLOT_DISP_HELD_SYNC - PM_ICT_NOSLOT_DISP_HELD_TBEGIN - PM_ICT_NOSLOT_DISP_HELD_ISSQ)/PM_RUN_INST_CMPL", + "MetricExpr": "ict_noslot_disp_held_cpi - ict_noslot_disp_held_hb_full_cpi - ict_noslot_disp_held_sync_cpi - ict_noslot_disp_held_tbegin_cpi - ict_noslot_disp_held_issq_cpi", "MetricGroup": "cpi_breakdown", "MetricName": "ict_noslot_disp_held_other_cpi" }, @@ -263,7 +263,7 @@ }, { "BriefDescription": "ICT_NOSLOT_IC_L2_CPI", - "MetricExpr": "(PM_ICT_NOSLOT_IC_MISS - PM_ICT_NOSLOT_IC_L3 - PM_ICT_NOSLOT_IC_L3MISS)/PM_RUN_INST_CMPL", + "MetricExpr": "ict_noslot_ic_miss_cpi - ict_noslot_ic_l3_cpi - ict_noslot_ic_l3miss_cpi", "MetricGroup": "cpi_breakdown", "MetricName": "ict_noslot_ic_l2_cpi" }, @@ -286,7 +286,7 @@ "MetricName": "ict_noslot_ic_miss_cpi" }, { - "MetricExpr": "(PM_NTC_ISSUE_HELD_DARQ_FULL + PM_NTC_ISSUE_HELD_ARB + PM_NTC_ISSUE_HELD_OTHER)/PM_RUN_INST_CMPL", + "MetricExpr": "ntc_issue_held_darq_full_cpi + ntc_issue_held_arb_cpi + ntc_issue_held_other_cpi", "MetricGroup": "cpi_breakdown", "MetricName": "issue_hold_cpi" }, @@ -327,7 +327,7 @@ "MetricName": "lrq_other_stall_cpi" }, { - "MetricExpr": "(PM_CMPLU_STALL_LMQ_FULL + PM_CMPLU_STALL_ST_FWD + PM_CMPLU_STALL_LHS + PM_CMPLU_STALL_LSU_MFSPR + PM_CMPLU_STALL_LARX + PM_CMPLU_STALL_LRQ_OTHER)/PM_RUN_INST_CMPL", + "MetricExpr": "lmq_full_stall_cpi + st_fwd_stall_cpi + lhs_stall_cpi + lsu_mfspr_stall_cpi + larx_stall_cpi + lrq_other_stall_cpi", "MetricGroup": "cpi_breakdown", "MetricName": "lrq_stall_cpi" }, @@ -338,7 +338,7 @@ "MetricName": "lsaq_arb_stall_cpi" }, { - "MetricExpr": "(PM_CMPLU_STALL_LRQ_FULL + PM_CMPLU_STALL_SRQ_FULL + PM_CMPLU_STALL_LSAQ_ARB)/PM_RUN_INST_CMPL", + "MetricExpr": "lrq_full_stall_cpi + srq_full_stall_cpi + lsaq_arb_stall_cpi", "MetricGroup": "cpi_breakdown", "MetricName": "lsaq_stall_cpi" }, @@ -362,7 +362,7 @@ }, { "BriefDescription": "Completion LSU stall for other reasons", - "MetricExpr": "(PM_CMPLU_STALL_LSU - PM_CMPLU_STALL_LSU_FIN - PM_CMPLU_STALL_STORE_FINISH - PM_CMPLU_STALL_STORE_DATA - PM_CMPLU_STALL_EIEIO - PM_CMPLU_STALL_STCX - PM_CMPLU_STALL_SLB - PM_CMPLU_STALL_TEND - PM_CMPLU_STALL_PASTE - PM_CMPLU_STALL_TLBIE - PM_CMPLU_STALL_STORE_PIPE_ARB - PM_CMPLU_STALL_STORE_FIN_ARB - PM_CMPLU_STALL_LOAD_FINISH + PM_CMPLU_STALL_DCACHE_MISS - PM_CMPLU_STALL_LMQ_FULL - PM_CMPLU_STALL_ST_FWD - PM_CMPLU_STALL_LHS - PM_CMPLU_STALL_LSU_MFSPR - PM_CMPLU_STALL_LARX - PM_CMPLU_STALL_LRQ_OTHER + PM_CMPLU_STALL_ERAT_MISS + PM_CMPLU_STALL_EMQ_FULL - PM_CMPLU_STALL_LRQ_FULL - PM_CMPLU_STALL_SRQ_FULL - PM_CMPLU_STALL_LSAQ_ARB) / PM_RUN_INST_CMPL", + "MetricExpr": "lsu_stall_cpi - lsu_fin_stall_cpi - store_finish_stall_cpi - srq_stall_cpi - load_finish_stall_cpi + lsu_stall_dcache_miss_cpi - lrq_stall_cpi + emq_stall_cpi - lsaq_stall_cpi", "MetricGroup": "cpi_breakdown", "MetricName": "lsu_other_stall_cpi" }, @@ -434,13 +434,13 @@ }, { "BriefDescription": "Cycles unaccounted for.", - "MetricExpr": "(PM_RUN_CYC - PM_1PLUS_PPC_CMPL - PM_CMPLU_STALL_THRD - PM_CMPLU_STALL - PM_ICT_NOSLOT_CYC)/PM_RUN_INST_CMPL", + "MetricExpr": "run_cpi - completion_cpi - thread_block_stall_cpi - stall_cpi - nothing_dispatched_cpi", "MetricGroup": "cpi_breakdown", "MetricName": "other_cpi" }, { "BriefDescription": "Completion stall for other reasons", - "MetricExpr": "(PM_CMPLU_STALL - PM_CMPLU_STALL_NTC_DISP_FIN - PM_CMPLU_STALL_NTC_FLUSH - PM_CMPLU_STALL_LSU - PM_CMPLU_STALL_EXEC_UNIT - PM_CMPLU_STALL_BRU)/PM_RUN_INST_CMPL", + "MetricExpr": "stall_cpi - ntc_disp_fin_stall_cpi - ntc_flush_stall_cpi - lsu_stall_cpi - exec_unit_stall_cpi - bru_stall_cpi", "MetricGroup": "cpi_breakdown", "MetricName": "other_stall_cpi" }, @@ -469,7 +469,7 @@ "MetricName": "run_cyc_cpi" }, { - "MetricExpr": "(PM_CMPLU_STALL_FXU + PM_CMPLU_STALL_DP + PM_CMPLU_STALL_DFU + PM_CMPLU_STALL_PM + PM_CMPLU_STALL_CRYPTO)/PM_RUN_INST_CMPL", + "MetricExpr": "fxu_stall_cpi + dp_stall_cpi + dfu_stall_cpi + pm_stall_cpi + crypto_stall_cpi", "MetricGroup": "cpi_breakdown", "MetricName": "scalar_stall_cpi" }, @@ -492,7 +492,7 @@ "MetricName": "srq_full_stall_cpi" }, { - "MetricExpr": "(PM_CMPLU_STALL_STORE_DATA + PM_CMPLU_STALL_EIEIO + PM_CMPLU_STALL_STCX + PM_CMPLU_STALL_SLB + PM_CMPLU_STALL_TEND + PM_CMPLU_STALL_PASTE + PM_CMPLU_STALL_TLBIE + PM_CMPLU_STALL_STORE_PIPE_ARB + PM_CMPLU_STALL_STORE_FIN_ARB)/PM_RUN_INST_CMPL", + "MetricExpr": "store_data_stall_cpi + eieio_stall_cpi + stcx_stall_cpi + slb_stall_cpi + tend_stall_cpi + paste_stall_cpi + tlbie_stall_cpi + store_pipe_arb_stall_cpi + store_fin_arb_stall_cpi", "MetricGroup": "cpi_breakdown", "MetricName": "srq_stall_cpi" }, @@ -558,7 +558,7 @@ }, { "BriefDescription": "Vector stalls due to small latency double precision ops", - "MetricExpr": "(PM_CMPLU_STALL_VDP - PM_CMPLU_STALL_VDPLONG)/PM_RUN_INST_CMPL", + "MetricExpr": "vdp_stall_cpi - vdplong_stall_cpi", "MetricGroup": "cpi_breakdown", "MetricName": "vdp_other_stall_cpi" }, @@ -575,7 +575,7 @@ "MetricName": "vdplong_stall_cpi" }, { - "MetricExpr": "(PM_CMPLU_STALL_VFXU + PM_CMPLU_STALL_VDP)/PM_RUN_INST_CMPL", + "MetricExpr": "vfxu_stall_cpi + vdp_stall_cpi", "MetricGroup": "cpi_breakdown", "MetricName": "vector_stall_cpi" }, @@ -587,7 +587,7 @@ }, { "BriefDescription": "Vector stalls due to small latency integer ops", - "MetricExpr": "(PM_CMPLU_STALL_VFXU - PM_CMPLU_STALL_VFXLONG)/PM_RUN_INST_CMPL", + "MetricExpr": "vfxu_stall_cpi - vfxlong_stall_cpi", "MetricGroup": "cpi_breakdown", "MetricName": "vfxu_other_stall_cpi" }, @@ -1844,7 +1844,7 @@ }, { "BriefDescription": "% of DL1 reloads from Private L3, other core per Inst", - "MetricExpr": "(PM_DATA_FROM_L31_MOD + PM_DATA_FROM_L31_SHR) * 100 / PM_RUN_INST_CMPL", + "MetricExpr": "dl1_reload_from_l31_mod_rate_percent + dl1_reload_from_l31_shr_rate_percent", "MetricName": "dl1_reload_from_l31_rate_percent" }, { @@ -1979,7 +1979,7 @@ }, { "BriefDescription": "Completion stall because a different thread was using the completion pipe", - "MetricExpr": "(PM_CMPLU_STALL_THRD - PM_CMPLU_STALL_EXCEPTION - PM_CMPLU_STALL_ANY_SYNC - PM_CMPLU_STALL_SYNC_PMU_INT - PM_CMPLU_STALL_SPEC_FINISH - PM_CMPLU_STALL_FLUSH_ANY_THREAD - PM_CMPLU_STALL_LSU_FLUSH_NEXT - PM_CMPLU_STALL_NESTED_TBEGIN - PM_CMPLU_STALL_NESTED_TEND - PM_CMPLU_STALL_MTFPSCR)/PM_RUN_INST_CMPL", + "MetricExpr": "thread_block_stall_cpi - exception_stall_cpi - any_sync_stall_cpi - sync_pmu_int_stall_cpi - spec_finish_stall_cpi - flush_any_thread_stall_cpi - lsu_flush_next_stall_cpi - nested_tbegin_stall_cpi - nested_tend_stall_cpi - mtfpscr_stall_cpi", "MetricName": "other_thread_cmpl_stall" }, { diff --git a/tools/perf/pmu-events/arch/powerpc/power9/nest_metrics.json b/tools/perf/pmu-events/arch/powerpc/power9/nest_metrics.json index c121e526442a..8383a37647ad 100644 --- a/tools/perf/pmu-events/arch/powerpc/power9/nest_metrics.json +++ b/tools/perf/pmu-events/arch/powerpc/power9/nest_metrics.json @@ -15,5 +15,40 @@ "MetricExpr": "(hv_24x7@PM_PB_CYC\\,chip\\=?@ )", "MetricName": "PowerBUS_Frequency", "ScaleUnit": "2.5e-7GHz" + }, + { + "MetricExpr" : "nest_mcs01_imc@PM_MCS01_128B_RD_DISP_PORT01@ + nest_mcs01_imc@PM_MCS01_128B_RD_DISP_PORT23@", + "MetricName" : "mcs01-read", + "MetricGroup" : "memory_bw", + "ScaleUnit": "6.1e-5MB" + }, + { + "MetricExpr" : "nest_mcs23_imc@PM_MCS23_128B_RD_DISP_PORT01@ + nest_mcs23_imc@PM_MCS23_128B_RD_DISP_PORT23@", + "MetricName" : "mcs23-read", + "MetricGroup" : "memory_bw", + "ScaleUnit": "6.1e-5MB" + }, + { + "MetricExpr" : "nest_mcs01_imc@PM_MCS01_128B_WR_DISP_PORT01@ + nest_mcs01_imc@PM_MCS01_128B_WR_DISP_PORT23@", + "MetricName" : "mcs01-write", + "MetricGroup" : "memory_bw", + "ScaleUnit": "6.1e-5MB" + }, + { + "MetricExpr" : "nest_mcs23_imc@PM_MCS23_128B_WR_DISP_PORT01@ + nest_mcs23_imc@PM_MCS23_128B_WR_DISP_PORT23@", + "MetricName" : "mcs23-write", + "MetricGroup" : "memory-bandwidth", + "ScaleUnit": "6.1e-5MB" + }, + { + "MetricExpr" : "nest_powerbus0_imc@PM_PB_CYC@", + "MetricName" : "powerbus_freq", + "ScaleUnit": "1e-9GHz" + }, + { + "MetricExpr" : "(nest_mcs01_imc@PM_MCS01_128B_RD_DISP_PORT01@ + nest_mcs01_imc@PM_MCS01_128B_RD_DISP_PORT23@ + nest_mcs23_imc@PM_MCS23_128B_RD_DISP_PORT01@ + nest_mcs23_imc@PM_MCS23_128B_RD_DISP_PORT23@ + nest_mcs01_imc@PM_MCS01_128B_WR_DISP_PORT01@ + nest_mcs01_imc@PM_MCS01_128B_WR_DISP_PORT23@ + nest_mcs23_imc@PM_MCS23_128B_WR_DISP_PORT01@ + nest_mcs23_imc@PM_MCS23_128B_WR_DISP_PORT23@)", + "MetricName" : "Memory-bandwidth-MCS", + "MetricGroup" : "memory_bw", + "ScaleUnit": "6.1e-5MB" } ] diff --git a/tools/perf/pmu-events/arch/x86/amdzen1/core.json b/tools/perf/pmu-events/arch/x86/amdzen1/core.json index 7e1aa8273935..653b11b23399 100644 --- a/tools/perf/pmu-events/arch/x86/amdzen1/core.json +++ b/tools/perf/pmu-events/arch/x86/amdzen1/core.json @@ -61,7 +61,7 @@ { "EventName": "ex_ret_brn_ind_misp", "EventCode": "0xca", - "BriefDescription": "Retired Indirect Branch Instructions Mispredicted.", + "BriefDescription": "Retired Indirect Branch Instructions Mispredicted." }, { "EventName": "ex_ret_mmx_fp_instr.sse_instr", diff --git a/tools/perf/pmu-events/arch/x86/amdzen2/core.json b/tools/perf/pmu-events/arch/x86/amdzen2/core.json index de89e5a44ff1..4b75183da94a 100644 --- a/tools/perf/pmu-events/arch/x86/amdzen2/core.json +++ b/tools/perf/pmu-events/arch/x86/amdzen2/core.json @@ -125,6 +125,6 @@ { "EventName": "ex_ret_fus_brnch_inst", "EventCode": "0x1d0", - "BriefDescription": "Retired Fused Instructions. The number of fuse-branch instructions retired per cycle. The number of events logged per cycle can vary from 0-8.", + "BriefDescription": "Retired Fused Instructions. The number of fuse-branch instructions retired per cycle. The number of events logged per cycle can vary from 0-8." } ] diff --git a/tools/perf/pmu-events/jevents.c b/tools/perf/pmu-events/jevents.c index fa86c5f997cc..fc9c158bfa13 100644 --- a/tools/perf/pmu-events/jevents.c +++ b/tools/perf/pmu-events/jevents.c @@ -137,7 +137,7 @@ static char *fixregex(char *s) return s; /* allocate space for a new string */ - fixed = (char *) malloc(len + 1); + fixed = (char *) malloc(len + esc_count + 1); if (!fixed) return NULL; diff --git a/tools/perf/tests/Build b/tools/perf/tests/Build index cd00498a5dce..84352fc49a20 100644 --- a/tools/perf/tests/Build +++ b/tools/perf/tests/Build @@ -59,6 +59,7 @@ perf-y += genelf.o perf-y += api-io.o perf-y += demangle-java-test.o perf-y += pfm.o +perf-y += parse-metric.o $(OUTPUT)tests/llvm-src-base.c: tests/bpf-script-example.c tests/Build $(call rule_mkdir) diff --git a/tools/perf/tests/attr/README b/tools/perf/tests/attr/README index 430024f618f1..a36f49fb4dbe 100644 --- a/tools/perf/tests/attr/README +++ b/tools/perf/tests/attr/README @@ -49,10 +49,12 @@ Following tests are defined (with perf commands): perf record --call-graph fp kill (test-record-graph-fp) perf record --group -e cycles,instructions kill (test-record-group) perf record -e '{cycles,instructions}' kill (test-record-group1) + perf record -e '{cycles/period=1/,instructions/period=2/}:S' kill (test-record-group2) perf record -D kill (test-record-no-delay) perf record -i kill (test-record-no-inherit) perf record -n kill (test-record-no-samples) perf record -c 100 -P kill (test-record-period) + perf record -c 1 --pfm-events=cycles:period=2 (test-record-pfm-period) perf record -R kill (test-record-raw) perf stat -e cycles kill (test-stat-basic) perf stat kill (test-stat-default) diff --git a/tools/perf/tests/attr/test-record-group2 b/tools/perf/tests/attr/test-record-group2 new file mode 100644 index 000000000000..6b9f8d182ce1 --- /dev/null +++ b/tools/perf/tests/attr/test-record-group2 @@ -0,0 +1,29 @@ +[config] +command = record +args = --no-bpf-event -e '{cycles/period=1234000/,instructions/period=6789000/}:S' kill >/dev/null 2>&1 +ret = 1 + +[event-1:base-record] +fd=1 +group_fd=-1 +config=0|1 +sample_period=1234000 +sample_type=87 +read_format=12 +inherit=0 +freq=0 + +[event-2:base-record] +fd=2 +group_fd=1 +config=0|1 +sample_period=6789000 +sample_type=87 +read_format=12 +disabled=0 +inherit=0 +mmap=0 +comm=0 +freq=0 +enable_on_exec=0 +task=0 diff --git a/tools/perf/tests/attr/test-record-pfm-period b/tools/perf/tests/attr/test-record-pfm-period new file mode 100644 index 000000000000..368f5b814094 --- /dev/null +++ b/tools/perf/tests/attr/test-record-pfm-period @@ -0,0 +1,9 @@ +[config] +command = record +args = --no-bpf-event -c 10000 --pfm-events=cycles:period=77777 kill >/dev/null 2>&1 +ret = 1 + +[event:base-record] +sample_period=77777 +sample_type=7 +freq=0 diff --git a/tools/perf/tests/bp_signal.c b/tools/perf/tests/bp_signal.c index da8ec1e8e064..cc9fbcedb364 100644 --- a/tools/perf/tests/bp_signal.c +++ b/tools/perf/tests/bp_signal.c @@ -45,10 +45,13 @@ volatile long the_var; #if defined (__x86_64__) extern void __test_function(volatile long *ptr); asm ( + ".pushsection .text;" ".globl __test_function\n" + ".type __test_function, @function;" "__test_function:\n" "incq (%rdi)\n" - "ret\n"); + "ret\n" + ".popsection\n"); #else static void __test_function(volatile long *ptr) { diff --git a/tools/perf/tests/bpf.c b/tools/perf/tests/bpf.c index 5d20bf8397f0..cd77e334e577 100644 --- a/tools/perf/tests/bpf.c +++ b/tools/perf/tests/bpf.c @@ -197,7 +197,7 @@ static int do_test(struct bpf_object *obj, int (*func)(void), perf_mmap__read_done(&md->core); } - if (count != expect) { + if (count != expect * evlist->core.nr_entries) { pr_debug("BPF filter result incorrect, expected %d, got %d samples\n", expect, count); goto out_delete_evlist; } diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c index da5b6cc23f25..d328caaba45d 100644 --- a/tools/perf/tests/builtin-test.c +++ b/tools/perf/tests/builtin-test.c @@ -338,6 +338,10 @@ static struct test generic_tests[] = { .func = test__demangle_java, }, { + .desc = "Parse and process metrics", + .func = test__parse_metric, + }, + { .func = NULL, }, }; diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c index 6fe221d31f07..035c9123549a 100644 --- a/tools/perf/tests/code-reading.c +++ b/tools/perf/tests/code-reading.c @@ -678,7 +678,7 @@ static int do_test_code_reading(bool try_kcore) if (verbose > 0) { char errbuf[512]; - perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf)); + evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf)); pr_debug("perf_evlist__open() failed!\n%s\n", errbuf); } diff --git a/tools/perf/tests/expr.c b/tools/perf/tests/expr.c index 1cb02ca2b15f..4d01051951cd 100644 --- a/tools/perf/tests/expr.c +++ b/tools/perf/tests/expr.c @@ -18,14 +18,15 @@ static int test(struct expr_parse_ctx *ctx, const char *e, double val2) int test__expr(struct test *t __maybe_unused, int subtest __maybe_unused) { + struct expr_id_data *val_ptr; const char *p; - double val, *val_ptr; + double val; int ret; struct expr_parse_ctx ctx; expr__ctx_init(&ctx); - expr__add_id(&ctx, strdup("FOO"), 1); - expr__add_id(&ctx, strdup("BAR"), 2); + expr__add_id_val(&ctx, strdup("FOO"), 1); + expr__add_id_val(&ctx, strdup("BAR"), 2); ret = test(&ctx, "1+1", 2); ret |= test(&ctx, "FOO+BAR", 3); @@ -39,6 +40,14 @@ int test__expr(struct test *t __maybe_unused, int subtest __maybe_unused) ret |= test(&ctx, "1+1 if 3*4 else 0", 2); ret |= test(&ctx, "1.1 + 2.1", 3.2); ret |= test(&ctx, ".1 + 2.", 2.1); + ret |= test(&ctx, "d_ratio(1, 2)", 0.5); + ret |= test(&ctx, "d_ratio(2.5, 0)", 0); + ret |= test(&ctx, "1.1 < 2.2", 1); + ret |= test(&ctx, "2.2 > 1.1", 1); + ret |= test(&ctx, "1.1 < 1.1", 0); + ret |= test(&ctx, "2.2 > 2.2", 0); + ret |= test(&ctx, "2.2 < 1.1", 0); + ret |= test(&ctx, "1.1 > 2.2", 0); if (ret) return ret; diff --git a/tools/perf/tests/fdarray.c b/tools/perf/tests/fdarray.c index c7c81c4a5b2b..d9eca8e86a6b 100644 --- a/tools/perf/tests/fdarray.c +++ b/tools/perf/tests/fdarray.c @@ -12,6 +12,7 @@ static void fdarray__init_revents(struct fdarray *fda, short revents) for (fd = 0; fd < fda->nr; ++fd) { fda->entries[fd].fd = fda->nr - fd; + fda->entries[fd].events = revents; fda->entries[fd].revents = revents; } } @@ -29,7 +30,7 @@ static int fdarray__fprintf_prefix(struct fdarray *fda, const char *prefix, FILE int test__fdarray__filter(struct test *test __maybe_unused, int subtest __maybe_unused) { - int nr_fds, expected_fd[2], fd, err = TEST_FAIL; + int nr_fds, err = TEST_FAIL; struct fdarray *fda = fdarray__new(5, 5); if (fda == NULL) { @@ -55,7 +56,6 @@ int test__fdarray__filter(struct test *test __maybe_unused, int subtest __maybe_ fdarray__init_revents(fda, POLLHUP); fda->entries[2].revents = POLLIN; - expected_fd[0] = fda->entries[2].fd; pr_debug("\nfiltering all but fda->entries[2]:"); fdarray__fprintf_prefix(fda, "before", stderr); @@ -66,17 +66,9 @@ int test__fdarray__filter(struct test *test __maybe_unused, int subtest __maybe_ goto out_delete; } - if (fda->entries[0].fd != expected_fd[0]) { - pr_debug("\nfda->entries[0].fd=%d != %d\n", - fda->entries[0].fd, expected_fd[0]); - goto out_delete; - } - fdarray__init_revents(fda, POLLHUP); fda->entries[0].revents = POLLIN; - expected_fd[0] = fda->entries[0].fd; fda->entries[3].revents = POLLIN; - expected_fd[1] = fda->entries[3].fd; pr_debug("\nfiltering all but (fda->entries[0], fda->entries[3]):"); fdarray__fprintf_prefix(fda, "before", stderr); @@ -88,14 +80,6 @@ int test__fdarray__filter(struct test *test __maybe_unused, int subtest __maybe_ goto out_delete; } - for (fd = 0; fd < 2; ++fd) { - if (fda->entries[fd].fd != expected_fd[fd]) { - pr_debug("\nfda->entries[%d].fd=%d != %d\n", fd, - fda->entries[fd].fd, expected_fd[fd]); - goto out_delete; - } - } - pr_debug("\n"); err = 0; @@ -128,7 +112,7 @@ int test__fdarray__add(struct test *test __maybe_unused, int subtest __maybe_unu } #define FDA_ADD(_idx, _fd, _revents, _nr) \ - if (fdarray__add(fda, _fd, _revents) < 0) { \ + if (fdarray__add(fda, _fd, _revents, fdarray_flag__default) < 0) { \ pr_debug("\n%d: fdarray__add(fda, %d, %d) failed!", \ __LINE__,_fd, _revents); \ goto out_delete; \ diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c index 895188b63f96..aae0fd9045c1 100644 --- a/tools/perf/tests/parse-events.c +++ b/tools/perf/tests/parse-events.c @@ -631,6 +631,34 @@ static int test__checkterms_simple(struct list_head *terms) TEST_ASSERT_VAL("wrong val", term->val.num == 1); TEST_ASSERT_VAL("wrong config", !strcmp(term->config, "umask")); + /* + * read + * + * The perf_pmu__test_parse_init injects 'read' term into + * perf_pmu_events_list, so 'read' is evaluated as read term + * and not as raw event with 'ead' hex value. + */ + term = list_entry(term->list.next, struct parse_events_term, list); + TEST_ASSERT_VAL("wrong type term", + term->type_term == PARSE_EVENTS__TERM_TYPE_USER); + TEST_ASSERT_VAL("wrong type val", + term->type_val == PARSE_EVENTS__TERM_TYPE_NUM); + TEST_ASSERT_VAL("wrong val", term->val.num == 1); + TEST_ASSERT_VAL("wrong config", !strcmp(term->config, "read")); + + /* + * r0xead + * + * To be still able to pass 'ead' value with 'r' syntax, + * we added support to parse 'r0xHEX' event. + */ + term = list_entry(term->list.next, struct parse_events_term, list); + TEST_ASSERT_VAL("wrong type term", + term->type_term == PARSE_EVENTS__TERM_TYPE_CONFIG); + TEST_ASSERT_VAL("wrong type val", + term->type_val == PARSE_EVENTS__TERM_TYPE_NUM); + TEST_ASSERT_VAL("wrong val", term->val.num == 0xead); + TEST_ASSERT_VAL("wrong config", !term->config); return 0; } @@ -691,7 +719,7 @@ static int test__group2(struct evlist *evlist) TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv); - TEST_ASSERT_VAL("wrong exclude guest", !evsel->core.attr.exclude_guest); + TEST_ASSERT_VAL("wrong exclude guest", evsel->core.attr.exclude_guest); TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host); TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip); TEST_ASSERT_VAL("wrong leader", evsel__is_group_leader(evsel)); @@ -814,7 +842,7 @@ static int test__group3(struct evlist *evlist __maybe_unused) TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv); - TEST_ASSERT_VAL("wrong exclude guest", !evsel->core.attr.exclude_guest); + TEST_ASSERT_VAL("wrong exclude guest", evsel->core.attr.exclude_guest); TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host); TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip); TEST_ASSERT_VAL("wrong leader", evsel__is_group_leader(evsel)); @@ -1766,6 +1794,11 @@ static struct evlist_test test__events_pmu[] = { .check = test__checkevent_raw_pmu, .id = 4, }, + { + .name = "software/r0x1a/", + .check = test__checkevent_raw_pmu, + .id = 4, + }, }; struct terms_test { @@ -1776,7 +1809,7 @@ struct terms_test { static struct terms_test test__terms[] = { [0] = { - .str = "config=10,config1,config2=3,umask=1", + .str = "config=10,config1,config2=3,umask=1,read,r0xead", .check = test__checkterms_simple, }, }; @@ -1836,6 +1869,13 @@ static int test_term(struct terms_test *t) INIT_LIST_HEAD(&terms); + /* + * The perf_pmu__test_parse_init prepares perf_pmu_events_list + * which gets freed in parse_events_terms. + */ + if (perf_pmu__test_parse_init()) + return -1; + ret = parse_events_terms(&terms, t->str); if (ret) { pr_debug("failed to parse terms '%s', err %d\n", diff --git a/tools/perf/tests/parse-metric.c b/tools/perf/tests/parse-metric.c new file mode 100644 index 000000000000..cd7331aac3bd --- /dev/null +++ b/tools/perf/tests/parse-metric.c @@ -0,0 +1,359 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/compiler.h> +#include <string.h> +#include <perf/cpumap.h> +#include <perf/evlist.h> +#include "metricgroup.h" +#include "tests.h" +#include "pmu-events/pmu-events.h" +#include "evlist.h" +#include "rblist.h" +#include "debug.h" +#include "expr.h" +#include "stat.h" +#include <perf/cpumap.h> +#include <perf/evlist.h> + +static struct pmu_event pme_test[] = { +{ + .metric_expr = "inst_retired.any / cpu_clk_unhalted.thread", + .metric_name = "IPC", + .metric_group = "group1", +}, +{ + .metric_expr = "idq_uops_not_delivered.core / (4 * (( ( cpu_clk_unhalted.thread / 2 ) * " + "( 1 + cpu_clk_unhalted.one_thread_active / cpu_clk_unhalted.ref_xclk ) )))", + .metric_name = "Frontend_Bound_SMT", +}, +{ + .metric_expr = "l1d\\-loads\\-misses / inst_retired.any", + .metric_name = "dcache_miss_cpi", +}, +{ + .metric_expr = "l1i\\-loads\\-misses / inst_retired.any", + .metric_name = "icache_miss_cycles", +}, +{ + .metric_expr = "(dcache_miss_cpi + icache_miss_cycles)", + .metric_name = "cache_miss_cycles", + .metric_group = "group1", +}, +{ + .metric_expr = "l2_rqsts.demand_data_rd_hit + l2_rqsts.pf_hit + l2_rqsts.rfo_hit", + .metric_name = "DCache_L2_All_Hits", +}, +{ + .metric_expr = "max(l2_rqsts.all_demand_data_rd - l2_rqsts.demand_data_rd_hit, 0) + " + "l2_rqsts.pf_miss + l2_rqsts.rfo_miss", + .metric_name = "DCache_L2_All_Miss", +}, +{ + .metric_expr = "dcache_l2_all_hits + dcache_l2_all_miss", + .metric_name = "DCache_L2_All", +}, +{ + .metric_expr = "d_ratio(dcache_l2_all_hits, dcache_l2_all)", + .metric_name = "DCache_L2_Hits", +}, +{ + .metric_expr = "d_ratio(dcache_l2_all_miss, dcache_l2_all)", + .metric_name = "DCache_L2_Misses", +}, +{ + .metric_expr = "ipc + m2", + .metric_name = "M1", +}, +{ + .metric_expr = "ipc + m1", + .metric_name = "M2", +}, +{ + .metric_expr = "1/m3", + .metric_name = "M3", +}, +{ + .name = NULL, +} +}; + +static struct pmu_events_map map = { + .cpuid = "test", + .version = "1", + .type = "core", + .table = pme_test, +}; + +struct value { + const char *event; + u64 val; +}; + +static u64 find_value(const char *name, struct value *values) +{ + struct value *v = values; + + while (v->event) { + if (!strcmp(name, v->event)) + return v->val; + v++; + }; + return 0; +} + +static void load_runtime_stat(struct runtime_stat *st, struct evlist *evlist, + struct value *vals) +{ + struct evsel *evsel; + u64 count; + + evlist__for_each_entry(evlist, evsel) { + count = find_value(evsel->name, vals); + perf_stat__update_shadow_stats(evsel, count, 0, st); + } +} + +static double compute_single(struct rblist *metric_events, struct evlist *evlist, + struct runtime_stat *st, const char *name) +{ + struct metric_expr *mexp; + struct metric_event *me; + struct evsel *evsel; + + evlist__for_each_entry(evlist, evsel) { + me = metricgroup__lookup(metric_events, evsel, false); + if (me != NULL) { + list_for_each_entry (mexp, &me->head, nd) { + if (strcmp(mexp->metric_name, name)) + continue; + return test_generic_metric(mexp, 0, st); + } + } + } + return 0.; +} + +static int __compute_metric(const char *name, struct value *vals, + const char *name1, double *ratio1, + const char *name2, double *ratio2) +{ + struct rblist metric_events = { + .nr_entries = 0, + }; + struct perf_cpu_map *cpus; + struct runtime_stat st; + struct evlist *evlist; + int err; + + /* + * We need to prepare evlist for stat mode running on CPU 0 + * because that's where all the stats are going to be created. + */ + evlist = evlist__new(); + if (!evlist) + return -ENOMEM; + + cpus = perf_cpu_map__new("0"); + if (!cpus) { + evlist__delete(evlist); + return -ENOMEM; + } + + perf_evlist__set_maps(&evlist->core, cpus, NULL); + + /* Parse the metric into metric_events list. */ + err = metricgroup__parse_groups_test(evlist, &map, name, + false, false, + &metric_events); + if (err) + goto out; + + err = perf_evlist__alloc_stats(evlist, false); + if (err) + goto out; + + /* Load the runtime stats with given numbers for events. */ + runtime_stat__init(&st); + load_runtime_stat(&st, evlist, vals); + + /* And execute the metric */ + if (name1 && ratio1) + *ratio1 = compute_single(&metric_events, evlist, &st, name1); + if (name2 && ratio2) + *ratio2 = compute_single(&metric_events, evlist, &st, name2); + +out: + /* ... clenup. */ + metricgroup__rblist_exit(&metric_events); + runtime_stat__exit(&st); + perf_evlist__free_stats(evlist); + perf_cpu_map__put(cpus); + evlist__delete(evlist); + return err; +} + +static int compute_metric(const char *name, struct value *vals, double *ratio) +{ + return __compute_metric(name, vals, name, ratio, NULL, NULL); +} + +static int compute_metric_group(const char *name, struct value *vals, + const char *name1, double *ratio1, + const char *name2, double *ratio2) +{ + return __compute_metric(name, vals, name1, ratio1, name2, ratio2); +} + +static int test_ipc(void) +{ + double ratio; + struct value vals[] = { + { .event = "inst_retired.any", .val = 300 }, + { .event = "cpu_clk_unhalted.thread", .val = 200 }, + { .event = NULL, }, + }; + + TEST_ASSERT_VAL("failed to compute metric", + compute_metric("IPC", vals, &ratio) == 0); + + TEST_ASSERT_VAL("IPC failed, wrong ratio", + ratio == 1.5); + return 0; +} + +static int test_frontend(void) +{ + double ratio; + struct value vals[] = { + { .event = "idq_uops_not_delivered.core", .val = 300 }, + { .event = "cpu_clk_unhalted.thread", .val = 200 }, + { .event = "cpu_clk_unhalted.one_thread_active", .val = 400 }, + { .event = "cpu_clk_unhalted.ref_xclk", .val = 600 }, + { .event = NULL, }, + }; + + TEST_ASSERT_VAL("failed to compute metric", + compute_metric("Frontend_Bound_SMT", vals, &ratio) == 0); + + TEST_ASSERT_VAL("Frontend_Bound_SMT failed, wrong ratio", + ratio == 0.45); + return 0; +} + +static int test_cache_miss_cycles(void) +{ + double ratio; + struct value vals[] = { + { .event = "l1d-loads-misses", .val = 300 }, + { .event = "l1i-loads-misses", .val = 200 }, + { .event = "inst_retired.any", .val = 400 }, + { .event = NULL, }, + }; + + TEST_ASSERT_VAL("failed to compute metric", + compute_metric("cache_miss_cycles", vals, &ratio) == 0); + + TEST_ASSERT_VAL("cache_miss_cycles failed, wrong ratio", + ratio == 1.25); + return 0; +} + + +/* + * DCache_L2_All_Hits = l2_rqsts.demand_data_rd_hit + l2_rqsts.pf_hit + l2_rqsts.rfo_hi + * DCache_L2_All_Miss = max(l2_rqsts.all_demand_data_rd - l2_rqsts.demand_data_rd_hit, 0) + + * l2_rqsts.pf_miss + l2_rqsts.rfo_miss + * DCache_L2_All = dcache_l2_all_hits + dcache_l2_all_miss + * DCache_L2_Hits = d_ratio(dcache_l2_all_hits, dcache_l2_all) + * DCache_L2_Misses = d_ratio(dcache_l2_all_miss, dcache_l2_all) + * + * l2_rqsts.demand_data_rd_hit = 100 + * l2_rqsts.pf_hit = 200 + * l2_rqsts.rfo_hi = 300 + * l2_rqsts.all_demand_data_rd = 400 + * l2_rqsts.pf_miss = 500 + * l2_rqsts.rfo_miss = 600 + * + * DCache_L2_All_Hits = 600 + * DCache_L2_All_Miss = MAX(400 - 100, 0) + 500 + 600 = 1400 + * DCache_L2_All = 600 + 1400 = 2000 + * DCache_L2_Hits = 600 / 2000 = 0.3 + * DCache_L2_Misses = 1400 / 2000 = 0.7 + */ +static int test_dcache_l2(void) +{ + double ratio; + struct value vals[] = { + { .event = "l2_rqsts.demand_data_rd_hit", .val = 100 }, + { .event = "l2_rqsts.pf_hit", .val = 200 }, + { .event = "l2_rqsts.rfo_hit", .val = 300 }, + { .event = "l2_rqsts.all_demand_data_rd", .val = 400 }, + { .event = "l2_rqsts.pf_miss", .val = 500 }, + { .event = "l2_rqsts.rfo_miss", .val = 600 }, + { .event = NULL, }, + }; + + TEST_ASSERT_VAL("failed to compute metric", + compute_metric("DCache_L2_Hits", vals, &ratio) == 0); + + TEST_ASSERT_VAL("DCache_L2_Hits failed, wrong ratio", + ratio == 0.3); + + TEST_ASSERT_VAL("failed to compute metric", + compute_metric("DCache_L2_Misses", vals, &ratio) == 0); + + TEST_ASSERT_VAL("DCache_L2_Misses failed, wrong ratio", + ratio == 0.7); + return 0; +} + +static int test_recursion_fail(void) +{ + double ratio; + struct value vals[] = { + { .event = "inst_retired.any", .val = 300 }, + { .event = "cpu_clk_unhalted.thread", .val = 200 }, + { .event = NULL, }, + }; + + TEST_ASSERT_VAL("failed to find recursion", + compute_metric("M1", vals, &ratio) == -1); + + TEST_ASSERT_VAL("failed to find recursion", + compute_metric("M3", vals, &ratio) == -1); + return 0; +} + +static int test_metric_group(void) +{ + double ratio1, ratio2; + struct value vals[] = { + { .event = "cpu_clk_unhalted.thread", .val = 200 }, + { .event = "l1d-loads-misses", .val = 300 }, + { .event = "l1i-loads-misses", .val = 200 }, + { .event = "inst_retired.any", .val = 400 }, + { .event = NULL, }, + }; + + TEST_ASSERT_VAL("failed to find recursion", + compute_metric_group("group1", vals, + "IPC", &ratio1, + "cache_miss_cycles", &ratio2) == 0); + + TEST_ASSERT_VAL("group IPC failed, wrong ratio", + ratio1 == 2.0); + + TEST_ASSERT_VAL("group cache_miss_cycles failed, wrong ratio", + ratio2 == 1.25); + return 0; +} + +int test__parse_metric(struct test *test __maybe_unused, int subtest __maybe_unused) +{ + TEST_ASSERT_VAL("IPC failed", test_ipc() == 0); + TEST_ASSERT_VAL("frontend failed", test_frontend() == 0); + TEST_ASSERT_VAL("cache_miss_cycles failed", test_cache_miss_cycles() == 0); + TEST_ASSERT_VAL("DCache_L2 failed", test_dcache_l2() == 0); + TEST_ASSERT_VAL("recursion fail failed", test_recursion_fail() == 0); + TEST_ASSERT_VAL("test metric group", test_metric_group() == 0); + return 0; +} diff --git a/tools/perf/tests/perf-record.c b/tools/perf/tests/perf-record.c index 83adfd846ccd..67d3f5aad016 100644 --- a/tools/perf/tests/perf-record.c +++ b/tools/perf/tests/perf-record.c @@ -185,14 +185,14 @@ int test__PERF_RECORD(struct test *test __maybe_unused, int subtest __maybe_unus err = perf_evlist__parse_sample(evlist, event, &sample); if (err < 0) { if (verbose > 0) - perf_event__fprintf(event, stderr); + perf_event__fprintf(event, NULL, stderr); pr_debug("Couldn't parse sample\n"); goto out_delete_evlist; } if (verbose > 0) { pr_info("%" PRIu64" %d ", sample.time, sample.cpu); - perf_event__fprintf(event, stderr); + perf_event__fprintf(event, NULL, stderr); } if (prev_time > sample.time) { diff --git a/tools/perf/tests/pmu-events.c b/tools/perf/tests/pmu-events.c index ab64b4a4e284..d3517a74d95e 100644 --- a/tools/perf/tests/pmu-events.c +++ b/tools/perf/tests/pmu-events.c @@ -274,6 +274,7 @@ static int __test__pmu_event_aliases(char *pmu_name, int *count) int res = 0; bool use_uncore_table; struct pmu_events_map *map = __test_pmu_get_events_map(); + struct perf_pmu_alias *a, *tmp; if (!map) return -1; @@ -347,6 +348,10 @@ static int __test__pmu_event_aliases(char *pmu_name, int *count) pmu_name, alias->name); } + list_for_each_entry_safe(a, tmp, &aliases, list) { + list_del(&a->list); + perf_pmu_free_alias(a); + } free(pmu); return res; } @@ -390,9 +395,9 @@ static bool is_number(const char *str) return errno == 0 && end_ptr != str; } -static int check_parse_id(const char *id, bool same_cpu, struct pmu_event *pe) +static int check_parse_id(const char *id, struct parse_events_error *error, + struct perf_pmu *fake_pmu) { - struct parse_events_error error; struct evlist *evlist; int ret; @@ -401,8 +406,18 @@ static int check_parse_id(const char *id, bool same_cpu, struct pmu_event *pe) return 0; evlist = evlist__new(); - memset(&error, 0, sizeof(error)); - ret = parse_events(evlist, id, &error); + if (!evlist) + return -ENOMEM; + ret = __parse_events(evlist, id, error, fake_pmu); + evlist__delete(evlist); + return ret; +} + +static int check_parse_cpu(const char *id, bool same_cpu, struct pmu_event *pe) +{ + struct parse_events_error error = { .idx = 0, }; + + int ret = check_parse_id(id, &error, NULL); if (ret && same_cpu) { pr_warning("Parse event failed metric '%s' id '%s' expr '%s'\n", pe->metric_name, id, pe->metric_expr); @@ -413,7 +428,18 @@ static int check_parse_id(const char *id, bool same_cpu, struct pmu_event *pe) id, pe->metric_name, pe->metric_expr); ret = 0; } - evlist__delete(evlist); + free(error.str); + free(error.help); + free(error.first_str); + free(error.first_help); + return ret; +} + +static int check_parse_fake(const char *id) +{ + struct parse_events_error error = { .idx = 0, }; + int ret = check_parse_id(id, &error, &perf_pmu__fake); + free(error.str); free(error.help); free(error.first_str); @@ -471,10 +497,10 @@ static int test_parsing(void) */ k = 1; hashmap__for_each_entry((&ctx.ids), cur, bkt) - expr__add_id(&ctx, strdup(cur->key), k++); + expr__add_id_val(&ctx, strdup(cur->key), k++); hashmap__for_each_entry((&ctx.ids), cur, bkt) { - if (check_parse_id(cur->key, map == cpus_map, + if (check_parse_cpu(cur->key, map == cpus_map, pe)) ret++; } @@ -490,6 +516,100 @@ static int test_parsing(void) return ret == 0 ? TEST_OK : TEST_SKIP; } +struct test_metric { + const char *str; +}; + +static struct test_metric metrics[] = { + { "(unc_p_power_state_occupancy.cores_c0 / unc_p_clockticks) * 100." }, + { "imx8_ddr0@read\\-cycles@ * 4 * 4", }, + { "imx8_ddr0@axid\\-read\\,axi_mask\\=0xffff\\,axi_id\\=0x0000@ * 4", }, + { "(cstate_pkg@c2\\-residency@ / msr@tsc@) * 100", }, + { "(imx8_ddr0@read\\-cycles@ + imx8_ddr0@write\\-cycles@)", }, +}; + +static int metric_parse_fake(const char *str) +{ + struct expr_parse_ctx ctx; + struct hashmap_entry *cur; + double result; + int ret = -1; + size_t bkt; + int i; + + pr_debug("parsing '%s'\n", str); + + expr__ctx_init(&ctx); + if (expr__find_other(str, NULL, &ctx, 0) < 0) { + pr_err("expr__find_other failed\n"); + return -1; + } + + /* + * Add all ids with a made up value. The value may + * trigger divide by zero when subtracted and so try to + * make them unique. + */ + i = 1; + hashmap__for_each_entry((&ctx.ids), cur, bkt) + expr__add_id_val(&ctx, strdup(cur->key), i++); + + hashmap__for_each_entry((&ctx.ids), cur, bkt) { + if (check_parse_fake(cur->key)) { + pr_err("check_parse_fake failed\n"); + goto out; + } + } + + if (expr__parse(&result, &ctx, str, 1)) + pr_err("expr__parse failed\n"); + else + ret = 0; + +out: + expr__ctx_clear(&ctx); + return ret; +} + +/* + * Parse all the metrics for current architecture, + * or all defined cpus via the 'fake_pmu' + * in parse_events. + */ +static int test_parsing_fake(void) +{ + struct pmu_events_map *map; + struct pmu_event *pe; + unsigned int i, j; + int err = 0; + + for (i = 0; i < ARRAY_SIZE(metrics); i++) { + err = metric_parse_fake(metrics[i].str); + if (err) + return err; + } + + i = 0; + for (;;) { + map = &pmu_events_map[i++]; + if (!map->table) + break; + j = 0; + for (;;) { + pe = &map->table[j++]; + if (!pe->name && !pe->metric_group && !pe->metric_name) + break; + if (!pe->metric_expr) + continue; + err = metric_parse_fake(pe->metric_expr); + if (err) + return err; + } + } + + return 0; +} + static const struct { int (*func)(void); const char *desc; @@ -506,6 +626,10 @@ static const struct { .func = test_parsing, .desc = "Parsing of PMU event table metrics", }, + { + .func = test_parsing_fake, + .desc = "Parsing of PMU event table metrics with fake PMUs", + }, }; const char *test__pmu_events_subtest_get_desc(int subtest) diff --git a/tools/perf/tests/pmu.c b/tools/perf/tests/pmu.c index 5c11fe2b3040..714e6830a758 100644 --- a/tools/perf/tests/pmu.c +++ b/tools/perf/tests/pmu.c @@ -173,6 +173,7 @@ int test__pmu(struct test *test __maybe_unused, int subtest __maybe_unused) ret = 0; } while (0); + perf_pmu__del_formats(&formats); test_format_dir_put(format); return ret; } diff --git a/tools/perf/tests/shell/record+script_probe_vfs_getname.sh b/tools/perf/tests/shell/record+script_probe_vfs_getname.sh index 54030c18bfc2..bf9e729b3ecf 100755 --- a/tools/perf/tests/shell/record+script_probe_vfs_getname.sh +++ b/tools/perf/tests/shell/record+script_probe_vfs_getname.sh @@ -20,13 +20,13 @@ file=$(mktemp /tmp/temporary_file.XXXXX) record_open_file() { echo "Recording open file:" - perf record -o ${perfdata} -e probe:vfs_getname touch $file + perf record -o ${perfdata} -e probe:vfs_getname\* touch $file } perf_script_filenames() { echo "Looking at perf.data file for vfs_getname records for the file we touched:" perf script -i ${perfdata} | \ - egrep " +touch +[0-9]+ +\[[0-9]+\] +[0-9]+\.[0-9]+: +probe:vfs_getname: +\([[:xdigit:]]+\) +pathname=\"${file}\"" + egrep " +touch +[0-9]+ +\[[0-9]+\] +[0-9]+\.[0-9]+: +probe:vfs_getname[_0-9]*: +\([[:xdigit:]]+\) +pathname=\"${file}\"" } add_probe_vfs_getname || skip_if_no_debuginfo diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h index 76a4e352eaaf..4447a516c689 100644 --- a/tools/perf/tests/tests.h +++ b/tools/perf/tests/tests.h @@ -121,6 +121,7 @@ int test__demangle_java(struct test *test, int subtest); int test__pfm(struct test *test, int subtest); const char *test__pfm_subtest_get_desc(int subtest); int test__pfm_subtest_get_nr(void); +int test__parse_metric(struct test *test, int subtest); bool test__bp_signal_is_supported(void); bool test__bp_account_is_supported(void); diff --git a/tools/perf/trace/beauty/include/linux/socket.h b/tools/perf/trace/beauty/include/linux/socket.h new file mode 100644 index 000000000000..e9cb30d8cbfb --- /dev/null +++ b/tools/perf/trace/beauty/include/linux/socket.h @@ -0,0 +1,442 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_SOCKET_H +#define _LINUX_SOCKET_H + + +#include <asm/socket.h> /* arch-dependent defines */ +#include <linux/sockios.h> /* the SIOCxxx I/O controls */ +#include <linux/uio.h> /* iovec support */ +#include <linux/types.h> /* pid_t */ +#include <linux/compiler.h> /* __user */ +#include <uapi/linux/socket.h> + +struct file; +struct pid; +struct cred; +struct socket; + +#define __sockaddr_check_size(size) \ + BUILD_BUG_ON(((size) > sizeof(struct __kernel_sockaddr_storage))) + +#ifdef CONFIG_PROC_FS +struct seq_file; +extern void socket_seq_show(struct seq_file *seq); +#endif + +typedef __kernel_sa_family_t sa_family_t; + +/* + * 1003.1g requires sa_family_t and that sa_data is char. + */ + +struct sockaddr { + sa_family_t sa_family; /* address family, AF_xxx */ + char sa_data[14]; /* 14 bytes of protocol address */ +}; + +struct linger { + int l_onoff; /* Linger active */ + int l_linger; /* How long to linger for */ +}; + +#define sockaddr_storage __kernel_sockaddr_storage + +/* + * As we do 4.4BSD message passing we use a 4.4BSD message passing + * system, not 4.3. Thus msg_accrights(len) are now missing. They + * belong in an obscure libc emulation or the bin. + */ + +struct msghdr { + void *msg_name; /* ptr to socket address structure */ + int msg_namelen; /* size of socket address structure */ + struct iov_iter msg_iter; /* data */ + + /* + * Ancillary data. msg_control_user is the user buffer used for the + * recv* side when msg_control_is_user is set, msg_control is the kernel + * buffer used for all other cases. + */ + union { + void *msg_control; + void __user *msg_control_user; + }; + bool msg_control_is_user : 1; + __kernel_size_t msg_controllen; /* ancillary data buffer length */ + unsigned int msg_flags; /* flags on received message */ + struct kiocb *msg_iocb; /* ptr to iocb for async requests */ +}; + +struct user_msghdr { + void __user *msg_name; /* ptr to socket address structure */ + int msg_namelen; /* size of socket address structure */ + struct iovec __user *msg_iov; /* scatter/gather array */ + __kernel_size_t msg_iovlen; /* # elements in msg_iov */ + void __user *msg_control; /* ancillary data */ + __kernel_size_t msg_controllen; /* ancillary data buffer length */ + unsigned int msg_flags; /* flags on received message */ +}; + +/* For recvmmsg/sendmmsg */ +struct mmsghdr { + struct user_msghdr msg_hdr; + unsigned int msg_len; +}; + +/* + * POSIX 1003.1g - ancillary data object information + * Ancillary data consits of a sequence of pairs of + * (cmsghdr, cmsg_data[]) + */ + +struct cmsghdr { + __kernel_size_t cmsg_len; /* data byte count, including hdr */ + int cmsg_level; /* originating protocol */ + int cmsg_type; /* protocol-specific type */ +}; + +/* + * Ancillary data object information MACROS + * Table 5-14 of POSIX 1003.1g + */ + +#define __CMSG_NXTHDR(ctl, len, cmsg) __cmsg_nxthdr((ctl),(len),(cmsg)) +#define CMSG_NXTHDR(mhdr, cmsg) cmsg_nxthdr((mhdr), (cmsg)) + +#define CMSG_ALIGN(len) ( ((len)+sizeof(long)-1) & ~(sizeof(long)-1) ) + +#define CMSG_DATA(cmsg) \ + ((void *)(cmsg) + sizeof(struct cmsghdr)) +#define CMSG_USER_DATA(cmsg) \ + ((void __user *)(cmsg) + sizeof(struct cmsghdr)) +#define CMSG_SPACE(len) (sizeof(struct cmsghdr) + CMSG_ALIGN(len)) +#define CMSG_LEN(len) (sizeof(struct cmsghdr) + (len)) + +#define __CMSG_FIRSTHDR(ctl,len) ((len) >= sizeof(struct cmsghdr) ? \ + (struct cmsghdr *)(ctl) : \ + (struct cmsghdr *)NULL) +#define CMSG_FIRSTHDR(msg) __CMSG_FIRSTHDR((msg)->msg_control, (msg)->msg_controllen) +#define CMSG_OK(mhdr, cmsg) ((cmsg)->cmsg_len >= sizeof(struct cmsghdr) && \ + (cmsg)->cmsg_len <= (unsigned long) \ + ((mhdr)->msg_controllen - \ + ((char *)(cmsg) - (char *)(mhdr)->msg_control))) +#define for_each_cmsghdr(cmsg, msg) \ + for (cmsg = CMSG_FIRSTHDR(msg); \ + cmsg; \ + cmsg = CMSG_NXTHDR(msg, cmsg)) + +/* + * Get the next cmsg header + * + * PLEASE, do not touch this function. If you think, that it is + * incorrect, grep kernel sources and think about consequences + * before trying to improve it. + * + * Now it always returns valid, not truncated ancillary object + * HEADER. But caller still MUST check, that cmsg->cmsg_len is + * inside range, given by msg->msg_controllen before using + * ancillary object DATA. --ANK (980731) + */ + +static inline struct cmsghdr * __cmsg_nxthdr(void *__ctl, __kernel_size_t __size, + struct cmsghdr *__cmsg) +{ + struct cmsghdr * __ptr; + + __ptr = (struct cmsghdr*)(((unsigned char *) __cmsg) + CMSG_ALIGN(__cmsg->cmsg_len)); + if ((unsigned long)((char*)(__ptr+1) - (char *) __ctl) > __size) + return (struct cmsghdr *)0; + + return __ptr; +} + +static inline struct cmsghdr * cmsg_nxthdr (struct msghdr *__msg, struct cmsghdr *__cmsg) +{ + return __cmsg_nxthdr(__msg->msg_control, __msg->msg_controllen, __cmsg); +} + +static inline size_t msg_data_left(struct msghdr *msg) +{ + return iov_iter_count(&msg->msg_iter); +} + +/* "Socket"-level control message types: */ + +#define SCM_RIGHTS 0x01 /* rw: access rights (array of int) */ +#define SCM_CREDENTIALS 0x02 /* rw: struct ucred */ +#define SCM_SECURITY 0x03 /* rw: security label */ + +struct ucred { + __u32 pid; + __u32 uid; + __u32 gid; +}; + +/* Supported address families. */ +#define AF_UNSPEC 0 +#define AF_UNIX 1 /* Unix domain sockets */ +#define AF_LOCAL 1 /* POSIX name for AF_UNIX */ +#define AF_INET 2 /* Internet IP Protocol */ +#define AF_AX25 3 /* Amateur Radio AX.25 */ +#define AF_IPX 4 /* Novell IPX */ +#define AF_APPLETALK 5 /* AppleTalk DDP */ +#define AF_NETROM 6 /* Amateur Radio NET/ROM */ +#define AF_BRIDGE 7 /* Multiprotocol bridge */ +#define AF_ATMPVC 8 /* ATM PVCs */ +#define AF_X25 9 /* Reserved for X.25 project */ +#define AF_INET6 10 /* IP version 6 */ +#define AF_ROSE 11 /* Amateur Radio X.25 PLP */ +#define AF_DECnet 12 /* Reserved for DECnet project */ +#define AF_NETBEUI 13 /* Reserved for 802.2LLC project*/ +#define AF_SECURITY 14 /* Security callback pseudo AF */ +#define AF_KEY 15 /* PF_KEY key management API */ +#define AF_NETLINK 16 +#define AF_ROUTE AF_NETLINK /* Alias to emulate 4.4BSD */ +#define AF_PACKET 17 /* Packet family */ +#define AF_ASH 18 /* Ash */ +#define AF_ECONET 19 /* Acorn Econet */ +#define AF_ATMSVC 20 /* ATM SVCs */ +#define AF_RDS 21 /* RDS sockets */ +#define AF_SNA 22 /* Linux SNA Project (nutters!) */ +#define AF_IRDA 23 /* IRDA sockets */ +#define AF_PPPOX 24 /* PPPoX sockets */ +#define AF_WANPIPE 25 /* Wanpipe API Sockets */ +#define AF_LLC 26 /* Linux LLC */ +#define AF_IB 27 /* Native InfiniBand address */ +#define AF_MPLS 28 /* MPLS */ +#define AF_CAN 29 /* Controller Area Network */ +#define AF_TIPC 30 /* TIPC sockets */ +#define AF_BLUETOOTH 31 /* Bluetooth sockets */ +#define AF_IUCV 32 /* IUCV sockets */ +#define AF_RXRPC 33 /* RxRPC sockets */ +#define AF_ISDN 34 /* mISDN sockets */ +#define AF_PHONET 35 /* Phonet sockets */ +#define AF_IEEE802154 36 /* IEEE802154 sockets */ +#define AF_CAIF 37 /* CAIF sockets */ +#define AF_ALG 38 /* Algorithm sockets */ +#define AF_NFC 39 /* NFC sockets */ +#define AF_VSOCK 40 /* vSockets */ +#define AF_KCM 41 /* Kernel Connection Multiplexor*/ +#define AF_QIPCRTR 42 /* Qualcomm IPC Router */ +#define AF_SMC 43 /* smc sockets: reserve number for + * PF_SMC protocol family that + * reuses AF_INET address family + */ +#define AF_XDP 44 /* XDP sockets */ + +#define AF_MAX 45 /* For now.. */ + +/* Protocol families, same as address families. */ +#define PF_UNSPEC AF_UNSPEC +#define PF_UNIX AF_UNIX +#define PF_LOCAL AF_LOCAL +#define PF_INET AF_INET +#define PF_AX25 AF_AX25 +#define PF_IPX AF_IPX +#define PF_APPLETALK AF_APPLETALK +#define PF_NETROM AF_NETROM +#define PF_BRIDGE AF_BRIDGE +#define PF_ATMPVC AF_ATMPVC +#define PF_X25 AF_X25 +#define PF_INET6 AF_INET6 +#define PF_ROSE AF_ROSE +#define PF_DECnet AF_DECnet +#define PF_NETBEUI AF_NETBEUI +#define PF_SECURITY AF_SECURITY +#define PF_KEY AF_KEY +#define PF_NETLINK AF_NETLINK +#define PF_ROUTE AF_ROUTE +#define PF_PACKET AF_PACKET +#define PF_ASH AF_ASH +#define PF_ECONET AF_ECONET +#define PF_ATMSVC AF_ATMSVC +#define PF_RDS AF_RDS +#define PF_SNA AF_SNA +#define PF_IRDA AF_IRDA +#define PF_PPPOX AF_PPPOX +#define PF_WANPIPE AF_WANPIPE +#define PF_LLC AF_LLC +#define PF_IB AF_IB +#define PF_MPLS AF_MPLS +#define PF_CAN AF_CAN +#define PF_TIPC AF_TIPC +#define PF_BLUETOOTH AF_BLUETOOTH +#define PF_IUCV AF_IUCV +#define PF_RXRPC AF_RXRPC +#define PF_ISDN AF_ISDN +#define PF_PHONET AF_PHONET +#define PF_IEEE802154 AF_IEEE802154 +#define PF_CAIF AF_CAIF +#define PF_ALG AF_ALG +#define PF_NFC AF_NFC +#define PF_VSOCK AF_VSOCK +#define PF_KCM AF_KCM +#define PF_QIPCRTR AF_QIPCRTR +#define PF_SMC AF_SMC +#define PF_XDP AF_XDP +#define PF_MAX AF_MAX + +/* Maximum queue length specifiable by listen. */ +#define SOMAXCONN 4096 + +/* Flags we can use with send/ and recv. + Added those for 1003.1g not all are supported yet + */ + +#define MSG_OOB 1 +#define MSG_PEEK 2 +#define MSG_DONTROUTE 4 +#define MSG_TRYHARD 4 /* Synonym for MSG_DONTROUTE for DECnet */ +#define MSG_CTRUNC 8 +#define MSG_PROBE 0x10 /* Do not send. Only probe path f.e. for MTU */ +#define MSG_TRUNC 0x20 +#define MSG_DONTWAIT 0x40 /* Nonblocking io */ +#define MSG_EOR 0x80 /* End of record */ +#define MSG_WAITALL 0x100 /* Wait for a full request */ +#define MSG_FIN 0x200 +#define MSG_SYN 0x400 +#define MSG_CONFIRM 0x800 /* Confirm path validity */ +#define MSG_RST 0x1000 +#define MSG_ERRQUEUE 0x2000 /* Fetch message from error queue */ +#define MSG_NOSIGNAL 0x4000 /* Do not generate SIGPIPE */ +#define MSG_MORE 0x8000 /* Sender will send more */ +#define MSG_WAITFORONE 0x10000 /* recvmmsg(): block until 1+ packets avail */ +#define MSG_SENDPAGE_NOPOLICY 0x10000 /* sendpage() internal : do no apply policy */ +#define MSG_SENDPAGE_NOTLAST 0x20000 /* sendpage() internal : not the last page */ +#define MSG_BATCH 0x40000 /* sendmmsg(): more messages coming */ +#define MSG_EOF MSG_FIN +#define MSG_NO_SHARED_FRAGS 0x80000 /* sendpage() internal : page frags are not shared */ +#define MSG_SENDPAGE_DECRYPTED 0x100000 /* sendpage() internal : page may carry + * plain text and require encryption + */ + +#define MSG_ZEROCOPY 0x4000000 /* Use user data in kernel path */ +#define MSG_FASTOPEN 0x20000000 /* Send data in TCP SYN */ +#define MSG_CMSG_CLOEXEC 0x40000000 /* Set close_on_exec for file + descriptor received through + SCM_RIGHTS */ +#if defined(CONFIG_COMPAT) +#define MSG_CMSG_COMPAT 0x80000000 /* This message needs 32 bit fixups */ +#else +#define MSG_CMSG_COMPAT 0 /* We never have 32 bit fixups */ +#endif + + +/* Setsockoptions(2) level. Thanks to BSD these must match IPPROTO_xxx */ +#define SOL_IP 0 +/* #define SOL_ICMP 1 No-no-no! Due to Linux :-) we cannot use SOL_ICMP=1 */ +#define SOL_TCP 6 +#define SOL_UDP 17 +#define SOL_IPV6 41 +#define SOL_ICMPV6 58 +#define SOL_SCTP 132 +#define SOL_UDPLITE 136 /* UDP-Lite (RFC 3828) */ +#define SOL_RAW 255 +#define SOL_IPX 256 +#define SOL_AX25 257 +#define SOL_ATALK 258 +#define SOL_NETROM 259 +#define SOL_ROSE 260 +#define SOL_DECNET 261 +#define SOL_X25 262 +#define SOL_PACKET 263 +#define SOL_ATM 264 /* ATM layer (cell level) */ +#define SOL_AAL 265 /* ATM Adaption Layer (packet level) */ +#define SOL_IRDA 266 +#define SOL_NETBEUI 267 +#define SOL_LLC 268 +#define SOL_DCCP 269 +#define SOL_NETLINK 270 +#define SOL_TIPC 271 +#define SOL_RXRPC 272 +#define SOL_PPPOL2TP 273 +#define SOL_BLUETOOTH 274 +#define SOL_PNPIPE 275 +#define SOL_RDS 276 +#define SOL_IUCV 277 +#define SOL_CAIF 278 +#define SOL_ALG 279 +#define SOL_NFC 280 +#define SOL_KCM 281 +#define SOL_TLS 282 +#define SOL_XDP 283 + +/* IPX options */ +#define IPX_TYPE 1 + +extern int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr_storage *kaddr); +extern int put_cmsg(struct msghdr*, int level, int type, int len, void *data); + +struct timespec64; +struct __kernel_timespec; +struct old_timespec32; + +struct scm_timestamping_internal { + struct timespec64 ts[3]; +}; + +extern void put_cmsg_scm_timestamping64(struct msghdr *msg, struct scm_timestamping_internal *tss); +extern void put_cmsg_scm_timestamping(struct msghdr *msg, struct scm_timestamping_internal *tss); + +/* The __sys_...msg variants allow MSG_CMSG_COMPAT iff + * forbid_cmsg_compat==false + */ +extern long __sys_recvmsg(int fd, struct user_msghdr __user *msg, + unsigned int flags, bool forbid_cmsg_compat); +extern long __sys_sendmsg(int fd, struct user_msghdr __user *msg, + unsigned int flags, bool forbid_cmsg_compat); +extern int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg, + unsigned int vlen, unsigned int flags, + struct __kernel_timespec __user *timeout, + struct old_timespec32 __user *timeout32); +extern int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, + unsigned int vlen, unsigned int flags, + bool forbid_cmsg_compat); +extern long __sys_sendmsg_sock(struct socket *sock, struct msghdr *msg, + unsigned int flags); +extern long __sys_recvmsg_sock(struct socket *sock, struct msghdr *msg, + struct user_msghdr __user *umsg, + struct sockaddr __user *uaddr, + unsigned int flags); +extern int sendmsg_copy_msghdr(struct msghdr *msg, + struct user_msghdr __user *umsg, unsigned flags, + struct iovec **iov); +extern int recvmsg_copy_msghdr(struct msghdr *msg, + struct user_msghdr __user *umsg, unsigned flags, + struct sockaddr __user **uaddr, + struct iovec **iov); +extern int __copy_msghdr_from_user(struct msghdr *kmsg, + struct user_msghdr __user *umsg, + struct sockaddr __user **save_addr, + struct iovec __user **uiov, size_t *nsegs); + +/* helpers which do the actual work for syscalls */ +extern int __sys_recvfrom(int fd, void __user *ubuf, size_t size, + unsigned int flags, struct sockaddr __user *addr, + int __user *addr_len); +extern int __sys_sendto(int fd, void __user *buff, size_t len, + unsigned int flags, struct sockaddr __user *addr, + int addr_len); +extern int __sys_accept4_file(struct file *file, unsigned file_flags, + struct sockaddr __user *upeer_sockaddr, + int __user *upeer_addrlen, int flags, + unsigned long nofile); +extern int __sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr, + int __user *upeer_addrlen, int flags); +extern int __sys_socket(int family, int type, int protocol); +extern int __sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen); +extern int __sys_connect_file(struct file *file, struct sockaddr_storage *addr, + int addrlen, int file_flags); +extern int __sys_connect(int fd, struct sockaddr __user *uservaddr, + int addrlen); +extern int __sys_listen(int fd, int backlog); +extern int __sys_getsockname(int fd, struct sockaddr __user *usockaddr, + int __user *usockaddr_len); +extern int __sys_getpeername(int fd, struct sockaddr __user *usockaddr, + int __user *usockaddr_len); +extern int __sys_socketpair(int family, int type, int protocol, + int __user *usockvec); +extern int __sys_shutdown(int fd, int how); + +extern struct ns_common *get_net_ns(struct ns_common *ns); +#endif /* _LINUX_SOCKET_H */ diff --git a/tools/perf/trace/beauty/sockaddr.c b/tools/perf/trace/beauty/sockaddr.c index e0c13e6a5788..cd110634ab09 100644 --- a/tools/perf/trace/beauty/sockaddr.c +++ b/tools/perf/trace/beauty/sockaddr.c @@ -7,14 +7,7 @@ #include <sys/un.h> #include <arpa/inet.h> -static const char *socket_families[] = { - "UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM", - "BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI", - "SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC", - "RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "CAN", "TIPC", - "BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154", "CAIF", - "ALG", "NFC", "VSOCK", -}; +#include "trace/beauty/generated/socket_arrays.c" DEFINE_STRARRAY(socket_families, "PF_"); static size_t af_inet__scnprintf(struct sockaddr *sa, char *bf, size_t size) diff --git a/tools/perf/trace/beauty/socket.sh b/tools/perf/trace/beauty/socket.sh new file mode 100755 index 000000000000..3820e5c82293 --- /dev/null +++ b/tools/perf/trace/beauty/socket.sh @@ -0,0 +1,24 @@ +#!/bin/sh +# SPDX-License-Identifier: LGPL-2.1 + +# This one uses a copy from the kernel sources headers that is in a +# place used just for these tools/perf/beauty/ usage, we shouldn't not +# put it in tools/include/linux otherwise they would be used in the +# normal compiler building process and would drag needless stuff from the +# kernel. + +# When what these scripts need is already in tools/include/ then use it, +# otherwise grab and check the copy from the kernel sources just for these +# string table building scripts. + +[ $# -eq 1 ] && header_dir=$1 || header_dir=tools/perf/trace/beauty/include/linux/ + +printf "static const char *socket_families[] = {\n" +# #define AF_LOCAL 1 /* POSIX name for AF_UNIX */ +regex='^#define[[:space:]]+AF_(\w+)[[:space:]]+([[:digit:]]+).*' + +egrep $regex ${header_dir}/socket.h | \ + sed -r "s/$regex/\2 \1/g" | \ + xargs printf "\t[%s] = \"%s\",\n" | \ + egrep -v "\"(UNIX|MAX)\"" +printf "};\n" diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index 9023267e5643..bd77825fd5a1 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -209,7 +209,7 @@ static void annotate_browser__draw_current_jump(struct ui_browser *browser) ui_browser__mark_fused(browser, pcnt_width + 3 + notes->widths.addr + width, from - 1, - to > from ? true : false); + to > from); } } diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index be9c4c0549bc..a07626f07208 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -3629,8 +3629,8 @@ int perf_evlist__tui_browse_hists(struct evlist *evlist, const char *help, { int nr_entries = evlist->core.nr_entries; -single_entry: if (perf_evlist__single_entry(evlist)) { +single_entry: { struct evsel *first = evlist__first(evlist); return perf_evsel__hists_browse(first, nr_entries, help, @@ -3638,6 +3638,7 @@ single_entry: env, warn_lost_event, annotation_opts); } + } if (symbol_conf.event_group) { struct evsel *pos; diff --git a/tools/perf/util/Build b/tools/perf/util/Build index 8d18380ecd10..cd5e41960e64 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -117,6 +117,7 @@ endif perf-y += parse-branch-options.o perf-y += dump-insn.o perf-y += parse-regs-options.o +perf-y += parse-sublevel-options.o perf-y += term.o perf-y += help-unknown-cmd.o perf-y += mem-events.o @@ -128,6 +129,7 @@ perf-y += expr-bison.o perf-y += expr.o perf-y += branch.o perf-y += mem2node.o +perf-y += clockid.o perf-$(CONFIG_LIBBPF) += bpf-loader.o perf-$(CONFIG_LIBBPF) += bpf_map.o @@ -191,36 +193,60 @@ CFLAGS_llvm-utils.o += -DPERF_INCLUDE_DIR="BUILD_STR($(perf_include_dir_SQ))" # avoid compiler warnings in 32-bit mode CFLAGS_genelf_debug.o += -Wno-packed -$(OUTPUT)util/parse-events-flex.c: util/parse-events.l $(OUTPUT)util/parse-events-bison.c +$(OUTPUT)util/parse-events-flex.c $(OUTPUT)util/parse-events-flex.h: util/parse-events.l $(OUTPUT)util/parse-events-bison.c $(call rule_mkdir) - $(Q)$(call echo-cmd,flex)$(FLEX) -o $@ --header-file=$(OUTPUT)util/parse-events-flex.h $(PARSER_DEBUG_FLEX) util/parse-events.l + $(Q)$(call echo-cmd,flex)$(FLEX) -o $(OUTPUT)util/parse-events-flex.c \ + --header-file=$(OUTPUT)util/parse-events-flex.h $(PARSER_DEBUG_FLEX) $< -$(OUTPUT)util/parse-events-bison.c: util/parse-events.y +$(OUTPUT)util/parse-events-bison.c $(OUTPUT)util/parse-events-bison.h: util/parse-events.y $(call rule_mkdir) - $(Q)$(call echo-cmd,bison)$(BISON) -v util/parse-events.y -d $(PARSER_DEBUG_BISON) -o $@ -p parse_events_ + $(Q)$(call echo-cmd,bison)$(BISON) -v $< -d $(PARSER_DEBUG_BISON) \ + -o $(OUTPUT)util/parse-events-bison.c -p parse_events_ -$(OUTPUT)util/expr-flex.c: util/expr.l $(OUTPUT)util/expr-bison.c +$(OUTPUT)util/expr-flex.c $(OUTPUT)util/expr-flex.h: util/expr.l $(OUTPUT)util/expr-bison.c $(call rule_mkdir) - $(Q)$(call echo-cmd,flex)$(FLEX) -o $@ --header-file=$(OUTPUT)util/expr-flex.h $(PARSER_DEBUG_FLEX) util/expr.l + $(Q)$(call echo-cmd,flex)$(FLEX) -o $(OUTPUT)util/expr-flex.c \ + --header-file=$(OUTPUT)util/expr-flex.h $(PARSER_DEBUG_FLEX) $< -$(OUTPUT)util/expr-bison.c: util/expr.y +$(OUTPUT)util/expr-bison.c $(OUTPUT)util/expr-bison.h: util/expr.y $(call rule_mkdir) - $(Q)$(call echo-cmd,bison)$(BISON) -v util/expr.y -d $(PARSER_DEBUG_BISON) -o $@ -p expr_ + $(Q)$(call echo-cmd,bison)$(BISON) -v $< -d $(PARSER_DEBUG_BISON) \ + -o $(OUTPUT)util/expr-bison.c -p expr_ -$(OUTPUT)util/pmu-flex.c: util/pmu.l $(OUTPUT)util/pmu-bison.c +$(OUTPUT)util/pmu-flex.c $(OUTPUT)util/pmu-flex.h: util/pmu.l $(OUTPUT)util/pmu-bison.c $(call rule_mkdir) - $(Q)$(call echo-cmd,flex)$(FLEX) -o $@ --header-file=$(OUTPUT)util/pmu-flex.h util/pmu.l + $(Q)$(call echo-cmd,flex)$(FLEX) -o $(OUTPUT)util/pmu-flex.c \ + --header-file=$(OUTPUT)util/pmu-flex.h $(PARSER_DEBUG_FLEX) $< -$(OUTPUT)util/pmu-bison.c: util/pmu.y +$(OUTPUT)util/pmu-bison.c $(OUTPUT)util/pmu-bison.h: util/pmu.y $(call rule_mkdir) - $(Q)$(call echo-cmd,bison)$(BISON) -v util/pmu.y -d -o $@ -p perf_pmu_ - -CFLAGS_parse-events-flex.o += -w -CFLAGS_pmu-flex.o += -w -CFLAGS_expr-flex.o += -w -CFLAGS_parse-events-bison.o += -DYYENABLE_NLS=0 -w -CFLAGS_pmu-bison.o += -DYYENABLE_NLS=0 -DYYLTYPE_IS_TRIVIAL=0 -w -CFLAGS_expr-bison.o += -DYYENABLE_NLS=0 -DYYLTYPE_IS_TRIVIAL=0 -w + $(Q)$(call echo-cmd,bison)$(BISON) -v $< -d $(PARSER_DEBUG_BISON) \ + -o $(OUTPUT)util/pmu-bison.c -p perf_pmu_ + +FLEX_GE_26 := $(shell expr $(shell $(FLEX) --version | sed -e 's/flex \([0-9]\+\).\([0-9]\+\)/\1\2/g') \>\= 26) +ifeq ($(FLEX_GE_26),1) + flex_flags := -Wno-switch-enum -Wno-switch-default -Wno-unused-function -Wno-redundant-decls -Wno-sign-compare -Wno-unused-parameter -Wno-missing-prototypes -Wno-missing-declarations + CC_HASNT_MISLEADING_INDENTATION := $(shell echo "int main(void) { return 0 }" | $(CC) -Werror -Wno-misleading-indentation -o /dev/null -xc - 2>&1 | grep -q -- -Wno-misleading-indentation ; echo $$?) + ifeq ($(CC_HASNT_MISLEADING_INDENTATION), 1) + flex_flags += -Wno-misleading-indentation + endif +else + flex_flags := -w +endif +CFLAGS_parse-events-flex.o += $(flex_flags) +CFLAGS_pmu-flex.o += $(flex_flags) +CFLAGS_expr-flex.o += $(flex_flags) + +bison_flags := -DYYENABLE_NLS=0 +BISON_GE_35 := $(shell expr $(shell $(BISON) --version | grep bison | sed -e 's/.\+ \([0-9]\+\).\([0-9]\+\)/\1\2/g') \>\= 35) +ifeq ($(BISON_GE_35),1) + bison_flags += -Wno-unused-parameter -Wno-nested-externs -Wno-implicit-function-declaration -Wno-switch-enum +else + bison_flags += -w +endif +CFLAGS_parse-events-bison.o += $(bison_flags) +CFLAGS_pmu-bison.o += -DYYLTYPE_IS_TRIVIAL=0 $(bison_flags) +CFLAGS_expr-bison.o += -DYYLTYPE_IS_TRIVIAL=0 $(bison_flags) $(OUTPUT)util/parse-events.o: $(OUTPUT)util/parse-events-flex.c $(OUTPUT)util/parse-events-bison.c $(OUTPUT)util/pmu.o: $(OUTPUT)util/pmu-flex.c $(OUTPUT)util/pmu-bison.c diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 76bfb4a9d94e..0a1fcf787538 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -1621,6 +1621,7 @@ static int dso__disassemble_filename(struct dso *dso, char *filename, size_t fil char *build_id_filename; char *build_id_path = NULL; char *pos; + int len; if (dso->symtab_type == DSO_BINARY_TYPE__KALLSYMS && !dso__is_kcore(dso)) @@ -1649,10 +1650,16 @@ static int dso__disassemble_filename(struct dso *dso, char *filename, size_t fil if (pos && strlen(pos) < SBUILD_ID_SIZE - 2) dirname(build_id_path); - if (dso__is_kcore(dso) || - readlink(build_id_path, linkname, sizeof(linkname)) < 0 || - strstr(linkname, DSO__NAME_KALLSYMS) || - access(filename, R_OK)) { + if (dso__is_kcore(dso)) + goto fallback; + + len = readlink(build_id_path, linkname, sizeof(linkname) - 1); + if (len < 0) + goto fallback; + + linkname[len] = '\0'; + if (strstr(linkname, DSO__NAME_KALLSYMS) || + access(filename, R_OK)) { fallback: /* * If we don't have build-ids or the build-id file isn't in the diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c index 302a14d0aca9..93e063f22be5 100644 --- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c +++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c @@ -182,15 +182,15 @@ static int arm_spe_read_record(struct arm_spe_decoder *decoder) if (payload & BIT(EV_TLB_ACCESS)) decoder->record.type |= ARM_SPE_TLB_ACCESS; - if ((idx == 1 || idx == 2 || idx == 3) && + if ((idx == 2 || idx == 4 || idx == 8) && (payload & BIT(EV_LLC_MISS))) decoder->record.type |= ARM_SPE_LLC_MISS; - if ((idx == 1 || idx == 2 || idx == 3) && + if ((idx == 2 || idx == 4 || idx == 8) && (payload & BIT(EV_LLC_ACCESS))) decoder->record.type |= ARM_SPE_LLC_ACCESS; - if ((idx == 1 || idx == 2 || idx == 3) && + if ((idx == 2 || idx == 4 || idx == 8) && (payload & BIT(EV_REMOTE_ACCESS))) decoder->record.type |= ARM_SPE_REMOTE_ACCESS; diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c index 25c639ac4ad4..42a85c86421d 100644 --- a/tools/perf/util/auxtrace.c +++ b/tools/perf/util/auxtrace.c @@ -1349,6 +1349,47 @@ void itrace_synth_opts__set_default(struct itrace_synth_opts *synth_opts, synth_opts->initial_skip = 0; } +static int get_flag(const char **ptr, unsigned int *flags) +{ + while (1) { + char c = **ptr; + + if (c >= 'a' && c <= 'z') { + *flags |= 1 << (c - 'a'); + ++*ptr; + return 0; + } else if (c == ' ') { + ++*ptr; + continue; + } else { + return -1; + } + } +} + +static int get_flags(const char **ptr, unsigned int *plus_flags, unsigned int *minus_flags) +{ + while (1) { + switch (**ptr) { + case '+': + ++*ptr; + if (get_flag(ptr, plus_flags)) + return -1; + break; + case '-': + ++*ptr; + if (get_flag(ptr, minus_flags)) + return -1; + break; + case ' ': + ++*ptr; + break; + default: + return 0; + } + } +} + /* * Please check tools/perf/Documentation/perf-script.txt for information * about the options parsed here, which is introduced after this cset, @@ -1436,9 +1477,15 @@ int itrace_parse_synth_opts(const struct option *opt, const char *str, break; case 'e': synth_opts->errors = true; + if (get_flags(&p, &synth_opts->error_plus_flags, + &synth_opts->error_minus_flags)) + goto out_err; break; case 'd': synth_opts->log = true; + if (get_flags(&p, &synth_opts->log_plus_flags, + &synth_opts->log_minus_flags)) + goto out_err; break; case 'c': synth_opts->branches = true; @@ -1507,6 +1554,9 @@ int itrace_parse_synth_opts(const struct option *opt, const char *str, case 'a': synth_opts->remote_access = true; break; + case 'q': + synth_opts->quick += 1; + break; case ' ': case ',': break; diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h index 142ccf7d34df..951d2d14cf24 100644 --- a/tools/perf/util/auxtrace.h +++ b/tools/perf/util/auxtrace.h @@ -55,6 +55,11 @@ enum itrace_period_type { PERF_ITRACE_PERIOD_NANOSECS, }; +#define AUXTRACE_ERR_FLG_OVERFLOW (1 << ('o' - 'a')) +#define AUXTRACE_ERR_FLG_DATA_LOST (1 << ('l' - 'a')) + +#define AUXTRACE_LOG_FLG_ALL_PERF_EVTS (1 << ('a' - 'a')) + /** * struct itrace_synth_opts - AUX area tracing synthesis options. * @set: indicates whether or not options have been set @@ -91,6 +96,11 @@ enum itrace_period_type { * @cpu_bitmap: CPUs for which to synthesize events, or NULL for all * @ptime_range: time intervals to trace or NULL * @range_num: number of time intervals to trace + * @error_plus_flags: flags to affect what errors are reported + * @error_minus_flags: flags to affect what errors are reported + * @log_plus_flags: flags to affect what is logged + * @log_minus_flags: flags to affect what is logged + * @quick: quicker (less detailed) decoding */ struct itrace_synth_opts { bool set; @@ -124,6 +134,11 @@ struct itrace_synth_opts { unsigned long *cpu_bitmap; struct perf_time_interval *ptime_range; int range_num; + unsigned int error_plus_flags; + unsigned int error_minus_flags; + unsigned int log_plus_flags; + unsigned int log_minus_flags; + unsigned int quick; }; /** @@ -604,22 +619,32 @@ bool auxtrace__evsel_is_auxtrace(struct perf_session *session, struct evsel *evsel); #define ITRACE_HELP \ -" i: synthesize instructions events\n" \ +" i[period]: synthesize instructions events\n" \ " b: synthesize branches events (branch misses for Arm SPE)\n" \ " c: synthesize branches events (calls only)\n" \ " r: synthesize branches events (returns only)\n" \ " x: synthesize transactions events\n" \ " w: synthesize ptwrite events\n" \ " p: synthesize power events\n" \ -" e: synthesize error events\n" \ -" d: create a debug log\n" \ +" o: synthesize other events recorded due to the use\n" \ +" of aux-output (refer to perf record)\n" \ +" e[flags]: synthesize error events\n" \ +" each flag must be preceded by + or -\n" \ +" error flags are: o (overflow)\n" \ +" l (data lost)\n" \ +" d[flags]: create a debug log\n" \ +" each flag must be preceded by + or -\n" \ +" log flags are: a (all perf events)\n" \ " f: synthesize first level cache events\n" \ " m: synthesize last level cache events\n" \ " t: synthesize TLB events\n" \ " a: synthesize remote access events\n" \ " g[len]: synthesize a call chain (use with i or x)\n" \ +" G[len]: synthesize a call chain on existing event records\n" \ " l[len]: synthesize last branch entries (use with i or x)\n" \ +" L[len]: synthesize last branch entries on existing event records\n" \ " sNUMBER: skip initial number of events\n" \ +" q: quicker (less detailed) decoding\n" \ " PERIOD[ns|us|ms|i|t]: specify period to sample stream\n" \ " concatenate multiple options. Default is ibxwpe or cewp\n" diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c index c076fc7fe025..31207b6e2066 100644 --- a/tools/perf/util/build-id.c +++ b/tools/perf/util/build-id.c @@ -31,6 +31,10 @@ #include "probe-file.h" #include "strlist.h" +#ifdef HAVE_DEBUGINFOD_SUPPORT +#include <elfutils/debuginfod.h> +#endif + #include <linux/ctype.h> #include <linux/zalloc.h> @@ -636,6 +640,21 @@ static char *build_id_cache__find_debug(const char *sbuild_id, if (realname && access(realname, R_OK)) zfree(&realname); nsinfo__mountns_exit(&nsc); + +#ifdef HAVE_DEBUGINFOD_SUPPORT + if (realname == NULL) { + debuginfod_client* c = debuginfod_begin(); + if (c != NULL) { + int fd = debuginfod_find_debuginfo(c, + (const unsigned char*)sbuild_id, 0, + &realname); + if (fd >= 0) + close(fd); /* retaining reference by realname */ + debuginfod_end(c); + } + } +#endif + out: free(debugfile); return realname; diff --git a/tools/perf/util/clockid.c b/tools/perf/util/clockid.c new file mode 100644 index 000000000000..74365a5d99c1 --- /dev/null +++ b/tools/perf/util/clockid.c @@ -0,0 +1,119 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <subcmd/parse-options.h> +#include <stdio.h> +#include <time.h> +#include <strings.h> +#include <linux/time64.h> +#include "debug.h" +#include "clockid.h" +#include "record.h" + +struct clockid_map { + const char *name; + int clockid; +}; + +#define CLOCKID_MAP(n, c) \ + { .name = n, .clockid = (c), } + +#define CLOCKID_END { .name = NULL, } + + +/* + * Add the missing ones, we need to build on many distros... + */ +#ifndef CLOCK_MONOTONIC_RAW +#define CLOCK_MONOTONIC_RAW 4 +#endif +#ifndef CLOCK_BOOTTIME +#define CLOCK_BOOTTIME 7 +#endif +#ifndef CLOCK_TAI +#define CLOCK_TAI 11 +#endif + +static const struct clockid_map clockids[] = { + /* available for all events, NMI safe */ + CLOCKID_MAP("monotonic", CLOCK_MONOTONIC), + CLOCKID_MAP("monotonic_raw", CLOCK_MONOTONIC_RAW), + + /* available for some events */ + CLOCKID_MAP("realtime", CLOCK_REALTIME), + CLOCKID_MAP("boottime", CLOCK_BOOTTIME), + CLOCKID_MAP("tai", CLOCK_TAI), + + /* available for the lazy */ + CLOCKID_MAP("mono", CLOCK_MONOTONIC), + CLOCKID_MAP("raw", CLOCK_MONOTONIC_RAW), + CLOCKID_MAP("real", CLOCK_REALTIME), + CLOCKID_MAP("boot", CLOCK_BOOTTIME), + + CLOCKID_END, +}; + +static int get_clockid_res(clockid_t clk_id, u64 *res_ns) +{ + struct timespec res; + + *res_ns = 0; + if (!clock_getres(clk_id, &res)) + *res_ns = res.tv_nsec + res.tv_sec * NSEC_PER_SEC; + else + pr_warning("WARNING: Failed to determine specified clock resolution.\n"); + + return 0; +} + +int parse_clockid(const struct option *opt, const char *str, int unset) +{ + struct record_opts *opts = (struct record_opts *)opt->value; + const struct clockid_map *cm; + const char *ostr = str; + + if (unset) { + opts->use_clockid = 0; + return 0; + } + + /* no arg passed */ + if (!str) + return 0; + + /* no setting it twice */ + if (opts->use_clockid) + return -1; + + opts->use_clockid = true; + + /* if its a number, we're done */ + if (sscanf(str, "%d", &opts->clockid) == 1) + return get_clockid_res(opts->clockid, &opts->clockid_res_ns); + + /* allow a "CLOCK_" prefix to the name */ + if (!strncasecmp(str, "CLOCK_", 6)) + str += 6; + + for (cm = clockids; cm->name; cm++) { + if (!strcasecmp(str, cm->name)) { + opts->clockid = cm->clockid; + return get_clockid_res(opts->clockid, + &opts->clockid_res_ns); + } + } + + opts->use_clockid = false; + ui__warning("unknown clockid %s, check man page\n", ostr); + return -1; +} + +const char *clockid_name(clockid_t clk_id) +{ + const struct clockid_map *cm; + + for (cm = clockids; cm->name; cm++) { + if (cm->clockid == clk_id) + return cm->name; + } + return "(not found)"; +} diff --git a/tools/perf/util/clockid.h b/tools/perf/util/clockid.h new file mode 100644 index 000000000000..9b49b4711c76 --- /dev/null +++ b/tools/perf/util/clockid.h @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef __PERF_CLOCKID_H +#define __PERF_CLOCKID_H + +struct option; +int parse_clockid(const struct option *opt, const char *str, int unset); + +const char *clockid_name(clockid_t clk_id); + +#endif diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index c283223fb31f..a2a369e2fbb6 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -1344,8 +1344,15 @@ static int cs_etm__synth_events(struct cs_etm_auxtrace *etm, attr.sample_type &= ~(u64)PERF_SAMPLE_ADDR; } - if (etm->synth_opts.last_branch) + if (etm->synth_opts.last_branch) { attr.sample_type |= PERF_SAMPLE_BRANCH_STACK; + /* + * We don't use the hardware index, but the sample generation + * code uses the new format branch_stack with this field, + * so the event attributes must indicate that it's present. + */ + attr.branch_sample_type |= PERF_SAMPLE_BRANCH_HW_INDEX; + } if (etm->synth_opts.instructions) { attr.config = PERF_COUNT_HW_INSTRUCTIONS; diff --git a/tools/perf/util/data-convert-bt.c b/tools/perf/util/data-convert-bt.c index 5f36fc6a5578..27c5fef9ad54 100644 --- a/tools/perf/util/data-convert-bt.c +++ b/tools/perf/util/data-convert-bt.c @@ -31,6 +31,9 @@ #include "config.h" #include <linux/ctype.h> #include <linux/err.h> +#include <linux/time64.h> +#include "util.h" +#include "clockid.h" #define pr_N(n, fmt, ...) \ eprintf(n, debug_data_convert, fmt, ##__VA_ARGS__) @@ -1381,11 +1384,26 @@ do { \ return 0; } -static int ctf_writer__setup_clock(struct ctf_writer *cw) +static int ctf_writer__setup_clock(struct ctf_writer *cw, + struct perf_session *session, + bool tod) { struct bt_ctf_clock *clock = cw->clock; + const char *desc = "perf clock"; + int64_t offset = 0; - bt_ctf_clock_set_description(clock, "perf clock"); + if (tod) { + struct perf_env *env = &session->header.env; + + if (!env->clock.enabled) { + pr_err("Can't provide --tod time, missing clock data. " + "Please record with -k/--clockid option.\n"); + return -1; + } + + desc = clockid_name(env->clock.clockid); + offset = env->clock.tod_ns - env->clock.clockid_ns; + } #define SET(__n, __v) \ do { \ @@ -1394,8 +1412,8 @@ do { \ } while (0) SET(frequency, 1000000000); - SET(offset_s, 0); - SET(offset, 0); + SET(offset, offset); + SET(description, desc); SET(precision, 10); SET(is_absolute, 0); @@ -1481,7 +1499,8 @@ static void ctf_writer__cleanup(struct ctf_writer *cw) memset(cw, 0, sizeof(*cw)); } -static int ctf_writer__init(struct ctf_writer *cw, const char *path) +static int ctf_writer__init(struct ctf_writer *cw, const char *path, + struct perf_session *session, bool tod) { struct bt_ctf_writer *writer; struct bt_ctf_stream_class *stream_class; @@ -1505,7 +1524,7 @@ static int ctf_writer__init(struct ctf_writer *cw, const char *path) cw->clock = clock; - if (ctf_writer__setup_clock(cw)) { + if (ctf_writer__setup_clock(cw, session, tod)) { pr("Failed to setup CTF clock.\n"); goto err_cleanup; } @@ -1613,17 +1632,15 @@ int bt_convert__perf2ctf(const char *input, const char *path, if (err) return err; - /* CTF writer */ - if (ctf_writer__init(cw, path)) - return -1; - err = -1; /* perf.data session */ session = perf_session__new(&data, 0, &c.tool); - if (IS_ERR(session)) { - err = PTR_ERR(session); - goto free_writer; - } + if (IS_ERR(session)) + return PTR_ERR(session); + + /* CTF writer */ + if (ctf_writer__init(cw, path, session, opts->tod)) + goto free_session; if (c.queue_size) { ordered_events__set_alloc_size(&session->ordered_events, @@ -1632,17 +1649,17 @@ int bt_convert__perf2ctf(const char *input, const char *path, /* CTF writer env/clock setup */ if (ctf_writer__setup_env(cw, session)) - goto free_session; + goto free_writer; /* CTF events setup */ if (setup_events(cw, session)) - goto free_session; + goto free_writer; if (opts->all && setup_non_sample_events(cw, session)) - goto free_session; + goto free_writer; if (setup_streams(cw, session)) - goto free_session; + goto free_writer; err = perf_session__process_events(session); if (!err) @@ -1670,10 +1687,10 @@ int bt_convert__perf2ctf(const char *input, const char *path, return err; -free_session: - perf_session__delete(session); free_writer: ctf_writer__cleanup(cw); +free_session: + perf_session__delete(session); pr_err("Error during conversion setup.\n"); return err; } diff --git a/tools/perf/util/data-convert.h b/tools/perf/util/data-convert.h index af90b6076c06..feab5f114e37 100644 --- a/tools/perf/util/data-convert.h +++ b/tools/perf/util/data-convert.h @@ -5,6 +5,7 @@ struct perf_data_convert_opts { bool force; bool all; + bool tod; }; #endif /* __DATA_CONVERT_H */ diff --git a/tools/perf/util/debug.c b/tools/perf/util/debug.c index adb656745ecc..5cda5565777a 100644 --- a/tools/perf/util/debug.c +++ b/tools/perf/util/debug.c @@ -20,6 +20,7 @@ #include "target.h" #include "ui/helpline.h" #include "ui/ui.h" +#include "util/parse-sublevel-options.h" #include <linux/ctype.h> @@ -173,65 +174,37 @@ void trace_event(union perf_event *event) trace_event_printer, event); } -static struct debug_variable { - const char *name; - int *ptr; -} debug_variables[] = { - { .name = "verbose", .ptr = &verbose }, - { .name = "ordered-events", .ptr = &debug_ordered_events}, - { .name = "stderr", .ptr = &redirect_to_stderr}, - { .name = "data-convert", .ptr = &debug_data_convert }, - { .name = "perf-event-open", .ptr = &debug_peo_args }, +static struct sublevel_option debug_opts[] = { + { .name = "verbose", .value_ptr = &verbose }, + { .name = "ordered-events", .value_ptr = &debug_ordered_events}, + { .name = "stderr", .value_ptr = &redirect_to_stderr}, + { .name = "data-convert", .value_ptr = &debug_data_convert }, + { .name = "perf-event-open", .value_ptr = &debug_peo_args }, { .name = NULL, } }; int perf_debug_option(const char *str) { - struct debug_variable *var = &debug_variables[0]; - char *vstr, *s = strdup(str); - int v = 1; - - vstr = strchr(s, '='); - if (vstr) - *vstr++ = 0; - - while (var->name) { - if (!strcmp(s, var->name)) - break; - var++; - } - - if (!var->name) { - pr_err("Unknown debug variable name '%s'\n", s); - free(s); - return -1; - } + int ret; - if (vstr) { - v = atoi(vstr); - /* - * Allow only values in range (0, 10), - * otherwise set 0. - */ - v = (v < 0) || (v > 10) ? 0 : v; - } + ret = perf_parse_sublevel_options(str, debug_opts); + if (ret) + return ret; - if (quiet) - v = -1; + /* Allow only verbose value in range (0, 10), otherwise set 0. */ + verbose = (verbose < 0) || (verbose > 10) ? 0 : verbose; - *var->ptr = v; - free(s); return 0; } int perf_quiet_option(void) { - struct debug_variable *var = &debug_variables[0]; + struct sublevel_option *opt = &debug_opts[0]; /* disable all debug messages */ - while (var->name) { - *var->ptr = -1; - var++; + while (opt->name) { + *opt->value_ptr = -1; + opt++; } return 0; diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c index 99f0a39c3c59..5a3b4755f0b3 100644 --- a/tools/perf/util/dso.c +++ b/tools/perf/util/dso.c @@ -208,6 +208,7 @@ int dso__read_binary_type_filename(const struct dso *dso, case DSO_BINARY_TYPE__JAVA_JIT: case DSO_BINARY_TYPE__BPF_PROG_INFO: case DSO_BINARY_TYPE__BPF_IMAGE: + case DSO_BINARY_TYPE__OOL: case DSO_BINARY_TYPE__NOT_FOUND: ret = -1; break; @@ -898,6 +899,8 @@ static struct dso_cache *dso_cache__populate(struct dso *dso, if (dso->binary_type == DSO_BINARY_TYPE__BPF_PROG_INFO) *ret = bpf_read(dso, cache_offset, cache->data); + else if (dso->binary_type == DSO_BINARY_TYPE__OOL) + *ret = DSO__DATA_CACHE_SIZE; else *ret = file_read(dso, machine, cache_offset, cache->data); @@ -1262,7 +1265,7 @@ struct dso *dso__new_id(const char *name, struct dso_id *id) dso->has_build_id = 0; dso->has_srcline = 1; dso->a2l_fails = 1; - dso->kernel = DSO_TYPE_USER; + dso->kernel = DSO_SPACE__USER; dso->needs_swap = DSO_SWAP__UNSET; dso->comp = COMP_ID__NONE; RB_CLEAR_NODE(&dso->rb_node); diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h index d3d03274b0d1..8ad17f395a19 100644 --- a/tools/perf/util/dso.h +++ b/tools/perf/util/dso.h @@ -42,13 +42,14 @@ enum dso_binary_type { DSO_BINARY_TYPE__OPENEMBEDDED_DEBUGINFO, DSO_BINARY_TYPE__BPF_PROG_INFO, DSO_BINARY_TYPE__BPF_IMAGE, + DSO_BINARY_TYPE__OOL, DSO_BINARY_TYPE__NOT_FOUND, }; -enum dso_kernel_type { - DSO_TYPE_USER = 0, - DSO_TYPE_KERNEL, - DSO_TYPE_GUEST_KERNEL +enum dso_space_type { + DSO_SPACE__USER = 0, + DSO_SPACE__KERNEL, + DSO_SPACE__KERNEL_GUEST }; enum dso_swap_type { @@ -159,7 +160,7 @@ struct dso { void *a2l; char *symsrc_filename; unsigned int a2l_fails; - enum dso_kernel_type kernel; + enum dso_space_type kernel; enum dso_swap_type needs_swap; enum dso_binary_type symtab_type; enum dso_binary_type binary_type; diff --git a/tools/perf/util/env.h b/tools/perf/util/env.h index 1ab2682d5d2b..a12972652006 100644 --- a/tools/perf/util/env.h +++ b/tools/perf/util/env.h @@ -77,7 +77,6 @@ struct perf_env { struct numa_node *numa_nodes; struct memory_node *memory_nodes; unsigned long long memory_bsize; - u64 clockid_res_ns; /* * bpf_info_lock protects bpf rbtrees. This is needed because the @@ -100,6 +99,19 @@ struct perf_env { /* For fast cpu to numa node lookup via perf_env__numa_node */ int *numa_map; int nr_numa_map; + + /* For real clock time reference. */ + struct { + u64 tod_ns; + u64 clockid_ns; + u64 clockid_res_ns; + int clockid; + /* + * enabled is valid for report mode, and is true if above + * values are set, it's set in process_clock_data + */ + bool enabled; + } clock; }; enum perf_compress_type { diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index f581550a3015..317a26571845 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -31,6 +31,7 @@ #include "stat.h" #include "session.h" #include "bpf-event.h" +#include "print_binary.h" #include "tool.h" #include "../perf.h" @@ -55,6 +56,7 @@ static const char *perf_event__names[] = { [PERF_RECORD_KSYMBOL] = "KSYMBOL", [PERF_RECORD_BPF_EVENT] = "BPF_EVENT", [PERF_RECORD_CGROUP] = "CGROUP", + [PERF_RECORD_TEXT_POKE] = "TEXT_POKE", [PERF_RECORD_HEADER_ATTR] = "ATTR", [PERF_RECORD_HEADER_EVENT_TYPE] = "EVENT_TYPE", [PERF_RECORD_HEADER_TRACING_DATA] = "TRACING_DATA", @@ -267,6 +269,14 @@ int perf_event__process_bpf(struct perf_tool *tool __maybe_unused, return machine__process_bpf(machine, event, sample); } +int perf_event__process_text_poke(struct perf_tool *tool __maybe_unused, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + return machine__process_text_poke(machine, event, sample); +} + size_t perf_event__fprintf_mmap(union perf_event *event, FILE *fp) { return fprintf(fp, " %d/%d: [%#" PRI_lx64 "(%#" PRI_lx64 ") @ %#" PRI_lx64 "]: %c %s\n", @@ -413,7 +423,52 @@ size_t perf_event__fprintf_bpf(union perf_event *event, FILE *fp) event->bpf.type, event->bpf.flags, event->bpf.id); } -size_t perf_event__fprintf(union perf_event *event, FILE *fp) +static int text_poke_printer(enum binary_printer_ops op, unsigned int val, + void *extra, FILE *fp) +{ + bool old = *(bool *)extra; + + switch ((int)op) { + case BINARY_PRINT_LINE_BEGIN: + return fprintf(fp, " %s bytes:", old ? "Old" : "New"); + case BINARY_PRINT_NUM_DATA: + return fprintf(fp, " %02x", val); + case BINARY_PRINT_LINE_END: + return fprintf(fp, "\n"); + default: + return 0; + } +} + +size_t perf_event__fprintf_text_poke(union perf_event *event, struct machine *machine, FILE *fp) +{ + struct perf_record_text_poke_event *tp = &event->text_poke; + size_t ret; + bool old; + + ret = fprintf(fp, " %" PRI_lx64 " ", tp->addr); + if (machine) { + struct addr_location al; + + al.map = maps__find(&machine->kmaps, tp->addr); + if (al.map && map__load(al.map) >= 0) { + al.addr = al.map->map_ip(al.map, tp->addr); + al.sym = map__find_symbol(al.map, al.addr); + if (al.sym) + ret += symbol__fprintf_symname_offs(al.sym, &al, fp); + } + } + ret += fprintf(fp, " old len %u new len %u\n", tp->old_len, tp->new_len); + old = true; + ret += binary__fprintf(tp->bytes, tp->old_len, 16, text_poke_printer, + &old, fp); + old = false; + ret += binary__fprintf(tp->bytes + tp->old_len, tp->new_len, 16, + text_poke_printer, &old, fp); + return ret; +} + +size_t perf_event__fprintf(union perf_event *event, struct machine *machine, FILE *fp) { size_t ret = fprintf(fp, "PERF_RECORD_%s", perf_event__name(event->header.type)); @@ -457,6 +512,9 @@ size_t perf_event__fprintf(union perf_event *event, FILE *fp) case PERF_RECORD_BPF_EVENT: ret += perf_event__fprintf_bpf(event, fp); break; + case PERF_RECORD_TEXT_POKE: + ret += perf_event__fprintf_text_poke(event, machine, fp); + break; default: ret += fprintf(fp, "\n"); } diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index 6ae01c3c2ffa..b828b99176f4 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -351,6 +351,10 @@ int perf_event__process_bpf(struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, struct machine *machine); +int perf_event__process_text_poke(struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine); int perf_event__process(struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, @@ -385,7 +389,8 @@ size_t perf_event__fprintf_namespaces(union perf_event *event, FILE *fp); size_t perf_event__fprintf_cgroup(union perf_event *event, FILE *fp); size_t perf_event__fprintf_ksymbol(union perf_event *event, FILE *fp); size_t perf_event__fprintf_bpf(union perf_event *event, FILE *fp); -size_t perf_event__fprintf(union perf_event *event, FILE *fp); +size_t perf_event__fprintf_text_poke(union perf_event *event, struct machine *machine,FILE *fp); +size_t perf_event__fprintf(union perf_event *event, struct machine *machine, FILE *fp); int kallsyms__get_function_start(const char *kallsyms_filename, const char *symbol_name, u64 *addr); diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index ab48be4cf258..c0768c61eb43 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -63,6 +63,9 @@ void evlist__init(struct evlist *evlist, struct perf_cpu_map *cpus, perf_evlist__set_maps(&evlist->core, cpus, threads); evlist->workload.pid = -1; evlist->bkw_mmap_state = BKW_MMAP_NOTREADY; + evlist->ctl_fd.fd = -1; + evlist->ctl_fd.ack = -1; + evlist->ctl_fd.pos = -1; } struct evlist *evlist__new(void) @@ -79,7 +82,7 @@ struct evlist *perf_evlist__new_default(void) { struct evlist *evlist = evlist__new(); - if (evlist && perf_evlist__add_default(evlist)) { + if (evlist && evlist__add_default(evlist)) { evlist__delete(evlist); evlist = NULL; } @@ -91,7 +94,7 @@ struct evlist *perf_evlist__new_dummy(void) { struct evlist *evlist = evlist__new(); - if (evlist && perf_evlist__add_dummy(evlist)) { + if (evlist && evlist__add_dummy(evlist)) { evlist__delete(evlist); evlist = NULL; } @@ -231,7 +234,7 @@ void perf_evlist__set_leader(struct evlist *evlist) } } -int __perf_evlist__add_default(struct evlist *evlist, bool precise) +int __evlist__add_default(struct evlist *evlist, bool precise) { struct evsel *evsel = evsel__new_cycles(precise); @@ -242,7 +245,7 @@ int __perf_evlist__add_default(struct evlist *evlist, bool precise) return 0; } -int perf_evlist__add_dummy(struct evlist *evlist) +int evlist__add_dummy(struct evlist *evlist) { struct perf_event_attr attr = { .type = PERF_TYPE_SOFTWARE, @@ -258,8 +261,7 @@ int perf_evlist__add_dummy(struct evlist *evlist) return 0; } -static int evlist__add_attrs(struct evlist *evlist, - struct perf_event_attr *attrs, size_t nr_attrs) +static int evlist__add_attrs(struct evlist *evlist, struct perf_event_attr *attrs, size_t nr_attrs) { struct evsel *evsel, *n; LIST_HEAD(head); @@ -282,8 +284,7 @@ out_delete_partial_list: return -1; } -int __perf_evlist__add_default_attrs(struct evlist *evlist, - struct perf_event_attr *attrs, size_t nr_attrs) +int __evlist__add_default_attrs(struct evlist *evlist, struct perf_event_attr *attrs, size_t nr_attrs) { size_t i; @@ -322,8 +323,7 @@ perf_evlist__find_tracepoint_by_name(struct evlist *evlist, return NULL; } -int perf_evlist__add_newtp(struct evlist *evlist, - const char *sys, const char *name, void *handler) +int evlist__add_newtp(struct evlist *evlist, const char *sys, const char *name, void *handler) { struct evsel *evsel = evsel__newtp(sys, name); @@ -500,7 +500,7 @@ int perf_evlist__enable_event_idx(struct evlist *evlist, int evlist__add_pollfd(struct evlist *evlist, int fd) { - return perf_evlist__add_pollfd(&evlist->core, fd, NULL, POLLIN); + return perf_evlist__add_pollfd(&evlist->core, fd, NULL, POLLIN, fdarray_flag__default); } int evlist__filter_pollfd(struct evlist *evlist, short revents_and_mask) @@ -540,7 +540,7 @@ struct evsel *perf_evlist__id2evsel(struct evlist *evlist, u64 id) if (sid) return container_of(sid->evsel, struct evsel, core); - if (!perf_evlist__sample_id_all(evlist)) + if (!evlist__sample_id_all(evlist)) return evlist__first(evlist); return NULL; @@ -946,6 +946,10 @@ int perf_evlist__create_maps(struct evlist *evlist, struct target *target) perf_evlist__set_maps(&evlist->core, cpus, threads); + /* as evlist now has references, put count here */ + perf_cpu_map__put(cpus); + perf_thread_map__put(threads); + return 0; out_delete_threads: @@ -1088,7 +1092,7 @@ int perf_evlist__append_tp_filter_pid(struct evlist *evlist, pid_t pid) return perf_evlist__append_tp_filter_pids(evlist, 1, &pid); } -bool perf_evlist__valid_sample_type(struct evlist *evlist) +bool evlist__valid_sample_type(struct evlist *evlist) { struct evsel *pos; @@ -1107,7 +1111,7 @@ bool perf_evlist__valid_sample_type(struct evlist *evlist) return true; } -u64 __perf_evlist__combined_sample_type(struct evlist *evlist) +u64 __evlist__combined_sample_type(struct evlist *evlist) { struct evsel *evsel; @@ -1120,13 +1124,13 @@ u64 __perf_evlist__combined_sample_type(struct evlist *evlist) return evlist->combined_sample_type; } -u64 perf_evlist__combined_sample_type(struct evlist *evlist) +u64 evlist__combined_sample_type(struct evlist *evlist) { evlist->combined_sample_type = 0; - return __perf_evlist__combined_sample_type(evlist); + return __evlist__combined_sample_type(evlist); } -u64 perf_evlist__combined_branch_type(struct evlist *evlist) +u64 evlist__combined_branch_type(struct evlist *evlist) { struct evsel *evsel; u64 branch_type = 0; @@ -1191,7 +1195,7 @@ out: return size; } -bool perf_evlist__valid_sample_id_all(struct evlist *evlist) +bool evlist__valid_sample_id_all(struct evlist *evlist) { struct evsel *first = evlist__first(evlist), *pos = first; @@ -1203,7 +1207,7 @@ bool perf_evlist__valid_sample_id_all(struct evlist *evlist) return true; } -bool perf_evlist__sample_id_all(struct evlist *evlist) +bool evlist__sample_id_all(struct evlist *evlist) { struct evsel *first = evlist__first(evlist); return first->core.attr.sample_id_all; @@ -1273,11 +1277,12 @@ static int perf_evlist__create_syswide_maps(struct evlist *evlist) goto out_put; perf_evlist__set_maps(&evlist->core, cpus, threads); -out: - return err; + + perf_thread_map__put(threads); out_put: perf_cpu_map__put(cpus); - goto out; +out: + return err; } int evlist__open(struct evlist *evlist) @@ -1464,8 +1469,7 @@ int perf_evlist__parse_sample_timestamp(struct evlist *evlist, return evsel__parse_sample_timestamp(evsel, event, timestamp); } -int perf_evlist__strerror_open(struct evlist *evlist, - int err, char *buf, size_t size) +int evlist__strerror_open(struct evlist *evlist, int err, char *buf, size_t size) { int printed, value; char sbuf[STRERR_BUFSIZE], *emsg = str_error_r(err, sbuf, sizeof(sbuf)); @@ -1518,7 +1522,7 @@ out_default: return 0; } -int perf_evlist__strerror_mmap(struct evlist *evlist, int err, char *buf, size_t size) +int evlist__strerror_mmap(struct evlist *evlist, int err, char *buf, size_t size) { char sbuf[STRERR_BUFSIZE], *emsg = str_error_r(err, sbuf, sizeof(sbuf)); int pages_attempted = evlist->core.mmap_len / 1024, pages_max_per_user, printed = 0; @@ -1727,3 +1731,143 @@ struct evsel *perf_evlist__reset_weak_group(struct evlist *evsel_list, } return leader; } + +int evlist__initialize_ctlfd(struct evlist *evlist, int fd, int ack) +{ + if (fd == -1) { + pr_debug("Control descriptor is not initialized\n"); + return 0; + } + + evlist->ctl_fd.pos = perf_evlist__add_pollfd(&evlist->core, fd, NULL, POLLIN, + fdarray_flag__nonfilterable); + if (evlist->ctl_fd.pos < 0) { + evlist->ctl_fd.pos = -1; + pr_err("Failed to add ctl fd entry: %m\n"); + return -1; + } + + evlist->ctl_fd.fd = fd; + evlist->ctl_fd.ack = ack; + + return 0; +} + +bool evlist__ctlfd_initialized(struct evlist *evlist) +{ + return evlist->ctl_fd.pos >= 0; +} + +int evlist__finalize_ctlfd(struct evlist *evlist) +{ + struct pollfd *entries = evlist->core.pollfd.entries; + + if (!evlist__ctlfd_initialized(evlist)) + return 0; + + entries[evlist->ctl_fd.pos].fd = -1; + entries[evlist->ctl_fd.pos].events = 0; + entries[evlist->ctl_fd.pos].revents = 0; + + evlist->ctl_fd.pos = -1; + evlist->ctl_fd.ack = -1; + evlist->ctl_fd.fd = -1; + + return 0; +} + +static int evlist__ctlfd_recv(struct evlist *evlist, enum evlist_ctl_cmd *cmd, + char *cmd_data, size_t data_size) +{ + int err; + char c; + size_t bytes_read = 0; + + memset(cmd_data, 0, data_size); + data_size--; + + do { + err = read(evlist->ctl_fd.fd, &c, 1); + if (err > 0) { + if (c == '\n' || c == '\0') + break; + cmd_data[bytes_read++] = c; + if (bytes_read == data_size) + break; + } else { + if (err == -1) + pr_err("Failed to read from ctlfd %d: %m\n", evlist->ctl_fd.fd); + break; + } + } while (1); + + pr_debug("Message from ctl_fd: \"%s%s\"\n", cmd_data, + bytes_read == data_size ? "" : c == '\n' ? "\\n" : "\\0"); + + if (err > 0) { + if (!strncmp(cmd_data, EVLIST_CTL_CMD_ENABLE_TAG, + (sizeof(EVLIST_CTL_CMD_ENABLE_TAG)-1))) { + *cmd = EVLIST_CTL_CMD_ENABLE; + } else if (!strncmp(cmd_data, EVLIST_CTL_CMD_DISABLE_TAG, + (sizeof(EVLIST_CTL_CMD_DISABLE_TAG)-1))) { + *cmd = EVLIST_CTL_CMD_DISABLE; + } + } + + return err; +} + +static int evlist__ctlfd_ack(struct evlist *evlist) +{ + int err; + + if (evlist->ctl_fd.ack == -1) + return 0; + + err = write(evlist->ctl_fd.ack, EVLIST_CTL_CMD_ACK_TAG, + sizeof(EVLIST_CTL_CMD_ACK_TAG)); + if (err == -1) + pr_err("failed to write to ctl_ack_fd %d: %m\n", evlist->ctl_fd.ack); + + return err; +} + +int evlist__ctlfd_process(struct evlist *evlist, enum evlist_ctl_cmd *cmd) +{ + int err = 0; + char cmd_data[EVLIST_CTL_CMD_MAX_LEN]; + int ctlfd_pos = evlist->ctl_fd.pos; + struct pollfd *entries = evlist->core.pollfd.entries; + + if (!evlist__ctlfd_initialized(evlist) || !entries[ctlfd_pos].revents) + return 0; + + if (entries[ctlfd_pos].revents & POLLIN) { + err = evlist__ctlfd_recv(evlist, cmd, cmd_data, + EVLIST_CTL_CMD_MAX_LEN); + if (err > 0) { + switch (*cmd) { + case EVLIST_CTL_CMD_ENABLE: + evlist__enable(evlist); + break; + case EVLIST_CTL_CMD_DISABLE: + evlist__disable(evlist); + break; + case EVLIST_CTL_CMD_ACK: + case EVLIST_CTL_CMD_UNSUPPORTED: + default: + pr_debug("ctlfd: unsupported %d\n", *cmd); + break; + } + if (!(*cmd == EVLIST_CTL_CMD_ACK || *cmd == EVLIST_CTL_CMD_UNSUPPORTED)) + evlist__ctlfd_ack(evlist); + } + } + + if (entries[ctlfd_pos].revents & (POLLHUP | POLLERR)) + evlist__finalize_ctlfd(evlist); + else + entries[ctlfd_pos].revents = 0; + + return err; +} diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index a8081dfc19cf..c73f7f7f120b 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -74,6 +74,11 @@ struct evlist { pthread_t th; volatile int done; } thread; + struct { + int fd; /* control file descriptor */ + int ack; /* ack file descriptor for control commands */ + int pos; /* index at evlist core object to check signals */ + } ctl_fd; }; struct evsel_str_handler { @@ -92,20 +97,20 @@ void evlist__delete(struct evlist *evlist); void evlist__add(struct evlist *evlist, struct evsel *entry); void evlist__remove(struct evlist *evlist, struct evsel *evsel); -int __perf_evlist__add_default(struct evlist *evlist, bool precise); +int __evlist__add_default(struct evlist *evlist, bool precise); -static inline int perf_evlist__add_default(struct evlist *evlist) +static inline int evlist__add_default(struct evlist *evlist) { - return __perf_evlist__add_default(evlist, true); + return __evlist__add_default(evlist, true); } -int __perf_evlist__add_default_attrs(struct evlist *evlist, +int __evlist__add_default_attrs(struct evlist *evlist, struct perf_event_attr *attrs, size_t nr_attrs); -#define perf_evlist__add_default_attrs(evlist, array) \ - __perf_evlist__add_default_attrs(evlist, array, ARRAY_SIZE(array)) +#define evlist__add_default_attrs(evlist, array) \ + __evlist__add_default_attrs(evlist, array, ARRAY_SIZE(array)) -int perf_evlist__add_dummy(struct evlist *evlist); +int evlist__add_dummy(struct evlist *evlist); int perf_evlist__add_sb_event(struct evlist *evlist, struct perf_event_attr *attr, @@ -116,8 +121,7 @@ int perf_evlist__start_sb_thread(struct evlist *evlist, struct target *target); void perf_evlist__stop_sb_thread(struct evlist *evlist); -int perf_evlist__add_newtp(struct evlist *evlist, - const char *sys, const char *name, void *handler); +int evlist__add_newtp(struct evlist *evlist, const char *sys, const char *name, void *handler); int __evlist__set_tracepoints_handlers(struct evlist *evlist, const struct evsel_str_handler *assocs, @@ -219,10 +223,10 @@ int perf_evlist__apply_filters(struct evlist *evlist, struct evsel **err_evsel); void __perf_evlist__set_leader(struct list_head *list); void perf_evlist__set_leader(struct evlist *evlist); -u64 __perf_evlist__combined_sample_type(struct evlist *evlist); -u64 perf_evlist__combined_sample_type(struct evlist *evlist); -u64 perf_evlist__combined_branch_type(struct evlist *evlist); -bool perf_evlist__sample_id_all(struct evlist *evlist); +u64 __evlist__combined_sample_type(struct evlist *evlist); +u64 evlist__combined_sample_type(struct evlist *evlist); +u64 evlist__combined_branch_type(struct evlist *evlist); +bool evlist__sample_id_all(struct evlist *evlist); u16 perf_evlist__id_hdr_size(struct evlist *evlist); int perf_evlist__parse_sample(struct evlist *evlist, union perf_event *event, @@ -232,8 +236,8 @@ int perf_evlist__parse_sample_timestamp(struct evlist *evlist, union perf_event *event, u64 *timestamp); -bool perf_evlist__valid_sample_type(struct evlist *evlist); -bool perf_evlist__valid_sample_id_all(struct evlist *evlist); +bool evlist__valid_sample_type(struct evlist *evlist); +bool evlist__valid_sample_id_all(struct evlist *evlist); bool perf_evlist__valid_read_format(struct evlist *evlist); void perf_evlist__splice_list_tail(struct evlist *evlist, @@ -258,8 +262,8 @@ static inline struct evsel *evlist__last(struct evlist *evlist) return container_of(evsel, struct evsel, core); } -int perf_evlist__strerror_open(struct evlist *evlist, int err, char *buf, size_t size); -int perf_evlist__strerror_mmap(struct evlist *evlist, int err, char *buf, size_t size); +int evlist__strerror_open(struct evlist *evlist, int err, char *buf, size_t size); +int evlist__strerror_mmap(struct evlist *evlist, int err, char *buf, size_t size); bool perf_evlist__can_select_event(struct evlist *evlist, const char *str); void perf_evlist__to_front(struct evlist *evlist, @@ -356,4 +360,25 @@ void perf_evlist__force_leader(struct evlist *evlist); struct evsel *perf_evlist__reset_weak_group(struct evlist *evlist, struct evsel *evsel, bool close); +#define EVLIST_CTL_CMD_ENABLE_TAG "enable" +#define EVLIST_CTL_CMD_DISABLE_TAG "disable" +#define EVLIST_CTL_CMD_ACK_TAG "ack\n" + +#define EVLIST_CTL_CMD_MAX_LEN 64 + +enum evlist_ctl_cmd { + EVLIST_CTL_CMD_UNSUPPORTED = 0, + EVLIST_CTL_CMD_ENABLE, + EVLIST_CTL_CMD_DISABLE, + EVLIST_CTL_CMD_ACK +}; + +int evlist__initialize_ctlfd(struct evlist *evlist, int ctl_fd, int ctl_fd_ack); +int evlist__finalize_ctlfd(struct evlist *evlist); +bool evlist__ctlfd_initialized(struct evlist *evlist); +int evlist__ctlfd_process(struct evlist *evlist, enum evlist_ctl_cmd *cmd); + +#define EVLIST_ENABLED_MSG "Events enabled\n" +#define EVLIST_DISABLED_MSG "Events disabled\n" + #endif /* __PERF_EVLIST_H */ diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index ef802f6d40c1..459b51e90063 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -976,16 +976,20 @@ void evsel__config(struct evsel *evsel, struct record_opts *opts, * We default some events to have a default interval. But keep * it a weak assumption overridable by the user. */ - if (!attr->sample_period || (opts->user_freq != UINT_MAX || - opts->user_interval != ULLONG_MAX)) { + if (!attr->sample_period) { if (opts->freq) { - evsel__set_sample_bit(evsel, PERIOD); attr->freq = 1; attr->sample_freq = opts->freq; } else { attr->sample_period = opts->default_interval; } } + /* + * If attr->freq was set (here or earlier), ask for period + * to be sampled. + */ + if (attr->freq) + evsel__set_sample_bit(evsel, PERIOD); if (opts->no_samples) attr->sample_freq = 0; @@ -1014,12 +1018,14 @@ void evsel__config(struct evsel *evsel, struct record_opts *opts, if (callchain && callchain->enabled && !evsel->no_aux_samples) evsel__config_callchain(evsel, opts, callchain); - if (opts->sample_intr_regs && !evsel->no_aux_samples) { + if (opts->sample_intr_regs && !evsel->no_aux_samples && + !evsel__is_dummy_event(evsel)) { attr->sample_regs_intr = opts->sample_intr_regs; evsel__set_sample_bit(evsel, REGS_INTR); } - if (opts->sample_user_regs && !evsel->no_aux_samples) { + if (opts->sample_user_regs && !evsel->no_aux_samples && + !evsel__is_dummy_event(evsel)) { attr->sample_regs_user |= opts->sample_user_regs; evsel__set_sample_bit(evsel, REGS_USER); } @@ -1064,7 +1070,12 @@ void evsel__config(struct evsel *evsel, struct record_opts *opts, attr->mmap = track; attr->mmap2 = track && !perf_missing_features.mmap2; attr->comm = track; - attr->ksymbol = track && !perf_missing_features.ksymbol; + /* + * ksymbol is tracked separately with text poke because it needs to be + * system wide and enabled immediately. + */ + if (!opts->text_poke) + attr->ksymbol = track && !perf_missing_features.ksymbol; attr->bpf_event = track && !opts->no_bpf_event && !perf_missing_features.bpf; if (opts->record_namespaces) @@ -2495,8 +2506,10 @@ int evsel__open_strerror(struct evsel *evsel, struct target *target, return scnprintf(msg + printed, size - printed, "Consider adjusting /proc/sys/kernel/perf_event_paranoid setting to open\n" - "access to performance monitoring and observability operations for users\n" - "without CAP_PERFMON or CAP_SYS_ADMIN Linux capability.\n" + "access to performance monitoring and observability operations for processes\n" + "without CAP_PERFMON, CAP_SYS_PTRACE or CAP_SYS_ADMIN Linux capability.\n" + "More information can be found at 'Perf events and tool security' document:\n" + "https://www.kernel.org/doc/html/latest/admin-guide/perf-security.html\n" "perf_event_paranoid setting is %d:\n" " -1: Allow use of (almost) all events by all users\n" " Ignore mlock limit after perf_event_mlock_kb without CAP_IPC_LOCK\n" @@ -2528,6 +2541,10 @@ int evsel__open_strerror(struct evsel *evsel, struct target *target, "No such device - did you specify an out-of-range profile CPU?"); break; case EOPNOTSUPP: + if (evsel->core.attr.aux_output) + return scnprintf(msg, size, + "%s: PMU Hardware doesn't support 'aux_output' feature", + evsel__name(evsel)); if (evsel->core.attr.sample_period != 0) return scnprintf(msg, size, "%s: PMU Hardware doesn't support sampling/overflow-interrupts. Try 'perf stat'", diff --git a/tools/perf/util/expr.c b/tools/perf/util/expr.c index f64ab91c432b..53482ef53c41 100644 --- a/tools/perf/util/expr.c +++ b/tools/perf/util/expr.c @@ -1,10 +1,17 @@ // SPDX-License-Identifier: GPL-2.0 #include <stdbool.h> #include <assert.h> +#include <errno.h> +#include <stdlib.h> +#include <string.h> +#include "metricgroup.h" +#include "debug.h" #include "expr.h" #include "expr-bison.h" #include "expr-flex.h" #include <linux/kernel.h> +#include <linux/zalloc.h> +#include <ctype.h> #ifdef PARSER_DEBUG extern int expr_debug; @@ -30,35 +37,144 @@ static bool key_equal(const void *key1, const void *key2, } /* Caller must make sure id is allocated */ -int expr__add_id(struct expr_parse_ctx *ctx, const char *name, double val) +int expr__add_id(struct expr_parse_ctx *ctx, const char *id) { - double *val_ptr = NULL, *old_val = NULL; + struct expr_id_data *data_ptr = NULL, *old_data = NULL; char *old_key = NULL; int ret; - if (val != 0.0) { - val_ptr = malloc(sizeof(double)); - if (!val_ptr) - return -ENOMEM; - *val_ptr = val; + data_ptr = malloc(sizeof(*data_ptr)); + if (!data_ptr) + return -ENOMEM; + + data_ptr->parent = ctx->parent; + + ret = hashmap__set(&ctx->ids, id, data_ptr, + (const void **)&old_key, (void **)&old_data); + if (ret) + free(data_ptr); + free(old_key); + free(old_data); + return ret; +} + +/* Caller must make sure id is allocated */ +int expr__add_id_val(struct expr_parse_ctx *ctx, const char *id, double val) +{ + struct expr_id_data *data_ptr = NULL, *old_data = NULL; + char *old_key = NULL; + int ret; + + data_ptr = malloc(sizeof(*data_ptr)); + if (!data_ptr) + return -ENOMEM; + data_ptr->val = val; + data_ptr->is_ref = false; + + ret = hashmap__set(&ctx->ids, id, data_ptr, + (const void **)&old_key, (void **)&old_data); + if (ret) + free(data_ptr); + free(old_key); + free(old_data); + return ret; +} + +int expr__add_ref(struct expr_parse_ctx *ctx, struct metric_ref *ref) +{ + struct expr_id_data *data_ptr = NULL, *old_data = NULL; + char *old_key = NULL; + char *name, *p; + int ret; + + data_ptr = zalloc(sizeof(*data_ptr)); + if (!data_ptr) + return -ENOMEM; + + name = strdup(ref->metric_name); + if (!name) { + free(data_ptr); + return -ENOMEM; } - ret = hashmap__set(&ctx->ids, name, val_ptr, - (const void **)&old_key, (void **)&old_val); + + /* + * The jevents tool converts all metric expressions + * to lowercase, including metric references, hence + * we need to add lowercase name for metric, so it's + * properly found. + */ + for (p = name; *p; p++) + *p = tolower(*p); + + /* + * Intentionally passing just const char pointers, + * originally from 'struct pmu_event' object. + * We don't need to change them, so there's no + * need to create our own copy. + */ + data_ptr->ref.metric_name = ref->metric_name; + data_ptr->ref.metric_expr = ref->metric_expr; + data_ptr->ref.counted = false; + data_ptr->is_ref = true; + + ret = hashmap__set(&ctx->ids, name, data_ptr, + (const void **)&old_key, (void **)&old_data); + if (ret) + free(data_ptr); + + pr_debug2("adding ref metric %s: %s\n", + ref->metric_name, ref->metric_expr); + free(old_key); - free(old_val); + free(old_data); return ret; } -int expr__get_id(struct expr_parse_ctx *ctx, const char *id, double *val_ptr) +int expr__get_id(struct expr_parse_ctx *ctx, const char *id, + struct expr_id_data **data) +{ + return hashmap__find(&ctx->ids, id, (void **)data) ? 0 : -1; +} + +int expr__resolve_id(struct expr_parse_ctx *ctx, const char *id, + struct expr_id_data **datap) { - double *data; + struct expr_id_data *data; - if (!hashmap__find(&ctx->ids, id, (void **)&data)) + if (expr__get_id(ctx, id, datap) || !*datap) { + pr_debug("%s not found\n", id); return -1; - *val_ptr = (data == NULL) ? 0.0 : *data; + } + + data = *datap; + + pr_debug2("lookup: is_ref %d, counted %d, val %f: %s\n", + data->is_ref, data->ref.counted, data->val, id); + + if (data->is_ref && !data->ref.counted) { + data->ref.counted = true; + pr_debug("processing metric: %s ENTRY\n", id); + if (expr__parse(&data->val, ctx, data->ref.metric_expr, 1)) { + pr_debug("%s failed to count\n", id); + return -1; + } + pr_debug("processing metric: %s EXIT: %f\n", id, data->val); + } + return 0; } +void expr__del_id(struct expr_parse_ctx *ctx, const char *id) +{ + struct expr_id_data *old_val = NULL; + char *old_key = NULL; + + hashmap__delete(&ctx->ids, id, + (const void **)&old_key, (void **)&old_val); + free(old_key); + free(old_val); +} + void expr__ctx_init(struct expr_parse_ctx *ctx) { hashmap__init(&ctx->ids, key_hash, key_equal, NULL); @@ -88,6 +204,8 @@ __expr__parse(double *val, struct expr_parse_ctx *ctx, const char *expr, void *scanner; int ret; + pr_debug2("parsing metric: %s\n", expr); + ret = expr_lex_init_extra(&scanner_ctx, &scanner); if (ret) return ret; @@ -116,16 +234,10 @@ int expr__parse(double *final_val, struct expr_parse_ctx *ctx, int expr__find_other(const char *expr, const char *one, struct expr_parse_ctx *ctx, int runtime) { - double *old_val = NULL; - char *old_key = NULL; int ret = __expr__parse(NULL, ctx, expr, EXPR_OTHER, runtime); - if (one) { - hashmap__delete(&ctx->ids, one, - (const void **)&old_key, (void **)&old_val); - free(old_key); - free(old_val); - } + if (one) + expr__del_id(ctx, one); return ret; } diff --git a/tools/perf/util/expr.h b/tools/perf/util/expr.h index 8a2c1074f90f..fc2b5e824a66 100644 --- a/tools/perf/util/expr.h +++ b/tools/perf/util/expr.h @@ -11,8 +11,30 @@ #include "util/hashmap.h" //#endif +struct metric_ref; + +struct expr_id { + char *id; + struct expr_id *parent; +}; + struct expr_parse_ctx { - struct hashmap ids; + struct hashmap ids; + struct expr_id *parent; +}; + +struct expr_id_data { + union { + double val; + struct { + const char *metric_name; + const char *metric_expr; + bool counted; + } ref; + struct expr_id *parent; + }; + + bool is_ref; }; struct expr_scanner_ctx { @@ -22,8 +44,14 @@ struct expr_scanner_ctx { void expr__ctx_init(struct expr_parse_ctx *ctx); void expr__ctx_clear(struct expr_parse_ctx *ctx); -int expr__add_id(struct expr_parse_ctx *ctx, const char *id, double val); -int expr__get_id(struct expr_parse_ctx *ctx, const char *id, double *val_ptr); +void expr__del_id(struct expr_parse_ctx *ctx, const char *id); +int expr__add_id(struct expr_parse_ctx *ctx, const char *id); +int expr__add_id_val(struct expr_parse_ctx *ctx, const char *id, double val); +int expr__add_ref(struct expr_parse_ctx *ctx, struct metric_ref *ref); +int expr__get_id(struct expr_parse_ctx *ctx, const char *id, + struct expr_id_data **data); +int expr__resolve_id(struct expr_parse_ctx *ctx, const char *id, + struct expr_id_data **datap); int expr__parse(double *final_val, struct expr_parse_ctx *ctx, const char *expr, int runtime); int expr__find_other(const char *expr, const char *one, diff --git a/tools/perf/util/expr.l b/tools/perf/util/expr.l index f397bf8b1a48..13e5e3c75f56 100644 --- a/tools/perf/util/expr.l +++ b/tools/perf/util/expr.l @@ -100,6 +100,7 @@ symbol ({spec}|{sym})+ } } +d_ratio { return D_RATIO; } max { return MAX; } min { return MIN; } if { return IF; } @@ -110,6 +111,8 @@ else { return ELSE; } "|" { return '|'; } "^" { return '^'; } "&" { return '&'; } +"<" { return '<'; } +">" { return '>'; } "-" { return '-'; } "+" { return '+'; } "*" { return '*'; } diff --git a/tools/perf/util/expr.y b/tools/perf/util/expr.y index bf3e898e3055..d34b370391c6 100644 --- a/tools/perf/util/expr.y +++ b/tools/perf/util/expr.y @@ -10,6 +10,14 @@ #include "smt.h" #include <string.h> +static double d_ratio(double val0, double val1) +{ + if (val1 == 0) { + return 0; + } + return val0 / val1; +} + %} %define api.pure full @@ -28,11 +36,12 @@ %token <num> NUMBER %token <str> ID %destructor { free ($$); } <str> -%token MIN MAX IF ELSE SMT_ON +%token MIN MAX IF ELSE SMT_ON D_RATIO %left MIN MAX IF %left '|' %left '^' %left '&' +%left '<' '>' %left '-' '+' %left '*' '/' '%' %left NEG NOT @@ -60,11 +69,12 @@ all_other: all_other other other: ID { - expr__add_id(ctx, $1, 0.0); + expr__add_id(ctx, $1); } | MIN | MAX | IF | ELSE | SMT_ON | NUMBER | '|' | '^' | '&' | '-' | '+' | '*' | '/' | '%' | '(' | ')' | ',' - +| +'<' | '>' | D_RATIO all_expr: if_expr { *final_val = $1; } ; @@ -75,16 +85,22 @@ if_expr: ; expr: NUMBER - | ID { if (expr__get_id(ctx, $1, &$$)) { - pr_debug("%s not found\n", $1); + | ID { + struct expr_id_data *data; + + if (expr__resolve_id(ctx, $1, &data)) { + free($1); + YYABORT; + } + + $$ = data->val; free($1); - YYABORT; - } - free($1); } | expr '|' expr { $$ = (long)$1 | (long)$3; } | expr '&' expr { $$ = (long)$1 & (long)$3; } | expr '^' expr { $$ = (long)$1 ^ (long)$3; } + | expr '<' expr { $$ = $1 < $3; } + | expr '>' expr { $$ = $1 > $3; } | expr '+' expr { $$ = $1 + $3; } | expr '-' expr { $$ = $1 - $3; } | expr '*' expr { $$ = $1 * $3; } @@ -105,6 +121,7 @@ expr: NUMBER | MIN '(' expr ',' expr ')' { $$ = $3 < $5 ? $3 : $5; } | MAX '(' expr ',' expr ')' { $$ = $3 > $5 ? $3 : $5; } | SMT_ON { $$ = smt_on() > 0; } + | D_RATIO '(' expr ',' expr ')' { $$ = d_ratio($3,$5); } ; %% diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 7a67d017d72c..9cf4efdcbbbd 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -46,6 +46,7 @@ #include "util/util.h" // perf_exe() #include "cputopo.h" #include "bpf-event.h" +#include "clockid.h" #include <linux/ctype.h> #include <internal/lib.h> @@ -891,8 +892,42 @@ static int write_auxtrace(struct feat_fd *ff, static int write_clockid(struct feat_fd *ff, struct evlist *evlist __maybe_unused) { - return do_write(ff, &ff->ph->env.clockid_res_ns, - sizeof(ff->ph->env.clockid_res_ns)); + return do_write(ff, &ff->ph->env.clock.clockid_res_ns, + sizeof(ff->ph->env.clock.clockid_res_ns)); +} + +static int write_clock_data(struct feat_fd *ff, + struct evlist *evlist __maybe_unused) +{ + u64 *data64; + u32 data32; + int ret; + + /* version */ + data32 = 1; + + ret = do_write(ff, &data32, sizeof(data32)); + if (ret < 0) + return ret; + + /* clockid */ + data32 = ff->ph->env.clock.clockid; + + ret = do_write(ff, &data32, sizeof(data32)); + if (ret < 0) + return ret; + + /* TOD ref time */ + data64 = &ff->ph->env.clock.tod_ns; + + ret = do_write(ff, data64, sizeof(*data64)); + if (ret < 0) + return ret; + + /* clockid ref time */ + data64 = &ff->ph->env.clock.clockid_ns; + + return do_write(ff, data64, sizeof(*data64)); } static int write_dir_format(struct feat_fd *ff, @@ -1546,7 +1581,50 @@ static void print_cpu_topology(struct feat_fd *ff, FILE *fp) static void print_clockid(struct feat_fd *ff, FILE *fp) { fprintf(fp, "# clockid frequency: %"PRIu64" MHz\n", - ff->ph->env.clockid_res_ns * 1000); + ff->ph->env.clock.clockid_res_ns * 1000); +} + +static void print_clock_data(struct feat_fd *ff, FILE *fp) +{ + struct timespec clockid_ns; + char tstr[64], date[64]; + struct timeval tod_ns; + clockid_t clockid; + struct tm ltime; + u64 ref; + + if (!ff->ph->env.clock.enabled) { + fprintf(fp, "# reference time disabled\n"); + return; + } + + /* Compute TOD time. */ + ref = ff->ph->env.clock.tod_ns; + tod_ns.tv_sec = ref / NSEC_PER_SEC; + ref -= tod_ns.tv_sec * NSEC_PER_SEC; + tod_ns.tv_usec = ref / NSEC_PER_USEC; + + /* Compute clockid time. */ + ref = ff->ph->env.clock.clockid_ns; + clockid_ns.tv_sec = ref / NSEC_PER_SEC; + ref -= clockid_ns.tv_sec * NSEC_PER_SEC; + clockid_ns.tv_nsec = ref; + + clockid = ff->ph->env.clock.clockid; + + if (localtime_r(&tod_ns.tv_sec, <ime) == NULL) + snprintf(tstr, sizeof(tstr), "<error>"); + else { + strftime(date, sizeof(date), "%F %T", <ime); + scnprintf(tstr, sizeof(tstr), "%s.%06d", + date, (int) tod_ns.tv_usec); + } + + fprintf(fp, "# clockid: %s (%u)\n", clockid_name(clockid), clockid); + fprintf(fp, "# reference time: %s = %ld.%06d (TOD) = %ld.%09ld (%s)\n", + tstr, tod_ns.tv_sec, (int) tod_ns.tv_usec, + clockid_ns.tv_sec, clockid_ns.tv_nsec, + clockid_name(clockid)); } static void print_dir_format(struct feat_fd *ff, FILE *fp) @@ -1978,7 +2056,7 @@ static int __event_process_build_id(struct perf_record_header_build_id *bev, struct machine *machine; u16 cpumode; struct dso *dso; - enum dso_kernel_type dso_type; + enum dso_space_type dso_space; machine = perf_session__findnew_machine(session, bev->pid); if (!machine) @@ -1988,14 +2066,14 @@ static int __event_process_build_id(struct perf_record_header_build_id *bev, switch (cpumode) { case PERF_RECORD_MISC_KERNEL: - dso_type = DSO_TYPE_KERNEL; + dso_space = DSO_SPACE__KERNEL; break; case PERF_RECORD_MISC_GUEST_KERNEL: - dso_type = DSO_TYPE_GUEST_KERNEL; + dso_space = DSO_SPACE__KERNEL_GUEST; break; case PERF_RECORD_MISC_USER: case PERF_RECORD_MISC_GUEST_USER: - dso_type = DSO_TYPE_USER; + dso_space = DSO_SPACE__USER; break; default: goto out; @@ -2007,14 +2085,13 @@ static int __event_process_build_id(struct perf_record_header_build_id *bev, dso__set_build_id(dso, &bev->build_id); - if (dso_type != DSO_TYPE_USER) { + if (dso_space != DSO_SPACE__USER) { struct kmod_path m = { .name = NULL, }; if (!kmod_path__parse_name(&m, filename) && m.kmod) dso__set_module_info(dso, &m, machine); - else - dso->kernel = dso_type; + dso->kernel = dso_space; free(m.name); } @@ -2732,9 +2809,43 @@ out: static int process_clockid(struct feat_fd *ff, void *data __maybe_unused) { - if (do_read_u64(ff, &ff->ph->env.clockid_res_ns)) + if (do_read_u64(ff, &ff->ph->env.clock.clockid_res_ns)) + return -1; + + return 0; +} + +static int process_clock_data(struct feat_fd *ff, + void *_data __maybe_unused) +{ + u32 data32; + u64 data64; + + /* version */ + if (do_read_u32(ff, &data32)) + return -1; + + if (data32 != 1) + return -1; + + /* clockid */ + if (do_read_u32(ff, &data32)) + return -1; + + ff->ph->env.clock.clockid = data32; + + /* TOD ref time */ + if (do_read_u64(ff, &data64)) + return -1; + + ff->ph->env.clock.tod_ns = data64; + + /* clockid ref time */ + if (do_read_u64(ff, &data64)) return -1; + ff->ph->env.clock.clockid_ns = data64; + ff->ph->env.clock.enabled = true; return 0; } @@ -3008,6 +3119,7 @@ const struct perf_header_feature_ops feat_ops[HEADER_LAST_FEATURE] = { FEAT_OPR(BPF_BTF, bpf_btf, false), FEAT_OPR(COMPRESSED, compressed, false), FEAT_OPR(CPU_PMU_CAPS, cpu_pmu_caps, false), + FEAT_OPR(CLOCK_DATA, clock_data, false), }; struct header_print_data { diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h index 650bd1c7a99b..2aca71763ecf 100644 --- a/tools/perf/util/header.h +++ b/tools/perf/util/header.h @@ -44,6 +44,7 @@ enum { HEADER_BPF_BTF, HEADER_COMPRESSED, HEADER_CPU_PMU_CAPS, + HEADER_CLOCK_DATA, HEADER_LAST_FEATURE, HEADER_FEAT_BITS = 256, }; diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c index f8ccfd6be0ee..697513f35154 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c @@ -55,6 +55,7 @@ enum intel_pt_pkt_state { INTEL_PT_STATE_TIP_PGD, INTEL_PT_STATE_FUP, INTEL_PT_STATE_FUP_NO_TIP, + INTEL_PT_STATE_RESAMPLE, }; static inline bool intel_pt_sample_time(enum intel_pt_pkt_state pkt_state) @@ -65,6 +66,7 @@ static inline bool intel_pt_sample_time(enum intel_pt_pkt_state pkt_state) case INTEL_PT_STATE_ERR_RESYNC: case INTEL_PT_STATE_IN_SYNC: case INTEL_PT_STATE_TNT_CONT: + case INTEL_PT_STATE_RESAMPLE: return true; case INTEL_PT_STATE_TNT: case INTEL_PT_STATE_TIP: @@ -109,6 +111,9 @@ struct intel_pt_decoder { bool fixup_last_mtc; bool have_last_ip; bool in_psb; + bool hop; + bool hop_psb_fup; + bool leap; enum intel_pt_param_flags flags; uint64_t pos; uint64_t last_ip; @@ -235,6 +240,8 @@ struct intel_pt_decoder *intel_pt_decoder_new(struct intel_pt_params *params) decoder->data = params->data; decoder->return_compression = params->return_compression; decoder->branch_enable = params->branch_enable; + decoder->hop = params->quick >= 1; + decoder->leap = params->quick >= 2; decoder->flags = params->flags; @@ -275,6 +282,9 @@ struct intel_pt_decoder *intel_pt_decoder_new(struct intel_pt_params *params) intel_pt_log("timestamp: tsc_ctc_mult %u\n", decoder->tsc_ctc_mult); intel_pt_log("timestamp: tsc_slip %#x\n", decoder->tsc_slip); + if (decoder->hop) + intel_pt_log("Hop mode: decoding FUP and TIPs, but not TNT\n"); + return decoder; } @@ -1164,6 +1174,7 @@ static int intel_pt_walk_fup(struct intel_pt_decoder *decoder) return 0; if (err == -EAGAIN || intel_pt_fup_with_nlip(decoder, &intel_pt_insn, ip, err)) { + decoder->pkt_state = INTEL_PT_STATE_IN_SYNC; if (intel_pt_fup_event(decoder)) return 0; return -EAGAIN; @@ -1729,8 +1740,14 @@ static int intel_pt_walk_psbend(struct intel_pt_decoder *decoder) case INTEL_PT_FUP: decoder->pge = true; - if (decoder->packet.count) + if (decoder->packet.count) { intel_pt_set_last_ip(decoder); + if (decoder->hop) { + /* Act on FUP at PSBEND */ + decoder->ip = decoder->last_ip; + decoder->hop_psb_fup = true; + } + } break; case INTEL_PT_MODE_TSX: @@ -1874,6 +1891,127 @@ static int intel_pt_walk_fup_tip(struct intel_pt_decoder *decoder) } } +static int intel_pt_resample(struct intel_pt_decoder *decoder) +{ + decoder->pkt_state = INTEL_PT_STATE_IN_SYNC; + decoder->state.type = INTEL_PT_INSTRUCTION; + decoder->state.from_ip = decoder->ip; + decoder->state.to_ip = 0; + return 0; +} + +#define HOP_PROCESS 0 +#define HOP_IGNORE 1 +#define HOP_RETURN 2 +#define HOP_AGAIN 3 + +static int intel_pt_scan_for_psb(struct intel_pt_decoder *decoder); + +/* Hop mode: Ignore TNT, do not walk code, but get ip from FUPs and TIPs */ +static int intel_pt_hop_trace(struct intel_pt_decoder *decoder, bool *no_tip, int *err) +{ + /* Leap from PSB to PSB, getting ip from FUP within PSB+ */ + if (decoder->leap && !decoder->in_psb && decoder->packet.type != INTEL_PT_PSB) { + *err = intel_pt_scan_for_psb(decoder); + if (*err) + return HOP_RETURN; + } + + switch (decoder->packet.type) { + case INTEL_PT_TNT: + return HOP_IGNORE; + + case INTEL_PT_TIP_PGD: + if (!decoder->packet.count) + return HOP_IGNORE; + intel_pt_set_ip(decoder); + decoder->state.type |= INTEL_PT_TRACE_END; + decoder->state.from_ip = 0; + decoder->state.to_ip = decoder->ip; + return HOP_RETURN; + + case INTEL_PT_TIP: + if (!decoder->packet.count) + return HOP_IGNORE; + intel_pt_set_ip(decoder); + decoder->state.type = INTEL_PT_INSTRUCTION; + decoder->state.from_ip = decoder->ip; + decoder->state.to_ip = 0; + return HOP_RETURN; + + case INTEL_PT_FUP: + if (!decoder->packet.count) + return HOP_IGNORE; + intel_pt_set_ip(decoder); + if (intel_pt_fup_event(decoder)) + return HOP_RETURN; + if (!decoder->branch_enable) + *no_tip = true; + if (*no_tip) { + decoder->state.type = INTEL_PT_INSTRUCTION; + decoder->state.from_ip = decoder->ip; + decoder->state.to_ip = 0; + return HOP_RETURN; + } + *err = intel_pt_walk_fup_tip(decoder); + if (!*err) + decoder->pkt_state = INTEL_PT_STATE_RESAMPLE; + return HOP_RETURN; + + case INTEL_PT_PSB: + decoder->last_ip = 0; + decoder->have_last_ip = true; + decoder->hop_psb_fup = false; + *err = intel_pt_walk_psbend(decoder); + if (*err == -EAGAIN) + return HOP_AGAIN; + if (*err) + return HOP_RETURN; + if (decoder->hop_psb_fup) { + decoder->hop_psb_fup = false; + decoder->state.type = INTEL_PT_INSTRUCTION; + decoder->state.from_ip = decoder->ip; + decoder->state.to_ip = 0; + return HOP_RETURN; + } + if (decoder->cbr != decoder->cbr_seen) { + decoder->state.type = 0; + return HOP_RETURN; + } + return HOP_IGNORE; + + case INTEL_PT_BAD: + case INTEL_PT_PAD: + case INTEL_PT_TIP_PGE: + case INTEL_PT_TSC: + case INTEL_PT_TMA: + case INTEL_PT_MODE_EXEC: + case INTEL_PT_MODE_TSX: + case INTEL_PT_MTC: + case INTEL_PT_CYC: + case INTEL_PT_VMCS: + case INTEL_PT_PSBEND: + case INTEL_PT_CBR: + case INTEL_PT_TRACESTOP: + case INTEL_PT_PIP: + case INTEL_PT_OVF: + case INTEL_PT_MNT: + case INTEL_PT_PTWRITE: + case INTEL_PT_PTWRITE_IP: + case INTEL_PT_EXSTOP: + case INTEL_PT_EXSTOP_IP: + case INTEL_PT_MWAIT: + case INTEL_PT_PWRE: + case INTEL_PT_PWRX: + case INTEL_PT_BBP: + case INTEL_PT_BIP: + case INTEL_PT_BEP: + case INTEL_PT_BEP_IP: + default: + return HOP_PROCESS; + } +} + static int intel_pt_walk_trace(struct intel_pt_decoder *decoder) { bool no_tip = false; @@ -1884,6 +2022,19 @@ static int intel_pt_walk_trace(struct intel_pt_decoder *decoder) if (err) return err; next: + if (decoder->hop) { + switch (intel_pt_hop_trace(decoder, &no_tip, &err)) { + case HOP_IGNORE: + continue; + case HOP_RETURN: + return err; + case HOP_AGAIN: + goto next; + default: + break; + } + } + switch (decoder->packet.type) { case INTEL_PT_TNT: if (!decoder->packet.count) @@ -1913,6 +2064,12 @@ next: decoder->state.from_ip = 0; decoder->state.to_ip = decoder->ip; decoder->state.type |= INTEL_PT_TRACE_BEGIN; + /* + * In hop mode, resample to get the to_ip as an + * "instruction" sample. + */ + if (decoder->hop) + decoder->pkt_state = INTEL_PT_STATE_RESAMPLE; return 0; } @@ -1942,17 +2099,13 @@ next: } if (decoder->set_fup_mwait) no_tip = true; + if (no_tip) + decoder->pkt_state = INTEL_PT_STATE_FUP_NO_TIP; + else + decoder->pkt_state = INTEL_PT_STATE_FUP; err = intel_pt_walk_fup(decoder); - if (err != -EAGAIN) { - if (err) - return err; - if (no_tip) - decoder->pkt_state = - INTEL_PT_STATE_FUP_NO_TIP; - else - decoder->pkt_state = INTEL_PT_STATE_FUP; - return 0; - } + if (err != -EAGAIN) + return err; if (no_tip) { no_tip = false; break; @@ -1980,8 +2133,10 @@ next: * possibility of another CBR change that gets caught up * in the PSB+. */ - if (decoder->cbr != decoder->cbr_seen) + if (decoder->cbr != decoder->cbr_seen) { + decoder->state.type = 0; return 0; + } break; case INTEL_PT_PIP: @@ -2022,8 +2177,10 @@ next: case INTEL_PT_CBR: intel_pt_calc_cbr(decoder); - if (decoder->cbr != decoder->cbr_seen) + if (decoder->cbr != decoder->cbr_seen) { + decoder->state.type = 0; return 0; + } break; case INTEL_PT_MODE_EXEC: @@ -2032,7 +2189,7 @@ next: case INTEL_PT_MODE_TSX: /* MODE_TSX need not be followed by FUP */ - if (!decoder->pge) { + if (!decoder->pge || decoder->in_psb) { intel_pt_update_in_tx(decoder); break; } @@ -2423,7 +2580,11 @@ static int intel_pt_sync_ip(struct intel_pt_decoder *decoder) if (err) return err; - decoder->pkt_state = INTEL_PT_STATE_IN_SYNC; + /* In hop mode, resample to get the to_ip as an "instruction" sample */ + if (decoder->hop) + decoder->pkt_state = INTEL_PT_STATE_RESAMPLE; + else + decoder->pkt_state = INTEL_PT_STATE_IN_SYNC; decoder->overflow = false; decoder->state.from_ip = 0; @@ -2531,6 +2692,7 @@ static int intel_pt_sync(struct intel_pt_decoder *decoder) decoder->ip = 0; intel_pt_clear_stack(&decoder->stack); +leap: err = intel_pt_scan_for_psb(decoder); if (err) return err; @@ -2544,7 +2706,20 @@ static int intel_pt_sync(struct intel_pt_decoder *decoder) if (decoder->ip) { decoder->state.type = 0; /* Do not have a sample */ - decoder->pkt_state = INTEL_PT_STATE_IN_SYNC; + /* + * In hop mode, resample to get the PSB FUP ip as an + * "instruction" sample. + */ + if (decoder->hop) + decoder->pkt_state = INTEL_PT_STATE_RESAMPLE; + else + decoder->pkt_state = INTEL_PT_STATE_IN_SYNC; + } else if (decoder->leap) { + /* + * In leap mode, only PSB+ is decoded, so keeping leaping to the + * next PSB until there is an ip. + */ + goto leap; } else { return intel_pt_sync_ip(decoder); } @@ -2599,19 +2774,18 @@ const struct intel_pt_state *intel_pt_decode(struct intel_pt_decoder *decoder) err = intel_pt_walk_tip(decoder); break; case INTEL_PT_STATE_FUP: - decoder->pkt_state = INTEL_PT_STATE_IN_SYNC; err = intel_pt_walk_fup(decoder); if (err == -EAGAIN) err = intel_pt_walk_fup_tip(decoder); - else if (!err) - decoder->pkt_state = INTEL_PT_STATE_FUP; break; case INTEL_PT_STATE_FUP_NO_TIP: - decoder->pkt_state = INTEL_PT_STATE_IN_SYNC; err = intel_pt_walk_fup(decoder); if (err == -EAGAIN) err = intel_pt_walk_trace(decoder); break; + case INTEL_PT_STATE_RESAMPLE: + err = intel_pt_resample(decoder); + break; default: err = intel_pt_bug(decoder); break; diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h index e289e463d635..8645fc265481 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h +++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h @@ -250,6 +250,7 @@ struct intel_pt_params { uint32_t tsc_ctc_ratio_n; uint32_t tsc_ctc_ratio_d; enum intel_pt_param_flags flags; + unsigned int quick; }; struct intel_pt_decoder; diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c index cb3c1e569a2d..0af4e81c46e2 100644 --- a/tools/perf/util/intel-pt.c +++ b/tools/perf/util/intel-pt.c @@ -236,7 +236,7 @@ static void intel_pt_log_event(union perf_event *event) if (!intel_pt_enable_logging || !f) return; - perf_event__fprintf(event, f); + perf_event__fprintf(event, NULL, f); } static void intel_pt_dump_sample(struct perf_session *session, @@ -249,6 +249,24 @@ static void intel_pt_dump_sample(struct perf_session *session, intel_pt_dump(pt, sample->aux_sample.data, sample->aux_sample.size); } +static bool intel_pt_log_events(struct intel_pt *pt, u64 tm) +{ + struct perf_time_interval *range = pt->synth_opts.ptime_range; + int n = pt->synth_opts.range_num; + + if (pt->synth_opts.log_plus_flags & AUXTRACE_LOG_FLG_ALL_PERF_EVTS) + return true; + + if (pt->synth_opts.log_minus_flags & AUXTRACE_LOG_FLG_ALL_PERF_EVTS) + return false; + + /* perf_time__ranges_skip_sample does not work if time is zero */ + if (!tm) + tm = 1; + + return !n || !perf_time__ranges_skip_sample(range, n, tm); +} + static int intel_pt_do_fix_overlap(struct intel_pt *pt, struct auxtrace_buffer *a, struct auxtrace_buffer *b) { @@ -520,6 +538,17 @@ intel_pt_cache_lookup(struct dso *dso, struct machine *machine, u64 offset) return auxtrace_cache__lookup(dso->auxtrace_cache, offset); } +static void intel_pt_cache_invalidate(struct dso *dso, struct machine *machine, + u64 offset) +{ + struct auxtrace_cache *c = intel_pt_cache(dso, machine); + + if (!c) + return; + + auxtrace_cache__remove(dso->auxtrace_cache, offset); +} + static inline u8 intel_pt_cpumode(struct intel_pt *pt, uint64_t ip) { return ip >= pt->kernel_start ? @@ -1001,6 +1030,7 @@ static struct intel_pt_queue *intel_pt_alloc_queue(struct intel_pt *pt, params.mtc_period = intel_pt_mtc_period(pt); params.tsc_ctc_ratio_n = pt->tsc_ctc_ratio_n; params.tsc_ctc_ratio_d = pt->tsc_ctc_ratio_d; + params.quick = pt->synth_opts.quick; if (pt->filts.cnt > 0) params.pgd_ip = intel_pt_pgd_ip; @@ -1394,7 +1424,10 @@ static int intel_pt_synth_instruction_sample(struct intel_pt_queue *ptq) sample.id = ptq->pt->instructions_id; sample.stream_id = ptq->pt->instructions_id; - sample.period = ptq->state->tot_insn_cnt - ptq->last_insn_cnt; + if (pt->synth_opts.quick) + sample.period = 1; + else + sample.period = ptq->state->tot_insn_cnt - ptq->last_insn_cnt; sample.cyc_cnt = ptq->ipc_cyc_cnt - ptq->last_in_cyc_cnt; if (sample.cyc_cnt) { @@ -1852,6 +1885,15 @@ static int intel_pt_synth_error(struct intel_pt *pt, int code, int cpu, char msg[MAX_AUXTRACE_ERROR_MSG]; int err; + if (pt->synth_opts.error_minus_flags) { + if (code == INTEL_PT_ERR_OVR && + pt->synth_opts.error_minus_flags & AUXTRACE_ERR_FLG_OVERFLOW) + return 0; + if (code == INTEL_PT_ERR_LOST && + pt->synth_opts.error_minus_flags & AUXTRACE_ERR_FLG_DATA_LOST) + return 0; + } + intel_pt__strerror(code, msg, MAX_AUXTRACE_ERROR_MSG); auxtrace_synth_error(&event.auxtrace_error, PERF_AUXTRACE_ERROR_ITRACE, @@ -2566,10 +2608,6 @@ static int intel_pt_context_switch(struct intel_pt *pt, union perf_event *event, return -EINVAL; } - intel_pt_log("context_switch: cpu %d pid %d tid %d time %"PRIu64" tsc %#"PRIx64"\n", - cpu, pid, tid, sample->time, perf_time_to_tsc(sample->time, - &pt->tc)); - ret = intel_pt_sync_switch(pt, cpu, tid, sample->time); if (ret <= 0) return ret; @@ -2594,6 +2632,67 @@ static int intel_pt_process_itrace_start(struct intel_pt *pt, event->itrace_start.tid); } +static int intel_pt_find_map(struct thread *thread, u8 cpumode, u64 addr, + struct addr_location *al) +{ + if (!al->map || addr < al->map->start || addr >= al->map->end) { + if (!thread__find_map(thread, cpumode, addr, al)) + return -1; + } + + return 0; +} + +/* Invalidate all instruction cache entries that overlap the text poke */ +static int intel_pt_text_poke(struct intel_pt *pt, union perf_event *event) +{ + u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; + u64 addr = event->text_poke.addr + event->text_poke.new_len - 1; + /* Assume text poke begins in a basic block no more than 4096 bytes */ + int cnt = 4096 + event->text_poke.new_len; + struct thread *thread = pt->unknown_thread; + struct addr_location al = { .map = NULL }; + struct machine *machine = pt->machine; + struct intel_pt_cache_entry *e; + u64 offset; + + if (!event->text_poke.new_len) + return 0; + + for (; cnt; cnt--, addr--) { + if (intel_pt_find_map(thread, cpumode, addr, &al)) { + if (addr < event->text_poke.addr) + return 0; + continue; + } + + if (!al.map->dso || !al.map->dso->auxtrace_cache) + continue; + + offset = al.map->map_ip(al.map, addr); + + e = intel_pt_cache_lookup(al.map->dso, machine, offset); + if (!e) + continue; + + if (addr + e->byte_cnt + e->length <= event->text_poke.addr) { + /* + * No overlap. Working backwards there cannot be another + * basic block that overlaps the text poke if there is a + * branch instruction before the text poke address. + */ + if (e->branch != INTEL_PT_BR_NO_BRANCH) + return 0; + } else { + intel_pt_cache_invalidate(al.map->dso, machine, offset); + intel_pt_log("Invalidated instruction cache for %s at %#"PRIx64"\n", + al.map->dso->long_name, addr); + } + } + + return 0; +} + static int intel_pt_process_event(struct perf_session *session, union perf_event *event, struct perf_sample *sample, @@ -2662,9 +2761,14 @@ static int intel_pt_process_event(struct perf_session *session, event->header.type == PERF_RECORD_SWITCH_CPU_WIDE) err = intel_pt_context_switch(pt, event, sample); - intel_pt_log("event %u: cpu %d time %"PRIu64" tsc %#"PRIx64" ", - event->header.type, sample->cpu, sample->time, timestamp); - intel_pt_log_event(event); + if (!err && event->header.type == PERF_RECORD_TEXT_POKE) + err = intel_pt_text_poke(pt, event); + + if (intel_pt_enable_logging && intel_pt_log_events(pt, sample->time)) { + intel_pt_log("event %u: cpu %d time %"PRIu64" tsc %#"PRIx64" ", + event->header.type, sample->cpu, sample->time, timestamp); + intel_pt_log_event(event); + } return err; } @@ -2913,8 +3017,15 @@ static int intel_pt_synth_events(struct intel_pt *pt, if (pt->synth_opts.callchain) attr.sample_type |= PERF_SAMPLE_CALLCHAIN; - if (pt->synth_opts.last_branch) + if (pt->synth_opts.last_branch) { attr.sample_type |= PERF_SAMPLE_BRANCH_STACK; + /* + * We don't use the hardware index, but the sample generation + * code uses the new format branch_stack with this field, + * so the event attributes must indicate that it's present. + */ + attr.branch_sample_type |= PERF_SAMPLE_BRANCH_HW_INDEX; + } if (pt->synth_opts.instructions) { attr.config = PERF_COUNT_HW_INSTRUCTIONS; diff --git a/tools/perf/util/jitdump.c b/tools/perf/util/jitdump.c index 32bb05e03fb2..0804308ef285 100644 --- a/tools/perf/util/jitdump.c +++ b/tools/perf/util/jitdump.c @@ -26,6 +26,7 @@ #include "jit.h" #include "jitdump.h" #include "genelf.h" +#include "thread.h" #include <linux/ctype.h> #include <linux/zalloc.h> @@ -749,6 +750,28 @@ jit_detect(char *mmap_name, pid_t pid) return 0; } +static void jit_add_pid(struct machine *machine, pid_t pid) +{ + struct thread *thread = machine__findnew_thread(machine, pid, pid); + + if (!thread) { + pr_err("%s: thread %d not found or created\n", __func__, pid); + return; + } + + thread->priv = (void *)1; +} + +static bool jit_has_pid(struct machine *machine, pid_t pid) +{ + struct thread *thread = machine__find_thread(machine, pid, pid); + + if (!thread) + return 0; + + return (bool)thread->priv; +} + int jit_process(struct perf_session *session, struct perf_data *output, @@ -764,8 +787,13 @@ jit_process(struct perf_session *session, /* * first, detect marker mmap (i.e., the jitdump mmap) */ - if (jit_detect(filename, pid)) + if (jit_detect(filename, pid)) { + // Strip //anon* mmaps if we processed a jitdump for this pid + if (jit_has_pid(machine, pid) && (strncmp(filename, "//anon", 6) == 0)) + return 1; + return 0; + } memset(&jd, 0, sizeof(jd)); @@ -784,6 +812,7 @@ jit_process(struct perf_session *session, ret = jit_inject(&jd, filename); if (!ret) { + jit_add_pid(machine, pid); *nbytes = jd.bytes_written; ret = 1; } diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index d5384807372b..85587de027a5 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -703,7 +703,7 @@ static struct dso *machine__findnew_module_dso(struct machine *machine, dso__set_module_info(dso, m, machine); dso__set_long_name(dso, strdup(filename), true); - dso->kernel = DSO_TYPE_KERNEL; + dso->kernel = DSO_SPACE__KERNEL; } dso__get(dso); @@ -736,12 +736,6 @@ int machine__process_switch_event(struct machine *machine __maybe_unused, return 0; } -static int is_bpf_image(const char *name) -{ - return strncmp(name, "bpf_trampoline_", sizeof("bpf_trampoline_") - 1) == 0 || - strncmp(name, "bpf_dispatcher_", sizeof("bpf_dispatcher_") - 1) == 0; -} - static int machine__process_ksymbol_register(struct machine *machine, union perf_event *event, struct perf_sample *sample __maybe_unused) @@ -753,7 +747,7 @@ static int machine__process_ksymbol_register(struct machine *machine, struct dso *dso = dso__new(event->ksymbol.name); if (dso) { - dso->kernel = DSO_TYPE_KERNEL; + dso->kernel = DSO_SPACE__KERNEL; map = map__new2(0, dso); } @@ -762,6 +756,12 @@ static int machine__process_ksymbol_register(struct machine *machine, return -ENOMEM; } + if (event->ksymbol.ksym_type == PERF_RECORD_KSYMBOL_TYPE_OOL) { + map->dso->binary_type = DSO_BINARY_TYPE__OOL; + map->dso->data.file_size = event->ksymbol.len; + dso__set_loaded(map->dso); + } + map->start = event->ksymbol.addr; map->end = map->start + event->ksymbol.len; maps__insert(&machine->kmaps, map); @@ -808,6 +808,47 @@ int machine__process_ksymbol(struct machine *machine __maybe_unused, return machine__process_ksymbol_register(machine, event, sample); } +int machine__process_text_poke(struct machine *machine, union perf_event *event, + struct perf_sample *sample __maybe_unused) +{ + struct map *map = maps__find(&machine->kmaps, event->text_poke.addr); + u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; + + if (dump_trace) + perf_event__fprintf_text_poke(event, machine, stdout); + + if (!event->text_poke.new_len) + return 0; + + if (cpumode != PERF_RECORD_MISC_KERNEL) { + pr_debug("%s: unsupported cpumode - ignoring\n", __func__); + return 0; + } + + if (map && map->dso) { + u8 *new_bytes = event->text_poke.bytes + event->text_poke.old_len; + int ret; + + /* + * Kernel maps might be changed when loading symbols so loading + * must be done prior to using kernel maps. + */ + map__load(map); + ret = dso__data_write_cache_addr(map->dso, map, machine, + event->text_poke.addr, + new_bytes, + event->text_poke.new_len); + if (ret != event->text_poke.new_len) + pr_debug("Failed to write kernel text poke at %#" PRI_lx64 "\n", + event->text_poke.addr); + } else { + pr_debug("Failed to find kernel text poke address map for %#" PRI_lx64 "\n", + event->text_poke.addr); + } + + return 0; +} + static struct map *machine__addnew_module_map(struct machine *machine, u64 start, const char *filename) { @@ -924,14 +965,14 @@ static struct dso *machine__get_kernel(struct machine *machine) vmlinux_name = symbol_conf.vmlinux_name; kernel = machine__findnew_kernel(machine, vmlinux_name, - "[kernel]", DSO_TYPE_KERNEL); + "[kernel]", DSO_SPACE__KERNEL); } else { if (symbol_conf.default_guest_vmlinux_name) vmlinux_name = symbol_conf.default_guest_vmlinux_name; kernel = machine__findnew_kernel(machine, vmlinux_name, "[guest.kernel]", - DSO_TYPE_GUEST_KERNEL); + DSO_SPACE__KERNEL_GUEST); } if (kernel != NULL && (!kernel->has_build_id)) @@ -1559,7 +1600,7 @@ static int machine__process_kernel_mmap_event(struct machine *machine, union perf_event *event) { struct map *map; - enum dso_kernel_type kernel_type; + enum dso_space_type dso_space; bool is_kernel_mmap; /* If we have maps from kcore then we do not need or want any others */ @@ -1567,9 +1608,9 @@ static int machine__process_kernel_mmap_event(struct machine *machine, return 0; if (machine__is_host(machine)) - kernel_type = DSO_TYPE_KERNEL; + dso_space = DSO_SPACE__KERNEL; else - kernel_type = DSO_TYPE_GUEST_KERNEL; + dso_space = DSO_SPACE__KERNEL_GUEST; is_kernel_mmap = memcmp(event->mmap.filename, machine->mmap_name, @@ -1629,7 +1670,7 @@ static int machine__process_kernel_mmap_event(struct machine *machine, if (kernel == NULL) goto out_problem; - kernel->kernel = kernel_type; + kernel->kernel = dso_space; if (__machine__create_kernel_maps(machine, kernel) < 0) { dso__put(kernel); goto out_problem; @@ -1930,6 +1971,8 @@ int machine__process_event(struct machine *machine, union perf_event *event, ret = machine__process_ksymbol(machine, event, sample); break; case PERF_RECORD_BPF_EVENT: ret = machine__process_bpf(machine, event, sample); break; + case PERF_RECORD_TEXT_POKE: + ret = machine__process_text_poke(machine, event, sample); break; default: ret = -1; break; diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h index fa1be9ea00fa..062c36a8433c 100644 --- a/tools/perf/util/machine.h +++ b/tools/perf/util/machine.h @@ -138,6 +138,9 @@ int machine__process_mmap2_event(struct machine *machine, union perf_event *even int machine__process_ksymbol(struct machine *machine, union perf_event *event, struct perf_sample *sample); +int machine__process_text_poke(struct machine *machine, + union perf_event *event, + struct perf_sample *sample); int machine__process_event(struct machine *machine, union perf_event *event, struct perf_sample *sample); diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index 53d96611e6a6..cc0faf8f1321 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c @@ -267,6 +267,27 @@ bool __map__is_bpf_prog(const struct map *map) return name && (strstr(name, "bpf_prog_") == name); } +bool __map__is_bpf_image(const struct map *map) +{ + const char *name; + + if (map->dso->binary_type == DSO_BINARY_TYPE__BPF_IMAGE) + return true; + + /* + * If PERF_RECORD_KSYMBOL is not included, the dso will not have + * type of DSO_BINARY_TYPE__BPF_IMAGE. In such cases, we can + * guess the type based on name. + */ + name = map->dso->short_name; + return name && is_bpf_image(name); +} + +bool __map__is_ool(const struct map *map) +{ + return map->dso && map->dso->binary_type == DSO_BINARY_TYPE__OOL; +} + bool map__has_symbols(const struct map *map) { return dso__has_symbols(map->dso); @@ -481,7 +502,7 @@ u64 map__rip_2objdump(struct map *map, u64 rip) * kernel modules also have DSO_TYPE_USER in dso->kernel, * but all kernel modules are ET_REL, so won't get here. */ - if (map->dso->kernel == DSO_TYPE_USER) + if (map->dso->kernel == DSO_SPACE__USER) return rip + map->dso->text_offset; return map->unmap_ip(map, rip) - map->reloc; @@ -511,7 +532,7 @@ u64 map__objdump_2mem(struct map *map, u64 ip) * kernel modules also have DSO_TYPE_USER in dso->kernel, * but all kernel modules are ET_REL, so won't get here. */ - if (map->dso->kernel == DSO_TYPE_USER) + if (map->dso->kernel == DSO_SPACE__USER) return map->unmap_ip(map, ip - map->dso->text_offset); return ip + map->reloc; diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h index 067036e8970c..c2f5d28fe73a 100644 --- a/tools/perf/util/map.h +++ b/tools/perf/util/map.h @@ -147,11 +147,14 @@ int map__set_kallsyms_ref_reloc_sym(struct map *map, const char *symbol_name, bool __map__is_kernel(const struct map *map); bool __map__is_extra_kernel_map(const struct map *map); bool __map__is_bpf_prog(const struct map *map); +bool __map__is_bpf_image(const struct map *map); +bool __map__is_ool(const struct map *map); static inline bool __map__is_kmodule(const struct map *map) { return !__map__is_kernel(map) && !__map__is_extra_kernel_map(map) && - !__map__is_bpf_prog(map); + !__map__is_bpf_prog(map) && !__map__is_ool(map) && + !__map__is_bpf_image(map); } bool map__has_symbols(const struct map *map); @@ -163,4 +166,9 @@ static inline bool is_entry_trampoline(const char *name) return !strcmp(name, ENTRY_TRAMPOLINE_NAME); } +static inline bool is_bpf_image(const char *name) +{ + return strncmp(name, "bpf_trampoline_", sizeof("bpf_trampoline_") - 1) == 0 || + strncmp(name, "bpf_dispatcher_", sizeof("bpf_dispatcher_") - 1) == 0; +} #endif /* __PERF_MAP_H */ diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c index 9e21aa767e41..ab5030fcfed4 100644 --- a/tools/perf/util/metricgroup.c +++ b/tools/perf/util/metricgroup.c @@ -24,6 +24,7 @@ #include <subcmd/parse-options.h> #include <api/fs/fs.h> #include "util.h" +#include <asm/bug.h> struct metric_event *metricgroup__lookup(struct rblist *metric_events, struct evsel *evsel, @@ -76,23 +77,79 @@ static struct rb_node *metric_event_new(struct rblist *rblist __maybe_unused, return &me->nd; } +static void metric_event_delete(struct rblist *rblist __maybe_unused, + struct rb_node *rb_node) +{ + struct metric_event *me = container_of(rb_node, struct metric_event, nd); + struct metric_expr *expr, *tmp; + + list_for_each_entry_safe(expr, tmp, &me->head, nd) { + free(expr->metric_refs); + free(expr->metric_events); + free(expr); + } + + free(me); +} + static void metricgroup__rblist_init(struct rblist *metric_events) { rblist__init(metric_events); metric_events->node_cmp = metric_event_cmp; metric_events->node_new = metric_event_new; + metric_events->node_delete = metric_event_delete; } -struct egroup { +void metricgroup__rblist_exit(struct rblist *metric_events) +{ + rblist__exit(metric_events); +} + +/* + * A node in the list of referenced metrics. metric_expr + * is held as a convenience to avoid a search through the + * metric list. + */ +struct metric_ref_node { + const char *metric_name; + const char *metric_expr; + struct list_head list; +}; + +struct metric { struct list_head nd; struct expr_parse_ctx pctx; const char *metric_name; const char *metric_expr; const char *metric_unit; + struct list_head metric_refs; + int metric_refs_cnt; int runtime; bool has_constraint; }; +#define RECURSION_ID_MAX 1000 + +struct expr_ids { + struct expr_id id[RECURSION_ID_MAX]; + int cnt; +}; + +static struct expr_id *expr_ids__alloc(struct expr_ids *ids) +{ + if (ids->cnt >= RECURSION_ID_MAX) + return NULL; + return &ids->id[ids->cnt++]; +} + +static void expr_ids__exit(struct expr_ids *ids) +{ + int i; + + for (i = 0; i < ids->cnt; i++) + free(ids->id[i].id); +} + /** * Find a group of events in perf_evlist that correpond to those from a parsed * metric expression. Note, as find_evsel_group is called in the same order as @@ -119,7 +176,7 @@ static struct evsel *find_evsel_group(struct evlist *perf_evlist, unsigned long *evlist_used) { struct evsel *ev, *current_leader = NULL; - double *val_ptr; + struct expr_id_data *val_ptr; int i = 0, matched_events = 0, events_to_match; const int idnum = (int)hashmap__size(&pctx->ids); @@ -206,7 +263,7 @@ static int metricgroup__setup_events(struct list_head *groups, struct metric_expr *expr; int i = 0; int ret = 0; - struct egroup *eg; + struct metric *m; struct evsel *evsel, *tmp; unsigned long *evlist_used; @@ -214,22 +271,23 @@ static int metricgroup__setup_events(struct list_head *groups, if (!evlist_used) return -ENOMEM; - list_for_each_entry (eg, groups, nd) { + list_for_each_entry (m, groups, nd) { struct evsel **metric_events; + struct metric_ref *metric_refs = NULL; metric_events = calloc(sizeof(void *), - hashmap__size(&eg->pctx.ids) + 1); + hashmap__size(&m->pctx.ids) + 1); if (!metric_events) { ret = -ENOMEM; break; } - evsel = find_evsel_group(perf_evlist, &eg->pctx, + evsel = find_evsel_group(perf_evlist, &m->pctx, metric_no_merge, - eg->has_constraint, metric_events, + m->has_constraint, metric_events, evlist_used); if (!evsel) { pr_debug("Cannot resolve %s: %s\n", - eg->metric_name, eg->metric_expr); + m->metric_name, m->metric_expr); free(metric_events); continue; } @@ -247,11 +305,42 @@ static int metricgroup__setup_events(struct list_head *groups, free(metric_events); break; } - expr->metric_expr = eg->metric_expr; - expr->metric_name = eg->metric_name; - expr->metric_unit = eg->metric_unit; + + /* + * Collect and store collected nested expressions + * for metric processing. + */ + if (m->metric_refs_cnt) { + struct metric_ref_node *ref; + + metric_refs = zalloc(sizeof(struct metric_ref) * (m->metric_refs_cnt + 1)); + if (!metric_refs) { + ret = -ENOMEM; + free(metric_events); + free(expr); + break; + } + + i = 0; + list_for_each_entry(ref, &m->metric_refs, list) { + /* + * Intentionally passing just const char pointers, + * originally from 'struct pmu_event' object. + * We don't need to change them, so there's no + * need to create our own copy. + */ + metric_refs[i].metric_name = ref->metric_name; + metric_refs[i].metric_expr = ref->metric_expr; + i++; + } + }; + + expr->metric_refs = metric_refs; + expr->metric_expr = m->metric_expr; + expr->metric_name = m->metric_name; + expr->metric_unit = m->metric_unit; expr->metric_events = metric_events; - expr->runtime = eg->runtime; + expr->runtime = m->runtime; list_add(&expr->nd, &me->head); } @@ -443,6 +532,9 @@ void metricgroup__print(bool metrics, bool metricgroups, char *filter, continue; strlist__add(me->metrics, s); } + + if (!raw) + free(s); } free(omg); } @@ -552,123 +644,349 @@ int __weak arch_get_runtimeparam(void) return 1; } -static int __metricgroup__add_metric(struct list_head *group_list, - struct pmu_event *pe, - bool metric_no_group, - int runtime) +static int __add_metric(struct list_head *metric_list, + struct pmu_event *pe, + bool metric_no_group, + int runtime, + struct metric **mp, + struct expr_id *parent, + struct expr_ids *ids) { - struct egroup *eg; + struct metric_ref_node *ref; + struct metric *m; - eg = malloc(sizeof(*eg)); - if (!eg) - return -ENOMEM; + if (*mp == NULL) { + /* + * We got in here for the parent group, + * allocate it and put it on the list. + */ + m = zalloc(sizeof(*m)); + if (!m) + return -ENOMEM; + + expr__ctx_init(&m->pctx); + m->metric_name = pe->metric_name; + m->metric_expr = pe->metric_expr; + m->metric_unit = pe->unit; + m->runtime = runtime; + m->has_constraint = metric_no_group || metricgroup__has_constraint(pe); + INIT_LIST_HEAD(&m->metric_refs); + m->metric_refs_cnt = 0; + + parent = expr_ids__alloc(ids); + if (!parent) { + free(m); + return -EINVAL; + } - expr__ctx_init(&eg->pctx); - eg->metric_name = pe->metric_name; - eg->metric_expr = pe->metric_expr; - eg->metric_unit = pe->unit; - eg->runtime = runtime; - eg->has_constraint = metric_no_group || metricgroup__has_constraint(pe); + parent->id = strdup(pe->metric_name); + if (!parent->id) { + free(m); + return -ENOMEM; + } + *mp = m; + } else { + /* + * We got here for the referenced metric, via the + * recursive metricgroup__add_metric call, add + * it to the parent group. + */ + m = *mp; + + ref = malloc(sizeof(*ref)); + if (!ref) + return -ENOMEM; + + /* + * Intentionally passing just const char pointers, + * from 'pe' object, so they never go away. We don't + * need to change them, so there's no need to create + * our own copy. + */ + ref->metric_name = pe->metric_name; + ref->metric_expr = pe->metric_expr; - if (expr__find_other(pe->metric_expr, NULL, &eg->pctx, runtime) < 0) { - expr__ctx_clear(&eg->pctx); - free(eg); + list_add(&ref->list, &m->metric_refs); + m->metric_refs_cnt++; + } + + /* Force all found IDs in metric to have us as parent ID. */ + WARN_ON_ONCE(!parent); + m->pctx.parent = parent; + + /* + * For both the parent and referenced metrics, we parse + * all the metric's IDs and add it to the parent context. + */ + if (expr__find_other(pe->metric_expr, NULL, &m->pctx, runtime) < 0) { + if (m->metric_refs_cnt == 0) { + expr__ctx_clear(&m->pctx); + free(m); + *mp = NULL; + } return -EINVAL; } - if (list_empty(group_list)) - list_add(&eg->nd, group_list); + /* + * We add new group only in the 'parent' call, + * so bail out for referenced metric case. + */ + if (m->metric_refs_cnt) + return 0; + + if (list_empty(metric_list)) + list_add(&m->nd, metric_list); else { struct list_head *pos; /* Place the largest groups at the front. */ - list_for_each_prev(pos, group_list) { - struct egroup *old = list_entry(pos, struct egroup, nd); + list_for_each_prev(pos, metric_list) { + struct metric *old = list_entry(pos, struct metric, nd); - if (hashmap__size(&eg->pctx.ids) <= + if (hashmap__size(&m->pctx.ids) <= hashmap__size(&old->pctx.ids)) break; } - list_add(&eg->nd, pos); + list_add(&m->nd, pos); } return 0; } +#define map_for_each_event(__pe, __idx, __map) \ + for (__idx = 0, __pe = &__map->table[__idx]; \ + __pe->name || __pe->metric_group || __pe->metric_name; \ + __pe = &__map->table[++__idx]) + +#define map_for_each_metric(__pe, __idx, __map, __metric) \ + map_for_each_event(__pe, __idx, __map) \ + if (__pe->metric_expr && \ + (match_metric(__pe->metric_group, __metric) || \ + match_metric(__pe->metric_name, __metric))) + +static struct pmu_event *find_metric(const char *metric, struct pmu_events_map *map) +{ + struct pmu_event *pe; + int i; + + map_for_each_event(pe, i, map) { + if (match_metric(pe->metric_name, metric)) + return pe; + } + + return NULL; +} + +static int recursion_check(struct metric *m, const char *id, struct expr_id **parent, + struct expr_ids *ids) +{ + struct expr_id_data *data; + struct expr_id *p; + int ret; + + /* + * We get the parent referenced by 'id' argument and + * traverse through all the parent object IDs to check + * if we already processed 'id', if we did, it's recursion + * and we fail. + */ + ret = expr__get_id(&m->pctx, id, &data); + if (ret) + return ret; + + p = data->parent; + + while (p->parent) { + if (!strcmp(p->id, id)) { + pr_err("failed: recursion detected for %s\n", id); + return -1; + } + p = p->parent; + } + + /* + * If we are over the limit of static entris, the metric + * is too difficult/nested to process, fail as well. + */ + p = expr_ids__alloc(ids); + if (!p) { + pr_err("failed: too many nested metrics\n"); + return -EINVAL; + } + + p->id = strdup(id); + p->parent = data->parent; + *parent = p; + + return p->id ? 0 : -ENOMEM; +} + +static int add_metric(struct list_head *metric_list, + struct pmu_event *pe, + bool metric_no_group, + struct metric **mp, + struct expr_id *parent, + struct expr_ids *ids); + +static int __resolve_metric(struct metric *m, + bool metric_no_group, + struct list_head *metric_list, + struct pmu_events_map *map, + struct expr_ids *ids) +{ + struct hashmap_entry *cur; + size_t bkt; + bool all; + int ret; + + /* + * Iterate all the parsed IDs and if there's metric, + * add it to the context. + */ + do { + all = true; + hashmap__for_each_entry((&m->pctx.ids), cur, bkt) { + struct expr_id *parent; + struct pmu_event *pe; + + pe = find_metric(cur->key, map); + if (!pe) + continue; + + ret = recursion_check(m, cur->key, &parent, ids); + if (ret) + return ret; + + all = false; + /* The metric key itself needs to go out.. */ + expr__del_id(&m->pctx, cur->key); + + /* ... and it gets resolved to the parent context. */ + ret = add_metric(metric_list, pe, metric_no_group, &m, parent, ids); + if (ret) + return ret; + + /* + * We added new metric to hashmap, so we need + * to break the iteration and start over. + */ + break; + } + } while (!all); + + return 0; +} + +static int resolve_metric(bool metric_no_group, + struct list_head *metric_list, + struct pmu_events_map *map, + struct expr_ids *ids) +{ + struct metric *m; + int err; + + list_for_each_entry(m, metric_list, nd) { + err = __resolve_metric(m, metric_no_group, metric_list, map, ids); + if (err) + return err; + } + return 0; +} + +static int add_metric(struct list_head *metric_list, + struct pmu_event *pe, + bool metric_no_group, + struct metric **m, + struct expr_id *parent, + struct expr_ids *ids) +{ + struct metric *orig = *m; + int ret = 0; + + pr_debug("metric expr %s for %s\n", pe->metric_expr, pe->metric_name); + + if (!strstr(pe->metric_expr, "?")) { + ret = __add_metric(metric_list, pe, metric_no_group, 1, m, parent, ids); + } else { + int j, count; + + count = arch_get_runtimeparam(); + + /* This loop is added to create multiple + * events depend on count value and add + * those events to metric_list. + */ + + for (j = 0; j < count && !ret; j++, *m = orig) + ret = __add_metric(metric_list, pe, metric_no_group, j, m, parent, ids); + } + + return ret; +} + static int metricgroup__add_metric(const char *metric, bool metric_no_group, struct strbuf *events, - struct list_head *group_list) + struct list_head *metric_list, + struct pmu_events_map *map) { - struct pmu_events_map *map = perf_pmu__find_map(NULL); + struct expr_ids ids = { .cnt = 0, }; struct pmu_event *pe; - struct egroup *eg; + struct metric *m; + LIST_HEAD(list); int i, ret; bool has_match = false; - if (!map) - return 0; + map_for_each_metric(pe, i, map, metric) { + has_match = true; + m = NULL; - for (i = 0; ; i++) { - pe = &map->table[i]; + ret = add_metric(&list, pe, metric_no_group, &m, NULL, &ids); + if (ret) + goto out; - if (!pe->name && !pe->metric_group && !pe->metric_name) { - /* End of pmu events. */ - if (!has_match) - return -EINVAL; - break; - } - if (!pe->metric_expr) - continue; - if (match_metric(pe->metric_group, metric) || - match_metric(pe->metric_name, metric)) { - has_match = true; - pr_debug("metric expr %s for %s\n", pe->metric_expr, pe->metric_name); - - if (!strstr(pe->metric_expr, "?")) { - ret = __metricgroup__add_metric(group_list, - pe, - metric_no_group, - 1); - if (ret) - return ret; - } else { - int j, count; - - count = arch_get_runtimeparam(); - - /* This loop is added to create multiple - * events depend on count value and add - * those events to group_list. - */ + /* + * Process any possible referenced metrics + * included in the expression. + */ + ret = resolve_metric(metric_no_group, + &list, map, &ids); + if (ret) + goto out; + } - for (j = 0; j < count; j++) { - ret = __metricgroup__add_metric( - group_list, pe, - metric_no_group, j); - if (ret) - return ret; - } - } - } + /* End of pmu events. */ + if (!has_match) { + ret = -EINVAL; + goto out; } - list_for_each_entry(eg, group_list, nd) { + + list_for_each_entry(m, &list, nd) { if (events->len > 0) strbuf_addf(events, ","); - if (eg->has_constraint) { + if (m->has_constraint) { metricgroup__add_metric_non_group(events, - &eg->pctx); + &m->pctx); } else { metricgroup__add_metric_weak_group(events, - &eg->pctx); + &m->pctx); } } - return 0; + +out: + /* + * add to metric_list so that they can be released + * even if it's failed + */ + list_splice(&list, metric_list); + expr_ids__exit(&ids); + return ret; } static int metricgroup__add_metric_list(const char *list, bool metric_no_group, struct strbuf *events, - struct list_head *group_list) + struct list_head *metric_list, + struct pmu_events_map *map) { char *llist, *nlist, *p; int ret = -EINVAL; @@ -683,7 +1001,7 @@ static int metricgroup__add_metric_list(const char *list, bool metric_no_group, while ((p = strsep(&llist, ",")) != NULL) { ret = metricgroup__add_metric(p, metric_no_group, events, - group_list); + metric_list, map); if (ret == -EINVAL) { fprintf(stderr, "Cannot find metric or group `%s'\n", p); @@ -698,50 +1016,88 @@ static int metricgroup__add_metric_list(const char *list, bool metric_no_group, return ret; } -static void metricgroup__free_egroups(struct list_head *group_list) +static void metric__free_refs(struct metric *metric) { - struct egroup *eg, *egtmp; + struct metric_ref_node *ref, *tmp; - list_for_each_entry_safe (eg, egtmp, group_list, nd) { - expr__ctx_clear(&eg->pctx); - list_del_init(&eg->nd); - free(eg); + list_for_each_entry_safe(ref, tmp, &metric->metric_refs, list) { + list_del(&ref->list); + free(ref); } } -int metricgroup__parse_groups(const struct option *opt, - const char *str, - bool metric_no_group, - bool metric_no_merge, - struct rblist *metric_events) +static void metricgroup__free_metrics(struct list_head *metric_list) +{ + struct metric *m, *tmp; + + list_for_each_entry_safe (m, tmp, metric_list, nd) { + metric__free_refs(m); + expr__ctx_clear(&m->pctx); + list_del_init(&m->nd); + free(m); + } +} + +static int parse_groups(struct evlist *perf_evlist, const char *str, + bool metric_no_group, + bool metric_no_merge, + struct perf_pmu *fake_pmu, + struct rblist *metric_events, + struct pmu_events_map *map) { struct parse_events_error parse_error; - struct evlist *perf_evlist = *(struct evlist **)opt->value; struct strbuf extra_events; - LIST_HEAD(group_list); + LIST_HEAD(metric_list); int ret; if (metric_events->nr_entries == 0) metricgroup__rblist_init(metric_events); ret = metricgroup__add_metric_list(str, metric_no_group, - &extra_events, &group_list); + &extra_events, &metric_list, map); if (ret) - return ret; + goto out; pr_debug("adding %s\n", extra_events.buf); bzero(&parse_error, sizeof(parse_error)); - ret = parse_events(perf_evlist, extra_events.buf, &parse_error); + ret = __parse_events(perf_evlist, extra_events.buf, &parse_error, fake_pmu); if (ret) { parse_events_print_error(&parse_error, extra_events.buf); goto out; } - strbuf_release(&extra_events); - ret = metricgroup__setup_events(&group_list, metric_no_merge, + ret = metricgroup__setup_events(&metric_list, metric_no_merge, perf_evlist, metric_events); out: - metricgroup__free_egroups(&group_list); + metricgroup__free_metrics(&metric_list); + strbuf_release(&extra_events); return ret; } +int metricgroup__parse_groups(const struct option *opt, + const char *str, + bool metric_no_group, + bool metric_no_merge, + struct rblist *metric_events) +{ + struct evlist *perf_evlist = *(struct evlist **)opt->value; + struct pmu_events_map *map = perf_pmu__find_map(NULL); + + if (!map) + return 0; + + return parse_groups(perf_evlist, str, metric_no_group, + metric_no_merge, NULL, metric_events, map); +} + +int metricgroup__parse_groups_test(struct evlist *evlist, + struct pmu_events_map *map, + const char *str, + bool metric_no_group, + bool metric_no_merge, + struct rblist *metric_events) +{ + return parse_groups(evlist, str, metric_no_group, + metric_no_merge, &perf_pmu__fake, metric_events, map); +} + bool metricgroup__has_metric(const char *metric) { struct pmu_events_map *map = perf_pmu__find_map(NULL); diff --git a/tools/perf/util/metricgroup.h b/tools/perf/util/metricgroup.h index 287850bcdeca..62623a39cbec 100644 --- a/tools/perf/util/metricgroup.h +++ b/tools/perf/util/metricgroup.h @@ -7,8 +7,10 @@ #include <stdbool.h> struct evsel; +struct evlist; struct option; struct rblist; +struct pmu_events_map; struct metric_event { struct rb_node nd; @@ -16,12 +18,18 @@ struct metric_event { struct list_head head; /* list of metric_expr */ }; +struct metric_ref { + const char *metric_name; + const char *metric_expr; +}; + struct metric_expr { struct list_head nd; const char *metric_expr; const char *metric_name; const char *metric_unit; struct evsel **metric_events; + struct metric_ref *metric_refs; int runtime; }; @@ -34,8 +42,16 @@ int metricgroup__parse_groups(const struct option *opt, bool metric_no_merge, struct rblist *metric_events); +int metricgroup__parse_groups_test(struct evlist *evlist, + struct pmu_events_map *map, + const char *str, + bool metric_no_group, + bool metric_no_merge, + struct rblist *metric_events); + void metricgroup__print(bool metrics, bool groups, char *filter, bool raw, bool details); bool metricgroup__has_metric(const char *metric); int arch_get_runtimeparam(void); +void metricgroup__rblist_exit(struct rblist *metric_events); #endif diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 3decbb203846..667cbca1547a 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -37,6 +37,7 @@ #include "util/evsel_config.h" #include "util/event.h" #include "util/pfm.h" +#include "perf.h" #define MAX_NAME_LEN 100 @@ -410,7 +411,7 @@ static int add_event_tool(struct list_head *list, int *idx, return -ENOMEM; evsel->tool_event = tool_event; if (tool_event == PERF_TOOL_DURATION_TIME) - evsel->unit = strdup("ns"); + evsel->unit = "ns"; return 0; } @@ -767,8 +768,8 @@ int parse_events_load_bpf_obj(struct parse_events_state *parse_state, return 0; errout: - parse_state->error->help = strdup("(add -v to see detail)"); - parse_state->error->str = strdup(errbuf); + parse_events__handle_error(parse_state->error, 0, + strdup(errbuf), strdup("(add -v to see detail)")); return err; } @@ -784,36 +785,38 @@ parse_events_config_bpf(struct parse_events_state *parse_state, return 0; list_for_each_entry(term, head_config, list) { - char errbuf[BUFSIZ]; int err; if (term->type_term != PARSE_EVENTS__TERM_TYPE_USER) { - snprintf(errbuf, sizeof(errbuf), - "Invalid config term for BPF object"); - errbuf[BUFSIZ - 1] = '\0'; - - parse_state->error->idx = term->err_term; - parse_state->error->str = strdup(errbuf); + parse_events__handle_error(parse_state->error, term->err_term, + strdup("Invalid config term for BPF object"), + NULL); return -EINVAL; } err = bpf__config_obj(obj, term, parse_state->evlist, &error_pos); if (err) { + char errbuf[BUFSIZ]; + int idx; + bpf__strerror_config_obj(obj, term, parse_state->evlist, &error_pos, err, errbuf, sizeof(errbuf)); - parse_state->error->help = strdup( + + if (err == -BPF_LOADER_ERRNO__OBJCONF_MAP_VALUE) + idx = term->err_val; + else + idx = term->err_term + error_pos; + + parse_events__handle_error(parse_state->error, idx, + strdup(errbuf), + strdup( "Hint:\tValid config terms:\n" " \tmap:[<arraymap>].value<indices>=[value]\n" " \tmap:[<eventmap>].event<indices>=[event]\n" "\n" " \twhere <indices> is something like [0,3...5] or [all]\n" -" \t(add -v to see detail)"); - parse_state->error->str = strdup(errbuf); - if (err == -BPF_LOADER_ERRNO__OBJCONF_MAP_VALUE) - parse_state->error->idx = term->err_val; - else - parse_state->error->idx = term->err_term + error_pos; +" \t(add -v to see detail)")); return err; } } @@ -877,8 +880,8 @@ int parse_events_load_bpf(struct parse_events_state *parse_state, -err, errbuf, sizeof(errbuf)); - parse_state->error->help = strdup("(add -v to see detail)"); - parse_state->error->str = strdup(errbuf); + parse_events__handle_error(parse_state->error, 0, + strdup(errbuf), strdup("(add -v to see detail)")); return err; } @@ -1450,7 +1453,7 @@ int parse_events_add_pmu(struct parse_events_state *parse_state, fprintf(stderr, "' that may result in non-fatal errors\n"); } - pmu = perf_pmu__find(name); + pmu = parse_state->fake_pmu ?: perf_pmu__find(name); if (!pmu) { char *err_str; @@ -1483,7 +1486,7 @@ int parse_events_add_pmu(struct parse_events_state *parse_state, } } - if (perf_pmu__check_alias(pmu, head_config, &info)) + if (!parse_state->fake_pmu && perf_pmu__check_alias(pmu, head_config, &info)) return -EINVAL; if (verbose > 1) { @@ -1516,7 +1519,7 @@ int parse_events_add_pmu(struct parse_events_state *parse_state, if (pmu->default_config && get_config_chgs(pmu, head_config, &config_terms)) return -ENOMEM; - if (perf_pmu__config(pmu, &attr, head_config, parse_state->error)) { + if (!parse_state->fake_pmu && perf_pmu__config(pmu, &attr, head_config, parse_state->error)) { struct evsel_config_term *pos, *tmp; list_for_each_entry_safe(pos, tmp, &config_terms, list) { @@ -1531,19 +1534,23 @@ int parse_events_add_pmu(struct parse_events_state *parse_state, evsel = __add_event(list, &parse_state->idx, &attr, true, get_config_name(head_config), pmu, &config_terms, auto_merge_stats, NULL); - if (evsel) { - evsel->unit = info.unit; - evsel->scale = info.scale; - evsel->per_pkg = info.per_pkg; - evsel->snapshot = info.snapshot; - evsel->metric_expr = info.metric_expr; - evsel->metric_name = info.metric_name; - evsel->pmu_name = name ? strdup(name) : NULL; - evsel->use_uncore_alias = use_uncore_alias; - evsel->percore = config_term_percore(&evsel->config_terms); - } + if (!evsel) + return -ENOMEM; - return evsel ? 0 : -ENOMEM; + evsel->pmu_name = name ? strdup(name) : NULL; + evsel->use_uncore_alias = use_uncore_alias; + evsel->percore = config_term_percore(&evsel->config_terms); + + if (parse_state->fake_pmu) + return 0; + + evsel->unit = info.unit; + evsel->scale = info.scale; + evsel->per_pkg = info.per_pkg; + evsel->snapshot = info.snapshot; + evsel->metric_expr = info.metric_expr; + evsel->metric_name = info.metric_name; + return 0; } int parse_events_multi_pmu_add(struct parse_events_state *parse_state, @@ -1792,6 +1799,8 @@ static int get_event_modifier(struct event_modifier *mod, char *str, if (*str == 'u') { if (!exclude) exclude = eu = ek = eh = 1; + if (!exclude_GH && !perf_guest) + eG = 1; eu = 0; } else if (*str == 'k') { if (!exclude) @@ -2017,6 +2026,32 @@ err: perf_pmu__parse_cleanup(); } +/* + * This function injects special term in + * perf_pmu_events_list so the test code + * can check on this functionality. + */ +int perf_pmu__test_parse_init(void) +{ + struct perf_pmu_event_symbol *list; + + list = malloc(sizeof(*list) * 1); + if (!list) + return -ENOMEM; + + list->type = PMU_EVENT_SYMBOL; + list->symbol = strdup("read"); + + if (!list->symbol) { + free(list); + return -ENOMEM; + } + + perf_pmu_events_list = list; + perf_pmu_events_list_num = 1; + return 0; +} + enum perf_pmu_event_symbol_type perf_pmu__parse_check(const char *name) { @@ -2078,6 +2113,8 @@ int parse_events_terms(struct list_head *terms, const char *str) int ret; ret = parse_events__scanner(str, &parse_state); + perf_pmu__parse_cleanup(); + if (!ret) { list_splice(parse_state.terms, terms); zfree(&parse_state.terms); @@ -2088,15 +2125,16 @@ int parse_events_terms(struct list_head *terms, const char *str) return ret; } -int parse_events(struct evlist *evlist, const char *str, - struct parse_events_error *err) +int __parse_events(struct evlist *evlist, const char *str, + struct parse_events_error *err, struct perf_pmu *fake_pmu) { struct parse_events_state parse_state = { - .list = LIST_HEAD_INIT(parse_state.list), - .idx = evlist->core.nr_entries, - .error = err, - .evlist = evlist, - .stoken = PE_START_EVENTS, + .list = LIST_HEAD_INIT(parse_state.list), + .idx = evlist->core.nr_entries, + .error = err, + .evlist = evlist, + .stoken = PE_START_EVENTS, + .fake_pmu = fake_pmu, }; int ret; diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h index 1fe23a2f9b36..00cde7d2e30c 100644 --- a/tools/perf/util/parse-events.h +++ b/tools/perf/util/parse-events.h @@ -33,8 +33,15 @@ const char *event_type(int type); int parse_events_option(const struct option *opt, const char *str, int unset); int parse_events_option_new_evlist(const struct option *opt, const char *str, int unset); -int parse_events(struct evlist *evlist, const char *str, - struct parse_events_error *error); +int __parse_events(struct evlist *evlist, const char *str, struct parse_events_error *error, + struct perf_pmu *fake_pmu); + +static inline int parse_events(struct evlist *evlist, const char *str, + struct parse_events_error *err) +{ + return __parse_events(evlist, str, err, NULL); +} + int parse_events_terms(struct list_head *terms, const char *str); int parse_filter(const struct option *opt, const char *str, int unset); int exclude_perf(const struct option *opt, const char *arg, int unset); @@ -127,9 +134,10 @@ struct parse_events_state { int idx; int nr_groups; struct parse_events_error *error; - struct evlist *evlist; + struct evlist *evlist; struct list_head *terms; int stoken; + struct perf_pmu *fake_pmu; }; void parse_events__handle_error(struct parse_events_error *err, int idx, @@ -253,4 +261,6 @@ static inline bool is_sdt_event(char *str __maybe_unused) } #endif /* HAVE_LIBELF_SUPPORT */ +int perf_pmu__test_parse_init(void); + #endif /* __PERF_PARSE_EVENTS_H */ diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l index 002802e17059..3ca5fd2829ca 100644 --- a/tools/perf/util/parse-events.l +++ b/tools/perf/util/parse-events.l @@ -41,14 +41,6 @@ static int value(yyscan_t scanner, int base) return __value(yylval, text, base, PE_VALUE); } -static int raw(yyscan_t scanner) -{ - YYSTYPE *yylval = parse_events_get_lval(scanner); - char *text = parse_events_get_text(scanner); - - return __value(yylval, text + 1, 16, PE_RAW); -} - static int str(yyscan_t scanner, int token) { YYSTYPE *yylval = parse_events_get_lval(scanner); @@ -72,6 +64,17 @@ static int str(yyscan_t scanner, int token) return token; } +static int raw(yyscan_t scanner) +{ + YYSTYPE *yylval = parse_events_get_lval(scanner); + char *text = parse_events_get_text(scanner); + + if (perf_pmu__parse_check(text) == PMU_EVENT_SYMBOL) + return str(scanner, PE_NAME); + + return __value(yylval, text + 1, 16, PE_RAW); +} + static bool isbpf_suffix(char *text) { int len = strlen(text); @@ -129,12 +132,16 @@ do { \ yyless(0); \ } while (0) -static int pmu_str_check(yyscan_t scanner) +static int pmu_str_check(yyscan_t scanner, struct parse_events_state *parse_state) { YYSTYPE *yylval = parse_events_get_lval(scanner); char *text = parse_events_get_text(scanner); yylval->str = strdup(text); + + if (parse_state->fake_pmu) + return PE_PMU_EVENT_FAKE; + switch (perf_pmu__parse_check(text)) { case PMU_EVENT_SYMBOL_PREFIX: return PE_PMU_EVENT_PRE; @@ -289,6 +296,7 @@ percore { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_PERCORE); } aux-output { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_AUX_OUTPUT); } aux-sample-size { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_AUX_SAMPLE_SIZE); } r{num_raw_hex} { return raw(yyscanner); } +r0x{num_raw_hex} { return raw(yyscanner); } , { return ','; } "/" { BEGIN(INITIAL); return '/'; } {name_minus} { return str(yyscanner, PE_NAME); } @@ -376,7 +384,7 @@ r{num_raw_hex} { return raw(yyscanner); } {modifier_event} { return str(yyscanner, PE_MODIFIER_EVENT); } {bpf_object} { if (!isbpf(yyscanner)) { USER_REJECT }; return str(yyscanner, PE_BPF_OBJECT); } {bpf_source} { if (!isbpf(yyscanner)) { USER_REJECT }; return str(yyscanner, PE_BPF_SOURCE); } -{name} { return pmu_str_check(yyscanner); } +{name} { return pmu_str_check(yyscanner, _parse_state); } {name_tag} { return str(yyscanner, PE_NAME); } "/" { BEGIN(config); return '/'; } - { return '-'; } diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y index acef87d9af58..645bf4f1859f 100644 --- a/tools/perf/util/parse-events.y +++ b/tools/perf/util/parse-events.y @@ -69,7 +69,7 @@ static void inc_group_count(struct list_head *list, %token PE_NAME_CACHE_TYPE PE_NAME_CACHE_OP_RESULT %token PE_PREFIX_MEM PE_PREFIX_RAW PE_PREFIX_GROUP %token PE_ERROR -%token PE_PMU_EVENT_PRE PE_PMU_EVENT_SUF PE_KERNEL_PMU_EVENT +%token PE_PMU_EVENT_PRE PE_PMU_EVENT_SUF PE_KERNEL_PMU_EVENT PE_PMU_EVENT_FAKE %token PE_ARRAY_ALL PE_ARRAY_RANGE %token PE_DRV_CFG_TERM %type <num> PE_VALUE @@ -87,7 +87,7 @@ static void inc_group_count(struct list_head *list, %type <str> PE_MODIFIER_EVENT %type <str> PE_MODIFIER_BP %type <str> PE_EVENT_NAME -%type <str> PE_PMU_EVENT_PRE PE_PMU_EVENT_SUF PE_KERNEL_PMU_EVENT +%type <str> PE_PMU_EVENT_PRE PE_PMU_EVENT_SUF PE_KERNEL_PMU_EVENT PE_PMU_EVENT_FAKE %type <str> PE_DRV_CFG_TERM %destructor { free ($$); } <str> %type <term> event_term @@ -356,6 +356,43 @@ PE_PMU_EVENT_PRE '-' PE_PMU_EVENT_SUF sep_dc YYABORT; $$ = list; } +| +PE_PMU_EVENT_FAKE sep_dc +{ + struct list_head *list; + int err; + + list = alloc_list(); + if (!list) + YYABORT; + + err = parse_events_add_pmu(_parse_state, list, $1, NULL, false, false); + free($1); + if (err < 0) { + free(list); + YYABORT; + } + $$ = list; +} +| +PE_PMU_EVENT_FAKE opt_pmu_config +{ + struct list_head *list; + int err; + + list = alloc_list(); + if (!list) + YYABORT; + + err = parse_events_add_pmu(_parse_state, list, $1, $2, false, false); + free($1); + parse_events_terms__delete($2); + if (err < 0) { + free(list); + YYABORT; + } + $$ = list; +} value_sym: PE_VALUE_SYM_HW @@ -474,7 +511,7 @@ PE_PREFIX_MEM PE_VALUE '/' PE_VALUE ':' PE_MODIFIER_BP sep_dc list = alloc_list(); ABORT_ON(!list); err = parse_events_add_breakpoint(list, &parse_state->idx, - (void *) $2, $6, $4); + (void *)(uintptr_t) $2, $6, $4); free($6); if (err) { free(list); @@ -491,7 +528,7 @@ PE_PREFIX_MEM PE_VALUE '/' PE_VALUE sep_dc list = alloc_list(); ABORT_ON(!list); if (parse_events_add_breakpoint(list, &parse_state->idx, - (void *) $2, NULL, $4)) { + (void *)(uintptr_t) $2, NULL, $4)) { free(list); YYABORT; } @@ -507,7 +544,7 @@ PE_PREFIX_MEM PE_VALUE ':' PE_MODIFIER_BP sep_dc list = alloc_list(); ABORT_ON(!list); err = parse_events_add_breakpoint(list, &parse_state->idx, - (void *) $2, $4, 0); + (void *)(uintptr_t) $2, $4, 0); free($4); if (err) { free(list); @@ -524,7 +561,7 @@ PE_PREFIX_MEM PE_VALUE sep_dc list = alloc_list(); ABORT_ON(!list); if (parse_events_add_breakpoint(list, &parse_state->idx, - (void *) $2, NULL, 0)) { + (void *)(uintptr_t) $2, NULL, 0)) { free(list); YYABORT; } diff --git a/tools/perf/util/parse-sublevel-options.c b/tools/perf/util/parse-sublevel-options.c new file mode 100644 index 000000000000..a841d17ffd57 --- /dev/null +++ b/tools/perf/util/parse-sublevel-options.c @@ -0,0 +1,70 @@ +#include <stdlib.h> +#include <stdint.h> +#include <string.h> +#include <stdio.h> + +#include "util/debug.h" +#include "util/parse-sublevel-options.h" + +static int parse_one_sublevel_option(const char *str, + struct sublevel_option *opts) +{ + struct sublevel_option *opt = opts; + char *vstr, *s = strdup(str); + int v = 1; + + if (!s) { + pr_err("no memory\n"); + return -1; + } + + vstr = strchr(s, '='); + if (vstr) + *vstr++ = 0; + + while (opt->name) { + if (!strcmp(s, opt->name)) + break; + opt++; + } + + if (!opt->name) { + pr_err("Unknown option name '%s'\n", s); + free(s); + return -1; + } + + if (vstr) + v = atoi(vstr); + + *opt->value_ptr = v; + free(s); + return 0; +} + +/* parse options like --foo a=<n>,b,c... */ +int perf_parse_sublevel_options(const char *str, struct sublevel_option *opts) +{ + char *s = strdup(str); + char *p = NULL; + int ret; + + if (!s) { + pr_err("no memory\n"); + return -1; + } + + p = strtok(s, ","); + while (p) { + ret = parse_one_sublevel_option(p, opts); + if (ret) { + free(s); + return ret; + } + + p = strtok(NULL, ","); + } + + free(s); + return 0; +} diff --git a/tools/perf/util/parse-sublevel-options.h b/tools/perf/util/parse-sublevel-options.h new file mode 100644 index 000000000000..9b9efcc2aaad --- /dev/null +++ b/tools/perf/util/parse-sublevel-options.h @@ -0,0 +1,11 @@ +#ifndef _PERF_PARSE_SUBLEVEL_OPTIONS_H +#define _PERF_PARSE_SUBLEVEL_OPTIONS_H + +struct sublevel_option { + const char *name; + int *value_ptr; +}; + +int perf_parse_sublevel_options(const char *str, struct sublevel_option *opts); + +#endif
\ No newline at end of file diff --git a/tools/perf/util/perf_api_probe.c b/tools/perf/util/perf_api_probe.c index 1337965673d7..3840d02f0f7b 100644 --- a/tools/perf/util/perf_api_probe.c +++ b/tools/perf/util/perf_api_probe.c @@ -93,6 +93,11 @@ static void perf_probe_context_switch(struct evsel *evsel) evsel->core.attr.context_switch = 1; } +static void perf_probe_text_poke(struct evsel *evsel) +{ + evsel->core.attr.text_poke = 1; +} + bool perf_can_sample_identifier(void) { return perf_probe_api(perf_probe_sample_identifier); @@ -108,6 +113,11 @@ bool perf_can_record_switch_events(void) return perf_probe_api(perf_probe_context_switch); } +bool perf_can_record_text_poke_events(void) +{ + return perf_probe_api(perf_probe_text_poke); +} + bool perf_can_record_cpu_wide(void) { struct perf_event_attr attr = { diff --git a/tools/perf/util/perf_api_probe.h b/tools/perf/util/perf_api_probe.h index 706c3c6426e2..d5506a983a94 100644 --- a/tools/perf/util/perf_api_probe.h +++ b/tools/perf/util/perf_api_probe.h @@ -9,6 +9,7 @@ bool perf_can_aux_sample(void); bool perf_can_comm_exec(void); bool perf_can_record_cpu_wide(void); bool perf_can_record_switch_events(void); +bool perf_can_record_text_poke_events(void); bool perf_can_sample_identifier(void); #endif // __PERF_API_PROBE_H diff --git a/tools/perf/util/perf_event_attr_fprintf.c b/tools/perf/util/perf_event_attr_fprintf.c index b94fa07f5d32..e67a227c0ce7 100644 --- a/tools/perf/util/perf_event_attr_fprintf.c +++ b/tools/perf/util/perf_event_attr_fprintf.c @@ -147,6 +147,7 @@ int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr, PRINT_ATTRf(aux_watermark, p_unsigned); PRINT_ATTRf(sample_max_stack, p_unsigned); PRINT_ATTRf(aux_sample_size, p_unsigned); + PRINT_ATTRf(text_poke, p_unsigned); return ret; } diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index 93fe72a9dc0b..d41caeb35cf6 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -26,6 +26,8 @@ #include "strbuf.h" #include "fncache.h" +struct perf_pmu perf_pmu__fake; + struct perf_pmu_format { char *name; int value; @@ -272,7 +274,7 @@ static void perf_pmu_update_alias(struct perf_pmu_alias *old, } /* Delete an alias entry. */ -static void perf_pmu_free_alias(struct perf_pmu_alias *newalias) +void perf_pmu_free_alias(struct perf_pmu_alias *newalias) { zfree(&newalias->name); zfree(&newalias->desc); @@ -1352,6 +1354,17 @@ void perf_pmu__set_format(unsigned long *bits, long from, long to) set_bit(b, bits); } +void perf_pmu__del_formats(struct list_head *formats) +{ + struct perf_pmu_format *fmt, *tmp; + + list_for_each_entry_safe(fmt, tmp, formats, list) { + list_del(&fmt->list); + free(fmt->name); + free(fmt); + } +} + static int sub_non_neg(int a, int b) { if (b > a) @@ -1400,6 +1413,7 @@ struct sevent { char *pmu; char *metric_expr; char *metric_name; + int is_cpu; }; static int cmp_sevent(const void *a, const void *b) @@ -1416,6 +1430,11 @@ static int cmp_sevent(const void *a, const void *b) if (n) return n; } + + /* Order CPU core events to be first */ + if (as->is_cpu != bs->is_cpu) + return bs->is_cpu - as->is_cpu; + return strcmp(as->name, bs->name); } @@ -1475,7 +1494,7 @@ void print_pmu_events(const char *event_glob, bool name_only, bool quiet_flag, list_for_each_entry(alias, &pmu->aliases, list) { char *name = alias->desc ? alias->name : format_alias(buf, sizeof(buf), pmu, alias); - bool is_cpu = !strcmp(pmu->name, "cpu"); + bool is_cpu = is_pmu_core(pmu->name); if (alias->deprecated && !deprecated) continue; @@ -1507,6 +1526,7 @@ void print_pmu_events(const char *event_glob, bool name_only, bool quiet_flag, aliases[j].pmu = pmu->name; aliases[j].metric_expr = alias->metric_expr; aliases[j].metric_name = alias->metric_name; + aliases[j].is_cpu = is_cpu; j++; } if (pmu->selectable && diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h index f971d9aa4570..a64e9c9ce731 100644 --- a/tools/perf/util/pmu.h +++ b/tools/perf/util/pmu.h @@ -43,6 +43,8 @@ struct perf_pmu { struct list_head list; /* ELEM */ }; +extern struct perf_pmu perf_pmu__fake; + struct perf_pmu_info { const char *unit; const char *metric_expr; @@ -92,6 +94,7 @@ int perf_pmu__new_format(struct list_head *list, char *name, int config, unsigned long *bits); void perf_pmu__set_format(unsigned long *bits, long from, long to); int perf_pmu__format_parse(char *dir, struct list_head *head); +void perf_pmu__del_formats(struct list_head *formats); struct perf_pmu *perf_pmu__scan(struct perf_pmu *pmu); @@ -111,6 +114,7 @@ void pmu_add_cpu_aliases_map(struct list_head *head, struct perf_pmu *pmu, struct pmu_events_map *perf_pmu__find_map(struct perf_pmu *pmu); bool pmu_uncore_alias_match(const char *pmu_name, const char *name); +void perf_pmu_free_alias(struct perf_pmu_alias *alias); int perf_pmu__convert_scale(const char *scale, char **end, double *sval); diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index df713a5d1e26..99d36ac77c08 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -375,9 +375,13 @@ static int find_alternative_probe_point(struct debuginfo *dinfo, /* Find the address of given function */ map__for_each_symbol_by_name(map, pp->function, sym) { - if (uprobes) + if (uprobes) { address = sym->start; - else + if (sym->type == STT_GNU_IFUNC) + pr_warning("Warning: The probe function (%s) is a GNU indirect function.\n" + "Consider identifying the final function used at run time and set the probe directly on that.\n", + pp->function); + } else address = map->unmap_ip(map, sym->start) - map->reloc; break; } @@ -2968,6 +2972,16 @@ static int find_probe_trace_events_from_map(struct perf_probe_event *pev, for (j = 0; j < num_matched_functions; j++) { sym = syms[j]; + /* There can be duplicated symbols in the map */ + for (i = 0; i < j; i++) + if (sym->start == syms[i]->start) { + pr_debug("Found duplicated symbol %s @ %" PRIx64 "\n", + sym->name, sym->start); + break; + } + if (i != j) + continue; + tev = (*tevs) + ret; tp = &tev->point; if (ret == num_matched_functions) { diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c index 55924255c535..659024342e9a 100644 --- a/tools/perf/util/probe-finder.c +++ b/tools/perf/util/probe-finder.c @@ -1408,6 +1408,9 @@ static int fill_empty_trace_arg(struct perf_probe_event *pev, char *type; int i, j, ret; + if (!ntevs) + return -ENOENT; + for (i = 0; i < pev->nargs; i++) { type = NULL; for (j = 0; j < ntevs; j++) { @@ -1464,7 +1467,7 @@ int debuginfo__find_trace_events(struct debuginfo *dbg, if (ret >= 0 && tf.pf.skip_empty_arg) ret = fill_empty_trace_arg(pev, tf.tevs, tf.ntevs); - if (ret < 0) { + if (ret < 0 || tf.ntevs == 0) { for (i = 0; i < tf.ntevs; i++) clear_probe_trace_event(&tf.tevs[i]); zfree(tevs); diff --git a/tools/perf/util/record.c b/tools/perf/util/record.c index a4cc11592f6b..ea9aa1d7cf50 100644 --- a/tools/perf/util/record.c +++ b/tools/perf/util/record.c @@ -2,6 +2,7 @@ #include "debug.h" #include "evlist.h" #include "evsel.h" +#include "evsel_config.h" #include "parse-events.h" #include <errno.h> #include <limits.h> @@ -33,11 +34,24 @@ static struct evsel *evsel__read_sampler(struct evsel *evsel, struct evlist *evl return leader; } +static u64 evsel__config_term_mask(struct evsel *evsel) +{ + struct evsel_config_term *term; + struct list_head *config_terms = &evsel->config_terms; + u64 term_types = 0; + + list_for_each_entry(term, config_terms, list) { + term_types |= 1 << term->type; + } + return term_types; +} + static void evsel__config_leader_sampling(struct evsel *evsel, struct evlist *evlist) { struct perf_event_attr *attr = &evsel->core.attr; struct evsel *leader = evsel->leader; struct evsel *read_sampler; + u64 term_types, freq_mask; if (!leader->sample_read) return; @@ -47,16 +61,20 @@ static void evsel__config_leader_sampling(struct evsel *evsel, struct evlist *ev if (evsel == read_sampler) return; + term_types = evsel__config_term_mask(evsel); /* - * Disable sampling for all group members other than the leader in - * case the leader 'leads' the sampling, except when the leader is an - * AUX area event, in which case the 2nd event in the group is the one - * that 'leads' the sampling. + * Disable sampling for all group members except those with explicit + * config terms or the leader. In the case of an AUX area event, the 2nd + * event in the group is the one that 'leads' the sampling. */ - attr->freq = 0; - attr->sample_freq = 0; - attr->sample_period = 0; - attr->write_backward = 0; + freq_mask = (1 << EVSEL__CONFIG_TERM_FREQ) | (1 << EVSEL__CONFIG_TERM_PERIOD); + if ((term_types & freq_mask) == 0) { + attr->freq = 0; + attr->sample_freq = 0; + attr->sample_period = 0; + } + if ((term_types & (1 << EVSEL__CONFIG_TERM_OVERWRITE)) == 0) + attr->write_backward = 0; /* * We don't get a sample for slave events, we make them when delivering diff --git a/tools/perf/util/record.h b/tools/perf/util/record.h index 39d1de4b2a36..03678ff25539 100644 --- a/tools/perf/util/record.h +++ b/tools/perf/util/record.h @@ -48,6 +48,7 @@ struct record_opts { bool sample_id; bool no_bpf_event; bool kcore; + bool text_poke; unsigned int freq; unsigned int mmap_pages; unsigned int auxtrace_mmap_pages; @@ -61,7 +62,7 @@ struct record_opts { const char *auxtrace_snapshot_opts; const char *auxtrace_sample_opts; bool sample_transaction; - unsigned initial_delay; + int initial_delay; bool use_clockid; clockid_t clockid; u64 clockid_res_ns; @@ -70,6 +71,8 @@ struct record_opts { int mmap_flush; unsigned int comp_level; unsigned int nr_threads_synthesize; + int ctl_fd; + int ctl_fd_ack; }; extern const char * const *record_usage; diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 1a157e84a04a..7a5f03764702 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -87,7 +87,7 @@ static int perf_session__process_compressed_event(struct perf_session *session, session->decomp_last = decomp; } - pr_debug("decomp (B): %ld to %ld\n", src_size, decomp_size); + pr_debug("decomp (B): %zd to %zd\n", src_size, decomp_size); return 0; } @@ -115,12 +115,12 @@ static int perf_session__open(struct perf_session *session) if (perf_header__has_feat(&session->header, HEADER_STAT)) return 0; - if (!perf_evlist__valid_sample_type(session->evlist)) { + if (!evlist__valid_sample_type(session->evlist)) { pr_err("non matching sample_type\n"); return -1; } - if (!perf_evlist__valid_sample_id_all(session->evlist)) { + if (!evlist__valid_sample_id_all(session->evlist)) { pr_err("non matching sample_id_all\n"); return -1; } @@ -252,10 +252,10 @@ struct perf_session *perf_session__new(struct perf_data *data, /* * In pipe-mode, evlist is empty until PERF_RECORD_HEADER_ATTR is - * processed, so perf_evlist__sample_id_all is not meaningful here. + * processed, so evlist__sample_id_all is not meaningful here. */ if ((!data || !data->is_pipe) && tool && tool->ordering_requires_timestamps && - tool->ordered_events && !perf_evlist__sample_id_all(session->evlist)) { + tool->ordered_events && !evlist__sample_id_all(session->evlist)) { dump_printf("WARNING: No sample_id_all support, falling back to unordered processing\n"); tool->ordered_events = false; } @@ -490,6 +490,8 @@ void perf_tool__fill_defaults(struct perf_tool *tool) tool->ksymbol = perf_event__process_ksymbol; if (tool->bpf == NULL) tool->bpf = perf_event__process_bpf; + if (tool->text_poke == NULL) + tool->text_poke = perf_event__process_text_poke; if (tool->read == NULL) tool->read = process_event_sample_stub; if (tool->throttle == NULL) @@ -659,6 +661,24 @@ static void perf_event__switch_swap(union perf_event *event, bool sample_id_all) swap_sample_id_all(event, &event->context_switch + 1); } +static void perf_event__text_poke_swap(union perf_event *event, bool sample_id_all) +{ + event->text_poke.addr = bswap_64(event->text_poke.addr); + event->text_poke.old_len = bswap_16(event->text_poke.old_len); + event->text_poke.new_len = bswap_16(event->text_poke.new_len); + + if (sample_id_all) { + size_t len = sizeof(event->text_poke.old_len) + + sizeof(event->text_poke.new_len) + + event->text_poke.old_len + + event->text_poke.new_len; + void *data = &event->text_poke.old_len; + + data += PERF_ALIGN(len, sizeof(u64)); + swap_sample_id_all(event, data); + } +} + static void perf_event__throttle_swap(union perf_event *event, bool sample_id_all) { @@ -932,6 +952,7 @@ static perf_event__swap_op perf_event__swap_ops[] = { [PERF_RECORD_SWITCH] = perf_event__switch_swap, [PERF_RECORD_SWITCH_CPU_WIDE] = perf_event__switch_swap, [PERF_RECORD_NAMESPACES] = perf_event__namespaces_swap, + [PERF_RECORD_TEXT_POKE] = perf_event__text_poke_swap, [PERF_RECORD_HEADER_ATTR] = perf_event__hdr_attr_swap, [PERF_RECORD_HEADER_EVENT_TYPE] = perf_event__event_type_swap, [PERF_RECORD_HEADER_TRACING_DATA] = perf_event__tracing_data_swap, @@ -1160,10 +1181,10 @@ static void perf_evlist__print_tstamp(struct evlist *evlist, union perf_event *event, struct perf_sample *sample) { - u64 sample_type = __perf_evlist__combined_sample_type(evlist); + u64 sample_type = __evlist__combined_sample_type(evlist); if (event->header.type != PERF_RECORD_SAMPLE && - !perf_evlist__sample_id_all(evlist)) { + !evlist__sample_id_all(evlist)) { fputs("-1 -1 ", stdout); return; } @@ -1474,6 +1495,8 @@ static int machines__deliver_event(struct machines *machines, return tool->ksymbol(tool, event, sample, machine); case PERF_RECORD_BPF_EVENT: return tool->bpf(tool, event, sample, machine); + case PERF_RECORD_TEXT_POKE: + return tool->text_poke(tool, event, sample, machine); default: ++evlist->stats.nr_unknown_events; return -1; @@ -1655,7 +1678,7 @@ int perf_session__peek_event(struct perf_session *session, off_t file_offset, return -1; if (session->header.needs_swap) - event_swap(event, perf_evlist__sample_id_all(session->evlist)); + event_swap(event, evlist__sample_id_all(session->evlist)); out_parse_sample: @@ -1704,7 +1727,7 @@ static s64 perf_session__process_event(struct perf_session *session, int ret; if (session->header.needs_swap) - event_swap(event, perf_evlist__sample_id_all(evlist)); + event_swap(event, evlist__sample_id_all(evlist)); if (event->header.type >= PERF_RECORD_HEADER_MAX) return -EINVAL; diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index 57d0706e1330..493ec372fdec 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -117,7 +117,7 @@ static void aggr_printout(struct perf_stat_config *config, cpu_map__id_to_die(id), config->csv_output ? 0 : -3, cpu_map__id_to_cpu(id), config->csv_sep); - } else { + } else if (id > -1) { fprintf(config->output, "CPU%*d%s", config->csv_output ? 0 : -7, evsel__cpus(evsel)->map[id], diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c index a7c13a88ecb9..924b54d15d54 100644 --- a/tools/perf/util/stat-shadow.c +++ b/tools/perf/util/stat-shadow.c @@ -517,7 +517,7 @@ static void print_l1_dcache_misses(struct perf_stat_config *config, color = get_ratio_color(GRC_CACHE_MISSES, ratio); - out->print_metric(config, out->ctx, color, "%7.2f%%", "of all L1-dcache hits", ratio); + out->print_metric(config, out->ctx, color, "%7.2f%%", "of all L1-dcache accesses", ratio); } static void print_l1_icache_misses(struct perf_stat_config *config, @@ -538,7 +538,7 @@ static void print_l1_icache_misses(struct perf_stat_config *config, ratio = avg / total * 100.0; color = get_ratio_color(GRC_CACHE_MISSES, ratio); - out->print_metric(config, out->ctx, color, "%7.2f%%", "of all L1-icache hits", ratio); + out->print_metric(config, out->ctx, color, "%7.2f%%", "of all L1-icache accesses", ratio); } static void print_dtlb_cache_misses(struct perf_stat_config *config, @@ -558,7 +558,7 @@ static void print_dtlb_cache_misses(struct perf_stat_config *config, ratio = avg / total * 100.0; color = get_ratio_color(GRC_CACHE_MISSES, ratio); - out->print_metric(config, out->ctx, color, "%7.2f%%", "of all dTLB cache hits", ratio); + out->print_metric(config, out->ctx, color, "%7.2f%%", "of all dTLB cache accesses", ratio); } static void print_itlb_cache_misses(struct perf_stat_config *config, @@ -578,7 +578,7 @@ static void print_itlb_cache_misses(struct perf_stat_config *config, ratio = avg / total * 100.0; color = get_ratio_color(GRC_CACHE_MISSES, ratio); - out->print_metric(config, out->ctx, color, "%7.2f%%", "of all iTLB cache hits", ratio); + out->print_metric(config, out->ctx, color, "%7.2f%%", "of all iTLB cache accesses", ratio); } static void print_ll_cache_misses(struct perf_stat_config *config, @@ -598,7 +598,7 @@ static void print_ll_cache_misses(struct perf_stat_config *config, ratio = avg / total * 100.0; color = get_ratio_color(GRC_CACHE_MISSES, ratio); - out->print_metric(config, out->ctx, color, "%7.2f%%", "of all LL-cache hits", ratio); + out->print_metric(config, out->ctx, color, "%7.2f%%", "of all LL-cache accesses", ratio); } /* @@ -730,25 +730,17 @@ static void print_smi_cost(struct perf_stat_config *config, out->print_metric(config, out->ctx, NULL, "%4.0f", "SMI#", smi_num); } -static void generic_metric(struct perf_stat_config *config, - const char *metric_expr, - struct evsel **metric_events, - char *name, - const char *metric_name, - const char *metric_unit, - int runtime, - int cpu, - struct perf_stat_output_ctx *out, - struct runtime_stat *st) +static int prepare_metric(struct evsel **metric_events, + struct metric_ref *metric_refs, + struct expr_parse_ctx *pctx, + int cpu, + struct runtime_stat *st) { - print_metric_t print_metric = out->print_metric; - struct expr_parse_ctx pctx; - double ratio, scale; - int i; - void *ctxp = out->ctx; + double scale; char *n, *pn; + int i, j, ret; - expr__ctx_init(&pctx); + expr__ctx_init(pctx); for (i = 0; metric_events[i]; i++) { struct saved_value *v; struct stats *stats; @@ -771,7 +763,7 @@ static void generic_metric(struct perf_stat_config *config, n = strdup(metric_events[i]->name); if (!n) - return; + return -ENOMEM; /* * This display code with --no-merge adds [cpu] postfixes. * These are not supported by the parser. Remove everything @@ -782,11 +774,42 @@ static void generic_metric(struct perf_stat_config *config, *pn = 0; if (metric_total) - expr__add_id(&pctx, n, metric_total); + expr__add_id_val(pctx, n, metric_total); else - expr__add_id(&pctx, n, avg_stats(stats)*scale); + expr__add_id_val(pctx, n, avg_stats(stats)*scale); } + for (j = 0; metric_refs && metric_refs[j].metric_name; j++) { + ret = expr__add_ref(pctx, &metric_refs[j]); + if (ret) + return ret; + } + + return i; +} + +static void generic_metric(struct perf_stat_config *config, + const char *metric_expr, + struct evsel **metric_events, + struct metric_ref *metric_refs, + char *name, + const char *metric_name, + const char *metric_unit, + int runtime, + int cpu, + struct perf_stat_output_ctx *out, + struct runtime_stat *st) +{ + print_metric_t print_metric = out->print_metric; + struct expr_parse_ctx pctx; + double ratio, scale; + int i; + void *ctxp = out->ctx; + + i = prepare_metric(metric_events, metric_refs, &pctx, cpu, st); + if (i < 0) + return; + if (!metric_events[i]) { if (expr__parse(&ratio, &pctx, metric_expr, runtime) == 0) { char *unit; @@ -827,6 +850,22 @@ static void generic_metric(struct perf_stat_config *config, expr__ctx_clear(&pctx); } +double test_generic_metric(struct metric_expr *mexp, int cpu, struct runtime_stat *st) +{ + struct expr_parse_ctx pctx; + double ratio = 0.0; + + if (prepare_metric(mexp->metric_events, mexp->metric_refs, &pctx, cpu, st) < 0) + goto out; + + if (expr__parse(&ratio, &pctx, mexp->metric_expr, 1)) + ratio = 0.0; + +out: + expr__ctx_clear(&pctx); + return ratio; +} + void perf_stat__print_shadow_stats(struct perf_stat_config *config, struct evsel *evsel, double avg, int cpu, @@ -881,7 +920,7 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, if (runtime_stat_n(st, STAT_L1_DCACHE, ctx, cpu) != 0) print_l1_dcache_misses(config, cpu, evsel, avg, out, st); else - print_metric(config, ctxp, NULL, NULL, "of all L1-dcache hits", 0); + print_metric(config, ctxp, NULL, NULL, "of all L1-dcache accesses", 0); } else if ( evsel->core.attr.type == PERF_TYPE_HW_CACHE && evsel->core.attr.config == ( PERF_COUNT_HW_CACHE_L1I | @@ -891,7 +930,7 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, if (runtime_stat_n(st, STAT_L1_ICACHE, ctx, cpu) != 0) print_l1_icache_misses(config, cpu, evsel, avg, out, st); else - print_metric(config, ctxp, NULL, NULL, "of all L1-icache hits", 0); + print_metric(config, ctxp, NULL, NULL, "of all L1-icache accesses", 0); } else if ( evsel->core.attr.type == PERF_TYPE_HW_CACHE && evsel->core.attr.config == ( PERF_COUNT_HW_CACHE_DTLB | @@ -901,7 +940,7 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, if (runtime_stat_n(st, STAT_DTLB_CACHE, ctx, cpu) != 0) print_dtlb_cache_misses(config, cpu, evsel, avg, out, st); else - print_metric(config, ctxp, NULL, NULL, "of all dTLB cache hits", 0); + print_metric(config, ctxp, NULL, NULL, "of all dTLB cache accesses", 0); } else if ( evsel->core.attr.type == PERF_TYPE_HW_CACHE && evsel->core.attr.config == ( PERF_COUNT_HW_CACHE_ITLB | @@ -911,7 +950,7 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, if (runtime_stat_n(st, STAT_ITLB_CACHE, ctx, cpu) != 0) print_itlb_cache_misses(config, cpu, evsel, avg, out, st); else - print_metric(config, ctxp, NULL, NULL, "of all iTLB cache hits", 0); + print_metric(config, ctxp, NULL, NULL, "of all iTLB cache accesses", 0); } else if ( evsel->core.attr.type == PERF_TYPE_HW_CACHE && evsel->core.attr.config == ( PERF_COUNT_HW_CACHE_LL | @@ -921,7 +960,7 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, if (runtime_stat_n(st, STAT_LL_CACHE, ctx, cpu) != 0) print_ll_cache_misses(config, cpu, evsel, avg, out, st); else - print_metric(config, ctxp, NULL, NULL, "of all LL-cache hits", 0); + print_metric(config, ctxp, NULL, NULL, "of all LL-cache accesses", 0); } else if (evsel__match(evsel, HARDWARE, HW_CACHE_MISSES)) { total = runtime_stat_avg(st, STAT_CACHEREFS, ctx, cpu); @@ -1035,8 +1074,8 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, else print_metric(config, ctxp, NULL, NULL, name, 0); } else if (evsel->metric_expr) { - generic_metric(config, evsel->metric_expr, evsel->metric_events, evsel->name, - evsel->metric_name, NULL, 1, cpu, out, st); + generic_metric(config, evsel->metric_expr, evsel->metric_events, NULL, + evsel->name, evsel->metric_name, NULL, 1, cpu, out, st); } else if (runtime_stat_n(st, STAT_NSECS, 0, cpu) != 0) { char unit = 'M'; char unit_buf[10]; @@ -1064,7 +1103,7 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, if (num++ > 0) out->new_line(config, ctxp); generic_metric(config, mexp->metric_expr, mexp->metric_events, - evsel->name, mexp->metric_name, + mexp->metric_refs, evsel->name, mexp->metric_name, mexp->metric_unit, mexp->runtime, cpu, out, st); } } diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index f75ae679eb28..aa3bed48511b 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -113,10 +113,11 @@ struct perf_stat_config { bool summary; bool metric_no_group; bool metric_no_merge; + bool stop_read_counter; FILE *output; unsigned int interval; unsigned int timeout; - unsigned int initial_delay; + int initial_delay; unsigned int unit_width; unsigned int metric_only_len; int times; @@ -133,6 +134,8 @@ struct perf_stat_config { struct perf_cpu_map *cpus_aggr_map; u64 *walltime_run; struct rblist metric_events; + int ctl_fd; + int ctl_fd_ack; }; void perf_stat__set_big_num(int set); @@ -230,4 +233,7 @@ perf_evlist__print_counters(struct evlist *evlist, struct target *_target, struct timespec *ts, int argc, const char **argv); + +struct metric_expr; +double test_generic_metric(struct metric_expr *mexp, int cpu, struct runtime_stat *st); #endif diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index 5e43054bffea..8cc4b0059fb0 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -789,7 +789,7 @@ int symsrc__init(struct symsrc *ss, struct dso *dso, const char *name, if (ss->opdshdr.sh_type != SHT_PROGBITS) ss->opdsec = NULL; - if (dso->kernel == DSO_TYPE_USER) + if (dso->kernel == DSO_SPACE__USER) ss->adjust_symbols = true; else ss->adjust_symbols = elf__needs_adjust_symbols(ehdr); @@ -872,7 +872,7 @@ static int dso__process_kernel_symbol(struct dso *dso, struct map *map, * kallsyms and identity maps. Overwrite it to * map to the kernel dso. */ - if (*remap_kernel && dso->kernel) { + if (*remap_kernel && dso->kernel && !kmodule) { *remap_kernel = false; map->start = shdr->sh_addr + ref_reloc(kmap); map->end = map->start + shdr->sh_size; @@ -1068,7 +1068,7 @@ int dso__load_sym(struct dso *dso, struct map *map, struct symsrc *syms_ss, * Initial kernel and module mappings do not map to the dso. * Flag the fixups. */ - if (dso->kernel || kmodule) { + if (dso->kernel) { remap_kernel = true; adjust_kernel_syms = dso->adjust_symbols; } @@ -1130,7 +1130,7 @@ int dso__load_sym(struct dso *dso, struct map *map, struct symsrc *syms_ss, (sym.st_value & 1)) --sym.st_value; - if (dso->kernel || kmodule) { + if (dso->kernel) { if (dso__process_kernel_symbol(dso, map, &sym, &shdr, kmaps, kmap, &curr_dso, &curr_map, section_name, adjust_kernel_syms, kmodule, &remap_kernel)) goto out_elf_end; diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 5ddf84dcbae7..5151a8c0b791 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -663,9 +663,12 @@ static bool symbol__is_idle(const char *name) "exit_idle", "mwait_idle", "mwait_idle_with_hints", + "mwait_idle_with_hints.constprop.0", "poll_idle", "ppc64_runlatch_off", "pseries_dedicated_idle_sleep", + "psw_idle", + "psw_idle_exit", NULL }; int i; @@ -806,7 +809,7 @@ static int maps__split_kallsyms(struct maps *kmaps, struct dso *dso, u64 delta, if (strcmp(curr_map->dso->short_name, module)) { if (curr_map != initial_map && - dso->kernel == DSO_TYPE_GUEST_KERNEL && + dso->kernel == DSO_SPACE__KERNEL_GUEST && machine__is_default_guest(machine)) { /* * We assume all symbols of a module are @@ -863,7 +866,7 @@ static int maps__split_kallsyms(struct maps *kmaps, struct dso *dso, u64 delta, goto add_symbol; } - if (dso->kernel == DSO_TYPE_GUEST_KERNEL) + if (dso->kernel == DSO_SPACE__KERNEL_GUEST) snprintf(dso_name, sizeof(dso_name), "[guest.kernel].%d", kernel_range++); @@ -907,7 +910,7 @@ discard_symbol: } if (curr_map != initial_map && - dso->kernel == DSO_TYPE_GUEST_KERNEL && + dso->kernel == DSO_SPACE__KERNEL_GUEST && machine__is_default_guest(kmaps->machine)) { dso__set_loaded(curr_map->dso); } @@ -1385,7 +1388,7 @@ static int dso__load_kcore(struct dso *dso, struct map *map, * Set the data type and long name so that kcore can be read via * dso__data_read_addr(). */ - if (dso->kernel == DSO_TYPE_GUEST_KERNEL) + if (dso->kernel == DSO_SPACE__KERNEL_GUEST) dso->binary_type = DSO_BINARY_TYPE__GUEST_KCORE; else dso->binary_type = DSO_BINARY_TYPE__KCORE; @@ -1449,7 +1452,7 @@ int __dso__load_kallsyms(struct dso *dso, const char *filename, symbols__fixup_end(&dso->symbols); symbols__fixup_duplicate(&dso->symbols); - if (dso->kernel == DSO_TYPE_GUEST_KERNEL) + if (dso->kernel == DSO_SPACE__KERNEL_GUEST) dso->symtab_type = DSO_BINARY_TYPE__GUEST_KALLSYMS; else dso->symtab_type = DSO_BINARY_TYPE__KALLSYMS; @@ -1535,17 +1538,17 @@ static bool dso__is_compatible_symtab_type(struct dso *dso, bool kmod, case DSO_BINARY_TYPE__MIXEDUP_UBUNTU_DEBUGINFO: case DSO_BINARY_TYPE__BUILDID_DEBUGINFO: case DSO_BINARY_TYPE__OPENEMBEDDED_DEBUGINFO: - return !kmod && dso->kernel == DSO_TYPE_USER; + return !kmod && dso->kernel == DSO_SPACE__USER; case DSO_BINARY_TYPE__KALLSYMS: case DSO_BINARY_TYPE__VMLINUX: case DSO_BINARY_TYPE__KCORE: - return dso->kernel == DSO_TYPE_KERNEL; + return dso->kernel == DSO_SPACE__KERNEL; case DSO_BINARY_TYPE__GUEST_KALLSYMS: case DSO_BINARY_TYPE__GUEST_VMLINUX: case DSO_BINARY_TYPE__GUEST_KCORE: - return dso->kernel == DSO_TYPE_GUEST_KERNEL; + return dso->kernel == DSO_SPACE__KERNEL_GUEST; case DSO_BINARY_TYPE__GUEST_KMODULE: case DSO_BINARY_TYPE__GUEST_KMODULE_COMP: @@ -1563,6 +1566,7 @@ static bool dso__is_compatible_symtab_type(struct dso *dso, bool kmod, case DSO_BINARY_TYPE__BPF_PROG_INFO: case DSO_BINARY_TYPE__BPF_IMAGE: + case DSO_BINARY_TYPE__OOL: case DSO_BINARY_TYPE__NOT_FOUND: default: return false; @@ -1647,9 +1651,9 @@ int dso__load(struct dso *dso, struct map *map) dso->symtab_type == DSO_BINARY_TYPE__GUEST_KMODULE_COMP; if (dso->kernel && !kmod) { - if (dso->kernel == DSO_TYPE_KERNEL) + if (dso->kernel == DSO_SPACE__KERNEL) ret = dso__load_kernel_sym(dso, map); - else if (dso->kernel == DSO_TYPE_GUEST_KERNEL) + else if (dso->kernel == DSO_SPACE__KERNEL_GUEST) ret = dso__load_guest_kernel_sym(dso, map); machine = map__kmaps(map)->machine; @@ -1879,7 +1883,7 @@ int dso__load_vmlinux(struct dso *dso, struct map *map, else symbol__join_symfs(symfs_vmlinux, vmlinux); - if (dso->kernel == DSO_TYPE_GUEST_KERNEL) + if (dso->kernel == DSO_SPACE__KERNEL_GUEST) symtab_type = DSO_BINARY_TYPE__GUEST_VMLINUX; else symtab_type = DSO_BINARY_TYPE__VMLINUX; @@ -1891,7 +1895,7 @@ int dso__load_vmlinux(struct dso *dso, struct map *map, symsrc__destroy(&ss); if (err > 0) { - if (dso->kernel == DSO_TYPE_GUEST_KERNEL) + if (dso->kernel == DSO_SPACE__KERNEL_GUEST) dso->binary_type = DSO_BINARY_TYPE__GUEST_VMLINUX; else dso->binary_type = DSO_BINARY_TYPE__VMLINUX; diff --git a/tools/perf/util/tool.h b/tools/perf/util/tool.h index 3fb67bd31e4a..bbbc0dcd461f 100644 --- a/tools/perf/util/tool.h +++ b/tools/perf/util/tool.h @@ -57,7 +57,8 @@ struct perf_tool { throttle, unthrottle, ksymbol, - bpf; + bpf, + text_poke; event_attr_op attr; event_attr_op event_update; diff --git a/tools/perf/util/zstd.c b/tools/perf/util/zstd.c index d2202392ffdb..48dd2b018c47 100644 --- a/tools/perf/util/zstd.c +++ b/tools/perf/util/zstd.c @@ -99,7 +99,7 @@ size_t zstd_decompress_stream(struct zstd_data *data, void *src, size_t src_size while (input.pos < input.size) { ret = ZSTD_decompressStream(data->dstream, &output, &input); if (ZSTD_isError(ret)) { - pr_err("failed to decompress (B): %ld -> %ld, dst_size %ld : %s\n", + pr_err("failed to decompress (B): %zd -> %zd, dst_size %zd : %s\n", src_size, output.size, dst_size, ZSTD_getErrorName(ret)); break; } diff --git a/tools/power/acpi/Makefile b/tools/power/acpi/Makefile index ebd3e1a1c28e..a249c50ebf55 100644 --- a/tools/power/acpi/Makefile +++ b/tools/power/acpi/Makefile @@ -7,6 +7,8 @@ include ../../scripts/Makefile.include +.NOTPARALLEL: + all: acpidbg acpidump ec clean: acpidbg_clean acpidump_clean ec_clean install: acpidbg_install acpidump_install ec_install diff --git a/tools/power/acpi/os_specific/service_layers/oslinuxtbl.c b/tools/power/acpi/os_specific/service_layers/oslinuxtbl.c index dd38c2b2e1b4..11c5046dce16 100644 --- a/tools/power/acpi/os_specific/service_layers/oslinuxtbl.c +++ b/tools/power/acpi/os_specific/service_layers/oslinuxtbl.c @@ -110,7 +110,7 @@ u32 gbl_table_count = 0; * * RETURN: Status; Converted from errno. * - * DESCRIPTION: Get last errno and conver it to acpi_status. + * DESCRIPTION: Get last errno and convert it to acpi_status. * *****************************************************************************/ diff --git a/tools/power/cpupower/utils/cpufreq-set.c b/tools/power/cpupower/utils/cpufreq-set.c index 6ed82fba5aaa..7b2164e07057 100644 --- a/tools/power/cpupower/utils/cpufreq-set.c +++ b/tools/power/cpupower/utils/cpufreq-set.c @@ -99,13 +99,17 @@ static unsigned long string_to_frequency(const char *str) continue; if (str[cp] == '.') { - while (power > -1 && isdigit(str[cp+1])) - cp++, power--; + while (power > -1 && isdigit(str[cp+1])) { + cp++; + power--; + } } - if (power >= -1) /* not enough => pad */ + if (power >= -1) { /* not enough => pad */ pad = power + 1; - else /* to much => strip */ - pad = 0, cp += power + 1; + } else { /* too much => strip */ + pad = 0; + cp += power + 1; + } /* check bounds */ if (cp <= 0 || cp + pad > NORM_FREQ_LEN - 1) return 0; diff --git a/tools/power/pm-graph/sleepgraph.py b/tools/power/pm-graph/sleepgraph.py index 46ff97e909c6..1bc36a1db14f 100755 --- a/tools/power/pm-graph/sleepgraph.py +++ b/tools/power/pm-graph/sleepgraph.py @@ -171,7 +171,7 @@ class SystemValues: tracefuncs = { 'sys_sync': {}, 'ksys_sync': {}, - '__pm_notifier_call_chain': {}, + 'pm_notifier_call_chain_robust': {}, 'pm_prepare_console': {}, 'pm_notifier_call_chain': {}, 'freeze_processes': {}, diff --git a/tools/power/x86/intel-speed-select/isst-config.c b/tools/power/x86/intel-speed-select/isst-config.c index 9f4b190f1d74..cd089a505859 100644 --- a/tools/power/x86/intel-speed-select/isst-config.c +++ b/tools/power/x86/intel-speed-select/isst-config.c @@ -15,7 +15,7 @@ struct process_cmd_struct { int arg; }; -static const char *version_str = "v1.5"; +static const char *version_str = "v1.6"; static const int supported_api_ver = 1; static struct isst_if_platform_info isst_platform_info; static char *progname; @@ -545,20 +545,23 @@ static void set_cpu_present_cpu_mask(void) } } -int get_core_count(int pkg_id, int die_id) +int get_max_punit_core_id(int pkg_id, int die_id) { - int cnt = 0; + int max_id = 0; + int i; - if (pkg_id < MAX_PACKAGE_COUNT && die_id < MAX_DIE_PER_PACKAGE) { - int i; + for (i = 0; i < topo_max_cpus; ++i) + { + if (!CPU_ISSET_S(i, present_cpumask_size, present_cpumask)) + continue; - for (i = 0; i < sizeof(long long) * 8; ++i) { - if (core_mask[pkg_id][die_id] & (1ULL << i)) - cnt++; - } + if (cpu_map[i].pkg_id == pkg_id && + cpu_map[i].die_id == die_id && + cpu_map[i].punit_cpu_core > max_id) + max_id = cpu_map[i].punit_cpu_core; } - return cnt; + return max_id; } int get_cpu_count(int pkg_id, int die_id) diff --git a/tools/power/x86/intel-speed-select/isst-core.c b/tools/power/x86/intel-speed-select/isst-core.c index a7f4337c5777..1d7ecb54352e 100644 --- a/tools/power/x86/intel-speed-select/isst-core.c +++ b/tools/power/x86/intel-speed-select/isst-core.c @@ -396,7 +396,7 @@ int isst_get_pbf_info(int cpu, int level, struct isst_pbf_info *pbf_info) { struct isst_pkg_ctdp_level_info ctdp_level; struct isst_pkg_ctdp pkg_dev; - int i, ret, core_cnt, max; + int i, ret, max_punit_core, max_mask_index; unsigned int req, resp; ret = isst_get_ctdp_levels(cpu, &pkg_dev); @@ -421,10 +421,10 @@ int isst_get_pbf_info(int cpu, int level, struct isst_pbf_info *pbf_info) pbf_info->core_cpumask_size = alloc_cpu_set(&pbf_info->core_cpumask); - core_cnt = get_core_count(get_physical_package_id(cpu), get_physical_die_id(cpu)); - max = core_cnt > 32 ? 2 : 1; + max_punit_core = get_max_punit_core_id(get_physical_package_id(cpu), get_physical_die_id(cpu)); + max_mask_index = max_punit_core > 32 ? 2 : 1; - for (i = 0; i < max; ++i) { + for (i = 0; i < max_mask_index; ++i) { unsigned long long mask; int count; diff --git a/tools/power/x86/intel-speed-select/isst.h b/tools/power/x86/intel-speed-select/isst.h index 094ba4589a9c..29715e9c2e06 100644 --- a/tools/power/x86/intel-speed-select/isst.h +++ b/tools/power/x86/intel-speed-select/isst.h @@ -170,7 +170,7 @@ struct isst_pkg_ctdp { extern int get_topo_max_cpus(void); extern int get_cpu_count(int pkg_id, int die_id); -extern int get_core_count(int pkg_id, int die_id); +extern int get_max_punit_core_id(int pkg_id, int die_id); /* Common interfaces */ FILE *get_output_file(void); diff --git a/tools/testing/ktest/examples/README b/tools/testing/ktest/examples/README index a12d295a09d8..4f048789b260 100644 --- a/tools/testing/ktest/examples/README +++ b/tools/testing/ktest/examples/README @@ -11,7 +11,7 @@ crosstests.conf - this config shows an example of testing a git repo against lots of different architectures. It only does build tests, but makes it easy to compile test different archs. You can download the arch cross compilers from: - http://kernel.org/pub/tools/crosstool/files/bin/x86_64/ + https://kernel.org/pub/tools/crosstool/files/bin/x86_64/ test.conf - A generic example of a config. This is based on an actual config used to perform real testing. diff --git a/tools/testing/ktest/examples/crosstests.conf b/tools/testing/ktest/examples/crosstests.conf index 6907f32590b2..3b15e85f26bd 100644 --- a/tools/testing/ktest/examples/crosstests.conf +++ b/tools/testing/ktest/examples/crosstests.conf @@ -3,7 +3,7 @@ # # In this config, it is expected that the tool chains from: # -# http://kernel.org/pub/tools/crosstool/files/bin/x86_64/ +# https://kernel.org/pub/tools/crosstool/files/bin/x86_64/ # # running on a x86_64 system have been downloaded and installed into: # diff --git a/tools/testing/ktest/ktest.pl b/tools/testing/ktest/ktest.pl index 7570e36d636d..cb16d2aac51c 100755 --- a/tools/testing/ktest/ktest.pl +++ b/tools/testing/ktest/ktest.pl @@ -11,6 +11,7 @@ use File::Path qw(mkpath); use File::Copy qw(cp); use FileHandle; use FindBin; +use IO::Handle; my $VERSION = "0.2"; @@ -81,6 +82,8 @@ my %default = ( "IGNORE_UNUSED" => 0, ); +my $test_log_start = 0; + my $ktest_config = "ktest.conf"; my $version; my $have_version = 0; @@ -98,6 +101,7 @@ my $final_post_ktest; my $pre_ktest; my $post_ktest; my $pre_test; +my $pre_test_die; my $post_test; my $pre_build; my $post_build; @@ -223,6 +227,7 @@ my $dirname = $FindBin::Bin; my $mailto; my $mailer; my $mail_path; +my $mail_max_size; my $mail_command; my $email_on_error; my $email_when_finished; @@ -259,6 +264,7 @@ my %option_map = ( "MAILTO" => \$mailto, "MAILER" => \$mailer, "MAIL_PATH" => \$mail_path, + "MAIL_MAX_SIZE" => \$mail_max_size, "MAIL_COMMAND" => \$mail_command, "EMAIL_ON_ERROR" => \$email_on_error, "EMAIL_WHEN_FINISHED" => \$email_when_finished, @@ -273,6 +279,7 @@ my %option_map = ( "PRE_KTEST" => \$pre_ktest, "POST_KTEST" => \$post_ktest, "PRE_TEST" => \$pre_test, + "PRE_TEST_DIE" => \$pre_test_die, "POST_TEST" => \$post_test, "BUILD_TYPE" => \$build_type, "BUILD_OPTIONS" => \$build_options, @@ -507,9 +514,7 @@ EOF sub _logit { if (defined($opt{"LOG_FILE"})) { - open(OUT, ">> $opt{LOG_FILE}") or die "Can't write to $opt{LOG_FILE}"; - print OUT @_; - close(OUT); + print LOG @_; } } @@ -909,6 +914,12 @@ sub process_expression { } } + if ($val =~ s/^\s*NOT\s+(.*)//) { + my $express = $1; + my $ret = process_expression($name, $express); + return !$ret; + } + if ($val =~ /^\s*0\s*$/) { return 0; } elsif ($val =~ /^\s*\d+\s*$/) { @@ -1485,8 +1496,32 @@ sub dodie { if ($email_on_error) { my $name = get_test_name; + my $log_file; + + if (defined($opt{"LOG_FILE"})) { + my $whence = 0; # beginning of file + my $pos = $test_log_start; + + if (defined($mail_max_size)) { + my $log_size = tell LOG; + $log_size -= $test_log_start; + if ($log_size > $mail_max_size) { + $whence = 2; # end of file + $pos = - $mail_max_size; + } + } + $log_file = "$tmpdir/log"; + open (L, "$opt{LOG_FILE}") or die "Can't open $opt{LOG_FILE} to read)"; + open (O, "> $tmpdir/log") or die "Can't open $tmpdir/log\n"; + seek(L, $pos, $whence); + while (<L>) { + print O; + } + close O; + close L; + } send_email("KTEST: critical failure for test $i [$name]", - "Your test started at $script_start_time has failed with:\n@_\n"); + "Your test started at $script_start_time has failed with:\n@_\n", $log_file); } if ($monitor_cnt) { @@ -1508,7 +1543,7 @@ sub create_pty { my $TIOCGPTN = 0x80045430; sysopen($ptm, "/dev/ptmx", O_RDWR | O_NONBLOCK) or - dodie "Cant open /dev/ptmx"; + dodie "Can't open /dev/ptmx"; # unlockpt() $tmp = pack("i", 0); @@ -1772,8 +1807,6 @@ sub run_command { (fail "unable to exec $command" and return 0); if (defined($opt{"LOG_FILE"})) { - open(LOG, ">>$opt{LOG_FILE}") or - dodie "failed to write to log"; $dolog = 1; } @@ -1821,7 +1854,6 @@ sub run_command { } close(CMD); - close(LOG) if ($dolog); close(RD) if ($dord); $end_time = time; @@ -3188,6 +3220,8 @@ sub config_bisect_end { doprint "***************************************\n\n"; } +my $pass = 1; + sub run_config_bisect { my ($good, $bad, $last_result) = @_; my $reset = ""; @@ -3210,11 +3244,15 @@ sub run_config_bisect { $ret = run_config_bisect_test $config_bisect_type; if ($ret) { - doprint "NEW GOOD CONFIG\n"; + doprint "NEW GOOD CONFIG ($pass)\n"; + system("cp $output_config $tmpdir/good_config.tmp.$pass"); + $pass++; # Return 3 for good config return 3; } else { - doprint "NEW BAD CONFIG\n"; + doprint "NEW BAD CONFIG ($pass)\n"; + system("cp $output_config $tmpdir/bad_config.tmp.$pass"); + $pass++; # Return 4 for bad config return 4; } @@ -4077,8 +4115,12 @@ if ($#new_configs >= 0) { } } -if ($opt{"CLEAR_LOG"} && defined($opt{"LOG_FILE"})) { - unlink $opt{"LOG_FILE"}; +if (defined($opt{"LOG_FILE"})) { + if ($opt{"CLEAR_LOG"}) { + unlink $opt{"LOG_FILE"}; + } + open(LOG, ">> $opt{LOG_FILE}") or die "Can't write to $opt{LOG_FILE}"; + LOG->autoflush(1); } doprint "\n\nSTARTING AUTOMATED TESTS\n\n"; @@ -4171,7 +4213,7 @@ sub find_mailer { } sub do_send_mail { - my ($subject, $message) = @_; + my ($subject, $message, $file) = @_; if (!defined($mail_path)) { # find the mailer @@ -4181,16 +4223,30 @@ sub do_send_mail { } } + my $header_file = "$tmpdir/header"; + open (HEAD, ">$header_file") or die "Can not create $header_file\n"; + print HEAD "To: $mailto\n"; + print HEAD "Subject: $subject\n\n"; + print HEAD "$message\n"; + close HEAD; + if (!defined($mail_command)) { if ($mailer eq "mail" || $mailer eq "mailx") { - $mail_command = "\$MAIL_PATH/\$MAILER -s \'\$SUBJECT\' \$MAILTO <<< \'\$MESSAGE\'"; + $mail_command = "cat \$HEADER_FILE \$BODY_FILE | \$MAIL_PATH/\$MAILER -s \'\$SUBJECT\' \$MAILTO"; } elsif ($mailer eq "sendmail" ) { - $mail_command = "echo \'Subject: \$SUBJECT\n\n\$MESSAGE\' | \$MAIL_PATH/\$MAILER -t \$MAILTO"; + $mail_command = "cat \$HEADER_FILE \$BODY_FILE | \$MAIL_PATH/\$MAILER -t \$MAILTO"; } else { die "\nYour mailer: $mailer is not supported.\n"; } } + if (defined($file)) { + $mail_command =~ s/\$BODY_FILE/$file/g; + } else { + $mail_command =~ s/\$BODY_FILE//g; + } + + $mail_command =~ s/\$HEADER_FILE/$header_file/g; $mail_command =~ s/\$MAILER/$mailer/g; $mail_command =~ s/\$MAIL_PATH/$mail_path/g; $mail_command =~ s/\$MAILTO/$mailto/g; @@ -4338,10 +4394,19 @@ for (my $i = 1; $i <= $opt{"NUM_TESTS"}; $i++) { } doprint "\n\n"; + + if (defined($opt{"LOG_FILE"})) { + $test_log_start = tell(LOG); + } + doprint "RUNNING TEST $i of $opt{NUM_TESTS}$name with option $test_type $run_type$installme\n\n"; if (defined($pre_test)) { - run_command $pre_test; + my $ret = run_command $pre_test; + if (!$ret && defined($pre_test_die) && + $pre_test_die) { + dodie "failed to pre_test\n"; + } } unlink $dmesg; @@ -4441,4 +4506,10 @@ if ($email_when_finished) { send_email("KTEST: Your test has finished!", "$successes of $opt{NUM_TESTS} tests started at $script_start_time were successful!"); } + +if (defined($opt{"LOG_FILE"})) { + print "\n See $opt{LOG_FILE} for the record of results.\n\n"; + close LOG; +} + exit 0; diff --git a/tools/testing/ktest/sample.conf b/tools/testing/ktest/sample.conf index 27666b8007ed..5e7d1d729752 100644 --- a/tools/testing/ktest/sample.conf +++ b/tools/testing/ktest/sample.conf @@ -442,6 +442,19 @@ # Users can cancel the test by Ctrl^C # (default 0) #EMAIL_WHEN_CANCELED = 1 +# +# If a test ends with an error and EMAIL_ON_ERROR is set as well +# as a LOG_FILE is defined, then the log of the failing test will +# be included in the email that is sent. +# It is possible that the log may be very large, in which case, +# only the last amount of the log should be sent. To limit how +# much of the log is sent, set MAIL_MAX_SIZE. This will be the +# size in bytes of the last portion of the log of the failed +# test file. That is, if this is set to 100000, then only the +# last 100 thousand bytes of the log file will be included in +# the email. +# (default undef) +#MAIL_MAX_SIZE = 1000000 # Start a test setup. If you leave this off, all options # will be default and the test will run once. @@ -557,6 +570,11 @@ # default (undefined) #PRE_TEST = ${SSH} reboot_to_special_kernel +# To kill the entire test if PRE_TEST is defined but fails set this +# to 1. +# (default 0) +#PRE_TEST_DIE = 1 + # If there is a command you want to run after the individual test case # completes, then you can set this option. # diff --git a/tools/testing/nvdimm/dax-dev.c b/tools/testing/nvdimm/dax-dev.c index 7e5d979e73cb..fb342a8c98d3 100644 --- a/tools/testing/nvdimm/dax-dev.c +++ b/tools/testing/nvdimm/dax-dev.c @@ -9,12 +9,19 @@ phys_addr_t dax_pgoff_to_phys(struct dev_dax *dev_dax, pgoff_t pgoff, unsigned long size) { - struct resource *res = &dev_dax->region->res; - phys_addr_t addr; + int i; - addr = pgoff * PAGE_SIZE + res->start; - if (addr >= res->start && addr <= res->end) { - if (addr + size - 1 <= res->end) { + for (i = 0; i < dev_dax->nr_range; i++) { + struct dev_dax_range *dax_range = &dev_dax->ranges[i]; + struct range *range = &dax_range->range; + unsigned long long pgoff_end; + phys_addr_t addr; + + pgoff_end = dax_range->pgoff + PHYS_PFN(range_len(range)) - 1; + if (pgoff < dax_range->pgoff || pgoff > pgoff_end) + continue; + addr = PFN_PHYS(pgoff - dax_range->pgoff) + range->start; + if (addr + size - 1 <= range->end) { if (get_nfit_res(addr)) { struct page *page; @@ -23,9 +30,10 @@ phys_addr_t dax_pgoff_to_phys(struct dev_dax *dev_dax, pgoff_t pgoff, page = vmalloc_to_page((void *)addr); return PFN_PHYS(page_to_pfn(page)); - } else - return addr; + } + return addr; } + break; } return -1; } diff --git a/tools/testing/nvdimm/test/iomap.c b/tools/testing/nvdimm/test/iomap.c index 03e40b3b0106..c62d372d426f 100644 --- a/tools/testing/nvdimm/test/iomap.c +++ b/tools/testing/nvdimm/test/iomap.c @@ -126,7 +126,7 @@ static void dev_pagemap_percpu_release(struct percpu_ref *ref) void *__wrap_devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap) { int error; - resource_size_t offset = pgmap->res.start; + resource_size_t offset = pgmap->range.start; struct nfit_test_resource *nfit_res = get_nfit_res(offset); if (!nfit_res) diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c index a8ee5c4d41eb..2ac0fff6dad8 100644 --- a/tools/testing/nvdimm/test/nfit.c +++ b/tools/testing/nvdimm/test/nfit.c @@ -23,7 +23,8 @@ #include "nfit_test.h" #include "../watermark.h" -#include <asm/mcsafe_test.h> +#include <asm/copy_mc_test.h> +#include <asm/mce.h> /* * Generate an NFIT table to describe the following topology: @@ -173,6 +174,9 @@ struct nfit_test_fw { u64 version; u32 size_received; u64 end_time; + bool armed; + bool missed_activate; + unsigned long last_activate; }; struct nfit_test { @@ -345,7 +349,7 @@ static int nd_intel_test_finish_fw(struct nfit_test *t, __func__, t, nd_cmd, buf_len, idx); if (fw->state == FW_STATE_UPDATED) { - /* update already done, need cold boot */ + /* update already done, need activation */ nd_cmd->status = 0x20007; return 0; } @@ -430,6 +434,7 @@ static int nd_intel_test_finish_query(struct nfit_test *t, } dev_dbg(dev, "%s: transition out verify\n", __func__); fw->state = FW_STATE_UPDATED; + fw->missed_activate = false; /* fall through */ case FW_STATE_UPDATED: nd_cmd->status = 0; @@ -1178,6 +1183,134 @@ static int nd_intel_test_cmd_master_secure_erase(struct nfit_test *t, return 0; } +static unsigned long last_activate; + +static int nvdimm_bus_intel_fw_activate_businfo(struct nfit_test *t, + struct nd_intel_bus_fw_activate_businfo *nd_cmd, + unsigned int buf_len) +{ + int i, armed = 0; + int state; + u64 tmo; + + for (i = 0; i < NUM_DCR; i++) { + struct nfit_test_fw *fw = &t->fw[i]; + + if (fw->armed) + armed++; + } + + /* + * Emulate 3 second activation max, and 1 second incremental + * quiesce time per dimm requiring multiple activates to get all + * DIMMs updated. + */ + if (armed) + state = ND_INTEL_FWA_ARMED; + else if (!last_activate || time_after(jiffies, last_activate + 3 * HZ)) + state = ND_INTEL_FWA_IDLE; + else + state = ND_INTEL_FWA_BUSY; + + tmo = armed * USEC_PER_SEC; + *nd_cmd = (struct nd_intel_bus_fw_activate_businfo) { + .capability = ND_INTEL_BUS_FWA_CAP_FWQUIESCE + | ND_INTEL_BUS_FWA_CAP_OSQUIESCE + | ND_INTEL_BUS_FWA_CAP_RESET, + .state = state, + .activate_tmo = tmo, + .cpu_quiesce_tmo = tmo, + .io_quiesce_tmo = tmo, + .max_quiesce_tmo = 3 * USEC_PER_SEC, + }; + + return 0; +} + +static int nvdimm_bus_intel_fw_activate(struct nfit_test *t, + struct nd_intel_bus_fw_activate *nd_cmd, + unsigned int buf_len) +{ + struct nd_intel_bus_fw_activate_businfo info; + u32 status = 0; + int i; + + nvdimm_bus_intel_fw_activate_businfo(t, &info, sizeof(info)); + if (info.state == ND_INTEL_FWA_BUSY) + status = ND_INTEL_BUS_FWA_STATUS_BUSY; + else if (info.activate_tmo > info.max_quiesce_tmo) + status = ND_INTEL_BUS_FWA_STATUS_TMO; + else if (info.state == ND_INTEL_FWA_IDLE) + status = ND_INTEL_BUS_FWA_STATUS_NOARM; + + dev_dbg(&t->pdev.dev, "status: %d\n", status); + nd_cmd->status = status; + if (status && status != ND_INTEL_BUS_FWA_STATUS_TMO) + return 0; + + last_activate = jiffies; + for (i = 0; i < NUM_DCR; i++) { + struct nfit_test_fw *fw = &t->fw[i]; + + if (!fw->armed) + continue; + if (fw->state != FW_STATE_UPDATED) + fw->missed_activate = true; + else + fw->state = FW_STATE_NEW; + fw->armed = false; + fw->last_activate = last_activate; + } + + return 0; +} + +static int nd_intel_test_cmd_fw_activate_dimminfo(struct nfit_test *t, + struct nd_intel_fw_activate_dimminfo *nd_cmd, + unsigned int buf_len, int dimm) +{ + struct nd_intel_bus_fw_activate_businfo info; + struct nfit_test_fw *fw = &t->fw[dimm]; + u32 result, state; + + nvdimm_bus_intel_fw_activate_businfo(t, &info, sizeof(info)); + + if (info.state == ND_INTEL_FWA_BUSY) + state = ND_INTEL_FWA_BUSY; + else if (info.state == ND_INTEL_FWA_IDLE) + state = ND_INTEL_FWA_IDLE; + else if (fw->armed) + state = ND_INTEL_FWA_ARMED; + else + state = ND_INTEL_FWA_IDLE; + + result = ND_INTEL_DIMM_FWA_NONE; + if (last_activate && fw->last_activate == last_activate && + state == ND_INTEL_FWA_IDLE) { + if (fw->missed_activate) + result = ND_INTEL_DIMM_FWA_NOTSTAGED; + else + result = ND_INTEL_DIMM_FWA_SUCCESS; + } + + *nd_cmd = (struct nd_intel_fw_activate_dimminfo) { + .result = result, + .state = state, + }; + + return 0; +} + +static int nd_intel_test_cmd_fw_activate_arm(struct nfit_test *t, + struct nd_intel_fw_activate_arm *nd_cmd, + unsigned int buf_len, int dimm) +{ + struct nfit_test_fw *fw = &t->fw[dimm]; + + fw->armed = nd_cmd->activate_arm == ND_INTEL_DIMM_FWA_ARM; + nd_cmd->status = 0; + return 0; +} static int get_dimm(struct nfit_mem *nfit_mem, unsigned int func) { @@ -1192,6 +1325,29 @@ static int get_dimm(struct nfit_mem *nfit_mem, unsigned int func) return i; } +static void nfit_ctl_dbg(struct acpi_nfit_desc *acpi_desc, + struct nvdimm *nvdimm, unsigned int cmd, void *buf, + unsigned int len) +{ + struct nfit_test *t = container_of(acpi_desc, typeof(*t), acpi_desc); + unsigned int func = cmd; + unsigned int family = 0; + + if (cmd == ND_CMD_CALL) { + struct nd_cmd_pkg *pkg = buf; + + len = pkg->nd_size_in; + family = pkg->nd_family; + buf = pkg->nd_payload; + func = pkg->nd_command; + } + dev_dbg(&t->pdev.dev, "%s family: %d cmd: %d: func: %d input length: %d\n", + nvdimm ? nvdimm_name(nvdimm) : "bus", family, cmd, func, + len); + print_hex_dump_debug("nvdimm in ", DUMP_PREFIX_OFFSET, 16, 4, + buf, min(len, 256u), true); +} + static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm, unsigned int cmd, void *buf, unsigned int buf_len, int *cmd_rc) @@ -1205,6 +1361,8 @@ static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc, cmd_rc = &__cmd_rc; *cmd_rc = 0; + nfit_ctl_dbg(acpi_desc, nvdimm, cmd, buf, buf_len); + if (nvdimm) { struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm); unsigned long cmd_mask = nvdimm_cmd_mask(nvdimm); @@ -1224,6 +1382,11 @@ static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc, i = get_dimm(nfit_mem, func); if (i < 0) return i; + if (i >= NUM_DCR) { + dev_WARN_ONCE(&t->pdev.dev, 1, + "ND_CMD_CALL only valid for nfit_test0\n"); + return -EINVAL; + } switch (func) { case NVDIMM_INTEL_GET_SECURITY_STATE: @@ -1252,11 +1415,11 @@ static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc, break; case NVDIMM_INTEL_OVERWRITE: rc = nd_intel_test_cmd_overwrite(t, - buf, buf_len, i - t->dcr_idx); + buf, buf_len, i); break; case NVDIMM_INTEL_QUERY_OVERWRITE: rc = nd_intel_test_cmd_query_overwrite(t, - buf, buf_len, i - t->dcr_idx); + buf, buf_len, i); break; case NVDIMM_INTEL_SET_MASTER_PASSPHRASE: rc = nd_intel_test_cmd_master_set_pass(t, @@ -1266,54 +1429,59 @@ static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc, rc = nd_intel_test_cmd_master_secure_erase(t, buf, buf_len, i); break; + case NVDIMM_INTEL_FW_ACTIVATE_DIMMINFO: + rc = nd_intel_test_cmd_fw_activate_dimminfo( + t, buf, buf_len, i); + break; + case NVDIMM_INTEL_FW_ACTIVATE_ARM: + rc = nd_intel_test_cmd_fw_activate_arm( + t, buf, buf_len, i); + break; case ND_INTEL_ENABLE_LSS_STATUS: rc = nd_intel_test_cmd_set_lss_status(t, buf, buf_len); break; case ND_INTEL_FW_GET_INFO: rc = nd_intel_test_get_fw_info(t, buf, - buf_len, i - t->dcr_idx); + buf_len, i); break; case ND_INTEL_FW_START_UPDATE: rc = nd_intel_test_start_update(t, buf, - buf_len, i - t->dcr_idx); + buf_len, i); break; case ND_INTEL_FW_SEND_DATA: rc = nd_intel_test_send_data(t, buf, - buf_len, i - t->dcr_idx); + buf_len, i); break; case ND_INTEL_FW_FINISH_UPDATE: rc = nd_intel_test_finish_fw(t, buf, - buf_len, i - t->dcr_idx); + buf_len, i); break; case ND_INTEL_FW_FINISH_QUERY: rc = nd_intel_test_finish_query(t, buf, - buf_len, i - t->dcr_idx); + buf_len, i); break; case ND_INTEL_SMART: rc = nfit_test_cmd_smart(buf, buf_len, - &t->smart[i - t->dcr_idx]); + &t->smart[i]); break; case ND_INTEL_SMART_THRESHOLD: rc = nfit_test_cmd_smart_threshold(buf, buf_len, - &t->smart_threshold[i - - t->dcr_idx]); + &t->smart_threshold[i]); break; case ND_INTEL_SMART_SET_THRESHOLD: rc = nfit_test_cmd_smart_set_threshold(buf, buf_len, - &t->smart_threshold[i - - t->dcr_idx], - &t->smart[i - t->dcr_idx], + &t->smart_threshold[i], + &t->smart[i], &t->pdev.dev, t->dimm_dev[i]); break; case ND_INTEL_SMART_INJECT: rc = nfit_test_cmd_smart_inject(buf, buf_len, - &t->smart_threshold[i - - t->dcr_idx], - &t->smart[i - t->dcr_idx], + &t->smart_threshold[i], + &t->smart[i], &t->pdev.dev, t->dimm_dev[i]); break; default: @@ -1353,9 +1521,9 @@ static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc, if (!nd_desc) return -ENOTTY; - if (cmd == ND_CMD_CALL) { + if (cmd == ND_CMD_CALL && call_pkg->nd_family + == NVDIMM_BUS_FAMILY_NFIT) { func = call_pkg->nd_command; - buf_len = call_pkg->nd_size_in + call_pkg->nd_size_out; buf = (void *) call_pkg->nd_payload; @@ -1379,7 +1547,26 @@ static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc, default: return -ENOTTY; } - } + } else if (cmd == ND_CMD_CALL && call_pkg->nd_family + == NVDIMM_BUS_FAMILY_INTEL) { + func = call_pkg->nd_command; + buf_len = call_pkg->nd_size_in + call_pkg->nd_size_out; + buf = (void *) call_pkg->nd_payload; + + switch (func) { + case NVDIMM_BUS_INTEL_FW_ACTIVATE_BUSINFO: + rc = nvdimm_bus_intel_fw_activate_businfo(t, + buf, buf_len); + return rc; + case NVDIMM_BUS_INTEL_FW_ACTIVATE: + rc = nvdimm_bus_intel_fw_activate(t, buf, + buf_len); + return rc; + default: + return -ENOTTY; + } + } else if (cmd == ND_CMD_CALL) + return -ENOTTY; if (!nd_desc || !test_bit(cmd, &nd_desc->cmd_mask)) return -ENOTTY; @@ -1805,6 +1992,7 @@ static void nfit_test0_setup(struct nfit_test *t) struct acpi_nfit_flush_address *flush; struct acpi_nfit_capabilities *pcap; unsigned int offset = 0, i; + unsigned long *acpi_mask; /* * spa0 (interleave first half of dimm0 and dimm1, note storage @@ -2507,10 +2695,10 @@ static void nfit_test0_setup(struct nfit_test *t) set_bit(ND_CMD_ARS_STATUS, &acpi_desc->bus_cmd_force_en); set_bit(ND_CMD_CLEAR_ERROR, &acpi_desc->bus_cmd_force_en); set_bit(ND_CMD_CALL, &acpi_desc->bus_cmd_force_en); - set_bit(NFIT_CMD_TRANSLATE_SPA, &acpi_desc->bus_nfit_cmd_force_en); - set_bit(NFIT_CMD_ARS_INJECT_SET, &acpi_desc->bus_nfit_cmd_force_en); - set_bit(NFIT_CMD_ARS_INJECT_CLEAR, &acpi_desc->bus_nfit_cmd_force_en); - set_bit(NFIT_CMD_ARS_INJECT_GET, &acpi_desc->bus_nfit_cmd_force_en); + set_bit(NFIT_CMD_TRANSLATE_SPA, &acpi_desc->bus_dsm_mask); + set_bit(NFIT_CMD_ARS_INJECT_SET, &acpi_desc->bus_dsm_mask); + set_bit(NFIT_CMD_ARS_INJECT_CLEAR, &acpi_desc->bus_dsm_mask); + set_bit(NFIT_CMD_ARS_INJECT_GET, &acpi_desc->bus_dsm_mask); set_bit(ND_INTEL_FW_GET_INFO, &acpi_desc->dimm_cmd_force_en); set_bit(ND_INTEL_FW_START_UPDATE, &acpi_desc->dimm_cmd_force_en); set_bit(ND_INTEL_FW_SEND_DATA, &acpi_desc->dimm_cmd_force_en); @@ -2531,6 +2719,12 @@ static void nfit_test0_setup(struct nfit_test *t) &acpi_desc->dimm_cmd_force_en); set_bit(NVDIMM_INTEL_MASTER_SECURE_ERASE, &acpi_desc->dimm_cmd_force_en); + set_bit(NVDIMM_INTEL_FW_ACTIVATE_DIMMINFO, &acpi_desc->dimm_cmd_force_en); + set_bit(NVDIMM_INTEL_FW_ACTIVATE_ARM, &acpi_desc->dimm_cmd_force_en); + + acpi_mask = &acpi_desc->family_dsm_mask[NVDIMM_BUS_FAMILY_INTEL]; + set_bit(NVDIMM_BUS_INTEL_FW_ACTIVATE_BUSINFO, acpi_mask); + set_bit(NVDIMM_BUS_INTEL_FW_ACTIVATE, acpi_mask); } static void nfit_test1_setup(struct nfit_test *t) @@ -2699,14 +2893,18 @@ static int nfit_ctl_test(struct device *dev) struct acpi_nfit_desc *acpi_desc; const u64 test_val = 0x0123456789abcdefULL; unsigned long mask, cmd_size, offset; - union { - struct nd_cmd_get_config_size cfg_size; - struct nd_cmd_clear_error clear_err; - struct nd_cmd_ars_status ars_stat; - struct nd_cmd_ars_cap ars_cap; - char buf[sizeof(struct nd_cmd_ars_status) - + sizeof(struct nd_ars_record)]; - } cmds; + struct nfit_ctl_test_cmd { + struct nd_cmd_pkg pkg; + union { + struct nd_cmd_get_config_size cfg_size; + struct nd_cmd_clear_error clear_err; + struct nd_cmd_ars_status ars_stat; + struct nd_cmd_ars_cap ars_cap; + struct nd_intel_bus_fw_activate_businfo fwa_info; + char buf[sizeof(struct nd_cmd_ars_status) + + sizeof(struct nd_ars_record)]; + }; + } cmd; adev = devm_kzalloc(dev, sizeof(*adev), GFP_KERNEL); if (!adev) @@ -2731,11 +2929,15 @@ static int nfit_ctl_test(struct device *dev) .module = THIS_MODULE, .provider_name = "ACPI.NFIT", .ndctl = acpi_nfit_ctl, - .bus_dsm_mask = 1UL << NFIT_CMD_TRANSLATE_SPA - | 1UL << NFIT_CMD_ARS_INJECT_SET - | 1UL << NFIT_CMD_ARS_INJECT_CLEAR - | 1UL << NFIT_CMD_ARS_INJECT_GET, + .bus_family_mask = 1UL << NVDIMM_BUS_FAMILY_NFIT + | 1UL << NVDIMM_BUS_FAMILY_INTEL, }, + .bus_dsm_mask = 1UL << NFIT_CMD_TRANSLATE_SPA + | 1UL << NFIT_CMD_ARS_INJECT_SET + | 1UL << NFIT_CMD_ARS_INJECT_CLEAR + | 1UL << NFIT_CMD_ARS_INJECT_GET, + .family_dsm_mask[NVDIMM_BUS_FAMILY_INTEL] = + NVDIMM_BUS_INTEL_FW_ACTIVATE_CMDMASK, .dev = &adev->dev, }; @@ -2766,21 +2968,21 @@ static int nfit_ctl_test(struct device *dev) /* basic checkout of a typical 'get config size' command */ - cmd_size = sizeof(cmds.cfg_size); - cmds.cfg_size = (struct nd_cmd_get_config_size) { + cmd_size = sizeof(cmd.cfg_size); + cmd.cfg_size = (struct nd_cmd_get_config_size) { .status = 0, .config_size = SZ_128K, .max_xfer = SZ_4K, }; - rc = setup_result(cmds.buf, cmd_size); + rc = setup_result(cmd.buf, cmd_size); if (rc) return rc; rc = acpi_nfit_ctl(&acpi_desc->nd_desc, nvdimm, ND_CMD_GET_CONFIG_SIZE, - cmds.buf, cmd_size, &cmd_rc); + cmd.buf, cmd_size, &cmd_rc); - if (rc < 0 || cmd_rc || cmds.cfg_size.status != 0 - || cmds.cfg_size.config_size != SZ_128K - || cmds.cfg_size.max_xfer != SZ_4K) { + if (rc < 0 || cmd_rc || cmd.cfg_size.status != 0 + || cmd.cfg_size.config_size != SZ_128K + || cmd.cfg_size.max_xfer != SZ_4K) { dev_dbg(dev, "%s: failed at: %d rc: %d cmd_rc: %d\n", __func__, __LINE__, rc, cmd_rc); return -EIO; @@ -2789,14 +2991,14 @@ static int nfit_ctl_test(struct device *dev) /* test ars_status with zero output */ cmd_size = offsetof(struct nd_cmd_ars_status, address); - cmds.ars_stat = (struct nd_cmd_ars_status) { + cmd.ars_stat = (struct nd_cmd_ars_status) { .out_length = 0, }; - rc = setup_result(cmds.buf, cmd_size); + rc = setup_result(cmd.buf, cmd_size); if (rc) return rc; rc = acpi_nfit_ctl(&acpi_desc->nd_desc, NULL, ND_CMD_ARS_STATUS, - cmds.buf, cmd_size, &cmd_rc); + cmd.buf, cmd_size, &cmd_rc); if (rc < 0 || cmd_rc) { dev_dbg(dev, "%s: failed at: %d rc: %d cmd_rc: %d\n", @@ -2806,16 +3008,16 @@ static int nfit_ctl_test(struct device *dev) /* test ars_cap with benign extended status */ - cmd_size = sizeof(cmds.ars_cap); - cmds.ars_cap = (struct nd_cmd_ars_cap) { + cmd_size = sizeof(cmd.ars_cap); + cmd.ars_cap = (struct nd_cmd_ars_cap) { .status = ND_ARS_PERSISTENT << 16, }; offset = offsetof(struct nd_cmd_ars_cap, status); - rc = setup_result(cmds.buf + offset, cmd_size - offset); + rc = setup_result(cmd.buf + offset, cmd_size - offset); if (rc) return rc; rc = acpi_nfit_ctl(&acpi_desc->nd_desc, NULL, ND_CMD_ARS_CAP, - cmds.buf, cmd_size, &cmd_rc); + cmd.buf, cmd_size, &cmd_rc); if (rc < 0 || cmd_rc) { dev_dbg(dev, "%s: failed at: %d rc: %d cmd_rc: %d\n", @@ -2825,19 +3027,19 @@ static int nfit_ctl_test(struct device *dev) /* test ars_status with 'status' trimmed from 'out_length' */ - cmd_size = sizeof(cmds.ars_stat) + sizeof(struct nd_ars_record); - cmds.ars_stat = (struct nd_cmd_ars_status) { + cmd_size = sizeof(cmd.ars_stat) + sizeof(struct nd_ars_record); + cmd.ars_stat = (struct nd_cmd_ars_status) { .out_length = cmd_size - 4, }; - record = &cmds.ars_stat.records[0]; + record = &cmd.ars_stat.records[0]; *record = (struct nd_ars_record) { .length = test_val, }; - rc = setup_result(cmds.buf, cmd_size); + rc = setup_result(cmd.buf, cmd_size); if (rc) return rc; rc = acpi_nfit_ctl(&acpi_desc->nd_desc, NULL, ND_CMD_ARS_STATUS, - cmds.buf, cmd_size, &cmd_rc); + cmd.buf, cmd_size, &cmd_rc); if (rc < 0 || cmd_rc || record->length != test_val) { dev_dbg(dev, "%s: failed at: %d rc: %d cmd_rc: %d\n", @@ -2847,19 +3049,19 @@ static int nfit_ctl_test(struct device *dev) /* test ars_status with 'Output (Size)' including 'status' */ - cmd_size = sizeof(cmds.ars_stat) + sizeof(struct nd_ars_record); - cmds.ars_stat = (struct nd_cmd_ars_status) { + cmd_size = sizeof(cmd.ars_stat) + sizeof(struct nd_ars_record); + cmd.ars_stat = (struct nd_cmd_ars_status) { .out_length = cmd_size, }; - record = &cmds.ars_stat.records[0]; + record = &cmd.ars_stat.records[0]; *record = (struct nd_ars_record) { .length = test_val, }; - rc = setup_result(cmds.buf, cmd_size); + rc = setup_result(cmd.buf, cmd_size); if (rc) return rc; rc = acpi_nfit_ctl(&acpi_desc->nd_desc, NULL, ND_CMD_ARS_STATUS, - cmds.buf, cmd_size, &cmd_rc); + cmd.buf, cmd_size, &cmd_rc); if (rc < 0 || cmd_rc || record->length != test_val) { dev_dbg(dev, "%s: failed at: %d rc: %d cmd_rc: %d\n", @@ -2869,15 +3071,15 @@ static int nfit_ctl_test(struct device *dev) /* test extended status for get_config_size results in failure */ - cmd_size = sizeof(cmds.cfg_size); - cmds.cfg_size = (struct nd_cmd_get_config_size) { + cmd_size = sizeof(cmd.cfg_size); + cmd.cfg_size = (struct nd_cmd_get_config_size) { .status = 1 << 16, }; - rc = setup_result(cmds.buf, cmd_size); + rc = setup_result(cmd.buf, cmd_size); if (rc) return rc; rc = acpi_nfit_ctl(&acpi_desc->nd_desc, nvdimm, ND_CMD_GET_CONFIG_SIZE, - cmds.buf, cmd_size, &cmd_rc); + cmd.buf, cmd_size, &cmd_rc); if (rc < 0 || cmd_rc >= 0) { dev_dbg(dev, "%s: failed at: %d rc: %d cmd_rc: %d\n", @@ -2886,16 +3088,46 @@ static int nfit_ctl_test(struct device *dev) } /* test clear error */ - cmd_size = sizeof(cmds.clear_err); - cmds.clear_err = (struct nd_cmd_clear_error) { + cmd_size = sizeof(cmd.clear_err); + cmd.clear_err = (struct nd_cmd_clear_error) { .length = 512, .cleared = 512, }; - rc = setup_result(cmds.buf, cmd_size); + rc = setup_result(cmd.buf, cmd_size); if (rc) return rc; rc = acpi_nfit_ctl(&acpi_desc->nd_desc, NULL, ND_CMD_CLEAR_ERROR, - cmds.buf, cmd_size, &cmd_rc); + cmd.buf, cmd_size, &cmd_rc); + if (rc < 0 || cmd_rc) { + dev_dbg(dev, "%s: failed at: %d rc: %d cmd_rc: %d\n", + __func__, __LINE__, rc, cmd_rc); + return -EIO; + } + + /* test firmware activate bus info */ + cmd_size = sizeof(cmd.fwa_info); + cmd = (struct nfit_ctl_test_cmd) { + .pkg = { + .nd_command = NVDIMM_BUS_INTEL_FW_ACTIVATE_BUSINFO, + .nd_family = NVDIMM_BUS_FAMILY_INTEL, + .nd_size_out = cmd_size, + .nd_fw_size = cmd_size, + }, + .fwa_info = { + .state = ND_INTEL_FWA_IDLE, + .capability = ND_INTEL_BUS_FWA_CAP_FWQUIESCE + | ND_INTEL_BUS_FWA_CAP_OSQUIESCE, + .activate_tmo = 1, + .cpu_quiesce_tmo = 1, + .io_quiesce_tmo = 1, + .max_quiesce_tmo = 1, + }, + }; + rc = setup_result(cmd.buf, cmd_size); + if (rc) + return rc; + rc = acpi_nfit_ctl(&acpi_desc->nd_desc, NULL, ND_CMD_CALL, + &cmd, sizeof(cmd.pkg) + cmd_size, &cmd_rc); if (rc < 0 || cmd_rc) { dev_dbg(dev, "%s: failed at: %d rc: %d cmd_rc: %d\n", __func__, __LINE__, rc, cmd_rc); @@ -3052,7 +3284,7 @@ static struct platform_driver nfit_test_driver = { .id_table = nfit_test_id, }; -static char mcsafe_buf[PAGE_SIZE] __attribute__((__aligned__(PAGE_SIZE))); +static char copy_mc_buf[PAGE_SIZE] __attribute__((__aligned__(PAGE_SIZE))); enum INJECT { INJECT_NONE, @@ -3060,7 +3292,7 @@ enum INJECT { INJECT_DST, }; -static void mcsafe_test_init(char *dst, char *src, size_t size) +static void copy_mc_test_init(char *dst, char *src, size_t size) { size_t i; @@ -3069,7 +3301,7 @@ static void mcsafe_test_init(char *dst, char *src, size_t size) src[i] = (char) i; } -static bool mcsafe_test_validate(unsigned char *dst, unsigned char *src, +static bool copy_mc_test_validate(unsigned char *dst, unsigned char *src, size_t size, unsigned long rem) { size_t i; @@ -3090,12 +3322,12 @@ static bool mcsafe_test_validate(unsigned char *dst, unsigned char *src, return true; } -void mcsafe_test(void) +void copy_mc_test(void) { char *inject_desc[] = { "none", "source", "destination" }; enum INJECT inj; - if (IS_ENABLED(CONFIG_MCSAFE_TEST)) { + if (IS_ENABLED(CONFIG_COPY_MC_TEST)) { pr_info("%s: run...\n", __func__); } else { pr_info("%s: disabled, skip.\n", __func__); @@ -3113,31 +3345,31 @@ void mcsafe_test(void) switch (inj) { case INJECT_NONE: - mcsafe_inject_src(NULL); - mcsafe_inject_dst(NULL); - dst = &mcsafe_buf[2048]; - src = &mcsafe_buf[1024 - i]; + copy_mc_inject_src(NULL); + copy_mc_inject_dst(NULL); + dst = ©_mc_buf[2048]; + src = ©_mc_buf[1024 - i]; expect = 0; break; case INJECT_SRC: - mcsafe_inject_src(&mcsafe_buf[1024]); - mcsafe_inject_dst(NULL); - dst = &mcsafe_buf[2048]; - src = &mcsafe_buf[1024 - i]; + copy_mc_inject_src(©_mc_buf[1024]); + copy_mc_inject_dst(NULL); + dst = ©_mc_buf[2048]; + src = ©_mc_buf[1024 - i]; expect = 512 - i; break; case INJECT_DST: - mcsafe_inject_src(NULL); - mcsafe_inject_dst(&mcsafe_buf[2048]); - dst = &mcsafe_buf[2048 - i]; - src = &mcsafe_buf[1024]; + copy_mc_inject_src(NULL); + copy_mc_inject_dst(©_mc_buf[2048]); + dst = ©_mc_buf[2048 - i]; + src = ©_mc_buf[1024]; expect = 512 - i; break; } - mcsafe_test_init(dst, src, 512); - rem = __memcpy_mcsafe(dst, src, 512); - valid = mcsafe_test_validate(dst, src, 512, expect); + copy_mc_test_init(dst, src, 512); + rem = copy_mc_fragile(dst, src, 512); + valid = copy_mc_test_validate(dst, src, 512, expect); if (rem == expect && valid) continue; pr_info("%s: copy(%#lx, %#lx, %d) off: %d rem: %ld %s expect: %ld\n", @@ -3149,8 +3381,8 @@ void mcsafe_test(void) } } - mcsafe_inject_src(NULL); - mcsafe_inject_dst(NULL); + copy_mc_inject_src(NULL); + copy_mc_inject_dst(NULL); } static __init int nfit_test_init(void) @@ -3161,7 +3393,7 @@ static __init int nfit_test_init(void) libnvdimm_test(); acpi_nfit_test(); device_dax_test(); - mcsafe_test(); + copy_mc_test(); dax_pmem_test(); dax_pmem_core_test(); #ifdef CONFIG_DEV_DAX_PMEM_COMPAT diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index e03bc15ce731..9018f45d631d 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -32,6 +32,7 @@ TARGETS += lkdtm TARGETS += membarrier TARGETS += memfd TARGETS += memory-hotplug +TARGETS += mincore TARGETS += mount TARGETS += mqueue TARGETS += net diff --git a/tools/testing/selftests/arm64/Makefile b/tools/testing/selftests/arm64/Makefile index 93b567d23c8b..2c9d012797a7 100644 --- a/tools/testing/selftests/arm64/Makefile +++ b/tools/testing/selftests/arm64/Makefile @@ -4,7 +4,7 @@ ARCH ?= $(shell uname -m 2>/dev/null || echo not) ifneq (,$(filter $(ARCH),aarch64 arm64)) -ARM64_SUBTARGETS ?= tags signal +ARM64_SUBTARGETS ?= tags signal pauth fp mte else ARM64_SUBTARGETS := endif diff --git a/tools/testing/selftests/arm64/fp/.gitignore b/tools/testing/selftests/arm64/fp/.gitignore new file mode 100644 index 000000000000..d66f76d2a650 --- /dev/null +++ b/tools/testing/selftests/arm64/fp/.gitignore @@ -0,0 +1,5 @@ +fpsimd-test +sve-probe-vls +sve-ptrace +sve-test +vlset diff --git a/tools/testing/selftests/arm64/fp/Makefile b/tools/testing/selftests/arm64/fp/Makefile new file mode 100644 index 000000000000..a57009d3a0dc --- /dev/null +++ b/tools/testing/selftests/arm64/fp/Makefile @@ -0,0 +1,17 @@ +# SPDX-License-Identifier: GPL-2.0 + +CFLAGS += -I../../../../../usr/include/ +TEST_GEN_PROGS := sve-ptrace sve-probe-vls +TEST_PROGS_EXTENDED := fpsimd-test fpsimd-stress sve-test sve-stress vlset + +all: $(TEST_GEN_PROGS) $(TEST_PROGS_EXTENDED) + +fpsimd-test: fpsimd-test.o + $(CC) -nostdlib $^ -o $@ +sve-ptrace: sve-ptrace.o sve-ptrace-asm.o +sve-probe-vls: sve-probe-vls.o +sve-test: sve-test.o + $(CC) -nostdlib $^ -o $@ +vlset: vlset.o + +include ../../lib.mk diff --git a/tools/testing/selftests/arm64/fp/README b/tools/testing/selftests/arm64/fp/README new file mode 100644 index 000000000000..03e3dad865d8 --- /dev/null +++ b/tools/testing/selftests/arm64/fp/README @@ -0,0 +1,100 @@ +This directory contains a mix of tests integrated with kselftest and +standalone stress tests. + +kselftest tests +=============== + +sve-probe-vls - Checks the SVE vector length enumeration interface +sve-ptrace - Checks the SVE ptrace interface + +Running the non-kselftest tests +=============================== + +sve-stress performs an SVE context switch stress test, as described +below. + +(The fpsimd-stress test works the same way; just substitute "fpsimd" for +"sve" in the following commands.) + + +The test runs until killed by the user. + +If no context switch error was detected, you will see output such as +the following: + +$ ./sve-stress +(wait for some time) +^C +Vector length: 512 bits +PID: 1573 +Terminated by signal 15, no error, iterations=9467, signals=1014 +Vector length: 512 bits +PID: 1575 +Terminated by signal 15, no error, iterations=9448, signals=1028 +Vector length: 512 bits +PID: 1577 +Terminated by signal 15, no error, iterations=9436, signals=1039 +Vector length: 512 bits +PID: 1579 +Terminated by signal 15, no error, iterations=9421, signals=1039 +Vector length: 512 bits +PID: 1581 +Terminated by signal 15, no error, iterations=9403, signals=1039 +Vector length: 512 bits +PID: 1583 +Terminated by signal 15, no error, iterations=9385, signals=1036 +Vector length: 512 bits +PID: 1585 +Terminated by signal 15, no error, iterations=9376, signals=1039 +Vector length: 512 bits +PID: 1587 +Terminated by signal 15, no error, iterations=9361, signals=1039 +Vector length: 512 bits +PID: 1589 +Terminated by signal 15, no error, iterations=9350, signals=1039 + + +If an error was detected, details of the mismatch will be printed +instead of "no error". + +Ideally, the test should be allowed to run for many minutes or hours +to maximise test coverage. + + +KVM stress testing +================== + +To try to reproduce the bugs that we have been observing, sve-stress +should be run in parallel in two KVM guests, while simultaneously +running on the host. + +1) Start 2 guests, using the following command for each: + +$ lkvm run --console=virtio -pconsole=hvc0 --sve Image + +(Depending on the hardware GIC implementation, you may also need +--irqchip=gicv3. New kvmtool defaults to that if appropriate, but I +can't remember whether my branch is new enough for that. Try without +the option first.) + +Kvmtool occupies the terminal until you kill it (Ctrl+A x), +or until the guest terminates. It is therefore recommended to run +each instance in separate terminal (use screen or ssh etc.) This +allows multiple guests to be run in parallel while running other +commands on the host. + +Within the guest, the host filesystem is accessible, mounted on /host. + +2) Run the sve-stress on *each* guest with the Vector-Length set to 32: +guest$ ./vlset --inherit 32 ./sve-stress + +3) Run the sve-stress on the host with the maximum Vector-Length: +host$ ./vlset --inherit --max ./sve-stress + + +Again, the test should be allowed to run for many minutes or hours to +maximise test coverage. + +If no error is detected, you will see output from each sve-stress +instance similar to that illustrated above; otherwise details of the +observed mismatches will be printed. diff --git a/tools/testing/selftests/arm64/fp/asm-offsets.h b/tools/testing/selftests/arm64/fp/asm-offsets.h new file mode 100644 index 000000000000..a180851496ec --- /dev/null +++ b/tools/testing/selftests/arm64/fp/asm-offsets.h @@ -0,0 +1,11 @@ +#define sa_sz 32 +#define sa_flags 8 +#define sa_handler 0 +#define sa_mask_sz 8 +#define SIGUSR1 10 +#define SIGTERM 15 +#define SIGINT 2 +#define SIGABRT 6 +#define SA_NODEFER 1073741824 +#define SA_SIGINFO 4 +#define ucontext_regs 184 diff --git a/tools/testing/selftests/arm64/fp/assembler.h b/tools/testing/selftests/arm64/fp/assembler.h new file mode 100644 index 000000000000..8944f2189206 --- /dev/null +++ b/tools/testing/selftests/arm64/fp/assembler.h @@ -0,0 +1,57 @@ +// SPDX-License-Identifier: GPL-2.0-only +// Copyright (C) 2015-2019 ARM Limited. +// Original author: Dave Martin <Dave.Martin@arm.com> + +#ifndef ASSEMBLER_H +#define ASSEMBLER_H + +.macro __for from:req, to:req + .if (\from) == (\to) + _for__body %\from + .else + __for \from, %(\from) + ((\to) - (\from)) / 2 + __for %(\from) + ((\to) - (\from)) / 2 + 1, \to + .endif +.endm + +.macro _for var:req, from:req, to:req, insn:vararg + .macro _for__body \var:req + .noaltmacro + \insn + .altmacro + .endm + + .altmacro + __for \from, \to + .noaltmacro + + .purgem _for__body +.endm + +.macro function name + .macro endfunction + .type \name, @function + .purgem endfunction + .endm +\name: +.endm + +.macro define_accessor name, num, insn + .macro \name\()_entry n + \insn \n, 1 + ret + .endm + +function \name + adr x2, .L__accessor_tbl\@ + add x2, x2, x0, lsl #3 + br x2 + +.L__accessor_tbl\@: + _for x, 0, (\num) - 1, \name\()_entry \x +endfunction + + .purgem \name\()_entry +.endm + +#endif /* ! ASSEMBLER_H */ diff --git a/tools/testing/selftests/arm64/fp/fpsimd-stress b/tools/testing/selftests/arm64/fp/fpsimd-stress new file mode 100755 index 000000000000..781b5b022eaf --- /dev/null +++ b/tools/testing/selftests/arm64/fp/fpsimd-stress @@ -0,0 +1,60 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0-only +# Copyright (C) 2015-2019 ARM Limited. +# Original author: Dave Martin <Dave.Martin@arm.com> + +set -ue + +NR_CPUS=`nproc` + +pids= +logs= + +cleanup () { + trap - INT TERM CHLD + set +e + + if [ -n "$pids" ]; then + kill $pids + wait $pids + pids= + fi + + if [ -n "$logs" ]; then + cat $logs + rm $logs + logs= + fi +} + +interrupt () { + cleanup + exit 0 +} + +child_died () { + cleanup + exit 1 +} + +trap interrupt INT TERM EXIT +trap child_died CHLD + +for x in `seq 0 $((NR_CPUS * 4))`; do + log=`mktemp` + logs=$logs\ $log + ./fpsimd-test >$log & + pids=$pids\ $! +done + +# Wait for all child processes to be created: +sleep 10 + +while :; do + kill -USR1 $pids +done & +pids=$pids\ $! + +wait + +exit 1 diff --git a/tools/testing/selftests/arm64/fp/fpsimd-test.S b/tools/testing/selftests/arm64/fp/fpsimd-test.S new file mode 100644 index 000000000000..1c5556bdd11d --- /dev/null +++ b/tools/testing/selftests/arm64/fp/fpsimd-test.S @@ -0,0 +1,482 @@ +// SPDX-License-Identifier: GPL-2.0-only +// Copyright (C) 2015-2019 ARM Limited. +// Original author: Dave Martin <Dave.Martin@arm.com> +// +// Simple FPSIMD context switch test +// Repeatedly writes unique test patterns into each FPSIMD register +// and reads them back to verify integrity. +// +// for x in `seq 1 NR_CPUS`; do fpsimd-test & pids=$pids\ $! ; done +// (leave it running for as long as you want...) +// kill $pids + +#include <asm/unistd.h> +#include "assembler.h" +#include "asm-offsets.h" + +#define NVR 32 +#define MAXVL_B (128 / 8) + +.macro _vldr Vn:req, Xt:req + ld1 {v\Vn\().2d}, [x\Xt] +.endm + +.macro _vstr Vn:req, Xt:req + st1 {v\Vn\().2d}, [x\Xt] +.endm + +// Generate accessor functions to read/write programmatically selected +// FPSIMD registers. +// x0 is the register index to access +// x1 is the memory address to read from (getv,setp) or store to (setv,setp) +// All clobber x0-x2 +define_accessor setv, NVR, _vldr +define_accessor getv, NVR, _vstr + +// Print a single character x0 to stdout +// Clobbers x0-x2,x8 +function putc + str x0, [sp, #-16]! + + mov x0, #1 // STDOUT_FILENO + mov x1, sp + mov x2, #1 + mov x8, #__NR_write + svc #0 + + add sp, sp, #16 + ret +endfunction + +// Print a NUL-terminated string starting at address x0 to stdout +// Clobbers x0-x3,x8 +function puts + mov x1, x0 + + mov x2, #0 +0: ldrb w3, [x0], #1 + cbz w3, 1f + add x2, x2, #1 + b 0b + +1: mov w0, #1 // STDOUT_FILENO + mov x8, #__NR_write + svc #0 + + ret +endfunction + +// Utility macro to print a literal string +// Clobbers x0-x4,x8 +.macro puts string + .pushsection .rodata.str1.1, "aMS", 1 +.L__puts_literal\@: .string "\string" + .popsection + + ldr x0, =.L__puts_literal\@ + bl puts +.endm + +// Print an unsigned decimal number x0 to stdout +// Clobbers x0-x4,x8 +function putdec + mov x1, sp + str x30, [sp, #-32]! // Result can't be > 20 digits + + mov x2, #0 + strb w2, [x1, #-1]! // Write the NUL terminator + + mov x2, #10 +0: udiv x3, x0, x2 // div-mod loop to generate the digits + msub x0, x3, x2, x0 + add w0, w0, #'0' + strb w0, [x1, #-1]! + mov x0, x3 + cbnz x3, 0b + + ldrb w0, [x1] + cbnz w0, 1f + mov w0, #'0' // Print "0" for 0, not "" + strb w0, [x1, #-1]! + +1: mov x0, x1 + bl puts + + ldr x30, [sp], #32 + ret +endfunction + +// Print an unsigned decimal number x0 to stdout, followed by a newline +// Clobbers x0-x5,x8 +function putdecn + mov x5, x30 + + bl putdec + mov x0, #'\n' + bl putc + + ret x5 +endfunction + + +// Clobbers x0-x3,x8 +function puthexb + str x30, [sp, #-0x10]! + + mov w3, w0 + lsr w0, w0, #4 + bl puthexnibble + mov w0, w3 + + ldr x30, [sp], #0x10 + // fall through to puthexnibble +endfunction +// Clobbers x0-x2,x8 +function puthexnibble + and w0, w0, #0xf + cmp w0, #10 + blo 1f + add w0, w0, #'a' - ('9' + 1) +1: add w0, w0, #'0' + b putc +endfunction + +// x0=data in, x1=size in, clobbers x0-x5,x8 +function dumphex + str x30, [sp, #-0x10]! + + mov x4, x0 + mov x5, x1 + +0: subs x5, x5, #1 + b.lo 1f + ldrb w0, [x4], #1 + bl puthexb + b 0b + +1: ldr x30, [sp], #0x10 + ret +endfunction + +// Declare some storate space to shadow the SVE register contents: +.pushsection .text +.data +.align 4 +vref: + .space MAXVL_B * NVR +scratch: + .space MAXVL_B +.popsection + +// Trivial memory copy: copy x2 bytes, starting at address x1, to address x0. +// Clobbers x0-x3 +function memcpy + cmp x2, #0 + b.eq 1f +0: ldrb w3, [x1], #1 + strb w3, [x0], #1 + subs x2, x2, #1 + b.ne 0b +1: ret +endfunction + +// Generate a test pattern for storage in SVE registers +// x0: pid (16 bits) +// x1: register number (6 bits) +// x2: generation (4 bits) +function pattern + orr w1, w0, w1, lsl #16 + orr w2, w1, w2, lsl #28 + + ldr x0, =scratch + mov w1, #MAXVL_B / 4 + +0: str w2, [x0], #4 + add w2, w2, #(1 << 22) + subs w1, w1, #1 + bne 0b + + ret +endfunction + +// Get the address of shadow data for FPSIMD V-register V<xn> +.macro _adrv xd, xn, nrtmp + ldr \xd, =vref + mov x\nrtmp, #16 + madd \xd, x\nrtmp, \xn, \xd +.endm + +// Set up test pattern in a FPSIMD V-register +// x0: pid +// x1: register number +// x2: generation +function setup_vreg + mov x4, x30 + + mov x6, x1 + bl pattern + _adrv x0, x6, 2 + mov x5, x0 + ldr x1, =scratch + bl memcpy + + mov x0, x6 + mov x1, x5 + bl setv + + ret x4 +endfunction + +// Fill x1 bytes starting at x0 with 0xae (for canary purposes) +// Clobbers x1, x2. +function memfill_ae + mov w2, #0xae + b memfill +endfunction + +// Fill x1 bytes starting at x0 with 0. +// Clobbers x1, x2. +function memclr + mov w2, #0 +endfunction + // fall through to memfill + +// Trivial memory fill: fill x1 bytes starting at address x0 with byte w2 +// Clobbers x1 +function memfill + cmp x1, #0 + b.eq 1f + +0: strb w2, [x0], #1 + subs x1, x1, #1 + b.ne 0b + +1: ret +endfunction + +// Trivial memory compare: compare x2 bytes starting at address x0 with +// bytes starting at address x1. +// Returns only if all bytes match; otherwise, the program is aborted. +// Clobbers x0-x5. +function memcmp + cbz x2, 1f + + mov x5, #0 +0: ldrb w3, [x0, x5] + ldrb w4, [x1, x5] + add x5, x5, #1 + cmp w3, w4 + b.ne barf + subs x2, x2, #1 + b.ne 0b + +1: ret +endfunction + +// Verify that a FPSIMD V-register matches its shadow in memory, else abort +// x0: reg number +// Clobbers x0-x5. +function check_vreg + mov x3, x30 + + _adrv x5, x0, 6 + mov x4, x0 + ldr x7, =scratch + + mov x0, x7 + mov x1, x6 + bl memfill_ae + + mov x0, x4 + mov x1, x7 + bl getv + + mov x0, x5 + mov x1, x7 + mov x2, x6 + mov x30, x3 + b memcmp +endfunction + +// Any SVE register modified here can cause corruption in the main +// thread -- but *only* the registers modified here. +function irritator_handler + // Increment the irritation signal count (x23): + ldr x0, [x2, #ucontext_regs + 8 * 23] + add x0, x0, #1 + str x0, [x2, #ucontext_regs + 8 * 23] + + // Corrupt some random V-regs + adr x0, .text + (irritator_handler - .text) / 16 * 16 + movi v0.8b, #7 + movi v9.16b, #9 + movi v31.8b, #31 + + ret +endfunction + +function terminate_handler + mov w21, w0 + mov x20, x2 + + puts "Terminated by signal " + mov w0, w21 + bl putdec + puts ", no error, iterations=" + ldr x0, [x20, #ucontext_regs + 8 * 22] + bl putdec + puts ", signals=" + ldr x0, [x20, #ucontext_regs + 8 * 23] + bl putdecn + + mov x0, #0 + mov x8, #__NR_exit + svc #0 +endfunction + +// w0: signal number +// x1: sa_action +// w2: sa_flags +// Clobbers x0-x6,x8 +function setsignal + str x30, [sp, #-((sa_sz + 15) / 16 * 16 + 16)]! + + mov w4, w0 + mov x5, x1 + mov w6, w2 + + add x0, sp, #16 + mov x1, #sa_sz + bl memclr + + mov w0, w4 + add x1, sp, #16 + str w6, [x1, #sa_flags] + str x5, [x1, #sa_handler] + mov x2, #0 + mov x3, #sa_mask_sz + mov x8, #__NR_rt_sigaction + svc #0 + + cbz w0, 1f + + puts "sigaction failure\n" + b .Labort + +1: ldr x30, [sp], #((sa_sz + 15) / 16 * 16 + 16) + ret +endfunction + +// Main program entry point +.globl _start +function _start +_start: + // Sanity-check and report the vector length + + mov x19, #128 + cmp x19, #128 + b.lo 1f + cmp x19, #2048 + b.hi 1f + tst x19, #(8 - 1) + b.eq 2f + +1: puts "Bad vector length: " + mov x0, x19 + bl putdecn + b .Labort + +2: puts "Vector length:\t" + mov x0, x19 + bl putdec + puts " bits\n" + + // Obtain our PID, to ensure test pattern uniqueness between processes + + mov x8, #__NR_getpid + svc #0 + mov x20, x0 + + puts "PID:\t" + mov x0, x20 + bl putdecn + + mov x23, #0 // Irritation signal count + + mov w0, #SIGINT + adr x1, terminate_handler + mov w2, #SA_SIGINFO + bl setsignal + + mov w0, #SIGTERM + adr x1, terminate_handler + mov w2, #SA_SIGINFO + bl setsignal + + mov w0, #SIGUSR1 + adr x1, irritator_handler + mov w2, #SA_SIGINFO + orr w2, w2, #SA_NODEFER + bl setsignal + + mov x22, #0 // generation number, increments per iteration +.Ltest_loop: + + mov x21, #0 // Set up V-regs & shadow with test pattern +0: mov x0, x20 + mov x1, x21 + and x2, x22, #0xf + bl setup_vreg + add x21, x21, #1 + cmp x21, #NVR + b.lo 0b + +// Can't do this when SVE state is volatile across SVC: + mov x8, #__NR_sched_yield // Encourage preemption + svc #0 + + mov x21, #0 +0: mov x0, x21 + bl check_vreg + add x21, x21, #1 + cmp x21, #NVR + b.lo 0b + + add x22, x22, #1 + b .Ltest_loop + +.Labort: + mov x0, #0 + mov x1, #SIGABRT + mov x8, #__NR_kill + svc #0 +endfunction + +function barf + mov x10, x0 // expected data + mov x11, x1 // actual data + mov x12, x2 // data size + + puts "Mistatch: PID=" + mov x0, x20 + bl putdec + puts ", iteration=" + mov x0, x22 + bl putdec + puts ", reg=" + mov x0, x21 + bl putdecn + puts "\tExpected [" + mov x0, x10 + mov x1, x12 + bl dumphex + puts "]\n\tGot [" + mov x0, x11 + mov x1, x12 + bl dumphex + puts "]\n" + + mov x8, #__NR_exit + mov x1, #1 + svc #0 +endfunction diff --git a/tools/testing/selftests/arm64/fp/sve-probe-vls.c b/tools/testing/selftests/arm64/fp/sve-probe-vls.c new file mode 100644 index 000000000000..b29cbc642c57 --- /dev/null +++ b/tools/testing/selftests/arm64/fp/sve-probe-vls.c @@ -0,0 +1,58 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2015-2020 ARM Limited. + * Original author: Dave Martin <Dave.Martin@arm.com> + */ +#include <assert.h> +#include <errno.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/auxv.h> +#include <sys/prctl.h> +#include <asm/sigcontext.h> + +#include "../../kselftest.h" + +int main(int argc, char **argv) +{ + unsigned int vq; + int vl; + static unsigned int vqs[SVE_VQ_MAX]; + unsigned int nvqs = 0; + + ksft_print_header(); + ksft_set_plan(2); + + if (!(getauxval(AT_HWCAP) & HWCAP_SVE)) + ksft_exit_skip("SVE not available"); + + /* + * Enumerate up to SVE_VQ_MAX vector lengths + */ + for (vq = SVE_VQ_MAX; vq > 0; --vq) { + vl = prctl(PR_SVE_SET_VL, vq * 16); + if (vl == -1) + ksft_exit_fail_msg("PR_SVE_SET_VL failed: %s (%d)\n", + strerror(errno), errno); + + vl &= PR_SVE_VL_LEN_MASK; + + if (!sve_vl_valid(vl)) + ksft_exit_fail_msg("VL %d invalid\n", vl); + vq = sve_vq_from_vl(vl); + + if (!(nvqs < SVE_VQ_MAX)) + ksft_exit_fail_msg("Too many VLs %u >= SVE_VQ_MAX\n", + nvqs); + vqs[nvqs++] = vq; + } + ksft_test_result_pass("Enumerated %d vector lengths\n", nvqs); + ksft_test_result_pass("All vector lengths valid\n"); + + /* Print out the vector lengths in ascending order: */ + while (nvqs--) + ksft_print_msg("%u\n", 16 * vqs[nvqs]); + + ksft_exit_pass(); +} diff --git a/tools/testing/selftests/arm64/fp/sve-ptrace-asm.S b/tools/testing/selftests/arm64/fp/sve-ptrace-asm.S new file mode 100644 index 000000000000..3e81f9fab574 --- /dev/null +++ b/tools/testing/selftests/arm64/fp/sve-ptrace-asm.S @@ -0,0 +1,33 @@ +// SPDX-License-Identifier: GPL-2.0-only +// Copyright (C) 2015-2019 ARM Limited. +// Original author: Dave Martin <Dave.Martin@arm.com> +#include <asm/unistd.h> + +.arch_extension sve + +.globl sve_store_patterns + +sve_store_patterns: + mov x1, x0 + + index z0.b, #0, #1 + str q0, [x1] + + mov w8, #__NR_getpid + svc #0 + str q0, [x1, #0x10] + + mov z1.d, z0.d + str q0, [x1, #0x20] + + mov w8, #__NR_getpid + svc #0 + str q0, [x1, #0x30] + + mov z1.d, z0.d + str q0, [x1, #0x40] + + ret + +.size sve_store_patterns, . - sve_store_patterns +.type sve_store_patterns, @function diff --git a/tools/testing/selftests/arm64/fp/sve-ptrace.c b/tools/testing/selftests/arm64/fp/sve-ptrace.c new file mode 100644 index 000000000000..b2282be6f938 --- /dev/null +++ b/tools/testing/selftests/arm64/fp/sve-ptrace.c @@ -0,0 +1,336 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2015-2020 ARM Limited. + * Original author: Dave Martin <Dave.Martin@arm.com> + */ +#include <errno.h> +#include <stddef.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <sys/auxv.h> +#include <sys/ptrace.h> +#include <sys/types.h> +#include <sys/uio.h> +#include <sys/wait.h> +#include <asm/sigcontext.h> +#include <asm/ptrace.h> + +#include "../../kselftest.h" + +/* <linux/elf.h> and <sys/auxv.h> don't like each other, so: */ +#ifndef NT_ARM_SVE +#define NT_ARM_SVE 0x405 +#endif + +/* Number of registers filled in by sve_store_patterns */ +#define NR_VREGS 5 + +void sve_store_patterns(__uint128_t v[NR_VREGS]); + +static void dump(const void *buf, size_t size) +{ + size_t i; + const unsigned char *p = buf; + + for (i = 0; i < size; ++i) + printf(" %.2x", *p++); +} + +static int check_vregs(const __uint128_t vregs[NR_VREGS]) +{ + int i; + int ok = 1; + + for (i = 0; i < NR_VREGS; ++i) { + printf("# v[%d]:", i); + dump(&vregs[i], sizeof vregs[i]); + putchar('\n'); + + if (vregs[i] != vregs[0]) + ok = 0; + } + + return ok; +} + +static int do_child(void) +{ + if (ptrace(PTRACE_TRACEME, -1, NULL, NULL)) + ksft_exit_fail_msg("PTRACE_TRACEME", strerror(errno)); + + if (raise(SIGSTOP)) + ksft_exit_fail_msg("raise(SIGSTOP)", strerror(errno)); + + return EXIT_SUCCESS; +} + +static struct user_sve_header *get_sve(pid_t pid, void **buf, size_t *size) +{ + struct user_sve_header *sve; + void *p; + size_t sz = sizeof *sve; + struct iovec iov; + + while (1) { + if (*size < sz) { + p = realloc(*buf, sz); + if (!p) { + errno = ENOMEM; + goto error; + } + + *buf = p; + *size = sz; + } + + iov.iov_base = *buf; + iov.iov_len = sz; + if (ptrace(PTRACE_GETREGSET, pid, NT_ARM_SVE, &iov)) + goto error; + + sve = *buf; + if (sve->size <= sz) + break; + + sz = sve->size; + } + + return sve; + +error: + return NULL; +} + +static int set_sve(pid_t pid, const struct user_sve_header *sve) +{ + struct iovec iov; + + iov.iov_base = (void *)sve; + iov.iov_len = sve->size; + return ptrace(PTRACE_SETREGSET, pid, NT_ARM_SVE, &iov); +} + +static void dump_sve_regs(const struct user_sve_header *sve, unsigned int num, + unsigned int vlmax) +{ + unsigned int vq; + unsigned int i; + + if ((sve->flags & SVE_PT_REGS_MASK) != SVE_PT_REGS_SVE) + ksft_exit_fail_msg("Dumping non-SVE register\n"); + + if (vlmax > sve->vl) + vlmax = sve->vl; + + vq = sve_vq_from_vl(sve->vl); + for (i = 0; i < num; ++i) { + printf("# z%u:", i); + dump((const char *)sve + SVE_PT_SVE_ZREG_OFFSET(vq, i), + vlmax); + printf("%s\n", vlmax == sve->vl ? "" : " ..."); + } +} + +static int do_parent(pid_t child) +{ + int ret = EXIT_FAILURE; + pid_t pid; + int status; + siginfo_t si; + void *svebuf = NULL, *newsvebuf; + size_t svebufsz = 0, newsvebufsz; + struct user_sve_header *sve, *new_sve; + struct user_fpsimd_state *fpsimd; + unsigned int i, j; + unsigned char *p; + unsigned int vq; + + /* Attach to the child */ + while (1) { + int sig; + + pid = wait(&status); + if (pid == -1) { + perror("wait"); + goto error; + } + + /* + * This should never happen but it's hard to flag in + * the framework. + */ + if (pid != child) + continue; + + if (WIFEXITED(status) || WIFSIGNALED(status)) + ksft_exit_fail_msg("Child died unexpectedly\n"); + + ksft_test_result(WIFSTOPPED(status), "WIFSTOPPED(%d)\n", + status); + if (!WIFSTOPPED(status)) + goto error; + + sig = WSTOPSIG(status); + + if (ptrace(PTRACE_GETSIGINFO, pid, NULL, &si)) { + if (errno == ESRCH) + goto disappeared; + + if (errno == EINVAL) { + sig = 0; /* bust group-stop */ + goto cont; + } + + ksft_test_result_fail("PTRACE_GETSIGINFO: %s\n", + strerror(errno)); + goto error; + } + + if (sig == SIGSTOP && si.si_code == SI_TKILL && + si.si_pid == pid) + break; + + cont: + if (ptrace(PTRACE_CONT, pid, NULL, sig)) { + if (errno == ESRCH) + goto disappeared; + + ksft_test_result_fail("PTRACE_CONT: %s\n", + strerror(errno)); + goto error; + } + } + + sve = get_sve(pid, &svebuf, &svebufsz); + if (!sve) { + int e = errno; + + ksft_test_result_fail("get_sve: %s\n", strerror(errno)); + if (e == ESRCH) + goto disappeared; + + goto error; + } else { + ksft_test_result_pass("get_sve\n"); + } + + ksft_test_result((sve->flags & SVE_PT_REGS_MASK) == SVE_PT_REGS_FPSIMD, + "FPSIMD registers\n"); + if ((sve->flags & SVE_PT_REGS_MASK) != SVE_PT_REGS_FPSIMD) + goto error; + + fpsimd = (struct user_fpsimd_state *)((char *)sve + + SVE_PT_FPSIMD_OFFSET); + for (i = 0; i < 32; ++i) { + p = (unsigned char *)&fpsimd->vregs[i]; + + for (j = 0; j < sizeof fpsimd->vregs[i]; ++j) + p[j] = j; + } + + if (set_sve(pid, sve)) { + int e = errno; + + ksft_test_result_fail("set_sve(FPSIMD): %s\n", + strerror(errno)); + if (e == ESRCH) + goto disappeared; + + goto error; + } + + vq = sve_vq_from_vl(sve->vl); + + newsvebufsz = SVE_PT_SVE_ZREG_OFFSET(vq, 1); + new_sve = newsvebuf = malloc(newsvebufsz); + if (!new_sve) { + errno = ENOMEM; + perror(NULL); + goto error; + } + + *new_sve = *sve; + new_sve->flags &= ~SVE_PT_REGS_MASK; + new_sve->flags |= SVE_PT_REGS_SVE; + memset((char *)new_sve + SVE_PT_SVE_ZREG_OFFSET(vq, 0), + 0, SVE_PT_SVE_ZREG_SIZE(vq)); + new_sve->size = SVE_PT_SVE_ZREG_OFFSET(vq, 1); + if (set_sve(pid, new_sve)) { + int e = errno; + + ksft_test_result_fail("set_sve(ZREG): %s\n", strerror(errno)); + if (e == ESRCH) + goto disappeared; + + goto error; + } + + new_sve = get_sve(pid, &newsvebuf, &newsvebufsz); + if (!new_sve) { + int e = errno; + + ksft_test_result_fail("get_sve(ZREG): %s\n", strerror(errno)); + if (e == ESRCH) + goto disappeared; + + goto error; + } + + ksft_test_result((new_sve->flags & SVE_PT_REGS_MASK) == SVE_PT_REGS_SVE, + "SVE registers\n"); + if ((new_sve->flags & SVE_PT_REGS_MASK) != SVE_PT_REGS_SVE) + goto error; + + dump_sve_regs(new_sve, 3, sizeof fpsimd->vregs[0]); + + p = (unsigned char *)new_sve + SVE_PT_SVE_ZREG_OFFSET(vq, 1); + for (i = 0; i < sizeof fpsimd->vregs[0]; ++i) { + unsigned char expected = i; + + if (__BYTE_ORDER == __BIG_ENDIAN) + expected = sizeof fpsimd->vregs[0] - 1 - expected; + + ksft_test_result(p[i] == expected, "p[%d] == expected\n", i); + if (p[i] != expected) + goto error; + } + + ret = EXIT_SUCCESS; + +error: + kill(child, SIGKILL); + +disappeared: + return ret; +} + +int main(void) +{ + int ret = EXIT_SUCCESS; + __uint128_t v[NR_VREGS]; + pid_t child; + + ksft_print_header(); + ksft_set_plan(20); + + if (!(getauxval(AT_HWCAP) & HWCAP_SVE)) + ksft_exit_skip("SVE not available\n"); + + sve_store_patterns(v); + + if (!check_vregs(v)) + ksft_exit_fail_msg("Initial check_vregs() failed\n"); + + child = fork(); + if (!child) + return do_child(); + + if (do_parent(child)) + ret = EXIT_FAILURE; + + ksft_print_cnts(); + + return 0; +} diff --git a/tools/testing/selftests/arm64/fp/sve-stress b/tools/testing/selftests/arm64/fp/sve-stress new file mode 100755 index 000000000000..24dd0922cc02 --- /dev/null +++ b/tools/testing/selftests/arm64/fp/sve-stress @@ -0,0 +1,59 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0-only +# Copyright (C) 2015-2019 ARM Limited. +# Original author: Dave Martin <Dave.Martin@arm.com> + +set -ue + +NR_CPUS=`nproc` + +pids= +logs= + +cleanup () { + trap - INT TERM CHLD + set +e + + if [ -n "$pids" ]; then + kill $pids + wait $pids + pids= + fi + + if [ -n "$logs" ]; then + cat $logs + rm $logs + logs= + fi +} + +interrupt () { + cleanup + exit 0 +} + +child_died () { + cleanup + exit 1 +} + +trap interrupt INT TERM EXIT + +for x in `seq 0 $((NR_CPUS * 4))`; do + log=`mktemp` + logs=$logs\ $log + ./sve-test >$log & + pids=$pids\ $! +done + +# Wait for all child processes to be created: +sleep 10 + +while :; do + kill -USR1 $pids +done & +pids=$pids\ $! + +wait + +exit 1 diff --git a/tools/testing/selftests/arm64/fp/sve-test.S b/tools/testing/selftests/arm64/fp/sve-test.S new file mode 100644 index 000000000000..f95074c9b48b --- /dev/null +++ b/tools/testing/selftests/arm64/fp/sve-test.S @@ -0,0 +1,672 @@ +// SPDX-License-Identifier: GPL-2.0-only +// Copyright (C) 2015-2019 ARM Limited. +// Original author: Dave Martin <Dave.Martin@arm.com> +// +// Simple Scalable Vector Extension context switch test +// Repeatedly writes unique test patterns into each SVE register +// and reads them back to verify integrity. +// +// for x in `seq 1 NR_CPUS`; do sve-test & pids=$pids\ $! ; done +// (leave it running for as long as you want...) +// kill $pids + +#include <asm/unistd.h> +#include "assembler.h" +#include "asm-offsets.h" + +#define NZR 32 +#define NPR 16 +#define MAXVL_B (2048 / 8) + +.arch_extension sve + +.macro _sve_ldr_v zt, xn + ldr z\zt, [x\xn] +.endm + +.macro _sve_str_v zt, xn + str z\zt, [x\xn] +.endm + +.macro _sve_ldr_p pt, xn + ldr p\pt, [x\xn] +.endm + +.macro _sve_str_p pt, xn + str p\pt, [x\xn] +.endm + +// Generate accessor functions to read/write programmatically selected +// SVE registers. +// x0 is the register index to access +// x1 is the memory address to read from (getz,setp) or store to (setz,setp) +// All clobber x0-x2 +define_accessor setz, NZR, _sve_ldr_v +define_accessor getz, NZR, _sve_str_v +define_accessor setp, NPR, _sve_ldr_p +define_accessor getp, NPR, _sve_str_p + +// Print a single character x0 to stdout +// Clobbers x0-x2,x8 +function putc + str x0, [sp, #-16]! + + mov x0, #1 // STDOUT_FILENO + mov x1, sp + mov x2, #1 + mov x8, #__NR_write + svc #0 + + add sp, sp, #16 + ret +endfunction + +// Print a NUL-terminated string starting at address x0 to stdout +// Clobbers x0-x3,x8 +function puts + mov x1, x0 + + mov x2, #0 +0: ldrb w3, [x0], #1 + cbz w3, 1f + add x2, x2, #1 + b 0b + +1: mov w0, #1 // STDOUT_FILENO + mov x8, #__NR_write + svc #0 + + ret +endfunction + +// Utility macro to print a literal string +// Clobbers x0-x4,x8 +.macro puts string + .pushsection .rodata.str1.1, "aMS", 1 +.L__puts_literal\@: .string "\string" + .popsection + + ldr x0, =.L__puts_literal\@ + bl puts +.endm + +// Print an unsigned decimal number x0 to stdout +// Clobbers x0-x4,x8 +function putdec + mov x1, sp + str x30, [sp, #-32]! // Result can't be > 20 digits + + mov x2, #0 + strb w2, [x1, #-1]! // Write the NUL terminator + + mov x2, #10 +0: udiv x3, x0, x2 // div-mod loop to generate the digits + msub x0, x3, x2, x0 + add w0, w0, #'0' + strb w0, [x1, #-1]! + mov x0, x3 + cbnz x3, 0b + + ldrb w0, [x1] + cbnz w0, 1f + mov w0, #'0' // Print "0" for 0, not "" + strb w0, [x1, #-1]! + +1: mov x0, x1 + bl puts + + ldr x30, [sp], #32 + ret +endfunction + +// Print an unsigned decimal number x0 to stdout, followed by a newline +// Clobbers x0-x5,x8 +function putdecn + mov x5, x30 + + bl putdec + mov x0, #'\n' + bl putc + + ret x5 +endfunction + +// Clobbers x0-x3,x8 +function puthexb + str x30, [sp, #-0x10]! + + mov w3, w0 + lsr w0, w0, #4 + bl puthexnibble + mov w0, w3 + + ldr x30, [sp], #0x10 + // fall through to puthexnibble +endfunction +// Clobbers x0-x2,x8 +function puthexnibble + and w0, w0, #0xf + cmp w0, #10 + blo 1f + add w0, w0, #'a' - ('9' + 1) +1: add w0, w0, #'0' + b putc +endfunction + +// x0=data in, x1=size in, clobbers x0-x5,x8 +function dumphex + str x30, [sp, #-0x10]! + + mov x4, x0 + mov x5, x1 + +0: subs x5, x5, #1 + b.lo 1f + ldrb w0, [x4], #1 + bl puthexb + b 0b + +1: ldr x30, [sp], #0x10 + ret +endfunction + +// Declare some storate space to shadow the SVE register contents: +.pushsection .text +.data +.align 4 +zref: + .space MAXVL_B * NZR +pref: + .space MAXVL_B / 8 * NPR +ffrref: + .space MAXVL_B / 8 +scratch: + .space MAXVL_B +.popsection + +// Trivial memory copy: copy x2 bytes, starting at address x1, to address x0. +// Clobbers x0-x3 +function memcpy + cmp x2, #0 + b.eq 1f +0: ldrb w3, [x1], #1 + strb w3, [x0], #1 + subs x2, x2, #1 + b.ne 0b +1: ret +endfunction + +// Generate a test pattern for storage in SVE registers +// x0: pid (16 bits) +// x1: register number (6 bits) +// x2: generation (4 bits) + +// These values are used to constuct a 32-bit pattern that is repeated in the +// scratch buffer as many times as will fit: +// bits 31:28 generation number (increments once per test_loop) +// bits 27:22 32-bit lane index +// bits 21:16 register number +// bits 15: 0 pid + +function pattern + orr w1, w0, w1, lsl #16 + orr w2, w1, w2, lsl #28 + + ldr x0, =scratch + mov w1, #MAXVL_B / 4 + +0: str w2, [x0], #4 + add w2, w2, #(1 << 22) + subs w1, w1, #1 + bne 0b + + ret +endfunction + +// Get the address of shadow data for SVE Z-register Z<xn> +.macro _adrz xd, xn, nrtmp + ldr \xd, =zref + rdvl x\nrtmp, #1 + madd \xd, x\nrtmp, \xn, \xd +.endm + +// Get the address of shadow data for SVE P-register P<xn - NZR> +.macro _adrp xd, xn, nrtmp + ldr \xd, =pref + rdvl x\nrtmp, #1 + lsr x\nrtmp, x\nrtmp, #3 + sub \xn, \xn, #NZR + madd \xd, x\nrtmp, \xn, \xd +.endm + +// Set up test pattern in a SVE Z-register +// x0: pid +// x1: register number +// x2: generation +function setup_zreg + mov x4, x30 + + mov x6, x1 + bl pattern + _adrz x0, x6, 2 + mov x5, x0 + ldr x1, =scratch + bl memcpy + + mov x0, x6 + mov x1, x5 + bl setz + + ret x4 +endfunction + +// Set up test pattern in a SVE P-register +// x0: pid +// x1: register number +// x2: generation +function setup_preg + mov x4, x30 + + mov x6, x1 + bl pattern + _adrp x0, x6, 2 + mov x5, x0 + ldr x1, =scratch + bl memcpy + + mov x0, x6 + mov x1, x5 + bl setp + + ret x4 +endfunction + +// Set up test pattern in the FFR +// x0: pid +// x2: generation +// Beware: corrupts P0. +function setup_ffr + mov x4, x30 + + bl pattern + ldr x0, =ffrref + ldr x1, =scratch + rdvl x2, #1 + lsr x2, x2, #3 + bl memcpy + + mov x0, #0 + ldr x1, =ffrref + bl setp + + wrffr p0.b + + ret x4 +endfunction + +// Fill x1 bytes starting at x0 with 0xae (for canary purposes) +// Clobbers x1, x2. +function memfill_ae + mov w2, #0xae + b memfill +endfunction + +// Fill x1 bytes starting at x0 with 0. +// Clobbers x1, x2. +function memclr + mov w2, #0 +endfunction + // fall through to memfill + +// Trivial memory fill: fill x1 bytes starting at address x0 with byte w2 +// Clobbers x1 +function memfill + cmp x1, #0 + b.eq 1f + +0: strb w2, [x0], #1 + subs x1, x1, #1 + b.ne 0b + +1: ret +endfunction + +// Trivial memory compare: compare x2 bytes starting at address x0 with +// bytes starting at address x1. +// Returns only if all bytes match; otherwise, the program is aborted. +// Clobbers x0-x5. +function memcmp + cbz x2, 2f + + stp x0, x1, [sp, #-0x20]! + str x2, [sp, #0x10] + + mov x5, #0 +0: ldrb w3, [x0, x5] + ldrb w4, [x1, x5] + add x5, x5, #1 + cmp w3, w4 + b.ne 1f + subs x2, x2, #1 + b.ne 0b + +1: ldr x2, [sp, #0x10] + ldp x0, x1, [sp], #0x20 + b.ne barf + +2: ret +endfunction + +// Verify that a SVE Z-register matches its shadow in memory, else abort +// x0: reg number +// Clobbers x0-x7. +function check_zreg + mov x3, x30 + + _adrz x5, x0, 6 + mov x4, x0 + ldr x7, =scratch + + mov x0, x7 + mov x1, x6 + bl memfill_ae + + mov x0, x4 + mov x1, x7 + bl getz + + mov x0, x5 + mov x1, x7 + mov x2, x6 + mov x30, x3 + b memcmp +endfunction + +// Verify that a SVE P-register matches its shadow in memory, else abort +// x0: reg number +// Clobbers x0-x7. +function check_preg + mov x3, x30 + + _adrp x5, x0, 6 + mov x4, x0 + ldr x7, =scratch + + mov x0, x7 + mov x1, x6 + bl memfill_ae + + mov x0, x4 + mov x1, x7 + bl getp + + mov x0, x5 + mov x1, x7 + mov x2, x6 + mov x30, x3 + b memcmp +endfunction + +// Verify that the FFR matches its shadow in memory, else abort +// Beware -- corrupts P0. +// Clobbers x0-x5. +function check_ffr + mov x3, x30 + + ldr x4, =scratch + rdvl x5, #1 + lsr x5, x5, #3 + + mov x0, x4 + mov x1, x5 + bl memfill_ae + + rdffr p0.b + mov x0, #0 + mov x1, x4 + bl getp + + ldr x0, =ffrref + mov x1, x4 + mov x2, x5 + mov x30, x3 + b memcmp +endfunction + +// Any SVE register modified here can cause corruption in the main +// thread -- but *only* the registers modified here. +function irritator_handler + // Increment the irritation signal count (x23): + ldr x0, [x2, #ucontext_regs + 8 * 23] + add x0, x0, #1 + str x0, [x2, #ucontext_regs + 8 * 23] + + // Corrupt some random Z-regs + adr x0, .text + (irritator_handler - .text) / 16 * 16 + movi v0.8b, #1 + movi v9.16b, #2 + movi v31.8b, #3 + // And P0 + rdffr p0.b + // And FFR + wrffr p15.b + + ret +endfunction + +function terminate_handler + mov w21, w0 + mov x20, x2 + + puts "Terminated by signal " + mov w0, w21 + bl putdec + puts ", no error, iterations=" + ldr x0, [x20, #ucontext_regs + 8 * 22] + bl putdec + puts ", signals=" + ldr x0, [x20, #ucontext_regs + 8 * 23] + bl putdecn + + mov x0, #0 + mov x8, #__NR_exit + svc #0 +endfunction + +// w0: signal number +// x1: sa_action +// w2: sa_flags +// Clobbers x0-x6,x8 +function setsignal + str x30, [sp, #-((sa_sz + 15) / 16 * 16 + 16)]! + + mov w4, w0 + mov x5, x1 + mov w6, w2 + + add x0, sp, #16 + mov x1, #sa_sz + bl memclr + + mov w0, w4 + add x1, sp, #16 + str w6, [x1, #sa_flags] + str x5, [x1, #sa_handler] + mov x2, #0 + mov x3, #sa_mask_sz + mov x8, #__NR_rt_sigaction + svc #0 + + cbz w0, 1f + + puts "sigaction failure\n" + b .Labort + +1: ldr x30, [sp], #((sa_sz + 15) / 16 * 16 + 16) + ret +endfunction + +// Main program entry point +.globl _start +function _start +_start: + // Sanity-check and report the vector length + + rdvl x19, #8 + cmp x19, #128 + b.lo 1f + cmp x19, #2048 + b.hi 1f + tst x19, #(8 - 1) + b.eq 2f + +1: puts "Bad vector length: " + mov x0, x19 + bl putdecn + b .Labort + +2: puts "Vector length:\t" + mov x0, x19 + bl putdec + puts " bits\n" + + // Obtain our PID, to ensure test pattern uniqueness between processes + + mov x8, #__NR_getpid + svc #0 + mov x20, x0 + + puts "PID:\t" + mov x0, x20 + bl putdecn + + mov x23, #0 // Irritation signal count + + mov w0, #SIGINT + adr x1, terminate_handler + mov w2, #SA_SIGINFO + bl setsignal + + mov w0, #SIGTERM + adr x1, terminate_handler + mov w2, #SA_SIGINFO + bl setsignal + + mov w0, #SIGUSR1 + adr x1, irritator_handler + mov w2, #SA_SIGINFO + orr w2, w2, #SA_NODEFER + bl setsignal + + mov x22, #0 // generation number, increments per iteration +.Ltest_loop: + rdvl x0, #8 + cmp x0, x19 + b.ne vl_barf + + mov x21, #0 // Set up Z-regs & shadow with test pattern +0: mov x0, x20 + mov x1, x21 + and x2, x22, #0xf + bl setup_zreg + add x21, x21, #1 + cmp x21, #NZR + b.lo 0b + + mov x0, x20 // Set up FFR & shadow with test pattern + mov x1, #NZR + NPR + and x2, x22, #0xf + bl setup_ffr + +0: mov x0, x20 // Set up P-regs & shadow with test pattern + mov x1, x21 + and x2, x22, #0xf + bl setup_preg + add x21, x21, #1 + cmp x21, #NZR + NPR + b.lo 0b + +// Can't do this when SVE state is volatile across SVC: +// mov x8, #__NR_sched_yield // Encourage preemption +// svc #0 + + mov x21, #0 +0: mov x0, x21 + bl check_zreg + add x21, x21, #1 + cmp x21, #NZR + b.lo 0b + +0: mov x0, x21 + bl check_preg + add x21, x21, #1 + cmp x21, #NZR + NPR + b.lo 0b + + bl check_ffr + + add x22, x22, #1 + b .Ltest_loop + +.Labort: + mov x0, #0 + mov x1, #SIGABRT + mov x8, #__NR_kill + svc #0 +endfunction + +function barf +// fpsimd.c acitivty log dump hack +// ldr w0, =0xdeadc0de +// mov w8, #__NR_exit +// svc #0 +// end hack + mov x10, x0 // expected data + mov x11, x1 // actual data + mov x12, x2 // data size + + puts "Mistatch: PID=" + mov x0, x20 + bl putdec + puts ", iteration=" + mov x0, x22 + bl putdec + puts ", reg=" + mov x0, x21 + bl putdecn + puts "\tExpected [" + mov x0, x10 + mov x1, x12 + bl dumphex + puts "]\n\tGot [" + mov x0, x11 + mov x1, x12 + bl dumphex + puts "]\n" + + mov x8, #__NR_getpid + svc #0 +// fpsimd.c acitivty log dump hack +// ldr w0, =0xdeadc0de +// mov w8, #__NR_exit +// svc #0 +// ^ end of hack + mov x1, #SIGABRT + mov x8, #__NR_kill + svc #0 +// mov x8, #__NR_exit +// mov x1, #1 +// svc #0 +endfunction + +function vl_barf + mov x10, x0 + + puts "Bad active VL: " + mov x0, x10 + bl putdecn + + mov x8, #__NR_exit + mov x1, #1 + svc #0 +endfunction diff --git a/tools/testing/selftests/arm64/fp/vlset.c b/tools/testing/selftests/arm64/fp/vlset.c new file mode 100644 index 000000000000..308d27a68226 --- /dev/null +++ b/tools/testing/selftests/arm64/fp/vlset.c @@ -0,0 +1,155 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2015-2019 ARM Limited. + * Original author: Dave Martin <Dave.Martin@arm.com> + */ +#define _GNU_SOURCE +#include <assert.h> +#include <errno.h> +#include <limits.h> +#include <stddef.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <getopt.h> +#include <unistd.h> +#include <sys/auxv.h> +#include <sys/prctl.h> +#include <asm/hwcap.h> +#include <asm/sigcontext.h> + +static int inherit = 0; +static int no_inherit = 0; +static int force = 0; +static unsigned long vl; + +static const struct option options[] = { + { "force", no_argument, NULL, 'f' }, + { "inherit", no_argument, NULL, 'i' }, + { "max", no_argument, NULL, 'M' }, + { "no-inherit", no_argument, &no_inherit, 1 }, + { "help", no_argument, NULL, '?' }, + {} +}; + +static char const *program_name; + +static int parse_options(int argc, char **argv) +{ + int c; + char *rest; + + program_name = strrchr(argv[0], '/'); + if (program_name) + ++program_name; + else + program_name = argv[0]; + + while ((c = getopt_long(argc, argv, "Mfhi", options, NULL)) != -1) + switch (c) { + case 'M': vl = SVE_VL_MAX; break; + case 'f': force = 1; break; + case 'i': inherit = 1; break; + case 0: break; + default: goto error; + } + + if (inherit && no_inherit) + goto error; + + if (!vl) { + /* vector length */ + if (optind >= argc) + goto error; + + errno = 0; + vl = strtoul(argv[optind], &rest, 0); + if (*rest) { + vl = ULONG_MAX; + errno = EINVAL; + } + if (vl == ULONG_MAX && errno) { + fprintf(stderr, "%s: %s: %s\n", + program_name, argv[optind], strerror(errno)); + goto error; + } + + ++optind; + } + + /* command */ + if (optind >= argc) + goto error; + + return 0; + +error: + fprintf(stderr, + "Usage: %s [-f | --force] " + "[-i | --inherit | --no-inherit] " + "{-M | --max | <vector length>} " + "<command> [<arguments> ...]\n", + program_name); + return -1; +} + +int main(int argc, char **argv) +{ + int ret = 126; /* same as sh(1) command-not-executable error */ + long flags; + char *path; + int t, e; + + if (parse_options(argc, argv)) + return 2; /* same as sh(1) builtin incorrect-usage */ + + if (vl & ~(vl & PR_SVE_VL_LEN_MASK)) { + fprintf(stderr, "%s: Invalid vector length %lu\n", + program_name, vl); + return 2; /* same as sh(1) builtin incorrect-usage */ + } + + if (!(getauxval(AT_HWCAP) & HWCAP_SVE)) { + fprintf(stderr, "%s: Scalable Vector Extension not present\n", + program_name); + + if (!force) + goto error; + + fputs("Going ahead anyway (--force): " + "This is a debug option. Don't rely on it.\n", + stderr); + } + + flags = PR_SVE_SET_VL_ONEXEC; + if (inherit) + flags |= PR_SVE_VL_INHERIT; + + t = prctl(PR_SVE_SET_VL, vl | flags); + if (t < 0) { + fprintf(stderr, "%s: PR_SVE_SET_VL: %s\n", + program_name, strerror(errno)); + goto error; + } + + t = prctl(PR_SVE_GET_VL); + if (t == -1) { + fprintf(stderr, "%s: PR_SVE_GET_VL: %s\n", + program_name, strerror(errno)); + goto error; + } + flags = PR_SVE_VL_LEN_MASK; + flags = t & ~flags; + + assert(optind < argc); + path = argv[optind]; + + execvp(path, &argv[optind]); + e = errno; + if (errno == ENOENT) + ret = 127; /* same as sh(1) not-found error */ + fprintf(stderr, "%s: %s: %s\n", program_name, path, strerror(e)); + +error: + return ret; /* same as sh(1) not-executable error */ +} diff --git a/tools/testing/selftests/arm64/mte/.gitignore b/tools/testing/selftests/arm64/mte/.gitignore new file mode 100644 index 000000000000..bc3ac63f3314 --- /dev/null +++ b/tools/testing/selftests/arm64/mte/.gitignore @@ -0,0 +1,6 @@ +check_buffer_fill +check_tags_inclusion +check_child_memory +check_mmap_options +check_ksm_options +check_user_mem diff --git a/tools/testing/selftests/arm64/mte/Makefile b/tools/testing/selftests/arm64/mte/Makefile new file mode 100644 index 000000000000..2480226dfe57 --- /dev/null +++ b/tools/testing/selftests/arm64/mte/Makefile @@ -0,0 +1,29 @@ +# SPDX-License-Identifier: GPL-2.0 +# Copyright (C) 2020 ARM Limited + +CFLAGS += -std=gnu99 -I. +SRCS := $(filter-out mte_common_util.c,$(wildcard *.c)) +PROGS := $(patsubst %.c,%,$(SRCS)) + +#Add mte compiler option +ifneq ($(shell $(CC) --version 2>&1 | head -n 1 | grep gcc),) +CFLAGS += -march=armv8.5-a+memtag +endif + +#check if the compiler works well +mte_cc_support := $(shell if ($(CC) $(CFLAGS) -E -x c /dev/null -o /dev/null 2>&1) then echo "1"; fi) + +ifeq ($(mte_cc_support),1) +# Generated binaries to be installed by top KSFT script +TEST_GEN_PROGS := $(PROGS) + +# Get Kernel headers installed and use them. +KSFT_KHDR_INSTALL := 1 +endif + +# Include KSFT lib.mk. +include ../../lib.mk + +ifeq ($(mte_cc_support),1) +$(TEST_GEN_PROGS): mte_common_util.c mte_common_util.h mte_helper.S +endif diff --git a/tools/testing/selftests/arm64/mte/check_buffer_fill.c b/tools/testing/selftests/arm64/mte/check_buffer_fill.c new file mode 100644 index 000000000000..242635d79035 --- /dev/null +++ b/tools/testing/selftests/arm64/mte/check_buffer_fill.c @@ -0,0 +1,475 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (C) 2020 ARM Limited + +#define _GNU_SOURCE + +#include <stddef.h> +#include <stdio.h> +#include <string.h> + +#include "kselftest.h" +#include "mte_common_util.h" +#include "mte_def.h" + +#define OVERFLOW_RANGE MT_GRANULE_SIZE + +static int sizes[] = { + 1, 555, 1033, MT_GRANULE_SIZE - 1, MT_GRANULE_SIZE, + /* page size - 1*/ 0, /* page_size */ 0, /* page size + 1 */ 0 +}; + +enum mte_block_test_alloc { + UNTAGGED_TAGGED, + TAGGED_UNTAGGED, + TAGGED_TAGGED, + BLOCK_ALLOC_MAX, +}; + +static int check_buffer_by_byte(int mem_type, int mode) +{ + char *ptr; + int i, j, item; + bool err; + + mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG); + item = sizeof(sizes)/sizeof(int); + + for (i = 0; i < item; i++) { + ptr = (char *)mte_allocate_memory(sizes[i], mem_type, 0, true); + if (check_allocated_memory(ptr, sizes[i], mem_type, true) != KSFT_PASS) + return KSFT_FAIL; + mte_initialize_current_context(mode, (uintptr_t)ptr, sizes[i]); + /* Set some value in tagged memory */ + for (j = 0; j < sizes[i]; j++) + ptr[j] = '1'; + mte_wait_after_trig(); + err = cur_mte_cxt.fault_valid; + /* Check the buffer whether it is filled. */ + for (j = 0; j < sizes[i] && !err; j++) { + if (ptr[j] != '1') + err = true; + } + mte_free_memory((void *)ptr, sizes[i], mem_type, true); + + if (err) + break; + } + if (!err) + return KSFT_PASS; + else + return KSFT_FAIL; +} + +static int check_buffer_underflow_by_byte(int mem_type, int mode, + int underflow_range) +{ + char *ptr; + int i, j, item, last_index; + bool err; + char *und_ptr = NULL; + + mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG); + item = sizeof(sizes)/sizeof(int); + for (i = 0; i < item; i++) { + ptr = (char *)mte_allocate_memory_tag_range(sizes[i], mem_type, 0, + underflow_range, 0); + if (check_allocated_memory_range(ptr, sizes[i], mem_type, + underflow_range, 0) != KSFT_PASS) + return KSFT_FAIL; + + mte_initialize_current_context(mode, (uintptr_t)ptr, -underflow_range); + last_index = 0; + /* Set some value in tagged memory and make the buffer underflow */ + for (j = sizes[i] - 1; (j >= -underflow_range) && + (cur_mte_cxt.fault_valid == false); j--) { + ptr[j] = '1'; + last_index = j; + } + mte_wait_after_trig(); + err = false; + /* Check whether the buffer is filled */ + for (j = 0; j < sizes[i]; j++) { + if (ptr[j] != '1') { + err = true; + ksft_print_msg("Buffer is not filled at index:%d of ptr:0x%lx\n", + j, ptr); + break; + } + } + if (err) + goto check_buffer_underflow_by_byte_err; + + switch (mode) { + case MTE_NONE_ERR: + if (cur_mte_cxt.fault_valid == true || last_index != -underflow_range) { + err = true; + break; + } + /* There were no fault so the underflow area should be filled */ + und_ptr = (char *) MT_CLEAR_TAG((size_t) ptr - underflow_range); + for (j = 0 ; j < underflow_range; j++) { + if (und_ptr[j] != '1') { + err = true; + break; + } + } + break; + case MTE_ASYNC_ERR: + /* Imprecise fault should occur otherwise return error */ + if (cur_mte_cxt.fault_valid == false) { + err = true; + break; + } + /* + * The imprecise fault is checked after the write to the buffer, + * so the underflow area before the fault should be filled. + */ + und_ptr = (char *) MT_CLEAR_TAG((size_t) ptr); + for (j = last_index ; j < 0 ; j++) { + if (und_ptr[j] != '1') { + err = true; + break; + } + } + break; + case MTE_SYNC_ERR: + /* Precise fault should occur otherwise return error */ + if (!cur_mte_cxt.fault_valid || (last_index != (-1))) { + err = true; + break; + } + /* Underflow area should not be filled */ + und_ptr = (char *) MT_CLEAR_TAG((size_t) ptr); + if (und_ptr[-1] == '1') + err = true; + break; + default: + err = true; + break; + } +check_buffer_underflow_by_byte_err: + mte_free_memory_tag_range((void *)ptr, sizes[i], mem_type, underflow_range, 0); + if (err) + break; + } + return (err ? KSFT_FAIL : KSFT_PASS); +} + +static int check_buffer_overflow_by_byte(int mem_type, int mode, + int overflow_range) +{ + char *ptr; + int i, j, item, last_index; + bool err; + size_t tagged_size, overflow_size; + char *over_ptr = NULL; + + mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG); + item = sizeof(sizes)/sizeof(int); + for (i = 0; i < item; i++) { + ptr = (char *)mte_allocate_memory_tag_range(sizes[i], mem_type, 0, + 0, overflow_range); + if (check_allocated_memory_range(ptr, sizes[i], mem_type, + 0, overflow_range) != KSFT_PASS) + return KSFT_FAIL; + + tagged_size = MT_ALIGN_UP(sizes[i]); + + mte_initialize_current_context(mode, (uintptr_t)ptr, sizes[i] + overflow_range); + + /* Set some value in tagged memory and make the buffer underflow */ + for (j = 0, last_index = 0 ; (j < (sizes[i] + overflow_range)) && + (cur_mte_cxt.fault_valid == false); j++) { + ptr[j] = '1'; + last_index = j; + } + mte_wait_after_trig(); + err = false; + /* Check whether the buffer is filled */ + for (j = 0; j < sizes[i]; j++) { + if (ptr[j] != '1') { + err = true; + ksft_print_msg("Buffer is not filled at index:%d of ptr:0x%lx\n", + j, ptr); + break; + } + } + if (err) + goto check_buffer_overflow_by_byte_err; + + overflow_size = overflow_range - (tagged_size - sizes[i]); + + switch (mode) { + case MTE_NONE_ERR: + if ((cur_mte_cxt.fault_valid == true) || + (last_index != (sizes[i] + overflow_range - 1))) { + err = true; + break; + } + /* There were no fault so the overflow area should be filled */ + over_ptr = (char *) MT_CLEAR_TAG((size_t) ptr + tagged_size); + for (j = 0 ; j < overflow_size; j++) { + if (over_ptr[j] != '1') { + err = true; + break; + } + } + break; + case MTE_ASYNC_ERR: + /* Imprecise fault should occur otherwise return error */ + if (cur_mte_cxt.fault_valid == false) { + err = true; + break; + } + /* + * The imprecise fault is checked after the write to the buffer, + * so the overflow area should be filled before the fault. + */ + over_ptr = (char *) MT_CLEAR_TAG((size_t) ptr); + for (j = tagged_size ; j < last_index; j++) { + if (over_ptr[j] != '1') { + err = true; + break; + } + } + break; + case MTE_SYNC_ERR: + /* Precise fault should occur otherwise return error */ + if (!cur_mte_cxt.fault_valid || (last_index != tagged_size)) { + err = true; + break; + } + /* Underflow area should not be filled */ + over_ptr = (char *) MT_CLEAR_TAG((size_t) ptr + tagged_size); + for (j = 0 ; j < overflow_size; j++) { + if (over_ptr[j] == '1') + err = true; + } + break; + default: + err = true; + break; + } +check_buffer_overflow_by_byte_err: + mte_free_memory_tag_range((void *)ptr, sizes[i], mem_type, 0, overflow_range); + if (err) + break; + } + return (err ? KSFT_FAIL : KSFT_PASS); +} + +static int check_buffer_by_block_iterate(int mem_type, int mode, size_t size) +{ + char *src, *dst; + int j, result = KSFT_PASS; + enum mte_block_test_alloc alloc_type = UNTAGGED_TAGGED; + + for (alloc_type = UNTAGGED_TAGGED; alloc_type < (int) BLOCK_ALLOC_MAX; alloc_type++) { + switch (alloc_type) { + case UNTAGGED_TAGGED: + src = (char *)mte_allocate_memory(size, mem_type, 0, false); + if (check_allocated_memory(src, size, mem_type, false) != KSFT_PASS) + return KSFT_FAIL; + + dst = (char *)mte_allocate_memory(size, mem_type, 0, true); + if (check_allocated_memory(dst, size, mem_type, true) != KSFT_PASS) { + mte_free_memory((void *)src, size, mem_type, false); + return KSFT_FAIL; + } + + break; + case TAGGED_UNTAGGED: + dst = (char *)mte_allocate_memory(size, mem_type, 0, false); + if (check_allocated_memory(dst, size, mem_type, false) != KSFT_PASS) + return KSFT_FAIL; + + src = (char *)mte_allocate_memory(size, mem_type, 0, true); + if (check_allocated_memory(src, size, mem_type, true) != KSFT_PASS) { + mte_free_memory((void *)dst, size, mem_type, false); + return KSFT_FAIL; + } + break; + case TAGGED_TAGGED: + src = (char *)mte_allocate_memory(size, mem_type, 0, true); + if (check_allocated_memory(src, size, mem_type, true) != KSFT_PASS) + return KSFT_FAIL; + + dst = (char *)mte_allocate_memory(size, mem_type, 0, true); + if (check_allocated_memory(dst, size, mem_type, true) != KSFT_PASS) { + mte_free_memory((void *)src, size, mem_type, true); + return KSFT_FAIL; + } + break; + default: + return KSFT_FAIL; + } + + cur_mte_cxt.fault_valid = false; + result = KSFT_PASS; + mte_initialize_current_context(mode, (uintptr_t)dst, size); + /* Set some value in memory and copy*/ + memset((void *)src, (int)'1', size); + memcpy((void *)dst, (void *)src, size); + mte_wait_after_trig(); + if (cur_mte_cxt.fault_valid) { + result = KSFT_FAIL; + goto check_buffer_by_block_err; + } + /* Check the buffer whether it is filled. */ + for (j = 0; j < size; j++) { + if (src[j] != dst[j] || src[j] != '1') { + result = KSFT_FAIL; + break; + } + } +check_buffer_by_block_err: + mte_free_memory((void *)src, size, mem_type, + MT_FETCH_TAG((uintptr_t)src) ? true : false); + mte_free_memory((void *)dst, size, mem_type, + MT_FETCH_TAG((uintptr_t)dst) ? true : false); + if (result != KSFT_PASS) + return result; + } + return result; +} + +static int check_buffer_by_block(int mem_type, int mode) +{ + int i, item, result = KSFT_PASS; + + mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG); + item = sizeof(sizes)/sizeof(int); + cur_mte_cxt.fault_valid = false; + for (i = 0; i < item; i++) { + result = check_buffer_by_block_iterate(mem_type, mode, sizes[i]); + if (result != KSFT_PASS) + break; + } + return result; +} + +static int compare_memory_tags(char *ptr, size_t size, int tag) +{ + int i, new_tag; + + for (i = 0 ; i < size ; i += MT_GRANULE_SIZE) { + new_tag = MT_FETCH_TAG((uintptr_t)(mte_get_tag_address(ptr + i))); + if (tag != new_tag) { + ksft_print_msg("FAIL: child mte tag mismatch\n"); + return KSFT_FAIL; + } + } + return KSFT_PASS; +} + +static int check_memory_initial_tags(int mem_type, int mode, int mapping) +{ + char *ptr; + int run, fd; + int total = sizeof(sizes)/sizeof(int); + + mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG); + for (run = 0; run < total; run++) { + /* check initial tags for anonymous mmap */ + ptr = (char *)mte_allocate_memory(sizes[run], mem_type, mapping, false); + if (check_allocated_memory(ptr, sizes[run], mem_type, false) != KSFT_PASS) + return KSFT_FAIL; + if (compare_memory_tags(ptr, sizes[run], 0) != KSFT_PASS) { + mte_free_memory((void *)ptr, sizes[run], mem_type, false); + return KSFT_FAIL; + } + mte_free_memory((void *)ptr, sizes[run], mem_type, false); + + /* check initial tags for file mmap */ + fd = create_temp_file(); + if (fd == -1) + return KSFT_FAIL; + ptr = (char *)mte_allocate_file_memory(sizes[run], mem_type, mapping, false, fd); + if (check_allocated_memory(ptr, sizes[run], mem_type, false) != KSFT_PASS) { + close(fd); + return KSFT_FAIL; + } + if (compare_memory_tags(ptr, sizes[run], 0) != KSFT_PASS) { + mte_free_memory((void *)ptr, sizes[run], mem_type, false); + close(fd); + return KSFT_FAIL; + } + mte_free_memory((void *)ptr, sizes[run], mem_type, false); + close(fd); + } + return KSFT_PASS; +} + +int main(int argc, char *argv[]) +{ + int err; + size_t page_size = getpagesize(); + int item = sizeof(sizes)/sizeof(int); + + sizes[item - 3] = page_size - 1; + sizes[item - 2] = page_size; + sizes[item - 1] = page_size + 1; + + err = mte_default_setup(); + if (err) + return err; + + /* Register SIGSEGV handler */ + mte_register_signal(SIGSEGV, mte_default_handler); + + /* Buffer by byte tests */ + evaluate_test(check_buffer_by_byte(USE_MMAP, MTE_SYNC_ERR), + "Check buffer correctness by byte with sync err mode and mmap memory\n"); + evaluate_test(check_buffer_by_byte(USE_MMAP, MTE_ASYNC_ERR), + "Check buffer correctness by byte with async err mode and mmap memory\n"); + evaluate_test(check_buffer_by_byte(USE_MPROTECT, MTE_SYNC_ERR), + "Check buffer correctness by byte with sync err mode and mmap/mprotect memory\n"); + evaluate_test(check_buffer_by_byte(USE_MPROTECT, MTE_ASYNC_ERR), + "Check buffer correctness by byte with async err mode and mmap/mprotect memory\n"); + + /* Check buffer underflow with underflow size as 16 */ + evaluate_test(check_buffer_underflow_by_byte(USE_MMAP, MTE_SYNC_ERR, MT_GRANULE_SIZE), + "Check buffer write underflow by byte with sync mode and mmap memory\n"); + evaluate_test(check_buffer_underflow_by_byte(USE_MMAP, MTE_ASYNC_ERR, MT_GRANULE_SIZE), + "Check buffer write underflow by byte with async mode and mmap memory\n"); + evaluate_test(check_buffer_underflow_by_byte(USE_MMAP, MTE_NONE_ERR, MT_GRANULE_SIZE), + "Check buffer write underflow by byte with tag check fault ignore and mmap memory\n"); + + /* Check buffer underflow with underflow size as page size */ + evaluate_test(check_buffer_underflow_by_byte(USE_MMAP, MTE_SYNC_ERR, page_size), + "Check buffer write underflow by byte with sync mode and mmap memory\n"); + evaluate_test(check_buffer_underflow_by_byte(USE_MMAP, MTE_ASYNC_ERR, page_size), + "Check buffer write underflow by byte with async mode and mmap memory\n"); + evaluate_test(check_buffer_underflow_by_byte(USE_MMAP, MTE_NONE_ERR, page_size), + "Check buffer write underflow by byte with tag check fault ignore and mmap memory\n"); + + /* Check buffer overflow with overflow size as 16 */ + evaluate_test(check_buffer_overflow_by_byte(USE_MMAP, MTE_SYNC_ERR, MT_GRANULE_SIZE), + "Check buffer write overflow by byte with sync mode and mmap memory\n"); + evaluate_test(check_buffer_overflow_by_byte(USE_MMAP, MTE_ASYNC_ERR, MT_GRANULE_SIZE), + "Check buffer write overflow by byte with async mode and mmap memory\n"); + evaluate_test(check_buffer_overflow_by_byte(USE_MMAP, MTE_NONE_ERR, MT_GRANULE_SIZE), + "Check buffer write overflow by byte with tag fault ignore mode and mmap memory\n"); + + /* Buffer by block tests */ + evaluate_test(check_buffer_by_block(USE_MMAP, MTE_SYNC_ERR), + "Check buffer write correctness by block with sync mode and mmap memory\n"); + evaluate_test(check_buffer_by_block(USE_MMAP, MTE_ASYNC_ERR), + "Check buffer write correctness by block with async mode and mmap memory\n"); + evaluate_test(check_buffer_by_block(USE_MMAP, MTE_NONE_ERR), + "Check buffer write correctness by block with tag fault ignore and mmap memory\n"); + + /* Initial tags are supposed to be 0 */ + evaluate_test(check_memory_initial_tags(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE), + "Check initial tags with private mapping, sync error mode and mmap memory\n"); + evaluate_test(check_memory_initial_tags(USE_MPROTECT, MTE_SYNC_ERR, MAP_PRIVATE), + "Check initial tags with private mapping, sync error mode and mmap/mprotect memory\n"); + evaluate_test(check_memory_initial_tags(USE_MMAP, MTE_SYNC_ERR, MAP_SHARED), + "Check initial tags with shared mapping, sync error mode and mmap memory\n"); + evaluate_test(check_memory_initial_tags(USE_MPROTECT, MTE_SYNC_ERR, MAP_SHARED), + "Check initial tags with shared mapping, sync error mode and mmap/mprotect memory\n"); + + mte_restore_setup(); + ksft_print_cnts(); + return ksft_get_fail_cnt() == 0 ? KSFT_PASS : KSFT_FAIL; +} diff --git a/tools/testing/selftests/arm64/mte/check_child_memory.c b/tools/testing/selftests/arm64/mte/check_child_memory.c new file mode 100644 index 000000000000..97bebdecd29e --- /dev/null +++ b/tools/testing/selftests/arm64/mte/check_child_memory.c @@ -0,0 +1,195 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (C) 2020 ARM Limited + +#define _GNU_SOURCE + +#include <errno.h> +#include <signal.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <ucontext.h> +#include <sys/wait.h> + +#include "kselftest.h" +#include "mte_common_util.h" +#include "mte_def.h" + +#define BUFFER_SIZE (5 * MT_GRANULE_SIZE) +#define RUNS (MT_TAG_COUNT) +#define UNDERFLOW MT_GRANULE_SIZE +#define OVERFLOW MT_GRANULE_SIZE + +static size_t page_size; +static int sizes[] = { + 1, 537, 989, 1269, MT_GRANULE_SIZE - 1, MT_GRANULE_SIZE, + /* page size - 1*/ 0, /* page_size */ 0, /* page size + 1 */ 0 +}; + +static int check_child_tag_inheritance(char *ptr, int size, int mode) +{ + int i, parent_tag, child_tag, fault, child_status; + pid_t child; + + parent_tag = MT_FETCH_TAG((uintptr_t)ptr); + fault = 0; + + child = fork(); + if (child == -1) { + ksft_print_msg("FAIL: child process creation\n"); + return KSFT_FAIL; + } else if (child == 0) { + mte_initialize_current_context(mode, (uintptr_t)ptr, size); + /* Do copy on write */ + memset(ptr, '1', size); + mte_wait_after_trig(); + if (cur_mte_cxt.fault_valid == true) { + fault = 1; + goto check_child_tag_inheritance_err; + } + for (i = 0 ; i < size ; i += MT_GRANULE_SIZE) { + child_tag = MT_FETCH_TAG((uintptr_t)(mte_get_tag_address(ptr + i))); + if (parent_tag != child_tag) { + ksft_print_msg("FAIL: child mte tag mismatch\n"); + fault = 1; + goto check_child_tag_inheritance_err; + } + } + mte_initialize_current_context(mode, (uintptr_t)ptr, -UNDERFLOW); + memset(ptr - UNDERFLOW, '2', UNDERFLOW); + mte_wait_after_trig(); + if (cur_mte_cxt.fault_valid == false) { + fault = 1; + goto check_child_tag_inheritance_err; + } + mte_initialize_current_context(mode, (uintptr_t)ptr, size + OVERFLOW); + memset(ptr + size, '3', OVERFLOW); + mte_wait_after_trig(); + if (cur_mte_cxt.fault_valid == false) { + fault = 1; + goto check_child_tag_inheritance_err; + } +check_child_tag_inheritance_err: + _exit(fault); + } + /* Wait for child process to terminate */ + wait(&child_status); + if (WIFEXITED(child_status)) + fault = WEXITSTATUS(child_status); + else + fault = 1; + return (fault) ? KSFT_FAIL : KSFT_PASS; +} + +static int check_child_memory_mapping(int mem_type, int mode, int mapping) +{ + char *ptr; + int run, result; + int item = sizeof(sizes)/sizeof(int); + + item = sizeof(sizes)/sizeof(int); + mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG); + for (run = 0; run < item; run++) { + ptr = (char *)mte_allocate_memory_tag_range(sizes[run], mem_type, mapping, + UNDERFLOW, OVERFLOW); + if (check_allocated_memory_range(ptr, sizes[run], mem_type, + UNDERFLOW, OVERFLOW) != KSFT_PASS) + return KSFT_FAIL; + result = check_child_tag_inheritance(ptr, sizes[run], mode); + mte_free_memory_tag_range((void *)ptr, sizes[run], mem_type, UNDERFLOW, OVERFLOW); + if (result == KSFT_FAIL) + return result; + } + return KSFT_PASS; +} + +static int check_child_file_mapping(int mem_type, int mode, int mapping) +{ + char *ptr, *map_ptr; + int run, fd, map_size, result = KSFT_PASS; + int total = sizeof(sizes)/sizeof(int); + + mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG); + for (run = 0; run < total; run++) { + fd = create_temp_file(); + if (fd == -1) + return KSFT_FAIL; + + map_size = sizes[run] + OVERFLOW + UNDERFLOW; + map_ptr = (char *)mte_allocate_file_memory(map_size, mem_type, mapping, false, fd); + if (check_allocated_memory(map_ptr, map_size, mem_type, false) != KSFT_PASS) { + close(fd); + return KSFT_FAIL; + } + ptr = map_ptr + UNDERFLOW; + mte_initialize_current_context(mode, (uintptr_t)ptr, sizes[run]); + /* Only mte enabled memory will allow tag insertion */ + ptr = mte_insert_tags((void *)ptr, sizes[run]); + if (!ptr || cur_mte_cxt.fault_valid == true) { + ksft_print_msg("FAIL: Insert tags on file based memory\n"); + munmap((void *)map_ptr, map_size); + close(fd); + return KSFT_FAIL; + } + result = check_child_tag_inheritance(ptr, sizes[run], mode); + mte_clear_tags((void *)ptr, sizes[run]); + munmap((void *)map_ptr, map_size); + close(fd); + if (result != KSFT_PASS) + return KSFT_FAIL; + } + return KSFT_PASS; +} + +int main(int argc, char *argv[]) +{ + int err; + int item = sizeof(sizes)/sizeof(int); + + page_size = getpagesize(); + if (!page_size) { + ksft_print_msg("ERR: Unable to get page size\n"); + return KSFT_FAIL; + } + sizes[item - 3] = page_size - 1; + sizes[item - 2] = page_size; + sizes[item - 1] = page_size + 1; + + err = mte_default_setup(); + if (err) + return err; + + /* Register SIGSEGV handler */ + mte_register_signal(SIGSEGV, mte_default_handler); + mte_register_signal(SIGBUS, mte_default_handler); + + evaluate_test(check_child_memory_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE), + "Check child anonymous memory with private mapping, precise mode and mmap memory\n"); + evaluate_test(check_child_memory_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_SHARED), + "Check child anonymous memory with shared mapping, precise mode and mmap memory\n"); + evaluate_test(check_child_memory_mapping(USE_MMAP, MTE_ASYNC_ERR, MAP_PRIVATE), + "Check child anonymous memory with private mapping, imprecise mode and mmap memory\n"); + evaluate_test(check_child_memory_mapping(USE_MMAP, MTE_ASYNC_ERR, MAP_SHARED), + "Check child anonymous memory with shared mapping, imprecise mode and mmap memory\n"); + evaluate_test(check_child_memory_mapping(USE_MPROTECT, MTE_SYNC_ERR, MAP_PRIVATE), + "Check child anonymous memory with private mapping, precise mode and mmap/mprotect memory\n"); + evaluate_test(check_child_memory_mapping(USE_MPROTECT, MTE_SYNC_ERR, MAP_SHARED), + "Check child anonymous memory with shared mapping, precise mode and mmap/mprotect memory\n"); + + evaluate_test(check_child_file_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE), + "Check child file memory with private mapping, precise mode and mmap memory\n"); + evaluate_test(check_child_file_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_SHARED), + "Check child file memory with shared mapping, precise mode and mmap memory\n"); + evaluate_test(check_child_memory_mapping(USE_MMAP, MTE_ASYNC_ERR, MAP_PRIVATE), + "Check child file memory with private mapping, imprecise mode and mmap memory\n"); + evaluate_test(check_child_memory_mapping(USE_MMAP, MTE_ASYNC_ERR, MAP_SHARED), + "Check child file memory with shared mapping, imprecise mode and mmap memory\n"); + evaluate_test(check_child_memory_mapping(USE_MPROTECT, MTE_SYNC_ERR, MAP_PRIVATE), + "Check child file memory with private mapping, precise mode and mmap/mprotect memory\n"); + evaluate_test(check_child_memory_mapping(USE_MPROTECT, MTE_SYNC_ERR, MAP_SHARED), + "Check child file memory with shared mapping, precise mode and mmap/mprotect memory\n"); + + mte_restore_setup(); + ksft_print_cnts(); + return ksft_get_fail_cnt() == 0 ? KSFT_PASS : KSFT_FAIL; +} diff --git a/tools/testing/selftests/arm64/mte/check_ksm_options.c b/tools/testing/selftests/arm64/mte/check_ksm_options.c new file mode 100644 index 000000000000..bc41ae630c86 --- /dev/null +++ b/tools/testing/selftests/arm64/mte/check_ksm_options.c @@ -0,0 +1,159 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (C) 2020 ARM Limited + +#define _GNU_SOURCE + +#include <errno.h> +#include <fcntl.h> +#include <signal.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <ucontext.h> +#include <sys/mman.h> + +#include "kselftest.h" +#include "mte_common_util.h" +#include "mte_def.h" + +#define TEST_UNIT 10 +#define PATH_KSM "/sys/kernel/mm/ksm/" +#define MAX_LOOP 4 + +static size_t page_sz; +static unsigned long ksm_sysfs[5]; + +static unsigned long read_sysfs(char *str) +{ + FILE *f; + unsigned long val = 0; + + f = fopen(str, "r"); + if (!f) { + ksft_print_msg("ERR: missing %s\n", str); + return 0; + } + fscanf(f, "%lu", &val); + fclose(f); + return val; +} + +static void write_sysfs(char *str, unsigned long val) +{ + FILE *f; + + f = fopen(str, "w"); + if (!f) { + ksft_print_msg("ERR: missing %s\n", str); + return; + } + fprintf(f, "%lu", val); + fclose(f); +} + +static void mte_ksm_setup(void) +{ + ksm_sysfs[0] = read_sysfs(PATH_KSM "merge_across_nodes"); + write_sysfs(PATH_KSM "merge_across_nodes", 1); + ksm_sysfs[1] = read_sysfs(PATH_KSM "sleep_millisecs"); + write_sysfs(PATH_KSM "sleep_millisecs", 0); + ksm_sysfs[2] = read_sysfs(PATH_KSM "run"); + write_sysfs(PATH_KSM "run", 1); + ksm_sysfs[3] = read_sysfs(PATH_KSM "max_page_sharing"); + write_sysfs(PATH_KSM "max_page_sharing", ksm_sysfs[3] + TEST_UNIT); + ksm_sysfs[4] = read_sysfs(PATH_KSM "pages_to_scan"); + write_sysfs(PATH_KSM "pages_to_scan", ksm_sysfs[4] + TEST_UNIT); +} + +static void mte_ksm_restore(void) +{ + write_sysfs(PATH_KSM "merge_across_nodes", ksm_sysfs[0]); + write_sysfs(PATH_KSM "sleep_millisecs", ksm_sysfs[1]); + write_sysfs(PATH_KSM "run", ksm_sysfs[2]); + write_sysfs(PATH_KSM "max_page_sharing", ksm_sysfs[3]); + write_sysfs(PATH_KSM "pages_to_scan", ksm_sysfs[4]); +} + +static void mte_ksm_scan(void) +{ + int cur_count = read_sysfs(PATH_KSM "full_scans"); + int scan_count = cur_count + 1; + int max_loop_count = MAX_LOOP; + + while ((cur_count < scan_count) && max_loop_count) { + sleep(1); + cur_count = read_sysfs(PATH_KSM "full_scans"); + max_loop_count--; + } +#ifdef DEBUG + ksft_print_msg("INFO: pages_shared=%lu pages_sharing=%lu\n", + read_sysfs(PATH_KSM "pages_shared"), + read_sysfs(PATH_KSM "pages_sharing")); +#endif +} + +static int check_madvise_options(int mem_type, int mode, int mapping) +{ + char *ptr; + int err, ret; + + err = KSFT_FAIL; + if (access(PATH_KSM, F_OK) == -1) { + ksft_print_msg("ERR: Kernel KSM config not enabled\n"); + return err; + } + + mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG); + ptr = mte_allocate_memory(TEST_UNIT * page_sz, mem_type, mapping, true); + if (check_allocated_memory(ptr, TEST_UNIT * page_sz, mem_type, false) != KSFT_PASS) + return KSFT_FAIL; + + /* Insert same data in all the pages */ + memset(ptr, 'A', TEST_UNIT * page_sz); + ret = madvise(ptr, TEST_UNIT * page_sz, MADV_MERGEABLE); + if (ret) { + ksft_print_msg("ERR: madvise failed to set MADV_UNMERGEABLE\n"); + goto madvise_err; + } + mte_ksm_scan(); + /* Tagged pages should not merge */ + if ((read_sysfs(PATH_KSM "pages_shared") < 1) || + (read_sysfs(PATH_KSM "pages_sharing") < (TEST_UNIT - 1))) + err = KSFT_PASS; +madvise_err: + mte_free_memory(ptr, TEST_UNIT * page_sz, mem_type, true); + return err; +} + +int main(int argc, char *argv[]) +{ + int err; + + err = mte_default_setup(); + if (err) + return err; + page_sz = getpagesize(); + if (!page_sz) { + ksft_print_msg("ERR: Unable to get page size\n"); + return KSFT_FAIL; + } + /* Register signal handlers */ + mte_register_signal(SIGBUS, mte_default_handler); + mte_register_signal(SIGSEGV, mte_default_handler); + /* Enable KSM */ + mte_ksm_setup(); + + evaluate_test(check_madvise_options(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE), + "Check KSM mte page merge for private mapping, sync mode and mmap memory\n"); + evaluate_test(check_madvise_options(USE_MMAP, MTE_ASYNC_ERR, MAP_PRIVATE), + "Check KSM mte page merge for private mapping, async mode and mmap memory\n"); + evaluate_test(check_madvise_options(USE_MMAP, MTE_SYNC_ERR, MAP_SHARED), + "Check KSM mte page merge for shared mapping, sync mode and mmap memory\n"); + evaluate_test(check_madvise_options(USE_MMAP, MTE_ASYNC_ERR, MAP_SHARED), + "Check KSM mte page merge for shared mapping, async mode and mmap memory\n"); + + mte_ksm_restore(); + mte_restore_setup(); + ksft_print_cnts(); + return ksft_get_fail_cnt() == 0 ? KSFT_PASS : KSFT_FAIL; +} diff --git a/tools/testing/selftests/arm64/mte/check_mmap_options.c b/tools/testing/selftests/arm64/mte/check_mmap_options.c new file mode 100644 index 000000000000..33b13b86199b --- /dev/null +++ b/tools/testing/selftests/arm64/mte/check_mmap_options.c @@ -0,0 +1,262 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (C) 2020 ARM Limited + +#define _GNU_SOURCE + +#include <errno.h> +#include <fcntl.h> +#include <signal.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <ucontext.h> +#include <sys/mman.h> +#include <sys/stat.h> +#include <sys/types.h> + +#include "kselftest.h" +#include "mte_common_util.h" +#include "mte_def.h" + +#define RUNS (MT_TAG_COUNT) +#define UNDERFLOW MT_GRANULE_SIZE +#define OVERFLOW MT_GRANULE_SIZE +#define TAG_CHECK_ON 0 +#define TAG_CHECK_OFF 1 + +static size_t page_size; +static int sizes[] = { + 1, 537, 989, 1269, MT_GRANULE_SIZE - 1, MT_GRANULE_SIZE, + /* page size - 1*/ 0, /* page_size */ 0, /* page size + 1 */ 0 +}; + +static int check_mte_memory(char *ptr, int size, int mode, int tag_check) +{ + mte_initialize_current_context(mode, (uintptr_t)ptr, size); + memset(ptr, '1', size); + mte_wait_after_trig(); + if (cur_mte_cxt.fault_valid == true) + return KSFT_FAIL; + + mte_initialize_current_context(mode, (uintptr_t)ptr, -UNDERFLOW); + memset(ptr - UNDERFLOW, '2', UNDERFLOW); + mte_wait_after_trig(); + if (cur_mte_cxt.fault_valid == false && tag_check == TAG_CHECK_ON) + return KSFT_FAIL; + if (cur_mte_cxt.fault_valid == true && tag_check == TAG_CHECK_OFF) + return KSFT_FAIL; + + mte_initialize_current_context(mode, (uintptr_t)ptr, size + OVERFLOW); + memset(ptr + size, '3', OVERFLOW); + mte_wait_after_trig(); + if (cur_mte_cxt.fault_valid == false && tag_check == TAG_CHECK_ON) + return KSFT_FAIL; + if (cur_mte_cxt.fault_valid == true && tag_check == TAG_CHECK_OFF) + return KSFT_FAIL; + + return KSFT_PASS; +} + +static int check_anonymous_memory_mapping(int mem_type, int mode, int mapping, int tag_check) +{ + char *ptr, *map_ptr; + int run, result, map_size; + int item = sizeof(sizes)/sizeof(int); + + item = sizeof(sizes)/sizeof(int); + mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG); + for (run = 0; run < item; run++) { + map_size = sizes[run] + OVERFLOW + UNDERFLOW; + map_ptr = (char *)mte_allocate_memory(map_size, mem_type, mapping, false); + if (check_allocated_memory(map_ptr, map_size, mem_type, false) != KSFT_PASS) + return KSFT_FAIL; + + ptr = map_ptr + UNDERFLOW; + mte_initialize_current_context(mode, (uintptr_t)ptr, sizes[run]); + /* Only mte enabled memory will allow tag insertion */ + ptr = mte_insert_tags((void *)ptr, sizes[run]); + if (!ptr || cur_mte_cxt.fault_valid == true) { + ksft_print_msg("FAIL: Insert tags on anonymous mmap memory\n"); + munmap((void *)map_ptr, map_size); + return KSFT_FAIL; + } + result = check_mte_memory(ptr, sizes[run], mode, tag_check); + mte_clear_tags((void *)ptr, sizes[run]); + mte_free_memory((void *)map_ptr, map_size, mem_type, false); + if (result == KSFT_FAIL) + return KSFT_FAIL; + } + return KSFT_PASS; +} + +static int check_file_memory_mapping(int mem_type, int mode, int mapping, int tag_check) +{ + char *ptr, *map_ptr; + int run, fd, map_size; + int total = sizeof(sizes)/sizeof(int); + int result = KSFT_PASS; + + mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG); + for (run = 0; run < total; run++) { + fd = create_temp_file(); + if (fd == -1) + return KSFT_FAIL; + + map_size = sizes[run] + UNDERFLOW + OVERFLOW; + map_ptr = (char *)mte_allocate_file_memory(map_size, mem_type, mapping, false, fd); + if (check_allocated_memory(map_ptr, map_size, mem_type, false) != KSFT_PASS) { + close(fd); + return KSFT_FAIL; + } + ptr = map_ptr + UNDERFLOW; + mte_initialize_current_context(mode, (uintptr_t)ptr, sizes[run]); + /* Only mte enabled memory will allow tag insertion */ + ptr = mte_insert_tags((void *)ptr, sizes[run]); + if (!ptr || cur_mte_cxt.fault_valid == true) { + ksft_print_msg("FAIL: Insert tags on file based memory\n"); + munmap((void *)map_ptr, map_size); + close(fd); + return KSFT_FAIL; + } + result = check_mte_memory(ptr, sizes[run], mode, tag_check); + mte_clear_tags((void *)ptr, sizes[run]); + munmap((void *)map_ptr, map_size); + close(fd); + if (result == KSFT_FAIL) + break; + } + return result; +} + +static int check_clear_prot_mte_flag(int mem_type, int mode, int mapping) +{ + char *ptr, *map_ptr; + int run, prot_flag, result, fd, map_size; + int total = sizeof(sizes)/sizeof(int); + + prot_flag = PROT_READ | PROT_WRITE; + mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG); + for (run = 0; run < total; run++) { + map_size = sizes[run] + OVERFLOW + UNDERFLOW; + ptr = (char *)mte_allocate_memory_tag_range(sizes[run], mem_type, mapping, + UNDERFLOW, OVERFLOW); + if (check_allocated_memory_range(ptr, sizes[run], mem_type, + UNDERFLOW, OVERFLOW) != KSFT_PASS) + return KSFT_FAIL; + map_ptr = ptr - UNDERFLOW; + /* Try to clear PROT_MTE property and verify it by tag checking */ + if (mprotect(map_ptr, map_size, prot_flag)) { + mte_free_memory_tag_range((void *)ptr, sizes[run], mem_type, + UNDERFLOW, OVERFLOW); + ksft_print_msg("FAIL: mprotect not ignoring clear PROT_MTE property\n"); + return KSFT_FAIL; + } + result = check_mte_memory(ptr, sizes[run], mode, TAG_CHECK_ON); + mte_free_memory_tag_range((void *)ptr, sizes[run], mem_type, UNDERFLOW, OVERFLOW); + if (result != KSFT_PASS) + return KSFT_FAIL; + + fd = create_temp_file(); + if (fd == -1) + return KSFT_FAIL; + ptr = (char *)mte_allocate_file_memory_tag_range(sizes[run], mem_type, mapping, + UNDERFLOW, OVERFLOW, fd); + if (check_allocated_memory_range(ptr, sizes[run], mem_type, + UNDERFLOW, OVERFLOW) != KSFT_PASS) { + close(fd); + return KSFT_FAIL; + } + map_ptr = ptr - UNDERFLOW; + /* Try to clear PROT_MTE property and verify it by tag checking */ + if (mprotect(map_ptr, map_size, prot_flag)) { + ksft_print_msg("FAIL: mprotect not ignoring clear PROT_MTE property\n"); + mte_free_memory_tag_range((void *)ptr, sizes[run], mem_type, + UNDERFLOW, OVERFLOW); + close(fd); + return KSFT_FAIL; + } + result = check_mte_memory(ptr, sizes[run], mode, TAG_CHECK_ON); + mte_free_memory_tag_range((void *)ptr, sizes[run], mem_type, UNDERFLOW, OVERFLOW); + close(fd); + if (result != KSFT_PASS) + return KSFT_FAIL; + } + return KSFT_PASS; +} + +int main(int argc, char *argv[]) +{ + int err; + int item = sizeof(sizes)/sizeof(int); + + err = mte_default_setup(); + if (err) + return err; + page_size = getpagesize(); + if (!page_size) { + ksft_print_msg("ERR: Unable to get page size\n"); + return KSFT_FAIL; + } + sizes[item - 3] = page_size - 1; + sizes[item - 2] = page_size; + sizes[item - 1] = page_size + 1; + + /* Register signal handlers */ + mte_register_signal(SIGBUS, mte_default_handler); + mte_register_signal(SIGSEGV, mte_default_handler); + + mte_enable_pstate_tco(); + evaluate_test(check_anonymous_memory_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE, TAG_CHECK_OFF), + "Check anonymous memory with private mapping, sync error mode, mmap memory and tag check off\n"); + evaluate_test(check_file_memory_mapping(USE_MPROTECT, MTE_SYNC_ERR, MAP_PRIVATE, TAG_CHECK_OFF), + "Check file memory with private mapping, sync error mode, mmap/mprotect memory and tag check off\n"); + + mte_disable_pstate_tco(); + evaluate_test(check_anonymous_memory_mapping(USE_MMAP, MTE_NONE_ERR, MAP_PRIVATE, TAG_CHECK_OFF), + "Check anonymous memory with private mapping, no error mode, mmap memory and tag check off\n"); + evaluate_test(check_file_memory_mapping(USE_MPROTECT, MTE_NONE_ERR, MAP_PRIVATE, TAG_CHECK_OFF), + "Check file memory with private mapping, no error mode, mmap/mprotect memory and tag check off\n"); + + evaluate_test(check_anonymous_memory_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE, TAG_CHECK_ON), + "Check anonymous memory with private mapping, sync error mode, mmap memory and tag check on\n"); + evaluate_test(check_anonymous_memory_mapping(USE_MPROTECT, MTE_SYNC_ERR, MAP_PRIVATE, TAG_CHECK_ON), + "Check anonymous memory with private mapping, sync error mode, mmap/mprotect memory and tag check on\n"); + evaluate_test(check_anonymous_memory_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_SHARED, TAG_CHECK_ON), + "Check anonymous memory with shared mapping, sync error mode, mmap memory and tag check on\n"); + evaluate_test(check_anonymous_memory_mapping(USE_MPROTECT, MTE_SYNC_ERR, MAP_SHARED, TAG_CHECK_ON), + "Check anonymous memory with shared mapping, sync error mode, mmap/mprotect memory and tag check on\n"); + evaluate_test(check_anonymous_memory_mapping(USE_MMAP, MTE_ASYNC_ERR, MAP_PRIVATE, TAG_CHECK_ON), + "Check anonymous memory with private mapping, async error mode, mmap memory and tag check on\n"); + evaluate_test(check_anonymous_memory_mapping(USE_MPROTECT, MTE_ASYNC_ERR, MAP_PRIVATE, TAG_CHECK_ON), + "Check anonymous memory with private mapping, async error mode, mmap/mprotect memory and tag check on\n"); + evaluate_test(check_anonymous_memory_mapping(USE_MMAP, MTE_ASYNC_ERR, MAP_SHARED, TAG_CHECK_ON), + "Check anonymous memory with shared mapping, async error mode, mmap memory and tag check on\n"); + evaluate_test(check_anonymous_memory_mapping(USE_MPROTECT, MTE_ASYNC_ERR, MAP_SHARED, TAG_CHECK_ON), + "Check anonymous memory with shared mapping, async error mode, mmap/mprotect memory and tag check on\n"); + + evaluate_test(check_file_memory_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE, TAG_CHECK_ON), + "Check file memory with private mapping, sync error mode, mmap memory and tag check on\n"); + evaluate_test(check_file_memory_mapping(USE_MPROTECT, MTE_SYNC_ERR, MAP_PRIVATE, TAG_CHECK_ON), + "Check file memory with private mapping, sync error mode, mmap/mprotect memory and tag check on\n"); + evaluate_test(check_file_memory_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_SHARED, TAG_CHECK_ON), + "Check file memory with shared mapping, sync error mode, mmap memory and tag check on\n"); + evaluate_test(check_file_memory_mapping(USE_MPROTECT, MTE_SYNC_ERR, MAP_SHARED, TAG_CHECK_ON), + "Check file memory with shared mapping, sync error mode, mmap/mprotect memory and tag check on\n"); + evaluate_test(check_file_memory_mapping(USE_MMAP, MTE_ASYNC_ERR, MAP_PRIVATE, TAG_CHECK_ON), + "Check file memory with private mapping, async error mode, mmap memory and tag check on\n"); + evaluate_test(check_file_memory_mapping(USE_MPROTECT, MTE_ASYNC_ERR, MAP_PRIVATE, TAG_CHECK_ON), + "Check file memory with private mapping, async error mode, mmap/mprotect memory and tag check on\n"); + evaluate_test(check_file_memory_mapping(USE_MMAP, MTE_ASYNC_ERR, MAP_SHARED, TAG_CHECK_ON), + "Check file memory with shared mapping, async error mode, mmap memory and tag check on\n"); + evaluate_test(check_file_memory_mapping(USE_MPROTECT, MTE_ASYNC_ERR, MAP_SHARED, TAG_CHECK_ON), + "Check file memory with shared mapping, async error mode, mmap/mprotect memory and tag check on\n"); + + evaluate_test(check_clear_prot_mte_flag(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE), + "Check clear PROT_MTE flags with private mapping, sync error mode and mmap memory\n"); + evaluate_test(check_clear_prot_mte_flag(USE_MPROTECT, MTE_SYNC_ERR, MAP_PRIVATE), + "Check clear PROT_MTE flags with private mapping and sync error mode and mmap/mprotect memory\n"); + + mte_restore_setup(); + ksft_print_cnts(); + return ksft_get_fail_cnt() == 0 ? KSFT_PASS : KSFT_FAIL; +} diff --git a/tools/testing/selftests/arm64/mte/check_tags_inclusion.c b/tools/testing/selftests/arm64/mte/check_tags_inclusion.c new file mode 100644 index 000000000000..94d245a0ed56 --- /dev/null +++ b/tools/testing/selftests/arm64/mte/check_tags_inclusion.c @@ -0,0 +1,185 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (C) 2020 ARM Limited + +#define _GNU_SOURCE + +#include <errno.h> +#include <signal.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <ucontext.h> +#include <sys/wait.h> + +#include "kselftest.h" +#include "mte_common_util.h" +#include "mte_def.h" + +#define BUFFER_SIZE (5 * MT_GRANULE_SIZE) +#define RUNS (MT_TAG_COUNT * 2) +#define MTE_LAST_TAG_MASK (0x7FFF) + +static int verify_mte_pointer_validity(char *ptr, int mode) +{ + mte_initialize_current_context(mode, (uintptr_t)ptr, BUFFER_SIZE); + /* Check the validity of the tagged pointer */ + memset((void *)ptr, '1', BUFFER_SIZE); + mte_wait_after_trig(); + if (cur_mte_cxt.fault_valid) + return KSFT_FAIL; + /* Proceed further for nonzero tags */ + if (!MT_FETCH_TAG((uintptr_t)ptr)) + return KSFT_PASS; + mte_initialize_current_context(mode, (uintptr_t)ptr, BUFFER_SIZE + 1); + /* Check the validity outside the range */ + ptr[BUFFER_SIZE] = '2'; + mte_wait_after_trig(); + if (!cur_mte_cxt.fault_valid) + return KSFT_FAIL; + else + return KSFT_PASS; +} + +static int check_single_included_tags(int mem_type, int mode) +{ + char *ptr; + int tag, run, result = KSFT_PASS; + + ptr = (char *)mte_allocate_memory(BUFFER_SIZE + MT_GRANULE_SIZE, mem_type, 0, false); + if (check_allocated_memory(ptr, BUFFER_SIZE + MT_GRANULE_SIZE, + mem_type, false) != KSFT_PASS) + return KSFT_FAIL; + + for (tag = 0; (tag < MT_TAG_COUNT) && (result == KSFT_PASS); tag++) { + mte_switch_mode(mode, MT_INCLUDE_VALID_TAG(tag)); + /* Try to catch a excluded tag by a number of tries. */ + for (run = 0; (run < RUNS) && (result == KSFT_PASS); run++) { + ptr = (char *)mte_insert_tags(ptr, BUFFER_SIZE); + /* Check tag value */ + if (MT_FETCH_TAG((uintptr_t)ptr) == tag) { + ksft_print_msg("FAIL: wrong tag = 0x%x with include mask=0x%x\n", + MT_FETCH_TAG((uintptr_t)ptr), + MT_INCLUDE_VALID_TAG(tag)); + result = KSFT_FAIL; + break; + } + result = verify_mte_pointer_validity(ptr, mode); + } + } + mte_free_memory_tag_range((void *)ptr, BUFFER_SIZE, mem_type, 0, MT_GRANULE_SIZE); + return result; +} + +static int check_multiple_included_tags(int mem_type, int mode) +{ + char *ptr; + int tag, run, result = KSFT_PASS; + unsigned long excl_mask = 0; + + ptr = (char *)mte_allocate_memory(BUFFER_SIZE + MT_GRANULE_SIZE, mem_type, 0, false); + if (check_allocated_memory(ptr, BUFFER_SIZE + MT_GRANULE_SIZE, + mem_type, false) != KSFT_PASS) + return KSFT_FAIL; + + for (tag = 0; (tag < MT_TAG_COUNT - 1) && (result == KSFT_PASS); tag++) { + excl_mask |= 1 << tag; + mte_switch_mode(mode, MT_INCLUDE_VALID_TAGS(excl_mask)); + /* Try to catch a excluded tag by a number of tries. */ + for (run = 0; (run < RUNS) && (result == KSFT_PASS); run++) { + ptr = (char *)mte_insert_tags(ptr, BUFFER_SIZE); + /* Check tag value */ + if (MT_FETCH_TAG((uintptr_t)ptr) < tag) { + ksft_print_msg("FAIL: wrong tag = 0x%x with include mask=0x%x\n", + MT_FETCH_TAG((uintptr_t)ptr), + MT_INCLUDE_VALID_TAGS(excl_mask)); + result = KSFT_FAIL; + break; + } + result = verify_mte_pointer_validity(ptr, mode); + } + } + mte_free_memory_tag_range((void *)ptr, BUFFER_SIZE, mem_type, 0, MT_GRANULE_SIZE); + return result; +} + +static int check_all_included_tags(int mem_type, int mode) +{ + char *ptr; + int run, result = KSFT_PASS; + + ptr = (char *)mte_allocate_memory(BUFFER_SIZE + MT_GRANULE_SIZE, mem_type, 0, false); + if (check_allocated_memory(ptr, BUFFER_SIZE + MT_GRANULE_SIZE, + mem_type, false) != KSFT_PASS) + return KSFT_FAIL; + + mte_switch_mode(mode, MT_INCLUDE_TAG_MASK); + /* Try to catch a excluded tag by a number of tries. */ + for (run = 0; (run < RUNS) && (result == KSFT_PASS); run++) { + ptr = (char *)mte_insert_tags(ptr, BUFFER_SIZE); + /* + * Here tag byte can be between 0x0 to 0xF (full allowed range) + * so no need to match so just verify if it is writable. + */ + result = verify_mte_pointer_validity(ptr, mode); + } + mte_free_memory_tag_range((void *)ptr, BUFFER_SIZE, mem_type, 0, MT_GRANULE_SIZE); + return result; +} + +static int check_none_included_tags(int mem_type, int mode) +{ + char *ptr; + int run; + + ptr = (char *)mte_allocate_memory(BUFFER_SIZE, mem_type, 0, false); + if (check_allocated_memory(ptr, BUFFER_SIZE, mem_type, false) != KSFT_PASS) + return KSFT_FAIL; + + mte_switch_mode(mode, MT_EXCLUDE_TAG_MASK); + /* Try to catch a excluded tag by a number of tries. */ + for (run = 0; run < RUNS; run++) { + ptr = (char *)mte_insert_tags(ptr, BUFFER_SIZE); + /* Here all tags exluded so tag value generated should be 0 */ + if (MT_FETCH_TAG((uintptr_t)ptr)) { + ksft_print_msg("FAIL: included tag value found\n"); + mte_free_memory((void *)ptr, BUFFER_SIZE, mem_type, true); + return KSFT_FAIL; + } + mte_initialize_current_context(mode, (uintptr_t)ptr, BUFFER_SIZE); + /* Check the write validity of the untagged pointer */ + memset((void *)ptr, '1', BUFFER_SIZE); + mte_wait_after_trig(); + if (cur_mte_cxt.fault_valid) + break; + } + mte_free_memory((void *)ptr, BUFFER_SIZE, mem_type, false); + if (cur_mte_cxt.fault_valid) + return KSFT_FAIL; + else + return KSFT_PASS; +} + +int main(int argc, char *argv[]) +{ + int err; + + err = mte_default_setup(); + if (err) + return err; + + /* Register SIGSEGV handler */ + mte_register_signal(SIGSEGV, mte_default_handler); + + evaluate_test(check_single_included_tags(USE_MMAP, MTE_SYNC_ERR), + "Check an included tag value with sync mode\n"); + evaluate_test(check_multiple_included_tags(USE_MMAP, MTE_SYNC_ERR), + "Check different included tags value with sync mode\n"); + evaluate_test(check_none_included_tags(USE_MMAP, MTE_SYNC_ERR), + "Check none included tags value with sync mode\n"); + evaluate_test(check_all_included_tags(USE_MMAP, MTE_SYNC_ERR), + "Check all included tags value with sync mode\n"); + + mte_restore_setup(); + ksft_print_cnts(); + return ksft_get_fail_cnt() == 0 ? KSFT_PASS : KSFT_FAIL; +} diff --git a/tools/testing/selftests/arm64/mte/check_user_mem.c b/tools/testing/selftests/arm64/mte/check_user_mem.c new file mode 100644 index 000000000000..594e98e76880 --- /dev/null +++ b/tools/testing/selftests/arm64/mte/check_user_mem.c @@ -0,0 +1,111 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (C) 2020 ARM Limited + +#define _GNU_SOURCE + +#include <errno.h> +#include <fcntl.h> +#include <signal.h> +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <ucontext.h> +#include <unistd.h> +#include <sys/mman.h> + +#include "kselftest.h" +#include "mte_common_util.h" +#include "mte_def.h" + +static size_t page_sz; + +static int check_usermem_access_fault(int mem_type, int mode, int mapping) +{ + int fd, i, err; + char val = 'A'; + size_t len, read_len; + void *ptr, *ptr_next; + + err = KSFT_FAIL; + len = 2 * page_sz; + mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG); + fd = create_temp_file(); + if (fd == -1) + return KSFT_FAIL; + for (i = 0; i < len; i++) + write(fd, &val, sizeof(val)); + lseek(fd, 0, 0); + ptr = mte_allocate_memory(len, mem_type, mapping, true); + if (check_allocated_memory(ptr, len, mem_type, true) != KSFT_PASS) { + close(fd); + return KSFT_FAIL; + } + mte_initialize_current_context(mode, (uintptr_t)ptr, len); + /* Copy from file into buffer with valid tag */ + read_len = read(fd, ptr, len); + mte_wait_after_trig(); + if (cur_mte_cxt.fault_valid || read_len < len) + goto usermem_acc_err; + /* Verify same pattern is read */ + for (i = 0; i < len; i++) + if (*(char *)(ptr + i) != val) + break; + if (i < len) + goto usermem_acc_err; + + /* Tag the next half of memory with different value */ + ptr_next = (void *)((unsigned long)ptr + page_sz); + ptr_next = mte_insert_new_tag(ptr_next); + mte_set_tag_address_range(ptr_next, page_sz); + + lseek(fd, 0, 0); + /* Copy from file into buffer with invalid tag */ + read_len = read(fd, ptr, len); + mte_wait_after_trig(); + /* + * Accessing user memory in kernel with invalid tag should fail in sync + * mode without fault but may not fail in async mode as per the + * implemented MTE userspace support in Arm64 kernel. + */ + if (mode == MTE_SYNC_ERR && + !cur_mte_cxt.fault_valid && read_len < len) { + err = KSFT_PASS; + } else if (mode == MTE_ASYNC_ERR && + !cur_mte_cxt.fault_valid && read_len == len) { + err = KSFT_PASS; + } +usermem_acc_err: + mte_free_memory((void *)ptr, len, mem_type, true); + close(fd); + return err; +} + +int main(int argc, char *argv[]) +{ + int err; + + page_sz = getpagesize(); + if (!page_sz) { + ksft_print_msg("ERR: Unable to get page size\n"); + return KSFT_FAIL; + } + err = mte_default_setup(); + if (err) + return err; + /* Register signal handlers */ + mte_register_signal(SIGSEGV, mte_default_handler); + + evaluate_test(check_usermem_access_fault(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE), + "Check memory access from kernel in sync mode, private mapping and mmap memory\n"); + evaluate_test(check_usermem_access_fault(USE_MMAP, MTE_SYNC_ERR, MAP_SHARED), + "Check memory access from kernel in sync mode, shared mapping and mmap memory\n"); + + evaluate_test(check_usermem_access_fault(USE_MMAP, MTE_ASYNC_ERR, MAP_PRIVATE), + "Check memory access from kernel in async mode, private mapping and mmap memory\n"); + evaluate_test(check_usermem_access_fault(USE_MMAP, MTE_ASYNC_ERR, MAP_SHARED), + "Check memory access from kernel in async mode, shared mapping and mmap memory\n"); + + mte_restore_setup(); + ksft_print_cnts(); + return ksft_get_fail_cnt() == 0 ? KSFT_PASS : KSFT_FAIL; +} diff --git a/tools/testing/selftests/arm64/mte/mte_common_util.c b/tools/testing/selftests/arm64/mte/mte_common_util.c new file mode 100644 index 000000000000..39f8908988ea --- /dev/null +++ b/tools/testing/selftests/arm64/mte/mte_common_util.c @@ -0,0 +1,341 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (C) 2020 ARM Limited + +#include <fcntl.h> +#include <sched.h> +#include <signal.h> +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> + +#include <linux/auxvec.h> +#include <sys/auxv.h> +#include <sys/mman.h> +#include <sys/prctl.h> + +#include <asm/hwcap.h> + +#include "kselftest.h" +#include "mte_common_util.h" +#include "mte_def.h" + +#define INIT_BUFFER_SIZE 256 + +struct mte_fault_cxt cur_mte_cxt; +static unsigned int mte_cur_mode; +static unsigned int mte_cur_pstate_tco; + +void mte_default_handler(int signum, siginfo_t *si, void *uc) +{ + unsigned long addr = (unsigned long)si->si_addr; + + if (signum == SIGSEGV) { +#ifdef DEBUG + ksft_print_msg("INFO: SIGSEGV signal at pc=%lx, fault addr=%lx, si_code=%lx\n", + ((ucontext_t *)uc)->uc_mcontext.pc, addr, si->si_code); +#endif + if (si->si_code == SEGV_MTEAERR) { + if (cur_mte_cxt.trig_si_code == si->si_code) + cur_mte_cxt.fault_valid = true; + return; + } + /* Compare the context for precise error */ + else if (si->si_code == SEGV_MTESERR) { + if (cur_mte_cxt.trig_si_code == si->si_code && + ((cur_mte_cxt.trig_range >= 0 && + addr >= MT_CLEAR_TAG(cur_mte_cxt.trig_addr) && + addr <= (MT_CLEAR_TAG(cur_mte_cxt.trig_addr) + cur_mte_cxt.trig_range)) || + (cur_mte_cxt.trig_range < 0 && + addr <= MT_CLEAR_TAG(cur_mte_cxt.trig_addr) && + addr >= (MT_CLEAR_TAG(cur_mte_cxt.trig_addr) + cur_mte_cxt.trig_range)))) { + cur_mte_cxt.fault_valid = true; + /* Adjust the pc by 4 */ + ((ucontext_t *)uc)->uc_mcontext.pc += 4; + } else { + ksft_print_msg("Invalid MTE synchronous exception caught!\n"); + exit(1); + } + } else { + ksft_print_msg("Unknown SIGSEGV exception caught!\n"); + exit(1); + } + } else if (signum == SIGBUS) { + ksft_print_msg("INFO: SIGBUS signal at pc=%lx, fault addr=%lx, si_code=%lx\n", + ((ucontext_t *)uc)->uc_mcontext.pc, addr, si->si_code); + if ((cur_mte_cxt.trig_range >= 0 && + addr >= MT_CLEAR_TAG(cur_mte_cxt.trig_addr) && + addr <= (MT_CLEAR_TAG(cur_mte_cxt.trig_addr) + cur_mte_cxt.trig_range)) || + (cur_mte_cxt.trig_range < 0 && + addr <= MT_CLEAR_TAG(cur_mte_cxt.trig_addr) && + addr >= (MT_CLEAR_TAG(cur_mte_cxt.trig_addr) + cur_mte_cxt.trig_range))) { + cur_mte_cxt.fault_valid = true; + /* Adjust the pc by 4 */ + ((ucontext_t *)uc)->uc_mcontext.pc += 4; + } + } +} + +void mte_register_signal(int signal, void (*handler)(int, siginfo_t *, void *)) +{ + struct sigaction sa; + + sa.sa_sigaction = handler; + sa.sa_flags = SA_SIGINFO; + sigemptyset(&sa.sa_mask); + sigaction(signal, &sa, NULL); +} + +void mte_wait_after_trig(void) +{ + sched_yield(); +} + +void *mte_insert_tags(void *ptr, size_t size) +{ + void *tag_ptr; + int align_size; + + if (!ptr || (unsigned long)(ptr) & MT_ALIGN_GRANULE) { + ksft_print_msg("FAIL: Addr=%lx: invalid\n", ptr); + return NULL; + } + align_size = MT_ALIGN_UP(size); + tag_ptr = mte_insert_random_tag(ptr); + mte_set_tag_address_range(tag_ptr, align_size); + return tag_ptr; +} + +void mte_clear_tags(void *ptr, size_t size) +{ + if (!ptr || (unsigned long)(ptr) & MT_ALIGN_GRANULE) { + ksft_print_msg("FAIL: Addr=%lx: invalid\n", ptr); + return; + } + size = MT_ALIGN_UP(size); + ptr = (void *)MT_CLEAR_TAG((unsigned long)ptr); + mte_clear_tag_address_range(ptr, size); +} + +static void *__mte_allocate_memory_range(size_t size, int mem_type, int mapping, + size_t range_before, size_t range_after, + bool tags, int fd) +{ + void *ptr; + int prot_flag, map_flag; + size_t entire_size = size + range_before + range_after; + + if (mem_type != USE_MALLOC && mem_type != USE_MMAP && + mem_type != USE_MPROTECT) { + ksft_print_msg("FAIL: Invalid allocate request\n"); + return NULL; + } + if (mem_type == USE_MALLOC) + return malloc(entire_size) + range_before; + + prot_flag = PROT_READ | PROT_WRITE; + if (mem_type == USE_MMAP) + prot_flag |= PROT_MTE; + + map_flag = mapping; + if (fd == -1) + map_flag = MAP_ANONYMOUS | map_flag; + if (!(mapping & MAP_SHARED)) + map_flag |= MAP_PRIVATE; + ptr = mmap(NULL, entire_size, prot_flag, map_flag, fd, 0); + if (ptr == MAP_FAILED) { + ksft_print_msg("FAIL: mmap allocation\n"); + return NULL; + } + if (mem_type == USE_MPROTECT) { + if (mprotect(ptr, entire_size, prot_flag | PROT_MTE)) { + munmap(ptr, size); + ksft_print_msg("FAIL: mprotect PROT_MTE property\n"); + return NULL; + } + } + if (tags) + ptr = mte_insert_tags(ptr + range_before, size); + return ptr; +} + +void *mte_allocate_memory_tag_range(size_t size, int mem_type, int mapping, + size_t range_before, size_t range_after) +{ + return __mte_allocate_memory_range(size, mem_type, mapping, range_before, + range_after, true, -1); +} + +void *mte_allocate_memory(size_t size, int mem_type, int mapping, bool tags) +{ + return __mte_allocate_memory_range(size, mem_type, mapping, 0, 0, tags, -1); +} + +void *mte_allocate_file_memory(size_t size, int mem_type, int mapping, bool tags, int fd) +{ + int index; + char buffer[INIT_BUFFER_SIZE]; + + if (mem_type != USE_MPROTECT && mem_type != USE_MMAP) { + ksft_print_msg("FAIL: Invalid mmap file request\n"); + return NULL; + } + /* Initialize the file for mappable size */ + lseek(fd, 0, SEEK_SET); + for (index = INIT_BUFFER_SIZE; index < size; index += INIT_BUFFER_SIZE) + write(fd, buffer, INIT_BUFFER_SIZE); + index -= INIT_BUFFER_SIZE; + write(fd, buffer, size - index); + return __mte_allocate_memory_range(size, mem_type, mapping, 0, 0, tags, fd); +} + +void *mte_allocate_file_memory_tag_range(size_t size, int mem_type, int mapping, + size_t range_before, size_t range_after, int fd) +{ + int index; + char buffer[INIT_BUFFER_SIZE]; + int map_size = size + range_before + range_after; + + if (mem_type != USE_MPROTECT && mem_type != USE_MMAP) { + ksft_print_msg("FAIL: Invalid mmap file request\n"); + return NULL; + } + /* Initialize the file for mappable size */ + lseek(fd, 0, SEEK_SET); + for (index = INIT_BUFFER_SIZE; index < map_size; index += INIT_BUFFER_SIZE) + write(fd, buffer, INIT_BUFFER_SIZE); + index -= INIT_BUFFER_SIZE; + write(fd, buffer, map_size - index); + return __mte_allocate_memory_range(size, mem_type, mapping, range_before, + range_after, true, fd); +} + +static void __mte_free_memory_range(void *ptr, size_t size, int mem_type, + size_t range_before, size_t range_after, bool tags) +{ + switch (mem_type) { + case USE_MALLOC: + free(ptr - range_before); + break; + case USE_MMAP: + case USE_MPROTECT: + if (tags) + mte_clear_tags(ptr, size); + munmap(ptr - range_before, size + range_before + range_after); + break; + default: + ksft_print_msg("FAIL: Invalid free request\n"); + break; + } +} + +void mte_free_memory_tag_range(void *ptr, size_t size, int mem_type, + size_t range_before, size_t range_after) +{ + __mte_free_memory_range(ptr, size, mem_type, range_before, range_after, true); +} + +void mte_free_memory(void *ptr, size_t size, int mem_type, bool tags) +{ + __mte_free_memory_range(ptr, size, mem_type, 0, 0, tags); +} + +void mte_initialize_current_context(int mode, uintptr_t ptr, ssize_t range) +{ + cur_mte_cxt.fault_valid = false; + cur_mte_cxt.trig_addr = ptr; + cur_mte_cxt.trig_range = range; + if (mode == MTE_SYNC_ERR) + cur_mte_cxt.trig_si_code = SEGV_MTESERR; + else if (mode == MTE_ASYNC_ERR) + cur_mte_cxt.trig_si_code = SEGV_MTEAERR; + else + cur_mte_cxt.trig_si_code = 0; +} + +int mte_switch_mode(int mte_option, unsigned long incl_mask) +{ + unsigned long en = 0; + + if (!(mte_option == MTE_SYNC_ERR || mte_option == MTE_ASYNC_ERR || + mte_option == MTE_NONE_ERR || incl_mask <= MTE_ALLOW_NON_ZERO_TAG)) { + ksft_print_msg("FAIL: Invalid mte config option\n"); + return -EINVAL; + } + en = PR_TAGGED_ADDR_ENABLE; + if (mte_option == MTE_SYNC_ERR) + en |= PR_MTE_TCF_SYNC; + else if (mte_option == MTE_ASYNC_ERR) + en |= PR_MTE_TCF_ASYNC; + else if (mte_option == MTE_NONE_ERR) + en |= PR_MTE_TCF_NONE; + + en |= (incl_mask << PR_MTE_TAG_SHIFT); + /* Enable address tagging ABI, mte error reporting mode and tag inclusion mask. */ + if (!prctl(PR_SET_TAGGED_ADDR_CTRL, en, 0, 0, 0) == 0) { + ksft_print_msg("FAIL:prctl PR_SET_TAGGED_ADDR_CTRL for mte mode\n"); + return -EINVAL; + } + return 0; +} + +#define ID_AA64PFR1_MTE_SHIFT 8 +#define ID_AA64PFR1_MTE 2 + +int mte_default_setup(void) +{ + unsigned long hwcaps = getauxval(AT_HWCAP); + unsigned long en = 0; + int ret; + + if (!(hwcaps & HWCAP_CPUID)) { + ksft_print_msg("FAIL: CPUID registers unavailable\n"); + return KSFT_FAIL; + } + /* Read ID_AA64PFR1_EL1 register */ + asm volatile("mrs %0, id_aa64pfr1_el1" : "=r"(hwcaps) : : "memory"); + if (((hwcaps >> ID_AA64PFR1_MTE_SHIFT) & MT_TAG_MASK) != ID_AA64PFR1_MTE) { + ksft_print_msg("FAIL: MTE features unavailable\n"); + return KSFT_SKIP; + } + /* Get current mte mode */ + ret = prctl(PR_GET_TAGGED_ADDR_CTRL, en, 0, 0, 0); + if (ret < 0) { + ksft_print_msg("FAIL:prctl PR_GET_TAGGED_ADDR_CTRL with error =%d\n", ret); + return KSFT_FAIL; + } + if (ret & PR_MTE_TCF_SYNC) + mte_cur_mode = MTE_SYNC_ERR; + else if (ret & PR_MTE_TCF_ASYNC) + mte_cur_mode = MTE_ASYNC_ERR; + else if (ret & PR_MTE_TCF_NONE) + mte_cur_mode = MTE_NONE_ERR; + + mte_cur_pstate_tco = mte_get_pstate_tco(); + /* Disable PSTATE.TCO */ + mte_disable_pstate_tco(); + return 0; +} + +void mte_restore_setup(void) +{ + mte_switch_mode(mte_cur_mode, MTE_ALLOW_NON_ZERO_TAG); + if (mte_cur_pstate_tco == MT_PSTATE_TCO_EN) + mte_enable_pstate_tco(); + else if (mte_cur_pstate_tco == MT_PSTATE_TCO_DIS) + mte_disable_pstate_tco(); +} + +int create_temp_file(void) +{ + int fd; + char filename[] = "/dev/shm/tmp_XXXXXX"; + + /* Create a file in the tmpfs filesystem */ + fd = mkstemp(&filename[0]); + if (fd == -1) { + ksft_print_msg("FAIL: Unable to open temporary file\n"); + return 0; + } + unlink(&filename[0]); + return fd; +} diff --git a/tools/testing/selftests/arm64/mte/mte_common_util.h b/tools/testing/selftests/arm64/mte/mte_common_util.h new file mode 100644 index 000000000000..195a7d1879e6 --- /dev/null +++ b/tools/testing/selftests/arm64/mte/mte_common_util.h @@ -0,0 +1,118 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (C) 2020 ARM Limited */ + +#ifndef _MTE_COMMON_UTIL_H +#define _MTE_COMMON_UTIL_H + +#include <signal.h> +#include <stdbool.h> +#include <stdlib.h> +#include <sys/auxv.h> +#include <sys/mman.h> +#include <sys/prctl.h> +#include "mte_def.h" +#include "kselftest.h" + +enum mte_mem_type { + USE_MALLOC, + USE_MMAP, + USE_MPROTECT, +}; + +enum mte_mode { + MTE_NONE_ERR, + MTE_SYNC_ERR, + MTE_ASYNC_ERR, +}; + +struct mte_fault_cxt { + /* Address start which triggers mte tag fault */ + unsigned long trig_addr; + /* Address range for mte tag fault and negative value means underflow */ + ssize_t trig_range; + /* siginfo si code */ + unsigned long trig_si_code; + /* Flag to denote if correct fault caught */ + bool fault_valid; +}; + +extern struct mte_fault_cxt cur_mte_cxt; + +/* MTE utility functions */ +void mte_default_handler(int signum, siginfo_t *si, void *uc); +void mte_register_signal(int signal, void (*handler)(int, siginfo_t *, void *)); +void mte_wait_after_trig(void); +void *mte_allocate_memory(size_t size, int mem_type, int mapping, bool tags); +void *mte_allocate_memory_tag_range(size_t size, int mem_type, int mapping, + size_t range_before, size_t range_after); +void *mte_allocate_file_memory(size_t size, int mem_type, int mapping, + bool tags, int fd); +void *mte_allocate_file_memory_tag_range(size_t size, int mem_type, int mapping, + size_t range_before, size_t range_after, int fd); +void mte_free_memory(void *ptr, size_t size, int mem_type, bool tags); +void mte_free_memory_tag_range(void *ptr, size_t size, int mem_type, + size_t range_before, size_t range_after); +void *mte_insert_tags(void *ptr, size_t size); +void mte_clear_tags(void *ptr, size_t size); +int mte_default_setup(void); +void mte_restore_setup(void); +int mte_switch_mode(int mte_option, unsigned long incl_mask); +void mte_initialize_current_context(int mode, uintptr_t ptr, ssize_t range); + +/* Common utility functions */ +int create_temp_file(void); + +/* Assembly MTE utility functions */ +void *mte_insert_random_tag(void *ptr); +void *mte_insert_new_tag(void *ptr); +void *mte_get_tag_address(void *ptr); +void mte_set_tag_address_range(void *ptr, int range); +void mte_clear_tag_address_range(void *ptr, int range); +void mte_disable_pstate_tco(void); +void mte_enable_pstate_tco(void); +unsigned int mte_get_pstate_tco(void); + +/* Test framework static inline functions/macros */ +static inline void evaluate_test(int err, const char *msg) +{ + if (err == KSFT_PASS) + ksft_test_result_pass(msg); + else if (err == KSFT_FAIL) + ksft_test_result_fail(msg); +} + +static inline int check_allocated_memory(void *ptr, size_t size, + int mem_type, bool tags) +{ + if (ptr == NULL) { + ksft_print_msg("FAIL: memory allocation\n"); + return KSFT_FAIL; + } + + if (tags && !MT_FETCH_TAG((uintptr_t)ptr)) { + ksft_print_msg("FAIL: tag not found at addr(%p)\n", ptr); + mte_free_memory((void *)ptr, size, mem_type, false); + return KSFT_FAIL; + } + + return KSFT_PASS; +} + +static inline int check_allocated_memory_range(void *ptr, size_t size, int mem_type, + size_t range_before, size_t range_after) +{ + if (ptr == NULL) { + ksft_print_msg("FAIL: memory allocation\n"); + return KSFT_FAIL; + } + + if (!MT_FETCH_TAG((uintptr_t)ptr)) { + ksft_print_msg("FAIL: tag not found at addr(%p)\n", ptr); + mte_free_memory_tag_range((void *)ptr, size, mem_type, range_before, + range_after); + return KSFT_FAIL; + } + return KSFT_PASS; +} + +#endif /* _MTE_COMMON_UTIL_H */ diff --git a/tools/testing/selftests/arm64/mte/mte_def.h b/tools/testing/selftests/arm64/mte/mte_def.h new file mode 100644 index 000000000000..9b188254b61a --- /dev/null +++ b/tools/testing/selftests/arm64/mte/mte_def.h @@ -0,0 +1,60 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (C) 2020 ARM Limited */ + +/* + * Below definitions may be found in kernel headers, However, they are + * redefined here to decouple the MTE selftests compilations from them. + */ +#ifndef SEGV_MTEAERR +#define SEGV_MTEAERR 8 +#endif +#ifndef SEGV_MTESERR +#define SEGV_MTESERR 9 +#endif +#ifndef PROT_MTE +#define PROT_MTE 0x20 +#endif +#ifndef HWCAP2_MTE +#define HWCAP2_MTE (1 << 18) +#endif + +#ifndef PR_MTE_TCF_SHIFT +#define PR_MTE_TCF_SHIFT 1 +#endif +#ifndef PR_MTE_TCF_NONE +#define PR_MTE_TCF_NONE (0UL << PR_MTE_TCF_SHIFT) +#endif +#ifndef PR_MTE_TCF_SYNC +#define PR_MTE_TCF_SYNC (1UL << PR_MTE_TCF_SHIFT) +#endif +#ifndef PR_MTE_TCF_ASYNC +#define PR_MTE_TCF_ASYNC (2UL << PR_MTE_TCF_SHIFT) +#endif +#ifndef PR_MTE_TAG_SHIFT +#define PR_MTE_TAG_SHIFT 3 +#endif + +/* MTE Hardware feature definitions below. */ +#define MT_TAG_SHIFT 56 +#define MT_TAG_MASK 0xFUL +#define MT_FREE_TAG 0x0UL +#define MT_GRANULE_SIZE 16 +#define MT_TAG_COUNT 16 +#define MT_INCLUDE_TAG_MASK 0xFFFF +#define MT_EXCLUDE_TAG_MASK 0x0 + +#define MT_ALIGN_GRANULE (MT_GRANULE_SIZE - 1) +#define MT_CLEAR_TAG(x) ((x) & ~(MT_TAG_MASK << MT_TAG_SHIFT)) +#define MT_SET_TAG(x, y) ((x) | (y << MT_TAG_SHIFT)) +#define MT_FETCH_TAG(x) ((x >> MT_TAG_SHIFT) & (MT_TAG_MASK)) +#define MT_ALIGN_UP(x) ((x + MT_ALIGN_GRANULE) & ~(MT_ALIGN_GRANULE)) + +#define MT_PSTATE_TCO_SHIFT 25 +#define MT_PSTATE_TCO_MASK ~(0x1 << MT_PSTATE_TCO_SHIFT) +#define MT_PSTATE_TCO_EN 1 +#define MT_PSTATE_TCO_DIS 0 + +#define MT_EXCLUDE_TAG(x) (1 << (x)) +#define MT_INCLUDE_VALID_TAG(x) (MT_INCLUDE_TAG_MASK ^ MT_EXCLUDE_TAG(x)) +#define MT_INCLUDE_VALID_TAGS(x) (MT_INCLUDE_TAG_MASK ^ (x)) +#define MTE_ALLOW_NON_ZERO_TAG MT_INCLUDE_VALID_TAG(0) diff --git a/tools/testing/selftests/arm64/mte/mte_helper.S b/tools/testing/selftests/arm64/mte/mte_helper.S new file mode 100644 index 000000000000..a02c04cd0aac --- /dev/null +++ b/tools/testing/selftests/arm64/mte/mte_helper.S @@ -0,0 +1,128 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (C) 2020 ARM Limited */ + +#include "mte_def.h" + +#define ENTRY(name) \ + .globl name ;\ + .p2align 2;\ + .type name, @function ;\ +name: + +#define ENDPROC(name) \ + .size name, .-name ; + + .text +/* + * mte_insert_random_tag: Insert random tag and might be same as the source tag if + * the source pointer has it. + * Input: + * x0 - source pointer with a tag/no-tag + * Return: + * x0 - pointer with random tag + */ +ENTRY(mte_insert_random_tag) + irg x0, x0, xzr + ret +ENDPROC(mte_insert_random_tag) + +/* + * mte_insert_new_tag: Insert new tag and different from the source tag if + * source pointer has it. + * Input: + * x0 - source pointer with a tag/no-tag + * Return: + * x0 - pointer with random tag + */ +ENTRY(mte_insert_new_tag) + gmi x1, x0, xzr + irg x0, x0, x1 + ret +ENDPROC(mte_insert_new_tag) + +/* + * mte_get_tag_address: Get the tag from given address. + * Input: + * x0 - source pointer + * Return: + * x0 - pointer with appended tag + */ +ENTRY(mte_get_tag_address) + ldg x0, [x0] + ret +ENDPROC(mte_get_tag_address) + +/* + * mte_set_tag_address_range: Set the tag range from the given address + * Input: + * x0 - source pointer with tag data + * x1 - range + * Return: + * none + */ +ENTRY(mte_set_tag_address_range) + cbz x1, 2f +1: + stg x0, [x0, #0x0] + add x0, x0, #MT_GRANULE_SIZE + sub x1, x1, #MT_GRANULE_SIZE + cbnz x1, 1b +2: + ret +ENDPROC(mte_set_tag_address_range) + +/* + * mt_clear_tag_address_range: Clear the tag range from the given address + * Input: + * x0 - source pointer with tag data + * x1 - range + * Return: + * none + */ +ENTRY(mte_clear_tag_address_range) + cbz x1, 2f +1: + stzg x0, [x0, #0x0] + add x0, x0, #MT_GRANULE_SIZE + sub x1, x1, #MT_GRANULE_SIZE + cbnz x1, 1b +2: + ret +ENDPROC(mte_clear_tag_address_range) + +/* + * mte_enable_pstate_tco: Enable PSTATE.TCO (tag check override) field + * Input: + * none + * Return: + * none + */ +ENTRY(mte_enable_pstate_tco) + msr tco, #MT_PSTATE_TCO_EN + ret +ENDPROC(mte_enable_pstate_tco) + +/* + * mte_disable_pstate_tco: Disable PSTATE.TCO (tag check override) field + * Input: + * none + * Return: + * none + */ +ENTRY(mte_disable_pstate_tco) + msr tco, #MT_PSTATE_TCO_DIS + ret +ENDPROC(mte_disable_pstate_tco) + +/* + * mte_get_pstate_tco: Get PSTATE.TCO (tag check override) field + * Input: + * none + * Return: + * x0 + */ +ENTRY(mte_get_pstate_tco) + mrs x0, tco + ubfx x0, x0, #MT_PSTATE_TCO_SHIFT, #1 + ret +ENDPROC(mte_get_pstate_tco) diff --git a/tools/testing/selftests/arm64/pauth/.gitignore b/tools/testing/selftests/arm64/pauth/.gitignore new file mode 100644 index 000000000000..155137d92722 --- /dev/null +++ b/tools/testing/selftests/arm64/pauth/.gitignore @@ -0,0 +1,2 @@ +exec_target +pac diff --git a/tools/testing/selftests/arm64/pauth/Makefile b/tools/testing/selftests/arm64/pauth/Makefile new file mode 100644 index 000000000000..72e290b0b10c --- /dev/null +++ b/tools/testing/selftests/arm64/pauth/Makefile @@ -0,0 +1,39 @@ +# SPDX-License-Identifier: GPL-2.0 +# Copyright (C) 2020 ARM Limited + +# preserve CC value from top level Makefile +ifeq ($(CC),cc) +CC := $(CROSS_COMPILE)gcc +endif + +CFLAGS += -mbranch-protection=pac-ret +# check if the compiler supports ARMv8.3 and branch protection with PAuth +pauth_cc_support := $(shell if ($(CC) $(CFLAGS) -march=armv8.3-a -E -x c /dev/null -o /dev/null 2>&1) then echo "1"; fi) + +ifeq ($(pauth_cc_support),1) +TEST_GEN_PROGS := pac +TEST_GEN_FILES := pac_corruptor.o helper.o +TEST_GEN_PROGS_EXTENDED := exec_target +endif + +include ../../lib.mk + +ifeq ($(pauth_cc_support),1) +# pac* and aut* instructions are not available on architectures berfore +# ARMv8.3. Therefore target ARMv8.3 wherever they are used directly +$(OUTPUT)/pac_corruptor.o: pac_corruptor.S + $(CC) -c $^ -o $@ $(CFLAGS) -march=armv8.3-a + +$(OUTPUT)/helper.o: helper.c + $(CC) -c $^ -o $@ $(CFLAGS) -march=armv8.3-a + +# when -mbranch-protection is enabled and the target architecture is ARMv8.3 or +# greater, gcc emits pac* instructions which are not in HINT NOP space, +# preventing the tests from occurring at all. Compile for ARMv8.2 so tests can +# run on earlier targets and print a meaningful error messages +$(OUTPUT)/exec_target: exec_target.c $(OUTPUT)/helper.o + $(CC) $^ -o $@ $(CFLAGS) -march=armv8.2-a + +$(OUTPUT)/pac: pac.c $(OUTPUT)/pac_corruptor.o $(OUTPUT)/helper.o + $(CC) $^ -o $@ $(CFLAGS) -march=armv8.2-a +endif diff --git a/tools/testing/selftests/arm64/pauth/exec_target.c b/tools/testing/selftests/arm64/pauth/exec_target.c new file mode 100644 index 000000000000..4435600ca400 --- /dev/null +++ b/tools/testing/selftests/arm64/pauth/exec_target.c @@ -0,0 +1,34 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (C) 2020 ARM Limited + +#include <stdio.h> +#include <stdlib.h> +#include <sys/auxv.h> + +#include "helper.h" + +int main(void) +{ + struct signatures signed_vals; + unsigned long hwcaps; + size_t val; + + fread(&val, sizeof(size_t), 1, stdin); + + /* don't try to execute illegal (unimplemented) instructions) caller + * should have checked this and keep worker simple + */ + hwcaps = getauxval(AT_HWCAP); + + if (hwcaps & HWCAP_PACA) { + signed_vals.keyia = keyia_sign(val); + signed_vals.keyib = keyib_sign(val); + signed_vals.keyda = keyda_sign(val); + signed_vals.keydb = keydb_sign(val); + } + signed_vals.keyg = (hwcaps & HWCAP_PACG) ? keyg_sign(val) : 0; + + fwrite(&signed_vals, sizeof(struct signatures), 1, stdout); + + return 0; +} diff --git a/tools/testing/selftests/arm64/pauth/helper.c b/tools/testing/selftests/arm64/pauth/helper.c new file mode 100644 index 000000000000..2c201e7d0d50 --- /dev/null +++ b/tools/testing/selftests/arm64/pauth/helper.c @@ -0,0 +1,39 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (C) 2020 ARM Limited + +#include "helper.h" + +size_t keyia_sign(size_t ptr) +{ + asm volatile("paciza %0" : "+r" (ptr)); + return ptr; +} + +size_t keyib_sign(size_t ptr) +{ + asm volatile("pacizb %0" : "+r" (ptr)); + return ptr; +} + +size_t keyda_sign(size_t ptr) +{ + asm volatile("pacdza %0" : "+r" (ptr)); + return ptr; +} + +size_t keydb_sign(size_t ptr) +{ + asm volatile("pacdzb %0" : "+r" (ptr)); + return ptr; +} + +size_t keyg_sign(size_t ptr) +{ + /* output is encoded in the upper 32 bits */ + size_t dest = 0; + size_t modifier = 0; + + asm volatile("pacga %0, %1, %2" : "=r" (dest) : "r" (ptr), "r" (modifier)); + + return dest; +} diff --git a/tools/testing/selftests/arm64/pauth/helper.h b/tools/testing/selftests/arm64/pauth/helper.h new file mode 100644 index 000000000000..652496c7b411 --- /dev/null +++ b/tools/testing/selftests/arm64/pauth/helper.h @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (C) 2020 ARM Limited */ + +#ifndef _HELPER_H_ +#define _HELPER_H_ + +#include <stdlib.h> + +#define NKEYS 5 + +struct signatures { + size_t keyia; + size_t keyib; + size_t keyda; + size_t keydb; + size_t keyg; +}; + +void pac_corruptor(void); + +/* PAuth sign a value with key ia and modifier value 0 */ +size_t keyia_sign(size_t val); +size_t keyib_sign(size_t val); +size_t keyda_sign(size_t val); +size_t keydb_sign(size_t val); +size_t keyg_sign(size_t val); + +#endif diff --git a/tools/testing/selftests/arm64/pauth/pac.c b/tools/testing/selftests/arm64/pauth/pac.c new file mode 100644 index 000000000000..592fe538506e --- /dev/null +++ b/tools/testing/selftests/arm64/pauth/pac.c @@ -0,0 +1,368 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (C) 2020 ARM Limited + +#define _GNU_SOURCE + +#include <sys/auxv.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <signal.h> +#include <setjmp.h> +#include <sched.h> + +#include "../../kselftest_harness.h" +#include "helper.h" + +#define PAC_COLLISION_ATTEMPTS 10 +/* + * The kernel sets TBID by default. So bits 55 and above should remain + * untouched no matter what. + * The VA space size is 48 bits. Bigger is opt-in. + */ +#define PAC_MASK (~0xff80ffffffffffff) +#define ARBITRARY_VALUE (0x1234) +#define ASSERT_PAUTH_ENABLED() \ +do { \ + unsigned long hwcaps = getauxval(AT_HWCAP); \ + /* data key instructions are not in NOP space. This prevents a SIGILL */ \ + ASSERT_NE(0, hwcaps & HWCAP_PACA) TH_LOG("PAUTH not enabled"); \ +} while (0) +#define ASSERT_GENERIC_PAUTH_ENABLED() \ +do { \ + unsigned long hwcaps = getauxval(AT_HWCAP); \ + /* generic key instructions are not in NOP space. This prevents a SIGILL */ \ + ASSERT_NE(0, hwcaps & HWCAP_PACG) TH_LOG("Generic PAUTH not enabled"); \ +} while (0) + +void sign_specific(struct signatures *sign, size_t val) +{ + sign->keyia = keyia_sign(val); + sign->keyib = keyib_sign(val); + sign->keyda = keyda_sign(val); + sign->keydb = keydb_sign(val); +} + +void sign_all(struct signatures *sign, size_t val) +{ + sign->keyia = keyia_sign(val); + sign->keyib = keyib_sign(val); + sign->keyda = keyda_sign(val); + sign->keydb = keydb_sign(val); + sign->keyg = keyg_sign(val); +} + +int n_same(struct signatures *old, struct signatures *new, int nkeys) +{ + int res = 0; + + res += old->keyia == new->keyia; + res += old->keyib == new->keyib; + res += old->keyda == new->keyda; + res += old->keydb == new->keydb; + if (nkeys == NKEYS) + res += old->keyg == new->keyg; + + return res; +} + +int n_same_single_set(struct signatures *sign, int nkeys) +{ + size_t vals[nkeys]; + int same = 0; + + vals[0] = sign->keyia & PAC_MASK; + vals[1] = sign->keyib & PAC_MASK; + vals[2] = sign->keyda & PAC_MASK; + vals[3] = sign->keydb & PAC_MASK; + + if (nkeys >= 4) + vals[4] = sign->keyg & PAC_MASK; + + for (int i = 0; i < nkeys - 1; i++) { + for (int j = i + 1; j < nkeys; j++) { + if (vals[i] == vals[j]) + same += 1; + } + } + return same; +} + +int exec_sign_all(struct signatures *signed_vals, size_t val) +{ + int new_stdin[2]; + int new_stdout[2]; + int status; + int i; + ssize_t ret; + pid_t pid; + cpu_set_t mask; + + ret = pipe(new_stdin); + if (ret == -1) { + perror("pipe returned error"); + return -1; + } + + ret = pipe(new_stdout); + if (ret == -1) { + perror("pipe returned error"); + return -1; + } + + /* + * pin this process and all its children to a single CPU, so it can also + * guarantee a context switch with its child + */ + sched_getaffinity(0, sizeof(mask), &mask); + + for (i = 0; i < sizeof(cpu_set_t); i++) + if (CPU_ISSET(i, &mask)) + break; + + CPU_ZERO(&mask); + CPU_SET(i, &mask); + sched_setaffinity(0, sizeof(mask), &mask); + + pid = fork(); + // child + if (pid == 0) { + dup2(new_stdin[0], STDIN_FILENO); + if (ret == -1) { + perror("dup2 returned error"); + exit(1); + } + + dup2(new_stdout[1], STDOUT_FILENO); + if (ret == -1) { + perror("dup2 returned error"); + exit(1); + } + + close(new_stdin[0]); + close(new_stdin[1]); + close(new_stdout[0]); + close(new_stdout[1]); + + ret = execl("exec_target", "exec_target", (char *)NULL); + if (ret == -1) { + perror("exec returned error"); + exit(1); + } + } + + close(new_stdin[0]); + close(new_stdout[1]); + + ret = write(new_stdin[1], &val, sizeof(size_t)); + if (ret == -1) { + perror("write returned error"); + return -1; + } + + /* + * wait for the worker to finish, so that read() reads all data + * will also context switch with worker so that this function can be used + * for context switch tests + */ + waitpid(pid, &status, 0); + if (WIFEXITED(status) == 0) { + fprintf(stderr, "worker exited unexpectedly\n"); + return -1; + } + if (WEXITSTATUS(status) != 0) { + fprintf(stderr, "worker exited with error\n"); + return -1; + } + + ret = read(new_stdout[0], signed_vals, sizeof(struct signatures)); + if (ret == -1) { + perror("read returned error"); + return -1; + } + + return 0; +} + +sigjmp_buf jmpbuf; +void pac_signal_handler(int signum, siginfo_t *si, void *uc) +{ + if (signum == SIGSEGV || signum == SIGILL) + siglongjmp(jmpbuf, 1); +} + +/* check that a corrupted PAC results in SIGSEGV or SIGILL */ +TEST(corrupt_pac) +{ + struct sigaction sa; + + ASSERT_PAUTH_ENABLED(); + if (sigsetjmp(jmpbuf, 1) == 0) { + sa.sa_sigaction = pac_signal_handler; + sa.sa_flags = SA_SIGINFO | SA_RESETHAND; + sigemptyset(&sa.sa_mask); + + sigaction(SIGSEGV, &sa, NULL); + sigaction(SIGILL, &sa, NULL); + + pac_corruptor(); + ASSERT_TRUE(0) TH_LOG("SIGSEGV/SIGILL signal did not occur"); + } +} + +/* + * There are no separate pac* and aut* controls so checking only the pac* + * instructions is sufficient + */ +TEST(pac_instructions_not_nop) +{ + size_t keyia = 0; + size_t keyib = 0; + size_t keyda = 0; + size_t keydb = 0; + + ASSERT_PAUTH_ENABLED(); + + for (int i = 0; i < PAC_COLLISION_ATTEMPTS; i++) { + keyia |= keyia_sign(i) & PAC_MASK; + keyib |= keyib_sign(i) & PAC_MASK; + keyda |= keyda_sign(i) & PAC_MASK; + keydb |= keydb_sign(i) & PAC_MASK; + } + + ASSERT_NE(0, keyia) TH_LOG("keyia instructions did nothing"); + ASSERT_NE(0, keyib) TH_LOG("keyib instructions did nothing"); + ASSERT_NE(0, keyda) TH_LOG("keyda instructions did nothing"); + ASSERT_NE(0, keydb) TH_LOG("keydb instructions did nothing"); +} + +TEST(pac_instructions_not_nop_generic) +{ + size_t keyg = 0; + + ASSERT_GENERIC_PAUTH_ENABLED(); + + for (int i = 0; i < PAC_COLLISION_ATTEMPTS; i++) + keyg |= keyg_sign(i) & PAC_MASK; + + ASSERT_NE(0, keyg) TH_LOG("keyg instructions did nothing"); +} + +TEST(single_thread_different_keys) +{ + int same = 10; + int nkeys = NKEYS; + int tmp; + struct signatures signed_vals; + unsigned long hwcaps = getauxval(AT_HWCAP); + + /* generic and data key instructions are not in NOP space. This prevents a SIGILL */ + ASSERT_NE(0, hwcaps & HWCAP_PACA) TH_LOG("PAUTH not enabled"); + if (!(hwcaps & HWCAP_PACG)) { + TH_LOG("WARNING: Generic PAUTH not enabled. Skipping generic key checks"); + nkeys = NKEYS - 1; + } + + /* + * In Linux the PAC field can be up to 7 bits wide. Even if keys are + * different, there is about 5% chance for PACs to collide with + * different addresses. This chance rapidly increases with fewer bits + * allocated for the PAC (e.g. wider address). A comparison of the keys + * directly will be more reliable. + * All signed values need to be different at least once out of n + * attempts to be certain that the keys are different + */ + for (int i = 0; i < PAC_COLLISION_ATTEMPTS; i++) { + if (nkeys == NKEYS) + sign_all(&signed_vals, i); + else + sign_specific(&signed_vals, i); + + tmp = n_same_single_set(&signed_vals, nkeys); + if (tmp < same) + same = tmp; + } + + ASSERT_EQ(0, same) TH_LOG("%d keys clashed every time", same); +} + +/* + * fork() does not change keys. Only exec() does so call a worker program. + * Its only job is to sign a value and report back the resutls + */ +TEST(exec_changed_keys) +{ + struct signatures new_keys; + struct signatures old_keys; + int ret; + int same = 10; + int nkeys = NKEYS; + unsigned long hwcaps = getauxval(AT_HWCAP); + + /* generic and data key instructions are not in NOP space. This prevents a SIGILL */ + ASSERT_NE(0, hwcaps & HWCAP_PACA) TH_LOG("PAUTH not enabled"); + if (!(hwcaps & HWCAP_PACG)) { + TH_LOG("WARNING: Generic PAUTH not enabled. Skipping generic key checks"); + nkeys = NKEYS - 1; + } + + for (int i = 0; i < PAC_COLLISION_ATTEMPTS; i++) { + ret = exec_sign_all(&new_keys, i); + ASSERT_EQ(0, ret) TH_LOG("failed to run worker"); + + if (nkeys == NKEYS) + sign_all(&old_keys, i); + else + sign_specific(&old_keys, i); + + ret = n_same(&old_keys, &new_keys, nkeys); + if (ret < same) + same = ret; + } + + ASSERT_EQ(0, same) TH_LOG("exec() did not change %d keys", same); +} + +TEST(context_switch_keep_keys) +{ + int ret; + struct signatures trash; + struct signatures before; + struct signatures after; + + ASSERT_PAUTH_ENABLED(); + + sign_specific(&before, ARBITRARY_VALUE); + + /* will context switch with a process with different keys at least once */ + ret = exec_sign_all(&trash, ARBITRARY_VALUE); + ASSERT_EQ(0, ret) TH_LOG("failed to run worker"); + + sign_specific(&after, ARBITRARY_VALUE); + + ASSERT_EQ(before.keyia, after.keyia) TH_LOG("keyia changed after context switching"); + ASSERT_EQ(before.keyib, after.keyib) TH_LOG("keyib changed after context switching"); + ASSERT_EQ(before.keyda, after.keyda) TH_LOG("keyda changed after context switching"); + ASSERT_EQ(before.keydb, after.keydb) TH_LOG("keydb changed after context switching"); +} + +TEST(context_switch_keep_keys_generic) +{ + int ret; + struct signatures trash; + size_t before; + size_t after; + + ASSERT_GENERIC_PAUTH_ENABLED(); + + before = keyg_sign(ARBITRARY_VALUE); + + /* will context switch with a process with different keys at least once */ + ret = exec_sign_all(&trash, ARBITRARY_VALUE); + ASSERT_EQ(0, ret) TH_LOG("failed to run worker"); + + after = keyg_sign(ARBITRARY_VALUE); + + ASSERT_EQ(before, after) TH_LOG("keyg changed after context switching"); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/arm64/pauth/pac_corruptor.S b/tools/testing/selftests/arm64/pauth/pac_corruptor.S new file mode 100644 index 000000000000..aa6588050752 --- /dev/null +++ b/tools/testing/selftests/arm64/pauth/pac_corruptor.S @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (C) 2020 ARM Limited */ + +.global pac_corruptor + +.text +/* + * Corrupting a single bit of the PAC ensures the authentication will fail. It + * also guarantees no possible collision. TCR_EL1.TBI0 is set by default so no + * top byte PAC is tested + */ + pac_corruptor: + paciasp + + /* corrupt the top bit of the PAC */ + eor lr, lr, #1 << 53 + + autiasp + ret diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore index 1bb204cee853..9a0946ddb705 100644 --- a/tools/testing/selftests/bpf/.gitignore +++ b/tools/testing/selftests/bpf/.gitignore @@ -6,7 +6,6 @@ test_lpm_map test_tag FEATURE-DUMP.libbpf fixdep -test_align test_dev_cgroup /test_progs* test_tcpbpf_user diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index e7a8cf83ba48..fc946b7ac288 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -32,7 +32,7 @@ LDLIBS += -lcap -lelf -lz -lrt -lpthread # Order correspond to 'make run_tests' order TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test_progs \ - test_align test_verifier_log test_dev_cgroup test_tcpbpf_user \ + test_verifier_log test_dev_cgroup test_tcpbpf_user \ test_sock test_btf test_sockmap get_cgroup_id_user test_socket_cookie \ test_cgroup_storage \ test_netcnt test_tcpnotify_user test_sock_fields test_sysctl \ @@ -102,7 +102,7 @@ endif OVERRIDE_TARGETS := 1 override define CLEAN $(call msg,CLEAN) - $(RM) -r $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES) $(EXTRA_CLEAN) + $(Q)$(RM) -r $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES) $(EXTRA_CLEAN) endef include ../lib.mk @@ -123,17 +123,21 @@ $(notdir $(TEST_GEN_PROGS) \ $(TEST_GEN_PROGS_EXTENDED) \ $(TEST_CUSTOM_PROGS)): %: $(OUTPUT)/% ; +$(OUTPUT)/%.o: %.c + $(call msg,CC,,$@) + $(Q)$(CC) $(CFLAGS) -c $(filter %.c,$^) $(LDLIBS) -o $@ + $(OUTPUT)/%:%.c $(call msg,BINARY,,$@) - $(LINK.c) $^ $(LDLIBS) -o $@ + $(Q)$(LINK.c) $^ $(LDLIBS) -o $@ $(OUTPUT)/urandom_read: urandom_read.c $(call msg,BINARY,,$@) - $(CC) $(LDFLAGS) -o $@ $< $(LDLIBS) -Wl,--build-id + $(Q)$(CC) $(LDFLAGS) -o $@ $< $(LDLIBS) -Wl,--build-id $(OUTPUT)/test_stub.o: test_stub.c $(BPFOBJ) $(call msg,CC,,$@) - $(CC) -c $(CFLAGS) -o $@ $< + $(Q)$(CC) -c $(CFLAGS) -o $@ $< VMLINUX_BTF_PATHS ?= $(if $(O),$(O)/vmlinux) \ $(if $(KBUILD_OUTPUT),$(KBUILD_OUTPUT)/vmlinux) \ @@ -142,7 +146,9 @@ VMLINUX_BTF_PATHS ?= $(if $(O),$(O)/vmlinux) \ /boot/vmlinux-$(shell uname -r) VMLINUX_BTF ?= $(abspath $(firstword $(wildcard $(VMLINUX_BTF_PATHS)))) -$(OUTPUT)/runqslower: $(BPFOBJ) +DEFAULT_BPFTOOL := $(SCRATCH_DIR)/sbin/bpftool + +$(OUTPUT)/runqslower: $(BPFOBJ) | $(DEFAULT_BPFTOOL) $(Q)$(MAKE) $(submake_extras) -C $(TOOLSDIR)/bpf/runqslower \ OUTPUT=$(SCRATCH_DIR)/ VMLINUX_BTF=$(VMLINUX_BTF) \ BPFOBJ=$(BPFOBJ) BPF_INCLUDE=$(INCLUDE_DIR) && \ @@ -164,7 +170,6 @@ $(OUTPUT)/test_netcnt: cgroup_helpers.c $(OUTPUT)/test_sock_fields: cgroup_helpers.c $(OUTPUT)/test_sysctl: cgroup_helpers.c -DEFAULT_BPFTOOL := $(SCRATCH_DIR)/sbin/bpftool BPFTOOL ?= $(DEFAULT_BPFTOOL) $(DEFAULT_BPFTOOL): $(wildcard $(BPFTOOLDIR)/*.[ch] $(BPFTOOLDIR)/Makefile) \ $(BPFOBJ) | $(BUILD_DIR)/bpftool @@ -180,15 +185,15 @@ $(BPFOBJ): $(wildcard $(BPFDIR)/*.[ch] $(BPFDIR)/Makefile) \ $(BUILD_DIR)/libbpf $(BUILD_DIR)/bpftool $(BUILD_DIR)/resolve_btfids $(INCLUDE_DIR): $(call msg,MKDIR,,$@) - mkdir -p $@ + $(Q)mkdir -p $@ $(INCLUDE_DIR)/vmlinux.h: $(VMLINUX_BTF) | $(BPFTOOL) $(INCLUDE_DIR) ifeq ($(VMLINUX_H),) $(call msg,GEN,,$@) - $(BPFTOOL) btf dump file $(VMLINUX_BTF) format c > $@ + $(Q)$(BPFTOOL) btf dump file $(VMLINUX_BTF) format c > $@ else $(call msg,CP,,$@) - cp "$(VMLINUX_H)" $@ + $(Q)cp "$(VMLINUX_H)" $@ endif $(RESOLVE_BTFIDS): $(BPFOBJ) | $(BUILD_DIR)/resolve_btfids \ @@ -237,28 +242,28 @@ $(OUTPUT)/flow_dissector_load.o: flow_dissector_load.h # $4 - LDFLAGS define CLANG_BPF_BUILD_RULE $(call msg,CLNG-LLC,$(TRUNNER_BINARY),$2) - ($(CLANG) $3 -O2 -target bpf -emit-llvm \ + $(Q)($(CLANG) $3 -O2 -target bpf -emit-llvm \ -c $1 -o - || echo "BPF obj compilation failed") | \ $(LLC) -mattr=dwarfris -march=bpf -mcpu=v3 $4 -filetype=obj -o $2 endef # Similar to CLANG_BPF_BUILD_RULE, but with disabled alu32 define CLANG_NOALU32_BPF_BUILD_RULE $(call msg,CLNG-LLC,$(TRUNNER_BINARY),$2) - ($(CLANG) $3 -O2 -target bpf -emit-llvm \ + $(Q)($(CLANG) $3 -O2 -target bpf -emit-llvm \ -c $1 -o - || echo "BPF obj compilation failed") | \ $(LLC) -march=bpf -mcpu=v2 $4 -filetype=obj -o $2 endef # Similar to CLANG_BPF_BUILD_RULE, but using native Clang and bpf LLC define CLANG_NATIVE_BPF_BUILD_RULE $(call msg,CLNG-BPF,$(TRUNNER_BINARY),$2) - ($(CLANG) $3 -O2 -emit-llvm \ + $(Q)($(CLANG) $3 -O2 -emit-llvm \ -c $1 -o - || echo "BPF obj compilation failed") | \ $(LLC) -march=bpf -mcpu=v3 $4 -filetype=obj -o $2 endef # Build BPF object using GCC define GCC_BPF_BUILD_RULE $(call msg,GCC-BPF,$(TRUNNER_BINARY),$2) - $(BPF_GCC) $3 $4 -O2 -c $1 -o $2 + $(Q)$(BPF_GCC) $3 $4 -O2 -c $1 -o $2 endef SKEL_BLACKLIST := btf__% test_pinning_invalid.c test_sk_assign.c @@ -300,7 +305,7 @@ ifeq ($($(TRUNNER_OUTPUT)-dir),) $(TRUNNER_OUTPUT)-dir := y $(TRUNNER_OUTPUT): $$(call msg,MKDIR,,$$@) - mkdir -p $$@ + $(Q)mkdir -p $$@ endif # ensure we set up BPF objects generation rule just once for a given @@ -320,7 +325,7 @@ $(TRUNNER_BPF_SKELS): $(TRUNNER_OUTPUT)/%.skel.h: \ $(TRUNNER_OUTPUT)/%.o \ | $(BPFTOOL) $(TRUNNER_OUTPUT) $$(call msg,GEN-SKEL,$(TRUNNER_BINARY),$$@) - $$(BPFTOOL) gen skeleton $$< > $$@ + $(Q)$$(BPFTOOL) gen skeleton $$< > $$@ endif # ensure we set up tests.h header generation rule just once @@ -344,7 +349,7 @@ $(TRUNNER_TEST_OBJS): $(TRUNNER_OUTPUT)/%.test.o: \ $(TRUNNER_BPF_SKELS) \ $$(BPFOBJ) | $(TRUNNER_OUTPUT) $$(call msg,TEST-OBJ,$(TRUNNER_BINARY),$$@) - cd $$(@D) && $$(CC) -I. $$(CFLAGS) -c $(CURDIR)/$$< $$(LDLIBS) -o $$(@F) + $(Q)cd $$(@D) && $$(CC) -I. $$(CFLAGS) -c $(CURDIR)/$$< $$(LDLIBS) -o $$(@F) $(TRUNNER_EXTRA_OBJS): $(TRUNNER_OUTPUT)/%.o: \ %.c \ @@ -352,13 +357,13 @@ $(TRUNNER_EXTRA_OBJS): $(TRUNNER_OUTPUT)/%.o: \ $(TRUNNER_TESTS_HDR) \ $$(BPFOBJ) | $(TRUNNER_OUTPUT) $$(call msg,EXT-OBJ,$(TRUNNER_BINARY),$$@) - $$(CC) $$(CFLAGS) -c $$< $$(LDLIBS) -o $$@ + $(Q)$$(CC) $$(CFLAGS) -c $$< $$(LDLIBS) -o $$@ # only copy extra resources if in flavored build $(TRUNNER_BINARY)-extras: $(TRUNNER_EXTRA_FILES) | $(TRUNNER_OUTPUT) ifneq ($2,) $$(call msg,EXT-COPY,$(TRUNNER_BINARY),$(TRUNNER_EXTRA_FILES)) - cp -a $$^ $(TRUNNER_OUTPUT)/ + $(Q)cp -a $$^ $(TRUNNER_OUTPUT)/ endif $(OUTPUT)/$(TRUNNER_BINARY): $(TRUNNER_TEST_OBJS) \ @@ -366,8 +371,8 @@ $(OUTPUT)/$(TRUNNER_BINARY): $(TRUNNER_TEST_OBJS) \ $(RESOLVE_BTFIDS) \ | $(TRUNNER_BINARY)-extras $$(call msg,BINARY,,$$@) - $$(CC) $$(CFLAGS) $$(filter %.a %.o,$$^) $$(LDLIBS) -o $$@ - $(RESOLVE_BTFIDS) --no-fail --btf btf_data.o $$@ + $(Q)$$(CC) $$(CFLAGS) $$(filter %.a %.o,$$^) $$(LDLIBS) -o $$@ + $(Q)$(RESOLVE_BTFIDS) --no-fail --btf btf_data.o $$@ endef @@ -420,17 +425,17 @@ verifier/tests.h: verifier/*.c ) > verifier/tests.h) $(OUTPUT)/test_verifier: test_verifier.c verifier/tests.h $(BPFOBJ) | $(OUTPUT) $(call msg,BINARY,,$@) - $(CC) $(CFLAGS) $(filter %.a %.o %.c,$^) $(LDLIBS) -o $@ + $(Q)$(CC) $(CFLAGS) $(filter %.a %.o %.c,$^) $(LDLIBS) -o $@ # Make sure we are able to include and link libbpf against c++. $(OUTPUT)/test_cpp: test_cpp.cpp $(OUTPUT)/test_core_extern.skel.h $(BPFOBJ) $(call msg,CXX,,$@) - $(CXX) $(CFLAGS) $^ $(LDLIBS) -o $@ + $(Q)$(CXX) $(CFLAGS) $^ $(LDLIBS) -o $@ # Benchmark runner $(OUTPUT)/bench_%.o: benchs/bench_%.c bench.h $(call msg,CC,,$@) - $(CC) $(CFLAGS) -c $(filter %.c,$^) $(LDLIBS) -o $@ + $(Q)$(CC) $(CFLAGS) -c $(filter %.c,$^) $(LDLIBS) -o $@ $(OUTPUT)/bench_rename.o: $(OUTPUT)/test_overhead.skel.h $(OUTPUT)/bench_trigger.o: $(OUTPUT)/trigger_bench.skel.h $(OUTPUT)/bench_ringbufs.o: $(OUTPUT)/ringbuf_bench.skel.h \ @@ -443,7 +448,7 @@ $(OUTPUT)/bench: $(OUTPUT)/bench.o $(OUTPUT)/testing_helpers.o \ $(OUTPUT)/bench_trigger.o \ $(OUTPUT)/bench_ringbufs.o $(call msg,BINARY,,$@) - $(CC) $(LDFLAGS) -o $@ $(filter %.a %.o,$^) $(LDLIBS) + $(Q)$(CC) $(LDFLAGS) -o $@ $(filter %.a %.o,$^) $(LDLIBS) EXTRA_CLEAN := $(TEST_CUSTOM_PROGS) $(SCRATCH_DIR) \ prog_tests/tests.h map_tests/tests.h verifier/tests.h \ diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c index 4ffefdc1130f..7375d9a6d242 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c @@ -468,6 +468,7 @@ static void test_bpf_hash_map(void) DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts); struct bpf_iter_bpf_hash_map *skel; int err, i, len, map_fd, iter_fd; + union bpf_iter_link_info linfo; __u64 val, expected_val = 0; struct bpf_link *link; struct key_t { @@ -490,13 +491,16 @@ static void test_bpf_hash_map(void) goto out; /* iterator with hashmap2 and hashmap3 should fail */ - opts.map_fd = bpf_map__fd(skel->maps.hashmap2); + memset(&linfo, 0, sizeof(linfo)); + linfo.map.map_fd = bpf_map__fd(skel->maps.hashmap2); + opts.link_info = &linfo; + opts.link_info_len = sizeof(linfo); link = bpf_program__attach_iter(skel->progs.dump_bpf_hash_map, &opts); if (CHECK(!IS_ERR(link), "attach_iter", "attach_iter for hashmap2 unexpected succeeded\n")) goto out; - opts.map_fd = bpf_map__fd(skel->maps.hashmap3); + linfo.map.map_fd = bpf_map__fd(skel->maps.hashmap3); link = bpf_program__attach_iter(skel->progs.dump_bpf_hash_map, &opts); if (CHECK(!IS_ERR(link), "attach_iter", "attach_iter for hashmap3 unexpected succeeded\n")) @@ -519,7 +523,7 @@ static void test_bpf_hash_map(void) goto out; } - opts.map_fd = map_fd; + linfo.map.map_fd = map_fd; link = bpf_program__attach_iter(skel->progs.dump_bpf_hash_map, &opts); if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n")) goto out; @@ -562,6 +566,7 @@ static void test_bpf_percpu_hash_map(void) DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts); struct bpf_iter_bpf_percpu_hash_map *skel; int err, i, j, len, map_fd, iter_fd; + union bpf_iter_link_info linfo; __u32 expected_val = 0; struct bpf_link *link; struct key_t { @@ -606,7 +611,10 @@ static void test_bpf_percpu_hash_map(void) goto out; } - opts.map_fd = map_fd; + memset(&linfo, 0, sizeof(linfo)); + linfo.map.map_fd = map_fd; + opts.link_info = &linfo; + opts.link_info_len = sizeof(linfo); link = bpf_program__attach_iter(skel->progs.dump_bpf_percpu_hash_map, &opts); if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n")) goto out; @@ -649,6 +657,7 @@ static void test_bpf_array_map(void) DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts); __u32 expected_key = 0, res_first_key; struct bpf_iter_bpf_array_map *skel; + union bpf_iter_link_info linfo; int err, i, map_fd, iter_fd; struct bpf_link *link; char buf[64] = {}; @@ -673,7 +682,10 @@ static void test_bpf_array_map(void) goto out; } - opts.map_fd = map_fd; + memset(&linfo, 0, sizeof(linfo)); + linfo.map.map_fd = map_fd; + opts.link_info = &linfo; + opts.link_info_len = sizeof(linfo); link = bpf_program__attach_iter(skel->progs.dump_bpf_array_map, &opts); if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n")) goto out; @@ -730,6 +742,7 @@ static void test_bpf_percpu_array_map(void) DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts); struct bpf_iter_bpf_percpu_array_map *skel; __u32 expected_key = 0, expected_val = 0; + union bpf_iter_link_info linfo; int err, i, j, map_fd, iter_fd; struct bpf_link *link; char buf[64]; @@ -765,7 +778,10 @@ static void test_bpf_percpu_array_map(void) goto out; } - opts.map_fd = map_fd; + memset(&linfo, 0, sizeof(linfo)); + linfo.map.map_fd = map_fd; + opts.link_info = &linfo; + opts.link_info_len = sizeof(linfo); link = bpf_program__attach_iter(skel->progs.dump_bpf_percpu_array_map, &opts); if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n")) goto out; @@ -803,6 +819,7 @@ static void test_bpf_sk_storage_map(void) DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts); int err, i, len, map_fd, iter_fd, num_sockets; struct bpf_iter_bpf_sk_storage_map *skel; + union bpf_iter_link_info linfo; int sock_fd[3] = {-1, -1, -1}; __u32 val, expected_val = 0; struct bpf_link *link; @@ -829,7 +846,10 @@ static void test_bpf_sk_storage_map(void) goto out; } - opts.map_fd = map_fd; + memset(&linfo, 0, sizeof(linfo)); + linfo.map.map_fd = map_fd; + opts.link_info = &linfo; + opts.link_info_len = sizeof(linfo); link = bpf_program__attach_iter(skel->progs.dump_bpf_sk_storage_map, &opts); if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n")) goto out; @@ -871,6 +891,7 @@ static void test_rdonly_buf_out_of_bound(void) { DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts); struct bpf_iter_test_kern5 *skel; + union bpf_iter_link_info linfo; struct bpf_link *link; skel = bpf_iter_test_kern5__open_and_load(); @@ -878,7 +899,10 @@ static void test_rdonly_buf_out_of_bound(void) "skeleton open_and_load failed\n")) return; - opts.map_fd = bpf_map__fd(skel->maps.hashmap1); + memset(&linfo, 0, sizeof(linfo)); + linfo.map.map_fd = bpf_map__fd(skel->maps.hashmap1); + opts.link_info = &linfo; + opts.link_info_len = sizeof(linfo); link = bpf_program__attach_iter(skel->progs.dump_bpf_hash_map, &opts); if (CHECK(!IS_ERR(link), "attach_iter", "unexpected success\n")) bpf_link__destroy(link); diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c b/tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c index 7afa4160416f..284d5921c345 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c @@ -159,15 +159,15 @@ void test_bpf_obj_id(void) /* Check getting link info */ info_len = sizeof(struct bpf_link_info) * 2; bzero(&link_infos[i], info_len); - link_infos[i].raw_tracepoint.tp_name = (__u64)&tp_name; + link_infos[i].raw_tracepoint.tp_name = ptr_to_u64(&tp_name); link_infos[i].raw_tracepoint.tp_name_len = sizeof(tp_name); err = bpf_obj_get_info_by_fd(bpf_link__fd(links[i]), &link_infos[i], &info_len); if (CHECK(err || link_infos[i].type != BPF_LINK_TYPE_RAW_TRACEPOINT || link_infos[i].prog_id != prog_infos[i].id || - link_infos[i].raw_tracepoint.tp_name != (__u64)&tp_name || - strcmp((char *)link_infos[i].raw_tracepoint.tp_name, + link_infos[i].raw_tracepoint.tp_name != ptr_to_u64(&tp_name) || + strcmp(u64_to_ptr(link_infos[i].raw_tracepoint.tp_name), "sys_enter") || info_len != sizeof(struct bpf_link_info), "get-link-info(fd)", @@ -178,7 +178,7 @@ void test_bpf_obj_id(void) link_infos[i].type, BPF_LINK_TYPE_RAW_TRACEPOINT, link_infos[i].id, link_infos[i].prog_id, prog_infos[i].id, - (char *)link_infos[i].raw_tracepoint.tp_name, + (const char *)u64_to_ptr(link_infos[i].raw_tracepoint.tp_name), "sys_enter")) goto done; diff --git a/tools/testing/selftests/bpf/prog_tests/btf_dump.c b/tools/testing/selftests/bpf/prog_tests/btf_dump.c index cb33a7ee4e04..39fb81d9daeb 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf_dump.c +++ b/tools/testing/selftests/bpf/prog_tests/btf_dump.c @@ -12,15 +12,16 @@ void btf_dump_printf(void *ctx, const char *fmt, va_list args) static struct btf_dump_test_case { const char *name; const char *file; + bool known_ptr_sz; struct btf_dump_opts opts; } btf_dump_test_cases[] = { - {"btf_dump: syntax", "btf_dump_test_case_syntax", {}}, - {"btf_dump: ordering", "btf_dump_test_case_ordering", {}}, - {"btf_dump: padding", "btf_dump_test_case_padding", {}}, - {"btf_dump: packing", "btf_dump_test_case_packing", {}}, - {"btf_dump: bitfields", "btf_dump_test_case_bitfields", {}}, - {"btf_dump: multidim", "btf_dump_test_case_multidim", {}}, - {"btf_dump: namespacing", "btf_dump_test_case_namespacing", {}}, + {"btf_dump: syntax", "btf_dump_test_case_syntax", true, {}}, + {"btf_dump: ordering", "btf_dump_test_case_ordering", false, {}}, + {"btf_dump: padding", "btf_dump_test_case_padding", true, {}}, + {"btf_dump: packing", "btf_dump_test_case_packing", true, {}}, + {"btf_dump: bitfields", "btf_dump_test_case_bitfields", true, {}}, + {"btf_dump: multidim", "btf_dump_test_case_multidim", false, {}}, + {"btf_dump: namespacing", "btf_dump_test_case_namespacing", false, {}}, }; static int btf_dump_all_types(const struct btf *btf, @@ -62,6 +63,18 @@ static int test_btf_dump_case(int n, struct btf_dump_test_case *t) goto done; } + /* tests with t->known_ptr_sz have no "long" or "unsigned long" type, + * so it's impossible to determine correct pointer size; but if they + * do, it should be 8 regardless of host architecture, becaues BPF + * target is always 64-bit + */ + if (!t->known_ptr_sz) { + btf__set_pointer_size(btf, 8); + } else { + CHECK(btf__pointer_size(btf) != 8, "ptr_sz", "exp %d, got %zu\n", + 8, btf__pointer_size(btf)); + } + snprintf(out_file, sizeof(out_file), "/tmp/%s.output.XXXXXX", t->file); fd = mkstemp(out_file); if (CHECK(fd < 0, "create_tmp", "failed to create file: %d\n", fd)) { diff --git a/tools/testing/selftests/bpf/prog_tests/core_extern.c b/tools/testing/selftests/bpf/prog_tests/core_extern.c index b093787e9448..1931a158510e 100644 --- a/tools/testing/selftests/bpf/prog_tests/core_extern.c +++ b/tools/testing/selftests/bpf/prog_tests/core_extern.c @@ -159,8 +159,8 @@ void test_core_extern(void) exp = (uint64_t *)&t->data; for (j = 0; j < n; j++) { CHECK(got[j] != exp[j], "check_res", - "result #%d: expected %lx, but got %lx\n", - j, exp[j], got[j]); + "result #%d: expected %llx, but got %llx\n", + j, (__u64)exp[j], (__u64)got[j]); } cleanup: test_core_extern__destroy(skel); diff --git a/tools/testing/selftests/bpf/prog_tests/core_reloc.c b/tools/testing/selftests/bpf/prog_tests/core_reloc.c index 084ed26a7d78..a54eafc5e4b3 100644 --- a/tools/testing/selftests/bpf/prog_tests/core_reloc.c +++ b/tools/testing/selftests/bpf/prog_tests/core_reloc.c @@ -237,8 +237,8 @@ .union_sz = sizeof(((type *)0)->union_field), \ .arr_sz = sizeof(((type *)0)->arr_field), \ .arr_elem_sz = sizeof(((type *)0)->arr_field[0]), \ - .ptr_sz = sizeof(((type *)0)->ptr_field), \ - .enum_sz = sizeof(((type *)0)->enum_field), \ + .ptr_sz = 8, /* always 8-byte pointer for BPF */ \ + .enum_sz = sizeof(((type *)0)->enum_field), \ } #define SIZE_CASE(name) { \ @@ -432,20 +432,20 @@ static struct core_reloc_test_case test_cases[] = { .sb4 = -1, .sb20 = -0x17654321, .u32 = 0xBEEF, - .s32 = -0x3FEDCBA987654321, + .s32 = -0x3FEDCBA987654321LL, }), BITFIELDS_CASE(bitfields___bitfield_vs_int, { - .ub1 = 0xFEDCBA9876543210, + .ub1 = 0xFEDCBA9876543210LL, .ub2 = 0xA6, - .ub7 = -0x7EDCBA987654321, - .sb4 = -0x6123456789ABCDE, - .sb20 = 0xD00D, + .ub7 = -0x7EDCBA987654321LL, + .sb4 = -0x6123456789ABCDELL, + .sb20 = 0xD00DLL, .u32 = -0x76543, - .s32 = 0x0ADEADBEEFBADB0B, + .s32 = 0x0ADEADBEEFBADB0BLL, }), BITFIELDS_CASE(bitfields___just_big_enough, { - .ub1 = 0xF, - .ub2 = 0x0812345678FEDCBA, + .ub1 = 0xFLL, + .ub2 = 0x0812345678FEDCBALL, }), BITFIELDS_ERR_CASE(bitfields___err_too_big_bitfield), diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c b/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c index a895bfed55db..197d0d217b56 100644 --- a/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c +++ b/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c @@ -16,7 +16,7 @@ static void test_fexit_bpf2bpf_common(const char *obj_file, __u32 duration = 0, retval; struct bpf_map *data_map; const int zero = 0; - u64 *result = NULL; + __u64 *result = NULL; err = bpf_prog_load(target_obj_file, BPF_PROG_TYPE_UNSPEC, &pkt_obj, &pkt_fd); @@ -29,7 +29,7 @@ static void test_fexit_bpf2bpf_common(const char *obj_file, link = calloc(sizeof(struct bpf_link *), prog_cnt); prog = calloc(sizeof(struct bpf_program *), prog_cnt); - result = malloc((prog_cnt + 32 /* spare */) * sizeof(u64)); + result = malloc((prog_cnt + 32 /* spare */) * sizeof(__u64)); if (CHECK(!link || !prog || !result, "alloc_memory", "failed to alloc memory")) goto close_prog; @@ -72,7 +72,7 @@ static void test_fexit_bpf2bpf_common(const char *obj_file, goto close_prog; for (i = 0; i < prog_cnt; i++) - if (CHECK(result[i] != 1, "result", "fexit_bpf2bpf failed err %ld\n", + if (CHECK(result[i] != 1, "result", "fexit_bpf2bpf failed err %llu\n", result[i])) goto close_prog; diff --git a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c index f11f187990e9..cd6dc80edf18 100644 --- a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c +++ b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c @@ -591,7 +591,7 @@ void test_flow_dissector(void) CHECK_ATTR(tattr.data_size_out != sizeof(flow_keys) || err || tattr.retval != 1, tests[i].name, - "err %d errno %d retval %d duration %d size %u/%lu\n", + "err %d errno %d retval %d duration %d size %u/%zu\n", err, errno, tattr.retval, tattr.duration, tattr.data_size_out, sizeof(flow_keys)); CHECK_FLOW_KEYS(tests[i].name, flow_keys, tests[i].keys); diff --git a/tools/testing/selftests/bpf/prog_tests/global_data.c b/tools/testing/selftests/bpf/prog_tests/global_data.c index e3cb62b0a110..9efa7e50eab2 100644 --- a/tools/testing/selftests/bpf/prog_tests/global_data.c +++ b/tools/testing/selftests/bpf/prog_tests/global_data.c @@ -5,7 +5,7 @@ static void test_global_data_number(struct bpf_object *obj, __u32 duration) { int i, err, map_fd; - uint64_t num; + __u64 num; map_fd = bpf_find_map(__func__, obj, "result_number"); if (CHECK_FAIL(map_fd < 0)) @@ -14,7 +14,7 @@ static void test_global_data_number(struct bpf_object *obj, __u32 duration) struct { char *name; uint32_t key; - uint64_t num; + __u64 num; } tests[] = { { "relocate .bss reference", 0, 0 }, { "relocate .data reference", 1, 42 }, @@ -32,7 +32,7 @@ static void test_global_data_number(struct bpf_object *obj, __u32 duration) for (i = 0; i < sizeof(tests) / sizeof(tests[0]); i++) { err = bpf_map_lookup_elem(map_fd, &tests[i].key, &num); CHECK(err || num != tests[i].num, tests[i].name, - "err %d result %lx expected %lx\n", + "err %d result %llx expected %llx\n", err, num, tests[i].num); } } diff --git a/tools/testing/selftests/bpf/prog_tests/mmap.c b/tools/testing/selftests/bpf/prog_tests/mmap.c index 43d0b5578f46..9c3c5c0f068f 100644 --- a/tools/testing/selftests/bpf/prog_tests/mmap.c +++ b/tools/testing/selftests/bpf/prog_tests/mmap.c @@ -21,7 +21,7 @@ void test_mmap(void) const long page_size = sysconf(_SC_PAGE_SIZE); int err, duration = 0, i, data_map_fd, data_map_id, tmp_fd, rdmap_fd; struct bpf_map *data_map, *bss_map; - void *bss_mmaped = NULL, *map_mmaped = NULL, *tmp1, *tmp2; + void *bss_mmaped = NULL, *map_mmaped = NULL, *tmp0, *tmp1, *tmp2; struct test_mmap__bss *bss_data; struct bpf_map_info map_info; __u32 map_info_sz = sizeof(map_info); @@ -183,16 +183,23 @@ void test_mmap(void) /* check some more advanced mmap() manipulations */ + tmp0 = mmap(NULL, 4 * page_size, PROT_READ, MAP_SHARED | MAP_ANONYMOUS, + -1, 0); + if (CHECK(tmp0 == MAP_FAILED, "adv_mmap0", "errno %d\n", errno)) + goto cleanup; + /* map all but last page: pages 1-3 mapped */ - tmp1 = mmap(NULL, 3 * page_size, PROT_READ, MAP_SHARED, + tmp1 = mmap(tmp0, 3 * page_size, PROT_READ, MAP_SHARED | MAP_FIXED, data_map_fd, 0); - if (CHECK(tmp1 == MAP_FAILED, "adv_mmap1", "errno %d\n", errno)) + if (CHECK(tmp0 != tmp1, "adv_mmap1", "tmp0: %p, tmp1: %p\n", tmp0, tmp1)) { + munmap(tmp0, 4 * page_size); goto cleanup; + } /* unmap second page: pages 1, 3 mapped */ err = munmap(tmp1 + page_size, page_size); if (CHECK(err, "adv_mmap2", "errno %d\n", errno)) { - munmap(tmp1, map_sz); + munmap(tmp1, 4 * page_size); goto cleanup; } @@ -201,7 +208,7 @@ void test_mmap(void) MAP_SHARED | MAP_FIXED, data_map_fd, 0); if (CHECK(tmp2 == MAP_FAILED, "adv_mmap3", "errno %d\n", errno)) { munmap(tmp1, page_size); - munmap(tmp1 + 2*page_size, page_size); + munmap(tmp1 + 2*page_size, 2 * page_size); goto cleanup; } CHECK(tmp1 + page_size != tmp2, "adv_mmap4", @@ -211,7 +218,7 @@ void test_mmap(void) tmp2 = mmap(tmp1, 4 * page_size, PROT_READ, MAP_SHARED | MAP_FIXED, data_map_fd, 0); if (CHECK(tmp2 == MAP_FAILED, "adv_mmap5", "errno %d\n", errno)) { - munmap(tmp1, 3 * page_size); /* unmap page 1 */ + munmap(tmp1, 4 * page_size); /* unmap page 1 */ goto cleanup; } CHECK(tmp1 != tmp2, "adv_mmap6", "tmp1: %p, tmp2: %p\n", tmp1, tmp2); diff --git a/tools/testing/selftests/bpf/prog_tests/prog_run_xattr.c b/tools/testing/selftests/bpf/prog_tests/prog_run_xattr.c index dde2b7ae7bc9..935a294f049a 100644 --- a/tools/testing/selftests/bpf/prog_tests/prog_run_xattr.c +++ b/tools/testing/selftests/bpf/prog_tests/prog_run_xattr.c @@ -28,7 +28,7 @@ void test_prog_run_xattr(void) "err %d errno %d retval %d\n", err, errno, tattr.retval); CHECK_ATTR(tattr.data_size_out != sizeof(pkt_v4), "data_size_out", - "incorrect output size, want %lu have %u\n", + "incorrect output size, want %zu have %u\n", sizeof(pkt_v4), tattr.data_size_out); CHECK_ATTR(buf[5] != 0, "overflow", diff --git a/tools/testing/selftests/bpf/prog_tests/send_signal.c b/tools/testing/selftests/bpf/prog_tests/send_signal.c index 504abb7bfb95..7043e6ded0e6 100644 --- a/tools/testing/selftests/bpf/prog_tests/send_signal.c +++ b/tools/testing/selftests/bpf/prog_tests/send_signal.c @@ -48,21 +48,19 @@ static void test_send_signal_common(struct perf_event_attr *attr, close(pipe_p2c[1]); /* close write */ /* notify parent signal handler is installed */ - write(pipe_c2p[1], buf, 1); + CHECK(write(pipe_c2p[1], buf, 1) != 1, "pipe_write", "err %d\n", -errno); /* make sure parent enabled bpf program to send_signal */ - read(pipe_p2c[0], buf, 1); + CHECK(read(pipe_p2c[0], buf, 1) != 1, "pipe_read", "err %d\n", -errno); /* wait a little for signal handler */ sleep(1); - if (sigusr1_received) - write(pipe_c2p[1], "2", 1); - else - write(pipe_c2p[1], "0", 1); + buf[0] = sigusr1_received ? '2' : '0'; + CHECK(write(pipe_c2p[1], buf, 1) != 1, "pipe_write", "err %d\n", -errno); /* wait for parent notification and exit */ - read(pipe_p2c[0], buf, 1); + CHECK(read(pipe_p2c[0], buf, 1) != 1, "pipe_read", "err %d\n", -errno); close(pipe_c2p[1]); close(pipe_p2c[0]); @@ -99,7 +97,7 @@ static void test_send_signal_common(struct perf_event_attr *attr, } /* wait until child signal handler installed */ - read(pipe_c2p[0], buf, 1); + CHECK(read(pipe_c2p[0], buf, 1) != 1, "pipe_read", "err %d\n", -errno); /* trigger the bpf send_signal */ skel->bss->pid = pid; @@ -107,7 +105,7 @@ static void test_send_signal_common(struct perf_event_attr *attr, skel->bss->signal_thread = signal_thread; /* notify child that bpf program can send_signal now */ - write(pipe_p2c[1], buf, 1); + CHECK(write(pipe_p2c[1], buf, 1) != 1, "pipe_write", "err %d\n", -errno); /* wait for result */ err = read(pipe_c2p[0], buf, 1); @@ -121,7 +119,7 @@ static void test_send_signal_common(struct perf_event_attr *attr, CHECK(buf[0] != '2', test_name, "incorrect result\n"); /* notify child safe to exit */ - write(pipe_p2c[1], buf, 1); + CHECK(write(pipe_p2c[1], buf, 1) != 1, "pipe_write", "err %d\n", -errno); disable_pmu: close(pmu_fd); diff --git a/tools/testing/selftests/bpf/prog_tests/sk_lookup.c b/tools/testing/selftests/bpf/prog_tests/sk_lookup.c index c571584c00f5..9ff0412e1fd3 100644 --- a/tools/testing/selftests/bpf/prog_tests/sk_lookup.c +++ b/tools/testing/selftests/bpf/prog_tests/sk_lookup.c @@ -309,6 +309,7 @@ static void v4_to_v6(struct sockaddr_storage *ss) v6->sin6_addr.s6_addr[10] = 0xff; v6->sin6_addr.s6_addr[11] = 0xff; memcpy(&v6->sin6_addr.s6_addr[12], &v4.sin_addr.s_addr, 4); + memset(&v6->sin6_addr.s6_addr[0], 0, 10); } static int udp_recv_send(int server_fd) diff --git a/tools/testing/selftests/bpf/prog_tests/skb_ctx.c b/tools/testing/selftests/bpf/prog_tests/skb_ctx.c index 25de86af2d03..fafeddaad6a9 100644 --- a/tools/testing/selftests/bpf/prog_tests/skb_ctx.c +++ b/tools/testing/selftests/bpf/prog_tests/skb_ctx.c @@ -81,7 +81,7 @@ void test_skb_ctx(void) CHECK_ATTR(tattr.ctx_size_out != sizeof(skb), "ctx_size_out", - "incorrect output size, want %lu have %u\n", + "incorrect output size, want %zu have %u\n", sizeof(skb), tattr.ctx_size_out); for (i = 0; i < 5; i++) diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c index f002e3090d92..11a769e18f5d 100644 --- a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c +++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c @@ -6,11 +6,13 @@ static __u64 read_perf_max_sample_freq(void) { __u64 sample_freq = 5000; /* fallback to 5000 on error */ FILE *f; + __u32 duration = 0; f = fopen("/proc/sys/kernel/perf_event_max_sample_rate", "r"); if (f == NULL) return sample_freq; - fscanf(f, "%llu", &sample_freq); + CHECK(fscanf(f, "%llu", &sample_freq) != 1, "Get max sample rate", + "return default value: 5000,err %d\n", -errno); fclose(f); return sample_freq; } diff --git a/tools/testing/selftests/bpf/prog_tests/test_global_funcs.c b/tools/testing/selftests/bpf/prog_tests/test_global_funcs.c index 25b068591e9a..193002b14d7f 100644 --- a/tools/testing/selftests/bpf/prog_tests/test_global_funcs.c +++ b/tools/testing/selftests/bpf/prog_tests/test_global_funcs.c @@ -19,7 +19,7 @@ static int libbpf_debug_print(enum libbpf_print_level level, log_buf = va_arg(args, char *); if (!log_buf) goto out; - if (strstr(log_buf, err_str) == 0) + if (err_str && strstr(log_buf, err_str) == 0) found = true; out: printf(format, log_buf); diff --git a/tools/testing/selftests/bpf/prog_tests/varlen.c b/tools/testing/selftests/bpf/prog_tests/varlen.c index c75525eab02c..dd324b4933db 100644 --- a/tools/testing/selftests/bpf/prog_tests/varlen.c +++ b/tools/testing/selftests/bpf/prog_tests/varlen.c @@ -44,25 +44,25 @@ void test_varlen(void) CHECK_VAL(bss->payload1_len2, size2); CHECK_VAL(bss->total1, size1 + size2); CHECK(memcmp(bss->payload1, exp_str, size1 + size2), "content_check", - "doesn't match!"); + "doesn't match!\n"); CHECK_VAL(data->payload2_len1, size1); CHECK_VAL(data->payload2_len2, size2); CHECK_VAL(data->total2, size1 + size2); CHECK(memcmp(data->payload2, exp_str, size1 + size2), "content_check", - "doesn't match!"); + "doesn't match!\n"); CHECK_VAL(data->payload3_len1, size1); CHECK_VAL(data->payload3_len2, size2); CHECK_VAL(data->total3, size1 + size2); CHECK(memcmp(data->payload3, exp_str, size1 + size2), "content_check", - "doesn't match!"); + "doesn't match!\n"); CHECK_VAL(data->payload4_len1, size1); CHECK_VAL(data->payload4_len2, size2); CHECK_VAL(data->total4, size1 + size2); CHECK(memcmp(data->payload4, exp_str, size1 + size2), "content_check", - "doesn't match!"); + "doesn't match!\n"); cleanup: test_varlen__destroy(skel); } diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_hash_map.c b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_hash_map.c index 07ddbfdbcab7..6dfce3fd68bc 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_hash_map.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_hash_map.c @@ -47,7 +47,10 @@ int dump_bpf_hash_map(struct bpf_iter__bpf_map_elem *ctx) __u32 seq_num = ctx->meta->seq_num; struct bpf_map *map = ctx->map; struct key_t *key = ctx->key; + struct key_t tmp_key; __u64 *val = ctx->value; + __u64 tmp_val = 0; + int ret; if (in_test_mode) { /* test mode is used by selftests to @@ -61,6 +64,18 @@ int dump_bpf_hash_map(struct bpf_iter__bpf_map_elem *ctx) if (key == (void *)0 || val == (void *)0) return 0; + /* update the value and then delete the <key, value> pair. + * it should not impact the existing 'val' which is still + * accessible under rcu. + */ + __builtin_memcpy(&tmp_key, key, sizeof(struct key_t)); + ret = bpf_map_update_elem(&hashmap1, &tmp_key, &tmp_val, 0); + if (ret) + return 0; + ret = bpf_map_delete_elem(&hashmap1, &tmp_key); + if (ret) + return 0; + key_sum_a += key->a; key_sum_b += key->b; key_sum_c += key->c; diff --git a/tools/testing/selftests/bpf/progs/core_reloc_types.h b/tools/testing/selftests/bpf/progs/core_reloc_types.h index 34d84717c946..69139ed66216 100644 --- a/tools/testing/selftests/bpf/progs/core_reloc_types.h +++ b/tools/testing/selftests/bpf/progs/core_reloc_types.h @@ -1,5 +1,10 @@ #include <stdint.h> #include <stdbool.h> + +void preserce_ptr_sz_fn(long x) {} + +#define __bpf_aligned __attribute__((aligned(8))) + /* * KERNEL */ @@ -444,51 +449,51 @@ struct core_reloc_primitives { char a; int b; enum core_reloc_primitives_enum c; - void *d; - int (*f)(const char *); + void *d __bpf_aligned; + int (*f)(const char *) __bpf_aligned; }; struct core_reloc_primitives___diff_enum_def { char a; int b; - void *d; - int (*f)(const char *); + void *d __bpf_aligned; + int (*f)(const char *) __bpf_aligned; enum { X = 100, Y = 200, - } c; /* inline enum def with differing set of values */ + } c __bpf_aligned; /* inline enum def with differing set of values */ }; struct core_reloc_primitives___diff_func_proto { - void (*f)(int); /* incompatible function prototype */ - void *d; - enum core_reloc_primitives_enum c; + void (*f)(int) __bpf_aligned; /* incompatible function prototype */ + void *d __bpf_aligned; + enum core_reloc_primitives_enum c __bpf_aligned; int b; char a; }; struct core_reloc_primitives___diff_ptr_type { - const char * const d; /* different pointee type + modifiers */ - char a; + const char * const d __bpf_aligned; /* different pointee type + modifiers */ + char a __bpf_aligned; int b; enum core_reloc_primitives_enum c; - int (*f)(const char *); + int (*f)(const char *) __bpf_aligned; }; struct core_reloc_primitives___err_non_enum { char a[1]; int b; int c; /* int instead of enum */ - void *d; - int (*f)(const char *); + void *d __bpf_aligned; + int (*f)(const char *) __bpf_aligned; }; struct core_reloc_primitives___err_non_int { char a[1]; - int *b; /* ptr instead of int */ - enum core_reloc_primitives_enum c; - void *d; - int (*f)(const char *); + int *b __bpf_aligned; /* ptr instead of int */ + enum core_reloc_primitives_enum c __bpf_aligned; + void *d __bpf_aligned; + int (*f)(const char *) __bpf_aligned; }; struct core_reloc_primitives___err_non_ptr { @@ -496,7 +501,7 @@ struct core_reloc_primitives___err_non_ptr { int b; enum core_reloc_primitives_enum c; int d; /* int instead of ptr */ - int (*f)(const char *); + int (*f)(const char *) __bpf_aligned; }; /* @@ -507,7 +512,7 @@ struct core_reloc_mods_output { }; typedef const int int_t; -typedef const char *char_ptr_t; +typedef const char *char_ptr_t __bpf_aligned; typedef const int arr_t[7]; struct core_reloc_mods_substruct { @@ -523,9 +528,9 @@ typedef struct { struct core_reloc_mods { int a; int_t b; - char *c; + char *c __bpf_aligned; char_ptr_t d; - int e[3]; + int e[3] __bpf_aligned; arr_t f; struct core_reloc_mods_substruct g; core_reloc_mods_substruct_t h; @@ -535,9 +540,9 @@ struct core_reloc_mods { struct core_reloc_mods___mod_swap { int b; int_t a; - char *d; + char *d __bpf_aligned; char_ptr_t c; - int f[3]; + int f[3] __bpf_aligned; arr_t e; struct { int y; @@ -555,7 +560,7 @@ typedef arr1_t arr2_t; typedef arr2_t arr3_t; typedef arr3_t arr4_t; -typedef const char * const volatile fancy_char_ptr_t; +typedef const char * const volatile fancy_char_ptr_t __bpf_aligned; typedef core_reloc_mods_substruct_t core_reloc_mods_substruct_tt; @@ -567,7 +572,7 @@ struct core_reloc_mods___typedefs { arr4_t e; fancy_char_ptr_t d; fancy_char_ptr_t c; - int3_t b; + int3_t b __bpf_aligned; int3_t a; }; @@ -739,19 +744,19 @@ struct core_reloc_bitfields___bit_sz_change { int8_t sb4: 1; /* 4 -> 1 */ int32_t sb20: 30; /* 20 -> 30 */ /* non-bitfields */ - uint16_t u32; /* 32 -> 16 */ - int64_t s32; /* 32 -> 64 */ + uint16_t u32; /* 32 -> 16 */ + int64_t s32 __bpf_aligned; /* 32 -> 64 */ }; /* turn bitfield into non-bitfield and vice versa */ struct core_reloc_bitfields___bitfield_vs_int { uint64_t ub1; /* 3 -> 64 non-bitfield */ uint8_t ub2; /* 20 -> 8 non-bitfield */ - int64_t ub7; /* 7 -> 64 non-bitfield signed */ - int64_t sb4; /* 4 -> 64 non-bitfield signed */ - uint64_t sb20; /* 20 -> 16 non-bitfield unsigned */ - int32_t u32: 20; /* 32 non-bitfield -> 20 bitfield */ - uint64_t s32: 60; /* 32 non-bitfield -> 60 bitfield */ + int64_t ub7 __bpf_aligned; /* 7 -> 64 non-bitfield signed */ + int64_t sb4 __bpf_aligned; /* 4 -> 64 non-bitfield signed */ + uint64_t sb20 __bpf_aligned; /* 20 -> 16 non-bitfield unsigned */ + int32_t u32: 20; /* 32 non-bitfield -> 20 bitfield */ + uint64_t s32: 60 __bpf_aligned; /* 32 non-bitfield -> 60 bitfield */ }; struct core_reloc_bitfields___just_big_enough { diff --git a/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c b/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c index 1f1966e86e9f..3e6912e4df3d 100644 --- a/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c +++ b/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c @@ -54,6 +54,7 @@ SEC("sockops") int bpf_testcb(struct bpf_sock_ops *skops) { char header[sizeof(struct ipv6hdr) + sizeof(struct tcphdr)]; + struct bpf_sock_ops *reuse = skops; struct tcphdr *thdr; int good_call_rv = 0; int bad_call_rv = 0; @@ -62,6 +63,46 @@ int bpf_testcb(struct bpf_sock_ops *skops) int v = 0; int op; + /* Test reading fields in bpf_sock_ops using single register */ + asm volatile ( + "%[reuse] = *(u32 *)(%[reuse] +96)" + : [reuse] "+r"(reuse) + :); + + asm volatile ( + "%[op] = *(u32 *)(%[skops] +96)" + : [op] "+r"(op) + : [skops] "r"(skops) + :); + + asm volatile ( + "r9 = %[skops];\n" + "r8 = *(u32 *)(r9 +164);\n" + "*(u32 *)(r9 +164) = r8;\n" + :: [skops] "r"(skops) + : "r9", "r8"); + + asm volatile ( + "r1 = %[skops];\n" + "r1 = *(u64 *)(r1 +184);\n" + "if r1 == 0 goto +1;\n" + "r1 = *(u32 *)(r1 +4);\n" + :: [skops] "r"(skops):"r1"); + + asm volatile ( + "r9 = %[skops];\n" + "r9 = *(u64 *)(r9 +184);\n" + "if r9 == 0 goto +1;\n" + "r9 = *(u32 *)(r9 +4);\n" + :: [skops] "r"(skops):"r9"); + + asm volatile ( + "r1 = %[skops];\n" + "r2 = *(u64 *)(r1 +184);\n" + "if r2 == 0 goto +1;\n" + "r2 = *(u32 *)(r2 +4);\n" + :: [skops] "r"(skops):"r1", "r2"); + op = (int) skops->op; update_event_map(op); diff --git a/tools/testing/selftests/bpf/progs/test_varlen.c b/tools/testing/selftests/bpf/progs/test_varlen.c index cd4b72c55dfe..913acdffd90f 100644 --- a/tools/testing/selftests/bpf/progs/test_varlen.c +++ b/tools/testing/selftests/bpf/progs/test_varlen.c @@ -15,9 +15,9 @@ int test_pid = 0; bool capture = false; /* .bss */ -long payload1_len1 = 0; -long payload1_len2 = 0; -long total1 = 0; +__u64 payload1_len1 = 0; +__u64 payload1_len2 = 0; +__u64 total1 = 0; char payload1[MAX_LEN + MAX_LEN] = {}; /* .data */ diff --git a/tools/testing/selftests/bpf/settings b/tools/testing/selftests/bpf/settings new file mode 100644 index 000000000000..e7b9417537fb --- /dev/null +++ b/tools/testing/selftests/bpf/settings @@ -0,0 +1 @@ +timeout=0 diff --git a/tools/testing/selftests/bpf/test_btf.c b/tools/testing/selftests/bpf/test_btf.c index 305fae8f80a9..c75fc6447186 100644 --- a/tools/testing/selftests/bpf/test_btf.c +++ b/tools/testing/selftests/bpf/test_btf.c @@ -3883,7 +3883,7 @@ static int test_big_btf_info(unsigned int test_num) info_garbage.garbage = 0; err = bpf_obj_get_info_by_fd(btf_fd, info, &info_len); if (CHECK(err || info_len != sizeof(*info), - "err:%d errno:%d info_len:%u sizeof(*info):%lu", + "err:%d errno:%d info_len:%u sizeof(*info):%zu", err, errno, info_len, sizeof(*info))) { err = -1; goto done; @@ -4094,7 +4094,7 @@ static int do_test_get_info(unsigned int test_num) if (CHECK(err || !info.id || info_len != sizeof(info) || info.btf_size != raw_btf_size || (ret = memcmp(raw_btf, user_btf, expected_nbytes)), - "err:%d errno:%d info.id:%u info_len:%u sizeof(info):%lu raw_btf_size:%u info.btf_size:%u expected_nbytes:%u memcmp:%d", + "err:%d errno:%d info.id:%u info_len:%u sizeof(info):%zu raw_btf_size:%u info.btf_size:%u expected_nbytes:%u memcmp:%d", err, errno, info.id, info_len, sizeof(info), raw_btf_size, info.btf_size, expected_nbytes, ret)) { err = -1; @@ -4730,7 +4730,7 @@ ssize_t get_pprint_expected_line(enum pprint_mapv_kind_t mapv_kind, nexpected_line = snprintf(expected_line, line_size, "%s%u: {%u,0,%d,0x%x,0x%x,0x%x," - "{%lu|[%u,%u,%u,%u,%u,%u,%u,%u]},%s," + "{%llu|[%u,%u,%u,%u,%u,%u,%u,%u]},%s," "%u,0x%x,[[%d,%d],[%d,%d]]}\n", percpu_map ? "\tcpu" : "", percpu_map ? cpu : next_key, @@ -4738,7 +4738,7 @@ ssize_t get_pprint_expected_line(enum pprint_mapv_kind_t mapv_kind, v->unused_bits2a, v->bits28, v->unused_bits2b, - v->ui64, + (__u64)v->ui64, v->ui8a[0], v->ui8a[1], v->ui8a[2], v->ui8a[3], v->ui8a[4], v->ui8a[5], diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c index 754cf611723e..0d92ebcb335d 100644 --- a/tools/testing/selftests/bpf/test_maps.c +++ b/tools/testing/selftests/bpf/test_maps.c @@ -1274,6 +1274,8 @@ static void __run_parallel(unsigned int tasks, pid_t pid[tasks]; int i; + fflush(stdout); + for (i = 0; i < tasks; i++) { pid[i] = fork(); if (pid[i] == 0) { diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c index b1e4dadacd9b..22943b58d752 100644 --- a/tools/testing/selftests/bpf/test_progs.c +++ b/tools/testing/selftests/bpf/test_progs.c @@ -618,7 +618,9 @@ int cd_flavor_subdir(const char *exec_name) if (!flavor) return 0; flavor++; - fprintf(stdout, "Switching to flavor '%s' subdirectory...\n", flavor); + if (env.verbosity > VERBOSE_NONE) + fprintf(stdout, "Switching to flavor '%s' subdirectory...\n", flavor); + return chdir(flavor); } diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h index 6e09bf738473..dbb820dde138 100644 --- a/tools/testing/selftests/bpf/test_progs.h +++ b/tools/testing/selftests/bpf/test_progs.h @@ -135,6 +135,11 @@ static inline __u64 ptr_to_u64(const void *ptr) return (__u64) (unsigned long) ptr; } +static inline void *u64_to_ptr(__u64 ptr) +{ + return (void *) (unsigned long) ptr; +} + int bpf_find_map(const char *test, struct bpf_object *obj, const char *name); int compare_map_keys(int map1_fd, int map2_fd); int compare_stack_ips(int smap_fd, int amap_fd, int stack_trace_len); diff --git a/tools/testing/selftests/bpf/test_tcpnotify_user.c b/tools/testing/selftests/bpf/test_tcpnotify_user.c index 8549b31716ab..73da7fe8c152 100644 --- a/tools/testing/selftests/bpf/test_tcpnotify_user.c +++ b/tools/testing/selftests/bpf/test_tcpnotify_user.c @@ -124,17 +124,24 @@ int main(int argc, char **argv) sprintf(test_script, "iptables -A INPUT -p tcp --dport %d -j DROP", TESTPORT); - system(test_script); + if (system(test_script)) { + printf("FAILED: execute command: %s, err %d\n", test_script, -errno); + goto err; + } sprintf(test_script, "nc 127.0.0.1 %d < /etc/passwd > /dev/null 2>&1 ", TESTPORT); - system(test_script); + if (system(test_script)) + printf("execute command: %s, err %d\n", test_script, -errno); sprintf(test_script, "iptables -D INPUT -p tcp --dport %d -j DROP", TESTPORT); - system(test_script); + if (system(test_script)) { + printf("FAILED: execute command: %s, err %d\n", test_script, -errno); + goto err; + } rv = bpf_map_lookup_elem(bpf_map__fd(global_map), &key, &g); if (rv != 0) { diff --git a/tools/testing/selftests/cgroup/.gitignore b/tools/testing/selftests/cgroup/.gitignore index aa6de65b0838..84cfcabea838 100644 --- a/tools/testing/selftests/cgroup/.gitignore +++ b/tools/testing/selftests/cgroup/.gitignore @@ -2,3 +2,4 @@ test_memcontrol test_core test_freezer +test_kmem
\ No newline at end of file diff --git a/tools/testing/selftests/cgroup/Makefile b/tools/testing/selftests/cgroup/Makefile index 967f268fde74..f027d933595b 100644 --- a/tools/testing/selftests/cgroup/Makefile +++ b/tools/testing/selftests/cgroup/Makefile @@ -6,11 +6,13 @@ all: TEST_FILES := with_stress.sh TEST_PROGS := test_stress.sh TEST_GEN_PROGS = test_memcontrol +TEST_GEN_PROGS += test_kmem TEST_GEN_PROGS += test_core TEST_GEN_PROGS += test_freezer include ../lib.mk $(OUTPUT)/test_memcontrol: cgroup_util.c ../clone3/clone3_selftests.h +$(OUTPUT)/test_kmem: cgroup_util.c ../clone3/clone3_selftests.h $(OUTPUT)/test_core: cgroup_util.c ../clone3/clone3_selftests.h $(OUTPUT)/test_freezer: cgroup_util.c ../clone3/clone3_selftests.h diff --git a/tools/testing/selftests/cgroup/cgroup_util.c b/tools/testing/selftests/cgroup/cgroup_util.c index 8a637ca7d73a..05853b0b8831 100644 --- a/tools/testing/selftests/cgroup/cgroup_util.c +++ b/tools/testing/selftests/cgroup/cgroup_util.c @@ -106,7 +106,7 @@ int cg_read_strcmp(const char *cgroup, const char *control, /* Handle the case of comparing against empty string */ if (!expected) - size = 32; + return -1; else size = strlen(expected) + 1; diff --git a/tools/testing/selftests/cgroup/test_kmem.c b/tools/testing/selftests/cgroup/test_kmem.c new file mode 100644 index 000000000000..0941aa16157e --- /dev/null +++ b/tools/testing/selftests/cgroup/test_kmem.c @@ -0,0 +1,450 @@ +// SPDX-License-Identifier: GPL-2.0 +#define _GNU_SOURCE + +#include <linux/limits.h> +#include <fcntl.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <unistd.h> +#include <sys/wait.h> +#include <errno.h> +#include <sys/sysinfo.h> +#include <pthread.h> + +#include "../kselftest.h" +#include "cgroup_util.h" + + +/* + * Memory cgroup charging and vmstat data aggregation is performed using + * percpu batches 32 pages big (look at MEMCG_CHARGE_BATCH). So the maximum + * discrepancy between charge and vmstat entries is number of cpus multiplied + * by 32 pages multiplied by 2. + */ +#define MAX_VMSTAT_ERROR (4096 * 32 * 2 * get_nprocs()) + + +static int alloc_dcache(const char *cgroup, void *arg) +{ + unsigned long i; + struct stat st; + char buf[128]; + + for (i = 0; i < (unsigned long)arg; i++) { + snprintf(buf, sizeof(buf), + "/something-non-existent-with-a-long-name-%64lu-%d", + i, getpid()); + stat(buf, &st); + } + + return 0; +} + +/* + * This test allocates 100000 of negative dentries with long names. + * Then it checks that "slab" in memory.stat is larger than 1M. + * Then it sets memory.high to 1M and checks that at least 1/2 + * of slab memory has been reclaimed. + */ +static int test_kmem_basic(const char *root) +{ + int ret = KSFT_FAIL; + char *cg = NULL; + long slab0, slab1, current; + + cg = cg_name(root, "kmem_basic_test"); + if (!cg) + goto cleanup; + + if (cg_create(cg)) + goto cleanup; + + if (cg_run(cg, alloc_dcache, (void *)100000)) + goto cleanup; + + slab0 = cg_read_key_long(cg, "memory.stat", "slab "); + if (slab0 < (1 << 20)) + goto cleanup; + + cg_write(cg, "memory.high", "1M"); + slab1 = cg_read_key_long(cg, "memory.stat", "slab "); + if (slab1 <= 0) + goto cleanup; + + current = cg_read_long(cg, "memory.current"); + if (current <= 0) + goto cleanup; + + if (slab1 < slab0 / 2 && current < slab0 / 2) + ret = KSFT_PASS; +cleanup: + cg_destroy(cg); + free(cg); + + return ret; +} + +static void *alloc_kmem_fn(void *arg) +{ + alloc_dcache(NULL, (void *)100); + return NULL; +} + +static int alloc_kmem_smp(const char *cgroup, void *arg) +{ + int nr_threads = 2 * get_nprocs(); + pthread_t *tinfo; + unsigned long i; + int ret = -1; + + tinfo = calloc(nr_threads, sizeof(pthread_t)); + if (tinfo == NULL) + return -1; + + for (i = 0; i < nr_threads; i++) { + if (pthread_create(&tinfo[i], NULL, &alloc_kmem_fn, + (void *)i)) { + free(tinfo); + return -1; + } + } + + for (i = 0; i < nr_threads; i++) { + ret = pthread_join(tinfo[i], NULL); + if (ret) + break; + } + + free(tinfo); + return ret; +} + +static int cg_run_in_subcgroups(const char *parent, + int (*fn)(const char *cgroup, void *arg), + void *arg, int times) +{ + char *child; + int i; + + for (i = 0; i < times; i++) { + child = cg_name_indexed(parent, "child", i); + if (!child) + return -1; + + if (cg_create(child)) { + cg_destroy(child); + free(child); + return -1; + } + + if (cg_run(child, fn, NULL)) { + cg_destroy(child); + free(child); + return -1; + } + + cg_destroy(child); + free(child); + } + + return 0; +} + +/* + * The test creates and destroys a large number of cgroups. In each cgroup it + * allocates some slab memory (mostly negative dentries) using 2 * NR_CPUS + * threads. Then it checks the sanity of numbers on the parent level: + * the total size of the cgroups should be roughly equal to + * anon + file + slab + kernel_stack. + */ +static int test_kmem_memcg_deletion(const char *root) +{ + long current, slab, anon, file, kernel_stack, sum; + int ret = KSFT_FAIL; + char *parent; + + parent = cg_name(root, "kmem_memcg_deletion_test"); + if (!parent) + goto cleanup; + + if (cg_create(parent)) + goto cleanup; + + if (cg_write(parent, "cgroup.subtree_control", "+memory")) + goto cleanup; + + if (cg_run_in_subcgroups(parent, alloc_kmem_smp, NULL, 100)) + goto cleanup; + + current = cg_read_long(parent, "memory.current"); + slab = cg_read_key_long(parent, "memory.stat", "slab "); + anon = cg_read_key_long(parent, "memory.stat", "anon "); + file = cg_read_key_long(parent, "memory.stat", "file "); + kernel_stack = cg_read_key_long(parent, "memory.stat", "kernel_stack "); + if (current < 0 || slab < 0 || anon < 0 || file < 0 || + kernel_stack < 0) + goto cleanup; + + sum = slab + anon + file + kernel_stack; + if (abs(sum - current) < MAX_VMSTAT_ERROR) { + ret = KSFT_PASS; + } else { + printf("memory.current = %ld\n", current); + printf("slab + anon + file + kernel_stack = %ld\n", sum); + printf("slab = %ld\n", slab); + printf("anon = %ld\n", anon); + printf("file = %ld\n", file); + printf("kernel_stack = %ld\n", kernel_stack); + } + +cleanup: + cg_destroy(parent); + free(parent); + + return ret; +} + +/* + * The test reads the entire /proc/kpagecgroup. If the operation went + * successfully (and the kernel didn't panic), the test is treated as passed. + */ +static int test_kmem_proc_kpagecgroup(const char *root) +{ + unsigned long buf[128]; + int ret = KSFT_FAIL; + ssize_t len; + int fd; + + fd = open("/proc/kpagecgroup", O_RDONLY); + if (fd < 0) + return ret; + + do { + len = read(fd, buf, sizeof(buf)); + } while (len > 0); + + if (len == 0) + ret = KSFT_PASS; + + close(fd); + return ret; +} + +static void *pthread_wait_fn(void *arg) +{ + sleep(100); + return NULL; +} + +static int spawn_1000_threads(const char *cgroup, void *arg) +{ + int nr_threads = 1000; + pthread_t *tinfo; + unsigned long i; + long stack; + int ret = -1; + + tinfo = calloc(nr_threads, sizeof(pthread_t)); + if (tinfo == NULL) + return -1; + + for (i = 0; i < nr_threads; i++) { + if (pthread_create(&tinfo[i], NULL, &pthread_wait_fn, + (void *)i)) { + free(tinfo); + return(-1); + } + } + + stack = cg_read_key_long(cgroup, "memory.stat", "kernel_stack "); + if (stack >= 4096 * 1000) + ret = 0; + + free(tinfo); + return ret; +} + +/* + * The test spawns a process, which spawns 1000 threads. Then it checks + * that memory.stat's kernel_stack is at least 1000 pages large. + */ +static int test_kmem_kernel_stacks(const char *root) +{ + int ret = KSFT_FAIL; + char *cg = NULL; + + cg = cg_name(root, "kmem_kernel_stacks_test"); + if (!cg) + goto cleanup; + + if (cg_create(cg)) + goto cleanup; + + if (cg_run(cg, spawn_1000_threads, NULL)) + goto cleanup; + + ret = KSFT_PASS; +cleanup: + cg_destroy(cg); + free(cg); + + return ret; +} + +/* + * This test sequentionally creates 30 child cgroups, allocates some + * kernel memory in each of them, and deletes them. Then it checks + * that the number of dying cgroups on the parent level is 0. + */ +static int test_kmem_dead_cgroups(const char *root) +{ + int ret = KSFT_FAIL; + char *parent; + long dead; + int i; + + parent = cg_name(root, "kmem_dead_cgroups_test"); + if (!parent) + goto cleanup; + + if (cg_create(parent)) + goto cleanup; + + if (cg_write(parent, "cgroup.subtree_control", "+memory")) + goto cleanup; + + if (cg_run_in_subcgroups(parent, alloc_dcache, (void *)100, 30)) + goto cleanup; + + for (i = 0; i < 5; i++) { + dead = cg_read_key_long(parent, "cgroup.stat", + "nr_dying_descendants "); + if (dead == 0) { + ret = KSFT_PASS; + break; + } + /* + * Reclaiming cgroups might take some time, + * let's wait a bit and repeat. + */ + sleep(1); + } + +cleanup: + cg_destroy(parent); + free(parent); + + return ret; +} + +/* + * This test creates a sub-tree with 1000 memory cgroups. + * Then it checks that the memory.current on the parent level + * is greater than 0 and approximates matches the percpu value + * from memory.stat. + */ +static int test_percpu_basic(const char *root) +{ + int ret = KSFT_FAIL; + char *parent, *child; + long current, percpu; + int i; + + parent = cg_name(root, "percpu_basic_test"); + if (!parent) + goto cleanup; + + if (cg_create(parent)) + goto cleanup; + + if (cg_write(parent, "cgroup.subtree_control", "+memory")) + goto cleanup; + + for (i = 0; i < 1000; i++) { + child = cg_name_indexed(parent, "child", i); + if (!child) + return -1; + + if (cg_create(child)) + goto cleanup_children; + + free(child); + } + + current = cg_read_long(parent, "memory.current"); + percpu = cg_read_key_long(parent, "memory.stat", "percpu "); + + if (current > 0 && percpu > 0 && abs(current - percpu) < + MAX_VMSTAT_ERROR) + ret = KSFT_PASS; + else + printf("memory.current %ld\npercpu %ld\n", + current, percpu); + +cleanup_children: + for (i = 0; i < 1000; i++) { + child = cg_name_indexed(parent, "child", i); + cg_destroy(child); + free(child); + } + +cleanup: + cg_destroy(parent); + free(parent); + + return ret; +} + +#define T(x) { x, #x } +struct kmem_test { + int (*fn)(const char *root); + const char *name; +} tests[] = { + T(test_kmem_basic), + T(test_kmem_memcg_deletion), + T(test_kmem_proc_kpagecgroup), + T(test_kmem_kernel_stacks), + T(test_kmem_dead_cgroups), + T(test_percpu_basic), +}; +#undef T + +int main(int argc, char **argv) +{ + char root[PATH_MAX]; + int i, ret = EXIT_SUCCESS; + + if (cg_find_unified_root(root, sizeof(root))) + ksft_exit_skip("cgroup v2 isn't mounted\n"); + + /* + * Check that memory controller is available: + * memory is listed in cgroup.controllers + */ + if (cg_read_strstr(root, "cgroup.controllers", "memory")) + ksft_exit_skip("memory controller isn't available\n"); + + if (cg_read_strstr(root, "cgroup.subtree_control", "memory")) + if (cg_write(root, "cgroup.subtree_control", "+memory")) + ksft_exit_skip("Failed to set memory controller\n"); + + for (i = 0; i < ARRAY_SIZE(tests); i++) { + switch (tests[i].fn(root)) { + case KSFT_PASS: + ksft_test_result_pass("%s\n", tests[i].name); + break; + case KSFT_SKIP: + ksft_test_result_skip("%s\n", tests[i].name); + break; + default: + ret = EXIT_FAILURE; + ksft_test_result_fail("%s\n", tests[i].name); + break; + } + } + + return ret; +} diff --git a/tools/testing/selftests/clone3/clone3.c b/tools/testing/selftests/clone3/clone3.c index b7e6dec36173..42be3b925830 100644 --- a/tools/testing/selftests/clone3/clone3.c +++ b/tools/testing/selftests/clone3/clone3.c @@ -20,13 +20,6 @@ #include "../kselftest.h" #include "clone3_selftests.h" -/* - * Different sizes of struct clone_args - */ -#ifndef CLONE3_ARGS_SIZE_V0 -#define CLONE3_ARGS_SIZE_V0 64 -#endif - enum test_mode { CLONE3_ARGS_NO_TEST, CLONE3_ARGS_ALL_0, @@ -38,13 +31,13 @@ enum test_mode { static int call_clone3(uint64_t flags, size_t size, enum test_mode test_mode) { - struct clone_args args = { + struct __clone_args args = { .flags = flags, .exit_signal = SIGCHLD, }; struct clone_args_extended { - struct clone_args args; + struct __clone_args args; __aligned_u64 excess_space[2]; } args_ext; @@ -52,11 +45,11 @@ static int call_clone3(uint64_t flags, size_t size, enum test_mode test_mode) int status; memset(&args_ext, 0, sizeof(args_ext)); - if (size > sizeof(struct clone_args)) + if (size > sizeof(struct __clone_args)) args_ext.excess_space[1] = 1; if (size == 0) - size = sizeof(struct clone_args); + size = sizeof(struct __clone_args); switch (test_mode) { case CLONE3_ARGS_ALL_0: @@ -77,9 +70,9 @@ static int call_clone3(uint64_t flags, size_t size, enum test_mode test_mode) break; } - memcpy(&args_ext.args, &args, sizeof(struct clone_args)); + memcpy(&args_ext.args, &args, sizeof(struct __clone_args)); - pid = sys_clone3((struct clone_args *)&args_ext, size); + pid = sys_clone3((struct __clone_args *)&args_ext, size); if (pid < 0) { ksft_print_msg("%s - Failed to create new process\n", strerror(errno)); @@ -144,14 +137,14 @@ int main(int argc, char *argv[]) else ksft_test_result_skip("Skipping clone3() with CLONE_NEWPID\n"); - /* Do a clone3() with CLONE3_ARGS_SIZE_V0. */ - test_clone3(0, CLONE3_ARGS_SIZE_V0, 0, CLONE3_ARGS_NO_TEST); + /* Do a clone3() with CLONE_ARGS_SIZE_VER0. */ + test_clone3(0, CLONE_ARGS_SIZE_VER0, 0, CLONE3_ARGS_NO_TEST); - /* Do a clone3() with CLONE3_ARGS_SIZE_V0 - 8 */ - test_clone3(0, CLONE3_ARGS_SIZE_V0 - 8, -EINVAL, CLONE3_ARGS_NO_TEST); + /* Do a clone3() with CLONE_ARGS_SIZE_VER0 - 8 */ + test_clone3(0, CLONE_ARGS_SIZE_VER0 - 8, -EINVAL, CLONE3_ARGS_NO_TEST); /* Do a clone3() with sizeof(struct clone_args) + 8 */ - test_clone3(0, sizeof(struct clone_args) + 8, 0, CLONE3_ARGS_NO_TEST); + test_clone3(0, sizeof(struct __clone_args) + 8, 0, CLONE3_ARGS_NO_TEST); /* Do a clone3() with exit_signal having highest 32 bits non-zero */ test_clone3(0, 0, -EINVAL, CLONE3_ARGS_INVAL_EXIT_SIGNAL_BIG); @@ -165,31 +158,31 @@ int main(int argc, char *argv[]) /* Do a clone3() with NSIG < exit_signal < CSIG */ test_clone3(0, 0, -EINVAL, CLONE3_ARGS_INVAL_EXIT_SIGNAL_NSIG); - test_clone3(0, sizeof(struct clone_args) + 8, 0, CLONE3_ARGS_ALL_0); + test_clone3(0, sizeof(struct __clone_args) + 8, 0, CLONE3_ARGS_ALL_0); - test_clone3(0, sizeof(struct clone_args) + 16, -E2BIG, + test_clone3(0, sizeof(struct __clone_args) + 16, -E2BIG, CLONE3_ARGS_ALL_0); - test_clone3(0, sizeof(struct clone_args) * 2, -E2BIG, + test_clone3(0, sizeof(struct __clone_args) * 2, -E2BIG, CLONE3_ARGS_ALL_0); /* Do a clone3() with > page size */ test_clone3(0, getpagesize() + 8, -E2BIG, CLONE3_ARGS_NO_TEST); - /* Do a clone3() with CLONE3_ARGS_SIZE_V0 in a new PID NS. */ + /* Do a clone3() with CLONE_ARGS_SIZE_VER0 in a new PID NS. */ if (uid == 0) - test_clone3(CLONE_NEWPID, CLONE3_ARGS_SIZE_V0, 0, + test_clone3(CLONE_NEWPID, CLONE_ARGS_SIZE_VER0, 0, CLONE3_ARGS_NO_TEST); else ksft_test_result_skip("Skipping clone3() with CLONE_NEWPID\n"); - /* Do a clone3() with CLONE3_ARGS_SIZE_V0 - 8 in a new PID NS */ - test_clone3(CLONE_NEWPID, CLONE3_ARGS_SIZE_V0 - 8, -EINVAL, + /* Do a clone3() with CLONE_ARGS_SIZE_VER0 - 8 in a new PID NS */ + test_clone3(CLONE_NEWPID, CLONE_ARGS_SIZE_VER0 - 8, -EINVAL, CLONE3_ARGS_NO_TEST); /* Do a clone3() with sizeof(struct clone_args) + 8 in a new PID NS */ if (uid == 0) - test_clone3(CLONE_NEWPID, sizeof(struct clone_args) + 8, 0, + test_clone3(CLONE_NEWPID, sizeof(struct __clone_args) + 8, 0, CLONE3_ARGS_NO_TEST); else ksft_test_result_skip("Skipping clone3() with CLONE_NEWPID\n"); diff --git a/tools/testing/selftests/clone3/clone3_cap_checkpoint_restore.c b/tools/testing/selftests/clone3/clone3_cap_checkpoint_restore.c index 9562425aa0a9..55bd387ce7ec 100644 --- a/tools/testing/selftests/clone3/clone3_cap_checkpoint_restore.c +++ b/tools/testing/selftests/clone3/clone3_cap_checkpoint_restore.c @@ -44,13 +44,13 @@ static int call_clone3_set_tid(struct __test_metadata *_metadata, int status; pid_t pid = -1; - struct clone_args args = { + struct __clone_args args = { .exit_signal = SIGCHLD, .set_tid = ptr_to_u64(set_tid), .set_tid_size = set_tid_size, }; - pid = sys_clone3(&args, sizeof(struct clone_args)); + pid = sys_clone3(&args, sizeof(args)); if (pid < 0) { TH_LOG("%s - Failed to create new process", strerror(errno)); return -errno; diff --git a/tools/testing/selftests/clone3/clone3_clear_sighand.c b/tools/testing/selftests/clone3/clone3_clear_sighand.c index db5fc9c5edcf..47a8c0fc3676 100644 --- a/tools/testing/selftests/clone3/clone3_clear_sighand.c +++ b/tools/testing/selftests/clone3/clone3_clear_sighand.c @@ -47,7 +47,7 @@ static void test_clone3_clear_sighand(void) { int ret; pid_t pid; - struct clone_args args = {}; + struct __clone_args args = {}; struct sigaction act; /* diff --git a/tools/testing/selftests/clone3/clone3_selftests.h b/tools/testing/selftests/clone3/clone3_selftests.h index 91c1a78ddb39..e81ffaaee02b 100644 --- a/tools/testing/selftests/clone3/clone3_selftests.h +++ b/tools/testing/selftests/clone3/clone3_selftests.h @@ -19,13 +19,11 @@ #define CLONE_INTO_CGROUP 0x200000000ULL /* Clone into a specific cgroup given the right permissions. */ #endif -#ifndef CLONE_ARGS_SIZE_VER0 -#define CLONE_ARGS_SIZE_VER0 64 -#endif - #ifndef __NR_clone3 #define __NR_clone3 -1 -struct clone_args { +#endif + +struct __clone_args { __aligned_u64 flags; __aligned_u64 pidfd; __aligned_u64 child_tid; @@ -34,15 +32,21 @@ struct clone_args { __aligned_u64 stack; __aligned_u64 stack_size; __aligned_u64 tls; -#define CLONE_ARGS_SIZE_VER1 80 +#ifndef CLONE_ARGS_SIZE_VER0 +#define CLONE_ARGS_SIZE_VER0 64 /* sizeof first published struct */ +#endif __aligned_u64 set_tid; __aligned_u64 set_tid_size; -#define CLONE_ARGS_SIZE_VER2 88 +#ifndef CLONE_ARGS_SIZE_VER1 +#define CLONE_ARGS_SIZE_VER1 80 /* sizeof second published struct */ +#endif __aligned_u64 cgroup; +#ifndef CLONE_ARGS_SIZE_VER2 +#define CLONE_ARGS_SIZE_VER2 88 /* sizeof third published struct */ +#endif }; -#endif /* __NR_clone3 */ -static pid_t sys_clone3(struct clone_args *args, size_t size) +static pid_t sys_clone3(struct __clone_args *args, size_t size) { fflush(stdout); fflush(stderr); @@ -52,7 +56,7 @@ static pid_t sys_clone3(struct clone_args *args, size_t size) static inline void test_clone3_supported(void) { pid_t pid; - struct clone_args args = {}; + struct __clone_args args = {}; if (__NR_clone3 < 0) ksft_exit_skip("clone3() syscall is not supported\n"); diff --git a/tools/testing/selftests/clone3/clone3_set_tid.c b/tools/testing/selftests/clone3/clone3_set_tid.c index 5831c1082d6d..0229e9ebb995 100644 --- a/tools/testing/selftests/clone3/clone3_set_tid.c +++ b/tools/testing/selftests/clone3/clone3_set_tid.c @@ -46,14 +46,14 @@ static int call_clone3_set_tid(pid_t *set_tid, int status; pid_t pid = -1; - struct clone_args args = { + struct __clone_args args = { .flags = flags, .exit_signal = SIGCHLD, .set_tid = ptr_to_u64(set_tid), .set_tid_size = set_tid_size, }; - pid = sys_clone3(&args, sizeof(struct clone_args)); + pid = sys_clone3(&args, sizeof(args)); if (pid < 0) { ksft_print_msg("%s - Failed to create new process\n", strerror(errno)); diff --git a/tools/testing/selftests/exec/.gitignore b/tools/testing/selftests/exec/.gitignore index 94b02a18f230..344a99c6da1b 100644 --- a/tools/testing/selftests/exec/.gitignore +++ b/tools/testing/selftests/exec/.gitignore @@ -10,3 +10,4 @@ execveat.denatured /recursion-depth xxxxxxxx* pipe +S_I*.test diff --git a/tools/testing/selftests/exec/Makefile b/tools/testing/selftests/exec/Makefile index 4453b8f8def3..0a13b110c1e6 100644 --- a/tools/testing/selftests/exec/Makefile +++ b/tools/testing/selftests/exec/Makefile @@ -3,7 +3,7 @@ CFLAGS = -Wall CFLAGS += -Wno-nonnull CFLAGS += -D_GNU_SOURCE -TEST_PROGS := binfmt_script +TEST_PROGS := binfmt_script non-regular TEST_GEN_PROGS := execveat TEST_GEN_FILES := execveat.symlink execveat.denatured script subdir pipe # Makefile is a run-time dependency, since it's accessed by the execveat test @@ -11,7 +11,8 @@ TEST_FILES := Makefile TEST_GEN_PROGS += recursion-depth -EXTRA_CLEAN := $(OUTPUT)/subdir.moved $(OUTPUT)/execveat.moved $(OUTPUT)/xxxxx* +EXTRA_CLEAN := $(OUTPUT)/subdir.moved $(OUTPUT)/execveat.moved $(OUTPUT)/xxxxx* \ + $(OUTPUT)/S_I*.test include ../lib.mk diff --git a/tools/testing/selftests/exec/non-regular.c b/tools/testing/selftests/exec/non-regular.c new file mode 100644 index 000000000000..cd3a34aca93e --- /dev/null +++ b/tools/testing/selftests/exec/non-regular.c @@ -0,0 +1,196 @@ +// SPDX-License-Identifier: GPL-2.0+ +#include <errno.h> +#include <fcntl.h> +#include <stdio.h> +#include <string.h> +#include <unistd.h> +#include <sys/socket.h> +#include <sys/stat.h> +#include <sys/sysmacros.h> +#include <sys/types.h> + +#include "../kselftest_harness.h" + +/* Remove a file, ignoring the result if it didn't exist. */ +void rm(struct __test_metadata *_metadata, const char *pathname, + int is_dir) +{ + int rc; + + if (is_dir) + rc = rmdir(pathname); + else + rc = unlink(pathname); + + if (rc < 0) { + ASSERT_EQ(errno, ENOENT) { + TH_LOG("Not ENOENT: %s", pathname); + } + } else { + ASSERT_EQ(rc, 0) { + TH_LOG("Failed to remove: %s", pathname); + } + } +} + +FIXTURE(file) { + char *pathname; + int is_dir; +}; + +FIXTURE_VARIANT(file) +{ + const char *name; + int expected; + int is_dir; + void (*setup)(struct __test_metadata *_metadata, + FIXTURE_DATA(file) *self, + const FIXTURE_VARIANT(file) *variant); + int major, minor, mode; /* for mknod() */ +}; + +void setup_link(struct __test_metadata *_metadata, + FIXTURE_DATA(file) *self, + const FIXTURE_VARIANT(file) *variant) +{ + const char * const paths[] = { + "/bin/true", + "/usr/bin/true", + }; + int i; + + for (i = 0; i < ARRAY_SIZE(paths); i++) { + if (access(paths[i], X_OK) == 0) { + ASSERT_EQ(symlink(paths[i], self->pathname), 0); + return; + } + } + ASSERT_EQ(1, 0) { + TH_LOG("Could not find viable 'true' binary"); + } +} + +FIXTURE_VARIANT_ADD(file, S_IFLNK) +{ + .name = "S_IFLNK", + .expected = ELOOP, + .setup = setup_link, +}; + +void setup_dir(struct __test_metadata *_metadata, + FIXTURE_DATA(file) *self, + const FIXTURE_VARIANT(file) *variant) +{ + ASSERT_EQ(mkdir(self->pathname, 0755), 0); +} + +FIXTURE_VARIANT_ADD(file, S_IFDIR) +{ + .name = "S_IFDIR", + .is_dir = 1, + .expected = EACCES, + .setup = setup_dir, +}; + +void setup_node(struct __test_metadata *_metadata, + FIXTURE_DATA(file) *self, + const FIXTURE_VARIANT(file) *variant) +{ + dev_t dev; + int rc; + + dev = makedev(variant->major, variant->minor); + rc = mknod(self->pathname, 0755 | variant->mode, dev); + ASSERT_EQ(rc, 0) { + if (errno == EPERM) + SKIP(return, "Please run as root; cannot mknod(%s)", + variant->name); + } +} + +FIXTURE_VARIANT_ADD(file, S_IFBLK) +{ + .name = "S_IFBLK", + .expected = EACCES, + .setup = setup_node, + /* /dev/loop0 */ + .major = 7, + .minor = 0, + .mode = S_IFBLK, +}; + +FIXTURE_VARIANT_ADD(file, S_IFCHR) +{ + .name = "S_IFCHR", + .expected = EACCES, + .setup = setup_node, + /* /dev/zero */ + .major = 1, + .minor = 5, + .mode = S_IFCHR, +}; + +void setup_fifo(struct __test_metadata *_metadata, + FIXTURE_DATA(file) *self, + const FIXTURE_VARIANT(file) *variant) +{ + ASSERT_EQ(mkfifo(self->pathname, 0755), 0); +} + +FIXTURE_VARIANT_ADD(file, S_IFIFO) +{ + .name = "S_IFIFO", + .expected = EACCES, + .setup = setup_fifo, +}; + +FIXTURE_SETUP(file) +{ + ASSERT_GT(asprintf(&self->pathname, "%s.test", variant->name), 6); + self->is_dir = variant->is_dir; + + rm(_metadata, self->pathname, variant->is_dir); + variant->setup(_metadata, self, variant); +} + +FIXTURE_TEARDOWN(file) +{ + rm(_metadata, self->pathname, self->is_dir); +} + +TEST_F(file, exec_errno) +{ + char * const argv[2] = { (char * const)self->pathname, NULL }; + + EXPECT_LT(execv(argv[0], argv), 0); + EXPECT_EQ(errno, variant->expected); +} + +/* S_IFSOCK */ +FIXTURE(sock) +{ + int fd; +}; + +FIXTURE_SETUP(sock) +{ + self->fd = socket(AF_INET, SOCK_STREAM, 0); + ASSERT_GE(self->fd, 0); +} + +FIXTURE_TEARDOWN(sock) +{ + if (self->fd >= 0) + ASSERT_EQ(close(self->fd), 0); +} + +TEST_F(sock, exec_errno) +{ + char * const argv[2] = { " magic socket ", NULL }; + char * const envp[1] = { NULL }; + + EXPECT_LT(fexecve(self->fd, argv, envp), 0); + EXPECT_EQ(errno, EACCES); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/firmware/fw_filesystem.sh b/tools/testing/selftests/firmware/fw_filesystem.sh index fcc281373b4d..c2a2a100114b 100755 --- a/tools/testing/selftests/firmware/fw_filesystem.sh +++ b/tools/testing/selftests/firmware/fw_filesystem.sh @@ -149,6 +149,26 @@ config_unset_into_buf() echo 0 > $DIR/config_into_buf } +config_set_buf_size() +{ + echo $1 > $DIR/config_buf_size +} + +config_set_file_offset() +{ + echo $1 > $DIR/config_file_offset +} + +config_set_partial() +{ + echo 1 > $DIR/config_partial +} + +config_unset_partial() +{ + echo 0 > $DIR/config_partial +} + config_set_sync_direct() { echo 1 > $DIR/config_sync_direct @@ -207,6 +227,35 @@ read_firmwares() done } +read_partial_firmwares() +{ + if [ "$(cat $DIR/config_into_buf)" == "1" ]; then + fwfile="${FW_INTO_BUF}" + else + fwfile="${FW}" + fi + + if [ "$1" = "xzonly" ]; then + fwfile="${fwfile}-orig" + fi + + # Strip fwfile down to match partial offset and length + partial_data="$(cat $fwfile)" + partial_data="${partial_data:$2:$3}" + + for i in $(seq 0 3); do + config_set_read_fw_idx $i + + read_firmware="$(cat $DIR/read_firmware)" + + # Verify the contents are what we expect. + if [ $read_firmware != $partial_data ]; then + echo "request #$i: partial firmware was not loaded" >&2 + exit 1 + fi + done +} + read_firmwares_expect_nofile() { for i in $(seq 0 3); do @@ -242,6 +291,21 @@ test_batched_request_firmware_into_buf_nofile() echo "OK" } +test_request_partial_firmware_into_buf_nofile() +{ + echo -n "Test request_partial_firmware_into_buf() off=$1 size=$2 nofile: " + config_reset + config_set_name nope-test-firmware.bin + config_set_into_buf + config_set_partial + config_set_buf_size $2 + config_set_file_offset $1 + config_trigger_sync + read_firmwares_expect_nofile + release_all_firmware + echo "OK" +} + test_batched_request_firmware_direct_nofile() { echo -n "Batched request_firmware_direct() nofile try #$1: " @@ -356,6 +420,21 @@ test_request_firmware_nowait_custom() echo "OK" } +test_request_partial_firmware_into_buf() +{ + echo -n "Test request_partial_firmware_into_buf() off=$1 size=$2: " + config_reset + config_set_name $TEST_FIRMWARE_INTO_BUF_FILENAME + config_set_into_buf + config_set_partial + config_set_buf_size $2 + config_set_file_offset $1 + config_trigger_sync + read_partial_firmwares normal $1 $2 + release_all_firmware + echo "OK" +} + # Only continue if batched request triggers are present on the # test-firmware driver test_config_present @@ -383,6 +462,12 @@ for i in $(seq 1 5); do test_request_firmware_nowait_custom $i normal done +# Partial loads cannot use fallback, so do not repeat tests. +test_request_partial_firmware_into_buf 0 10 +test_request_partial_firmware_into_buf 0 5 +test_request_partial_firmware_into_buf 1 6 +test_request_partial_firmware_into_buf 2 10 + # Test for file not found, errors are expected, the failure would be # a hung task, which would require a hard reset. echo @@ -407,6 +492,12 @@ for i in $(seq 1 5); do test_request_firmware_nowait_custom_nofile $i done +# Partial loads cannot use fallback, so do not repeat tests. +test_request_partial_firmware_into_buf_nofile 0 10 +test_request_partial_firmware_into_buf_nofile 0 5 +test_request_partial_firmware_into_buf_nofile 1 6 +test_request_partial_firmware_into_buf_nofile 2 10 + test "$HAS_FW_LOADER_COMPRESS" != "yes" && exit 0 # test with both files present diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_kprobe.tc b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_kprobe.tc index 68550f97d3c3..3bcd4c3624ee 100644 --- a/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_kprobe.tc +++ b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_kprobe.tc @@ -6,7 +6,7 @@ echo 0 > events/enable echo > dynamic_events -PLACE=_do_fork +PLACE=kernel_clone echo "p:myevent1 $PLACE" >> dynamic_events echo "r:myevent2 $PLACE" >> dynamic_events diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/clear_select_events.tc b/tools/testing/selftests/ftrace/test.d/dynevent/clear_select_events.tc index c969be9eb7de..438961971b7e 100644 --- a/tools/testing/selftests/ftrace/test.d/dynevent/clear_select_events.tc +++ b/tools/testing/selftests/ftrace/test.d/dynevent/clear_select_events.tc @@ -6,7 +6,7 @@ echo 0 > events/enable echo > dynamic_events -PLACE=_do_fork +PLACE=kernel_clone setup_events() { echo "p:myevent1 $PLACE" >> dynamic_events diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/generic_clear_event.tc b/tools/testing/selftests/ftrace/test.d/dynevent/generic_clear_event.tc index 16d543eaac88..a8603bd23e0d 100644 --- a/tools/testing/selftests/ftrace/test.d/dynevent/generic_clear_event.tc +++ b/tools/testing/selftests/ftrace/test.d/dynevent/generic_clear_event.tc @@ -6,7 +6,7 @@ echo 0 > events/enable echo > dynamic_events -PLACE=_do_fork +PLACE=kernel_clone setup_events() { echo "p:myevent1 $PLACE" >> dynamic_events diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-stacktrace.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-stacktrace.tc index 0f41e441c203..98305d76bd04 100644 --- a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-stacktrace.tc +++ b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-stacktrace.tc @@ -4,9 +4,9 @@ # requires: set_ftrace_filter # flags: instance -echo _do_fork:stacktrace >> set_ftrace_filter +echo kernel_clone:stacktrace >> set_ftrace_filter -grep -q "_do_fork:stacktrace:unlimited" set_ftrace_filter +grep -q "kernel_clone:stacktrace:unlimited" set_ftrace_filter (echo "forked"; sleep 1) diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/add_and_remove.tc b/tools/testing/selftests/ftrace/test.d/kprobe/add_and_remove.tc index eba858c21815..9737cd0578a7 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/add_and_remove.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/add_and_remove.tc @@ -3,7 +3,7 @@ # description: Kprobe dynamic event - adding and removing # requires: kprobe_events -echo p:myevent _do_fork > kprobe_events +echo p:myevent kernel_clone > kprobe_events grep myevent kprobe_events test -d events/kprobes/myevent echo > kprobe_events diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/busy_check.tc b/tools/testing/selftests/ftrace/test.d/kprobe/busy_check.tc index d10bf4f05bc8..f9a40af76888 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/busy_check.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/busy_check.tc @@ -3,7 +3,7 @@ # description: Kprobe dynamic event - busy event check # requires: kprobe_events -echo p:myevent _do_fork > kprobe_events +echo p:myevent kernel_clone > kprobe_events test -d events/kprobes/myevent echo 1 > events/kprobes/myevent/enable echo > kprobe_events && exit_fail # this must fail diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args.tc index 61f2ac441aec..eb543d3cfe5f 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args.tc @@ -3,13 +3,13 @@ # description: Kprobe dynamic event with arguments # requires: kprobe_events -echo 'p:testprobe _do_fork $stack $stack0 +0($stack)' > kprobe_events +echo 'p:testprobe kernel_clone $stack $stack0 +0($stack)' > kprobe_events grep testprobe kprobe_events | grep -q 'arg1=\$stack arg2=\$stack0 arg3=+0(\$stack)' test -d events/kprobes/testprobe echo 1 > events/kprobes/testprobe/enable ( echo "forked") -grep testprobe trace | grep '_do_fork' | \ +grep testprobe trace | grep 'kernel_clone' | \ grep -q 'arg1=0x[[:xdigit:]]* arg2=0x[[:xdigit:]]* arg3=0x[[:xdigit:]]*$' echo 0 > events/kprobes/testprobe/enable diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_comm.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_comm.tc index 05aaeed6987f..4e5b63be51c9 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_comm.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_comm.tc @@ -5,7 +5,7 @@ grep -A1 "fetcharg:" README | grep -q "\$comm" || exit_unsupported # this is too old -echo 'p:testprobe _do_fork comm=$comm ' > kprobe_events +echo 'p:testprobe kernel_clone comm=$comm ' > kprobe_events grep testprobe kprobe_events | grep -q 'comm=$comm' test -d events/kprobes/testprobe diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc index b5fa05443b39..a1d70588ab21 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc @@ -30,13 +30,13 @@ esac : "Test get argument (1)" echo "p:testprobe tracefs_create_dir arg1=+0(${ARG1}):string" > kprobe_events echo 1 > events/kprobes/testprobe/enable -echo "p:test _do_fork" >> kprobe_events +echo "p:test kernel_clone" >> kprobe_events grep -qe "testprobe.* arg1=\"test\"" trace echo 0 > events/kprobes/testprobe/enable : "Test get argument (2)" echo "p:testprobe tracefs_create_dir arg1=+0(${ARG1}):string arg2=+0(${ARG1}):string" > kprobe_events echo 1 > events/kprobes/testprobe/enable -echo "p:test _do_fork" >> kprobe_events +echo "p:test kernel_clone" >> kprobe_events grep -qe "testprobe.* arg1=\"test\" arg2=\"test\"" trace diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_symbol.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_symbol.tc index b8c75a3d003c..bd25dd0ba0d0 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_symbol.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_symbol.tc @@ -14,12 +14,12 @@ elif ! grep "$SYMBOL\$" /proc/kallsyms; then fi : "Test get basic types symbol argument" -echo "p:testprobe_u _do_fork arg1=@linux_proc_banner:u64 arg2=@linux_proc_banner:u32 arg3=@linux_proc_banner:u16 arg4=@linux_proc_banner:u8" > kprobe_events -echo "p:testprobe_s _do_fork arg1=@linux_proc_banner:s64 arg2=@linux_proc_banner:s32 arg3=@linux_proc_banner:s16 arg4=@linux_proc_banner:s8" >> kprobe_events +echo "p:testprobe_u kernel_clone arg1=@linux_proc_banner:u64 arg2=@linux_proc_banner:u32 arg3=@linux_proc_banner:u16 arg4=@linux_proc_banner:u8" > kprobe_events +echo "p:testprobe_s kernel_clone arg1=@linux_proc_banner:s64 arg2=@linux_proc_banner:s32 arg3=@linux_proc_banner:s16 arg4=@linux_proc_banner:s8" >> kprobe_events if grep -q "x8/16/32/64" README; then - echo "p:testprobe_x _do_fork arg1=@linux_proc_banner:x64 arg2=@linux_proc_banner:x32 arg3=@linux_proc_banner:x16 arg4=@linux_proc_banner:x8" >> kprobe_events + echo "p:testprobe_x kernel_clone arg1=@linux_proc_banner:x64 arg2=@linux_proc_banner:x32 arg3=@linux_proc_banner:x16 arg4=@linux_proc_banner:x8" >> kprobe_events fi -echo "p:testprobe_bf _do_fork arg1=@linux_proc_banner:b8@4/32" >> kprobe_events +echo "p:testprobe_bf kernel_clone arg1=@linux_proc_banner:b8@4/32" >> kprobe_events echo 1 > events/kprobes/enable (echo "forked") echo 0 > events/kprobes/enable @@ -27,7 +27,7 @@ grep "testprobe_[usx]:.* arg1=.* arg2=.* arg3=.* arg4=.*" trace grep "testprobe_bf:.* arg1=.*" trace : "Test get string symbol argument" -echo "p:testprobe_str _do_fork arg1=@linux_proc_banner:string" > kprobe_events +echo "p:testprobe_str kernel_clone arg1=@linux_proc_banner:string" > kprobe_events echo 1 > events/kprobes/enable (echo "forked") echo 0 > events/kprobes/enable diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_type.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_type.tc index 0610e0b5587c..91fcce1c241c 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_type.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_type.tc @@ -4,7 +4,7 @@ # requires: kprobe_events "x8/16/32/64":README gen_event() { # Bitsize - echo "p:testprobe _do_fork \$stack0:s$1 \$stack0:u$1 \$stack0:x$1 \$stack0:b4@4/$1" + echo "p:testprobe kernel_clone \$stack0:s$1 \$stack0:u$1 \$stack0:x$1 \$stack0:b4@4/$1" } check_types() { # s-type u-type x-type bf-type width diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_ftrace.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_ftrace.tc index 81d8b58c03bc..0d179094191f 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_ftrace.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_ftrace.tc @@ -5,29 +5,29 @@ # prepare echo nop > current_tracer -echo _do_fork > set_ftrace_filter -echo 'p:testprobe _do_fork' > kprobe_events +echo kernel_clone > set_ftrace_filter +echo 'p:testprobe kernel_clone' > kprobe_events # kprobe on / ftrace off echo 1 > events/kprobes/testprobe/enable echo > trace ( echo "forked") grep testprobe trace -! grep '_do_fork <-' trace +! grep 'kernel_clone <-' trace # kprobe on / ftrace on echo function > current_tracer echo > trace ( echo "forked") grep testprobe trace -grep '_do_fork <-' trace +grep 'kernel_clone <-' trace # kprobe off / ftrace on echo 0 > events/kprobes/testprobe/enable echo > trace ( echo "forked") ! grep testprobe trace -grep '_do_fork <-' trace +grep 'kernel_clone <-' trace # kprobe on / ftrace on echo 1 > events/kprobes/testprobe/enable @@ -35,11 +35,11 @@ echo function > current_tracer echo > trace ( echo "forked") grep testprobe trace -grep '_do_fork <-' trace +grep 'kernel_clone <-' trace # kprobe on / ftrace off echo nop > current_tracer echo > trace ( echo "forked") grep testprobe trace -! grep '_do_fork <-' trace +! grep 'kernel_clone <-' trace diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_multiprobe.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_multiprobe.tc index 366b7e1b6718..45d90b6c763d 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_multiprobe.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_multiprobe.tc @@ -4,7 +4,7 @@ # requires: kprobe_events "Create/append/":README # Choose 2 symbols for target -SYM1=_do_fork +SYM1=kernel_clone SYM2=do_exit EVENT_NAME=kprobes/testevent diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc index b4d834675e59..c02ea50d63ea 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc @@ -86,15 +86,15 @@ esac # multiprobe errors if grep -q "Create/append/" README && grep -q "imm-value" README; then -echo 'p:kprobes/testevent _do_fork' > kprobe_events +echo 'p:kprobes/testevent kernel_clone' > kprobe_events check_error '^r:kprobes/testevent do_exit' # DIFF_PROBE_TYPE # Explicitly use printf "%s" to not interpret \1 -printf "%s" 'p:kprobes/testevent _do_fork abcd=\1' > kprobe_events -check_error 'p:kprobes/testevent _do_fork ^bcd=\1' # DIFF_ARG_TYPE -check_error 'p:kprobes/testevent _do_fork ^abcd=\1:u8' # DIFF_ARG_TYPE -check_error 'p:kprobes/testevent _do_fork ^abcd=\"foo"' # DIFF_ARG_TYPE -check_error '^p:kprobes/testevent _do_fork abcd=\1' # SAME_PROBE +printf "%s" 'p:kprobes/testevent kernel_clone abcd=\1' > kprobe_events +check_error 'p:kprobes/testevent kernel_clone ^bcd=\1' # DIFF_ARG_TYPE +check_error 'p:kprobes/testevent kernel_clone ^abcd=\1:u8' # DIFF_ARG_TYPE +check_error 'p:kprobes/testevent kernel_clone ^abcd=\"foo"' # DIFF_ARG_TYPE +check_error '^p:kprobes/testevent kernel_clone abcd=\1' # SAME_PROBE fi exit 0 diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_args.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_args.tc index 523fde6d1aa5..7ae492c204a4 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_args.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_args.tc @@ -4,14 +4,14 @@ # requires: kprobe_events # Add new kretprobe event -echo 'r:testprobe2 _do_fork $retval' > kprobe_events +echo 'r:testprobe2 kernel_clone $retval' > kprobe_events grep testprobe2 kprobe_events | grep -q 'arg1=\$retval' test -d events/kprobes/testprobe2 echo 1 > events/kprobes/testprobe2/enable ( echo "forked") -cat trace | grep testprobe2 | grep -q '<- _do_fork' +cat trace | grep testprobe2 | grep -q '<- kernel_clone' echo 0 > events/kprobes/testprobe2/enable echo '-:testprobe2' >> kprobe_events diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/profile.tc b/tools/testing/selftests/ftrace/test.d/kprobe/profile.tc index ff6c44adc8a0..c4093fc1a773 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/profile.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/profile.tc @@ -4,7 +4,7 @@ # requires: kprobe_events ! grep -q 'myevent' kprobe_profile -echo p:myevent _do_fork > kprobe_events +echo p:myevent kernel_clone > kprobe_events grep -q 'myevent[[:space:]]*0[[:space:]]*0$' kprobe_profile echo 1 > events/kprobes/myevent/enable ( echo "forked" ) diff --git a/tools/testing/selftests/kmod/kmod.sh b/tools/testing/selftests/kmod/kmod.sh index ea2147248ebe..afd42387e8b2 100755 --- a/tools/testing/selftests/kmod/kmod.sh +++ b/tools/testing/selftests/kmod/kmod.sh @@ -343,7 +343,7 @@ kmod_test_0001_driver() kmod_defaults_driver config_num_threads 1 - printf '\000' >"$DIR"/config_test_driver + printf $NAME >"$DIR"/config_test_driver config_trigger ${FUNCNAME[0]} config_expect_result ${FUNCNAME[0]} MODULE_NOT_FOUND } @@ -354,7 +354,7 @@ kmod_test_0001_fs() kmod_defaults_fs config_num_threads 1 - printf '\000' >"$DIR"/config_test_fs + printf $NAME >"$DIR"/config_test_fs config_trigger ${FUNCNAME[0]} config_expect_result ${FUNCNAME[0]} -EINVAL } diff --git a/tools/testing/selftests/kselftest_harness.h b/tools/testing/selftests/kselftest_harness.h index 4f78e4805633..f19804df244c 100644 --- a/tools/testing/selftests/kselftest_harness.h +++ b/tools/testing/selftests/kselftest_harness.h @@ -971,6 +971,11 @@ void __run_test(struct __fixture_metadata *f, ksft_print_msg(" RUN %s%s%s.%s ...\n", f->name, variant->name[0] ? "." : "", variant->name, t->name); + + /* Make sure output buffers are flushed before fork */ + fflush(stdout); + fflush(stderr); + t->pid = fork(); if (t->pid < 0) { ksft_print_msg("ERROR SPAWNING TEST CHILD\n"); diff --git a/tools/testing/selftests/kvm/x86_64/debug_regs.c b/tools/testing/selftests/kvm/x86_64/debug_regs.c index 8162c58a1234..2fc6b3af81a1 100644 --- a/tools/testing/selftests/kvm/x86_64/debug_regs.c +++ b/tools/testing/selftests/kvm/x86_64/debug_regs.c @@ -40,11 +40,11 @@ static void guest_code(void) /* Single step test, covers 2 basic instructions and 2 emulated */ asm volatile("ss_start: " - "xor %%rax,%%rax\n\t" + "xor %%eax,%%eax\n\t" "cpuid\n\t" "movl $0x1a0,%%ecx\n\t" "rdmsr\n\t" - : : : "rax", "ecx"); + : : : "eax", "ebx", "ecx", "edx"); /* DR6.BD test */ asm volatile("bd_start: mov %%dr0, %%rax" : : : "rax"); @@ -73,7 +73,7 @@ int main(void) int i; /* Instruction lengths starting at ss_start */ int ss_size[4] = { - 3, /* xor */ + 2, /* xor */ 2, /* cpuid */ 5, /* mov */ 2, /* rdmsr */ diff --git a/tools/testing/selftests/lkdtm/run.sh b/tools/testing/selftests/lkdtm/run.sh index 8383eb89d88a..bb7a1775307b 100755 --- a/tools/testing/selftests/lkdtm/run.sh +++ b/tools/testing/selftests/lkdtm/run.sh @@ -82,7 +82,7 @@ dmesg > "$DMESG" ($SHELL -c 'cat <(echo '"$test"') >'"$TRIGGER" 2>/dev/null) || true # Record and dump the results -dmesg | diff --changed-group-format='%>' --unchanged-group-format='' "$DMESG" - > "$LOG" || true +dmesg | comm --nocheck-order -13 "$DMESG" - > "$LOG" || true cat "$LOG" # Check for expected output diff --git a/tools/testing/selftests/mincore/.gitignore b/tools/testing/selftests/mincore/.gitignore new file mode 100644 index 000000000000..15c4dfc2df00 --- /dev/null +++ b/tools/testing/selftests/mincore/.gitignore @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0+ +mincore_selftest diff --git a/tools/testing/selftests/mincore/Makefile b/tools/testing/selftests/mincore/Makefile new file mode 100644 index 000000000000..38c7db1e8926 --- /dev/null +++ b/tools/testing/selftests/mincore/Makefile @@ -0,0 +1,6 @@ +# SPDX-License-Identifier: GPL-2.0+ + +CFLAGS += -Wall + +TEST_GEN_PROGS := mincore_selftest +include ../lib.mk diff --git a/tools/testing/selftests/mincore/mincore_selftest.c b/tools/testing/selftests/mincore/mincore_selftest.c new file mode 100644 index 000000000000..5a1e85ff5d32 --- /dev/null +++ b/tools/testing/selftests/mincore/mincore_selftest.c @@ -0,0 +1,361 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * kselftest suite for mincore(). + * + * Copyright (C) 2020 Collabora, Ltd. + */ + +#define _GNU_SOURCE + +#include <stdio.h> +#include <errno.h> +#include <unistd.h> +#include <stdlib.h> +#include <sys/mman.h> +#include <string.h> +#include <fcntl.h> +#include <string.h> + +#include "../kselftest.h" +#include "../kselftest_harness.h" + +/* Default test file size: 4MB */ +#define MB (1UL << 20) +#define FILE_SIZE (4 * MB) + + +/* + * Tests the user interface. This test triggers most of the documented + * error conditions in mincore(). + */ +TEST(basic_interface) +{ + int retval; + int page_size; + unsigned char vec[1]; + char *addr; + + page_size = sysconf(_SC_PAGESIZE); + + /* Query a 0 byte sized range */ + retval = mincore(0, 0, vec); + EXPECT_EQ(0, retval); + + /* Addresses in the specified range are invalid or unmapped */ + errno = 0; + retval = mincore(NULL, page_size, vec); + EXPECT_EQ(-1, retval); + EXPECT_EQ(ENOMEM, errno); + + errno = 0; + addr = mmap(NULL, page_size, PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_ANONYMOUS, -1, 0); + ASSERT_NE(MAP_FAILED, addr) { + TH_LOG("mmap error: %s", strerror(errno)); + } + + /* <addr> argument is not page-aligned */ + errno = 0; + retval = mincore(addr + 1, page_size, vec); + EXPECT_EQ(-1, retval); + EXPECT_EQ(EINVAL, errno); + + /* <length> argument is too large */ + errno = 0; + retval = mincore(addr, -1, vec); + EXPECT_EQ(-1, retval); + EXPECT_EQ(ENOMEM, errno); + + /* <vec> argument points to an illegal address */ + errno = 0; + retval = mincore(addr, page_size, NULL); + EXPECT_EQ(-1, retval); + EXPECT_EQ(EFAULT, errno); + munmap(addr, page_size); +} + + +/* + * Test mincore() behavior on a private anonymous page mapping. + * Check that the page is not loaded into memory right after the mapping + * but after accessing it (on-demand allocation). + * Then free the page and check that it's not memory-resident. + */ +TEST(check_anonymous_locked_pages) +{ + unsigned char vec[1]; + char *addr; + int retval; + int page_size; + + page_size = sysconf(_SC_PAGESIZE); + + /* Map one page and check it's not memory-resident */ + errno = 0; + addr = mmap(NULL, page_size, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + ASSERT_NE(MAP_FAILED, addr) { + TH_LOG("mmap error: %s", strerror(errno)); + } + retval = mincore(addr, page_size, vec); + ASSERT_EQ(0, retval); + ASSERT_EQ(0, vec[0]) { + TH_LOG("Page found in memory before use"); + } + + /* Touch the page and check again. It should now be in memory */ + addr[0] = 1; + mlock(addr, page_size); + retval = mincore(addr, page_size, vec); + ASSERT_EQ(0, retval); + ASSERT_EQ(1, vec[0]) { + TH_LOG("Page not found in memory after use"); + } + + /* + * It shouldn't be memory-resident after unlocking it and + * marking it as unneeded. + */ + munlock(addr, page_size); + madvise(addr, page_size, MADV_DONTNEED); + retval = mincore(addr, page_size, vec); + ASSERT_EQ(0, retval); + ASSERT_EQ(0, vec[0]) { + TH_LOG("Page in memory after being zapped"); + } + munmap(addr, page_size); +} + + +/* + * Check mincore() behavior on huge pages. + * This test will be skipped if the mapping fails (ie. if there are no + * huge pages available). + * + * Make sure the system has at least one free huge page, check + * "HugePages_Free" in /proc/meminfo. + * Increment /sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages if + * needed. + */ +TEST(check_huge_pages) +{ + unsigned char vec[1]; + char *addr; + int retval; + int page_size; + + page_size = sysconf(_SC_PAGESIZE); + + errno = 0; + addr = mmap(NULL, page_size, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, + -1, 0); + if (addr == MAP_FAILED) { + if (errno == ENOMEM) + SKIP(return, "No huge pages available."); + else + TH_LOG("mmap error: %s", strerror(errno)); + } + retval = mincore(addr, page_size, vec); + ASSERT_EQ(0, retval); + ASSERT_EQ(0, vec[0]) { + TH_LOG("Page found in memory before use"); + } + + addr[0] = 1; + mlock(addr, page_size); + retval = mincore(addr, page_size, vec); + ASSERT_EQ(0, retval); + ASSERT_EQ(1, vec[0]) { + TH_LOG("Page not found in memory after use"); + } + + munlock(addr, page_size); + munmap(addr, page_size); +} + + +/* + * Test mincore() behavior on a file-backed page. + * No pages should be loaded into memory right after the mapping. Then, + * accessing any address in the mapping range should load the page + * containing the address and a number of subsequent pages (readahead). + * + * The actual readahead settings depend on the test environment, so we + * can't make a lot of assumptions about that. This test covers the most + * general cases. + */ +TEST(check_file_mmap) +{ + unsigned char *vec; + int vec_size; + char *addr; + int retval; + int page_size; + int fd; + int i; + int ra_pages = 0; + + page_size = sysconf(_SC_PAGESIZE); + vec_size = FILE_SIZE / page_size; + if (FILE_SIZE % page_size) + vec_size++; + + vec = calloc(vec_size, sizeof(unsigned char)); + ASSERT_NE(NULL, vec) { + TH_LOG("Can't allocate array"); + } + + errno = 0; + fd = open(".", O_TMPFILE | O_RDWR, 0600); + ASSERT_NE(-1, fd) { + TH_LOG("Can't create temporary file: %s", + strerror(errno)); + } + errno = 0; + retval = fallocate(fd, 0, 0, FILE_SIZE); + ASSERT_EQ(0, retval) { + TH_LOG("Error allocating space for the temporary file: %s", + strerror(errno)); + } + + /* + * Map the whole file, the pages shouldn't be fetched yet. + */ + errno = 0; + addr = mmap(NULL, FILE_SIZE, PROT_READ | PROT_WRITE, + MAP_SHARED, fd, 0); + ASSERT_NE(MAP_FAILED, addr) { + TH_LOG("mmap error: %s", strerror(errno)); + } + retval = mincore(addr, FILE_SIZE, vec); + ASSERT_EQ(0, retval); + for (i = 0; i < vec_size; i++) { + ASSERT_EQ(0, vec[i]) { + TH_LOG("Unexpected page in memory"); + } + } + + /* + * Touch a page in the middle of the mapping. We expect the next + * few pages (the readahead window) to be populated too. + */ + addr[FILE_SIZE / 2] = 1; + retval = mincore(addr, FILE_SIZE, vec); + ASSERT_EQ(0, retval); + ASSERT_EQ(1, vec[FILE_SIZE / 2 / page_size]) { + TH_LOG("Page not found in memory after use"); + } + + i = FILE_SIZE / 2 / page_size + 1; + while (i < vec_size && vec[i]) { + ra_pages++; + i++; + } + EXPECT_GT(ra_pages, 0) { + TH_LOG("No read-ahead pages found in memory"); + } + + EXPECT_LT(i, vec_size) { + TH_LOG("Read-ahead pages reached the end of the file"); + } + /* + * End of the readahead window. The rest of the pages shouldn't + * be in memory. + */ + if (i < vec_size) { + while (i < vec_size && !vec[i]) + i++; + EXPECT_EQ(vec_size, i) { + TH_LOG("Unexpected page in memory beyond readahead window"); + } + } + + munmap(addr, FILE_SIZE); + close(fd); + free(vec); +} + + +/* + * Test mincore() behavior on a page backed by a tmpfs file. This test + * performs the same steps as the previous one. However, we don't expect + * any readahead in this case. + */ +TEST(check_tmpfs_mmap) +{ + unsigned char *vec; + int vec_size; + char *addr; + int retval; + int page_size; + int fd; + int i; + int ra_pages = 0; + + page_size = sysconf(_SC_PAGESIZE); + vec_size = FILE_SIZE / page_size; + if (FILE_SIZE % page_size) + vec_size++; + + vec = calloc(vec_size, sizeof(unsigned char)); + ASSERT_NE(NULL, vec) { + TH_LOG("Can't allocate array"); + } + + errno = 0; + fd = open("/dev/shm", O_TMPFILE | O_RDWR, 0600); + ASSERT_NE(-1, fd) { + TH_LOG("Can't create temporary file: %s", + strerror(errno)); + } + errno = 0; + retval = fallocate(fd, 0, 0, FILE_SIZE); + ASSERT_EQ(0, retval) { + TH_LOG("Error allocating space for the temporary file: %s", + strerror(errno)); + } + + /* + * Map the whole file, the pages shouldn't be fetched yet. + */ + errno = 0; + addr = mmap(NULL, FILE_SIZE, PROT_READ | PROT_WRITE, + MAP_SHARED, fd, 0); + ASSERT_NE(MAP_FAILED, addr) { + TH_LOG("mmap error: %s", strerror(errno)); + } + retval = mincore(addr, FILE_SIZE, vec); + ASSERT_EQ(0, retval); + for (i = 0; i < vec_size; i++) { + ASSERT_EQ(0, vec[i]) { + TH_LOG("Unexpected page in memory"); + } + } + + /* + * Touch a page in the middle of the mapping. We expect only + * that page to be fetched into memory. + */ + addr[FILE_SIZE / 2] = 1; + retval = mincore(addr, FILE_SIZE, vec); + ASSERT_EQ(0, retval); + ASSERT_EQ(1, vec[FILE_SIZE / 2 / page_size]) { + TH_LOG("Page not found in memory after use"); + } + + i = FILE_SIZE / 2 / page_size + 1; + while (i < vec_size && vec[i]) { + ra_pages++; + i++; + } + ASSERT_EQ(ra_pages, 0) { + TH_LOG("Read-ahead pages found in memory"); + } + + munmap(addr, FILE_SIZE); + close(fd); + free(vec); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/net/icmp_redirect.sh b/tools/testing/selftests/net/icmp_redirect.sh index 18c5de53558a..bf361f30d6ef 100755 --- a/tools/testing/selftests/net/icmp_redirect.sh +++ b/tools/testing/selftests/net/icmp_redirect.sh @@ -180,6 +180,8 @@ setup() ;; r[12]) ip netns exec $ns sysctl -q -w net.ipv4.ip_forward=1 ip netns exec $ns sysctl -q -w net.ipv4.conf.all.send_redirects=1 + ip netns exec $ns sysctl -q -w net.ipv4.conf.default.rp_filter=0 + ip netns exec $ns sysctl -q -w net.ipv4.conf.all.rp_filter=0 ip netns exec $ns sysctl -q -w net.ipv6.conf.all.forwarding=1 ip netns exec $ns sysctl -q -w net.ipv6.route.mtu_expires=10 diff --git a/tools/testing/selftests/net/mptcp/config b/tools/testing/selftests/net/mptcp/config index 2499824d9e1c..8df5cb8f71ff 100644 --- a/tools/testing/selftests/net/mptcp/config +++ b/tools/testing/selftests/net/mptcp/config @@ -1,4 +1,6 @@ CONFIG_MPTCP=y CONFIG_MPTCP_IPV6=y +CONFIG_INET_DIAG=m +CONFIG_INET_MPTCP_DIAG=m CONFIG_VETH=y CONFIG_NET_SCH_NETEM=m diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.c b/tools/testing/selftests/net/mptcp/mptcp_connect.c index cad6f73a5fd0..090620c3e10c 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.c +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.c @@ -406,10 +406,11 @@ static int copyfd_io_poll(int infd, int peerfd, int outfd) /* ... but we still receive. * Close our write side, ev. give some time - * for address notification + * for address notification and/or checking + * the current status */ - if (cfg_join) - usleep(400000); + if (cfg_wait) + usleep(cfg_wait); shutdown(peerfd, SHUT_WR); } else { if (errno == EINTR) @@ -427,7 +428,7 @@ static int copyfd_io_poll(int infd, int peerfd, int outfd) } /* leave some time for late join/announce */ - if (cfg_wait) + if (cfg_join) usleep(cfg_wait); close(peerfd); diff --git a/tools/testing/selftests/net/rtnetlink.sh b/tools/testing/selftests/net/rtnetlink.sh index 7c38a909f8b8..8a2fe6d64bf2 100755 --- a/tools/testing/selftests/net/rtnetlink.sh +++ b/tools/testing/selftests/net/rtnetlink.sh @@ -1175,6 +1175,51 @@ kci_test_neigh_get() echo "PASS: neigh get" } +kci_test_bridge_parent_id() +{ + local ret=0 + sysfsnet=/sys/bus/netdevsim/devices/netdevsim + probed=false + + if [ ! -w /sys/bus/netdevsim/new_device ] ; then + modprobe -q netdevsim + check_err $? + if [ $ret -ne 0 ]; then + echo "SKIP: bridge_parent_id can't load netdevsim" + return $ksft_skip + fi + probed=true + fi + + echo "10 1" > /sys/bus/netdevsim/new_device + while [ ! -d ${sysfsnet}10 ] ; do :; done + echo "20 1" > /sys/bus/netdevsim/new_device + while [ ! -d ${sysfsnet}20 ] ; do :; done + udevadm settle + dev10=`ls ${sysfsnet}10/net/` + dev20=`ls ${sysfsnet}20/net/` + + ip link add name test-bond0 type bond mode 802.3ad + ip link set dev $dev10 master test-bond0 + ip link set dev $dev20 master test-bond0 + ip link add name test-br0 type bridge + ip link set dev test-bond0 master test-br0 + check_err $? + + # clean up any leftovers + ip link del dev test-br0 + ip link del dev test-bond0 + echo 20 > /sys/bus/netdevsim/del_device + echo 10 > /sys/bus/netdevsim/del_device + $probed && rmmod netdevsim + + if [ $ret -ne 0 ]; then + echo "FAIL: bridge_parent_id" + return 1 + fi + echo "PASS: bridge_parent_id" +} + kci_test_rtnl() { local ret=0 @@ -1224,6 +1269,8 @@ kci_test_rtnl() check_err $? kci_test_neigh_get check_err $? + kci_test_bridge_parent_id + check_err $? kci_del_dummy return $ret diff --git a/tools/testing/selftests/netfilter/nft_flowtable.sh b/tools/testing/selftests/netfilter/nft_flowtable.sh index d3e0809ab368..431296c0f91c 100755 --- a/tools/testing/selftests/netfilter/nft_flowtable.sh +++ b/tools/testing/selftests/netfilter/nft_flowtable.sh @@ -2,13 +2,18 @@ # SPDX-License-Identifier: GPL-2.0 # # This tests basic flowtable functionality. -# Creates following topology: +# Creates following default topology: # # Originator (MTU 9000) <-Router1-> MTU 1500 <-Router2-> Responder (MTU 2000) # Router1 is the one doing flow offloading, Router2 has no special # purpose other than having a link that is smaller than either Originator # and responder, i.e. TCPMSS announced values are too large and will still # result in fragmentation and/or PMTU discovery. +# +# You can check with different Orgininator/Link/Responder MTU eg: +# nft_flowtable.sh -o8000 -l1500 -r2000 +# + # Kselftest framework requirement - SKIP code is 4. ksft_skip=4 @@ -21,29 +26,17 @@ ns2out="" log_netns=$(sysctl -n net.netfilter.nf_log_all_netns) -nft --version > /dev/null 2>&1 -if [ $? -ne 0 ];then - echo "SKIP: Could not run test without nft tool" - exit $ksft_skip -fi - -ip -Version > /dev/null 2>&1 -if [ $? -ne 0 ];then - echo "SKIP: Could not run test without ip tool" - exit $ksft_skip -fi - -which nc > /dev/null 2>&1 -if [ $? -ne 0 ];then - echo "SKIP: Could not run test without nc (netcat)" - exit $ksft_skip -fi +checktool (){ + if ! $1 > /dev/null 2>&1; then + echo "SKIP: Could not $2" + exit $ksft_skip + fi +} -ip netns add nsr1 -if [ $? -ne 0 ];then - echo "SKIP: Could not create net namespace" - exit $ksft_skip -fi +checktool "nft --version" "run test without nft tool" +checktool "ip -Version" "run test without ip tool" +checktool "which nc" "run test without nc (netcat)" +checktool "ip netns add nsr1" "create net namespace" ip netns add ns1 ip netns add ns2 @@ -89,11 +82,41 @@ ip -net nsr2 addr add dead:2::1/64 dev veth1 # ns2 is going via nsr2 with a smaller mtu, so that TCPMSS announced by both peers # is NOT the lowest link mtu. -ip -net nsr1 link set veth0 mtu 9000 -ip -net ns1 link set eth0 mtu 9000 +omtu=9000 +lmtu=1500 +rmtu=2000 + +usage(){ + echo "nft_flowtable.sh [OPTIONS]" + echo + echo "MTU options" + echo " -o originator" + echo " -l link" + echo " -r responder" + exit 1 +} + +while getopts "o:l:r:" o +do + case $o in + o) omtu=$OPTARG;; + l) lmtu=$OPTARG;; + r) rmtu=$OPTARG;; + *) usage;; + esac +done + +if ! ip -net nsr1 link set veth0 mtu $omtu; then + exit 1 +fi + +ip -net ns1 link set eth0 mtu $omtu + +if ! ip -net nsr2 link set veth1 mtu $rmtu; then + exit 1 +fi -ip -net nsr2 link set veth1 mtu 2000 -ip -net ns2 link set eth0 mtu 2000 +ip -net ns2 link set eth0 mtu $rmtu # transfer-net between nsr1 and nsr2. # these addresses are not used for connections. @@ -113,7 +136,10 @@ for i in 1 2; do ip -net ns$i route add default via 10.0.$i.1 ip -net ns$i addr add dead:$i::99/64 dev eth0 ip -net ns$i route add default via dead:$i::1 - ip netns exec ns$i sysctl net.ipv4.tcp_no_metrics_save=1 > /dev/null + if ! ip netns exec ns$i sysctl net.ipv4.tcp_no_metrics_save=1 > /dev/null; then + echo "ERROR: Check Originator/Responder values (problem during address addition)" + exit 1 + fi # don't set ip DF bit for first two tests ip netns exec ns$i sysctl net.ipv4.ip_no_pmtu_disc=1 > /dev/null @@ -147,7 +173,7 @@ table inet filter { # as PMTUd is off. # This rule is deleted for the last test, when we expect PMTUd # to kick in and ensure all packets meet mtu requirements. - meta length gt 1500 accept comment something-to-grep-for + meta length gt $lmtu accept comment something-to-grep-for # next line blocks connection w.o. working offload. # we only do this for reverse dir, because we expect packets to @@ -171,15 +197,13 @@ if [ $? -ne 0 ]; then fi # test basic connectivity -ip netns exec ns1 ping -c 1 -q 10.0.2.99 > /dev/null -if [ $? -ne 0 ];then +if ! ip netns exec ns1 ping -c 1 -q 10.0.2.99 > /dev/null; then echo "ERROR: ns1 cannot reach ns2" 1>&2 bash exit 1 fi -ip netns exec ns2 ping -c 1 -q 10.0.1.99 > /dev/null -if [ $? -ne 0 ];then +if ! ip netns exec ns2 ping -c 1 -q 10.0.1.99 > /dev/null; then echo "ERROR: ns2 cannot reach ns1" 1>&2 exit 1 fi @@ -196,7 +220,6 @@ ns2out=$(mktemp) make_file() { name=$1 - who=$2 SIZE=$((RANDOM % (1024 * 8))) TSIZE=$((SIZE * 1024)) @@ -215,8 +238,7 @@ check_transfer() out=$2 what=$3 - cmp "$in" "$out" > /dev/null 2>&1 - if [ $? -ne 0 ] ;then + if ! cmp "$in" "$out" > /dev/null 2>&1; then echo "FAIL: file mismatch for $what" 1>&2 ls -l "$in" ls -l "$out" @@ -243,17 +265,21 @@ test_tcp_forwarding_ip() sleep 3 - kill $lpid - kill $cpid + if ps -p $lpid > /dev/null;then + kill $lpid + fi + + if ps -p $cpid > /dev/null;then + kill $cpid + fi + wait - check_transfer "$ns1in" "$ns2out" "ns1 -> ns2" - if [ $? -ne 0 ];then + if ! check_transfer "$ns1in" "$ns2out" "ns1 -> ns2"; then lret=1 fi - check_transfer "$ns2in" "$ns1out" "ns1 <- ns2" - if [ $? -ne 0 ];then + if ! check_transfer "$ns2in" "$ns1out" "ns1 <- ns2"; then lret=1 fi @@ -282,13 +308,12 @@ test_tcp_forwarding_nat() return $lret } -make_file "$ns1in" "ns1" -make_file "$ns2in" "ns2" +make_file "$ns1in" +make_file "$ns2in" # First test: # No PMTU discovery, nsr1 is expected to fragment packets from ns1 to ns2 as needed. -test_tcp_forwarding ns1 ns2 -if [ $? -eq 0 ] ;then +if test_tcp_forwarding ns1 ns2; then echo "PASS: flow offloaded for ns1/ns2" else echo "FAIL: flow offload for ns1/ns2:" 1>&2 @@ -319,9 +344,7 @@ table ip nat { } EOF -test_tcp_forwarding_nat ns1 ns2 - -if [ $? -eq 0 ] ;then +if test_tcp_forwarding_nat ns1 ns2; then echo "PASS: flow offloaded for ns1/ns2 with NAT" else echo "FAIL: flow offload for ns1/ns2 with NAT" 1>&2 @@ -333,8 +356,7 @@ fi # Same as second test, but with PMTU discovery enabled. handle=$(ip netns exec nsr1 nft -a list table inet filter | grep something-to-grep-for | cut -d \# -f 2) -ip netns exec nsr1 nft delete rule inet filter forward $handle -if [ $? -ne 0 ] ;then +if ! ip netns exec nsr1 nft delete rule inet filter forward $handle; then echo "FAIL: Could not delete large-packet accept rule" exit 1 fi @@ -342,8 +364,7 @@ fi ip netns exec ns1 sysctl net.ipv4.ip_no_pmtu_disc=0 > /dev/null ip netns exec ns2 sysctl net.ipv4.ip_no_pmtu_disc=0 > /dev/null -test_tcp_forwarding_nat ns1 ns2 -if [ $? -eq 0 ] ;then +if test_tcp_forwarding_nat ns1 ns2; then echo "PASS: flow offloaded for ns1/ns2 with NAT and pmtu discovery" else echo "FAIL: flow offload for ns1/ns2 with NAT and pmtu discovery" 1>&2 @@ -389,8 +410,7 @@ ip -net ns2 route del 192.168.10.1 via 10.0.2.1 ip -net ns2 route add default via 10.0.2.1 ip -net ns2 route add default via dead:2::1 -test_tcp_forwarding ns1 ns2 -if [ $? -eq 0 ] ;then +if test_tcp_forwarding ns1 ns2; then echo "PASS: ipsec tunnel mode for ns1/ns2" else echo "FAIL: ipsec tunnel mode for ns1/ns2" diff --git a/tools/testing/selftests/pidfd/pidfd.h b/tools/testing/selftests/pidfd/pidfd.h index a2c80914e3dc..01f8d3c0cf2c 100644 --- a/tools/testing/selftests/pidfd/pidfd.h +++ b/tools/testing/selftests/pidfd/pidfd.h @@ -46,6 +46,10 @@ #define __NR_pidfd_getfd -1 #endif +#ifndef PIDFD_NONBLOCK +#define PIDFD_NONBLOCK O_NONBLOCK +#endif + /* * The kernel reserves 300 pids via RESERVED_PIDS in kernel/pid.c * That means, when it wraps around any pid < 300 will be skipped. diff --git a/tools/testing/selftests/pidfd/pidfd_setns_test.c b/tools/testing/selftests/pidfd/pidfd_setns_test.c index 7dca1aa4672d..1f085b922c6e 100644 --- a/tools/testing/selftests/pidfd/pidfd_setns_test.c +++ b/tools/testing/selftests/pidfd/pidfd_setns_test.c @@ -75,7 +75,7 @@ static int sys_waitid(int which, pid_t pid, int options) pid_t create_child(int *pidfd, unsigned flags) { - struct clone_args args = { + struct __clone_args args = { .flags = CLONE_PIDFD | flags, .exit_signal = SIGCHLD, .pidfd = ptr_to_u64(pidfd), diff --git a/tools/testing/selftests/pidfd/pidfd_wait.c b/tools/testing/selftests/pidfd/pidfd_wait.c index 7079f8eef792..be2943f072f6 100644 --- a/tools/testing/selftests/pidfd/pidfd_wait.c +++ b/tools/testing/selftests/pidfd/pidfd_wait.c @@ -17,10 +17,15 @@ #include <unistd.h> #include "pidfd.h" -#include "../kselftest.h" +#include "../kselftest_harness.h" #define ptr_to_u64(ptr) ((__u64)((uintptr_t)(ptr))) +/* Attempt to de-conflict with the selftests tree. */ +#ifndef SKIP +#define SKIP(s, ...) XFAIL(s, ##__VA_ARGS__) +#endif + static pid_t sys_clone3(struct clone_args *args) { return syscall(__NR_clone3, args, sizeof(struct clone_args)); @@ -32,9 +37,8 @@ static int sys_waitid(int which, pid_t pid, siginfo_t *info, int options, return syscall(__NR_waitid, which, pid, info, options, ru); } -static int test_pidfd_wait_simple(void) +TEST(wait_simple) { - const char *test_name = "pidfd wait simple"; int pidfd = -1, status = 0; pid_t parent_tid = -1; struct clone_args args = { @@ -50,76 +54,40 @@ static int test_pidfd_wait_simple(void) }; pidfd = open("/proc/self", O_DIRECTORY | O_RDONLY | O_CLOEXEC); - if (pidfd < 0) - ksft_exit_fail_msg("%s test: failed to open /proc/self %s\n", - test_name, strerror(errno)); + ASSERT_GE(pidfd, 0); pid = sys_waitid(P_PIDFD, pidfd, &info, WEXITED, NULL); - if (pid == 0) - ksft_exit_fail_msg( - "%s test: succeeded to wait on invalid pidfd %s\n", - test_name, strerror(errno)); - close(pidfd); + ASSERT_NE(pid, 0); + EXPECT_EQ(close(pidfd), 0); pidfd = -1; pidfd = open("/dev/null", O_RDONLY | O_CLOEXEC); - if (pidfd == 0) - ksft_exit_fail_msg("%s test: failed to open /dev/null %s\n", - test_name, strerror(errno)); + ASSERT_GE(pidfd, 0); pid = sys_waitid(P_PIDFD, pidfd, &info, WEXITED, NULL); - if (pid == 0) - ksft_exit_fail_msg( - "%s test: succeeded to wait on invalid pidfd %s\n", - test_name, strerror(errno)); - close(pidfd); + ASSERT_NE(pid, 0); + EXPECT_EQ(close(pidfd), 0); pidfd = -1; pid = sys_clone3(&args); - if (pid < 0) - ksft_exit_fail_msg("%s test: failed to create new process %s\n", - test_name, strerror(errno)); + ASSERT_GE(pid, 0); if (pid == 0) exit(EXIT_SUCCESS); pid = sys_waitid(P_PIDFD, pidfd, &info, WEXITED, NULL); - if (pid < 0) - ksft_exit_fail_msg( - "%s test: failed to wait on process with pid %d and pidfd %d: %s\n", - test_name, parent_tid, pidfd, strerror(errno)); - - if (!WIFEXITED(info.si_status) || WEXITSTATUS(info.si_status)) - ksft_exit_fail_msg( - "%s test: unexpected status received after waiting on process with pid %d and pidfd %d: %s\n", - test_name, parent_tid, pidfd, strerror(errno)); - close(pidfd); - - if (info.si_signo != SIGCHLD) - ksft_exit_fail_msg( - "%s test: unexpected si_signo value %d received after waiting on process with pid %d and pidfd %d: %s\n", - test_name, info.si_signo, parent_tid, pidfd, - strerror(errno)); - - if (info.si_code != CLD_EXITED) - ksft_exit_fail_msg( - "%s test: unexpected si_code value %d received after waiting on process with pid %d and pidfd %d: %s\n", - test_name, info.si_code, parent_tid, pidfd, - strerror(errno)); - - if (info.si_pid != parent_tid) - ksft_exit_fail_msg( - "%s test: unexpected si_pid value %d received after waiting on process with pid %d and pidfd %d: %s\n", - test_name, info.si_pid, parent_tid, pidfd, - strerror(errno)); - - ksft_test_result_pass("%s test: Passed\n", test_name); - return 0; + ASSERT_GE(pid, 0); + ASSERT_EQ(WIFEXITED(info.si_status), true); + ASSERT_EQ(WEXITSTATUS(info.si_status), 0); + EXPECT_EQ(close(pidfd), 0); + + ASSERT_EQ(info.si_signo, SIGCHLD); + ASSERT_EQ(info.si_code, CLD_EXITED); + ASSERT_EQ(info.si_pid, parent_tid); } -static int test_pidfd_wait_states(void) +TEST(wait_states) { - const char *test_name = "pidfd wait states"; int pidfd = -1, status = 0; pid_t parent_tid = -1; struct clone_args args = { @@ -135,9 +103,7 @@ static int test_pidfd_wait_states(void) }; pid = sys_clone3(&args); - if (pid < 0) - ksft_exit_fail_msg("%s test: failed to create new process %s\n", - test_name, strerror(errno)); + ASSERT_GE(pid, 0); if (pid == 0) { kill(getpid(), SIGSTOP); @@ -145,127 +111,115 @@ static int test_pidfd_wait_states(void) exit(EXIT_SUCCESS); } - ret = sys_waitid(P_PIDFD, pidfd, &info, WSTOPPED, NULL); - if (ret < 0) - ksft_exit_fail_msg( - "%s test: failed to wait on WSTOPPED process with pid %d and pidfd %d: %s\n", - test_name, parent_tid, pidfd, strerror(errno)); - - if (info.si_signo != SIGCHLD) - ksft_exit_fail_msg( - "%s test: unexpected si_signo value %d received after waiting on process with pid %d and pidfd %d: %s\n", - test_name, info.si_signo, parent_tid, pidfd, - strerror(errno)); - - if (info.si_code != CLD_STOPPED) - ksft_exit_fail_msg( - "%s test: unexpected si_code value %d received after waiting on process with pid %d and pidfd %d: %s\n", - test_name, info.si_code, parent_tid, pidfd, - strerror(errno)); - - if (info.si_pid != parent_tid) - ksft_exit_fail_msg( - "%s test: unexpected si_pid value %d received after waiting on process with pid %d and pidfd %d: %s\n", - test_name, info.si_pid, parent_tid, pidfd, - strerror(errno)); - - ret = sys_pidfd_send_signal(pidfd, SIGCONT, NULL, 0); - if (ret < 0) - ksft_exit_fail_msg( - "%s test: failed to send signal to process with pid %d and pidfd %d: %s\n", - test_name, parent_tid, pidfd, strerror(errno)); - - ret = sys_waitid(P_PIDFD, pidfd, &info, WCONTINUED, NULL); - if (ret < 0) - ksft_exit_fail_msg( - "%s test: failed to wait WCONTINUED on process with pid %d and pidfd %d: %s\n", - test_name, parent_tid, pidfd, strerror(errno)); - - if (info.si_signo != SIGCHLD) - ksft_exit_fail_msg( - "%s test: unexpected si_signo value %d received after waiting on process with pid %d and pidfd %d: %s\n", - test_name, info.si_signo, parent_tid, pidfd, - strerror(errno)); - - if (info.si_code != CLD_CONTINUED) - ksft_exit_fail_msg( - "%s test: unexpected si_code value %d received after waiting on process with pid %d and pidfd %d: %s\n", - test_name, info.si_code, parent_tid, pidfd, - strerror(errno)); - - if (info.si_pid != parent_tid) - ksft_exit_fail_msg( - "%s test: unexpected si_pid value %d received after waiting on process with pid %d and pidfd %d: %s\n", - test_name, info.si_pid, parent_tid, pidfd, - strerror(errno)); - - ret = sys_waitid(P_PIDFD, pidfd, &info, WUNTRACED, NULL); - if (ret < 0) - ksft_exit_fail_msg( - "%s test: failed to wait on WUNTRACED process with pid %d and pidfd %d: %s\n", - test_name, parent_tid, pidfd, strerror(errno)); - - if (info.si_signo != SIGCHLD) - ksft_exit_fail_msg( - "%s test: unexpected si_signo value %d received after waiting on process with pid %d and pidfd %d: %s\n", - test_name, info.si_signo, parent_tid, pidfd, - strerror(errno)); - - if (info.si_code != CLD_STOPPED) - ksft_exit_fail_msg( - "%s test: unexpected si_code value %d received after waiting on process with pid %d and pidfd %d: %s\n", - test_name, info.si_code, parent_tid, pidfd, - strerror(errno)); - - if (info.si_pid != parent_tid) - ksft_exit_fail_msg( - "%s test: unexpected si_pid value %d received after waiting on process with pid %d and pidfd %d: %s\n", - test_name, info.si_pid, parent_tid, pidfd, - strerror(errno)); - - ret = sys_pidfd_send_signal(pidfd, SIGKILL, NULL, 0); - if (ret < 0) - ksft_exit_fail_msg( - "%s test: failed to send SIGKILL to process with pid %d and pidfd %d: %s\n", - test_name, parent_tid, pidfd, strerror(errno)); + ASSERT_EQ(sys_waitid(P_PIDFD, pidfd, &info, WSTOPPED, NULL), 0); + ASSERT_EQ(info.si_signo, SIGCHLD); + ASSERT_EQ(info.si_code, CLD_STOPPED); + ASSERT_EQ(info.si_pid, parent_tid); - ret = sys_waitid(P_PIDFD, pidfd, &info, WEXITED, NULL); - if (ret < 0) - ksft_exit_fail_msg( - "%s test: failed to wait on WEXITED process with pid %d and pidfd %d: %s\n", - test_name, parent_tid, pidfd, strerror(errno)); - - if (info.si_signo != SIGCHLD) - ksft_exit_fail_msg( - "%s test: unexpected si_signo value %d received after waiting on process with pid %d and pidfd %d: %s\n", - test_name, info.si_signo, parent_tid, pidfd, - strerror(errno)); - - if (info.si_code != CLD_KILLED) - ksft_exit_fail_msg( - "%s test: unexpected si_code value %d received after waiting on process with pid %d and pidfd %d: %s\n", - test_name, info.si_code, parent_tid, pidfd, - strerror(errno)); - - if (info.si_pid != parent_tid) - ksft_exit_fail_msg( - "%s test: unexpected si_pid value %d received after waiting on process with pid %d and pidfd %d: %s\n", - test_name, info.si_pid, parent_tid, pidfd, - strerror(errno)); - - close(pidfd); - - ksft_test_result_pass("%s test: Passed\n", test_name); - return 0; + ASSERT_EQ(sys_pidfd_send_signal(pidfd, SIGCONT, NULL, 0), 0); + + ASSERT_EQ(sys_waitid(P_PIDFD, pidfd, &info, WCONTINUED, NULL), 0); + ASSERT_EQ(info.si_signo, SIGCHLD); + ASSERT_EQ(info.si_code, CLD_CONTINUED); + ASSERT_EQ(info.si_pid, parent_tid); + + ASSERT_EQ(sys_waitid(P_PIDFD, pidfd, &info, WUNTRACED, NULL), 0); + ASSERT_EQ(info.si_signo, SIGCHLD); + ASSERT_EQ(info.si_code, CLD_STOPPED); + ASSERT_EQ(info.si_pid, parent_tid); + + ASSERT_EQ(sys_pidfd_send_signal(pidfd, SIGKILL, NULL, 0), 0); + + ASSERT_EQ(sys_waitid(P_PIDFD, pidfd, &info, WEXITED, NULL), 0); + ASSERT_EQ(info.si_signo, SIGCHLD); + ASSERT_EQ(info.si_code, CLD_KILLED); + ASSERT_EQ(info.si_pid, parent_tid); + + EXPECT_EQ(close(pidfd), 0); } -int main(int argc, char **argv) +TEST(wait_nonblock) { - ksft_print_header(); - ksft_set_plan(2); + int pidfd, status = 0; + unsigned int flags = 0; + pid_t parent_tid = -1; + struct clone_args args = { + .parent_tid = ptr_to_u64(&parent_tid), + .flags = CLONE_PARENT_SETTID, + .exit_signal = SIGCHLD, + }; + int ret; + pid_t pid; + siginfo_t info = { + .si_signo = 0, + }; + + /* + * Callers need to see ECHILD with non-blocking pidfds when no child + * processes exists. + */ + pidfd = sys_pidfd_open(getpid(), PIDFD_NONBLOCK); + EXPECT_GE(pidfd, 0) { + /* pidfd_open() doesn't support PIDFD_NONBLOCK. */ + ASSERT_EQ(errno, EINVAL); + SKIP(return, "Skipping PIDFD_NONBLOCK test"); + } + + ret = sys_waitid(P_PIDFD, pidfd, &info, WEXITED, NULL); + ASSERT_LT(ret, 0); + ASSERT_EQ(errno, ECHILD); + EXPECT_EQ(close(pidfd), 0); + + pid = sys_clone3(&args); + ASSERT_GE(pid, 0); + + if (pid == 0) { + kill(getpid(), SIGSTOP); + exit(EXIT_SUCCESS); + } - test_pidfd_wait_simple(); - test_pidfd_wait_states(); + pidfd = sys_pidfd_open(pid, PIDFD_NONBLOCK); + EXPECT_GE(pidfd, 0) { + /* pidfd_open() doesn't support PIDFD_NONBLOCK. */ + ASSERT_EQ(errno, EINVAL); + SKIP(return, "Skipping PIDFD_NONBLOCK test"); + } + + flags = fcntl(pidfd, F_GETFL, 0); + ASSERT_GT(flags, 0); + ASSERT_GT((flags & O_NONBLOCK), 0); + + /* + * Callers need to see EAGAIN/EWOULDBLOCK with non-blocking pidfd when + * child processes exist but none have exited. + */ + ret = sys_waitid(P_PIDFD, pidfd, &info, WEXITED, NULL); + ASSERT_LT(ret, 0); + ASSERT_EQ(errno, EAGAIN); + + /* + * Callers need to continue seeing 0 with non-blocking pidfd and + * WNOHANG raised explicitly when child processes exist but none have + * exited. + */ + ret = sys_waitid(P_PIDFD, pidfd, &info, WEXITED | WNOHANG, NULL); + ASSERT_EQ(ret, 0); - return ksft_exit_pass(); + ASSERT_EQ(fcntl(pidfd, F_SETFL, (flags & ~O_NONBLOCK)), 0); + + ASSERT_EQ(sys_waitid(P_PIDFD, pidfd, &info, WSTOPPED, NULL), 0); + ASSERT_EQ(info.si_signo, SIGCHLD); + ASSERT_EQ(info.si_code, CLD_STOPPED); + ASSERT_EQ(info.si_pid, parent_tid); + + ASSERT_EQ(sys_pidfd_send_signal(pidfd, SIGCONT, NULL, 0), 0); + + ASSERT_EQ(sys_waitid(P_PIDFD, pidfd, &info, WEXITED, NULL), 0); + ASSERT_EQ(info.si_signo, SIGCHLD); + ASSERT_EQ(info.si_code, CLD_EXITED); + ASSERT_EQ(info.si_pid, parent_tid); + + EXPECT_EQ(close(pidfd), 0); } + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/powerpc/alignment/alignment_handler.c b/tools/testing/selftests/powerpc/alignment/alignment_handler.c index 0453c50c949c..55ef15184057 100644 --- a/tools/testing/selftests/powerpc/alignment/alignment_handler.c +++ b/tools/testing/selftests/powerpc/alignment/alignment_handler.c @@ -9,7 +9,17 @@ * This selftest exercises the powerpc alignment fault handler. * * We create two sets of source and destination buffers, one in regular memory, - * the other cache-inhibited (we use /dev/fb0 for this). + * the other cache-inhibited (by default we use /dev/fb0 for this, but an + * alterative path for cache-inhibited memory may be provided). + * + * One way to get cache-inhibited memory is to use the "mem" kernel parameter + * to limit the kernel to less memory than actually exists. Addresses above + * the limit may still be accessed but will be treated as cache-inhibited. For + * example, if there is actually 4GB of memory and the parameter "mem=3GB" is + * used, memory from address 0xC0000000 onwards is treated as cache-inhibited. + * To access this region /dev/mem is used. The kernel should be configured + * without CONFIG_STRICT_DEVMEM. In this case use: + * ./alignment_handler /dev/mem 0xc0000000 * * We initialise the source buffers, then use whichever set of load/store * instructions is under test to copy bytes from the source buffers to the @@ -48,11 +58,14 @@ #include <asm/cputable.h> #include "utils.h" +#include "instructions.h" int bufsize; int debug; int testing; volatile int gotsig; +char *cipath = "/dev/fb0"; +long cioffset; void sighandler(int sig, siginfo_t *info, void *ctx) { @@ -84,6 +97,17 @@ void sighandler(int sig, siginfo_t *info, void *ctx) } \ rc |= do_test(#name, test_##name) +#define TESTP(name, ld_op, st_op, ld_reg, st_reg) \ + void test_##name(char *s, char *d) \ + { \ + asm volatile( \ + ld_op(ld_reg, %0, 0, 0) \ + st_op(st_reg, %1, 0, 0) \ + :: "r"(s), "r"(d), "r"(0) \ + : "memory", "vs0", "vs32", "r31"); \ + } \ + rc |= do_test(#name, test_##name) + #define LOAD_VSX_XFORM_TEST(op) TEST(op, op, stxvd2x, XFORM, 32, 32) #define STORE_VSX_XFORM_TEST(op) TEST(op, lxvd2x, op, XFORM, 32, 32) #define LOAD_VSX_DFORM_TEST(op) TEST(op, op, stxv, DFORM, 32, 32) @@ -103,6 +127,17 @@ void sighandler(int sig, siginfo_t *info, void *ctx) #define LOAD_FLOAT_XFORM_TEST(op) TEST(op, op, stfdx, XFORM, 0, 0) #define STORE_FLOAT_XFORM_TEST(op) TEST(op, lfdx, op, XFORM, 0, 0) +#define LOAD_MLS_PREFIX_TEST(op) TESTP(op, op, PSTD, 31, 31) +#define STORE_MLS_PREFIX_TEST(op) TESTP(op, PLD, op, 31, 31) + +#define LOAD_8LS_PREFIX_TEST(op) TESTP(op, op, PSTD, 31, 31) +#define STORE_8LS_PREFIX_TEST(op) TESTP(op, PLD, op, 31, 31) + +#define LOAD_FLOAT_MLS_PREFIX_TEST(op) TESTP(op, op, PSTFD, 0, 0) +#define STORE_FLOAT_MLS_PREFIX_TEST(op) TESTP(op, PLFD, op, 0, 0) + +#define LOAD_VSX_8LS_PREFIX_TEST(op, tail) TESTP(op, op, PSTXV ## tail, 0, 32) +#define STORE_VSX_8LS_PREFIX_TEST(op, tail) TESTP(op, PLXV ## tail, op, 32, 0) /* FIXME: Unimplemented tests: */ // STORE_DFORM_TEST(stq) /* FIXME: need two registers for quad */ @@ -195,17 +230,18 @@ int do_test(char *test_name, void (*test_func)(char *, char *)) printf("\tDoing %s:\t", test_name); - fd = open("/dev/fb0", O_RDWR); + fd = open(cipath, O_RDWR); if (fd < 0) { printf("\n"); - perror("Can't open /dev/fb0 now?"); + perror("Can't open ci file now?"); return 1; } - ci0 = mmap(NULL, bufsize, PROT_WRITE, MAP_SHARED, - fd, 0x0); - ci1 = mmap(NULL, bufsize, PROT_WRITE, MAP_SHARED, - fd, bufsize); + ci0 = mmap(NULL, bufsize, PROT_WRITE | PROT_READ, MAP_SHARED, + fd, cioffset); + ci1 = mmap(NULL, bufsize, PROT_WRITE | PROT_READ, MAP_SHARED, + fd, cioffset + bufsize); + if ((ci0 == MAP_FAILED) || (ci1 == MAP_FAILED)) { printf("\n"); perror("mmap failed"); @@ -270,11 +306,11 @@ int do_test(char *test_name, void (*test_func)(char *, char *)) return rc; } -static bool can_open_fb0(void) +static bool can_open_cifile(void) { int fd; - fd = open("/dev/fb0", O_RDWR); + fd = open(cipath, O_RDWR); if (fd < 0) return false; @@ -286,7 +322,7 @@ int test_alignment_handler_vsx_206(void) { int rc = 0; - SKIP_IF(!can_open_fb0()); + SKIP_IF(!can_open_cifile()); SKIP_IF(!have_hwcap(PPC_FEATURE_ARCH_2_06)); printf("VSX: 2.06B\n"); @@ -304,7 +340,7 @@ int test_alignment_handler_vsx_207(void) { int rc = 0; - SKIP_IF(!can_open_fb0()); + SKIP_IF(!can_open_cifile()); SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_2_07)); printf("VSX: 2.07B\n"); @@ -320,7 +356,7 @@ int test_alignment_handler_vsx_300(void) { int rc = 0; - SKIP_IF(!can_open_fb0()); + SKIP_IF(!can_open_cifile()); SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_3_00)); printf("VSX: 3.00B\n"); @@ -348,11 +384,30 @@ int test_alignment_handler_vsx_300(void) return rc; } +int test_alignment_handler_vsx_prefix(void) +{ + int rc = 0; + + SKIP_IF(!can_open_cifile()); + SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_3_1)); + + printf("VSX: PREFIX\n"); + LOAD_VSX_8LS_PREFIX_TEST(PLXSD, 0); + LOAD_VSX_8LS_PREFIX_TEST(PLXSSP, 0); + LOAD_VSX_8LS_PREFIX_TEST(PLXV0, 0); + LOAD_VSX_8LS_PREFIX_TEST(PLXV1, 1); + STORE_VSX_8LS_PREFIX_TEST(PSTXSD, 0); + STORE_VSX_8LS_PREFIX_TEST(PSTXSSP, 0); + STORE_VSX_8LS_PREFIX_TEST(PSTXV0, 0); + STORE_VSX_8LS_PREFIX_TEST(PSTXV1, 1); + return rc; +} + int test_alignment_handler_integer(void) { int rc = 0; - SKIP_IF(!can_open_fb0()); + SKIP_IF(!can_open_cifile()); printf("Integer\n"); LOAD_DFORM_TEST(lbz); @@ -408,7 +463,7 @@ int test_alignment_handler_integer_206(void) { int rc = 0; - SKIP_IF(!can_open_fb0()); + SKIP_IF(!can_open_cifile()); SKIP_IF(!have_hwcap(PPC_FEATURE_ARCH_2_06)); printf("Integer: 2.06\n"); @@ -419,11 +474,32 @@ int test_alignment_handler_integer_206(void) return rc; } +int test_alignment_handler_integer_prefix(void) +{ + int rc = 0; + + SKIP_IF(!can_open_cifile()); + SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_3_1)); + + printf("Integer: PREFIX\n"); + LOAD_MLS_PREFIX_TEST(PLBZ); + LOAD_MLS_PREFIX_TEST(PLHZ); + LOAD_MLS_PREFIX_TEST(PLHA); + LOAD_MLS_PREFIX_TEST(PLWZ); + LOAD_8LS_PREFIX_TEST(PLWA); + LOAD_8LS_PREFIX_TEST(PLD); + STORE_MLS_PREFIX_TEST(PSTB); + STORE_MLS_PREFIX_TEST(PSTH); + STORE_MLS_PREFIX_TEST(PSTW); + STORE_8LS_PREFIX_TEST(PSTD); + return rc; +} + int test_alignment_handler_vmx(void) { int rc = 0; - SKIP_IF(!can_open_fb0()); + SKIP_IF(!can_open_cifile()); SKIP_IF(!have_hwcap(PPC_FEATURE_HAS_ALTIVEC)); printf("VMX\n"); @@ -451,7 +527,7 @@ int test_alignment_handler_fp(void) { int rc = 0; - SKIP_IF(!can_open_fb0()); + SKIP_IF(!can_open_cifile()); printf("Floating point\n"); LOAD_FLOAT_DFORM_TEST(lfd); @@ -479,7 +555,7 @@ int test_alignment_handler_fp_205(void) { int rc = 0; - SKIP_IF(!can_open_fb0()); + SKIP_IF(!can_open_cifile()); SKIP_IF(!have_hwcap(PPC_FEATURE_ARCH_2_05)); printf("Floating point: 2.05\n"); @@ -497,7 +573,7 @@ int test_alignment_handler_fp_206(void) { int rc = 0; - SKIP_IF(!can_open_fb0()); + SKIP_IF(!can_open_cifile()); SKIP_IF(!have_hwcap(PPC_FEATURE_ARCH_2_06)); printf("Floating point: 2.06\n"); @@ -507,13 +583,32 @@ int test_alignment_handler_fp_206(void) return rc; } + +int test_alignment_handler_fp_prefix(void) +{ + int rc = 0; + + SKIP_IF(!can_open_cifile()); + SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_3_1)); + + printf("Floating point: PREFIX\n"); + LOAD_FLOAT_DFORM_TEST(lfs); + LOAD_FLOAT_MLS_PREFIX_TEST(PLFS); + LOAD_FLOAT_MLS_PREFIX_TEST(PLFD); + STORE_FLOAT_MLS_PREFIX_TEST(PSTFS); + STORE_FLOAT_MLS_PREFIX_TEST(PSTFD); + return rc; +} + void usage(char *prog) { - printf("Usage: %s [options]\n", prog); + printf("Usage: %s [options] [path [offset]]\n", prog); printf(" -d Enable debug error output\n"); printf("\n"); - printf("This test requires a POWER8 or POWER9 CPU and a usable "); - printf("framebuffer at /dev/fb0.\n"); + printf("This test requires a POWER8, POWER9 or POWER10 CPU "); + printf("and either a usable framebuffer at /dev/fb0 or "); + printf("the path to usable cache inhibited memory and optional "); + printf("offset to be provided\n"); } int main(int argc, char *argv[]) @@ -533,6 +628,13 @@ int main(int argc, char *argv[]) exit(1); } } + argc -= optind; + argv += optind; + + if (argc > 0) + cipath = argv[0]; + if (argc > 1) + cioffset = strtol(argv[1], 0, 0x10); bufsize = getpagesize(); @@ -552,10 +654,14 @@ int main(int argc, char *argv[]) "test_alignment_handler_vsx_207"); rc |= test_harness(test_alignment_handler_vsx_300, "test_alignment_handler_vsx_300"); + rc |= test_harness(test_alignment_handler_vsx_prefix, + "test_alignment_handler_vsx_prefix"); rc |= test_harness(test_alignment_handler_integer, "test_alignment_handler_integer"); rc |= test_harness(test_alignment_handler_integer_206, "test_alignment_handler_integer_206"); + rc |= test_harness(test_alignment_handler_integer_prefix, + "test_alignment_handler_integer_prefix"); rc |= test_harness(test_alignment_handler_vmx, "test_alignment_handler_vmx"); rc |= test_harness(test_alignment_handler_fp, @@ -564,5 +670,7 @@ int main(int argc, char *argv[]) "test_alignment_handler_fp_205"); rc |= test_harness(test_alignment_handler_fp_206, "test_alignment_handler_fp_206"); + rc |= test_harness(test_alignment_handler_fp_prefix, + "test_alignment_handler_fp_prefix"); return rc; } diff --git a/tools/testing/selftests/powerpc/benchmarks/context_switch.c b/tools/testing/selftests/powerpc/benchmarks/context_switch.c index a2e8c9da7fa5..d50cc05df495 100644 --- a/tools/testing/selftests/powerpc/benchmarks/context_switch.c +++ b/tools/testing/selftests/powerpc/benchmarks/context_switch.c @@ -19,6 +19,7 @@ #include <limits.h> #include <sys/time.h> #include <sys/syscall.h> +#include <sys/sysinfo.h> #include <sys/types.h> #include <sys/shm.h> #include <linux/futex.h> @@ -104,8 +105,9 @@ static void start_thread_on(void *(*fn)(void *), void *arg, unsigned long cpu) static void start_process_on(void *(*fn)(void *), void *arg, unsigned long cpu) { - int pid; - cpu_set_t cpuset; + int pid, ncpus; + cpu_set_t *cpuset; + size_t size; pid = fork(); if (pid == -1) { @@ -116,14 +118,23 @@ static void start_process_on(void *(*fn)(void *), void *arg, unsigned long cpu) if (pid) return; - CPU_ZERO(&cpuset); - CPU_SET(cpu, &cpuset); + ncpus = get_nprocs(); + size = CPU_ALLOC_SIZE(ncpus); + cpuset = CPU_ALLOC(ncpus); + if (!cpuset) { + perror("malloc"); + exit(1); + } + CPU_ZERO_S(size, cpuset); + CPU_SET_S(cpu, size, cpuset); - if (sched_setaffinity(0, sizeof(cpuset), &cpuset)) { + if (sched_setaffinity(0, size, cpuset)) { perror("sched_setaffinity"); + CPU_FREE(cpuset); exit(1); } + CPU_FREE(cpuset); fn(arg); exit(0); diff --git a/tools/testing/selftests/powerpc/copyloops/.gitignore b/tools/testing/selftests/powerpc/copyloops/.gitignore index ddaf140b8255..994b11af765c 100644 --- a/tools/testing/selftests/powerpc/copyloops/.gitignore +++ b/tools/testing/selftests/powerpc/copyloops/.gitignore @@ -12,4 +12,4 @@ memcpy_p7_t1 copyuser_64_exc_t0 copyuser_64_exc_t1 copyuser_64_exc_t2 -memcpy_mcsafe_64 +copy_mc_64 diff --git a/tools/testing/selftests/powerpc/copyloops/Makefile b/tools/testing/selftests/powerpc/copyloops/Makefile index 0917983a1c78..3095b1f1c02b 100644 --- a/tools/testing/selftests/powerpc/copyloops/Makefile +++ b/tools/testing/selftests/powerpc/copyloops/Makefile @@ -12,7 +12,7 @@ ASFLAGS = $(CFLAGS) -Wa,-mpower4 TEST_GEN_PROGS := copyuser_64_t0 copyuser_64_t1 copyuser_64_t2 \ copyuser_p7_t0 copyuser_p7_t1 \ memcpy_64_t0 memcpy_64_t1 memcpy_64_t2 \ - memcpy_p7_t0 memcpy_p7_t1 memcpy_mcsafe_64 \ + memcpy_p7_t0 memcpy_p7_t1 copy_mc_64 \ copyuser_64_exc_t0 copyuser_64_exc_t1 copyuser_64_exc_t2 EXTRA_SOURCES := validate.c ../harness.c stubs.S @@ -45,9 +45,9 @@ $(OUTPUT)/memcpy_p7_t%: memcpy_power7.S $(EXTRA_SOURCES) -D SELFTEST_CASE=$(subst memcpy_p7_t,,$(notdir $@)) \ -o $@ $^ -$(OUTPUT)/memcpy_mcsafe_64: memcpy_mcsafe_64.S $(EXTRA_SOURCES) +$(OUTPUT)/copy_mc_64: copy_mc_64.S $(EXTRA_SOURCES) $(CC) $(CPPFLAGS) $(CFLAGS) \ - -D COPY_LOOP=test_memcpy_mcsafe \ + -D COPY_LOOP=test_copy_mc_generic \ -o $@ $^ $(OUTPUT)/copyuser_64_exc_t%: copyuser_64.S exc_validate.c ../harness.c \ diff --git a/tools/testing/selftests/powerpc/copyloops/copy_mc_64.S b/tools/testing/selftests/powerpc/copyloops/copy_mc_64.S new file mode 120000 index 000000000000..dcbe06d500fb --- /dev/null +++ b/tools/testing/selftests/powerpc/copyloops/copy_mc_64.S @@ -0,0 +1 @@ +../../../../../arch/powerpc/lib/copy_mc_64.S
\ No newline at end of file diff --git a/tools/testing/selftests/powerpc/copyloops/memcpy_mcsafe_64.S b/tools/testing/selftests/powerpc/copyloops/memcpy_mcsafe_64.S deleted file mode 120000 index f0feef3062f6..000000000000 --- a/tools/testing/selftests/powerpc/copyloops/memcpy_mcsafe_64.S +++ /dev/null @@ -1 +0,0 @@ -../../../../../arch/powerpc/lib/memcpy_mcsafe_64.S
\ No newline at end of file diff --git a/tools/testing/selftests/powerpc/eeh/eeh-functions.sh b/tools/testing/selftests/powerpc/eeh/eeh-functions.sh index f52ed92b53e7..00dc32c0ed75 100755 --- a/tools/testing/selftests/powerpc/eeh/eeh-functions.sh +++ b/tools/testing/selftests/powerpc/eeh/eeh-functions.sh @@ -5,12 +5,17 @@ pe_ok() { local dev="$1" local path="/sys/bus/pci/devices/$dev/eeh_pe_state" - if ! [ -e "$path" ] ; then + # if a driver doesn't support the error handling callbacks then the + # device is recovered by removing and re-probing it. This causes the + # sysfs directory to disappear so read the PE state once and squash + # any potential error messages + local eeh_state="$(cat $path 2>/dev/null)" + if [ -z "$eeh_state" ]; then return 1; fi - local fw_state="$(cut -d' ' -f1 < $path)" - local sw_state="$(cut -d' ' -f2 < $path)" + local fw_state="$(echo $eeh_state | cut -d' ' -f1)" + local sw_state="$(echo $eeh_state | cut -d' ' -f2)" # If EEH_PE_ISOLATED or EEH_PE_RECOVERING are set then the PE is in an # error state or being recovered. Either way, not ok. diff --git a/tools/testing/selftests/powerpc/include/instructions.h b/tools/testing/selftests/powerpc/include/instructions.h index f36061eb6f0f..4efa6314bd96 100644 --- a/tools/testing/selftests/powerpc/include/instructions.h +++ b/tools/testing/selftests/powerpc/include/instructions.h @@ -66,4 +66,81 @@ static inline int paste_last(void *i) #define PPC_INST_PASTE __PASTE(0, 0, 0, 0) #define PPC_INST_PASTE_LAST __PASTE(0, 0, 1, 1) +/* This defines the prefixed load/store instructions */ +#ifdef __ASSEMBLY__ +# define stringify_in_c(...) __VA_ARGS__ +#else +# define __stringify_in_c(...) #__VA_ARGS__ +# define stringify_in_c(...) __stringify_in_c(__VA_ARGS__) " " +#endif + +#define __PPC_RA(a) (((a) & 0x1f) << 16) +#define __PPC_RS(s) (((s) & 0x1f) << 21) +#define __PPC_RT(t) __PPC_RS(t) +#define __PPC_PREFIX_R(r) (((r) & 0x1) << 20) + +#define PPC_PREFIX_MLS 0x06000000 +#define PPC_PREFIX_8LS 0x04000000 + +#define PPC_INST_LBZ 0x88000000 +#define PPC_INST_LHZ 0xa0000000 +#define PPC_INST_LHA 0xa8000000 +#define PPC_INST_LWZ 0x80000000 +#define PPC_INST_STB 0x98000000 +#define PPC_INST_STH 0xb0000000 +#define PPC_INST_STW 0x90000000 +#define PPC_INST_STD 0xf8000000 +#define PPC_INST_LFS 0xc0000000 +#define PPC_INST_LFD 0xc8000000 +#define PPC_INST_STFS 0xd0000000 +#define PPC_INST_STFD 0xd8000000 + +#define PREFIX_MLS(instr, t, a, r, d) stringify_in_c(.balign 64, , 4;) \ + stringify_in_c(.long PPC_PREFIX_MLS | \ + __PPC_PREFIX_R(r) | \ + (((d) >> 16) & 0x3ffff);) \ + stringify_in_c(.long (instr) | \ + __PPC_RT(t) | \ + __PPC_RA(a) | \ + ((d) & 0xffff);\n) + +#define PREFIX_8LS(instr, t, a, r, d) stringify_in_c(.balign 64, , 4;) \ + stringify_in_c(.long PPC_PREFIX_8LS | \ + __PPC_PREFIX_R(r) | \ + (((d) >> 16) & 0x3ffff);) \ + stringify_in_c(.long (instr) | \ + __PPC_RT(t) | \ + __PPC_RA(a) | \ + ((d) & 0xffff);\n) + +/* Prefixed Integer Load/Store instructions */ +#define PLBZ(t, a, r, d) PREFIX_MLS(PPC_INST_LBZ, t, a, r, d) +#define PLHZ(t, a, r, d) PREFIX_MLS(PPC_INST_LHZ, t, a, r, d) +#define PLHA(t, a, r, d) PREFIX_MLS(PPC_INST_LHA, t, a, r, d) +#define PLWZ(t, a, r, d) PREFIX_MLS(PPC_INST_LWZ, t, a, r, d) +#define PLWA(t, a, r, d) PREFIX_8LS(0xa4000000, t, a, r, d) +#define PLD(t, a, r, d) PREFIX_8LS(0xe4000000, t, a, r, d) +#define PLQ(t, a, r, d) PREFIX_8LS(0xe0000000, t, a, r, d) +#define PSTB(s, a, r, d) PREFIX_MLS(PPC_INST_STB, s, a, r, d) +#define PSTH(s, a, r, d) PREFIX_MLS(PPC_INST_STH, s, a, r, d) +#define PSTW(s, a, r, d) PREFIX_MLS(PPC_INST_STW, s, a, r, d) +#define PSTD(s, a, r, d) PREFIX_8LS(0xf4000000, s, a, r, d) +#define PSTQ(s, a, r, d) PREFIX_8LS(0xf0000000, s, a, r, d) + +/* Prefixed Floating-Point Load/Store Instructions */ +#define PLFS(frt, a, r, d) PREFIX_MLS(PPC_INST_LFS, frt, a, r, d) +#define PLFD(frt, a, r, d) PREFIX_MLS(PPC_INST_LFD, frt, a, r, d) +#define PSTFS(frs, a, r, d) PREFIX_MLS(PPC_INST_STFS, frs, a, r, d) +#define PSTFD(frs, a, r, d) PREFIX_MLS(PPC_INST_STFD, frs, a, r, d) + +/* Prefixed VSX Load/Store Instructions */ +#define PLXSD(vrt, a, r, d) PREFIX_8LS(0xa8000000, vrt, a, r, d) +#define PLXSSP(vrt, a, r, d) PREFIX_8LS(0xac000000, vrt, a, r, d) +#define PLXV0(s, a, r, d) PREFIX_8LS(0xc8000000, s, a, r, d) +#define PLXV1(s, a, r, d) PREFIX_8LS(0xcc000000, s, a, r, d) +#define PSTXSD(vrs, a, r, d) PREFIX_8LS(0xb8000000, vrs, a, r, d) +#define PSTXSSP(vrs, a, r, d) PREFIX_8LS(0xbc000000, vrs, a, r, d) +#define PSTXV0(s, a, r, d) PREFIX_8LS(0xd8000000, s, a, r, d) +#define PSTXV1(s, a, r, d) PREFIX_8LS(0xdc000000, s, a, r, d) + #endif /* _SELFTESTS_POWERPC_INSTRUCTIONS_H */ diff --git a/tools/testing/selftests/powerpc/include/pkeys.h b/tools/testing/selftests/powerpc/include/pkeys.h new file mode 100644 index 000000000000..3312cb1b058d --- /dev/null +++ b/tools/testing/selftests/powerpc/include/pkeys.h @@ -0,0 +1,136 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright 2020, Sandipan Das, IBM Corp. + */ + +#ifndef _SELFTESTS_POWERPC_PKEYS_H +#define _SELFTESTS_POWERPC_PKEYS_H + +#include <sys/mman.h> + +#include "reg.h" +#include "utils.h" + +/* + * Older versions of libc use the Intel-specific access rights. + * Hence, override the definitions as they might be incorrect. + */ +#undef PKEY_DISABLE_ACCESS +#define PKEY_DISABLE_ACCESS 0x3 + +#undef PKEY_DISABLE_WRITE +#define PKEY_DISABLE_WRITE 0x2 + +#undef PKEY_DISABLE_EXECUTE +#define PKEY_DISABLE_EXECUTE 0x4 + +/* Older versions of libc do not not define this */ +#ifndef SEGV_PKUERR +#define SEGV_PKUERR 4 +#endif + +#define SI_PKEY_OFFSET 0x20 + +#define __NR_pkey_mprotect 386 +#define __NR_pkey_alloc 384 +#define __NR_pkey_free 385 + +#define PKEY_BITS_PER_PKEY 2 +#define NR_PKEYS 32 +#define PKEY_BITS_MASK ((1UL << PKEY_BITS_PER_PKEY) - 1) + +inline unsigned long pkeyreg_get(void) +{ + return mfspr(SPRN_AMR); +} + +inline void pkeyreg_set(unsigned long amr) +{ + set_amr(amr); +} + +void pkey_set_rights(int pkey, unsigned long rights) +{ + unsigned long amr, shift; + + shift = (NR_PKEYS - pkey - 1) * PKEY_BITS_PER_PKEY; + amr = pkeyreg_get(); + amr &= ~(PKEY_BITS_MASK << shift); + amr |= (rights & PKEY_BITS_MASK) << shift; + pkeyreg_set(amr); +} + +int sys_pkey_mprotect(void *addr, size_t len, int prot, int pkey) +{ + return syscall(__NR_pkey_mprotect, addr, len, prot, pkey); +} + +int sys_pkey_alloc(unsigned long flags, unsigned long rights) +{ + return syscall(__NR_pkey_alloc, flags, rights); +} + +int sys_pkey_free(int pkey) +{ + return syscall(__NR_pkey_free, pkey); +} + +int pkeys_unsupported(void) +{ + bool hash_mmu = false; + int pkey; + + /* Protection keys are currently supported on Hash MMU only */ + FAIL_IF(using_hash_mmu(&hash_mmu)); + SKIP_IF(!hash_mmu); + + /* Check if the system call is supported */ + pkey = sys_pkey_alloc(0, 0); + SKIP_IF(pkey < 0); + sys_pkey_free(pkey); + + return 0; +} + +int siginfo_pkey(siginfo_t *si) +{ + /* + * In older versions of libc, siginfo_t does not have si_pkey as + * a member. + */ +#ifdef si_pkey + return si->si_pkey; +#else + return *((int *)(((char *) si) + SI_PKEY_OFFSET)); +#endif +} + +#define pkey_rights(r) ({ \ + static char buf[4] = "rwx"; \ + unsigned int amr_bits; \ + if ((r) & PKEY_DISABLE_EXECUTE) \ + buf[2] = '-'; \ + amr_bits = (r) & PKEY_BITS_MASK; \ + if (amr_bits & PKEY_DISABLE_WRITE) \ + buf[1] = '-'; \ + if (amr_bits & PKEY_DISABLE_ACCESS & ~PKEY_DISABLE_WRITE) \ + buf[0] = '-'; \ + buf; \ +}) + +unsigned long next_pkey_rights(unsigned long rights) +{ + if (rights == PKEY_DISABLE_ACCESS) + return PKEY_DISABLE_EXECUTE; + else if (rights == (PKEY_DISABLE_ACCESS | PKEY_DISABLE_EXECUTE)) + return 0; + + if ((rights & PKEY_BITS_MASK) == 0) + rights |= PKEY_DISABLE_WRITE; + else if ((rights & PKEY_BITS_MASK) == PKEY_DISABLE_WRITE) + rights |= PKEY_DISABLE_ACCESS; + + return rights; +} + +#endif /* _SELFTESTS_POWERPC_PKEYS_H */ diff --git a/tools/testing/selftests/powerpc/include/reg.h b/tools/testing/selftests/powerpc/include/reg.h index 022c5076b2c5..c0f2742a3a59 100644 --- a/tools/testing/selftests/powerpc/include/reg.h +++ b/tools/testing/selftests/powerpc/include/reg.h @@ -57,6 +57,12 @@ #define SPRN_PPR 896 /* Program Priority Register */ #define SPRN_AMR 13 /* Authority Mask Register - problem state */ +#define set_amr(v) asm volatile("isync;" \ + "mtspr " __stringify(SPRN_AMR) ",%0;" \ + "isync" : \ + : "r" ((unsigned long)(v)) \ + : "memory") + /* TEXASR register bits */ #define TEXASR_FC 0xFE00000000000000 #define TEXASR_FP 0x0100000000000000 diff --git a/tools/testing/selftests/powerpc/include/utils.h b/tools/testing/selftests/powerpc/include/utils.h index e089a0c30d9a..71d2924f5b8b 100644 --- a/tools/testing/selftests/powerpc/include/utils.h +++ b/tools/testing/selftests/powerpc/include/utils.h @@ -42,6 +42,16 @@ int perf_event_enable(int fd); int perf_event_disable(int fd); int perf_event_reset(int fd); +#if !defined(__GLIBC_PREREQ) || !__GLIBC_PREREQ(2, 30) +#include <unistd.h> +#include <sys/syscall.h> + +static inline pid_t gettid(void) +{ + return syscall(SYS_gettid); +} +#endif + static inline bool have_hwcap(unsigned long ftr) { return ((unsigned long)get_auxv_entry(AT_HWCAP) & ftr) == ftr; @@ -60,6 +70,7 @@ static inline bool have_hwcap2(unsigned long ftr2) #endif bool is_ppc64le(void); +int using_hash_mmu(bool *using_hash); /* Yes, this is evil */ #define FAIL_IF(x) \ @@ -71,6 +82,15 @@ do { \ } \ } while (0) +#define FAIL_IF_EXIT(x) \ +do { \ + if ((x)) { \ + fprintf(stderr, \ + "[FAIL] Test FAILED on line %d\n", __LINE__); \ + _exit(1); \ + } \ +} while (0) + /* The test harness uses this, yes it's gross */ #define MAGIC_SKIP_RETURN_VALUE 99 @@ -96,11 +116,20 @@ do { \ #define _str(s) #s #define str(s) _str(s) +#define sigsafe_err(msg) ({ \ + ssize_t nbytes __attribute__((unused)); \ + nbytes = write(STDERR_FILENO, msg, strlen(msg)); }) + /* POWER9 feature */ #ifndef PPC_FEATURE2_ARCH_3_00 #define PPC_FEATURE2_ARCH_3_00 0x00800000 #endif +/* POWER10 feature */ +#ifndef PPC_FEATURE2_ARCH_3_1 +#define PPC_FEATURE2_ARCH_3_1 0x00040000 +#endif + #if defined(__powerpc64__) #define UCONTEXT_NIA(UC) (UC)->uc_mcontext.gp_regs[PT_NIP] #define UCONTEXT_MSR(UC) (UC)->uc_mcontext.gp_regs[PT_MSR] diff --git a/tools/testing/selftests/powerpc/math/.gitignore b/tools/testing/selftests/powerpc/math/.gitignore index e31ca6f453ed..d0c23b2e4b60 100644 --- a/tools/testing/selftests/powerpc/math/.gitignore +++ b/tools/testing/selftests/powerpc/math/.gitignore @@ -6,3 +6,4 @@ vmx_preempt fpu_signal vmx_signal vsx_preempt +fpu_denormal diff --git a/tools/testing/selftests/powerpc/math/Makefile b/tools/testing/selftests/powerpc/math/Makefile index 11a10d7a2bbd..fcc91c205984 100644 --- a/tools/testing/selftests/powerpc/math/Makefile +++ b/tools/testing/selftests/powerpc/math/Makefile @@ -1,5 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 -TEST_GEN_PROGS := fpu_syscall fpu_preempt fpu_signal vmx_syscall vmx_preempt vmx_signal vsx_preempt +TEST_GEN_PROGS := fpu_syscall fpu_preempt fpu_signal fpu_denormal vmx_syscall vmx_preempt vmx_signal vsx_preempt top_srcdir = ../../../../.. include ../../lib.mk @@ -11,9 +11,9 @@ $(OUTPUT)/fpu_syscall: fpu_asm.S $(OUTPUT)/fpu_preempt: fpu_asm.S $(OUTPUT)/fpu_signal: fpu_asm.S -$(OUTPUT)/vmx_syscall: vmx_asm.S -$(OUTPUT)/vmx_preempt: vmx_asm.S -$(OUTPUT)/vmx_signal: vmx_asm.S +$(OUTPUT)/vmx_syscall: vmx_asm.S ../utils.c +$(OUTPUT)/vmx_preempt: vmx_asm.S ../utils.c +$(OUTPUT)/vmx_signal: vmx_asm.S ../utils.c $(OUTPUT)/vsx_preempt: CFLAGS += -mvsx -$(OUTPUT)/vsx_preempt: vsx_asm.S +$(OUTPUT)/vsx_preempt: vsx_asm.S ../utils.c diff --git a/tools/testing/selftests/powerpc/math/fpu_denormal.c b/tools/testing/selftests/powerpc/math/fpu_denormal.c new file mode 100644 index 000000000000..5f96682abaa8 --- /dev/null +++ b/tools/testing/selftests/powerpc/math/fpu_denormal.c @@ -0,0 +1,38 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright IBM Corp. 2020 + * + * This test attempts to cause a FP denormal exception on POWER8 CPUs. Unfortunately + * if the denormal handler is not configured or working properly, this can cause a bad + * crash in kernel mode when the kernel tries to save FP registers when the process + * exits. + */ + +#include <stdio.h> +#include <string.h> + +#include "utils.h" + +static int test_denormal_fpu(void) +{ + unsigned int m32; + unsigned long m64; + volatile float f; + volatile double d; + + /* try to induce lfs <denormal> ; stfd */ + + m32 = 0x00715fcf; /* random denormal */ + memcpy((float *)&f, &m32, sizeof(f)); + d = f; + memcpy(&m64, (double *)&d, sizeof(d)); + + FAIL_IF((long)(m64 != 0x380c57f3c0000000)); /* renormalised value */ + + return 0; +} + +int main(int argc, char *argv[]) +{ + return test_harness(test_denormal_fpu, "fpu_denormal"); +} diff --git a/tools/testing/selftests/powerpc/math/vmx_preempt.c b/tools/testing/selftests/powerpc/math/vmx_preempt.c index 2e059f154e77..6761d6ce30ec 100644 --- a/tools/testing/selftests/powerpc/math/vmx_preempt.c +++ b/tools/testing/selftests/powerpc/math/vmx_preempt.c @@ -57,6 +57,9 @@ int test_preempt_vmx(void) int i, rc, threads; pthread_t *tids; + // vcmpequd used in vmx_asm.S is v2.07 + SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_2_07)); + threads = sysconf(_SC_NPROCESSORS_ONLN) * THREAD_FACTOR; tids = malloc(threads * sizeof(pthread_t)); FAIL_IF(!tids); diff --git a/tools/testing/selftests/powerpc/math/vmx_signal.c b/tools/testing/selftests/powerpc/math/vmx_signal.c index 785a48e0976f..b340a5c4e79d 100644 --- a/tools/testing/selftests/powerpc/math/vmx_signal.c +++ b/tools/testing/selftests/powerpc/math/vmx_signal.c @@ -96,6 +96,9 @@ int test_signal_vmx(void) void *rc_p; pthread_t *tids; + // vcmpequd used in vmx_asm.S is v2.07 + SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_2_07)); + threads = sysconf(_SC_NPROCESSORS_ONLN) * THREAD_FACTOR; tids = malloc(threads * sizeof(pthread_t)); FAIL_IF(!tids); diff --git a/tools/testing/selftests/powerpc/math/vmx_syscall.c b/tools/testing/selftests/powerpc/math/vmx_syscall.c index 9ee293cc868e..03c78dfe3444 100644 --- a/tools/testing/selftests/powerpc/math/vmx_syscall.c +++ b/tools/testing/selftests/powerpc/math/vmx_syscall.c @@ -49,9 +49,14 @@ int test_vmx_syscall(void) * Setup an environment with much context switching */ pid_t pid2; - pid_t pid = fork(); + pid_t pid; int ret; int child_ret; + + // vcmpequd used in vmx_asm.S is v2.07 + SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_2_07)); + + pid = fork(); FAIL_IF(pid == -1); pid2 = fork(); diff --git a/tools/testing/selftests/powerpc/math/vsx_preempt.c b/tools/testing/selftests/powerpc/math/vsx_preempt.c index 63de9c6e2cd3..d1601bb889d4 100644 --- a/tools/testing/selftests/powerpc/math/vsx_preempt.c +++ b/tools/testing/selftests/powerpc/math/vsx_preempt.c @@ -92,6 +92,8 @@ int test_preempt_vsx(void) int i, rc, threads; pthread_t *tids; + SKIP_IF(!have_hwcap(PPC_FEATURE_HAS_VSX)); + threads = sysconf(_SC_NPROCESSORS_ONLN) * THREAD_FACTOR; tids = malloc(threads * sizeof(pthread_t)); FAIL_IF(!tids); diff --git a/tools/testing/selftests/powerpc/mm/.gitignore b/tools/testing/selftests/powerpc/mm/.gitignore index 2ca523255b1b..aac4a59f9e28 100644 --- a/tools/testing/selftests/powerpc/mm/.gitignore +++ b/tools/testing/selftests/powerpc/mm/.gitignore @@ -8,3 +8,7 @@ wild_bctr large_vm_fork_separation bad_accesses tlbie_test +pkey_exec_prot +pkey_siginfo +stack_expansion_ldst +stack_expansion_signal diff --git a/tools/testing/selftests/powerpc/mm/Makefile b/tools/testing/selftests/powerpc/mm/Makefile index b9103c4bb414..defe488d6bf1 100644 --- a/tools/testing/selftests/powerpc/mm/Makefile +++ b/tools/testing/selftests/powerpc/mm/Makefile @@ -3,22 +3,32 @@ noarg: $(MAKE) -C ../ TEST_GEN_PROGS := hugetlb_vs_thp_test subpage_prot prot_sao segv_errors wild_bctr \ - large_vm_fork_separation bad_accesses + large_vm_fork_separation bad_accesses pkey_exec_prot \ + pkey_siginfo stack_expansion_signal stack_expansion_ldst + TEST_GEN_PROGS_EXTENDED := tlbie_test TEST_GEN_FILES := tempfile top_srcdir = ../../../../.. include ../../lib.mk -$(TEST_GEN_PROGS): ../harness.c +$(TEST_GEN_PROGS): ../harness.c ../utils.c $(OUTPUT)/prot_sao: ../utils.c $(OUTPUT)/wild_bctr: CFLAGS += -m64 $(OUTPUT)/large_vm_fork_separation: CFLAGS += -m64 $(OUTPUT)/bad_accesses: CFLAGS += -m64 +$(OUTPUT)/pkey_exec_prot: CFLAGS += -m64 +$(OUTPUT)/pkey_siginfo: CFLAGS += -m64 + +$(OUTPUT)/stack_expansion_signal: ../utils.c ../pmu/lib.c + +$(OUTPUT)/stack_expansion_ldst: CFLAGS += -fno-stack-protector +$(OUTPUT)/stack_expansion_ldst: ../utils.c $(OUTPUT)/tempfile: dd if=/dev/zero of=$@ bs=64k count=1 $(OUTPUT)/tlbie_test: LDLIBS += -lpthread +$(OUTPUT)/pkey_siginfo: LDLIBS += -lpthread diff --git a/tools/testing/selftests/powerpc/mm/bad_accesses.c b/tools/testing/selftests/powerpc/mm/bad_accesses.c index adc465f499ef..a864ed7e2008 100644 --- a/tools/testing/selftests/powerpc/mm/bad_accesses.c +++ b/tools/testing/selftests/powerpc/mm/bad_accesses.c @@ -64,34 +64,6 @@ int bad_access(char *p, bool write) return 0; } -static int using_hash_mmu(bool *using_hash) -{ - char line[128]; - FILE *f; - int rc; - - f = fopen("/proc/cpuinfo", "r"); - FAIL_IF(!f); - - rc = 0; - while (fgets(line, sizeof(line), f) != NULL) { - if (strcmp(line, "MMU : Hash\n") == 0) { - *using_hash = true; - goto out; - } - - if (strcmp(line, "MMU : Radix\n") == 0) { - *using_hash = false; - goto out; - } - } - - rc = -1; -out: - fclose(f); - return rc; -} - static int test(void) { unsigned long i, j, addr, region_shift, page_shift, page_size; diff --git a/tools/testing/selftests/powerpc/mm/pkey_exec_prot.c b/tools/testing/selftests/powerpc/mm/pkey_exec_prot.c new file mode 100644 index 000000000000..9e5c7f3f498a --- /dev/null +++ b/tools/testing/selftests/powerpc/mm/pkey_exec_prot.c @@ -0,0 +1,294 @@ +// SPDX-License-Identifier: GPL-2.0+ + +/* + * Copyright 2020, Sandipan Das, IBM Corp. + * + * Test if applying execute protection on pages using memory + * protection keys works as expected. + */ + +#define _GNU_SOURCE +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <signal.h> + +#include <unistd.h> + +#include "pkeys.h" + +#define PPC_INST_NOP 0x60000000 +#define PPC_INST_TRAP 0x7fe00008 +#define PPC_INST_BLR 0x4e800020 + +static volatile sig_atomic_t fault_pkey, fault_code, fault_type; +static volatile sig_atomic_t remaining_faults; +static volatile unsigned int *fault_addr; +static unsigned long pgsize, numinsns; +static unsigned int *insns; + +static void trap_handler(int signum, siginfo_t *sinfo, void *ctx) +{ + /* Check if this fault originated from the expected address */ + if (sinfo->si_addr != (void *) fault_addr) + sigsafe_err("got a fault for an unexpected address\n"); + + _exit(1); +} + +static void segv_handler(int signum, siginfo_t *sinfo, void *ctx) +{ + int signal_pkey; + + signal_pkey = siginfo_pkey(sinfo); + fault_code = sinfo->si_code; + + /* Check if this fault originated from the expected address */ + if (sinfo->si_addr != (void *) fault_addr) { + sigsafe_err("got a fault for an unexpected address\n"); + _exit(1); + } + + /* Check if too many faults have occurred for a single test case */ + if (!remaining_faults) { + sigsafe_err("got too many faults for the same address\n"); + _exit(1); + } + + + /* Restore permissions in order to continue */ + switch (fault_code) { + case SEGV_ACCERR: + if (mprotect(insns, pgsize, PROT_READ | PROT_WRITE)) { + sigsafe_err("failed to set access permissions\n"); + _exit(1); + } + break; + case SEGV_PKUERR: + if (signal_pkey != fault_pkey) { + sigsafe_err("got a fault for an unexpected pkey\n"); + _exit(1); + } + + switch (fault_type) { + case PKEY_DISABLE_ACCESS: + pkey_set_rights(fault_pkey, 0); + break; + case PKEY_DISABLE_EXECUTE: + /* + * Reassociate the exec-only pkey with the region + * to be able to continue. Unlike AMR, we cannot + * set IAMR directly from userspace to restore the + * permissions. + */ + if (mprotect(insns, pgsize, PROT_EXEC)) { + sigsafe_err("failed to set execute permissions\n"); + _exit(1); + } + break; + default: + sigsafe_err("got a fault with an unexpected type\n"); + _exit(1); + } + break; + default: + sigsafe_err("got a fault with an unexpected code\n"); + _exit(1); + } + + remaining_faults--; +} + +static int test(void) +{ + struct sigaction segv_act, trap_act; + unsigned long rights; + int pkey, ret, i; + + ret = pkeys_unsupported(); + if (ret) + return ret; + + /* Setup SIGSEGV handler */ + segv_act.sa_handler = 0; + segv_act.sa_sigaction = segv_handler; + FAIL_IF(sigprocmask(SIG_SETMASK, 0, &segv_act.sa_mask) != 0); + segv_act.sa_flags = SA_SIGINFO; + segv_act.sa_restorer = 0; + FAIL_IF(sigaction(SIGSEGV, &segv_act, NULL) != 0); + + /* Setup SIGTRAP handler */ + trap_act.sa_handler = 0; + trap_act.sa_sigaction = trap_handler; + FAIL_IF(sigprocmask(SIG_SETMASK, 0, &trap_act.sa_mask) != 0); + trap_act.sa_flags = SA_SIGINFO; + trap_act.sa_restorer = 0; + FAIL_IF(sigaction(SIGTRAP, &trap_act, NULL) != 0); + + /* Setup executable region */ + pgsize = getpagesize(); + numinsns = pgsize / sizeof(unsigned int); + insns = (unsigned int *) mmap(NULL, pgsize, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + FAIL_IF(insns == MAP_FAILED); + + /* Write the instruction words */ + for (i = 1; i < numinsns - 1; i++) + insns[i] = PPC_INST_NOP; + + /* + * Set the first instruction as an unconditional trap. If + * the last write to this address succeeds, this should + * get overwritten by a no-op. + */ + insns[0] = PPC_INST_TRAP; + + /* + * Later, to jump to the executable region, we use a branch + * and link instruction (bctrl) which sets the return address + * automatically in LR. Use that to return back. + */ + insns[numinsns - 1] = PPC_INST_BLR; + + /* Allocate a pkey that restricts execution */ + rights = PKEY_DISABLE_EXECUTE; + pkey = sys_pkey_alloc(0, rights); + FAIL_IF(pkey < 0); + + /* + * Pick the first instruction's address from the executable + * region. + */ + fault_addr = insns; + + /* The following two cases will avoid SEGV_PKUERR */ + fault_type = -1; + fault_pkey = -1; + + /* + * Read an instruction word from the address when AMR bits + * are not set i.e. the pkey permits both read and write + * access. + * + * This should not generate a fault as having PROT_EXEC + * implies PROT_READ on GNU systems. The pkey currently + * restricts execution only based on the IAMR bits. The + * AMR bits are cleared. + */ + remaining_faults = 0; + FAIL_IF(sys_pkey_mprotect(insns, pgsize, PROT_EXEC, pkey) != 0); + printf("read from %p, pkey permissions are %s\n", fault_addr, + pkey_rights(rights)); + i = *fault_addr; + FAIL_IF(remaining_faults != 0); + + /* + * Write an instruction word to the address when AMR bits + * are not set i.e. the pkey permits both read and write + * access. + * + * This should generate an access fault as having just + * PROT_EXEC also restricts writes. The pkey currently + * restricts execution only based on the IAMR bits. The + * AMR bits are cleared. + */ + remaining_faults = 1; + FAIL_IF(sys_pkey_mprotect(insns, pgsize, PROT_EXEC, pkey) != 0); + printf("write to %p, pkey permissions are %s\n", fault_addr, + pkey_rights(rights)); + *fault_addr = PPC_INST_TRAP; + FAIL_IF(remaining_faults != 0 || fault_code != SEGV_ACCERR); + + /* The following three cases will generate SEGV_PKUERR */ + rights |= PKEY_DISABLE_ACCESS; + fault_type = PKEY_DISABLE_ACCESS; + fault_pkey = pkey; + + /* + * Read an instruction word from the address when AMR bits + * are set i.e. the pkey permits neither read nor write + * access. + * + * This should generate a pkey fault based on AMR bits only + * as having PROT_EXEC implicitly allows reads. + */ + remaining_faults = 1; + FAIL_IF(sys_pkey_mprotect(insns, pgsize, PROT_EXEC, pkey) != 0); + pkey_set_rights(pkey, rights); + printf("read from %p, pkey permissions are %s\n", fault_addr, + pkey_rights(rights)); + i = *fault_addr; + FAIL_IF(remaining_faults != 0 || fault_code != SEGV_PKUERR); + + /* + * Write an instruction word to the address when AMR bits + * are set i.e. the pkey permits neither read nor write + * access. + * + * This should generate two faults. First, a pkey fault + * based on AMR bits and then an access fault since + * PROT_EXEC does not allow writes. + */ + remaining_faults = 2; + FAIL_IF(sys_pkey_mprotect(insns, pgsize, PROT_EXEC, pkey) != 0); + pkey_set_rights(pkey, rights); + printf("write to %p, pkey permissions are %s\n", fault_addr, + pkey_rights(rights)); + *fault_addr = PPC_INST_NOP; + FAIL_IF(remaining_faults != 0 || fault_code != SEGV_ACCERR); + + /* Free the current pkey */ + sys_pkey_free(pkey); + + rights = 0; + do { + /* + * Allocate pkeys with all valid combinations of read, + * write and execute restrictions. + */ + pkey = sys_pkey_alloc(0, rights); + FAIL_IF(pkey < 0); + + /* + * Jump to the executable region. AMR bits may or may not + * be set but they should not affect execution. + * + * This should generate pkey faults based on IAMR bits which + * may be set to restrict execution. + * + * The first iteration also checks if the overwrite of the + * first instruction word from a trap to a no-op succeeded. + */ + fault_pkey = pkey; + fault_type = -1; + remaining_faults = 0; + if (rights & PKEY_DISABLE_EXECUTE) { + fault_type = PKEY_DISABLE_EXECUTE; + remaining_faults = 1; + } + + FAIL_IF(sys_pkey_mprotect(insns, pgsize, PROT_EXEC, pkey) != 0); + printf("execute at %p, pkey permissions are %s\n", fault_addr, + pkey_rights(rights)); + asm volatile("mtctr %0; bctrl" : : "r"(insns)); + FAIL_IF(remaining_faults != 0); + if (rights & PKEY_DISABLE_EXECUTE) + FAIL_IF(fault_code != SEGV_PKUERR); + + /* Free the current pkey */ + sys_pkey_free(pkey); + + /* Find next valid combination of pkey rights */ + rights = next_pkey_rights(rights); + } while (rights); + + /* Cleanup */ + munmap((void *) insns, pgsize); + + return 0; +} + +int main(void) +{ + test_harness(test, "pkey_exec_prot"); +} diff --git a/tools/testing/selftests/powerpc/mm/pkey_siginfo.c b/tools/testing/selftests/powerpc/mm/pkey_siginfo.c new file mode 100644 index 000000000000..4f815d7c1214 --- /dev/null +++ b/tools/testing/selftests/powerpc/mm/pkey_siginfo.c @@ -0,0 +1,333 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Copyright 2020, Sandipan Das, IBM Corp. + * + * Test if the signal information reports the correct memory protection + * key upon getting a key access violation fault for a page that was + * attempted to be protected by two different keys from two competing + * threads at the same time. + */ + +#define _GNU_SOURCE +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <signal.h> + +#include <unistd.h> +#include <pthread.h> +#include <sys/mman.h> + +#include "pkeys.h" + +#define PPC_INST_NOP 0x60000000 +#define PPC_INST_BLR 0x4e800020 +#define PROT_RWX (PROT_READ | PROT_WRITE | PROT_EXEC) + +#define NUM_ITERATIONS 1000000 + +static volatile sig_atomic_t perm_pkey, rest_pkey; +static volatile sig_atomic_t rights, fault_count; +static volatile unsigned int *volatile fault_addr; +static pthread_barrier_t iteration_barrier; + +static void segv_handler(int signum, siginfo_t *sinfo, void *ctx) +{ + void *pgstart; + size_t pgsize; + int pkey; + + pkey = siginfo_pkey(sinfo); + + /* Check if this fault originated from a pkey access violation */ + if (sinfo->si_code != SEGV_PKUERR) { + sigsafe_err("got a fault for an unexpected reason\n"); + _exit(1); + } + + /* Check if this fault originated from the expected address */ + if (sinfo->si_addr != (void *) fault_addr) { + sigsafe_err("got a fault for an unexpected address\n"); + _exit(1); + } + + /* Check if this fault originated from the restrictive pkey */ + if (pkey != rest_pkey) { + sigsafe_err("got a fault for an unexpected pkey\n"); + _exit(1); + } + + /* Check if too many faults have occurred for the same iteration */ + if (fault_count > 0) { + sigsafe_err("got too many faults for the same address\n"); + _exit(1); + } + + pgsize = getpagesize(); + pgstart = (void *) ((unsigned long) fault_addr & ~(pgsize - 1)); + + /* + * If the current fault occurred due to lack of execute rights, + * reassociate the page with the exec-only pkey since execute + * rights cannot be changed directly for the faulting pkey as + * IAMR is inaccessible from userspace. + * + * Otherwise, if the current fault occurred due to lack of + * read-write rights, change the AMR permission bits for the + * pkey. + * + * This will let the test continue. + */ + if (rights == PKEY_DISABLE_EXECUTE && + mprotect(pgstart, pgsize, PROT_EXEC)) + _exit(1); + else + pkey_set_rights(pkey, 0); + + fault_count++; +} + +struct region { + unsigned long rights; + unsigned int *base; + size_t size; +}; + +static void *protect(void *p) +{ + unsigned long rights; + unsigned int *base; + size_t size; + int tid, i; + + tid = gettid(); + base = ((struct region *) p)->base; + size = ((struct region *) p)->size; + FAIL_IF_EXIT(!base); + + /* No read, write and execute restrictions */ + rights = 0; + + printf("tid %d, pkey permissions are %s\n", tid, pkey_rights(rights)); + + /* Allocate the permissive pkey */ + perm_pkey = sys_pkey_alloc(0, rights); + FAIL_IF_EXIT(perm_pkey < 0); + + /* + * Repeatedly try to protect the common region with a permissive + * pkey + */ + for (i = 0; i < NUM_ITERATIONS; i++) { + /* + * Wait until the other thread has finished allocating the + * restrictive pkey or until the next iteration has begun + */ + pthread_barrier_wait(&iteration_barrier); + + /* Try to associate the permissive pkey with the region */ + FAIL_IF_EXIT(sys_pkey_mprotect(base, size, PROT_RWX, + perm_pkey)); + } + + /* Free the permissive pkey */ + sys_pkey_free(perm_pkey); + + return NULL; +} + +static void *protect_access(void *p) +{ + size_t size, numinsns; + unsigned int *base; + int tid, i; + + tid = gettid(); + base = ((struct region *) p)->base; + size = ((struct region *) p)->size; + rights = ((struct region *) p)->rights; + numinsns = size / sizeof(base[0]); + FAIL_IF_EXIT(!base); + + /* Allocate the restrictive pkey */ + rest_pkey = sys_pkey_alloc(0, rights); + FAIL_IF_EXIT(rest_pkey < 0); + + printf("tid %d, pkey permissions are %s\n", tid, pkey_rights(rights)); + printf("tid %d, %s randomly in range [%p, %p]\n", tid, + (rights == PKEY_DISABLE_EXECUTE) ? "execute" : + (rights == PKEY_DISABLE_WRITE) ? "write" : "read", + base, base + numinsns); + + /* + * Repeatedly try to protect the common region with a restrictive + * pkey and read, write or execute from it + */ + for (i = 0; i < NUM_ITERATIONS; i++) { + /* + * Wait until the other thread has finished allocating the + * permissive pkey or until the next iteration has begun + */ + pthread_barrier_wait(&iteration_barrier); + + /* Try to associate the restrictive pkey with the region */ + FAIL_IF_EXIT(sys_pkey_mprotect(base, size, PROT_RWX, + rest_pkey)); + + /* Choose a random instruction word address from the region */ + fault_addr = base + (rand() % numinsns); + fault_count = 0; + + switch (rights) { + /* Read protection test */ + case PKEY_DISABLE_ACCESS: + /* + * Read an instruction word from the region and + * verify if it has not been overwritten to + * something unexpected + */ + FAIL_IF_EXIT(*fault_addr != PPC_INST_NOP && + *fault_addr != PPC_INST_BLR); + break; + + /* Write protection test */ + case PKEY_DISABLE_WRITE: + /* + * Write an instruction word to the region and + * verify if the overwrite has succeeded + */ + *fault_addr = PPC_INST_BLR; + FAIL_IF_EXIT(*fault_addr != PPC_INST_BLR); + break; + + /* Execute protection test */ + case PKEY_DISABLE_EXECUTE: + /* Jump to the region and execute instructions */ + asm volatile( + "mtctr %0; bctrl" + : : "r"(fault_addr) : "ctr", "lr"); + break; + } + + /* + * Restore the restrictions originally imposed by the + * restrictive pkey as the signal handler would have + * cleared out the corresponding AMR bits + */ + pkey_set_rights(rest_pkey, rights); + } + + /* Free restrictive pkey */ + sys_pkey_free(rest_pkey); + + return NULL; +} + +static void reset_pkeys(unsigned long rights) +{ + int pkeys[NR_PKEYS], i; + + /* Exhaustively allocate all available pkeys */ + for (i = 0; i < NR_PKEYS; i++) + pkeys[i] = sys_pkey_alloc(0, rights); + + /* Free all allocated pkeys */ + for (i = 0; i < NR_PKEYS; i++) + sys_pkey_free(pkeys[i]); +} + +static int test(void) +{ + pthread_t prot_thread, pacc_thread; + struct sigaction act; + pthread_attr_t attr; + size_t numinsns; + struct region r; + int ret, i; + + srand(time(NULL)); + ret = pkeys_unsupported(); + if (ret) + return ret; + + /* Allocate the region */ + r.size = getpagesize(); + r.base = mmap(NULL, r.size, PROT_RWX, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + FAIL_IF(r.base == MAP_FAILED); + + /* + * Fill the region with no-ops with a branch at the end + * for returning to the caller + */ + numinsns = r.size / sizeof(r.base[0]); + for (i = 0; i < numinsns - 1; i++) + r.base[i] = PPC_INST_NOP; + r.base[i] = PPC_INST_BLR; + + /* Setup SIGSEGV handler */ + act.sa_handler = 0; + act.sa_sigaction = segv_handler; + FAIL_IF(sigprocmask(SIG_SETMASK, 0, &act.sa_mask) != 0); + act.sa_flags = SA_SIGINFO; + act.sa_restorer = 0; + FAIL_IF(sigaction(SIGSEGV, &act, NULL) != 0); + + /* + * For these tests, the parent process should clear all bits of + * AMR and IAMR, i.e. impose no restrictions, for all available + * pkeys. This will be the base for the initial AMR and IAMR + * values for all the test thread pairs. + * + * If the AMR and IAMR bits of all available pkeys are cleared + * before running the tests and a fault is generated when + * attempting to read, write or execute instructions from a + * pkey protected region, the pkey responsible for this must be + * the one from the protect-and-access thread since the other + * one is fully permissive. Despite that, if the pkey reported + * by siginfo is not the restrictive pkey, then there must be a + * kernel bug. + */ + reset_pkeys(0); + + /* Setup barrier for protect and protect-and-access threads */ + FAIL_IF(pthread_attr_init(&attr) != 0); + FAIL_IF(pthread_barrier_init(&iteration_barrier, NULL, 2) != 0); + + /* Setup and start protect and protect-and-read threads */ + puts("starting thread pair (protect, protect-and-read)"); + r.rights = PKEY_DISABLE_ACCESS; + FAIL_IF(pthread_create(&prot_thread, &attr, &protect, &r) != 0); + FAIL_IF(pthread_create(&pacc_thread, &attr, &protect_access, &r) != 0); + FAIL_IF(pthread_join(prot_thread, NULL) != 0); + FAIL_IF(pthread_join(pacc_thread, NULL) != 0); + + /* Setup and start protect and protect-and-write threads */ + puts("starting thread pair (protect, protect-and-write)"); + r.rights = PKEY_DISABLE_WRITE; + FAIL_IF(pthread_create(&prot_thread, &attr, &protect, &r) != 0); + FAIL_IF(pthread_create(&pacc_thread, &attr, &protect_access, &r) != 0); + FAIL_IF(pthread_join(prot_thread, NULL) != 0); + FAIL_IF(pthread_join(pacc_thread, NULL) != 0); + + /* Setup and start protect and protect-and-execute threads */ + puts("starting thread pair (protect, protect-and-execute)"); + r.rights = PKEY_DISABLE_EXECUTE; + FAIL_IF(pthread_create(&prot_thread, &attr, &protect, &r) != 0); + FAIL_IF(pthread_create(&pacc_thread, &attr, &protect_access, &r) != 0); + FAIL_IF(pthread_join(prot_thread, NULL) != 0); + FAIL_IF(pthread_join(pacc_thread, NULL) != 0); + + /* Cleanup */ + FAIL_IF(pthread_attr_destroy(&attr) != 0); + FAIL_IF(pthread_barrier_destroy(&iteration_barrier) != 0); + munmap(r.base, r.size); + + return 0; +} + +int main(void) +{ + test_harness(test, "pkey_siginfo"); +} diff --git a/tools/testing/selftests/powerpc/mm/prot_sao.c b/tools/testing/selftests/powerpc/mm/prot_sao.c index e2eed65b7735..30b71b1d78d5 100644 --- a/tools/testing/selftests/powerpc/mm/prot_sao.c +++ b/tools/testing/selftests/powerpc/mm/prot_sao.c @@ -7,6 +7,7 @@ #include <stdlib.h> #include <string.h> #include <sys/mman.h> +#include <unistd.h> #include <asm/cputable.h> @@ -18,8 +19,13 @@ int test_prot_sao(void) { char *p; - /* 2.06 or later should support SAO */ - SKIP_IF(!have_hwcap(PPC_FEATURE_ARCH_2_06)); + /* + * SAO was introduced in 2.06 and removed in 3.1. It's disabled in + * guests/LPARs by default, so also skip if we are running in a guest. + */ + SKIP_IF(!have_hwcap(PPC_FEATURE_ARCH_2_06) || + have_hwcap2(PPC_FEATURE2_ARCH_3_1) || + access("/proc/device-tree/rtas/ibm,hypertas-functions", F_OK) == 0); /* * Ensure we can ask for PROT_SAO. diff --git a/tools/testing/selftests/powerpc/mm/stack_expansion_ldst.c b/tools/testing/selftests/powerpc/mm/stack_expansion_ldst.c new file mode 100644 index 000000000000..ed9143990888 --- /dev/null +++ b/tools/testing/selftests/powerpc/mm/stack_expansion_ldst.c @@ -0,0 +1,202 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Test that loads/stores expand the stack segment, or trigger a SEGV, in + * various conditions. + * + * Based on test code by Tom Lane. + */ + +#undef NDEBUG +#include <assert.h> + +#include <err.h> +#include <errno.h> +#include <stdio.h> +#include <signal.h> +#include <stdlib.h> +#include <string.h> +#include <sys/resource.h> +#include <sys/time.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <unistd.h> + +#define _KB (1024) +#define _MB (1024 * 1024) + +volatile char *stack_top_ptr; +volatile unsigned long stack_top_sp; +volatile char c; + +enum access_type { + LOAD, + STORE, +}; + +/* + * Consume stack until the stack pointer is below @target_sp, then do an access + * (load or store) at offset @delta from either the base of the stack or the + * current stack pointer. + */ +__attribute__ ((noinline)) +int consume_stack(unsigned long target_sp, unsigned long stack_high, int delta, enum access_type type) +{ + unsigned long target; + char stack_cur; + + if ((unsigned long)&stack_cur > target_sp) + return consume_stack(target_sp, stack_high, delta, type); + else { + // We don't really need this, but without it GCC might not + // generate a recursive call above. + stack_top_ptr = &stack_cur; + +#ifdef __powerpc__ + asm volatile ("mr %[sp], %%r1" : [sp] "=r" (stack_top_sp)); +#else + asm volatile ("mov %%rsp, %[sp]" : [sp] "=r" (stack_top_sp)); +#endif + target = stack_high - delta + 1; + volatile char *p = (char *)target; + + if (type == STORE) + *p = c; + else + c = *p; + + // Do something to prevent the stack frame being popped prior to + // our access above. + getpid(); + } + + return 0; +} + +static int search_proc_maps(char *needle, unsigned long *low, unsigned long *high) +{ + unsigned long start, end; + static char buf[4096]; + char name[128]; + FILE *f; + int rc; + + f = fopen("/proc/self/maps", "r"); + if (!f) { + perror("fopen"); + return -1; + } + + while (fgets(buf, sizeof(buf), f)) { + rc = sscanf(buf, "%lx-%lx %*c%*c%*c%*c %*x %*d:%*d %*d %127s\n", + &start, &end, name); + if (rc == 2) + continue; + + if (rc != 3) { + printf("sscanf errored\n"); + rc = -1; + break; + } + + if (strstr(name, needle)) { + *low = start; + *high = end - 1; + rc = 0; + break; + } + } + + fclose(f); + + return rc; +} + +int child(unsigned int stack_used, int delta, enum access_type type) +{ + unsigned long low, stack_high; + + assert(search_proc_maps("[stack]", &low, &stack_high) == 0); + + assert(consume_stack(stack_high - stack_used, stack_high, delta, type) == 0); + + printf("Access OK: %s delta %-7d used size 0x%06x stack high 0x%lx top_ptr %p top sp 0x%lx actual used 0x%lx\n", + type == LOAD ? "load" : "store", delta, stack_used, stack_high, + stack_top_ptr, stack_top_sp, stack_high - stack_top_sp + 1); + + return 0; +} + +static int test_one(unsigned int stack_used, int delta, enum access_type type) +{ + pid_t pid; + int rc; + + pid = fork(); + if (pid == 0) + exit(child(stack_used, delta, type)); + + assert(waitpid(pid, &rc, 0) != -1); + + if (WIFEXITED(rc) && WEXITSTATUS(rc) == 0) + return 0; + + // We don't expect a non-zero exit that's not a signal + assert(!WIFEXITED(rc)); + + printf("Faulted: %s delta %-7d used size 0x%06x signal %d\n", + type == LOAD ? "load" : "store", delta, stack_used, + WTERMSIG(rc)); + + return 1; +} + +// This is fairly arbitrary but is well below any of the targets below, +// so that the delta between the stack pointer and the target is large. +#define DEFAULT_SIZE (32 * _KB) + +static void test_one_type(enum access_type type, unsigned long page_size, unsigned long rlim_cur) +{ + unsigned long delta; + + // We should be able to access anywhere within the rlimit + for (delta = page_size; delta <= rlim_cur; delta += page_size) + assert(test_one(DEFAULT_SIZE, delta, type) == 0); + + assert(test_one(DEFAULT_SIZE, rlim_cur, type) == 0); + + // But if we go past the rlimit it should fail + assert(test_one(DEFAULT_SIZE, rlim_cur + 1, type) != 0); +} + +static int test(void) +{ + unsigned long page_size; + struct rlimit rlimit; + + page_size = getpagesize(); + getrlimit(RLIMIT_STACK, &rlimit); + printf("Stack rlimit is 0x%lx\n", rlimit.rlim_cur); + + printf("Testing loads ...\n"); + test_one_type(LOAD, page_size, rlimit.rlim_cur); + printf("Testing stores ...\n"); + test_one_type(STORE, page_size, rlimit.rlim_cur); + + printf("All OK\n"); + + return 0; +} + +#ifdef __powerpc__ +#include "utils.h" + +int main(void) +{ + return test_harness(test, "stack_expansion_ldst"); +} +#else +int main(void) +{ + return test(); +} +#endif diff --git a/tools/testing/selftests/powerpc/mm/stack_expansion_signal.c b/tools/testing/selftests/powerpc/mm/stack_expansion_signal.c new file mode 100644 index 000000000000..c8b32a29e274 --- /dev/null +++ b/tools/testing/selftests/powerpc/mm/stack_expansion_signal.c @@ -0,0 +1,118 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Test that signal delivery is able to expand the stack segment without + * triggering a SEGV. + * + * Based on test code by Tom Lane. + */ + +#include <err.h> +#include <stdio.h> +#include <stdlib.h> +#include <signal.h> +#include <sys/types.h> +#include <unistd.h> + +#include "../pmu/lib.h" +#include "utils.h" + +#define _KB (1024) +#define _MB (1024 * 1024) + +static char *stack_base_ptr; +static char *stack_top_ptr; + +static volatile sig_atomic_t sig_occurred = 0; + +static void sigusr1_handler(int signal_arg) +{ + sig_occurred = 1; +} + +static int consume_stack(unsigned int stack_size, union pipe write_pipe) +{ + char stack_cur; + + if ((stack_base_ptr - &stack_cur) < stack_size) + return consume_stack(stack_size, write_pipe); + else { + stack_top_ptr = &stack_cur; + + FAIL_IF(notify_parent(write_pipe)); + + while (!sig_occurred) + barrier(); + } + + return 0; +} + +static int child(unsigned int stack_size, union pipe write_pipe) +{ + struct sigaction act; + char stack_base; + + act.sa_handler = sigusr1_handler; + sigemptyset(&act.sa_mask); + act.sa_flags = 0; + if (sigaction(SIGUSR1, &act, NULL) < 0) + err(1, "sigaction"); + + stack_base_ptr = (char *) (((size_t) &stack_base + 65535) & ~65535UL); + + FAIL_IF(consume_stack(stack_size, write_pipe)); + + printf("size 0x%06x: OK, stack base %p top %p (%zx used)\n", + stack_size, stack_base_ptr, stack_top_ptr, + stack_base_ptr - stack_top_ptr); + + return 0; +} + +static int test_one_size(unsigned int stack_size) +{ + union pipe read_pipe, write_pipe; + pid_t pid; + + FAIL_IF(pipe(read_pipe.fds) == -1); + FAIL_IF(pipe(write_pipe.fds) == -1); + + pid = fork(); + if (pid == 0) { + close(read_pipe.read_fd); + close(write_pipe.write_fd); + exit(child(stack_size, read_pipe)); + } + + close(read_pipe.write_fd); + close(write_pipe.read_fd); + FAIL_IF(sync_with_child(read_pipe, write_pipe)); + + kill(pid, SIGUSR1); + + FAIL_IF(wait_for_child(pid)); + + close(read_pipe.read_fd); + close(write_pipe.write_fd); + + return 0; +} + +int test(void) +{ + unsigned int i, size; + + // Test with used stack from 1MB - 64K to 1MB + 64K + // Increment by 64 to get more coverage of odd sizes + for (i = 0; i < (128 * _KB); i += 64) { + size = i + (1 * _MB) - (64 * _KB); + FAIL_IF(test_one_size(size)); + } + + return 0; +} + +int main(void) +{ + return test_harness(test, "stack_expansion_signal"); +} diff --git a/tools/testing/selftests/powerpc/pmu/count_stcx_fail.c b/tools/testing/selftests/powerpc/pmu/count_stcx_fail.c index 7b4ac4537702..2980abca31e0 100644 --- a/tools/testing/selftests/powerpc/pmu/count_stcx_fail.c +++ b/tools/testing/selftests/powerpc/pmu/count_stcx_fail.c @@ -9,6 +9,7 @@ #include <stdbool.h> #include <string.h> #include <sys/prctl.h> +#include <asm/cputable.h> #include "event.h" #include "utils.h" @@ -104,6 +105,9 @@ static int test_body(void) struct event events[3]; u64 overhead; + // The STCX_FAIL event we use works on Power8 or later + SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_2_07)); + setup_event(&events[0], PERF_COUNT_HW_INSTRUCTIONS, PERF_TYPE_HARDWARE, "instructions"); setup_event(&events[1], PERF_COUNT_HW_CPU_CYCLES, PERF_TYPE_HARDWARE, "cycles"); setup_event(&events[2], PM_STCX_FAIL, PERF_TYPE_RAW, "stcx_fail"); diff --git a/tools/testing/selftests/powerpc/pmu/ebb/back_to_back_ebbs_test.c b/tools/testing/selftests/powerpc/pmu/ebb/back_to_back_ebbs_test.c index a2d7b0e3dca9..a26ac122c759 100644 --- a/tools/testing/selftests/powerpc/pmu/ebb/back_to_back_ebbs_test.c +++ b/tools/testing/selftests/powerpc/pmu/ebb/back_to_back_ebbs_test.c @@ -91,8 +91,6 @@ int back_to_back_ebbs(void) ebb_global_disable(); ebb_freeze_pmcs(); - count_pmc(1, sample_period); - dump_ebb_state(); event_close(&event); diff --git a/tools/testing/selftests/powerpc/pmu/ebb/cycles_test.c b/tools/testing/selftests/powerpc/pmu/ebb/cycles_test.c index bc893813483e..bb9f587fa76e 100644 --- a/tools/testing/selftests/powerpc/pmu/ebb/cycles_test.c +++ b/tools/testing/selftests/powerpc/pmu/ebb/cycles_test.c @@ -42,8 +42,6 @@ int cycles(void) ebb_global_disable(); ebb_freeze_pmcs(); - count_pmc(1, sample_period); - dump_ebb_state(); event_close(&event); diff --git a/tools/testing/selftests/powerpc/pmu/ebb/cycles_with_freeze_test.c b/tools/testing/selftests/powerpc/pmu/ebb/cycles_with_freeze_test.c index dcd351d20328..9ae795ce314e 100644 --- a/tools/testing/selftests/powerpc/pmu/ebb/cycles_with_freeze_test.c +++ b/tools/testing/selftests/powerpc/pmu/ebb/cycles_with_freeze_test.c @@ -99,8 +99,6 @@ int cycles_with_freeze(void) ebb_global_disable(); ebb_freeze_pmcs(); - count_pmc(1, sample_period); - dump_ebb_state(); printf("EBBs while frozen %d\n", ebbs_while_frozen); diff --git a/tools/testing/selftests/powerpc/pmu/ebb/cycles_with_mmcr2_test.c b/tools/testing/selftests/powerpc/pmu/ebb/cycles_with_mmcr2_test.c index 94c99c12c0f2..4b45a2e70f62 100644 --- a/tools/testing/selftests/powerpc/pmu/ebb/cycles_with_mmcr2_test.c +++ b/tools/testing/selftests/powerpc/pmu/ebb/cycles_with_mmcr2_test.c @@ -71,8 +71,6 @@ int cycles_with_mmcr2(void) ebb_global_disable(); ebb_freeze_pmcs(); - count_pmc(1, sample_period); - dump_ebb_state(); event_close(&event); diff --git a/tools/testing/selftests/powerpc/pmu/ebb/ebb.c b/tools/testing/selftests/powerpc/pmu/ebb/ebb.c index dfbc5c3ad52d..21537d6eb6b7 100644 --- a/tools/testing/selftests/powerpc/pmu/ebb/ebb.c +++ b/tools/testing/selftests/powerpc/pmu/ebb/ebb.c @@ -396,8 +396,6 @@ int ebb_child(union pipe read_pipe, union pipe write_pipe) ebb_global_disable(); ebb_freeze_pmcs(); - count_pmc(1, sample_period); - dump_ebb_state(); event_close(&event); diff --git a/tools/testing/selftests/powerpc/pmu/ebb/ebb_on_willing_child_test.c b/tools/testing/selftests/powerpc/pmu/ebb/ebb_on_willing_child_test.c index ca2f7d729155..b208bf6ad58d 100644 --- a/tools/testing/selftests/powerpc/pmu/ebb/ebb_on_willing_child_test.c +++ b/tools/testing/selftests/powerpc/pmu/ebb/ebb_on_willing_child_test.c @@ -38,8 +38,6 @@ static int victim_child(union pipe read_pipe, union pipe write_pipe) ebb_global_disable(); ebb_freeze_pmcs(); - count_pmc(1, sample_period); - dump_ebb_state(); FAIL_IF(ebb_state.stats.ebb_count == 0); diff --git a/tools/testing/selftests/powerpc/pmu/ebb/lost_exception_test.c b/tools/testing/selftests/powerpc/pmu/ebb/lost_exception_test.c index ac3e6e182614..ba2681a12cc7 100644 --- a/tools/testing/selftests/powerpc/pmu/ebb/lost_exception_test.c +++ b/tools/testing/selftests/powerpc/pmu/ebb/lost_exception_test.c @@ -75,7 +75,6 @@ static int test_body(void) ebb_freeze_pmcs(); ebb_global_disable(); - count_pmc(4, sample_period); mtspr(SPRN_PMC4, 0xdead); dump_summary_ebb_state(); diff --git a/tools/testing/selftests/powerpc/pmu/ebb/multi_counter_test.c b/tools/testing/selftests/powerpc/pmu/ebb/multi_counter_test.c index b8242e9d97d2..791d37ba327b 100644 --- a/tools/testing/selftests/powerpc/pmu/ebb/multi_counter_test.c +++ b/tools/testing/selftests/powerpc/pmu/ebb/multi_counter_test.c @@ -70,13 +70,6 @@ int multi_counter(void) ebb_global_disable(); ebb_freeze_pmcs(); - count_pmc(1, sample_period); - count_pmc(2, sample_period); - count_pmc(3, sample_period); - count_pmc(4, sample_period); - count_pmc(5, sample_period); - count_pmc(6, sample_period); - dump_ebb_state(); for (i = 0; i < 6; i++) diff --git a/tools/testing/selftests/powerpc/pmu/ebb/multi_ebb_procs_test.c b/tools/testing/selftests/powerpc/pmu/ebb/multi_ebb_procs_test.c index a05c0e18ded6..9b0f70d59702 100644 --- a/tools/testing/selftests/powerpc/pmu/ebb/multi_ebb_procs_test.c +++ b/tools/testing/selftests/powerpc/pmu/ebb/multi_ebb_procs_test.c @@ -61,8 +61,6 @@ static int cycles_child(void) ebb_global_disable(); ebb_freeze_pmcs(); - count_pmc(1, sample_period); - dump_summary_ebb_state(); event_close(&event); diff --git a/tools/testing/selftests/powerpc/pmu/ebb/pmae_handling_test.c b/tools/testing/selftests/powerpc/pmu/ebb/pmae_handling_test.c index 153ebc92234f..2904c741e04e 100644 --- a/tools/testing/selftests/powerpc/pmu/ebb/pmae_handling_test.c +++ b/tools/testing/selftests/powerpc/pmu/ebb/pmae_handling_test.c @@ -82,8 +82,6 @@ static int test_body(void) ebb_global_disable(); ebb_freeze_pmcs(); - count_pmc(1, sample_period); - dump_ebb_state(); if (mmcr0_mismatch) diff --git a/tools/testing/selftests/powerpc/pmu/ebb/pmc56_overflow_test.c b/tools/testing/selftests/powerpc/pmu/ebb/pmc56_overflow_test.c index eadad75ed7e6..b29f8ba22d1e 100644 --- a/tools/testing/selftests/powerpc/pmu/ebb/pmc56_overflow_test.c +++ b/tools/testing/selftests/powerpc/pmu/ebb/pmc56_overflow_test.c @@ -76,8 +76,6 @@ int pmc56_overflow(void) ebb_global_disable(); ebb_freeze_pmcs(); - count_pmc(2, sample_period); - dump_ebb_state(); printf("PMC5/6 overflow %d\n", pmc56_overflowed); diff --git a/tools/testing/selftests/powerpc/pmu/lib.h b/tools/testing/selftests/powerpc/pmu/lib.h index fa12e7d0b4d3..bf1bec013bbb 100644 --- a/tools/testing/selftests/powerpc/pmu/lib.h +++ b/tools/testing/selftests/powerpc/pmu/lib.h @@ -6,6 +6,7 @@ #ifndef __SELFTESTS_POWERPC_PMU_LIB_H #define __SELFTESTS_POWERPC_PMU_LIB_H +#include <stdbool.h> #include <stdio.h> #include <stdint.h> #include <string.h> diff --git a/tools/testing/selftests/powerpc/pmu/per_event_excludes.c b/tools/testing/selftests/powerpc/pmu/per_event_excludes.c index 2756fe2efdc5..2d37942bf72b 100644 --- a/tools/testing/selftests/powerpc/pmu/per_event_excludes.c +++ b/tools/testing/selftests/powerpc/pmu/per_event_excludes.c @@ -12,6 +12,8 @@ #include <string.h> #include <sys/prctl.h> +#include <asm/cputable.h> + #include "event.h" #include "lib.h" #include "utils.h" @@ -23,12 +25,9 @@ static int per_event_excludes(void) { struct event *e, events[4]; - char *platform; int i; - platform = (char *)get_auxv_entry(AT_BASE_PLATFORM); - FAIL_IF(!platform); - SKIP_IF(strcmp(platform, "power8") != 0); + SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_2_07)); /* * We need to create the events disabled, otherwise the running/enabled diff --git a/tools/testing/selftests/powerpc/ptrace/core-pkey.c b/tools/testing/selftests/powerpc/ptrace/core-pkey.c index d5c64fee032d..bbc05ffc5860 100644 --- a/tools/testing/selftests/powerpc/ptrace/core-pkey.c +++ b/tools/testing/selftests/powerpc/ptrace/core-pkey.c @@ -150,7 +150,7 @@ static int child(struct shared_info *info) printf("%-30s AMR: %016lx pkey1: %d pkey2: %d pkey3: %d\n", user_write, info->amr, pkey1, pkey2, pkey3); - mtspr(SPRN_AMR, info->amr); + set_amr(info->amr); /* * We won't use pkey3. This tests whether the kernel restores the UAMOR diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c b/tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c index bdbbbe8431e0..bc454f899124 100644 --- a/tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c +++ b/tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c @@ -44,7 +44,7 @@ struct shared_info { unsigned long amr2; /* AMR value that ptrace should refuse to write to the child. */ - unsigned long amr3; + unsigned long invalid_amr; /* IAMR value the parent expects to read from the child. */ unsigned long expected_iamr; @@ -57,8 +57,8 @@ struct shared_info { * (even though they're valid ones) because userspace doesn't have * access to those registers. */ - unsigned long new_iamr; - unsigned long new_uamor; + unsigned long invalid_iamr; + unsigned long invalid_uamor; }; static int sys_pkey_alloc(unsigned long flags, unsigned long init_access_rights) @@ -66,11 +66,6 @@ static int sys_pkey_alloc(unsigned long flags, unsigned long init_access_rights) return syscall(__NR_pkey_alloc, flags, init_access_rights); } -static int sys_pkey_free(int pkey) -{ - return syscall(__NR_pkey_free, pkey); -} - static int child(struct shared_info *info) { unsigned long reg; @@ -100,33 +95,37 @@ static int child(struct shared_info *info) info->amr1 |= 3ul << pkeyshift(pkey1); info->amr2 |= 3ul << pkeyshift(pkey2); - info->amr3 |= info->amr2 | 3ul << pkeyshift(pkey3); + /* + * invalid amr value where we try to force write + * things which are deined by a uamor setting. + */ + info->invalid_amr = info->amr2 | (~0x0UL & ~info->expected_uamor); + /* + * if PKEY_DISABLE_EXECUTE succeeded we should update the expected_iamr + */ if (disable_execute) info->expected_iamr |= 1ul << pkeyshift(pkey1); else info->expected_iamr &= ~(1ul << pkeyshift(pkey1)); - info->expected_iamr &= ~(1ul << pkeyshift(pkey2) | 1ul << pkeyshift(pkey3)); - - info->expected_uamor |= 3ul << pkeyshift(pkey1) | - 3ul << pkeyshift(pkey2); - info->new_iamr |= 1ul << pkeyshift(pkey1) | 1ul << pkeyshift(pkey2); - info->new_uamor |= 3ul << pkeyshift(pkey1); + /* + * We allocated pkey2 and pkey 3 above. Clear the IAMR bits. + */ + info->expected_iamr &= ~(1ul << pkeyshift(pkey2)); + info->expected_iamr &= ~(1ul << pkeyshift(pkey3)); /* - * We won't use pkey3. We just want a plausible but invalid key to test - * whether ptrace will let us write to AMR bits we are not supposed to. - * - * This also tests whether the kernel restores the UAMOR permissions - * after a key is freed. + * Create an IAMR value different from expected value. + * Kernel will reject an IAMR and UAMOR change. */ - sys_pkey_free(pkey3); + info->invalid_iamr = info->expected_iamr | (1ul << pkeyshift(pkey1) | 1ul << pkeyshift(pkey2)); + info->invalid_uamor = info->expected_uamor & ~(0x3ul << pkeyshift(pkey1)); printf("%-30s AMR: %016lx pkey1: %d pkey2: %d pkey3: %d\n", user_write, info->amr1, pkey1, pkey2, pkey3); - mtspr(SPRN_AMR, info->amr1); + set_amr(info->amr1); /* Wait for parent to read our AMR value and write a new one. */ ret = prod_parent(&info->child_sync); @@ -196,9 +195,9 @@ static int parent(struct shared_info *info, pid_t pid) PARENT_SKIP_IF_UNSUPPORTED(ret, &info->child_sync); PARENT_FAIL_IF(ret, &info->child_sync); - info->amr1 = info->amr2 = info->amr3 = regs[0]; - info->expected_iamr = info->new_iamr = regs[1]; - info->expected_uamor = info->new_uamor = regs[2]; + info->amr1 = info->amr2 = regs[0]; + info->expected_iamr = regs[1]; + info->expected_uamor = regs[2]; /* Wake up child so that it can set itself up. */ ret = prod_child(&info->child_sync); @@ -234,10 +233,10 @@ static int parent(struct shared_info *info, pid_t pid) return ret; /* Write invalid AMR value in child. */ - ret = ptrace_write_regs(pid, NT_PPC_PKEY, &info->amr3, 1); + ret = ptrace_write_regs(pid, NT_PPC_PKEY, &info->invalid_amr, 1); PARENT_FAIL_IF(ret, &info->child_sync); - printf("%-30s AMR: %016lx\n", ptrace_write_running, info->amr3); + printf("%-30s AMR: %016lx\n", ptrace_write_running, info->invalid_amr); /* Wake up child so that it can verify it didn't change. */ ret = prod_child(&info->child_sync); @@ -249,7 +248,7 @@ static int parent(struct shared_info *info, pid_t pid) /* Try to write to IAMR. */ regs[0] = info->amr1; - regs[1] = info->new_iamr; + regs[1] = info->invalid_iamr; ret = ptrace_write_regs(pid, NT_PPC_PKEY, regs, 2); PARENT_FAIL_IF(!ret, &info->child_sync); @@ -257,7 +256,7 @@ static int parent(struct shared_info *info, pid_t pid) ptrace_write_running, regs[0], regs[1]); /* Try to write to IAMR and UAMOR. */ - regs[2] = info->new_uamor; + regs[2] = info->invalid_uamor; ret = ptrace_write_regs(pid, NT_PPC_PKEY, regs, 3); PARENT_FAIL_IF(!ret, &info->child_sync); diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-tar.c b/tools/testing/selftests/powerpc/ptrace/ptrace-tar.c index 58cb1a860cc9..4436ca9d3caf 100644 --- a/tools/testing/selftests/powerpc/ptrace/ptrace-tar.c +++ b/tools/testing/selftests/powerpc/ptrace/ptrace-tar.c @@ -78,6 +78,9 @@ int ptrace_tar(void) pid_t pid; int ret, status; + // TAR was added in v2.07 + SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_2_07)); + shm_id = shmget(IPC_PRIVATE, sizeof(int) * 3, 0777|IPC_CREAT); pid = fork(); if (pid < 0) { diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-vsx.c b/tools/testing/selftests/powerpc/ptrace/ptrace-vsx.c index c4fe0e893306..cb9875f764ca 100644 --- a/tools/testing/selftests/powerpc/ptrace/ptrace-vsx.c +++ b/tools/testing/selftests/powerpc/ptrace/ptrace-vsx.c @@ -61,6 +61,8 @@ int ptrace_vsx(void) pid_t pid; int ret, status, i; + SKIP_IF(!have_hwcap(PPC_FEATURE_HAS_VSX)); + shm_id = shmget(IPC_PRIVATE, sizeof(int) * 2, 0777|IPC_CREAT); for (i = 0; i < VEC_MAX; i++) diff --git a/tools/testing/selftests/powerpc/security/spectre_v2.c b/tools/testing/selftests/powerpc/security/spectre_v2.c index 8c6b982af2a8..c8d82b784102 100644 --- a/tools/testing/selftests/powerpc/security/spectre_v2.c +++ b/tools/testing/selftests/powerpc/security/spectre_v2.c @@ -183,6 +183,16 @@ int spectre_v2_test(void) if (miss_percent > 15) { printf("Branch misses > 15%% unexpected in this configuration!\n"); printf("Possible mis-match between reported & actual mitigation\n"); + /* + * Such a mismatch may be caused by a guest system + * reporting as vulnerable when the host is mitigated. + * Return skip code to avoid detecting this as an error. + * We are not vulnerable and reporting otherwise, so + * missing such a mismatch is safe. + */ + if (state == VULNERABLE) + return 4; + return 1; } break; diff --git a/tools/testing/selftests/powerpc/stringloops/Makefile b/tools/testing/selftests/powerpc/stringloops/Makefile index 7fc0623d85c3..9c39f55a58ff 100644 --- a/tools/testing/selftests/powerpc/stringloops/Makefile +++ b/tools/testing/selftests/powerpc/stringloops/Makefile @@ -8,7 +8,7 @@ build_32bit = $(shell if ($(CC) $(CFLAGS) -m32 -o /dev/null memcmp.c >/dev/null TEST_GEN_PROGS := memcmp_64 strlen -$(OUTPUT)/memcmp_64: memcmp.c +$(OUTPUT)/memcmp_64: memcmp.c ../utils.c $(OUTPUT)/memcmp_64: CFLAGS += -m64 -maltivec ifeq ($(build_32bit),1) diff --git a/tools/testing/selftests/powerpc/stringloops/memcmp.c b/tools/testing/selftests/powerpc/stringloops/memcmp.c index b1fa7546957f..979df3d98368 100644 --- a/tools/testing/selftests/powerpc/stringloops/memcmp.c +++ b/tools/testing/selftests/powerpc/stringloops/memcmp.c @@ -2,7 +2,9 @@ #include <malloc.h> #include <stdlib.h> #include <string.h> +#include <sys/mman.h> #include <time.h> +#include <asm/cputable.h> #include "utils.h" #define SIZE 256 @@ -13,6 +15,9 @@ #define LARGE_MAX_OFFSET 32 #define LARGE_SIZE_START 4096 +/* This is big enough to fit LARGE_SIZE and works on 4K & 64K kernels */ +#define MAP_SIZE (64 * 1024) + #define MAX_OFFSET_DIFF_S1_S2 48 int vmx_count; @@ -68,25 +73,25 @@ static void test_one(char *s1, char *s2, unsigned long max_offset, static int testcase(bool islarge) { - char *s1; - char *s2; - unsigned long i; - - unsigned long comp_size = (islarge ? LARGE_SIZE : SIZE); - unsigned long alloc_size = comp_size + MAX_OFFSET_DIFF_S1_S2; - int iterations = islarge ? LARGE_ITERATIONS : ITERATIONS; - - s1 = memalign(128, alloc_size); - if (!s1) { - perror("memalign"); - exit(1); - } + unsigned long i, comp_size, alloc_size; + char *p, *s1, *s2; + int iterations; - s2 = memalign(128, alloc_size); - if (!s2) { - perror("memalign"); - exit(1); - } + comp_size = (islarge ? LARGE_SIZE : SIZE); + alloc_size = comp_size + MAX_OFFSET_DIFF_S1_S2; + iterations = islarge ? LARGE_ITERATIONS : ITERATIONS; + + p = mmap(NULL, 4 * MAP_SIZE, PROT_READ | PROT_WRITE, + MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); + FAIL_IF(p == MAP_FAILED); + + /* Put s1/s2 at the end of a page */ + s1 = p + MAP_SIZE - alloc_size; + s2 = p + 3 * MAP_SIZE - alloc_size; + + /* And unmap the subsequent page to force a fault if we overread */ + munmap(p + MAP_SIZE, MAP_SIZE); + munmap(p + 3 * MAP_SIZE, MAP_SIZE); srandom(time(0)); @@ -147,6 +152,11 @@ static int testcase(bool islarge) static int testcases(void) { +#ifdef __powerpc64__ + // vcmpequd used in memcmp_64.S is v2.07 + SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_2_07)); +#endif + testcase(0); testcase(1); return 0; diff --git a/tools/testing/selftests/powerpc/utils.c b/tools/testing/selftests/powerpc/utils.c index 5ee0e98c4896..18b6a773d5c7 100644 --- a/tools/testing/selftests/powerpc/utils.c +++ b/tools/testing/selftests/powerpc/utils.c @@ -16,6 +16,7 @@ #include <string.h> #include <sys/ioctl.h> #include <sys/stat.h> +#include <sys/sysinfo.h> #include <sys/types.h> #include <sys/utsname.h> #include <unistd.h> @@ -88,28 +89,40 @@ void *get_auxv_entry(int type) int pick_online_cpu(void) { - cpu_set_t mask; - int cpu; + int ncpus, cpu = -1; + cpu_set_t *mask; + size_t size; + + ncpus = get_nprocs_conf(); + size = CPU_ALLOC_SIZE(ncpus); + mask = CPU_ALLOC(ncpus); + if (!mask) { + perror("malloc"); + return -1; + } - CPU_ZERO(&mask); + CPU_ZERO_S(size, mask); - if (sched_getaffinity(0, sizeof(mask), &mask)) { + if (sched_getaffinity(0, size, mask)) { perror("sched_getaffinity"); - return -1; + goto done; } /* We prefer a primary thread, but skip 0 */ - for (cpu = 8; cpu < CPU_SETSIZE; cpu += 8) - if (CPU_ISSET(cpu, &mask)) - return cpu; + for (cpu = 8; cpu < ncpus; cpu += 8) + if (CPU_ISSET_S(cpu, size, mask)) + goto done; /* Search for anything, but in reverse */ - for (cpu = CPU_SETSIZE - 1; cpu >= 0; cpu--) - if (CPU_ISSET(cpu, &mask)) - return cpu; + for (cpu = ncpus - 1; cpu >= 0; cpu--) + if (CPU_ISSET_S(cpu, size, mask)) + goto done; printf("No cpus in affinity mask?!\n"); - return -1; + +done: + CPU_FREE(mask); + return cpu; } bool is_ppc64le(void) @@ -293,3 +306,31 @@ void set_dscr(unsigned long val) asm volatile("mtspr %1,%0" : : "r" (val), "i" (SPRN_DSCR)); } + +int using_hash_mmu(bool *using_hash) +{ + char line[128]; + FILE *f; + int rc; + + f = fopen("/proc/cpuinfo", "r"); + FAIL_IF(!f); + + rc = 0; + while (fgets(line, sizeof(line), f) != NULL) { + if (strcmp(line, "MMU : Hash\n") == 0) { + *using_hash = true; + goto out; + } + + if (strcmp(line, "MMU : Radix\n") == 0) { + *using_hash = false; + goto out; + } + } + + rc = -1; +out: + fclose(f); + return rc; +} diff --git a/tools/testing/selftests/rseq/param_test.c b/tools/testing/selftests/rseq/param_test.c index e8a657a5f48a..384589095864 100644 --- a/tools/testing/selftests/rseq/param_test.c +++ b/tools/testing/selftests/rseq/param_test.c @@ -1,8 +1,10 @@ // SPDX-License-Identifier: LGPL-2.1 #define _GNU_SOURCE #include <assert.h> +#include <linux/membarrier.h> #include <pthread.h> #include <sched.h> +#include <stdatomic.h> #include <stdint.h> #include <stdio.h> #include <stdlib.h> @@ -1131,6 +1133,220 @@ static int set_signal_handler(void) return ret; } +struct test_membarrier_thread_args { + int stop; + intptr_t percpu_list_ptr; +}; + +/* Worker threads modify data in their "active" percpu lists. */ +void *test_membarrier_worker_thread(void *arg) +{ + struct test_membarrier_thread_args *args = + (struct test_membarrier_thread_args *)arg; + const int iters = opt_reps; + int i; + + if (rseq_register_current_thread()) { + fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n", + errno, strerror(errno)); + abort(); + } + + /* Wait for initialization. */ + while (!atomic_load(&args->percpu_list_ptr)) {} + + for (i = 0; i < iters; ++i) { + int ret; + + do { + int cpu = rseq_cpu_start(); + + ret = rseq_offset_deref_addv(&args->percpu_list_ptr, + sizeof(struct percpu_list_entry) * cpu, 1, cpu); + } while (rseq_unlikely(ret)); + } + + if (rseq_unregister_current_thread()) { + fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n", + errno, strerror(errno)); + abort(); + } + return NULL; +} + +void test_membarrier_init_percpu_list(struct percpu_list *list) +{ + int i; + + memset(list, 0, sizeof(*list)); + for (i = 0; i < CPU_SETSIZE; i++) { + struct percpu_list_node *node; + + node = malloc(sizeof(*node)); + assert(node); + node->data = 0; + node->next = NULL; + list->c[i].head = node; + } +} + +void test_membarrier_free_percpu_list(struct percpu_list *list) +{ + int i; + + for (i = 0; i < CPU_SETSIZE; i++) + free(list->c[i].head); +} + +static int sys_membarrier(int cmd, int flags, int cpu_id) +{ + return syscall(__NR_membarrier, cmd, flags, cpu_id); +} + +/* + * The manager thread swaps per-cpu lists that worker threads see, + * and validates that there are no unexpected modifications. + */ +void *test_membarrier_manager_thread(void *arg) +{ + struct test_membarrier_thread_args *args = + (struct test_membarrier_thread_args *)arg; + struct percpu_list list_a, list_b; + intptr_t expect_a = 0, expect_b = 0; + int cpu_a = 0, cpu_b = 0; + + if (rseq_register_current_thread()) { + fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n", + errno, strerror(errno)); + abort(); + } + + /* Init lists. */ + test_membarrier_init_percpu_list(&list_a); + test_membarrier_init_percpu_list(&list_b); + + atomic_store(&args->percpu_list_ptr, (intptr_t)&list_a); + + while (!atomic_load(&args->stop)) { + /* list_a is "active". */ + cpu_a = rand() % CPU_SETSIZE; + /* + * As list_b is "inactive", we should never see changes + * to list_b. + */ + if (expect_b != atomic_load(&list_b.c[cpu_b].head->data)) { + fprintf(stderr, "Membarrier test failed\n"); + abort(); + } + + /* Make list_b "active". */ + atomic_store(&args->percpu_list_ptr, (intptr_t)&list_b); + if (sys_membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ, + MEMBARRIER_CMD_FLAG_CPU, cpu_a) && + errno != ENXIO /* missing CPU */) { + perror("sys_membarrier"); + abort(); + } + /* + * Cpu A should now only modify list_b, so the values + * in list_a should be stable. + */ + expect_a = atomic_load(&list_a.c[cpu_a].head->data); + + cpu_b = rand() % CPU_SETSIZE; + /* + * As list_a is "inactive", we should never see changes + * to list_a. + */ + if (expect_a != atomic_load(&list_a.c[cpu_a].head->data)) { + fprintf(stderr, "Membarrier test failed\n"); + abort(); + } + + /* Make list_a "active". */ + atomic_store(&args->percpu_list_ptr, (intptr_t)&list_a); + if (sys_membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ, + MEMBARRIER_CMD_FLAG_CPU, cpu_b) && + errno != ENXIO /* missing CPU*/) { + perror("sys_membarrier"); + abort(); + } + /* Remember a value from list_b. */ + expect_b = atomic_load(&list_b.c[cpu_b].head->data); + } + + test_membarrier_free_percpu_list(&list_a); + test_membarrier_free_percpu_list(&list_b); + + if (rseq_unregister_current_thread()) { + fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n", + errno, strerror(errno)); + abort(); + } + return NULL; +} + +/* Test MEMBARRIER_CMD_PRIVATE_RESTART_RSEQ_ON_CPU membarrier command. */ +#ifdef RSEQ_ARCH_HAS_OFFSET_DEREF_ADDV +void test_membarrier(void) +{ + const int num_threads = opt_threads; + struct test_membarrier_thread_args thread_args; + pthread_t worker_threads[num_threads]; + pthread_t manager_thread; + int i, ret; + + if (sys_membarrier(MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ, 0, 0)) { + perror("sys_membarrier"); + abort(); + } + + thread_args.stop = 0; + thread_args.percpu_list_ptr = 0; + ret = pthread_create(&manager_thread, NULL, + test_membarrier_manager_thread, &thread_args); + if (ret) { + errno = ret; + perror("pthread_create"); + abort(); + } + + for (i = 0; i < num_threads; i++) { + ret = pthread_create(&worker_threads[i], NULL, + test_membarrier_worker_thread, &thread_args); + if (ret) { + errno = ret; + perror("pthread_create"); + abort(); + } + } + + + for (i = 0; i < num_threads; i++) { + ret = pthread_join(worker_threads[i], NULL); + if (ret) { + errno = ret; + perror("pthread_join"); + abort(); + } + } + + atomic_store(&thread_args.stop, 1); + ret = pthread_join(manager_thread, NULL); + if (ret) { + errno = ret; + perror("pthread_join"); + abort(); + } +} +#else /* RSEQ_ARCH_HAS_OFFSET_DEREF_ADDV */ +void test_membarrier(void) +{ + fprintf(stderr, "rseq_offset_deref_addv is not implemented on this architecture. " + "Skipping membarrier test.\n"); +} +#endif + static void show_usage(int argc, char **argv) { printf("Usage : %s <OPTIONS>\n", @@ -1153,7 +1369,7 @@ static void show_usage(int argc, char **argv) printf(" [-r N] Number of repetitions per thread (default 5000)\n"); printf(" [-d] Disable rseq system call (no initialization)\n"); printf(" [-D M] Disable rseq for each M threads\n"); - printf(" [-T test] Choose test: (s)pinlock, (l)ist, (b)uffer, (m)emcpy, (i)ncrement\n"); + printf(" [-T test] Choose test: (s)pinlock, (l)ist, (b)uffer, (m)emcpy, (i)ncrement, membarrie(r)\n"); printf(" [-M] Push into buffer and memcpy buffer with memory barriers.\n"); printf(" [-v] Verbose output.\n"); printf(" [-h] Show this help.\n"); @@ -1268,6 +1484,7 @@ int main(int argc, char **argv) case 'i': case 'b': case 'm': + case 'r': break; default: show_usage(argc, argv); @@ -1320,6 +1537,10 @@ int main(int argc, char **argv) printf_verbose("counter increment\n"); test_percpu_inc(); break; + case 'r': + printf_verbose("membarrier\n"); + test_membarrier(); + break; } if (!opt_disable_rseq && rseq_unregister_current_thread()) abort(); diff --git a/tools/testing/selftests/rseq/rseq-x86.h b/tools/testing/selftests/rseq/rseq-x86.h index b2da6004fe30..640411518e46 100644 --- a/tools/testing/selftests/rseq/rseq-x86.h +++ b/tools/testing/selftests/rseq/rseq-x86.h @@ -279,6 +279,63 @@ error1: #endif } +#define RSEQ_ARCH_HAS_OFFSET_DEREF_ADDV + +/* + * pval = *(ptr+off) + * *pval += inc; + */ +static inline __attribute__((always_inline)) +int rseq_offset_deref_addv(intptr_t *ptr, off_t off, intptr_t inc, int cpu) +{ + RSEQ_INJECT_C(9) + + __asm__ __volatile__ goto ( + RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */ +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1]) +#endif + /* Start rseq by storing table entry pointer into rseq_cs. */ + RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi])) + RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f) + RSEQ_INJECT_ASM(3) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), %l[error1]) +#endif + /* get p+v */ + "movq %[ptr], %%rbx\n\t" + "addq %[off], %%rbx\n\t" + /* get pv */ + "movq (%%rbx), %%rcx\n\t" + /* *pv += inc */ + "addq %[inc], (%%rcx)\n\t" + "2:\n\t" + RSEQ_INJECT_ASM(4) + RSEQ_ASM_DEFINE_ABORT(4, "", abort) + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [rseq_abi] "r" (&__rseq_abi), + /* final store input */ + [ptr] "m" (*ptr), + [off] "er" (off), + [inc] "er" (inc) + : "memory", "cc", "rax", "rbx", "rcx" + RSEQ_INJECT_CLOBBER + : abort +#ifdef RSEQ_COMPARE_TWICE + , error1 +#endif + ); + return 0; +abort: + RSEQ_INJECT_FAILED + return -1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_bug("cpu_id comparison failed"); +#endif +} + static inline __attribute__((always_inline)) int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect, intptr_t *v2, intptr_t newv2, diff --git a/tools/testing/selftests/rseq/run_param_test.sh b/tools/testing/selftests/rseq/run_param_test.sh index e426304fd4a0..f51bc83c9e41 100755 --- a/tools/testing/selftests/rseq/run_param_test.sh +++ b/tools/testing/selftests/rseq/run_param_test.sh @@ -15,6 +15,7 @@ TEST_LIST=( "-T m" "-T m -M" "-T i" + "-T r" ) TEST_NAME=( @@ -25,6 +26,7 @@ TEST_NAME=( "memcpy" "memcpy with barrier" "increment" + "membarrier" ) IFS="$OLDIFS" diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index 535720b2592a..4a180439ee9e 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -133,6 +133,8 @@ struct seccomp_data { # define __NR_seccomp 348 # elif defined(__xtensa__) # define __NR_seccomp 337 +# elif defined(__sh__) +# define __NR_seccomp 372 # else # warning "seccomp syscall number unknown for this architecture" # define __NR_seccomp 0xffff @@ -772,8 +774,15 @@ void *kill_thread(void *data) return (void *)SIBLING_EXIT_UNKILLED; } +enum kill_t { + KILL_THREAD, + KILL_PROCESS, + RET_UNKNOWN +}; + /* Prepare a thread that will kill itself or both of us. */ -void kill_thread_or_group(struct __test_metadata *_metadata, bool kill_process) +void kill_thread_or_group(struct __test_metadata *_metadata, + enum kill_t kill_how) { pthread_t thread; void *status; @@ -789,11 +798,12 @@ void kill_thread_or_group(struct __test_metadata *_metadata, bool kill_process) .len = (unsigned short)ARRAY_SIZE(filter_thread), .filter = filter_thread, }; + int kill = kill_how == KILL_PROCESS ? SECCOMP_RET_KILL_PROCESS : 0xAAAAAAAAA; struct sock_filter filter_process[] = { BPF_STMT(BPF_LD|BPF_W|BPF_ABS, offsetof(struct seccomp_data, nr)), BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_prctl, 0, 1), - BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL_PROCESS), + BPF_STMT(BPF_RET|BPF_K, kill), BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), }; struct sock_fprog prog_process = { @@ -806,13 +816,15 @@ void kill_thread_or_group(struct __test_metadata *_metadata, bool kill_process) } ASSERT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0, - kill_process ? &prog_process : &prog_thread)); + kill_how == KILL_THREAD ? &prog_thread + : &prog_process)); /* * Add the KILL_THREAD rule again to make sure that the KILL_PROCESS * flag cannot be downgraded by a new filter. */ - ASSERT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog_thread)); + if (kill_how == KILL_PROCESS) + ASSERT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog_thread)); /* Start a thread that will exit immediately. */ ASSERT_EQ(0, pthread_create(&thread, NULL, kill_thread, (void *)false)); @@ -840,7 +852,7 @@ TEST(KILL_thread) child_pid = fork(); ASSERT_LE(0, child_pid); if (child_pid == 0) { - kill_thread_or_group(_metadata, false); + kill_thread_or_group(_metadata, KILL_THREAD); _exit(38); } @@ -859,7 +871,7 @@ TEST(KILL_process) child_pid = fork(); ASSERT_LE(0, child_pid); if (child_pid == 0) { - kill_thread_or_group(_metadata, true); + kill_thread_or_group(_metadata, KILL_PROCESS); _exit(38); } @@ -870,6 +882,27 @@ TEST(KILL_process) ASSERT_EQ(SIGSYS, WTERMSIG(status)); } +TEST(KILL_unknown) +{ + int status; + pid_t child_pid; + + child_pid = fork(); + ASSERT_LE(0, child_pid); + if (child_pid == 0) { + kill_thread_or_group(_metadata, RET_UNKNOWN); + _exit(38); + } + + ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0)); + + /* If the entire process was killed, we'll see SIGSYS. */ + EXPECT_TRUE(WIFSIGNALED(status)) { + TH_LOG("Unknown SECCOMP_RET is only killing the thread?"); + } + ASSERT_EQ(SIGSYS, WTERMSIG(status)); +} + /* TODO(wad) add 64-bit versus 32-bit arg tests. */ TEST(arg_out_of_range) { @@ -1665,66 +1698,148 @@ TEST_F(TRACE_poke, getpid_runs_normally) } #if defined(__x86_64__) -# define ARCH_REGS struct user_regs_struct -# define SYSCALL_NUM orig_rax -# define SYSCALL_RET rax +# define ARCH_REGS struct user_regs_struct +# define SYSCALL_NUM(_regs) (_regs).orig_rax +# define SYSCALL_RET(_regs) (_regs).rax #elif defined(__i386__) -# define ARCH_REGS struct user_regs_struct -# define SYSCALL_NUM orig_eax -# define SYSCALL_RET eax +# define ARCH_REGS struct user_regs_struct +# define SYSCALL_NUM(_regs) (_regs).orig_eax +# define SYSCALL_RET(_regs) (_regs).eax #elif defined(__arm__) -# define ARCH_REGS struct pt_regs -# define SYSCALL_NUM ARM_r7 -# define SYSCALL_RET ARM_r0 +# define ARCH_REGS struct pt_regs +# define SYSCALL_NUM(_regs) (_regs).ARM_r7 +# ifndef PTRACE_SET_SYSCALL +# define PTRACE_SET_SYSCALL 23 +# endif +# define SYSCALL_NUM_SET(_regs, _nr) \ + EXPECT_EQ(0, ptrace(PTRACE_SET_SYSCALL, tracee, NULL, _nr)) +# define SYSCALL_RET(_regs) (_regs).ARM_r0 #elif defined(__aarch64__) -# define ARCH_REGS struct user_pt_regs -# define SYSCALL_NUM regs[8] -# define SYSCALL_RET regs[0] +# define ARCH_REGS struct user_pt_regs +# define SYSCALL_NUM(_regs) (_regs).regs[8] +# ifndef NT_ARM_SYSTEM_CALL +# define NT_ARM_SYSTEM_CALL 0x404 +# endif +# define SYSCALL_NUM_SET(_regs, _nr) \ + do { \ + struct iovec __v; \ + typeof(_nr) __nr = (_nr); \ + __v.iov_base = &__nr; \ + __v.iov_len = sizeof(__nr); \ + EXPECT_EQ(0, ptrace(PTRACE_SETREGSET, tracee, \ + NT_ARM_SYSTEM_CALL, &__v)); \ + } while (0) +# define SYSCALL_RET(_regs) (_regs).regs[0] #elif defined(__riscv) && __riscv_xlen == 64 -# define ARCH_REGS struct user_regs_struct -# define SYSCALL_NUM a7 -# define SYSCALL_RET a0 +# define ARCH_REGS struct user_regs_struct +# define SYSCALL_NUM(_regs) (_regs).a7 +# define SYSCALL_RET(_regs) (_regs).a0 #elif defined(__csky__) -# define ARCH_REGS struct pt_regs -#if defined(__CSKYABIV2__) -# define SYSCALL_NUM regs[3] -#else -# define SYSCALL_NUM regs[9] -#endif -# define SYSCALL_RET a0 +# define ARCH_REGS struct pt_regs +# if defined(__CSKYABIV2__) +# define SYSCALL_NUM(_regs) (_regs).regs[3] +# else +# define SYSCALL_NUM(_regs) (_regs).regs[9] +# endif +# define SYSCALL_RET(_regs) (_regs).a0 #elif defined(__hppa__) -# define ARCH_REGS struct user_regs_struct -# define SYSCALL_NUM gr[20] -# define SYSCALL_RET gr[28] +# define ARCH_REGS struct user_regs_struct +# define SYSCALL_NUM(_regs) (_regs).gr[20] +# define SYSCALL_RET(_regs) (_regs).gr[28] #elif defined(__powerpc__) -# define ARCH_REGS struct pt_regs -# define SYSCALL_NUM gpr[0] -# define SYSCALL_RET gpr[3] +# define ARCH_REGS struct pt_regs +# define SYSCALL_NUM(_regs) (_regs).gpr[0] +# define SYSCALL_RET(_regs) (_regs).gpr[3] +# define SYSCALL_RET_SET(_regs, _val) \ + do { \ + typeof(_val) _result = (_val); \ + /* \ + * A syscall error is signaled by CR0 SO bit \ + * and the code is stored as a positive value. \ + */ \ + if (_result < 0) { \ + SYSCALL_RET(_regs) = -result; \ + (_regs).ccr |= 0x10000000; \ + } else { \ + SYSCALL_RET(_regs) = result; \ + (_regs).ccr &= ~0x10000000; \ + } \ + } while (0) +# define SYSCALL_RET_SET_ON_PTRACE_EXIT #elif defined(__s390__) -# define ARCH_REGS s390_regs -# define SYSCALL_NUM gprs[2] -# define SYSCALL_RET gprs[2] -# define SYSCALL_NUM_RET_SHARE_REG +# define ARCH_REGS s390_regs +# define SYSCALL_NUM(_regs) (_regs).gprs[2] +# define SYSCALL_RET_SET(_regs, _val) \ + TH_LOG("Can't modify syscall return on this architecture") #elif defined(__mips__) -# define ARCH_REGS struct pt_regs -# define SYSCALL_NUM regs[2] -# define SYSCALL_SYSCALL_NUM regs[4] -# define SYSCALL_RET regs[2] -# define SYSCALL_NUM_RET_SHARE_REG +# include <asm/unistd_nr_n32.h> +# include <asm/unistd_nr_n64.h> +# include <asm/unistd_nr_o32.h> +# define ARCH_REGS struct pt_regs +# define SYSCALL_NUM(_regs) \ + ({ \ + typeof((_regs).regs[2]) _nr; \ + if ((_regs).regs[2] == __NR_O32_Linux) \ + _nr = (_regs).regs[4]; \ + else \ + _nr = (_regs).regs[2]; \ + _nr; \ + }) +# define SYSCALL_NUM_SET(_regs, _nr) \ + do { \ + if ((_regs).regs[2] == __NR_O32_Linux) \ + (_regs).regs[4] = _nr; \ + else \ + (_regs).regs[2] = _nr; \ + } while (0) +# define SYSCALL_RET_SET(_regs, _val) \ + TH_LOG("Can't modify syscall return on this architecture") #elif defined(__xtensa__) -# define ARCH_REGS struct user_pt_regs -# define SYSCALL_NUM syscall +# define ARCH_REGS struct user_pt_regs +# define SYSCALL_NUM(_regs) (_regs).syscall /* * On xtensa syscall return value is in the register * a2 of the current window which is not fixed. */ -#define SYSCALL_RET(reg) a[(reg).windowbase * 4 + 2] +#define SYSCALL_RET(_regs) (_regs).a[(_regs).windowbase * 4 + 2] +#elif defined(__sh__) +# define ARCH_REGS struct pt_regs +# define SYSCALL_NUM(_regs) (_regs).gpr[3] +# define SYSCALL_RET(_regs) (_regs).gpr[0] #else # error "Do not know how to find your architecture's registers and syscalls" #endif +/* + * Most architectures can change the syscall by just updating the + * associated register. This is the default if not defined above. + */ +#ifndef SYSCALL_NUM_SET +# define SYSCALL_NUM_SET(_regs, _nr) \ + do { \ + SYSCALL_NUM(_regs) = (_nr); \ + } while (0) +#endif +/* + * Most architectures can change the syscall return value by just + * writing to the SYSCALL_RET register. This is the default if not + * defined above. If an architecture cannot set the return value + * (for example when the syscall and return value register is + * shared), report it with TH_LOG() in an arch-specific definition + * of SYSCALL_RET_SET() above, and leave SYSCALL_RET undefined. + */ +#if !defined(SYSCALL_RET) && !defined(SYSCALL_RET_SET) +# error "One of SYSCALL_RET or SYSCALL_RET_SET is needed for this arch" +#endif +#ifndef SYSCALL_RET_SET +# define SYSCALL_RET_SET(_regs, _val) \ + do { \ + SYSCALL_RET(_regs) = (_val); \ + } while (0) +#endif + /* When the syscall return can't be changed, stub out the tests for it. */ -#ifdef SYSCALL_NUM_RET_SHARE_REG +#ifndef SYSCALL_RET # define EXPECT_SYSCALL_RETURN(val, action) EXPECT_EQ(-1, action) #else # define EXPECT_SYSCALL_RETURN(val, action) \ @@ -1739,116 +1854,92 @@ TEST_F(TRACE_poke, getpid_runs_normally) } while (0) #endif -/* Use PTRACE_GETREGS and PTRACE_SETREGS when available. This is useful for +/* + * Some architectures (e.g. powerpc) can only set syscall + * return values on syscall exit during ptrace. + */ +const bool ptrace_entry_set_syscall_nr = true; +const bool ptrace_entry_set_syscall_ret = +#ifndef SYSCALL_RET_SET_ON_PTRACE_EXIT + true; +#else + false; +#endif + +/* + * Use PTRACE_GETREGS and PTRACE_SETREGS when available. This is useful for * architectures without HAVE_ARCH_TRACEHOOK (e.g. User-mode Linux). */ #if defined(__x86_64__) || defined(__i386__) || defined(__mips__) -#define HAVE_GETREGS +# define ARCH_GETREGS(_regs) ptrace(PTRACE_GETREGS, tracee, 0, &(_regs)) +# define ARCH_SETREGS(_regs) ptrace(PTRACE_SETREGS, tracee, 0, &(_regs)) +#else +# define ARCH_GETREGS(_regs) ({ \ + struct iovec __v; \ + __v.iov_base = &(_regs); \ + __v.iov_len = sizeof(_regs); \ + ptrace(PTRACE_GETREGSET, tracee, NT_PRSTATUS, &__v); \ + }) +# define ARCH_SETREGS(_regs) ({ \ + struct iovec __v; \ + __v.iov_base = &(_regs); \ + __v.iov_len = sizeof(_regs); \ + ptrace(PTRACE_SETREGSET, tracee, NT_PRSTATUS, &__v); \ + }) #endif /* Architecture-specific syscall fetching routine. */ int get_syscall(struct __test_metadata *_metadata, pid_t tracee) { ARCH_REGS regs; -#ifdef HAVE_GETREGS - EXPECT_EQ(0, ptrace(PTRACE_GETREGS, tracee, 0, ®s)) { - TH_LOG("PTRACE_GETREGS failed"); - return -1; - } -#else - struct iovec iov; - iov.iov_base = ®s; - iov.iov_len = sizeof(regs); - EXPECT_EQ(0, ptrace(PTRACE_GETREGSET, tracee, NT_PRSTATUS, &iov)) { - TH_LOG("PTRACE_GETREGSET failed"); + EXPECT_EQ(0, ARCH_GETREGS(regs)) { return -1; } -#endif -#if defined(__mips__) - if (regs.SYSCALL_NUM == __NR_O32_Linux) - return regs.SYSCALL_SYSCALL_NUM; -#endif - return regs.SYSCALL_NUM; + return SYSCALL_NUM(regs); } /* Architecture-specific syscall changing routine. */ -void change_syscall(struct __test_metadata *_metadata, - pid_t tracee, int syscall, int result) +void __change_syscall(struct __test_metadata *_metadata, + pid_t tracee, long *syscall, long *ret) { - int ret; - ARCH_REGS regs; -#ifdef HAVE_GETREGS - ret = ptrace(PTRACE_GETREGS, tracee, 0, ®s); -#else - struct iovec iov; - iov.iov_base = ®s; - iov.iov_len = sizeof(regs); - ret = ptrace(PTRACE_GETREGSET, tracee, NT_PRSTATUS, &iov); -#endif - EXPECT_EQ(0, ret) {} + ARCH_REGS orig, regs; -#if defined(__x86_64__) || defined(__i386__) || defined(__powerpc__) || \ - defined(__s390__) || defined(__hppa__) || defined(__riscv) || \ - defined(__xtensa__) || defined(__csky__) - { - regs.SYSCALL_NUM = syscall; - } -#elif defined(__mips__) - { - if (regs.SYSCALL_NUM == __NR_O32_Linux) - regs.SYSCALL_SYSCALL_NUM = syscall; - else - regs.SYSCALL_NUM = syscall; - } + /* Do not get/set registers if we have nothing to do. */ + if (!syscall && !ret) + return; -#elif defined(__arm__) -# ifndef PTRACE_SET_SYSCALL -# define PTRACE_SET_SYSCALL 23 -# endif - { - ret = ptrace(PTRACE_SET_SYSCALL, tracee, NULL, syscall); - EXPECT_EQ(0, ret); + EXPECT_EQ(0, ARCH_GETREGS(regs)) { + return; } + orig = regs; -#elif defined(__aarch64__) -# ifndef NT_ARM_SYSTEM_CALL -# define NT_ARM_SYSTEM_CALL 0x404 -# endif - { - iov.iov_base = &syscall; - iov.iov_len = sizeof(syscall); - ret = ptrace(PTRACE_SETREGSET, tracee, NT_ARM_SYSTEM_CALL, - &iov); - EXPECT_EQ(0, ret); - } + if (syscall) + SYSCALL_NUM_SET(regs, *syscall); -#else - ASSERT_EQ(1, 0) { - TH_LOG("How is the syscall changed on this architecture?"); - } -#endif + if (ret) + SYSCALL_RET_SET(regs, *ret); - /* If syscall is skipped, change return value. */ - if (syscall == -1) -#ifdef SYSCALL_NUM_RET_SHARE_REG - TH_LOG("Can't modify syscall return on this architecture"); + /* Flush any register changes made. */ + if (memcmp(&orig, ®s, sizeof(orig)) != 0) + EXPECT_EQ(0, ARCH_SETREGS(regs)); +} -#elif defined(__xtensa__) - regs.SYSCALL_RET(regs) = result; -#else - regs.SYSCALL_RET = result; -#endif +/* Change only syscall number. */ +void change_syscall_nr(struct __test_metadata *_metadata, + pid_t tracee, long syscall) +{ + __change_syscall(_metadata, tracee, &syscall, NULL); +} -#ifdef HAVE_GETREGS - ret = ptrace(PTRACE_SETREGS, tracee, 0, ®s); -#else - iov.iov_base = ®s; - iov.iov_len = sizeof(regs); - ret = ptrace(PTRACE_SETREGSET, tracee, NT_PRSTATUS, &iov); -#endif - EXPECT_EQ(0, ret); +/* Change syscall return value (and set syscall number to -1). */ +void change_syscall_ret(struct __test_metadata *_metadata, + pid_t tracee, long ret) +{ + long syscall = -1; + + __change_syscall(_metadata, tracee, &syscall, &ret); } void tracer_seccomp(struct __test_metadata *_metadata, pid_t tracee, @@ -1866,17 +1957,17 @@ void tracer_seccomp(struct __test_metadata *_metadata, pid_t tracee, case 0x1002: /* change getpid to getppid. */ EXPECT_EQ(__NR_getpid, get_syscall(_metadata, tracee)); - change_syscall(_metadata, tracee, __NR_getppid, 0); + change_syscall_nr(_metadata, tracee, __NR_getppid); break; case 0x1003: /* skip gettid with valid return code. */ EXPECT_EQ(__NR_gettid, get_syscall(_metadata, tracee)); - change_syscall(_metadata, tracee, -1, 45000); + change_syscall_ret(_metadata, tracee, 45000); break; case 0x1004: /* skip openat with error. */ EXPECT_EQ(__NR_openat, get_syscall(_metadata, tracee)); - change_syscall(_metadata, tracee, -1, -ESRCH); + change_syscall_ret(_metadata, tracee, -ESRCH); break; case 0x1005: /* do nothing (allow getppid) */ @@ -1891,12 +1982,21 @@ void tracer_seccomp(struct __test_metadata *_metadata, pid_t tracee, } +FIXTURE(TRACE_syscall) { + struct sock_fprog prog; + pid_t tracer, mytid, mypid, parent; + long syscall_nr; +}; + void tracer_ptrace(struct __test_metadata *_metadata, pid_t tracee, int status, void *args) { - int ret, nr; + int ret; unsigned long msg; static bool entry; + long syscall_nr_val, syscall_ret_val; + long *syscall_nr = NULL, *syscall_ret = NULL; + FIXTURE_DATA(TRACE_syscall) *self = args; /* * The traditional way to tell PTRACE_SYSCALL entry/exit @@ -1910,24 +2010,48 @@ void tracer_ptrace(struct __test_metadata *_metadata, pid_t tracee, EXPECT_EQ(entry ? PTRACE_EVENTMSG_SYSCALL_ENTRY : PTRACE_EVENTMSG_SYSCALL_EXIT, msg); - if (!entry) - return; + /* + * Some architectures only support setting return values during + * syscall exit under ptrace, and on exit the syscall number may + * no longer be available. Therefore, save the initial sycall + * number here, so it can be examined during both entry and exit + * phases. + */ + if (entry) + self->syscall_nr = get_syscall(_metadata, tracee); - nr = get_syscall(_metadata, tracee); + /* + * Depending on the architecture's syscall setting abilities, we + * pick which things to set during this phase (entry or exit). + */ + if (entry == ptrace_entry_set_syscall_nr) + syscall_nr = &syscall_nr_val; + if (entry == ptrace_entry_set_syscall_ret) + syscall_ret = &syscall_ret_val; + + /* Now handle the actual rewriting cases. */ + switch (self->syscall_nr) { + case __NR_getpid: + syscall_nr_val = __NR_getppid; + /* Never change syscall return for this case. */ + syscall_ret = NULL; + break; + case __NR_gettid: + syscall_nr_val = -1; + syscall_ret_val = 45000; + break; + case __NR_openat: + syscall_nr_val = -1; + syscall_ret_val = -ESRCH; + break; + default: + /* Unhandled, do nothing. */ + return; + } - if (nr == __NR_getpid) - change_syscall(_metadata, tracee, __NR_getppid, 0); - if (nr == __NR_gettid) - change_syscall(_metadata, tracee, -1, 45000); - if (nr == __NR_openat) - change_syscall(_metadata, tracee, -1, -ESRCH); + __change_syscall(_metadata, tracee, syscall_nr, syscall_ret); } -FIXTURE(TRACE_syscall) { - struct sock_fprog prog; - pid_t tracer, mytid, mypid, parent; -}; - FIXTURE_VARIANT(TRACE_syscall) { /* * All of the SECCOMP_RET_TRACE behaviors can be tested with either @@ -1986,7 +2110,7 @@ FIXTURE_SETUP(TRACE_syscall) self->tracer = setup_trace_fixture(_metadata, variant->use_ptrace ? tracer_ptrace : tracer_seccomp, - NULL, variant->use_ptrace); + self, variant->use_ptrace); ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); ASSERT_EQ(0, ret); @@ -3136,11 +3260,11 @@ skip: static int user_notif_syscall(int nr, unsigned int flags) { struct sock_filter filter[] = { - BPF_STMT(BPF_LD+BPF_W+BPF_ABS, + BPF_STMT(BPF_LD|BPF_W|BPF_ABS, offsetof(struct seccomp_data, nr)), - BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, nr, 0, 1), - BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_USER_NOTIF), - BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW), + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, nr, 0, 1), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_USER_NOTIF), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), }; struct sock_fprog prog = { @@ -3693,7 +3817,7 @@ TEST(user_notification_filter_empty) long ret; int status; struct pollfd pollfd; - struct clone_args args = { + struct __clone_args args = { .flags = CLONE_FILES, .exit_signal = SIGCHLD, }; @@ -3709,7 +3833,7 @@ TEST(user_notification_filter_empty) if (pid == 0) { int listener; - listener = user_notif_syscall(__NR_mknod, SECCOMP_FILTER_FLAG_NEW_LISTENER); + listener = user_notif_syscall(__NR_mknodat, SECCOMP_FILTER_FLAG_NEW_LISTENER); if (listener < 0) _exit(EXIT_FAILURE); @@ -3747,7 +3871,7 @@ TEST(user_notification_filter_empty_threaded) long ret; int status; struct pollfd pollfd; - struct clone_args args = { + struct __clone_args args = { .flags = CLONE_FILES, .exit_signal = SIGCHLD, }; diff --git a/tools/testing/selftests/splice/.gitignore b/tools/testing/selftests/splice/.gitignore index d5a2da428752..be8266f5d04c 100644 --- a/tools/testing/selftests/splice/.gitignore +++ b/tools/testing/selftests/splice/.gitignore @@ -1,2 +1,3 @@ # SPDX-License-Identifier: GPL-2.0-only default_file_splice_read +splice_read diff --git a/tools/testing/selftests/splice/Makefile b/tools/testing/selftests/splice/Makefile index e519b159b60d..541cd826d5a5 100644 --- a/tools/testing/selftests/splice/Makefile +++ b/tools/testing/selftests/splice/Makefile @@ -1,5 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 -TEST_PROGS := default_file_splice_read.sh -TEST_GEN_PROGS_EXTENDED := default_file_splice_read +TEST_PROGS := default_file_splice_read.sh short_splice_read.sh +TEST_GEN_PROGS_EXTENDED := default_file_splice_read splice_read include ../lib.mk diff --git a/tools/testing/selftests/splice/config b/tools/testing/selftests/splice/config new file mode 100644 index 000000000000..058c928368b8 --- /dev/null +++ b/tools/testing/selftests/splice/config @@ -0,0 +1 @@ +CONFIG_TEST_LKM=m diff --git a/tools/testing/selftests/splice/settings b/tools/testing/selftests/splice/settings new file mode 100644 index 000000000000..89cedfc0d12b --- /dev/null +++ b/tools/testing/selftests/splice/settings @@ -0,0 +1 @@ +timeout=5 diff --git a/tools/testing/selftests/splice/short_splice_read.sh b/tools/testing/selftests/splice/short_splice_read.sh new file mode 100755 index 000000000000..7810d3589d9a --- /dev/null +++ b/tools/testing/selftests/splice/short_splice_read.sh @@ -0,0 +1,56 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +set -e + +ret=0 + +do_splice() +{ + filename="$1" + bytes="$2" + expected="$3" + + out=$(./splice_read "$filename" "$bytes" | cat) + if [ "$out" = "$expected" ] ; then + echo "ok: $filename $bytes" + else + echo "FAIL: $filename $bytes" + ret=1 + fi +} + +test_splice() +{ + filename="$1" + + full=$(cat "$filename") + two=$(echo "$full" | grep -m1 . | cut -c-2) + + # Make sure full splice has the same contents as a standard read. + do_splice "$filename" 4096 "$full" + + # Make sure a partial splice see the first two characters. + do_splice "$filename" 2 "$two" +} + +# proc_single_open(), seq_read() +test_splice /proc/$$/limits +# special open, seq_read() +test_splice /proc/$$/comm + +# proc_handler, proc_dointvec_minmax +test_splice /proc/sys/fs/nr_open +# proc_handler, proc_dostring +test_splice /proc/sys/kernel/modprobe +# proc_handler, special read +test_splice /proc/sys/kernel/version + +if ! [ -d /sys/module/test_module/sections ] ; then + modprobe test_module +fi +# kernfs, attr +test_splice /sys/module/test_module/coresize +# kernfs, binattr +test_splice /sys/module/test_module/sections/.init.text + +exit $ret diff --git a/tools/testing/selftests/splice/splice_read.c b/tools/testing/selftests/splice/splice_read.c new file mode 100644 index 000000000000..46dae6a25cfb --- /dev/null +++ b/tools/testing/selftests/splice/splice_read.c @@ -0,0 +1,57 @@ +// SPDX-License-Identifier: GPL-2.0 +#define _GNU_SOURCE +#include <errno.h> +#include <fcntl.h> +#include <limits.h> +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <sys/types.h> +#include <sys/stat.h> + +int main(int argc, char *argv[]) +{ + int fd; + size_t size; + ssize_t spliced; + + if (argc < 2) { + fprintf(stderr, "Usage: %s INPUT [BYTES]\n", argv[0]); + return EXIT_FAILURE; + } + + fd = open(argv[1], O_RDONLY); + if (fd < 0) { + perror(argv[1]); + return EXIT_FAILURE; + } + + if (argc == 3) + size = atol(argv[2]); + else { + struct stat statbuf; + + if (fstat(fd, &statbuf) < 0) { + perror(argv[1]); + return EXIT_FAILURE; + } + + if (statbuf.st_size > INT_MAX) { + fprintf(stderr, "%s: Too big\n", argv[1]); + return EXIT_FAILURE; + } + + size = statbuf.st_size; + } + + /* splice(2) file to stdout. */ + spliced = splice(fd, NULL, STDOUT_FILENO, NULL, + size, SPLICE_F_MOVE); + if (spliced < 0) { + perror("splice"); + return EXIT_FAILURE; + } + + close(fd); + return EXIT_SUCCESS; +} diff --git a/tools/testing/selftests/timers/Makefile b/tools/testing/selftests/timers/Makefile index 7656c7ce79d9..0e73a16874c4 100644 --- a/tools/testing/selftests/timers/Makefile +++ b/tools/testing/selftests/timers/Makefile @@ -13,6 +13,7 @@ DESTRUCTIVE_TESTS = alarmtimer-suspend valid-adjtimex adjtick change_skew \ TEST_GEN_PROGS_EXTENDED = $(DESTRUCTIVE_TESTS) +TEST_FILES := settings include ../lib.mk diff --git a/tools/testing/selftests/timers/settings b/tools/testing/selftests/timers/settings new file mode 100644 index 000000000000..e7b9417537fb --- /dev/null +++ b/tools/testing/selftests/timers/settings @@ -0,0 +1 @@ +timeout=0 diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile index a9026706d597..30873b19d04b 100644 --- a/tools/testing/selftests/vm/Makefile +++ b/tools/testing/selftests/vm/Makefile @@ -3,6 +3,23 @@ uname_M := $(shell uname -m 2>/dev/null || echo not) MACHINE ?= $(shell echo $(uname_M) | sed -e 's/aarch64.*/arm64/') +# Without this, failed build products remain, with up-to-date timestamps, +# thus tricking Make (and you!) into believing that All Is Well, in subsequent +# make invocations: +.DELETE_ON_ERROR: + +# Avoid accidental wrong builds, due to built-in rules working just a little +# bit too well--but not quite as well as required for our situation here. +# +# In other words, "make userfaultfd" is supposed to fail to build at all, +# because this Makefile only supports either "make" (all), or "make /full/path". +# However, the built-in rules, if not suppressed, will pick up CFLAGS and the +# initial LDLIBS (but not the target-specific LDLIBS, because those are only +# set for the full path target!). This causes it to get pretty far into building +# things despite using incorrect values such as an *occasionally* incomplete +# LDLIBS. +MAKEFLAGS += --no-builtin-rules + CFLAGS = -Wall -I ../../../../usr/include $(EXTRA_CFLAGS) LDLIBS = -lrt TEST_GEN_FILES = compaction_test diff --git a/tools/testing/selftests/vm/compaction_test.c b/tools/testing/selftests/vm/compaction_test.c index bcec71250873..9b420140ba2b 100644 --- a/tools/testing/selftests/vm/compaction_test.c +++ b/tools/testing/selftests/vm/compaction_test.c @@ -18,7 +18,8 @@ #include "../kselftest.h" -#define MAP_SIZE 1048576 +#define MAP_SIZE_MB 100 +#define MAP_SIZE (MAP_SIZE_MB * 1024 * 1024) struct map_list { void *map; @@ -165,7 +166,7 @@ int main(int argc, char **argv) void *map = NULL; unsigned long mem_free = 0; unsigned long hugepage_size = 0; - unsigned long mem_fragmentable = 0; + long mem_fragmentable_MB = 0; if (prereq() != 0) { printf("Either the sysctl compact_unevictable_allowed is not\n" @@ -190,9 +191,9 @@ int main(int argc, char **argv) return -1; } - mem_fragmentable = mem_free * 0.8 / 1024; + mem_fragmentable_MB = mem_free * 0.8 / 1024; - while (mem_fragmentable > 0) { + while (mem_fragmentable_MB > 0) { map = mmap(NULL, MAP_SIZE, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE | MAP_LOCKED, -1, 0); if (map == MAP_FAILED) @@ -213,7 +214,7 @@ int main(int argc, char **argv) for (i = 0; i < MAP_SIZE; i += page_size) *(unsigned long *)(map + i) = (unsigned long)map + i; - mem_fragmentable--; + mem_fragmentable_MB -= MAP_SIZE_MB; } for (entry = list; entry != NULL; entry = entry->next) { diff --git a/tools/testing/selftests/vm/gup_benchmark.c b/tools/testing/selftests/vm/gup_benchmark.c index 43b4dfe161a2..1d4359341e44 100644 --- a/tools/testing/selftests/vm/gup_benchmark.c +++ b/tools/testing/selftests/vm/gup_benchmark.c @@ -15,12 +15,12 @@ #define PAGE_SIZE sysconf(_SC_PAGESIZE) #define GUP_FAST_BENCHMARK _IOWR('g', 1, struct gup_benchmark) -#define GUP_LONGTERM_BENCHMARK _IOWR('g', 2, struct gup_benchmark) -#define GUP_BENCHMARK _IOWR('g', 3, struct gup_benchmark) +#define GUP_BENCHMARK _IOWR('g', 2, struct gup_benchmark) /* Similar to above, but use FOLL_PIN instead of FOLL_GET. */ -#define PIN_FAST_BENCHMARK _IOWR('g', 4, struct gup_benchmark) -#define PIN_BENCHMARK _IOWR('g', 5, struct gup_benchmark) +#define PIN_FAST_BENCHMARK _IOWR('g', 3, struct gup_benchmark) +#define PIN_BENCHMARK _IOWR('g', 4, struct gup_benchmark) +#define PIN_LONGTERM_BENCHMARK _IOWR('g', 5, struct gup_benchmark) /* Just the flags we need, copied from mm.h: */ #define FOLL_WRITE 0x01 /* check pte is writable */ @@ -52,6 +52,9 @@ int main(int argc, char **argv) case 'b': cmd = PIN_BENCHMARK; break; + case 'L': + cmd = PIN_LONGTERM_BENCHMARK; + break; case 'm': size = atoi(optarg) * MB; break; @@ -67,9 +70,6 @@ int main(int argc, char **argv) case 'T': thp = 0; break; - case 'L': - cmd = GUP_LONGTERM_BENCHMARK; - break; case 'U': cmd = GUP_BENCHMARK; break; @@ -105,12 +105,16 @@ int main(int argc, char **argv) gup.flags |= FOLL_WRITE; fd = open("/sys/kernel/debug/gup_benchmark", O_RDWR); - if (fd == -1) - perror("open"), exit(1); + if (fd == -1) { + perror("open"); + exit(1); + } p = mmap(NULL, size, PROT_READ | PROT_WRITE, flags, filed, 0); - if (p == MAP_FAILED) - perror("mmap"), exit(1); + if (p == MAP_FAILED) { + perror("mmap"); + exit(1); + } gup.addr = (unsigned long)p; if (thp == 1) @@ -123,8 +127,10 @@ int main(int argc, char **argv) for (i = 0; i < repeats; i++) { gup.size = size; - if (ioctl(fd, cmd, &gup)) - perror("ioctl"), exit(1); + if (ioctl(fd, cmd, &gup)) { + perror("ioctl"); + exit(1); + } printf("Time: get:%lld put:%lld us", gup.get_delta_usec, gup.put_delta_usec); diff --git a/tools/testing/selftests/vm/hmm-tests.c b/tools/testing/selftests/vm/hmm-tests.c index 91d38a29956b..0a28a6a29581 100644 --- a/tools/testing/selftests/vm/hmm-tests.c +++ b/tools/testing/selftests/vm/hmm-tests.c @@ -680,7 +680,7 @@ TEST_F(hmm, anon_write_hugetlbfs) n = gethugepagesizes(pagesizes, 4); if (n <= 0) - return; + SKIP(return, "Huge page size could not be determined"); for (idx = 0; --n > 0; ) { if (pagesizes[n] < pagesizes[idx]) idx = n; @@ -694,7 +694,7 @@ TEST_F(hmm, anon_write_hugetlbfs) buffer->ptr = get_hugepage_region(size, GHR_STRICT); if (buffer->ptr == NULL) { free(buffer); - return; + SKIP(return, "Huge page could not be allocated"); } buffer->fd = -1; @@ -942,6 +942,41 @@ TEST_F(hmm, migrate_fault) } /* + * Migrate anonymous shared memory to device private memory. + */ +TEST_F(hmm, migrate_shared) +{ + struct hmm_buffer *buffer; + unsigned long npages; + unsigned long size; + int ret; + + npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift; + ASSERT_NE(npages, 0); + size = npages << self->page_shift; + + buffer = malloc(sizeof(*buffer)); + ASSERT_NE(buffer, NULL); + + buffer->fd = -1; + buffer->size = size; + buffer->mirror = malloc(size); + ASSERT_NE(buffer->mirror, NULL); + + buffer->ptr = mmap(NULL, size, + PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_ANONYMOUS, + buffer->fd, 0); + ASSERT_NE(buffer->ptr, MAP_FAILED); + + /* Migrate memory to device. */ + ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_MIGRATE, buffer, npages); + ASSERT_EQ(ret, -ENOENT); + + hmm_buffer_free(buffer); +} + +/* * Try to migrate various memory types to device private memory. */ TEST_F(hmm2, migrate_mixed) diff --git a/tools/testing/selftests/vm/map_hugetlb.c b/tools/testing/selftests/vm/map_hugetlb.c index 6af951900aa3..312889edb84a 100644 --- a/tools/testing/selftests/vm/map_hugetlb.c +++ b/tools/testing/selftests/vm/map_hugetlb.c @@ -83,7 +83,7 @@ int main(int argc, char **argv) } if (shift) - printf("%u kB hugepages\n", 1 << shift); + printf("%u kB hugepages\n", 1 << (shift - 10)); else printf("Default size hugepages\n"); printf("Mapping %lu Mbytes\n", (unsigned long)length >> 20); diff --git a/tools/testing/selftests/vm/userfaultfd.c b/tools/testing/selftests/vm/userfaultfd.c index 61e5cfeb1350..9b0912a01777 100644 --- a/tools/testing/selftests/vm/userfaultfd.c +++ b/tools/testing/selftests/vm/userfaultfd.c @@ -227,8 +227,10 @@ static void hugetlb_allocate_area(void **alloc_area) huge_fd, *alloc_area == area_src ? 0 : nr_pages * page_size); if (area_alias == MAP_FAILED) { - if (munmap(*alloc_area, nr_pages * page_size) < 0) - perror("hugetlb munmap"), exit(1); + if (munmap(*alloc_area, nr_pages * page_size) < 0) { + perror("hugetlb munmap"); + exit(1); + } *alloc_area = NULL; return; } @@ -337,9 +339,10 @@ static void wp_range(int ufd, __u64 start, __u64 len, bool wp) /* Undo write-protect, do wakeup after that */ prms.mode = wp ? UFFDIO_WRITEPROTECT_MODE_WP : 0; - if (ioctl(ufd, UFFDIO_WRITEPROTECT, &prms)) - fprintf(stderr, "clear WP failed for address 0x%Lx\n", - start), exit(1); + if (ioctl(ufd, UFFDIO_WRITEPROTECT, &prms)) { + fprintf(stderr, "clear WP failed for address 0x%Lx\n", start); + exit(1); + } } static void *locking_thread(void *arg) @@ -359,8 +362,10 @@ static void *locking_thread(void *arg) seed += cpu; bzero(&rand, sizeof(rand)); bzero(&randstate, sizeof(randstate)); - if (initstate_r(seed, randstate, sizeof(randstate), &rand)) - fprintf(stderr, "srandom_r error\n"), exit(1); + if (initstate_r(seed, randstate, sizeof(randstate), &rand)) { + fprintf(stderr, "srandom_r error\n"); + exit(1); + } } else { page_nr = -bounces; if (!(bounces & BOUNCE_RACINGFAULTS)) @@ -369,12 +374,16 @@ static void *locking_thread(void *arg) while (!finished) { if (bounces & BOUNCE_RANDOM) { - if (random_r(&rand, &rand_nr)) - fprintf(stderr, "random_r 1 error\n"), exit(1); + if (random_r(&rand, &rand_nr)) { + fprintf(stderr, "random_r 1 error\n"); + exit(1); + } page_nr = rand_nr; if (sizeof(page_nr) > sizeof(rand_nr)) { - if (random_r(&rand, &rand_nr)) - fprintf(stderr, "random_r 2 error\n"), exit(1); + if (random_r(&rand, &rand_nr)) { + fprintf(stderr, "random_r 2 error\n"); + exit(1); + } page_nr |= (((unsigned long) rand_nr) << 16) << 16; } @@ -385,11 +394,13 @@ static void *locking_thread(void *arg) start = time(NULL); if (bounces & BOUNCE_VERIFY) { count = *area_count(area_dst, page_nr); - if (!count) + if (!count) { fprintf(stderr, "page_nr %lu wrong count %Lu %Lu\n", page_nr, count, - count_verify[page_nr]), exit(1); + count_verify[page_nr]); + exit(1); + } /* @@ -401,11 +412,12 @@ static void *locking_thread(void *arg) */ #if 1 if (!my_bcmp(area_dst + page_nr * page_size, zeropage, - page_size)) + page_size)) { fprintf(stderr, "my_bcmp page_nr %lu wrong count %Lu %Lu\n", - page_nr, count, - count_verify[page_nr]), exit(1); + page_nr, count, count_verify[page_nr]); + exit(1); + } #else unsigned long loops; @@ -437,7 +449,7 @@ static void *locking_thread(void *arg) fprintf(stderr, "page_nr %lu memory corruption %Lu %Lu\n", page_nr, count, - count_verify[page_nr]), exit(1); + count_verify[page_nr]); exit(1); } count++; *area_count(area_dst, page_nr) = count_verify[page_nr] = count; @@ -461,12 +473,14 @@ static void retry_copy_page(int ufd, struct uffdio_copy *uffdio_copy, offset); if (ioctl(ufd, UFFDIO_COPY, uffdio_copy)) { /* real retval in ufdio_copy.copy */ - if (uffdio_copy->copy != -EEXIST) + if (uffdio_copy->copy != -EEXIST) { fprintf(stderr, "UFFDIO_COPY retry error %Ld\n", - uffdio_copy->copy), exit(1); + uffdio_copy->copy); + exit(1); + } } else { fprintf(stderr, "UFFDIO_COPY retry unexpected %Ld\n", - uffdio_copy->copy), exit(1); + uffdio_copy->copy); exit(1); } } @@ -474,9 +488,10 @@ static int __copy_page(int ufd, unsigned long offset, bool retry) { struct uffdio_copy uffdio_copy; - if (offset >= nr_pages * page_size) - fprintf(stderr, "unexpected offset %lu\n", - offset), exit(1); + if (offset >= nr_pages * page_size) { + fprintf(stderr, "unexpected offset %lu\n", offset); + exit(1); + } uffdio_copy.dst = (unsigned long) area_dst + offset; uffdio_copy.src = (unsigned long) area_src + offset; uffdio_copy.len = page_size; @@ -487,12 +502,14 @@ static int __copy_page(int ufd, unsigned long offset, bool retry) uffdio_copy.copy = 0; if (ioctl(ufd, UFFDIO_COPY, &uffdio_copy)) { /* real retval in ufdio_copy.copy */ - if (uffdio_copy.copy != -EEXIST) + if (uffdio_copy.copy != -EEXIST) { fprintf(stderr, "UFFDIO_COPY error %Ld\n", - uffdio_copy.copy), exit(1); + uffdio_copy.copy); + exit(1); + } } else if (uffdio_copy.copy != page_size) { fprintf(stderr, "UFFDIO_COPY unexpected copy %Ld\n", - uffdio_copy.copy), exit(1); + uffdio_copy.copy); exit(1); } else { if (test_uffdio_copy_eexist && retry) { test_uffdio_copy_eexist = false; @@ -521,11 +538,11 @@ static int uffd_read_msg(int ufd, struct uffd_msg *msg) if (ret < 0) { if (errno == EAGAIN) return 1; - else - perror("blocking read error"), exit(1); + perror("blocking read error"); } else { - fprintf(stderr, "short read\n"), exit(1); + fprintf(stderr, "short read\n"); } + exit(1); } return 0; @@ -536,9 +553,10 @@ static void uffd_handle_page_fault(struct uffd_msg *msg, { unsigned long offset; - if (msg->event != UFFD_EVENT_PAGEFAULT) - fprintf(stderr, "unexpected msg event %u\n", - msg->event), exit(1); + if (msg->event != UFFD_EVENT_PAGEFAULT) { + fprintf(stderr, "unexpected msg event %u\n", msg->event); + exit(1); + } if (msg->arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_WP) { wp_range(uffd, msg->arg.pagefault.address, page_size, false); @@ -546,8 +564,10 @@ static void uffd_handle_page_fault(struct uffd_msg *msg, } else { /* Missing page faults */ if (bounces & BOUNCE_VERIFY && - msg->arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_WRITE) - fprintf(stderr, "unexpected write fault\n"), exit(1); + msg->arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_WRITE) { + fprintf(stderr, "unexpected write fault\n"); + exit(1); + } offset = (char *)(unsigned long)msg->arg.pagefault.address - area_dst; offset &= ~(page_size-1); @@ -574,25 +594,32 @@ static void *uffd_poll_thread(void *arg) for (;;) { ret = poll(pollfd, 2, -1); - if (!ret) - fprintf(stderr, "poll error %d\n", ret), exit(1); - if (ret < 0) - perror("poll"), exit(1); + if (!ret) { + fprintf(stderr, "poll error %d\n", ret); + exit(1); + } + if (ret < 0) { + perror("poll"); + exit(1); + } if (pollfd[1].revents & POLLIN) { - if (read(pollfd[1].fd, &tmp_chr, 1) != 1) - fprintf(stderr, "read pipefd error\n"), - exit(1); + if (read(pollfd[1].fd, &tmp_chr, 1) != 1) { + fprintf(stderr, "read pipefd error\n"); + exit(1); + } break; } - if (!(pollfd[0].revents & POLLIN)) + if (!(pollfd[0].revents & POLLIN)) { fprintf(stderr, "pollfd[0].revents %d\n", - pollfd[0].revents), exit(1); + pollfd[0].revents); + exit(1); + } if (uffd_read_msg(uffd, &msg)) continue; switch (msg.event) { default: fprintf(stderr, "unexpected msg event %u\n", - msg.event), exit(1); + msg.event); exit(1); break; case UFFD_EVENT_PAGEFAULT: uffd_handle_page_fault(&msg, stats); @@ -606,8 +633,10 @@ static void *uffd_poll_thread(void *arg) uffd_reg.range.start = msg.arg.remove.start; uffd_reg.range.len = msg.arg.remove.end - msg.arg.remove.start; - if (ioctl(uffd, UFFDIO_UNREGISTER, &uffd_reg.range)) - fprintf(stderr, "remove failure\n"), exit(1); + if (ioctl(uffd, UFFDIO_UNREGISTER, &uffd_reg.range)) { + fprintf(stderr, "remove failure\n"); + exit(1); + } break; case UFFD_EVENT_REMAP: area_dst = (char *)(unsigned long)msg.arg.remap.to; @@ -879,8 +908,10 @@ static int faulting_process(int signal_test) area_dst = mremap(area_dst, nr_pages * page_size, nr_pages * page_size, MREMAP_MAYMOVE | MREMAP_FIXED, area_src); - if (area_dst == MAP_FAILED) - perror("mremap"), exit(1); + if (area_dst == MAP_FAILED) { + perror("mremap"); + exit(1); + } for (; nr < nr_pages; nr++) { count = *area_count(area_dst, nr); @@ -888,7 +919,7 @@ static int faulting_process(int signal_test) fprintf(stderr, "nr %lu memory corruption %Lu %Lu\n", nr, count, - count_verify[nr]), exit(1); + count_verify[nr]); exit(1); } /* * Trigger write protection if there is by writting @@ -901,8 +932,10 @@ static int faulting_process(int signal_test) return 1; for (nr = 0; nr < nr_pages; nr++) { - if (my_bcmp(area_dst + nr * page_size, zeropage, page_size)) - fprintf(stderr, "nr %lu is not zero\n", nr), exit(1); + if (my_bcmp(area_dst + nr * page_size, zeropage, page_size)) { + fprintf(stderr, "nr %lu is not zero\n", nr); + exit(1); + } } return 0; @@ -916,12 +949,14 @@ static void retry_uffdio_zeropage(int ufd, uffdio_zeropage->range.len, offset); if (ioctl(ufd, UFFDIO_ZEROPAGE, uffdio_zeropage)) { - if (uffdio_zeropage->zeropage != -EEXIST) + if (uffdio_zeropage->zeropage != -EEXIST) { fprintf(stderr, "UFFDIO_ZEROPAGE retry error %Ld\n", - uffdio_zeropage->zeropage), exit(1); + uffdio_zeropage->zeropage); + exit(1); + } } else { fprintf(stderr, "UFFDIO_ZEROPAGE retry unexpected %Ld\n", - uffdio_zeropage->zeropage), exit(1); + uffdio_zeropage->zeropage); exit(1); } } @@ -933,9 +968,10 @@ static int __uffdio_zeropage(int ufd, unsigned long offset, bool retry) has_zeropage = uffd_test_ops->expected_ioctls & (1 << _UFFDIO_ZEROPAGE); - if (offset >= nr_pages * page_size) - fprintf(stderr, "unexpected offset %lu\n", - offset), exit(1); + if (offset >= nr_pages * page_size) { + fprintf(stderr, "unexpected offset %lu\n", offset); + exit(1); + } uffdio_zeropage.range.start = (unsigned long) area_dst + offset; uffdio_zeropage.range.len = page_size; uffdio_zeropage.mode = 0; @@ -943,22 +979,26 @@ static int __uffdio_zeropage(int ufd, unsigned long offset, bool retry) if (ret) { /* real retval in ufdio_zeropage.zeropage */ if (has_zeropage) { - if (uffdio_zeropage.zeropage == -EEXIST) - fprintf(stderr, "UFFDIO_ZEROPAGE -EEXIST\n"), - exit(1); - else + if (uffdio_zeropage.zeropage == -EEXIST) { + fprintf(stderr, "UFFDIO_ZEROPAGE -EEXIST\n"); + exit(1); + } else { fprintf(stderr, "UFFDIO_ZEROPAGE error %Ld\n", - uffdio_zeropage.zeropage), exit(1); + uffdio_zeropage.zeropage); + exit(1); + } } else { - if (uffdio_zeropage.zeropage != -EINVAL) + if (uffdio_zeropage.zeropage != -EINVAL) { fprintf(stderr, "UFFDIO_ZEROPAGE not -EINVAL %Ld\n", - uffdio_zeropage.zeropage), exit(1); + uffdio_zeropage.zeropage); + exit(1); + } } } else if (has_zeropage) { if (uffdio_zeropage.zeropage != page_size) { fprintf(stderr, "UFFDIO_ZEROPAGE unexpected %Ld\n", - uffdio_zeropage.zeropage), exit(1); + uffdio_zeropage.zeropage); exit(1); } else { if (test_uffdio_zeropage_eexist && retry) { test_uffdio_zeropage_eexist = false; @@ -970,7 +1010,7 @@ static int __uffdio_zeropage(int ufd, unsigned long offset, bool retry) } else { fprintf(stderr, "UFFDIO_ZEROPAGE succeeded %Ld\n", - uffdio_zeropage.zeropage), exit(1); + uffdio_zeropage.zeropage); exit(1); } return 0; @@ -1000,19 +1040,24 @@ static int userfaultfd_zeropage_test(void) uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING; if (test_uffdio_wp) uffdio_register.mode |= UFFDIO_REGISTER_MODE_WP; - if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register)) - fprintf(stderr, "register failure\n"), exit(1); + if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register)) { + fprintf(stderr, "register failure\n"); + exit(1); + } expected_ioctls = uffd_test_ops->expected_ioctls; if ((uffdio_register.ioctls & expected_ioctls) != - expected_ioctls) + expected_ioctls) { fprintf(stderr, - "unexpected missing ioctl for anon memory\n"), - exit(1); + "unexpected missing ioctl for anon memory\n"); + exit(1); + } if (uffdio_zeropage(uffd, 0)) { - if (my_bcmp(area_dst, zeropage, page_size)) - fprintf(stderr, "zeropage is not zero\n"), exit(1); + if (my_bcmp(area_dst, zeropage, page_size)) { + fprintf(stderr, "zeropage is not zero\n"); + exit(1); + } } close(uffd); @@ -1047,32 +1092,41 @@ static int userfaultfd_events_test(void) uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING; if (test_uffdio_wp) uffdio_register.mode |= UFFDIO_REGISTER_MODE_WP; - if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register)) - fprintf(stderr, "register failure\n"), exit(1); + if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register)) { + fprintf(stderr, "register failure\n"); + exit(1); + } expected_ioctls = uffd_test_ops->expected_ioctls; - if ((uffdio_register.ioctls & expected_ioctls) != - expected_ioctls) - fprintf(stderr, - "unexpected missing ioctl for anon memory\n"), - exit(1); + if ((uffdio_register.ioctls & expected_ioctls) != expected_ioctls) { + fprintf(stderr, "unexpected missing ioctl for anon memory\n"); + exit(1); + } - if (pthread_create(&uffd_mon, &attr, uffd_poll_thread, &stats)) - perror("uffd_poll_thread create"), exit(1); + if (pthread_create(&uffd_mon, &attr, uffd_poll_thread, &stats)) { + perror("uffd_poll_thread create"); + exit(1); + } pid = fork(); - if (pid < 0) - perror("fork"), exit(1); + if (pid < 0) { + perror("fork"); + exit(1); + } if (!pid) return faulting_process(0); waitpid(pid, &err, 0); - if (err) - fprintf(stderr, "faulting process failed\n"), exit(1); + if (err) { + fprintf(stderr, "faulting process failed\n"); + exit(1); + } - if (write(pipefd[1], &c, sizeof(c)) != sizeof(c)) - perror("pipe write"), exit(1); + if (write(pipefd[1], &c, sizeof(c)) != sizeof(c)) { + perror("pipe write"); + exit(1); + } if (pthread_join(uffd_mon, NULL)) return 1; @@ -1110,38 +1164,49 @@ static int userfaultfd_sig_test(void) uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING; if (test_uffdio_wp) uffdio_register.mode |= UFFDIO_REGISTER_MODE_WP; - if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register)) - fprintf(stderr, "register failure\n"), exit(1); + if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register)) { + fprintf(stderr, "register failure\n"); + exit(1); + } expected_ioctls = uffd_test_ops->expected_ioctls; - if ((uffdio_register.ioctls & expected_ioctls) != - expected_ioctls) - fprintf(stderr, - "unexpected missing ioctl for anon memory\n"), - exit(1); + if ((uffdio_register.ioctls & expected_ioctls) != expected_ioctls) { + fprintf(stderr, "unexpected missing ioctl for anon memory\n"); + exit(1); + } - if (faulting_process(1)) - fprintf(stderr, "faulting process failed\n"), exit(1); + if (faulting_process(1)) { + fprintf(stderr, "faulting process failed\n"); + exit(1); + } if (uffd_test_ops->release_pages(area_dst)) return 1; - if (pthread_create(&uffd_mon, &attr, uffd_poll_thread, &stats)) - perror("uffd_poll_thread create"), exit(1); + if (pthread_create(&uffd_mon, &attr, uffd_poll_thread, &stats)) { + perror("uffd_poll_thread create"); + exit(1); + } pid = fork(); - if (pid < 0) - perror("fork"), exit(1); + if (pid < 0) { + perror("fork"); + exit(1); + } if (!pid) exit(faulting_process(2)); waitpid(pid, &err, 0); - if (err) - fprintf(stderr, "faulting process failed\n"), exit(1); + if (err) { + fprintf(stderr, "faulting process failed\n"); + exit(1); + } - if (write(pipefd[1], &c, sizeof(c)) != sizeof(c)) - perror("pipe write"), exit(1); + if (write(pipefd[1], &c, sizeof(c)) != sizeof(c)) { + perror("pipe write"); + exit(1); + } if (pthread_join(uffd_mon, (void **)&userfaults)) return 1; @@ -1395,7 +1460,7 @@ static void set_test_type(const char *type) test_type = TEST_SHMEM; uffd_test_ops = &shmem_uffd_test_ops; } else { - fprintf(stderr, "Unknown test type: %s\n", type), exit(1); + fprintf(stderr, "Unknown test type: %s\n", type); exit(1); } if (test_type == TEST_HUGETLB) @@ -1403,12 +1468,15 @@ static void set_test_type(const char *type) else page_size = sysconf(_SC_PAGE_SIZE); - if (!page_size) - fprintf(stderr, "Unable to determine page size\n"), - exit(2); + if (!page_size) { + fprintf(stderr, "Unable to determine page size\n"); + exit(2); + } if ((unsigned long) area_count(NULL, 0) + sizeof(unsigned long long) * 2 - > page_size) - fprintf(stderr, "Impossible to run this test\n"), exit(2); + > page_size) { + fprintf(stderr, "Impossible to run this test\n"); + exit(2); + } } static void sigalrm(int sig) @@ -1425,8 +1493,10 @@ int main(int argc, char **argv) if (argc < 4) usage(); - if (signal(SIGALRM, sigalrm) == SIG_ERR) - fprintf(stderr, "failed to arm SIGALRM"), exit(1); + if (signal(SIGALRM, sigalrm) == SIG_ERR) { + fprintf(stderr, "failed to arm SIGALRM"); + exit(1); + } alarm(ALARM_INTERVAL_SECS); set_test_type(argv[1]); diff --git a/tools/testing/selftests/x86/fsgsbase.c b/tools/testing/selftests/x86/fsgsbase.c index 998319553523..7161cfc2e60b 100644 --- a/tools/testing/selftests/x86/fsgsbase.c +++ b/tools/testing/selftests/x86/fsgsbase.c @@ -443,6 +443,68 @@ static void test_unexpected_base(void) #define USER_REGS_OFFSET(r) offsetof(struct user_regs_struct, r) +static void test_ptrace_write_gs_read_base(void) +{ + int status; + pid_t child = fork(); + + if (child < 0) + err(1, "fork"); + + if (child == 0) { + printf("[RUN]\tPTRACE_POKE GS, read GSBASE back\n"); + + printf("[RUN]\tARCH_SET_GS to 1\n"); + if (syscall(SYS_arch_prctl, ARCH_SET_GS, 1) != 0) + err(1, "ARCH_SET_GS"); + + if (ptrace(PTRACE_TRACEME, 0, NULL, NULL) != 0) + err(1, "PTRACE_TRACEME"); + + raise(SIGTRAP); + _exit(0); + } + + wait(&status); + + if (WSTOPSIG(status) == SIGTRAP) { + unsigned long base; + unsigned long gs_offset = USER_REGS_OFFSET(gs); + unsigned long base_offset = USER_REGS_OFFSET(gs_base); + + /* Read the initial base. It should be 1. */ + base = ptrace(PTRACE_PEEKUSER, child, base_offset, NULL); + if (base == 1) { + printf("[OK]\tGSBASE started at 1\n"); + } else { + nerrs++; + printf("[FAIL]\tGSBASE started at 0x%lx\n", base); + } + + printf("[RUN]\tSet GS = 0x7, read GSBASE\n"); + + /* Poke an LDT selector into GS. */ + if (ptrace(PTRACE_POKEUSER, child, gs_offset, 0x7) != 0) + err(1, "PTRACE_POKEUSER"); + + /* And read the base. */ + base = ptrace(PTRACE_PEEKUSER, child, base_offset, NULL); + + if (base == 0 || base == 1) { + printf("[OK]\tGSBASE reads as 0x%lx with invalid GS\n", base); + } else { + nerrs++; + printf("[FAIL]\tGSBASE=0x%lx (should be 0 or 1)\n", base); + } + } + + ptrace(PTRACE_CONT, child, NULL, NULL); + + wait(&status); + if (!WIFEXITED(status)) + printf("[WARN]\tChild didn't exit cleanly.\n"); +} + static void test_ptrace_write_gsbase(void) { int status; @@ -517,6 +579,9 @@ static void test_ptrace_write_gsbase(void) END: ptrace(PTRACE_CONT, child, NULL, NULL); + wait(&status); + if (!WIFEXITED(status)) + printf("[WARN]\tChild didn't exit cleanly.\n"); } int main() @@ -526,6 +591,9 @@ int main() shared_scratch = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_SHARED, -1, 0); + /* Do these tests before we have an LDT. */ + test_ptrace_write_gs_read_base(); + /* Probe FSGSBASE */ sethandler(SIGILL, sigill, 0); if (sigsetjmp(jmpbuf, 1) == 0) { diff --git a/tools/testing/selftests/x86/test_vsyscall.c b/tools/testing/selftests/x86/test_vsyscall.c index c41f24b517f4..65c141ebfbbd 100644 --- a/tools/testing/selftests/x86/test_vsyscall.c +++ b/tools/testing/selftests/x86/test_vsyscall.c @@ -462,6 +462,17 @@ static int test_vsys_x(void) return 0; } +/* + * Debuggers expect ptrace() to be able to peek at the vsyscall page. + * Use process_vm_readv() as a proxy for ptrace() to test this. We + * want it to work in the vsyscall=emulate case and to fail in the + * vsyscall=xonly case. + * + * It's worth noting that this ABI is a bit nutty. write(2) can't + * read from the vsyscall page on any kernel version or mode. The + * fact that ptrace() ever worked was a nice courtesy of old kernels, + * but the code to support it is fairly gross. + */ static int test_process_vm_readv(void) { #ifdef __x86_64__ @@ -477,8 +488,12 @@ static int test_process_vm_readv(void) remote.iov_len = 4096; ret = process_vm_readv(getpid(), &local, 1, &remote, 1, 0); if (ret != 4096) { - printf("[OK]\tprocess_vm_readv() failed (ret = %d, errno = %d)\n", ret, errno); - return 0; + /* + * We expect process_vm_readv() to work if and only if the + * vsyscall page is readable. + */ + printf("[%s]\tprocess_vm_readv() failed (ret = %d, errno = %d)\n", vsyscall_map_r ? "FAIL" : "OK", ret, errno); + return vsyscall_map_r ? 1 : 0; } if (vsyscall_map_r) { @@ -488,6 +503,9 @@ static int test_process_vm_readv(void) printf("[FAIL]\tIt worked but returned incorrect data\n"); return 1; } + } else { + printf("[FAIL]\tprocess_rm_readv() succeeded, but it should have failed in this configuration\n"); + return 1; } #endif diff --git a/tools/usb/Build b/tools/usb/Build new file mode 100644 index 000000000000..2ad6f9745816 --- /dev/null +++ b/tools/usb/Build @@ -0,0 +1,2 @@ +testusb-y += testusb.o +ffs-test-y += ffs-test.o diff --git a/tools/usb/Makefile b/tools/usb/Makefile index 01d758d73b6d..1b128e551b2e 100644 --- a/tools/usb/Makefile +++ b/tools/usb/Makefile @@ -1,14 +1,51 @@ # SPDX-License-Identifier: GPL-2.0 # Makefile for USB tools +include ../scripts/Makefile.include -PTHREAD_LIBS = -lpthread -WARNINGS = -Wall -Wextra -CFLAGS = $(WARNINGS) -g -I../include -LDFLAGS = $(PTHREAD_LIBS) +bindir ?= /usr/bin -all: testusb ffs-test -%: %.c - $(CC) $(CFLAGS) -o $@ $^ $(LDFLAGS) +ifeq ($(srctree),) +srctree := $(patsubst %/,%,$(dir $(CURDIR))) +srctree := $(patsubst %/,%,$(dir $(srctree))) +endif + +# Do not use make's built-in rules +# (this improves performance and avoids hard-to-debug behaviour); +MAKEFLAGS += -r + +override CFLAGS += -O2 -Wall -Wextra -g -D_GNU_SOURCE -I$(OUTPUT)include -I$(srctree)/tools/include +override LDFLAGS += -lpthread + +ALL_TARGETS := testusb ffs-test +ALL_PROGRAMS := $(patsubst %,$(OUTPUT)%,$(ALL_TARGETS)) + +all: $(ALL_PROGRAMS) + +export srctree OUTPUT CC LD CFLAGS +include $(srctree)/tools/build/Makefile.include + +TESTUSB_IN := $(OUTPUT)testusb-in.o +$(TESTUSB_IN): FORCE + $(Q)$(MAKE) $(build)=testusb +$(OUTPUT)testusb: $(TESTUSB_IN) + $(QUIET_LINK)$(CC) $(CFLAGS) $< -o $@ $(LDFLAGS) + +FFS_TEST_IN := $(OUTPUT)ffs-test-in.o +$(FFS_TEST_IN): FORCE + $(Q)$(MAKE) $(build)=ffs-test +$(OUTPUT)ffs-test: $(FFS_TEST_IN) + $(QUIET_LINK)$(CC) $(CFLAGS) $< -o $@ $(LDFLAGS) clean: - $(RM) testusb ffs-test + rm -f $(ALL_PROGRAMS) + find $(if $(OUTPUT),$(OUTPUT),.) -name '*.o' -delete -o -name '\.*.d' -delete -o -name '\.*.o.cmd' -delete + +install: $(ALL_PROGRAMS) + install -d -m 755 $(DESTDIR)$(bindir); \ + for program in $(ALL_PROGRAMS); do \ + install $$program $(DESTDIR)$(bindir); \ + done + +FORCE: + +.PHONY: all install clean FORCE prepare diff --git a/tools/virtio/linux/virtio_config.h b/tools/virtio/linux/virtio_config.h index dbf14c1e2188..f2640e505c4e 100644 --- a/tools/virtio/linux/virtio_config.h +++ b/tools/virtio/linux/virtio_config.h @@ -42,16 +42,16 @@ static inline void __virtio_clear_bit(struct virtio_device *vdev, (__virtio_test_bit((dev), feature)) /** - * virtio_has_iommu_quirk - determine whether this device has the iommu quirk + * virtio_has_dma_quirk - determine whether this device has the DMA quirk * @vdev: the device */ -static inline bool virtio_has_iommu_quirk(const struct virtio_device *vdev) +static inline bool virtio_has_dma_quirk(const struct virtio_device *vdev) { /* * Note the reverse polarity of the quirk feature (compared to most * other features), this is for compatibility with legacy systems. */ - return !virtio_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM); + return !virtio_has_feature(vdev, VIRTIO_F_ACCESS_PLATFORM); } static inline bool virtio_is_little_endian(struct virtio_device *vdev) diff --git a/tools/vm/page-types.c b/tools/vm/page-types.c index 58c0eab71bca..0517c744b04e 100644 --- a/tools/vm/page-types.c +++ b/tools/vm/page-types.c @@ -78,6 +78,7 @@ #define KPF_ARCH 38 #define KPF_UNCACHED 39 #define KPF_SOFTDIRTY 40 +#define KPF_ARCH_2 41 /* [48-] take some arbitrary free slots for expanding overloaded flags * not part of kernel API @@ -135,6 +136,7 @@ static const char * const page_flag_names[] = { [KPF_ARCH] = "h:arch", [KPF_UNCACHED] = "c:uncached", [KPF_SOFTDIRTY] = "f:softdirty", + [KPF_ARCH_2] = "H:arch_2", [KPF_READAHEAD] = "I:readahead", [KPF_SLOB_FREE] = "P:slob_free", |