From 6ea5d1a3e301a3d1f0364dfd481210aa6aa3cf17 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Wed, 29 Sep 2021 08:38:14 -0700 Subject: perf script: Support instruction latency The instruction latency information can be recorded on some platforms, e.g., the Intel Sapphire Rapids server. With both memory latency (weight) and the new instruction latency information, users can easily locate the expensive load instructions, and also understand the time spent in different stages. The users can optimize their applications in different pipeline stages. Add a new field "ins_lat" to filter the instruction latency information, which is available with sample type PERF_SAMPLE_WEIGHT_STRUCT. Signed-off-by: Kan Liang Cc: Andi Kleen Cc: Jiri Olsa Cc: Joe Mario Link: https://lore.kernel.org/r/1632929894-102778-2-git-send-email-kan.liang@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-script.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) (limited to 'tools/perf/builtin-script.c') diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 6211d0b84b7a..a6258f6f816c 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -122,6 +122,7 @@ enum perf_output_field { PERF_OUTPUT_TOD = 1ULL << 32, PERF_OUTPUT_DATA_PAGE_SIZE = 1ULL << 33, PERF_OUTPUT_CODE_PAGE_SIZE = 1ULL << 34, + PERF_OUTPUT_INS_LAT = 1ULL << 35, }; struct perf_script { @@ -188,6 +189,7 @@ struct output_option { {.str = "tod", .field = PERF_OUTPUT_TOD}, {.str = "data_page_size", .field = PERF_OUTPUT_DATA_PAGE_SIZE}, {.str = "code_page_size", .field = PERF_OUTPUT_CODE_PAGE_SIZE}, + {.str = "ins_lat", .field = PERF_OUTPUT_INS_LAT}, }; enum { @@ -262,7 +264,8 @@ static struct { PERF_OUTPUT_DSO | PERF_OUTPUT_PERIOD | PERF_OUTPUT_ADDR | PERF_OUTPUT_DATA_SRC | PERF_OUTPUT_WEIGHT | PERF_OUTPUT_PHYS_ADDR | - PERF_OUTPUT_DATA_PAGE_SIZE | PERF_OUTPUT_CODE_PAGE_SIZE, + PERF_OUTPUT_DATA_PAGE_SIZE | PERF_OUTPUT_CODE_PAGE_SIZE | + PERF_OUTPUT_INS_LAT, .invalid_fields = PERF_OUTPUT_TRACE | PERF_OUTPUT_BPF_OUTPUT, }, @@ -522,6 +525,10 @@ static int evsel__check_attr(struct evsel *evsel, struct perf_session *session) evsel__check_stype(evsel, PERF_SAMPLE_CODE_PAGE_SIZE, "CODE_PAGE_SIZE", PERF_OUTPUT_CODE_PAGE_SIZE)) return -EINVAL; + if (PRINT_FIELD(INS_LAT) && + evsel__check_stype(evsel, PERF_SAMPLE_WEIGHT_STRUCT, "WEIGHT_STRUCT", PERF_OUTPUT_INS_LAT)) + return -EINVAL; + return 0; } @@ -2039,6 +2046,9 @@ static void process_event(struct perf_script *script, if (PRINT_FIELD(WEIGHT)) fprintf(fp, "%16" PRIu64, sample->weight); + if (PRINT_FIELD(INS_LAT)) + fprintf(fp, "%16" PRIu16, sample->ins_lat); + if (PRINT_FIELD(IP)) { struct callchain_cursor *cursor = NULL; @@ -3715,7 +3725,7 @@ int cmd_script(int argc, const char **argv) "addr,symoff,srcline,period,iregs,uregs,brstack," "brstacksym,flags,bpf-output,brstackinsn,brstackoff," "callindent,insn,insnlen,synth,phys_addr,metric,misc,ipc,tod," - "data_page_size,code_page_size", + "data_page_size,code_page_size,ins_lat", parse_output_fields), OPT_BOOLEAN('a', "all-cpus", &system_wide, "system-wide collection from all CPUs"), -- cgit v1.2.3