From 0f59d7a352c11712de0f226b46cb82775b4fcece Mon Sep 17 00:00:00 2001 From: David Ahern Date: Fri, 1 Sep 2017 10:49:12 -0700 Subject: perf sched timehist: Add pid and tid options Add options to only show event for specific pid(s) and tid(s). Signed-off-by: David Ahern Cc: Namhyung Kim Link: http://lkml.kernel.org/r/1504288152-19690-1-git-send-email-dsahern@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-sched.txt | 8 ++++++++ tools/perf/builtin-sched.c | 4 ++++ 2 files changed, 12 insertions(+) (limited to 'tools/perf') diff --git a/tools/perf/Documentation/perf-sched.txt b/tools/perf/Documentation/perf-sched.txt index a092a2499e8f..55b67338548e 100644 --- a/tools/perf/Documentation/perf-sched.txt +++ b/tools/perf/Documentation/perf-sched.txt @@ -106,6 +106,14 @@ OPTIONS for 'perf sched timehist' --max-stack:: Maximum number of functions to display in backtrace, default 5. +-p=:: +--pid=:: + Only show events for given process ID (comma separated list). + +-t=:: +--tid=:: + Only show events for given thread ID (comma separated list). + -s:: --summary:: Show only a summary of scheduling by thread with min, max, and average diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index 322b4def8411..b7e8812ee80c 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -3363,6 +3363,10 @@ int cmd_sched(int argc, const char **argv) OPT_STRING(0, "time", &sched.time_str, "str", "Time span for analysis (start,stop)"), OPT_BOOLEAN(0, "state", &sched.show_state, "Show task state when sched-out"), + OPT_STRING('p', "pid", &symbol_conf.pid_list_str, "pid[,pid...]", + "analyze events only for given process id(s)"), + OPT_STRING('t', "tid", &symbol_conf.tid_list_str, "tid[,tid...]", + "analyze events only for given thread id(s)"), OPT_PARENT(sched_options) }; -- cgit v1.2.3 From 5a5dfe4b8548d806bf433090995ee0ee4c139f11 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 31 Aug 2017 12:40:26 -0700 Subject: perf tools: Support weak groups in 'perf stat' Setting up groups can be complicated due to the complicated scheduling restrictions of different PMUs. User tools usually don't understand all these restrictions. Still in many cases it is useful to set up groups and they work most of the time. However if the group is set up wrong some members will not report any value because they never get scheduled. Add a concept of a 'weak group': try to set up a group, but if it's not schedulable fallback to not using a group. That gives us the best of both worlds: groups if they work, but still a usable fallback if they don't. In theory it would be possible to have more complex fallback strategies (e.g. try to split the group in half), but the simple fallback of not using a group seems to work for now. So far the weak group is only implemented for perf stat, not for record. Here's an unschedulable group (on IvyBridge with SMT on) % perf stat -e '{branches,branch-misses,l1d.replacement,l2_lines_in.all,l2_rqsts.all_code_rd}' -a sleep 1 73,806,067 branches 4,848,144 branch-misses # 6.57% of all branches 14,754,458 l1d.replacement 24,905,558 l2_lines_in.all l2_rqsts.all_code_rd <------- will never report anything With the weak group: % perf stat -e '{branches,branch-misses,l1d.replacement,l2_lines_in.all,l2_rqsts.all_code_rd}:W' -a sleep 1 125,366,055 branches (80.02%) 9,208,402 branch-misses # 7.35% of all branches (80.01%) 24,560,249 l1d.replacement (80.00%) 43,174,971 l2_lines_in.all (80.05%) 31,891,457 l2_rqsts.all_code_rd (79.92%) The extra event scheduled with some extra multiplexing v2: Move fallback code to separate function. Add comment on for_each_group_member Adjust to new perf_evsel__close interface v3: Fix debug print out. Committer testing: Before: # perf stat -e '{branches,branch-misses,l1d.replacement,l2_lines_in.all,l2_rqsts.all_code_rd}' -a sleep 1 Performance counter stats for 'system wide': branches branch-misses l1d.replacement l2_lines_in.all l2_rqsts.all_code_rd 1.002147212 seconds time elapsed # perf stat -e '{branches,l1d.replacement,l2_lines_in.all,l2_rqsts.all_code_rd}' -a sleep 1 Performance counter stats for 'system wide': 83,207,892 branches 11,065,444 l1d.replacement 28,484,024 l2_lines_in.all 12,186,179 l2_rqsts.all_code_rd 1.001739493 seconds time elapsed After: # perf stat -e '{branches,branch-misses,l1d.replacement,l2_lines_in.all,l2_rqsts.all_code_rd}':W -a sleep 1 Performance counter stats for 'system wide': 543,323,909 branches (80.01%) 27,100,512 branch-misses # 4.99% of all branches (80.02%) 50,402,905 l1d.replacement (80.03%) 67,385,892 l2_lines_in.all (80.01%) 21,352,885 l2_rqsts.all_code_rd (79.94%) 1.001086658 seconds time elapsed # Signed-off-by: Andi Kleen Acked-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Link: http://lkml.kernel.org/r/20170831194036.30146-2-andi@firstfloor.org [ Add a "'perf stat' only, for now" comment in the man page, suggested by Jiri ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-list.txt | 2 ++ tools/perf/builtin-stat.c | 35 ++++++++++++++++++++++++++++++++++ tools/perf/util/evsel.h | 1 + tools/perf/util/parse-events.c | 8 +++++++- tools/perf/util/parse-events.l | 2 +- 5 files changed, 46 insertions(+), 2 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/Documentation/perf-list.txt b/tools/perf/Documentation/perf-list.txt index f709de54707b..75fc17f47298 100644 --- a/tools/perf/Documentation/perf-list.txt +++ b/tools/perf/Documentation/perf-list.txt @@ -47,6 +47,8 @@ counted. The following modifiers exist: P - use maximum detected precise level S - read sample value (PERF_SAMPLE_READ) D - pin the event to the PMU + W - group is weak and will fallback to non-group if not schedulable, + only supported in 'perf stat' for now. The 'p' modifier can be used for specifying how precise the instruction address should be. The 'p' modifier can be specified multiple times: diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 69523ed55894..7cc61eb0d83b 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -582,6 +582,32 @@ static bool perf_evsel__should_store_id(struct perf_evsel *counter) return STAT_RECORD || counter->attr.read_format & PERF_FORMAT_ID; } +static struct perf_evsel *perf_evsel__reset_weak_group(struct perf_evsel *evsel) +{ + struct perf_evsel *c2, *leader; + bool is_open = true; + + leader = evsel->leader; + pr_debug("Weak group for %s/%d failed\n", + leader->name, leader->nr_members); + + /* + * for_each_group_member doesn't work here because it doesn't + * include the first entry. + */ + evlist__for_each_entry(evsel_list, c2) { + if (c2 == evsel) + is_open = false; + if (c2->leader == leader) { + if (is_open) + perf_evsel__close(c2); + c2->leader = c2; + c2->nr_members = 0; + } + } + return leader; +} + static int __run_perf_stat(int argc, const char **argv) { int interval = stat_config.interval; @@ -618,6 +644,15 @@ static int __run_perf_stat(int argc, const char **argv) evlist__for_each_entry(evsel_list, counter) { try_again: if (create_perf_stat_counter(counter) < 0) { + + /* Weak group failed. Reset the group. */ + if (errno == EINVAL && + counter->leader != counter && + counter->weak_group) { + counter = perf_evsel__reset_weak_group(counter); + goto try_again; + } + /* * PPC returns ENXIO for HW counters until 2.6.37 * (behavior changed with commit b0a873e). diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index dd2c4b5112a5..db658785d828 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -137,6 +137,7 @@ struct perf_evsel { const char * metric_name; struct perf_evsel **metric_events; bool collect_stat; + bool weak_group; }; union u64_swap { diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index f6257fb4f08c..57d7acf890e0 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -1366,6 +1366,7 @@ struct event_modifier { int exclude_GH; int sample_read; int pinned; + int weak; }; static int get_event_modifier(struct event_modifier *mod, char *str, @@ -1384,6 +1385,7 @@ static int get_event_modifier(struct event_modifier *mod, char *str, int exclude = eu | ek | eh; int exclude_GH = evsel ? evsel->exclude_GH : 0; + int weak = 0; memset(mod, 0, sizeof(*mod)); @@ -1421,6 +1423,8 @@ static int get_event_modifier(struct event_modifier *mod, char *str, sample_read = 1; } else if (*str == 'D') { pinned = 1; + } else if (*str == 'W') { + weak = 1; } else break; @@ -1451,6 +1455,7 @@ static int get_event_modifier(struct event_modifier *mod, char *str, mod->exclude_GH = exclude_GH; mod->sample_read = sample_read; mod->pinned = pinned; + mod->weak = weak; return 0; } @@ -1464,7 +1469,7 @@ static int check_modifier(char *str) char *p = str; /* The sizeof includes 0 byte as well. */ - if (strlen(str) > (sizeof("ukhGHpppPSDI") - 1)) + if (strlen(str) > (sizeof("ukhGHpppPSDIW") - 1)) return -1; while (*p) { @@ -1504,6 +1509,7 @@ int parse_events__modifier_event(struct list_head *list, char *str, bool add) evsel->exclude_GH = mod.exclude_GH; evsel->sample_read = mod.sample_read; evsel->precise_max = mod.precise_max; + evsel->weak_group = mod.weak; if (perf_evsel__is_group_leader(evsel)) evsel->attr.pinned = mod.pinned; diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l index c42edeac451f..fdb5bb52f01f 100644 --- a/tools/perf/util/parse-events.l +++ b/tools/perf/util/parse-events.l @@ -161,7 +161,7 @@ name [a-zA-Z_*?][a-zA-Z0-9_*?.]* name_minus [a-zA-Z_*?][a-zA-Z0-9\-_*?.:]* drv_cfg_term [a-zA-Z0-9_\.]+(=[a-zA-Z0-9_*?\.:]+)? /* If you add a modifier you need to update check_modifier() */ -modifier_event [ukhpPGHSDI]+ +modifier_event [ukhpPGHSDIW]+ modifier_bp [rwx]{1,3} %% -- cgit v1.2.3 From 3ba36d3620d08be31f5ee9ae20abb9bf3bdeb05a Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 31 Aug 2017 12:40:27 -0700 Subject: perf vendor events: Support metric_group and no event name in JSON parser Some enhancements to the JSON parser to prepare for metrics support - Parse the new MetricGroup field - Support JSON events with no event name, that have only MetricName. Signed-off-by: Andi Kleen Acked-by: Jiri Olsa Link: http://lkml.kernel.org/r/20170831194036.30146-3-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/pmu-events/jevents.c | 24 ++++++++++++++++++------ tools/perf/pmu-events/jevents.h | 2 +- tools/perf/pmu-events/pmu-events.h | 1 + 3 files changed, 20 insertions(+), 7 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/pmu-events/jevents.c b/tools/perf/pmu-events/jevents.c index d51dc9ca8861..9eb7047bafe4 100644 --- a/tools/perf/pmu-events/jevents.c +++ b/tools/perf/pmu-events/jevents.c @@ -292,7 +292,7 @@ static int print_events_table_entry(void *data, char *name, char *event, char *desc, char *long_desc, char *pmu, char *unit, char *perpkg, char *metric_expr, - char *metric_name) + char *metric_name, char *metric_group) { struct perf_entry_data *pd = data; FILE *outfp = pd->outfp; @@ -304,8 +304,10 @@ static int print_events_table_entry(void *data, char *name, char *event, */ fprintf(outfp, "{\n"); - fprintf(outfp, "\t.name = \"%s\",\n", name); - fprintf(outfp, "\t.event = \"%s\",\n", event); + if (name) + fprintf(outfp, "\t.name = \"%s\",\n", name); + if (event) + fprintf(outfp, "\t.event = \"%s\",\n", event); fprintf(outfp, "\t.desc = \"%s\",\n", desc); fprintf(outfp, "\t.topic = \"%s\",\n", topic); if (long_desc && long_desc[0]) @@ -320,6 +322,8 @@ static int print_events_table_entry(void *data, char *name, char *event, fprintf(outfp, "\t.metric_expr = \"%s\",\n", metric_expr); if (metric_name) fprintf(outfp, "\t.metric_name = \"%s\",\n", metric_name); + if (metric_group) + fprintf(outfp, "\t.metric_group = \"%s\",\n", metric_group); fprintf(outfp, "},\n"); return 0; @@ -357,6 +361,9 @@ static char *real_event(const char *name, char *event) { int i; + if (!name) + return NULL; + for (i = 0; fixed[i].name; i++) if (!strcasecmp(name, fixed[i].name)) return (char *)fixed[i].event; @@ -369,7 +376,7 @@ int json_events(const char *fn, char *long_desc, char *pmu, char *unit, char *perpkg, char *metric_expr, - char *metric_name), + char *metric_name, char *metric_group), void *data) { int err = -EIO; @@ -397,6 +404,7 @@ int json_events(const char *fn, char *unit = NULL; char *metric_expr = NULL; char *metric_name = NULL; + char *metric_group = NULL; unsigned long long eventcode = 0; struct msrmap *msr = NULL; jsmntok_t *msrval = NULL; @@ -476,6 +484,8 @@ int json_events(const char *fn, addfield(map, &perpkg, "", "", val); } else if (json_streq(map, field, "MetricName")) { addfield(map, &metric_name, "", "", val); + } else if (json_streq(map, field, "MetricGroup")) { + addfield(map, &metric_group, "", "", val); } else if (json_streq(map, field, "MetricExpr")) { addfield(map, &metric_expr, "", "", val); for (s = metric_expr; *s; s++) @@ -501,10 +511,11 @@ int json_events(const char *fn, addfield(map, &event, ",", filter, NULL); if (msr != NULL) addfield(map, &event, ",", msr->pname, msrval); - fixname(name); + if (name) + fixname(name); err = func(data, name, real_event(name, event), desc, long_desc, - pmu, unit, perpkg, metric_expr, metric_name); + pmu, unit, perpkg, metric_expr, metric_name, metric_group); free(event); free(desc); free(name); @@ -516,6 +527,7 @@ int json_events(const char *fn, free(unit); free(metric_expr); free(metric_name); + free(metric_group); if (err) break; tok += j; diff --git a/tools/perf/pmu-events/jevents.h b/tools/perf/pmu-events/jevents.h index 611fac01913d..557994754410 100644 --- a/tools/perf/pmu-events/jevents.h +++ b/tools/perf/pmu-events/jevents.h @@ -6,7 +6,7 @@ int json_events(const char *fn, char *long_desc, char *pmu, char *unit, char *perpkg, char *metric_expr, - char *metric_name), + char *metric_name, char *metric_group), void *data); char *get_cpu_str(void); diff --git a/tools/perf/pmu-events/pmu-events.h b/tools/perf/pmu-events/pmu-events.h index 569eab3688dd..94fa1720f6fd 100644 --- a/tools/perf/pmu-events/pmu-events.h +++ b/tools/perf/pmu-events/pmu-events.h @@ -15,6 +15,7 @@ struct pmu_event { const char *perpkg; const char *metric_expr; const char *metric_name; + const char *metric_group; }; /* -- cgit v1.2.3 From bba49af87393ebc8960bf8abdcbb9af53bf1aba1 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 31 Aug 2017 12:40:28 -0700 Subject: perf stat: Factor out generic metric printing The 'perf stat' shadow metric printing already supports generic metrics. Factor out the code doing that into a separate function that can be re-used in a later patch. No behavior changes. v2: Fix indentation Signed-off-by: Andi Kleen Acked-by: Jiri Olsa Link: http://lkml.kernel.org/r/20170831194036.30146-4-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat-shadow.c | 69 ++++++++++++++++++++++++++----------------- 1 file changed, 42 insertions(+), 27 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c index a04cf56d3517..96aa6cbf24d6 100644 --- a/tools/perf/util/stat-shadow.c +++ b/tools/perf/util/stat-shadow.c @@ -627,6 +627,46 @@ static void print_smi_cost(int cpu, struct perf_evsel *evsel, out->print_metric(out->ctx, NULL, "%4.0f", "SMI#", smi_num); } +static void generic_metric(const char *metric_expr, + struct perf_evsel **metric_events, + char *name, + const char *metric_name, + double avg, + int cpu, + int ctx, + struct perf_stat_output_ctx *out) +{ + print_metric_t print_metric = out->print_metric; + struct parse_ctx pctx; + double ratio; + int i; + void *ctxp = out->ctx; + + expr__ctx_init(&pctx); + expr__add_id(&pctx, name, avg); + for (i = 0; metric_events[i]; i++) { + struct saved_value *v; + + v = saved_value_lookup(metric_events[i], cpu, ctx, false); + if (!v) + break; + expr__add_id(&pctx, metric_events[i]->name, avg_stats(&v->stats)); + } + if (!metric_events[i]) { + const char *p = metric_expr; + + if (expr__parse(&ratio, &pctx, &p) == 0) + print_metric(ctxp, NULL, "%8.1f", + metric_name ? + metric_name : + out->force_header ? name : "", + ratio); + else + print_metric(ctxp, NULL, NULL, "", 0); + } else + print_metric(ctxp, NULL, NULL, "", 0); +} + void perf_stat__print_shadow_stats(struct perf_evsel *evsel, double avg, int cpu, struct perf_stat_output_ctx *out) @@ -819,33 +859,8 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel, else print_metric(ctxp, NULL, NULL, name, 0); } else if (evsel->metric_expr) { - struct parse_ctx pctx; - int i; - - expr__ctx_init(&pctx); - expr__add_id(&pctx, evsel->name, avg); - for (i = 0; evsel->metric_events[i]; i++) { - struct saved_value *v; - - v = saved_value_lookup(evsel->metric_events[i], cpu, ctx, false); - if (!v) - break; - expr__add_id(&pctx, evsel->metric_events[i]->name, - avg_stats(&v->stats)); - } - if (!evsel->metric_events[i]) { - const char *p = evsel->metric_expr; - - if (expr__parse(&ratio, &pctx, &p) == 0) - print_metric(ctxp, NULL, "%8.1f", - evsel->metric_name ? - evsel->metric_name : - out->force_header ? evsel->name : "", - ratio); - else - print_metric(ctxp, NULL, NULL, "", 0); - } else - print_metric(ctxp, NULL, NULL, "", 0); + generic_metric(evsel->metric_expr, evsel->metric_events, evsel->name, + evsel->metric_name, avg, cpu, ctx, out); } else if (runtime_nsecs_stats[cpu].n != 0) { char unit = 'M'; char unit_buf[10]; -- cgit v1.2.3 From 4ed962eb38c8a33b8b6ded911410afaefa1ca48c Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 31 Aug 2017 12:40:29 -0700 Subject: perf stat: Print generic metric header even for failed expressions Print the generic metric header even when the expression evaluation failed. Otherwise an expression that fails on the first collections due to division by zero may suddenly reappear later without an header. Signed-off-by: Andi Kleen Acked-by: Jiri Olsa Link: http://lkml.kernel.org/r/20170831194036.30146-5-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat-shadow.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'tools/perf') diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c index 96aa6cbf24d6..8c7ab29169b9 100644 --- a/tools/perf/util/stat-shadow.c +++ b/tools/perf/util/stat-shadow.c @@ -662,7 +662,9 @@ static void generic_metric(const char *metric_expr, out->force_header ? name : "", ratio); else - print_metric(ctxp, NULL, NULL, "", 0); + print_metric(ctxp, NULL, NULL, + out->force_header ? + (metric_name ? metric_name : name) : "", 0); } else print_metric(ctxp, NULL, NULL, "", 0); } -- cgit v1.2.3 From d77ade9f4199c77c63e2ae382a8c8fbe0582ede2 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 31 Aug 2017 12:40:30 -0700 Subject: perf pmu: Extract function to get JSON alias map Extract the code to get the per cpu JSON alias into a separate function for reuse. No behavior changes. Signed-off-by: Andi Kleen Acked-by: Jiri Olsa Link: http://lkml.kernel.org/r/20170831194036.30146-6-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/pmu.c | 49 +++++++++++++++++++++++++++++++++---------------- tools/perf/util/pmu.h | 2 ++ 2 files changed, 35 insertions(+), 16 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index ac16a9db1fb5..ed25d7f88731 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -516,16 +516,8 @@ char * __weak get_cpuid_str(void) return NULL; } -/* - * From the pmu_events_map, find the table of PMU events that corresponds - * to the current running CPU. Then, add all PMU events from that table - * as aliases. - */ -static void pmu_add_cpu_aliases(struct list_head *head, const char *name) +static char *perf_pmu__getcpuid(void) { - int i; - struct pmu_events_map *map; - struct pmu_event *pe; char *cpuid; static bool printed; @@ -535,22 +527,50 @@ static void pmu_add_cpu_aliases(struct list_head *head, const char *name) if (!cpuid) cpuid = get_cpuid_str(); if (!cpuid) - return; + return NULL; if (!printed) { pr_debug("Using CPUID %s\n", cpuid); printed = true; } + return cpuid; +} + +struct pmu_events_map *perf_pmu__find_map(void) +{ + struct pmu_events_map *map; + char *cpuid = perf_pmu__getcpuid(); + int i; i = 0; - while (1) { + for (;;) { map = &pmu_events_map[i++]; - if (!map->table) - goto out; + if (!map->table) { + map = NULL; + break; + } if (!strcmp(map->cpuid, cpuid)) break; } + free(cpuid); + return map; +} + +/* + * From the pmu_events_map, find the table of PMU events that corresponds + * to the current running CPU. Then, add all PMU events from that table + * as aliases. + */ +static void pmu_add_cpu_aliases(struct list_head *head, const char *name) +{ + int i; + struct pmu_events_map *map; + struct pmu_event *pe; + + map = perf_pmu__find_map(); + if (!map) + return; /* * Found a matching PMU events table. Create aliases @@ -575,9 +595,6 @@ static void pmu_add_cpu_aliases(struct list_head *head, const char *name) (char *)pe->metric_expr, (char *)pe->metric_name); } - -out: - free(cpuid); } struct perf_event_attr * __weak diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h index 389e9729331f..060f6abba8ed 100644 --- a/tools/perf/util/pmu.h +++ b/tools/perf/util/pmu.h @@ -90,4 +90,6 @@ int perf_pmu__test(void); struct perf_event_attr *perf_pmu__get_default_config(struct perf_pmu *pmu); +struct pmu_events_map *perf_pmu__find_map(void); + #endif /* __PMU_H */ -- cgit v1.2.3 From b18f3e365019de1a5b26a851e123f0aedcce881f Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 31 Aug 2017 12:40:31 -0700 Subject: perf stat: Support JSON metrics in perf stat Add generic support for standalone metrics specified in JSON files to perf stat. A metric is a formula that uses multiple events to compute a higher level result (e.g. IPC). Previously metrics were always tied to an event and automatically enabled with that event. But now change it that we can have standalone metrics. They are in the same JSON data structure as events, but don't have an event name. We also allow to organize the metrics in metric groups, which allows a short cut to select several related metrics at once. Add a new -M / --metrics option to perf stat that adds the metrics or metric groups specified. Add the core code to manage and parse the metric groups. They are collected from the JSON data structures into a separate rblist. When computing shadow values look for metrics in that list. Then they are computed using the existing saved values infrastructure in stat-shadow.c The actual JSON metrics are in a separate pull request. % perf stat -M Summary --metric-only -a sleep 1 Performance counter stats for 'system wide': Instructions CLKS CPU_Utilization GFLOPs SMT_2T_Utilization Kernel_Utilization 317614222.0 1392930775.0 0.0 0.0 0.2 0.1 1.001497549 seconds time elapsed % perf stat -M GFLOPs flops Performance counter stats for 'flops': 3,999,541,471 fp_comp_ops_exe.sse_scalar_single # 1.2 GFLOPs (66.65%) 14 fp_comp_ops_exe.sse_scalar_double (66.65%) 0 fp_comp_ops_exe.sse_packed_double (66.67%) 0 fp_comp_ops_exe.sse_packed_single (66.70%) 0 simd_fp_256.packed_double (66.70%) 0 simd_fp_256.packed_single (66.67%) 0 duration_time 3.238372845 seconds time elapsed v2: Add missing header file v3: Move find_map to pmu.c Signed-off-by: Andi Kleen Acked-by: Jiri Olsa Link: http://lkml.kernel.org/r/20170831194036.30146-7-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-stat.txt | 7 + tools/perf/builtin-stat.c | 18 +- tools/perf/util/Build | 1 + tools/perf/util/metricgroup.c | 313 +++++++++++++++++++++++++++++++++ tools/perf/util/metricgroup.h | 31 ++++ tools/perf/util/pmu.c | 5 +- tools/perf/util/stat-shadow.c | 22 ++- tools/perf/util/stat.h | 4 +- 8 files changed, 395 insertions(+), 6 deletions(-) create mode 100644 tools/perf/util/metricgroup.c create mode 100644 tools/perf/util/metricgroup.h (limited to 'tools/perf') diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt index c37d61682dfb..823fce7674bb 100644 --- a/tools/perf/Documentation/perf-stat.txt +++ b/tools/perf/Documentation/perf-stat.txt @@ -199,6 +199,13 @@ Aggregate counts per processor socket for system-wide mode measurements. --per-core:: Aggregate counts per physical processor for system-wide mode measurements. +-M:: +--metrics:: +Print metrics or metricgroups specified in a comma separated list. +For a group all metrics from the group are added. +The events from the metrics are automatically measured. +See perf list output for the possble metrics and metricgroups. + -A:: --no-aggr:: Do not aggregate counts across all monitored CPUs. diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 7cc61eb0d83b..874bc6dd8d60 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -65,6 +65,7 @@ #include "util/tool.h" #include "util/group.h" #include "util/string2.h" +#include "util/metricgroup.h" #include "asm/bug.h" #include @@ -133,6 +134,8 @@ static const char *smi_cost_attrs = { static struct perf_evlist *evsel_list; +static struct rblist metric_events; + static struct target target = { .uid = UINT_MAX, }; @@ -1234,7 +1237,7 @@ static void printout(int id, int nr, struct perf_evsel *counter, double uval, perf_stat__print_shadow_stats(counter, uval, first_shadow_cpu(counter, id), - &out); + &out, &metric_events); if (!csv_output && !metric_only) { print_noise(counter, noise); print_running(run, ena); @@ -1565,7 +1568,8 @@ static void print_metric_headers(const char *prefix, bool no_indent) os.evsel = counter; perf_stat__print_shadow_stats(counter, 0, 0, - &out); + &out, + &metric_events); } fputc('\n', stat_config.output); } @@ -1789,6 +1793,13 @@ static int enable_metric_only(const struct option *opt __maybe_unused, return 0; } +static int parse_metric_groups(const struct option *opt, + const char *str, + int unset __maybe_unused) +{ + return metricgroup__parse_groups(opt, str, &metric_events); +} + static const struct option stat_options[] = { OPT_BOOLEAN('T', "transaction", &transaction_run, "hardware transaction statistics"), @@ -1854,6 +1865,9 @@ static const struct option stat_options[] = { "measure topdown level 1 statistics"), OPT_BOOLEAN(0, "smi-cost", &smi_cost, "measure SMI cost"), + OPT_CALLBACK('M', "metrics", &evsel_list, "metric/metric group list", + "monitor specified metrics or metric groups (separated by ,)", + parse_metric_groups), OPT_END() }; diff --git a/tools/perf/util/Build b/tools/perf/util/Build index 94518c1bf8b6..71ab8466714d 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -34,6 +34,7 @@ libperf-y += dso.o libperf-y += symbol.o libperf-y += symbol_fprintf.o libperf-y += color.o +libperf-y += metricgroup.o libperf-y += header.o libperf-y += callchain.o libperf-y += values.o diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c new file mode 100644 index 000000000000..7516b1746594 --- /dev/null +++ b/tools/perf/util/metricgroup.c @@ -0,0 +1,313 @@ +/* + * Copyright (c) 2017, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + */ + +/* Manage metrics and groups of metrics from JSON files */ + +#include "metricgroup.h" +#include "evlist.h" +#include "strbuf.h" +#include "pmu.h" +#include "expr.h" +#include "rblist.h" +#include "pmu.h" +#include +#include +#include +#include "pmu-events/pmu-events.h" +#include "strbuf.h" +#include "strlist.h" +#include +#include + +struct metric_event *metricgroup__lookup(struct rblist *metric_events, + struct perf_evsel *evsel, + bool create) +{ + struct rb_node *nd; + struct metric_event me = { + .evsel = evsel + }; + nd = rblist__find(metric_events, &me); + if (nd) + return container_of(nd, struct metric_event, nd); + if (create) { + rblist__add_node(metric_events, &me); + nd = rblist__find(metric_events, &me); + if (nd) + return container_of(nd, struct metric_event, nd); + } + return NULL; +} + +static int metric_event_cmp(struct rb_node *rb_node, const void *entry) +{ + struct metric_event *a = container_of(rb_node, + struct metric_event, + nd); + const struct metric_event *b = entry; + + if (a->evsel == b->evsel) + return 0; + if ((char *)a->evsel < (char *)b->evsel) + return -1; + return +1; +} + +static struct rb_node *metric_event_new(struct rblist *rblist __maybe_unused, + const void *entry) +{ + struct metric_event *me = malloc(sizeof(struct metric_event)); + + if (!me) + return NULL; + memcpy(me, entry, sizeof(struct metric_event)); + me->evsel = ((struct metric_event *)entry)->evsel; + INIT_LIST_HEAD(&me->head); + return &me->nd; +} + +static void metricgroup__rblist_init(struct rblist *metric_events) +{ + rblist__init(metric_events); + metric_events->node_cmp = metric_event_cmp; + metric_events->node_new = metric_event_new; +} + +struct egroup { + struct list_head nd; + int idnum; + const char **ids; + const char *metric_name; + const char *metric_expr; +}; + +static struct perf_evsel *find_evsel(struct perf_evlist *perf_evlist, + const char **ids, + int idnum, + struct perf_evsel **metric_events) +{ + struct perf_evsel *ev, *start = NULL; + int ind = 0; + + evlist__for_each_entry (perf_evlist, ev) { + if (!strcmp(ev->name, ids[ind])) { + metric_events[ind] = ev; + if (ind == 0) + start = ev; + if (++ind == idnum) { + metric_events[ind] = NULL; + return start; + } + } else { + ind = 0; + start = NULL; + } + } + /* + * This can happen when an alias expands to multiple + * events, like for uncore events. + * We don't support this case for now. + */ + return NULL; +} + +static int metricgroup__setup_events(struct list_head *groups, + struct perf_evlist *perf_evlist, + struct rblist *metric_events_list) +{ + struct metric_event *me; + struct metric_expr *expr; + int i = 0; + int ret = 0; + struct egroup *eg; + struct perf_evsel *evsel; + + list_for_each_entry (eg, groups, nd) { + struct perf_evsel **metric_events; + + metric_events = calloc(sizeof(void *), eg->idnum + 1); + if (!metric_events) { + ret = -ENOMEM; + break; + } + evsel = find_evsel(perf_evlist, eg->ids, eg->idnum, + metric_events); + if (!evsel) { + pr_debug("Cannot resolve %s: %s\n", + eg->metric_name, eg->metric_expr); + continue; + } + for (i = 0; i < eg->idnum; i++) + metric_events[i]->collect_stat = true; + me = metricgroup__lookup(metric_events_list, evsel, true); + if (!me) { + ret = -ENOMEM; + break; + } + expr = malloc(sizeof(struct metric_expr)); + if (!expr) { + ret = -ENOMEM; + break; + } + expr->metric_expr = eg->metric_expr; + expr->metric_name = eg->metric_name; + expr->metric_events = metric_events; + list_add(&expr->nd, &me->head); + } + return ret; +} + +static bool match_metric(const char *n, const char *list) +{ + int len; + char *m; + + if (!list) + return false; + if (!strcmp(list, "all")) + return true; + if (!n) + return !strcasecmp(list, "No_group"); + len = strlen(list); + m = strcasestr(n, list); + if (!m) + return false; + if ((m == n || m[-1] == ';' || m[-1] == ' ') && + (m[len] == 0 || m[len] == ';')) + return true; + return false; +} + +static int metricgroup__add_metric(const char *metric, struct strbuf *events, + struct list_head *group_list) +{ + struct pmu_events_map *map = perf_pmu__find_map(); + struct pmu_event *pe; + int ret = -EINVAL; + int i, j; + + strbuf_init(events, 100); + strbuf_addf(events, "%s", ""); + + if (!map) + return 0; + + for (i = 0; ; i++) { + pe = &map->table[i]; + + if (!pe->name && !pe->metric_group && !pe->metric_name) + break; + if (!pe->metric_expr) + continue; + if (match_metric(pe->metric_group, metric) || + match_metric(pe->metric_name, metric)) { + const char **ids; + int idnum; + struct egroup *eg; + + pr_debug("metric expr %s for %s\n", pe->metric_expr, pe->metric_name); + + if (expr__find_other(pe->metric_expr, + NULL, &ids, &idnum) < 0) + continue; + if (events->len > 0) + strbuf_addf(events, ","); + for (j = 0; j < idnum; j++) { + pr_debug("found event %s\n", ids[j]); + strbuf_addf(events, "%s%s", + j == 0 ? "{" : ",", + ids[j]); + } + strbuf_addf(events, "}:W"); + + eg = malloc(sizeof(struct egroup)); + if (!eg) { + ret = -ENOMEM; + break; + } + eg->ids = ids; + eg->idnum = idnum; + eg->metric_name = pe->metric_name; + eg->metric_expr = pe->metric_expr; + list_add_tail(&eg->nd, group_list); + ret = 0; + } + } + return ret; +} + +static int metricgroup__add_metric_list(const char *list, struct strbuf *events, + struct list_head *group_list) +{ + char *llist, *nlist, *p; + int ret = -EINVAL; + + nlist = strdup(list); + if (!nlist) + return -ENOMEM; + llist = nlist; + while ((p = strsep(&llist, ",")) != NULL) { + ret = metricgroup__add_metric(p, events, group_list); + if (ret == -EINVAL) { + fprintf(stderr, "Cannot find metric or group `%s'\n", + p); + break; + } + } + free(nlist); + return ret; +} + +static void metricgroup__free_egroups(struct list_head *group_list) +{ + struct egroup *eg, *egtmp; + int i; + + list_for_each_entry_safe (eg, egtmp, group_list, nd) { + for (i = 0; i < eg->idnum; i++) + free((char *)eg->ids[i]); + free(eg->ids); + free(eg); + } +} + +int metricgroup__parse_groups(const struct option *opt, + const char *str, + struct rblist *metric_events) +{ + struct parse_events_error parse_error; + struct perf_evlist *perf_evlist = *(struct perf_evlist **)opt->value; + struct strbuf extra_events; + LIST_HEAD(group_list); + int ret; + + if (metric_events->nr_entries == 0) + metricgroup__rblist_init(metric_events); + ret = metricgroup__add_metric_list(str, &extra_events, &group_list); + if (ret) + return ret; + pr_debug("adding %s\n", extra_events.buf); + memset(&parse_error, 0, sizeof(struct parse_events_error)); + ret = parse_events(perf_evlist, extra_events.buf, &parse_error); + if (ret) { + pr_err("Cannot set up events %s\n", extra_events.buf); + goto out; + } + strbuf_release(&extra_events); + ret = metricgroup__setup_events(&group_list, perf_evlist, + metric_events); +out: + metricgroup__free_egroups(&group_list); + return ret; +} diff --git a/tools/perf/util/metricgroup.h b/tools/perf/util/metricgroup.h new file mode 100644 index 000000000000..06854e125ee7 --- /dev/null +++ b/tools/perf/util/metricgroup.h @@ -0,0 +1,31 @@ +#ifndef METRICGROUP_H +#define METRICGROUP_H 1 + +#include "linux/list.h" +#include "rblist.h" +#include +#include "evlist.h" +#include "strbuf.h" + +struct metric_event { + struct rb_node nd; + struct perf_evsel *evsel; + struct list_head head; /* list of metric_expr */ +}; + +struct metric_expr { + struct list_head nd; + const char *metric_expr; + const char *metric_name; + struct perf_evsel **metric_events; +}; + +struct metric_event *metricgroup__lookup(struct rblist *metric_events, + struct perf_evsel *evsel, + bool create); +int metricgroup__parse_groups(const struct option *opt, + const char *str, + struct rblist *metric_events); + +void metricgroup__print(bool metrics, bool groups, char *filter, bool raw); +#endif diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index ed25d7f88731..7070638ab600 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -580,8 +580,11 @@ static void pmu_add_cpu_aliases(struct list_head *head, const char *name) const char *pname; pe = &map->table[i++]; - if (!pe->name) + if (!pe->name) { + if (pe->metric_group || pe->metric_name) + continue; break; + } pname = pe->pmu ? pe->pmu : "cpu"; if (strncmp(pname, name, strlen(pname))) diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c index 8c7ab29169b9..42e6c17be7ff 100644 --- a/tools/perf/util/stat-shadow.c +++ b/tools/perf/util/stat-shadow.c @@ -6,6 +6,7 @@ #include "rblist.h" #include "evlist.h" #include "expr.h" +#include "metricgroup.h" enum { CTX_BIT_USER = 1 << 0, @@ -671,13 +672,16 @@ static void generic_metric(const char *metric_expr, void perf_stat__print_shadow_stats(struct perf_evsel *evsel, double avg, int cpu, - struct perf_stat_output_ctx *out) + struct perf_stat_output_ctx *out, + struct rblist *metric_events) { void *ctxp = out->ctx; print_metric_t print_metric = out->print_metric; double total, ratio = 0.0, total2; const char *color = NULL; int ctx = evsel_context(evsel); + struct metric_event *me; + int num = 1; if (perf_evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) { total = avg_stats(&runtime_cycles_stats[ctx][cpu]); @@ -880,6 +884,20 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel, } else if (perf_stat_evsel__is(evsel, SMI_NUM)) { print_smi_cost(cpu, evsel, out); } else { - print_metric(ctxp, NULL, NULL, NULL, 0); + num = 0; } + + if ((me = metricgroup__lookup(metric_events, evsel, false)) != NULL) { + struct metric_expr *mexp; + + list_for_each_entry (mexp, &me->head, nd) { + if (num++ > 0) + out->new_line(ctxp); + generic_metric(mexp->metric_expr, mexp->metric_events, + evsel->name, mexp->metric_name, + avg, cpu, ctx, out); + } + } + if (num == 0) + print_metric(ctxp, NULL, NULL, NULL, 0); } diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index eacaf958e19d..47915df346fb 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -91,9 +91,11 @@ struct perf_stat_output_ctx { bool force_header; }; +struct rblist; void perf_stat__print_shadow_stats(struct perf_evsel *evsel, double avg, int cpu, - struct perf_stat_output_ctx *out); + struct perf_stat_output_ctx *out, + struct rblist *metric_events); void perf_stat__collect_metric_expr(struct perf_evlist *); int perf_evlist__alloc_stats(struct perf_evlist *evlist, bool alloc_raw); -- cgit v1.2.3 From 71b0acce78d12e99eeda6fd6642ba89cc2b2b49c Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 31 Aug 2017 12:40:32 -0700 Subject: perf list: Add metric groups to perf list Add code to perf list to print metric groups, and metrics that don't have an event name. The metricgroup code collects the eventgroups and events into a rblist, and then prints them according to the configured filters. The metricgroups are printed by default, but can be limited by perf list metric or perf list metricgroup % perf list metricgroup .. Metric Groups: DSB: DSB_Coverage [Fraction of Uops delivered by the DSB (aka Decoded Icache; or Uop Cache)] FLOPS: GFLOPs [Giga Floating Point Operations Per Second] Frontend: IFetch_Line_Utilization [Rough Estimation of fraction of fetched lines bytes that were likely consumed by program instructions] Frontend_Bandwidth: DSB_Coverage [Fraction of Uops delivered by the DSB (aka Decoded Icache; or Uop Cache)] Memory_BW: MLP [Memory-Level-Parallelism (average number of L1 miss demand load when there is at least 1 such miss)] v2: Check return value of asprintf to fix warning on FC26 Fix key in lookup/addition for the groups list Signed-off-by: Andi Kleen Acked-by: Jiri Olsa Link: http://lkml.kernel.org/r/20170831194036.30146-8-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-list.txt | 7 +- tools/perf/builtin-list.c | 7 ++ tools/perf/util/metricgroup.c | 176 +++++++++++++++++++++++++++++++++ tools/perf/util/parse-events.c | 3 + 4 files changed, 192 insertions(+), 1 deletion(-) (limited to 'tools/perf') diff --git a/tools/perf/Documentation/perf-list.txt b/tools/perf/Documentation/perf-list.txt index 75fc17f47298..24679aed90b7 100644 --- a/tools/perf/Documentation/perf-list.txt +++ b/tools/perf/Documentation/perf-list.txt @@ -8,7 +8,8 @@ perf-list - List all symbolic event types SYNOPSIS -------- [verse] -'perf list' [--no-desc] [--long-desc] [hw|sw|cache|tracepoint|pmu|sdt|event_glob] +'perf list' [--no-desc] [--long-desc] + [hw|sw|cache|tracepoint|pmu|sdt|metric|metricgroup|event_glob] DESCRIPTION ----------- @@ -248,6 +249,10 @@ To limit the list use: . 'sdt' to list all Statically Defined Tracepoint events. +. 'metric' to list metrics + +. 'metricgroup' to list metricgroups with metrics. + . If none of the above is matched, it will apply the supplied glob to all events, printing the ones that match. diff --git a/tools/perf/builtin-list.c b/tools/perf/builtin-list.c index 4bf2cb4d25aa..b2d2ad3dd478 100644 --- a/tools/perf/builtin-list.c +++ b/tools/perf/builtin-list.c @@ -15,6 +15,7 @@ #include "util/cache.h" #include "util/pmu.h" #include "util/debug.h" +#include "util/metricgroup.h" #include static bool desc_flag = true; @@ -79,6 +80,10 @@ int cmd_list(int argc, const char **argv) long_desc_flag, details_flag); else if (strcmp(argv[i], "sdt") == 0) print_sdt_events(NULL, NULL, raw_dump); + else if (strcmp(argv[i], "metric") == 0) + metricgroup__print(true, false, NULL, raw_dump); + else if (strcmp(argv[i], "metricgroup") == 0) + metricgroup__print(false, true, NULL, raw_dump); else if ((sep = strchr(argv[i], ':')) != NULL) { int sep_idx; @@ -96,6 +101,7 @@ int cmd_list(int argc, const char **argv) s[sep_idx] = '\0'; print_tracepoint_events(s, s + sep_idx + 1, raw_dump); print_sdt_events(s, s + sep_idx + 1, raw_dump); + metricgroup__print(true, true, s, raw_dump); free(s); } else { if (asprintf(&s, "*%s*", argv[i]) < 0) { @@ -112,6 +118,7 @@ int cmd_list(int argc, const char **argv) details_flag); print_tracepoint_events(NULL, s, raw_dump); print_sdt_events(NULL, s, raw_dump); + metricgroup__print(true, true, NULL, raw_dump); free(s); } } diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c index 7516b1746594..2d60114f1870 100644 --- a/tools/perf/util/metricgroup.c +++ b/tools/perf/util/metricgroup.c @@ -189,6 +189,182 @@ static bool match_metric(const char *n, const char *list) return false; } +struct mep { + struct rb_node nd; + const char *name; + struct strlist *metrics; +}; + +static int mep_cmp(struct rb_node *rb_node, const void *entry) +{ + struct mep *a = container_of(rb_node, struct mep, nd); + struct mep *b = (struct mep *)entry; + + return strcmp(a->name, b->name); +} + +static struct rb_node *mep_new(struct rblist *rl __maybe_unused, + const void *entry) +{ + struct mep *me = malloc(sizeof(struct mep)); + + if (!me) + return NULL; + memcpy(me, entry, sizeof(struct mep)); + me->name = strdup(me->name); + if (!me->name) + goto out_me; + me->metrics = strlist__new(NULL, NULL); + if (!me->metrics) + goto out_name; + return &me->nd; +out_name: + free((char *)me->name); +out_me: + free(me); + return NULL; +} + +static struct mep *mep_lookup(struct rblist *groups, const char *name) +{ + struct rb_node *nd; + struct mep me = { + .name = name + }; + nd = rblist__find(groups, &me); + if (nd) + return container_of(nd, struct mep, nd); + rblist__add_node(groups, &me); + nd = rblist__find(groups, &me); + if (nd) + return container_of(nd, struct mep, nd); + return NULL; +} + +static void mep_delete(struct rblist *rl __maybe_unused, + struct rb_node *nd) +{ + struct mep *me = container_of(nd, struct mep, nd); + + strlist__delete(me->metrics); + free((void *)me->name); + free(me); +} + +static void metricgroup__print_strlist(struct strlist *metrics, bool raw) +{ + struct str_node *sn; + int n = 0; + + strlist__for_each_entry (sn, metrics) { + if (raw) + printf("%s%s", n > 0 ? " " : "", sn->s); + else + printf(" %s\n", sn->s); + n++; + } + if (raw) + putchar('\n'); +} + +void metricgroup__print(bool metrics, bool metricgroups, char *filter, + bool raw) +{ + struct pmu_events_map *map = perf_pmu__find_map(); + struct pmu_event *pe; + int i; + struct rblist groups; + struct rb_node *node, *next; + struct strlist *metriclist = NULL; + + if (!map) + return; + + if (!metricgroups) { + metriclist = strlist__new(NULL, NULL); + if (!metriclist) + return; + } + + rblist__init(&groups); + groups.node_new = mep_new; + groups.node_cmp = mep_cmp; + groups.node_delete = mep_delete; + for (i = 0; ; i++) { + const char *g; + pe = &map->table[i]; + + if (!pe->name && !pe->metric_group && !pe->metric_name) + break; + if (!pe->metric_expr) + continue; + g = pe->metric_group; + if (!g && pe->metric_name) { + if (pe->name) + continue; + g = "No_group"; + } + if (g) { + char *omg; + char *mg = strdup(g); + + if (!mg) + return; + omg = mg; + while ((g = strsep(&mg, ";")) != NULL) { + struct mep *me; + char *s; + + if (*g == 0) + g = "No_group"; + while (isspace(*g)) + g++; + if (filter && !strstr(g, filter)) + continue; + if (raw) + s = (char *)pe->metric_name; + else { + if (asprintf(&s, "%s\n\t[%s]", + pe->metric_name, pe->desc) < 0) + return; + } + + if (!s) + continue; + + if (!metricgroups) { + strlist__add(metriclist, s); + } else { + me = mep_lookup(&groups, g); + if (!me) + continue; + strlist__add(me->metrics, s); + } + } + free(omg); + } + } + + if (metricgroups && !raw) + printf("\nMetric Groups:\n\n"); + else if (metrics && !raw) + printf("\nMetrics:\n\n"); + + for (node = rb_first(&groups.entries); node; node = next) { + struct mep *me = container_of(node, struct mep, nd); + + if (metricgroups) + printf("%s%s%s", me->name, metrics ? ":" : "", raw ? " " : "\n"); + if (metrics) + metricgroup__print_strlist(me->metrics, raw); + next = rb_next(node); + rblist__remove_node(&groups, node); + } + if (!metricgroups) + metricgroup__print_strlist(metriclist, raw); + strlist__delete(metriclist); +} + static int metricgroup__add_metric(const char *metric, struct strbuf *events, struct list_head *group_list) { diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 57d7acf890e0..75588920fccc 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -28,6 +28,7 @@ #include "probe-file.h" #include "asm/bug.h" #include "util/parse-branch-options.h" +#include "metricgroup.h" #define MAX_NAME_LEN 100 @@ -2380,6 +2381,8 @@ void print_events(const char *event_glob, bool name_only, bool quiet_flag, print_tracepoint_events(NULL, NULL, name_only); print_sdt_events(NULL, NULL, name_only); + + metricgroup__print(true, true, NULL, name_only); } int parse_events__is_hardcoded_term(struct parse_events_term *term) -- cgit v1.2.3 From 4e1a096380e3b558ef021afc08e193ce5d1be478 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 31 Aug 2017 12:40:33 -0700 Subject: perf stat: Don't use ctx for saved values lookup We don't need to use ctx to look up events for saved values. The context is already part of the evsel pointer, which is the primary key. Signed-off-by: Andi Kleen Acked-by: Jiri Olsa Link: http://lkml.kernel.org/r/20170831194036.30146-9-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat-shadow.c | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c index 42e6c17be7ff..664f49a9b012 100644 --- a/tools/perf/util/stat-shadow.c +++ b/tools/perf/util/stat-shadow.c @@ -56,7 +56,6 @@ struct saved_value { struct rb_node rb_node; struct perf_evsel *evsel; int cpu; - int ctx; struct stats stats; }; @@ -67,8 +66,6 @@ static int saved_value_cmp(struct rb_node *rb_node, const void *entry) rb_node); const struct saved_value *b = entry; - if (a->ctx != b->ctx) - return a->ctx - b->ctx; if (a->cpu != b->cpu) return a->cpu - b->cpu; if (a->evsel == b->evsel) @@ -90,13 +87,12 @@ static struct rb_node *saved_value_new(struct rblist *rblist __maybe_unused, } static struct saved_value *saved_value_lookup(struct perf_evsel *evsel, - int cpu, int ctx, + int cpu, bool create) { struct rb_node *nd; struct saved_value dm = { .cpu = cpu, - .ctx = ctx, .evsel = evsel, }; nd = rblist__find(&runtime_saved_values, &dm); @@ -232,8 +228,7 @@ void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 *count, update_stats(&runtime_aperf_stats[ctx][cpu], count[0]); if (counter->collect_stat) { - struct saved_value *v = saved_value_lookup(counter, cpu, ctx, - true); + struct saved_value *v = saved_value_lookup(counter, cpu, true); update_stats(&v->stats, count[0]); } } @@ -634,7 +629,6 @@ static void generic_metric(const char *metric_expr, const char *metric_name, double avg, int cpu, - int ctx, struct perf_stat_output_ctx *out) { print_metric_t print_metric = out->print_metric; @@ -648,7 +642,7 @@ static void generic_metric(const char *metric_expr, for (i = 0; metric_events[i]; i++) { struct saved_value *v; - v = saved_value_lookup(metric_events[i], cpu, ctx, false); + v = saved_value_lookup(metric_events[i], cpu, false); if (!v) break; expr__add_id(&pctx, metric_events[i]->name, avg_stats(&v->stats)); @@ -866,7 +860,7 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel, print_metric(ctxp, NULL, NULL, name, 0); } else if (evsel->metric_expr) { generic_metric(evsel->metric_expr, evsel->metric_events, evsel->name, - evsel->metric_name, avg, cpu, ctx, out); + evsel->metric_name, avg, cpu, out); } else if (runtime_nsecs_stats[cpu].n != 0) { char unit = 'M'; char unit_buf[10]; @@ -895,7 +889,7 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel, out->new_line(ctxp); generic_metric(mexp->metric_expr, mexp->metric_events, evsel->name, mexp->metric_name, - avg, cpu, ctx, out); + avg, cpu, out); } } if (num == 0) -- cgit v1.2.3 From fd48aad9b0f3f7654433dfae3a72ceda36e2de28 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 31 Aug 2017 12:40:34 -0700 Subject: perf stat: Support duration_time for metrics Some of the metrics formulas (like GFLOPs) need to know how long the measurement period is. Support an internal event called duration_time, which reports time in second. It maps to the dummy event, but is special cased for statistics to report the walltime duration. So far it is not printed, but only used internally for metrics. Signed-off-by: Andi Kleen Acked-by: Jiri Olsa Link: http://lkml.kernel.org/r/20170831194036.30146-10-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/parse-events.l | 1 + tools/perf/util/stat-shadow.c | 17 +++++++++++++---- 2 files changed, 14 insertions(+), 4 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l index fdb5bb52f01f..ea2426daf7e8 100644 --- a/tools/perf/util/parse-events.l +++ b/tools/perf/util/parse-events.l @@ -288,6 +288,7 @@ cpu-migrations|migrations { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COU alignment-faults { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_ALIGNMENT_FAULTS); } emulation-faults { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_EMULATION_FAULTS); } dummy { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_DUMMY); } +duration_time { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_DUMMY); } bpf-output { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_BPF_OUTPUT); } /* diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c index 664f49a9b012..a2c12d1ef32a 100644 --- a/tools/perf/util/stat-shadow.c +++ b/tools/perf/util/stat-shadow.c @@ -641,11 +641,20 @@ static void generic_metric(const char *metric_expr, expr__add_id(&pctx, name, avg); for (i = 0; metric_events[i]; i++) { struct saved_value *v; + struct stats *stats; + double scale; - v = saved_value_lookup(metric_events[i], cpu, false); - if (!v) - break; - expr__add_id(&pctx, metric_events[i]->name, avg_stats(&v->stats)); + if (!strcmp(metric_events[i]->name, "duration_time")) { + stats = &walltime_nsecs_stats; + scale = 1e-9; + } else { + v = saved_value_lookup(metric_events[i], cpu, false); + if (!v) + break; + stats = &v->stats; + scale = 1.0; + } + expr__add_id(&pctx, metric_events[i]->name, avg_stats(stats)*scale); } if (!metric_events[i]) { const char *p = metric_expr; -- cgit v1.2.3 From e864c5ca145e49bfce4847bd14b47b5f8549b2b1 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 31 Aug 2017 12:40:35 -0700 Subject: perf stat: Hide internal duration_time counter Some perf stat metrics use an internal "duration_time" metric. It is not correctly printed however. So hide it during output to avoid confusing users with 0 counts. Signed-off-by: Andi Kleen Acked-by: Jiri Olsa Link: http://lkml.kernel.org/r/20170831194036.30146-11-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 27 ++++++++++++++++++++++++--- 1 file changed, 24 insertions(+), 3 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 874bc6dd8d60..855890e0b70b 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -195,6 +195,11 @@ static struct perf_stat_config stat_config = { .scale = true, }; +static bool is_duration_time(struct perf_evsel *evsel) +{ + return !strcmp(evsel->name, "duration_time"); +} + static inline void diff_timespec(struct timespec *r, struct timespec *a, struct timespec *b) { @@ -1363,6 +1368,9 @@ static void print_aggr(char *prefix) ad.id = id = aggr_map->map[s]; first = true; evlist__for_each_entry(evsel_list, counter) { + if (is_duration_time(counter)) + continue; + ad.val = ad.ena = ad.run = 0; ad.nr = 0; if (!collect_data(counter, aggr_cb, &ad)) @@ -1506,6 +1514,8 @@ static void print_no_aggr_metric(char *prefix) if (prefix) fputs(prefix, stat_config.output); evlist__for_each_entry(evsel_list, counter) { + if (is_duration_time(counter)) + continue; if (first) { aggr_printout(counter, cpu, 0); first = false; @@ -1560,6 +1570,8 @@ static void print_metric_headers(const char *prefix, bool no_indent) /* Print metrics headers only */ evlist__for_each_entry(evsel_list, counter) { + if (is_duration_time(counter)) + continue; os.evsel = counter; out.ctx = &os; out.print_metric = print_metric_header; @@ -1707,12 +1719,18 @@ static void print_counters(struct timespec *ts, int argc, const char **argv) print_aggr(prefix); break; case AGGR_THREAD: - evlist__for_each_entry(evsel_list, counter) + evlist__for_each_entry(evsel_list, counter) { + if (is_duration_time(counter)) + continue; print_aggr_thread(counter, prefix); + } break; case AGGR_GLOBAL: - evlist__for_each_entry(evsel_list, counter) + evlist__for_each_entry(evsel_list, counter) { + if (is_duration_time(counter)) + continue; print_counter_aggr(counter, prefix); + } if (metric_only) fputc('\n', stat_config.output); break; @@ -1720,8 +1738,11 @@ static void print_counters(struct timespec *ts, int argc, const char **argv) if (metric_only) print_no_aggr_metric(prefix); else { - evlist__for_each_entry(evsel_list, counter) + evlist__for_each_entry(evsel_list, counter) { + if (is_duration_time(counter)) + continue; print_counter(counter, prefix); + } } break; case AGGR_UNSET: -- cgit v1.2.3 From b90f1333ef08d2a497ae239798868b046f4e3a97 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 31 Aug 2017 12:40:36 -0700 Subject: perf stat: Update walltime_nsecs_stats in interval mode Some metrics (like GFLOPs) need walltime_nsecs_stats for each interval. Compute it for each interval instead of only at the end. Pointed out by Jiri. Signed-off-by: Andi Kleen Acked-by: Jiri Olsa Link: http://lkml.kernel.org/r/20170831194036.30146-12-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'tools/perf') diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 855890e0b70b..88f1d5fbdb48 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -415,6 +415,8 @@ static void process_interval(void) pr_err("failed to write stat round event\n"); } + init_stats(&walltime_nsecs_stats); + update_stats(&walltime_nsecs_stats, stat_config.interval * 1000000); print_counters(&rs, 0, NULL); } -- cgit v1.2.3 From 84c417422798c897f637b0249f64a52807b4a61b Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Tue, 5 Sep 2017 10:00:28 -0700 Subject: perf record: Support direct --user-regs arguments USER_REGS can currently only collected implicitely with call graph recording. Sometimes it is useful to see them separately, and filter them. Add a new --user-regs option to record that is similar to --intr-regs, but acts on user regs. Signed-off-by: Andi Kleen Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20170905170029.19722-1-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-record.txt | 2 ++ tools/perf/builtin-record.c | 3 +++ tools/perf/perf.h | 1 + tools/perf/util/evsel.c | 7 ++++++- 4 files changed, 12 insertions(+), 1 deletion(-) (limited to 'tools/perf') diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index e397453e5a46..68a1ffb0a8a5 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -377,6 +377,8 @@ symbolic names, e.g. on x86, ax, si. To list the available registers use --intr-regs=\?. To name registers, pass a comma separated list such as --intr-regs=ax,bx. The list of register is architecture dependent. +--user-regs:: +Capture user registers at sample time. Same arguments as -I. --running-time:: Record running and enabled time for read events (:S) diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 56f8142ff97f..9b379f3a3d99 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -1643,6 +1643,9 @@ static struct option __record_options[] = { OPT_CALLBACK_OPTARG('I', "intr-regs", &record.opts.sample_intr_regs, NULL, "any register", "sample selected machine registers on interrupt," " use -I ? to list register names", parse_regs), + OPT_CALLBACK_OPTARG(0, "user-regs", &record.opts.sample_user_regs, NULL, "any register", + "sample selected machine registers on interrupt," + " use -I ? to list register names", parse_regs), OPT_BOOLEAN(0, "running-time", &record.opts.running_time, "Record running/enabled time of read (:S) events"), OPT_CALLBACK('k', "clockid", &record.opts, diff --git a/tools/perf/perf.h b/tools/perf/perf.h index dc442ba21bf6..fbb0a9cd0ac6 100644 --- a/tools/perf/perf.h +++ b/tools/perf/perf.h @@ -65,6 +65,7 @@ struct record_opts { unsigned int user_freq; u64 branch_stack; u64 sample_intr_regs; + u64 sample_user_regs; u64 default_interval; u64 user_interval; size_t auxtrace_snapshot_size; diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 4bb89373eb52..7389746c0dc4 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -678,7 +678,7 @@ void perf_evsel__config_callchain(struct perf_evsel *evsel, if (!function) { perf_evsel__set_sample_bit(evsel, REGS_USER); perf_evsel__set_sample_bit(evsel, STACK_USER); - attr->sample_regs_user = PERF_REGS_MASK; + attr->sample_regs_user |= PERF_REGS_MASK; attr->sample_stack_user = param->dump_size; attr->exclude_callchain_user = 1; } else { @@ -931,6 +931,11 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts, perf_evsel__set_sample_bit(evsel, REGS_INTR); } + if (opts->sample_user_regs) { + attr->sample_regs_user |= opts->sample_user_regs; + perf_evsel__set_sample_bit(evsel, REGS_USER); + } + if (target__has_cpu(&opts->target) || opts->sample_cpu) perf_evsel__set_sample_bit(evsel, CPU); -- cgit v1.2.3 From b1491ace8eb2e92677cd9ee966763f8f53d29d16 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Tue, 5 Sep 2017 11:40:57 -0700 Subject: perf script: Support user regs Teach perf script to print user regs. % perf record --user-regs=ip,sp ... % perf script -F ip,sym,uregs ... ffffffff9e060c24 native_write_msr ABI:2 SP:0x7ffd0ea06c38 IP:0x7fe77f55b637 ffffffff9e060c24 native_write_msr ABI:2 SP:0x7ffd0ea06c38 IP:0x7fe77f55b637 ffffffff9e060c24 native_write_msr ABI:2 SP:0x7ffd0ea06c38 IP:0x7fe77f55b637 ffffffff9e060c24 native_write_msr ABI:2 SP:0x7ffd0ea06c38 IP:0x7fe77f55b637 ffffffff9e00cc12 intel_pmu_handle_irq ABI:2 SP:0x7ffd0ea06c38 IP:0x7fe77f55b637 v2: Rebased on top of phys-addr patches Signed-off-by: Andi Kleen Link: http://lkml.kernel.org/r/20170905184057.26135-1-andi@firstfloor.org [ Use PRIu64 for regs->abi in print_sample_uregs() ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-script.txt | 4 ++-- tools/perf/builtin-script.c | 30 +++++++++++++++++++++++++++++- 2 files changed, 31 insertions(+), 3 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt index 18dfcfa38454..bcc1ba35a2d8 100644 --- a/tools/perf/Documentation/perf-script.txt +++ b/tools/perf/Documentation/perf-script.txt @@ -116,8 +116,8 @@ OPTIONS --fields:: Comma separated list of fields to print. Options are: comm, tid, pid, time, cpu, event, trace, ip, sym, dso, addr, symoff, - srcline, period, iregs, brstack, brstacksym, flags, bpf-output, brstackinsn, brstackoff, - callindent, insn, insnlen, synth, phys_addr. + srcline, period, iregs, uregs, brstack, brstacksym, flags, bpf-output, brstackinsn, + brstackoff, callindent, insn, insnlen, synth, phys_addr. Field list can be prepended with the type, trace, sw or hw, to indicate to which event type the field list applies. e.g., -F sw:comm,tid,time,ip,sym and -F trace:time,cpu,trace diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 3d4c3b5e1868..725dbd3dd104 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -88,6 +88,7 @@ enum perf_output_field { PERF_OUTPUT_BRSTACKOFF = 1U << 24, PERF_OUTPUT_SYNTH = 1U << 25, PERF_OUTPUT_PHYS_ADDR = 1U << 26, + PERF_OUTPUT_UREGS = 1U << 27, }; struct output_option { @@ -109,6 +110,7 @@ struct output_option { {.str = "srcline", .field = PERF_OUTPUT_SRCLINE}, {.str = "period", .field = PERF_OUTPUT_PERIOD}, {.str = "iregs", .field = PERF_OUTPUT_IREGS}, + {.str = "uregs", .field = PERF_OUTPUT_UREGS}, {.str = "brstack", .field = PERF_OUTPUT_BRSTACK}, {.str = "brstacksym", .field = PERF_OUTPUT_BRSTACKSYM}, {.str = "data_src", .field = PERF_OUTPUT_DATA_SRC}, @@ -385,6 +387,11 @@ static int perf_evsel__check_attr(struct perf_evsel *evsel, PERF_OUTPUT_IREGS)) return -EINVAL; + if (PRINT_FIELD(UREGS) && + perf_evsel__check_stype(evsel, PERF_SAMPLE_REGS_USER, "UREGS", + PERF_OUTPUT_UREGS)) + return -EINVAL; + if (PRINT_FIELD(PHYS_ADDR) && perf_evsel__check_stype(evsel, PERF_SAMPLE_PHYS_ADDR, "PHYS_ADDR", PERF_OUTPUT_PHYS_ADDR)) @@ -509,6 +516,24 @@ static void print_sample_iregs(struct perf_sample *sample, } } +static void print_sample_uregs(struct perf_sample *sample, + struct perf_event_attr *attr) +{ + struct regs_dump *regs = &sample->user_regs; + uint64_t mask = attr->sample_regs_user; + unsigned i = 0, r; + + if (!regs || !regs->regs) + return; + + printf(" ABI:%" PRIu64 " ", regs->abi); + + for_each_set_bit(r, (unsigned long *) &mask, sizeof(mask) * 8) { + u64 val = regs->regs[i++]; + printf("%5s:0x%"PRIx64" ", perf_reg_name(r), val); + } +} + static void print_sample_start(struct perf_sample *sample, struct thread *thread, struct perf_evsel *evsel) @@ -1444,6 +1469,9 @@ static void process_event(struct perf_script *script, if (PRINT_FIELD(IREGS)) print_sample_iregs(sample, attr); + if (PRINT_FIELD(UREGS)) + print_sample_uregs(sample, attr); + if (PRINT_FIELD(BRSTACK)) print_sample_brstack(sample, thread, attr); else if (PRINT_FIELD(BRSTACKSYM)) @@ -2739,7 +2767,7 @@ int cmd_script(int argc, const char **argv) "+field to add and -field to remove." "Valid types: hw,sw,trace,raw,synth. " "Fields: comm,tid,pid,time,cpu,event,trace,ip,sym,dso," - "addr,symoff,period,iregs,brstack,brstacksym,flags," + "addr,symoff,period,iregs,uregs,brstack,brstacksym,flags," "bpf-output,callindent,insn,insnlen,brstackinsn,synth,phys_addr", parse_output_fields), OPT_BOOLEAN('a', "all-cpus", &system_wide, -- cgit v1.2.3 From 80f873557112fc163f011cd131d4cfe4959100a6 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 8 Sep 2017 10:46:21 +0200 Subject: perf tools: Add python-clean target To be able to cleanup only python related binaries. Signed-off-by: Jiri Olsa Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20170908084621.31595-3-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.perf | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 91ef44bfaf3e..1df93b4c4648 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -173,7 +173,7 @@ AWK = awk # non-config cases config := 1 -NON_CONFIG_TARGETS := clean TAGS tags cscope help install-doc install-man install-html install-info install-pdf doc man html info pdf +NON_CONFIG_TARGETS := clean python-clean TAGS tags cscope help install-doc install-man install-html install-info install-pdf doc man html info pdf ifdef MAKECMDGOALS ifeq ($(filter-out $(NON_CONFIG_TARGETS),$(MAKECMDGOALS)),) @@ -802,7 +802,10 @@ config-clean: $(call QUIET_CLEAN, config) $(Q)$(MAKE) -C $(srctree)/tools/build/feature/ $(if $(OUTPUT),OUTPUT=$(OUTPUT)feature/,) clean >/dev/null -clean:: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clean config-clean fixdep-clean +python-clean: + $(python-clean) + +clean:: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clean config-clean fixdep-clean python-clean $(call QUIET_CLEAN, core-objs) $(RM) $(LIB_FILE) $(OUTPUT)perf-archive $(OUTPUT)perf-with-kcore $(LANG_BINDINGS) $(Q)find $(if $(OUTPUT),$(OUTPUT),.) -name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete $(Q)$(RM) $(OUTPUT).config-detected @@ -819,7 +822,6 @@ clean:: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clea $(OUTPUT)$(vhost_virtio_ioctl_array) \ $(OUTPUT)$(perf_ioctl_array) $(QUIET_SUBDIR0)Documentation $(QUIET_SUBDIR1) clean - $(python-clean) # # To provide FEATURE-DUMP into $(FEATURE_DUMP_COPY) -- cgit v1.2.3 From 25cc4eb44b0c840eff0e5a46a85b9ccbde77401b Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 8 Sep 2017 14:05:09 +0200 Subject: perf ui progress: Add ui specific init function Adding ui specific init function allowing to setup the progress bar width based on current screen scales. Adding TUI init function to get more grained update of the progress bar. Signed-off-by: Jiri Olsa Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20170908120510.22515-4-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/progress.c | 2 ++ tools/perf/ui/progress.h | 1 + tools/perf/ui/tui/progress.c | 9 +++++++-- 3 files changed, 10 insertions(+), 2 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/ui/progress.c b/tools/perf/ui/progress.c index ae91c8148edf..3e2b5d64c55e 100644 --- a/tools/perf/ui/progress.c +++ b/tools/perf/ui/progress.c @@ -34,6 +34,8 @@ void ui_progress__init(struct ui_progress *p, u64 total, const char *title) p->total = total; p->title = title; + if (ui_progress__ops->init) + ui_progress__ops->init(p); } void ui_progress__finish(void) diff --git a/tools/perf/ui/progress.h b/tools/perf/ui/progress.h index 717d39d3052b..e5f434a2070b 100644 --- a/tools/perf/ui/progress.h +++ b/tools/perf/ui/progress.h @@ -14,6 +14,7 @@ void ui_progress__init(struct ui_progress *p, u64 total, const char *title); void ui_progress__update(struct ui_progress *p, u64 adv); struct ui_progress_ops { + void (*init)(struct ui_progress *p); void (*update)(struct ui_progress *p); void (*finish)(void); }; diff --git a/tools/perf/ui/tui/progress.c b/tools/perf/ui/tui/progress.c index c4b99008e2c9..f6b8f52aad7e 100644 --- a/tools/perf/ui/tui/progress.c +++ b/tools/perf/ui/tui/progress.c @@ -5,6 +5,11 @@ #include "tui.h" #include "../browser.h" +static void __tui_progress__init(struct ui_progress *p) +{ + p->next = p->step = p->total / (SLtt_Screen_Cols - 2) ?: 1; +} + static void tui_progress__update(struct ui_progress *p) { int bar, y; @@ -49,8 +54,8 @@ static void tui_progress__finish(void) pthread_mutex_unlock(&ui__lock); } -static struct ui_progress_ops tui_progress__ops = -{ +static struct ui_progress_ops tui_progress__ops = { + .init = __tui_progress__init, .update = tui_progress__update, .finish = tui_progress__finish, }; -- cgit v1.2.3 From 8233822f403b67bbaa1d58e8fa6b8f821fe7626d Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 8 Sep 2017 14:05:10 +0200 Subject: perf ui progress: Add size info into progress bar Adding the size values '[current/total]' into progress bar, to show more detailed progress of data reading. Adding new ui_progress__init_size function to specify we want to display the size. Signed-off-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20170908120510.22515-5-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/progress.c | 4 +++- tools/perf/ui/progress.h | 11 ++++++++++- tools/perf/ui/tui/progress.c | 23 ++++++++++++++++++++++- tools/perf/util/session.c | 2 +- 4 files changed, 36 insertions(+), 4 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/ui/progress.c b/tools/perf/ui/progress.c index 3e2b5d64c55e..7ade387d511c 100644 --- a/tools/perf/ui/progress.c +++ b/tools/perf/ui/progress.c @@ -27,12 +27,14 @@ void ui_progress__update(struct ui_progress *p, u64 adv) } } -void ui_progress__init(struct ui_progress *p, u64 total, const char *title) +void __ui_progress__init(struct ui_progress *p, u64 total, + const char *title, bool size) { p->curr = 0; p->next = p->step = total / 16 ?: 1; p->total = total; p->title = title; + p->size = size; if (ui_progress__ops->init) ui_progress__ops->init(p); diff --git a/tools/perf/ui/progress.h b/tools/perf/ui/progress.h index e5f434a2070b..fbaa1507ebfe 100644 --- a/tools/perf/ui/progress.h +++ b/tools/perf/ui/progress.h @@ -8,9 +8,18 @@ void ui_progress__finish(void); struct ui_progress { const char *title; u64 curr, next, step, total; + bool size; }; -void ui_progress__init(struct ui_progress *p, u64 total, const char *title); +void __ui_progress__init(struct ui_progress *p, u64 total, + const char *title, bool size); + +#define ui_progress__init(p, total, title) \ + __ui_progress__init(p, total, title, false) + +#define ui_progress__init_size(p, total, title) \ + __ui_progress__init(p, total, title, true) + void ui_progress__update(struct ui_progress *p, u64 adv); struct ui_progress_ops { diff --git a/tools/perf/ui/tui/progress.c b/tools/perf/ui/tui/progress.c index f6b8f52aad7e..68f6144ea603 100644 --- a/tools/perf/ui/tui/progress.c +++ b/tools/perf/ui/tui/progress.c @@ -1,8 +1,10 @@ +#include #include "../cache.h" #include "../progress.h" #include "../libslang.h" #include "../ui.h" #include "tui.h" +#include "units.h" #include "../browser.h" static void __tui_progress__init(struct ui_progress *p) @@ -10,8 +12,22 @@ static void __tui_progress__init(struct ui_progress *p) p->next = p->step = p->total / (SLtt_Screen_Cols - 2) ?: 1; } +static int get_title(struct ui_progress *p, char *buf, size_t size) +{ + char buf_cur[20]; + char buf_tot[20]; + int ret; + + ret = unit_number__scnprintf(buf_cur, sizeof(buf_cur), p->curr); + ret += unit_number__scnprintf(buf_tot, sizeof(buf_tot), p->total); + + return ret + scnprintf(buf, size, "%s [%s/%s]", + p->title, buf_cur, buf_tot); +} + static void tui_progress__update(struct ui_progress *p) { + char buf[100], *title = (char *) p->title; int bar, y; /* * FIXME: We should have a per UI backend way of showing progress, @@ -23,13 +39,18 @@ static void tui_progress__update(struct ui_progress *p) if (p->total == 0) return; + if (p->size) { + get_title(p, buf, sizeof(buf)); + title = buf; + } + ui__refresh_dimensions(false); pthread_mutex_lock(&ui__lock); y = SLtt_Screen_Rows / 2 - 2; SLsmg_set_color(0); SLsmg_draw_box(y, 0, 3, SLtt_Screen_Cols); SLsmg_gotorc(y++, 1); - SLsmg_write_string((char *)p->title); + SLsmg_write_string(title); SLsmg_fill_region(y, 1, 1, SLtt_Screen_Cols - 2, ' '); SLsmg_set_color(HE_COLORSET_SELECTED); bar = ((SLtt_Screen_Cols - 2) * p->curr) / p->total; diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index a7ebd9fe8e40..ceac0848469d 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -1847,7 +1847,7 @@ static int __perf_session__process_events(struct perf_session *session, if (data_offset + data_size < file_size) file_size = data_offset + data_size; - ui_progress__init(&prog, file_size, "Processing events..."); + ui_progress__init_size(&prog, file_size, "Processing events..."); mmap_size = MMAP_SIZE; if (mmap_size > file_size) { -- cgit v1.2.3 From ecdad24d7a4480c9af0ff6dbe00ac8bbae720d19 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Thu, 7 Sep 2017 10:55:46 -0700 Subject: perf tools: Use scandir() to replace readdir() In perf_event__synthesize_threads() perf goes through all proc files serially by readdir. scandir() does a snapshoot of /proc, which is multithreading friendly. It's possible that some threads which are added during event synthesize. But the number of lost threads should be small. They should not impact the final analysis. Signed-off-by: Kan Liang Cc: Adrian Hunter Cc: Andi Kleen Cc: Jiri Olsa Cc: Lukasz Odzioba Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1504806954-150842-3-git-send-email-kan.liang@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/event.c | 45 +++++++++++++++++++++++++-------------------- 1 file changed, 25 insertions(+), 20 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index 1c905ba3641b..17c21ea68a72 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -683,12 +683,14 @@ int perf_event__synthesize_threads(struct perf_tool *tool, bool mmap_data, unsigned int proc_map_timeout) { - DIR *proc; - char proc_path[PATH_MAX]; - struct dirent *dirent; union perf_event *comm_event, *mmap_event, *fork_event; union perf_event *namespaces_event; + char proc_path[PATH_MAX]; + struct dirent **dirent; int err = -1; + char *end; + pid_t pid; + int n, i; if (machine__is_default_guest(machine)) return 0; @@ -712,29 +714,32 @@ int perf_event__synthesize_threads(struct perf_tool *tool, goto out_free_fork; snprintf(proc_path, sizeof(proc_path), "%s/proc", machine->root_dir); - proc = opendir(proc_path); + n = scandir(proc_path, &dirent, 0, alphasort); - if (proc == NULL) + if (n < 0) goto out_free_namespaces; - while ((dirent = readdir(proc)) != NULL) { - char *end; - pid_t pid = strtol(dirent->d_name, &end, 10); - - if (*end) /* only interested in proper numerical dirents */ + for (i = 0; i < n; i++) { + if (!isdigit(dirent[i]->d_name[0])) continue; - /* - * We may race with exiting thread, so don't stop just because - * one thread couldn't be synthesized. - */ - __event__synthesize_thread(comm_event, mmap_event, fork_event, - namespaces_event, pid, 1, process, - tool, machine, mmap_data, - proc_map_timeout); - } + pid = (pid_t)strtol(dirent[i]->d_name, &end, 10); + /* only interested in proper numerical dirents */ + if (!*end) { + /* + * We may race with exiting thread, so don't stop just because + * one thread couldn't be synthesized. + */ + __event__synthesize_thread(comm_event, mmap_event, fork_event, + namespaces_event, pid, 1, process, + tool, machine, mmap_data, + proc_map_timeout); + } + free(dirent[i]); + } + free(dirent); err = 0; - closedir(proc); + out_free_namespaces: free(namespaces_event); out_free_fork: -- cgit v1.2.3 From 5c2615556d4410baebc9b336f14befe0bb32cde4 Mon Sep 17 00:00:00 2001 From: Taeung Song Date: Thu, 7 Sep 2017 12:18:51 +0900 Subject: perf config: Write a config file just once Currently set_config() can be repeatedly called for each input config on the below case: $ perf config kmem.default=slab report.children=false ... But it's a waste, so only once write a config file gathering all given config key=value pairs. Signed-off-by: Taeung Song Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lkml.kernel.org/r/1504754331-9776-1-git-send-email-treeze.taeung@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-config.c | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/builtin-config.c b/tools/perf/builtin-config.c index a1d82e33282c..b89417d9305e 100644 --- a/tools/perf/builtin-config.c +++ b/tools/perf/builtin-config.c @@ -34,8 +34,7 @@ static struct option config_options[] = { OPT_END() }; -static int set_config(struct perf_config_set *set, const char *file_name, - const char *var, const char *value) +static int set_config(struct perf_config_set *set, const char *file_name) { struct perf_config_section *section = NULL; struct perf_config_item *item = NULL; @@ -49,7 +48,6 @@ static int set_config(struct perf_config_set *set, const char *file_name, if (!fp) return -1; - perf_config_set__collect(set, file_name, var, value); fprintf(fp, "%s\n", first_line); /* overwrite configvariables */ @@ -161,6 +159,7 @@ int cmd_config(int argc, const char **argv) struct perf_config_set *set; char *user_config = mkpath("%s/.perfconfig", getenv("HOME")); const char *config_filename; + bool changed = false; argc = parse_options(argc, argv, config_options, config_usage, PARSE_OPT_STOP_AT_NON_OPTION); @@ -231,15 +230,26 @@ int cmd_config(int argc, const char **argv) goto out_err; } } else { - if (set_config(set, config_filename, var, value) < 0) { - pr_err("Failed to set '%s=%s' on %s\n", - var, value, config_filename); + if (perf_config_set__collect(set, config_filename, + var, value) < 0) { + pr_err("Failed to add '%s=%s'\n", + var, value); free(arg); goto out_err; } + changed = true; } free(arg); } + + if (!changed) + break; + + if (set_config(set, config_filename) < 0) { + pr_err("Failed to set the configs on %s\n", + config_filename); + goto out_err; + } } ret = 0; -- cgit v1.2.3 From 55421b4fb7054f85274b1b6a321e204dac696133 Mon Sep 17 00:00:00 2001 From: Taeung Song Date: Thu, 7 Sep 2017 12:18:56 +0900 Subject: perf config: Allow creating empty config set for config file autogeneration When there isn't a config file (e.g. ~/.perfconfig) or it has nothing, the config set wasn't created. If the config set does not exist, a config file can't be autogenerated. So allow creating a empty config set in the above case, then we can support the config file autogeneration. Before: $ rm -f ~/.perfconfig $ perf config --user report.children=false $ cat ~/.perfconfig cat: /root/.perfconfig: No such file or directory But I think it should work even if there isn't a config file. After: $ rm -f ~/.perfconfig $ perf config --user report.children=false $ cat ~/.perfconfig # this file is auto-generated. [report] children = false NOTE: As a result, if perf_config_set__init() fails, it looks as if the config set isn't freed. But it isn't a problem. Because the config set will be freed by perf_config_set__delete() at the end of cmd_config(). Signed-off-by: Taeung Song Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lkml.kernel.org/r/1504754336-9824-1-git-send-email-treeze.taeung@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/config.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/util/config.c b/tools/perf/util/config.c index bc75596f9e79..d2b6983b1779 100644 --- a/tools/perf/util/config.c +++ b/tools/perf/util/config.c @@ -700,10 +700,7 @@ struct perf_config_set *perf_config_set__new(void) if (set) { INIT_LIST_HEAD(&set->sections); - if (perf_config_set__init(set) < 0) { - perf_config_set__delete(set); - set = NULL; - } + perf_config_set__init(set); } return set; -- cgit v1.2.3 From c23c2a0f236601c635d9a9d18d7993641e72aa8c Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 11 Sep 2017 10:50:26 -0300 Subject: perf tools: Make copyfile_offset() static There are no usage outside util.c and this is the only remaining reason for fcntl.h to be included in util.h, to get the loff_t definition in Alpine Linux, so make it static. Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-2dzlsao7k6ihozs5karw6kpx@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/builtin-test.c | 1 + tools/perf/util/data.c | 1 + tools/perf/util/dso.c | 1 + tools/perf/util/event.c | 1 + tools/perf/util/evlist.h | 1 + tools/perf/util/namespaces.c | 1 + tools/perf/util/pmu.c | 1 + tools/perf/util/probe-file.c | 1 + tools/perf/util/util.c | 3 ++- tools/perf/util/util.h | 2 -- tools/perf/util/zlib.c | 1 + 11 files changed, 11 insertions(+), 3 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c index 377bea009163..d0fee35db0e7 100644 --- a/tools/perf/tests/builtin-test.c +++ b/tools/perf/tests/builtin-test.c @@ -3,6 +3,7 @@ * * Builtin regression testing command: ever growing number of sanity tests */ +#include #include #include #include diff --git a/tools/perf/util/data.c b/tools/perf/util/data.c index 263f5a906ba5..1123b30e3033 100644 --- a/tools/perf/util/data.c +++ b/tools/perf/util/data.c @@ -3,6 +3,7 @@ #include #include #include +#include #include #include diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c index b9e087fb8247..ffd723179a4b 100644 --- a/tools/perf/util/dso.c +++ b/tools/perf/util/dso.c @@ -6,6 +6,7 @@ #include #include #include +#include #include "compress.h" #include "path.h" #include "symbol.h" diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index 17c21ea68a72..10366b87d0b5 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -1,5 +1,6 @@ #include #include +#include #include #include #include diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index bf2c4936e35f..b1c14f1fdc27 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -6,6 +6,7 @@ #include #include #include +#include #include #include "../perf.h" #include "event.h" diff --git a/tools/perf/util/namespaces.c b/tools/perf/util/namespaces.c index a58e91197729..5be021701f34 100644 --- a/tools/perf/util/namespaces.c +++ b/tools/perf/util/namespaces.c @@ -11,6 +11,7 @@ #include "event.h" #include #include +#include #include #include #include diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index 7070638ab600..0b11dfc0af44 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -2,6 +2,7 @@ #include #include #include +#include #include #include #include diff --git a/tools/perf/util/probe-file.c b/tools/perf/util/probe-file.c index cdf8d83a484c..4ae1123c6794 100644 --- a/tools/perf/util/probe-file.c +++ b/tools/perf/util/probe-file.c @@ -15,6 +15,7 @@ * */ #include +#include #include #include #include diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c index 4c360daa4e24..3c9c39711343 100644 --- a/tools/perf/util/util.c +++ b/tools/perf/util/util.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -174,7 +175,7 @@ out: return err; } -int copyfile_offset(int ifd, loff_t off_in, int ofd, loff_t off_out, u64 size) +static int copyfile_offset(int ifd, loff_t off_in, int ofd, loff_t off_out, u64 size) { void *ptr; loff_t pgoff; diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index b136c271125f..03946d5e4741 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -5,7 +5,6 @@ /* glibc 2.20 deprecates _BSD_SOURCE in favour of _DEFAULT_SOURCE */ #define _DEFAULT_SOURCE 1 -#include #include #include #include @@ -35,7 +34,6 @@ bool lsdir_no_dot_filter(const char *name, struct dirent *d); int copyfile(const char *from, const char *to); int copyfile_mode(const char *from, const char *to, mode_t mode); int copyfile_ns(const char *from, const char *to, struct nsinfo *nsi); -int copyfile_offset(int fromfd, loff_t from_ofs, int tofd, loff_t to_ofs, u64 size); ssize_t readn(int fd, void *buf, size_t n); ssize_t writen(int fd, const void *buf, size_t n); diff --git a/tools/perf/util/zlib.c b/tools/perf/util/zlib.c index 1329d843eb7b..7c1175310a12 100644 --- a/tools/perf/util/zlib.c +++ b/tools/perf/util/zlib.c @@ -1,3 +1,4 @@ +#include #include #include #include -- cgit v1.2.3 From 35c1980eb3d1acb3cac11c38252339399dca77e3 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Tue, 5 Sep 2017 14:13:24 -0700 Subject: perf stat: Fall weak group back even for EBADF It's not possible to run a package event and a per cpu event in the same group. This is used by some of the power metrics. They work correctly when not using a group. Normally weak groups should handle that, but in this case EBADF is returned instead of the normal EINVAL. $ strace -e perf_event_open ./perf stat -v -e '{cstate_pkg/c2-residency/,msr/tsc/}:W' -a sleep 1 Using CPUID GenuineIntel-6-3E perf_event_open({type=0x17 /* PERF_TYPE_??? */, size=PERF_ATTR_SIZE_VER5, config=0, ...}, -1, 0, -1, PERF_FLAG_FD_CLOEXEC) = -1 EINVAL (Invalid argument) perf_event_open({type=0x17 /* PERF_TYPE_??? */, size=PERF_ATTR_SIZE_VER5, config=0, ...}, -1, 0, -1, 0) = -1 EINVAL (Invalid argument) perf_event_open({type=0x17 /* PERF_TYPE_??? */, size=PERF_ATTR_SIZE_VER5, config=0, ...}, -1, 0, -1, 0) = -1 EINVAL (Invalid argument) perf_event_open({type=0x17 /* PERF_TYPE_??? */, size=PERF_ATTR_SIZE_VER5, config=0, ...}, -1, 0, -1, 0) = -1 EINVAL (Invalid argument) perf_event_open({type=0x17 /* PERF_TYPE_??? */, size=PERF_ATTR_SIZE_VER5, config=0, ...}, -1, 0, -1, 0) = 3 perf_event_open({type=0x7 /* PERF_TYPE_??? */, size=PERF_ATTR_SIZE_VER5, config=0, ...}, -1, 0, 3, 0) = 4 perf_event_open({type=0x7 /* PERF_TYPE_??? */, size=PERF_ATTR_SIZE_VER5, config=0, ...}, -1, 1, 0, 0) = -1 EBADF (Bad file descriptor) and perf errors out. Make weak groups trigger a fall back for EBADF too. Then this case works correctly: $ perf stat -v -e '{cstate_pkg/c2-residency/,msr/tsc/}:W' -a sleep 1 Using CPUID GenuineIntel-6-3E Weak group for cstate_pkg/c2-residency//2 failed cstate_pkg/c2-residency/: 476709882 1000598460 1000598460 msr/tsc/: 39625837911 12007369110 12007369110 Performance counter stats for 'system wide': 476,709,882 cstate_pkg/c2-residency/ 39,625,837,911 msr/tsc/ 1.000697588 seconds time elapsed This fixes perf stat -M Power ... $ perf stat -M Power --metric-only -a sleep 1 Performance counter stats for 'system wide': Turbo_Utilization C3_Core_Residency C6_Core_Residency C7_Core_Residency C2_Pkg_Residency C3_Pkg_Residency C6_Pkg_Residency C7_Pkg_Residency 1.0 0.7 30.0 0.0 0.9 0.1 0.4 0.0 1.001240740 seconds time elapsed Signed-off-by: Andi Kleen Acked-by: Jiri Olsa Link: http://lkml.kernel.org/r/20170905211324.32427-1-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools/perf') diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 88f1d5fbdb48..dd525417880a 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -656,7 +656,7 @@ try_again: if (create_perf_stat_counter(counter) < 0) { /* Weak group failed. Reset the group. */ - if (errno == EINVAL && + if ((errno == EINVAL || errno == EBADF) && counter->leader != counter && counter->weak_group) { counter = perf_evsel__reset_weak_group(counter); -- cgit v1.2.3 From cf97962308ba6b6afdeb038505032c7c0972bdfa Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Sun, 23 Jul 2017 21:53:25 -0700 Subject: perf vendor events: Add JSON metrics for Broadwell Add JSON metrics for Broadwell. Commiter testing: # uname -a Linux jouet 4.13.0-rc7+ #3 SMP Sat Sep 2 09:04:44 -03 2017 x86_64 x86_64 x86_64 GNU/Linux # grep "model name" /proc/cpuinfo | head -1 model name : Intel(R) Core(TM) i7-5600U CPU @ 2.60GHz # perf list metricgroup List of pre-defined events (to be used in -e): Metric Groups: DSB FLOPS Frontend Frontend_Bandwidth Memory_BW Memory_Bound Memory_Lat Pipeline Ports_Utilization Power SMT Summary TLB TopDownL1 Unknown_Branches # perf stat -M Power --metric-only -a sleep 1 Performance counter stats for 'system wide': Turbo_Utilization C3_Core_Residency C6_Core_Residency C7_Core_Residency C2_Pkg_Residency C3_Pkg_Residency C6_Pkg_Residency C7_Pkg_Residency 1.1 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.003502904 seconds time elapsed # # perf stat -M Memory_BW --metric-only -a sleep 1 Performance counter stats for 'system wide': MLP 1.7 1.001364525 seconds time elapsed # # perf stat -M TLB --metric-only -a sleep 1 Performance counter stats for 'system wide': Page_Walks_Utilization 0.1 1.005962198 seconds time elapsed # # perf stat -M Summary --metric-only -a sleep 1 Performance counter stats for 'system wide': Instructions CPI CLKS CPU_Utilization GFLOPs SMT_2T_Utilization Kernel_Utilization 7281856697.0 0.0 11150898087.0 1.0 0.0 1.0 0.7 1.012134025 seconds time elapsed # Running in verbose mode shows which counters and expressions are being used: # perf stat -v -M Summary --metric-only -a sleep 1 Using CPUID GenuineIntel-6-3D metric expr 1 / inst_retired.any / cycles for CPI found event inst_retired.any found event cycles metric expr cpu_clk_unhalted.thread for CLKS found event cpu_clk_unhalted.thread metric expr inst_retired.any for Instructions found event inst_retired.any metric expr cpu_clk_unhalted.ref_tsc / msr@tsc@ for CPU_Utilization found event cpu_clk_unhalted.ref_tsc found event msr/tsc/ metric expr ( 1*( fp_arith_inst_retired.scalar_single + fp_arith_inst_retired.scalar_double ) + 2* fp_arith_inst_retired.128b_packed_double + 4*( fp_arith_inst_retired.128b_packed_single + fp_arith_inst_retired.256b_packed_double ) + 8* fp_arith_inst_retired.256b_packed_single ) / 1000000000 / duration_time for GFLOPs found event fp_arith_inst_retired.scalar_single found event fp_arith_inst_retired.scalar_double found event fp_arith_inst_retired.128b_packed_double found event fp_arith_inst_retired.128b_packed_single found event fp_arith_inst_retired.256b_packed_double found event fp_arith_inst_retired.256b_packed_single found event duration_time metric expr 1 - cpu_clk_thread_unhalted.one_thread_active / ( cpu_clk_thread_unhalted.ref_xclk_any / 2 ) if #smt_on else 0 for SMT_2T_Utilization found event cpu_clk_thread_unhalted.one_thread_active found event cpu_clk_thread_unhalted.ref_xclk_any metric expr cpu_clk_unhalted.ref_tsc:u / cpu_clk_unhalted.ref_tsc for Kernel_Utilization found event cpu_clk_unhalted.ref_tsc:u found event cpu_clk_unhalted.ref_tsc adding {inst_retired.any,cycles}:W,{cpu_clk_unhalted.thread}:W,{inst_retired.any}:W,{cpu_clk_unhalted.ref_tsc,msr/tsc/}:W,{fp_arith_inst_retired.scalar_single,fp_arith_inst_retired.scalar_double,fp_arith_inst_retired.128b_packed_double,fp_arith_inst_retired.128b_packed_single,fp_arith_inst_retired.256b_packed_double,fp_arith_inst_retired.256b_packed_single,duration_time}:W,{cpu_clk_thread_unhalted.one_thread_active,cpu_clk_thread_unhalted.ref_xclk_any}:W,{cpu_clk_unhalted.ref_tsc:u,cpu_clk_unhalted.ref_tsc}:W inst_retired.any -> cpu/event=0xc0/ cpu_clk_unhalted.thread -> cpu/event=0x3c/ inst_retired.any -> cpu/event=0xc0/ cpu_clk_unhalted.ref_tsc -> cpu/umask=0x3,period=2000003,event=0/ fp_arith_inst_retired.scalar_single -> cpu/umask=0x2,period=2000003,event=0xc7/ fp_arith_inst_retired.scalar_double -> cpu/umask=0x1,period=2000003,event=0xc7/ fp_arith_inst_retired.128b_packed_double -> cpu/umask=0x4,period=2000003,event=0xc7/ fp_arith_inst_retired.128b_packed_single -> cpu/umask=0x8,period=2000003,event=0xc7/ fp_arith_inst_retired.256b_packed_double -> cpu/umask=0x10,period=2000003,event=0xc7/ fp_arith_inst_retired.256b_packed_single -> cpu/umask=0x20,period=2000003,event=0xc7/ cpu_clk_thread_unhalted.one_thread_active -> cpu/umask=0x2,period=2000003,event=0x3c/ cpu_clk_thread_unhalted.ref_xclk_any -> cpu/umask=0x1,any=1,period=2000003,event=0x3c/ cpu_clk_unhalted.ref_tsc -> cpu/umask=0x3,period=2000003,event=0/ cpu_clk_unhalted.ref_tsc -> cpu/umask=0x3,period=2000003,event=0/ Weak group for fp_arith_inst_retired.scalar_single/7 failed Weak group for cpu_clk_unhalted.ref_tsc:u/2 failed inst_retired.any: 8704146437 4026374016 619883741 cycles: 11180800018 4026374016 619883741 cpu_clk_unhalted.thread: 11140030295 4026323772 931621933 inst_retired.any: 8643115117 4026260510 1243595906 cpu_clk_unhalted.ref_tsc: 10201638510 4026184297 1247351077 msr/tsc/: 10378022785 4026184297 1247351077 fp_arith_inst_retired.scalar_single: 134697 4026102728 1559210545 fp_arith_inst_retired.scalar_double: 274339 4026007348 1870014984 fp_arith_inst_retired.128b_packed_double: 1639 4025886054 1866736918 fp_arith_inst_retired.128b_packed_single: 0 4025776614 2175106569 fp_arith_inst_retired.256b_packed_double: 0 4025681734 1235551129 fp_arith_inst_retired.256b_packed_single: 0 4025582962 1232398454 duration_time: 0 4025552913 4025552913 cpu_clk_thread_unhalted.one_thread_active: 10505 4025474649 923893076 cpu_clk_thread_unhalted.ref_xclk_any: 394992110 4025474649 923893076 cpu_clk_unhalted.ref_tsc:u: 5341421014 4025360315 1231634198 cpu_clk_unhalted.ref_tsc: 10258278508 4025252611 307909362 Performance counter stats for 'system wide': Instructions CPI CLKS CPU_Utilization GFLOPs SMT_2T_Utilization Kernel_Utilization 8704146437.0 0.0 11140030295.0 1.0 0.0 1.0 0.5 1.006783654 seconds time elapsed # Signed-off-by: Andi Kleen Tested-by: Arnaldo Carvalho de Melo Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20170905195235.GW2482@two.firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- .../pmu-events/arch/x86/broadwell/bdw-metrics.json | 164 +++++++++++++++++++++ 1 file changed, 164 insertions(+) create mode 100644 tools/perf/pmu-events/arch/x86/broadwell/bdw-metrics.json (limited to 'tools/perf') diff --git a/tools/perf/pmu-events/arch/x86/broadwell/bdw-metrics.json b/tools/perf/pmu-events/arch/x86/broadwell/bdw-metrics.json new file mode 100644 index 000000000000..49c5f123d811 --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/broadwell/bdw-metrics.json @@ -0,0 +1,164 @@ +[ + { + "BriefDescription": "Instructions Per Cycle (per logical thread)", + "MetricExpr": "INST_RETIRED.ANY / CPU_CLK_UNHALTED.THREAD", + "MetricGroup": "TopDownL1", + "MetricName": "IPC" + }, + { + "BriefDescription": "Uops Per Instruction", + "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY", + "MetricGroup": "Pipeline", + "MetricName": "UPI" + }, + { + "BriefDescription": "Rough Estimation of fraction of fetched lines bytes that were likely consumed by program instructions", + "MetricExpr": "min( 1 , IDQ.MITE_UOPS / ( UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY * 16 * ( ICACHE.HIT + ICACHE.MISSES ) / 4.0 ) )", + "MetricGroup": "Frontend", + "MetricName": "IFetch_Line_Utilization" + }, + { + "BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded Icache; or Uop Cache)", + "MetricExpr": "IDQ.DSB_UOPS / ( IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS )", + "MetricGroup": "DSB; Frontend_Bandwidth", + "MetricName": "DSB_Coverage" + }, + { + "BriefDescription": "Cycles Per Instruction (threaded)", + "MetricExpr": "1 / INST_RETIRED.ANY / cycles", + "MetricGroup": "Pipeline;Summary", + "MetricName": "CPI" + }, + { + "BriefDescription": "Per-thread actual clocks when the logical processor is active. This is called 'Clockticks' in VTune.", + "MetricExpr": "CPU_CLK_UNHALTED.THREAD", + "MetricGroup": "Summary", + "MetricName": "CLKS" + }, + { + "BriefDescription": "Total issue-pipeline slots", + "MetricExpr": "4*( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles", + "MetricGroup": "TopDownL1", + "MetricName": "SLOTS" + }, + { + "BriefDescription": "Total number of retired Instructions", + "MetricExpr": "INST_RETIRED.ANY", + "MetricGroup": "Summary", + "MetricName": "Instructions" + }, + { + "BriefDescription": "Instructions Per Cycle (per physical core)", + "MetricExpr": "INST_RETIRED.ANY / ( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles", + "MetricGroup": "SMT", + "MetricName": "CoreIPC" + }, + { + "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is at least 1 uop executed)", + "MetricExpr": "UOPS_EXECUTED.THREAD / ( cpu@uops_executed.core\\,cmask\\=1@ / 2) if #SMT_on else UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC", + "MetricGroup": "Pipeline;Ports_Utilization", + "MetricName": "ILP" + }, + { + "BriefDescription": "Average Branch Address Clear Cost (fraction of cycles)", + "MetricExpr": "2* ( RS_EVENTS.EMPTY_CYCLES - ICACHE.IFDATA_STALL - ( 14 * ITLB_MISSES.STLB_HIT + cpu@ITLB_MISSES.WALK_DURATION\\,cmask\\=1@ + 7* ITLB_MISSES.WALK_COMPLETED ) ) / RS_EVENTS.EMPTY_END", + "MetricGroup": "Unknown_Branches", + "MetricName": "BAClear_Cost" + }, + { + "BriefDescription": "Core actual clocks when any thread is active on the physical core", + "MetricExpr": "( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else CPU_CLK_UNHALTED.THREAD", + "MetricGroup": "SMT", + "MetricName": "CORE_CLKS" + }, + { + "BriefDescription": "Actual Average Latency for L1 data-cache miss demand loads", + "MetricExpr": "L1D_PEND_MISS.PENDING / ( MEM_LOAD_UOPS_RETIRED.L1_MISS + mem_load_uops_retired.hit_lfb )", + "MetricGroup": "Memory_Bound;Memory_Lat", + "MetricName": "Load_Miss_Real_Latency" + }, + { + "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least 1 such miss)", + "MetricExpr": "L1D_PEND_MISS.PENDING / ( cpu@l1d_pend_miss.pending_cycles\\,any\\=1@ / 2) if #SMT_on else L1D_PEND_MISS.PENDING_CYCLES", + "MetricGroup": "Memory_Bound;Memory_BW", + "MetricName": "MLP" + }, + { + "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses", + "MetricExpr": "( cpu@ITLB_MISSES.WALK_DURATION\\,cmask\\=1@ + cpu@DTLB_LOAD_MISSES.WALK_DURATION\\,cmask\\=1@ + cpu@DTLB_STORE_MISSES.WALK_DURATION\\,cmask\\=1@ + 7*(DTLB_STORE_MISSES.WALK_COMPLETED+DTLB_LOAD_MISSES.WALK_COMPLETED+ITLB_MISSES.WALK_COMPLETED)) / ( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles", + "MetricGroup": "TLB", + "MetricName": "Page_Walks_Utilization" + }, + { + "BriefDescription": "Average CPU Utilization", + "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@", + "MetricGroup": "Summary", + "MetricName": "CPU_Utilization" + }, + { + "BriefDescription": "Giga Floating Point Operations Per Second", + "MetricExpr": "( 1*( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2* FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4*( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8* FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE ) / 1000000000 / duration_time", + "MetricGroup": "FLOPS;Summary", + "MetricName": "GFLOPs" + }, + { + "BriefDescription": "Average Frequency Utilization relative nominal frequency", + "MetricExpr": "CPU_CLK_UNHALTED.THREAD / CPU_CLK_UNHALTED.REF_TSC", + "MetricGroup": "Power", + "MetricName": "Turbo_Utilization" + }, + { + "BriefDescription": "Fraction of cycles where both hardware threads were active", + "MetricExpr": "1 - CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE / ( CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY / 2 ) if #SMT_on else 0", + "MetricGroup": "SMT;Summary", + "MetricName": "SMT_2T_Utilization" + }, + { + "BriefDescription": "Fraction of cycles spent in Kernel mode", + "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:u / CPU_CLK_UNHALTED.REF_TSC", + "MetricGroup": "Summary", + "MetricName": "Kernel_Utilization" + }, + { + "BriefDescription": "C3 residency percent per core", + "MetricExpr": "(cstate_core@c3\\-residency@ / msr@tsc@) * 100", + "MetricGroup": "Power", + "MetricName": "C3_Core_Residency" + }, + { + "BriefDescription": "C6 residency percent per core", + "MetricExpr": "(cstate_core@c6\\-residency@ / msr@tsc@) * 100", + "MetricGroup": "Power", + "MetricName": "C6_Core_Residency" + }, + { + "BriefDescription": "C7 residency percent per core", + "MetricExpr": "(cstate_core@c7\\-residency@ / msr@tsc@) * 100", + "MetricGroup": "Power", + "MetricName": "C7_Core_Residency" + }, + { + "BriefDescription": "C2 residency percent per package", + "MetricExpr": "(cstate_pkg@c2\\-residency@ / msr@tsc@) * 100", + "MetricGroup": "Power", + "MetricName": "C2_Pkg_Residency" + }, + { + "BriefDescription": "C3 residency percent per package", + "MetricExpr": "(cstate_pkg@c3\\-residency@ / msr@tsc@) * 100", + "MetricGroup": "Power", + "MetricName": "C3_Pkg_Residency" + }, + { + "BriefDescription": "C6 residency percent per package", + "MetricExpr": "(cstate_pkg@c6\\-residency@ / msr@tsc@) * 100", + "MetricGroup": "Power", + "MetricName": "C6_Pkg_Residency" + }, + { + "BriefDescription": "C7 residency percent per package", + "MetricExpr": "(cstate_pkg@c7\\-residency@ / msr@tsc@) * 100", + "MetricGroup": "Power", + "MetricName": "C7_Pkg_Residency" + } +] -- cgit v1.2.3 From 2e006a24127ad88422632f9e5d6a8039a40f01da Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Sun, 23 Jul 2017 21:54:49 -0700 Subject: perf vendor events: Add JSON metrics for Skylake Add JSON metrics for Skylake. Committer testing: # grep "model name" /proc/cpuinfo | head -1 model name : Intel(R) Core(TM) i5-7500 CPU @ 3.40GHz # uname -a Linux seventh 4.12.0-rc6+ #1 SMP Fri Jun 30 16:40:55 -03 2017 x86_64 x86_64 x86_64 GNU/Linux # perf stat --metric-only -M Summary -a sleep 1 Performance counter stats for 'system wide': Instructions CPI CLKS CPU_Utilization GFLOPs SMT_2T_Utilization Kernel_Utilization 34021097.0 0.0 119424171.0 0.0 0.0 0.0 0.0 1.001001793 seconds time elapsed # perf list metricgroup List of pre-defined events (to be used in -e): Metric Groups: DSB FLOPS Frontend Frontend_Bandwidth Memory_BW Memory_Bound Memory_Lat Pipeline Ports_Utilization Power SMT Summary TLB TopDownL1 Unknown_Branches # perf stat --metric-only -M Ports_Utilization -a sleep 1 Performance counter stats for 'system wide': ILP 1475828.0 1.000688547 seconds time elapsed # perf stat -v --metric-only -M Ports_Utilization -a sleep 1 Using CPUID GenuineIntel-6-9E metric expr uops_executed.thread / ( uops_executed.core_cycles_ge_1 / 2) if #smt_on else uops_executed.core_cycles_ge_1 for ILP found event uops_executed.thread found event uops_executed.core_cycles_ge_1 adding {uops_executed.thread,uops_executed.core_cycles_ge_1}:W uops_executed.thread -> cpu/umask=0x1,period=2000003,event=0xb1/ uops_executed.core_cycles_ge_1 -> cpu/umask=0x2,period=2000003,cmask=1,event=0xb1/ uops_executed.thread: 8115271 4002547654 4002547654 uops_executed.core_cycles_ge_1: 3282969 4002547654 4002547654 Performance counter stats for 'system wide': ILP 3282969.0 1.000719870 seconds time elapsed # Signed-off-by: Andi Kleen Tested-by: Arnaldo Carvalho de Melo Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20170905195235.GW2482@two.firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- .../pmu-events/arch/x86/skylake/skl-metrics.json | 164 +++++++++++++++++++++ 1 file changed, 164 insertions(+) create mode 100644 tools/perf/pmu-events/arch/x86/skylake/skl-metrics.json (limited to 'tools/perf') diff --git a/tools/perf/pmu-events/arch/x86/skylake/skl-metrics.json b/tools/perf/pmu-events/arch/x86/skylake/skl-metrics.json new file mode 100644 index 000000000000..411f9411ec9e --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/skylake/skl-metrics.json @@ -0,0 +1,164 @@ +[ + { + "BriefDescription": "Instructions Per Cycle (per logical thread)", + "MetricExpr": "INST_RETIRED.ANY / CPU_CLK_UNHALTED.THREAD", + "MetricGroup": "TopDownL1", + "MetricName": "IPC" + }, + { + "BriefDescription": "Uops Per Instruction", + "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY", + "MetricGroup": "Pipeline", + "MetricName": "UPI" + }, + { + "BriefDescription": "Rough Estimation of fraction of fetched lines bytes that were likely consumed by program instructions", + "MetricExpr": "min( 1 , UOPS_ISSUED.ANY / (UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY * 64 * ( ICACHE_64B.IFTAG_HIT + ICACHE_64B.IFTAG_MISS ) / 4.1) )", + "MetricGroup": "Frontend", + "MetricName": "IFetch_Line_Utilization" + }, + { + "BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded Icache; or Uop Cache)", + "MetricExpr": "IDQ.DSB_UOPS / ( IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS )", + "MetricGroup": "DSB; Frontend_Bandwidth", + "MetricName": "DSB_Coverage" + }, + { + "BriefDescription": "Cycles Per Instruction (threaded)", + "MetricExpr": "1 / INST_RETIRED.ANY / cycles", + "MetricGroup": "Pipeline;Summary", + "MetricName": "CPI" + }, + { + "BriefDescription": "Per-thread actual clocks when the logical processor is active. This is called 'Clockticks' in VTune.", + "MetricExpr": "CPU_CLK_UNHALTED.THREAD", + "MetricGroup": "Summary", + "MetricName": "CLKS" + }, + { + "BriefDescription": "Total issue-pipeline slots", + "MetricExpr": "4*( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles", + "MetricGroup": "TopDownL1", + "MetricName": "SLOTS" + }, + { + "BriefDescription": "Total number of retired Instructions", + "MetricExpr": "INST_RETIRED.ANY", + "MetricGroup": "Summary", + "MetricName": "Instructions" + }, + { + "BriefDescription": "Instructions Per Cycle (per physical core)", + "MetricExpr": "INST_RETIRED.ANY / ( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles", + "MetricGroup": "SMT", + "MetricName": "CoreIPC" + }, + { + "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is at least 1 uop executed)", + "MetricExpr": "UOPS_EXECUTED.THREAD / ( UOPS_EXECUTED.CORE_CYCLES_GE_1 / 2) if #SMT_on else UOPS_EXECUTED.CORE_CYCLES_GE_1", + "MetricGroup": "Pipeline;Ports_Utilization", + "MetricName": "ILP" + }, + { + "BriefDescription": "Average Branch Address Clear Cost (fraction of cycles)", + "MetricExpr": "2* ( RS_EVENTS.EMPTY_CYCLES - ICACHE_16B.IFDATA_STALL - ICACHE_64B.IFTAG_STALL ) / RS_EVENTS.EMPTY_END", + "MetricGroup": "Unknown_Branches", + "MetricName": "BAClear_Cost" + }, + { + "BriefDescription": "Core actual clocks when any thread is active on the physical core", + "MetricExpr": "( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else CPU_CLK_UNHALTED.THREAD", + "MetricGroup": "SMT", + "MetricName": "CORE_CLKS" + }, + { + "BriefDescription": "Actual Average Latency for L1 data-cache miss demand loads", + "MetricExpr": "L1D_PEND_MISS.PENDING / ( MEM_LOAD_RETIRED.L1_MISS + MEM_LOAD_RETIRED.FB_HIT )", + "MetricGroup": "Memory_Bound;Memory_Lat", + "MetricName": "Load_Miss_Real_Latency" + }, + { + "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least 1 such miss)", + "MetricExpr": "L1D_PEND_MISS.PENDING / ( L1D_PEND_MISS.PENDING_CYCLES_ANY / 2) if #SMT_on else L1D_PEND_MISS.PENDING_CYCLES", + "MetricGroup": "Memory_Bound;Memory_BW", + "MetricName": "MLP" + }, + { + "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses", + "MetricExpr": "( ITLB_MISSES.WALK_PENDING + DTLB_LOAD_MISSES.WALK_PENDING + DTLB_STORE_MISSES.WALK_PENDING + EPT.WALK_PENDING ) / ( 2 * ( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles )", + "MetricGroup": "TLB", + "MetricName": "Page_Walks_Utilization" + }, + { + "BriefDescription": "Average CPU Utilization", + "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@", + "MetricGroup": "Summary", + "MetricName": "CPU_Utilization" + }, + { + "BriefDescription": "Giga Floating Point Operations Per Second", + "MetricExpr": "( 1*( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2* FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4*( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8* FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE ) / 1000000000 / duration_time", + "MetricGroup": "FLOPS;Summary", + "MetricName": "GFLOPs" + }, + { + "BriefDescription": "Average Frequency Utilization relative nominal frequency", + "MetricExpr": "CPU_CLK_UNHALTED.THREAD / CPU_CLK_UNHALTED.REF_TSC", + "MetricGroup": "Power", + "MetricName": "Turbo_Utilization" + }, + { + "BriefDescription": "Fraction of cycles where both hardware threads were active", + "MetricExpr": "1 - CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE / ( CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY / 2 ) if #SMT_on else 0", + "MetricGroup": "SMT;Summary", + "MetricName": "SMT_2T_Utilization" + }, + { + "BriefDescription": "Fraction of cycles spent in Kernel mode", + "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:u / CPU_CLK_UNHALTED.REF_TSC", + "MetricGroup": "Summary", + "MetricName": "Kernel_Utilization" + }, + { + "BriefDescription": "C3 residency percent per core", + "MetricExpr": "(cstate_core@c3\\-residency@ / msr@tsc@) * 100", + "MetricGroup": "Power", + "MetricName": "C3_Core_Residency" + }, + { + "BriefDescription": "C6 residency percent per core", + "MetricExpr": "(cstate_core@c6\\-residency@ / msr@tsc@) * 100", + "MetricGroup": "Power", + "MetricName": "C6_Core_Residency" + }, + { + "BriefDescription": "C7 residency percent per core", + "MetricExpr": "(cstate_core@c7\\-residency@ / msr@tsc@) * 100", + "MetricGroup": "Power", + "MetricName": "C7_Core_Residency" + }, + { + "BriefDescription": "C2 residency percent per package", + "MetricExpr": "(cstate_pkg@c2\\-residency@ / msr@tsc@) * 100", + "MetricGroup": "Power", + "MetricName": "C2_Pkg_Residency" + }, + { + "BriefDescription": "C3 residency percent per package", + "MetricExpr": "(cstate_pkg@c3\\-residency@ / msr@tsc@) * 100", + "MetricGroup": "Power", + "MetricName": "C3_Pkg_Residency" + }, + { + "BriefDescription": "C6 residency percent per package", + "MetricExpr": "(cstate_pkg@c6\\-residency@ / msr@tsc@) * 100", + "MetricGroup": "Power", + "MetricName": "C6_Pkg_Residency" + }, + { + "BriefDescription": "C7 residency percent per package", + "MetricExpr": "(cstate_pkg@c7\\-residency@ / msr@tsc@) * 100", + "MetricGroup": "Power", + "MetricName": "C7_Pkg_Residency" + } +] -- cgit v1.2.3 From 97dca6715d0a058a6af028a3019432740b4a0011 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Sun, 23 Jul 2017 21:50:34 -0700 Subject: perf vendor events: Add JSON metrics for Sandy Bridge Add JSON metrics for Sandy Bridge. Committer testing: # grep "model name" /proc/cpuinfo | head -1 model name : Intel(R) Core(TM) i5-2400 CPU @ 3.10GHz # perf list metricgroup List of pre-defined events (to be used in -e): Metric Groups: DSB FLOPS Frontend Frontend_Bandwidth Pipeline Ports_Utilization Power SMT Summary TopDownL1 # perf stat -M Power --metric-only -a sleep 1 Performance counter stats for 'system wide': Turbo_Utilization C3_Core_Residency C6_Core_Residency C7_Core_Residency C2_Pkg_Residency C3_Pkg_Residency C6_Pkg_Residency C7_Pkg_Residency 0.8 0.0 98.1 0.0 0.0 0.0 23.4 0.0 1.001153658 seconds time elapsed # perf stat -v -M Power --metric-only -a sleep 1 Using CPUID GenuineIntel-6-2A metric expr cpu_clk_unhalted.thread / cpu_clk_unhalted.ref_tsc for Turbo_Utilization found event cpu_clk_unhalted.thread found event cpu_clk_unhalted.ref_tsc metric expr (cstate_core@c3\-residency@ / msr@tsc@) * 100 for C3_Core_Residency found event cstate_core/c3-residency/ found event msr/tsc/ metric expr (cstate_core@c6\-residency@ / msr@tsc@) * 100 for C6_Core_Residency found event cstate_core/c6-residency/ found event msr/tsc/ metric expr (cstate_core@c7\-residency@ / msr@tsc@) * 100 for C7_Core_Residency found event cstate_core/c7-residency/ found event msr/tsc/ metric expr (cstate_pkg@c2\-residency@ / msr@tsc@) * 100 for C2_Pkg_Residency found event cstate_pkg/c2-residency/ found event msr/tsc/ metric expr (cstate_pkg@c3\-residency@ / msr@tsc@) * 100 for C3_Pkg_Residency found event cstate_pkg/c3-residency/ found event msr/tsc/ metric expr (cstate_pkg@c6\-residency@ / msr@tsc@) * 100 for C6_Pkg_Residency found event cstate_pkg/c6-residency/ found event msr/tsc/ metric expr (cstate_pkg@c7\-residency@ / msr@tsc@) * 100 for C7_Pkg_Residency found event cstate_pkg/c7-residency/ found event msr/tsc/ adding {cpu_clk_unhalted.thread,cpu_clk_unhalted.ref_tsc}:W,{cstate_core/c3-residency/,msr/tsc/}:W,{cstate_core/c6-residency/,msr/tsc/}:W,{cstate_core/c7-residency/,msr/tsc/}:W,{cstate_pkg/c2-residency/,msr/tsc/}:W,{cstate_pkg/c3-residency/,msr/tsc/}:W,{cstate_pkg/c6-residency/,msr/tsc/}:W,{cstate_pkg/c7-residency/,msr/tsc/}:W cpu_clk_unhalted.thread -> cpu/event=0x3c/ cpu_clk_unhalted.ref_tsc -> cpu/umask=0x3,period=2000003,event=0/ Weak group for cstate_pkg/c2-residency//2 failed Weak group for cstate_pkg/c3-residency//2 failed Weak group for cstate_pkg/c6-residency//2 failed Weak group for cstate_pkg/c7-residency//2 failed cpu_clk_unhalted.thread: 5564185 4002833569 4002833569 cpu_clk_unhalted.ref_tsc: 7325424 4002833569 4002833569 cstate_core/c3-residency/: 68293 4003027101 4003027101 msr/tsc/: 12451294472 4003027101 4003027101 cstate_core/c6-residency/: 12238830163 4003260984 4003260984 msr/tsc/: 12452017806 4003260984 4003260984 cstate_core/c7-residency/: 0 4003489648 4003489648 msr/tsc/: 12452725162 4003489648 4003489648 cstate_pkg/c2-residency/: 1830054 1000913138 1000913138 msr/tsc/: 12453441079 4003717513 4003717513 cstate_pkg/c3-residency/: 0 1000973570 1000973570 msr/tsc/: 12454177865 4003954758 4003954758 cstate_pkg/c6-residency/: 2940448859 1001032370 1001032370 msr/tsc/: 12454833890 4004166118 4004166118 cstate_pkg/c7-residency/: 0 1001049818 1001049818 msr/tsc/: 12454919470 4004194204 4004194204 Performance counter stats for 'system wide': Turbo_Utilization C3_Core_Residency C6_Core_Residency C7_Core_Residency C2_Pkg_Residency C3_Pkg_Residency C6_Pkg_Residency C7_Pkg_Residency 0.8 0.0 98.3 0.0 0.0 0.0 23.6 0.0 1.001126519 seconds time elapsed # Signed-off-by: Andi Kleen Tested-by: Arnaldo Carvalho de Melo Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20170905195235.GW2482@two.firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- .../arch/x86/sandybridge/snb-metrics.json | 140 +++++++++++++++++++++ 1 file changed, 140 insertions(+) create mode 100644 tools/perf/pmu-events/arch/x86/sandybridge/snb-metrics.json (limited to 'tools/perf') diff --git a/tools/perf/pmu-events/arch/x86/sandybridge/snb-metrics.json b/tools/perf/pmu-events/arch/x86/sandybridge/snb-metrics.json new file mode 100644 index 000000000000..b35b1c153c8a --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/sandybridge/snb-metrics.json @@ -0,0 +1,140 @@ +[ + { + "BriefDescription": "Instructions Per Cycle (per logical thread)", + "MetricExpr": "INST_RETIRED.ANY / CPU_CLK_UNHALTED.THREAD", + "MetricGroup": "TopDownL1", + "MetricName": "IPC" + }, + { + "BriefDescription": "Uops Per Instruction", + "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY", + "MetricGroup": "Pipeline", + "MetricName": "UPI" + }, + { + "BriefDescription": "Rough Estimation of fraction of fetched lines bytes that were likely consumed by program instructions", + "MetricExpr": "min( 1 , UOPS_ISSUED.ANY / ( UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY * 32 * ( ICACHE.HIT + ICACHE.MISSES ) / 4) )", + "MetricGroup": "Frontend", + "MetricName": "IFetch_Line_Utilization" + }, + { + "BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded Icache; or Uop Cache)", + "MetricExpr": "IDQ.DSB_UOPS / ( IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS )", + "MetricGroup": "DSB; Frontend_Bandwidth", + "MetricName": "DSB_Coverage" + }, + { + "BriefDescription": "Cycles Per Instruction (threaded)", + "MetricExpr": "1 / INST_RETIRED.ANY / cycles", + "MetricGroup": "Pipeline;Summary", + "MetricName": "CPI" + }, + { + "BriefDescription": "Per-thread actual clocks when the logical processor is active. This is called 'Clockticks' in VTune.", + "MetricExpr": "CPU_CLK_UNHALTED.THREAD", + "MetricGroup": "Summary", + "MetricName": "CLKS" + }, + { + "BriefDescription": "Total issue-pipeline slots", + "MetricExpr": "4*( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles", + "MetricGroup": "TopDownL1", + "MetricName": "SLOTS" + }, + { + "BriefDescription": "Total number of retired Instructions", + "MetricExpr": "INST_RETIRED.ANY", + "MetricGroup": "Summary", + "MetricName": "Instructions" + }, + { + "BriefDescription": "Instructions Per Cycle (per physical core)", + "MetricExpr": "INST_RETIRED.ANY / ( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles", + "MetricGroup": "SMT", + "MetricName": "CoreIPC" + }, + { + "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is at least 1 uop executed)", + "MetricExpr": "UOPS_DISPATCHED.THREAD / ( cpu@UOPS_DISPATCHED.CORE\\,cmask\\=1@ / 2) if #SMT_on else cpu@UOPS_DISPATCHED.CORE\\,cmask\\=1@", + "MetricGroup": "Pipeline;Ports_Utilization", + "MetricName": "ILP" + }, + { + "BriefDescription": "Core actual clocks when any thread is active on the physical core", + "MetricExpr": "( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else CPU_CLK_UNHALTED.THREAD", + "MetricGroup": "SMT", + "MetricName": "CORE_CLKS" + }, + { + "BriefDescription": "Average CPU Utilization", + "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@", + "MetricGroup": "Summary", + "MetricName": "CPU_Utilization" + }, + { + "BriefDescription": "Giga Floating Point Operations Per Second", + "MetricExpr": "( 1*( FP_COMP_OPS_EXE.SSE_SCALAR_SINGLE + FP_COMP_OPS_EXE.SSE_SCALAR_DOUBLE ) + 2* FP_COMP_OPS_EXE.SSE_PACKED_DOUBLE + 4*( FP_COMP_OPS_EXE.SSE_PACKED_SINGLE + SIMD_FP_256.PACKED_DOUBLE ) + 8* SIMD_FP_256.PACKED_SINGLE ) / 1000000000 / duration_time", + "MetricGroup": "FLOPS;Summary", + "MetricName": "GFLOPs" + }, + { + "BriefDescription": "Average Frequency Utilization relative nominal frequency", + "MetricExpr": "CPU_CLK_UNHALTED.THREAD / CPU_CLK_UNHALTED.REF_TSC", + "MetricGroup": "Power", + "MetricName": "Turbo_Utilization" + }, + { + "BriefDescription": "Fraction of cycles where both hardware threads were active", + "MetricExpr": "1 - CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE / ( CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY / 2 ) if #SMT_on else 0", + "MetricGroup": "SMT;Summary", + "MetricName": "SMT_2T_Utilization" + }, + { + "BriefDescription": "Fraction of cycles spent in Kernel mode", + "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:u / CPU_CLK_UNHALTED.REF_TSC", + "MetricGroup": "Summary", + "MetricName": "Kernel_Utilization" + }, + { + "BriefDescription": "C3 residency percent per core", + "MetricExpr": "(cstate_core@c3\\-residency@ / msr@tsc@) * 100", + "MetricGroup": "Power", + "MetricName": "C3_Core_Residency" + }, + { + "BriefDescription": "C6 residency percent per core", + "MetricExpr": "(cstate_core@c6\\-residency@ / msr@tsc@) * 100", + "MetricGroup": "Power", + "MetricName": "C6_Core_Residency" + }, + { + "BriefDescription": "C7 residency percent per core", + "MetricExpr": "(cstate_core@c7\\-residency@ / msr@tsc@) * 100", + "MetricGroup": "Power", + "MetricName": "C7_Core_Residency" + }, + { + "BriefDescription": "C2 residency percent per package", + "MetricExpr": "(cstate_pkg@c2\\-residency@ / msr@tsc@) * 100", + "MetricGroup": "Power", + "MetricName": "C2_Pkg_Residency" + }, + { + "BriefDescription": "C3 residency percent per package", + "MetricExpr": "(cstate_pkg@c3\\-residency@ / msr@tsc@) * 100", + "MetricGroup": "Power", + "MetricName": "C3_Pkg_Residency" + }, + { + "BriefDescription": "C6 residency percent per package", + "MetricExpr": "(cstate_pkg@c6\\-residency@ / msr@tsc@) * 100", + "MetricGroup": "Power", + "MetricName": "C6_Pkg_Residency" + }, + { + "BriefDescription": "C7 residency percent per package", + "MetricExpr": "(cstate_pkg@c7\\-residency@ / msr@tsc@) * 100", + "MetricGroup": "Power", + "MetricName": "C7_Pkg_Residency" + } +] -- cgit v1.2.3 From 28bc0ddb3a89a09b6f2d4dc97b85e28b2a70db1e Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Sun, 23 Jul 2017 21:51:07 -0700 Subject: perf vendor events: Add JSON metrics for Sandy Bridge EP Add JSON metrics for Sandy Bridge EP. Signed-off-by: Andi Kleen Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20170908180133.GA20128@tassilo.jf.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- .../pmu-events/arch/x86/jaketown/jkt-metrics.json | 140 +++++++++++++++++++++ 1 file changed, 140 insertions(+) create mode 100644 tools/perf/pmu-events/arch/x86/jaketown/jkt-metrics.json (limited to 'tools/perf') diff --git a/tools/perf/pmu-events/arch/x86/jaketown/jkt-metrics.json b/tools/perf/pmu-events/arch/x86/jaketown/jkt-metrics.json new file mode 100644 index 000000000000..b35b1c153c8a --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/jaketown/jkt-metrics.json @@ -0,0 +1,140 @@ +[ + { + "BriefDescription": "Instructions Per Cycle (per logical thread)", + "MetricExpr": "INST_RETIRED.ANY / CPU_CLK_UNHALTED.THREAD", + "MetricGroup": "TopDownL1", + "MetricName": "IPC" + }, + { + "BriefDescription": "Uops Per Instruction", + "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY", + "MetricGroup": "Pipeline", + "MetricName": "UPI" + }, + { + "BriefDescription": "Rough Estimation of fraction of fetched lines bytes that were likely consumed by program instructions", + "MetricExpr": "min( 1 , UOPS_ISSUED.ANY / ( UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY * 32 * ( ICACHE.HIT + ICACHE.MISSES ) / 4) )", + "MetricGroup": "Frontend", + "MetricName": "IFetch_Line_Utilization" + }, + { + "BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded Icache; or Uop Cache)", + "MetricExpr": "IDQ.DSB_UOPS / ( IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS )", + "MetricGroup": "DSB; Frontend_Bandwidth", + "MetricName": "DSB_Coverage" + }, + { + "BriefDescription": "Cycles Per Instruction (threaded)", + "MetricExpr": "1 / INST_RETIRED.ANY / cycles", + "MetricGroup": "Pipeline;Summary", + "MetricName": "CPI" + }, + { + "BriefDescription": "Per-thread actual clocks when the logical processor is active. This is called 'Clockticks' in VTune.", + "MetricExpr": "CPU_CLK_UNHALTED.THREAD", + "MetricGroup": "Summary", + "MetricName": "CLKS" + }, + { + "BriefDescription": "Total issue-pipeline slots", + "MetricExpr": "4*( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles", + "MetricGroup": "TopDownL1", + "MetricName": "SLOTS" + }, + { + "BriefDescription": "Total number of retired Instructions", + "MetricExpr": "INST_RETIRED.ANY", + "MetricGroup": "Summary", + "MetricName": "Instructions" + }, + { + "BriefDescription": "Instructions Per Cycle (per physical core)", + "MetricExpr": "INST_RETIRED.ANY / ( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles", + "MetricGroup": "SMT", + "MetricName": "CoreIPC" + }, + { + "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is at least 1 uop executed)", + "MetricExpr": "UOPS_DISPATCHED.THREAD / ( cpu@UOPS_DISPATCHED.CORE\\,cmask\\=1@ / 2) if #SMT_on else cpu@UOPS_DISPATCHED.CORE\\,cmask\\=1@", + "MetricGroup": "Pipeline;Ports_Utilization", + "MetricName": "ILP" + }, + { + "BriefDescription": "Core actual clocks when any thread is active on the physical core", + "MetricExpr": "( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else CPU_CLK_UNHALTED.THREAD", + "MetricGroup": "SMT", + "MetricName": "CORE_CLKS" + }, + { + "BriefDescription": "Average CPU Utilization", + "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@", + "MetricGroup": "Summary", + "MetricName": "CPU_Utilization" + }, + { + "BriefDescription": "Giga Floating Point Operations Per Second", + "MetricExpr": "( 1*( FP_COMP_OPS_EXE.SSE_SCALAR_SINGLE + FP_COMP_OPS_EXE.SSE_SCALAR_DOUBLE ) + 2* FP_COMP_OPS_EXE.SSE_PACKED_DOUBLE + 4*( FP_COMP_OPS_EXE.SSE_PACKED_SINGLE + SIMD_FP_256.PACKED_DOUBLE ) + 8* SIMD_FP_256.PACKED_SINGLE ) / 1000000000 / duration_time", + "MetricGroup": "FLOPS;Summary", + "MetricName": "GFLOPs" + }, + { + "BriefDescription": "Average Frequency Utilization relative nominal frequency", + "MetricExpr": "CPU_CLK_UNHALTED.THREAD / CPU_CLK_UNHALTED.REF_TSC", + "MetricGroup": "Power", + "MetricName": "Turbo_Utilization" + }, + { + "BriefDescription": "Fraction of cycles where both hardware threads were active", + "MetricExpr": "1 - CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE / ( CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY / 2 ) if #SMT_on else 0", + "MetricGroup": "SMT;Summary", + "MetricName": "SMT_2T_Utilization" + }, + { + "BriefDescription": "Fraction of cycles spent in Kernel mode", + "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:u / CPU_CLK_UNHALTED.REF_TSC", + "MetricGroup": "Summary", + "MetricName": "Kernel_Utilization" + }, + { + "BriefDescription": "C3 residency percent per core", + "MetricExpr": "(cstate_core@c3\\-residency@ / msr@tsc@) * 100", + "MetricGroup": "Power", + "MetricName": "C3_Core_Residency" + }, + { + "BriefDescription": "C6 residency percent per core", + "MetricExpr": "(cstate_core@c6\\-residency@ / msr@tsc@) * 100", + "MetricGroup": "Power", + "MetricName": "C6_Core_Residency" + }, + { + "BriefDescription": "C7 residency percent per core", + "MetricExpr": "(cstate_core@c7\\-residency@ / msr@tsc@) * 100", + "MetricGroup": "Power", + "MetricName": "C7_Core_Residency" + }, + { + "BriefDescription": "C2 residency percent per package", + "MetricExpr": "(cstate_pkg@c2\\-residency@ / msr@tsc@) * 100", + "MetricGroup": "Power", + "MetricName": "C2_Pkg_Residency" + }, + { + "BriefDescription": "C3 residency percent per package", + "MetricExpr": "(cstate_pkg@c3\\-residency@ / msr@tsc@) * 100", + "MetricGroup": "Power", + "MetricName": "C3_Pkg_Residency" + }, + { + "BriefDescription": "C6 residency percent per package", + "MetricExpr": "(cstate_pkg@c6\\-residency@ / msr@tsc@) * 100", + "MetricGroup": "Power", + "MetricName": "C6_Pkg_Residency" + }, + { + "BriefDescription": "C7 residency percent per package", + "MetricExpr": "(cstate_pkg@c7\\-residency@ / msr@tsc@) * 100", + "MetricGroup": "Power", + "MetricName": "C7_Pkg_Residency" + } +] -- cgit v1.2.3 From 8853d2de0efe25572d3a7033bbb87c9b1208391e Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Sun, 23 Jul 2017 21:51:25 -0700 Subject: perf vendor events: Add JSON metrics for Ivy Bridge Add JSON metrics for Ivy Bridge. Signed-off-by: Andi Kleen Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20170908180133.GA20128@tassilo.jf.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- .../pmu-events/arch/x86/ivybridge/ivb-metrics.json | 164 +++++++++++++++++++++ 1 file changed, 164 insertions(+) create mode 100644 tools/perf/pmu-events/arch/x86/ivybridge/ivb-metrics.json (limited to 'tools/perf') diff --git a/tools/perf/pmu-events/arch/x86/ivybridge/ivb-metrics.json b/tools/perf/pmu-events/arch/x86/ivybridge/ivb-metrics.json new file mode 100644 index 000000000000..057a832f1a6a --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/ivybridge/ivb-metrics.json @@ -0,0 +1,164 @@ +[ + { + "BriefDescription": "Instructions Per Cycle (per logical thread)", + "MetricExpr": "INST_RETIRED.ANY / CPU_CLK_UNHALTED.THREAD", + "MetricGroup": "TopDownL1", + "MetricName": "IPC" + }, + { + "BriefDescription": "Uops Per Instruction", + "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY", + "MetricGroup": "Pipeline", + "MetricName": "UPI" + }, + { + "BriefDescription": "Rough Estimation of fraction of fetched lines bytes that were likely consumed by program instructions", + "MetricExpr": "min( 1 , UOPS_ISSUED.ANY / ( UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY * 32 * ( ICACHE.HIT + ICACHE.MISSES ) / 4) )", + "MetricGroup": "Frontend", + "MetricName": "IFetch_Line_Utilization" + }, + { + "BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded Icache; or Uop Cache)", + "MetricExpr": "IDQ.DSB_UOPS / ( IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS )", + "MetricGroup": "DSB; Frontend_Bandwidth", + "MetricName": "DSB_Coverage" + }, + { + "BriefDescription": "Cycles Per Instruction (threaded)", + "MetricExpr": "1 / INST_RETIRED.ANY / cycles", + "MetricGroup": "Pipeline;Summary", + "MetricName": "CPI" + }, + { + "BriefDescription": "Per-thread actual clocks when the logical processor is active. This is called 'Clockticks' in VTune.", + "MetricExpr": "CPU_CLK_UNHALTED.THREAD", + "MetricGroup": "Summary", + "MetricName": "CLKS" + }, + { + "BriefDescription": "Total issue-pipeline slots", + "MetricExpr": "4*( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles", + "MetricGroup": "TopDownL1", + "MetricName": "SLOTS" + }, + { + "BriefDescription": "Total number of retired Instructions", + "MetricExpr": "INST_RETIRED.ANY", + "MetricGroup": "Summary", + "MetricName": "Instructions" + }, + { + "BriefDescription": "Instructions Per Cycle (per physical core)", + "MetricExpr": "INST_RETIRED.ANY / ( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles", + "MetricGroup": "SMT", + "MetricName": "CoreIPC" + }, + { + "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is at least 1 uop executed)", + "MetricExpr": "UOPS_EXECUTED.THREAD / ( cpu@uops_executed.core\\,cmask\\=1@ / 2) if #SMT_on else UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC", + "MetricGroup": "Pipeline;Ports_Utilization", + "MetricName": "ILP" + }, + { + "BriefDescription": "Average Branch Address Clear Cost (fraction of cycles)", + "MetricExpr": "2* ( RS_EVENTS.EMPTY_CYCLES - ICACHE.IFETCH_STALL ) / RS_EVENTS.EMPTY_END", + "MetricGroup": "Unknown_Branches", + "MetricName": "BAClear_Cost" + }, + { + "BriefDescription": "Core actual clocks when any thread is active on the physical core", + "MetricExpr": "( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else CPU_CLK_UNHALTED.THREAD", + "MetricGroup": "SMT", + "MetricName": "CORE_CLKS" + }, + { + "BriefDescription": "Actual Average Latency for L1 data-cache miss demand loads", + "MetricExpr": "L1D_PEND_MISS.PENDING / ( MEM_LOAD_UOPS_RETIRED.L1_MISS + mem_load_uops_retired.hit_lfb )", + "MetricGroup": "Memory_Bound;Memory_Lat", + "MetricName": "Load_Miss_Real_Latency" + }, + { + "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least 1 such miss)", + "MetricExpr": "L1D_PEND_MISS.PENDING / ( cpu@l1d_pend_miss.pending_cycles\\,any\\=1@ / 2) if #SMT_on else L1D_PEND_MISS.PENDING_CYCLES", + "MetricGroup": "Memory_Bound;Memory_BW", + "MetricName": "MLP" + }, + { + "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses", + "MetricExpr": "( ITLB_MISSES.WALK_DURATION + DTLB_LOAD_MISSES.WALK_DURATION + DTLB_STORE_MISSES.WALK_DURATION ) / ( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles", + "MetricGroup": "TLB", + "MetricName": "Page_Walks_Utilization" + }, + { + "BriefDescription": "Average CPU Utilization", + "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@", + "MetricGroup": "Summary", + "MetricName": "CPU_Utilization" + }, + { + "BriefDescription": "Giga Floating Point Operations Per Second", + "MetricExpr": "( 1*( FP_COMP_OPS_EXE.SSE_SCALAR_SINGLE + FP_COMP_OPS_EXE.SSE_SCALAR_DOUBLE ) + 2* FP_COMP_OPS_EXE.SSE_PACKED_DOUBLE + 4*( FP_COMP_OPS_EXE.SSE_PACKED_SINGLE + SIMD_FP_256.PACKED_DOUBLE ) + 8* SIMD_FP_256.PACKED_SINGLE ) / 1000000000 / duration_time", + "MetricGroup": "FLOPS;Summary", + "MetricName": "GFLOPs" + }, + { + "BriefDescription": "Average Frequency Utilization relative nominal frequency", + "MetricExpr": "CPU_CLK_UNHALTED.THREAD / CPU_CLK_UNHALTED.REF_TSC", + "MetricGroup": "Power", + "MetricName": "Turbo_Utilization" + }, + { + "BriefDescription": "Fraction of cycles where both hardware threads were active", + "MetricExpr": "1 - CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE / ( CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY / 2 ) if #SMT_on else 0", + "MetricGroup": "SMT;Summary", + "MetricName": "SMT_2T_Utilization" + }, + { + "BriefDescription": "Fraction of cycles spent in Kernel mode", + "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:u / CPU_CLK_UNHALTED.REF_TSC", + "MetricGroup": "Summary", + "MetricName": "Kernel_Utilization" + }, + { + "BriefDescription": "C3 residency percent per core", + "MetricExpr": "(cstate_core@c3\\-residency@ / msr@tsc@) * 100", + "MetricGroup": "Power", + "MetricName": "C3_Core_Residency" + }, + { + "BriefDescription": "C6 residency percent per core", + "MetricExpr": "(cstate_core@c6\\-residency@ / msr@tsc@) * 100", + "MetricGroup": "Power", + "MetricName": "C6_Core_Residency" + }, + { + "BriefDescription": "C7 residency percent per core", + "MetricExpr": "(cstate_core@c7\\-residency@ / msr@tsc@) * 100", + "MetricGroup": "Power", + "MetricName": "C7_Core_Residency" + }, + { + "BriefDescription": "C2 residency percent per package", + "MetricExpr": "(cstate_pkg@c2\\-residency@ / msr@tsc@) * 100", + "MetricGroup": "Power", + "MetricName": "C2_Pkg_Residency" + }, + { + "BriefDescription": "C3 residency percent per package", + "MetricExpr": "(cstate_pkg@c3\\-residency@ / msr@tsc@) * 100", + "MetricGroup": "Power", + "MetricName": "C3_Pkg_Residency" + }, + { + "BriefDescription": "C6 residency percent per package", + "MetricExpr": "(cstate_pkg@c6\\-residency@ / msr@tsc@) * 100", + "MetricGroup": "Power", + "MetricName": "C6_Pkg_Residency" + }, + { + "BriefDescription": "C7 residency percent per package", + "MetricExpr": "(cstate_pkg@c7\\-residency@ / msr@tsc@) * 100", + "MetricGroup": "Power", + "MetricName": "C7_Pkg_Residency" + } +] -- cgit v1.2.3 From 2099f51d1851c8955ed3406683be0b961c7792cb Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Sun, 23 Jul 2017 21:51:47 -0700 Subject: perf vendor events: Add JSON metrics for Haswell Add JSON metrics for Haswell. Signed-off-by: Andi Kleen Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20170908180133.GA20128@tassilo.jf.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- .../pmu-events/arch/x86/haswell/hsw-metrics.json | 158 +++++++++++++++++++++ 1 file changed, 158 insertions(+) create mode 100644 tools/perf/pmu-events/arch/x86/haswell/hsw-metrics.json (limited to 'tools/perf') diff --git a/tools/perf/pmu-events/arch/x86/haswell/hsw-metrics.json b/tools/perf/pmu-events/arch/x86/haswell/hsw-metrics.json new file mode 100644 index 000000000000..03d894988f0f --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/haswell/hsw-metrics.json @@ -0,0 +1,158 @@ +[ + { + "BriefDescription": "Instructions Per Cycle (per logical thread)", + "MetricExpr": "INST_RETIRED.ANY / CPU_CLK_UNHALTED.THREAD", + "MetricGroup": "TopDownL1", + "MetricName": "IPC" + }, + { + "BriefDescription": "Uops Per Instruction", + "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY", + "MetricGroup": "Pipeline", + "MetricName": "UPI" + }, + { + "BriefDescription": "Rough Estimation of fraction of fetched lines bytes that were likely consumed by program instructions", + "MetricExpr": "min( 1 , IDQ.MITE_UOPS / ( UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY * 16 * ( ICACHE.HIT + ICACHE.MISSES ) / 4.0 ) )", + "MetricGroup": "Frontend", + "MetricName": "IFetch_Line_Utilization" + }, + { + "BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded Icache; or Uop Cache)", + "MetricExpr": "IDQ.DSB_UOPS / ( IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS )", + "MetricGroup": "DSB; Frontend_Bandwidth", + "MetricName": "DSB_Coverage" + }, + { + "BriefDescription": "Cycles Per Instruction (threaded)", + "MetricExpr": "1 / INST_RETIRED.ANY / cycles", + "MetricGroup": "Pipeline;Summary", + "MetricName": "CPI" + }, + { + "BriefDescription": "Per-thread actual clocks when the logical processor is active. This is called 'Clockticks' in VTune.", + "MetricExpr": "CPU_CLK_UNHALTED.THREAD", + "MetricGroup": "Summary", + "MetricName": "CLKS" + }, + { + "BriefDescription": "Total issue-pipeline slots", + "MetricExpr": "4*( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles", + "MetricGroup": "TopDownL1", + "MetricName": "SLOTS" + }, + { + "BriefDescription": "Total number of retired Instructions", + "MetricExpr": "INST_RETIRED.ANY", + "MetricGroup": "Summary", + "MetricName": "Instructions" + }, + { + "BriefDescription": "Instructions Per Cycle (per physical core)", + "MetricExpr": "INST_RETIRED.ANY / ( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles", + "MetricGroup": "SMT", + "MetricName": "CoreIPC" + }, + { + "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is at least 1 uop executed)", + "MetricExpr": "( UOPS_EXECUTED.CORE / 2 / ( cpu@uops_executed.core\\,cmask\\=1@ / 2)) if #SMT_on else (UOPS_EXECUTED.CORE / cpu@uops_executed.core\\,cmask\\=1@)", + "MetricGroup": "Pipeline;Ports_Utilization", + "MetricName": "ILP" + }, + { + "BriefDescription": "Average Branch Address Clear Cost (fraction of cycles)", + "MetricExpr": "2* ( RS_EVENTS.EMPTY_CYCLES - ICACHE.IFDATA_STALL - ( 14 * ITLB_MISSES.STLB_HIT + ITLB_MISSES.WALK_DURATION ) ) / RS_EVENTS.EMPTY_END", + "MetricGroup": "Unknown_Branches", + "MetricName": "BAClear_Cost" + }, + { + "BriefDescription": "Core actual clocks when any thread is active on the physical core", + "MetricExpr": "( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else CPU_CLK_UNHALTED.THREAD", + "MetricGroup": "SMT", + "MetricName": "CORE_CLKS" + }, + { + "BriefDescription": "Actual Average Latency for L1 data-cache miss demand loads", + "MetricExpr": "L1D_PEND_MISS.PENDING / ( MEM_LOAD_UOPS_RETIRED.L1_MISS + mem_load_uops_retired.hit_lfb )", + "MetricGroup": "Memory_Bound;Memory_Lat", + "MetricName": "Load_Miss_Real_Latency" + }, + { + "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least 1 such miss)", + "MetricExpr": "L1D_PEND_MISS.PENDING / ( cpu@l1d_pend_miss.pending_cycles\\,any\\=1@ / 2) if #SMT_on else L1D_PEND_MISS.PENDING_CYCLES", + "MetricGroup": "Memory_Bound;Memory_BW", + "MetricName": "MLP" + }, + { + "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses", + "MetricExpr": "( ITLB_MISSES.WALK_DURATION + DTLB_LOAD_MISSES.WALK_DURATION + DTLB_STORE_MISSES.WALK_DURATION ) / ( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles", + "MetricGroup": "TLB", + "MetricName": "Page_Walks_Utilization" + }, + { + "BriefDescription": "Average CPU Utilization", + "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@", + "MetricGroup": "Summary", + "MetricName": "CPU_Utilization" + }, + { + "BriefDescription": "Average Frequency Utilization relative nominal frequency", + "MetricExpr": "CPU_CLK_UNHALTED.THREAD / CPU_CLK_UNHALTED.REF_TSC", + "MetricGroup": "Power", + "MetricName": "Turbo_Utilization" + }, + { + "BriefDescription": "Fraction of cycles where both hardware threads were active", + "MetricExpr": "1 - CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE / ( CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY / 2 ) if #SMT_on else 0", + "MetricGroup": "SMT;Summary", + "MetricName": "SMT_2T_Utilization" + }, + { + "BriefDescription": "Fraction of cycles spent in Kernel mode", + "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:u / CPU_CLK_UNHALTED.REF_TSC", + "MetricGroup": "Summary", + "MetricName": "Kernel_Utilization" + }, + { + "BriefDescription": "C3 residency percent per core", + "MetricExpr": "(cstate_core@c3\\-residency@ / msr@tsc@) * 100", + "MetricGroup": "Power", + "MetricName": "C3_Core_Residency" + }, + { + "BriefDescription": "C6 residency percent per core", + "MetricExpr": "(cstate_core@c6\\-residency@ / msr@tsc@) * 100", + "MetricGroup": "Power", + "MetricName": "C6_Core_Residency" + }, + { + "BriefDescription": "C7 residency percent per core", + "MetricExpr": "(cstate_core@c7\\-residency@ / msr@tsc@) * 100", + "MetricGroup": "Power", + "MetricName": "C7_Core_Residency" + }, + { + "BriefDescription": "C2 residency percent per package", + "MetricExpr": "(cstate_pkg@c2\\-residency@ / msr@tsc@) * 100", + "MetricGroup": "Power", + "MetricName": "C2_Pkg_Residency" + }, + { + "BriefDescription": "C3 residency percent per package", + "MetricExpr": "(cstate_pkg@c3\\-residency@ / msr@tsc@) * 100", + "MetricGroup": "Power", + "MetricName": "C3_Pkg_Residency" + }, + { + "BriefDescription": "C6 residency percent per package", + "MetricExpr": "(cstate_pkg@c6\\-residency@ / msr@tsc@) * 100", + "MetricGroup": "Power", + "MetricName": "C6_Pkg_Residency" + }, + { + "BriefDescription": "C7 residency percent per package", + "MetricExpr": "(cstate_pkg@c7\\-residency@ / msr@tsc@) * 100", + "MetricGroup": "Power", + "MetricName": "C7_Pkg_Residency" + } +] -- cgit v1.2.3 From 43fd36a19d501483d5ec243dd28268646f717cde Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Sun, 23 Jul 2017 21:55:16 -0700 Subject: perf vendor events: Add JSON metrics for Ivy Town Add JSON metrics for Ivy Town. Signed-off-by: Andi Kleen Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20170908180133.GA20128@tassilo.jf.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- .../pmu-events/arch/x86/ivytown/ivt-metrics.json | 164 +++++++++++++++++++++ 1 file changed, 164 insertions(+) create mode 100644 tools/perf/pmu-events/arch/x86/ivytown/ivt-metrics.json (limited to 'tools/perf') diff --git a/tools/perf/pmu-events/arch/x86/ivytown/ivt-metrics.json b/tools/perf/pmu-events/arch/x86/ivytown/ivt-metrics.json new file mode 100644 index 000000000000..057a832f1a6a --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/ivytown/ivt-metrics.json @@ -0,0 +1,164 @@ +[ + { + "BriefDescription": "Instructions Per Cycle (per logical thread)", + "MetricExpr": "INST_RETIRED.ANY / CPU_CLK_UNHALTED.THREAD", + "MetricGroup": "TopDownL1", + "MetricName": "IPC" + }, + { + "BriefDescription": "Uops Per Instruction", + "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY", + "MetricGroup": "Pipeline", + "MetricName": "UPI" + }, + { + "BriefDescription": "Rough Estimation of fraction of fetched lines bytes that were likely consumed by program instructions", + "MetricExpr": "min( 1 , UOPS_ISSUED.ANY / ( UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY * 32 * ( ICACHE.HIT + ICACHE.MISSES ) / 4) )", + "MetricGroup": "Frontend", + "MetricName": "IFetch_Line_Utilization" + }, + { + "BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded Icache; or Uop Cache)", + "MetricExpr": "IDQ.DSB_UOPS / ( IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS )", + "MetricGroup": "DSB; Frontend_Bandwidth", + "MetricName": "DSB_Coverage" + }, + { + "BriefDescription": "Cycles Per Instruction (threaded)", + "MetricExpr": "1 / INST_RETIRED.ANY / cycles", + "MetricGroup": "Pipeline;Summary", + "MetricName": "CPI" + }, + { + "BriefDescription": "Per-thread actual clocks when the logical processor is active. This is called 'Clockticks' in VTune.", + "MetricExpr": "CPU_CLK_UNHALTED.THREAD", + "MetricGroup": "Summary", + "MetricName": "CLKS" + }, + { + "BriefDescription": "Total issue-pipeline slots", + "MetricExpr": "4*( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles", + "MetricGroup": "TopDownL1", + "MetricName": "SLOTS" + }, + { + "BriefDescription": "Total number of retired Instructions", + "MetricExpr": "INST_RETIRED.ANY", + "MetricGroup": "Summary", + "MetricName": "Instructions" + }, + { + "BriefDescription": "Instructions Per Cycle (per physical core)", + "MetricExpr": "INST_RETIRED.ANY / ( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles", + "MetricGroup": "SMT", + "MetricName": "CoreIPC" + }, + { + "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is at least 1 uop executed)", + "MetricExpr": "UOPS_EXECUTED.THREAD / ( cpu@uops_executed.core\\,cmask\\=1@ / 2) if #SMT_on else UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC", + "MetricGroup": "Pipeline;Ports_Utilization", + "MetricName": "ILP" + }, + { + "BriefDescription": "Average Branch Address Clear Cost (fraction of cycles)", + "MetricExpr": "2* ( RS_EVENTS.EMPTY_CYCLES - ICACHE.IFETCH_STALL ) / RS_EVENTS.EMPTY_END", + "MetricGroup": "Unknown_Branches", + "MetricName": "BAClear_Cost" + }, + { + "BriefDescription": "Core actual clocks when any thread is active on the physical core", + "MetricExpr": "( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else CPU_CLK_UNHALTED.THREAD", + "MetricGroup": "SMT", + "MetricName": "CORE_CLKS" + }, + { + "BriefDescription": "Actual Average Latency for L1 data-cache miss demand loads", + "MetricExpr": "L1D_PEND_MISS.PENDING / ( MEM_LOAD_UOPS_RETIRED.L1_MISS + mem_load_uops_retired.hit_lfb )", + "MetricGroup": "Memory_Bound;Memory_Lat", + "MetricName": "Load_Miss_Real_Latency" + }, + { + "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least 1 such miss)", + "MetricExpr": "L1D_PEND_MISS.PENDING / ( cpu@l1d_pend_miss.pending_cycles\\,any\\=1@ / 2) if #SMT_on else L1D_PEND_MISS.PENDING_CYCLES", + "MetricGroup": "Memory_Bound;Memory_BW", + "MetricName": "MLP" + }, + { + "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses", + "MetricExpr": "( ITLB_MISSES.WALK_DURATION + DTLB_LOAD_MISSES.WALK_DURATION + DTLB_STORE_MISSES.WALK_DURATION ) / ( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles", + "MetricGroup": "TLB", + "MetricName": "Page_Walks_Utilization" + }, + { + "BriefDescription": "Average CPU Utilization", + "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@", + "MetricGroup": "Summary", + "MetricName": "CPU_Utilization" + }, + { + "BriefDescription": "Giga Floating Point Operations Per Second", + "MetricExpr": "( 1*( FP_COMP_OPS_EXE.SSE_SCALAR_SINGLE + FP_COMP_OPS_EXE.SSE_SCALAR_DOUBLE ) + 2* FP_COMP_OPS_EXE.SSE_PACKED_DOUBLE + 4*( FP_COMP_OPS_EXE.SSE_PACKED_SINGLE + SIMD_FP_256.PACKED_DOUBLE ) + 8* SIMD_FP_256.PACKED_SINGLE ) / 1000000000 / duration_time", + "MetricGroup": "FLOPS;Summary", + "MetricName": "GFLOPs" + }, + { + "BriefDescription": "Average Frequency Utilization relative nominal frequency", + "MetricExpr": "CPU_CLK_UNHALTED.THREAD / CPU_CLK_UNHALTED.REF_TSC", + "MetricGroup": "Power", + "MetricName": "Turbo_Utilization" + }, + { + "BriefDescription": "Fraction of cycles where both hardware threads were active", + "MetricExpr": "1 - CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE / ( CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY / 2 ) if #SMT_on else 0", + "MetricGroup": "SMT;Summary", + "MetricName": "SMT_2T_Utilization" + }, + { + "BriefDescription": "Fraction of cycles spent in Kernel mode", + "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:u / CPU_CLK_UNHALTED.REF_TSC", + "MetricGroup": "Summary", + "MetricName": "Kernel_Utilization" + }, + { + "BriefDescription": "C3 residency percent per core", + "MetricExpr": "(cstate_core@c3\\-residency@ / msr@tsc@) * 100", + "MetricGroup": "Power", + "MetricName": "C3_Core_Residency" + }, + { + "BriefDescription": "C6 residency percent per core", + "MetricExpr": "(cstate_core@c6\\-residency@ / msr@tsc@) * 100", + "MetricGroup": "Power", + "MetricName": "C6_Core_Residency" + }, + { + "BriefDescription": "C7 residency percent per core", + "MetricExpr": "(cstate_core@c7\\-residency@ / msr@tsc@) * 100", + "MetricGroup": "Power", + "MetricName": "C7_Core_Residency" + }, + { + "BriefDescription": "C2 residency percent per package", + "MetricExpr": "(cstate_pkg@c2\\-residency@ / msr@tsc@) * 100", + "MetricGroup": "Power", + "MetricName": "C2_Pkg_Residency" + }, + { + "BriefDescription": "C3 residency percent per package", + "MetricExpr": "(cstate_pkg@c3\\-residency@ / msr@tsc@) * 100", + "MetricGroup": "Power", + "MetricName": "C3_Pkg_Residency" + }, + { + "BriefDescription": "C6 residency percent per package", + "MetricExpr": "(cstate_pkg@c6\\-residency@ / msr@tsc@) * 100", + "MetricGroup": "Power", + "MetricName": "C6_Pkg_Residency" + }, + { + "BriefDescription": "C7 residency percent per package", + "MetricExpr": "(cstate_pkg@c7\\-residency@ / msr@tsc@) * 100", + "MetricGroup": "Power", + "MetricName": "C7_Pkg_Residency" + } +] -- cgit v1.2.3 From 5e49f7321b06428ac68c51096cff9a4a55c3d4d6 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Sun, 23 Jul 2017 21:55:39 -0700 Subject: perf vendor events: Add JSON metrics for Haswell EP Add JSON metrics for Haswell EP. Signed-off-by: Andi Kleen Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20170908180133.GA20128@tassilo.jf.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- .../pmu-events/arch/x86/haswellx/hsx-metrics.json | 158 +++++++++++++++++++++ 1 file changed, 158 insertions(+) create mode 100644 tools/perf/pmu-events/arch/x86/haswellx/hsx-metrics.json (limited to 'tools/perf') diff --git a/tools/perf/pmu-events/arch/x86/haswellx/hsx-metrics.json b/tools/perf/pmu-events/arch/x86/haswellx/hsx-metrics.json new file mode 100644 index 000000000000..0bae5eda9fb3 --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/haswellx/hsx-metrics.json @@ -0,0 +1,158 @@ +[ + { + "BriefDescription": "Instructions Per Cycle (per logical thread)", + "MetricExpr": "INST_RETIRED.ANY / CPU_CLK_UNHALTED.THREAD", + "MetricGroup": "TopDownL1", + "MetricName": "IPC" + }, + { + "BriefDescription": "Uops Per Instruction", + "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY", + "MetricGroup": "Pipeline", + "MetricName": "UPI" + }, + { + "BriefDescription": "Rough Estimation of fraction of fetched lines bytes that were likely consumed by program instructions", + "MetricExpr": "min( 1 , IDQ.MITE_UOPS / ( UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY * 16 * ( ICACHE.HIT + ICACHE.MISSES ) / 4.0 ) )", + "MetricGroup": "Frontend", + "MetricName": "IFetch_Line_Utilization" + }, + { + "BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded Icache; or Uop Cache)", + "MetricExpr": "IDQ.DSB_UOPS / ( IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS )", + "MetricGroup": "DSB; Frontend_Bandwidth", + "MetricName": "DSB_Coverage" + }, + { + "BriefDescription": "Cycles Per Instruction (threaded)", + "MetricExpr": "1 / INST_RETIRED.ANY / cycles", + "MetricGroup": "Pipeline;Summary", + "MetricName": "CPI" + }, + { + "BriefDescription": "Per-thread actual clocks when the logical processor is active. This is called 'Clockticks' in VTune.", + "MetricExpr": "CPU_CLK_UNHALTED.THREAD", + "MetricGroup": "Summary", + "MetricName": "CLKS" + }, + { + "BriefDescription": "Total issue-pipeline slots", + "MetricExpr": "4*( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles", + "MetricGroup": "TopDownL1", + "MetricName": "SLOTS" + }, + { + "BriefDescription": "Total number of retired Instructions", + "MetricExpr": "INST_RETIRED.ANY", + "MetricGroup": "Summary", + "MetricName": "Instructions" + }, + { + "BriefDescription": "Instructions Per Cycle (per physical core)", + "MetricExpr": "INST_RETIRED.ANY / ( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles", + "MetricGroup": "SMT", + "MetricName": "CoreIPC" + }, + { + "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is at least 1 uop executed)", + "MetricExpr": "( UOPS_EXECUTED.CORE / 2 / ( cpu@uops_executed.core\\,cmask\\=1@ / 2)) if #SMT_on else UOPS_EXECUTED.CORE / cpu@uops_executed.core\\,cmask\\=1@", + "MetricGroup": "Pipeline;Ports_Utilization", + "MetricName": "ILP" + }, + { + "BriefDescription": "Average Branch Address Clear Cost (fraction of cycles)", + "MetricExpr": "2* ( RS_EVENTS.EMPTY_CYCLES - ICACHE.IFDATA_STALL - ( 14 * ITLB_MISSES.STLB_HIT + ITLB_MISSES.WALK_DURATION ) ) / RS_EVENTS.EMPTY_END", + "MetricGroup": "Unknown_Branches", + "MetricName": "BAClear_Cost" + }, + { + "BriefDescription": "Core actual clocks when any thread is active on the physical core", + "MetricExpr": "( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else CPU_CLK_UNHALTED.THREAD", + "MetricGroup": "SMT", + "MetricName": "CORE_CLKS" + }, + { + "BriefDescription": "Actual Average Latency for L1 data-cache miss demand loads", + "MetricExpr": "L1D_PEND_MISS.PENDING / ( MEM_LOAD_UOPS_RETIRED.L1_MISS + mem_load_uops_retired.hit_lfb )", + "MetricGroup": "Memory_Bound;Memory_Lat", + "MetricName": "Load_Miss_Real_Latency" + }, + { + "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least 1 such miss)", + "MetricExpr": "L1D_PEND_MISS.PENDING / ( cpu@l1d_pend_miss.pending_cycles\\,any\\=1@ / 2) if #SMT_on else L1D_PEND_MISS.PENDING_CYCLES", + "MetricGroup": "Memory_Bound;Memory_BW", + "MetricName": "MLP" + }, + { + "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses", + "MetricExpr": "( ITLB_MISSES.WALK_DURATION + DTLB_LOAD_MISSES.WALK_DURATION + DTLB_STORE_MISSES.WALK_DURATION ) / ( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles", + "MetricGroup": "TLB", + "MetricName": "Page_Walks_Utilization" + }, + { + "BriefDescription": "Average CPU Utilization", + "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@", + "MetricGroup": "Summary", + "MetricName": "CPU_Utilization" + }, + { + "BriefDescription": "Average Frequency Utilization relative nominal frequency", + "MetricExpr": "CPU_CLK_UNHALTED.THREAD / CPU_CLK_UNHALTED.REF_TSC", + "MetricGroup": "Power", + "MetricName": "Turbo_Utilization" + }, + { + "BriefDescription": "Fraction of cycles where both hardware threads were active", + "MetricExpr": "1 - CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE / ( CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY / 2 ) if #SMT_on else 0", + "MetricGroup": "SMT;Summary", + "MetricName": "SMT_2T_Utilization" + }, + { + "BriefDescription": "Fraction of cycles spent in Kernel mode", + "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:u / CPU_CLK_UNHALTED.REF_TSC", + "MetricGroup": "Summary", + "MetricName": "Kernel_Utilization" + }, + { + "BriefDescription": "C3 residency percent per core", + "MetricExpr": "(cstate_core@c3\\-residency@ / msr@tsc@) * 100", + "MetricGroup": "Power", + "MetricName": "C3_Core_Residency" + }, + { + "BriefDescription": "C6 residency percent per core", + "MetricExpr": "(cstate_core@c6\\-residency@ / msr@tsc@) * 100", + "MetricGroup": "Power", + "MetricName": "C6_Core_Residency" + }, + { + "BriefDescription": "C7 residency percent per core", + "MetricExpr": "(cstate_core@c7\\-residency@ / msr@tsc@) * 100", + "MetricGroup": "Power", + "MetricName": "C7_Core_Residency" + }, + { + "BriefDescription": "C2 residency percent per package", + "MetricExpr": "(cstate_pkg@c2\\-residency@ / msr@tsc@) * 100", + "MetricGroup": "Power", + "MetricName": "C2_Pkg_Residency" + }, + { + "BriefDescription": "C3 residency percent per package", + "MetricExpr": "(cstate_pkg@c3\\-residency@ / msr@tsc@) * 100", + "MetricGroup": "Power", + "MetricName": "C3_Pkg_Residency" + }, + { + "BriefDescription": "C6 residency percent per package", + "MetricExpr": "(cstate_pkg@c6\\-residency@ / msr@tsc@) * 100", + "MetricGroup": "Power", + "MetricName": "C6_Pkg_Residency" + }, + { + "BriefDescription": "C7 residency percent per package", + "MetricExpr": "(cstate_pkg@c7\\-residency@ / msr@tsc@) * 100", + "MetricGroup": "Power", + "MetricName": "C7_Pkg_Residency" + } +] -- cgit v1.2.3 From 6d75abd3e84596933aa2ca91751744271cfec6cb Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Sun, 23 Jul 2017 21:55:59 -0700 Subject: perf vendor events: Add JSON metrics for Broadwell Server Add JSON metrics for Broadwell Server. Signed-off-by: Andi Kleen Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20170908180133.GA20128@tassilo.jf.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- .../arch/x86/broadwellx/bdx-metrics.json | 164 +++++++++++++++++++++ 1 file changed, 164 insertions(+) create mode 100644 tools/perf/pmu-events/arch/x86/broadwellx/bdx-metrics.json (limited to 'tools/perf') diff --git a/tools/perf/pmu-events/arch/x86/broadwellx/bdx-metrics.json b/tools/perf/pmu-events/arch/x86/broadwellx/bdx-metrics.json new file mode 100644 index 000000000000..4248b029d199 --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/broadwellx/bdx-metrics.json @@ -0,0 +1,164 @@ +[ + { + "BriefDescription": "Instructions Per Cycle (per logical thread)", + "MetricExpr": "INST_RETIRED.ANY / CPU_CLK_UNHALTED.THREAD", + "MetricGroup": "TopDownL1", + "MetricName": "IPC" + }, + { + "BriefDescription": "Uops Per Instruction", + "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY", + "MetricGroup": "Pipeline", + "MetricName": "UPI" + }, + { + "BriefDescription": "Rough Estimation of fraction of fetched lines bytes that were likely consumed by program instructions", + "MetricExpr": "min( 1 , IDQ.MITE_UOPS / ( UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY * 16 * ( ICACHE.HIT + ICACHE.MISSES ) / 4.0 ) )", + "MetricGroup": "Frontend", + "MetricName": "IFetch_Line_Utilization" + }, + { + "BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded Icache; or Uop Cache)", + "MetricExpr": "IDQ.DSB_UOPS / ( IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS )", + "MetricGroup": "DSB; Frontend_Bandwidth", + "MetricName": "DSB_Coverage" + }, + { + "BriefDescription": "Cycles Per Instruction (threaded)", + "MetricExpr": "1 / INST_RETIRED.ANY / cycles", + "MetricGroup": "Pipeline;Summary", + "MetricName": "CPI" + }, + { + "BriefDescription": "Per-thread actual clocks when the logical processor is active. This is called 'Clockticks' in VTune.", + "MetricExpr": "CPU_CLK_UNHALTED.THREAD", + "MetricGroup": "Summary", + "MetricName": "CLKS" + }, + { + "BriefDescription": "Total issue-pipeline slots", + "MetricExpr": "4*( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles", + "MetricGroup": "TopDownL1", + "MetricName": "SLOTS" + }, + { + "BriefDescription": "Total number of retired Instructions", + "MetricExpr": "INST_RETIRED.ANY", + "MetricGroup": "Summary", + "MetricName": "Instructions" + }, + { + "BriefDescription": "Instructions Per Cycle (per physical core)", + "MetricExpr": "INST_RETIRED.ANY / ( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles", + "MetricGroup": "SMT", + "MetricName": "CoreIPC" + }, + { + "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is at least 1 uop executed)", + "MetricExpr": "UOPS_EXECUTED.THREAD / ( cpu@uops_executed.core\\,cmask\\=1@ / 2) if #SMT_on else UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC", + "MetricGroup": "Pipeline;Ports_Utilization", + "MetricName": "ILP" + }, + { + "BriefDescription": "Average Branch Address Clear Cost (fraction of cycles)", + "MetricExpr": "2* ( RS_EVENTS.EMPTY_CYCLES - ICACHE.IFDATA_STALL - ( 14 * ITLB_MISSES.STLB_HIT + cpu@ITLB_MISSES.WALK_DURATION\\,cmask\\=1@ + 7* ITLB_MISSES.WALK_COMPLETED ) ) / RS_EVENTS.EMPTY_END", + "MetricGroup": "Unknown_Branches", + "MetricName": "BAClear_Cost" + }, + { + "BriefDescription": "Core actual clocks when any thread is active on the physical core", + "MetricExpr": "( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else CPU_CLK_UNHALTED.THREAD", + "MetricGroup": "SMT", + "MetricName": "CORE_CLKS" + }, + { + "BriefDescription": "Actual Average Latency for L1 data-cache miss demand loads", + "MetricExpr": "L1D_PEND_MISS.PENDING / ( MEM_LOAD_UOPS_RETIRED.L1_MISS + mem_load_uops_retired.hit_lfb )", + "MetricGroup": "Memory_Bound;Memory_Lat", + "MetricName": "Load_Miss_Real_Latency" + }, + { + "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least 1 such miss)", + "MetricExpr": "L1D_PEND_MISS.PENDING / ( cpu@l1d_pend_miss.pending_cycles\\,any\\=1@ / 2) if #SMT_on else L1D_PEND_MISS.PENDING_CYCLES", + "MetricGroup": "Memory_Bound;Memory_BW", + "MetricName": "MLP" + }, + { + "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses", + "MetricExpr": "( ITLB_MISSES.WALK_DURATION + DTLB_LOAD_MISSES.WALK_DURATION + DTLB_STORE_MISSES.WALK_DURATION + 7*(DTLB_STORE_MISSES.WALK_COMPLETED+DTLB_LOAD_MISSES.WALK_COMPLETED+ITLB_MISSES.WALK_COMPLETED) ) / (2*( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)", + "MetricGroup": "TLB", + "MetricName": "Page_Walks_Utilization" + }, + { + "BriefDescription": "Average CPU Utilization", + "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@", + "MetricGroup": "Summary", + "MetricName": "CPU_Utilization" + }, + { + "BriefDescription": "Giga Floating Point Operations Per Second", + "MetricExpr": "( 1*( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2* FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4*( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8* FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE ) / 1000000000 / duration_time", + "MetricGroup": "FLOPS;Summary", + "MetricName": "GFLOPs" + }, + { + "BriefDescription": "Average Frequency Utilization relative nominal frequency", + "MetricExpr": "CPU_CLK_UNHALTED.THREAD / CPU_CLK_UNHALTED.REF_TSC", + "MetricGroup": "Power", + "MetricName": "Turbo_Utilization" + }, + { + "BriefDescription": "Fraction of cycles where both hardware threads were active", + "MetricExpr": "1 - CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE / ( CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY / 2 ) if #SMT_on else 0", + "MetricGroup": "SMT;Summary", + "MetricName": "SMT_2T_Utilization" + }, + { + "BriefDescription": "Fraction of cycles spent in Kernel mode", + "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:u / CPU_CLK_UNHALTED.REF_TSC", + "MetricGroup": "Summary", + "MetricName": "Kernel_Utilization" + }, + { + "BriefDescription": "C3 residency percent per core", + "MetricExpr": "(cstate_core@c3\\-residency@ / msr@tsc@) * 100", + "MetricGroup": "Power", + "MetricName": "C3_Core_Residency" + }, + { + "BriefDescription": "C6 residency percent per core", + "MetricExpr": "(cstate_core@c6\\-residency@ / msr@tsc@) * 100", + "MetricGroup": "Power", + "MetricName": "C6_Core_Residency" + }, + { + "BriefDescription": "C7 residency percent per core", + "MetricExpr": "(cstate_core@c7\\-residency@ / msr@tsc@) * 100", + "MetricGroup": "Power", + "MetricName": "C7_Core_Residency" + }, + { + "BriefDescription": "C2 residency percent per package", + "MetricExpr": "(cstate_pkg@c2\\-residency@ / msr@tsc@) * 100", + "MetricGroup": "Power", + "MetricName": "C2_Pkg_Residency" + }, + { + "BriefDescription": "C3 residency percent per package", + "MetricExpr": "(cstate_pkg@c3\\-residency@ / msr@tsc@) * 100", + "MetricGroup": "Power", + "MetricName": "C3_Pkg_Residency" + }, + { + "BriefDescription": "C6 residency percent per package", + "MetricExpr": "(cstate_pkg@c6\\-residency@ / msr@tsc@) * 100", + "MetricGroup": "Power", + "MetricName": "C6_Pkg_Residency" + }, + { + "BriefDescription": "C7 residency percent per package", + "MetricExpr": "(cstate_pkg@c7\\-residency@ / msr@tsc@) * 100", + "MetricGroup": "Power", + "MetricName": "C7_Pkg_Residency" + } +] -- cgit v1.2.3 From 69e932139db1d23e978256652dbb179d6ed75204 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Sun, 23 Jul 2017 22:00:42 -0700 Subject: perf vendor events: Add JSON metrics for Broadwell DE Add JSON metrics for Broadwell DE Signed-off-by: Andi Kleen Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20170908180133.GA20128@tassilo.jf.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- .../arch/x86/broadwellde/bdwde-metrics.json | 164 +++++++++++++++++++++ 1 file changed, 164 insertions(+) create mode 100644 tools/perf/pmu-events/arch/x86/broadwellde/bdwde-metrics.json (limited to 'tools/perf') diff --git a/tools/perf/pmu-events/arch/x86/broadwellde/bdwde-metrics.json b/tools/perf/pmu-events/arch/x86/broadwellde/bdwde-metrics.json new file mode 100644 index 000000000000..49c5f123d811 --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/broadwellde/bdwde-metrics.json @@ -0,0 +1,164 @@ +[ + { + "BriefDescription": "Instructions Per Cycle (per logical thread)", + "MetricExpr": "INST_RETIRED.ANY / CPU_CLK_UNHALTED.THREAD", + "MetricGroup": "TopDownL1", + "MetricName": "IPC" + }, + { + "BriefDescription": "Uops Per Instruction", + "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY", + "MetricGroup": "Pipeline", + "MetricName": "UPI" + }, + { + "BriefDescription": "Rough Estimation of fraction of fetched lines bytes that were likely consumed by program instructions", + "MetricExpr": "min( 1 , IDQ.MITE_UOPS / ( UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY * 16 * ( ICACHE.HIT + ICACHE.MISSES ) / 4.0 ) )", + "MetricGroup": "Frontend", + "MetricName": "IFetch_Line_Utilization" + }, + { + "BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded Icache; or Uop Cache)", + "MetricExpr": "IDQ.DSB_UOPS / ( IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS )", + "MetricGroup": "DSB; Frontend_Bandwidth", + "MetricName": "DSB_Coverage" + }, + { + "BriefDescription": "Cycles Per Instruction (threaded)", + "MetricExpr": "1 / INST_RETIRED.ANY / cycles", + "MetricGroup": "Pipeline;Summary", + "MetricName": "CPI" + }, + { + "BriefDescription": "Per-thread actual clocks when the logical processor is active. This is called 'Clockticks' in VTune.", + "MetricExpr": "CPU_CLK_UNHALTED.THREAD", + "MetricGroup": "Summary", + "MetricName": "CLKS" + }, + { + "BriefDescription": "Total issue-pipeline slots", + "MetricExpr": "4*( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles", + "MetricGroup": "TopDownL1", + "MetricName": "SLOTS" + }, + { + "BriefDescription": "Total number of retired Instructions", + "MetricExpr": "INST_RETIRED.ANY", + "MetricGroup": "Summary", + "MetricName": "Instructions" + }, + { + "BriefDescription": "Instructions Per Cycle (per physical core)", + "MetricExpr": "INST_RETIRED.ANY / ( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles", + "MetricGroup": "SMT", + "MetricName": "CoreIPC" + }, + { + "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is at least 1 uop executed)", + "MetricExpr": "UOPS_EXECUTED.THREAD / ( cpu@uops_executed.core\\,cmask\\=1@ / 2) if #SMT_on else UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC", + "MetricGroup": "Pipeline;Ports_Utilization", + "MetricName": "ILP" + }, + { + "BriefDescription": "Average Branch Address Clear Cost (fraction of cycles)", + "MetricExpr": "2* ( RS_EVENTS.EMPTY_CYCLES - ICACHE.IFDATA_STALL - ( 14 * ITLB_MISSES.STLB_HIT + cpu@ITLB_MISSES.WALK_DURATION\\,cmask\\=1@ + 7* ITLB_MISSES.WALK_COMPLETED ) ) / RS_EVENTS.EMPTY_END", + "MetricGroup": "Unknown_Branches", + "MetricName": "BAClear_Cost" + }, + { + "BriefDescription": "Core actual clocks when any thread is active on the physical core", + "MetricExpr": "( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else CPU_CLK_UNHALTED.THREAD", + "MetricGroup": "SMT", + "MetricName": "CORE_CLKS" + }, + { + "BriefDescription": "Actual Average Latency for L1 data-cache miss demand loads", + "MetricExpr": "L1D_PEND_MISS.PENDING / ( MEM_LOAD_UOPS_RETIRED.L1_MISS + mem_load_uops_retired.hit_lfb )", + "MetricGroup": "Memory_Bound;Memory_Lat", + "MetricName": "Load_Miss_Real_Latency" + }, + { + "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least 1 such miss)", + "MetricExpr": "L1D_PEND_MISS.PENDING / ( cpu@l1d_pend_miss.pending_cycles\\,any\\=1@ / 2) if #SMT_on else L1D_PEND_MISS.PENDING_CYCLES", + "MetricGroup": "Memory_Bound;Memory_BW", + "MetricName": "MLP" + }, + { + "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses", + "MetricExpr": "( cpu@ITLB_MISSES.WALK_DURATION\\,cmask\\=1@ + cpu@DTLB_LOAD_MISSES.WALK_DURATION\\,cmask\\=1@ + cpu@DTLB_STORE_MISSES.WALK_DURATION\\,cmask\\=1@ + 7*(DTLB_STORE_MISSES.WALK_COMPLETED+DTLB_LOAD_MISSES.WALK_COMPLETED+ITLB_MISSES.WALK_COMPLETED)) / ( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles", + "MetricGroup": "TLB", + "MetricName": "Page_Walks_Utilization" + }, + { + "BriefDescription": "Average CPU Utilization", + "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@", + "MetricGroup": "Summary", + "MetricName": "CPU_Utilization" + }, + { + "BriefDescription": "Giga Floating Point Operations Per Second", + "MetricExpr": "( 1*( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2* FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4*( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8* FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE ) / 1000000000 / duration_time", + "MetricGroup": "FLOPS;Summary", + "MetricName": "GFLOPs" + }, + { + "BriefDescription": "Average Frequency Utilization relative nominal frequency", + "MetricExpr": "CPU_CLK_UNHALTED.THREAD / CPU_CLK_UNHALTED.REF_TSC", + "MetricGroup": "Power", + "MetricName": "Turbo_Utilization" + }, + { + "BriefDescription": "Fraction of cycles where both hardware threads were active", + "MetricExpr": "1 - CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE / ( CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY / 2 ) if #SMT_on else 0", + "MetricGroup": "SMT;Summary", + "MetricName": "SMT_2T_Utilization" + }, + { + "BriefDescription": "Fraction of cycles spent in Kernel mode", + "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:u / CPU_CLK_UNHALTED.REF_TSC", + "MetricGroup": "Summary", + "MetricName": "Kernel_Utilization" + }, + { + "BriefDescription": "C3 residency percent per core", + "MetricExpr": "(cstate_core@c3\\-residency@ / msr@tsc@) * 100", + "MetricGroup": "Power", + "MetricName": "C3_Core_Residency" + }, + { + "BriefDescription": "C6 residency percent per core", + "MetricExpr": "(cstate_core@c6\\-residency@ / msr@tsc@) * 100", + "MetricGroup": "Power", + "MetricName": "C6_Core_Residency" + }, + { + "BriefDescription": "C7 residency percent per core", + "MetricExpr": "(cstate_core@c7\\-residency@ / msr@tsc@) * 100", + "MetricGroup": "Power", + "MetricName": "C7_Core_Residency" + }, + { + "BriefDescription": "C2 residency percent per package", + "MetricExpr": "(cstate_pkg@c2\\-residency@ / msr@tsc@) * 100", + "MetricGroup": "Power", + "MetricName": "C2_Pkg_Residency" + }, + { + "BriefDescription": "C3 residency percent per package", + "MetricExpr": "(cstate_pkg@c3\\-residency@ / msr@tsc@) * 100", + "MetricGroup": "Power", + "MetricName": "C3_Pkg_Residency" + }, + { + "BriefDescription": "C6 residency percent per package", + "MetricExpr": "(cstate_pkg@c6\\-residency@ / msr@tsc@) * 100", + "MetricGroup": "Power", + "MetricName": "C6_Pkg_Residency" + }, + { + "BriefDescription": "C7 residency percent per package", + "MetricExpr": "(cstate_pkg@c7\\-residency@ / msr@tsc@) * 100", + "MetricGroup": "Power", + "MetricName": "C7_Pkg_Residency" + } +] -- cgit v1.2.3 From 56de5b63ffaff859f75c19aff057ee10f20c6c07 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Tue, 5 Sep 2017 16:26:13 -0700 Subject: perf vendor events: Add JSON metrics for Skylake server Add JSON metrics for Skylake server Signed-off-by: Andi Kleen Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20170908180133.GA20128@tassilo.jf.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- .../pmu-events/arch/x86/skylakex/skx-metrics.json | 182 +++++++++++++++++++++ 1 file changed, 182 insertions(+) create mode 100644 tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json (limited to 'tools/perf') diff --git a/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json b/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json new file mode 100644 index 000000000000..68f12a26c816 --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json @@ -0,0 +1,182 @@ +[ + { + "BriefDescription": "Instructions Per Cycle (per logical thread)", + "MetricExpr": "INST_RETIRED.ANY / CPU_CLK_UNHALTED.THREAD", + "MetricGroup": "TopDownL1", + "MetricName": "IPC" + }, + { + "BriefDescription": "Uops Per Instruction", + "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY", + "MetricGroup": "Pipeline", + "MetricName": "UPI" + }, + { + "BriefDescription": "Rough Estimation of fraction of fetched lines bytes that were likely consumed by program instructions", + "MetricExpr": "min( 1 , UOPS_ISSUED.ANY / (UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY * 64 * ( ICACHE_64B.IFTAG_HIT + ICACHE_64B.IFTAG_MISS ) / 4.1) )", + "MetricGroup": "Frontend", + "MetricName": "IFetch_Line_Utilization" + }, + { + "BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded ICache; or Uop Cache)", + "MetricExpr": "IDQ.DSB_UOPS / ( IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS )", + "MetricGroup": "DSB; Frontend_Bandwidth", + "MetricName": "DSB_Coverage" + }, + { + "BriefDescription": "Cycles Per Instruction (threaded)", + "MetricExpr": "1 / INST_RETIRED.ANY / cycles", + "MetricGroup": "Pipeline;Summary", + "MetricName": "CPI" + }, + { + "BriefDescription": "Per-thread actual clocks when the logical processor is active. This is called 'Clockticks' in VTune.", + "MetricExpr": "CPU_CLK_UNHALTED.THREAD", + "MetricGroup": "Summary", + "MetricName": "CLKS" + }, + { + "BriefDescription": "Total issue-pipeline slots (per-core)", + "MetricExpr": "4*cycles if not #SMT_on else (( CPU_CLK_UNHALTED.THREAD / 2) * (CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK )) if #EBS_Mode else ( CPU_CLK_UNHALTED.THREAD_ANY / 2 )", + "MetricGroup": "TopDownL1", + "MetricName": "SLOTS" + }, + { + "BriefDescription": "Total number of retired Instructions", + "MetricExpr": "INST_RETIRED.ANY", + "MetricGroup": "Summary", + "MetricName": "Instructions" + }, + { + "BriefDescription": "Instructions Per Cycle (per physical core)", + "MetricExpr": "INST_RETIRED.ANY / cycles if not #SMT_on else (( CPU_CLK_UNHALTED.THREAD / 2) * (CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK )) if #EBS_Mode else ( CPU_CLK_UNHALTED.THREAD_ANY / 2 )", + "MetricGroup": "SMT", + "MetricName": "CoreIPC" + }, + { + "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is at least 1 uop executed)", + "MetricExpr": "UOPS_EXECUTED.THREAD / ( UOPS_EXECUTED.CORE_CYCLES_GE_1 / 2) if #SMT_on else UOPS_EXECUTED.CORE_CYCLES_GE_1", + "MetricGroup": "Pipeline;Ports_Utilization", + "MetricName": "ILP" + }, + { + "BriefDescription": "Average Branch Address Clear Cost (fraction of cycles)", + "MetricExpr": "( RS_EVENTS.EMPTY_CYCLES - (ICACHE_16B.IFDATA_STALL +2* ICACHE_16B.IFDATA_STALL:c1:e1) - ICACHE_64B.IFTAG_STALL ) / RS_EVENTS.EMPTY_END", + "MetricGroup": "Unknown_Branches", + "MetricName": "BAClear_Cost" + }, + { + "BriefDescription": "Core actual clocks when any thread is active on the physical core", + "MetricExpr": "CPU_CLK_UNHALTED.THREAD if not #SMT_on else (( CPU_CLK_UNHALTED.THREAD / 2) * (CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK )) if 1 else ( CPU_CLK_UNHALTED.THREAD_ANY / 2 )", + "MetricGroup": "SMT", + "MetricName": "CORE_CLKS" + }, + { + "BriefDescription": "Actual Average Latency for L1 data-cache miss demand loads", + "MetricExpr": "L1D_PEND_MISS.PENDING / ( MEM_LOAD_RETIRED.L1_MISS_PS + MEM_LOAD_RETIRED.FB_HIT_PS )", + "MetricGroup": "Memory_Bound;Memory_Lat", + "MetricName": "Load_Miss_Real_Latency" + }, + { + "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least 1 such miss)", + "MetricExpr": "L1D_PEND_MISS.PENDING / ( L1D_PEND_MISS.PENDING_CYCLES_ANY / 2) if #SMT_on else L1D_PEND_MISS.PENDING_CYCLES", + "MetricGroup": "Memory_Bound;Memory_BW", + "MetricName": "MLP" + }, + { + "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses", + "MetricExpr": "( ITLB_MISSES.WALK_PENDING + DTLB_LOAD_MISSES.WALK_PENDING + DTLB_STORE_MISSES.WALK_PENDING + EPT.WALK_PENDING ) / ( 2 * cycles if not #SMT_on else (( CPU_CLK_UNHALTED.THREAD / 2) * (CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK )) if #EBS_Mode else ( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) )", + "MetricGroup": "TLB", + "MetricName": "Page_Walks_Utilization" + }, + { + "BriefDescription": "L1 cache miss per kilo instruction for demand loads", + "MetricExpr": "1000 * MEM_LOAD_RETIRED.L1_MISS_PS / INST_RETIRED.ANY", + "MetricGroup": "Cache_Misses;", + "MetricName": "L1MPKI" + }, + { + "BriefDescription": "L2 cache miss per kilo instruction for demand loads", + "MetricExpr": "1000 * MEM_LOAD_RETIRED.L2_MISS_PS / INST_RETIRED.ANY", + "MetricGroup": "Cache_Misses;", + "MetricName": "L2MPKI" + }, + { + "BriefDescription": "L3 cache miss per kilo instruction for demand loads", + "MetricExpr": "1000 * MEM_LOAD_RETIRED.L3_MISS_PS / INST_RETIRED.ANY", + "MetricGroup": "Cache_Misses;", + "MetricName": "L3MPKI" + }, + { + "BriefDescription": "Average CPU Utilization", + "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@", + "MetricGroup": "Summary", + "MetricName": "CPU_Utilization" + }, + { + "BriefDescription": "Giga Floating Point Operations Per Second", + "MetricExpr": "( 1*( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2* FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4*( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8* (FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE ) + 16* FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE) / 1000000000 / duration_time", + "MetricGroup": "FLOPS;Summary", + "MetricName": "GFLOPs" + }, + { + "BriefDescription": "Average Frequency Utilization relative nominal frequency", + "MetricExpr": "CPU_CLK_UNHALTED.THREAD / CPU_CLK_UNHALTED.REF_TSC", + "MetricGroup": "Power", + "MetricName": "Turbo_Utilization" + }, + { + "BriefDescription": "Fraction of cycles where both hardware threads were active", + "MetricExpr": "1 - CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE / ( CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY / 2 ) if #SMT_on else 0", + "MetricGroup": "SMT;Summary", + "MetricName": "SMT_2T_Utilization" + }, + { + "BriefDescription": "Fraction of cycles spent in Kernel mode", + "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:u / CPU_CLK_UNHALTED.REF_TSC", + "MetricGroup": "Summary", + "MetricName": "Kernel_Utilization" + }, + { + "BriefDescription": "C3 residency percent per core", + "MetricExpr": "(cstate_core@c3\\-residency@ / msr@tsc@) * 100", + "MetricGroup": "Power", + "MetricName": "C3_Core_Residency" + }, + { + "BriefDescription": "C6 residency percent per core", + "MetricExpr": "(cstate_core@c6\\-residency@ / msr@tsc@) * 100", + "MetricGroup": "Power", + "MetricName": "C6_Core_Residency" + }, + { + "BriefDescription": "C7 residency percent per core", + "MetricExpr": "(cstate_core@c7\\-residency@ / msr@tsc@) * 100", + "MetricGroup": "Power", + "MetricName": "C7_Core_Residency" + }, + { + "BriefDescription": "C2 residency percent per package", + "MetricExpr": "(cstate_pkg@c2\\-residency@ / msr@tsc@) * 100", + "MetricGroup": "Power", + "MetricName": "C2_Pkg_Residency" + }, + { + "BriefDescription": "C3 residency percent per package", + "MetricExpr": "(cstate_pkg@c3\\-residency@ / msr@tsc@) * 100", + "MetricGroup": "Power", + "MetricName": "C3_Pkg_Residency" + }, + { + "BriefDescription": "C6 residency percent per package", + "MetricExpr": "(cstate_pkg@c6\\-residency@ / msr@tsc@) * 100", + "MetricGroup": "Power", + "MetricName": "C6_Pkg_Residency" + }, + { + "BriefDescription": "C7 residency percent per package", + "MetricExpr": "(cstate_pkg@c7\\-residency@ / msr@tsc@) * 100", + "MetricGroup": "Power", + "MetricName": "C7_Pkg_Residency" + } +] -- cgit v1.2.3 From 91e467bc568f15da2eac688e131010601e889184 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Sun, 10 Sep 2017 19:23:14 -0700 Subject: perf machine: Use hashtable for machine threads To process any events, it needs to find the thread in the machine first. The machine maintains a rb tree to store all threads. The rb tree is protected by a rw lock. It is not a problem for current perf which serially processing events. However, it will have scalability performance issue to process events in parallel, especially on a heavy load system which have many threads. Introduce a hashtable to divide the big rb tree into many samll rb tree for threads. The index is thread id % hashtable size. It can reduce the lock contention. Committer notes: Renamed some variables and function names to reduce semantic confusion: 'struct threads' pointers: thread -> threads threads hastable index: tid -> hash_bucket struct threads *machine__thread() -> machine__threads() Cast tid to (unsigned int) to handle -1 in machine__threads() (Kan Liang) Signed-off-by: Kan Liang Tested-by: Arnaldo Carvalho de Melo Cc: Adrian Hunter Cc: Andi Kleen Cc: Jiri Olsa Cc: Lukasz Odzioba Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1505096603-215017-2-git-send-email-kan.liang@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-trace.c | 19 ++++--- tools/perf/util/machine.c | 136 +++++++++++++++++++++++++++----------------- tools/perf/util/machine.h | 23 ++++++-- tools/perf/util/rb_resort.h | 5 +- 4 files changed, 117 insertions(+), 66 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 771ddab94bb0..ee8c6e814437 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -2730,20 +2730,23 @@ DEFINE_RESORT_RB(threads, (thread__nr_events(a->thread->priv) < thread__nr_event static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp) { - DECLARE_RESORT_RB_MACHINE_THREADS(threads, trace->host); size_t printed = trace__fprintf_threads_header(fp); struct rb_node *nd; + int i; - if (threads == NULL) { - fprintf(fp, "%s", "Error sorting output by nr_events!\n"); - return 0; - } + for (i = 0; i < THREADS__TABLE_SIZE; i++) { + DECLARE_RESORT_RB_MACHINE_THREADS(threads, trace->host, i); - resort_rb__for_each_entry(nd, threads) - printed += trace__fprintf_thread(fp, threads_entry->thread, trace); + if (threads == NULL) { + fprintf(fp, "%s", "Error sorting output by nr_events!\n"); + return 0; + } - resort_rb__delete(threads); + resort_rb__for_each_entry(nd, threads) + printed += trace__fprintf_thread(fp, threads_entry->thread, trace); + resort_rb__delete(threads); + } return printed; } diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index df709363ef69..f4f926753209 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -33,6 +33,20 @@ static void dsos__init(struct dsos *dsos) pthread_rwlock_init(&dsos->lock, NULL); } +static void machine__threads_init(struct machine *machine) +{ + int i; + + for (i = 0; i < THREADS__TABLE_SIZE; i++) { + struct threads *threads = &machine->threads[i]; + threads->entries = RB_ROOT; + pthread_rwlock_init(&threads->lock, NULL); + threads->nr = 0; + INIT_LIST_HEAD(&threads->dead); + threads->last_match = NULL; + } +} + int machine__init(struct machine *machine, const char *root_dir, pid_t pid) { memset(machine, 0, sizeof(*machine)); @@ -40,11 +54,7 @@ int machine__init(struct machine *machine, const char *root_dir, pid_t pid) RB_CLEAR_NODE(&machine->rb_node); dsos__init(&machine->dsos); - machine->threads = RB_ROOT; - pthread_rwlock_init(&machine->threads_lock, NULL); - machine->nr_threads = 0; - INIT_LIST_HEAD(&machine->dead_threads); - machine->last_match = NULL; + machine__threads_init(machine); machine->vdso_info = NULL; machine->env = NULL; @@ -141,27 +151,37 @@ static void dsos__exit(struct dsos *dsos) void machine__delete_threads(struct machine *machine) { struct rb_node *nd; + int i; - pthread_rwlock_wrlock(&machine->threads_lock); - nd = rb_first(&machine->threads); - while (nd) { - struct thread *t = rb_entry(nd, struct thread, rb_node); + for (i = 0; i < THREADS__TABLE_SIZE; i++) { + struct threads *threads = &machine->threads[i]; + pthread_rwlock_wrlock(&threads->lock); + nd = rb_first(&threads->entries); + while (nd) { + struct thread *t = rb_entry(nd, struct thread, rb_node); - nd = rb_next(nd); - __machine__remove_thread(machine, t, false); + nd = rb_next(nd); + __machine__remove_thread(machine, t, false); + } + pthread_rwlock_unlock(&threads->lock); } - pthread_rwlock_unlock(&machine->threads_lock); } void machine__exit(struct machine *machine) { + int i; + machine__destroy_kernel_maps(machine); map_groups__exit(&machine->kmaps); dsos__exit(&machine->dsos); machine__exit_vdso(machine); zfree(&machine->root_dir); zfree(&machine->current_tid); - pthread_rwlock_destroy(&machine->threads_lock); + + for (i = 0; i < THREADS__TABLE_SIZE; i++) { + struct threads *threads = &machine->threads[i]; + pthread_rwlock_destroy(&threads->lock); + } } void machine__delete(struct machine *machine) @@ -382,7 +402,8 @@ static struct thread *____machine__findnew_thread(struct machine *machine, pid_t pid, pid_t tid, bool create) { - struct rb_node **p = &machine->threads.rb_node; + struct threads *threads = machine__threads(machine, tid); + struct rb_node **p = &threads->entries.rb_node; struct rb_node *parent = NULL; struct thread *th; @@ -391,14 +412,14 @@ static struct thread *____machine__findnew_thread(struct machine *machine, * so most of the time we dont have to look up * the full rbtree: */ - th = machine->last_match; + th = threads->last_match; if (th != NULL) { if (th->tid == tid) { machine__update_thread_pid(machine, th, pid); return thread__get(th); } - machine->last_match = NULL; + threads->last_match = NULL; } while (*p != NULL) { @@ -406,7 +427,7 @@ static struct thread *____machine__findnew_thread(struct machine *machine, th = rb_entry(parent, struct thread, rb_node); if (th->tid == tid) { - machine->last_match = th; + threads->last_match = th; machine__update_thread_pid(machine, th, pid); return thread__get(th); } @@ -423,7 +444,7 @@ static struct thread *____machine__findnew_thread(struct machine *machine, th = thread__new(pid, tid); if (th != NULL) { rb_link_node(&th->rb_node, parent, p); - rb_insert_color(&th->rb_node, &machine->threads); + rb_insert_color(&th->rb_node, &threads->entries); /* * We have to initialize map_groups separately @@ -434,7 +455,7 @@ static struct thread *____machine__findnew_thread(struct machine *machine, * leader and that would screwed the rb tree. */ if (thread__init_map_groups(th, machine)) { - rb_erase_init(&th->rb_node, &machine->threads); + rb_erase_init(&th->rb_node, &threads->entries); RB_CLEAR_NODE(&th->rb_node); thread__put(th); return NULL; @@ -443,8 +464,8 @@ static struct thread *____machine__findnew_thread(struct machine *machine, * It is now in the rbtree, get a ref */ thread__get(th); - machine->last_match = th; - ++machine->nr_threads; + threads->last_match = th; + ++threads->nr; } return th; @@ -458,21 +479,24 @@ struct thread *__machine__findnew_thread(struct machine *machine, pid_t pid, pid struct thread *machine__findnew_thread(struct machine *machine, pid_t pid, pid_t tid) { + struct threads *threads = machine__threads(machine, tid); struct thread *th; - pthread_rwlock_wrlock(&machine->threads_lock); + pthread_rwlock_wrlock(&threads->lock); th = __machine__findnew_thread(machine, pid, tid); - pthread_rwlock_unlock(&machine->threads_lock); + pthread_rwlock_unlock(&threads->lock); return th; } struct thread *machine__find_thread(struct machine *machine, pid_t pid, pid_t tid) { + struct threads *threads = machine__threads(machine, tid); struct thread *th; - pthread_rwlock_rdlock(&machine->threads_lock); + + pthread_rwlock_rdlock(&threads->lock); th = ____machine__findnew_thread(machine, pid, tid, false); - pthread_rwlock_unlock(&machine->threads_lock); + pthread_rwlock_unlock(&threads->lock); return th; } @@ -719,21 +743,24 @@ size_t machine__fprintf_vmlinux_path(struct machine *machine, FILE *fp) size_t machine__fprintf(struct machine *machine, FILE *fp) { - size_t ret; struct rb_node *nd; + size_t ret; + int i; - pthread_rwlock_rdlock(&machine->threads_lock); - - ret = fprintf(fp, "Threads: %u\n", machine->nr_threads); + for (i = 0; i < THREADS__TABLE_SIZE; i++) { + struct threads *threads = &machine->threads[i]; + pthread_rwlock_rdlock(&threads->lock); - for (nd = rb_first(&machine->threads); nd; nd = rb_next(nd)) { - struct thread *pos = rb_entry(nd, struct thread, rb_node); + ret = fprintf(fp, "Threads: %u\n", threads->nr); - ret += thread__fprintf(pos, fp); - } + for (nd = rb_first(&threads->entries); nd; nd = rb_next(nd)) { + struct thread *pos = rb_entry(nd, struct thread, rb_node); - pthread_rwlock_unlock(&machine->threads_lock); + ret += thread__fprintf(pos, fp); + } + pthread_rwlock_unlock(&threads->lock); + } return ret; } @@ -1479,23 +1506,25 @@ out_problem: static void __machine__remove_thread(struct machine *machine, struct thread *th, bool lock) { - if (machine->last_match == th) - machine->last_match = NULL; + struct threads *threads = machine__threads(machine, th->tid); + + if (threads->last_match == th) + threads->last_match = NULL; BUG_ON(refcount_read(&th->refcnt) == 0); if (lock) - pthread_rwlock_wrlock(&machine->threads_lock); - rb_erase_init(&th->rb_node, &machine->threads); + pthread_rwlock_wrlock(&threads->lock); + rb_erase_init(&th->rb_node, &threads->entries); RB_CLEAR_NODE(&th->rb_node); - --machine->nr_threads; + --threads->nr; /* * Move it first to the dead_threads list, then drop the reference, * if this is the last reference, then the thread__delete destructor * will be called and we will remove it from the dead_threads list. */ - list_add_tail(&th->node, &machine->dead_threads); + list_add_tail(&th->node, &threads->dead); if (lock) - pthread_rwlock_unlock(&machine->threads_lock); + pthread_rwlock_unlock(&threads->lock); thread__put(th); } @@ -2140,21 +2169,26 @@ int machine__for_each_thread(struct machine *machine, int (*fn)(struct thread *thread, void *p), void *priv) { + struct threads *threads; struct rb_node *nd; struct thread *thread; int rc = 0; + int i; - for (nd = rb_first(&machine->threads); nd; nd = rb_next(nd)) { - thread = rb_entry(nd, struct thread, rb_node); - rc = fn(thread, priv); - if (rc != 0) - return rc; - } + for (i = 0; i < THREADS__TABLE_SIZE; i++) { + threads = &machine->threads[i]; + for (nd = rb_first(&threads->entries); nd; nd = rb_next(nd)) { + thread = rb_entry(nd, struct thread, rb_node); + rc = fn(thread, priv); + if (rc != 0) + return rc; + } - list_for_each_entry(thread, &machine->dead_threads, node) { - rc = fn(thread, priv); - if (rc != 0) - return rc; + list_for_each_entry(thread, &threads->dead, node) { + rc = fn(thread, priv); + if (rc != 0) + return rc; + } } return rc; } diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h index 3cdb1340f917..fe2f05848050 100644 --- a/tools/perf/util/machine.h +++ b/tools/perf/util/machine.h @@ -23,6 +23,17 @@ extern const char *ref_reloc_sym_names[]; struct vdso_info; +#define THREADS__TABLE_BITS 8 +#define THREADS__TABLE_SIZE (1 << THREADS__TABLE_BITS) + +struct threads { + struct rb_root entries; + pthread_rwlock_t lock; + unsigned int nr; + struct list_head dead; + struct thread *last_match; +}; + struct machine { struct rb_node rb_node; pid_t pid; @@ -30,11 +41,7 @@ struct machine { bool comm_exec; bool kptr_restrict_warned; char *root_dir; - struct rb_root threads; - pthread_rwlock_t threads_lock; - unsigned int nr_threads; - struct list_head dead_threads; - struct thread *last_match; + struct threads threads[THREADS__TABLE_SIZE]; struct vdso_info *vdso_info; struct perf_env *env; struct dsos dsos; @@ -48,6 +55,12 @@ struct machine { }; }; +static inline struct threads *machine__threads(struct machine *machine, pid_t tid) +{ + /* Cast it to handle tid == -1 */ + return &machine->threads[(unsigned int)tid % THREADS__TABLE_SIZE]; +} + static inline struct map *__machine__kernel_map(struct machine *machine, enum map_type type) { diff --git a/tools/perf/util/rb_resort.h b/tools/perf/util/rb_resort.h index 808cc45611fe..b30746f5f613 100644 --- a/tools/perf/util/rb_resort.h +++ b/tools/perf/util/rb_resort.h @@ -143,7 +143,8 @@ struct __name##_sorted *__name = __name##_sorted__new __ilist->rblist.nr_entries) /* For 'struct machine->threads' */ -#define DECLARE_RESORT_RB_MACHINE_THREADS(__name, __machine) \ - DECLARE_RESORT_RB(__name)(&__machine->threads, __machine->nr_threads) +#define DECLARE_RESORT_RB_MACHINE_THREADS(__name, __machine, hash_bucket) \ + DECLARE_RESORT_RB(__name)(&__machine->threads[hash_bucket].entries, \ + __machine->threads[hash_bucket].nr) #endif /* _PERF_RESORT_RB_H_ */ -- cgit v1.2.3 From 75e45e432052c7b1a5da866cff88192db8be1445 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 14 Sep 2017 16:16:34 -0300 Subject: perf machine: Optimize a bit the machine__findnew_thread() methods In some cases we already have calculated the hash bucket, so reuse it. Cc: Adrian Hunter Cc: Andi Kleen Cc: David Ahern Cc: Jiri Olsa Cc: Kan Liang Cc: Lukasz Odzioba Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-800zehjsyy03er4s4jf0e99v@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/machine.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index f4f926753209..ddeea05eae86 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -399,10 +399,10 @@ out_err: * lookup/new thread inserted. */ static struct thread *____machine__findnew_thread(struct machine *machine, + struct threads *threads, pid_t pid, pid_t tid, bool create) { - struct threads *threads = machine__threads(machine, tid); struct rb_node **p = &threads->entries.rb_node; struct rb_node *parent = NULL; struct thread *th; @@ -473,7 +473,7 @@ static struct thread *____machine__findnew_thread(struct machine *machine, struct thread *__machine__findnew_thread(struct machine *machine, pid_t pid, pid_t tid) { - return ____machine__findnew_thread(machine, pid, tid, true); + return ____machine__findnew_thread(machine, machine__threads(machine, tid), pid, tid, true); } struct thread *machine__findnew_thread(struct machine *machine, pid_t pid, @@ -495,7 +495,7 @@ struct thread *machine__find_thread(struct machine *machine, pid_t pid, struct thread *th; pthread_rwlock_rdlock(&threads->lock); - th = ____machine__findnew_thread(machine, pid, tid, false); + th = ____machine__findnew_thread(machine, threads, pid, tid, false); pthread_rwlock_unlock(&threads->lock); return th; } -- cgit v1.2.3 From 333b566559019b146905c623bde7f455c1d5add3 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 13 Sep 2017 14:50:06 -0700 Subject: perf pmu: Improve error messages for missing PMUs When a PMU is missing print a better error message mentioning the missing PMU. % mkdir empty % mount --bind empty /sys/devices/msr % perf stat -M Summary true event syntax error: '{inst_retired.any,cycles}:W,{cpu_clk_unhalted.thread}:W,{inst_retired.any}:W,{cpu_clk_unhalted.ref_tsc,msr/tsc/}:W,{fp_comp_ops_exe.sse_scalar..' \___ Cannot find PMU `msr'. Missing kernel support? It still cannot find the right column for aliases, but it's already a vast improvement. v2: Check asprintf Signed-off-by: Andi Kleen Acked-by: Jiri Olsa Link: http://lkml.kernel.org/r/20170913215006.32222-1-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/metricgroup.c | 2 +- tools/perf/util/parse-events.c | 18 ++++++++++++------ tools/perf/util/parse-events.h | 3 +++ 3 files changed, 16 insertions(+), 7 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c index 2d60114f1870..fa37ef79517a 100644 --- a/tools/perf/util/metricgroup.c +++ b/tools/perf/util/metricgroup.c @@ -477,7 +477,7 @@ int metricgroup__parse_groups(const struct option *opt, memset(&parse_error, 0, sizeof(struct parse_events_error)); ret = parse_events(perf_evlist, extra_events.buf, &parse_error); if (ret) { - pr_err("Cannot set up events %s\n", extra_events.buf); + parse_events_print_error(&parse_error, extra_events.buf); goto out; } strbuf_release(&extra_events); diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 75588920fccc..9183913a6174 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -1219,11 +1219,17 @@ static int __parse_events_add_pmu(struct parse_events_state *parse_state, struct perf_pmu_info info; struct perf_pmu *pmu; struct perf_evsel *evsel; + struct parse_events_error *err = parse_state->error; LIST_HEAD(config_terms); pmu = perf_pmu__find(name); - if (!pmu) + if (!pmu) { + if (asprintf(&err->str, + "Cannot find PMU `%s'. Missing kernel support?", + name) < 0) + err->str = NULL; return -EINVAL; + } if (pmu->default_config) { memcpy(&attr, pmu->default_config, @@ -1733,8 +1739,8 @@ static int get_term_width(void) return ws.ws_col > MAX_WIDTH ? MAX_WIDTH : ws.ws_col; } -static void parse_events_print_error(struct parse_events_error *err, - const char *event) +void parse_events_print_error(struct parse_events_error *err, + const char *event) { const char *str = "invalid or unsupported event: "; char _buf[MAX_WIDTH]; @@ -1789,8 +1795,6 @@ static void parse_events_print_error(struct parse_events_error *err, zfree(&err->str); zfree(&err->help); } - - fprintf(stderr, "Run 'perf list' for a list of valid events\n"); } #undef MAX_WIDTH @@ -1802,8 +1806,10 @@ int parse_events_option(const struct option *opt, const char *str, struct parse_events_error err = { .idx = 0, }; int ret = parse_events(evlist, str, &err); - if (ret) + if (ret) { parse_events_print_error(&err, str); + fprintf(stderr, "Run 'perf list' for a list of valid events\n"); + } return ret; } diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h index 635135125111..3909ca0639f2 100644 --- a/tools/perf/util/parse-events.h +++ b/tools/perf/util/parse-events.h @@ -202,6 +202,9 @@ int is_valid_tracepoint(const char *event_string); int valid_event_mount(const char *eventfs); char *parse_events_formats_error_string(char *additional_terms); +void parse_events_print_error(struct parse_events_error *err, + const char *event); + #ifdef HAVE_LIBELF_SUPPORT /* * If the probe point starts with '%', -- cgit v1.2.3 From c896f85a7c15ab9d040ffac8b8003e47996602a2 Mon Sep 17 00:00:00 2001 From: Martin Kepplinger Date: Wed, 13 Sep 2017 21:14:19 +0200 Subject: perf tools: Fix leaking rec_argv in error cases Let's free the allocated rec_argv in case we return early, in order to avoid leaking memory. This adds free() at a few very similar places across the tree where it was missing. Signed-off-by: Martin Kepplinger Cc: Alexander Shishkin Cc: Martin kepplinger Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20170913191419.29806-1-martink@posteo.de Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-c2c.c | 1 + tools/perf/builtin-mem.c | 1 + tools/perf/builtin-timechart.c | 4 +++- tools/perf/builtin-trace.c | 1 + 4 files changed, 6 insertions(+), 1 deletion(-) (limited to 'tools/perf') diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index 475999e48f66..bb1ee22bd221 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -2732,6 +2732,7 @@ static int perf_c2c__record(int argc, const char **argv) if (!perf_mem_events[j].supported) { pr_err("failed: event '%s' not supported\n", perf_mem_events[j].name); + free(rec_argv); return -1; } diff --git a/tools/perf/builtin-mem.c b/tools/perf/builtin-mem.c index 0f15634ef82c..6940490bc3f9 100644 --- a/tools/perf/builtin-mem.c +++ b/tools/perf/builtin-mem.c @@ -112,6 +112,7 @@ static int __cmd_record(int argc, const char **argv, struct perf_mem *mem) if (!perf_mem_events[j].supported) { pr_err("failed: event '%s' not supported\n", perf_mem_events__name(j)); + free(rec_argv); return -1; } diff --git a/tools/perf/builtin-timechart.c b/tools/perf/builtin-timechart.c index 4e2e61695986..01de01ca14f2 100644 --- a/tools/perf/builtin-timechart.c +++ b/tools/perf/builtin-timechart.c @@ -1732,8 +1732,10 @@ static int timechart__io_record(int argc, const char **argv) if (rec_argv == NULL) return -ENOMEM; - if (asprintf(&filter, "common_pid != %d", getpid()) < 0) + if (asprintf(&filter, "common_pid != %d", getpid()) < 0) { + free(rec_argv); return -ENOMEM; + } p = rec_argv; for (i = 0; i < common_args_nr; i++) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index ee8c6e814437..967bd351b58d 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -2078,6 +2078,7 @@ static int trace__record(struct trace *trace, int argc, const char **argv) rec_argv[j++] = "syscalls:sys_enter,syscalls:sys_exit"; else { pr_err("Neither raw_syscalls nor syscalls events exist.\n"); + free(rec_argv); return -1; } } -- cgit v1.2.3 From 411bc316f3365363959c2c895af2618389534583 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 14 Sep 2017 13:57:35 -0700 Subject: perf stat: Fix adding multiple event groups The -M metric group parser threw away the events of earlier groups when multiple groups were specified. Fix this here by not overwriting the string incorrectly. Now this works correctly: % perf stat -M Summary,SMT --metric-only -a sleep 1 Performance counter stats for 'system wide': Instructions CPI CLKS CPU_Utilization GFLOPs SMT_2T_Utilization SMT_2T_Utilization Kernel_Utilization CoreIPC CORE_CLKS 900907376.0 2.7 2398954144.0 0.1 0.0 0.2 0.2 0.1 0.4 2080822855.5 while previously it would only show the SMT metrics. Signed-off-by: Andi Kleen Acked-by: Jiri Olsa Link: http://lkml.kernel.org/r/20170914205735.18431-1-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/metricgroup.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c index fa37ef79517a..0ddd9c199227 100644 --- a/tools/perf/util/metricgroup.c +++ b/tools/perf/util/metricgroup.c @@ -373,9 +373,6 @@ static int metricgroup__add_metric(const char *metric, struct strbuf *events, int ret = -EINVAL; int i, j; - strbuf_init(events, 100); - strbuf_addf(events, "%s", ""); - if (!map) return 0; @@ -433,6 +430,10 @@ static int metricgroup__add_metric_list(const char *list, struct strbuf *events, if (!nlist) return -ENOMEM; llist = nlist; + + strbuf_init(events, 100); + strbuf_addf(events, "%s", ""); + while ((p = strsep(&llist, ",")) != NULL) { ret = metricgroup__add_metric(p, events, group_list); if (ret == -EINVAL) { -- cgit v1.2.3 From 5c9295bfe6f5f59f3f2eee78f58b0523d117897e Mon Sep 17 00:00:00 2001 From: Xiaochen Shen Date: Tue, 19 Sep 2017 12:57:37 +0800 Subject: perf tests: Remove Intel CQM perf test Intel CQM perf test is obsolete for perf PMU code has been removed in commit c39a0e2c8850 ("x86/perf/cqm: Wipe out perf based cqm"). Signed-off-by: Xiaochen Shen Cc: Alexander Shishkin Cc: Fenghua Yu Cc: Matt Fleming Cc: Pei P Jia Cc: Peter Zijlstra Cc: Tony Luck Cc: Vikas Shivappa Link: http://lkml.kernel.org/r/1505797057-16300-1-git-send-email-xiaochen.shen@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/x86/include/arch-tests.h | 1 - tools/perf/arch/x86/tests/Build | 1 - tools/perf/arch/x86/tests/arch-tests.c | 4 - tools/perf/arch/x86/tests/intel-cqm.c | 127 ------------------------------- 4 files changed, 133 deletions(-) delete mode 100644 tools/perf/arch/x86/tests/intel-cqm.c (limited to 'tools/perf') diff --git a/tools/perf/arch/x86/include/arch-tests.h b/tools/perf/arch/x86/include/arch-tests.h index 4e0b806a7a0f..01ad4208bcdf 100644 --- a/tools/perf/arch/x86/include/arch-tests.h +++ b/tools/perf/arch/x86/include/arch-tests.h @@ -8,7 +8,6 @@ struct test; int test__rdpmc(struct test *test __maybe_unused, int subtest); int test__perf_time_to_tsc(struct test *test __maybe_unused, int subtest); int test__insn_x86(struct test *test __maybe_unused, int subtest); -int test__intel_cqm_count_nmi_context(struct test *test __maybe_unused, int subtest); #ifdef HAVE_DWARF_UNWIND_SUPPORT struct thread; diff --git a/tools/perf/arch/x86/tests/Build b/tools/perf/arch/x86/tests/Build index cbb7e978166b..8e2c5a38c3b9 100644 --- a/tools/perf/arch/x86/tests/Build +++ b/tools/perf/arch/x86/tests/Build @@ -5,4 +5,3 @@ libperf-y += arch-tests.o libperf-y += rdpmc.o libperf-y += perf-time-to-tsc.o libperf-$(CONFIG_AUXTRACE) += insn-x86.o -libperf-y += intel-cqm.o diff --git a/tools/perf/arch/x86/tests/arch-tests.c b/tools/perf/arch/x86/tests/arch-tests.c index 99d66191e56c..271c0a0f95d7 100644 --- a/tools/perf/arch/x86/tests/arch-tests.c +++ b/tools/perf/arch/x86/tests/arch-tests.c @@ -23,10 +23,6 @@ struct test arch_tests[] = { .func = test__insn_x86, }, #endif - { - .desc = "Intel cqm nmi context read", - .func = test__intel_cqm_count_nmi_context, - }, { .func = NULL, }, diff --git a/tools/perf/arch/x86/tests/intel-cqm.c b/tools/perf/arch/x86/tests/intel-cqm.c deleted file mode 100644 index 57f86b6e7d6f..000000000000 --- a/tools/perf/arch/x86/tests/intel-cqm.c +++ /dev/null @@ -1,127 +0,0 @@ -#include "tests/tests.h" -#include "perf.h" -#include "cloexec.h" -#include "debug.h" -#include "evlist.h" -#include "evsel.h" -#include "arch-tests.h" - -#include -#include -#include -#include -#include - -static pid_t spawn(void) -{ - pid_t pid; - - pid = fork(); - if (pid) - return pid; - - while(1) - sleep(5); - return 0; -} - -/* - * Create an event group that contains both a sampled hardware - * (cpu-cycles) and software (intel_cqm/llc_occupancy/) event. We then - * wait for the hardware perf counter to overflow and generate a PMI, - * which triggers an event read for both of the events in the group. - * - * Since reading Intel CQM event counters requires sending SMP IPIs, the - * CQM pmu needs to handle the above situation gracefully, and return - * the last read counter value to avoid triggering a WARN_ON_ONCE() in - * smp_call_function_many() caused by sending IPIs from NMI context. - */ -int test__intel_cqm_count_nmi_context(struct test *test __maybe_unused, int subtest __maybe_unused) -{ - struct perf_evlist *evlist = NULL; - struct perf_evsel *evsel = NULL; - struct perf_event_attr pe; - int i, fd[2], flag, ret; - size_t mmap_len; - void *event; - pid_t pid; - int err = TEST_FAIL; - - flag = perf_event_open_cloexec_flag(); - - evlist = perf_evlist__new(); - if (!evlist) { - pr_debug("perf_evlist__new failed\n"); - return TEST_FAIL; - } - - ret = parse_events(evlist, "intel_cqm/llc_occupancy/", NULL); - if (ret) { - pr_debug("parse_events failed, is \"intel_cqm/llc_occupancy/\" available?\n"); - err = TEST_SKIP; - goto out; - } - - evsel = perf_evlist__first(evlist); - if (!evsel) { - pr_debug("perf_evlist__first failed\n"); - goto out; - } - - memset(&pe, 0, sizeof(pe)); - pe.size = sizeof(pe); - - pe.type = PERF_TYPE_HARDWARE; - pe.config = PERF_COUNT_HW_CPU_CYCLES; - pe.read_format = PERF_FORMAT_GROUP; - - pe.sample_period = 128; - pe.sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_READ; - - pid = spawn(); - - fd[0] = sys_perf_event_open(&pe, pid, -1, -1, flag); - if (fd[0] < 0) { - pr_debug("failed to open event\n"); - goto out; - } - - memset(&pe, 0, sizeof(pe)); - pe.size = sizeof(pe); - - pe.type = evsel->attr.type; - pe.config = evsel->attr.config; - - fd[1] = sys_perf_event_open(&pe, pid, -1, fd[0], flag); - if (fd[1] < 0) { - pr_debug("failed to open event\n"); - goto out; - } - - /* - * Pick a power-of-two number of pages + 1 for the meta-data - * page (struct perf_event_mmap_page). See tools/perf/design.txt. - */ - mmap_len = page_size * 65; - - event = mmap(NULL, mmap_len, PROT_READ, MAP_SHARED, fd[0], 0); - if (event == (void *)(-1)) { - pr_debug("failed to mmap %d\n", errno); - goto out; - } - - sleep(1); - - err = TEST_OK; - - munmap(event, mmap_len); - - for (i = 0; i < 2; i++) - close(fd[i]); - - kill(pid, SIGKILL); - wait(NULL); -out: - perf_evlist__delete(evlist); - return err; -} -- cgit v1.2.3 From 5a54c2f5e1717264f6e24687f703937eaca5f55d Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 20 Sep 2017 12:30:36 -0300 Subject: perf trace beauty madvise: Generate 'behavior' string table from kernel headers This is one more case where the way that syscall parameter values are defined in kernel headers are easy to parse using a shell script that will then generate the string table that gets used by the madvise 'behaviour' argument beautifier. This way as soon as the header syncronization mechanism in perf's build system detects a change in a copy of a kernel ABI header and that file is syncronized, we get 'perf trace' updated automagically. So, when we syncronize this: Warning: Kernel ABI header at 'tools/include/uapi/asm-generic/mman-common.h' differs from latest version at 'include/uapi/asm-generic/mman-common.h' We'll get these: #define MADV_WIPEONFORK 18 /* Zero memory on fork, child only */ #define MADV_KEEPONFORK 19 /* Undo MADV_WIPEONFORK */ Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Rik van Riel Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-dolb0ghds4ui7wc1npgkchvc@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.perf | 9 +++++++ tools/perf/trace/beauty/madvise_behavior.sh | 10 ++++++++ tools/perf/trace/beauty/mmap.c | 38 +++++++++-------------------- 3 files changed, 31 insertions(+), 26 deletions(-) create mode 100755 tools/perf/trace/beauty/madvise_behavior.sh (limited to 'tools/perf') diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 1df93b4c4648..5f7408118a2d 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -441,6 +441,13 @@ perf_ioctl_tbl := $(srctree)/tools/perf/trace/beauty/perf_ioctl.sh $(perf_ioctl_array): $(perf_hdr_dir)/perf_event.h $(perf_ioctl_tbl) $(Q)$(SHELL) '$(perf_ioctl_tbl)' $(perf_hdr_dir) > $@ +madvise_behavior_array := $(beauty_outdir)/madvise_behavior_array.c +madvise_hdr_dir := $(srctree)/tools/include/uapi/asm-generic/ +madvise_behavior_tbl := $(srctree)/tools/perf/trace/beauty/madvise_behavior.sh + +$(madvise_behavior_array): $(madvise_hdr_dir)/mman-common.h $(madvise_behavior_tbl) + $(Q)$(SHELL) '$(madvise_behavior_tbl)' $(madvise_hdr_dir) > $@ + all: shell_compatibility_test $(ALL_PROGRAMS) $(LANG_BINDINGS) $(OTHER_PROGRAMS) $(OUTPUT)python/perf.so: $(PYTHON_EXT_SRCS) $(PYTHON_EXT_DEPS) $(LIBTRACEEVENT_DYNAMIC_LIST) @@ -541,6 +548,7 @@ prepare: $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h archheaders $(drm_ioc $(sndrv_ctl_ioctl_array) \ $(kvm_ioctl_array) \ $(vhost_virtio_ioctl_array) \ + $(madvise_behavior_array) \ $(perf_ioctl_array) $(OUTPUT)%.o: %.c prepare FORCE @@ -814,6 +822,7 @@ clean:: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clea $(OUTPUT)util/intel-pt-decoder/inat-tables.c \ $(OUTPUT)tests/llvm-src-{base,kbuild,prologue,relocation}.c \ $(OUTPUT)pmu-events/pmu-events.c \ + $(OUTPUT)$(madvise_behavior_array) \ $(OUTPUT)$(drm_ioctl_array) \ $(OUTPUT)$(pkey_alloc_access_rights_array) \ $(OUTPUT)$(sndrv_ctl_ioctl_array) \ diff --git a/tools/perf/trace/beauty/madvise_behavior.sh b/tools/perf/trace/beauty/madvise_behavior.sh new file mode 100755 index 000000000000..60ef8640ee70 --- /dev/null +++ b/tools/perf/trace/beauty/madvise_behavior.sh @@ -0,0 +1,10 @@ +#!/bin/sh + +header_dir=$1 + +printf "static const char *madvise_advices[] = {\n" +regex='^[[:space:]]*#[[:space:]]*define[[:space:]]+MADV_([[:alnum:]_]+)[[:space:]]+([[:digit:]]+)[[:space:]]*.*' +egrep $regex ${header_dir}/mman-common.h | \ + sed -r "s/$regex/\2 \1/g" | \ + sort -n | xargs printf "\t[%s] = \"%s\",\n" +printf "};\n" diff --git a/tools/perf/trace/beauty/mmap.c b/tools/perf/trace/beauty/mmap.c index 754558f9009d..45b1e0c0018f 100644 --- a/tools/perf/trace/beauty/mmap.c +++ b/tools/perf/trace/beauty/mmap.c @@ -94,35 +94,21 @@ static size_t syscall_arg__scnprintf_mremap_flags(char *bf, size_t size, #define SCA_MREMAP_FLAGS syscall_arg__scnprintf_mremap_flags +static size_t madvise__scnprintf_behavior(int behavior, char *bf, size_t size) +{ +#include "trace/beauty/generated/madvise_behavior_array.c" + static DEFINE_STRARRAY(madvise_advices); + + if (behavior < strarray__madvise_advices.nr_entries && strarray__madvise_advices.entries[behavior] != NULL) + return scnprintf(bf, size, "MADV_%s", strarray__madvise_advices.entries[behavior]); + + return scnprintf(bf, size, "%#", behavior); +} + static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size, struct syscall_arg *arg) { - int behavior = arg->val; - - switch (behavior) { -#define P_MADV_BHV(n) case MADV_##n: return scnprintf(bf, size, #n) - P_MADV_BHV(NORMAL); - P_MADV_BHV(RANDOM); - P_MADV_BHV(SEQUENTIAL); - P_MADV_BHV(WILLNEED); - P_MADV_BHV(DONTNEED); - P_MADV_BHV(FREE); - P_MADV_BHV(REMOVE); - P_MADV_BHV(DONTFORK); - P_MADV_BHV(DOFORK); - P_MADV_BHV(HWPOISON); - P_MADV_BHV(SOFT_OFFLINE); - P_MADV_BHV(MERGEABLE); - P_MADV_BHV(UNMERGEABLE); - P_MADV_BHV(HUGEPAGE); - P_MADV_BHV(NOHUGEPAGE); - P_MADV_BHV(DONTDUMP); - P_MADV_BHV(DODUMP); -#undef P_MADV_BHV - default: break; - } - - return scnprintf(bf, size, "%#x", behavior); + return madvise__scnprintf_behavior(arg->val, bf, size); } #define SCA_MADV_BHV syscall_arg__scnprintf_madvise_behavior -- cgit v1.2.3 From 0e1eed80885fdb09993bffb16f5662f4b53c1a08 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 20 Sep 2017 16:41:34 -0300 Subject: perf tools: Get all of tools/{arch,include}/ in the MANIFEST Now that I'm switching the container builds from using a local volume pointing to the kernel repository with the perf sources, instead getting a detached tarball to be able to use a container cluster, some places broke because I forgot to put some of the required files in tools/perf/MANIFEST, namely some bitsperlong.h files. So, to fix it do the same as for tools/build/ and pack the whole tools/arch/ directory. Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-wmenpjfjsobwdnfde30qqncj@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/MANIFEST | 87 ++--------------------------------------------------- 1 file changed, 2 insertions(+), 85 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/MANIFEST b/tools/perf/MANIFEST index 62072822dc85..627b7cada144 100644 --- a/tools/perf/MANIFEST +++ b/tools/perf/MANIFEST @@ -1,34 +1,8 @@ tools/perf -tools/arch/alpha/include/asm/barrier.h -tools/arch/arm/include/asm/barrier.h -tools/arch/arm64/include/asm/barrier.h -tools/arch/ia64/include/asm/barrier.h -tools/arch/mips/include/asm/barrier.h -tools/arch/powerpc/include/asm/barrier.h -tools/arch/s390/include/asm/barrier.h -tools/arch/sh/include/asm/barrier.h -tools/arch/sparc/include/asm/barrier.h -tools/arch/sparc/include/asm/barrier_32.h -tools/arch/sparc/include/asm/barrier_64.h -tools/arch/tile/include/asm/barrier.h -tools/arch/x86/include/asm/barrier.h -tools/arch/x86/include/asm/cmpxchg.h -tools/arch/x86/include/asm/cpufeatures.h -tools/arch/x86/include/asm/disabled-features.h -tools/arch/x86/include/asm/required-features.h -tools/arch/x86/include/uapi/asm/svm.h -tools/arch/x86/include/uapi/asm/vmx.h -tools/arch/x86/include/uapi/asm/kvm.h -tools/arch/x86/include/uapi/asm/kvm_perf.h -tools/arch/x86/lib/memcpy_64.S -tools/arch/x86/lib/memset_64.S -tools/arch/s390/include/uapi/asm/kvm_perf.h -tools/arch/s390/include/uapi/asm/sie.h -tools/arch/xtensa/include/asm/barrier.h +tools/arch tools/scripts tools/build -tools/arch/x86/include/asm/atomic.h -tools/arch/x86/include/asm/rmwcc.h +tools/include tools/lib/traceevent tools/lib/api tools/lib/bpf @@ -42,60 +16,3 @@ tools/lib/find_bit.c tools/lib/bitmap.c tools/lib/str_error_r.c tools/lib/vsprintf.c -tools/include/asm/alternative-asm.h -tools/include/asm/atomic.h -tools/include/asm/barrier.h -tools/include/asm/bug.h -tools/include/asm-generic/atomic-gcc.h -tools/include/asm-generic/barrier.h -tools/include/asm-generic/bitops/arch_hweight.h -tools/include/asm-generic/bitops/atomic.h -tools/include/asm-generic/bitops/const_hweight.h -tools/include/asm-generic/bitops/__ffs.h -tools/include/asm-generic/bitops/__ffz.h -tools/include/asm-generic/bitops/__fls.h -tools/include/asm-generic/bitops/find.h -tools/include/asm-generic/bitops/fls64.h -tools/include/asm-generic/bitops/fls.h -tools/include/asm-generic/bitops/hweight.h -tools/include/asm-generic/bitops.h -tools/include/linux/atomic.h -tools/include/linux/bitops.h -tools/include/linux/compiler.h -tools/include/linux/compiler-gcc.h -tools/include/linux/coresight-pmu.h -tools/include/linux/bug.h -tools/include/linux/filter.h -tools/include/linux/hash.h -tools/include/linux/kernel.h -tools/include/linux/list.h -tools/include/linux/log2.h -tools/include/uapi/asm-generic/fcntl.h -tools/include/uapi/asm-generic/ioctls.h -tools/include/uapi/asm-generic/mman-common.h -tools/include/uapi/asm-generic/mman.h -tools/include/uapi/drm/drm.h -tools/include/uapi/drm/i915_drm.h -tools/include/uapi/linux/bpf.h -tools/include/uapi/linux/bpf_common.h -tools/include/uapi/linux/fcntl.h -tools/include/uapi/linux/hw_breakpoint.h -tools/include/uapi/linux/kvm.h -tools/include/uapi/linux/mman.h -tools/include/uapi/linux/perf_event.h -tools/include/uapi/linux/sched.h -tools/include/uapi/linux/stat.h -tools/include/uapi/linux/vhost.h -tools/include/uapi/sound/asound.h -tools/include/linux/poison.h -tools/include/linux/rbtree.h -tools/include/linux/rbtree_augmented.h -tools/include/linux/refcount.h -tools/include/linux/string.h -tools/include/linux/stringify.h -tools/include/linux/types.h -tools/include/linux/err.h -tools/include/linux/bitmap.h -tools/include/linux/time64.h -tools/arch/*/include/uapi/asm/mman.h -tools/arch/*/include/uapi/asm/perf_regs.h -- cgit v1.2.3 From 0a7c74eae307894c6c95316c382f118aef8481e8 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 4 Apr 2017 13:15:04 -0300 Subject: perf tools: Provide mutex wrappers for pthreads rwlocks Andi reported a performance drop in single threaded perf tools such as 'perf script' due to the growing number of locks being put in place to allow for multithreaded tools, so wrap the POSIX threads rwlock routines with the names used for such kinds of locks in the Linux kernel and then allow for tools to ask for those locks to be used or not. I.e. a tool may have a multithreaded phase and then switch to single threaded, like the upcoming patches for the synthesizing of PERF_RECORD_{FORK,MMAP,etc} for pre-existing processes to then switch to single threaded mode in 'perf top'. The init routines will not be conditional, this way starting as single threaded to then move to multi threaded mode should be possible. Reported-by: Andi Kleen Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Kan Liang Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Wang Nan Link: http://lkml.kernel.org/r/20170404161739.GH12903@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-kvm.c | 1 - tools/perf/builtin-script.c | 2 ++ tools/perf/util/Build | 1 + tools/perf/util/dso.c | 12 +++++------ tools/perf/util/dso.h | 4 ++-- tools/perf/util/machine.c | 41 +++++++++++++++++++------------------- tools/perf/util/machine.h | 3 ++- tools/perf/util/map.c | 34 +++++++++++++++---------------- tools/perf/util/map.h | 3 ++- tools/perf/util/rwsem.c | 32 +++++++++++++++++++++++++++++ tools/perf/util/rwsem.h | 19 ++++++++++++++++++ tools/perf/util/symbol.c | 8 ++++---- tools/perf/util/thread.c | 4 ++-- tools/perf/util/trace-event-info.c | 1 - tools/perf/util/trace-event-read.c | 1 - tools/perf/util/util.c | 13 ++++++++++++ tools/perf/util/util.h | 5 +++++ tools/perf/util/vdso.c | 4 ++-- 18 files changed, 130 insertions(+), 58 deletions(-) create mode 100644 tools/perf/util/rwsem.c create mode 100644 tools/perf/util/rwsem.h (limited to 'tools/perf') diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c index f309c3773522..c747a1af49fe 100644 --- a/tools/perf/builtin-kvm.c +++ b/tools/perf/builtin-kvm.c @@ -34,7 +34,6 @@ #include #include #include -#include #include static const char *get_filename_for_perf_kvm(void) diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 725dbd3dd104..9092de0f7238 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -2829,6 +2829,8 @@ int cmd_script(int argc, const char **argv) NULL }; + perf_set_singlethreaded(); + setup_scripting(); argc = parse_options_subcommand(argc, argv, options, script_subcommands, script_usage, diff --git a/tools/perf/util/Build b/tools/perf/util/Build index 71ab8466714d..369c3163e68c 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -79,6 +79,7 @@ libperf-y += data.o libperf-y += tsc.o libperf-y += cloexec.o libperf-y += call-path.o +libperf-y += rwsem.o libperf-y += thread-stack.o libperf-$(CONFIG_AUXTRACE) += auxtrace.o libperf-$(CONFIG_AUXTRACE) += intel-pt-decoder/ diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c index ffd723179a4b..339e52971380 100644 --- a/tools/perf/util/dso.c +++ b/tools/perf/util/dso.c @@ -1366,9 +1366,9 @@ void __dsos__add(struct dsos *dsos, struct dso *dso) void dsos__add(struct dsos *dsos, struct dso *dso) { - pthread_rwlock_wrlock(&dsos->lock); + down_write(&dsos->lock); __dsos__add(dsos, dso); - pthread_rwlock_unlock(&dsos->lock); + up_write(&dsos->lock); } struct dso *__dsos__find(struct dsos *dsos, const char *name, bool cmp_short) @@ -1387,9 +1387,9 @@ struct dso *__dsos__find(struct dsos *dsos, const char *name, bool cmp_short) struct dso *dsos__find(struct dsos *dsos, const char *name, bool cmp_short) { struct dso *dso; - pthread_rwlock_rdlock(&dsos->lock); + down_read(&dsos->lock); dso = __dsos__find(dsos, name, cmp_short); - pthread_rwlock_unlock(&dsos->lock); + up_read(&dsos->lock); return dso; } @@ -1416,9 +1416,9 @@ struct dso *__dsos__findnew(struct dsos *dsos, const char *name) struct dso *dsos__findnew(struct dsos *dsos, const char *name) { struct dso *dso; - pthread_rwlock_wrlock(&dsos->lock); + down_write(&dsos->lock); dso = dso__get(__dsos__findnew(dsos, name)); - pthread_rwlock_unlock(&dsos->lock); + up_write(&dsos->lock); return dso; } diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h index f886141678eb..a2bbb21f301c 100644 --- a/tools/perf/util/dso.h +++ b/tools/perf/util/dso.h @@ -6,7 +6,7 @@ #include #include #include -#include +#include "rwsem.h" #include #include #include "map.h" @@ -129,7 +129,7 @@ struct dso_cache { struct dsos { struct list_head head; struct rb_root root; /* rbtree root sorted by long name */ - pthread_rwlock_t lock; + struct rw_semaphore lock; }; struct auxtrace_cache; diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index ddeea05eae86..585b4a3d64a4 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -30,7 +30,7 @@ static void dsos__init(struct dsos *dsos) { INIT_LIST_HEAD(&dsos->head); dsos->root = RB_ROOT; - pthread_rwlock_init(&dsos->lock, NULL); + init_rwsem(&dsos->lock); } static void machine__threads_init(struct machine *machine) @@ -40,7 +40,7 @@ static void machine__threads_init(struct machine *machine) for (i = 0; i < THREADS__TABLE_SIZE; i++) { struct threads *threads = &machine->threads[i]; threads->entries = RB_ROOT; - pthread_rwlock_init(&threads->lock, NULL); + init_rwsem(&threads->lock); threads->nr = 0; INIT_LIST_HEAD(&threads->dead); threads->last_match = NULL; @@ -130,7 +130,7 @@ static void dsos__purge(struct dsos *dsos) { struct dso *pos, *n; - pthread_rwlock_wrlock(&dsos->lock); + down_write(&dsos->lock); list_for_each_entry_safe(pos, n, &dsos->head, node) { RB_CLEAR_NODE(&pos->rb_node); @@ -139,13 +139,13 @@ static void dsos__purge(struct dsos *dsos) dso__put(pos); } - pthread_rwlock_unlock(&dsos->lock); + up_write(&dsos->lock); } static void dsos__exit(struct dsos *dsos) { dsos__purge(dsos); - pthread_rwlock_destroy(&dsos->lock); + exit_rwsem(&dsos->lock); } void machine__delete_threads(struct machine *machine) @@ -155,7 +155,7 @@ void machine__delete_threads(struct machine *machine) for (i = 0; i < THREADS__TABLE_SIZE; i++) { struct threads *threads = &machine->threads[i]; - pthread_rwlock_wrlock(&threads->lock); + down_write(&threads->lock); nd = rb_first(&threads->entries); while (nd) { struct thread *t = rb_entry(nd, struct thread, rb_node); @@ -163,7 +163,7 @@ void machine__delete_threads(struct machine *machine) nd = rb_next(nd); __machine__remove_thread(machine, t, false); } - pthread_rwlock_unlock(&threads->lock); + up_write(&threads->lock); } } @@ -180,7 +180,7 @@ void machine__exit(struct machine *machine) for (i = 0; i < THREADS__TABLE_SIZE; i++) { struct threads *threads = &machine->threads[i]; - pthread_rwlock_destroy(&threads->lock); + exit_rwsem(&threads->lock); } } @@ -482,9 +482,9 @@ struct thread *machine__findnew_thread(struct machine *machine, pid_t pid, struct threads *threads = machine__threads(machine, tid); struct thread *th; - pthread_rwlock_wrlock(&threads->lock); + down_write(&threads->lock); th = __machine__findnew_thread(machine, pid, tid); - pthread_rwlock_unlock(&threads->lock); + up_write(&threads->lock); return th; } @@ -494,9 +494,9 @@ struct thread *machine__find_thread(struct machine *machine, pid_t pid, struct threads *threads = machine__threads(machine, tid); struct thread *th; - pthread_rwlock_rdlock(&threads->lock); + down_read(&threads->lock); th = ____machine__findnew_thread(machine, threads, pid, tid, false); - pthread_rwlock_unlock(&threads->lock); + up_read(&threads->lock); return th; } @@ -588,7 +588,7 @@ static struct dso *machine__findnew_module_dso(struct machine *machine, { struct dso *dso; - pthread_rwlock_wrlock(&machine->dsos.lock); + down_write(&machine->dsos.lock); dso = __dsos__find(&machine->dsos, m->name, true); if (!dso) { @@ -602,7 +602,7 @@ static struct dso *machine__findnew_module_dso(struct machine *machine, dso__get(dso); out_unlock: - pthread_rwlock_unlock(&machine->dsos.lock); + up_write(&machine->dsos.lock); return dso; } @@ -749,7 +749,8 @@ size_t machine__fprintf(struct machine *machine, FILE *fp) for (i = 0; i < THREADS__TABLE_SIZE; i++) { struct threads *threads = &machine->threads[i]; - pthread_rwlock_rdlock(&threads->lock); + + down_read(&threads->lock); ret = fprintf(fp, "Threads: %u\n", threads->nr); @@ -759,7 +760,7 @@ size_t machine__fprintf(struct machine *machine, FILE *fp) ret += thread__fprintf(pos, fp); } - pthread_rwlock_unlock(&threads->lock); + up_read(&threads->lock); } return ret; } @@ -1319,7 +1320,7 @@ static int machine__process_kernel_mmap_event(struct machine *machine, struct dso *kernel = NULL; struct dso *dso; - pthread_rwlock_rdlock(&machine->dsos.lock); + down_read(&machine->dsos.lock); list_for_each_entry(dso, &machine->dsos.head, node) { @@ -1349,7 +1350,7 @@ static int machine__process_kernel_mmap_event(struct machine *machine, break; } - pthread_rwlock_unlock(&machine->dsos.lock); + up_read(&machine->dsos.lock); if (kernel == NULL) kernel = machine__findnew_dso(machine, kmmap_prefix); @@ -1513,7 +1514,7 @@ static void __machine__remove_thread(struct machine *machine, struct thread *th, BUG_ON(refcount_read(&th->refcnt) == 0); if (lock) - pthread_rwlock_wrlock(&threads->lock); + down_write(&threads->lock); rb_erase_init(&th->rb_node, &threads->entries); RB_CLEAR_NODE(&th->rb_node); --threads->nr; @@ -1524,7 +1525,7 @@ static void __machine__remove_thread(struct machine *machine, struct thread *th, */ list_add_tail(&th->node, &threads->dead); if (lock) - pthread_rwlock_unlock(&threads->lock); + up_write(&threads->lock); thread__put(th); } diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h index fe2f05848050..b1cd516f2025 100644 --- a/tools/perf/util/machine.h +++ b/tools/perf/util/machine.h @@ -6,6 +6,7 @@ #include "map.h" #include "dso.h" #include "event.h" +#include "rwsem.h" struct addr_location; struct branch_stack; @@ -28,7 +29,7 @@ struct vdso_info; struct threads { struct rb_root entries; - pthread_rwlock_t lock; + struct rw_semaphore lock; unsigned int nr; struct list_head dead; struct thread *last_match; diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index bdaa0a4edc17..5792d7a78152 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c @@ -488,7 +488,7 @@ u64 map__objdump_2mem(struct map *map, u64 ip) static void maps__init(struct maps *maps) { maps->entries = RB_ROOT; - pthread_rwlock_init(&maps->lock, NULL); + init_rwsem(&maps->lock); } void map_groups__init(struct map_groups *mg, struct machine *machine) @@ -517,9 +517,9 @@ static void __maps__purge(struct maps *maps) static void maps__exit(struct maps *maps) { - pthread_rwlock_wrlock(&maps->lock); + down_write(&maps->lock); __maps__purge(maps); - pthread_rwlock_unlock(&maps->lock); + up_write(&maps->lock); } void map_groups__exit(struct map_groups *mg) @@ -586,7 +586,7 @@ struct symbol *maps__find_symbol_by_name(struct maps *maps, const char *name, struct symbol *sym; struct rb_node *nd; - pthread_rwlock_rdlock(&maps->lock); + down_read(&maps->lock); for (nd = rb_first(&maps->entries); nd; nd = rb_next(nd)) { struct map *pos = rb_entry(nd, struct map, rb_node); @@ -602,7 +602,7 @@ struct symbol *maps__find_symbol_by_name(struct maps *maps, const char *name, sym = NULL; out: - pthread_rwlock_unlock(&maps->lock); + up_read(&maps->lock); return sym; } @@ -638,7 +638,7 @@ static size_t maps__fprintf(struct maps *maps, FILE *fp) size_t printed = 0; struct rb_node *nd; - pthread_rwlock_rdlock(&maps->lock); + down_read(&maps->lock); for (nd = rb_first(&maps->entries); nd; nd = rb_next(nd)) { struct map *pos = rb_entry(nd, struct map, rb_node); @@ -650,7 +650,7 @@ static size_t maps__fprintf(struct maps *maps, FILE *fp) } } - pthread_rwlock_unlock(&maps->lock); + up_read(&maps->lock); return printed; } @@ -682,7 +682,7 @@ static int maps__fixup_overlappings(struct maps *maps, struct map *map, FILE *fp struct rb_node *next; int err = 0; - pthread_rwlock_wrlock(&maps->lock); + down_write(&maps->lock); root = &maps->entries; next = rb_first(root); @@ -750,7 +750,7 @@ put_map: err = 0; out: - pthread_rwlock_unlock(&maps->lock); + up_write(&maps->lock); return err; } @@ -771,7 +771,7 @@ int map_groups__clone(struct thread *thread, struct map *map; struct maps *maps = &parent->maps[type]; - pthread_rwlock_rdlock(&maps->lock); + down_read(&maps->lock); for (map = maps__first(maps); map; map = map__next(map)) { struct map *new = map__clone(map); @@ -788,7 +788,7 @@ int map_groups__clone(struct thread *thread, err = 0; out_unlock: - pthread_rwlock_unlock(&maps->lock); + up_read(&maps->lock); return err; } @@ -815,9 +815,9 @@ static void __maps__insert(struct maps *maps, struct map *map) void maps__insert(struct maps *maps, struct map *map) { - pthread_rwlock_wrlock(&maps->lock); + down_write(&maps->lock); __maps__insert(maps, map); - pthread_rwlock_unlock(&maps->lock); + up_write(&maps->lock); } static void __maps__remove(struct maps *maps, struct map *map) @@ -828,9 +828,9 @@ static void __maps__remove(struct maps *maps, struct map *map) void maps__remove(struct maps *maps, struct map *map) { - pthread_rwlock_wrlock(&maps->lock); + down_write(&maps->lock); __maps__remove(maps, map); - pthread_rwlock_unlock(&maps->lock); + up_write(&maps->lock); } struct map *maps__find(struct maps *maps, u64 ip) @@ -838,7 +838,7 @@ struct map *maps__find(struct maps *maps, u64 ip) struct rb_node **p, *parent = NULL; struct map *m; - pthread_rwlock_rdlock(&maps->lock); + down_read(&maps->lock); p = &maps->entries.rb_node; while (*p != NULL) { @@ -854,7 +854,7 @@ struct map *maps__find(struct maps *maps, u64 ip) m = NULL; out: - pthread_rwlock_unlock(&maps->lock); + up_read(&maps->lock); return m; } diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h index 73aacf7a7dc4..d5d7442dac7a 100644 --- a/tools/perf/util/map.h +++ b/tools/perf/util/map.h @@ -9,6 +9,7 @@ #include #include #include +#include "rwsem.h" enum map_type { MAP__FUNCTION = 0, @@ -61,7 +62,7 @@ struct kmap { struct maps { struct rb_root entries; - pthread_rwlock_t lock; + struct rw_semaphore lock; }; struct map_groups { diff --git a/tools/perf/util/rwsem.c b/tools/perf/util/rwsem.c new file mode 100644 index 000000000000..5e52e7baa7b6 --- /dev/null +++ b/tools/perf/util/rwsem.c @@ -0,0 +1,32 @@ +#include "util.h" +#include "rwsem.h" + +int init_rwsem(struct rw_semaphore *sem) +{ + return pthread_rwlock_init(&sem->lock, NULL); +} + +int exit_rwsem(struct rw_semaphore *sem) +{ + return pthread_rwlock_destroy(&sem->lock); +} + +int down_read(struct rw_semaphore *sem) +{ + return perf_singlethreaded ? 0 : pthread_rwlock_rdlock(&sem->lock); +} + +int up_read(struct rw_semaphore *sem) +{ + return perf_singlethreaded ? 0 : pthread_rwlock_unlock(&sem->lock); +} + +int down_write(struct rw_semaphore *sem) +{ + return perf_singlethreaded ? 0 : pthread_rwlock_wrlock(&sem->lock); +} + +int up_write(struct rw_semaphore *sem) +{ + return perf_singlethreaded ? 0 : pthread_rwlock_unlock(&sem->lock); +} diff --git a/tools/perf/util/rwsem.h b/tools/perf/util/rwsem.h new file mode 100644 index 000000000000..94565ad4d494 --- /dev/null +++ b/tools/perf/util/rwsem.h @@ -0,0 +1,19 @@ +#ifndef _PERF_RWSEM_H +#define _PERF_RWSEM_H + +#include + +struct rw_semaphore { + pthread_rwlock_t lock; +}; + +int init_rwsem(struct rw_semaphore *sem); +int exit_rwsem(struct rw_semaphore *sem); + +int down_read(struct rw_semaphore *sem); +int up_read(struct rw_semaphore *sem); + +int down_write(struct rw_semaphore *sem); +int up_write(struct rw_semaphore *sem); + +#endif /* _PERF_RWSEM_H */ diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 5909ee4c7ade..066e38aa4063 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -226,7 +226,7 @@ void __map_groups__fixup_end(struct map_groups *mg, enum map_type type) struct maps *maps = &mg->maps[type]; struct map *next, *curr; - pthread_rwlock_wrlock(&maps->lock); + down_write(&maps->lock); curr = maps__first(maps); if (curr == NULL) @@ -246,7 +246,7 @@ void __map_groups__fixup_end(struct map_groups *mg, enum map_type type) curr->end = ~0ULL; out_unlock: - pthread_rwlock_unlock(&maps->lock); + up_write(&maps->lock); } struct symbol *symbol__new(u64 start, u64 len, u8 binding, const char *name) @@ -1671,7 +1671,7 @@ struct map *map_groups__find_by_name(struct map_groups *mg, struct maps *maps = &mg->maps[type]; struct map *map; - pthread_rwlock_rdlock(&maps->lock); + down_read(&maps->lock); for (map = maps__first(maps); map; map = map__next(map)) { if (map->dso && strcmp(map->dso->short_name, name) == 0) @@ -1681,7 +1681,7 @@ struct map *map_groups__find_by_name(struct map_groups *mg, map = NULL; out_unlock: - pthread_rwlock_unlock(&maps->lock); + up_read(&maps->lock); return map; } diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c index aee9a42102ba..c09bdb509d82 100644 --- a/tools/perf/util/thread.c +++ b/tools/perf/util/thread.c @@ -264,7 +264,7 @@ static int __thread__prepare_access(struct thread *thread) struct maps *maps = &thread->mg->maps[i]; struct map *map; - pthread_rwlock_rdlock(&maps->lock); + down_read(&maps->lock); for (map = maps__first(maps); map; map = map__next(map)) { err = unwind__prepare_access(thread, map, &initialized); @@ -272,7 +272,7 @@ static int __thread__prepare_access(struct thread *thread) break; } - pthread_rwlock_unlock(&maps->lock); + up_read(&maps->lock); } return err; diff --git a/tools/perf/util/trace-event-info.c b/tools/perf/util/trace-event-info.c index e7d60d05596d..d7f2113462fb 100644 --- a/tools/perf/util/trace-event-info.c +++ b/tools/perf/util/trace-event-info.c @@ -28,7 +28,6 @@ #include #include #include -#include #include #include #include diff --git a/tools/perf/util/trace-event-read.c b/tools/perf/util/trace-event-read.c index 8a9a677f7576..40b425949aa3 100644 --- a/tools/perf/util/trace-event-read.c +++ b/tools/perf/util/trace-event-read.c @@ -27,7 +27,6 @@ #include #include #include -#include #include #include #include diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c index 3c9c39711343..97e0c8e26477 100644 --- a/tools/perf/util/util.c +++ b/tools/perf/util/util.c @@ -23,6 +23,19 @@ /* * XXX We need to find a better place for these things... */ + +bool perf_singlethreaded = true; + +void perf_set_singlethreaded(void) +{ + perf_singlethreaded = true; +} + +void perf_set_multithreaded(void) +{ + perf_singlethreaded = false; +} + unsigned int page_size; int cacheline_size; diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index 03946d5e4741..6c7e6cc902bb 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -62,4 +62,9 @@ int sched_getcpu(void); int setns(int fd, int nstype); #endif +extern bool perf_singlethreaded; + +void perf_set_singlethreaded(void); +void perf_set_multithreaded(void); + #endif /* GIT_COMPAT_UTIL_H */ diff --git a/tools/perf/util/vdso.c b/tools/perf/util/vdso.c index d3c39eec89a8..f5f843d3c22f 100644 --- a/tools/perf/util/vdso.c +++ b/tools/perf/util/vdso.c @@ -319,7 +319,7 @@ struct dso *machine__findnew_vdso(struct machine *machine, struct vdso_info *vdso_info; struct dso *dso = NULL; - pthread_rwlock_wrlock(&machine->dsos.lock); + down_write(&machine->dsos.lock); if (!machine->vdso_info) machine->vdso_info = vdso_info__new(); @@ -347,7 +347,7 @@ struct dso *machine__findnew_vdso(struct machine *machine, out_unlock: dso__get(dso); - pthread_rwlock_unlock(&machine->dsos.lock); + up_write(&machine->dsos.lock); return dso; } -- cgit v1.2.3 From 10836d9f9ac63d40ccfa756f871ce4ed51ae3b52 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 3 Jul 2017 16:50:30 +0200 Subject: perf tests attr: Fix task term values The perf_event_attr::task is 1 by default for first (tracking) event in the session. Setting task=1 as default and adding task=0 for cases that need it. Signed-off-by: Jiri Olsa Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Thomas-Mich Richter Link: http://lkml.kernel.org/r/20170703145030.12903-16-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/attr/base-record | 2 +- tools/perf/tests/attr/test-record-group | 1 + tools/perf/tests/attr/test-record-group-sampling | 2 +- tools/perf/tests/attr/test-record-group1 | 1 + 4 files changed, 4 insertions(+), 2 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/tests/attr/base-record b/tools/perf/tests/attr/base-record index 31e0b1da830b..37940665f736 100644 --- a/tools/perf/tests/attr/base-record +++ b/tools/perf/tests/attr/base-record @@ -23,7 +23,7 @@ comm=1 freq=1 inherit_stat=0 enable_on_exec=1 -task=0 +task=1 watermark=0 precise_ip=0|1|2|3 mmap_data=0 diff --git a/tools/perf/tests/attr/test-record-group b/tools/perf/tests/attr/test-record-group index 6e7961f6f7a5..618ba1c17474 100644 --- a/tools/perf/tests/attr/test-record-group +++ b/tools/perf/tests/attr/test-record-group @@ -17,5 +17,6 @@ sample_type=327 read_format=4 mmap=0 comm=0 +task=0 enable_on_exec=0 disabled=0 diff --git a/tools/perf/tests/attr/test-record-group-sampling b/tools/perf/tests/attr/test-record-group-sampling index ef59afd6d635..f906b793196f 100644 --- a/tools/perf/tests/attr/test-record-group-sampling +++ b/tools/perf/tests/attr/test-record-group-sampling @@ -23,7 +23,7 @@ sample_type=343 # PERF_FORMAT_ID | PERF_FORMAT_GROUP read_format=12 - +task=0 mmap=0 comm=0 enable_on_exec=0 diff --git a/tools/perf/tests/attr/test-record-group1 b/tools/perf/tests/attr/test-record-group1 index 87a222d014d8..48e8bd12fe46 100644 --- a/tools/perf/tests/attr/test-record-group1 +++ b/tools/perf/tests/attr/test-record-group1 @@ -18,5 +18,6 @@ sample_type=327 read_format=4 mmap=0 comm=0 +task=0 enable_on_exec=0 disabled=0 -- cgit v1.2.3 From 3440fe2790aa3d13530260af6033533b18959aee Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Wed, 13 Sep 2017 10:12:08 +0200 Subject: perf test attr: Fix python error on empty result Commit d78ada4a767 ("perf tests attr: Do not store failed events") does not create an event file in the /tmp directory when the perf_open_event() system call failed. This can lead to a situation where not /tmp/event-xx-yy-zz result file exists at all (for example on a s390x virtual machine environment) where no CPUMF hardware is available. The following command then fails with a python call back chain instead of printing failure: [root@s8360046 perf]# /usr/bin/python2 ./tests/attr.py -d ./tests/attr/ \ -p ./perf -v -ttest-stat-basic running './tests/attr//test-stat-basic' Traceback (most recent call last): File "./tests/attr.py", line 379, in main() File "./tests/attr.py", line 370, in main run_tests(options) File "./tests/attr.py", line 311, in run_tests Test(f, options).run() File "./tests/attr.py", line 300, in run self.compare(self.expect, self.result) File "./tests/attr.py", line 248, in compare exp_event.diff(res_event) UnboundLocalError: local variable 'res_event' referenced before assignment [root@s8360046 perf]# This patch catches this pitfall and prints an error message instead: [root@s8360047 perf]# /usr/bin/python2 ./tests/attr.py -d ./tests/attr/ \ -p ./perf -vvv -ttest-stat-basic running './tests/attr//test-stat-basic' loading expected events Event event:base-stat fd = 1 group_fd = -1 flags = 0|8 [....] sample_regs_user = 0 sample_stack_user = 0 'PERF_TEST_ATTR=/tmp/tmpJbMQMP ./perf stat -o /tmp/tmpJbMQMP/perf.data -e cycles kill >/dev/null 2>&1' ret '1', expected '1' loading result events compare matching [event:base-stat] match: [event:base-stat] matches [] res_event is empty FAILED './tests/attr//test-stat-basic' - match failure [root@s8360047 perf]# Signed-off-by: Thomas-Mich Richter Acked-by: Jiri Olsa Cc: Heiko Carstens Cc: Hendrik Brueckner Cc: Martin Schwidefsky Cc: Thomas-Mich Richter LPU-Reference: 20170913081209.39570-1-tmricht@linux.vnet.ibm.com Link: http://lkml.kernel.org/n/tip-04d63nn7svfgxdhi60gq2mlm@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/attr.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'tools/perf') diff --git a/tools/perf/tests/attr.py b/tools/perf/tests/attr.py index 6bb50e82a3e3..a13cd780148e 100644 --- a/tools/perf/tests/attr.py +++ b/tools/perf/tests/attr.py @@ -237,6 +237,7 @@ class Test(object): # events in result. Fail if there's not any. for exp_name, exp_event in expect.items(): exp_list = [] + res_event = {} log.debug(" matching [%s]" % exp_name) for res_name, res_event in result.items(): log.debug(" to [%s]" % res_name) @@ -253,7 +254,10 @@ class Test(object): if exp_event.optional(): log.debug(" %s does not match, but is optional" % exp_name) else: - exp_event.diff(res_event) + if not res_event: + log.debug(" res_event is empty"); + else: + exp_event.diff(res_event) raise Fail(self, 'match failure'); match[exp_name] = exp_list -- cgit v1.2.3 From 22905582f6dd4bbd0c370fe5732c607452010c04 Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Wed, 13 Sep 2017 10:12:09 +0200 Subject: perf test attr: Fix ignored test case result Command perf test -v 16 (Setup struct perf_event_attr test) always reports success even if the test case fails. It works correctly if you also specify -F (for don't fork). root@s35lp76 perf]# ./perf test -v 16 15: Setup struct perf_event_attr : --- start --- running './tests/attr/test-record-no-delay' [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.002 MB /tmp/tmp4E1h7R/perf.data (1 samples) ] expected task=0, got 1 expected precise_ip=0, got 3 expected wakeup_events=1, got 0 FAILED './tests/attr/test-record-no-delay' - match failure test child finished with 0 ---- end ---- Setup struct perf_event_attr: Ok The reason for the wrong error reporting is the return value of the system() library call. It is called in run_dir() file tests/attr.c and returns the exit status, in above case 0xff00. This value is given as parameter to the exit() function which can only handle values 0-0xff. The child process terminates with exit value of 0 and the parent does not detect any error. This patch corrects the error reporting and prints the correct test result. Signed-off-by: Thomas-Mich Richter Acked-by: Jiri Olsa Cc: Heiko Carstens Cc: Hendrik Brueckner Cc: Martin Schwidefsky Cc: Thomas-Mich Richter LPU-Reference: 20170913081209.39570-2-tmricht@linux.vnet.ibm.com Link: http://lkml.kernel.org/n/tip-rdube6rfcjsr1nzue72c7lqn@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/attr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools/perf') diff --git a/tools/perf/tests/attr.c b/tools/perf/tests/attr.c index c9aafed7da15..25ede4472465 100644 --- a/tools/perf/tests/attr.c +++ b/tools/perf/tests/attr.c @@ -166,7 +166,7 @@ static int run_dir(const char *d, const char *perf) snprintf(cmd, 3*PATH_MAX, PYTHON " %s/attr.py -d %s/attr/ -p %s %.*s", d, d, perf, vcnt, v); - return system(cmd); + return system(cmd) ? TEST_FAIL : TEST_OK; } int test__attr(struct test *test __maybe_unused, int subtest __maybe_unused) -- cgit v1.2.3 From b32ee9e522f7ba26339856a047cfe9efc0be0ff3 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Fri, 29 Sep 2017 07:47:52 -0700 Subject: perf tools: Lock to protect namespaces and comm list Add two locks to protect namespaces_list and comm_list. The lock is only needed for multithreaded code, so using mutex wrappers provided by perf tool. Not all the comm_list/namespaces_list accessing are protected, e.g. thread__exec_comm. Because the multithread code for perf top event synthesizing does not touch them. They don't need a lock. Signed-off-by: Kan Liang Acked-by: Jiri Olsa Cc: Adrian Hunter Cc: Alexei Starovoitov Cc: Andi Kleen Cc: He Kuang Cc: Lukasz Odzioba Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Wang Nan Link: http://lkml.kernel.org/r/1506696477-146932-2-git-send-email-kan.liang@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/thread.c | 53 +++++++++++++++++++++++++++++++++++++++++++----- tools/perf/util/thread.h | 3 +++ 2 files changed, 51 insertions(+), 5 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c index c09bdb509d82..bf73117b4822 100644 --- a/tools/perf/util/thread.c +++ b/tools/perf/util/thread.c @@ -45,6 +45,8 @@ struct thread *thread__new(pid_t pid, pid_t tid) thread->cpu = -1; INIT_LIST_HEAD(&thread->namespaces_list); INIT_LIST_HEAD(&thread->comm_list); + init_rwsem(&thread->namespaces_lock); + init_rwsem(&thread->comm_lock); comm_str = malloc(32); if (!comm_str) @@ -83,18 +85,26 @@ void thread__delete(struct thread *thread) map_groups__put(thread->mg); thread->mg = NULL; } + down_write(&thread->namespaces_lock); list_for_each_entry_safe(namespaces, tmp_namespaces, &thread->namespaces_list, list) { list_del(&namespaces->list); namespaces__free(namespaces); } + up_write(&thread->namespaces_lock); + + down_write(&thread->comm_lock); list_for_each_entry_safe(comm, tmp_comm, &thread->comm_list, list) { list_del(&comm->list); comm__free(comm); } + up_write(&thread->comm_lock); + unwind__finish_access(thread); nsinfo__zput(thread->nsinfo); + exit_rwsem(&thread->namespaces_lock); + exit_rwsem(&thread->comm_lock); free(thread); } @@ -125,8 +135,8 @@ struct namespaces *thread__namespaces(const struct thread *thread) return list_first_entry(&thread->namespaces_list, struct namespaces, list); } -int thread__set_namespaces(struct thread *thread, u64 timestamp, - struct namespaces_event *event) +static int __thread__set_namespaces(struct thread *thread, u64 timestamp, + struct namespaces_event *event) { struct namespaces *new, *curr = thread__namespaces(thread); @@ -149,6 +159,17 @@ int thread__set_namespaces(struct thread *thread, u64 timestamp, return 0; } +int thread__set_namespaces(struct thread *thread, u64 timestamp, + struct namespaces_event *event) +{ + int ret; + + down_write(&thread->namespaces_lock); + ret = __thread__set_namespaces(thread, timestamp, event); + up_write(&thread->namespaces_lock); + return ret; +} + struct comm *thread__comm(const struct thread *thread) { if (list_empty(&thread->comm_list)) @@ -170,8 +191,8 @@ struct comm *thread__exec_comm(const struct thread *thread) return last; } -int __thread__set_comm(struct thread *thread, const char *str, u64 timestamp, - bool exec) +static int ____thread__set_comm(struct thread *thread, const char *str, + u64 timestamp, bool exec) { struct comm *new, *curr = thread__comm(thread); @@ -195,6 +216,17 @@ int __thread__set_comm(struct thread *thread, const char *str, u64 timestamp, return 0; } +int __thread__set_comm(struct thread *thread, const char *str, u64 timestamp, + bool exec) +{ + int ret; + + down_write(&thread->comm_lock); + ret = ____thread__set_comm(thread, str, timestamp, exec); + up_write(&thread->comm_lock); + return ret; +} + int thread__set_comm_from_proc(struct thread *thread) { char path[64]; @@ -212,7 +244,7 @@ int thread__set_comm_from_proc(struct thread *thread) return err; } -const char *thread__comm_str(const struct thread *thread) +static const char *__thread__comm_str(const struct thread *thread) { const struct comm *comm = thread__comm(thread); @@ -222,6 +254,17 @@ const char *thread__comm_str(const struct thread *thread) return comm__str(comm); } +const char *thread__comm_str(const struct thread *thread) +{ + const char *str; + + down_read((struct rw_semaphore *)&thread->comm_lock); + str = __thread__comm_str(thread); + up_read((struct rw_semaphore *)&thread->comm_lock); + + return str; +} + /* CHECKME: it should probably better return the max comm len from its comm list */ int thread__comm_len(struct thread *thread) { diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h index cb1a5dd5c2b9..10555d6a0b86 100644 --- a/tools/perf/util/thread.h +++ b/tools/perf/util/thread.h @@ -9,6 +9,7 @@ #include "symbol.h" #include #include +#include "rwsem.h" struct thread_stack; struct unwind_libunwind_ops; @@ -29,7 +30,9 @@ struct thread { int comm_len; bool dead; /* if set thread has exited */ struct list_head namespaces_list; + struct rw_semaphore namespaces_lock; struct list_head comm_list; + struct rw_semaphore comm_lock; u64 db_id; void *priv; -- cgit v1.2.3 From f988e71bc6220d8b404dbd43c0e0962e30305795 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Fri, 29 Sep 2017 07:47:53 -0700 Subject: perf tools: Lock to protect comm_str rb tree Add comm_str_lock to protect comm_str rb tree. The lock is only needed for multithreaded code, so using mutex wrappers provided by perf tool. Signed-off-by: Kan Liang Acked-by: Jiri Olsa Cc: Adrian Hunter Cc: Alexei Starovoitov Cc: Andi Kleen Cc: He Kuang Cc: Lukasz Odzioba Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Wang Nan Link: http://lkml.kernel.org/r/1506696477-146932-3-git-send-email-kan.liang@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/comm.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) (limited to 'tools/perf') diff --git a/tools/perf/util/comm.c b/tools/perf/util/comm.c index 7bc981b6bf29..756a9c14efbb 100644 --- a/tools/perf/util/comm.c +++ b/tools/perf/util/comm.c @@ -5,6 +5,7 @@ #include #include #include +#include "rwsem.h" struct comm_str { char *str; @@ -14,6 +15,7 @@ struct comm_str { /* Should perhaps be moved to struct machine */ static struct rb_root comm_str_root; +static struct rw_semaphore comm_str_lock = {.lock = PTHREAD_RWLOCK_INITIALIZER,}; static struct comm_str *comm_str__get(struct comm_str *cs) { @@ -25,7 +27,9 @@ static struct comm_str *comm_str__get(struct comm_str *cs) static void comm_str__put(struct comm_str *cs) { if (cs && refcount_dec_and_test(&cs->refcnt)) { + down_write(&comm_str_lock); rb_erase(&cs->rb_node, &comm_str_root); + up_write(&comm_str_lock); zfree(&cs->str); free(cs); } @@ -50,7 +54,8 @@ static struct comm_str *comm_str__alloc(const char *str) return cs; } -static struct comm_str *comm_str__findnew(const char *str, struct rb_root *root) +static +struct comm_str *__comm_str__findnew(const char *str, struct rb_root *root) { struct rb_node **p = &root->rb_node; struct rb_node *parent = NULL; @@ -81,6 +86,17 @@ static struct comm_str *comm_str__findnew(const char *str, struct rb_root *root) return new; } +static struct comm_str *comm_str__findnew(const char *str, struct rb_root *root) +{ + struct comm_str *cs; + + down_write(&comm_str_lock); + cs = __comm_str__findnew(str, root); + up_write(&comm_str_lock); + + return cs; +} + struct comm *comm__new(const char *str, u64 timestamp, bool exec) { struct comm *comm = zalloc(sizeof(*comm)); -- cgit v1.2.3 From 340b47f510bbe55a76b7309107276f02ea11f117 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Fri, 29 Sep 2017 07:47:54 -0700 Subject: perf top: Implement multithreading for perf_event__synthesize_threads The proc files which is sorted with alphabetical order are evenly assigned to several synthesize threads to be processed in parallel. For 'perf top', the threads number hard code to online CPU number. The following patch will introduce an option to set it. For other perf tools, the thread number is 1. Because the process function is not ready for multithreading, e.g. process_synthesized_event. This patch series only support event synthesize multithreading for 'perf top'. For other tools, it can be done separately later. With multithread applied, the total processing time can get up to 1.56x speedup on Knights Mill for 'perf top'. For specific single event processing, the processing time could increase because of the lock contention. So proc_map_timeout may need to be increased. Otherwise some proc maps will be truncated. Based on my test, increasing the proc_map_timeout has small impact on the total processing time. The total processing time still get 1.49x speedup on Knights Mill after increasing the proc_map_timeout. The patch itself doesn't increase the proc_map_timeout. Doesn't need to implement multithreading for per task monitoring, perf_event__synthesize_thread_map. It doesn't have performance issue. Committer testing: # getconf _NPROCESSORS_ONLN 4 # perf trace --no-inherit -e clone -o /tmp/output perf top # tail -4 /tmp/bla 0.124 ( 0.041 ms): clone(flags: VM|FS|FILES|SIGHAND|THREAD|SYSVSEM|SETTLS|PARENT_SETTID|CHILD_CLEARTID, child_stack: 0x7fc3eb3a8f30, parent_tidptr: 0x7fc3eb3a99d0, child_tidptr: 0x7fc3eb3a99d0, tls: 0x7fc3eb3a9700) = 9548 (perf) 0.246 ( 0.023 ms): clone(flags: VM|FS|FILES|SIGHAND|THREAD|SYSVSEM|SETTLS|PARENT_SETTID|CHILD_CLEARTID, child_stack: 0x7fc3eaba7f30, parent_tidptr: 0x7fc3eaba89d0, child_tidptr: 0x7fc3eaba89d0, tls: 0x7fc3eaba8700) = 9549 (perf) 0.286 ( 0.019 ms): clone(flags: VM|FS|FILES|SIGHAND|THREAD|SYSVSEM|SETTLS|PARENT_SETTID|CHILD_CLEARTID, child_stack: 0x7fc3ea3a6f30, parent_tidptr: 0x7fc3ea3a79d0, child_tidptr: 0x7fc3ea3a79d0, tls: 0x7fc3ea3a7700) = 9550 (perf) 246.540 ( 0.047 ms): clone(flags: VM|FS|FILES|SIGHAND|THREAD|SYSVSEM|SETTLS|PARENT_SETTID|CHILD_CLEARTID, child_stack: 0x7fc3ea3a6f30, parent_tidptr: 0x7fc3ea3a79d0, child_tidptr: 0x7fc3ea3a79d0, tls: 0x7fc3ea3a7700) = 9551 (perf) # Signed-off-by: Kan Liang Tested-by: Arnaldo Carvalho de Melo Acked-by: Jiri Olsa Cc: Adrian Hunter Cc: Alexei Starovoitov Cc: Andi Kleen Cc: He Kuang Cc: Lukasz Odzioba Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Wang Nan Link: http://lkml.kernel.org/r/1506696477-146932-4-git-send-email-kan.liang@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-kvm.c | 3 +- tools/perf/builtin-record.c | 2 +- tools/perf/builtin-top.c | 8 +- tools/perf/builtin-trace.c | 2 +- tools/perf/tests/mmap-thread-lookup.c | 2 +- tools/perf/util/event.c | 160 +++++++++++++++++++++++++++------- tools/perf/util/event.h | 3 +- tools/perf/util/machine.c | 8 +- tools/perf/util/machine.h | 9 +- 9 files changed, 155 insertions(+), 42 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c index c747a1af49fe..721f4f91291a 100644 --- a/tools/perf/builtin-kvm.c +++ b/tools/perf/builtin-kvm.c @@ -1441,7 +1441,8 @@ static int kvm_events_live(struct perf_kvm_stat *kvm, perf_session__set_id_hdr_size(kvm->session); ordered_events__set_copy_on_queue(&kvm->session->ordered_events, true); machine__synthesize_threads(&kvm->session->machines.host, &kvm->opts.target, - kvm->evlist->threads, false, kvm->opts.proc_map_timeout); + kvm->evlist->threads, false, + kvm->opts.proc_map_timeout, 1); err = kvm_live_open_events(kvm); if (err) goto out; diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 9b379f3a3d99..234fdf4734f6 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -863,7 +863,7 @@ static int record__synthesize(struct record *rec, bool tail) err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->threads, process_synthesized_event, opts->sample_address, - opts->proc_map_timeout); + opts->proc_map_timeout, 1); out: return err; } diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index ee954bde7e3e..bc31b93cc1d8 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -958,8 +958,14 @@ static int __cmd_top(struct perf_top *top) if (perf_session__register_idle_thread(top->session) < 0) goto out_delete; + perf_set_multithreaded(); + machine__synthesize_threads(&top->session->machines.host, &opts->target, - top->evlist->threads, false, opts->proc_map_timeout); + top->evlist->threads, false, + opts->proc_map_timeout, + (unsigned int)sysconf(_SC_NPROCESSORS_ONLN)); + + perf_set_singlethreaded(); if (perf_hpp_list.socket) { ret = perf_env__read_cpu_topology_map(&perf_env); diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 967bd351b58d..afef6fe46c45 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -1131,7 +1131,7 @@ static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist) err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target, evlist->threads, trace__tool_process, false, - trace->opts.proc_map_timeout); + trace->opts.proc_map_timeout, 1); if (err) symbol__exit(); diff --git a/tools/perf/tests/mmap-thread-lookup.c b/tools/perf/tests/mmap-thread-lookup.c index f94a4196e7c9..2a0068afe3bf 100644 --- a/tools/perf/tests/mmap-thread-lookup.c +++ b/tools/perf/tests/mmap-thread-lookup.c @@ -131,7 +131,7 @@ static int synth_all(struct machine *machine) { return perf_event__synthesize_threads(NULL, perf_event__process, - machine, 0, 500); + machine, 0, 500, 1); } static int synth_process(struct machine *machine) diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index 10366b87d0b5..0e678dd6bdbe 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -678,23 +678,21 @@ out: return err; } -int perf_event__synthesize_threads(struct perf_tool *tool, - perf_event__handler_t process, - struct machine *machine, - bool mmap_data, - unsigned int proc_map_timeout) +static int __perf_event__synthesize_threads(struct perf_tool *tool, + perf_event__handler_t process, + struct machine *machine, + bool mmap_data, + unsigned int proc_map_timeout, + struct dirent **dirent, + int start, + int num) { union perf_event *comm_event, *mmap_event, *fork_event; union perf_event *namespaces_event; - char proc_path[PATH_MAX]; - struct dirent **dirent; int err = -1; char *end; pid_t pid; - int n, i; - - if (machine__is_default_guest(machine)) - return 0; + int i; comm_event = malloc(sizeof(comm_event->comm) + machine->id_hdr_size); if (comm_event == NULL) @@ -714,34 +712,25 @@ int perf_event__synthesize_threads(struct perf_tool *tool, if (namespaces_event == NULL) goto out_free_fork; - snprintf(proc_path, sizeof(proc_path), "%s/proc", machine->root_dir); - n = scandir(proc_path, &dirent, 0, alphasort); - - if (n < 0) - goto out_free_namespaces; - - for (i = 0; i < n; i++) { + for (i = start; i < start + num; i++) { if (!isdigit(dirent[i]->d_name[0])) continue; pid = (pid_t)strtol(dirent[i]->d_name, &end, 10); /* only interested in proper numerical dirents */ - if (!*end) { - /* - * We may race with exiting thread, so don't stop just because - * one thread couldn't be synthesized. - */ - __event__synthesize_thread(comm_event, mmap_event, fork_event, - namespaces_event, pid, 1, process, - tool, machine, mmap_data, - proc_map_timeout); - } - free(dirent[i]); + if (*end) + continue; + /* + * We may race with exiting thread, so don't stop just because + * one thread couldn't be synthesized. + */ + __event__synthesize_thread(comm_event, mmap_event, fork_event, + namespaces_event, pid, 1, process, + tool, machine, mmap_data, + proc_map_timeout); } - free(dirent); err = 0; -out_free_namespaces: free(namespaces_event); out_free_fork: free(fork_event); @@ -753,6 +742,115 @@ out: return err; } +struct synthesize_threads_arg { + struct perf_tool *tool; + perf_event__handler_t process; + struct machine *machine; + bool mmap_data; + unsigned int proc_map_timeout; + struct dirent **dirent; + int num; + int start; +}; + +static void *synthesize_threads_worker(void *arg) +{ + struct synthesize_threads_arg *args = arg; + + __perf_event__synthesize_threads(args->tool, args->process, + args->machine, args->mmap_data, + args->proc_map_timeout, args->dirent, + args->start, args->num); + return NULL; +} + +int perf_event__synthesize_threads(struct perf_tool *tool, + perf_event__handler_t process, + struct machine *machine, + bool mmap_data, + unsigned int proc_map_timeout, + unsigned int nr_threads_synthesize) +{ + struct synthesize_threads_arg *args = NULL; + pthread_t *synthesize_threads = NULL; + char proc_path[PATH_MAX]; + struct dirent **dirent; + int num_per_thread; + int m, n, i, j; + int thread_nr; + int base = 0; + int err = -1; + + + if (machine__is_default_guest(machine)) + return 0; + + snprintf(proc_path, sizeof(proc_path), "%s/proc", machine->root_dir); + n = scandir(proc_path, &dirent, 0, alphasort); + if (n < 0) + return err; + + thread_nr = nr_threads_synthesize; + + if (thread_nr <= 1) { + err = __perf_event__synthesize_threads(tool, process, + machine, mmap_data, + proc_map_timeout, + dirent, base, n); + goto free_dirent; + } + if (thread_nr > n) + thread_nr = n; + + synthesize_threads = calloc(sizeof(pthread_t), thread_nr); + if (synthesize_threads == NULL) + goto free_dirent; + + args = calloc(sizeof(*args), thread_nr); + if (args == NULL) + goto free_threads; + + num_per_thread = n / thread_nr; + m = n % thread_nr; + for (i = 0; i < thread_nr; i++) { + args[i].tool = tool; + args[i].process = process; + args[i].machine = machine; + args[i].mmap_data = mmap_data; + args[i].proc_map_timeout = proc_map_timeout; + args[i].dirent = dirent; + } + for (i = 0; i < m; i++) { + args[i].num = num_per_thread + 1; + args[i].start = i * args[i].num; + } + if (i != 0) + base = args[i-1].start + args[i-1].num; + for (j = i; j < thread_nr; j++) { + args[j].num = num_per_thread; + args[j].start = base + (j - i) * args[i].num; + } + + for (i = 0; i < thread_nr; i++) { + if (pthread_create(&synthesize_threads[i], NULL, + synthesize_threads_worker, &args[i])) + goto out_join; + } + err = 0; +out_join: + for (i = 0; i < thread_nr; i++) + pthread_join(synthesize_threads[i], NULL); + free(args); +free_threads: + free(synthesize_threads); +free_dirent: + for (i = 0; i < n; i++) + free(dirent[i]); + free(dirent); + + return err; +} + struct process_symbol_args { const char *name; u64 start; diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index ee7bcc898d35..d6cbb0a0d919 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -680,7 +680,8 @@ int perf_event__synthesize_cpu_map(struct perf_tool *tool, int perf_event__synthesize_threads(struct perf_tool *tool, perf_event__handler_t process, struct machine *machine, bool mmap_data, - unsigned int proc_map_timeout); + unsigned int proc_map_timeout, + unsigned int nr_threads_synthesize); int perf_event__synthesize_kernel_mmap(struct perf_tool *tool, perf_event__handler_t process, struct machine *machine); diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 585b4a3d64a4..7c3aa479201a 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -2218,12 +2218,16 @@ int machines__for_each_thread(struct machines *machines, int __machine__synthesize_threads(struct machine *machine, struct perf_tool *tool, struct target *target, struct thread_map *threads, perf_event__handler_t process, bool data_mmap, - unsigned int proc_map_timeout) + unsigned int proc_map_timeout, + unsigned int nr_threads_synthesize) { if (target__has_task(target)) return perf_event__synthesize_thread_map(tool, threads, process, machine, data_mmap, proc_map_timeout); else if (target__has_cpu(target)) - return perf_event__synthesize_threads(tool, process, machine, data_mmap, proc_map_timeout); + return perf_event__synthesize_threads(tool, process, + machine, data_mmap, + proc_map_timeout, + nr_threads_synthesize); /* command specified */ return 0; } diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h index b1cd516f2025..c6a299ea506c 100644 --- a/tools/perf/util/machine.h +++ b/tools/perf/util/machine.h @@ -257,15 +257,18 @@ int machines__for_each_thread(struct machines *machines, int __machine__synthesize_threads(struct machine *machine, struct perf_tool *tool, struct target *target, struct thread_map *threads, perf_event__handler_t process, bool data_mmap, - unsigned int proc_map_timeout); + unsigned int proc_map_timeout, + unsigned int nr_threads_synthesize); static inline int machine__synthesize_threads(struct machine *machine, struct target *target, struct thread_map *threads, bool data_mmap, - unsigned int proc_map_timeout) + unsigned int proc_map_timeout, + unsigned int nr_threads_synthesize) { return __machine__synthesize_threads(machine, NULL, target, threads, perf_event__process, data_mmap, - proc_map_timeout); + proc_map_timeout, + nr_threads_synthesize); } pid_t machine__get_current_tid(struct machine *machine, int cpu); -- cgit v1.2.3 From 0c6b499495e928777c41ca2de4fbb58788269690 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Fri, 29 Sep 2017 07:47:55 -0700 Subject: perf top: Add option to set the number of thread for event synthesize Using UINT_MAX to indicate the default thread#, which is the max number of online CPU. Committer testing: # perf trace --no-inherit -e clone -o /tmp/output perf top --num-thread-synthesize 9 # cat /tmp/output ? ( ? ): ... [continued]: clone()) = 26651 (perf) 0.059 ( 0.010 ms): clone(flags: VM|FS|FILES|SIGHAND|THREAD|SYSVSEM|SETTLS|PARENT_SETTID|CHILD_CLEARTID, child_stack: 0x7f5bfac44f30, parent_tidptr: 0x7f5bfac459d0, child_tidptr: 0x7f5bfac459d0, tls: 0x7f5bfac45700) = 26652 (perf) 0.116 ( 0.014 ms): clone(flags: VM|FS|FILES|SIGHAND|THREAD|SYSVSEM|SETTLS|PARENT_SETTID|CHILD_CLEARTID, child_stack: 0x7f5bfa443f30, parent_tidptr: 0x7f5bfa4449d0, child_tidptr: 0x7f5bfa4449d0, tls: 0x7f5bfa444700) = 26653 (perf) 0.141 ( 0.009 ms): clone(flags: VM|FS|FILES|SIGHAND|THREAD|SYSVSEM|SETTLS|PARENT_SETTID|CHILD_CLEARTID, child_stack: 0x7f5bf9c42f30, parent_tidptr: 0x7f5bf9c439d0, child_tidptr: 0x7f5bf9c439d0, tls: 0x7f5bf9c43700) = 26654 (perf) 0.160 ( 0.012 ms): clone(flags: VM|FS|FILES|SIGHAND|THREAD|SYSVSEM|SETTLS|PARENT_SETTID|CHILD_CLEARTID, child_stack: 0x7f5bf9441f30, parent_tidptr: 0x7f5bf94429d0, child_tidptr: 0x7f5bf94429d0, tls: 0x7f5bf9442700) = 26655 (perf) 0.232 ( 0.013 ms): clone(flags: VM|FS|FILES|SIGHAND|THREAD|SYSVSEM|SETTLS|PARENT_SETTID|CHILD_CLEARTID, child_stack: 0x7f5bf8c40f30, parent_tidptr: 0x7f5bf8c419d0, child_tidptr: 0x7f5bf8c419d0, tls: 0x7f5bf8c41700) = 26656 (perf) 0.393 ( 0.011 ms): clone(flags: VM|FS|FILES|SIGHAND|THREAD|SYSVSEM|SETTLS|PARENT_SETTID|CHILD_CLEARTID, child_stack: 0x7f5be3ffef30, parent_tidptr: 0x7f5be3fff9d0, child_tidptr: 0x7f5be3fff9d0, tls: 0x7f5be3fff700) = 26657 (perf) 0.802 ( 0.012 ms): clone(flags: VM|FS|FILES|SIGHAND|THREAD|SYSVSEM|SETTLS|PARENT_SETTID|CHILD_CLEARTID, child_stack: 0x7f5be37fdf30, parent_tidptr: 0x7f5be37fe9d0, child_tidptr: 0x7f5be37fe9d0, tls: 0x7f5be37fe700) = 26658 (perf) 1.411 ( 0.022 ms): clone(flags: VM|FS|FILES|SIGHAND|THREAD|SYSVSEM|SETTLS|PARENT_SETTID|CHILD_CLEARTID, child_stack: 0x7f5be2ffcf30, parent_tidptr: 0x7f5be2ffd9d0, child_tidptr: 0x7f5be2ffd9d0, tls: 0x7f5be2ffd700) = 26659 (perf) 246.422 ( 0.042 ms): clone(flags: VM|FS|FILES|SIGHAND|THREAD|SYSVSEM|SETTLS|PARENT_SETTID|CHILD_CLEARTID, child_stack: 0x7f5be2ffcf30, parent_tidptr: 0x7f5be2ffd9d0, child_tidptr: 0x7f5be2ffd9d0, tls: 0x7f5be2ffd700) = 26660 (perf) # Signed-off-by: Kan Liang Tested-by: Arnaldo Carvalho de Melo Acked-by: Jiri Olsa Cc: Adrian Hunter Cc: Alexei Starovoitov Cc: Andi Kleen Cc: He Kuang Cc: Lukasz Odzioba Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Wang Nan Link: http://lkml.kernel.org/r/1506696477-146932-5-git-send-email-kan.liang@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-top.txt | 3 +++ tools/perf/builtin-top.c | 11 ++++++++--- tools/perf/util/event.c | 5 ++++- tools/perf/util/top.h | 1 + 4 files changed, 16 insertions(+), 4 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/Documentation/perf-top.txt b/tools/perf/Documentation/perf-top.txt index d864ea6fd367..4353262bc462 100644 --- a/tools/perf/Documentation/perf-top.txt +++ b/tools/perf/Documentation/perf-top.txt @@ -240,6 +240,9 @@ Default is to monitor all CPUS. --force:: Don't do ownership validation. +--num-thread-synthesize:: + The number of threads to run when synthesizing events for existing processes. + By default, the number of threads equals to the number of online CPUs. INTERACTIVE PROMPTING KEYS -------------------------- diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index bc31b93cc1d8..477a8699f0b5 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -958,14 +958,16 @@ static int __cmd_top(struct perf_top *top) if (perf_session__register_idle_thread(top->session) < 0) goto out_delete; - perf_set_multithreaded(); + if (top->nr_threads_synthesize > 1) + perf_set_multithreaded(); machine__synthesize_threads(&top->session->machines.host, &opts->target, top->evlist->threads, false, opts->proc_map_timeout, - (unsigned int)sysconf(_SC_NPROCESSORS_ONLN)); + top->nr_threads_synthesize); - perf_set_singlethreaded(); + if (top->nr_threads_synthesize > 1) + perf_set_singlethreaded(); if (perf_hpp_list.socket) { ret = perf_env__read_cpu_topology_map(&perf_env); @@ -1118,6 +1120,7 @@ int cmd_top(int argc, const char **argv) }, .max_stack = sysctl_perf_event_max_stack, .sym_pcnt_filter = 5, + .nr_threads_synthesize = UINT_MAX, }; struct record_opts *opts = &top.record_opts; struct target *target = &opts->target; @@ -1227,6 +1230,8 @@ int cmd_top(int argc, const char **argv) OPT_BOOLEAN(0, "hierarchy", &symbol_conf.report_hierarchy, "Show entries in a hierarchy"), OPT_BOOLEAN(0, "force", &symbol_conf.force, "don't complain, do it"), + OPT_UINTEGER(0, "num-thread-synthesize", &top.nr_threads_synthesize, + "number of thread to run event synthesize"), OPT_END() }; const char * const top_usage[] = { diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index 0e678dd6bdbe..47eff4767edb 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -790,7 +790,10 @@ int perf_event__synthesize_threads(struct perf_tool *tool, if (n < 0) return err; - thread_nr = nr_threads_synthesize; + if (nr_threads_synthesize == UINT_MAX) + thread_nr = sysconf(_SC_NPROCESSORS_ONLN); + else + thread_nr = nr_threads_synthesize; if (thread_nr <= 1) { err = __perf_event__synthesize_threads(tool, process, diff --git a/tools/perf/util/top.h b/tools/perf/util/top.h index 9bdfb78a9a35..f4296e1e3bb8 100644 --- a/tools/perf/util/top.h +++ b/tools/perf/util/top.h @@ -37,6 +37,7 @@ struct perf_top { int sym_pcnt_filter; const char *sym_filter; float min_percent; + unsigned int nr_threads_synthesize; }; #define CONSOLE_CLEAR "" -- cgit v1.2.3 From f6a9820d572bd8384d982357cbad214b3a6c04bb Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 28 Sep 2017 18:06:33 +0200 Subject: perf tests attr: Fix group stat tests We started to use group read whenever it's possible: 82bf311e15d2 perf stat: Use group read for event groups That breaks some of attr tests, this change adds the new possible read_format value. Signed-off-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: Heiko Carstens Cc: Hendrik Brueckner Cc: Martin Schwidefsky Cc: Thomas-Mich Richter LPU-Reference: 20170928160633.GA26973@krava Link: http://lkml.kernel.org/n/tip-1ko2zc4nph93d8lfwjyk9ivz@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/attr/test-stat-group | 2 ++ tools/perf/tests/attr/test-stat-group1 | 2 ++ 2 files changed, 4 insertions(+) (limited to 'tools/perf') diff --git a/tools/perf/tests/attr/test-stat-group b/tools/perf/tests/attr/test-stat-group index fdc1596a8862..e15d6946e9b3 100644 --- a/tools/perf/tests/attr/test-stat-group +++ b/tools/perf/tests/attr/test-stat-group @@ -6,6 +6,7 @@ ret = 1 [event-1:base-stat] fd=1 group_fd=-1 +read_format=3|15 [event-2:base-stat] fd=2 @@ -13,3 +14,4 @@ group_fd=1 config=1 disabled=0 enable_on_exec=0 +read_format=3|15 diff --git a/tools/perf/tests/attr/test-stat-group1 b/tools/perf/tests/attr/test-stat-group1 index 2a1f86e4a904..1746751123dc 100644 --- a/tools/perf/tests/attr/test-stat-group1 +++ b/tools/perf/tests/attr/test-stat-group1 @@ -6,6 +6,7 @@ ret = 1 [event-1:base-stat] fd=1 group_fd=-1 +read_format=3|15 [event-2:base-stat] fd=2 @@ -13,3 +14,4 @@ group_fd=1 config=1 disabled=0 enable_on_exec=0 +read_format=3|15 -- cgit v1.2.3 From 008de6c69ccb9b62110188fb76b9e425cca686e4 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 14 Sep 2017 12:59:55 -0700 Subject: perf vendor events: Update JSON metrics for Broadwell Signed-off-by: Andi Kleen Link: http://lkml.kernel.org/r/20170914200748.GA13837@tassilo.jf.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- .../pmu-events/arch/x86/broadwell/bdw-metrics.json | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/pmu-events/arch/x86/broadwell/bdw-metrics.json b/tools/perf/pmu-events/arch/x86/broadwell/bdw-metrics.json index 49c5f123d811..7372c3207092 100644 --- a/tools/perf/pmu-events/arch/x86/broadwell/bdw-metrics.json +++ b/tools/perf/pmu-events/arch/x86/broadwell/bdw-metrics.json @@ -13,7 +13,7 @@ }, { "BriefDescription": "Rough Estimation of fraction of fetched lines bytes that were likely consumed by program instructions", - "MetricExpr": "min( 1 , IDQ.MITE_UOPS / ( UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY * 16 * ( ICACHE.HIT + ICACHE.MISSES ) / 4.0 ) )", + "MetricExpr": "min( 1 , IDQ.MITE_UOPS / ( (UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY) * 16 * ( ICACHE.HIT + ICACHE.MISSES ) / 4.0 ) )", "MetricGroup": "Frontend", "MetricName": "IFetch_Line_Utilization" }, @@ -25,7 +25,7 @@ }, { "BriefDescription": "Cycles Per Instruction (threaded)", - "MetricExpr": "1 / INST_RETIRED.ANY / cycles", + "MetricExpr": "1 / (INST_RETIRED.ANY / cycles)", "MetricGroup": "Pipeline;Summary", "MetricName": "CPI" }, @@ -37,7 +37,7 @@ }, { "BriefDescription": "Total issue-pipeline slots", - "MetricExpr": "4*( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles", + "MetricExpr": "4*(( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)", "MetricGroup": "TopDownL1", "MetricName": "SLOTS" }, @@ -49,19 +49,19 @@ }, { "BriefDescription": "Instructions Per Cycle (per physical core)", - "MetricExpr": "INST_RETIRED.ANY / ( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles", + "MetricExpr": "INST_RETIRED.ANY / (( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)", "MetricGroup": "SMT", "MetricName": "CoreIPC" }, { "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is at least 1 uop executed)", - "MetricExpr": "UOPS_EXECUTED.THREAD / ( cpu@uops_executed.core\\,cmask\\=1@ / 2) if #SMT_on else UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC", + "MetricExpr": "UOPS_EXECUTED.THREAD / (( cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ / 2) if #SMT_on else UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC)", "MetricGroup": "Pipeline;Ports_Utilization", "MetricName": "ILP" }, { "BriefDescription": "Average Branch Address Clear Cost (fraction of cycles)", - "MetricExpr": "2* ( RS_EVENTS.EMPTY_CYCLES - ICACHE.IFDATA_STALL - ( 14 * ITLB_MISSES.STLB_HIT + cpu@ITLB_MISSES.WALK_DURATION\\,cmask\\=1@ + 7* ITLB_MISSES.WALK_COMPLETED ) ) / RS_EVENTS.EMPTY_END", + "MetricExpr": "2* (( RS_EVENTS.EMPTY_CYCLES - ICACHE.IFDATA_STALL - (( 14 * ITLB_MISSES.STLB_HIT + ITLB_MISSES.WALK_DURATION:c1 + 7* ITLB_MISSES.WALK_COMPLETED )) ) / RS_EVENTS.EMPTY_END)", "MetricGroup": "Unknown_Branches", "MetricName": "BAClear_Cost" }, @@ -79,13 +79,13 @@ }, { "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least 1 such miss)", - "MetricExpr": "L1D_PEND_MISS.PENDING / ( cpu@l1d_pend_miss.pending_cycles\\,any\\=1@ / 2) if #SMT_on else L1D_PEND_MISS.PENDING_CYCLES", + "MetricExpr": "L1D_PEND_MISS.PENDING / (( cpu@l1d_pend_miss.pending_cycles\\,any\\=1@ / 2) if #SMT_on else L1D_PEND_MISS.PENDING_CYCLES)", "MetricGroup": "Memory_Bound;Memory_BW", "MetricName": "MLP" }, { "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses", - "MetricExpr": "( cpu@ITLB_MISSES.WALK_DURATION\\,cmask\\=1@ + cpu@DTLB_LOAD_MISSES.WALK_DURATION\\,cmask\\=1@ + cpu@DTLB_STORE_MISSES.WALK_DURATION\\,cmask\\=1@ + 7*(DTLB_STORE_MISSES.WALK_COMPLETED+DTLB_LOAD_MISSES.WALK_COMPLETED+ITLB_MISSES.WALK_COMPLETED)) / ( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles", + "MetricExpr": "( ITLB_MISSES.WALK_DURATION:c1 + DTLB_LOAD_MISSES.WALK_DURATION:c1 + DTLB_STORE_MISSES.WALK_DURATION:c1 + 7*(DTLB_STORE_MISSES.WALK_COMPLETED+DTLB_LOAD_MISSES.WALK_COMPLETED+ITLB_MISSES.WALK_COMPLETED)) / (( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)", "MetricGroup": "TLB", "MetricName": "Page_Walks_Utilization" }, @@ -97,7 +97,7 @@ }, { "BriefDescription": "Giga Floating Point Operations Per Second", - "MetricExpr": "( 1*( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2* FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4*( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8* FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE ) / 1000000000 / duration_time", + "MetricExpr": "(( 1*( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2* FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4*( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8* FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE )) / 1000000000 / duration_time", "MetricGroup": "FLOPS;Summary", "MetricName": "GFLOPs" }, -- cgit v1.2.3 From 663ad445640feeeffb3d405bd60a4b1e44019c56 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 14 Sep 2017 13:00:07 -0700 Subject: perf vendor events: Update JSON metrics for Broadwell Server Signed-off-by: Andi Kleen Link: http://lkml.kernel.org/r/20170914200748.GA13837@tassilo.jf.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- .../pmu-events/arch/x86/broadwellx/bdx-metrics.json | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/pmu-events/arch/x86/broadwellx/bdx-metrics.json b/tools/perf/pmu-events/arch/x86/broadwellx/bdx-metrics.json index 4248b029d199..4d9a1175a5cc 100644 --- a/tools/perf/pmu-events/arch/x86/broadwellx/bdx-metrics.json +++ b/tools/perf/pmu-events/arch/x86/broadwellx/bdx-metrics.json @@ -13,7 +13,7 @@ }, { "BriefDescription": "Rough Estimation of fraction of fetched lines bytes that were likely consumed by program instructions", - "MetricExpr": "min( 1 , IDQ.MITE_UOPS / ( UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY * 16 * ( ICACHE.HIT + ICACHE.MISSES ) / 4.0 ) )", + "MetricExpr": "min( 1 , IDQ.MITE_UOPS / ( (UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY) * 16 * ( ICACHE.HIT + ICACHE.MISSES ) / 4.0 ) )", "MetricGroup": "Frontend", "MetricName": "IFetch_Line_Utilization" }, @@ -25,7 +25,7 @@ }, { "BriefDescription": "Cycles Per Instruction (threaded)", - "MetricExpr": "1 / INST_RETIRED.ANY / cycles", + "MetricExpr": "1 / (INST_RETIRED.ANY / cycles)", "MetricGroup": "Pipeline;Summary", "MetricName": "CPI" }, @@ -37,7 +37,7 @@ }, { "BriefDescription": "Total issue-pipeline slots", - "MetricExpr": "4*( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles", + "MetricExpr": "4*(( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)", "MetricGroup": "TopDownL1", "MetricName": "SLOTS" }, @@ -49,19 +49,19 @@ }, { "BriefDescription": "Instructions Per Cycle (per physical core)", - "MetricExpr": "INST_RETIRED.ANY / ( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles", + "MetricExpr": "INST_RETIRED.ANY / (( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)", "MetricGroup": "SMT", "MetricName": "CoreIPC" }, { "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is at least 1 uop executed)", - "MetricExpr": "UOPS_EXECUTED.THREAD / ( cpu@uops_executed.core\\,cmask\\=1@ / 2) if #SMT_on else UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC", + "MetricExpr": "UOPS_EXECUTED.THREAD / (( cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ / 2) if #SMT_on else UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC)", "MetricGroup": "Pipeline;Ports_Utilization", "MetricName": "ILP" }, { "BriefDescription": "Average Branch Address Clear Cost (fraction of cycles)", - "MetricExpr": "2* ( RS_EVENTS.EMPTY_CYCLES - ICACHE.IFDATA_STALL - ( 14 * ITLB_MISSES.STLB_HIT + cpu@ITLB_MISSES.WALK_DURATION\\,cmask\\=1@ + 7* ITLB_MISSES.WALK_COMPLETED ) ) / RS_EVENTS.EMPTY_END", + "MetricExpr": "2* (( RS_EVENTS.EMPTY_CYCLES - ICACHE.IFDATA_STALL - (( 14 * ITLB_MISSES.STLB_HIT + ITLB_MISSES.WALK_DURATION:c1 + 7* ITLB_MISSES.WALK_COMPLETED )) ) / RS_EVENTS.EMPTY_END)", "MetricGroup": "Unknown_Branches", "MetricName": "BAClear_Cost" }, @@ -79,13 +79,13 @@ }, { "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least 1 such miss)", - "MetricExpr": "L1D_PEND_MISS.PENDING / ( cpu@l1d_pend_miss.pending_cycles\\,any\\=1@ / 2) if #SMT_on else L1D_PEND_MISS.PENDING_CYCLES", + "MetricExpr": "L1D_PEND_MISS.PENDING / (( cpu@l1d_pend_miss.pending_cycles\\,any\\=1@ / 2) if #SMT_on else L1D_PEND_MISS.PENDING_CYCLES)", "MetricGroup": "Memory_Bound;Memory_BW", "MetricName": "MLP" }, { "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses", - "MetricExpr": "( ITLB_MISSES.WALK_DURATION + DTLB_LOAD_MISSES.WALK_DURATION + DTLB_STORE_MISSES.WALK_DURATION + 7*(DTLB_STORE_MISSES.WALK_COMPLETED+DTLB_LOAD_MISSES.WALK_COMPLETED+ITLB_MISSES.WALK_COMPLETED) ) / (2*( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)", + "MetricExpr": "( ITLB_MISSES.WALK_DURATION + DTLB_LOAD_MISSES.WALK_DURATION + DTLB_STORE_MISSES.WALK_DURATION + 7*(DTLB_STORE_MISSES.WALK_COMPLETED+DTLB_LOAD_MISSES.WALK_COMPLETED+ITLB_MISSES.WALK_COMPLETED) ) / (2*(( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles))", "MetricGroup": "TLB", "MetricName": "Page_Walks_Utilization" }, @@ -97,7 +97,7 @@ }, { "BriefDescription": "Giga Floating Point Operations Per Second", - "MetricExpr": "( 1*( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2* FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4*( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8* FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE ) / 1000000000 / duration_time", + "MetricExpr": "(( 1*( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2* FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4*( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8* FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE )) / 1000000000 / duration_time", "MetricGroup": "FLOPS;Summary", "MetricName": "GFLOPs" }, -- cgit v1.2.3 From 0fba08e2492027ce2c016172ba326c40c32a6ec6 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 14 Sep 2017 13:00:16 -0700 Subject: perf vendor events: Update JSON metrics for Haswell Signed-off-by: Andi Kleen Link: http://lkml.kernel.org/r/20170914200748.GA13837@tassilo.jf.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/pmu-events/arch/x86/haswell/hsw-metrics.json | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/pmu-events/arch/x86/haswell/hsw-metrics.json b/tools/perf/pmu-events/arch/x86/haswell/hsw-metrics.json index 03d894988f0f..5ab5c78fe580 100644 --- a/tools/perf/pmu-events/arch/x86/haswell/hsw-metrics.json +++ b/tools/perf/pmu-events/arch/x86/haswell/hsw-metrics.json @@ -13,7 +13,7 @@ }, { "BriefDescription": "Rough Estimation of fraction of fetched lines bytes that were likely consumed by program instructions", - "MetricExpr": "min( 1 , IDQ.MITE_UOPS / ( UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY * 16 * ( ICACHE.HIT + ICACHE.MISSES ) / 4.0 ) )", + "MetricExpr": "min( 1 , IDQ.MITE_UOPS / ( (UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY) * 16 * ( ICACHE.HIT + ICACHE.MISSES ) / 4.0 ) )", "MetricGroup": "Frontend", "MetricName": "IFetch_Line_Utilization" }, @@ -25,7 +25,7 @@ }, { "BriefDescription": "Cycles Per Instruction (threaded)", - "MetricExpr": "1 / INST_RETIRED.ANY / cycles", + "MetricExpr": "1 / (INST_RETIRED.ANY / cycles)", "MetricGroup": "Pipeline;Summary", "MetricName": "CPI" }, @@ -37,7 +37,7 @@ }, { "BriefDescription": "Total issue-pipeline slots", - "MetricExpr": "4*( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles", + "MetricExpr": "4*(( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)", "MetricGroup": "TopDownL1", "MetricName": "SLOTS" }, @@ -49,19 +49,19 @@ }, { "BriefDescription": "Instructions Per Cycle (per physical core)", - "MetricExpr": "INST_RETIRED.ANY / ( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles", + "MetricExpr": "INST_RETIRED.ANY / (( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)", "MetricGroup": "SMT", "MetricName": "CoreIPC" }, { "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is at least 1 uop executed)", - "MetricExpr": "( UOPS_EXECUTED.CORE / 2 / ( cpu@uops_executed.core\\,cmask\\=1@ / 2)) if #SMT_on else (UOPS_EXECUTED.CORE / cpu@uops_executed.core\\,cmask\\=1@)", + "MetricExpr": "( UOPS_EXECUTED.CORE / 2 / (( cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ / 2) if #SMT_on else cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@) ) if #SMT_on else UOPS_EXECUTED.CORE / (( cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ / 2) if #SMT_on else cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@)", "MetricGroup": "Pipeline;Ports_Utilization", "MetricName": "ILP" }, { "BriefDescription": "Average Branch Address Clear Cost (fraction of cycles)", - "MetricExpr": "2* ( RS_EVENTS.EMPTY_CYCLES - ICACHE.IFDATA_STALL - ( 14 * ITLB_MISSES.STLB_HIT + ITLB_MISSES.WALK_DURATION ) ) / RS_EVENTS.EMPTY_END", + "MetricExpr": "2* (( RS_EVENTS.EMPTY_CYCLES - ICACHE.IFDATA_STALL - (( 14 * ITLB_MISSES.STLB_HIT + ITLB_MISSES.WALK_DURATION )) ) / RS_EVENTS.EMPTY_END)", "MetricGroup": "Unknown_Branches", "MetricName": "BAClear_Cost" }, @@ -79,13 +79,13 @@ }, { "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least 1 such miss)", - "MetricExpr": "L1D_PEND_MISS.PENDING / ( cpu@l1d_pend_miss.pending_cycles\\,any\\=1@ / 2) if #SMT_on else L1D_PEND_MISS.PENDING_CYCLES", + "MetricExpr": "L1D_PEND_MISS.PENDING / (( cpu@l1d_pend_miss.pending_cycles\\,any\\=1@ / 2) if #SMT_on else L1D_PEND_MISS.PENDING_CYCLES)", "MetricGroup": "Memory_Bound;Memory_BW", "MetricName": "MLP" }, { "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses", - "MetricExpr": "( ITLB_MISSES.WALK_DURATION + DTLB_LOAD_MISSES.WALK_DURATION + DTLB_STORE_MISSES.WALK_DURATION ) / ( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles", + "MetricExpr": "( ITLB_MISSES.WALK_DURATION + DTLB_LOAD_MISSES.WALK_DURATION + DTLB_STORE_MISSES.WALK_DURATION ) / (( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)", "MetricGroup": "TLB", "MetricName": "Page_Walks_Utilization" }, -- cgit v1.2.3 From 9cd6d864662dfe7a0c5c64fe37dc243c0aea7483 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 14 Sep 2017 13:00:29 -0700 Subject: perf vendor events: Update JSON metrics for Haswell Server Signed-off-by: Andi Kleen Link: http://lkml.kernel.org/r/20170914200748.GA13837@tassilo.jf.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/pmu-events/arch/x86/haswellx/hsx-metrics.json | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/pmu-events/arch/x86/haswellx/hsx-metrics.json b/tools/perf/pmu-events/arch/x86/haswellx/hsx-metrics.json index 0bae5eda9fb3..5ab5c78fe580 100644 --- a/tools/perf/pmu-events/arch/x86/haswellx/hsx-metrics.json +++ b/tools/perf/pmu-events/arch/x86/haswellx/hsx-metrics.json @@ -13,7 +13,7 @@ }, { "BriefDescription": "Rough Estimation of fraction of fetched lines bytes that were likely consumed by program instructions", - "MetricExpr": "min( 1 , IDQ.MITE_UOPS / ( UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY * 16 * ( ICACHE.HIT + ICACHE.MISSES ) / 4.0 ) )", + "MetricExpr": "min( 1 , IDQ.MITE_UOPS / ( (UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY) * 16 * ( ICACHE.HIT + ICACHE.MISSES ) / 4.0 ) )", "MetricGroup": "Frontend", "MetricName": "IFetch_Line_Utilization" }, @@ -25,7 +25,7 @@ }, { "BriefDescription": "Cycles Per Instruction (threaded)", - "MetricExpr": "1 / INST_RETIRED.ANY / cycles", + "MetricExpr": "1 / (INST_RETIRED.ANY / cycles)", "MetricGroup": "Pipeline;Summary", "MetricName": "CPI" }, @@ -37,7 +37,7 @@ }, { "BriefDescription": "Total issue-pipeline slots", - "MetricExpr": "4*( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles", + "MetricExpr": "4*(( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)", "MetricGroup": "TopDownL1", "MetricName": "SLOTS" }, @@ -49,19 +49,19 @@ }, { "BriefDescription": "Instructions Per Cycle (per physical core)", - "MetricExpr": "INST_RETIRED.ANY / ( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles", + "MetricExpr": "INST_RETIRED.ANY / (( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)", "MetricGroup": "SMT", "MetricName": "CoreIPC" }, { "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is at least 1 uop executed)", - "MetricExpr": "( UOPS_EXECUTED.CORE / 2 / ( cpu@uops_executed.core\\,cmask\\=1@ / 2)) if #SMT_on else UOPS_EXECUTED.CORE / cpu@uops_executed.core\\,cmask\\=1@", + "MetricExpr": "( UOPS_EXECUTED.CORE / 2 / (( cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ / 2) if #SMT_on else cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@) ) if #SMT_on else UOPS_EXECUTED.CORE / (( cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ / 2) if #SMT_on else cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@)", "MetricGroup": "Pipeline;Ports_Utilization", "MetricName": "ILP" }, { "BriefDescription": "Average Branch Address Clear Cost (fraction of cycles)", - "MetricExpr": "2* ( RS_EVENTS.EMPTY_CYCLES - ICACHE.IFDATA_STALL - ( 14 * ITLB_MISSES.STLB_HIT + ITLB_MISSES.WALK_DURATION ) ) / RS_EVENTS.EMPTY_END", + "MetricExpr": "2* (( RS_EVENTS.EMPTY_CYCLES - ICACHE.IFDATA_STALL - (( 14 * ITLB_MISSES.STLB_HIT + ITLB_MISSES.WALK_DURATION )) ) / RS_EVENTS.EMPTY_END)", "MetricGroup": "Unknown_Branches", "MetricName": "BAClear_Cost" }, @@ -79,13 +79,13 @@ }, { "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least 1 such miss)", - "MetricExpr": "L1D_PEND_MISS.PENDING / ( cpu@l1d_pend_miss.pending_cycles\\,any\\=1@ / 2) if #SMT_on else L1D_PEND_MISS.PENDING_CYCLES", + "MetricExpr": "L1D_PEND_MISS.PENDING / (( cpu@l1d_pend_miss.pending_cycles\\,any\\=1@ / 2) if #SMT_on else L1D_PEND_MISS.PENDING_CYCLES)", "MetricGroup": "Memory_Bound;Memory_BW", "MetricName": "MLP" }, { "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses", - "MetricExpr": "( ITLB_MISSES.WALK_DURATION + DTLB_LOAD_MISSES.WALK_DURATION + DTLB_STORE_MISSES.WALK_DURATION ) / ( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles", + "MetricExpr": "( ITLB_MISSES.WALK_DURATION + DTLB_LOAD_MISSES.WALK_DURATION + DTLB_STORE_MISSES.WALK_DURATION ) / (( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)", "MetricGroup": "TLB", "MetricName": "Page_Walks_Utilization" }, -- cgit v1.2.3 From 1de315249415863dca167623909fb7951569cf8d Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 14 Sep 2017 13:00:37 -0700 Subject: perf vendor events: Update JSON metrics for IvyBridge Signed-off-by: Andi Kleen Link: http://lkml.kernel.org/r/20170914200748.GA13837@tassilo.jf.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- .../pmu-events/arch/x86/ivybridge/ivb-metrics.json | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/pmu-events/arch/x86/ivybridge/ivb-metrics.json b/tools/perf/pmu-events/arch/x86/ivybridge/ivb-metrics.json index 057a832f1a6a..7c2679514efb 100644 --- a/tools/perf/pmu-events/arch/x86/ivybridge/ivb-metrics.json +++ b/tools/perf/pmu-events/arch/x86/ivybridge/ivb-metrics.json @@ -13,7 +13,7 @@ }, { "BriefDescription": "Rough Estimation of fraction of fetched lines bytes that were likely consumed by program instructions", - "MetricExpr": "min( 1 , UOPS_ISSUED.ANY / ( UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY * 32 * ( ICACHE.HIT + ICACHE.MISSES ) / 4) )", + "MetricExpr": "min( 1 , UOPS_ISSUED.ANY / ( (UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY) * 32 * ( ICACHE.HIT + ICACHE.MISSES ) / 4) )", "MetricGroup": "Frontend", "MetricName": "IFetch_Line_Utilization" }, @@ -25,7 +25,7 @@ }, { "BriefDescription": "Cycles Per Instruction (threaded)", - "MetricExpr": "1 / INST_RETIRED.ANY / cycles", + "MetricExpr": "1 / (INST_RETIRED.ANY / cycles)", "MetricGroup": "Pipeline;Summary", "MetricName": "CPI" }, @@ -37,7 +37,7 @@ }, { "BriefDescription": "Total issue-pipeline slots", - "MetricExpr": "4*( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles", + "MetricExpr": "4*(( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)", "MetricGroup": "TopDownL1", "MetricName": "SLOTS" }, @@ -49,19 +49,19 @@ }, { "BriefDescription": "Instructions Per Cycle (per physical core)", - "MetricExpr": "INST_RETIRED.ANY / ( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles", + "MetricExpr": "INST_RETIRED.ANY / (( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)", "MetricGroup": "SMT", "MetricName": "CoreIPC" }, { "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is at least 1 uop executed)", - "MetricExpr": "UOPS_EXECUTED.THREAD / ( cpu@uops_executed.core\\,cmask\\=1@ / 2) if #SMT_on else UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC", + "MetricExpr": "UOPS_EXECUTED.THREAD / (( cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ / 2) if #SMT_on else UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC)", "MetricGroup": "Pipeline;Ports_Utilization", "MetricName": "ILP" }, { "BriefDescription": "Average Branch Address Clear Cost (fraction of cycles)", - "MetricExpr": "2* ( RS_EVENTS.EMPTY_CYCLES - ICACHE.IFETCH_STALL ) / RS_EVENTS.EMPTY_END", + "MetricExpr": "2* (( RS_EVENTS.EMPTY_CYCLES - ICACHE.IFETCH_STALL ) / RS_EVENTS.EMPTY_END)", "MetricGroup": "Unknown_Branches", "MetricName": "BAClear_Cost" }, @@ -79,13 +79,13 @@ }, { "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least 1 such miss)", - "MetricExpr": "L1D_PEND_MISS.PENDING / ( cpu@l1d_pend_miss.pending_cycles\\,any\\=1@ / 2) if #SMT_on else L1D_PEND_MISS.PENDING_CYCLES", + "MetricExpr": "L1D_PEND_MISS.PENDING / (( cpu@l1d_pend_miss.pending_cycles\\,any\\=1@ / 2) if #SMT_on else L1D_PEND_MISS.PENDING_CYCLES)", "MetricGroup": "Memory_Bound;Memory_BW", "MetricName": "MLP" }, { "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses", - "MetricExpr": "( ITLB_MISSES.WALK_DURATION + DTLB_LOAD_MISSES.WALK_DURATION + DTLB_STORE_MISSES.WALK_DURATION ) / ( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles", + "MetricExpr": "( ITLB_MISSES.WALK_DURATION + DTLB_LOAD_MISSES.WALK_DURATION + DTLB_STORE_MISSES.WALK_DURATION ) / (( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)", "MetricGroup": "TLB", "MetricName": "Page_Walks_Utilization" }, @@ -97,7 +97,7 @@ }, { "BriefDescription": "Giga Floating Point Operations Per Second", - "MetricExpr": "( 1*( FP_COMP_OPS_EXE.SSE_SCALAR_SINGLE + FP_COMP_OPS_EXE.SSE_SCALAR_DOUBLE ) + 2* FP_COMP_OPS_EXE.SSE_PACKED_DOUBLE + 4*( FP_COMP_OPS_EXE.SSE_PACKED_SINGLE + SIMD_FP_256.PACKED_DOUBLE ) + 8* SIMD_FP_256.PACKED_SINGLE ) / 1000000000 / duration_time", + "MetricExpr": "(( 1*( FP_COMP_OPS_EXE.SSE_SCALAR_SINGLE + FP_COMP_OPS_EXE.SSE_SCALAR_DOUBLE ) + 2* FP_COMP_OPS_EXE.SSE_PACKED_DOUBLE + 4*( FP_COMP_OPS_EXE.SSE_PACKED_SINGLE + SIMD_FP_256.PACKED_DOUBLE ) + 8* SIMD_FP_256.PACKED_SINGLE )) / 1000000000 / duration_time", "MetricGroup": "FLOPS;Summary", "MetricName": "GFLOPs" }, -- cgit v1.2.3 From 7347bba5552f479d4292ffd008d18d41a965f021 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 14 Sep 2017 13:00:44 -0700 Subject: perf vendor events: Update JSON metrics for IvyTown Signed-off-by: Andi Kleen Link: http://lkml.kernel.org/r/20170914200748.GA13837@tassilo.jf.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- .../perf/pmu-events/arch/x86/ivytown/ivt-metrics.json | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/pmu-events/arch/x86/ivytown/ivt-metrics.json b/tools/perf/pmu-events/arch/x86/ivytown/ivt-metrics.json index 057a832f1a6a..7c2679514efb 100644 --- a/tools/perf/pmu-events/arch/x86/ivytown/ivt-metrics.json +++ b/tools/perf/pmu-events/arch/x86/ivytown/ivt-metrics.json @@ -13,7 +13,7 @@ }, { "BriefDescription": "Rough Estimation of fraction of fetched lines bytes that were likely consumed by program instructions", - "MetricExpr": "min( 1 , UOPS_ISSUED.ANY / ( UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY * 32 * ( ICACHE.HIT + ICACHE.MISSES ) / 4) )", + "MetricExpr": "min( 1 , UOPS_ISSUED.ANY / ( (UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY) * 32 * ( ICACHE.HIT + ICACHE.MISSES ) / 4) )", "MetricGroup": "Frontend", "MetricName": "IFetch_Line_Utilization" }, @@ -25,7 +25,7 @@ }, { "BriefDescription": "Cycles Per Instruction (threaded)", - "MetricExpr": "1 / INST_RETIRED.ANY / cycles", + "MetricExpr": "1 / (INST_RETIRED.ANY / cycles)", "MetricGroup": "Pipeline;Summary", "MetricName": "CPI" }, @@ -37,7 +37,7 @@ }, { "BriefDescription": "Total issue-pipeline slots", - "MetricExpr": "4*( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles", + "MetricExpr": "4*(( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)", "MetricGroup": "TopDownL1", "MetricName": "SLOTS" }, @@ -49,19 +49,19 @@ }, { "BriefDescription": "Instructions Per Cycle (per physical core)", - "MetricExpr": "INST_RETIRED.ANY / ( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles", + "MetricExpr": "INST_RETIRED.ANY / (( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)", "MetricGroup": "SMT", "MetricName": "CoreIPC" }, { "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is at least 1 uop executed)", - "MetricExpr": "UOPS_EXECUTED.THREAD / ( cpu@uops_executed.core\\,cmask\\=1@ / 2) if #SMT_on else UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC", + "MetricExpr": "UOPS_EXECUTED.THREAD / (( cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ / 2) if #SMT_on else UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC)", "MetricGroup": "Pipeline;Ports_Utilization", "MetricName": "ILP" }, { "BriefDescription": "Average Branch Address Clear Cost (fraction of cycles)", - "MetricExpr": "2* ( RS_EVENTS.EMPTY_CYCLES - ICACHE.IFETCH_STALL ) / RS_EVENTS.EMPTY_END", + "MetricExpr": "2* (( RS_EVENTS.EMPTY_CYCLES - ICACHE.IFETCH_STALL ) / RS_EVENTS.EMPTY_END)", "MetricGroup": "Unknown_Branches", "MetricName": "BAClear_Cost" }, @@ -79,13 +79,13 @@ }, { "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least 1 such miss)", - "MetricExpr": "L1D_PEND_MISS.PENDING / ( cpu@l1d_pend_miss.pending_cycles\\,any\\=1@ / 2) if #SMT_on else L1D_PEND_MISS.PENDING_CYCLES", + "MetricExpr": "L1D_PEND_MISS.PENDING / (( cpu@l1d_pend_miss.pending_cycles\\,any\\=1@ / 2) if #SMT_on else L1D_PEND_MISS.PENDING_CYCLES)", "MetricGroup": "Memory_Bound;Memory_BW", "MetricName": "MLP" }, { "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses", - "MetricExpr": "( ITLB_MISSES.WALK_DURATION + DTLB_LOAD_MISSES.WALK_DURATION + DTLB_STORE_MISSES.WALK_DURATION ) / ( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles", + "MetricExpr": "( ITLB_MISSES.WALK_DURATION + DTLB_LOAD_MISSES.WALK_DURATION + DTLB_STORE_MISSES.WALK_DURATION ) / (( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)", "MetricGroup": "TLB", "MetricName": "Page_Walks_Utilization" }, @@ -97,7 +97,7 @@ }, { "BriefDescription": "Giga Floating Point Operations Per Second", - "MetricExpr": "( 1*( FP_COMP_OPS_EXE.SSE_SCALAR_SINGLE + FP_COMP_OPS_EXE.SSE_SCALAR_DOUBLE ) + 2* FP_COMP_OPS_EXE.SSE_PACKED_DOUBLE + 4*( FP_COMP_OPS_EXE.SSE_PACKED_SINGLE + SIMD_FP_256.PACKED_DOUBLE ) + 8* SIMD_FP_256.PACKED_SINGLE ) / 1000000000 / duration_time", + "MetricExpr": "(( 1*( FP_COMP_OPS_EXE.SSE_SCALAR_SINGLE + FP_COMP_OPS_EXE.SSE_SCALAR_DOUBLE ) + 2* FP_COMP_OPS_EXE.SSE_PACKED_DOUBLE + 4*( FP_COMP_OPS_EXE.SSE_PACKED_SINGLE + SIMD_FP_256.PACKED_DOUBLE ) + 8* SIMD_FP_256.PACKED_SINGLE )) / 1000000000 / duration_time", "MetricGroup": "FLOPS;Summary", "MetricName": "GFLOPs" }, -- cgit v1.2.3 From 984d91f4c62f64026cbfef51f609971025934cec Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 14 Sep 2017 13:00:51 -0700 Subject: perf vendor events: Update JSON metrics for JakeTown Signed-off-by: Andi Kleen Link: http://lkml.kernel.org/r/20170914200748.GA13837@tassilo.jf.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/pmu-events/arch/x86/jaketown/jkt-metrics.json | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/pmu-events/arch/x86/jaketown/jkt-metrics.json b/tools/perf/pmu-events/arch/x86/jaketown/jkt-metrics.json index b35b1c153c8a..2b18008cf482 100644 --- a/tools/perf/pmu-events/arch/x86/jaketown/jkt-metrics.json +++ b/tools/perf/pmu-events/arch/x86/jaketown/jkt-metrics.json @@ -13,7 +13,7 @@ }, { "BriefDescription": "Rough Estimation of fraction of fetched lines bytes that were likely consumed by program instructions", - "MetricExpr": "min( 1 , UOPS_ISSUED.ANY / ( UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY * 32 * ( ICACHE.HIT + ICACHE.MISSES ) / 4) )", + "MetricExpr": "min( 1 , UOPS_ISSUED.ANY / ( (UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY) * 32 * ( ICACHE.HIT + ICACHE.MISSES ) / 4) )", "MetricGroup": "Frontend", "MetricName": "IFetch_Line_Utilization" }, @@ -25,7 +25,7 @@ }, { "BriefDescription": "Cycles Per Instruction (threaded)", - "MetricExpr": "1 / INST_RETIRED.ANY / cycles", + "MetricExpr": "1 / (INST_RETIRED.ANY / cycles)", "MetricGroup": "Pipeline;Summary", "MetricName": "CPI" }, @@ -37,7 +37,7 @@ }, { "BriefDescription": "Total issue-pipeline slots", - "MetricExpr": "4*( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles", + "MetricExpr": "4*(( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)", "MetricGroup": "TopDownL1", "MetricName": "SLOTS" }, @@ -49,13 +49,13 @@ }, { "BriefDescription": "Instructions Per Cycle (per physical core)", - "MetricExpr": "INST_RETIRED.ANY / ( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles", + "MetricExpr": "INST_RETIRED.ANY / (( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)", "MetricGroup": "SMT", "MetricName": "CoreIPC" }, { "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is at least 1 uop executed)", - "MetricExpr": "UOPS_DISPATCHED.THREAD / ( cpu@UOPS_DISPATCHED.CORE\\,cmask\\=1@ / 2) if #SMT_on else cpu@UOPS_DISPATCHED.CORE\\,cmask\\=1@", + "MetricExpr": "UOPS_DISPATCHED.THREAD / (( UOPS_DISPATCHED.CORE:c1 / 2) if #SMT_on else UOPS_DISPATCHED.CORE:c1)", "MetricGroup": "Pipeline;Ports_Utilization", "MetricName": "ILP" }, @@ -73,7 +73,7 @@ }, { "BriefDescription": "Giga Floating Point Operations Per Second", - "MetricExpr": "( 1*( FP_COMP_OPS_EXE.SSE_SCALAR_SINGLE + FP_COMP_OPS_EXE.SSE_SCALAR_DOUBLE ) + 2* FP_COMP_OPS_EXE.SSE_PACKED_DOUBLE + 4*( FP_COMP_OPS_EXE.SSE_PACKED_SINGLE + SIMD_FP_256.PACKED_DOUBLE ) + 8* SIMD_FP_256.PACKED_SINGLE ) / 1000000000 / duration_time", + "MetricExpr": "(( 1*( FP_COMP_OPS_EXE.SSE_SCALAR_SINGLE + FP_COMP_OPS_EXE.SSE_SCALAR_DOUBLE ) + 2* FP_COMP_OPS_EXE.SSE_PACKED_DOUBLE + 4*( FP_COMP_OPS_EXE.SSE_PACKED_SINGLE + SIMD_FP_256.PACKED_DOUBLE ) + 8* SIMD_FP_256.PACKED_SINGLE )) / 1000000000 / duration_time", "MetricGroup": "FLOPS;Summary", "MetricName": "GFLOPs" }, -- cgit v1.2.3 From 41a13b74a0406955c488000aeca4021c0efdd243 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 14 Sep 2017 13:01:01 -0700 Subject: perf vendor events: Update JSON metrics for Sandy Bridge Signed-off-by: Andi Kleen Link: http://lkml.kernel.org/r/20170914200748.GA13837@tassilo.jf.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/pmu-events/arch/x86/sandybridge/snb-metrics.json | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/pmu-events/arch/x86/sandybridge/snb-metrics.json b/tools/perf/pmu-events/arch/x86/sandybridge/snb-metrics.json index b35b1c153c8a..2b18008cf482 100644 --- a/tools/perf/pmu-events/arch/x86/sandybridge/snb-metrics.json +++ b/tools/perf/pmu-events/arch/x86/sandybridge/snb-metrics.json @@ -13,7 +13,7 @@ }, { "BriefDescription": "Rough Estimation of fraction of fetched lines bytes that were likely consumed by program instructions", - "MetricExpr": "min( 1 , UOPS_ISSUED.ANY / ( UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY * 32 * ( ICACHE.HIT + ICACHE.MISSES ) / 4) )", + "MetricExpr": "min( 1 , UOPS_ISSUED.ANY / ( (UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY) * 32 * ( ICACHE.HIT + ICACHE.MISSES ) / 4) )", "MetricGroup": "Frontend", "MetricName": "IFetch_Line_Utilization" }, @@ -25,7 +25,7 @@ }, { "BriefDescription": "Cycles Per Instruction (threaded)", - "MetricExpr": "1 / INST_RETIRED.ANY / cycles", + "MetricExpr": "1 / (INST_RETIRED.ANY / cycles)", "MetricGroup": "Pipeline;Summary", "MetricName": "CPI" }, @@ -37,7 +37,7 @@ }, { "BriefDescription": "Total issue-pipeline slots", - "MetricExpr": "4*( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles", + "MetricExpr": "4*(( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)", "MetricGroup": "TopDownL1", "MetricName": "SLOTS" }, @@ -49,13 +49,13 @@ }, { "BriefDescription": "Instructions Per Cycle (per physical core)", - "MetricExpr": "INST_RETIRED.ANY / ( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles", + "MetricExpr": "INST_RETIRED.ANY / (( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)", "MetricGroup": "SMT", "MetricName": "CoreIPC" }, { "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is at least 1 uop executed)", - "MetricExpr": "UOPS_DISPATCHED.THREAD / ( cpu@UOPS_DISPATCHED.CORE\\,cmask\\=1@ / 2) if #SMT_on else cpu@UOPS_DISPATCHED.CORE\\,cmask\\=1@", + "MetricExpr": "UOPS_DISPATCHED.THREAD / (( UOPS_DISPATCHED.CORE:c1 / 2) if #SMT_on else UOPS_DISPATCHED.CORE:c1)", "MetricGroup": "Pipeline;Ports_Utilization", "MetricName": "ILP" }, @@ -73,7 +73,7 @@ }, { "BriefDescription": "Giga Floating Point Operations Per Second", - "MetricExpr": "( 1*( FP_COMP_OPS_EXE.SSE_SCALAR_SINGLE + FP_COMP_OPS_EXE.SSE_SCALAR_DOUBLE ) + 2* FP_COMP_OPS_EXE.SSE_PACKED_DOUBLE + 4*( FP_COMP_OPS_EXE.SSE_PACKED_SINGLE + SIMD_FP_256.PACKED_DOUBLE ) + 8* SIMD_FP_256.PACKED_SINGLE ) / 1000000000 / duration_time", + "MetricExpr": "(( 1*( FP_COMP_OPS_EXE.SSE_SCALAR_SINGLE + FP_COMP_OPS_EXE.SSE_SCALAR_DOUBLE ) + 2* FP_COMP_OPS_EXE.SSE_PACKED_DOUBLE + 4*( FP_COMP_OPS_EXE.SSE_PACKED_SINGLE + SIMD_FP_256.PACKED_DOUBLE ) + 8* SIMD_FP_256.PACKED_SINGLE )) / 1000000000 / duration_time", "MetricGroup": "FLOPS;Summary", "MetricName": "GFLOPs" }, -- cgit v1.2.3 From e3f2dadf76591a085e61db45736c3a4a9369b314 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 14 Sep 2017 13:01:08 -0700 Subject: perf vendor events: Update JSON metrics for Skylake Signed-off-by: Andi Kleen Link: http://lkml.kernel.org/r/20170914200748.GA13837@tassilo.jf.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- .../pmu-events/arch/x86/skylake/skl-metrics.json | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/pmu-events/arch/x86/skylake/skl-metrics.json b/tools/perf/pmu-events/arch/x86/skylake/skl-metrics.json index 411f9411ec9e..36c903faed0b 100644 --- a/tools/perf/pmu-events/arch/x86/skylake/skl-metrics.json +++ b/tools/perf/pmu-events/arch/x86/skylake/skl-metrics.json @@ -13,7 +13,7 @@ }, { "BriefDescription": "Rough Estimation of fraction of fetched lines bytes that were likely consumed by program instructions", - "MetricExpr": "min( 1 , UOPS_ISSUED.ANY / (UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY * 64 * ( ICACHE_64B.IFTAG_HIT + ICACHE_64B.IFTAG_MISS ) / 4.1) )", + "MetricExpr": "min( 1 , UOPS_ISSUED.ANY / ((UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY) * 64 * ( ICACHE_64B.IFTAG_HIT + ICACHE_64B.IFTAG_MISS ) / 4.1) )", "MetricGroup": "Frontend", "MetricName": "IFetch_Line_Utilization" }, @@ -25,7 +25,7 @@ }, { "BriefDescription": "Cycles Per Instruction (threaded)", - "MetricExpr": "1 / INST_RETIRED.ANY / cycles", + "MetricExpr": "1 / (INST_RETIRED.ANY / cycles)", "MetricGroup": "Pipeline;Summary", "MetricName": "CPI" }, @@ -37,7 +37,7 @@ }, { "BriefDescription": "Total issue-pipeline slots", - "MetricExpr": "4*( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles", + "MetricExpr": "4*(( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)", "MetricGroup": "TopDownL1", "MetricName": "SLOTS" }, @@ -49,19 +49,19 @@ }, { "BriefDescription": "Instructions Per Cycle (per physical core)", - "MetricExpr": "INST_RETIRED.ANY / ( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles", + "MetricExpr": "INST_RETIRED.ANY / (( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)", "MetricGroup": "SMT", "MetricName": "CoreIPC" }, { "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is at least 1 uop executed)", - "MetricExpr": "UOPS_EXECUTED.THREAD / ( UOPS_EXECUTED.CORE_CYCLES_GE_1 / 2) if #SMT_on else UOPS_EXECUTED.CORE_CYCLES_GE_1", + "MetricExpr": "UOPS_EXECUTED.THREAD / (( UOPS_EXECUTED.CORE_CYCLES_GE_1 / 2) if #SMT_on else UOPS_EXECUTED.CORE_CYCLES_GE_1)", "MetricGroup": "Pipeline;Ports_Utilization", "MetricName": "ILP" }, { "BriefDescription": "Average Branch Address Clear Cost (fraction of cycles)", - "MetricExpr": "2* ( RS_EVENTS.EMPTY_CYCLES - ICACHE_16B.IFDATA_STALL - ICACHE_64B.IFTAG_STALL ) / RS_EVENTS.EMPTY_END", + "MetricExpr": "2* (( RS_EVENTS.EMPTY_CYCLES - ICACHE_16B.IFDATA_STALL - ICACHE_64B.IFTAG_STALL ) / RS_EVENTS.EMPTY_END)", "MetricGroup": "Unknown_Branches", "MetricName": "BAClear_Cost" }, @@ -73,19 +73,19 @@ }, { "BriefDescription": "Actual Average Latency for L1 data-cache miss demand loads", - "MetricExpr": "L1D_PEND_MISS.PENDING / ( MEM_LOAD_RETIRED.L1_MISS + MEM_LOAD_RETIRED.FB_HIT )", + "MetricExpr": "L1D_PEND_MISS.PENDING / ( MEM_LOAD_RETIRED.L1_MISS_PS + MEM_LOAD_RETIRED.FB_HIT_PS )", "MetricGroup": "Memory_Bound;Memory_Lat", "MetricName": "Load_Miss_Real_Latency" }, { "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least 1 such miss)", - "MetricExpr": "L1D_PEND_MISS.PENDING / ( L1D_PEND_MISS.PENDING_CYCLES_ANY / 2) if #SMT_on else L1D_PEND_MISS.PENDING_CYCLES", + "MetricExpr": "L1D_PEND_MISS.PENDING / (( L1D_PEND_MISS.PENDING_CYCLES_ANY / 2) if #SMT_on else L1D_PEND_MISS.PENDING_CYCLES)", "MetricGroup": "Memory_Bound;Memory_BW", "MetricName": "MLP" }, { "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses", - "MetricExpr": "( ITLB_MISSES.WALK_PENDING + DTLB_LOAD_MISSES.WALK_PENDING + DTLB_STORE_MISSES.WALK_PENDING + EPT.WALK_PENDING ) / ( 2 * ( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles )", + "MetricExpr": "( ITLB_MISSES.WALK_PENDING + DTLB_LOAD_MISSES.WALK_PENDING + DTLB_STORE_MISSES.WALK_PENDING + EPT.WALK_PENDING ) / ( 2 * (( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles) )", "MetricGroup": "TLB", "MetricName": "Page_Walks_Utilization" }, @@ -97,7 +97,7 @@ }, { "BriefDescription": "Giga Floating Point Operations Per Second", - "MetricExpr": "( 1*( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2* FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4*( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8* FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE ) / 1000000000 / duration_time", + "MetricExpr": "(( 1*( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2* FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4*( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8* FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE )) / 1000000000 / duration_time", "MetricGroup": "FLOPS;Summary", "MetricName": "GFLOPs" }, -- cgit v1.2.3 From ead81ee4f887c2e6205dacb83ab2e24367a003c1 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 14 Sep 2017 13:02:28 -0700 Subject: perf vendor events: Update JSON metrics for Skylake Server Signed-off-by: Andi Kleen Link: http://lkml.kernel.org/r/20170914200748.GA13837@tassilo.jf.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- .../pmu-events/arch/x86/skylakex/skx-metrics.json | 42 +++++++--------------- 1 file changed, 12 insertions(+), 30 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json b/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json index 68f12a26c816..36c903faed0b 100644 --- a/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json +++ b/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json @@ -13,19 +13,19 @@ }, { "BriefDescription": "Rough Estimation of fraction of fetched lines bytes that were likely consumed by program instructions", - "MetricExpr": "min( 1 , UOPS_ISSUED.ANY / (UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY * 64 * ( ICACHE_64B.IFTAG_HIT + ICACHE_64B.IFTAG_MISS ) / 4.1) )", + "MetricExpr": "min( 1 , UOPS_ISSUED.ANY / ((UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY) * 64 * ( ICACHE_64B.IFTAG_HIT + ICACHE_64B.IFTAG_MISS ) / 4.1) )", "MetricGroup": "Frontend", "MetricName": "IFetch_Line_Utilization" }, { - "BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded ICache; or Uop Cache)", + "BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded Icache; or Uop Cache)", "MetricExpr": "IDQ.DSB_UOPS / ( IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS )", "MetricGroup": "DSB; Frontend_Bandwidth", "MetricName": "DSB_Coverage" }, { "BriefDescription": "Cycles Per Instruction (threaded)", - "MetricExpr": "1 / INST_RETIRED.ANY / cycles", + "MetricExpr": "1 / (INST_RETIRED.ANY / cycles)", "MetricGroup": "Pipeline;Summary", "MetricName": "CPI" }, @@ -36,8 +36,8 @@ "MetricName": "CLKS" }, { - "BriefDescription": "Total issue-pipeline slots (per-core)", - "MetricExpr": "4*cycles if not #SMT_on else (( CPU_CLK_UNHALTED.THREAD / 2) * (CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK )) if #EBS_Mode else ( CPU_CLK_UNHALTED.THREAD_ANY / 2 )", + "BriefDescription": "Total issue-pipeline slots", + "MetricExpr": "4*(( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)", "MetricGroup": "TopDownL1", "MetricName": "SLOTS" }, @@ -49,25 +49,25 @@ }, { "BriefDescription": "Instructions Per Cycle (per physical core)", - "MetricExpr": "INST_RETIRED.ANY / cycles if not #SMT_on else (( CPU_CLK_UNHALTED.THREAD / 2) * (CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK )) if #EBS_Mode else ( CPU_CLK_UNHALTED.THREAD_ANY / 2 )", + "MetricExpr": "INST_RETIRED.ANY / (( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)", "MetricGroup": "SMT", "MetricName": "CoreIPC" }, { "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is at least 1 uop executed)", - "MetricExpr": "UOPS_EXECUTED.THREAD / ( UOPS_EXECUTED.CORE_CYCLES_GE_1 / 2) if #SMT_on else UOPS_EXECUTED.CORE_CYCLES_GE_1", + "MetricExpr": "UOPS_EXECUTED.THREAD / (( UOPS_EXECUTED.CORE_CYCLES_GE_1 / 2) if #SMT_on else UOPS_EXECUTED.CORE_CYCLES_GE_1)", "MetricGroup": "Pipeline;Ports_Utilization", "MetricName": "ILP" }, { "BriefDescription": "Average Branch Address Clear Cost (fraction of cycles)", - "MetricExpr": "( RS_EVENTS.EMPTY_CYCLES - (ICACHE_16B.IFDATA_STALL +2* ICACHE_16B.IFDATA_STALL:c1:e1) - ICACHE_64B.IFTAG_STALL ) / RS_EVENTS.EMPTY_END", + "MetricExpr": "2* (( RS_EVENTS.EMPTY_CYCLES - ICACHE_16B.IFDATA_STALL - ICACHE_64B.IFTAG_STALL ) / RS_EVENTS.EMPTY_END)", "MetricGroup": "Unknown_Branches", "MetricName": "BAClear_Cost" }, { "BriefDescription": "Core actual clocks when any thread is active on the physical core", - "MetricExpr": "CPU_CLK_UNHALTED.THREAD if not #SMT_on else (( CPU_CLK_UNHALTED.THREAD / 2) * (CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK )) if 1 else ( CPU_CLK_UNHALTED.THREAD_ANY / 2 )", + "MetricExpr": "( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else CPU_CLK_UNHALTED.THREAD", "MetricGroup": "SMT", "MetricName": "CORE_CLKS" }, @@ -79,34 +79,16 @@ }, { "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least 1 such miss)", - "MetricExpr": "L1D_PEND_MISS.PENDING / ( L1D_PEND_MISS.PENDING_CYCLES_ANY / 2) if #SMT_on else L1D_PEND_MISS.PENDING_CYCLES", + "MetricExpr": "L1D_PEND_MISS.PENDING / (( L1D_PEND_MISS.PENDING_CYCLES_ANY / 2) if #SMT_on else L1D_PEND_MISS.PENDING_CYCLES)", "MetricGroup": "Memory_Bound;Memory_BW", "MetricName": "MLP" }, { "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses", - "MetricExpr": "( ITLB_MISSES.WALK_PENDING + DTLB_LOAD_MISSES.WALK_PENDING + DTLB_STORE_MISSES.WALK_PENDING + EPT.WALK_PENDING ) / ( 2 * cycles if not #SMT_on else (( CPU_CLK_UNHALTED.THREAD / 2) * (CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK )) if #EBS_Mode else ( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) )", + "MetricExpr": "( ITLB_MISSES.WALK_PENDING + DTLB_LOAD_MISSES.WALK_PENDING + DTLB_STORE_MISSES.WALK_PENDING + EPT.WALK_PENDING ) / ( 2 * (( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles) )", "MetricGroup": "TLB", "MetricName": "Page_Walks_Utilization" }, - { - "BriefDescription": "L1 cache miss per kilo instruction for demand loads", - "MetricExpr": "1000 * MEM_LOAD_RETIRED.L1_MISS_PS / INST_RETIRED.ANY", - "MetricGroup": "Cache_Misses;", - "MetricName": "L1MPKI" - }, - { - "BriefDescription": "L2 cache miss per kilo instruction for demand loads", - "MetricExpr": "1000 * MEM_LOAD_RETIRED.L2_MISS_PS / INST_RETIRED.ANY", - "MetricGroup": "Cache_Misses;", - "MetricName": "L2MPKI" - }, - { - "BriefDescription": "L3 cache miss per kilo instruction for demand loads", - "MetricExpr": "1000 * MEM_LOAD_RETIRED.L3_MISS_PS / INST_RETIRED.ANY", - "MetricGroup": "Cache_Misses;", - "MetricName": "L3MPKI" - }, { "BriefDescription": "Average CPU Utilization", "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@", @@ -115,7 +97,7 @@ }, { "BriefDescription": "Giga Floating Point Operations Per Second", - "MetricExpr": "( 1*( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2* FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4*( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8* (FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE ) + 16* FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE) / 1000000000 / duration_time", + "MetricExpr": "(( 1*( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2* FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4*( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8* FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE )) / 1000000000 / duration_time", "MetricGroup": "FLOPS;Summary", "MetricName": "GFLOPs" }, -- cgit v1.2.3 From 1695849735752d2ace22d8c424ba579e33df691c Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 6 Oct 2017 10:31:47 -0300 Subject: perf mmap: Move perf_mmap and methods to separate mmap.[ch] files To better organize the sources, and we may end up even using it directly, without evlists and evsels. Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-oiqrm7grflurnnzo2ovfnslg@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/Build | 1 + tools/perf/util/evlist.c | 248 ------------------------------------ tools/perf/util/evlist.h | 76 +---------- tools/perf/util/mmap.c | 252 +++++++++++++++++++++++++++++++++++++ tools/perf/util/mmap.h | 94 ++++++++++++++ tools/perf/util/python-ext-sources | 1 + 6 files changed, 349 insertions(+), 323 deletions(-) create mode 100644 tools/perf/util/mmap.c create mode 100644 tools/perf/util/mmap.h (limited to 'tools/perf') diff --git a/tools/perf/util/Build b/tools/perf/util/Build index 369c3163e68c..a3de7916fe63 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -13,6 +13,7 @@ libperf-y += find_bit.o libperf-y += kallsyms.o libperf-y += levenshtein.o libperf-y += llvm-utils.o +libperf-y += mmap.o libperf-y += memswap.o libperf-y += parse-events.o libperf-y += perf_regs.o diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 6a0d7ffbeba0..c6c891e154a6 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -33,9 +33,6 @@ #include #include -static void perf_mmap__munmap(struct perf_mmap *map); -static void perf_mmap__put(struct perf_mmap *map); - #define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y)) #define SID(e, x, y) xyarray__entry(e->sample_id, x, y) @@ -704,129 +701,6 @@ static int perf_evlist__resume(struct perf_evlist *evlist) return perf_evlist__set_paused(evlist, false); } -/* When check_messup is true, 'end' must points to a good entry */ -static union perf_event * -perf_mmap__read(struct perf_mmap *md, bool check_messup, u64 start, - u64 end, u64 *prev) -{ - unsigned char *data = md->base + page_size; - union perf_event *event = NULL; - int diff = end - start; - - if (check_messup) { - /* - * If we're further behind than half the buffer, there's a chance - * the writer will bite our tail and mess up the samples under us. - * - * If we somehow ended up ahead of the 'end', we got messed up. - * - * In either case, truncate and restart at 'end'. - */ - if (diff > md->mask / 2 || diff < 0) { - fprintf(stderr, "WARNING: failed to keep up with mmap data.\n"); - - /* - * 'end' points to a known good entry, start there. - */ - start = end; - diff = 0; - } - } - - if (diff >= (int)sizeof(event->header)) { - size_t size; - - event = (union perf_event *)&data[start & md->mask]; - size = event->header.size; - - if (size < sizeof(event->header) || diff < (int)size) { - event = NULL; - goto broken_event; - } - - /* - * Event straddles the mmap boundary -- header should always - * be inside due to u64 alignment of output. - */ - if ((start & md->mask) + size != ((start + size) & md->mask)) { - unsigned int offset = start; - unsigned int len = min(sizeof(*event), size), cpy; - void *dst = md->event_copy; - - do { - cpy = min(md->mask + 1 - (offset & md->mask), len); - memcpy(dst, &data[offset & md->mask], cpy); - offset += cpy; - dst += cpy; - len -= cpy; - } while (len); - - event = (union perf_event *) md->event_copy; - } - - start += size; - } - -broken_event: - if (prev) - *prev = start; - - return event; -} - -union perf_event *perf_mmap__read_forward(struct perf_mmap *md, bool check_messup) -{ - u64 head; - u64 old = md->prev; - - /* - * Check if event was unmapped due to a POLLHUP/POLLERR. - */ - if (!refcount_read(&md->refcnt)) - return NULL; - - head = perf_mmap__read_head(md); - - return perf_mmap__read(md, check_messup, old, head, &md->prev); -} - -union perf_event * -perf_mmap__read_backward(struct perf_mmap *md) -{ - u64 head, end; - u64 start = md->prev; - - /* - * Check if event was unmapped due to a POLLHUP/POLLERR. - */ - if (!refcount_read(&md->refcnt)) - return NULL; - - head = perf_mmap__read_head(md); - if (!head) - return NULL; - - /* - * 'head' pointer starts from 0. Kernel minus sizeof(record) form - * it each time when kernel writes to it, so in fact 'head' is - * negative. 'end' pointer is made manually by adding the size of - * the ring buffer to 'head' pointer, means the validate data can - * read is the whole ring buffer. If 'end' is positive, the ring - * buffer has not fully filled, so we must adjust 'end' to 0. - * - * However, since both 'head' and 'end' is unsigned, we can't - * simply compare 'end' against 0. Here we compare '-head' and - * the size of the ring buffer, where -head is the number of bytes - * kernel write to the ring buffer. - */ - if (-head < (u64)(md->mask + 1)) - end = 0; - else - end = head + md->mask + 1; - - return perf_mmap__read(md, false, start, end, &md->prev); -} - union perf_event *perf_evlist__mmap_read_forward(struct perf_evlist *evlist, int idx) { struct perf_mmap *md = &evlist->mmap[idx]; @@ -857,96 +731,16 @@ union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx) return perf_evlist__mmap_read_forward(evlist, idx); } -void perf_mmap__read_catchup(struct perf_mmap *md) -{ - u64 head; - - if (!refcount_read(&md->refcnt)) - return; - - head = perf_mmap__read_head(md); - md->prev = head; -} - void perf_evlist__mmap_read_catchup(struct perf_evlist *evlist, int idx) { perf_mmap__read_catchup(&evlist->mmap[idx]); } -static bool perf_mmap__empty(struct perf_mmap *md) -{ - return perf_mmap__read_head(md) == md->prev && !md->auxtrace_mmap.base; -} - -static void perf_mmap__get(struct perf_mmap *map) -{ - refcount_inc(&map->refcnt); -} - -static void perf_mmap__put(struct perf_mmap *md) -{ - BUG_ON(md->base && refcount_read(&md->refcnt) == 0); - - if (refcount_dec_and_test(&md->refcnt)) - perf_mmap__munmap(md); -} - -void perf_mmap__consume(struct perf_mmap *md, bool overwrite) -{ - if (!overwrite) { - u64 old = md->prev; - - perf_mmap__write_tail(md, old); - } - - if (refcount_read(&md->refcnt) == 1 && perf_mmap__empty(md)) - perf_mmap__put(md); -} - void perf_evlist__mmap_consume(struct perf_evlist *evlist, int idx) { perf_mmap__consume(&evlist->mmap[idx], evlist->overwrite); } -int __weak auxtrace_mmap__mmap(struct auxtrace_mmap *mm __maybe_unused, - struct auxtrace_mmap_params *mp __maybe_unused, - void *userpg __maybe_unused, - int fd __maybe_unused) -{ - return 0; -} - -void __weak auxtrace_mmap__munmap(struct auxtrace_mmap *mm __maybe_unused) -{ -} - -void __weak auxtrace_mmap_params__init( - struct auxtrace_mmap_params *mp __maybe_unused, - off_t auxtrace_offset __maybe_unused, - unsigned int auxtrace_pages __maybe_unused, - bool auxtrace_overwrite __maybe_unused) -{ -} - -void __weak auxtrace_mmap_params__set_idx( - struct auxtrace_mmap_params *mp __maybe_unused, - struct perf_evlist *evlist __maybe_unused, - int idx __maybe_unused, - bool per_cpu __maybe_unused) -{ -} - -static void perf_mmap__munmap(struct perf_mmap *map) -{ - if (map->base != NULL) { - munmap(map->base, perf_mmap__mmap_len(map)); - map->base = NULL; - map->fd = -1; - refcount_set(&map->refcnt, 0); - } - auxtrace_mmap__munmap(&map->auxtrace_mmap); -} - static void perf_evlist__munmap_nofree(struct perf_evlist *evlist) { int i; @@ -995,48 +789,6 @@ static struct perf_mmap *perf_evlist__alloc_mmap(struct perf_evlist *evlist) return map; } -struct mmap_params { - int prot; - int mask; - struct auxtrace_mmap_params auxtrace_mp; -}; - -static int perf_mmap__mmap(struct perf_mmap *map, - struct mmap_params *mp, int fd) -{ - /* - * The last one will be done at perf_evlist__mmap_consume(), so that we - * make sure we don't prevent tools from consuming every last event in - * the ring buffer. - * - * I.e. we can get the POLLHUP meaning that the fd doesn't exist - * anymore, but the last events for it are still in the ring buffer, - * waiting to be consumed. - * - * Tools can chose to ignore this at their own discretion, but the - * evlist layer can't just drop it when filtering events in - * perf_evlist__filter_pollfd(). - */ - refcount_set(&map->refcnt, 2); - map->prev = 0; - map->mask = mp->mask; - map->base = mmap(NULL, perf_mmap__mmap_len(map), mp->prot, - MAP_SHARED, fd, 0); - if (map->base == MAP_FAILED) { - pr_debug2("failed to mmap perf event ring buffer, error %d\n", - errno); - map->base = NULL; - return -1; - } - map->fd = fd; - - if (auxtrace_mmap__mmap(&map->auxtrace_mmap, - &mp->auxtrace_mp, map->base, fd)) - return -1; - - return 0; -} - static bool perf_evlist__should_poll(struct perf_evlist *evlist __maybe_unused, struct perf_evsel *evsel) diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index b1c14f1fdc27..8c433e95bd9a 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -11,8 +11,8 @@ #include "../perf.h" #include "event.h" #include "evsel.h" +#include "mmap.h" #include "util.h" -#include "auxtrace.h" #include #include @@ -24,55 +24,6 @@ struct record_opts; #define PERF_EVLIST__HLIST_BITS 8 #define PERF_EVLIST__HLIST_SIZE (1 << PERF_EVLIST__HLIST_BITS) -/** - * struct perf_mmap - perf's ring buffer mmap details - * - * @refcnt - e.g. code using PERF_EVENT_IOC_SET_OUTPUT to share this - */ -struct perf_mmap { - void *base; - int mask; - int fd; - refcount_t refcnt; - u64 prev; - struct auxtrace_mmap auxtrace_mmap; - char event_copy[PERF_SAMPLE_MAX_SIZE] __aligned(8); -}; - -static inline size_t -perf_mmap__mmap_len(struct perf_mmap *map) -{ - return map->mask + 1 + page_size; -} - -/* - * State machine of bkw_mmap_state: - * - * .________________(forbid)_____________. - * | V - * NOTREADY --(0)--> RUNNING --(1)--> DATA_PENDING --(2)--> EMPTY - * ^ ^ | ^ | - * | |__(forbid)____/ |___(forbid)___/| - * | | - * \_________________(3)_______________/ - * - * NOTREADY : Backward ring buffers are not ready - * RUNNING : Backward ring buffers are recording - * DATA_PENDING : We are required to collect data from backward ring buffers - * EMPTY : We have collected data from backward ring buffers. - * - * (0): Setup backward ring buffer - * (1): Pause ring buffers for reading - * (2): Read from ring buffers - * (3): Resume ring buffers for recording - */ -enum bkw_mmap_state { - BKW_MMAP_NOTREADY, - BKW_MMAP_RUNNING, - BKW_MMAP_DATA_PENDING, - BKW_MMAP_EMPTY, -}; - struct perf_evlist { struct list_head entries; struct hlist_head heads[PERF_EVLIST__HLIST_SIZE]; @@ -177,12 +128,6 @@ struct perf_sample_id *perf_evlist__id2sid(struct perf_evlist *evlist, u64 id); void perf_evlist__toggle_bkw_mmap(struct perf_evlist *evlist, enum bkw_mmap_state state); -union perf_event *perf_mmap__read_forward(struct perf_mmap *map, bool check_messup); -union perf_event *perf_mmap__read_backward(struct perf_mmap *map); - -void perf_mmap__read_catchup(struct perf_mmap *md); -void perf_mmap__consume(struct perf_mmap *md, bool overwrite); - union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx); union perf_event *perf_evlist__mmap_read_forward(struct perf_evlist *evlist, @@ -286,25 +231,6 @@ size_t perf_evlist__fprintf(struct perf_evlist *evlist, FILE *fp); int perf_evlist__strerror_open(struct perf_evlist *evlist, int err, char *buf, size_t size); int perf_evlist__strerror_mmap(struct perf_evlist *evlist, int err, char *buf, size_t size); -static inline u64 perf_mmap__read_head(struct perf_mmap *mm) -{ - struct perf_event_mmap_page *pc = mm->base; - u64 head = ACCESS_ONCE(pc->data_head); - rmb(); - return head; -} - -static inline void perf_mmap__write_tail(struct perf_mmap *md, u64 tail) -{ - struct perf_event_mmap_page *pc = md->base; - - /* - * ensure all reads are done before we write the tail out. - */ - mb(); - pc->data_tail = tail; -} - bool perf_evlist__can_select_event(struct perf_evlist *evlist, const char *str); void perf_evlist__to_front(struct perf_evlist *evlist, struct perf_evsel *move_evsel); diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c new file mode 100644 index 000000000000..dfc4a007f2c6 --- /dev/null +++ b/tools/perf/util/mmap.c @@ -0,0 +1,252 @@ +/* + * Copyright (C) 2011-2017, Red Hat Inc, Arnaldo Carvalho de Melo + * + * Parts came from evlist.c builtin-{top,stat,record}.c, see those files for further + * copyright notes. + * + * Released under the GPL v2. (and only v2, not any later version) + */ + +#include +#include "event.h" +#include "mmap.h" +#include "util.h" /* page_size */ + +size_t perf_mmap__mmap_len(struct perf_mmap *map) +{ + return map->mask + 1 + page_size; +} + +/* When check_messup is true, 'end' must points to a good entry */ +static union perf_event *perf_mmap__read(struct perf_mmap *map, bool check_messup, + u64 start, u64 end, u64 *prev) +{ + unsigned char *data = map->base + page_size; + union perf_event *event = NULL; + int diff = end - start; + + if (check_messup) { + /* + * If we're further behind than half the buffer, there's a chance + * the writer will bite our tail and mess up the samples under us. + * + * If we somehow ended up ahead of the 'end', we got messed up. + * + * In either case, truncate and restart at 'end'. + */ + if (diff > map->mask / 2 || diff < 0) { + fprintf(stderr, "WARNING: failed to keep up with mmap data.\n"); + + /* + * 'end' points to a known good entry, start there. + */ + start = end; + diff = 0; + } + } + + if (diff >= (int)sizeof(event->header)) { + size_t size; + + event = (union perf_event *)&data[start & map->mask]; + size = event->header.size; + + if (size < sizeof(event->header) || diff < (int)size) { + event = NULL; + goto broken_event; + } + + /* + * Event straddles the mmap boundary -- header should always + * be inside due to u64 alignment of output. + */ + if ((start & map->mask) + size != ((start + size) & map->mask)) { + unsigned int offset = start; + unsigned int len = min(sizeof(*event), size), cpy; + void *dst = map->event_copy; + + do { + cpy = min(map->mask + 1 - (offset & map->mask), len); + memcpy(dst, &data[offset & map->mask], cpy); + offset += cpy; + dst += cpy; + len -= cpy; + } while (len); + + event = (union perf_event *)map->event_copy; + } + + start += size; + } + +broken_event: + if (prev) + *prev = start; + + return event; +} + +union perf_event *perf_mmap__read_forward(struct perf_mmap *map, bool check_messup) +{ + u64 head; + u64 old = map->prev; + + /* + * Check if event was unmapped due to a POLLHUP/POLLERR. + */ + if (!refcount_read(&map->refcnt)) + return NULL; + + head = perf_mmap__read_head(map); + + return perf_mmap__read(map, check_messup, old, head, &map->prev); +} + +union perf_event *perf_mmap__read_backward(struct perf_mmap *map) +{ + u64 head, end; + u64 start = map->prev; + + /* + * Check if event was unmapped due to a POLLHUP/POLLERR. + */ + if (!refcount_read(&map->refcnt)) + return NULL; + + head = perf_mmap__read_head(map); + if (!head) + return NULL; + + /* + * 'head' pointer starts from 0. Kernel minus sizeof(record) form + * it each time when kernel writes to it, so in fact 'head' is + * negative. 'end' pointer is made manually by adding the size of + * the ring buffer to 'head' pointer, means the validate data can + * read is the whole ring buffer. If 'end' is positive, the ring + * buffer has not fully filled, so we must adjust 'end' to 0. + * + * However, since both 'head' and 'end' is unsigned, we can't + * simply compare 'end' against 0. Here we compare '-head' and + * the size of the ring buffer, where -head is the number of bytes + * kernel write to the ring buffer. + */ + if (-head < (u64)(map->mask + 1)) + end = 0; + else + end = head + map->mask + 1; + + return perf_mmap__read(map, false, start, end, &map->prev); +} + +void perf_mmap__read_catchup(struct perf_mmap *map) +{ + u64 head; + + if (!refcount_read(&map->refcnt)) + return; + + head = perf_mmap__read_head(map); + map->prev = head; +} + +static bool perf_mmap__empty(struct perf_mmap *map) +{ + return perf_mmap__read_head(map) == map->prev && !map->auxtrace_mmap.base; +} + +void perf_mmap__get(struct perf_mmap *map) +{ + refcount_inc(&map->refcnt); +} + +void perf_mmap__put(struct perf_mmap *map) +{ + BUG_ON(map->base && refcount_read(&map->refcnt) == 0); + + if (refcount_dec_and_test(&map->refcnt)) + perf_mmap__munmap(map); +} + +void perf_mmap__consume(struct perf_mmap *map, bool overwrite) +{ + if (!overwrite) { + u64 old = map->prev; + + perf_mmap__write_tail(map, old); + } + + if (refcount_read(&map->refcnt) == 1 && perf_mmap__empty(map)) + perf_mmap__put(map); +} + +int __weak auxtrace_mmap__mmap(struct auxtrace_mmap *mm __maybe_unused, + struct auxtrace_mmap_params *mp __maybe_unused, + void *userpg __maybe_unused, + int fd __maybe_unused) +{ + return 0; +} + +void __weak auxtrace_mmap__munmap(struct auxtrace_mmap *mm __maybe_unused) +{ +} + +void __weak auxtrace_mmap_params__init(struct auxtrace_mmap_params *mp __maybe_unused, + off_t auxtrace_offset __maybe_unused, + unsigned int auxtrace_pages __maybe_unused, + bool auxtrace_overwrite __maybe_unused) +{ +} + +void __weak auxtrace_mmap_params__set_idx(struct auxtrace_mmap_params *mp __maybe_unused, + struct perf_evlist *evlist __maybe_unused, + int idx __maybe_unused, + bool per_cpu __maybe_unused) +{ +} + +void perf_mmap__munmap(struct perf_mmap *map) +{ + if (map->base != NULL) { + munmap(map->base, perf_mmap__mmap_len(map)); + map->base = NULL; + map->fd = -1; + refcount_set(&map->refcnt, 0); + } + auxtrace_mmap__munmap(&map->auxtrace_mmap); +} + +int perf_mmap__mmap(struct perf_mmap *map, struct mmap_params *mp, int fd) +{ + /* + * The last one will be done at perf_evlist__mmap_consume(), so that we + * make sure we don't prevent tools from consuming every last event in + * the ring buffer. + * + * I.e. we can get the POLLHUP meaning that the fd doesn't exist + * anymore, but the last events for it are still in the ring buffer, + * waiting to be consumed. + * + * Tools can chose to ignore this at their own discretion, but the + * evlist layer can't just drop it when filtering events in + * perf_evlist__filter_pollfd(). + */ + refcount_set(&map->refcnt, 2); + map->prev = 0; + map->mask = mp->mask; + map->base = mmap(NULL, perf_mmap__mmap_len(map), mp->prot, + MAP_SHARED, fd, 0); + if (map->base == MAP_FAILED) { + pr_debug2("failed to mmap perf event ring buffer, error %d\n", + errno); + map->base = NULL; + return -1; + } + map->fd = fd; + + if (auxtrace_mmap__mmap(&map->auxtrace_mmap, + &mp->auxtrace_mp, map->base, fd)) + return -1; + + return 0; +} diff --git a/tools/perf/util/mmap.h b/tools/perf/util/mmap.h new file mode 100644 index 000000000000..f37ff45c8ec1 --- /dev/null +++ b/tools/perf/util/mmap.h @@ -0,0 +1,94 @@ +#ifndef __PERF_MMAP_H +#define __PERF_MMAP_H 1 + +#include +#include +#include +#include +#include +#include "auxtrace.h" +#include "event.h" + +/** + * struct perf_mmap - perf's ring buffer mmap details + * + * @refcnt - e.g. code using PERF_EVENT_IOC_SET_OUTPUT to share this + */ +struct perf_mmap { + void *base; + int mask; + int fd; + refcount_t refcnt; + u64 prev; + struct auxtrace_mmap auxtrace_mmap; + char event_copy[PERF_SAMPLE_MAX_SIZE] __aligned(8); +}; + +/* + * State machine of bkw_mmap_state: + * + * .________________(forbid)_____________. + * | V + * NOTREADY --(0)--> RUNNING --(1)--> DATA_PENDING --(2)--> EMPTY + * ^ ^ | ^ | + * | |__(forbid)____/ |___(forbid)___/| + * | | + * \_________________(3)_______________/ + * + * NOTREADY : Backward ring buffers are not ready + * RUNNING : Backward ring buffers are recording + * DATA_PENDING : We are required to collect data from backward ring buffers + * EMPTY : We have collected data from backward ring buffers. + * + * (0): Setup backward ring buffer + * (1): Pause ring buffers for reading + * (2): Read from ring buffers + * (3): Resume ring buffers for recording + */ +enum bkw_mmap_state { + BKW_MMAP_NOTREADY, + BKW_MMAP_RUNNING, + BKW_MMAP_DATA_PENDING, + BKW_MMAP_EMPTY, +}; + +struct mmap_params { + int prot, mask; + struct auxtrace_mmap_params auxtrace_mp; +}; + +int perf_mmap__mmap(struct perf_mmap *map, struct mmap_params *mp, int fd); +void perf_mmap__munmap(struct perf_mmap *map); + +void perf_mmap__get(struct perf_mmap *map); +void perf_mmap__put(struct perf_mmap *map); + +void perf_mmap__consume(struct perf_mmap *map, bool overwrite); + +void perf_mmap__read_catchup(struct perf_mmap *md); + +static inline u64 perf_mmap__read_head(struct perf_mmap *mm) +{ + struct perf_event_mmap_page *pc = mm->base; + u64 head = ACCESS_ONCE(pc->data_head); + rmb(); + return head; +} + +static inline void perf_mmap__write_tail(struct perf_mmap *md, u64 tail) +{ + struct perf_event_mmap_page *pc = md->base; + + /* + * ensure all reads are done before we write the tail out. + */ + mb(); + pc->data_tail = tail; +} + +union perf_event *perf_mmap__read_forward(struct perf_mmap *map, bool check_messup); +union perf_event *perf_mmap__read_backward(struct perf_mmap *map); + +size_t perf_mmap__mmap_len(struct perf_mmap *map); + +#endif /*__PERF_MMAP_H */ diff --git a/tools/perf/util/python-ext-sources b/tools/perf/util/python-ext-sources index e66dc495809a..b4f2f06722a7 100644 --- a/tools/perf/util/python-ext-sources +++ b/tools/perf/util/python-ext-sources @@ -10,6 +10,7 @@ util/ctype.c util/evlist.c util/evsel.c util/cpumap.c +util/mmap.c util/namespaces.c ../lib/bitmap.c ../lib/find_bit.c -- cgit v1.2.3 From d37f1586d0023443e2e64937769bbb1bc078866a Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 5 Oct 2017 16:39:55 -0300 Subject: perf record: Make record__mmap_read generic It becomes a perf_mmap method, "push", that build reads from a mmap and "pushes" it to a consumer, that in the initial case, for 'perf record', just writes it to the perf.data file descriptor, but may be used by 'top', etc. Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-u4l1qjbi6l76r2k0nv99220n@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-record.c | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 234fdf4734f6..47a761133aed 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -176,9 +176,16 @@ rb_find_range(void *data, int mask, u64 head, u64 old, return backward_rb_find_range(data, mask, head, start, end); } -static int -record__mmap_read(struct record *rec, struct perf_mmap *md, - bool overwrite, bool backward) +static int record__pushfn(void *to, void *bf, size_t size) +{ + struct record *rec = to; + + rec->samples++; + return record__write(rec, bf, size); +} + +static int perf_mmap__push(struct perf_mmap *md, bool overwrite, bool backward, + void *to, int push(void *to, void *buf, size_t size)) { u64 head = perf_mmap__read_head(md); u64 old = md->prev; @@ -195,8 +202,6 @@ record__mmap_read(struct record *rec, struct perf_mmap *md, if (start == end) return 0; - rec->samples++; - size = end - start; if (size > (unsigned long)(md->mask) + 1) { WARN_ONCE(1, "failed to keep up with mmap data. (warn only once)\n"); @@ -211,7 +216,7 @@ record__mmap_read(struct record *rec, struct perf_mmap *md, size = md->mask + 1 - (start & md->mask); start += size; - if (record__write(rec, buf, size) < 0) { + if (push(to, buf, size) < 0) { rc = -1; goto out; } @@ -221,7 +226,7 @@ record__mmap_read(struct record *rec, struct perf_mmap *md, size = end - start; start += size; - if (record__write(rec, buf, size) < 0) { + if (push(to, buf, size) < 0) { rc = -1; goto out; } @@ -576,8 +581,7 @@ static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evli struct auxtrace_mmap *mm = &maps[i].auxtrace_mmap; if (maps[i].base) { - if (record__mmap_read(rec, &maps[i], - evlist->overwrite, backward) != 0) { + if (perf_mmap__push(&maps[i], evlist->overwrite, backward, rec, record__pushfn) != 0) { rc = -1; goto out; } -- cgit v1.2.3 From 73c17d815000e425aea108226bcb57491a04f534 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 6 Oct 2017 10:46:01 -0300 Subject: perf mmap: Adopt push method from builtin-record.c The previous prep patch was just to show exactly what changed in that function, now its time to move that method and things only it uses to the right place, mmap.[ch] Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-aaxywfgw3d44x6xlu8zm1avu@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-record.c | 100 -------------------------------------------- tools/perf/util/mmap.c | 100 ++++++++++++++++++++++++++++++++++++++++++++ tools/perf/util/mmap.h | 3 ++ 3 files changed, 103 insertions(+), 100 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 47a761133aed..a6cbf1640269 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -129,53 +129,6 @@ static int process_synthesized_event(struct perf_tool *tool, return record__write(rec, event, event->header.size); } -static int -backward_rb_find_range(void *buf, int mask, u64 head, u64 *start, u64 *end) -{ - struct perf_event_header *pheader; - u64 evt_head = head; - int size = mask + 1; - - pr_debug2("backward_rb_find_range: buf=%p, head=%"PRIx64"\n", buf, head); - pheader = (struct perf_event_header *)(buf + (head & mask)); - *start = head; - while (true) { - if (evt_head - head >= (unsigned int)size) { - pr_debug("Finished reading backward ring buffer: rewind\n"); - if (evt_head - head > (unsigned int)size) - evt_head -= pheader->size; - *end = evt_head; - return 0; - } - - pheader = (struct perf_event_header *)(buf + (evt_head & mask)); - - if (pheader->size == 0) { - pr_debug("Finished reading backward ring buffer: get start\n"); - *end = evt_head; - return 0; - } - - evt_head += pheader->size; - pr_debug3("move evt_head: %"PRIx64"\n", evt_head); - } - WARN_ONCE(1, "Shouldn't get here\n"); - return -1; -} - -static int -rb_find_range(void *data, int mask, u64 head, u64 old, - u64 *start, u64 *end, bool backward) -{ - if (!backward) { - *start = old; - *end = head; - return 0; - } - - return backward_rb_find_range(data, mask, head, start, end); -} - static int record__pushfn(void *to, void *bf, size_t size) { struct record *rec = to; @@ -184,59 +137,6 @@ static int record__pushfn(void *to, void *bf, size_t size) return record__write(rec, bf, size); } -static int perf_mmap__push(struct perf_mmap *md, bool overwrite, bool backward, - void *to, int push(void *to, void *buf, size_t size)) -{ - u64 head = perf_mmap__read_head(md); - u64 old = md->prev; - u64 end = head, start = old; - unsigned char *data = md->base + page_size; - unsigned long size; - void *buf; - int rc = 0; - - if (rb_find_range(data, md->mask, head, - old, &start, &end, backward)) - return -1; - - if (start == end) - return 0; - - size = end - start; - if (size > (unsigned long)(md->mask) + 1) { - WARN_ONCE(1, "failed to keep up with mmap data. (warn only once)\n"); - - md->prev = head; - perf_mmap__consume(md, overwrite || backward); - return 0; - } - - if ((start & md->mask) + size != (end & md->mask)) { - buf = &data[start & md->mask]; - size = md->mask + 1 - (start & md->mask); - start += size; - - if (push(to, buf, size) < 0) { - rc = -1; - goto out; - } - } - - buf = &data[start & md->mask]; - size = end - start; - start += size; - - if (push(to, buf, size) < 0) { - rc = -1; - goto out; - } - - md->prev = head; - perf_mmap__consume(md, overwrite || backward); -out: - return rc; -} - static volatile int done; static volatile int signr = -1; static volatile int child_finished; diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c index dfc4a007f2c6..9fe5f9c7d577 100644 --- a/tools/perf/util/mmap.c +++ b/tools/perf/util/mmap.c @@ -8,6 +8,9 @@ */ #include +#include +#include +#include "debug.h" #include "event.h" #include "mmap.h" #include "util.h" /* page_size */ @@ -250,3 +253,100 @@ int perf_mmap__mmap(struct perf_mmap *map, struct mmap_params *mp, int fd) return 0; } + +static int backward_rb_find_range(void *buf, int mask, u64 head, u64 *start, u64 *end) +{ + struct perf_event_header *pheader; + u64 evt_head = head; + int size = mask + 1; + + pr_debug2("backward_rb_find_range: buf=%p, head=%"PRIx64"\n", buf, head); + pheader = (struct perf_event_header *)(buf + (head & mask)); + *start = head; + while (true) { + if (evt_head - head >= (unsigned int)size) { + pr_debug("Finished reading backward ring buffer: rewind\n"); + if (evt_head - head > (unsigned int)size) + evt_head -= pheader->size; + *end = evt_head; + return 0; + } + + pheader = (struct perf_event_header *)(buf + (evt_head & mask)); + + if (pheader->size == 0) { + pr_debug("Finished reading backward ring buffer: get start\n"); + *end = evt_head; + return 0; + } + + evt_head += pheader->size; + pr_debug3("move evt_head: %"PRIx64"\n", evt_head); + } + WARN_ONCE(1, "Shouldn't get here\n"); + return -1; +} + +static int rb_find_range(void *data, int mask, u64 head, u64 old, + u64 *start, u64 *end, bool backward) +{ + if (!backward) { + *start = old; + *end = head; + return 0; + } + + return backward_rb_find_range(data, mask, head, start, end); +} + +int perf_mmap__push(struct perf_mmap *md, bool overwrite, bool backward, + void *to, int push(void *to, void *buf, size_t size)) +{ + u64 head = perf_mmap__read_head(md); + u64 old = md->prev; + u64 end = head, start = old; + unsigned char *data = md->base + page_size; + unsigned long size; + void *buf; + int rc = 0; + + if (rb_find_range(data, md->mask, head, old, &start, &end, backward)) + return -1; + + if (start == end) + return 0; + + size = end - start; + if (size > (unsigned long)(md->mask) + 1) { + WARN_ONCE(1, "failed to keep up with mmap data. (warn only once)\n"); + + md->prev = head; + perf_mmap__consume(md, overwrite || backward); + return 0; + } + + if ((start & md->mask) + size != (end & md->mask)) { + buf = &data[start & md->mask]; + size = md->mask + 1 - (start & md->mask); + start += size; + + if (push(to, buf, size) < 0) { + rc = -1; + goto out; + } + } + + buf = &data[start & md->mask]; + size = end - start; + start += size; + + if (push(to, buf, size) < 0) { + rc = -1; + goto out; + } + + md->prev = head; + perf_mmap__consume(md, overwrite || backward); +out: + return rc; +} diff --git a/tools/perf/util/mmap.h b/tools/perf/util/mmap.h index f37ff45c8ec1..efd78b827b05 100644 --- a/tools/perf/util/mmap.h +++ b/tools/perf/util/mmap.h @@ -89,6 +89,9 @@ static inline void perf_mmap__write_tail(struct perf_mmap *md, u64 tail) union perf_event *perf_mmap__read_forward(struct perf_mmap *map, bool check_messup); union perf_event *perf_mmap__read_backward(struct perf_mmap *map); +int perf_mmap__push(struct perf_mmap *md, bool overwrite, bool backward, + void *to, int push(void *to, void *buf, size_t size)); + size_t perf_mmap__mmap_len(struct perf_mmap *map); #endif /*__PERF_MMAP_H */ -- cgit v1.2.3 From 692f5a22cd284bb8233a38e3ed86881d2d9c89d4 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 9 Oct 2017 15:07:12 +0200 Subject: perf tests attr: Make hw events optional Otherwise we fail on virtual machines with no support for specific HW events. Signed-off-by: Jiri Olsa Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20171009130712.14747-1-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/attr/test-stat-C0 | 1 + tools/perf/tests/attr/test-stat-basic | 1 + tools/perf/tests/attr/test-stat-default | 4 ++++ tools/perf/tests/attr/test-stat-detailed-1 | 8 ++++++++ tools/perf/tests/attr/test-stat-detailed-2 | 13 +++++++++++++ tools/perf/tests/attr/test-stat-detailed-3 | 13 +++++++++++++ tools/perf/tests/attr/test-stat-no-inherit | 1 + 7 files changed, 41 insertions(+) (limited to 'tools/perf') diff --git a/tools/perf/tests/attr/test-stat-C0 b/tools/perf/tests/attr/test-stat-C0 index 67717fe6a65d..a2c76d10b2bb 100644 --- a/tools/perf/tests/attr/test-stat-C0 +++ b/tools/perf/tests/attr/test-stat-C0 @@ -7,3 +7,4 @@ ret = 1 # events are disabled by default when attached to cpu disabled=1 enable_on_exec=0 +optional=1 diff --git a/tools/perf/tests/attr/test-stat-basic b/tools/perf/tests/attr/test-stat-basic index 74e17881f2ba..69867d049fda 100644 --- a/tools/perf/tests/attr/test-stat-basic +++ b/tools/perf/tests/attr/test-stat-basic @@ -4,3 +4,4 @@ args = -e cycles kill >/dev/null 2>&1 ret = 1 [event:base-stat] +optional=1 diff --git a/tools/perf/tests/attr/test-stat-default b/tools/perf/tests/attr/test-stat-default index e911dbd4eb47..d9e99b3f77e6 100644 --- a/tools/perf/tests/attr/test-stat-default +++ b/tools/perf/tests/attr/test-stat-default @@ -32,6 +32,7 @@ config=2 fd=5 type=0 config=0 +optional=1 # PERF_TYPE_HARDWARE / PERF_COUNT_HW_STALLED_CYCLES_FRONTEND [event6:base-stat] @@ -52,15 +53,18 @@ optional=1 fd=8 type=0 config=1 +optional=1 # PERF_TYPE_HARDWARE / PERF_COUNT_HW_BRANCH_INSTRUCTIONS [event9:base-stat] fd=9 type=0 config=4 +optional=1 # PERF_TYPE_HARDWARE / PERF_COUNT_HW_BRANCH_MISSES [event10:base-stat] fd=10 type=0 config=5 +optional=1 diff --git a/tools/perf/tests/attr/test-stat-detailed-1 b/tools/perf/tests/attr/test-stat-detailed-1 index b39270a08e74..8b04a055d154 100644 --- a/tools/perf/tests/attr/test-stat-detailed-1 +++ b/tools/perf/tests/attr/test-stat-detailed-1 @@ -33,6 +33,7 @@ config=2 fd=5 type=0 config=0 +optional=1 # PERF_TYPE_HARDWARE / PERF_COUNT_HW_STALLED_CYCLES_FRONTEND [event6:base-stat] @@ -53,18 +54,21 @@ optional=1 fd=8 type=0 config=1 +optional=1 # PERF_TYPE_HARDWARE / PERF_COUNT_HW_BRANCH_INSTRUCTIONS [event9:base-stat] fd=9 type=0 config=4 +optional=1 # PERF_TYPE_HARDWARE / PERF_COUNT_HW_BRANCH_MISSES [event10:base-stat] fd=10 type=0 config=5 +optional=1 # PERF_TYPE_HW_CACHE / # PERF_COUNT_HW_CACHE_L1D << 0 | @@ -74,6 +78,7 @@ config=5 fd=11 type=3 config=0 +optional=1 # PERF_TYPE_HW_CACHE / # PERF_COUNT_HW_CACHE_L1D << 0 | @@ -83,6 +88,7 @@ config=0 fd=12 type=3 config=65536 +optional=1 # PERF_TYPE_HW_CACHE / # PERF_COUNT_HW_CACHE_LL << 0 | @@ -92,6 +98,7 @@ config=65536 fd=13 type=3 config=2 +optional=1 # PERF_TYPE_HW_CACHE, # PERF_COUNT_HW_CACHE_LL << 0 | @@ -101,3 +108,4 @@ config=2 fd=14 type=3 config=65538 +optional=1 diff --git a/tools/perf/tests/attr/test-stat-detailed-2 b/tools/perf/tests/attr/test-stat-detailed-2 index 45f8e6ea34f8..4fca9f1bfbf8 100644 --- a/tools/perf/tests/attr/test-stat-detailed-2 +++ b/tools/perf/tests/attr/test-stat-detailed-2 @@ -33,6 +33,7 @@ config=2 fd=5 type=0 config=0 +optional=1 # PERF_TYPE_HARDWARE / PERF_COUNT_HW_STALLED_CYCLES_FRONTEND [event6:base-stat] @@ -53,18 +54,21 @@ optional=1 fd=8 type=0 config=1 +optional=1 # PERF_TYPE_HARDWARE / PERF_COUNT_HW_BRANCH_INSTRUCTIONS [event9:base-stat] fd=9 type=0 config=4 +optional=1 # PERF_TYPE_HARDWARE / PERF_COUNT_HW_BRANCH_MISSES [event10:base-stat] fd=10 type=0 config=5 +optional=1 # PERF_TYPE_HW_CACHE / # PERF_COUNT_HW_CACHE_L1D << 0 | @@ -74,6 +78,7 @@ config=5 fd=11 type=3 config=0 +optional=1 # PERF_TYPE_HW_CACHE / # PERF_COUNT_HW_CACHE_L1D << 0 | @@ -83,6 +88,7 @@ config=0 fd=12 type=3 config=65536 +optional=1 # PERF_TYPE_HW_CACHE / # PERF_COUNT_HW_CACHE_LL << 0 | @@ -92,6 +98,7 @@ config=65536 fd=13 type=3 config=2 +optional=1 # PERF_TYPE_HW_CACHE, # PERF_COUNT_HW_CACHE_LL << 0 | @@ -101,6 +108,7 @@ config=2 fd=14 type=3 config=65538 +optional=1 # PERF_TYPE_HW_CACHE, # PERF_COUNT_HW_CACHE_L1I << 0 | @@ -120,6 +128,7 @@ optional=1 fd=16 type=3 config=65537 +optional=1 # PERF_TYPE_HW_CACHE, # PERF_COUNT_HW_CACHE_DTLB << 0 | @@ -129,6 +138,7 @@ config=65537 fd=17 type=3 config=3 +optional=1 # PERF_TYPE_HW_CACHE, # PERF_COUNT_HW_CACHE_DTLB << 0 | @@ -138,6 +148,7 @@ config=3 fd=18 type=3 config=65539 +optional=1 # PERF_TYPE_HW_CACHE, # PERF_COUNT_HW_CACHE_ITLB << 0 | @@ -147,6 +158,7 @@ config=65539 fd=19 type=3 config=4 +optional=1 # PERF_TYPE_HW_CACHE, # PERF_COUNT_HW_CACHE_ITLB << 0 | @@ -156,3 +168,4 @@ config=4 fd=20 type=3 config=65540 +optional=1 diff --git a/tools/perf/tests/attr/test-stat-detailed-3 b/tools/perf/tests/attr/test-stat-detailed-3 index 30ae0fb7a3fd..4bb58e1c82a6 100644 --- a/tools/perf/tests/attr/test-stat-detailed-3 +++ b/tools/perf/tests/attr/test-stat-detailed-3 @@ -33,6 +33,7 @@ config=2 fd=5 type=0 config=0 +optional=1 # PERF_TYPE_HARDWARE / PERF_COUNT_HW_STALLED_CYCLES_FRONTEND [event6:base-stat] @@ -53,18 +54,21 @@ optional=1 fd=8 type=0 config=1 +optional=1 # PERF_TYPE_HARDWARE / PERF_COUNT_HW_BRANCH_INSTRUCTIONS [event9:base-stat] fd=9 type=0 config=4 +optional=1 # PERF_TYPE_HARDWARE / PERF_COUNT_HW_BRANCH_MISSES [event10:base-stat] fd=10 type=0 config=5 +optional=1 # PERF_TYPE_HW_CACHE / # PERF_COUNT_HW_CACHE_L1D << 0 | @@ -74,6 +78,7 @@ config=5 fd=11 type=3 config=0 +optional=1 # PERF_TYPE_HW_CACHE / # PERF_COUNT_HW_CACHE_L1D << 0 | @@ -83,6 +88,7 @@ config=0 fd=12 type=3 config=65536 +optional=1 # PERF_TYPE_HW_CACHE / # PERF_COUNT_HW_CACHE_LL << 0 | @@ -92,6 +98,7 @@ config=65536 fd=13 type=3 config=2 +optional=1 # PERF_TYPE_HW_CACHE, # PERF_COUNT_HW_CACHE_LL << 0 | @@ -101,6 +108,7 @@ config=2 fd=14 type=3 config=65538 +optional=1 # PERF_TYPE_HW_CACHE, # PERF_COUNT_HW_CACHE_L1I << 0 | @@ -120,6 +128,7 @@ optional=1 fd=16 type=3 config=65537 +optional=1 # PERF_TYPE_HW_CACHE, # PERF_COUNT_HW_CACHE_DTLB << 0 | @@ -129,6 +138,7 @@ config=65537 fd=17 type=3 config=3 +optional=1 # PERF_TYPE_HW_CACHE, # PERF_COUNT_HW_CACHE_DTLB << 0 | @@ -138,6 +148,7 @@ config=3 fd=18 type=3 config=65539 +optional=1 # PERF_TYPE_HW_CACHE, # PERF_COUNT_HW_CACHE_ITLB << 0 | @@ -147,6 +158,7 @@ config=65539 fd=19 type=3 config=4 +optional=1 # PERF_TYPE_HW_CACHE, # PERF_COUNT_HW_CACHE_ITLB << 0 | @@ -156,6 +168,7 @@ config=4 fd=20 type=3 config=65540 +optional=1 # PERF_TYPE_HW_CACHE, # PERF_COUNT_HW_CACHE_L1D << 0 | diff --git a/tools/perf/tests/attr/test-stat-no-inherit b/tools/perf/tests/attr/test-stat-no-inherit index d54b2a1e3e28..924fbb9300d1 100644 --- a/tools/perf/tests/attr/test-stat-no-inherit +++ b/tools/perf/tests/attr/test-stat-no-inherit @@ -5,3 +5,4 @@ ret = 1 [event:base-stat] inherit=0 +optional=1 -- cgit v1.2.3 From 98ad761bd3989a71be2b100002be923466617394 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Tue, 10 Oct 2017 15:43:22 -0700 Subject: perf list: Fix group description in the man page Fix an incorrect description in the 'perf list' manpage. When a group does not fit into the hardware it is partially scheduled, but does not error out. Signed-off-by: Andi Kleen Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20171010224322.15861-1-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-list.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools/perf') diff --git a/tools/perf/Documentation/perf-list.txt b/tools/perf/Documentation/perf-list.txt index 24679aed90b7..e2a897ae3596 100644 --- a/tools/perf/Documentation/perf-list.txt +++ b/tools/perf/Documentation/perf-list.txt @@ -204,7 +204,7 @@ For example Intel Core CPUs typically have four generic performance counters for the core, plus three fixed counters for instructions, cycles and ref-cycles. Some special events have restrictions on which counter they can schedule, and may not support multiple instances in a single group. -When too many events are specified in the group none of them will not +When too many events are specified in the group some of them will not be measured. Globally pinned events can limit the number of counters available for -- cgit v1.2.3 From 696e2457e9fd285034cd30cd8c93ece5e6cfe35a Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 11 Oct 2017 17:01:24 +0200 Subject: perf annotate: Remove arch::cpuid_parse callback There's no need for extra cpuid_parse arch callback, it can be handled directly in init callback. Adding the init function to x86 to cover the cpuid initialization. Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20171011150158.11895-2-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/arm/annotate/instructions.c | 3 ++- tools/perf/arch/arm64/annotate/instructions.c | 3 ++- tools/perf/arch/powerpc/annotate/instructions.c | 4 +++- tools/perf/arch/s390/annotate/instructions.c | 4 +++- tools/perf/arch/x86/annotate/instructions.c | 14 ++++++++++++++ tools/perf/util/annotate.c | 10 +++------- 6 files changed, 27 insertions(+), 11 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/arch/arm/annotate/instructions.c b/tools/perf/arch/arm/annotate/instructions.c index 1ce0872b1726..6dfec7c23696 100644 --- a/tools/perf/arch/arm/annotate/instructions.c +++ b/tools/perf/arch/arm/annotate/instructions.c @@ -1,3 +1,4 @@ +#include #include #include @@ -23,7 +24,7 @@ static struct ins_ops *arm__associate_instruction_ops(struct arch *arch, const c return ops; } -static int arm__annotate_init(struct arch *arch) +static int arm__annotate_init(struct arch *arch, char *cpuid __maybe_unused) { struct arm_annotate *arm; int err; diff --git a/tools/perf/arch/arm64/annotate/instructions.c b/tools/perf/arch/arm64/annotate/instructions.c index 8f1908756cb6..a2c32be4132a 100644 --- a/tools/perf/arch/arm64/annotate/instructions.c +++ b/tools/perf/arch/arm64/annotate/instructions.c @@ -1,3 +1,4 @@ +#include #include #include @@ -25,7 +26,7 @@ static struct ins_ops *arm64__associate_instruction_ops(struct arch *arch, const return ops; } -static int arm64__annotate_init(struct arch *arch) +static int arm64__annotate_init(struct arch *arch, char *cpuid __maybe_unused) { struct arm64_annotate *arm; int err; diff --git a/tools/perf/arch/powerpc/annotate/instructions.c b/tools/perf/arch/powerpc/annotate/instructions.c index 3c4004db81b9..b6b0ef5952d0 100644 --- a/tools/perf/arch/powerpc/annotate/instructions.c +++ b/tools/perf/arch/powerpc/annotate/instructions.c @@ -1,3 +1,5 @@ +#include + static struct ins_ops *powerpc__associate_instruction_ops(struct arch *arch, const char *name) { int i; @@ -46,7 +48,7 @@ static struct ins_ops *powerpc__associate_instruction_ops(struct arch *arch, con return ops; } -static int powerpc__annotate_init(struct arch *arch) +static int powerpc__annotate_init(struct arch *arch, char *cpuid __maybe_unused) { if (!arch->initialized) { arch->initialized = true; diff --git a/tools/perf/arch/s390/annotate/instructions.c b/tools/perf/arch/s390/annotate/instructions.c index 745b4b1b8b21..b8676ccbed76 100644 --- a/tools/perf/arch/s390/annotate/instructions.c +++ b/tools/perf/arch/s390/annotate/instructions.c @@ -1,3 +1,5 @@ +#include + static struct ins_ops *s390__associate_ins_ops(struct arch *arch, const char *name) { struct ins_ops *ops = NULL; @@ -19,7 +21,7 @@ static struct ins_ops *s390__associate_ins_ops(struct arch *arch, const char *na return ops; } -static int s390__annotate_init(struct arch *arch) +static int s390__annotate_init(struct arch *arch, char *cpuid __maybe_unused) { if (!arch->initialized) { arch->initialized = true; diff --git a/tools/perf/arch/x86/annotate/instructions.c b/tools/perf/arch/x86/annotate/instructions.c index d84b72063a30..563cd4564041 100644 --- a/tools/perf/arch/x86/annotate/instructions.c +++ b/tools/perf/arch/x86/annotate/instructions.c @@ -122,3 +122,17 @@ static int x86__cpuid_parse(struct arch *arch, char *cpuid) return -1; } + +static int x86__annotate_init(struct arch *arch, char *cpuid) +{ + int err = 0; + + if (arch->initialized) + return 0; + + if (cpuid) + err = x86__cpuid_parse(arch, cpuid); + + arch->initialized = true; + return err; +} diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 4397a8b6e6cd..08164162c345 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -49,10 +49,9 @@ struct arch { void *priv; unsigned int model; unsigned int family; - int (*init)(struct arch *arch); + int (*init)(struct arch *arch, char *cpuid); bool (*ins_is_fused)(struct arch *arch, const char *ins1, const char *ins2); - int (*cpuid_parse)(struct arch *arch, char *cpuid); struct { char comment_char; char skip_functions_char; @@ -132,10 +131,10 @@ static struct arch architectures[] = { }, { .name = "x86", + .init = x86__annotate_init, .instructions = x86__instructions, .nr_instructions = ARRAY_SIZE(x86__instructions), .ins_is_fused = x86__ins_is_fused, - .cpuid_parse = x86__cpuid_parse, .objdump = { .comment_char = '#', }, @@ -1447,16 +1446,13 @@ int symbol__disassemble(struct symbol *sym, struct map *map, *parch = arch; if (arch->init) { - err = arch->init(arch); + err = arch->init(arch, cpuid); if (err) { pr_err("%s: failed to initialize %s arch priv area\n", __func__, arch->name); return err; } } - if (arch->cpuid_parse && cpuid) - arch->cpuid_parse(arch, cpuid); - pr_debug("%s: filename=%s, sym=%s, start=%#" PRIx64 ", end=%#" PRIx64 "\n", __func__, symfs_filename, sym->name, map->unmap_ip(map, sym->start), map->unmap_ip(map, sym->end)); -- cgit v1.2.3 From d7e05ceaa93417f6f0077444eb111f64df823d25 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 16 Oct 2017 15:53:08 -0300 Subject: perf tools: Do not check ABI headers in a detached tarball build When we use one of: [acme@jouet linux]$ make help | grep perf perf-tar-src-pkg - Build perf-4.14.0-rc3.tar source tarball perf-targz-src-pkg - Build perf-4.14.0-rc3.tar.gz source tarball perf-tarbz2-src-pkg - Build perf-4.14.0-rc3.tar.bz2 source tarball perf-tarxz-src-pkg - Build perf-4.14.0-rc3.tar.xz source tarball [acme@jouet linux]$ I.e. when we create a detached tarball to build perf outside outside the enveloping kernel sources (from a kernel tarball or a checked out linux.git directory) we by definition can't check for differences among the tools/{include,arch}, etc files we originally copied from the kernel, so bail out in that case, to avoid warnings when doing the detached builds. Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-vbrga0mhplv7niwxr3ghjyxv@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/check-headers.sh | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'tools/perf') diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh index 932fda54b8a6..322629423b49 100755 --- a/tools/perf/check-headers.sh +++ b/tools/perf/check-headers.sh @@ -57,6 +57,11 @@ check () { } +# Check if we have the kernel headers (tools/perf/../../include), else +# we're probably on a detached tarball, so no point in trying to check +# differences. +test -d ../../include || exit 0 + # simple diff check for i in $HEADERS; do check $i -B -- cgit v1.2.3 From 7958e541495d7f99cefef2300b2c388865626c2e Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Fri, 13 Oct 2017 16:31:56 -0700 Subject: perf vendor events: Fix incorrect cmask syntax for some Intel metrics Some of the metrics use an incorrect syntax for specifying the cmask for an event. Convert to perf syntax so that they can be resolved. Fixes metrics on Broadwell, SandyBridge. Signed-off-by: Andi Kleen Link: http://lkml.kernel.org/n/tip-3k3fkfj8obek9dkmryyrqzhu@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/pmu-events/arch/x86/broadwell/bdw-metrics.json | 4 ++-- tools/perf/pmu-events/arch/x86/broadwellx/bdx-metrics.json | 2 +- tools/perf/pmu-events/arch/x86/jaketown/jkt-metrics.json | 2 +- tools/perf/pmu-events/arch/x86/sandybridge/snb-metrics.json | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/pmu-events/arch/x86/broadwell/bdw-metrics.json b/tools/perf/pmu-events/arch/x86/broadwell/bdw-metrics.json index 7372c3207092..00bfdb5c5acb 100644 --- a/tools/perf/pmu-events/arch/x86/broadwell/bdw-metrics.json +++ b/tools/perf/pmu-events/arch/x86/broadwell/bdw-metrics.json @@ -61,7 +61,7 @@ }, { "BriefDescription": "Average Branch Address Clear Cost (fraction of cycles)", - "MetricExpr": "2* (( RS_EVENTS.EMPTY_CYCLES - ICACHE.IFDATA_STALL - (( 14 * ITLB_MISSES.STLB_HIT + ITLB_MISSES.WALK_DURATION:c1 + 7* ITLB_MISSES.WALK_COMPLETED )) ) / RS_EVENTS.EMPTY_END)", + "MetricExpr": "2* (( RS_EVENTS.EMPTY_CYCLES - ICACHE.IFDATA_STALL - (( 14 * ITLB_MISSES.STLB_HIT + cpu@ITLB_MISSES.WALK_DURATION\\,cmask\\=1@ + 7* ITLB_MISSES.WALK_COMPLETED )) ) / RS_EVENTS.EMPTY_END)", "MetricGroup": "Unknown_Branches", "MetricName": "BAClear_Cost" }, @@ -85,7 +85,7 @@ }, { "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses", - "MetricExpr": "( ITLB_MISSES.WALK_DURATION:c1 + DTLB_LOAD_MISSES.WALK_DURATION:c1 + DTLB_STORE_MISSES.WALK_DURATION:c1 + 7*(DTLB_STORE_MISSES.WALK_COMPLETED+DTLB_LOAD_MISSES.WALK_COMPLETED+ITLB_MISSES.WALK_COMPLETED)) / (( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)", + "MetricExpr": "( cpu@ITLB_MISSES.WALK_DURATION\\,cmask\\=1@ + cpu@DTLB_LOAD_MISSES.WALK_DURATION\\,cmask\\=1@ + cpu@DTLB_STORE_MISSES.WALK_DURATION\\,cmask\\=1@ + 7*(DTLB_STORE_MISSES.WALK_COMPLETED+DTLB_LOAD_MISSES.WALK_COMPLETED+ITLB_MISSES.WALK_COMPLETED)) / (( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)", "MetricGroup": "TLB", "MetricName": "Page_Walks_Utilization" }, diff --git a/tools/perf/pmu-events/arch/x86/broadwellx/bdx-metrics.json b/tools/perf/pmu-events/arch/x86/broadwellx/bdx-metrics.json index 4d9a1175a5cc..5a7f1ec24200 100644 --- a/tools/perf/pmu-events/arch/x86/broadwellx/bdx-metrics.json +++ b/tools/perf/pmu-events/arch/x86/broadwellx/bdx-metrics.json @@ -61,7 +61,7 @@ }, { "BriefDescription": "Average Branch Address Clear Cost (fraction of cycles)", - "MetricExpr": "2* (( RS_EVENTS.EMPTY_CYCLES - ICACHE.IFDATA_STALL - (( 14 * ITLB_MISSES.STLB_HIT + ITLB_MISSES.WALK_DURATION:c1 + 7* ITLB_MISSES.WALK_COMPLETED )) ) / RS_EVENTS.EMPTY_END)", + "MetricExpr": "2* (( RS_EVENTS.EMPTY_CYCLES - ICACHE.IFDATA_STALL - (( 14 * ITLB_MISSES.STLB_HIT + cpu@ITLB_MISSES.WALK_DURATION\\,cmask\\=1@ + 7* ITLB_MISSES.WALK_COMPLETED )) ) / RS_EVENTS.EMPTY_END)", "MetricGroup": "Unknown_Branches", "MetricName": "BAClear_Cost" }, diff --git a/tools/perf/pmu-events/arch/x86/jaketown/jkt-metrics.json b/tools/perf/pmu-events/arch/x86/jaketown/jkt-metrics.json index 2b18008cf482..fd7d7c438226 100644 --- a/tools/perf/pmu-events/arch/x86/jaketown/jkt-metrics.json +++ b/tools/perf/pmu-events/arch/x86/jaketown/jkt-metrics.json @@ -55,7 +55,7 @@ }, { "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is at least 1 uop executed)", - "MetricExpr": "UOPS_DISPATCHED.THREAD / (( UOPS_DISPATCHED.CORE:c1 / 2) if #SMT_on else UOPS_DISPATCHED.CORE:c1)", + "MetricExpr": "UOPS_DISPATCHED.THREAD / (( cpu@UOPS_DISPATCHED.CORE\\,cmask\\=1@ / 2) if #SMT_on else cpu@UOPS_DISPATCHED.CORE\\,cmask\\=1@)", "MetricGroup": "Pipeline;Ports_Utilization", "MetricName": "ILP" }, diff --git a/tools/perf/pmu-events/arch/x86/sandybridge/snb-metrics.json b/tools/perf/pmu-events/arch/x86/sandybridge/snb-metrics.json index 2b18008cf482..fd7d7c438226 100644 --- a/tools/perf/pmu-events/arch/x86/sandybridge/snb-metrics.json +++ b/tools/perf/pmu-events/arch/x86/sandybridge/snb-metrics.json @@ -55,7 +55,7 @@ }, { "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is at least 1 uop executed)", - "MetricExpr": "UOPS_DISPATCHED.THREAD / (( UOPS_DISPATCHED.CORE:c1 / 2) if #SMT_on else UOPS_DISPATCHED.CORE:c1)", + "MetricExpr": "UOPS_DISPATCHED.THREAD / (( cpu@UOPS_DISPATCHED.CORE\\,cmask\\=1@ / 2) if #SMT_on else cpu@UOPS_DISPATCHED.CORE\\,cmask\\=1@)", "MetricGroup": "Pipeline;Ports_Utilization", "MetricName": "ILP" }, -- cgit v1.2.3 From 923d0c9ae570c3f33a0b5a9517c23ccc816d9b75 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 17 Oct 2017 10:35:00 -0300 Subject: perf tools: Introduce binary__fprintf() Out of print_binary() but receiving a fp pointer and expecting that the printer be a fprintf like function, i.e. receive a FILE pointer and return the number of characters printed. Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Cc: yuzhoujian Link: http://lkml.kernel.org/n/tip-6oqnxr6lmgqe6q6p3iugnscx@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-script.c | 32 +++++++++++++++++--------------- tools/perf/builtin-trace.c | 14 +++++++------- tools/perf/util/debug.c | 31 +++++++++++++++++-------------- tools/perf/util/print_binary.c | 30 ++++++++++++++++-------------- tools/perf/util/print_binary.h | 18 +++++++++++++----- 5 files changed, 70 insertions(+), 55 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 7167df215a42..5a7dfc5691a1 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -1164,40 +1164,40 @@ struct printer_data { bool is_printable; }; -static void -print_sample_bpf_output_printer(enum binary_printer_ops op, - unsigned int val, - void *extra) +static int sample__fprintf_bpf_output(enum binary_printer_ops op, + unsigned int val, + void *extra, FILE *fp) { unsigned char ch = (unsigned char)val; struct printer_data *printer_data = extra; + int printed = 0; switch (op) { case BINARY_PRINT_DATA_BEGIN: - printf("\n"); + printed += fprintf(fp, "\n"); break; case BINARY_PRINT_LINE_BEGIN: - printf("%17s", !printer_data->line_no ? "BPF output:" : + printed += fprintf(fp, "%17s", !printer_data->line_no ? "BPF output:" : " "); break; case BINARY_PRINT_ADDR: - printf(" %04x:", val); + printed += fprintf(fp, " %04x:", val); break; case BINARY_PRINT_NUM_DATA: - printf(" %02x", val); + printed += fprintf(fp, " %02x", val); break; case BINARY_PRINT_NUM_PAD: - printf(" "); + printed += fprintf(fp, " "); break; case BINARY_PRINT_SEP: - printf(" "); + printed += fprintf(fp, " "); break; case BINARY_PRINT_CHAR_DATA: if (printer_data->hit_nul && ch) printer_data->is_printable = false; if (!isprint(ch)) { - printf("%c", '.'); + printed += fprintf(fp, "%c", '.'); if (!printer_data->is_printable) break; @@ -1207,20 +1207,22 @@ print_sample_bpf_output_printer(enum binary_printer_ops op, else printer_data->is_printable = false; } else { - printf("%c", ch); + printed += fprintf(fp, "%c", ch); } break; case BINARY_PRINT_CHAR_PAD: - printf(" "); + printed += fprintf(fp, " "); break; case BINARY_PRINT_LINE_END: - printf("\n"); + printed += fprintf(fp, "\n"); printer_data->line_no++; break; case BINARY_PRINT_DATA_END: default: break; } + + return printed; } static void print_sample_bpf_output(struct perf_sample *sample) @@ -1229,7 +1231,7 @@ static void print_sample_bpf_output(struct perf_sample *sample) struct printer_data printer_data = {0, false, true}; print_binary(sample->raw_data, nr_bytes, 8, - print_sample_bpf_output_printer, &printer_data); + sample__fprintf_bpf_output, &printer_data); if (printer_data.is_printable && printer_data.hit_nul) printf("%17s \"%s\"\n", "BPF string:", diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index afef6fe46c45..8b23982dd9f2 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -1828,16 +1828,14 @@ out_dump: goto out_put; } -static void bpf_output__printer(enum binary_printer_ops op, - unsigned int val, void *extra) +static int bpf_output__printer(enum binary_printer_ops op, + unsigned int val, void *extra __maybe_unused, FILE *fp) { - FILE *output = extra; unsigned char ch = (unsigned char)val; switch (op) { case BINARY_PRINT_CHAR_DATA: - fprintf(output, "%c", isprint(ch) ? ch : '.'); - break; + return fprintf(fp, "%c", isprint(ch) ? ch : '.'); case BINARY_PRINT_DATA_BEGIN: case BINARY_PRINT_LINE_BEGIN: case BINARY_PRINT_ADDR: @@ -1850,13 +1848,15 @@ static void bpf_output__printer(enum binary_printer_ops op, default: break; } + + return 0; } static void bpf_output__fprintf(struct trace *trace, struct perf_sample *sample) { - print_binary(sample->raw_data, sample->raw_size, 8, - bpf_output__printer, trace->output); + binary__fprintf(sample->raw_data, sample->raw_size, 8, + bpf_output__printer, NULL, trace->output); } static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel, diff --git a/tools/perf/util/debug.c b/tools/perf/util/debug.c index a5b3777ffee6..cd24ebf0da2f 100644 --- a/tools/perf/util/debug.c +++ b/tools/perf/util/debug.c @@ -111,50 +111,53 @@ int dump_printf(const char *fmt, ...) return ret; } -static void trace_event_printer(enum binary_printer_ops op, - unsigned int val, void *extra) +static int trace_event_printer(enum binary_printer_ops op, + unsigned int val, void *extra, FILE *fp) { const char *color = PERF_COLOR_BLUE; union perf_event *event = (union perf_event *)extra; unsigned char ch = (unsigned char)val; + int printed = 0; switch (op) { case BINARY_PRINT_DATA_BEGIN: - printf("."); - color_fprintf(stdout, color, "\n. ... raw event: size %d bytes\n", - event->header.size); + printed += fprintf(fp, "."); + printed += color_fprintf(fp, color, "\n. ... raw event: size %d bytes\n", + event->header.size); break; case BINARY_PRINT_LINE_BEGIN: - printf("."); + printed += fprintf(fp, "."); break; case BINARY_PRINT_ADDR: - color_fprintf(stdout, color, " %04x: ", val); + printed += color_fprintf(fp, color, " %04x: ", val); break; case BINARY_PRINT_NUM_DATA: - color_fprintf(stdout, color, " %02x", val); + printed += color_fprintf(fp, color, " %02x", val); break; case BINARY_PRINT_NUM_PAD: - color_fprintf(stdout, color, " "); + printed += color_fprintf(fp, color, " "); break; case BINARY_PRINT_SEP: - color_fprintf(stdout, color, " "); + printed += color_fprintf(fp, color, " "); break; case BINARY_PRINT_CHAR_DATA: - color_fprintf(stdout, color, "%c", + printed += color_fprintf(fp, color, "%c", isprint(ch) ? ch : '.'); break; case BINARY_PRINT_CHAR_PAD: - color_fprintf(stdout, color, " "); + printed += color_fprintf(fp, color, " "); break; case BINARY_PRINT_LINE_END: - color_fprintf(stdout, color, "\n"); + printed += color_fprintf(fp, color, "\n"); break; case BINARY_PRINT_DATA_END: - printf("\n"); + printed += fprintf(fp, "\n"); break; default: break; } + + return printed; } void trace_event(union perf_event *event) diff --git a/tools/perf/util/print_binary.c b/tools/perf/util/print_binary.c index e908177b9976..df55ad3b47a0 100644 --- a/tools/perf/util/print_binary.c +++ b/tools/perf/util/print_binary.c @@ -2,40 +2,42 @@ #include #include "sane_ctype.h" -void print_binary(unsigned char *data, size_t len, - size_t bytes_per_line, print_binary_t printer, - void *extra) +int binary__fprintf(unsigned char *data, size_t len, + size_t bytes_per_line, binary__fprintf_t printer, + void *extra, FILE *fp) { size_t i, j, mask; + int printed = 0; if (!printer) - return; + return 0; bytes_per_line = roundup_pow_of_two(bytes_per_line); mask = bytes_per_line - 1; - printer(BINARY_PRINT_DATA_BEGIN, 0, extra); + printed += printer(BINARY_PRINT_DATA_BEGIN, 0, extra, fp); for (i = 0; i < len; i++) { if ((i & mask) == 0) { - printer(BINARY_PRINT_LINE_BEGIN, -1, extra); - printer(BINARY_PRINT_ADDR, i, extra); + printed += printer(BINARY_PRINT_LINE_BEGIN, -1, extra, fp); + printed += printer(BINARY_PRINT_ADDR, i, extra, fp); } - printer(BINARY_PRINT_NUM_DATA, data[i], extra); + printed += printer(BINARY_PRINT_NUM_DATA, data[i], extra, fp); if (((i & mask) == mask) || i == len - 1) { for (j = 0; j < mask-(i & mask); j++) - printer(BINARY_PRINT_NUM_PAD, -1, extra); + printed += printer(BINARY_PRINT_NUM_PAD, -1, extra, fp); - printer(BINARY_PRINT_SEP, i, extra); + printer(BINARY_PRINT_SEP, i, extra, fp); for (j = i & ~mask; j <= i; j++) - printer(BINARY_PRINT_CHAR_DATA, data[j], extra); + printed += printer(BINARY_PRINT_CHAR_DATA, data[j], extra, fp); for (j = 0; j < mask-(i & mask); j++) - printer(BINARY_PRINT_CHAR_PAD, i, extra); - printer(BINARY_PRINT_LINE_END, -1, extra); + printed += printer(BINARY_PRINT_CHAR_PAD, i, extra, fp); + printed += printer(BINARY_PRINT_LINE_END, -1, extra, fp); } } - printer(BINARY_PRINT_DATA_END, -1, extra); + printed += printer(BINARY_PRINT_DATA_END, -1, extra, fp); + return printed; } int is_printable_array(char *p, unsigned int len) diff --git a/tools/perf/util/print_binary.h b/tools/perf/util/print_binary.h index da0427263d2d..f97918a179db 100644 --- a/tools/perf/util/print_binary.h +++ b/tools/perf/util/print_binary.h @@ -2,6 +2,7 @@ #define PERF_PRINT_BINARY_H #include +#include enum binary_printer_ops { BINARY_PRINT_DATA_BEGIN, @@ -16,12 +17,19 @@ enum binary_printer_ops { BINARY_PRINT_DATA_END, }; -typedef void (*print_binary_t)(enum binary_printer_ops op, - unsigned int val, void *extra); +typedef int (*binary__fprintf_t)(enum binary_printer_ops op, + unsigned int val, void *extra, FILE *fp); -void print_binary(unsigned char *data, size_t len, - size_t bytes_per_line, print_binary_t printer, - void *extra); +int binary__fprintf(unsigned char *data, size_t len, + size_t bytes_per_line, binary__fprintf_t printer, + void *extra, FILE *fp); + +static inline void print_binary(unsigned char *data, size_t len, + size_t bytes_per_line, binary__fprintf_t printer, + void *extra) +{ + binary__fprintf(data, len, bytes_per_line, printer, extra, stdout); +} int is_printable_array(char *p, unsigned int len); -- cgit v1.2.3 From a1a587073ccdc6ffae414259f65d0a94cadd0a72 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 17 Oct 2017 10:54:24 -0300 Subject: perf script: Use fprintf like printing uniformly We've been mixing print() with fprintf() style printing for a while, but now we need to use fprintf() like syntax uniformly as a preparatory patch for supporting printing to different files, one per event. Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Cc: yuzhoujian Link: http://lkml.kernel.org/n/tip-kv5z3v8ptfghbarv3a9usvin@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-script.c | 491 +++++++++++++++++++++++--------------------- 1 file changed, 259 insertions(+), 232 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 5a7dfc5691a1..3e83f4735b21 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -500,69 +500,76 @@ out: return 0; } -static void print_sample_iregs(struct perf_sample *sample, - struct perf_event_attr *attr) +static int perf_sample__fprintf_iregs(struct perf_sample *sample, + struct perf_event_attr *attr, FILE *fp) { struct regs_dump *regs = &sample->intr_regs; uint64_t mask = attr->sample_regs_intr; unsigned i = 0, r; + int printed = 0; if (!regs) - return; + return 0; for_each_set_bit(r, (unsigned long *) &mask, sizeof(mask) * 8) { u64 val = regs->regs[i++]; - printf("%5s:0x%"PRIx64" ", perf_reg_name(r), val); + printed += fprintf(fp, "%5s:0x%"PRIx64" ", perf_reg_name(r), val); } + + return printed; } -static void print_sample_uregs(struct perf_sample *sample, - struct perf_event_attr *attr) +static int perf_sample__fprintf_uregs(struct perf_sample *sample, + struct perf_event_attr *attr, FILE *fp) { struct regs_dump *regs = &sample->user_regs; uint64_t mask = attr->sample_regs_user; unsigned i = 0, r; + int printed = 0; if (!regs || !regs->regs) - return; + return 0; - printf(" ABI:%" PRIu64 " ", regs->abi); + printed += fprintf(fp, " ABI:%" PRIu64 " ", regs->abi); for_each_set_bit(r, (unsigned long *) &mask, sizeof(mask) * 8) { u64 val = regs->regs[i++]; - printf("%5s:0x%"PRIx64" ", perf_reg_name(r), val); + printed += fprintf(fp, "%5s:0x%"PRIx64" ", perf_reg_name(r), val); } + + return printed; } -static void print_sample_start(struct perf_sample *sample, - struct thread *thread, - struct perf_evsel *evsel) +static int perf_sample__fprintf_start(struct perf_sample *sample, + struct thread *thread, + struct perf_evsel *evsel, FILE *fp) { struct perf_event_attr *attr = &evsel->attr; unsigned long secs; unsigned long long nsecs; + int printed = 0; if (PRINT_FIELD(COMM)) { if (latency_format) - printf("%8.8s ", thread__comm_str(thread)); + printed += fprintf(fp, "%8.8s ", thread__comm_str(thread)); else if (PRINT_FIELD(IP) && symbol_conf.use_callchain) - printf("%s ", thread__comm_str(thread)); + printed += fprintf(fp, "%s ", thread__comm_str(thread)); else - printf("%16s ", thread__comm_str(thread)); + printed += fprintf(fp, "%16s ", thread__comm_str(thread)); } if (PRINT_FIELD(PID) && PRINT_FIELD(TID)) - printf("%5d/%-5d ", sample->pid, sample->tid); + printed += fprintf(fp, "%5d/%-5d ", sample->pid, sample->tid); else if (PRINT_FIELD(PID)) - printf("%5d ", sample->pid); + printed += fprintf(fp, "%5d ", sample->pid); else if (PRINT_FIELD(TID)) - printf("%5d ", sample->tid); + printed += fprintf(fp, "%5d ", sample->tid); if (PRINT_FIELD(CPU)) { if (latency_format) - printf("%3d ", sample->cpu); + printed += fprintf(fp, "%3d ", sample->cpu); else - printf("[%03d] ", sample->cpu); + printed += fprintf(fp, "[%03d] ", sample->cpu); } if (PRINT_FIELD(TIME)) { @@ -571,13 +578,15 @@ static void print_sample_start(struct perf_sample *sample, nsecs -= secs * NSEC_PER_SEC; if (nanosecs) - printf("%5lu.%09llu: ", secs, nsecs); + printed += fprintf(fp, "%5lu.%09llu: ", secs, nsecs); else { char sample_time[32]; timestamp__scnprintf_usec(sample->time, sample_time, sizeof(sample_time)); - printf("%12s: ", sample_time); + printed += fprintf(fp, "%12s: ", sample_time); } } + + return printed; } static inline char @@ -589,16 +598,17 @@ mispred_str(struct branch_entry *br) return br->flags.predicted ? 'P' : 'M'; } -static void print_sample_brstack(struct perf_sample *sample, - struct thread *thread, - struct perf_event_attr *attr) +static int perf_sample__fprintf_brstack(struct perf_sample *sample, + struct thread *thread, + struct perf_event_attr *attr, FILE *fp) { struct branch_stack *br = sample->branch_stack; struct addr_location alf, alt; u64 i, from, to; + int printed = 0; if (!(br && br->nr)) - return; + return 0; for (i = 0; i < br->nr; i++) { from = br->entries[i].from; @@ -611,38 +621,41 @@ static void print_sample_brstack(struct perf_sample *sample, thread__find_addr_map(thread, sample->cpumode, MAP__FUNCTION, to, &alt); } - printf(" 0x%"PRIx64, from); + printed += fprintf(fp, " 0x%"PRIx64, from); if (PRINT_FIELD(DSO)) { - printf("("); - map__fprintf_dsoname(alf.map, stdout); - printf(")"); + printed += fprintf(fp, "("); + printed += map__fprintf_dsoname(alf.map, fp); + printed += fprintf(fp, ")"); } - printf("/0x%"PRIx64, to); + printed += fprintf(fp, "/0x%"PRIx64, to); if (PRINT_FIELD(DSO)) { - printf("("); - map__fprintf_dsoname(alt.map, stdout); - printf(")"); + printed += fprintf(fp, "("); + printed += map__fprintf_dsoname(alt.map, fp); + printed += fprintf(fp, ")"); } - printf("/%c/%c/%c/%d ", + printed += fprintf(fp, "/%c/%c/%c/%d ", mispred_str( br->entries + i), br->entries[i].flags.in_tx? 'X' : '-', br->entries[i].flags.abort? 'A' : '-', br->entries[i].flags.cycles); } + + return printed; } -static void print_sample_brstacksym(struct perf_sample *sample, - struct thread *thread, - struct perf_event_attr *attr) +static int perf_sample__fprintf_brstacksym(struct perf_sample *sample, + struct thread *thread, + struct perf_event_attr *attr, FILE *fp) { struct branch_stack *br = sample->branch_stack; struct addr_location alf, alt; u64 i, from, to; + int printed = 0; if (!(br && br->nr)) - return; + return 0; for (i = 0; i < br->nr; i++) { @@ -659,37 +672,40 @@ static void print_sample_brstacksym(struct perf_sample *sample, if (alt.map) alt.sym = map__find_symbol(alt.map, alt.addr); - symbol__fprintf_symname_offs(alf.sym, &alf, stdout); + printed += symbol__fprintf_symname_offs(alf.sym, &alf, fp); if (PRINT_FIELD(DSO)) { - printf("("); - map__fprintf_dsoname(alf.map, stdout); - printf(")"); + printed += fprintf(fp, "("); + printed += map__fprintf_dsoname(alf.map, fp); + printed += fprintf(fp, ")"); } - putchar('/'); - symbol__fprintf_symname_offs(alt.sym, &alt, stdout); + printed += fprintf(fp, "%c", '/'); + printed += symbol__fprintf_symname_offs(alt.sym, &alt, fp); if (PRINT_FIELD(DSO)) { - printf("("); - map__fprintf_dsoname(alt.map, stdout); - printf(")"); + printed += fprintf(fp, "("); + printed += map__fprintf_dsoname(alt.map, fp); + printed += fprintf(fp, ")"); } - printf("/%c/%c/%c/%d ", + printed += fprintf(fp, "/%c/%c/%c/%d ", mispred_str( br->entries + i), br->entries[i].flags.in_tx? 'X' : '-', br->entries[i].flags.abort? 'A' : '-', br->entries[i].flags.cycles); } + + return printed; } -static void print_sample_brstackoff(struct perf_sample *sample, - struct thread *thread, - struct perf_event_attr *attr) +static int perf_sample__fprintf_brstackoff(struct perf_sample *sample, + struct thread *thread, + struct perf_event_attr *attr, FILE *fp) { struct branch_stack *br = sample->branch_stack; struct addr_location alf, alt; u64 i, from, to; + int printed = 0; if (!(br && br->nr)) - return; + return 0; for (i = 0; i < br->nr; i++) { @@ -706,24 +722,26 @@ static void print_sample_brstackoff(struct perf_sample *sample, if (alt.map && !alt.map->dso->adjust_symbols) to = map__map_ip(alt.map, to); - printf(" 0x%"PRIx64, from); + printed += fprintf(fp, " 0x%"PRIx64, from); if (PRINT_FIELD(DSO)) { - printf("("); - map__fprintf_dsoname(alf.map, stdout); - printf(")"); + printed += fprintf(fp, "("); + printed += map__fprintf_dsoname(alf.map, fp); + printed += fprintf(fp, ")"); } - printf("/0x%"PRIx64, to); + printed += fprintf(fp, "/0x%"PRIx64, to); if (PRINT_FIELD(DSO)) { - printf("("); - map__fprintf_dsoname(alt.map, stdout); - printf(")"); + printed += fprintf(fp, "("); + printed += map__fprintf_dsoname(alt.map, fp); + printed += fprintf(fp, ")"); } - printf("/%c/%c/%c/%d ", + printed += fprintf(fp, "/%c/%c/%c/%d ", mispred_str(br->entries + i), br->entries[i].flags.in_tx ? 'X' : '-', br->entries[i].flags.abort ? 'A' : '-', br->entries[i].flags.cycles); } + + return printed; } #define MAXBB 16384UL @@ -789,31 +807,30 @@ static int grab_bb(u8 *buffer, u64 start, u64 end, return len; } -static void print_jump(uint64_t ip, struct branch_entry *en, - struct perf_insn *x, u8 *inbuf, int len, - int insn) +static int ip__fprintf_jump(uint64_t ip, struct branch_entry *en, + struct perf_insn *x, u8 *inbuf, int len, + int insn, FILE *fp) { - printf("\t%016" PRIx64 "\t%-30s\t#%s%s%s%s", - ip, - dump_insn(x, ip, inbuf, len, NULL), - en->flags.predicted ? " PRED" : "", - en->flags.mispred ? " MISPRED" : "", - en->flags.in_tx ? " INTX" : "", - en->flags.abort ? " ABORT" : ""); + int printed = fprintf(fp, "\t%016" PRIx64 "\t%-30s\t#%s%s%s%s", ip, + dump_insn(x, ip, inbuf, len, NULL), + en->flags.predicted ? " PRED" : "", + en->flags.mispred ? " MISPRED" : "", + en->flags.in_tx ? " INTX" : "", + en->flags.abort ? " ABORT" : ""); if (en->flags.cycles) { - printf(" %d cycles", en->flags.cycles); + printed += fprintf(fp, " %d cycles", en->flags.cycles); if (insn) - printf(" %.2f IPC", (float)insn / en->flags.cycles); + printed += fprintf(fp, " %.2f IPC", (float)insn / en->flags.cycles); } - putchar('\n'); + return printed + fprintf(fp, "\n"); } -static void print_ip_sym(struct thread *thread, u8 cpumode, int cpu, - uint64_t addr, struct symbol **lastsym, - struct perf_event_attr *attr) +static int ip__fprintf_sym(uint64_t addr, struct thread *thread, + u8 cpumode, int cpu, struct symbol **lastsym, + struct perf_event_attr *attr, FILE *fp) { struct addr_location al; - int off; + int off, printed = 0; memset(&al, 0, sizeof(al)); @@ -822,7 +839,7 @@ static void print_ip_sym(struct thread *thread, u8 cpumode, int cpu, thread__find_addr_map(thread, cpumode, MAP__VARIABLE, addr, &al); if ((*lastsym) && al.addr >= (*lastsym)->start && al.addr < (*lastsym)->end) - return; + return 0; al.cpu = cpu; al.sym = NULL; @@ -830,37 +847,39 @@ static void print_ip_sym(struct thread *thread, u8 cpumode, int cpu, al.sym = map__find_symbol(al.map, al.addr); if (!al.sym) - return; + return 0; if (al.addr < al.sym->end) off = al.addr - al.sym->start; else off = al.addr - al.map->start - al.sym->start; - printf("\t%s", al.sym->name); + printed += fprintf(fp, "\t%s", al.sym->name); if (off) - printf("%+d", off); - putchar(':'); + printed += fprintf(fp, "%+d", off); + printed += fprintf(fp, ":"); if (PRINT_FIELD(SRCLINE)) - map__fprintf_srcline(al.map, al.addr, "\t", stdout); - putchar('\n'); + printed += map__fprintf_srcline(al.map, al.addr, "\t", fp); + printed += fprintf(fp, "\n"); *lastsym = al.sym; + + return printed; } -static void print_sample_brstackinsn(struct perf_sample *sample, - struct thread *thread, - struct perf_event_attr *attr, - struct machine *machine) +static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample, + struct thread *thread, + struct perf_event_attr *attr, + struct machine *machine, FILE *fp) { struct branch_stack *br = sample->branch_stack; u64 start, end; - int i, insn, len, nr, ilen; + int i, insn, len, nr, ilen, printed = 0; struct perf_insn x; u8 buffer[MAXBB]; unsigned off; struct symbol *lastsym = NULL; if (!(br && br->nr)) - return; + return 0; nr = br->nr; if (max_blocks && nr > max_blocks + 1) nr = max_blocks + 1; @@ -868,17 +887,17 @@ static void print_sample_brstackinsn(struct perf_sample *sample, x.thread = thread; x.cpu = sample->cpu; - putchar('\n'); + printed += fprintf(fp, "%c", '\n'); /* Handle first from jump, of which we don't know the entry. */ len = grab_bb(buffer, br->entries[nr-1].from, br->entries[nr-1].from, machine, thread, &x.is64bit, &x.cpumode, false); if (len > 0) { - print_ip_sym(thread, x.cpumode, x.cpu, - br->entries[nr - 1].from, &lastsym, attr); - print_jump(br->entries[nr - 1].from, &br->entries[nr - 1], - &x, buffer, len, 0); + printed += ip__fprintf_sym(br->entries[nr - 1].from, thread, + x.cpumode, x.cpu, &lastsym, attr, fp); + printed += ip__fprintf_jump(br->entries[nr - 1].from, &br->entries[nr - 1], + &x, buffer, len, 0, fp); } /* Print all blocks */ @@ -904,13 +923,13 @@ static void print_sample_brstackinsn(struct perf_sample *sample, for (off = 0;; off += ilen) { uint64_t ip = start + off; - print_ip_sym(thread, x.cpumode, x.cpu, ip, &lastsym, attr); + printed += ip__fprintf_sym(ip, thread, x.cpumode, x.cpu, &lastsym, attr, fp); if (ip == end) { - print_jump(ip, &br->entries[i], &x, buffer + off, len - off, insn); + printed += ip__fprintf_jump(ip, &br->entries[i], &x, buffer + off, len - off, insn, fp); break; } else { - printf("\t%016" PRIx64 "\t%s\n", ip, - dump_insn(&x, ip, buffer + off, len - off, &ilen)); + printed += fprintf(fp, "\t%016" PRIx64 "\t%s\n", ip, + dump_insn(&x, ip, buffer + off, len - off, &ilen)); if (ilen == 0) break; insn++; @@ -923,9 +942,9 @@ static void print_sample_brstackinsn(struct perf_sample *sample, * has not been executed yet. */ if (br->entries[0].from == sample->ip) - return; + goto out; if (br->entries[0].flags.abort) - return; + goto out; /* * Print final block upto sample @@ -933,58 +952,61 @@ static void print_sample_brstackinsn(struct perf_sample *sample, start = br->entries[0].to; end = sample->ip; len = grab_bb(buffer, start, end, machine, thread, &x.is64bit, &x.cpumode, true); - print_ip_sym(thread, x.cpumode, x.cpu, start, &lastsym, attr); + printed += ip__fprintf_sym(start, thread, x.cpumode, x.cpu, &lastsym, attr, fp); if (len <= 0) { /* Print at least last IP if basic block did not work */ len = grab_bb(buffer, sample->ip, sample->ip, machine, thread, &x.is64bit, &x.cpumode, false); if (len <= 0) - return; + goto out; - printf("\t%016" PRIx64 "\t%s\n", sample->ip, + printed += fprintf(fp, "\t%016" PRIx64 "\t%s\n", sample->ip, dump_insn(&x, sample->ip, buffer, len, NULL)); - return; + goto out; } for (off = 0; off <= end - start; off += ilen) { - printf("\t%016" PRIx64 "\t%s\n", start + off, - dump_insn(&x, start + off, buffer + off, len - off, &ilen)); + printed += fprintf(fp, "\t%016" PRIx64 "\t%s\n", start + off, + dump_insn(&x, start + off, buffer + off, len - off, &ilen)); if (ilen == 0) break; } +out: + return printed; } -static void print_sample_addr(struct perf_sample *sample, - struct thread *thread, - struct perf_event_attr *attr) +static int perf_sample__fprintf_addr(struct perf_sample *sample, + struct thread *thread, + struct perf_event_attr *attr, FILE *fp) { struct addr_location al; - - printf("%16" PRIx64, sample->addr); + int printed = fprintf(fp, "%16" PRIx64, sample->addr); if (!sample_addr_correlates_sym(attr)) - return; + goto out; thread__resolve(thread, &al, sample); if (PRINT_FIELD(SYM)) { - printf(" "); + printed += fprintf(fp, " "); if (PRINT_FIELD(SYMOFFSET)) - symbol__fprintf_symname_offs(al.sym, &al, stdout); + printed += symbol__fprintf_symname_offs(al.sym, &al, fp); else - symbol__fprintf_symname(al.sym, stdout); + printed += symbol__fprintf_symname(al.sym, fp); } if (PRINT_FIELD(DSO)) { - printf(" ("); - map__fprintf_dsoname(al.map, stdout); - printf(")"); + printed += fprintf(fp, " ("); + printed += map__fprintf_dsoname(al.map, fp); + printed += fprintf(fp, ")"); } +out: + return printed; } -static void print_sample_callindent(struct perf_sample *sample, - struct perf_evsel *evsel, - struct thread *thread, - struct addr_location *al) +static int perf_sample__fprintf_callindent(struct perf_sample *sample, + struct perf_evsel *evsel, + struct thread *thread, + struct addr_location *al, FILE *fp) { struct perf_event_attr *attr = &evsel->attr; size_t depth = thread_stack__depth(thread); @@ -1019,12 +1041,12 @@ static void print_sample_callindent(struct perf_sample *sample, } if (name) - len = printf("%*s%s", (int)depth * 4, "", name); + len = fprintf(fp, "%*s%s", (int)depth * 4, "", name); else if (ip) - len = printf("%*s%16" PRIx64, (int)depth * 4, "", ip); + len = fprintf(fp, "%*s%16" PRIx64, (int)depth * 4, "", ip); if (len < 0) - return; + return len; /* * Try to keep the output length from changing frequently so that the @@ -1034,39 +1056,46 @@ static void print_sample_callindent(struct perf_sample *sample, spacing = round_up(len + 4, 32); if (len < spacing) - printf("%*s", spacing - len, ""); + len += fprintf(fp, "%*s", spacing - len, ""); + + return len; } -static void print_insn(struct perf_sample *sample, - struct perf_event_attr *attr, - struct thread *thread, - struct machine *machine) +static int perf_sample__fprintf_insn(struct perf_sample *sample, + struct perf_event_attr *attr, + struct thread *thread, + struct machine *machine, FILE *fp) { + int printed = 0; + if (PRINT_FIELD(INSNLEN)) - printf(" ilen: %d", sample->insn_len); + printed += fprintf(fp, " ilen: %d", sample->insn_len); if (PRINT_FIELD(INSN)) { int i; - printf(" insn:"); + printed += fprintf(fp, " insn:"); for (i = 0; i < sample->insn_len; i++) - printf(" %02x", (unsigned char)sample->insn[i]); + printed += fprintf(fp, " %02x", (unsigned char)sample->insn[i]); } if (PRINT_FIELD(BRSTACKINSN)) - print_sample_brstackinsn(sample, thread, attr, machine); + printed += perf_sample__fprintf_brstackinsn(sample, thread, attr, machine, fp); + + return printed; } -static void print_sample_bts(struct perf_sample *sample, - struct perf_evsel *evsel, - struct thread *thread, - struct addr_location *al, - struct machine *machine) +static int perf_sample__fprintf_bts(struct perf_sample *sample, + struct perf_evsel *evsel, + struct thread *thread, + struct addr_location *al, + struct machine *machine, FILE *fp) { struct perf_event_attr *attr = &evsel->attr; unsigned int type = output_type(attr->type); bool print_srcline_last = false; + int printed = 0; if (PRINT_FIELD(CALLINDENT)) - print_sample_callindent(sample, evsel, thread, al); + printed += perf_sample__fprintf_callindent(sample, evsel, thread, al, fp); /* print branch_from information */ if (PRINT_FIELD(IP)) { @@ -1079,31 +1108,30 @@ static void print_sample_bts(struct perf_sample *sample, cursor = &callchain_cursor; if (cursor == NULL) { - putchar(' '); + printed += fprintf(fp, " "); if (print_opts & EVSEL__PRINT_SRCLINE) { print_srcline_last = true; print_opts &= ~EVSEL__PRINT_SRCLINE; } } else - putchar('\n'); + printed += fprintf(fp, "\n"); - sample__fprintf_sym(sample, al, 0, print_opts, cursor, stdout); + printed += sample__fprintf_sym(sample, al, 0, print_opts, cursor, fp); } /* print branch_to information */ if (PRINT_FIELD(ADDR) || ((evsel->attr.sample_type & PERF_SAMPLE_ADDR) && !output[type].user_set)) { - printf(" => "); - print_sample_addr(sample, thread, attr); + printed += fprintf(fp, " => "); + printed += perf_sample__fprintf_addr(sample, thread, attr, fp); } if (print_srcline_last) - map__fprintf_srcline(al->map, al->addr, "\n ", stdout); - - print_insn(sample, attr, thread, machine); + printed += map__fprintf_srcline(al->map, al->addr, "\n ", fp); - printf("\n"); + printed += perf_sample__fprintf_insn(sample, attr, thread, machine, fp); + return printed + fprintf(fp, "\n"); } static struct { @@ -1126,7 +1154,7 @@ static struct { {0, NULL} }; -static void print_sample_flags(u32 flags) +static int perf_sample__fprintf_flags(u32 flags, FILE *fp) { const char *chars = PERF_IP_FLAG_CHARS; const int n = strlen(PERF_IP_FLAG_CHARS); @@ -1153,9 +1181,9 @@ static void print_sample_flags(u32 flags) str[pos] = 0; if (name) - printf(" %-7s%4s ", name, in_tx ? "(x)" : ""); - else - printf(" %-11s ", str); + return fprintf(fp, " %-7s%4s ", name, in_tx ? "(x)" : ""); + + return fprintf(fp, " %-11s ", str); } struct printer_data { @@ -1225,138 +1253,136 @@ static int sample__fprintf_bpf_output(enum binary_printer_ops op, return printed; } -static void print_sample_bpf_output(struct perf_sample *sample) +static int perf_sample__fprintf_bpf_output(struct perf_sample *sample, FILE *fp) { unsigned int nr_bytes = sample->raw_size; struct printer_data printer_data = {0, false, true}; - - print_binary(sample->raw_data, nr_bytes, 8, - sample__fprintf_bpf_output, &printer_data); + int printed = binary__fprintf(sample->raw_data, nr_bytes, 8, + sample__fprintf_bpf_output, &printer_data, fp); if (printer_data.is_printable && printer_data.hit_nul) - printf("%17s \"%s\"\n", "BPF string:", - (char *)(sample->raw_data)); + printed += fprintf(fp, "%17s \"%s\"\n", "BPF string:", (char *)(sample->raw_data)); + + return printed; } -static void print_sample_spacing(int len, int spacing) +static int perf_sample__fprintf_spacing(int len, int spacing, FILE *fp) { if (len > 0 && len < spacing) - printf("%*s", spacing - len, ""); + return fprintf(fp, "%*s", spacing - len, ""); + + return 0; } -static void print_sample_pt_spacing(int len) +static int perf_sample__fprintf_pt_spacing(int len, FILE *fp) { - print_sample_spacing(len, 34); + return perf_sample__fprintf_spacing(len, 34, fp); } -static void print_sample_synth_ptwrite(struct perf_sample *sample) +static int perf_sample__fprintf_synth_ptwrite(struct perf_sample *sample, FILE *fp) { struct perf_synth_intel_ptwrite *data = perf_sample__synth_ptr(sample); int len; if (perf_sample__bad_synth_size(sample, *data)) - return; + return 0; - len = printf(" IP: %u payload: %#" PRIx64 " ", + len = fprintf(fp, " IP: %u payload: %#" PRIx64 " ", data->ip, le64_to_cpu(data->payload)); - print_sample_pt_spacing(len); + return len + perf_sample__fprintf_pt_spacing(len, fp); } -static void print_sample_synth_mwait(struct perf_sample *sample) +static int perf_sample__fprintf_synth_mwait(struct perf_sample *sample, FILE *fp) { struct perf_synth_intel_mwait *data = perf_sample__synth_ptr(sample); int len; if (perf_sample__bad_synth_size(sample, *data)) - return; + return 0; - len = printf(" hints: %#x extensions: %#x ", - data->hints, data->extensions); - print_sample_pt_spacing(len); + len = fprintf(fp, " hints: %#x extensions: %#x ", + data->hints, data->extensions); + return len + perf_sample__fprintf_pt_spacing(len, fp); } -static void print_sample_synth_pwre(struct perf_sample *sample) +static int perf_sample__fprintf_synth_pwre(struct perf_sample *sample, FILE *fp) { struct perf_synth_intel_pwre *data = perf_sample__synth_ptr(sample); int len; if (perf_sample__bad_synth_size(sample, *data)) - return; + return 0; - len = printf(" hw: %u cstate: %u sub-cstate: %u ", - data->hw, data->cstate, data->subcstate); - print_sample_pt_spacing(len); + len = fprintf(fp, " hw: %u cstate: %u sub-cstate: %u ", + data->hw, data->cstate, data->subcstate); + return len + perf_sample__fprintf_pt_spacing(len, fp); } -static void print_sample_synth_exstop(struct perf_sample *sample) +static int perf_sample__fprintf_synth_exstop(struct perf_sample *sample, FILE *fp) { struct perf_synth_intel_exstop *data = perf_sample__synth_ptr(sample); int len; if (perf_sample__bad_synth_size(sample, *data)) - return; + return 0; - len = printf(" IP: %u ", data->ip); - print_sample_pt_spacing(len); + len = fprintf(fp, " IP: %u ", data->ip); + return len + perf_sample__fprintf_pt_spacing(len, fp); } -static void print_sample_synth_pwrx(struct perf_sample *sample) +static int perf_sample__fprintf_synth_pwrx(struct perf_sample *sample, FILE *fp) { struct perf_synth_intel_pwrx *data = perf_sample__synth_ptr(sample); int len; if (perf_sample__bad_synth_size(sample, *data)) - return; + return 0; - len = printf(" deepest cstate: %u last cstate: %u wake reason: %#x ", + len = fprintf(fp, " deepest cstate: %u last cstate: %u wake reason: %#x ", data->deepest_cstate, data->last_cstate, data->wake_reason); - print_sample_pt_spacing(len); + return len + perf_sample__fprintf_pt_spacing(len, fp); } -static void print_sample_synth_cbr(struct perf_sample *sample) +static int perf_sample__fprintf_synth_cbr(struct perf_sample *sample, FILE *fp) { struct perf_synth_intel_cbr *data = perf_sample__synth_ptr(sample); unsigned int percent, freq; int len; if (perf_sample__bad_synth_size(sample, *data)) - return; + return 0; freq = (le32_to_cpu(data->freq) + 500) / 1000; - len = printf(" cbr: %2u freq: %4u MHz ", data->cbr, freq); + len = fprintf(fp, " cbr: %2u freq: %4u MHz ", data->cbr, freq); if (data->max_nonturbo) { percent = (5 + (1000 * data->cbr) / data->max_nonturbo) / 10; - len += printf("(%3u%%) ", percent); + len += fprintf(fp, "(%3u%%) ", percent); } - print_sample_pt_spacing(len); + return len + perf_sample__fprintf_pt_spacing(len, fp); } -static void print_sample_synth(struct perf_sample *sample, - struct perf_evsel *evsel) +static int perf_sample__fprintf_synth(struct perf_sample *sample, + struct perf_evsel *evsel, FILE *fp) { switch (evsel->attr.config) { case PERF_SYNTH_INTEL_PTWRITE: - print_sample_synth_ptwrite(sample); - break; + return perf_sample__fprintf_synth_ptwrite(sample, fp); case PERF_SYNTH_INTEL_MWAIT: - print_sample_synth_mwait(sample); - break; + return perf_sample__fprintf_synth_mwait(sample, fp); case PERF_SYNTH_INTEL_PWRE: - print_sample_synth_pwre(sample); - break; + return perf_sample__fprintf_synth_pwre(sample, fp); case PERF_SYNTH_INTEL_EXSTOP: - print_sample_synth_exstop(sample); - break; + return perf_sample__fprintf_synth_exstop(sample, fp); case PERF_SYNTH_INTEL_PWRX: - print_sample_synth_pwrx(sample); - break; + return perf_sample__fprintf_synth_pwrx(sample, fp); case PERF_SYNTH_INTEL_CBR: - print_sample_synth_cbr(sample); - break; + return perf_sample__fprintf_synth_cbr(sample, fp); default: break; } + + return 0; } struct perf_script { @@ -1388,7 +1414,7 @@ static int perf_evlist__max_name_len(struct perf_evlist *evlist) return max; } -static size_t data_src__printf(u64 data_src) +static int data_src__fprintf(u64 data_src, FILE *fp) { struct mem_info mi = { .data_src.val = data_src }; char decode[100]; @@ -1402,7 +1428,7 @@ static size_t data_src__printf(u64 data_src) if (maxlen < len) maxlen = len; - return printf("%-*s", maxlen, out); + return fprintf(fp, "%-*s", maxlen, out); } static void process_event(struct perf_script *script, @@ -1413,11 +1439,12 @@ static void process_event(struct perf_script *script, struct thread *thread = al->thread; struct perf_event_attr *attr = &evsel->attr; unsigned int type = output_type(attr->type); + FILE *fp = stdout; if (output[type].fields == 0) return; - print_sample_start(sample, thread, evsel); + perf_sample__fprintf_start(sample, thread, evsel, fp); if (PRINT_FIELD(PERIOD)) printf("%10" PRIu64 " ", sample->period); @@ -1433,10 +1460,10 @@ static void process_event(struct perf_script *script, } if (print_flags) - print_sample_flags(sample->flags); + perf_sample__fprintf_flags(sample->flags, fp); if (is_bts_event(attr)) { - print_sample_bts(sample, evsel, thread, al, machine); + perf_sample__fprintf_bts(sample, evsel, thread, al, machine, fp); return; } @@ -1445,16 +1472,16 @@ static void process_event(struct perf_script *script, sample->raw_data, sample->raw_size); if (attr->type == PERF_TYPE_SYNTH && PRINT_FIELD(SYNTH)) - print_sample_synth(sample, evsel); + perf_sample__fprintf_synth(sample, evsel, fp); if (PRINT_FIELD(ADDR)) - print_sample_addr(sample, thread, attr); + perf_sample__fprintf_addr(sample, thread, attr, fp); if (PRINT_FIELD(DATA_SRC)) - data_src__printf(sample->data_src); + data_src__fprintf(sample->data_src, fp); if (PRINT_FIELD(WEIGHT)) - printf("%16" PRIu64, sample->weight); + fprintf(fp, "%16" PRIu64, sample->weight); if (PRINT_FIELD(IP)) { struct callchain_cursor *cursor = NULL; @@ -1464,26 +1491,26 @@ static void process_event(struct perf_script *script, sample, NULL, NULL, scripting_max_stack) == 0) cursor = &callchain_cursor; - putchar(cursor ? '\n' : ' '); - sample__fprintf_sym(sample, al, 0, output[type].print_ip_opts, cursor, stdout); + fputc(cursor ? '\n' : ' ', fp); + sample__fprintf_sym(sample, al, 0, output[type].print_ip_opts, cursor, fp); } if (PRINT_FIELD(IREGS)) - print_sample_iregs(sample, attr); + perf_sample__fprintf_iregs(sample, attr, fp); if (PRINT_FIELD(UREGS)) - print_sample_uregs(sample, attr); + perf_sample__fprintf_uregs(sample, attr, fp); if (PRINT_FIELD(BRSTACK)) - print_sample_brstack(sample, thread, attr); + perf_sample__fprintf_brstack(sample, thread, attr, fp); else if (PRINT_FIELD(BRSTACKSYM)) - print_sample_brstacksym(sample, thread, attr); + perf_sample__fprintf_brstacksym(sample, thread, attr, fp); else if (PRINT_FIELD(BRSTACKOFF)) - print_sample_brstackoff(sample, thread, attr); + perf_sample__fprintf_brstackoff(sample, thread, attr, fp); if (perf_evsel__is_bpf_output(evsel) && PRINT_FIELD(BPF_OUTPUT)) - print_sample_bpf_output(sample); - print_insn(sample, attr, thread, machine); + perf_sample__fprintf_bpf_output(sample, fp); + perf_sample__fprintf_insn(sample, attr, thread, machine, fp); if (PRINT_FIELD(PHYS_ADDR)) printf("%16" PRIx64, sample->phys_addr); @@ -1661,7 +1688,7 @@ static int process_comm_event(struct perf_tool *tool, sample->tid = event->comm.tid; sample->pid = event->comm.pid; } - print_sample_start(sample, thread, evsel); + perf_sample__fprintf_start(sample, thread, evsel, stdout); perf_event__fprintf(event, stdout); ret = 0; out: @@ -1696,7 +1723,7 @@ static int process_namespaces_event(struct perf_tool *tool, sample->tid = event->namespaces.tid; sample->pid = event->namespaces.pid; } - print_sample_start(sample, thread, evsel); + perf_sample__fprintf_start(sample, thread, evsel, stdout); perf_event__fprintf(event, stdout); ret = 0; out: @@ -1729,7 +1756,7 @@ static int process_fork_event(struct perf_tool *tool, sample->tid = event->fork.tid; sample->pid = event->fork.pid; } - print_sample_start(sample, thread, evsel); + perf_sample__fprintf_start(sample, thread, evsel, stdout); perf_event__fprintf(event, stdout); thread__put(thread); @@ -1758,7 +1785,7 @@ static int process_exit_event(struct perf_tool *tool, sample->tid = event->fork.tid; sample->pid = event->fork.pid; } - print_sample_start(sample, thread, evsel); + perf_sample__fprintf_start(sample, thread, evsel, stdout); perf_event__fprintf(event, stdout); if (perf_event__process_exit(tool, event, sample, machine) < 0) @@ -1793,7 +1820,7 @@ static int process_mmap_event(struct perf_tool *tool, sample->tid = event->mmap.tid; sample->pid = event->mmap.pid; } - print_sample_start(sample, thread, evsel); + perf_sample__fprintf_start(sample, thread, evsel, stdout); perf_event__fprintf(event, stdout); thread__put(thread); return 0; @@ -1824,7 +1851,7 @@ static int process_mmap2_event(struct perf_tool *tool, sample->tid = event->mmap2.tid; sample->pid = event->mmap2.pid; } - print_sample_start(sample, thread, evsel); + perf_sample__fprintf_start(sample, thread, evsel, stdout); perf_event__fprintf(event, stdout); thread__put(thread); return 0; @@ -1850,7 +1877,7 @@ static int process_switch_event(struct perf_tool *tool, return -1; } - print_sample_start(sample, thread, evsel); + perf_sample__fprintf_start(sample, thread, evsel, stdout); perf_event__fprintf(event, stdout); thread__put(thread); return 0; -- cgit v1.2.3 From db49bc155ad9f04ea3c4e1c9ae87850610feb1ce Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sat, 16 Sep 2017 08:25:37 +0200 Subject: perf script: Fix error handling path If the string passed in '--time' is invalid, or if failed to set libtraceevent function resolver, we must do some cleanup before leaving. As in the other error handling paths of this function. Signed-off-by: Christophe JAILLET Cc: Alexander Shishkin Cc: Peter Zijlstra Cc: kernel-janitors@vger.kernel.org Link: http://lkml.kernel.org/r/20170916062537.28921-1-christophe.jaillet@wanadoo.fr Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-script.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 3e83f4735b21..a3add2cd7856 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -3074,7 +3074,8 @@ int cmd_script(int argc, const char **argv) machine__resolve_kernel_addr, &session->machines.host) < 0) { pr_err("%s: failed to set libtraceevent function resolver\n", __func__); - return -1; + err = -1; + goto out_delete; } if (generate_script_lang) { @@ -3134,7 +3135,8 @@ int cmd_script(int argc, const char **argv) /* needs to be parsed after looking up reference time */ if (perf_time__parse_str(&script.ptime, script.time_str) != 0) { pr_err("Invalid time string\n"); - return -EINVAL; + err = -EINVAL; + goto out_delete; } err = __cmd_script(&script); -- cgit v1.2.3 From 79f56ebe2ae33aa54c69bbc9854a9a31f622913e Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sat, 16 Sep 2017 08:09:36 +0200 Subject: perf kmem: Perform some cleanup if '--time' is given an invalid value If the string passed in '--time' is invalid, we must do some cleanup before leaving. As in the other error handling paths of this function. Signed-off-by: Christophe JAILLET Cc: Alexander Shishkin Cc: Peter Zijlstra Cc: kernel-janitors@vger.kernel.org Fixes: 2a865bd8dddd ("perf kmem: Add option to specify time window of interest") Link: http://lkml.kernel.org/r/20170916060936.28199-1-christophe.jaillet@wanadoo.fr Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-kmem.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'tools/perf') diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c index 24ee68ecdd42..d8f25ef8157b 100644 --- a/tools/perf/builtin-kmem.c +++ b/tools/perf/builtin-kmem.c @@ -1983,7 +1983,8 @@ int cmd_kmem(int argc, const char **argv) if (perf_time__parse_str(&ptime, time_str) != 0) { pr_err("Invalid time string\n"); - return -EINVAL; + ret = -EINVAL; + goto out_delete; } if (!strcmp(argv[0], "stat")) { -- cgit v1.2.3 From b1f03ca4ee784390457dfb2009601cd7dcac025e Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 19 Oct 2017 15:11:17 -0300 Subject: perf namespaces: Add more appropriate set of headers We don't need perf.h, that is a kitchen sink, all we need is perf_events.h for perf_ns_link_info, sys/types.h for pid_t and linux/types.h for u64, list_head. Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Li Zhijian Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-f2uxyaj4s2hmntkrezpa6dqz@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/namespaces.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/util/namespaces.h b/tools/perf/util/namespaces.h index 05d82601c9a6..760558dcfd18 100644 --- a/tools/perf/util/namespaces.h +++ b/tools/perf/util/namespaces.h @@ -9,9 +9,10 @@ #ifndef __PERF_NAMESPACES_H #define __PERF_NAMESPACES_H -#include "../perf.h" -#include +#include +#include #include +#include struct namespaces_event; -- cgit v1.2.3 From 65db92e0965ab56e8031d5c804f26d5be0e47047 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Wed, 18 Oct 2017 06:05:07 -0700 Subject: perf vendor events: Add Goldmont Plus V1 event file Add a Intel event file for perf. Signed-off-by: Kan Liang Acked-by: Andi Kleen Cc: Jiri Olsa Cc: Kan Liang Cc: Sukadev Bhattiprolu Link: http://lkml.kernel.org/r/1508331907-395162-1-git-send-email-kan.liang@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- .../pmu-events/arch/x86/goldmontplus/cache.json | 1453 ++++++++++++++++++++ .../pmu-events/arch/x86/goldmontplus/frontend.json | 62 + .../pmu-events/arch/x86/goldmontplus/memory.json | 38 + .../pmu-events/arch/x86/goldmontplus/other.json | 98 ++ .../pmu-events/arch/x86/goldmontplus/pipeline.json | 544 ++++++++ .../arch/x86/goldmontplus/virtual-memory.json | 218 +++ tools/perf/pmu-events/arch/x86/mapfile.csv | 1 + 7 files changed, 2414 insertions(+) create mode 100644 tools/perf/pmu-events/arch/x86/goldmontplus/cache.json create mode 100644 tools/perf/pmu-events/arch/x86/goldmontplus/frontend.json create mode 100644 tools/perf/pmu-events/arch/x86/goldmontplus/memory.json create mode 100644 tools/perf/pmu-events/arch/x86/goldmontplus/other.json create mode 100644 tools/perf/pmu-events/arch/x86/goldmontplus/pipeline.json create mode 100644 tools/perf/pmu-events/arch/x86/goldmontplus/virtual-memory.json (limited to 'tools/perf') diff --git a/tools/perf/pmu-events/arch/x86/goldmontplus/cache.json b/tools/perf/pmu-events/arch/x86/goldmontplus/cache.json new file mode 100644 index 000000000000..b4791b443a66 --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/goldmontplus/cache.json @@ -0,0 +1,1453 @@ +[ + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts memory requests originating from the core that miss in the L2 cache.", + "EventCode": "0x2E", + "Counter": "0,1,2,3", + "UMask": "0x41", + "PEBScounters": "0,1,2,3", + "EventName": "LONGEST_LAT_CACHE.MISS", + "PDIR_COUNTER": "na", + "SampleAfterValue": "200003", + "BriefDescription": "L2 cache request misses" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts memory requests originating from the core that reference a cache line in the L2 cache.", + "EventCode": "0x2E", + "Counter": "0,1,2,3", + "UMask": "0x4f", + "PEBScounters": "0,1,2,3", + "EventName": "LONGEST_LAT_CACHE.REFERENCE", + "PDIR_COUNTER": "na", + "SampleAfterValue": "200003", + "BriefDescription": "L2 cache requests" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts the number of demand and prefetch transactions that the L2 XQ rejects due to a full or near full condition which likely indicates back pressure from the intra-die interconnect (IDI) fabric. The XQ may reject transactions from the L2Q (non-cacheable requests), L2 misses and L2 write-back victims.", + "EventCode": "0x30", + "Counter": "0,1,2,3", + "UMask": "0x0", + "PEBScounters": "0,1,2,3", + "EventName": "L2_REJECT_XQ.ALL", + "PDIR_COUNTER": "na", + "SampleAfterValue": "200003", + "BriefDescription": "Requests rejected by the XQ" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts the number of demand and L1 prefetcher requests rejected by the L2Q due to a full or nearly full condition which likely indicates back pressure from L2Q. It also counts requests that would have gone directly to the XQ, but are rejected due to a full or nearly full condition, indicating back pressure from the IDI link. The L2Q may also reject transactions from a core to insure fairness between cores, or to delay a core's dirty eviction when the address conflicts with incoming external snoops.", + "EventCode": "0x31", + "Counter": "0,1,2,3", + "UMask": "0x0", + "PEBScounters": "0,1,2,3", + "EventName": "CORE_REJECT_L2Q.ALL", + "PDIR_COUNTER": "na", + "SampleAfterValue": "200003", + "BriefDescription": "Requests rejected by the L2Q" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts when a modified (dirty) cache line is evicted from the data L1 cache and needs to be written back to memory. No count will occur if the evicted line is clean, and hence does not require a writeback.", + "EventCode": "0x51", + "Counter": "0,1,2,3", + "UMask": "0x1", + "PEBScounters": "0,1,2,3", + "EventName": "DL1.REPLACEMENT", + "PDIR_COUNTER": "na", + "SampleAfterValue": "200003", + "BriefDescription": "L1 Cache evictions for dirty data" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts cycles that fetch is stalled due to an outstanding ICache miss. That is, the decoder queue is able to accept bytes, but the fetch unit is unable to provide bytes due to an ICache miss. Note: this event is not the same as the total number of cycles spent retrieving instruction cache lines from the memory hierarchy.", + "EventCode": "0x86", + "Counter": "0,1,2,3", + "UMask": "0x2", + "PEBScounters": "0,1,2,3", + "EventName": "FETCH_STALL.ICACHE_FILL_PENDING_CYCLES", + "PDIR_COUNTER": "na", + "SampleAfterValue": "200003", + "BriefDescription": "Cycles code-fetch stalled due to an outstanding ICache miss." + }, + { + "CollectPEBSRecord": "1", + "EventCode": "0xB7", + "Counter": "0,1,2,3", + "UMask": "0x1", + "PEBScounters": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE", + "PDIR_COUNTER": "na", + "SampleAfterValue": "100007", + "BriefDescription": "Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)" + }, + { + "PEBS": "2", + "CollectPEBSRecord": "2", + "PublicDescription": "Counts locked memory uops retired. This includes regular locks and bus locks. (To specifically count bus locks only, see the Offcore response event.) A locked access is one with a lock prefix, or an exchange to memory. See the SDM for a complete description of which memory load accesses are locks.", + "EventCode": "0xD0", + "Counter": "0,1,2,3", + "UMask": "0x21", + "PEBScounters": "0,1,2,3", + "EventName": "MEM_UOPS_RETIRED.LOCK_LOADS", + "SampleAfterValue": "200003", + "BriefDescription": "Locked load uops retired (Precise event capable)" + }, + { + "PEBS": "2", + "CollectPEBSRecord": "2", + "PublicDescription": "Counts load uops retired where the data requested spans a 64 byte cache line boundary.", + "EventCode": "0xD0", + "Counter": "0,1,2,3", + "UMask": "0x41", + "PEBScounters": "0,1,2,3", + "EventName": "MEM_UOPS_RETIRED.SPLIT_LOADS", + "SampleAfterValue": "200003", + "BriefDescription": "Load uops retired that split a cache-line (Precise event capable)" + }, + { + "PEBS": "2", + "CollectPEBSRecord": "2", + "PublicDescription": "Counts store uops retired where the data requested spans a 64 byte cache line boundary.", + "EventCode": "0xD0", + "Counter": "0,1,2,3", + "UMask": "0x42", + "PEBScounters": "0,1,2,3", + "EventName": "MEM_UOPS_RETIRED.SPLIT_STORES", + "SampleAfterValue": "200003", + "BriefDescription": "Stores uops retired that split a cache-line (Precise event capable)" + }, + { + "PEBS": "2", + "CollectPEBSRecord": "2", + "PublicDescription": "Counts memory uops retired where the data requested spans a 64 byte cache line boundary.", + "EventCode": "0xD0", + "Counter": "0,1,2,3", + "UMask": "0x43", + "PEBScounters": "0,1,2,3", + "EventName": "MEM_UOPS_RETIRED.SPLIT", + "SampleAfterValue": "200003", + "BriefDescription": "Memory uops retired that split a cache-line (Precise event capable)" + }, + { + "PEBS": "2", + "CollectPEBSRecord": "2", + "PublicDescription": "Counts the number of load uops retired.", + "EventCode": "0xD0", + "Counter": "0,1,2,3", + "UMask": "0x81", + "PEBScounters": "0,1,2,3", + "EventName": "MEM_UOPS_RETIRED.ALL_LOADS", + "SampleAfterValue": "200003", + "BriefDescription": "Load uops retired (Precise event capable)" + }, + { + "PEBS": "2", + "CollectPEBSRecord": "2", + "PublicDescription": "Counts the number of store uops retired.", + "EventCode": "0xD0", + "Counter": "0,1,2,3", + "UMask": "0x82", + "PEBScounters": "0,1,2,3", + "EventName": "MEM_UOPS_RETIRED.ALL_STORES", + "SampleAfterValue": "200003", + "BriefDescription": "Store uops retired (Precise event capable)" + }, + { + "PEBS": "2", + "CollectPEBSRecord": "2", + "PublicDescription": "Counts the number of memory uops retired that is either a loads or a store or both.", + "EventCode": "0xD0", + "Counter": "0,1,2,3", + "UMask": "0x83", + "PEBScounters": "0,1,2,3", + "EventName": "MEM_UOPS_RETIRED.ALL", + "SampleAfterValue": "200003", + "BriefDescription": "Memory uops retired (Precise event capable)" + }, + { + "PEBS": "2", + "CollectPEBSRecord": "2", + "PublicDescription": "Counts load uops retired that hit the L1 data cache.", + "EventCode": "0xD1", + "Counter": "0,1,2,3", + "UMask": "0x1", + "PEBScounters": "0,1,2,3", + "EventName": "MEM_LOAD_UOPS_RETIRED.L1_HIT", + "SampleAfterValue": "200003", + "BriefDescription": "Load uops retired that hit L1 data cache (Precise event capable)" + }, + { + "PEBS": "2", + "CollectPEBSRecord": "2", + "PublicDescription": "Counts load uops retired that hit in the L2 cache.", + "EventCode": "0xD1", + "Counter": "0,1,2,3", + "UMask": "0x2", + "PEBScounters": "0,1,2,3", + "EventName": "MEM_LOAD_UOPS_RETIRED.L2_HIT", + "SampleAfterValue": "200003", + "BriefDescription": "Load uops retired that hit L2 (Precise event capable)" + }, + { + "PEBS": "2", + "CollectPEBSRecord": "2", + "PublicDescription": "Counts load uops retired that miss the L1 data cache.", + "EventCode": "0xD1", + "Counter": "0,1,2,3", + "UMask": "0x8", + "PEBScounters": "0,1,2,3", + "EventName": "MEM_LOAD_UOPS_RETIRED.L1_MISS", + "SampleAfterValue": "200003", + "BriefDescription": "Load uops retired that missed L1 data cache (Precise event capable)" + }, + { + "PEBS": "2", + "CollectPEBSRecord": "2", + "PublicDescription": "Counts load uops retired that miss in the L2 cache.", + "EventCode": "0xD1", + "Counter": "0,1,2,3", + "UMask": "0x10", + "PEBScounters": "0,1,2,3", + "EventName": "MEM_LOAD_UOPS_RETIRED.L2_MISS", + "SampleAfterValue": "200003", + "BriefDescription": "Load uops retired that missed L2 (Precise event capable)" + }, + { + "PEBS": "2", + "CollectPEBSRecord": "2", + "PublicDescription": "Counts load uops retired where the cache line containing the data was in the modified state of another core or modules cache (HITM). More specifically, this means that when the load address was checked by other caching agents (typically another processor) in the system, one of those caching agents indicated that they had a dirty copy of the data. Loads that obtain a HITM response incur greater latency than most is typical for a load. In addition, since HITM indicates that some other processor had this data in its cache, it implies that the data was shared between processors, or potentially was a lock or semaphore value. This event is useful for locating sharing, false sharing, and contended locks.", + "EventCode": "0xD1", + "Counter": "0,1,2,3", + "UMask": "0x20", + "PEBScounters": "0,1,2,3", + "EventName": "MEM_LOAD_UOPS_RETIRED.HITM", + "SampleAfterValue": "200003", + "BriefDescription": "Memory uop retired where cross core or cross module HITM occurred (Precise event capable)" + }, + { + "PEBS": "2", + "CollectPEBSRecord": "2", + "PublicDescription": "Counts memory load uops retired where the data is retrieved from the WCB (or fill buffer), indicating that the load found its data while that data was in the process of being brought into the L1 cache. Typically a load will receive this indication when some other load or prefetch missed the L1 cache and was in the process of retrieving the cache line containing the data, but that process had not yet finished (and written the data back to the cache). For example, consider load X and Y, both referencing the same cache line that is not in the L1 cache. If load X misses cache first, it obtains and WCB (or fill buffer) and begins the process of requesting the data. When load Y requests the data, it will either hit the WCB, or the L1 cache, depending on exactly what time the request to Y occurs.", + "EventCode": "0xD1", + "Counter": "0,1,2,3", + "UMask": "0x40", + "PEBScounters": "0,1,2,3", + "EventName": "MEM_LOAD_UOPS_RETIRED.WCB_HIT", + "SampleAfterValue": "200003", + "BriefDescription": "Loads retired that hit WCB (Precise event capable)" + }, + { + "PEBS": "2", + "CollectPEBSRecord": "2", + "PublicDescription": "Counts memory load uops retired where the data is retrieved from DRAM. Event is counted at retirement, so the speculative loads are ignored. A memory load can hit (or miss) the L1 cache, hit (or miss) the L2 cache, hit DRAM, hit in the WCB or receive a HITM response.", + "EventCode": "0xD1", + "Counter": "0,1,2,3", + "UMask": "0x80", + "PEBScounters": "0,1,2,3", + "EventName": "MEM_LOAD_UOPS_RETIRED.DRAM_HIT", + "SampleAfterValue": "200003", + "BriefDescription": "Loads retired that came from DRAM (Precise event capable)" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts demand cacheable data reads of full cache lines have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x0000010001", + "Counter": "0,1,2,3", + "UMask": "0x1", + "PEBScounters": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.ANY_RESPONSE", + "PDIR_COUNTER": "na", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts demand cacheable data reads of full cache lines have any transaction responses from the uncore subsystem.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts demand cacheable data reads of full cache lines hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x0000040001", + "Counter": "0,1,2,3", + "UMask": "0x1", + "PEBScounters": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L2_HIT", + "PDIR_COUNTER": "na", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts demand cacheable data reads of full cache lines hit the L2 cache.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts demand cacheable data reads of full cache lines true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x0200000001", + "Counter": "0,1,2,3", + "UMask": "0x1", + "PEBScounters": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED", + "PDIR_COUNTER": "na", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts demand cacheable data reads of full cache lines true miss for the L2 cache with a snoop miss in the other processor module. ", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts demand cacheable data reads of full cache lines miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x1000000001", + "Counter": "0,1,2,3", + "UMask": "0x1", + "PEBScounters": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L2_MISS.HITM_OTHER_CORE", + "PDIR_COUNTER": "na", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts demand cacheable data reads of full cache lines miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts demand cacheable data reads of full cache lines outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x4000000001", + "Counter": "0,1,2,3", + "UMask": "0x1", + "PEBScounters": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.OUTSTANDING", + "PDIR_COUNTER": "na", + "MSRIndex": "0x1a6", + "SampleAfterValue": "100007", + "BriefDescription": "Counts demand cacheable data reads of full cache lines outstanding, per cycle, from the time of the L2 miss to when any response is received.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts demand reads for ownership (RFO) requests generated by a write to full data cache line have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x0000010002", + "Counter": "0,1,2,3", + "UMask": "0x1", + "PEBScounters": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.ANY_RESPONSE", + "PDIR_COUNTER": "na", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts demand reads for ownership (RFO) requests generated by a write to full data cache line have any transaction responses from the uncore subsystem.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts demand reads for ownership (RFO) requests generated by a write to full data cache line hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x0000040002", + "Counter": "0,1,2,3", + "UMask": "0x1", + "PEBScounters": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L2_HIT", + "PDIR_COUNTER": "na", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts demand reads for ownership (RFO) requests generated by a write to full data cache line hit the L2 cache.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts demand reads for ownership (RFO) requests generated by a write to full data cache line true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x0200000002", + "Counter": "0,1,2,3", + "UMask": "0x1", + "PEBScounters": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED", + "PDIR_COUNTER": "na", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts demand reads for ownership (RFO) requests generated by a write to full data cache line true miss for the L2 cache with a snoop miss in the other processor module. ", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts demand reads for ownership (RFO) requests generated by a write to full data cache line miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x1000000002", + "Counter": "0,1,2,3", + "UMask": "0x1", + "PEBScounters": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L2_MISS.HITM_OTHER_CORE", + "PDIR_COUNTER": "na", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts demand reads for ownership (RFO) requests generated by a write to full data cache line miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts demand reads for ownership (RFO) requests generated by a write to full data cache line outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x4000000002", + "Counter": "0,1,2,3", + "UMask": "0x1", + "PEBScounters": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.OUTSTANDING", + "PDIR_COUNTER": "na", + "MSRIndex": "0x1a6", + "SampleAfterValue": "100007", + "BriefDescription": "Counts demand reads for ownership (RFO) requests generated by a write to full data cache line outstanding, per cycle, from the time of the L2 miss to when any response is received.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts demand instruction cacheline and I-side prefetch requests that miss the instruction cache have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x0000010004", + "Counter": "0,1,2,3", + "UMask": "0x1", + "PEBScounters": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.ANY_RESPONSE", + "PDIR_COUNTER": "na", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts demand instruction cacheline and I-side prefetch requests that miss the instruction cache have any transaction responses from the uncore subsystem.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts demand instruction cacheline and I-side prefetch requests that miss the instruction cache hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x0000040004", + "Counter": "0,1,2,3", + "UMask": "0x1", + "PEBScounters": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L2_HIT", + "PDIR_COUNTER": "na", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts demand instruction cacheline and I-side prefetch requests that miss the instruction cache hit the L2 cache.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts demand instruction cacheline and I-side prefetch requests that miss the instruction cache true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x0200000004", + "Counter": "0,1,2,3", + "UMask": "0x1", + "PEBScounters": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED", + "PDIR_COUNTER": "na", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts demand instruction cacheline and I-side prefetch requests that miss the instruction cache true miss for the L2 cache with a snoop miss in the other processor module. ", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts demand instruction cacheline and I-side prefetch requests that miss the instruction cache miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x1000000004", + "Counter": "0,1,2,3", + "UMask": "0x1", + "PEBScounters": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L2_MISS.HITM_OTHER_CORE", + "PDIR_COUNTER": "na", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts demand instruction cacheline and I-side prefetch requests that miss the instruction cache miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts demand instruction cacheline and I-side prefetch requests that miss the instruction cache outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x4000000004", + "Counter": "0,1,2,3", + "UMask": "0x1", + "PEBScounters": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.OUTSTANDING", + "PDIR_COUNTER": "na", + "MSRIndex": "0x1a6", + "SampleAfterValue": "100007", + "BriefDescription": "Counts demand instruction cacheline and I-side prefetch requests that miss the instruction cache outstanding, per cycle, from the time of the L2 miss to when any response is received.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts the number of writeback transactions caused by L1 or L2 cache evictions have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x0000010008", + "Counter": "0,1,2,3", + "UMask": "0x1", + "PEBScounters": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.COREWB.ANY_RESPONSE", + "PDIR_COUNTER": "na", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts the number of writeback transactions caused by L1 or L2 cache evictions have any transaction responses from the uncore subsystem.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts the number of writeback transactions caused by L1 or L2 cache evictions hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x0000040008", + "Counter": "0,1,2,3", + "UMask": "0x1", + "PEBScounters": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.COREWB.L2_HIT", + "PDIR_COUNTER": "na", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts the number of writeback transactions caused by L1 or L2 cache evictions hit the L2 cache.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts the number of writeback transactions caused by L1 or L2 cache evictions true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x0200000008", + "Counter": "0,1,2,3", + "UMask": "0x1", + "PEBScounters": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.COREWB.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED", + "PDIR_COUNTER": "na", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts the number of writeback transactions caused by L1 or L2 cache evictions true miss for the L2 cache with a snoop miss in the other processor module. ", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts the number of writeback transactions caused by L1 or L2 cache evictions miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x1000000008", + "Counter": "0,1,2,3", + "UMask": "0x1", + "PEBScounters": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.COREWB.L2_MISS.HITM_OTHER_CORE", + "PDIR_COUNTER": "na", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts the number of writeback transactions caused by L1 or L2 cache evictions miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts the number of writeback transactions caused by L1 or L2 cache evictions outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x4000000008", + "Counter": "0,1,2,3", + "UMask": "0x1", + "PEBScounters": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.COREWB.OUTSTANDING", + "PDIR_COUNTER": "na", + "MSRIndex": "0x1a6", + "SampleAfterValue": "100007", + "BriefDescription": "Counts the number of writeback transactions caused by L1 or L2 cache evictions outstanding, per cycle, from the time of the L2 miss to when any response is received.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts data cacheline reads generated by hardware L2 cache prefetcher have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x0000010010", + "Counter": "0,1,2,3", + "UMask": "0x1", + "PEBScounters": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.ANY_RESPONSE", + "PDIR_COUNTER": "na", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts data cacheline reads generated by hardware L2 cache prefetcher have any transaction responses from the uncore subsystem.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts data cacheline reads generated by hardware L2 cache prefetcher hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x0000040010", + "Counter": "0,1,2,3", + "UMask": "0x1", + "PEBScounters": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L2_HIT", + "PDIR_COUNTER": "na", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts data cacheline reads generated by hardware L2 cache prefetcher hit the L2 cache.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts data cacheline reads generated by hardware L2 cache prefetcher true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x0200000010", + "Counter": "0,1,2,3", + "UMask": "0x1", + "PEBScounters": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED", + "PDIR_COUNTER": "na", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts data cacheline reads generated by hardware L2 cache prefetcher true miss for the L2 cache with a snoop miss in the other processor module. ", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts data cacheline reads generated by hardware L2 cache prefetcher miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x1000000010", + "Counter": "0,1,2,3", + "UMask": "0x1", + "PEBScounters": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L2_MISS.HITM_OTHER_CORE", + "PDIR_COUNTER": "na", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts data cacheline reads generated by hardware L2 cache prefetcher miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts data cacheline reads generated by hardware L2 cache prefetcher outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x4000000010", + "Counter": "0,1,2,3", + "UMask": "0x1", + "PEBScounters": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.OUTSTANDING", + "PDIR_COUNTER": "na", + "MSRIndex": "0x1a6", + "SampleAfterValue": "100007", + "BriefDescription": "Counts data cacheline reads generated by hardware L2 cache prefetcher outstanding, per cycle, from the time of the L2 miss to when any response is received.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts reads for ownership (RFO) requests generated by L2 prefetcher have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x0000010020", + "Counter": "0,1,2,3", + "UMask": "0x1", + "PEBScounters": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.ANY_RESPONSE", + "PDIR_COUNTER": "na", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts reads for ownership (RFO) requests generated by L2 prefetcher have any transaction responses from the uncore subsystem.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts reads for ownership (RFO) requests generated by L2 prefetcher hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x0000040020", + "Counter": "0,1,2,3", + "UMask": "0x1", + "PEBScounters": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L2_HIT", + "PDIR_COUNTER": "na", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts reads for ownership (RFO) requests generated by L2 prefetcher hit the L2 cache.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts reads for ownership (RFO) requests generated by L2 prefetcher true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x0200000020", + "Counter": "0,1,2,3", + "UMask": "0x1", + "PEBScounters": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED", + "PDIR_COUNTER": "na", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts reads for ownership (RFO) requests generated by L2 prefetcher true miss for the L2 cache with a snoop miss in the other processor module. ", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts reads for ownership (RFO) requests generated by L2 prefetcher miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x1000000020", + "Counter": "0,1,2,3", + "UMask": "0x1", + "PEBScounters": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L2_MISS.HITM_OTHER_CORE", + "PDIR_COUNTER": "na", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts reads for ownership (RFO) requests generated by L2 prefetcher miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts reads for ownership (RFO) requests generated by L2 prefetcher outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x4000000020", + "Counter": "0,1,2,3", + "UMask": "0x1", + "PEBScounters": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.OUTSTANDING", + "PDIR_COUNTER": "na", + "MSRIndex": "0x1a6", + "SampleAfterValue": "100007", + "BriefDescription": "Counts reads for ownership (RFO) requests generated by L2 prefetcher outstanding, per cycle, from the time of the L2 miss to when any response is received.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts bus lock and split lock requests have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x0000010400", + "Counter": "0,1,2,3", + "UMask": "0x1", + "PEBScounters": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.BUS_LOCKS.ANY_RESPONSE", + "PDIR_COUNTER": "na", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts bus lock and split lock requests have any transaction responses from the uncore subsystem.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts bus lock and split lock requests hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x0000040400", + "Counter": "0,1,2,3", + "UMask": "0x1", + "PEBScounters": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.BUS_LOCKS.L2_HIT", + "PDIR_COUNTER": "na", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts bus lock and split lock requests hit the L2 cache.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts bus lock and split lock requests true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x0200000400", + "Counter": "0,1,2,3", + "UMask": "0x1", + "PEBScounters": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.BUS_LOCKS.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED", + "PDIR_COUNTER": "na", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts bus lock and split lock requests true miss for the L2 cache with a snoop miss in the other processor module. ", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts bus lock and split lock requests miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x1000000400", + "Counter": "0,1,2,3", + "UMask": "0x1", + "PEBScounters": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.BUS_LOCKS.L2_MISS.HITM_OTHER_CORE", + "PDIR_COUNTER": "na", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts bus lock and split lock requests miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts bus lock and split lock requests outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x4000000400", + "Counter": "0,1,2,3", + "UMask": "0x1", + "PEBScounters": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.BUS_LOCKS.OUTSTANDING", + "PDIR_COUNTER": "na", + "MSRIndex": "0x1a6", + "SampleAfterValue": "100007", + "BriefDescription": "Counts bus lock and split lock requests outstanding, per cycle, from the time of the L2 miss to when any response is received.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts full cache line data writes to uncacheable write combining (USWC) memory region and full cache-line non-temporal writes have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x0000010800", + "Counter": "0,1,2,3", + "UMask": "0x1", + "PEBScounters": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.FULL_STREAMING_STORES.ANY_RESPONSE", + "PDIR_COUNTER": "na", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts full cache line data writes to uncacheable write combining (USWC) memory region and full cache-line non-temporal writes have any transaction responses from the uncore subsystem.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts full cache line data writes to uncacheable write combining (USWC) memory region and full cache-line non-temporal writes hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x0000040800", + "Counter": "0,1,2,3", + "UMask": "0x1", + "PEBScounters": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.FULL_STREAMING_STORES.L2_HIT", + "PDIR_COUNTER": "na", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts full cache line data writes to uncacheable write combining (USWC) memory region and full cache-line non-temporal writes hit the L2 cache.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts full cache line data writes to uncacheable write combining (USWC) memory region and full cache-line non-temporal writes true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x0200000800", + "Counter": "0,1,2,3", + "UMask": "0x1", + "PEBScounters": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.FULL_STREAMING_STORES.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED", + "PDIR_COUNTER": "na", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts full cache line data writes to uncacheable write combining (USWC) memory region and full cache-line non-temporal writes true miss for the L2 cache with a snoop miss in the other processor module. ", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts full cache line data writes to uncacheable write combining (USWC) memory region and full cache-line non-temporal writes miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x1000000800", + "Counter": "0,1,2,3", + "UMask": "0x1", + "PEBScounters": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.FULL_STREAMING_STORES.L2_MISS.HITM_OTHER_CORE", + "PDIR_COUNTER": "na", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts full cache line data writes to uncacheable write combining (USWC) memory region and full cache-line non-temporal writes miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts full cache line data writes to uncacheable write combining (USWC) memory region and full cache-line non-temporal writes outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x4000000800", + "Counter": "0,1,2,3", + "UMask": "0x1", + "PEBScounters": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.FULL_STREAMING_STORES.OUTSTANDING", + "PDIR_COUNTER": "na", + "MSRIndex": "0x1a6", + "SampleAfterValue": "100007", + "BriefDescription": "Counts full cache line data writes to uncacheable write combining (USWC) memory region and full cache-line non-temporal writes outstanding, per cycle, from the time of the L2 miss to when any response is received.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts data cache lines requests by software prefetch instructions have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x0000011000", + "Counter": "0,1,2,3", + "UMask": "0x1", + "PEBScounters": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.SW_PREFETCH.ANY_RESPONSE", + "PDIR_COUNTER": "na", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts data cache lines requests by software prefetch instructions have any transaction responses from the uncore subsystem.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts data cache lines requests by software prefetch instructions hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x0000041000", + "Counter": "0,1,2,3", + "UMask": "0x1", + "PEBScounters": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.SW_PREFETCH.L2_HIT", + "PDIR_COUNTER": "na", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts data cache lines requests by software prefetch instructions hit the L2 cache.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts data cache lines requests by software prefetch instructions true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x0200001000", + "Counter": "0,1,2,3", + "UMask": "0x1", + "PEBScounters": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.SW_PREFETCH.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED", + "PDIR_COUNTER": "na", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts data cache lines requests by software prefetch instructions true miss for the L2 cache with a snoop miss in the other processor module. ", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts data cache lines requests by software prefetch instructions miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x1000001000", + "Counter": "0,1,2,3", + "UMask": "0x1", + "PEBScounters": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.SW_PREFETCH.L2_MISS.HITM_OTHER_CORE", + "PDIR_COUNTER": "na", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts data cache lines requests by software prefetch instructions miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts data cache lines requests by software prefetch instructions outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x4000001000", + "Counter": "0,1,2,3", + "UMask": "0x1", + "PEBScounters": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.SW_PREFETCH.OUTSTANDING", + "PDIR_COUNTER": "na", + "MSRIndex": "0x1a6", + "SampleAfterValue": "100007", + "BriefDescription": "Counts data cache lines requests by software prefetch instructions outstanding, per cycle, from the time of the L2 miss to when any response is received.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts data cache line reads generated by hardware L1 data cache prefetcher have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x0000012000", + "Counter": "0,1,2,3", + "UMask": "0x1", + "PEBScounters": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.PF_L1_DATA_RD.ANY_RESPONSE", + "PDIR_COUNTER": "na", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts data cache line reads generated by hardware L1 data cache prefetcher have any transaction responses from the uncore subsystem.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts data cache line reads generated by hardware L1 data cache prefetcher hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x0000042000", + "Counter": "0,1,2,3", + "UMask": "0x1", + "PEBScounters": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.PF_L1_DATA_RD.L2_HIT", + "PDIR_COUNTER": "na", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts data cache line reads generated by hardware L1 data cache prefetcher hit the L2 cache.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts data cache line reads generated by hardware L1 data cache prefetcher true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x0200002000", + "Counter": "0,1,2,3", + "UMask": "0x1", + "PEBScounters": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.PF_L1_DATA_RD.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED", + "PDIR_COUNTER": "na", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts data cache line reads generated by hardware L1 data cache prefetcher true miss for the L2 cache with a snoop miss in the other processor module. ", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts data cache line reads generated by hardware L1 data cache prefetcher miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x1000002000", + "Counter": "0,1,2,3", + "UMask": "0x1", + "PEBScounters": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.PF_L1_DATA_RD.L2_MISS.HITM_OTHER_CORE", + "PDIR_COUNTER": "na", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts data cache line reads generated by hardware L1 data cache prefetcher miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts data cache line reads generated by hardware L1 data cache prefetcher outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x4000002000", + "Counter": "0,1,2,3", + "UMask": "0x1", + "PEBScounters": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.PF_L1_DATA_RD.OUTSTANDING", + "PDIR_COUNTER": "na", + "MSRIndex": "0x1a6", + "SampleAfterValue": "100007", + "BriefDescription": "Counts data cache line reads generated by hardware L1 data cache prefetcher outstanding, per cycle, from the time of the L2 miss to when any response is received.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts any data writes to uncacheable write combining (USWC) memory region have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x0000014800", + "Counter": "0,1,2,3", + "UMask": "0x1", + "PEBScounters": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.ANY_RESPONSE", + "PDIR_COUNTER": "na", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts any data writes to uncacheable write combining (USWC) memory region have any transaction responses from the uncore subsystem.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts any data writes to uncacheable write combining (USWC) memory region hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x0000044800", + "Counter": "0,1,2,3", + "UMask": "0x1", + "PEBScounters": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.L2_HIT", + "PDIR_COUNTER": "na", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts any data writes to uncacheable write combining (USWC) memory region hit the L2 cache.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts any data writes to uncacheable write combining (USWC) memory region true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x0200004800", + "Counter": "0,1,2,3", + "UMask": "0x1", + "PEBScounters": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED", + "PDIR_COUNTER": "na", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts any data writes to uncacheable write combining (USWC) memory region true miss for the L2 cache with a snoop miss in the other processor module. ", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts any data writes to uncacheable write combining (USWC) memory region miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x1000004800", + "Counter": "0,1,2,3", + "UMask": "0x1", + "PEBScounters": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.L2_MISS.HITM_OTHER_CORE", + "PDIR_COUNTER": "na", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts any data writes to uncacheable write combining (USWC) memory region miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts any data writes to uncacheable write combining (USWC) memory region outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x4000004800", + "Counter": "0,1,2,3", + "UMask": "0x1", + "PEBScounters": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.OUTSTANDING", + "PDIR_COUNTER": "na", + "MSRIndex": "0x1a6", + "SampleAfterValue": "100007", + "BriefDescription": "Counts any data writes to uncacheable write combining (USWC) memory region outstanding, per cycle, from the time of the L2 miss to when any response is received.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts requests to the uncore subsystem have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x0000018000", + "Counter": "0,1,2,3", + "UMask": "0x1", + "PEBScounters": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.ANY_RESPONSE", + "PDIR_COUNTER": "na", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts requests to the uncore subsystem have any transaction responses from the uncore subsystem.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts requests to the uncore subsystem hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x0000048000", + "Counter": "0,1,2,3", + "UMask": "0x1", + "PEBScounters": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.L2_HIT", + "PDIR_COUNTER": "na", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts requests to the uncore subsystem hit the L2 cache.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts requests to the uncore subsystem true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x0200008000", + "Counter": "0,1,2,3", + "UMask": "0x1", + "PEBScounters": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED", + "PDIR_COUNTER": "na", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts requests to the uncore subsystem true miss for the L2 cache with a snoop miss in the other processor module. ", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts requests to the uncore subsystem miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x1000008000", + "Counter": "0,1,2,3", + "UMask": "0x1", + "PEBScounters": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.L2_MISS.HITM_OTHER_CORE", + "PDIR_COUNTER": "na", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts requests to the uncore subsystem miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts requests to the uncore subsystem outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x4000008000", + "Counter": "0,1,2,3", + "UMask": "0x1", + "PEBScounters": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.OUTSTANDING", + "PDIR_COUNTER": "na", + "MSRIndex": "0x1a6", + "SampleAfterValue": "100007", + "BriefDescription": "Counts requests to the uncore subsystem outstanding, per cycle, from the time of the L2 miss to when any response is received.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts data reads generated by L1 or L2 prefetchers have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x0000013010", + "Counter": "0,1,2,3", + "UMask": "0x1", + "PEBScounters": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.ANY_PF_DATA_RD.ANY_RESPONSE", + "PDIR_COUNTER": "na", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts data reads generated by L1 or L2 prefetchers have any transaction responses from the uncore subsystem.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts data reads generated by L1 or L2 prefetchers hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x0000043010", + "Counter": "0,1,2,3", + "UMask": "0x1", + "PEBScounters": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.ANY_PF_DATA_RD.L2_HIT", + "PDIR_COUNTER": "na", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts data reads generated by L1 or L2 prefetchers hit the L2 cache.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts data reads generated by L1 or L2 prefetchers true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x0200003010", + "Counter": "0,1,2,3", + "UMask": "0x1", + "PEBScounters": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.ANY_PF_DATA_RD.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED", + "PDIR_COUNTER": "na", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts data reads generated by L1 or L2 prefetchers true miss for the L2 cache with a snoop miss in the other processor module. ", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts data reads generated by L1 or L2 prefetchers miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x1000003010", + "Counter": "0,1,2,3", + "UMask": "0x1", + "PEBScounters": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.ANY_PF_DATA_RD.L2_MISS.HITM_OTHER_CORE", + "PDIR_COUNTER": "na", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts data reads generated by L1 or L2 prefetchers miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts data reads generated by L1 or L2 prefetchers outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x4000003010", + "Counter": "0,1,2,3", + "UMask": "0x1", + "PEBScounters": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.ANY_PF_DATA_RD.OUTSTANDING", + "PDIR_COUNTER": "na", + "MSRIndex": "0x1a6", + "SampleAfterValue": "100007", + "BriefDescription": "Counts data reads generated by L1 or L2 prefetchers outstanding, per cycle, from the time of the L2 miss to when any response is received.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts data reads (demand & prefetch) have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x0000013091", + "Counter": "0,1,2,3", + "UMask": "0x1", + "PEBScounters": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.ANY_DATA_RD.ANY_RESPONSE", + "PDIR_COUNTER": "na", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts data reads (demand & prefetch) have any transaction responses from the uncore subsystem.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts data reads (demand & prefetch) hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x0000043091", + "Counter": "0,1,2,3", + "UMask": "0x1", + "PEBScounters": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.ANY_DATA_RD.L2_HIT", + "PDIR_COUNTER": "na", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts data reads (demand & prefetch) hit the L2 cache.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts data reads (demand & prefetch) true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x0200003091", + "Counter": "0,1,2,3", + "UMask": "0x1", + "PEBScounters": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.ANY_DATA_RD.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED", + "PDIR_COUNTER": "na", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts data reads (demand & prefetch) true miss for the L2 cache with a snoop miss in the other processor module. ", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts data reads (demand & prefetch) miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x1000003091", + "Counter": "0,1,2,3", + "UMask": "0x1", + "PEBScounters": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.ANY_DATA_RD.L2_MISS.HITM_OTHER_CORE", + "PDIR_COUNTER": "na", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts data reads (demand & prefetch) miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts data reads (demand & prefetch) outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x4000003091", + "Counter": "0,1,2,3", + "UMask": "0x1", + "PEBScounters": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.ANY_DATA_RD.OUTSTANDING", + "PDIR_COUNTER": "na", + "MSRIndex": "0x1a6", + "SampleAfterValue": "100007", + "BriefDescription": "Counts data reads (demand & prefetch) outstanding, per cycle, from the time of the L2 miss to when any response is received.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts reads for ownership (RFO) requests (demand & prefetch) have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x0000010022", + "Counter": "0,1,2,3", + "UMask": "0x1", + "PEBScounters": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.ANY_RFO.ANY_RESPONSE", + "PDIR_COUNTER": "na", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts reads for ownership (RFO) requests (demand & prefetch) have any transaction responses from the uncore subsystem.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts reads for ownership (RFO) requests (demand & prefetch) hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x0000040022", + "Counter": "0,1,2,3", + "UMask": "0x1", + "PEBScounters": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.ANY_RFO.L2_HIT", + "PDIR_COUNTER": "na", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts reads for ownership (RFO) requests (demand & prefetch) hit the L2 cache.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts reads for ownership (RFO) requests (demand & prefetch) true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x0200000022", + "Counter": "0,1,2,3", + "UMask": "0x1", + "PEBScounters": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.ANY_RFO.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED", + "PDIR_COUNTER": "na", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts reads for ownership (RFO) requests (demand & prefetch) true miss for the L2 cache with a snoop miss in the other processor module. ", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts reads for ownership (RFO) requests (demand & prefetch) miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x1000000022", + "Counter": "0,1,2,3", + "UMask": "0x1", + "PEBScounters": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.ANY_RFO.L2_MISS.HITM_OTHER_CORE", + "PDIR_COUNTER": "na", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts reads for ownership (RFO) requests (demand & prefetch) miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts reads for ownership (RFO) requests (demand & prefetch) outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x4000000022", + "Counter": "0,1,2,3", + "UMask": "0x1", + "PEBScounters": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.ANY_RFO.OUTSTANDING", + "PDIR_COUNTER": "na", + "MSRIndex": "0x1a6", + "SampleAfterValue": "100007", + "BriefDescription": "Counts reads for ownership (RFO) requests (demand & prefetch) outstanding, per cycle, from the time of the L2 miss to when any response is received.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts data read, code read, and read for ownership (RFO) requests (demand & prefetch) have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x00000132b7", + "Counter": "0,1,2,3", + "UMask": "0x1", + "PEBScounters": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.ANY_READ.ANY_RESPONSE", + "PDIR_COUNTER": "na", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts data read, code read, and read for ownership (RFO) requests (demand & prefetch) have any transaction responses from the uncore subsystem.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts data read, code read, and read for ownership (RFO) requests (demand & prefetch) hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x00000432b7", + "Counter": "0,1,2,3", + "UMask": "0x1", + "PEBScounters": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.ANY_READ.L2_HIT", + "PDIR_COUNTER": "na", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts data read, code read, and read for ownership (RFO) requests (demand & prefetch) hit the L2 cache.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts data read, code read, and read for ownership (RFO) requests (demand & prefetch) true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x02000032b7", + "Counter": "0,1,2,3", + "UMask": "0x1", + "PEBScounters": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.ANY_READ.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED", + "PDIR_COUNTER": "na", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts data read, code read, and read for ownership (RFO) requests (demand & prefetch) true miss for the L2 cache with a snoop miss in the other processor module. ", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts data read, code read, and read for ownership (RFO) requests (demand & prefetch) miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x10000032b7", + "Counter": "0,1,2,3", + "UMask": "0x1", + "PEBScounters": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.ANY_READ.L2_MISS.HITM_OTHER_CORE", + "PDIR_COUNTER": "na", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts data read, code read, and read for ownership (RFO) requests (demand & prefetch) miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts data read, code read, and read for ownership (RFO) requests (demand & prefetch) outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x40000032b7", + "Counter": "0,1,2,3", + "UMask": "0x1", + "PEBScounters": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.ANY_READ.OUTSTANDING", + "PDIR_COUNTER": "na", + "MSRIndex": "0x1a6", + "SampleAfterValue": "100007", + "BriefDescription": "Counts data read, code read, and read for ownership (RFO) requests (demand & prefetch) outstanding, per cycle, from the time of the L2 miss to when any response is received.", + "Offcore": "1" + } +] \ No newline at end of file diff --git a/tools/perf/pmu-events/arch/x86/goldmontplus/frontend.json b/tools/perf/pmu-events/arch/x86/goldmontplus/frontend.json new file mode 100644 index 000000000000..a7878965ceab --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/goldmontplus/frontend.json @@ -0,0 +1,62 @@ +[ + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts requests to the Instruction Cache (ICache) for one or more bytes in an ICache Line and that cache line is in the ICache (hit). The event strives to count on a cache line basis, so that multiple accesses which hit in a single cache line count as one ICACHE.HIT. Specifically, the event counts when straight line code crosses the cache line boundary, or when a branch target is to a new line, and that cache line is in the ICache. This event counts differently than Intel processors based on Silvermont microarchitecture.", + "EventCode": "0x80", + "Counter": "0,1,2,3", + "UMask": "0x1", + "PEBScounters": "0,1,2,3", + "EventName": "ICACHE.HIT", + "PDIR_COUNTER": "na", + "SampleAfterValue": "200003", + "BriefDescription": "References per ICache line that are available in the ICache (hit). This event counts differently than Intel processors based on Silvermont microarchitecture" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts requests to the Instruction Cache (ICache) for one or more bytes in an ICache Line and that cache line is not in the ICache (miss). The event strives to count on a cache line basis, so that multiple accesses which miss in a single cache line count as one ICACHE.MISS. Specifically, the event counts when straight line code crosses the cache line boundary, or when a branch target is to a new line, and that cache line is not in the ICache. This event counts differently than Intel processors based on Silvermont microarchitecture.", + "EventCode": "0x80", + "Counter": "0,1,2,3", + "UMask": "0x2", + "PEBScounters": "0,1,2,3", + "EventName": "ICACHE.MISSES", + "PDIR_COUNTER": "na", + "SampleAfterValue": "200003", + "BriefDescription": "References per ICache line that are not available in the ICache (miss). This event counts differently than Intel processors based on Silvermont microarchitecture" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts requests to the Instruction Cache (ICache) for one or more bytes in an ICache Line. The event strives to count on a cache line basis, so that multiple fetches to a single cache line count as one ICACHE.ACCESS. Specifically, the event counts when accesses from straight line code crosses the cache line boundary, or when a branch target is to a new line.\r\nThis event counts differently than Intel processors based on Silvermont microarchitecture.", + "EventCode": "0x80", + "Counter": "0,1,2,3", + "UMask": "0x3", + "PEBScounters": "0,1,2,3", + "EventName": "ICACHE.ACCESSES", + "PDIR_COUNTER": "na", + "SampleAfterValue": "200003", + "BriefDescription": "References per ICache line. This event counts differently than Intel processors based on Silvermont microarchitecture" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts the number of times the Microcode Sequencer (MS) starts a flow of uops from the MSROM. It does not count every time a uop is read from the MSROM. The most common case that this counts is when a micro-coded instruction is encountered by the front end of the machine. Other cases include when an instruction encounters a fault, trap, or microcode assist of any sort that initiates a flow of uops. The event will count MS startups for uops that are speculative, and subsequently cleared by branch mispredict or a machine clear.", + "EventCode": "0xE7", + "Counter": "0,1,2,3", + "UMask": "0x1", + "PEBScounters": "0,1,2,3", + "EventName": "MS_DECODED.MS_ENTRY", + "PDIR_COUNTER": "na", + "SampleAfterValue": "200003", + "BriefDescription": "MS decode starts" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts the number of times the prediction (from the predecode cache) for instruction length is incorrect.", + "EventCode": "0xE9", + "Counter": "0,1,2,3", + "UMask": "0x1", + "PEBScounters": "0,1,2,3", + "EventName": "DECODE_RESTRICTION.PREDECODE_WRONG", + "PDIR_COUNTER": "na", + "SampleAfterValue": "200003", + "BriefDescription": "Decode restrictions due to predicting wrong instruction length" + } +] \ No newline at end of file diff --git a/tools/perf/pmu-events/arch/x86/goldmontplus/memory.json b/tools/perf/pmu-events/arch/x86/goldmontplus/memory.json new file mode 100644 index 000000000000..91e0815f3ffb --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/goldmontplus/memory.json @@ -0,0 +1,38 @@ +[ + { + "PEBS": "2", + "CollectPEBSRecord": "2", + "PublicDescription": "Counts when a memory load of a uop spans a page boundary (a split) is retired.", + "EventCode": "0x13", + "Counter": "0,1,2,3", + "UMask": "0x2", + "PEBScounters": "0,1,2,3", + "EventName": "MISALIGN_MEM_REF.LOAD_PAGE_SPLIT", + "SampleAfterValue": "200003", + "BriefDescription": "Load uops that split a page (Precise event capable)" + }, + { + "PEBS": "2", + "CollectPEBSRecord": "2", + "PublicDescription": "Counts when a memory store of a uop spans a page boundary (a split) is retired.", + "EventCode": "0x13", + "Counter": "0,1,2,3", + "UMask": "0x4", + "PEBScounters": "0,1,2,3", + "EventName": "MISALIGN_MEM_REF.STORE_PAGE_SPLIT", + "SampleAfterValue": "200003", + "BriefDescription": "Store uops that split a page (Precise event capable)" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts machine clears due to memory ordering issues. This occurs when a snoop request happens and the machine is uncertain if memory ordering will be preserved - as another core is in the process of modifying the data.", + "EventCode": "0xC3", + "Counter": "0,1,2,3", + "UMask": "0x2", + "PEBScounters": "0,1,2,3", + "EventName": "MACHINE_CLEARS.MEMORY_ORDERING", + "PDIR_COUNTER": "na", + "SampleAfterValue": "20003", + "BriefDescription": "Machine clears due to memory ordering issue" + } +] \ No newline at end of file diff --git a/tools/perf/pmu-events/arch/x86/goldmontplus/other.json b/tools/perf/pmu-events/arch/x86/goldmontplus/other.json new file mode 100644 index 000000000000..b860374418ab --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/goldmontplus/other.json @@ -0,0 +1,98 @@ +[ + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts cycles that fetch is stalled due to any reason. That is, the decoder queue is able to accept bytes, but the fetch unit is unable to provide bytes. This will include cycles due to an ITLB miss, ICache miss and other events.", + "EventCode": "0x86", + "Counter": "0,1,2,3", + "UMask": "0x0", + "PEBScounters": "0,1,2,3", + "EventName": "FETCH_STALL.ALL", + "PDIR_COUNTER": "na", + "SampleAfterValue": "200003", + "BriefDescription": "Cycles code-fetch stalled due to any reason." + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts cycles that fetch is stalled due to an outstanding ITLB miss. That is, the decoder queue is able to accept bytes, but the fetch unit is unable to provide bytes due to an ITLB miss. Note: this event is not the same as page walk cycles to retrieve an instruction translation.", + "EventCode": "0x86", + "Counter": "0,1,2,3", + "UMask": "0x1", + "PEBScounters": "0,1,2,3", + "EventName": "FETCH_STALL.ITLB_FILL_PENDING_CYCLES", + "PDIR_COUNTER": "na", + "SampleAfterValue": "200003", + "BriefDescription": "Cycles the code-fetch stalls and an ITLB miss is outstanding." + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts the number of issue slots per core cycle that were not consumed by the backend due to either a full resource in the backend (RESOURCE_FULL) or due to the processor recovering from some event (RECOVERY).", + "EventCode": "0xCA", + "Counter": "0,1,2,3", + "UMask": "0x0", + "PEBScounters": "0,1,2,3", + "EventName": "ISSUE_SLOTS_NOT_CONSUMED.ANY", + "PDIR_COUNTER": "na", + "SampleAfterValue": "200003", + "BriefDescription": "Unfilled issue slots per cycle" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts the number of issue slots per core cycle that were not consumed because of a full resource in the backend. Including but not limited to resources such as the Re-order Buffer (ROB), reservation stations (RS), load/store buffers, physical registers, or any other needed machine resource that is currently unavailable. Note that uops must be available for consumption in order for this event to fire. If a uop is not available (Instruction Queue is empty), this event will not count.", + "EventCode": "0xCA", + "Counter": "0,1,2,3", + "UMask": "0x1", + "PEBScounters": "0,1,2,3", + "EventName": "ISSUE_SLOTS_NOT_CONSUMED.RESOURCE_FULL", + "PDIR_COUNTER": "na", + "SampleAfterValue": "200003", + "BriefDescription": "Unfilled issue slots per cycle because of a full resource in the backend" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts the number of issue slots per core cycle that were not consumed by the backend because allocation is stalled waiting for a mispredicted jump to retire or other branch-like conditions (e.g. the event is relevant during certain microcode flows). Counts all issue slots blocked while within this window including slots where uops were not available in the Instruction Queue.", + "EventCode": "0xCA", + "Counter": "0,1,2,3", + "UMask": "0x2", + "PEBScounters": "0,1,2,3", + "EventName": "ISSUE_SLOTS_NOT_CONSUMED.RECOVERY", + "PDIR_COUNTER": "na", + "SampleAfterValue": "200003", + "BriefDescription": "Unfilled issue slots per cycle to recover" + }, + { + "CollectPEBSRecord": "2", + "PublicDescription": "Counts hardware interrupts received by the processor.", + "EventCode": "0xCB", + "Counter": "0,1,2,3", + "UMask": "0x1", + "PEBScounters": "0,1,2,3", + "EventName": "HW_INTERRUPTS.RECEIVED", + "PDIR_COUNTER": "na", + "SampleAfterValue": "203", + "BriefDescription": "Hardware interrupts received" + }, + { + "CollectPEBSRecord": "2", + "PublicDescription": "Counts the number of core cycles during which interrupts are masked (disabled). Increments by 1 each core cycle that EFLAGS.IF is 0, regardless of whether interrupts are pending or not.", + "EventCode": "0xCB", + "Counter": "0,1,2,3", + "UMask": "0x2", + "PEBScounters": "0,1,2,3", + "EventName": "HW_INTERRUPTS.MASKED", + "PDIR_COUNTER": "na", + "SampleAfterValue": "200003", + "BriefDescription": "Cycles hardware interrupts are masked" + }, + { + "CollectPEBSRecord": "2", + "PublicDescription": "Counts core cycles during which there are pending interrupts, but interrupts are masked (EFLAGS.IF = 0).", + "EventCode": "0xCB", + "Counter": "0,1,2,3", + "UMask": "0x4", + "PEBScounters": "0,1,2,3", + "EventName": "HW_INTERRUPTS.PENDING_AND_MASKED", + "PDIR_COUNTER": "na", + "SampleAfterValue": "200003", + "BriefDescription": "Cycles pending interrupts are masked" + } +] \ No newline at end of file diff --git a/tools/perf/pmu-events/arch/x86/goldmontplus/pipeline.json b/tools/perf/pmu-events/arch/x86/goldmontplus/pipeline.json new file mode 100644 index 000000000000..ccf1aed69197 --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/goldmontplus/pipeline.json @@ -0,0 +1,544 @@ +[ + { + "PEBS": "2", + "CollectPEBSRecord": "1", + "PublicDescription": "Counts the number of instructions that retire execution. For instructions that consist of multiple uops, this event counts the retirement of the last uop of the instruction. The counter continues counting during hardware interrupts, traps, and inside interrupt handlers. This event uses fixed counter 0. You cannot collect a PEBs record for this event.", + "EventCode": "0x00", + "Counter": "Fixed counter 0", + "UMask": "0x1", + "PEBScounters": "32", + "EventName": "INST_RETIRED.ANY", + "PDIR_COUNTER": "na", + "SampleAfterValue": "2000003", + "BriefDescription": "Instructions retired (Fixed event)" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts the number of core cycles while the core is not in a halt state. The core enters the halt state when it is running the HLT instruction. In mobile systems the core frequency may change from time to time. For this reason this event may have a changing ratio with regards to time. This event uses fixed counter 1. You cannot collect a PEBs record for this event.", + "EventCode": "0x00", + "Counter": "Fixed counter 1", + "UMask": "0x2", + "PEBScounters": "33", + "EventName": "CPU_CLK_UNHALTED.CORE", + "PDIR_COUNTER": "na", + "SampleAfterValue": "2000003", + "BriefDescription": "Core cycles when core is not halted (Fixed event)" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts the number of reference cycles that the core is not in a halt state. The core enters the halt state when it is running the HLT instruction. In mobile systems the core frequency may change from time. This event is not affected by core frequency changes but counts as if the core is running at the maximum frequency all the time. This event uses fixed counter 2. You cannot collect a PEBs record for this event.", + "EventCode": "0x00", + "Counter": "Fixed counter 2", + "UMask": "0x3", + "PEBScounters": "34", + "EventName": "CPU_CLK_UNHALTED.REF_TSC", + "PDIR_COUNTER": "na", + "SampleAfterValue": "2000003", + "BriefDescription": "Reference cycles when core is not halted (Fixed event)" + }, + { + "PEBS": "2", + "CollectPEBSRecord": "2", + "PublicDescription": "Counts a load blocked from using a store forward, but did not occur because the store data was not available at the right time. The forward might occur subsequently when the data is available.", + "EventCode": "0x03", + "Counter": "0,1,2,3", + "UMask": "0x1", + "PEBScounters": "0,1,2,3", + "EventName": "LD_BLOCKS.DATA_UNKNOWN", + "SampleAfterValue": "200003", + "BriefDescription": "Loads blocked due to store data not ready (Precise event capable)" + }, + { + "PEBS": "2", + "CollectPEBSRecord": "2", + "PublicDescription": "Counts a load blocked from using a store forward because of an address/size mismatch, only one of the loads blocked from each store will be counted.", + "EventCode": "0x03", + "Counter": "0,1,2,3", + "UMask": "0x2", + "PEBScounters": "0,1,2,3", + "EventName": "LD_BLOCKS.STORE_FORWARD", + "SampleAfterValue": "200003", + "BriefDescription": "Loads blocked due to store forward restriction (Precise event capable)" + }, + { + "PEBS": "2", + "CollectPEBSRecord": "2", + "PublicDescription": "Counts loads that block because their address modulo 4K matches a pending store.", + "EventCode": "0x03", + "Counter": "0,1,2,3", + "UMask": "0x4", + "PEBScounters": "0,1,2,3", + "EventName": "LD_BLOCKS.4K_ALIAS", + "SampleAfterValue": "200003", + "BriefDescription": "Loads blocked because address has 4k partial address false dependence (Precise event capable)" + }, + { + "PEBS": "2", + "CollectPEBSRecord": "2", + "PublicDescription": "Counts loads blocked because they are unable to find their physical address in the micro TLB (UTLB).", + "EventCode": "0x03", + "Counter": "0,1,2,3", + "UMask": "0x8", + "PEBScounters": "0,1,2,3", + "EventName": "LD_BLOCKS.UTLB_MISS", + "SampleAfterValue": "200003", + "BriefDescription": "Loads blocked because address in not in the UTLB (Precise event capable)" + }, + { + "PEBS": "2", + "CollectPEBSRecord": "2", + "PublicDescription": "Counts anytime a load that retires is blocked for any reason.", + "EventCode": "0x03", + "Counter": "0,1,2,3", + "UMask": "0x10", + "PEBScounters": "0,1,2,3", + "EventName": "LD_BLOCKS.ALL_BLOCK", + "SampleAfterValue": "200003", + "BriefDescription": "Loads blocked (Precise event capable)" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts uops issued by the front end and allocated into the back end of the machine. This event counts uops that retire as well as uops that were speculatively executed but didn't retire. The sort of speculative uops that might be counted includes, but is not limited to those uops issued in the shadow of a miss-predicted branch, those uops that are inserted during an assist (such as for a denormal floating point result), and (previously allocated) uops that might be canceled during a machine clear.", + "EventCode": "0x0E", + "Counter": "0,1,2,3", + "UMask": "0x0", + "PEBScounters": "0,1,2,3", + "EventName": "UOPS_ISSUED.ANY", + "PDIR_COUNTER": "na", + "SampleAfterValue": "200003", + "BriefDescription": "Uops issued to the back end per cycle" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Core cycles when core is not halted. This event uses a (_P)rogrammable general purpose performance counter.", + "EventCode": "0x3C", + "Counter": "0,1,2,3", + "UMask": "0x0", + "PEBScounters": "0,1,2,3", + "EventName": "CPU_CLK_UNHALTED.CORE_P", + "PDIR_COUNTER": "na", + "SampleAfterValue": "2000003", + "BriefDescription": "Core cycles when core is not halted" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Reference cycles when core is not halted. This event uses a (_P)rogrammable general purpose performance counter.", + "EventCode": "0x3C", + "Counter": "0,1,2,3", + "UMask": "0x1", + "PEBScounters": "0,1,2,3", + "EventName": "CPU_CLK_UNHALTED.REF", + "PDIR_COUNTER": "na", + "SampleAfterValue": "2000003", + "BriefDescription": "Reference cycles when core is not halted" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "This event used to measure front-end inefficiencies. I.e. when front-end of the machine is not delivering uops to the back-end and the back-end has is not stalled. This event can be used to identify if the machine is truly front-end bound. When this event occurs, it is an indication that the front-end of the machine is operating at less than its theoretical peak performance. Background: We can think of the processor pipeline as being divided into 2 broader parts: Front-end and Back-end. Front-end is responsible for fetching the instruction, decoding into uops in machine understandable format and putting them into a uop queue to be consumed by back end. The back-end then takes these uops, allocates the required resources. When all resources are ready, uops are executed. If the back-end is not ready to accept uops from the front-end, then we do not want to count these as front-end bottlenecks. However, whenever we have bottlenecks in the back-end, we will have allocation unit stalls and eventually forcing the front-end to wait until the back-end is ready to receive more uops. This event counts only when back-end is requesting more uops and front-end is not able to provide them. When 3 uops are requested and no uops are delivered, the event counts 3. When 3 are requested, and only 1 is delivered, the event counts 2. When only 2 are delivered, the event counts 1. Alternatively stated, the event will not count if 3 uops are delivered, or if the back end is stalled and not requesting any uops at all. Counts indicate missed opportunities for the front-end to deliver a uop to the back end. Some examples of conditions that cause front-end efficiencies are: ICache misses, ITLB misses, and decoder restrictions that limit the front-end bandwidth. Known Issues: Some uops require multiple allocation slots. These uops will not be charged as a front end 'not delivered' opportunity, and will be regarded as a back end problem. For example, the INC instruction has one uop that requires 2 issue slots. A stream of INC instructions will not count as UOPS_NOT_DELIVERED, even though only one instruction can be issued per clock. The low uop issue rate for a stream of INC instructions is considered to be a back end issue.", + "EventCode": "0x9C", + "Counter": "0,1,2,3", + "UMask": "0x0", + "PEBScounters": "0,1,2,3", + "EventName": "UOPS_NOT_DELIVERED.ANY", + "PDIR_COUNTER": "na", + "SampleAfterValue": "200003", + "BriefDescription": "Uops requested but not-delivered to the back-end per cycle" + }, + { + "PEBS": "2", + "CollectPEBSRecord": "1", + "PublicDescription": "Counts the number of instructions that retire execution. For instructions that consist of multiple uops, this event counts the retirement of the last uop of the instruction. The event continues counting during hardware interrupts, traps, and inside interrupt handlers. This is an architectural performance event. This event uses a (_P)rogrammable general purpose performance counter. *This event is Precise Event capable: The EventingRIP field in the PEBS record is precise to the address of the instruction which caused the event. Note: Because PEBS records can be collected only on IA32_PMC0, only one event can use the PEBS facility at a time.", + "EventCode": "0xC0", + "Counter": "0,1,2,3", + "UMask": "0x0", + "PEBScounters": "0,1,2,3", + "EventName": "INST_RETIRED.ANY_P", + "SampleAfterValue": "2000003", + "BriefDescription": "Instructions retired (Precise event capable)" + }, + { + "PEBS": "2", + "CollectPEBSRecord": "2", + "PublicDescription": "Counts INST_RETIRED.ANY using the Reduced Skid PEBS feature that reduces the shadow in which events aren't counted allowing for a more unbiased distribution of samples across instructions retired.", + "EventCode": "0xC0", + "Counter": "0,1,2,3", + "UMask": "0x0", + "EventName": "INST_RETIRED.PREC_DIST", + "SampleAfterValue": "2000003", + "BriefDescription": "Instructions retired - using Reduced Skid PEBS feature" + }, + { + "PEBS": "2", + "CollectPEBSRecord": "2", + "PublicDescription": "Counts uops which retired.", + "EventCode": "0xC2", + "Counter": "0,1,2,3", + "UMask": "0x0", + "PEBScounters": "0,1,2,3", + "EventName": "UOPS_RETIRED.ANY", + "PDIR_COUNTER": "na", + "SampleAfterValue": "2000003", + "BriefDescription": "Uops retired (Precise event capable)" + }, + { + "PEBS": "2", + "CollectPEBSRecord": "2", + "PublicDescription": "Counts uops retired that are from the complex flows issued by the micro-sequencer (MS). Counts both the uops from a micro-coded instruction, and the uops that might be generated from a micro-coded assist.", + "EventCode": "0xC2", + "Counter": "0,1,2,3", + "UMask": "0x1", + "PEBScounters": "0,1,2,3", + "EventName": "UOPS_RETIRED.MS", + "PDIR_COUNTER": "na", + "SampleAfterValue": "2000003", + "BriefDescription": "MS uops retired (Precise event capable)" + }, + { + "PEBS": "2", + "CollectPEBSRecord": "1", + "PublicDescription": "Counts the number of floating point divide uops retired.", + "EventCode": "0xC2", + "Counter": "0,1,2,3", + "UMask": "0x8", + "PEBScounters": "0,1,2,3", + "EventName": "UOPS_RETIRED.FPDIV", + "SampleAfterValue": "2000003", + "BriefDescription": "Floating point divide uops retired (Precise Event Capable)" + }, + { + "PEBS": "2", + "CollectPEBSRecord": "1", + "PublicDescription": "Counts the number of integer divide uops retired.", + "EventCode": "0xC2", + "Counter": "0,1,2,3", + "UMask": "0x10", + "PEBScounters": "0,1,2,3", + "EventName": "UOPS_RETIRED.IDIV", + "SampleAfterValue": "2000003", + "BriefDescription": "Integer divide uops retired (Precise Event Capable)" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts machine clears for any reason.", + "EventCode": "0xC3", + "Counter": "0,1,2,3", + "UMask": "0x0", + "PEBScounters": "0,1,2,3", + "EventName": "MACHINE_CLEARS.ALL", + "PDIR_COUNTER": "na", + "SampleAfterValue": "20003", + "BriefDescription": "All machine clears" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts the number of times that the processor detects that a program is writing to a code section and has to perform a machine clear because of that modification. Self-modifying code (SMC) causes a severe penalty in all Intel architecture processors.", + "EventCode": "0xC3", + "Counter": "0,1,2,3", + "UMask": "0x1", + "PEBScounters": "0,1,2,3", + "EventName": "MACHINE_CLEARS.SMC", + "PDIR_COUNTER": "na", + "SampleAfterValue": "20003", + "BriefDescription": "Self-Modifying Code detected" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts machine clears due to floating point (FP) operations needing assists. For instance, if the result was a floating point denormal, the hardware clears the pipeline and reissues uops to produce the correct IEEE compliant denormal result.", + "EventCode": "0xC3", + "Counter": "0,1,2,3", + "UMask": "0x4", + "PEBScounters": "0,1,2,3", + "EventName": "MACHINE_CLEARS.FP_ASSIST", + "PDIR_COUNTER": "na", + "SampleAfterValue": "20003", + "BriefDescription": "Machine clears due to FP assists" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts machine clears due to memory disambiguation. Memory disambiguation happens when a load which has been issued conflicts with a previous unretired store in the pipeline whose address was not known at issue time, but is later resolved to be the same as the load address.", + "EventCode": "0xC3", + "Counter": "0,1,2,3", + "UMask": "0x8", + "PEBScounters": "0,1,2,3", + "EventName": "MACHINE_CLEARS.DISAMBIGUATION", + "PDIR_COUNTER": "na", + "SampleAfterValue": "20003", + "BriefDescription": "Machine clears due to memory disambiguation" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts the number of times that the machines clears due to a page fault. Covers both I-side and D-side(Loads/Stores) page faults. A page fault occurs when either page is not present, or an access violation", + "EventCode": "0xC3", + "Counter": "0,1,2,3", + "UMask": "0x20", + "PEBScounters": "0,1,2,3", + "EventName": "MACHINE_CLEARS.PAGE_FAULT", + "PDIR_COUNTER": "na", + "SampleAfterValue": "20003", + "BriefDescription": "Machines clear due to a page fault" + }, + { + "PEBS": "2", + "CollectPEBSRecord": "2", + "PublicDescription": "Counts branch instructions retired for all branch types. This is an architectural performance event.", + "EventCode": "0xC4", + "Counter": "0,1,2,3", + "UMask": "0x0", + "PEBScounters": "0,1,2,3", + "EventName": "BR_INST_RETIRED.ALL_BRANCHES", + "SampleAfterValue": "200003", + "BriefDescription": "Retired branch instructions (Precise event capable)" + }, + { + "PEBS": "2", + "CollectPEBSRecord": "2", + "PublicDescription": "Counts retired Jcc (Jump on Conditional Code/Jump if Condition is Met) branch instructions retired, including both when the branch was taken and when it was not taken.", + "EventCode": "0xC4", + "Counter": "0,1,2,3", + "UMask": "0x7e", + "PEBScounters": "0,1,2,3", + "EventName": "BR_INST_RETIRED.JCC", + "SampleAfterValue": "200003", + "BriefDescription": "Retired conditional branch instructions (Precise event capable)" + }, + { + "PEBS": "2", + "CollectPEBSRecord": "2", + "PublicDescription": "Counts the number of taken branch instructions retired.", + "EventCode": "0xC4", + "Counter": "0,1,2,3", + "UMask": "0x80", + "PEBScounters": "0,1,2,3", + "EventName": "BR_INST_RETIRED.ALL_TAKEN_BRANCHES", + "SampleAfterValue": "200003", + "BriefDescription": "Retired taken branch instructions (Precise event capable)" + }, + { + "PEBS": "2", + "CollectPEBSRecord": "2", + "PublicDescription": "Counts far branch instructions retired. This includes far jump, far call and return, and Interrupt call and return.", + "EventCode": "0xC4", + "Counter": "0,1,2,3", + "UMask": "0xbf", + "PEBScounters": "0,1,2,3", + "EventName": "BR_INST_RETIRED.FAR_BRANCH", + "SampleAfterValue": "200003", + "BriefDescription": "Retired far branch instructions (Precise event capable)" + }, + { + "PEBS": "2", + "CollectPEBSRecord": "2", + "PublicDescription": "Counts near indirect call or near indirect jmp branch instructions retired.", + "EventCode": "0xC4", + "Counter": "0,1,2,3", + "UMask": "0xeb", + "PEBScounters": "0,1,2,3", + "EventName": "BR_INST_RETIRED.NON_RETURN_IND", + "SampleAfterValue": "200003", + "BriefDescription": "Retired instructions of near indirect Jmp or call (Precise event capable)" + }, + { + "PEBS": "2", + "CollectPEBSRecord": "2", + "PublicDescription": "Counts near return branch instructions retired.", + "EventCode": "0xC4", + "Counter": "0,1,2,3", + "UMask": "0xf7", + "PEBScounters": "0,1,2,3", + "EventName": "BR_INST_RETIRED.RETURN", + "SampleAfterValue": "200003", + "BriefDescription": "Retired near return instructions (Precise event capable)" + }, + { + "PEBS": "2", + "CollectPEBSRecord": "2", + "PublicDescription": "Counts near CALL branch instructions retired.", + "EventCode": "0xC4", + "Counter": "0,1,2,3", + "UMask": "0xf9", + "PEBScounters": "0,1,2,3", + "EventName": "BR_INST_RETIRED.CALL", + "SampleAfterValue": "200003", + "BriefDescription": "Retired near call instructions (Precise event capable)" + }, + { + "PEBS": "2", + "CollectPEBSRecord": "2", + "PublicDescription": "Counts near indirect CALL branch instructions retired.", + "EventCode": "0xC4", + "Counter": "0,1,2,3", + "UMask": "0xfb", + "PEBScounters": "0,1,2,3", + "EventName": "BR_INST_RETIRED.IND_CALL", + "SampleAfterValue": "200003", + "BriefDescription": "Retired near indirect call instructions (Precise event capable)" + }, + { + "PEBS": "2", + "CollectPEBSRecord": "2", + "PublicDescription": "Counts near relative CALL branch instructions retired.", + "EventCode": "0xC4", + "Counter": "0,1,2,3", + "UMask": "0xfd", + "PEBScounters": "0,1,2,3", + "EventName": "BR_INST_RETIRED.REL_CALL", + "SampleAfterValue": "200003", + "BriefDescription": "Retired near relative call instructions (Precise event capable)" + }, + { + "PEBS": "2", + "CollectPEBSRecord": "2", + "PublicDescription": "Counts Jcc (Jump on Conditional Code/Jump if Condition is Met) branch instructions retired that were taken and does not count when the Jcc branch instruction were not taken.", + "EventCode": "0xC4", + "Counter": "0,1,2,3", + "UMask": "0xfe", + "PEBScounters": "0,1,2,3", + "EventName": "BR_INST_RETIRED.TAKEN_JCC", + "SampleAfterValue": "200003", + "BriefDescription": "Retired conditional branch instructions that were taken (Precise event capable)" + }, + { + "PEBS": "2", + "CollectPEBSRecord": "2", + "PublicDescription": "Counts mispredicted branch instructions retired including all branch types.", + "EventCode": "0xC5", + "Counter": "0,1,2,3", + "UMask": "0x0", + "PEBScounters": "0,1,2,3", + "EventName": "BR_MISP_RETIRED.ALL_BRANCHES", + "SampleAfterValue": "200003", + "BriefDescription": "Retired mispredicted branch instructions (Precise event capable)" + }, + { + "PEBS": "2", + "CollectPEBSRecord": "2", + "PublicDescription": "Counts mispredicted retired Jcc (Jump on Conditional Code/Jump if Condition is Met) branch instructions retired, including both when the branch was supposed to be taken and when it was not supposed to be taken (but the processor predicted the opposite condition).", + "EventCode": "0xC5", + "Counter": "0,1,2,3", + "UMask": "0x7e", + "PEBScounters": "0,1,2,3", + "EventName": "BR_MISP_RETIRED.JCC", + "SampleAfterValue": "200003", + "BriefDescription": "Retired mispredicted conditional branch instructions (Precise event capable)" + }, + { + "PEBS": "2", + "CollectPEBSRecord": "2", + "PublicDescription": "Counts mispredicted branch instructions retired that were near indirect call or near indirect jmp, where the target address taken was not what the processor predicted.", + "EventCode": "0xC5", + "Counter": "0,1,2,3", + "UMask": "0xeb", + "PEBScounters": "0,1,2,3", + "EventName": "BR_MISP_RETIRED.NON_RETURN_IND", + "SampleAfterValue": "200003", + "BriefDescription": "Retired mispredicted instructions of near indirect Jmp or near indirect call (Precise event capable)" + }, + { + "PEBS": "2", + "CollectPEBSRecord": "2", + "PublicDescription": "Counts mispredicted near RET branch instructions retired, where the return address taken was not what the processor predicted.", + "EventCode": "0xC5", + "Counter": "0,1,2,3", + "UMask": "0xf7", + "PEBScounters": "0,1,2,3", + "EventName": "BR_MISP_RETIRED.RETURN", + "SampleAfterValue": "200003", + "BriefDescription": "Retired mispredicted near return instructions (Precise event capable)" + }, + { + "PEBS": "2", + "CollectPEBSRecord": "2", + "PublicDescription": "Counts mispredicted near indirect CALL branch instructions retired, where the target address taken was not what the processor predicted.", + "EventCode": "0xC5", + "Counter": "0,1,2,3", + "UMask": "0xfb", + "PEBScounters": "0,1,2,3", + "EventName": "BR_MISP_RETIRED.IND_CALL", + "SampleAfterValue": "200003", + "BriefDescription": "Retired mispredicted near indirect call instructions (Precise event capable)" + }, + { + "PEBS": "2", + "CollectPEBSRecord": "2", + "PublicDescription": "Counts mispredicted retired Jcc (Jump on Conditional Code/Jump if Condition is Met) branch instructions retired that were supposed to be taken but the processor predicted that it would not be taken.", + "EventCode": "0xC5", + "Counter": "0,1,2,3", + "UMask": "0xfe", + "PEBScounters": "0,1,2,3", + "EventName": "BR_MISP_RETIRED.TAKEN_JCC", + "SampleAfterValue": "200003", + "BriefDescription": "Retired mispredicted conditional branch instructions that were taken (Precise event capable)" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts core cycles if either divide unit is busy.", + "EventCode": "0xCD", + "Counter": "0,1,2,3", + "UMask": "0x0", + "PEBScounters": "0,1,2,3", + "EventName": "CYCLES_DIV_BUSY.ALL", + "PDIR_COUNTER": "na", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles a divider is busy" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts core cycles the integer divide unit is busy.", + "EventCode": "0xCD", + "Counter": "0,1,2,3", + "UMask": "0x1", + "PEBScounters": "0,1,2,3", + "EventName": "CYCLES_DIV_BUSY.IDIV", + "PDIR_COUNTER": "na", + "SampleAfterValue": "200003", + "BriefDescription": "Cycles the integer divide unit is busy" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts core cycles the floating point divide unit is busy.", + "EventCode": "0xCD", + "Counter": "0,1,2,3", + "UMask": "0x2", + "PEBScounters": "0,1,2,3", + "EventName": "CYCLES_DIV_BUSY.FPDIV", + "PDIR_COUNTER": "na", + "SampleAfterValue": "200003", + "BriefDescription": "Cycles the FP divide unit is busy" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts the number of times a BACLEAR is signaled for any reason, including, but not limited to indirect branch/call, Jcc (Jump on Conditional Code/Jump if Condition is Met) branch, unconditional branch/call, and returns.", + "EventCode": "0xE6", + "Counter": "0,1,2,3", + "UMask": "0x1", + "PEBScounters": "0,1,2,3", + "EventName": "BACLEARS.ALL", + "PDIR_COUNTER": "na", + "SampleAfterValue": "200003", + "BriefDescription": "BACLEARs asserted for any branch type" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts BACLEARS on return instructions.", + "EventCode": "0xE6", + "Counter": "0,1,2,3", + "UMask": "0x8", + "PEBScounters": "0,1,2,3", + "EventName": "BACLEARS.RETURN", + "PDIR_COUNTER": "na", + "SampleAfterValue": "200003", + "BriefDescription": "BACLEARs asserted for return branch" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts BACLEARS on Jcc (Jump on Conditional Code/Jump if Condition is Met) branches.", + "EventCode": "0xE6", + "Counter": "0,1,2,3", + "UMask": "0x10", + "PEBScounters": "0,1,2,3", + "EventName": "BACLEARS.COND", + "PDIR_COUNTER": "na", + "SampleAfterValue": "200003", + "BriefDescription": "BACLEARs asserted for conditional branch" + } +] \ No newline at end of file diff --git a/tools/perf/pmu-events/arch/x86/goldmontplus/virtual-memory.json b/tools/perf/pmu-events/arch/x86/goldmontplus/virtual-memory.json new file mode 100644 index 000000000000..0b53a3b0dfb8 --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/goldmontplus/virtual-memory.json @@ -0,0 +1,218 @@ +[ + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts page walks completed due to demand data loads (including SW prefetches) whose address translations missed in all TLB levels and were mapped to 4K pages. The page walks can end with or without a page fault.", + "EventCode": "0x08", + "Counter": "0,1,2,3", + "UMask": "0x2", + "PEBScounters": "0,1,2,3", + "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_4K", + "PDIR_COUNTER": "na", + "SampleAfterValue": "200003", + "BriefDescription": "Page walk completed due to a demand load to a 4K page" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts page walks completed due to demand data loads (including SW prefetches) whose address translations missed in all TLB levels and were mapped to 2M or 4M pages. The page walks can end with or without a page fault.", + "EventCode": "0x08", + "Counter": "0,1,2,3", + "UMask": "0x4", + "PEBScounters": "0,1,2,3", + "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M", + "PDIR_COUNTER": "na", + "SampleAfterValue": "200003", + "BriefDescription": "Page walk completed due to a demand load to a 2M or 4M page" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts page walks completed due to demand data loads (including SW prefetches) whose address translations missed in all TLB levels and were mapped to 1GB pages. The page walks can end with or without a page fault.", + "EventCode": "0x08", + "Counter": "0,1,2,3", + "UMask": "0x8", + "PEBScounters": "0,1,2,3", + "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_1GB", + "PDIR_COUNTER": "na", + "SampleAfterValue": "200003", + "BriefDescription": "Page walk completed due to a demand load to a 1GB page" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts once per cycle for each page walk occurring due to a load (demand data loads or SW prefetches). Includes cycles spent traversing the Extended Page Table (EPT). Average cycles per walk can be calculated by dividing by the number of walks.", + "EventCode": "0x08", + "Counter": "0,1,2,3", + "UMask": "0x10", + "PEBScounters": "0,1,2,3", + "EventName": "DTLB_LOAD_MISSES.WALK_PENDING", + "PDIR_COUNTER": "na", + "SampleAfterValue": "200003", + "BriefDescription": "Page walks outstanding due to a demand load every cycle." + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts page walks completed due to demand data stores whose address translations missed in the TLB and were mapped to 4K pages. The page walks can end with or without a page fault.", + "EventCode": "0x49", + "Counter": "0,1,2,3", + "UMask": "0x2", + "PEBScounters": "0,1,2,3", + "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_4K", + "PDIR_COUNTER": "na", + "SampleAfterValue": "2000003", + "BriefDescription": "Page walk completed due to a demand data store to a 4K page" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts page walks completed due to demand data stores whose address translations missed in the TLB and were mapped to 2M or 4M pages. The page walks can end with or without a page fault.", + "EventCode": "0x49", + "Counter": "0,1,2,3", + "UMask": "0x4", + "PEBScounters": "0,1,2,3", + "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_2M_4M", + "PDIR_COUNTER": "na", + "SampleAfterValue": "2000003", + "BriefDescription": "Page walk completed due to a demand data store to a 2M or 4M page" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts page walks completed due to demand data stores whose address translations missed in the TLB and were mapped to 1GB pages. The page walks can end with or without a page fault.", + "EventCode": "0x49", + "Counter": "0,1,2,3", + "UMask": "0x8", + "PEBScounters": "0,1,2,3", + "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_1GB", + "PDIR_COUNTER": "na", + "SampleAfterValue": "2000003", + "BriefDescription": "Page walk completed due to a demand data store to a 1GB page" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts once per cycle for each page walk occurring due to a demand data store. Includes cycles spent traversing the Extended Page Table (EPT). Average cycles per walk can be calculated by dividing by the number of walks.", + "EventCode": "0x49", + "Counter": "0,1,2,3", + "UMask": "0x10", + "PEBScounters": "0,1,2,3", + "EventName": "DTLB_STORE_MISSES.WALK_PENDING", + "PDIR_COUNTER": "na", + "SampleAfterValue": "200003", + "BriefDescription": "Page walks outstanding due to a demand data store every cycle." + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts once per cycle for each page walk only while traversing the Extended Page Table (EPT), and does not count during the rest of the translation. The EPT is used for translating Guest-Physical Addresses to Physical Addresses for Virtual Machine Monitors (VMMs). Average cycles per walk can be calculated by dividing the count by number of walks.", + "EventCode": "0x4F", + "Counter": "0,1,2,3", + "UMask": "0x10", + "PEBScounters": "0,1,2,3", + "EventName": "EPT.WALK_PENDING", + "PDIR_COUNTER": "na", + "SampleAfterValue": "200003", + "BriefDescription": "Page walks outstanding due to walking the EPT every cycle" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts the number of times the machine was unable to find a translation in the Instruction Translation Lookaside Buffer (ITLB) for a linear address of an instruction fetch. It counts when new translation are filled into the ITLB. The event is speculative in nature, but will not count translations (page walks) that are begun and not finished, or translations that are finished but not filled into the ITLB.", + "EventCode": "0x81", + "Counter": "0,1,2,3", + "UMask": "0x4", + "PEBScounters": "0,1,2,3", + "EventName": "ITLB.MISS", + "PDIR_COUNTER": "na", + "SampleAfterValue": "200003", + "BriefDescription": "ITLB misses" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts page walks completed due to instruction fetches whose address translations missed in the TLB and were mapped to 4K pages. The page walks can end with or without a page fault.", + "EventCode": "0x85", + "Counter": "0,1,2,3", + "UMask": "0x2", + "PEBScounters": "0,1,2,3", + "EventName": "ITLB_MISSES.WALK_COMPLETED_4K", + "PDIR_COUNTER": "na", + "SampleAfterValue": "2000003", + "BriefDescription": "Page walk completed due to an instruction fetch in a 4K page" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts page walks completed due to instruction fetches whose address translations missed in the TLB and were mapped to 2M or 4M pages. The page walks can end with or without a page fault.", + "EventCode": "0x85", + "Counter": "0,1,2,3", + "UMask": "0x4", + "PEBScounters": "0,1,2,3", + "EventName": "ITLB_MISSES.WALK_COMPLETED_2M_4M", + "PDIR_COUNTER": "na", + "SampleAfterValue": "2000003", + "BriefDescription": "Page walk completed due to an instruction fetch in a 2M or 4M page" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts page walks completed due to instruction fetches whose address translations missed in the TLB and were mapped to 1GB pages. The page walks can end with or without a page fault.", + "EventCode": "0x85", + "Counter": "0,1,2,3", + "UMask": "0x8", + "PEBScounters": "0,1,2,3", + "EventName": "ITLB_MISSES.WALK_COMPLETED_1GB", + "PDIR_COUNTER": "na", + "SampleAfterValue": "2000003", + "BriefDescription": "Page walk completed due to an instruction fetch in a 1GB page" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts once per cycle for each page walk occurring due to an instruction fetch. Includes cycles spent traversing the Extended Page Table (EPT). Average cycles per walk can be calculated by dividing by the number of walks.", + "EventCode": "0x85", + "Counter": "0,1,2,3", + "UMask": "0x10", + "PEBScounters": "0,1,2,3", + "EventName": "ITLB_MISSES.WALK_PENDING", + "PDIR_COUNTER": "na", + "SampleAfterValue": "200003", + "BriefDescription": "Page walks outstanding due to an instruction fetch every cycle." + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts STLB flushes. The TLBs are flushed on instructions like INVLPG and MOV to CR3.", + "EventCode": "0xBD", + "Counter": "0,1,2,3", + "UMask": "0x20", + "PEBScounters": "0,1,2,3", + "EventName": "TLB_FLUSHES.STLB_ANY", + "PDIR_COUNTER": "na", + "SampleAfterValue": "20003", + "BriefDescription": "STLB flushes" + }, + { + "PEBS": "2", + "CollectPEBSRecord": "2", + "PublicDescription": "Counts load uops retired that caused a DTLB miss.", + "EventCode": "0xD0", + "Counter": "0,1,2,3", + "UMask": "0x11", + "PEBScounters": "0,1,2,3", + "EventName": "MEM_UOPS_RETIRED.DTLB_MISS_LOADS", + "SampleAfterValue": "200003", + "BriefDescription": "Load uops retired that missed the DTLB (Precise event capable)" + }, + { + "PEBS": "2", + "CollectPEBSRecord": "2", + "PublicDescription": "Counts store uops retired that caused a DTLB miss.", + "EventCode": "0xD0", + "Counter": "0,1,2,3", + "UMask": "0x12", + "PEBScounters": "0,1,2,3", + "EventName": "MEM_UOPS_RETIRED.DTLB_MISS_STORES", + "SampleAfterValue": "200003", + "BriefDescription": "Store uops retired that missed the DTLB (Precise event capable)" + }, + { + "PEBS": "2", + "CollectPEBSRecord": "2", + "PublicDescription": "Counts uops retired that had a DTLB miss on load, store or either. Note that when two distinct memory operations to the same page miss the DTLB, only one of them will be recorded as a DTLB miss.", + "EventCode": "0xD0", + "Counter": "0,1,2,3", + "UMask": "0x13", + "PEBScounters": "0,1,2,3", + "EventName": "MEM_UOPS_RETIRED.DTLB_MISS", + "SampleAfterValue": "200003", + "BriefDescription": "Memory uops retired that missed the DTLB (Precise event capable)" + } +] \ No newline at end of file diff --git a/tools/perf/pmu-events/arch/x86/mapfile.csv b/tools/perf/pmu-events/arch/x86/mapfile.csv index 4ea068366c3e..fe1a2c47cabf 100644 --- a/tools/perf/pmu-events/arch/x86/mapfile.csv +++ b/tools/perf/pmu-events/arch/x86/mapfile.csv @@ -9,6 +9,7 @@ GenuineIntel-6-27,v4,bonnell,core GenuineIntel-6-36,v4,bonnell,core GenuineIntel-6-35,v4,bonnell,core GenuineIntel-6-5C,v8,goldmont,core +GenuineIntel-6-7A,v1,goldmontplus,core GenuineIntel-6-3C,v24,haswell,core GenuineIntel-6-45,v24,haswell,core GenuineIntel-6-46,v24,haswell,core -- cgit v1.2.3 From 2a704fc8db7b0080a67d9f4f4cb2a7bcaf79949d Mon Sep 17 00:00:00 2001 From: Milian Wolff Date: Mon, 9 Oct 2017 22:32:55 +0200 Subject: perf report: Remove code to handle inline frames from browsers The follow-up commits will make inline frames first-class citizens in the callchain, thereby obsoleting all of this special code. Signed-off-by: Milian Wolff Reviewed-by: Jiri Olsa Reviewed-by: Namhyung Kim Cc: David Ahern Cc: Peter Zijlstra Cc: Yao Jin Link: http://lkml.kernel.org/r/20171009203310.17362-2-milian.wolff@kdab.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/hists.c | 180 +++------------------------------------- tools/perf/ui/stdio/hist.c | 77 +---------------- tools/perf/util/evsel_fprintf.c | 32 ------- tools/perf/util/hist.c | 5 -- tools/perf/util/sort.h | 1 - 5 files changed, 13 insertions(+), 282 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index 13dfb0a0bdeb..3a433f370e7f 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -154,57 +154,9 @@ static void callchain_list__set_folding(struct callchain_list *cl, bool unfold) cl->unfolded = unfold ? cl->has_children : false; } -static struct inline_node *inline_node__create(struct map *map, u64 ip) -{ - struct dso *dso; - struct inline_node *node; - - if (map == NULL) - return NULL; - - dso = map->dso; - if (dso == NULL) - return NULL; - - node = dso__parse_addr_inlines(dso, - map__rip_2objdump(map, ip)); - - return node; -} - -static int inline__count_rows(struct inline_node *node) -{ - struct inline_list *ilist; - int i = 0; - - if (node == NULL) - return 0; - - list_for_each_entry(ilist, &node->val, list) { - if ((ilist->filename != NULL) || (ilist->funcname != NULL)) - i++; - } - - return i; -} - -static int callchain_list__inline_rows(struct callchain_list *chain) -{ - struct inline_node *node; - int rows; - - node = inline_node__create(chain->ms.map, chain->ip); - if (node == NULL) - return 0; - - rows = inline__count_rows(node); - inline_node__delete(node); - return rows; -} - static int callchain_node__count_rows_rb_tree(struct callchain_node *node) { - int n = 0, inline_rows; + int n = 0; struct rb_node *nd; for (nd = rb_first(&node->rb_root); nd; nd = rb_next(nd)) { @@ -215,12 +167,6 @@ static int callchain_node__count_rows_rb_tree(struct callchain_node *node) list_for_each_entry(chain, &child->val, list) { ++n; - if (symbol_conf.inline_name) { - inline_rows = - callchain_list__inline_rows(chain); - n += inline_rows; - } - /* We need this because we may not have children */ folded_sign = callchain_list__folded(chain); if (folded_sign == '+') @@ -272,7 +218,7 @@ static int callchain_node__count_rows(struct callchain_node *node) { struct callchain_list *chain; bool unfolded = false; - int n = 0, inline_rows; + int n = 0; if (callchain_param.mode == CHAIN_FLAT) return callchain_node__count_flat_rows(node); @@ -281,10 +227,6 @@ static int callchain_node__count_rows(struct callchain_node *node) list_for_each_entry(chain, &node->val, list) { ++n; - if (symbol_conf.inline_name) { - inline_rows = callchain_list__inline_rows(chain); - n += inline_rows; - } unfolded = chain->unfolded; } @@ -432,19 +374,6 @@ static void hist_entry__init_have_children(struct hist_entry *he) he->init_have_children = true; } -static void hist_entry_init_inline_node(struct hist_entry *he) -{ - if (he->inline_node) - return; - - he->inline_node = inline_node__create(he->ms.map, he->ip); - - if (he->inline_node == NULL) - return; - - he->has_children = true; -} - static bool hist_browser__toggle_fold(struct hist_browser *browser) { struct hist_entry *he = browser->he_selection; @@ -476,12 +405,8 @@ static bool hist_browser__toggle_fold(struct hist_browser *browser) if (he->unfolded) { if (he->leaf) - if (he->inline_node) - he->nr_rows = inline__count_rows( - he->inline_node); - else - he->nr_rows = callchain__count_rows( - &he->sorted_chain); + he->nr_rows = callchain__count_rows( + &he->sorted_chain); else he->nr_rows = hierarchy_count_rows(browser, he, false); @@ -841,71 +766,6 @@ static bool hist_browser__check_dump_full(struct hist_browser *browser __maybe_u #define LEVEL_OFFSET_STEP 3 -static int hist_browser__show_inline(struct hist_browser *browser, - struct inline_node *node, - unsigned short row, - int offset) -{ - struct inline_list *ilist; - char buf[1024]; - int color, width, first_row; - - first_row = row; - width = browser->b.width - (LEVEL_OFFSET_STEP + 2); - list_for_each_entry(ilist, &node->val, list) { - if ((ilist->filename != NULL) || (ilist->funcname != NULL)) { - color = HE_COLORSET_NORMAL; - if (ui_browser__is_current_entry(&browser->b, row)) - color = HE_COLORSET_SELECTED; - - if (callchain_param.key == CCKEY_ADDRESS || - callchain_param.key == CCKEY_SRCLINE) { - if (ilist->filename != NULL) - scnprintf(buf, sizeof(buf), - "%s:%d (inline)", - ilist->filename, - ilist->line_nr); - else - scnprintf(buf, sizeof(buf), "??"); - } else if (ilist->funcname != NULL) - scnprintf(buf, sizeof(buf), "%s (inline)", - ilist->funcname); - else if (ilist->filename != NULL) - scnprintf(buf, sizeof(buf), - "%s:%d (inline)", - ilist->filename, - ilist->line_nr); - else - scnprintf(buf, sizeof(buf), "??"); - - ui_browser__set_color(&browser->b, color); - hist_browser__gotorc(browser, row, 0); - ui_browser__write_nstring(&browser->b, " ", - LEVEL_OFFSET_STEP + offset); - ui_browser__write_nstring(&browser->b, buf, width); - row++; - } - } - - return row - first_row; -} - -static size_t show_inline_list(struct hist_browser *browser, struct map *map, - u64 ip, int row, int offset) -{ - struct inline_node *node; - int ret; - - node = inline_node__create(map, ip); - if (node == NULL) - return 0; - - ret = hist_browser__show_inline(browser, node, row, offset); - - inline_node__delete(node); - return ret; -} - static int hist_browser__show_callchain_list(struct hist_browser *browser, struct callchain_node *node, struct callchain_list *chain, @@ -917,7 +777,7 @@ static int hist_browser__show_callchain_list(struct hist_browser *browser, char bf[1024], *alloc_str; char buf[64], *alloc_str2; const char *str; - int inline_rows = 0, ret = 1; + int ret = 1; if (arg->row_offset != 0) { arg->row_offset--; @@ -954,12 +814,7 @@ static int hist_browser__show_callchain_list(struct hist_browser *browser, free(alloc_str); free(alloc_str2); - if (symbol_conf.inline_name) { - inline_rows = show_inline_list(browser, chain->ms.map, - chain->ip, row + 1, offset); - } - - return ret + inline_rows; + return ret; } static bool check_percent_display(struct rb_node *node, u64 parent_total) @@ -1383,12 +1238,6 @@ static int hist_browser__show_entry(struct hist_browser *browser, folded_sign = hist_entry__folded(entry); } - if (symbol_conf.inline_name && - (!entry->has_children)) { - hist_entry_init_inline_node(entry); - folded_sign = hist_entry__folded(entry); - } - if (row_offset == 0) { struct hpp_arg arg = { .b = &browser->b, @@ -1420,8 +1269,7 @@ static int hist_browser__show_entry(struct hist_browser *browser, } if (first) { - if (symbol_conf.use_callchain || - symbol_conf.inline_name) { + if (symbol_conf.use_callchain) { ui_browser__printf(&browser->b, "%c ", folded_sign); width -= 2; } @@ -1463,15 +1311,11 @@ static int hist_browser__show_entry(struct hist_browser *browser, .is_current_entry = current_entry, }; - if (entry->inline_node) - printed += hist_browser__show_inline(browser, - entry->inline_node, row, 0); - else - printed += hist_browser__show_callchain(browser, - entry, 1, row, - hist_browser__show_callchain_entry, - &arg, - hist_browser__check_output_full); + printed += hist_browser__show_callchain(browser, + entry, 1, row, + hist_browser__show_callchain_entry, + &arg, + hist_browser__check_output_full); } return printed; diff --git a/tools/perf/ui/stdio/hist.c b/tools/perf/ui/stdio/hist.c index 8bdb7a500181..b6b9baac0e3b 100644 --- a/tools/perf/ui/stdio/hist.c +++ b/tools/perf/ui/stdio/hist.c @@ -21,64 +21,6 @@ static size_t callchain__fprintf_left_margin(FILE *fp, int left_margin) return ret; } -static size_t inline__fprintf(struct map *map, u64 ip, int left_margin, - int depth, int depth_mask, FILE *fp) -{ - struct dso *dso; - struct inline_node *node; - struct inline_list *ilist; - int ret = 0, i; - - if (map == NULL) - return 0; - - dso = map->dso; - if (dso == NULL) - return 0; - - node = dso__parse_addr_inlines(dso, - map__rip_2objdump(map, ip)); - if (node == NULL) - return 0; - - list_for_each_entry(ilist, &node->val, list) { - if ((ilist->filename != NULL) || (ilist->funcname != NULL)) { - ret += callchain__fprintf_left_margin(fp, left_margin); - - for (i = 0; i < depth; i++) { - if (depth_mask & (1 << i)) - ret += fprintf(fp, "|"); - else - ret += fprintf(fp, " "); - ret += fprintf(fp, " "); - } - - if (callchain_param.key == CCKEY_ADDRESS || - callchain_param.key == CCKEY_SRCLINE) { - if (ilist->filename != NULL) - ret += fprintf(fp, "%s:%d (inline)", - ilist->filename, - ilist->line_nr); - else - ret += fprintf(fp, "??"); - } else if (ilist->funcname != NULL) - ret += fprintf(fp, "%s (inline)", - ilist->funcname); - else if (ilist->filename != NULL) - ret += fprintf(fp, "%s:%d (inline)", - ilist->filename, - ilist->line_nr); - else - ret += fprintf(fp, "??"); - - ret += fprintf(fp, "\n"); - } - } - - inline_node__delete(node); - return ret; -} - static size_t ipchain__fprintf_graph_line(FILE *fp, int depth, int depth_mask, int left_margin) { @@ -137,9 +79,6 @@ static size_t ipchain__fprintf_graph(FILE *fp, struct callchain_node *node, fputc('\n', fp); free(alloc_str); - if (symbol_conf.inline_name) - ret += inline__fprintf(chain->ms.map, chain->ip, - left_margin, depth, depth_mask, fp); return ret; } @@ -314,13 +253,6 @@ static size_t callchain__fprintf_graph(FILE *fp, struct rb_root *root, if (++entries_printed == callchain_param.print_limit) break; - - if (symbol_conf.inline_name) - ret += inline__fprintf(chain->ms.map, - chain->ip, - left_margin, - 0, 0, - fp); } root = &cnode->rb_root; } @@ -600,7 +532,6 @@ static int hist_entry__fprintf(struct hist_entry *he, size_t size, { int ret; int callchain_ret = 0; - int inline_ret = 0; struct perf_hpp hpp = { .buf = bf, .size = size, @@ -622,13 +553,7 @@ static int hist_entry__fprintf(struct hist_entry *he, size_t size, callchain_ret = hist_entry_callchain__fprintf(he, total_period, 0, fp); - if (callchain_ret == 0 && symbol_conf.inline_name) { - inline_ret = inline__fprintf(he->ms.map, he->ip, 0, 0, 0, fp); - ret += inline_ret; - if (inline_ret > 0) - ret += fprintf(fp, "\n"); - } else - ret += callchain_ret; + ret += callchain_ret; return ret; } diff --git a/tools/perf/util/evsel_fprintf.c b/tools/perf/util/evsel_fprintf.c index 583f3a602506..f2c6c5ee11e8 100644 --- a/tools/perf/util/evsel_fprintf.c +++ b/tools/perf/util/evsel_fprintf.c @@ -169,38 +169,6 @@ int sample__fprintf_callchain(struct perf_sample *sample, int left_alignment, if (!print_oneline) printed += fprintf(fp, "\n"); - if (symbol_conf.inline_name && node->map) { - struct inline_node *inode; - - addr = map__rip_2objdump(node->map, node->ip), - inode = dso__parse_addr_inlines(node->map->dso, addr); - - if (inode) { - struct inline_list *ilist; - - list_for_each_entry(ilist, &inode->val, list) { - if (print_arrow) - printed += fprintf(fp, " <-"); - - /* IP is same, just skip it */ - if (print_ip) - printed += fprintf(fp, "%c%16s", - s, ""); - if (print_sym) - printed += fprintf(fp, " %s", - ilist->funcname); - if (print_srcline) - printed += fprintf(fp, "\n %s:%d", - ilist->filename, - ilist->line_nr); - if (!print_oneline) - printed += fprintf(fp, "\n"); - } - - inline_node__delete(inode); - } - } - if (symbol_conf.bt_stop_list && node->sym && strlist__has_entry(symbol_conf.bt_stop_list, diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index e60d8d8ea4c2..b0fa9c217e1c 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -1141,11 +1141,6 @@ void hist_entry__delete(struct hist_entry *he) zfree(&he->mem_info); } - if (he->inline_node) { - inline_node__delete(he->inline_node); - he->inline_node = NULL; - } - zfree(&he->stat_acc); free_srcline(he->srcline); if (he->srcfile && he->srcfile[0]) diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h index f36dc4980a6c..507d096aee4e 100644 --- a/tools/perf/util/sort.h +++ b/tools/perf/util/sort.h @@ -129,7 +129,6 @@ struct hist_entry { }; char *srcline; char *srcfile; - struct inline_node *inline_node; struct symbol *parent; struct branch_info *branch_info; struct hists *hists; -- cgit v1.2.3 From 40a342cda2cd9bc8f7bf81c5ce1a141584760757 Mon Sep 17 00:00:00 2001 From: Milian Wolff Date: Mon, 9 Oct 2017 22:32:56 +0200 Subject: perf callchain: Store srcline in callchain_cursor_node This is mostly a preparation to enable the creation of full callchain nodes for inline frames. Such frames will reference the IP of the non-inlined frame, but hold the symbol and srcline for an inlined location. As such, we won't be able to query the srcline on-demand based on the IP alone. Instead, we will leverage the functionality provided by this patch here, and store the srcline for the inlined nodes in the new srcline member of callchain_cursor_node. Note that this patch on its own leaks the srcline, as there is no free_callchain_cursor_node or similar. A future patch will add caching of the srcline and handle deletion properly. Signed-off-by: Milian Wolff Reviewed-by: Jiri Olsa Reviewed-by: Namhyung Kim Cc: David Ahern Cc: Peter Zijlstra Cc: Yao Jin Link: http://lkml.kernel.org/r/20171009203310.17362-3-milian.wolff@kdab.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/callchain.c | 31 +++++++++---------------------- tools/perf/util/callchain.h | 6 ++++-- tools/perf/util/machine.c | 18 ++++++++++++++++-- 3 files changed, 29 insertions(+), 26 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index a971caf3759d..e7ee794d1e5b 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c @@ -566,6 +566,7 @@ fill_node(struct callchain_node *node, struct callchain_cursor *cursor) call->ip = cursor_node->ip; call->ms.sym = cursor_node->sym; call->ms.map = map__get(cursor_node->map); + call->srcline = cursor_node->srcline; if (cursor_node->branch) { call->branch_count = 1; @@ -647,20 +648,11 @@ enum match_result { static enum match_result match_chain_srcline(struct callchain_cursor_node *node, struct callchain_list *cnode) { - char *left = NULL; - char *right = NULL; + const char *left = cnode->srcline; + const char *right = node->srcline; enum match_result ret = MATCH_EQ; int cmp; - if (cnode->ms.map) - left = get_srcline(cnode->ms.map->dso, - map__rip_2objdump(cnode->ms.map, cnode->ip), - cnode->ms.sym, true, false); - if (node->map) - right = get_srcline(node->map->dso, - map__rip_2objdump(node->map, node->ip), - node->sym, true, false); - if (left && right) cmp = strcmp(left, right); else if (!left && right) @@ -675,8 +667,6 @@ static enum match_result match_chain_srcline(struct callchain_cursor_node *node, if (cmp != 0) ret = cmp < 0 ? MATCH_LT : MATCH_GT; - free_srcline(left); - free_srcline(right); return ret; } @@ -969,7 +959,7 @@ merge_chain_branch(struct callchain_cursor *cursor, list_for_each_entry_safe(list, next_list, &src->val, list) { callchain_cursor_append(cursor, list->ip, list->ms.map, list->ms.sym, - false, NULL, 0, 0, 0); + false, NULL, 0, 0, 0, list->srcline); list_del(&list->list); map__zput(list->ms.map); free(list); @@ -1009,7 +999,8 @@ int callchain_merge(struct callchain_cursor *cursor, int callchain_cursor_append(struct callchain_cursor *cursor, u64 ip, struct map *map, struct symbol *sym, bool branch, struct branch_flags *flags, - int nr_loop_iter, u64 iter_cycles, u64 branch_from) + int nr_loop_iter, u64 iter_cycles, u64 branch_from, + const char *srcline) { struct callchain_cursor_node *node = *cursor->last; @@ -1028,6 +1019,7 @@ int callchain_cursor_append(struct callchain_cursor *cursor, node->branch = branch; node->nr_loop_iter = nr_loop_iter; node->iter_cycles = iter_cycles; + node->srcline = srcline; if (flags) memcpy(&node->branch_flags, flags, @@ -1115,12 +1107,7 @@ char *callchain_list__sym_name(struct callchain_list *cl, int printed; if (cl->ms.sym) { - if (show_srcline && cl->ms.map && !cl->srcline) - cl->srcline = get_srcline(cl->ms.map->dso, - map__rip_2objdump(cl->ms.map, - cl->ip), - cl->ms.sym, false, show_addr); - if (cl->srcline) + if (show_srcline && cl->srcline) printed = scnprintf(bf, bfsize, "%s %s", cl->ms.sym->name, cl->srcline); else @@ -1532,7 +1519,7 @@ int callchain_cursor__copy(struct callchain_cursor *dst, node->branch, &node->branch_flags, node->nr_loop_iter, node->iter_cycles, - node->branch_from); + node->branch_from, node->srcline); if (rc) break; diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h index 1ed6fc61d0a5..8f67b681cde9 100644 --- a/tools/perf/util/callchain.h +++ b/tools/perf/util/callchain.h @@ -121,7 +121,7 @@ struct callchain_list { u64 iter_count; u64 iter_cycles; struct branch_type_stat brtype_stat; - char *srcline; + const char *srcline; struct list_head list; }; @@ -135,6 +135,7 @@ struct callchain_cursor_node { u64 ip; struct map *map; struct symbol *sym; + const char *srcline; bool branch; struct branch_flags branch_flags; u64 branch_from; @@ -201,7 +202,8 @@ static inline void callchain_cursor_reset(struct callchain_cursor *cursor) int callchain_cursor_append(struct callchain_cursor *cursor, u64 ip, struct map *map, struct symbol *sym, bool branch, struct branch_flags *flags, - int nr_loop_iter, u64 iter_cycles, u64 branch_from); + int nr_loop_iter, u64 iter_cycles, u64 branch_from, + const char *srcline); /* Close a cursor writing session. Initialize for the reader */ static inline void callchain_cursor_commit(struct callchain_cursor *cursor) diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 7c3aa479201a..a37e1c056415 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -1709,6 +1709,15 @@ struct mem_info *sample__resolve_mem(struct perf_sample *sample, return mi; } +static char *callchain_srcline(struct map *map, struct symbol *sym, u64 ip) +{ + if (!map || callchain_param.key == CCKEY_FUNCTION) + return NULL; + + return get_srcline(map->dso, map__rip_2objdump(map, ip), + sym, false, callchain_param.key == CCKEY_ADDRESS); +} + struct iterations { int nr_loop_iter; u64 cycles; @@ -1728,6 +1737,7 @@ static int add_callchain_ip(struct thread *thread, struct addr_location al; int nr_loop_iter = 0; u64 iter_cycles = 0; + const char *srcline = NULL; al.filtered = 0; al.sym = NULL; @@ -1783,9 +1793,10 @@ static int add_callchain_ip(struct thread *thread, iter_cycles = iter->cycles; } + srcline = callchain_srcline(al.map, al.sym, al.addr); return callchain_cursor_append(cursor, al.addr, al.map, al.sym, branch, flags, nr_loop_iter, - iter_cycles, branch_from); + iter_cycles, branch_from, srcline); } struct branch_info *sample__resolve_bstack(struct perf_sample *sample, @@ -2101,12 +2112,15 @@ check_calls: static int unwind_entry(struct unwind_entry *entry, void *arg) { struct callchain_cursor *cursor = arg; + const char *srcline = NULL; if (symbol_conf.hide_unresolved && entry->sym == NULL) return 0; + + srcline = callchain_srcline(entry->map, entry->sym, entry->ip); return callchain_cursor_append(cursor, entry->ip, entry->map, entry->sym, - false, NULL, 0, 0, 0); + false, NULL, 0, 0, 0, srcline); } static int thread__resolve_callchain_unwind(struct thread *thread, -- cgit v1.2.3 From fea0cf842c7aa08950063264ab1cfbce4ba38c1b Mon Sep 17 00:00:00 2001 From: Milian Wolff Date: Mon, 9 Oct 2017 22:32:57 +0200 Subject: perf callchain: Refactor inline_list to operate on symbols This is a requirement to create real callchain entries for inlined frames. Since the list of inlines usually contains the target symbol too, i.e. the location where the frames get inlined to, we alias that symbol and reuse it as-is is. This ensures that other dependent functionality keeps working, most notably annotation of the target frames. For all other entries in the inline_list, a fake symbol is created. These are marked by new 'inlined' member which is set to true. Only those symbols are managed by the inline_list and get freed when the inline_list is deleted from within inline_node__delete. Signed-off-by: Milian Wolff Reviewed-by: Jiri Olsa Reviewed-by: Namhyung Kim Cc: David Ahern Cc: Peter Zijlstra Cc: Yao Jin Link: http://lkml.kernel.org/r/20171009203310.17362-4-milian.wolff@kdab.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/srcline.c | 93 ++++++++++++++++++++++++++++++++--------------- tools/perf/util/srcline.h | 7 +++- tools/perf/util/symbol.h | 1 + 3 files changed, 69 insertions(+), 32 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/util/srcline.c b/tools/perf/util/srcline.c index ed8e8d2de942..c0af61b355ed 100644 --- a/tools/perf/util/srcline.c +++ b/tools/perf/util/srcline.c @@ -33,29 +33,19 @@ static const char *dso__name(struct dso *dso) return dso_name; } -static int inline_list__append(char *filename, char *funcname, int line_nr, - struct inline_node *node, struct dso *dso) +static int inline_list__append(struct symbol *symbol, char *filename, + int line_nr, struct inline_node *node) { struct inline_list *ilist; - char *demangled; ilist = zalloc(sizeof(*ilist)); if (ilist == NULL) return -1; + ilist->symbol = symbol; ilist->filename = filename; ilist->line_nr = line_nr; - if (dso != NULL) { - demangled = dso__demangle_sym(dso, 0, funcname); - if (demangled == NULL) { - ilist->funcname = funcname; - } else { - ilist->funcname = demangled; - free(funcname); - } - } - if (callchain_param.order == ORDER_CALLEE) list_add_tail(&ilist->list, &node->val); else @@ -206,19 +196,56 @@ static void addr2line_cleanup(struct a2l_data *a2l) #define MAX_INLINE_NEST 1024 +static struct symbol *new_inline_sym(struct dso *dso, + struct symbol *base_sym, + const char *funcname) +{ + struct symbol *inline_sym; + char *demangled = NULL; + + if (dso) { + demangled = dso__demangle_sym(dso, 0, funcname); + if (demangled) + funcname = demangled; + } + + if (base_sym && strcmp(funcname, base_sym->name) == 0) { + /* reuse the real, existing symbol */ + inline_sym = base_sym; + /* ensure that we don't alias an inlined symbol, which could + * lead to double frees in inline_node__delete + */ + assert(!base_sym->inlined); + } else { + /* create a fake symbol for the inline frame */ + inline_sym = symbol__new(base_sym ? base_sym->start : 0, + base_sym ? base_sym->end : 0, + base_sym ? base_sym->binding : 0, + funcname); + if (inline_sym) + inline_sym->inlined = 1; + } + + free(demangled); + + return inline_sym; +} + static int inline_list__append_dso_a2l(struct dso *dso, - struct inline_node *node) + struct inline_node *node, + struct symbol *sym) { struct a2l_data *a2l = dso->a2l; - char *funcname = a2l->funcname ? strdup(a2l->funcname) : NULL; - char *filename = a2l->filename ? strdup(a2l->filename) : NULL; + struct symbol *inline_sym = new_inline_sym(dso, sym, a2l->funcname); - return inline_list__append(filename, funcname, a2l->line, node, dso); + return inline_list__append(inline_sym, strdup(a2l->filename), + a2l->line, node); } static int addr2line(const char *dso_name, u64 addr, char **file, unsigned int *line, struct dso *dso, - bool unwind_inlines, struct inline_node *node) + bool unwind_inlines, struct inline_node *node, + struct symbol *sym) { int ret = 0; struct a2l_data *a2l = dso->a2l; @@ -244,7 +271,7 @@ static int addr2line(const char *dso_name, u64 addr, if (unwind_inlines) { int cnt = 0; - if (node && inline_list__append_dso_a2l(dso, node)) + if (node && inline_list__append_dso_a2l(dso, node, sym)) return 0; while (bfd_find_inliner_info(a2l->abfd, &a2l->filename, @@ -255,7 +282,7 @@ static int addr2line(const char *dso_name, u64 addr, a2l->filename = NULL; if (node != NULL) { - if (inline_list__append_dso_a2l(dso, node)) + if (inline_list__append_dso_a2l(dso, node, sym)) return 0; // found at least one inline frame ret = 1; @@ -287,7 +314,7 @@ void dso__free_a2l(struct dso *dso) } static struct inline_node *addr2inlines(const char *dso_name, u64 addr, - struct dso *dso) + struct dso *dso, struct symbol *sym) { struct inline_node *node; @@ -300,7 +327,7 @@ static struct inline_node *addr2inlines(const char *dso_name, u64 addr, INIT_LIST_HEAD(&node->val); node->addr = addr; - if (!addr2line(dso_name, addr, NULL, NULL, dso, TRUE, node)) + if (!addr2line(dso_name, addr, NULL, NULL, dso, TRUE, node, sym)) goto out_free_inline_node; if (list_empty(&node->val)) @@ -340,7 +367,8 @@ static int addr2line(const char *dso_name, u64 addr, char **file, unsigned int *line_nr, struct dso *dso __maybe_unused, bool unwind_inlines __maybe_unused, - struct inline_node *node __maybe_unused) + struct inline_node *node __maybe_unused, + struct symbol *sym __maybe_unused) { FILE *fp; char cmd[PATH_MAX]; @@ -380,7 +408,8 @@ void dso__free_a2l(struct dso *dso __maybe_unused) } static struct inline_node *addr2inlines(const char *dso_name, u64 addr, - struct dso *dso __maybe_unused) + struct dso *dso __maybe_unused, + struct symbol *sym) { FILE *fp; char cmd[PATH_MAX]; @@ -408,13 +437,13 @@ static struct inline_node *addr2inlines(const char *dso_name, u64 addr, node->addr = addr; while (getline(&filename, &len, fp) != -1) { + if (filename_split(filename, &line_nr) != 1) { free(filename); goto out; } - if (inline_list__append(filename, NULL, line_nr, node, - NULL) != 0) + if (inline_list__append(sym, filename, line_nr, node) != 0) goto out; filename = NULL; @@ -454,7 +483,8 @@ char *__get_srcline(struct dso *dso, u64 addr, struct symbol *sym, if (dso_name == NULL) goto out; - if (!addr2line(dso_name, addr, &file, &line, dso, unwind_inlines, NULL)) + if (!addr2line(dso_name, addr, &file, &line, dso, + unwind_inlines, NULL, sym)) goto out; if (asprintf(&srcline, "%s:%u", @@ -500,7 +530,8 @@ char *get_srcline(struct dso *dso, u64 addr, struct symbol *sym, return __get_srcline(dso, addr, sym, show_sym, show_addr, false); } -struct inline_node *dso__parse_addr_inlines(struct dso *dso, u64 addr) +struct inline_node *dso__parse_addr_inlines(struct dso *dso, u64 addr, + struct symbol *sym) { const char *dso_name; @@ -508,7 +539,7 @@ struct inline_node *dso__parse_addr_inlines(struct dso *dso, u64 addr) if (dso_name == NULL) return NULL; - return addr2inlines(dso_name, addr, dso); + return addr2inlines(dso_name, addr, dso, sym); } void inline_node__delete(struct inline_node *node) @@ -518,7 +549,9 @@ void inline_node__delete(struct inline_node *node) list_for_each_entry_safe(ilist, tmp, &node->val, list) { list_del_init(&ilist->list); zfree(&ilist->filename); - zfree(&ilist->funcname); + /* only the inlined symbols are owned by the list */ + if (ilist->symbol && ilist->symbol->inlined) + symbol__delete(ilist->symbol); free(ilist); } diff --git a/tools/perf/util/srcline.h b/tools/perf/util/srcline.h index 7b52ba88676e..730bfd6926d7 100644 --- a/tools/perf/util/srcline.h +++ b/tools/perf/util/srcline.h @@ -17,8 +17,8 @@ void free_srcline(char *srcline); #define SRCLINE_UNKNOWN ((char *) "??:0") struct inline_list { + struct symbol *symbol; char *filename; - char *funcname; unsigned int line_nr; struct list_head list; }; @@ -28,7 +28,10 @@ struct inline_node { struct list_head val; }; -struct inline_node *dso__parse_addr_inlines(struct dso *dso, u64 addr); +/* parse inlined frames for the given address */ +struct inline_node *dso__parse_addr_inlines(struct dso *dso, u64 addr, + struct symbol *sym); +/* free resources associated to the inline node list */ void inline_node__delete(struct inline_node *node); #endif /* PERF_SRCLINE_H */ diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index aad99e7e179b..d880a059babb 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -59,6 +59,7 @@ struct symbol { u8 binding; u8 idle:1; u8 ignore:1; + u8 inlined:1; u8 arch_sym; char name[0]; }; -- cgit v1.2.3 From 2be8832f3c51cf9e36a3e80ff57f4137505c2ba4 Mon Sep 17 00:00:00 2001 From: Milian Wolff Date: Mon, 9 Oct 2017 22:32:58 +0200 Subject: perf callchain: Refactor inline_list to store srcline string directly This is a preparation for the creation of real callchain entries for inlined frames. The rest of the perf code uses the srcline string. As such, using that also for the srcline API allows us to simplify some of the upcoming code. Most notably, it will allow us to cache the srcline for a given inline node and reuse it for different callchain entries. Signed-off-by: Milian Wolff Reviewed-by: Jiri Olsa Reviewed-by: Namhyung Kim Cc: David Ahern Cc: Peter Zijlstra Cc: Yao Jin Link: http://lkml.kernel.org/r/20171009203310.17362-5-milian.wolff@kdab.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/srcline.c | 54 +++++++++++++++++++++++++++++++++++------------ tools/perf/util/srcline.h | 3 +-- 2 files changed, 41 insertions(+), 16 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/util/srcline.c b/tools/perf/util/srcline.c index c0af61b355ed..f202fc7827df 100644 --- a/tools/perf/util/srcline.c +++ b/tools/perf/util/srcline.c @@ -33,8 +33,8 @@ static const char *dso__name(struct dso *dso) return dso_name; } -static int inline_list__append(struct symbol *symbol, char *filename, - int line_nr, struct inline_node *node) +static int inline_list__append(struct symbol *symbol, char *srcline, + struct inline_node *node) { struct inline_list *ilist; @@ -43,8 +43,7 @@ static int inline_list__append(struct symbol *symbol, char *filename, return -1; ilist->symbol = symbol; - ilist->filename = filename; - ilist->line_nr = line_nr; + ilist->srcline = srcline; if (callchain_param.order == ORDER_CALLEE) list_add_tail(&ilist->list, &node->val); @@ -54,6 +53,30 @@ static int inline_list__append(struct symbol *symbol, char *filename, return 0; } +/* basename version that takes a const input string */ +static const char *gnu_basename(const char *path) +{ + const char *base = strrchr(path, '/'); + + return base ? base + 1 : path; +} + +static char *srcline_from_fileline(const char *file, unsigned int line) +{ + char *srcline; + + if (!file) + return NULL; + + if (!srcline_full_filename) + file = gnu_basename(file); + + if (asprintf(&srcline, "%s:%u", file, line) < 0) + return NULL; + + return srcline; +} + #ifdef HAVE_LIBBFD_SUPPORT /* @@ -237,9 +260,12 @@ static int inline_list__append_dso_a2l(struct dso *dso, { struct a2l_data *a2l = dso->a2l; struct symbol *inline_sym = new_inline_sym(dso, sym, a2l->funcname); + char *srcline = NULL; - return inline_list__append(inline_sym, strdup(a2l->filename), - a2l->line, node); + if (a2l->filename) + srcline = srcline_from_fileline(a2l->filename, a2l->line); + + return inline_list__append(inline_sym, srcline, node); } static int addr2line(const char *dso_name, u64 addr, @@ -437,13 +463,15 @@ static struct inline_node *addr2inlines(const char *dso_name, u64 addr, node->addr = addr; while (getline(&filename, &len, fp) != -1) { + char *srcline; if (filename_split(filename, &line_nr) != 1) { free(filename); goto out; } - if (inline_list__append(sym, filename, line_nr, node) != 0) + srcline = srcline_from_fileline(filename, line_nr); + if (inline_list__append(sym, srcline, node) != 0) goto out; filename = NULL; @@ -487,16 +515,14 @@ char *__get_srcline(struct dso *dso, u64 addr, struct symbol *sym, unwind_inlines, NULL, sym)) goto out; - if (asprintf(&srcline, "%s:%u", - srcline_full_filename ? file : basename(file), - line) < 0) { - free(file); + srcline = srcline_from_fileline(file, line); + free(file); + + if (!srcline) goto out; - } dso->a2l_fails = 0; - free(file); return srcline; out: @@ -548,7 +574,7 @@ void inline_node__delete(struct inline_node *node) list_for_each_entry_safe(ilist, tmp, &node->val, list) { list_del_init(&ilist->list); - zfree(&ilist->filename); + free_srcline(ilist->srcline); /* only the inlined symbols are owned by the list */ if (ilist->symbol && ilist->symbol->inlined) symbol__delete(ilist->symbol); diff --git a/tools/perf/util/srcline.h b/tools/perf/util/srcline.h index 730bfd6926d7..0201ed2c0b9c 100644 --- a/tools/perf/util/srcline.h +++ b/tools/perf/util/srcline.h @@ -18,8 +18,7 @@ void free_srcline(char *srcline); struct inline_list { struct symbol *symbol; - char *filename; - unsigned int line_nr; + char *srcline; struct list_head list; }; -- cgit v1.2.3 From 11ea2515f32e783b9a7984c148e742c377383915 Mon Sep 17 00:00:00 2001 From: Milian Wolff Date: Mon, 9 Oct 2017 22:32:59 +0200 Subject: perf callchain: Create real callchain entries for inlined frames The inline_node structs are maintained by the new dso->inlines tree. This in turn keeps ownership of the fake symbols and srcline string representing an inline frame. This tree is sorted by address to allow quick lookups. All other entries of the symbol beside the function name are unused for inline frames. The advantage of this approach is that all existing users of the callchain API can now transparently display inlined frames without having to patch their code. Signed-off-by: Milian Wolff Reviewed-by: Jiri Olsa Reviewed-by: Namhyung Kim Cc: David Ahern Cc: Peter Zijlstra Cc: Yao Jin Link: http://lkml.kernel.org/r/20171009203310.17362-6-milian.wolff@kdab.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/dso.c | 5 +++++ tools/perf/util/dso.h | 1 + tools/perf/util/machine.c | 37 ++++++++++++++++++++++++++++++++++ tools/perf/util/srcline.c | 51 +++++++++++++++++++++++++++++++++++++++++++++++ tools/perf/util/srcline.h | 9 +++++++++ 5 files changed, 103 insertions(+) (limited to 'tools/perf') diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c index 339e52971380..75c8250b3b8a 100644 --- a/tools/perf/util/dso.c +++ b/tools/perf/util/dso.c @@ -10,6 +10,7 @@ #include "compress.h" #include "path.h" #include "symbol.h" +#include "srcline.h" #include "dso.h" #include "machine.h" #include "auxtrace.h" @@ -1201,6 +1202,7 @@ struct dso *dso__new(const char *name) for (i = 0; i < MAP__NR_TYPES; ++i) dso->symbols[i] = dso->symbol_names[i] = RB_ROOT; dso->data.cache = RB_ROOT; + dso->inlined_nodes = RB_ROOT; dso->data.fd = -1; dso->data.status = DSO_DATA_STATUS_UNKNOWN; dso->symtab_type = DSO_BINARY_TYPE__NOT_FOUND; @@ -1232,6 +1234,9 @@ void dso__delete(struct dso *dso) if (!RB_EMPTY_NODE(&dso->rb_node)) pr_err("DSO %s is still in rbtree when being deleted!\n", dso->long_name); + + /* free inlines first, as they reference symbols */ + inlines__tree_delete(&dso->inlined_nodes); for (i = 0; i < MAP__NR_TYPES; ++i) symbols__delete(&dso->symbols[i]); diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h index a2bbb21f301c..122eca0d242d 100644 --- a/tools/perf/util/dso.h +++ b/tools/perf/util/dso.h @@ -141,6 +141,7 @@ struct dso { struct rb_root *root; /* root of rbtree that rb_node is in */ struct rb_root symbols[MAP__NR_TYPES]; struct rb_root symbol_names[MAP__NR_TYPES]; + struct rb_root inlined_nodes; struct { u64 addr; struct symbol *symbol; diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index a37e1c056415..3d049cb313ac 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -2109,6 +2109,40 @@ check_calls: return 0; } +static int append_inlines(struct callchain_cursor *cursor, + struct map *map, struct symbol *sym, u64 ip) +{ + struct inline_node *inline_node; + struct inline_list *ilist; + u64 addr; + + if (!symbol_conf.inline_name || !map || !sym) + return 1; + + addr = map__rip_2objdump(map, ip); + + inline_node = inlines__tree_find(&map->dso->inlined_nodes, addr); + if (!inline_node) { + inline_node = dso__parse_addr_inlines(map->dso, addr, sym); + if (!inline_node) + return 1; + + inlines__tree_insert(&map->dso->inlined_nodes, inline_node); + } + + list_for_each_entry(ilist, &inline_node->val, list) { + int ret = callchain_cursor_append(cursor, ip, map, + ilist->symbol, false, + NULL, 0, 0, 0, + ilist->srcline); + + if (ret != 0) + return ret; + } + + return 0; +} + static int unwind_entry(struct unwind_entry *entry, void *arg) { struct callchain_cursor *cursor = arg; @@ -2117,6 +2151,9 @@ static int unwind_entry(struct unwind_entry *entry, void *arg) if (symbol_conf.hide_unresolved && entry->sym == NULL) return 0; + if (append_inlines(cursor, entry->map, entry->sym, entry->ip) == 0) + return 0; + srcline = callchain_srcline(entry->map, entry->sym, entry->ip); return callchain_cursor_append(cursor, entry->ip, entry->map, entry->sym, diff --git a/tools/perf/util/srcline.c b/tools/perf/util/srcline.c index f202fc7827df..8bea6621d657 100644 --- a/tools/perf/util/srcline.c +++ b/tools/perf/util/srcline.c @@ -583,3 +583,54 @@ void inline_node__delete(struct inline_node *node) free(node); } + +void inlines__tree_insert(struct rb_root *tree, struct inline_node *inlines) +{ + struct rb_node **p = &tree->rb_node; + struct rb_node *parent = NULL; + const u64 addr = inlines->addr; + struct inline_node *i; + + while (*p != NULL) { + parent = *p; + i = rb_entry(parent, struct inline_node, rb_node); + if (addr < i->addr) + p = &(*p)->rb_left; + else + p = &(*p)->rb_right; + } + rb_link_node(&inlines->rb_node, parent, p); + rb_insert_color(&inlines->rb_node, tree); +} + +struct inline_node *inlines__tree_find(struct rb_root *tree, u64 addr) +{ + struct rb_node *n = tree->rb_node; + + while (n) { + struct inline_node *i = rb_entry(n, struct inline_node, + rb_node); + + if (addr < i->addr) + n = n->rb_left; + else if (addr > i->addr) + n = n->rb_right; + else + return i; + } + + return NULL; +} + +void inlines__tree_delete(struct rb_root *tree) +{ + struct inline_node *pos; + struct rb_node *next = rb_first(tree); + + while (next) { + pos = rb_entry(next, struct inline_node, rb_node); + next = rb_next(&pos->rb_node); + rb_erase(&pos->rb_node, tree); + inline_node__delete(pos); + } +} diff --git a/tools/perf/util/srcline.h b/tools/perf/util/srcline.h index 0201ed2c0b9c..ebe38cd22294 100644 --- a/tools/perf/util/srcline.h +++ b/tools/perf/util/srcline.h @@ -2,6 +2,7 @@ #define PERF_SRCLINE_H #include +#include #include struct dso; @@ -25,6 +26,7 @@ struct inline_list { struct inline_node { u64 addr; struct list_head val; + struct rb_node rb_node; }; /* parse inlined frames for the given address */ @@ -33,4 +35,11 @@ struct inline_node *dso__parse_addr_inlines(struct dso *dso, u64 addr, /* free resources associated to the inline node list */ void inline_node__delete(struct inline_node *node); +/* insert the inline node list into the DSO, which will take ownership */ +void inlines__tree_insert(struct rb_root *tree, struct inline_node *inlines); +/* find previously inserted inline node list */ +struct inline_node *inlines__tree_find(struct rb_root *tree, u64 addr); +/* delete all nodes within the tree of inline_node s */ +void inlines__tree_delete(struct rb_root *tree); + #endif /* PERF_SRCLINE_H */ -- cgit v1.2.3 From cbe50f61727f538f05e63186c2e0022182a3a28f Mon Sep 17 00:00:00 2001 From: Milian Wolff Date: Mon, 9 Oct 2017 22:33:00 +0200 Subject: perf report: Fall-back to function name comparison for -g srcline When a callchain entry has no srcline available, we ended up comparing the instruction pointer. I consider this to be not too useful. Rather, I think we should group the entries by function name, which this patch adds. For people who want to split the data on the IP boundary, using `-g address` is the correct choice. Before: ~~~~~ 100.00% 38.86% [.] main | |--61.14%--main inlining.cpp:14 | std::norm complex:664 | std::_Norm_helper::_S_do_it complex:654 | std::abs complex:597 | std::__complex_abs complex:589 | | | |--56.03%--hypot | | | | | |--8.45%--__hypot_finite | | | | | |--7.62%--__hypot_finite | | | | | |--2.29%--__hypot_finite | | | | | |--2.24%--__hypot_finite | | | | | |--2.06%--__hypot_finite | | | | | |--1.81%--__hypot_finite ... ~~~~~ After: ~~~~~ 100.00% 38.86% [.] main | |--61.14%--main inlining.cpp:14 | std::norm complex:664 | std::_Norm_helper::_S_do_it complex:654 | std::abs complex:597 | std::__complex_abs complex:589 | | | |--60.29%--hypot | | | | | --56.03%--__hypot_finite | | | --0.85%--cabs ~~~~~ Signed-off-by: Milian Wolff Reviewed-by: Jiri Olsa Reviewed-by: Namhyung Kim Cc: David Ahern Cc: Peter Zijlstra Cc: Yao Jin Link: http://lkml.kernel.org/r/20171009203310.17362-7-milian.wolff@kdab.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/callchain.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index e7ee794d1e5b..0f2ba493a7a3 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c @@ -645,11 +645,9 @@ enum match_result { MATCH_GT, }; -static enum match_result match_chain_srcline(struct callchain_cursor_node *node, - struct callchain_list *cnode) +static enum match_result match_chain_strings(const char *left, + const char *right) { - const char *left = cnode->srcline; - const char *right = node->srcline; enum match_result ret = MATCH_EQ; int cmp; @@ -659,10 +657,8 @@ static enum match_result match_chain_srcline(struct callchain_cursor_node *node, cmp = 1; else if (left && !right) cmp = -1; - else if (cnode->ip == node->ip) - cmp = 0; else - cmp = (cnode->ip < node->ip) ? -1 : 1; + return MATCH_ERROR; if (cmp != 0) ret = cmp < 0 ? MATCH_LT : MATCH_GT; @@ -679,10 +675,18 @@ static enum match_result match_chain(struct callchain_cursor_node *node, struct dso *right_dso = NULL; if (callchain_param.key == CCKEY_SRCLINE) { - enum match_result match = match_chain_srcline(node, cnode); + enum match_result match = match_chain_strings(cnode->srcline, + node->srcline); + + /* if no srcline is available, fallback to symbol name */ + if (match == MATCH_ERROR && cnode->ms.sym && node->sym) + match = match_chain_strings(cnode->ms.sym->name, + node->sym->name); if (match != MATCH_ERROR) return match; + + /* otherwise fall-back to IP-based comparison below */ } if (cnode->ms.sym && sym && callchain_param.key == CCKEY_FUNCTION) { -- cgit v1.2.3 From 8932f8071cae8a12dfd5f492247777ee176b0da4 Mon Sep 17 00:00:00 2001 From: Milian Wolff Date: Mon, 9 Oct 2017 22:33:01 +0200 Subject: perf callchain: Mark inlined frames in output by " (inlined)" suffix The original patch that introduced inline frame output in the various browsers used this suffix already. The new centralized approach that uses fake symbols for inlined frames was missing this approach so far. Instead of changing the symbol name itself, we only print the suffix where needed. This allows us to efficiently lookup the symbol for a given name without first having to append the suffix before the lookup. Signed-off-by: Milian Wolff Reviewed-by: Jiri Olsa Reviewed-by: Namhyung Kim Cc: David Ahern Cc: Peter Zijlstra Cc: Yao Jin Link: http://lkml.kernel.org/r/20171009203310.17362-8-milian.wolff@kdab.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/callchain.c | 10 +++++++--- tools/perf/util/sort.c | 3 +++ 2 files changed, 10 insertions(+), 3 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index 0f2ba493a7a3..77031efdca5c 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c @@ -1111,11 +1111,15 @@ char *callchain_list__sym_name(struct callchain_list *cl, int printed; if (cl->ms.sym) { + const char *inlined = cl->ms.sym->inlined ? " (inlined)" : ""; + if (show_srcline && cl->srcline) - printed = scnprintf(bf, bfsize, "%s %s", - cl->ms.sym->name, cl->srcline); + printed = scnprintf(bf, bfsize, "%s %s%s", + cl->ms.sym->name, cl->srcline, + inlined); else - printed = scnprintf(bf, bfsize, "%s", cl->ms.sym->name); + printed = scnprintf(bf, bfsize, "%s%s", + cl->ms.sym->name, inlined); } else printed = scnprintf(bf, bfsize, "%#" PRIx64, cl->ip); diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index eb3ab902a1c0..acb9210fd18a 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -283,6 +283,9 @@ static int _hist_entry__sym_snprintf(struct map *map, struct symbol *sym, ret += repsep_snprintf(bf + ret, size - ret, "%.*s", width - ret, sym->name); + if (sym->inlined) + ret += repsep_snprintf(bf + ret, size - ret, + " (inlined)"); } } else { size_t len = BITS_PER_LONG / 4; -- cgit v1.2.3 From 9628b56dc1240ce0faa3bd9b7a3390fa4451c59f Mon Sep 17 00:00:00 2001 From: Milian Wolff Date: Mon, 9 Oct 2017 22:33:02 +0200 Subject: perf script: Mark inlined frames and do not print DSO for them Instead of showing the (repeated) DSO name of the non-inlined frame, we now show the "(inlined)" suffix instead. Before: 214f7 __hypot_finite (/usr/lib/libm-2.25.so) ace3 hypot (/usr/lib/libm-2.25.so) a4a std::__complex_abs (/home/milian/projects/src/perf-tests/inlining) a4a std::abs (/home/milian/projects/src/perf-tests/inlining) a4a std::_Norm_helper::_S_do_it (/home/milian/projects/src/perf-tests/inlining) a4a std::norm (/home/milian/projects/src/perf-tests/inlining) a4a main (/home/milian/projects/src/perf-tests/inlining) 20510 __libc_start_main (/usr/lib/libc-2.25.so) bd9 _start (/home/milian/projects/src/perf-tests/inlining) After: 214f7 __hypot_finite (/usr/lib/libm-2.25.so) ace3 hypot (/usr/lib/libm-2.25.so) a4a std::__complex_abs (inlined) a4a std::abs (inlined) a4a std::_Norm_helper::_S_do_it (inlined) a4a std::norm (inlined) a4a main (/home/milian/projects/src/perf-tests/inlining) 20510 __libc_start_main (/usr/lib/libc-2.25.so) bd9 _start (/home/milian/projects/src/perf-tests/inlining) Signed-off-by: Milian Wolff Reviewed-by: Jiri Olsa Reviewed-by: Namhyung Kim Cc: David Ahern Cc: Peter Zijlstra Cc: Yao Jin Link: http://lkml.kernel.org/r/20171009203310.17362-9-milian.wolff@kdab.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evsel_fprintf.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'tools/perf') diff --git a/tools/perf/util/evsel_fprintf.c b/tools/perf/util/evsel_fprintf.c index f2c6c5ee11e8..5b9e89257aa7 100644 --- a/tools/perf/util/evsel_fprintf.c +++ b/tools/perf/util/evsel_fprintf.c @@ -157,7 +157,7 @@ int sample__fprintf_callchain(struct perf_sample *sample, int left_alignment, } } - if (print_dso) { + if (print_dso && (!node->sym || !node->sym->inlined)) { printed += fprintf(fp, " ("); printed += map__fprintf_dsoname(node->map, fp); printed += fprintf(fp, ")"); @@ -166,6 +166,9 @@ int sample__fprintf_callchain(struct perf_sample *sample, int left_alignment, if (print_srcline) printed += map__fprintf_srcline(node->map, addr, "\n ", fp); + if (node->sym && node->sym->inlined) + printed += fprintf(fp, " (inlined)"); + if (!print_oneline) printed += fprintf(fp, "\n"); -- cgit v1.2.3 From 9856240ad3269f2fdab0b2fa4400ef8aab792061 Mon Sep 17 00:00:00 2001 From: Milian Wolff Date: Mon, 9 Oct 2017 22:33:03 +0200 Subject: perf callchain: Compare symbol name for inlined frames when matching The fake symbols we create for inlined frames will represent different functions but can use the symbol start address. This leads to issues when different inline branches all lead to the same function. Before: ~~~~~ $ perf report -s sym -i perf.inlining.data --inline --stdio -g function ... --38.86%--_start __libc_start_main main | --37.57%--std::norm (inlined) std::_Norm_helper::_S_do_it (inlined) | --36.36%--std::abs (inlined) std::__complex_abs (inlined) | --12.24%--std::linear_congruential_engine::operator() (inlined) std::__detail::__mod (inlined) std::__detail::_Mod::__calc (inlined) ~~~~~ Note that this backtrace representation is completely bogus. Complex abs does not call the linear congruential engine! It is just a side-effect of a longer inlined stack being appended to a shorter, different inlined stack, both of which originate in the same function (main). This patch fixes the issue: ~~~~~ $ perf report -s sym -i perf.inlining.data --inline --stdio -g function ... --38.86%--_start __libc_start_main main | |--35.59%--std::uniform_real_distribution::operator() > (inlined) | std::uniform_real_distribution::operator() > (inlined) | | | --34.37%--std::__detail::_Adaptor, double>::operator() (inlined) | std::generate_canonical > (inlined) | | | --12.24%--std::linear_congruential_engine::operator() (inlined) | std::__detail::__mod (inlined) | std::__detail::_Mod::__calc (inlined) | --1.99%--std::norm (inlined) std::_Norm_helper::_S_do_it (inlined) std::abs (inlined) std::__complex_abs (inlined) ~~~~~ Signed-off-by: Milian Wolff Reviewed-by: Jiri Olsa Reviewed-by: Namhyung Kim Cc: David Ahern Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: Yao Jin Link: http://lkml.kernel.org/r/20171009203310.17362-10-milian.wolff@kdab.com Cc: Arnaldo Carvalho de Melo [ Fix up conflict with c1fbc0cf81f1 ("perf callchain: Compare dsos (as well) for CCKEY_FUNCTION"), remove unneeded hunk ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/callchain.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'tools/perf') diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index 77031efdca5c..35a920f09503 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c @@ -690,6 +690,14 @@ static enum match_result match_chain(struct callchain_cursor_node *node, } if (cnode->ms.sym && sym && callchain_param.key == CCKEY_FUNCTION) { + /* + * Compare inlined frames based on their symbol name because + * different inlined frames will have the same symbol start + */ + if (cnode->ms.sym->inlined || node->sym->inlined) + return match_chain_strings(cnode->ms.sym->name, + node->sym->name); + left = cnode->ms.sym->start; right = sym->start; left_dso = cnode->ms.map->dso; -- cgit v1.2.3 From aa441895f7b4ff5394d4964a8e6749f3866e44d0 Mon Sep 17 00:00:00 2001 From: Milian Wolff Date: Mon, 9 Oct 2017 22:33:04 +0200 Subject: perf report: Compare symbol name for inlined frames when sorting Similar to the callstack frame matching, we also have to compare the symbol name when sorting hist entries. The reason is twofold: On one hand, multiple inlined functions will use the same symbol start/end values of the parent, non-inlined symbol. As such, all of these symbols often end up missing from top-level report, as they get merged with the non-inlined frame. On the other hand, multiple different functions may end up inlining the same function, and we need to aggregate these values properly. Before: ~~~~~ perf report --stdio --inline -g none # Children Self Command Shared Object Symbol # ........ ........ ............ ............. ................................... # 100.00% 39.69% cpp-inlining cpp-inlining [.] main 100.00% 0.00% cpp-inlining cpp-inlining [.] _start 100.00% 0.00% cpp-inlining libc-2.25.so [.] __libc_start_main 97.03% 0.00% cpp-inlining cpp-inlining [.] std::norm (inlined) 59.53% 4.26% cpp-inlining libm-2.25.so [.] hypot 55.21% 55.08% cpp-inlining libm-2.25.so [.] __hypot_finite 0.52% 0.52% cpp-inlining libm-2.25.so [.] cabs ~~~~~ After: ~~~~~ perf report --stdio --inline -g none # Children Self Command Shared Object Symbol # ........ ........ ............ ............. ................................................................................................................................... # 100.00% 39.69% cpp-inlining cpp-inlining [.] main 100.00% 0.00% cpp-inlining cpp-inlining [.] _start 100.00% 0.00% cpp-inlining libc-2.25.so [.] __libc_start_main 62.57% 0.00% cpp-inlining cpp-inlining [.] std::_Norm_helper::_S_do_it (inlined) 62.57% 0.00% cpp-inlining cpp-inlining [.] std::__complex_abs (inlined) 62.57% 0.00% cpp-inlining cpp-inlining [.] std::abs (inlined) 62.57% 0.00% cpp-inlining cpp-inlining [.] std::norm (inlined) 59.53% 4.26% cpp-inlining libm-2.25.so [.] hypot 55.21% 55.08% cpp-inlining libm-2.25.so [.] __hypot_finite 34.46% 0.00% cpp-inlining cpp-inlining [.] std::uniform_real_distribution::operator() > (inlined) 32.39% 0.00% cpp-inlining cpp-inlining [.] std::__detail::_Adaptor, double>::operator() (inlined) 32.39% 0.00% cpp-inlining cpp-inlining [.] std::generate_canonical > (inlined) 12.29% 0.00% cpp-inlining cpp-inlining [.] std::__detail::_Mod::__calc (inlined) 12.29% 0.00% cpp-inlining cpp-inlining [.] std::__detail::__mod (inlined) 12.29% 0.00% cpp-inlining cpp-inlining [.] std::linear_congruential_engine::operator() (inlined) 0.52% 0.52% cpp-inlining libm-2.25.so [.] cabs ~~~~~ Signed-off-by: Milian Wolff Reviewed-by: Jiri Olsa Reviewed-by: Namhyung Kim Cc: David Ahern Cc: Peter Zijlstra Cc: Yao Jin Link: http://lkml.kernel.org/r/20171009203310.17362-11-milian.wolff@kdab.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/sort.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'tools/perf') diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index acb9210fd18a..006d10a0dc96 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -225,6 +225,9 @@ static int64_t _sort__sym_cmp(struct symbol *sym_l, struct symbol *sym_r) if (sym_l == sym_r) return 0; + if (sym_l->inlined || sym_r->inlined) + return strcmp(sym_l->name, sym_r->name); + if (sym_l->start != sym_r->start) return (int64_t)(sym_r->start - sym_l->start); -- cgit v1.2.3 From bf36eb5c4b3ef0ebfb19b1a67a5fa5821e6c9fa7 Mon Sep 17 00:00:00 2001 From: Milian Wolff Date: Fri, 20 Oct 2017 12:14:47 -0300 Subject: perf report: Properly handle branch count in match_chain() Some of the code paths I introduced before returned too early without running the code to handle a node's branch count. By refactoring match_chain to only have one exit point, this can be remedied. Signed-off-by: Milian Wolff Acked-by: Namhyung Kim Cc: David Ahern Cc: Jin Yao Cc: Peter Zijlstra Cc: Ravi Bangoria Link: http://lkml.kernel.org/r/1707691.qaJ269GSZW@agathebauer Link: http://lkml.kernel.org/r/20171018185350.14893-2-milian.wolff@kdab.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/callchain.c | 140 ++++++++++++++++++++++++-------------------- 1 file changed, 78 insertions(+), 62 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index 35a920f09503..19bfcadcf891 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c @@ -666,83 +666,99 @@ static enum match_result match_chain_strings(const char *left, return ret; } -static enum match_result match_chain(struct callchain_cursor_node *node, - struct callchain_list *cnode) +/* + * We need to always use relative addresses because we're aggregating + * callchains from multiple threads, i.e. different address spaces, so + * comparing absolute addresses make no sense as a symbol in a DSO may end up + * in a different address when used in a different binary or even the same + * binary but with some sort of address randomization technique, thus we need + * to compare just relative addresses. -acme + */ +static enum match_result match_chain_dso_addresses(struct map *left_map, u64 left_ip, + struct map *right_map, u64 right_ip) { - struct symbol *sym = node->sym; - u64 left, right; - struct dso *left_dso = NULL; - struct dso *right_dso = NULL; + struct dso *left_dso = left_map ? left_map->dso : NULL; + struct dso *right_dso = right_map ? right_map->dso : NULL; - if (callchain_param.key == CCKEY_SRCLINE) { - enum match_result match = match_chain_strings(cnode->srcline, - node->srcline); + if (left_dso != right_dso) + return left_dso < right_dso ? MATCH_LT : MATCH_GT; - /* if no srcline is available, fallback to symbol name */ - if (match == MATCH_ERROR && cnode->ms.sym && node->sym) - match = match_chain_strings(cnode->ms.sym->name, - node->sym->name); + if (left_ip != right_ip) + return left_ip < right_ip ? MATCH_LT : MATCH_GT; - if (match != MATCH_ERROR) - return match; + return MATCH_EQ; +} - /* otherwise fall-back to IP-based comparison below */ - } +static enum match_result match_chain(struct callchain_cursor_node *node, + struct callchain_list *cnode) +{ + enum match_result match = MATCH_ERROR; - if (cnode->ms.sym && sym && callchain_param.key == CCKEY_FUNCTION) { - /* - * Compare inlined frames based on their symbol name because - * different inlined frames will have the same symbol start - */ - if (cnode->ms.sym->inlined || node->sym->inlined) - return match_chain_strings(cnode->ms.sym->name, - node->sym->name); - - left = cnode->ms.sym->start; - right = sym->start; - left_dso = cnode->ms.map->dso; - right_dso = node->map->dso; - } else { - left = cnode->ip; - right = node->ip; + switch (callchain_param.key) { + case CCKEY_SRCLINE: + match = match_chain_strings(cnode->srcline, node->srcline); + if (match != MATCH_ERROR) + break; + /* otherwise fall-back to symbol-based comparison below */ + __fallthrough; + case CCKEY_FUNCTION: + if (node->sym && cnode->ms.sym) { + /* + * Compare inlined frames based on their symbol name + * because different inlined frames will have the same + * symbol start. Otherwise do a faster comparison based + * on the symbol start address. + */ + if (cnode->ms.sym->inlined || node->sym->inlined) { + match = match_chain_strings(cnode->ms.sym->name, + node->sym->name); + if (match != MATCH_ERROR) + break; + } else { + match = match_chain_dso_addresses(cnode->ms.map, cnode->ms.sym->start, + node->map, node->sym->start); + break; + } + } + /* otherwise fall-back to IP-based comparison below */ + __fallthrough; + case CCKEY_ADDRESS: + default: + match = match_chain_dso_addresses(cnode->ms.map, cnode->ip, node->map, node->ip); + break; } - if (left == right && left_dso == right_dso) { - if (node->branch) { - cnode->branch_count++; + if (match == MATCH_EQ && node->branch) { + cnode->branch_count++; - if (node->branch_from) { - /* - * It's "to" of a branch - */ - cnode->brtype_stat.branch_to = true; + if (node->branch_from) { + /* + * It's "to" of a branch + */ + cnode->brtype_stat.branch_to = true; - if (node->branch_flags.predicted) - cnode->predicted_count++; + if (node->branch_flags.predicted) + cnode->predicted_count++; - if (node->branch_flags.abort) - cnode->abort_count++; + if (node->branch_flags.abort) + cnode->abort_count++; - branch_type_count(&cnode->brtype_stat, - &node->branch_flags, - node->branch_from, - node->ip); - } else { - /* - * It's "from" of a branch - */ - cnode->brtype_stat.branch_to = false; - cnode->cycles_count += - node->branch_flags.cycles; - cnode->iter_count += node->nr_loop_iter; - cnode->iter_cycles += node->iter_cycles; - } + branch_type_count(&cnode->brtype_stat, + &node->branch_flags, + node->branch_from, + node->ip); + } else { + /* + * It's "from" of a branch + */ + cnode->brtype_stat.branch_to = false; + cnode->cycles_count += node->branch_flags.cycles; + cnode->iter_count += node->nr_loop_iter; + cnode->iter_cycles += node->iter_cycles; } - - return MATCH_EQ; } - return left > right ? MATCH_GT : MATCH_LT; + return match; } /* -- cgit v1.2.3 From b38775cf7678d7715b35dded3dcfab66e244baae Mon Sep 17 00:00:00 2001 From: Milian Wolff Date: Thu, 19 Oct 2017 13:38:33 +0200 Subject: perf report: Cache failed lookups of inlined frames When no inlined frames could be found for a given address, we did not store this information anywhere. That means we potentially do the costly inliner lookup repeatedly for cases where we know it can never succeed. This patch makes dso__parse_addr_inlines always return a valid inline_node. It will be empty when no inliners are found. This enables us to cache the empty list in the DSO, thereby improving the performance when many addresses fail to find the inliners. For my trivial example, the performance impact is already quite significant: Before: ~~~~~ Performance counter stats for 'perf report --stdio --inline -g srcline -s srcline' (5 runs): 594.804032 task-clock (msec) # 0.998 CPUs utilized ( +- 0.07% ) 53 context-switches # 0.089 K/sec ( +- 4.09% ) 0 cpu-migrations # 0.000 K/sec ( +-100.00% ) 5,687 page-faults # 0.010 M/sec ( +- 0.02% ) 2,300,918,213 cycles # 3.868 GHz ( +- 0.09% ) 4,395,839,080 instructions # 1.91 insn per cycle ( +- 0.00% ) 939,177,205 branches # 1578.969 M/sec ( +- 0.00% ) 11,824,633 branch-misses # 1.26% of all branches ( +- 0.10% ) 0.596246531 seconds time elapsed ( +- 0.07% ) ~~~~~ After: ~~~~~ Performance counter stats for 'perf report --stdio --inline -g srcline -s srcline' (5 runs): 113.111405 task-clock (msec) # 0.990 CPUs utilized ( +- 0.89% ) 29 context-switches # 0.255 K/sec ( +- 54.25% ) 0 cpu-migrations # 0.000 K/sec 5,380 page-faults # 0.048 M/sec ( +- 0.01% ) 432,378,779 cycles # 3.823 GHz ( +- 0.75% ) 670,057,633 instructions # 1.55 insn per cycle ( +- 0.01% ) 141,001,247 branches # 1246.570 M/sec ( +- 0.01% ) 2,346,845 branch-misses # 1.66% of all branches ( +- 0.19% ) 0.114222393 seconds time elapsed ( +- 1.19% ) ~~~~~ Signed-off-by: Milian Wolff Reviewed-by: Andi Kleen Cc: David Ahern Cc: Jin Yao Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20171019113836.5548-3-milian.wolff@kdab.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/machine.c | 15 +++++++-------- tools/perf/util/srcline.c | 16 +--------------- 2 files changed, 8 insertions(+), 23 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 3d049cb313ac..177c1d4088f8 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -2115,9 +2115,10 @@ static int append_inlines(struct callchain_cursor *cursor, struct inline_node *inline_node; struct inline_list *ilist; u64 addr; + int ret = 1; if (!symbol_conf.inline_name || !map || !sym) - return 1; + return ret; addr = map__rip_2objdump(map, ip); @@ -2125,22 +2126,20 @@ static int append_inlines(struct callchain_cursor *cursor, if (!inline_node) { inline_node = dso__parse_addr_inlines(map->dso, addr, sym); if (!inline_node) - return 1; - + return ret; inlines__tree_insert(&map->dso->inlined_nodes, inline_node); } list_for_each_entry(ilist, &inline_node->val, list) { - int ret = callchain_cursor_append(cursor, ip, map, - ilist->symbol, false, - NULL, 0, 0, 0, - ilist->srcline); + ret = callchain_cursor_append(cursor, ip, map, + ilist->symbol, false, + NULL, 0, 0, 0, ilist->srcline); if (ret != 0) return ret; } - return 0; + return ret; } static int unwind_entry(struct unwind_entry *entry, void *arg) diff --git a/tools/perf/util/srcline.c b/tools/perf/util/srcline.c index 8bea6621d657..fc3888664b20 100644 --- a/tools/perf/util/srcline.c +++ b/tools/perf/util/srcline.c @@ -353,17 +353,8 @@ static struct inline_node *addr2inlines(const char *dso_name, u64 addr, INIT_LIST_HEAD(&node->val); node->addr = addr; - if (!addr2line(dso_name, addr, NULL, NULL, dso, TRUE, node, sym)) - goto out_free_inline_node; - - if (list_empty(&node->val)) - goto out_free_inline_node; - + addr2line(dso_name, addr, NULL, NULL, dso, true, node, sym); return node; - -out_free_inline_node: - inline_node__delete(node); - return NULL; } #else /* HAVE_LIBBFD_SUPPORT */ @@ -480,11 +471,6 @@ static struct inline_node *addr2inlines(const char *dso_name, u64 addr, out: pclose(fp); - if (list_empty(&node->val)) { - inline_node__delete(node); - return NULL; - } - return node; } -- cgit v1.2.3 From 21ac9d547fdde79c1e8692587d9044fde549214b Mon Sep 17 00:00:00 2001 From: Milian Wolff Date: Thu, 19 Oct 2017 13:38:34 +0200 Subject: perf report: Cache srclines for callchain nodes On one hand this ensures that the memory is properly freed when the DSO gets freed. On the other hand this significantly speeds up the processing of the callchain nodes when lots of srclines are requested. For one of my data files e.g.: Before: Performance counter stats for 'perf report -s srcline -g srcline --stdio': 52496.495043 task-clock (msec) # 0.999 CPUs utilized 634 context-switches # 0.012 K/sec 2 cpu-migrations # 0.000 K/sec 191,561 page-faults # 0.004 M/sec 165,074,498,235 cycles # 3.144 GHz 334,170,832,408 instructions # 2.02 insn per cycle 90,220,029,745 branches # 1718.591 M/sec 654,525,177 branch-misses # 0.73% of all branches 52.533273822 seconds time elapsedProcessed 236605 events and lost 40 chunks! After: Performance counter stats for 'perf report -s srcline -g srcline --stdio': 22606.323706 task-clock (msec) # 1.000 CPUs utilized 31 context-switches # 0.001 K/sec 0 cpu-migrations # 0.000 K/sec 185,471 page-faults # 0.008 M/sec 71,188,113,681 cycles # 3.149 GHz 133,204,943,083 instructions # 1.87 insn per cycle 34,886,384,979 branches # 1543.214 M/sec 278,214,495 branch-misses # 0.80% of all branches 22.609857253 seconds time elapsed Note that the difference is only this large when `--inline` is not passed. In such situations, we would use the inliner cache and thus do not run this code path that often. I think that this cache should actually be used in other places, too. When looking at the valgrind leak report for perf report, we see tons of srclines being leaked, most notably from calls to hist_entry__get_srcline. The problem is that get_srcline has many different formatting options (show_sym, show_addr, potentially even unwind_inlines when calling __get_srcline directly). As such, the srcline cannot easily be cached for all calls, or we'd have to add caches for all formatting combinations (6 so far). An alternative would be to remove the formatting options and handle that on a different level - i.e. print the sym/addr on demand wherever we actually output something. And the unwind_inlines could be moved into a separate function that does not return the srcline. Signed-off-by: Milian Wolff Reviewed-by: Andi Kleen Cc: David Ahern Cc: Jin Yao Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20171019113836.5548-4-milian.wolff@kdab.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/dso.c | 2 ++ tools/perf/util/dso.h | 1 + tools/perf/util/machine.c | 17 +++++++++--- tools/perf/util/srcline.c | 66 +++++++++++++++++++++++++++++++++++++++++++++++ tools/perf/util/srcline.h | 7 +++++ 5 files changed, 90 insertions(+), 3 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c index 75c8250b3b8a..3192b608e91b 100644 --- a/tools/perf/util/dso.c +++ b/tools/perf/util/dso.c @@ -1203,6 +1203,7 @@ struct dso *dso__new(const char *name) dso->symbols[i] = dso->symbol_names[i] = RB_ROOT; dso->data.cache = RB_ROOT; dso->inlined_nodes = RB_ROOT; + dso->srclines = RB_ROOT; dso->data.fd = -1; dso->data.status = DSO_DATA_STATUS_UNKNOWN; dso->symtab_type = DSO_BINARY_TYPE__NOT_FOUND; @@ -1237,6 +1238,7 @@ void dso__delete(struct dso *dso) /* free inlines first, as they reference symbols */ inlines__tree_delete(&dso->inlined_nodes); + srcline__tree_delete(&dso->srclines); for (i = 0; i < MAP__NR_TYPES; ++i) symbols__delete(&dso->symbols[i]); diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h index 122eca0d242d..821b16c67030 100644 --- a/tools/perf/util/dso.h +++ b/tools/perf/util/dso.h @@ -142,6 +142,7 @@ struct dso { struct rb_root symbols[MAP__NR_TYPES]; struct rb_root symbol_names[MAP__NR_TYPES]; struct rb_root inlined_nodes; + struct rb_root srclines; struct { u64 addr; struct symbol *symbol; diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 177c1d4088f8..94d8f1ccedd9 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -1711,11 +1711,22 @@ struct mem_info *sample__resolve_mem(struct perf_sample *sample, static char *callchain_srcline(struct map *map, struct symbol *sym, u64 ip) { + char *srcline = NULL; + if (!map || callchain_param.key == CCKEY_FUNCTION) - return NULL; + return srcline; + + srcline = srcline__tree_find(&map->dso->srclines, ip); + if (!srcline) { + bool show_sym = false; + bool show_addr = callchain_param.key == CCKEY_ADDRESS; + + srcline = get_srcline(map->dso, map__rip_2objdump(map, ip), + sym, show_sym, show_addr); + srcline__tree_insert(&map->dso->srclines, ip, srcline); + } - return get_srcline(map->dso, map__rip_2objdump(map, ip), - sym, false, callchain_param.key == CCKEY_ADDRESS); + return srcline; } struct iterations { diff --git a/tools/perf/util/srcline.c b/tools/perf/util/srcline.c index fc3888664b20..c143c3bc1ef8 100644 --- a/tools/perf/util/srcline.c +++ b/tools/perf/util/srcline.c @@ -542,6 +542,72 @@ char *get_srcline(struct dso *dso, u64 addr, struct symbol *sym, return __get_srcline(dso, addr, sym, show_sym, show_addr, false); } +struct srcline_node { + u64 addr; + char *srcline; + struct rb_node rb_node; +}; + +void srcline__tree_insert(struct rb_root *tree, u64 addr, char *srcline) +{ + struct rb_node **p = &tree->rb_node; + struct rb_node *parent = NULL; + struct srcline_node *i, *node; + + node = zalloc(sizeof(struct srcline_node)); + if (!node) { + perror("not enough memory for the srcline node"); + return; + } + + node->addr = addr; + node->srcline = srcline; + + while (*p != NULL) { + parent = *p; + i = rb_entry(parent, struct srcline_node, rb_node); + if (addr < i->addr) + p = &(*p)->rb_left; + else + p = &(*p)->rb_right; + } + rb_link_node(&node->rb_node, parent, p); + rb_insert_color(&node->rb_node, tree); +} + +char *srcline__tree_find(struct rb_root *tree, u64 addr) +{ + struct rb_node *n = tree->rb_node; + + while (n) { + struct srcline_node *i = rb_entry(n, struct srcline_node, + rb_node); + + if (addr < i->addr) + n = n->rb_left; + else if (addr > i->addr) + n = n->rb_right; + else + return i->srcline; + } + + return NULL; +} + +void srcline__tree_delete(struct rb_root *tree) +{ + struct srcline_node *pos; + struct rb_node *next = rb_first(tree); + + while (next) { + pos = rb_entry(next, struct srcline_node, rb_node); + next = rb_next(&pos->rb_node); + rb_erase(&pos->rb_node, tree); + free_srcline(pos->srcline); + zfree(&pos); + } +} + struct inline_node *dso__parse_addr_inlines(struct dso *dso, u64 addr, struct symbol *sym) { diff --git a/tools/perf/util/srcline.h b/tools/perf/util/srcline.h index ebe38cd22294..1c4d6210860b 100644 --- a/tools/perf/util/srcline.h +++ b/tools/perf/util/srcline.h @@ -15,6 +15,13 @@ char *__get_srcline(struct dso *dso, u64 addr, struct symbol *sym, bool show_sym, bool show_addr, bool unwind_inlines); void free_srcline(char *srcline); +/* insert the srcline into the DSO, which will take ownership */ +void srcline__tree_insert(struct rb_root *tree, u64 addr, char *srcline); +/* find previously inserted srcline */ +char *srcline__tree_find(struct rb_root *tree, u64 addr); +/* delete all srclines within the tree */ +void srcline__tree_delete(struct rb_root *tree); + #define SRCLINE_UNKNOWN ((char *) "??:0") struct inline_list { -- cgit v1.2.3 From 1fb7d06a509e82893e59e0f0b223e7d5d6d0ef8c Mon Sep 17 00:00:00 2001 From: Milian Wolff Date: Thu, 19 Oct 2017 13:38:35 +0200 Subject: perf report: Use srcline from callchain for hist entries This also removes the symbol name from the srcline column, more on this below. This ensures we use the correct srcline, which could originate from a potentially inlined function. The hist entries used to query for the srcline based purely on the IP, which leads to wrong results for inlined entries. Before: ~~~~~ perf report --inline -s srcline -g none --stdio ... # Children Self Source:Line # ........ ........ .................................................................................................................................. # 94.23% 0.00% __libc_start_main+18446603487898210537 94.23% 0.00% _start+41 44.58% 0.00% main+100 44.58% 0.00% std::_Norm_helper::_S_do_it+100 44.58% 0.00% std::__complex_abs+100 44.58% 0.00% std::abs+100 44.58% 0.00% std::norm+100 36.01% 0.00% hypot+18446603487892193300 25.81% 0.00% main+41 25.81% 0.00% std::__detail::_Adaptor, double>::operator()+41 25.81% 0.00% std::uniform_real_distribution::operator() >+41 25.75% 25.75% random.h:143 18.39% 0.00% main+57 18.39% 0.00% std::__detail::_Adaptor, double>::operator()+57 18.39% 0.00% std::uniform_real_distribution::operator() >+57 13.80% 13.80% random.tcc:3330 5.64% 0.00% ??:0 4.13% 4.13% __hypot_finite+163 4.13% 0.00% __hypot_finite+18446603487892193443 ... ~~~~~ After: ~~~~~ perf report --inline -s srcline -g none --stdio ... # Children Self Source:Line # ........ ........ ........................................... # 94.30% 1.19% main.cpp:39 94.23% 0.00% __libc_start_main+18446603487898210537 94.23% 0.00% _start+41 48.44% 1.70% random.h:1823 48.44% 0.00% random.h:1814 46.74% 2.53% random.h:185 44.68% 0.10% complex:589 44.68% 0.00% complex:597 44.68% 0.00% complex:654 44.68% 0.00% complex:664 40.61% 13.80% random.tcc:3330 36.01% 0.00% hypot+18446603487892193300 26.81% 0.00% random.h:151 26.81% 0.00% random.h:332 25.75% 25.75% random.h:143 5.64% 0.00% ??:0 4.13% 4.13% __hypot_finite+163 4.13% 0.00% __hypot_finite+18446603487892193443 ... ~~~~~ Note that this change removes the symbol from the source:line hist column. If this information is desired, users should explicitly query for it if needed. I.e. run this command instead: ~~~~~ perf report --inline -s sym,srcline -g none --stdio ... # To display the perf.data header info, please use --header/--header-only options. # # # Total Lost Samples: 0 # # Samples: 1K of event 'cycles:uppp' # Event count (approx.): 1381229476 # # Children Self Symbol Source:Line # ........ ........ ................................................................................................................................... ........................................... # 94.30% 1.19% [.] main main.cpp:39 94.23% 0.00% [.] __libc_start_main __libc_start_main+18446603487898210537 94.23% 0.00% [.] _start _start+41 48.44% 0.00% [.] std::uniform_real_distribution::operator() > (inlined) random.h:1814 48.44% 0.00% [.] std::uniform_real_distribution::operator() > (inlined) random.h:1823 46.74% 0.00% [.] std::__detail::_Adaptor, double>::operator() (inlined) random.h:185 44.68% 0.00% [.] std::_Norm_helper::_S_do_it (inlined) complex:654 44.68% 0.00% [.] std::__complex_abs (inlined) complex:589 44.68% 0.00% [.] std::abs (inlined) complex:597 44.68% 0.00% [.] std::norm (inlined) complex:664 39.80% 13.59% [.] std::generate_canonical > random.tcc:3330 36.01% 0.00% [.] hypot hypot+18446603487892193300 26.81% 0.00% [.] std::__detail::__mod (inlined) random.h:151 26.81% 0.00% [.] std::linear_congruential_engine::operator() (inlined) random.h:332 25.75% 0.00% [.] std::__detail::_Mod::__calc (inlined) random.h:143 25.19% 25.19% [.] std::generate_canonical > random.h:143 4.13% 4.13% [.] __hypot_finite __hypot_finite+163 4.13% 0.00% [.] __hypot_finite __hypot_finite+18446603487892193443 ... ~~~~~ Compared to the old behavior, this reduces duplication in the output. Before we used to print the symbol name in the srcline column even when the sym column was explicitly requested. I.e. the output was: ~~~~~ perf report --inline -s sym,srcline -g none --stdio ... # To display the perf.data header info, please use --header/--header-only options. # # # Total Lost Samples: 0 # # Samples: 1K of event 'cycles:uppp' # Event count (approx.): 1381229476 # # Children Self Symbol Source:Line # ........ ........ ................................................................................................................................... .................................................................................................................................. # 94.23% 0.00% [.] __libc_start_main __libc_start_main+18446603487898210537 94.23% 0.00% [.] _start _start+41 44.58% 0.00% [.] main main+100 44.58% 0.00% [.] std::_Norm_helper::_S_do_it (inlined) std::_Norm_helper::_S_do_it+100 44.58% 0.00% [.] std::__complex_abs (inlined) std::__complex_abs+100 44.58% 0.00% [.] std::abs (inlined) std::abs+100 44.58% 0.00% [.] std::norm (inlined) std::norm+100 36.01% 0.00% [.] hypot hypot+18446603487892193300 25.81% 0.00% [.] main main+41 25.81% 0.00% [.] std::__detail::_Adaptor, double>::operator() (inlined) std::__detail::_Adaptor, double>::operator()+41 25.81% 0.00% [.] std::uniform_real_distribution::operator() > (inlined) std::uniform_real_distribution::operator() >+41 25.69% 25.69% [.] std::generate_canonical > random.h:143 18.39% 0.00% [.] main main+57 18.39% 0.00% [.] std::__detail::_Adaptor, double>::operator() (inlined) std::__detail::_Adaptor, double>::operator()+57 18.39% 0.00% [.] std::uniform_real_distribution::operator() > (inlined) std::uniform_real_distribution::operator() >+57 13.80% 13.80% [.] std::generate_canonical > random.tcc:3330 4.13% 4.13% [.] __hypot_finite __hypot_finite+163 4.13% 0.00% [.] __hypot_finite __hypot_finite+18446603487892193443 ... ~~~~~ Signed-off-by: Milian Wolff Reviewed-by: Andi Kleen Cc: David Ahern Cc: Jin Yao Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20171019113836.5548-5-milian.wolff@kdab.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/callchain.c | 1 + tools/perf/util/event.c | 1 + tools/perf/util/hist.c | 2 ++ tools/perf/util/symbol.h | 1 + 4 files changed, 5 insertions(+) (limited to 'tools/perf') diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index 19bfcadcf891..3a3916934a92 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c @@ -1090,6 +1090,7 @@ int fill_callchain_info(struct addr_location *al, struct callchain_cursor_node * { al->map = node->map; al->sym = node->sym; + al->srcline = node->srcline; if (node->map) al->addr = node->map->map_ip(node->map, node->ip); else diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index 47eff4767edb..3c411e7e36aa 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -1604,6 +1604,7 @@ int machine__resolve(struct machine *machine, struct addr_location *al, al->sym = NULL; al->cpu = sample->cpu; al->socket = -1; + al->srcline = NULL; if (al->cpu >= 0) { struct perf_env *env = machine->env; diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index b0fa9c217e1c..25d143053ab5 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -596,6 +596,7 @@ __hists__add_entry(struct hists *hists, .map = al->map, .sym = al->sym, }, + .srcline = al->srcline ? strdup(al->srcline) : NULL, .socket = al->socket, .cpu = al->cpu, .cpumode = al->cpumode, @@ -950,6 +951,7 @@ iter_add_next_cumulative_entry(struct hist_entry_iter *iter, .map = al->map, .sym = al->sym, }, + .srcline = al->srcline ? strdup(al->srcline) : NULL, .parent = iter->parent, .raw_data = sample->raw_data, .raw_size = sample->raw_size, diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index d880a059babb..d548ea5cb418 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -209,6 +209,7 @@ struct addr_location { struct thread *thread; struct map *map; struct symbol *sym; + const char *srcline; u64 addr; char level; u8 filtered; -- cgit v1.2.3 From d8a88dd243a170a226aba33e7c53704db2f82aa6 Mon Sep 17 00:00:00 2001 From: Milian Wolff Date: Thu, 19 Oct 2017 13:38:36 +0200 Subject: perf util: Enable handling of inlined frames by default Now that we have caches in place to speed up the process of finding inlined frames and srcline information repeatedly, we can enable this useful option by default. Suggested-by: Ingo Molnar Signed-off-by: Milian Wolff Reviewed-by: Andi Kleen Cc: David Ahern Cc: Jin Yao Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20171019113836.5548-6-milian.wolff@kdab.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-report.txt | 3 ++- tools/perf/Documentation/perf-script.txt | 3 ++- tools/perf/util/symbol.c | 1 + 3 files changed, 5 insertions(+), 2 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index 383a98d992ed..ddde2b54af57 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt @@ -434,7 +434,8 @@ include::itrace.txt[] --inline:: If a callgraph address belongs to an inlined function, the inline stack - will be printed. Each entry is function name or file/line. + will be printed. Each entry is function name or file/line. Enabled by + default, disable with --no-inline. include::callchain-overhead-calculation.txt[] diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt index bcc1ba35a2d8..25e677344728 100644 --- a/tools/perf/Documentation/perf-script.txt +++ b/tools/perf/Documentation/perf-script.txt @@ -327,7 +327,8 @@ include::itrace.txt[] --inline:: If a callgraph address belongs to an inlined function, the inline stack - will be printed. Each entry has function name and file/line. + will be printed. Each entry has function name and file/line. Enabled by + default, disable with --no-inline. SEE ALSO -------- diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 066e38aa4063..ce6993bebf8c 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -45,6 +45,7 @@ struct symbol_conf symbol_conf = { .show_hist_headers = true, .symfs = "", .event_group = true, + .inline_name = true, }; static enum dso_binary_type binary_type_symtab[] = { -- cgit v1.2.3 From 69c71252298acad7a25e9499929f723790f48e26 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 26 Oct 2017 09:51:13 -0300 Subject: perf script: Add a few missing conversions to fprintf style In a1a587073ccd ("perf script: Use fprintf like printing uniformly") there were a few cases that were missed, fix it. Reported-by: yuzhoujian Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-sq9hvfk5mkjdqzlpyiq7jkos@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-script.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index a3add2cd7856..db4ff1510dc5 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -1447,7 +1447,7 @@ static void process_event(struct perf_script *script, perf_sample__fprintf_start(sample, thread, evsel, fp); if (PRINT_FIELD(PERIOD)) - printf("%10" PRIu64 " ", sample->period); + fprintf(fp, "%10" PRIu64 " ", sample->period); if (PRINT_FIELD(EVNAME)) { const char *evname = perf_evsel__name(evsel); @@ -1455,8 +1455,7 @@ static void process_event(struct perf_script *script, if (!script->name_width) script->name_width = perf_evlist__max_name_len(script->session->evlist); - printf("%*s: ", script->name_width, - evname ? evname : "[unknown]"); + fprintf(fp, "%*s: ", script->name_width, evname ?: "[unknown]"); } if (print_flags) @@ -1513,8 +1512,8 @@ static void process_event(struct perf_script *script, perf_sample__fprintf_insn(sample, attr, thread, machine, fp); if (PRINT_FIELD(PHYS_ADDR)) - printf("%16" PRIx64, sample->phys_addr); - printf("\n"); + fprintf(fp, "%16" PRIx64, sample->phys_addr); + fprintf(fp, "\n"); } static struct scripting_ops *scripting_ops; -- cgit v1.2.3 From 5ce2c5b4e484a87a8af48649775796fb349684db Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 26 Oct 2017 09:55:22 -0300 Subject: perf script: Use pr_debug where appropriate We have facilities for reporting unexpected, unlikely errors, use them. Cc: Adrian Hunter Cc: Andi Kleen Cc: David Ahern Cc: Jiri Olsa Cc: Michael Ellerman Cc: Namhyung Kim Cc: Wang Nan Cc: yuzhoujian Link: http://lkml.kernel.org/n/tip-c7j22xfjf1j773g7ufp607q0@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-script.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index db4ff1510dc5..af0267072d43 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -769,27 +769,26 @@ static int grab_bb(u8 *buffer, u64 start, u64 end, * but the exit is not. Let the caller patch it up. */ if (kernel != machine__kernel_ip(machine, end)) { - printf("\tblock %" PRIx64 "-%" PRIx64 " transfers between kernel and user\n", - start, end); + pr_debug("\tblock %" PRIx64 "-%" PRIx64 " transfers between kernel and user\n", start, end); return -ENXIO; } memset(&al, 0, sizeof(al)); if (end - start > MAXBB - MAXINSN) { if (last) - printf("\tbrstack does not reach to final jump (%" PRIx64 "-%" PRIx64 ")\n", start, end); + pr_debug("\tbrstack does not reach to final jump (%" PRIx64 "-%" PRIx64 ")\n", start, end); else - printf("\tblock %" PRIx64 "-%" PRIx64 " (%" PRIu64 ") too long to dump\n", start, end, end - start); + pr_debug("\tblock %" PRIx64 "-%" PRIx64 " (%" PRIu64 ") too long to dump\n", start, end, end - start); return 0; } thread__find_addr_map(thread, *cpumode, MAP__FUNCTION, start, &al); if (!al.map || !al.map->dso) { - printf("\tcannot resolve %" PRIx64 "-%" PRIx64 "\n", start, end); + pr_debug("\tcannot resolve %" PRIx64 "-%" PRIx64 "\n", start, end); return 0; } if (al.map->dso->data.status == DSO_DATA_STATUS_ERROR) { - printf("\tcannot resolve %" PRIx64 "-%" PRIx64 "\n", start, end); + pr_debug("\tcannot resolve %" PRIx64 "-%" PRIx64 "\n", start, end); return 0; } @@ -802,7 +801,7 @@ static int grab_bb(u8 *buffer, u64 start, u64 end, *is64bit = al.map->dso->is_64_bit; if (len <= 0) - printf("\tcannot fetch code for block at %" PRIx64 "-%" PRIx64 "\n", + pr_debug("\tcannot fetch code for block at %" PRIx64 "-%" PRIx64 "\n", start, end); return len; } -- cgit v1.2.3 From 894f3f1732cb4ae543d406e4fca1585e8c303a1c Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 26 Oct 2017 10:26:52 -0300 Subject: perf script: Use event_format__fprintf() Another case where we a1a587073ccd ("perf script: Use fprintf like printing uniformly") forgot to redirect output to the FILE descriptor, fix this too. Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Cc: yuzhoujian Link: http://lkml.kernel.org/n/tip-jmwx4pgfezw98ezfoj9t957s@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-script.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index af0267072d43..b27f21638207 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -1465,9 +1465,10 @@ static void process_event(struct perf_script *script, return; } - if (PRINT_FIELD(TRACE)) - event_format__print(evsel->tp_format, sample->cpu, - sample->raw_data, sample->raw_size); + if (PRINT_FIELD(TRACE)) { + event_format__fprintf(evsel->tp_format, sample->cpu, + sample->raw_data, sample->raw_size, fp); + } if (attr->type == PERF_TYPE_SYNTH && PRINT_FIELD(SYNTH)) perf_sample__fprintf_synth(sample, evsel, fp); -- cgit v1.2.3 From e669e833da8d71a02e175dfffceaabc3eb010207 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 26 Oct 2017 14:22:34 -0300 Subject: perf evsel: Restore evsel->priv as a tool private area When we started using it for stats and did it not just in builtin-stat.c, but also for builtin-script.c, then it stopped being a tool private area, so introduce a new pointer for these stats and leave ->priv to its original purpose. Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Cc: yuzhoujian Fixes: cfc8874a4859 ("perf script: Process cpu/threads maps") Link: http://lkml.kernel.org/n/tip-jtpzx3rjqo78snmmsdzwb2eb@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 4 ++-- tools/perf/util/evsel.h | 3 +++ tools/perf/util/stat.c | 16 ++++++++-------- 3 files changed, 13 insertions(+), 10 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index dd525417880a..988bdfa5d832 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -845,7 +845,7 @@ static void print_noise(struct perf_evsel *evsel, double avg) if (run_count == 1) return; - ps = evsel->priv; + ps = evsel->stats; print_noise_pct(stddev_stats(&ps->res_stats[0]), avg); } @@ -1432,7 +1432,7 @@ static void counter_aggr_cb(struct perf_evsel *counter, void *data, bool first __maybe_unused) { struct caggr_data *cd = data; - struct perf_stat_evsel *ps = counter->priv; + struct perf_stat_evsel *ps = counter->stats; cd->avg += avg_stats(&ps->res_stats[0]); cd->avg_enabled += avg_stats(&ps->res_stats[1]); diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index db658785d828..64782b19089d 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -68,6 +68,8 @@ struct perf_evsel_config_term { } val; }; +struct perf_stat_evsel; + /** struct perf_evsel - event selector * * @evlist - evlist this evsel is in, if it is in one. @@ -101,6 +103,7 @@ struct perf_evsel { const char *unit; struct event_format *tp_format; off_t id_offset; + struct perf_stat_evsel *stats; void *priv; u64 db_id; struct cgroup_sel *cgrp; diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index 35e9848734d6..933de91831fa 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c @@ -69,7 +69,7 @@ double rel_stddev_stats(double stddev, double avg) bool __perf_evsel_stat__is(struct perf_evsel *evsel, enum perf_stat_evsel_id id) { - struct perf_stat_evsel *ps = evsel->priv; + struct perf_stat_evsel *ps = evsel->stats; return ps->id == id; } @@ -93,7 +93,7 @@ static const char *id_str[PERF_STAT_EVSEL_ID__MAX] = { void perf_stat_evsel_id_init(struct perf_evsel *evsel) { - struct perf_stat_evsel *ps = evsel->priv; + struct perf_stat_evsel *ps = evsel->stats; int i; /* ps->id is 0 hence PERF_STAT_EVSEL_ID__NONE by default */ @@ -109,7 +109,7 @@ void perf_stat_evsel_id_init(struct perf_evsel *evsel) static void perf_evsel__reset_stat_priv(struct perf_evsel *evsel) { int i; - struct perf_stat_evsel *ps = evsel->priv; + struct perf_stat_evsel *ps = evsel->stats; for (i = 0; i < 3; i++) init_stats(&ps->res_stats[i]); @@ -119,8 +119,8 @@ static void perf_evsel__reset_stat_priv(struct perf_evsel *evsel) static int perf_evsel__alloc_stat_priv(struct perf_evsel *evsel) { - evsel->priv = zalloc(sizeof(struct perf_stat_evsel)); - if (evsel->priv == NULL) + evsel->stats = zalloc(sizeof(struct perf_stat_evsel)); + if (evsel->stats == NULL) return -ENOMEM; perf_evsel__reset_stat_priv(evsel); return 0; @@ -128,11 +128,11 @@ static int perf_evsel__alloc_stat_priv(struct perf_evsel *evsel) static void perf_evsel__free_stat_priv(struct perf_evsel *evsel) { - struct perf_stat_evsel *ps = evsel->priv; + struct perf_stat_evsel *ps = evsel->stats; if (ps) free(ps->group_data); - zfree(&evsel->priv); + zfree(&evsel->stats); } static int perf_evsel__alloc_prev_raw_counts(struct perf_evsel *evsel, @@ -318,7 +318,7 @@ int perf_stat_process_counter(struct perf_stat_config *config, struct perf_evsel *counter) { struct perf_counts_values *aggr = &counter->counts->aggr; - struct perf_stat_evsel *ps = counter->priv; + struct perf_stat_evsel *ps = counter->stats; u64 *count = counter->counts->aggr.values; u64 val; int i, ret; -- cgit v1.2.3 From a14390fde64e862c4fe8b92ecb293ee4b12d5bfe Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 26 Oct 2017 10:30:20 -0300 Subject: perf script: Allow creating per-event dump files Introduce a new option to dump trace output to files named by the monitored events and update perf-script documentation accordingly. Shown below is output of perf script command with the newly introduced option. $ perf record -e cycles -e cs -ag -- sleep 1 $ perf script --per-event-dump $ ls perf.data.cycles.dump perf.data.cs.dump Without per-event-dump support, drawing flamegraphs for different events would require post processing to separate events. You can monitor only one event at a time if you want to get flamegraphs for different events. Using this option, you can get the trace output files named by the monitored events, and could draw flamegraphs according to the event's name. Based-on-a-patch-by: yuzhoujian Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/r/1508921599-10832-3-git-send-email-yuzhoujian@didichuxing.com Link: http://lkml.kernel.org/n/tip-8ngzsjdhgiovkupl3r5yy570@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-script.txt | 4 +++ tools/perf/builtin-script.c | 59 +++++++++++++++++++++++++++++++- 2 files changed, 62 insertions(+), 1 deletion(-) (limited to 'tools/perf') diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt index 25e677344728..2811fcf684cb 100644 --- a/tools/perf/Documentation/perf-script.txt +++ b/tools/perf/Documentation/perf-script.txt @@ -325,6 +325,10 @@ include::itrace.txt[] Set the maximum number of program blocks to print with brstackasm for each sample. +--per-event-dump:: + Create per event files with a "perf.data.EVENT.dump" name instead of + printing to stdout, useful, for instance, for generating flamegraphs. + --inline:: If a callgraph address belongs to an inlined function, the inline stack will be printed. Each entry has function name and file/line. Enabled by diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index b27f21638207..fb5e49b3bc44 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -1392,6 +1392,7 @@ struct perf_script { bool show_switch_events; bool show_namespace_events; bool allocated; + bool per_event_dump; struct cpu_map *cpus; struct thread_map *threads; int name_width; @@ -1438,7 +1439,7 @@ static void process_event(struct perf_script *script, struct thread *thread = al->thread; struct perf_event_attr *attr = &evsel->attr; unsigned int type = output_type(attr->type); - FILE *fp = stdout; + FILE *fp = evsel->priv; if (output[type].fields == 0) return; @@ -1887,6 +1888,52 @@ static void sig_handler(int sig __maybe_unused) session_done = 1; } +static void perf_script__fclose_per_event_dump(struct perf_script *script) +{ + struct perf_evlist *evlist = script->session->evlist; + struct perf_evsel *evsel; + + evlist__for_each_entry(evlist, evsel) { + if (!evsel->priv) + break; + fclose(evsel->priv); + evsel->priv = NULL; + } +} + +static int perf_script__fopen_per_event_dump(struct perf_script *script) +{ + struct perf_evsel *evsel; + + evlist__for_each_entry(script->session->evlist, evsel) { + char filename[PATH_MAX]; + snprintf(filename, sizeof(filename), "%s.%s.dump", + script->session->file->path, perf_evsel__name(evsel)); + evsel->priv = fopen(filename, "w"); + if (evsel->priv == NULL) + goto out_err_fclose; + } + + return 0; + +out_err_fclose: + perf_script__fclose_per_event_dump(script); + return -1; +} + +static int perf_script__setup_per_event_dump(struct perf_script *script) +{ + struct perf_evsel *evsel; + + if (script->per_event_dump) + return perf_script__fopen_per_event_dump(script); + + evlist__for_each_entry(script->session->evlist, evsel) + evsel->priv = stdout; + + return 0; +} + static int __cmd_script(struct perf_script *script) { int ret; @@ -1908,8 +1955,16 @@ static int __cmd_script(struct perf_script *script) if (script->show_namespace_events) script->tool.namespaces = process_namespaces_event; + if (perf_script__setup_per_event_dump(script)) { + pr_err("Couldn't create the per event dump files\n"); + return -1; + } + ret = perf_session__process_events(script->session); + if (script->per_event_dump) + perf_script__fclose_per_event_dump(script); + if (debug_mode) pr_err("Misordered timestamps: %" PRIu64 "\n", nr_unordered); @@ -2827,6 +2882,8 @@ int cmd_script(int argc, const char **argv) "Show context switch events (if recorded)"), OPT_BOOLEAN('\0', "show-namespace-events", &script.show_namespace_events, "Show namespace events (if recorded)"), + OPT_BOOLEAN('\0', "per-event-dump", &script.per_event_dump, + "Dump trace output to files named by the monitored events"), OPT_BOOLEAN('f', "force", &symbol_conf.force, "don't complain, do it"), OPT_INTEGER(0, "max-blocks", &max_blocks, "Maximum number of code blocks to dump with brstackinsn"), -- cgit v1.2.3 From 433727948904301b01f1d5ebf39893c96cd4bab7 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 26 Oct 2017 14:41:01 -0300 Subject: tools include uapi: Grab a copy of linux/prctl.h We will use it to generate tables for beautifying prctl's 'option' arg and some of the others eventually. Cc: Andy Lutomirski Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-cg8mpmz4hk9nfih685emnbk9@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/linux/prctl.h | 200 +++++++++++++++++++++++++++++++++++++++ tools/perf/check-headers.sh | 1 + 2 files changed, 201 insertions(+) create mode 100644 tools/include/uapi/linux/prctl.h (limited to 'tools/perf') diff --git a/tools/include/uapi/linux/prctl.h b/tools/include/uapi/linux/prctl.h new file mode 100644 index 000000000000..a8d0759a9e40 --- /dev/null +++ b/tools/include/uapi/linux/prctl.h @@ -0,0 +1,200 @@ +#ifndef _LINUX_PRCTL_H +#define _LINUX_PRCTL_H + +#include + +/* Values to pass as first argument to prctl() */ + +#define PR_SET_PDEATHSIG 1 /* Second arg is a signal */ +#define PR_GET_PDEATHSIG 2 /* Second arg is a ptr to return the signal */ + +/* Get/set current->mm->dumpable */ +#define PR_GET_DUMPABLE 3 +#define PR_SET_DUMPABLE 4 + +/* Get/set unaligned access control bits (if meaningful) */ +#define PR_GET_UNALIGN 5 +#define PR_SET_UNALIGN 6 +# define PR_UNALIGN_NOPRINT 1 /* silently fix up unaligned user accesses */ +# define PR_UNALIGN_SIGBUS 2 /* generate SIGBUS on unaligned user access */ + +/* Get/set whether or not to drop capabilities on setuid() away from + * uid 0 (as per security/commoncap.c) */ +#define PR_GET_KEEPCAPS 7 +#define PR_SET_KEEPCAPS 8 + +/* Get/set floating-point emulation control bits (if meaningful) */ +#define PR_GET_FPEMU 9 +#define PR_SET_FPEMU 10 +# define PR_FPEMU_NOPRINT 1 /* silently emulate fp operations accesses */ +# define PR_FPEMU_SIGFPE 2 /* don't emulate fp operations, send SIGFPE instead */ + +/* Get/set floating-point exception mode (if meaningful) */ +#define PR_GET_FPEXC 11 +#define PR_SET_FPEXC 12 +# define PR_FP_EXC_SW_ENABLE 0x80 /* Use FPEXC for FP exception enables */ +# define PR_FP_EXC_DIV 0x010000 /* floating point divide by zero */ +# define PR_FP_EXC_OVF 0x020000 /* floating point overflow */ +# define PR_FP_EXC_UND 0x040000 /* floating point underflow */ +# define PR_FP_EXC_RES 0x080000 /* floating point inexact result */ +# define PR_FP_EXC_INV 0x100000 /* floating point invalid operation */ +# define PR_FP_EXC_DISABLED 0 /* FP exceptions disabled */ +# define PR_FP_EXC_NONRECOV 1 /* async non-recoverable exc. mode */ +# define PR_FP_EXC_ASYNC 2 /* async recoverable exception mode */ +# define PR_FP_EXC_PRECISE 3 /* precise exception mode */ + +/* Get/set whether we use statistical process timing or accurate timestamp + * based process timing */ +#define PR_GET_TIMING 13 +#define PR_SET_TIMING 14 +# define PR_TIMING_STATISTICAL 0 /* Normal, traditional, + statistical process timing */ +# define PR_TIMING_TIMESTAMP 1 /* Accurate timestamp based + process timing */ + +#define PR_SET_NAME 15 /* Set process name */ +#define PR_GET_NAME 16 /* Get process name */ + +/* Get/set process endian */ +#define PR_GET_ENDIAN 19 +#define PR_SET_ENDIAN 20 +# define PR_ENDIAN_BIG 0 +# define PR_ENDIAN_LITTLE 1 /* True little endian mode */ +# define PR_ENDIAN_PPC_LITTLE 2 /* "PowerPC" pseudo little endian */ + +/* Get/set process seccomp mode */ +#define PR_GET_SECCOMP 21 +#define PR_SET_SECCOMP 22 + +/* Get/set the capability bounding set (as per security/commoncap.c) */ +#define PR_CAPBSET_READ 23 +#define PR_CAPBSET_DROP 24 + +/* Get/set the process' ability to use the timestamp counter instruction */ +#define PR_GET_TSC 25 +#define PR_SET_TSC 26 +# define PR_TSC_ENABLE 1 /* allow the use of the timestamp counter */ +# define PR_TSC_SIGSEGV 2 /* throw a SIGSEGV instead of reading the TSC */ + +/* Get/set securebits (as per security/commoncap.c) */ +#define PR_GET_SECUREBITS 27 +#define PR_SET_SECUREBITS 28 + +/* + * Get/set the timerslack as used by poll/select/nanosleep + * A value of 0 means "use default" + */ +#define PR_SET_TIMERSLACK 29 +#define PR_GET_TIMERSLACK 30 + +#define PR_TASK_PERF_EVENTS_DISABLE 31 +#define PR_TASK_PERF_EVENTS_ENABLE 32 + +/* + * Set early/late kill mode for hwpoison memory corruption. + * This influences when the process gets killed on a memory corruption. + */ +#define PR_MCE_KILL 33 +# define PR_MCE_KILL_CLEAR 0 +# define PR_MCE_KILL_SET 1 + +# define PR_MCE_KILL_LATE 0 +# define PR_MCE_KILL_EARLY 1 +# define PR_MCE_KILL_DEFAULT 2 + +#define PR_MCE_KILL_GET 34 + +/* + * Tune up process memory map specifics. + */ +#define PR_SET_MM 35 +# define PR_SET_MM_START_CODE 1 +# define PR_SET_MM_END_CODE 2 +# define PR_SET_MM_START_DATA 3 +# define PR_SET_MM_END_DATA 4 +# define PR_SET_MM_START_STACK 5 +# define PR_SET_MM_START_BRK 6 +# define PR_SET_MM_BRK 7 +# define PR_SET_MM_ARG_START 8 +# define PR_SET_MM_ARG_END 9 +# define PR_SET_MM_ENV_START 10 +# define PR_SET_MM_ENV_END 11 +# define PR_SET_MM_AUXV 12 +# define PR_SET_MM_EXE_FILE 13 +# define PR_SET_MM_MAP 14 +# define PR_SET_MM_MAP_SIZE 15 + +/* + * This structure provides new memory descriptor + * map which mostly modifies /proc/pid/stat[m] + * output for a task. This mostly done in a + * sake of checkpoint/restore functionality. + */ +struct prctl_mm_map { + __u64 start_code; /* code section bounds */ + __u64 end_code; + __u64 start_data; /* data section bounds */ + __u64 end_data; + __u64 start_brk; /* heap for brk() syscall */ + __u64 brk; + __u64 start_stack; /* stack starts at */ + __u64 arg_start; /* command line arguments bounds */ + __u64 arg_end; + __u64 env_start; /* environment variables bounds */ + __u64 env_end; + __u64 *auxv; /* auxiliary vector */ + __u32 auxv_size; /* vector size */ + __u32 exe_fd; /* /proc/$pid/exe link file */ +}; + +/* + * Set specific pid that is allowed to ptrace the current task. + * A value of 0 mean "no process". + */ +#define PR_SET_PTRACER 0x59616d61 +# define PR_SET_PTRACER_ANY ((unsigned long)-1) + +#define PR_SET_CHILD_SUBREAPER 36 +#define PR_GET_CHILD_SUBREAPER 37 + +/* + * If no_new_privs is set, then operations that grant new privileges (i.e. + * execve) will either fail or not grant them. This affects suid/sgid, + * file capabilities, and LSMs. + * + * Operations that merely manipulate or drop existing privileges (setresuid, + * capset, etc.) will still work. Drop those privileges if you want them gone. + * + * Changing LSM security domain is considered a new privilege. So, for example, + * asking selinux for a specific new context (e.g. with runcon) will result + * in execve returning -EPERM. + * + * See Documentation/prctl/no_new_privs.txt for more details. + */ +#define PR_SET_NO_NEW_PRIVS 38 +#define PR_GET_NO_NEW_PRIVS 39 + +#define PR_GET_TID_ADDRESS 40 + +#define PR_SET_THP_DISABLE 41 +#define PR_GET_THP_DISABLE 42 + +/* + * Tell the kernel to start/stop helping userspace manage bounds tables. + */ +#define PR_MPX_ENABLE_MANAGEMENT 43 +#define PR_MPX_DISABLE_MANAGEMENT 44 + +#define PR_SET_FP_MODE 45 +#define PR_GET_FP_MODE 46 +# define PR_FP_MODE_FR (1 << 0) /* 64b FP registers */ +# define PR_FP_MODE_FRE (1 << 1) /* 32b compatibility */ + +/* Control the ambient capability set */ +#define PR_CAP_AMBIENT 47 +# define PR_CAP_AMBIENT_IS_SET 1 +# define PR_CAP_AMBIENT_RAISE 2 +# define PR_CAP_AMBIENT_LOWER 3 +# define PR_CAP_AMBIENT_CLEAR_ALL 4 + +#endif /* _LINUX_PRCTL_H */ diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh index 322629423b49..8d8b37198666 100755 --- a/tools/perf/check-headers.sh +++ b/tools/perf/check-headers.sh @@ -6,6 +6,7 @@ include/uapi/drm/i915_drm.h include/uapi/linux/fcntl.h include/uapi/linux/kvm.h include/uapi/linux/perf_event.h +include/uapi/linux/prctl.h include/uapi/linux/sched.h include/uapi/linux/stat.h include/uapi/linux/vhost.h -- cgit v1.2.3 From d688d0376c6eb452565c16c95b26cd2c95aa8a82 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 26 Oct 2017 15:19:35 -0300 Subject: perf trace beauty prctl: Generate 'option' string table from kernel headers This is one more case where the way that syscall parameter values are defined in kernel headers are easy to parse using a shell script that will then generate the string table that gets used by the prctl 'option' argument beautifier. This way as soon as the header syncronization mechanism in perf's build system detects a change in a copy of a kernel ABI header and that file is syncronized, we get 'perf trace' updated automagically. Further work needed for the PR_SET_ values, as well for using eBPF to copy the non-integer arguments to/from the kernel. E.g.: System wide prctl tracing: # perf trace -e prctl 1668.028 ( 0.025 ms): TaskSchedulerR/10649 prctl(option: SET_NAME, arg2: 0x2b61d5db15d0) = 0 3365.663 ( 0.018 ms): chrome/10650 prctl(option: SET_SECCOMP, arg2: 2, arg4: 8 ) = -1 EFAULT Bad address 3366.585 ( 0.010 ms): chrome/10650 prctl(option: SET_NO_NEW_PRIVS, arg2: 1 ) = 0 3367.173 ( 0.009 ms): TaskSchedulerR/10652 prctl(option: SET_NAME, arg2: 0x2b61d2aaa300) = 0 3367.222 ( 0.003 ms): TaskSchedulerR/10653 prctl(option: SET_NAME, arg2: 0x2b61d2aaa1e0) = 0 3367.244 ( 0.002 ms): TaskSchedulerR/10654 prctl(option: SET_NAME, arg2: 0x2b61d2aaa0c0) = 0 3367.265 ( 0.002 ms): TaskSchedulerR/10655 prctl(option: SET_NAME, arg2: 0x2b61d2ac7f90) = 0 3367.281 ( 0.002 ms): Chrome_ChildIO/10656 prctl(option: SET_NAME, arg2: 0x7efbe406bb11) = 0 3367.220 ( 0.004 ms): TaskSchedulerS/10651 prctl(option: SET_NAME, arg2: 0x2b61d2ac1be0) = 0 3370.906 ( 0.010 ms): GpuMemoryThrea/10657 prctl(option: SET_NAME, arg2: 0x7efbe386ab11) = 0 3370.983 ( 0.003 ms): File/10658 prctl(option: SET_NAME, arg2: 0x7efbe3069b11 ) = 0 3384.272 ( 0.020 ms): Compositor/10659 prctl(option: SET_NAME, arg2: 0x7efbe2868b11 ) = 0 3612.091 ( 0.012 ms): DOM Worker/11489 prctl(option: SET_NAME, arg2: 0x7f49ab97ebf2 ) = 0 4512.437 ( 0.004 ms): (sa1)/11490 prctl(option: SET_NAME, arg2: 0x7ffca15af844 ) = 0 4512.468 ( 0.002 ms): (sa1)/11490 prctl(option: SET_MM, arg2: ARG_START, arg3: 0x7f5cb7c81000) = 0 4512.472 ( 0.001 ms): (sa1)/11490 prctl(option: SET_MM, arg2: ARG_END, arg3: 0x7f5cb7c81006) = 0 4514.667 ( 0.002 ms): (sa1)/11490 prctl(option: GET_SECUREBITS ) = 0 Cc: Adrian Hunter Cc: Andy Lutomirski Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-q0s2uw579o5ei6xlh2zjirgz@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.perf | 13 +++++- tools/perf/builtin-trace.c | 5 +- tools/perf/trace/beauty/Build | 1 + tools/perf/trace/beauty/beauty.h | 9 ++++ tools/perf/trace/beauty/prctl.c | 82 +++++++++++++++++++++++++++++++++ tools/perf/trace/beauty/prctl_option.sh | 17 +++++++ 6 files changed, 124 insertions(+), 3 deletions(-) create mode 100644 tools/perf/trace/beauty/prctl.c create mode 100755 tools/perf/trace/beauty/prctl_option.sh (limited to 'tools/perf') diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 5f7408118a2d..c872ca607b39 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -448,6 +448,13 @@ madvise_behavior_tbl := $(srctree)/tools/perf/trace/beauty/madvise_behavior.sh $(madvise_behavior_array): $(madvise_hdr_dir)/mman-common.h $(madvise_behavior_tbl) $(Q)$(SHELL) '$(madvise_behavior_tbl)' $(madvise_hdr_dir) > $@ +prctl_option_array := $(beauty_outdir)/prctl_option_array.c +prctl_hdr_dir := $(srctree)/tools/include/uapi/linux/ +prctl_option_tbl := $(srctree)/tools/perf/trace/beauty/prctl_option.sh + +$(prctl_option_array): $(prctl_hdr_dir)/prctl.h $(prctl_option_tbl) + $(Q)$(SHELL) '$(prctl_option_tbl)' $(prctl_hdr_dir) > $@ + all: shell_compatibility_test $(ALL_PROGRAMS) $(LANG_BINDINGS) $(OTHER_PROGRAMS) $(OUTPUT)python/perf.so: $(PYTHON_EXT_SRCS) $(PYTHON_EXT_DEPS) $(LIBTRACEEVENT_DYNAMIC_LIST) @@ -549,7 +556,8 @@ prepare: $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h archheaders $(drm_ioc $(kvm_ioctl_array) \ $(vhost_virtio_ioctl_array) \ $(madvise_behavior_array) \ - $(perf_ioctl_array) + $(perf_ioctl_array) \ + $(prctl_option_array) $(OUTPUT)%.o: %.c prepare FORCE $(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=$(build-dir) $@ @@ -829,7 +837,8 @@ clean:: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clea $(OUTPUT)$(sndrv_pcm_ioctl_array) \ $(OUTPUT)$(kvm_ioctl_array) \ $(OUTPUT)$(vhost_virtio_ioctl_array) \ - $(OUTPUT)$(perf_ioctl_array) + $(OUTPUT)$(perf_ioctl_array) \ + $(OUTPUT)$(prctl_option_array) $(QUIET_SUBDIR0)Documentation $(QUIET_SUBDIR1) clean # diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 8b23982dd9f2..78855916f4b0 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -578,7 +578,6 @@ static struct syscall_fmt { } syscall_fmts[] = { { .name = "access", .arg = { [1] = { .scnprintf = SCA_ACCMODE, /* mode */ }, }, }, - { .name = "arch_prctl", .alias = "prctl", }, { .name = "bpf", .arg = { [0] = STRARRAY(cmd, bpf_cmd), }, }, { .name = "brk", .hexret = true, @@ -703,6 +702,10 @@ static struct syscall_fmt { [3] = { .scnprintf = SCA_INT, /* pkey */ }, }, }, { .name = "poll", .timeout = true, }, { .name = "ppoll", .timeout = true, }, + { .name = "prctl", .alias = "arch_prctl", + .arg = { [0] = { .scnprintf = SCA_PRCTL_OPTION, /* option */ }, + [1] = { .scnprintf = SCA_PRCTL_ARG2, /* arg2 */ }, + [2] = { .scnprintf = SCA_PRCTL_ARG3, /* arg3 */ }, }, }, { .name = "pread", .alias = "pread64", }, { .name = "preadv", .alias = "pread", }, { .name = "prlimit64", diff --git a/tools/perf/trace/beauty/Build b/tools/perf/trace/beauty/Build index 175d633c6b49..2f68b76ec39b 100644 --- a/tools/perf/trace/beauty/Build +++ b/tools/perf/trace/beauty/Build @@ -4,4 +4,5 @@ ifeq ($(SRCARCH),$(filter $(SRCARCH),x86)) libperf-y += ioctl.o endif libperf-y += pkey_alloc.o +libperf-y += prctl.o libperf-y += statx.o diff --git a/tools/perf/trace/beauty/beauty.h b/tools/perf/trace/beauty/beauty.h index 4b58581a6053..b29f94ef5d6a 100644 --- a/tools/perf/trace/beauty/beauty.h +++ b/tools/perf/trace/beauty/beauty.h @@ -84,6 +84,15 @@ size_t syscall_arg__scnprintf_pkey_alloc_access_rights(char *bf, size_t size, st size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size, struct syscall_arg *arg); #define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags +size_t syscall_arg__scnprintf_prctl_option(char *bf, size_t size, struct syscall_arg *arg); +#define SCA_PRCTL_OPTION syscall_arg__scnprintf_prctl_option + +size_t syscall_arg__scnprintf_prctl_arg2(char *bf, size_t size, struct syscall_arg *arg); +#define SCA_PRCTL_ARG2 syscall_arg__scnprintf_prctl_arg2 + +size_t syscall_arg__scnprintf_prctl_arg3(char *bf, size_t size, struct syscall_arg *arg); +#define SCA_PRCTL_ARG3 syscall_arg__scnprintf_prctl_arg3 + size_t syscall_arg__scnprintf_statx_flags(char *bf, size_t size, struct syscall_arg *arg); #define SCA_STATX_FLAGS syscall_arg__scnprintf_statx_flags diff --git a/tools/perf/trace/beauty/prctl.c b/tools/perf/trace/beauty/prctl.c new file mode 100644 index 000000000000..246130dad6c4 --- /dev/null +++ b/tools/perf/trace/beauty/prctl.c @@ -0,0 +1,82 @@ +/* + * trace/beauty/prctl.c + * + * Copyright (C) 2017, Red Hat Inc, Arnaldo Carvalho de Melo + * + * Released under the GPL v2. (and only v2, not any later version) + */ + +#include "trace/beauty/beauty.h" +#include +#include + +#include "trace/beauty/generated/prctl_option_array.c" + +static size_t prctl__scnprintf_option(int option, char *bf, size_t size) +{ + static DEFINE_STRARRAY(prctl_options); + return strarray__scnprintf(&strarray__prctl_options, bf, size, "%d", option); +} + +static size_t prctl__scnprintf_set_mm(int option, char *bf, size_t size) +{ + static DEFINE_STRARRAY(prctl_set_mm_options); + return strarray__scnprintf(&strarray__prctl_set_mm_options, bf, size, "%d", option); +} + +size_t syscall_arg__scnprintf_prctl_arg2(char *bf, size_t size, struct syscall_arg *arg) +{ + int option = syscall_arg__val(arg, 0); + + if (option == PR_SET_MM) + return prctl__scnprintf_set_mm(arg->val, bf, size); + /* + * We still don't grab the contents of pointers on entry or exit, + * so just print them as hex numbers + */ + if (option == PR_SET_NAME) + return syscall_arg__scnprintf_hex(bf, size, arg); + + return syscall_arg__scnprintf_long(bf, size, arg); +} + +size_t syscall_arg__scnprintf_prctl_arg3(char *bf, size_t size, struct syscall_arg *arg) +{ + int option = syscall_arg__val(arg, 0); + + if (option == PR_SET_MM) + return syscall_arg__scnprintf_hex(bf, size, arg); + + return syscall_arg__scnprintf_long(bf, size, arg); +} + +size_t syscall_arg__scnprintf_prctl_option(char *bf, size_t size, struct syscall_arg *arg) +{ + unsigned long option = arg->val; + enum { + SPO_ARG2 = (1 << 1), + SPO_ARG3 = (1 << 2), + SPO_ARG4 = (1 << 3), + SPO_ARG5 = (1 << 4), + SPO_ARG6 = (1 << 5), + }; + const u8 all_but2 = SPO_ARG3 | SPO_ARG4 | SPO_ARG5 | SPO_ARG6; + const u8 all = SPO_ARG2 | all_but2; + const u8 masks[] = { + [PR_GET_DUMPABLE] = all, + [PR_SET_DUMPABLE] = all_but2, + [PR_SET_NAME] = all_but2, + [PR_GET_CHILD_SUBREAPER] = all_but2, + [PR_SET_CHILD_SUBREAPER] = all_but2, + [PR_GET_SECUREBITS] = all, + [PR_SET_SECUREBITS] = all_but2, + [PR_SET_MM] = SPO_ARG4 | SPO_ARG5 | SPO_ARG6, + [PR_GET_PDEATHSIG] = all, + [PR_SET_PDEATHSIG] = all_but2, + }; + + if (option < ARRAY_SIZE(masks)) + arg->mask |= masks[option]; + + return prctl__scnprintf_option(option, bf, size); +} diff --git a/tools/perf/trace/beauty/prctl_option.sh b/tools/perf/trace/beauty/prctl_option.sh new file mode 100755 index 000000000000..0be4138fbe71 --- /dev/null +++ b/tools/perf/trace/beauty/prctl_option.sh @@ -0,0 +1,17 @@ +#!/bin/sh + +header_dir=$1 + +printf "static const char *prctl_options[] = {\n" +regex='^#define[[:space:]]+PR_([GS]ET\w+)[[:space:]]*([[:xdigit:]]+).*' +egrep $regex ${header_dir}/prctl.h | grep -v PR_SET_PTRACER | \ + sed -r "s/$regex/\2 \1/g" | \ + sort -n | xargs printf "\t[%s] = \"%s\",\n" +printf "};\n" + +printf "static const char *prctl_set_mm_options[] = {\n" +regex='^#[[:space:]]+define[[:space:]]+PR_SET_MM_(\w+)[[:space:]]*([[:digit:]]+).*' +egrep $regex ${header_dir}/prctl.h | \ + sed -r "s/$regex/\2 \1/g" | \ + sort -n | xargs printf "\t[%s] = \"%s\",\n" +printf "};\n" -- cgit v1.2.3 From 642ee1c6df4c8571d8a6846a2623fb54f925ef92 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 30 Oct 2017 13:11:15 -0300 Subject: perf script: Print information about per-event-dump files For a file generated by "perf sched record sleep 50": # perf script --per-event-dump [ perf script: Wrote 23.121 MB perf.data.sched:sched_switch.dump (206015 samples) ] [ perf script: Wrote 0.000 MB perf.data.sched:sched_stat_wait.dump (0 samples) ] [ perf script: Wrote 0.000 MB perf.data.sched:sched_stat_sleep.dump (0 samples) ] [ perf script: Wrote 0.000 MB perf.data.sched:sched_stat_iowait.dump (0 samples) ] [ perf script: Wrote 17.680 MB perf.data.sched:sched_stat_runtime.dump (129342 samples) ] [ perf script: Wrote 0.000 MB perf.data.sched:sched_process_fork.dump (24 samples) ] [ perf script: Wrote 11.328 MB perf.data.sched:sched_wakeup.dump (106770 samples) ] [ perf script: Wrote 0.000 MB perf.data.sched:sched_wakeup_new.dump (24 samples) ] [ perf script: Wrote 2.477 MB perf.data.sched:sched_migrate_task.dump (20434 samples) ] # Similar to what is generated by 'perf record'. Based-on-a-patch-by: yuzhoujian Suggested-by: Jiri Olsa Cc: Adrian Hunter Cc: David Ahern Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/r/1508921599-10832-3-git-send-email-yuzhoujian@didichuxing.com Link: http://lkml.kernel.org/n/tip-xuketkkjuk2c0qz546ypd1u7@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-script.c | 77 ++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 69 insertions(+), 8 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index fb5e49b3bc44..4d198f73b29a 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -210,6 +210,51 @@ static struct { }, }; +struct perf_evsel_script { + char *filename; + FILE *fp; + u64 samples; +}; + +static struct perf_evsel_script *perf_evsel_script__new(struct perf_evsel *evsel, + struct perf_data_file *file) +{ + struct perf_evsel_script *es = malloc(sizeof(*es)); + + if (es != NULL) { + if (asprintf(&es->filename, "%s.%s.dump", file->path, perf_evsel__name(evsel)) < 0) + goto out_free; + es->fp = fopen(es->filename, "w"); + if (es->fp == NULL) + goto out_free_filename; + es->samples = 0; + } + + return es; +out_free_filename: + zfree(&es->filename); +out_free: + free(es); + return NULL; +} + +static void perf_evsel_script__delete(struct perf_evsel_script *es) +{ + zfree(&es->filename); + fclose(es->fp); + es->fp = NULL; + free(es); +} + +static int perf_evsel_script__fprintf(struct perf_evsel_script *es, FILE *fp) +{ + struct stat st; + + fstat(fileno(es->fp), &st); + return fprintf(fp, "[ perf script: Wrote %.3f MB %s (%" PRIu64 " samples) ]\n", + st.st_size / 1024.0 / 1024.0, es->filename, es->samples); +} + static inline int output_type(unsigned int type) { switch (type) { @@ -1439,11 +1484,14 @@ static void process_event(struct perf_script *script, struct thread *thread = al->thread; struct perf_event_attr *attr = &evsel->attr; unsigned int type = output_type(attr->type); - FILE *fp = evsel->priv; + struct perf_evsel_script *es = evsel->priv; + FILE *fp = es->fp; if (output[type].fields == 0) return; + ++es->samples; + perf_sample__fprintf_start(sample, thread, evsel, fp); if (PRINT_FIELD(PERIOD)) @@ -1896,7 +1944,7 @@ static void perf_script__fclose_per_event_dump(struct perf_script *script) evlist__for_each_entry(evlist, evsel) { if (!evsel->priv) break; - fclose(evsel->priv); + perf_evsel_script__delete(evsel->priv); evsel->priv = NULL; } } @@ -1906,10 +1954,7 @@ static int perf_script__fopen_per_event_dump(struct perf_script *script) struct perf_evsel *evsel; evlist__for_each_entry(script->session->evlist, evsel) { - char filename[PATH_MAX]; - snprintf(filename, sizeof(filename), "%s.%s.dump", - script->session->file->path, perf_evsel__name(evsel)); - evsel->priv = fopen(filename, "w"); + evsel->priv = perf_evsel_script__new(evsel, script->session->file); if (evsel->priv == NULL) goto out_err_fclose; } @@ -1924,16 +1969,32 @@ out_err_fclose: static int perf_script__setup_per_event_dump(struct perf_script *script) { struct perf_evsel *evsel; + static struct perf_evsel_script es_stdout; if (script->per_event_dump) return perf_script__fopen_per_event_dump(script); + es_stdout.fp = stdout; + evlist__for_each_entry(script->session->evlist, evsel) - evsel->priv = stdout; + evsel->priv = &es_stdout; return 0; } +static void perf_script__exit_per_event_dump_stats(struct perf_script *script) +{ + struct perf_evsel *evsel; + + evlist__for_each_entry(script->session->evlist, evsel) { + struct perf_evsel_script *es = evsel->priv; + + perf_evsel_script__fprintf(es, stdout); + perf_evsel_script__delete(es); + evsel->priv = NULL; + } +} + static int __cmd_script(struct perf_script *script) { int ret; @@ -1963,7 +2024,7 @@ static int __cmd_script(struct perf_script *script) ret = perf_session__process_events(script->session); if (script->per_event_dump) - perf_script__fclose_per_event_dump(script); + perf_script__exit_per_event_dump_stats(script); if (debug_mode) pr_err("Misordered timestamps: %" PRIu64 "\n", nr_unordered); -- cgit v1.2.3 From 8ceb41d7e305f186543c58178d2e1fe34f708948 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 23 Jan 2017 22:07:59 +0100 Subject: perf tools: Rename struct perf_data_file to perf_data Rename struct perf_data_file to perf_data, because we will add the possibility to have multiple files under perf.data, so the 'perf_data' name fits better. Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: Changbin Du Cc: David Ahern Cc: Jin Yao Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-39wn4d77phel3dgkzo3lyan0@git.kernel.org [ Fixup recent changes in 'perf script --per-event-dump' ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-annotate.c | 10 ++--- tools/perf/builtin-buildid-cache.c | 8 ++-- tools/perf/builtin-buildid-list.c | 8 ++-- tools/perf/builtin-c2c.c | 10 ++--- tools/perf/builtin-diff.c | 18 ++++---- tools/perf/builtin-evlist.c | 4 +- tools/perf/builtin-inject.c | 26 +++++------ tools/perf/builtin-kmem.c | 8 ++-- tools/perf/builtin-kvm.c | 6 +-- tools/perf/builtin-lock.c | 4 +- tools/perf/builtin-mem.c | 4 +- tools/perf/builtin-record.c | 50 ++++++++++----------- tools/perf/builtin-report.c | 14 +++--- tools/perf/builtin-sched.c | 8 ++-- tools/perf/builtin-script.c | 20 ++++----- tools/perf/builtin-stat.c | 32 ++++++------- tools/perf/builtin-timechart.c | 6 +-- tools/perf/builtin-trace.c | 4 +- tools/perf/tests/topology.c | 10 ++--- tools/perf/util/auxtrace.c | 4 +- tools/perf/util/data-convert-bt.c | 6 +-- tools/perf/util/data.c | 92 +++++++++++++++++++------------------- tools/perf/util/data.h | 32 ++++++------- tools/perf/util/header.c | 20 ++++----- tools/perf/util/intel-bts.c | 6 +-- tools/perf/util/intel-pt.c | 6 +-- tools/perf/util/jit.h | 2 +- tools/perf/util/jitdump.c | 10 ++--- tools/perf/util/session.c | 44 +++++++++--------- tools/perf/util/session.h | 4 +- 30 files changed, 238 insertions(+), 238 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index c38373195c4a..2d06be81a109 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -356,7 +356,7 @@ static int __cmd_annotate(struct perf_annotate *ann) } if (total_nr_samples == 0) { - ui__error("The %s file has no samples!\n", session->file->path); + ui__error("The %s file has no samples!\n", session->data->path); goto out; } @@ -400,7 +400,7 @@ int cmd_annotate(int argc, const char **argv) .ordering_requires_timestamps = true, }, }; - struct perf_data_file file = { + struct perf_data data = { .mode = PERF_DATA_MODE_READ, }; struct option options[] = { @@ -410,7 +410,7 @@ int cmd_annotate(int argc, const char **argv) "only consider symbols in these dsos"), OPT_STRING('s', "symbol", &annotate.sym_hist_filter, "symbol", "symbol to annotate"), - OPT_BOOLEAN('f', "force", &file.force, "don't complain, do it"), + OPT_BOOLEAN('f', "force", &data.force, "don't complain, do it"), OPT_INCR('v', "verbose", &verbose, "be more verbose (show symbol address, etc)"), OPT_BOOLEAN('q', "quiet", &quiet, "do now show any message"), @@ -482,9 +482,9 @@ int cmd_annotate(int argc, const char **argv) if (quiet) perf_quiet_option(); - file.path = input_name; + data.path = input_name; - annotate.session = perf_session__new(&file, false, &annotate.tool); + annotate.session = perf_session__new(&data, false, &annotate.tool); if (annotate.session == NULL) return -1; diff --git a/tools/perf/builtin-buildid-cache.c b/tools/perf/builtin-buildid-cache.c index e3eb6240ced0..9fceae47a02e 100644 --- a/tools/perf/builtin-buildid-cache.c +++ b/tools/perf/builtin-buildid-cache.c @@ -311,7 +311,7 @@ int cmd_buildid_cache(int argc, const char **argv) *kcore_filename = NULL; char sbuf[STRERR_BUFSIZE]; - struct perf_data_file file = { + struct perf_data data = { .mode = PERF_DATA_MODE_READ, }; struct perf_session *session = NULL; @@ -352,10 +352,10 @@ int cmd_buildid_cache(int argc, const char **argv) nsi = nsinfo__new(ns_id); if (missing_filename) { - file.path = missing_filename; - file.force = force; + data.path = missing_filename; + data.force = force; - session = perf_session__new(&file, false, NULL); + session = perf_session__new(&data, false, NULL); if (session == NULL) return -1; } diff --git a/tools/perf/builtin-buildid-list.c b/tools/perf/builtin-buildid-list.c index fdaca16e0c74..72bdc0eba990 100644 --- a/tools/perf/builtin-buildid-list.c +++ b/tools/perf/builtin-buildid-list.c @@ -50,7 +50,7 @@ static bool dso__skip_buildid(struct dso *dso, int with_hits) static int perf_session__list_build_ids(bool force, bool with_hits) { struct perf_session *session; - struct perf_data_file file = { + struct perf_data data = { .path = input_name, .mode = PERF_DATA_MODE_READ, .force = force, @@ -63,7 +63,7 @@ static int perf_session__list_build_ids(bool force, bool with_hits) if (filename__fprintf_build_id(input_name, stdout) > 0) goto out; - session = perf_session__new(&file, false, &build_id__mark_dso_hit_ops); + session = perf_session__new(&data, false, &build_id__mark_dso_hit_ops); if (session == NULL) return -1; @@ -71,7 +71,7 @@ static int perf_session__list_build_ids(bool force, bool with_hits) * We take all buildids when the file contains AUX area tracing data * because we do not decode the trace because it would take too long. */ - if (!perf_data_file__is_pipe(&file) && + if (!perf_data__is_pipe(&data) && perf_header__has_feat(&session->header, HEADER_AUXTRACE)) with_hits = false; @@ -79,7 +79,7 @@ static int perf_session__list_build_ids(bool force, bool with_hits) * in pipe-mode, the only way to get the buildids is to parse * the record stream. Buildids are stored as RECORD_HEADER_BUILD_ID */ - if (with_hits || perf_data_file__is_pipe(&file)) + if (with_hits || perf_data__is_pipe(&data)) perf_session__process_events(session); perf_session__fprintf_dsos_buildid(session, stdout, dso__skip_buildid, with_hits); diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index bb1ee22bd221..87a52d09da29 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -2523,7 +2523,7 @@ static int perf_c2c__report(int argc, const char **argv) { struct perf_session *session; struct ui_progress prog; - struct perf_data_file file = { + struct perf_data data = { .mode = PERF_DATA_MODE_READ, }; char callchain_default_opt[] = CALLCHAIN_DEFAULT_OPT; @@ -2572,8 +2572,8 @@ static int perf_c2c__report(int argc, const char **argv) if (!input_name || !strlen(input_name)) input_name = "perf.data"; - file.path = input_name; - file.force = symbol_conf.force; + data.path = input_name; + data.force = symbol_conf.force; err = setup_display(display); if (err) @@ -2591,7 +2591,7 @@ static int perf_c2c__report(int argc, const char **argv) goto out; } - session = perf_session__new(&file, 0, &c2c.tool); + session = perf_session__new(&data, 0, &c2c.tool); if (session == NULL) { pr_debug("No memory for session\n"); goto out; @@ -2611,7 +2611,7 @@ static int perf_c2c__report(int argc, const char **argv) goto out_session; /* No pipe support at the moment. */ - if (perf_data_file__is_pipe(session->file)) { + if (perf_data__is_pipe(session->data)) { pr_debug("No pipe support at the moment.\n"); goto out_session; } diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c index 0cd4cf6a344b..5292e3d13cec 100644 --- a/tools/perf/builtin-diff.c +++ b/tools/perf/builtin-diff.c @@ -47,7 +47,7 @@ struct diff_hpp_fmt { struct data__file { struct perf_session *session; - struct perf_data_file file; + struct perf_data data; int idx; struct hists *hists; struct diff_hpp_fmt fmt[PERF_HPP_DIFF__MAX_INDEX]; @@ -707,7 +707,7 @@ static void data__fprintf(void) data__for_each_file(i, d) fprintf(stdout, "# [%d] %s %s\n", - d->idx, d->file.path, + d->idx, d->data.path, !d->idx ? "(Baseline)" : ""); fprintf(stdout, "#\n"); @@ -776,16 +776,16 @@ static int __cmd_diff(void) int ret = -EINVAL, i; data__for_each_file(i, d) { - d->session = perf_session__new(&d->file, false, &tool); + d->session = perf_session__new(&d->data, false, &tool); if (!d->session) { - pr_err("Failed to open %s\n", d->file.path); + pr_err("Failed to open %s\n", d->data.path); ret = -1; goto out_delete; } ret = perf_session__process_events(d->session); if (ret) { - pr_err("Failed to process %s\n", d->file.path); + pr_err("Failed to process %s\n", d->data.path); goto out_delete; } @@ -1286,11 +1286,11 @@ static int data_init(int argc, const char **argv) return -ENOMEM; data__for_each_file(i, d) { - struct perf_data_file *file = &d->file; + struct perf_data *data = &d->data; - file->path = use_default ? defaults[i] : argv[i]; - file->mode = PERF_DATA_MODE_READ, - file->force = force, + data->path = use_default ? defaults[i] : argv[i]; + data->mode = PERF_DATA_MODE_READ, + data->force = force, d->idx = i; } diff --git a/tools/perf/builtin-evlist.c b/tools/perf/builtin-evlist.c index 6d210e40d611..cd79c26e16a4 100644 --- a/tools/perf/builtin-evlist.c +++ b/tools/perf/builtin-evlist.c @@ -21,14 +21,14 @@ static int __cmd_evlist(const char *file_name, struct perf_attr_details *details { struct perf_session *session; struct perf_evsel *pos; - struct perf_data_file file = { + struct perf_data data = { .path = file_name, .mode = PERF_DATA_MODE_READ, .force = details->force, }; bool has_tracepoint = false; - session = perf_session__new(&file, 0, NULL); + session = perf_session__new(&data, 0, NULL); if (session == NULL) return -1; diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index 2b8032908fb2..ac7486f6c46f 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -35,7 +35,7 @@ struct perf_inject { bool strip; bool jit_mode; const char *input_name; - struct perf_data_file output; + struct perf_data output; u64 bytes_written; u64 aux_id; struct list_head samples; @@ -52,7 +52,7 @@ static int output_bytes(struct perf_inject *inject, void *buf, size_t sz) { ssize_t size; - size = perf_data_file__write(&inject->output, buf, sz); + size = perf_data__write(&inject->output, buf, sz); if (size < 0) return -errno; @@ -154,11 +154,11 @@ static s64 perf_event__repipe_auxtrace(struct perf_tool *tool, return ret; } - if (perf_data_file__is_pipe(session->file) || !session->one_mmap) { + if (perf_data__is_pipe(session->data) || !session->one_mmap) { ret = output_bytes(inject, event, event->header.size); if (ret < 0) return ret; - ret = copy_bytes(inject, perf_data_file__fd(session->file), + ret = copy_bytes(inject, perf_data__fd(session->data), event->auxtrace.size); } else { ret = output_bytes(inject, event, @@ -637,8 +637,8 @@ static int __cmd_inject(struct perf_inject *inject) { int ret = -EINVAL; struct perf_session *session = inject->session; - struct perf_data_file *file_out = &inject->output; - int fd = perf_data_file__fd(file_out); + struct perf_data *data_out = &inject->output; + int fd = perf_data__fd(data_out); u64 output_data_offset; signal(SIGINT, sig_handler); @@ -693,14 +693,14 @@ static int __cmd_inject(struct perf_inject *inject) if (!inject->itrace_synth_opts.set) auxtrace_index__free(&session->auxtrace_index); - if (!file_out->is_pipe) + if (!data_out->is_pipe) lseek(fd, output_data_offset, SEEK_SET); ret = perf_session__process_events(session); if (ret) return ret; - if (!file_out->is_pipe) { + if (!data_out->is_pipe) { if (inject->build_ids) perf_header__set_feat(&session->header, HEADER_BUILD_ID); @@ -779,7 +779,7 @@ int cmd_inject(int argc, const char **argv) .mode = PERF_DATA_MODE_WRITE, }, }; - struct perf_data_file file = { + struct perf_data data = { .mode = PERF_DATA_MODE_READ, }; int ret; @@ -801,7 +801,7 @@ int cmd_inject(int argc, const char **argv) "be more verbose (show build ids, etc)"), OPT_STRING(0, "kallsyms", &symbol_conf.kallsyms_name, "file", "kallsyms pathname"), - OPT_BOOLEAN('f', "force", &file.force, "don't complain, do it"), + OPT_BOOLEAN('f', "force", &data.force, "don't complain, do it"), OPT_CALLBACK_OPTARG(0, "itrace", &inject.itrace_synth_opts, NULL, "opts", "Instruction Tracing options", itrace_parse_synth_opts), @@ -829,15 +829,15 @@ int cmd_inject(int argc, const char **argv) return -1; } - if (perf_data_file__open(&inject.output)) { + if (perf_data__open(&inject.output)) { perror("failed to create output file"); return -1; } inject.tool.ordered_events = inject.sched_stat; - file.path = inject.input_name; - inject.session = perf_session__new(&file, true, &inject.tool); + data.path = inject.input_name; + inject.session = perf_session__new(&data, true, &inject.tool); if (inject.session == NULL) return -1; diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c index d8f25ef8157b..d45740a3e5f1 100644 --- a/tools/perf/builtin-kmem.c +++ b/tools/perf/builtin-kmem.c @@ -1893,7 +1893,7 @@ int cmd_kmem(int argc, const char **argv) { const char * const default_slab_sort = "frag,hit,bytes"; const char * const default_page_sort = "bytes,hit"; - struct perf_data_file file = { + struct perf_data data = { .mode = PERF_DATA_MODE_READ, }; const struct option kmem_options[] = { @@ -1909,7 +1909,7 @@ int cmd_kmem(int argc, const char **argv) "page, order, migtype, gfp", parse_sort_opt), OPT_CALLBACK('l', "line", NULL, "num", "show n lines", parse_line_opt), OPT_BOOLEAN(0, "raw-ip", &raw_ip, "show raw ip instead of symbol"), - OPT_BOOLEAN('f', "force", &file.force, "don't complain, do it"), + OPT_BOOLEAN('f', "force", &data.force, "don't complain, do it"), OPT_CALLBACK_NOOPT(0, "slab", NULL, NULL, "Analyze slab allocator", parse_slab_opt), OPT_CALLBACK_NOOPT(0, "page", NULL, NULL, "Analyze page allocator", @@ -1949,9 +1949,9 @@ int cmd_kmem(int argc, const char **argv) return __cmd_record(argc, argv); } - file.path = input_name; + data.path = input_name; - kmem_session = session = perf_session__new(&file, false, &perf_kmem); + kmem_session = session = perf_session__new(&data, false, &perf_kmem); if (session == NULL) return -1; diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c index 721f4f91291a..4301fc34f23c 100644 --- a/tools/perf/builtin-kvm.c +++ b/tools/perf/builtin-kvm.c @@ -1067,7 +1067,7 @@ static int read_events(struct perf_kvm_stat *kvm) .namespaces = perf_event__process_namespaces, .ordered_events = true, }; - struct perf_data_file file = { + struct perf_data file = { .path = kvm->file_name, .mode = PERF_DATA_MODE_READ, .force = kvm->force, @@ -1358,7 +1358,7 @@ static int kvm_events_live(struct perf_kvm_stat *kvm, "perf kvm stat live []", NULL }; - struct perf_data_file file = { + struct perf_data data = { .mode = PERF_DATA_MODE_WRITE, }; @@ -1432,7 +1432,7 @@ static int kvm_events_live(struct perf_kvm_stat *kvm, /* * perf session */ - kvm->session = perf_session__new(&file, false, &kvm->tool); + kvm->session = perf_session__new(&data, false, &kvm->tool); if (kvm->session == NULL) { err = -1; goto out; diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c index ff98652484a7..2e281f7b0fca 100644 --- a/tools/perf/builtin-lock.c +++ b/tools/perf/builtin-lock.c @@ -864,13 +864,13 @@ static int __cmd_report(bool display_info) .namespaces = perf_event__process_namespaces, .ordered_events = true, }; - struct perf_data_file file = { + struct perf_data data = { .path = input_name, .mode = PERF_DATA_MODE_READ, .force = force, }; - session = perf_session__new(&file, false, &eops); + session = perf_session__new(&data, false, &eops); if (!session) { pr_err("Initializing perf session failed\n"); return -1; diff --git a/tools/perf/builtin-mem.c b/tools/perf/builtin-mem.c index 6940490bc3f9..5a4a6f8e614d 100644 --- a/tools/perf/builtin-mem.c +++ b/tools/perf/builtin-mem.c @@ -236,13 +236,13 @@ static int process_sample_event(struct perf_tool *tool, static int report_raw_events(struct perf_mem *mem) { - struct perf_data_file file = { + struct perf_data data = { .path = input_name, .mode = PERF_DATA_MODE_READ, .force = mem->force, }; int ret; - struct perf_session *session = perf_session__new(&file, false, + struct perf_session *session = perf_session__new(&data, false, &mem->tool); if (session == NULL) diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index a6cbf1640269..0ab7dd0e4f2b 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -66,7 +66,7 @@ struct record { struct perf_tool tool; struct record_opts opts; u64 bytes_written; - struct perf_data_file file; + struct perf_data data; struct auxtrace_record *itr; struct perf_evlist *evlist; struct perf_session *session; @@ -107,7 +107,7 @@ static bool switch_output_time(struct record *rec) static int record__write(struct record *rec, void *bf, size_t size) { - if (perf_data_file__write(rec->session->file, bf, size) < 0) { + if (perf_data__write(rec->session->data, bf, size) < 0) { pr_err("failed to write perf data, error: %m\n"); return -1; } @@ -173,13 +173,13 @@ static int record__process_auxtrace(struct perf_tool *tool, size_t len1, void *data2, size_t len2) { struct record *rec = container_of(tool, struct record, tool); - struct perf_data_file *file = &rec->file; + struct perf_data *data = &rec->data; size_t padding; u8 pad[8] = {0}; - if (!perf_data_file__is_pipe(file)) { + if (!perf_data__is_pipe(data)) { off_t file_offset; - int fd = perf_data_file__fd(file); + int fd = perf_data__fd(data); int err; file_offset = lseek(fd, 0, SEEK_CUR); @@ -398,10 +398,10 @@ static int process_sample_event(struct perf_tool *tool, static int process_buildids(struct record *rec) { - struct perf_data_file *file = &rec->file; + struct perf_data *data = &rec->data; struct perf_session *session = rec->session; - if (file->size == 0) + if (data->size == 0) return 0; /* @@ -544,14 +544,14 @@ static void record__init_features(struct record *rec) static void record__finish_output(struct record *rec) { - struct perf_data_file *file = &rec->file; - int fd = perf_data_file__fd(file); + struct perf_data *data = &rec->data; + int fd = perf_data__fd(data); - if (file->is_pipe) + if (data->is_pipe) return; rec->session->header.data_size += rec->bytes_written; - file->size = lseek(perf_data_file__fd(file), 0, SEEK_CUR); + data->size = lseek(perf_data__fd(data), 0, SEEK_CUR); if (!rec->no_buildid) { process_buildids(rec); @@ -590,7 +590,7 @@ static int record__synthesize(struct record *rec, bool tail); static int record__switch_output(struct record *rec, bool at_exit) { - struct perf_data_file *file = &rec->file; + struct perf_data *data = &rec->data; int fd, err; /* Same Size: "2015122520103046"*/ @@ -608,7 +608,7 @@ record__switch_output(struct record *rec, bool at_exit) return -EINVAL; } - fd = perf_data_file__switch(file, timestamp, + fd = perf_data__switch(data, timestamp, rec->session->header.data_offset, at_exit); if (fd >= 0 && !at_exit) { @@ -618,7 +618,7 @@ record__switch_output(struct record *rec, bool at_exit) if (!quiet) fprintf(stderr, "[ perf record: Dump %s.%s ]\n", - file->path, timestamp); + data->path, timestamp); /* Output tracking events */ if (!at_exit) { @@ -693,16 +693,16 @@ static int record__synthesize(struct record *rec, bool tail) { struct perf_session *session = rec->session; struct machine *machine = &session->machines.host; - struct perf_data_file *file = &rec->file; + struct perf_data *data = &rec->data; struct record_opts *opts = &rec->opts; struct perf_tool *tool = &rec->tool; - int fd = perf_data_file__fd(file); + int fd = perf_data__fd(data); int err = 0; if (rec->opts.tail_synthesize != tail) return 0; - if (file->is_pipe) { + if (data->is_pipe) { err = perf_event__synthesize_features( tool, session, rec->evlist, process_synthesized_event); if (err < 0) { @@ -781,7 +781,7 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) struct machine *machine; struct perf_tool *tool = &rec->tool; struct record_opts *opts = &rec->opts; - struct perf_data_file *file = &rec->file; + struct perf_data *data = &rec->data; struct perf_session *session; bool disabled = false, draining = false; int fd; @@ -807,20 +807,20 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) signal(SIGUSR2, SIG_IGN); } - session = perf_session__new(file, false, tool); + session = perf_session__new(data, false, tool); if (session == NULL) { pr_err("Perf session creation failed.\n"); return -1; } - fd = perf_data_file__fd(file); + fd = perf_data__fd(data); rec->session = session; record__init_features(rec); if (forks) { err = perf_evlist__prepare_workload(rec->evlist, &opts->target, - argv, file->is_pipe, + argv, data->is_pipe, workload_exec_failed_signal); if (err < 0) { pr_err("Couldn't run the workload!\n"); @@ -856,7 +856,7 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) if (!rec->evlist->nr_groups) perf_header__clear_feat(&session->header, HEADER_GROUP_DESC); - if (file->is_pipe) { + if (data->is_pipe) { err = perf_header__write_pipe(fd); if (err < 0) goto out_child; @@ -1117,8 +1117,8 @@ out_child: samples[0] = '\0'; fprintf(stderr, "[ perf record: Captured and wrote %.3f MB %s%s%s ]\n", - perf_data_file__size(file) / 1024.0 / 1024.0, - file->path, postfix, samples); + perf_data__size(data) / 1024.0 / 1024.0, + data->path, postfix, samples); } out_delete_session: @@ -1482,7 +1482,7 @@ static struct option __record_options[] = { OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu", "list of cpus to monitor"), OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"), - OPT_STRING('o', "output", &record.file.path, "file", + OPT_STRING('o', "output", &record.data.path, "file", "output file name"), OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit, &record.opts.no_inherit_set, diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index f9dff652dcbd..0dc323772b5e 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -257,7 +257,7 @@ static int report__setup_sample_type(struct report *rep) { struct perf_session *session = rep->session; u64 sample_type = perf_evlist__combined_sample_type(session->evlist); - bool is_pipe = perf_data_file__is_pipe(session->file); + bool is_pipe = perf_data__is_pipe(session->data); if (session->itrace_synth_opts->callchain || (!is_pipe && @@ -568,7 +568,7 @@ static int __cmd_report(struct report *rep) int ret; struct perf_session *session = rep->session; struct perf_evsel *pos; - struct perf_data_file *file = session->file; + struct perf_data *data = session->data; signal(SIGINT, sig_handler); @@ -637,7 +637,7 @@ static int __cmd_report(struct report *rep) rep->nr_entries += evsel__hists(pos)->nr_entries; if (rep->nr_entries == 0) { - ui__error("The %s file has no samples!\n", file->path); + ui__error("The %s file has no samples!\n", data->path); return 0; } @@ -879,7 +879,7 @@ int cmd_report(int argc, const char **argv) "Show inline function"), OPT_END() }; - struct perf_data_file file = { + struct perf_data data = { .mode = PERF_DATA_MODE_READ, }; int ret = hists__init(); @@ -940,11 +940,11 @@ int cmd_report(int argc, const char **argv) input_name = "perf.data"; } - file.path = input_name; - file.force = symbol_conf.force; + data.path = input_name; + data.force = symbol_conf.force; repeat: - session = perf_session__new(&file, false, &report.tool); + session = perf_session__new(&data, false, &report.tool); if (session == NULL) return -1; diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index b7e8812ee80c..cb5410511f69 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -1700,14 +1700,14 @@ static int perf_sched__read_events(struct perf_sched *sched) { "sched:sched_migrate_task", process_sched_migrate_task_event, }, }; struct perf_session *session; - struct perf_data_file file = { + struct perf_data data = { .path = input_name, .mode = PERF_DATA_MODE_READ, .force = sched->force, }; int rc = -1; - session = perf_session__new(&file, false, &sched->tool); + session = perf_session__new(&data, false, &sched->tool); if (session == NULL) { pr_debug("No Memory for session\n"); return -1; @@ -2902,7 +2902,7 @@ static int perf_sched__timehist(struct perf_sched *sched) const struct perf_evsel_str_handler migrate_handlers[] = { { "sched:sched_migrate_task", timehist_migrate_task_event, }, }; - struct perf_data_file file = { + struct perf_data data = { .path = input_name, .mode = PERF_DATA_MODE_READ, .force = sched->force, @@ -2930,7 +2930,7 @@ static int perf_sched__timehist(struct perf_sched *sched) symbol_conf.use_callchain = sched->show_callchain; - session = perf_session__new(&file, false, &sched->tool); + session = perf_session__new(&data, false, &sched->tool); if (session == NULL) return -ENOMEM; diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 4d198f73b29a..8f8fa952b506 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -217,12 +217,12 @@ struct perf_evsel_script { }; static struct perf_evsel_script *perf_evsel_script__new(struct perf_evsel *evsel, - struct perf_data_file *file) + struct perf_data *data) { struct perf_evsel_script *es = malloc(sizeof(*es)); if (es != NULL) { - if (asprintf(&es->filename, "%s.%s.dump", file->path, perf_evsel__name(evsel)) < 0) + if (asprintf(&es->filename, "%s.%s.dump", data->path, perf_evsel__name(evsel)) < 0) goto out_free; es->fp = fopen(es->filename, "w"); if (es->fp == NULL) @@ -1954,7 +1954,7 @@ static int perf_script__fopen_per_event_dump(struct perf_script *script) struct perf_evsel *evsel; evlist__for_each_entry(script->session->evlist, evsel) { - evsel->priv = perf_evsel_script__new(evsel, script->session->file); + evsel->priv = perf_evsel_script__new(evsel, script->session->data); if (evsel->priv == NULL) goto out_err_fclose; } @@ -2590,14 +2590,14 @@ int find_scripts(char **scripts_array, char **scripts_path_array) char scripts_path[MAXPATHLEN], lang_path[MAXPATHLEN]; DIR *scripts_dir, *lang_dir; struct perf_session *session; - struct perf_data_file file = { + struct perf_data data = { .path = input_name, .mode = PERF_DATA_MODE_READ, }; char *temp; int i = 0; - session = perf_session__new(&file, false, NULL); + session = perf_session__new(&data, false, NULL); if (!session) return -1; @@ -2875,7 +2875,7 @@ int cmd_script(int argc, const char **argv) .ordering_requires_timestamps = true, }, }; - struct perf_data_file file = { + struct perf_data data = { .mode = PERF_DATA_MODE_READ, }; const struct option options[] = { @@ -2982,8 +2982,8 @@ int cmd_script(int argc, const char **argv) argc = parse_options_subcommand(argc, argv, options, script_subcommands, script_usage, PARSE_OPT_STOP_AT_NON_OPTION); - file.path = input_name; - file.force = symbol_conf.force; + data.path = input_name; + data.force = symbol_conf.force; if (argc > 1 && !strncmp(argv[0], "rec", strlen("rec"))) { rec_script_path = get_script_path(argv[1], RECORD_SUFFIX); @@ -3150,7 +3150,7 @@ int cmd_script(int argc, const char **argv) if (!script_name) setup_pager(); - session = perf_session__new(&file, false, &script.tool); + session = perf_session__new(&data, false, &script.tool); if (session == NULL) return -1; @@ -3206,7 +3206,7 @@ int cmd_script(int argc, const char **argv) goto out_delete; } - input = open(file.path, O_RDONLY); /* input_name */ + input = open(data.path, O_RDONLY); /* input_name */ if (input < 0) { err = -errno; perror("failed to open file"); diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 988bdfa5d832..85af6d291c06 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -175,7 +175,7 @@ static int print_free_counters_hint; struct perf_stat { bool record; - struct perf_data_file file; + struct perf_data data; struct perf_session *session; u64 bytes_written; struct perf_tool tool; @@ -253,7 +253,7 @@ static int create_perf_stat_counter(struct perf_evsel *evsel) * by attr->sample_type != 0, and we can't run it on * stat sessions. */ - if (!(STAT_RECORD && perf_stat.file.is_pipe)) + if (!(STAT_RECORD && perf_stat.data.is_pipe)) attr->sample_type = PERF_SAMPLE_IDENTIFIER; /* @@ -295,7 +295,7 @@ static int process_synthesized_event(struct perf_tool *tool __maybe_unused, struct perf_sample *sample __maybe_unused, struct machine *machine __maybe_unused) { - if (perf_data_file__write(&perf_stat.file, event, event->header.size) < 0) { + if (perf_data__write(&perf_stat.data, event, event->header.size) < 0) { pr_err("failed to write perf data, error: %m\n"); return -1; } @@ -628,7 +628,7 @@ static int __run_perf_stat(int argc, const char **argv) size_t l; int status = 0; const bool forks = (argc > 0); - bool is_pipe = STAT_RECORD ? perf_stat.file.is_pipe : false; + bool is_pipe = STAT_RECORD ? perf_stat.data.is_pipe : false; struct perf_evsel_config_term *err_term; if (interval) { @@ -719,10 +719,10 @@ try_again: } if (STAT_RECORD) { - int err, fd = perf_data_file__fd(&perf_stat.file); + int err, fd = perf_data__fd(&perf_stat.data); if (is_pipe) { - err = perf_header__write_pipe(perf_data_file__fd(&perf_stat.file)); + err = perf_header__write_pipe(perf_data__fd(&perf_stat.data)); } else { err = perf_session__write_header(perf_stat.session, evsel_list, fd, false); @@ -1696,7 +1696,7 @@ static void print_counters(struct timespec *ts, int argc, const char **argv) char buf[64], *prefix = NULL; /* Do not print anything if we record to the pipe. */ - if (STAT_RECORD && perf_stat.file.is_pipe) + if (STAT_RECORD && perf_stat.data.is_pipe) return; if (interval) @@ -2406,20 +2406,20 @@ static void init_features(struct perf_session *session) static int __cmd_record(int argc, const char **argv) { struct perf_session *session; - struct perf_data_file *file = &perf_stat.file; + struct perf_data *data = &perf_stat.data; argc = parse_options(argc, argv, stat_options, stat_record_usage, PARSE_OPT_STOP_AT_NON_OPTION); if (output_name) - file->path = output_name; + data->path = output_name; if (run_count != 1 || forever) { pr_err("Cannot use -r option with perf stat record.\n"); return -1; } - session = perf_session__new(file, false, NULL); + session = perf_session__new(data, false, NULL); if (session == NULL) { pr_err("Perf session creation failed.\n"); return -1; @@ -2477,7 +2477,7 @@ int process_stat_config_event(struct perf_tool *tool, if (st->aggr_mode != AGGR_UNSET) stat_config.aggr_mode = st->aggr_mode; - if (perf_stat.file.is_pipe) + if (perf_stat.data.is_pipe) perf_stat_init_aggr_mode(); else perf_stat_init_aggr_mode_file(st); @@ -2585,10 +2585,10 @@ static int __cmd_report(int argc, const char **argv) input_name = "perf.data"; } - perf_stat.file.path = input_name; - perf_stat.file.mode = PERF_DATA_MODE_READ; + perf_stat.data.path = input_name; + perf_stat.data.mode = PERF_DATA_MODE_READ; - session = perf_session__new(&perf_stat.file, false, &perf_stat.tool); + session = perf_session__new(&perf_stat.data, false, &perf_stat.tool); if (session == NULL) return -1; @@ -2859,7 +2859,7 @@ int cmd_stat(int argc, const char **argv) * records, but the need to suppress the kptr_restrict messages in older * tools remain -acme */ - int fd = perf_data_file__fd(&perf_stat.file); + int fd = perf_data__fd(&perf_stat.data); int err = perf_event__synthesize_kernel_mmap((void *)&perf_stat, process_synthesized_event, &perf_stat.session->machines.host); @@ -2873,7 +2873,7 @@ int cmd_stat(int argc, const char **argv) pr_err("failed to write stat round event\n"); } - if (!perf_stat.file.is_pipe) { + if (!perf_stat.data.is_pipe) { perf_stat.session->header.data_size += perf_stat.bytes_written; perf_session__write_header(perf_stat.session, evsel_list, fd, true); } diff --git a/tools/perf/builtin-timechart.c b/tools/perf/builtin-timechart.c index 01de01ca14f2..0f79ea5e2f0f 100644 --- a/tools/perf/builtin-timechart.c +++ b/tools/perf/builtin-timechart.c @@ -1601,13 +1601,13 @@ static int __cmd_timechart(struct timechart *tchart, const char *output_name) { "syscalls:sys_exit_pselect6", process_exit_poll }, { "syscalls:sys_exit_select", process_exit_poll }, }; - struct perf_data_file file = { + struct perf_data data = { .path = input_name, .mode = PERF_DATA_MODE_READ, .force = tchart->force, }; - struct perf_session *session = perf_session__new(&file, false, + struct perf_session *session = perf_session__new(&data, false, &tchart->tool); int ret = -EINVAL; @@ -1617,7 +1617,7 @@ static int __cmd_timechart(struct timechart *tchart, const char *output_name) symbol__init(&session->header.env); (void)perf_header__process_sections(&session->header, - perf_data_file__fd(session->file), + perf_data__fd(session->data), tchart, process_header); diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 78855916f4b0..7def6947ad61 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -2532,7 +2532,7 @@ static int trace__replay(struct trace *trace) const struct perf_evsel_str_handler handlers[] = { { "probe:vfs_getname", trace__vfs_getname, }, }; - struct perf_data_file file = { + struct perf_data data = { .path = input_name, .mode = PERF_DATA_MODE_READ, .force = trace->force, @@ -2558,7 +2558,7 @@ static int trace__replay(struct trace *trace) /* add tid to output */ trace->multiple_threads = true; - session = perf_session__new(&file, false, &trace->tool); + session = perf_session__new(&data, false, &trace->tool); if (session == NULL) return -1; diff --git a/tools/perf/tests/topology.c b/tools/perf/tests/topology.c index 19b0561fd6f6..7536782e8495 100644 --- a/tools/perf/tests/topology.c +++ b/tools/perf/tests/topology.c @@ -29,12 +29,12 @@ static int get_temp(char *path) static int session_write_header(char *path) { struct perf_session *session; - struct perf_data_file file = { + struct perf_data data = { .path = path, .mode = PERF_DATA_MODE_WRITE, }; - session = perf_session__new(&file, false, NULL); + session = perf_session__new(&data, false, NULL); TEST_ASSERT_VAL("can't get session", session); session->evlist = perf_evlist__new_default(); @@ -46,7 +46,7 @@ static int session_write_header(char *path) session->header.data_size += DATA_SIZE; TEST_ASSERT_VAL("failed to write header", - !perf_session__write_header(session, session->evlist, file.fd, true)); + !perf_session__write_header(session, session->evlist, data.fd, true)); perf_session__delete(session); @@ -56,13 +56,13 @@ static int session_write_header(char *path) static int check_cpu_topology(char *path, struct cpu_map *map) { struct perf_session *session; - struct perf_data_file file = { + struct perf_data data = { .path = path, .mode = PERF_DATA_MODE_READ, }; int i; - session = perf_session__new(&file, false, NULL); + session = perf_session__new(&data, false, NULL); TEST_ASSERT_VAL("can't get session", session); for (i = 0; i < session->header.env.nr_cpus_avail; i++) { diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c index 5547457566a7..a33491416400 100644 --- a/tools/perf/util/auxtrace.c +++ b/tools/perf/util/auxtrace.c @@ -208,7 +208,7 @@ static int auxtrace_queues__grow(struct auxtrace_queues *queues, static void *auxtrace_copy_data(u64 size, struct perf_session *session) { - int fd = perf_data_file__fd(session->file); + int fd = perf_data__fd(session->data); void *p; ssize_t ret; @@ -305,7 +305,7 @@ static int auxtrace_queues__add_event_buffer(struct auxtrace_queues *queues, if (session->one_mmap) { buffer->data = buffer->data_offset - session->one_mmap_offset + session->one_mmap_addr; - } else if (perf_data_file__is_pipe(session->file)) { + } else if (perf_data__is_pipe(session->data)) { buffer->data = auxtrace_copy_data(buffer->size, session); if (!buffer->data) return -ENOMEM; diff --git a/tools/perf/util/data-convert-bt.c b/tools/perf/util/data-convert-bt.c index 2346cecb8ea2..9fdae383a58c 100644 --- a/tools/perf/util/data-convert-bt.c +++ b/tools/perf/util/data-convert-bt.c @@ -1577,7 +1577,7 @@ int bt_convert__perf2ctf(const char *input, const char *path, struct perf_data_convert_opts *opts) { struct perf_session *session; - struct perf_data_file file = { + struct perf_data data = { .path = input, .mode = PERF_DATA_MODE_READ, .force = opts->force, @@ -1619,7 +1619,7 @@ int bt_convert__perf2ctf(const char *input, const char *path, err = -1; /* perf.data session */ - session = perf_session__new(&file, 0, &c.tool); + session = perf_session__new(&data, 0, &c.tool); if (!session) goto free_writer; @@ -1650,7 +1650,7 @@ int bt_convert__perf2ctf(const char *input, const char *path, fprintf(stderr, "[ perf data convert: Converted '%s' into CTF data '%s' ]\n", - file.path, path); + data.path, path); fprintf(stderr, "[ perf data convert: Converted and wrote %.3f MB (%" PRIu64 " samples", diff --git a/tools/perf/util/data.c b/tools/perf/util/data.c index 1123b30e3033..a6eea3df4c10 100644 --- a/tools/perf/util/data.c +++ b/tools/perf/util/data.c @@ -21,56 +21,56 @@ #endif #endif -static bool check_pipe(struct perf_data_file *file) +static bool check_pipe(struct perf_data *data) { struct stat st; bool is_pipe = false; - int fd = perf_data_file__is_read(file) ? + int fd = perf_data__is_read(data) ? STDIN_FILENO : STDOUT_FILENO; - if (!file->path) { + if (!data->path) { if (!fstat(fd, &st) && S_ISFIFO(st.st_mode)) is_pipe = true; } else { - if (!strcmp(file->path, "-")) + if (!strcmp(data->path, "-")) is_pipe = true; } if (is_pipe) - file->fd = fd; + data->fd = fd; - return file->is_pipe = is_pipe; + return data->is_pipe = is_pipe; } -static int check_backup(struct perf_data_file *file) +static int check_backup(struct perf_data *data) { struct stat st; - if (!stat(file->path, &st) && st.st_size) { + if (!stat(data->path, &st) && st.st_size) { /* TODO check errors properly */ char oldname[PATH_MAX]; snprintf(oldname, sizeof(oldname), "%s.old", - file->path); + data->path); unlink(oldname); - rename(file->path, oldname); + rename(data->path, oldname); } return 0; } -static int open_file_read(struct perf_data_file *file) +static int open_file_read(struct perf_data *data) { struct stat st; int fd; char sbuf[STRERR_BUFSIZE]; - fd = open(file->path, O_RDONLY); + fd = open(data->path, O_RDONLY); if (fd < 0) { int err = errno; - pr_err("failed to open %s: %s", file->path, + pr_err("failed to open %s: %s", data->path, str_error_r(err, sbuf, sizeof(sbuf))); - if (err == ENOENT && !strcmp(file->path, "perf.data")) + if (err == ENOENT && !strcmp(data->path, "perf.data")) pr_err(" (try 'perf record' first)"); pr_err("\n"); return -err; @@ -79,19 +79,19 @@ static int open_file_read(struct perf_data_file *file) if (fstat(fd, &st) < 0) goto out_close; - if (!file->force && st.st_uid && (st.st_uid != geteuid())) { + if (!data->force && st.st_uid && (st.st_uid != geteuid())) { pr_err("File %s not owned by current user or root (use -f to override)\n", - file->path); + data->path); goto out_close; } if (!st.st_size) { - pr_info("zero-sized file (%s), nothing to do!\n", - file->path); + pr_info("zero-sized data (%s), nothing to do!\n", + data->path); goto out_close; } - file->size = st.st_size; + data->size = st.st_size; return fd; out_close: @@ -99,93 +99,93 @@ static int open_file_read(struct perf_data_file *file) return -1; } -static int open_file_write(struct perf_data_file *file) +static int open_file_write(struct perf_data *data) { int fd; char sbuf[STRERR_BUFSIZE]; - if (check_backup(file)) + if (check_backup(data)) return -1; - fd = open(file->path, O_CREAT|O_RDWR|O_TRUNC|O_CLOEXEC, + fd = open(data->path, O_CREAT|O_RDWR|O_TRUNC|O_CLOEXEC, S_IRUSR|S_IWUSR); if (fd < 0) - pr_err("failed to open %s : %s\n", file->path, + pr_err("failed to open %s : %s\n", data->path, str_error_r(errno, sbuf, sizeof(sbuf))); return fd; } -static int open_file(struct perf_data_file *file) +static int open_file(struct perf_data *data) { int fd; - fd = perf_data_file__is_read(file) ? - open_file_read(file) : open_file_write(file); + fd = perf_data__is_read(data) ? + open_file_read(data) : open_file_write(data); - file->fd = fd; + data->fd = fd; return fd < 0 ? -1 : 0; } -int perf_data_file__open(struct perf_data_file *file) +int perf_data__open(struct perf_data *data) { - if (check_pipe(file)) + if (check_pipe(data)) return 0; - if (!file->path) - file->path = "perf.data"; + if (!data->path) + data->path = "perf.data"; - return open_file(file); + return open_file(data); } -void perf_data_file__close(struct perf_data_file *file) +void perf_data__close(struct perf_data *data) { - close(file->fd); + close(data->fd); } -ssize_t perf_data_file__write(struct perf_data_file *file, +ssize_t perf_data__write(struct perf_data *data, void *buf, size_t size) { - return writen(file->fd, buf, size); + return writen(data->fd, buf, size); } -int perf_data_file__switch(struct perf_data_file *file, +int perf_data__switch(struct perf_data *data, const char *postfix, size_t pos, bool at_exit) { char *new_filepath; int ret; - if (check_pipe(file)) + if (check_pipe(data)) return -EINVAL; - if (perf_data_file__is_read(file)) + if (perf_data__is_read(data)) return -EINVAL; - if (asprintf(&new_filepath, "%s.%s", file->path, postfix) < 0) + if (asprintf(&new_filepath, "%s.%s", data->path, postfix) < 0) return -ENOMEM; /* * Only fire a warning, don't return error, continue fill * original file. */ - if (rename(file->path, new_filepath)) - pr_warning("Failed to rename %s to %s\n", file->path, new_filepath); + if (rename(data->path, new_filepath)) + pr_warning("Failed to rename %s to %s\n", data->path, new_filepath); if (!at_exit) { - close(file->fd); - ret = perf_data_file__open(file); + close(data->fd); + ret = perf_data__open(data); if (ret < 0) goto out; - if (lseek(file->fd, pos, SEEK_SET) == (off_t)-1) { + if (lseek(data->fd, pos, SEEK_SET) == (off_t)-1) { ret = -errno; pr_debug("Failed to lseek to %zu: %s", pos, strerror(errno)); goto out; } } - ret = file->fd; + ret = data->fd; out: free(new_filepath); return ret; diff --git a/tools/perf/util/data.h b/tools/perf/util/data.h index ae510ce16cb1..a1f9d70426b1 100644 --- a/tools/perf/util/data.h +++ b/tools/perf/util/data.h @@ -8,7 +8,7 @@ enum perf_data_mode { PERF_DATA_MODE_READ, }; -struct perf_data_file { +struct perf_data { const char *path; int fd; bool is_pipe; @@ -17,43 +17,43 @@ struct perf_data_file { enum perf_data_mode mode; }; -static inline bool perf_data_file__is_read(struct perf_data_file *file) +static inline bool perf_data__is_read(struct perf_data *data) { - return file->mode == PERF_DATA_MODE_READ; + return data->mode == PERF_DATA_MODE_READ; } -static inline bool perf_data_file__is_write(struct perf_data_file *file) +static inline bool perf_data__is_write(struct perf_data *data) { - return file->mode == PERF_DATA_MODE_WRITE; + return data->mode == PERF_DATA_MODE_WRITE; } -static inline int perf_data_file__is_pipe(struct perf_data_file *file) +static inline int perf_data__is_pipe(struct perf_data *data) { - return file->is_pipe; + return data->is_pipe; } -static inline int perf_data_file__fd(struct perf_data_file *file) +static inline int perf_data__fd(struct perf_data *data) { - return file->fd; + return data->fd; } -static inline unsigned long perf_data_file__size(struct perf_data_file *file) +static inline unsigned long perf_data__size(struct perf_data *data) { - return file->size; + return data->size; } -int perf_data_file__open(struct perf_data_file *file); -void perf_data_file__close(struct perf_data_file *file); -ssize_t perf_data_file__write(struct perf_data_file *file, +int perf_data__open(struct perf_data *data); +void perf_data__close(struct perf_data *data); +ssize_t perf_data__write(struct perf_data *data, void *buf, size_t size); /* * If at_exit is set, only rename current perf.data to - * perf.data., continue write on original file. + * perf.data., continue write on original data. * Set at_exit when flushing the last output. * * Return value is fd of new output. */ -int perf_data_file__switch(struct perf_data_file *file, +int perf_data__switch(struct perf_data *data, const char *postfix, size_t pos, bool at_exit); #endif /* __PERF_DATA_H */ diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 605bbd5404fb..d7be552b21c8 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -1762,7 +1762,7 @@ process_event_desc(struct feat_fd *ff, void *data __maybe_unused) session = container_of(ff->ph, struct perf_session, header); - if (session->file->is_pipe) { + if (session->data->is_pipe) { /* Save events for reading later by print_event_desc, * since they can't be read again in pipe mode. */ ff->events = events; @@ -1771,7 +1771,7 @@ process_event_desc(struct feat_fd *ff, void *data __maybe_unused) for (evsel = events; evsel->attr.size; evsel++) perf_evlist__set_event_name(session->evlist, evsel); - if (!session->file->is_pipe) + if (!session->data->is_pipe) free_event_desc(events); return 0; @@ -2248,7 +2248,7 @@ int perf_header__fprintf_info(struct perf_session *session, FILE *fp, bool full) { struct header_print_data hd; struct perf_header *header = &session->header; - int fd = perf_data_file__fd(session->file); + int fd = perf_data__fd(session->data); struct stat st; int ret, bit; @@ -2264,7 +2264,7 @@ int perf_header__fprintf_info(struct perf_session *session, FILE *fp, bool full) perf_header__process_sections(header, fd, &hd, perf_file_section__fprintf_info); - if (session->file->is_pipe) + if (session->data->is_pipe) return 0; fprintf(fp, "# missing features: "); @@ -2757,7 +2757,7 @@ static int perf_header__read_pipe(struct perf_session *session) struct perf_pipe_file_header f_header; if (perf_file_header__read_pipe(&f_header, header, - perf_data_file__fd(session->file), + perf_data__fd(session->data), session->repipe) < 0) { pr_debug("incompatible file format\n"); return -EINVAL; @@ -2860,13 +2860,13 @@ static int perf_evlist__prepare_tracepoint_events(struct perf_evlist *evlist, int perf_session__read_header(struct perf_session *session) { - struct perf_data_file *file = session->file; + struct perf_data *data = session->data; struct perf_header *header = &session->header; struct perf_file_header f_header; struct perf_file_attr f_attr; u64 f_id; int nr_attrs, nr_ids, i, j; - int fd = perf_data_file__fd(file); + int fd = perf_data__fd(data); session->evlist = perf_evlist__new(); if (session->evlist == NULL) @@ -2874,7 +2874,7 @@ int perf_session__read_header(struct perf_session *session) session->evlist->env = &header->env; session->machines.host.env = &header->env; - if (perf_data_file__is_pipe(file)) + if (perf_data__is_pipe(data)) return perf_header__read_pipe(session); if (perf_file_header__read(&f_header, header, fd) < 0) @@ -2889,7 +2889,7 @@ int perf_session__read_header(struct perf_session *session) if (f_header.data.size == 0) { pr_warning("WARNING: The %s file's data size field is 0 which is unexpected.\n" "Was the 'perf record' command properly terminated?\n", - file->path); + data->path); } nr_attrs = f_header.attrs.size / f_header.attr_size; @@ -3397,7 +3397,7 @@ int perf_event__process_tracing_data(struct perf_tool *tool __maybe_unused, struct perf_session *session) { ssize_t size_read, padding, size = event->tracing_data.size; - int fd = perf_data_file__fd(session->file); + int fd = perf_data__fd(session->data); off_t offset = lseek(fd, 0, SEEK_CUR); char buf[BUFSIZ]; diff --git a/tools/perf/util/intel-bts.c b/tools/perf/util/intel-bts.c index 218ee2bac9a5..5325e65f9711 100644 --- a/tools/perf/util/intel-bts.c +++ b/tools/perf/util/intel-bts.c @@ -500,7 +500,7 @@ static int intel_bts_process_queue(struct intel_bts_queue *btsq, u64 *timestamp) } if (!buffer->data) { - int fd = perf_data_file__fd(btsq->bts->session->file); + int fd = perf_data__fd(btsq->bts->session->data); buffer->data = auxtrace_buffer__get_data(buffer, fd); if (!buffer->data) { @@ -664,10 +664,10 @@ static int intel_bts_process_auxtrace_event(struct perf_session *session, if (!bts->data_queued) { struct auxtrace_buffer *buffer; off_t data_offset; - int fd = perf_data_file__fd(session->file); + int fd = perf_data__fd(session->data); int err; - if (perf_data_file__is_pipe(session->file)) { + if (perf_data__is_pipe(session->data)) { data_offset = 0; } else { data_offset = lseek(fd, 0, SEEK_CUR); diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c index b58f9fd1e2ee..23f9ba676df0 100644 --- a/tools/perf/util/intel-pt.c +++ b/tools/perf/util/intel-pt.c @@ -271,7 +271,7 @@ next: ptq->buffer = buffer; if (!buffer->data) { - int fd = perf_data_file__fd(ptq->pt->session->file); + int fd = perf_data__fd(ptq->pt->session->data); buffer->data = auxtrace_buffer__get_data(buffer, fd); if (!buffer->data) @@ -2084,10 +2084,10 @@ static int intel_pt_process_auxtrace_event(struct perf_session *session, if (!pt->data_queued) { struct auxtrace_buffer *buffer; off_t data_offset; - int fd = perf_data_file__fd(session->file); + int fd = perf_data__fd(session->data); int err; - if (perf_data_file__is_pipe(session->file)) { + if (perf_data__is_pipe(session->data)) { data_offset = 0; } else { data_offset = lseek(fd, 0, SEEK_CUR); diff --git a/tools/perf/util/jit.h b/tools/perf/util/jit.h index 3f42ee4d2a0b..961e7a8a0e17 100644 --- a/tools/perf/util/jit.h +++ b/tools/perf/util/jit.h @@ -3,7 +3,7 @@ #include -int jit_process(struct perf_session *session, struct perf_data_file *output, +int jit_process(struct perf_session *session, struct perf_data *output, struct machine *machine, char *filename, pid_t pid, u64 *nbytes); int jit_inject_record(const char *filename); diff --git a/tools/perf/util/jitdump.c b/tools/perf/util/jitdump.c index 9084930e1757..e7645098a323 100644 --- a/tools/perf/util/jitdump.c +++ b/tools/perf/util/jitdump.c @@ -29,7 +29,7 @@ #include "sane_ctype.h" struct jit_buf_desc { - struct perf_data_file *output; + struct perf_data *output; struct perf_session *session; struct machine *machine; union jr_entry *entry; @@ -60,8 +60,8 @@ struct debug_line_info { struct jit_tool { struct perf_tool tool; - struct perf_data_file output; - struct perf_data_file input; + struct perf_data output; + struct perf_data input; u64 bytes_written; }; @@ -356,7 +356,7 @@ jit_inject_event(struct jit_buf_desc *jd, union perf_event *event) { ssize_t size; - size = perf_data_file__write(jd->output, event, event->header.size); + size = perf_data__write(jd->output, event, event->header.size); if (size < 0) return -1; @@ -751,7 +751,7 @@ jit_detect(char *mmap_name, pid_t pid) int jit_process(struct perf_session *session, - struct perf_data_file *output, + struct perf_data *output, struct machine *machine, char *filename, pid_t pid, diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index b3fd62f7e4c9..c09b748ab599 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -32,14 +32,14 @@ static int perf_session__deliver_event(struct perf_session *session, static int perf_session__open(struct perf_session *session) { - struct perf_data_file *file = session->file; + struct perf_data *data = session->data; if (perf_session__read_header(session) < 0) { pr_err("incompatible file format (rerun with -v to learn more)\n"); return -1; } - if (perf_data_file__is_pipe(file)) + if (perf_data__is_pipe(data)) return 0; if (perf_header__has_feat(&session->header, HEADER_STAT)) @@ -120,7 +120,7 @@ static int ordered_events__deliver_event(struct ordered_events *oe, session->tool, event->file_offset); } -struct perf_session *perf_session__new(struct perf_data_file *file, +struct perf_session *perf_session__new(struct perf_data *data, bool repipe, struct perf_tool *tool) { struct perf_session *session = zalloc(sizeof(*session)); @@ -134,13 +134,13 @@ struct perf_session *perf_session__new(struct perf_data_file *file, machines__init(&session->machines); ordered_events__init(&session->ordered_events, ordered_events__deliver_event); - if (file) { - if (perf_data_file__open(file)) + if (data) { + if (perf_data__open(data)) goto out_delete; - session->file = file; + session->data = data; - if (perf_data_file__is_read(file)) { + if (perf_data__is_read(data)) { if (perf_session__open(session) < 0) goto out_close; @@ -148,7 +148,7 @@ struct perf_session *perf_session__new(struct perf_data_file *file, * set session attributes that are present in perf.data * but not in pipe-mode. */ - if (!file->is_pipe) { + if (!data->is_pipe) { perf_session__set_id_hdr_size(session); perf_session__set_comm_exec(session); } @@ -157,7 +157,7 @@ struct perf_session *perf_session__new(struct perf_data_file *file, session->machines.host.env = &perf_env; } - if (!file || perf_data_file__is_write(file)) { + if (!data || perf_data__is_write(data)) { /* * In O_RDONLY mode this will be performed when reading the * kernel MMAP event, in perf_event__process_mmap(). @@ -170,7 +170,7 @@ struct perf_session *perf_session__new(struct perf_data_file *file, * In pipe-mode, evlist is empty until PERF_RECORD_HEADER_ATTR is * processed, so perf_evlist__sample_id_all is not meaningful here. */ - if ((!file || !file->is_pipe) && tool && tool->ordering_requires_timestamps && + if ((!data || !data->is_pipe) && tool && tool->ordering_requires_timestamps && tool->ordered_events && !perf_evlist__sample_id_all(session->evlist)) { dump_printf("WARNING: No sample_id_all support, falling back to unordered processing\n"); tool->ordered_events = false; @@ -179,7 +179,7 @@ struct perf_session *perf_session__new(struct perf_data_file *file, return session; out_close: - perf_data_file__close(file); + perf_data__close(data); out_delete: perf_session__delete(session); out: @@ -201,8 +201,8 @@ void perf_session__delete(struct perf_session *session) perf_session__delete_threads(session); perf_env__exit(&session->header.env); machines__exit(&session->machines); - if (session->file) - perf_data_file__close(session->file); + if (session->data) + perf_data__close(session->data); free(session); } @@ -290,8 +290,8 @@ static s64 process_event_auxtrace_stub(struct perf_tool *tool __maybe_unused, __maybe_unused) { dump_printf(": unhandled!\n"); - if (perf_data_file__is_pipe(session->file)) - skipn(perf_data_file__fd(session->file), event->auxtrace.size); + if (perf_data__is_pipe(session->data)) + skipn(perf_data__fd(session->data), event->auxtrace.size); return event->auxtrace.size; } @@ -1349,7 +1349,7 @@ static s64 perf_session__process_user_event(struct perf_session *session, { struct ordered_events *oe = &session->ordered_events; struct perf_tool *tool = session->tool; - int fd = perf_data_file__fd(session->file); + int fd = perf_data__fd(session->data); int err; dump_event(session->evlist, event, file_offset, NULL); @@ -1449,10 +1449,10 @@ int perf_session__peek_event(struct perf_session *session, off_t file_offset, goto out_parse_sample; } - if (perf_data_file__is_pipe(session->file)) + if (perf_data__is_pipe(session->data)) return -1; - fd = perf_data_file__fd(session->file); + fd = perf_data__fd(session->data); hdr_sz = sizeof(struct perf_event_header); if (buf_sz < hdr_sz) @@ -1687,7 +1687,7 @@ static int __perf_session__process_pipe_events(struct perf_session *session) { struct ordered_events *oe = &session->ordered_events; struct perf_tool *tool = session->tool; - int fd = perf_data_file__fd(session->file); + int fd = perf_data__fd(session->data); union perf_event *event; uint32_t size, cur_size = 0; void *buf = NULL; @@ -1828,7 +1828,7 @@ static int __perf_session__process_events(struct perf_session *session, { struct ordered_events *oe = &session->ordered_events; struct perf_tool *tool = session->tool; - int fd = perf_data_file__fd(session->file); + int fd = perf_data__fd(session->data); u64 head, page_offset, file_offset, file_pos, size; int err, mmap_prot, mmap_flags, map_idx = 0; size_t mmap_size; @@ -1945,13 +1945,13 @@ out_err: int perf_session__process_events(struct perf_session *session) { - u64 size = perf_data_file__size(session->file); + u64 size = perf_data__size(session->data); int err; if (perf_session__register_idle_thread(session) < 0) return -ENOMEM; - if (!perf_data_file__is_pipe(session->file)) + if (!perf_data__is_pipe(session->data)) err = __perf_session__process_events(session, session->header.data_offset, session->header.data_size, size); diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h index 47b5e7dbcb18..cc1c5ea53c39 100644 --- a/tools/perf/util/session.h +++ b/tools/perf/util/session.h @@ -32,13 +32,13 @@ struct perf_session { void *one_mmap_addr; u64 one_mmap_offset; struct ordered_events ordered_events; - struct perf_data_file *file; + struct perf_data *data; struct perf_tool *tool; }; struct perf_tool; -struct perf_session *perf_session__new(struct perf_data_file *file, +struct perf_session *perf_session__new(struct perf_data *data, bool repipe, struct perf_tool *tool); void perf_session__delete(struct perf_session *session); -- cgit v1.2.3 From eae8ad8042d82775da1ddf3faa915b32854d9cf4 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 23 Jan 2017 22:25:41 +0100 Subject: perf tools: Add struct perf_data_file Add struct perf_data_file to represent a single file within a perf_data struct. Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: Changbin Du Cc: David Ahern Cc: Jin Yao Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-c3f9p4xzykr845ktqcek6p4t@git.kernel.org [ Fixup recent changes in 'perf script --per-event-dump' ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-annotate.c | 4 ++-- tools/perf/builtin-buildid-cache.c | 4 ++-- tools/perf/builtin-buildid-list.c | 8 ++++--- tools/perf/builtin-c2c.c | 4 ++-- tools/perf/builtin-diff.c | 12 +++++----- tools/perf/builtin-evlist.c | 8 ++++--- tools/perf/builtin-inject.c | 12 ++++++---- tools/perf/builtin-kmem.c | 2 +- tools/perf/builtin-kvm.c | 8 ++++--- tools/perf/builtin-lock.c | 8 ++++--- tools/perf/builtin-mem.c | 8 ++++--- tools/perf/builtin-record.c | 6 ++--- tools/perf/builtin-report.c | 6 ++--- tools/perf/builtin-sched.c | 16 ++++++++----- tools/perf/builtin-script.c | 14 ++++++----- tools/perf/builtin-stat.c | 6 ++--- tools/perf/builtin-timechart.c | 8 ++++--- tools/perf/builtin-trace.c | 8 ++++--- tools/perf/tests/topology.c | 14 +++++++---- tools/perf/util/data-convert-bt.c | 8 +++---- tools/perf/util/data.c | 48 +++++++++++++++++++------------------- tools/perf/util/data.h | 10 +++++--- tools/perf/util/header.c | 2 +- 23 files changed, 127 insertions(+), 97 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 2d06be81a109..2d5c87578f83 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -356,7 +356,7 @@ static int __cmd_annotate(struct perf_annotate *ann) } if (total_nr_samples == 0) { - ui__error("The %s file has no samples!\n", session->data->path); + ui__error("The %s file has no samples!\n", session->data->file.path); goto out; } @@ -482,7 +482,7 @@ int cmd_annotate(int argc, const char **argv) if (quiet) perf_quiet_option(); - data.path = input_name; + data.file.path = input_name; annotate.session = perf_session__new(&data, false, &annotate.tool); if (annotate.session == NULL) diff --git a/tools/perf/builtin-buildid-cache.c b/tools/perf/builtin-buildid-cache.c index 9fceae47a02e..cb2453b29365 100644 --- a/tools/perf/builtin-buildid-cache.c +++ b/tools/perf/builtin-buildid-cache.c @@ -352,8 +352,8 @@ int cmd_buildid_cache(int argc, const char **argv) nsi = nsinfo__new(ns_id); if (missing_filename) { - data.path = missing_filename; - data.force = force; + data.file.path = missing_filename; + data.force = force; session = perf_session__new(&data, false, NULL); if (session == NULL) diff --git a/tools/perf/builtin-buildid-list.c b/tools/perf/builtin-buildid-list.c index 72bdc0eba990..00099a830b0d 100644 --- a/tools/perf/builtin-buildid-list.c +++ b/tools/perf/builtin-buildid-list.c @@ -51,9 +51,11 @@ static int perf_session__list_build_ids(bool force, bool with_hits) { struct perf_session *session; struct perf_data data = { - .path = input_name, - .mode = PERF_DATA_MODE_READ, - .force = force, + .file = { + .path = input_name, + }, + .mode = PERF_DATA_MODE_READ, + .force = force, }; symbol__elf_init(); diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index 87a52d09da29..9590fdcc6484 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -2572,8 +2572,8 @@ static int perf_c2c__report(int argc, const char **argv) if (!input_name || !strlen(input_name)) input_name = "perf.data"; - data.path = input_name; - data.force = symbol_conf.force; + data.file.path = input_name; + data.force = symbol_conf.force; err = setup_display(display); if (err) diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c index 5292e3d13cec..67570e6417e5 100644 --- a/tools/perf/builtin-diff.c +++ b/tools/perf/builtin-diff.c @@ -707,7 +707,7 @@ static void data__fprintf(void) data__for_each_file(i, d) fprintf(stdout, "# [%d] %s %s\n", - d->idx, d->data.path, + d->idx, d->data.file.path, !d->idx ? "(Baseline)" : ""); fprintf(stdout, "#\n"); @@ -778,14 +778,14 @@ static int __cmd_diff(void) data__for_each_file(i, d) { d->session = perf_session__new(&d->data, false, &tool); if (!d->session) { - pr_err("Failed to open %s\n", d->data.path); + pr_err("Failed to open %s\n", d->data.file.path); ret = -1; goto out_delete; } ret = perf_session__process_events(d->session); if (ret) { - pr_err("Failed to process %s\n", d->data.path); + pr_err("Failed to process %s\n", d->data.file.path); goto out_delete; } @@ -1288,9 +1288,9 @@ static int data_init(int argc, const char **argv) data__for_each_file(i, d) { struct perf_data *data = &d->data; - data->path = use_default ? defaults[i] : argv[i]; - data->mode = PERF_DATA_MODE_READ, - data->force = force, + data->file.path = use_default ? defaults[i] : argv[i]; + data->mode = PERF_DATA_MODE_READ, + data->force = force, d->idx = i; } diff --git a/tools/perf/builtin-evlist.c b/tools/perf/builtin-evlist.c index cd79c26e16a4..93b85dc857b6 100644 --- a/tools/perf/builtin-evlist.c +++ b/tools/perf/builtin-evlist.c @@ -22,9 +22,11 @@ static int __cmd_evlist(const char *file_name, struct perf_attr_details *details struct perf_session *session; struct perf_evsel *pos; struct perf_data data = { - .path = file_name, - .mode = PERF_DATA_MODE_READ, - .force = details->force, + .file = { + .path = file_name, + }, + .mode = PERF_DATA_MODE_READ, + .force = details->force, }; bool has_tracepoint = false; diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index ac7486f6c46f..91e65093d3c2 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -145,7 +145,7 @@ static s64 perf_event__repipe_auxtrace(struct perf_tool *tool, if (!inject->output.is_pipe) { off_t offset; - offset = lseek(inject->output.fd, 0, SEEK_CUR); + offset = lseek(inject->output.file.fd, 0, SEEK_CUR); if (offset == -1) return -errno; ret = auxtrace_index__auxtrace_event(&session->auxtrace_index, @@ -775,8 +775,10 @@ int cmd_inject(int argc, const char **argv) .input_name = "-", .samples = LIST_HEAD_INIT(inject.samples), .output = { - .path = "-", - .mode = PERF_DATA_MODE_WRITE, + .file = { + .path = "-", + }, + .mode = PERF_DATA_MODE_WRITE, }, }; struct perf_data data = { @@ -789,7 +791,7 @@ int cmd_inject(int argc, const char **argv) "Inject build-ids into the output stream"), OPT_STRING('i', "input", &inject.input_name, "file", "input file name"), - OPT_STRING('o', "output", &inject.output.path, "file", + OPT_STRING('o', "output", &inject.output.file.path, "file", "output file name"), OPT_BOOLEAN('s', "sched-stat", &inject.sched_stat, "Merge sched-stat and sched-switch for getting events " @@ -836,7 +838,7 @@ int cmd_inject(int argc, const char **argv) inject.tool.ordered_events = inject.sched_stat; - data.path = inject.input_name; + data.file.path = inject.input_name; inject.session = perf_session__new(&data, true, &inject.tool); if (inject.session == NULL) return -1; diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c index d45740a3e5f1..abcab75cc5b9 100644 --- a/tools/perf/builtin-kmem.c +++ b/tools/perf/builtin-kmem.c @@ -1949,7 +1949,7 @@ int cmd_kmem(int argc, const char **argv) return __cmd_record(argc, argv); } - data.path = input_name; + data.file.path = input_name; kmem_session = session = perf_session__new(&data, false, &perf_kmem); if (session == NULL) diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c index 4301fc34f23c..0af4c092b471 100644 --- a/tools/perf/builtin-kvm.c +++ b/tools/perf/builtin-kvm.c @@ -1068,9 +1068,11 @@ static int read_events(struct perf_kvm_stat *kvm) .ordered_events = true, }; struct perf_data file = { - .path = kvm->file_name, - .mode = PERF_DATA_MODE_READ, - .force = kvm->force, + .file = { + .path = kvm->file_name, + }, + .mode = PERF_DATA_MODE_READ, + .force = kvm->force, }; kvm->tool = eops; diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c index 2e281f7b0fca..81af29400b64 100644 --- a/tools/perf/builtin-lock.c +++ b/tools/perf/builtin-lock.c @@ -865,9 +865,11 @@ static int __cmd_report(bool display_info) .ordered_events = true, }; struct perf_data data = { - .path = input_name, - .mode = PERF_DATA_MODE_READ, - .force = force, + .file = { + .path = input_name, + }, + .mode = PERF_DATA_MODE_READ, + .force = force, }; session = perf_session__new(&data, false, &eops); diff --git a/tools/perf/builtin-mem.c b/tools/perf/builtin-mem.c index 5a4a6f8e614d..f09fd1a1b813 100644 --- a/tools/perf/builtin-mem.c +++ b/tools/perf/builtin-mem.c @@ -237,9 +237,11 @@ static int process_sample_event(struct perf_tool *tool, static int report_raw_events(struct perf_mem *mem) { struct perf_data data = { - .path = input_name, - .mode = PERF_DATA_MODE_READ, - .force = mem->force, + .file = { + .path = input_name, + }, + .mode = PERF_DATA_MODE_READ, + .force = mem->force, }; int ret; struct perf_session *session = perf_session__new(&data, false, diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 0ab7dd0e4f2b..f4d9fc54b382 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -618,7 +618,7 @@ record__switch_output(struct record *rec, bool at_exit) if (!quiet) fprintf(stderr, "[ perf record: Dump %s.%s ]\n", - data->path, timestamp); + data->file.path, timestamp); /* Output tracking events */ if (!at_exit) { @@ -1118,7 +1118,7 @@ out_child: fprintf(stderr, "[ perf record: Captured and wrote %.3f MB %s%s%s ]\n", perf_data__size(data) / 1024.0 / 1024.0, - data->path, postfix, samples); + data->file.path, postfix, samples); } out_delete_session: @@ -1482,7 +1482,7 @@ static struct option __record_options[] = { OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu", "list of cpus to monitor"), OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"), - OPT_STRING('o', "output", &record.data.path, "file", + OPT_STRING('o', "output", &record.data.file.path, "file", "output file name"), OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit, &record.opts.no_inherit_set, diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 0dc323772b5e..3c2d9d4932f3 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -637,7 +637,7 @@ static int __cmd_report(struct report *rep) rep->nr_entries += evsel__hists(pos)->nr_entries; if (rep->nr_entries == 0) { - ui__error("The %s file has no samples!\n", data->path); + ui__error("The %s file has no samples!\n", data->file.path); return 0; } @@ -940,8 +940,8 @@ int cmd_report(int argc, const char **argv) input_name = "perf.data"; } - data.path = input_name; - data.force = symbol_conf.force; + data.file.path = input_name; + data.force = symbol_conf.force; repeat: session = perf_session__new(&data, false, &report.tool); diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index cb5410511f69..47e54348b5ed 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -1701,9 +1701,11 @@ static int perf_sched__read_events(struct perf_sched *sched) }; struct perf_session *session; struct perf_data data = { - .path = input_name, - .mode = PERF_DATA_MODE_READ, - .force = sched->force, + .file = { + .path = input_name, + }, + .mode = PERF_DATA_MODE_READ, + .force = sched->force, }; int rc = -1; @@ -2903,9 +2905,11 @@ static int perf_sched__timehist(struct perf_sched *sched) { "sched:sched_migrate_task", timehist_migrate_task_event, }, }; struct perf_data data = { - .path = input_name, - .mode = PERF_DATA_MODE_READ, - .force = sched->force, + .file = { + .path = input_name, + }, + .mode = PERF_DATA_MODE_READ, + .force = sched->force, }; struct perf_session *session; diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 8f8fa952b506..89975e30c0ba 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -222,7 +222,7 @@ static struct perf_evsel_script *perf_evsel_script__new(struct perf_evsel *evsel struct perf_evsel_script *es = malloc(sizeof(*es)); if (es != NULL) { - if (asprintf(&es->filename, "%s.%s.dump", data->path, perf_evsel__name(evsel)) < 0) + if (asprintf(&es->filename, "%s.%s.dump", data->file.path, perf_evsel__name(evsel)) < 0) goto out_free; es->fp = fopen(es->filename, "w"); if (es->fp == NULL) @@ -2591,8 +2591,10 @@ int find_scripts(char **scripts_array, char **scripts_path_array) DIR *scripts_dir, *lang_dir; struct perf_session *session; struct perf_data data = { - .path = input_name, - .mode = PERF_DATA_MODE_READ, + .file = { + .path = input_name, + }, + .mode = PERF_DATA_MODE_READ, }; char *temp; int i = 0; @@ -2982,8 +2984,8 @@ int cmd_script(int argc, const char **argv) argc = parse_options_subcommand(argc, argv, options, script_subcommands, script_usage, PARSE_OPT_STOP_AT_NON_OPTION); - data.path = input_name; - data.force = symbol_conf.force; + data.file.path = input_name; + data.force = symbol_conf.force; if (argc > 1 && !strncmp(argv[0], "rec", strlen("rec"))) { rec_script_path = get_script_path(argv[1], RECORD_SUFFIX); @@ -3206,7 +3208,7 @@ int cmd_script(int argc, const char **argv) goto out_delete; } - input = open(data.path, O_RDONLY); /* input_name */ + input = open(data.file.path, O_RDONLY); /* input_name */ if (input < 0) { err = -errno; perror("failed to open file"); diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 85af6d291c06..fa5896270022 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -2412,7 +2412,7 @@ static int __cmd_record(int argc, const char **argv) PARSE_OPT_STOP_AT_NON_OPTION); if (output_name) - data->path = output_name; + data->file.path = output_name; if (run_count != 1 || forever) { pr_err("Cannot use -r option with perf stat record.\n"); @@ -2585,8 +2585,8 @@ static int __cmd_report(int argc, const char **argv) input_name = "perf.data"; } - perf_stat.data.path = input_name; - perf_stat.data.mode = PERF_DATA_MODE_READ; + perf_stat.data.file.path = input_name; + perf_stat.data.mode = PERF_DATA_MODE_READ; session = perf_session__new(&perf_stat.data, false, &perf_stat.tool); if (session == NULL) diff --git a/tools/perf/builtin-timechart.c b/tools/perf/builtin-timechart.c index 0f79ea5e2f0f..813698a9b8c7 100644 --- a/tools/perf/builtin-timechart.c +++ b/tools/perf/builtin-timechart.c @@ -1602,9 +1602,11 @@ static int __cmd_timechart(struct timechart *tchart, const char *output_name) { "syscalls:sys_exit_select", process_exit_poll }, }; struct perf_data data = { - .path = input_name, - .mode = PERF_DATA_MODE_READ, - .force = tchart->force, + .file = { + .path = input_name, + }, + .mode = PERF_DATA_MODE_READ, + .force = tchart->force, }; struct perf_session *session = perf_session__new(&data, false, diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 7def6947ad61..c373f9a3e4a9 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -2533,9 +2533,11 @@ static int trace__replay(struct trace *trace) { "probe:vfs_getname", trace__vfs_getname, }, }; struct perf_data data = { - .path = input_name, - .mode = PERF_DATA_MODE_READ, - .force = trace->force, + .file = { + .path = input_name, + }, + .mode = PERF_DATA_MODE_READ, + .force = trace->force, }; struct perf_session *session; struct perf_evsel *evsel; diff --git a/tools/perf/tests/topology.c b/tools/perf/tests/topology.c index 7536782e8495..9bbfed51f1d6 100644 --- a/tools/perf/tests/topology.c +++ b/tools/perf/tests/topology.c @@ -30,8 +30,10 @@ static int session_write_header(char *path) { struct perf_session *session; struct perf_data data = { - .path = path, - .mode = PERF_DATA_MODE_WRITE, + .file = { + .path = path, + }, + .mode = PERF_DATA_MODE_WRITE, }; session = perf_session__new(&data, false, NULL); @@ -46,7 +48,7 @@ static int session_write_header(char *path) session->header.data_size += DATA_SIZE; TEST_ASSERT_VAL("failed to write header", - !perf_session__write_header(session, session->evlist, data.fd, true)); + !perf_session__write_header(session, session->evlist, data.file.fd, true)); perf_session__delete(session); @@ -57,8 +59,10 @@ static int check_cpu_topology(char *path, struct cpu_map *map) { struct perf_session *session; struct perf_data data = { - .path = path, - .mode = PERF_DATA_MODE_READ, + .file = { + .path = path, + }, + .mode = PERF_DATA_MODE_READ, }; int i; diff --git a/tools/perf/util/data-convert-bt.c b/tools/perf/util/data-convert-bt.c index 9fdae383a58c..5744c12641a5 100644 --- a/tools/perf/util/data-convert-bt.c +++ b/tools/perf/util/data-convert-bt.c @@ -1578,9 +1578,9 @@ int bt_convert__perf2ctf(const char *input, const char *path, { struct perf_session *session; struct perf_data data = { - .path = input, - .mode = PERF_DATA_MODE_READ, - .force = opts->force, + .file.path = input, + .mode = PERF_DATA_MODE_READ, + .force = opts->force, }; struct convert c = { .tool = { @@ -1650,7 +1650,7 @@ int bt_convert__perf2ctf(const char *input, const char *path, fprintf(stderr, "[ perf data convert: Converted '%s' into CTF data '%s' ]\n", - data.path, path); + data.file.path, path); fprintf(stderr, "[ perf data convert: Converted and wrote %.3f MB (%" PRIu64 " samples", diff --git a/tools/perf/util/data.c b/tools/perf/util/data.c index a6eea3df4c10..07ef56a4123c 100644 --- a/tools/perf/util/data.c +++ b/tools/perf/util/data.c @@ -28,16 +28,16 @@ static bool check_pipe(struct perf_data *data) int fd = perf_data__is_read(data) ? STDIN_FILENO : STDOUT_FILENO; - if (!data->path) { + if (!data->file.path) { if (!fstat(fd, &st) && S_ISFIFO(st.st_mode)) is_pipe = true; } else { - if (!strcmp(data->path, "-")) + if (!strcmp(data->file.path, "-")) is_pipe = true; } if (is_pipe) - data->fd = fd; + data->file.fd = fd; return data->is_pipe = is_pipe; } @@ -46,13 +46,13 @@ static int check_backup(struct perf_data *data) { struct stat st; - if (!stat(data->path, &st) && st.st_size) { + if (!stat(data->file.path, &st) && st.st_size) { /* TODO check errors properly */ char oldname[PATH_MAX]; snprintf(oldname, sizeof(oldname), "%s.old", - data->path); + data->file.path); unlink(oldname); - rename(data->path, oldname); + rename(data->file.path, oldname); } return 0; @@ -64,13 +64,13 @@ static int open_file_read(struct perf_data *data) int fd; char sbuf[STRERR_BUFSIZE]; - fd = open(data->path, O_RDONLY); + fd = open(data->file.path, O_RDONLY); if (fd < 0) { int err = errno; - pr_err("failed to open %s: %s", data->path, + pr_err("failed to open %s: %s", data->file.path, str_error_r(err, sbuf, sizeof(sbuf))); - if (err == ENOENT && !strcmp(data->path, "perf.data")) + if (err == ENOENT && !strcmp(data->file.path, "perf.data")) pr_err(" (try 'perf record' first)"); pr_err("\n"); return -err; @@ -81,13 +81,13 @@ static int open_file_read(struct perf_data *data) if (!data->force && st.st_uid && (st.st_uid != geteuid())) { pr_err("File %s not owned by current user or root (use -f to override)\n", - data->path); + data->file.path); goto out_close; } if (!st.st_size) { pr_info("zero-sized data (%s), nothing to do!\n", - data->path); + data->file.path); goto out_close; } @@ -107,11 +107,11 @@ static int open_file_write(struct perf_data *data) if (check_backup(data)) return -1; - fd = open(data->path, O_CREAT|O_RDWR|O_TRUNC|O_CLOEXEC, + fd = open(data->file.path, O_CREAT|O_RDWR|O_TRUNC|O_CLOEXEC, S_IRUSR|S_IWUSR); if (fd < 0) - pr_err("failed to open %s : %s\n", data->path, + pr_err("failed to open %s : %s\n", data->file.path, str_error_r(errno, sbuf, sizeof(sbuf))); return fd; @@ -124,7 +124,7 @@ static int open_file(struct perf_data *data) fd = perf_data__is_read(data) ? open_file_read(data) : open_file_write(data); - data->fd = fd; + data->file.fd = fd; return fd < 0 ? -1 : 0; } @@ -133,21 +133,21 @@ int perf_data__open(struct perf_data *data) if (check_pipe(data)) return 0; - if (!data->path) - data->path = "perf.data"; + if (!data->file.path) + data->file.path = "perf.data"; return open_file(data); } void perf_data__close(struct perf_data *data) { - close(data->fd); + close(data->file.fd); } ssize_t perf_data__write(struct perf_data *data, void *buf, size_t size) { - return writen(data->fd, buf, size); + return writen(data->file.fd, buf, size); } int perf_data__switch(struct perf_data *data, @@ -162,30 +162,30 @@ int perf_data__switch(struct perf_data *data, if (perf_data__is_read(data)) return -EINVAL; - if (asprintf(&new_filepath, "%s.%s", data->path, postfix) < 0) + if (asprintf(&new_filepath, "%s.%s", data->file.path, postfix) < 0) return -ENOMEM; /* * Only fire a warning, don't return error, continue fill * original file. */ - if (rename(data->path, new_filepath)) - pr_warning("Failed to rename %s to %s\n", data->path, new_filepath); + if (rename(data->file.path, new_filepath)) + pr_warning("Failed to rename %s to %s\n", data->file.path, new_filepath); if (!at_exit) { - close(data->fd); + close(data->file.fd); ret = perf_data__open(data); if (ret < 0) goto out; - if (lseek(data->fd, pos, SEEK_SET) == (off_t)-1) { + if (lseek(data->file.fd, pos, SEEK_SET) == (off_t)-1) { ret = -errno; pr_debug("Failed to lseek to %zu: %s", pos, strerror(errno)); goto out; } } - ret = data->fd; + ret = data->file.fd; out: free(new_filepath); return ret; diff --git a/tools/perf/util/data.h b/tools/perf/util/data.h index a1f9d70426b1..1797bed3aa4b 100644 --- a/tools/perf/util/data.h +++ b/tools/perf/util/data.h @@ -8,9 +8,13 @@ enum perf_data_mode { PERF_DATA_MODE_READ, }; +struct perf_data_file { + const char *path; + int fd; +}; + struct perf_data { - const char *path; - int fd; + struct perf_data_file file; bool is_pipe; bool force; unsigned long size; @@ -34,7 +38,7 @@ static inline int perf_data__is_pipe(struct perf_data *data) static inline int perf_data__fd(struct perf_data *data) { - return data->fd; + return data->file.fd; } static inline unsigned long perf_data__size(struct perf_data *data) diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index d7be552b21c8..6e59dcca9df2 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -2889,7 +2889,7 @@ int perf_session__read_header(struct perf_session *session) if (f_header.data.size == 0) { pr_warning("WARNING: The %s file's data size field is 0 which is unexpected.\n" "Was the 'perf record' command properly terminated?\n", - data->path); + data->file.path); } nr_attrs = f_header.attrs.size / f_header.attr_size; -- cgit v1.2.3 From e268687bfb73bb7bfe65f23f5cba5c8a0e5bb050 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 23 Jan 2017 22:42:56 +0100 Subject: perf tools: Add perf_data_file__write function Adding perf_data_file__write function to provide single file write operation. Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: Changbin Du Cc: David Ahern Cc: Jin Yao Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-c3f9p4xzykr845ktqcek6p4t@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/data.c | 8 +++++++- tools/perf/util/data.h | 2 ++ 2 files changed, 9 insertions(+), 1 deletion(-) (limited to 'tools/perf') diff --git a/tools/perf/util/data.c b/tools/perf/util/data.c index 07ef56a4123c..f80a23d031d6 100644 --- a/tools/perf/util/data.c +++ b/tools/perf/util/data.c @@ -144,10 +144,16 @@ void perf_data__close(struct perf_data *data) close(data->file.fd); } +ssize_t perf_data_file__write(struct perf_data_file *file, + void *buf, size_t size) +{ + return writen(file->fd, buf, size); +} + ssize_t perf_data__write(struct perf_data *data, void *buf, size_t size) { - return writen(data->file.fd, buf, size); + return perf_data_file__write(&data->file, buf, size); } int perf_data__switch(struct perf_data *data, diff --git a/tools/perf/util/data.h b/tools/perf/util/data.h index 1797bed3aa4b..000c43bbb7ac 100644 --- a/tools/perf/util/data.h +++ b/tools/perf/util/data.h @@ -50,6 +50,8 @@ int perf_data__open(struct perf_data *data); void perf_data__close(struct perf_data *data); ssize_t perf_data__write(struct perf_data *data, void *buf, size_t size); +ssize_t perf_data_file__write(struct perf_data_file *file, + void *buf, size_t size); /* * If at_exit is set, only rename current perf.data to * perf.data., continue write on original data. -- cgit v1.2.3 From 54830dd0c342525de2ff10f8be7cf0a9f062b896 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 23 Jan 2017 22:42:56 +0100 Subject: perf stat: Move the shadow stats scale computation in perf_stat__update_shadow_stats Move the shadow stats scale computation to the perf_stat__update_shadow_stats() function, so it's centralized and we don't forget to do it. It also saves few lines of code. Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: Changbin Du Cc: David Ahern Cc: Jin Yao Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-htg7mmyxv6pcrf57qyo6msid@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 3 +-- tools/perf/util/stat-shadow.c | 48 ++++++++++++++++++++++--------------------- tools/perf/util/stat.c | 6 ++---- tools/perf/util/stat.h | 2 +- 4 files changed, 29 insertions(+), 30 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index fa5896270022..59af5a8419e2 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -1267,8 +1267,7 @@ static void aggr_update_shadow(void) continue; val += perf_counts(counter->counts, cpu, 0)->val; } - val = val * counter->scale; - perf_stat__update_shadow_stats(counter, &val, + perf_stat__update_shadow_stats(counter, val, first_shadow_cpu(counter, id)); } } diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c index a2c12d1ef32a..51ad03a799ec 100644 --- a/tools/perf/util/stat-shadow.c +++ b/tools/perf/util/stat-shadow.c @@ -178,58 +178,60 @@ void perf_stat__reset_shadow_stats(void) * more semantic information such as miss/hit ratios, * instruction rates, etc: */ -void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 *count, +void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 count, int cpu) { int ctx = evsel_context(counter); + count *= counter->scale; + if (perf_evsel__match(counter, SOFTWARE, SW_TASK_CLOCK) || perf_evsel__match(counter, SOFTWARE, SW_CPU_CLOCK)) - update_stats(&runtime_nsecs_stats[cpu], count[0]); + update_stats(&runtime_nsecs_stats[cpu], count); else if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES)) - update_stats(&runtime_cycles_stats[ctx][cpu], count[0]); + update_stats(&runtime_cycles_stats[ctx][cpu], count); else if (perf_stat_evsel__is(counter, CYCLES_IN_TX)) - update_stats(&runtime_cycles_in_tx_stats[ctx][cpu], count[0]); + update_stats(&runtime_cycles_in_tx_stats[ctx][cpu], count); else if (perf_stat_evsel__is(counter, TRANSACTION_START)) - update_stats(&runtime_transaction_stats[ctx][cpu], count[0]); + update_stats(&runtime_transaction_stats[ctx][cpu], count); else if (perf_stat_evsel__is(counter, ELISION_START)) - update_stats(&runtime_elision_stats[ctx][cpu], count[0]); + update_stats(&runtime_elision_stats[ctx][cpu], count); else if (perf_stat_evsel__is(counter, TOPDOWN_TOTAL_SLOTS)) - update_stats(&runtime_topdown_total_slots[ctx][cpu], count[0]); + update_stats(&runtime_topdown_total_slots[ctx][cpu], count); else if (perf_stat_evsel__is(counter, TOPDOWN_SLOTS_ISSUED)) - update_stats(&runtime_topdown_slots_issued[ctx][cpu], count[0]); + update_stats(&runtime_topdown_slots_issued[ctx][cpu], count); else if (perf_stat_evsel__is(counter, TOPDOWN_SLOTS_RETIRED)) - update_stats(&runtime_topdown_slots_retired[ctx][cpu], count[0]); + update_stats(&runtime_topdown_slots_retired[ctx][cpu], count); else if (perf_stat_evsel__is(counter, TOPDOWN_FETCH_BUBBLES)) - update_stats(&runtime_topdown_fetch_bubbles[ctx][cpu],count[0]); + update_stats(&runtime_topdown_fetch_bubbles[ctx][cpu], count); else if (perf_stat_evsel__is(counter, TOPDOWN_RECOVERY_BUBBLES)) - update_stats(&runtime_topdown_recovery_bubbles[ctx][cpu], count[0]); + update_stats(&runtime_topdown_recovery_bubbles[ctx][cpu], count); else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) - update_stats(&runtime_stalled_cycles_front_stats[ctx][cpu], count[0]); + update_stats(&runtime_stalled_cycles_front_stats[ctx][cpu], count); else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND)) - update_stats(&runtime_stalled_cycles_back_stats[ctx][cpu], count[0]); + update_stats(&runtime_stalled_cycles_back_stats[ctx][cpu], count); else if (perf_evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS)) - update_stats(&runtime_branches_stats[ctx][cpu], count[0]); + update_stats(&runtime_branches_stats[ctx][cpu], count); else if (perf_evsel__match(counter, HARDWARE, HW_CACHE_REFERENCES)) - update_stats(&runtime_cacherefs_stats[ctx][cpu], count[0]); + update_stats(&runtime_cacherefs_stats[ctx][cpu], count); else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1D)) - update_stats(&runtime_l1_dcache_stats[ctx][cpu], count[0]); + update_stats(&runtime_l1_dcache_stats[ctx][cpu], count); else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1I)) - update_stats(&runtime_ll_cache_stats[ctx][cpu], count[0]); + update_stats(&runtime_ll_cache_stats[ctx][cpu], count); else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_LL)) - update_stats(&runtime_ll_cache_stats[ctx][cpu], count[0]); + update_stats(&runtime_ll_cache_stats[ctx][cpu], count); else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_DTLB)) - update_stats(&runtime_dtlb_cache_stats[ctx][cpu], count[0]); + update_stats(&runtime_dtlb_cache_stats[ctx][cpu], count); else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_ITLB)) - update_stats(&runtime_itlb_cache_stats[ctx][cpu], count[0]); + update_stats(&runtime_itlb_cache_stats[ctx][cpu], count); else if (perf_stat_evsel__is(counter, SMI_NUM)) - update_stats(&runtime_smi_num_stats[ctx][cpu], count[0]); + update_stats(&runtime_smi_num_stats[ctx][cpu], count); else if (perf_stat_evsel__is(counter, APERF)) - update_stats(&runtime_aperf_stats[ctx][cpu], count[0]); + update_stats(&runtime_aperf_stats[ctx][cpu], count); if (counter->collect_stat) { struct saved_value *v = saved_value_lookup(counter, cpu, true); - update_stats(&v->stats, count[0]); + update_stats(&v->stats, count); } } diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index 933de91831fa..ef00c91e2553 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c @@ -277,7 +277,7 @@ process_counter_values(struct perf_stat_config *config, struct perf_evsel *evsel perf_evsel__compute_deltas(evsel, cpu, thread, count); perf_counts_values__scale(count, config->scale, NULL); if (config->aggr_mode == AGGR_NONE) - perf_stat__update_shadow_stats(evsel, count->values, cpu); + perf_stat__update_shadow_stats(evsel, count->val, cpu); break; case AGGR_GLOBAL: aggr->val += count->val; @@ -320,7 +320,6 @@ int perf_stat_process_counter(struct perf_stat_config *config, struct perf_counts_values *aggr = &counter->counts->aggr; struct perf_stat_evsel *ps = counter->stats; u64 *count = counter->counts->aggr.values; - u64 val; int i, ret; aggr->val = aggr->ena = aggr->run = 0; @@ -360,8 +359,7 @@ int perf_stat_process_counter(struct perf_stat_config *config, /* * Save the full runtime - to allow normalization during printout: */ - val = counter->scale * *count; - perf_stat__update_shadow_stats(counter, &val, 0); + perf_stat__update_shadow_stats(counter, *count, 0); return 0; } diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index 47915df346fb..490b78aa7230 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -82,7 +82,7 @@ typedef void (*new_line_t )(void *ctx); void perf_stat__init_shadow_stats(void); void perf_stat__reset_shadow_stats(void); -void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 *count, +void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 count, int cpu); struct perf_stat_output_ctx { void *ctx; -- cgit v1.2.3 From 021b462a51de48dd84f12f5046b5b57a362d6506 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 12 Oct 2017 13:21:01 +0200 Subject: perf stat: Make --per-thread update shadow stats to show metrics We should support this because it would allow easily to collect metrics for different threads in applications. Original patch from posted by Jin Yao in here [1]. 1. Current output, for example: root@skl:/tmp# perf stat --per-thread -p 21623 ^C Performance counter stats for process id '21623': vmstat-21623 0.517479 task-clock (msec) # 0.000 CPUs utilized vmstat-21623 1 context-switches vmstat-21623 0 cpu-migrations vmstat-21623 0 page-faults vmstat-21623 461,306 cycles vmstat-21623 630,724 instructions vmstat-21623 136,265 branches vmstat-21623 2,520 branch-misses 1.444020756 seconds time elapsed root@skl:/tmp# perf stat --per-thread --metrics ipc -p 21623 ^C Performance counter stats for process id '21623': vmstat-21623 631,185 inst_retired.any vmstat-21623 605,893 cpu_clk_unhalted.thread 1.415679293 seconds time elapsed 2. With this patch, the result would be: root@skl:/tmp# perf stat --per-thread -p 21623 ^C Performance counter stats for process id '21623': vmstat-21623 0.533759 task-clock (msec) # 0.000 CPUs utilized vmstat-21623 1 context-switches # 0.002 M/sec vmstat-21623 0 cpu-migrations # 0.000 K/sec vmstat-21623 0 page-faults # 0.000 K/sec vmstat-21623 473,896 cycles # 0.888 GHz vmstat-21623 631,072 instructions # 1.33 insn per cycle vmstat-21623 136,307 branches # 255.372 M/sec vmstat-21623 2,524 branch-misses # 1.85% of all branches 1.544862861 seconds time elapsed root@skl:/tmp# perf stat --per-thread --metrics ipc -p 21623 ^C Performance counter stats for process id '21623': vmstat-21623 1,259,104 inst_retired.any # 1.2 IPC vmstat-21623 1,056,756 cpu_clk_unhalted.thread 2.040954502 seconds time elapsed [1] https://marc.info/?l=linux-kernel&m=150777054620511&w=2 Originally-from: Jin Yao Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: Changbin Du Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-tr8ntktxmy4qc5769ajg5u6c@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'tools/perf') diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index ef00c91e2553..203f5d8d11d1 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c @@ -278,6 +278,8 @@ process_counter_values(struct perf_stat_config *config, struct perf_evsel *evsel perf_counts_values__scale(count, config->scale, NULL); if (config->aggr_mode == AGGR_NONE) perf_stat__update_shadow_stats(evsel, count->val, cpu); + if (config->aggr_mode == AGGR_THREAD) + perf_stat__update_shadow_stats(evsel, count->val, 0); break; case AGGR_GLOBAL: aggr->val += count->val; -- cgit v1.2.3 From d6332a176b869df1839abb26c8f80026a66d21d6 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Fri, 20 Oct 2017 14:15:33 +0900 Subject: perf callchain: Fix double mapping al->addr for children without self period Milian Wolff found a problem he described in [1] and that for him would get fixed: "Note how most of the large offset values are now gone. Most notably, we get proper srcline resolution for the random.h and complex headers." Then Namhyung found the root cause: "I looked into it and found a bug handling cumulative (children) entries. For children entries that have no self period, the al->addr (so he->ip) ends up having an doubly-mapped address. It seems to be there from the beginning but only affects entries that have no srclines - finding srcline itself is done using a different address but it will show the invalid address if no srcline was found. I think we should fix the commit c7405d85d7a3 ("perf tools: Update cpumode for each cumulative entry")." [1] https://lkml.kernel.org/r/20171018185350.14893-7-milian.wolff@kdab.com Reported-by: Milian Wolff Signed-off-by: Namhyung Kim Tested-by: Milian Wolff Cc: Jin Yao Cc: Jiri Olsa Cc: kernel-team@lge.com Fixes: c7405d85d7a3 ("perf tools: Update cpumode for each cumulative entry") Link: https://lkml.kernel.org/r/20171020051533.GA2746@sejong Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/callchain.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index 3a3916934a92..837012147c7b 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c @@ -1091,10 +1091,7 @@ int fill_callchain_info(struct addr_location *al, struct callchain_cursor_node * al->map = node->map; al->sym = node->sym; al->srcline = node->srcline; - if (node->map) - al->addr = node->map->map_ip(node->map, node->ip); - else - al->addr = node->ip; + al->addr = node->ip; if (al->sym == NULL) { if (hide_unresolved) -- cgit v1.2.3 From 735e215e95e53b857000aaabe1b4707878b10f43 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 31 Oct 2017 10:04:11 -0300 Subject: tools include uapi: Grab a copy of linux/kcmp.h We will use it to generate tables for beautifying kcmp's 'type' arg. Cc: Adrian Hunter Cc: Andrey Vagin Cc: Cyrill Gorcunov Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-r35zr79invmpinfe1zu57cas@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/linux/kcmp.h | 27 +++++++++++++++++++++++++++ tools/perf/check-headers.sh | 1 + 2 files changed, 28 insertions(+) create mode 100644 tools/include/uapi/linux/kcmp.h (limited to 'tools/perf') diff --git a/tools/include/uapi/linux/kcmp.h b/tools/include/uapi/linux/kcmp.h new file mode 100644 index 000000000000..481e103da78e --- /dev/null +++ b/tools/include/uapi/linux/kcmp.h @@ -0,0 +1,27 @@ +#ifndef _UAPI_LINUX_KCMP_H +#define _UAPI_LINUX_KCMP_H + +#include + +/* Comparison type */ +enum kcmp_type { + KCMP_FILE, + KCMP_VM, + KCMP_FILES, + KCMP_FS, + KCMP_SIGHAND, + KCMP_IO, + KCMP_SYSVSEM, + KCMP_EPOLL_TFD, + + KCMP_TYPES, +}; + +/* Slot for KCMP_EPOLL_TFD */ +struct kcmp_epoll_slot { + __u32 efd; /* epoll file descriptor */ + __u32 tfd; /* target file number */ + __u32 toff; /* target offset within same numbered sequence */ +}; + +#endif /* _UAPI_LINUX_KCMP_H */ diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh index 8d8b37198666..a3a041b0d35e 100755 --- a/tools/perf/check-headers.sh +++ b/tools/perf/check-headers.sh @@ -4,6 +4,7 @@ HEADERS=' include/uapi/drm/drm.h include/uapi/drm/i915_drm.h include/uapi/linux/fcntl.h +include/uapi/linux/kcmp.h include/uapi/linux/kvm.h include/uapi/linux/perf_event.h include/uapi/linux/prctl.h -- cgit v1.2.3 From 0a2f7540abc08e32f89ff273506eeeeb6910794f Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 31 Oct 2017 11:30:09 -0300 Subject: perf trace beauty: Implement pid_fd beautifier One that given a pid and a fd, will try to get the path for that fd. Will be used in the upcoming kcmp's KCMP_FILE beautifier. Cc: Adrian Hunter Cc: Andrey Vagin Cc: Cyrill Gorcunov Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-7ketygp2dvs9h13wuakfncws@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-trace.c | 17 +++++++++++++++++ tools/perf/trace/beauty/beauty.h | 3 +++ 2 files changed, 20 insertions(+) (limited to 'tools/perf') diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index c373f9a3e4a9..0c1461416ef1 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -988,6 +988,23 @@ size_t syscall_arg__scnprintf_fd(char *bf, size_t size, struct syscall_arg *arg) return printed; } +size_t pid__scnprintf_fd(struct trace *trace, pid_t pid, int fd, char *bf, size_t size) +{ + size_t printed = scnprintf(bf, size, "%d", fd); + struct thread *thread = machine__find_thread(trace->host, pid, pid); + + if (thread) { + const char *path = thread__fd_path(thread, fd, trace); + + if (path) + printed += scnprintf(bf + printed, size - printed, "<%s>", path); + + thread__put(thread); + } + + return printed; +} + static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size, struct syscall_arg *arg) { diff --git a/tools/perf/trace/beauty/beauty.h b/tools/perf/trace/beauty/beauty.h index b29f94ef5d6a..365bb6fecc1c 100644 --- a/tools/perf/trace/beauty/beauty.h +++ b/tools/perf/trace/beauty/beauty.h @@ -3,6 +3,7 @@ #include #include +#include struct strarray { int offset; @@ -26,6 +27,8 @@ size_t strarray__scnprintf(struct strarray *sa, char *bf, size_t size, const cha struct trace; struct thread; +size_t pid__scnprintf_fd(struct trace *trace, pid_t pid, int fd, char *bf, size_t size); + /** * @val: value of syscall argument being formatted * @args: All the args, use syscall_args__val(arg, nth) to access one -- cgit v1.2.3 From 1de3038d0082a1c2edd7a7b1b3381e38f42af0e7 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 31 Oct 2017 11:32:23 -0300 Subject: perf trace beauty kcmp: Beautify arguments For some unknown reason there is no entry in tracefs's syscalls for kcmp, i.e. no tracefs/events/syscalls/sys_{enter,exit}_kcmp, so we need to provide a data dictionary for the fields. To beautify the 'type' argument we automatically generate a strarray from tools/include/uapi/kcmp.h, the idx1 and idx2 args, nowadays used only if type == KCMP_FILE, are masked for all the other types and a lookup is made for the thread and fd to show the path, if possible, getting it from the probe:vfs_getname if in place or from procfs, races allowing. A system wide strace like tracing session, with callchains shows just one user so far in this fedora 25 machine: # perf trace --max-stack 5 -e kcmp 1502914.400 ( 0.001 ms): systemd/1 kcmp(pid1: 1 (systemd), pid2: 1 (systemd), type: FILE, idx1: 271, idx2: 25) = -1 ENOSYS Function not implemented syscall (/usr/lib64/libc-2.25.so) same_fd (/usr/lib/systemd/libsystemd-shared-233.so) service_add_fd_store (/usr/lib/systemd/systemd) service_notify_message.lto_priv.127 (/usr/lib/systemd/systemd) 1502914.407 ( 0.001 ms): systemd/1 kcmp(pid1: 1 (systemd), pid2: 1 (systemd), type: FILE, idx1: 270, idx2: 25) = -1 ENOSYS Function not implemented syscall (/usr/lib64/libc-2.25.so) same_fd (/usr/lib/systemd/libsystemd-shared-233.so) service_add_fd_store (/usr/lib/systemd/systemd) service_notify_message.lto_priv.127 (/usr/lib/systemd/systemd) The backtraces seem to agree this is really kcmp(), but this system doesn't have the sys_kcmp(), bummer: # uname -a Linux jouet 4.14.0-rc3+ #1 SMP Fri Oct 13 12:21:12 -03 2017 x86_64 x86_64 x86_64 GNU/Linux # grep kcmp /proc/kallsyms ffffffffb60b8890 W sys_kcmp $ grep CONFIG_CHECKPOINT_RESTORE ../build/v4.14.0-rc3+/.config # CONFIG_CHECKPOINT_RESTORE is not set $ So systemd uses it, good fedora kernel config has it: $ grep CONFIG_CHECKPOINT_RESTORE /boot/config-4.13.4-200.fc26.x86_64 CONFIG_CHECKPOINT_RESTORE=y [acme@jouet linux]$ /me goes to rebuild a kernel... Cc: Adrian Hunter Cc: Andrey Vagin Cc: Cyrill Gorcunov Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-gz5fca968viw8m7hryjqvrln@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.perf | 9 ++++++++ tools/perf/builtin-trace.c | 6 +++++ tools/perf/trace/beauty/Build | 1 + tools/perf/trace/beauty/beauty.h | 6 +++++ tools/perf/trace/beauty/kcmp.c | 44 ++++++++++++++++++++++++++++++++++++ tools/perf/trace/beauty/kcmp_type.sh | 10 ++++++++ 6 files changed, 76 insertions(+) create mode 100644 tools/perf/trace/beauty/kcmp.c create mode 100755 tools/perf/trace/beauty/kcmp_type.sh (limited to 'tools/perf') diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index c872ca607b39..68cf1360a3f3 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -420,6 +420,13 @@ sndrv_pcm_ioctl_tbl := $(srctree)/tools/perf/trace/beauty/sndrv_pcm_ioctl.sh $(sndrv_pcm_ioctl_array): $(sndrv_pcm_hdr_dir)/asound.h $(sndrv_pcm_ioctl_tbl) $(Q)$(SHELL) '$(sndrv_pcm_ioctl_tbl)' $(sndrv_pcm_hdr_dir) > $@ +kcmp_type_array := $(beauty_outdir)/kcmp_type_array.c +kcmp_hdr_dir := $(srctree)/tools/include/uapi/linux/ +kcmp_type_tbl := $(srctree)/tools/perf/trace/beauty/kcmp_type.sh + +$(kcmp_type_array): $(kcmp_hdr_dir)/kcmp.h $(kcmp_type_tbl) + $(Q)$(SHELL) '$(kcmp_type_tbl)' $(kcmp_hdr_dir) > $@ + kvm_ioctl_array := $(beauty_ioctl_outdir)/kvm_ioctl_array.c kvm_hdr_dir := $(srctree)/tools/include/uapi/linux kvm_ioctl_tbl := $(srctree)/tools/perf/trace/beauty/kvm_ioctl.sh @@ -553,6 +560,7 @@ prepare: $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h archheaders $(drm_ioc $(pkey_alloc_access_rights_array) \ $(sndrv_pcm_ioctl_array) \ $(sndrv_ctl_ioctl_array) \ + $(kcmp_type_array) \ $(kvm_ioctl_array) \ $(vhost_virtio_ioctl_array) \ $(madvise_behavior_array) \ @@ -836,6 +844,7 @@ clean:: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clea $(OUTPUT)$(sndrv_ctl_ioctl_array) \ $(OUTPUT)$(sndrv_pcm_ioctl_array) \ $(OUTPUT)$(kvm_ioctl_array) \ + $(OUTPUT)$(kcmp_type_array) \ $(OUTPUT)$(vhost_virtio_ioctl_array) \ $(OUTPUT)$(perf_ioctl_array) \ $(OUTPUT)$(prctl_option_array) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 0c1461416ef1..505b871fdc82 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -633,6 +633,12 @@ static struct syscall_fmt { #else [2] = { .scnprintf = SCA_HEX, /* arg */ }, }, }, #endif + { .name = "kcmp", .nr_args = 5, + .arg = { [0] = { .name = "pid1", .scnprintf = SCA_PID, }, + [1] = { .name = "pid2", .scnprintf = SCA_PID, }, + [2] = { .name = "type", .scnprintf = SCA_KCMP_TYPE, }, + [3] = { .name = "idx1", .scnprintf = SCA_KCMP_IDX, }, + [4] = { .name = "idx2", .scnprintf = SCA_KCMP_IDX, }, }, }, { .name = "keyctl", .arg = { [0] = STRARRAY(option, keyctl_options), }, }, { .name = "kill", diff --git a/tools/perf/trace/beauty/Build b/tools/perf/trace/beauty/Build index 2f68b76ec39b..066bbf0f4a74 100644 --- a/tools/perf/trace/beauty/Build +++ b/tools/perf/trace/beauty/Build @@ -3,6 +3,7 @@ libperf-y += fcntl.o ifeq ($(SRCARCH),$(filter $(SRCARCH),x86)) libperf-y += ioctl.o endif +libperf-y += kcmp.o libperf-y += pkey_alloc.o libperf-y += prctl.o libperf-y += statx.o diff --git a/tools/perf/trace/beauty/beauty.h b/tools/perf/trace/beauty/beauty.h index 365bb6fecc1c..3f067bdab84f 100644 --- a/tools/perf/trace/beauty/beauty.h +++ b/tools/perf/trace/beauty/beauty.h @@ -81,6 +81,12 @@ size_t syscall_arg__scnprintf_fcntl_arg(char *bf, size_t size, struct syscall_ar size_t syscall_arg__scnprintf_ioctl_cmd(char *bf, size_t size, struct syscall_arg *arg); #define SCA_IOCTL_CMD syscall_arg__scnprintf_ioctl_cmd +size_t syscall_arg__scnprintf_kcmp_type(char *bf, size_t size, struct syscall_arg *arg); +#define SCA_KCMP_TYPE syscall_arg__scnprintf_kcmp_type + +size_t syscall_arg__scnprintf_kcmp_idx(char *bf, size_t size, struct syscall_arg *arg); +#define SCA_KCMP_IDX syscall_arg__scnprintf_kcmp_idx + size_t syscall_arg__scnprintf_pkey_alloc_access_rights(char *bf, size_t size, struct syscall_arg *arg); #define SCA_PKEY_ALLOC_ACCESS_RIGHTS syscall_arg__scnprintf_pkey_alloc_access_rights diff --git a/tools/perf/trace/beauty/kcmp.c b/tools/perf/trace/beauty/kcmp.c new file mode 100644 index 000000000000..f62040eb9d5c --- /dev/null +++ b/tools/perf/trace/beauty/kcmp.c @@ -0,0 +1,44 @@ +/* + * trace/beauty/kcmp.c + * + * Copyright (C) 2017, Red Hat Inc, Arnaldo Carvalho de Melo + * + * Released under the GPL v2. (and only v2, not any later version) + */ + +#include "trace/beauty/beauty.h" +#include +#include +#include +#include + +#include "trace/beauty/generated/kcmp_type_array.c" + +size_t syscall_arg__scnprintf_kcmp_idx(char *bf, size_t size, struct syscall_arg *arg) +{ + unsigned long fd = arg->val; + int type = syscall_arg__val(arg, 2); + pid_t pid; + + if (type != KCMP_FILE) + return syscall_arg__scnprintf_long(bf, size, arg); + + pid = syscall_arg__val(arg, arg->idx == 3 ? 0 : 1); /* idx1 -> pid1, idx2 -> pid2 */ + return pid__scnprintf_fd(arg->trace, pid, fd, bf, size); +} + +static size_t kcmp__scnprintf_type(int type, char *bf, size_t size) +{ + static DEFINE_STRARRAY(kcmp_types); + return strarray__scnprintf(&strarray__kcmp_types, bf, size, "%d", type); +} + +size_t syscall_arg__scnprintf_kcmp_type(char *bf, size_t size, struct syscall_arg *arg) +{ + unsigned long type = arg->val; + + if (type != KCMP_FILE) + arg->mask |= (1 << 3) | (1 << 4); /* Ignore idx1 and idx2 */ + + return kcmp__scnprintf_type(type, bf, size); +} diff --git a/tools/perf/trace/beauty/kcmp_type.sh b/tools/perf/trace/beauty/kcmp_type.sh new file mode 100755 index 000000000000..40d063b8c082 --- /dev/null +++ b/tools/perf/trace/beauty/kcmp_type.sh @@ -0,0 +1,10 @@ +#!/bin/sh + +header_dir=$1 + +printf "static const char *kcmp_types[] = {\n" +regex='^[[:space:]]+(KCMP_(\w+)),' +egrep $regex ${header_dir}/kcmp.h | grep -v KCMP_TYPES, | \ + sed -r "s/$regex/\1 \2/g" | \ + xargs printf "\t[%s]\t= \"%s\",\n" +printf "};\n" -- cgit v1.2.3 From b7b75a60b291cc699ca9bb2a8517a1b3b08bbeb1 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 31 Oct 2017 11:06:53 +0900 Subject: perf srcline: Fix memory leak in addr2inlines() When libbfd is not used, addr2inlines() executes `addr2line -i` and process output line by line. But it resets filename to NULL in the loop so getline() allocates additional memory everytime instead of realloc. Signed-off-by: Namhyung Kim Acked-by: Jiri Olsa Cc: Jin Yao Cc: Milian Wolff Cc: Peter Zijlstra Cc: kernel-team@lge.com Link: http://lkml.kernel.org/r/20171031020654.31163-1-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/srcline.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/util/srcline.c b/tools/perf/util/srcline.c index c143c3bc1ef8..51dc49c65476 100644 --- a/tools/perf/util/srcline.c +++ b/tools/perf/util/srcline.c @@ -456,20 +456,17 @@ static struct inline_node *addr2inlines(const char *dso_name, u64 addr, while (getline(&filename, &len, fp) != -1) { char *srcline; - if (filename_split(filename, &line_nr) != 1) { - free(filename); + if (filename_split(filename, &line_nr) != 1) goto out; - } srcline = srcline_from_fileline(filename, line_nr); if (inline_list__append(sym, srcline, node) != 0) goto out; - - filename = NULL; } out: pclose(fp); + free(filename); return node; } -- cgit v1.2.3 From 7285cf3325b4a1dfb336d31eebc27dfbc30fb9aa Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 31 Oct 2017 11:06:54 +0900 Subject: perf srcline: Show correct function name for srcline of callchains When libbfd is not used, it doesn't show proper function name and reuse the original symbol of the sample. That's because it passes the original sym to inline_list__append(). As `addr2line -f` returns function names as well, use that to create an inline_sym and pass it to inline_list__append(). For example, following data shows that inlined entries of main have same name (main). Before: $ perf report -g srcline -q | head 45.22% inlining libm-2.26.so [.] __hypot_finite | ---__hypot_finite ??:0 | |--44.15%--hypot ??:0 | main complex:589 | main complex:597 | main complex:654 | main complex:664 | main inlining.cpp:14 After: $ perf report -g srcline -q | head 45.22% inlining libm-2.26.so [.] __hypot_finite | ---__hypot_finite | |--44.15%--hypot | std::__complex_abs complex:589 (inlined) | std::abs complex:597 (inlined) | std::_Norm_helper::_S_do_it complex:654 (inlined) | std::norm complex:664 (inlined) | main inlining.cpp:14 Signed-off-by: Namhyung Kim Reviewed-by: Jiri Olsa Reviewed-by: Milian Wolff Cc: Jin Yao Cc: Peter Zijlstra Cc: kernel-team@lge.com Link: http://lkml.kernel.org/r/20171031020654.31163-2-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/srcline.c | 95 +++++++++++++++++++++++++++-------------------- 1 file changed, 55 insertions(+), 40 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/util/srcline.c b/tools/perf/util/srcline.c index 51dc49c65476..ad1b46f1f2cf 100644 --- a/tools/perf/util/srcline.c +++ b/tools/perf/util/srcline.c @@ -10,7 +10,7 @@ #include "util/debug.h" #include "util/callchain.h" #include "srcline.h" - +#include "string2.h" #include "symbol.h" bool srcline_full_filename; @@ -77,6 +77,41 @@ static char *srcline_from_fileline(const char *file, unsigned int line) return srcline; } +static struct symbol *new_inline_sym(struct dso *dso, + struct symbol *base_sym, + const char *funcname) +{ + struct symbol *inline_sym; + char *demangled = NULL; + + if (dso) { + demangled = dso__demangle_sym(dso, 0, funcname); + if (demangled) + funcname = demangled; + } + + if (base_sym && strcmp(funcname, base_sym->name) == 0) { + /* reuse the real, existing symbol */ + inline_sym = base_sym; + /* ensure that we don't alias an inlined symbol, which could + * lead to double frees in inline_node__delete + */ + assert(!base_sym->inlined); + } else { + /* create a fake symbol for the inline frame */ + inline_sym = symbol__new(base_sym ? base_sym->start : 0, + base_sym ? base_sym->end : 0, + base_sym ? base_sym->binding : 0, + funcname); + if (inline_sym) + inline_sym->inlined = 1; + } + + free(demangled); + + return inline_sym; +} + #ifdef HAVE_LIBBFD_SUPPORT /* @@ -219,41 +254,6 @@ static void addr2line_cleanup(struct a2l_data *a2l) #define MAX_INLINE_NEST 1024 -static struct symbol *new_inline_sym(struct dso *dso, - struct symbol *base_sym, - const char *funcname) -{ - struct symbol *inline_sym; - char *demangled = NULL; - - if (dso) { - demangled = dso__demangle_sym(dso, 0, funcname); - if (demangled) - funcname = demangled; - } - - if (base_sym && strcmp(funcname, base_sym->name) == 0) { - /* reuse the real, existing symbol */ - inline_sym = base_sym; - /* ensure that we don't alias an inlined symbol, which could - * lead to double frees in inline_node__delete - */ - assert(!base_sym->inlined); - } else { - /* create a fake symbol for the inline frame */ - inline_sym = symbol__new(base_sym ? base_sym->start : 0, - base_sym ? base_sym->end : 0, - base_sym ? base_sym->binding : 0, - funcname); - if (inline_sym) - inline_sym->inlined = 1; - } - - free(demangled); - - return inline_sym; -} - static int inline_list__append_dso_a2l(struct dso *dso, struct inline_node *node, struct symbol *sym) @@ -432,10 +432,11 @@ static struct inline_node *addr2inlines(const char *dso_name, u64 addr, char cmd[PATH_MAX]; struct inline_node *node; char *filename = NULL; - size_t len; + char *funcname = NULL; + size_t filelen, funclen; unsigned int line_nr = 0; - scnprintf(cmd, sizeof(cmd), "addr2line -e %s -i %016"PRIx64, + scnprintf(cmd, sizeof(cmd), "addr2line -e %s -i -f %016"PRIx64, dso_name, addr); fp = popen(cmd, "r"); @@ -453,20 +454,34 @@ static struct inline_node *addr2inlines(const char *dso_name, u64 addr, INIT_LIST_HEAD(&node->val); node->addr = addr; - while (getline(&filename, &len, fp) != -1) { + /* addr2line -f generates two lines for each inlined functions */ + while (getline(&funcname, &funclen, fp) != -1) { char *srcline; + struct symbol *inline_sym; + + rtrim(funcname); + + if (getline(&filename, &filelen, fp) == -1) + goto out; if (filename_split(filename, &line_nr) != 1) goto out; srcline = srcline_from_fileline(filename, line_nr); - if (inline_list__append(sym, srcline, node) != 0) + inline_sym = new_inline_sym(dso, sym, funcname); + + if (inline_list__append(inline_sym, srcline, node) != 0) { + free(srcline); + if (inline_sym && inline_sym->inlined) + symbol__delete(inline_sym); goto out; + } } out: pclose(fp); free(filename); + free(funcname); return node; } -- cgit v1.2.3