summaryrefslogtreecommitdiff
path: root/tools/perf/util/callchain.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2017-11-14 00:05:08 +0300
committerLinus Torvalds <torvalds@linux-foundation.org>2017-11-14 00:05:08 +0300
commit31486372a1e9a66ec2e9e2903b8792bba7e503e1 (patch)
tree84f61e0758695e6fbee8d2b7d7b9bc4a5ce6e03b /tools/perf/util/callchain.c
parent8e9a2dba8686187d8c8179e5b86640e653963889 (diff)
parentfcdfafcb73be8fa45909327bbddca46fb362a675 (diff)
downloadlinux-31486372a1e9a66ec2e9e2903b8792bba7e503e1.tar.xz
Merge branch 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull perf updates from Ingo Molnar: "The main changes in this cycle were: Kernel: - kprobes updates: use better W^X patterns for code modifications, improve optprobes, remove jprobes. (Masami Hiramatsu, Kees Cook) - core fixes: event timekeeping (enabled/running times statistics) fixes, perf_event_read() locking fixes and cleanups, etc. (Peter Zijlstra) - Extend x86 Intel free-running PEBS support and support x86 user-register sampling in perf record and perf script. (Andi Kleen) Tooling: - Completely rework the way inline frames are handled. Instead of querying for the inline nodes on-demand in the individual tools, we now create proper callchain nodes for inlined frames. (Milian Wolff) - 'perf trace' updates (Arnaldo Carvalho de Melo) - Implement a way to print formatted output to per-event files in 'perf script' to facilitate generate flamegraphs, elliminating the need to write scripts to do that separation (yuzhoujian, Arnaldo Carvalho de Melo) - Update vendor events JSON metrics for Intel's Broadwell, Broadwell Server, Haswell, Haswell Server, IvyBridge, IvyTown, JakeTown, Sandy Bridge, Skylake, SkyLake Server - and Goldmont Plus V1 (Andi Kleen, Kan Liang) - Multithread the synthesizing of PERF_RECORD_ events for pre-existing threads in 'perf top', speeding up that phase, greatly improving the user experience in systems such as Intel's Knights Mill (Kan Liang) - Introduce the concept of weak groups in 'perf stat': try to set up a group, but if it's not schedulable fallback to not using a group. That gives us the best of both worlds: groups if they work, but still a usable fallback if they don't. E.g: (Andi Kleen) - perf sched timehist enhancements (David Ahern) - ... various other enhancements, updates, cleanups and fixes" * 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (139 commits) kprobes: Don't spam the build log with deprecation warnings arm/kprobes: Remove jprobe test case arm/kprobes: Fix kretprobe test to check correct counter perf srcline: Show correct function name for srcline of callchains perf srcline: Fix memory leak in addr2inlines() perf trace beauty kcmp: Beautify arguments perf trace beauty: Implement pid_fd beautifier tools include uapi: Grab a copy of linux/kcmp.h perf callchain: Fix double mapping al->addr for children without self period perf stat: Make --per-thread update shadow stats to show metrics perf stat: Move the shadow stats scale computation in perf_stat__update_shadow_stats perf tools: Add perf_data_file__write function perf tools: Add struct perf_data_file perf tools: Rename struct perf_data_file to perf_data perf script: Print information about per-event-dump files perf trace beauty prctl: Generate 'option' string table from kernel headers tools include uapi: Grab a copy of linux/prctl.h perf script: Allow creating per-event dump files perf evsel: Restore evsel->priv as a tool private area perf script: Use event_format__fprintf() ...
Diffstat (limited to 'tools/perf/util/callchain.c')
-rw-r--r--tools/perf/util/callchain.c179
1 files changed, 98 insertions, 81 deletions
diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c
index 6031933d811c..082505d08d72 100644
--- a/tools/perf/util/callchain.c
+++ b/tools/perf/util/callchain.c
@@ -567,6 +567,7 @@ fill_node(struct callchain_node *node, struct callchain_cursor *cursor)
call->ip = cursor_node->ip;
call->ms.sym = cursor_node->sym;
call->ms.map = map__get(cursor_node->map);
+ call->srcline = cursor_node->srcline;
if (cursor_node->branch) {
call->branch_count = 1;
@@ -645,103 +646,120 @@ enum match_result {
MATCH_GT,
};
-static enum match_result match_chain_srcline(struct callchain_cursor_node *node,
- struct callchain_list *cnode)
+static enum match_result match_chain_strings(const char *left,
+ const char *right)
{
- char *left = NULL;
- char *right = NULL;
enum match_result ret = MATCH_EQ;
int cmp;
- if (cnode->ms.map)
- left = get_srcline(cnode->ms.map->dso,
- map__rip_2objdump(cnode->ms.map, cnode->ip),
- cnode->ms.sym, true, false);
- if (node->map)
- right = get_srcline(node->map->dso,
- map__rip_2objdump(node->map, node->ip),
- node->sym, true, false);
-
if (left && right)
cmp = strcmp(left, right);
else if (!left && right)
cmp = 1;
else if (left && !right)
cmp = -1;
- else if (cnode->ip == node->ip)
- cmp = 0;
else
- cmp = (cnode->ip < node->ip) ? -1 : 1;
+ return MATCH_ERROR;
if (cmp != 0)
ret = cmp < 0 ? MATCH_LT : MATCH_GT;
- free_srcline(left);
- free_srcline(right);
return ret;
}
+/*
+ * We need to always use relative addresses because we're aggregating
+ * callchains from multiple threads, i.e. different address spaces, so
+ * comparing absolute addresses make no sense as a symbol in a DSO may end up
+ * in a different address when used in a different binary or even the same
+ * binary but with some sort of address randomization technique, thus we need
+ * to compare just relative addresses. -acme
+ */
+static enum match_result match_chain_dso_addresses(struct map *left_map, u64 left_ip,
+ struct map *right_map, u64 right_ip)
+{
+ struct dso *left_dso = left_map ? left_map->dso : NULL;
+ struct dso *right_dso = right_map ? right_map->dso : NULL;
+
+ if (left_dso != right_dso)
+ return left_dso < right_dso ? MATCH_LT : MATCH_GT;
+
+ if (left_ip != right_ip)
+ return left_ip < right_ip ? MATCH_LT : MATCH_GT;
+
+ return MATCH_EQ;
+}
+
static enum match_result match_chain(struct callchain_cursor_node *node,
struct callchain_list *cnode)
{
- struct symbol *sym = node->sym;
- u64 left, right;
- struct dso *left_dso = NULL;
- struct dso *right_dso = NULL;
-
- if (callchain_param.key == CCKEY_SRCLINE) {
- enum match_result match = match_chain_srcline(node, cnode);
+ enum match_result match = MATCH_ERROR;
+ switch (callchain_param.key) {
+ case CCKEY_SRCLINE:
+ match = match_chain_strings(cnode->srcline, node->srcline);
if (match != MATCH_ERROR)
- return match;
- }
-
- if (cnode->ms.sym && sym && callchain_param.key == CCKEY_FUNCTION) {
- left = cnode->ms.sym->start;
- right = sym->start;
- left_dso = cnode->ms.map->dso;
- right_dso = node->map->dso;
- } else {
- left = cnode->ip;
- right = node->ip;
+ break;
+ /* otherwise fall-back to symbol-based comparison below */
+ __fallthrough;
+ case CCKEY_FUNCTION:
+ if (node->sym && cnode->ms.sym) {
+ /*
+ * Compare inlined frames based on their symbol name
+ * because different inlined frames will have the same
+ * symbol start. Otherwise do a faster comparison based
+ * on the symbol start address.
+ */
+ if (cnode->ms.sym->inlined || node->sym->inlined) {
+ match = match_chain_strings(cnode->ms.sym->name,
+ node->sym->name);
+ if (match != MATCH_ERROR)
+ break;
+ } else {
+ match = match_chain_dso_addresses(cnode->ms.map, cnode->ms.sym->start,
+ node->map, node->sym->start);
+ break;
+ }
+ }
+ /* otherwise fall-back to IP-based comparison below */
+ __fallthrough;
+ case CCKEY_ADDRESS:
+ default:
+ match = match_chain_dso_addresses(cnode->ms.map, cnode->ip, node->map, node->ip);
+ break;
}
- if (left == right && left_dso == right_dso) {
- if (node->branch) {
- cnode->branch_count++;
+ if (match == MATCH_EQ && node->branch) {
+ cnode->branch_count++;
- if (node->branch_from) {
- /*
- * It's "to" of a branch
- */
- cnode->brtype_stat.branch_to = true;
+ if (node->branch_from) {
+ /*
+ * It's "to" of a branch
+ */
+ cnode->brtype_stat.branch_to = true;
- if (node->branch_flags.predicted)
- cnode->predicted_count++;
+ if (node->branch_flags.predicted)
+ cnode->predicted_count++;
- if (node->branch_flags.abort)
- cnode->abort_count++;
+ if (node->branch_flags.abort)
+ cnode->abort_count++;
- branch_type_count(&cnode->brtype_stat,
- &node->branch_flags,
- node->branch_from,
- node->ip);
- } else {
- /*
- * It's "from" of a branch
- */
- cnode->brtype_stat.branch_to = false;
- cnode->cycles_count +=
- node->branch_flags.cycles;
- cnode->iter_count += node->nr_loop_iter;
- cnode->iter_cycles += node->iter_cycles;
- }
+ branch_type_count(&cnode->brtype_stat,
+ &node->branch_flags,
+ node->branch_from,
+ node->ip);
+ } else {
+ /*
+ * It's "from" of a branch
+ */
+ cnode->brtype_stat.branch_to = false;
+ cnode->cycles_count += node->branch_flags.cycles;
+ cnode->iter_count += node->nr_loop_iter;
+ cnode->iter_cycles += node->iter_cycles;
}
-
- return MATCH_EQ;
}
- return left > right ? MATCH_GT : MATCH_LT;
+ return match;
}
/*
@@ -970,7 +988,7 @@ merge_chain_branch(struct callchain_cursor *cursor,
list_for_each_entry_safe(list, next_list, &src->val, list) {
callchain_cursor_append(cursor, list->ip,
list->ms.map, list->ms.sym,
- false, NULL, 0, 0, 0);
+ false, NULL, 0, 0, 0, list->srcline);
list_del(&list->list);
map__zput(list->ms.map);
free(list);
@@ -1010,7 +1028,8 @@ int callchain_merge(struct callchain_cursor *cursor,
int callchain_cursor_append(struct callchain_cursor *cursor,
u64 ip, struct map *map, struct symbol *sym,
bool branch, struct branch_flags *flags,
- int nr_loop_iter, u64 iter_cycles, u64 branch_from)
+ int nr_loop_iter, u64 iter_cycles, u64 branch_from,
+ const char *srcline)
{
struct callchain_cursor_node *node = *cursor->last;
@@ -1029,6 +1048,7 @@ int callchain_cursor_append(struct callchain_cursor *cursor,
node->branch = branch;
node->nr_loop_iter = nr_loop_iter;
node->iter_cycles = iter_cycles;
+ node->srcline = srcline;
if (flags)
memcpy(&node->branch_flags, flags,
@@ -1071,10 +1091,8 @@ int fill_callchain_info(struct addr_location *al, struct callchain_cursor_node *
{
al->map = node->map;
al->sym = node->sym;
- if (node->map)
- al->addr = node->map->map_ip(node->map, node->ip);
- else
- al->addr = node->ip;
+ al->srcline = node->srcline;
+ al->addr = node->ip;
if (al->sym == NULL) {
if (hide_unresolved)
@@ -1116,16 +1134,15 @@ char *callchain_list__sym_name(struct callchain_list *cl,
int printed;
if (cl->ms.sym) {
- if (show_srcline && cl->ms.map && !cl->srcline)
- cl->srcline = get_srcline(cl->ms.map->dso,
- map__rip_2objdump(cl->ms.map,
- cl->ip),
- cl->ms.sym, false, show_addr);
- if (cl->srcline)
- printed = scnprintf(bf, bfsize, "%s %s",
- cl->ms.sym->name, cl->srcline);
+ const char *inlined = cl->ms.sym->inlined ? " (inlined)" : "";
+
+ if (show_srcline && cl->srcline)
+ printed = scnprintf(bf, bfsize, "%s %s%s",
+ cl->ms.sym->name, cl->srcline,
+ inlined);
else
- printed = scnprintf(bf, bfsize, "%s", cl->ms.sym->name);
+ printed = scnprintf(bf, bfsize, "%s%s",
+ cl->ms.sym->name, inlined);
} else
printed = scnprintf(bf, bfsize, "%#" PRIx64, cl->ip);
@@ -1533,7 +1550,7 @@ int callchain_cursor__copy(struct callchain_cursor *dst,
node->branch, &node->branch_flags,
node->nr_loop_iter,
node->iter_cycles,
- node->branch_from);
+ node->branch_from, node->srcline);
if (rc)
break;