summaryrefslogtreecommitdiff
path: root/tools/perf/util
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2019-01-07 03:30:14 +0300
committerLinus Torvalds <torvalds@linux-foundation.org>2019-01-07 03:30:14 +0300
commitac5eed2b41776b05cf03aac761d3bb5e64eea24c (patch)
treec9bf703ffaf0265fa1135f0dd6f65485184a3570 /tools/perf/util
parent574823bfab82d9d8fa47f422778043fbb4b4f50e (diff)
parent2573be22e5b6f24a0cabc97715c808c47e29eaaf (diff)
downloadlinux-ac5eed2b41776b05cf03aac761d3bb5e64eea24c.tar.xz
Merge branch 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull perf tooling updates form Ingo Molnar: "A final batch of perf tooling changes: mostly fixes and small improvements" * 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (29 commits) perf session: Add comment for perf_session__register_idle_thread() perf thread-stack: Fix thread stack processing for the idle task perf thread-stack: Allocate an array of thread stacks perf thread-stack: Factor out thread_stack__init() perf thread-stack: Allow for a thread stack array perf thread-stack: Avoid direct reference to the thread's stack perf thread-stack: Tidy thread_stack__bottom() usage perf thread-stack: Simplify some code in thread_stack__process() tools gpio: Allow overriding CFLAGS tools power turbostat: Override CFLAGS assignments and add LDFLAGS to build command tools thermal tmon: Allow overriding CFLAGS assignments tools power x86_energy_perf_policy: Override CFLAGS assignments and add LDFLAGS to build command perf c2c: Increase the HITM ratio limit for displayed cachelines perf c2c: Change the default coalesce setup perf trace beauty ioctl: Beautify USBDEVFS_ commands perf trace beauty: Export function to get the files for a thread perf trace: Wire up ioctl's USBDEBFS_ cmd table generator perf beauty ioctl: Add generator for USBDEVFS_ ioctl commands tools headers uapi: Grab a copy of usbdevice_fs.h perf trace: Store the major number for a file when storing its pathname ...
Diffstat (limited to 'tools/perf/util')
-rw-r--r--tools/perf/util/dump-insn.c8
-rw-r--r--tools/perf/util/dump-insn.h2
-rw-r--r--tools/perf/util/intel-bts.c4
-rw-r--r--tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c8
-rw-r--r--tools/perf/util/intel-pt.c6
-rw-r--r--tools/perf/util/python.c3
-rw-r--r--tools/perf/util/session.c7
-rw-r--r--tools/perf/util/thread-stack.c227
-rw-r--r--tools/perf/util/thread-stack.h8
9 files changed, 200 insertions, 73 deletions
diff --git a/tools/perf/util/dump-insn.c b/tools/perf/util/dump-insn.c
index 10988d3de7ce..2bd8585db93c 100644
--- a/tools/perf/util/dump-insn.c
+++ b/tools/perf/util/dump-insn.c
@@ -13,3 +13,11 @@ const char *dump_insn(struct perf_insn *x __maybe_unused,
*lenp = 0;
return "?";
}
+
+__weak
+int arch_is_branch(const unsigned char *buf __maybe_unused,
+ size_t len __maybe_unused,
+ int x86_64 __maybe_unused)
+{
+ return 0;
+}
diff --git a/tools/perf/util/dump-insn.h b/tools/perf/util/dump-insn.h
index 0e06280a8860..650125061530 100644
--- a/tools/perf/util/dump-insn.h
+++ b/tools/perf/util/dump-insn.h
@@ -20,4 +20,6 @@ struct perf_insn {
const char *dump_insn(struct perf_insn *x, u64 ip,
u8 *inbuf, int inlen, int *lenp);
+int arch_is_branch(const unsigned char *buf, size_t len, int x86_64);
+
#endif
diff --git a/tools/perf/util/intel-bts.c b/tools/perf/util/intel-bts.c
index 7b27d77306c2..ee6ca65f81f4 100644
--- a/tools/perf/util/intel-bts.c
+++ b/tools/perf/util/intel-bts.c
@@ -451,7 +451,7 @@ static int intel_bts_process_buffer(struct intel_bts_queue *btsq,
continue;
intel_bts_get_branch_type(btsq, branch);
if (btsq->bts->synth_opts.thread_stack)
- thread_stack__event(thread, btsq->sample_flags,
+ thread_stack__event(thread, btsq->cpu, btsq->sample_flags,
le64_to_cpu(branch->from),
le64_to_cpu(branch->to),
btsq->intel_pt_insn.length,
@@ -523,7 +523,7 @@ static int intel_bts_process_queue(struct intel_bts_queue *btsq, u64 *timestamp)
!btsq->bts->synth_opts.thread_stack && thread &&
(!old_buffer || btsq->bts->sampling_mode ||
(btsq->bts->snapshot_mode && !buffer->consecutive)))
- thread_stack__set_trace_nr(thread, buffer->buffer_nr + 1);
+ thread_stack__set_trace_nr(thread, btsq->cpu, buffer->buffer_nr + 1);
err = intel_bts_process_buffer(btsq, buffer, thread);
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c
index 54818828023b..1c0e289f01e6 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c
@@ -180,6 +180,14 @@ int intel_pt_get_insn(const unsigned char *buf, size_t len, int x86_64,
return 0;
}
+int arch_is_branch(const unsigned char *buf, size_t len, int x86_64)
+{
+ struct intel_pt_insn in;
+ if (intel_pt_get_insn(buf, len, x86_64, &in) < 0)
+ return -1;
+ return in.branch != INTEL_PT_BR_NO_BRANCH;
+}
+
const char *dump_insn(struct perf_insn *x, uint64_t ip __maybe_unused,
u8 *inbuf, int inlen, int *lenp)
{
diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c
index 149ff361ca78..2e72373ec6df 100644
--- a/tools/perf/util/intel-pt.c
+++ b/tools/perf/util/intel-pt.c
@@ -1174,7 +1174,7 @@ static void intel_pt_prep_sample(struct intel_pt *pt,
intel_pt_prep_b_sample(pt, ptq, event, sample);
if (pt->synth_opts.callchain) {
- thread_stack__sample(ptq->thread, ptq->chain,
+ thread_stack__sample(ptq->thread, ptq->cpu, ptq->chain,
pt->synth_opts.callchain_sz + 1,
sample->ip, pt->kernel_start);
sample->callchain = ptq->chain;
@@ -1526,11 +1526,11 @@ static int intel_pt_sample(struct intel_pt_queue *ptq)
return 0;
if (pt->synth_opts.callchain || pt->synth_opts.thread_stack)
- thread_stack__event(ptq->thread, ptq->flags, state->from_ip,
+ thread_stack__event(ptq->thread, ptq->cpu, ptq->flags, state->from_ip,
state->to_ip, ptq->insn_len,
state->trace_nr);
else
- thread_stack__set_trace_nr(ptq->thread, state->trace_nr);
+ thread_stack__set_trace_nr(ptq->thread, ptq->cpu, state->trace_nr);
if (pt->sample_branches) {
err = intel_pt_synth_branch_sample(ptq);
diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c
index 47628e85c5eb..dda0ac978b1e 100644
--- a/tools/perf/util/python.c
+++ b/tools/perf/util/python.c
@@ -939,7 +939,8 @@ static PyObject *pyrf_evlist__get_pollfd(struct pyrf_evlist *pevlist,
file = PyFile_FromFile(fp, "perf", "r", NULL);
#else
- file = PyFile_FromFd(evlist->pollfd.entries[i].fd, "perf", "r", -1, NULL, NULL, NULL, 1);
+ file = PyFile_FromFd(evlist->pollfd.entries[i].fd, "perf", "r", -1,
+ NULL, NULL, NULL, 0);
#endif
if (file == NULL)
goto free_list;
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 78a067777144..5456c84c7dd1 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -1527,6 +1527,13 @@ struct thread *perf_session__findnew(struct perf_session *session, pid_t pid)
return machine__findnew_thread(&session->machines.host, -1, pid);
}
+/*
+ * Threads are identified by pid and tid, and the idle task has pid == tid == 0.
+ * So here a single thread is created for that, but actually there is a separate
+ * idle task per cpu, so there should be one 'struct thread' per cpu, but there
+ * is only 1. That causes problems for some tools, requiring workarounds. For
+ * example get_idle_thread() in builtin-sched.c, or thread_stack__per_cpu().
+ */
int perf_session__register_idle_thread(struct perf_session *session)
{
struct thread *thread;
diff --git a/tools/perf/util/thread-stack.c b/tools/perf/util/thread-stack.c
index 61a4286a74dc..d52f27f373ce 100644
--- a/tools/perf/util/thread-stack.c
+++ b/tools/perf/util/thread-stack.c
@@ -15,6 +15,7 @@
#include <linux/rbtree.h>
#include <linux/list.h>
+#include <linux/log2.h>
#include <errno.h>
#include "thread.h"
#include "event.h"
@@ -60,6 +61,7 @@ struct thread_stack_entry {
* @last_time: last timestamp
* @crp: call/return processor
* @comm: current comm
+ * @arr_sz: size of array if this is the first element of an array
*/
struct thread_stack {
struct thread_stack_entry *stack;
@@ -71,8 +73,19 @@ struct thread_stack {
u64 last_time;
struct call_return_processor *crp;
struct comm *comm;
+ unsigned int arr_sz;
};
+/*
+ * Assume pid == tid == 0 identifies the idle task as defined by
+ * perf_session__register_idle_thread(). The idle task is really 1 task per cpu,
+ * and therefore requires a stack for each cpu.
+ */
+static inline bool thread_stack__per_cpu(struct thread *thread)
+{
+ return !(thread->tid || thread->pid_);
+}
+
static int thread_stack__grow(struct thread_stack *ts)
{
struct thread_stack_entry *new_stack;
@@ -91,19 +104,14 @@ static int thread_stack__grow(struct thread_stack *ts)
return 0;
}
-static struct thread_stack *thread_stack__new(struct thread *thread,
- struct call_return_processor *crp)
+static int thread_stack__init(struct thread_stack *ts, struct thread *thread,
+ struct call_return_processor *crp)
{
- struct thread_stack *ts;
-
- ts = zalloc(sizeof(struct thread_stack));
- if (!ts)
- return NULL;
+ int err;
- if (thread_stack__grow(ts)) {
- free(ts);
- return NULL;
- }
+ err = thread_stack__grow(ts);
+ if (err)
+ return err;
if (thread->mg && thread->mg->machine)
ts->kernel_start = machine__kernel_start(thread->mg->machine);
@@ -111,9 +119,72 @@ static struct thread_stack *thread_stack__new(struct thread *thread,
ts->kernel_start = 1ULL << 63;
ts->crp = crp;
+ return 0;
+}
+
+static struct thread_stack *thread_stack__new(struct thread *thread, int cpu,
+ struct call_return_processor *crp)
+{
+ struct thread_stack *ts = thread->ts, *new_ts;
+ unsigned int old_sz = ts ? ts->arr_sz : 0;
+ unsigned int new_sz = 1;
+
+ if (thread_stack__per_cpu(thread) && cpu > 0)
+ new_sz = roundup_pow_of_two(cpu + 1);
+
+ if (!ts || new_sz > old_sz) {
+ new_ts = calloc(new_sz, sizeof(*ts));
+ if (!new_ts)
+ return NULL;
+ if (ts)
+ memcpy(new_ts, ts, old_sz * sizeof(*ts));
+ new_ts->arr_sz = new_sz;
+ zfree(&thread->ts);
+ thread->ts = new_ts;
+ ts = new_ts;
+ }
+
+ if (thread_stack__per_cpu(thread) && cpu > 0 &&
+ (unsigned int)cpu < ts->arr_sz)
+ ts += cpu;
+
+ if (!ts->stack &&
+ thread_stack__init(ts, thread, crp))
+ return NULL;
+
return ts;
}
+static struct thread_stack *thread__cpu_stack(struct thread *thread, int cpu)
+{
+ struct thread_stack *ts = thread->ts;
+
+ if (cpu < 0)
+ cpu = 0;
+
+ if (!ts || (unsigned int)cpu >= ts->arr_sz)
+ return NULL;
+
+ ts += cpu;
+
+ if (!ts->stack)
+ return NULL;
+
+ return ts;
+}
+
+static inline struct thread_stack *thread__stack(struct thread *thread,
+ int cpu)
+{
+ if (!thread)
+ return NULL;
+
+ if (thread_stack__per_cpu(thread))
+ return thread__cpu_stack(thread, cpu);
+
+ return thread->ts;
+}
+
static int thread_stack__push(struct thread_stack *ts, u64 ret_addr,
bool trace_end)
{
@@ -226,25 +297,37 @@ static int __thread_stack__flush(struct thread *thread, struct thread_stack *ts)
int thread_stack__flush(struct thread *thread)
{
- if (thread->ts)
- return __thread_stack__flush(thread, thread->ts);
+ struct thread_stack *ts = thread->ts;
+ unsigned int pos;
+ int err = 0;
- return 0;
+ if (ts) {
+ for (pos = 0; pos < ts->arr_sz; pos++) {
+ int ret = __thread_stack__flush(thread, ts + pos);
+
+ if (ret)
+ err = ret;
+ }
+ }
+
+ return err;
}
-int thread_stack__event(struct thread *thread, u32 flags, u64 from_ip,
+int thread_stack__event(struct thread *thread, int cpu, u32 flags, u64 from_ip,
u64 to_ip, u16 insn_len, u64 trace_nr)
{
+ struct thread_stack *ts = thread__stack(thread, cpu);
+
if (!thread)
return -EINVAL;
- if (!thread->ts) {
- thread->ts = thread_stack__new(thread, NULL);
- if (!thread->ts) {
+ if (!ts) {
+ ts = thread_stack__new(thread, cpu, NULL);
+ if (!ts) {
pr_warning("Out of memory: no thread stack\n");
return -ENOMEM;
}
- thread->ts->trace_nr = trace_nr;
+ ts->trace_nr = trace_nr;
}
/*
@@ -252,14 +335,14 @@ int thread_stack__event(struct thread *thread, u32 flags, u64 from_ip,
* the stack might be completely invalid. Better to report nothing than
* to report something misleading, so flush the stack.
*/
- if (trace_nr != thread->ts->trace_nr) {
- if (thread->ts->trace_nr)
- __thread_stack__flush(thread, thread->ts);
- thread->ts->trace_nr = trace_nr;
+ if (trace_nr != ts->trace_nr) {
+ if (ts->trace_nr)
+ __thread_stack__flush(thread, ts);
+ ts->trace_nr = trace_nr;
}
/* Stop here if thread_stack__process() is in use */
- if (thread->ts->crp)
+ if (ts->crp)
return 0;
if (flags & PERF_IP_FLAG_CALL) {
@@ -270,7 +353,7 @@ int thread_stack__event(struct thread *thread, u32 flags, u64 from_ip,
ret_addr = from_ip + insn_len;
if (ret_addr == to_ip)
return 0; /* Zero-length calls are excluded */
- return thread_stack__push(thread->ts, ret_addr,
+ return thread_stack__push(ts, ret_addr,
flags & PERF_IP_FLAG_TRACE_END);
} else if (flags & PERF_IP_FLAG_TRACE_BEGIN) {
/*
@@ -280,32 +363,52 @@ int thread_stack__event(struct thread *thread, u32 flags, u64 from_ip,
* address, so try to pop that. Also, do not expect a call made
* when the trace ended, to return, so pop that.
*/
- thread_stack__pop(thread->ts, to_ip);
- thread_stack__pop_trace_end(thread->ts);
+ thread_stack__pop(ts, to_ip);
+ thread_stack__pop_trace_end(ts);
} else if ((flags & PERF_IP_FLAG_RETURN) && from_ip) {
- thread_stack__pop(thread->ts, to_ip);
+ thread_stack__pop(ts, to_ip);
}
return 0;
}
-void thread_stack__set_trace_nr(struct thread *thread, u64 trace_nr)
+void thread_stack__set_trace_nr(struct thread *thread, int cpu, u64 trace_nr)
{
- if (!thread || !thread->ts)
+ struct thread_stack *ts = thread__stack(thread, cpu);
+
+ if (!ts)
return;
- if (trace_nr != thread->ts->trace_nr) {
- if (thread->ts->trace_nr)
- __thread_stack__flush(thread, thread->ts);
- thread->ts->trace_nr = trace_nr;
+ if (trace_nr != ts->trace_nr) {
+ if (ts->trace_nr)
+ __thread_stack__flush(thread, ts);
+ ts->trace_nr = trace_nr;
}
}
+static void __thread_stack__free(struct thread *thread, struct thread_stack *ts)
+{
+ __thread_stack__flush(thread, ts);
+ zfree(&ts->stack);
+}
+
+static void thread_stack__reset(struct thread *thread, struct thread_stack *ts)
+{
+ unsigned int arr_sz = ts->arr_sz;
+
+ __thread_stack__free(thread, ts);
+ memset(ts, 0, sizeof(*ts));
+ ts->arr_sz = arr_sz;
+}
+
void thread_stack__free(struct thread *thread)
{
- if (thread->ts) {
- __thread_stack__flush(thread, thread->ts);
- zfree(&thread->ts->stack);
+ struct thread_stack *ts = thread->ts;
+ unsigned int pos;
+
+ if (ts) {
+ for (pos = 0; pos < ts->arr_sz; pos++)
+ __thread_stack__free(thread, ts + pos);
zfree(&thread->ts);
}
}
@@ -315,9 +418,11 @@ static inline u64 callchain_context(u64 ip, u64 kernel_start)
return ip < kernel_start ? PERF_CONTEXT_USER : PERF_CONTEXT_KERNEL;
}
-void thread_stack__sample(struct thread *thread, struct ip_callchain *chain,
+void thread_stack__sample(struct thread *thread, int cpu,
+ struct ip_callchain *chain,
size_t sz, u64 ip, u64 kernel_start)
{
+ struct thread_stack *ts = thread__stack(thread, cpu);
u64 context = callchain_context(ip, kernel_start);
u64 last_context;
size_t i, j;
@@ -330,15 +435,15 @@ void thread_stack__sample(struct thread *thread, struct ip_callchain *chain,
chain->ips[0] = context;
chain->ips[1] = ip;
- if (!thread || !thread->ts) {
+ if (!ts) {
chain->nr = 2;
return;
}
last_context = context;
- for (i = 2, j = 1; i < sz && j <= thread->ts->cnt; i++, j++) {
- ip = thread->ts->stack[thread->ts->cnt - j].ret_addr;
+ for (i = 2, j = 1; i < sz && j <= ts->cnt; i++, j++) {
+ ip = ts->stack[ts->cnt - j].ret_addr;
context = callchain_context(ip, kernel_start);
if (context != last_context) {
if (i >= sz - 1)
@@ -449,7 +554,7 @@ static int thread_stack__pop_cp(struct thread *thread, struct thread_stack *ts,
return 1;
}
-static int thread_stack__bottom(struct thread *thread, struct thread_stack *ts,
+static int thread_stack__bottom(struct thread_stack *ts,
struct perf_sample *sample,
struct addr_location *from_al,
struct addr_location *to_al, u64 ref)
@@ -474,7 +579,7 @@ static int thread_stack__bottom(struct thread *thread, struct thread_stack *ts,
if (!cp)
return -ENOMEM;
- return thread_stack__push_cp(thread->ts, ip, sample->time, ref, cp,
+ return thread_stack__push_cp(ts, ip, sample->time, ref, cp,
true, false);
}
@@ -590,24 +695,19 @@ int thread_stack__process(struct thread *thread, struct comm *comm,
struct addr_location *to_al, u64 ref,
struct call_return_processor *crp)
{
- struct thread_stack *ts = thread->ts;
+ struct thread_stack *ts = thread__stack(thread, sample->cpu);
int err = 0;
- if (ts) {
- if (!ts->crp) {
- /* Supersede thread_stack__event() */
- thread_stack__free(thread);
- thread->ts = thread_stack__new(thread, crp);
- if (!thread->ts)
- return -ENOMEM;
- ts = thread->ts;
- ts->comm = comm;
- }
- } else {
- thread->ts = thread_stack__new(thread, crp);
- if (!thread->ts)
+ if (ts && !ts->crp) {
+ /* Supersede thread_stack__event() */
+ thread_stack__reset(thread, ts);
+ ts = NULL;
+ }
+
+ if (!ts) {
+ ts = thread_stack__new(thread, sample->cpu, crp);
+ if (!ts)
return -ENOMEM;
- ts = thread->ts;
ts->comm = comm;
}
@@ -621,8 +721,7 @@ int thread_stack__process(struct thread *thread, struct comm *comm,
/* If the stack is empty, put the current symbol on the stack */
if (!ts->cnt) {
- err = thread_stack__bottom(thread, ts, sample, from_al, to_al,
- ref);
+ err = thread_stack__bottom(ts, sample, from_al, to_al, ref);
if (err)
return err;
}
@@ -671,9 +770,11 @@ int thread_stack__process(struct thread *thread, struct comm *comm,
return err;
}
-size_t thread_stack__depth(struct thread *thread)
+size_t thread_stack__depth(struct thread *thread, int cpu)
{
- if (!thread->ts)
+ struct thread_stack *ts = thread__stack(thread, cpu);
+
+ if (!ts)
return 0;
- return thread->ts->cnt;
+ return ts->cnt;
}
diff --git a/tools/perf/util/thread-stack.h b/tools/perf/util/thread-stack.h
index f97c00a8c251..1f626f4a1c40 100644
--- a/tools/perf/util/thread-stack.h
+++ b/tools/perf/util/thread-stack.h
@@ -80,14 +80,14 @@ struct call_return_processor {
void *data;
};
-int thread_stack__event(struct thread *thread, u32 flags, u64 from_ip,
+int thread_stack__event(struct thread *thread, int cpu, u32 flags, u64 from_ip,
u64 to_ip, u16 insn_len, u64 trace_nr);
-void thread_stack__set_trace_nr(struct thread *thread, u64 trace_nr);
-void thread_stack__sample(struct thread *thread, struct ip_callchain *chain,
+void thread_stack__set_trace_nr(struct thread *thread, int cpu, u64 trace_nr);
+void thread_stack__sample(struct thread *thread, int cpu, struct ip_callchain *chain,
size_t sz, u64 ip, u64 kernel_start);
int thread_stack__flush(struct thread *thread);
void thread_stack__free(struct thread *thread);
-size_t thread_stack__depth(struct thread *thread);
+size_t thread_stack__depth(struct thread *thread, int cpu);
struct call_return_processor *
call_return_processor__new(int (*process)(struct call_return *cr, void *data),