summaryrefslogtreecommitdiff
path: root/tools/perf/util/arm-spe.c
diff options
context:
space:
mode:
Diffstat (limited to 'tools/perf/util/arm-spe.c')
-rw-r--r--tools/perf/util/arm-spe.c73
1 files changed, 63 insertions, 10 deletions
diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c
index 2539d4baec44..58b7069c5a5f 100644
--- a/tools/perf/util/arm-spe.c
+++ b/tools/perf/util/arm-spe.c
@@ -26,6 +26,7 @@
#include "symbol.h"
#include "thread.h"
#include "thread-stack.h"
+#include "tsc.h"
#include "tool.h"
#include "util/synthetic-events.h"
@@ -45,6 +46,8 @@ struct arm_spe {
struct machine *machine;
u32 pmu_type;
+ struct perf_tsc_conversion tc;
+
u8 timeless_decoding;
u8 data_queued;
@@ -231,7 +234,7 @@ static void arm_spe_prep_sample(struct arm_spe *spe,
struct arm_spe_record *record = &speq->decoder->record;
if (!spe->timeless_decoding)
- sample->time = speq->timestamp;
+ sample->time = tsc_to_perf_time(record->timestamp, &spe->tc);
sample->ip = record->from_ip;
sample->cpumode = arm_spe_cpumode(spe, sample->ip);
@@ -431,12 +434,36 @@ static int arm_spe_sample(struct arm_spe_queue *speq)
static int arm_spe_run_decoder(struct arm_spe_queue *speq, u64 *timestamp)
{
struct arm_spe *spe = speq->spe;
+ struct arm_spe_record *record;
int ret;
if (!spe->kernel_start)
spe->kernel_start = machine__kernel_start(spe->machine);
while (1) {
+ /*
+ * The usual logic is firstly to decode the packets, and then
+ * based the record to synthesize sample; but here the flow is
+ * reversed: it calls arm_spe_sample() for synthesizing samples
+ * prior to arm_spe_decode().
+ *
+ * Two reasons for this code logic:
+ * 1. Firstly, when setup queue in arm_spe__setup_queue(), it
+ * has decoded trace data and generated a record, but the record
+ * is left to generate sample until run to here, so it's correct
+ * to synthesize sample for the left record.
+ * 2. After decoding trace data, it needs to compare the record
+ * timestamp with the coming perf event, if the record timestamp
+ * is later than the perf event, it needs bail out and pushs the
+ * record into auxtrace heap, thus the record can be deferred to
+ * synthesize sample until run to here at the next time; so this
+ * can correlate samples between Arm SPE trace data and other
+ * perf events with correct time ordering.
+ */
+ ret = arm_spe_sample(speq);
+ if (ret)
+ return ret;
+
ret = arm_spe_decode(speq->decoder);
if (!ret) {
pr_debug("No data or all data has been processed.\n");
@@ -450,10 +477,17 @@ static int arm_spe_run_decoder(struct arm_spe_queue *speq, u64 *timestamp)
if (ret < 0)
continue;
- ret = arm_spe_sample(speq);
- if (ret)
- return ret;
+ record = &speq->decoder->record;
+ /* Update timestamp for the last record */
+ if (record->timestamp > speq->timestamp)
+ speq->timestamp = record->timestamp;
+
+ /*
+ * If the timestamp of the queue is later than timestamp of the
+ * coming perf event, bail out so can allow the perf event to
+ * be processed ahead.
+ */
if (!spe->timeless_decoding && speq->timestamp >= *timestamp) {
*timestamp = speq->timestamp;
return 0;
@@ -666,7 +700,7 @@ static int arm_spe_process_event(struct perf_session *session,
}
if (sample->time && (sample->time != (u64) -1))
- timestamp = sample->time;
+ timestamp = perf_time_to_tsc(sample->time, &spe->tc);
else
timestamp = 0;
@@ -683,11 +717,7 @@ static int arm_spe_process_event(struct perf_session *session,
sample->time);
}
} else if (timestamp) {
- if (event->header.type == PERF_RECORD_EXIT) {
- err = arm_spe_process_queues(spe, timestamp);
- if (err)
- return err;
- }
+ err = arm_spe_process_queues(spe, timestamp);
}
return err;
@@ -1006,6 +1036,7 @@ int arm_spe_process_auxtrace_info(union perf_event *event,
{
struct perf_record_auxtrace_info *auxtrace_info = &event->auxtrace_info;
size_t min_sz = sizeof(u64) * ARM_SPE_AUXTRACE_PRIV_MAX;
+ struct perf_record_time_conv *tc = &session->time_conv;
struct arm_spe *spe;
int err;
@@ -1027,6 +1058,28 @@ int arm_spe_process_auxtrace_info(union perf_event *event,
spe->pmu_type = auxtrace_info->priv[ARM_SPE_PMU_TYPE];
spe->timeless_decoding = arm_spe__is_timeless_decoding(spe);
+
+ /*
+ * The synthesized event PERF_RECORD_TIME_CONV has been handled ahead
+ * and the parameters for hardware clock are stored in the session
+ * context. Passes these parameters to the struct perf_tsc_conversion
+ * in "spe->tc", which is used for later conversion between clock
+ * counter and timestamp.
+ *
+ * For backward compatibility, copies the fields starting from
+ * "time_cycles" only if they are contained in the event.
+ */
+ spe->tc.time_shift = tc->time_shift;
+ spe->tc.time_mult = tc->time_mult;
+ spe->tc.time_zero = tc->time_zero;
+
+ if (event_contains(*tc, time_cycles)) {
+ spe->tc.time_cycles = tc->time_cycles;
+ spe->tc.time_mask = tc->time_mask;
+ spe->tc.cap_user_time_zero = tc->cap_user_time_zero;
+ spe->tc.cap_user_time_short = tc->cap_user_time_short;
+ }
+
spe->auxtrace.process_event = arm_spe_process_event;
spe->auxtrace.process_auxtrace_event = arm_spe_process_auxtrace_event;
spe->auxtrace.flush_events = arm_spe_flush;