summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--tools/perf/arch/x86/util/intel-pt.c4
-rw-r--r--tools/perf/util/intel-pt.c75
2 files changed, 79 insertions, 0 deletions
diff --git a/tools/perf/arch/x86/util/intel-pt.c b/tools/perf/arch/x86/util/intel-pt.c
index 6ce451293634..082e5f2a415a 100644
--- a/tools/perf/arch/x86/util/intel-pt.c
+++ b/tools/perf/arch/x86/util/intel-pt.c
@@ -837,6 +837,10 @@ static int intel_pt_recording_options(struct auxtrace_record *itr,
}
}
+ if (have_timing_info && !intel_pt_evsel->core.attr.exclude_kernel &&
+ perf_can_record_text_poke_events() && perf_can_record_cpu_wide())
+ opts->text_poke = true;
+
if (intel_pt_evsel) {
/*
* To obtain the auxtrace buffer file descriptor, the auxtrace
diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c
index cb3c1e569a2d..6c7e44ac444d 100644
--- a/tools/perf/util/intel-pt.c
+++ b/tools/perf/util/intel-pt.c
@@ -520,6 +520,17 @@ intel_pt_cache_lookup(struct dso *dso, struct machine *machine, u64 offset)
return auxtrace_cache__lookup(dso->auxtrace_cache, offset);
}
+static void intel_pt_cache_invalidate(struct dso *dso, struct machine *machine,
+ u64 offset)
+{
+ struct auxtrace_cache *c = intel_pt_cache(dso, machine);
+
+ if (!c)
+ return;
+
+ auxtrace_cache__remove(dso->auxtrace_cache, offset);
+}
+
static inline u8 intel_pt_cpumode(struct intel_pt *pt, uint64_t ip)
{
return ip >= pt->kernel_start ?
@@ -2594,6 +2605,67 @@ static int intel_pt_process_itrace_start(struct intel_pt *pt,
event->itrace_start.tid);
}
+static int intel_pt_find_map(struct thread *thread, u8 cpumode, u64 addr,
+ struct addr_location *al)
+{
+ if (!al->map || addr < al->map->start || addr >= al->map->end) {
+ if (!thread__find_map(thread, cpumode, addr, al))
+ return -1;
+ }
+
+ return 0;
+}
+
+/* Invalidate all instruction cache entries that overlap the text poke */
+static int intel_pt_text_poke(struct intel_pt *pt, union perf_event *event)
+{
+ u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
+ u64 addr = event->text_poke.addr + event->text_poke.new_len - 1;
+ /* Assume text poke begins in a basic block no more than 4096 bytes */
+ int cnt = 4096 + event->text_poke.new_len;
+ struct thread *thread = pt->unknown_thread;
+ struct addr_location al = { .map = NULL };
+ struct machine *machine = pt->machine;
+ struct intel_pt_cache_entry *e;
+ u64 offset;
+
+ if (!event->text_poke.new_len)
+ return 0;
+
+ for (; cnt; cnt--, addr--) {
+ if (intel_pt_find_map(thread, cpumode, addr, &al)) {
+ if (addr < event->text_poke.addr)
+ return 0;
+ continue;
+ }
+
+ if (!al.map->dso || !al.map->dso->auxtrace_cache)
+ continue;
+
+ offset = al.map->map_ip(al.map, addr);
+
+ e = intel_pt_cache_lookup(al.map->dso, machine, offset);
+ if (!e)
+ continue;
+
+ if (addr + e->byte_cnt + e->length <= event->text_poke.addr) {
+ /*
+ * No overlap. Working backwards there cannot be another
+ * basic block that overlaps the text poke if there is a
+ * branch instruction before the text poke address.
+ */
+ if (e->branch != INTEL_PT_BR_NO_BRANCH)
+ return 0;
+ } else {
+ intel_pt_cache_invalidate(al.map->dso, machine, offset);
+ intel_pt_log("Invalidated instruction cache for %s at %#"PRIx64"\n",
+ al.map->dso->long_name, addr);
+ }
+ }
+
+ return 0;
+}
+
static int intel_pt_process_event(struct perf_session *session,
union perf_event *event,
struct perf_sample *sample,
@@ -2662,6 +2734,9 @@ static int intel_pt_process_event(struct perf_session *session,
event->header.type == PERF_RECORD_SWITCH_CPU_WIDE)
err = intel_pt_context_switch(pt, event, sample);
+ if (!err && event->header.type == PERF_RECORD_TEXT_POKE)
+ err = intel_pt_text_poke(pt, event);
+
intel_pt_log("event %u: cpu %d time %"PRIu64" tsc %#"PRIx64" ",
event->header.type, sample->cpu, sample->time, timestamp);
intel_pt_log_event(event);