summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--tools/perf/Documentation/perf-c2c.txt6
-rw-r--r--tools/perf/builtin-c2c.c21
-rw-r--r--tools/perf/util/cacheline.h25
-rw-r--r--tools/perf/util/sort.c13
-rw-r--r--tools/perf/util/sort.h1
5 files changed, 49 insertions, 17 deletions
diff --git a/tools/perf/Documentation/perf-c2c.txt b/tools/perf/Documentation/perf-c2c.txt
index 4e8c263e1721..856f0dfb8e5a 100644
--- a/tools/perf/Documentation/perf-c2c.txt
+++ b/tools/perf/Documentation/perf-c2c.txt
@@ -130,6 +130,12 @@ REPORT OPTIONS
The known limitations include exception handing such as
setjmp/longjmp will have calls/returns not match.
+--double-cl::
+ Group the detection of shared cacheline events into double cacheline
+ granularity. Some architectures have an Adjacent Cacheline Prefetch
+ feature, which causes cacheline sharing to behave like the cacheline
+ size is doubled.
+
C2C RECORD
----------
The perf c2c record command setup options related to HITM cacheline analysis
diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c
index 52d94c7dd836..56974eae0638 100644
--- a/tools/perf/builtin-c2c.c
+++ b/tools/perf/builtin-c2c.c
@@ -524,7 +524,7 @@ static int dcacheline_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
char buf[20];
if (he->mem_info)
- addr = cl_address(he->mem_info->daddr.addr);
+ addr = cl_address(he->mem_info->daddr.addr, chk_double_cl);
return scnprintf(hpp->buf, hpp->size, "%*s", width, HEX_STR(buf, addr));
}
@@ -562,7 +562,7 @@ static int offset_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
char buf[20];
if (he->mem_info)
- addr = cl_offset(he->mem_info->daddr.al_addr);
+ addr = cl_offset(he->mem_info->daddr.al_addr, chk_double_cl);
return scnprintf(hpp->buf, hpp->size, "%*s", width, HEX_STR(buf, addr));
}
@@ -574,9 +574,10 @@ offset_cmp(struct perf_hpp_fmt *fmt __maybe_unused,
uint64_t l = 0, r = 0;
if (left->mem_info)
- l = cl_offset(left->mem_info->daddr.addr);
+ l = cl_offset(left->mem_info->daddr.addr, chk_double_cl);
+
if (right->mem_info)
- r = cl_offset(right->mem_info->daddr.addr);
+ r = cl_offset(right->mem_info->daddr.addr, chk_double_cl);
return (int64_t)(r - l);
}
@@ -2590,7 +2591,7 @@ perf_c2c_cacheline_browser__title(struct hist_browser *browser,
he = cl_browser->he;
if (he->mem_info)
- addr = cl_address(he->mem_info->daddr.addr);
+ addr = cl_address(he->mem_info->daddr.addr, chk_double_cl);
scnprintf(bf, size, "Cacheline 0x%lx", addr);
return 0;
@@ -2788,15 +2789,16 @@ static int ui_quirks(void)
if (!c2c.use_stdio) {
dim_offset.width = 5;
dim_offset.header = header_offset_tui;
- nodestr = "CL";
+ nodestr = chk_double_cl ? "Double-CL" : "CL";
}
dim_percent_costly_snoop.header = percent_costly_snoop_header[c2c.display];
/* Fix the zero line for dcacheline column. */
- buf = fill_line("Cacheline", dim_dcacheline.width +
- dim_dcacheline_node.width +
- dim_dcacheline_count.width + 4);
+ buf = fill_line(chk_double_cl ? "Double-Cacheline" : "Cacheline",
+ dim_dcacheline.width +
+ dim_dcacheline_node.width +
+ dim_dcacheline_count.width + 4);
if (!buf)
return -ENOMEM;
@@ -3037,6 +3039,7 @@ static int perf_c2c__report(int argc, const char **argv)
OPT_BOOLEAN('f', "force", &symbol_conf.force, "don't complain, do it"),
OPT_BOOLEAN(0, "stitch-lbr", &c2c.stitch_lbr,
"Enable LBR callgraph stitching approach"),
+ OPT_BOOLEAN(0, "double-cl", &chk_double_cl, "Detect adjacent cacheline false sharing"),
OPT_PARENT(c2c_options),
OPT_END()
};
diff --git a/tools/perf/util/cacheline.h b/tools/perf/util/cacheline.h
index dec8c0fb1f4a..fe6d5b60a031 100644
--- a/tools/perf/util/cacheline.h
+++ b/tools/perf/util/cacheline.h
@@ -6,16 +6,31 @@
int __pure cacheline_size(void);
-static inline u64 cl_address(u64 address)
+
+/*
+ * Some architectures have 'Adjacent Cacheline Prefetch' feature,
+ * which performs like the cacheline size being doubled.
+ */
+static inline u64 cl_address(u64 address, bool double_cl)
{
+ u64 size = cacheline_size();
+
+ if (double_cl)
+ size *= 2;
+
/* return the cacheline of the address */
- return (address & ~(cacheline_size() - 1));
+ return (address & ~(size - 1));
}
-static inline u64 cl_offset(u64 address)
+static inline u64 cl_offset(u64 address, bool double_cl)
{
- /* return the cacheline of the address */
- return (address & (cacheline_size() - 1));
+ u64 size = cacheline_size();
+
+ if (double_cl)
+ size *= 2;
+
+ /* return the offset inside cacheline */
+ return (address & (size - 1));
}
#endif // PERF_CACHELINE_H
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index 4a648231fe72..093a0c8b2e3d 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -54,6 +54,13 @@ static const char *const dynamic_headers[] = {"local_ins_lat", "ins_lat", "local
static const char *const arch_specific_sort_keys[] = {"local_p_stage_cyc", "p_stage_cyc"};
/*
+ * Some architectures have Adjacent Cacheline Prefetch feature, which
+ * behaves like the cacheline size is doubled. Enable this flag to
+ * check things in double cacheline granularity.
+ */
+bool chk_double_cl;
+
+/*
* Replaces all occurrences of a char used with the:
*
* -t, --field-separator
@@ -1500,8 +1507,8 @@ sort__dcacheline_cmp(struct hist_entry *left, struct hist_entry *right)
addr:
/* al_addr does all the right addr - start + offset calculations */
- l = cl_address(left->mem_info->daddr.al_addr);
- r = cl_address(right->mem_info->daddr.al_addr);
+ l = cl_address(left->mem_info->daddr.al_addr, chk_double_cl);
+ r = cl_address(right->mem_info->daddr.al_addr, chk_double_cl);
if (l > r) return -1;
if (l < r) return 1;
@@ -1520,7 +1527,7 @@ static int hist_entry__dcacheline_snprintf(struct hist_entry *he, char *bf,
if (he->mem_info) {
struct map *map = he->mem_info->daddr.ms.map;
- addr = cl_address(he->mem_info->daddr.al_addr);
+ addr = cl_address(he->mem_info->daddr.al_addr, chk_double_cl);
ms = &he->mem_info->daddr.ms;
/* print [s] for shared data mmaps */
diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h
index 9a91d0df2833..d79a100e5999 100644
--- a/tools/perf/util/sort.h
+++ b/tools/perf/util/sort.h
@@ -35,6 +35,7 @@ extern struct sort_entry sort_sym_from;
extern struct sort_entry sort_sym_to;
extern struct sort_entry sort_srcline;
extern const char default_mem_sort_order[];
+extern bool chk_double_cl;
struct res_sample {
u64 time;