summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--tools/perf/Documentation/perf-stat.txt16
-rw-r--r--tools/perf/builtin-stat.c56
2 files changed, 72 insertions, 0 deletions
diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt
index 29bdcfa93f04..785f0e2bcfac 100644
--- a/tools/perf/Documentation/perf-stat.txt
+++ b/tools/perf/Documentation/perf-stat.txt
@@ -308,6 +308,14 @@ use --per-die in addition to -a. (system-wide). The output includes the
die number and the number of online processors on that die. This is
useful to gauge the amount of aggregation.
+--per-cache::
+Aggregate counts per cache instance for system-wide mode measurements. By
+default, the aggregation happens for the cache level at the highest index
+in the system. To specify a particular level, mention the cache level
+alongside the option in the format [Ll][1-9][0-9]*. For example:
+Using option "--per-cache=l3" or "--per-cache=L3" will aggregate the
+information at the boundary of the level 3 cache in the system.
+
--per-core::
Aggregate counts per physical processor for system-wide mode measurements. This
is a useful mode to detect imbalance between physical cores. To enable this mode,
@@ -379,6 +387,14 @@ Aggregate counts per processor socket for system-wide mode measurements.
--per-die::
Aggregate counts per processor die for system-wide mode measurements.
+--per-cache::
+Aggregate counts per cache instance for system-wide mode measurements. By
+default, the aggregation happens for the cache level at the highest index
+in the system. To specify a particular level, mention the cache level
+alongside the option in the format [Ll][1-9][0-9]*. For example: Using
+option "--per-cache=l3" or "--per-cache=L3" will aggregate the
+information at the boundary of the level 3 cache in the system.
+
--per-core::
Aggregate counts per physical processor for system-wide mode measurements.
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 0528d1bc15d2..176deeb8ee66 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -1113,6 +1113,55 @@ static int parse_cputype(const struct option *opt,
return 0;
}
+static int parse_cache_level(const struct option *opt,
+ const char *str,
+ int unset __maybe_unused)
+{
+ int level;
+ u32 *aggr_mode = (u32 *)opt->value;
+ u32 *aggr_level = (u32 *)opt->data;
+
+ /*
+ * If no string is specified, aggregate based on the topology of
+ * Last Level Cache (LLC). Since the LLC level can change from
+ * architecture to architecture, set level greater than
+ * MAX_CACHE_LVL which will be interpreted as LLC.
+ */
+ if (str == NULL) {
+ level = MAX_CACHE_LVL + 1;
+ goto out;
+ }
+
+ /*
+ * The format to specify cache level is LX or lX where X is the
+ * cache level.
+ */
+ if (strlen(str) != 2 || (str[0] != 'l' && str[0] != 'L')) {
+ pr_err("Cache level must be of form L[1-%d], or l[1-%d]\n",
+ MAX_CACHE_LVL,
+ MAX_CACHE_LVL);
+ return -EINVAL;
+ }
+
+ level = atoi(&str[1]);
+ if (level < 1) {
+ pr_err("Cache level must be of form L[1-%d], or l[1-%d]\n",
+ MAX_CACHE_LVL,
+ MAX_CACHE_LVL);
+ return -EINVAL;
+ }
+
+ if (level > MAX_CACHE_LVL) {
+ pr_err("perf only supports max cache level of %d.\n"
+ "Consider increasing MAX_CACHE_LVL\n", MAX_CACHE_LVL);
+ return -EINVAL;
+ }
+out:
+ *aggr_mode = AGGR_CACHE;
+ *aggr_level = level;
+ return 0;
+}
+
static struct option stat_options[] = {
OPT_BOOLEAN('T', "transaction", &transaction_run,
"hardware transaction statistics"),
@@ -1190,6 +1239,9 @@ static struct option stat_options[] = {
"aggregate counts per processor socket", AGGR_SOCKET),
OPT_SET_UINT(0, "per-die", &stat_config.aggr_mode,
"aggregate counts per processor die", AGGR_DIE),
+ OPT_CALLBACK_OPTARG(0, "per-cache", &stat_config.aggr_mode, &stat_config.aggr_level,
+ "cache level", "aggregate count at this cache level (Default: LLC)",
+ parse_cache_level),
OPT_SET_UINT(0, "per-core", &stat_config.aggr_mode,
"aggregate counts per physical processor core", AGGR_CORE),
OPT_SET_UINT(0, "per-thread", &stat_config.aggr_mode,
@@ -2346,6 +2398,10 @@ static int __cmd_report(int argc, const char **argv)
"aggregate counts per processor socket", AGGR_SOCKET),
OPT_SET_UINT(0, "per-die", &perf_stat.aggr_mode,
"aggregate counts per processor die", AGGR_DIE),
+ OPT_CALLBACK_OPTARG(0, "per-cache", &perf_stat.aggr_mode, &perf_stat.aggr_level,
+ "cache level",
+ "aggregate count at this cache level (Default: LLC)",
+ parse_cache_level),
OPT_SET_UINT(0, "per-core", &perf_stat.aggr_mode,
"aggregate counts per physical processor core", AGGR_CORE),
OPT_SET_UINT(0, "per-node", &perf_stat.aggr_mode,